From 324396466c720c3a5b961225c7f52aa8043f916d Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 23 May 2019 09:07:51 +0000
Subject: [PATCH 0001/1176] DWARF: Don't compute address ranges for type units

Summary:
Type units don't describe any code, so they should never be the result
of any address lookup queries.

Previously, we would compute the address ranges for the type units for
via the line tables they reference because the type units looked a lot
like line-tables-only compile units. However, this is not correct, as
the line tables are only referenced from type units so that other
declarations can use the file names contained in them.

In this patch I make the BuildAddressRangeTable function virtual, and
implement it only for compile units.

Testing this was a bit tricky, because the behavior depends on the order
in which we add things to the address range map. This rarely caused a
problem with DWARF v4 type units, as they are always added after all
CUs. It happened more frequently with DWARF v5, as there clang emits the
type units first. However, this is still not something that it is
required to do, so for testing I've created an assembly file where I've
deliberately sandwiched a compile unit between two type units, which
should isolate us from both changes in how the compiler emits the units
and changes in the order we process them.

Reviewers: clayborg, aprantl, JDevlieghere

Subscribers: jdoerfert, lldb-commits

Differential Revision: https://reviews.llvm.org/D62178

llvm-svn: 361465
---
 .../DWARF/debug-types-address-ranges.s        | 338 ++++++++++++++++++
 .../SymbolFile/DWARF/DWARFCompileUnit.cpp     |  93 +++++
 .../SymbolFile/DWARF/DWARFCompileUnit.h       |   2 +
 .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h  |   2 +
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |  95 -----
 .../Plugins/SymbolFile/DWARF/DWARFUnit.h      |  38 +-
 .../SymbolFile/DWARF/SymbolFileDWARF.h        |   2 +-
 .../DWARF/SymbolFileDWARFDebugMap.h           |   2 +-
 8 files changed, 456 insertions(+), 116 deletions(-)
 create mode 100644 lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s

diff --git a/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s b/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s
new file mode 100644
index 0000000000000..892c93d3822cc
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s
@@ -0,0 +1,338 @@
+# Check address lookup works correctly in the presence of type units.
+# Specifically check that we don't use the line table pointed to by the
+# DW_AT_stmt_list of the type unit (which used only for the file names) to
+# compute address range for the type unit as type units don't describe any
+# addresses. The addresses should always resolve to the relevant compile units.
+
+# RUN: llvm-mc -dwarf-version=5 -triple x86_64-pc-linux %s -filetype=obj >%t.o
+# RUN: ld.lld %t.o -o %t -image-base=0x47000
+# RUN: %lldb %t -o "image lookup -a 0x48000 -v" -o exit | FileCheck %s
+
+# CHECK:   CompileUnit: id = {0x00000001}, file = "/tmp/a.cc", language = "c++"
+# CHECK:      Function: id = {0x7fffffff0000006a}, name = "::_start({{.*}})", range = [0x0000000000048000-0x000000000004800c)
+# CHECK:     LineEntry: [0x0000000000048000-0x000000000004800a): /tmp/a.cc:4
+# CHECK:        Symbol: id = {0x00000002}, range = [0x0000000000048000-0x000000000004800c), name="_start"
+# CHECK:      Variable: id = {0x7fffffff00000075}, name = "v1", {{.*}} decl = a.cc:4
+# CHECK:      Variable: id = {0x7fffffff00000080}, name = "v2", {{.*}} decl = a.cc:4
+
+
+# Output generated via
+# clang -g -fdebug-types-section -gdwarf-5 -S
+# from
+# enum E1 { e1 };
+# enum E2 { e2 };
+# extern "C" void _start(E1 v1, E2 v2) {}
+# The output was modified to place the compile unit in between the two type
+# units.
+
+        .text
+        .file   "a.cc"
+        .file   0 "/tmp" "a.cc"
+
+        .text
+        .globl  _start                  # -- Begin function _start
+        .p2align        4, 0x90
+        .type   _start,@function
+_start:                                 # @_start
+.Lfunc_begin0:
+        .loc    0 4 0                   # /tmp/a.cc:4:0
+        .cfi_startproc
+# %bb.0:                                # %entry
+        pushq   %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset %rbp, -16
+        movq    %rsp, %rbp
+        .cfi_def_cfa_register %rbp
+        movl    %edi, -4(%rbp)
+        movl    %esi, -8(%rbp)
+.Ltmp0:
+        .loc    0 4 23 prologue_end     # /tmp/a.cc:4:23
+        popq    %rbp
+        .cfi_def_cfa %rsp, 8
+        retq
+.Ltmp1:
+.Lfunc_end0:
+        .size   _start, .Lfunc_end0-_start
+        .cfi_endproc
+                                        # -- End function
+        .section        .debug_str_offsets,"",@progbits
+        .long   52
+        .short  5
+        .short  0
+.Lstr_offsets_base0:
+        .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz  "clang version 9.0.0 (trunk 360907) (llvm/trunk 360908)"
+.Linfo_string1:
+        .asciz  "a.cc"
+.Linfo_string2:
+        .asciz  "/tmp"
+.Linfo_string3:
+        .asciz  "unsigned int"
+.Linfo_string4:
+        .asciz  "e1"
+.Linfo_string5:
+        .asciz  "E1"
+.Linfo_string6:
+        .asciz  "e2"
+.Linfo_string7:
+        .asciz  "E2"
+.Linfo_string8:
+        .asciz  "_start"
+.Linfo_string9:
+        .asciz  "f"
+.Linfo_string10:
+        .asciz  "v1"
+.Linfo_string11:
+        .asciz  "v2"
+        .section        .debug_str_offsets,"",@progbits
+        .long   .Linfo_string0
+        .long   .Linfo_string1
+        .long   .Linfo_string2
+        .long   .Linfo_string3
+        .long   .Linfo_string4
+        .long   .Linfo_string5
+        .long   .Linfo_string6
+        .long   .Linfo_string7
+        .long   .Linfo_string8
+        .long   .Linfo_string9
+        .long   .Linfo_string10
+        .long   .Linfo_string11
+        .section        .debug_abbrev,"",@progbits
+        .byte   1                       # Abbreviation Code
+        .byte   65                      # DW_TAG_type_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   19                      # DW_AT_language
+        .byte   5                       # DW_FORM_data2
+        .byte   16                      # DW_AT_stmt_list
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   114                     # DW_AT_str_offsets_base
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   2                       # Abbreviation Code
+        .byte   4                       # DW_TAG_enumeration_type
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   73                      # DW_AT_type
+        .byte   19                      # DW_FORM_ref4
+        .byte   3                       # DW_AT_name
+        .byte   37                      # DW_FORM_strx1
+        .byte   11                      # DW_AT_byte_size
+        .byte   11                      # DW_FORM_data1
+        .byte   58                      # DW_AT_decl_file
+        .byte   11                      # DW_FORM_data1
+        .byte   59                      # DW_AT_decl_line
+        .byte   11                      # DW_FORM_data1
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   3                       # Abbreviation Code
+        .byte   40                      # DW_TAG_enumerator
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   37                      # DW_FORM_strx1
+        .byte   28                      # DW_AT_const_value
+        .byte   15                      # DW_FORM_udata
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   4                       # Abbreviation Code
+        .byte   36                      # DW_TAG_base_type
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   37                      # DW_FORM_strx1
+        .byte   62                      # DW_AT_encoding
+        .byte   11                      # DW_FORM_data1
+        .byte   11                      # DW_AT_byte_size
+        .byte   11                      # DW_FORM_data1
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   5                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   37                      # DW_AT_producer
+        .byte   37                      # DW_FORM_strx1
+        .byte   19                      # DW_AT_language
+        .byte   5                       # DW_FORM_data2
+        .byte   3                       # DW_AT_name
+        .byte   37                      # DW_FORM_strx1
+        .byte   114                     # DW_AT_str_offsets_base
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   16                      # DW_AT_stmt_list
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   27                      # DW_AT_comp_dir
+        .byte   37                      # DW_FORM_strx1
+        .byte   17                      # DW_AT_low_pc
+        .byte   27                      # DW_FORM_addrx
+        .byte   18                      # DW_AT_high_pc
+        .byte   6                       # DW_FORM_data4
+        .byte   115                     # DW_AT_addr_base
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   6                       # Abbreviation Code
+        .byte   4                       # DW_TAG_enumeration_type
+        .byte   0                       # DW_CHILDREN_no
+        .byte   60                      # DW_AT_declaration
+        .byte   25                      # DW_FORM_flag_present
+        .byte   105                     # DW_AT_signature
+        .byte   32                      # DW_FORM_ref_sig8
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   7                       # Abbreviation Code
+        .byte   46                      # DW_TAG_subprogram
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   17                      # DW_AT_low_pc
+        .byte   27                      # DW_FORM_addrx
+        .byte   18                      # DW_AT_high_pc
+        .byte   6                       # DW_FORM_data4
+        .byte   64                      # DW_AT_frame_base
+        .byte   24                      # DW_FORM_exprloc
+        .byte   3                       # DW_AT_name
+        .byte   37                      # DW_FORM_strx1
+        .byte   58                      # DW_AT_decl_file
+        .byte   11                      # DW_FORM_data1
+        .byte   59                      # DW_AT_decl_line
+        .byte   11                      # DW_FORM_data1
+        .byte   63                      # DW_AT_external
+        .byte   25                      # DW_FORM_flag_present
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   8                       # Abbreviation Code
+        .byte   5                       # DW_TAG_formal_parameter
+        .byte   0                       # DW_CHILDREN_no
+        .byte   2                       # DW_AT_location
+        .byte   24                      # DW_FORM_exprloc
+        .byte   3                       # DW_AT_name
+        .byte   37                      # DW_FORM_strx1
+        .byte   58                      # DW_AT_decl_file
+        .byte   11                      # DW_FORM_data1
+        .byte   59                      # DW_AT_decl_line
+        .byte   11                      # DW_FORM_data1
+        .byte   73                      # DW_AT_type
+        .byte   19                      # DW_FORM_ref4
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+        .section        .debug_info,"",@progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                       # DWARF version number
+        .byte   2                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .quad   -6180787752776176174    # Type Signature
+        .long   35                      # Type DIE Offset
+        .byte   1                       # Abbrev [1] 0x18:0x1d DW_TAG_type_unit
+        .short  4                       # DW_AT_language
+        .long   .Lline_table_start0     # DW_AT_stmt_list
+        .long   .Lstr_offsets_base0     # DW_AT_str_offsets_base
+        .byte   2                       # Abbrev [2] 0x23:0xd DW_TAG_enumeration_type
+        .long   48                      # DW_AT_type
+        .byte   5                       # DW_AT_name
+        .byte   4                       # DW_AT_byte_size
+        .byte   0                       # DW_AT_decl_file
+        .byte   1                       # DW_AT_decl_line
+        .byte   3                       # Abbrev [3] 0x2c:0x3 DW_TAG_enumerator
+        .byte   4                       # DW_AT_name
+        .byte   0                       # DW_AT_const_value
+        .byte   0                       # End Of Children Mark
+        .byte   4                       # Abbrev [4] 0x30:0x4 DW_TAG_base_type
+        .byte   3                       # DW_AT_name
+        .byte   7                       # DW_AT_encoding
+        .byte   4                       # DW_AT_byte_size
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end0:
+
+        .long   .Ldebug_info_end2-.Ldebug_info_start2 # Length of Unit
+.Ldebug_info_start2:
+        .short  5                       # DWARF version number
+        .byte   1                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   5                       # Abbrev [5] 0xc:0x4d DW_TAG_compile_unit
+        .byte   0                       # DW_AT_producer
+        .short  4                       # DW_AT_language
+        .byte   1                       # DW_AT_name
+        .long   .Lstr_offsets_base0     # DW_AT_str_offsets_base
+        .long   .Lline_table_start0     # DW_AT_stmt_list
+        .byte   2                       # DW_AT_comp_dir
+        .byte   0                       # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .long   .Laddr_table_base0      # DW_AT_addr_base
+        .byte   6                       # Abbrev [6] 0x23:0x9 DW_TAG_enumeration_type
+                                        # DW_AT_declaration
+        .quad   -6180787752776176174    # DW_AT_signature
+        .byte   6                       # Abbrev [6] 0x2c:0x9 DW_TAG_enumeration_type
+                                        # DW_AT_declaration
+        .quad   7818257750321376053     # DW_AT_signature
+        .byte   7                       # Abbrev [7] 0x35:0x23 DW_TAG_subprogram
+        .byte   0                       # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .byte   1                       # DW_AT_frame_base
+        .byte   86
+        .byte   8                       # DW_AT_name
+        .byte   0                       # DW_AT_decl_file
+        .byte   4                       # DW_AT_decl_line
+                                        # DW_AT_external
+        .byte   8                       # Abbrev [8] 0x41:0xb DW_TAG_formal_parameter
+        .byte   2                       # DW_AT_location
+        .byte   145
+        .byte   124
+        .byte   10                      # DW_AT_name
+        .byte   0                       # DW_AT_decl_file
+        .byte   4                       # DW_AT_decl_line
+        .long   35                      # DW_AT_type
+        .byte   8                       # Abbrev [8] 0x4c:0xb DW_TAG_formal_parameter
+        .byte   2                       # DW_AT_location
+        .byte   145
+        .byte   120
+        .byte   11                      # DW_AT_name
+        .byte   0                       # DW_AT_decl_file
+        .byte   4                       # DW_AT_decl_line
+        .long   44                      # DW_AT_type
+        .byte   0                       # End Of Children Mark
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end2:
+
+        .long   .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+        .short  5                       # DWARF version number
+        .byte   2                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .quad   7818257750321376053     # Type Signature
+        .long   35                      # Type DIE Offset
+        .byte   1                       # Abbrev [1] 0x18:0x1d DW_TAG_type_unit
+        .short  4                       # DW_AT_language
+        .long   .Lline_table_start0     # DW_AT_stmt_list
+        .long   .Lstr_offsets_base0     # DW_AT_str_offsets_base
+        .byte   2                       # Abbrev [2] 0x23:0xd DW_TAG_enumeration_type
+        .long   48                      # DW_AT_type
+        .byte   7                       # DW_AT_name
+        .byte   4                       # DW_AT_byte_size
+        .byte   0                       # DW_AT_decl_file
+        .byte   2                       # DW_AT_decl_line
+        .byte   3                       # Abbrev [3] 0x2c:0x3 DW_TAG_enumerator
+        .byte   6                       # DW_AT_name
+        .byte   0                       # DW_AT_const_value
+        .byte   0                       # End Of Children Mark
+        .byte   4                       # Abbrev [4] 0x30:0x4 DW_TAG_base_type
+        .byte   3                       # DW_AT_name
+        .byte   7                       # DW_AT_encoding
+        .byte   4                       # DW_AT_byte_size
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end1:
+        .section        .debug_macinfo,"",@progbits
+        .byte   0                       # End Of Macro List Mark
+        .section        .debug_addr,"",@progbits
+        .long   .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+        .short  5                       # DWARF version number
+        .byte   8                       # Address size
+        .byte   0                       # Segment selector size
+.Laddr_table_base0:
+        .quad   .Lfunc_begin0
+.Ldebug_addr_end0:
+
+        .section        .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp
index 0931c10983c67..c48ca235ec286 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp
@@ -7,7 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "DWARFCompileUnit.h"
+#include "DWARFDebugAranges.h"
+#include "SymbolFileDWARFDebugMap.h"
 
+#include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/LineTable.h"
 #include "lldb/Utility/Stream.h"
 
 using namespace lldb;
@@ -20,3 +24,92 @@ void DWARFCompileUnit::Dump(Stream *s) const {
             GetOffset(), GetLength(), GetVersion(), GetAbbrevOffset(),
             GetAddressByteSize(), GetNextUnitOffset());
 }
+
+void DWARFCompileUnit::BuildAddressRangeTable(
+    DWARFDebugAranges *debug_aranges) {
+  // This function is usually called if there in no .debug_aranges section in
+  // order to produce a compile unit level set of address ranges that is
+  // accurate.
+
+  size_t num_debug_aranges = debug_aranges->GetNumRanges();
+
+  // First get the compile unit DIE only and check if it has a DW_AT_ranges
+  const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
+
+  const dw_offset_t cu_offset = GetOffset();
+  if (die) {
+    DWARFRangeList ranges;
+    const size_t num_ranges =
+        die->GetAttributeAddressRanges(this, ranges, false);
+    if (num_ranges > 0) {
+      // This compile unit has DW_AT_ranges, assume this is correct if it is
+      // present since clang no longer makes .debug_aranges by default and it
+      // emits DW_AT_ranges for DW_TAG_compile_units. GCC also does this with
+      // recent GCC builds.
+      for (size_t i = 0; i < num_ranges; ++i) {
+        const DWARFRangeList::Entry &range = ranges.GetEntryRef(i);
+        debug_aranges->AppendRange(cu_offset, range.GetRangeBase(),
+                                   range.GetRangeEnd());
+      }
+
+      return; // We got all of our ranges from the DW_AT_ranges attribute
+    }
+  }
+  // We don't have a DW_AT_ranges attribute, so we need to parse the DWARF
+
+  // If the DIEs weren't parsed, then we don't want all dies for all compile
+  // units to stay loaded when they weren't needed. So we can end up parsing
+  // the DWARF and then throwing them all away to keep memory usage down.
+  ScopedExtractDIEs clear_dies(ExtractDIEsScoped());
+
+  die = DIEPtr();
+  if (die)
+    die->BuildAddressRangeTable(this, debug_aranges);
+
+  if (debug_aranges->GetNumRanges() == num_debug_aranges) {
+    // We got nothing from the functions, maybe we have a line tables only
+    // situation. Check the line tables and build the arange table from this.
+    SymbolContext sc;
+    sc.comp_unit = m_dwarf->GetCompUnitForDWARFCompUnit(this);
+    if (sc.comp_unit) {
+      SymbolFileDWARFDebugMap *debug_map_sym_file =
+          m_dwarf->GetDebugMapSymfile();
+      if (debug_map_sym_file == nullptr) {
+        if (LineTable *line_table = sc.comp_unit->GetLineTable()) {
+          LineTable::FileAddressRanges file_ranges;
+          const bool append = true;
+          const size_t num_ranges =
+              line_table->GetContiguousFileAddressRanges(file_ranges, append);
+          for (uint32_t idx = 0; idx < num_ranges; ++idx) {
+            const LineTable::FileAddressRanges::Entry &range =
+                file_ranges.GetEntryRef(idx);
+            debug_aranges->AppendRange(cu_offset, range.GetRangeBase(),
+                                       range.GetRangeEnd());
+          }
+        }
+      } else
+        debug_map_sym_file->AddOSOARanges(m_dwarf, debug_aranges);
+    }
+  }
+
+  if (debug_aranges->GetNumRanges() == num_debug_aranges) {
+    // We got nothing from the functions, maybe we have a line tables only
+    // situation. Check the line tables and build the arange table from this.
+    SymbolContext sc;
+    sc.comp_unit = m_dwarf->GetCompUnitForDWARFCompUnit(this);
+    if (sc.comp_unit) {
+      if (LineTable *line_table = sc.comp_unit->GetLineTable()) {
+        LineTable::FileAddressRanges file_ranges;
+        const bool append = true;
+        const size_t num_ranges =
+            line_table->GetContiguousFileAddressRanges(file_ranges, append);
+        for (uint32_t idx = 0; idx < num_ranges; ++idx) {
+          const LineTable::FileAddressRanges::Entry &range =
+              file_ranges.GetEntryRef(idx);
+          debug_aranges->AppendRange(GetOffset(), range.GetRangeBase(),
+                                     range.GetRangeEnd());
+        }
+      }
+    }
+  }
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h
index 8ed2656dc7c8f..dc5fe25b40f27 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h
@@ -14,6 +14,8 @@
 
 class DWARFCompileUnit : public DWARFUnit {
 public:
+  void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override;
+
   void Dump(lldb_private::Stream *s) const override;
 
 private:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h
index 0b2e0a8e438a6..e146e6bf50339 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h
@@ -14,6 +14,8 @@
 
 class DWARFTypeUnit : public DWARFUnit {
 public:
+  void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) override {}
+
   void Dump(lldb_private::Stream *s) const override;
 
 private:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index c3b9a1931f703..59202061436ff 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -10,8 +10,6 @@
 
 #include "lldb/Core/Module.h"
 #include "lldb/Host/StringConvert.h"
-#include "lldb/Symbol/CompileUnit.h"
-#include "lldb/Symbol/LineTable.h"
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Utility/LLDBAssert.h"
 #include "lldb/Utility/StreamString.h"
@@ -23,7 +21,6 @@
 #include "DWARFDebugInfo.h"
 #include "DWARFTypeUnit.h"
 #include "LogChannelDWARF.h"
-#include "SymbolFileDWARFDebugMap.h"
 #include "SymbolFileDWARFDwo.h"
 
 using namespace lldb;
@@ -407,98 +404,6 @@ void DWARFUnit::ClearDIEsRWLocked() {
     m_dwo_symbol_file->GetCompileUnit()->ClearDIEsRWLocked();
 }
 
-void DWARFUnit::BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) {
-  // This function is usually called if there in no .debug_aranges section in
-  // order to produce a compile unit level set of address ranges that is
-  // accurate.
-
-  size_t num_debug_aranges = debug_aranges->GetNumRanges();
-
-  // First get the compile unit DIE only and check if it has a DW_AT_ranges
-  const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
-
-  const dw_offset_t cu_offset = GetOffset();
-  if (die) {
-    DWARFRangeList ranges;
-    const size_t num_ranges =
-        die->GetAttributeAddressRanges(this, ranges, false);
-    if (num_ranges > 0) {
-      // This compile unit has DW_AT_ranges, assume this is correct if it is
-      // present since clang no longer makes .debug_aranges by default and it
-      // emits DW_AT_ranges for DW_TAG_compile_units. GCC also does this with
-      // recent GCC builds.
-      for (size_t i = 0; i < num_ranges; ++i) {
-        const DWARFRangeList::Entry &range = ranges.GetEntryRef(i);
-        debug_aranges->AppendRange(cu_offset, range.GetRangeBase(),
-                                   range.GetRangeEnd());
-      }
-
-      return; // We got all of our ranges from the DW_AT_ranges attribute
-    }
-  }
-  // We don't have a DW_AT_ranges attribute, so we need to parse the DWARF
-
-  // If the DIEs weren't parsed, then we don't want all dies for all compile
-  // units to stay loaded when they weren't needed. So we can end up parsing
-  // the DWARF and then throwing them all away to keep memory usage down.
-  ScopedExtractDIEs clear_dies(ExtractDIEsScoped());
-
-  die = DIEPtr();
-  if (die)
-    die->BuildAddressRangeTable(this, debug_aranges);
-
-  if (debug_aranges->GetNumRanges() == num_debug_aranges) {
-    // We got nothing from the functions, maybe we have a line tables only
-    // situation. Check the line tables and build the arange table from this.
-    SymbolContext sc;
-    sc.comp_unit = m_dwarf->GetCompUnitForDWARFCompUnit(this);
-    if (sc.comp_unit) {
-      SymbolFileDWARFDebugMap *debug_map_sym_file =
-          m_dwarf->GetDebugMapSymfile();
-      if (debug_map_sym_file == NULL) {
-        LineTable *line_table = sc.comp_unit->GetLineTable();
-
-        if (line_table) {
-          LineTable::FileAddressRanges file_ranges;
-          const bool append = true;
-          const size_t num_ranges =
-              line_table->GetContiguousFileAddressRanges(file_ranges, append);
-          for (uint32_t idx = 0; idx < num_ranges; ++idx) {
-            const LineTable::FileAddressRanges::Entry &range =
-                file_ranges.GetEntryRef(idx);
-            debug_aranges->AppendRange(cu_offset, range.GetRangeBase(),
-                                       range.GetRangeEnd());
-          }
-        }
-      } else
-        debug_map_sym_file->AddOSOARanges(m_dwarf, debug_aranges);
-    }
-  }
-
-  if (debug_aranges->GetNumRanges() == num_debug_aranges) {
-    // We got nothing from the functions, maybe we have a line tables only
-    // situation. Check the line tables and build the arange table from this.
-    SymbolContext sc;
-    sc.comp_unit = m_dwarf->GetCompUnitForDWARFCompUnit(this);
-    if (sc.comp_unit) {
-      LineTable *line_table = sc.comp_unit->GetLineTable();
-
-      if (line_table) {
-        LineTable::FileAddressRanges file_ranges;
-        const bool append = true;
-        const size_t num_ranges =
-            line_table->GetContiguousFileAddressRanges(file_ranges, append);
-        for (uint32_t idx = 0; idx < num_ranges; ++idx) {
-          const LineTable::FileAddressRanges::Entry &range =
-              file_ranges.GetEntryRef(idx);
-          debug_aranges->AppendRange(GetOffset(), range.GetRangeBase(),
-                                     range.GetRangeEnd());
-        }
-      }
-    }
-  }
-}
-
 lldb::ByteOrder DWARFUnit::GetByteOrder() const {
   return m_dwarf->GetObjectFile()->GetByteOrder();
 }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 6b6fffc1e7b09..927ca2ed8de8b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -143,7 +143,7 @@ class DWARFUnit : public lldb_private::UserID {
   void SetRangesBase(dw_addr_t ranges_base);
   void SetBaseObjOffset(dw_offset_t base_obj_offset);
   void SetStrOffsetsBase(dw_offset_t str_offsets_base);
-  void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges);
+  virtual void BuildAddressRangeTable(DWARFDebugAranges *debug_aranges) = 0;
 
   lldb::ByteOrder GetByteOrder() const;
 
@@ -215,6 +215,24 @@ class DWARFUnit : public lldb_private::UserID {
                             const lldb_private::DWARFDataExtractor &data,
                             lldb::offset_t *offset_ptr);
 
+  // Get the DWARF unit DWARF debug information entry. Parse the single DIE
+  // if needed.
+  const DWARFDebugInfoEntry *GetUnitDIEPtrOnly() {
+    ExtractUnitDIEIfNeeded();
+    // m_first_die_mutex is not required as m_first_die is never cleared.
+    if (!m_first_die)
+      return NULL;
+    return &m_first_die;
+  }
+
+  // Get all DWARF debug informration entries. Parse all DIEs if needed.
+  const DWARFDebugInfoEntry *DIEPtr() {
+    ExtractDIEsIfNeeded();
+    if (m_die_array.empty())
+      return NULL;
+    return &m_die_array[0];
+  }
+
   SymbolFileDWARF *m_dwarf = nullptr;
   std::unique_ptr<SymbolFileDWARFDwo> m_dwo_symbol_file;
   DWARFUnitHeader m_header;
@@ -257,24 +275,6 @@ class DWARFUnit : public lldb_private::UserID {
   void ExtractDIEsRWLocked();
   void ClearDIEsRWLocked();
 
-  // Get the DWARF unit DWARF debug informration entry. Parse the single DIE
-  // if needed.
-  const DWARFDebugInfoEntry *GetUnitDIEPtrOnly() {
-    ExtractUnitDIEIfNeeded();
-    // m_first_die_mutex is not required as m_first_die is never cleared.
-    if (!m_first_die)
-      return NULL;
-    return &m_first_die;
-  }
-
-  // Get all DWARF debug informration entries. Parse all DIEs if needed.
-  const DWARFDebugInfoEntry *DIEPtr() {
-    ExtractDIEsIfNeeded();
-    if (m_die_array.empty())
-      return NULL;
-    return &m_die_array[0];
-  }
-
   void AddUnitDIE(const DWARFDebugInfoEntry &cu_die);
 
   void ComputeCompDirAndGuessPathStyle();
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index 52d2d061b5dd4..426719bf0b293 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -62,7 +62,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
   friend class SymbolFileDWARFDwo;
   friend class DebugMapModule;
   friend struct DIERef;
-  friend class DWARFUnit;
+  friend class DWARFCompileUnit;
   friend class DWARFDIE;
   friend class DWARFASTParserClang;
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
index b7dc2bcb2a892..13813cd8cb28c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h
@@ -135,7 +135,7 @@ class SymbolFileDWARFDebugMap : public lldb_private::SymbolFile {
   friend class DebugMapModule;
   friend struct DIERef;
   friend class DWARFASTParserClang;
-  friend class DWARFUnit;
+  friend class DWARFCompileUnit;
   friend class SymbolFileDWARF;
   struct OSOInfo {
     lldb::ModuleSP module_sp;

From e98a8f7b2a8625e99d1024cf100933a6e8551bae Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Thu, 23 May 2019 09:18:57 +0000
Subject: [PATCH 0002/1176] [llvm-objcopy] - Many minor NFC changes to
 cleanup/improve the code in ELF/Object.cpp.

The code in ELF/Object.cpp is sometimes a bit hard to read because of
lots of auto used everywhere. The main intention of this patch is
to replace them with the real type for places where it is not obvious.
Also it cleanups few places.

It is NFC change, but I want to be sure that there is no objections to do that since it
is massive.

DIfferential revision: https://reviews.llvm.org/D62260

llvm-svn: 361466
---
 llvm/tools/llvm-objcopy/ELF/Object.cpp | 191 +++++++++++--------------
 1 file changed, 85 insertions(+), 106 deletions(-)

diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index b0f7c1e3c35fc..0c80bad6c102d 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -37,8 +37,8 @@ using namespace object;
 using namespace ELF;
 
 template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
-  uint8_t *B = Buf.getBufferStart();
-  B += Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
+  uint8_t *B = Buf.getBufferStart() + Obj.ProgramHdrSegment.Offset +
+               Seg.Index * sizeof(Elf_Phdr);
   Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
   Phdr.p_type = Seg.Type;
   Phdr.p_flags = Seg.Flags;
@@ -67,8 +67,7 @@ void SectionBase::replaceSectionReferences(
     const DenseMap<SectionBase *, SectionBase *> &) {}
 
 template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
-  uint8_t *B = Buf.getBufferStart();
-  B += Sec.HeaderOffset;
+  uint8_t *B = Buf.getBufferStart() + Sec.HeaderOffset;
   Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
   Shdr.sh_name = Sec.NameIndex;
   Shdr.sh_type = Sec.Type;
@@ -144,10 +143,8 @@ void BinarySectionWriter::visit(const GroupSection &Sec) {
 }
 
 void SectionWriter::visit(const Section &Sec) {
-  if (Sec.Type == SHT_NOBITS)
-    return;
-  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-  llvm::copy(Sec.Contents, Buf);
+  if (Sec.Type != SHT_NOBITS)
+    llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
 }
 
 void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
@@ -155,8 +152,7 @@ void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
 void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
 
 void SectionWriter::visit(const OwnedDataSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-  llvm::copy(Sec.Data, Buf);
+  llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
 }
 
 static const std::vector<uint8_t> ZlibGnuMagic = {'Z', 'L', 'I', 'B'};
@@ -227,9 +223,7 @@ void BinarySectionWriter::visit(const CompressedSection &Sec) {
 
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart();
-  Buf += Sec.Offset;
-
+  uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
   if (Sec.CompressionType == DebugCompressionType::None) {
     std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
     return;
@@ -323,8 +317,7 @@ void StringTableSection::accept(MutableSectionVisitor &Visitor) {
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
   uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-  auto *IndexesBuffer = reinterpret_cast<Elf_Word *>(Buf);
-  llvm::copy(Sec.Indexes, IndexesBuffer);
+  llvm::copy(Sec.Indexes, reinterpret_cast<Elf_Word *>(Buf));
 }
 
 void SectionIndexSection::initialize(SectionTableRef SecTable) {
@@ -481,7 +474,7 @@ void SymbolTableSection::initialize(SectionTableRef SecTable) {
 
 void SymbolTableSection::finalize() {
   uint32_t MaxLocalIndex = 0;
-  for (auto &Sym : Symbols) {
+  for (std::unique_ptr<Symbol> &Sym : Symbols) {
     Sym->NameIndex =
         SymbolNames == nullptr ? 0 : SymbolNames->findIndex(Sym->Name);
     if (Sym->Binding == STB_LOCAL)
@@ -504,7 +497,7 @@ void SymbolTableSection::prepareForLayout() {
   // If the symbol names section has been removed, don't try to add strings to
   // the table.
   if (SymbolNames != nullptr)
-    for (auto &Sym : Symbols)
+    for (std::unique_ptr<Symbol> &Sym : Symbols)
       SymbolNames->addString(Sym->Name);
 }
 
@@ -513,7 +506,7 @@ void SymbolTableSection::fillShndxTable() {
     return;
   // Fill section index table with real section indexes. This function must
   // be called after assignOffsets.
-  for (const auto &Sym : Symbols) {
+  for (const std::unique_ptr<Symbol> &Sym : Symbols) {
     if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE)
       SectionIndexTable->addIndex(Sym->DefinedIn->Index);
     else
@@ -534,11 +527,9 @@ Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) {
 
 template <class ELFT>
 void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
-  uint8_t *Buf = Out.getBufferStart();
-  Buf += Sec.Offset;
-  Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Buf);
+  Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Out.getBufferStart() + Sec.Offset);
   // Loop though symbols setting each entry of the symbol table.
-  for (auto &Symbol : Sec.Symbols) {
+  for (const std::unique_ptr<Symbol> &Symbol : Sec.Symbols) {
     Sym->st_name = Symbol->NameIndex;
     Sym->st_value = Symbol->Value;
     Sym->st_size = Symbol->Size;
@@ -671,8 +662,7 @@ void RelocationSection::replaceSectionReferences(
 }
 
 void SectionWriter::visit(const DynamicRelocationSection &Sec) {
-  llvm::copy(Sec.Contents,
-            Out.getBufferStart() + Sec.Offset);
+  llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
 }
 
 void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
@@ -680,33 +670,32 @@ void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
 }
 
 void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
-  Visitor.visit(*this);
-}
-
-Error DynamicRelocationSection::removeSectionReferences(
-    bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
-  if (ToRemove(Symbols)) {
-    if (!AllowBrokenLinks)
-      return createStringError(
-          llvm::errc::invalid_argument,
+  Visitor.visit(*this);
+}
+
+Error DynamicRelocationSection::removeSectionReferences(
+    bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(Symbols)) {
+    if (!AllowBrokenLinks)
+      return createStringError(
+          llvm::errc::invalid_argument,
           "symbol table '%s' cannot be removed because it is "
           "referenced by the relocation section '%s'",
-          Symbols->Name.data(), this->Name.data());
-    Symbols = nullptr;
-  }
-
-  // SecToApplyRel contains a section referenced by sh_info field. It keeps
-  // a section to which the relocation section applies. When we remove any
-  // sections we also remove their relocation sections. Since we do that much
-  // earlier, this assert should never be triggered.
-  assert(!SecToApplyRel || !ToRemove(SecToApplyRel));
-
-  return Error::success();
-}
-
-Error Section::removeSectionReferences(bool AllowBrokenDependency,
-    function_ref<bool(const SectionBase *)> ToRemove) {
-  if (ToRemove(LinkSection)) {
+          Symbols->Name.data(), this->Name.data());
+    Symbols = nullptr;
+  }
+
+  // SecToApplyRel contains a section referenced by sh_info field. It keeps
+  // a section to which the relocation section applies. When we remove any
+  // sections we also remove their relocation sections. Since we do that much
+  // earlier, this assert should never be triggered.
+  assert(!SecToApplyRel || !ToRemove(SecToApplyRel));
+  return Error::success();
+}
+
+Error Section::removeSectionReferences(bool AllowBrokenDependency,
+    function_ref<bool(const SectionBase *)> ToRemove) {
+  if (ToRemove(LinkSection)) {
     if (!AllowBrokenDependency)
       return createStringError(llvm::errc::invalid_argument,
                                "section '%s' cannot be removed because it is "
@@ -744,13 +733,13 @@ void GroupSection::replaceSectionReferences(
 }
 
 void Section::initialize(SectionTableRef SecTable) {
-  if (Link != ELF::SHN_UNDEF) {
-    LinkSection =
-        SecTable.getSection(Link, "Link field value " + Twine(Link) +
-                                      " in section " + Name + " is invalid");
-    if (LinkSection->Type == ELF::SHT_SYMTAB)
-      LinkSection = nullptr;
-  }
+  if (Link == ELF::SHN_UNDEF)
+    return;
+  LinkSection =
+      SecTable.getSection(Link, "Link field value " + Twine(Link) +
+                                    " in section " + Name + " is invalid");
+  if (LinkSection->Type == ELF::SHT_SYMTAB)
+    LinkSection = nullptr;
 }
 
 void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; }
@@ -800,7 +789,7 @@ void ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
   ELF::Elf32_Word *Buf =
       reinterpret_cast<ELF::Elf32_Word *>(Out.getBufferStart() + Sec.Offset);
   *Buf++ = Sec.FlagWord;
-  for (const auto *S : Sec.GroupMembers)
+  for (SectionBase *S : Sec.GroupMembers)
     support::endian::write32<ELFT::TargetEndianness>(Buf++, S->Index);
 }
 
@@ -908,16 +897,15 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
 }
 
 void BinaryELFBuilder::initSections() {
-  for (auto &Section : Obj->sections()) {
+  for (SectionBase &Section : Obj->sections())
     Section.initialize(Obj->sections());
-  }
 }
 
 std::unique_ptr<Object> BinaryELFBuilder::build() {
   initFileHeader();
   initHeaderSegment();
-  StringTableSection *StrTab = addStrTab();
-  SymbolTableSection *SymTab = addSymTab(StrTab);
+
+  SymbolTableSection *SymTab = addSymTab(addStrTab());
   initSections();
   addData(SymTab);
 
@@ -925,7 +913,7 @@ std::unique_ptr<Object> BinaryELFBuilder::build() {
 }
 
 template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
-  for (auto &Parent : Obj.segments()) {
+  for (Segment &Parent : Obj.segments()) {
     // Every segment will overlap with itself but we don't want a segment to
     // be it's own parent so we avoid that situation.
     if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) {
@@ -956,7 +944,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
     Seg.MemSize = Phdr.p_memsz;
     Seg.Align = Phdr.p_align;
     Seg.Index = Index++;
-    for (auto &Section : Obj.sections()) {
+    for (SectionBase &Section : Obj.sections()) {
       if (sectionWithinSegment(Section, Seg)) {
         Seg.addSection(&Section);
         if (!Section.ParentSegment ||
@@ -987,7 +975,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
 
   // Now we do an O(n^2) loop through the segments in order to match up
   // segments.
-  for (auto &Child : Obj.segments())
+  for (Segment &Child : Obj.segments())
     setParentSegment(Child);
   setParentSegment(ElfHdr);
   setParentSegment(PrHdr);
@@ -998,14 +986,14 @@ void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
   if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0)
     error("invalid alignment " + Twine(GroupSec->Align) + " of group section '" +
           GroupSec->Name + "'");
-  auto SecTable = Obj.sections();
+  SectionTableRef SecTable = Obj.sections();
   auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>(
       GroupSec->Link,
       "link field value '" + Twine(GroupSec->Link) + "' in section '" +
           GroupSec->Name + "' is invalid",
       "link field value '" + Twine(GroupSec->Link) + "' in section '" +
           GroupSec->Name + "' is not a symbol table");
-  auto Sym = SymTab->getSymbolByIndex(GroupSec->Info);
+  Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info);
   if (!Sym)
     error("info field value '" + Twine(GroupSec->Info) + "' in section '" +
           GroupSec->Name + "' is not a valid symbol index");
@@ -1294,8 +1282,7 @@ std::unique_ptr<Object> ELFReader::create() const {
 }
 
 template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
-  uint8_t *B = Buf.getBufferStart();
-  Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(B);
+  Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(Buf.getBufferStart());
   std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
   Ehdr.e_ident[EI_MAG0] = 0x7f;
   Ehdr.e_ident[EI_MAG1] = 'E';
@@ -1357,10 +1344,10 @@ template <class ELFT> void ELFWriter<ELFT>::writePhdrs() {
 }
 
 template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
-  uint8_t *B = Buf.getBufferStart() + Obj.SHOffset;
   // This reference serves to write the dummy section header at the begining
   // of the file. It is not used for anything else
-  Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
+  Elf_Shdr &Shdr =
+      *reinterpret_cast<Elf_Shdr *>(Buf.getBufferStart() + Obj.SHOffset);
   Shdr.sh_name = 0;
   Shdr.sh_type = SHT_NULL;
   Shdr.sh_flags = 0;
@@ -1381,12 +1368,12 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
   Shdr.sh_addralign = 0;
   Shdr.sh_entsize = 0;
 
-  for (auto &Sec : Obj.sections())
+  for (SectionBase &Sec : Obj.sections())
     writeShdr(Sec);
 }
 
 template <class ELFT> void ELFWriter<ELFT>::writeSectionData() {
-  for (auto &Sec : Obj.sections())
+  for (SectionBase &Sec : Obj.sections())
     // Segments are responsible for writing their contents, so only write the
     // section data if the section is not in a segment. Note that this renders
     // sections in segments effectively immutable.
@@ -1409,8 +1396,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
       continue;
     uint64_t Offset =
         Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset;
-    uint8_t *B = Buf.getBufferStart();
-    std::memset(B + Offset, 0, Sec.Size);
+    std::memset(Buf.getBufferStart() + Offset, 0, Sec.Size);
   }
 }
 
@@ -1515,20 +1501,20 @@ static uint64_t layoutSegments(std::vector<Segment *> &Segments,
   // then it's acceptable, but not ideal, to simply move it to after the
   // segments. So we can simply layout segments one after the other accounting
   // for alignment.
-  for (auto &Segment : Segments) {
+  for (Segment *Seg : Segments) {
     // We assume that segments have been ordered by OriginalOffset and Index
     // such that a parent segment will always come before a child segment in
     // OrderedSegments. This means that the Offset of the ParentSegment should
     // already be set and we can set our offset relative to it.
-    if (Segment->ParentSegment != nullptr) {
-      auto Parent = Segment->ParentSegment;
-      Segment->Offset =
-          Parent->Offset + Segment->OriginalOffset - Parent->OriginalOffset;
+    if (Seg->ParentSegment != nullptr) {
+      Segment *Parent = Seg->ParentSegment;
+      Seg->Offset =
+          Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset;
     } else {
-      Offset = alignToAddr(Offset, Segment->VAddr, Segment->Align);
-      Segment->Offset = Offset;
+      Offset = alignToAddr(Offset, Seg->VAddr, Seg->Align);
+      Seg->Offset = Offset;
     }
-    Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
+    Offset = std::max(Offset, Seg->Offset + Seg->FileSize);
   }
   return Offset;
 }
@@ -1565,7 +1551,7 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
 }
 
 template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
-  auto &ElfHdr = Obj.ElfHdrSegment;
+  Segment &ElfHdr = Obj.ElfHdrSegment;
   ElfHdr.Type = PT_PHDR;
   ElfHdr.Flags = 0;
   ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
@@ -1580,7 +1566,7 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
   // so that we know that anytime ->ParentSegment is set that segment has
   // already had its offset properly set.
   std::vector<Segment *> OrderedSegments;
-  for (auto &Segment : Obj.segments())
+  for (Segment &Segment : Obj.segments())
     OrderedSegments.push_back(&Segment);
   OrderedSegments.push_back(&Obj.ElfHdrSegment);
   OrderedSegments.push_back(&Obj.ProgramHdrSegment);
@@ -1635,7 +1621,7 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
   // we go to see if we will actully need large indexes.
   bool NeedsLargeIndexes = false;
   if (Obj.sections().size() >= SHN_LORESERVE) {
-    auto Sections = Obj.sections();
+    SectionTableRef Sections = Obj.sections();
     NeedsLargeIndexes =
         std::any_of(Sections.begin() + SHN_LORESERVE, Sections.end(),
                     [](const SectionBase &Sec) { return Sec.HasSymbol; });
@@ -1693,7 +1679,7 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
 
   // Now that all strings are added we want to finalize string table builders,
   // because that affects section sizes which in turn affects section offsets.
-  for (auto &Sec : Obj.sections())
+  for (SectionBase &Sec : Obj.sections())
     if (auto StrTab = dyn_cast<StringTableSection>(&Sec))
       StrTab->prepareForLayout();
 
@@ -1707,7 +1693,7 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
   // Finally now that all offsets and indexes have been set we can finalize any
   // remaining issues.
   uint64_t Offset = Obj.SHOffset + sizeof(Elf_Shdr);
-  for (auto &Section : Obj.sections()) {
+  for (SectionBase &Section : Obj.sections()) {
     Section.HeaderOffset = Offset;
     Offset += sizeof(Elf_Shdr);
     if (WriteSectionHeaders)
@@ -1722,11 +1708,9 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
 }
 
 Error BinaryWriter::write() {
-  for (auto &Section : Obj.sections()) {
-    if ((Section.Flags & SHF_ALLOC) == 0)
-      continue;
-    Section.accept(*SecWriter);
-  }
+  for (auto &Section : Obj.sections())
+    if (Section.Flags & SHF_ALLOC)
+      Section.accept(*SecWriter);
   return Buf.commit();
 }
 
@@ -1739,11 +1723,9 @@ Error BinaryWriter::finalize() {
   // already had it's offset properly set. We only want to consider the segments
   // that will affect layout of allocated sections so we only add those.
   std::vector<Segment *> OrderedSegments;
-  for (auto &Section : Obj.sections()) {
-    if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr) {
+  for (SectionBase &Section : Obj.sections())
+    if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr)
       OrderedSegments.push_back(Section.ParentSegment);
-    }
-  }
 
   // For binary output, we're going to use physical addresses instead of
   // virtual addresses, since a binary output is used for cases like ROM
@@ -1770,8 +1752,8 @@ Error BinaryWriter::finalize() {
   // our layout algorithm to proceed as expected while not writing out the gap
   // at the start.
   if (!OrderedSegments.empty()) {
-    auto Seg = OrderedSegments[0];
-    auto Sec = Seg->firstSection();
+    Segment *Seg = OrderedSegments[0];
+    const SectionBase *Sec = Seg->firstSection();
     auto Diff = Sec->OriginalOffset - Seg->OriginalOffset;
     Seg->OriginalOffset += Diff;
     // The size needs to be shrunk as well.
@@ -1780,7 +1762,7 @@ Error BinaryWriter::finalize() {
     // section.
     Seg->PAddr += Diff;
     uint64_t LowestPAddr = Seg->PAddr;
-    for (auto &Segment : OrderedSegments) {
+    for (Segment *Segment : OrderedSegments) {
       Segment->Offset = Segment->PAddr - LowestPAddr;
       Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
     }
@@ -1791,11 +1773,9 @@ Error BinaryWriter::finalize() {
   // not hold. Then pass such a range to LayoutSections instead of constructing
   // AllocatedSections here.
   std::vector<SectionBase *> AllocatedSections;
-  for (auto &Section : Obj.sections()) {
-    if ((Section.Flags & SHF_ALLOC) == 0)
-      continue;
-    AllocatedSections.push_back(&Section);
-  }
+  for (SectionBase &Section : Obj.sections())
+    if (Section.Flags & SHF_ALLOC)
+      AllocatedSections.push_back(&Section);
   layoutSections(make_pointee_range(AllocatedSections), Offset);
 
   // Now that every section has been laid out we just need to compute the total
@@ -1803,10 +1783,9 @@ Error BinaryWriter::finalize() {
   // LayoutSections, because we want to truncate the last segment to the end of
   // its last section, to match GNU objcopy's behaviour.
   TotalSize = 0;
-  for (const auto &Section : AllocatedSections) {
+  for (SectionBase *Section : AllocatedSections)
     if (Section->Type != SHT_NOBITS)
       TotalSize = std::max(TotalSize, Section->Offset + Section->Size);
-  }
 
   if (Error E = Buf.allocate(TotalSize))
     return E;

From 50434e8df0a32f3f1f7d8de2ef995d3509c33036 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 23 May 2019 09:20:08 +0000
Subject: [PATCH 0003/1176] Enable queue_t and clk_event_t comparisons in C++
 mode

Support queue_t and clk_event_t comparisons in C++ for OpenCL mode, to
preserve backwards compatibility with OpenCL C.

Differential Revision: https://reviews.llvm.org/D62208

llvm-svn: 361467
---
 clang/lib/Sema/SemaExpr.cpp          | 2 +-
 clang/test/SemaOpenCL/clk_event_t.cl | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index fbbdc666dc979..410d38cf5b6f8 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10808,7 +10808,7 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
     return computeResultTy();
   }
 
-  if (getLangOpts().OpenCLVersion >= 200) {
+  if (getLangOpts().OpenCLVersion >= 200 || getLangOpts().OpenCLCPlusPlus) {
     if (LHSType->isClkEventT() && RHSType->isClkEventT()) {
       return computeResultTy();
     }
diff --git a/clang/test/SemaOpenCL/clk_event_t.cl b/clang/test/SemaOpenCL/clk_event_t.cl
index b73daf92fa006..4a884bcfa6c0a 100644
--- a/clang/test/SemaOpenCL/clk_event_t.cl
+++ b/clang/test/SemaOpenCL/clk_event_t.cl
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=c++
 
 // Taken from opencl-c.h
 #define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t))

From 39192043bbfca8d4fe8562e2a2105012edaff8c3 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Thu, 23 May 2019 09:22:43 +0000
Subject: [PATCH 0004/1176] Delete default constructors, copy constructors,
 move constructors, copy assignment, move assignment operators on Expr, Stmt
 and Decl

Reviewers: ilya-biryukov, rsmith

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62187

llvm-svn: 361468
---
 clang/include/clang/AST/DeclBase.h |  7 +++++++
 clang/include/clang/AST/Expr.h     |  7 +++++++
 clang/include/clang/AST/Stmt.h     | 11 ++++++-----
 clang/lib/CodeGen/CGBuiltin.cpp    | 19 ++++++++-----------
 4 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 674e0f4a59b2e..cebf2973c8c3c 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -368,6 +368,13 @@ class alignas(8) Decl {
     return ModuleOwnershipKind::Unowned;
   }
 
+public:
+  Decl() = delete;
+  Decl(const Decl&) = delete;
+  Decl(Decl &&) = delete;
+  Decl &operator=(const Decl&) = delete;
+  Decl &operator=(Decl&&) = delete;
+
 protected:
   Decl(Kind DK, DeclContext *DC, SourceLocation L)
       : NextInContextAndBits(nullptr, getModuleOwnershipKindForChildOf(DC)),
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 143eaae37b2af..96cb8e8f1a81a 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -108,6 +108,13 @@ struct SubobjectAdjustment {
 class Expr : public ValueStmt {
   QualType TR;
 
+public:
+  Expr() = delete;
+  Expr(const Expr&) = delete;
+  Expr(Expr &&) = delete;
+  Expr &operator=(const Expr&) = delete;
+  Expr &operator=(Expr&&) = delete;
+
 protected:
   Expr(StmtClass SC, QualType T, ExprValueKind VK, ExprObjectKind OK,
        bool TD, bool VD, bool ID, bool ContainsUnexpandedParameterPack)
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index a6ab1851fefaf..8834a60cd6c99 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -1040,6 +1040,12 @@ class alignas(void *) Stmt {
   explicit Stmt(StmtClass SC, EmptyShell) : Stmt(SC) {}
 
 public:
+  Stmt() = delete;
+  Stmt(const Stmt &) = delete;
+  Stmt(Stmt &&) = delete;
+  Stmt &operator=(const Stmt &) = delete;
+  Stmt &operator=(Stmt &&) = delete;
+
   Stmt(StmtClass SC) {
     static_assert(sizeof(*this) <= 8,
                   "changing bitfields changed sizeof(Stmt)");
@@ -1054,11 +1060,6 @@ class alignas(void *) Stmt {
     return static_cast<StmtClass>(StmtBits.sClass);
   }
 
-  Stmt(const Stmt &) = delete;
-  Stmt(Stmt &&) = delete;
-  Stmt &operator=(const Stmt &) = delete;
-  Stmt &operator=(Stmt &&) = delete;
-
   const char *getStmtClassName() const;
 
   bool isOMPStructuredBlock() const { return StmtBits.IsOMPStructuredBlock; }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 34537d3f3ef17..d59e0fc960b3e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1134,9 +1134,10 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
     return F;
 
   llvm::SmallVector<QualType, 4> ArgTys;
-  llvm::SmallVector<ImplicitParamDecl, 4> Params;
-  Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
-                      Ctx.VoidPtrTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(ImplicitParamDecl::Create(
+      Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
+      ImplicitParamDecl::Other));
   ArgTys.emplace_back(Ctx.VoidPtrTy);
 
   for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
@@ -1145,17 +1146,13 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
       continue;
 
     QualType ArgTy = getOSLogArgType(Ctx, Size);
-    Params.emplace_back(
+    Args.push_back(ImplicitParamDecl::Create(
         Ctx, nullptr, SourceLocation(),
         &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
-        ImplicitParamDecl::Other);
+        ImplicitParamDecl::Other));
     ArgTys.emplace_back(ArgTy);
   }
 
-  FunctionArgList Args;
-  for (auto &P : Params)
-    Args.push_back(&P);
-
   QualType ReturnTy = Ctx.VoidTy;
   QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
 
@@ -1188,7 +1185,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
   auto AL = ApplyDebugLocation::CreateArtificial(*this);
 
   CharUnits Offset;
-  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"),
+  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"),
                   BufferAlignment);
   Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
                       Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
@@ -1208,7 +1205,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
     if (!Size.getQuantity())
       continue;
 
-    Address Arg = GetAddrOfLocalVar(&Params[I]);
+    Address Arg = GetAddrOfLocalVar(Args[I]);
     Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
     Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
                                  "argDataCast");

From 0baaf45be707c9e13e9f4d74001cb87701a519c1 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Thu, 23 May 2019 09:26:27 +0000
Subject: [PATCH 0005/1176] Move SymbolTable::addCombinedLTOObject() to
 LinkerDriver.

Also renames it LinkerDriver::compileBitcodeFiles.

The function doesn't logically belong to SymbolTable. We added this
function to the symbol table because symbol table used to be a
container of input files. This is no longer the case.

Differential Revision: https://reviews.llvm.org/D62291

llvm-svn: 361469
---
 lld/ELF/Driver.cpp      | 25 ++++++++++++++++++++++++-
 lld/ELF/Driver.h        |  5 +++++
 lld/ELF/SymbolTable.cpp | 28 ----------------------------
 lld/ELF/SymbolTable.h   |  5 -----
 4 files changed, 29 insertions(+), 34 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index cad182ea3ee47..6bd00e46b9143 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1436,6 +1436,29 @@ template <class ELFT> static Symbol *addUndefined(StringRef Name) {
       Undefined{nullptr, Name, STB_GLOBAL, STV_DEFAULT, 0});
 }
 
+// This function is where all the optimizations of link-time
+// optimization takes place. When LTO is in use, some input files are
+// not in native object file format but in the LLVM bitcode format.
+// This function compiles bitcode files into a few big native files
+// using LLVM functions and replaces bitcode symbols with the results.
+// Because all bitcode files that the program consists of are passed to
+// the compiler at once, it can do a whole-program optimization.
+template <class ELFT> void LinkerDriver::compileBitcodeFiles() {
+  // Compile bitcode files and replace bitcode symbols.
+  LTO.reset(new BitcodeCompiler);
+  for (BitcodeFile *File : BitcodeFiles)
+    LTO->add(*File);
+
+  for (InputFile *File : LTO->compile()) {
+    DenseMap<CachedHashStringRef, const InputFile *> DummyGroups;
+    auto *Obj = cast<ObjFile<ELFT>>(File);
+    Obj->parse(DummyGroups);
+    for (Symbol *Sym : Obj->getGlobalSymbols())
+      Sym->parseSymbolVersion();
+    ObjectFiles.push_back(File);
+  }
+}
+
 // The --wrap option is a feature to rename symbols so that you can write
 // wrappers for existing functions. If you pass `-wrap=foo`, all
 // occurrences of symbol `foo` are resolved to `wrap_foo` (so, you are
@@ -1645,7 +1668,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
   //
   // With this the symbol table should be complete. After this, no new names
   // except a few linker-synthesized ones will be added to the symbol table.
-  Symtab->addCombinedLTOObject<ELFT>();
+  compileBitcodeFiles<ELFT>();
   if (errorCount())
     return;
 
diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h
index 91d52c63723d3..76b91be211951 100644
--- a/lld/ELF/Driver.h
+++ b/lld/ELF/Driver.h
@@ -9,6 +9,7 @@
 #ifndef LLD_ELF_DRIVER_H
 #define LLD_ELF_DRIVER_H
 
+#include "LTO.h"
 #include "SymbolTable.h"
 #include "lld/Common/LLVM.h"
 #include "lld/Common/Reproduce.h"
@@ -33,6 +34,7 @@ class LinkerDriver {
   void createFiles(llvm::opt::InputArgList &Args);
   void inferMachineType();
   template <class ELFT> void link(llvm::opt::InputArgList &Args);
+  template <class ELFT> void compileBitcodeFiles();
 
   // True if we are in --whole-archive and --no-whole-archive.
   bool InWholeArchive = false;
@@ -40,6 +42,9 @@ class LinkerDriver {
   // True if we are in --start-lib and --end-lib.
   bool InLib = false;
 
+  // For LTO.
+  std::unique_ptr<BitcodeCompiler> LTO;
+
   std::vector<InputFile *> Files;
 };
 
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index b6d1741f856b2..44fdb96b3e0f8 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -32,29 +32,6 @@ using namespace lld::elf;
 
 SymbolTable *elf::Symtab;
 
-// This function is where all the optimizations of link-time
-// optimization happens. When LTO is in use, some input files are
-// not in native object file format but in the LLVM bitcode format.
-// This function compiles bitcode files into a few big native files
-// using LLVM functions and replaces bitcode symbols with the results.
-// Because all bitcode files that the program consists of are passed
-// to the compiler at once, it can do whole-program optimization.
-template <class ELFT> void SymbolTable::addCombinedLTOObject() {
-  // Compile bitcode files and replace bitcode symbols.
-  LTO.reset(new BitcodeCompiler);
-  for (BitcodeFile *F : BitcodeFiles)
-    LTO->add(*F);
-
-  for (InputFile *File : LTO->compile()) {
-    DenseMap<CachedHashStringRef, const InputFile *> DummyGroups;
-    auto *Obj = cast<ObjFile<ELFT>>(File);
-    Obj->parse(DummyGroups);
-    for (Symbol *Sym : Obj->getGlobalSymbols())
-      Sym->parseSymbolVersion();
-    ObjectFiles.push_back(File);
-  }
-}
-
 // Set a flag for --trace-symbol so that we can print out a log message
 // if a new symbol with the same name is inserted into the symbol table.
 void SymbolTable::trace(StringRef Name) {
@@ -609,8 +586,3 @@ void elf::resolveSymbol(Symbol *Old, const Symbol &New) {
     llvm_unreachable("bad symbol kind");
   }
 }
-
-template void SymbolTable::addCombinedLTOObject<ELF32LE>();
-template void SymbolTable::addCombinedLTOObject<ELF32BE>();
-template void SymbolTable::addCombinedLTOObject<ELF64LE>();
-template void SymbolTable::addCombinedLTOObject<ELF64BE>();
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index f77d04516b901..25b73fa5481fe 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -10,7 +10,6 @@
 #define LLD_ELF_SYMBOL_TABLE_H
 
 #include "InputFiles.h"
-#include "LTO.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
@@ -40,7 +39,6 @@ class Undefined;
 // is one add* function per symbol type.
 class SymbolTable {
 public:
-  template <class ELFT> void addCombinedLTOObject();
   void wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap);
 
   ArrayRef<Symbol *> getSymbols() const { return SymVector; }
@@ -92,9 +90,6 @@ class SymbolTable {
   // can have the same name. We use this map to handle "extern C++ {}"
   // directive in version scripts.
   llvm::Optional<llvm::StringMap<std::vector<Symbol *>>> DemangledSyms;
-
-  // For LTO.
-  std::unique_ptr<BitcodeCompiler> LTO;
 };
 
 extern SymbolTable *Symtab;

From b9889bbacb4b3cf68838fb54b792435d8d9e341e Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Thu, 23 May 2019 09:41:03 +0000
Subject: [PATCH 0006/1176] [WebAssembly] Seal imports section before counting
 imports

Summary:
Before we can assign entries in the function of global index space
we need to know the total number of function and global imports
respectively.

To avoid programmer error this change seals that imports section before
assigned function and global index space.  Any attempt to add an import
after the section is sealed will assert.

The lack this such as check caused  https://reviews.llvm.org/D61876
to be reverted.  I'm also trying to craft a test case the this
failure.

Subscribers: dschuff, jgravelle-google, aheejin, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62240

llvm-svn: 361470
---
 lld/wasm/SyntheticSections.cpp | 11 +++++++----
 lld/wasm/SyntheticSections.h   | 20 +++++++++++++++++---
 lld/wasm/Writer.cpp            | 16 +++++++++-------
 3 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 6dd68f6f10a1b..198e6dbdffee1 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -92,6 +92,7 @@ void TypeSection::writeBody() {
 }
 
 uint32_t ImportSection::numImports() const {
+  assert(IsSealed);
   uint32_t NumImports = ImportedSymbols.size() + GOTSymbols.size();
   if (Config->ImportMemory)
     ++NumImports;
@@ -101,6 +102,7 @@ uint32_t ImportSection::numImports() const {
 }
 
 void ImportSection::addGOTEntry(Symbol *Sym) {
+  assert(!IsSealed);
   if (Sym->hasGOTIndex())
     return;
   Sym->setGOTIndex(NumImportedGlobals++);
@@ -108,6 +110,7 @@ void ImportSection::addGOTEntry(Symbol *Sym) {
 }
 
 void ImportSection::addImport(Symbol *Sym) {
+  assert(!IsSealed);
   ImportedSymbols.emplace_back(Sym);
   if (auto *F = dyn_cast<FunctionSymbol>(Sym))
     F->setFunctionIndex(NumImportedFunctions++);
@@ -202,7 +205,7 @@ void FunctionSection::addFunction(InputFunction *Func) {
   if (!Func->Live)
     return;
   uint32_t FunctionIndex =
-      Out.ImportSec->NumImportedFunctions + InputFunctions.size();
+      Out.ImportSec->numImportedFunctions() + InputFunctions.size();
   InputFunctions.emplace_back(Func);
   Func->setFunctionIndex(FunctionIndex);
 }
@@ -251,7 +254,7 @@ void GlobalSection::addGlobal(InputGlobal *Global) {
   if (!Global->Live)
     return;
   uint32_t GlobalIndex =
-      Out.ImportSec->NumImportedGlobals + InputGlobals.size();
+      Out.ImportSec->numImportedGlobals() + InputGlobals.size();
   LLVM_DEBUG(dbgs() << "addGlobal: " << GlobalIndex << "\n");
   Global->setGlobalIndex(GlobalIndex);
   Out.GlobalSec->InputGlobals.push_back(Global);
@@ -270,7 +273,7 @@ void EventSection::writeBody() {
 void EventSection::addEvent(InputEvent *Event) {
   if (!Event->Live)
     return;
-  uint32_t EventIndex = Out.ImportSec->NumImportedEvents + InputEvents.size();
+  uint32_t EventIndex = Out.ImportSec->numImportedEvents() + InputEvents.size();
   LLVM_DEBUG(dbgs() << "addEvent: " << EventIndex << "\n");
   Event->setEventIndex(EventIndex);
   InputEvents.push_back(Event);
@@ -457,7 +460,7 @@ void LinkingSection::addToSymtab(Symbol *Sym) {
 }
 
 unsigned NameSection::numNames() const {
-  unsigned NumNames = Out.ImportSec->NumImportedFunctions;
+  unsigned NumNames = Out.ImportSec->numImportedFunctions();
   for (const InputFunction *F : Out.FunctionSec->InputFunctions)
     if (!F->getName().empty() || !F->getDebugName().empty())
       ++NumNames;
diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index c897132c35532..ccd66326a2461 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -101,14 +101,28 @@ class ImportSection : public SyntheticSection {
   void writeBody() override;
   void addImport(Symbol *Sym);
   void addGOTEntry(Symbol *Sym);
+  void seal() { IsSealed = true; }
   uint32_t numImports() const;
+  uint32_t numImportedGlobals() const {
+    assert(IsSealed);
+    return NumImportedGlobals;
+  }
+  uint32_t numImportedFunctions() const {
+    assert(IsSealed);
+    return NumImportedFunctions;
+  }
+  uint32_t numImportedEvents() const {
+    assert(IsSealed);
+    return NumImportedEvents;
+  }
 
-  unsigned NumImportedGlobals = 0;
-  unsigned NumImportedFunctions = 0;
-  unsigned NumImportedEvents = 0;
   std::vector<const Symbol *> ImportedSymbols;
 
 protected:
+  bool IsSealed = false;
+  unsigned NumImportedGlobals = 0;
+  unsigned NumImportedFunctions = 0;
+  unsigned NumImportedEvents = 0;
   std::vector<const Symbol *> GOTSymbols;
 };
 
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index eb567ecf5c55e..01dbd82dc3549 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -439,7 +439,7 @@ void Writer::calculateExports() {
         WasmExport{FunctionTableName, WASM_EXTERNAL_TABLE, 0});
 
   unsigned FakeGlobalIndex =
-      Out.ImportSec->NumImportedGlobals + Out.GlobalSec->InputGlobals.size();
+      Out.ImportSec->numImportedGlobals() + Out.GlobalSec->InputGlobals.size();
 
   for (Symbol *Sym : Symtab->getSymbols()) {
     if (!Sym->isExported())
@@ -532,7 +532,9 @@ static void scanRelocations() {
 }
 
 void Writer::assignIndexes() {
-  assert(Out.FunctionSec->InputFunctions.empty());
+  // Seal the import section, since other index spaces such as function and
+  // global are effected by the number of imports.
+  Out.ImportSec->seal();
 
   for (InputFunction *Func : Symtab->SyntheticFunctions)
     Out.FunctionSec->addFunction(Func);
@@ -543,8 +545,6 @@ void Writer::assignIndexes() {
       Out.FunctionSec->addFunction(Func);
   }
 
-  scanRelocations();
-
   for (InputGlobal *Global : Symtab->SyntheticGlobals)
     Out.GlobalSec->addGlobal(Global);
 
@@ -724,6 +724,8 @@ void Writer::run() {
   populateTargetFeatures();
   log("-- calculateImports");
   calculateImports();
+  log("-- scanRelocations");
+  scanRelocations();
   log("-- assignIndexes");
   assignIndexes();
   log("-- calculateInitFunctions");
@@ -750,9 +752,9 @@ void Writer::run() {
     log("Defined Functions: " + Twine(Out.FunctionSec->InputFunctions.size()));
     log("Defined Globals  : " + Twine(Out.GlobalSec->InputGlobals.size()));
     log("Defined Events   : " + Twine(Out.EventSec->InputEvents.size()));
-    log("Function Imports : " + Twine(Out.ImportSec->NumImportedFunctions));
-    log("Global Imports   : " + Twine(Out.ImportSec->NumImportedGlobals));
-    log("Event Imports    : " + Twine(Out.ImportSec->NumImportedEvents));
+    log("Function Imports : " + Twine(Out.ImportSec->numImportedFunctions()));
+    log("Global Imports   : " + Twine(Out.ImportSec->numImportedGlobals()));
+    log("Event Imports    : " + Twine(Out.ImportSec->numImportedEvents()));
     for (ObjFile *File : Symtab->ObjectFiles)
       File->dumpInfo();
   }

From 691502f61e9e6e7de657b21f7c311d1cece3f99b Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 23 May 2019 09:41:39 +0000
Subject: [PATCH 0007/1176] DWARFASTParserClang: Reduce indentation

by two levels via early returns.

llvm-svn: 361471
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 3002 ++++++++---------
 1 file changed, 1495 insertions(+), 1507 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 51a37444123c6..15b91f53a80cc 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -231,1673 +231,1661 @@ static void CompleteExternalTagDeclType(ClangASTImporter &ast_importer,
 TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                                                const DWARFDIE &die, Log *log,
                                                bool *type_is_new_ptr) {
-  TypeSP type_sp;
-
   if (type_is_new_ptr)
     *type_is_new_ptr = false;
 
   AccessType accessibility = eAccessNone;
-  if (die) {
-    SymbolFileDWARF *dwarf = die.GetDWARF();
-    if (log) {
-      DWARFDIE context_die;
-      clang::DeclContext *context =
-          GetClangDeclContextContainingDIE(die, &context_die);
-
-      dwarf->GetObjectFile()->GetModule()->LogMessage(
-          log, "SymbolFileDWARF::ParseType (die = 0x%8.8x, decl_ctx = %p (die "
-               "0x%8.8x)) %s name = '%s')",
-          die.GetOffset(), static_cast<void *>(context),
-          context_die.GetOffset(), die.GetTagAsCString(), die.GetName());
-    }
-    Type *type_ptr = dwarf->GetDIEToType().lookup(die.GetDIE());
-    TypeList *type_list = dwarf->GetTypeList();
-    if (type_ptr == NULL) {
-      if (type_is_new_ptr)
-        *type_is_new_ptr = true;
+  if (!die)
+    return nullptr;
 
-      const dw_tag_t tag = die.Tag();
+  SymbolFileDWARF *dwarf = die.GetDWARF();
+  if (log) {
+    DWARFDIE context_die;
+    clang::DeclContext *context =
+        GetClangDeclContextContainingDIE(die, &context_die);
+
+    dwarf->GetObjectFile()->GetModule()->LogMessage(
+        log,
+        "SymbolFileDWARF::ParseType (die = 0x%8.8x, decl_ctx = %p (die "
+        "0x%8.8x)) %s name = '%s')",
+        die.GetOffset(), static_cast<void *>(context), context_die.GetOffset(),
+        die.GetTagAsCString(), die.GetName());
+  }
 
-      bool is_forward_declaration = false;
-      DWARFAttributes attributes;
-      const char *type_name_cstr = NULL;
-      const char *mangled_name_cstr = NULL;
-      ConstString type_name_const_str;
-      Type::ResolveState resolve_state = Type::eResolveStateUnresolved;
-      llvm::Optional<uint64_t> byte_size;
-      Declaration decl;
-
-      Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID;
-      CompilerType clang_type;
-      DWARFFormValue form_value;
+  Type *type_ptr = dwarf->GetDIEToType().lookup(die.GetDIE());
+  if (type_ptr == DIE_IS_BEING_PARSED)
+    return nullptr;
+  if (type_ptr)
+    return type_ptr->shared_from_this();
 
-      dw_attr_t attr;
-
-      switch (tag) {
-      case DW_TAG_typedef:
-      case DW_TAG_base_type:
-      case DW_TAG_pointer_type:
-      case DW_TAG_reference_type:
-      case DW_TAG_rvalue_reference_type:
-      case DW_TAG_const_type:
-      case DW_TAG_restrict_type:
-      case DW_TAG_volatile_type:
-      case DW_TAG_unspecified_type: {
-        // Set a bit that lets us know that we are currently parsing this
-        dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
-        const size_t num_attributes = die.GetAttributes(attributes);
-        uint32_t encoding = 0;
-        DWARFFormValue encoding_uid;
-
-        if (num_attributes > 0) {
-          uint32_t i;
-          for (i = 0; i < num_attributes; ++i) {
-            attr = attributes.AttributeAtIndex(i);
-            if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-              switch (attr) {
-              case DW_AT_decl_file:
-                decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                    form_value.Unsigned()));
-                break;
-              case DW_AT_decl_line:
-                decl.SetLine(form_value.Unsigned());
-                break;
-              case DW_AT_decl_column:
-                decl.SetColumn(form_value.Unsigned());
-                break;
-              case DW_AT_name:
-                type_name_cstr = form_value.AsCString();
-                if (type_name_cstr)
-                  type_name_const_str.SetCString(type_name_cstr);
-                break;
-              case DW_AT_byte_size:
-                byte_size = form_value.Unsigned();
-                break;
-              case DW_AT_encoding:
-                encoding = form_value.Unsigned();
-                break;
-              case DW_AT_type:
-                encoding_uid = form_value;
-                break;
-              default:
-              case DW_AT_sibling:
-                break;
-              }
-            }
-          }
-        }
+  TypeList *type_list = dwarf->GetTypeList();
+  if (type_is_new_ptr)
+    *type_is_new_ptr = true;
 
-        if (tag == DW_TAG_typedef && encoding_uid.IsValid()) {
-          // Try to parse a typedef from the DWO file first as modules can
-          // contain typedef'ed structures that have no names like:
-          //
-          //  typedef struct { int a; } Foo;
-          //
-          // In this case we will have a structure with no name and a typedef
-          // named "Foo" that points to this unnamed structure. The name in the
-          // typedef is the only identifier for the struct, so always try to
-          // get typedefs from DWO files if possible.
-          //
-          // The type_sp returned will be empty if the typedef doesn't exist in
-          // a DWO file, so it is cheap to call this function just to check.
-          //
-          // If we don't do this we end up creating a TypeSP that says this is
-          // a typedef to type 0x123 (the DW_AT_type value would be 0x123 in
-          // the DW_TAG_typedef), and this is the unnamed structure type. We
-          // will have a hard time tracking down an unnammed structure type in
-          // the module DWO file, so we make sure we don't get into this
-          // situation by always resolving typedefs from the DWO file.
-          const DWARFDIE encoding_die = encoding_uid.Reference();
+  const dw_tag_t tag = die.Tag();
+
+  bool is_forward_declaration = false;
+  DWARFAttributes attributes;
+  const char *type_name_cstr = NULL;
+  const char *mangled_name_cstr = NULL;
+  ConstString type_name_const_str;
+  Type::ResolveState resolve_state = Type::eResolveStateUnresolved;
+  llvm::Optional<uint64_t> byte_size;
+  Declaration decl;
+
+  Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID;
+  CompilerType clang_type;
+  DWARFFormValue form_value;
+
+  dw_attr_t attr;
+  TypeSP type_sp;
+  switch (tag) {
+  case DW_TAG_typedef:
+  case DW_TAG_base_type:
+  case DW_TAG_pointer_type:
+  case DW_TAG_reference_type:
+  case DW_TAG_rvalue_reference_type:
+  case DW_TAG_const_type:
+  case DW_TAG_restrict_type:
+  case DW_TAG_volatile_type:
+  case DW_TAG_unspecified_type: {
+    // Set a bit that lets us know that we are currently parsing this
+    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
 
-          // First make sure that the die that this is typedef'ed to _is_ just
-          // a declaration (DW_AT_declaration == 1), not a full definition
-          // since template types can't be represented in modules since only
-          // concrete instances of templates are ever emitted and modules won't
-          // contain those
-          if (encoding_die &&
-              encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) ==
-                  1) {
-            type_sp = ParseTypeFromDWO(die, log);
-            if (type_sp)
-              return type_sp;
+    const size_t num_attributes = die.GetAttributes(attributes);
+    uint32_t encoding = 0;
+    DWARFFormValue encoding_uid;
+
+    if (num_attributes > 0) {
+      uint32_t i;
+      for (i = 0; i < num_attributes; ++i) {
+        attr = attributes.AttributeAtIndex(i);
+        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+          switch (attr) {
+          case DW_AT_decl_file:
+            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
+                form_value.Unsigned()));
+            break;
+          case DW_AT_decl_line:
+            decl.SetLine(form_value.Unsigned());
+            break;
+          case DW_AT_decl_column:
+            decl.SetColumn(form_value.Unsigned());
+            break;
+          case DW_AT_name:
+            type_name_cstr = form_value.AsCString();
+            if (type_name_cstr)
+              type_name_const_str.SetCString(type_name_cstr);
+            break;
+          case DW_AT_byte_size:
+            byte_size = form_value.Unsigned();
+            break;
+          case DW_AT_encoding:
+            encoding = form_value.Unsigned();
+            break;
+          case DW_AT_type:
+            encoding_uid = form_value;
+            break;
+          default:
+          case DW_AT_sibling:
+            break;
           }
         }
+      }
+    }
 
-        DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\") type => 0x%8.8lx\n",
-                     die.GetID(), DW_TAG_value_to_name(tag), type_name_cstr,
-                     encoding_uid.Reference());
+    if (tag == DW_TAG_typedef && encoding_uid.IsValid()) {
+      // Try to parse a typedef from the DWO file first as modules can
+      // contain typedef'ed structures that have no names like:
+      //
+      //  typedef struct { int a; } Foo;
+      //
+      // In this case we will have a structure with no name and a typedef
+      // named "Foo" that points to this unnamed structure. The name in the
+      // typedef is the only identifier for the struct, so always try to
+      // get typedefs from DWO files if possible.
+      //
+      // The type_sp returned will be empty if the typedef doesn't exist in
+      // a DWO file, so it is cheap to call this function just to check.
+      //
+      // If we don't do this we end up creating a TypeSP that says this is
+      // a typedef to type 0x123 (the DW_AT_type value would be 0x123 in
+      // the DW_TAG_typedef), and this is the unnamed structure type. We
+      // will have a hard time tracking down an unnammed structure type in
+      // the module DWO file, so we make sure we don't get into this
+      // situation by always resolving typedefs from the DWO file.
+      const DWARFDIE encoding_die = encoding_uid.Reference();
+
+      // First make sure that the die that this is typedef'ed to _is_ just
+      // a declaration (DW_AT_declaration == 1), not a full definition
+      // since template types can't be represented in modules since only
+      // concrete instances of templates are ever emitted and modules won't
+      // contain those
+      if (encoding_die &&
+          encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) {
+        type_sp = ParseTypeFromDWO(die, log);
+        if (type_sp)
+          return type_sp;
+      }
+    }
 
-        switch (tag) {
-        default:
-          break;
+    DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\") type => 0x%8.8lx\n",
+                 die.GetID(), DW_TAG_value_to_name(tag), type_name_cstr,
+                 encoding_uid.Reference());
 
-        case DW_TAG_unspecified_type:
-          if (strcmp(type_name_cstr, "nullptr_t") == 0 ||
-              strcmp(type_name_cstr, "decltype(nullptr)") == 0) {
-            resolve_state = Type::eResolveStateFull;
-            clang_type = m_ast.GetBasicType(eBasicTypeNullPtr);
-            break;
-          }
-          // Fall through to base type below in case we can handle the type
-          // there...
-          LLVM_FALLTHROUGH;
-
-        case DW_TAG_base_type:
-          resolve_state = Type::eResolveStateFull;
-          clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
-              type_name_cstr, encoding, byte_size.getValueOr(0) * 8);
-          break;
+    switch (tag) {
+    default:
+      break;
 
-        case DW_TAG_pointer_type:
-          encoding_data_type = Type::eEncodingIsPointerUID;
-          break;
-        case DW_TAG_reference_type:
-          encoding_data_type = Type::eEncodingIsLValueReferenceUID;
-          break;
-        case DW_TAG_rvalue_reference_type:
-          encoding_data_type = Type::eEncodingIsRValueReferenceUID;
-          break;
-        case DW_TAG_typedef:
-          encoding_data_type = Type::eEncodingIsTypedefUID;
-          break;
-        case DW_TAG_const_type:
-          encoding_data_type = Type::eEncodingIsConstUID;
-          break;
-        case DW_TAG_restrict_type:
-          encoding_data_type = Type::eEncodingIsRestrictUID;
-          break;
-        case DW_TAG_volatile_type:
-          encoding_data_type = Type::eEncodingIsVolatileUID;
-          break;
-        }
+    case DW_TAG_unspecified_type:
+      if (strcmp(type_name_cstr, "nullptr_t") == 0 ||
+          strcmp(type_name_cstr, "decltype(nullptr)") == 0) {
+        resolve_state = Type::eResolveStateFull;
+        clang_type = m_ast.GetBasicType(eBasicTypeNullPtr);
+        break;
+      }
+      // Fall through to base type below in case we can handle the type
+      // there...
+      LLVM_FALLTHROUGH;
+
+    case DW_TAG_base_type:
+      resolve_state = Type::eResolveStateFull;
+      clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
+          type_name_cstr, encoding, byte_size.getValueOr(0) * 8);
+      break;
 
-        if (!clang_type &&
-            (encoding_data_type == Type::eEncodingIsPointerUID ||
-             encoding_data_type == Type::eEncodingIsTypedefUID)) {
-          if (tag == DW_TAG_pointer_type) {
-            DWARFDIE target_die = die.GetReferencedDIE(DW_AT_type);
-
-            if (target_die.GetAttributeValueAsUnsigned(DW_AT_APPLE_block, 0)) {
-              // Blocks have a __FuncPtr inside them which is a pointer to a
-              // function of the proper type.
-
-              for (DWARFDIE child_die = target_die.GetFirstChild();
-                   child_die.IsValid(); child_die = child_die.GetSibling()) {
-                if (!strcmp(child_die.GetAttributeValueAsString(DW_AT_name, ""),
-                            "__FuncPtr")) {
-                  DWARFDIE function_pointer_type =
-                      child_die.GetReferencedDIE(DW_AT_type);
-
-                  if (function_pointer_type) {
-                    DWARFDIE function_type =
-                        function_pointer_type.GetReferencedDIE(DW_AT_type);
-
-                    bool function_type_is_new_pointer;
-                    TypeSP lldb_function_type_sp = ParseTypeFromDWARF(
-                        sc, function_type, log, &function_type_is_new_pointer);
-
-                    if (lldb_function_type_sp) {
-                      clang_type = m_ast.CreateBlockPointerType(
-                          lldb_function_type_sp->GetForwardCompilerType());
-                      encoding_data_type = Type::eEncodingIsUID;
-                      encoding_uid.Clear();
-                      resolve_state = Type::eResolveStateFull;
-                    }
-                  }
+    case DW_TAG_pointer_type:
+      encoding_data_type = Type::eEncodingIsPointerUID;
+      break;
+    case DW_TAG_reference_type:
+      encoding_data_type = Type::eEncodingIsLValueReferenceUID;
+      break;
+    case DW_TAG_rvalue_reference_type:
+      encoding_data_type = Type::eEncodingIsRValueReferenceUID;
+      break;
+    case DW_TAG_typedef:
+      encoding_data_type = Type::eEncodingIsTypedefUID;
+      break;
+    case DW_TAG_const_type:
+      encoding_data_type = Type::eEncodingIsConstUID;
+      break;
+    case DW_TAG_restrict_type:
+      encoding_data_type = Type::eEncodingIsRestrictUID;
+      break;
+    case DW_TAG_volatile_type:
+      encoding_data_type = Type::eEncodingIsVolatileUID;
+      break;
+    }
 
-                  break;
+    if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID ||
+                        encoding_data_type == Type::eEncodingIsTypedefUID)) {
+      if (tag == DW_TAG_pointer_type) {
+        DWARFDIE target_die = die.GetReferencedDIE(DW_AT_type);
+
+        if (target_die.GetAttributeValueAsUnsigned(DW_AT_APPLE_block, 0)) {
+          // Blocks have a __FuncPtr inside them which is a pointer to a
+          // function of the proper type.
+
+          for (DWARFDIE child_die = target_die.GetFirstChild();
+               child_die.IsValid(); child_die = child_die.GetSibling()) {
+            if (!strcmp(child_die.GetAttributeValueAsString(DW_AT_name, ""),
+                        "__FuncPtr")) {
+              DWARFDIE function_pointer_type =
+                  child_die.GetReferencedDIE(DW_AT_type);
+
+              if (function_pointer_type) {
+                DWARFDIE function_type =
+                    function_pointer_type.GetReferencedDIE(DW_AT_type);
+
+                bool function_type_is_new_pointer;
+                TypeSP lldb_function_type_sp = ParseTypeFromDWARF(
+                    sc, function_type, log, &function_type_is_new_pointer);
+
+                if (lldb_function_type_sp) {
+                  clang_type = m_ast.CreateBlockPointerType(
+                      lldb_function_type_sp->GetForwardCompilerType());
+                  encoding_data_type = Type::eEncodingIsUID;
+                  encoding_uid.Clear();
+                  resolve_state = Type::eResolveStateFull;
                 }
               }
+
+              break;
             }
           }
+        }
+      }
 
-          bool translation_unit_is_objc =
-              (sc.comp_unit->GetLanguage() == eLanguageTypeObjC ||
-               sc.comp_unit->GetLanguage() == eLanguageTypeObjC_plus_plus);
+      bool translation_unit_is_objc =
+          (sc.comp_unit->GetLanguage() == eLanguageTypeObjC ||
+           sc.comp_unit->GetLanguage() == eLanguageTypeObjC_plus_plus);
 
-          if (translation_unit_is_objc) {
-            if (type_name_cstr != NULL) {
-              static ConstString g_objc_type_name_id("id");
-              static ConstString g_objc_type_name_Class("Class");
-              static ConstString g_objc_type_name_selector("SEL");
+      if (translation_unit_is_objc) {
+        if (type_name_cstr != NULL) {
+          static ConstString g_objc_type_name_id("id");
+          static ConstString g_objc_type_name_Class("Class");
+          static ConstString g_objc_type_name_selector("SEL");
 
-              if (type_name_const_str == g_objc_type_name_id) {
-                if (log)
-                  dwarf->GetObjectFile()->GetModule()->LogMessage(
-                      log, "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' "
-                           "is Objective-C 'id' built-in type.",
-                      die.GetOffset(), die.GetTagAsCString(), die.GetName());
-                clang_type = m_ast.GetBasicType(eBasicTypeObjCID);
-                encoding_data_type = Type::eEncodingIsUID;
-                encoding_uid.Clear();
-                resolve_state = Type::eResolveStateFull;
+          if (type_name_const_str == g_objc_type_name_id) {
+            if (log)
+              dwarf->GetObjectFile()->GetModule()->LogMessage(
+                  log,
+                  "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' "
+                  "is Objective-C 'id' built-in type.",
+                  die.GetOffset(), die.GetTagAsCString(), die.GetName());
+            clang_type = m_ast.GetBasicType(eBasicTypeObjCID);
+            encoding_data_type = Type::eEncodingIsUID;
+            encoding_uid.Clear();
+            resolve_state = Type::eResolveStateFull;
 
-              } else if (type_name_const_str == g_objc_type_name_Class) {
-                if (log)
-                  dwarf->GetObjectFile()->GetModule()->LogMessage(
-                      log, "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' "
-                           "is Objective-C 'Class' built-in type.",
-                      die.GetOffset(), die.GetTagAsCString(), die.GetName());
-                clang_type = m_ast.GetBasicType(eBasicTypeObjCClass);
-                encoding_data_type = Type::eEncodingIsUID;
-                encoding_uid.Clear();
-                resolve_state = Type::eResolveStateFull;
-              } else if (type_name_const_str == g_objc_type_name_selector) {
+          } else if (type_name_const_str == g_objc_type_name_Class) {
+            if (log)
+              dwarf->GetObjectFile()->GetModule()->LogMessage(
+                  log,
+                  "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' "
+                  "is Objective-C 'Class' built-in type.",
+                  die.GetOffset(), die.GetTagAsCString(), die.GetName());
+            clang_type = m_ast.GetBasicType(eBasicTypeObjCClass);
+            encoding_data_type = Type::eEncodingIsUID;
+            encoding_uid.Clear();
+            resolve_state = Type::eResolveStateFull;
+          } else if (type_name_const_str == g_objc_type_name_selector) {
+            if (log)
+              dwarf->GetObjectFile()->GetModule()->LogMessage(
+                  log,
+                  "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' "
+                  "is Objective-C 'selector' built-in type.",
+                  die.GetOffset(), die.GetTagAsCString(), die.GetName());
+            clang_type = m_ast.GetBasicType(eBasicTypeObjCSel);
+            encoding_data_type = Type::eEncodingIsUID;
+            encoding_uid.Clear();
+            resolve_state = Type::eResolveStateFull;
+          }
+        } else if (encoding_data_type == Type::eEncodingIsPointerUID &&
+                   encoding_uid.IsValid()) {
+          // Clang sometimes erroneously emits id as objc_object*.  In that
+          // case we fix up the type to "id".
+
+          const DWARFDIE encoding_die = encoding_uid.Reference();
+
+          if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) {
+            if (const char *struct_name = encoding_die.GetName()) {
+              if (!strcmp(struct_name, "objc_object")) {
                 if (log)
                   dwarf->GetObjectFile()->GetModule()->LogMessage(
-                      log, "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' "
-                           "is Objective-C 'selector' built-in type.",
+                      log,
+                      "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s "
+                      "'%s' is 'objc_object*', which we overrode to "
+                      "'id'.",
                       die.GetOffset(), die.GetTagAsCString(), die.GetName());
-                clang_type = m_ast.GetBasicType(eBasicTypeObjCSel);
+                clang_type = m_ast.GetBasicType(eBasicTypeObjCID);
                 encoding_data_type = Type::eEncodingIsUID;
                 encoding_uid.Clear();
                 resolve_state = Type::eResolveStateFull;
               }
-            } else if (encoding_data_type == Type::eEncodingIsPointerUID &&
-                       encoding_uid.IsValid()) {
-              // Clang sometimes erroneously emits id as objc_object*.  In that
-              // case we fix up the type to "id".
-
-              const DWARFDIE encoding_die = encoding_uid.Reference();
-
-              if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) {
-                if (const char *struct_name = encoding_die.GetName()) {
-                  if (!strcmp(struct_name, "objc_object")) {
-                    if (log)
-                      dwarf->GetObjectFile()->GetModule()->LogMessage(
-                          log, "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s "
-                               "'%s' is 'objc_object*', which we overrode to "
-                               "'id'.",
-                          die.GetOffset(), die.GetTagAsCString(),
-                          die.GetName());
-                    clang_type = m_ast.GetBasicType(eBasicTypeObjCID);
-                    encoding_data_type = Type::eEncodingIsUID;
-                    encoding_uid.Clear();
-                    resolve_state = Type::eResolveStateFull;
-                  }
-                }
-              }
             }
           }
         }
+      }
+    }
 
-        type_sp = std::make_shared<Type>(
-            die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-            dwarf->GetUID(DIERef(encoding_uid)), encoding_data_type, &decl,
-            clang_type, resolve_state);
+    type_sp = std::make_shared<Type>(
+        die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
+        dwarf->GetUID(DIERef(encoding_uid)), encoding_data_type, &decl,
+        clang_type, resolve_state);
 
-        dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
-      } break;
+    dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+  } break;
 
-      case DW_TAG_structure_type:
-      case DW_TAG_union_type:
-      case DW_TAG_class_type: {
-        // Set a bit that lets us know that we are currently parsing this
-        dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+  case DW_TAG_structure_type:
+  case DW_TAG_union_type:
+  case DW_TAG_class_type: {
+    // Set a bit that lets us know that we are currently parsing this
+    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
 
-        LanguageType class_language = eLanguageTypeUnknown;
-        bool is_complete_objc_class = false;
-        size_t calling_convention 
-                = llvm::dwarf::CallingConvention::DW_CC_normal;
-        
-        const size_t num_attributes = die.GetAttributes(attributes);
-        if (num_attributes > 0) {
-          uint32_t i;
-          for (i = 0; i < num_attributes; ++i) {
-            attr = attributes.AttributeAtIndex(i);
-            if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-              switch (attr) {
-              case DW_AT_decl_file:
-                decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                    form_value.Unsigned()));
-                break;
-
-              case DW_AT_decl_line:
-                decl.SetLine(form_value.Unsigned());
-                break;
-
-              case DW_AT_decl_column:
-                decl.SetColumn(form_value.Unsigned());
-                break;
-
-              case DW_AT_name:
-                type_name_cstr = form_value.AsCString();
-                type_name_const_str.SetCString(type_name_cstr);
-                break;
-
-              case DW_AT_byte_size:
-                byte_size = form_value.Unsigned();
-                break;
-
-              case DW_AT_accessibility:
-                accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
-                break;
-
-              case DW_AT_declaration:
-                is_forward_declaration = form_value.Boolean();
-                break;
-
-              case DW_AT_APPLE_runtime_class:
-                class_language = (LanguageType)form_value.Signed();
-                break;
-
-              case DW_AT_APPLE_objc_complete_type:
-                is_complete_objc_class = form_value.Signed();
-                break;
-              case DW_AT_calling_convention:
-                calling_convention = form_value.Unsigned();
-                break;
-                
-              case DW_AT_allocated:
-              case DW_AT_associated:
-              case DW_AT_data_location:
-              case DW_AT_description:
-              case DW_AT_start_scope:
-              case DW_AT_visibility:
-              default:
-              case DW_AT_sibling:
-                break;
-              }
-            }
-          }
-        }
+    LanguageType class_language = eLanguageTypeUnknown;
+    bool is_complete_objc_class = false;
+    size_t calling_convention = llvm::dwarf::CallingConvention::DW_CC_normal;
 
-        // UniqueDWARFASTType is large, so don't create a local variables on
-        // the stack, put it on the heap. This function is often called
-        // recursively and clang isn't good and sharing the stack space for
-        // variables in different blocks.
-        std::unique_ptr<UniqueDWARFASTType> unique_ast_entry_up(
-            new UniqueDWARFASTType());
-
-        ConstString unique_typename(type_name_const_str);
-        Declaration unique_decl(decl);
-
-        if (type_name_const_str) {
-          LanguageType die_language = die.GetLanguage();
-          if (Language::LanguageIsCPlusPlus(die_language)) {
-            // For C++, we rely solely upon the one definition rule that says
-            // only one thing can exist at a given decl context. We ignore the
-            // file and line that things are declared on.
-            std::string qualified_name;
-            if (die.GetQualifiedName(qualified_name))
-              unique_typename = ConstString(qualified_name);
-            unique_decl.Clear();
-          }
+    const size_t num_attributes = die.GetAttributes(attributes);
+    if (num_attributes > 0) {
+      uint32_t i;
+      for (i = 0; i < num_attributes; ++i) {
+        attr = attributes.AttributeAtIndex(i);
+        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+          switch (attr) {
+          case DW_AT_decl_file:
+            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
+                form_value.Unsigned()));
+            break;
 
-          if (dwarf->GetUniqueDWARFASTTypeMap().Find(
-                  unique_typename, die, unique_decl,
-                  byte_size ? *byte_size : -1, *unique_ast_entry_up)) {
-            type_sp = unique_ast_entry_up->m_type_sp;
-            if (type_sp) {
-              dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
-              return type_sp;
-            }
-          }
-        }
+          case DW_AT_decl_line:
+            decl.SetLine(form_value.Unsigned());
+            break;
 
-        DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
-                     DW_TAG_value_to_name(tag), type_name_cstr);
+          case DW_AT_decl_column:
+            decl.SetColumn(form_value.Unsigned());
+            break;
 
-        int tag_decl_kind = -1;
-        AccessType default_accessibility = eAccessNone;
-        if (tag == DW_TAG_structure_type) {
-          tag_decl_kind = clang::TTK_Struct;
-          default_accessibility = eAccessPublic;
-        } else if (tag == DW_TAG_union_type) {
-          tag_decl_kind = clang::TTK_Union;
-          default_accessibility = eAccessPublic;
-        } else if (tag == DW_TAG_class_type) {
-          tag_decl_kind = clang::TTK_Class;
-          default_accessibility = eAccessPrivate;
-        }
+          case DW_AT_name:
+            type_name_cstr = form_value.AsCString();
+            type_name_const_str.SetCString(type_name_cstr);
+            break;
+
+          case DW_AT_byte_size:
+            byte_size = form_value.Unsigned();
+            break;
+
+          case DW_AT_accessibility:
+            accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
+            break;
+
+          case DW_AT_declaration:
+            is_forward_declaration = form_value.Boolean();
+            break;
+
+          case DW_AT_APPLE_runtime_class:
+            class_language = (LanguageType)form_value.Signed();
+            break;
+
+          case DW_AT_APPLE_objc_complete_type:
+            is_complete_objc_class = form_value.Signed();
+            break;
+          case DW_AT_calling_convention:
+            calling_convention = form_value.Unsigned();
+            break;
 
-        if (byte_size && *byte_size == 0 && type_name_cstr &&
-            !die.HasChildren() &&
-            sc.comp_unit->GetLanguage() == eLanguageTypeObjC) {
-          // Work around an issue with clang at the moment where forward
-          // declarations for objective C classes are emitted as:
-          //  DW_TAG_structure_type [2]
-          //  DW_AT_name( "ForwardObjcClass" )
-          //  DW_AT_byte_size( 0x00 )
-          //  DW_AT_decl_file( "..." )
-          //  DW_AT_decl_line( 1 )
-          //
-          // Note that there is no DW_AT_declaration and there are no children,
-          // and the byte size is zero.
-          is_forward_declaration = true;
+          case DW_AT_allocated:
+          case DW_AT_associated:
+          case DW_AT_data_location:
+          case DW_AT_description:
+          case DW_AT_start_scope:
+          case DW_AT_visibility:
+          default:
+          case DW_AT_sibling:
+            break;
+          }
         }
+      }
+    }
 
-        if (class_language == eLanguageTypeObjC ||
-            class_language == eLanguageTypeObjC_plus_plus) {
-          if (!is_complete_objc_class &&
-              die.Supports_DW_AT_APPLE_objc_complete_type()) {
-            // We have a valid eSymbolTypeObjCClass class symbol whose name
-            // matches the current objective C class that we are trying to find
-            // and this DIE isn't the complete definition (we checked
-            // is_complete_objc_class above and know it is false), so the real
-            // definition is in here somewhere
-            type_sp = dwarf->FindCompleteObjCDefinitionTypeForDIE(
-                die, type_name_const_str, true);
+    // UniqueDWARFASTType is large, so don't create a local variables on
+    // the stack, put it on the heap. This function is often called
+    // recursively and clang isn't good and sharing the stack space for
+    // variables in different blocks.
+    std::unique_ptr<UniqueDWARFASTType> unique_ast_entry_up(
+        new UniqueDWARFASTType());
+
+    ConstString unique_typename(type_name_const_str);
+    Declaration unique_decl(decl);
+
+    if (type_name_const_str) {
+      LanguageType die_language = die.GetLanguage();
+      if (Language::LanguageIsCPlusPlus(die_language)) {
+        // For C++, we rely solely upon the one definition rule that says
+        // only one thing can exist at a given decl context. We ignore the
+        // file and line that things are declared on.
+        std::string qualified_name;
+        if (die.GetQualifiedName(qualified_name))
+          unique_typename = ConstString(qualified_name);
+        unique_decl.Clear();
+      }
 
-            if (!type_sp) {
-              SymbolFileDWARFDebugMap *debug_map_symfile =
-                  dwarf->GetDebugMapSymfile();
-              if (debug_map_symfile) {
-                // We weren't able to find a full declaration in this DWARF,
-                // see if we have a declaration anywhere else...
-                type_sp =
-                    debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
-                        die, type_name_const_str, true);
-              }
-            }
+      if (dwarf->GetUniqueDWARFASTTypeMap().Find(
+              unique_typename, die, unique_decl, byte_size ? *byte_size : -1,
+              *unique_ast_entry_up)) {
+        type_sp = unique_ast_entry_up->m_type_sp;
+        if (type_sp) {
+          dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+          return type_sp;
+        }
+      }
+    }
 
-            if (type_sp) {
-              if (log) {
-                dwarf->GetObjectFile()->GetModule()->LogMessage(
-                    log, "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is an "
-                         "incomplete objc type, complete type is 0x%8.8" PRIx64,
-                    static_cast<void *>(this), die.GetOffset(),
-                    DW_TAG_value_to_name(tag), type_name_cstr,
-                    type_sp->GetID());
-              }
+    DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+                 DW_TAG_value_to_name(tag), type_name_cstr);
+
+    int tag_decl_kind = -1;
+    AccessType default_accessibility = eAccessNone;
+    if (tag == DW_TAG_structure_type) {
+      tag_decl_kind = clang::TTK_Struct;
+      default_accessibility = eAccessPublic;
+    } else if (tag == DW_TAG_union_type) {
+      tag_decl_kind = clang::TTK_Union;
+      default_accessibility = eAccessPublic;
+    } else if (tag == DW_TAG_class_type) {
+      tag_decl_kind = clang::TTK_Class;
+      default_accessibility = eAccessPrivate;
+    }
 
-              // We found a real definition for this type elsewhere so lets use
-              // it and cache the fact that we found a complete type for this
-              // die
-              dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
-              return type_sp;
-            }
+    if (byte_size && *byte_size == 0 && type_name_cstr && !die.HasChildren() &&
+        sc.comp_unit->GetLanguage() == eLanguageTypeObjC) {
+      // Work around an issue with clang at the moment where forward
+      // declarations for objective C classes are emitted as:
+      //  DW_TAG_structure_type [2]
+      //  DW_AT_name( "ForwardObjcClass" )
+      //  DW_AT_byte_size( 0x00 )
+      //  DW_AT_decl_file( "..." )
+      //  DW_AT_decl_line( 1 )
+      //
+      // Note that there is no DW_AT_declaration and there are no children,
+      // and the byte size is zero.
+      is_forward_declaration = true;
+    }
+
+    if (class_language == eLanguageTypeObjC ||
+        class_language == eLanguageTypeObjC_plus_plus) {
+      if (!is_complete_objc_class &&
+          die.Supports_DW_AT_APPLE_objc_complete_type()) {
+        // We have a valid eSymbolTypeObjCClass class symbol whose name
+        // matches the current objective C class that we are trying to find
+        // and this DIE isn't the complete definition (we checked
+        // is_complete_objc_class above and know it is false), so the real
+        // definition is in here somewhere
+        type_sp = dwarf->FindCompleteObjCDefinitionTypeForDIE(
+            die, type_name_const_str, true);
+
+        if (!type_sp) {
+          SymbolFileDWARFDebugMap *debug_map_symfile =
+              dwarf->GetDebugMapSymfile();
+          if (debug_map_symfile) {
+            // We weren't able to find a full declaration in this DWARF,
+            // see if we have a declaration anywhere else...
+            type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
+                die, type_name_const_str, true);
           }
         }
 
-        if (is_forward_declaration) {
-          // We have a forward declaration to a type and we need to try and
-          // find a full declaration. We look in the current type index just in
-          // case we have a forward declaration followed by an actual
-          // declarations in the DWARF. If this fails, we need to look
-          // elsewhere...
+        if (type_sp) {
           if (log) {
             dwarf->GetObjectFile()->GetModule()->LogMessage(
-                log, "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
-                     "forward declaration, trying to find complete type",
+                log,
+                "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is an "
+                "incomplete objc type, complete type is 0x%8.8" PRIx64,
                 static_cast<void *>(this), die.GetOffset(),
-                DW_TAG_value_to_name(tag), type_name_cstr);
+                DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
           }
 
-          // See if the type comes from a DWO module and if so, track down that
-          // type.
-          type_sp = ParseTypeFromDWO(die, log);
-          if (type_sp)
-            return type_sp;
-
-          DWARFDeclContext die_decl_ctx;
-          die.GetDWARFDeclContext(die_decl_ctx);
-
-          // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die,
-          // type_name_const_str);
-          type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
-
-          if (!type_sp) {
-            SymbolFileDWARFDebugMap *debug_map_symfile =
-                dwarf->GetDebugMapSymfile();
-            if (debug_map_symfile) {
-              // We weren't able to find a full declaration in this DWARF, see
-              // if we have a declaration anywhere else...
-              type_sp =
-                  debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(
-                      die_decl_ctx);
-            }
-          }
+          // We found a real definition for this type elsewhere so lets use
+          // it and cache the fact that we found a complete type for this
+          // die
+          dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+          return type_sp;
+        }
+      }
+    }
 
-          if (type_sp) {
+    if (is_forward_declaration) {
+      // We have a forward declaration to a type and we need to try and
+      // find a full declaration. We look in the current type index just in
+      // case we have a forward declaration followed by an actual
+      // declarations in the DWARF. If this fails, we need to look
+      // elsewhere...
+      if (log) {
+        dwarf->GetObjectFile()->GetModule()->LogMessage(
+            log,
+            "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
+            "forward declaration, trying to find complete type",
+            static_cast<void *>(this), die.GetOffset(),
+            DW_TAG_value_to_name(tag), type_name_cstr);
+      }
+
+      // See if the type comes from a DWO module and if so, track down that
+      // type.
+      type_sp = ParseTypeFromDWO(die, log);
+      if (type_sp)
+        return type_sp;
+
+      DWARFDeclContext die_decl_ctx;
+      die.GetDWARFDeclContext(die_decl_ctx);
+
+      // type_sp = FindDefinitionTypeForDIE (dwarf_cu, die,
+      // type_name_const_str);
+      type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
+
+      if (!type_sp) {
+        SymbolFileDWARFDebugMap *debug_map_symfile =
+            dwarf->GetDebugMapSymfile();
+        if (debug_map_symfile) {
+          // We weren't able to find a full declaration in this DWARF, see
+          // if we have a declaration anywhere else...
+          type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(
+              die_decl_ctx);
+        }
+      }
+
+      if (type_sp) {
+        if (log) {
+          dwarf->GetObjectFile()->GetModule()->LogMessage(
+              log,
+              "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
+              "forward declaration, complete type is 0x%8.8" PRIx64,
+              static_cast<void *>(this), die.GetOffset(),
+              DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
+        }
+
+        // We found a real definition for this type elsewhere so lets use
+        // it and cache the fact that we found a complete type for this die
+        dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+        clang::DeclContext *defn_decl_ctx =
+            GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID()));
+        if (defn_decl_ctx)
+          LinkDeclContextToDIE(defn_decl_ctx, die);
+        return type_sp;
+      }
+    }
+    assert(tag_decl_kind != -1);
+    bool clang_type_was_created = false;
+    clang_type.SetCompilerType(
+        &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
+    if (!clang_type) {
+      clang::DeclContext *decl_ctx =
+          GetClangDeclContextContainingDIE(die, nullptr);
+
+      // If your decl context is a record that was imported from another
+      // AST context (in the gmodules case), we need to make sure the type
+      // backing the Decl is complete before adding children to it. This is
+      // not an issue in the non-gmodules case because the debug info will
+      // always contain a full definition of parent types in that case.
+      CompleteExternalTagDeclType(GetClangASTImporter(), decl_ctx, die,
+                                  type_name_cstr);
+
+      if (accessibility == eAccessNone && decl_ctx) {
+        // Check the decl context that contains this class/struct/union. If
+        // it is a class we must give it an accessibility.
+        const clang::Decl::Kind containing_decl_kind = decl_ctx->getDeclKind();
+        if (DeclKindIsCXXClass(containing_decl_kind))
+          accessibility = default_accessibility;
+      }
+
+      ClangASTMetadata metadata;
+      metadata.SetUserID(die.GetID());
+      metadata.SetIsDynamicCXXType(dwarf->ClassOrStructIsVirtual(die));
+
+      if (type_name_cstr && strchr(type_name_cstr, '<')) {
+        ClangASTContext::TemplateParameterInfos template_param_infos;
+        if (ParseTemplateParameterInfos(die, template_param_infos)) {
+          clang::ClassTemplateDecl *class_template_decl =
+              m_ast.ParseClassTemplateDecl(decl_ctx, accessibility,
+                                           type_name_cstr, tag_decl_kind,
+                                           template_param_infos);
+          if (!class_template_decl) {
             if (log) {
               dwarf->GetObjectFile()->GetModule()->LogMessage(
-                  log, "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
-                       "forward declaration, complete type is 0x%8.8" PRIx64,
+                  log,
+                  "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" "
+                  "clang::ClassTemplateDecl failed to return a decl.",
                   static_cast<void *>(this), die.GetOffset(),
-                  DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
+                  DW_TAG_value_to_name(tag), type_name_cstr);
             }
-
-            // We found a real definition for this type elsewhere so lets use
-            // it and cache the fact that we found a complete type for this die
-            dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
-            clang::DeclContext *defn_decl_ctx = GetCachedClangDeclContextForDIE(
-                dwarf->GetDIE(type_sp->GetID()));
-            if (defn_decl_ctx)
-              LinkDeclContextToDIE(defn_decl_ctx, die);
-            return type_sp;
+            return TypeSP();
           }
+
+          clang::ClassTemplateSpecializationDecl *class_specialization_decl =
+              m_ast.CreateClassTemplateSpecializationDecl(
+                  decl_ctx, class_template_decl, tag_decl_kind,
+                  template_param_infos);
+          clang_type = m_ast.CreateClassTemplateSpecializationType(
+              class_specialization_decl);
+          clang_type_was_created = true;
+
+          m_ast.SetMetadata(class_template_decl, metadata);
+          m_ast.SetMetadata(class_specialization_decl, metadata);
         }
-        assert(tag_decl_kind != -1);
-        bool clang_type_was_created = false;
-        clang_type.SetCompilerType(
-            &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
-        if (!clang_type) {
-          clang::DeclContext *decl_ctx =
-              GetClangDeclContextContainingDIE(die, nullptr);
-
-          // If your decl context is a record that was imported from another
-          // AST context (in the gmodules case), we need to make sure the type
-          // backing the Decl is complete before adding children to it. This is
-          // not an issue in the non-gmodules case because the debug info will
-          // always contain a full definition of parent types in that case.
-          CompleteExternalTagDeclType(GetClangASTImporter(), decl_ctx, die,
-                                      type_name_cstr);
-
-          if (accessibility == eAccessNone && decl_ctx) {
-            // Check the decl context that contains this class/struct/union. If
-            // it is a class we must give it an accessibility.
-            const clang::Decl::Kind containing_decl_kind =
-                decl_ctx->getDeclKind();
-            if (DeclKindIsCXXClass(containing_decl_kind))
-              accessibility = default_accessibility;
-          }
+      }
 
-          ClangASTMetadata metadata;
-          metadata.SetUserID(die.GetID());
-          metadata.SetIsDynamicCXXType(dwarf->ClassOrStructIsVirtual(die));
+      if (!clang_type_was_created) {
+        clang_type_was_created = true;
+        clang_type =
+            m_ast.CreateRecordType(decl_ctx, accessibility, type_name_cstr,
+                                   tag_decl_kind, class_language, &metadata);
+      }
+    }
 
-          if (type_name_cstr && strchr(type_name_cstr, '<')) {
-            ClangASTContext::TemplateParameterInfos template_param_infos;
-            if (ParseTemplateParameterInfos(die, template_param_infos)) {
-              clang::ClassTemplateDecl *class_template_decl =
-                  m_ast.ParseClassTemplateDecl(decl_ctx, accessibility,
-                                               type_name_cstr, tag_decl_kind,
-                                               template_param_infos);
-              if (!class_template_decl) {
-                if (log) {
-                  dwarf->GetObjectFile()->GetModule()->LogMessage(
-                    log, "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" "
-                         "clang::ClassTemplateDecl failed to return a decl.",
-                    static_cast<void *>(this), die.GetOffset(),
-                    DW_TAG_value_to_name(tag), type_name_cstr);
-                }
-                return TypeSP();
-              }
-                
-              clang::ClassTemplateSpecializationDecl
-                  *class_specialization_decl =
-                      m_ast.CreateClassTemplateSpecializationDecl(
-                          decl_ctx, class_template_decl, tag_decl_kind,
-                          template_param_infos);
-              clang_type = m_ast.CreateClassTemplateSpecializationType(
-                  class_specialization_decl);
-              clang_type_was_created = true;
-
-              m_ast.SetMetadata(class_template_decl, metadata);
-              m_ast.SetMetadata(class_specialization_decl, metadata);
-            }
-          }
+    // Store a forward declaration to this class type in case any
+    // parameters in any class methods need it for the clang types for
+    // function prototypes.
+    LinkDeclContextToDIE(m_ast.GetDeclContextForType(clang_type), die);
+    type_sp = std::make_shared<Type>(die.GetID(), dwarf, type_name_const_str,
+                                     byte_size, nullptr, LLDB_INVALID_UID,
+                                     Type::eEncodingIsUID, &decl, clang_type,
+                                     Type::eResolveStateForward);
+
+    type_sp->SetIsCompleteObjCClass(is_complete_objc_class);
+
+    // Add our type to the unique type map so we don't end up creating many
+    // copies of the same type over and over in the ASTContext for our
+    // module
+    unique_ast_entry_up->m_type_sp = type_sp;
+    unique_ast_entry_up->m_die = die;
+    unique_ast_entry_up->m_declaration = unique_decl;
+    unique_ast_entry_up->m_byte_size = byte_size.getValueOr(0);
+    dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename,
+                                             *unique_ast_entry_up);
+
+    if (is_forward_declaration && die.HasChildren()) {
+      // Check to see if the DIE actually has a definition, some version of
+      // GCC will
+      // emit DIEs with DW_AT_declaration set to true, but yet still have
+      // subprogram, members, or inheritance, so we can't trust it
+      DWARFDIE child_die = die.GetFirstChild();
+      while (child_die) {
+        switch (child_die.Tag()) {
+        case DW_TAG_inheritance:
+        case DW_TAG_subprogram:
+        case DW_TAG_member:
+        case DW_TAG_APPLE_property:
+        case DW_TAG_class_type:
+        case DW_TAG_structure_type:
+        case DW_TAG_enumeration_type:
+        case DW_TAG_typedef:
+        case DW_TAG_union_type:
+          child_die.Clear();
+          is_forward_declaration = false;
+          break;
+        default:
+          child_die = child_die.GetSibling();
+          break;
+        }
+      }
+    }
+
+    if (!is_forward_declaration) {
+      // Always start the definition for a class type so that if the class
+      // has child classes or types that require the class to be created
+      // for use as their decl contexts the class will be ready to accept
+      // these child definitions.
+      if (!die.HasChildren()) {
+        // No children for this struct/union/class, lets finish it
+        if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
+          ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
+        } else {
+          dwarf->GetObjectFile()->GetModule()->ReportError(
+              "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
+              "definition.\nPlease file a bug and attach the file at the "
+              "start of this error message",
+              die.GetOffset(), type_name_cstr);
+        }
+
+        if (tag == DW_TAG_structure_type) // this only applies in C
+        {
+          clang::RecordDecl *record_decl =
+              ClangASTContext::GetAsRecordDecl(clang_type);
 
-          if (!clang_type_was_created) {
-            clang_type_was_created = true;
-            clang_type = m_ast.CreateRecordType(decl_ctx, accessibility,
-                                                type_name_cstr, tag_decl_kind,
-                                                class_language, &metadata);
+          if (record_decl) {
+            GetClangASTImporter().InsertRecordDecl(
+                record_decl, ClangASTImporter::LayoutInfo());
           }
         }
-        
-        // Store a forward declaration to this class type in case any
-        // parameters in any class methods need it for the clang types for
-        // function prototypes.
-        LinkDeclContextToDIE(m_ast.GetDeclContextForType(clang_type), die);
-        type_sp = std::make_shared<Type>(
-            die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-            LLDB_INVALID_UID, Type::eEncodingIsUID, &decl, clang_type,
-            Type::eResolveStateForward);
+      } else if (clang_type_was_created) {
+        // Start the definition if the class is not objective C since the
+        // underlying decls respond to isCompleteDefinition(). Objective
+        // C decls don't respond to isCompleteDefinition() so we can't
+        // start the declaration definition right away. For C++
+        // class/union/structs we want to start the definition in case the
+        // class is needed as the declaration context for a contained class
+        // or type without the need to complete that type..
+
+        if (class_language != eLanguageTypeObjC &&
+            class_language != eLanguageTypeObjC_plus_plus)
+          ClangASTContext::StartTagDeclarationDefinition(clang_type);
 
-        type_sp->SetIsCompleteObjCClass(is_complete_objc_class);
-
-        // Add our type to the unique type map so we don't end up creating many
-        // copies of the same type over and over in the ASTContext for our
-        // module
-        unique_ast_entry_up->m_type_sp = type_sp;
-        unique_ast_entry_up->m_die = die;
-        unique_ast_entry_up->m_declaration = unique_decl;
-        unique_ast_entry_up->m_byte_size = byte_size.getValueOr(0);
-        dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename,
-                                                 *unique_ast_entry_up);
-
-        if (is_forward_declaration && die.HasChildren()) {
-          // Check to see if the DIE actually has a definition, some version of
-          // GCC will
-          // emit DIEs with DW_AT_declaration set to true, but yet still have
-          // subprogram, members, or inheritance, so we can't trust it
-          DWARFDIE child_die = die.GetFirstChild();
-          while (child_die) {
-            switch (child_die.Tag()) {
-            case DW_TAG_inheritance:
-            case DW_TAG_subprogram:
-            case DW_TAG_member:
-            case DW_TAG_APPLE_property:
-            case DW_TAG_class_type:
-            case DW_TAG_structure_type:
-            case DW_TAG_enumeration_type:
-            case DW_TAG_typedef:
-            case DW_TAG_union_type:
-              child_die.Clear();
-              is_forward_declaration = false;
-              break;
-            default:
-              child_die = child_die.GetSibling();
-              break;
-            }
+        // Leave this as a forward declaration until we need to know the
+        // details of the type. lldb_private::Type will automatically call
+        // the SymbolFile virtual function
+        // "SymbolFileDWARF::CompleteType(Type *)" When the definition
+        // needs to be defined.
+        assert(!dwarf->GetForwardDeclClangTypeToDie().count(
+                   ClangUtil::RemoveFastQualifiers(clang_type)
+                       .GetOpaqueQualType()) &&
+               "Type already in the forward declaration map!");
+        // Can't assume m_ast.GetSymbolFile() is actually a
+        // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple
+        // binaries.
+        dwarf->GetForwardDeclDieToClangType()[die.GetDIE()] =
+            clang_type.GetOpaqueQualType();
+        dwarf->GetForwardDeclClangTypeToDie()
+            [ClangUtil::RemoveFastQualifiers(clang_type).GetOpaqueQualType()] =
+            die.GetDIERef();
+        m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true);
+      }
+    }
+
+    // If we made a clang type, set the trivial abi if applicable: We only
+    // do this for pass by value - which implies the Trivial ABI. There
+    // isn't a way to assert that something that would normally be pass by
+    // value is pass by reference, so we ignore that attribute if set.
+    if (calling_convention == llvm::dwarf::DW_CC_pass_by_value) {
+      clang::CXXRecordDecl *record_decl =
+          m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
+      if (record_decl) {
+        record_decl->setHasTrivialSpecialMemberForCall();
+      }
+    }
+
+    if (calling_convention == llvm::dwarf::DW_CC_pass_by_reference) {
+      clang::CXXRecordDecl *record_decl =
+          m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
+      if (record_decl)
+        record_decl->setArgPassingRestrictions(
+            clang::RecordDecl::APK_CannotPassInRegs);
+    }
+
+  } break;
+
+  case DW_TAG_enumeration_type: {
+    // Set a bit that lets us know that we are currently parsing this
+    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+
+    bool is_scoped = false;
+    DWARFFormValue encoding_form;
+
+    const size_t num_attributes = die.GetAttributes(attributes);
+    if (num_attributes > 0) {
+      uint32_t i;
+
+      for (i = 0; i < num_attributes; ++i) {
+        attr = attributes.AttributeAtIndex(i);
+        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+          switch (attr) {
+          case DW_AT_decl_file:
+            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
+                form_value.Unsigned()));
+            break;
+          case DW_AT_decl_line:
+            decl.SetLine(form_value.Unsigned());
+            break;
+          case DW_AT_decl_column:
+            decl.SetColumn(form_value.Unsigned());
+            break;
+          case DW_AT_name:
+            type_name_cstr = form_value.AsCString();
+            type_name_const_str.SetCString(type_name_cstr);
+            break;
+          case DW_AT_type:
+            encoding_form = form_value;
+            break;
+          case DW_AT_byte_size:
+            byte_size = form_value.Unsigned();
+            break;
+          case DW_AT_accessibility:
+            break; // accessibility =
+                   // DW_ACCESS_to_AccessType(form_value.Unsigned()); break;
+          case DW_AT_declaration:
+            is_forward_declaration = form_value.Boolean();
+            break;
+          case DW_AT_enum_class:
+            is_scoped = form_value.Boolean();
+            break;
+          case DW_AT_allocated:
+          case DW_AT_associated:
+          case DW_AT_bit_stride:
+          case DW_AT_byte_stride:
+          case DW_AT_data_location:
+          case DW_AT_description:
+          case DW_AT_start_scope:
+          case DW_AT_visibility:
+          case DW_AT_specification:
+          case DW_AT_abstract_origin:
+          case DW_AT_sibling:
+            break;
           }
         }
+      }
 
-        if (!is_forward_declaration) {
-          // Always start the definition for a class type so that if the class
-          // has child classes or types that require the class to be created
-          // for use as their decl contexts the class will be ready to accept
-          // these child definitions.
-          if (!die.HasChildren()) {
-            // No children for this struct/union/class, lets finish it
-            if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
-              ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
-            } else {
-              dwarf->GetObjectFile()->GetModule()->ReportError(
-                  "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
-                  "definition.\nPlease file a bug and attach the file at the "
-                  "start of this error message",
-                  die.GetOffset(), type_name_cstr);
-            }
+      if (is_forward_declaration) {
+        type_sp = ParseTypeFromDWO(die, log);
+        if (type_sp)
+          return type_sp;
 
-            if (tag == DW_TAG_structure_type) // this only applies in C
-            {
-              clang::RecordDecl *record_decl =
-                  ClangASTContext::GetAsRecordDecl(clang_type);
+        DWARFDeclContext die_decl_ctx;
+        die.GetDWARFDeclContext(die_decl_ctx);
 
-              if (record_decl) {
-                GetClangASTImporter().InsertRecordDecl(
-                    record_decl, ClangASTImporter::LayoutInfo());
-              }
-            }
-          } else if (clang_type_was_created) {
-            // Start the definition if the class is not objective C since the
-            // underlying decls respond to isCompleteDefinition(). Objective
-            // C decls don't respond to isCompleteDefinition() so we can't
-            // start the declaration definition right away. For C++
-            // class/union/structs we want to start the definition in case the
-            // class is needed as the declaration context for a contained class
-            // or type without the need to complete that type..
-
-            if (class_language != eLanguageTypeObjC &&
-                class_language != eLanguageTypeObjC_plus_plus)
-              ClangASTContext::StartTagDeclarationDefinition(clang_type);
-
-            // Leave this as a forward declaration until we need to know the
-            // details of the type. lldb_private::Type will automatically call
-            // the SymbolFile virtual function
-            // "SymbolFileDWARF::CompleteType(Type *)" When the definition
-            // needs to be defined.
-            assert(!dwarf->GetForwardDeclClangTypeToDie().count(
-                       ClangUtil::RemoveFastQualifiers(clang_type)
-                           .GetOpaqueQualType()) &&
-                   "Type already in the forward declaration map!");
-            // Can't assume m_ast.GetSymbolFile() is actually a
-            // SymbolFileDWARF, it can be a SymbolFileDWARFDebugMap for Apple
-            // binaries.
-            dwarf->GetForwardDeclDieToClangType()[die.GetDIE()] =
-                clang_type.GetOpaqueQualType();
-            dwarf->GetForwardDeclClangTypeToDie()
-                [ClangUtil::RemoveFastQualifiers(clang_type)
-                     .GetOpaqueQualType()] = die.GetDIERef();
-            m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), true);
+        type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
+
+        if (!type_sp) {
+          SymbolFileDWARFDebugMap *debug_map_symfile =
+              dwarf->GetDebugMapSymfile();
+          if (debug_map_symfile) {
+            // We weren't able to find a full declaration in this DWARF,
+            // see if we have a declaration anywhere else...
+            type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(
+                die_decl_ctx);
           }
         }
-        
-        // If we made a clang type, set the trivial abi if applicable: We only
-        // do this for pass by value - which implies the Trivial ABI. There
-        // isn't a way to assert that something that would normally be pass by
-        // value is pass by reference, so we ignore that attribute if set.
-        if (calling_convention == llvm::dwarf::DW_CC_pass_by_value) {
-          clang::CXXRecordDecl *record_decl =
-                  m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
-          if (record_decl) {
-            record_decl->setHasTrivialSpecialMemberForCall();
+
+        if (type_sp) {
+          if (log) {
+            dwarf->GetObjectFile()->GetModule()->LogMessage(
+                log,
+                "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
+                "forward declaration, complete type is 0x%8.8" PRIx64,
+                static_cast<void *>(this), die.GetOffset(),
+                DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
           }
+
+          // We found a real definition for this type elsewhere so lets use
+          // it and cache the fact that we found a complete type for this
+          // die
+          dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+          clang::DeclContext *defn_decl_ctx =
+              GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID()));
+          if (defn_decl_ctx)
+            LinkDeclContextToDIE(defn_decl_ctx, die);
+          return type_sp;
+        }
+      }
+      DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+                   DW_TAG_value_to_name(tag), type_name_cstr);
+
+      CompilerType enumerator_clang_type;
+      clang_type.SetCompilerType(
+          &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
+      if (!clang_type) {
+        if (encoding_form.IsValid()) {
+          Type *enumerator_type = dwarf->ResolveTypeUID(DIERef(encoding_form));
+          if (enumerator_type)
+            enumerator_clang_type = enumerator_type->GetFullCompilerType();
         }
 
-        if (calling_convention == llvm::dwarf::DW_CC_pass_by_reference) {
-          clang::CXXRecordDecl *record_decl =
-              m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
-          if (record_decl)
-            record_decl->setArgPassingRestrictions(
-                clang::RecordDecl::APK_CannotPassInRegs);
+        if (!enumerator_clang_type) {
+          if (byte_size) {
+            enumerator_clang_type =
+                m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
+                    NULL, DW_ATE_signed, *byte_size * 8);
+          } else {
+            enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt);
+          }
         }
 
-      } break;
+        clang_type = m_ast.CreateEnumerationType(
+            type_name_cstr, GetClangDeclContextContainingDIE(die, nullptr),
+            decl, enumerator_clang_type, is_scoped);
+      } else {
+        enumerator_clang_type =
+            m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType());
+      }
 
-      case DW_TAG_enumeration_type: {
-        // Set a bit that lets us know that we are currently parsing this
-        dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+      LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type),
+                           die);
 
-        bool is_scoped = false;
-        DWARFFormValue encoding_form;
+      type_sp = std::make_shared<Type>(
+          die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
+          dwarf->GetUID(DIERef(encoding_form)), Type::eEncodingIsUID, &decl,
+          clang_type, Type::eResolveStateForward);
 
-        const size_t num_attributes = die.GetAttributes(attributes);
-        if (num_attributes > 0) {
-          uint32_t i;
-
-          for (i = 0; i < num_attributes; ++i) {
-            attr = attributes.AttributeAtIndex(i);
-            if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-              switch (attr) {
-              case DW_AT_decl_file:
-                decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                    form_value.Unsigned()));
-                break;
-              case DW_AT_decl_line:
-                decl.SetLine(form_value.Unsigned());
-                break;
-              case DW_AT_decl_column:
-                decl.SetColumn(form_value.Unsigned());
-                break;
-              case DW_AT_name:
-                type_name_cstr = form_value.AsCString();
-                type_name_const_str.SetCString(type_name_cstr);
-                break;
-              case DW_AT_type:
-                encoding_form = form_value;
-                break;
-              case DW_AT_byte_size:
-                byte_size = form_value.Unsigned();
-                break;
-              case DW_AT_accessibility:
-                break; // accessibility =
-                       // DW_ACCESS_to_AccessType(form_value.Unsigned()); break;
-              case DW_AT_declaration:
-                is_forward_declaration = form_value.Boolean();
-                break;
-              case DW_AT_enum_class:
-                is_scoped = form_value.Boolean();
-                break;
-              case DW_AT_allocated:
-              case DW_AT_associated:
-              case DW_AT_bit_stride:
-              case DW_AT_byte_stride:
-              case DW_AT_data_location:
-              case DW_AT_description:
-              case DW_AT_start_scope:
-              case DW_AT_visibility:
-              case DW_AT_specification:
-              case DW_AT_abstract_origin:
-              case DW_AT_sibling:
-                break;
-              }
-            }
-          }
+      if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
+        if (die.HasChildren()) {
+          SymbolContext cu_sc(die.GetLLDBCompileUnit());
+          bool is_signed = false;
+          enumerator_clang_type.IsIntegerType(is_signed);
+          ParseChildEnumerators(cu_sc, clang_type, is_signed,
+                                type_sp->GetByteSize().getValueOr(0), die);
+        }
+        ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
+      } else {
+        dwarf->GetObjectFile()->GetModule()->ReportError(
+            "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
+            "definition.\nPlease file a bug and attach the file at the "
+            "start of this error message",
+            die.GetOffset(), type_name_cstr);
+      }
+    }
+  } break;
 
-          if (is_forward_declaration) {
-            type_sp = ParseTypeFromDWO(die, log);
-            if (type_sp)
-              return type_sp;
-
-            DWARFDeclContext die_decl_ctx;
-            die.GetDWARFDeclContext(die_decl_ctx);
-
-            type_sp =
-                dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
-
-            if (!type_sp) {
-              SymbolFileDWARFDebugMap *debug_map_symfile =
-                  dwarf->GetDebugMapSymfile();
-              if (debug_map_symfile) {
-                // We weren't able to find a full declaration in this DWARF,
-                // see if we have a declaration anywhere else...
-                type_sp =
-                    debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(
-                        die_decl_ctx);
-              }
-            }
+  case DW_TAG_inlined_subroutine:
+  case DW_TAG_subprogram:
+  case DW_TAG_subroutine_type: {
+    // Set a bit that lets us know that we are currently parsing this
+    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+
+    DWARFFormValue type_die_form;
+    bool is_variadic = false;
+    bool is_inline = false;
+    bool is_static = false;
+    bool is_virtual = false;
+    bool is_explicit = false;
+    bool is_artificial = false;
+    bool has_template_params = false;
+    DWARFFormValue specification_die_form;
+    DWARFFormValue abstract_origin_die_form;
+    DWARFDIE object_pointer_die;
+
+    unsigned type_quals = 0;
+    clang::StorageClass storage =
+        clang::SC_None; //, Extern, Static, PrivateExtern
 
-            if (type_sp) {
-              if (log) {
-                dwarf->GetObjectFile()->GetModule()->LogMessage(
-                    log, "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
-                         "forward declaration, complete type is 0x%8.8" PRIx64,
-                    static_cast<void *>(this), die.GetOffset(),
-                    DW_TAG_value_to_name(tag), type_name_cstr,
-                    type_sp->GetID());
-              }
+    const size_t num_attributes = die.GetAttributes(attributes);
+    if (num_attributes > 0) {
+      uint32_t i;
+      for (i = 0; i < num_attributes; ++i) {
+        attr = attributes.AttributeAtIndex(i);
+        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+          switch (attr) {
+          case DW_AT_decl_file:
+            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
+                form_value.Unsigned()));
+            break;
+          case DW_AT_decl_line:
+            decl.SetLine(form_value.Unsigned());
+            break;
+          case DW_AT_decl_column:
+            decl.SetColumn(form_value.Unsigned());
+            break;
+          case DW_AT_name:
+            type_name_cstr = form_value.AsCString();
+            type_name_const_str.SetCString(type_name_cstr);
+            break;
 
-              // We found a real definition for this type elsewhere so lets use
-              // it and cache the fact that we found a complete type for this
-              // die
-              dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
-              clang::DeclContext *defn_decl_ctx =
-                  GetCachedClangDeclContextForDIE(
-                      dwarf->GetDIE(type_sp->GetID()));
-              if (defn_decl_ctx)
-                LinkDeclContextToDIE(defn_decl_ctx, die);
-              return type_sp;
-            }
-          }
-          DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
-                       DW_TAG_value_to_name(tag), type_name_cstr);
-
-          CompilerType enumerator_clang_type;
-          clang_type.SetCompilerType(
-              &m_ast,
-              dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
-          if (!clang_type) {
-            if (encoding_form.IsValid()) {
-              Type *enumerator_type =
-                  dwarf->ResolveTypeUID(DIERef(encoding_form));
-              if (enumerator_type)
-                enumerator_clang_type = enumerator_type->GetFullCompilerType();
-            }
+          case DW_AT_linkage_name:
+          case DW_AT_MIPS_linkage_name:
+            mangled_name_cstr = form_value.AsCString();
+            break;
+          case DW_AT_type:
+            type_die_form = form_value;
+            break;
+          case DW_AT_accessibility:
+            accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
+            break;
+          case DW_AT_declaration:
+            break; // is_forward_declaration = form_value.Boolean(); break;
+          case DW_AT_inline:
+            is_inline = form_value.Boolean();
+            break;
+          case DW_AT_virtuality:
+            is_virtual = form_value.Boolean();
+            break;
+          case DW_AT_explicit:
+            is_explicit = form_value.Boolean();
+            break;
+          case DW_AT_artificial:
+            is_artificial = form_value.Boolean();
+            break;
 
-            if (!enumerator_clang_type) {
-              if (byte_size) {
-                enumerator_clang_type =
-                    m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
-                        NULL, DW_ATE_signed, *byte_size * 8);
-              } else {
-                enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt);
-              }
+          case DW_AT_external:
+            if (form_value.Unsigned()) {
+              if (storage == clang::SC_None)
+                storage = clang::SC_Extern;
+              else
+                storage = clang::SC_PrivateExtern;
             }
+            break;
 
-            clang_type = m_ast.CreateEnumerationType(
-                type_name_cstr, GetClangDeclContextContainingDIE(die, nullptr),
-                decl, enumerator_clang_type, is_scoped);
-          } else {
-            enumerator_clang_type =
-                m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType());
-          }
+          case DW_AT_specification:
+            specification_die_form = form_value;
+            break;
 
-          LinkDeclContextToDIE(
-              ClangASTContext::GetDeclContextForType(clang_type), die);
-
-          type_sp = std::make_shared<Type>(
-              die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-              dwarf->GetUID(DIERef(encoding_form)), Type::eEncodingIsUID, &decl,
-              clang_type, Type::eResolveStateForward);
-
-          if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
-            if (die.HasChildren()) {
-              SymbolContext cu_sc(die.GetLLDBCompileUnit());
-              bool is_signed = false;
-              enumerator_clang_type.IsIntegerType(is_signed);
-              ParseChildEnumerators(cu_sc, clang_type, is_signed,
-                                    type_sp->GetByteSize().getValueOr(0), die);
-            }
-            ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
-          } else {
-            dwarf->GetObjectFile()->GetModule()->ReportError(
-                "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
-                "definition.\nPlease file a bug and attach the file at the "
-                "start of this error message",
-                die.GetOffset(), type_name_cstr);
+          case DW_AT_abstract_origin:
+            abstract_origin_die_form = form_value;
+            break;
+
+          case DW_AT_object_pointer:
+            object_pointer_die = form_value.Reference();
+            break;
+
+          case DW_AT_allocated:
+          case DW_AT_associated:
+          case DW_AT_address_class:
+          case DW_AT_calling_convention:
+          case DW_AT_data_location:
+          case DW_AT_elemental:
+          case DW_AT_entry_pc:
+          case DW_AT_frame_base:
+          case DW_AT_high_pc:
+          case DW_AT_low_pc:
+          case DW_AT_prototyped:
+          case DW_AT_pure:
+          case DW_AT_ranges:
+          case DW_AT_recursive:
+          case DW_AT_return_addr:
+          case DW_AT_segment:
+          case DW_AT_start_scope:
+          case DW_AT_static_link:
+          case DW_AT_trampoline:
+          case DW_AT_visibility:
+          case DW_AT_vtable_elem_location:
+          case DW_AT_description:
+          case DW_AT_sibling:
+            break;
           }
         }
-      } break;
+      }
+    }
 
-      case DW_TAG_inlined_subroutine:
-      case DW_TAG_subprogram:
-      case DW_TAG_subroutine_type: {
-        // Set a bit that lets us know that we are currently parsing this
-        dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+    std::string object_pointer_name;
+    if (object_pointer_die) {
+      const char *object_pointer_name_cstr = object_pointer_die.GetName();
+      if (object_pointer_name_cstr)
+        object_pointer_name = object_pointer_name_cstr;
+    }
 
-        DWARFFormValue type_die_form;
-        bool is_variadic = false;
-        bool is_inline = false;
-        bool is_static = false;
-        bool is_virtual = false;
-        bool is_explicit = false;
-        bool is_artificial = false;
-        bool has_template_params = false;
-        DWARFFormValue specification_die_form;
-        DWARFFormValue abstract_origin_die_form;
-        DWARFDIE object_pointer_die;
+    DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+                 DW_TAG_value_to_name(tag), type_name_cstr);
 
-        unsigned type_quals = 0;
-        clang::StorageClass storage =
-            clang::SC_None; //, Extern, Static, PrivateExtern
-
-        const size_t num_attributes = die.GetAttributes(attributes);
-        if (num_attributes > 0) {
-          uint32_t i;
-          for (i = 0; i < num_attributes; ++i) {
-            attr = attributes.AttributeAtIndex(i);
-            if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-              switch (attr) {
-              case DW_AT_decl_file:
-                decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                    form_value.Unsigned()));
-                break;
-              case DW_AT_decl_line:
-                decl.SetLine(form_value.Unsigned());
-                break;
-              case DW_AT_decl_column:
-                decl.SetColumn(form_value.Unsigned());
-                break;
-              case DW_AT_name:
-                type_name_cstr = form_value.AsCString();
-                type_name_const_str.SetCString(type_name_cstr);
-                break;
-
-              case DW_AT_linkage_name:
-              case DW_AT_MIPS_linkage_name:
-                mangled_name_cstr = form_value.AsCString();
-                break;
-              case DW_AT_type:
-                type_die_form = form_value;
-                break;
-              case DW_AT_accessibility:
-                accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
-                break;
-              case DW_AT_declaration:
-                break; // is_forward_declaration = form_value.Boolean(); break;
-              case DW_AT_inline:
-                is_inline = form_value.Boolean();
-                break;
-              case DW_AT_virtuality:
-                is_virtual = form_value.Boolean();
-                break;
-              case DW_AT_explicit:
-                is_explicit = form_value.Boolean();
-                break;
-              case DW_AT_artificial:
-                is_artificial = form_value.Boolean();
-                break;
-
-              case DW_AT_external:
-                if (form_value.Unsigned()) {
-                  if (storage == clang::SC_None)
-                    storage = clang::SC_Extern;
-                  else
-                    storage = clang::SC_PrivateExtern;
-                }
-                break;
-
-              case DW_AT_specification:
-                specification_die_form = form_value;
-                break;
-
-              case DW_AT_abstract_origin:
-                abstract_origin_die_form = form_value;
-                break;
-
-              case DW_AT_object_pointer:
-                object_pointer_die = form_value.Reference();
-                break;
-
-              case DW_AT_allocated:
-              case DW_AT_associated:
-              case DW_AT_address_class:
-              case DW_AT_calling_convention:
-              case DW_AT_data_location:
-              case DW_AT_elemental:
-              case DW_AT_entry_pc:
-              case DW_AT_frame_base:
-              case DW_AT_high_pc:
-              case DW_AT_low_pc:
-              case DW_AT_prototyped:
-              case DW_AT_pure:
-              case DW_AT_ranges:
-              case DW_AT_recursive:
-              case DW_AT_return_addr:
-              case DW_AT_segment:
-              case DW_AT_start_scope:
-              case DW_AT_static_link:
-              case DW_AT_trampoline:
-              case DW_AT_visibility:
-              case DW_AT_vtable_elem_location:
-              case DW_AT_description:
-              case DW_AT_sibling:
-                break;
-              }
-            }
-          }
-        }
+    CompilerType return_clang_type;
+    Type *func_type = NULL;
 
-        std::string object_pointer_name;
-        if (object_pointer_die) {
-          const char *object_pointer_name_cstr = object_pointer_die.GetName();
-          if (object_pointer_name_cstr)
-            object_pointer_name = object_pointer_name_cstr;
-        }
+    if (type_die_form.IsValid())
+      func_type = dwarf->ResolveTypeUID(DIERef(type_die_form));
 
-        DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
-                     DW_TAG_value_to_name(tag), type_name_cstr);
+    if (func_type)
+      return_clang_type = func_type->GetForwardCompilerType();
+    else
+      return_clang_type = m_ast.GetBasicType(eBasicTypeVoid);
 
-        CompilerType return_clang_type;
-        Type *func_type = NULL;
+    std::vector<CompilerType> function_param_types;
+    std::vector<clang::ParmVarDecl *> function_param_decls;
 
-        if (type_die_form.IsValid())
-          func_type = dwarf->ResolveTypeUID(DIERef(type_die_form));
+    // Parse the function children for the parameters
 
-        if (func_type)
-          return_clang_type = func_type->GetForwardCompilerType();
-        else
-          return_clang_type = m_ast.GetBasicType(eBasicTypeVoid);
+    DWARFDIE decl_ctx_die;
+    clang::DeclContext *containing_decl_ctx =
+        GetClangDeclContextContainingDIE(die, &decl_ctx_die);
+    const clang::Decl::Kind containing_decl_kind =
+        containing_decl_ctx->getDeclKind();
 
-        std::vector<CompilerType> function_param_types;
-        std::vector<clang::ParmVarDecl *> function_param_decls;
+    bool is_cxx_method = DeclKindIsCXXClass(containing_decl_kind);
+    // Start off static. This will be set to false in
+    // ParseChildParameters(...) if we find a "this" parameters as the
+    // first parameter
+    if (is_cxx_method) {
+      is_static = true;
+    }
 
-        // Parse the function children for the parameters
+    if (die.HasChildren()) {
+      bool skip_artificial = true;
+      ParseChildParameters(*sc.comp_unit, containing_decl_ctx, die,
+                           skip_artificial, is_static, is_variadic,
+                           has_template_params, function_param_types,
+                           function_param_decls, type_quals);
+    }
 
-        DWARFDIE decl_ctx_die;
-        clang::DeclContext *containing_decl_ctx =
-            GetClangDeclContextContainingDIE(die, &decl_ctx_die);
-        const clang::Decl::Kind containing_decl_kind =
-            containing_decl_ctx->getDeclKind();
-
-        bool is_cxx_method = DeclKindIsCXXClass(containing_decl_kind);
-        // Start off static. This will be set to false in
-        // ParseChildParameters(...) if we find a "this" parameters as the
-        // first parameter
-        if (is_cxx_method) {
-          is_static = true;
-        }
+    bool ignore_containing_context = false;
+    // Check for templatized class member functions. If we had any
+    // DW_TAG_template_type_parameter or DW_TAG_template_value_parameter
+    // the DW_TAG_subprogram DIE, then we can't let this become a method in
+    // a class. Why? Because templatized functions are only emitted if one
+    // of the templatized methods is used in the current compile unit and
+    // we will end up with classes that may or may not include these member
+    // functions and this means one class won't match another class
+    // definition and it affects our ability to use a class in the clang
+    // expression parser. So for the greater good, we currently must not
+    // allow any template member functions in a class definition.
+    if (is_cxx_method && has_template_params) {
+      ignore_containing_context = true;
+      is_cxx_method = false;
+    }
 
-        if (die.HasChildren()) {
-          bool skip_artificial = true;
-          ParseChildParameters(*sc.comp_unit, containing_decl_ctx, die,
-                               skip_artificial, is_static, is_variadic,
-                               has_template_params, function_param_types,
-                               function_param_decls, type_quals);
-        }
+    // clang_type will get the function prototype clang type after this
+    // call
+    clang_type = m_ast.CreateFunctionType(
+        return_clang_type, function_param_types.data(),
+        function_param_types.size(), is_variadic, type_quals);
+
+    if (type_name_cstr) {
+      bool type_handled = false;
+      if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) {
+        ObjCLanguage::MethodName objc_method(type_name_cstr, true);
+        if (objc_method.IsValid(true)) {
+          CompilerType class_opaque_type;
+          ConstString class_name(objc_method.GetClassName());
+          if (class_name) {
+            TypeSP complete_objc_class_type_sp(
+                dwarf->FindCompleteObjCDefinitionTypeForDIE(DWARFDIE(),
+                                                            class_name, false));
+
+            if (complete_objc_class_type_sp) {
+              CompilerType type_clang_forward_type =
+                  complete_objc_class_type_sp->GetForwardCompilerType();
+              if (ClangASTContext::IsObjCObjectOrInterfaceType(
+                      type_clang_forward_type))
+                class_opaque_type = type_clang_forward_type;
+            }
+          }
 
-        bool ignore_containing_context = false;
-        // Check for templatized class member functions. If we had any
-        // DW_TAG_template_type_parameter or DW_TAG_template_value_parameter
-        // the DW_TAG_subprogram DIE, then we can't let this become a method in
-        // a class. Why? Because templatized functions are only emitted if one
-        // of the templatized methods is used in the current compile unit and
-        // we will end up with classes that may or may not include these member
-        // functions and this means one class won't match another class
-        // definition and it affects our ability to use a class in the clang
-        // expression parser. So for the greater good, we currently must not
-        // allow any template member functions in a class definition.
-        if (is_cxx_method && has_template_params) {
-          ignore_containing_context = true;
-          is_cxx_method = false;
-        }
+          if (class_opaque_type) {
+            // If accessibility isn't set to anything valid, assume public
+            // for now...
+            if (accessibility == eAccessNone)
+              accessibility = eAccessPublic;
 
-        // clang_type will get the function prototype clang type after this
-        // call
-        clang_type = m_ast.CreateFunctionType(
-            return_clang_type, function_param_types.data(),
-            function_param_types.size(), is_variadic, type_quals);
-
-        if (type_name_cstr) {
-          bool type_handled = false;
-          if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) {
-            ObjCLanguage::MethodName objc_method(type_name_cstr, true);
-            if (objc_method.IsValid(true)) {
-              CompilerType class_opaque_type;
-              ConstString class_name(objc_method.GetClassName());
-              if (class_name) {
-                TypeSP complete_objc_class_type_sp(
-                    dwarf->FindCompleteObjCDefinitionTypeForDIE(
-                        DWARFDIE(), class_name, false));
-
-                if (complete_objc_class_type_sp) {
-                  CompilerType type_clang_forward_type =
-                      complete_objc_class_type_sp->GetForwardCompilerType();
-                  if (ClangASTContext::IsObjCObjectOrInterfaceType(
-                          type_clang_forward_type))
-                    class_opaque_type = type_clang_forward_type;
+            clang::ObjCMethodDecl *objc_method_decl =
+                m_ast.AddMethodToObjCObjectType(
+                    class_opaque_type, type_name_cstr, clang_type,
+                    accessibility, is_artificial, is_variadic);
+            type_handled = objc_method_decl != NULL;
+            if (type_handled) {
+              LinkDeclContextToDIE(
+                  ClangASTContext::GetAsDeclContext(objc_method_decl), die);
+              m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID());
+            } else {
+              dwarf->GetObjectFile()->GetModule()->ReportError(
+                  "{0x%8.8x}: invalid Objective-C method 0x%4.4x (%s), "
+                  "please file a bug and attach the file at the start of "
+                  "this error message",
+                  die.GetOffset(), tag, DW_TAG_value_to_name(tag));
+            }
+          }
+        } else if (is_cxx_method) {
+          // Look at the parent of this DIE and see if is is a class or
+          // struct and see if this is actually a C++ method
+          Type *class_type = dwarf->ResolveType(decl_ctx_die);
+          if (class_type) {
+            bool alternate_defn = false;
+            if (class_type->GetID() != decl_ctx_die.GetID() ||
+                decl_ctx_die.GetContainingDWOModuleDIE()) {
+              alternate_defn = true;
+
+              // We uniqued the parent class of this function to another
+              // class so we now need to associate all dies under
+              // "decl_ctx_die" to DIEs in the DIE for "class_type"...
+              DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
+
+              if (class_type_die) {
+                std::vector<DWARFDIE> failures;
+
+                CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
+                                           class_type, failures);
+
+                // FIXME do something with these failures that's smarter
+                // than
+                // just dropping them on the ground.  Unfortunately classes
+                // don't like having stuff added to them after their
+                // definitions are complete...
+
+                type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
+                if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
+                  type_sp = type_ptr->shared_from_this();
+                  break;
                 }
               }
+            }
 
-              if (class_opaque_type) {
-                // If accessibility isn't set to anything valid, assume public
-                // for now...
-                if (accessibility == eAccessNone)
-                  accessibility = eAccessPublic;
-
-                clang::ObjCMethodDecl *objc_method_decl =
-                    m_ast.AddMethodToObjCObjectType(
-                        class_opaque_type, type_name_cstr, clang_type,
-                        accessibility, is_artificial, is_variadic);
-                type_handled = objc_method_decl != NULL;
-                if (type_handled) {
-                  LinkDeclContextToDIE(
-                      ClangASTContext::GetAsDeclContext(objc_method_decl), die);
-                  m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID());
-                } else {
-                  dwarf->GetObjectFile()->GetModule()->ReportError(
-                      "{0x%8.8x}: invalid Objective-C method 0x%4.4x (%s), "
-                      "please file a bug and attach the file at the start of "
-                      "this error message",
-                      die.GetOffset(), tag, DW_TAG_value_to_name(tag));
-                }
+            if (specification_die_form.IsValid()) {
+              // We have a specification which we are going to base our
+              // function prototype off of, so we need this type to be
+              // completed so that the m_die_to_decl_ctx for the method in
+              // the specification has a valid clang decl context.
+              class_type->GetForwardCompilerType();
+              // If we have a specification, then the function type should
+              // have been made with the specification and not with this
+              // die.
+              DWARFDIE spec_die =
+                  dwarf->DebugInfo()->GetDIE(DIERef(specification_die_form));
+              clang::DeclContext *spec_clang_decl_ctx =
+                  GetClangDeclContextForDIE(spec_die);
+              if (spec_clang_decl_ctx) {
+                LinkDeclContextToDIE(spec_clang_decl_ctx, die);
+              } else {
+                dwarf->GetObjectFile()->GetModule()->ReportWarning(
+                    "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x"
+                    ") has no decl\n",
+                    die.GetID(),
+                    specification_die_form.Reference().GetOffset());
               }
-            } else if (is_cxx_method) {
-              // Look at the parent of this DIE and see if is is a class or
-              // struct and see if this is actually a C++ method
-              Type *class_type = dwarf->ResolveType(decl_ctx_die);
-              if (class_type) {
-                bool alternate_defn = false;
-                if (class_type->GetID() != decl_ctx_die.GetID() ||
-                    decl_ctx_die.GetContainingDWOModuleDIE()) {
-                  alternate_defn = true;
-
-                  // We uniqued the parent class of this function to another
-                  // class so we now need to associate all dies under
-                  // "decl_ctx_die" to DIEs in the DIE for "class_type"...
-                  DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID());
-
-                  if (class_type_die) {
-                    std::vector<DWARFDIE> failures;
-
-                    CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die,
-                                               class_type, failures);
-
-                    // FIXME do something with these failures that's smarter
-                    // than
-                    // just dropping them on the ground.  Unfortunately classes
-                    // don't like having stuff added to them after their
-                    // definitions are complete...
-
-                    type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
-                    if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
-                      type_sp = type_ptr->shared_from_this();
-                      break;
-                    }
-                  }
-                }
-
-                if (specification_die_form.IsValid()) {
-                  // We have a specification which we are going to base our
-                  // function prototype off of, so we need this type to be
-                  // completed so that the m_die_to_decl_ctx for the method in
-                  // the specification has a valid clang decl context.
-                  class_type->GetForwardCompilerType();
-                  // If we have a specification, then the function type should
-                  // have been made with the specification and not with this
-                  // die.
-                  DWARFDIE spec_die = dwarf->DebugInfo()->GetDIE(
-                      DIERef(specification_die_form));
-                  clang::DeclContext *spec_clang_decl_ctx =
-                      GetClangDeclContextForDIE(spec_die);
-                  if (spec_clang_decl_ctx) {
-                    LinkDeclContextToDIE(spec_clang_decl_ctx, die);
-                  } else {
-                    dwarf->GetObjectFile()->GetModule()->ReportWarning(
-                        "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x"
-                        ") has no decl\n",
-                        die.GetID(),
-                        specification_die_form.Reference().GetOffset());
-                  }
-                  type_handled = true;
-                } else if (abstract_origin_die_form.IsValid()) {
-                  // We have a specification which we are going to base our
-                  // function prototype off of, so we need this type to be
-                  // completed so that the m_die_to_decl_ctx for the method in
-                  // the abstract origin has a valid clang decl context.
+              type_handled = true;
+            } else if (abstract_origin_die_form.IsValid()) {
+              // We have a specification which we are going to base our
+              // function prototype off of, so we need this type to be
+              // completed so that the m_die_to_decl_ctx for the method in
+              // the abstract origin has a valid clang decl context.
+              class_type->GetForwardCompilerType();
+
+              DWARFDIE abs_die =
+                  dwarf->DebugInfo()->GetDIE(DIERef(abstract_origin_die_form));
+              clang::DeclContext *abs_clang_decl_ctx =
+                  GetClangDeclContextForDIE(abs_die);
+              if (abs_clang_decl_ctx) {
+                LinkDeclContextToDIE(abs_clang_decl_ctx, die);
+              } else {
+                dwarf->GetObjectFile()->GetModule()->ReportWarning(
+                    "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x"
+                    ") has no decl\n",
+                    die.GetID(),
+                    abstract_origin_die_form.Reference().GetOffset());
+              }
+              type_handled = true;
+            } else {
+              CompilerType class_opaque_type =
                   class_type->GetForwardCompilerType();
-
-                  DWARFDIE abs_die = dwarf->DebugInfo()->GetDIE(
-                      DIERef(abstract_origin_die_form));
-                  clang::DeclContext *abs_clang_decl_ctx =
-                      GetClangDeclContextForDIE(abs_die);
-                  if (abs_clang_decl_ctx) {
-                    LinkDeclContextToDIE(abs_clang_decl_ctx, die);
+              if (ClangASTContext::IsCXXClassType(class_opaque_type)) {
+                if (class_opaque_type.IsBeingDefined() || alternate_defn) {
+                  if (!is_static && !die.HasChildren()) {
+                    // We have a C++ member function with no children (this
+                    // pointer!) and clang will get mad if we try and make
+                    // a function that isn't well formed in the DWARF, so
+                    // we will just skip it...
+                    type_handled = true;
                   } else {
-                    dwarf->GetObjectFile()->GetModule()->ReportWarning(
-                        "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x"
-                        ") has no decl\n",
-                        die.GetID(),
-                        abstract_origin_die_form.Reference().GetOffset());
-                  }
-                  type_handled = true;
-                } else {
-                  CompilerType class_opaque_type =
-                      class_type->GetForwardCompilerType();
-                  if (ClangASTContext::IsCXXClassType(class_opaque_type)) {
-                    if (class_opaque_type.IsBeingDefined() || alternate_defn) {
-                      if (!is_static && !die.HasChildren()) {
-                        // We have a C++ member function with no children (this
-                        // pointer!) and clang will get mad if we try and make
-                        // a function that isn't well formed in the DWARF, so
-                        // we will just skip it...
-                        type_handled = true;
-                      } else {
-                        bool add_method = true;
-                        if (alternate_defn) {
-                          // If an alternate definition for the class exists,
-                          // then add the method only if an equivalent is not
-                          // already present.
-                          clang::CXXRecordDecl *record_decl =
-                              m_ast.GetAsCXXRecordDecl(
-                                  class_opaque_type.GetOpaqueQualType());
-                          if (record_decl) {
-                            for (auto method_iter = record_decl->method_begin();
-                                 method_iter != record_decl->method_end();
-                                 method_iter++) {
-                              clang::CXXMethodDecl *method_decl = *method_iter;
-                              if (method_decl->getNameInfo().getAsString() ==
-                                  std::string(type_name_cstr)) {
-                                if (method_decl->getType() ==
-                                    ClangUtil::GetQualType(clang_type)) {
-                                  add_method = false;
-                                  LinkDeclContextToDIE(
-                                      ClangASTContext::GetAsDeclContext(
-                                          method_decl),
-                                      die);
-                                  type_handled = true;
-
-                                  break;
-                                }
-                              }
+                    bool add_method = true;
+                    if (alternate_defn) {
+                      // If an alternate definition for the class exists,
+                      // then add the method only if an equivalent is not
+                      // already present.
+                      clang::CXXRecordDecl *record_decl =
+                          m_ast.GetAsCXXRecordDecl(
+                              class_opaque_type.GetOpaqueQualType());
+                      if (record_decl) {
+                        for (auto method_iter = record_decl->method_begin();
+                             method_iter != record_decl->method_end();
+                             method_iter++) {
+                          clang::CXXMethodDecl *method_decl = *method_iter;
+                          if (method_decl->getNameInfo().getAsString() ==
+                              std::string(type_name_cstr)) {
+                            if (method_decl->getType() ==
+                                ClangUtil::GetQualType(clang_type)) {
+                              add_method = false;
+                              LinkDeclContextToDIE(
+                                  ClangASTContext::GetAsDeclContext(
+                                      method_decl),
+                                  die);
+                              type_handled = true;
+
+                              break;
                             }
                           }
                         }
+                      }
+                    }
 
-                        if (add_method) {
-                          llvm::PrettyStackTraceFormat stack_trace(
-                              "SymbolFileDWARF::ParseType() is adding a method "
-                              "%s to class %s in DIE 0x%8.8" PRIx64 " from %s",
-                              type_name_cstr,
-                              class_type->GetName().GetCString(), die.GetID(),
-                              dwarf->GetObjectFile()
-                                  ->GetFileSpec()
-                                  .GetPath()
-                                  .c_str());
-
-                          const bool is_attr_used = false;
-                          // Neither GCC 4.2 nor clang++ currently set a valid
-                          // accessibility in the DWARF for C++ methods...
-                          // Default to public for now...
-                          if (accessibility == eAccessNone)
-                            accessibility = eAccessPublic;
-
-                          clang::CXXMethodDecl *cxx_method_decl =
-                              m_ast.AddMethodToCXXRecordType(
-                                  class_opaque_type.GetOpaqueQualType(),
-                                  type_name_cstr, mangled_name_cstr, clang_type,
-                                  accessibility, is_virtual, is_static,
-                                  is_inline, is_explicit, is_attr_used,
-                                  is_artificial);
-
-                          type_handled = cxx_method_decl != NULL;
-
-                          if (type_handled) {
-                            LinkDeclContextToDIE(
-                                ClangASTContext::GetAsDeclContext(
-                                    cxx_method_decl),
-                                die);
-
-                            ClangASTMetadata metadata;
-                            metadata.SetUserID(die.GetID());
-
-                            if (!object_pointer_name.empty()) {
-                              metadata.SetObjectPtrName(
-                                  object_pointer_name.c_str());
-                              if (log)
-                                log->Printf(
-                                    "Setting object pointer name: %s on method "
-                                    "object %p.\n",
-                                    object_pointer_name.c_str(),
-                                    static_cast<void *>(cxx_method_decl));
-                            }
-                            m_ast.SetMetadata(cxx_method_decl, metadata);
-                          } else {
-                            ignore_containing_context = true;
-                          }
+                    if (add_method) {
+                      llvm::PrettyStackTraceFormat stack_trace(
+                          "SymbolFileDWARF::ParseType() is adding a method "
+                          "%s to class %s in DIE 0x%8.8" PRIx64 " from %s",
+                          type_name_cstr, class_type->GetName().GetCString(),
+                          die.GetID(),
+                          dwarf->GetObjectFile()
+                              ->GetFileSpec()
+                              .GetPath()
+                              .c_str());
+
+                      const bool is_attr_used = false;
+                      // Neither GCC 4.2 nor clang++ currently set a valid
+                      // accessibility in the DWARF for C++ methods...
+                      // Default to public for now...
+                      if (accessibility == eAccessNone)
+                        accessibility = eAccessPublic;
+
+                      clang::CXXMethodDecl *cxx_method_decl =
+                          m_ast.AddMethodToCXXRecordType(
+                              class_opaque_type.GetOpaqueQualType(),
+                              type_name_cstr, mangled_name_cstr, clang_type,
+                              accessibility, is_virtual, is_static, is_inline,
+                              is_explicit, is_attr_used, is_artificial);
+
+                      type_handled = cxx_method_decl != NULL;
+
+                      if (type_handled) {
+                        LinkDeclContextToDIE(
+                            ClangASTContext::GetAsDeclContext(cxx_method_decl),
+                            die);
+
+                        ClangASTMetadata metadata;
+                        metadata.SetUserID(die.GetID());
+
+                        if (!object_pointer_name.empty()) {
+                          metadata.SetObjectPtrName(
+                              object_pointer_name.c_str());
+                          if (log)
+                            log->Printf(
+                                "Setting object pointer name: %s on method "
+                                "object %p.\n",
+                                object_pointer_name.c_str(),
+                                static_cast<void *>(cxx_method_decl));
                         }
+                        m_ast.SetMetadata(cxx_method_decl, metadata);
+                      } else {
+                        ignore_containing_context = true;
                       }
-                    } else {
-                      // We were asked to parse the type for a method in a
-                      // class, yet the class hasn't been asked to complete
-                      // itself through the clang::ExternalASTSource protocol,
-                      // so we need to just have the class complete itself and
-                      // do things the right way, then our
-                      // DIE should then have an entry in the
-                      // dwarf->GetDIEToType() map. First
-                      // we need to modify the dwarf->GetDIEToType() so it
-                      // doesn't think we are trying to parse this DIE
-                      // anymore...
-                      dwarf->GetDIEToType()[die.GetDIE()] = NULL;
-
-                      // Now we get the full type to force our class type to
-                      // complete itself using the clang::ExternalASTSource
-                      // protocol which will parse all base classes and all
-                      // methods (including the method for this DIE).
-                      class_type->GetFullCompilerType();
-
-                      // The type for this DIE should have been filled in the
-                      // function call above
-                      type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
-                      if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
-                        type_sp = type_ptr->shared_from_this();
-                        break;
-                      }
-
-                      // FIXME This is fixing some even uglier behavior but we
-                      // really need to
-                      // uniq the methods of each class as well as the class
-                      // itself. <rdar://problem/11240464>
-                      type_handled = true;
                     }
                   }
+                } else {
+                  // We were asked to parse the type for a method in a
+                  // class, yet the class hasn't been asked to complete
+                  // itself through the clang::ExternalASTSource protocol,
+                  // so we need to just have the class complete itself and
+                  // do things the right way, then our
+                  // DIE should then have an entry in the
+                  // dwarf->GetDIEToType() map. First
+                  // we need to modify the dwarf->GetDIEToType() so it
+                  // doesn't think we are trying to parse this DIE
+                  // anymore...
+                  dwarf->GetDIEToType()[die.GetDIE()] = NULL;
+
+                  // Now we get the full type to force our class type to
+                  // complete itself using the clang::ExternalASTSource
+                  // protocol which will parse all base classes and all
+                  // methods (including the method for this DIE).
+                  class_type->GetFullCompilerType();
+
+                  // The type for this DIE should have been filled in the
+                  // function call above
+                  type_ptr = dwarf->GetDIEToType()[die.GetDIE()];
+                  if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) {
+                    type_sp = type_ptr->shared_from_this();
+                    break;
+                  }
+
+                  // FIXME This is fixing some even uglier behavior but we
+                  // really need to
+                  // uniq the methods of each class as well as the class
+                  // itself. <rdar://problem/11240464>
+                  type_handled = true;
                 }
               }
             }
           }
+        }
+      }
 
-          if (!type_handled) {
-            clang::FunctionDecl *function_decl = nullptr;
-            clang::FunctionDecl *template_function_decl = nullptr;
-
-            if (abstract_origin_die_form.IsValid()) {
-              DWARFDIE abs_die = abstract_origin_die_form.Reference();
-
-              SymbolContext sc;
+      if (!type_handled) {
+        clang::FunctionDecl *function_decl = nullptr;
+        clang::FunctionDecl *template_function_decl = nullptr;
 
-              if (dwarf->ResolveType(abs_die)) {
-                function_decl = llvm::dyn_cast_or_null<clang::FunctionDecl>(
-                    GetCachedClangDeclContextForDIE(abs_die));
+        if (abstract_origin_die_form.IsValid()) {
+          DWARFDIE abs_die = abstract_origin_die_form.Reference();
 
-                if (function_decl) {
-                  LinkDeclContextToDIE(function_decl, die);
-                }
-              }
-            }
+          SymbolContext sc;
 
-            if (!function_decl) {
-              // We just have a function that isn't part of a class
-              function_decl = m_ast.CreateFunctionDeclaration(
-                  ignore_containing_context ? m_ast.GetTranslationUnitDecl()
-                                            : containing_decl_ctx,
-                  type_name_cstr, clang_type, storage, is_inline);
-
-              if (has_template_params) {
-                ClangASTContext::TemplateParameterInfos template_param_infos;
-                ParseTemplateParameterInfos(die, template_param_infos);
-                template_function_decl = m_ast.CreateFunctionDeclaration(
-                    ignore_containing_context ? m_ast.GetTranslationUnitDecl()
-                                              : containing_decl_ctx,
-                    type_name_cstr, clang_type, storage, is_inline);
-                clang::FunctionTemplateDecl *func_template_decl =
-                    m_ast.CreateFunctionTemplateDecl(
-                        containing_decl_ctx, template_function_decl,
-                        type_name_cstr, template_param_infos);
-                m_ast.CreateFunctionTemplateSpecializationInfo(
-                    function_decl, func_template_decl, template_param_infos);
-              }
-              
-              lldbassert(function_decl);
-
-              if (function_decl) {
-                LinkDeclContextToDIE(function_decl, die);
-
-                if (!function_param_decls.empty()) {
-                  m_ast.SetFunctionParameters(function_decl,
-                                              &function_param_decls.front(),
-                                              function_param_decls.size());
-                  if (template_function_decl)
-                    m_ast.SetFunctionParameters(template_function_decl,
-                                                &function_param_decls.front(),
-                                                function_param_decls.size());
-                }
+          if (dwarf->ResolveType(abs_die)) {
+            function_decl = llvm::dyn_cast_or_null<clang::FunctionDecl>(
+                GetCachedClangDeclContextForDIE(abs_die));
 
-                ClangASTMetadata metadata;
-                metadata.SetUserID(die.GetID());
-
-                if (!object_pointer_name.empty()) {
-                  metadata.SetObjectPtrName(object_pointer_name.c_str());
-                  if (log)
-                    log->Printf("Setting object pointer name: %s on function "
-                                "object %p.",
-                                object_pointer_name.c_str(),
-                                static_cast<void *>(function_decl));
-                }
-                m_ast.SetMetadata(function_decl, metadata);
-              }
+            if (function_decl) {
+              LinkDeclContextToDIE(function_decl, die);
             }
           }
         }
-        type_sp = std::make_shared<Type>(
-            die.GetID(), dwarf, type_name_const_str, llvm::None, nullptr,
-            LLDB_INVALID_UID, Type::eEncodingIsUID, &decl, clang_type,
-            Type::eResolveStateFull);
-        assert(type_sp.get());
-      } break;
-
-      case DW_TAG_array_type: {
-        // Set a bit that lets us know that we are currently parsing this
-        dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
-        DWARFFormValue type_die_form;
-        uint32_t byte_stride = 0;
-        uint32_t bit_stride = 0;
-        bool is_vector = false;
-        const size_t num_attributes = die.GetAttributes(attributes);
-
-        if (num_attributes > 0) {
-          uint32_t i;
-          for (i = 0; i < num_attributes; ++i) {
-            attr = attributes.AttributeAtIndex(i);
-            if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-              switch (attr) {
-              case DW_AT_decl_file:
-                decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                    form_value.Unsigned()));
-                break;
-              case DW_AT_decl_line:
-                decl.SetLine(form_value.Unsigned());
-                break;
-              case DW_AT_decl_column:
-                decl.SetColumn(form_value.Unsigned());
-                break;
-              case DW_AT_name:
-                type_name_cstr = form_value.AsCString();
-                type_name_const_str.SetCString(type_name_cstr);
-                break;
-
-              case DW_AT_type:
-                type_die_form = form_value;
-                break;
-              case DW_AT_byte_size:
-                break; // byte_size = form_value.Unsigned(); break;
-              case DW_AT_byte_stride:
-                byte_stride = form_value.Unsigned();
-                break;
-              case DW_AT_bit_stride:
-                bit_stride = form_value.Unsigned();
-                break;
-              case DW_AT_GNU_vector:
-                is_vector = form_value.Boolean();
-                break;
-              case DW_AT_accessibility:
-                break; // accessibility =
-                       // DW_ACCESS_to_AccessType(form_value.Unsigned()); break;
-              case DW_AT_declaration:
-                break; // is_forward_declaration = form_value.Boolean(); break;
-              case DW_AT_allocated:
-              case DW_AT_associated:
-              case DW_AT_data_location:
-              case DW_AT_description:
-              case DW_AT_ordering:
-              case DW_AT_start_scope:
-              case DW_AT_visibility:
-              case DW_AT_specification:
-              case DW_AT_abstract_origin:
-              case DW_AT_sibling:
-                break;
-              }
-            }
+
+        if (!function_decl) {
+          // We just have a function that isn't part of a class
+          function_decl = m_ast.CreateFunctionDeclaration(
+              ignore_containing_context ? m_ast.GetTranslationUnitDecl()
+                                        : containing_decl_ctx,
+              type_name_cstr, clang_type, storage, is_inline);
+
+          if (has_template_params) {
+            ClangASTContext::TemplateParameterInfos template_param_infos;
+            ParseTemplateParameterInfos(die, template_param_infos);
+            template_function_decl = m_ast.CreateFunctionDeclaration(
+                ignore_containing_context ? m_ast.GetTranslationUnitDecl()
+                                          : containing_decl_ctx,
+                type_name_cstr, clang_type, storage, is_inline);
+            clang::FunctionTemplateDecl *func_template_decl =
+                m_ast.CreateFunctionTemplateDecl(
+                    containing_decl_ctx, template_function_decl, type_name_cstr,
+                    template_param_infos);
+            m_ast.CreateFunctionTemplateSpecializationInfo(
+                function_decl, func_template_decl, template_param_infos);
           }
 
-          DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
-                       DW_TAG_value_to_name(tag), type_name_cstr);
+          lldbassert(function_decl);
 
-          DIERef type_die_ref(type_die_form);
-          Type *element_type = dwarf->ResolveTypeUID(type_die_ref);
+          if (function_decl) {
+            LinkDeclContextToDIE(function_decl, die);
 
-          if (element_type) {
-            auto array_info = ParseChildArrayInfo(die);
-            if (array_info) {
-              byte_stride = array_info->byte_stride;
-              bit_stride = array_info->bit_stride;
+            if (!function_param_decls.empty()) {
+              m_ast.SetFunctionParameters(function_decl,
+                                          &function_param_decls.front(),
+                                          function_param_decls.size());
+              if (template_function_decl)
+                m_ast.SetFunctionParameters(template_function_decl,
+                                            &function_param_decls.front(),
+                                            function_param_decls.size());
             }
-            if (byte_stride == 0 && bit_stride == 0)
-              byte_stride = element_type->GetByteSize().getValueOr(0);
-            CompilerType array_element_type =
-                element_type->GetForwardCompilerType();
-
-            if (ClangASTContext::IsCXXClassType(array_element_type) &&
-                !array_element_type.GetCompleteType()) {
-              ModuleSP module_sp = die.GetModule();
-              if (module_sp) {
-                if (die.GetCU()->GetProducer() == eProducerClang)
-                  module_sp->ReportError(
-                      "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
-                      "class/union/struct element type DIE 0x%8.8x that is a "
-                      "forward declaration, not a complete definition.\nTry "
-                      "compiling the source file with -fstandalone-debug or "
-                      "disable -gmodules",
-                      die.GetOffset(), type_die_ref.die_offset);
-                else
-                  module_sp->ReportError(
-                      "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
-                      "class/union/struct element type DIE 0x%8.8x that is a "
-                      "forward declaration, not a complete definition.\nPlease "
-                      "file a bug against the compiler and include the "
-                      "preprocessed output for %s",
-                      die.GetOffset(), type_die_ref.die_offset,
-                      die.GetLLDBCompileUnit()
-                          ? die.GetLLDBCompileUnit()->GetPath().c_str()
-                          : "the source file");
-              }
 
-              // We have no choice other than to pretend that the element class
-              // type is complete. If we don't do this, clang will crash when
-              // trying to layout the class. Since we provide layout
-              // assistance, all ivars in this class and other classes will be
-              // fine, this is the best we can do short of crashing.
-              if (ClangASTContext::StartTagDeclarationDefinition(
-                      array_element_type)) {
-                ClangASTContext::CompleteTagDeclarationDefinition(
-                    array_element_type);
-              } else {
-                module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to "
-                                       "start its definition.\nPlease file a "
-                                       "bug and attach the file at the start "
-                                       "of this error message",
-                                       type_die_ref.die_offset);
-              }
-            }
+            ClangASTMetadata metadata;
+            metadata.SetUserID(die.GetID());
 
-            uint64_t array_element_bit_stride = byte_stride * 8 + bit_stride;
-            if (array_info && array_info->element_orders.size() > 0) {
-              uint64_t num_elements = 0;
-              auto end = array_info->element_orders.rend();
-              for (auto pos = array_info->element_orders.rbegin(); pos != end;
-                   ++pos) {
-                num_elements = *pos;
-                clang_type = m_ast.CreateArrayType(array_element_type,
-                                                   num_elements, is_vector);
-                array_element_type = clang_type;
-                array_element_bit_stride =
-                    num_elements ? array_element_bit_stride * num_elements
-                                 : array_element_bit_stride;
-              }
-            } else {
-              clang_type =
-                  m_ast.CreateArrayType(array_element_type, 0, is_vector);
+            if (!object_pointer_name.empty()) {
+              metadata.SetObjectPtrName(object_pointer_name.c_str());
+              if (log)
+                log->Printf("Setting object pointer name: %s on function "
+                            "object %p.",
+                            object_pointer_name.c_str(),
+                            static_cast<void *>(function_decl));
             }
-            ConstString empty_name;
-            type_sp = std::make_shared<Type>(
-                die.GetID(), dwarf, empty_name, array_element_bit_stride / 8,
-                nullptr, dwarf->GetUID(DIERef(type_die_form)),
-                Type::eEncodingIsUID, &decl, clang_type,
-                Type::eResolveStateFull);
-            type_sp->SetEncodingType(element_type);
-            m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(),
-                                      die.GetID());
+            m_ast.SetMetadata(function_decl, metadata);
           }
         }
-      } break;
-
-      case DW_TAG_ptr_to_member_type: {
-        DWARFFormValue type_die_form;
-        DWARFFormValue containing_type_die_form;
-
-        const size_t num_attributes = die.GetAttributes(attributes);
-
-        if (num_attributes > 0) {
-          uint32_t i;
-          for (i = 0; i < num_attributes; ++i) {
-            attr = attributes.AttributeAtIndex(i);
-            if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-              switch (attr) {
-              case DW_AT_type:
-                type_die_form = form_value;
-                break;
-              case DW_AT_containing_type:
-                containing_type_die_form = form_value;
-                break;
-              }
-            }
+      }
+    }
+    type_sp = std::make_shared<Type>(die.GetID(), dwarf, type_name_const_str,
+                                     llvm::None, nullptr, LLDB_INVALID_UID,
+                                     Type::eEncodingIsUID, &decl, clang_type,
+                                     Type::eResolveStateFull);
+    assert(type_sp.get());
+  } break;
+
+  case DW_TAG_array_type: {
+    // Set a bit that lets us know that we are currently parsing this
+    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+
+    DWARFFormValue type_die_form;
+    uint32_t byte_stride = 0;
+    uint32_t bit_stride = 0;
+    bool is_vector = false;
+    const size_t num_attributes = die.GetAttributes(attributes);
+
+    if (num_attributes > 0) {
+      uint32_t i;
+      for (i = 0; i < num_attributes; ++i) {
+        attr = attributes.AttributeAtIndex(i);
+        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+          switch (attr) {
+          case DW_AT_decl_file:
+            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
+                form_value.Unsigned()));
+            break;
+          case DW_AT_decl_line:
+            decl.SetLine(form_value.Unsigned());
+            break;
+          case DW_AT_decl_column:
+            decl.SetColumn(form_value.Unsigned());
+            break;
+          case DW_AT_name:
+            type_name_cstr = form_value.AsCString();
+            type_name_const_str.SetCString(type_name_cstr);
+            break;
+
+          case DW_AT_type:
+            type_die_form = form_value;
+            break;
+          case DW_AT_byte_size:
+            break; // byte_size = form_value.Unsigned(); break;
+          case DW_AT_byte_stride:
+            byte_stride = form_value.Unsigned();
+            break;
+          case DW_AT_bit_stride:
+            bit_stride = form_value.Unsigned();
+            break;
+          case DW_AT_GNU_vector:
+            is_vector = form_value.Boolean();
+            break;
+          case DW_AT_accessibility:
+            break; // accessibility =
+                   // DW_ACCESS_to_AccessType(form_value.Unsigned()); break;
+          case DW_AT_declaration:
+            break; // is_forward_declaration = form_value.Boolean(); break;
+          case DW_AT_allocated:
+          case DW_AT_associated:
+          case DW_AT_data_location:
+          case DW_AT_description:
+          case DW_AT_ordering:
+          case DW_AT_start_scope:
+          case DW_AT_visibility:
+          case DW_AT_specification:
+          case DW_AT_abstract_origin:
+          case DW_AT_sibling:
+            break;
           }
+        }
+      }
 
-          Type *pointee_type = dwarf->ResolveTypeUID(DIERef(type_die_form));
-          Type *class_type =
-              dwarf->ResolveTypeUID(DIERef(containing_type_die_form));
+      DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+                   DW_TAG_value_to_name(tag), type_name_cstr);
 
-          CompilerType pointee_clang_type =
-              pointee_type->GetForwardCompilerType();
-          CompilerType class_clang_type = class_type->GetLayoutCompilerType();
+      DIERef type_die_ref(type_die_form);
+      Type *element_type = dwarf->ResolveTypeUID(type_die_ref);
 
-          clang_type = ClangASTContext::CreateMemberPointerType(
-              class_clang_type, pointee_clang_type);
+      if (element_type) {
+        auto array_info = ParseChildArrayInfo(die);
+        if (array_info) {
+          byte_stride = array_info->byte_stride;
+          bit_stride = array_info->bit_stride;
+        }
+        if (byte_stride == 0 && bit_stride == 0)
+          byte_stride = element_type->GetByteSize().getValueOr(0);
+        CompilerType array_element_type =
+            element_type->GetForwardCompilerType();
+
+        if (ClangASTContext::IsCXXClassType(array_element_type) &&
+            !array_element_type.GetCompleteType()) {
+          ModuleSP module_sp = die.GetModule();
+          if (module_sp) {
+            if (die.GetCU()->GetProducer() == eProducerClang)
+              module_sp->ReportError(
+                  "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
+                  "class/union/struct element type DIE 0x%8.8x that is a "
+                  "forward declaration, not a complete definition.\nTry "
+                  "compiling the source file with -fstandalone-debug or "
+                  "disable -gmodules",
+                  die.GetOffset(), type_die_ref.die_offset);
+            else
+              module_sp->ReportError(
+                  "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
+                  "class/union/struct element type DIE 0x%8.8x that is a "
+                  "forward declaration, not a complete definition.\nPlease "
+                  "file a bug against the compiler and include the "
+                  "preprocessed output for %s",
+                  die.GetOffset(), type_die_ref.die_offset,
+                  die.GetLLDBCompileUnit()
+                      ? die.GetLLDBCompileUnit()->GetPath().c_str()
+                      : "the source file");
+          }
 
-          if (llvm::Optional<uint64_t> clang_type_size =
-                  clang_type.GetByteSize(nullptr)) {
-            byte_size = *clang_type_size;
-            type_sp = std::make_shared<Type>(
-                die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-                LLDB_INVALID_UID, Type::eEncodingIsUID, nullptr, clang_type,
-                Type::eResolveStateForward);
+          // We have no choice other than to pretend that the element class
+          // type is complete. If we don't do this, clang will crash when
+          // trying to layout the class. Since we provide layout
+          // assistance, all ivars in this class and other classes will be
+          // fine, this is the best we can do short of crashing.
+          if (ClangASTContext::StartTagDeclarationDefinition(
+                  array_element_type)) {
+            ClangASTContext::CompleteTagDeclarationDefinition(
+                array_element_type);
+          } else {
+            module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to "
+                                   "start its definition.\nPlease file a "
+                                   "bug and attach the file at the start "
+                                   "of this error message",
+                                   type_die_ref.die_offset);
           }
         }
 
-        break;
-      }
-      default:
-        dwarf->GetObjectFile()->GetModule()->ReportError(
-            "{0x%8.8x}: unhandled type tag 0x%4.4x (%s), please file a bug and "
-            "attach the file at the start of this error message",
-            die.GetOffset(), tag, DW_TAG_value_to_name(tag));
-        break;
+        uint64_t array_element_bit_stride = byte_stride * 8 + bit_stride;
+        if (array_info && array_info->element_orders.size() > 0) {
+          uint64_t num_elements = 0;
+          auto end = array_info->element_orders.rend();
+          for (auto pos = array_info->element_orders.rbegin(); pos != end;
+               ++pos) {
+            num_elements = *pos;
+            clang_type = m_ast.CreateArrayType(array_element_type, num_elements,
+                                               is_vector);
+            array_element_type = clang_type;
+            array_element_bit_stride =
+                num_elements ? array_element_bit_stride * num_elements
+                             : array_element_bit_stride;
+          }
+        } else {
+          clang_type = m_ast.CreateArrayType(array_element_type, 0, is_vector);
+        }
+        ConstString empty_name;
+        type_sp = std::make_shared<Type>(
+            die.GetID(), dwarf, empty_name, array_element_bit_stride / 8,
+            nullptr, dwarf->GetUID(DIERef(type_die_form)), Type::eEncodingIsUID,
+            &decl, clang_type, Type::eResolveStateFull);
+        type_sp->SetEncodingType(element_type);
+        m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID());
       }
+    }
+  } break;
 
-      if (type_sp.get()) {
-        DWARFDIE sc_parent_die =
-            SymbolFileDWARF::GetParentSymbolContextDIE(die);
-        dw_tag_t sc_parent_tag = sc_parent_die.Tag();
-
-        SymbolContextScope *symbol_context_scope = NULL;
-        if (sc_parent_tag == DW_TAG_compile_unit ||
-            sc_parent_tag == DW_TAG_partial_unit) {
-          symbol_context_scope = sc.comp_unit;
-        } else if (sc.function != NULL && sc_parent_die) {
-          symbol_context_scope =
-              sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID());
-          if (symbol_context_scope == NULL)
-            symbol_context_scope = sc.function;
-        }
+  case DW_TAG_ptr_to_member_type: {
+    DWARFFormValue type_die_form;
+    DWARFFormValue containing_type_die_form;
 
-        if (symbol_context_scope != NULL) {
-          type_sp->SetSymbolContextScope(symbol_context_scope);
+    const size_t num_attributes = die.GetAttributes(attributes);
+
+    if (num_attributes > 0) {
+      uint32_t i;
+      for (i = 0; i < num_attributes; ++i) {
+        attr = attributes.AttributeAtIndex(i);
+        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+          switch (attr) {
+          case DW_AT_type:
+            type_die_form = form_value;
+            break;
+          case DW_AT_containing_type:
+            containing_type_die_form = form_value;
+            break;
+          }
         }
+      }
 
-        // We are ready to put this type into the uniqued list up at the module
-        // level
-        type_list->Insert(type_sp);
+      Type *pointee_type = dwarf->ResolveTypeUID(DIERef(type_die_form));
+      Type *class_type =
+          dwarf->ResolveTypeUID(DIERef(containing_type_die_form));
 
-        dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+      CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType();
+      CompilerType class_clang_type = class_type->GetLayoutCompilerType();
+
+      clang_type = ClangASTContext::CreateMemberPointerType(class_clang_type,
+                                                            pointee_clang_type);
+
+      if (llvm::Optional<uint64_t> clang_type_size =
+              clang_type.GetByteSize(nullptr)) {
+        byte_size = *clang_type_size;
+        type_sp = std::make_shared<Type>(
+            die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
+            LLDB_INVALID_UID, Type::eEncodingIsUID, nullptr, clang_type,
+            Type::eResolveStateForward);
       }
-    } else if (type_ptr != DIE_IS_BEING_PARSED) {
-      type_sp = type_ptr->shared_from_this();
     }
+
+    break;
+  }
+  default:
+    dwarf->GetObjectFile()->GetModule()->ReportError(
+        "{0x%8.8x}: unhandled type tag 0x%4.4x (%s), please file a bug and "
+        "attach the file at the start of this error message",
+        die.GetOffset(), tag, DW_TAG_value_to_name(tag));
+    break;
+  }
+
+  if (type_sp.get()) {
+    DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die);
+    dw_tag_t sc_parent_tag = sc_parent_die.Tag();
+
+    SymbolContextScope *symbol_context_scope = NULL;
+    if (sc_parent_tag == DW_TAG_compile_unit ||
+        sc_parent_tag == DW_TAG_partial_unit) {
+      symbol_context_scope = sc.comp_unit;
+    } else if (sc.function != NULL && sc_parent_die) {
+      symbol_context_scope =
+          sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID());
+      if (symbol_context_scope == NULL)
+        symbol_context_scope = sc.function;
+    }
+
+    if (symbol_context_scope != NULL) {
+      type_sp->SetSymbolContextScope(symbol_context_scope);
+    }
+
+    // We are ready to put this type into the uniqued list up at the module
+    // level
+    type_list->Insert(type_sp);
+
+    dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
   }
   return type_sp;
 }

From 77b4f0abb82c419fedca2ae3a20e1201f752b894 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Thu, 23 May 2019 09:50:18 +0000
Subject: [PATCH 0008/1176] [LLD][ELF] - Improve diagnostic about unrecognized
 relocations.

This is a minor improvement inspired by https://bugs.llvm.org/show_bug.cgi?id=38303.

A person reported that he observed message complaining about unsupported R_ARM_V4BX:
error: can't create dynamic relocation R_ARM_V4BX against local symbol in readonly segment; recompile object files with -fPIC

But with -z notext he only saw a relocation number, what is not convenient:
error: ../../gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.o:(.text+0x4F0): unrecognized reloc 40

Also, in the error messages we use relocation but not reloc.

With this patch we start to print one of the following messages:
error: file.o: unrecognized relocation Unknown(999)
error: file.o: unrecognized relocation R_X_KNOWN_BY_LLVM_BUT_UNSUPPORTED_BY_LLD_NAME

There is no way to write a test for that I believe.

Differential revision: https://reviews.llvm.org/D62237

llvm-svn: 361472
---
 lld/ELF/Arch/AArch64.cpp | 2 +-
 lld/ELF/Arch/ARM.cpp     | 2 +-
 lld/ELF/Arch/AVR.cpp     | 2 +-
 lld/ELF/Arch/Hexagon.cpp | 2 +-
 lld/ELF/Arch/MSP430.cpp  | 2 +-
 lld/ELF/Arch/PPC.cpp     | 2 +-
 lld/ELF/Arch/PPC64.cpp   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 694da898aaf7a..02630c337d996 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -350,7 +350,7 @@ void AArch64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
     or32AArch64Imm(Loc, Val);
     break;
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
   }
 }
 
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index 321b327e5302a..fe3dc8002bc87 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -518,7 +518,7 @@ void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
                   (Val & 0x00ff));           // imm8
     break;
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
   }
 }
 
diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp
index 9ccbd64d34bcb..5a573238d6d01 100644
--- a/lld/ELF/Arch/AVR.cpp
+++ b/lld/ELF/Arch/AVR.cpp
@@ -66,7 +66,7 @@ void AVR::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
     break;
   }
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
   }
 }
 
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 580600ade0074..0ac48c8e01c1b 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -246,7 +246,7 @@ void Hexagon::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
     or32le(Loc, applyMask(0x00c03fff, Val));
     break;
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
     break;
   }
 }
diff --git a/lld/ELF/Arch/MSP430.cpp b/lld/ELF/Arch/MSP430.cpp
index e104c8c7f2aaf..0f0b5662ec841 100644
--- a/lld/ELF/Arch/MSP430.cpp
+++ b/lld/ELF/Arch/MSP430.cpp
@@ -83,7 +83,7 @@ void MSP430::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
     break;
   }
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + toString(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
   }
 }
 
diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp
index 67bc264b83226..02797df314b88 100644
--- a/lld/ELF/Arch/PPC.cpp
+++ b/lld/ELF/Arch/PPC.cpp
@@ -69,7 +69,7 @@ void PPC::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
     write32be(Loc, read32be(Loc) | (Val & 0x3FFFFFC));
     break;
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
   }
 }
 
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index bc199fff45a15..8a1b4f887b899 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -860,7 +860,7 @@ void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
     write64(Loc, Val - DynamicThreadPointerOffset);
     break;
   default:
-    error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
+    error(getErrorLocation(Loc) + "unrecognized relocation " + toString(Type));
   }
 }
 

From 42548403133d9b9a22f0f3588eded31f050c1664 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Thu, 23 May 2019 09:53:30 +0000
Subject: [PATCH 0009/1176] Speed up --start-lib and --end-lib.

--{start,end}-lib give files grouped by the options the archive file
semantics. That is, each object file between them acts as if it were
in an archive file whose sole member is the file.

Therefore, files between --{start,end}-lib are linked to the final
output only if they are needed to resolve some undefined symbols.

Previously, the feature was implemented this way:

 1. We read a symbol table and insert defined symbols to the symbol
    table as lazy symbols.

 2. If an undefind symbol is resolved to a lazy symbol, that lazy
    symbol instantiate ObjFile class for that symbol, which re-insert
    all defined symbols to the symbol table.

So, if an ObjFile is instantiated, defined symbols are inserted to the
symbol table twice. Since inserting long symbol names is not cheap,
there's a room to optimize here.

This patch optimzies it. Now, LazyObjFile remembers symbol handles and
passed them over to a new ObjFile instance, so that the ObjFile
doesn't insert the same strings.

Here is a quick benchmark to link clang. "Original" is the original
lld with unmodified command line options. For "Case 1" and "Case 2", I
extracted all files from archive files and replace .a's in a command
line with .o's wrapped with --{start,end}-lib. I used the original lld
for Case 1" and use this patch for Case 2.

  Original: 5.892
    Case 1: 6.001 (+1.8%)
    Case 2: 5.701 (-3.2%)

So, interestingly, --{start,end}-lib are now faster than the regular
linking scheme with archive files. That's perhaps not too surprising,
though, because for regular archive files, we look up the symbol table
with the same string twice.

Differential Revision: https://reviews.llvm.org/D62188

llvm-svn: 361473
---
 lld/ELF/InputFiles.cpp  | 162 ++++++++++++++++++++++++++--------------
 lld/ELF/InputFiles.h    |   4 +-
 lld/ELF/SymbolTable.cpp |   1 +
 3 files changed, 108 insertions(+), 59 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 6db1217e08fc5..a82d99683ccea 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -913,62 +913,91 @@ StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
   return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this);
 }
 
+// Initialize this->Symbols. this->Symbols is a parallel array as
+// its corresponding ELF symbol table.
 template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
-  this->Symbols.reserve(this->getELFSyms<ELFT>().size());
-  for (const Elf_Sym &Sym : this->getELFSyms<ELFT>())
-    this->Symbols.push_back(createSymbol(&Sym));
-}
-
-template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
-  uint32_t SecIdx = getSectionIndex(*Sym);
-  if (SecIdx >= this->Sections.size())
-    fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
-
-  InputSectionBase *Sec = this->Sections[SecIdx];
-  uint8_t Binding = Sym->getBinding();
-  uint8_t StOther = Sym->st_other;
-  uint8_t Type = Sym->getType();
-  uint64_t Value = Sym->st_value;
-  uint64_t Size = Sym->st_size;
-
-  if (Binding == STB_LOCAL) {
-    if (Sym->getType() == STT_FILE)
-      SourceFile = CHECK(Sym->getName(this->StringTable), this);
-
-    if (this->StringTable.size() <= Sym->st_name)
-      fatal(toString(this) + ": invalid symbol name offset");
-
-    StringRefZ Name = this->StringTable.data() + Sym->st_name;
-    if (Sym->st_shndx == SHN_UNDEF)
-      return make<Undefined>(this, Name, Binding, StOther, Type);
-    return make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
-  }
+  ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>();
+  this->Symbols.resize(ESyms.size());
+
+  // Our symbol table may have already been partially initialized
+  // because of LazyObjFile.
+  for (size_t I = 0, End = ESyms.size(); I != End; ++I)
+    if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL)
+      this->Symbols[I] =
+          Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this));
+
+  // Fill this->Symbols. A symbol is either local or global.
+  for (size_t I = 0, End = ESyms.size(); I != End; ++I) {
+    const Elf_Sym &ESym = ESyms[I];
+
+    // Read symbol attributes.
+    uint32_t SecIdx = getSectionIndex(ESym);
+    if (SecIdx >= this->Sections.size())
+      fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
+
+    InputSectionBase *Sec = this->Sections[SecIdx];
+    uint8_t Binding = ESym.getBinding();
+    uint8_t StOther = ESym.st_other;
+    uint8_t Type = ESym.getType();
+    uint64_t Value = ESym.st_value;
+    uint64_t Size = ESym.st_size;
+    StringRefZ Name = this->StringTable.data() + ESym.st_name;
+
+    // Handle local symbols. Local symbols are not added to the symbol
+    // table because they are not visible from other object files. We
+    // allocate symbol instances and add their pointers to Symbols.
+    if (Binding == STB_LOCAL) {
+      if (ESym.getType() == STT_FILE)
+        SourceFile = CHECK(ESym.getName(this->StringTable), this);
+
+      if (this->StringTable.size() <= ESym.st_name)
+        fatal(toString(this) + ": invalid symbol name offset");
+
+      if (ESym.st_shndx == SHN_UNDEF)
+        this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
+      else
+        this->Symbols[I] =
+            make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
+      continue;
+    }
 
-  StringRef Name = CHECK(Sym->getName(this->StringTable), this);
+    // Handle global undefined symbols.
+    if (ESym.st_shndx == SHN_UNDEF) {
+      resolveSymbol(this->Symbols[I],
+                    Undefined{this, Name, Binding, StOther, Type});
+      continue;
+    }
 
-  if (Sym->st_shndx == SHN_UNDEF)
-    return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type});
+    // Handle global common symbols.
+    if (ESym.st_shndx == SHN_COMMON) {
+      if (Value == 0 || Value >= UINT32_MAX)
+        fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
+              "' has invalid alignment: " + Twine(Value));
+      resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther,
+                                                   Type, Value, Size});
+      continue;
+    }
 
-  if (Sec == &InputSection::Discarded)
-    return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type,
-                                       /*DiscardedSecIdx=*/SecIdx});
+    // If a defined symbol is in a discarded section, handle it as if it
+    // were an undefined symbol. Such symbol doesn't comply with the
+    // standard, but in practice, a .eh_frame often directly refer
+    // COMDAT member sections, and if a comdat group is discarded, some
+    // defined symbol in a .eh_frame becomes dangling symbols.
+    if (Sec == &InputSection::Discarded) {
+      resolveSymbol(this->Symbols[I],
+                    Undefined{this, Name, Binding, StOther, Type, SecIdx});
+      continue;
+    }
 
-  if (Sym->st_shndx == SHN_COMMON) {
-    if (Value == 0 || Value >= UINT32_MAX)
-      fatal(toString(this) + ": common symbol '" + Name +
-            "' has invalid alignment: " + Twine(Value));
-    return Symtab->addSymbol(
-        CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
-  }
+    // Handle global defined symbols.
+    if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
+        Binding == STB_GNU_UNIQUE) {
+      resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther,
+                                              Type, Value, Size, Sec});
+      continue;
+    }
 
-  switch (Binding) {
-  default:
     fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding));
-  case STB_GLOBAL:
-  case STB_WEAK:
-  case STB_GNU_UNIQUE:
-    return Symtab->addSymbol(
-        Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
   }
 }
 
@@ -1455,10 +1484,16 @@ InputFile *LazyObjFile::fetch() {
 
   InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive);
   File->GroupId = GroupId;
+
+  // Copy symbol vector so that the new InputFile doesn't have to
+  // insert the same defined symbols to the symbol table again.
+  File->Symbols = std::move(Symbols);
   return File;
 }
 
 template <class ELFT> void LazyObjFile::parse() {
+  using Elf_Sym = typename ELFT::Sym;
+
   // A lazy object file wraps either a bitcode file or an ELF file.
   if (isBitcode(this->MB)) {
     std::unique_ptr<lto::InputFile> Obj =
@@ -1476,6 +1511,7 @@ template <class ELFT> void LazyObjFile::parse() {
     return;
   }
 
+  // Find a symbol table.
   ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
   ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
 
@@ -1483,16 +1519,28 @@ template <class ELFT> void LazyObjFile::parse() {
     if (Sec.sh_type != SHT_SYMTAB)
       continue;
 
-    typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this);
+    // A symbol table is found.
+    ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this);
     uint32_t FirstGlobal = Sec.sh_info;
-    StringRef StringTable =
-        CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
-
-    for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal)) {
-      if (Sym.st_shndx == SHN_UNDEF)
+    StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
+    this->Symbols.resize(ESyms.size());
+
+    // Get existing symbols or insert placeholder symbols.
+    for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I)
+      if (ESyms[I].st_shndx != SHN_UNDEF)
+        this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this));
+
+    // Replace existing symbols with LazyObject symbols.
+    //
+    // resolveSymbol() may trigger this->fetch() if an existing symbol
+    // is an undefined symbol. If that happens, this LazyObjFile has
+    // served its purpose, and we can exit from the loop early.
+    for (Symbol *Sym : this->Symbols) {
+      if (!Sym)
         continue;
-      Symtab->addSymbol(
-          LazyObject{*this, CHECK(Sym.getName(StringTable), this)});
+      resolveSymbol(Sym, LazyObject{*this, Sym->getName()});
+      if (AddedToLink)
+        return;
     }
     return;
   }
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index f3d9241b5db49..7d5e9a2346fa3 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -139,10 +139,11 @@ class InputFile {
   // Index of MIPS GOT built for this file.
   llvm::Optional<size_t> MipsGotIndex;
 
+  std::vector<Symbol *> Symbols;
+
 protected:
   InputFile(Kind K, MemoryBufferRef M);
   std::vector<InputSectionBase *> Sections;
-  std::vector<Symbol *> Symbols;
 
 private:
   const Kind FileKind;
@@ -255,7 +256,6 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   StringRef getSectionName(const Elf_Shdr &Sec);
 
   bool shouldMerge(const Elf_Shdr &Sec);
-  Symbol *createSymbol(const Elf_Sym *Sym);
 
   // Each ELF symbol contains a section index which the symbol belongs to.
   // However, because the number of bits dedicated for that is limited, a
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 44fdb96b3e0f8..c77243e010d71 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -92,6 +92,7 @@ Symbol *SymbolTable::insert(StringRef Name) {
   Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
   SymVector.push_back(Sym);
 
+  Sym->setName(Name);
   Sym->SymbolKind = Symbol::PlaceholderKind;
   Sym->VersionId = Config->DefaultSymbolVersion;
   Sym->Visibility = STV_DEFAULT;

From 7f7d2b2e62ebd89d0ab1e2e066689cdfb2a94899 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Thu, 23 May 2019 09:58:08 +0000
Subject: [PATCH 0010/1176] Move code for symbol resolution from
 SymbolTable.cpp to Symbols.cpp.

My recent commits separated symbol resolution from the symbol table,
so the functions to resolve symbols are now in a somewhat wrong file.
This patch moves it to Symbols.cpp.

The functions are now member functions of the symbol.

This is code move change. I modified function names so that they are
appropriate as member functions, though. No functionality change
intended.

Differential Revision: https://reviews.llvm.org/D62290

llvm-svn: 361474
---
 lld/ELF/Driver.cpp       |   4 +-
 lld/ELF/InputFiles.cpp   |  23 ++-
 lld/ELF/LinkerScript.cpp |   4 +-
 lld/ELF/SymbolTable.cpp  | 322 +--------------------------------------
 lld/ELF/SymbolTable.h    |   5 -
 lld/ELF/Symbols.cpp      | 316 +++++++++++++++++++++++++++++++++++++-
 lld/ELF/Symbols.h        |  41 ++++-
 7 files changed, 364 insertions(+), 351 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 6bd00e46b9143..9a72876631665 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1314,7 +1314,7 @@ static void handleUndefined(StringRef Name) {
   Sym->IsUsedInRegularObj = true;
 
   if (Sym->isLazy())
-    Symtab->fetchLazy(Sym);
+    Sym->fetch();
 }
 
 static void handleLibcall(StringRef Name) {
@@ -1329,7 +1329,7 @@ static void handleLibcall(StringRef Name) {
     MB = cast<LazyArchive>(Sym)->getMemberBuffer();
 
   if (isBitcode(MB))
-    Symtab->fetchLazy(Sym);
+    Sym->fetch();
 }
 
 // Replaces common symbols with defined symbols reside in .bss sections.
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a82d99683ccea..d3a9e14377eff 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -963,8 +963,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
 
     // Handle global undefined symbols.
     if (ESym.st_shndx == SHN_UNDEF) {
-      resolveSymbol(this->Symbols[I],
-                    Undefined{this, Name, Binding, StOther, Type});
+      this->Symbols[I]->resolve(Undefined{this, Name, Binding, StOther, Type});
       continue;
     }
 
@@ -973,8 +972,8 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
       if (Value == 0 || Value >= UINT32_MAX)
         fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
               "' has invalid alignment: " + Twine(Value));
-      resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther,
-                                                   Type, Value, Size});
+      this->Symbols[I]->resolve(
+          CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
       continue;
     }
 
@@ -984,16 +983,16 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
     // COMDAT member sections, and if a comdat group is discarded, some
     // defined symbol in a .eh_frame becomes dangling symbols.
     if (Sec == &InputSection::Discarded) {
-      resolveSymbol(this->Symbols[I],
-                    Undefined{this, Name, Binding, StOther, Type, SecIdx});
+      this->Symbols[I]->resolve(
+          Undefined{this, Name, Binding, StOther, Type, SecIdx});
       continue;
     }
 
     // Handle global defined symbols.
     if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
         Binding == STB_GNU_UNIQUE) {
-      resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther,
-                                              Type, Value, Size, Sec});
+      this->Symbols[I]->resolve(
+          Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
       continue;
     }
 
@@ -1532,13 +1531,13 @@ template <class ELFT> void LazyObjFile::parse() {
 
     // Replace existing symbols with LazyObject symbols.
     //
-    // resolveSymbol() may trigger this->fetch() if an existing symbol
-    // is an undefined symbol. If that happens, this LazyObjFile has
-    // served its purpose, and we can exit from the loop early.
+    // resolve() may trigger this->fetch() if an existing symbol is an
+    // undefined symbol. If that happens, this LazyObjFile has served
+    // its purpose, and we can exit from the loop early.
     for (Symbol *Sym : this->Symbols) {
       if (!Sym)
         continue;
-      resolveSymbol(Sym, LazyObject{*this, Sym->getName()});
+      Sym->resolve(LazyObject{*this, Sym->getName()});
       if (AddedToLink)
         return;
     }
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 8519f01181fd2..99f0853c911a2 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -185,7 +185,7 @@ void LinkerScript::addSymbol(SymbolAssignment *Cmd) {
               0, Sec);
 
   Symbol *Sym = Symtab->insert(Cmd->Name);
-  mergeSymbolProperties(Sym, New);
+  Sym->mergeProperties(New);
   Sym->replace(New);
   Cmd->Sym = cast<Defined>(Sym);
 }
@@ -202,7 +202,7 @@ static void declareSymbol(SymbolAssignment *Cmd) {
 
   // We can't calculate final value right now.
   Symbol *Sym = Symtab->insert(Cmd->Name);
-  mergeSymbolProperties(Sym, New);
+  Sym->mergeProperties(New);
   Sym->replace(New);
 
   Cmd->Sym = cast<Defined>(Sym);
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index c77243e010d71..9fc05d98366aa 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -55,14 +55,6 @@ void SymbolTable::wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap) {
   Real->setName(S);
 }
 
-static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
-  if (VA == STV_DEFAULT)
-    return VB;
-  if (VB == STV_DEFAULT)
-    return VA;
-  return std::min(VA, VB);
-}
-
 // Find an existing symbol or create a new one.
 Symbol *SymbolTable::insert(StringRef Name) {
   // <name>@@<version> means the symbol is the default version. In that
@@ -105,232 +97,9 @@ Symbol *SymbolTable::insert(StringRef Name) {
 }
 
 Symbol *SymbolTable::addSymbol(const Symbol &New) {
-  Symbol *Old = Symtab->insert(New.getName());
-  resolveSymbol(Old, New);
-  return Old;
-}
-
-static void addUndefined(Symbol *Old, const Undefined &New) {
-  // An undefined symbol with non default visibility must be satisfied
-  // in the same DSO.
-  //
-  // If this is a non-weak defined symbol in a discarded section, override the
-  // existing undefined symbol for better error message later.
-  if ((Old->isShared() && New.Visibility != STV_DEFAULT) ||
-      (Old->isUndefined() && New.Binding != STB_WEAK && New.DiscardedSecIdx)) {
-    Old->replace(New);
-    return;
-  }
-
-  if (Old->isShared() || Old->isLazy() ||
-      (Old->isUndefined() && New.Binding != STB_WEAK))
-    Old->Binding = New.Binding;
-
-  if (Old->isLazy()) {
-    // An undefined weak will not fetch archive members. See comment on Lazy in
-    // Symbols.h for the details.
-    if (New.Binding == STB_WEAK) {
-      Old->Type = New.Type;
-      return;
-    }
-
-    // Do extra check for --warn-backrefs.
-    //
-    // --warn-backrefs is an option to prevent an undefined reference from
-    // fetching an archive member written earlier in the command line. It can be
-    // used to keep compatibility with GNU linkers to some degree.
-    // I'll explain the feature and why you may find it useful in this comment.
-    //
-    // lld's symbol resolution semantics is more relaxed than traditional Unix
-    // linkers. For example,
-    //
-    //   ld.lld foo.a bar.o
-    //
-    // succeeds even if bar.o contains an undefined symbol that has to be
-    // resolved by some object file in foo.a. Traditional Unix linkers don't
-    // allow this kind of backward reference, as they visit each file only once
-    // from left to right in the command line while resolving all undefined
-    // symbols at the moment of visiting.
-    //
-    // In the above case, since there's no undefined symbol when a linker visits
-    // foo.a, no files are pulled out from foo.a, and because the linker forgets
-    // about foo.a after visiting, it can't resolve undefined symbols in bar.o
-    // that could have been resolved otherwise.
-    //
-    // That lld accepts more relaxed form means that (besides it'd make more
-    // sense) you can accidentally write a command line or a build file that
-    // works only with lld, even if you have a plan to distribute it to wider
-    // users who may be using GNU linkers. With --warn-backrefs, you can detect
-    // a library order that doesn't work with other Unix linkers.
-    //
-    // The option is also useful to detect cyclic dependencies between static
-    // archives. Again, lld accepts
-    //
-    //   ld.lld foo.a bar.a
-    //
-    // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
-    // handled as an error.
-    //
-    // Here is how the option works. We assign a group ID to each file. A file
-    // with a smaller group ID can pull out object files from an archive file
-    // with an equal or greater group ID. Otherwise, it is a reverse dependency
-    // and an error.
-    //
-    // A file outside --{start,end}-group gets a fresh ID when instantiated. All
-    // files within the same --{start,end}-group get the same group ID. E.g.
-    //
-    //   ld.lld A B --start-group C D --end-group E
-    //
-    // A forms group 0. B form group 1. C and D (including their member object
-    // files) form group 2. E forms group 3. I think that you can see how this
-    // group assignment rule simulates the traditional linker's semantics.
-    bool Backref = Config->WarnBackrefs && New.File &&
-                   Old->File->GroupId < New.File->GroupId;
-    Symtab->fetchLazy(Old);
-
-    // We don't report backward references to weak symbols as they can be
-    // overridden later.
-    if (Backref && !Old->isWeak())
-      warn("backward reference detected: " + New.getName() + " in " +
-           toString(New.File) + " refers to " + toString(Old->File));
-  }
-}
-
-// Using .symver foo,foo@@VER unfortunately creates two symbols: foo and
-// foo@@VER. We want to effectively ignore foo, so give precedence to
-// foo@@VER.
-// FIXME: If users can transition to using
-// .symver foo,foo@@@VER
-// we can delete this hack.
-static int compareVersion(StringRef OldName, StringRef NewName) {
-  bool A = OldName.contains("@@");
-  bool B = NewName.contains("@@");
-  if (!A && B)
-    return 1;
-  if (A && !B)
-    return -1;
-  return 0;
-}
-
-// Compare two symbols. Return 1 if the new symbol should win, -1 if
-// the new symbol should lose, or 0 if there is a conflict.
-static int compare(const Symbol *Old, const Symbol *New) {
-  assert(New->isDefined() || New->isCommon());
-
-  if (!Old->isDefined() && !Old->isCommon())
-    return 1;
-
-  if (int Cmp = compareVersion(Old->getName(), New->getName()))
-    return Cmp;
-
-  if (New->isWeak())
-    return -1;
-
-  if (Old->isWeak())
-    return 1;
-
-  if (Old->isCommon() && New->isCommon()) {
-    if (Config->WarnCommon)
-      warn("multiple common of " + Old->getName());
-    return 0;
-  }
-
-  if (Old->isCommon()) {
-    if (Config->WarnCommon)
-      warn("common " + Old->getName() + " is overridden");
-    return 1;
-  }
-
-  if (New->isCommon()) {
-    if (Config->WarnCommon)
-      warn("common " + Old->getName() + " is overridden");
-    return -1;
-  }
-
-  auto *OldSym = cast<Defined>(Old);
-  auto *NewSym = cast<Defined>(New);
-
-  if (New->File && isa<BitcodeFile>(New->File))
-    return 0;
-
-  if (!OldSym->Section && !NewSym->Section && OldSym->Value == NewSym->Value &&
-      NewSym->Binding == STB_GLOBAL)
-    return -1;
-
-  return 0;
-}
-
-static void addCommon(Symbol *Old, const CommonSymbol &New) {
-  int Cmp = compare(Old, &New);
-  if (Cmp < 0)
-    return;
-
-  if (Cmp > 0) {
-    Old->replace(New);
-    return;
-  }
-
-  CommonSymbol *OldSym = cast<CommonSymbol>(Old);
-
-  OldSym->Alignment = std::max(OldSym->Alignment, New.Alignment);
-  if (OldSym->Size < New.Size) {
-    OldSym->File = New.File;
-    OldSym->Size = New.Size;
-  }
-}
-
-static void reportDuplicate(Symbol *Sym, InputFile *NewFile,
-                            InputSectionBase *ErrSec, uint64_t ErrOffset) {
-  if (Config->AllowMultipleDefinition)
-    return;
-
-  Defined *D = cast<Defined>(Sym);
-  if (!D->Section || !ErrSec) {
-    error("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " +
-          toString(Sym->File) + "\n>>> defined in " + toString(NewFile));
-    return;
-  }
-
-  // Construct and print an error message in the form of:
-  //
-  //   ld.lld: error: duplicate symbol: foo
-  //   >>> defined at bar.c:30
-  //   >>>            bar.o (/home/alice/src/bar.o)
-  //   >>> defined at baz.c:563
-  //   >>>            baz.o in archive libbaz.a
-  auto *Sec1 = cast<InputSectionBase>(D->Section);
-  std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value);
-  std::string Obj1 = Sec1->getObjMsg(D->Value);
-  std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset);
-  std::string Obj2 = ErrSec->getObjMsg(ErrOffset);
-
-  std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at ";
-  if (!Src1.empty())
-    Msg += Src1 + "\n>>>            ";
-  Msg += Obj1 + "\n>>> defined at ";
-  if (!Src2.empty())
-    Msg += Src2 + "\n>>>            ";
-  Msg += Obj2;
-  error(Msg);
-}
-
-static void addDefined(Symbol *Old, const Defined &New) {
-  int Cmp = compare(Old, &New);
-  if (Cmp > 0)
-    Old->replace(New);
-  else if (Cmp == 0)
-    reportDuplicate(Old, New.File,
-                    dyn_cast_or_null<InputSectionBase>(New.Section), New.Value);
-}
-
-static void addShared(Symbol *Old, const SharedSymbol &New) {
-  if (Old->Visibility == STV_DEFAULT && (Old->isUndefined() || Old->isLazy())) {
-    // An undefined symbol with non default visibility must be satisfied
-    // in the same DSO.
-    uint8_t Binding = Old->Binding;
-    Old->replace(New);
-    Old->Binding = Binding;
-  }
+  Symbol *Sym = Symtab->insert(New.getName());
+  Sym->resolve(New);
+  return Sym;
 }
 
 Symbol *SymbolTable::find(StringRef Name) {
@@ -342,44 +111,6 @@ Symbol *SymbolTable::find(StringRef Name) {
   return SymVector[It->second];
 }
 
-template <class LazyT> static void addLazy(Symbol *Old, const LazyT &New) {
-  if (!Old->isUndefined())
-    return;
-
-  // An undefined weak will not fetch archive members. See comment on Lazy in
-  // Symbols.h for the details.
-  if (Old->isWeak()) {
-    uint8_t Type = Old->Type;
-    Old->replace(New);
-    Old->Type = Type;
-    Old->Binding = STB_WEAK;
-    return;
-  }
-
-  if (InputFile *F = New.fetch())
-    parseFile(F);
-}
-
-static void addLazyArchive(Symbol *Old, const LazyArchive &New) {
-  addLazy(Old, New);
-}
-
-static void addLazyObject(Symbol *Old, const LazyObject &New) {
-  addLazy(Old, New);
-}
-
-void SymbolTable::fetchLazy(Symbol *Sym) {
-  if (auto *S = dyn_cast<LazyArchive>(Sym)) {
-    if (InputFile *File = S->fetch())
-      parseFile(File);
-    return;
-  }
-
-  auto *S = cast<LazyObject>(Sym);
-  if (InputFile *File = cast<LazyObjFile>(S->File)->fetch())
-    parseFile(File);
-}
-
 // Initialize DemangledSyms with a map from demangled symbols to symbol
 // objects. Used to handle "extern C++" directive in version scripts.
 //
@@ -540,50 +271,3 @@ void SymbolTable::scanVersionScript() {
   for (Symbol *Sym : SymVector)
     Sym->parseSymbolVersion();
 }
-
-// Merge symbol properties.
-//
-// When we have many symbols of the same name, we choose one of them,
-// and that's the result of symbol resolution. However, symbols that
-// were not chosen still affect some symbol properties.
-void elf::mergeSymbolProperties(Symbol *Old, const Symbol &New) {
-  // Merge symbol properties.
-  Old->ExportDynamic = Old->ExportDynamic || New.ExportDynamic;
-  Old->IsUsedInRegularObj = Old->IsUsedInRegularObj || New.IsUsedInRegularObj;
-
-  // DSO symbols do not affect visibility in the output.
-  if (!New.isShared())
-    Old->Visibility = getMinVisibility(Old->Visibility, New.Visibility);
-}
-
-void elf::resolveSymbol(Symbol *Old, const Symbol &New) {
-  mergeSymbolProperties(Old, New);
-
-  if (Old->isPlaceholder()) {
-    Old->replace(New);
-    return;
-  }
-
-  switch (New.kind()) {
-  case Symbol::UndefinedKind:
-    addUndefined(Old, cast<Undefined>(New));
-    break;
-  case Symbol::CommonKind:
-    addCommon(Old, cast<CommonSymbol>(New));
-    break;
-  case Symbol::DefinedKind:
-    addDefined(Old, cast<Defined>(New));
-    break;
-  case Symbol::LazyArchiveKind:
-    addLazyArchive(Old, cast<LazyArchive>(New));
-    break;
-  case Symbol::LazyObjectKind:
-    addLazyObject(Old, cast<LazyObject>(New));
-    break;
-  case Symbol::SharedKind:
-    addShared(Old, cast<SharedSymbol>(New));
-    break;
-  case Symbol::PlaceholderKind:
-    llvm_unreachable("bad symbol kind");
-  }
-}
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index 25b73fa5481fe..8c9a8f8f2e760 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -47,8 +47,6 @@ class SymbolTable {
 
   Symbol *addSymbol(const Symbol &New);
 
-  void fetchLazy(Symbol *Sym);
-
   void scanVersionScript();
 
   Symbol *find(StringRef Name);
@@ -94,9 +92,6 @@ class SymbolTable {
 
 extern SymbolTable *Symtab;
 
-void mergeSymbolProperties(Symbol *Old, const Symbol &New);
-void resolveSymbol(Symbol *Old, const Symbol &New);
-
 } // namespace elf
 } // namespace lld
 
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index e1bbb8acedc68..94f0d6ea6cb9d 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -241,8 +241,20 @@ void Symbol::parseSymbolVersion() {
           Verstr);
 }
 
-InputFile *LazyArchive::fetch() const {
-  return cast<ArchiveFile>(File)->fetch(Sym);
+void Symbol::fetch() const {
+  if (auto *Sym = dyn_cast<LazyArchive>(this)) {
+    if (auto *F = cast<ArchiveFile>(Sym->File)->fetch(Sym->Sym))
+      parseFile(F);
+    return;
+  }
+
+  if (auto *Sym = dyn_cast<LazyObject>(this)) {
+    if (auto *F = dyn_cast<LazyObjFile>(Sym->File)->fetch())
+      parseFile(F);
+    return;
+  }
+
+  llvm_unreachable("Symbol::fetch() is called on a non-lazy symbol");
 }
 
 MemoryBufferRef LazyArchive::getMemberBuffer() {
@@ -254,10 +266,6 @@ MemoryBufferRef LazyArchive::getMemberBuffer() {
                    Sym.getName());
 }
 
-InputFile *LazyObject::fetch() const {
-  return cast<LazyObjFile>(File)->fetch();
-}
-
 uint8_t Symbol::computeBinding() const {
   if (Config->Relocatable)
     return Binding;
@@ -338,3 +346,299 @@ std::string lld::toString(const Symbol &B) {
       return *S;
   return B.getName();
 }
+
+static uint8_t getMinVisibility(uint8_t VA, uint8_t VB) {
+  if (VA == STV_DEFAULT)
+    return VB;
+  if (VB == STV_DEFAULT)
+    return VA;
+  return std::min(VA, VB);
+}
+
+// Merge symbol properties.
+//
+// When we have many symbols of the same name, we choose one of them,
+// and that's the result of symbol resolution. However, symbols that
+// were not chosen still affect some symbol properties.
+void Symbol::mergeProperties(const Symbol &Other) {
+  if (Other.ExportDynamic)
+    ExportDynamic = true;
+  if (Other.IsUsedInRegularObj)
+    IsUsedInRegularObj = true;
+
+  // DSO symbols do not affect visibility in the output.
+  if (!Other.isShared())
+    Visibility = getMinVisibility(Visibility, Other.Visibility);
+}
+
+void Symbol::resolve(const Symbol &Other) {
+  mergeProperties(Other);
+
+  if (isPlaceholder()) {
+    replace(Other);
+    return;
+  }
+
+  switch (Other.kind()) {
+  case Symbol::UndefinedKind:
+    resolveUndefined(cast<Undefined>(Other));
+    break;
+  case Symbol::CommonKind:
+    resolveCommon(cast<CommonSymbol>(Other));
+    break;
+  case Symbol::DefinedKind:
+    resolveDefined(cast<Defined>(Other));
+    break;
+  case Symbol::LazyArchiveKind:
+    resolveLazy(cast<LazyArchive>(Other));
+    break;
+  case Symbol::LazyObjectKind:
+    resolveLazy(cast<LazyObject>(Other));
+    break;
+  case Symbol::SharedKind:
+    resolveShared(cast<SharedSymbol>(Other));
+    break;
+  case Symbol::PlaceholderKind:
+    llvm_unreachable("bad symbol kind");
+  }
+}
+
+void Symbol::resolveUndefined(const Undefined &Other) {
+  // An undefined symbol with non default visibility must be satisfied
+  // in the same DSO.
+  //
+  // If this is a non-weak defined symbol in a discarded section, override the
+  // existing undefined symbol for better error message later.
+  if ((isShared() && Other.Visibility != STV_DEFAULT) ||
+      (isUndefined() && Other.Binding != STB_WEAK && Other.DiscardedSecIdx)) {
+    replace(Other);
+    return;
+  }
+
+  if (isShared() || isLazy() || (isUndefined() && Other.Binding != STB_WEAK))
+    Binding = Other.Binding;
+
+  if (isLazy()) {
+    // An undefined weak will not fetch archive members. See comment on Lazy in
+    // Symbols.h for the details.
+    if (Other.Binding == STB_WEAK) {
+      Type = Other.Type;
+      return;
+    }
+
+    // Do extra check for --warn-backrefs.
+    //
+    // --warn-backrefs is an option to prevent an undefined reference from
+    // fetching an archive member written earlier in the command line. It can be
+    // used to keep compatibility with GNU linkers to some degree.
+    // I'll explain the feature and why you may find it useful in this comment.
+    //
+    // lld's symbol resolution semantics is more relaxed than traditional Unix
+    // linkers. For example,
+    //
+    //   ld.lld foo.a bar.o
+    //
+    // succeeds even if bar.o contains an undefined symbol that has to be
+    // resolved by some object file in foo.a. Traditional Unix linkers don't
+    // allow this kind of backward reference, as they visit each file only once
+    // from left to right in the command line while resolving all undefined
+    // symbols at the moment of visiting.
+    //
+    // In the above case, since there's no undefined symbol when a linker visits
+    // foo.a, no files are pulled out from foo.a, and because the linker forgets
+    // about foo.a after visiting, it can't resolve undefined symbols in bar.o
+    // that could have been resolved otherwise.
+    //
+    // That lld accepts more relaxed form means that (besides it'd make more
+    // sense) you can accidentally write a command line or a build file that
+    // works only with lld, even if you have a plan to distribute it to wider
+    // users who may be using GNU linkers. With --warn-backrefs, you can detect
+    // a library order that doesn't work with other Unix linkers.
+    //
+    // The option is also useful to detect cyclic dependencies between static
+    // archives. Again, lld accepts
+    //
+    //   ld.lld foo.a bar.a
+    //
+    // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is
+    // handled as an error.
+    //
+    // Here is how the option works. We assign a group ID to each file. A file
+    // with a smaller group ID can pull out object files from an archive file
+    // with an equal or greater group ID. Otherwise, it is a reverse dependency
+    // and an error.
+    //
+    // A file outside --{start,end}-group gets a fresh ID when instantiated. All
+    // files within the same --{start,end}-group get the same group ID. E.g.
+    //
+    //   ld.lld A B --start-group C D --end-group E
+    //
+    // A forms group 0. B form group 1. C and D (including their member object
+    // files) form group 2. E forms group 3. I think that you can see how this
+    // group assignment rule simulates the traditional linker's semantics.
+    bool Backref = Config->WarnBackrefs && Other.File &&
+                   File->GroupId < Other.File->GroupId;
+    fetch();
+
+    // We don't report backward references to weak symbols as they can be
+    // overridden later.
+    if (Backref && !isWeak())
+      warn("backward reference detected: " + Other.getName() + " in " +
+           toString(Other.File) + " refers to " + toString(File));
+  }
+}
+
+// Using .symver foo,foo@@VER unfortunately creates two symbols: foo and
+// foo@@VER. We want to effectively ignore foo, so give precedence to
+// foo@@VER.
+// FIXME: If users can transition to using
+// .symver foo,foo@@@VER
+// we can delete this hack.
+static int compareVersion(StringRef A, StringRef B) {
+  bool X = A.contains("@@");
+  bool Y = B.contains("@@");
+  if (!X && Y)
+    return 1;
+  if (X && !Y)
+    return -1;
+  return 0;
+}
+
+// Compare two symbols. Return 1 if the new symbol should win, -1 if
+// the new symbol should lose, or 0 if there is a conflict.
+int Symbol::compare(const Symbol *Other) const {
+  assert(Other->isDefined() || Other->isCommon());
+
+  if (!isDefined() && !isCommon())
+    return 1;
+
+  if (int Cmp = compareVersion(getName(), Other->getName()))
+    return Cmp;
+
+  if (Other->isWeak())
+    return -1;
+
+  if (isWeak())
+    return 1;
+
+  if (isCommon() && Other->isCommon()) {
+    if (Config->WarnCommon)
+      warn("multiple common of " + getName());
+    return 0;
+  }
+
+  if (isCommon()) {
+    if (Config->WarnCommon)
+      warn("common " + getName() + " is overridden");
+    return 1;
+  }
+
+  if (Other->isCommon()) {
+    if (Config->WarnCommon)
+      warn("common " + getName() + " is overridden");
+    return -1;
+  }
+
+  auto *OldSym = cast<Defined>(this);
+  auto *NewSym = cast<Defined>(Other);
+
+  if (Other->File && isa<BitcodeFile>(Other->File))
+    return 0;
+
+  if (!OldSym->Section && !NewSym->Section && OldSym->Value == NewSym->Value &&
+      NewSym->Binding == STB_GLOBAL)
+    return -1;
+
+  return 0;
+}
+
+static void reportDuplicate(Symbol *Sym, InputFile *NewFile,
+                            InputSectionBase *ErrSec, uint64_t ErrOffset) {
+  if (Config->AllowMultipleDefinition)
+    return;
+
+  Defined *D = cast<Defined>(Sym);
+  if (!D->Section || !ErrSec) {
+    error("duplicate symbol: " + toString(*Sym) + "\n>>> defined in " +
+          toString(Sym->File) + "\n>>> defined in " + toString(NewFile));
+    return;
+  }
+
+  // Construct and print an error message in the form of:
+  //
+  //   ld.lld: error: duplicate symbol: foo
+  //   >>> defined at bar.c:30
+  //   >>>            bar.o (/home/alice/src/bar.o)
+  //   >>> defined at baz.c:563
+  //   >>>            baz.o in archive libbaz.a
+  auto *Sec1 = cast<InputSectionBase>(D->Section);
+  std::string Src1 = Sec1->getSrcMsg(*Sym, D->Value);
+  std::string Obj1 = Sec1->getObjMsg(D->Value);
+  std::string Src2 = ErrSec->getSrcMsg(*Sym, ErrOffset);
+  std::string Obj2 = ErrSec->getObjMsg(ErrOffset);
+
+  std::string Msg = "duplicate symbol: " + toString(*Sym) + "\n>>> defined at ";
+  if (!Src1.empty())
+    Msg += Src1 + "\n>>>            ";
+  Msg += Obj1 + "\n>>> defined at ";
+  if (!Src2.empty())
+    Msg += Src2 + "\n>>>            ";
+  Msg += Obj2;
+  error(Msg);
+}
+
+void Symbol::resolveCommon(const CommonSymbol &Other) {
+  int Cmp = compare(&Other);
+  if (Cmp < 0)
+    return;
+
+  if (Cmp > 0) {
+    replace(Other);
+    return;
+  }
+
+  CommonSymbol *OldSym = cast<CommonSymbol>(this);
+
+  OldSym->Alignment = std::max(OldSym->Alignment, Other.Alignment);
+  if (OldSym->Size < Other.Size) {
+    OldSym->File = Other.File;
+    OldSym->Size = Other.Size;
+  }
+}
+
+void Symbol::resolveDefined(const Defined &Other) {
+  int Cmp = compare(&Other);
+  if (Cmp > 0)
+    replace(Other);
+  else if (Cmp == 0)
+    reportDuplicate(this, Other.File,
+                    dyn_cast_or_null<InputSectionBase>(Other.Section),
+                    Other.Value);
+}
+
+template <class LazyT> void Symbol::resolveLazy(const LazyT &Other) {
+  if (!isUndefined())
+    return;
+
+  // An undefined weak will not fetch archive members. See comment on Lazy in
+  // Symbols.h for the details.
+  if (isWeak()) {
+    uint8_t Ty = Type;
+    replace(Other);
+    Type = Ty;
+    Binding = STB_WEAK;
+    return;
+  }
+
+  Other.fetch();
+}
+
+void Symbol::resolveShared(const SharedSymbol &Other) {
+  if (Visibility == STV_DEFAULT && (isUndefined() || isLazy())) {
+    // An undefined symbol with non default visibility must be satisfied
+    // in the same DSO.
+    uint8_t Bind = Binding;
+    replace(Other);
+    Binding = Bind;
+  }
+}
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index a2b904f057d5c..04c23b588f02f 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -22,8 +22,14 @@
 
 namespace lld {
 namespace elf {
-class Symbol;
+class CommonSymbol;
+class Defined;
 class InputFile;
+class LazyArchive;
+class LazyObject;
+class SharedSymbol;
+class Symbol;
+class Undefined;
 } // namespace elf
 
 std::string toString(const elf::Symbol &);
@@ -174,6 +180,27 @@ class Symbol {
   uint64_t getSize() const;
   OutputSection *getOutputSection() const;
 
+  // The following two functions are used for symbol resolution.
+  //
+  // You are expected to call mergeProperties for all symbols in input
+  // files so that attributes that are attached to names rather than
+  // indivisual symbol (such as visibility) are merged together.
+  //
+  // Every time you read a new symbol from an input, you are supposed
+  // to call resolve() with the new symbol. That function replaces
+  // "this" object as a result of name resolution if the new symbol is
+  // more appropriate to be included in the output.
+  //
+  // For example, if "this" is an undefined symbol and a new symbol is
+  // a defined symbol, "this" is replaced with the new symbol.
+  void mergeProperties(const Symbol &Other);
+  void resolve(const Symbol &Other);
+
+  // If this is a lazy symbol, fetch an input file and add the symbol
+  // in the file to the symbol table. Calling this function on
+  // non-lazy object causes a runtime error.
+  void fetch() const;
+
 private:
   static bool isExportDynamic(Kind K, uint8_t Visibility) {
     if (K == SharedKind)
@@ -181,6 +208,14 @@ class Symbol {
     return Config->Shared || Config->ExportDynamic;
   }
 
+  void resolveUndefined(const Undefined &Other);
+  void resolveCommon(const CommonSymbol &Other);
+  void resolveDefined(const Defined &Other);
+  template <class LazyT> void resolveLazy(const LazyT &Other);
+  void resolveShared(const SharedSymbol &Other);
+
+  int compare(const Symbol *Other) const;
+
   inline size_t getSymbolSize() const;
 
 protected:
@@ -351,10 +386,8 @@ class LazyArchive : public Symbol {
 
   static bool classof(const Symbol *S) { return S->kind() == LazyArchiveKind; }
 
-  InputFile *fetch() const;
   MemoryBufferRef getMemberBuffer();
 
-private:
   const llvm::object::Archive::Symbol Sym;
 };
 
@@ -367,8 +400,6 @@ class LazyObject : public Symbol {
                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}
 
   static bool classof(const Symbol *S) { return S->kind() == LazyObjectKind; }
-
-  InputFile *fetch() const;
 };
 
 // Some linker-generated symbols need to be created as

From b970fd718851c655c20a3b978b61d3ed63e7c00d Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Thu, 23 May 2019 09:58:29 +0000
Subject: [PATCH 0011/1176] [clangd-vscode] Do not customize uri converters in
 vscode

Summary:
Clangd is already resolving symlinks on the server side, therefore
there is no more need to handle it in client side. This was also resulting in
breakages whenever index contained a symbol coming from a non-existent file(like
a generated file), e.g. during workspace symbols whole response was dropped
since stat had failed.

Reviewers: ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62288

llvm-svn: 361475
---
 .../clangd/clients/clangd-vscode/src/extension.ts    | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts
index 2cb97d97ab70a..7b80967151883 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/src/extension.ts
@@ -1,6 +1,5 @@
 import * as vscode from 'vscode';
 import * as vscodelc from 'vscode-languageclient';
-import { realpathSync } from 'fs';
 
 /**
  * Method to get workspace configuration option
@@ -87,17 +86,6 @@ export function activate(context: vscode.ExtensionContext) {
             fileEvents: vscode.workspace.createFileSystemWatcher(filePattern)
         },
         initializationOptions: { clangdFileStatus: true },
-        // Resolve symlinks for all files provided by clangd.
-        // This is a workaround for a bazel + clangd issue - bazel produces a symlink tree to build in,
-        // and when navigating to the included file, clangd passes its path inside the symlink tree
-        // rather than its filesystem path.
-        // FIXME: remove this once clangd knows enough about bazel to resolve the
-        // symlinks where needed (or if this causes problems for other workflows).
-        uriConverters: {
-            code2Protocol: (value: vscode.Uri) => value.toString(),
-            protocol2Code: (value: string) =>
-                vscode.Uri.file(realpathSync(vscode.Uri.parse(value).fsPath))
-        },
         // Do not switch to output window when clangd returns output
         revealOutputChannelOn: vscodelc.RevealOutputChannelOn.Never
     };

From 4bce63a0e7c5bfd0c4ee1642d0fbae89702ad490 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Thu, 23 May 2019 10:06:03 +0000
Subject: [PATCH 0012/1176] Reland: [WebAssembly] Add __start_/_stop_ symbols
 for data sections

This is a reland of rL361235.

Fixes https://bugs.llvm.org/show_bug.cgi?id=41565

Differential Revision: https://reviews.llvm.org/D61876

llvm-svn: 361476
---
 lld/test/wasm/startstop.ll | 57 ++++++++++++++++++++++++++++++++++++++
 lld/wasm/Driver.cpp        |  4 ---
 lld/wasm/SymbolTable.cpp   | 11 ++++++++
 lld/wasm/SymbolTable.h     |  2 ++
 lld/wasm/Writer.cpp        | 45 +++++++++++++++++++++++++++---
 5 files changed, 111 insertions(+), 8 deletions(-)
 create mode 100644 lld/test/wasm/startstop.ll

diff --git a/lld/test/wasm/startstop.ll b/lld/test/wasm/startstop.ll
new file mode 100644
index 0000000000000..f83d1ac537856
--- /dev/null
+++ b/lld/test/wasm/startstop.ll
@@ -0,0 +1,57 @@
+; RUN: llc -filetype=obj -o %t.o %s
+; RUN: wasm-ld --no-gc-sections %t.o -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+@foo = global i32 3, section "mysection", align 4
+@bar = global i32 4, section "mysection", align 4
+
+@__start_mysection = external global i8*
+@__stop_mysection = external global i8*
+
+define i8** @get_start() {
+  ret i8** @__start_mysection
+}
+
+define i8** @get_end() {
+  ret i8** @__stop_mysection
+}
+
+define void @_start()  {
+entry:
+  ret void
+}
+; CHECK:        - Type:            CODE
+; CHECK-NEXT:     Functions:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Locals:          []
+; CHECK-NEXT:         Body:            0B
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Locals:          []
+; CHECK-NEXT:         Body:            4180888080000B
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Locals:          []
+; CHECK-NEXT:         Body:            4188888080000B
+; CHECK-NEXT:       - Index:           3
+; CHECK-NEXT:         Locals:          []
+; CHECK-NEXT:         Body:            0B
+; CHECK-NEXT:   - Type:            DATA
+; CHECK-NEXT:     Segments:
+; CHECK-NEXT:       - SectionOffset:   7
+; CHECK-NEXT:         InitFlags:       0
+; CHECK-NEXT:         Offset:
+; CHECK-NEXT:           Opcode:          I32_CONST
+; CHECK-NEXT:           Value:           1024
+; CHECK-NEXT:         Content:         '0300000004000000'
+; CHECK-NEXT:   - Type:            CUSTOM
+; CHECK-NEXT:     Name:            name
+; CHECK-NEXT:     FunctionNames:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Name:            __wasm_call_ctors
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Name:            get_start
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Name:            get_end
+; CHECK-NEXT:       - Index:           3
+; CHECK-NEXT:         Name:            _start
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 9ff824a2eec4f..0bea11faf7dc7 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -652,10 +652,6 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
     // Add synthetic dummies for weak undefined functions.  Must happen
     // after LTO otherwise functions may not yet have signatures.
     Symtab->handleWeakUndefines();
-
-    // Make sure we have resolved all symbols.
-    if (!Config->AllowUndefined)
-      Symtab->reportRemainingUndefines();
   }
 
   if (EntrySym)
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 1a16b6390ba4e..ce1aa5132ba90 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -198,6 +198,17 @@ DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name,
                                         Flags, nullptr, Function);
 }
 
+DefinedData *SymbolTable::addOptionalDataSymbol(StringRef Name, uint32_t Value,
+                                                uint32_t Flags) {
+  Symbol *S = find(Name);
+  if (!S || S->isDefined())
+    return nullptr;
+  LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << Name << "\n");
+  auto *rtn = replaceSymbol<DefinedData>(S, Name, Flags);
+  rtn->setVirtualAddress(Value);
+  return rtn;
+}
+
 DefinedData *SymbolTable::addSyntheticDataSymbol(StringRef Name,
                                                  uint32_t Flags) {
   LLVM_DEBUG(dbgs() << "addSyntheticDataSymbol: " << Name << "\n");
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index ee4ee244ef538..a35140df136e1 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -81,6 +81,8 @@ class SymbolTable {
                                     InputGlobal *Global);
   DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags,
                                         InputFunction *Function);
+  DefinedData *addOptionalDataSymbol(StringRef Name, uint32_t Value,
+                                     uint32_t Flags);
 
   void handleSymbolVariants();
   void handleWeakUndefines();
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 01dbd82dc3549..f43191c70273d 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -73,6 +73,8 @@ class Writer {
   void addSection(OutputSection *Sec);
 
   void addSections();
+  void addStartStopSymbols(const InputSegment *Seg);
+
   void createCustomSections();
   void createSyntheticSections();
   void finalizeSections();
@@ -293,6 +295,22 @@ void Writer::addSection(OutputSection *Sec) {
   OutputSections.push_back(Sec);
 }
 
+// If a section name is valid as a C identifier (which is rare because of
+// the leading '.'), linkers are expected to define __start_<secname> and
+// __stop_<secname> symbols. They are at beginning and end of the section,
+// respectively. This is not requested by the ELF standard, but GNU ld and
+// gold provide the feature, and used by many programs.
+void Writer::addStartStopSymbols(const InputSegment *Seg) {
+  StringRef S = Seg->getName();
+  LLVM_DEBUG(dbgs() << "addStartStopSymbols: " << S << "\n");
+  if (!isValidCIdentifier(S))
+    return;
+  uint32_t Start = Seg->OutputSeg->StartVA + Seg->OutputSegmentOffset;
+  uint32_t Stop = Start + Seg->getSize();
+  Symtab->addOptionalDataSymbol(Saver.save("__start_" + S), Start, 0);
+  Symtab->addOptionalDataSymbol(Saver.save("__stop_" + S), Stop, 0);
+}
+
 void Writer::addSections() {
   addSection(Out.DylinkSec);
   addSection(Out.TypeSec);
@@ -724,21 +742,40 @@ void Writer::run() {
   populateTargetFeatures();
   log("-- calculateImports");
   calculateImports();
+  log("-- layoutMemory");
+  layoutMemory();
+
+  if (!Config->Relocatable) {
+    // Create linker synthesized __start_SECNAME/__stop_SECNAME symbols
+    // This has to be done after memory layout is performed.
+    for (const OutputSegment *Seg : Segments)
+      for (const InputSegment *S : Seg->InputSegments)
+        addStartStopSymbols(S);
+  }
+
   log("-- scanRelocations");
   scanRelocations();
   log("-- assignIndexes");
   assignIndexes();
   log("-- calculateInitFunctions");
   calculateInitFunctions();
-  log("-- calculateTypes");
-  calculateTypes();
-  log("-- layoutMemory");
-  layoutMemory();
+
   if (!Config->Relocatable) {
+    // Create linker synthesized functions
     if (Config->Pic)
       createApplyRelocationsFunction();
     createCallCtorsFunction();
+
+    // Make sure we have resolved all symbols.
+    if (!Config->AllowUndefined)
+      Symtab->reportRemainingUndefines();
+
+    if (errorCount())
+      return;
   }
+
+  log("-- calculateTypes");
+  calculateTypes();
   log("-- calculateExports");
   calculateExports();
   log("-- calculateCustomSections");

From 821a1ac0506cbaa2210eb7158df4c917c394e316 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Thu, 23 May 2019 10:08:56 +0000
Subject: [PATCH 0013/1176] Remove LazyObjFile::AddedToLink.

Instead we can just clear a MemoryBuffer so that we cannot get the
same buffer more than once.

llvm-svn: 361477
---
 lld/ELF/InputFiles.cpp | 18 +++++++-----------
 lld/ELF/InputFiles.h   |  2 --
 lld/ELF/LTO.cpp        |  2 +-
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index d3a9e14377eff..dbfc867af7ee9 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1469,21 +1469,15 @@ InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) {
   return F;
 }
 
-MemoryBufferRef LazyObjFile::getBuffer() {
-  if (AddedToLink)
-    return MemoryBufferRef();
-  AddedToLink = true;
-  return MB;
-}
-
 InputFile *LazyObjFile::fetch() {
-  MemoryBufferRef MBRef = getBuffer();
-  if (MBRef.getBuffer().empty())
+  if (MB.getBuffer().empty())
     return nullptr;
 
-  InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive);
+  InputFile *File = createObjectFile(MB, ArchiveName, OffsetInArchive);
   File->GroupId = GroupId;
 
+  MB = {};
+
   // Copy symbol vector so that the new InputFile doesn't have to
   // insert the same defined symbols to the symbol table again.
   File->Symbols = std::move(Symbols);
@@ -1538,7 +1532,9 @@ template <class ELFT> void LazyObjFile::parse() {
       if (!Sym)
         continue;
       Sym->resolve(LazyObject{*this, Sym->getName()});
-      if (AddedToLink)
+
+      // MemoryBuffer is emptied if this file is instantiated as ObjFile.
+      if (MB.getBuffer().empty())
         return;
     }
     return;
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 7d5e9a2346fa3..8b112e592503c 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -307,9 +307,7 @@ class LazyObjFile : public InputFile {
   static bool classof(const InputFile *F) { return F->kind() == LazyObjKind; }
 
   template <class ELFT> void parse();
-  MemoryBufferRef getBuffer();
   InputFile *fetch();
-  bool AddedToLink = false;
 
 private:
   uint64_t OffsetInArchive;
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index f0ad2c06d4792..dad52d8b03fc9 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -212,7 +212,7 @@ void BitcodeCompiler::add(BitcodeFile &F) {
 // distributed build system that depends on that behavior.
 static void thinLTOCreateEmptyIndexFiles() {
   for (LazyObjFile *F : LazyObjFiles) {
-    if (F->AddedToLink || !isBitcode(F->MB))
+    if (!isBitcode(F->MB))
       continue;
     std::string Path = replaceThinLTOSuffix(getThinLTOOutputFile(F->getName()));
     std::unique_ptr<raw_fd_ostream> OS = openFile(Path + ".thinlto.bc");

From f5d9d2390592f36e594d8e9f3d4b4cc1733193d3 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Thu, 23 May 2019 10:15:12 +0000
Subject: [PATCH 0014/1176] Simplify InputFile::fetch().

We don't have to return a value from the function. Instead, we can
directly call parseFile from the functions.

llvm-svn: 361478
---
 lld/ELF/InputFiles.cpp | 13 +++++++------
 lld/ELF/InputFiles.h   |  6 +++---
 lld/ELF/Symbols.cpp    |  6 ++----
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index dbfc867af7ee9..8a8bf6061e7d8 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1010,14 +1010,14 @@ void ArchiveFile::parse() {
 }
 
 // Returns a buffer pointing to a member file containing a given symbol.
-InputFile *ArchiveFile::fetch(const Archive::Symbol &Sym) {
+void ArchiveFile::fetch(const Archive::Symbol &Sym) {
   Archive::Child C =
       CHECK(Sym.getMember(), toString(this) +
                                  ": could not get the member for symbol " +
                                  Sym.getName());
 
   if (!Seen.insert(C.getChildOffset()).second)
-    return nullptr;
+    return;
 
   MemoryBufferRef MB =
       CHECK(C.getMemoryBufferRef(),
@@ -1031,7 +1031,7 @@ InputFile *ArchiveFile::fetch(const Archive::Symbol &Sym) {
   InputFile *File = createObjectFile(
       MB, getName(), C.getParent()->isThin() ? 0 : C.getChildOffset());
   File->GroupId = GroupId;
-  return File;
+  parseFile(File);
 }
 
 unsigned SharedFile::VernauxNum;
@@ -1469,9 +1469,9 @@ InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) {
   return F;
 }
 
-InputFile *LazyObjFile::fetch() {
+void LazyObjFile::fetch() {
   if (MB.getBuffer().empty())
-    return nullptr;
+    return;
 
   InputFile *File = createObjectFile(MB, ArchiveName, OffsetInArchive);
   File->GroupId = GroupId;
@@ -1481,7 +1481,8 @@ InputFile *LazyObjFile::fetch() {
   // Copy symbol vector so that the new InputFile doesn't have to
   // insert the same defined symbols to the symbol table again.
   File->Symbols = std::move(Symbols);
-  return File;
+
+  parseFile(File);
 }
 
 template <class ELFT> void LazyObjFile::parse() {
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 8b112e592503c..648f5b51452dc 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -307,7 +307,7 @@ class LazyObjFile : public InputFile {
   static bool classof(const InputFile *F) { return F->kind() == LazyObjKind; }
 
   template <class ELFT> void parse();
-  InputFile *fetch();
+  void fetch();
 
 private:
   uint64_t OffsetInArchive;
@@ -322,9 +322,9 @@ class ArchiveFile : public InputFile {
 
   // Pulls out an object file that contains a definition for Sym and
   // returns it. If the same file was instantiated before, this
-  // function returns a nullptr (so we don't instantiate the same file
+  // function does nothing (so we don't instantiate the same file
   // more than once.)
-  InputFile *fetch(const Archive::Symbol &Sym);
+  void fetch(const Archive::Symbol &Sym);
 
 private:
   std::unique_ptr<Archive> File;
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 94f0d6ea6cb9d..d44b24dd1b37a 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -243,14 +243,12 @@ void Symbol::parseSymbolVersion() {
 
 void Symbol::fetch() const {
   if (auto *Sym = dyn_cast<LazyArchive>(this)) {
-    if (auto *F = cast<ArchiveFile>(Sym->File)->fetch(Sym->Sym))
-      parseFile(F);
+    cast<ArchiveFile>(Sym->File)->fetch(Sym->Sym);
     return;
   }
 
   if (auto *Sym = dyn_cast<LazyObject>(this)) {
-    if (auto *F = dyn_cast<LazyObjFile>(Sym->File)->fetch())
-      parseFile(F);
+    dyn_cast<LazyObjFile>(Sym->File)->fetch();
     return;
   }
 

From e51b9e42b68c243dbc9b472f7c64c2c0fe821311 Mon Sep 17 00:00:00 2001
From: James Henderson <jh7370@my.bristol.ac.uk>
Date: Thu, 23 May 2019 10:17:10 +0000
Subject: [PATCH 0015/1176] [llvm-objdump][test] Improve testing of some
 switches #2

This patch focuses on adding additional testing for the --source switch.
For reference, the source-interleave-x86_64.ll test file has been split
into two parts - the input (shared with the other tests) and the test
itself.

Reviewed by: MaskRay, rupprecht, grimar

Differential Revision: https://reviews.llvm.org/D61996

llvm-svn: 361479
---
 .../source-interleave.ll}                     | 19 ++--------
 .../X86/source-interleave-invalid-source.test | 16 +++++++++
 .../X86/source-interleave-missing-source.test | 15 ++++++++
 .../X86/source-interleave-no-debug-info.test  | 13 +++++++
 .../X86/source-interleave-relative-paths.test | 36 +++++++++++++++++++
 .../X86/source-interleave-x86_64.test         | 17 +++++++++
 6 files changed, 100 insertions(+), 16 deletions(-)
 rename llvm/test/tools/llvm-objdump/X86/{source-interleave-x86_64.ll => Inputs/source-interleave.ll} (84%)
 create mode 100644 llvm/test/tools/llvm-objdump/X86/source-interleave-invalid-source.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.test

diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.ll b/llvm/test/tools/llvm-objdump/X86/Inputs/source-interleave.ll
similarity index 84%
rename from llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.ll
rename to llvm/test/tools/llvm-objdump/X86/Inputs/source-interleave.ll
index 24a212028bf7a..1db42f5aaa8c2 100644
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.ll
+++ b/llvm/test/tools/llvm-objdump/X86/Inputs/source-interleave.ll
@@ -1,13 +1,6 @@
-;  RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %s > %t.ll
-;  RUN: llc  -o %t.o -filetype=obj -mtriple=x86_64-pc-linux  %t.ll
-;  RUN: llvm-objdump -d -l %t.o >%t0
-;  RUN: llvm-objdump -dl %t.o >%t1
-;  RUN: llvm-objdump -d -S %t.o >%t2
-;  RUN: llvm-objdump -dS %t.o >%t3
-;  RUN: cmp %t0 %t1
-;  RUN: cmp %t2 %t3
-;  RUN: FileCheck --check-prefix=LINES %t.ll < %t0
-;  RUN: FileCheck --check-prefix=SOURCE --strict-whitespace %t.ll < %t2
+; NOTE: To be able to use this file as an input, the string SRC_CMPDIR needs
+;       replacing with a directory path by using sed or similar.
+
 ; ModuleID = 'source-interleave-x86_64.bc'
 source_filename = "source-interleave-x86_64.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -71,9 +64,3 @@ attributes #1 = { nounwind readnone }
 !21 = !DILocation(line: 8, column: 15, scope: !14)
 !22 = !DILocation(line: 8, column: 13, scope: !14)
 !23 = !DILocation(line: 8, column: 3, scope: !14)
-; LINES: main:
-; LINES-NEXT: ; {{[ -\(\)_A-Za-z0-9.\\/:]+}}source-interleave-x86_64.c:6
-
-; SOURCE: main:
-; SOURCE-NEXT: ; int main() {
-; SOURCE:      ;   int *b = &a;
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-invalid-source.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-invalid-source.test
new file mode 100644
index 0000000000000..e958173d16e90
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-invalid-source.test
@@ -0,0 +1,16 @@
+## Test llvm-objdump's --source behaviour when a line number is greater than the
+## file length.
+
+# RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %p/Inputs/source-interleave.ll > %t.ll
+# RUN: sed -e "s,line: 7,line: 9999,g" %t.ll > %t2.ll
+
+# RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
+# RUN: llc -o %t2.o -filetype=obj -mtriple=x86_64-pc-linux %t2.ll
+
+# RUN: llvm-objdump --source %t.o | FileCheck %s --check-prefixes=CHECK,GOOD
+# RUN: llvm-objdump --source %t2.o | FileCheck %s --implicit-check-not="int *b = &a;"
+
+# CHECK:      main:
+# CHECK-NEXT: ; int main() {
+# GOOD:       ;   int *b = &a;
+# CHECK:      ;   return *b + foo();
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test
new file mode 100644
index 0000000000000..166caeb41b50d
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test
@@ -0,0 +1,15 @@
+## Test that if the source cannot be found that disassembly is still printed,
+## and that no source is printed.
+
+# RUN: sed -e "s,SRC_COMPDIR,%/t,g" %p/Inputs/source-interleave.ll > %t.ll
+# RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %p/Inputs/source-interleave.ll > %t2.ll
+
+# RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
+# RUN: llc -o %t2.o -filetype=obj -mtriple=x86_64-pc-linux %t2.ll
+
+# RUN: llvm-objdump --source %t.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source %t2.o | FileCheck %s --check-prefixes=CHECK,SOURCE
+
+# CHECK:       0000000000000010 main:
+# SOURCE-NEXT: ; int main() {
+# CHECK-NEXT:   10:   55                      pushq   %rbp
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
new file mode 100644
index 0000000000000..4a0a34a9eae0e
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
@@ -0,0 +1,13 @@
+## Test that if an object has no debug information, only the disassembly is
+## printed when --source is specified.
+
+# RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %p/Inputs/source-interleave.ll > %t.ll
+# RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
+# RUN: llvm-objcopy --strip-debug %t.o %t2.o
+
+# RUN: llvm-objdump --source %t.o | FileCheck %s --check-prefixes=CHECK,SOURCE
+# RUN: llvm-objdump --source %t2.o | FileCheck %s --implicit-check-not=main
+
+# CHECK:       0000000000000010 main:
+# SOURCE-NEXT: ; int main() {
+# CHECK-NEXT:   10:   55                      pushq   %rbp
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test
new file mode 100644
index 0000000000000..f9c69dfc0c06a
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test
@@ -0,0 +1,36 @@
+## Test that llvm-objdump prints source and disassembly for relative directory
+## paths for appropriate working directories. We also test that when the directory
+## string is empty the behaviour is the same as the current working directory.
+
+# RUN: mkdir -p %t/a/b
+# RUN: cp %p/Inputs/source-interleave-x86_64.c %t/a/source-interleave-x86_64.c
+
+# RUN: sed -e "s,SRC_COMPDIR,a,g" %p/Inputs/source-interleave.ll > %t.ll
+# RUN: sed -e "s,SRC_COMPDIR,,g" %p/Inputs/source-interleave.ll > %t2.ll
+# RUN: sed -e "s,SRC_COMPDIR,.,g" %p/Inputs/source-interleave.ll > %t3.ll
+# RUN: sed -e "s,SRC_COMPDIR,..,g" %p/Inputs/source-interleave.ll > %t4.ll
+
+# RUN: llc -o %t/a/a.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
+# RUN: llc -o %t/a/b.o -filetype=obj -mtriple=x86_64-pc-linux %t2.ll
+# RUN: llc -o %t/a/c.o -filetype=obj -mtriple=x86_64-pc-linux %t3.ll
+# RUN: llc -o %t/a/d.o -filetype=obj -mtriple=x86_64-pc-linux %t4.ll
+
+# RUN: cd %t
+# RUN: llvm-objdump --source a/a.o | FileCheck %s --check-prefixes=CHECK,SOURCE
+# RUN: llvm-objdump --source a/b.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source a/c.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source a/d.o | FileCheck %s --implicit-check-not=main
+# RUN: cd a
+# RUN: llvm-objdump --source a.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source b.o | FileCheck %s --check-prefixes=CHECK,SOURCE
+# RUN: llvm-objdump --source c.o | FileCheck %s --check-prefixes=CHECK,SOURCE
+# RUN: llvm-objdump --source d.o | FileCheck %s --implicit-check-not=main
+# RUN: cd b
+# RUN: llvm-objdump --source ../a.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source ../b.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source ../c.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source ../d.o | FileCheck %s --check-prefixes=CHECK,SOURCE
+
+# CHECK:       0000000000000010 main:
+# SOURCE-NEXT: ; int main() {
+# CHECK-NEXT:   10:   55                      pushq   %rbp
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.test
new file mode 100644
index 0000000000000..63a6c0571802a
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-x86_64.test
@@ -0,0 +1,17 @@
+# RUN: sed -e "s,SRC_COMPDIR,%/p/Inputs,g" %p/Inputs/source-interleave.ll > %t.ll
+# RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
+# RUN: llvm-objdump -d -l %t.o >%t0
+# RUN: llvm-objdump -dl %t.o >%t1
+# RUN: llvm-objdump -d -S %t.o >%t2
+# RUN: llvm-objdump -dS %t.o >%t3
+# RUN: cmp %t0 %t1
+# RUN: cmp %t2 %t3
+# RUN: FileCheck --check-prefix=LINES %s < %t0
+# RUN: FileCheck --check-prefix=SOURCE --strict-whitespace %s < %t2
+
+# LINES: main:
+# LINES-NEXT: ; {{[ -\(\)_A-Za-z0-9.\\/:]+}}source-interleave-x86_64.c:6
+
+# SOURCE: main:
+# SOURCE-NEXT: ; int main() {
+# SOURCE:      ;   int *b = &a;

From ff47d83e7820f0342ee5d0b98f8b66a84bfee350 Mon Sep 17 00:00:00 2001
From: Petar Jovanovic <petar.jovanovic@mips.com>
Date: Thu, 23 May 2019 10:37:13 +0000
Subject: [PATCH 0016/1176] [DwarfExpression] Refactor dwarf expression (NFC)

Refactor location description kind in order to be easier for extensions
(needed for D60866).
In addition, cut off some bits from the other class fields.

Patch by Djordje Todorovic.

Differential Revision: https://reviews.llvm.org/D62002

llvm-svn: 361480
---
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 28 ++++++-------
 llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 39 ++++++++++++++-----
 2 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 1235c14900574..c7c283202022e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -40,7 +40,7 @@ void DwarfExpression::emitConstu(uint64_t Value) {
 
 void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
  assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert((LocationKind == Unknown || LocationKind == Register) &&
+ assert((isUnknownLocation() || isRegisterLocation()) &&
         "location description already locked down");
  LocationKind = Register;
  if (DwarfReg < 32) {
@@ -53,7 +53,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
 
 void DwarfExpression::addBReg(int DwarfReg, int Offset) {
   assert(DwarfReg >= 0 && "invalid negative dwarf register number");
-  assert(LocationKind != Register && "location description already locked down");
+  assert(!isRegisterLocation() && "location description already locked down");
   if (DwarfReg < 32) {
     emitOp(dwarf::DW_OP_breg0 + DwarfReg);
   } else {
@@ -184,20 +184,20 @@ void DwarfExpression::addStackValue() {
 }
 
 void DwarfExpression::addSignedConstant(int64_t Value) {
-  assert(LocationKind == Implicit || LocationKind == Unknown);
+  assert(isImplicitLocation() || isUnknownLocation());
   LocationKind = Implicit;
   emitOp(dwarf::DW_OP_consts);
   emitSigned(Value);
 }
 
 void DwarfExpression::addUnsignedConstant(uint64_t Value) {
-  assert(LocationKind == Implicit || LocationKind == Unknown);
+  assert(isImplicitLocation() || isUnknownLocation());
   LocationKind = Implicit;
   emitConstu(Value);
 }
 
 void DwarfExpression::addUnsignedConstant(const APInt &Value) {
-  assert(LocationKind == Implicit || LocationKind == Unknown);
+  assert(isImplicitLocation() || isUnknownLocation());
   LocationKind = Implicit;
 
   unsigned Size = Value.getBitWidth();
@@ -242,7 +242,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
   }
 
   // Handle simple register locations.
-  if (LocationKind != Memory && !HasComplexExpression) {
+  if (!isMemoryLocation() && !HasComplexExpression) {
     for (auto &Reg : DwarfRegs) {
       if (Reg.DwarfRegNo >= 0)
         addReg(Reg.DwarfRegNo, Reg.Comment);
@@ -343,7 +343,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
         SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
 
       // Emit a DW_OP_stack_value for implicit location descriptions.
-      if (LocationKind == Implicit)
+      if (isImplicitLocation())
         addStackValue();
 
       // Emit the DW_OP_piece.
@@ -354,7 +354,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       return;
     }
     case dwarf::DW_OP_plus_uconst:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_plus_uconst);
       emitUnsigned(Op->getArg(0));
       break;
@@ -375,8 +375,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       emitOp(Op->getOp());
       break;
     case dwarf::DW_OP_deref:
-      assert(LocationKind != Register);
-      if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))
+      assert(!isRegisterLocation());
+      if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
         // Turning this into a memory location description makes the deref
         // implicit.
         LocationKind = Memory;
@@ -384,7 +384,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
         emitOp(dwarf::DW_OP_deref);
       break;
     case dwarf::DW_OP_constu:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitConstu(Op->getArg(0));
       break;
     case dwarf::DW_OP_LLVM_convert: {
@@ -427,11 +427,11 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
       LocationKind = Implicit;
       break;
     case dwarf::DW_OP_swap:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_swap);
       break;
     case dwarf::DW_OP_xderef:
-      assert(LocationKind != Register);
+      assert(!isRegisterLocation());
       emitOp(dwarf::DW_OP_xderef);
       break;
     case dwarf::DW_OP_deref_size:
@@ -443,7 +443,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
     }
   }
 
-  if (LocationKind == Implicit)
+  if (isImplicitLocation())
     // Turn this into an implicit location description.
     addStackValue();
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 145504946a5cb..6985debe6138f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -111,18 +111,40 @@ class DwarfExpression {
 
   /// Current Fragment Offset in Bits.
   uint64_t OffsetInBits = 0;
-  unsigned DwarfVersion;
 
   /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
-  unsigned SubRegisterSizeInBits = 0;
-  unsigned SubRegisterOffsetInBits = 0;
+  unsigned SubRegisterSizeInBits : 16;
+  unsigned SubRegisterOffsetInBits : 16;
 
   /// The kind of location description being produced.
-  enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+  enum { Unknown = 0, Register, Memory, Implicit };
 
+  unsigned LocationKind : 3;
+  unsigned LocationFlags : 2;
+  unsigned DwarfVersion : 4;
+
+public:
+  bool isUnknownLocation() const {
+    return LocationKind == Unknown;
+  }
+
+  bool isMemoryLocation() const {
+    return LocationKind == Memory;
+  }
+
+  bool isRegisterLocation() const {
+    return LocationKind == Register;
+  }
+
+  bool isImplicitLocation() const {
+    return LocationKind == Implicit;
+  }
+
+protected:
   /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
   /// to represent a subregister.
   void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+    assert(SizeInBits < 65536 && OffsetInBits < 65536);
     SubRegisterSizeInBits = SizeInBits;
     SubRegisterOffsetInBits = OffsetInBits;
   }
@@ -206,7 +228,9 @@ class DwarfExpression {
 
 public:
   DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU)
-    : CU(CU), DwarfVersion(DwarfVersion) {}
+      : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0),
+        LocationKind(Unknown), LocationFlags(Unknown),
+        DwarfVersion(DwarfVersion) {}
 
   /// This needs to be called last to commit any pending changes.
   void finalize();
@@ -220,12 +244,9 @@ class DwarfExpression {
   /// Emit an unsigned constant.
   void addUnsignedConstant(const APInt &Value);
 
-  bool isMemoryLocation() const { return LocationKind == Memory; }
-  bool isUnknownLocation() const { return LocationKind == Unknown; }
-
   /// Lock this down to become a memory location description.
   void setMemoryLocationKind() {
-    assert(LocationKind == Unknown);
+    assert(isUnknownLocation());
     LocationKind = Memory;
   }
 

From f95b05c3df6eccb66d934c6529091400195d57ff Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 23 May 2019 10:46:35 +0000
Subject: [PATCH 0017/1176] Add REQUIRES: lld to debug-types-address-ranges.s

This should fix the green dragon bots.

llvm-svn: 361481
---
 lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s b/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s
index 892c93d3822cc..5fc7c41955c35 100644
--- a/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s
+++ b/lldb/lit/SymbolFile/DWARF/debug-types-address-ranges.s
@@ -4,6 +4,8 @@
 # compute address range for the type unit as type units don't describe any
 # addresses. The addresses should always resolve to the relevant compile units.
 
+# REQUIRES: lld
+
 # RUN: llvm-mc -dwarf-version=5 -triple x86_64-pc-linux %s -filetype=obj >%t.o
 # RUN: ld.lld %t.o -o %t -image-base=0x47000
 # RUN: %lldb %t -o "image lookup -a 0x48000 -v" -o exit | FileCheck %s

From 28afd8dc711261aeb082739740e86d28d1a50082 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Thu, 23 May 2019 10:50:01 +0000
Subject: [PATCH 0018/1176] [MCA] Make the bool conversion operator in class
 InstRef explicit. NFCI

This patch makes the bool conversion operator in InstRef explicit.
It also adds a operator< to hel comparing InstRef objects in sets.

llvm-svn: 361482
---
 llvm/include/llvm/MCA/Instruction.h | 6 +++++-
 llvm/lib/MCA/Stages/EntryStage.cpp  | 4 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 0cb6f6cd4170f..a7a47fd3645ed 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -526,13 +526,17 @@ class InstRef {
   InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
 
   bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+  bool operator!=(const InstRef &Other) const { return Data != Other.Data; }
+  bool operator<(const InstRef &Other) const {
+    return Data.first < Other.Data.first;
+  }
 
   unsigned getSourceIndex() const { return Data.first; }
   Instruction *getInstruction() { return Data.second; }
   const Instruction *getInstruction() const { return Data.second; }
 
   /// Returns true if this references a valid instruction.
-  operator bool() const { return Data.second != nullptr; }
+  explicit operator bool() const { return Data.second != nullptr; }
 
   /// Invalidate this reference.
   void invalidate() { Data.second = nullptr; }
diff --git a/llvm/lib/MCA/Stages/EntryStage.cpp b/llvm/lib/MCA/Stages/EntryStage.cpp
index 2028b9e2b765c..d2f5613a0fb6e 100644
--- a/llvm/lib/MCA/Stages/EntryStage.cpp
+++ b/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -18,7 +18,9 @@
 namespace llvm {
 namespace mca {
 
-bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; }
+bool EntryStage::hasWorkToComplete() const {
+  return static_cast<bool>(CurrentInstruction);
+}
 
 bool EntryStage::isAvailable(const InstRef & /* unused */) const {
   if (CurrentInstruction)

From 32d976bac194d78656974e3e05bf52997a06f509 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 10:55:13 +0000
Subject: [PATCH 0019/1176] [NFC][X86] Fix check prefixes and autogenerate
 fold-pcmpeqd-2.ll test

Being affected by (sub %x, c) -> (add %x, (sub 0, c))
patch in an uncertain way.

llvm-svn: 361483
---
 llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll | 176 ++++++++++++++++++++++--
 1 file changed, 168 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
index d95c6323de4ee..55c3287028caa 100644
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=basic | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=basic | FileCheck %s --check-prefixes=ALL,X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=basic | FileCheck %s --check-prefixes=ALL,X64
 
 ; This testcase should need to spill the -1 value on both x86-32 and x86-64,
 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
@@ -7,18 +8,177 @@
 ;
 ; RAGreedy defeats the test by splitting live ranges.
 
-; Constant pool all-ones vector:
-; CHECK: .space 16,255
-
-; No pcmpeqd instructions, everybody uses the constant pool.
-; CHECK-LABEL: program_1:
-; CHECK-NOT: pcmpeqd
+; There should be no pcmpeqd instructions, everybody should the constant pool.
 
 	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
 	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
 	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
 
 define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
+; X32-LABEL: program_1:
+; X32:       ## %bb.0: ## %entry
+; X32-NEXT:    cmpl $0, 0
+; X32-NEXT:    jle LBB0_2
+; X32-NEXT:  ## %bb.1: ## %forcond
+; X32-NEXT:    cmpl $0, 0
+; X32-NEXT:    jg LBB0_3
+; X32-NEXT:  LBB0_2: ## %ifthen
+; X32-NEXT:    retl
+; X32-NEXT:  LBB0_3: ## %forbody
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    subl $88, %esp
+; X32-NEXT:    movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
+; X32-NEXT:    minps LCPI0_3, %xmm1
+; X32-NEXT:    cvttps2dq %xmm1, %xmm0
+; X32-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X32-NEXT:    subps %xmm0, %xmm1
+; X32-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    mulps LCPI0_3, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    addps LCPI0_1, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    mulps %xmm1, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    addps LCPI0_2, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    psubd LCPI0_4, %xmm0
+; X32-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    mulps LCPI0_3, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    mulps %xmm0, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    mulps LCPI0_3, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    cmpunordps %xmm0, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    minps LCPI0_3, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl $0, (%esp)
+; X32-NEXT:    xorl %esi, %esi
+; X32-NEXT:    xorps %xmm3, %xmm3
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
+; X32-NEXT:    calll *%esi
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    minps LCPI0_3, %xmm0
+; X32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    pxor %xmm1, %xmm1
+; X32-NEXT:    psubd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Folded Reload
+; X32-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    psubd LCPI0_4, %xmm0
+; X32-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    por %xmm1, %xmm0
+; X32-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill
+; X32-NEXT:    pxor %xmm0, %xmm0
+; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl $0, (%esp)
+; X32-NEXT:    xorps %xmm3, %xmm3
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload
+; X32-NEXT:    calll *%esi
+; X32-NEXT:    ud2
+;
+; X64-LABEL: program_1:
+; X64:       ## %bb.0: ## %entry
+; X64-NEXT:    cmpl $0, 0
+; X64-NEXT:    jle LBB0_2
+; X64-NEXT:  ## %bb.1: ## %forcond
+; X64-NEXT:    cmpl $0, 0
+; X64-NEXT:    jg LBB0_3
+; X64-NEXT:  LBB0_2: ## %ifthen
+; X64-NEXT:    retq
+; X64-NEXT:  LBB0_3: ## %forbody
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $64, %rsp
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movaps {{.*#+}} xmm1 = [1.28E+2,1.28E+2,1.28E+2,1.28E+2]
+; X64-NEXT:    minps {{.*}}(%rip), %xmm1
+; X64-NEXT:    cvttps2dq %xmm1, %xmm0
+; X64-NEXT:    cvtdq2ps %xmm0, %xmm0
+; X64-NEXT:    subps %xmm0, %xmm1
+; X64-NEXT:    movaps %xmm1, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    mulps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    addps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    mulps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    addps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movdqa (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    mulps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    mulps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    mulps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    xorps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    cmpunordps %xmm0, %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    minps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    xorl %ebx, %ebx
+; X64-NEXT:    xorps %xmm3, %xmm3
+; X64-NEXT:    xorps %xmm4, %xmm4
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    callq *%rbx
+; X64-NEXT:    movaps (%rsp), %xmm0 ## 16-byte Reload
+; X64-NEXT:    minps {{.*}}(%rip), %xmm0
+; X64-NEXT:    movaps %xmm0, (%rsp) ## 16-byte Spill
+; X64-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    psubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; X64-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    orps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Folded Reload
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill
+; X64-NEXT:    xorps %xmm3, %xmm3
+; X64-NEXT:    xorps %xmm4, %xmm4
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload
+; X64-NEXT:    movaps (%rsp), %xmm2 ## 16-byte Reload
+; X64-NEXT:    xorl %edi, %edi
+; X64-NEXT:    callq *%rbx
+; X64-NEXT:    ud2
 entry:
 	%tmp3.i = load i32, i32* null		; <i32> [#uses=1]
 	%cmp = icmp slt i32 0, %tmp3.i		; <i1> [#uses=1]

From 248a13057a4adbdb8d511b1458daf39d01a4b520 Mon Sep 17 00:00:00 2001
From: Konrad Kleine <kkleine@redhat.com>
Date: Thu, 23 May 2019 11:14:47 +0000
Subject: [PATCH 0020/1176] [lldb] NFC modernize codebase with
 modernize-use-nullptr

Summary:
NFC = [[ https://llvm.org/docs/Lexicon.html#nfc | Non functional change ]]

This commit is the result of modernizing the LLDB codebase by using
`nullptr` instread of `0` or `NULL`. See
https://clang.llvm.org/extra/clang-tidy/checks/modernize-use-nullptr.html
for more information.

This is the command I ran and I to fix and format the code base:

```
run-clang-tidy.py \
	-header-filter='.*' \
	-checks='-*,modernize-use-nullptr' \
	-fix ~/dev/llvm-project/lldb/.* \
	-format \
	-style LLVM \
	-p ~/llvm-builds/debug-ninja-gcc
```

NOTE: There were also changes to `llvm/utils/unittest` but I did not
include them because I felt that maybe this library shall be updated in
isolation somehow.

NOTE: I know this is a rather large commit but it is a nobrainer in most
parts.

Reviewers: martong, espindola, shafik, #lldb, JDevlieghere

Reviewed By: JDevlieghere

Subscribers: arsenm, jvesely, nhaehnle, hiraditya, JDevlieghere, teemperor, rnkovacs, emaste, kubamracek, nemanjai, ki.stfu, javed.absar, arichardson, kbarton, jrtc27, MaskRay, atanasyan, dexonsmith, arphaman, jfb, jsji, jdoerfert, lldb-commits, llvm-commits

Tags: #lldb, #llvm

Differential Revision: https://reviews.llvm.org/D61847

llvm-svn: 361484
---
 .../lldb/Breakpoint/BreakpointResolver.h      |   2 +-
 lldb/source/API/SBAddress.cpp                 |   6 +-
 lldb/source/API/SBBlock.cpp                   |  12 +-
 lldb/source/API/SBBreakpointLocation.cpp      |   6 +-
 lldb/source/API/SBBroadcaster.cpp             |  19 +-
 lldb/source/API/SBCommunication.cpp           |  12 +-
 lldb/source/API/SBCompileUnit.cpp             |  10 +-
 lldb/source/API/SBData.cpp                    |  10 +-
 lldb/source/API/SBDeclaration.cpp             |   2 +-
 lldb/source/API/SBError.cpp                   |   9 +-
 lldb/source/API/SBEvent.cpp                   |   6 +-
 lldb/source/API/SBFunction.cpp                |  16 +-
 lldb/source/API/SBHostOS.cpp                  |   5 +-
 lldb/source/API/SBInstruction.cpp             |  13 +-
 lldb/source/API/SBInstructionList.cpp         |   8 +-
 lldb/source/API/SBLineEntry.cpp               |   2 +-
 lldb/source/API/SBListener.cpp                |  20 +-
 lldb/source/API/SBModule.cpp                  |  23 +-
 lldb/source/API/SBPlatform.cpp                |  28 +-
 lldb/source/API/SBProcess.cpp                 |  10 +-
 lldb/source/API/SBQueue.cpp                   |   4 +-
 lldb/source/API/SBQueueItem.cpp               |   2 +-
 lldb/source/API/SBSection.cpp                 |   4 +-
 lldb/source/API/SBSourceManager.cpp           |   2 +-
 lldb/source/API/SBStream.cpp                  |  10 +-
 lldb/source/API/SBStringList.cpp              |  10 +-
 lldb/source/API/SBSymbol.cpp                  |  18 +-
 lldb/source/API/SBSymbolContext.cpp           |  17 +-
 lldb/source/API/SBSymbolContextList.cpp       |   4 +-
 lldb/source/API/SBTarget.cpp                  |  54 ++--
 lldb/source/API/SBThread.cpp                  |  22 +-
 lldb/source/API/SBThreadCollection.cpp        |   2 +-
 lldb/source/API/SBThreadPlan.cpp              |   4 +-
 lldb/source/API/SBType.cpp                    |  16 +-
 lldb/source/API/SBTypeCategory.cpp            |   4 +-
 lldb/source/API/SBTypeEnumMember.cpp          |   6 +-
 lldb/source/API/SBTypeFilter.cpp              |   4 +-
 lldb/source/API/SBTypeFormat.cpp              |   2 +-
 lldb/source/API/SBTypeNameSpecifier.cpp       |   8 +-
 lldb/source/API/SBTypeSummary.cpp             |   4 +-
 lldb/source/API/SBTypeSynthetic.cpp           |   4 +-
 lldb/source/API/SBValue.cpp                   |  26 +-
 lldb/source/API/SBValueList.cpp               |   4 +-
 lldb/source/API/SBWatchpoint.cpp              |   4 +-
 .../Breakpoint/BreakpointResolverAddress.cpp  |   2 +-
 .../Breakpoint/BreakpointResolverFileLine.cpp |   2 +-
 .../BreakpointResolverFileRegex.cpp           |   4 +-
 .../Breakpoint/BreakpointResolverScripted.cpp |   4 +-
 .../source/Commands/CommandObjectExpression.h |   2 +-
 lldb/source/Core/Communication.cpp            |   2 +-
 lldb/source/Core/Debugger.cpp                 |   4 +-
 lldb/source/Core/Mangled.cpp                  |   2 +-
 lldb/source/Core/Section.cpp                  |  14 +-
 lldb/source/Core/SourceManager.cpp            |  10 +-
 lldb/source/Core/Value.cpp                    |  30 +-
 lldb/source/Core/ValueObject.cpp              |  67 ++--
 .../Core/ValueObjectConstResultImpl.cpp       |  20 +-
 lldb/source/Core/ValueObjectMemory.cpp        |   2 +-
 lldb/source/Core/ValueObjectRegister.cpp      |  20 +-
 lldb/source/Core/ValueObjectVariable.cpp      |   8 +-
 .../DataFormatters/DataVisualization.cpp      |   2 +-
 lldb/source/DataFormatters/FormatManager.cpp  |  12 +-
 .../DataFormatters/FormattersHelpers.cpp      |   2 +-
 lldb/source/DataFormatters/StringPrinter.cpp  |   8 +-
 lldb/source/DataFormatters/TypeFormat.cpp     |   2 +-
 lldb/source/DataFormatters/TypeSynthetic.cpp  |  18 +-
 .../DataFormatters/ValueObjectPrinter.cpp     |   7 +-
 lldb/source/Expression/DWARFExpression.cpp    |  12 +-
 lldb/source/Expression/ExpressionVariable.cpp |   2 +-
 lldb/source/Expression/FunctionCaller.cpp     |  21 +-
 lldb/source/Expression/IRExecutionUnit.cpp    |   8 +-
 lldb/source/Expression/IRInterpreter.cpp      |   2 +-
 lldb/source/Expression/IRMemoryMap.cpp        |   2 +-
 lldb/source/Expression/LLVMUserExpression.cpp |   4 +-
 lldb/source/Expression/UserExpression.cpp     |   6 +-
 lldb/source/Host/common/Editline.cpp          |  14 +-
 lldb/source/Host/common/File.cpp              |   8 +-
 lldb/source/Host/common/Host.cpp              |   8 +-
 .../Host/common/HostNativeThreadBase.cpp      |   8 +-
 lldb/source/Host/common/OptionParser.cpp      |   6 +-
 lldb/source/Host/common/ProcessRunLock.cpp    |   2 +-
 lldb/source/Host/common/SocketAddress.cpp     |   6 +-
 lldb/source/Host/common/TCPSocket.cpp         |   4 +-
 lldb/source/Host/common/TaskPool.cpp          |   2 +-
 lldb/source/Host/common/Terminal.cpp          |   6 +-
 lldb/source/Host/common/ThreadLauncher.cpp    |   2 +-
 lldb/source/Host/common/XML.cpp               |   2 +-
 lldb/source/Host/linux/Host.cpp               |   8 +-
 lldb/source/Host/linux/HostInfoLinux.cpp      |   3 +-
 .../posix/ConnectionFileDescriptorPosix.cpp   |   2 +-
 lldb/source/Host/posix/HostThreadPosix.cpp    |   2 +-
 .../source/Interpreter/CommandInterpreter.cpp |   6 +-
 lldb/source/Interpreter/OptionValue.cpp       |   6 +-
 .../Plugins/ABI/SysV-arm/ABISysV_arm.cpp      |   6 +-
 .../Disassembler/llvm/DisassemblerLLVMC.cpp   |  16 +-
 .../DynamicLoaderDarwinKernel.cpp             |  42 +--
 .../Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp |   4 +-
 .../MacOSX-DYLD/DynamicLoaderDarwin.cpp       |  29 +-
 .../MacOSX-DYLD/DynamicLoaderMacOS.cpp        |   4 +-
 .../MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp   |  14 +-
 .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp     |   6 +-
 .../Static/DynamicLoaderStatic.cpp            |   2 +-
 .../ExpressionParser/Clang/ASTDumper.cpp      |   2 +-
 .../Clang/ASTResultSynthesizer.cpp            |  18 +-
 .../Clang/ASTStructExtractor.cpp              |  12 +-
 .../ExpressionParser/Clang/ClangASTSource.cpp |  55 ++--
 .../ExpressionParser/Clang/ClangASTSource.h   |   6 +-
 .../Clang/ClangExpressionDeclMap.cpp          |  39 +--
 .../Clang/ClangExpressionDeclMap.h            |  15 +-
 .../Clang/ClangExpressionParser.cpp           |   2 +-
 .../Clang/ClangExpressionVariable.h           |   8 +-
 .../Clang/ClangFunctionCaller.h               |   2 +-
 .../Clang/ClangPersistentVariables.cpp        |   4 +-
 .../Clang/ClangUserExpression.cpp             |   2 +-
 .../Clang/ClangUtilityFunction.cpp            |   2 +-
 .../ExpressionParser/Clang/IRForTarget.cpp    |  55 ++--
 .../Instruction/ARM/EmulateInstructionARM.cpp |  14 +-
 .../Instruction/ARM/EmulationStateARM.cpp     |  14 +-
 .../ARM64/EmulateInstructionARM64.cpp         |   4 +-
 .../MIPS/EmulateInstructionMIPS.cpp           |  10 +-
 .../MIPS64/EmulateInstructionMIPS64.cpp       |  10 +-
 .../ASan/ASanRuntime.cpp                      |   2 +-
 .../TSan/TSanRuntime.cpp                      |   2 +-
 .../ItaniumABI/ItaniumABILanguageRuntime.cpp  |  10 +-
 .../AppleObjCRuntime/AppleObjCDeclVendor.cpp  |  27 +-
 .../AppleObjCRuntime/AppleObjCRuntime.cpp     |  10 +-
 .../AppleObjCRuntime/AppleObjCRuntimeV1.cpp   |   4 +-
 .../AppleObjCRuntime/AppleObjCRuntimeV2.cpp   |  28 +-
 .../AppleObjCTrampolineHandler.cpp            |  16 +-
 .../AppleObjCTrampolineHandler.h              |   5 +-
 ...pleThreadPlanStepThroughObjCTrampoline.cpp |   4 +-
 .../MemoryHistory/asan/MemoryHistoryASan.cpp  |   2 +-
 .../BSD-Archive/ObjectContainerBSDArchive.cpp |  10 +-
 .../BSD-Archive/ObjectContainerBSDArchive.h   |   2 +-
 .../ObjectContainerUniversalMachO.cpp         |   2 +-
 .../Plugins/ObjectFile/ELF/ELFHeader.cpp      |  32 +-
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  |  30 +-
 .../Plugins/ObjectFile/JIT/ObjectFileJIT.cpp  |  12 +-
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 298 +++++++++---------
 .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp    |  10 +-
 .../Python/OperatingSystemPython.cpp          |  16 +-
 .../Platform/FreeBSD/PlatformFreeBSD.cpp      |   4 +-
 .../Plugins/Platform/Linux/PlatformLinux.cpp  |   4 +-
 .../Platform/MacOSX/PlatformDarwin.cpp        |  16 +-
 .../Platform/MacOSX/PlatformMacOSX.cpp        |   4 +-
 .../MacOSX/PlatformRemoteDarwinDevice.cpp     |  24 +-
 .../Platform/NetBSD/PlatformNetBSD.cpp        |   4 +-
 .../Plugins/Platform/POSIX/PlatformPOSIX.cpp  |  22 +-
 .../gdb-server/PlatformRemoteGDBServer.cpp    |  23 +-
 .../NativeRegisterContextLinux_x86_64.cpp     |  10 +-
 .../Plugins/Process/Linux/ProcessorTrace.cpp  |   4 +-
 .../Plugins/Process/POSIX/ProcessMessage.cpp  |   2 +-
 .../Process/Utility/DynamicRegisterInfo.cpp   |  20 +-
 .../Plugins/Process/Utility/HistoryThread.cpp |   2 +-
 .../Process/Utility/InferiorCallPOSIX.cpp     |   6 +-
 .../Utility/RegisterContextDarwin_arm.cpp     | 102 +++---
 .../Utility/RegisterContextDarwin_arm64.cpp   |   4 +-
 .../Utility/RegisterContextDarwin_i386.cpp    |  28 +-
 .../Utility/RegisterContextDarwin_x86_64.cpp  |  38 +--
 .../Process/Utility/RegisterContextDummy.cpp  |   8 +-
 .../Utility/RegisterContextFreeBSD_i386.cpp   |   2 +-
 .../RegisterContextFreeBSD_powerpc.cpp        |   2 +-
 .../Utility/RegisterContextHistory.cpp        |   8 +-
 .../Process/Utility/RegisterContextLLDB.cpp   |  20 +-
 .../Utility/RegisterContextLinux_i386.cpp     |   6 +-
 .../Utility/RegisterContextLinux_mips.cpp     |   2 +-
 .../Utility/RegisterContextLinux_x86_64.cpp   |   4 +-
 .../Utility/RegisterContextOpenBSD_i386.cpp   |   2 +-
 .../Utility/RegisterContextPOSIX_arm.cpp      |   6 +-
 .../Utility/RegisterContextPOSIX_arm64.cpp    |   6 +-
 .../Utility/RegisterContextPOSIX_mips64.cpp   |   2 +-
 .../Utility/RegisterContextPOSIX_powerpc.cpp  |   4 +-
 .../Utility/RegisterContextPOSIX_ppc64le.cpp  |   4 +-
 .../Utility/RegisterContextPOSIX_s390x.cpp    |   6 +-
 .../Utility/RegisterContextPOSIX_x86.cpp      |   6 +-
 .../Utility/RegisterContextThreadMemory.cpp   |   4 +-
 .../Process/Utility/RegisterInfoPOSIX_arm.cpp |   2 +-
 .../Utility/RegisterInfoPOSIX_arm64.cpp       |   2 +-
 .../Utility/RegisterInfoPOSIX_ppc64le.cpp     |   2 +-
 .../Process/Utility/StopInfoMachException.cpp |   8 +-
 .../Plugins/Process/Utility/UnwindLLDB.cpp    |  12 +-
 .../Utility/UnwindMacOSXFrameBackchain.cpp    |   8 +-
 .../Process/elf-core/ProcessElfCore.cpp       |  10 +-
 .../Plugins/Process/elf-core/ThreadElfCore.h  |   2 +-
 .../gdb-remote/GDBRemoteClientBase.cpp        |   4 +-
 .../gdb-remote/GDBRemoteCommunication.cpp     |  18 +-
 .../GDBRemoteCommunicationClient.cpp          |  10 +-
 .../gdb-remote/GDBRemoteRegisterContext.cpp   |  37 +--
 .../Process/gdb-remote/ProcessGDBRemote.cpp   |  93 +++---
 .../Process/mach-core/ProcessMachCore.cpp     |  14 +-
 .../Process/minidump/ProcessMinidump.cpp      |   3 +-
 .../Python/ScriptInterpreterPython.cpp        |   2 +-
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  |  48 +--
 .../SymbolFile/DWARF/DWARFDebugAbbrev.cpp     |   4 +-
 .../SymbolFile/DWARF/DWARFDebugAranges.cpp    |   2 +-
 .../SymbolFile/DWARF/DWARFDebugInfo.cpp       |   4 +-
 .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h |   4 +-
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  |  28 +-
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.h    |  23 +-
 .../SymbolFile/DWARF/DWARFDebugLine.cpp       |   2 +-
 .../SymbolFile/DWARF/DWARFDebugRanges.cpp     |   2 +-
 .../SymbolFile/DWARF/DWARFDeclContext.cpp     |   4 +-
 .../SymbolFile/DWARF/DWARFDeclContext.h       |   2 +-
 .../SymbolFile/DWARF/DWARFFormValue.cpp       |   6 +-
 .../Plugins/SymbolFile/DWARF/DWARFFormValue.h |   2 +-
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |   9 +-
 .../SymbolFile/DWARF/HashedNameToDIE.cpp      |   6 +-
 .../SymbolFile/DWARF/ManualDWARFIndex.cpp     |  10 +-
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      | 125 ++++----
 .../SymbolFile/DWARF/SymbolFileDWARF.h        |   2 +-
 .../DWARF/SymbolFileDWARFDebugMap.cpp         |  56 ++--
 .../SymbolFile/DWARF/UniqueDWARFASTType.cpp   |   4 +-
 .../SymbolFile/Symtab/SymbolFileSymtab.cpp    |  12 +-
 .../SymbolVendor/ELF/SymbolVendorELF.cpp      |  12 +-
 .../MacOSX/AppleGetItemInfoHandler.cpp        |   2 +-
 .../MacOSX/AppleGetPendingItemsHandler.cpp    |   4 +-
 .../MacOSX/AppleGetQueuesHandler.cpp          |   4 +-
 .../MacOSX/AppleGetThreadItemInfoHandler.cpp  |   2 +-
 .../MacOSX/SystemRuntimeMacOSX.cpp            |  17 +-
 .../UnwindAssemblyInstEmulation.cpp           |  10 +-
 .../UnwindAssemblyInstEmulation.h             |   2 +-
 .../UnwindAssembly/x86/UnwindAssembly-x86.cpp |   2 +-
 lldb/source/Symbol/ClangASTContext.cpp        |  12 +-
 lldb/source/Symbol/ClangASTImporter.cpp       |  12 +-
 lldb/source/Symbol/CompactUnwindInfo.cpp      |   2 +-
 lldb/source/Symbol/CompilerType.cpp           |   4 +-
 lldb/source/Symbol/LocateSymbolFile.cpp       |   7 +-
 lldb/source/Symbol/SymbolContext.cpp          |   2 +-
 lldb/source/Symbol/Symtab.cpp                 |   2 +-
 lldb/source/Symbol/Type.cpp                   |   2 +-
 lldb/source/Target/CPPLanguageRuntime.cpp     |   2 +-
 lldb/source/Target/Process.cpp                |   6 +-
 lldb/source/Target/SectionLoadHistory.cpp     |   2 +-
 lldb/source/Target/StopInfo.cpp               |   2 +-
 .../Target/ThreadPlanShouldStopHere.cpp       |   2 +-
 lldb/tools/lldb-mi/MICmdCmdExec.cpp           |   4 +-
 lldb/tools/lldb-mi/MICmdCmdTarget.cpp         |   2 +-
 lldb/tools/lldb-mi/MICmnBase.cpp              |   2 +-
 lldb/tools/lldb-mi/MICmnMIOutOfBandRecord.cpp |   4 +-
 lldb/tools/lldb-mi/MICmnMIResultRecord.cpp    |   2 +-
 lldb/tools/lldb-mi/MIDriver.cpp               |   4 +-
 lldb/tools/lldb-mi/MIDriverBase.cpp           |   2 +-
 lldb/tools/lldb-mi/MIDriverMgr.cpp            |   2 +-
 lldb/tools/lldb-server/lldb-gdbserver.cpp     |  20 +-
 lldb/tools/lldb-server/lldb-platform.cpp      |  18 +-
 .../unittests/Utility/StringExtractorTest.cpp |   2 +-
 .../lldb-server/inferior/thread_inferior.cpp  |   2 +-
 .../lldb-server/tests/MessageObjects.cpp      |   8 +-
 llvm/include/llvm/ADT/DenseMap.h              |   2 +-
 .../llvm/Demangle/MicrosoftDemangleNodes.h    |   2 +-
 250 files changed, 1487 insertions(+), 1461 deletions(-)

diff --git a/lldb/include/lldb/Breakpoint/BreakpointResolver.h b/lldb/include/lldb/Breakpoint/BreakpointResolver.h
index 8767e0d8b5c45..11e183b33482d 100644
--- a/lldb/include/lldb/Breakpoint/BreakpointResolver.h
+++ b/lldb/include/lldb/Breakpoint/BreakpointResolver.h
@@ -209,7 +209,7 @@ class BreakpointResolver : public Searcher {
                           const char *) = delete;
 
   lldb::BreakpointLocationSP AddLocation(Address loc_addr,
-                                         bool *new_location = NULL);
+                                         bool *new_location = nullptr);
 
   Breakpoint *m_breakpoint; // This is the breakpoint we add locations to.
   lldb::addr_t m_offset;    // A random offset the user asked us to add to any
diff --git a/lldb/source/API/SBAddress.cpp b/lldb/source/API/SBAddress.cpp
index cb67690544b25..358cb400a76cc 100644
--- a/lldb/source/API/SBAddress.cpp
+++ b/lldb/source/API/SBAddress.cpp
@@ -83,7 +83,7 @@ bool SBAddress::IsValid() const {
 SBAddress::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBAddress, operator bool);
 
-  return m_opaque_up != NULL && m_opaque_up->IsValid();
+  return m_opaque_up != nullptr && m_opaque_up->IsValid();
 }
 
 void SBAddress::Clear() {
@@ -186,7 +186,7 @@ Address *SBAddress::operator->() { return m_opaque_up.get(); }
 const Address *SBAddress::operator->() const { return m_opaque_up.get(); }
 
 Address &SBAddress::ref() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new Address());
   return *m_opaque_up;
 }
@@ -208,7 +208,7 @@ bool SBAddress::GetDescription(SBStream &description) {
   // case there isn't one already...
   Stream &strm = description.ref();
   if (m_opaque_up->IsValid()) {
-    m_opaque_up->Dump(&strm, NULL, Address::DumpStyleResolvedDescription,
+    m_opaque_up->Dump(&strm, nullptr, Address::DumpStyleResolvedDescription,
                       Address::DumpStyleModuleWithFileAddress, 4);
     StreamString sstrm;
     //        m_opaque_up->Dump (&sstrm, NULL,
diff --git a/lldb/source/API/SBBlock.cpp b/lldb/source/API/SBBlock.cpp
index b8ca473894fc7..f333d1d7b5f32 100644
--- a/lldb/source/API/SBBlock.cpp
+++ b/lldb/source/API/SBBlock.cpp
@@ -25,7 +25,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBBlock::SBBlock() : m_opaque_ptr(NULL) {
+SBBlock::SBBlock() : m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBlock);
 }
 
@@ -44,7 +44,7 @@ const SBBlock &SBBlock::operator=(const SBBlock &rhs) {
   return LLDB_RECORD_RESULT(*this);
 }
 
-SBBlock::~SBBlock() { m_opaque_ptr = NULL; }
+SBBlock::~SBBlock() { m_opaque_ptr = nullptr; }
 
 bool SBBlock::IsValid() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, IsValid);
@@ -53,14 +53,14 @@ bool SBBlock::IsValid() const {
 SBBlock::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, operator bool);
 
-  return m_opaque_ptr != NULL;
+  return m_opaque_ptr != nullptr;
 }
 
 bool SBBlock::IsInlined() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, IsInlined);
 
   if (m_opaque_ptr)
-    return m_opaque_ptr->GetInlinedFunctionInfo() != NULL;
+    return m_opaque_ptr->GetInlinedFunctionInfo() != nullptr;
   return false;
 }
 
@@ -77,10 +77,10 @@ const char *SBBlock::GetInlinedName() const {
         language = function->GetLanguage();
       else
         language = lldb::eLanguageTypeUnknown;
-      return inlined_info->GetName(language).AsCString(NULL);
+      return inlined_info->GetName(language).AsCString(nullptr);
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 SBFileSpec SBBlock::GetInlinedCallSiteFile() const {
diff --git a/lldb/source/API/SBBreakpointLocation.cpp b/lldb/source/API/SBBreakpointLocation.cpp
index 560687507097e..640545f55ef97 100644
--- a/lldb/source/API/SBBreakpointLocation.cpp
+++ b/lldb/source/API/SBBreakpointLocation.cpp
@@ -179,7 +179,7 @@ const char *SBBreakpointLocation::GetCondition() {
         loc_sp->GetTarget().GetAPIMutex());
     return loc_sp->GetConditionText();
   }
-  return NULL;
+  return nullptr;
 }
 
 void SBBreakpointLocation::SetAutoContinue(bool auto_continue) {
@@ -357,7 +357,7 @@ const char *SBBreakpointLocation::GetThreadName() const {
         loc_sp->GetTarget().GetAPIMutex());
     return loc_sp->GetThreadName();
   }
-  return NULL;
+  return nullptr;
 }
 
 void SBBreakpointLocation::SetQueueName(const char *queue_name) {
@@ -382,7 +382,7 @@ const char *SBBreakpointLocation::GetQueueName() const {
         loc_sp->GetTarget().GetAPIMutex());
     loc_sp->GetQueueName();
   }
-  return NULL;
+  return nullptr;
 }
 
 bool SBBreakpointLocation::IsResolved() {
diff --git a/lldb/source/API/SBBroadcaster.cpp b/lldb/source/API/SBBroadcaster.cpp
index 4119fb2c134cc..e1efdf7baf61f 100644
--- a/lldb/source/API/SBBroadcaster.cpp
+++ b/lldb/source/API/SBBroadcaster.cpp
@@ -16,20 +16,19 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBBroadcaster::SBBroadcaster() : m_opaque_sp(), m_opaque_ptr(NULL) {
+SBBroadcaster::SBBroadcaster() : m_opaque_sp(), m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBroadcaster);
 }
 
 SBBroadcaster::SBBroadcaster(const char *name)
-    : m_opaque_sp(new Broadcaster(NULL, name)), m_opaque_ptr(NULL) {
+    : m_opaque_sp(new Broadcaster(nullptr, name)), m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR(SBBroadcaster, (const char *), name);
 
   m_opaque_ptr = m_opaque_sp.get();
 }
 
 SBBroadcaster::SBBroadcaster(lldb_private::Broadcaster *broadcaster, bool owns)
-    : m_opaque_sp(owns ? broadcaster : NULL), m_opaque_ptr(broadcaster) {
-}
+    : m_opaque_sp(owns ? broadcaster : nullptr), m_opaque_ptr(broadcaster) {}
 
 SBBroadcaster::SBBroadcaster(const SBBroadcaster &rhs)
     : m_opaque_sp(rhs.m_opaque_sp), m_opaque_ptr(rhs.m_opaque_ptr) {
@@ -48,13 +47,13 @@ const SBBroadcaster &SBBroadcaster::operator=(const SBBroadcaster &rhs) {
   return LLDB_RECORD_RESULT(*this);
 }
 
-SBBroadcaster::~SBBroadcaster() { reset(NULL, false); }
+SBBroadcaster::~SBBroadcaster() { reset(nullptr, false); }
 
 void SBBroadcaster::BroadcastEventByType(uint32_t event_type, bool unique) {
   LLDB_RECORD_METHOD(void, SBBroadcaster, BroadcastEventByType,
                      (uint32_t, bool), event_type, unique);
 
-  if (m_opaque_ptr == NULL)
+  if (m_opaque_ptr == nullptr)
     return;
 
   if (unique)
@@ -67,7 +66,7 @@ void SBBroadcaster::BroadcastEvent(const SBEvent &event, bool unique) {
   LLDB_RECORD_METHOD(void, SBBroadcaster, BroadcastEvent,
                      (const lldb::SBEvent &, bool), event, unique);
 
-  if (m_opaque_ptr == NULL)
+  if (m_opaque_ptr == nullptr)
     return;
 
   EventSP event_sp = event.GetSP();
@@ -104,7 +103,7 @@ const char *SBBroadcaster::GetName() const {
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetBroadcasterName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 bool SBBroadcaster::EventTypeHasListeners(uint32_t event_type) {
@@ -144,14 +143,14 @@ bool SBBroadcaster::IsValid() const {
 SBBroadcaster::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBroadcaster, operator bool);
 
-  return m_opaque_ptr != NULL;
+  return m_opaque_ptr != nullptr;
 }
 
 void SBBroadcaster::Clear() {
   LLDB_RECORD_METHOD_NO_ARGS(void, SBBroadcaster, Clear);
 
   m_opaque_sp.reset();
-  m_opaque_ptr = NULL;
+  m_opaque_ptr = nullptr;
 }
 
 bool SBBroadcaster::operator==(const SBBroadcaster &rhs) const {
diff --git a/lldb/source/API/SBCommunication.cpp b/lldb/source/API/SBCommunication.cpp
index 21bcde12cb3de..90df70bde72f1 100644
--- a/lldb/source/API/SBCommunication.cpp
+++ b/lldb/source/API/SBCommunication.cpp
@@ -16,7 +16,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBCommunication::SBCommunication() : m_opaque(NULL), m_opaque_owned(false) {
+SBCommunication::SBCommunication() : m_opaque(nullptr), m_opaque_owned(false) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommunication);
 }
 
@@ -28,7 +28,7 @@ SBCommunication::SBCommunication(const char *broadcaster_name)
 SBCommunication::~SBCommunication() {
   if (m_opaque && m_opaque_owned)
     delete m_opaque;
-  m_opaque = NULL;
+  m_opaque = nullptr;
   m_opaque_owned = false;
 }
 
@@ -39,7 +39,7 @@ bool SBCommunication::IsValid() const {
 SBCommunication::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, operator bool);
 
-  return m_opaque != NULL;
+  return m_opaque != nullptr;
 }
 
 bool SBCommunication::GetCloseOnEOF() {
@@ -64,7 +64,7 @@ ConnectionStatus SBCommunication::Connect(const char *url) {
   if (m_opaque) {
     if (!m_opaque->HasConnection())
       m_opaque->SetConnection(Host::CreateDefaultConnection(url).release());
-    return m_opaque->Connect(url, NULL);
+    return m_opaque->Connect(url, nullptr);
   }
   return eConnectionStatusNoConnection;
 }
@@ -115,7 +115,7 @@ size_t SBCommunication::Read(void *dst, size_t dst_len, uint32_t timeout_usec,
                                     ? Timeout<std::micro>(llvm::None)
                                     : std::chrono::microseconds(timeout_usec);
   if (m_opaque)
-    bytes_read = m_opaque->Read(dst, dst_len, timeout, status, NULL);
+    bytes_read = m_opaque->Read(dst, dst_len, timeout, status, nullptr);
   else
     status = eConnectionStatusNoConnection;
 
@@ -130,7 +130,7 @@ size_t SBCommunication::Write(const void *src, size_t src_len,
 
   size_t bytes_written = 0;
   if (m_opaque)
-    bytes_written = m_opaque->Write(src, src_len, status, NULL);
+    bytes_written = m_opaque->Write(src, src_len, status, nullptr);
   else
     status = eConnectionStatusNoConnection;
 
diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp
index b5bac6bf7d1ad..48b501043e1cd 100644
--- a/lldb/source/API/SBCompileUnit.cpp
+++ b/lldb/source/API/SBCompileUnit.cpp
@@ -20,7 +20,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBCompileUnit::SBCompileUnit() : m_opaque_ptr(NULL) {
+SBCompileUnit::SBCompileUnit() : m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCompileUnit);
 }
 
@@ -41,7 +41,7 @@ const SBCompileUnit &SBCompileUnit::operator=(const SBCompileUnit &rhs) {
   return LLDB_RECORD_RESULT(*this);
 }
 
-SBCompileUnit::~SBCompileUnit() { m_opaque_ptr = NULL; }
+SBCompileUnit::~SBCompileUnit() { m_opaque_ptr = nullptr; }
 
 SBFileSpec SBCompileUnit::GetFileSpec() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBCompileUnit,
@@ -108,8 +108,8 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line,
       file_spec = *m_opaque_ptr;
 
     index = m_opaque_ptr->FindLineEntry(
-        start_idx, line, inline_file_spec ? inline_file_spec->get() : NULL,
-        exact, NULL);
+        start_idx, line, inline_file_spec ? inline_file_spec->get() : nullptr,
+        exact, nullptr);
   }
 
   return index;
@@ -193,7 +193,7 @@ bool SBCompileUnit::IsValid() const {
 SBCompileUnit::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCompileUnit, operator bool);
 
-  return m_opaque_ptr != NULL;
+  return m_opaque_ptr != nullptr;
 }
 
 bool SBCompileUnit::operator==(const SBCompileUnit &rhs) const {
diff --git a/lldb/source/API/SBData.cpp b/lldb/source/API/SBData.cpp
index d9e54d54a1807..528cd8d43ecce 100644
--- a/lldb/source/API/SBData.cpp
+++ b/lldb/source/API/SBData.cpp
@@ -64,7 +64,7 @@ bool SBData::IsValid() {
 SBData::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBData, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 uint8_t SBData::GetAddressByteSize() {
@@ -312,13 +312,13 @@ const char *SBData::GetString(lldb::SBError &error, lldb::offset_t offset) {
   LLDB_RECORD_METHOD(const char *, SBData, GetString,
                      (lldb::SBError &, lldb::offset_t), error, offset);
 
-  const char *value = 0;
+  const char *value = nullptr;
   if (!m_opaque_sp.get()) {
     error.SetErrorString("no value to read from");
   } else {
     uint32_t old_offset = offset;
     value = m_opaque_sp->GetCStr(&offset);
-    if (offset == old_offset || (value == NULL))
+    if (offset == old_offset || (value == nullptr))
       error.SetErrorString("unable to read data");
   }
   return value;
@@ -346,13 +346,13 @@ size_t SBData::ReadRawData(lldb::SBError &error, lldb::offset_t offset,
                     (lldb::SBError &, lldb::offset_t, void *, size_t), error,
                     offset, buf, size);
 
-  void *ok = NULL;
+  void *ok = nullptr;
   if (!m_opaque_sp.get()) {
     error.SetErrorString("no value to read from");
   } else {
     uint32_t old_offset = offset;
     ok = m_opaque_sp->GetU8(&offset, buf, size);
-    if ((offset == old_offset) || (ok == NULL))
+    if ((offset == old_offset) || (ok == nullptr))
       error.SetErrorString("unable to read data");
   }
   return ok ? size : 0;
diff --git a/lldb/source/API/SBDeclaration.cpp b/lldb/source/API/SBDeclaration.cpp
index f14ca3993f41a..a7790b2939810 100644
--- a/lldb/source/API/SBDeclaration.cpp
+++ b/lldb/source/API/SBDeclaration.cpp
@@ -147,7 +147,7 @@ const lldb_private::Declaration *SBDeclaration::operator->() const {
 }
 
 lldb_private::Declaration &SBDeclaration::ref() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new lldb_private::Declaration());
   return *m_opaque_up;
 }
diff --git a/lldb/source/API/SBError.cpp b/lldb/source/API/SBError.cpp
index c4627898282e5..7256e8e55de94 100644
--- a/lldb/source/API/SBError.cpp
+++ b/lldb/source/API/SBError.cpp
@@ -41,7 +41,7 @@ const char *SBError::GetCString() const {
 
   if (m_opaque_up)
     return m_opaque_up->AsCString();
-  return NULL;
+  return nullptr;
 }
 
 void SBError::Clear() {
@@ -144,11 +144,11 @@ bool SBError::IsValid() const {
 SBError::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, operator bool);
 
-  return m_opaque_up != NULL;
+  return m_opaque_up != nullptr;
 }
 
 void SBError::CreateIfNeeded() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new Status());
 }
 
@@ -175,7 +175,8 @@ bool SBError::GetDescription(SBStream &description) {
       description.Printf("success");
     else {
       const char *err_string = GetCString();
-      description.Printf("error: %s", (err_string != NULL ? err_string : ""));
+      description.Printf("error: %s",
+                         (err_string != nullptr ? err_string : ""));
     }
   } else
     description.Printf("error: <NULL>");
diff --git a/lldb/source/API/SBEvent.cpp b/lldb/source/API/SBEvent.cpp
index b63108ec1fc93..75ca2830df9fb 100644
--- a/lldb/source/API/SBEvent.cpp
+++ b/lldb/source/API/SBEvent.cpp
@@ -22,7 +22,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBEvent::SBEvent() : m_event_sp(), m_opaque_ptr(NULL) {
+SBEvent::SBEvent() : m_event_sp(), m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEvent);
 }
 
@@ -69,7 +69,7 @@ const char *SBEvent::GetDataFlavor() {
     if (event_data)
       return lldb_event->GetData()->GetFlavor().AsCString();
   }
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SBEvent::GetType() const {
@@ -168,7 +168,7 @@ SBEvent::operator bool() const {
 
   // Do NOT use m_opaque_ptr directly!!! Must use the SBEvent::get() accessor.
   // See comments in SBEvent::get()....
-  return SBEvent::get() != NULL;
+  return SBEvent::get() != nullptr;
 }
 
 const char *SBEvent::GetCStringFromEvent(const SBEvent &event) {
diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp
index 35ddb1c7cd6b8..1770bede2f428 100644
--- a/lldb/source/API/SBFunction.cpp
+++ b/lldb/source/API/SBFunction.cpp
@@ -22,7 +22,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBFunction::SBFunction() : m_opaque_ptr(NULL) {
+SBFunction::SBFunction() : m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFunction);
 }
 
@@ -42,7 +42,7 @@ const SBFunction &SBFunction::operator=(const SBFunction &rhs) {
   return LLDB_RECORD_RESULT(*this);
 }
 
-SBFunction::~SBFunction() { m_opaque_ptr = NULL; }
+SBFunction::~SBFunction() { m_opaque_ptr = nullptr; }
 
 bool SBFunction::IsValid() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFunction, IsValid);
@@ -51,13 +51,13 @@ bool SBFunction::IsValid() const {
 SBFunction::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFunction, operator bool);
 
-  return m_opaque_ptr != NULL;
+  return m_opaque_ptr != nullptr;
 }
 
 const char *SBFunction::GetName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetName);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   if (m_opaque_ptr)
     cstr = m_opaque_ptr->GetName().AsCString();
 
@@ -67,7 +67,7 @@ const char *SBFunction::GetName() const {
 const char *SBFunction::GetDisplayName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetDisplayName);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   if (m_opaque_ptr)
     cstr = m_opaque_ptr->GetMangled()
                .GetDisplayDemangledName(m_opaque_ptr->GetLanguage())
@@ -79,7 +79,7 @@ const char *SBFunction::GetDisplayName() const {
 const char *SBFunction::GetMangledName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetMangledName);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   if (m_opaque_ptr)
     cstr = m_opaque_ptr->GetMangled().GetMangledName().AsCString();
   return cstr;
@@ -118,7 +118,7 @@ SBInstructionList SBFunction::GetInstructions(SBTarget target) {
   LLDB_RECORD_METHOD(lldb::SBInstructionList, SBFunction, GetInstructions,
                      (lldb::SBTarget), target);
 
-  return LLDB_RECORD_RESULT(GetInstructions(target, NULL));
+  return LLDB_RECORD_RESULT(GetInstructions(target, nullptr));
 }
 
 SBInstructionList SBFunction::GetInstructions(SBTarget target,
@@ -141,7 +141,7 @@ SBInstructionList SBFunction::GetInstructions(SBTarget target,
     if (module_sp) {
       const bool prefer_file_cache = false;
       sb_instructions.SetDisassembler(Disassembler::DisassembleRange(
-          module_sp->GetArchitecture(), NULL, flavor, exe_ctx,
+          module_sp->GetArchitecture(), nullptr, flavor, exe_ctx,
           m_opaque_ptr->GetAddressRange(), prefer_file_cache));
     }
   }
diff --git a/lldb/source/API/SBHostOS.cpp b/lldb/source/API/SBHostOS.cpp
index b8aa94d97a13a..b5464c4e7df71 100644
--- a/lldb/source/API/SBHostOS.cpp
+++ b/lldb/source/API/SBHostOS.cpp
@@ -107,8 +107,9 @@ lldb::thread_t SBHostOS::ThreadCreate(const char *name,
   LLDB_RECORD_DUMMY(lldb::thread_t, SBHostOS, ThreadCreate,
                     (lldb::thread_func_t, void *, SBError *), name,
                     thread_function, thread_arg, error_ptr);
-  HostThread thread(ThreadLauncher::LaunchThread(
-      name, thread_function, thread_arg, error_ptr ? error_ptr->get() : NULL));
+  HostThread thread(
+      ThreadLauncher::LaunchThread(name, thread_function, thread_arg,
+                                   error_ptr ? error_ptr->get() : nullptr));
   return thread.Release();
 }
 
diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp
index 8502aad9dbb21..fcf66fd258240 100644
--- a/lldb/source/API/SBInstruction.cpp
+++ b/lldb/source/API/SBInstruction.cpp
@@ -127,7 +127,7 @@ const char *SBInstruction::GetMnemonic(SBTarget target) {
     }
     return inst_sp->GetMnemonic(&exe_ctx);
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBInstruction::GetOperands(SBTarget target) {
@@ -147,7 +147,7 @@ const char *SBInstruction::GetOperands(SBTarget target) {
     }
     return inst_sp->GetOperands(&exe_ctx);
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBInstruction::GetComment(SBTarget target) {
@@ -167,7 +167,7 @@ const char *SBInstruction::GetComment(SBTarget target) {
     }
     return inst_sp->GetComment(&exe_ctx);
   }
-  return NULL;
+  return nullptr;
 }
 
 size_t SBInstruction::GetByteSize() {
@@ -249,7 +249,7 @@ bool SBInstruction::GetDescription(lldb::SBStream &s) {
     // didn't have a stream already created, one will get created...
     FormatEntity::Entry format;
     FormatEntity::Parse("${addr}: ", format);
-    inst_sp->Dump(&s.ref(), 0, true, false, NULL, &sc, NULL, &format, 0);
+    inst_sp->Dump(&s.ref(), 0, true, false, nullptr, &sc, nullptr, &format, 0);
     return true;
   }
   return false;
@@ -258,7 +258,7 @@ bool SBInstruction::GetDescription(lldb::SBStream &s) {
 void SBInstruction::Print(FILE *out) {
   LLDB_RECORD_METHOD(void, SBInstruction, Print, (FILE *), out);
 
-  if (out == NULL)
+  if (out == nullptr)
     return;
 
   lldb::InstructionSP inst_sp(GetOpaque());
@@ -272,7 +272,8 @@ void SBInstruction::Print(FILE *out) {
     StreamFile out_stream(out, false);
     FormatEntity::Entry format;
     FormatEntity::Parse("${addr}: ", format);
-    inst_sp->Dump(&out_stream, 0, true, false, NULL, &sc, NULL, &format, 0);
+    inst_sp->Dump(&out_stream, 0, true, false, nullptr, &sc, nullptr, &format,
+                  0);
   }
 }
 
diff --git a/lldb/source/API/SBInstructionList.cpp b/lldb/source/API/SBInstructionList.cpp
index 390ad4750613e..cce923bf04a4b 100644
--- a/lldb/source/API/SBInstructionList.cpp
+++ b/lldb/source/API/SBInstructionList.cpp
@@ -49,7 +49,7 @@ bool SBInstructionList::IsValid() const {
 SBInstructionList::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstructionList, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 size_t SBInstructionList::GetSize() {
@@ -119,7 +119,7 @@ void SBInstructionList::SetDisassembler(const lldb::DisassemblerSP &opaque_sp) {
 void SBInstructionList::Print(FILE *out) {
   LLDB_RECORD_METHOD(void, SBInstructionList, Print, (FILE *), out);
 
-  if (out == NULL)
+  if (out == nullptr)
     return;
 }
 
@@ -142,7 +142,7 @@ bool SBInstructionList::GetDescription(lldb::SBStream &description) {
       for (size_t i = 0; i < num_instructions; ++i) {
         Instruction *inst =
             m_opaque_sp->GetInstructionList().GetInstructionAtIndex(i).get();
-        if (inst == NULL)
+        if (inst == nullptr)
           break;
 
         const Address &addr = inst->GetAddress();
@@ -153,7 +153,7 @@ bool SBInstructionList::GetDescription(lldb::SBStream &description) {
               addr, eSymbolContextEverything, sc);
         }
 
-        inst->Dump(&sref, max_opcode_byte_size, true, false, NULL, &sc,
+        inst->Dump(&sref, max_opcode_byte_size, true, false, nullptr, &sc,
                    &prev_sc, &format, 0);
         sref.EOL();
       }
diff --git a/lldb/source/API/SBLineEntry.cpp b/lldb/source/API/SBLineEntry.cpp
index b7aac0be93562..010a6057cd310 100644
--- a/lldb/source/API/SBLineEntry.cpp
+++ b/lldb/source/API/SBLineEntry.cpp
@@ -162,7 +162,7 @@ const lldb_private::LineEntry *SBLineEntry::operator->() const {
 }
 
 lldb_private::LineEntry &SBLineEntry::ref() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new lldb_private::LineEntry());
   return *m_opaque_up;
 }
diff --git a/lldb/source/API/SBListener.cpp b/lldb/source/API/SBListener.cpp
index 5eb5abd80280b..4fe90f6f68620 100644
--- a/lldb/source/API/SBListener.cpp
+++ b/lldb/source/API/SBListener.cpp
@@ -20,7 +20,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBListener::SBListener() : m_opaque_sp(), m_unused_ptr(NULL) {
+SBListener::SBListener() : m_opaque_sp(), m_unused_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBListener);
 }
 
@@ -160,7 +160,7 @@ bool SBListener::WaitForEvent(uint32_t timeout_secs, SBEvent &event) {
   }
 
   if (!success)
-    event.reset(NULL);
+    event.reset(nullptr);
   return success;
 }
 
@@ -182,7 +182,7 @@ bool SBListener::WaitForEventForBroadcaster(uint32_t num_seconds,
       return true;
     }
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -205,7 +205,7 @@ bool SBListener::WaitForEventForBroadcasterWithType(
       return true;
     }
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -217,7 +217,7 @@ bool SBListener::PeekAtNextEvent(SBEvent &event) {
     event.reset(m_opaque_sp->PeekAtNextEvent());
     return event.IsValid();
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -231,7 +231,7 @@ bool SBListener::PeekAtNextEventForBroadcaster(const SBBroadcaster &broadcaster,
     event.reset(m_opaque_sp->PeekAtNextEventForBroadcaster(broadcaster.get()));
     return event.IsValid();
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -247,7 +247,7 @@ bool SBListener::PeekAtNextEventForBroadcasterWithType(
         broadcaster.get(), event_type_mask));
     return event.IsValid();
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -261,7 +261,7 @@ bool SBListener::GetNextEvent(SBEvent &event) {
       return true;
     }
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -279,7 +279,7 @@ bool SBListener::GetNextEventForBroadcaster(const SBBroadcaster &broadcaster,
       return true;
     }
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
@@ -299,7 +299,7 @@ bool SBListener::GetNextEventForBroadcasterWithType(
       return true;
     }
   }
-  event.reset(NULL);
+  event.reset(nullptr);
   return false;
 }
 
diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp
index a22d7ca298907..4bd32bce1c532 100644
--- a/lldb/source/API/SBModule.cpp
+++ b/lldb/source/API/SBModule.cpp
@@ -40,8 +40,8 @@ SBModule::SBModule(const SBModuleSpec &module_spec) : m_opaque_sp() {
   LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModuleSpec &), module_spec);
 
   ModuleSP module_sp;
-  Status error = ModuleList::GetSharedModule(*module_spec.m_opaque_up,
-                                             module_sp, NULL, NULL, NULL);
+  Status error = ModuleList::GetSharedModule(
+      *module_spec.m_opaque_up, module_sp, nullptr, nullptr, nullptr);
   if (module_sp)
     SetSP(module_sp);
 }
@@ -85,7 +85,7 @@ bool SBModule::IsValid() const {
 SBModule::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModule, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 void SBModule::Clear() {
@@ -159,7 +159,7 @@ bool SBModule::SetRemoteInstallFileSpec(lldb::SBFileSpec &file) {
 const uint8_t *SBModule::GetUUIDBytes() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const uint8_t *, SBModule, GetUUIDBytes);
 
-  const uint8_t *uuid_bytes = NULL;
+  const uint8_t *uuid_bytes = nullptr;
   ModuleSP module_sp(GetSP());
   if (module_sp)
     uuid_bytes = module_sp->GetUUID().GetBytes().data();
@@ -170,8 +170,7 @@ const uint8_t *SBModule::GetUUIDBytes() const {
 const char *SBModule::GetUUIDString() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBModule, GetUUIDString);
 
-
-  const char *uuid_cstr = NULL;
+  const char *uuid_cstr = nullptr;
   ModuleSP module_sp(GetSP());
   if (module_sp) {
     // We are going to return a "const char *" value through the public API, so
@@ -185,7 +184,7 @@ const char *SBModule::GetUUIDString() const {
     return uuid_cstr;
   }
 
-  return NULL;
+  return nullptr;
 }
 
 bool SBModule::operator==(const SBModule &rhs) const {
@@ -296,7 +295,7 @@ static Symtab *GetUnifiedSymbolTable(const lldb::ModuleSP &module_sp) {
     if (symbols)
       return symbols->GetSymtab();
   }
-  return NULL;
+  return nullptr;
 }
 
 size_t SBModule::GetNumSymbols() {
@@ -410,7 +409,7 @@ lldb::SBSymbolContextList SBModule::FindFunctions(const char *name,
     const bool symbols_ok = true;
     const bool inlines_ok = true;
     FunctionNameType type = static_cast<FunctionNameType>(name_type_mask);
-    module_sp->FindFunctions(ConstString(name), NULL, type, symbols_ok,
+    module_sp->FindFunctions(ConstString(name), nullptr, type, symbols_ok,
                              inlines_ok, append, *sb_sc_list);
   }
   return LLDB_RECORD_RESULT(sb_sc_list);
@@ -427,7 +426,7 @@ SBValueList SBModule::FindGlobalVariables(SBTarget &target, const char *name,
   if (name && module_sp) {
     VariableList variable_list;
     const uint32_t match_count = module_sp->FindGlobalVariables(
-        ConstString(name), NULL, max_matches, variable_list);
+        ConstString(name), nullptr, max_matches, variable_list);
 
     if (match_count > 0) {
       for (uint32_t i = 0; i < match_count; ++i) {
@@ -558,7 +557,7 @@ lldb::SBTypeList SBModule::GetTypes(uint32_t type_mask) {
 
   TypeClass type_class = static_cast<TypeClass>(type_mask);
   TypeList type_list;
-  vendor->GetTypes(NULL, type_class, type_list);
+  vendor->GetTypes(nullptr, type_class, type_list);
   sb_type_list.m_opaque_up->Append(type_list);
   return LLDB_RECORD_RESULT(sb_type_list);
 }
@@ -606,7 +605,7 @@ const char *SBModule::GetTriple() {
     ConstString const_triple(triple.c_str());
     return const_triple.GetCString();
   }
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SBModule::GetAddressByteSize() {
diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp
index 235c37a810197..f3708d8e084f3 100644
--- a/lldb/source/API/SBPlatform.cpp
+++ b/lldb/source/API/SBPlatform.cpp
@@ -28,7 +28,7 @@ using namespace lldb_private;
 
 // PlatformConnectOptions
 struct PlatformConnectOptions {
-  PlatformConnectOptions(const char *url = NULL)
+  PlatformConnectOptions(const char *url = nullptr)
       : m_url(), m_rsync_options(), m_rsync_remote_path_prefix(),
         m_rsync_enabled(false), m_rsync_omit_hostname_from_remote_path(false),
         m_local_cache_directory() {
@@ -48,7 +48,7 @@ struct PlatformConnectOptions {
 
 // PlatformShellCommand
 struct PlatformShellCommand {
-  PlatformShellCommand(const char *shell_command = NULL)
+  PlatformShellCommand(const char *shell_command = nullptr)
       : m_command(), m_working_dir(), m_status(0), m_signo(0) {
     if (shell_command && shell_command[0])
       m_command = shell_command;
@@ -94,7 +94,7 @@ const char *SBPlatformConnectOptions::GetURL() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformConnectOptions, GetURL);
 
   if (m_opaque_ptr->m_url.empty())
-    return NULL;
+    return nullptr;
   return m_opaque_ptr->m_url.c_str();
 }
 
@@ -188,7 +188,7 @@ const char *SBPlatformShellCommand::GetCommand() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetCommand);
 
   if (m_opaque_ptr->m_command.empty())
-    return NULL;
+    return nullptr;
   return m_opaque_ptr->m_command.c_str();
 }
 
@@ -207,7 +207,7 @@ const char *SBPlatformShellCommand::GetWorkingDirectory() {
                              GetWorkingDirectory);
 
   if (m_opaque_ptr->m_working_dir.empty())
-    return NULL;
+    return nullptr;
   return m_opaque_ptr->m_working_dir.c_str();
 }
 
@@ -256,7 +256,7 @@ const char *SBPlatformShellCommand::GetOutput() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetOutput);
 
   if (m_opaque_ptr->m_output.empty())
-    return NULL;
+    return nullptr;
   return m_opaque_ptr->m_output.c_str();
 }
 
@@ -282,7 +282,7 @@ bool SBPlatform::IsValid() const {
 SBPlatform::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBPlatform, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 void SBPlatform::Clear() {
@@ -297,7 +297,7 @@ const char *SBPlatform::GetName() {
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
     return platform_sp->GetName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 lldb::PlatformSP SBPlatform::GetSP() const { return m_opaque_sp; }
@@ -312,7 +312,7 @@ const char *SBPlatform::GetWorkingDirectory() {
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
     return platform_sp->GetWorkingDirectory().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 bool SBPlatform::SetWorkingDirectory(const char *path) {
@@ -376,7 +376,7 @@ const char *SBPlatform::GetTriple() {
       return ConstString(arch.GetTriple().getTriple().c_str()).GetCString();
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBPlatform::GetOSBuild() {
@@ -393,7 +393,7 @@ const char *SBPlatform::GetOSBuild() {
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBPlatform::GetOSDescription() {
@@ -410,7 +410,7 @@ const char *SBPlatform::GetOSDescription() {
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBPlatform::GetHostname() {
@@ -419,7 +419,7 @@ const char *SBPlatform::GetHostname() {
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
     return platform_sp->GetHostname();
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SBPlatform::GetOSMajorVersion() {
@@ -513,7 +513,7 @@ SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) {
       return Status("invalid shell command (empty)");
 
     const char *working_dir = shell_command.GetWorkingDirectory();
-    if (working_dir == NULL) {
+    if (working_dir == nullptr) {
       working_dir = platform_sp->GetWorkingDirectory().GetCString();
       if (working_dir)
         shell_command.SetWorkingDirectory(working_dir);
diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp
index d97494b38c7ea..4226ff77ecdc3 100644
--- a/lldb/source/API/SBProcess.cpp
+++ b/lldb/source/API/SBProcess.cpp
@@ -335,7 +335,7 @@ void SBProcess::ReportEventState(const SBEvent &event, FILE *out) const {
   LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState,
                            (const lldb::SBEvent &, FILE *), event, out);
 
-  if (out == NULL)
+  if (out == nullptr)
     return;
 
   ProcessSP process_sp(GetSP());
@@ -534,7 +534,7 @@ int SBProcess::GetExitStatus() {
 const char *SBProcess::GetExitDescription() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetExitDescription);
 
-  const char *exit_desc = NULL;
+  const char *exit_desc = nullptr;
   ProcessSP process_sp(GetSP());
   if (process_sp) {
     std::lock_guard<std::recursive_mutex> guard(
@@ -602,7 +602,7 @@ SBError SBProcess::Continue() {
     if (process_sp->GetTarget().GetDebugger().GetAsyncExecution())
       sb_error.ref() = process_sp->Resume();
     else
-      sb_error.ref() = process_sp->ResumeSynchronous(NULL);
+      sb_error.ref() = process_sp->ResumeSynchronous(nullptr);
   } else
     sb_error.SetErrorString("SBProcess is invalid");
 
@@ -985,7 +985,7 @@ bool SBProcess::GetDescription(SBStream &description) {
     char path[PATH_MAX];
     GetTarget().GetExecutable().GetPath(path, sizeof(path));
     Module *exe_module = process_sp->GetTarget().GetExecutableModulePointer();
-    const char *exe_name = NULL;
+    const char *exe_name = nullptr;
     if (exe_module)
       exe_name = exe_module->GetFileSpec().GetFilename().AsCString();
 
@@ -1156,7 +1156,7 @@ const char *SBProcess::GetExtendedBacktraceTypeAtIndex(uint32_t idx) {
       return names[idx].AsCString();
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 SBThreadCollection SBProcess::GetHistoryThreads(addr_t addr) {
diff --git a/lldb/source/API/SBQueue.cpp b/lldb/source/API/SBQueue.cpp
index 465bbac93872c..7d1581c42f60a 100644
--- a/lldb/source/API/SBQueue.cpp
+++ b/lldb/source/API/SBQueue.cpp
@@ -49,7 +49,7 @@ class QueueImpl {
 
   ~QueueImpl() {}
 
-  bool IsValid() { return m_queue_wp.lock() != NULL; }
+  bool IsValid() { return m_queue_wp.lock() != nullptr; }
 
   void Clear() {
     m_queue_wp.reset();
@@ -83,7 +83,7 @@ class QueueImpl {
   }
 
   const char *GetName() const {
-    const char *name = NULL;
+    const char *name = nullptr;
     lldb::QueueSP queue_sp = m_queue_wp.lock();
     if (queue_sp.get()) {
       name = queue_sp->GetName();
diff --git a/lldb/source/API/SBQueueItem.cpp b/lldb/source/API/SBQueueItem.cpp
index edb258029d1d8..5f2cbd1bdbfb9 100644
--- a/lldb/source/API/SBQueueItem.cpp
+++ b/lldb/source/API/SBQueueItem.cpp
@@ -41,7 +41,7 @@ bool SBQueueItem::IsValid() const {
 SBQueueItem::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueueItem, operator bool);
 
-  return m_queue_item_sp.get() != NULL;
+  return m_queue_item_sp.get() != nullptr;
 }
 
 void SBQueueItem::Clear() {
diff --git a/lldb/source/API/SBSection.cpp b/lldb/source/API/SBSection.cpp
index f47a799697dcf..14e1e14f59aa1 100644
--- a/lldb/source/API/SBSection.cpp
+++ b/lldb/source/API/SBSection.cpp
@@ -54,7 +54,7 @@ SBSection::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSection, operator bool);
 
   SectionSP section_sp(GetSP());
-  return section_sp && section_sp->GetModule().get() != NULL;
+  return section_sp && section_sp->GetModule().get() != nullptr;
 }
 
 const char *SBSection::GetName() {
@@ -63,7 +63,7 @@ const char *SBSection::GetName() {
   SectionSP section_sp(GetSP());
   if (section_sp)
     return section_sp->GetName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 lldb::SBSection SBSection::GetParent() {
diff --git a/lldb/source/API/SBSourceManager.cpp b/lldb/source/API/SBSourceManager.cpp
index 785f3090777cf..9c4ce3c7f4e3a 100644
--- a/lldb/source/API/SBSourceManager.cpp
+++ b/lldb/source/API/SBSourceManager.cpp
@@ -131,7 +131,7 @@ size_t SBSourceManager::DisplaySourceLinesWithLineNumbersAndColumn(
        const char *, lldb::SBStream &),
       file, line, column, context_before, context_after, current_line_cstr, s);
 
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     return 0;
 
   return m_opaque_up->DisplaySourceLinesWithLineNumbers(
diff --git a/lldb/source/API/SBStream.cpp b/lldb/source/API/SBStream.cpp
index 2f149a24d1351..ae652338e1ea5 100644
--- a/lldb/source/API/SBStream.cpp
+++ b/lldb/source/API/SBStream.cpp
@@ -34,7 +34,7 @@ bool SBStream::IsValid() const {
 SBStream::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStream, operator bool);
 
-  return (m_opaque_up != NULL);
+  return (m_opaque_up != nullptr);
 }
 
 // If this stream is not redirected to a file, it will maintain a local cache
@@ -42,8 +42,8 @@ SBStream::operator bool() const {
 const char *SBStream::GetData() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBStream, GetData);
 
-  if (m_is_file || m_opaque_up == NULL)
-    return NULL;
+  if (m_is_file || m_opaque_up == nullptr)
+    return nullptr;
 
   return static_cast<StreamString *>(m_opaque_up.get())->GetData();
 }
@@ -53,7 +53,7 @@ const char *SBStream::GetData() {
 size_t SBStream::GetSize() {
   LLDB_RECORD_METHOD_NO_ARGS(size_t, SBStream, GetSize);
 
-  if (m_is_file || m_opaque_up == NULL)
+  if (m_is_file || m_opaque_up == nullptr)
     return 0;
 
   return static_cast<StreamString *>(m_opaque_up.get())->GetSize();
@@ -160,7 +160,7 @@ lldb_private::Stream *SBStream::operator->() { return m_opaque_up.get(); }
 lldb_private::Stream *SBStream::get() { return m_opaque_up.get(); }
 
 lldb_private::Stream &SBStream::ref() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new StreamString());
   return *m_opaque_up;
 }
diff --git a/lldb/source/API/SBStringList.cpp b/lldb/source/API/SBStringList.cpp
index b08f232389cf0..2f8bd55855a11 100644
--- a/lldb/source/API/SBStringList.cpp
+++ b/lldb/source/API/SBStringList.cpp
@@ -56,13 +56,13 @@ bool SBStringList::IsValid() const {
 SBStringList::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStringList, operator bool);
 
-  return (m_opaque_up != NULL);
+  return (m_opaque_up != nullptr);
 }
 
 void SBStringList::AppendString(const char *str) {
   LLDB_RECORD_METHOD(void, SBStringList, AppendString, (const char *), str);
 
-  if (str != NULL) {
+  if (str != nullptr) {
     if (IsValid())
       m_opaque_up->AppendString(str);
     else
@@ -74,7 +74,7 @@ void SBStringList::AppendList(const char **strv, int strc) {
   LLDB_RECORD_METHOD(void, SBStringList, AppendList, (const char **, int), strv,
                      strc);
 
-  if ((strv != NULL) && (strc > 0)) {
+  if ((strv != nullptr) && (strc > 0)) {
     if (IsValid())
       m_opaque_up->AppendList(strv, strc);
     else
@@ -115,7 +115,7 @@ const char *SBStringList::GetStringAtIndex(size_t idx) {
   if (IsValid()) {
     return m_opaque_up->GetStringAtIndex(idx);
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBStringList::GetStringAtIndex(size_t idx) const {
@@ -125,7 +125,7 @@ const char *SBStringList::GetStringAtIndex(size_t idx) const {
   if (IsValid()) {
     return m_opaque_up->GetStringAtIndex(idx);
   }
-  return NULL;
+  return nullptr;
 }
 
 void SBStringList::Clear() {
diff --git a/lldb/source/API/SBSymbol.cpp b/lldb/source/API/SBSymbol.cpp
index a3c5acf3941cf..6cc90e0ee368b 100644
--- a/lldb/source/API/SBSymbol.cpp
+++ b/lldb/source/API/SBSymbol.cpp
@@ -18,7 +18,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBSymbol::SBSymbol() : m_opaque_ptr(NULL) {
+SBSymbol::SBSymbol() : m_opaque_ptr(nullptr) {
   LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbol);
 }
 
@@ -37,7 +37,7 @@ const SBSymbol &SBSymbol::operator=(const SBSymbol &rhs) {
   return LLDB_RECORD_RESULT(*this);
 }
 
-SBSymbol::~SBSymbol() { m_opaque_ptr = NULL; }
+SBSymbol::~SBSymbol() { m_opaque_ptr = nullptr; }
 
 void SBSymbol::SetSymbol(lldb_private::Symbol *lldb_object_ptr) {
   m_opaque_ptr = lldb_object_ptr;
@@ -50,13 +50,13 @@ bool SBSymbol::IsValid() const {
 SBSymbol::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbol, operator bool);
 
-  return m_opaque_ptr != NULL;
+  return m_opaque_ptr != nullptr;
 }
 
 const char *SBSymbol::GetName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   if (m_opaque_ptr)
     name = m_opaque_ptr->GetName().AsCString();
 
@@ -66,7 +66,7 @@ const char *SBSymbol::GetName() const {
 const char *SBSymbol::GetDisplayName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetDisplayName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   if (m_opaque_ptr)
     name = m_opaque_ptr->GetMangled()
                .GetDisplayDemangledName(m_opaque_ptr->GetLanguage())
@@ -78,7 +78,7 @@ const char *SBSymbol::GetDisplayName() const {
 const char *SBSymbol::GetMangledName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetMangledName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   if (m_opaque_ptr)
     name = m_opaque_ptr->GetMangled().GetMangledName().AsCString();
   return name;
@@ -105,7 +105,7 @@ bool SBSymbol::GetDescription(SBStream &description) {
   Stream &strm = description.ref();
 
   if (m_opaque_ptr) {
-    m_opaque_ptr->GetDescription(&strm, lldb::eDescriptionLevelFull, NULL);
+    m_opaque_ptr->GetDescription(&strm, lldb::eDescriptionLevelFull, nullptr);
   } else
     strm.PutCString("No value");
 
@@ -116,7 +116,7 @@ SBInstructionList SBSymbol::GetInstructions(SBTarget target) {
   LLDB_RECORD_METHOD(lldb::SBInstructionList, SBSymbol, GetInstructions,
                      (lldb::SBTarget), target);
 
-  return LLDB_RECORD_RESULT(GetInstructions(target, NULL));
+  return LLDB_RECORD_RESULT(GetInstructions(target, nullptr));
 }
 
 SBInstructionList SBSymbol::GetInstructions(SBTarget target,
@@ -141,7 +141,7 @@ SBInstructionList SBSymbol::GetInstructions(SBTarget target,
         AddressRange symbol_range(symbol_addr, m_opaque_ptr->GetByteSize());
         const bool prefer_file_cache = false;
         sb_instructions.SetDisassembler(Disassembler::DisassembleRange(
-            module_sp->GetArchitecture(), NULL, flavor_string, exe_ctx,
+            module_sp->GetArchitecture(), nullptr, flavor_string, exe_ctx,
             symbol_range, prefer_file_cache));
       }
     }
diff --git a/lldb/source/API/SBSymbolContext.cpp b/lldb/source/API/SBSymbolContext.cpp
index f8f446ba57455..365f0ccc2fbf1 100644
--- a/lldb/source/API/SBSymbolContext.cpp
+++ b/lldb/source/API/SBSymbolContext.cpp
@@ -63,7 +63,7 @@ bool SBSymbolContext::IsValid() const {
 SBSymbolContext::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, operator bool);
 
-  return m_opaque_up != NULL;
+  return m_opaque_up != nullptr;
 }
 
 SBModule SBSymbolContext::GetModule() {
@@ -84,13 +84,13 @@ SBCompileUnit SBSymbolContext::GetCompileUnit() {
                              GetCompileUnit);
 
   return LLDB_RECORD_RESULT(
-      SBCompileUnit(m_opaque_up ? m_opaque_up->comp_unit : NULL));
+      SBCompileUnit(m_opaque_up ? m_opaque_up->comp_unit : nullptr));
 }
 
 SBFunction SBSymbolContext::GetFunction() {
   LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFunction, SBSymbolContext, GetFunction);
 
-  Function *function = NULL;
+  Function *function = nullptr;
 
   if (m_opaque_up)
     function = m_opaque_up->function;
@@ -103,7 +103,8 @@ SBFunction SBSymbolContext::GetFunction() {
 SBBlock SBSymbolContext::GetBlock() {
   LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBSymbolContext, GetBlock);
 
-  return LLDB_RECORD_RESULT(SBBlock(m_opaque_up ? m_opaque_up->block : NULL));
+  return LLDB_RECORD_RESULT(
+      SBBlock(m_opaque_up ? m_opaque_up->block : nullptr));
 }
 
 SBLineEntry SBSymbolContext::GetLineEntry() {
@@ -119,7 +120,7 @@ SBLineEntry SBSymbolContext::GetLineEntry() {
 SBSymbol SBSymbolContext::GetSymbol() {
   LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSymbol, SBSymbolContext, GetSymbol);
 
-  Symbol *symbol = NULL;
+  Symbol *symbol = nullptr;
 
   if (m_opaque_up)
     symbol = m_opaque_up->symbol;
@@ -183,13 +184,13 @@ const lldb_private::SymbolContext &SBSymbolContext::operator*() const {
 }
 
 lldb_private::SymbolContext &SBSymbolContext::operator*() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new SymbolContext);
   return *m_opaque_up;
 }
 
 lldb_private::SymbolContext &SBSymbolContext::ref() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new SymbolContext);
   return *m_opaque_up;
 }
@@ -205,7 +206,7 @@ bool SBSymbolContext::GetDescription(SBStream &description) {
   Stream &strm = description.ref();
 
   if (m_opaque_up) {
-    m_opaque_up->GetDescription(&strm, lldb::eDescriptionLevelFull, NULL);
+    m_opaque_up->GetDescription(&strm, lldb::eDescriptionLevelFull, nullptr);
   } else
     strm.PutCString("No value");
 
diff --git a/lldb/source/API/SBSymbolContextList.cpp b/lldb/source/API/SBSymbolContextList.cpp
index 7c9a9dbb871b6..915d04a0282a2 100644
--- a/lldb/source/API/SBSymbolContextList.cpp
+++ b/lldb/source/API/SBSymbolContextList.cpp
@@ -93,7 +93,7 @@ bool SBSymbolContextList::IsValid() const {
 SBSymbolContextList::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContextList, operator bool);
 
-  return m_opaque_up != NULL;
+  return m_opaque_up != nullptr;
 }
 
 lldb_private::SymbolContextList *SBSymbolContextList::operator->() const {
@@ -111,7 +111,7 @@ bool SBSymbolContextList::GetDescription(lldb::SBStream &description) {
 
   Stream &strm = description.ref();
   if (m_opaque_up)
-    m_opaque_up->GetDescription(&strm, lldb::eDescriptionLevelFull, NULL);
+    m_opaque_up->GetDescription(&strm, lldb::eDescriptionLevelFull, nullptr);
   return true;
 }
 
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index ee0897e59509c..56d258187ea07 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -126,7 +126,7 @@ bool SBTarget::EventIsTargetEvent(const SBEvent &event) {
   LLDB_RECORD_STATIC_METHOD(bool, SBTarget, EventIsTargetEvent,
                             (const lldb::SBEvent &), event);
 
-  return Target::TargetEventData::GetEventDataFromEvent(event.get()) != NULL;
+  return Target::TargetEventData::GetEventDataFromEvent(event.get()) != nullptr;
 }
 
 SBTarget SBTarget::GetTargetFromEvent(const SBEvent &event) {
@@ -171,7 +171,7 @@ bool SBTarget::IsValid() const {
 SBTarget::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTarget, operator bool);
 
-  return m_opaque_sp.get() != NULL && m_opaque_sp->IsValid();
+  return m_opaque_sp.get() != nullptr && m_opaque_sp->IsValid();
 }
 
 SBProcess SBTarget::GetProcess() {
@@ -288,9 +288,9 @@ SBProcess SBTarget::LaunchSimple(char const **argv, char const **envp,
                      (const char **, const char **, const char *), argv, envp,
                      working_directory);
 
-  char *stdin_path = NULL;
-  char *stdout_path = NULL;
-  char *stderr_path = NULL;
+  char *stdin_path = nullptr;
+  char *stdout_path = nullptr;
+  char *stderr_path = nullptr;
   uint32_t launch_flags = 0;
   bool stop_at_entry = false;
   SBError error;
@@ -307,7 +307,7 @@ SBError SBTarget::Install() {
   TargetSP target_sp(GetSP());
   if (target_sp) {
     std::lock_guard<std::recursive_mutex> guard(target_sp->GetAPIMutex());
-    sb_error.ref() = target_sp->Install(NULL);
+    sb_error.ref() = target_sp->Install(nullptr);
   }
   return LLDB_RECORD_RESULT(sb_error);
 }
@@ -381,7 +381,7 @@ SBProcess SBTarget::Launch(SBListener &listener, char const **argv,
     if (listener.IsValid())
       launch_info.SetListener(listener.GetSP());
 
-    error.SetError(target_sp->Launch(launch_info, NULL));
+    error.SetError(target_sp->Launch(launch_info, nullptr));
 
     sb_process.SetSP(target_sp->GetProcessSP());
   } else {
@@ -430,7 +430,7 @@ SBProcess SBTarget::Launch(SBLaunchInfo &sb_launch_info, SBError &error) {
     if (arch_spec.IsValid())
       launch_info.GetArchitecture() = arch_spec;
 
-    error.SetError(target_sp->Launch(launch_info, NULL));
+    error.SetError(target_sp->Launch(launch_info, nullptr));
     sb_launch_info.set_ref(launch_info);
     sb_process.SetSP(target_sp->GetProcessSP());
   } else {
@@ -551,14 +551,14 @@ lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url,
     std::lock_guard<std::recursive_mutex> guard(target_sp->GetAPIMutex());
     if (listener.IsValid())
       process_sp =
-          target_sp->CreateProcess(listener.m_opaque_sp, plugin_name, NULL);
+          target_sp->CreateProcess(listener.m_opaque_sp, plugin_name, nullptr);
     else
       process_sp = target_sp->CreateProcess(
-          target_sp->GetDebugger().GetListener(), plugin_name, NULL);
+          target_sp->GetDebugger().GetListener(), plugin_name, nullptr);
 
     if (process_sp) {
       sb_process.SetSP(process_sp);
-      error.SetError(process_sp->ConnectRemote(NULL, url));
+      error.SetError(process_sp->ConnectRemote(nullptr, url));
     } else {
       error.SetErrorString("unable to create lldb_private::Process");
     }
@@ -788,12 +788,12 @@ SBBreakpoint SBTarget::BreakpointCreateByName(const char *symbol_name,
       FileSpecList module_spec_list;
       module_spec_list.Append(FileSpec(module_name));
       sb_bp = target_sp->CreateBreakpoint(
-          &module_spec_list, NULL, symbol_name, eFunctionNameTypeAuto,
+          &module_spec_list, nullptr, symbol_name, eFunctionNameTypeAuto,
           eLanguageTypeUnknown, offset, skip_prologue, internal, hardware);
     } else {
       sb_bp = target_sp->CreateBreakpoint(
-          NULL, NULL, symbol_name, eFunctionNameTypeAuto, eLanguageTypeUnknown,
-          offset, skip_prologue, internal, hardware);
+          nullptr, nullptr, symbol_name, eFunctionNameTypeAuto,
+          eLanguageTypeUnknown, offset, skip_prologue, internal, hardware);
     }
   }
 
@@ -1425,7 +1425,7 @@ lldb::SBWatchpoint SBTarget::WatchAddress(lldb::addr_t addr, size_t size,
     // Target::CreateWatchpoint() is thread safe.
     Status cw_error;
     // This API doesn't take in a type, so we can't figure out what it is.
-    CompilerType *type = NULL;
+    CompilerType *type = nullptr;
     watchpoint_sp =
         target_sp->CreateWatchpoint(addr, size, type, watch_type, cw_error);
     error.SetError(cw_error);
@@ -1559,7 +1559,7 @@ lldb::SBModule SBTarget::AddModule(const char *path, const char *triple,
                      (const char *, const char *, const char *), path, triple,
                      uuid_cstr);
 
-  return LLDB_RECORD_RESULT(AddModule(path, triple, uuid_cstr, NULL));
+  return LLDB_RECORD_RESULT(AddModule(path, triple, uuid_cstr, nullptr));
 }
 
 lldb::SBModule SBTarget::AddModule(const char *path, const char *triple,
@@ -1683,7 +1683,7 @@ const char *SBTarget::GetTriple() {
     ConstString const_triple(triple.c_str());
     return const_triple.GetCString();
   }
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SBTarget::GetDataByteSize() {
@@ -1969,7 +1969,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name,
 
     if (match_count > 0) {
       ExecutionContextScope *exe_scope = target_sp->GetProcessSP().get();
-      if (exe_scope == NULL)
+      if (exe_scope == nullptr)
         exe_scope = target_sp.get();
       for (uint32_t i = 0; i < match_count; ++i) {
         lldb::ValueObjectSP valobj_sp(ValueObjectVariable::Create(
@@ -2017,7 +2017,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name,
 
     if (match_count > 0) {
       ExecutionContextScope *exe_scope = target_sp->GetProcessSP().get();
-      if (exe_scope == NULL)
+      if (exe_scope == nullptr)
         exe_scope = target_sp.get();
       for (uint32_t i = 0; i < match_count; ++i) {
         lldb::ValueObjectSP valobj_sp(ValueObjectVariable::Create(
@@ -2053,7 +2053,7 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
   LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, ReadInstructions,
                      (lldb::SBAddress, uint32_t), base_addr, count);
 
-  return LLDB_RECORD_RESULT(ReadInstructions(base_addr, count, NULL));
+  return LLDB_RECORD_RESULT(ReadInstructions(base_addr, count, nullptr));
 }
 
 lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
@@ -2080,7 +2080,7 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
                                 data.GetByteSize(), error, &load_addr);
       const bool data_from_file = load_addr == LLDB_INVALID_ADDRESS;
       sb_instructions.SetDisassembler(Disassembler::DisassembleBytes(
-          target_sp->GetArchitecture(), NULL, flavor_string, *addr_ptr,
+          target_sp->GetArchitecture(), nullptr, flavor_string, *addr_ptr,
           data.GetBytes(), bytes_read, count, data_from_file));
     }
   }
@@ -2095,7 +2095,7 @@ lldb::SBInstructionList SBTarget::GetInstructions(lldb::SBAddress base_addr,
                     (lldb::SBAddress, const void *, size_t), base_addr, buf,
                     size);
 
-  return GetInstructionsWithFlavor(base_addr, NULL, buf, size);
+  return GetInstructionsWithFlavor(base_addr, nullptr, buf, size);
 }
 
 lldb::SBInstructionList
@@ -2119,7 +2119,7 @@ SBTarget::GetInstructionsWithFlavor(lldb::SBAddress base_addr,
     const bool data_from_file = true;
 
     sb_instructions.SetDisassembler(Disassembler::DisassembleBytes(
-        target_sp->GetArchitecture(), NULL, flavor_string, addr, buf, size,
+        target_sp->GetArchitecture(), nullptr, flavor_string, addr, buf, size,
         UINT32_MAX, data_from_file));
   }
 
@@ -2132,7 +2132,7 @@ lldb::SBInstructionList SBTarget::GetInstructions(lldb::addr_t base_addr,
   LLDB_RECORD_DUMMY(lldb::SBInstructionList, SBTarget, GetInstructions,
                     (lldb::addr_t, const void *, size_t), base_addr, buf, size);
 
-  return GetInstructionsWithFlavor(ResolveLoadAddress(base_addr), NULL, buf,
+  return GetInstructionsWithFlavor(ResolveLoadAddress(base_addr), nullptr, buf,
                                    size);
 }
 
@@ -2353,9 +2353,9 @@ lldb::SBValue SBTarget::EvaluateExpression(const char *expr,
   SBValue expr_result;
   ValueObjectSP expr_value_sp;
   TargetSP target_sp(GetSP());
-  StackFrame *frame = NULL;
+  StackFrame *frame = nullptr;
   if (target_sp) {
-    if (expr == NULL || expr[0] == '\0') {
+    if (expr == nullptr || expr[0] == '\0') {
       return LLDB_RECORD_RESULT(expr_result);
     }
 
@@ -2409,7 +2409,7 @@ lldb::addr_t SBTarget::GetStackRedZoneSize() {
 lldb::SBLaunchInfo SBTarget::GetLaunchInfo() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBLaunchInfo, SBTarget, GetLaunchInfo);
 
-  lldb::SBLaunchInfo launch_info(NULL);
+  lldb::SBLaunchInfo launch_info(nullptr);
   TargetSP target_sp(GetSP());
   if (target_sp)
     launch_info.set_ref(m_opaque_sp->GetProcessLaunchInfo());
diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp
index 5f4f69f021364..85e9a6b47955a 100644
--- a/lldb/source/API/SBThread.cpp
+++ b/lldb/source/API/SBThread.cpp
@@ -122,7 +122,7 @@ SBThread::operator bool() const {
   if (target && process) {
     Process::StopLocker stop_locker;
     if (stop_locker.TryLock(&process->GetRunLock()))
-      return m_opaque_sp->GetThreadSP().get() != NULL;
+      return m_opaque_sp->GetThreadSP().get() != nullptr;
   }
   // Without a valid target & process, this thread can't be valid.
   return false;
@@ -359,7 +359,7 @@ size_t SBThread::GetStopDescription(char *dst, size_t dst_len) {
             stop_desc =
                 exe_ctx.GetProcessPtr()->GetUnixSignals()->GetSignalAsCString(
                     stop_info_sp->GetValue());
-            if (stop_desc == NULL || stop_desc[0] == '\0') {
+            if (stop_desc == nullptr || stop_desc[0] == '\0') {
               static char signal_desc[] = "signal";
               stop_desc = signal_desc;
               stop_desc_len =
@@ -452,7 +452,7 @@ uint32_t SBThread::GetIndexID() const {
 const char *SBThread::GetName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBThread, GetName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
 
@@ -469,7 +469,7 @@ const char *SBThread::GetName() const {
 const char *SBThread::GetQueueName() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBThread, GetQueueName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
 
@@ -566,7 +566,7 @@ SBError SBThread::ResumeNewPlan(ExecutionContext &exe_ctx,
 
   // User level plans should be Master Plans so they can be interrupted, other
   // plans executed, and then a "continue" will resume the plan.
-  if (new_plan != NULL) {
+  if (new_plan != nullptr) {
     new_plan->SetIsMasterPlan(true);
     new_plan->SetOkayToDiscard(false);
   }
@@ -577,7 +577,7 @@ SBError SBThread::ResumeNewPlan(ExecutionContext &exe_ctx,
   if (process->GetTarget().GetDebugger().GetAsyncExecution())
     sb_error.ref() = process->Resume();
   else
-    sb_error.ref() = process->ResumeSynchronous(NULL);
+    sb_error.ref() = process->ResumeSynchronous(nullptr);
 
   return sb_error;
 }
@@ -627,7 +627,7 @@ void SBThread::StepInto(lldb::RunMode stop_other_threads) {
   LLDB_RECORD_METHOD(void, SBThread, StepInto, (lldb::RunMode),
                      stop_other_threads);
 
-  StepInto(NULL, stop_other_threads);
+  StepInto(nullptr, stop_other_threads);
 }
 
 void SBThread::StepInto(const char *target_name,
@@ -716,7 +716,7 @@ void SBThread::StepOut(SBError &error) {
   const LazyBool avoid_no_debug = eLazyBoolCalculate;
   Status new_plan_status;
   ThreadPlanSP new_plan_sp(thread->QueueThreadPlanForStepOut(
-      abort_other_plans, NULL, false, stop_other_threads, eVoteYes,
+      abort_other_plans, nullptr, false, stop_other_threads, eVoteYes,
       eVoteNoOpinion, 0, new_plan_status, avoid_no_debug));
 
   if (new_plan_status.Success())
@@ -763,7 +763,7 @@ void SBThread::StepOutOfFrame(SBFrame &sb_frame, SBError &error) {
 
   Status new_plan_status;
   ThreadPlanSP new_plan_sp(thread->QueueThreadPlanForStepOut(
-      abort_other_plans, NULL, false, stop_other_threads, eVoteYes,
+      abort_other_plans, nullptr, false, stop_other_threads, eVoteYes,
       eVoteNoOpinion, frame_sp->GetFrameIndex(), new_plan_status));
 
   if (new_plan_status.Success())
@@ -878,7 +878,7 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame,
         eSymbolContextCompUnit | eSymbolContextFunction |
         eSymbolContextLineEntry | eSymbolContextSymbol);
 
-    if (frame_sc.comp_unit == NULL) {
+    if (frame_sc.comp_unit == nullptr) {
       sb_error.SetErrorStringWithFormat(
           "frame %u doesn't have debug information", frame_sp->GetFrameIndex());
       return LLDB_RECORD_RESULT(sb_error);
@@ -1235,7 +1235,7 @@ bool SBThread::EventIsThreadEvent(const SBEvent &event) {
   LLDB_RECORD_STATIC_METHOD(bool, SBThread, EventIsThreadEvent,
                             (const lldb::SBEvent &), event);
 
-  return Thread::ThreadEventData::GetEventDataFromEvent(event.get()) != NULL;
+  return Thread::ThreadEventData::GetEventDataFromEvent(event.get()) != nullptr;
 }
 
 SBFrame SBThread::GetStackFrameFromEvent(const SBEvent &event) {
diff --git a/lldb/source/API/SBThreadCollection.cpp b/lldb/source/API/SBThreadCollection.cpp
index 766fe8f5516d6..3c1cf98650620 100644
--- a/lldb/source/API/SBThreadCollection.cpp
+++ b/lldb/source/API/SBThreadCollection.cpp
@@ -67,7 +67,7 @@ bool SBThreadCollection::IsValid() const {
 SBThreadCollection::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadCollection, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 size_t SBThreadCollection::GetSize() {
diff --git a/lldb/source/API/SBThreadPlan.cpp b/lldb/source/API/SBThreadPlan.cpp
index ecb1a4fd1d9cd..8f6802fe9cef4 100644
--- a/lldb/source/API/SBThreadPlan.cpp
+++ b/lldb/source/API/SBThreadPlan.cpp
@@ -92,7 +92,7 @@ bool SBThreadPlan::IsValid() const {
 SBThreadPlan::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadPlan, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 void SBThreadPlan::Clear() {
@@ -262,7 +262,7 @@ SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address,
     Status plan_status;
     SBThreadPlan plan =
         SBThreadPlan(m_opaque_sp->GetThread().QueueThreadPlanForStepInRange(
-            false, range, sc, NULL, eAllThreads, plan_status));
+            false, range, sc, nullptr, eAllThreads, plan_status));
 
     if (plan_status.Fail())
       error.SetErrorString(plan_status.AsCString());
diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp
index e016ee46f235b..5402128b3faeb 100644
--- a/lldb/source/API/SBType.cpp
+++ b/lldb/source/API/SBType.cpp
@@ -92,7 +92,7 @@ SBType &SBType::operator=(const SBType &rhs) {
 SBType::~SBType() {}
 
 TypeImpl &SBType::ref() {
-  if (m_opaque_sp.get() == NULL)
+  if (m_opaque_sp.get() == nullptr)
     m_opaque_sp = std::make_shared<TypeImpl>();
   return *m_opaque_sp;
 }
@@ -112,7 +112,7 @@ bool SBType::IsValid() const {
 SBType::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBType, operator bool);
 
-  if (m_opaque_sp.get() == NULL)
+  if (m_opaque_sp.get() == nullptr)
     return false;
 
   return m_opaque_sp->IsValid();
@@ -579,7 +579,7 @@ bool SBTypeList::IsValid() {
 SBTypeList::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeList, operator bool);
 
-  return (m_opaque_up != NULL);
+  return (m_opaque_up != nullptr);
 }
 
 SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) {
@@ -660,7 +660,7 @@ const char *SBTypeMember::GetName() {
 
   if (m_opaque_up)
     return m_opaque_up->GetName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 SBType SBTypeMember::GetType() {
@@ -743,7 +743,7 @@ void SBTypeMember::reset(TypeMemberImpl *type_member_impl) {
 }
 
 TypeMemberImpl &SBTypeMember::ref() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new TypeMemberImpl());
   return *m_opaque_up;
 }
@@ -789,7 +789,7 @@ const char *SBTypeMemberFunction::GetName() {
 
   if (m_opaque_sp)
     return m_opaque_sp->GetName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 const char *SBTypeMemberFunction::GetDemangledName() {
@@ -803,7 +803,7 @@ const char *SBTypeMemberFunction::GetDemangledName() {
       return mangled.GetDemangledName(mangled.GuessLanguage()).GetCString();
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *SBTypeMemberFunction::GetMangledName() {
@@ -812,7 +812,7 @@ const char *SBTypeMemberFunction::GetMangledName() {
 
   if (m_opaque_sp)
     return m_opaque_sp->GetMangledName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 SBType SBTypeMemberFunction::GetType() {
diff --git a/lldb/source/API/SBTypeCategory.cpp b/lldb/source/API/SBTypeCategory.cpp
index c2ec8cf8522ee..43d5a3ab140fb 100644
--- a/lldb/source/API/SBTypeCategory.cpp
+++ b/lldb/source/API/SBTypeCategory.cpp
@@ -49,7 +49,7 @@ bool SBTypeCategory::IsValid() const {
 SBTypeCategory::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeCategory, operator bool);
 
-  return (m_opaque_sp.get() != NULL);
+  return (m_opaque_sp.get() != nullptr);
 }
 
 bool SBTypeCategory::GetEnabled() {
@@ -75,7 +75,7 @@ const char *SBTypeCategory::GetName() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeCategory, GetName);
 
   if (!IsValid())
-    return NULL;
+    return nullptr;
   return m_opaque_sp->GetName();
 }
 
diff --git a/lldb/source/API/SBTypeEnumMember.cpp b/lldb/source/API/SBTypeEnumMember.cpp
index c0e1c7942db95..bd0755a140c33 100644
--- a/lldb/source/API/SBTypeEnumMember.cpp
+++ b/lldb/source/API/SBTypeEnumMember.cpp
@@ -64,7 +64,7 @@ const char *SBTypeEnumMember::GetName() {
 
   if (m_opaque_sp.get())
     return m_opaque_sp->GetName().GetCString();
-  return NULL;
+  return nullptr;
 }
 
 int64_t SBTypeEnumMember::GetValueAsSigned() {
@@ -98,7 +98,7 @@ void SBTypeEnumMember::reset(TypeEnumMemberImpl *type_member_impl) {
 }
 
 TypeEnumMemberImpl &SBTypeEnumMember::ref() {
-  if (m_opaque_sp.get() == NULL)
+  if (m_opaque_sp.get() == nullptr)
     m_opaque_sp = std::make_shared<TypeEnumMemberImpl>();
   return *m_opaque_sp.get();
 }
@@ -130,7 +130,7 @@ bool SBTypeEnumMemberList::IsValid() {
 SBTypeEnumMemberList::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMemberList, operator bool);
 
-  return (m_opaque_up != NULL);
+  return (m_opaque_up != nullptr);
 }
 
 SBTypeEnumMemberList &SBTypeEnumMemberList::
diff --git a/lldb/source/API/SBTypeFilter.cpp b/lldb/source/API/SBTypeFilter.cpp
index 104ef95650faa..d40301b4c1538 100644
--- a/lldb/source/API/SBTypeFilter.cpp
+++ b/lldb/source/API/SBTypeFilter.cpp
@@ -40,7 +40,7 @@ bool SBTypeFilter::IsValid() const {
 SBTypeFilter::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFilter, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 uint32_t SBTypeFilter::GetOptions() {
@@ -98,7 +98,7 @@ const char *SBTypeFilter::GetExpressionPathAtIndex(uint32_t i) {
       item++;
     return item;
   }
-  return NULL;
+  return nullptr;
 }
 
 bool SBTypeFilter::ReplaceExpressionPathAtIndex(uint32_t i, const char *item) {
diff --git a/lldb/source/API/SBTypeFormat.cpp b/lldb/source/API/SBTypeFormat.cpp
index 12729aaf98274..6024631e7054f 100644
--- a/lldb/source/API/SBTypeFormat.cpp
+++ b/lldb/source/API/SBTypeFormat.cpp
@@ -49,7 +49,7 @@ bool SBTypeFormat::IsValid() const {
 SBTypeFormat::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFormat, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 lldb::Format SBTypeFormat::GetFormat() {
diff --git a/lldb/source/API/SBTypeNameSpecifier.cpp b/lldb/source/API/SBTypeNameSpecifier.cpp
index cccd0ee449e08..895f697756598 100644
--- a/lldb/source/API/SBTypeNameSpecifier.cpp
+++ b/lldb/source/API/SBTypeNameSpecifier.cpp
@@ -27,7 +27,7 @@ SBTypeNameSpecifier::SBTypeNameSpecifier(const char *name, bool is_regex)
   LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, (const char *, bool), name,
                           is_regex);
 
-  if (name == NULL || (*name) == 0)
+  if (name == nullptr || (*name) == 0)
     m_opaque_sp.reset();
 }
 
@@ -54,14 +54,14 @@ bool SBTypeNameSpecifier::IsValid() const {
 SBTypeNameSpecifier::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeNameSpecifier, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 const char *SBTypeNameSpecifier::GetName() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeNameSpecifier, GetName);
 
   if (!IsValid())
-    return NULL;
+    return nullptr;
 
   return m_opaque_sp->GetName();
 }
@@ -129,7 +129,7 @@ bool SBTypeNameSpecifier::IsEqualTo(lldb::SBTypeNameSpecifier &rhs) {
 
   if (IsRegex() != rhs.IsRegex())
     return false;
-  if (GetName() == NULL || rhs.GetName() == NULL)
+  if (GetName() == nullptr || rhs.GetName() == nullptr)
     return false;
 
   return (strcmp(GetName(), rhs.GetName()) == 0);
diff --git a/lldb/source/API/SBTypeSummary.cpp b/lldb/source/API/SBTypeSummary.cpp
index 8ffb114470b00..8ffb234357572 100644
--- a/lldb/source/API/SBTypeSummary.cpp
+++ b/lldb/source/API/SBTypeSummary.cpp
@@ -202,7 +202,7 @@ bool SBTypeSummary::IsValid() const {
 SBTypeSummary::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummary, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 bool SBTypeSummary::IsFunctionCode() {
@@ -244,7 +244,7 @@ const char *SBTypeSummary::GetData() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeSummary, GetData);
 
   if (!IsValid())
-    return NULL;
+    return nullptr;
   if (ScriptSummaryFormat *script_summary_ptr =
           llvm::dyn_cast<ScriptSummaryFormat>(m_opaque_sp.get())) {
     const char *fname = script_summary_ptr->GetFunctionName();
diff --git a/lldb/source/API/SBTypeSynthetic.cpp b/lldb/source/API/SBTypeSynthetic.cpp
index f1e15525a08d0..df6fce1269f09 100644
--- a/lldb/source/API/SBTypeSynthetic.cpp
+++ b/lldb/source/API/SBTypeSynthetic.cpp
@@ -60,7 +60,7 @@ bool SBTypeSynthetic::IsValid() const {
 SBTypeSynthetic::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSynthetic, operator bool);
 
-  return m_opaque_sp.get() != NULL;
+  return m_opaque_sp.get() != nullptr;
 }
 
 bool SBTypeSynthetic::IsClassCode() {
@@ -84,7 +84,7 @@ const char *SBTypeSynthetic::GetData() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeSynthetic, GetData);
 
   if (!IsValid())
-    return NULL;
+    return nullptr;
   if (IsClassCode())
     return m_opaque_sp->GetPythonCode();
   else
diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp
index 3db84fa298049..8383007635229 100644
--- a/lldb/source/API/SBValue.cpp
+++ b/lldb/source/API/SBValue.cpp
@@ -57,7 +57,7 @@ class ValueImpl {
 
   ValueImpl(lldb::ValueObjectSP in_valobj_sp,
             lldb::DynamicValueType use_dynamic, bool use_synthetic,
-            const char *name = NULL)
+            const char *name = nullptr)
       : m_valobj_sp(), m_use_dynamic(use_dynamic),
         m_use_synthetic(use_synthetic), m_name(name) {
     if (in_valobj_sp) {
@@ -84,7 +84,7 @@ class ValueImpl {
   }
 
   bool IsValid() {
-    if (m_valobj_sp.get() == NULL)
+    if (m_valobj_sp.get() == nullptr)
       return false;
     else {
       // FIXME: This check is necessary but not sufficient.  We for sure don't
@@ -251,8 +251,8 @@ SBValue::operator bool() const {
   // If this function ever changes to anything that does more than just check
   // if the opaque shared pointer is non NULL, then we need to update all "if
   // (m_opaque_sp)" code in this file.
-  return m_opaque_sp.get() != NULL && m_opaque_sp->IsValid() &&
-         m_opaque_sp->GetRootSP().get() != NULL;
+  return m_opaque_sp.get() != nullptr && m_opaque_sp->IsValid() &&
+         m_opaque_sp->GetRootSP().get() != nullptr;
 }
 
 void SBValue::Clear() {
@@ -290,7 +290,7 @@ user_id_t SBValue::GetID() {
 const char *SBValue::GetName() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp)
@@ -302,7 +302,7 @@ const char *SBValue::GetName() {
 const char *SBValue::GetTypeName() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetTypeName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -315,7 +315,7 @@ const char *SBValue::GetTypeName() {
 const char *SBValue::GetDisplayTypeName() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetDisplayTypeName);
 
-  const char *name = NULL;
+  const char *name = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -356,7 +356,7 @@ bool SBValue::IsInScope() {
 const char *SBValue::GetValue() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetValue);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -381,7 +381,7 @@ ValueType SBValue::GetValueType() {
 const char *SBValue::GetObjectDescription() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetObjectDescription);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -394,7 +394,7 @@ const char *SBValue::GetObjectDescription() {
 const char *SBValue::GetTypeValidatorResult() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetTypeValidatorResult);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -442,7 +442,7 @@ bool SBValue::GetValueDidChange() {
 const char *SBValue::GetSummary() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetSummary);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -472,7 +472,7 @@ const char *SBValue::GetSummary(lldb::SBStream &stream,
 const char *SBValue::GetLocation() {
   LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetLocation);
 
-  const char *cstr = NULL;
+  const char *cstr = nullptr;
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp) {
@@ -1042,7 +1042,7 @@ void *SBValue::GetOpaqueType() {
   lldb::ValueObjectSP value_sp(GetSP(locker));
   if (value_sp)
     return value_sp->GetCompilerType().GetOpaqueQualType();
-  return NULL;
+  return nullptr;
 }
 
 lldb::SBTarget SBValue::GetTarget() {
diff --git a/lldb/source/API/SBValueList.cpp b/lldb/source/API/SBValueList.cpp
index 2f74c99225267..7e909df260d7d 100644
--- a/lldb/source/API/SBValueList.cpp
+++ b/lldb/source/API/SBValueList.cpp
@@ -92,7 +92,7 @@ bool SBValueList::IsValid() const {
 SBValueList::operator bool() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValueList, operator bool);
 
-  return (m_opaque_up != NULL);
+  return (m_opaque_up != nullptr);
 }
 
 void SBValueList::Clear() {
@@ -172,7 +172,7 @@ uint32_t SBValueList::GetSize() const {
 }
 
 void SBValueList::CreateIfNeeded() {
-  if (m_opaque_up == NULL)
+  if (m_opaque_up == nullptr)
     m_opaque_up.reset(new ValueListImpl());
 }
 
diff --git a/lldb/source/API/SBWatchpoint.cpp b/lldb/source/API/SBWatchpoint.cpp
index 0f35c3eda872a..d0a36b71e5c18 100644
--- a/lldb/source/API/SBWatchpoint.cpp
+++ b/lldb/source/API/SBWatchpoint.cpp
@@ -218,7 +218,7 @@ const char *SBWatchpoint::GetCondition() {
         watchpoint_sp->GetTarget().GetAPIMutex());
     return watchpoint_sp->GetConditionText();
   }
-  return NULL;
+  return nullptr;
 }
 
 void SBWatchpoint::SetCondition(const char *condition) {
@@ -277,7 +277,7 @@ bool SBWatchpoint::EventIsWatchpointEvent(const lldb::SBEvent &event) {
                             (const lldb::SBEvent &), event);
 
   return Watchpoint::WatchpointEventData::GetEventDataFromEvent(event.get()) !=
-         NULL;
+         nullptr;
 }
 
 WatchpointEventType
diff --git a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp
index d3610982026f8..8a6fd6a2692c8 100644
--- a/lldb/source/Breakpoint/BreakpointResolverAddress.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolverAddress.cpp
@@ -124,7 +124,7 @@ Searcher::CallbackReturn
 BreakpointResolverAddress::SearchCallback(SearchFilter &filter,
                                           SymbolContext &context, Address *addr,
                                           bool containing) {
-  assert(m_breakpoint != NULL);
+  assert(m_breakpoint != nullptr);
 
   if (filter.AddressPasses(m_addr)) {
     if (m_breakpoint->GetNumLocations() == 0) {
diff --git a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp
index 494b4c77d0203..a6095be316471 100644
--- a/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolverFileLine.cpp
@@ -204,7 +204,7 @@ BreakpointResolverFileLine::SearchCallback(SearchFilter &filter,
                                            Address *addr, bool containing) {
   SymbolContextList sc_list;
 
-  assert(m_breakpoint != NULL);
+  assert(m_breakpoint != nullptr);
 
   // There is a tricky bit here.  You can have two compilation units that
   // #include the same file, and in one of them the function at m_line_number
diff --git a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp
index c17dcb8342c1a..0b2485245b726 100644
--- a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp
@@ -98,7 +98,7 @@ BreakpointResolverFileRegex::SearchCallback(SearchFilter &filter,
                                             SymbolContext &context,
                                             Address *addr, bool containing) {
 
-  assert(m_breakpoint != NULL);
+  assert(m_breakpoint != nullptr);
   if (!context.target_sp)
     return eCallbackReturnContinue;
 
@@ -145,7 +145,7 @@ BreakpointResolverFileRegex::SearchCallback(SearchFilter &filter,
     BreakpointResolver::SetSCMatchesByLine(filter, sc_list, skip_prologue,
                                            m_regex.GetText());
   }
-  assert(m_breakpoint != NULL);
+  assert(m_breakpoint != nullptr);
 
   return Searcher::eCallbackReturnContinue;
 }
diff --git a/lldb/source/Breakpoint/BreakpointResolverScripted.cpp b/lldb/source/Breakpoint/BreakpointResolverScripted.cpp
index ae9775738f2c6..8363795a4d7f4 100644
--- a/lldb/source/Breakpoint/BreakpointResolverScripted.cpp
+++ b/lldb/source/Breakpoint/BreakpointResolverScripted.cpp
@@ -127,7 +127,7 @@ Searcher::CallbackReturn
 BreakpointResolverScripted::SearchCallback(SearchFilter &filter,
                                           SymbolContext &context, Address *addr,
                                           bool containing) {
-  assert(m_breakpoint != NULL);
+  assert(m_breakpoint != nullptr);
   bool should_continue = true;
   if (!m_implementation_sp)
     return Searcher::eCallbackReturnStop;
@@ -144,7 +144,7 @@ BreakpointResolverScripted::SearchCallback(SearchFilter &filter,
 
 lldb::SearchDepth
 BreakpointResolverScripted::GetDepth() {
-  assert(m_breakpoint != NULL);
+  assert(m_breakpoint != nullptr);
   lldb::SearchDepth depth = lldb::eSearchDepthModule;
   if (m_implementation_sp) {
     ScriptInterpreter *interp = GetScriptInterpreter();
diff --git a/lldb/source/Commands/CommandObjectExpression.h b/lldb/source/Commands/CommandObjectExpression.h
index 5bd8384fb4ef2..89c8e1dbecebd 100644
--- a/lldb/source/Commands/CommandObjectExpression.h
+++ b/lldb/source/Commands/CommandObjectExpression.h
@@ -68,7 +68,7 @@ class CommandObjectExpression : public CommandObjectRaw,
 
   bool EvaluateExpression(llvm::StringRef expr, Stream *output_stream,
                           Stream *error_stream,
-                          CommandReturnObject *result = NULL);
+                          CommandReturnObject *result = nullptr);
 
   void GetMultilineExpression();
 
diff --git a/lldb/source/Core/Communication.cpp b/lldb/source/Core/Communication.cpp
index ed5f5a559c54f..f2919d807d596 100644
--- a/lldb/source/Core/Communication.cpp
+++ b/lldb/source/Core/Communication.cpp
@@ -359,7 +359,7 @@ lldb::thread_result_t Communication::ReadThread(lldb::thread_arg_t p) {
   // Let clients know that this thread is exiting
   comm->BroadcastEvent(eBroadcastBitNoMorePendingInput);
   comm->BroadcastEvent(eBroadcastBitReadThreadDidExit);
-  return NULL;
+  return nullptr;
 }
 
 void Communication::SetReadThreadBytesReceivedCallback(
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 7f180c1575ad6..79402813e845f 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -1616,7 +1616,7 @@ void Debugger::DefaultEventHandler() {
 
 lldb::thread_result_t Debugger::EventHandlerThread(lldb::thread_arg_t arg) {
   ((Debugger *)arg)->DefaultEventHandler();
-  return NULL;
+  return nullptr;
 }
 
 bool Debugger::StartEventHandlerThread() {
@@ -1661,7 +1661,7 @@ lldb::thread_result_t Debugger::IOHandlerThread(lldb::thread_arg_t arg) {
   Debugger *debugger = (Debugger *)arg;
   debugger->ExecuteIOHandlers();
   debugger->StopEventHandlerThread();
-  return NULL;
+  return nullptr;
 }
 
 bool Debugger::HasIOHandlerThread() { return m_io_handler_thread.IsJoinable(); }
diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp
index 608ea8964e715..c6759cc944ca9 100644
--- a/lldb/source/Core/Mangled.cpp
+++ b/lldb/source/Core/Mangled.cpp
@@ -160,7 +160,7 @@ Mangled::~Mangled() {}
 //  if (mangled)
 //  { ...
 Mangled::operator void *() const {
-  return (m_mangled) ? const_cast<Mangled *>(this) : NULL;
+  return (m_mangled) ? const_cast<Mangled *>(this) : nullptr;
 }
 
 // Logical NOT operator. This allows code to check any Mangled objects to see
diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp
index 05d1468314c7a..a0d4cee47391b 100644
--- a/lldb/source/Core/Section.cpp
+++ b/lldb/source/Core/Section.cpp
@@ -342,7 +342,7 @@ void Section::DumpName(Stream *s) const {
     s->PutChar('.');
   } else {
     // The top most section prints the module basename
-    const char *name = NULL;
+    const char *name = nullptr;
     ModuleSP module_sp(GetModule());
 
     if (m_obj_file) {
@@ -509,7 +509,7 @@ SectionList::FindSectionByName(ConstString section_dstr) const {
     const_iterator sect_iter;
     const_iterator end = m_sections.end();
     for (sect_iter = m_sections.begin();
-         sect_iter != end && sect_sp.get() == NULL; ++sect_iter) {
+         sect_iter != end && sect_sp.get() == nullptr; ++sect_iter) {
       Section *child_section = sect_iter->get();
       if (child_section) {
         if (child_section->GetName() == section_dstr) {
@@ -530,7 +530,7 @@ SectionSP SectionList::FindSectionByID(user_id_t sect_id) const {
     const_iterator sect_iter;
     const_iterator end = m_sections.end();
     for (sect_iter = m_sections.begin();
-         sect_iter != end && sect_sp.get() == NULL; ++sect_iter) {
+         sect_iter != end && sect_sp.get() == nullptr; ++sect_iter) {
       if ((*sect_iter)->GetID() == sect_id) {
         sect_sp = *sect_iter;
         break;
@@ -567,7 +567,7 @@ SectionSP SectionList::FindSectionContainingFileAddress(addr_t vm_addr,
   const_iterator sect_iter;
   const_iterator end = m_sections.end();
   for (sect_iter = m_sections.begin();
-       sect_iter != end && sect_sp.get() == NULL; ++sect_iter) {
+       sect_iter != end && sect_sp.get() == nullptr; ++sect_iter) {
     Section *sect = sect_iter->get();
     if (sect->ContainsFileAddress(vm_addr)) {
       // The file address is in this section. We need to make sure one of our
@@ -577,7 +577,7 @@ SectionSP SectionList::FindSectionContainingFileAddress(addr_t vm_addr,
         sect_sp = sect->GetChildren().FindSectionContainingFileAddress(
             vm_addr, depth - 1);
 
-      if (sect_sp.get() == NULL && !sect->IsFake())
+      if (sect_sp.get() == nullptr && !sect->IsFake())
         sect_sp = *sect_iter;
     }
   }
@@ -585,7 +585,7 @@ SectionSP SectionList::FindSectionContainingFileAddress(addr_t vm_addr,
 }
 
 bool SectionList::ContainsSection(user_id_t sect_id) const {
-  return FindSectionByID(sect_id).get() != NULL;
+  return FindSectionByID(sect_id).get() != nullptr;
 }
 
 void SectionList::Dump(Stream *s, Target *target, bool show_header,
@@ -608,7 +608,7 @@ void SectionList::Dump(Stream *s, Target *target, bool show_header,
   const_iterator sect_iter;
   const_iterator end = m_sections.end();
   for (sect_iter = m_sections.begin(); sect_iter != end; ++sect_iter) {
-    (*sect_iter)->Dump(s, target_has_loaded_sections ? target : NULL, depth);
+    (*sect_iter)->Dump(s, target_has_loaded_sections ? target : nullptr, depth);
   }
 
   if (show_header && !m_sections.empty())
diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp
index 9cd9ac163fb8f..87065ab624251 100644
--- a/lldb/source/Core/SourceManager.cpp
+++ b/lldb/source/Core/SourceManager.cpp
@@ -326,7 +326,7 @@ bool SourceManager::GetDefaultFileAndLine(FileSpec &file_spec, uint32_t &line) {
         bool inlines_okay = true;
         bool append = false;
         size_t num_matches = executable_ptr->FindFunctions(
-            main_name, NULL, lldb::eFunctionNameTypeBase, inlines_okay,
+            main_name, nullptr, lldb::eFunctionNameTypeBase, inlines_okay,
             symbols_okay, append, sc_list);
         for (size_t idx = 0; idx < num_matches; idx++) {
           SymbolContext sc;
@@ -399,7 +399,7 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec,
         if (num_matches != 0) {
           if (num_matches > 1) {
             SymbolContext sc;
-            FileSpec *test_cu_spec = NULL;
+            FileSpec *test_cu_spec = nullptr;
 
             for (unsigned i = 0; i < num_matches; i++) {
               sc_list.GetContextAtIndex(i, sc);
@@ -461,12 +461,12 @@ uint32_t SourceManager::File::GetNumLines() {
 
 const char *SourceManager::File::PeekLineData(uint32_t line) {
   if (!LineIsValid(line))
-    return NULL;
+    return nullptr;
 
   size_t line_offset = GetLineOffset(line);
   if (line_offset < m_data_sp->GetByteSize())
     return (const char *)m_data_sp->GetBytes() + line_offset;
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SourceManager::File::GetLineLength(uint32_t line,
@@ -621,7 +621,7 @@ bool SourceManager::File::CalculateLineOffsets(uint32_t line) {
       return true;
 
     if (m_offsets.empty()) {
-      if (m_data_sp.get() == NULL)
+      if (m_data_sp.get() == nullptr)
         return false;
 
       const char *start = (char *)m_data_sp->GetBytes();
diff --git a/lldb/source/Core/Value.cpp b/lldb/source/Core/Value.cpp
index 541e0933bcae7..fdb4adb5f4314 100644
--- a/lldb/source/Core/Value.cpp
+++ b/lldb/source/Core/Value.cpp
@@ -39,17 +39,17 @@ using namespace lldb;
 using namespace lldb_private;
 
 Value::Value()
-    : m_value(), m_vector(), m_compiler_type(), m_context(NULL),
+    : m_value(), m_vector(), m_compiler_type(), m_context(nullptr),
       m_value_type(eValueTypeScalar), m_context_type(eContextTypeInvalid),
       m_data_buffer() {}
 
 Value::Value(const Scalar &scalar)
-    : m_value(scalar), m_vector(), m_compiler_type(), m_context(NULL),
+    : m_value(scalar), m_vector(), m_compiler_type(), m_context(nullptr),
       m_value_type(eValueTypeScalar), m_context_type(eContextTypeInvalid),
       m_data_buffer() {}
 
 Value::Value(const void *bytes, int len)
-    : m_value(), m_vector(), m_compiler_type(), m_context(NULL),
+    : m_value(), m_vector(), m_compiler_type(), m_context(nullptr),
       m_value_type(eValueTypeHostAddress), m_context_type(eContextTypeInvalid),
       m_data_buffer() {
   SetBytes(bytes, len);
@@ -131,13 +131,13 @@ AddressType Value::GetValueAddressType() const {
 RegisterInfo *Value::GetRegisterInfo() const {
   if (m_context_type == eContextTypeRegisterInfo)
     return static_cast<RegisterInfo *>(m_context);
-  return NULL;
+  return nullptr;
 }
 
 Type *Value::GetType() {
   if (m_context_type == eContextTypeLLDBType)
     return static_cast<Type *>(m_context);
-  return NULL;
+  return nullptr;
 }
 
 size_t Value::AppendDataToHostBuffer(const Value &rhs) {
@@ -353,11 +353,11 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
     break;
   }
   case eValueTypeLoadAddress:
-    if (exe_ctx == NULL) {
+    if (exe_ctx == nullptr) {
       error.SetErrorString("can't read load address (no execution context)");
     } else {
       Process *process = exe_ctx->GetProcessPtr();
-      if (process == NULL || !process->IsAlive()) {
+      if (process == nullptr || !process->IsAlive()) {
         Target *target = exe_ctx->GetTargetPtr();
         if (target) {
           // Allow expressions to run and evaluate things when the target has
@@ -390,16 +390,16 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
     break;
 
   case eValueTypeFileAddress:
-    if (exe_ctx == NULL) {
+    if (exe_ctx == nullptr) {
       error.SetErrorString("can't read file address (no execution context)");
-    } else if (exe_ctx->GetTargetPtr() == NULL) {
+    } else if (exe_ctx->GetTargetPtr() == nullptr) {
       error.SetErrorString("can't read file address (invalid target)");
     } else {
       address = m_value.ULongLong(LLDB_INVALID_ADDRESS);
       if (address == LLDB_INVALID_ADDRESS) {
         error.SetErrorString("invalid file address");
       } else {
-        if (module == NULL) {
+        if (module == nullptr) {
           // The only thing we can currently lock down to a module so that we
           // can resolve a file address, is a variable.
           Variable *variable = GetVariable();
@@ -527,7 +527,7 @@ Status Value::GetValueAsData(ExecutionContext *exe_ctx, DataExtractor &data,
   }
 
   uint8_t *dst = const_cast<uint8_t *>(data.PeekData(data_offset, byte_size));
-  if (dst != NULL) {
+  if (dst != nullptr) {
     if (address_type == eAddressTypeHost) {
       // The address is an address in this process, so just copy it.
       if (address == 0) {
@@ -597,7 +597,7 @@ Scalar &Value::ResolveValue(ExecutionContext *exe_ctx) {
     {
       DataExtractor data;
       lldb::addr_t addr = m_value.ULongLong(LLDB_INVALID_ADDRESS);
-      Status error(GetValueAsData(exe_ctx, data, 0, NULL));
+      Status error(GetValueAsData(exe_ctx, data, 0, nullptr));
       if (error.Success()) {
         Scalar scalar;
         if (compiler_type.GetValueAsScalar(data, 0, data.GetByteSize(),
@@ -625,7 +625,7 @@ Scalar &Value::ResolveValue(ExecutionContext *exe_ctx) {
 Variable *Value::GetVariable() {
   if (m_context_type == eContextTypeVariable)
     return static_cast<Variable *>(m_context);
-  return NULL;
+  return nullptr;
 }
 
 void Value::Clear() {
@@ -633,7 +633,7 @@ void Value::Clear() {
   m_vector.Clear();
   m_compiler_type.Clear();
   m_value_type = eValueTypeScalar;
-  m_context = NULL;
+  m_context = nullptr;
   m_context_type = eContextTypeInvalid;
   m_data_buffer.Clear();
 }
@@ -702,7 +702,7 @@ Value *ValueList::GetValueAtIndex(size_t idx) {
   if (idx < GetSize()) {
     return &(m_values[idx]);
   } else
-    return NULL;
+    return nullptr;
 }
 
 void ValueList::Clear() { m_values.clear(); }
diff --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
index 8bb542263f95f..e083b8dc086c4 100644
--- a/lldb/source/Core/ValueObject.cpp
+++ b/lldb/source/Core/ValueObject.cpp
@@ -81,12 +81,13 @@ static user_id_t g_value_obj_uid = 0;
 // ValueObject constructor
 ValueObject::ValueObject(ValueObject &parent)
     : UserID(++g_value_obj_uid), // Unique identifier for every value object
-      m_parent(&parent), m_root(NULL), m_update_point(parent.GetUpdatePoint()),
-      m_name(), m_data(), m_value(), m_error(), m_value_str(),
-      m_old_value_str(), m_location_str(), m_summary_str(), m_object_desc_str(),
-      m_validation_result(), m_manager(parent.GetManager()), m_children(),
-      m_synthetic_children(), m_dynamic_value(NULL), m_synthetic_value(NULL),
-      m_deref_valobj(NULL), m_format(eFormatDefault),
+      m_parent(&parent), m_root(nullptr),
+      m_update_point(parent.GetUpdatePoint()), m_name(), m_data(), m_value(),
+      m_error(), m_value_str(), m_old_value_str(), m_location_str(),
+      m_summary_str(), m_object_desc_str(), m_validation_result(),
+      m_manager(parent.GetManager()), m_children(), m_synthetic_children(),
+      m_dynamic_value(nullptr), m_synthetic_value(nullptr),
+      m_deref_valobj(nullptr), m_format(eFormatDefault),
       m_last_format(eFormatDefault), m_last_format_mgr_revision(0),
       m_type_summary_sp(), m_type_format_sp(), m_synthetic_children_sp(),
       m_type_validator_sp(), m_user_id_of_forced_summary(),
@@ -108,15 +109,15 @@ ValueObject::ValueObject(ValueObject &parent)
 ValueObject::ValueObject(ExecutionContextScope *exe_scope,
                          AddressType child_ptr_or_ref_addr_type)
     : UserID(++g_value_obj_uid), // Unique identifier for every value object
-      m_parent(NULL), m_root(NULL), m_update_point(exe_scope), m_name(),
+      m_parent(nullptr), m_root(nullptr), m_update_point(exe_scope), m_name(),
       m_data(), m_value(), m_error(), m_value_str(), m_old_value_str(),
       m_location_str(), m_summary_str(), m_object_desc_str(),
       m_validation_result(), m_manager(), m_children(), m_synthetic_children(),
-      m_dynamic_value(NULL), m_synthetic_value(NULL), m_deref_valobj(NULL),
-      m_format(eFormatDefault), m_last_format(eFormatDefault),
-      m_last_format_mgr_revision(0), m_type_summary_sp(), m_type_format_sp(),
-      m_synthetic_children_sp(), m_type_validator_sp(),
-      m_user_id_of_forced_summary(),
+      m_dynamic_value(nullptr), m_synthetic_value(nullptr),
+      m_deref_valobj(nullptr), m_format(eFormatDefault),
+      m_last_format(eFormatDefault), m_last_format_mgr_revision(0),
+      m_type_summary_sp(), m_type_format_sp(), m_synthetic_children_sp(),
+      m_type_validator_sp(), m_user_id_of_forced_summary(),
       m_address_type_of_ptr_or_ref_children(child_ptr_or_ref_addr_type),
       m_value_checksum(),
       m_preferred_display_language(lldb::eLanguageTypeUnknown),
@@ -465,7 +466,7 @@ ValueObjectSP ValueObject::GetChildAtIndex(size_t idx, bool can_create) {
     }
 
     ValueObject *child = m_children.GetChildAtIndex(idx);
-    if (child != NULL)
+    if (child != nullptr)
       return child->GetSP();
   }
   return child_sp;
@@ -617,7 +618,7 @@ void ValueObject::SetName(ConstString name) { m_name = name; }
 ValueObject *ValueObject::CreateChildAtIndex(size_t idx,
                                              bool synthetic_array_member,
                                              int32_t synthetic_index) {
-  ValueObject *valobj = NULL;
+  ValueObject *valobj = nullptr;
 
   bool omit_empty_base_classes = true;
   bool ignore_array_bounds = synthetic_array_member;
@@ -710,7 +711,7 @@ const char *ValueObject::GetSummaryAsCString(lldb::LanguageType lang) {
                         summary_options);
   }
   if (m_summary_str.empty())
-    return NULL;
+    return nullptr;
   return m_summary_str.c_str();
 }
 
@@ -763,12 +764,12 @@ size_t ValueObject::GetPointeeData(DataExtractor &data, uint32_t item_idx,
     if (is_pointer_type) {
       Status error;
       ValueObjectSP pointee_sp = Dereference(error);
-      if (error.Fail() || pointee_sp.get() == NULL)
+      if (error.Fail() || pointee_sp.get() == nullptr)
         return 0;
       return pointee_sp->GetData(data, error);
     } else {
       ValueObjectSP child_sp = GetChildAtIndex(0, true);
-      if (child_sp.get() == NULL)
+      if (child_sp.get() == nullptr)
         return 0;
       Status error;
       return child_sp->GetData(data, error);
@@ -777,7 +778,7 @@ size_t ValueObject::GetPointeeData(DataExtractor &data, uint32_t item_idx,
   } else /* (items > 1) */
   {
     Status error;
-    lldb_private::DataBufferHeap *heap_buf_ptr = NULL;
+    lldb_private::DataBufferHeap *heap_buf_ptr = nullptr;
     lldb::DataBufferSP data_sp(heap_buf_ptr =
                                    new lldb_private::DataBufferHeap());
 
@@ -964,7 +965,7 @@ ValueObject::ReadPointedString(lldb::DataBufferSP &buffer_sp, Status &error,
     if (is_array) {
       // We have an array
       uint64_t array_size = 0;
-      if (compiler_type.IsArrayType(NULL, &array_size, NULL)) {
+      if (compiler_type.IsArrayType(nullptr, &array_size, nullptr)) {
         cstr_len = array_size;
         if (cstr_len > max_length) {
           capped_data = true;
@@ -1170,7 +1171,7 @@ const char *ValueObject::GetValueAsCString() {
     }
   }
   if (m_value_str.empty())
-    return NULL;
+    return nullptr;
   return m_value_str.c_str();
 }
 
@@ -1281,7 +1282,7 @@ bool ValueObject::DumpPrintableRepresentation(
             buffer_sp, lldb::eByteOrderInvalid,
             8)); // none of this matters for a string - pass some defaults
         options.SetStream(&s);
-        options.SetPrefixToken(0);
+        options.SetPrefixToken(nullptr);
         options.SetQuote('"');
         options.SetSourceSize(buffer_sp->GetByteSize());
         options.SetIsTruncated(read_string.second);
@@ -1670,7 +1671,7 @@ ValueObject::GetTypeInfo(CompilerType *pointee_or_element_compiler_type) {
 bool ValueObject::IsPointerType() { return GetCompilerType().IsPointerType(); }
 
 bool ValueObject::IsArrayType() {
-  return GetCompilerType().IsArrayType(NULL, NULL, NULL);
+  return GetCompilerType().IsArrayType(nullptr, nullptr, nullptr);
 }
 
 bool ValueObject::IsScalarType() { return GetCompilerType().IsScalarType(); }
@@ -1689,7 +1690,7 @@ bool ValueObject::IsPossibleDynamicType() {
   if (process)
     return process->IsPossibleDynamicValue(*this);
   else
-    return GetCompilerType().IsPossibleDynamicType(NULL, true, true);
+    return GetCompilerType().IsPossibleDynamicType(nullptr, true, true);
 }
 
 bool ValueObject::IsRuntimeSupportValue() {
@@ -1900,7 +1901,7 @@ ValueObject::GetSyntheticExpressionPathChild(const char *expression,
     // We haven't made a synthetic array member for expression yet, so lets
     // make one and cache it for any future reference.
     synthetic_child_sp = GetValueForExpressionPath(
-        expression, NULL, NULL,
+        expression, nullptr, nullptr,
         GetValueForExpressionPathOptions().SetSyntheticChildrenTraversal(
             GetValueForExpressionPathOptions::SyntheticChildrenTraversal::
                 None));
@@ -1923,7 +1924,7 @@ void ValueObject::CalculateSyntheticValue(bool use_synthetic) {
 
   TargetSP target_sp(GetTargetSP());
   if (target_sp && !target_sp->GetEnableSyntheticValue()) {
-    m_synthetic_value = NULL;
+    m_synthetic_value = nullptr;
     return;
   }
 
@@ -1932,7 +1933,7 @@ void ValueObject::CalculateSyntheticValue(bool use_synthetic) {
   if (!UpdateFormatsIfNeeded() && m_synthetic_value)
     return;
 
-  if (m_synthetic_children_sp.get() == NULL)
+  if (m_synthetic_children_sp.get() == nullptr)
     return;
 
   if (current_synth_sp == m_synthetic_children_sp && m_synthetic_value)
@@ -1959,7 +1960,7 @@ ValueObjectSP ValueObject::GetDynamicValue(DynamicValueType use_dynamic) {
   if (use_dynamic == eNoDynamicValues)
     return ValueObjectSP();
 
-  if (!IsDynamic() && m_dynamic_value == NULL) {
+  if (!IsDynamic() && m_dynamic_value == nullptr) {
     CalculateDynamicValue(use_dynamic);
   }
   if (m_dynamic_value)
@@ -1987,7 +1988,7 @@ ValueObjectSP ValueObject::GetSyntheticValue(bool use_synthetic) {
 bool ValueObject::HasSyntheticValue() {
   UpdateFormatsIfNeeded();
 
-  if (m_synthetic_children_sp.get() == NULL)
+  if (m_synthetic_children_sp.get() == nullptr)
     return false;
 
   CalculateSyntheticValue(true);
@@ -2020,7 +2021,7 @@ ValueObject *ValueObject::GetNonBaseClassParent() {
     else
       return GetParent();
   }
-  return NULL;
+  return nullptr;
 }
 
 bool ValueObject::IsBaseClass(uint32_t &depth) {
@@ -3005,12 +3006,12 @@ bool ValueObject::EvaluationPoint::SyncWithProcessState(
   ExecutionContext exe_ctx(
       m_exe_ctx_ref.Lock(thread_and_frame_only_if_stopped));
 
-  if (exe_ctx.GetTargetPtr() == NULL)
+  if (exe_ctx.GetTargetPtr() == nullptr)
     return false;
 
   // If we don't have a process nothing can change.
   Process *process = exe_ctx.GetProcessPtr();
-  if (process == NULL)
+  if (process == nullptr)
     return false;
 
   // If our stop id is the current stop ID, nothing has changed:
@@ -3091,7 +3092,7 @@ void ValueObject::ClearUserVisibleData(uint32_t clear_mask) {
   if ((clear_mask & eClearUserVisibleDataItemsSyntheticChildren) ==
       eClearUserVisibleDataItemsSyntheticChildren) {
     if (m_synthetic_value)
-      m_synthetic_value = NULL;
+      m_synthetic_value = nullptr;
   }
 
   if ((clear_mask & eClearUserVisibleDataItemsValidator) ==
@@ -3104,7 +3105,7 @@ SymbolContextScope *ValueObject::GetSymbolContextScope() {
     if (!m_parent->IsPointerOrReferenceType())
       return m_parent->GetSymbolContextScope();
   }
-  return NULL;
+  return nullptr;
 }
 
 lldb::ValueObjectSP
diff --git a/lldb/source/Core/ValueObjectConstResultImpl.cpp b/lldb/source/Core/ValueObjectConstResultImpl.cpp
index aeea1743682c8..de51735736b9c 100644
--- a/lldb/source/Core/ValueObjectConstResultImpl.cpp
+++ b/lldb/source/Core/ValueObjectConstResultImpl.cpp
@@ -39,7 +39,7 @@ ValueObjectConstResultImpl::ValueObjectConstResultImpl(
       m_address_of_backend() {}
 
 lldb::ValueObjectSP ValueObjectConstResultImpl::Dereference(Status &error) {
-  if (m_impl_backend == NULL)
+  if (m_impl_backend == nullptr)
     return lldb::ValueObjectSP();
 
   return m_impl_backend->ValueObject::Dereference(error);
@@ -47,12 +47,12 @@ lldb::ValueObjectSP ValueObjectConstResultImpl::Dereference(Status &error) {
 
 ValueObject *ValueObjectConstResultImpl::CreateChildAtIndex(
     size_t idx, bool synthetic_array_member, int32_t synthetic_index) {
-  if (m_impl_backend == NULL)
-    return NULL;
+  if (m_impl_backend == nullptr)
+    return nullptr;
 
   m_impl_backend->UpdateValueIfNeeded(false);
 
-  ValueObjectConstResultChild *valobj = NULL;
+  ValueObjectConstResultChild *valobj = nullptr;
 
   bool omit_empty_base_classes = true;
   bool ignore_array_bounds = synthetic_array_member;
@@ -106,7 +106,7 @@ ValueObject *ValueObjectConstResultImpl::CreateChildAtIndex(
 lldb::ValueObjectSP ValueObjectConstResultImpl::GetSyntheticChildAtOffset(
     uint32_t offset, const CompilerType &type, bool can_create,
     ConstString name_const_str) {
-  if (m_impl_backend == NULL)
+  if (m_impl_backend == nullptr)
     return lldb::ValueObjectSP();
 
   return m_impl_backend->ValueObject::GetSyntheticChildAtOffset(
@@ -114,10 +114,10 @@ lldb::ValueObjectSP ValueObjectConstResultImpl::GetSyntheticChildAtOffset(
 }
 
 lldb::ValueObjectSP ValueObjectConstResultImpl::AddressOf(Status &error) {
-  if (m_address_of_backend.get() != NULL)
+  if (m_address_of_backend.get() != nullptr)
     return m_address_of_backend;
 
-  if (m_impl_backend == NULL)
+  if (m_impl_backend == nullptr)
     return lldb::ValueObjectSP();
   if (m_live_address != LLDB_INVALID_ADDRESS) {
     CompilerType compiler_type(m_impl_backend->GetCompilerType());
@@ -143,7 +143,7 @@ lldb::ValueObjectSP ValueObjectConstResultImpl::AddressOf(Status &error) {
 
 lldb::ValueObjectSP
 ValueObjectConstResultImpl::Cast(const CompilerType &compiler_type) {
-  if (m_impl_backend == NULL)
+  if (m_impl_backend == nullptr)
     return lldb::ValueObjectSP();
 
   ValueObjectConstResultCast *result_cast =
@@ -156,7 +156,7 @@ lldb::addr_t
 ValueObjectConstResultImpl::GetAddressOf(bool scalar_is_load_address,
                                          AddressType *address_type) {
 
-  if (m_impl_backend == NULL)
+  if (m_impl_backend == nullptr)
     return 0;
 
   if (m_live_address == LLDB_INVALID_ADDRESS) {
@@ -173,7 +173,7 @@ ValueObjectConstResultImpl::GetAddressOf(bool scalar_is_load_address,
 size_t ValueObjectConstResultImpl::GetPointeeData(DataExtractor &data,
                                                   uint32_t item_idx,
                                                   uint32_t item_count) {
-  if (m_impl_backend == NULL)
+  if (m_impl_backend == nullptr)
     return 0;
   return m_impl_backend->ValueObject::GetPointeeData(data, item_idx,
                                                      item_count);
diff --git a/lldb/source/Core/ValueObjectMemory.cpp b/lldb/source/Core/ValueObjectMemory.cpp
index 42d1263b3909f..95d4330ee0c6b 100644
--- a/lldb/source/Core/ValueObjectMemory.cpp
+++ b/lldb/source/Core/ValueObjectMemory.cpp
@@ -49,7 +49,7 @@ ValueObjectMemory::ValueObjectMemory(ExecutionContextScope *exe_scope,
     : ValueObject(exe_scope), m_address(address), m_type_sp(type_sp),
       m_compiler_type() {
   // Do not attempt to construct one of these objects with no variable!
-  assert(m_type_sp.get() != NULL);
+  assert(m_type_sp.get() != nullptr);
   SetName(ConstString(name));
   m_value.SetContext(Value::eContextTypeLLDBType, m_type_sp.get());
   TargetSP target_sp(GetTargetSP());
diff --git a/lldb/source/Core/ValueObjectRegister.cpp b/lldb/source/Core/ValueObjectRegister.cpp
index 1d12747649e3d..75a254fbbc21d 100644
--- a/lldb/source/Core/ValueObjectRegister.cpp
+++ b/lldb/source/Core/ValueObjectRegister.cpp
@@ -76,7 +76,7 @@ bool ValueObjectRegisterContext::UpdateValue() {
   else
     m_reg_ctx_sp.reset();
 
-  if (m_reg_ctx_sp.get() == NULL) {
+  if (m_reg_ctx_sp.get() == nullptr) {
     SetValueIsValid(false);
     m_error.SetErrorToGenericError();
   } else
@@ -87,7 +87,7 @@ bool ValueObjectRegisterContext::UpdateValue() {
 
 ValueObject *ValueObjectRegisterContext::CreateChildAtIndex(
     size_t idx, bool synthetic_array_member, int32_t synthetic_index) {
-  ValueObject *new_valobj = NULL;
+  ValueObject *new_valobj = nullptr;
 
   const size_t num_children = GetNumChildren();
   if (idx < num_children) {
@@ -112,7 +112,7 @@ ValueObjectRegisterSet::Create(ExecutionContextScope *exe_scope,
 ValueObjectRegisterSet::ValueObjectRegisterSet(ExecutionContextScope *exe_scope,
                                                lldb::RegisterContextSP &reg_ctx,
                                                uint32_t reg_set_idx)
-    : ValueObject(exe_scope), m_reg_ctx_sp(reg_ctx), m_reg_set(NULL),
+    : ValueObject(exe_scope), m_reg_ctx_sp(reg_ctx), m_reg_set(nullptr),
       m_reg_set_idx(reg_set_idx) {
   assert(reg_ctx);
   m_reg_set = reg_ctx->GetRegisterSet(m_reg_set_idx);
@@ -149,13 +149,13 @@ bool ValueObjectRegisterSet::UpdateValue() {
   SetValueDidChange(false);
   ExecutionContext exe_ctx(GetExecutionContextRef());
   StackFrame *frame = exe_ctx.GetFramePtr();
-  if (frame == NULL)
+  if (frame == nullptr)
     m_reg_ctx_sp.reset();
   else {
     m_reg_ctx_sp = frame->GetRegisterContext();
     if (m_reg_ctx_sp) {
       const RegisterSet *reg_set = m_reg_ctx_sp->GetRegisterSet(m_reg_set_idx);
-      if (reg_set == NULL)
+      if (reg_set == nullptr)
         m_reg_ctx_sp.reset();
       else if (m_reg_set != reg_set) {
         SetValueDidChange(true);
@@ -175,7 +175,7 @@ bool ValueObjectRegisterSet::UpdateValue() {
 
 ValueObject *ValueObjectRegisterSet::CreateChildAtIndex(
     size_t idx, bool synthetic_array_member, int32_t synthetic_index) {
-  ValueObject *valobj = NULL;
+  ValueObject *valobj = nullptr;
   if (m_reg_ctx_sp && m_reg_set) {
     const size_t num_children = GetNumChildren();
     if (idx < num_children)
@@ -188,11 +188,11 @@ ValueObject *ValueObjectRegisterSet::CreateChildAtIndex(
 lldb::ValueObjectSP
 ValueObjectRegisterSet::GetChildMemberWithName(ConstString name,
                                                bool can_create) {
-  ValueObject *valobj = NULL;
+  ValueObject *valobj = nullptr;
   if (m_reg_ctx_sp && m_reg_set) {
     const RegisterInfo *reg_info =
         m_reg_ctx_sp->GetRegisterInfoByName(name.AsCString());
-    if (reg_info != NULL)
+    if (reg_info != nullptr)
       valobj = new ValueObjectRegister(*this, m_reg_ctx_sp,
                                        reg_info->kinds[eRegisterKindLLDB]);
   }
@@ -207,7 +207,7 @@ ValueObjectRegisterSet::GetIndexOfChildWithName(ConstString name) {
   if (m_reg_ctx_sp && m_reg_set) {
     const RegisterInfo *reg_info =
         m_reg_ctx_sp->GetRegisterInfoByName(name.AsCString());
-    if (reg_info != NULL)
+    if (reg_info != nullptr)
       return reg_info->kinds[eRegisterKindLLDB];
   }
   return UINT32_MAX;
@@ -289,7 +289,7 @@ bool ValueObjectRegister::UpdateValue() {
   m_error.Clear();
   ExecutionContext exe_ctx(GetExecutionContextRef());
   StackFrame *frame = exe_ctx.GetFramePtr();
-  if (frame == NULL) {
+  if (frame == nullptr) {
     m_reg_ctx_sp.reset();
     m_reg_value.Clear();
   }
diff --git a/lldb/source/Core/ValueObjectVariable.cpp b/lldb/source/Core/ValueObjectVariable.cpp
index dae9c709ecd8c..5aee82493b28f 100644
--- a/lldb/source/Core/ValueObjectVariable.cpp
+++ b/lldb/source/Core/ValueObjectVariable.cpp
@@ -57,7 +57,7 @@ ValueObjectVariable::ValueObjectVariable(ExecutionContextScope *exe_scope,
                                          const lldb::VariableSP &var_sp)
     : ValueObject(exe_scope), m_variable_sp(var_sp) {
   // Do not attempt to construct one of these objects with no variable!
-  assert(m_variable_sp.get() != NULL);
+  assert(m_variable_sp.get() != nullptr);
   m_name = var_sp->GetName();
 }
 
@@ -135,7 +135,7 @@ bool ValueObjectVariable::UpdateValue() {
     else
       m_error.SetErrorString("empty constant data");
     // constant bytes can't be edited - sorry
-    m_resolved_value.SetContext(Value::eContextTypeInvalid, NULL);
+    m_resolved_value.SetContext(Value::eContextTypeInvalid, nullptr);
   } else {
     lldb::addr_t loclist_base_load_addr = LLDB_INVALID_ADDRESS;
     ExecutionContext exe_ctx(GetExecutionContextRef());
@@ -261,7 +261,7 @@ bool ValueObjectVariable::UpdateValue() {
       SetValueIsValid(m_error.Success());
     } else {
       // could not find location, won't allow editing
-      m_resolved_value.SetContext(Value::eContextTypeInvalid, NULL);
+      m_resolved_value.SetContext(Value::eContextTypeInvalid, nullptr);
     }
   }
   return m_error.Success();
@@ -298,7 +298,7 @@ lldb::ModuleSP ValueObjectVariable::GetModule() {
 SymbolContextScope *ValueObjectVariable::GetSymbolContextScope() {
   if (m_variable_sp)
     return m_variable_sp->GetSymbolContextScope();
-  return NULL;
+  return nullptr;
 }
 
 bool ValueObjectVariable::GetDeclaration(Declaration &decl) {
diff --git a/lldb/source/DataFormatters/DataVisualization.cpp b/lldb/source/DataFormatters/DataVisualization.cpp
index 948aa71b8ac9a..08b3b34447bba 100644
--- a/lldb/source/DataFormatters/DataVisualization.cpp
+++ b/lldb/source/DataFormatters/DataVisualization.cpp
@@ -89,7 +89,7 @@ bool DataVisualization::Categories::GetCategory(ConstString category,
                                                 lldb::TypeCategoryImplSP &entry,
                                                 bool allow_create) {
   entry = GetFormatManager().GetCategory(category, allow_create);
-  return (entry.get() != NULL);
+  return (entry.get() != nullptr);
 }
 
 bool DataVisualization::Categories::GetCategory(
diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp
index 40d6407eadeb3..dd2808a7cf7c9 100644
--- a/lldb/source/DataFormatters/FormatManager.cpp
+++ b/lldb/source/DataFormatters/FormatManager.cpp
@@ -146,7 +146,7 @@ char FormatManager::GetFormatAsFormatChar(lldb::Format format) {
 const char *FormatManager::GetFormatAsCString(Format format) {
   if (format >= eFormatDefault && format < kNumFormats)
     return g_format_infos[format].format_name;
-  return NULL;
+  return nullptr;
 }
 
 void FormatManager::EnableAllCategories() {
@@ -297,7 +297,7 @@ FormatManager::GetFormatForType(lldb::TypeNameSpecifierImplSP type_sp) {
     lldb::TypeFormatImplSP format_current_sp =
         category_sp->GetFormatForType(type_sp);
     if (format_current_sp &&
-        (format_chosen_sp.get() == NULL ||
+        (format_chosen_sp.get() == nullptr ||
          (prio_category > category_sp->GetEnabledPosition()))) {
       prio_category = category_sp->GetEnabledPosition();
       format_chosen_sp = format_current_sp;
@@ -321,7 +321,7 @@ FormatManager::GetSummaryForType(lldb::TypeNameSpecifierImplSP type_sp) {
     lldb::TypeSummaryImplSP summary_current_sp =
         category_sp->GetSummaryForType(type_sp);
     if (summary_current_sp &&
-        (summary_chosen_sp.get() == NULL ||
+        (summary_chosen_sp.get() == nullptr ||
          (prio_category > category_sp->GetEnabledPosition()))) {
       prio_category = category_sp->GetEnabledPosition();
       summary_chosen_sp = summary_current_sp;
@@ -345,7 +345,7 @@ FormatManager::GetFilterForType(lldb::TypeNameSpecifierImplSP type_sp) {
     lldb::TypeFilterImplSP filter_current_sp(
         (TypeFilterImpl *)category_sp->GetFilterForType(type_sp).get());
     if (filter_current_sp &&
-        (filter_chosen_sp.get() == NULL ||
+        (filter_chosen_sp.get() == nullptr ||
          (prio_category > category_sp->GetEnabledPosition()))) {
       prio_category = category_sp->GetEnabledPosition();
       filter_chosen_sp = filter_current_sp;
@@ -370,7 +370,7 @@ FormatManager::GetSyntheticForType(lldb::TypeNameSpecifierImplSP type_sp) {
         (ScriptedSyntheticChildren *)category_sp->GetSyntheticForType(type_sp)
             .get());
     if (synth_current_sp &&
-        (synth_chosen_sp.get() == NULL ||
+        (synth_chosen_sp.get() == nullptr ||
          (prio_category > category_sp->GetEnabledPosition()))) {
       prio_category = category_sp->GetEnabledPosition();
       synth_chosen_sp = synth_current_sp;
@@ -394,7 +394,7 @@ FormatManager::GetValidatorForType(lldb::TypeNameSpecifierImplSP type_sp) {
     lldb::TypeValidatorImplSP validator_current_sp(
         category_sp->GetValidatorForType(type_sp).get());
     if (validator_current_sp &&
-        (validator_chosen_sp.get() == NULL ||
+        (validator_chosen_sp.get() == nullptr ||
          (prio_category > category_sp->GetEnabledPosition()))) {
       prio_category = category_sp->GetEnabledPosition();
       validator_chosen_sp = validator_current_sp;
diff --git a/lldb/source/DataFormatters/FormattersHelpers.cpp b/lldb/source/DataFormatters/FormattersHelpers.cpp
index b2bd1ebdfedde..8f007df03faa4 100644
--- a/lldb/source/DataFormatters/FormattersHelpers.cpp
+++ b/lldb/source/DataFormatters/FormattersHelpers.cpp
@@ -123,7 +123,7 @@ size_t lldb_private::formatters::ExtractIndexFromString(const char *item_name) {
   if (*item_name != '[')
     return UINT32_MAX;
   item_name++;
-  char *endptr = NULL;
+  char *endptr = nullptr;
   unsigned long int idx = ::strtoul(item_name, &endptr, 0);
   if (idx == 0 && endptr == item_name)
     return UINT32_MAX;
diff --git a/lldb/source/DataFormatters/StringPrinter.cpp b/lldb/source/DataFormatters/StringPrinter.cpp
index 4dfe0250c3b95..27d649bfc370e 100644
--- a/lldb/source/DataFormatters/StringPrinter.cpp
+++ b/lldb/source/DataFormatters/StringPrinter.cpp
@@ -271,7 +271,7 @@ static bool DumpUTFBufferToStream(
                                               llvm::ConversionFlags),
     const StringPrinter::ReadBufferAndDumpToStreamOptions &dump_options) {
   Stream &stream(*dump_options.GetStream());
-  if (dump_options.GetPrefixToken() != 0)
+  if (dump_options.GetPrefixToken() != nullptr)
     stream.Printf("%s", dump_options.GetPrefixToken());
   if (dump_options.GetQuote() != 0)
     stream.Printf("%c", dump_options.GetQuote());
@@ -373,7 +373,7 @@ static bool DumpUTFBufferToStream(
   }
   if (dump_options.GetQuote() != 0)
     stream.Printf("%c", dump_options.GetQuote());
-  if (dump_options.GetSuffixToken() != 0)
+  if (dump_options.GetSuffixToken() != nullptr)
     stream.Printf("%s", dump_options.GetSuffixToken());
   if (dump_options.GetIsTruncated())
     stream.Printf("...");
@@ -449,7 +449,7 @@ bool StringPrinter::ReadStringAndDumpToStream<
   const char *prefix_token = options.GetPrefixToken();
   char quote = options.GetQuote();
 
-  if (prefix_token != 0)
+  if (prefix_token != nullptr)
     options.GetStream()->Printf("%s%c", prefix_token, quote);
   else if (quote != 0)
     options.GetStream()->Printf("%c", quote);
@@ -497,7 +497,7 @@ bool StringPrinter::ReadStringAndDumpToStream<
 
   const char *suffix_token = options.GetSuffixToken();
 
-  if (suffix_token != 0)
+  if (suffix_token != nullptr)
     options.GetStream()->Printf("%c%s", quote, suffix_token);
   else if (quote != 0)
     options.GetStream()->Printf("%c", quote);
diff --git a/lldb/source/DataFormatters/TypeFormat.cpp b/lldb/source/DataFormatters/TypeFormat.cpp
index 72008b2431605..b526e9a744bc4 100644
--- a/lldb/source/DataFormatters/TypeFormat.cpp
+++ b/lldb/source/DataFormatters/TypeFormat.cpp
@@ -69,7 +69,7 @@ bool TypeFormatImpl_Format::FormatObject(ValueObject *valobj,
         // default value logic
         if (GetFormat() == eFormatCString) {
           lldb_private::Flags type_flags(compiler_type.GetTypeInfo(
-              NULL)); // disambiguate w.r.t. TypeFormatImpl::Flags
+              nullptr)); // disambiguate w.r.t. TypeFormatImpl::Flags
           if (type_flags.Test(eTypeIsPointer) &&
               !type_flags.Test(eTypeIsObjC)) {
             // if we are dumping a pointer as a c-string, get the pointee data
diff --git a/lldb/source/DataFormatters/TypeSynthetic.cpp b/lldb/source/DataFormatters/TypeSynthetic.cpp
index c40aed1f79234..23c80fc58d021 100644
--- a/lldb/source/DataFormatters/TypeSynthetic.cpp
+++ b/lldb/source/DataFormatters/TypeSynthetic.cpp
@@ -128,7 +128,7 @@ lldb::ValueObjectSP SyntheticChildrenFrontEnd::CreateValueObjectFromData(
 ScriptedSyntheticChildren::FrontEnd::FrontEnd(std::string pclass,
                                               ValueObject &backend)
     : SyntheticChildrenFrontEnd(backend), m_python_class(pclass),
-      m_wrapper_sp(), m_interpreter(NULL) {
+      m_wrapper_sp(), m_interpreter(nullptr) {
   if (backend == LLDB_INVALID_UID)
     return;
 
@@ -139,7 +139,7 @@ ScriptedSyntheticChildren::FrontEnd::FrontEnd(std::string pclass,
 
   m_interpreter = target_sp->GetDebugger().GetScriptInterpreter();
 
-  if (m_interpreter != NULL)
+  if (m_interpreter != nullptr)
     m_wrapper_sp = m_interpreter->CreateSyntheticScriptedProvider(
         m_python_class.c_str(), backend.GetSP());
 }
@@ -159,26 +159,26 @@ bool ScriptedSyntheticChildren::FrontEnd::IsValid() {
 }
 
 size_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren() {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return 0;
   return m_interpreter->CalculateNumChildren(m_wrapper_sp, UINT32_MAX);
 }
 
 size_t ScriptedSyntheticChildren::FrontEnd::CalculateNumChildren(uint32_t max) {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return 0;
   return m_interpreter->CalculateNumChildren(m_wrapper_sp, max);
 }
 
 bool ScriptedSyntheticChildren::FrontEnd::Update() {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return false;
 
   return m_interpreter->UpdateSynthProviderInstance(m_wrapper_sp);
 }
 
 bool ScriptedSyntheticChildren::FrontEnd::MightHaveChildren() {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return false;
 
   return m_interpreter->MightHaveChildrenSynthProviderInstance(m_wrapper_sp);
@@ -186,21 +186,21 @@ bool ScriptedSyntheticChildren::FrontEnd::MightHaveChildren() {
 
 size_t ScriptedSyntheticChildren::FrontEnd::GetIndexOfChildWithName(
     ConstString name) {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return UINT32_MAX;
   return m_interpreter->GetIndexOfChildWithName(m_wrapper_sp,
                                                 name.GetCString());
 }
 
 lldb::ValueObjectSP ScriptedSyntheticChildren::FrontEnd::GetSyntheticValue() {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return nullptr;
 
   return m_interpreter->GetSyntheticValue(m_wrapper_sp);
 }
 
 ConstString ScriptedSyntheticChildren::FrontEnd::GetSyntheticTypeName() {
-  if (!m_wrapper_sp || m_interpreter == NULL)
+  if (!m_wrapper_sp || m_interpreter == nullptr)
     return ConstString();
 
   return m_interpreter->GetSyntheticTypeName(m_wrapper_sp);
diff --git a/lldb/source/DataFormatters/ValueObjectPrinter.cpp b/lldb/source/DataFormatters/ValueObjectPrinter.cpp
index 5fe7e20bac15b..409cffed9b0f9 100644
--- a/lldb/source/DataFormatters/ValueObjectPrinter.cpp
+++ b/lldb/source/DataFormatters/ValueObjectPrinter.cpp
@@ -331,7 +331,7 @@ TypeSummaryImpl *ValueObjectPrinter::GetSummaryFormatter(bool null_if_omitted) {
                                  : m_valobj->GetSummaryFormat().get();
 
     if (m_options.m_omit_summary_depth > 0)
-      entry = NULL;
+      entry = nullptr;
     m_summary_formatter.first = entry;
     m_summary_formatter.second = true;
   }
@@ -415,8 +415,9 @@ bool ValueObjectPrinter::PrintValueAndSummaryIfNeeded(bool &value_printed,
       // explicitly)
       TypeSummaryImpl *entry = GetSummaryFormatter();
       if (!IsNil() && !IsUninitialized() && !m_value.empty() &&
-          (entry == NULL || (entry->DoesPrintValue(m_valobj) ||
-                             m_options.m_format != eFormatDefault) ||
+          (entry == nullptr ||
+           (entry->DoesPrintValue(m_valobj) ||
+            m_options.m_format != eFormatDefault) ||
            m_summary.empty()) &&
           !m_options.m_hide_value) {
         if (m_options.m_hide_pointer_value &&
diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
index 45c42dfad81b8..c86def753d923 100644
--- a/lldb/source/Expression/DWARFExpression.cpp
+++ b/lldb/source/Expression/DWARFExpression.cpp
@@ -649,7 +649,7 @@ static bool ReadRegisterValueAsScalar(RegisterContext *reg_ctx,
                                       lldb::RegisterKind reg_kind,
                                       uint32_t reg_num, Status *error_ptr,
                                       Value &value) {
-  if (reg_ctx == NULL) {
+  if (reg_ctx == nullptr) {
     if (error_ptr)
       error_ptr->SetErrorStringWithFormat("No register context in frame.\n");
   } else {
@@ -1249,7 +1249,7 @@ bool DWARFExpression::Evaluate(ExecutionContext *exe_ctx,
   if (IsLocationList()) {
     lldb::offset_t offset = 0;
     addr_t pc;
-    StackFrame *frame = NULL;
+    StackFrame *frame = nullptr;
     if (reg_ctx)
       pc = reg_ctx->GetPC();
     else {
@@ -1323,14 +1323,14 @@ bool DWARFExpression::Evaluate(
   }
   std::vector<Value> stack;
 
-  Process *process = NULL;
-  StackFrame *frame = NULL;
+  Process *process = nullptr;
+  StackFrame *frame = nullptr;
 
   if (exe_ctx) {
     process = exe_ctx->GetProcessPtr();
     frame = exe_ctx->GetFramePtr();
   }
-  if (reg_ctx == NULL && frame)
+  if (reg_ctx == nullptr && frame)
     reg_ctx = frame->GetRegisterContext().get();
 
   if (initial_value_ptr)
@@ -3166,7 +3166,7 @@ void DWARFExpression::PrintDWARFLocationList(
     s.Indent();
     if (cu)
       s.AddressRange(start_addr + base_addr, end_addr + base_addr,
-                     cu->GetAddressByteSize(), NULL, ": ");
+                     cu->GetAddressByteSize(), nullptr, ": ");
     uint32_t loc_length = debug_loc_data.GetU16(&offset);
 
     DataExtractor locationData(debug_loc_data, offset, loc_length);
diff --git a/lldb/source/Expression/ExpressionVariable.cpp b/lldb/source/Expression/ExpressionVariable.cpp
index faac977cde98e..97305dcf5a026 100644
--- a/lldb/source/Expression/ExpressionVariable.cpp
+++ b/lldb/source/Expression/ExpressionVariable.cpp
@@ -25,7 +25,7 @@ uint8_t *ExpressionVariable::GetValueBytes() {
     return const_cast<uint8_t *>(
         m_frozen_sp->GetDataExtractor().GetDataStart());
   }
-  return NULL;
+  return nullptr;
 }
 
 PersistentExpressionState::~PersistentExpressionState() {}
diff --git a/lldb/source/Expression/FunctionCaller.cpp b/lldb/source/Expression/FunctionCaller.cpp
index 5f5e3b86ccdbc..618c1a13212c4 100644
--- a/lldb/source/Expression/FunctionCaller.cpp
+++ b/lldb/source/Expression/FunctionCaller.cpp
@@ -35,10 +35,9 @@ FunctionCaller::FunctionCaller(ExecutionContextScope &exe_scope,
                                const Address &functionAddress,
                                const ValueList &arg_value_list,
                                const char *name)
-    : Expression(exe_scope, eKindFunctionCaller),
-      m_execution_unit_sp(), m_parser(),
-      m_jit_module_wp(), m_name(name ? name : "<unknown>"),
-      m_function_ptr(NULL), m_function_addr(functionAddress),
+    : Expression(exe_scope, eKindFunctionCaller), m_execution_unit_sp(),
+      m_parser(), m_jit_module_wp(), m_name(name ? name : "<unknown>"),
+      m_function_ptr(nullptr), m_function_addr(functionAddress),
       m_function_return_type(return_type),
       m_wrapper_function_name("__lldb_caller_function"),
       m_wrapper_struct_name("__lldb_caller_struct"), m_wrapper_args_addrs(),
@@ -138,7 +137,7 @@ bool FunctionCaller::WriteFunctionArguments(
 
   Process *process = exe_ctx.GetProcessPtr();
 
-  if (process == NULL)
+  if (process == nullptr)
     return return_value;
 
   lldb::ProcessSP jit_process_sp(m_jit_process_wp.lock());
@@ -239,11 +238,11 @@ lldb::ThreadPlanSP FunctionCaller::GetThreadPlanToCallFunction(
 
   // FIXME: Use the errors Stream for better error reporting.
   Thread *thread = exe_ctx.GetThreadPtr();
-  if (thread == NULL) {
+  if (thread == nullptr) {
     diagnostic_manager.PutString(
         eDiagnosticSeverityError,
         "Can't call a function without a valid thread.");
-    return NULL;
+    return nullptr;
   }
 
   // Okay, now run the function:
@@ -279,7 +278,7 @@ bool FunctionCaller::FetchFunctionResults(ExecutionContext &exe_ctx,
 
   Process *process = exe_ctx.GetProcessPtr();
 
-  if (process == NULL)
+  if (process == nullptr)
     return false;
 
   lldb::ProcessSP jit_process_sp(m_jit_process_wp.lock());
@@ -326,7 +325,7 @@ lldb::ExpressionResults FunctionCaller::ExecuteFunction(
 
   lldb::addr_t args_addr;
 
-  if (args_addr_ptr != NULL)
+  if (args_addr_ptr != nullptr)
     args_addr = *args_addr_ptr;
   else
     args_addr = LLDB_INVALID_ADDRESS;
@@ -376,7 +375,7 @@ lldb::ExpressionResults FunctionCaller::ExecuteFunction(
   if (exe_ctx.GetProcessPtr())
     exe_ctx.GetProcessPtr()->SetRunningUserExpression(false);
 
-  if (args_addr_ptr != NULL)
+  if (args_addr_ptr != nullptr)
     *args_addr_ptr = args_addr;
 
   if (return_value != lldb::eExpressionCompleted)
@@ -384,7 +383,7 @@ lldb::ExpressionResults FunctionCaller::ExecuteFunction(
 
   FetchFunctionResults(exe_ctx, args_addr, results);
 
-  if (args_addr_ptr == NULL)
+  if (args_addr_ptr == nullptr)
     DeallocateFunctionResults(exe_ctx, args_addr);
 
   return lldb::eExpressionCompleted;
diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp
index 7c0379cdee8e6..34a3488578cde 100644
--- a/lldb/source/Expression/IRExecutionUnit.cpp
+++ b/lldb/source/Expression/IRExecutionUnit.cpp
@@ -165,8 +165,8 @@ Status IRExecutionUnit::DisassembleFunction(Stream &stream,
 
   ArchSpec arch(target->GetArchitecture());
 
-  const char *plugin_name = NULL;
-  const char *flavor_string = NULL;
+  const char *plugin_name = nullptr;
+  const char *flavor_string = nullptr;
   lldb::DisassemblerSP disassembler_sp =
       Disassembler::FindPlugin(arch, flavor_string, plugin_name);
 
@@ -251,7 +251,7 @@ void IRExecutionUnit::GetRunnableInfo(Status &error, lldb::addr_t &func_addr,
     std::string s;
     llvm::raw_string_ostream oss(s);
 
-    m_module->print(oss, NULL);
+    m_module->print(oss, nullptr);
 
     oss.flush();
 
@@ -839,7 +839,7 @@ lldb::addr_t IRExecutionUnit::FindInSymbols(
     };
 
     if (sc.module_sp) {
-      sc.module_sp->FindFunctions(spec.name, NULL, spec.mask,
+      sc.module_sp->FindFunctions(spec.name, nullptr, spec.mask,
                                   true,  // include_symbols
                                   false, // include_inlines
                                   true,  // append
diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp
index 85eb78f539de9..24a3cd24fdd27 100644
--- a/lldb/source/Expression/IRInterpreter.cpp
+++ b/lldb/source/Expression/IRInterpreter.cpp
@@ -654,7 +654,7 @@ bool IRInterpreter::Interpret(llvm::Module &module, llvm::Function &function,
     std::string s;
     raw_string_ostream oss(s);
 
-    module.print(oss, NULL);
+    module.print(oss, nullptr);
 
     oss.flush();
 
diff --git a/lldb/source/Expression/IRMemoryMap.cpp b/lldb/source/Expression/IRMemoryMap.cpp
index a8bf6733d8dbb..70e62ac12b0e8 100644
--- a/lldb/source/Expression/IRMemoryMap.cpp
+++ b/lldb/source/Expression/IRMemoryMap.cpp
@@ -263,7 +263,7 @@ ExecutionContextScope *IRMemoryMap::GetBestExecutionContextScope() const {
   if (target_sp)
     return target_sp.get();
 
-  return NULL;
+  return nullptr;
 }
 
 IRMemoryMap::Allocation::Allocation(lldb::addr_t process_alloc,
diff --git a/lldb/source/Expression/LLVMUserExpression.cpp b/lldb/source/Expression/LLVMUserExpression.cpp
index 6f5972b32bbd2..5a1b750318c9f 100644
--- a/lldb/source/Expression/LLVMUserExpression.cpp
+++ b/lldb/source/Expression/LLVMUserExpression.cpp
@@ -50,7 +50,7 @@ LLVMUserExpression::LLVMUserExpression(ExecutionContextScope &exe_scope,
       m_allow_objc(false), m_transformed_text(), m_execution_unit_sp(),
       m_materializer_up(), m_jit_module_wp(), m_enforce_valid_object(true),
       m_in_cplusplus_method(false), m_in_objectivec_method(false),
-      m_in_static_method(false), m_needs_object_ptr(false), m_target(NULL),
+      m_in_static_method(false), m_needs_object_ptr(false), m_target(nullptr),
       m_can_interpret(false), m_materialized_address(LLDB_INVALID_ADDRESS) {}
 
 LLVMUserExpression::~LLVMUserExpression() {
@@ -181,7 +181,7 @@ LLVMUserExpression::DoExecute(DiagnosticManager &diagnostic_manager,
 
       if (execution_result == lldb::eExpressionInterrupted ||
           execution_result == lldb::eExpressionHitBreakpoint) {
-        const char *error_desc = NULL;
+        const char *error_desc = nullptr;
 
         if (call_plan_sp) {
           lldb::StopInfoSP real_stop_info_sp = call_plan_sp->GetRealStopInfo();
diff --git a/lldb/source/Expression/UserExpression.cpp b/lldb/source/Expression/UserExpression.cpp
index 47aadba3578ac..a72e2a07599e5 100644
--- a/lldb/source/Expression/UserExpression.cpp
+++ b/lldb/source/Expression/UserExpression.cpp
@@ -174,7 +174,7 @@ lldb::ExpressionResults UserExpression::Evaluate(
 
   Process *process = exe_ctx.GetProcessPtr();
 
-  if (process == NULL || process->GetState() != lldb::eStateStopped) {
+  if (process == nullptr || process->GetState() != lldb::eStateStopped) {
     if (execution_policy == eExecutionPolicyAlways) {
       if (log)
         log->Printf("== [UserExpression::Evaluate] Expression may not run, but "
@@ -186,7 +186,7 @@ lldb::ExpressionResults UserExpression::Evaluate(
     }
   }
 
-  if (process == NULL || !process->CanJIT())
+  if (process == nullptr || !process->CanJIT())
     execution_policy = eExecutionPolicyNever;
 
   // We need to set the expression execution thread here, turns out parse can
@@ -375,7 +375,7 @@ lldb::ExpressionResults UserExpression::Evaluate(
     return lldb::eExpressionInterrupted;
   }
 
-  if (result_valobj_sp.get() == NULL) {
+  if (result_valobj_sp.get() == nullptr) {
     result_valobj_sp = ValueObjectConstResult::Create(
         exe_ctx.GetBestExecutionContextScope(), error);
   }
diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp
index 2b647be572ab5..d3a70aeaa3267 100644
--- a/lldb/source/Host/common/Editline.cpp
+++ b/lldb/source/Host/common/Editline.cpp
@@ -163,7 +163,7 @@ class EditlineHistory {
   // Use static GetHistory() function to get a EditlineHistorySP to one of
   // these objects
   EditlineHistory(const std::string &prefix, uint32_t size, bool unique_entries)
-      : m_history(NULL), m_event(), m_prefix(prefix), m_path() {
+      : m_history(nullptr), m_event(), m_prefix(prefix), m_path() {
     m_history = history_winit();
     history_w(m_history, &m_event, H_SETSIZE, size);
     if (unique_entries)
@@ -202,7 +202,7 @@ class EditlineHistory {
 
     if (m_history) {
       history_wend(m_history);
-      m_history = NULL;
+      m_history = nullptr;
     }
   }
 
@@ -224,7 +224,7 @@ class EditlineHistory {
     return history_sp;
   }
 
-  bool IsValid() const { return m_history != NULL; }
+  bool IsValid() const { return m_history != nullptr; }
 
   HistoryW *GetHistoryPtr() { return m_history; }
 
@@ -514,11 +514,13 @@ int Editline::GetCharacter(EditLineGetCharType *c) {
     // Read returns, immediately lock the mutex again and check if we were
     // interrupted.
     m_output_mutex.unlock();
-    int read_count = m_input_connection.Read(&ch, 1, llvm::None, status, NULL);
+    int read_count =
+        m_input_connection.Read(&ch, 1, llvm::None, status, nullptr);
     m_output_mutex.lock();
     if (m_editor_status == EditorStatus::Interrupted) {
       while (read_count > 0 && status == lldb::eConnectionStatusSuccess)
-        read_count = m_input_connection.Read(&ch, 1, llvm::None, status, NULL);
+        read_count =
+            m_input_connection.Read(&ch, 1, llvm::None, status, nullptr);
       lldbassert(status == lldb::eConnectionStatusInterrupted);
       return 0;
     }
@@ -1081,7 +1083,7 @@ void Editline::ConfigureEditor(bool multiline) {
 
   // Allow user-specific customization prior to registering bindings we
   // absolutely require
-  el_source(m_editline, NULL);
+  el_source(m_editline, nullptr);
 
   // Register an internal binding that external developers shouldn't use
   el_wset(m_editline, EL_ADDFN, EditLineConstString("lldb-revert-line"),
diff --git a/lldb/source/Host/common/File.cpp b/lldb/source/Host/common/File.cpp
index 85e625ca78f89..c8c8d7a0d496c 100644
--- a/lldb/source/Host/common/File.cpp
+++ b/lldb/source/Host/common/File.cpp
@@ -65,11 +65,11 @@ static const char *GetStreamOpenModeFromOptions(uint32_t options) {
   } else if (options & File::eOpenOptionWrite) {
     return "w";
   }
-  return NULL;
+  return nullptr;
 }
 
 int File::kInvalidDescriptor = -1;
-FILE *File::kInvalidStream = NULL;
+FILE *File::kInvalidStream = nullptr;
 
 File::~File() { Close(); }
 
@@ -634,9 +634,9 @@ size_t File::Printf(const char *format, ...) {
 size_t File::PrintfVarArg(const char *format, va_list args) {
   size_t result = 0;
   if (DescriptorIsValid()) {
-    char *s = NULL;
+    char *s = nullptr;
     result = vasprintf(&s, format, args);
-    if (s != NULL) {
+    if (s != nullptr) {
       if (result > 0) {
         size_t s_len = result;
         Write(s, s_len);
diff --git a/lldb/source/Host/common/Host.cpp b/lldb/source/Host/common/Host.cpp
index d3e41a2b044f6..be206406e93a6 100644
--- a/lldb/source/Host/common/Host.cpp
+++ b/lldb/source/Host/common/Host.cpp
@@ -112,7 +112,7 @@ HostThread Host::StartMonitoringChildProcess(
   ::snprintf(thread_name, sizeof(thread_name),
              "<lldb.host.wait4(pid=%" PRIu64 ")>", pid);
   return ThreadLauncher::LaunchThread(
-      thread_name, MonitorChildProcessThreadFunction, info_ptr, NULL);
+      thread_name, MonitorChildProcessThreadFunction, info_ptr, nullptr);
 }
 
 #ifndef __linux__
@@ -219,7 +219,7 @@ static thread_result_t MonitorChildProcessThreadFunction(void *arg) {
       bool exited = false;
       int signal = 0;
       int exit_status = 0;
-      const char *status_cstr = NULL;
+      const char *status_cstr = nullptr;
       if (WIFSTOPPED(status)) {
         signal = WSTOPSIG(status);
         status_cstr = "STOPPED";
@@ -282,7 +282,7 @@ static thread_result_t MonitorChildProcessThreadFunction(void *arg) {
   if (log)
     log->Printf("%s (arg = %p) thread exiting...", __FUNCTION__, arg);
 
-  return NULL;
+  return nullptr;
 }
 
 #endif // #if !defined (__APPLE__) && !defined (_WIN32)
@@ -393,7 +393,7 @@ const char *Host::GetSignalAsCString(int signo) {
   default:
     break;
   }
-  return NULL;
+  return nullptr;
 }
 
 #endif
diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp
index a5f876a7232af..82f519e14f17b 100644
--- a/lldb/source/Host/common/HostNativeThreadBase.cpp
+++ b/lldb/source/Host/common/HostNativeThreadBase.cpp
@@ -18,10 +18,10 @@ using namespace lldb;
 using namespace lldb_private;
 
 HostNativeThreadBase::HostNativeThreadBase()
-    : m_thread(LLDB_INVALID_HOST_THREAD), m_result(0) {}
+    : m_thread(LLDB_INVALID_HOST_THREAD), m_result(nullptr) {}
 
 HostNativeThreadBase::HostNativeThreadBase(thread_t thread)
-    : m_thread(thread), m_result(0) {}
+    : m_thread(thread), m_result(nullptr) {}
 
 lldb::thread_t HostNativeThreadBase::GetSystemHandle() const {
   return m_thread;
@@ -37,7 +37,7 @@ bool HostNativeThreadBase::IsJoinable() const {
 
 void HostNativeThreadBase::Reset() {
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = 0;
+  m_result = nullptr;
 }
 
 bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
@@ -47,7 +47,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
 lldb::thread_t HostNativeThreadBase::Release() {
   lldb::thread_t result = m_thread;
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = 0;
+  m_result = nullptr;
 
   return result;
 }
diff --git a/lldb/source/Host/common/OptionParser.cpp b/lldb/source/Host/common/OptionParser.cpp
index 97704988d36ad..92ff6f63d951f 100644
--- a/lldb/source/Host/common/OptionParser.cpp
+++ b/lldb/source/Host/common/OptionParser.cpp
@@ -55,11 +55,11 @@ std::string OptionParser::GetShortOptionString(struct option *long_options) {
   int i = 0;
   bool done = false;
   while (!done) {
-    if (long_options[i].name == 0 && long_options[i].has_arg == 0 &&
-        long_options[i].flag == 0 && long_options[i].val == 0) {
+    if (long_options[i].name == nullptr && long_options[i].has_arg == 0 &&
+        long_options[i].flag == nullptr && long_options[i].val == 0) {
       done = true;
     } else {
-      if (long_options[i].flag == NULL && isalpha(long_options[i].val)) {
+      if (long_options[i].flag == nullptr && isalpha(long_options[i].val)) {
         s.append(1, (char)long_options[i].val);
         switch (long_options[i].has_arg) {
         default:
diff --git a/lldb/source/Host/common/ProcessRunLock.cpp b/lldb/source/Host/common/ProcessRunLock.cpp
index 65a5dcca31fba..a931da7187665 100644
--- a/lldb/source/Host/common/ProcessRunLock.cpp
+++ b/lldb/source/Host/common/ProcessRunLock.cpp
@@ -12,7 +12,7 @@
 namespace lldb_private {
 
 ProcessRunLock::ProcessRunLock() : m_running(false) {
-  int err = ::pthread_rwlock_init(&m_rwlock, NULL);
+  int err = ::pthread_rwlock_init(&m_rwlock, nullptr);
   (void)err;
 }
 
diff --git a/lldb/source/Host/common/SocketAddress.cpp b/lldb/source/Host/common/SocketAddress.cpp
index 06171d49bf68f..882fd24558f74 100644
--- a/lldb/source/Host/common/SocketAddress.cpp
+++ b/lldb/source/Host/common/SocketAddress.cpp
@@ -236,11 +236,11 @@ SocketAddress::GetAddressInfo(const char *hostname, const char *servname,
   hints.ai_protocol = ai_protocol;
   hints.ai_flags = ai_flags;
 
-  struct addrinfo *service_info_list = NULL;
+  struct addrinfo *service_info_list = nullptr;
   int err = ::getaddrinfo(hostname, servname, &hints, &service_info_list);
   if (err == 0 && service_info_list) {
-    for (struct addrinfo *service_ptr = service_info_list; service_ptr != NULL;
-         service_ptr = service_ptr->ai_next) {
+    for (struct addrinfo *service_ptr = service_info_list;
+         service_ptr != nullptr; service_ptr = service_ptr->ai_next) {
       addr_list.emplace_back(SocketAddress(service_ptr));
     }
   }
diff --git a/lldb/source/Host/common/TCPSocket.cpp b/lldb/source/Host/common/TCPSocket.cpp
index 3f11e4e3b49f3..f97ca9eb55b86 100644
--- a/lldb/source/Host/common/TCPSocket.cpp
+++ b/lldb/source/Host/common/TCPSocket.cpp
@@ -143,7 +143,7 @@ Status TCPSocket::Connect(llvm::StringRef name) {
     return error;
 
   auto addresses = lldb_private::SocketAddress::GetAddressInfo(
-      host_str.c_str(), NULL, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
+      host_str.c_str(), nullptr, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
   for (auto address : addresses) {
     error = CreateSocket(address.GetFamily());
     if (error.Fail())
@@ -182,7 +182,7 @@ Status TCPSocket::Listen(llvm::StringRef name, int backlog) {
   if (host_str == "*")
     host_str = "0.0.0.0";
   auto addresses = lldb_private::SocketAddress::GetAddressInfo(
-      host_str.c_str(), NULL, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
+      host_str.c_str(), nullptr, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
   for (auto address : addresses) {
     int fd = Socket::CreateSocket(address.GetFamily(), kType, IPPROTO_TCP,
                                   m_child_processes_inherit, error);
diff --git a/lldb/source/Host/common/TaskPool.cpp b/lldb/source/Host/common/TaskPool.cpp
index 062fa4abd06c0..d63d9f35d1e21 100644
--- a/lldb/source/Host/common/TaskPool.cpp
+++ b/lldb/source/Host/common/TaskPool.cpp
@@ -73,7 +73,7 @@ void TaskPoolImpl::AddTask(std::function<void()> &&task_fn) {
 
 lldb::thread_result_t TaskPoolImpl::WorkerPtr(void *pool) {
   Worker((TaskPoolImpl *)pool);
-  return 0;
+  return nullptr;
 }
 
 void TaskPoolImpl::Worker(TaskPoolImpl *pool) {
diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index 9c60f0d1190cc..4b536b03d852b 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -112,7 +112,7 @@ bool TerminalState::Save(int fd, bool save_process_group) {
     m_tflags = ::fcntl(fd, F_GETFL, 0);
 #endif
 #ifdef LLDB_CONFIG_TERMIOS_SUPPORTED
-    if (m_termios_up == NULL)
+    if (m_termios_up == nullptr)
       m_termios_up.reset(new struct termios);
     int err = ::tcgetattr(fd, m_termios_up.get());
     if (err != 0)
@@ -151,7 +151,7 @@ bool TerminalState::Restore() const {
 
     if (ProcessGroupIsValid()) {
       // Save the original signal handler.
-      void (*saved_sigttou_callback)(int) = NULL;
+      void (*saved_sigttou_callback)(int) = nullptr;
       saved_sigttou_callback = (void (*)(int))signal(SIGTTOU, SIG_IGN);
       // Set the process group
       tcsetpgrp(fd, m_process_group);
@@ -177,7 +177,7 @@ bool TerminalState::TFlagsIsValid() const { return m_tflags != -1; }
 // Returns true if m_ttystate is valid
 bool TerminalState::TTYStateIsValid() const {
 #ifdef LLDB_CONFIG_TERMIOS_SUPPORTED
-  return m_termios_up != 0;
+  return m_termios_up != nullptr;
 #else
   return false;
 #endif
diff --git a/lldb/source/Host/common/ThreadLauncher.cpp b/lldb/source/Host/common/ThreadLauncher.cpp
index ede864db108fa..2eff981bfa85d 100644
--- a/lldb/source/Host/common/ThreadLauncher.cpp
+++ b/lldb/source/Host/common/ThreadLauncher.cpp
@@ -49,7 +49,7 @@ HostThread ThreadLauncher::LaunchThread(llvm::StringRef name,
   }
 #endif
 
-  pthread_attr_t *thread_attr_ptr = NULL;
+  pthread_attr_t *thread_attr_ptr = nullptr;
   pthread_attr_t thread_attr;
   bool destroy_attr = false;
   if (min_stack_byte_size > 0) {
diff --git a/lldb/source/Host/common/XML.cpp b/lldb/source/Host/common/XML.cpp
index 006b49f4e1064..cb23ac17ef53b 100644
--- a/lldb/source/Host/common/XML.cpp
+++ b/lldb/source/Host/common/XML.cpp
@@ -134,7 +134,7 @@ XMLNode XMLNode::GetChild() const {
 
 llvm::StringRef XMLNode::GetAttributeValue(const char *name,
                                            const char *fail_value) const {
-  const char *attr_value = NULL;
+  const char *attr_value = nullptr;
 #if defined(LIBXML2_DEFINED)
 
   if (IsValid())
diff --git a/lldb/source/Host/linux/Host.cpp b/lldb/source/Host/linux/Host.cpp
index 03fa798f0f5b8..f6a8766a71c56 100644
--- a/lldb/source/Host/linux/Host.cpp
+++ b/lldb/source/Host/linux/Host.cpp
@@ -216,12 +216,12 @@ uint32_t Host::FindProcesses(const ProcessInstanceInfoMatch &match_info,
 
   DIR *dirproc = opendir(procdir);
   if (dirproc) {
-    struct dirent *direntry = NULL;
+    struct dirent *direntry = nullptr;
     const uid_t our_uid = getuid();
     const lldb::pid_t our_pid = getpid();
     bool all_users = match_info.GetMatchAllUsers();
 
-    while ((direntry = readdir(dirproc)) != NULL) {
+    while ((direntry = readdir(dirproc)) != nullptr) {
       if (direntry->d_type != DT_DIR || !IsDirNumeric(direntry->d_name))
         continue;
 
@@ -269,8 +269,8 @@ bool Host::FindProcessThreads(const lldb::pid_t pid, TidMap &tids_to_attach) {
   DIR *dirproc = opendir(process_task_dir.c_str());
 
   if (dirproc) {
-    struct dirent *direntry = NULL;
-    while ((direntry = readdir(dirproc)) != NULL) {
+    struct dirent *direntry = nullptr;
+    while ((direntry = readdir(dirproc)) != nullptr) {
       if (direntry->d_type != DT_DIR || !IsDirNumeric(direntry->d_name))
         continue;
 
diff --git a/lldb/source/Host/linux/HostInfoLinux.cpp b/lldb/source/Host/linux/HostInfoLinux.cpp
index 2b2207e7afa9c..78dd77b61fa71 100644
--- a/lldb/source/Host/linux/HostInfoLinux.cpp
+++ b/lldb/source/Host/linux/HostInfoLinux.cpp
@@ -121,7 +121,8 @@ llvm::StringRef HostInfoLinux::GetDistributionId() {
 
       // retrieve the distribution id string.
       char distribution_id[256] = {'\0'};
-      if (fgets(distribution_id, sizeof(distribution_id) - 1, file) != NULL) {
+      if (fgets(distribution_id, sizeof(distribution_id) - 1, file) !=
+          nullptr) {
         if (log)
           log->Printf("distribution id command returned \"%s\"",
                       distribution_id);
diff --git a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
index 4bbebd627d44b..167569dca69e9 100644
--- a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
+++ b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
@@ -111,7 +111,7 @@ ConnectionFileDescriptor::~ConnectionFileDescriptor() {
   if (log)
     log->Printf("%p ConnectionFileDescriptor::~ConnectionFileDescriptor ()",
                 static_cast<void *>(this));
-  Disconnect(NULL);
+  Disconnect(nullptr);
   CloseCommandPipe();
 }
 
diff --git a/lldb/source/Host/posix/HostThreadPosix.cpp b/lldb/source/Host/posix/HostThreadPosix.cpp
index caa137ae3d029..d78bba517f69c 100644
--- a/lldb/source/Host/posix/HostThreadPosix.cpp
+++ b/lldb/source/Host/posix/HostThreadPosix.cpp
@@ -29,7 +29,7 @@ Status HostThreadPosix::Join(lldb::thread_result_t *result) {
     error.SetError(err, lldb::eErrorTypePOSIX);
   } else {
     if (result)
-      *result = NULL;
+      *result = nullptr;
     error.SetError(EINVAL, eErrorTypePOSIX);
   }
 
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index f6027636bd3cc..b951cce8c5984 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -1739,7 +1739,7 @@ bool CommandInterpreter::HandleCommand(const char *command_line,
     log->Printf("HandleCommand, (revised) command_string: '%s'",
                 command_string.c_str());
     const bool wants_raw_input =
-        (cmd_obj != NULL) ? cmd_obj->WantsRawCommandString() : false;
+        (cmd_obj != nullptr) ? cmd_obj->WantsRawCommandString() : false;
     log->Printf("HandleCommand, wants_raw_input:'%s'",
                 wants_raw_input ? "True" : "False");
   }
@@ -2214,7 +2214,7 @@ void CommandInterpreter::SourceInitFileHome(CommandReturnObject &result) {
 
 const char *CommandInterpreter::GetCommandPrefix() {
   const char *prefix = GetDebugger().GetIOHandlerCommandPrefix();
-  return prefix == NULL ? "" : prefix;
+  return prefix == nullptr ? "" : prefix;
 }
 
 PlatformSP CommandInterpreter::GetPlatform(bool prefer_target_platform) {
@@ -3206,7 +3206,7 @@ CommandInterpreter::ResolveCommandImpl(std::string &command_line,
   if (!scratch_command.empty())
     revised_command_line.Printf(" %s", scratch_command.c_str());
 
-  if (cmd_obj != NULL)
+  if (cmd_obj != nullptr)
     command_line = revised_command_line.GetString();
 
   return cmd_obj;
diff --git a/lldb/source/Interpreter/OptionValue.cpp b/lldb/source/Interpreter/OptionValue.cpp
index f45109f31fba5..00c8642595b72 100644
--- a/lldb/source/Interpreter/OptionValue.cpp
+++ b/lldb/source/Interpreter/OptionValue.cpp
@@ -165,13 +165,13 @@ const OptionValueFormat *OptionValue::GetAsFormat() const {
 OptionValueLanguage *OptionValue::GetAsLanguage() {
   if (GetType() == OptionValue::eTypeLanguage)
     return static_cast<OptionValueLanguage *>(this);
-  return NULL;
+  return nullptr;
 }
 
 const OptionValueLanguage *OptionValue::GetAsLanguage() const {
   if (GetType() == OptionValue::eTypeLanguage)
     return static_cast<const OptionValueLanguage *>(this);
-  return NULL;
+  return nullptr;
 }
 
 OptionValueFormatEntity *OptionValue::GetAsFormatEntity() {
@@ -520,7 +520,7 @@ lldb::OptionValueSP OptionValue::CreateValueFromCStringForTypeMask(
     value_sp.reset(new OptionValueFormat(eFormatInvalid));
     break;
   case 1u << eTypeFormatEntity:
-    value_sp.reset(new OptionValueFormatEntity(NULL));
+    value_sp.reset(new OptionValueFormatEntity(nullptr));
     break;
   case 1u << eTypeLanguage:
     value_sp.reset(new OptionValueLanguage(eLanguageTypeUnknown));
diff --git a/lldb/source/Plugins/ABI/SysV-arm/ABISysV_arm.cpp b/lldb/source/Plugins/ABI/SysV-arm/ABISysV_arm.cpp
index c97c76c0a91d2..dd47ac7cbe3cc 100644
--- a/lldb/source/Plugins/ABI/SysV-arm/ABISysV_arm.cpp
+++ b/lldb/source/Plugins/ABI/SysV-arm/ABISysV_arm.cpp
@@ -1739,8 +1739,8 @@ ValueObjectSP ABISysV_arm::GetReturnValueObjectImpl(
           uint32_t index = 0;
           for (index = 0; index < num_children; index++) {
             std::string name;
-            base_type =
-                compiler_type.GetFieldAtIndex(index, name, NULL, NULL, NULL);
+            base_type = compiler_type.GetFieldAtIndex(index, name, nullptr,
+                                                      nullptr, nullptr);
 
             if (base_type.IsFloatingPointType(float_count, is_complex)) {
               llvm::Optional<uint64_t> base_byte_size =
@@ -1799,7 +1799,7 @@ ValueObjectSP ABISysV_arm::GetReturnValueObjectImpl(
 
       const RegisterInfo *reg_info =
           reg_ctx->GetRegisterInfo(eRegisterKindDWARF, regnum);
-      if (reg_info == NULL)
+      if (reg_info == nullptr)
         break;
 
       RegisterValue reg_value;
diff --git a/lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
index 36261a335a8ba..44c75fc953c82 100644
--- a/lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
+++ b/lldb/source/Plugins/Disassembler/llvm/DisassemblerLLVMC.cpp
@@ -356,7 +356,7 @@ class InstructionLLVMC : public lldb_private::Instruction {
               return;
             else {
               const uint8_t *bytes = data.PeekData(offset, inst_size);
-              if (bytes == NULL)
+              if (bytes == nullptr)
                 return;
               m_opcode_name.assign(".byte");
               m_opcode.SetOpcodeBytes(bytes, inst_size);
@@ -956,7 +956,7 @@ DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
     return Instance();
 
   std::unique_ptr<llvm::MCContext> context_up(
-      new llvm::MCContext(asm_info_up.get(), reg_info_up.get(), 0));
+      new llvm::MCContext(asm_info_up.get(), reg_info_up.get(), nullptr));
   if (!context_up)
     return Instance();
 
@@ -1079,7 +1079,7 @@ bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
 
 DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
                                      const char *flavor_string)
-    : Disassembler(arch, flavor_string), m_exe_ctx(NULL), m_inst(NULL),
+    : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
       m_data_from_file(false) {
   if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
     m_flavor.assign("default");
@@ -1242,7 +1242,7 @@ Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
     if (disasm_up.get() && disasm_up->IsValid())
       return disasm_up.release();
   }
-  return NULL;
+  return nullptr;
 }
 
 size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
@@ -1329,7 +1329,7 @@ const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
 bool DisassemblerLLVMC::FlavorValidForArchSpec(
     const lldb_private::ArchSpec &arch, const char *flavor) {
   llvm::Triple triple = arch.GetTriple();
-  if (flavor == NULL || strcmp(flavor, "default") == 0)
+  if (flavor == nullptr || strcmp(flavor, "default") == 0)
     return true;
 
   if (triple.getArch() == llvm::Triple::x86 ||
@@ -1358,7 +1358,7 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
   if (*type_ptr) {
     if (m_exe_ctx && m_inst) {
       // std::string remove_this_prior_to_checkin;
-      Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : NULL;
+      Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
       Address value_so_addr;
       Address pc_so_addr;
       if (m_inst->UsingFileAddress()) {
@@ -1423,8 +1423,8 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
   }
 
   *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
-  *name = NULL;
-  return NULL;
+  *name = nullptr;
+  return nullptr;
 }
 
 // PluginInterface protocol
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index 760f344e5ab45..242085ac872a3 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -73,9 +73,9 @@ static constexpr OptionEnumValueElement g_kaslr_kernel_scan_enum_values[] = {
      "on 32-bit targets)."}};
 
 static constexpr PropertyDefinition g_properties[] = {
-    {"load-kexts", OptionValue::eTypeBoolean, true, true, NULL, {},
+    {"load-kexts", OptionValue::eTypeBoolean, true, true, nullptr, {},
      "Automatically loads kext images when attaching to a kernel."},
-    {"scan-type", OptionValue::eTypeEnum, true, eKASLRScanNearPC, NULL,
+    {"scan-type", OptionValue::eTypeEnum, true, eKASLRScanNearPC, nullptr,
      OptionEnumValues(g_kaslr_kernel_scan_enum_values),
      "Control how many reads lldb will make while searching for a Darwin "
      "kernel on attach."}};
@@ -99,13 +99,13 @@ class DynamicLoaderDarwinKernelProperties : public Properties {
   bool GetLoadKexts() const {
     const uint32_t idx = ePropertyLoadKexts;
     return m_collection_sp->GetPropertyAtIndexAsBoolean(
-        NULL, idx, g_properties[idx].default_uint_value != 0);
+        nullptr, idx, g_properties[idx].default_uint_value != 0);
   }
 
   KASLRScanType GetScanType() const {
     const uint32_t idx = ePropertyScanType;
     return (KASLRScanType)m_collection_sp->GetPropertyAtIndexAsEnumeration(
-        NULL, idx, g_properties[idx].default_uint_value);
+        nullptr, idx, g_properties[idx].default_uint_value);
   }
 };
 
@@ -132,7 +132,7 @@ DynamicLoader *DynamicLoaderDarwinKernel::CreateInstance(Process *process,
       ObjectFile *object_file = exe_module->GetObjectFile();
       if (object_file) {
         if (object_file->GetStrata() != ObjectFile::eStrataKernel) {
-          return NULL;
+          return nullptr;
         }
       }
     }
@@ -149,7 +149,7 @@ DynamicLoader *DynamicLoaderDarwinKernel::CreateInstance(Process *process,
     case llvm::Triple::WatchOS:
     // NEED_BRIDGEOS_TRIPLE case llvm::Triple::BridgeOS:
       if (triple_ref.getVendor() != llvm::Triple::Apple) {
-        return NULL;
+        return nullptr;
       }
       break;
     // If we have triple like armv7-unknown-unknown, we should try looking for
@@ -157,7 +157,7 @@ DynamicLoader *DynamicLoaderDarwinKernel::CreateInstance(Process *process,
     case llvm::Triple::UnknownOS:
       break;
     default:
-      return NULL;
+      return nullptr;
       break;
     }
   }
@@ -171,7 +171,7 @@ DynamicLoader *DynamicLoaderDarwinKernel::CreateInstance(Process *process,
     process->SetCanRunCode(false);
     return new DynamicLoaderDarwinKernel(process, kernel_load_address);
   }
-  return NULL;
+  return nullptr;
 }
 
 lldb::addr_t
@@ -198,11 +198,11 @@ DynamicLoaderDarwinKernel::SearchForDarwinKernel(Process *process) {
 lldb::addr_t
 DynamicLoaderDarwinKernel::SearchForKernelAtSameLoadAddr(Process *process) {
   Module *exe_module = process->GetTarget().GetExecutableModulePointer();
-  if (exe_module == NULL)
+  if (exe_module == nullptr)
     return LLDB_INVALID_ADDRESS;
 
   ObjectFile *exe_objfile = exe_module->GetObjectFile();
-  if (exe_objfile == NULL)
+  if (exe_objfile == nullptr)
     return LLDB_INVALID_ADDRESS;
 
   if (exe_objfile->GetType() != ObjectFile::eTypeExecutable ||
@@ -282,7 +282,7 @@ DynamicLoaderDarwinKernel::SearchForKernelNearPC(Process *process) {
   }
 
   ThreadSP thread = process->GetThreadList().GetSelectedThread();
-  if (thread.get() == NULL)
+  if (thread.get() == nullptr)
     return LLDB_INVALID_ADDRESS;
   addr_t pc = thread->GetRegisterContext()->GetPC(LLDB_INVALID_ADDRESS);
 
@@ -454,7 +454,7 @@ DynamicLoaderDarwinKernel::CheckForKernelImageAtAddress(lldb::addr_t addr,
       return UUID();
 
     ObjectFile *exe_objfile = memory_module_sp->GetObjectFile();
-    if (exe_objfile == NULL) {
+    if (exe_objfile == nullptr) {
       if (log)
         log->Printf("DynamicLoaderDarwinKernel::CheckForKernelImageAtAddress "
                     "found a binary at 0x%" PRIx64
@@ -541,7 +541,7 @@ void DynamicLoaderDarwinKernel::Clear(bool clear_process) {
     m_process->ClearBreakpointSiteByID(m_break_id);
 
   if (clear_process)
-    m_process = NULL;
+    m_process = nullptr;
   m_kernel.Clear();
   m_known_kexts.clear();
   m_kext_summary_header_ptr_addr.Clear();
@@ -638,7 +638,7 @@ UUID DynamicLoaderDarwinKernel::KextImageInfo::GetUUID() const {
 bool DynamicLoaderDarwinKernel::KextImageInfo::ReadMemoryModule(
     Process *process) {
   Log *log = lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_HOST);
-  if (m_memory_module_sp.get() != NULL)
+  if (m_memory_module_sp.get() != nullptr)
     return true;
   if (m_load_address == LLDB_INVALID_ADDRESS)
     return false;
@@ -658,7 +658,7 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::ReadMemoryModule(
   ModuleSP memory_module_sp =
       process->ReadModuleFromMemory(file_spec, m_load_address, size_to_read);
 
-  if (memory_module_sp.get() == NULL)
+  if (memory_module_sp.get() == nullptr)
     return false;
 
   bool is_kernel = false;
@@ -806,9 +806,9 @@ bool DynamicLoaderDarwinKernel::KextImageInfo::LoadImageUsingMemoryModule(
           FileSpec kext_filespec(m_name.c_str());
 	  FileSpecList search_paths = target.GetExecutableSearchPaths();
           kext_bundle_module_spec.GetFileSpec() = kext_filespec;
-          platform_sp->GetSharedModule(
-              kext_bundle_module_spec, process, m_module_sp,
-              &search_paths, NULL, NULL);
+          platform_sp->GetSharedModule(kext_bundle_module_spec, process,
+                                       m_module_sp, &search_paths, nullptr,
+                                       nullptr);
         }
       }
 
@@ -1378,7 +1378,7 @@ uint32_t DynamicLoaderDarwinKernel::ReadKextSummaries(
       lldb::offset_t offset = kext_summary_offset;
       const void *name_data =
           extractor.GetData(&offset, KERNEL_MODULE_MAX_NAME);
-      if (name_data == NULL)
+      if (name_data == nullptr)
         break;
       image_infos[i].SetName((const char *)name_data);
       UUID uuid = UUID::fromOptionalData(extractor.GetData(&offset, 16), 16);
@@ -1426,7 +1426,7 @@ void DynamicLoaderDarwinKernel::KextImageInfo::PutToLog(Log *log) const {
 // Dump the _dyld_all_image_infos members and all current image infos that we
 // have parsed to the file handle provided.
 void DynamicLoaderDarwinKernel::PutToLog(Log *log) const {
-  if (log == NULL)
+  if (log == nullptr)
     return;
 
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
@@ -1464,7 +1464,7 @@ void DynamicLoaderDarwinKernel::SetNotificationBreakpointIfNeeded() {
     module_spec_list.Append(m_kernel.GetModule()->GetFileSpec());
     Breakpoint *bp =
         m_process->GetTarget()
-            .CreateBreakpoint(&module_spec_list, NULL,
+            .CreateBreakpoint(&module_spec_list, nullptr,
                               "OSKextLoadedKextSummariesUpdated",
                               eFunctionNameTypeFull, eLanguageTypeUnknown, 0,
                               skip_prologue, internal_bp, hardware)
diff --git a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp
index 0fb05e99a072e..23c8416f49860 100644
--- a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp
@@ -104,7 +104,7 @@ DynamicLoader *DynamicLoaderHexagonDYLD::CreateInstance(Process *process,
 
   if (create)
     return new DynamicLoaderHexagonDYLD(process);
-  return NULL;
+  return nullptr;
 }
 
 DynamicLoaderHexagonDYLD::DynamicLoaderHexagonDYLD(Process *process)
@@ -420,7 +420,7 @@ DynamicLoaderHexagonDYLD::GetStepThroughTrampolinePlan(Thread &thread,
   const SymbolContext &context = frame->GetSymbolContext(eSymbolContextSymbol);
   Symbol *sym = context.symbol;
 
-  if (sym == NULL || !sym->IsTrampoline())
+  if (sym == nullptr || !sym->IsTrampoline())
     return thread_plan_sp;
 
   const ConstString sym_name = sym->GetMangled().GetName(
diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
index 7475b1a87aad5..339aeaec50f78 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
@@ -84,7 +84,7 @@ void DynamicLoaderDarwin::DidLaunch() {
 void DynamicLoaderDarwin::Clear(bool clear_process) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
   if (clear_process)
-    m_process = NULL;
+    m_process = nullptr;
   m_dyld_image_infos.clear();
   m_dyld_image_infos_stop_id = UINT32_MAX;
   m_dyld.Clear(false);
@@ -115,7 +115,7 @@ ModuleSP DynamicLoaderDarwin::FindTargetModuleForImageInfo(
       // We'll call Target::ModulesDidLoad after all the modules have been
       // added to the target, don't let it be called for every one.
       module_sp = target.GetOrCreateModule(module_spec, false /* notify */);
-      if (!module_sp || module_sp->GetObjectFile() == NULL)
+      if (!module_sp || module_sp->GetObjectFile() == nullptr)
         module_sp = m_process->ReadModuleFromMemory(image_info.file_spec,
                                                     image_info.address);
 
@@ -533,8 +533,8 @@ void DynamicLoaderDarwin::UpdateSpecialBinariesFromNewImageInfos(
 
   if (exe_idx != UINT32_MAX) {
     const bool can_create = true;
-    ModuleSP exe_module_sp(
-        FindTargetModuleForImageInfo(image_infos[exe_idx], can_create, NULL));
+    ModuleSP exe_module_sp(FindTargetModuleForImageInfo(image_infos[exe_idx],
+                                                        can_create, nullptr));
     if (exe_module_sp) {
       if (log)
         log->Printf("Found executable module: %s",
@@ -549,8 +549,8 @@ void DynamicLoaderDarwin::UpdateSpecialBinariesFromNewImageInfos(
 
   if (dyld_idx != UINT32_MAX) {
     const bool can_create = true;
-    ModuleSP dyld_sp =
-        FindTargetModuleForImageInfo(image_infos[dyld_idx], can_create, NULL);
+    ModuleSP dyld_sp = FindTargetModuleForImageInfo(image_infos[dyld_idx],
+                                                    can_create, nullptr);
     if (dyld_sp.get()) {
       if (log)
         log->Printf("Found dyld module: %s",
@@ -567,7 +567,7 @@ void DynamicLoaderDarwin::UpdateDYLDImageInfoFromNewImageInfo(
   if (image_info.header.filetype == llvm::MachO::MH_DYLINKER) {
     const bool can_create = true;
     ModuleSP dyld_sp =
-        FindTargetModuleForImageInfo(image_info, can_create, NULL);
+        FindTargetModuleForImageInfo(image_info, can_create, nullptr);
     if (dyld_sp.get()) {
       Target &target = m_process->GetTarget();
       target.GetImages().AppendIfNeeded(dyld_sp);
@@ -605,7 +605,7 @@ bool DynamicLoaderDarwin::AddModulesUsingImageInfos(
     m_dyld_image_infos.push_back(image_infos[idx]);
 
     ModuleSP image_module_sp(
-        FindTargetModuleForImageInfo(image_infos[idx], true, NULL));
+        FindTargetModuleForImageInfo(image_infos[idx], true, nullptr));
 
     if (image_module_sp) {
       ObjectFile *objfile = image_module_sp->GetObjectFile();
@@ -628,7 +628,7 @@ bool DynamicLoaderDarwin::AddModulesUsingImageInfos(
               commpage_image_module_sp = target.GetOrCreateModule(module_spec, 
                                                                true /* notify */);
               if (!commpage_image_module_sp ||
-                  commpage_image_module_sp->GetObjectFile() == NULL) {
+                  commpage_image_module_sp->GetObjectFile() == nullptr) {
                 commpage_image_module_sp = m_process->ReadModuleFromMemory(
                     image_infos[idx].file_spec, image_infos[idx].address);
                 // Always load a memory image right away in the target in case
@@ -686,15 +686,16 @@ bool DynamicLoaderDarwin::AlwaysRelyOnEHUnwindInfo(SymbolContext &sym_ctx) {
   if (sym_ctx.symbol) {
     module_sp = sym_ctx.symbol->GetAddressRef().GetModule();
   }
-  if (module_sp.get() == NULL && sym_ctx.function) {
+  if (module_sp.get() == nullptr && sym_ctx.function) {
     module_sp =
         sym_ctx.function->GetAddressRange().GetBaseAddress().GetModule();
   }
-  if (module_sp.get() == NULL)
+  if (module_sp.get() == nullptr)
     return false;
 
   ObjCLanguageRuntime *objc_runtime = m_process->GetObjCLanguageRuntime();
-  return objc_runtime != NULL && objc_runtime->IsModuleObjCLibrary(module_sp);
+  return objc_runtime != nullptr &&
+         objc_runtime->IsModuleObjCLibrary(module_sp);
 }
 
 // Dump a Segment to the file handle provided.
@@ -719,7 +720,7 @@ DynamicLoaderDarwin::ImageInfo::FindSegment(ConstString name) const {
     if (segments[i].name == name)
       return &segments[i];
   }
-  return NULL;
+  return nullptr;
 }
 
 // Dump an image info structure to the file handle provided.
@@ -791,7 +792,7 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));
   TargetSP target_sp(thread.CalculateTarget());
 
-  if (current_symbol != NULL) {
+  if (current_symbol != nullptr) {
     std::vector<Address> addresses;
 
     if (current_symbol->IsTrampoline()) {
diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOS.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOS.cpp
index 29e4ac0654ab0..6bc65ecef35dd 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOS.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOS.cpp
@@ -68,7 +68,7 @@ DynamicLoader *DynamicLoaderMacOS::CreateInstance(Process *process,
 
   if (create)
     return new DynamicLoaderMacOS(process);
-  return NULL;
+  return nullptr;
 }
 
 // Constructor
@@ -334,7 +334,7 @@ void DynamicLoaderMacOS::AddBinaries(
 // Dump the _dyld_all_image_infos members and all current image infos that we
 // have parsed to the file handle provided.
 void DynamicLoaderMacOS::PutToLog(Log *log) const {
-  if (log == NULL)
+  if (log == nullptr)
     return;
 }
 
diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
index 9c0125a6db045..7d00380bfcd45 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
@@ -88,7 +88,7 @@ DynamicLoader *DynamicLoaderMacOSXDYLD::CreateInstance(Process *process,
 
   if (create)
     return new DynamicLoaderMacOSXDYLD(process);
-  return NULL;
+  return nullptr;
 }
 
 // Constructor
@@ -608,7 +608,7 @@ bool DynamicLoaderMacOSXDYLD::RemoveModulesUsingImageInfosAddress(
         // We'll remove them all at one go later on.
 
         ModuleSP unload_image_module_sp(
-            FindTargetModuleForImageInfo(image_infos[idx], false, NULL));
+            FindTargetModuleForImageInfo(image_infos[idx], false, nullptr));
         if (unload_image_module_sp.get()) {
           // When we unload, be sure to use the image info from the old list,
           // since that has sections correctly filled in.
@@ -794,7 +794,7 @@ bool DynamicLoaderMacOSXDYLD::ReadMachHeader(lldb::addr_t addr,
     if (data.GetU32(&offset, &header->cputype,
                     (sizeof(llvm::MachO::mach_header) / sizeof(uint32_t)) -
                         1)) {
-      if (load_command_data == NULL)
+      if (load_command_data == nullptr)
         return true; // We were able to read the mach_header and weren't asked
                      // to read the load command bytes
 
@@ -922,7 +922,7 @@ void DynamicLoaderMacOSXDYLD::UpdateImageInfosHeaderAndLoadCommands(
                           &data))
         continue;
 
-      ParseLoadCommands(data, image_infos[i], NULL);
+      ParseLoadCommands(data, image_infos[i], nullptr);
 
       if (image_infos[i].header.filetype == llvm::MachO::MH_EXECUTE)
         exe_idx = i;
@@ -933,8 +933,8 @@ void DynamicLoaderMacOSXDYLD::UpdateImageInfosHeaderAndLoadCommands(
 
   if (exe_idx < image_infos.size()) {
     const bool can_create = true;
-    ModuleSP exe_module_sp(
-        FindTargetModuleForImageInfo(image_infos[exe_idx], can_create, NULL));
+    ModuleSP exe_module_sp(FindTargetModuleForImageInfo(image_infos[exe_idx],
+                                                        can_create, nullptr));
 
     if (exe_module_sp) {
       UpdateImageLoadAddress(exe_module_sp.get(), image_infos[exe_idx]);
@@ -969,7 +969,7 @@ void DynamicLoaderMacOSXDYLD::UpdateImageInfosHeaderAndLoadCommands(
 // Dump the _dyld_all_image_infos members and all current image infos that we
 // have parsed to the file handle provided.
 void DynamicLoaderMacOSXDYLD::PutToLog(Log *log) const {
-  if (log == NULL)
+  if (log == nullptr)
     return;
 
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
index 31ab9faca705a..587b2d36accf2 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
@@ -68,7 +68,7 @@ DynamicLoader *DynamicLoaderPOSIXDYLD::CreateInstance(Process *process,
 
   if (create)
     return new DynamicLoaderPOSIXDYLD(process);
-  return NULL;
+  return nullptr;
 }
 
 DynamicLoaderPOSIXDYLD::DynamicLoaderPOSIXDYLD(Process *process)
@@ -463,7 +463,7 @@ DynamicLoaderPOSIXDYLD::GetStepThroughTrampolinePlan(Thread &thread,
   const SymbolContext &context = frame->GetSymbolContext(eSymbolContextSymbol);
   Symbol *sym = context.symbol;
 
-  if (sym == NULL || !sym->IsTrampoline())
+  if (sym == nullptr || !sym->IsTrampoline())
     return thread_plan_sp;
 
   ConstString sym_name = sym->GetName();
@@ -638,7 +638,7 @@ addr_t DynamicLoaderPOSIXDYLD::GetEntryPoint() {
   if (m_entry_point != LLDB_INVALID_ADDRESS)
     return m_entry_point;
 
-  if (m_auxv == NULL)
+  if (m_auxv == nullptr)
     return LLDB_INVALID_ADDRESS;
 
   AuxVector::iterator I = m_auxv->FindEntry(AuxVector::AUXV_AT_ENTRY);
diff --git a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp
index 88ca2e2de01c9..6bc951c4d35b2 100644
--- a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp
@@ -43,7 +43,7 @@ DynamicLoader *DynamicLoaderStatic::CreateInstance(Process *process,
 
   if (create)
     return new DynamicLoaderStatic(process);
-  return NULL;
+  return nullptr;
 }
 
 // Constructor
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTDumper.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTDumper.cpp
index 3fecb0c7da929..369f88327dd9d 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ASTDumper.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTDumper.cpp
@@ -85,7 +85,7 @@ void ASTDumper::ToLog(Log *log, const char *prefix) {
 
   memcpy(str, m_dump.c_str(), len);
 
-  char *end = NULL;
+  char *end = nullptr;
 
   end = strchr(str, '\n');
 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp
index 71e130708976e..5dc39e8dac4d0 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp
@@ -34,8 +34,9 @@ using namespace lldb_private;
 
 ASTResultSynthesizer::ASTResultSynthesizer(ASTConsumer *passthrough,
                                            bool top_level, Target &target)
-    : m_ast_context(NULL), m_passthrough(passthrough), m_passthrough_sema(NULL),
-      m_target(target), m_sema(NULL), m_top_level(top_level) {
+    : m_ast_context(nullptr), m_passthrough(passthrough),
+      m_passthrough_sema(nullptr), m_target(target), m_sema(nullptr),
+      m_top_level(top_level) {
   if (!m_passthrough)
     return;
 
@@ -311,7 +312,7 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body,
                 (is_lvalue ? "lvalue" : "rvalue"), s.c_str());
   }
 
-  clang::VarDecl *result_decl = NULL;
+  clang::VarDecl *result_decl = nullptr;
 
   if (is_lvalue) {
     IdentifierInfo *result_ptr_id;
@@ -329,14 +330,14 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body,
 
     QualType ptr_qual_type;
 
-    if (expr_qual_type->getAs<ObjCObjectType>() != NULL)
+    if (expr_qual_type->getAs<ObjCObjectType>() != nullptr)
       ptr_qual_type = Ctx.getObjCObjectPointerType(expr_qual_type);
     else
       ptr_qual_type = Ctx.getPointerType(expr_qual_type);
 
     result_decl =
         VarDecl::Create(Ctx, DC, SourceLocation(), SourceLocation(),
-                        result_ptr_id, ptr_qual_type, NULL, SC_Static);
+                        result_ptr_id, ptr_qual_type, nullptr, SC_Static);
 
     if (!result_decl)
       return false;
@@ -350,8 +351,9 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body,
   } else {
     IdentifierInfo &result_id = Ctx.Idents.get("$__lldb_expr_result");
 
-    result_decl = VarDecl::Create(Ctx, DC, SourceLocation(), SourceLocation(),
-                                  &result_id, expr_qual_type, NULL, SC_Static);
+    result_decl =
+        VarDecl::Create(Ctx, DC, SourceLocation(), SourceLocation(), &result_id,
+                        expr_qual_type, nullptr, SC_Static);
 
     if (!result_decl)
       return false;
@@ -507,7 +509,7 @@ void ASTResultSynthesizer::InitializeSema(Sema &S) {
 }
 
 void ASTResultSynthesizer::ForgetSema() {
-  m_sema = NULL;
+  m_sema = nullptr;
 
   if (m_passthrough_sema)
     m_passthrough_sema->ForgetSema();
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp
index 1d8e6931cb9ad..190eacaa2b62b 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp
@@ -29,9 +29,9 @@ using namespace lldb_private;
 ASTStructExtractor::ASTStructExtractor(ASTConsumer *passthrough,
                                        const char *struct_name,
                                        ClangFunctionCaller &function)
-    : m_ast_context(NULL), m_passthrough(passthrough), m_passthrough_sema(NULL),
-      m_sema(NULL), m_action(NULL), m_function(function),
-      m_struct_name(struct_name) {
+    : m_ast_context(nullptr), m_passthrough(passthrough),
+      m_passthrough_sema(nullptr), m_sema(nullptr), m_action(nullptr),
+      m_function(function), m_struct_name(struct_name) {
   if (!m_passthrough)
     return;
 
@@ -57,7 +57,7 @@ void ASTStructExtractor::ExtractFromFunctionDecl(FunctionDecl *F) {
   if (!body_compound_stmt)
     return; // do we have to handle this?
 
-  RecordDecl *struct_decl = NULL;
+  RecordDecl *struct_decl = nullptr;
 
   StringRef desired_name(m_struct_name);
 
@@ -177,8 +177,8 @@ void ASTStructExtractor::InitializeSema(Sema &S) {
 }
 
 void ASTStructExtractor::ForgetSema() {
-  m_sema = NULL;
-  m_action = NULL;
+  m_sema = nullptr;
+  m_action = nullptr;
 
   if (m_passthrough_sema)
     m_passthrough_sema->ForgetSema();
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index 92191793d19e9..632594f1c460a 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -51,7 +51,7 @@ class ScopedLexicalDeclEraser {
 
 ClangASTSource::ClangASTSource(const lldb::TargetSP &target)
     : m_import_in_progress(false), m_lookups_enabled(false), m_target(target),
-      m_ast_context(NULL), m_active_lexical_decls(), m_active_lookups() {
+      m_ast_context(nullptr), m_active_lexical_decls(), m_active_lookups() {
   if (!target->GetUseModernTypeLookup()) {
     m_ast_importer_sp = m_target->GetClangASTImporter();
   }
@@ -441,8 +441,8 @@ void ClangASTSource::CompleteType(clang::ObjCInterfaceDecl *interface_decl) {
     return;
   }
 
-  Decl *original_decl = NULL;
-  ASTContext *original_ctx = NULL;
+  Decl *original_decl = nullptr;
+  ASTContext *original_ctx = nullptr;
 
   if (m_ast_importer_sp->ResolveDeclOrigin(interface_decl, &original_decl,
                                            &original_ctx)) {
@@ -475,12 +475,12 @@ clang::ObjCInterfaceDecl *ClangASTSource::GetCompleteObjCInterface(
   lldb::ProcessSP process(m_target->GetProcessSP());
 
   if (!process)
-    return NULL;
+    return nullptr;
 
   ObjCLanguageRuntime *language_runtime(process->GetObjCLanguageRuntime());
 
   if (!language_runtime)
-    return NULL;
+    return nullptr;
 
   ConstString class_name(interface_decl->getNameAsString().c_str());
 
@@ -488,7 +488,7 @@ clang::ObjCInterfaceDecl *ClangASTSource::GetCompleteObjCInterface(
       language_runtime->LookupInCompleteClassCache(class_name));
 
   if (!complete_type_sp)
-    return NULL;
+    return nullptr;
 
   TypeFromUser complete_type =
       TypeFromUser(complete_type_sp->GetFullCompilerType());
@@ -496,7 +496,7 @@ clang::ObjCInterfaceDecl *ClangASTSource::GetCompleteObjCInterface(
       complete_type.GetOpaqueQualType();
 
   if (!complete_opaque_type)
-    return NULL;
+    return nullptr;
 
   const clang::Type *complete_clang_type =
       QualType::getFromOpaquePtr(complete_opaque_type).getTypePtr();
@@ -504,7 +504,7 @@ clang::ObjCInterfaceDecl *ClangASTSource::GetCompleteObjCInterface(
       dyn_cast<ObjCInterfaceType>(complete_clang_type);
 
   if (!complete_interface_type)
-    return NULL;
+    return nullptr;
 
   ObjCInterfaceDecl *complete_iface_decl(complete_interface_type->getDecl());
 
@@ -569,8 +569,8 @@ void ClangASTSource::FindExternalLexicalDecls(
           current_id, static_cast<const void *>(m_ast_context));
   }
 
-  Decl *original_decl = NULL;
-  ASTContext *original_ctx = NULL;
+  Decl *original_decl = nullptr;
+  ASTContext *original_ctx = nullptr;
 
   if (!m_ast_importer_sp->ResolveDeclOrigin(context_decl, &original_decl,
                                             &original_ctx))
@@ -993,9 +993,9 @@ void ClangASTSource::FindExternalVisibleDecls(
 
 template <class D> class TaggedASTDecl {
 public:
-  TaggedASTDecl() : decl(NULL) {}
+  TaggedASTDecl() : decl(nullptr) {}
   TaggedASTDecl(D *_decl) : decl(_decl) {}
-  bool IsValid() const { return (decl != NULL); }
+  bool IsValid() const { return (decl != nullptr); }
   bool IsInvalid() const { return !IsValid(); }
   D *operator->() const { return decl; }
   D *decl;
@@ -1028,7 +1028,7 @@ template <class D> class DeclFromUser : public TaggedASTDecl<D> {
 template <class D>
 DeclFromUser<D> DeclFromParser<D>::GetOrigin(ClangASTSource &source) {
   DeclFromUser<> origin_decl;
-  source.ResolveDeclOrigin(this->decl, &origin_decl.decl, NULL);
+  source.ResolveDeclOrigin(this->decl, &origin_decl.decl, nullptr);
   if (origin_decl.IsInvalid())
     return DeclFromUser<D>();
   return DeclFromUser<D>(dyn_cast<D>(origin_decl.decl));
@@ -1158,8 +1158,8 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
     return;
 
   do {
-    Decl *original_decl = NULL;
-    ASTContext *original_ctx = NULL;
+    Decl *original_decl = nullptr;
+    ASTContext *original_ctx = nullptr;
 
     m_ast_importer_sp->ResolveDeclOrigin(interface_decl, &original_decl,
                                          &original_ctx);
@@ -2063,12 +2063,12 @@ clang::NamedDecl *NameSearchContext::AddVarDecl(const CompilerType &type) {
   assert(type && "Type for variable must be valid!");
 
   if (!type.IsValid())
-    return NULL;
+    return nullptr;
 
   ClangASTContext *lldb_ast =
       llvm::dyn_cast<ClangASTContext>(type.GetTypeSystem());
   if (!lldb_ast)
-    return NULL;
+    return nullptr;
 
   IdentifierInfo *ii = m_decl_name.getAsIdentifierInfo();
 
@@ -2076,7 +2076,7 @@ clang::NamedDecl *NameSearchContext::AddVarDecl(const CompilerType &type) {
 
   clang::NamedDecl *Decl = VarDecl::Create(
       *ast, const_cast<DeclContext *>(m_decl_context), SourceLocation(),
-      SourceLocation(), ii, ClangUtil::GetQualType(type), 0, SC_Static);
+      SourceLocation(), ii, ClangUtil::GetQualType(type), nullptr, SC_Static);
   m_decls.push_back(Decl);
 
   return Decl;
@@ -2087,15 +2087,15 @@ clang::NamedDecl *NameSearchContext::AddFunDecl(const CompilerType &type,
   assert(type && "Type for variable must be valid!");
 
   if (!type.IsValid())
-    return NULL;
+    return nullptr;
 
   if (m_function_types.count(type))
-    return NULL;
+    return nullptr;
 
   ClangASTContext *lldb_ast =
       llvm::dyn_cast<ClangASTContext>(type.GetTypeSystem());
   if (!lldb_ast)
-    return NULL;
+    return nullptr;
 
   m_function_types.insert(type);
 
@@ -2124,7 +2124,7 @@ clang::NamedDecl *NameSearchContext::AddFunDecl(const CompilerType &type,
 
   clang::FunctionDecl *func_decl = FunctionDecl::Create(
       *ast, context, SourceLocation(), SourceLocation(), decl_name, qual_type,
-      NULL, SC_Extern, isInlineSpecified, hasWrittenPrototype,
+      nullptr, SC_Extern, isInlineSpecified, hasWrittenPrototype,
       isConstexprSpecified);
 
   // We have to do more than just synthesize the FunctionDecl.  We have to
@@ -2143,9 +2143,10 @@ clang::NamedDecl *NameSearchContext::AddFunDecl(const CompilerType &type,
     for (ArgIndex = 0; ArgIndex < NumArgs; ++ArgIndex) {
       QualType arg_qual_type(func_proto_type->getParamType(ArgIndex));
 
-      parm_var_decls.push_back(ParmVarDecl::Create(
-          *ast, const_cast<DeclContext *>(context), SourceLocation(),
-          SourceLocation(), NULL, arg_qual_type, NULL, SC_Static, NULL));
+      parm_var_decls.push_back(
+          ParmVarDecl::Create(*ast, const_cast<DeclContext *>(context),
+                              SourceLocation(), SourceLocation(), nullptr,
+                              arg_qual_type, nullptr, SC_Static, nullptr));
     }
 
     func_decl->setParams(ArrayRef<ParmVarDecl *>(parm_var_decls));
@@ -2166,7 +2167,7 @@ clang::NamedDecl *NameSearchContext::AddFunDecl(const CompilerType &type,
       ClangASTContext::IsOperator(decl_name.getAsString().c_str(), op_kind)) {
     if (!ClangASTContext::CheckOverloadedOperatorKindParameterCount(
             false, op_kind, func_proto_type->getNumParams()))
-      return NULL;
+      return nullptr;
   }
   m_decls.push_back(func_decl);
 
@@ -2214,7 +2215,7 @@ NameSearchContext::AddTypeDecl(const CompilerType &clang_type) {
       return (NamedDecl *)interface_decl;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 void NameSearchContext::AddLookupResult(clang::DeclContextLookupResult result) {
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h
index 624c86a51740d..7a8bacf48a8fd 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h
@@ -45,15 +45,15 @@ class ClangASTSource : public ClangExternalASTSourceCommon,
   ~ClangASTSource() override;
 
   /// Interface stubs.
-  clang::Decl *GetExternalDecl(uint32_t) override { return NULL; }
-  clang::Stmt *GetExternalDeclStmt(uint64_t) override { return NULL; }
+  clang::Decl *GetExternalDecl(uint32_t) override { return nullptr; }
+  clang::Stmt *GetExternalDeclStmt(uint64_t) override { return nullptr; }
   clang::Selector GetExternalSelector(uint32_t) override {
     return clang::Selector();
   }
   uint32_t GetNumExternalSelectors() override { return 0; }
   clang::CXXBaseSpecifier *
   GetExternalCXXBaseSpecifiers(uint64_t Offset) override {
-    return NULL;
+    return nullptr;
   }
   void MaterializeVisibleDecls(const clang::DeclContext *DC) { return; }
 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index 83cbe517643a1..496d5b40e3e79 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -367,7 +367,7 @@ bool ClangExpressionDeclMap::AddPersistentVariable(const NamedDecl *decl,
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
   ExecutionContext &exe_ctx = m_parser_vars->m_exe_ctx;
   Target *target = exe_ctx.GetTargetPtr();
-  if (target == NULL)
+  if (target == nullptr)
     return false;
 
   ClangASTContext *context(target->GetScratchClangASTContext());
@@ -951,7 +951,7 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
 
       // Clang is looking for the type of "this"
 
-      if (frame == NULL)
+      if (frame == nullptr)
         return;
 
       // Find the block that defines the function represented by "sym_ctx"
@@ -1277,7 +1277,8 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
       }
     }
     if (target) {
-      var = FindGlobalVariable(*target, module_sp, name, &namespace_decl, NULL);
+      var = FindGlobalVariable(*target, module_sp, name, &namespace_decl,
+                               nullptr);
 
       if (var) {
         valobj = ValueObjectVariable::Create(target, var);
@@ -1434,8 +1435,8 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
     }
 
     if (sc_list.GetSize()) {
-      Symbol *extern_symbol = NULL;
-      Symbol *non_extern_symbol = NULL;
+      Symbol *extern_symbol = nullptr;
+      Symbol *non_extern_symbol = nullptr;
 
       for (uint32_t index = 0, num_indices = sc_list.GetSize();
            index < num_indices; ++index) {
@@ -1452,13 +1453,13 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
           if (decl_ctx.IsClassMethod(nullptr, nullptr, nullptr))
             continue;
 
-          AddOneFunction(context, sym_ctx.function, NULL, current_id);
+          AddOneFunction(context, sym_ctx.function, nullptr, current_id);
           context.m_found.function_with_type_info = true;
           context.m_found.function = true;
         } else if (sym_ctx.symbol) {
           if (sym_ctx.symbol->GetType() == eSymbolTypeReExported && target) {
             sym_ctx.symbol = sym_ctx.symbol->ResolveReExportedSymbol(*target);
-            if (sym_ctx.symbol == NULL)
+            if (sym_ctx.symbol == nullptr)
               continue;
           }
 
@@ -1484,10 +1485,10 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
 
       if (!context.m_found.function_with_type_info) {
         if (extern_symbol) {
-          AddOneFunction(context, NULL, extern_symbol, current_id);
+          AddOneFunction(context, nullptr, extern_symbol, current_id);
           context.m_found.function = true;
         } else if (non_extern_symbol) {
-          AddOneFunction(context, NULL, non_extern_symbol, current_id);
+          AddOneFunction(context, nullptr, non_extern_symbol, current_id);
           context.m_found.function = true;
         }
       }
@@ -1723,7 +1724,7 @@ void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context,
 
   bool is_reference = pt.IsReferenceType();
 
-  NamedDecl *var_decl = NULL;
+  NamedDecl *var_decl = nullptr;
   if (is_reference)
     var_decl = context.AddVarDecl(pt);
   else
@@ -1740,7 +1741,7 @@ void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context,
       entity->GetParserVars(GetParserID());
   parser_vars->m_parser_type = pt;
   parser_vars->m_named_decl = var_decl;
-  parser_vars->m_llvm_value = NULL;
+  parser_vars->m_llvm_value = nullptr;
   parser_vars->m_lldb_value = var_location;
   parser_vars->m_lldb_var = var;
 
@@ -1783,7 +1784,7 @@ void ClangExpressionDeclMap::AddOneVariable(NameSearchContext &context,
           ->GetParserVars(GetParserID());
   parser_vars->m_parser_type = parser_type;
   parser_vars->m_named_decl = var_decl;
-  parser_vars->m_llvm_value = NULL;
+  parser_vars->m_llvm_value = nullptr;
   parser_vars->m_lldb_value.Clear();
 
   if (log) {
@@ -1802,7 +1803,7 @@ void ClangExpressionDeclMap::AddOneGenericVariable(NameSearchContext &context,
 
   Target *target = m_parser_vars->m_exe_ctx.GetTargetPtr();
 
-  if (target == NULL)
+  if (target == nullptr)
     return;
 
   ASTContext *scratch_ast_context =
@@ -1841,7 +1842,7 @@ void ClangExpressionDeclMap::AddOneGenericVariable(NameSearchContext &context,
 
   parser_vars->m_parser_type = parser_type;
   parser_vars->m_named_decl = var_decl;
-  parser_vars->m_llvm_value = NULL;
+  parser_vars->m_llvm_value = nullptr;
   parser_vars->m_lldb_sym = &symbol;
 
   if (log) {
@@ -1889,7 +1890,7 @@ bool ClangExpressionDeclMap::ResolveUnknownTypes() {
           var_type.getAsOpaquePtr(),
           ClangASTContext::GetASTContext(&var_decl->getASTContext()));
 
-      lldb::opaque_compiler_type_t copied_type = 0;
+      lldb::opaque_compiler_type_t copied_type = nullptr;
       if (m_ast_importer_sp) {
         copied_type = m_ast_importer_sp->CopyType(
             scratch_ast_context->getASTContext(), &var_decl->getASTContext(),
@@ -1962,7 +1963,7 @@ void ClangExpressionDeclMap::AddOneRegister(NameSearchContext &context,
       entity->GetParserVars(GetParserID());
   parser_vars->m_parser_type = parser_clang_type;
   parser_vars->m_named_decl = var_decl;
-  parser_vars->m_llvm_value = NULL;
+  parser_vars->m_llvm_value = nullptr;
   parser_vars->m_lldb_value.Clear();
   entity->m_flags |= ClangExpressionVariable::EVBareRegister;
 
@@ -1981,7 +1982,7 @@ void ClangExpressionDeclMap::AddOneFunction(NameSearchContext &context,
 
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
 
-  NamedDecl *function_decl = NULL;
+  NamedDecl *function_decl = nullptr;
   Address fun_address;
   CompilerType function_clang_type;
 
@@ -2141,7 +2142,7 @@ void ClangExpressionDeclMap::AddOneFunction(NameSearchContext &context,
   }
 
   parser_vars->m_named_decl = function_decl;
-  parser_vars->m_llvm_value = NULL;
+  parser_vars->m_llvm_value = nullptr;
 
   if (log) {
     std::string function_str =
@@ -2194,7 +2195,7 @@ void ClangExpressionDeclMap::AddThisType(NameSearchContext &context,
     CXXMethodDecl *method_decl =
         ClangASTContext::GetASTContext(m_ast_context)
             ->AddMethodToCXXRecordType(
-                copied_clang_type.GetOpaqueQualType(), "$__lldb_expr", NULL,
+                copied_clang_type.GetOpaqueQualType(), "$__lldb_expr", nullptr,
                 method_type, lldb::eAccessPublic, is_virtual, is_static,
                 is_inline, is_explicit, is_attr_used, is_artificial);
 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h
index a7e2ced2c4563..03b73e6be391f 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.h
@@ -242,9 +242,8 @@ class ClangExpressionDeclMap : public ClangASTSource {
   /// \return
   ///     Valid load address for the symbol
   lldb::addr_t GetSymbolAddress(Target &target, Process *process,
-                                ConstString name,
-                                lldb::SymbolType symbol_type,
-                                Module *module = NULL);
+                                ConstString name, lldb::SymbolType symbol_type,
+                                Module *module = nullptr);
 
   lldb::addr_t GetSymbolAddress(ConstString name,
                                 lldb::SymbolType symbol_type);
@@ -331,7 +330,7 @@ class ClangExpressionDeclMap : public ClangASTSource {
         return m_exe_ctx.GetTargetPtr();
       else if (m_sym_ctx.target_sp)
         m_sym_ctx.target_sp.get();
-      return NULL;
+      return nullptr;
     }
 
     ExecutionContext m_exe_ctx; ///< The execution context to use when parsing.
@@ -368,7 +367,7 @@ class ClangExpressionDeclMap : public ClangASTSource {
   struct StructVars {
     StructVars()
         : m_struct_alignment(0), m_struct_size(0), m_struct_laid_out(false),
-          m_result_name(), m_object_pointer_type(NULL, NULL) {}
+          m_result_name(), m_object_pointer_type(nullptr, nullptr) {}
 
     lldb::offset_t
         m_struct_alignment; ///< The alignment of the struct in bytes.
@@ -421,7 +420,7 @@ class ClangExpressionDeclMap : public ClangASTSource {
   lldb::VariableSP FindGlobalVariable(Target &target, lldb::ModuleSP &module,
                                       ConstString name,
                                       CompilerDeclContext *namespace_decl,
-                                      TypeFromUser *type = NULL);
+                                      TypeFromUser *type = nullptr);
 
   /// Get the value of a variable in a given execution context and return the
   /// associated Types if needed.
@@ -449,8 +448,8 @@ class ClangExpressionDeclMap : public ClangASTSource {
   ///     Return true if the value was successfully filled in.
   bool GetVariableValue(lldb::VariableSP &var,
                         lldb_private::Value &var_location,
-                        TypeFromUser *found_type = NULL,
-                        TypeFromParser *parser_type = NULL);
+                        TypeFromUser *found_type = nullptr,
+                        TypeFromParser *parser_type = nullptr);
 
   /// Use the NameSearchContext to generate a Decl for the given LLDB
   /// Variable, and put it in the Tuple list.
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
index 0659851fa08b4..9fd9fe9a59f69 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
@@ -1229,7 +1229,7 @@ lldb_private::Status ClangExpressionParser::PrepareForExecution(
       type_system_helper->DeclMap(); // result can be NULL
 
   if (decl_map) {
-    Stream *error_stream = NULL;
+    Stream *error_stream = nullptr;
     Target *target = exe_ctx.GetTargetPtr();
     error_stream = target->GetDebugger().GetErrorFile().get();
 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h
index bb5e6e7987030..eb7f74f20a20d 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.h
@@ -117,8 +117,8 @@ class ClangExpressionVariable : public ExpressionVariable {
   class ParserVars {
   public:
     ParserVars()
-        : m_parser_type(), m_named_decl(NULL), m_llvm_value(NULL),
-          m_lldb_value(), m_lldb_var(), m_lldb_sym(NULL) {}
+        : m_parser_type(), m_named_decl(nullptr), m_llvm_value(nullptr),
+          m_lldb_value(), m_lldb_var(), m_lldb_sym(nullptr) {}
 
     TypeFromParser
         m_parser_type; ///< The type of the variable according to the parser
@@ -152,7 +152,7 @@ class ClangExpressionVariable : public ExpressionVariable {
     ParserVarMap::iterator i = m_parser_vars.find(parser_id);
 
     if (i == m_parser_vars.end())
-      return NULL;
+      return nullptr;
     else
       return &i->second;
   }
@@ -186,7 +186,7 @@ class ClangExpressionVariable : public ExpressionVariable {
     JITVarMap::iterator i = m_jit_vars.find(parser_id);
 
     if (i == m_jit_vars.end())
-      return NULL;
+      return nullptr;
     else
       return &i->second;
   }
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h
index bdcead230c5d3..24f6f2eb91b3b 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.h
@@ -72,7 +72,7 @@ class ClangFunctionCaller : public FunctionCaller {
 
     /// Return the object that the parser should use when resolving external
     /// values.  May be NULL if everything should be self-contained.
-    ClangExpressionDeclMap *DeclMap() override { return NULL; }
+    ClangExpressionDeclMap *DeclMap() override { return nullptr; }
 
     /// Return the object that the parser should allow to access ASTs. May be
     /// NULL if the ASTs do not need to be transformed.
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangPersistentVariables.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangPersistentVariables.cpp
index e77ba86eb100f..912c9ef2e1b82 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangPersistentVariables.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangPersistentVariables.cpp
@@ -48,7 +48,7 @@ void ClangPersistentVariables::RemovePersistentVariable(
     return;
   name++;
 
-  if (strtoul(name, NULL, 0) == m_next_persistent_variable_id - 1)
+  if (strtoul(name, nullptr, 0) == m_next_persistent_variable_id - 1)
     m_next_persistent_variable_id--;
 }
 
@@ -72,7 +72,7 @@ ClangPersistentVariables::GetPersistentDecl(ConstString name) {
       m_persistent_decls.find(name.GetCString());
 
   if (i == m_persistent_decls.end())
-    return NULL;
+    return nullptr;
   else
     return i->second;
 }
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
index 9fcf2d49845e3..2dae5b7022f30 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
@@ -102,7 +102,7 @@ void ClangUserExpression::ScanContext(ExecutionContext &exe_ctx, Status &err) {
   }
 
   StackFrame *frame = exe_ctx.GetFramePtr();
-  if (frame == NULL) {
+  if (frame == nullptr) {
     if (log)
       log->Printf("  [CUE::SC] Null stack frame");
     return;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp
index 684cffd815e93..5eec224477fcb 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.cpp
@@ -90,7 +90,7 @@ bool ClangUtilityFunction::Install(DiagnosticManager &diagnostic_manager,
 
   ResetDeclMap(exe_ctx, keep_result_in_memory);
 
-  if (!DeclMap()->WillParse(exe_ctx, NULL)) {
+  if (!DeclMap()->WillParse(exe_ctx, nullptr)) {
     diagnostic_manager.PutString(
         eDiagnosticSeverityError,
         "current process state is unsuitable for expression parsing");
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp
index 9c928054cfb5e..f083b92e3d627 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp
@@ -60,7 +60,7 @@ IRForTarget::FunctionValueCache::GetValue(llvm::Function *function) {
 
 static llvm::Value *FindEntryInstruction(llvm::Function *function) {
   if (function->empty())
-    return NULL;
+    return nullptr;
 
   return function->getEntryBlock().getFirstNonPHIOrDbg();
 }
@@ -71,11 +71,12 @@ IRForTarget::IRForTarget(lldb_private::ClangExpressionDeclMap *decl_map,
                          lldb_private::Stream &error_stream,
                          const char *func_name)
     : ModulePass(ID), m_resolve_vars(resolve_vars), m_func_name(func_name),
-      m_module(NULL), m_decl_map(decl_map), m_CFStringCreateWithBytes(NULL),
-      m_sel_registerName(NULL), m_objc_getClass(NULL), m_intptr_ty(NULL),
-      m_error_stream(error_stream),
-      m_execution_unit(execution_unit), m_result_store(NULL),
-      m_result_is_pointer(false), m_reloc_placeholder(NULL),
+      m_module(nullptr), m_decl_map(decl_map),
+      m_CFStringCreateWithBytes(nullptr), m_sel_registerName(nullptr),
+      m_objc_getClass(nullptr), m_intptr_ty(nullptr),
+      m_error_stream(error_stream), m_execution_unit(execution_unit),
+      m_result_store(nullptr), m_result_is_pointer(false),
+      m_reloc_placeholder(nullptr),
       m_entry_instruction_finder(FindEntryInstruction) {}
 
 /* Handy utility functions used at several places in the code */
@@ -116,7 +117,7 @@ clang::NamedDecl *IRForTarget::DeclForGlobal(const GlobalValue *global_val,
       module->getNamedMetadata("clang.global.decl.ptrs");
 
   if (!named_metadata)
-    return NULL;
+    return nullptr;
 
   unsigned num_nodes = named_metadata->getNumOperands();
   unsigned node_index;
@@ -125,7 +126,7 @@ clang::NamedDecl *IRForTarget::DeclForGlobal(const GlobalValue *global_val,
     llvm::MDNode *metadata_node =
         dyn_cast<llvm::MDNode>(named_metadata->getOperand(node_index));
     if (!metadata_node)
-      return NULL;
+      return nullptr;
 
     if (metadata_node->getNumOperands() != 2)
       continue;
@@ -138,14 +139,14 @@ clang::NamedDecl *IRForTarget::DeclForGlobal(const GlobalValue *global_val,
         mdconst::dyn_extract<ConstantInt>(metadata_node->getOperand(1));
 
     if (!constant_int)
-      return NULL;
+      return nullptr;
 
     uintptr_t ptr = constant_int->getZExtValue();
 
     return reinterpret_cast<clang::NamedDecl *>(ptr);
   }
 
-  return NULL;
+  return nullptr;
 }
 
 clang::NamedDecl *IRForTarget::DeclForGlobal(GlobalValue *global_val) {
@@ -164,7 +165,7 @@ bool IRForTarget::CreateResultVariable(llvm::Function &llvm_function) {
   ValueSymbolTable &value_symbol_table = m_module->getValueSymbolTable();
 
   std::string result_name_str;
-  const char *result_name = NULL;
+  const char *result_name = nullptr;
 
   for (ValueSymbolTable::iterator vi = value_symbol_table.begin(),
                                   ve = value_symbol_table.end();
@@ -342,8 +343,8 @@ bool IRForTarget::CreateResultVariable(llvm::Function &llvm_function) {
 
   GlobalVariable *new_result_global = new GlobalVariable(
       (*m_module), result_global->getType()->getElementType(),
-      false,                              /* not constant */
-      GlobalValue::ExternalLinkage, NULL, /* no initializer */
+      false,                                 /* not constant */
+      GlobalValue::ExternalLinkage, nullptr, /* no initializer */
       m_result_name.GetCString());
 
   // It's too late in compilation to create a new VarDecl for this, but we
@@ -488,7 +489,7 @@ bool IRForTarget::RewriteObjCConstString(llvm::GlobalVariable *ns_str,
         CFSCWB_ty, ConstantExpr::getIntToPtr(CFSCWB_addr_int, CFSCWB_ptr_ty)};
   }
 
-  ConstantDataSequential *string_array = NULL;
+  ConstantDataSequential *string_array = nullptr;
 
   if (cstr)
     string_array = dyn_cast<ConstantDataSequential>(cstr->getInitializer());
@@ -733,7 +734,7 @@ bool IRForTarget::RewriteObjCConstStrings() {
       }
 
       if (!cstr_array)
-        cstr_global = NULL;
+        cstr_global = nullptr;
 
       if (!RewriteObjCConstString(nsstring_global, cstr_global)) {
         if (log)
@@ -1149,8 +1150,8 @@ bool IRForTarget::RewritePersistentAlloc(llvm::Instruction *persistent_alloc) {
     return false;
 
   GlobalVariable *persistent_global = new GlobalVariable(
-      (*m_module), alloc->getType(), false, /* not constant */
-      GlobalValue::ExternalLinkage, NULL,   /* no initializer */
+      (*m_module), alloc->getType(), false,  /* not constant */
+      GlobalValue::ExternalLinkage, nullptr, /* no initializer */
       alloc->getName().str());
 
   // What we're going to do here is make believe this was a regular old
@@ -1346,13 +1347,13 @@ bool IRForTarget::MaybeHandleVariable(Value *llvm_value_ptr) {
     std::string name(named_decl->getName().str());
 
     clang::ValueDecl *value_decl = dyn_cast<clang::ValueDecl>(named_decl);
-    if (value_decl == NULL)
+    if (value_decl == nullptr)
       return false;
 
     lldb_private::CompilerType compiler_type(&value_decl->getASTContext(),
                                              value_decl->getType());
 
-    const Type *value_type = NULL;
+    const Type *value_type = nullptr;
 
     if (name[0] == '$') {
       // The $__lldb_expr_result name indicates the return value has allocated
@@ -1630,12 +1631,12 @@ bool IRForTarget::ResolveExternals(Function &llvm_function) {
 }
 
 static bool isGuardVariableRef(Value *V) {
-  Constant *Old = NULL;
+  Constant *Old = nullptr;
 
   if (!(Old = dyn_cast<Constant>(V)))
     return false;
 
-  ConstantExpr *CE = NULL;
+  ConstantExpr *CE = nullptr;
 
   if ((CE = dyn_cast<ConstantExpr>(V))) {
     if (CE->getOpcode() != Instruction::BitCast)
@@ -1930,8 +1931,8 @@ bool IRForTarget::ReplaceVariables(Function &llvm_function) {
   }
 
   for (element_index = 0; element_index < num_elements; ++element_index) {
-    const clang::NamedDecl *decl = NULL;
-    Value *value = NULL;
+    const clang::NamedDecl *decl = nullptr;
+    Value *value = nullptr;
     lldb::offset_t offset;
     lldb_private::ConstString name;
 
@@ -2051,7 +2052,7 @@ bool IRForTarget::runOnModule(Module &llvm_module) {
     std::string s;
     raw_string_ostream oss(s);
 
-    m_module->print(oss, NULL);
+    m_module->print(oss, nullptr);
 
     oss.flush();
 
@@ -2088,7 +2089,7 @@ bool IRForTarget::runOnModule(Module &llvm_module) {
   m_reloc_placeholder = new llvm::GlobalVariable(
       (*m_module), int8_ty, false /* IsConstant */,
       GlobalVariable::InternalLinkage, Constant::getNullValue(int8_ty),
-      "reloc_placeholder", NULL /* InsertBefore */,
+      "reloc_placeholder", nullptr /* InsertBefore */,
       GlobalVariable::NotThreadLocal /* ThreadLocal */, 0 /* AddressSpace */);
 
   ////////////////////////////////////////////////////////////
@@ -2110,7 +2111,7 @@ bool IRForTarget::runOnModule(Module &llvm_module) {
     std::string s;
     raw_string_ostream oss(s);
 
-    m_module->print(oss, NULL);
+    m_module->print(oss, nullptr);
 
     oss.flush();
 
@@ -2245,7 +2246,7 @@ bool IRForTarget::runOnModule(Module &llvm_module) {
     std::string s;
     raw_string_ostream oss(s);
 
-    m_module->print(oss, NULL);
+    m_module->print(oss, nullptr);
 
     oss.flush();
 
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
index 130adbd607143..6323889c2e093 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
+++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
@@ -749,7 +749,7 @@ EmulateInstructionARM::CreateInstance(const ArchSpec &arch,
     }
   }
 
-  return NULL;
+  return nullptr;
 }
 
 bool EmulateInstructionARM::SetTargetTriple(const ArchSpec &arch) {
@@ -13213,7 +13213,7 @@ EmulateInstructionARM::GetARMOpcodeForInstruction(const uint32_t opcode,
         (g_arm_opcodes[i].variants & arm_isa) != 0)
       return &g_arm_opcodes[i];
   }
-  return NULL;
+  return nullptr;
 }
 
 EmulateInstructionARM::ARMOpcode *
@@ -13763,7 +13763,7 @@ EmulateInstructionARM::GetThumbOpcodeForInstruction(const uint32_t opcode,
         (g_thumb_opcodes[i].variants & arm_isa) != 0)
       return &g_thumb_opcodes[i];
   }
-  return NULL;
+  return nullptr;
 }
 
 bool EmulateInstructionARM::SetArchitecture(const ArchSpec &arch) {
@@ -14311,7 +14311,7 @@ bool EmulateInstructionARM::WriteFlags(Context &context, const uint32_t result,
 }
 
 bool EmulateInstructionARM::EvaluateInstruction(uint32_t evaluate_options) {
-  ARMOpcode *opcode_data = NULL;
+  ARMOpcode *opcode_data = nullptr;
 
   if (m_opcode_mode == eModeThumb)
     opcode_data =
@@ -14400,7 +14400,7 @@ bool EmulateInstructionARM::TestEmulation(Stream *out_stream, ArchSpec &arch,
   OptionValueSP value_sp = test_data->GetValueForKey(opcode_key);
 
   uint32_t test_opcode;
-  if ((value_sp.get() == NULL) ||
+  if ((value_sp.get() == nullptr) ||
       (value_sp->GetType() != OptionValue::eTypeUInt64)) {
     out_stream->Printf("TestEmulation: Error reading opcode from test file.\n");
     return false;
@@ -14426,7 +14426,7 @@ bool EmulateInstructionARM::TestEmulation(Stream *out_stream, ArchSpec &arch,
   EmulationStateARM after_state;
 
   value_sp = test_data->GetValueForKey(before_key);
-  if ((value_sp.get() == NULL) ||
+  if ((value_sp.get() == nullptr) ||
       (value_sp->GetType() != OptionValue::eTypeDictionary)) {
     out_stream->Printf("TestEmulation:  Failed to find 'before' state.\n");
     return false;
@@ -14439,7 +14439,7 @@ bool EmulateInstructionARM::TestEmulation(Stream *out_stream, ArchSpec &arch,
   }
 
   value_sp = test_data->GetValueForKey(after_key);
-  if ((value_sp.get() == NULL) ||
+  if ((value_sp.get() == nullptr) ||
       (value_sp->GetType() != OptionValue::eTypeDictionary)) {
     out_stream->Printf("TestEmulation:  Failed to find 'after' state.\n");
     return false;
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp
index 37ec53bba2b3b..11c7677c201a3 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp
+++ b/lldb/source/Plugins/Instruction/ARM/EmulationStateARM.cpp
@@ -284,14 +284,14 @@ bool EmulationStateARM::LoadStateFromDictionary(
 
   // Load memory, if present.
 
-  if (value_sp.get() != NULL) {
+  if (value_sp.get() != nullptr) {
     static ConstString address_key("address");
     static ConstString data_key("data");
     uint64_t start_address = 0;
 
     OptionValueDictionary *mem_dict = value_sp->GetAsDictionary();
     value_sp = mem_dict->GetValueForKey(address_key);
-    if (value_sp.get() == NULL)
+    if (value_sp.get() == nullptr)
       return false;
     else
       start_address = value_sp->GetUInt64Value();
@@ -306,7 +306,7 @@ bool EmulationStateARM::LoadStateFromDictionary(
 
     for (uint32_t i = 0; i < num_elts; ++i) {
       value_sp = mem_array->GetValueAtIndex(i);
-      if (value_sp.get() == NULL)
+      if (value_sp.get() == nullptr)
         return false;
       uint64_t value = value_sp->GetUInt64Value();
       StoreToPseudoAddress(address, value);
@@ -315,7 +315,7 @@ bool EmulationStateARM::LoadStateFromDictionary(
   }
 
   value_sp = test_data->GetValueForKey(registers_key);
-  if (value_sp.get() == NULL)
+  if (value_sp.get() == nullptr)
     return false;
 
   // Load General Registers
@@ -328,7 +328,7 @@ bool EmulationStateARM::LoadStateFromDictionary(
     sstr.Printf("r%d", i);
     ConstString reg_name(sstr.GetString());
     value_sp = reg_dict->GetValueForKey(reg_name);
-    if (value_sp.get() == NULL)
+    if (value_sp.get() == nullptr)
       return false;
     uint64_t reg_value = value_sp->GetUInt64Value();
     StorePseudoRegisterValue(dwarf_r0 + i, reg_value);
@@ -336,7 +336,7 @@ bool EmulationStateARM::LoadStateFromDictionary(
 
   static ConstString cpsr_name("cpsr");
   value_sp = reg_dict->GetValueForKey(cpsr_name);
-  if (value_sp.get() == NULL)
+  if (value_sp.get() == nullptr)
     return false;
   StorePseudoRegisterValue(dwarf_cpsr, value_sp->GetUInt64Value());
 
@@ -346,7 +346,7 @@ bool EmulationStateARM::LoadStateFromDictionary(
     sstr.Printf("s%d", i);
     ConstString reg_name(sstr.GetString());
     value_sp = reg_dict->GetValueForKey(reg_name);
-    if (value_sp.get() == NULL)
+    if (value_sp.get() == nullptr)
       return false;
     uint64_t reg_value = value_sp->GetUInt64Value();
     StorePseudoRegisterValue(dwarf_s0 + i, reg_value);
diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
index c30d9bc906d2b..d835d62ad2e07 100644
--- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
+++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
@@ -154,7 +154,7 @@ EmulateInstructionARM64::CreateInstance(const ArchSpec &arch,
     }
   }
 
-  return NULL;
+  return nullptr;
 }
 
 bool EmulateInstructionARM64::SetTargetTriple(const ArchSpec &arch) {
@@ -411,7 +411,7 @@ bool EmulateInstructionARM64::ReadInstruction() {
 bool EmulateInstructionARM64::EvaluateInstruction(uint32_t evaluate_options) {
   const uint32_t opcode = m_opcode.GetOpcode32();
   Opcode *opcode_data = GetOpcodeForInstruction(opcode);
-  if (opcode_data == NULL)
+  if (opcode_data == nullptr)
     return false;
 
   // printf ("opcode template for 0x%8.8x: %s\n", opcode, opcode_data->name);
diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
index b5a657819af14..cbf3dda7896e4 100644
--- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
@@ -213,7 +213,7 @@ EmulateInstructionMIPS::CreateInstance(const ArchSpec &arch,
     }
   }
 
-  return NULL;
+  return nullptr;
 }
 
 bool EmulateInstructionMIPS::SetTargetTriple(const ArchSpec &arch) {
@@ -966,7 +966,7 @@ EmulateInstructionMIPS::GetOpcodeForInstruction(const char *op_name) {
       return &g_opcodes[i];
   }
 
-  return NULL;
+  return nullptr;
 }
 
 uint32_t
@@ -1083,7 +1083,7 @@ bool EmulateInstructionMIPS::EvaluateInstruction(uint32_t evaluate_options) {
   */
   const char *op_name = m_insn_info->getName(mc_insn.getOpcode()).data();
 
-  if (op_name == NULL)
+  if (op_name == nullptr)
     return false;
 
   /*
@@ -1092,7 +1092,7 @@ bool EmulateInstructionMIPS::EvaluateInstruction(uint32_t evaluate_options) {
   */
   MipsOpcode *opcode_data = GetOpcodeForInstruction(op_name);
 
-  if (opcode_data == NULL)
+  if (opcode_data == nullptr)
     return false;
 
   uint64_t old_pc = 0, new_pc = 0;
@@ -2875,7 +2875,7 @@ bool EmulateInstructionMIPS::Emulate_MSA_Branch_DF(llvm::MCInst &insn,
   bool success = false, branch_hit = true;
   int32_t target = 0;
   RegisterValue reg_value;
-  const uint8_t *ptr = NULL;
+  const uint8_t *ptr = nullptr;
 
   uint32_t wt = m_reg_info->getEncodingValue(insn.getOperand(0).getReg());
   int32_t offset = insn.getOperand(1).getImm();
diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
index 7c1044c499ae7..69f0278d14377 100644
--- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
@@ -200,7 +200,7 @@ EmulateInstructionMIPS64::CreateInstance(const ArchSpec &arch,
     }
   }
 
-  return NULL;
+  return nullptr;
 }
 
 bool EmulateInstructionMIPS64::SetTargetTriple(const ArchSpec &arch) {
@@ -931,7 +931,7 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction(const char *op_name) {
       return &g_opcodes[i];
   }
 
-  return NULL;
+  return nullptr;
 }
 
 bool EmulateInstructionMIPS64::ReadInstruction() {
@@ -974,7 +974,7 @@ bool EmulateInstructionMIPS64::EvaluateInstruction(uint32_t evaluate_options) {
   */
   const char *op_name = m_insn_info->getName(mc_insn.getOpcode()).data();
 
-  if (op_name == NULL)
+  if (op_name == nullptr)
     return false;
 
   /*
@@ -983,7 +983,7 @@ bool EmulateInstructionMIPS64::EvaluateInstruction(uint32_t evaluate_options) {
   */
   MipsOpcode *opcode_data = GetOpcodeForInstruction(op_name);
 
-  if (opcode_data == NULL)
+  if (opcode_data == nullptr)
     return false;
 
   uint64_t old_pc = 0, new_pc = 0;
@@ -2186,7 +2186,7 @@ bool EmulateInstructionMIPS64::Emulate_MSA_Branch_DF(llvm::MCInst &insn,
   bool success = false, branch_hit = true;
   int64_t target = 0;
   RegisterValue reg_value;
-  const uint8_t *ptr = NULL;
+  const uint8_t *ptr = nullptr;
 
   uint32_t wt = m_reg_info->getEncodingValue(insn.getOperand(0).getReg());
   int64_t offset = insn.getOperand(1).getImm();
diff --git a/lldb/source/Plugins/InstrumentationRuntime/ASan/ASanRuntime.cpp b/lldb/source/Plugins/InstrumentationRuntime/ASan/ASanRuntime.cpp
index 60c8ed01ac1f4..c8ac04641e685 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/ASan/ASanRuntime.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/ASan/ASanRuntime.cpp
@@ -287,7 +287,7 @@ void AddressSanitizerRuntime::Activate() {
   const Symbol *symbol = GetRuntimeModuleSP()->FindFirstSymbolWithNameAndType(
       symbol_name, eSymbolTypeCode);
 
-  if (symbol == NULL)
+  if (symbol == nullptr)
     return;
 
   if (!symbol->ValueIsAddress() || !symbol->GetAddressRef().IsValid())
diff --git a/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp b/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp
index afdd97e6a74bd..fac20de0eb4bd 100644
--- a/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp
+++ b/lldb/source/Plugins/InstrumentationRuntime/TSan/TSanRuntime.cpp
@@ -900,7 +900,7 @@ void ThreadSanitizerRuntime::Activate() {
   const Symbol *symbol = GetRuntimeModuleSP()->FindFirstSymbolWithNameAndType(
       symbol_name, eSymbolTypeCode);
 
-  if (symbol == NULL)
+  if (symbol == nullptr)
     return;
 
   if (!symbol->ValueIsAddress() || !symbol->GetAddressRef().IsValid())
diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
index ca2847f6a8c46..24683e687188e 100644
--- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/ItaniumABILanguageRuntime.cpp
@@ -46,7 +46,7 @@ static const char *vtable_demangled_prefix = "vtable for ";
 bool ItaniumABILanguageRuntime::CouldHaveDynamicValue(ValueObject &in_value) {
   const bool check_cxx = true;
   const bool check_objc = false;
-  return in_value.GetCompilerType().IsPossibleDynamicType(NULL, check_cxx,
+  return in_value.GetCompilerType().IsPossibleDynamicType(nullptr, check_cxx,
                                                           check_objc);
 }
 
@@ -69,7 +69,7 @@ TypeAndOrName ItaniumABILanguageRuntime::GetTypeInfoFromVTableAddress(
         target.GetImages().ResolveSymbolContextForAddress(
             vtable_addr, eSymbolContextSymbol, sc);
         Symbol *symbol = sc.symbol;
-        if (symbol != NULL) {
+        if (symbol != nullptr) {
           const char *name =
               symbol->GetMangled()
                   .GetDemangledName(lldb::eLanguageTypeC_plus_plus)
@@ -306,7 +306,7 @@ TypeAndOrName ItaniumABILanguageRuntime::FixUpDynamicType(
 }
 
 bool ItaniumABILanguageRuntime::IsVTableName(const char *name) {
-  if (name == NULL)
+  if (name == nullptr)
     return false;
 
   // Can we maybe ask Clang about this?
@@ -326,7 +326,7 @@ ItaniumABILanguageRuntime::CreateInstance(Process *process,
       language == eLanguageTypeC_plus_plus_14)
     return new ItaniumABILanguageRuntime(process);
   else
-    return NULL;
+    return nullptr;
 }
 
 class CommandObjectMultiwordItaniumABI_Demangle : public CommandObjectParsed {
@@ -490,7 +490,7 @@ lldb::BreakpointSP ItaniumABILanguageRuntime::CreateExceptionBreakpoint(
   Target &target = m_process->GetTarget();
   FileSpecList filter_modules;
   BreakpointResolverSP exception_resolver_sp =
-      CreateExceptionResolver(NULL, catch_bp, throw_bp, for_expressions);
+      CreateExceptionResolver(nullptr, catch_bp, throw_bp, for_expressions);
   SearchFilterSP filter_sp(CreateExceptionSearchFilter());
   const bool hardware = false;
   const bool resolve_indirect_functions = false;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
index 6b51177437c6a..b5cac92213b29 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
@@ -173,7 +173,7 @@ AppleObjCDeclVendor::GetDeclForISA(ObjCLanguageRuntime::ObjCISA isa) {
       m_runtime.GetClassDescriptorFromISA(isa);
 
   if (!descriptor)
-    return NULL;
+    return nullptr;
 
   ConstString name(descriptor->GetClassName());
 
@@ -203,7 +203,7 @@ class ObjCRuntimeMethodType {
   ObjCRuntimeMethodType(const char *types) : m_is_valid(false) {
     const char *cursor = types;
     enum ParserState { Start = 0, InType, InPos } state = Start;
-    const char *type = NULL;
+    const char *type = nullptr;
     int brace_depth = 0;
 
     uint32_t stepsLeft = 256;
@@ -261,7 +261,7 @@ class ObjCRuntimeMethodType {
               m_is_valid = false;
               return;
             }
-            type = NULL;
+            type = nullptr;
           } else {
             ++cursor;
           }
@@ -319,7 +319,7 @@ class ObjCRuntimeMethodType {
               bool instance,
               ObjCLanguageRuntime::EncodingToTypeSP type_realizer_sp) {
     if (!m_is_valid || m_type_vector.size() < 3)
-      return NULL;
+      return nullptr;
 
     clang::ASTContext &ast_ctx(interface_decl->getASTContext());
 
@@ -354,7 +354,7 @@ class ObjCRuntimeMethodType {
 
     clang::IdentifierInfo **identifier_infos = selector_components.data();
     if (!identifier_infos) {
-      return NULL;
+      return nullptr;
     }
 
     clang::Selector sel = ast_ctx.Selectors.getSelector(
@@ -367,12 +367,13 @@ class ObjCRuntimeMethodType {
             for_expression));
 
     if (ret_type.isNull())
-      return NULL;
+      return nullptr;
 
     clang::ObjCMethodDecl *ret = clang::ObjCMethodDecl::Create(
         ast_ctx, clang::SourceLocation(), clang::SourceLocation(), sel,
-        ret_type, NULL, interface_decl, isInstance, isVariadic, isSynthesized,
-        isImplicitlyDeclared, isDefined, impControl, HasRelatedResultType);
+        ret_type, nullptr, interface_decl, isInstance, isVariadic,
+        isSynthesized, isImplicitlyDeclared, isDefined, impControl,
+        HasRelatedResultType);
 
     std::vector<clang::ParmVarDecl *> parm_vars;
 
@@ -383,12 +384,12 @@ class ObjCRuntimeMethodType {
               ast_ctx, m_type_vector[ai].c_str(), for_expression));
 
       if (arg_type.isNull())
-        return NULL; // well, we just wasted a bunch of time.  Wish we could
-                     // delete the stuff we'd just made!
+        return nullptr; // well, we just wasted a bunch of time.  Wish we could
+                        // delete the stuff we'd just made!
 
       parm_vars.push_back(clang::ParmVarDecl::Create(
-          ast_ctx, ret, clang::SourceLocation(), clang::SourceLocation(), NULL,
-          arg_type, NULL, clang::SC_None, NULL));
+          ast_ctx, ret, clang::SourceLocation(), clang::SourceLocation(),
+          nullptr, arg_type, nullptr, clang::SC_None, nullptr));
     }
 
     ret->setMethodParams(ast_ctx,
@@ -512,7 +513,7 @@ bool AppleObjCDeclVendor::FinishDecl(clang::ObjCInterfaceDecl *interface_decl) {
           clang::SourceLocation(), &m_ast_ctx.getASTContext()->Idents.get(name),
           ClangUtil::GetQualType(ivar_type),
           type_source_info, // TypeSourceInfo *
-          clang::ObjCIvarDecl::Public, 0, is_synthesized);
+          clang::ObjCIvarDecl::Public, nullptr, is_synthesized);
 
       if (ivar_decl) {
         interface_decl->addDecl(ivar_decl);
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp
index 8e7014dcd6118..8a0b5bf392ccb 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp
@@ -128,9 +128,9 @@ bool AppleObjCRuntime::GetObjectDescription(Stream &strm, Value &value,
   //    ret.SetContext(Value::eContextTypeClangType, return_compiler_type);
   ret.SetCompilerType(return_compiler_type);
 
-  if (exe_ctx.GetFramePtr() == NULL) {
+  if (exe_ctx.GetFramePtr() == nullptr) {
     Thread *thread = exe_ctx.GetThreadPtr();
-    if (thread == NULL) {
+    if (thread == nullptr) {
       exe_ctx.SetThreadSP(process->GetThreadList().GetSelectedThread());
       thread = exe_ctx.GetThreadPtr();
     }
@@ -225,7 +225,7 @@ Address *AppleObjCRuntime::GetPrintForDebuggerAddr() {
                                              eSymbolTypeCode, contexts)) &&
         (!modules.FindSymbolsWithNameAndType(ConstString("_CFPrintForDebugger"),
                                              eSymbolTypeCode, contexts)))
-      return NULL;
+      return nullptr;
 
     contexts.GetContextAtIndex(0, context);
 
@@ -237,7 +237,7 @@ Address *AppleObjCRuntime::GetPrintForDebuggerAddr() {
 
 bool AppleObjCRuntime::CouldHaveDynamicValue(ValueObject &in_value) {
   return in_value.GetCompilerType().IsPossibleDynamicType(
-      NULL,
+      nullptr,
       false, // do not check C++
       true); // check ObjC
 }
@@ -327,7 +327,7 @@ bool AppleObjCRuntime::ReadObjCLibrary(const ModuleSP &module_sp) {
   // reread it?
   m_objc_trampoline_handler_up.reset(
       new AppleObjCTrampolineHandler(m_process->shared_from_this(), module_sp));
-  if (m_objc_trampoline_handler_up != NULL) {
+  if (m_objc_trampoline_handler_up != nullptr) {
     m_read_objc_library = true;
     return true;
   } else
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp
index 116f4d78ea206..31f9c40066ee7 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.cpp
@@ -75,9 +75,9 @@ AppleObjCRuntimeV1::CreateInstance(Process *process,
         ObjCRuntimeVersions::eAppleObjC_V1)
       return new AppleObjCRuntimeV1(process);
     else
-      return NULL;
+      return nullptr;
   } else
-    return NULL;
+    return nullptr;
 }
 
 void AppleObjCRuntimeV1::Initialize() {
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
index 9f2540fd6255d..5d8d068a76c3d 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
@@ -414,8 +414,9 @@ AppleObjCRuntimeV2::AppleObjCRuntimeV2(Process *process,
       m_encoding_to_type_sp(), m_noclasses_warning_emitted(false),
       m_CFBoolean_values() {
   static const ConstString g_gdb_object_getClass("gdb_object_getClass");
-  m_has_object_getClass = (objc_module_sp->FindFirstSymbolWithNameAndType(
-                               g_gdb_object_getClass, eSymbolTypeCode) != NULL);
+  m_has_object_getClass =
+      (objc_module_sp->FindFirstSymbolWithNameAndType(
+           g_gdb_object_getClass, eSymbolTypeCode) != nullptr);
   RegisterObjCExceptionRecognizer();
 }
 
@@ -424,7 +425,7 @@ bool AppleObjCRuntimeV2::GetDynamicTypeAndAddress(
     TypeAndOrName &class_type_or_name, Address &address,
     Value::ValueType &value_type) {
   // We should never get here with a null process...
-  assert(m_process != NULL);
+  assert(m_process != nullptr);
 
   // The Runtime is attached to a particular process, you shouldn't pass in a
   // value from another process. Note, however, the process might be NULL (e.g.
@@ -488,9 +489,9 @@ LanguageRuntime *AppleObjCRuntimeV2::CreateInstance(Process *process,
         ObjCRuntimeVersions::eAppleObjC_V2)
       return new AppleObjCRuntimeV2(process, objc_module_sp);
     else
-      return NULL;
+      return nullptr;
   } else
-    return NULL;
+    return nullptr;
 }
 
 static constexpr OptionDefinition g_objc_classtable_dump_options[] = {
@@ -938,7 +939,7 @@ class RemoteNXMapTable {
 public:
   RemoteNXMapTable()
       : m_count(0), m_num_buckets_minus_one(0),
-        m_buckets_ptr(LLDB_INVALID_ADDRESS), m_process(NULL),
+        m_buckets_ptr(LLDB_INVALID_ADDRESS), m_process(nullptr),
         m_end_iterator(*this, -1), m_load_addr(LLDB_INVALID_ADDRESS),
         m_map_pair_size(0), m_invalid_key(0) {}
 
@@ -1273,7 +1274,7 @@ AppleObjCRuntimeV2::UpdateISAToDescriptorMapDynamic(
     RemoteNXMapTable &hash_table) {
   Process *process = GetProcess();
 
-  if (process == NULL)
+  if (process == nullptr)
     return DescriptorMapUpdateResult::Fail();
 
   uint32_t num_class_infos = 0;
@@ -1509,7 +1510,8 @@ uint32_t AppleObjCRuntimeV2::ParseClassInfoArray(const DataExtractor &data,
     } else {
       // Read the 32 bit hash for the class name
       const uint32_t name_hash = data.GetU32(&offset);
-      ClassDescriptorSP descriptor_sp(new ClassDescriptorV2(*this, isa, NULL));
+      ClassDescriptorSP descriptor_sp(
+          new ClassDescriptorV2(*this, isa, nullptr));
 
       // The code in g_get_shared_cache_class_info_body sets the value of the hash
       // to 0 to signal a demangled symbol. We use class_getName() in that code to
@@ -1538,7 +1540,7 @@ AppleObjCRuntimeV2::DescriptorMapUpdateResult
 AppleObjCRuntimeV2::UpdateISAToDescriptorMapSharedCache() {
   Process *process = GetProcess();
 
-  if (process == NULL)
+  if (process == nullptr)
     return DescriptorMapUpdateResult::Fail();
 
   Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_TYPES));
@@ -1752,7 +1754,7 @@ bool AppleObjCRuntimeV2::UpdateISAToDescriptorMapFromMemory(
 
   Process *process = GetProcess();
 
-  if (process == NULL)
+  if (process == nullptr)
     return false;
 
   uint32_t num_map_table_isas = 0;
@@ -2037,18 +2039,18 @@ AppleObjCRuntimeV2::NonPointerISACache::CreateInstance(
   auto objc_debug_isa_magic_mask = ExtractRuntimeGlobalSymbol(
       process, ConstString("objc_debug_isa_magic_mask"), objc_module_sp, error);
   if (error.Fail())
-    return NULL;
+    return nullptr;
 
   auto objc_debug_isa_magic_value = ExtractRuntimeGlobalSymbol(
       process, ConstString("objc_debug_isa_magic_value"), objc_module_sp,
       error);
   if (error.Fail())
-    return NULL;
+    return nullptr;
 
   auto objc_debug_isa_class_mask = ExtractRuntimeGlobalSymbol(
       process, ConstString("objc_debug_isa_class_mask"), objc_module_sp, error);
   if (error.Fail())
-    return NULL;
+    return nullptr;
 
   if (log)
     log->PutCString("AOCRT::NPI: Found all the non-indexed ISA masks");
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
index 377c8b3c713a5..4589b1f5d5e39 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
@@ -470,7 +470,7 @@ bool AppleObjCTrampolineHandler::AppleObjCVTables::InitializeVTableSymbols() {
       const Symbol *trampoline_symbol =
           m_objc_module_sp->FindFirstSymbolWithNameAndType(trampoline_name,
                                                            eSymbolTypeData);
-      if (trampoline_symbol != NULL) {
+      if (trampoline_symbol != nullptr) {
         m_trampoline_header = trampoline_symbol->GetLoadAddress(&target);
         if (m_trampoline_header == LLDB_INVALID_ADDRESS)
           return false;
@@ -480,7 +480,7 @@ bool AppleObjCTrampolineHandler::AppleObjCVTables::InitializeVTableSymbols() {
         const Symbol *changed_symbol =
             m_objc_module_sp->FindFirstSymbolWithNameAndType(changed_name,
                                                              eSymbolTypeCode);
-        if (changed_symbol != NULL) {
+        if (changed_symbol != nullptr) {
           const Address changed_symbol_addr = changed_symbol->GetAddress();
           if (!changed_symbol_addr.IsValid())
             return false;
@@ -541,7 +541,7 @@ bool AppleObjCTrampolineHandler::AppleObjCVTables::RefreshTrampolines(
     Status error;
     DataExtractor data;
     error = argument_values.GetValueAtIndex(0)->GetValueAsData(&exe_ctx, data,
-                                                               0, NULL);
+                                                               0, nullptr);
     lldb::offset_t offset = 0;
     lldb::addr_t region_addr = data.GetPointer(&offset);
 
@@ -668,7 +668,7 @@ AppleObjCTrampolineHandler::AppleObjCTrampolineHandler(
   ConstString msg_forward_name("_objc_msgForward");
   ConstString msg_forward_stret_name("_objc_msgForward_stret");
 
-  Target *target = process_sp ? &process_sp->GetTarget() : NULL;
+  Target *target = process_sp ? &process_sp->GetTarget() : nullptr;
   const Symbol *class_getMethodImplementation =
       m_objc_module_sp->FindFirstSymbolWithNameAndType(get_impl_name,
                                                        eSymbolTypeCode);
@@ -771,7 +771,7 @@ AppleObjCTrampolineHandler::SetupDispatchFunction(Thread &thread,
     // First stage is to make the ClangUtility to hold our injected function:
 
     if (!m_impl_code) {
-      if (m_lookup_implementation_function_code != NULL) {
+      if (m_lookup_implementation_function_code != nullptr) {
         Status error;
         m_impl_code.reset(exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
             m_lookup_implementation_function_code, eLanguageTypeObjC,
@@ -886,11 +886,11 @@ AppleObjCTrampolineHandler::GetStepThroughDispatchPlan(Thread &thread,
 
     lldb::StackFrameSP thread_cur_frame = thread.GetStackFrameAtIndex(0);
 
-    const ABI *abi = NULL;
+    const ABI *abi = nullptr;
     ProcessSP process_sp(thread.CalculateProcess());
     if (process_sp)
       abi = process_sp->GetABI().get();
-    if (abi == NULL)
+    if (abi == nullptr)
       return ret_plan_sp;
 
     TargetSP target_sp(thread.CalculateTarget());
@@ -1037,7 +1037,7 @@ AppleObjCTrampolineHandler::GetStepThroughDispatchPlan(Thread &thread,
       }
       ObjCLanguageRuntime *objc_runtime =
           thread.GetProcess()->GetObjCLanguageRuntime();
-      assert(objc_runtime != NULL);
+      assert(objc_runtime != nullptr);
 
       impl_addr = objc_runtime->LookupInMethodCache(isa_addr, sel_addr);
     }
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.h
index f1356afe6df3f..d120d671eeb3e 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.h
@@ -74,8 +74,9 @@ class AppleObjCTrampolineHandler {
     class VTableRegion {
     public:
       VTableRegion()
-          : m_valid(false), m_owner(NULL), m_header_addr(LLDB_INVALID_ADDRESS),
-            m_code_start_addr(0), m_code_end_addr(0), m_next_region(0) {}
+          : m_valid(false), m_owner(nullptr),
+            m_header_addr(LLDB_INVALID_ADDRESS), m_code_start_addr(0),
+            m_code_end_addr(0), m_next_region(0) {}
 
       VTableRegion(AppleObjCVTables *owner, lldb::addr_t header_addr);
 
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
index 3436464d02486..f3f38d3f9de60 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
@@ -36,7 +36,7 @@ AppleThreadPlanStepThroughObjCTrampoline::
                  eVoteNoOpinion),
       m_trampoline_handler(trampoline_handler),
       m_args_addr(LLDB_INVALID_ADDRESS), m_input_values(input_values),
-      m_isa_addr(isa_addr), m_sel_addr(sel_addr), m_impl_function(NULL),
+      m_isa_addr(isa_addr), m_sel_addr(sel_addr), m_impl_function(nullptr),
       m_stop_others(stop_others) {}
 
 // Destructor
@@ -172,7 +172,7 @@ bool AppleThreadPlanStepThroughObjCTrampoline::ShouldStop(Event *event_ptr) {
 
     ObjCLanguageRuntime *objc_runtime =
         GetThread().GetProcess()->GetObjCLanguageRuntime();
-    assert(objc_runtime != NULL);
+    assert(objc_runtime != nullptr);
     objc_runtime->AddToMethodCache(m_isa_addr, m_sel_addr, target_addr);
     if (log)
       log->Printf("Adding {isa-addr=0x%" PRIx64 ", sel-addr=0x%" PRIx64
diff --git a/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp b/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp
index 95aeb41e55c92..a2dafdbd52681 100644
--- a/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp
+++ b/lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp
@@ -30,7 +30,7 @@ using namespace lldb_private;
 
 MemoryHistorySP MemoryHistoryASan::CreateInstance(const ProcessSP &process_sp) {
   if (!process_sp.get())
-    return NULL;
+    return nullptr;
 
   Target &target = process_sp->GetTarget();
 
diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
index c364dd0757820..ddd7ba7ce823d 100644
--- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
+++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
@@ -121,7 +121,7 @@ ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data,
     if (ar_name_len > 0) {
       const void *ar_name_ptr = data.GetData(&offset, ar_name_len);
       // Make sure there was enough data for the string value and bail if not
-      if (ar_name_ptr == NULL)
+      if (ar_name_ptr == nullptr)
         return LLDB_INVALID_OFFSET;
       str.assign((const char *)ar_name_ptr, ar_name_len);
       ar_name.SetCString(str.c_str());
@@ -190,7 +190,7 @@ ObjectContainerBSDArchive::Archive::FindObject(
       return &m_objects[match->value];
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 ObjectContainerBSDArchive::Archive::shared_ptr
@@ -351,7 +351,7 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance(
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 bool ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) {
@@ -378,7 +378,7 @@ void ObjectContainerBSDArchive::SetArchive(Archive::shared_ptr &archive_sp) {
 ObjectContainerBSDArchive::~ObjectContainerBSDArchive() {}
 
 bool ObjectContainerBSDArchive::ParseHeader() {
-  if (m_archive_sp.get() == NULL) {
+  if (m_archive_sp.get() == nullptr) {
     if (m_data.GetByteSize() > 0) {
       ModuleSP module_sp(GetModule());
       if (module_sp) {
@@ -391,7 +391,7 @@ bool ObjectContainerBSDArchive::ParseHeader() {
       m_data.Clear();
     }
   }
-  return m_archive_sp.get() != NULL;
+  return m_archive_sp.get() != nullptr;
 }
 
 void ObjectContainerBSDArchive::Dump(Stream *s) const {
diff --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
index 84ddd058571e1..fdcf39ed576b1 100644
--- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
+++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
@@ -125,7 +125,7 @@ class ObjectContainerBSDArchive : public lldb_private::ObjectContainer {
     const Object *GetObjectAtIndex(size_t idx) {
       if (idx < m_objects.size())
         return &m_objects[idx];
-      return NULL;
+      return nullptr;
     }
 
     size_t ParseObjects();
diff --git a/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp b/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp
index c392addc82b55..839a71cfdc7bc 100644
--- a/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp
+++ b/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp
@@ -57,7 +57,7 @@ ObjectContainer *ObjectContainerUniversalMachO::CreateInstance(
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 bool ObjectContainerUniversalMachO::MagicBytesMatch(const DataExtractor &data) {
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
index 7f9665af9a3b0..aa9871071b0e0 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
@@ -114,7 +114,7 @@ void ELFHeader::ParseHeaderExtension(lldb_private::DataExtractor &data) {
 bool ELFHeader::Parse(lldb_private::DataExtractor &data,
                       lldb::offset_t *offset) {
   // Read e_ident.  This provides byte order and address size info.
-  if (data.GetU8(offset, &e_ident, EI_NIDENT) == NULL)
+  if (data.GetU8(offset, &e_ident, EI_NIDENT) == nullptr)
     return false;
 
   const unsigned byte_size = Is32Bit() ? 4 : 8;
@@ -122,11 +122,11 @@ bool ELFHeader::Parse(lldb_private::DataExtractor &data,
   data.SetAddressByteSize(byte_size);
 
   // Read e_type and e_machine.
-  if (data.GetU16(offset, &e_type, 2) == NULL)
+  if (data.GetU16(offset, &e_type, 2) == nullptr)
     return false;
 
   // Read e_version.
-  if (data.GetU32(offset, &e_version, 1) == NULL)
+  if (data.GetU32(offset, &e_version, 1) == nullptr)
     return false;
 
   // Read e_entry, e_phoff and e_shoff.
@@ -134,11 +134,11 @@ bool ELFHeader::Parse(lldb_private::DataExtractor &data,
     return false;
 
   // Read e_flags.
-  if (data.GetU32(offset, &e_flags, 1) == NULL)
+  if (data.GetU32(offset, &e_flags, 1) == nullptr)
     return false;
 
   // Read e_ehsize, e_phentsize, e_phnum, e_shentsize, e_shnum and e_shstrndx.
-  if (data.GetU16(offset, &e_ehsize, 6) == NULL)
+  if (data.GetU16(offset, &e_ehsize, 6) == nullptr)
     return false;
 
   // Initialize e_phnum, e_shnum, and e_shstrndx with the values read from the
@@ -224,7 +224,7 @@ bool ELFSectionHeader::Parse(const lldb_private::DataExtractor &data,
   const unsigned byte_size = data.GetAddressByteSize();
 
   // Read sh_name and sh_type.
-  if (data.GetU32(offset, &sh_name, 2) == NULL)
+  if (data.GetU32(offset, &sh_name, 2) == nullptr)
     return false;
 
   // Read sh_flags.
@@ -236,7 +236,7 @@ bool ELFSectionHeader::Parse(const lldb_private::DataExtractor &data,
     return false;
 
   // Read sh_link and sh_info.
-  if (data.GetU32(offset, &sh_link, 2) == NULL)
+  if (data.GetU32(offset, &sh_link, 2) == nullptr)
     return false;
 
   // Read sh_addralign and sh_entsize.
@@ -322,7 +322,7 @@ bool ELFSymbol::Parse(const lldb_private::DataExtractor &data,
   const bool parsing_32 = byte_size == 4;
 
   // Read st_name.
-  if (data.GetU32(offset, &st_name, 1) == NULL)
+  if (data.GetU32(offset, &st_name, 1) == nullptr)
     return false;
 
   if (parsing_32) {
@@ -331,23 +331,23 @@ bool ELFSymbol::Parse(const lldb_private::DataExtractor &data,
       return false;
 
     // Read st_info and st_other.
-    if (data.GetU8(offset, &st_info, 2) == NULL)
+    if (data.GetU8(offset, &st_info, 2) == nullptr)
       return false;
 
     // Read st_shndx.
-    if (data.GetU16(offset, &st_shndx, 1) == NULL)
+    if (data.GetU16(offset, &st_shndx, 1) == nullptr)
       return false;
   } else {
     // Read st_info and st_other.
-    if (data.GetU8(offset, &st_info, 2) == NULL)
+    if (data.GetU8(offset, &st_info, 2) == nullptr)
       return false;
 
     // Read st_shndx.
-    if (data.GetU16(offset, &st_shndx, 1) == NULL)
+    if (data.GetU16(offset, &st_shndx, 1) == nullptr)
       return false;
 
     // Read st_value and st_size.
-    if (data.GetU64(offset, &st_value, 2) == NULL)
+    if (data.GetU64(offset, &st_value, 2) == nullptr)
       return false;
   }
   return true;
@@ -365,7 +365,7 @@ bool ELFProgramHeader::Parse(const lldb_private::DataExtractor &data,
   const bool parsing_32 = byte_size == 4;
 
   // Read p_type;
-  if (data.GetU32(offset, &p_type, 1) == NULL)
+  if (data.GetU32(offset, &p_type, 1) == nullptr)
     return false;
 
   if (parsing_32) {
@@ -374,7 +374,7 @@ bool ELFProgramHeader::Parse(const lldb_private::DataExtractor &data,
       return false;
 
     // Read p_flags.
-    if (data.GetU32(offset, &p_flags, 1) == NULL)
+    if (data.GetU32(offset, &p_flags, 1) == nullptr)
       return false;
 
     // Read p_align.
@@ -382,7 +382,7 @@ bool ELFProgramHeader::Parse(const lldb_private::DataExtractor &data,
       return false;
   } else {
     // Read p_flags.
-    if (data.GetU32(offset, &p_flags, 1) == NULL)
+    if (data.GetU32(offset, &p_flags, 1) == nullptr)
       return false;
 
     // Read p_offset, p_vaddr, p_paddr, p_filesz, p_memsz and p_align.
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 0040599c1b5f6..bc802040bf7fd 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -162,7 +162,7 @@ ELFRelocation::ELFRelocation(unsigned type) {
     reloc = new ELFRela();
   else {
     assert(false && "unexpected relocation type");
-    reloc = static_cast<ELFRel *>(NULL);
+    reloc = static_cast<ELFRel *>(nullptr);
   }
 }
 
@@ -243,7 +243,7 @@ static user_id_t SegmentID(size_t PHdrIndex) { return ~PHdrIndex; }
 
 bool ELFNote::Parse(const DataExtractor &data, lldb::offset_t *offset) {
   // Read all fields.
-  if (data.GetU32(offset, &n_namesz, 3) == NULL)
+  if (data.GetU32(offset, &n_namesz, 3) == nullptr)
     return false;
 
   // The name field is required to be nul-terminated, and n_namesz includes the
@@ -262,7 +262,7 @@ bool ELFNote::Parse(const DataExtractor &data, lldb::offset_t *offset) {
   }
 
   const char *cstr = data.GetCStr(offset, llvm::alignTo(n_namesz, 4));
-  if (cstr == NULL) {
+  if (cstr == nullptr) {
     Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_SYMBOLS));
     if (log)
       log->Printf("Failed to parse note name lacking nul terminator");
@@ -396,7 +396,7 @@ ObjectFile *ObjectFileELF::CreateInstance(const lldb::ModuleSP &module_sp,
       return objfile_up.release();
   }
 
-  return NULL;
+  return nullptr;
 }
 
 ObjectFile *ObjectFileELF::CreateMemoryInstance(
@@ -415,7 +415,7 @@ ObjectFile *ObjectFileELF::CreateMemoryInstance(
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 bool ObjectFileELF::MagicBytesMatch(DataBufferSP &data_sp,
@@ -1656,12 +1656,12 @@ size_t ObjectFileELF::ParseSectionHeaders() {
 const ObjectFileELF::ELFSectionHeaderInfo *
 ObjectFileELF::GetSectionHeaderByIndex(lldb::user_id_t id) {
   if (!ParseSectionHeaders())
-    return NULL;
+    return nullptr;
 
   if (id < m_section_headers.size())
     return &m_section_headers[id];
 
-  return NULL;
+  return nullptr;
 }
 
 lldb::user_id_t ObjectFileELF::GetSectionIndexByName(const char *name) {
@@ -2376,7 +2376,7 @@ size_t ObjectFileELF::ParseDynamicSymbols() {
 
 const ELFDynamic *ObjectFileELF::FindDynamicSymbol(unsigned tag) {
   if (!ParseDynamicSymbols())
-    return NULL;
+    return nullptr;
 
   DynamicSymbolCollIter I = m_dynamic_symbols.begin();
   DynamicSymbolCollIter E = m_dynamic_symbols.end();
@@ -2387,7 +2387,7 @@ const ELFDynamic *ObjectFileELF::FindDynamicSymbol(unsigned tag) {
       return symbol;
   }
 
-  return NULL;
+  return nullptr;
 }
 
 unsigned ObjectFileELF::PLTRelocationType() {
@@ -2604,7 +2604,7 @@ unsigned ObjectFileELF::ApplyRelocations(
     if (!rel.Parse(rel_data, &offset))
       break;
 
-    Symbol *symbol = NULL;
+    Symbol *symbol = nullptr;
 
     if (hdr->Is32Bit()) {
       switch (reloc_type(rel)) {
@@ -2723,7 +2723,7 @@ unsigned ObjectFileELF::RelocateDebugSections(const ELFSectionHeader *rel_hdr,
 Symtab *ObjectFileELF::GetSymtab() {
   ModuleSP module_sp(GetModule());
   if (!module_sp)
-    return NULL;
+    return nullptr;
 
   // We always want to use the main object file so we (hopefully) only have one
   // cached copy of our symtab, dynamic sections, etc.
@@ -2731,10 +2731,10 @@ Symtab *ObjectFileELF::GetSymtab() {
   if (module_obj_file && module_obj_file != this)
     return module_obj_file->GetSymtab();
 
-  if (m_symtab_up == NULL) {
+  if (m_symtab_up == nullptr) {
     SectionList *section_list = module_sp->GetSectionList();
     if (!section_list)
-      return NULL;
+      return nullptr;
 
     uint64_t symbol_id = 0;
     std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
@@ -2934,10 +2934,10 @@ void ObjectFileELF::Dump(Stream *s) {
   s->EOL();
   SectionList *section_list = GetSectionList();
   if (section_list)
-    section_list->Dump(s, NULL, true, UINT32_MAX);
+    section_list->Dump(s, nullptr, true, UINT32_MAX);
   Symtab *symtab = GetSymtab();
   if (symtab)
-    symtab->Dump(s, NULL, eSortOrderNone);
+    symtab->Dump(s, nullptr, eSortOrderNone);
   s->EOL();
   DumpDependentModules(s);
   s->EOL();
diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
index ad15f961736c1..7d7453c0a87ae 100644
--- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
+++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
@@ -66,7 +66,7 @@ ObjectFile *ObjectFileJIT::CreateInstance(const lldb::ModuleSP &module_sp,
                                           lldb::offset_t length) {
   // JIT'ed object file is backed by the ObjectFileJITDelegate, never read from
   // a file
-  return NULL;
+  return nullptr;
 }
 
 ObjectFile *ObjectFileJIT::CreateMemoryInstance(const lldb::ModuleSP &module_sp,
@@ -75,7 +75,7 @@ ObjectFile *ObjectFileJIT::CreateMemoryInstance(const lldb::ModuleSP &module_sp,
                                                 lldb::addr_t header_addr) {
   // JIT'ed object file is backed by the ObjectFileJITDelegate, never read from
   // memory
-  return NULL;
+  return nullptr;
 }
 
 size_t ObjectFileJIT::GetModuleSpecifications(
@@ -88,7 +88,7 @@ size_t ObjectFileJIT::GetModuleSpecifications(
 
 ObjectFileJIT::ObjectFileJIT(const lldb::ModuleSP &module_sp,
                              const ObjectFileJITDelegateSP &delegate_sp)
-    : ObjectFile(module_sp, NULL, 0, 0, DataBufferSP(), 0), m_delegate_wp() {
+    : ObjectFile(module_sp, nullptr, 0, 0, DataBufferSP(), 0), m_delegate_wp() {
   if (delegate_sp) {
     m_delegate_wp = delegate_sp;
     m_data.SetByteOrder(delegate_sp->GetByteOrder());
@@ -115,7 +115,7 @@ Symtab *ObjectFileJIT::GetSymtab() {
   ModuleSP module_sp(GetModule());
   if (module_sp) {
     std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
-    if (m_symtab_up == NULL) {
+    if (m_symtab_up == nullptr) {
       m_symtab_up.reset(new Symtab(this));
       std::lock_guard<std::recursive_mutex> symtab_guard(
           m_symtab_up->GetMutex());
@@ -159,10 +159,10 @@ void ObjectFileJIT::Dump(Stream *s) {
 
     SectionList *sections = GetSectionList();
     if (sections)
-      sections->Dump(s, NULL, true, UINT32_MAX);
+      sections->Dump(s, nullptr, true, UINT32_MAX);
 
     if (m_symtab_up)
-      m_symtab_up->Dump(s, NULL, eSortOrderNone);
+      m_symtab_up->Dump(s, nullptr, eSortOrderNone);
   }
 }
 
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index b6db8d77a2643..62991dc2095e8 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -173,7 +173,7 @@ class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
                               const char *alt_name, size_t reg_byte_size,
                               Stream &data) {
     const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
-    if (reg_info == NULL)
+    if (reg_info == nullptr)
       reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
     if (reg_info) {
       lldb_private::RegisterValue reg_value;
@@ -202,27 +202,27 @@ class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
 
       data.PutHex32(GPRRegSet); // Flavor
       data.PutHex32(GPRWordCount);
-      WriteRegister(reg_ctx, "rax", NULL, 8, data);
-      WriteRegister(reg_ctx, "rbx", NULL, 8, data);
-      WriteRegister(reg_ctx, "rcx", NULL, 8, data);
-      WriteRegister(reg_ctx, "rdx", NULL, 8, data);
-      WriteRegister(reg_ctx, "rdi", NULL, 8, data);
-      WriteRegister(reg_ctx, "rsi", NULL, 8, data);
-      WriteRegister(reg_ctx, "rbp", NULL, 8, data);
-      WriteRegister(reg_ctx, "rsp", NULL, 8, data);
-      WriteRegister(reg_ctx, "r8", NULL, 8, data);
-      WriteRegister(reg_ctx, "r9", NULL, 8, data);
-      WriteRegister(reg_ctx, "r10", NULL, 8, data);
-      WriteRegister(reg_ctx, "r11", NULL, 8, data);
-      WriteRegister(reg_ctx, "r12", NULL, 8, data);
-      WriteRegister(reg_ctx, "r13", NULL, 8, data);
-      WriteRegister(reg_ctx, "r14", NULL, 8, data);
-      WriteRegister(reg_ctx, "r15", NULL, 8, data);
-      WriteRegister(reg_ctx, "rip", NULL, 8, data);
-      WriteRegister(reg_ctx, "rflags", NULL, 8, data);
-      WriteRegister(reg_ctx, "cs", NULL, 8, data);
-      WriteRegister(reg_ctx, "fs", NULL, 8, data);
-      WriteRegister(reg_ctx, "gs", NULL, 8, data);
+      WriteRegister(reg_ctx, "rax", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rbx", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rcx", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rdx", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rdi", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rsi", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rbp", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rsp", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r8", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r9", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r10", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r11", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r12", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r13", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r14", nullptr, 8, data);
+      WriteRegister(reg_ctx, "r15", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rip", nullptr, 8, data);
+      WriteRegister(reg_ctx, "rflags", nullptr, 8, data);
+      WriteRegister(reg_ctx, "cs", nullptr, 8, data);
+      WriteRegister(reg_ctx, "fs", nullptr, 8, data);
+      WriteRegister(reg_ctx, "gs", nullptr, 8, data);
 
       //            // Write out the FPU registers
       //            const size_t fpu_byte_size = sizeof(FPU);
@@ -311,9 +311,9 @@ class RegisterContextDarwin_x86_64_Mach : public RegisterContextDarwin_x86_64 {
       // Write out the EXC registers
       data.PutHex32(EXCRegSet);
       data.PutHex32(EXCWordCount);
-      WriteRegister(reg_ctx, "trapno", NULL, 4, data);
-      WriteRegister(reg_ctx, "err", NULL, 4, data);
-      WriteRegister(reg_ctx, "faultvaddr", NULL, 8, data);
+      WriteRegister(reg_ctx, "trapno", nullptr, 4, data);
+      WriteRegister(reg_ctx, "err", nullptr, 4, data);
+      WriteRegister(reg_ctx, "faultvaddr", nullptr, 8, data);
       return true;
     }
     return false;
@@ -404,7 +404,7 @@ class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 {
                               const char *alt_name, size_t reg_byte_size,
                               Stream &data) {
     const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
-    if (reg_info == NULL)
+    if (reg_info == nullptr)
       reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
     if (reg_info) {
       lldb_private::RegisterValue reg_value;
@@ -433,29 +433,29 @@ class RegisterContextDarwin_i386_Mach : public RegisterContextDarwin_i386 {
 
       data.PutHex32(GPRRegSet); // Flavor
       data.PutHex32(GPRWordCount);
-      WriteRegister(reg_ctx, "eax", NULL, 4, data);
-      WriteRegister(reg_ctx, "ebx", NULL, 4, data);
-      WriteRegister(reg_ctx, "ecx", NULL, 4, data);
-      WriteRegister(reg_ctx, "edx", NULL, 4, data);
-      WriteRegister(reg_ctx, "edi", NULL, 4, data);
-      WriteRegister(reg_ctx, "esi", NULL, 4, data);
-      WriteRegister(reg_ctx, "ebp", NULL, 4, data);
-      WriteRegister(reg_ctx, "esp", NULL, 4, data);
-      WriteRegister(reg_ctx, "ss", NULL, 4, data);
-      WriteRegister(reg_ctx, "eflags", NULL, 4, data);
-      WriteRegister(reg_ctx, "eip", NULL, 4, data);
-      WriteRegister(reg_ctx, "cs", NULL, 4, data);
-      WriteRegister(reg_ctx, "ds", NULL, 4, data);
-      WriteRegister(reg_ctx, "es", NULL, 4, data);
-      WriteRegister(reg_ctx, "fs", NULL, 4, data);
-      WriteRegister(reg_ctx, "gs", NULL, 4, data);
+      WriteRegister(reg_ctx, "eax", nullptr, 4, data);
+      WriteRegister(reg_ctx, "ebx", nullptr, 4, data);
+      WriteRegister(reg_ctx, "ecx", nullptr, 4, data);
+      WriteRegister(reg_ctx, "edx", nullptr, 4, data);
+      WriteRegister(reg_ctx, "edi", nullptr, 4, data);
+      WriteRegister(reg_ctx, "esi", nullptr, 4, data);
+      WriteRegister(reg_ctx, "ebp", nullptr, 4, data);
+      WriteRegister(reg_ctx, "esp", nullptr, 4, data);
+      WriteRegister(reg_ctx, "ss", nullptr, 4, data);
+      WriteRegister(reg_ctx, "eflags", nullptr, 4, data);
+      WriteRegister(reg_ctx, "eip", nullptr, 4, data);
+      WriteRegister(reg_ctx, "cs", nullptr, 4, data);
+      WriteRegister(reg_ctx, "ds", nullptr, 4, data);
+      WriteRegister(reg_ctx, "es", nullptr, 4, data);
+      WriteRegister(reg_ctx, "fs", nullptr, 4, data);
+      WriteRegister(reg_ctx, "gs", nullptr, 4, data);
 
       // Write out the EXC registers
       data.PutHex32(EXCRegSet);
       data.PutHex32(EXCWordCount);
-      WriteRegister(reg_ctx, "trapno", NULL, 4, data);
-      WriteRegister(reg_ctx, "err", NULL, 4, data);
-      WriteRegister(reg_ctx, "faultvaddr", NULL, 4, data);
+      WriteRegister(reg_ctx, "trapno", nullptr, 4, data);
+      WriteRegister(reg_ctx, "err", nullptr, 4, data);
+      WriteRegister(reg_ctx, "faultvaddr", nullptr, 4, data);
       return true;
     }
     return false;
@@ -555,7 +555,7 @@ class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
                               const char *alt_name, size_t reg_byte_size,
                               Stream &data) {
     const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
-    if (reg_info == NULL)
+    if (reg_info == nullptr)
       reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
     if (reg_info) {
       lldb_private::RegisterValue reg_value;
@@ -584,23 +584,23 @@ class RegisterContextDarwin_arm_Mach : public RegisterContextDarwin_arm {
 
       data.PutHex32(GPRRegSet); // Flavor
       data.PutHex32(GPRWordCount);
-      WriteRegister(reg_ctx, "r0", NULL, 4, data);
-      WriteRegister(reg_ctx, "r1", NULL, 4, data);
-      WriteRegister(reg_ctx, "r2", NULL, 4, data);
-      WriteRegister(reg_ctx, "r3", NULL, 4, data);
-      WriteRegister(reg_ctx, "r4", NULL, 4, data);
-      WriteRegister(reg_ctx, "r5", NULL, 4, data);
-      WriteRegister(reg_ctx, "r6", NULL, 4, data);
-      WriteRegister(reg_ctx, "r7", NULL, 4, data);
-      WriteRegister(reg_ctx, "r8", NULL, 4, data);
-      WriteRegister(reg_ctx, "r9", NULL, 4, data);
-      WriteRegister(reg_ctx, "r10", NULL, 4, data);
-      WriteRegister(reg_ctx, "r11", NULL, 4, data);
-      WriteRegister(reg_ctx, "r12", NULL, 4, data);
-      WriteRegister(reg_ctx, "sp", NULL, 4, data);
-      WriteRegister(reg_ctx, "lr", NULL, 4, data);
-      WriteRegister(reg_ctx, "pc", NULL, 4, data);
-      WriteRegister(reg_ctx, "cpsr", NULL, 4, data);
+      WriteRegister(reg_ctx, "r0", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r1", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r2", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r3", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r4", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r5", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r6", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r7", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r8", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r9", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r10", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r11", nullptr, 4, data);
+      WriteRegister(reg_ctx, "r12", nullptr, 4, data);
+      WriteRegister(reg_ctx, "sp", nullptr, 4, data);
+      WriteRegister(reg_ctx, "lr", nullptr, 4, data);
+      WriteRegister(reg_ctx, "pc", nullptr, 4, data);
+      WriteRegister(reg_ctx, "cpsr", nullptr, 4, data);
 
       // Write out the EXC registers
       //            data.PutHex32 (EXCRegSet);
@@ -710,7 +710,7 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
                               const char *alt_name, size_t reg_byte_size,
                               Stream &data) {
     const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
-    if (reg_info == NULL)
+    if (reg_info == nullptr)
       reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
     if (reg_info) {
       lldb_private::RegisterValue reg_value;
@@ -739,40 +739,40 @@ class RegisterContextDarwin_arm64_Mach : public RegisterContextDarwin_arm64 {
 
       data.PutHex32(GPRRegSet); // Flavor
       data.PutHex32(GPRWordCount);
-      WriteRegister(reg_ctx, "x0", NULL, 8, data);
-      WriteRegister(reg_ctx, "x1", NULL, 8, data);
-      WriteRegister(reg_ctx, "x2", NULL, 8, data);
-      WriteRegister(reg_ctx, "x3", NULL, 8, data);
-      WriteRegister(reg_ctx, "x4", NULL, 8, data);
-      WriteRegister(reg_ctx, "x5", NULL, 8, data);
-      WriteRegister(reg_ctx, "x6", NULL, 8, data);
-      WriteRegister(reg_ctx, "x7", NULL, 8, data);
-      WriteRegister(reg_ctx, "x8", NULL, 8, data);
-      WriteRegister(reg_ctx, "x9", NULL, 8, data);
-      WriteRegister(reg_ctx, "x10", NULL, 8, data);
-      WriteRegister(reg_ctx, "x11", NULL, 8, data);
-      WriteRegister(reg_ctx, "x12", NULL, 8, data);
-      WriteRegister(reg_ctx, "x13", NULL, 8, data);
-      WriteRegister(reg_ctx, "x14", NULL, 8, data);
-      WriteRegister(reg_ctx, "x15", NULL, 8, data);
-      WriteRegister(reg_ctx, "x16", NULL, 8, data);
-      WriteRegister(reg_ctx, "x17", NULL, 8, data);
-      WriteRegister(reg_ctx, "x18", NULL, 8, data);
-      WriteRegister(reg_ctx, "x19", NULL, 8, data);
-      WriteRegister(reg_ctx, "x20", NULL, 8, data);
-      WriteRegister(reg_ctx, "x21", NULL, 8, data);
-      WriteRegister(reg_ctx, "x22", NULL, 8, data);
-      WriteRegister(reg_ctx, "x23", NULL, 8, data);
-      WriteRegister(reg_ctx, "x24", NULL, 8, data);
-      WriteRegister(reg_ctx, "x25", NULL, 8, data);
-      WriteRegister(reg_ctx, "x26", NULL, 8, data);
-      WriteRegister(reg_ctx, "x27", NULL, 8, data);
-      WriteRegister(reg_ctx, "x28", NULL, 8, data);
-      WriteRegister(reg_ctx, "fp", NULL, 8, data);
-      WriteRegister(reg_ctx, "lr", NULL, 8, data);
-      WriteRegister(reg_ctx, "sp", NULL, 8, data);
-      WriteRegister(reg_ctx, "pc", NULL, 8, data);
-      WriteRegister(reg_ctx, "cpsr", NULL, 4, data);
+      WriteRegister(reg_ctx, "x0", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x1", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x2", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x3", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x4", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x5", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x6", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x7", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x8", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x9", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x10", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x11", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x12", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x13", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x14", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x15", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x16", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x17", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x18", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x19", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x20", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x21", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x22", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x23", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x24", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x25", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x26", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x27", nullptr, 8, data);
+      WriteRegister(reg_ctx, "x28", nullptr, 8, data);
+      WriteRegister(reg_ctx, "fp", nullptr, 8, data);
+      WriteRegister(reg_ctx, "lr", nullptr, 8, data);
+      WriteRegister(reg_ctx, "sp", nullptr, 8, data);
+      WriteRegister(reg_ctx, "pc", nullptr, 8, data);
+      WriteRegister(reg_ctx, "cpsr", nullptr, 4, data);
 
       // Write out the EXC registers
       //            data.PutHex32 (EXCRegSet);
@@ -889,7 +889,7 @@ ObjectFile *ObjectFileMachO::CreateMemoryInstance(
     if (objfile_up.get() && objfile_up->ParseHeader())
       return objfile_up.release();
   }
-  return NULL;
+  return nullptr;
 }
 
 size_t ObjectFileMachO::GetModuleSpecifications(
@@ -1312,7 +1312,7 @@ Symtab *ObjectFileMachO::GetSymtab() {
   ModuleSP module_sp(GetModule());
   if (module_sp) {
     std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
-    if (m_symtab_up == NULL) {
+    if (m_symtab_up == nullptr) {
       m_symtab_up.reset(new Symtab(this));
       std::lock_guard<std::recursive_mutex> symtab_guard(
           m_symtab_up->GetMutex());
@@ -1332,14 +1332,14 @@ bool ObjectFileMachO::IsStripped() {
         const lldb::offset_t load_cmd_offset = offset;
 
         load_command lc;
-        if (m_data.GetU32(&offset, &lc.cmd, 2) == NULL)
+        if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
           break;
         if (lc.cmd == LC_DYSYMTAB) {
           m_dysymtab.cmd = lc.cmd;
           m_dysymtab.cmdsize = lc.cmdsize;
           if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
                             (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
-              NULL) {
+              nullptr) {
             // Clear m_dysymtab if we were unable to read all items from the
             // load command
             ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
@@ -1361,7 +1361,7 @@ ObjectFileMachO::EncryptedFileRanges ObjectFileMachO::GetEncryptedFileRanges() {
   encryption_info_command encryption_cmd;
   for (uint32_t i = 0; i < m_header.ncmds; ++i) {
     const lldb::offset_t load_cmd_offset = offset;
-    if (m_data.GetU32(&offset, &encryption_cmd, 2) == NULL)
+    if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
       break;
 
     // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
@@ -1713,15 +1713,15 @@ void ObjectFileMachO::ProcessSegmentCommand(const load_command &load_cmd_,
   for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
        ++segment_sect_idx) {
     if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
-                     sizeof(sect64.sectname)) == NULL)
+                     sizeof(sect64.sectname)) == nullptr)
       break;
     if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
-                     sizeof(sect64.segname)) == NULL)
+                     sizeof(sect64.segname)) == nullptr)
       break;
     sect64.addr = m_data.GetAddress(&offset);
     sect64.size = m_data.GetAddress(&offset);
 
-    if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == NULL)
+    if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
       break;
 
     // Keep a list of mach sections around in case we need to get at data that
@@ -1830,7 +1830,7 @@ void ObjectFileMachO::ProcessSegmentCommand(const load_command &load_cmd_,
       bool section_is_encrypted = false;
       if (!segment_is_encrypted && load_cmd.filesize != 0)
         section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
-                                   sect64.offset) != NULL;
+                                   sect64.offset) != nullptr;
 
       section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
       section_sp->SetPermissions(segment_permissions);
@@ -1856,7 +1856,7 @@ void ObjectFileMachO::ProcessSegmentCommand(const load_command &load_cmd_,
 
         if (curr_section_sp.get()) {
           if (curr_section_sp->GetByteSize() == 0) {
-            if (next_section_sp.get() != NULL)
+            if (next_section_sp.get() != nullptr)
               curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
                                            curr_section_sp->GetFileAddress());
             else
@@ -1892,7 +1892,7 @@ void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
   struct load_command load_cmd;
   for (uint32_t i = 0; i < m_header.ncmds; ++i) {
     const lldb::offset_t load_cmd_offset = offset;
-    if (m_data.GetU32(&offset, &load_cmd, 2) == NULL)
+    if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
       break;
 
     if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
@@ -2021,7 +2021,7 @@ static bool ParseTrieEntries(DataExtractor &data, lldb::offset_t offset,
   if (terminalSize != 0) {
     TrieEntryWithOffset e(offset);
     e.entry.flags = data.GetULEB128(&offset);
-    const char *import_name = NULL;
+    const char *import_name = nullptr;
     if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
       e.entry.address = 0;
       e.entry.other = data.GetULEB128(&offset); // dylib ordinal
@@ -2129,7 +2129,7 @@ size_t ObjectFileMachO::ParseSymtab() {
     const lldb::offset_t cmd_offset = offset;
     // Read in the load command and load command size
     struct load_command lc;
-    if (m_data.GetU32(&offset, &lc, 2) == NULL)
+    if (m_data.GetU32(&offset, &lc, 2) == nullptr)
       break;
     // Watch for the symbol table load command
     switch (lc.cmd) {
@@ -2138,7 +2138,7 @@ size_t ObjectFileMachO::ParseSymtab() {
       symtab_load_command.cmdsize = lc.cmdsize;
       // Read in the rest of the symtab load command
       if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
-          0) // fill in symoff, nsyms, stroff, strsize fields
+          nullptr) // fill in symoff, nsyms, stroff, strsize fields
         return 0;
       if (symtab_load_command.symoff == 0) {
         if (log)
@@ -2201,7 +2201,7 @@ size_t ObjectFileMachO::ParseSymtab() {
       function_starts_load_command.cmd = lc.cmd;
       function_starts_load_command.cmdsize = lc.cmdsize;
       if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
-          NULL) // fill in symoff, nsyms, stroff, strsize fields
+          nullptr) // fill in symoff, nsyms, stroff, strsize fields
         memset(&function_starts_load_command, 0,
                sizeof(function_starts_load_command));
       break;
@@ -2215,7 +2215,7 @@ size_t ObjectFileMachO::ParseSymtab() {
   if (symtab_load_command.cmd) {
     Symtab *symtab = m_symtab_up.get();
     SectionList *section_list = GetSectionList();
-    if (section_list == NULL)
+    if (section_list == nullptr)
       return 0;
 
     const uint32_t addr_byte_size = m_data.GetAddressByteSize();
@@ -2224,12 +2224,12 @@ size_t ObjectFileMachO::ParseSymtab() {
     const size_t nlist_byte_size =
         bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
 
-    DataExtractor nlist_data(NULL, 0, byte_order, addr_byte_size);
-    DataExtractor strtab_data(NULL, 0, byte_order, addr_byte_size);
-    DataExtractor function_starts_data(NULL, 0, byte_order, addr_byte_size);
-    DataExtractor indirect_symbol_index_data(NULL, 0, byte_order,
+    DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
+    DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
+    DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
+    DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
                                              addr_byte_size);
-    DataExtractor dyld_trie_data(NULL, 0, byte_order, addr_byte_size);
+    DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
 
     const addr_t nlist_data_byte_size =
         symtab_load_command.nsyms * nlist_byte_size;
@@ -2550,10 +2550,10 @@ size_t ObjectFileMachO::ParseSymtab() {
     // so we know
     NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
     uint32_t nlist_idx = 0;
-    Symbol *symbol_ptr = NULL;
+    Symbol *symbol_ptr = nullptr;
 
     uint32_t sym_idx = 0;
-    Symbol *sym = NULL;
+    Symbol *sym = nullptr;
     size_t num_syms = 0;
     std::string memory_symbol_name;
     uint32_t unmapped_local_symbols_found = 0;
@@ -3749,7 +3749,7 @@ size_t ObjectFileMachO::ParseSymtab() {
 
       // If the sym array was not created while parsing the DSC unmapped
       // symbols, create it now.
-      if (sym == NULL) {
+      if (sym == nullptr) {
         sym = symtab->Resize(symtab_load_command.nsyms +
                              m_dysymtab.nindirectsyms);
         num_syms = symtab->GetNumSymbols();
@@ -3780,12 +3780,12 @@ size_t ObjectFileMachO::ParseSymtab() {
         nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
 
         SymbolType type = eSymbolTypeInvalid;
-        const char *symbol_name = NULL;
+        const char *symbol_name = nullptr;
 
         if (have_strtab_data) {
           symbol_name = strtab_data.PeekCStr(nlist.n_strx);
 
-          if (symbol_name == NULL) {
+          if (symbol_name == nullptr) {
             // No symbol should be NULL, even the symbols with no string values
             // should have an offset zero which points to an empty C-string
             Host::SystemLog(Host::eSystemLogError,
@@ -3796,7 +3796,7 @@ size_t ObjectFileMachO::ParseSymtab() {
             continue;
           }
           if (symbol_name[0] == '\0')
-            symbol_name = NULL;
+            symbol_name = nullptr;
         } else {
           const addr_t str_addr = strtab_addr + nlist.n_strx;
           Status str_error;
@@ -3804,7 +3804,7 @@ size_t ObjectFileMachO::ParseSymtab() {
                                              str_error))
             symbol_name = memory_symbol_name.c_str();
         }
-        const char *symbol_name_non_abi_mangled = NULL;
+        const char *symbol_name_non_abi_mangled = nullptr;
 
         SectionSP symbol_section;
         lldb::addr_t symbol_byte_size = 0;
@@ -3957,7 +3957,7 @@ size_t ObjectFileMachO::ParseSymtab() {
           case N_SO:
             // source file name
             type = eSymbolTypeSourceFile;
-            if (symbol_name == NULL) {
+            if (symbol_name == nullptr) {
               add_nlist = false;
               if (N_SO_index != UINT32_MAX) {
                 // Set the size of the N_SO to the terminating index of this
@@ -4426,7 +4426,7 @@ size_t ObjectFileMachO::ParseSymtab() {
                 if (func_start_entry->addr != symbol_lookup_file_addr &&
                     func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
                   // Not the right entry, NULL it out...
-                  func_start_entry = NULL;
+                  func_start_entry = nullptr;
                 }
               }
               if (func_start_entry) {
@@ -4710,7 +4710,7 @@ size_t ObjectFileMachO::ParseSymtab() {
 
                 NListIndexToSymbolIndexMap::const_iterator index_pos =
                     m_nlist_idx_to_sym_idx.find(stub_sym_id);
-                Symbol *stub_symbol = NULL;
+                Symbol *stub_symbol = nullptr;
                 if (index_pos != end_index_pos) {
                   // We have a remapping from the original nlist index to a
                   // current symbol index, so just look this up by index
@@ -4742,7 +4742,7 @@ size_t ObjectFileMachO::ParseSymtab() {
                     Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
                     if (sym_idx >= num_syms) {
                       sym = symtab->Resize(++num_syms);
-                      stub_symbol = NULL; // this pointer no longer valid
+                      stub_symbol = nullptr; // this pointer no longer valid
                     }
                     sym[sym_idx].SetID(synthetic_sym_id++);
                     sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
@@ -4828,10 +4828,10 @@ void ObjectFileMachO::Dump(Stream *s) {
 
     SectionList *sections = GetSectionList();
     if (sections)
-      sections->Dump(s, NULL, true, UINT32_MAX);
+      sections->Dump(s, nullptr, true, UINT32_MAX);
 
     if (m_symtab_up)
-      m_symtab_up->Dump(s, NULL, eSortOrderNone);
+      m_symtab_up->Dump(s, nullptr, eSortOrderNone);
   }
 }
 
@@ -4844,7 +4844,7 @@ UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
   lldb::offset_t offset = lc_offset;
   for (i = 0; i < header.ncmds; ++i) {
     const lldb::offset_t cmd_offset = offset;
-    if (data.GetU32(&offset, &load_cmd, 2) == NULL)
+    if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
       break;
 
     if (load_cmd.cmd == LC_UUID) {
@@ -4982,7 +4982,7 @@ ObjectFileMachO::GetArchitecture(const llvm::MachO::mach_header &header,
       lldb::offset_t offset = lc_offset;
       for (uint32_t i = 0; i < header.ncmds; ++i) {
         const lldb::offset_t cmd_offset = offset;
-        if (data.GetU32(&offset, &load_cmd, 2) == NULL)
+        if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
           break;
 
         struct version_min_command version_min;
@@ -5015,7 +5015,7 @@ ObjectFileMachO::GetArchitecture(const llvm::MachO::mach_header &header,
       offset = lc_offset;
       for (uint32_t i = 0; i < header.ncmds; ++i) {
         const lldb::offset_t cmd_offset = offset;
-        if (data.GetU32(&offset, &load_cmd, 2) == NULL)
+        if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
           break;
         do {
           if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
@@ -5077,7 +5077,7 @@ uint32_t ObjectFileMachO::GetDependentModules(FileSpecList &files) {
     uint32_t i;
     for (i = 0; i < m_header.ncmds; ++i) {
       const uint32_t cmd_offset = offset;
-      if (m_data.GetU32(&offset, &load_cmd, 2) == NULL)
+      if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
         break;
 
       switch (load_cmd.cmd) {
@@ -5211,7 +5211,7 @@ lldb_private::Address ObjectFileMachO::GetEntryPointAddress() {
 
     for (i = 0; i < m_header.ncmds; ++i) {
       const lldb::offset_t cmd_offset = offset;
-      if (m_data.GetU32(&offset, &load_cmd, 2) == NULL)
+      if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
         break;
 
       switch (load_cmd.cmd) {
@@ -5350,7 +5350,7 @@ uint32_t ObjectFileMachO::GetNumThreadContexts() {
       thread_command thread_cmd;
       for (uint32_t i = 0; i < m_header.ncmds; ++i) {
         const uint32_t cmd_offset = offset;
-        if (m_data.GetU32(&offset, &thread_cmd, 2) == NULL)
+        if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
           break;
 
         if (thread_cmd.cmd == LC_THREAD) {
@@ -5377,8 +5377,8 @@ std::string ObjectFileMachO::GetIdentifierString() {
     for (uint32_t i = 0; i < m_header.ncmds; ++i) {
       const uint32_t cmd_offset = offset;
       load_command lc;
-      if (m_data.GetU32(&offset, &lc.cmd, 2) == NULL)
-          break;
+      if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
+        break;
       if (lc.cmd == LC_NOTE)
       {
           char data_owner[17];
@@ -5422,7 +5422,7 @@ std::string ObjectFileMachO::GetIdentifierString() {
     for (uint32_t i = 0; i < m_header.ncmds; ++i) {
       const uint32_t cmd_offset = offset;
       struct ident_command ident_command;
-      if (m_data.GetU32(&offset, &ident_command, 2) == NULL)
+      if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
         break;
       if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
         char *buf = (char *) malloc (ident_command.cmdsize);
@@ -5451,8 +5451,8 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo (addr_t &address, UUID &uuid) {
     for (uint32_t i = 0; i < m_header.ncmds; ++i) {
       const uint32_t cmd_offset = offset;
       load_command lc;
-      if (m_data.GetU32(&offset, &lc.cmd, 2) == NULL)
-          break;
+      if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
+        break;
       if (lc.cmd == LC_NOTE)
       {
           char data_owner[17];
@@ -5649,13 +5649,13 @@ llvm::VersionTuple ObjectFileMachO::GetVersion() {
     uint32_t i;
     for (i = 0; i < m_header.ncmds; ++i) {
       const lldb::offset_t cmd_offset = offset;
-      if (m_data.GetU32(&offset, &load_cmd, 2) == NULL)
+      if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
         break;
 
       if (load_cmd.cmd == LC_ID_DYLIB) {
         if (version_cmd == 0) {
           version_cmd = load_cmd.cmd;
-          if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == NULL)
+          if (m_data.GetU32(&offset, &load_cmd.dylib, 4) == nullptr)
             break;
           version = load_cmd.dylib.current_version;
         }
@@ -5795,7 +5795,7 @@ llvm::VersionTuple ObjectFileMachO::GetMinimumOSVersion() {
       const lldb::offset_t load_cmd_offset = offset;
 
       version_min_command lc;
-      if (m_data.GetU32(&offset, &lc.cmd, 2) == NULL)
+      if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
         break;
       if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX ||
           lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS ||
@@ -5855,7 +5855,7 @@ uint32_t ObjectFileMachO::GetSDKVersion(uint32_t *versions,
       const lldb::offset_t load_cmd_offset = offset;
 
       version_min_command lc;
-      if (m_data.GetU32(&offset, &lc.cmd, 2) == NULL)
+      if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
         break;
       if (lc.cmd == llvm::MachO::LC_VERSION_MIN_MACOSX ||
           lc.cmd == llvm::MachO::LC_VERSION_MIN_IPHONEOS ||
@@ -5886,7 +5886,7 @@ uint32_t ObjectFileMachO::GetSDKVersion(uint32_t *versions,
         const lldb::offset_t load_cmd_offset = offset;
 
         version_min_command lc;
-        if (m_data.GetU32(&offset, &lc.cmd, 2) == NULL)
+        if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
           break;
         if (lc.cmd == llvm::MachO::LC_BUILD_VERSION) {
           // struct build_version_command {
@@ -5932,7 +5932,7 @@ uint32_t ObjectFileMachO::GetSDKVersion(uint32_t *versions,
   // the sentinel value indicating that this object file
   // does not have a valid minimum os version #.
   if (m_sdk_versions.size() > 1) {
-    if (versions != NULL && num_versions > 0) {
+    if (versions != nullptr && num_versions > 0) {
       for (size_t i = 0; i < num_versions; ++i) {
         if (i < m_sdk_versions.size())
           versions[i] = m_sdk_versions[i];
diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
index 2bd75a4fadf95..fab379037fc05 100644
--- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
+++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
@@ -596,7 +596,7 @@ Symtab *ObjectFilePECOFF::GetSymtab() {
   ModuleSP module_sp(GetModule());
   if (module_sp) {
     std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
-    if (m_symtab_up == NULL) {
+    if (m_symtab_up == nullptr) {
       SectionList *sect_list = GetSectionList();
       m_symtab_up.reset(new Symtab(this));
       std::lock_guard<std::recursive_mutex> guard(m_symtab_up->GetMutex());
@@ -627,7 +627,7 @@ Symtab *ObjectFilePECOFF::GetSymtab() {
           for (uint32_t i = 0; i < num_syms; ++i) {
             coff_symbol_t symbol;
             const uint32_t symbol_offset = offset;
-            const char *symbol_name_cstr = NULL;
+            const char *symbol_name_cstr = nullptr;
             // If the first 4 bytes of the symbol string are zero, then they
             // are followed by a 4-byte string table offset. Else these
             // 8 bytes contain the symbol name
@@ -642,7 +642,7 @@ Symtab *ObjectFilePECOFF::GetSymtab() {
               // bytes
               offset += sizeof(symbol.name) - 4; // Skip remaining
               symbol_name_cstr = symtab_data.PeekCStr(symbol_offset);
-              if (symbol_name_cstr == NULL)
+              if (symbol_name_cstr == nullptr)
                 break;
               symbol_name.assign(symbol_name_cstr, sizeof(symbol.name));
             }
@@ -1006,10 +1006,10 @@ void ObjectFilePECOFF::Dump(Stream *s) {
 
     SectionList *sections = GetSectionList();
     if (sections)
-      sections->Dump(s, NULL, true, UINT32_MAX);
+      sections->Dump(s, nullptr, true, UINT32_MAX);
 
     if (m_symtab_up)
-      m_symtab_up->Dump(s, NULL, eSortOrderNone);
+      m_symtab_up->Dump(s, nullptr, eSortOrderNone);
 
     if (m_dos_header.e_magic)
       DumpDOSHeader(s, m_dos_header);
diff --git a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp
index 874ca85fb4374..c1fe0cc8dddaf 100644
--- a/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp
+++ b/lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp
@@ -59,7 +59,7 @@ OperatingSystem *OperatingSystemPython::CreateInstance(Process *process,
     if (os_up.get() && os_up->IsValid())
       return os_up.release();
   }
-  return NULL;
+  return nullptr;
 }
 
 ConstString OperatingSystemPython::GetPluginNameStatic() {
@@ -75,7 +75,7 @@ const char *OperatingSystemPython::GetPluginDescriptionStatic() {
 OperatingSystemPython::OperatingSystemPython(lldb_private::Process *process,
                                              const FileSpec &python_module_path)
     : OperatingSystem(process), m_thread_list_valobj_sp(), m_register_info_up(),
-      m_interpreter(NULL), m_python_object_sp() {
+      m_interpreter(nullptr), m_python_object_sp() {
   if (!process)
     return;
   TargetSP target_sp = process->CalculateTarget();
@@ -115,9 +115,9 @@ OperatingSystemPython::OperatingSystemPython(lldb_private::Process *process,
 OperatingSystemPython::~OperatingSystemPython() {}
 
 DynamicRegisterInfo *OperatingSystemPython::GetDynamicRegisterInfo() {
-  if (m_register_info_up == NULL) {
+  if (m_register_info_up == nullptr) {
     if (!m_interpreter || !m_python_object_sp)
-      return NULL;
+      return nullptr;
     Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OS));
 
     if (log)
@@ -128,7 +128,7 @@ DynamicRegisterInfo *OperatingSystemPython::GetDynamicRegisterInfo() {
     StructuredData::DictionarySP dictionary =
         m_interpreter->OSPlugin_RegisterInfo(m_python_object_sp);
     if (!dictionary)
-      return NULL;
+      return nullptr;
 
     m_register_info_up.reset(new DynamicRegisterInfo(
         *dictionary, m_process->GetTarget().GetArchitecture()));
@@ -198,9 +198,9 @@ bool OperatingSystemPython::UpdateThreadList(ThreadList &old_thread_list,
       StructuredData::ObjectSP thread_dict_obj =
           threads_list->GetItemAtIndex(i);
       if (auto thread_dict = thread_dict_obj->GetAsDictionary()) {
-        ThreadSP thread_sp(
-            CreateThreadFromThreadInfo(*thread_dict, core_thread_list,
-                                       old_thread_list, core_used_map, NULL));
+        ThreadSP thread_sp(CreateThreadFromThreadInfo(
+            *thread_dict, core_thread_list, old_thread_list, core_used_map,
+            nullptr));
         if (thread_sp)
           new_thread_list.AddThread(thread_sp);
       }
diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
index 7d8f43a697acd..946f0ea3a5cfe 100644
--- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
+++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
@@ -262,7 +262,7 @@ lldb::ProcessSP PlatformFreeBSD::Attach(ProcessAttachInfo &attach_info,
                                         Status &error) {
   lldb::ProcessSP process_sp;
   if (IsHost()) {
-    if (target == NULL) {
+    if (target == nullptr) {
       TargetSP new_target_sp;
       ArchSpec emptyArchSpec;
 
@@ -279,7 +279,7 @@ lldb::ProcessSP PlatformFreeBSD::Attach(ProcessAttachInfo &attach_info,
       // even when debugging locally we are debugging remotely! Just like the
       // darwin plugin.
       process_sp = target->CreateProcess(
-          attach_info.GetListenerForProcess(debugger), "gdb-remote", NULL);
+          attach_info.GetListenerForProcess(debugger), "gdb-remote", nullptr);
 
       if (process_sp)
         error = process_sp->Attach(attach_info);
diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp
index a4d381eafe95b..d0ad2f34d11d8 100644
--- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp
+++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp
@@ -237,7 +237,7 @@ PlatformLinux::GetResumeCountForLaunchInfo(ProcessLaunchInfo &launch_info) {
 
   // Figure out what shell we're planning on using.
   const char *shell_name = strrchr(shell_string.c_str(), '/');
-  if (shell_name == NULL)
+  if (shell_name == nullptr)
     shell_name = shell_string.c_str();
   else
     shell_name++;
@@ -372,7 +372,7 @@ PlatformLinux::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
     // Handle the hijacking of process events.
     if (listener_sp) {
       const StateType state = process_sp->WaitForProcessToStop(
-          llvm::None, NULL, false, listener_sp);
+          llvm::None, nullptr, false, listener_sp);
 
       LLDB_LOG(log, "pid {0} state {0}", process_sp->GetID(), state);
     }
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index a4b17a93d7d50..955987121d62b 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -374,7 +374,7 @@ Status PlatformDarwin::GetSharedModule(
           new_module_spec.GetFileSpec() = bundle_directory;
           if (Host::ResolveExecutableInBundle(new_module_spec.GetFileSpec())) {
             Status new_error(Platform::GetSharedModule(
-                new_module_spec, process, module_sp, NULL, old_module_sp_ptr,
+                new_module_spec, process, module_sp, nullptr, old_module_sp_ptr,
                 did_create_ptr));
 
             if (module_sp)
@@ -401,7 +401,7 @@ Status PlatformDarwin::GetSharedModule(
                 ModuleSpec new_module_spec(module_spec);
                 new_module_spec.GetFileSpec() = new_file_spec;
                 Status new_error(Platform::GetSharedModule(
-                    new_module_spec, process, module_sp, NULL,
+                    new_module_spec, process, module_sp, nullptr,
                     old_module_sp_ptr, did_create_ptr));
 
                 if (module_sp) {
@@ -1171,7 +1171,7 @@ const char *PlatformDarwin::GetDeveloperDirectory() {
         std::string command_output;
         Status error =
             Host::RunShellCommand("/usr/bin/xcode-select --print-path",
-                                  NULL, // current working directory
+                                  nullptr, // current working directory
                                   &exit_status, &signo, &command_output,
                                   std::chrono::seconds(2), // short timeout
                                   false); // don't run in a shell
@@ -1212,7 +1212,7 @@ const char *PlatformDarwin::GetDeveloperDirectory() {
   assert(m_developer_directory.empty() == false);
   if (m_developer_directory[0])
     return m_developer_directory.c_str();
-  return NULL;
+  return nullptr;
 }
 
 BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) {
@@ -1233,7 +1233,7 @@ BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) {
   bool internal = true;
   bool hardware = false;
   LazyBool skip_prologue = eLazyBoolNo;
-  bp_sp = target.CreateBreakpoint(&bp_modules, NULL, g_bp_names,
+  bp_sp = target.CreateBreakpoint(&bp_modules, nullptr, g_bp_names,
                                   llvm::array_lengthof(g_bp_names),
                                   eFunctionNameTypeFull, eLanguageTypeUnknown,
                                   0, skip_prologue, internal, hardware);
@@ -1250,7 +1250,7 @@ PlatformDarwin::GetResumeCountForLaunchInfo(ProcessLaunchInfo &launch_info) {
 
   std::string shell_string = shell.GetPath();
   const char *shell_name = strrchr(shell_string.c_str(), '/');
-  if (shell_name == NULL)
+  if (shell_name == nullptr)
     shell_name = shell_string.c_str();
   else
     shell_name++;
@@ -1341,7 +1341,7 @@ static FileSpec GetXcodeContentsPath() {
         const char *command = "/usr/bin/xcode-select -p";
         lldb_private::Status error = Host::RunShellCommand(
             command, // shell command to run
-            NULL,    // current working directory
+            nullptr, // current working directory
             &status, // Put the exit status of the process in here
             &signo,  // Put the signal that caused the process to exit in here
             &output, // Get the output from the command and place it in this
@@ -1742,7 +1742,7 @@ PlatformDarwin::FindBundleBinaryInExecSearchPaths (const ModuleSpec &module_spec
           ModuleSpec new_module_spec(module_spec);
           new_module_spec.GetFileSpec() = path_to_try;
           Status new_error(Platform::GetSharedModule(
-              new_module_spec, process, module_sp, NULL, old_module_sp_ptr,
+              new_module_spec, process, module_sp, nullptr, old_module_sp_ptr,
               did_create_ptr));
 
           if (module_sp) {
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
index 47b7b9f77ebf3..5fbb0f1babd45 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp
@@ -183,7 +183,7 @@ ConstString PlatformMacOSX::GetSDKDirectory(lldb_private::Target &target) {
             const char *command = "xcrun -sdk macosx --show-sdk-path";
             lldb_private::Status error = RunShellCommand(
                 command, // shell command to run
-                NULL,    // current working directory
+                nullptr, // current working directory
                 &status, // Put the exit status of the process in here
                 &signo,  // Put the signal that caused the process to exit in
                          // here
@@ -309,7 +309,7 @@ lldb_private::Status PlatformMacOSX::GetSharedModule(
     if (module_spec.GetArchitecture().GetCore() ==
         ArchSpec::eCore_x86_64_x86_64h) {
       ObjectFile *objfile = module_sp->GetObjectFile();
-      if (objfile == NULL) {
+      if (objfile == nullptr) {
         // We didn't find an x86_64h slice, fall back to a x86_64 slice
         ModuleSpec module_spec_x86_64(module_spec);
         module_spec_x86_64.GetArchitecture() = ArchSpec("x86_64-apple-macosx");
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
index b77a54958b8e8..439ca8fcf0505 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp
@@ -81,7 +81,7 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable(
     if (resolved_module_spec.GetArchitecture().IsValid() ||
         resolved_module_spec.GetUUID().IsValid()) {
       error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
-                                          NULL, NULL, NULL);
+                                          nullptr, nullptr, nullptr);
 
       if (exe_module_sp && exe_module_sp->GetObjectFile())
         return error;
@@ -95,7 +95,7 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable(
              idx, resolved_module_spec.GetArchitecture());
          ++idx) {
       error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
-                                          NULL, NULL, NULL);
+                                          nullptr, nullptr, nullptr);
       // Did we find an executable using one of the
       if (error.Success()) {
         if (exe_module_sp && exe_module_sp->GetObjectFile())
@@ -308,12 +308,12 @@ PlatformRemoteDarwinDevice::GetSDKDirectoryForCurrentOSVersion() {
           return &m_sdk_directory_infos[i];
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const PlatformRemoteDarwinDevice::SDKDirectoryInfo *
 PlatformRemoteDarwinDevice::GetSDKDirectoryForLatestOSVersion() {
-  const PlatformRemoteDarwinDevice::SDKDirectoryInfo *result = NULL;
+  const PlatformRemoteDarwinDevice::SDKDirectoryInfo *result = nullptr;
   if (UpdateSDKDirectoryInfosIfNeeded()) {
     auto max = std::max_element(
         m_sdk_directory_infos.begin(), m_sdk_directory_infos.end(),
@@ -344,7 +344,7 @@ const char *PlatformRemoteDarwinDevice::GetDeviceSupportDirectory() {
   assert(m_device_support_directory.empty() == false);
   if (m_device_support_directory[0])
     return m_device_support_directory.c_str();
-  return NULL;
+  return nullptr;
 }
 
 const char *PlatformRemoteDarwinDevice::GetDeviceSupportDirectoryForOSVersion() {
@@ -354,7 +354,7 @@ const char *PlatformRemoteDarwinDevice::GetDeviceSupportDirectoryForOSVersion()
   if (m_device_support_directory_for_os_version.empty()) {
     const PlatformRemoteDarwinDevice::SDKDirectoryInfo *sdk_dir_info =
         GetSDKDirectoryForCurrentOSVersion();
-    if (sdk_dir_info == NULL)
+    if (sdk_dir_info == nullptr)
       sdk_dir_info = GetSDKDirectoryForLatestOSVersion();
     if (sdk_dir_info) {
       char path[PATH_MAX];
@@ -374,7 +374,7 @@ const char *PlatformRemoteDarwinDevice::GetDeviceSupportDirectoryForOSVersion()
   assert(m_device_support_directory_for_os_version.empty() == false);
   if (m_device_support_directory_for_os_version[0])
     return m_device_support_directory_for_os_version.c_str();
-  return NULL;
+  return nullptr;
 }
 
 uint32_t PlatformRemoteDarwinDevice::FindFileInAllSDKs(const char *platform_file_path,
@@ -523,7 +523,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
       if (GetFileInSDK(platform_file_path, connected_sdk_idx,
                        platform_module_spec.GetFileSpec())) {
         module_sp.reset();
-        error = ResolveExecutable(platform_module_spec, module_sp, NULL);
+        error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
         if (module_sp) {
           m_last_module_sdk_idx = connected_sdk_idx;
           error.Clear();
@@ -540,7 +540,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
       if (GetFileInSDK(platform_file_path, m_last_module_sdk_idx,
                        platform_module_spec.GetFileSpec())) {
         module_sp.reset();
-        error = ResolveExecutable(platform_module_spec, module_sp, NULL);
+        error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
         if (module_sp) {
           error.Clear();
           return error;
@@ -562,7 +562,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
       if (GetFileInSDK(platform_file_path, current_sdk_idx,
                        platform_module_spec.GetFileSpec())) {
         module_sp.reset();
-        error = ResolveExecutable(platform_module_spec, module_sp, NULL);
+        error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
         if (module_sp) {
           m_last_module_sdk_idx = current_sdk_idx;
           error.Clear();
@@ -583,7 +583,7 @@ Status PlatformRemoteDarwinDevice::GetSharedModule(
                        platform_module_spec.GetFileSpec())) {
         // printf ("sdk[%u]: '%s'\n", sdk_idx, local_file.GetPath().c_str());
 
-        error = ResolveExecutable(platform_module_spec, module_sp, NULL);
+        error = ResolveExecutable(platform_module_spec, module_sp, nullptr);
         if (module_sp) {
           // Remember the index of the last SDK that we found a file in in case
           // the wrong SDK was selected.
@@ -648,7 +648,7 @@ uint32_t PlatformRemoteDarwinDevice::GetConnectedSDKIndex() {
 
 uint32_t PlatformRemoteDarwinDevice::GetSDKIndexBySDKDirectoryInfo(
     const SDKDirectoryInfo *sdk_info) {
-  if (sdk_info == NULL) {
+  if (sdk_info == nullptr) {
     return UINT32_MAX;
   }
 
diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
index f741f4a05160f..63245d18fc5c3 100644
--- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
+++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
@@ -206,7 +206,7 @@ PlatformNetBSD::GetResumeCountForLaunchInfo(ProcessLaunchInfo &launch_info) {
 
   // Figure out what shell we're planning on using.
   const char *shell_name = strrchr(shell_string.c_str(), '/');
-  if (shell_name == NULL)
+  if (shell_name == nullptr)
     shell_name = shell_string.c_str();
   else
     shell_name++;
@@ -322,7 +322,7 @@ PlatformNetBSD::DebugProcess(ProcessLaunchInfo &launch_info, Debugger &debugger,
     // Handle the hijacking of process events.
     if (listener_sp) {
       const StateType state = process_sp->WaitForProcessToStop(
-          llvm::None, NULL, false, listener_sp);
+          llvm::None, nullptr, false, listener_sp);
 
       LLDB_LOG(log, "pid {0} state {0}", process_sp->GetID(), state);
     }
diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp
index 4b5a985b6d53a..d10557596ff80 100644
--- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp
+++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp
@@ -223,8 +223,8 @@ static uint32_t chown_file(Platform *platform, const char *path,
     command.Printf(":%d", gid);
   command.Printf("%s", path);
   int status;
-  platform->RunShellCommand(command.GetData(), NULL, &status, NULL, NULL,
-                            std::chrono::seconds(10));
+  platform->RunShellCommand(command.GetData(), nullptr, &status, nullptr,
+                            nullptr, std::chrono::seconds(10));
   return status;
 }
 
@@ -248,7 +248,7 @@ PlatformPOSIX::PutFile(const lldb_private::FileSpec &source,
     StreamString command;
     command.Printf("cp %s %s", src_path.c_str(), dst_path.c_str());
     int status;
-    RunShellCommand(command.GetData(), NULL, &status, NULL, NULL,
+    RunShellCommand(command.GetData(), nullptr, &status, nullptr, nullptr,
                     std::chrono::seconds(10));
     if (status != 0)
       return Status("unable to perform copy");
@@ -279,8 +279,8 @@ PlatformPOSIX::PutFile(const lldb_private::FileSpec &source,
       if (log)
         log->Printf("[PutFile] Running command: %s\n", command.GetData());
       int retcode;
-      Host::RunShellCommand(command.GetData(), NULL, &retcode, NULL, NULL,
-                            std::chrono::minutes(1));
+      Host::RunShellCommand(command.GetData(), nullptr, &retcode, nullptr,
+                            nullptr, std::chrono::minutes(1));
       if (retcode == 0) {
         // Don't chown a local file for a remote system
         //                if (chown_file(this,dst_path.c_str(),uid,gid) != 0)
@@ -315,7 +315,7 @@ lldb_private::Status PlatformPOSIX::GetFile(
     StreamString cp_command;
     cp_command.Printf("cp %s %s", src_path.c_str(), dst_path.c_str());
     int status;
-    RunShellCommand(cp_command.GetData(), NULL, &status, NULL, NULL,
+    RunShellCommand(cp_command.GetData(), nullptr, &status, nullptr, nullptr,
                     std::chrono::seconds(10));
     if (status != 0)
       return Status("unable to perform copy");
@@ -337,8 +337,8 @@ lldb_private::Status PlatformPOSIX::GetFile(
       if (log)
         log->Printf("[GetFile] Running command: %s\n", command.GetData());
       int retcode;
-      Host::RunShellCommand(command.GetData(), NULL, &retcode, NULL, NULL,
-                            std::chrono::minutes(1));
+      Host::RunShellCommand(command.GetData(), nullptr, &retcode, nullptr,
+                            nullptr, std::chrono::minutes(1));
       if (retcode == 0)
         return Status();
       // If we are here, rsync has failed - let's try the slow way before
@@ -509,11 +509,11 @@ lldb::ProcessSP PlatformPOSIX::Attach(ProcessAttachInfo &attach_info,
   Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PLATFORM));
 
   if (IsHost()) {
-    if (target == NULL) {
+    if (target == nullptr) {
       TargetSP new_target_sp;
 
       error = debugger.GetTargetList().CreateTarget(
-          debugger, "", "", eLoadDependentsNo, NULL, new_target_sp);
+          debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp);
       target = new_target_sp.get();
       if (log)
         log->Printf("PlatformPOSIX::%s created new target", __FUNCTION__);
@@ -537,7 +537,7 @@ lldb::ProcessSP PlatformPOSIX::Attach(ProcessAttachInfo &attach_info,
 
       process_sp =
           target->CreateProcess(attach_info.GetListenerForProcess(debugger),
-                                attach_info.GetProcessPluginName(), NULL);
+                                attach_info.GetProcessPluginName(), nullptr);
 
       if (process_sp) {
         ListenerSP listener_sp = attach_info.GetHijackListener();
diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
index dea7ccf04221f..9c52b59e2b06d 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
@@ -109,7 +109,8 @@ Status PlatformRemoteGDBServer::ResolveExecutable(
     if (resolved_module_spec.GetArchitecture().IsValid() ||
         resolved_module_spec.GetUUID().IsValid()) {
       error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
-                                          module_search_paths_ptr, NULL, NULL);
+                                          module_search_paths_ptr, nullptr,
+                                          nullptr);
 
       if (exe_module_sp && exe_module_sp->GetObjectFile())
         return error;
@@ -123,7 +124,8 @@ Status PlatformRemoteGDBServer::ResolveExecutable(
              idx, resolved_module_spec.GetArchitecture());
          ++idx) {
       error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
-                                          module_search_paths_ptr, NULL, NULL);
+                                          module_search_paths_ptr, nullptr,
+                                          nullptr);
       // Did we find an executable using one of the
       if (error.Success()) {
         if (exe_module_sp && exe_module_sp->GetObjectFile())
@@ -333,7 +335,7 @@ Status PlatformRemoteGDBServer::DisconnectRemote() {
 const char *PlatformRemoteGDBServer::GetHostname() {
   m_gdb_client.GetHostname(m_name);
   if (m_name.empty())
-    return NULL;
+    return nullptr;
   return m_name.c_str();
 }
 
@@ -469,11 +471,11 @@ lldb::ProcessSP PlatformRemoteGDBServer::DebugProcess(
         error.SetErrorStringWithFormat("unable to launch a GDB server on '%s'",
                                        GetHostname());
       } else {
-        if (target == NULL) {
+        if (target == nullptr) {
           TargetSP new_target_sp;
 
           error = debugger.GetTargetList().CreateTarget(
-              debugger, "", "", eLoadDependentsNo, NULL, new_target_sp);
+              debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp);
           target = new_target_sp.get();
         } else
           error.Clear();
@@ -484,7 +486,7 @@ lldb::ProcessSP PlatformRemoteGDBServer::DebugProcess(
           // The darwin always currently uses the GDB remote debugger plug-in
           // so even when debugging locally we are debugging remotely!
           process_sp = target->CreateProcess(launch_info.GetListener(),
-                                             "gdb-remote", NULL);
+                                             "gdb-remote", nullptr);
 
           if (process_sp) {
             error = process_sp->ConnectRemote(nullptr, connect_url.c_str());
@@ -555,11 +557,11 @@ lldb::ProcessSP PlatformRemoteGDBServer::Attach(
         error.SetErrorStringWithFormat("unable to launch a GDB server on '%s'",
                                        GetHostname());
       } else {
-        if (target == NULL) {
+        if (target == nullptr) {
           TargetSP new_target_sp;
 
           error = debugger.GetTargetList().CreateTarget(
-              debugger, "", "", eLoadDependentsNo, NULL, new_target_sp);
+              debugger, "", "", eLoadDependentsNo, nullptr, new_target_sp);
           target = new_target_sp.get();
         } else
           error.Clear();
@@ -569,8 +571,9 @@ lldb::ProcessSP PlatformRemoteGDBServer::Attach(
 
           // The darwin always currently uses the GDB remote debugger plug-in
           // so even when debugging locally we are debugging remotely!
-          process_sp = target->CreateProcess(
-              attach_info.GetListenerForProcess(debugger), "gdb-remote", NULL);
+          process_sp =
+              target->CreateProcess(attach_info.GetListenerForProcess(debugger),
+                                    "gdb-remote", nullptr);
           if (process_sp) {
             error = process_sp->ConnectRemote(nullptr, connect_url.c_str());
             if (error.Success()) {
diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp
index c35cb54bb69ca..029bdf48b12ae 100644
--- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.cpp
@@ -29,13 +29,13 @@ static inline int get_cpuid_count(unsigned int __leaf,
                                   unsigned int *__eax, unsigned int *__ebx,
                                   unsigned int *__ecx, unsigned int *__edx)
 {
-    unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, 0);
+  unsigned int __max_leaf = __get_cpuid_max(__leaf & 0x80000000, nullptr);
 
-    if (__max_leaf == 0 || __max_leaf < __leaf)
-        return 0;
+  if (__max_leaf == 0 || __max_leaf < __leaf)
+    return 0;
 
-    __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
-    return 1;
+  __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
+  return 1;
 }
 
 using namespace lldb_private;
diff --git a/lldb/source/Plugins/Process/Linux/ProcessorTrace.cpp b/lldb/source/Plugins/Process/Linux/ProcessorTrace.cpp
index 749a765731b6b..a7e8c00f0fe94 100644
--- a/lldb/source/Plugins/Process/Linux/ProcessorTrace.cpp
+++ b/lldb/source/Plugins/Process/Linux/ProcessorTrace.cpp
@@ -117,7 +117,7 @@ Status ProcessorTraceMonitor::StartTrace(lldb::pid_t pid, lldb::tid_t tid,
 
   errno = 0;
   auto base =
-      mmap(NULL, (metabufsize + page_size), PROT_WRITE, MAP_SHARED, fd, 0);
+      mmap(nullptr, (metabufsize + page_size), PROT_WRITE, MAP_SHARED, fd, 0);
 
   if (base == MAP_FAILED) {
     LLDB_LOG(log, "mmap base error {0}", errno);
@@ -133,7 +133,7 @@ Status ProcessorTraceMonitor::StartTrace(lldb::pid_t pid, lldb::tid_t tid,
   m_mmap_meta->aux_size = bufsize;
 
   errno = 0;
-  auto mmap_aux = mmap(NULL, bufsize, PROT_READ, MAP_SHARED, fd,
+  auto mmap_aux = mmap(nullptr, bufsize, PROT_READ, MAP_SHARED, fd,
                        static_cast<long int>(m_mmap_meta->aux_offset));
 
   if (mmap_aux == MAP_FAILED) {
diff --git a/lldb/source/Plugins/Process/POSIX/ProcessMessage.cpp b/lldb/source/Plugins/Process/POSIX/ProcessMessage.cpp
index 3dde2f4394319..aa8449131a689 100644
--- a/lldb/source/Plugins/Process/POSIX/ProcessMessage.cpp
+++ b/lldb/source/Plugins/Process/POSIX/ProcessMessage.cpp
@@ -20,7 +20,7 @@ const char *ProcessMessage::PrintKind(Kind kind) {
   chcar str[8];
   sprintf(str, "%d", reason);
 #else
-  const char *str = NULL;
+  const char *str = nullptr;
 
   switch (kind) {
   case eInvalidMessage:
diff --git a/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp b/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
index f59d0374f634e..1afe4d920599d 100644
--- a/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
+++ b/lldb/source/Plugins/Process/Utility/DynamicRegisterInfo.cpp
@@ -67,7 +67,7 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
     for (uint32_t i = 0; i < num_sets; ++i) {
       ConstString set_name;
       if (sets->GetItemAtIndexAsString(i, set_name) && !set_name.IsEmpty()) {
-        m_sets.push_back({ set_name.AsCString(), NULL, 0, NULL });
+        m_sets.push_back({set_name.AsCString(), nullptr, 0, nullptr});
       } else {
         Clear();
         printf("error: register sets must have valid names\n");
@@ -302,7 +302,7 @@ DynamicRegisterInfo::SetRegisterInfo(const StructuredData::Dictionary &dict,
     llvm::StringRef format_str;
     if (reg_info_dict->GetValueForKeyAsString("format", format_str, nullptr)) {
       if (OptionArgParser::ToFormat(format_str.str().c_str(), reg_info.format,
-                                    NULL)
+                                    nullptr)
               .Fail()) {
         Clear();
         printf("error: invalid 'format' value in register dictionary\n");
@@ -414,7 +414,7 @@ void DynamicRegisterInfo::AddRegister(RegisterInfo &reg_info,
   const uint32_t reg_num = m_regs.size();
   reg_info.name = reg_name.AsCString();
   assert(reg_info.name);
-  reg_info.alt_name = reg_alt_name.AsCString(NULL);
+  reg_info.alt_name = reg_alt_name.AsCString(nullptr);
   uint32_t i;
   if (reg_info.value_regs) {
     for (i = 0; reg_info.value_regs[i] != LLDB_INVALID_REGNUM; ++i)
@@ -480,7 +480,7 @@ void DynamicRegisterInfo::Finalize(const ArchSpec &arch) {
     if (m_value_regs_map.find(i) != m_value_regs_map.end())
       m_regs[i].value_regs = m_value_regs_map[i].data();
     else
-      m_regs[i].value_regs = NULL;
+      m_regs[i].value_regs = nullptr;
   }
 
   // Expand all invalidation dependencies
@@ -529,7 +529,7 @@ void DynamicRegisterInfo::Finalize(const ArchSpec &arch) {
     if (m_invalidate_regs_map.find(i) != m_invalidate_regs_map.end())
       m_regs[i].invalidate_regs = m_invalidate_regs_map[i].data();
     else
-      m_regs[i].invalidate_regs = NULL;
+      m_regs[i].invalidate_regs = nullptr;
   }
 
   // Check if we need to automatically set the generic registers in case they
@@ -639,19 +639,19 @@ const RegisterInfo *
 DynamicRegisterInfo::GetRegisterInfoAtIndex(uint32_t i) const {
   if (i < m_regs.size())
     return &m_regs[i];
-  return NULL;
+  return nullptr;
 }
 
 RegisterInfo *DynamicRegisterInfo::GetRegisterInfoAtIndex(uint32_t i) {
   if (i < m_regs.size())
     return &m_regs[i];
-  return NULL;
+  return nullptr;
 }
 
 const RegisterSet *DynamicRegisterInfo::GetRegisterSet(uint32_t i) const {
   if (i < m_sets.size())
     return &m_sets[i];
-  return NULL;
+  return nullptr;
 }
 
 uint32_t DynamicRegisterInfo::GetRegisterSetIndexByName(ConstString &set_name,
@@ -664,7 +664,7 @@ uint32_t DynamicRegisterInfo::GetRegisterSetIndexByName(ConstString &set_name,
 
   m_set_names.push_back(set_name);
   m_set_reg_nums.resize(m_set_reg_nums.size() + 1);
-  RegisterSet new_set = {set_name.AsCString(), NULL, 0, NULL};
+  RegisterSet new_set = {set_name.AsCString(), nullptr, 0, nullptr};
   m_sets.push_back(new_set);
   return m_sets.size() - 1;
 }
@@ -754,5 +754,5 @@ const lldb_private::RegisterInfo *DynamicRegisterInfo::GetRegisterInfo(
       return &reg_info;
     }
   }
-  return NULL;
+  return nullptr;
 }
diff --git a/lldb/source/Plugins/Process/Utility/HistoryThread.cpp b/lldb/source/Plugins/Process/Utility/HistoryThread.cpp
index 23f6077b32c15..f0e39eed0924f 100644
--- a/lldb/source/Plugins/Process/Utility/HistoryThread.cpp
+++ b/lldb/source/Plugins/Process/Utility/HistoryThread.cpp
@@ -66,7 +66,7 @@ lldb::StackFrameListSP HistoryThread::GetStackFrameList() {
   // FIXME do not throw away the lock after we acquire it..
   std::unique_lock<std::mutex> lock(m_framelist_mutex);
   lock.unlock();
-  if (m_framelist.get() == NULL) {
+  if (m_framelist.get() == nullptr) {
     m_framelist =
         std::make_shared<StackFrameList>(*this, StackFrameListSP(), true);
   }
diff --git a/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp b/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp
index 3bf5d9b52a776..9beaf2fc7ac87 100644
--- a/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp
+++ b/lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp
@@ -38,7 +38,7 @@ bool lldb_private::InferiorCallMmap(Process *process, addr_t &allocated_addr,
                                     unsigned flags, addr_t fd, addr_t offset) {
   Thread *thread =
       process->GetThreadList().GetExpressionExecutionThread().get();
-  if (thread == NULL)
+  if (thread == nullptr)
     return false;
 
   const bool append = true;
@@ -126,7 +126,7 @@ bool lldb_private::InferiorCallMunmap(Process *process, addr_t addr,
                                       addr_t length) {
   Thread *thread =
       process->GetThreadList().GetExpressionExecutionThread().get();
-  if (thread == NULL)
+  if (thread == nullptr)
     return false;
 
   const bool append = true;
@@ -188,7 +188,7 @@ bool lldb_private::InferiorCall(Process *process, const Address *address,
                                 addr_t &returned_func, bool trap_exceptions) {
   Thread *thread =
       process->GetThreadList().GetExpressionExecutionThread().get();
-  if (thread == NULL || address == NULL)
+  if (thread == nullptr || address == nullptr)
     return false;
 
   EvaluateExpressionOptions options;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
index ea950cee75a8f..e804a4d251f73 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
@@ -198,7 +198,7 @@ static RegisterInfo g_register_infos[] = {
     //  ===============         ===============     =========================
     //  =====================   =============
     {"r0",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(0),
      eEncodingUint,
@@ -209,7 +209,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r1",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(1),
      eEncodingUint,
@@ -220,7 +220,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r2",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(2),
      eEncodingUint,
@@ -231,7 +231,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r3",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(3),
      eEncodingUint,
@@ -242,7 +242,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r4",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(4),
      eEncodingUint,
@@ -253,7 +253,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r5",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(5),
      eEncodingUint,
@@ -264,7 +264,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r6",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(6),
      eEncodingUint,
@@ -275,7 +275,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r7",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(7),
      eEncodingUint,
@@ -287,7 +287,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r8",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(8),
      eEncodingUint,
@@ -298,7 +298,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r9",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(9),
      eEncodingUint,
@@ -309,7 +309,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r10",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(10),
      eEncodingUint,
@@ -321,7 +321,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r11",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(11),
      eEncodingUint,
@@ -333,7 +333,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"r12",
-     NULL,
+     nullptr,
      4,
      GPR_OFFSET(12),
      eEncodingUint,
@@ -394,7 +394,7 @@ static RegisterInfo g_register_infos[] = {
      0},
 
     {"s0",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(0),
      eEncodingIEEE754,
@@ -406,7 +406,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s1",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(1),
      eEncodingIEEE754,
@@ -418,7 +418,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s2",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(2),
      eEncodingIEEE754,
@@ -430,7 +430,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s3",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(3),
      eEncodingIEEE754,
@@ -442,7 +442,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s4",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(4),
      eEncodingIEEE754,
@@ -454,7 +454,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s5",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(5),
      eEncodingIEEE754,
@@ -466,7 +466,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s6",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(6),
      eEncodingIEEE754,
@@ -478,7 +478,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s7",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(7),
      eEncodingIEEE754,
@@ -490,7 +490,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s8",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(8),
      eEncodingIEEE754,
@@ -502,7 +502,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s9",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(9),
      eEncodingIEEE754,
@@ -514,7 +514,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s10",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(10),
      eEncodingIEEE754,
@@ -526,7 +526,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s11",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(11),
      eEncodingIEEE754,
@@ -538,7 +538,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s12",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(12),
      eEncodingIEEE754,
@@ -550,7 +550,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s13",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(13),
      eEncodingIEEE754,
@@ -562,7 +562,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s14",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(14),
      eEncodingIEEE754,
@@ -574,7 +574,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s15",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(15),
      eEncodingIEEE754,
@@ -586,7 +586,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s16",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(16),
      eEncodingIEEE754,
@@ -598,7 +598,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s17",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(17),
      eEncodingIEEE754,
@@ -610,7 +610,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s18",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(18),
      eEncodingIEEE754,
@@ -622,7 +622,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s19",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(19),
      eEncodingIEEE754,
@@ -634,7 +634,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s20",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(20),
      eEncodingIEEE754,
@@ -646,7 +646,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s21",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(21),
      eEncodingIEEE754,
@@ -658,7 +658,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s22",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(22),
      eEncodingIEEE754,
@@ -670,7 +670,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s23",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(23),
      eEncodingIEEE754,
@@ -682,7 +682,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s24",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(24),
      eEncodingIEEE754,
@@ -694,7 +694,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s25",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(25),
      eEncodingIEEE754,
@@ -706,7 +706,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s26",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(26),
      eEncodingIEEE754,
@@ -718,7 +718,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s27",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(27),
      eEncodingIEEE754,
@@ -730,7 +730,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s28",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(28),
      eEncodingIEEE754,
@@ -742,7 +742,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s29",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(29),
      eEncodingIEEE754,
@@ -754,7 +754,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s30",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(30),
      eEncodingIEEE754,
@@ -766,7 +766,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"s31",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(31),
      eEncodingIEEE754,
@@ -778,7 +778,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"fpscr",
-     NULL,
+     nullptr,
      4,
      FPU_OFFSET(32),
      eEncodingUint,
@@ -791,7 +791,7 @@ static RegisterInfo g_register_infos[] = {
      0},
 
     {"exception",
-     NULL,
+     nullptr,
      4,
      EXC_OFFSET(0),
      eEncodingUint,
@@ -803,7 +803,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"fsr",
-     NULL,
+     nullptr,
      4,
      EXC_OFFSET(1),
      eEncodingUint,
@@ -815,7 +815,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      0},
     {"far",
-     NULL,
+     nullptr,
      4,
      EXC_OFFSET(2),
      eEncodingUint,
@@ -944,7 +944,7 @@ RegisterContextDarwin_arm::GetRegisterInfoAtIndex(size_t reg) {
   assert(k_num_register_infos == k_num_registers);
   if (reg < k_num_registers)
     return &g_register_infos[reg];
-  return NULL;
+  return nullptr;
 }
 
 size_t RegisterContextDarwin_arm::GetRegisterInfosCount() {
@@ -979,7 +979,7 @@ size_t RegisterContextDarwin_arm::GetRegisterSetCount() {
 const RegisterSet *RegisterContextDarwin_arm::GetRegisterSet(size_t reg_set) {
   if (reg_set < k_num_regsets)
     return &g_reg_sets[reg_set];
-  return NULL;
+  return nullptr;
 }
 
 // Register information definitions for 32 bit i386.
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
index c264831c0ddb9..e0d67d12d4971 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
@@ -122,7 +122,7 @@ RegisterContextDarwin_arm64::GetRegisterInfoAtIndex(size_t reg) {
   assert(k_num_register_infos == k_num_registers);
   if (reg < k_num_registers)
     return &g_register_infos_arm64_le[reg];
-  return NULL;
+  return nullptr;
 }
 
 size_t RegisterContextDarwin_arm64::GetRegisterInfosCount() {
@@ -157,7 +157,7 @@ size_t RegisterContextDarwin_arm64::GetRegisterSetCount() {
 const RegisterSet *RegisterContextDarwin_arm64::GetRegisterSet(size_t reg_set) {
   if (reg_set < k_num_regsets)
     return &g_reg_sets[reg_set];
-  return NULL;
+  return nullptr;
 }
 
 // Register information definitions for arm64
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
index 130a2673fde0a..69b56242c86ce 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
@@ -176,42 +176,42 @@ static RegisterInfo g_register_infos[] = {
     //  =============================== =======================
     //  ===================   =========================  ==================
     //  =================
-    {DEFINE_GPR(eax, NULL),
+    {DEFINE_GPR(eax, nullptr),
      {ehframe_eax, dwarf_eax, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       gpr_eax},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(ebx, NULL),
+    {DEFINE_GPR(ebx, nullptr),
      {ehframe_ebx, dwarf_ebx, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       gpr_ebx},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(ecx, NULL),
+    {DEFINE_GPR(ecx, nullptr),
      {ehframe_ecx, dwarf_ecx, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       gpr_ecx},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(edx, NULL),
+    {DEFINE_GPR(edx, nullptr),
      {ehframe_edx, dwarf_edx, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       gpr_edx},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(edi, NULL),
+    {DEFINE_GPR(edi, nullptr),
      {ehframe_edi, dwarf_edi, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       gpr_edi},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(esi, NULL),
+    {DEFINE_GPR(esi, nullptr),
      {ehframe_esi, dwarf_esi, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       gpr_esi},
      nullptr,
@@ -232,7 +232,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(ss, NULL),
+    {DEFINE_GPR(ss, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_ss},
      nullptr,
@@ -253,35 +253,35 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(cs, NULL),
+    {DEFINE_GPR(cs, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_cs},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(ds, NULL),
+    {DEFINE_GPR(ds, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_ds},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(es, NULL),
+    {DEFINE_GPR(es, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_es},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(fs, NULL),
+    {DEFINE_GPR(fs, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_fs},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(gs, NULL),
+    {DEFINE_GPR(gs, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_gs},
      nullptr,
@@ -427,7 +427,7 @@ RegisterContextDarwin_i386::GetRegisterInfoAtIndex(size_t reg) {
   assert(k_num_register_infos == k_num_registers);
   if (reg < k_num_registers)
     return &g_register_infos[reg];
-  return NULL;
+  return nullptr;
 }
 
 size_t RegisterContextDarwin_i386::GetRegisterInfosCount() {
@@ -479,7 +479,7 @@ size_t RegisterContextDarwin_i386::GetRegisterSetCount() {
 const RegisterSet *RegisterContextDarwin_i386::GetRegisterSet(size_t reg_set) {
   if (reg_set < k_num_regsets)
     return &g_reg_sets[reg_set];
-  return NULL;
+  return nullptr;
 }
 
 // Register information definitions for 32 bit i386.
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
index 4a37454ba8c5f..e908c6dffafcf 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
@@ -195,42 +195,42 @@ static RegisterInfo g_register_infos[] = {
     //  =============================== ======================
     //  ===================      ========================== ====================
     //  ===================
-    {DEFINE_GPR(rax, NULL),
+    {DEFINE_GPR(rax, nullptr),
      {ehframe_dwarf_gpr_rax, ehframe_dwarf_gpr_rax, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_rax},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(rbx, NULL),
+    {DEFINE_GPR(rbx, nullptr),
      {ehframe_dwarf_gpr_rbx, ehframe_dwarf_gpr_rbx, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_rbx},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(rcx, NULL),
+    {DEFINE_GPR(rcx, nullptr),
      {ehframe_dwarf_gpr_rcx, ehframe_dwarf_gpr_rcx, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_rcx},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(rdx, NULL),
+    {DEFINE_GPR(rdx, nullptr),
      {ehframe_dwarf_gpr_rdx, ehframe_dwarf_gpr_rdx, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_rdx},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(rdi, NULL),
+    {DEFINE_GPR(rdi, nullptr),
      {ehframe_dwarf_gpr_rdi, ehframe_dwarf_gpr_rdi, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_rdi},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(rsi, NULL),
+    {DEFINE_GPR(rsi, nullptr),
      {ehframe_dwarf_gpr_rsi, ehframe_dwarf_gpr_rsi, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_rsi},
      nullptr,
@@ -251,56 +251,56 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r8, NULL),
+    {DEFINE_GPR(r8, nullptr),
      {ehframe_dwarf_gpr_r8, ehframe_dwarf_gpr_r8, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r8},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r9, NULL),
+    {DEFINE_GPR(r9, nullptr),
      {ehframe_dwarf_gpr_r9, ehframe_dwarf_gpr_r9, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r9},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r10, NULL),
+    {DEFINE_GPR(r10, nullptr),
      {ehframe_dwarf_gpr_r10, ehframe_dwarf_gpr_r10, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r10},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r11, NULL),
+    {DEFINE_GPR(r11, nullptr),
      {ehframe_dwarf_gpr_r11, ehframe_dwarf_gpr_r11, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r11},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r12, NULL),
+    {DEFINE_GPR(r12, nullptr),
      {ehframe_dwarf_gpr_r12, ehframe_dwarf_gpr_r12, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r12},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r13, NULL),
+    {DEFINE_GPR(r13, nullptr),
      {ehframe_dwarf_gpr_r13, ehframe_dwarf_gpr_r13, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r13},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r14, NULL),
+    {DEFINE_GPR(r14, nullptr),
      {ehframe_dwarf_gpr_r14, ehframe_dwarf_gpr_r14, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r14},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(r15, NULL),
+    {DEFINE_GPR(r15, nullptr),
      {ehframe_dwarf_gpr_r15, ehframe_dwarf_gpr_r15, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_r15},
      nullptr,
@@ -321,21 +321,21 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(cs, NULL),
+    {DEFINE_GPR(cs, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_cs},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(fs, NULL),
+    {DEFINE_GPR(fs, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_fs},
      nullptr,
      nullptr,
      nullptr,
      0},
-    {DEFINE_GPR(gs, NULL),
+    {DEFINE_GPR(gs, nullptr),
      {LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM, LLDB_INVALID_REGNUM,
       LLDB_INVALID_REGNUM, gpr_gs},
      nullptr,
@@ -489,7 +489,7 @@ RegisterContextDarwin_x86_64::GetRegisterInfoAtIndex(size_t reg) {
   assert(k_num_register_infos == k_num_registers);
   if (reg < k_num_registers)
     return &g_register_infos[reg];
-  return NULL;
+  return nullptr;
 }
 
 size_t RegisterContextDarwin_x86_64::GetRegisterInfosCount() {
@@ -541,7 +541,7 @@ const RegisterSet *
 RegisterContextDarwin_x86_64::GetRegisterSet(size_t reg_set) {
   if (reg_set < k_num_regsets)
     return &g_reg_sets[reg_set];
-  return NULL;
+  return nullptr;
 }
 
 int RegisterContextDarwin_x86_64::GetSetForNativeRegNum(int reg_num) {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDummy.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDummy.cpp
index 5ea13a54fd2ba..6832b6095931b 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDummy.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDummy.cpp
@@ -49,8 +49,8 @@ RegisterContextDummy::RegisterContextDummy(Thread &thread,
   m_pc_reg_info.byte_size = address_byte_size;
   m_pc_reg_info.encoding = eEncodingUint;
   m_pc_reg_info.format = eFormatPointer;
-  m_pc_reg_info.invalidate_regs = NULL;
-  m_pc_reg_info.value_regs = NULL;
+  m_pc_reg_info.invalidate_regs = nullptr;
+  m_pc_reg_info.value_regs = nullptr;
   m_pc_reg_info.kinds[eRegisterKindEHFrame] = LLDB_INVALID_REGNUM;
   m_pc_reg_info.kinds[eRegisterKindDWARF] = LLDB_INVALID_REGNUM;
   m_pc_reg_info.kinds[eRegisterKindGeneric] = LLDB_REGNUM_GENERIC_PC;
@@ -71,7 +71,7 @@ size_t RegisterContextDummy::GetRegisterCount() { return 1; }
 const lldb_private::RegisterInfo *
 RegisterContextDummy::GetRegisterInfoAtIndex(size_t reg) {
   if (reg)
-    return NULL;
+    return nullptr;
   return &m_pc_reg_info;
 }
 
@@ -80,7 +80,7 @@ size_t RegisterContextDummy::GetRegisterSetCount() { return 1; }
 const lldb_private::RegisterSet *
 RegisterContextDummy::GetRegisterSet(size_t reg_set) {
   if (reg_set)
-    return NULL;
+    return nullptr;
   return &m_reg_set0;
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp
index 1c3f33d287c78..b90b381082672 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp
@@ -70,7 +70,7 @@ const RegisterInfo *RegisterContextFreeBSD_i386::GetRegisterInfo() const {
     return g_register_infos_i386;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp
index a0c314653af2e..4f869eb3b1771 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp
@@ -188,7 +188,7 @@ size_t RegisterContextFreeBSD_powerpc::GetGPRSize() const {
 const RegisterInfo *RegisterContextFreeBSD_powerpc::GetRegisterInfo() const {
   // assert (m_target_arch.GetCore() == ArchSpec::eCore_powerpc);
   llvm_unreachable("Abstract class!");
-  return NULL;
+  return nullptr;
 }
 
 uint32_t RegisterContextFreeBSD_powerpc::GetRegisterCount() const { return 0; }
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextHistory.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextHistory.cpp
index 8c2268aeaacba..c19a2bfae668a 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextHistory.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextHistory.cpp
@@ -50,8 +50,8 @@ RegisterContextHistory::RegisterContextHistory(Thread &thread,
   m_pc_reg_info.byte_size = address_byte_size;
   m_pc_reg_info.encoding = eEncodingUint;
   m_pc_reg_info.format = eFormatPointer;
-  m_pc_reg_info.invalidate_regs = NULL;
-  m_pc_reg_info.value_regs = NULL;
+  m_pc_reg_info.invalidate_regs = nullptr;
+  m_pc_reg_info.value_regs = nullptr;
   m_pc_reg_info.kinds[eRegisterKindEHFrame] = LLDB_INVALID_REGNUM;
   m_pc_reg_info.kinds[eRegisterKindDWARF] = LLDB_INVALID_REGNUM;
   m_pc_reg_info.kinds[eRegisterKindGeneric] = LLDB_REGNUM_GENERIC_PC;
@@ -72,7 +72,7 @@ size_t RegisterContextHistory::GetRegisterCount() { return 1; }
 const lldb_private::RegisterInfo *
 RegisterContextHistory::GetRegisterInfoAtIndex(size_t reg) {
   if (reg)
-    return NULL;
+    return nullptr;
   return &m_pc_reg_info;
 }
 
@@ -81,7 +81,7 @@ size_t RegisterContextHistory::GetRegisterSetCount() { return 1; }
 const lldb_private::RegisterSet *
 RegisterContextHistory::GetRegisterSet(size_t reg_set) {
   if (reg_set)
-    return NULL;
+    return nullptr;
   return &m_reg_set0;
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp
index ecda5556e606e..4ec8bba098148 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp
@@ -113,7 +113,7 @@ void RegisterContextLLDB::InitializeZerothFrame() {
   ExecutionContext exe_ctx(m_thread.shared_from_this());
   RegisterContextSP reg_ctx_sp = m_thread.GetRegisterContext();
 
-  if (reg_ctx_sp.get() == NULL) {
+  if (reg_ctx_sp.get() == nullptr) {
     m_frame_type = eNotAValidFrame;
     UnwindLogMsg("frame does not have a register context");
     return;
@@ -664,7 +664,7 @@ UnwindPlanSP RegisterContextLLDB::GetFastUnwindPlanForFrame() {
   ModuleSP pc_module_sp(m_current_pc.GetModule());
 
   if (!m_current_pc.IsValid() || !pc_module_sp ||
-      pc_module_sp->GetObjectFile() == NULL)
+      pc_module_sp->GetObjectFile() == nullptr)
     return unwind_plan_sp;
 
   if (IsFrameZero())
@@ -715,7 +715,7 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() {
   UnwindPlanSP arch_default_unwind_plan_sp;
   ExecutionContext exe_ctx(m_thread.shared_from_this());
   Process *process = exe_ctx.GetProcessPtr();
-  ABI *abi = process ? process->GetABI().get() : NULL;
+  ABI *abi = process ? process->GetABI().get() : nullptr;
   if (abi) {
     arch_default_unwind_plan_sp =
         std::make_shared<UnwindPlan>(lldb::eRegisterKindGeneric);
@@ -743,7 +743,7 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() {
   // This is for jumping to memory regions without any information available.
 
   if ((!m_sym_ctx_valid ||
-       (m_sym_ctx.function == NULL && m_sym_ctx.symbol == NULL)) &&
+       (m_sym_ctx.function == nullptr && m_sym_ctx.symbol == nullptr)) &&
       behaves_like_zeroth_frame && m_current_pc.IsValid()) {
     uint32_t permissions;
     addr_t current_pc_addr =
@@ -765,7 +765,7 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() {
   // No Module for the current pc, try using the architecture default unwind.
   ModuleSP pc_module_sp(m_current_pc.GetModule());
   if (!m_current_pc.IsValid() || !pc_module_sp ||
-      pc_module_sp->GetObjectFile() == NULL) {
+      pc_module_sp->GetObjectFile() == nullptr) {
     m_frame_type = eNormalFrame;
     return arch_default_unwind_plan_sp;
   }
@@ -1349,7 +1349,7 @@ RegisterContextLLDB::SavedLocationForRegister(
     // register, we may be able to fall back to some ABI-defined default.  For
     // example, some ABIs allow to determine the caller's SP via the CFA. Also,
     // the ABI may set volatile registers to the undefined state.
-    ABI *abi = process ? process->GetABI().get() : NULL;
+    ABI *abi = process ? process->GetABI().get() : nullptr;
     if (abi) {
       const RegisterInfo *reg_info =
           GetRegisterInfoAtIndex(regnum.GetAsKind(eRegisterKindLLDB));
@@ -1702,10 +1702,10 @@ bool RegisterContextLLDB::TryFallbackUnwindPlan() {
 }
 
 bool RegisterContextLLDB::ForceSwitchToFallbackUnwindPlan() {
-  if (m_fallback_unwind_plan_sp.get() == NULL)
+  if (m_fallback_unwind_plan_sp.get() == nullptr)
     return false;
 
-  if (m_full_unwind_plan_sp.get() == NULL)
+  if (m_full_unwind_plan_sp.get() == nullptr)
     return false;
 
   if (m_full_unwind_plan_sp.get() == m_fallback_unwind_plan_sp.get() ||
@@ -2067,7 +2067,7 @@ void RegisterContextLLDB::UnwindLogMsg(const char *fmt, ...) {
     va_start(args, fmt);
 
     char *logmsg;
-    if (vasprintf(&logmsg, fmt, args) == -1 || logmsg == NULL) {
+    if (vasprintf(&logmsg, fmt, args) == -1 || logmsg == nullptr) {
       if (logmsg)
         free(logmsg);
       va_end(args);
@@ -2088,7 +2088,7 @@ void RegisterContextLLDB::UnwindLogMsgVerbose(const char *fmt, ...) {
     va_start(args, fmt);
 
     char *logmsg;
-    if (vasprintf(&logmsg, fmt, args) == -1 || logmsg == NULL) {
+    if (vasprintf(&logmsg, fmt, args) == -1 || logmsg == nullptr) {
       if (logmsg)
         free(logmsg);
       va_end(args);
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
index 1210a7e385757..79979639dc7ee 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp
@@ -90,8 +90,8 @@ RegisterContextLinux_i386::RegisterContextLinux_i386(
     const ArchSpec &target_arch)
     : RegisterInfoInterface(target_arch) {
   RegisterInfo orig_ax = {"orig_eax",
-                          NULL,
-                          sizeof(((GPR *)NULL)->orig_eax),
+                          nullptr,
+                          sizeof(((GPR *)nullptr)->orig_eax),
                           (LLVM_EXTENSION offsetof(GPR, orig_eax)),
                           eEncodingUint,
                           eFormatHex,
@@ -114,7 +114,7 @@ const RegisterInfo *RegisterContextLinux_i386::GetRegisterInfo() const {
     return g_register_infos_i386;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_mips.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_mips.cpp
index 4797afbc0eeb4..fc60fea791761 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_mips.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_mips.cpp
@@ -116,7 +116,7 @@ const RegisterInfo *RegisterContextLinux_mips::GetRegisterInfo() const {
     return g_register_infos_mips;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp
index 6f78f84e93f89..640d5bc022569 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp
@@ -157,8 +157,8 @@ RegisterContextLinux_x86_64::RegisterContextLinux_x86_64(
       m_register_info_count(GetRegisterInfoCount(target_arch)),
       m_user_register_count(GetUserRegisterInfoCount(target_arch)) {
   RegisterInfo orig_ax = {"orig_rax",
-                          NULL,
-                          sizeof(((GPR *)NULL)->orig_rax),
+                          nullptr,
+                          sizeof(((GPR *)nullptr)->orig_rax),
                           (LLVM_EXTENSION offsetof(GPR, orig_rax)),
                           eEncodingUint,
                           eFormatHex,
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_i386.cpp
index c1f141cf2d6ce..06eac6f7f9910 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_i386.cpp
@@ -67,7 +67,7 @@ const RegisterInfo *RegisterContextOpenBSD_i386::GetRegisterInfo() const {
     return g_register_infos_i386;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp
index 7366130dd9b1b..821e2aa73b5b1 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp
@@ -146,7 +146,7 @@ RegisterContextPOSIX_arm::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < m_reg_info.num_registers)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_arm::GetRegisterSetCount() {
@@ -167,10 +167,10 @@ RegisterContextPOSIX_arm::GetRegisterSet(size_t set) {
       return &g_reg_sets_arm[set];
     default:
       assert(false && "Unhandled target architecture.");
-      return NULL;
+      return nullptr;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *RegisterContextPOSIX_arm::GetRegisterName(unsigned reg) {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
index 5fb9f1f85a10d..99b897d441b59 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
@@ -166,7 +166,7 @@ RegisterContextPOSIX_arm64::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < m_reg_info.num_registers)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_arm64::GetRegisterSetCount() {
@@ -187,10 +187,10 @@ RegisterContextPOSIX_arm64::GetRegisterSet(size_t set) {
       return &g_reg_sets_arm64[set];
     default:
       assert(false && "Unhandled target architecture.");
-      return NULL;
+      return nullptr;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *RegisterContextPOSIX_arm64::GetRegisterName(unsigned reg) {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_mips64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_mips64.cpp
index cedbcd42373e7..f1fa3035b2ef7 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_mips64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_mips64.cpp
@@ -96,7 +96,7 @@ RegisterContextPOSIX_mips64::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < m_num_registers)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_mips64::GetRegisterSetCount() {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_powerpc.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_powerpc.cpp
index 9698b2d84d64f..a78e9ed37947d 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_powerpc.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_powerpc.cpp
@@ -133,7 +133,7 @@ RegisterContextPOSIX_powerpc::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < k_num_registers_powerpc)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_powerpc::GetRegisterSetCount() {
@@ -150,7 +150,7 @@ const RegisterSet *RegisterContextPOSIX_powerpc::GetRegisterSet(size_t set) {
   if (IsRegisterSetAvailable(set))
     return &g_reg_sets_powerpc[set];
   else
-    return NULL;
+    return nullptr;
 }
 
 const char *RegisterContextPOSIX_powerpc::GetRegisterName(unsigned reg) {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_ppc64le.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_ppc64le.cpp
index 2b380797f1928..02546c0ed16fa 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_ppc64le.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_ppc64le.cpp
@@ -151,7 +151,7 @@ RegisterContextPOSIX_ppc64le::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < k_num_registers_ppc64le)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_ppc64le::GetRegisterSetCount() {
@@ -168,7 +168,7 @@ const RegisterSet *RegisterContextPOSIX_ppc64le::GetRegisterSet(size_t set) {
   if (IsRegisterSetAvailable(set))
     return &g_reg_sets_ppc64le[set];
   else
-    return NULL;
+    return nullptr;
 }
 
 const char *RegisterContextPOSIX_ppc64le::GetRegisterName(unsigned reg) {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_s390x.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_s390x.cpp
index 2301c82290712..e040e5075721e 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_s390x.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_s390x.cpp
@@ -113,7 +113,7 @@ RegisterContextPOSIX_s390x::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < m_reg_info.num_registers)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_s390x::GetRegisterCount() {
@@ -156,10 +156,10 @@ const RegisterSet *RegisterContextPOSIX_s390x::GetRegisterSet(size_t set) {
       return &g_reg_sets_s390x[set];
     default:
       assert(false && "Unhandled target architecture.");
-      return NULL;
+      return nullptr;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 lldb::ByteOrder RegisterContextPOSIX_s390x::GetByteOrder() {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp
index ee447642426c3..856ce9448dbbd 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.cpp
@@ -423,7 +423,7 @@ RegisterContextPOSIX_x86::GetRegisterInfoAtIndex(size_t reg) {
   if (reg < m_reg_info.num_registers)
     return &GetRegisterInfo()[reg];
   else
-    return NULL;
+    return nullptr;
 }
 
 size_t RegisterContextPOSIX_x86::GetRegisterSetCount() {
@@ -445,10 +445,10 @@ const RegisterSet *RegisterContextPOSIX_x86::GetRegisterSet(size_t set) {
       return &g_reg_sets_x86_64[set];
     default:
       assert(false && "Unhandled target architecture.");
-      return NULL;
+      return nullptr;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 const char *RegisterContextPOSIX_x86::GetRegisterName(unsigned reg) {
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp
index c21c02f24979d..bcf60cc7a3384 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp
@@ -73,7 +73,7 @@ RegisterContextThreadMemory::GetRegisterInfoAtIndex(size_t reg) {
   UpdateRegisterContext();
   if (m_reg_ctx_sp)
     return m_reg_ctx_sp->GetRegisterInfoAtIndex(reg);
-  return NULL;
+  return nullptr;
 }
 
 size_t RegisterContextThreadMemory::GetRegisterSetCount() {
@@ -87,7 +87,7 @@ const RegisterSet *RegisterContextThreadMemory::GetRegisterSet(size_t reg_set) {
   UpdateRegisterContext();
   if (m_reg_ctx_sp)
     return m_reg_ctx_sp->GetRegisterSet(reg_set);
-  return NULL;
+  return nullptr;
 }
 
 bool RegisterContextThreadMemory::ReadRegister(const RegisterInfo *reg_info,
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp
index 118c9ffe654ea..d392d3be1c41c 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm.cpp
@@ -55,7 +55,7 @@ GetRegisterInfoPtr(const lldb_private::ArchSpec &target_arch) {
     return g_register_infos_arm;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
index 14d1119026464..f7471526d0548 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
@@ -60,7 +60,7 @@ GetRegisterInfoPtr(const lldb_private::ArchSpec &target_arch) {
     return g_register_infos_arm64_le;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_ppc64le.cpp b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_ppc64le.cpp
index 63d35c291e98a..35051a3ce095f 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_ppc64le.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_ppc64le.cpp
@@ -27,7 +27,7 @@ GetRegisterInfoPtr(const lldb_private::ArchSpec &target_arch) {
     return g_register_infos_ppc64le;
   default:
     assert(false && "Unhandled target architecture.");
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp
index 64b88e665046b..588015a51ef16 100644
--- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp
+++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp
@@ -37,11 +37,11 @@ const char *StopInfoMachException::GetDescription() {
         target ? target->GetArchitecture().GetMachine()
                : llvm::Triple::UnknownArch;
 
-    const char *exc_desc = NULL;
+    const char *exc_desc = nullptr;
     const char *code_label = "code";
-    const char *code_desc = NULL;
+    const char *code_desc = nullptr;
     const char *subcode_label = "subcode";
-    const char *subcode_desc = NULL;
+    const char *subcode_desc = nullptr;
 
 #if defined(__APPLE__)
     char code_desc_buf[32];
@@ -593,7 +593,7 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException(
           // the thread ID so we must always report the breakpoint regardless
           // of the thread.
           if (bp_site_sp->ValidForThisThread(&thread) ||
-              thread.GetProcess()->GetOperatingSystem() != NULL)
+              thread.GetProcess()->GetOperatingSystem() != nullptr)
             return StopInfo::CreateStopReasonWithBreakpointSiteID(
                 thread, bp_site_sp->GetID());
           else if (is_trace_if_actual_breakpoint_missing)
diff --git a/lldb/source/Plugins/Process/Utility/UnwindLLDB.cpp b/lldb/source/Plugins/Process/Utility/UnwindLLDB.cpp
index d11fda293f8cc..38209fb249483 100644
--- a/lldb/source/Plugins/Process/Utility/UnwindLLDB.cpp
+++ b/lldb/source/Plugins/Process/Utility/UnwindLLDB.cpp
@@ -50,7 +50,7 @@ uint32_t UnwindLLDB::DoGetFrameCount() {
       return 0;
 
     ProcessSP process_sp(m_thread.GetProcess());
-    ABI *abi = process_sp ? process_sp->GetABI().get() : NULL;
+    ABI *abi = process_sp ? process_sp->GetABI().get() : nullptr;
 
     while (AddOneMoreFrame(abi)) {
 #if DEBUG_FRAME_SPEED
@@ -73,13 +73,13 @@ bool UnwindLLDB::AddFirstFrame() {
     return true;
 
   ProcessSP process_sp(m_thread.GetProcess());
-  ABI *abi = process_sp ? process_sp->GetABI().get() : NULL;
+  ABI *abi = process_sp ? process_sp->GetABI().get() : nullptr;
 
   // First, set up the 0th (initial) frame
   CursorSP first_cursor_sp(new Cursor());
   RegisterContextLLDBSP reg_ctx_sp(new RegisterContextLLDB(
       m_thread, RegisterContextLLDBSP(), first_cursor_sp->sctx, 0, *this));
-  if (reg_ctx_sp.get() == NULL)
+  if (reg_ctx_sp.get() == nullptr)
     goto unwind_done;
 
   if (!reg_ctx_sp->IsValid())
@@ -147,7 +147,7 @@ UnwindLLDB::CursorSP UnwindLLDB::GetOneMoreFrame(ABI *abi) {
     return nullptr;
   }
 
-  if (reg_ctx_sp.get() == NULL) {
+  if (reg_ctx_sp.get() == nullptr) {
     // If the RegisterContextLLDB has a fallback UnwindPlan, it will switch to
     // that and return true.  Subsequent calls to TryFallbackUnwindPlan() will
     // return false.
@@ -402,7 +402,7 @@ bool UnwindLLDB::DoGetFrameInfoAtIndex(uint32_t idx, addr_t &cfa, addr_t &pc) {
   }
 
   ProcessSP process_sp(m_thread.GetProcess());
-  ABI *abi = process_sp ? process_sp->GetABI().get() : NULL;
+  ABI *abi = process_sp ? process_sp->GetABI().get() : nullptr;
 
   while (idx >= m_frames.size() && AddOneMoreFrame(abi))
     ;
@@ -430,7 +430,7 @@ UnwindLLDB::DoCreateRegisterContextForFrame(StackFrame *frame) {
   }
 
   ProcessSP process_sp(m_thread.GetProcess());
-  ABI *abi = process_sp ? process_sp->GetABI().get() : NULL;
+  ABI *abi = process_sp ? process_sp->GetABI().get() : nullptr;
 
   while (idx >= m_frames.size()) {
     if (!AddOneMoreFrame(abi))
diff --git a/lldb/source/Plugins/Process/Utility/UnwindMacOSXFrameBackchain.cpp b/lldb/source/Plugins/Process/Utility/UnwindMacOSXFrameBackchain.cpp
index 4a35637cffbc2..7dc5a5f5fdd1b 100644
--- a/lldb/source/Plugins/Process/Utility/UnwindMacOSXFrameBackchain.cpp
+++ b/lldb/source/Plugins/Process/Utility/UnwindMacOSXFrameBackchain.cpp
@@ -79,7 +79,7 @@ size_t UnwindMacOSXFrameBackchain::GetStackFrameData_i386(
   StackFrame *first_frame = exe_ctx.GetFramePtr();
 
   Process *process = exe_ctx.GetProcessPtr();
-  if (process == NULL)
+  if (process == nullptr)
     return 0;
 
   struct Frame_i386 {
@@ -121,7 +121,7 @@ size_t UnwindMacOSXFrameBackchain::GetStackFrameData_i386(
 
       SymbolContext first_frame_sc(
           first_frame->GetSymbolContext(resolve_scope));
-      const AddressRange *addr_range_ptr = NULL;
+      const AddressRange *addr_range_ptr = nullptr;
       AddressRange range;
       if (first_frame_sc.function)
         addr_range_ptr = &first_frame_sc.function->GetAddressRange();
@@ -169,7 +169,7 @@ size_t UnwindMacOSXFrameBackchain::GetStackFrameData_x86_64(
   m_cursors.clear();
 
   Process *process = exe_ctx.GetProcessPtr();
-  if (process == NULL)
+  if (process == nullptr)
     return 0;
 
   StackFrame *first_frame = exe_ctx.GetFramePtr();
@@ -212,7 +212,7 @@ size_t UnwindMacOSXFrameBackchain::GetStackFrameData_x86_64(
 
       SymbolContext first_frame_sc(
           first_frame->GetSymbolContext(resolve_scope));
-      const AddressRange *addr_range_ptr = NULL;
+      const AddressRange *addr_range_ptr = nullptr;
       AddressRange range;
       if (first_frame_sc.function)
         addr_range_ptr = &first_frame_sc.function->GetAddressRange();
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index 25074f854920e..f40ff130cf515 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -80,7 +80,7 @@ bool ProcessElfCore::CanDebug(lldb::TargetSP target_sp,
   if (!m_core_module_sp && FileSystem::Instance().Exists(m_core_file)) {
     ModuleSpec core_module_spec(m_core_file, target_sp->GetArchitecture());
     Status error(ModuleList::GetSharedModule(core_module_spec, m_core_module_sp,
-                                             NULL, NULL, NULL));
+                                             nullptr, nullptr, nullptr));
     if (m_core_module_sp) {
       ObjectFile *core_objfile = m_core_module_sp->GetObjectFile();
       if (core_objfile && core_objfile->GetType() == ObjectFile::eTypeCoreFile)
@@ -149,7 +149,7 @@ Status ProcessElfCore::DoLoadCore() {
   }
 
   ObjectFileELF *core = (ObjectFileELF *)(m_core_module_sp->GetObjectFile());
-  if (core == NULL) {
+  if (core == nullptr) {
     error.SetErrorString("invalid core object file");
     return error;
   }
@@ -247,7 +247,7 @@ Status ProcessElfCore::DoLoadCore() {
 }
 
 lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() {
-  if (m_dyld_up.get() == NULL)
+  if (m_dyld_up.get() == nullptr)
     m_dyld_up.reset(DynamicLoader::FindPlugin(
         this, DynamicLoaderPOSIXDYLD::GetPluginNameStatic().GetCString()));
   return m_dyld_up.get();
@@ -327,13 +327,13 @@ size_t ProcessElfCore::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
                                     Status &error) {
   ObjectFile *core_objfile = m_core_module_sp->GetObjectFile();
 
-  if (core_objfile == NULL)
+  if (core_objfile == nullptr)
     return 0;
 
   // Get the address range
   const VMRangeToFileOffset::Entry *address_range =
       m_core_aranges.FindEntryThatContains(addr);
-  if (address_range == NULL || address_range->GetRangeEnd() < addr) {
+  if (address_range == nullptr || address_range->GetRangeEnd() < addr) {
     error.SetErrorStringWithFormat("core file does not contain 0x%" PRIx64,
                                    addr);
     return 0;
diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h
index 13291d56a0cbc..ddcf35013b345 100644
--- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h
+++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h
@@ -149,7 +149,7 @@ class ThreadElfCore : public lldb_private::Thread {
 
   const char *GetName() override {
     if (m_thread_name.empty())
-      return NULL;
+      return nullptr;
     return m_thread_name.c_str();
   }
 
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp
index 3bf0f5c2ce541..fe7ef6b3aceab 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.cpp
@@ -285,7 +285,7 @@ bool GDBRemoteClientBase::ShouldStop(const UnixSignals &signals,
 
 void GDBRemoteClientBase::OnRunPacketSent(bool first) {
   if (first)
-    BroadcastEvent(eBroadcastBitRunPacketSent, NULL);
+    BroadcastEvent(eBroadcastBitRunPacketSent, nullptr);
 }
 
 ///////////////////////////////////////
@@ -365,7 +365,7 @@ void GDBRemoteClientBase::Lock::SyncWithContinueThread(bool interrupt) {
       // packet. Let's interrupt it.
       const char ctrl_c = '\x03';
       ConnectionStatus status = eConnectionStatusSuccess;
-      size_t bytes_written = m_comm.Write(&ctrl_c, 1, status, NULL);
+      size_t bytes_written = m_comm.Write(&ctrl_c, 1, status, nullptr);
       if (bytes_written == 0) {
         --m_comm.m_async_count;
         if (log)
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
index 31af1cd26b781..9c3a02e77e58a 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
@@ -98,7 +98,7 @@ size_t GDBRemoteCommunication::SendAck() {
   Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS));
   ConnectionStatus status = eConnectionStatusSuccess;
   char ch = '+';
-  const size_t bytes_written = Write(&ch, 1, status, NULL);
+  const size_t bytes_written = Write(&ch, 1, status, nullptr);
   if (log)
     log->Printf("<%4" PRIu64 "> send packet: %c", (uint64_t)bytes_written, ch);
   m_history.AddPacket(ch, GDBRemoteCommunicationHistory::ePacketTypeSend,
@@ -110,7 +110,7 @@ size_t GDBRemoteCommunication::SendNack() {
   Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS));
   ConnectionStatus status = eConnectionStatusSuccess;
   char ch = '-';
-  const size_t bytes_written = Write(&ch, 1, status, NULL);
+  const size_t bytes_written = Write(&ch, 1, status, nullptr);
   if (log)
     log->Printf("<%4" PRIu64 "> send packet: %c", (uint64_t)bytes_written, ch);
   m_history.AddPacket(ch, GDBRemoteCommunicationHistory::ePacketTypeSend,
@@ -138,7 +138,7 @@ GDBRemoteCommunication::SendRawPacketNoLock(llvm::StringRef packet,
     ConnectionStatus status = eConnectionStatusSuccess;
     const char *packet_data = packet.data();
     const size_t packet_length = packet.size();
-    size_t bytes_written = Write(packet_data, packet_length, status, NULL);
+    size_t bytes_written = Write(packet_data, packet_length, status, nullptr);
     if (log) {
       size_t binary_start_offset = 0;
       if (strncmp(packet_data, "$vFile:pwrite:", strlen("$vFile:pwrite:")) ==
@@ -274,7 +274,7 @@ GDBRemoteCommunication::WaitForPacketNoLock(StringExtractorGDBRemote &packet,
   Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS));
 
   // Check for a packet from our cache first without trying any reading...
-  if (CheckForPacket(NULL, 0, packet) != PacketType::Invalid)
+  if (CheckForPacket(nullptr, 0, packet) != PacketType::Invalid)
     return PacketResult::Success;
 
   bool timed_out = false;
@@ -470,7 +470,7 @@ bool GDBRemoteCommunication::DecompressPacket() {
       content_length = hash_mark_idx - content_start;
       std::string bufsize_str(m_bytes.data() + 2, i - 2 - 1);
       errno = 0;
-      decompressed_bufsize = ::strtoul(bufsize_str.c_str(), NULL, 10);
+      decompressed_bufsize = ::strtoul(bufsize_str.c_str(), nullptr, 10);
       if (errno != 0 || decompressed_bufsize == ULONG_MAX) {
         m_bytes.erase(0, size_of_first_packet);
         return false;
@@ -483,7 +483,7 @@ bool GDBRemoteCommunication::DecompressPacket() {
     packet_checksum_cstr[0] = m_bytes[checksum_idx];
     packet_checksum_cstr[1] = m_bytes[checksum_idx + 1];
     packet_checksum_cstr[2] = '\0';
-    long packet_checksum = strtol(packet_checksum_cstr, NULL, 16);
+    long packet_checksum = strtol(packet_checksum_cstr, nullptr, 16);
 
     long actual_checksum = CalculcateChecksum(
         llvm::StringRef(m_bytes).substr(1, hash_mark_idx - 1));
@@ -848,7 +848,7 @@ GDBRemoteCommunication::CheckForPacket(const uint8_t *src, size_t src_len,
             ::isxdigit(m_bytes[checksum_idx + 1])) {
           if (GetSendAcks()) {
             const char *packet_checksum_cstr = &m_bytes[checksum_idx];
-            char packet_checksum = strtol(packet_checksum_cstr, NULL, 16);
+            char packet_checksum = strtol(packet_checksum_cstr, nullptr, 16);
             char actual_checksum = CalculcateChecksum(
                 llvm::StringRef(m_bytes).slice(content_start, content_end));
             success = packet_checksum == actual_checksum;
@@ -923,9 +923,9 @@ GDBRemoteCommunication::ListenThread(lldb::thread_arg_t arg) {
     // Do the listen on another thread so we can continue on...
     if (connection->Connect(comm->m_listen_url.c_str(), &error) !=
         eConnectionStatusSuccess)
-      comm->SetConnection(NULL);
+      comm->SetConnection(nullptr);
   }
-  return NULL;
+  return nullptr;
 }
 
 Status GDBRemoteCommunication::StartDebugserverProcess(
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index c6b792e70fcb9..9797184026e06 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -386,14 +386,14 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
         std::vector<std::string> supported_compressions;
         compressions += sizeof("SupportedCompressions=") - 1;
         const char *end_of_compressions = strchr(compressions, ';');
-        if (end_of_compressions == NULL) {
+        if (end_of_compressions == nullptr) {
           end_of_compressions = strchr(compressions, '\0');
         }
         const char *current_compression = compressions;
         while (current_compression < end_of_compressions) {
           const char *next_compression_name = strchr(current_compression, ',');
           const char *end_of_this_word = next_compression_name;
-          if (next_compression_name == NULL ||
+          if (next_compression_name == nullptr ||
               end_of_compressions < next_compression_name) {
             end_of_this_word = end_of_compressions;
           }
@@ -775,7 +775,7 @@ int GDBRemoteCommunicationClient::SendArgumentsPacket(
   std::vector<const char *> argv;
   FileSpec exe_file = launch_info.GetExecutableFile();
   std::string exe_path;
-  const char *arg = NULL;
+  const char *arg = nullptr;
   const Args &launch_args = launch_info.GetArguments();
   if (exe_file)
     exe_path = exe_file.GetPath(false);
@@ -786,7 +786,7 @@ int GDBRemoteCommunicationClient::SendArgumentsPacket(
   }
   if (!exe_path.empty()) {
     argv.push_back(exe_path.c_str());
-    for (uint32_t i = 1; (arg = launch_args.GetArgumentAtIndex(i)) != NULL;
+    for (uint32_t i = 1; (arg = launch_args.GetArgumentAtIndex(i)) != nullptr;
          ++i) {
       if (arg)
         argv.push_back(arg);
@@ -1094,7 +1094,7 @@ const char *GDBRemoteCommunicationClient::GetGDBServerProgramName() {
     if (!m_gdb_server_name.empty())
       return m_gdb_server_name.c_str();
   }
-  return NULL;
+  return nullptr;
 }
 
 uint32_t GDBRemoteCommunicationClient::GetGDBServerProgramVersion() {
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
index 56ee0a1463c1a..a77e659a55fa2 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
@@ -98,7 +98,7 @@ bool GDBRemoteRegisterContext::ReadRegister(const RegisterInfo *reg_info,
 bool GDBRemoteRegisterContext::PrivateSetRegisterValue(
     uint32_t reg, llvm::ArrayRef<uint8_t> data) {
   const RegisterInfo *reg_info = GetRegisterInfoAtIndex(reg);
-  if (reg_info == NULL)
+  if (reg_info == nullptr)
     return false;
 
   // Invalidate if needed
@@ -122,7 +122,7 @@ bool GDBRemoteRegisterContext::PrivateSetRegisterValue(
 bool GDBRemoteRegisterContext::PrivateSetRegisterValue(uint32_t reg,
                                                        uint64_t new_reg_val) {
   const RegisterInfo *reg_info = GetRegisterInfoAtIndex(reg);
-  if (reg_info == NULL)
+  if (reg_info == nullptr)
     return false;
 
   // Early in process startup, we can get a thread that has an invalid byte
@@ -148,7 +148,7 @@ bool GDBRemoteRegisterContext::PrivateSetRegisterValue(uint32_t reg,
   uint8_t *dst = const_cast<uint8_t *>(
       m_reg_data.PeekData(reg_info->byte_offset, reg_info->byte_size));
 
-  if (dst == NULL)
+  if (dst == nullptr)
     return false;
 
   if (data.CopyByteOrderedData(0,                          // src offset
@@ -183,7 +183,7 @@ bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info,
 
   Process *process = exe_ctx.GetProcessPtr();
   Thread *thread = exe_ctx.GetThreadPtr();
-  if (process == NULL || thread == NULL)
+  if (process == nullptr || thread == nullptr)
     return false;
 
   GDBRemoteCommunicationClient &gdb_comm(
@@ -228,7 +228,7 @@ bool GDBRemoteRegisterContext::ReadRegisterBytes(const RegisterInfo *reg_info,
         // We have a valid primordial register as our constituent. Grab the
         // corresponding register info.
         const RegisterInfo *prim_reg_info = GetRegisterInfoAtIndex(prim_reg);
-        if (prim_reg_info == NULL)
+        if (prim_reg_info == nullptr)
           success = false;
         else {
           // Read the containing register if it hasn't already been read
@@ -301,7 +301,7 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info,
 
   Process *process = exe_ctx.GetProcessPtr();
   Thread *thread = exe_ctx.GetThreadPtr();
-  if (process == NULL || thread == NULL)
+  if (process == nullptr || thread == nullptr)
     return false;
 
   GDBRemoteCommunicationClient &gdb_comm(
@@ -319,7 +319,7 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info,
   uint8_t *dst = const_cast<uint8_t *>(
       m_reg_data.PeekData(reg_info->byte_offset, reg_info->byte_size));
 
-  if (dst == NULL)
+  if (dst == nullptr)
     return false;
 
   if (data.CopyByteOrderedData(data_offset,                // src offset
@@ -361,7 +361,7 @@ bool GDBRemoteRegisterContext::WriteRegisterBytes(const RegisterInfo *reg_info,
             // We have a valid primordial register as our constituent. Grab the
             // corresponding register info.
             const RegisterInfo *value_reg_info = GetRegisterInfoAtIndex(reg);
-            if (value_reg_info == NULL)
+            if (value_reg_info == nullptr)
               success = false;
             else
               success = SetPrimordialRegister(value_reg_info, gdb_comm);
@@ -409,7 +409,7 @@ bool GDBRemoteRegisterContext::ReadAllRegisterValues(
 
   Process *process = exe_ctx.GetProcessPtr();
   Thread *thread = exe_ctx.GetThreadPtr();
-  if (process == NULL || thread == NULL)
+  if (process == nullptr || thread == nullptr)
     return false;
 
   GDBRemoteCommunicationClient &gdb_comm(
@@ -434,7 +434,7 @@ bool GDBRemoteRegisterContext::WriteAllRegisterValues(
 
     Process *process = exe_ctx.GetProcessPtr();
     Thread *thread = exe_ctx.GetThreadPtr();
-    if (process == NULL || thread == NULL)
+    if (process == nullptr || thread == nullptr)
       return false;
 
     GDBRemoteCommunicationClient &gdb_comm(
@@ -452,7 +452,7 @@ bool GDBRemoteRegisterContext::ReadAllRegisterValues(
 
   Process *process = exe_ctx.GetProcessPtr();
   Thread *thread = exe_ctx.GetThreadPtr();
-  if (process == NULL || thread == NULL)
+  if (process == nullptr || thread == nullptr)
     return false;
 
   GDBRemoteCommunicationClient &gdb_comm(
@@ -474,7 +474,8 @@ bool GDBRemoteRegisterContext::ReadAllRegisterValues(
     // individually and store them as binary data in a buffer.
     const RegisterInfo *reg_info;
 
-    for (uint32_t i = 0; (reg_info = GetRegisterInfoAtIndex(i)) != NULL; i++) {
+    for (uint32_t i = 0; (reg_info = GetRegisterInfoAtIndex(i)) != nullptr;
+         i++) {
       if (reg_info
               ->value_regs) // skip registers that are slices of real registers
         continue;
@@ -508,14 +509,14 @@ bool GDBRemoteRegisterContext::ReadAllRegisterValues(
 
 bool GDBRemoteRegisterContext::WriteAllRegisterValues(
     const lldb::DataBufferSP &data_sp) {
-  if (!data_sp || data_sp->GetBytes() == NULL || data_sp->GetByteSize() == 0)
+  if (!data_sp || data_sp->GetBytes() == nullptr || data_sp->GetByteSize() == 0)
     return false;
 
   ExecutionContext exe_ctx(CalculateThread());
 
   Process *process = exe_ctx.GetProcessPtr();
   Thread *thread = exe_ctx.GetThreadPtr();
-  if (process == NULL || thread == NULL)
+  if (process == nullptr || thread == nullptr)
     return false;
 
   GDBRemoteCommunicationClient &gdb_comm(
@@ -557,9 +558,9 @@ bool GDBRemoteRegisterContext::WriteAllRegisterValues(
       uint64_t size_by_highest_offset = 0;
 
       for (uint32_t reg_idx = 0;
-           (reg_info = GetRegisterInfoAtIndex(reg_idx)) != NULL; ++reg_idx) {
+           (reg_info = GetRegisterInfoAtIndex(reg_idx)) != nullptr; ++reg_idx) {
         size_including_slice_registers += reg_info->byte_size;
-        if (reg_info->value_regs == NULL)
+        if (reg_info->value_regs == nullptr)
           size_not_including_slice_registers += reg_info->byte_size;
         if (reg_info->byte_offset >= size_by_highest_offset)
           size_by_highest_offset = reg_info->byte_offset + reg_info->byte_size;
@@ -591,7 +592,7 @@ bool GDBRemoteRegisterContext::WriteAllRegisterValues(
       // keep track of the size of each reg & compute offset based on that.
       uint32_t running_byte_offset = 0;
       for (uint32_t reg_idx = 0;
-           (reg_info = GetRegisterInfoAtIndex(reg_idx)) != NULL;
+           (reg_info = GetRegisterInfoAtIndex(reg_idx)) != nullptr;
            ++reg_idx, running_byte_offset += reg_info->byte_size) {
         // Skip composite aka slice registers (e.g. eax is a slice of rax).
         if (reg_info->value_regs)
@@ -637,7 +638,7 @@ bool GDBRemoteRegisterContext::WriteAllRegisterValues(
       }
       uint32_t num_restored = 0;
       const RegisterInfo *reg_info;
-      for (uint32_t i = 0; (reg_info = GetRegisterInfoAtIndex(i)) != NULL;
+      for (uint32_t i = 0; (reg_info = GetRegisterInfoAtIndex(i)) != nullptr;
            i++) {
         if (reg_info->value_regs) // skip registers that are slices of real
                                   // registers
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index db95d91449e64..422b092895592 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -114,12 +114,12 @@ static constexpr PropertyDefinition g_properties[] = {
     {"packet-timeout", OptionValue::eTypeUInt64, true, 5
 #if defined(__has_feature)
 #if __has_feature(address_sanitizer)
-      * 2
+         * 2
 #endif
 #endif
-      , NULL, {},
+     , nullptr, {},
      "Specify the default packet timeout in seconds."},
-    {"target-definition-file", OptionValue::eTypeFileSpec, true, 0, NULL, {},
+    {"target-definition-file", OptionValue::eTypeFileSpec, true, 0, nullptr, {},
      "The file that provides the description for remote target registers."}};
 
 enum { ePropertyPacketTimeout, ePropertyTargetDefinitionFile };
@@ -140,17 +140,17 @@ class PluginProperties : public Properties {
   uint64_t GetPacketTimeout() {
     const uint32_t idx = ePropertyPacketTimeout;
     return m_collection_sp->GetPropertyAtIndexAsUInt64(
-        NULL, idx, g_properties[idx].default_uint_value);
+        nullptr, idx, g_properties[idx].default_uint_value);
   }
 
   bool SetPacketTimeout(uint64_t timeout) {
     const uint32_t idx = ePropertyPacketTimeout;
-    return m_collection_sp->SetPropertyAtIndexAsUInt64(NULL, idx, timeout);
+    return m_collection_sp->SetPropertyAtIndexAsUInt64(nullptr, idx, timeout);
   }
 
   FileSpec GetTargetDefinitionFile() const {
     const uint32_t idx = ePropertyTargetDefinitionFile;
-    return m_collection_sp->GetPropertyAtIndexAsFileSpec(NULL, idx);
+    return m_collection_sp->GetPropertyAtIndexAsFileSpec(nullptr, idx);
   }
 };
 
@@ -252,7 +252,7 @@ ProcessGDBRemote::CreateInstance(lldb::TargetSP target_sp,
                                  ListenerSP listener_sp,
                                  const FileSpec *crash_file_path) {
   lldb::ProcessSP process_sp;
-  if (crash_file_path == NULL)
+  if (crash_file_path == nullptr)
     process_sp = std::make_shared<ProcessGDBRemote>(target_sp, listener_sp);
   return process_sp;
 }
@@ -294,7 +294,7 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp,
     : Process(target_sp, listener_sp),
       m_debugserver_pid(LLDB_INVALID_PROCESS_ID), m_last_stop_packet_mutex(),
       m_register_info(),
-      m_async_broadcaster(NULL, "lldb.process.gdb-remote.async-broadcaster"),
+      m_async_broadcaster(nullptr, "lldb.process.gdb-remote.async-broadcaster"),
       m_async_listener_sp(
           Listener::MakeListener("lldb.process.gdb-remote.async-listener")),
       m_async_thread_state_mutex(), m_thread_ids(), m_thread_pcs(),
@@ -547,8 +547,8 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
         std::vector<uint32_t> invalidate_regs;
         std::vector<uint8_t> dwarf_opcode_bytes;
         RegisterInfo reg_info = {
-            NULL,          // Name
-            NULL,          // Alt name
+            nullptr,       // Name
+            nullptr,       // Alt name
             0,             // byte size
             reg_offset,    // offset
             eEncodingUint, // encoding
@@ -560,10 +560,10 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
                 reg_num,             // process plugin reg num
                 reg_num              // native register number
             },
-            NULL,
-            NULL,
-            NULL, // Dwarf expression opcode bytes pointer
-            0     // Dwarf expression opcode bytes length
+            nullptr,
+            nullptr,
+            nullptr, // Dwarf expression opcode bytes pointer
+            0        // Dwarf expression opcode bytes length
         };
 
         while (response.GetNameColonValue(name, value)) {
@@ -583,7 +583,7 @@ void ProcessGDBRemote::BuildDynamicRegisterInfo(bool force) {
               reg_info.encoding = encoding;
           } else if (name.equals("format")) {
             Format format = eFormatInvalid;
-            if (OptionArgParser::ToFormat(value.str().c_str(), format, NULL)
+            if (OptionArgParser::ToFormat(value.str().c_str(), format, nullptr)
                     .Success())
               reg_info.format = format;
             else {
@@ -888,8 +888,8 @@ Status ProcessGDBRemote::DoLaunch(lldb_private::Module *exe_module,
         // since 'O' packets can really slow down debugging if the inferior
         // does a lot of output.
         if ((!stdin_file_spec || !stdout_file_spec || !stderr_file_spec) &&
-            pty.OpenFirstAvailableMaster(O_RDWR | O_NOCTTY, NULL, 0)) {
-          FileSpec slave_name{pty.GetSlaveName(NULL, 0)};
+            pty.OpenFirstAvailableMaster(O_RDWR | O_NOCTTY, nullptr, 0)) {
+          FileSpec slave_name{pty.GetSlaveName(nullptr, 0)};
 
           if (!stdin_file_spec)
             stdin_file_spec = slave_name;
@@ -933,7 +933,7 @@ Status ProcessGDBRemote::DoLaunch(lldb_private::Module *exe_module,
           GetTarget().GetArchitecture().GetArchitectureName());
 
       const char *launch_event_data = launch_info.GetLaunchEventData();
-      if (launch_event_data != NULL && *launch_event_data != '\0')
+      if (launch_event_data != nullptr && *launch_event_data != '\0')
         m_gdb_comm.SendLaunchEventDataPacket(launch_event_data);
 
       if (working_dir) {
@@ -1847,7 +1847,7 @@ ThreadSP ProcessGDBRemote::SetThreadStopInfo(
         gdb_thread->PrivateSetRegisterValue(pair.first, buffer_sp->GetData());
       }
 
-      thread_sp->SetName(thread_name.empty() ? NULL : thread_name.c_str());
+      thread_sp->SetName(thread_name.empty() ? nullptr : thread_name.c_str());
 
       gdb_thread->SetThreadDispatchQAddr(thread_dispatch_qaddr);
       // Check if the GDB server was able to provide the queue name, kind and
@@ -3126,7 +3126,7 @@ size_t ProcessGDBRemote::PutSTDIN(const char *src, size_t src_len,
                                   Status &error) {
   if (m_stdio_communication.IsConnected()) {
     ConnectionStatus status;
-    m_stdio_communication.Write(src, src_len, status, NULL);
+    m_stdio_communication.Write(src, src_len, status, nullptr);
   } else if (m_stdin_forward) {
     m_gdb_comm.SendStdinNotification(src, src_len);
   }
@@ -3135,7 +3135,7 @@ size_t ProcessGDBRemote::PutSTDIN(const char *src, size_t src_len,
 
 Status ProcessGDBRemote::EnableBreakpointSite(BreakpointSite *bp_site) {
   Status error;
-  assert(bp_site != NULL);
+  assert(bp_site != nullptr);
 
   // Get logging info
   Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_BREAKPOINTS));
@@ -3261,7 +3261,7 @@ Status ProcessGDBRemote::EnableBreakpointSite(BreakpointSite *bp_site) {
 
 Status ProcessGDBRemote::DisableBreakpointSite(BreakpointSite *bp_site) {
   Status error;
-  assert(bp_site != NULL);
+  assert(bp_site != nullptr);
   addr_t addr = bp_site->GetLoadAddress();
   user_id_t site_id = bp_site->GetID();
   Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_BREAKPOINTS));
@@ -3672,9 +3672,9 @@ bool ProcessGDBRemote::StartAsyncThread() {
     // Create a thread that watches our internal state and controls which
     // events make it to clients (into the DCProcess event queue).
 
-    m_async_thread =
-        ThreadLauncher::LaunchThread("<lldb.process.gdb-remote.async>",
-                                     ProcessGDBRemote::AsyncThread, this, NULL);
+    m_async_thread = ThreadLauncher::LaunchThread(
+        "<lldb.process.gdb-remote.async>", ProcessGDBRemote::AsyncThread, this,
+        nullptr);
   } else if (log)
     log->Printf("ProcessGDBRemote::%s () - Called when Async thread was "
                 "already running.",
@@ -3768,7 +3768,7 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
                           ") got eBroadcastBitAsyncContinue: %s",
                           __FUNCTION__, arg, process->GetID(), continue_cstr);
 
-            if (::strstr(continue_cstr, "vAttach") == NULL)
+            if (::strstr(continue_cstr, "vAttach") == nullptr)
               process->SetPrivateState(eStateRunning);
             StringExtractorGDBRemote response;
 
@@ -3834,11 +3834,11 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
                 // the "E87" error code from debugserver -- this indicates that
                 // the process is not debuggable.  Return a slightly more
                 // helpful error message about why the attach failed.
-                if (::strstr(continue_cstr, "vAttach") != NULL &&
+                if (::strstr(continue_cstr, "vAttach") != nullptr &&
                     response.GetError() == 0x87) {
                   process->SetExitStatus(-1, "cannot attach to process due to "
                                              "System Integrity Protection");
-                } else if (::strstr(continue_cstr, "vAttach") != NULL &&
+                } else if (::strstr(continue_cstr, "vAttach") != nullptr &&
                            response.GetStatus().Fail()) {
                   process->SetExitStatus(-1, response.GetStatus().AsCString());
                 } else {
@@ -3913,7 +3913,7 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
                 ") thread exiting...",
                 __FUNCTION__, arg, process->GetID());
 
-  return NULL;
+  return nullptr;
 }
 
 // uint32_t
@@ -4009,7 +4009,7 @@ bool ProcessGDBRemote::StartNoticingNewThreads() {
       }
     }
   }
-  return m_thread_create_bp_sp.get() != NULL;
+  return m_thread_create_bp_sp.get() != nullptr;
 }
 
 bool ProcessGDBRemote::StopNoticingNewThreads() {
@@ -4024,8 +4024,8 @@ bool ProcessGDBRemote::StopNoticingNewThreads() {
 }
 
 DynamicLoader *ProcessGDBRemote::GetDynamicLoader() {
-  if (m_dyld_up.get() == NULL)
-    m_dyld_up.reset(DynamicLoader::FindPlugin(this, NULL));
+  if (m_dyld_up.get() == nullptr)
+    m_dyld_up.reset(DynamicLoader::FindPlugin(this, nullptr));
   return m_dyld_up.get();
 }
 
@@ -4366,8 +4366,8 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
         bool encoding_set = false;
         bool format_set = false;
         RegisterInfo reg_info = {
-            NULL,          // Name
-            NULL,          // Alt name
+            nullptr,       // Name
+            nullptr,       // Alt name
             0,             // byte size
             reg_offset,    // offset
             eEncodingUint, // encoding
@@ -4379,10 +4379,10 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
                 cur_reg_num,         // process plugin reg num
                 cur_reg_num          // native register number
             },
-            NULL,
-            NULL,
-            NULL, // Dwarf Expression opcode bytes pointer
-            0     // Dwarf Expression opcode bytes length
+            nullptr,
+            nullptr,
+            nullptr, // Dwarf Expression opcode bytes pointer
+            0        // Dwarf Expression opcode bytes length
         };
 
         reg_node.ForEachAttribute([&target_info, &gdb_group, &gdb_type,
@@ -4416,7 +4416,8 @@ bool ParseRegisters(XMLNode feature_node, GdbServerTargetInfo &target_info,
           } else if (name == "format") {
             format_set = true;
             Format format = eFormatInvalid;
-            if (OptionArgParser::ToFormat(value.data(), format, NULL).Success())
+            if (OptionArgParser::ToFormat(value.data(), format, nullptr)
+                    .Success())
               reg_info.format = format;
             else if (value == "vector-sint8")
               reg_info.format = eFormatVectorOfSInt8;
@@ -5133,7 +5134,7 @@ class CommandObjectProcessGDBRemoteSpeedTest : public CommandObjectParsed {
                             "Tests packet speeds of various sizes to determine "
                             "the performance characteristics of the GDB remote "
                             "connection. ",
-                            NULL),
+                            nullptr),
         m_option_group(),
         m_num_packets(LLDB_OPT_SET_1, false, "count", 'c', 0, eArgTypeCount,
                       "The number of packets to send of each varying size "
@@ -5207,7 +5208,7 @@ class CommandObjectProcessGDBRemotePacketHistory : public CommandObjectParsed {
 public:
   CommandObjectProcessGDBRemotePacketHistory(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "process plugin packet history",
-                            "Dumps the packet history buffer. ", NULL) {}
+                            "Dumps the packet history buffer. ", nullptr) {}
 
   ~CommandObjectProcessGDBRemotePacketHistory() override {}
 
@@ -5238,7 +5239,7 @@ class CommandObjectProcessGDBRemotePacketXferSize : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "process plugin packet xfer-size",
             "Maximum size that lldb will try to read/write one one chunk.",
-            NULL) {}
+            nullptr) {}
 
   ~CommandObjectProcessGDBRemotePacketXferSize() override {}
 
@@ -5258,7 +5259,7 @@ class CommandObjectProcessGDBRemotePacketXferSize : public CommandObjectParsed {
     if (process) {
       const char *packet_size = command.GetArgumentAtIndex(0);
       errno = 0;
-      uint64_t user_specified_max = strtoul(packet_size, NULL, 10);
+      uint64_t user_specified_max = strtoul(packet_size, nullptr, 10);
       if (errno == 0 && user_specified_max != 0) {
         process->SetUserSpecifiedMaxMemoryTransferSize(user_specified_max);
         result.SetStatus(eReturnStatusSuccessFinishResult);
@@ -5280,7 +5281,7 @@ class CommandObjectProcessGDBRemotePacketSend : public CommandObjectParsed {
                             "The packet header and footer will automatically "
                             "be added to the packet prior to sending and "
                             "stripped from the result.",
-                            NULL) {}
+                            nullptr) {}
 
   ~CommandObjectProcessGDBRemotePacketSend() override {}
 
@@ -5308,7 +5309,7 @@ class CommandObjectProcessGDBRemotePacketSend : public CommandObjectParsed {
         output_strm.Printf("  packet: %s\n", packet_cstr);
         std::string &response_str = response.GetStringRef();
 
-        if (strstr(packet_cstr, "qGetProfileData") != NULL) {
+        if (strstr(packet_cstr, "qGetProfileData") != nullptr) {
           response_str = process->HarmonizeThreadIdsForProfileData(response);
         }
 
@@ -5376,7 +5377,7 @@ class CommandObjectProcessGDBRemotePacket : public CommandObjectMultiword {
   CommandObjectProcessGDBRemotePacket(CommandInterpreter &interpreter)
       : CommandObjectMultiword(interpreter, "process plugin packet",
                                "Commands that deal with GDB remote packets.",
-                               NULL) {
+                               nullptr) {
     LoadSubCommand(
         "history",
         CommandObjectSP(
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
index f27c80b5d1a1c..cdb7aa006242c 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
@@ -93,7 +93,7 @@ bool ProcessMachCore::CanDebug(lldb::TargetSP target_sp,
     // ModuleSpecList::FindMatchingModuleSpec enforces a strict arch mach.
     ModuleSpec core_module_spec(m_core_file);
     Status error(ModuleList::GetSharedModule(core_module_spec, m_core_module_sp,
-                                             NULL, NULL, NULL));
+                                             nullptr, nullptr, nullptr));
 
     if (m_core_module_sp) {
       ObjectFile *core_objfile = m_core_module_sp->GetObjectFile();
@@ -196,7 +196,7 @@ Status ProcessMachCore::DoLoadCore() {
   }
 
   ObjectFile *core_objfile = m_core_module_sp->GetObjectFile();
-  if (core_objfile == NULL) {
+  if (core_objfile == nullptr) {
     error.SetErrorString("invalid core object file");
     return error;
   }
@@ -209,7 +209,7 @@ Status ProcessMachCore::DoLoadCore() {
   }
 
   SectionList *section_list = core_objfile->GetSectionList();
-  if (section_list == NULL) {
+  if (section_list == nullptr) {
     error.SetErrorString("core file has no sections");
     return error;
   }
@@ -308,7 +308,7 @@ Status ProcessMachCore::DoLoadCore() {
       size_t p = corefile_identifier.find("stext=") + strlen("stext=");
       if (corefile_identifier[p] == '0' && corefile_identifier[p + 1] == 'x') {
         errno = 0;
-        addr = ::strtoul(corefile_identifier.c_str() + p, NULL, 16);
+        addr = ::strtoul(corefile_identifier.c_str() + p, nullptr, 16);
         if (errno != 0 || addr == 0)
           addr = LLDB_INVALID_ADDRESS;
       }
@@ -447,10 +447,10 @@ Status ProcessMachCore::DoLoadCore() {
 }
 
 lldb_private::DynamicLoader *ProcessMachCore::GetDynamicLoader() {
-  if (m_dyld_up.get() == NULL)
+  if (m_dyld_up.get() == nullptr)
     m_dyld_up.reset(DynamicLoader::FindPlugin(
-        this,
-        m_dyld_plugin_name.IsEmpty() ? NULL : m_dyld_plugin_name.GetCString()));
+        this, m_dyld_plugin_name.IsEmpty() ? nullptr
+                                           : m_dyld_plugin_name.GetCString()));
   return m_dyld_up.get();
 }
 
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index d3d2a07afac66..a7fc42cad16c9 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -600,10 +600,9 @@ class CommandObjectProcessMinidumpDump : public CommandObjectParsed {
     return DumpFacebook() || m_fb_logcat.GetOptionValue().GetCurrentValue();
   }
 public:
-
   CommandObjectProcessMinidumpDump(CommandInterpreter &interpreter)
   : CommandObjectParsed(interpreter, "process plugin dump",
-      "Dump information from the minidump file.", NULL),
+      "Dump information from the minidump file.", nullptr),
     m_option_group(),
     INIT_BOOL(m_dump_all, "all", 'a',
               "Dump the everything in the minidump."),
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index a4959265fadce..2d2b68ceaaa6b 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -1943,7 +1943,7 @@ lldb::StateType ScriptInterpreterPythonImpl::ScriptedThreadPlanGetRunState(
     Locker py_lock(this,
                    Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN);
     should_step = LLDBSWIGPythonCallThreadPlan(
-        generic->GetValue(), "should_step", NULL, script_error);
+        generic->GetValue(), "should_step", nullptr, script_error);
     if (script_error)
       should_step = true;
   }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 15b91f53a80cc..def7eb2e1eb28 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -185,7 +185,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWO(const DWARFDIE &die, Log *log) {
   SymbolFileDWARF *dwarf = die.GetDWARF();
   TypeSP type_sp(new Type(
       die.GetID(), dwarf, dwo_type_sp->GetName(), dwo_type_sp->GetByteSize(),
-      NULL, LLDB_INVALID_UID, Type::eEncodingInvalid,
+      nullptr, LLDB_INVALID_UID, Type::eEncodingInvalid,
       &dwo_type_sp->GetDeclaration(), type, Type::eResolveStateForward));
 
   dwarf->GetTypeList()->Insert(type_sp);
@@ -461,7 +461,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
            sc.comp_unit->GetLanguage() == eLanguageTypeObjC_plus_plus);
 
       if (translation_unit_is_objc) {
-        if (type_name_cstr != NULL) {
+        if (type_name_cstr != nullptr) {
           static ConstString g_objc_type_name_id("id");
           static ConstString g_objc_type_name_Class("Class");
           static ConstString g_objc_type_name_selector("SEL");
@@ -1907,7 +1907,7 @@ class DWARFASTParserClang::DelayedAddObjCClassProperty {
         m_property_setter_name(property_setter_name),
         m_property_getter_name(property_getter_name),
         m_property_attributes(property_attributes) {
-    if (metadata != NULL) {
+    if (metadata != nullptr) {
       m_metadata_up.reset(new ClangASTMetadata());
       *m_metadata_up = *metadata;
     }
@@ -2442,7 +2442,7 @@ size_t DWARFASTParserClang::ParseChildEnumerators(
       DWARFAttributes attributes;
       const size_t num_child_attributes = die.GetAttributes(attributes);
       if (num_child_attributes > 0) {
-        const char *name = NULL;
+        const char *name = nullptr;
         bool got_value = false;
         int64_t enum_value = 0;
         Declaration decl;
@@ -2545,8 +2545,8 @@ class DIEStack {
 Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
                                                       const DWARFDIE &die) {
   DWARFRangeList func_ranges;
-  const char *name = NULL;
-  const char *mangled = NULL;
+  const char *name = nullptr;
+  const char *mangled = nullptr;
   int decl_file = 0;
   int decl_line = 0;
   int decl_column = 0;
@@ -2558,7 +2558,7 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
   const dw_tag_t tag = die.Tag();
 
   if (tag != DW_TAG_subprogram)
-    return NULL;
+    return nullptr;
 
   if (die.GetDIENamesAndRanges(name, mangled, func_ranges, decl_file, decl_line,
                                decl_column, call_file, call_line, call_column,
@@ -2633,7 +2633,7 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
       // Supply the type _only_ if it has already been parsed
       Type *func_type = dwarf->GetDIEToType().lookup(die.GetDIE());
 
-      assert(func_type == NULL || func_type != DIE_IS_BEING_PARSED);
+      assert(func_type == nullptr || func_type != DIE_IS_BEING_PARSED);
 
       if (dwarf->FixupAddress(func_range.GetBaseAddress())) {
         const user_id_t func_user_id = die.GetID();
@@ -2643,7 +2643,7 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
                                    func_user_id, func_name, func_type,
                                        func_range); // first address range
 
-        if (func_sp.get() != NULL) {
+        if (func_sp.get() != nullptr) {
           if (frame_base.IsValid())
             func_sp->GetFrameBaseExpression() = frame_base;
           comp_unit.AddFunction(func_sp);
@@ -2652,7 +2652,7 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 bool DWARFASTParserClang::ParseChildMembers(
@@ -2693,10 +2693,10 @@ bool DWARFASTParserClang::ParseChildMembers(
       if (num_attributes > 0) {
         Declaration decl;
         // DWARFExpression location;
-        const char *name = NULL;
-        const char *prop_name = NULL;
-        const char *prop_getter_name = NULL;
-        const char *prop_setter_name = NULL;
+        const char *name = nullptr;
+        const char *prop_name = nullptr;
+        const char *prop_getter_name = nullptr;
+        const char *prop_setter_name = nullptr;
         uint32_t prop_attributes = 0;
 
         bool is_artificial = false;
@@ -2758,7 +2758,8 @@ bool DWARFASTParserClang::ParseChildMembers(
                         module_sp, debug_info_data, die.GetCU(), block_offset,
                         block_length, eRegisterKindDWARF, &initialValue,
                         nullptr, memberOffset, nullptr)) {
-                  member_byte_offset = memberOffset.ResolveValue(NULL).UInt();
+                  member_byte_offset =
+                      memberOffset.ResolveValue(nullptr).UInt();
                 }
               } else {
                 // With DWARF 3 and later, if the value is an integer constant,
@@ -2874,7 +2875,7 @@ bool DWARFASTParserClang::ParseChildMembers(
         if (!is_artificial) {
           Type *member_type = die.ResolveTypeUID(DIERef(encoding_form));
 
-          clang::FieldDecl *field_decl = NULL;
+          clang::FieldDecl *field_decl = nullptr;
           if (tag == DW_TAG_member) {
             if (member_type) {
               if (accessibility == eAccessNone)
@@ -3133,12 +3134,12 @@ bool DWARFASTParserClang::ParseChildMembers(
             }
           }
 
-          if (prop_name != NULL && member_type) {
-            clang::ObjCIvarDecl *ivar_decl = NULL;
+          if (prop_name != nullptr && member_type) {
+            clang::ObjCIvarDecl *ivar_decl = nullptr;
 
             if (field_decl) {
               ivar_decl = clang::dyn_cast<clang::ObjCIvarDecl>(field_decl);
-              assert(ivar_decl != NULL);
+              assert(ivar_decl != nullptr);
             }
 
             ClangASTMetadata metadata;
@@ -3209,7 +3210,8 @@ bool DWARFASTParserClang::ParseChildMembers(
                                               block_offset, block_length,
                                               eRegisterKindDWARF, &initialValue,
                                               nullptr, memberOffset, nullptr)) {
-                  member_byte_offset = memberOffset.ResolveValue(NULL).UInt();
+                  member_byte_offset =
+                      memberOffset.ResolveValue(nullptr).UInt();
                 }
               } else {
                 // With DWARF 3 and later, if the value is an integer constant,
@@ -3237,7 +3239,7 @@ bool DWARFASTParserClang::ParseChildMembers(
         }
 
         Type *base_class_type = die.ResolveTypeUID(DIERef(encoding_form));
-        if (base_class_type == NULL) {
+        if (base_class_type == nullptr) {
           module_sp->ReportError("0x%8.8x: DW_TAG_inheritance failed to "
                                  "resolve the base class at 0x%8.8x"
                                  " from enclosing type 0x%8.8x. \nPlease file "
@@ -3314,7 +3316,7 @@ size_t DWARFASTParserClang::ParseChildParameters(
       DWARFAttributes attributes;
       const size_t num_attributes = die.GetAttributes(attributes);
       if (num_attributes > 0) {
-        const char *name = NULL;
+        const char *name = nullptr;
         Declaration decl;
         DWARFFormValue param_type_die_form;
         bool is_artificial = false;
@@ -3371,7 +3373,7 @@ size_t DWARFASTParserClang::ParseChildParameters(
               // Often times compilers omit the "this" name for the
               // specification DIEs, so we can't rely upon the name being in
               // the formal parameter DIE...
-              (name == NULL || ::strcmp(name, "this") == 0)) {
+              (name == nullptr || ::strcmp(name, "this") == 0)) {
             Type *this_type = die.ResolveTypeUID(DIERef(param_type_die_form));
             if (this_type) {
               uint32_t encoding_mask = this_type->GetEncodingMask();
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
index 25d64d4f62f13..e9aa8f360bcab 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
@@ -66,7 +66,7 @@ DWARFAbbreviationDeclarationSet::GetAbbreviationDeclaration(
     if (idx < m_decls.size())
       return &m_decls[idx];
   }
-  return NULL;
+  return nullptr;
 }
 
 
@@ -137,7 +137,7 @@ DWARFDebugAbbrev::GetAbbreviationDeclarationSet(
 
   if (pos != m_abbrevCollMap.end())
     return &(pos->second);
-  return NULL;
+  return nullptr;
 }
 
 // DWARFDebugAbbrev::GetUnsupportedForms()
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
index 4d22c825390e6..e2de7d536f17e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
@@ -68,7 +68,7 @@ DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
 }
 
 void DWARFDebugAranges::Dump(Log *log) const {
-  if (log == NULL)
+  if (log == nullptr)
     return;
 
   const size_t num_entries = m_aranges.GetSize();
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
index f17caf1bca576..c81b6ce05f0f7 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
@@ -30,7 +30,7 @@ using namespace std;
 
 // Constructor
 DWARFDebugInfo::DWARFDebugInfo(lldb_private::DWARFContext &context)
-    : m_dwarf2Data(NULL), m_context(context), m_units(), m_cu_aranges_up() {}
+    : m_dwarf2Data(nullptr), m_context(context), m_units(), m_cu_aranges_up() {}
 
 // SetDwarfData
 void DWARFDebugInfo::SetDwarfData(SymbolFileDWARF *dwarf2Data) {
@@ -114,7 +114,7 @@ size_t DWARFDebugInfo::GetNumUnits() {
 }
 
 DWARFUnit *DWARFDebugInfo::GetUnitAtIndex(user_id_t idx) {
-  DWARFUnit *cu = NULL;
+  DWARFUnit *cu = nullptr;
   if (idx < GetNumUnits())
     cu = m_units[idx].get();
   return cu;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
index 25aecf3accf44..215862c1aa7b4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
@@ -41,8 +41,8 @@ class DWARFDebugInfo {
 
   size_t GetNumUnits();
   DWARFUnit *GetUnitAtIndex(lldb::user_id_t idx);
-  DWARFUnit *GetUnitAtOffset(DIERef::Section section,
-                             dw_offset_t cu_offset, uint32_t *idx_ptr = NULL);
+  DWARFUnit *GetUnitAtOffset(DIERef::Section section, dw_offset_t cu_offset,
+                             uint32_t *idx_ptr = nullptr);
   DWARFUnit *GetUnitContainingDIEOffset(DIERef::Section section,
                                         dw_offset_t die_offset);
   DWARFUnit *GetUnit(const DIERef &die_ref);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index 7dd3eef2f4629..aba80705ba9da 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -51,7 +51,7 @@ bool DWARFDebugInfoEntry::FastExtract(
     const DWARFAbbreviationDeclaration *abbrevDecl =
         cu->GetAbbreviations()->GetAbbreviationDeclaration(m_abbr_idx);
 
-    if (abbrevDecl == NULL) {
+    if (abbrevDecl == nullptr) {
       cu->GetSymbolFileDWARF()->GetObjectFile()->GetModule()->ReportError(
           "{0x%8.8x}: invalid abbreviation code %u, please file a bug and "
           "attach the file at the start of this error message",
@@ -451,13 +451,13 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
         } break;
 
         case DW_AT_name:
-          if (name == NULL)
+          if (name == nullptr)
             name = form_value.AsCString();
           break;
 
         case DW_AT_MIPS_linkage_name:
         case DW_AT_linkage_name:
-          if (mangled == NULL)
+          if (mangled == nullptr)
             mangled = form_value.AsCString();
           break;
 
@@ -551,7 +551,7 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
     frame_base->SetLocationListSlide(lowest_range_pc - cu->GetBaseAddress());
   }
 
-  if (ranges.IsEmpty() || name == NULL || mangled == NULL) {
+  if (ranges.IsEmpty() || name == nullptr || mangled == nullptr) {
     for (const DIERef &die_ref : die_refs) {
       if (die_ref.die_offset != DW_INVALID_OFFSET) {
         DWARFDIE die = dwarf2Data->GetDIE(die_ref);
@@ -1223,7 +1223,7 @@ DWARFDebugInfoEntry::GetQualifiedName(DWARFUnit *cu,
     storage.append(name);
   }
   if (storage.empty())
-    return NULL;
+    return nullptr;
   return storage.c_str();
 }
 
@@ -1385,13 +1385,14 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
             switch (m_tag) {
             case DW_TAG_compile_unit: // File
             case DW_TAG_partial_unit: // File
-              check_children = ((function_die != NULL) || (block_die != NULL));
+              check_children =
+                  ((function_die != nullptr) || (block_die != nullptr));
               break;
 
             case DW_TAG_subprogram: // Function
               if (function_die)
                 *function_die = this;
-              check_children = (block_die != NULL);
+              check_children = (block_die != nullptr);
               break;
 
             case DW_TAG_inlined_subroutine: // Inlined Function
@@ -1411,9 +1412,9 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
           // Compile units may not have a valid high/low pc when there
           // are address gaps in subroutines so we must always search
           // if there is no valid high and low PC.
-          check_children = (m_tag == DW_TAG_compile_unit ||
-                            m_tag == DW_TAG_partial_unit) &&
-                           ((function_die != NULL) || (block_die != NULL));
+          check_children =
+              (m_tag == DW_TAG_compile_unit || m_tag == DW_TAG_partial_unit) &&
+              ((function_die != nullptr) || (block_die != nullptr));
         }
       } else {
         DWARFFormValue form_value;
@@ -1430,13 +1431,14 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
             switch (m_tag) {
             case DW_TAG_compile_unit: // File
             case DW_TAG_partial_unit: // File
-              check_children = ((function_die != NULL) || (block_die != NULL));
+              check_children =
+                  ((function_die != nullptr) || (block_die != nullptr));
               break;
 
             case DW_TAG_subprogram: // Function
               if (function_die)
                 *function_die = this;
-              check_children = (block_die != NULL);
+              check_children = (block_die != nullptr);
               break;
 
             case DW_TAG_inlined_subroutine: // Inlined Function
@@ -1500,7 +1502,7 @@ DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(
     }
   }
   offset = DW_INVALID_OFFSET;
-  return NULL;
+  return nullptr;
 }
 
 bool DWARFDebugInfoEntry::OffsetLessThan(const DWARFDebugInfoEntry &a,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
index 6c1057a0cf026..caf2915cc74d1 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
@@ -143,12 +143,11 @@ class DWARFDebugInfoEntry {
                 lldb::offset_t *offset_ptr, lldb_private::Stream &s,
                 dw_attr_t attr, DWARFFormValue &form_value);
 
-  bool
-  GetDIENamesAndRanges(const DWARFUnit *cu, const char *&name,
-                       const char *&mangled, DWARFRangeList &rangeList,
-                       int &decl_file, int &decl_line, int &decl_column,
-                       int &call_file, int &call_line, int &call_column,
-                       lldb_private::DWARFExpression *frame_base = NULL) const;
+  bool GetDIENamesAndRanges(
+      const DWARFUnit *cu, const char *&name, const char *&mangled,
+      DWARFRangeList &rangeList, int &decl_file, int &decl_line,
+      int &decl_column, int &call_file, int &call_line, int &call_column,
+      lldb_private::DWARFExpression *frame_base = nullptr) const;
 
   const DWARFAbbreviationDeclaration *
   GetAbbreviationDeclarationPtr(const DWARFUnit *cu,
@@ -167,27 +166,27 @@ class DWARFDebugInfoEntry {
   // We know we are kept in a vector of contiguous entries, so we know
   // our parent will be some index behind "this".
   DWARFDebugInfoEntry *GetParent() {
-    return m_parent_idx > 0 ? this - m_parent_idx : NULL;
+    return m_parent_idx > 0 ? this - m_parent_idx : nullptr;
   }
   const DWARFDebugInfoEntry *GetParent() const {
-    return m_parent_idx > 0 ? this - m_parent_idx : NULL;
+    return m_parent_idx > 0 ? this - m_parent_idx : nullptr;
   }
   // We know we are kept in a vector of contiguous entries, so we know
   // our sibling will be some index after "this".
   DWARFDebugInfoEntry *GetSibling() {
-    return m_sibling_idx > 0 ? this + m_sibling_idx : NULL;
+    return m_sibling_idx > 0 ? this + m_sibling_idx : nullptr;
   }
   const DWARFDebugInfoEntry *GetSibling() const {
-    return m_sibling_idx > 0 ? this + m_sibling_idx : NULL;
+    return m_sibling_idx > 0 ? this + m_sibling_idx : nullptr;
   }
   // We know we are kept in a vector of contiguous entries, so we know
   // we don't need to store our child pointer, if we have a child it will
   // be the next entry in the list...
   DWARFDebugInfoEntry *GetFirstChild() {
-    return HasChildren() ? this + 1 : NULL;
+    return HasChildren() ? this + 1 : nullptr;
   }
   const DWARFDebugInfoEntry *GetFirstChild() const {
-    return HasChildren() ? this + 1 : NULL;
+    return HasChildren() ? this + 1 : nullptr;
   }
 
   std::vector<DWARFDIE> GetDeclContextDIEs(DWARFUnit *cu) const;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
index 0a7a44ac28296..953089fee22b5 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugLine.cpp
@@ -38,7 +38,7 @@ void DWARFDebugLine::Parse(const DWARFDataExtractor &debug_line_data) {
   while (debug_line_data.ValidOffset(offset)) {
     const lldb::offset_t debug_line_offset = offset;
 
-    if (line_table_sp.get() == NULL)
+    if (line_table_sp.get() == nullptr)
       break;
 
     if (ParseStatementTable(debug_line_data, &offset, line_table_sp.get(), nullptr)) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp
index 5e2646a0ced79..cb7f26264a498 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.cpp
@@ -105,7 +105,7 @@ void DWARFDebugRanges::Dump(Stream &s,
       dw_addr_t begin_addr = begin + base_addr;
       dw_addr_t end_addr = end + base_addr;
 
-      s.AddressRange(begin_addr, end_addr, sizeof(dw_addr_t), NULL);
+      s.AddressRange(begin_addr, end_addr, sizeof(dw_addr_t), nullptr);
     }
   }
 }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp
index 455cb01e71a4a..a664314035e4d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp
@@ -29,7 +29,7 @@ const char *DWARFDeclContext::GetQualifiedName() const {
         for (pos = begin; pos != end; ++pos) {
           if (pos != begin)
             m_qualified_name.append("::");
-          if (pos->name == NULL) {
+          if (pos->name == nullptr) {
             if (pos->tag == DW_TAG_namespace)
               m_qualified_name.append("(anonymous namespace)");
             else if (pos->tag == DW_TAG_class_type)
@@ -47,7 +47,7 @@ const char *DWARFDeclContext::GetQualifiedName() const {
     }
   }
   if (m_qualified_name.empty())
-    return NULL;
+    return nullptr;
   return m_qualified_name.c_str();
 }
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h
index 9f5e28eb747d6..d0d70dd5123e1 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.h
@@ -23,7 +23,7 @@
 class DWARFDeclContext {
 public:
   struct Entry {
-    Entry() : tag(0), name(NULL) {}
+    Entry() : tag(0), name(nullptr) {}
     Entry(dw_tag_t t, const char *n) : tag(t), name(n) {}
 
     bool NameMatches(const Entry &rhs) const {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
index 5e2caed4d4662..9b5faa6ed9bd6 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
@@ -120,7 +120,7 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data,
 
   bool indirect = false;
   bool is_block = false;
-  m_value.data = NULL;
+  m_value.data = nullptr;
   uint8_t ref_addr_size;
   // Read the value for the form into value and follow and DW_FORM_indirect
   // instances we run into
@@ -223,7 +223,7 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data,
 
   if (is_block) {
     m_value.data = data.PeekData(*offset_ptr, m_value.value.uval);
-    if (m_value.data != NULL) {
+    if (m_value.data != nullptr) {
       *offset_ptr += m_value.value.uval;
     }
   }
@@ -687,7 +687,7 @@ int DWARFFormValue::Compare(const DWARFFormValue &a_value,
       return 0;
     else if (a_string && b_string)
       return strcmp(a_string, b_string);
-    else if (a_string == NULL)
+    else if (a_string == nullptr)
       return -1; // A string is NULL, and B is valid
     else
       return 1; // A string valid, and B is NULL
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
index 668bd5a568afe..2143921587f5c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
@@ -19,7 +19,7 @@ class DWARFDIE;
 class DWARFFormValue {
 public:
   typedef struct ValueTypeTag {
-    ValueTypeTag() : value(), data(NULL) { value.uval = 0; }
+    ValueTypeTag() : value(), data(nullptr) { value.uval = 0; }
 
     union {
       uint64_t uval;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 59202061436ff..65a1fbf74f822 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -502,7 +502,7 @@ void DWARFUnit::ParseProducerInfo() {
   if (die) {
 
     const char *producer_cstr =
-        die->GetAttributeValueAsString(this, DW_AT_producer, NULL);
+        die->GetAttributeValueAsString(this, DW_AT_producer, nullptr);
     if (producer_cstr) {
       RegularExpression llvm_gcc_regex(
           llvm::StringRef("^4\\.[012]\\.[01] \\(Based on Apple "
@@ -653,7 +653,7 @@ void DWARFUnit::ComputeCompDirAndGuessPathStyle() {
     return;
 
   llvm::StringRef comp_dir = removeHostnameFromPathname(
-      die->GetAttributeValueAsString(this, DW_AT_comp_dir, NULL));
+      die->GetAttributeValueAsString(this, DW_AT_comp_dir, nullptr));
   if (!comp_dir.empty()) {
     FileSpec::Style comp_dir_style =
         FileSpec::GuessPathStyle(comp_dir).getValueOr(FileSpec::Style::native);
@@ -661,7 +661,8 @@ void DWARFUnit::ComputeCompDirAndGuessPathStyle() {
   } else {
     // Try to detect the style based on the DW_AT_name attribute, but just store
     // the detected style in the m_comp_dir field.
-    const char *name = die->GetAttributeValueAsString(this, DW_AT_name, NULL);
+    const char *name =
+        die->GetAttributeValueAsString(this, DW_AT_name, nullptr);
     m_comp_dir = FileSpec(
         "", FileSpec::GuessPathStyle(name).getValueOr(FileSpec::Style::native));
   }
@@ -674,7 +675,7 @@ SymbolFileDWARFDwo *DWARFUnit::GetDwoSymbolFile() const {
 dw_offset_t DWARFUnit::GetBaseObjOffset() const { return m_base_obj_offset; }
 
 const DWARFDebugAranges &DWARFUnit::GetFunctionAranges() {
-  if (m_func_aranges_up == NULL) {
+  if (m_func_aranges_up == nullptr) {
     m_func_aranges_up.reset(new DWARFDebugAranges());
     const DWARFDebugInfoEntry *die = DIEPtr();
     if (die)
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp
index 333e203c92890..2cd7c92f1ca81 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp
@@ -264,7 +264,7 @@ bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
     return false;
 
   for (size_t i = 0; i < num_atoms; ++i) {
-    DWARFFormValue form_value(NULL, header_data.atoms[i].form);
+    DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
 
     if (!form_value.ExtractValue(data, offset_ptr))
       return false;
@@ -341,7 +341,7 @@ DWARFMappedHash::MemoryTable::GetHashDataForName(
   // There definitely should be a string for this string offset, if there
   // isn't, there is something wrong, return and error
   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
-  if (strp_cstr == NULL) {
+  if (strp_cstr == nullptr) {
     *hash_data_offset_ptr = UINT32_MAX;
     return eResultError;
   }
@@ -408,7 +408,7 @@ DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
   // There definitely should be a string for this string offset, if there
   // isn't, there is something wrong, return and error
   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
-  if (strp_cstr == NULL)
+  if (strp_cstr == nullptr)
     return eResultError;
 
   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
index cefe81bb176bb..a6fae61a6a4fc 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -141,8 +141,8 @@ void ManualDWARFIndex::IndexUnitImpl(
     }
 
     DWARFAttributes attributes;
-    const char *name = NULL;
-    const char *mangled_cstr = NULL;
+    const char *name = nullptr;
+    const char *mangled_cstr = nullptr;
     bool is_declaration = false;
     // bool is_artificial = false;
     bool has_address = false;
@@ -194,7 +194,7 @@ void ManualDWARFIndex::IndexUnitImpl(
           has_location_or_const_value = true;
           if (tag == DW_TAG_variable) {
             const DWARFDebugInfoEntry *parent_die = die.GetParent();
-            while (parent_die != NULL) {
+            while (parent_die != nullptr) {
               switch (parent_die->Tag()) {
               case DW_TAG_subprogram:
               case DW_TAG_lexical_block:
@@ -219,13 +219,13 @@ void ManualDWARFIndex::IndexUnitImpl(
                 //     }
                 //   }
                 // }
-                parent_die = NULL; // Terminate the while loop.
+                parent_die = nullptr; // Terminate the while loop.
                 break;
 
               case DW_TAG_compile_unit:
               case DW_TAG_partial_unit:
                 is_global_or_static_variable = true;
-                parent_die = NULL; // Terminate the while loop.
+                parent_die = nullptr; // Terminate the while loop.
                 break;
 
               default:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index f639b031b1206..c692e8bf18d08 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -292,14 +292,14 @@ size_t SymbolFileDWARF::GetTypes(SymbolContextScope *sc_scope,
   ASSERT_MODULE_LOCK(this);
   TypeSet type_set;
 
-  CompileUnit *comp_unit = NULL;
-  DWARFUnit *dwarf_cu = NULL;
+  CompileUnit *comp_unit = nullptr;
+  DWARFUnit *dwarf_cu = nullptr;
   if (sc_scope)
     comp_unit = sc_scope->CalculateSymbolContextCompileUnit();
 
   if (comp_unit) {
     dwarf_cu = GetDWARFCompileUnit(comp_unit);
-    if (dwarf_cu == 0)
+    if (dwarf_cu == nullptr)
       return 0;
     GetTypes(dwarf_cu->DIE(), dwarf_cu->GetOffset(),
              dwarf_cu->GetNextUnitOffset(), type_mask, type_set);
@@ -355,7 +355,7 @@ SymbolFileDWARF::SymbolFileDWARF(ObjectFile *objfile,
       UserID(0x7fffffff00000000), // Used by SymbolFileDWARFDebugMap to
                                   // when this class parses .o files to
                                   // contain the .o file index/ID
-      m_debug_map_module_wp(), m_debug_map_symfile(NULL),
+      m_debug_map_module_wp(), m_debug_map_symfile(nullptr),
       m_context(objfile->GetModule()->GetSectionList(), dwo_section_list),
       m_data_debug_loc(), m_data_debug_ranges(), m_data_debug_rnglists(),
       m_abbr(), m_info(), m_line(), m_fetched_external_modules(false),
@@ -433,10 +433,10 @@ bool SymbolFileDWARF::SupportedVersion(uint16_t version) {
 
 uint32_t SymbolFileDWARF::CalculateAbilities() {
   uint32_t abilities = 0;
-  if (m_obj_file != NULL) {
-    const Section *section = NULL;
+  if (m_obj_file != nullptr) {
+    const Section *section = nullptr;
     const SectionList *section_list = m_obj_file->GetSectionList();
-    if (section_list == NULL)
+    if (section_list == nullptr)
       return 0;
 
     uint64_t debug_abbrev_file_size = 0;
@@ -450,7 +450,7 @@ uint32_t SymbolFileDWARF::CalculateAbilities() {
 
     section =
         section_list->FindSectionByType(eSectionTypeDWARFDebugInfo, true).get();
-    if (section != NULL) {
+    if (section != nullptr) {
       debug_info_file_size = section->GetFileSize();
 
       section =
@@ -587,7 +587,7 @@ const DWARFDebugAbbrev *SymbolFileDWARF::DebugAbbrev() const {
 }
 
 DWARFDebugInfo *SymbolFileDWARF::DebugInfo() {
-  if (m_info == NULL) {
+  if (m_info == nullptr) {
     static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
     Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION,
                        static_cast<void *>(this));
@@ -612,15 +612,15 @@ SymbolFileDWARF::GetDWARFCompileUnit(lldb_private::CompileUnit *comp_unit) {
   if (info) {
     // The compile unit ID is the index of the DWARF unit.
     DWARFUnit *dwarf_cu = info->GetUnitAtIndex(comp_unit->GetID());
-    if (dwarf_cu && dwarf_cu->GetUserData() == NULL)
+    if (dwarf_cu && dwarf_cu->GetUserData() == nullptr)
       dwarf_cu->SetUserData(comp_unit);
     return dwarf_cu;
   }
-  return NULL;
+  return nullptr;
 }
 
 DWARFDebugRangesBase *SymbolFileDWARF::DebugRanges() {
-  if (m_ranges == NULL) {
+  if (m_ranges == nullptr) {
     static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
     Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION,
                        static_cast<void *>(this));
@@ -905,7 +905,7 @@ static void ParseDWARFLineTableCallback(dw_offset_t offset,
 
 bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) {
   ASSERT_MODULE_LOCK(this);
-  if (comp_unit.GetLineTable() != NULL)
+  if (comp_unit.GetLineTable() != nullptr)
     return true;
 
   DWARFUnit *dwarf_cu = GetDWARFCompileUnit(&comp_unit);
@@ -1022,7 +1022,7 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(
     case DW_TAG_inlined_subroutine:
     case DW_TAG_subprogram:
     case DW_TAG_lexical_block: {
-      Block *block = NULL;
+      Block *block = nullptr;
       if (tag == DW_TAG_subprogram) {
         // Skip any DW_TAG_subprogram DIEs that are inside of a normal or
         // inlined functions. These will be parsed on their own as separate
@@ -1038,8 +1038,8 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(
         block = block_sp.get();
       }
       DWARFRangeList ranges;
-      const char *name = NULL;
-      const char *mangled_name = NULL;
+      const char *name = nullptr;
+      const char *mangled_name = nullptr;
 
       int decl_file = 0;
       int decl_line = 0;
@@ -1087,7 +1087,7 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(
         block->FinalizeRanges();
 
         if (tag != DW_TAG_subprogram &&
-            (name != NULL || mangled_name != NULL)) {
+            (name != nullptr || mangled_name != nullptr)) {
           std::unique_ptr<Declaration> decl_up;
           if (decl_file != 0 || decl_line != 0 || decl_column != 0)
             decl_up.reset(new Declaration(
@@ -1312,7 +1312,7 @@ Type *SymbolFileDWARF::ResolveTypeUID(const DWARFDIE &die,
     }
     return ResolveType(die);
   }
-  return NULL;
+  return nullptr;
 }
 
 // This function is used when SymbolFileDWARFDebugMap owns a bunch of
@@ -1409,7 +1409,7 @@ CompileUnit *
 SymbolFileDWARF::GetCompUnitForDWARFCompUnit(DWARFUnit *dwarf_cu,
                                              uint32_t cu_idx) {
   // Check if the symbol vendor already knows about this compile unit?
-  if (dwarf_cu->GetUserData() == NULL) {
+  if (dwarf_cu->GetUserData() == nullptr) {
     // The symbol vendor doesn't know about this compile unit, we need to parse
     // and add it to the symbol vendor object.
     return ParseCompileUnit(dwarf_cu, cu_idx).get();
@@ -1432,7 +1432,7 @@ bool SymbolFileDWARF::GetFunction(const DWARFDIE &die, SymbolContext &sc) {
     sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU(), UINT32_MAX);
 
     sc.function = sc.comp_unit->FindFunctionByUID(die.GetID()).get();
-    if (sc.function == NULL)
+    if (sc.function == nullptr)
       sc.function = ParseFunction(*sc.comp_unit, die);
 
     if (sc.function) {
@@ -1573,7 +1573,7 @@ void SymbolFileDWARF::UpdateExternalModuleListIfNeeded() {
             }
 
             Status error = ModuleList::GetSharedModule(
-                dwo_module_spec, module_sp, NULL, NULL, NULL);
+                dwo_module_spec, module_sp, nullptr, nullptr, nullptr);
             if (!module_sp) {
               GetObjectFile()->GetModule()->ReportWarning(
                   "0x%8.8x: unable to locate module needed for external types: "
@@ -1703,7 +1703,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const Address &so_addr,
               if (function_die) {
                 sc.function =
                     sc.comp_unit->FindFunctionByUID(function_die.GetID()).get();
-                if (sc.function == NULL)
+                if (sc.function == nullptr)
                   sc.function = ParseFunction(*sc.comp_unit, function_die);
 
                 if (sc.function && (resolve_scope & eSymbolContextBlock))
@@ -1718,7 +1718,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const Address &so_addr,
                 force_check_line_table = true;
               }
 
-              if (sc.function != NULL) {
+              if (sc.function != nullptr) {
                 resolved |= eSymbolContextFunction;
 
                 if (resolve_scope & eSymbolContextBlock) {
@@ -1737,7 +1737,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const Address &so_addr,
             if ((resolve_scope & eSymbolContextLineEntry) ||
                 force_check_line_table) {
               LineTable *line_table = sc.comp_unit->GetLineTable();
-              if (line_table != NULL) {
+              if (line_table != nullptr) {
                 // And address that makes it into this function should be in
                 // terms of this debug file if there is no debug map, or it
                 // will be an address in the .o file which needs to be fixed up
@@ -1761,7 +1761,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const Address &so_addr,
               // only happen when there aren't other functions from other
               // compile units in these gaps. This helps keep the size of the
               // aranges down.
-              sc.comp_unit = NULL;
+              sc.comp_unit = nullptr;
               resolved &= ~eSymbolContextCompUnit;
             }
           } else {
@@ -1787,14 +1787,15 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec,
     DWARFDebugInfo *debug_info = DebugInfo();
     if (debug_info) {
       uint32_t cu_idx;
-      DWARFUnit *dwarf_cu = NULL;
+      DWARFUnit *dwarf_cu = nullptr;
 
-      for (cu_idx = 0; (dwarf_cu = debug_info->GetUnitAtIndex(cu_idx)) != NULL;
+      for (cu_idx = 0;
+           (dwarf_cu = debug_info->GetUnitAtIndex(cu_idx)) != nullptr;
            ++cu_idx) {
         CompileUnit *dc_cu = GetCompUnitForDWARFCompUnit(dwarf_cu, cu_idx);
         const bool full_match = (bool)file_spec.GetDirectory();
         bool file_spec_matches_cu_file_spec =
-            dc_cu != NULL && FileSpec::Equal(file_spec, *dc_cu, full_match);
+            dc_cu != nullptr && FileSpec::Equal(file_spec, *dc_cu, full_match);
         if (check_inlines || file_spec_matches_cu_file_spec) {
           SymbolContext sc(m_obj_file->GetModule());
           sc.comp_unit = GetCompUnitForDWARFCompUnit(dwarf_cu, cu_idx);
@@ -1813,7 +1814,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec,
             if (line != 0) {
               LineTable *line_table = sc.comp_unit->GetLineTable();
 
-              if (line_table != NULL && line != 0) {
+              if (line_table != nullptr && line != 0) {
                 // We will have already looked up the file index if we are
                 // searching for inline entries.
                 if (!check_inlines)
@@ -1827,8 +1828,8 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec,
                   found_line = sc.line_entry.line;
 
                   while (line_idx != UINT32_MAX) {
-                    sc.function = NULL;
-                    sc.block = NULL;
+                    sc.function = nullptr;
+                    sc.block = nullptr;
                     if (resolve_scope &
                         (eSymbolContextFunction | eSymbolContextBlock)) {
                       const lldb::addr_t file_vm_addr =
@@ -1842,7 +1843,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec,
                               sc.comp_unit
                                   ->FindFunctionByUID(function_die.GetID())
                                   .get();
-                          if (sc.function == NULL)
+                          if (sc.function == nullptr)
                             sc.function =
                                 ParseFunction(*sc.comp_unit, function_die);
 
@@ -1852,7 +1853,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec,
                                 function_die.LookupDeepestBlock(file_vm_addr);
                         }
 
-                        if (sc.function != NULL) {
+                        if (sc.function != nullptr) {
                           Block &block = sc.function->GetBlock(true);
 
                           if (block_die)
@@ -1947,7 +1948,7 @@ uint32_t SymbolFileDWARF::FindGlobalVariables(
     return 0;
 
   DWARFDebugInfo *info = DebugInfo();
-  if (info == NULL)
+  if (info == nullptr)
     return 0;
 
   // Remember how many variables are in the list before we search.
@@ -2050,7 +2051,7 @@ uint32_t SymbolFileDWARF::FindGlobalVariables(const RegularExpression &regex,
   }
 
   DWARFDebugInfo *info = DebugInfo();
-  if (info == NULL)
+  if (info == nullptr)
     return 0;
 
   // Remember how many variables are in the list before we search.
@@ -2120,12 +2121,12 @@ bool SymbolFileDWARF::ResolveFunction(const DWARFDIE &orig_die,
     if (inlined_die) {
       Block &function_block = sc.function->GetBlock(true);
       sc.block = function_block.FindBlockByID(inlined_die.GetID());
-      if (sc.block == NULL)
+      if (sc.block == nullptr)
         sc.block = function_block.FindBlockByID(inlined_die.GetOffset());
-      if (sc.block == NULL || !sc.block->GetStartAddress(addr))
+      if (sc.block == nullptr || !sc.block->GetStartAddress(addr))
         addr.Clear();
     } else {
-      sc.block = NULL;
+      sc.block = nullptr;
       addr = sc.function->GetAddressRange().GetBaseAddress();
     }
 
@@ -2196,7 +2197,7 @@ uint32_t SymbolFileDWARF::FindFunctions(
   const uint32_t original_size = sc_list.GetSize();
 
   DWARFDebugInfo *info = DebugInfo();
-  if (info == NULL)
+  if (info == nullptr)
     return 0;
 
   llvm::DenseSet<const DWARFDebugInfoEntry *> resolved_dies;
@@ -2319,7 +2320,7 @@ uint32_t SymbolFileDWARF::FindTypes(
     searched_symbol_files.insert(this);
 
   DWARFDebugInfo *info = DebugInfo();
-  if (info == NULL)
+  if (info == nullptr)
     return 0;
 
   Log *log(LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS));
@@ -2513,7 +2514,7 @@ TypeSP SymbolFileDWARF::GetTypeForDIE(const DWARFDIE &die,
   TypeSP type_sp;
   if (die) {
     Type *type_ptr = GetDIEToType().lookup(die.GetDIE());
-    if (type_ptr == NULL) {
+    if (type_ptr == nullptr) {
       CompileUnit *lldb_cu = GetCompUnitForDWARFCompUnit(die.GetCU());
       assert(lldb_cu);
       SymbolContext sc(lldb_cu);
@@ -2528,7 +2529,7 @@ TypeSP SymbolFileDWARF::GetTypeForDIE(const DWARFDIE &die,
           !GetFunction(DWARFDIE(die.GetCU(), parent_die), sc))
         sc = sc_backup;
 
-      type_sp = ParseType(sc, die, NULL);
+      type_sp = ParseType(sc, die, nullptr);
     } else if (type_ptr != DIE_IS_BEING_PARSED) {
       // Grab the existing type from the master types lists
       type_sp = type_ptr->shared_from_this();
@@ -2591,7 +2592,7 @@ SymbolFileDWARF::GetDeclContextDIEContainingDIE(const DWARFDIE &orig_die) {
 
 Symbol *
 SymbolFileDWARF::GetObjCClassSymbol(ConstString objc_class_name) {
-  Symbol *objc_class_symbol = NULL;
+  Symbol *objc_class_symbol = nullptr;
   if (m_obj_file) {
     Symtab *symtab = m_obj_file->GetSymtab();
     if (symtab) {
@@ -3026,9 +3027,9 @@ size_t SymbolFileDWARF::ParseTypes(CompileUnit &comp_unit) {
 
 size_t SymbolFileDWARF::ParseVariablesForContext(const SymbolContext &sc) {
   ASSERT_MODULE_LOCK(this);
-  if (sc.comp_unit != NULL) {
+  if (sc.comp_unit != nullptr) {
     DWARFDebugInfo *info = DebugInfo();
-    if (info == NULL)
+    if (info == nullptr)
       return 0;
 
     if (sc.function) {
@@ -3047,13 +3048,13 @@ size_t SymbolFileDWARF::ParseVariablesForContext(const SymbolContext &sc) {
     } else if (sc.comp_unit) {
       DWARFUnit *dwarf_cu = info->GetUnitAtIndex(sc.comp_unit->GetID());
 
-      if (dwarf_cu == NULL)
+      if (dwarf_cu == nullptr)
         return 0;
 
       uint32_t vars_added = 0;
       VariableListSP variables(sc.comp_unit->GetVariableList(false));
 
-      if (variables.get() == NULL) {
+      if (variables.get() == nullptr) {
         variables = std::make_shared<VariableList>();
         sc.comp_unit->SetVariableList(variables);
 
@@ -3105,8 +3106,8 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
     const size_t num_attributes = die.GetAttributes(attributes);
     DWARFDIE spec_die;
     if (num_attributes > 0) {
-      const char *name = NULL;
-      const char *mangled = NULL;
+      const char *name = nullptr;
+      const char *mangled = nullptr;
       Declaration decl;
       uint32_t i;
       DWARFFormValue type_die_form;
@@ -3288,7 +3289,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
       ValueType scope = eValueTypeInvalid;
 
       const DWARFDIE sc_parent_die = GetParentSymbolContextDIE(die);
-      SymbolContextScope *symbol_context_scope = NULL;
+      SymbolContextScope *symbol_context_scope = nullptr;
 
       bool has_explicit_mangled = mangled != nullptr;
       if (!mangled) {
@@ -3334,7 +3335,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
           if (op_error) {
             StreamString strm;
             location.DumpLocationForAddress(&strm, eDescriptionLevelFull, 0, 0,
-                                            NULL);
+                                            nullptr);
             GetObjectFile()->GetModule()->ReportError(
                 "0x%8.8x: %s has an invalid location: %s", die.GetOffset(),
                 die.GetTagAsCString(), strm.GetData());
@@ -3437,7 +3438,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
         }
       }
 
-      if (symbol_context_scope == NULL) {
+      if (symbol_context_scope == nullptr) {
         switch (parent_tag) {
         case DW_TAG_subprogram:
         case DW_TAG_inlined_subroutine:
@@ -3445,7 +3446,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
           if (sc.function) {
             symbol_context_scope = sc.function->GetBlock(true).FindBlockByID(
                 sc_parent_die.GetID());
-            if (symbol_context_scope == NULL)
+            if (symbol_context_scope == nullptr)
               symbol_context_scope = sc.function;
           }
           break;
@@ -3557,15 +3558,15 @@ size_t SymbolFileDWARF::ParseVariables(const SymbolContext &sc,
       // We haven't already parsed it, lets do that now.
       if ((tag == DW_TAG_variable) || (tag == DW_TAG_constant) ||
           (tag == DW_TAG_formal_parameter && sc.function)) {
-        if (variable_list_sp.get() == NULL) {
+        if (variable_list_sp.get() == nullptr) {
           DWARFDIE sc_parent_die = GetParentSymbolContextDIE(orig_die);
           dw_tag_t parent_tag = sc_parent_die.Tag();
           switch (parent_tag) {
           case DW_TAG_compile_unit:
           case DW_TAG_partial_unit:
-            if (sc.comp_unit != NULL) {
+            if (sc.comp_unit != nullptr) {
               variable_list_sp = sc.comp_unit->GetVariableList(false);
-              if (variable_list_sp.get() == NULL) {
+              if (variable_list_sp.get() == nullptr) {
                 variable_list_sp = std::make_shared<VariableList>();
               }
             } else {
@@ -3581,13 +3582,13 @@ size_t SymbolFileDWARF::ParseVariables(const SymbolContext &sc,
           case DW_TAG_subprogram:
           case DW_TAG_inlined_subroutine:
           case DW_TAG_lexical_block:
-            if (sc.function != NULL) {
+            if (sc.function != nullptr) {
               // Check to see if we already have parsed the variables for the
               // given scope
 
               Block *block = sc.function->GetBlock(true).FindBlockByID(
                   sc_parent_die.GetID());
-              if (block == NULL) {
+              if (block == nullptr) {
                 // This must be a specification or abstract origin with a
                 // concrete block counterpart in the current function. We need
                 // to find the concrete block so we can correctly add the
@@ -3601,10 +3602,10 @@ size_t SymbolFileDWARF::ParseVariables(const SymbolContext &sc,
                       concrete_block_die.GetID());
               }
 
-              if (block != NULL) {
+              if (block != nullptr) {
                 const bool can_create = false;
                 variable_list_sp = block->GetBlockVariableList(can_create);
-                if (variable_list_sp.get() == NULL) {
+                if (variable_list_sp.get() == nullptr) {
                   variable_list_sp = std::make_shared<VariableList>();
                   block->SetVariableList(variable_list_sp);
                 }
@@ -3633,7 +3634,7 @@ size_t SymbolFileDWARF::ParseVariables(const SymbolContext &sc,
       }
     }
 
-    bool skip_children = (sc.function == NULL && tag == DW_TAG_subprogram);
+    bool skip_children = (sc.function == nullptr && tag == DW_TAG_subprogram);
 
     if (!skip_children && parse_children && die.HasChildren()) {
       vars_added += ParseVariables(sc, die.GetFirstChild(), func_low_pc, true,
@@ -3718,7 +3719,7 @@ void SymbolFileDWARF::DumpClangAST(Stream &s) {
 }
 
 SymbolFileDWARFDebugMap *SymbolFileDWARF::GetDebugMapSymfile() {
-  if (m_debug_map_symfile == NULL && !m_debug_map_module_wp.expired()) {
+  if (m_debug_map_symfile == nullptr && !m_debug_map_module_wp.expired()) {
     lldb::ModuleSP module_sp(m_debug_map_module_wp.lock());
     if (module_sp) {
       SymbolVendor *sym_vendor = module_sp->GetSymbolVendor();
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index 426719bf0b293..d3272454d0a5e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -364,7 +364,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
                         const DWARFDIE &orig_die,
                         const lldb::addr_t func_low_pc, bool parse_siblings,
                         bool parse_children,
-                        lldb_private::VariableList *cc_variable_list = NULL);
+                        lldb_private::VariableList *cc_variable_list = nullptr);
 
   bool ClassOrStructIsVirtual(const DWARFDIE &die);
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp
index e5bc63b3cb85b..8ec64dbaf7641 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp
@@ -175,7 +175,7 @@ class DebugMapModule : public Module {
 
   SymbolVendor *
   GetSymbolVendor(bool can_create = true,
-                  lldb_private::Stream *feedback_strm = NULL) override {
+                  lldb_private::Stream *feedback_strm = nullptr) override {
     // Scope for locker
     if (m_symfile_up.get() || !can_create)
       return m_symfile_up.get();
@@ -197,7 +197,7 @@ class DebugMapModule : public Module {
                   symbol_vendor->GetSymbolFile());
 
           if (!oso_symfile)
-            return NULL;
+            return nullptr;
 
           ObjectFile *exe_objfile = exe_module_sp->GetObjectFile();
           SymbolVendor *exe_sym_vendor = exe_module_sp->GetSymbolVendor();
@@ -213,7 +213,7 @@ class DebugMapModule : public Module {
         }
       }
     }
-    return NULL;
+    return nullptr;
   }
 
 protected:
@@ -368,12 +368,12 @@ void SymbolFileDWARFDebugMap::InitOSO() {
                           oso_symbol->GetName().GetCString());
           }
         } else {
-          if (oso_symbol == NULL)
+          if (oso_symbol == nullptr)
             m_obj_file->GetModule()->ReportError(
                 "N_OSO symbol[%u] can't be found, please file a bug and attach "
                 "the binary listed in this error",
                 oso_idx);
-          else if (so_symbol == NULL)
+          else if (so_symbol == nullptr)
             m_obj_file->GetModule()->ReportError(
                 "N_SO not found for N_OSO symbol[%u], please file a bug and "
                 "attach the binary listed in this error",
@@ -398,7 +398,7 @@ Module *SymbolFileDWARFDebugMap::GetModuleByOSOIndex(uint32_t oso_idx) {
   const uint32_t cu_count = GetNumCompileUnits();
   if (oso_idx < cu_count)
     return GetModuleByCompUnitInfo(&m_compile_unit_infos[oso_idx]);
-  return NULL;
+  return nullptr;
 }
 
 Module *SymbolFileDWARFDebugMap::GetModuleByCompUnitInfo(
@@ -428,7 +428,7 @@ Module *SymbolFileDWARFDebugMap::GetModuleByCompUnitInfo(
               ") since this executable was linked, file will be ignored",
               oso_file.GetPath().c_str(), llvm::to_string(oso_mod_time).c_str(),
               llvm::to_string(comp_unit_info->oso_mod_time).c_str());
-          return NULL;
+          return nullptr;
         }
 
       } else {
@@ -436,7 +436,7 @@ Module *SymbolFileDWARFDebugMap::GetModuleByCompUnitInfo(
 
         if (!ObjectFile::SplitArchivePathWithObject(oso_path, oso_file,
                                                     oso_object, must_exist)) {
-          return NULL;
+          return nullptr;
         }
       }
       // Always create a new module for .o files. Why? Because we use the debug
@@ -462,7 +462,7 @@ Module *SymbolFileDWARFDebugMap::GetModuleByCompUnitInfo(
   }
   if (comp_unit_info->oso_sp)
     return comp_unit_info->oso_sp->module_sp.get();
-  return NULL;
+  return nullptr;
 }
 
 bool SymbolFileDWARFDebugMap::GetFileSpecForSO(uint32_t oso_idx,
@@ -480,7 +480,7 @@ ObjectFile *SymbolFileDWARFDebugMap::GetObjectFileByOSOIndex(uint32_t oso_idx) {
   Module *oso_module = GetModuleByOSOIndex(oso_idx);
   if (oso_module)
     return oso_module->GetObjectFile();
-  return NULL;
+  return nullptr;
 }
 
 SymbolFileDWARF *
@@ -493,7 +493,7 @@ SymbolFileDWARFDebugMap::GetSymbolFile(const CompileUnit &comp_unit) {
   CompileUnitInfo *comp_unit_info = GetCompUnitInfo(comp_unit);
   if (comp_unit_info)
     return GetSymbolFileByCompUnitInfo(comp_unit_info);
-  return NULL;
+  return nullptr;
 }
 
 ObjectFile *SymbolFileDWARFDebugMap::GetObjectFileByCompUnitInfo(
@@ -501,7 +501,7 @@ ObjectFile *SymbolFileDWARFDebugMap::GetObjectFileByCompUnitInfo(
   Module *oso_module = GetModuleByCompUnitInfo(comp_unit_info);
   if (oso_module)
     return oso_module->GetObjectFile();
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SymbolFileDWARFDebugMap::GetCompUnitInfoIndex(
@@ -521,7 +521,7 @@ SymbolFileDWARFDebugMap::GetSymbolFileByOSOIndex(uint32_t oso_idx) {
   unsigned size = m_compile_unit_infos.size();
   if (oso_idx < size)
     return GetSymbolFileByCompUnitInfo(&m_compile_unit_infos[oso_idx]);
-  return NULL;
+  return nullptr;
 }
 
 SymbolFileDWARF *
@@ -529,7 +529,7 @@ SymbolFileDWARFDebugMap::GetSymbolFileAsSymbolFileDWARF(SymbolFile *sym_file) {
   if (sym_file &&
       sym_file->GetPluginName() == SymbolFileDWARF::GetPluginNameStatic())
     return (SymbolFileDWARF *)sym_file;
-  return NULL;
+  return nullptr;
 }
 
 SymbolFileDWARF *SymbolFileDWARFDebugMap::GetSymbolFileByCompUnitInfo(
@@ -540,7 +540,7 @@ SymbolFileDWARF *SymbolFileDWARFDebugMap::GetSymbolFileByCompUnitInfo(
     if (sym_vendor)
       return GetSymbolFileAsSymbolFileDWARF(sym_vendor->GetSymbolFile());
   }
-  return NULL;
+  return nullptr;
 }
 
 uint32_t SymbolFileDWARFDebugMap::CalculateAbilities() {
@@ -609,7 +609,7 @@ SymbolFileDWARFDebugMap::GetCompUnitInfo(const CompileUnit &comp_unit) {
     if (comp_unit == m_compile_unit_infos[i].compile_unit_sp.get())
       return &m_compile_unit_infos[i];
   }
-  return NULL;
+  return nullptr;
 }
 
 size_t SymbolFileDWARFDebugMap::GetCompUnitInfosForModule(
@@ -706,7 +706,7 @@ Type *SymbolFileDWARFDebugMap::ResolveTypeUID(lldb::user_id_t type_uid) {
   SymbolFileDWARF *oso_dwarf = GetSymbolFileByOSOIndex(oso_idx);
   if (oso_dwarf)
     return oso_dwarf->ResolveTypeUID(type_uid);
-  return NULL;
+  return nullptr;
 }
 
 llvm::Optional<SymbolFile::ArrayInfo>
@@ -750,7 +750,7 @@ SymbolFileDWARFDebugMap::ResolveSymbolContext(const Address &exe_so_addr,
       sc.symbol =
           symtab->SymbolAtIndex(debug_map_entry->data.GetExeSymbolIndex());
 
-      if (sc.symbol != NULL) {
+      if (sc.symbol != nullptr) {
         resolved_flags |= eSymbolContextSymbol;
 
         uint32_t oso_idx = 0;
@@ -930,7 +930,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo *
 SymbolFileDWARFDebugMap::GetCompileUnitInfoForSymbolWithIndex(
     uint32_t symbol_idx, uint32_t *oso_idx_ptr) {
   const uint32_t oso_index_count = m_compile_unit_infos.size();
-  CompileUnitInfo *comp_unit_info = NULL;
+  CompileUnitInfo *comp_unit_info = nullptr;
   if (oso_index_count) {
     comp_unit_info = (CompileUnitInfo *)bsearch(
         &symbol_idx, &m_compile_unit_infos[0], m_compile_unit_infos.size(),
@@ -939,7 +939,7 @@ SymbolFileDWARFDebugMap::GetCompileUnitInfoForSymbolWithIndex(
   }
 
   if (oso_idx_ptr) {
-    if (comp_unit_info != NULL)
+    if (comp_unit_info != nullptr)
       *oso_idx_ptr = comp_unit_info - &m_compile_unit_infos[0];
     else
       *oso_idx_ptr = UINT32_MAX;
@@ -951,7 +951,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo *
 SymbolFileDWARFDebugMap::GetCompileUnitInfoForSymbolWithID(
     user_id_t symbol_id, uint32_t *oso_idx_ptr) {
   const uint32_t oso_index_count = m_compile_unit_infos.size();
-  CompileUnitInfo *comp_unit_info = NULL;
+  CompileUnitInfo *comp_unit_info = nullptr;
   if (oso_index_count) {
     comp_unit_info = (CompileUnitInfo *)::bsearch(
         &symbol_id, &m_compile_unit_infos[0], m_compile_unit_infos.size(),
@@ -960,7 +960,7 @@ SymbolFileDWARFDebugMap::GetCompileUnitInfoForSymbolWithID(
   }
 
   if (oso_idx_ptr) {
-    if (comp_unit_info != NULL)
+    if (comp_unit_info != nullptr)
       *oso_idx_ptr = comp_unit_info - &m_compile_unit_infos[0];
     else
       *oso_idx_ptr = UINT32_MAX;
@@ -1058,7 +1058,7 @@ size_t SymbolFileDWARFDebugMap::GetTypes(SymbolContextScope *sc_scope,
                      type_mask);
 
   uint32_t initial_size = type_list.GetSize();
-  SymbolFileDWARF *oso_dwarf = NULL;
+  SymbolFileDWARF *oso_dwarf = nullptr;
   if (sc_scope) {
     SymbolContext sc;
     sc_scope->CalculateSymbolContext(&sc);
@@ -1103,7 +1103,7 @@ bool SymbolFileDWARFDebugMap::Supports_DW_AT_APPLE_objc_complete_type(
     m_supports_DW_AT_APPLE_objc_complete_type = eLazyBoolNo;
     ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool {
       if (skip_dwarf_oso != oso_dwarf &&
-          oso_dwarf->Supports_DW_AT_APPLE_objc_complete_type(NULL)) {
+          oso_dwarf->Supports_DW_AT_APPLE_objc_complete_type(nullptr)) {
         m_supports_DW_AT_APPLE_objc_complete_type = eLazyBoolYes;
         return true;
       }
@@ -1122,7 +1122,7 @@ TypeSP SymbolFileDWARFDebugMap::FindCompleteObjCDefinitionTypeForDIE(
   // contain the implementation definition since it will be scoped inside the
   // N_SO and we can then locate the SymbolFileDWARF that corresponds to that
   // N_SO.
-  SymbolFileDWARF *oso_dwarf = NULL;
+  SymbolFileDWARF *oso_dwarf = nullptr;
   TypeSP type_sp;
   ObjectFile *module_objfile = m_obj_file->GetModule()->GetObjectFile();
   if (module_objfile) {
@@ -1143,7 +1143,7 @@ TypeSP SymbolFileDWARFDebugMap::FindCompleteObjCDefinitionTypeForDIE(
           if (source_file_symbol_idx != UINT32_MAX) {
             CompileUnitInfo *compile_unit_info =
                 GetCompileUnitInfoForSymbolWithIndex(source_file_symbol_idx,
-                                                     NULL);
+                                                     nullptr);
             if (compile_unit_info) {
               oso_dwarf = GetSymbolFileByCompUnitInfo(compile_unit_info);
               if (oso_dwarf) {
@@ -1268,7 +1268,7 @@ SymbolFileDWARFDebugMap::GetCompileUnitInfo(SymbolFileDWARF *oso_dwarf) {
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 void SymbolFileDWARFDebugMap::SetCompileUnit(SymbolFileDWARF *oso_dwarf,
@@ -1414,7 +1414,7 @@ LineTable *SymbolFileDWARFDebugMap::LinkOSOLineTable(SymbolFileDWARF *oso_dwarf,
   CompileUnitInfo *cu_info = GetCompileUnitInfo(oso_dwarf);
   if (cu_info)
     return line_table->LinkLineTable(cu_info->GetFileRangeMap(this));
-  return NULL;
+  return nullptr;
 }
 
 size_t
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
index 0f5262d2f4542..8da7e22262664 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp
@@ -39,12 +39,12 @@ bool UniqueDWARFASTTypeList::Find(const DWARFDIE &die,
               case DW_TAG_namespace: {
                 const char *parent_arg_die_name = parent_arg_die.GetName();
                 if (parent_arg_die_name ==
-                    NULL) // Anonymous (i.e. no-name) struct
+                    nullptr) // Anonymous (i.e. no-name) struct
                 {
                   match = false;
                 } else {
                   const char *parent_pos_die_name = parent_pos_die.GetName();
-                  if (parent_pos_die_name == NULL ||
+                  if (parent_pos_die_name == nullptr ||
                       ((parent_arg_die_name != parent_pos_die_name) &&
                        strcmp(parent_arg_die_name, parent_pos_die_name)))
                     match = false;
diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
index 7fa3a1c5d6509..a1b21e51b0fe5 100644
--- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
+++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
@@ -139,8 +139,8 @@ size_t SymbolFileSymtab::ParseFunctions(CompileUnit &comp_unit) {
   size_t num_added = 0;
   // We must at least have a valid compile unit
   const Symtab *symtab = m_obj_file->GetSymtab();
-  const Symbol *curr_symbol = NULL;
-  const Symbol *next_symbol = NULL;
+  const Symbol *curr_symbol = nullptr;
+  const Symbol *next_symbol = nullptr;
   //  const char *prefix = m_obj_file->SymbolPrefix();
   //  if (prefix == NULL)
   //      prefix == "";
@@ -188,10 +188,10 @@ size_t SymbolFileSymtab::ParseFunctions(CompileUnit &comp_unit) {
                              LLDB_INVALID_UID, // We don't have any type info
                                                // for this function
                              curr_symbol->GetMangled(), // Linker/mangled name
-                             NULL, // no return type for a code symbol...
+                             nullptr, // no return type for a code symbol...
                              func_range)); // first address range
 
-            if (func_sp.get() != NULL) {
+            if (func_sp.get() != nullptr) {
               comp_unit.AddFunction(func_sp);
               ++num_added;
             }
@@ -230,7 +230,7 @@ size_t SymbolFileSymtab::ParseVariablesForContext(const SymbolContext &sc) {
 }
 
 Type *SymbolFileSymtab::ResolveTypeUID(lldb::user_id_t type_uid) {
-  return NULL;
+  return nullptr;
 }
 
 llvm::Optional<SymbolFile::ArrayInfo>
@@ -246,7 +246,7 @@ bool SymbolFileSymtab::CompleteType(lldb_private::CompilerType &compiler_type) {
 uint32_t SymbolFileSymtab::ResolveSymbolContext(const Address &so_addr,
                                                 SymbolContextItem resolve_scope,
                                                 SymbolContext &sc) {
-  if (m_obj_file->GetSymtab() == NULL)
+  if (m_obj_file->GetSymtab() == nullptr)
     return 0;
 
   uint32_t resolved_flags = 0;
diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
index 1efbeab934c44..f279af61a1315 100644
--- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
+++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp
@@ -59,20 +59,20 @@ SymbolVendor *
 SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp,
                                 lldb_private::Stream *feedback_strm) {
   if (!module_sp)
-    return NULL;
+    return nullptr;
 
   ObjectFile *obj_file = module_sp->GetObjectFile();
   if (!obj_file)
-    return NULL;
+    return nullptr;
 
   static ConstString obj_file_elf("elf");
   ConstString obj_name = obj_file->GetPluginName();
   if (obj_name != obj_file_elf)
-    return NULL;
+    return nullptr;
 
   lldb_private::UUID uuid = obj_file->GetUUID();
   if (!uuid)
-    return NULL;
+    return nullptr;
 
   // Get the .gnu_debuglink file (if specified).
   FileSpecList file_spec_list = obj_file->GetDebugSymbolFilePaths();
@@ -84,7 +84,7 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp,
 
   // If we have no debug symbol files, then nothing to do.
   if (file_spec_list.IsEmpty())
-    return NULL;
+    return nullptr;
 
   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
   Timer scoped_timer(func_cat, "SymbolVendorELF::CreateInstance (module = %s)",
@@ -153,7 +153,7 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp,
       }
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 // PluginInterface protocol
diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetItemInfoHandler.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetItemInfoHandler.cpp
index 00b2826e1f021..c97eb98557a79 100644
--- a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetItemInfoHandler.cpp
+++ b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetItemInfoHandler.cpp
@@ -143,7 +143,7 @@ lldb::addr_t AppleGetItemInfoHandler::SetupGetItemInfoFunction(
     // function:
 
     if (!m_get_item_info_impl_code) {
-      if (g_get_item_info_function_code != NULL) {
+      if (g_get_item_info_function_code != nullptr) {
         Status error;
         m_get_item_info_impl_code.reset(
             exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetPendingItemsHandler.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetPendingItemsHandler.cpp
index ddd3cb5d7cbae..12fad7f561b03 100644
--- a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetPendingItemsHandler.cpp
+++ b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetPendingItemsHandler.cpp
@@ -147,7 +147,7 @@ lldb::addr_t AppleGetPendingItemsHandler::SetupGetPendingItemsFunction(
     // First stage is to make the ClangUtility to hold our injected function:
 
     if (!m_get_pending_items_impl_code) {
-      if (g_get_pending_items_function_code != NULL) {
+      if (g_get_pending_items_function_code != nullptr) {
         Status error;
         m_get_pending_items_impl_code.reset(
             exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
@@ -347,7 +347,7 @@ AppleGetPendingItemsHandler::GetPendingItems(Thread &thread, addr_t queue,
   options.SetIsForUtilityExpr(true);
   thread.CalculateExecutionContext(exe_ctx);
 
-  if (get_pending_items_caller == NULL) {
+  if (get_pending_items_caller == nullptr) {
     error.SetErrorString("Unable to compile function to call "
                          "__introspection_dispatch_queue_get_pending_items");
     return return_value;
diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetQueuesHandler.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetQueuesHandler.cpp
index 2b6c553dd435e..e1dabae0d587a 100644
--- a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetQueuesHandler.cpp
+++ b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetQueuesHandler.cpp
@@ -158,7 +158,7 @@ AppleGetQueuesHandler::SetupGetQueuesFunction(Thread &thread,
     // First stage is to make the ClangUtility to hold our injected function:
 
     if (!m_get_queues_impl_code_up) {
-      if (g_get_current_queues_function_code != NULL) {
+      if (g_get_current_queues_function_code != nullptr) {
         Status error;
         m_get_queues_impl_code_up.reset(
             exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
@@ -335,7 +335,7 @@ AppleGetQueuesHandler::GetCurrentQueues(Thread &thread, addr_t page_to_free,
   FunctionCaller *get_queues_caller =
       m_get_queues_impl_code_up->GetFunctionCaller();
 
-  if (get_queues_caller == NULL) {
+  if (get_queues_caller == nullptr) {
     error.SetErrorString(
         "Unable to get caller for call __introspection_dispatch_get_queues");
     return return_value;
diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetThreadItemInfoHandler.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetThreadItemInfoHandler.cpp
index f409b674c531e..7d0cbc0c100b4 100644
--- a/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetThreadItemInfoHandler.cpp
+++ b/lldb/source/Plugins/SystemRuntime/MacOSX/AppleGetThreadItemInfoHandler.cpp
@@ -154,7 +154,7 @@ lldb::addr_t AppleGetThreadItemInfoHandler::SetupGetThreadItemInfoFunction(
 
     if (!m_get_thread_item_info_impl_code) {
       Status error;
-      if (g_get_thread_item_info_function_code != NULL) {
+      if (g_get_thread_item_info_function_code != nullptr) {
         m_get_thread_item_info_impl_code.reset(
             exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
                 g_get_thread_item_info_function_code, eLanguageTypeC,
diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp
index 82be85699b2c3..91a60b17fc9b0 100644
--- a/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp
+++ b/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp
@@ -70,7 +70,7 @@ SystemRuntime *SystemRuntimeMacOSX::CreateInstance(Process *process) {
 
   if (create)
     return new SystemRuntimeMacOSX(process);
-  return NULL;
+  return nullptr;
 }
 
 // Constructor
@@ -106,7 +106,7 @@ void SystemRuntimeMacOSX::Clear(bool clear_process) {
     m_process->ClearBreakpointSiteByID(m_break_id);
 
   if (clear_process)
-    m_process = NULL;
+    m_process = nullptr;
   m_break_id = LLDB_INVALID_BREAK_ID;
 }
 
@@ -266,7 +266,7 @@ void SystemRuntimeMacOSX::ReadLibdispatchOffsetsAddress() {
 
   static ConstString g_dispatch_queue_offsets_symbol_name(
       "dispatch_queue_offsets");
-  const Symbol *dispatch_queue_offsets_symbol = NULL;
+  const Symbol *dispatch_queue_offsets_symbol = nullptr;
 
   // libdispatch symbols were in libSystem.B.dylib up through Mac OS X 10.6
   // ("Snow Leopard")
@@ -279,7 +279,7 @@ void SystemRuntimeMacOSX::ReadLibdispatchOffsetsAddress() {
 
   // libdispatch symbols are in their own dylib as of Mac OS X 10.7 ("Lion")
   // and later
-  if (dispatch_queue_offsets_symbol == NULL) {
+  if (dispatch_queue_offsets_symbol == nullptr) {
     ModuleSpec libdispatch_module_spec(FileSpec("libdispatch.dylib"));
     module_sp = m_process->GetTarget().GetImages().FindFirstModule(
         libdispatch_module_spec);
@@ -322,7 +322,7 @@ void SystemRuntimeMacOSX::ReadLibpthreadOffsetsAddress() {
 
   static ConstString g_libpthread_layout_offsets_symbol_name(
       "pthread_layout_offsets");
-  const Symbol *libpthread_layout_offsets_symbol = NULL;
+  const Symbol *libpthread_layout_offsets_symbol = nullptr;
 
   ModuleSpec libpthread_module_spec(FileSpec("libsystem_pthread.dylib"));
   ModuleSP module_sp(m_process->GetTarget().GetImages().FindFirstModule(
@@ -370,7 +370,7 @@ void SystemRuntimeMacOSX::ReadLibdispatchTSDIndexesAddress() {
 
   static ConstString g_libdispatch_tsd_indexes_symbol_name(
       "dispatch_tsd_indexes");
-  const Symbol *libdispatch_tsd_indexes_symbol = NULL;
+  const Symbol *libdispatch_tsd_indexes_symbol = nullptr;
 
   ModuleSpec libpthread_module_spec(FileSpec("libdispatch.dylib"));
   ModuleSP module_sp(m_process->GetTarget().GetImages().FindFirstModule(
@@ -726,7 +726,8 @@ void SystemRuntimeMacOSX::PopulateQueueList(
   for (ThreadSP thread_sp : m_process->Threads()) {
     if (thread_sp->GetAssociatedWithLibdispatchQueue() != eLazyBoolNo) {
       if (thread_sp->GetQueueID() != LLDB_INVALID_QUEUE_ID) {
-        if (queue_list.FindQueueByID(thread_sp->GetQueueID()).get() == NULL) {
+        if (queue_list.FindQueueByID(thread_sp->GetQueueID()).get() ==
+            nullptr) {
           QueueSP queue_sp(new Queue(m_process->shared_from_this(),
                                      thread_sp->GetQueueID(),
                                      thread_sp->GetQueueName()));
@@ -933,7 +934,7 @@ void SystemRuntimeMacOSX::PopulateQueuesUsingLibBTR(
       offset = start_of_this_item +
                m_lib_backtrace_recording_info.queue_info_data_offset;
       const char *queue_label = extractor.GetCStr(&offset);
-      if (queue_label == NULL)
+      if (queue_label == nullptr)
         queue_label = "";
 
       offset_t start_of_next_item = start_of_this_item + offset_to_next;
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
index 70c1318218304..4aa9fb634b61a 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -67,8 +67,8 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly(
 
     const bool prefer_file_cache = true;
     DisassemblerSP disasm_sp(Disassembler::DisassembleBytes(
-        m_arch, NULL, NULL, range.GetBaseAddress(), opcode_data, opcode_size,
-        99999, prefer_file_cache));
+        m_arch, nullptr, nullptr, range.GetBaseAddress(), opcode_data,
+        opcode_size, 99999, prefer_file_cache));
 
     Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_UNWIND));
 
@@ -213,7 +213,7 @@ bool UnwindAssemblyInstEmulation::GetNonCallSiteUnwindPlanFromAssembly(
               lldb_private::FormatEntity::Entry format;
               FormatEntity::Parse("${frame.pc}: ", format);
               inst->Dump(&strm, inst_list.GetMaxOpcocdeByteSize(), show_address,
-                         show_bytes, NULL, NULL, NULL, &format, 0);
+                         show_bytes, nullptr, nullptr, nullptr, &format, 0);
               log->PutString(strm.GetString());
             }
 
@@ -296,11 +296,11 @@ UnwindAssembly *
 UnwindAssemblyInstEmulation::CreateInstance(const ArchSpec &arch) {
   std::unique_ptr<EmulateInstruction> inst_emulator_up(
       EmulateInstruction::FindPlugin(arch, eInstructionTypePrologueEpilogue,
-                                     NULL));
+                                     nullptr));
   // Make sure that all prologue instructions are handled
   if (inst_emulator_up)
     return new UnwindAssemblyInstEmulation(arch, inst_emulator_up.release());
-  return NULL;
+  return nullptr;
 }
 
 // PluginInterface protocol in UnwindAssemblyParser_x86
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h
index 5232d08332192..9125bd5b1fe3c 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h
@@ -65,7 +65,7 @@ class UnwindAssemblyInstEmulation : public lldb_private::UnwindAssembly {
   UnwindAssemblyInstEmulation(const lldb_private::ArchSpec &arch,
                               lldb_private::EmulateInstruction *inst_emulator)
       : UnwindAssembly(arch), m_inst_emulator_up(inst_emulator),
-        m_range_ptr(NULL), m_unwind_plan_ptr(NULL), m_curr_row(),
+        m_range_ptr(nullptr), m_unwind_plan_ptr(nullptr), m_curr_row(),
         m_cfa_reg_info(), m_fp_is_cfa(false), m_register_values(),
         m_pushed_regs(), m_curr_row_modified(false),
         m_forward_branch_offset(0) {
diff --git a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
index 2157855009d28..ce168f0210471 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp
@@ -239,7 +239,7 @@ UnwindAssembly *UnwindAssembly_x86::CreateInstance(const ArchSpec &arch) {
   const llvm::Triple::ArchType cpu = arch.GetMachine();
   if (cpu == llvm::Triple::x86 || cpu == llvm::Triple::x86_64)
     return new UnwindAssembly_x86(arch);
-  return NULL;
+  return nullptr;
 }
 
 // PluginInterface protocol in UnwindAssemblyParser_x86
diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp
index 72a33739245cf..62321d135646e 100644
--- a/lldb/source/Symbol/ClangASTContext.cpp
+++ b/lldb/source/Symbol/ClangASTContext.cpp
@@ -6894,7 +6894,7 @@ CompilerType ClangASTContext::GetChildCompilerTypeAtIndex(
       } else {
         child_is_deref_of_parent = true;
         const char *parent_name =
-            valobj ? valobj->GetName().GetCString() : NULL;
+            valobj ? valobj->GetName().GetCString() : nullptr;
         if (parent_name) {
           child_name.assign(1, '*');
           child_name += parent_name;
@@ -6975,7 +6975,7 @@ CompilerType ClangASTContext::GetChildCompilerTypeAtIndex(
       child_is_deref_of_parent = true;
 
       const char *parent_name =
-          valobj ? valobj->GetName().GetCString() : NULL;
+          valobj ? valobj->GetName().GetCString() : nullptr;
       if (parent_name) {
         child_name.assign(1, '*');
         child_name += parent_name;
@@ -7012,7 +7012,7 @@ CompilerType ClangASTContext::GetChildCompilerTypeAtIndex(
             language_flags);
       } else {
         const char *parent_name =
-            valobj ? valobj->GetName().GetCString() : NULL;
+            valobj ? valobj->GetName().GetCString() : nullptr;
         if (parent_name) {
           child_name.assign(1, '&');
           child_name += parent_name;
@@ -7842,7 +7842,7 @@ clang::EnumDecl *ClangASTContext::GetAsEnumDecl(const CompilerType &type) {
       llvm::dyn_cast<clang::EnumType>(ClangUtil::GetCanonicalQualType(type));
   if (enutype)
     return enutype->getDecl();
-  return NULL;
+  return nullptr;
 }
 
 clang::RecordDecl *ClangASTContext::GetAsRecordDecl(const CompilerType &type) {
@@ -8261,7 +8261,7 @@ clang::CXXMethodDecl *ClangASTContext::AddMethodToCXXRecordType(
   if (is_attr_used)
     cxx_method_decl->addAttr(clang::UsedAttr::CreateImplicit(*getASTContext()));
 
-  if (mangled_name != NULL) {
+  if (mangled_name != nullptr) {
     cxx_method_decl->addAttr(
         clang::AsmLabelAttr::CreateImplicit(*getASTContext(), mangled_name));
   }
@@ -9868,7 +9868,7 @@ clang::ClassTemplateDecl *ClangASTContext::ParseClassTemplateDecl(
                                    template_basename.c_str(), tag_decl_kind,
                                    template_param_infos);
   }
-  return NULL;
+  return nullptr;
 }
 
 void ClangASTContext::CompleteTagDecl(void *baton, clang::TagDecl *decl) {
diff --git a/lldb/source/Symbol/ClangASTImporter.cpp b/lldb/source/Symbol/ClangASTImporter.cpp
index fa37ba316cf8b..3a9a8f3c4cd83 100644
--- a/lldb/source/Symbol/ClangASTImporter.cpp
+++ b/lldb/source/Symbol/ClangASTImporter.cpp
@@ -346,7 +346,7 @@ bool ClangASTImporter::CanImport(const CompilerType &type) {
     const clang::CXXRecordDecl *cxx_record_decl =
         qual_type->getAsCXXRecordDecl();
     if (cxx_record_decl) {
-      if (ResolveDeclOrigin(cxx_record_decl, NULL, NULL))
+      if (ResolveDeclOrigin(cxx_record_decl, nullptr, nullptr))
         return true;
     }
   } break;
@@ -355,7 +355,7 @@ bool ClangASTImporter::CanImport(const CompilerType &type) {
     clang::EnumDecl *enum_decl =
         llvm::cast<clang::EnumType>(qual_type)->getDecl();
     if (enum_decl) {
-      if (ResolveDeclOrigin(enum_decl, NULL, NULL))
+      if (ResolveDeclOrigin(enum_decl, nullptr, nullptr))
         return true;
     }
   } break;
@@ -370,7 +370,7 @@ bool ClangASTImporter::CanImport(const CompilerType &type) {
       // We currently can't complete objective C types through the newly added
       // ASTContext because it only supports TagDecl objects right now...
       if (class_interface_decl) {
-        if (ResolveDeclOrigin(class_interface_decl, NULL, NULL))
+        if (ResolveDeclOrigin(class_interface_decl, nullptr, nullptr))
           return true;
       }
     }
@@ -422,7 +422,7 @@ bool ClangASTImporter::Import(const CompilerType &type) {
     const clang::CXXRecordDecl *cxx_record_decl =
         qual_type->getAsCXXRecordDecl();
     if (cxx_record_decl) {
-      if (ResolveDeclOrigin(cxx_record_decl, NULL, NULL))
+      if (ResolveDeclOrigin(cxx_record_decl, nullptr, nullptr))
         return CompleteAndFetchChildren(qual_type);
     }
   } break;
@@ -431,7 +431,7 @@ bool ClangASTImporter::Import(const CompilerType &type) {
     clang::EnumDecl *enum_decl =
         llvm::cast<clang::EnumType>(qual_type)->getDecl();
     if (enum_decl) {
-      if (ResolveDeclOrigin(enum_decl, NULL, NULL))
+      if (ResolveDeclOrigin(enum_decl, nullptr, nullptr))
         return CompleteAndFetchChildren(qual_type);
     }
   } break;
@@ -446,7 +446,7 @@ bool ClangASTImporter::Import(const CompilerType &type) {
       // We currently can't complete objective C types through the newly added
       // ASTContext because it only supports TagDecl objects right now...
       if (class_interface_decl) {
-        if (ResolveDeclOrigin(class_interface_decl, NULL, NULL))
+        if (ResolveDeclOrigin(class_interface_decl, nullptr, nullptr))
           return CompleteAndFetchChildren(qual_type);
       }
     }
diff --git a/lldb/source/Symbol/CompactUnwindInfo.cpp b/lldb/source/Symbol/CompactUnwindInfo.cpp
index d2e176b141f22..3a2a4d3a09e62 100644
--- a/lldb/source/Symbol/CompactUnwindInfo.cpp
+++ b/lldb/source/Symbol/CompactUnwindInfo.cpp
@@ -186,7 +186,7 @@ bool CompactUnwindInfo::GetUnwindPlan(Target &target, Address addr,
       if (log && log->GetVerbose()) {
         StreamString strm;
         addr.Dump(
-            &strm, NULL,
+            &strm, nullptr,
             Address::DumpStyle::DumpStyleResolvedDescriptionNoFunctionArguments,
             Address::DumpStyle::DumpStyleFileAddress,
             arch.GetAddressByteSize());
diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp
index a727568d75b13..98061d2d33439 100644
--- a/lldb/source/Symbol/CompilerType.cpp
+++ b/lldb/source/Symbol/CompilerType.cpp
@@ -995,7 +995,7 @@ bool CompilerType::ReadFromMemory(lldb_private::ExecutionContext *exe_ctx,
     return false;
 
   auto byte_size =
-      GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : NULL);
+      GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr);
   if (!byte_size)
     return false;
 
@@ -1040,7 +1040,7 @@ bool CompilerType::WriteToMemory(lldb_private::ExecutionContext *exe_ctx,
     return false;
 
   auto byte_size =
-      GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : NULL);
+      GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr);
   if (!byte_size)
     return false;
 
diff --git a/lldb/source/Symbol/LocateSymbolFile.cpp b/lldb/source/Symbol/LocateSymbolFile.cpp
index 033594bae350b..bfdb6e705f4a0 100644
--- a/lldb/source/Symbol/LocateSymbolFile.cpp
+++ b/lldb/source/Symbol/LocateSymbolFile.cpp
@@ -55,9 +55,10 @@ static bool FileAtPathContainsArchAndUUID(const FileSpec &file_fspec,
       bool got_spec = module_specs.GetModuleSpecAtIndex(i, spec);
       UNUSED_IF_ASSERT_DISABLED(got_spec);
       assert(got_spec);
-      if ((uuid == NULL || (spec.GetUUIDPtr() && spec.GetUUID() == *uuid)) &&
-          (arch == NULL || (spec.GetArchitecturePtr() &&
-                            spec.GetArchitecture().IsCompatibleMatch(*arch)))) {
+      if ((uuid == nullptr || (spec.GetUUIDPtr() && spec.GetUUID() == *uuid)) &&
+          (arch == nullptr ||
+           (spec.GetArchitecturePtr() &&
+            spec.GetArchitecture().IsCompatibleMatch(*arch)))) {
         return true;
       }
     }
diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp
index 187a7b17215ab..1b7b3ce54865b 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -501,7 +501,7 @@ bool SymbolContext::GetParentOfInlinedScope(const Address &curr_frame_pc,
         }
 #ifdef LLDB_CONFIGURATION_DEBUG
         else {
-          ObjectFile *objfile = NULL;
+          ObjectFile *objfile = nullptr;
           if (module_sp) {
             SymbolVendor *symbol_vendor = module_sp->GetSymbolVendor();
             if (symbol_vendor) {
diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp
index 4670d7738c3af..f5861c291f2f6 100644
--- a/lldb/source/Symbol/Symtab.cpp
+++ b/lldb/source/Symbol/Symtab.cpp
@@ -1149,5 +1149,5 @@ const Symbol *Symtab::GetParent(Symbol *child_symbol) const {
         return symbol;
     }
   }
-  return NULL;
+  return nullptr;
 }
diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp
index 35c1a265f829e..4ee8330ce288f 100644
--- a/lldb/source/Symbol/Type.cpp
+++ b/lldb/source/Symbol/Type.cpp
@@ -978,7 +978,7 @@ TypeSystem *TypeImpl::GetTypeSystem(bool prefer_dynamic) {
     }
     return m_static_type.GetTypeSystem();
   }
-  return NULL;
+  return nullptr;
 }
 
 bool TypeImpl::GetDescription(lldb_private::Stream &strm,
diff --git a/lldb/source/Target/CPPLanguageRuntime.cpp b/lldb/source/Target/CPPLanguageRuntime.cpp
index ef1202170052c..70e5aee3e45b1 100644
--- a/lldb/source/Target/CPPLanguageRuntime.cpp
+++ b/lldb/source/Target/CPPLanguageRuntime.cpp
@@ -214,7 +214,7 @@ CPPLanguageRuntime::FindLibCppStdFunctionCallableInfo(
       return llvm::Regex::escape(first_template_parameter.str()) +
              R"(::operator\(\)\(.*\))";
 
-    if (symbol != NULL &&
+    if (symbol != nullptr &&
         symbol->GetName().GetStringRef().contains("__invoke")) {
 
       llvm::StringRef symbol_name = symbol->GetName().GetStringRef();
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 2be3774d076a8..b018a3115a026 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1425,7 +1425,7 @@ Status Process::ResumeSynchronous(Stream *stream) {
   Status error = PrivateResume();
   if (error.Success()) {
     StateType state =
-        WaitForProcessToStop(llvm::None, NULL, true, listener_sp, stream);
+        WaitForProcessToStop(llvm::None, nullptr, true, listener_sp, stream);
     const bool must_be_alive =
         false; // eStateExited is ok, so this must be false
     if (!StateIsStoppedState(state, must_be_alive))
@@ -3638,7 +3638,7 @@ void Process::ControlPrivateStateThread(uint32_t signal) {
     }
 
     if (signal == eBroadcastInternalStateControlStop) {
-      thread_result_t result = NULL;
+      thread_result_t result = nullptr;
       m_private_state_thread.Join(&result);
       m_private_state_thread.Reset();
     }
@@ -3913,7 +3913,7 @@ thread_result_t Process::RunPrivateStateThread(bool is_secondary_thread) {
   // it was doing yet, so don't try to change it on the way out.
   if (!is_secondary_thread)
     m_public_run_lock.SetStopped();
-  return NULL;
+  return nullptr;
 }
 
 // Process Event Data
diff --git a/lldb/source/Target/SectionLoadHistory.cpp b/lldb/source/Target/SectionLoadHistory.cpp
index 391a6c22f7f58..ec16b58b4451c 100644
--- a/lldb/source/Target/SectionLoadHistory.cpp
+++ b/lldb/source/Target/SectionLoadHistory.cpp
@@ -97,7 +97,7 @@ SectionLoadList &SectionLoadHistory::GetCurrentSectionLoadList() {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
   SectionLoadList *section_load_list =
       GetSectionLoadListForStopID(eStopIDNow, read_only);
-  assert(section_load_list != NULL);
+  assert(section_load_list != nullptr);
   return *section_load_list;
 }
 
diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp
index 8f0ab34dbd1ec..6db0c2b037e9b 100644
--- a/lldb/source/Target/StopInfo.cpp
+++ b/lldb/source/Target/StopInfo.cpp
@@ -1196,7 +1196,7 @@ StopInfo::GetCrashingDereference(StopInfoSP &stop_info_sp,
 
   address_loc += (sizeof(address_string) - 1);
 
-  uint64_t address = strtoull(address_loc, 0, 0);
+  uint64_t address = strtoull(address_loc, nullptr, 0);
   if (crashing_address) {
     *crashing_address = address;
   }
diff --git a/lldb/source/Target/ThreadPlanShouldStopHere.cpp b/lldb/source/Target/ThreadPlanShouldStopHere.cpp
index 111c5b4f6114b..a0b7072a10719 100644
--- a/lldb/source/Target/ThreadPlanShouldStopHere.cpp
+++ b/lldb/source/Target/ThreadPlanShouldStopHere.cpp
@@ -130,7 +130,7 @@ ThreadPlanSP ThreadPlanShouldStopHere::DefaultStepFromHereCallback(
                     "Queueing StepInRange plan to step through line 0 code.");
 
       return_plan_sp = current_plan->GetThread().QueueThreadPlanForStepInRange(
-          false, range, sc, NULL, eOnlyDuringStepping, status,
+          false, range, sc, nullptr, eOnlyDuringStepping, status,
           eLazyBoolCalculate, eLazyBoolNo);
     }
   }
diff --git a/lldb/tools/lldb-mi/MICmdCmdExec.cpp b/lldb/tools/lldb-mi/MICmdCmdExec.cpp
index fd13a919242bd..ffdf171aef08a 100644
--- a/lldb/tools/lldb-mi/MICmdCmdExec.cpp
+++ b/lldb/tools/lldb-mi/MICmdCmdExec.cpp
@@ -983,13 +983,13 @@ bool CMICmdCmdExecArguments::Execute() {
   }
 
   lldb::SBLaunchInfo sbLaunchInfo = sbTarget.GetLaunchInfo();
-  sbLaunchInfo.SetArguments(NULL, false);
+  sbLaunchInfo.SetArguments(nullptr, false);
 
   CMIUtilString strArg;
   size_t nArgIndex = 0;
   while (pArgArguments->GetExpectedOption<CMICmdArgValString, CMIUtilString>(
       strArg, nArgIndex)) {
-    const char *argv[2] = {strArg.c_str(), NULL};
+    const char *argv[2] = {strArg.c_str(), nullptr};
     sbLaunchInfo.SetArguments(argv, true);
     ++nArgIndex;
   }
diff --git a/lldb/tools/lldb-mi/MICmdCmdTarget.cpp b/lldb/tools/lldb-mi/MICmdCmdTarget.cpp
index b3e0beac8d47b..18ce038b16858 100644
--- a/lldb/tools/lldb-mi/MICmdCmdTarget.cpp
+++ b/lldb/tools/lldb-mi/MICmdCmdTarget.cpp
@@ -265,7 +265,7 @@ bool CMICmdCmdTargetAttach::Execute() {
   // If the current target is invalid, create one
   lldb::SBTarget target = rSessionInfo.GetTarget();
   if (!target.IsValid()) {
-    target = rSessionInfo.GetDebugger().CreateTarget(NULL);
+    target = rSessionInfo.GetDebugger().CreateTarget(nullptr);
     if (!target.IsValid()) {
       SetError(CMIUtilString::Format(MIRSRC(IDS_CMD_ERR_INVALID_TARGET_CURRENT),
                                      m_cmdData.strMiCmd.c_str()));
diff --git a/lldb/tools/lldb-mi/MICmnBase.cpp b/lldb/tools/lldb-mi/MICmnBase.cpp
index 72096d56febec..9d87064aa64cf 100644
--- a/lldb/tools/lldb-mi/MICmnBase.cpp
+++ b/lldb/tools/lldb-mi/MICmnBase.cpp
@@ -32,7 +32,7 @@ CMICmnBase::CMICmnBase()
 // Return:  None.
 // Throws:  None.
 //--
-CMICmnBase::~CMICmnBase() { m_pLog = NULL; }
+CMICmnBase::~CMICmnBase() { m_pLog = nullptr; }
 
 //++
 // Details: Retrieve whether *this object has an error description set.
diff --git a/lldb/tools/lldb-mi/MICmnMIOutOfBandRecord.cpp b/lldb/tools/lldb-mi/MICmnMIOutOfBandRecord.cpp
index 6cef283512809..59856a6f165e7 100644
--- a/lldb/tools/lldb-mi/MICmnMIOutOfBandRecord.cpp
+++ b/lldb/tools/lldb-mi/MICmnMIOutOfBandRecord.cpp
@@ -53,7 +53,7 @@ MapOutOfBandToText(CMICmnMIOutOfBandRecord::OutOfBand_e veType) {
     return "";
   }
   assert(false && "unknown CMICmnMIOutofBandRecord::OutOfBand_e");
-  return NULL;
+  return nullptr;
 }
 
 static const char *
@@ -95,7 +95,7 @@ MapOutOfBandToToken(CMICmnMIOutOfBandRecord::OutOfBand_e veType) {
     return "&";
   }
   assert(false && "unknown CMICmnMIOutofBandRecord::OutOfBand_e");
-  return NULL;
+  return nullptr;
 }
 
 //++
diff --git a/lldb/tools/lldb-mi/MICmnMIResultRecord.cpp b/lldb/tools/lldb-mi/MICmnMIResultRecord.cpp
index 00d1ce13d3457..93bb5a60ec3dc 100644
--- a/lldb/tools/lldb-mi/MICmnMIResultRecord.cpp
+++ b/lldb/tools/lldb-mi/MICmnMIResultRecord.cpp
@@ -34,7 +34,7 @@ MapResultClassToResultClassText(CMICmnMIResultRecord::ResultClass_e veType) {
     return "exit";
   }
   assert(false && "unknown CMICmnMIResultRecord::ResultClass_e");
-  return NULL;
+  return nullptr;
 }
 
 //++
diff --git a/lldb/tools/lldb-mi/MIDriver.cpp b/lldb/tools/lldb-mi/MIDriver.cpp
index 02361f4ccb97d..3bf888e303d41 100644
--- a/lldb/tools/lldb-mi/MIDriver.cpp
+++ b/lldb/tools/lldb-mi/MIDriver.cpp
@@ -51,7 +51,7 @@ const CMIUtilString CMIDriver::ms_constAppNameLong(MIRSRC(IDS_MI_APPNAME_LONG));
 //--
 CMIDriver::CMIDriver()
     : m_bFallThruToOtherDriverEnabled(false), m_bDriverIsExiting(false),
-      m_handleMainThread(0), m_rStdin(CMICmnStreamStdin::Instance()),
+      m_handleMainThread(nullptr), m_rStdin(CMICmnStreamStdin::Instance()),
       m_rLldbDebugger(CMICmnLLDBDebugger::Instance()),
       m_rStdOut(CMICmnStreamStdout::Instance()),
       m_eCurrentDriverState(eDriverState_NotRunning),
@@ -737,7 +737,7 @@ FILE *CMIDriver::GetStdout() const {
   // available before *this driver has been initialized! Flaw?
 
   // Do not want to pass through driver to write to stdout
-  return NULL;
+  return nullptr;
 }
 
 //++
diff --git a/lldb/tools/lldb-mi/MIDriverBase.cpp b/lldb/tools/lldb-mi/MIDriverBase.cpp
index 9958770ff42d0..b8844e288430f 100644
--- a/lldb/tools/lldb-mi/MIDriverBase.cpp
+++ b/lldb/tools/lldb-mi/MIDriverBase.cpp
@@ -30,7 +30,7 @@ CMIDriverBase::CMIDriverBase()
 // Return:  None.
 // Throws:  None.
 //--
-CMIDriverBase::~CMIDriverBase() { m_pDriverFallThru = NULL; }
+CMIDriverBase::~CMIDriverBase() { m_pDriverFallThru = nullptr; }
 
 //++
 // Details: This function allows *this driver to call on another driver to
diff --git a/lldb/tools/lldb-mi/MIDriverMgr.cpp b/lldb/tools/lldb-mi/MIDriverMgr.cpp
index 26c4add8e3f9d..26195cdb74141 100644
--- a/lldb/tools/lldb-mi/MIDriverMgr.cpp
+++ b/lldb/tools/lldb-mi/MIDriverMgr.cpp
@@ -135,7 +135,7 @@ bool CMIDriverMgr::UnregisterDriverAll() {
   }
 
   m_mapDriverIdToDriver.clear();
-  m_pDriverCurrent = NULL;
+  m_pDriverCurrent = nullptr;
 
   return MIstatus::success;
 }
diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp
index a0bc5f2c68ea9..b479c2197bffa 100644
--- a/lldb/tools/lldb-server/lldb-gdbserver.cpp
+++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp
@@ -87,22 +87,22 @@ static int g_verbose = 0;
 static struct option g_long_options[] = {
     {"debug", no_argument, &g_debug, 1},
     {"verbose", no_argument, &g_verbose, 1},
-    {"log-file", required_argument, NULL, 'l'},
-    {"log-channels", required_argument, NULL, 'c'},
-    {"attach", required_argument, NULL, 'a'},
-    {"named-pipe", required_argument, NULL, 'N'},
-    {"pipe", required_argument, NULL, 'U'},
-    {"native-regs", no_argument, NULL,
+    {"log-file", required_argument, nullptr, 'l'},
+    {"log-channels", required_argument, nullptr, 'c'},
+    {"attach", required_argument, nullptr, 'a'},
+    {"named-pipe", required_argument, nullptr, 'N'},
+    {"pipe", required_argument, nullptr, 'U'},
+    {"native-regs", no_argument, nullptr,
      'r'}, // Specify to use the native registers instead of the gdb defaults
            // for the architecture.  NOTE: this is a do-nothing arg as it's
            // behavior is default now.  FIXME remove call from lldb-platform.
-    {"reverse-connect", no_argument, NULL,
+    {"reverse-connect", no_argument, nullptr,
      'R'}, // Specifies that llgs attaches to the client address:port rather
            // than llgs listening for a connection from address on port.
-    {"setsid", no_argument, NULL,
+    {"setsid", no_argument, nullptr,
      'S'}, // Call setsid() to make llgs run in its own session.
-    {"fd", required_argument, NULL, 'F'},
-    {NULL, 0, NULL, 0}};
+    {"fd", required_argument, nullptr, 'F'},
+    {nullptr, 0, nullptr, 0}};
 
 // Watch for signals
 static int g_sighup_received_count = 0;
diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp
index 595a0e82da205..af78f624073e2 100644
--- a/lldb/tools/lldb-server/lldb-platform.cpp
+++ b/lldb/tools/lldb-server/lldb-platform.cpp
@@ -48,16 +48,16 @@ static int g_server = 0;
 static struct option g_long_options[] = {
     {"debug", no_argument, &g_debug, 1},
     {"verbose", no_argument, &g_verbose, 1},
-    {"log-file", required_argument, NULL, 'l'},
-    {"log-channels", required_argument, NULL, 'c'},
-    {"listen", required_argument, NULL, 'L'},
-    {"port-offset", required_argument, NULL, 'p'},
-    {"gdbserver-port", required_argument, NULL, 'P'},
-    {"min-gdbserver-port", required_argument, NULL, 'm'},
-    {"max-gdbserver-port", required_argument, NULL, 'M'},
-    {"socket-file", required_argument, NULL, 'f'},
+    {"log-file", required_argument, nullptr, 'l'},
+    {"log-channels", required_argument, nullptr, 'c'},
+    {"listen", required_argument, nullptr, 'L'},
+    {"port-offset", required_argument, nullptr, 'p'},
+    {"gdbserver-port", required_argument, nullptr, 'P'},
+    {"min-gdbserver-port", required_argument, nullptr, 'm'},
+    {"max-gdbserver-port", required_argument, nullptr, 'M'},
+    {"socket-file", required_argument, nullptr, 'f'},
     {"server", no_argument, &g_server, 1},
-    {NULL, 0, NULL, 0}};
+    {nullptr, 0, nullptr, 0}};
 
 #if defined(__APPLE__)
 #define LOW_PORT (IPPORT_RESERVED)
diff --git a/lldb/unittests/Utility/StringExtractorTest.cpp b/lldb/unittests/Utility/StringExtractorTest.cpp
index 6a67a8c3a1505..b86e4660257ae 100644
--- a/lldb/unittests/Utility/StringExtractorTest.cpp
+++ b/lldb/unittests/Utility/StringExtractorTest.cpp
@@ -318,7 +318,7 @@ TEST_F(StringExtractorTest, GetHexBytes_Underflow) {
   ASSERT_EQ(UINT64_MAX, ex.GetFilePos());
   ASSERT_EQ(false, ex.Empty());
   ASSERT_EQ(0u, ex.GetBytesLeft());
-  ASSERT_EQ(0, ex.Peek());
+  ASSERT_EQ(nullptr, ex.Peek());
 }
 
 TEST_F(StringExtractorTest, GetHexBytes_Partial) {
diff --git a/lldb/unittests/tools/lldb-server/inferior/thread_inferior.cpp b/lldb/unittests/tools/lldb-server/inferior/thread_inferior.cpp
index 79d5f91f340be..278b975dd91e5 100644
--- a/lldb/unittests/tools/lldb-server/inferior/thread_inferior.cpp
+++ b/lldb/unittests/tools/lldb-server/inferior/thread_inferior.cpp
@@ -28,7 +28,7 @@ int main(int argc, char* argv[]) {
   }
 
   // Cause a break.
-  volatile char *p = NULL;
+  volatile char *p = nullptr;
   *p = 'a';
 
   delay.store(false);
diff --git a/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp b/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp
index 3cc0830dd3daf..de4c46b32b432 100644
--- a/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp
+++ b/lldb/unittests/tools/lldb-server/tests/MessageObjects.cpp
@@ -154,10 +154,10 @@ Expected<RegisterInfo> RegisterInfoParser::create(StringRef Response) {
           LLDB_INVALID_REGNUM, // process plugin reg num
           LLDB_INVALID_REGNUM  // native register number
       },
-      NULL,
-      NULL,
-      NULL, // Dwarf expression opcode bytes pointer
-      0     // Dwarf expression opcode bytes length
+      nullptr,
+      nullptr,
+      nullptr, // Dwarf expression opcode bytes pointer
+      0        // Dwarf expression opcode bytes length
   };
   Info.name = ConstString(Elements["name"]).GetCString();
   if (!Info.name)
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index e7cd370df6ff2..a05cf8130d3cf 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -63,7 +63,7 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
   template <typename AltPairT>
   DenseMapPair(AltPairT &&AltPair,
                typename std::enable_if<std::is_convertible<
-                   AltPairT, std::pair<KeyT, ValueT>>::value>::type * = 0)
+                   AltPairT, std::pair<KeyT, ValueT>>::value>::type * = nullptr)
       : std::pair<KeyT, ValueT>(std::forward<AltPairT>(AltPair)) {}
 
   KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 6638cb8e721f9..dde9ae7bca636 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -513,7 +513,7 @@ struct NodeArrayNode : public Node {
 
   void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
 
-  Node **Nodes = 0;
+  Node **Nodes = nullptr;
   size_t Count = 0;
 };
 

From 46165b24093af151bfa72ae6415a599635ad383f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 23 May 2019 11:27:28 +0000
Subject: [PATCH 0021/1176] [AMDGPU] Regenerate vector sub tests

llvm-svn: 361485
---
 llvm/test/CodeGen/AMDGPU/sub.v2i16.ll | 656 +++++++++++++++++++++-----
 1 file changed, 525 insertions(+), 131 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index 442193f72295d..c7a08a6651156 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -1,16 +1,56 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX89,GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,GFX89,GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,GFX89,GFX9
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,GFX89,VI
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; GCN-LABEL: {{^}}v_test_sub_v2i16:
-; GFX89: {{flat|global}}_load_dword
-; GFX89: {{flat|global}}_load_dword
-
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-
-; VI-DAG: v_sub_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; VI-DAG: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
+; GFX9-LABEL: v_test_sub_v2i16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    global_load_dword v1, v[2:3], off
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s7
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s6, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    flat_load_dword v1, v[2:3]
+; VI-NEXT:    s_mov_b32 s0, s4
+; VI-NEXT:    s_mov_b32 s1, s5
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u16_e32 v2, v0, v1
+; VI-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_e32 v0, v2, v0
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -22,15 +62,46 @@ define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
   ret void
 }
 
-; GCN-LABEL: {{^}}s_test_sub_v2i16:
-; GFX9: s_load_dword [[VAL0:s[0-9]+]]
-; GFX9: s_load_dword [[VAL1:s[0-9]+]]
-; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]]
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]]
-
-; VI: s_sub_i32
-; VI: s_sub_i32
 define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0, <2 x i16> addrspace(4)* %in1) #1 {
+; GFX9-LABEL: s_test_sub_v2i16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_load_dword s6, s[6:7], 0x0
+; GFX9-NEXT:    s_load_dword s7, s[0:1], 0x0
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-NEXT:    v_pk_sub_i16 v0, s6, v0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: s_test_sub_v2i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_load_dword s6, s[6:7], 0x0
+; VI-NEXT:    s_load_dword s7, s[0:1], 0x0
+; VI-NEXT:    s_mov_b32 s0, s4
+; VI-NEXT:    s_mov_b32 s1, s5
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_lshr_b32 s4, s6, 16
+; VI-NEXT:    s_lshr_b32 s5, s7, 16
+; VI-NEXT:    s_sub_i32 s4, s4, s5
+; VI-NEXT:    s_sub_i32 s6, s6, s7
+; VI-NEXT:    s_and_b32 s5, s6, 0xffff
+; VI-NEXT:    s_lshl_b32 s4, s4, 16
+; VI-NEXT:    s_or_b32 s4, s5, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s4
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
   %b = load <2 x i16>, <2 x i16> addrspace(4)* %in1
   %add = sub <2 x i16> %a, %b
@@ -38,10 +109,16 @@ define amdgpu_kernel void @s_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
   ret void
 }
 
-; GCN-LABEL: {{^}}s_test_sub_self_v2i16:
-; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]]
-; GCN: buffer_store_dword [[ZERO]]
 define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in0) #1 {
+; GCN-LABEL: s_test_sub_self_v2i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
   %a = load <2 x i16>, <2 x i16> addrspace(4)* %in0
   %add = sub <2 x i16> %a, %a
   store <2 x i16> %add, <2 x i16> addrspace(1)* %out
@@ -49,33 +126,83 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <
 }
 
 ; FIXME: VI should not scalarize arg access.
-; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg:
-; GCN: s_load_dword s
-; GCN: s_load_dword s
-
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-
-; VI: s_sub_i32
-; VI: s_sub_i32
-; VI: s_lshl_b32
-; VI: s_and_b32
 define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
+; GFX9-LABEL: s_test_sub_v2i16_kernarg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x30
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_pk_sub_i16 v0, s2, v0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: s_test_sub_v2i16_kernarg:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x30
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_lshr_b32 s1, s2, 16
+; VI-NEXT:    s_lshr_b32 s3, s0, 16
+; VI-NEXT:    s_sub_i32 s1, s1, s3
+; VI-NEXT:    s_sub_i32 s0, s2, s0
+; VI-NEXT:    s_lshl_b32 s1, s1, 16
+; VI-NEXT:    s_and_b32 s0, s0, 0xffff
+; VI-NEXT:    s_or_b32 s0, s0, s1
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
   %add = sub <2 x i16> %a, %b
   store <2 x i16> %add, <2 x i16> addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_sub_v2i16_constant:
-; GFX89-DAG: {{flat|global}}_load_dword
-
-; GFX9-DAG: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}}
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
-
-; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}}
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI: v_or_b32
 define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_constant:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_mov_b32 s8, 0x1c8007b
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, s8
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_constant:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xfffffe38
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s5, s1
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u16_e32 v1, 0xffffff85, v0
+; VI-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v1, v0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -86,14 +213,46 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; GCN-LABEL: {{^}}v_test_sub_v2i16_neg_constant:
-; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}}
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]]
-
-; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_neg_constant:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_mov_b32 s8, 0xfc21fcb3
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, s8
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_neg_constant:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0x3df
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s5, s1
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u16_e32 v1, 0x34d, v0
+; VI-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v1, v0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -103,15 +262,45 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)*
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_neg1:
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, -1 op_sel_hi:[1,0]{{$}}
-
-; VI-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; VI-DAG: flat_load_dword [[LOAD:v[0-9]+]]
-; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD]], [[ONE]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD]]
-; VI: v_or_b32_e32
 define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_inline_neg1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_inline_neg1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 1
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s5, s1
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u16_e32 v1, 1, v0
+; VI-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v1, v0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -121,14 +310,44 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)*
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_lo_zero_hi:
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, 32{{$}}
-
-; VI: flat_load_dword [[LOAD:v[0-9]+]]
-; VI-DAG: v_and_b32_e32 [[AND:v[0-9]+]], 0xffff0000, [[LOAD]]
-; VI-DAG: v_add_u16_e32 [[ADD:v[0-9]+]], 0xffffffe0, [[LOAD]]
-; VI: v_or_b32_e32 v{{[0-9]+}}, [[ADD]], [[AND]]
 define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_inline_lo_zero_hi:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, 32
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_inline_lo_zero_hi:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s5, s1
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v0
+; VI-NEXT:    v_add_u16_e32 v0, 0xffffffe0, v0
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -139,17 +358,45 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac
 }
 
 ; The high element gives fp
-; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_fp_split:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0
-; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-
-; VI-NOT: v_subrev_i16
-; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080
-; VI: flat_load_dword
-; VI: v_add_u16_sdwa [[ADD:v[0-9]+]], v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NOT: v_subrev_i16
-; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_inline_fp_split:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    s_mov_b32 s8, 1.0
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s2, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, s8
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_inline_fp_split:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v2, 0xffffc080
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s5, s1
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u16_sdwa v1, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -160,22 +407,55 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; GCN-LABEL: {{^}}v_test_sub_v2i16_zext_to_v2i32:
-; GFX9: global_load_dword [[A:v[0-9]+]]
-; GFX9: global_load_dword [[B:v[0-9]+]]
-
-; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
-; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
-; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}
-
-; VI: flat_load_dword v[[A:[0-9]+]]
-; VI: flat_load_dword v[[B:[0-9]+]]
-
-; VI: v_sub_u16_e32 v[[ADD_LO:[0-9]+]], v[[A]], v[[B]]
-; VI-NEXT: v_sub_u16_sdwa v[[ADD_HI:[0-9]+]], v[[A]], v[[B]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[ADD_LO]]:[[ADD_HI]]{{\]}}
 define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_zext_to_v2i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    global_load_dword v1, v[2:3], off
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_zext_to_v2i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s7
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s6, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    flat_load_dword v1, v[0:1]
+; VI-NEXT:    flat_load_dword v2, v[2:3]
+; VI-NEXT:    s_mov_b32 s0, s4
+; VI-NEXT:    s_mov_b32 s1, s5
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u16_e32 v0, v1, v2
+; VI-NEXT:    v_sub_u16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -189,21 +469,59 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; GCN-LABEL: {{^}}v_test_sub_v2i16_zext_to_v2i64:
-; GFX9: global_load_dword [[A:v[0-9]+]]
-; GFX9: global_load_dword [[B:v[0-9]+]]
-
-; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
-; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
-; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9: buffer_store_dwordx4
-
-; VI: flat_load_dword [[A:v[0-9]+]]
-; VI: flat_load_dword [[B:v[0-9]+]]
-; VI: v_sub_u16_e32 v[[ADD_LO:[0-9]+]], [[A]], [[B]]
-; VI: v_sub_u16_sdwa v[[ADD_HI:[0-9]+]], [[A]], [[B]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI: buffer_store_dwordx4
 define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_zext_to_v2i64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    global_load_dword v1, v[2:3], off
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v1, v0, v1
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_zext_to_v2i64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v3, s7
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s6, v0
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    flat_load_dword v2, v[2:3]
+; VI-NEXT:    flat_load_dword v4, v[4:5]
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_mov_b32 s0, s4
+; VI-NEXT:    s_mov_b32 s1, s5
+; VI-NEXT:    v_mov_b32_e32 v3, v1
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u16_e32 v0, v2, v4
+; VI-NEXT:    v_sub_u16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -217,22 +535,57 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; GCN-LABEL: {{^}}v_test_sub_v2i16_sext_to_v2i32:
-; GFX9: global_load_dword [[A:v[0-9]+]]
-; GFX9: global_load_dword [[B:v[0-9]+]]
-
-; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
-; GFX9-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16
-; GFX9-DAG: v_ashrrev_i32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9: buffer_store_dwordx2 v{{\[}}[[ELT0]]:[[ELT1]]{{\]}}
-
-; VI: flat_load_dword
-; VI: flat_load_dword
-; VI-DAG: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-DAG: v_sub_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-
-; VI: buffer_store_dwordx2
 define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_sext_to_v2i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    global_load_dword v1, v[2:3], off
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_sext_to_v2i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s7
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s6, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    flat_load_dword v1, v[2:3]
+; VI-NEXT:    s_mov_b32 s0, s4
+; VI-NEXT:    s_mov_b32 s1, s5
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_sub_u16_e32 v0, v0, v1
+; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT:    v_bfe_i32 v1, v2, 0, 16
+; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid
@@ -246,21 +599,62 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1)
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; GCN-LABEL: {{^}}v_test_sub_v2i16_sext_to_v2i64:
-; GCN: {{flat|global}}_load_dword
-; GCN: {{flat|global}}_load_dword
-
-; GFX9: v_pk_sub_i16
-; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
-
-; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI: v_sub_u16_e32
-
-; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
-; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
-; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
 define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
+; GFX9-LABEL: v_test_sub_v2i16_sext_to_v2i64:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s7
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s6, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v3, s1
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-NEXT:    global_load_dword v0, v[0:1], off
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_sub_i16 v1, v0, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 16
+; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
+; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_sub_v2i16_sext_to_v2i64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s7
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s6, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    flat_load_dword v2, v[2:3]
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_mov_b32 s0, s4
+; VI-NEXT:    s_mov_b32 s1, s5
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u16_sdwa v1, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_sub_u16_e32 v0, v0, v2
+; VI-NEXT:    v_bfe_i32 v2, v1, 0, 16
+; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; VI-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i64>, <2 x i64> addrspace(1)* %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in0, i32 %tid

From 7d230d2661bfd4fdb41caf2d34e923b42c758e9f Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Thu, 23 May 2019 11:58:03 +0000
Subject: [PATCH 0022/1176] [clangd] Bump vscode extension version

llvm-svn: 361486
---
 clang-tools-extra/clangd/clients/clangd-vscode/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/package.json b/clang-tools-extra/clangd/clients/clangd-vscode/package.json
index b8125808d53af..1a423292d8afd 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/package.json
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/package.json
@@ -2,7 +2,7 @@
     "name": "vscode-clangd",
     "displayName": "vscode-clangd",
     "description": "Clang Language Server",
-    "version": "0.0.12",
+    "version": "0.0.13",
     "publisher": "llvm-vs-code-extensions",
     "homepage": "https://clang.llvm.org/extra/clangd.html",
     "engines": {

From 1520dafa20ede121c950652141a7f0d639d308fc Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Thu, 23 May 2019 12:01:26 +0000
Subject: [PATCH 0023/1176] [clang-tidy] New check calling out uses of +new in
 Objective-C code

Summary:
Google's Objective-C style guide forbids calling or overriding +new to instantiate objects. This check warns on violations.

Style guide reference: https://google.github.io/styleguide/objcguide.html#do-not-use-new

Patch by Michael Wyman.

Reviewers: benhamilton, aaron.ballman, JonasToth, gribozavr, ilya-biryukov, stephanemoore, mwyman

Reviewed By: aaron.ballman, gribozavr, stephanemoore, mwyman

Subscribers: stephanemoore, xazax.hun, Eugene.Zelenko, mgorny, cfe-commits

Tags: #clang, #clang-tools-extra

Differential Revision: https://reviews.llvm.org/D61350

llvm-svn: 361487
---
 .../google/AvoidNSObjectNewCheck.cpp          | 130 ++++++++++++++++++
 .../clang-tidy/google/AvoidNSObjectNewCheck.h |  38 +++++
 .../clang-tidy/google/CMakeLists.txt          |   1 +
 .../clang-tidy/google/GoogleTidyModule.cpp    |   3 +
 clang-tools-extra/docs/ReleaseNotes.rst       |   6 +
 .../checks/google-objc-avoid-nsobject-new.rst |  29 ++++
 .../docs/clang-tidy/checks/list.rst           |   1 +
 .../google-objc-avoid-nsobject-new.m          |  80 +++++++++++
 8 files changed, 288 insertions(+)
 create mode 100644 clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp
 create mode 100644 clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/google-objc-avoid-nsobject-new.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/google-objc-avoid-nsobject-new.m

diff --git a/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp
new file mode 100644
index 0000000000000..1b147ace89868
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp
@@ -0,0 +1,130 @@
+//===--- AvoidNSObjectNewCheck.cpp - clang-tidy ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AvoidNSObjectNewCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <map>
+#include <string>
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace google {
+namespace objc {
+
+static bool isMessageExpressionInsideMacro(const ObjCMessageExpr *Expr) {
+  SourceLocation ReceiverLocation = Expr->getReceiverRange().getBegin();
+  if (ReceiverLocation.isMacroID())
+    return true;
+
+  SourceLocation SelectorLocation = Expr->getSelectorStartLoc();
+  if (SelectorLocation.isMacroID())
+    return true;
+
+  return false;
+}
+
+// Walk up the class hierarchy looking for an -init method, returning true
+// if one is found and has not been marked unavailable.
+static bool isInitMethodAvailable(const ObjCInterfaceDecl *ClassDecl) {
+  while (ClassDecl != nullptr) {
+    for (const auto *MethodDecl : ClassDecl->instance_methods()) {
+      if (MethodDecl->getSelector().getAsString() == "init")
+        return !MethodDecl->isUnavailable();
+    }
+    ClassDecl = ClassDecl->getSuperClass();
+  }
+
+  // No -init method found in the class hierarchy. This should occur only rarely
+  // in Objective-C code, and only really applies to classes not derived from
+  // NSObject.
+  return false;
+}
+
+// Returns the string for the Objective-C message receiver. Keeps any generics
+// included in the receiver class type, which are stripped if the class type is
+// used. While the generics arguments will not make any difference to the
+// returned code at this time, the style guide allows them and they should be
+// left in any fix-it hint.
+static StringRef getReceiverString(SourceRange ReceiverRange,
+                                   const SourceManager &SM,
+                                   const LangOptions &LangOpts) {
+  CharSourceRange CharRange = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(ReceiverRange), SM, LangOpts);
+  return Lexer::getSourceText(CharRange, SM, LangOpts);
+}
+
+static FixItHint getCallFixItHint(const ObjCMessageExpr *Expr,
+                                  const SourceManager &SM,
+                                  const LangOptions &LangOpts) {
+  // Check whether the messaged class has a known factory method to use instead
+  // of -init.
+  StringRef Receiver =
+      getReceiverString(Expr->getReceiverRange(), SM, LangOpts);
+  // Some classes should use standard factory methods instead of alloc/init.
+  std::map<StringRef, StringRef> ClassToFactoryMethodMap = {{"NSDate", "date"},
+                                                            {"NSNull", "null"}};
+  auto FoundClassFactory = ClassToFactoryMethodMap.find(Receiver);
+  if (FoundClassFactory != ClassToFactoryMethodMap.end()) {
+    StringRef ClassName = FoundClassFactory->first;
+    StringRef FactorySelector = FoundClassFactory->second;
+    std::string NewCall =
+        llvm::formatv("[{0} {1}]", ClassName, FactorySelector);
+    return FixItHint::CreateReplacement(Expr->getSourceRange(), NewCall);
+  }
+
+  if (isInitMethodAvailable(Expr->getReceiverInterface())) {
+    std::string NewCall = llvm::formatv("[[{0} alloc] init]", Receiver);
+    return FixItHint::CreateReplacement(Expr->getSourceRange(), NewCall);
+  }
+
+  return {}; // No known replacement available.
+}
+
+void AvoidNSObjectNewCheck::registerMatchers(MatchFinder *Finder) {
+  if (!getLangOpts().ObjC)
+    return;
+
+  // Add two matchers, to catch calls to +new and implementations of +new.
+  Finder->addMatcher(
+      objcMessageExpr(isClassMessage(), hasSelector("new")).bind("new_call"),
+      this);
+  Finder->addMatcher(
+      objcMethodDecl(isClassMethod(), isDefinition(), hasName("new"))
+          .bind("new_override"),
+      this);
+}
+
+void AvoidNSObjectNewCheck::check(const MatchFinder::MatchResult &Result) {
+  if (const auto *CallExpr =
+          Result.Nodes.getNodeAs<ObjCMessageExpr>("new_call")) {
+    // Don't warn if the call expression originates from a macro expansion.
+    if (isMessageExpressionInsideMacro(CallExpr))
+      return;
+
+    diag(CallExpr->getExprLoc(), "do not create objects with +new")
+        << getCallFixItHint(CallExpr, *Result.SourceManager,
+                            Result.Context->getLangOpts());
+  }
+
+  if (const auto *DeclExpr =
+          Result.Nodes.getNodeAs<ObjCMethodDecl>("new_override")) {
+    diag(DeclExpr->getBeginLoc(), "classes should not override +new");
+  }
+}
+
+} // namespace objc
+} // namespace google
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h
new file mode 100644
index 0000000000000..97988c903e8e5
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h
@@ -0,0 +1,38 @@
+//===--- AvoidNSObjectNewCheck.h - clang-tidy -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_AVOIDNSOBJECTNEWCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_AVOIDNSOBJECTNEWCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace google {
+namespace objc {
+
+/// This check finds Objective-C code that uses +new to create object instances,
+/// or overrides +new in classes. Both are forbidden by Google's Objective-C
+/// style guide.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/google-avoid-nsobject-new.html
+class AvoidNSObjectNewCheck : public ClangTidyCheck {
+public:
+  AvoidNSObjectNewCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace objc
+} // namespace google
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_GOOGLE_AVOIDNSOBJECTNEWCHECK_H
diff --git a/clang-tools-extra/clang-tidy/google/CMakeLists.txt b/clang-tools-extra/clang-tidy/google/CMakeLists.txt
index 4d0a326f73b16..b78088cf06bab 100644
--- a/clang-tools-extra/clang-tidy/google/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/google/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangTidyGoogleModule
   AvoidCStyleCastsCheck.cpp
+  AvoidNSObjectNewCheck.cpp
   AvoidThrowingObjCExceptionCheck.cpp
   AvoidUnderscoreInGoogletestNameCheck.cpp
   DefaultArgumentsCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp
index c2a9ec5edbc48..1e3410fb8a5ef 100644
--- a/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/google/GoogleTidyModule.cpp
@@ -13,6 +13,7 @@
 #include "../readability/FunctionSizeCheck.h"
 #include "../readability/NamespaceCommentCheck.h"
 #include "AvoidCStyleCastsCheck.h"
+#include "AvoidNSObjectNewCheck.h"
 #include "AvoidThrowingObjCExceptionCheck.h"
 #include "AvoidUnderscoreInGoogletestNameCheck.h"
 #include "DefaultArgumentsCheck.h"
@@ -49,6 +50,8 @@ class GoogleModule : public ClangTidyModule {
         "google-explicit-constructor");
     CheckFactories.registerCheck<readability::GlobalNamesInHeadersCheck>(
         "google-global-names-in-headers");
+    CheckFactories.registerCheck<objc::AvoidNSObjectNewCheck>(
+        "google-objc-avoid-nsobject-new");
     CheckFactories.registerCheck<objc::AvoidThrowingObjCExceptionCheck>(
         "google-objc-avoid-throwing-exception");
     CheckFactories.registerCheck<objc::FunctionNamingCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index f1ee2fe44e9aa..4c709f0370df9 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -122,6 +122,12 @@ Improvements to clang-tidy
   Checks whether there are underscores in googletest test and test case names in
   test macros, which is prohibited by the Googletest FAQ.
 
+- New :doc:`google-objc-avoid-nsobject-new
+  <clang-tidy/checks/google-objc-avoid-nsobject-new>` check.
+
+  Checks for calls to ``+new`` or overrides of it, which are prohibited by the
+  Google Objective-C style guide.
+
 - New :doc:`objc-super-self <clang-tidy/checks/objc-super-self>` check.
 
   Finds invocations of ``-self`` on super instances in initializers of
diff --git a/clang-tools-extra/docs/clang-tidy/checks/google-objc-avoid-nsobject-new.rst b/clang-tools-extra/docs/clang-tidy/checks/google-objc-avoid-nsobject-new.rst
new file mode 100644
index 0000000000000..4eae4470c872b
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/google-objc-avoid-nsobject-new.rst
@@ -0,0 +1,29 @@
+.. title:: clang-tidy - google-objc-avoid-nsobject-new
+
+google-objc-avoid-nsobject-new
+==============================
+
+Finds calls to ``+new`` or overrides of it, which are prohibited by the
+Google Objective-C style guide.
+
+The Google Objective-C style guide forbids calling ``+new`` or overriding it in
+class implementations, preferring ``+alloc`` and ``-init`` methods to
+instantiate objects.
+
+An example:
+
+.. code-block:: objc
+
+  NSDate *now = [NSDate new];
+  Foo *bar = [Foo new];
+
+Instead, code should use ``+alloc``/``-init`` or class factory methods.
+
+.. code-block:: objc
+
+  NSDate *now = [NSDate date];
+  Foo *bar = [[Foo alloc] init];
+
+This check corresponds to the Google Objective-C Style Guide rule
+`Do Not Use +new
+<https://google.github.io/styleguide/objcguide.html#do-not-use-new>`_.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 74bf2a87e190a..7a0ebc292e468 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -135,6 +135,7 @@ Clang-Tidy Checks
    google-default-arguments
    google-explicit-constructor
    google-global-names-in-headers
+   google-objc-avoid-nsobject-new
    google-objc-avoid-throwing-exception
    google-objc-function-naming
    google-objc-global-variable-declaration
diff --git a/clang-tools-extra/test/clang-tidy/google-objc-avoid-nsobject-new.m b/clang-tools-extra/test/clang-tidy/google-objc-avoid-nsobject-new.m
new file mode 100644
index 0000000000000..f8b1d20f4f6da
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/google-objc-avoid-nsobject-new.m
@@ -0,0 +1,80 @@
+// RUN: %check_clang_tidy %s google-objc-avoid-nsobject-new %t
+
+@interface NSObject
++ (instancetype)new;
++ (instancetype)alloc;
+- (instancetype)init;
+@end
+
+@interface NSProxy  // Root class with no -init method.
+@end
+
+// NSDate provides a specific factory method.
+@interface NSDate : NSObject
++ (instancetype)date;
+@end
+
+// For testing behavior with Objective-C Generics.
+@interface NSMutableDictionary<__covariant KeyType, __covariant ObjectType> : NSObject
+@end
+
+@class NSString;
+
+#define ALLOCATE_OBJECT(_Type) [_Type new]
+
+void CheckSpecificInitRecommendations(void) {
+  NSObject *object = [NSObject new];
+  // CHECK-MESSAGES: [[@LINE-1]]:22: warning: do not create objects with +new [google-objc-avoid-nsobject-new]
+  // CHECK-FIXES: [NSObject alloc] init];
+
+  NSDate *correctDate = [NSDate date];
+  NSDate *incorrectDate = [NSDate new];
+  // CHECK-MESSAGES: [[@LINE-1]]:27: warning: do not create objects with +new [google-objc-avoid-nsobject-new]
+  // CHECK-FIXES: [NSDate date];
+
+  NSObject *macroCreated = ALLOCATE_OBJECT(NSObject);  // Shouldn't warn on macros.
+
+  NSMutableDictionary *dict = [NSMutableDictionary<NSString *, NSString *> new];
+  // CHECK-MESSAGES: [[@LINE-1]]:31: warning: do not create objects with +new [google-objc-avoid-nsobject-new]
+  // CHECK-FIXES: [NSMutableDictionary<NSString *, NSString *> alloc] init];
+}
+
+@interface Foo : NSObject
++ (instancetype)new; // Declare again to suppress warning.
+- (instancetype)initWithInt:(int)anInt;
+- (instancetype)init __attribute__((unavailable));
+
+- (id)new;
+@end
+
+@interface Baz : Foo // Check unavailable -init through inheritance.
+@end
+
+@interface ProxyFoo : NSProxy
++ (instancetype)new;
+@end
+
+void CallNewWhenInitUnavailable(void) {
+  Foo *foo = [Foo new];
+  // CHECK-MESSAGES: [[@LINE-1]]:14: warning: do not create objects with +new [google-objc-avoid-nsobject-new]
+
+  Baz *baz = [Baz new];
+  // CHECK-MESSAGES: [[@LINE-1]]:14: warning: do not create objects with +new [google-objc-avoid-nsobject-new]
+
+  // Instance method -new calls may be weird, but are not strictly forbidden.
+  Foo *bar = [[Foo alloc] initWithInt:4];
+  [bar new];
+
+  ProxyFoo *proxy = [ProxyFoo new];
+  // CHECK-MESSAGES: [[@LINE-1]]:21: warning: do not create objects with +new [google-objc-avoid-nsobject-new]
+}
+
+@interface HasNewOverride : NSObject
+@end
+
+@implementation HasNewOverride
++ (instancetype)new {
+  return [[self alloc] init];
+}
+// CHECK-MESSAGES: [[@LINE-3]]:1: warning: classes should not override +new [google-objc-avoid-nsobject-new]
+@end

From fb6ee67ab14805aa75cb11404d426535242bff55 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Thu, 23 May 2019 12:02:14 +0000
Subject: [PATCH 0024/1176] [clangd] Also update package-lock.json

llvm-svn: 361488
---
 .../clangd/clients/clangd-vscode/package-lock.json              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/clients/clangd-vscode/package-lock.json b/clang-tools-extra/clangd/clients/clangd-vscode/package-lock.json
index 47373624481ae..8108e789b2e36 100644
--- a/clang-tools-extra/clangd/clients/clangd-vscode/package-lock.json
+++ b/clang-tools-extra/clangd/clients/clangd-vscode/package-lock.json
@@ -1,6 +1,6 @@
 {
     "name": "vscode-clangd",
-    "version": "0.0.12",
+    "version": "0.0.13",
     "lockfileVersion": 1,
     "requires": true,
     "dependencies": {

From 903f5b05e264cca4bb24296f012854afe3102f6e Mon Sep 17 00:00:00 2001
From: James Henderson <jh7370@my.bristol.ac.uk>
Date: Thu, 23 May 2019 12:30:39 +0000
Subject: [PATCH 0025/1176] [llvm-objdump][test] Improve testing of some
 switches #3

This is the third commit in a series of patches to improve test coverage
of llvm-objdump. In this patch I have added a number of tests testing
various aspects of disassembly.

Reviewed by: MaskRay, grimar, rupprecht

Differential Revision: https://reviews.llvm.org/D62255

llvm-svn: 361489
---
 .../disasm-specific-funcs-mangled-name.test   |  27 +++++
 .../X86/disasm-specific-funcs.test            |  44 ++++----
 .../disassemble-invalid-byte-sequences.test   |  20 ++++
 .../X86/disassemble-long-instructions.test    |  17 +++
 .../X86/elf-disassemble-dynamic-symbols.test  |  99 +++++++++++++++++
 .../X86/elf-disassemble-no-symtab.test        |  24 +++++
 .../X86/elf-disassemble-relocs.test           |  42 ++++++++
 .../elf-disassemble-symbol-labels-exec.test   |  54 ++++++++++
 .../elf-disassemble-symbol-labels-rel.test    |  58 ++++++++++
 .../elf-disassemble-symbol-references.yaml    | 101 ++++++++++++++++++
 .../llvm-objdump/X86/elf-disassemble.test     |  50 +++++++++
 ...start-stop-address-relocatable-object.test |  46 ++++++++
 .../llvm-objdump/X86/start-stop-address.test  |  46 +++++++-
 .../llvm-objdump/X86/stripped-shared.test     |  10 --
 14 files changed, 604 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs-mangled-name.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-invalid-byte-sequences.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-long-instructions.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble-dynamic-symbols.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble-no-symtab.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml
 create mode 100644 llvm/test/tools/llvm-objdump/X86/elf-disassemble.test
 create mode 100644 llvm/test/tools/llvm-objdump/X86/start-stop-address-relocatable-object.test
 delete mode 100644 llvm/test/tools/llvm-objdump/X86/stripped-shared.test

diff --git a/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs-mangled-name.test b/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs-mangled-name.test
new file mode 100644
index 0000000000000..c3f243c6d3dfe
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs-mangled-name.test
@@ -0,0 +1,27 @@
+## Show that the --disassemble-functions switch takes mangled names, not
+## demangled names.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump -d --disassemble-functions=_Z3foov %t.o | FileCheck %s
+# RUN: llvm-objdump -d --disassemble-functions='foo()' %t.o | FileCheck %s --check-prefix=NOFOO
+# RUN: llvm-objdump -d -C --disassemble-functions='foo()' %t.o | FileCheck %s --check-prefix=NOFOO
+# RUN: llvm-objdump -d --disassemble-functions=foo %t.o | FileCheck %s --check-prefix=NOFOO
+
+# CHECK: _Z3foov:
+
+# NOFOO-NOT: foo
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:  .text
+    Type:  SHT_PROGBITS
+    Flags: [SHF_ALLOC, SHF_EXECINSTR]
+    Content: '90'
+Symbols:
+  - Name:    _Z3foov
+    Section: .text
diff --git a/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs.test b/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs.test
index 8e6155710a46b..988afb37b77b7 100644
--- a/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs.test
+++ b/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs.test
@@ -1,21 +1,25 @@
-// RUN: yaml2obj -o %t.out %p/Inputs/simple-executable-x86_64.yaml
-// RUN: llvm-objdump -d %t.out -disassemble-functions=main | FileCheck %s
+## Show that the --diassemble-functions switch disassembles only the specified
+## functions.
 
-// CHECK:              Disassembly of section .anothertext:
-// CHECK-EMPTY:
-// CHECK-NEXT:         main:
-// CHECK-NEXT:         10:       55      pushq   %rbp
-// CHECK-NEXT:         11:       48 89 e5        movq    %rsp, %rbp
-// CHECK-NEXT:         14:       48 83 ec 20     subq    $32, %rsp
-// CHECK-NEXT:         18:       48 8d 04 25 a8 00 00 00         leaq    168, %rax
-// CHECK-NEXT:         20:       c7 45 fc 00 00 00 00    movl    $0, -4(%rbp)
-// CHECK-NEXT:         27:       48 89 45 f0     movq    %rax, -16(%rbp)
-// CHECK-NEXT:         2b:       48 8b 45 f0     movq    -16(%rbp), %rax
-// CHECK-NEXT:         2f:       8b 08   movl    (%rax), %ecx
-// CHECK-NEXT:         31:       89 4d ec        movl    %ecx, -20(%rbp)
-// CHECK-NEXT:         34:       e8 c7 ff ff ff  callq   -57
-// CHECK-NEXT:         39:       8b 4d ec        movl    -20(%rbp), %ecx
-// CHECK-NEXT:         3c:       01 c1   addl    %eax, %ecx
-// CHECK-NEXT:         3e:       89 c8   movl    %ecx, %eax
-// CHECK-NEXT:         40:       48 83 c4 20     addq    $32, %rsp
-// CHECK-NEXT:         44:       5d      popq    %rbp
+# RUN: yaml2obj -o %t.out %p/Inputs/simple-executable-x86_64.yaml
+# RUN: llvm-objdump -d %t.out --disassemble-functions=main \
+# RUN:   | FileCheck %s --check-prefix=MAIN --implicit-check-not=foo --implicit-check-not=somedata
+# RUN: llvm-objdump -d %t.out --disassemble-functions=main,foo \
+# RUN:   | FileCheck %s --check-prefixes=MAIN,FOO --implicit-check-not=somedata
+
+# FOO: foo:
+# MAIN: main:
+
+## Unknown symbol name.
+# RUN: llvm-objdump -d %t.out --disassemble-functions=baz \
+# RUN:   | FileCheck %s --implicit-check-not=Disassembly
+
+## Data symbol.
+# RUN: llvm-objdump -d %t.out --disassemble-functions=a \
+# RUN:   | FileCheck %s --implicit-check-not=Disassembly
+
+## Data symbol + --disassemble-all.
+# RUN: llvm-objdump -D %t.out --disassemble-functions=a \
+# RUN:   | FileCheck %s --check-prefix=DATA
+
+# DATA: a:
diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-invalid-byte-sequences.test b/llvm/test/tools/llvm-objdump/X86/disassemble-invalid-byte-sequences.test
new file mode 100644
index 0000000000000..a9d7f6ac0e48e
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/disassemble-invalid-byte-sequences.test
@@ -0,0 +1,20 @@
+## Show that llvm-objdump handles invalid byte sequences, and continues.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump %t.o -d | FileCheck %s
+
+# CHECK:      0000000000000000 .text:
+# CHECK:         0: d9 e2 <unknown>
+# CHECK-NEXT:    2: 90    nop
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'd9e290'
diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-long-instructions.test b/llvm/test/tools/llvm-objdump/X86/disassemble-long-instructions.test
new file mode 100644
index 0000000000000..9596c01e460d6
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/disassemble-long-instructions.test
@@ -0,0 +1,17 @@
+## This test shows that llvm-objdump can disassemble a long instruction.
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump -d %t.o | FileCheck %s --strict-whitespace
+
+# CHECK: 0: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 	nopw	%cs:(%rax,%rax)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:  .text
+    Type:  SHT_PROGBITS
+    Flags: [SHF_ALLOC, SHF_EXECINSTR]
+    Content: '6666666666662E0F1F840000000000' # 15-byte nop
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-dynamic-symbols.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-dynamic-symbols.test
new file mode 100644
index 0000000000000..94efd7e583844
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-dynamic-symbols.test
@@ -0,0 +1,99 @@
+## Show the disassembly behaviour when dynamic symbols are present.
+# RUN: yaml2obj %s -o %t
+
+## Case 1: Both static and dynamic symbols are present. Only static
+## symbols are displayed.
+# RUN: llvm-objdump -d %t | FileCheck %s --check-prefixes=CHECK,STATIC
+
+## Case 2: Only the dynamic symbols are present. These should be displayed, if
+## they meet the necessary criteria.
+# RUN: llvm-objcopy --strip-all %t %t2
+# RUN: llvm-objdump -d %t2 | FileCheck %s --check-prefixes=CHECK,DYN
+
+# CHECK:       Disassembly of section .text:
+# CHECK-EMPTY:
+# STATIC-NEXT: 0000000000001000 .text:
+# DYN-NEXT:    0000000000001000 only_dyn:
+# CHECK-NEXT:      1000:
+# CHECK-EMPTY:
+# STATIC-NEXT: 0000000000001001 both_static:
+# DYN-NEXT:    0000000000001001 both_dyn:
+# CHECK-NEXT:      1001:
+# STATIC-EMPTY:
+# STATIC-NEXT: 0000000000001002 only_static:
+# CHECK-NEXT:      1002:
+# CHECK-NEXT:      1003:
+# CHECK-NEXT:      1004:
+# CHECK-NEXT:      1005:{{.*}}
+# CHECK-NOT:   {{.}}
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_DYN
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Address: 0x1000
+    Content: 909090909090
+ProgramHeaders:
+  - Type: PT_LOAD
+    VAddr: 0x1000
+    Sections:
+      - Section: .text
+Symbols:
+  - Name:    both_static
+    Value:   0x1001
+    Section: .text
+    Binding: STB_GLOBAL
+  - Name:    only_static
+    Value:   0x1002
+    Section: .text
+    Binding: STB_GLOBAL
+DynamicSymbols:
+  - Name:    only_dyn
+    Value:   0x1000
+    Section: .text
+    Size:    1
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+  - Name:    both_dyn
+    Value:   0x1001
+    Section: .text
+    Size:    1
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+  ## The rest of the dynamic symbols won't be used for various reasons.
+  ## FIXME: the first two symbols here should be dumped.
+  ##        See https://bugs.llvm.org/show_bug.cgi?id=41947
+  - Name:    not_func
+    Value:   0x1003
+    Section: .text
+    Type:    STT_OBJECT
+    Binding: STB_GLOBAL
+  - Name:    zero_sized
+    Value:   0x1004
+    Section: .text
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+  - Name:    '' # No name
+    Value:   0x1005
+    Section: .text
+    Size:    1
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+  - Name:    absolute
+    Value:   0x1005
+    Index:   SHN_ABS
+    Size:    1
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+  - Name:    undefined
+    Value:   0x1005
+    Index:   SHN_UNDEF
+    Size:    1
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-no-symtab.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-no-symtab.test
new file mode 100644
index 0000000000000..b45c905bb3db9
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-no-symtab.test
@@ -0,0 +1,24 @@
+## Show that llvm-objdump can handle a missing symbol table when printing
+## references and labels.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objcopy -R .symtab %t
+# RUN: llvm-objdump %t -d | FileCheck %s
+
+# CHECK:       Disassembly of section .text:
+# CHECK-EMPTY:
+# CHECK-NEXT:  0000000000004000 .text:
+# CHECK-NEXT:    4000: e8 42 00 00 00                callq   66 <.text+0x47>
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Address: 0x4000
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'e842000000'
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test
new file mode 100644
index 0000000000000..04390eb4e3084
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-relocs.test
@@ -0,0 +1,42 @@
+## Show that --disassemble + --reloc prints relocations inline and does not dump
+## the relocation sections.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump %t.o -d -r | FileCheck %s --implicit-check-not="RELOCATION RECORDS"
+
+# CHECK:      0: e8 00 00 00 00                callq   0 <.text+0x5>
+# CHECK-NEXT:          0000000000000001:  R_X86_64_PC32        foo-4
+# CHECK-NEXT:          0000000000000002:  R_X86_64_NONE        bar+8
+# CHECK-NEXT: 5: e8 00 00 00 00                callq   0 <.text+0xa>
+# CHECK-NEXT:          0000000000000006:  R_X86_64_PLT32       foo+1
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'e800000000e800000000'
+  - Name:    .rela.text
+    Type:    SHT_RELA
+    Info:    .text
+    Relocations:
+      - Offset: 1
+        Symbol: foo
+        Type:   R_X86_64_PC32
+        Addend: -4
+      - Offset: 2
+        Symbol: bar
+        Type:   R_X86_64_NONE
+        Addend: 8
+      - Offset: 6
+        Symbol: foo
+        Type:   R_X86_64_PLT32
+        Addend: 1
+Symbols:
+  - Name: foo
+  - Name: bar
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test
new file mode 100644
index 0000000000000..548ee4b182dd1
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-exec.test
@@ -0,0 +1,54 @@
+## Show which labels are printed in disassembly of an executable.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objdump %t -d | FileCheck %s --implicit-check-not=stt_section \
+# RUN:                                   --implicit-check-not=fourth \
+# RUN:                                   --implicit-check-not=absolute \
+# RUN:                                   --implicit-check-not=other
+
+# CHECK:     0000000000004000 first:
+# CHECK:     0000000000004001 second:
+# CHECK:     0000000000004002 third:
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Address: 0x4000
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: '9090909090'
+  - Name:    .text2
+    Type:    SHT_PROGBITS
+    Address: 0x4004
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Size:    0
+Symbols:
+  - Name:    first # Shows, with second, that symbol sizes are not used to delineate functions.
+    Value:   0x4000
+    Section: .text
+    Size:    0x2
+  - Name:    second
+    Value:   0x4001
+    Size:    0x1
+    Section: .text
+  - Name:    third # Shows, with fourth, that first symbol is picked.
+    Value:   0x4002
+    Section: .text
+  - Name:    fourth
+    Value:   0x4002
+    Section: .text
+  - Name:    stt_section # Shows that STT_SECTION symbols are ignored even if no other symbol present.
+    Value:   0x4003
+    Type:    STT_SECTION
+    Section: .text
+  - Name:    absolute # Show that absolute symbols are ignored.
+    Value:   0x4004
+    Index:   SHN_ABS
+  - Name:    other # Show that symbols from other sections are ignored.
+    Value:   0x4004
+    Section: .text2
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
new file mode 100644
index 0000000000000..d2377f44621f4
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
@@ -0,0 +1,58 @@
+## Show which labels are printed in disassembly of a relocatable object.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objdump %t -d | FileCheck %s --implicit-check-not=stt_section \
+# RUN:                                   --implicit-check-not=first \
+# RUN:                                   --implicit-check-not=second \
+# RUN:                                   --implicit-check-not=third \
+# RUN:                                   --implicit-check-not=fourth \
+# RUN:                                   --implicit-check-not=absolute \
+# RUN:                                   --implicit-check-not=other
+
+# CHECK: Disassembly of section .text:
+# CHECK: 0000000000000000 first:
+# CHECK: 0000000000000001 second:
+# CHECK: 0000000000000002 third:
+# CHECK: Disassembly of section .text2:
+# CHECK: 0000000000000004 other:
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: '909090909090'
+  - Name:    .text2
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: '9090909090'
+Symbols:
+  - Name:    first
+    Value:   0
+    Section: .text
+    Size:    2
+  - Name:    second # Shows, with first, that symbol sizes are not used to delineate functions.
+    Value:   1
+    Size:    1
+    Section: .text
+  - Name:    third # Shows, with fourth, that first symbol is picked.
+    Value:   2
+    Section: .text
+  - Name:    fourth
+    Value:   2
+    Section: .text
+  - Name:    stt_section # Shows that STT_SECTION symbols are ignored even if no other symbol present.
+    Value:   3
+    Type:    STT_SECTION
+    Section: .text
+  - Name:    absolute # Show that absolute symbols are ignored.
+    Value:   4
+    Index:   SHN_ABS
+  - Name:    other # Show that symbols in other sections are ignored.
+    Value:   4
+    Section: .text2
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml
new file mode 100644
index 0000000000000..d627b207a23da
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-references.yaml
@@ -0,0 +1,101 @@
+## Show that references in disassembly are labelled with the correct symbol.
+# RUN: yaml2obj %s --docnum=1 -o %t
+# RUN: llvm-objdump %t -d | FileCheck %s --check-prefix=EXEC
+
+# EXEC: Disassembly of section .text1:
+# EXEC:     4000: e8 00 00 00 00                callq   0 <third>
+# EXEC: Disassembly of section .text2:
+# EXEC:     4005: e8 12 34 56 78                callq   2018915346 <fourth+0x78563412>
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text1
+    Type:    SHT_PROGBITS
+    Address: 0x4000
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'e800000000' # Case 1: Referencing an address with a symbol.
+  - Name:    .text2
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Address: 0x4005
+    Content: 'e812345678' # Case 2: Referencing an address without a symbol.
+  - Name:    .text3
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Address: 0x400A
+Symbols:
+  - Name:    first
+    Section: .text1
+    Value:   0x4000
+  - Name:    second
+    Section: .text1
+    Value:   0x4005
+  - Name:    third
+    Section: .text2
+    Value:   0x4005
+  - Name:    fourth
+    Section: .text3
+    Value:   0x400A
+
+# RUN: yaml2obj %s --docnum=2 -o %t.o
+# RUN: llvm-objdump %t.o -d | FileCheck %s --check-prefix=REL
+
+# REL:      Disassembly of section .text1:
+# REL-EMPTY:
+# REL-NEXT: 0000000000000000 .text1:
+# REL-NEXT:        0: e8 00 00 00 00                callq   0 <.text1+0x5>
+# REL-EMPTY:
+# REL-NEXT: Disassembly of section .text2:
+# REL-EMPTY:
+# REL-NEXT: 0000000000000000 .text2:
+# REL-NEXT:        0: e8 00 00 00 00                callq   0 <sym2>
+# REL-EMPTY:
+# REL-NEXT: Disassembly of section .text3:
+# REL-EMPTY:
+# REL-NEXT: 0000000000000000 .text3:
+# REL-NEXT:        0: e8 00 00 00 00                callq   0 <.text3+0x5>
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text1
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'e800000000' # Case 1: Instruction is patched by a relocation.
+  - Name:    .rela.text1
+    Type:    SHT_RELA
+    Info:    .text1
+    Relocations:
+      - Offset: 1
+        Type:   R_X86_64_PC32
+        Symbol: sym3
+  - Name:    .text2
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'e800000000' # Case 2: Referencing an address with a matching symbol in that section.
+  - Name:    .text3
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: 'e800000000' # Case 3: Referencing an address without a matching symbol in that section.
+  - Name:    .other
+    Type:    SHT_PROGBITS
+Symbols:
+  # Shouldn't be picked, despite matching value, as not in right section.
+  - Name:    sym1
+    Section: .other
+    Value:   5
+  # Symbol in correct section, with right value should be picked for disassembly of .text2.
+  - Name:    sym2
+    Section: .text2
+    Value:   5
+  # Symbol referenced by relocation could be picked for disassembly of .text1, but isn't.
+  - Name:    sym3
diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble.test
new file mode 100644
index 0000000000000..4db8d32947fd6
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble.test
@@ -0,0 +1,50 @@
+## Show that disassembly is printed correctly, for only the desired sections.
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump %t.o --disassemble | FileCheck %s --implicit-check-not=Disassembly
+# RUN: llvm-objdump %t.o --disassemble-all | FileCheck %s --check-prefixes=CHECK,ALL
+
+# CHECK:       Disassembly of section .executable:
+# CHECK-EMPTY:
+# CHECK-NEXT:  0000000000001000 .executable:
+# CHECK-NEXT:         0: 90                            nop
+# ALL-EMPTY:
+# ALL-NEXT:    Disassembly of section .writable:
+# ALL-EMPTY:
+# ALL-NEXT:    0000000000002000 .writable:
+# ALL-NEXT:           0: c3                            retq
+# ALL-EMPTY:
+# ALL-NEXT:    Disassembly of section .readonly:
+# ALL-EMPTY:
+# ALL-NEXT:    0000000000003000 .readonly:
+# ALL-NEXT:           0: 01 00                         addl    %eax, (%rax)
+# ALL-EMPTY:
+# ALL-NEXT:    Disassembly of section .nobits:
+# ALL-EMPTY:
+# ALL-NEXT:    0000000000004000 .nobits:
+# ALL-NEXT:    ...
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:    .executable
+    Type:    SHT_PROGBITS
+    Address: 0x1000
+    Flags:   [SHF_EXECINSTR]
+    Content: '90'
+  - Name:    .writable
+    Type:    SHT_PROGBITS
+    Address: 0x2000
+    Flags:   [SHF_WRITE]
+    Content: 'c3'
+  - Name:    .readonly
+    Type:    SHT_PROGBITS
+    Address: 0x3000
+    Content: '0100'
+  - Name:    .nobits
+    Type:    SHT_NOBITS
+    Address: 0x4000
+    Size:    4
diff --git a/llvm/test/tools/llvm-objdump/X86/start-stop-address-relocatable-object.test b/llvm/test/tools/llvm-objdump/X86/start-stop-address-relocatable-object.test
new file mode 100644
index 0000000000000..37b522052607e
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/start-stop-address-relocatable-object.test
@@ -0,0 +1,46 @@
+## Show how --start-address and --stop-address work in a relocatable object.
+## They limit the disassembly to the relative offset ranges within sections.
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-objdump -d %t.o --start-address=1 --stop-address=2 | FileCheck %s --check-prefix=COMMON
+# RUN: llvm-objdump -d %t.o --start-address=1 | FileCheck %s --check-prefixes=START,COMMON
+# RUN: llvm-objdump -d %t.o --stop-address=2 | FileCheck %s --check-prefixes=STOP,COMMON
+
+# COMMON:       Disassembly of section .text:
+# COMMON-EMPTY:
+# COMMON-NEXT:  0000000000000000 .text:
+# STOP-NEXT:           0: 90                            nop
+# COMMON-NEXT:         1: 90                            nop
+# START-NEXT:          2: 90                            nop
+# COMMON-EMPTY:
+# COMMON-NEXT:  Disassembly of section .text2:
+# COMMON-EMPTY:
+# COMMON-NEXT:  0000000000000000 .text2:
+# STOP-NEXT:           0: c3                            retq
+# COMMON-NEXT:         1: c3                            retq
+# START-NEXT:          2: c3                            retq
+# STOP-EMPTY:
+# STOP-NEXT:    Disassembly of section .text3:
+# STOP-EMPTY:
+# STOP-NEXT:    0000000000000000 .text3:
+# STOP-NEXT:           0: cc                            int3
+# COMMON-NOT:   {{.}}
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:         .text
+    Type:         SHT_PROGBITS
+    Flags:        [SHF_ALLOC, SHF_EXECINSTR]
+    Content:      '909090'
+  - Name:         .text2
+    Type:         SHT_PROGBITS
+    Flags:        [SHF_ALLOC, SHF_EXECINSTR]
+    Content:      'c3c3c3'
+  - Name:         .text3
+    Type:         SHT_PROGBITS
+    Flags:        [SHF_ALLOC, SHF_EXECINSTR]
+    Content:      'cc'
diff --git a/llvm/test/tools/llvm-objdump/X86/start-stop-address.test b/llvm/test/tools/llvm-objdump/X86/start-stop-address.test
index 4df49a92818ec..e8b390f25f0b8 100644
--- a/llvm/test/tools/llvm-objdump/X86/start-stop-address.test
+++ b/llvm/test/tools/llvm-objdump/X86/start-stop-address.test
@@ -1,8 +1,12 @@
 // RUN: yaml2obj -o %t.out %p/Inputs/simple-executable-x86_64.yaml 
 // RUN: llvm-objdump -d %t.out --start-address=0x18 --stop-address=0x2f | FileCheck %s 
-// RUN: llvm-objdump -d %t.out --start-address=0xc --stop-address=0x11 | FileCheck %s --check-prefix "CROSSSECTION"
-// RUN: llvm-objdump -d %t.out --start-address=0x40 --stop-address=0x47 | FileCheck %s --check-prefix "CROSSDATA"
+// RUN: llvm-objdump -d %t.out --start-address=0xc --stop-address=0x11 | FileCheck %s --check-prefix CROSSSECTION
+// RUN: llvm-objdump -d %t.out --start-address=0x40 --stop-address=0x47 | FileCheck %s --check-prefix CROSSDATA
+// RUN: llvm-objdump -d %t.out --start-address=0x40 | FileCheck %s --check-prefix START
+// RUN: llvm-objdump -d %t.out --stop-address=0x11 | FileCheck %s --check-prefix STOP
+// RUN: llvm-objdump -d %t.out --start-address=0xffffffff | FileCheck %s --check-prefix OUT-OF-RANGE
 
+// CHECK-NOT:          Disassembly
 // CHECK:              Disassembly of section .anothertext:
 // CHECK-EMPTY:
 // CHECK-NEXT:         main:
@@ -10,8 +14,9 @@
 // CHECK-NEXT:         20:	c7 45 fc 00 00 00 00 	movl	$0, -4(%rbp)
 // CHECK-NEXT:         27:	48 89 45 f0 	movq	%rax, -16(%rbp)
 // CHECK-NEXT:         2b:	48 8b 45 f0 	movq	-16(%rbp), %rax
-// CHECK-NOT:          2f:
+// CHECK-NOT:          {{.}}
 
+// CROSSECTION-NOT:    Disassembly
 // CROSSSECTION:       Disassembly of section .text:
 // CROSSSECTION-EMPTY:
 // CROSSSECTION-NEXT:  foo:
@@ -22,12 +27,45 @@
 // CROSSSECTION-EMPTY:
 // CROSSSECTION-NEXT:  main:
 // CROSSSECTION-NEXT:  10:	55 	pushq	%rbp
-// CROSSSECTION-NOT:   11:
+// CROSSSECTION-NOT:   {{.}}
 
+// CROSSDATA-NOT:      Disassembly
 // CROSSDATA:          Disassembly of section .anothertext:
 // CROSSDATA:          main:
 // CROSSDATA:          40:	48 83 c4 20 	addq	$32, %rsp
 // CROSSDATA:          44:	5d 	popq	%rbp
 // CROSSDATA-DAG:      somedata:
 // CROSSDATA-NEXT:     45:	 74 65                           te
+// CROSSDATA-NOT:      {{.}}
 
+// START-NOT:   Disassembly
+// START:       Disassembly of section .anothertext:
+// START-EMPTY:
+// START-NEXT:  0000000000000010 main:
+// START-NEXT:        40: 48 83 c4 20                   addq    $32, %rsp
+// START-NEXT:        44: 5d                            popq    %rbp
+// START-EMPTY:
+// START-NEXT:  0000000000000045 somedata:
+// START-NEXT:        45:        74 65 73 74 20 73 74 72         test str
+// START-NEXT:        4d:        00 c3                           ..
+
+// STOP:       Disassembly of section .text:
+// STOP-EMPTY:
+// STOP-NEXT:       0000000000000000 foo:
+// STOP-NEXT:       0: 55                            pushq   %rbp
+// STOP-NEXT:       1: 48 89 e5                      movq    %rsp, %rbp
+// STOP-NEXT:       4: 8b 04 25 a8 00 00 00          movl    168, %eax
+// STOP-NEXT:       b: 5d                            popq    %rbp
+// STOP-NEXT:       c: c3                            retq
+// STOP-NEXT:       d: 0f 1f 00                      nopl    (%rax)
+// STOP-EMPTY:
+// STOP-NEXT:  Disassembly of section .anothertext:
+// STOP-EMPTY:
+// STOP-NEXT:  0000000000000010 main:
+// STOP-NEXT:      10: 55                            pushq   %rbp
+// STOP-NOT:       {{.}}
+
+// OUT-OF-RANGE-NOT: Disassembly
+
+// RUN: not llvm-objdump -d %t.out --start-address=0x40 --stop-address=0x3f 2>&1 | FileCheck %s --check-prefix ERRMSG
+// ERRMSG: error: Start address should be less than stop address.
diff --git a/llvm/test/tools/llvm-objdump/X86/stripped-shared.test b/llvm/test/tools/llvm-objdump/X86/stripped-shared.test
deleted file mode 100644
index c57155f4cd7ba..0000000000000
--- a/llvm/test/tools/llvm-objdump/X86/stripped-shared.test
+++ /dev/null
@@ -1,10 +0,0 @@
-// This test checks that dynamic symbols are used when disassembling elf files.
-// RUN: llvm-objdump -d %p/Inputs/stripped-elf.so | FileCheck %s
-
-# CHECK: .init
-# CHECK: .plt
-# CHECK: .text
-# CHECK: func0
-# CHECK: func1
-# CHECK: func2
-# CHECK: .fini

From 43882b16a343fc848a9485d59479f48a34abdbdc Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet@google.com>
Date: Thu, 23 May 2019 12:35:26 +0000
Subject: [PATCH 0026/1176] [MergeICmps] Make the pass compatible with the new
 pass manager.

Reviewers: gchatelet, spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62287

llvm-svn: 361490
---
 llvm/include/llvm/InitializePasses.h          |   2 +-
 llvm/include/llvm/LinkAllPasses.h             |   2 +-
 llvm/include/llvm/Transforms/Scalar.h         |   2 +-
 .../llvm/Transforms/Scalar/MergeICmps.h       |  25 +++
 llvm/lib/CodeGen/TargetPassConfig.cpp         |   2 +-
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/Scalar/MergeICmps.cpp     | 148 +++++++++---------
 llvm/lib/Transforms/Scalar/Scalar.cpp         |   2 +-
 .../MergeICmps/X86/pair-int32-int32.ll        |   2 +-
 10 files changed, 110 insertions(+), 77 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Scalar/MergeICmps.h

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index f60a99c62051b..fa7909f14cd3d 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -279,7 +279,7 @@ void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
 void initializeMemorySSAWrapperPassPass(PassRegistry&);
 void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
 void initializeMergeFunctionsPass(PassRegistry&);
-void initializeMergeICmpsPass(PassRegistry&);
+void initializeMergeICmpsLegacyPassPass(PassRegistry &);
 void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
 void initializeMetaRenamerPass(PassRegistry&);
 void initializeModuleDebugInfoPrinterPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index fb3b19e2dab95..a8354125c9d38 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -191,7 +191,7 @@ namespace {
       (void) llvm::createPostOrderFunctionAttrsLegacyPass();
       (void) llvm::createReversePostOrderFunctionAttrsPass();
       (void) llvm::createMergeFunctionsPass();
-      (void) llvm::createMergeICmpsPass();
+      (void) llvm::createMergeICmpsLegacyPass();
       (void) llvm::createExpandMemCmpPass();
       std::string buf;
       llvm::raw_string_ostream os(buf);
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 8f9d337e385bd..f9360b5ee2c82 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -371,7 +371,7 @@ Pass *createLowerWidenableConditionPass();
 //
 // MergeICmps - Merge integer comparison chains into a memcmp
 //
-Pass *createMergeICmpsPass();
+Pass *createMergeICmpsLegacyPass();
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/MergeICmps.h b/llvm/include/llvm/Transforms/Scalar/MergeICmps.h
new file mode 100644
index 0000000000000..63bdbf8f4d095
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/MergeICmps.h
@@ -0,0 +1,25 @@
+//===- MergeICmps.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
+#define LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct MergeICmpsPass
+    : PassInfoMixin<MergeICmpsPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 09ae7bc02e454..1b7d6be4d7b8e 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -646,7 +646,7 @@ void TargetPassConfig::addIRPasses() {
     // into optimally-sized loads and compares. The transforms are enabled by a
     // target lowering hook.
     if (!DisableMergeICmps)
-      addPass(createMergeICmpsPass());
+      addPass(createMergeICmpsLegacyPass());
     addPass(createExpandMemCmpPass());
   }
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index e4d15277899be..c7976ce2702d2 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -142,6 +142,7 @@
 #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
 #include "llvm/Transforms/Scalar/NaryReassociate.h"
 #include "llvm/Transforms/Scalar/NewGVN.h"
 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 0d614d1d69b62..cf601ec9ebc54 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -190,6 +190,7 @@ FUNCTION_PASS("loop-sink", LoopSinkPass())
 FUNCTION_PASS("lowerinvoke", LowerInvokePass())
 FUNCTION_PASS("mem2reg", PromotePass())
 FUNCTION_PASS("memcpyopt", MemCpyOptPass())
+FUNCTION_PASS("mergeicmps", MergeICmpsPass())
 FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
 FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
 FUNCTION_PASS("newgvn", NewGVNPass())
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index b55d28c3455b3..086c2f33afb6f 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -41,6 +41,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/MergeICmps.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
@@ -214,19 +215,19 @@ class BCECmpBlock {
 
   // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
   // instructions in the block.
-  bool canSplit(AliasAnalysis *AA) const;
+  bool canSplit(AliasAnalysis &AA) const;
 
   // Return true if this all the relevant instructions in the BCE-cmp-block can
   // be sunk below this instruction. By doing this, we know we can separate the
   // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
   // block.
   bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
-                         AliasAnalysis *AA) const;
+                         AliasAnalysis &AA) const;
 
   // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
   // instructions. Split the old block and move all non-BCE-cmp-insts into the
   // new parent block.
-  void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
+  void split(BasicBlock *NewParent, AliasAnalysis &AA) const;
 
   // The basic block where this comparison happens.
   BasicBlock *BB = nullptr;
@@ -245,7 +246,7 @@ class BCECmpBlock {
 
 bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
                                     DenseSet<Instruction *> &BlockInsts,
-                                    AliasAnalysis *AA) const {
+                                    AliasAnalysis &AA) const {
   // If this instruction has side effects and its in middle of the BCE cmp block
   // instructions, then bail for now.
   if (Inst->mayHaveSideEffects()) {
@@ -255,9 +256,9 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
     // Disallow stores that might alias the BCE operands
     MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
     MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
-    if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
-        isModSet(AA->getModRefInfo(Inst, RLoc)))
-        return false;
+    if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
+        isModSet(AA.getModRefInfo(Inst, RLoc)))
+      return false;
   }
   // Make sure this instruction does not use any of the BCE cmp block
   // instructions as operand.
@@ -268,7 +269,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
   return true;
 }
 
-void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
+void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
   DenseSet<Instruction *> BlockInsts(
       {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
   llvm::SmallVector<Instruction *, 4> OtherInsts;
@@ -288,7 +289,7 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
   }
 }
 
-bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
+bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
   DenseSet<Instruction *> BlockInsts(
       {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
   for (Instruction &Inst : *BB) {
@@ -404,16 +405,16 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
 // A chain of comparisons.
 class BCECmpChain {
  public:
-  BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
-              AliasAnalysis *AA);
+   BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+               AliasAnalysis &AA);
 
-  int size() const { return Comparisons_.size(); }
+   int size() const { return Comparisons_.size(); }
 
 #ifdef MERGEICMPS_DOT_ON
   void dump() const;
 #endif  // MERGEICMPS_DOT_ON
 
-  bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA,
+  bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
                 DomTreeUpdater &DTU);
 
 private:
@@ -432,7 +433,7 @@ class BCECmpChain {
 };
 
 BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
-                         AliasAnalysis *AA)
+                         AliasAnalysis &AA)
     : Phi_(Phi) {
   assert(!Blocks.empty() && "a chain should have at least one block");
   // Now look inside blocks to check for BCE comparisons.
@@ -604,9 +605,8 @@ class MergedBlockName {
 static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
                                     BasicBlock *const InsertBefore,
                                     BasicBlock *const NextCmpBlock,
-                                    PHINode &Phi,
-                                    const TargetLibraryInfo *const TLI,
-                                    AliasAnalysis *AA, DomTreeUpdater &DTU) {
+                                    PHINode &Phi, const TargetLibraryInfo &TLI,
+                                    AliasAnalysis &AA, DomTreeUpdater &DTU) {
   assert(!Comparisons.empty() && "merging zero comparisons");
   LLVMContext &Context = NextCmpBlock->getContext();
   const BCECmpBlock &FirstCmp = Comparisons[0];
@@ -652,7 +652,7 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
     Value *const MemCmpCall = emitMemCmp(
         Lhs, Rhs,
         ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
-        DL, TLI);
+        DL, &TLI);
     IsEqual = Builder.CreateICmpEQ(
         MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
   }
@@ -674,8 +674,8 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
   return BB;
 }
 
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
-                           AliasAnalysis *AA, DomTreeUpdater &DTU) {
+bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+                           DomTreeUpdater &DTU) {
   assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
   // First pass to check if there is at least one merge. If not, we don't do
   // anything and we keep analysis passes intact.
@@ -694,9 +694,9 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
 
   // Effectively merge blocks. We go in the reverse direction from the phi block
   // so that the next block is always available to branch to.
-  const auto mergeRange = [this, TLI, AA, &DTU](int I, int Num,
-                                                BasicBlock *InsertBefore,
-                                                BasicBlock *Next) {
+  const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
+                                                  BasicBlock *InsertBefore,
+                                                  BasicBlock *Next) {
     return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
                             InsertBefore, Next, Phi_, TLI, AA, DTU);
   };
@@ -790,8 +790,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
   return Blocks;
 }
 
-bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
-                AliasAnalysis *AA, DomTreeUpdater &DTU) {
+bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+                DomTreeUpdater &DTU) {
   LLVM_DEBUG(dbgs() << "processPhi()\n");
   if (Phi.getNumIncomingValues() <= 1) {
     LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@@ -859,12 +859,40 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
   return CmpChain.simplify(TLI, AA, DTU);
 }
 
-class MergeICmps : public FunctionPass {
- public:
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+                    const TargetTransformInfo &TTI, AliasAnalysis &AA,
+                    DominatorTree *DT) {
+  LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n");
+
+  // We only try merging comparisons if the target wants to expand memcmp later.
+  // The rationale is to avoid turning small chains into memcmp calls.
+  if (!TTI.enableMemCmpExpansion(true))
+    return false;
+
+  // If we don't have memcmp avaiable we can't emit calls to it.
+  if (!TLI.has(LibFunc_memcmp))
+    return false;
+
+  DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr,
+                     DomTreeUpdater::UpdateStrategy::Eager);
+
+  bool MadeChange = false;
+
+  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+    // A Phi operation is always first in a basic block.
+    if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+      MadeChange |= processPhi(*Phi, TLI, AA, DTU);
+  }
+
+  return MadeChange;
+}
+
+class MergeICmpsLegacyPass : public FunctionPass {
+public:
   static char ID;
 
-  MergeICmps() : FunctionPass(ID) {
-    initializeMergeICmpsPass(*PassRegistry::getPassRegistry());
+  MergeICmpsLegacyPass() : FunctionPass(ID) {
+    initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnFunction(Function &F) override {
@@ -874,12 +902,8 @@ class MergeICmps : public FunctionPass {
     // MergeICmps does not need the DominatorTree, but we update it if it's
     // already available.
     auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-    DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr,
-                       /*PostDominatorTree*/ nullptr,
-                       DomTreeUpdater::UpdateStrategy::Eager);
-    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-    auto PA = runImpl(F, &TLI, &TTI, AA, DTU);
-    return !PA.areAllPreserved();
+    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);
   }
 
  private:
@@ -890,50 +914,32 @@ class MergeICmps : public FunctionPass {
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
   }
-
-  PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
-                            const TargetTransformInfo *TTI, AliasAnalysis *AA,
-                            DomTreeUpdater &DTU);
 };
 
-PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
-                                      const TargetTransformInfo *TTI,
-                                      AliasAnalysis *AA, DomTreeUpdater &DTU) {
-  LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
-
-  // We only try merging comparisons if the target wants to expand memcmp later.
-  // The rationale is to avoid turning small chains into memcmp calls.
-  if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
+} // namespace
 
-  // If we don't have memcmp avaiable we can't emit calls to it.
-  if (!TLI->has(LibFunc_memcmp))
-    return PreservedAnalyses::all();
+char MergeICmpsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps",
+                      "Merge contiguous icmps into a memcmp", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps",
+                    "Merge contiguous icmps into a memcmp", false, false)
 
-  bool MadeChange = false;
+Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); }
 
-  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
-    // A Phi operation is always first in a basic block.
-    if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
-      MadeChange |= processPhi(*Phi, TLI, AA, DTU);
-  }
-
-  if (!MadeChange)
+PreservedAnalyses MergeICmpsPass::run(Function &F,
+                                      FunctionAnalysisManager &AM) {
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto &AA = AM.getResult<AAManager>(F);
+  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+  const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT);
+  if (!MadeChanges)
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
   PA.preserve<GlobalsAA>();
   PA.preserve<DominatorTreeAnalysis>();
   return PA;
 }
-
-}  // namespace
-
-char MergeICmps::ID = 0;
-INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
-                      "Merge contiguous icmps into a memcmp", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
-                    "Merge contiguous icmps into a memcmp", false, false)
-
-Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 2584cf04c9337..869cf00e0a89c 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -83,7 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLowerGuardIntrinsicLegacyPassPass(Registry);
   initializeLowerWidenableConditionLegacyPassPass(Registry);
   initializeMemCpyOptLegacyPassPass(Registry);
-  initializeMergeICmpsPass(Registry);
+  initializeMergeICmpsLegacyPassPass(Registry);
   initializeMergedLoadStoreMotionLegacyPassPass(Registry);
   initializeNaryReassociateLegacyPassPass(Registry);
   initializePartiallyInlineLibCallsLegacyPassPass(Registry);
diff --git a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
index c4f0ea8ee54e3..66ba101cdb25e 100644
--- a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
+++ b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
+; RUN: opt < %s -passes='require<domtree>,mergeicmps,verify<domtree>' -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
 ; RUN: opt < %s -mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S -disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN
 
 %S = type { i32, i32 }

From 591c793b48e487382c30e352c0c3b7ae60b45a87 Mon Sep 17 00:00:00 2001
From: James Henderson <jh7370@my.bristol.ac.uk>
Date: Thu, 23 May 2019 12:38:06 +0000
Subject: [PATCH 0027/1176] [llvm-objdump][test] Make test names consistent

This change renames a number of the disassembly tests to standardise
disasm/diassemble/disassembly to disassemble. Requested in
https://reviews.llvm.org/D62255.

llvm-svn: 361491
---
 ...-mangled-name.test => disassemble-functions-mangled-name.test} | 0
 .../{disasm-specific-funcs.test => disassemble-functions.test}    | 0
 .../X86/{disassembly-show-raw.test => disassemble-show-raw.test}  | 0
 .../llvm-objdump/X86/{disasm-text.test => disassemble-text.test}  | 0
 ...eroes-relocations.test => disassemble-zeroes-relocations.test} | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/tools/llvm-objdump/X86/{disasm-specific-funcs-mangled-name.test => disassemble-functions-mangled-name.test} (100%)
 rename llvm/test/tools/llvm-objdump/X86/{disasm-specific-funcs.test => disassemble-functions.test} (100%)
 rename llvm/test/tools/llvm-objdump/X86/{disassembly-show-raw.test => disassemble-show-raw.test} (100%)
 rename llvm/test/tools/llvm-objdump/X86/{disasm-text.test => disassemble-text.test} (100%)
 rename llvm/test/tools/llvm-objdump/X86/{disasm-zeroes-relocations.test => disassemble-zeroes-relocations.test} (100%)

diff --git a/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs-mangled-name.test b/llvm/test/tools/llvm-objdump/X86/disassemble-functions-mangled-name.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs-mangled-name.test
rename to llvm/test/tools/llvm-objdump/X86/disassemble-functions-mangled-name.test
diff --git a/llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs.test b/llvm/test/tools/llvm-objdump/X86/disassemble-functions.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/disasm-specific-funcs.test
rename to llvm/test/tools/llvm-objdump/X86/disassemble-functions.test
diff --git a/llvm/test/tools/llvm-objdump/X86/disassembly-show-raw.test b/llvm/test/tools/llvm-objdump/X86/disassemble-show-raw.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/disassembly-show-raw.test
rename to llvm/test/tools/llvm-objdump/X86/disassemble-show-raw.test
diff --git a/llvm/test/tools/llvm-objdump/X86/disasm-text.test b/llvm/test/tools/llvm-objdump/X86/disassemble-text.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/disasm-text.test
rename to llvm/test/tools/llvm-objdump/X86/disassemble-text.test
diff --git a/llvm/test/tools/llvm-objdump/X86/disasm-zeroes-relocations.test b/llvm/test/tools/llvm-objdump/X86/disassemble-zeroes-relocations.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/disasm-zeroes-relocations.test
rename to llvm/test/tools/llvm-objdump/X86/disassemble-zeroes-relocations.test

From a4c7873dac5f416ff3f03f627484fbcfe49f654e Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 23 May 2019 12:43:08 +0000
Subject: [PATCH 0028/1176] [llvm-objdump][test] Make MachO test names
 consistent

We have macho-disassembl{e,y}-*. Rename macho-disassembly-* to
macho-disassemble-* for consistency.

llvm-svn: 361492
---
 ...acho-disassembly-g-dsym.test => macho-disassemble-g-dsym.test} | 0
 ...assembly-kextbundle.test => macho-disassemble-kextbundle.test} | 0
 ...-disassembly-stripped.test => macho-disassemble-stripped.test} | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/tools/llvm-objdump/X86/{macho-disassembly-g-dsym.test => macho-disassemble-g-dsym.test} (100%)
 rename llvm/test/tools/llvm-objdump/X86/{macho-disassembly-kextbundle.test => macho-disassemble-kextbundle.test} (100%)
 rename llvm/test/tools/llvm-objdump/X86/{macho-disassembly-stripped.test => macho-disassemble-stripped.test} (100%)

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassembly-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/macho-disassembly-g-dsym.test
rename to llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassembly-kextbundle.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-kextbundle.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/macho-disassembly-kextbundle.test
rename to llvm/test/tools/llvm-objdump/X86/macho-disassemble-kextbundle.test
diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassembly-stripped.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-stripped.test
similarity index 100%
rename from llvm/test/tools/llvm-objdump/X86/macho-disassembly-stripped.test
rename to llvm/test/tools/llvm-objdump/X86/macho-disassemble-stripped.test

From 5dabe03b4178c8d276cc1732fed2813418513dce Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 23 May 2019 12:43:13 +0000
Subject: [PATCH 0029/1176] [RISCV][NFC] Add nounwind attribute to functions
 missing it in test/CodeGen/RISCV

r360897 was incomplete, must have applied an old/wip patch. This is in preparation for emitting CFI directives.

llvm-svn: 361493
---
 .../test/CodeGen/RISCV/addc-adde-sube-subc.ll |  4 +-
 llvm/test/CodeGen/RISCV/addcarry.ll           |  2 +-
 llvm/test/CodeGen/RISCV/alu64.ll              | 20 +++----
 .../test/CodeGen/RISCV/atomic-cmpxchg-flag.ll |  2 +-
 llvm/test/CodeGen/RISCV/bare-select.ll        |  4 +-
 llvm/test/CodeGen/RISCV/branch-relaxation.ll  |  4 +-
 llvm/test/CodeGen/RISCV/branch.ll             |  2 +-
 .../CodeGen/RISCV/get-setcc-result-type.ll    |  2 +-
 .../CodeGen/RISCV/hoist-global-addr-base.ll   | 17 +++---
 llvm/test/CodeGen/RISCV/inline-asm.ll         |  8 +--
 llvm/test/CodeGen/RISCV/jumptable.ll          |  2 +-
 llvm/test/CodeGen/RISCV/legalize-fneg.ll      |  6 +-
 llvm/test/CodeGen/RISCV/rotl-rotr.ll          |  4 +-
 .../test/CodeGen/RISCV/rv64i-tricky-shifts.ll |  6 +-
 llvm/test/CodeGen/RISCV/select-cc.ll          |  2 +-
 llvm/test/CodeGen/RISCV/sext-zext-trunc.ll    | 60 +++++++++----------
 llvm/test/CodeGen/RISCV/tail-calls.ll         | 18 +++---
 .../CodeGen/RISCV/zext-with-load-is-free.ll   |  4 +-
 18 files changed, 83 insertions(+), 84 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll
index 7c28df4c30ff3..068e52f6175eb 100644
--- a/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll
+++ b/llvm/test/CodeGen/RISCV/addc-adde-sube-subc.ll
@@ -4,7 +4,7 @@
 
 ; Ensure that the ISDOpcodes ADDC, ADDE, SUBC, SUBE are handled correctly
 
-define i64 @addc_adde(i64 %a, i64 %b) {
+define i64 @addc_adde(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: addc_adde:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    add a1, a1, a3
@@ -17,7 +17,7 @@ define i64 @addc_adde(i64 %a, i64 %b) {
   ret i64 %1
 }
 
-define i64 @subc_sube(i64 %a, i64 %b) {
+define i64 @subc_sube(i64 %a, i64 %b) nounwind {
 ; RV32I-LABEL: subc_sube:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    sub a1, a1, a3
diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll
index f409dac1a71a5..15fe53e9f24a7 100644
--- a/llvm/test/CodeGen/RISCV/addcarry.ll
+++ b/llvm/test/CodeGen/RISCV/addcarry.ll
@@ -6,7 +6,7 @@
 
 declare  i64 @llvm.smul.fix.i64  (i64, i64, i32)
 
-define i64 @addcarry(i64 %x, i64 %y) {
+define i64 @addcarry(i64 %x, i64 %y) nounwind {
 ; RISCV32-LABEL: addcarry:
 ; RISCV32:       # %bb.0:
 ; RISCV32-NEXT:    mul a4, a0, a3
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index fcf695ff2504b..0a44a380df72e 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -375,7 +375,7 @@ define i64 @and(i64 %a, i64 %b) nounwind {
 
 ; RV64I-only instructions
 
-define signext i32 @addiw(i32 signext %a) {
+define signext i32 @addiw(i32 signext %a) nounwind {
 ; RV64I-LABEL: addiw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addiw a0, a0, 123
@@ -389,7 +389,7 @@ define signext i32 @addiw(i32 signext %a) {
   ret i32 %1
 }
 
-define signext i32 @slliw(i32 signext %a) {
+define signext i32 @slliw(i32 signext %a) nounwind {
 ; RV64I-LABEL: slliw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slliw a0, a0, 17
@@ -403,7 +403,7 @@ define signext i32 @slliw(i32 signext %a) {
   ret i32 %1
 }
 
-define signext i32 @srliw(i32 %a) {
+define signext i32 @srliw(i32 %a) nounwind {
 ; RV64I-LABEL: srliw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srliw a0, a0, 8
@@ -417,7 +417,7 @@ define signext i32 @srliw(i32 %a) {
   ret i32 %1
 }
 
-define signext i32 @sraiw(i32 %a) {
+define signext i32 @sraiw(i32 %a) nounwind {
 ; RV64I-LABEL: sraiw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sraiw a0, a0, 9
@@ -431,7 +431,7 @@ define signext i32 @sraiw(i32 %a) {
   ret i32 %1
 }
 
-define signext i32 @sextw(i32 zeroext %a) {
+define signext i32 @sextw(i32 zeroext %a) nounwind {
 ; RV64I-LABEL: sextw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
@@ -443,7 +443,7 @@ define signext i32 @sextw(i32 zeroext %a) {
   ret i32 %a
 }
 
-define signext i32 @addw(i32 signext %a, i32 signext %b) {
+define signext i32 @addw(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: addw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addw a0, a0, a1
@@ -457,7 +457,7 @@ define signext i32 @addw(i32 signext %a, i32 signext %b) {
   ret i32 %1
 }
 
-define signext i32 @subw(i32 signext %a, i32 signext %b) {
+define signext i32 @subw(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: subw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    subw a0, a0, a1
@@ -471,7 +471,7 @@ define signext i32 @subw(i32 signext %a, i32 signext %b) {
   ret i32 %1
 }
 
-define signext i32 @sllw(i32 signext %a, i32 zeroext %b) {
+define signext i32 @sllw(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64I-LABEL: sllw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sllw a0, a0, a1
@@ -485,7 +485,7 @@ define signext i32 @sllw(i32 signext %a, i32 zeroext %b) {
   ret i32 %1
 }
 
-define signext i32 @srlw(i32 signext %a, i32 zeroext %b) {
+define signext i32 @srlw(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64I-LABEL: srlw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srlw a0, a0, a1
@@ -499,7 +499,7 @@ define signext i32 @srlw(i32 signext %a, i32 zeroext %b) {
   ret i32 %1
 }
 
-define signext i32 @sraw(i64 %a, i32 zeroext %b) {
+define signext i32 @sraw(i64 %a, i32 zeroext %b) nounwind {
 ; RV64I-LABEL: sraw:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sraw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
index e5d619dc369b3..b331b4b9926b6 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll
@@ -7,7 +7,7 @@
 ; higher bits were masked to zero for the comparison.
 
 define i1 @cmpxchg_i32_seq_cst_seq_cst(i32* %ptr, i32 signext %cmp,
-        i32 signext %val) {
+        i32 signext %val) nounwind {
 ; RV64IA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
 ; RV64IA:       # %bb.0: # %entry
 ; RV64IA-NEXT:  .LBB0_1: # %entry
diff --git a/llvm/test/CodeGen/RISCV/bare-select.ll b/llvm/test/CodeGen/RISCV/bare-select.ll
index 59add65d1db78..1b8f2f63e99d5 100644
--- a/llvm/test/CodeGen/RISCV/bare-select.ll
+++ b/llvm/test/CodeGen/RISCV/bare-select.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32I
 
-define i32 @bare_select(i1 %a, i32 %b, i32 %c) {
+define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
 ; RV32I-LABEL: bare_select:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -16,7 +16,7 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) {
   ret i32 %1
 }
 
-define float @bare_select_float(i1 %a, float %b, float %c) {
+define float @bare_select_float(i1 %a, float %b, float %c) nounwind {
 ; RV32I-LABEL: bare_select_float:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
index 2be50d10ae948..cd589dd9cab37 100644
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -3,7 +3,7 @@
 ; RUN:   -o /dev/null 2>&1
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s
 
-define void @relax_bcc(i1 %a) {
+define void @relax_bcc(i1 %a) nounwind {
 ; CHECK-LABEL: relax_bcc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a0, a0, 1
@@ -25,7 +25,7 @@ tail:
   ret void
 }
 
-define i32 @relax_jal(i1 %a) {
+define i32 @relax_jal(i1 %a) nounwind {
 ; CHECK-LABEL: relax_jal:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll
index 71daf0e57b90e..e834499280328 100644
--- a/llvm/test/CodeGen/RISCV/branch.ll
+++ b/llvm/test/CodeGen/RISCV/branch.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 
-define void @foo(i32 %a, i32 *%b, i1 %c) {
+define void @foo(i32 %a, i32 *%b, i1 %c) nounwind {
 ; RV32I-LABEL: foo:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lw a3, 0(a1)
diff --git a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll
index 507f04822b865..fe326b383ec0a 100644
--- a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll
+++ b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 
-define void @getSetCCResultType(<4 x i32>* %p, <4 x i32>* %q) {
+define void @getSetCCResultType(<4 x i32>* %p, <4 x i32>* %q) nounwind {
 ; RV32I-LABEL: getSetCCResultType:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lw a1, 12(a0)
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 2a1d5ed1a0844..b00873ce486d6 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -7,7 +7,7 @@
 @g = global [1048576 x i8] zeroinitializer, align 1
 
 
-define dso_local void @multiple_stores() local_unnamed_addr {
+define dso_local void @multiple_stores() local_unnamed_addr nounwind {
 ; CHECK-LABEL: multiple_stores:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(s)
@@ -23,7 +23,7 @@ entry:
   ret void
 }
 
-define dso_local void @control_flow_with_mem_access() local_unnamed_addr #0 {
+define dso_local void @control_flow_with_mem_access() local_unnamed_addr nounwind {
 ; CHECK-LABEL: control_flow_with_mem_access:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(s)
@@ -57,7 +57,7 @@ if.end:                                           ; preds = %if.then, %entry
 ; lui  a0, 18     ---> offset
 ; addi a0, a0, -160
 ; add  a0, a0, a1  ---> base + offset.
-define i8* @big_offset_neg_addi() {
+define i8* @big_offset_neg_addi() nounwind {
 ; CHECK-LABEL: big_offset_neg_addi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lui a0, %hi(g+73568)
@@ -72,7 +72,7 @@ define i8* @big_offset_neg_addi() {
 ; addi a0, a0, %lo(g)
 ; lui  a1, 128     ---> offset
 ; add  a0, a0, a1  ---> base + offset.
-define i8* @big_offset_lui_tail() {
+define i8* @big_offset_lui_tail() nounwind {
 ; CHECK-LABEL: big_offset_lui_tail:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lui a0, %hi(g+524288)
@@ -81,7 +81,7 @@ define i8* @big_offset_lui_tail() {
   ret i8* getelementptr inbounds ([1048576 x i8], [1048576 x i8]* @g, i32 0, i32 524288)
 }
 
-define dso_local i32* @big_offset_one_use() local_unnamed_addr {
+define dso_local i32* @big_offset_one_use() local_unnamed_addr nounwind {
 ; CHECK-LABEL: big_offset_one_use:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(s+16572)
@@ -91,7 +91,7 @@ entry:
   ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5)
 }
 
-define dso_local i32* @small_offset_one_use() local_unnamed_addr {
+define dso_local i32* @small_offset_one_use() local_unnamed_addr nounwind {
 ; CHECK-LABEL: small_offset_one_use:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(s+160)
@@ -101,8 +101,7 @@ entry:
   ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1)
 }
 
-; Function Attrs: norecurse nounwind optsize readonly
-define dso_local i32* @control_flow_no_mem(i32 %n) local_unnamed_addr #1 {
+define dso_local i32* @control_flow_no_mem(i32 %n) local_unnamed_addr nounwind {
 ; CHECK-LABEL: control_flow_no_mem:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(s)
@@ -156,7 +155,7 @@ if.end:
 
 declare void @abort()
 
-define dso_local void @one_store() local_unnamed_addr {
+define dso_local void @one_store() local_unnamed_addr nounwind {
 ; CHECK-LABEL: one_store:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(s+160)
diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll
index f17852cff25ae..5096d45e3ca08 100644
--- a/llvm/test/CodeGen/RISCV/inline-asm.ll
+++ b/llvm/test/CodeGen/RISCV/inline-asm.ll
@@ -6,7 +6,7 @@
 
 @gi = external global i32
 
-define i32 @constraint_r(i32 %a) {
+define i32 @constraint_r(i32 %a) nounwind {
 ; RV32I-LABEL: constraint_r:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a1, %hi(gi)
@@ -29,7 +29,7 @@ define i32 @constraint_r(i32 %a) {
   ret i32 %2
 }
 
-define i32 @constraint_i(i32 %a) {
+define i32 @constraint_i(i32 %a) nounwind {
 ; RV32I-LABEL: constraint_i:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    #APP
@@ -48,7 +48,7 @@ define i32 @constraint_i(i32 %a) {
   ret i32 %2
 }
 
-define void @constraint_m(i32* %a) {
+define void @constraint_m(i32* %a) nounwind {
 ; RV32I-LABEL: constraint_m:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    #APP
@@ -64,7 +64,7 @@ define void @constraint_m(i32* %a) {
   ret void
 }
 
-define i32 @constraint_m2(i32* %a) {
+define i32 @constraint_m2(i32* %a) nounwind {
 ; RV32I-LABEL: constraint_m2:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    #APP
diff --git a/llvm/test/CodeGen/RISCV/jumptable.ll b/llvm/test/CodeGen/RISCV/jumptable.ll
index f849dc959abc4..5a5f5b65111a1 100644
--- a/llvm/test/CodeGen/RISCV/jumptable.ll
+++ b/llvm/test/CodeGen/RISCV/jumptable.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32I
 
-define void @jt(i32 %in, i32* %out) {
+define void @jt(i32 %in, i32* %out) nounwind {
 ; RV32I-LABEL: jt:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    addi a2, zero, 2
diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll
index 42440e4cb445f..d2d0b9ea6c2c6 100644
--- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll
+++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV64 %s
 
-define void @test1(float* %a, float* %b) {
+define void @test1(float* %a, float* %b) nounwind {
 ; RV32-LABEL: test1:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    lw a1, 0(a1)
@@ -28,7 +28,7 @@ entry:
   ret void
 }
 
-define void @test2(double* %a, double* %b) {
+define void @test2(double* %a, double* %b) nounwind {
 ; RV32-LABEL: test2:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    lw a2, 4(a1)
@@ -54,7 +54,7 @@ entry:
   ret void
 }
 
-define void @test3(fp128* %a, fp128* %b) {
+define void @test3(fp128* %a, fp128* %b) nounwind {
 ; RV32-LABEL: test3:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    lw a2, 12(a1)
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index 49b540eb2b1b3..d3f08804a3d4d 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -5,7 +5,7 @@
 ; These IR sequences will generate ISD::ROTL and ISD::ROTR nodes, that the
 ; RISC-V backend must be able to select
 
-define i32 @rotl(i32 %x, i32 %y) {
+define i32 @rotl(i32 %x, i32 %y) nounwind {
 ; RV32I-LABEL: rotl:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi a2, zero, 32
@@ -21,7 +21,7 @@ define i32 @rotl(i32 %x, i32 %y) {
   ret i32 %d
 }
 
-define i32 @rotr(i32 %x, i32 %y) {
+define i32 @rotr(i32 %x, i32 %y) nounwind {
 ; RV32I-LABEL: rotr:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi a2, zero, 32
diff --git a/llvm/test/CodeGen/RISCV/rv64i-tricky-shifts.ll b/llvm/test/CodeGen/RISCV/rv64i-tricky-shifts.ll
index 73eeed7553cce..b01833152ef41 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-tricky-shifts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-tricky-shifts.ll
@@ -7,7 +7,7 @@
 ; patterns might make the mistake of assuming that a (sext_inreg foo, i32) can
 ; only be produced when sign-extending an i32 type.
 
-define i64 @tricky_shl(i64 %a, i64 %b) {
+define i64 @tricky_shl(i64 %a, i64 %b) nounwind {
 ; RV64I-LABEL: tricky_shl:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sll a0, a0, a1
@@ -19,7 +19,7 @@ define i64 @tricky_shl(i64 %a, i64 %b) {
   ret i64 %3
 }
 
-define i64 @tricky_lshr(i64 %a, i64 %b) {
+define i64 @tricky_lshr(i64 %a, i64 %b) nounwind {
 ; RV64I-LABEL: tricky_lshr:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a0, a0, 32
@@ -31,7 +31,7 @@ define i64 @tricky_lshr(i64 %a, i64 %b) {
   ret i64 %2
 }
 
-define i64 @tricky_ashr(i64 %a, i64 %b) {
+define i64 @tricky_ashr(i64 %a, i64 %b) nounwind {
 ; RV64I-LABEL: tricky_ashr:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll
index 96eb6ff101070..7faad3bbc095e 100644
--- a/llvm/test/CodeGen/RISCV/select-cc.ll
+++ b/llvm/test/CodeGen/RISCV/select-cc.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -disable-block-placement -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 
-define i32 @foo(i32 %a, i32 *%b) {
+define i32 @foo(i32 %a, i32 *%b) nounwind {
 ; RV32I-LABEL: foo:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lw a2, 0(a1)
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index 280c68e2ab496..53e68f5e95d73 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64I
 
-define i8 @sext_i1_to_i8(i1 %a) {
+define i8 @sext_i1_to_i8(i1 %a) nounwind {
 ; RV32I-LABEL: sext_i1_to_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -20,7 +20,7 @@ define i8 @sext_i1_to_i8(i1 %a) {
   ret i8 %1
 }
 
-define i16 @sext_i1_to_i16(i1 %a) {
+define i16 @sext_i1_to_i16(i1 %a) nounwind {
 ; RV32I-LABEL: sext_i1_to_i16:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -36,7 +36,7 @@ define i16 @sext_i1_to_i16(i1 %a) {
   ret i16 %1
 }
 
-define i32 @sext_i1_to_i32(i1 %a) {
+define i32 @sext_i1_to_i32(i1 %a) nounwind {
 ; RV32I-LABEL: sext_i1_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -52,7 +52,7 @@ define i32 @sext_i1_to_i32(i1 %a) {
   ret i32 %1
 }
 
-define i64 @sext_i1_to_i64(i1 %a) {
+define i64 @sext_i1_to_i64(i1 %a) nounwind {
 ; RV32I-LABEL: sext_i1_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -69,7 +69,7 @@ define i64 @sext_i1_to_i64(i1 %a) {
   ret i64 %1
 }
 
-define i16 @sext_i8_to_i16(i8 %a) {
+define i16 @sext_i8_to_i16(i8 %a) nounwind {
 ; RV32I-LABEL: sext_i8_to_i16:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a0, a0, 24
@@ -85,7 +85,7 @@ define i16 @sext_i8_to_i16(i8 %a) {
   ret i16 %1
 }
 
-define i32 @sext_i8_to_i32(i8 %a) {
+define i32 @sext_i8_to_i32(i8 %a) nounwind {
 ; RV32I-LABEL: sext_i8_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a0, a0, 24
@@ -101,7 +101,7 @@ define i32 @sext_i8_to_i32(i8 %a) {
   ret i32 %1
 }
 
-define i64 @sext_i8_to_i64(i8 %a) {
+define i64 @sext_i8_to_i64(i8 %a) nounwind {
 ; RV32I-LABEL: sext_i8_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a1, a0, 24
@@ -118,7 +118,7 @@ define i64 @sext_i8_to_i64(i8 %a) {
   ret i64 %1
 }
 
-define i32 @sext_i16_to_i32(i16 %a) {
+define i32 @sext_i16_to_i32(i16 %a) nounwind {
 ; RV32I-LABEL: sext_i16_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a0, a0, 16
@@ -134,7 +134,7 @@ define i32 @sext_i16_to_i32(i16 %a) {
   ret i32 %1
 }
 
-define i64 @sext_i16_to_i64(i16 %a) {
+define i64 @sext_i16_to_i64(i16 %a) nounwind {
 ; RV32I-LABEL: sext_i16_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a1, a0, 16
@@ -151,7 +151,7 @@ define i64 @sext_i16_to_i64(i16 %a) {
   ret i64 %1
 }
 
-define i64 @sext_i32_to_i64(i32 %a) {
+define i64 @sext_i32_to_i64(i32 %a) nounwind {
 ; RV32I-LABEL: sext_i32_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    srai a1, a0, 31
@@ -165,7 +165,7 @@ define i64 @sext_i32_to_i64(i32 %a) {
   ret i64 %1
 }
 
-define i8 @zext_i1_to_i8(i1 %a) {
+define i8 @zext_i1_to_i8(i1 %a) nounwind {
 ; RV32I-LABEL: zext_i1_to_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -179,7 +179,7 @@ define i8 @zext_i1_to_i8(i1 %a) {
   ret i8 %1
 }
 
-define i16 @zext_i1_to_i16(i1 %a) {
+define i16 @zext_i1_to_i16(i1 %a) nounwind {
 ; RV32I-LABEL: zext_i1_to_i16:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -193,7 +193,7 @@ define i16 @zext_i1_to_i16(i1 %a) {
   ret i16 %1
 }
 
-define i32 @zext_i1_to_i32(i1 %a) {
+define i32 @zext_i1_to_i32(i1 %a) nounwind {
 ; RV32I-LABEL: zext_i1_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -207,7 +207,7 @@ define i32 @zext_i1_to_i32(i1 %a) {
   ret i32 %1
 }
 
-define i64 @zext_i1_to_i64(i1 %a) {
+define i64 @zext_i1_to_i64(i1 %a) nounwind {
 ; RV32I-LABEL: zext_i1_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 1
@@ -222,7 +222,7 @@ define i64 @zext_i1_to_i64(i1 %a) {
   ret i64 %1
 }
 
-define i16 @zext_i8_to_i16(i8 %a) {
+define i16 @zext_i8_to_i16(i8 %a) nounwind {
 ; RV32I-LABEL: zext_i8_to_i16:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
@@ -236,7 +236,7 @@ define i16 @zext_i8_to_i16(i8 %a) {
   ret i16 %1
 }
 
-define i32 @zext_i8_to_i32(i8 %a) {
+define i32 @zext_i8_to_i32(i8 %a) nounwind {
 ; RV32I-LABEL: zext_i8_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
@@ -250,7 +250,7 @@ define i32 @zext_i8_to_i32(i8 %a) {
   ret i32 %1
 }
 
-define i64 @zext_i8_to_i64(i8 %a) {
+define i64 @zext_i8_to_i64(i8 %a) nounwind {
 ; RV32I-LABEL: zext_i8_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a0, a0, 255
@@ -265,7 +265,7 @@ define i64 @zext_i8_to_i64(i8 %a) {
   ret i64 %1
 }
 
-define i32 @zext_i16_to_i32(i16 %a) {
+define i32 @zext_i16_to_i32(i16 %a) nounwind {
 ; RV32I-LABEL: zext_i16_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a1, 16
@@ -283,7 +283,7 @@ define i32 @zext_i16_to_i32(i16 %a) {
   ret i32 %1
 }
 
-define i64 @zext_i16_to_i64(i16 %a) {
+define i64 @zext_i16_to_i64(i16 %a) nounwind {
 ; RV32I-LABEL: zext_i16_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a1, 16
@@ -302,7 +302,7 @@ define i64 @zext_i16_to_i64(i16 %a) {
   ret i64 %1
 }
 
-define i64 @zext_i32_to_i64(i32 %a) {
+define i64 @zext_i32_to_i64(i32 %a) nounwind {
 ; RV32I-LABEL: zext_i32_to_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    mv a1, zero
@@ -317,7 +317,7 @@ define i64 @zext_i32_to_i64(i32 %a) {
   ret i64 %1
 }
 
-define i1 @trunc_i8_to_i1(i8 %a) {
+define i1 @trunc_i8_to_i1(i8 %a) nounwind {
 ; RV32I-LABEL: trunc_i8_to_i1:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -329,7 +329,7 @@ define i1 @trunc_i8_to_i1(i8 %a) {
   ret i1 %1
 }
 
-define i1 @trunc_i16_to_i1(i16 %a) {
+define i1 @trunc_i16_to_i1(i16 %a) nounwind {
 ; RV32I-LABEL: trunc_i16_to_i1:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -341,7 +341,7 @@ define i1 @trunc_i16_to_i1(i16 %a) {
   ret i1 %1
 }
 
-define i1 @trunc_i32_to_i1(i32 %a) {
+define i1 @trunc_i32_to_i1(i32 %a) nounwind {
 ; RV32I-LABEL: trunc_i32_to_i1:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -353,7 +353,7 @@ define i1 @trunc_i32_to_i1(i32 %a) {
   ret i1 %1
 }
 
-define i1 @trunc_i64_to_i1(i64 %a) {
+define i1 @trunc_i64_to_i1(i64 %a) nounwind {
 ; RV32I-LABEL: trunc_i64_to_i1:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -365,7 +365,7 @@ define i1 @trunc_i64_to_i1(i64 %a) {
   ret i1 %1
 }
 
-define i8 @trunc_i16_to_i8(i16 %a) {
+define i8 @trunc_i16_to_i8(i16 %a) nounwind {
 ; RV32I-LABEL: trunc_i16_to_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -377,7 +377,7 @@ define i8 @trunc_i16_to_i8(i16 %a) {
   ret i8 %1
 }
 
-define i8 @trunc_i32_to_i8(i32 %a) {
+define i8 @trunc_i32_to_i8(i32 %a) nounwind {
 ; RV32I-LABEL: trunc_i32_to_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -389,7 +389,7 @@ define i8 @trunc_i32_to_i8(i32 %a) {
   ret i8 %1
 }
 
-define i8 @trunc_i64_to_i8(i64 %a) {
+define i8 @trunc_i64_to_i8(i64 %a) nounwind {
 ; RV32I-LABEL: trunc_i64_to_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -401,7 +401,7 @@ define i8 @trunc_i64_to_i8(i64 %a) {
   ret i8 %1
 }
 
-define i16 @trunc_i32_to_i16(i32 %a) {
+define i16 @trunc_i32_to_i16(i32 %a) nounwind {
 ; RV32I-LABEL: trunc_i32_to_i16:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -413,7 +413,7 @@ define i16 @trunc_i32_to_i16(i32 %a) {
   ret i16 %1
 }
 
-define i16 @trunc_i64_to_i16(i64 %a) {
+define i16 @trunc_i64_to_i16(i64 %a) nounwind {
 ; RV32I-LABEL: trunc_i64_to_i16:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
@@ -425,7 +425,7 @@ define i16 @trunc_i64_to_i16(i64 %a) {
   ret i16 %1
 }
 
-define i32 @trunc_i64_to_i32(i64 %a) {
+define i32 @trunc_i64_to_i32(i64 %a) nounwind {
 ; RV32I-LABEL: trunc_i64_to_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 4d7db01d1fb7f..dea1521b05c49 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -3,7 +3,7 @@
 
 ; Perform tail call optimization for global address.
 declare i32 @callee_tail(i32 %i)
-define i32 @caller_tail(i32 %i) {
+define i32 @caller_tail(i32 %i) nounwind {
 ; CHECK-LABEL: caller_tail
 ; CHECK: tail callee_tail
 entry:
@@ -26,7 +26,7 @@ entry:
 ; Perform indirect tail call optimization (for function pointer call).
 declare void @callee_indirect1()
 declare void @callee_indirect2()
-define void @caller_indirect_tail(i32 %a) {
+define void @caller_indirect_tail(i32 %a) nounwind {
 ; CHECK-LABEL: caller_indirect_tail
 ; CHECK-NOT: call callee_indirect1
 ; CHECK-NOT: call callee_indirect2
@@ -49,7 +49,7 @@ entry:
 
 ; Do not tail call optimize functions with varargs.
 declare i32 @callee_varargs(i32, ...)
-define void @caller_varargs(i32 %a, i32 %b) {
+define void @caller_varargs(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: caller_varargs
 ; CHECK-NOT: tail callee_varargs
 ; CHECK: call callee_varargs
@@ -60,7 +60,7 @@ entry:
 
 ; Do not tail call optimize if stack is used to pass parameters.
 declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
-define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) {
+define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
 ; CHECK-LABEL: caller_args
 ; CHECK-NOT: tail callee_args
 ; CHECK: call callee_args
@@ -71,7 +71,7 @@ entry:
 
 ; Do not tail call optimize if parameters need to be passed indirectly.
 declare i32 @callee_indirect_args(fp128 %a)
-define void @caller_indirect_args() {
+define void @caller_indirect_args() nounwind {
 ; CHECK-LABEL: caller_indirect_args
 ; CHECK-NOT: tail callee_indirect_args
 ; CHECK: call callee_indirect_args
@@ -85,7 +85,7 @@ entry:
 ; calls) is implementation-defined, so we cannot rely on the linker replacing
 ; the tail call with a return.
 declare extern_weak void @callee_weak()
-define void @caller_weak() {
+define void @caller_weak() nounwind {
 ; CHECK-LABEL: caller_weak
 ; CHECK-NOT: tail callee_weak
 ; CHECK: call callee_weak
@@ -112,7 +112,7 @@ attributes #0 = { "interrupt"="machine" }
 ; we want to reuse during a tail call. Do not tail call optimize functions with
 ; byval parameters.
 declare i32 @callee_byval(i32** byval %a)
-define i32 @caller_byval() {
+define i32 @caller_byval() nounwind {
 ; CHECK-LABEL: caller_byval
 ; CHECK-NOT: tail callee_byval
 ; CHECK: call callee_byval
@@ -127,7 +127,7 @@ entry:
 @a = global %struct.A zeroinitializer
 
 declare void @callee_struct(%struct.A* sret %a)
-define void @caller_nostruct() {
+define void @caller_nostruct() nounwind {
 ; CHECK-LABEL: caller_nostruct
 ; CHECK-NOT: tail callee_struct
 ; CHECK: call callee_struct
@@ -138,7 +138,7 @@ entry:
 
 ; Do not tail call optimize if caller uses structret semantics.
 declare void @callee_nostruct()
-define void @caller_struct(%struct.A* sret %a) {
+define void @caller_struct(%struct.A* sret %a) nounwind {
 ; CHECK-LABEL: caller_struct
 ; CHECK-NOT: tail callee_nostruct
 ; CHECK: call callee_nostruct
diff --git a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
index 9ea4c36a866e3..12c7796995a81 100644
--- a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
+++ b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
@@ -6,7 +6,7 @@
 
 @bytes = global [5 x i8] zeroinitializer, align 1
 
-define i32 @test_zext_i8() {
+define i32 @test_zext_i8() nounwind {
 ; RV32I-LABEL: test_zext_i8:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(bytes)
@@ -41,7 +41,7 @@ if.end:
 
 @shorts = global [5 x i16] zeroinitializer, align 2
 
-define i32 @test_zext_i16() {
+define i32 @test_zext_i16() nounwind {
 ; RV32I-LABEL: test_zext_i16:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(shorts)

From 6b48742e43f603b6458ed02dd4dc5d8021922a21 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Thu, 23 May 2019 13:11:00 +0000
Subject: [PATCH 0030/1176] [libcxx][tests] Fix order checking in
 unordered_multiset tests.

Some tests assume that iteration through an unordered multiset elements
will return them in the same order as at the container creation. This
assumption is not true since the container is unordered, so that no
specific order of elements is ever guaranteed for such container. This
patch introduces checks verifying that any iteration will return
elements exactly from a set of valid values and without repetition,
but in no particular order.

Thanks to Andrey Maksimov for  the patch.

Differential Revision: https://reviews.llvm.org/D56500

llvm-svn: 361494
---
 .../unord.multiset.cnstr/assign_copy.pass.cpp | 49 +++++--------------
 .../unord.multiset.cnstr/assign_move.pass.cpp | 17 ++-----
 .../unord.multiset.cnstr/copy.pass.cpp        | 49 +++++--------------
 .../unord.multiset.cnstr/copy_alloc.pass.cpp  | 33 ++++---------
 .../unord.multiset.cnstr/move_alloc.pass.cpp  | 17 ++-----
 5 files changed, 45 insertions(+), 120 deletions(-)

diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_copy.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_copy.pass.cpp
index 1fcd71002e955..1f8cc68b38235 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_copy.pass.cpp
@@ -22,6 +22,7 @@
 #include <cstddef>
 
 #include "test_macros.h"
+#include "../../../check_consecutive.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
@@ -61,18 +62,10 @@ int main(int, char**)
         c = c0;
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == A(4));
@@ -133,18 +126,10 @@ int main(int, char**)
         c = c0;
         assert(c.bucket_count() >= 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == A(10));
@@ -187,18 +172,10 @@ int main(int, char**)
         c = c0;
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == A());
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_move.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_move.pass.cpp
index 1ed77851cf3f2..f375baeaf2c61 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_move.pass.cpp
@@ -23,6 +23,7 @@
 #include <cstddef>
 
 #include "test_macros.h"
+#include "../../../check_consecutive.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
@@ -62,18 +63,10 @@ int main(int, char**)
         c = std::move(c0);
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == A(4));
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy.pass.cpp
index f3ca15241ad9d..aa2edf1de4415 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy.pass.cpp
@@ -21,6 +21,7 @@
 #include <cstddef>
 
 #include "test_macros.h"
+#include "../../../check_consecutive.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
@@ -53,18 +54,10 @@ int main(int, char**)
         C c = c0;
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == test_allocator<int>(10));
@@ -100,18 +93,10 @@ int main(int, char**)
         C c = c0;
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == other_allocator<int>(-2));
@@ -146,18 +131,10 @@ int main(int, char**)
         C c = c0;
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == min_allocator<int>());
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy_alloc.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy_alloc.pass.cpp
index 4e99490ea6111..d0579292af401 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/copy_alloc.pass.cpp
@@ -21,6 +21,7 @@
 #include <cstddef>
 
 #include "test_macros.h"
+#include "../../../check_consecutive.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
@@ -53,18 +54,10 @@ int main(int, char**)
         C c(c0, test_allocator<int>(5));
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == test_allocator<int>(5));
@@ -100,18 +93,10 @@ int main(int, char**)
         C c(c0, min_allocator<int>());
         LIBCPP_ASSERT(c.bucket_count() == 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == min_allocator<int>());
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.pass.cpp
index eb567a9da7533..3ed04574bb0f1 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.pass.cpp
@@ -23,6 +23,7 @@
 #include <cstddef>
 
 #include "test_macros.h"
+#include "../../../check_consecutive.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
@@ -56,18 +57,10 @@ int main(int, char**)
         C c(std::move(c0), A(12));
         assert(c.bucket_count() >= 7);
         assert(c.size() == 6);
-        C::const_iterator i = c.cbegin();
-        assert(*i == 1);
-        ++i;
-        assert(*i == 1);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 2);
-        ++i;
-        assert(*i == 3);
-        ++i;
-        assert(*i == 4);
+        CheckConsecutiveValues<C::const_iterator>(c.find(1), c.end(), 1, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(2), c.end(), 2, 2);
+        CheckConsecutiveValues<C::const_iterator>(c.find(3), c.end(), 3, 1);
+        CheckConsecutiveValues<C::const_iterator>(c.find(4), c.end(), 4, 1);
         assert(c.hash_function() == test_hash<std::hash<int> >(8));
         assert(c.key_eq() == test_compare<std::equal_to<int> >(9));
         assert(c.get_allocator() == A(12));

From 46806749ac3a334bdcabb59b7082471aaf19a28a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 23 May 2019 13:30:10 +0000
Subject: [PATCH 0031/1176] [X86] Regenerate LZCNT tests on x86/x32/x64 targets

llvm-svn: 361495
---
 llvm/test/CodeGen/X86/lzcnt.ll | 171 +++++++++++++++++++++++++++++----
 1 file changed, 154 insertions(+), 17 deletions(-)

diff --git a/llvm/test/CodeGen/X86/lzcnt.ll b/llvm/test/CodeGen/X86/lzcnt.ll
index 1f0c6b3da2bf5..b85c1c388e467 100644
--- a/llvm/test/CodeGen/X86/lzcnt.ll
+++ b/llvm/test/CodeGen/X86/lzcnt.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+lzcnt | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32  -mattr=+lzcnt | FileCheck %s --check-prefixes=CHECK,X32
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s --check-prefixes=CHECK,X64
 
 declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
 declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
@@ -6,57 +9,191 @@ declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
 
 define i8 @t1(i8 %x) nounwind  {
+; X86-LABEL: t1:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    lzcntl %eax, %eax
+; X86-NEXT:    addl $-24, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X32-LABEL: t1:
+; X32:       # %bb.0:
+; X32-NEXT:    movzbl %dil, %eax
+; X32-NEXT:    lzcntl %eax, %eax
+; X32-NEXT:    addl $-24, %eax
+; X32-NEXT:    # kill: def $al killed $al killed $eax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t1:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    lzcntl %eax, %eax
+; X64-NEXT:    addl $-24, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
 	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
 	ret i8 %tmp
-; CHECK-LABEL: t1:
-; CHECK: lzcntl
 }
 
 define i16 @t2(i16 %x) nounwind  {
+; X86-LABEL: t2:
+; X86:       # %bb.0:
+; X86-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    retl
+;
+; X32-LABEL: t2:
+; X32:       # %bb.0:
+; X32-NEXT:    lzcntw %di, %ax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t2:
+; X64:       # %bb.0:
+; X64-NEXT:    lzcntw %di, %ax
+; X64-NEXT:    retq
 	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
 	ret i16 %tmp
-; CHECK-LABEL: t2:
-; CHECK: lzcntw
 }
 
 define i32 @t3(i32 %x) nounwind  {
+; X86-LABEL: t3:
+; X86:       # %bb.0:
+; X86-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X32-LABEL: t3:
+; X32:       # %bb.0:
+; X32-NEXT:    lzcntl %edi, %eax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t3:
+; X64:       # %bb.0:
+; X64-NEXT:    lzcntl %edi, %eax
+; X64-NEXT:    retq
 	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
 	ret i32 %tmp
-; CHECK-LABEL: t3:
-; CHECK: lzcntl
 }
 
 define i64 @t4(i64 %x) nounwind  {
+; X86-LABEL: t4:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB3_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB3_1:
+; X86-NEXT:    lzcntl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
+;
+; X32-LABEL: t4:
+; X32:       # %bb.0:
+; X32-NEXT:    lzcntq %rdi, %rax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t4:
+; X64:       # %bb.0:
+; X64-NEXT:    lzcntq %rdi, %rax
+; X64-NEXT:    retq
 	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
 	ret i64 %tmp
-; CHECK-LABEL: t4:
-; CHECK: lzcntq
 }
 
 define i8 @t5(i8 %x) nounwind  {
+; X86-LABEL: t5:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    lzcntl %eax, %eax
+; X86-NEXT:    addl $-24, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    retl
+;
+; X32-LABEL: t5:
+; X32:       # %bb.0:
+; X32-NEXT:    movzbl %dil, %eax
+; X32-NEXT:    lzcntl %eax, %eax
+; X32-NEXT:    addl $-24, %eax
+; X32-NEXT:    # kill: def $al killed $al killed $eax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t5:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    lzcntl %eax, %eax
+; X64-NEXT:    addl $-24, %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    retq
 	%tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
 	ret i8 %tmp
-; CHECK-LABEL: t5:
-; CHECK: lzcntl
 }
 
 define i16 @t6(i16 %x) nounwind  {
+; X86-LABEL: t6:
+; X86:       # %bb.0:
+; X86-NEXT:    lzcntw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    retl
+;
+; X32-LABEL: t6:
+; X32:       # %bb.0:
+; X32-NEXT:    lzcntw %di, %ax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t6:
+; X64:       # %bb.0:
+; X64-NEXT:    lzcntw %di, %ax
+; X64-NEXT:    retq
 	%tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
 	ret i16 %tmp
-; CHECK-LABEL: t6:
-; CHECK: lzcntw
 }
 
 define i32 @t7(i32 %x) nounwind  {
+; X86-LABEL: t7:
+; X86:       # %bb.0:
+; X86-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X32-LABEL: t7:
+; X32:       # %bb.0:
+; X32-NEXT:    lzcntl %edi, %eax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t7:
+; X64:       # %bb.0:
+; X64-NEXT:    lzcntl %edi, %eax
+; X64-NEXT:    retq
 	%tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
 	ret i32 %tmp
-; CHECK-LABEL: t7:
-; CHECK: lzcntl
 }
 
 define i64 @t8(i64 %x) nounwind  {
+; X86-LABEL: t8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    jne .LBB7_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    lzcntl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $32, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB7_1:
+; X86-NEXT:    lzcntl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    retl
+;
+; X32-LABEL: t8:
+; X32:       # %bb.0:
+; X32-NEXT:    lzcntq %rdi, %rax
+; X32-NEXT:    retq
+;
+; X64-LABEL: t8:
+; X64:       # %bb.0:
+; X64-NEXT:    lzcntq %rdi, %rax
+; X64-NEXT:    retq
 	%tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
 	ret i64 %tmp
-; CHECK-LABEL: t8:
-; CHECK: lzcntq
 }

From dd0d9e01eeaab849701cce1a22342b04e79df54b Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Thu, 23 May 2019 13:42:47 +0000
Subject: [PATCH 0032/1176] [MCA] Introduce class LSUnitBase and let LSUnit
 derive from it.

Class LSUnitBase provides a abstract interface for all the concrete LS units in
llvm-mca.

Methods exposed by the public abstract LSUnitBase interface are:
 - Status isAvailable(const InstRef&);
 - void dispatch(const InstRef &);
 - const InstRef &isReady(const InstRef &);

LSUnitBase standardises the API, but not the data structures internally used by
LS units. This allows for more flexibility.
Previously, only method `isReady()` was declared virtual by class LSUnit.
Also, derived classes had to inherit all the internal data members of LSUnit.

No functional change intended.

llvm-svn: 361496
---
 llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 174 +++++++++++++------
 llvm/lib/MCA/HardwareUnits/LSUnit.cpp        | 131 +++++++-------
 llvm/lib/MCA/HardwareUnits/Scheduler.cpp     |  17 +-
 3 files changed, 196 insertions(+), 126 deletions(-)

diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
index c9e1cc3cdc4f1..e2ba9cbbf8bee 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -18,13 +18,83 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
 
 namespace llvm {
 namespace mca {
 
-class InstRef;
 class Scheduler;
 
+/// Abstract base interface for LS (load/store) units in llvm-mca.
+class LSUnitBase : public HardwareUnit {
+  /// Load queue size.
+  ///
+  /// A value of zero for this field means that the load queue is unbounded.
+  /// Processor models can declare the size of a load queue via tablegen (see
+  /// the definition of tablegen class LoadQueue in
+  /// llvm/Target/TargetSchedule.td).
+  unsigned LQSize;
+
+  /// Load queue size.
+  ///
+  /// A value of zero for this field means that the store queue is unbounded.
+  /// Processor models can declare the size of a store queue via tablegen (see
+  /// the definition of tablegen class StoreQueue in
+  /// llvm/Target/TargetSchedule.td).
+  unsigned SQSize;
+
+  /// True if loads don't alias with stores.
+  ///
+  /// By default, the LS unit assumes that loads and stores don't alias with
+  /// eachother. If this field is set to false, then loads are always assumed to
+  /// alias with stores.
+  const bool NoAlias;
+
+public:
+  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
+             unsigned StoreQueueSize, bool AssumeNoAlias);
+
+  virtual ~LSUnitBase();
+
+  /// Returns the total number of entries in the load queue.
+  unsigned getLoadQueueSize() const { return LQSize; }
+
+  /// Returns the total number of entries in the store queue.
+  unsigned getStoreQueueSize() const { return SQSize; }
+
+  bool assumeNoAlias() const { return NoAlias; }
+
+  enum Status {
+    LSU_AVAILABLE = 0,
+    LSU_LQUEUE_FULL, // Load Queue unavailable
+    LSU_SQUEUE_FULL  // Store Queue unavailable
+  };
+
+  /// This method checks the availability of the load/store buffers.
+  ///
+  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
+  /// accomodate instruction IR. By default, LSU_AVAILABLE is returned if IR is
+  /// not a memory operation.
+  virtual Status isAvailable(const InstRef &IR) const = 0;
+
+  /// Allocates LS resources for instruction IR.
+  ///
+  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
+  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
+  virtual void dispatch(const InstRef &IR) = 0;
+
+  /// Check if a peviously dispatched instruction IR is now ready for execution.
+  ///
+  /// Instruction IR is assumed to be a memory operation. If IR is still waiting
+  /// on another memory instruction M, then M is returned to the caller. If IR
+  /// depends on more than one memory operations, then this method returns one
+  /// of them.
+  ///
+  /// Derived classes can implement memory consistency rules for simulated
+  /// processor within this member function.
+  virtual const InstRef &isReady(const InstRef &IR) const = 0;
+};
+
 /// A Load/Store Unit implementing a load and store queues.
 ///
 /// This class implements a load queue and a store queue to emulate the
@@ -88,18 +158,7 @@ class Scheduler;
 /// A load/store barrier is "executed" when it becomes the oldest entry in
 /// the load/store queue(s). That also means, all the older loads/stores have
 /// already been executed.
-class LSUnit : public HardwareUnit {
-  // Load queue size.
-  // LQ_Size == 0 means that there are infinite slots in the load queue.
-  unsigned LQ_Size;
-
-  // Store queue size.
-  // SQ_Size == 0 means that there are infinite slots in the store queue.
-  unsigned SQ_Size;
-
-  // If true, loads will never alias with stores. This is the default.
-  bool NoAlias;
-
+class LSUnit : public LSUnitBase {
   // When a `MayLoad` instruction is dispatched to the schedulers for execution,
   // the LSUnit reserves an entry in the `LoadQueue` for it.
   //
@@ -138,68 +197,75 @@ class LSUnit : public HardwareUnit {
   // alternative approaches that let instructions specify the number of
   // load/store queue entries which they consume at dispatch stage (See
   // PR39830).
-  SmallSet<unsigned, 16> LoadQueue;
-  SmallSet<unsigned, 16> StoreQueue;
+  SmallSet<InstRef, 16> LoadQueue;
+  SmallSet<InstRef, 16> StoreQueue;
 
-  void assignLQSlot(unsigned Index);
-  void assignSQSlot(unsigned Index);
+  void assignLQSlot(const InstRef &IR);
+  void assignSQSlot(const InstRef &IR);
 
   // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a store barrier. It forces older store to be
   // executed before newer stores are issued.
-  SmallSet<unsigned, 8> StoreBarriers;
+  SmallSet<InstRef, 8> StoreBarriers;
 
   // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a load barrier. It forces older loads to execute
   // before newer loads are issued.
-  SmallSet<unsigned, 8> LoadBarriers;
+  SmallSet<InstRef, 8> LoadBarriers;
 
   bool isSQEmpty() const { return StoreQueue.empty(); }
   bool isLQEmpty() const { return LoadQueue.empty(); }
-  bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
-  bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+  bool isSQFull() const {
+    return getStoreQueueSize() != 0 && StoreQueue.size() == getStoreQueueSize();
+  }
+  bool isLQFull() const {
+    return getLoadQueueSize() != 0 && LoadQueue.size() == getLoadQueueSize();
+  }
 
 public:
-  LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
-         bool AssumeNoAlias = false);
+  LSUnit(const MCSchedModel &SM)
+      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
+  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
+      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
+  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
+      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias) {}
 
 #ifndef NDEBUG
   void dump() const;
 #endif
 
-  enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
+  /// accomodate instruction IR.
+  Status isAvailable(const InstRef &IR) const override;
 
-  // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
-  // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
-  Status isAvailable(const InstRef &IR) const;
+  /// Allocates LS resources for instruction IR.
+  ///
+  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
+  /// returning LSU_AVAILABLE.
+  void dispatch(const InstRef &IR) override;
 
-  // Allocates load/store queue resources for IR.
-  //
-  // This method assumes that a previous call to `isAvailable(IR)` returned
-  // LSU_AVAILABLE, and that IR is a memory operation.
-  void dispatch(const InstRef &IR);
-
-  // By default, rules are:
-  // 1. A store may not pass a previous store.
-  // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
-  // 3. A load may pass a previous load.
-  // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
-  // 5. A load has to wait until an older load barrier is fully executed.
-  // 6. A store has to wait until an older store barrier is fully executed.
-  //
-  // Returns an instruction identifier. If IR is ready, then this method returns
-  // `IR.getSourceIndex()`. Otherwise it returns the instruction ID of the
-  // dependent (i.e. conflicting) memory instruction.
-  virtual unsigned isReady(const InstRef &IR) const;
-
-  // Load and store instructions are tracked by their corresponding queues from
-  // dispatch until the "instruction executed" event.
-  // Only when a load instruction reaches the 'Executed' stage, its value
-  // becomes available to the users. At that point, the load no longer needs to
-  // be tracked by the load queue.
-  // FIXME: For simplicity, we optimistically assume a similar behavior for
-  // store instructions. In practice, store operations don't tend to leave the
-  // store queue until they reach the 'Retired' stage (See PR39830).
+  /// Check if a peviously dispatched instruction IR is now ready for execution.
+  ///
+  /// Rules are:
+  /// By default, rules are:
+  /// 1. A store may not pass a previous store.
+  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+  /// 3. A load may pass a previous load.
+  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+  /// 5. A load has to wait until an older load barrier is fully executed.
+  /// 6. A store has to wait until an older store barrier is fully executed.
+  const InstRef &isReady(const InstRef &IR) const override;
+
+  /// Instruction executed event handler.
+  ///
+  /// Load and store instructions are tracked by their corresponding queues from
+  /// dispatch until "instruction executed" event.
+  /// When a load instruction Ld reaches the 'Executed' stage, its value
+  /// is propagated to all the dependent users, and the LS unit stops tracking
+  /// Ld.
+  /// FIXME: For simplicity, we optimistically assume a similar behavior for
+  /// store instructions. In practice, store operations don't tend to leave the
+  /// store queue until they reach the 'Retired' stage (See PR39830).
   void onInstructionExecuted(const InstRef &IR);
 };
 
diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
index 4f49fbd2bb401..c3866d6bba7a0 100644
--- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -21,48 +21,48 @@
 namespace llvm {
 namespace mca {
 
-LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
-               bool AssumeNoAlias)
-    : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
+LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+                       bool AssumeNoAlias)
+    : LQSize(LQ), SQSize(SQ), NoAlias(AssumeNoAlias) {
   if (SM.hasExtraProcessorInfo()) {
     const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
-    if (!LQ_Size && EPI.LoadQueueID) {
+    if (!LQSize && EPI.LoadQueueID) {
       const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
-      LQ_Size = LdQDesc.BufferSize;
+      LQSize = LdQDesc.BufferSize;
     }
 
-    if (!SQ_Size && EPI.StoreQueueID) {
+    if (!SQSize && EPI.StoreQueueID) {
       const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
-      SQ_Size = StQDesc.BufferSize;
+      SQSize = StQDesc.BufferSize;
     }
   }
 }
 
+LSUnitBase::~LSUnitBase() {}
+
 #ifndef NDEBUG
 void LSUnit::dump() const {
-  dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
-  dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
+  dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n';
+  dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n';
   dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
   dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
 }
 #endif
 
-void LSUnit::assignLQSlot(unsigned Index) {
-  assert(!isLQFull());
-  assert(LoadQueue.count(Index) == 0);
+void LSUnit::assignLQSlot(const InstRef &IR) {
+  assert(!isLQFull() && "Load Queue is full!");
 
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
+  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << IR.getSourceIndex()
                     << ",slot=" << LoadQueue.size() << ">\n");
-  LoadQueue.insert(Index);
+  LoadQueue.insert(IR);
 }
 
-void LSUnit::assignSQSlot(unsigned Index) {
-  assert(!isSQFull());
-  assert(StoreQueue.count(Index) == 0);
+void LSUnit::assignSQSlot(const InstRef &IR) {
+  assert(!isSQFull() && "Store Queue is full!");
 
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
+  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << IR.getSourceIndex()
                     << ",slot=" << StoreQueue.size() << ">\n");
-  StoreQueue.insert(Index);
+  StoreQueue.insert(IR);
 }
 
 void LSUnit::dispatch(const InstRef &IR) {
@@ -70,17 +70,16 @@ void LSUnit::dispatch(const InstRef &IR) {
   unsigned IsMemBarrier = Desc.HasSideEffects;
   assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
 
-  const unsigned Index = IR.getSourceIndex();
   if (Desc.MayLoad) {
     if (IsMemBarrier)
-      LoadBarriers.insert(Index);
-    assignLQSlot(Index);
+      LoadBarriers.insert(IR);
+    assignLQSlot(IR);
   }
 
   if (Desc.MayStore) {
     if (IsMemBarrier)
-      StoreBarriers.insert(Index);
-    assignSQSlot(Index);
+      StoreBarriers.insert(IR);
+    assignSQSlot(IR);
   }
 }
 
@@ -93,65 +92,67 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
   return LSUnit::LSU_AVAILABLE;
 }
 
-unsigned LSUnit::isReady(const InstRef &IR) const {
+const InstRef &LSUnit::isReady(const InstRef &IR) const {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
   const unsigned Index = IR.getSourceIndex();
   bool IsALoad = Desc.MayLoad;
   bool IsAStore = Desc.MayStore;
   assert((IsALoad || IsAStore) && "Not a memory operation!");
-  assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
-  assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
 
   if (IsALoad && !LoadBarriers.empty()) {
-    unsigned LoadBarrierIndex = *LoadBarriers.begin();
+    const InstRef &LoadBarrier = *LoadBarriers.begin();
     // A younger load cannot pass a older load barrier.
-    if (Index > LoadBarrierIndex)
-      return LoadBarrierIndex;
+    if (Index > LoadBarrier.getSourceIndex())
+      return LoadBarrier;
     // A load barrier cannot pass a older load.
-    if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
-      return *LoadQueue.begin();
+    if (Index == LoadBarrier.getSourceIndex()) {
+      const InstRef &Load = *LoadQueue.begin();
+      if (Index != Load.getSourceIndex())
+        return Load;
+    }
   }
 
   if (IsAStore && !StoreBarriers.empty()) {
-    unsigned StoreBarrierIndex = *StoreBarriers.begin();
+    const InstRef &StoreBarrier = *StoreBarriers.begin();
     // A younger store cannot pass a older store barrier.
-    if (Index > StoreBarrierIndex)
-      return StoreBarrierIndex;
+    if (Index > StoreBarrier.getSourceIndex())
+      return StoreBarrier;
     // A store barrier cannot pass a older store.
-    if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
-      return *StoreQueue.begin();
+    if (Index == StoreBarrier.getSourceIndex()) {
+      const InstRef &Store = *StoreQueue.begin();
+      if (Index != Store.getSourceIndex())
+        return Store;
+    }
   }
 
   // A load may not pass a previous store unless flag 'NoAlias' is set.
   // A load may pass a previous load.
-  if (NoAlias && IsALoad)
-    return Index;
+  if (assumeNoAlias() && IsALoad)
+    return IR;
 
   if (StoreQueue.size()) {
     // A load may not pass a previous store.
     // A store may not pass a previous store.
-    if (Index > *StoreQueue.begin())
-      return *StoreQueue.begin();
+    const InstRef &Store = *StoreQueue.begin();
+    if (Index > Store.getSourceIndex())
+      return Store;
   }
 
   // Okay, we are older than the oldest store in the queue.
-  // If there are no pending loads, then we can say for sure that this
-  // instruction is ready.
   if (isLQEmpty())
-    return Index;
+    return IR;
 
   // Check if there are no older loads.
-  if (Index <= *LoadQueue.begin())
-    return Index;
+  const InstRef &Load = *LoadQueue.begin();
+  if (Index <= Load.getSourceIndex())
+    return IR;
 
-  // There is at least one younger load.
-  //
   // A load may pass a previous load.
   if (IsALoad)
-    return Index;
+    return IR;
 
   // A store may not pass a previous load.
-  return *LoadQueue.begin();
+  return Load;
 }
 
 void LSUnit::onInstructionExecuted(const InstRef &IR) {
@@ -161,29 +162,35 @@ void LSUnit::onInstructionExecuted(const InstRef &IR) {
   bool IsAStore = Desc.MayStore;
 
   if (IsALoad) {
-    if (LoadQueue.erase(Index)) {
+    if (LoadQueue.erase(IR)) {
       LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
                         << " has been removed from the load queue.\n");
     }
-    if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
-      LLVM_DEBUG(
-          dbgs() << "[LSUnit]: Instruction idx=" << Index
-                 << " has been removed from the set of load barriers.\n");
-      LoadBarriers.erase(Index);
+    if (!LoadBarriers.empty()) {
+      const InstRef &LoadBarrier = *LoadBarriers.begin();
+      if (Index == LoadBarrier.getSourceIndex()) {
+        LLVM_DEBUG(
+            dbgs() << "[LSUnit]: Instruction idx=" << Index
+                   << " has been removed from the set of load barriers.\n");
+        LoadBarriers.erase(IR);
+      }
     }
   }
 
   if (IsAStore) {
-    if (StoreQueue.erase(Index)) {
+    if (StoreQueue.erase(IR)) {
       LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
                         << " has been removed from the store queue.\n");
     }
 
-    if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
-      LLVM_DEBUG(
-          dbgs() << "[LSUnit]: Instruction idx=" << Index
-                 << " has been removed from the set of store barriers.\n");
-      StoreBarriers.erase(Index);
+    if (!StoreBarriers.empty()) {
+      const InstRef &StoreBarrier = *StoreBarriers.begin();
+      if (Index == StoreBarrier.getSourceIndex()) {
+        LLVM_DEBUG(
+            dbgs() << "[LSUnit]: Instruction idx=" << Index
+                   << " has been removed from the set of store barriers.\n");
+        StoreBarriers.erase(IR);
+      }
     }
   }
 }
diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index 9eeea9d011340..bf48d9288994c 100644
--- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -119,9 +119,9 @@ bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
     // Check if there are still unsolved memory dependencies.
     Instruction &IS = *IR.getInstruction();
     if (IS.isMemOp()) {
-      unsigned CriticalMemDep = LSU.isReady(IR);
-      if (CriticalMemDep != IR.getSourceIndex()) {
-        IS.setCriticalMemDep(CriticalMemDep);
+      const InstRef &CriticalMemDep = LSU.isReady(IR);
+      if (CriticalMemDep != IR) {
+        IS.setCriticalMemDep(CriticalMemDep.getSourceIndex());
         ++I;
         continue;
       }
@@ -158,7 +158,7 @@ bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) {
       break;
 
     // Check if this instruction is now ready. In case, force
-    // a transition in state using method 'update()'.
+    // a transition in state using method 'updateDispatched()'.
     Instruction &IS = *IR.getInstruction();
     if (IS.isDispatched() && !IS.updateDispatched()) {
       ++I;
@@ -242,12 +242,10 @@ void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
     if (Resources->checkAvailability(IS.getDesc()))
       continue;
 
-    if (IS.isReady() ||
-        (IS.isMemOp() && LSU.isReady(IR) != IR.getSourceIndex())) {
+    if (IS.isReady() || (IS.isMemOp() && LSU.isReady(IR) != IR))
       MemDeps.emplace_back(IR);
-    } else {
+    else
       RegDeps.emplace_back(IR);
-    }
   }
 }
 
@@ -304,8 +302,7 @@ bool Scheduler::dispatch(const InstRef &IR) {
 
   // Memory operations that are not in a ready state are initially assigned to
   // the WaitSet. 
-  if (!IS.isReady() ||
-      (IS.isMemOp() && LSU.isReady(IR) != IR.getSourceIndex())) {
+  if (!IS.isReady() || (IS.isMemOp() && LSU.isReady(IR) != IR)) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
     WaitSet.push_back(IR);
     return false;

From aa28b6d198a6dcf4f2c53676259b0f91e0babc35 Mon Sep 17 00:00:00 2001
From: Petar Jovanovic <petar.jovanovic@mips.com>
Date: Thu, 23 May 2019 13:49:06 +0000
Subject: [PATCH 0033/1176] [LiveDebugValues] Rename 'DMI' into 'DebugInstr'
 (NFC)

This will improve code readability.

Patch by Djordje Todorovic.

Differential Revision: https://reviews.llvm.org/D62295

llvm-svn: 361497
---
 llvm/lib/CodeGen/LiveDebugValues.cpp | 70 +++++++++++++++-------------
 1 file changed, 37 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp
index c062542e9c8b5..c56ba124a8de3 100644
--- a/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -425,17 +425,17 @@ void LiveDebugValues::insertTransferDebugPair(
     MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
     VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
     unsigned NewReg) {
-  const MachineInstr *DMI = &VarLocIDs[OldVarID].MI;
+  const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
   MachineFunction *MF = MI.getParent()->getParent();
-  MachineInstr *NewDMI;
+  MachineInstr *NewDebugInstr;
 
   auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers,
-                        &VarLocIDs](VarLoc &VL, MachineInstr *NewDMI) {
+                        &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
     unsigned LocId = VarLocIDs.insert(VL);
     OpenRanges.insert(LocId, VL.Var);
-    // The newly created DBG_VALUE instruction NewDMI must be inserted after
-    // MI. Keep track of the pairing.
-    TransferDebugPair MIP = {&MI, NewDMI};
+    // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
+    // after MI. Keep track of the pairing.
+    TransferDebugPair MIP = {&MI, NewDebugInstr};
     Transfers.push_back(MIP);
   };
 
@@ -447,31 +447,33 @@ void LiveDebugValues::insertTransferDebugPair(
            "No register supplied when handling a copy of a debug value");
     // Create a DBG_VALUE instruction to describe the Var in its new
     // register location.
-    NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(),
-                     DMI->isIndirectDebugValue(), NewReg,
-                     DMI->getDebugVariable(), DMI->getDebugExpression());
-    if (DMI->isIndirectDebugValue())
-      NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm());
-    VarLoc VL(*NewDMI, LS);
-    ProcessVarLoc(VL, NewDMI);
+    NewDebugInstr = BuildMI(
+        *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(),
+        DebugInstr->isIndirectDebugValue(), NewReg,
+        DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression());
+    if (DebugInstr->isIndirectDebugValue())
+      NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+    VarLoc VL(*NewDebugInstr, LS);
+    ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
-               NewDMI->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), false, false, false, TII));
     return;
   }
   case TransferKind::TransferSpill: {
     // Create a DBG_VALUE instruction to describe the Var in its spilled
     // location.
     VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
-    auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(),
+    auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(),
                                             DIExpression::ApplyOffset,
                                             SpillLocation.SpillOffset);
-    NewDMI =
-        BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true,
-                SpillLocation.SpillBase, DMI->getDebugVariable(), SpillExpr);
-    VarLoc VL(*NewDMI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS);
-    ProcessVarLoc(VL, NewDMI);
+    NewDebugInstr = BuildMI(
+        *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true,
+        SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr);
+    VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase,
+              SpillLocation.SpillOffset, LS);
+    ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
-               NewDMI->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), false, false, false, TII));
     return;
   }
   case TransferKind::TransferRestore: {
@@ -479,12 +481,13 @@ void LiveDebugValues::insertTransferDebugPair(
            "No register supplied when handling a restore of a debug value");
     MachineFunction *MF = MI.getMF();
     DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
-    NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), false, NewReg,
-                     DMI->getDebugVariable(), DIB.createExpression());
-    VarLoc VL(*NewDMI, LS);
-    ProcessVarLoc(VL, NewDMI);
+    NewDebugInstr =
+        BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
+                NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
+    VarLoc VL(*NewDebugInstr, LS);
+    ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
-               NewDMI->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), false, false, false, TII));
     return;
   }
   }
@@ -805,13 +808,14 @@ bool LiveDebugValues::join(
     // new range is started for the var from the mbb's beginning by inserting
     // a new DBG_VALUE. process() will end this range however appropriate.
     const VarLoc &DiffIt = VarLocIDs[ID];
-    const MachineInstr *DMI = &DiffIt.MI;
-    MachineInstr *MI =
-        BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
-                DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(),
-                DMI->getDebugVariable(), DMI->getDebugExpression());
-    if (DMI->isIndirectDebugValue())
-      MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+    const MachineInstr *DebugInstr = &DiffIt.MI;
+    MachineInstr *MI = BuildMI(
+        MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+        DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(),
+        DebugInstr->getOperand(0).getReg(), DebugInstr->getDebugVariable(),
+        DebugInstr->getDebugExpression());
+    if (DebugInstr->isIndirectDebugValue())
+      MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
     LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
     ILS.set(ID);
     ++NumInserted;

From 465868d63273e317ec1bab0624c8c84038701086 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Thu, 23 May 2019 13:59:44 +0000
Subject: [PATCH 0034/1176] gn build: Merge r361487

llvm-svn: 361498
---
 .../gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn
index 1dcccc71d9a67..2efa42a124eec 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/google/BUILD.gn
@@ -13,6 +13,7 @@ static_library("google") {
   ]
   sources = [
     "AvoidCStyleCastsCheck.cpp",
+    "AvoidNSObjectNewCheck.cpp",
     "AvoidThrowingObjCExceptionCheck.cpp",
     "AvoidUnderscoreInGoogletestNameCheck.cpp",
     "DefaultArgumentsCheck.cpp",

From 74927554e22b3164e536a3e1a0ea4b70314b18e2 Mon Sep 17 00:00:00 2001
From: Lewis Revill <lewis.revill@embecosm.com>
Date: Thu, 23 May 2019 14:46:27 +0000
Subject: [PATCH 0035/1176] [RISCV] Support assembling TLS LA pseudo
 instructions

This patch adds the pseudo instructions la.tls.ie and la.tls.gd, used in
the initial-exec and global-dynamic TLS models respectively when
addressing a global. The pseudo instructions are expanded in the
assembly parser.

llvm-svn: 361499
---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp |  43 ++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |  10 ++
 llvm/test/MC/RISCV/rvi-pseudos.s              | 101 ++++++++++++++----
 3 files changed, 133 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index a1d0854e20263..bac43187ca770 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -93,6 +93,14 @@ class RISCVAsmParser : public MCTargetAsmParser {
   // Helper to emit pseudo instruction "la" used in GOT/PC-rel addressing.
   void emitLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
 
+  // Helper to emit pseudo instruction "la.tls.ie" used in initial-exec TLS
+  // addressing.
+  void emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+  // Helper to emit pseudo instruction "la.tls.gd" used in global-dynamic TLS
+  // addressing.
+  void emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
   // Helper to emit pseudo load/store instruction with a symbol.
   void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
                            MCStreamer &Out, bool HasTmpReg);
@@ -1590,6 +1598,35 @@ void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc,
   emitAuipcInstPair(DestReg, DestReg, Symbol, VKHi, SecondOpcode, IDLoc, Out);
 }
 
+void RISCVAsmParser::emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc,
+                                          MCStreamer &Out) {
+  // The load TLS IE address pseudo-instruction "la.tls.ie" is used in
+  // initial-exec TLS model addressing of global symbols:
+  //   la.tls.ie rdest, symbol
+  // expands to
+  //   TmpLabel: AUIPC rdest, %tls_ie_pcrel_hi(symbol)
+  //             Lx rdest, %pcrel_lo(TmpLabel)(rdest)
+  MCOperand DestReg = Inst.getOperand(0);
+  const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+  unsigned SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
+  emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GOT_HI,
+                    SecondOpcode, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc,
+                                          MCStreamer &Out) {
+  // The load TLS GD address pseudo-instruction "la.tls.gd" is used in
+  // global-dynamic TLS model addressing of global symbols:
+  //   la.tls.gd rdest, symbol
+  // expands to
+  //   TmpLabel: AUIPC rdest, %tls_gd_pcrel_hi(symbol)
+  //             ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+  MCOperand DestReg = Inst.getOperand(0);
+  const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+  emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GD_HI,
+                    RISCV::ADDI, IDLoc, Out);
+}
+
 void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
                                          SMLoc IDLoc, MCStreamer &Out,
                                          bool HasTmpReg) {
@@ -1657,6 +1694,12 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
   case RISCV::PseudoLA:
     emitLoadAddress(Inst, IDLoc, Out);
     return false;
+  case RISCV::PseudoLA_TLS_IE:
+    emitLoadTLSIEAddress(Inst, IDLoc, Out);
+    return false;
+  case RISCV::PseudoLA_TLS_GD:
+    emitLoadTLSGDAddress(Inst, IDLoc, Out);
+    return false;
   case RISCV::PseudoLB:
     emitLoadStoreSymbol(Inst, RISCV::LB, IDLoc, Out, /*HasTmpReg=*/false);
     return false;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 99002386281a6..d7bcc21f3b49d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -926,6 +926,16 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
 def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
                       "la", "$dst, $src">;
 
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1 in
+def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+                             "la.tls.ie", "$dst, $src">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1 in
+def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+                             "la.tls.gd", "$dst, $src">;
+
 /// Loads
 
 multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
diff --git a/llvm/test/MC/RISCV/rvi-pseudos.s b/llvm/test/MC/RISCV/rvi-pseudos.s
index 41e6e9c31850b..7ecdb8fb4ce10 100644
--- a/llvm/test/MC/RISCV/rvi-pseudos.s
+++ b/llvm/test/MC/RISCV/rvi-pseudos.s
@@ -1,9 +1,11 @@
-# RUN: llvm-mc %s -triple=riscv32 | FileCheck %s --check-prefixes=CHECK,CHECK-NOPIC
-# RUN: llvm-mc %s -triple=riscv64 | FileCheck %s --check-prefixes=CHECK,CHECK-NOPIC
+# RUN: llvm-mc %s -triple=riscv32 \
+# RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOPIC,CHECK-RV32
+# RUN: llvm-mc %s -triple=riscv64 \
+# RUN:     | FileCheck %s --check-prefixes=CHECK,CHECK-NOPIC,CHECK-RV64
 # RUN: llvm-mc %s -triple=riscv32 -position-independent \
-# RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-PIC,CHECK-PIC-RV32
+# RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-PIC,CHECK-RV32,CHECK-PIC-RV32
 # RUN: llvm-mc %s -triple=riscv64 -position-independent \
-# RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-PIC,CHECK-PIC-RV64
+# RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-PIC,CHECK-RV64,CHECK-PIC-RV64
 
 # CHECK: .Lpcrel_hi0:
 # CHECK: auipc a0, %pcrel_hi(a_symbol)
@@ -73,47 +75,104 @@ la a3, ra
 la a4, f1
 
 # CHECK: .Lpcrel_hi10:
+# CHECK: auipc a0, %tls_ie_pcrel_hi(a_symbol)
+# CHECK-RV32: lw    a0, %pcrel_lo(.Lpcrel_hi10)(a0)
+# CHECK-RV64: ld    a0, %pcrel_lo(.Lpcrel_hi10)(a0)
+la.tls.ie a0, a_symbol
+
+# CHECK: .Lpcrel_hi11:
+# CHECK: auipc a1, %tls_ie_pcrel_hi(another_symbol)
+# CHECK-RV32: lw    a1, %pcrel_lo(.Lpcrel_hi11)(a1)
+# CHECK-RV64: ld    a1, %pcrel_lo(.Lpcrel_hi11)(a1)
+la.tls.ie a1, another_symbol
+
+# Check that we can load the address of symbols that are spelled like a register
+# CHECK: .Lpcrel_hi12:
+# CHECK: auipc a2, %tls_ie_pcrel_hi(zero)
+# CHECK-RV32: lw    a2, %pcrel_lo(.Lpcrel_hi12)(a2)
+# CHECK-RV64: ld    a2, %pcrel_lo(.Lpcrel_hi12)(a2)
+la.tls.ie a2, zero
+
+# CHECK: .Lpcrel_hi13:
+# CHECK: auipc a3, %tls_ie_pcrel_hi(ra)
+# CHECK-RV32: lw    a3, %pcrel_lo(.Lpcrel_hi13)(a3)
+# CHECK-RV64: ld    a3, %pcrel_lo(.Lpcrel_hi13)(a3)
+la.tls.ie a3, ra
+
+# CHECK: .Lpcrel_hi14:
+# CHECK: auipc a4, %tls_ie_pcrel_hi(f1)
+# CHECK-RV32: lw    a4, %pcrel_lo(.Lpcrel_hi14)(a4)
+# CHECK-RV64: ld    a4, %pcrel_lo(.Lpcrel_hi14)(a4)
+la.tls.ie a4, f1
+
+# CHECK: .Lpcrel_hi15:
+# CHECK: auipc a0, %tls_gd_pcrel_hi(a_symbol)
+# CHECK: addi  a0, a0, %pcrel_lo(.Lpcrel_hi15)
+la.tls.gd a0, a_symbol
+
+# CHECK: .Lpcrel_hi16:
+# CHECK: auipc a1, %tls_gd_pcrel_hi(another_symbol)
+# CHECK: addi  a1, a1, %pcrel_lo(.Lpcrel_hi16)
+la.tls.gd a1, another_symbol
+
+# Check that we can load the address of symbols that are spelled like a register
+# CHECK: .Lpcrel_hi17:
+# CHECK: auipc a2, %tls_gd_pcrel_hi(zero)
+# CHECK: addi  a2, a2, %pcrel_lo(.Lpcrel_hi17)
+la.tls.gd a2, zero
+
+# CHECK: .Lpcrel_hi18:
+# CHECK: auipc a3, %tls_gd_pcrel_hi(ra)
+# CHECK: addi  a3, a3, %pcrel_lo(.Lpcrel_hi18)
+la.tls.gd a3, ra
+
+# CHECK: .Lpcrel_hi19:
+# CHECK: auipc a4, %tls_gd_pcrel_hi(f1)
+# CHECK: addi  a4, a4, %pcrel_lo(.Lpcrel_hi19)
+la.tls.gd a4, f1
+
+# CHECK: .Lpcrel_hi20:
 # CHECK: auipc a0, %pcrel_hi(a_symbol)
-# CHECK: lb  a0, %pcrel_lo(.Lpcrel_hi10)(a0)
+# CHECK: lb  a0, %pcrel_lo(.Lpcrel_hi20)(a0)
 lb a0, a_symbol
 
-# CHECK: .Lpcrel_hi11:
+# CHECK: .Lpcrel_hi21:
 # CHECK: auipc a1, %pcrel_hi(a_symbol)
-# CHECK: lh  a1, %pcrel_lo(.Lpcrel_hi11)(a1)
+# CHECK: lh  a1, %pcrel_lo(.Lpcrel_hi21)(a1)
 lh a1, a_symbol
 
-# CHECK: .Lpcrel_hi12:
+# CHECK: .Lpcrel_hi22:
 # CHECK: auipc a2, %pcrel_hi(a_symbol)
-# CHECK: lhu  a2, %pcrel_lo(.Lpcrel_hi12)(a2)
+# CHECK: lhu  a2, %pcrel_lo(.Lpcrel_hi22)(a2)
 lhu a2, a_symbol
 
-# CHECK: .Lpcrel_hi13:
+# CHECK: .Lpcrel_hi23:
 # CHECK: auipc a3, %pcrel_hi(a_symbol)
-# CHECK: lw  a3, %pcrel_lo(.Lpcrel_hi13)(a3)
+# CHECK: lw  a3, %pcrel_lo(.Lpcrel_hi23)(a3)
 lw a3, a_symbol
 
-# CHECK: .Lpcrel_hi14:
+# CHECK: .Lpcrel_hi24:
 # CHECK: auipc a4, %pcrel_hi(a_symbol)
-# CHECK: sb  a3, %pcrel_lo(.Lpcrel_hi14)(a4)
+# CHECK: sb  a3, %pcrel_lo(.Lpcrel_hi24)(a4)
 sb a3, a_symbol, a4
 
-# CHECK: .Lpcrel_hi15:
+# CHECK: .Lpcrel_hi25:
 # CHECK: auipc a4, %pcrel_hi(a_symbol)
-# CHECK: sh  a3, %pcrel_lo(.Lpcrel_hi15)(a4)
+# CHECK: sh  a3, %pcrel_lo(.Lpcrel_hi25)(a4)
 sh a3, a_symbol, a4
 
-# CHECK: .Lpcrel_hi16:
+# CHECK: .Lpcrel_hi26:
 # CHECK: auipc a4, %pcrel_hi(a_symbol)
-# CHECK: sw  a3, %pcrel_lo(.Lpcrel_hi16)(a4)
+# CHECK: sw  a3, %pcrel_lo(.Lpcrel_hi26)(a4)
 sw a3, a_symbol, a4
 
 # Check that we can load the address of symbols that are spelled like a register
-# CHECK: .Lpcrel_hi17:
+# CHECK: .Lpcrel_hi27:
 # CHECK: auipc a2, %pcrel_hi(zero)
-# CHECK: lw  a2, %pcrel_lo(.Lpcrel_hi17)(a2)
+# CHECK: lw  a2, %pcrel_lo(.Lpcrel_hi27)(a2)
 lw a2, zero
 
-# CHECK: .Lpcrel_hi18:
+# CHECK: .Lpcrel_hi28:
 # CHECK: auipc a4, %pcrel_hi(zero)
-# CHECK: sw  a3, %pcrel_lo(.Lpcrel_hi18)(a4)
+# CHECK: sw  a3, %pcrel_lo(.Lpcrel_hi28)(a4)
 sw a3, zero, a4

From 1312225f8c46efe24a1f80b3935a72d172c2f61f Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Thu, 23 May 2019 14:53:42 +0000
Subject: [PATCH 0036/1176] [NFC][InstCombine] Add unary FNeg tests to
 maximum.ll/minimum.ll

llvm-svn: 361500
---
 llvm/test/Transforms/InstCombine/maximum.ll | 71 +++++++++++++++++++++
 llvm/test/Transforms/InstCombine/minimum.ll | 71 +++++++++++++++++++++
 2 files changed, 142 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/maximum.ll b/llvm/test/Transforms/InstCombine/maximum.ll
index bd97a3794d45b..d02cb99e27812 100644
--- a/llvm/test/Transforms/InstCombine/maximum.ll
+++ b/llvm/test/Transforms/InstCombine/maximum.ll
@@ -226,6 +226,18 @@ define <2 x float> @neg_neg(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %r
 }
 
+define <2 x float> @unary_neg_neg(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_neg_neg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.minimum.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %negx = fneg <2 x float> %x
+  %negy = fneg <2 x float> %y
+  %r = call <2 x float> @llvm.maximum.v2f32(<2 x float> %negx, <2 x float> %negy)
+  ret <2 x float> %r
+}
+
 ; FMF is not required, but it should be propagated from the intrinsic (not the fnegs).
 
 define float @neg_neg_vec_fmf(float %x, float %y) {
@@ -240,6 +252,18 @@ define float @neg_neg_vec_fmf(float %x, float %y) {
   ret float %r
 }
 
+define float @unary_neg_neg_vec_fmf(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_neg_vec_fmf(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.minimum.f32(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    ret float [[R]]
+;
+  %negx = fneg arcp float %x
+  %negy = fneg afn float %y
+  %r = call fast float @llvm.maximum.f32(float %negx, float %negy)
+  ret float %r
+}
+
 ; 1 extra use of an intermediate value should still allow the fold,
 ; but 2 would require more instructions than we started with.
 
@@ -259,6 +283,21 @@ define float @neg_neg_extra_use_x(float %x, float %y) {
   ret float %r
 }
 
+define float @unary_neg_neg_extra_use_x(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_neg_extra_use_x(
+; CHECK-NEXT:    [[NEGX:%.*]] = fneg float [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.minimum.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    call void @use(float [[NEGX]])
+; CHECK-NEXT:    ret float [[R]]
+;
+  %negx = fneg float %x
+  %negy = fneg float %y
+  %r = call float @llvm.maximum.f32(float %negx, float %negy)
+  call void @use(float %negx)
+  ret float %r
+}
+
 define float @neg_neg_extra_use_y(float %x, float %y) {
 ; CHECK-LABEL: @neg_neg_extra_use_y(
 ; CHECK-NEXT:    [[NEGY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
@@ -274,6 +313,21 @@ define float @neg_neg_extra_use_y(float %x, float %y) {
   ret float %r
 }
 
+define float @unary_neg_neg_extra_use_y(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_neg_extra_use_y(
+; CHECK-NEXT:    [[NEGY:%.*]] = fneg float [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.minimum.f32(float [[X:%.*]], float [[Y]])
+; CHECK-NEXT:    [[R:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    call void @use(float [[NEGY]])
+; CHECK-NEXT:    ret float [[R]]
+;
+  %negx = fneg float %x
+  %negy = fneg float %y
+  %r = call float @llvm.maximum.f32(float %negx, float %negy)
+  call void @use(float %negy)
+  ret float %r
+}
+
 define float @neg_neg_extra_use_x_and_y(float %x, float %y) {
 ; CHECK-LABEL: @neg_neg_extra_use_x_and_y(
 ; CHECK-NEXT:    [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
@@ -290,3 +344,20 @@ define float @neg_neg_extra_use_x_and_y(float %x, float %y) {
   call void @use(float %negy)
   ret float %r
 }
+
+define float @unary_neg_neg_extra_use_x_and_y(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_neg_extra_use_x_and_y(
+; CHECK-NEXT:    [[NEGX:%.*]] = fneg float [[X:%.*]]
+; CHECK-NEXT:    [[NEGY:%.*]] = fneg float [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = call float @llvm.maximum.f32(float [[NEGX]], float [[NEGY]])
+; CHECK-NEXT:    call void @use(float [[NEGX]])
+; CHECK-NEXT:    call void @use(float [[NEGY]])
+; CHECK-NEXT:    ret float [[R]]
+;
+  %negx = fneg float %x
+  %negy = fneg float %y
+  %r = call float @llvm.maximum.f32(float %negx, float %negy)
+  call void @use(float %negx)
+  call void @use(float %negy)
+  ret float %r
+}
diff --git a/llvm/test/Transforms/InstCombine/minimum.ll b/llvm/test/Transforms/InstCombine/minimum.ll
index 32aae6417eba4..9ef9cb75137f0 100644
--- a/llvm/test/Transforms/InstCombine/minimum.ll
+++ b/llvm/test/Transforms/InstCombine/minimum.ll
@@ -250,6 +250,18 @@ define double @neg_neg(double %x, double %y) {
   ret double %r
 }
 
+define double @unary_neg_neg(double %x, double %y) {
+; CHECK-LABEL: @unary_neg_neg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.maximum.f64(double [[X:%.*]], double [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    ret double [[R]]
+;
+  %negx = fneg double %x
+  %negy = fneg double %y
+  %r = call double @llvm.minimum.f64(double %negx, double %negy)
+  ret double %r
+}
+
 ; FMF is not required, but it should be propagated from the intrinsic (not the fnegs).
 ; Also, make sure this works with vectors.
 
@@ -265,6 +277,18 @@ define <2 x double> @neg_neg_vec_fmf(<2 x double> %x, <2 x double> %y) {
   ret <2 x double> %r
 }
 
+define <2 x double> @unary_neg_neg_vec_fmf(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @unary_neg_neg_vec_fmf(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub nnan ninf <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    ret <2 x double> [[R]]
+;
+  %negx = fneg reassoc <2 x double> %x
+  %negy = fneg fast <2 x double> %y
+  %r = call nnan ninf <2 x double> @llvm.minimum.v2f64(<2 x double> %negx, <2 x double> %negy)
+  ret <2 x double> %r
+}
+
 ; 1 extra use of an intermediate value should still allow the fold,
 ; but 2 would require more instructions than we started with.
 
@@ -284,6 +308,21 @@ define double @neg_neg_extra_use_x(double %x, double %y) {
   ret double %r
 }
 
+define double @unary_neg_neg_extra_use_x(double %x, double %y) {
+; CHECK-LABEL: @unary_neg_neg_extra_use_x(
+; CHECK-NEXT:    [[NEGX:%.*]] = fneg double [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.maximum.f64(double [[X]], double [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    call void @use(double [[NEGX]])
+; CHECK-NEXT:    ret double [[R]]
+;
+  %negx = fneg double %x
+  %negy = fneg double %y
+  %r = call double @llvm.minimum.f64(double %negx, double %negy)
+  call void @use(double %negx)
+  ret double %r
+}
+
 define double @neg_neg_extra_use_y(double %x, double %y) {
 ; CHECK-LABEL: @neg_neg_extra_use_y(
 ; CHECK-NEXT:    [[NEGY:%.*]] = fsub double -0.000000e+00, [[Y:%.*]]
@@ -299,6 +338,21 @@ define double @neg_neg_extra_use_y(double %x, double %y) {
   ret double %r
 }
 
+define double @unary_neg_neg_extra_use_y(double %x, double %y) {
+; CHECK-LABEL: @unary_neg_neg_extra_use_y(
+; CHECK-NEXT:    [[NEGY:%.*]] = fneg double [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.maximum.f64(double [[X:%.*]], double [[Y]])
+; CHECK-NEXT:    [[R:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    call void @use(double [[NEGY]])
+; CHECK-NEXT:    ret double [[R]]
+;
+  %negx = fneg double %x
+  %negy = fneg double %y
+  %r = call double @llvm.minimum.f64(double %negx, double %negy)
+  call void @use(double %negy)
+  ret double %r
+}
+
 define double @neg_neg_extra_use_x_and_y(double %x, double %y) {
 ; CHECK-LABEL: @neg_neg_extra_use_x_and_y(
 ; CHECK-NEXT:    [[NEGX:%.*]] = fsub double -0.000000e+00, [[X:%.*]]
@@ -315,3 +369,20 @@ define double @neg_neg_extra_use_x_and_y(double %x, double %y) {
   call void @use(double %negy)
   ret double %r
 }
+
+define double @unary_neg_neg_extra_use_x_and_y(double %x, double %y) {
+; CHECK-LABEL: @unary_neg_neg_extra_use_x_and_y(
+; CHECK-NEXT:    [[NEGX:%.*]] = fneg double [[X:%.*]]
+; CHECK-NEXT:    [[NEGY:%.*]] = fneg double [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = call double @llvm.minimum.f64(double [[NEGX]], double [[NEGY]])
+; CHECK-NEXT:    call void @use(double [[NEGX]])
+; CHECK-NEXT:    call void @use(double [[NEGY]])
+; CHECK-NEXT:    ret double [[R]]
+;
+  %negx = fneg double %x
+  %negy = fneg double %y
+  %r = call double @llvm.minimum.f64(double %negx, double %negy)
+  call void @use(double %negx)
+  call void @use(double %negy)
+  ret double %r
+}

From 0857a4ec20db4b038fb5346ed09e3253842f0169 Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Thu, 23 May 2019 15:03:22 +0000
Subject: [PATCH 0037/1176] [cmake] When getting Ninja version, don't include
 CMakeNinjaFindMake which doesn't play well with passing CMAKE_MAKE_PROGRAM
 from the commandline without a path.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes a bug introduced in r361280.

Thanks to Mikael Holmén for reporting this!

llvm-svn: 361501
---
 llvm/cmake/config-ix.cmake | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 36ec98a938623..e1e5cd3198b22 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -555,14 +555,11 @@ set(LLVM_BINUTILS_INCDIR "" CACHE PATH
 	"PATH to binutils/include containing plugin-api.h for gold plugin.")
 
 if(CMAKE_GENERATOR STREQUAL "Ninja")
-  include(CMakeNinjaFindMake OPTIONAL)
-  if(CMAKE_MAKE_PROGRAM)
-    execute_process(COMMAND ${CMAKE_MAKE_PROGRAM} --version
-      OUTPUT_VARIABLE NINJA_VERSION
-      OUTPUT_STRIP_TRAILING_WHITESPACE)
-    set(NINJA_VERSION ${NINJA_VERSION} CACHE STRING "Ninja version number" FORCE)
-    message(STATUS "Ninja version: ${NINJA_VERSION}")
-  endif()
+  execute_process(COMMAND ${CMAKE_MAKE_PROGRAM} --version
+    OUTPUT_VARIABLE NINJA_VERSION
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  set(NINJA_VERSION ${NINJA_VERSION} CACHE STRING "Ninja version number" FORCE)
+  message(STATUS "Ninja version: ${NINJA_VERSION}")
 endif()
 
 if(CMAKE_GENERATOR STREQUAL "Ninja" AND

From c63b37dd408eb49ed056e97e50a275026b2545b9 Mon Sep 17 00:00:00 2001
From: Paul Robinson <paul.robinson@sony.com>
Date: Thu, 23 May 2019 15:07:46 +0000
Subject: [PATCH 0038/1176] Work around a Visual C++ bug.

Using a static function as a template parameter gets a bogus compile-time
error with Visual Studio 2017, prior to version 15.8. Our current
minimum-version requirement is a particular update to VS2015, and we
assume all Visual Studio 2017 versions are usable. This patch makes the
code buildable with older versions of VS2017, and can be reverted after
we upgrade the minimum version sometime in the future.

Description of the Microsoft bug:
https://developercommunity.visualstudio.com/content/problem/25334/error-code-c2971-when-specifying-a-function-as-the.html

Differential Revision: https://reviews.llvm.org/D62202

llvm-svn: 361502
---
 .../lib/Tooling/Refactoring/RangeSelector.cpp | 22 ++++++++++++++-----
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Tooling/Refactoring/RangeSelector.cpp b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
index 23479c58fdf99..92426db3a5ebe 100644
--- a/clang/lib/Tooling/Refactoring/RangeSelector.cpp
+++ b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
@@ -218,37 +218,47 @@ class RelativeSelector {
 };
 } // namespace
 
+// FIXME: Change the following functions from being in an anonymous namespace
+// to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
+// (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
+// namespace works around a bug in earlier versions.
+namespace {
 // Returns the range of the statements (all source between the braces).
-static CharSourceRange getStatementsRange(const MatchResult &,
-                                          const CompoundStmt &CS) {
+CharSourceRange getStatementsRange(const MatchResult &,
+                                   const CompoundStmt &CS) {
   return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
                                        CS.getRBracLoc());
 }
+} // namespace
 
 RangeSelector tooling::statements(StringRef ID) {
   return RelativeSelector<CompoundStmt, getStatementsRange>(ID);
 }
 
+namespace {
 // Returns the range of the source between the call's parentheses.
-static CharSourceRange getCallArgumentsRange(const MatchResult &Result,
-                                             const CallExpr &CE) {
+CharSourceRange getCallArgumentsRange(const MatchResult &Result,
+                                      const CallExpr &CE) {
   return CharSourceRange::getCharRange(
       findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
           .getLocWithOffset(1),
       CE.getRParenLoc());
 }
+} // namespace
 
 RangeSelector tooling::callArgs(StringRef ID) {
   return RelativeSelector<CallExpr, getCallArgumentsRange>(ID);
 }
 
+namespace {
 // Returns the range of the elements of the initializer list. Includes all
 // source between the braces.
-static CharSourceRange getElementsRange(const MatchResult &,
-                                        const InitListExpr &E) {
+CharSourceRange getElementsRange(const MatchResult &,
+                                 const InitListExpr &E) {
   return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
                                        E.getRBraceLoc());
 }
+} // namespace
 
 RangeSelector tooling::initListElements(StringRef ID) {
   return RelativeSelector<InitListExpr, getElementsRange>(ID);

From 85200645c6e86fb684b6e10eb8193f460e31c18d Mon Sep 17 00:00:00 2001
From: Konrad Kleine <kkleine@redhat.com>
Date: Thu, 23 May 2019 15:17:39 +0000
Subject: [PATCH 0039/1176] [lldb] fix cannot convert from 'nullptr' to
 'lldb::thread_result_t'

Summary:
On Windows `lldb::thread_result_t` resolves to `typedef unsigned thread_result_t;` and on other platforms it resolves to `typedef void *thread_result_t;`.
 Therefore one cannot use `nullptr` when returning from a function that returns `thread_result_t`.

I've made this change because a windows build bot fails with these errors:

```
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Core\Communication.cpp(362): error C2440: 'return': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Core\Communication.cpp(362): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
```

and

```
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Core\Debugger.cpp(1619): error C2440: 'return': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Core\Debugger.cpp(1619): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Core\Debugger.cpp(1664): error C2440: 'return': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Core\Debugger.cpp(1664): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
```

This is the failing build: http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/5035/steps/build/logs/stdio

Reviewers: JDevlieghere, teemperor, jankratochvil, labath, clayborg, RKSimon, courbet, jhenderson

Reviewed By: labath, clayborg

Subscribers: labath, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62305

llvm-svn: 361503
---
 lldb/source/Core/Communication.cpp                          | 2 +-
 lldb/source/Core/Debugger.cpp                               | 4 ++--
 lldb/source/Host/common/TaskPool.cpp                        | 2 +-
 lldb/source/Host/windows/HostProcessWindows.cpp             | 2 +-
 .../Plugins/Process/Windows/Common/DebuggerThread.cpp       | 6 +++---
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/lldb/source/Core/Communication.cpp b/lldb/source/Core/Communication.cpp
index f2919d807d596..c39976d6556b1 100644
--- a/lldb/source/Core/Communication.cpp
+++ b/lldb/source/Core/Communication.cpp
@@ -359,7 +359,7 @@ lldb::thread_result_t Communication::ReadThread(lldb::thread_arg_t p) {
   // Let clients know that this thread is exiting
   comm->BroadcastEvent(eBroadcastBitNoMorePendingInput);
   comm->BroadcastEvent(eBroadcastBitReadThreadDidExit);
-  return nullptr;
+  return {};
 }
 
 void Communication::SetReadThreadBytesReceivedCallback(
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 79402813e845f..0afdf0690214d 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -1616,7 +1616,7 @@ void Debugger::DefaultEventHandler() {
 
 lldb::thread_result_t Debugger::EventHandlerThread(lldb::thread_arg_t arg) {
   ((Debugger *)arg)->DefaultEventHandler();
-  return nullptr;
+  return {};
 }
 
 bool Debugger::StartEventHandlerThread() {
@@ -1661,7 +1661,7 @@ lldb::thread_result_t Debugger::IOHandlerThread(lldb::thread_arg_t arg) {
   Debugger *debugger = (Debugger *)arg;
   debugger->ExecuteIOHandlers();
   debugger->StopEventHandlerThread();
-  return nullptr;
+  return {};
 }
 
 bool Debugger::HasIOHandlerThread() { return m_io_handler_thread.IsJoinable(); }
diff --git a/lldb/source/Host/common/TaskPool.cpp b/lldb/source/Host/common/TaskPool.cpp
index d63d9f35d1e21..dcc4363078da0 100644
--- a/lldb/source/Host/common/TaskPool.cpp
+++ b/lldb/source/Host/common/TaskPool.cpp
@@ -73,7 +73,7 @@ void TaskPoolImpl::AddTask(std::function<void()> &&task_fn) {
 
 lldb::thread_result_t TaskPoolImpl::WorkerPtr(void *pool) {
   Worker((TaskPoolImpl *)pool);
-  return nullptr;
+  return {};
 }
 
 void TaskPoolImpl::Worker(TaskPoolImpl *pool) {
diff --git a/lldb/source/Host/windows/HostProcessWindows.cpp b/lldb/source/Host/windows/HostProcessWindows.cpp
index 6b016bf53dab0..573218ed8f623 100644
--- a/lldb/source/Host/windows/HostProcessWindows.cpp
+++ b/lldb/source/Host/windows/HostProcessWindows.cpp
@@ -109,7 +109,7 @@ lldb::thread_result_t HostProcessWindows::MonitorThread(void *thread_arg) {
     ::CloseHandle(info->process_handle);
     delete (info);
   }
-  return 0;
+  return {};
 }
 
 void HostProcessWindows::Close() {
diff --git a/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp b/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp
index ad7feaa2d3703..104ac229f2f41 100644
--- a/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp
@@ -132,7 +132,7 @@ lldb::thread_result_t DebuggerThread::DebuggerThreadLaunchRoutine(
   else
     m_debug_delegate->OnDebuggerError(error, 0);
 
-  return 0;
+  return {};
 }
 
 lldb::thread_result_t DebuggerThread::DebuggerThreadAttachRoutine(
@@ -148,7 +148,7 @@ lldb::thread_result_t DebuggerThread::DebuggerThreadAttachRoutine(
   if (!DebugActiveProcess((DWORD)pid)) {
     Status error(::GetLastError(), eErrorTypeWin32);
     m_debug_delegate->OnDebuggerError(error, 0);
-    return 0;
+    return {};
   }
 
   // The attach was successful, enter the debug loop.  From here on out, this
@@ -156,7 +156,7 @@ lldb::thread_result_t DebuggerThread::DebuggerThreadAttachRoutine(
   // in DebugLaunch should apply from this point out.
   DebugLoop();
 
-  return 0;
+  return {};
 }
 
 Status DebuggerThread::StopDebugging(bool terminate) {

From 762d008a7c35c84468f713b93748190a52f0dfe5 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 23 May 2019 15:23:16 +0000
Subject: [PATCH 0040/1176] [Driver] Try normalized triple when looking for C++
 libraries

This addresses the issue introduced in r361432 where we would only
try effective triple but not the normalized one as we do for other
runtimes.

Differential Revision: https://reviews.llvm.org/D62286

llvm-svn: 361504
---
 clang/lib/Driver/ToolChain.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 3b0e8f2dbaa85..08d1ebb75d7bc 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -80,6 +80,11 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
     llvm::sys::path::append(P, "..", "lib", D.getTargetTriple(), "c++");
     if (getVFS().exists(P))
       getLibraryPaths().push_back(P.str());
+
+    P.assign(D.Dir);
+    llvm::sys::path::append(P, "..", "lib", Triple.str(), "c++");
+    if (getVFS().exists(P))
+      getLibraryPaths().push_back(P.str());
   }
 
   P.assign(D.ResourceDir);

From 8cffa848509f3a16a863a31ce51e226fcf79875e Mon Sep 17 00:00:00 2001
From: Kristof Umann <kristof.umann@ericsson.com>
Date: Thu, 23 May 2019 15:49:04 +0000
Subject: [PATCH 0041/1176] [analyzer][NFC] Prettify some RUN: lines in test
 files.

This is a test commit in disguise.

llvm-svn: 361505
---
 clang/test/Analysis/bsd-string.c           |  6 ++-
 clang/test/Analysis/bstring.c              | 31 ++++++++++++--
 clang/test/Analysis/cstring-plist.c        |  4 +-
 clang/test/Analysis/null-deref-ps-region.c |  6 ++-
 clang/test/Analysis/string.c               | 48 +++++++++++++++++++---
 5 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/clang/test/Analysis/bsd-string.c b/clang/test/Analysis/bsd-string.c
index 4fbfd48ad8aef..6e04a62ecfec5 100644
--- a/clang/test/Analysis/bsd-string.c
+++ b/clang/test/Analysis/bsd-string.c
@@ -1,4 +1,8 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring.NullArg,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring.NullArg \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection
 
 #define NULL ((void *)0)
 
diff --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c
index f472a3e0111ae..beabb0f0241e4 100644
--- a/clang/test/Analysis/bstring.c
+++ b/clang/test/Analysis/bstring.c
@@ -1,7 +1,30 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DVARIANT -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -DVARIANT -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify -analyzer-config eagerly-assume=false %s
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -DVARIANT \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS -DVARIANT \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
 
 //===----------------------------------------------------------------------===
 // Declarations
diff --git a/clang/test/Analysis/cstring-plist.c b/clang/test/Analysis/cstring-plist.c
index c527564d49e37..65fa9fe74d1d3 100644
--- a/clang/test/Analysis/cstring-plist.c
+++ b/clang/test/Analysis/cstring-plist.c
@@ -1,6 +1,8 @@
 // RUN: rm -f %t
 // RUN: %clang_analyze_cc1 -fblocks \
-// RUN:   -analyzer-checker=core,unix.Malloc,unix.cstring.NullArg \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.Malloc \
+// RUN:   -analyzer-checker=unix.cstring.NullArg \
 // RUN:   -analyzer-disable-checker=alpha.unix.cstring.OutOfBounds \
 // RUN:   -analyzer-output=plist -o %t %s
 // RUN: FileCheck -input-file %t %s
diff --git a/clang/test/Analysis/null-deref-ps-region.c b/clang/test/Analysis/null-deref-ps-region.c
index 2bc338cd3f2a7..71b7a1ddb0502 100644
--- a/clang/test/Analysis/null-deref-ps-region.c
+++ b/clang/test/Analysis/null-deref-ps-region.c
@@ -1,4 +1,8 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.core,unix,alpha.unix -std=gnu99 -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -verify %s -std=gnu99 \
+// RUN:  -analyzer-checker=core \
+// RUN:  -analyzer-checker=alpha.core \
+// RUN:  -analyzer-checker=unix \
+// RUN:  -analyzer-checker=alpha.unix
 
 #include "Inputs/system-header-simulator.h"
 
diff --git a/clang/test/Analysis/string.c b/clang/test/Analysis/string.c
index 107c199c68dc2..d3b131ec9a64f 100644
--- a/clang/test/Analysis/string.c
+++ b/clang/test/Analysis/string.c
@@ -1,8 +1,46 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -Wno-null-dereference -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -Wno-null-dereference -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DVARIANT -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -Wno-null-dereference -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -DVARIANT -analyzer-checker=alpha.security.taint,core,unix.cstring,unix.Malloc,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -Wno-null-dereference -verify -analyzer-config eagerly-assume=false %s
-// RUN: %clang_analyze_cc1 -DSUPPRESS_OUT_OF_BOUND -analyzer-checker=core,unix.cstring,unix.Malloc,alpha.unix.cstring.BufferOverlap,alpha.unix.cstring.NotNullTerminated,debug.ExprInspection -analyzer-store=region -Wno-null-dereference -verify -analyzer-config eagerly-assume=false %s
+// RUN: %clang_analyze_cc1 -verify %s -Wno-null-dereference \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=unix.Malloc \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -Wno-null-dereference -DUSE_BUILTINS \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=unix.Malloc \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -Wno-null-dereference -DVARIANT \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=unix.Malloc \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -Wno-null-dereference \
+// RUN:   -DUSE_BUILTINS -DVARIANT \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=unix.Malloc \
+// RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
+//
+// RUN: %clang_analyze_cc1 -verify %s -Wno-null-dereference \
+// RUN:   -DSUPPRESS_OUT_OF_BOUND \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=unix.cstring \
+// RUN:   -analyzer-checker=unix.Malloc \
+// RUN:   -analyzer-checker=alpha.unix.cstring.BufferOverlap \
+// RUN:   -analyzer-checker=alpha.unix.cstring.NotNullTerminated \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false
 
 //===----------------------------------------------------------------------===
 // Declarations

From 6aebd8394a2bb9e514d0eaf8c8124b79974a2ba8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 23 May 2019 16:01:59 +0000
Subject: [PATCH 0042/1176] [Object] object::ELFObjectFile::symbol_begin():
 skip symbol index 0

For clients iterating the symbol table, none expects to handle index 0
(STN_UNDEF). Skip it to improve consistency with other binary formats.
Clients that need STN_UNDEF (e.g. lld) can use
getSectionContentsAsArray(). A test will be added in D62148.

Reviewed By: mtrent

Differential Revision: https://reviews.llvm.org/D62296

llvm-svn: 361506
---
 llvm/include/llvm/Object/ELFObjectFile.h | 4 +++-
 llvm/tools/llvm-objdump/llvm-objdump.cpp | 5 -----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 0aa6c935a8354..ed54ad02ccff7 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -982,7 +982,9 @@ ELFObjectFile<ELFT>::ELFObjectFile(ELFObjectFile<ELFT> &&Other)
 
 template <class ELFT>
 basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const {
-  DataRefImpl Sym = toDRI(DotSymtabSec, 0);
+  DataRefImpl Sym =
+      toDRI(DotSymtabSec,
+            DotSymtabSec && DotSymtabSec->sh_size >= sizeof(Elf_Sym) ? 1 : 0);
   return basic_symbol_iterator(SymbolRef(Sym, this));
 }
 
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 318e0ade9425c..1312ea8bbd1ba 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1643,11 +1643,6 @@ void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
 
   const StringRef FileName = O->getFileName();
   for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
-    // Skip printing the special zero symbol when dumping an ELF file.
-    // This makes the output consistent with the GNU objdump.
-    if (I == O->symbol_begin() && isa<ELFObjectFileBase>(O))
-      continue;
-
     const SymbolRef &Symbol = *I;
     uint64_t Address = unwrapOrError(Symbol.getAddress(), ArchiveName, FileName,
                                      ArchitectureName);

From 000228183bf17a0f64afccdda35867553c9b75f6 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Thu, 23 May 2019 16:05:21 +0000
Subject: [PATCH 0043/1176] Ensure builtins use the target default Calling
 Convention

r355317 changed builtins/allocation functions to use the default calling
convention in order to support platforms that use non-cdecl calling
conventions by default.

However the default calling convention is overridable on Windows 32 bit
implementations with some of the /G options. The intent is to permit the
user to set the calling convention of normal functions, however it
should NOT apply to builtins and C++ allocation functions.

This patch ensures that the builtin/allocation functions always use the
Target specific Calling Convention, ignoring the user overridden version
of said default.

llvm-svn: 361507
---
 clang/include/clang/AST/ASTContext.h          |  3 +-
 clang/lib/AST/ASTContext.cpp                  | 57 ++++++++++---------
 clang/lib/Sema/SemaExprCXX.cpp                |  2 +-
 .../test/CodeGenCXX/builtin-calling-conv.cpp  | 11 ++++
 4 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 5cb4a82ced198..6856cb6585f82 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2395,7 +2395,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
 
   /// Retrieves the default calling convention for the current target.
   CallingConv getDefaultCallingConvention(bool IsVariadic,
-                                          bool IsCXXMethod) const;
+                                          bool IsCXXMethod,
+                                          bool IsBuiltin = false) const;
 
   /// Retrieves the "canonical" template name that refers to a
   /// given template.
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 71745042fd890..c3a09723ef189 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -9627,8 +9627,8 @@ QualType ASTContext::GetBuiltinType(unsigned Id,
 
   bool Variadic = (TypeStr[0] == '.');
 
-  FunctionType::ExtInfo EI(
-      getDefaultCallingConvention(Variadic, /*IsCXXMethod=*/false));
+  FunctionType::ExtInfo EI(getDefaultCallingConvention(
+      Variadic, /*IsCXXMethod=*/false, /*IsBuiltin=*/true));
   if (BuiltinInfo.isNoReturn(Id)) EI = EI.withNoReturn(true);
 
 
@@ -10005,34 +10005,39 @@ void ASTContext::forEachMultiversionedFunctionVersion(
 }
 
 CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
-                                                    bool IsCXXMethod) const {
+                                                    bool IsCXXMethod,
+                                                    bool IsBuiltin) const {
   // Pass through to the C++ ABI object
   if (IsCXXMethod)
     return ABI->getDefaultMethodCallConv(IsVariadic);
 
-  switch (LangOpts.getDefaultCallingConv()) {
-  case LangOptions::DCC_None:
-    break;
-  case LangOptions::DCC_CDecl:
-    return CC_C;
-  case LangOptions::DCC_FastCall:
-    if (getTargetInfo().hasFeature("sse2") && !IsVariadic)
-      return CC_X86FastCall;
-    break;
-  case LangOptions::DCC_StdCall:
-    if (!IsVariadic)
-      return CC_X86StdCall;
-    break;
-  case LangOptions::DCC_VectorCall:
-    // __vectorcall cannot be applied to variadic functions.
-    if (!IsVariadic)
-      return CC_X86VectorCall;
-    break;
-  case LangOptions::DCC_RegCall:
-    // __regcall cannot be applied to variadic functions.
-    if (!IsVariadic)
-      return CC_X86RegCall;
-    break;
+  // Builtins ignore user-specified default calling convention and remain the
+  // Target's default calling convention.
+  if (!IsBuiltin) {
+    switch (LangOpts.getDefaultCallingConv()) {
+    case LangOptions::DCC_None:
+      break;
+    case LangOptions::DCC_CDecl:
+      return CC_C;
+    case LangOptions::DCC_FastCall:
+      if (getTargetInfo().hasFeature("sse2") && !IsVariadic)
+        return CC_X86FastCall;
+      break;
+    case LangOptions::DCC_StdCall:
+      if (!IsVariadic)
+        return CC_X86StdCall;
+      break;
+    case LangOptions::DCC_VectorCall:
+      // __vectorcall cannot be applied to variadic functions.
+      if (!IsVariadic)
+        return CC_X86VectorCall;
+      break;
+    case LangOptions::DCC_RegCall:
+      // __regcall cannot be applied to variadic functions.
+      if (!IsVariadic)
+        return CC_X86RegCall;
+      break;
+    }
   }
   return Target->getDefaultCallingConv(TargetInfo::CCMT_Unknown);
 }
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 87dc3a9a54a9a..455a71bd0ac0a 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -2816,7 +2816,7 @@ void Sema::DeclareGlobalAllocationFunction(DeclarationName Name,
   }
 
   FunctionProtoType::ExtProtoInfo EPI(Context.getDefaultCallingConvention(
-      /*IsVariadic=*/false, /*IsCXXMethod=*/false));
+      /*IsVariadic=*/false, /*IsCXXMethod=*/false, /*IsBuiltin=*/true));
 
   QualType BadAllocType;
   bool HasBadAllocExceptionSpec
diff --git a/clang/test/CodeGenCXX/builtin-calling-conv.cpp b/clang/test/CodeGenCXX/builtin-calling-conv.cpp
index 6fdeca0d2c473..f7759e3e8cad6 100644
--- a/clang/test/CodeGenCXX/builtin-calling-conv.cpp
+++ b/clang/test/CodeGenCXX/builtin-calling-conv.cpp
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple spir-unknown-unknown -DREDECL -DSPIR -emit-llvm %s -o - | FileCheck %s -check-prefix SPIR
 // RUN: %clang_cc1 -triple x86_64-linux-pc -emit-llvm %s -o - | FileCheck %s -check-prefix LINUX
 // RUN: %clang_cc1 -triple spir-unknown-unknown -DSPIR -emit-llvm %s -o - | FileCheck %s -check-prefix SPIR
+// RUN: %clang_cc1 -triple i386-windows-pc -fdefault-calling-conv=stdcall -emit-llvm %s -o - | FileCheck %s -check-prefix WIN32
 
 #ifdef REDECL
 namespace std {
@@ -40,3 +41,13 @@ void user() {
 // SPIR: declare spir_func noalias i8* @_Znwj(i32)
 // SPIR: declare spir_func float @atan2f(float, float)
 // SPIR: declare spir_func void @_Z3foov()
+
+// Note: Windows /G options should not change the platform default calling
+// convention of builtins.
+// WIN32: define dso_local x86_stdcallcc void @"?user@@YGXXZ"()
+// WIN32: call i8* @"??2@YAPAXI@Z"
+// WIN32: call float @atan2f
+// WIN32: call x86_stdcallcc void @"?foo@@YGXXZ"
+// WIN32: declare dso_local noalias i8* @"??2@YAPAXI@Z"(
+// WIN32: declare dso_local float @atan2f(float, float)
+// WIN32: declare dso_local x86_stdcallcc void @"?foo@@YGXXZ"()

From 87226a720293281f49a312fe8342d1fdd0aa38ca Mon Sep 17 00:00:00 2001
From: Shoaib Meenai <smeenai@fb.com>
Date: Thu, 23 May 2019 16:29:09 +0000
Subject: [PATCH 0044/1176] [AsmPrinter] Treat a narrowing PtrToInt like Trunc

When printing assembly for PtrToInt, AsmPrinter::lowerConstant
incorrectly assumed that if PtrToInt was not converting to an
int with exactly the same number of bits, it must be widening
to a larger int. But this isn't necessarily true; PtrToInt can
also shrink the size, which is useful when you want to produce
a known 32-bit pointer on a 64-bit platform (on x86_64 ELF
this yields a R_X86_64_32 relocation).

The old behavior of falling through to the widening case for a
narrowing PtrToInt yields bogus assembly code like this, which
fails to assemble because the no-op bit and it accidentally
creates is not a valid relocation:

```
        .long   a&-1
```

The fix is to treat a narrowing PtrToInt exactly the same as
it already treats Trunc: just emit the expression and let
the assembler deal with truncating it in the appropriate way.

Patch by Mat Hostetter <mjh@fb.com>.

Differential Revision: https://reviews.llvm.org/D61325

llvm-svn: 361508
---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 5 ++++-
 llvm/test/CodeGen/X86/ptrtoint-narrow.ll   | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/ptrtoint-narrow.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b57eac3d72e97..bf7776b1dc000 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2231,7 +2231,10 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
 
     // We can emit the pointer value into this slot if the slot is an
     // integer slot equal to the size of the pointer.
-    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+    //
+    // If the pointer is larger than the resultant integer, then
+    // as with Trunc just depend on the assembler to truncate it.
+    if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType()))
       return OpExpr;
 
     // Otherwise the pointer is smaller than the resultant integer, mask off
diff --git a/llvm/test/CodeGen/X86/ptrtoint-narrow.ll b/llvm/test/CodeGen/X86/ptrtoint-narrow.ll
new file mode 100644
index 0000000000000..c34e7b1ec51de
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ptrtoint-narrow.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+@ptr = external global i8, align 1
+@ref = constant i32 ptrtoint (i8* @ptr to i32), align 4
+
+; CHECK: .long  ptr{{$}}

From 27b3b5d952c5981bb850832c86c797616ece8dd6 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Thu, 23 May 2019 16:32:19 +0000
Subject: [PATCH 0045/1176] [MCA] Add the ability to compute critical register
 dependency of an instruction.

This patch adds the methods `getCriticalRegDep()` and `computeCriticalRegDep()` to
class InstructionBase.
The goal is to allow users to obtain information about the critical register
dependency that most affects the latency of an instruction.

These methods are currently unused. However, the long term plan is to use them
in order to allow the computation of a critical-path as part of the bottleneck
analysis. So, this is yet another step towards fixing PR37494.

llvm-svn: 361509
---
 llvm/include/llvm/MCA/Instruction.h | 43 ++++++++++++++++++++---------
 llvm/lib/MCA/Instruction.cpp        | 34 ++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index a7a47fd3645ed..c4d0f6aace35a 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -80,6 +80,16 @@ struct ReadDescriptor {
 
 class ReadState;
 
+/// Longest register dependency.
+///
+/// Used internally by WriteState/ReadState/InstructionBase to help with the
+/// computation of the longest register dependency for an instruction.
+struct CriticalRegDep {
+  unsigned IID;
+  unsigned RegID;
+  unsigned Cycles;
+};
+
 /// Tracks uses of a register definition (e.g. register write).
 ///
 /// Each implicit/explicit register write is associated with an instance of
@@ -123,9 +133,11 @@ class WriteState {
 
   // A partial write that is in a false dependency with this write.
   WriteState *PartialWrite;
-
   unsigned DependentWriteCyclesLeft;
 
+  // Critical register dependency for this write.
+  CriticalRegDep CRD;
+
   // A list of dependent reads. Users is a set of dependent
   // reads. A dependent read is added to the set only if CyclesLeft
   // is "unknown". As soon as CyclesLeft is 'known', each user in the set
@@ -140,7 +152,7 @@ class WriteState {
       : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
         ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
         IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
-        DependentWriteCyclesLeft(0) {}
+        DependentWriteCyclesLeft(0), CRD() {}
 
   WriteState(const WriteState &Other) = default;
   WriteState &operator=(const WriteState &Other) = default;
@@ -150,7 +162,11 @@ class WriteState {
   unsigned getRegisterID() const { return RegisterID; }
   unsigned getRegisterFileID() const { return PRFID; }
   unsigned getLatency() const { return WD->Latency; }
+  unsigned getDependentWriteCyclesLeft() const {
+    return DependentWriteCyclesLeft;
+  }
   const WriteState *getDependentWrite() const { return DependentWrite; }
+  const CriticalRegDep &getCriticalRegDep() const { return CRD; }
 
   // This method adds Use to the set of data dependent reads. IID is the
   // instruction identifier associated with this write. ReadAdvance is the
@@ -162,10 +178,6 @@ class WriteState {
   // write. IID is the instruction identifier associated with this write.
   void addUser(unsigned IID, WriteState *Use);
 
-  unsigned getDependentWriteCyclesLeft() const {
-    return DependentWriteCyclesLeft;
-  }
-
   unsigned getNumUsers() const {
     unsigned NumUsers = Users.size();
     if (PartialWrite)
@@ -189,11 +201,7 @@ class WriteState {
   }
 
   void setDependentWrite(const WriteState *Other) { DependentWrite = Other; }
-  void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
-    DependentWriteCyclesLeft = Cycles;
-    DependentWrite = nullptr;
-  }
-
+  void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
   void setWriteZero() { WritesZero = true; }
   void setEliminated() {
     assert(Users.empty() && "Write is in an inconsistent state.");
@@ -235,6 +243,8 @@ class ReadState {
   // dependent writes (i.e. field DependentWrite) is zero, this value is
   // propagated to field CyclesLeft.
   unsigned TotalCycles;
+  // Longest register dependency.
+  CriticalRegDep CRD;
   // This field is set to true only if there are no dependent writes, and
   // there are no `CyclesLeft' to wait.
   bool IsReady;
@@ -246,13 +256,14 @@ class ReadState {
 public:
   ReadState(const ReadDescriptor &Desc, unsigned RegID)
       : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
-        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
-        IsZero(false), IndependentFromDef(false) {}
+        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(),
+        IsReady(true), IsZero(false), IndependentFromDef(false) {}
 
   const ReadDescriptor &getDescriptor() const { return *RD; }
   unsigned getSchedClass() const { return RD->SchedClassID; }
   unsigned getRegisterID() const { return RegisterID; }
   unsigned getRegisterFileID() const { return PRFID; }
+  const CriticalRegDep &getCriticalRegDep() const { return CRD; }
 
   bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; }
   bool isReady() const { return IsReady; }
@@ -394,6 +405,9 @@ class InstructionBase {
   // One entry per each implicit and explicit register use.
   SmallVector<ReadState, 4> Uses;
 
+  // Critical register dependency.
+  CriticalRegDep CRD;
+
 public:
   InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
 
@@ -405,6 +419,9 @@ class InstructionBase {
 
   unsigned getLatency() const { return Desc.MaxLatency; }
 
+  const CriticalRegDep &getCriticalRegDep() const { return CRD; }
+  const CriticalRegDep &computeCriticalRegDep();
+
   bool hasDependentUsers() const {
     return any_of(Defs,
                   [](const WriteState &Def) { return Def.getNumUsers() > 0; });
diff --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp
index 58f0250313784..5e2fb771e4f5f 100644
--- a/llvm/lib/MCA/Instruction.cpp
+++ b/llvm/lib/MCA/Instruction.cpp
@@ -18,6 +18,14 @@
 namespace llvm {
 namespace mca {
 
+void WriteState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
+  CRD.IID = IID;
+  CRD.RegID = RegID;
+  CRD.Cycles = Cycles;
+  DependentWriteCyclesLeft = Cycles;
+  DependentWrite = nullptr;
+}
+
 void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
   assert(DependentWrites);
   assert(CyclesLeft == UNKNOWN_CYCLES);
@@ -28,7 +36,12 @@ void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
   // The HW is forced to do some extra bookkeeping to track of all the
   // dependent writes, and implement a merging scheme for the partial writes.
   --DependentWrites;
-  TotalCycles = std::max(TotalCycles, Cycles);
+  if (TotalCycles < Cycles) {
+    CRD.IID = IID;
+    CRD.RegID = RegID;
+    CRD.Cycles = Cycles;
+    TotalCycles = Cycles;
+  }
 
   if (!DependentWrites) {
     CyclesLeft = TotalCycles;
@@ -121,6 +134,25 @@ void WriteRef::dump() const {
 }
 #endif
 
+const CriticalRegDep &InstructionBase::computeCriticalRegDep() {
+  if (CRD.Cycles || (Defs.empty() && Uses.empty()))
+    return CRD;
+  unsigned MaxLatency = 0;
+  for (const WriteState &WS : Defs) {
+    const CriticalRegDep &WriteCRD = WS.getCriticalRegDep();
+    if (WriteCRD.Cycles > MaxLatency)
+      CRD = WriteCRD;
+  }
+
+  for (const ReadState &RS : Uses) {
+    const CriticalRegDep &ReadCRD = RS.getCriticalRegDep();
+    if (ReadCRD.Cycles > MaxLatency)
+      CRD = ReadCRD;
+  }
+
+  return CRD;
+}
+
 void Instruction::dispatch(unsigned RCUToken) {
   assert(Stage == IS_INVALID);
   Stage = IS_DISPATCHED;

From fd11a5f47d00dd6176938a3767fd902742617a1e Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Thu, 23 May 2019 16:39:26 +0000
Subject: [PATCH 0046/1176] [CodeComplete] Only show lambda completions if
 patterns are requested

This is a trivial follow-up to r361461, so sending without review.

llvm-svn: 361510
---
 clang/lib/Sema/SemaCodeComplete.cpp   | 2 ++
 clang/test/CodeCompletion/lambdas.cpp | 9 +++++++++
 2 files changed, 11 insertions(+)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index e6093141ab76c..e6c0b68b8d5fd 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -4134,6 +4134,8 @@ static const FunctionProtoType *TryDeconstructFunctionLike(QualType T) {
 static void AddLambdaCompletion(ResultBuilder &Results,
                                 llvm::ArrayRef<QualType> Parameters,
                                 const LangOptions &LangOpts) {
+  if (!Results.includeCodePatterns())
+    return;
   CodeCompletionBuilder Completion(Results.getAllocator(),
                                    Results.getCodeCompletionTUInfo());
   // [](<parameters>) {}
diff --git a/clang/test/CodeCompletion/lambdas.cpp b/clang/test/CodeCompletion/lambdas.cpp
index 3467cec0e3bdc..1ab804ad818e6 100644
--- a/clang/test/CodeCompletion/lambdas.cpp
+++ b/clang/test/CodeCompletion/lambdas.cpp
@@ -51,3 +51,12 @@ void test4() {
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:50:35 %s -o - | FileCheck -check-prefix=CHECK-6 %s
   // CHECK-6-NOT: COMPLETION: Pattern : [<#=
 }
+
+void test5() {
+  // Completions are only added when -code-completion-patterns are enabled.
+  function<void()> b = {};
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:57:24 %s -o - | FileCheck -check-prefix=CHECK-7 %s
+  // CHECK-7: COMPLETION: Pattern : [<#=
+  // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:57:24 %s -o - | FileCheck -check-prefix=CHECK-8 %s
+  // CHECK-8-NOT: COMPLETION: Pattern : [<#=
+}

From 346758407e1dabb0d68af6bae7fba131b74e4e54 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Thu, 23 May 2019 16:48:47 +0000
Subject: [PATCH 0047/1176] [Index] Fix reported references in presence of
 template type aliases

Summary: See the added test for an example.

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62303

llvm-svn: 361511
---
 .../clangd/unittests/XRefsTests.cpp           | 11 +++++
 clang/lib/Index/IndexTypeSourceInfo.cpp       | 44 ++++++++++++-------
 2 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 77fa042c5277b..2badcffd04c87 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -497,6 +497,17 @@ TEST(LocateSymbol, Ambiguous) {
               ElementsAre(Sym("Foo"), Sym("Foo")));
 }
 
+TEST(LocateSymbol, TemplateTypedefs) {
+  auto T = Annotations(R"cpp(
+    template <class T> struct function {};
+    template <class T> using callback = function<T()>;
+
+    c^allback<int> foo;
+  )cpp");
+  auto AST = TestTU::withCode(T.code()).build();
+  EXPECT_THAT(locateSymbolAt(AST, T.point()), ElementsAre(Sym("callback")));
+}
+
 TEST(LocateSymbol, RelPathsInCompileCommand) {
   // The source is in "/clangd-test/src".
   // We build in "/clangd-test/build".
diff --git a/clang/lib/Index/IndexTypeSourceInfo.cpp b/clang/lib/Index/IndexTypeSourceInfo.cpp
index 9f9740b607975..959d5f1197fee 100644
--- a/clang/lib/Index/IndexTypeSourceInfo.cpp
+++ b/clang/lib/Index/IndexTypeSourceInfo.cpp
@@ -133,29 +133,41 @@ class TypeIndexer : public RecursiveASTVisitor<TypeIndexer> {
     return true;
   }
 
-  template<typename TypeLocType>
-  bool HandleTemplateSpecializationTypeLoc(TypeLocType TL) {
-    if (const auto *T = TL.getTypePtr()) {
-      if (CXXRecordDecl *RD = T->getAsCXXRecordDecl()) {
-        if (!RD->isImplicit() || IndexCtx.shouldIndexImplicitInstantiation()) {
-          IndexCtx.handleReference(RD, TL.getTemplateNameLoc(), Parent,
-                                   ParentDC, SymbolRoleSet(), Relations);
-          return true;
-        }
-      }
-      if (const TemplateDecl *D = T->getTemplateName().getAsTemplateDecl())
-        IndexCtx.handleReference(D, TL.getTemplateNameLoc(), Parent, ParentDC,
-                                 SymbolRoleSet(), Relations);
+  void HandleTemplateSpecializationTypeLoc(TemplateName TemplName,
+                                           SourceLocation TemplNameLoc,
+                                           CXXRecordDecl *ResolvedClass,
+                                           bool IsTypeAlias) {
+    // In presence of type aliases, the resolved class was never written in
+    // the code so don't report it.
+    if (!IsTypeAlias && ResolvedClass &&
+        (!ResolvedClass->isImplicit() ||
+         IndexCtx.shouldIndexImplicitInstantiation())) {
+      IndexCtx.handleReference(ResolvedClass, TemplNameLoc, Parent, ParentDC,
+                               SymbolRoleSet(), Relations);
+    } else if (const TemplateDecl *D = TemplName.getAsTemplateDecl()) {
+      IndexCtx.handleReference(D, TemplNameLoc, Parent, ParentDC,
+                               SymbolRoleSet(), Relations);
     }
-    return true;
   }
 
   bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) {
-    return HandleTemplateSpecializationTypeLoc(TL);
+    auto *T = TL.getTypePtr();
+    if (!T)
+      return true;
+    HandleTemplateSpecializationTypeLoc(
+        T->getTemplateName(), TL.getTemplateNameLoc(), T->getAsCXXRecordDecl(),
+        T->isTypeAlias());
+    return true;
   }
 
   bool VisitDeducedTemplateSpecializationTypeLoc(DeducedTemplateSpecializationTypeLoc TL) {
-    return HandleTemplateSpecializationTypeLoc(TL);
+    auto *T = TL.getTypePtr();
+    if (!T)
+      return true;
+    HandleTemplateSpecializationTypeLoc(
+        T->getTemplateName(), TL.getTemplateNameLoc(), T->getAsCXXRecordDecl(),
+        /*IsTypeAlias=*/false);
+    return true;
   }
 
   bool VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {

From 7bbefb13ee521c2385788fa3e2f1aac4c36c313d Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Thu, 23 May 2019 17:03:43 +0000
Subject: [PATCH 0048/1176] Transforms: lower fadd and fsub atomicrmw
 instructions

`fadd` and `fsub` have recently (r351850) been added as `atomicrmw`
operations. This diff adds lowering cases for them to the LowerAtomic
transform.

Patch by Josh Berdine!

llvm-svn: 361512
---
 llvm/lib/Transforms/Scalar/LowerAtomic.cpp    |  6 +++++
 .../Transforms/LowerAtomic/atomic-load.ll     | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
index f39ca23964468..e076424d90425 100644
--- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -86,6 +86,12 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
     Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
                                Orig, Val);
     break;
+  case AtomicRMWInst::FAdd:
+    Res = Builder.CreateFAdd(Orig, Val);
+    break;
+  case AtomicRMWInst::FSub:
+    Res = Builder.CreateFSub(Orig, Val);
+    break;
   }
   Builder.CreateStore(Res, Ptr);
   RMWI->replaceAllUsesWith(Orig);
diff --git a/llvm/test/Transforms/LowerAtomic/atomic-load.ll b/llvm/test/Transforms/LowerAtomic/atomic-load.ll
index e73417f3d407a..f4988c20b6a42 100644
--- a/llvm/test/Transforms/LowerAtomic/atomic-load.ll
+++ b/llvm/test/Transforms/LowerAtomic/atomic-load.ll
@@ -35,3 +35,25 @@ define i8 @min() {
   ret i8 %j
 ; CHECK: ret i8 [[INST]]
 }
+
+define float @fadd() {
+; CHECK-LABEL: @fadd(
+  %i = alloca float
+  %j = atomicrmw fadd float* %i, float 42.0 monotonic
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: fadd
+; CHECK-NEXT: store
+  ret float %j
+; CHECK: ret float [[INST]]
+}
+
+define float @fsub() {
+; CHECK-LABEL: @fsub(
+  %i = alloca float
+  %j = atomicrmw fsub float* %i, float 42.0 monotonic
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: fsub
+; CHECK-NEXT: store
+  ret float %j
+; CHECK: ret float [[INST]]
+}

From c5ec2a2bc1980e21910ca5c649cc0c3346979fa7 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Thu, 23 May 2019 17:06:46 +0000
Subject: [PATCH 0049/1176] [CMake] Copy C++ headers before configuring
 runtimes build

Summary: On some platforms C++ headers are packaged with the compiler not the sysroot. If you don't copy C++ headers into the build include directory during configuraiton of the outer build the C++ check during the runtime configuration may get inaccurate results.

Reviewers: phosek, compnerd, smeenai, EricWF

Reviewed By: compnerd

Subscribers: EricWF, christof, libcxx-commits, mgorny, llvm-commits

Tags: #llvm, #libc

Differential Revision: https://reviews.llvm.org/D62155

llvm-svn: 361513
---
 libcxx/include/CMakeLists.txt | 30 +++++++++++++++++++-----------
 llvm/runtimes/CMakeLists.txt  | 17 +++++++++++++----
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 9880115c86a4c..6f8f0a59bf034 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -208,6 +208,14 @@ else()
     )
 endif()
 
+# In some build configuraitons (like boostrapping clang), we need to be able to
+# install the libcxx headers before CMake configuraiton for libcxx runs. Making
+# the name of this target configurable allows LLVM/runtimes/CMakeLists.txt to
+# add this subdirectory to the LLVM build to put libcxx's headers in place
+# before libcxx's build configuration is run.
+if (NOT CXX_HEADER_TARGET)
+  set(CXX_HEADER_TARGET cxx-headers)
+endif()
 if(NOT LIBCXX_USING_INSTALLED_LLVM AND LIBCXX_HEADER_DIR)
   set(output_dir ${LIBCXX_HEADER_DIR}/include/c++/v1)
 
@@ -233,18 +241,18 @@ if(NOT LIBCXX_USING_INSTALLED_LLVM AND LIBCXX_HEADER_DIR)
     list(APPEND out_files ${dst})
   endif()
 
-  add_custom_target(cxx-headers ALL DEPENDS ${out_files} ${LIBCXX_CXX_ABI_HEADER_TARGET})
+  add_custom_target(${CXX_HEADER_TARGET} ALL DEPENDS ${out_files} ${LIBCXX_CXX_ABI_HEADER_TARGET})
 else()
-  add_custom_target(cxx-headers)
+  add_custom_target(${CXX_HEADER_TARGET})
 endif()
-set_target_properties(cxx-headers PROPERTIES FOLDER "Misc")
+set_target_properties(${CXX_HEADER_TARGET} PROPERTIES FOLDER "Misc")
 
 if (LIBCXX_INSTALL_HEADERS)
   foreach(file ${files})
     get_filename_component(dir ${file} DIRECTORY)
     install(FILES ${file}
       DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1/${dir}
-      COMPONENT cxx-headers
+      COMPONENT ${CXX_HEADER_TARGET}
       PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
     )
   endforeach()
@@ -255,19 +263,19 @@ if (LIBCXX_INSTALL_HEADERS)
       DESTINATION ${LIBCXX_INSTALL_HEADER_PREFIX}include/c++/v1
       PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
       RENAME __config
-      COMPONENT cxx-headers)
+      COMPONENT ${CXX_HEADER_TARGET})
   endif()
 
   if (NOT CMAKE_CONFIGURATION_TYPES)
-    add_custom_target(install-cxx-headers
-                      DEPENDS cxx-headers ${generated_config_deps}
+    add_custom_target(install-${CXX_HEADER_TARGET}
+                      DEPENDS ${CXX_HEADER_TARGET} ${generated_config_deps}
                       COMMAND "${CMAKE_COMMAND}"
-                              -DCMAKE_INSTALL_COMPONENT=cxx-headers
+                              -DCMAKE_INSTALL_COMPONENT=${CXX_HEADER_TARGET}
                               -P "${CMAKE_BINARY_DIR}/cmake_install.cmake")
     # Stripping is a no-op for headers
-    add_custom_target(install-cxx-headers-stripped DEPENDS install-cxx-headers)
+    add_custom_target(install-${CXX_HEADER_TARGET}-stripped DEPENDS install-${CXX_HEADER_TARGET})
 
-    add_custom_target(install-libcxx-headers DEPENDS install-cxx-headers)
-    add_custom_target(install-libcxx-headers-stripped DEPENDS install-cxx-headers-stripped)
+    add_custom_target(install-libcxx-headers DEPENDS install-${CXX_HEADER_TARGET})
+    add_custom_target(install-libcxx-headers-stripped DEPENDS install-${CXX_HEADER_TARGET}-stripped)
   endif()
 endif()
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 285e1fcae1d35..b9436025f5cd1 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -4,8 +4,12 @@
 # should be built with the LLVM toolchain from the build directory. This file is
 # a first step to formalizing runtime build interfaces.
 
-# In the current state this file only works with compiler-rt, other runtimes
-# will work as the runtime build interface standardizes.
+# Setting CMake minimum required version should be at the very top of the file
+# if this is the entry point.
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  cmake_minimum_required(VERSION 3.4.3)
+  project(Runtimes C CXX ASM)
+endif()
 
 # Find all subdirectories containing CMake projects
 file(GLOB entries *)
@@ -205,6 +209,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
 
 else() # if this is included from LLVM's CMake
   include(LLVMExternalProjectUtils)
+  if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR)
+    set(LIBCXX_HEADER_DIR ${LLVM_INCLUDE_DIR}/c++/v1/)
+    set(CXX_HEADER_TARGET runtime-libcxx-headers)
+    add_subdirectory(${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}/include ${CXX_HEADER_TARGET})
+  endif()
 
   if(NOT LLVM_BUILD_RUNTIMES)
     set(EXTRA_ARGS EXCLUDE_FROM_ALL)
@@ -354,7 +363,7 @@ else() # if this is included from LLVM's CMake
 
     llvm_ExternalProject_Add(runtimes
                              ${CMAKE_CURRENT_SOURCE_DIR}
-                             DEPENDS ${ARG_DEPENDS}
+                             DEPENDS ${ARG_DEPENDS} runtime-libcxx-headers
                              # Builtins were built separately above
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}
@@ -445,7 +454,7 @@ else() # if this is included from LLVM's CMake
 
     llvm_ExternalProject_Add(runtimes-${name}
                              ${CMAKE_CURRENT_SOURCE_DIR}
-                             DEPENDS ${${name}_deps}
+                             DEPENDS ${${name}_deps} runtime-libcxx-headers
                              # Builtins were built separately above
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}

From aa7a2c547e2802f15c9346bff18c5d5c585ab882 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm@google.com>
Date: Thu, 23 May 2019 17:11:33 +0000
Subject: [PATCH 0050/1176] [LibTooling] Fix dangling references in
 RangeSelector.

Summary:
RangeSelector had a number of cases of capturing a StringRef in a lambda, which
lead to dangling references. This change converts all uses in the API of
`StringRef` to `std::string` to avoid this problem. `std::string` in the API is
a reasonable choice, because the combinators are always storing the string
beyond the life of the combinator construction.

Reviewers: ilya-biryukov, gribozavr

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62328

llvm-svn: 361514
---
 .../clang/Tooling/Refactoring/RangeSelector.h | 18 ++++++-------
 .../lib/Tooling/Refactoring/RangeSelector.cpp | 26 +++++++++----------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Tooling/Refactoring/RangeSelector.h b/clang/include/clang/Tooling/Refactoring/RangeSelector.h
index d1d6bc8b6a4a9..2d878b90aa5e8 100644
--- a/clang/include/clang/Tooling/Refactoring/RangeSelector.h
+++ b/clang/include/clang/Tooling/Refactoring/RangeSelector.h
@@ -17,9 +17,9 @@
 
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Basic/SourceLocation.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 #include <functional>
+#include <string>
 
 namespace clang {
 namespace tooling {
@@ -35,19 +35,19 @@ inline RangeSelector charRange(CharSourceRange R) {
 RangeSelector range(RangeSelector Begin, RangeSelector End);
 
 /// Convenience version of \c range where end-points are bound nodes.
-RangeSelector range(StringRef BeginID, StringRef EndID);
+RangeSelector range(std::string BeginID, std::string EndID);
 
 /// Selects a node, including trailing semicolon (for non-expression
 /// statements). \p ID is the node's binding in the match result.
-RangeSelector node(StringRef ID);
+RangeSelector node(std::string ID);
 
 /// Selects a node, including trailing semicolon (always). Useful for selecting
 /// expression statements. \p ID is the node's binding in the match result.
-RangeSelector statement(StringRef ID);
+RangeSelector statement(std::string ID);
 
 /// Given a \c MemberExpr, selects the member token. \p ID is the node's
 /// binding in the match result.
-RangeSelector member(StringRef ID);
+RangeSelector member(std::string ID);
 
 /// Given a node with a "name", (like \c NamedDecl, \c DeclRefExpr or \c
 /// CxxCtorInitializer) selects the name's token.  Only selects the final
@@ -56,19 +56,19 @@ RangeSelector member(StringRef ID);
 /// it selects only `baz`.
 ///
 /// \param ID is the node's binding in the match result.
-RangeSelector name(StringRef ID);
+RangeSelector name(std::string ID);
 
 // Given a \c CallExpr (bound to \p ID), selects the arguments' source text (all
 // source between the call's parentheses).
-RangeSelector callArgs(StringRef ID);
+RangeSelector callArgs(std::string ID);
 
 // Given a \c CompoundStmt (bound to \p ID), selects the source of the
 // statements (all source between the braces).
-RangeSelector statements(StringRef ID);
+RangeSelector statements(std::string ID);
 
 // Given a \c InitListExpr (bound to \p ID), selects the range of the elements
 // (all source between the braces).
-RangeSelector initListElements(StringRef ID);
+RangeSelector initListElements(std::string ID);
 
 /// Selects the range from which `S` was expanded (possibly along with other
 /// source), if `S` is an expansion, and `S` itself, otherwise.  Corresponds to
diff --git a/clang/lib/Tooling/Refactoring/RangeSelector.cpp b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
index 92426db3a5ebe..d5f82d4262bec 100644
--- a/clang/lib/Tooling/Refactoring/RangeSelector.cpp
+++ b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
@@ -104,7 +104,7 @@ static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
   return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
 }
 
-RangeSelector tooling::node(StringRef ID) {
+RangeSelector tooling::node(std::string ID) {
   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
     if (!Node)
@@ -115,7 +115,7 @@ RangeSelector tooling::node(StringRef ID) {
   };
 }
 
-RangeSelector tooling::statement(StringRef ID) {
+RangeSelector tooling::statement(std::string ID) {
   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
     if (!Node)
@@ -143,11 +143,11 @@ RangeSelector tooling::range(RangeSelector Begin, RangeSelector End) {
   };
 }
 
-RangeSelector tooling::range(StringRef BeginID, StringRef EndID) {
-  return tooling::range(node(BeginID), node(EndID));
+RangeSelector tooling::range(std::string BeginID, std::string EndID) {
+  return tooling::range(node(std::move(BeginID)), node(std::move(EndID)));
 }
 
-RangeSelector tooling::member(StringRef ID) {
+RangeSelector tooling::member(std::string ID) {
   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
     if (!Node)
@@ -159,7 +159,7 @@ RangeSelector tooling::member(StringRef ID) {
   };
 }
 
-RangeSelector tooling::name(StringRef ID) {
+RangeSelector tooling::name(std::string ID) {
   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
     if (!N)
@@ -205,7 +205,7 @@ class RelativeSelector {
   std::string ID;
 
 public:
-  RelativeSelector(StringRef ID) : ID(ID) {}
+  RelativeSelector(std::string ID) : ID(std::move(ID)) {}
 
   Expected<CharSourceRange> operator()(const MatchResult &Result) {
     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
@@ -231,8 +231,8 @@ CharSourceRange getStatementsRange(const MatchResult &,
 }
 } // namespace
 
-RangeSelector tooling::statements(StringRef ID) {
-  return RelativeSelector<CompoundStmt, getStatementsRange>(ID);
+RangeSelector tooling::statements(std::string ID) {
+  return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
 }
 
 namespace {
@@ -246,8 +246,8 @@ CharSourceRange getCallArgumentsRange(const MatchResult &Result,
 }
 } // namespace
 
-RangeSelector tooling::callArgs(StringRef ID) {
-  return RelativeSelector<CallExpr, getCallArgumentsRange>(ID);
+RangeSelector tooling::callArgs(std::string ID) {
+  return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
 }
 
 namespace {
@@ -260,8 +260,8 @@ CharSourceRange getElementsRange(const MatchResult &,
 }
 } // namespace
 
-RangeSelector tooling::initListElements(StringRef ID) {
-  return RelativeSelector<InitListExpr, getElementsRange>(ID);
+RangeSelector tooling::initListElements(std::string ID) {
+  return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
 }
 
 RangeSelector tooling::expansion(RangeSelector S) {

From 7b7683d7a6c4b3839629403a85dc0bd5b9a502b6 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomasp@graphcore.ai>
Date: Thu, 23 May 2019 17:19:36 +0000
Subject: [PATCH 0051/1176] [FileCheck] Remove llvm:: prefix

Summary:
Remove all llvm:: prefixes in FileCheck library header and
implementation except for calls to make_unique and make_shared since
both files already use the llvm namespace.

Reviewers: jhenderson, jdenny, probinson, arichardson

Subscribers: hiraditya, arichardson, probinson, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62323

llvm-svn: 361515
---
 llvm/include/llvm/Support/FileCheck.h | 14 ++++----
 llvm/lib/Support/FileCheck.cpp        | 48 +++++++++++++--------------
 2 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h
index 087cc2d8bbd4c..25b3af501687d 100644
--- a/llvm/include/llvm/Support/FileCheck.h
+++ b/llvm/include/llvm/Support/FileCheck.h
@@ -48,7 +48,7 @@ class FileCheckNumericVariable {
   StringRef Name;
 
   /// Value of numeric variable, if defined, or None otherwise.
-  llvm::Optional<uint64_t> Value;
+  Optional<uint64_t> Value;
 
 public:
   /// Constructor for numeric variable \p Name with a known \p Value at parse
@@ -60,7 +60,7 @@ class FileCheckNumericVariable {
   StringRef getName() const { return Name; }
 
   /// \returns value of this numeric variable.
-  llvm::Optional<uint64_t> getValue() const { return Value; }
+  Optional<uint64_t> getValue() const { return Value; }
 
   /// Sets value of this numeric variable if not defined. \returns whether the
   /// variable was already defined.
@@ -96,7 +96,7 @@ class FileCheckNumExpr {
   /// Evaluates the value of this numeric expression, using EvalBinop to
   /// perform the binary operation it consists of. \returns None if the numeric
   /// variable used is undefined, or the expression value otherwise.
-  llvm::Optional<uint64_t> eval() const;
+  Optional<uint64_t> eval() const;
 
   /// \returns the name of the undefined variable used in this expression if
   /// any or an empty string otherwise.
@@ -139,7 +139,7 @@ class FileCheckSubstitution {
 
   /// \returns a string containing the result of the substitution represented
   /// by this class instance or None if substitution failed.
-  virtual llvm::Optional<std::string> getResult() const = 0;
+  virtual Optional<std::string> getResult() const = 0;
 
   /// \returns the name of the variable used in this substitution if undefined,
   /// or an empty string otherwise.
@@ -154,7 +154,7 @@ class FileCheckStringSubstitution : public FileCheckSubstitution {
 
   /// \returns the text that the string variable in this substitution matched
   /// when defined, or None if the variable is undefined.
-  llvm::Optional<std::string> getResult() const override;
+  Optional<std::string> getResult() const override;
 
   /// \returns the name of the string variable used in this substitution if
   /// undefined, or an empty string otherwise.
@@ -174,7 +174,7 @@ class FileCheckNumericSubstitution : public FileCheckSubstitution {
 
   /// \returns a string containing the result of evaluating the numeric
   /// expression in this substitution, or None if evaluation failed.
-  llvm::Optional<std::string> getResult() const override;
+  Optional<std::string> getResult() const override;
 
   /// \returns the name of the numeric variable used in this substitution if
   /// undefined, or an empty string otherwise.
@@ -268,7 +268,7 @@ class FileCheckPatternContext {
 public:
   /// \returns the value of string variable \p VarName or None if no such
   /// variable has been defined.
-  llvm::Optional<StringRef> getPatternVarValue(StringRef VarName);
+  Optional<StringRef> getPatternVarValue(StringRef VarName);
 
   /// Defines string and numeric variables from definitions given on the
   /// command line, passed as a vector of [#]VAR=VAL strings in
diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp
index a2b0f84e2c96f..1263ec5e02840 100644
--- a/llvm/lib/Support/FileCheck.cpp
+++ b/llvm/lib/Support/FileCheck.cpp
@@ -34,15 +34,15 @@ bool FileCheckNumericVariable::setValue(uint64_t NewValue) {
 bool FileCheckNumericVariable::clearValue() {
   if (!Value)
     return true;
-  Value = llvm::None;
+  Value = None;
   return false;
 }
 
-llvm::Optional<uint64_t> FileCheckNumExpr::eval() const {
-  llvm::Optional<uint64_t> LeftOp = this->LeftOp->getValue();
+Optional<uint64_t> FileCheckNumExpr::eval() const {
+  Optional<uint64_t> LeftOp = this->LeftOp->getValue();
   // Variable is undefined.
   if (!LeftOp)
-    return llvm::None;
+    return None;
   return EvalBinop(*LeftOp, RightOp);
 }
 
@@ -52,18 +52,18 @@ StringRef FileCheckNumExpr::getUndefVarName() const {
   return StringRef();
 }
 
-llvm::Optional<std::string> FileCheckNumericSubstitution::getResult() const {
-  llvm::Optional<uint64_t> EvaluatedValue = NumExpr->eval();
+Optional<std::string> FileCheckNumericSubstitution::getResult() const {
+  Optional<uint64_t> EvaluatedValue = NumExpr->eval();
   if (!EvaluatedValue)
-    return llvm::None;
+    return None;
   return utostr(*EvaluatedValue);
 }
 
-llvm::Optional<std::string> FileCheckStringSubstitution::getResult() const {
+Optional<std::string> FileCheckStringSubstitution::getResult() const {
   // Look up the value and escape it so that we can put it into the regex.
-  llvm::Optional<StringRef> VarVal = Context->getPatternVarValue(FromStr);
+  Optional<StringRef> VarVal = Context->getPatternVarValue(FromStr);
   if (!VarVal)
-    return llvm::None;
+    return None;
   return Regex::escape(*VarVal);
 }
 
@@ -472,7 +472,7 @@ size_t FileCheckPattern::match(StringRef Buffer, size_t &MatchLen) const {
     // handled by back-references.
     for (const auto &Substitution : Substitutions) {
       // Substitute and check for failure (e.g. use of undefined variable).
-      llvm::Optional<std::string> Value = Substitution->getResult();
+      Optional<std::string> Value = Substitution->getResult();
       if (!Value)
         return StringRef::npos;
 
@@ -533,7 +533,7 @@ void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
     for (const auto &Substitution : Substitutions) {
       SmallString<256> Msg;
       raw_svector_ostream OS(Msg);
-      llvm::Optional<std::string> MatchedValue = Substitution->getResult();
+      Optional<std::string> MatchedValue = Substitution->getResult();
 
       // Substitution failed or is not known at match time, print the undefined
       // variable it uses.
@@ -625,11 +625,11 @@ void FileCheckPattern::printFuzzyMatch(
   }
 }
 
-llvm::Optional<StringRef>
+Optional<StringRef>
 FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
   auto VarIter = GlobalVariableTable.find(VarName);
   if (VarIter == GlobalVariableTable.end())
-    return llvm::None;
+    return None;
 
   return VarIter->second;
 }
@@ -703,9 +703,8 @@ size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
   return StringRef::npos;
 }
 
-StringRef
-llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
-                                  SmallVectorImpl<char> &OutputBuffer) {
+StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
+                                      SmallVectorImpl<char> &OutputBuffer) {
   OutputBuffer.reserve(MB.getBufferSize());
 
   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
@@ -923,9 +922,8 @@ FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
   return {StringRef(), StringRef()};
 }
 
-bool llvm::FileCheck::ReadCheckFile(
-    SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
-    std::vector<FileCheckString> &CheckStrings) {
+bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+                              std::vector<FileCheckString> &CheckStrings) {
   if (PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM))
     return true;
 
@@ -1499,7 +1497,7 @@ static bool ValidateCheckPrefix(StringRef CheckPrefix) {
   return Validator.match(CheckPrefix);
 }
 
-bool llvm::FileCheck::ValidateCheckPrefixes() {
+bool FileCheck::ValidateCheckPrefixes() {
   StringSet<> PrefixSet;
 
   for (StringRef Prefix : Req.CheckPrefixes) {
@@ -1517,7 +1515,7 @@ bool llvm::FileCheck::ValidateCheckPrefixes() {
   return true;
 }
 
-Regex llvm::FileCheck::buildCheckPrefixRegex() {
+Regex FileCheck::buildCheckPrefixRegex() {
   // I don't think there's a way to specify an initial value for cl::list,
   // so if nothing was specified, add the default
   if (Req.CheckPrefixes.empty())
@@ -1682,9 +1680,9 @@ void FileCheckPatternContext::clearLocalVars() {
     GlobalNumericVariableTable.erase(Var);
 }
 
-bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
-                                 ArrayRef<FileCheckString> CheckStrings,
-                                 std::vector<FileCheckDiag> *Diags) {
+bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
+                           ArrayRef<FileCheckString> CheckStrings,
+                           std::vector<FileCheckDiag> *Diags) {
   bool ChecksFailed = false;
 
   unsigned i = 0, j = 0, e = CheckStrings.size();

From eafe8ef6f2b44baf5a84658caca90c2f9c1849ca Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 23 May 2019 17:26:47 +0000
Subject: [PATCH 0052/1176] [WebAssembly] Add multivalue and tail-call target
 features

Summary:
These features will both be implemented soon, so I thought I would
save time by adding the boilerplate for both of them at the same time.

Reviewers: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62047

llvm-svn: 361516
---
 clang/include/clang/Driver/Options.td         |  4 +++
 clang/lib/Basic/Targets/WebAssembly.cpp       | 26 +++++++++++++++++
 clang/lib/Basic/Targets/WebAssembly.h         |  2 ++
 .../test/Preprocessor/wasm-target-features.c  | 22 +++++++++++++++
 llvm/lib/Target/WebAssembly/WebAssembly.td    | 11 ++++++++
 .../WebAssembly/WebAssemblyInstrInfo.td       | 14 +++++-----
 .../Target/WebAssembly/WebAssemblySubtarget.h |  4 +++
 llvm/test/CodeGen/WebAssembly/multivalue.ll   | 28 +++++++++++++++++++
 llvm/test/CodeGen/WebAssembly/tailcall.ll     | 21 ++++++++++++++
 9 files changed, 125 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/WebAssembly/multivalue.ll
 create mode 100644 llvm/test/CodeGen/WebAssembly/tailcall.ll

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 26a06a6556a95..68f415fb31d63 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2185,6 +2185,10 @@ def mbulk_memory : Flag<["-"], "mbulk-memory">, Group<m_wasm_Features_Group>;
 def mno_bulk_memory : Flag<["-"], "mno-bulk-memory">, Group<m_wasm_Features_Group>;
 def mmutable_globals : Flag<["-"], "mmutable-globals">, Group<m_wasm_Features_Group>;
 def mno_mutable_globals : Flag<["-"], "mno-mutable-globals">, Group<m_wasm_Features_Group>;
+def mmultivalue : Flag<["-"], "mmultivalue">, Group<m_wasm_Features_Group>;
+def mno_multivalue : Flag<["-"], "mno-multivalue">, Group<m_wasm_Features_Group>;
+def mtail_call : Flag<["-"], "mtail-call">, Group<m_wasm_Features_Group>;
+def mno_tail_call : Flag<["-"], "mno-tail-call">, Group<m_wasm_Features_Group>;
 
 def mamdgpu_debugger_abi : Joined<["-"], "mamdgpu-debugger-abi=">,
   Flags<[HelpHidden]>,
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index 2fceed2ad1f00..b16442b99b625 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -43,6 +43,8 @@ bool WebAssemblyTargetInfo::hasFeature(StringRef Feature) const {
       .Case("bulk-memory", HasBulkMemory)
       .Case("atomics", HasAtomics)
       .Case("mutable-globals", HasMutableGlobals)
+      .Case("multivalue", HasMultivalue)
+      .Case("tail-call", HasTailCall)
       .Default(false);
 }
 
@@ -74,6 +76,10 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__wasm_atomics__");
   if (HasMutableGlobals)
     Builder.defineMacro("__wasm_mutable_globals__");
+  if (HasMultivalue)
+    Builder.defineMacro("__wasm_multivalue__");
+  if (HasTailCall)
+    Builder.defineMacro("__wasm_tail_call__");
 }
 
 void WebAssemblyTargetInfo::setSIMDLevel(llvm::StringMap<bool> &Features,
@@ -116,6 +122,10 @@ bool WebAssemblyTargetInfo::initFeatureMap(
     Features["atomics"] = true;
   if (HasMutableGlobals)
     Features["mutable-globals"] = true;
+  if (HasMultivalue)
+    Features["multivalue"] = true;
+  if (HasTailCall)
+    Features["tail-call"] = true;
 
   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
 }
@@ -187,6 +197,22 @@ bool WebAssemblyTargetInfo::handleTargetFeatures(
       HasMutableGlobals = false;
       continue;
     }
+    if (Feature == "+multivalue") {
+      HasMultivalue = true;
+      continue;
+    }
+    if (Feature == "-multivalue") {
+      HasMultivalue = false;
+      continue;
+    }
+    if (Feature == "+tail-call") {
+      HasTailCall = true;
+      continue;
+    }
+    if (Feature == "-tail-call") {
+      HasTailCall = false;
+      continue;
+    }
 
     Diags.Report(diag::err_opt_not_valid_with_opt)
         << Feature << "-target-feature";
diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index a0516da286c14..9665156b143f1 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -36,6 +36,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
   bool HasBulkMemory = false;
   bool HasAtomics = false;
   bool HasMutableGlobals = false;
+  bool HasMultivalue = false;
+  bool HasTailCall = false;
 
 public:
   explicit WebAssemblyTargetInfo(const llvm::Triple &T, const TargetOptions &)
diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c
index 2bf94398a1dc3..41681123f2068 100644
--- a/clang/test/Preprocessor/wasm-target-features.c
+++ b/clang/test/Preprocessor/wasm-target-features.c
@@ -79,6 +79,24 @@
 //
 // MUTABLE-GLOBALS:#define __wasm_mutable_globals__ 1{{$}}
 
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm32-unknown-unknown -mmultivalue \
+// RUN:   | FileCheck %s -check-prefix=MULTIVALUE
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm64-unknown-unknown -mmultivalue \
+// RUN:   | FileCheck %s -check-prefix=MULTIVALUE
+//
+// MULTIVALUE:#define __wasm_multivalue__ 1{{$}}
+
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm32-unknown-unknown -mtail-call \
+// RUN:   | FileCheck %s -check-prefix=TAIL-CALL
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm64-unknown-unknown -mtail-call \
+// RUN:   | FileCheck %s -check-prefix=TAIL-CALL
+//
+// TAIL-CALL:#define __wasm_tail_call__ 1{{$}}
+
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target wasm32-unknown-unknown -mcpu=mvp \
 // RUN:   | FileCheck %s -check-prefix=MVP
@@ -94,6 +112,8 @@
 // MVP-NOT:#define __wasm_bulk_memory__
 // MVP-NOT:#define __wasm_atomics__
 // MVP-NOT:#define __wasm_mutable_globals__
+// MVP-NOT:#define __wasm_multivalue__
+// MVP-NOT:#define __wasm_tail_call__
 
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target wasm32-unknown-unknown -mcpu=bleeding-edge \
@@ -108,6 +128,8 @@
 // BLEEDING-EDGE-DAG:#define __wasm_atomics__ 1{{$}}
 // BLEEDING-EDGE-DAG:#define __wasm_mutable_globals__ 1{{$}}
 // BLEEDING-EDGE-NOT:#define __wasm_unimplemented_simd128__ 1{{$}}
+// BLEEDING-EDGE-NOT:#define __wasm_multivalue__ 1{{$}}
+// BLEEDING-EDGE-NOT:#define __wasm_tail_call__ 1{{$}}
 
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target wasm32-unknown-unknown -mcpu=bleeding-edge -mno-simd128 \
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index 813c7e652e4a3..b0b8a9b996a37 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -33,6 +33,7 @@ def FeatureUnimplementedSIMD128 :
 
 def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
                                       "Enable Atomics">;
+
 def FeatureNontrappingFPToInt :
       SubtargetFeature<"nontrapping-fptoint",
                        "HasNontrappingFPToInt", "true",
@@ -43,6 +44,11 @@ def FeatureSignExt :
                        "HasSignExt", "true",
                        "Enable sign extension operators">;
 
+def FeatureTailCall :
+      SubtargetFeature<"tail-call",
+                       "HasTailCall", "true",
+                       "Enable tail call instructions">;
+
 def FeatureExceptionHandling :
       SubtargetFeature<"exception-handling", "HasExceptionHandling", "true",
                        "Enable Wasm exception handling">;
@@ -51,6 +57,11 @@ def FeatureBulkMemory :
       SubtargetFeature<"bulk-memory", "HasBulkMemory", "true",
                        "Enable bulk memory operations">;
 
+def FeatureMultivalue :
+      SubtargetFeature<"multivalue",
+                       "HasMultivalue", "true",
+                       "Enable multivalue blocks, instructions, and functions">;
+
 def FeatureMutableGlobals :
       SubtargetFeature<"mutable-globals", "HasMutableGlobals", "true",
                        "Enable mutable globals">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 40a8f6089c2af..a15bb2cce0bba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -34,6 +34,10 @@ def HasAtomics :
     Predicate<"Subtarget->hasAtomics()">,
     AssemblerPredicate<"FeatureAtomics", "atomics">;
 
+def HasMultivalue :
+    Predicate<"Subtarget->hasMultivalue()">,
+    AssemblerPredicate<"FeatureMultivalue", "multivalue">;
+
 def HasNontrappingFPToInt :
     Predicate<"Subtarget->hasNontrappingFPToInt()">,
     AssemblerPredicate<"FeatureNontrappingFPToInt", "nontrapping-fptoint">;
@@ -46,18 +50,14 @@ def HasSignExt :
     Predicate<"Subtarget->hasSignExt()">,
     AssemblerPredicate<"FeatureSignExt", "sign-ext">;
 
-def NotHasSignExt :
-    Predicate<"!Subtarget->hasSignExt()">,
-    AssemblerPredicate<"!FeatureSignExt", "sign-ext">;
+def HasTailCall :
+    Predicate<"Subtarget->hasTailCall()">,
+    AssemblerPredicate<"FeatureTailCall", "tail-call">;
 
 def HasExceptionHandling :
     Predicate<"Subtarget->hasExceptionHandling()">,
     AssemblerPredicate<"FeatureExceptionHandling", "exception-handling">;
 
-def NotHasExceptionHandling :
-    Predicate<"!Subtarget->hasExceptionHandling()">,
-    AssemblerPredicate<"!FeatureExceptionHandling", "exception-handling">;
-
 def HasBulkMemory :
     Predicate<"Subtarget->hasBulkMemory()">,
     AssemblerPredicate<"FeatureBulkMemory", "bulk-memory">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 22e11726f33e7..c5d9cf1eb953e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -44,7 +44,9 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
   bool HasSignExt = false;
   bool HasExceptionHandling = false;
   bool HasBulkMemory = false;
+  bool HasMultivalue = false;
   bool HasMutableGlobals = false;
+  bool HasTailCall = false;
 
   /// String name of used CPU.
   std::string CPUString;
@@ -98,7 +100,9 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
   bool hasSignExt() const { return HasSignExt; }
   bool hasExceptionHandling() const { return HasExceptionHandling; }
   bool hasBulkMemory() const { return HasBulkMemory; }
+  bool hasMultivalue() const { return HasMultivalue; }
   bool hasMutableGlobals() const { return HasMutableGlobals; }
+  bool hasTailCall() const { return HasTailCall; }
 
   /// Parses features string setting specified subtarget options. Definition of
   /// function is auto generated by tblgen.
diff --git a/llvm/test/CodeGen/WebAssembly/multivalue.ll b/llvm/test/CodeGen/WebAssembly/multivalue.ll
new file mode 100644
index 0000000000000..cbf8d4e0a0d0c
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/multivalue.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+multivalue | FileCheck %s
+
+; Test that the multivalue attribute is accepted
+; TODO(tlively): implement multivalue
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+%pair = type { i32, i32 }
+%packed_pair = type <{ i32, i32 }>
+
+; CHECK-LABEL: sret:
+; CHECK-NEXT: sret (i32, i32, i32) -> ()
+define %pair @sret(%pair %p) {
+  ret %pair %p
+}
+
+; CHECK-LABEL: packed_sret:
+; CHECK-NEXT: packed_sret (i32, i32, i32) -> ()
+define %packed_pair @packed_sret(%packed_pair %p) {
+  ret %packed_pair %p
+}
+
+; CHECK-LABEL: .section .custom_section.target_features
+; CHECK-NEXT: .int8 1
+; CHECK-NEXT: .int8 43
+; CHECK-NEXT: .int8 10
+; CHECK-NEXT: .ascii "multivalue"
diff --git a/llvm/test/CodeGen/WebAssembly/tailcall.ll b/llvm/test/CodeGen/WebAssembly/tailcall.ll
new file mode 100644
index 0000000000000..809d46ae4a4a3
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/tailcall.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+tail-call | FileCheck %s
+
+; Test that the tail-call attribute is accepted
+; TODO(tlively): implement tail call
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: recursive_tail:
+; CHECK:      i32.call $push[[L0:[0-9]+]]=, recursive_tail{{$}}
+; CHECK-NEXT: return $pop[[L0]]{{$}}
+define i32 @recursive_tail() {
+  %v = tail call i32 @recursive_tail()
+  ret i32 %v
+}
+
+; CHECK-LABEL: .section .custom_section.target_features
+; CHECK-NEXT: .int8 1
+; CHECK-NEXT: .int8 43
+; CHECK-NEXT: .int8 9
+; CHECK-NEXT: .ascii "tail-call"

From 2049e4dd8f61100f88f14db33bd95d197bcbfbbc Mon Sep 17 00:00:00 2001
From: Kit Barton <kbarton@ca.ibm.com>
Date: Thu, 23 May 2019 17:56:35 +0000
Subject: [PATCH 0053/1176]     [LOOPINFO] Extend Loop object to add utilities
 to get the loop bounds, step, induction variable, and guard branch.

    Summary:
    This PR extends the loop object with more utilities to get loop bounds, step, induction variable, and guard branch. There already exists passes which try to obtain the loop induction variable in their own pass, e.g. loop interchange. It would be useful to have a common area to get these information. Moreover, loop fusion (https://reviews.llvm.org/D55851) is planning to use getGuard() to extend the kind of loops it is able to fuse, e.g. rotated loop with non-constant upper bound, which would have a loop guard.

      /// Example:
      /// for (int i = lb; i < ub; i+=step)
      ///   <loop body>
      /// --- pseudo LLVMIR ---
      /// beforeloop:
      ///   guardcmp = (lb < ub)
      ///   if (guardcmp) goto preheader; else goto afterloop
      /// preheader:
      /// loop:
      ///   i1 = phi[{lb, preheader}, {i2, latch}]
      ///   <loop body>
      ///   i2 = i1 + step
      /// latch:
      ///   cmp = (i2 < ub)
      ///   if (cmp) goto loop
      /// exit:
      /// afterloop:
      ///
      /// getBounds
      ///   getInitialIVValue      --> lb
      ///   getStepInst            --> i2 = i1 + step
      ///   getStepValue           --> step
      ///   getFinalIVValue        --> ub
      ///   getCanonicalPredicate  --> '<'
      ///   getDirection           --> Increasing
      /// getGuard             --> if (guardcmp) goto loop; else goto afterloop
      /// getInductionVariable          --> i1
      /// getAuxiliaryInductionVariable --> {i1}
      /// isCanonical                   --> false

    Committed on behalf of @Whitney (Whitney Tsang).

    Reviewers: kbarton, hfinkel, dmgreen, Meinersbur, jdoerfert, syzaara, fhahn

    Reviewed By: kbarton

    Subscribers: tvvikram, bmahjour, etiotto, fhahn, jsji, hiraditya, llvm-commits

    Tags: #llvm

    Differential Revision: https://reviews.llvm.org/D60565

llvm-svn: 361517
---
 llvm/include/llvm/Analysis/LoopInfo.h         | 162 ++++
 llvm/lib/Analysis/LoopInfo.cpp                | 246 +++++
 .../lib/Transforms/Scalar/LoopInterchange.cpp |  29 +-
 llvm/unittests/Analysis/LoopInfoTest.cpp      | 900 ++++++++++++++++++
 4 files changed, 1309 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 6b964cdf9eae5..dd789de493875 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -54,9 +54,12 @@ namespace llvm {
 class DominatorTree;
 class LoopInfo;
 class Loop;
+class InductionDescriptor;
 class MDNode;
 class MemorySSAUpdater;
 class PHINode;
+class PostDominatorTree;
+class ScalarEvolution;
 class raw_ostream;
 template <class N, bool IsPostDom> class DominatorTreeBase;
 template <class N, class M> class LoopInfoBase;
@@ -529,6 +532,165 @@ class Loop : public LoopBase<BasicBlock, Loop> {
   bool getIncomingAndBackEdge(BasicBlock *&Incoming,
                               BasicBlock *&Backedge) const;
 
+  /// Below are some utilities to get loop bounds and induction variable, and
+  /// check if a given phinode is an auxiliary induction variable, as well as
+  /// checking if the loop is canonical.
+  ///
+  /// Here is an example:
+  /// \code
+  /// for (int i = lb; i < ub; i+=step)
+  ///   <loop body>
+  /// --- pseudo LLVMIR ---
+  /// beforeloop:
+  ///   guardcmp = (lb < ub)
+  ///   if (guardcmp) goto preheader; else goto afterloop
+  /// preheader:
+  /// loop:
+  ///   i_1 = phi[{lb, preheader}, {i_2, latch}]
+  ///   <loop body>
+  ///   i_2 = i_1 + step
+  /// latch:
+  ///   cmp = (i_2 < ub)
+  ///   if (cmp) goto loop
+  /// exit:
+  /// afterloop:
+  /// \endcode
+  ///
+  /// - getBounds
+  ///   - getInitialIVValue      --> lb
+  ///   - getStepInst            --> i_2 = i_1 + step
+  ///   - getStepValue           --> step
+  ///   - getFinalIVValue        --> ub
+  ///   - getCanonicalPredicate  --> '<'
+  ///   - getDirection           --> Increasing
+  ///
+  /// - getInductionVariable            --> i_1
+  /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+  /// - isCanonical                     --> false
+  struct LoopBounds {
+    /// Return the LoopBounds object if
+    /// - the given \p IndVar is an induction variable
+    /// - the initial value of the induction variable can be found
+    /// - the step instruction of the induction variable can be found
+    /// - the final value of the induction variable can be found
+    ///
+    /// Else None.
+    static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
+                                                ScalarEvolution &SE);
+
+    /// Get the initial value of the loop induction variable.
+    Value &getInitialIVValue() const { return InitialIVValue; }
+
+    /// Get the instruction that updates the loop induction variable.
+    Instruction &getStepInst() const { return StepInst; }
+
+    /// Get the step that the loop induction variable gets updated by in each
+    /// loop iteration. Return nullptr if not found.
+    Value *getStepValue() const { return StepValue; }
+
+    /// Get the final value of the loop induction variable.
+    Value &getFinalIVValue() const { return FinalIVValue; }
+
+    /// Return the canonical predicate for the latch compare instruction, if
+    /// able to be calcuated. Else BAD_ICMP_PREDICATE.
+    ///
+    /// A predicate is considered as canonical if requirements below are all
+    /// satisfied:
+    /// 1. The first successor of the latch branch is the loop header
+    ///    If not, inverse the predicate.
+    /// 2. One of the operands of the latch comparison is StepInst
+    ///    If not, and
+    ///    - if the current calcuated predicate is not ne or eq, flip the
+    ///      predicate.
+    ///    - else if the loop is increasing, return slt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///    - else if the loop is decreasing, return sgt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///
+    /// Here is an example when both (1) and (2) are not satisfied:
+    /// \code
+    /// loop.header:
+    ///  %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
+    ///  %inc = add %iv, %step
+    ///  %cmp = slt %iv, %finaliv
+    ///  br %cmp, %loop.exit, %loop.header
+    /// loop.exit:
+    /// \endcode
+    /// - The second successor of the latch branch is the loop header instead
+    ///   of the first successor (slt -> sge)
+    /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
+    ///   instead of the StepInst (%inc) (sge -> sgt)
+    ///
+    /// The predicate would be sgt if both (1) and (2) are satisfied.
+    /// getCanonicalPredicate() returns sgt for this example.
+    /// Note: The IR is not changed.
+    ICmpInst::Predicate getCanonicalPredicate() const;
+
+    /// An enum for the direction of the loop
+    /// - for (int i = 0; i < ub; ++i)  --> Increasing
+    /// - for (int i = ub; i > 0; --i)  --> Descresing
+    /// - for (int i = x; i != y; i+=z) --> Unknown
+    enum class Direction { Increasing, Decreasing, Unknown };
+
+    /// Get the direction of the loop.
+    Direction getDirection() const;
+
+  private:
+    LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
+               ScalarEvolution &SE)
+        : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
+          FinalIVValue(F), SE(SE) {}
+
+    const Loop &L;
+
+    // The initial value of the loop induction variable
+    Value &InitialIVValue;
+
+    // The instruction that updates the loop induction variable
+    Instruction &StepInst;
+
+    // The value that the loop induction variable gets updated by in each loop
+    // iteration
+    Value *StepValue;
+
+    // The final value of the loop induction variable
+    Value &FinalIVValue;
+
+    ScalarEvolution &SE;
+  };
+
+  /// Return the struct LoopBounds collected if all struct members are found,
+  /// else None.
+  Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
+
+  /// Return the loop induction variable if found, else return nullptr.
+  /// An instruction is considered as the loop induction variable if
+  /// - it is an induction variable of the loop; and
+  /// - it is used to determine the condition of the branch in the loop latch
+  ///
+  /// Note: the induction variable doesn't need to be canonical, i.e. starts at
+  /// zero and increments by one each time through the loop (but it can be).
+  PHINode *getInductionVariable(ScalarEvolution &SE) const;
+
+  /// Get the loop induction descriptor for the loop induction variable. Return
+  /// true if the loop induction variable is found.
+  bool getInductionDescriptor(ScalarEvolution &SE,
+                              InductionDescriptor &IndDesc) const;
+
+  /// Return true if the given PHINode \p AuxIndVar is
+  /// - in the loop header
+  /// - not used outside of the loop
+  /// - incremented by a loop invariant step for each loop iteration
+  /// - step instruction opcode should be add or sub
+  /// Note: auxiliary induction variable is not required to be used in the
+  ///       conditional branch in the loop latch. (but it can be)
+  bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                    ScalarEvolution &SE) const;
+
+  /// Return true if the loop induction variable starts at zero and increments
+  /// by one each time through the loop.
+  bool isCanonical(ScalarEvolution &SE) const;
+
   /// Return true if the Loop is in LCSSA form.
   bool isLCSSAForm(DominatorTree &DT) const;
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index aa933d98f249b..50e08e994876a 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -17,10 +17,13 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
@@ -164,6 +167,249 @@ PHINode *Loop::getCanonicalInductionVariable() const {
   return nullptr;
 }
 
+/// Return true if V1 and V2 have the same value ignoring bit width.
+static bool isEqualIgnoreBitwidth(Value &V1, Value &V2, ScalarEvolution &SE) {
+  const SCEV *S1 = SE.getSCEV(&V1);
+  const SCEV *S2 = SE.getSCEV(&V2);
+  Type *WiderType = SE.getWiderType(S1->getType(), S2->getType());
+  S1 = SE.getNoopOrAnyExtend(S1, WiderType);
+  S2 = SE.getNoopOrAnyExtend(S2, WiderType);
+  return SE.getMinusSCEV(S1, S2)->isZero();
+}
+
+/// Get the latch condition instruction.
+static ICmpInst *getLatchCmpInst(const Loop &L) {
+  if (BasicBlock *Latch = L.getLoopLatch())
+    if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+      if (BI->isConditional())
+        return dyn_cast<ICmpInst>(BI->getCondition());
+
+  return nullptr;
+}
+
+/// Return the final value of the loop induction variable if found.
+static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
+                               const Instruction &StepInst) {
+  ICmpInst *LatchCmpInst = getLatchCmpInst(L);
+  if (!LatchCmpInst)
+    return nullptr;
+
+  Value *Op0 = LatchCmpInst->getOperand(0);
+  Value *Op1 = LatchCmpInst->getOperand(1);
+  if (Op0 == &IndVar || Op0 == &StepInst)
+    return Op1;
+
+  if (Op1 == &IndVar || Op1 == &StepInst)
+    return Op0;
+
+  return nullptr;
+}
+
+Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
+                                                       PHINode &IndVar,
+                                                       ScalarEvolution &SE) {
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
+    return None;
+
+  Value *InitialIVValue = IndDesc.getStartValue();
+  Instruction *StepInst = IndDesc.getInductionBinOp();
+  if (!InitialIVValue || !StepInst)
+    return None;
+
+  const SCEV *Step = IndDesc.getStep();
+  Value *StepInstOp1 = StepInst->getOperand(1);
+  Value *StepInstOp0 = StepInst->getOperand(0);
+  Value *StepValue = nullptr;
+  if (SE.getSCEV(StepInstOp1) == Step)
+    StepValue = StepInstOp1;
+  else if (SE.getSCEV(StepInstOp0) == Step)
+    StepValue = StepInstOp0;
+
+  Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
+  if (!FinalIVValue)
+    return None;
+
+  return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
+                    SE);
+}
+
+using Direction = Loop::LoopBounds::Direction;
+
+ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
+  BasicBlock *Latch = L.getLoopLatch();
+  assert(Latch && "Expecting valid latch");
+
+  BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
+  assert(BI && BI->isConditional() && "Expecting conditional latch branch");
+
+  ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
+  assert(LatchCmpInst &&
+         "Expecting the latch compare instruction to be a CmpInst");
+
+  // Need to inverse the predicate when first successor is not the loop
+  // header
+  ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
+                                 ? LatchCmpInst->getPredicate()
+                                 : LatchCmpInst->getInversePredicate();
+
+  if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+
+  // Need to flip strictness of the predicate when the latch compare instruction
+  // is not using StepInst
+  if (LatchCmpInst->getOperand(0) == &getStepInst() ||
+      LatchCmpInst->getOperand(1) == &getStepInst())
+    return Pred;
+
+  // Cannot flip strictness of NE and EQ
+  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+    return ICmpInst::getFlippedStrictnessPredicate(Pred);
+
+  Direction D = getDirection();
+  if (D == Direction::Increasing)
+    return ICmpInst::ICMP_SLT;
+
+  if (D == Direction::Decreasing)
+    return ICmpInst::ICMP_SGT;
+
+  // If cannot determine the direction, then unable to find the canonical
+  // predicate
+  return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Direction Loop::LoopBounds::getDirection() const {
+  if (const SCEVAddRecExpr *StepAddRecExpr =
+          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
+    if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
+      if (SE.isKnownPositive(StepRecur))
+        return Direction::Increasing;
+      if (SE.isKnownNegative(StepRecur))
+        return Direction::Decreasing;
+    }
+
+  return Direction::Unknown;
+}
+
+Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return LoopBounds::getBounds(*this, *IndVar, SE);
+
+  return None;
+}
+
+PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
+  if (!isLoopSimplifyForm())
+    return nullptr;
+
+  BasicBlock *Header = getHeader();
+  assert(Header && "Expected a valid loop header");
+  BasicBlock *Latch = getLoopLatch();
+  assert(Latch && "Expected a valid loop latch");
+  ICmpInst *CmpInst = getLatchCmpInst(*this);
+  if (!CmpInst)
+    return nullptr;
+
+  // case 1:
+  // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+  // StepInst = IndVar + step
+  // cmp = StepInst < FinalValue
+  Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
+  Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+  // Loop over all of the PHI nodes in loop header, store the PHI node that has
+  // incoming value from latch equals to the StepInst
+  BinaryOperator *StepInst = nullptr;
+  PHINode *IndVar = nullptr;
+  for (PHINode &PN : Header->phis()) {
+    Value *IncomingValue = PN.getIncomingValueForBlock(Latch);
+    assert(IncomingValue && "Expecting valid incoming value from latch");
+    if (IncomingValue == LatchCmpOp0 || IncomingValue == LatchCmpOp1) {
+      IndVar = &PN;
+      StepInst = dyn_cast<BinaryOperator>(IncomingValue);
+      if (StepInst)
+        if (isEqualIgnoreBitwidth(*StepInst->getOperand(0), *IndVar, SE) ||
+            isEqualIgnoreBitwidth(*StepInst->getOperand(1), *IndVar, SE))
+          return IndVar;
+    }
+  }
+
+  // case 2:
+  // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+  // StepInst = IndVar + step
+  // cmp = IndVar < FinalValue
+  for (Value *Op : CmpInst->operands()) {
+    PHINode *IndVar = dyn_cast<PHINode>(Op);
+    if (!IndVar)
+      continue;
+
+    if (IndVar->getParent() != Header)
+      continue;
+
+    Value *IncomingValue = IndVar->getIncomingValueForBlock(Latch);
+    assert(IncomingValue && "Expecting valid incoming value from latch");
+    StepInst = dyn_cast<BinaryOperator>(IncomingValue);
+    if (StepInst)
+      if (StepInst->getOperand(0) == IndVar ||
+          StepInst->getOperand(1) == IndVar)
+        return IndVar;
+  }
+
+  return nullptr;
+}
+
+bool Loop::getInductionDescriptor(ScalarEvolution &SE,
+                                  InductionDescriptor &IndDesc) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
+
+  return false;
+}
+
+bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                        ScalarEvolution &SE) const {
+  // Located in the loop header
+  BasicBlock *Header = getHeader();
+  if (AuxIndVar.getParent() != Header)
+    return false;
+
+  // No uses outside of the loop
+  for (User *U : AuxIndVar.users())
+    if (const Instruction *I = dyn_cast<Instruction>(U))
+      if (!contains(I))
+        return false;
+
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
+    return false;
+
+  // The step instruction opcode should be add or sub.
+  if (IndDesc.getInductionOpcode() != Instruction::Add &&
+      IndDesc.getInductionOpcode() != Instruction::Sub)
+    return false;
+
+  // Incremented by a loop invariant step for each loop iteration
+  return SE.isLoopInvariant(IndDesc.getStep(), this);
+}
+
+bool Loop::isCanonical(ScalarEvolution &SE) const {
+  InductionDescriptor IndDesc;
+  if (!getInductionDescriptor(SE, IndDesc))
+    return false;
+
+  ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
+  if (!Init || !Init->isZero())
+    return false;
+
+  if (IndDesc.getInductionOpcode() != Instruction::Add)
+    return false;
+
+  ConstantInt *Step = IndDesc.getConstIntStepValue();
+  if (!Step || !Step->isOne())
+    return false;
+
+  return true;
+}
+
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
                                DominatorTree &DT) {
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index bec5af584f438..ad7113cb0e9a0 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -292,33 +292,6 @@ static LoopVector populateWorklist(Loop &L) {
   return LoopList;
 }
 
-static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
-  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
-  if (InnerIndexVar)
-    return InnerIndexVar;
-  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
-    return nullptr;
-  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
-    PHINode *PhiVar = cast<PHINode>(I);
-    Type *PhiTy = PhiVar->getType();
-    if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
-        !PhiTy->isPointerTy())
-      return nullptr;
-    const SCEVAddRecExpr *AddRec =
-        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
-    if (!AddRec || !AddRec->isAffine())
-      continue;
-    const SCEV *Step = AddRec->getStepRecurrence(*SE);
-    if (!isa<SCEVConstant>(Step))
-      continue;
-    // Found the induction variable.
-    // FIXME: Handle loops with more than one induction variable. Note that,
-    // currently, legality makes sure we have only one induction variable.
-    return PhiVar;
-  }
-  return nullptr;
-}
-
 namespace {
 
 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
@@ -1227,7 +1200,7 @@ bool LoopInterchangeTransform::transform() {
   if (InnerLoop->getSubLoops().empty()) {
     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
     LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
-    PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
+    PHINode *InductionPHI = InnerLoop->getInductionVariable(*SE);
     if (!InductionPHI) {
       LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
       return false;
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 483532a187527..005e1dc405b75 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -7,6 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/SourceMgr.h"
@@ -26,6 +30,26 @@ runWithLoopInfo(Module &M, StringRef FuncName,
   Test(*F, LI);
 }
 
+/// Build the loop info and scalar evolution for the function and run the Test.
+static void runWithLoopInfoPlus(
+    Module &M, StringRef FuncName,
+    function_ref<void(Function &F, LoopInfo &LI, ScalarEvolution &SE,
+                      PostDominatorTree &PDT)>
+        Test) {
+  auto *F = M.getFunction(FuncName);
+  ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
+
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  AssumptionCache AC(*F);
+  DominatorTree DT(*F);
+  LoopInfo LI(DT);
+  ScalarEvolution SE(*F, TLI, AC, DT, LI);
+
+  PostDominatorTree PDT(*F);
+  Test(*F, LI, SE, PDT);
+}
+
 static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
                                               const char *ModuleStr) {
   SMDiagnostic Err;
@@ -210,3 +234,879 @@ TEST(LoopInfoTest, PreorderTraversals) {
   EXPECT_EQ(&L_0_1, ReverseSiblingPreorder[6]);
   EXPECT_EQ(&L_0_0, ReverseSiblingPreorder[7]);
 }
+
+TEST(LoopInfoTest, CanonicalLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithInverseGuardSuccs) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp sge i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.end, label %for.preheader\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithSwappedGuardCmp) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp sgt i32 %ub, 0\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp sge i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.exit, label %for.body\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithInverseLatchSuccs) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp sge i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.exit, label %for.body\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithLatchCmpNE) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp ne i32 %i, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithGuardCmpSLE) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %ubPlusOne = add i32 %ub, 1\n"
+      "  %guardcmp = icmp sle i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp ne i32 %i, %ubPlusOne\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ubPlusOne");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopNonConstantStep) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = zext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, %step\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(), Loop::LoopBounds::Direction::Unknown);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopUnsignedBounds) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp ult i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = zext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add i32 %i, 1\n"
+      "  %cmp = icmp ult i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_ULT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, DecreasingLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ %ub, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = sub nsw i32 %i, 1\n"
+      "  %cmp = icmp sgt i32 %inc, 0\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        EXPECT_EQ(Bounds->getInitialIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_EQ(StepValue, nullptr);
+        ConstantInt *FinalIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getFinalIVValue());
+        EXPECT_TRUE(FinalIVValue && FinalIVValue->isZero());
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SGT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Decreasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, CannotFindDirection) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, %step\n"
+      "  %cmp = icmp ne i32 %i, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(*M, "foo",
+                      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+                          PostDominatorTree &PDT) {
+                        Function::iterator FI = F.begin();
+                        // First two basic block are entry and for.preheader
+                        // - skip them.
+                        ++FI;
+                        BasicBlock *Header = &*(++FI);
+                        assert(Header->getName() == "for.body");
+                        Loop *L = LI.getLoopFor(Header);
+                        EXPECT_NE(L, nullptr);
+
+                        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+                        EXPECT_NE(Bounds, None);
+                        ConstantInt *InitialIVValue =
+                            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+                        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+                        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+                        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
+                        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+                        EXPECT_EQ(Bounds->getCanonicalPredicate(),
+                                  ICmpInst::BAD_ICMP_PREDICATE);
+                        EXPECT_EQ(Bounds->getDirection(),
+                                  Loop::LoopBounds::Direction::Unknown);
+                        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+                      });
+}
+
+TEST(LoopInfoTest, ZextIndVar) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %for.body ]\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %wide.trip.count = zext i32 %ub to i64\n"
+      "  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count\n"
+      "  br i1 %exitcond, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "indvars.iv.next");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "wide.trip.count");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_NE);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "indvars.iv");
+      });
+}
+
+TEST(LoopInfoTest, UnguardedLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First basic block is entry - skip it.
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, UnguardedLoopWithControlFlow) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i1 %cond) {\n"
+      "entry:\n"
+      "  br i1 %cond, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopNest) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.outer.preheader, label %for.end\n"
+      "for.outer.preheader:\n"
+      "  br label %for.outer\n"
+      "for.outer:\n"
+      "  %j = phi i32 [ 0, %for.outer.preheader ], [ %inc.outer, %for.outer.latch ]\n"
+      "  br i1 %guardcmp, label %for.inner.preheader, label %for.outer.latch\n"
+      "for.inner.preheader:\n"
+      "  br label %for.inner\n"
+      "for.inner:\n"
+      "  %i = phi i32 [ 0, %for.inner.preheader ], [ %inc, %for.inner ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.inner, label %for.inner.exit\n"
+      "for.inner.exit:\n"
+      "  br label %for.outer.latch\n"
+      "for.outer.latch:\n"
+      "  %inc.outer = add nsw i32 %j, 1\n"
+      "  %cmp.outer = icmp slt i32 %inc.outer, %ub\n"
+      "  br i1 %cmp.outer, label %for.outer, label %for.outer.exit\n"
+      "for.outer.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.outer.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.outer");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc.outer");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "j");
+
+        // Next two basic blocks are for.outer and for.inner.preheader - skip
+        // them.
+        ++FI;
+        Header = &*(++FI);
+        assert(Header->getName() == "for.inner");
+        L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> InnerBounds = L->getBounds(SE);
+        EXPECT_NE(InnerBounds, None);
+        InitialIVValue =
+            dyn_cast<ConstantInt>(&InnerBounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(InnerBounds->getStepInst().getName(), "inc");
+        StepValue = dyn_cast_or_null<ConstantInt>(InnerBounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(InnerBounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(InnerBounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(InnerBounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, AuxiliaryIV) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %aux = phi i32 [ 0, %for.preheader ], [ %auxinc, %for.body ]\n"
+      "  %loopvariant = phi i32 [ 0, %for.preheader ], [ %loopvariantinc, %for.body ]\n"
+      "  %usedoutside = phi i32 [ 0, %for.preheader ], [ %usedoutsideinc, %for.body ]\n"
+      "  %mulopcode = phi i32 [ 0, %for.preheader ], [ %mulopcodeinc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %mulopcodeinc = mul nsw i32 %mulopcode, 5\n"
+      "  %usedoutsideinc = add nsw i32 %usedoutside, 5\n"
+      "  %loopvariantinc = add nsw i32 %loopvariant, %i\n"
+      "  %auxinc = add nsw i32 %aux, 5\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  %lcssa = phi i32 [ %usedoutside, %for.body ]\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+        BasicBlock::iterator II = Header->begin();
+        PHINode &Instruction_i = cast<PHINode>(*(II));
+        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_i, SE));
+        PHINode &Instruction_aux = cast<PHINode>(*(++II));
+        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_aux, SE));
+        PHINode &Instruction_loopvariant = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_loopvariant, SE));
+        PHINode &Instruction_usedoutside = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_usedoutside, SE));
+        PHINode &Instruction_mulopcode = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE));
+      });
+}

From 9b2830b46e1c8fb1418956d8835d85225949e3c7 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Thu, 23 May 2019 17:58:33 +0000
Subject: [PATCH 0054/1176] lld-link, clang: Treat non-existent input files as
 possible spellos for option flags

OptTable treats arguments starting with / that aren't a known option
as filenames. This means lld-link's and clang-cl's typo correction for
unknown flags didn't do spell checking for misspelled options that start
with /.

I first tried changing OptTable, but that got pretty messy, see PR41787
comments 2 and 3.

Instead, let lld-link's and clang's (including clang-cl's) "file not
found" diagnostic check if a non-existent file looks like it could be a
mis-spelled option, and if so add a "did you mean" suggestion to the
"file not found" diagnostic.

While here, make formatting of a few diagnostics a bit more
self-consistent.

Fixes PR41787.

Differential Revision: https://reviews.llvm.org/D62276

llvm-svn: 361518
---
 .../clang/Basic/DiagnosticDriverKinds.td      | 12 +++---
 clang/include/clang/Driver/Driver.h           |  8 ++++
 clang/lib/Driver/Driver.cpp                   | 41 ++++++++++++++-----
 clang/test/Driver/unknown-arg.c               | 37 +++++++++--------
 clang/test/Driver/unsupported-option.c        |  2 +-
 clang/test/Frontend/unknown-arg.c             |  2 +-
 lld/COFF/Driver.cpp                           | 17 ++++++--
 lld/COFF/DriverUtils.cpp                      |  3 +-
 lld/test/COFF/color-diagnostics.test          |  4 +-
 lld/test/COFF/could-not-open.test             |  2 +-
 lld/test/COFF/driver.test                     | 12 +++++-
 lld/test/COFF/error-limit.test                | 20 ++++-----
 lld/test/COFF/nodefaultlib.test               |  4 +-
 lld/test/COFF/responsefile.test               |  6 +--
 14 files changed, 112 insertions(+), 58 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 9ce7def88e522..2ece1f0f57890 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -9,9 +9,11 @@
 let Component = "Driver" in {
 
 def err_drv_no_such_file : Error<"no such file or directory: '%0'">;
+def err_drv_no_such_file_with_suggestion : Error<
+  "no such file or directory: '%0'; did you mean '%1'?">;
 def err_drv_unsupported_opt : Error<"unsupported option '%0'">;
-def err_drv_unsupported_opt_with_suggestion
-  : Error<"unsupported option '%0', did you mean '%1'?">;
+def err_drv_unsupported_opt_with_suggestion : Error<
+  "unsupported option '%0'; did you mean '%1'?">;
 def err_drv_unsupported_opt_for_target : Error<
   "unsupported option '%0' for target '%1'">;
 def err_drv_unsupported_option_argument : Error<
@@ -166,13 +168,13 @@ def err_arch_unsupported_isa
 def err_drv_I_dash_not_supported : Error<
   "'%0' not supported, please use -iquote instead">;
 def err_drv_unknown_argument : Error<"unknown argument: '%0'">;
-def err_drv_unknown_argument_with_suggestion
-  : Error<"unknown argument '%0', did you mean '%1'?">;
+def err_drv_unknown_argument_with_suggestion : Error<
+  "unknown argument '%0'; did you mean '%1'?">;
 def warn_drv_unknown_argument_clang_cl : Warning<
   "unknown argument ignored in clang-cl: '%0'">,
   InGroup<UnknownArgument>;
 def warn_drv_unknown_argument_clang_cl_with_suggestion : Warning<
-  "unknown argument ignored in clang-cl '%0' (did you mean '%1'?)">,
+  "unknown argument ignored in clang-cl '%0'; did you mean '%1'?">,
   InGroup<UnknownArgument>;
 
 def warn_drv_ycyu_different_arg_clang_cl : Warning<
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index 03e6458a5e5d5..244a0f3276c69 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -394,6 +394,14 @@ class Driver {
   void BuildUniversalActions(Compilation &C, const ToolChain &TC,
                              const InputList &BAInputs) const;
 
+  /// Check that the file referenced by Value exists. If it doesn't,
+  /// issue a diagnostic and return false.
+  /// If TypoCorrect is true and the file does not exist, see if it looks
+  /// like a likely typo for a flag and if so print a "did you mean" blurb.
+  bool DiagnoseInputExistence(const llvm::opt::DerivedArgList &Args,
+                              StringRef Value, types::ID Ty,
+                              bool TypoCorrect) const;
+
   /// BuildJobs - Bind actions to concrete tools and translate
   /// arguments to form the list of jobs to run.
   ///
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 06c0e3bdb3777..0f7bbb6eb0fbc 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -1975,11 +1975,9 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
   }
 }
 
-/// Check that the file referenced by Value exists. If it doesn't,
-/// issue a diagnostic and return false.
-static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
-                                   StringRef Value, types::ID Ty) {
-  if (!D.getCheckInputsExist())
+bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
+                                    types::ID Ty, bool TypoCorrect) const {
+  if (!getCheckInputsExist())
     return true;
 
   // stdin always exists.
@@ -1995,10 +1993,10 @@ static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
     }
   }
 
-  if (D.getVFS().exists(Path))
+  if (getVFS().exists(Path))
     return true;
 
-  if (D.IsCLMode()) {
+  if (IsCLMode()) {
     if (!llvm::sys::path::is_absolute(Twine(Path)) &&
         llvm::sys::Process::FindInEnvPath("LIB", Value))
       return true;
@@ -2011,7 +2009,26 @@ static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
     }
   }
 
-  D.Diag(clang::diag::err_drv_no_such_file) << Path;
+  if (TypoCorrect) {
+    // Check if the filename is a typo for an option flag. OptTable thinks
+    // that all args that are not known options and that start with / are
+    // filenames, but e.g. `/diagnostic:caret` is more likely a typo for
+    // the option `/diagnostics:caret` than a reference to a file in the root
+    // directory.
+    unsigned IncludedFlagsBitmask;
+    unsigned ExcludedFlagsBitmask;
+    std::tie(IncludedFlagsBitmask, ExcludedFlagsBitmask) =
+        getIncludeExcludeOptionFlagMasks(IsCLMode());
+    std::string Nearest;
+    if (getOpts().findNearest(Value, Nearest, IncludedFlagsBitmask,
+                              ExcludedFlagsBitmask) <= 1) {
+      Diag(clang::diag::err_drv_no_such_file_with_suggestion)
+          << Path << Nearest;
+      return false;
+    }
+  }
+
+  Diag(clang::diag::err_drv_no_such_file) << Path;
   return false;
 }
 
@@ -2128,19 +2145,21 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
         }
       }
 
-      if (DiagnoseInputExistence(*this, Args, Value, Ty))
+      if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
         Inputs.push_back(std::make_pair(Ty, A));
 
     } else if (A->getOption().matches(options::OPT__SLASH_Tc)) {
       StringRef Value = A->getValue();
-      if (DiagnoseInputExistence(*this, Args, Value, types::TY_C)) {
+      if (DiagnoseInputExistence(Args, Value, types::TY_C,
+                                 /*TypoCorrect=*/false)) {
         Arg *InputArg = MakeInputArg(Args, *Opts, A->getValue());
         Inputs.push_back(std::make_pair(types::TY_C, InputArg));
       }
       A->claim();
     } else if (A->getOption().matches(options::OPT__SLASH_Tp)) {
       StringRef Value = A->getValue();
-      if (DiagnoseInputExistence(*this, Args, Value, types::TY_CXX)) {
+      if (DiagnoseInputExistence(Args, Value, types::TY_CXX,
+                                 /*TypoCorrect=*/false)) {
         Arg *InputArg = MakeInputArg(Args, *Opts, A->getValue());
         Inputs.push_back(std::make_pair(types::TY_CXX, InputArg));
       }
diff --git a/clang/test/Driver/unknown-arg.c b/clang/test/Driver/unknown-arg.c
index 4ea43278b7ea8..074a57bb6f8ef 100644
--- a/clang/test/Driver/unknown-arg.c
+++ b/clang/test/Driver/unknown-arg.c
@@ -1,23 +1,25 @@
 // RUN: not %clang %s -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -ifoo -imultilib dir -### 2>&1 | \
-// RUN: FileCheck %s
+// RUN:     FileCheck %s
 // RUN: %clang %s -imultilib dir -### 2>&1 | \
-// RUN: FileCheck %s --check-prefix=MULTILIB
+// RUN:     FileCheck %s --check-prefix=MULTILIB
 // RUN: not %clang %s -stdlibs=foo -hell -version -### 2>&1 | \
-// RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
+// RUN:     FileCheck %s --check-prefix=DID-YOU-MEAN
 // RUN: %clang_cl -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -### -c -- %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CL
+// RUN:     FileCheck %s --check-prefix=CL
 // RUN: %clang_cl -Brepo -### -- %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CL-DID-YOU-MEAN
+// RUN:     FileCheck %s --check-prefix=CL-DID-YOU-MEAN
+// RUN: %clang_cl /Brepo -### -- %s 2>&1 | \
+// RUN:     FileCheck %s --check-prefix=CL-DID-YOU-MEAN-SLASH
 // RUN: not %clang_cl -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -c -Werror=unknown-argument -### -- %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CL-ERROR
+// RUN:     FileCheck %s --check-prefix=CL-ERROR
 // RUN: not %clang_cl -helo -Werror=unknown-argument -### -- %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CL-ERROR-DID-YOU-MEAN
+// RUN:     FileCheck %s --check-prefix=CL-ERROR-DID-YOU-MEAN
 // RUN: %clang_cl -cake-is-lie -%0 -%d -HHHH -munknown-to-clang-option -print-stats -funknown-to-clang-option -c -Wno-unknown-argument -### -- %s 2>&1 | \
-// RUN: FileCheck %s --check-prefix=SILENT
+// RUN:     FileCheck %s --check-prefix=SILENT
 // RUN: not %clang -cc1as -hell --version 2>&1 | \
-// RUN: FileCheck %s --check-prefix=CC1AS-DID-YOU-MEAN
+// RUN:     FileCheck %s --check-prefix=CC1AS-DID-YOU-MEAN
 // RUN: not %clang -cc1asphalt -help 2>&1 | \
-// RUN: FileCheck %s --check-prefix=UNKNOWN-INTEGRATED
+// RUN:     FileCheck %s --check-prefix=UNKNOWN-INTEGRATED
 
 // CHECK: error: unknown argument: '-cake-is-lie'
 // CHECK: error: unknown argument: '-%0'
@@ -28,9 +30,9 @@
 // CHECK: error: unknown argument: '-funknown-to-clang-option'
 // CHECK: error: unknown argument: '-ifoo'
 // MULTILIB: warning: argument unused during compilation: '-imultilib dir'
-// DID-YOU-MEAN: error: unknown argument '-stdlibs=foo', did you mean '-stdlib=foo'?
-// DID-YOU-MEAN: error: unknown argument '-hell', did you mean '-help'?
-// DID-YOU-MEAN: error: unknown argument '-version', did you mean '--version'?
+// DID-YOU-MEAN: error: unknown argument '-stdlibs=foo'; did you mean '-stdlib=foo'?
+// DID-YOU-MEAN: error: unknown argument '-hell'; did you mean '-help'?
+// DID-YOU-MEAN: error: unknown argument '-version'; did you mean '--version'?
 // CL: warning: unknown argument ignored in clang-cl: '-cake-is-lie'
 // CL: warning: unknown argument ignored in clang-cl: '-%0'
 // CL: warning: unknown argument ignored in clang-cl: '-%d'
@@ -38,7 +40,8 @@
 // CL: warning: unknown argument ignored in clang-cl: '-munknown-to-clang-option'
 // CL: warning: unknown argument ignored in clang-cl: '-print-stats'
 // CL: warning: unknown argument ignored in clang-cl: '-funknown-to-clang-option'
-// CL-DID-YOU-MEAN: warning: unknown argument ignored in clang-cl '-Brepo' (did you mean '-Brepro'?)
+// CL-DID-YOU-MEAN: warning: unknown argument ignored in clang-cl '-Brepo'; did you mean '-Brepro'?
+// CL-DID-YOU-MEAN-SLASH: error: no such file or directory: '/Brepo'; did you mean '/Brepro'?
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-cake-is-lie'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-%0'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-%d'
@@ -46,11 +49,11 @@
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-munknown-to-clang-option'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-print-stats'
 // CL-ERROR: error: unknown argument ignored in clang-cl: '-funknown-to-clang-option'
-// CL-ERROR-DID-YOU-MEAN: error: unknown argument ignored in clang-cl '-helo' (did you mean '-help'?)
+// CL-ERROR-DID-YOU-MEAN: error: unknown argument ignored in clang-cl '-helo'; did you mean '-help'?
 // SILENT-NOT: error:
 // SILENT-NOT: warning:
-// CC1AS-DID-YOU-MEAN: error: unknown argument '-hell', did you mean '-help'?
-// CC1AS-DID-YOU-MEAN: error: unknown argument '--version', did you mean '-version'?
+// CC1AS-DID-YOU-MEAN: error: unknown argument '-hell'; did you mean '-help'?
+// CC1AS-DID-YOU-MEAN: error: unknown argument '--version'; did you mean '-version'?
 // UNKNOWN-INTEGRATED: error: unknown integrated tool 'asphalt'. Valid tools include '-cc1' and '-cc1as'.
 
 // RUN: %clang -S %s -o %t.s  -Wunknown-to-clang-option 2>&1 | FileCheck --check-prefix=IGNORED %s
diff --git a/clang/test/Driver/unsupported-option.c b/clang/test/Driver/unsupported-option.c
index 39f135e683485..d0611977a99e1 100644
--- a/clang/test/Driver/unsupported-option.c
+++ b/clang/test/Driver/unsupported-option.c
@@ -4,4 +4,4 @@
 // RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
 
 // CHECK: error: unsupported option '--hedonism'
-// DID-YOU-MEAN: error: unsupported option '--hell', did you mean '--help'?
+// DID-YOU-MEAN: error: unsupported option '--hell'; did you mean '--help'?
diff --git a/clang/test/Frontend/unknown-arg.c b/clang/test/Frontend/unknown-arg.c
index 00f2da6242c7c..eb2fb1aee2dd5 100644
--- a/clang/test/Frontend/unknown-arg.c
+++ b/clang/test/Frontend/unknown-arg.c
@@ -6,4 +6,4 @@
 // RUN: FileCheck %s --check-prefix=DID-YOU-MEAN
 
 // CHECK: error: unknown argument: '--helium'
-// DID-YOU-MEAN: error: unknown argument '--hel[', did you mean '--help'?
+// DID-YOU-MEAN: error: unknown argument '--hel['; did you mean '--help'?
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 605f52e11a293..39d476c5079b6 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -203,9 +203,20 @@ void LinkerDriver::enqueuePath(StringRef Path, bool WholeArchive) {
   std::string PathStr = Path;
   enqueueTask([=]() {
     auto MBOrErr = Future->get();
-    if (MBOrErr.second)
-      error("could not open " + PathStr + ": " + MBOrErr.second.message());
-    else
+    if (MBOrErr.second) {
+      std::string Error =
+          "could not open '" + PathStr + "': " + MBOrErr.second.message();
+      // Check if the filename is a typo for an option flag. OptTable thinks
+      // that all args that are not known options and that start with / are
+      // filenames, but e.g. `/nodefaultlibs` is more likely a typo for
+      // the option `/nodefaultlib` than a reference to a file in the root
+      // directory.
+      std::string Nearest;
+      if (COFFOptTable().findNearest(PathStr, Nearest) > 1)
+        error(Error);
+      else
+        error(Error + "; did you mean '" + Nearest + "'");
+    } else
       Driver->addBuffer(std::move(MBOrErr.first), WholeArchive);
   });
 }
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index 4a99cd6f2e836..f689cd1d4193f 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -833,7 +833,8 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> Argv) {
 
   // Expand response files (arguments in the form of @<filename>)
   // and then parse the argument again.
-  SmallVector<const char *, 256> ExpandedArgv(Argv.data(), Argv.data() + Argv.size());
+  SmallVector<const char *, 256> ExpandedArgv(Argv.data(),
+                                              Argv.data() + Argv.size());
   cl::ExpandResponseFiles(Saver, getQuotingStyle(Args), ExpandedArgv);
   Args = Table.ParseArgs(makeArrayRef(ExpandedArgv).drop_front(), MissingIndex,
                          MissingCount);
diff --git a/lld/test/COFF/color-diagnostics.test b/lld/test/COFF/color-diagnostics.test
index 9cb145a335ef2..11cf8aff7a0ae 100644
--- a/lld/test/COFF/color-diagnostics.test
+++ b/lld/test/COFF/color-diagnostics.test
@@ -7,7 +7,7 @@
 # RUN:   | FileCheck -check-prefix=COLOR %s
 
 # COLOR: {{lld-link: .\[0;1;35mwarning: .\[0mignoring unknown argument '-xyz'}}
-# COLOR: {{lld-link: .\[0;1;31merror: .\[0mcould not open /nosuchfile}}
+# COLOR: {{lld-link: .\[0;1;31merror: .\[0mcould not open '/nosuchfile'}}
 
 # RUN: not lld-link /nosuchfile 2>&1 | FileCheck -check-prefix=NOCOLOR %s
 # RUN: not lld-link -color-diagnostics=never /nosuchfile 2>&1 \
@@ -15,4 +15,4 @@
 # RUN: not lld-link -color-diagnostics=always -no-color-diagnostics \
 # RUN:   /nosuchfile 2>&1 | FileCheck -check-prefix=NOCOLOR %s
 
-# NOCOLOR: lld-link: error: could not open /nosuchfile
+# NOCOLOR: lld-link: error: could not open '/nosuchfile'
diff --git a/lld/test/COFF/could-not-open.test b/lld/test/COFF/could-not-open.test
index 87f11c34e7d45..91fe9ea617978 100644
--- a/lld/test/COFF/could-not-open.test
+++ b/lld/test/COFF/could-not-open.test
@@ -1,5 +1,5 @@
 RUN: not lld-link 01 2>&1 | FileCheck %s
 
-CHECK:     could not open 01
+CHECK:     could not open '01'
 CHECK-NOT: /machine is not specified
 CHECK-NOT: subsystem must be defined
diff --git a/lld/test/COFF/driver.test b/lld/test/COFF/driver.test
index f4d0e6ae1c900..44abe4d63ee1d 100644
--- a/lld/test/COFF/driver.test
+++ b/lld/test/COFF/driver.test
@@ -1,6 +1,6 @@
 # RUN: not lld-link nosuchfile.obj >& %t.log
 # RUN: FileCheck -check-prefix=MISSING %s < %t.log
-MISSING: nosuchfile.obj: {{[Nn]}}o such file or directory
+MISSING: 'nosuchfile.obj': {{[Nn]}}o such file or directory
 
 # RUN: lld-link --version | FileCheck -check-prefix=VERSION %s
 VERSION: {{LLD [0-9]+\.[0-9]+}}
@@ -27,3 +27,13 @@ SPELLVERSION: no input files
 # RUN: not lld-link -nodefaultlibs 2>&1 | FileCheck -check-prefix=SPELLNODEFAULTLIB %s
 SPELLNODEFAULTLIB: ignoring unknown argument '-nodefaultlibs', did you mean '-nodefaultlib'
 SPELLNODEFAULTLIB: no input files
+
+# RUN: not lld-link /nodefaultlibs 2>&1 | FileCheck -check-prefix=SPELLNODEFAULTLIB_SLASH %s
+SPELLNODEFAULTLIB_SLASH: could not open '/nodefaultlibs': {{.*}}; did you mean '/nodefaultlib'
+SPELLNODEFAULTLIB_SLASH-NOT: no input files
+
+# Getting flags as typo corrections for normal input files is a side effect
+# of how spell checking for /-style flags is implemented.
+# RUN: not lld-link force 2>&1 | FileCheck -check-prefix=SPELLFORCE %s
+SPELLFORCE: could not open 'force': {{.*}}; did you mean '/force'
+SPELLFORCE-NOT: no input files
diff --git a/lld/test/COFF/error-limit.test b/lld/test/COFF/error-limit.test
index 09c3b9d07f30e..eddd2ab9a64dc 100644
--- a/lld/test/COFF/error-limit.test
+++ b/lld/test/COFF/error-limit.test
@@ -1,26 +1,26 @@
 RUN: not lld-link 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 \
 RUN:   21 22 2>&1 | FileCheck -check-prefix=DEFAULT %s
 
-DEFAULT:      could not open 01
-DEFAULT:      could not open 20
+DEFAULT:      could not open '01'
+DEFAULT:      could not open '20'
 DEFAULT-NEXT: too many errors emitted, stopping now (use /errorlimit:0 to see all errors)
-DEFAULT-NOT:  could not open 21
+DEFAULT-NOT:  could not open '21'
 
 RUN: not lld-link /errorlimit:5 01 02 03 04 05 06 07 08 09 10 2>&1 \
 RUN:   | FileCheck -check-prefix=LIMIT5 %s
 
-LIMIT5:      could not open 01
-LIMIT5:      could not open 05
+LIMIT5:      could not open '01'
+LIMIT5:      could not open '05'
 LIMIT5-NEXT: too many errors emitted, stopping now (use /errorlimit:0 to see all errors)
-LIMIT5-NOT:  could not open 06
+LIMIT5-NOT:  could not open '06'
 
 RUN: not lld-link /errorlimit:0 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 \
 RUN:   16 17 18 19 20 21 22 2>&1 | FileCheck -check-prefix=UNLIMITED %s
 
-UNLIMITED:     could not open 01
-UNLIMITED:     could not open 20
-UNLIMITED:     could not open 21
-UNLIMITED:     could not open 22
+UNLIMITED:     could not open '01'
+UNLIMITED:     could not open '20'
+UNLIMITED:     could not open '21'
+UNLIMITED:     could not open '22'
 UNLIMITED-NOT: too many errors emitted, stopping now (use /errorlimit:0 to see all errors)
 
 RUN: not lld-link /errorlimit:XYZ 01 02 03 04 05 06 07 08 09 10 11 12 13 14 \
diff --git a/lld/test/COFF/nodefaultlib.test b/lld/test/COFF/nodefaultlib.test
index 8f4da3a21efee..2a6bec933f438 100644
--- a/lld/test/COFF/nodefaultlib.test
+++ b/lld/test/COFF/nodefaultlib.test
@@ -19,8 +19,8 @@
 # RUN:   /nodefaultlib:std64.lib >& %t.log || true
 # RUN: FileCheck -check-prefix=CHECK3 %s < %t.log
 
-CHECK1: error: could not open hello64.obj: {{[Nn]}}o such file or directory
-CHECK2: error: could not open hello64: {{[Nn]}}o such file or directory
+CHECK1: error: could not open 'hello64.obj': {{[Nn]}}o such file or directory
+CHECK2: error: could not open 'hello64': {{[Nn]}}o such file or directory
 CHECK3: error: undefined symbol: MessageBoxA
 CHECK3-NEXT: >>> referenced by {{.*}}hello64.obj:(main)
 
diff --git a/lld/test/COFF/responsefile.test b/lld/test/COFF/responsefile.test
index 3ad15bb418151..1480406d3486d 100644
--- a/lld/test/COFF/responsefile.test
+++ b/lld/test/COFF/responsefile.test
@@ -12,14 +12,14 @@ INVRSP: invalid response file quoting: foobar
 # RUN: echo "blah\foo" > %t.rsp
 # RUN: not lld-link @%t.rsp 2>&1 | \
 # RUN:     FileCheck --check-prefix=DEFRSP %s
-DEFRSP: error: could not open blah\foo
+DEFRSP: error: could not open 'blah\foo'
 
 # RUN: echo "blah\foo" > %t.rsp
 # RUN: not lld-link --rsp-quoting=windows @%t.rsp 2>&1 | \
 # RUN:     FileCheck --check-prefix=WINRSP %s
-WINRSP: error: could not open blah\foo
+WINRSP: error: could not open 'blah\foo'
 
 # RUN: echo "blah\foo" > %t.rsp
 # RUN: not lld-link --rsp-quoting=posix @%t.rsp 2>&1 | \
 # RUN:     FileCheck --check-prefix=POSRSP %s
-POSRSP: error: could not open blahfoo
+POSRSP: error: could not open 'blahfoo'

From 0f3ba44b57b0033a92a04530bcecd1b965adcd4b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 23 May 2019 17:58:48 +0000
Subject: [PATCH 0055/1176] AMDGPU/GlobalISel: Legality for integer min/max

llvm-svn: 361519
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  14 +-
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  23 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   7 +
 .../AMDGPU/GlobalISel/legalize-smax.mir       | 423 ++++++++++++++++++
 .../AMDGPU/GlobalISel/legalize-smin.mir       | 423 ++++++++++++++++++
 .../AMDGPU/GlobalISel/legalize-umax.mir       | 413 +++++++++++++++++
 .../AMDGPU/GlobalISel/legalize-umin.mir       | 413 +++++++++++++++++
 .../AMDGPU/GlobalISel/regbankselect-smax.mir  |  73 +++
 .../AMDGPU/GlobalISel/regbankselect-smin.mir  |  73 +++
 .../AMDGPU/GlobalISel/regbankselect-umax.mir  |  73 +++
 .../AMDGPU/GlobalISel/regbankselect-umin.mir  |  73 +++
 11 files changed, 2007 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index aed1e4c590de6..f9e8c835662dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1083,6 +1083,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
 
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_SREM:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
     Observer.changingInstr(MI);
     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
@@ -1111,6 +1113,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     return Legalized;
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_UREM:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX:
     Observer.changingInstr(MI);
     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
@@ -2207,6 +2211,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FSQRT:
   case G_BSWAP:
   case G_SDIV:
+  case G_SMIN:
+  case G_SMAX:
+  case G_UMIN:
+  case G_UMAX:
     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
   case G_SHL:
   case G_LSHR:
@@ -2494,7 +2502,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   }
   case TargetOpcode::G_AND:
   case TargetOpcode::G_OR:
-  case TargetOpcode::G_XOR: {
+  case TargetOpcode::G_XOR:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: {
     Observer.changingInstr(MI);
     moreElementsVectorSrc(MI, MoreTy, 1);
     moreElementsVectorSrc(MI, MoreTy, 2);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c4ca42e6af3c0..f93b61035ff6f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -353,6 +353,29 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     .clampScalar(0, S32, S32)
     .scalarize(0);
 
+  if (ST.has16BitInsts()) {
+    if (ST.hasVOP3PInsts()) {
+      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+        .legalFor({S32, S16, V2S16})
+        .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+        .clampMaxNumElements(0, S16, 2)
+        .clampScalar(0, S16, S32)
+        .widenScalarToNextPow2(0)
+        .scalarize(0);
+    } else {
+      getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+        .legalFor({S32, S16})
+        .widenScalarToNextPow2(0)
+        .clampScalar(0, S16, S32)
+        .scalarize(0);
+    }
+  } else {
+    getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+      .legalFor({S32})
+      .clampScalar(0, S32, S32)
+      .widenScalarToNextPow2(0)
+      .scalarize(0);
+  }
 
   auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
     return [=](const LegalityQuery &Query) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index a6d68f3cd0cd5..1f2b551e1af53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1055,6 +1055,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       return getDefaultMappingSOP(MI);
     LLVM_FALLTHROUGH;
 
+  case AMDGPU::G_SMIN:
+  case AMDGPU::G_SMAX:
+  case AMDGPU::G_UMIN:
+  case AMDGPU::G_UMAX:
+    // TODO: min/max can be scalar, but requires expanding as a compare and
+    // select.
+
   case AMDGPU::G_FADD:
   case AMDGPU::G_FSUB:
   case AMDGPU::G_FPTOSI:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
new file mode 100644
index 0000000000000..678c3d1e7594c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir
@@ -0,0 +1,423 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI  %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9  %s
+
+---
+name: test_smax_s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smax_s32
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]]
+    ; SI: $vgpr0 = COPY [[SMAX]](s32)
+    ; VI-LABEL: name: test_smax_s32
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]]
+    ; VI: $vgpr0 = COPY [[SMAX]](s32)
+    ; GFX9-LABEL: name: test_smax_s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[SMAX]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_SMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_smax_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smax_s16
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_smax_s16
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_smax_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(s16) = G_SMAX %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_smax_s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smax_s8
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_smax_s8
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC]](s16)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC]](s16)
+    ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC]](s16)
+    ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_smax_s8
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC]](s16)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC]](s16)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s8) = G_TRUNC %0
+    %3:_(s8) = G_TRUNC %1
+    %4:_(s8) = G_SMAX %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_smax_s17
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smax_s17
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_smax_s17
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; VI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; VI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32)
+    ; VI: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_smax_s17
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[ASHR]], [[ASHR1]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMAX]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s17) = G_TRUNC %0
+    %3:_(s17) = G_TRUNC %1
+    %4:_(s17) = G_SMAX %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_smax_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_smax_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]]
+    ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-LABEL: name: test_smax_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]]
+    ; VI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: test_smax_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]]
+    ; GFX9: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = G_SMAX %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_smax_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; SI-LABEL: name: test_smax_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]]
+    ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]]
+    ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-LABEL: name: test_smax_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]]
+    ; VI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]]
+    ; VI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-LABEL: name: test_smax_v3s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]]
+    ; GFX9: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]]
+    ; GFX9: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<3 x s32>) = G_SMAX %0, %1
+    $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_smax_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smax_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT]], [[SEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32)
+    ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT2]], [[SEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; VI-LABEL: name: test_smax_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[UV]], [[UV2]]
+    ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16)
+    ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: test_smax_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_SMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_smax_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smax_v3s16
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT]], [[SEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32)
+    ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16)
+    ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT2]], [[SEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32)
+    ; SI: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; SI: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16)
+    ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT4]], [[SEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; VI-LABEL: name: test_smax_v3s16
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[UV]], [[UV3]]
+    ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[UV1]], [[UV4]]
+    ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16), [[SMAX2]](s16)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; GFX9-LABEL: name: test_smax_v3s16
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]]
+    ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>)
+    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s16>) = G_IMPLICIT_DEF
+    %2:_(<3 x s16>) = G_SMAX %0, %1
+    S_NOP 0, implicit %2
+...
+
+---
+name: test_smax_v4s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_smax_v4s16
+    ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16)
+    ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT]], [[SEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX]](s32)
+    ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16)
+    ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT2]], [[SEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX1]](s32)
+    ; SI: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; SI: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[UV6]](s16)
+    ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT4]], [[SEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX2]](s32)
+    ; SI: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; SI: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[UV7]](s16)
+    ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT6]], [[SEXT7]]
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMAX3]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; VI-LABEL: name: test_smax_v4s16
+    ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[UV]], [[UV4]]
+    ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[UV1]], [[UV5]]
+    ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[UV2]], [[UV6]]
+    ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[UV3]], [[UV7]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMAX]](s16), [[SMAX1]](s16), [[SMAX2]](s16), [[SMAX3]](s16)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; GFX9-LABEL: name: test_smax_v4s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]]
+    ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x s16>) = G_SMAX %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
new file mode 100644
index 0000000000000..594483448c03a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir
@@ -0,0 +1,423 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI  %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9  %s
+
+---
+name: test_smin_s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smin_s32
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]]
+    ; SI: $vgpr0 = COPY [[SMIN]](s32)
+    ; VI-LABEL: name: test_smin_s32
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]]
+    ; VI: $vgpr0 = COPY [[SMIN]](s32)
+    ; GFX9-LABEL: name: test_smin_s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[SMIN]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_SMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_smin_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smin_s16
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_smin_s16
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_smin_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(s16) = G_SMIN %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_smin_s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smin_s8
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_smin_s8
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC]](s16)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC]](s16)
+    ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC]](s16)
+    ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_smin_s8
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC]](s16)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC]](s16)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s8) = G_TRUNC %0
+    %3:_(s8) = G_TRUNC %1
+    %4:_(s8) = G_SMIN %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_smin_s17
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smin_s17
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; SI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; SI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_smin_s17
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; VI: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; VI: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32)
+    ; VI: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_smin_s17
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[ASHR]], [[ASHR1]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SMIN]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s17) = G_TRUNC %0
+    %3:_(s17) = G_TRUNC %1
+    %4:_(s17) = G_SMIN %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_smin_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_smin_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]]
+    ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-LABEL: name: test_smin_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]]
+    ; VI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: test_smin_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]]
+    ; GFX9: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = G_SMIN %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_smin_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; SI-LABEL: name: test_smin_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]]
+    ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]]
+    ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-LABEL: name: test_smin_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]]
+    ; VI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]]
+    ; VI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-LABEL: name: test_smin_v3s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]]
+    ; GFX9: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]]
+    ; GFX9: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<3 x s32>) = G_SMIN %0, %1
+    $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_smin_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smin_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT]], [[SEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32)
+    ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT2]], [[SEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; VI-LABEL: name: test_smin_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[UV]], [[UV2]]
+    ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16)
+    ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: test_smin_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[SMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_SMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_smin_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_smin_v3s16
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT]], [[SEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32)
+    ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16)
+    ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT2]], [[SEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32)
+    ; SI: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; SI: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16)
+    ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT4]], [[SEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; VI-LABEL: name: test_smin_v3s16
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[UV]], [[UV3]]
+    ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[UV1]], [[UV4]]
+    ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16), [[SMIN2]](s16)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; GFX9-LABEL: name: test_smin_v3s16
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]]
+    ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>)
+    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s16>) = G_IMPLICIT_DEF
+    %2:_(<3 x s16>) = G_SMIN %0, %1
+    S_NOP 0, implicit %2
+...
+
+---
+name: test_smin_v4s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_smin_v4s16
+    ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; SI: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV4]](s16)
+    ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT]], [[SEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN]](s32)
+    ; SI: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; SI: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV5]](s16)
+    ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT2]], [[SEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN1]](s32)
+    ; SI: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; SI: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[UV6]](s16)
+    ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT4]], [[SEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN2]](s32)
+    ; SI: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; SI: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[UV7]](s16)
+    ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT6]], [[SEXT7]]
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SMIN3]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; VI-LABEL: name: test_smin_v4s16
+    ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[UV]], [[UV4]]
+    ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[UV1]], [[UV5]]
+    ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[UV2]], [[UV6]]
+    ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[UV3]], [[UV7]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SMIN]](s16), [[SMIN1]](s16), [[SMIN2]](s16), [[SMIN3]](s16)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; GFX9-LABEL: name: test_smin_v4s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]]
+    ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x s16>) = G_SMIN %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
new file mode 100644
index 0000000000000..1d9b6cd2f9fd2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -0,0 +1,413 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI  %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9  %s
+
+---
+name: test_umax_s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umax_s32
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]]
+    ; SI: $vgpr0 = COPY [[UMAX]](s32)
+    ; VI-LABEL: name: test_umax_s32
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]]
+    ; VI: $vgpr0 = COPY [[UMAX]](s32)
+    ; GFX9-LABEL: name: test_umax_s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[UMAX]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_UMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_umax_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umax_s16
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_umax_s16
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_umax_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(s16) = G_UMAX %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_umax_s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umax_s8
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_umax_s8
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
+    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_umax_s8
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s8) = G_TRUNC %0
+    %3:_(s8) = G_TRUNC %1
+    %4:_(s8) = G_UMAX %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_umax_s17
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umax_s17
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_umax_s17
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32)
+    ; VI: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_umax_s17
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMAX]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s17) = G_TRUNC %0
+    %3:_(s17) = G_TRUNC %1
+    %4:_(s17) = G_UMAX %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_umax_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_umax_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]]
+    ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-LABEL: name: test_umax_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]]
+    ; VI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: test_umax_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]]
+    ; GFX9: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = G_UMAX %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_umax_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; SI-LABEL: name: test_umax_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]]
+    ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]]
+    ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-LABEL: name: test_umax_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]]
+    ; VI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]]
+    ; VI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-LABEL: name: test_umax_v3s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]]
+    ; GFX9: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]]
+    ; GFX9: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<3 x s32>) = G_UMAX %0, %1
+    $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_umax_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umax_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT2]], [[ZEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; VI-LABEL: name: test_umax_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[UV]], [[UV2]]
+    ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16)
+    ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: test_umax_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_UMAX %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_umax_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umax_v3s16
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16)
+    ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT2]], [[ZEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32)
+    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16)
+    ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT4]], [[ZEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; VI-LABEL: name: test_umax_v3s16
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[UV]], [[UV3]]
+    ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[UV1]], [[UV4]]
+    ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16), [[UMAX2]](s16)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; GFX9-LABEL: name: test_umax_v3s16
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]]
+    ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>)
+    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s16>) = G_IMPLICIT_DEF
+    %2:_(<3 x s16>) = G_UMAX %0, %1
+    S_NOP 0, implicit %2
+...
+
+---
+name: test_umax_v4s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_umax_v4s16
+    ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16)
+    ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT]], [[ZEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16)
+    ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT2]], [[ZEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX1]](s32)
+    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s16)
+    ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT4]], [[ZEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX2]](s32)
+    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s16)
+    ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[ZEXT6]], [[ZEXT7]]
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMAX3]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; VI-LABEL: name: test_umax_v4s16
+    ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[UV]], [[UV4]]
+    ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[UV1]], [[UV5]]
+    ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[UV2]], [[UV6]]
+    ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[UV3]], [[UV7]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMAX]](s16), [[UMAX1]](s16), [[UMAX2]](s16), [[UMAX3]](s16)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; GFX9-LABEL: name: test_umax_v4s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]]
+    ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x s16>) = G_UMAX %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
new file mode 100644
index 0000000000000..d9775a54719cb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -0,0 +1,413 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI  %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9  %s
+
+---
+name: test_umin_s32
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umin_s32
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]]
+    ; SI: $vgpr0 = COPY [[UMIN]](s32)
+    ; VI-LABEL: name: test_umin_s32
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]]
+    ; VI: $vgpr0 = COPY [[UMIN]](s32)
+    ; GFX9-LABEL: name: test_umin_s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[UMIN]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_UMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_umin_s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umin_s16
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_umin_s16
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_umin_s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(s16) = G_TRUNC %1
+    %4:_(s16) = G_UMIN %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_umin_s8
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umin_s8
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_umin_s8
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
+    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; GFX9-LABEL: name: test_umin_s8
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s8) = G_TRUNC %0
+    %3:_(s8) = G_TRUNC %1
+    %4:_(s8) = G_UMIN %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_umin_s17
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umin_s17
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
+    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
+    ; SI: $vgpr0 = COPY [[COPY4]](s32)
+    ; VI-LABEL: name: test_umin_s17
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
+    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
+    ; VI: $vgpr0 = COPY [[COPY4]](s32)
+    ; GFX9-LABEL: name: test_umin_s17
+    ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]]
+    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UMIN]](s32)
+    ; GFX9: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s17) = G_TRUNC %0
+    %3:_(s17) = G_TRUNC %1
+    %4:_(s17) = G_UMIN %2, %3
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
+...
+
+---
+name: test_umin_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_umin_v2s32
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]]
+    ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; VI-LABEL: name: test_umin_v2s32
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]]
+    ; VI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    ; GFX9-LABEL: name: test_umin_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]]
+    ; GFX9: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
+    %2:_(<2 x s32>) = G_UMIN %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_umin_v3s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
+
+    ; SI-LABEL: name: test_umin_v3s32
+    ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]]
+    ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]]
+    ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]]
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; VI-LABEL: name: test_umin_v3s32
+    ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]]
+    ; VI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]]
+    ; VI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
+    ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; GFX9-LABEL: name: test_umin_v3s32
+    ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]]
+    ; GFX9: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]]
+    ; GFX9: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+    %2:_(<3 x s32>) = G_UMIN %0, %1
+    $vgpr0_vgpr1_vgpr2 = COPY %2
+...
+
+---
+name: test_umin_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umin_v2s16
+    ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; SI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT2]], [[ZEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; SI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; VI-LABEL: name: test_umin_v2s16
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; VI: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>)
+    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[UV]], [[UV2]]
+    ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[UV1]], [[UV3]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16)
+    ; VI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
+    ; GFX9-LABEL: name: test_umin_v2s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]]
+    ; GFX9: $vgpr0 = COPY [[UMIN]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<2 x s16>) = G_UMIN %0, %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_umin_v3s16
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; SI-LABEL: name: test_umin_v3s16
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; SI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16)
+    ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT2]], [[ZEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32)
+    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16)
+    ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT4]], [[ZEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16)
+    ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; VI-LABEL: name: test_umin_v3s16
+    ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>)
+    ; VI: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>)
+    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[UV]], [[UV3]]
+    ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[UV1]], [[UV4]]
+    ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[UV2]], [[UV5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16), [[UMIN2]](s16)
+    ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s16>)
+    ; GFX9-LABEL: name: test_umin_v3s16
+    ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0
+    ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>)
+    ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]]
+    ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>)
+    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
+    ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s16>) = G_IMPLICIT_DEF
+    %2:_(<3 x s16>) = G_UMIN %0, %1
+    S_NOP 0, implicit %2
+...
+
+---
+name: test_umin_v4s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; SI-LABEL: name: test_umin_v4s16
+    ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; SI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV4]](s16)
+    ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT]], [[ZEXT1]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN]](s32)
+    ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV5]](s16)
+    ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT2]], [[ZEXT3]]
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN1]](s32)
+    ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UV6]](s16)
+    ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT4]], [[ZEXT5]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN2]](s32)
+    ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s16)
+    ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[ZEXT6]], [[ZEXT7]]
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UMIN3]](s32)
+    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; VI-LABEL: name: test_umin_v4s16
+    ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[UV]], [[UV4]]
+    ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[UV1]], [[UV5]]
+    ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[UV2]], [[UV6]]
+    ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[UV3]], [[UV7]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UMIN]](s16), [[UMIN1]](s16), [[UMIN2]](s16), [[UMIN3]](s16)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>)
+    ; GFX9-LABEL: name: test_umin_v4s16
+    ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>)
+    ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]]
+    ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]]
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(<4 x s16>) = COPY $vgpr2_vgpr3
+    %2:_(<4 x s16>) = G_UMIN %0, %1
+    $vgpr0_vgpr1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
new file mode 100644
index 0000000000000..144c82530bcee
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: smax_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: smax_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_SMAX %0, %1
+...
+
+---
+name: smax_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smax_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_SMAX %0, %1
+...
+
+---
+name: smax_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smax_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_SMAX %0, %1
+...
+
+---
+name: smax_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: smax_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[SMAX:%[0-9]+]]:vgpr(s32) = G_SMAX [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_SMAX %0, %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
new file mode 100644
index 0000000000000..5fa5ef4276eba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: smin_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: smin_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_SMIN %0, %1
+...
+
+---
+name: smin_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smin_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_SMIN %0, %1
+...
+
+---
+name: smin_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: smin_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_SMIN %0, %1
+...
+
+---
+name: smin_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: smin_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[SMIN:%[0-9]+]]:vgpr(s32) = G_SMIN [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_SMIN %0, %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
new file mode 100644
index 0000000000000..123c12b5ddce5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: umax_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: umax_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_UMAX %0, %1
+...
+
+---
+name: umax_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umax_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_UMAX %0, %1
+...
+
+---
+name: umax_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umax_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_UMAX %0, %1
+...
+
+---
+name: umax_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: umax_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[UMAX:%[0-9]+]]:vgpr(s32) = G_UMAX [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_UMAX %0, %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
new file mode 100644
index 0000000000000..f78cfecacabef
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: umin_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: umin_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_UMIN %0, %1
+...
+
+---
+name: umin_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umin_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_UMIN %0, %1
+...
+
+---
+name: umin_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; CHECK-LABEL: name: umin_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY2]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = G_UMIN %0, %1
+...
+
+---
+name: umin_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: umin_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; CHECK: [[UMIN:%[0-9]+]]:vgpr(s32) = G_UMIN [[COPY]], [[COPY1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_UMIN %0, %1
+...

From 4d7054615cc3cfe3e715111edf8a54b65074fba3 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Thu, 23 May 2019 18:01:16 +0000
Subject: [PATCH 0056/1176] gn build: Merge r361418 more

llvm-svn: 361520
---
 .../gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn     | 1 +
 .../gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn | 1 +
 2 files changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn
index ea5783ff566df..2deb47501ce6c 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn
@@ -7,6 +7,7 @@ static_library("utils") {
     "//clang/lib/ASTMatchers",
     "//clang/lib/Basic",
     "//clang/lib/Lex",
+    "//clang/lib/Tooling/Refactoring",
     "//llvm/lib/Support",
   ]
   sources = [
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn
index 12ea944e3b25b..7d1f256458957 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-tidy/BUILD.gn
@@ -18,6 +18,7 @@ unittest("ClangTidyTests") {
     "//clang/lib/Serialization",
     "//clang/lib/Tooling",
     "//clang/lib/Tooling/Core",
+    "//clang/lib/Tooling/Refactoring",
     "//llvm/lib/Support",
   ]
   include_dirs = [ "//clang-tools-extra/clang-tidy" ]

From d19a36efd14f1474dd73866cd122e51343d612b3 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 18:08:00 +0000
Subject: [PATCH 0057/1176] UpdateTestChecks: -march=mips/-march=mipsel is mips
 triple.

Again, a mixture of march and triple, with majority being march:

llvm/test/CodeGen/Mips$ grep -ri triple | wc -l
818
llvm/test/CodeGen/Mips$ grep -ri march | wc -l
1457

llvm-svn: 361521
---
 llvm/utils/UpdateTestChecks/asm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index ccc68d06f5ba5..459aa42381ab7 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -219,6 +219,7 @@ def scrub_asm_wasm32(asm, args):
 def get_triple_from_march(march):
   triples = {
       'amdgcn': 'amdgcn',
+      'mips': 'mips',
       'sparc': 'sparc',
   }
   for prefix, triple in triples.items():

From 06688fe7158e730d191f21469b410c06c54bfaf2 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 18:08:13 +0000
Subject: [PATCH 0058/1176] [NFC][Mips] Autogenerate msa/arithmetic.ll test

Being affected by (sub %x, C) -> add %X, (sub 0, C) 'for vectors' patch.

llvm-svn: 361522
---
 llvm/test/CodeGen/Mips/msa/arithmetic.ll | 689 +++++++++++------------
 1 file changed, 323 insertions(+), 366 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/msa/arithmetic.ll b/llvm/test/CodeGen/Mips/msa/arithmetic.ll
index 3ecd0e43589f8..86597d52fda95 100644
--- a/llvm/test/CodeGen/Mips/msa/arithmetic.ll
+++ b/llvm/test/CodeGen/Mips/msa/arithmetic.ll
@@ -1,726 +1,683 @@
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=ALL,MIPS
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=ALL,MIPSEL
 
 define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: add_v16i8:
-
+; ALL-LABEL: add_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    addv.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = add <16 x i8> %1, %2
-  ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v16i8
 }
 
 define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: add_v8i16:
-
+; ALL-LABEL: add_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    addv.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = add <8 x i16> %1, %2
-  ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v8i16
 }
 
 define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: add_v4i32:
-
+; ALL-LABEL: add_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    addv.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = add <4 x i32> %1, %2
-  ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v4i32
 }
 
 define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: add_v2i64:
-
+; ALL-LABEL: add_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    addv.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = add <2 x i64> %1, %2
-  ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v2i64
 }
 
 define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
-  ; CHECK: add_v16i8_i:
-
+; ALL-LABEL: add_v16i8_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($5)
+; ALL-NEXT:    addvi.b $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
-                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
+              i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   store <16 x i8> %2, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v16i8_i
 }
 
 define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
-  ; CHECK: add_v8i16_i:
-
+; ALL-LABEL: add_v8i16_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($5)
+; ALL-NEXT:    addvi.h $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
-                          i16 1, i16 1, i16 1, i16 1>
-  ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
+              i16 1, i16 1, i16 1, i16 1>
   store <8 x i16> %2, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v8i16_i
 }
 
 define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
-  ; CHECK: add_v4i32_i:
-
+; ALL-LABEL: add_v4i32_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($5)
+; ALL-NEXT:    addvi.w $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
   store <4 x i32> %2, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v4i32_i
 }
 
 define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
-  ; CHECK: add_v2i64_i:
-
+; ALL-LABEL: add_v2i64_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($5)
+; ALL-NEXT:    addvi.d $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = add <2 x i64> %1, <i64 1, i64 1>
-  ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
   store <2 x i64> %2, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size add_v2i64_i
 }
 
 define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: sub_v16i8:
-
+; ALL-LABEL: sub_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    subv.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <16 x i8> %1, %2
-  ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v16i8
 }
 
 define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: sub_v8i16:
-
+; ALL-LABEL: sub_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    subv.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <8 x i16> %1, %2
-  ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v8i16
 }
 
 define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: sub_v4i32:
-
+; ALL-LABEL: sub_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    subv.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <4 x i32> %1, %2
-  ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v4i32
 }
 
 define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: sub_v2i64:
-
+; ALL-LABEL: sub_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    subv.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <2 x i64> %1, %2
-  ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v2i64
 }
 
 define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
-  ; CHECK: sub_v16i8_i:
-
+; ALL-LABEL: sub_v16i8_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($5)
+; ALL-NEXT:    subvi.b $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
-                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
+              i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   store <16 x i8> %2, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v16i8_i
 }
 
 define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
-  ; CHECK: sub_v8i16_i:
-
+; ALL-LABEL: sub_v8i16_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($5)
+; ALL-NEXT:    subvi.h $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
-                          i16 1, i16 1, i16 1, i16 1>
-  ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
+              i16 1, i16 1, i16 1, i16 1>
   store <8 x i16> %2, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v8i16_i
 }
 
 define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
-  ; CHECK: sub_v4i32_i:
-
+; ALL-LABEL: sub_v4i32_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($5)
+; ALL-NEXT:    subvi.w $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
-  ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
   store <4 x i32> %2, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v4i32_i
 }
 
 define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
-  ; CHECK: sub_v2i64_i:
-
+; ALL-LABEL: sub_v2i64_i:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($5)
+; ALL-NEXT:    subvi.d $w0, $w0, 1
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <2 x i64> %1, <i64 1, i64 1>
-  ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
   store <2 x i64> %2, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size sub_v2i64_i
 }
 
 define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: mul_v16i8:
-
+; ALL-LABEL: mul_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    mulv.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <16 x i8> %1, %2
-  ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mul_v16i8
 }
 
 define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: mul_v8i16:
-
+; ALL-LABEL: mul_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    mulv.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <8 x i16> %1, %2
-  ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mul_v8i16
 }
 
 define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: mul_v4i32:
-
+; ALL-LABEL: mul_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    mulv.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <4 x i32> %1, %2
-  ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mul_v4i32
 }
 
 define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: mul_v2i64:
-
+; ALL-LABEL: mul_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    mulv.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <2 x i64> %1, %2
-  ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mul_v2i64
 }
 
 define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
-                         <16 x i8>* %c) nounwind {
-  ; CHECK: maddv_v16i8:
-
+; ALL-LABEL: maddv_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($7)
+; ALL-NEXT:    ld.b $w1, 0($6)
+; ALL-NEXT:    ld.b $w2, 0($5)
+; ALL-NEXT:    maddv.b $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w2, 0($4)
+             <16 x i8>* %c) nounwind {
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = load <16 x i8>, <16 x i8>* %c
-  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <16 x i8> %2, %3
   %5 = add <16 x i8> %4, %1
-  ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
   store <16 x i8> %5, <16 x i8>* %d
-  ; CHECK-DAG: st.b [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size maddv_v16i8
 }
 
 define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
-                         <8 x i16>* %c) nounwind {
-  ; CHECK: maddv_v8i16:
-
+; ALL-LABEL: maddv_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($7)
+; ALL-NEXT:    ld.h $w1, 0($6)
+; ALL-NEXT:    ld.h $w2, 0($5)
+; ALL-NEXT:    maddv.h $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w2, 0($4)
+             <8 x i16>* %c) nounwind {
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = load <8 x i16>, <8 x i16>* %c
-  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <8 x i16> %2, %3
   %5 = add <8 x i16> %4, %1
-  ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
   store <8 x i16> %5, <8 x i16>* %d
-  ; CHECK-DAG: st.h [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size maddv_v8i16
 }
 
 define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
-                         <4 x i32>* %c) nounwind {
-  ; CHECK: maddv_v4i32:
-
+; ALL-LABEL: maddv_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($7)
+; ALL-NEXT:    ld.w $w1, 0($6)
+; ALL-NEXT:    ld.w $w2, 0($5)
+; ALL-NEXT:    maddv.w $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w2, 0($4)
+             <4 x i32>* %c) nounwind {
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = load <4 x i32>, <4 x i32>* %c
-  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <4 x i32> %2, %3
   %5 = add <4 x i32> %4, %1
-  ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
   store <4 x i32> %5, <4 x i32>* %d
-  ; CHECK-DAG: st.w [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size maddv_v4i32
 }
 
 define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
-                         <2 x i64>* %c) nounwind {
-  ; CHECK: maddv_v2i64:
-
+; ALL-LABEL: maddv_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($7)
+; ALL-NEXT:    ld.d $w1, 0($6)
+; ALL-NEXT:    ld.d $w2, 0($5)
+; ALL-NEXT:    maddv.d $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w2, 0($4)
+             <2 x i64>* %c) nounwind {
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = load <2 x i64>, <2 x i64>* %c
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <2 x i64> %2, %3
   %5 = add <2 x i64> %4, %1
-  ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
   store <2 x i64> %5, <2 x i64>* %d
-  ; CHECK-DAG: st.d [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size maddv_v2i64
 }
 
 define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
-                         <16 x i8>* %c) nounwind {
-  ; CHECK: msubv_v16i8:
-
+; ALL-LABEL: msubv_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($7)
+; ALL-NEXT:    ld.b $w1, 0($6)
+; ALL-NEXT:    ld.b $w2, 0($5)
+; ALL-NEXT:    msubv.b $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w2, 0($4)
+             <16 x i8>* %c) nounwind {
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = load <16 x i8>, <16 x i8>* %c
-  ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <16 x i8> %2, %3
   %5 = sub <16 x i8> %1, %4
-  ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
   store <16 x i8> %5, <16 x i8>* %d
-  ; CHECK-DAG: st.b [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size msubv_v16i8
 }
 
 define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
-                         <8 x i16>* %c) nounwind {
-  ; CHECK: msubv_v8i16:
-
+; ALL-LABEL: msubv_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($7)
+; ALL-NEXT:    ld.h $w1, 0($6)
+; ALL-NEXT:    ld.h $w2, 0($5)
+; ALL-NEXT:    msubv.h $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w2, 0($4)
+             <8 x i16>* %c) nounwind {
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = load <8 x i16>, <8 x i16>* %c
-  ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <8 x i16> %2, %3
   %5 = sub <8 x i16> %1, %4
-  ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
   store <8 x i16> %5, <8 x i16>* %d
-  ; CHECK-DAG: st.h [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size msubv_v8i16
 }
 
 define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
-                         <4 x i32>* %c) nounwind {
-  ; CHECK: msubv_v4i32:
-
+; ALL-LABEL: msubv_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($7)
+; ALL-NEXT:    ld.w $w1, 0($6)
+; ALL-NEXT:    ld.w $w2, 0($5)
+; ALL-NEXT:    msubv.w $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w2, 0($4)
+             <4 x i32>* %c) nounwind {
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = load <4 x i32>, <4 x i32>* %c
-  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <4 x i32> %2, %3
   %5 = sub <4 x i32> %1, %4
-  ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
   store <4 x i32> %5, <4 x i32>* %d
-  ; CHECK-DAG: st.w [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size msubv_v4i32
 }
 
 define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
-                         <2 x i64>* %c) nounwind {
-  ; CHECK: msubv_v2i64:
-
+; ALL-LABEL: msubv_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($7)
+; ALL-NEXT:    ld.d $w1, 0($6)
+; ALL-NEXT:    ld.d $w2, 0($5)
+; ALL-NEXT:    msubv.d $w2, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w2, 0($4)
+             <2 x i64>* %c) nounwind {
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = load <2 x i64>, <2 x i64>* %c
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <2 x i64> %2, %3
   %5 = sub <2 x i64> %1, %4
-  ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
   store <2 x i64> %5, <2 x i64>* %d
-  ; CHECK-DAG: st.d [[R1]], 0($4)
-
   ret void
-  ; CHECK: .size msubv_v2i64
 }
 
 define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: div_s_v16i8:
-
+; ALL-LABEL: div_s_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    div_s.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <16 x i8> %1, %2
-  ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_s_v16i8
 }
 
 define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: div_s_v8i16:
-
+; ALL-LABEL: div_s_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    div_s.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <8 x i16> %1, %2
-  ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_s_v8i16
 }
 
 define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: div_s_v4i32:
-
+; ALL-LABEL: div_s_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    div_s.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <4 x i32> %1, %2
-  ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_s_v4i32
 }
 
 define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: div_s_v2i64:
-
+; ALL-LABEL: div_s_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    div_s.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <2 x i64> %1, %2
-  ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_s_v2i64
 }
 
 define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: div_u_v16i8:
-
+; ALL-LABEL: div_u_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    div_u.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <16 x i8> %1, %2
-  ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_u_v16i8
 }
 
 define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: div_u_v8i16:
-
+; ALL-LABEL: div_u_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    div_u.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <8 x i16> %1, %2
-  ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_u_v8i16
 }
 
 define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: div_u_v4i32:
-
+; ALL-LABEL: div_u_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    div_u.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <4 x i32> %1, %2
-  ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_u_v4i32
 }
 
 define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: div_u_v2i64:
-
+; ALL-LABEL: div_u_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    div_u.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <2 x i64> %1, %2
-  ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size div_u_v2i64
 }
 
 define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: mod_s_v16i8:
-
+; ALL-LABEL: mod_s_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    mod_s.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <16 x i8> %1, %2
-  ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_s_v16i8
 }
 
 define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: mod_s_v8i16:
-
+; ALL-LABEL: mod_s_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    mod_s.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <8 x i16> %1, %2
-  ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_s_v8i16
 }
 
 define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: mod_s_v4i32:
-
+; ALL-LABEL: mod_s_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    mod_s.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <4 x i32> %1, %2
-  ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_s_v4i32
 }
 
 define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: mod_s_v2i64:
-
+; ALL-LABEL: mod_s_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    mod_s.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <2 x i64> %1, %2
-  ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_s_v2i64
 }
 
 define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
-  ; CHECK: mod_u_v16i8:
-
+; ALL-LABEL: mod_u_v16i8:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.b $w0, 0($6)
+; ALL-NEXT:    ld.b $w1, 0($5)
+; ALL-NEXT:    mod_u.b $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($4)
   %1 = load <16 x i8>, <16 x i8>* %a
-  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = load <16 x i8>, <16 x i8>* %b
-  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <16 x i8> %1, %2
-  ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <16 x i8> %3, <16 x i8>* %c
-  ; CHECK-DAG: st.b [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_u_v16i8
 }
 
 define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
-  ; CHECK: mod_u_v8i16:
-
+; ALL-LABEL: mod_u_v8i16:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.h $w0, 0($6)
+; ALL-NEXT:    ld.h $w1, 0($5)
+; ALL-NEXT:    mod_u.h $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($4)
   %1 = load <8 x i16>, <8 x i16>* %a
-  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = load <8 x i16>, <8 x i16>* %b
-  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <8 x i16> %1, %2
-  ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <8 x i16> %3, <8 x i16>* %c
-  ; CHECK-DAG: st.h [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_u_v8i16
 }
 
 define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
-  ; CHECK: mod_u_v4i32:
-
+; ALL-LABEL: mod_u_v4i32:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.w $w0, 0($6)
+; ALL-NEXT:    ld.w $w1, 0($5)
+; ALL-NEXT:    mod_u.w $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($4)
   %1 = load <4 x i32>, <4 x i32>* %a
-  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = load <4 x i32>, <4 x i32>* %b
-  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <4 x i32> %1, %2
-  ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <4 x i32> %3, <4 x i32>* %c
-  ; CHECK-DAG: st.w [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_u_v4i32
 }
 
 define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
-  ; CHECK: mod_u_v2i64:
-
+; ALL-LABEL: mod_u_v2i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ld.d $w0, 0($6)
+; ALL-NEXT:    ld.d $w1, 0($5)
+; ALL-NEXT:    mod_u.d $w0, $w1, $w0
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($4)
   %1 = load <2 x i64>, <2 x i64>* %a
-  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = load <2 x i64>, <2 x i64>* %b
-  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <2 x i64> %1, %2
-  ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   store <2 x i64> %3, <2 x i64>* %c
-  ; CHECK-DAG: st.d [[R3]], 0($4)
-
   ret void
-  ; CHECK: .size mod_u_v2i64
 }

From a8a470c45b8702f8d41ba59d76600377e23bc5f2 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 18:08:17 +0000
Subject: [PATCH 0059/1176] [NFC][Mips] Autogenerate msa/i5-s.ll test

Being affected by (sub %x, C) -> add %X, (sub 0, C) 'for vectors' patch.

llvm-svn: 361523
---
 llvm/test/CodeGen/Mips/msa/i5-s.ll | 72 ++++++++++++++++++------------
 1 file changed, 44 insertions(+), 28 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/msa/i5-s.ll b/llvm/test/CodeGen/Mips/msa/i5-s.ll
index db331b1476ce5..ce5e4eb091b11 100644
--- a/llvm/test/CodeGen/Mips/msa/i5-s.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5-s.ll
@@ -1,13 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=ALL,MIPS
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s --check-prefixes=ALL,MIPSEL
+
 ; Test the MSA intrinsics that are encoded with the I5 instruction format.
 ; There are lots of these so this covers those beginning with 's'
 
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
-
 @llvm_mips_subvi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_subvi_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
 
 define void @llvm_mips_subvi_b_test() nounwind {
+; ALL-LABEL: llvm_mips_subvi_b_test:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    lui $1, %hi(llvm_mips_subvi_b_RES)
+; ALL-NEXT:    addiu $1, $1, %lo(llvm_mips_subvi_b_RES)
+; ALL-NEXT:    lui $2, %hi(llvm_mips_subvi_b_ARG1)
+; ALL-NEXT:    addiu $2, $2, %lo(llvm_mips_subvi_b_ARG1)
+; ALL-NEXT:    ld.b $w0, 0($2)
+; ALL-NEXT:    subvi.b $w0, $w0, 14
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.b $w0, 0($1)
 entry:
   %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subvi_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.subvi.b(<16 x i8> %0, i32 14)
@@ -17,16 +28,20 @@ entry:
 
 declare <16 x i8> @llvm.mips.subvi.b(<16 x i8>, i32) nounwind
 
-; CHECK: llvm_mips_subvi_b_test:
-; CHECK: ld.b
-; CHECK: subvi.b
-; CHECK: st.b
-; CHECK: .size llvm_mips_subvi_b_test
-;
 @llvm_mips_subvi_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
 @llvm_mips_subvi_h_RES  = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
 
 define void @llvm_mips_subvi_h_test() nounwind {
+; ALL-LABEL: llvm_mips_subvi_h_test:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    lui $1, %hi(llvm_mips_subvi_h_RES)
+; ALL-NEXT:    addiu $1, $1, %lo(llvm_mips_subvi_h_RES)
+; ALL-NEXT:    lui $2, %hi(llvm_mips_subvi_h_ARG1)
+; ALL-NEXT:    addiu $2, $2, %lo(llvm_mips_subvi_h_ARG1)
+; ALL-NEXT:    ld.h $w0, 0($2)
+; ALL-NEXT:    subvi.h $w0, $w0, 14
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.h $w0, 0($1)
 entry:
   %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subvi_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.subvi.h(<8 x i16> %0, i32 14)
@@ -36,16 +51,20 @@ entry:
 
 declare <8 x i16> @llvm.mips.subvi.h(<8 x i16>, i32) nounwind
 
-; CHECK: llvm_mips_subvi_h_test:
-; CHECK: ld.h
-; CHECK: subvi.h
-; CHECK: st.h
-; CHECK: .size llvm_mips_subvi_h_test
-;
 @llvm_mips_subvi_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
 @llvm_mips_subvi_w_RES  = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
 
 define void @llvm_mips_subvi_w_test() nounwind {
+; ALL-LABEL: llvm_mips_subvi_w_test:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    lui $1, %hi(llvm_mips_subvi_w_RES)
+; ALL-NEXT:    addiu $1, $1, %lo(llvm_mips_subvi_w_RES)
+; ALL-NEXT:    lui $2, %hi(llvm_mips_subvi_w_ARG1)
+; ALL-NEXT:    addiu $2, $2, %lo(llvm_mips_subvi_w_ARG1)
+; ALL-NEXT:    ld.w $w0, 0($2)
+; ALL-NEXT:    subvi.w $w0, $w0, 14
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.w $w0, 0($1)
 entry:
   %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subvi_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.subvi.w(<4 x i32> %0, i32 14)
@@ -55,16 +74,20 @@ entry:
 
 declare <4 x i32> @llvm.mips.subvi.w(<4 x i32>, i32) nounwind
 
-; CHECK: llvm_mips_subvi_w_test:
-; CHECK: ld.w
-; CHECK: subvi.w
-; CHECK: st.w
-; CHECK: .size llvm_mips_subvi_w_test
-;
 @llvm_mips_subvi_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
 @llvm_mips_subvi_d_RES  = global <2 x i64> <i64 0, i64 0>, align 16
 
 define void @llvm_mips_subvi_d_test() nounwind {
+; ALL-LABEL: llvm_mips_subvi_d_test:
+; ALL:       # %bb.0: # %entry
+; ALL-NEXT:    lui $1, %hi(llvm_mips_subvi_d_RES)
+; ALL-NEXT:    addiu $1, $1, %lo(llvm_mips_subvi_d_RES)
+; ALL-NEXT:    lui $2, %hi(llvm_mips_subvi_d_ARG1)
+; ALL-NEXT:    addiu $2, $2, %lo(llvm_mips_subvi_d_ARG1)
+; ALL-NEXT:    ld.d $w0, 0($2)
+; ALL-NEXT:    subvi.d $w0, $w0, 14
+; ALL-NEXT:    jr $ra
+; ALL-NEXT:    st.d $w0, 0($1)
 entry:
   %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subvi_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.subvi.d(<2 x i64> %0, i32 14)
@@ -73,10 +96,3 @@ entry:
 }
 
 declare <2 x i64> @llvm.mips.subvi.d(<2 x i64>, i32) nounwind
-
-; CHECK: llvm_mips_subvi_d_test:
-; CHECK: ld.d
-; CHECK: subvi.d
-; CHECK: st.d
-; CHECK: .size llvm_mips_subvi_d_test
-;

From c8364ef567d3168842920b5d39c02fca1742ecff Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 18:08:21 +0000
Subject: [PATCH 0060/1176] [NFC][PPC] Autogenerate vec_add_sub_doubleword.ll
 test

Being affected by (sub %x, C) -> add %X, (sub 0, C) 'for vectors' patch.

llvm-svn: 361524
---
 .../CodeGen/PowerPC/vec_add_sub_doubleword.ll | 140 ++++++++++++------
 1 file changed, 98 insertions(+), 42 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
index 2c3bee14ccdbd..62f17acb8a749 100644
--- a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
@@ -1,62 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=ALL,VSX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefixes=ALL,NOVSX
+
 ; Check VMX 64-bit integer operations
-;
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 
 define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
-       %result = add <2 x i64> %x, %y
-       ret <2 x i64> %result
-; CHECK: vaddudm 2, 2, 3
+; ALL-LABEL: test_add:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vaddudm 2, 2, 3
+; ALL-NEXT:    blr
+  %result = add <2 x i64> %x, %y
+  ret <2 x i64> %result
 }
 
 define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
-       %result = add <2 x i64> %x, <i64 1, i64 1>
-       ret <2 x i64> %result
-; CHECK: vaddudm 2, 2, 3
+; VSX-LABEL: increment_by_one:
+; VSX:       # %bb.0:
+; VSX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; VSX-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; VSX-NEXT:    lxvd2x 35, 0, 3
+; VSX-NEXT:    vaddudm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: increment_by_one:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; NOVSX-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vaddudm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %result = add <2 x i64> %x, <i64 1, i64 1>
+  ret <2 x i64> %result
 }
 
 define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
-       %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
-       %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
-       %result = add <2 x i64> %x, %tmpvec2
-       ret <2 x i64> %result
-; CHECK: vaddudm 2, 2, 3
-; FIXME: This is currently generating the following instruction sequence
+; VSX-LABEL: increment_by_val:
+; VSX:       # %bb.0:
+; VSX-NEXT:    mtvsrd 0, 5
+; VSX-NEXT:    xxspltd 35, 0, 0
+; VSX-NEXT:    vaddudm 2, 2, 3
+; VSX-NEXT:    blr
 ;
-;        std 5, -8(1)
-;        std 5, -16(1)
-;        addi 3, 1, -16
-;        ori 2, 2, 0
-;        lxvd2x 35, 0, 3
-;        vaddudm 2, 2, 3
-;        blr
-;        
-;        This will almost certainly cause a load-hit-store hazard.
-;        Since val is a value parameter, it should not need to be
-;        saved onto the stack at all (unless we're using this to set
-;        up the vector register). Instead, it would be better to splat
-;        the value into a vector register.
+; NOVSX-LABEL: increment_by_val:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi 3, 1, -16
+; NOVSX-NEXT:    std 5, -8(1)
+; NOVSX-NEXT:    std 5, -16(1)
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vaddudm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+  %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+  %result = add <2 x i64> %x, %tmpvec2
+  ret <2 x i64> %result
+; FIXME: This is currently generating the following instruction sequence
+;   std 5, -8(1)
+;   std 5, -16(1)
+;   addi 3, 1, -16
+;   ori 2, 2, 0
+;   lxvd2x 35, 0, 3
+;   vaddudm 2, 2, 3
+;   blr
+;   This will almost certainly cause a load-hit-store hazard.
+;   Since val is a value parameter, it should not need to be
+;   saved onto the stack at all (unless we're using this to set
+;   up the vector register). Instead, it would be better to splat
+;   the value into a vector register.
 }
 
 define <2 x i64> @test_sub(<2 x i64> %x, <2 x i64> %y) nounwind {
-       %result = sub <2 x i64> %x, %y
-       ret <2 x i64> %result
-; CHECK: vsubudm 2, 2, 3
+; ALL-LABEL: test_sub:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vsubudm 2, 2, 3
+; ALL-NEXT:    blr
+  %result = sub <2 x i64> %x, %y
+  ret <2 x i64> %result
 }
 
 define <2 x i64> @decrement_by_one(<2 x i64> %x) nounwind {
-       %result = sub <2 x i64> %x, <i64 -1, i64 -1>
-       ret <2 x i64> %result
-; CHECK: vsubudm 2, 2, 3
+; VSX-LABEL: decrement_by_one:
+; VSX:       # %bb.0:
+; VSX-NEXT:    vspltisb 3, -1
+; VSX-NEXT:    vsubudm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: decrement_by_one:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; NOVSX-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vsubudm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %result = sub <2 x i64> %x, <i64 -1, i64 -1>
+  ret <2 x i64> %result
 }
 
 define <2 x i64> @decrement_by_val(<2 x i64> %x, i64 %val) nounwind {
-       %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
-       %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
-       %result = sub <2 x i64> %x, %tmpvec2
-       ret <2 x i64> %result
-; CHECK: vsubudm 2, 2, 3
+; VSX-LABEL: decrement_by_val:
+; VSX:       # %bb.0:
+; VSX-NEXT:    mtvsrd 0, 5
+; VSX-NEXT:    xxspltd 35, 0, 0
+; VSX-NEXT:    vsubudm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: decrement_by_val:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi 3, 1, -16
+; NOVSX-NEXT:    std 5, -8(1)
+; NOVSX-NEXT:    std 5, -16(1)
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vsubudm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+  %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+  %result = sub <2 x i64> %x, %tmpvec2
+  ret <2 x i64> %result
 }
-
-
-

From 702a152e6ad0846c2168d88074c056d37e8a5122 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 18:08:26 +0000
Subject: [PATCH 0061/1176] [NFC][PPC] Autogenerate vec_add_sub_quadword.ll
 test

Being affected by (sub %x, C) -> add %X, (sub 0, C) 'for vectors' patch.

llvm-svn: 361525
---
 .../CodeGen/PowerPC/vec_add_sub_quadword.ll   | 230 +++++++++++-------
 1 file changed, 140 insertions(+), 90 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll b/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
index 8f3864ff26836..8ddf0ad9b50d9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
@@ -1,137 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=ALL,VSX
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s --check-prefixes=ALL,NOVSX
+
 ; Check VMX 128-bit integer operations
-;
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 
 define <1 x i128> @out_of_bounds_insertelement(<1 x i128> %x, i128 %val) nounwind {
-       %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 1
-       %result = add <1 x i128> %x, %tmpvec
-       ret <1 x i128> %result
-; CHECK-LABEL: @out_of_bounds_insertelement
-; CHECK: # %bb.0:
-; CHECK-NEXT: blr
+; ALL-LABEL: out_of_bounds_insertelement:
+; ALL:       # %bb.0:
+; ALL-NEXT:    blr
+  %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 1
+  %result = add <1 x i128> %x, %tmpvec
+  ret <1 x i128> %result
 }
 
 define <1 x i128> @test_add(<1 x i128> %x, <1 x i128> %y) nounwind {
-       %result = add <1 x i128> %x, %y
-       ret <1 x i128> %result
-; CHECK-LABEL: @test_add
-; CHECK: vadduqm 2, 2, 3
+; ALL-LABEL: test_add:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vadduqm 2, 2, 3
+; ALL-NEXT:    blr
+  %result = add <1 x i128> %x, %y
+  ret <1 x i128> %result
 }
 
 define <1 x i128> @increment_by_one(<1 x i128> %x) nounwind {
-       %result = add <1 x i128> %x, <i128 1>
-       ret <1 x i128> %result
-; CHECK-LABEL: @increment_by_one
-; CHECK: vadduqm 2, 2, 3
+; VSX-LABEL: increment_by_one:
+; VSX:       # %bb.0:
+; VSX-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; VSX-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; VSX-NEXT:    lxvd2x 35, 0, 3
+; VSX-NEXT:    vadduqm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: increment_by_one:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; NOVSX-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vadduqm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %result = add <1 x i128> %x, <i128 1>
+  ret <1 x i128> %result
 }
 
 define <1 x i128> @increment_by_val(<1 x i128> %x, i128 %val) nounwind {
-       %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0
-       %result = add <1 x i128> %x, %tmpvec
-       ret <1 x i128> %result
-; CHECK-LABEL: @increment_by_val
-; CHECK: vadduqm 2, 2, 3
+; VSX-LABEL: increment_by_val:
+; VSX:       # %bb.0:
+; VSX-NEXT:    mtvsrd 0, 6
+; VSX-NEXT:    mtvsrd 1, 5
+; VSX-NEXT:    xxmrghd 35, 1, 0
+; VSX-NEXT:    vadduqm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: increment_by_val:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi 3, 1, -16
+; NOVSX-NEXT:    std 6, -8(1)
+; NOVSX-NEXT:    std 5, -16(1)
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vadduqm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0
+  %result = add <1 x i128> %x, %tmpvec
+  ret <1 x i128> %result
 }
 
 define <1 x i128> @test_sub(<1 x i128> %x, <1 x i128> %y) nounwind {
-       %result = sub <1 x i128> %x, %y
-       ret <1 x i128> %result
-; CHECK-LABEL: @test_sub
-; CHECK: vsubuqm 2, 2, 3
+; ALL-LABEL: test_sub:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vsubuqm 2, 2, 3
+; ALL-NEXT:    blr
+  %result = sub <1 x i128> %x, %y
+  ret <1 x i128> %result
 }
 
 define <1 x i128> @decrement_by_one(<1 x i128> %x) nounwind {
-       %result = sub <1 x i128> %x, <i128 1>
-       ret <1 x i128> %result
-; CHECK-LABEL: @decrement_by_one
-; CHECK: vsubuqm 2, 2, 3
+; VSX-LABEL: decrement_by_one:
+; VSX:       # %bb.0:
+; VSX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; VSX-NEXT:    addi 3, 3, .LCPI5_0@toc@l
+; VSX-NEXT:    lxvd2x 35, 0, 3
+; VSX-NEXT:    vsubuqm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: decrement_by_one:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; NOVSX-NEXT:    addi 3, 3, .LCPI5_0@toc@l
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vsubuqm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %result = sub <1 x i128> %x, <i128 1>
+  ret <1 x i128> %result
 }
 
 define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind {
-       %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0
-       %result = sub <1 x i128> %x, %tmpvec
-       ret <1 x i128> %result
-; CHECK-LABEL: @decrement_by_val
-; CHECK: vsubuqm 2, 2, 3
+; VSX-LABEL: decrement_by_val:
+; VSX:       # %bb.0:
+; VSX-NEXT:    mtvsrd 0, 6
+; VSX-NEXT:    mtvsrd 1, 5
+; VSX-NEXT:    xxmrghd 35, 1, 0
+; VSX-NEXT:    vsubuqm 2, 2, 3
+; VSX-NEXT:    blr
+;
+; NOVSX-LABEL: decrement_by_val:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi 3, 1, -16
+; NOVSX-NEXT:    std 6, -8(1)
+; NOVSX-NEXT:    std 5, -16(1)
+; NOVSX-NEXT:    lvx 3, 0, 3
+; NOVSX-NEXT:    vsubuqm 2, 2, 3
+; NOVSX-NEXT:    blr
+  %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0
+  %result = sub <1 x i128> %x, %tmpvec
+  ret <1 x i128> %result
 }
 
-declare <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x,
-                                              <1 x i128> %y,
-                                              <1 x i128> %z) nounwind readnone
-declare <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x,
-                                             <1 x i128> %y) nounwind readnone
-declare <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x,
-                                              <1 x i128> %y,
-                                              <1 x i128> %z) nounwind readnone
-declare <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x,
-                                              <1 x i128> %y,
-                                              <1 x i128> %z) nounwind readnone
-declare <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x,
-                                             <1 x i128> %y) nounwind readnone
-declare <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x,
-                                              <1 x i128> %y,
-                                              <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind readnone
 
-define <1 x i128> @test_vaddeuqm(<1 x i128> %x,
-       	    	                 <1 x i128> %y,
-                                 <1 x i128> %z) nounwind {
+define <1 x i128> @test_vaddeuqm(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind {
+; ALL-LABEL: test_vaddeuqm:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vaddeuqm 2, 2, 3, 4
+; ALL-NEXT:    blr
   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x,
-                                                         <1 x i128> %y,
-                                                         <1 x i128> %z)
+                 <1 x i128> %y,
+                 <1 x i128> %z)
   ret <1 x i128> %tmp
-; CHECK-LABEL: @test_vaddeuqm
-; CHECK: vaddeuqm 2, 2, 3, 4
 }
 
-define <1 x i128> @test_vaddcuq(<1 x i128> %x,
-       	    	                <1 x i128> %y) nounwind {
+define <1 x i128> @test_vaddcuq(<1 x i128> %x, <1 x i128> %y) nounwind {
+; ALL-LABEL: test_vaddcuq:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vaddcuq 2, 2, 3
+; ALL-NEXT:    blr
   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x,
-                                                        <1 x i128> %y)
+                <1 x i128> %y)
   ret <1 x i128> %tmp
-; CHECK-LABEL: @test_vaddcuq
-; CHECK: vaddcuq 2, 2, 3
 }
 
-define <1 x i128> @test_vaddecuq(<1 x i128> %x,
-       	    	                 <1 x i128> %y,
-                                 <1 x i128> %z) nounwind {
+define <1 x i128> @test_vaddecuq(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind {
+; ALL-LABEL: test_vaddecuq:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vaddecuq 2, 2, 3, 4
+; ALL-NEXT:    blr
   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x,
-                                                         <1 x i128> %y,
-                                                         <1 x i128> %z)
+                 <1 x i128> %y,
+                 <1 x i128> %z)
   ret <1 x i128> %tmp
-; CHECK-LABEL: @test_vaddecuq
-; CHECK: vaddecuq 2, 2, 3, 4
 }
 
-define <1 x i128> @test_vsubeuqm(<1 x i128> %x,
-       	    	                 <1 x i128> %y,
-                                 <1 x i128> %z) nounwind {
+define <1 x i128> @test_vsubeuqm(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind {
+; ALL-LABEL: test_vsubeuqm:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vsubeuqm 2, 2, 3, 4
+; ALL-NEXT:    blr
   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x,
-                                                         <1 x i128> %y,
-                                                         <1 x i128> %z)
+                 <1 x i128> %y,
+                 <1 x i128> %z)
   ret <1 x i128> %tmp
-; CHECK-LABEL: test_vsubeuqm
-; CHECK: vsubeuqm 2, 2, 3, 4
 }
 
-define <1 x i128> @test_vsubcuq(<1 x i128> %x,
-       	    	                <1 x i128> %y) nounwind {
+define <1 x i128> @test_vsubcuq(<1 x i128> %x, <1 x i128> %y) nounwind {
+; ALL-LABEL: test_vsubcuq:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vsubcuq 2, 2, 3
+; ALL-NEXT:    blr
   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x,
-                                                        <1 x i128> %y)
+                <1 x i128> %y)
   ret <1 x i128> %tmp
-; CHECK-LABEL: test_vsubcuq
-; CHECK: vsubcuq 2, 2, 3
 }
 
-define <1 x i128> @test_vsubecuq(<1 x i128> %x,
-       	    	                 <1 x i128> %y,
-                                 <1 x i128> %z) nounwind {
+define <1 x i128> @test_vsubecuq(<1 x i128> %x, <1 x i128> %y, <1 x i128> %z) nounwind {
+; ALL-LABEL: test_vsubecuq:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vsubecuq 2, 2, 3, 4
+; ALL-NEXT:    blr
   %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x,
-                                                         <1 x i128> %y,
-                                                         <1 x i128> %z)
+                 <1 x i128> %y,
+                 <1 x i128> %z)
   ret <1 x i128> %tmp
-; CHECK-LABEL: test_vsubecuq
-; CHECK: vsubecuq 2, 2, 3, 4
 }
-

From e18b5c62376ce5258c19ba60358cffd9e8fa67bd Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 23 May 2019 18:09:26 +0000
Subject: [PATCH 0062/1176] [WebAssembly] Implement ReplaceNodeResults to fix a
 SIMD crash

Reviewers: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61037

llvm-svn: 361526
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 15 ++++++++++++
 .../WebAssembly/WebAssemblyISelLowering.h     |  3 +++
 .../WebAssembly/simd-illegal-signext.ll       | 24 +++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 1f72c654ee8f0..06aee6e80a04f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -897,6 +897,21 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   return Chain;
 }
 
+void WebAssemblyTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  case ISD::SIGN_EXTEND_INREG:
+    // Do not add any results, signifying that N should not be custom lowered
+    // after all. This happens because simd128 turns on custom lowering for
+    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
+    // illegal type.
+    break;
+  default:
+    llvm_unreachable(
+        "ReplaceNodeResults not implemented for this op for WebAssembly!");
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //  Custom lowering hooks.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index f899f0feee417..80fca28764724 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -85,6 +85,9 @@ class WebAssemblyTargetLowering final : public TargetLowering {
                                const SDLoc &DL, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const override;
 
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
+
   // Custom lowering hooks.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
   SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll b/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll
new file mode 100644
index 0000000000000..20775f0ee3a54
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-illegal-signext.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mattr=+simd128 | FileCheck %s
+
+; Regression test for a crash caused by
+; WebAssemblyTargetLowering::ReplaceNodeResults not being
+; implemented. Since SIMD is enabled, sign_ext_inreg is custom lowered
+; but the result is i16, an illegal value. This requires
+; ReplaceNodeResults to resolve, but the default implementation is to
+; abort.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-emscripten"
+
+; CHECK: i32.load8_s
+; CHECK-NEXT: i32.store16
+define void @foo() {
+entry:
+  %0 = load i32*, i32** undef, align 4
+  %1 = load i32, i32* %0, align 4
+  %2 = load i32, i32* undef, align 4
+  %conv67 = trunc i32 %2 to i8
+  %conv68 = sext i8 %conv67 to i16
+  store i16 %conv68, i16* null, align 2
+  ret void
+}

From 170dfeb2ff06d6eb9157054c9b321a5bb2fc10f7 Mon Sep 17 00:00:00 2001
From: Robert Lougher <rob.lougher@gmail.com>
Date: Thu, 23 May 2019 18:15:12 +0000
Subject: [PATCH 0063/1176] Resubmit r360436 "[X86] Avoid SFB - Fix
 inconsistent codegen with/without debug info"

Fixes https://bugs.llvm.org/show_bug.cgi?id=40969

The functions findPotentiallyBlockedCopies and buildCopy are currently not
accounting for the presence of debug instructions. In the former this results
in the optimization not being trigerred, and in the latter results in
inconsistent codegen.

This patch enables the optimization to be performed in a debug build and
ensures the codegen is consistent with non-debug builds.

Patch by Chris Dawson.

Differential Revision: https://reviews.llvm.org/D61680

llvm-svn: 361527
---
 .../X86/X86AvoidStoreForwardingBlocks.cpp     |  14 +-
 .../CodeGen/X86/avoid-sfb-g-no-change.mir     | 222 ++++++++++++++++++
 2 files changed, 232 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir

diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 3ac0b1ae5143d..a9d807b733b7e 100644
--- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -406,7 +406,10 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
   // If the load and store are consecutive, use the loadInst location to
   // reduce register pressure.
   MachineInstr *StInst = StoreInst;
-  if (StoreInst->getPrevNode() == LoadInst)
+  auto PrevInstrIt = skipDebugInstructionsBackward(
+      std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
+      MBB->instr_begin());
+  if (PrevInstrIt.getNodePtr() == LoadInst)
     StInst = LoadInst;
   MachineInstr *NewStore =
       BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
@@ -491,19 +494,22 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
 static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
   MachineOperand &LoadBase = getBaseOperand(LoadInst);
   MachineOperand &StoreBase = getBaseOperand(StoreInst);
+  auto StorePrevNonDbgInstr = skipDebugInstructionsBackward(
+          std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
+          LoadInst->getParent()->instr_begin()).getNodePtr();
   if (LoadBase.isReg()) {
     MachineInstr *LastLoad = LoadInst->getPrevNode();
     // If the original load and store to xmm/ymm were consecutive
     // then the partial copies were also created in
     // a consecutive order to reduce register pressure,
     // and the location of the last load is before the last store.
-    if (StoreInst->getPrevNode() == LoadInst)
+    if (StorePrevNonDbgInstr == LoadInst)
       LastLoad = LoadInst->getPrevNode()->getPrevNode();
     getBaseOperand(LastLoad).setIsKill(LoadBase.isKill());
   }
   if (StoreBase.isReg()) {
     MachineInstr *StInst = StoreInst;
-    if (StoreInst->getPrevNode() == LoadInst)
+    if (StorePrevNonDbgInstr == LoadInst)
       StInst = LoadInst;
     getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill());
   }
@@ -530,7 +536,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
       if (!isPotentialBlockedMemCpyLd(MI.getOpcode()))
         continue;
       int DefVR = MI.getOperand(0).getReg();
-      if (!MRI->hasOneUse(DefVR))
+      if (!MRI->hasOneNonDBGUse(DefVR))
         continue;
       for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
            UI != UE;) {
diff --git a/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir
new file mode 100644
index 0000000000000..82cb9a786c29a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avoid-sfb-g-no-change.mir
@@ -0,0 +1,222 @@
+# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes DEBUG-LABEL,CHECK
+# RUN: llc %s -run-pass x86-avoid-SFB -mtriple=x86_64-unknown-linux-gnu -o - | FileCheck %s -check-prefixes NODEBUG-LABEL,CHECK
+#
+# This was generated from:
+#
+# using alpha = float __attribute__((ext_vector_type(4)));
+#
+# void bravo(alpha * __restrict__ p1, alpha * __restrict__ p2) {
+#   char *p3 = (char *)p1;
+#   *p3 = 0;
+#   alpha t = *p1;
+#   *p2 = t;
+# }
+#
+# Using the command line:
+# clang -g -c 1.cpp -O2 -S -emit-llvm -fno-strict-aliasing --target=x86_64-unknown-unknown -o test.ll
+# llc -stop-before=x86-avoid-SFB test.ll -o before.mir
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "1.cpp"
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-unknown"
+
+  ; Function Attrs: norecurse nounwind uwtable
+  define dso_local void @debug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 !dbg !10 {
+  entry:
+    call void @llvm.dbg.value(metadata <4 x float>* %p1, metadata !21, metadata !DIExpression()), !dbg !25
+    call void @llvm.dbg.value(metadata <4 x float>* %p2, metadata !22, metadata !DIExpression()), !dbg !25
+    %0 = bitcast <4 x float>* %p1 to i8*, !dbg !26
+    call void @llvm.dbg.value(metadata i8* %0, metadata !23, metadata !DIExpression()), !dbg !25
+    store i8 0, i8* %0, align 1, !dbg !27
+    %1 = load <4 x float>, <4 x float>* %p1, align 16, !dbg !28
+    call void @llvm.dbg.value(metadata <4 x float> %1, metadata !24, metadata !DIExpression()), !dbg !25
+    store <4 x float> %1, <4 x float>* %p2, align 16, !dbg !29
+    ret void, !dbg !30
+  }
+
+  ; Function Attrs: norecurse nounwind uwtable
+  define dso_local void @nodebug(<4 x float>* noalias nocapture %p1, <4 x float>* noalias nocapture %p2) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast <4 x float>* %p1 to i8*
+    store i8 0, i8* %0, align 1
+    %1 = load <4 x float>, <4 x float>* %p1, align 16
+    store <4 x float> %1, <4 x float>* %p2, align 16
+    ret void
+  }
+
+  ; Function Attrs: nounwind readnone speculatable
+  declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #2
+
+  attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { nounwind readnone speculatable }
+  attributes #2 = { nounwind }
+
+  !llvm.dbg.cu = !{!0}
+  !llvm.module.flags = !{!6, !7, !8}
+  !llvm.ident = !{!9}
+
+  !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, nameTableKind: None)
+  !1 = !DIFile(filename: "1.cpp", directory: "C:\5CUsers\5Cgbdawsoc\5CDocuments\5Cllvm\5Cbg40969")
+  !2 = !{}
+  !3 = !{!4}
+  !4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
+  !5 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+  !6 = !{i32 2, !"Dwarf Version", i32 4}
+  !7 = !{i32 2, !"Debug Info Version", i32 3}
+  !8 = !{i32 1, !"wchar_size", i32 4}
+  !9 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 9afc4764dd24bd2f23c44e51ad33f8e58234a8b6)"}
+  !10 = distinct !DISubprogram(name: "bravo", linkageName: "_Z5bravoPDv4_fS0_", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
+  !11 = !DISubroutineType(types: !12)
+  !12 = !{null, !13, !13}
+  !13 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !14)
+  !14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
+  !15 = !DIDerivedType(tag: DW_TAG_typedef, name: "alpha", file: !1, line: 2, baseType: !16)
+  !16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 128, flags: DIFlagVector, elements: !18)
+  !17 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+  !18 = !{!19}
+  !19 = !DISubrange(count: 4)
+  !20 = !{!21, !22, !23, !24}
+  !21 = !DILocalVariable(name: "p1", arg: 1, scope: !10, file: !1, line: 4, type: !13)
+  !22 = !DILocalVariable(name: "p2", arg: 2, scope: !10, file: !1, line: 4, type: !13)
+  !23 = !DILocalVariable(name: "p3", scope: !10, file: !1, line: 5, type: !4)
+  !24 = !DILocalVariable(name: "t", scope: !10, file: !1, line: 7, type: !15)
+  !25 = !DILocation(line: 0, scope: !10)
+  !26 = !DILocation(line: 5, column: 14, scope: !10)
+  !27 = !DILocation(line: 6, column: 7, scope: !10)
+  !28 = !DILocation(line: 7, column: 13, scope: !10)
+  !29 = !DILocation(line: 8, column: 7, scope: !10)
+  !30 = !DILocation(line: 9, column: 1, scope: !10)
+
+...
+---
+name:            debug
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64, preferred-register: '' }
+  - { id: 2, class: vr128, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+  - { reg: '$rsi', virtual-reg: '%1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $rdi, $rsi
+
+    DBG_VALUE $rdi, $noreg, !21, !DIExpression(), debug-location !25
+    DBG_VALUE $rsi, $noreg, !22, !DIExpression(), debug-location !25
+    %1:gr64 = COPY $rsi
+    DBG_VALUE %1, $noreg, !22, !DIExpression(), debug-location !25
+    %0:gr64 = COPY $rdi
+    DBG_VALUE %0, $noreg, !21, !DIExpression(), debug-location !25
+    DBG_VALUE %0, $noreg, !23, !DIExpression(), debug-location !25
+    MOV8mi %0, 1, $noreg, 0, $noreg, 0, debug-location !27 :: (store 1 into %ir.0)
+    %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg, debug-location !28 :: (load 16 from %ir.p1)
+    DBG_VALUE %2, $noreg, !24, !DIExpression(), debug-location !25
+    MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2, debug-location !29 :: (store 16 into %ir.p2)
+    RET 0, debug-location !30
+
+...
+---
+name:            nodebug
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64, preferred-register: '' }
+  - { id: 2, class: vr128, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+  - { reg: '$rsi', virtual-reg: '%1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $rdi, $rsi
+
+    %1:gr64 = COPY $rsi
+    %0:gr64 = COPY $rdi
+    MOV8mi %0, 1, $noreg, 0, $noreg, 0 :: (store 1 into %ir.0)
+    %2:vr128 = MOVAPSrm %0, 1, $noreg, 0, $noreg :: (load 16 from %ir.p1)
+    MOVAPSmr %1, 1, $noreg, 0, $noreg, killed %2 :: (store 16 into %ir.p2)
+    RET 0
+
+    ; DEBUG-LABEL: name: debug
+    ; NODEBUG-LABEL: name: nodebug
+    ; CHECK: %1:gr64 = COPY
+    ; CHECK: %0:gr64 = COPY
+    ; CHECK: MOV8mi
+    ; CHECK: %3:gr8 = MOV8rm
+    ; CHECK: MOV8mr
+    ; CHECK: %4:gr64 = MOV64rm
+    ; CHECK: MOV64mr
+    ; CHECK: %5:gr32 = MOV32rm
+    ; CHECK: MOV32mr
+    ; CHECK: %6:gr16 = MOV16rm
+    ; CHECK: MOV16mr
+    ; CHECK: %7:gr8 = MOV8rm
+    ; CHECK: MOV8mr
+    ; CHECK: RET 0
+    ; DEBUG-LABEL: name: nodebug
+...

From a21d5ab369ead9b9517b13aa8de3606bc641bfa5 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 23 May 2019 18:15:43 +0000
Subject: [PATCH 0064/1176] [HostNativeThreadBase] Undo nullptr changes

The thread result type is an unsigned instead of a pointer on windows,
so we shouldn't replace 0 with nullptr here.

llvm-svn: 361528
---
 lldb/source/Host/common/HostNativeThreadBase.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp
index 82f519e14f17b..a5f876a7232af 100644
--- a/lldb/source/Host/common/HostNativeThreadBase.cpp
+++ b/lldb/source/Host/common/HostNativeThreadBase.cpp
@@ -18,10 +18,10 @@ using namespace lldb;
 using namespace lldb_private;
 
 HostNativeThreadBase::HostNativeThreadBase()
-    : m_thread(LLDB_INVALID_HOST_THREAD), m_result(nullptr) {}
+    : m_thread(LLDB_INVALID_HOST_THREAD), m_result(0) {}
 
 HostNativeThreadBase::HostNativeThreadBase(thread_t thread)
-    : m_thread(thread), m_result(nullptr) {}
+    : m_thread(thread), m_result(0) {}
 
 lldb::thread_t HostNativeThreadBase::GetSystemHandle() const {
   return m_thread;
@@ -37,7 +37,7 @@ bool HostNativeThreadBase::IsJoinable() const {
 
 void HostNativeThreadBase::Reset() {
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = nullptr;
+  m_result = 0;
 }
 
 bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
@@ -47,7 +47,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
 lldb::thread_t HostNativeThreadBase::Release() {
   lldb::thread_t result = m_thread;
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = nullptr;
+  m_result = 0;
 
   return result;
 }

From 06e80f642612a7f80a15543f1cd84a19d688d3f7 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Thu, 23 May 2019 18:19:54 +0000
Subject: [PATCH 0065/1176] [OPENMP]Simplify codegen for the outlined regions.

Simplified codegen for the outlined regions, excluding duplication code
for handling variables with the reference types.

llvm-svn: 361529
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp            | 59 +++++---------
 .../distribute_firstprivate_codegen.cpp       | 15 +++-
 .../OpenMP/distribute_lastprivate_codegen.cpp | 19 +++--
 ...bute_parallel_for_firstprivate_codegen.cpp | 15 ++--
 ...ibute_parallel_for_lastprivate_codegen.cpp | 20 ++---
 ...parallel_for_simd_firstprivate_codegen.cpp | 10 +++
 ..._parallel_for_simd_lastprivate_codegen.cpp | 18 ++---
 .../distribute_simd_firstprivate_codegen.cpp  |  6 +-
 .../distribute_simd_lastprivate_codegen.cpp   | 12 +--
 clang/test/OpenMP/for_reduction_codegen.cpp   | 13 +++-
 .../test/OpenMP/for_reduction_codegen_UDR.cpp |  4 +
 .../OpenMP/parallel_firstprivate_codegen.cpp  | 12 +--
 clang/test/OpenMP/target_map_codegen.cpp      |  8 ++
 ..._teams_distribute_firstprivate_codegen.cpp |  9 ++-
 ...t_teams_distribute_lastprivate_codegen.cpp | 18 ++---
 ...bute_parallel_for_firstprivate_codegen.cpp | 15 +++-
 ...ibute_parallel_for_lastprivate_codegen.cpp | 78 ++++++++++---------
 ...parallel_for_simd_firstprivate_codegen.cpp |  7 +-
 ..._parallel_for_simd_lastprivate_codegen.cpp | 22 +++---
 ...s_distribute_simd_firstprivate_codegen.cpp |  4 +-
 ...ms_distribute_simd_lastprivate_codegen.cpp | 15 ++--
 .../teams_distribute_firstprivate_codegen.cpp |  8 +-
 .../teams_distribute_lastprivate_codegen.cpp  | 13 ++--
 ...bute_parallel_for_firstprivate_codegen.cpp | 15 ++--
 ...ibute_parallel_for_lastprivate_codegen.cpp | 18 +++--
 ...parallel_for_simd_firstprivate_codegen.cpp | 14 ++--
 ..._parallel_for_simd_lastprivate_codegen.cpp | 18 +++--
 ...s_distribute_simd_firstprivate_codegen.cpp |  8 +-
 ...ms_distribute_simd_lastprivate_codegen.cpp | 13 +++-
 29 files changed, 288 insertions(+), 198 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d27afcdd33068..01194e3a60fa1 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -296,8 +296,7 @@ void CodeGenFunction::GenerateOpenMPCapturedVars(
 
 static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                     QualType DstType, StringRef Name,
-                                    LValue AddrLV,
-                                    bool isReferenceType = false) {
+                                    LValue AddrLV) {
   ASTContext &Ctx = CGF.getContext();
 
   llvm::Value *CastedPtr = CGF.EmitScalarConversion(
@@ -306,17 +305,6 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
   Address TmpAddr =
       CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
           .getAddress();
-
-  // If we are dealing with references we need to return the address of the
-  // reference instead of the reference of the value.
-  if (isReferenceType) {
-    QualType RefType = Ctx.getLValueReferenceType(DstType);
-    llvm::Value *RefVal = TmpAddr.getPointer();
-    TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref"));
-    LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
-    CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true);
-  }
-
   return TmpAddr;
 }
 
@@ -473,14 +461,6 @@ static llvm::Function *emitOutlinedFunctionPrologue(
     // use the value that we get from the arguments.
     if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
       const VarDecl *CurVD = I->getCapturedVar();
-      // If the variable is a reference we need to materialize it here.
-      if (CurVD->getType()->isReferenceType()) {
-        Address RefAddr = CGF.CreateMemTemp(
-            CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
-        CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
-                              /*Volatile=*/false, CurVD->getType());
-        LocalAddr = RefAddr;
-      }
       if (!FO.RegisterCastedArgsOnly)
         LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
       ++Cnt;
@@ -504,15 +484,12 @@ static llvm::Function *emitOutlinedFunctionPrologue(
       const VarDecl *Var = I->getCapturedVar();
       QualType VarTy = Var->getType();
       Address ArgAddr = ArgLVal.getAddress();
-      if (!VarTy->isReferenceType()) {
-        if (ArgLVal.getType()->isLValueReferenceType()) {
-          ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
-        } else if (!VarTy->isVariablyModifiedType() ||
-                   !VarTy->isPointerType()) {
-          assert(ArgLVal.getType()->isPointerType());
-          ArgAddr = CGF.EmitLoadOfPointer(
-              ArgAddr, ArgLVal.getType()->castAs<PointerType>());
-        }
+      if (ArgLVal.getType()->isLValueReferenceType()) {
+        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
+      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
+        assert(ArgLVal.getType()->isPointerType());
+        ArgAddr = CGF.EmitLoadOfPointer(
+            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
       }
       if (!FO.RegisterCastedArgsOnly) {
         LocalAddrs.insert(
@@ -523,14 +500,12 @@ static llvm::Function *emitOutlinedFunctionPrologue(
       assert(!FD->getType()->isAnyPointerType() &&
              "Not expecting a captured pointer.");
       const VarDecl *Var = I->getCapturedVar();
-      QualType VarTy = Var->getType();
-      LocalAddrs.insert(
-          {Args[Cnt],
-           {Var, FO.UIntPtrCastRequired
-                     ? castValueFromUintptr(CGF, I->getLocation(),
-                                            FD->getType(), Args[Cnt]->getName(),
-                                            ArgLVal, VarTy->isReferenceType())
-                     : ArgLVal.getAddress()}});
+      LocalAddrs.insert({Args[Cnt],
+                         {Var, FO.UIntPtrCastRequired
+                                   ? castValueFromUintptr(
+                                         CGF, I->getLocation(), FD->getType(),
+                                         Args[Cnt]->getName(), ArgLVal)
+                                   : ArgLVal.getAddress()}});
     } else {
       // If 'this' is captured, load it into CXXThisValue.
       assert(I->capturesThis());
@@ -566,16 +541,20 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
                      Out.str());
   llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                    VLASizes, CXXThisValue, FO);
+  CodeGenFunction::OMPPrivateScope LocalScope(*this);
   for (const auto &LocalAddrPair : LocalAddrs) {
     if (LocalAddrPair.second.first) {
-      setAddrOfLocalVar(LocalAddrPair.second.first,
-                        LocalAddrPair.second.second);
+      LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
+        return LocalAddrPair.second.second;
+      });
     }
   }
+  (void)LocalScope.Privatize();
   for (const auto &VLASizePair : VLASizes)
     VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
   PGO.assignRegionCounters(GlobalDecl(CD), F);
   CapturedStmtInfo->EmitBody(*this, CD->getBody());
+  (void)LocalScope.ForceCleanup();
   FinishFunction(CD->getBodyRBrace());
   if (!NeedWrapperFunction)
     return F;
diff --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
index d7fdd1c0acd4b..245d8a544688e 100644
--- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp
@@ -86,6 +86,7 @@ int main() {
       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*,
       // LAMBDA: [[G1_REF:%.+]] = alloca double*,
+      // LAMBDA: [[G1_REF1:%.+]] = alloca double*,
 
       // Actual private variables to be used in the body (tmp is used for the reference type)
       // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
@@ -105,9 +106,11 @@ int main() {
       // LAMBDA-DAG: [[SFVAR_ADDR_VAL:%.+]] = load float*, float** [[SFVAR_ADDR]],
       // LAMBDA-DAG: [[G1_ADDR_VAL:%.+]] = load double*, double** [[G1_ADDR]],
       // LAMBDA-DAG: store double* [[G1_ADDR_VAL]], double** [[G1_REF]],
+      // LAMBDA-DAG: [[G1_ADDR_VAL:%.+]] = load double*, double** [[G1_REF]],
+      // LAMBDA-DAG: store double* [[G1_ADDR_VAL]], double** [[G1_REF1]],
       // LAMBDA-DAG: [[G_CONV_VAL:%.+]] = load{{.*}} double, double* [[G_ADDR_VAL]],
       // LAMBDA-DAG: store double [[G_CONV_VAL]], double* [[G_PRIVATE]],
-      // LAMBDA-DAG: [[TMP_VAL:%.+]] = load double*, double** [[G1_REF]],
+      // LAMBDA-DAG: [[TMP_VAL:%.+]] = load double*, double** [[G1_REF1]],
       // LAMBDA-DAG: [[TMP_VAL_VAL:%.+]] = load{{.*}} double, double* [[TMP_VAL]],
       // LAMBDA-DAG: store double [[TMP_VAL_VAL]], double* [[G1_PRIVATE]],
       // LAMBDA-DAG: store double* [[G1_PRIVATE]], double** [[TMP_PRIVATE]],
@@ -224,6 +227,7 @@ int main() {
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[TMP1:%.+]] = alloca [[S_FLOAT_TY]]*,
 
 // discard omp loop variables
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
@@ -280,7 +284,9 @@ int main() {
 // init var
 // CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
 // CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]],
-// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP1]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}}
 // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}}
 // CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
@@ -321,6 +327,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP1:%.+]] = alloca [[S_INT_TY]]*,
 
 // discard omp loop variables
 // CHECK: {{.*}} = alloca i{{[0-9]+}},
@@ -375,7 +382,9 @@ int main() {
 // init var
 // CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
 // CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]],
-// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: [[VAR_ADDR_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP1]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}}
 // CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}}
 // CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
index 175f3c8bde26f..8d6ceb6ee9352 100644
--- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
@@ -85,6 +85,7 @@ int main() {
       // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
       // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
       // LAMBDA: [[TMP_G1:%.+]] = alloca double*,
+      // LAMBDA: [[TMP1_G1:%.+]] = alloca double*,
       // loop variables
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
@@ -104,11 +105,13 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
-      // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[TMP_G1]],
+      // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP1_G1]],
+      // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP1_G1]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -214,6 +217,7 @@ int main() {
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[TMP1:%.*]] = alloca [[S_FLOAT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -239,11 +243,13 @@ int main() {
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP1]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP1]],
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -316,6 +322,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
 // CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP1:%.+]] = alloca [[S_INT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -344,8 +351,10 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR1_REF]], [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR1_REF]], [[S_INT_TY]]** [[TMP1]],
 // CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
-// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP1]],
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
 // CHECK: [[IV_VAL1:%.+]] =
diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
index 705418276856c..9dc38b3ddf488 100644
--- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp
@@ -86,6 +86,7 @@ int main() {
       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca float*,
       // LAMBDA: [[G1_REF:%.+]] = alloca double*,
+      // LAMBDA: [[G1_REF1:%.+]] = alloca double*,
 
       // private alloca's
       // LAMBDA: [[G_PRIV:%.+]] = alloca double,
@@ -111,7 +112,7 @@ int main() {
       // LAMBDA-DAG: store {{.+}} [[G_ADDR_VAL]], {{.+}}* [[G_PRIV]],
 
       // g1
-      // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
+      // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF1]],
       // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]],
       // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]]
       // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]],
@@ -293,6 +294,7 @@ int main() {
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[TMP1:%.+]] = alloca [[S_FLOAT_TY]]*,
 
 // skip loop alloca's
 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
@@ -342,7 +344,7 @@ int main() {
 // CHECK-DAG: [[CPY_DONE]]:
 
 // var
-// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
@@ -393,6 +395,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TMP:%.+]] = alloca [[S_FLOAT_TY]]*,
 
 // skip loop alloca's
 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
@@ -435,7 +438,7 @@ int main() {
 // CHECK-DAG: [[CPY_DONE]]:
 
 // var
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
@@ -477,6 +480,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP1:%.+]] = alloca [[S_INT_TY]]*,
 
 // skip loop alloca's
 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
@@ -524,7 +528,7 @@ int main() {
 // CHECK-DAG: [[CPY_DONE]]:
 
 // var
-// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
@@ -564,6 +568,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
 
 // skip loop alloca's
 // CHECK: [[OMP_IV:.omp.iv+]] = alloca i{{[0-9]+}},
@@ -605,7 +610,7 @@ int main() {
 // CHECK-DAG: [[CPY_DONE]]:
 
 // var
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
index b10c02ba7964d..20b233f0bb88b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -106,9 +106,9 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
       // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
@@ -129,7 +129,7 @@ int main() {
       // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
       // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
       // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
-      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* %
 
       // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
       // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
@@ -176,10 +176,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
 
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
 
       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
@@ -212,7 +212,7 @@ int main() {
       // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
       // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
       // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
-      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* %
       // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
       // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
       // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
@@ -311,8 +311,8 @@ int main() {
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
 
@@ -327,7 +327,7 @@ int main() {
 // CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
 // CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
 // CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
 
@@ -424,7 +424,7 @@ int main() {
 // CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
 // CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
 // CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
 
@@ -567,7 +567,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],{{.+}})
 // CHECK: ret void
@@ -605,6 +605,7 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
 
 // call constructor for s_arr
@@ -618,7 +619,6 @@ int main() {
 // CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
 // CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
 // CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
 // CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
 // CHECK: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
 
@@ -667,7 +667,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
index 50e45be0ee287..62e2c8c0b8ecc 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -108,6 +108,8 @@ int main() {
 
       // g1
       // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[G1_REF]],
+      // LAMBDA-DAG: store {{.+}} [[TMP_REF]], {{.+}}* [[TMP:%.+]],
+      // LAMBDA-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP]],
       // LAMBDA-DAG: [[TMP_VAL:%.+]] = load {{.+}}, {{.+}}* [[TMP_REF]],
       // LAMBDA-DAG: store {{.+}} [[TMP_VAL]], {{.+}}* [[G1_PRIV]]
       // LAMBDA-DAG: store {{.+}}* [[G1_PRIV]], {{.+}}** [[TMP_PRIV]],
@@ -341,6 +343,8 @@ int main() {
 
 // var
 // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
+// CHECK-DAG: store {{.+}} [[TMP_REF]], {{.+}}* [[TMP1:%.+]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
@@ -434,6 +438,8 @@ int main() {
 
 // var
 // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VAR_ADDR_REF]], {{.+}}* [[TMP1:%.+]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
@@ -523,6 +529,8 @@ int main() {
 
 // var
 // CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}* [[TMP]],
+// CHECK-DAG: store {{.+}} [[TMP_REF]], {{.+}}* [[TMP1:%.+]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[TMP_REF_BCAST:%.+]] = bitcast {{.+}}* [[TMP_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_REF_BCAST]],
@@ -604,6 +612,8 @@ int main() {
 
 // var
 // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}* [[VAR_ADDR]],
+// CHECK-DAG: store {{.+}} [[VAR_ADDR_REF]], {{.+}}* [[TMP1:%.+]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load {{.+}}*, {{.+}}** [[TMP1]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast {{.+}}* [[VAR_PRIV]] to
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}}* [[VAR_ADDR_REF]] to
 // CHECK-DAG: call void @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
index c79bceef455fa..516e9c067c56f 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -106,9 +106,9 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
       // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
@@ -138,7 +138,7 @@ int main() {
       // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
       // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
       // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
-      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* %
 
       // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
       // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
@@ -185,10 +185,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
 
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
 
       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
@@ -222,7 +222,7 @@ int main() {
       // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
       // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
       // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
-      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* %
       // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
       // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
       // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
@@ -321,8 +321,8 @@ int main() {
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
 
@@ -337,7 +337,7 @@ int main() {
 // CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
 // CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
 // CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
 
@@ -434,7 +434,7 @@ int main() {
 // CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
 // CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
 // CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK: call void [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
 
@@ -577,7 +577,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL_BCAST]],{{.+}})
 // CHECK: ret void
@@ -628,7 +628,7 @@ int main() {
 // CHECK: [[S_ARR_DONE:%.+]] = icmp {{.+}} [[S_ARR_NEXT]], [[S_ARR_END]]
 // CHECK: br i1 [[S_ARR_DONE]], label %[[S_ARR_CST_END:.+]], label %[[S_ARR_CST_LOOP]]
 // CHECK: [[S_ARR_CST_END]]:
-// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
 // CHECK: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
 
diff --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
index 36bd1456af5b3..858a7b98d25ec 100644
--- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp
@@ -109,7 +109,7 @@ int main() {
       // LAMBDA-DAG: [[G_VAL:%.+]] = load{{.+}} double, double* [[G_ADDR_VAL]],
       // LAMBDA-DAG: store double [[G_VAL]], double* [[G_PRIVATE]],
       // LAMBDA-DAG: [[G1_VAL_REF:%.+]] = load double*, double** [[G1_REF]],
-      // LAMBDA-DAG: [[G1_VAL:%.+]] = load{{.+}} double, double* [[G1_VAL_REF]],
+      // LAMBDA-DAG: [[G1_VAL:%.+]] = load{{.+}} double, double* %
       // LAMBDA-DAG: store double [[G1_VAL]], double* [[G1_PRIVATE]],
       // LAMBDA-DAG: store double* [[G1_PRIVATE]], double** [[TMP_PRIVATE]],
       // LAMBDA-DAG: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_ADDR_VAL]],
@@ -284,7 +284,7 @@ int main() {
 // CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]],
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_PRIV]] to{{.+}}
-// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to{{.+}}
+// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* %{{.+}} to{{.+}}
 // CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
 // CHECK-DAG: store [[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]** [[TMP_PRIV]],
 
@@ -379,7 +379,7 @@ int main() {
 // CHECK-DAG: store{{.+}} [[VAR_ADDR_VAL]],{{.+}} [[TMP]],
 // CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
 // CHECK-DAG: [[VAR_PRIV_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_PRIV]] to{{.+}}
-// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL]] to{{.+}}
+// CHECK-DAG: [[TMP_BCAST:%.+]] = bitcast [[S_INT_TY]]* %{{.+}} to{{.+}}
 // CHECK-DAG: call{{.+}} @llvm.memcpy.{{.+}}({{.+}}* align {{[0-9]+}} [[VAR_PRIV_BCAST]], {{.+}}* align {{[0-9]+}} [[TMP_BCAST]],{{.+}})
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP_PRIV]],
 
diff --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
index 22a24a66495a5..6bc146f728aea 100644
--- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp
@@ -104,9 +104,9 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
       // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
@@ -151,7 +151,7 @@ int main() {
       // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
       // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
       // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
-      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* %
 
       // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
       // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
@@ -250,11 +250,13 @@ int main() {
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
-// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
-// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
 // CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: store [[S_FLOAT_TY]]* [[TMP_REF]], [[S_FLOAT_TY]]** [[TMP1:%.+]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP1]],
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -400,7 +402,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp
index 68946a8b4f79b..2e616db1a5553 100644
--- a/clang/test/OpenMP/for_reduction_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen.cpp
@@ -292,14 +292,16 @@ int main() {
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
 // CHECK: [[T_VAR_REF:%.+]] = load float*, float** %
+// CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK: [[VAR1_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK: [[T_VAR1_REF:%.+]] = load float*, float** %
 
 // For + reduction operation initial value of private variable is 0.
 // CHECK: store float 0.0{{.+}}, float* [[T_VAR_PRIV]],
 
-// For & reduction operation initial value of private variable is ones in all bits.
 // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
+
+// For & reduction operation initial value of private variable is ones in all bits.
 // CHECK: call {{.*}} [[S_FLOAT_TY_CONSTR:@.+]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 
 // For && reduction operation initial value of private variable is 1.0.
@@ -1101,6 +1103,8 @@ int main() {
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
@@ -1132,6 +1136,8 @@ int main() {
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
@@ -1160,6 +1166,8 @@ int main() {
 // Reduction list for runtime.
 // CHECK: [[RED_LIST:%.+]] = alloca [2 x i8*],
 
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
@@ -1194,6 +1202,8 @@ int main() {
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
@@ -1233,6 +1243,7 @@ int main() {
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
 // CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
+// CHECK: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: [[VAR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: [[T_VAR1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
 
diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp
index 0eee597d0232b..c7c3f93748734 100644
--- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp
+++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp
@@ -790,6 +790,8 @@ int main() {
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
@@ -820,6 +822,8 @@ int main() {
 
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
+// CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
+// CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
 // CHECK: [[VAR3_ORIG:%.+]] = load [4 x [[S_FLOAT_TY]]]*, [4 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp
index 66964152aa2a2..8aa64668f0525 100644
--- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -180,10 +180,10 @@ int main() {
     // LAMBDA: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
     // LAMBDA-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV]] to i32*
-    // LAMBDA-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
-    // LAMBDA-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
     // LAMBDA-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV]] to i32*
     // LAMBDA-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV]] to i32*
+    // LAMBDA-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
+    // LAMBDA-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
     // LAMBDA-64: store i32* [[C_CONV]], i32** [[REFC:%.+]],
     // LAMBDA-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]],
     // LAMBDA-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
@@ -303,10 +303,10 @@ int main() {
 // BLOCKS: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
 // BLOCKS: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
 // BLOCKS-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV]] to i32*
-// BLOCKS-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
-// BLOCKS-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
 // BLOCKS-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV]] to i32*
 // BLOCKS-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV]] to i32*
+// BLOCKS-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
+// BLOCKS-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
 // BLOCKS-64: store i32* [[C_CONV]], i32** [[REFC:%.+]],
 // BLOCKS-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]],
 // BLOCKS-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REFA]],
@@ -452,10 +452,10 @@ int main() {
 // CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]]
 // CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]]
 // CHECK-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32*
-// CHECK-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
-// CHECK-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
 // CHECK-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV:%.+]] to i32*
 // CHECK-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32*
+// CHECK-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
+// CHECK-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
 // CHECK-64: store i32* [[C_CONV]], i32** [[REFC:%.+]],
 // CHECK-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]],
 // CHECK: bitcast [4 x i{{[0-9]+}}]* [[E_PRIV]] to i8*
diff --git a/clang/test/OpenMP/target_map_codegen.cpp b/clang/test/OpenMP/target_map_codegen.cpp
index 44d7ffc9cc5e1..8ed201a7ae1ac 100644
--- a/clang/test/OpenMP/target_map_codegen.cpp
+++ b/clang/test/OpenMP/target_map_codegen.cpp
@@ -4581,12 +4581,16 @@ struct CC {
 // CK26: define {{.+}}[[CALL00]]({{.*}}i32*{{.*}}[[PVTARG:%.+]])
 // CK26: store i32* [[PVTARG]], i32** [[PVTADDR:%.+]],
 // CK26: [[ADDR:%.+]] = load i32*, i32** [[PVTADDR]],
+// CK26: store i32* [[ADDR]], i32** [[PVTADDR:%.+]],
+// CK26: [[ADDR:%.+]] = load i32*, i32** [[PVTADDR]],
 // CK26: [[VAL:%.+]] = load i32, i32* [[ADDR]],
 // CK26: add nsw i32 [[VAL]], 1
 
 // CK26: define {{.+}}[[CALL01]]({{.*}}float*{{.*}}[[PVTARG:%.+]])
 // CK26: store float* [[PVTARG]], float** [[PVTADDR:%.+]],
 // CK26: [[ADDR:%.+]] = load float*, float** [[PVTADDR]],
+// CK26: store float* [[ADDR]], float** [[PVTADDR:%.+]],
+// CK26: [[ADDR:%.+]] = load float*, float** [[PVTADDR]],
 // CK26: [[VAL:%.+]] = load float, float* [[ADDR]],
 // CK26: [[EXT:%.+]] = fpext float [[VAL]] to double
 // CK26: fadd double [[EXT]], 1.000000e+00
@@ -4594,12 +4598,16 @@ struct CC {
 // CK26: define {{.+}}[[CALL02]]({{.*}}i32*{{.*}}[[PVTARG:%.+]])
 // CK26: store i32* [[PVTARG]], i32** [[PVTADDR:%.+]],
 // CK26: [[ADDR:%.+]] = load i32*, i32** [[PVTADDR]],
+// CK26: store i32* [[ADDR]], i32** [[PVTADDR:%.+]],
+// CK26: [[ADDR:%.+]] = load i32*, i32** [[PVTADDR]],
 // CK26: [[VAL:%.+]] = load i32, i32* [[ADDR]],
 // CK26: add nsw i32 [[VAL]], 1
 
 // CK26: define {{.+}}[[CALL03]]({{.*}}float*{{.*}}[[PVTARG:%.+]])
 // CK26: store float* [[PVTARG]], float** [[PVTADDR:%.+]],
 // CK26: [[ADDR:%.+]] = load float*, float** [[PVTADDR]],
+// CK26: store float* [[ADDR]], float** [[PVTADDR:%.+]],
+// CK26: [[ADDR:%.+]] = load float*, float** [[PVTADDR]],
 // CK26: [[VAL:%.+]] = load float, float* [[ADDR]],
 // CK26: [[EXT:%.+]] = fpext float [[VAL]] to double
 // CK26: fadd double [[EXT]], 1.000000e+00
diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
index 5487fa86e8f01..c268b72a25edd 100644
--- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp
@@ -274,6 +274,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -304,15 +305,17 @@ int main() {
 // CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 // CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
 
-
 // T_VAR and preparation variables
 // CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
+// CHECK: [[VAR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK: store [[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]** [[VAR_ADDR:%.+]],
+
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
index 26a7c0d61da83..459384f464fdc 100644
--- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp
@@ -107,15 +107,15 @@ int main() {
       // LAMBDA-32: store i32 [[SFVAR_IN]], i32* [[SFVAR_PRIVATE_ADDR]],
 
       // init private variables
-      // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double*
       // LAMBDA-32: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double*
       // LAMBDA-64: [[G1_IN_REF:%.+]] = bitcast i64* [[G1_PRIVATE_ADDR]] to double*
-      // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]],
       // LAMBDA-64: [[SVAR_IN_REF:%.+]] = bitcast i64* [[SVAR_PRIVATE_ADDR]] to i32*
       // LAMBDA-64: [[SFVAR_IN_REF:%.+]] = bitcast i64* [[SFVAR_PRIVATE_ADDR]] to float*
-      // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]],
       // LAMBDA-64: [[G1_IN_REF:%.+]] = load double*, double** [[G1_IN_ADDR_REF]],
-      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -147,7 +147,7 @@ int main() {
       // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
       // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
       // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
-      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
+      // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* %
 
       // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
       // LAMBDA-64: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
@@ -244,9 +244,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK-64: [[T_VAR_ADDR_REF:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK-64: [[SVAR_ADDR_REF:%.+]] = bitcast i64* [[SVAR_ADDR]] to i32*
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -291,7 +291,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* %{{.+}} to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
@@ -346,8 +346,8 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK-64: [[T_VAR_ADDR_REF1:%.+]] = bitcast i64* [[T_VAR_ADDR1]] to i32*
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -393,7 +393,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* %{{.+}} to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
index 843b667316a56..9c91538ce7e0f 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -391,6 +391,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -426,9 +427,12 @@ int main() {
 // CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
+// CHECK: [[VAR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK: store [[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]** [[VAR_ADDR:%.+]],
+
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -487,9 +491,12 @@ int main() {
 // CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
+// CHECK: [[VAR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK: store [[S_INT_TY]]* [[VAR_REF]], [[S_INT_TY]]** [[VAR_ADDR:%.+]],
+
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
index acb1c0b636ece..c0f9ecaea20fb 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -83,7 +83,7 @@ int main() {
       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
-      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
       // loop variables
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
@@ -91,19 +91,18 @@ int main() {
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
-       
+
       // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
-    
+
       // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
       // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
-      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
-      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
       // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
       // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
-      
+
       g1 = 1;
       svar = 3;
       sfvar = 4.0;
@@ -135,7 +134,7 @@ int main() {
       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
-      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
       // loop variables
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
@@ -143,16 +142,15 @@ int main() {
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
-       
+
       // LAMBDA-DAG: store {{.+}} [[G_IN]], {{.+}} [[G_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[G1_IN]], {{.+}} [[G1_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[SVAR_IN]], {{.+}} [[SVAR_ADDR]],
       // LAMBDA-DAG: store {{.+}} [[SFVAR_IN]], {{.+}} [[SFVAR_ADDR]],
-    
+
       // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
       // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
-      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
-      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
       // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
       // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
 
@@ -164,7 +162,7 @@ int main() {
       // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
       // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
       // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
-      
+
       // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
       // LAMBDA-DAG: store {{.+}}, {{.+}} [[G_TGT]],
       // LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_TGT]],
@@ -176,26 +174,26 @@ int main() {
       // LAMBDA: ret
 
       [&]() {
-	// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
-	// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
-	g = 2;
-	g1 = 2;
-	svar = 4;
-	sfvar = 8.0;
-	// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
-	// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-	// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
-	// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
-
-	// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-	// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
-	// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
-	// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
-	// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
-	// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
-	// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
-	// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
-	// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+        // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+        // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+        g = 2;
+        g1 = 2;
+        svar = 4;
+        sfvar = 8.0;
+        // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+        // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+        // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+        // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+        // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+        // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+        // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+        // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+        // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+        // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+        // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+        // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
       }();
     }
   }();
@@ -237,6 +235,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -256,7 +255,7 @@ int main() {
 // CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR]],
 // CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
 
 // the distribute loop
@@ -295,6 +294,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -314,7 +314,7 @@ int main() {
 // CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR]],
 // CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
 
 // the distribute loop
@@ -360,6 +360,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -378,7 +379,7 @@ int main() {
 // CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]], 
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
@@ -397,7 +398,7 @@ int main() {
 // CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
 // CHECK: call void @llvm.memcpy.{{.+}}(
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],  
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
 // CHECK: ret void
 
 // CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
@@ -411,6 +412,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -429,7 +431,7 @@ int main() {
 // CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]], 
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // skip body: code generation routine is same as distribute parallel for lastprivate
@@ -448,7 +450,7 @@ int main() {
 // CHECK-DAG: {{.+}} = getelementptr {{.+}} [[S_ARR_TGT]],
 // CHECK: call void @llvm.memcpy.{{.+}}(
 // CHECK-DAG: [[VAR_ADDR_BCAST:%.+]] = bitcast {{.+}} [[VAR_TGT]] to
-// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],  
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_BCAST]],
 // CHECK: ret void
 
 #endif
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index 0b09cc6c4027b..978e90dec5432 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -391,6 +391,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -402,6 +403,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -444,7 +446,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
@@ -463,6 +465,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -505,7 +508,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index c3d38612d63d9..ca09990fbaf62 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -83,7 +83,7 @@ int main() {
       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
-      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
       // loop variables
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
@@ -99,8 +99,7 @@ int main() {
 
       // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
       // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
-      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
-      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
       // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
       // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
 
@@ -136,7 +135,7 @@ int main() {
       // LAMBDA: [[SVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[SFVAR_ADDR:%.+]] = alloca {{.+}},
       // LAMBDA: [[G_ADDR:%.+]] = alloca {{.+}},
-      // LAMBDA-64: [[G1_REF:%.+]] = alloca double*,
+      // LAMBDA: [[G1_REF:%.+]] = alloca double*,
       // loop variables
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
       // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
@@ -152,8 +151,7 @@ int main() {
 
       // LAMBDA-64-DAG: [[G_TGT:%.+]] = bitcast {{.+}} [[G_ADDR]] to
       // LAMBDA-32-DAG: [[G_TGT:%.+]] = load {{.+}}, {{.+}} [[G_ADDR]],
-      // LAMBDA-64-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
-      // LAMBDA-32-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_ADDR]],
+      // LAMBDA-DAG: [[G1_TGT:%.+]] = load {{.+}}, {{.+}} [[G1_REF]],
       // LAMBDA-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
       // LAMBDA-DAG: [[SFVAR_TGT:%.+]] = bitcast {{.+}} [[SFVAR_ADDR]] to
 
@@ -239,6 +237,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -258,7 +257,7 @@ int main() {
 // CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR]],
 // CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
 
 // the distribute loop
@@ -297,6 +296,7 @@ int main() {
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -316,7 +316,7 @@ int main() {
 // CHECK-64-DAG: [[TVAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR]],
 // CHECK-64-DAG: [[SVAR_TGT:%.+]] = bitcast {{.+}} [[SVAR_ADDR]] to
 
 // the distribute loop
@@ -362,6 +362,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -380,7 +381,7 @@ int main() {
 // CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
@@ -413,6 +414,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -431,7 +433,7 @@ int main() {
 // CHECK-64-DAG: [[T_VAR_TGT:%.+]] = bitcast {{.+}} [[T_VAR_ADDR1]] to
 // CHECK-DAG: [[VEC_TGT:%.+]] = load {{.+}}, {{.+}} [[VEC_ADDR1]],
 // CHECK-DAG: [[S_ARR_TGT:%.+]] = load {{.+}}, {{.+}} [[S_ARR_ADDR1]],
-// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR1]],
+// CHECK-DAG: [[VAR_TGT:%.+]] = load {{.+}}, {{.+}} [[TMP_VAR_ADDR1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
 // skip body: code generation routine is same as distribute parallel for lastprivate
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
index 463a6afd2d604..05ea0e897a4b5 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp
@@ -274,6 +274,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -285,6 +286,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -328,7 +330,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
index fdf6d2f3a7dc0..6fae7829874f9 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp
@@ -110,12 +110,13 @@ int main() {
       // LAMBDA-64: [[G_IN_REF:%.+]] = bitcast i64* [[G_PRIVATE_ADDR]] to double*
       // LAMBDA-32: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
       // LAMBDA-64: [[G1_IN_REF:%.+]] = bitcast i64* [[G1_PRIVATE_ADDR]] to double*
-      // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]],
+      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA-64: [[SVAR_IN_REF:%.+]] = bitcast i64* [[SVAR_PRIVATE_ADDR]] to i32*
       // LAMBDA-64: [[SFVAR_IN_REF:%.+]] = bitcast i64* [[SFVAR_PRIVATE_ADDR]] to float*
       // LAMBDA-32: [[SFVAR_IN_REF:%.+]] = bitcast i32* [[SFVAR_PRIVATE_ADDR]] to float*
+      // LAMBDA-64: store double* [[G1_IN_REF]], double** [[G1_IN_ADDR_REF:%.+]],
       // LAMBDA-64: [[G1_IN_REF:%.+]] = load double*, double** [[G1_IN_ADDR_REF]],
-      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA-32: [[G1_IN_REF:%.+]] = load double*, double** %
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -245,9 +246,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK-64: [[T_VAR_ADDR_REF:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK-64: [[SVAR_ADDR_REF:%.+]] = bitcast i64* [[SVAR_ADDR]] to i32*
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK:  [[TMP_VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -292,7 +294,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAR_ADDR_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
@@ -347,8 +349,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK-64: [[T_VAR_ADDR_REF1:%.+]] = bitcast i64* [[T_VAR_ADDR1]] to i32*
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK:  [[TMP_VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -394,7 +397,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAR_ADDR1_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
index 562e986df034c..672ed8e1b1690 100644
--- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp
@@ -277,6 +277,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -288,6 +289,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -314,8 +316,8 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -331,7 +333,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
diff --git a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
index 883234185aae8..581bb1d15cc9e 100644
--- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp
@@ -101,9 +101,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** %
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -232,9 +233,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK: [[TMP_VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -278,7 +280,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAR_ADDR_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
@@ -332,8 +334,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[TMP_VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -378,7 +381,7 @@ int main() {
 // CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
 // CHECK: [[S_ARR_COPY_DONE]]:
 // CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
-// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAR_ADDR1_REF]] to i8*
 // CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VAR_ADDR_REF_BCAST]], i8* align {{[0-9]+}} [[TMP_VAL1_BCAST]],{{.+}})
 // CHECK: ret void
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
index 7e161b1d2a62a..14bccc0c53cc4 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp
@@ -367,6 +367,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -378,6 +379,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -403,8 +405,8 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -420,7 +422,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
@@ -439,6 +441,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -464,8 +467,8 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -481,7 +484,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
index 451dc0579cd32..bc0b210718372 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp
@@ -101,9 +101,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** %
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -156,9 +157,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** %
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
 
       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
@@ -286,9 +288,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
@@ -365,9 +368,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 
@@ -466,8 +470,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
@@ -543,8 +548,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
index 68b0016a97462..6e8c0d18a73fe 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp
@@ -370,6 +370,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -381,6 +382,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -406,8 +408,8 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -423,7 +425,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
@@ -467,8 +469,8 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -484,7 +486,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
index c9141b01494fd..4d21a05b1be01 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -101,9 +101,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** %
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -157,9 +158,10 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** %
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
 
       // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
@@ -293,9 +295,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
@@ -372,9 +375,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
 // CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 
@@ -476,8 +480,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
@@ -553,8 +558,9 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 
 // CHECK: call void @__kmpc_for_static_init_4(
diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
index ec0b006a37743..f76e2c71ec606 100644
--- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp
@@ -277,6 +277,7 @@ int main() {
 // CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
 // CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
 
+// CHECK: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
 // CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
 // CHECK: ret void
 
@@ -288,6 +289,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // Skip temp vars for loop
 // CHECK: alloca i{{[0-9]+}},
 // CHECK: alloca i{{[0-9]+}},
@@ -314,8 +316,8 @@ int main() {
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
 
 // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
-// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* 
-// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* 
+// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
 // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}})
 
 // firstprivate(s_arr)
@@ -331,7 +333,7 @@ int main() {
 // CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
 
 // firstprivate(var)
-// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
+// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[TMP_VAR_ADDR]],
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
 // CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
diff --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
index 86cfd00f8c7a3..c7b507f254bcc 100644
--- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp
@@ -101,9 +101,11 @@ int main() {
 
       // init private variables
       // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
       // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
       // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
-      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+      // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP:%.+]],
+      // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[TMP]],
       // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
       g = 1;
       g1 = 1;
@@ -233,9 +235,11 @@ int main() {
 // CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
 // CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
 // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
 // CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP:%.+]],
 // CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
-// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
+// CHECK:  [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[TMP]],
 // the distribute loop
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;
@@ -307,6 +311,7 @@ int main() {
 // CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
 // CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
 // CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP1:%.+]] = alloca [[S_INT_TY]]*,
 // skip loop variables
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
 // CHECK: {{.+}} = alloca i{{[0-9]+}},
@@ -333,8 +338,10 @@ int main() {
 // CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
 // CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
 // CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
-// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
 // CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_ADDR1_REF]], [[S_INT_TY]]** [[TMP1]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP1]],
 // CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
 // CHECK: call void @__kmpc_for_static_init_4(
 // assignment: vec[i] = t_var;

From 549ed544c3b37f45a8e0c26d18468339e0b7bd7e Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Thu, 23 May 2019 18:35:43 +0000
Subject: [PATCH 0066/1176] [Driver] Move the "-o OUT -x TYPE SRC.c" flags to
 the end of -cc1

New -cc1 arguments, such as -faddrsig, have started appearing after the
input name. I personally find it convenient for the input to be the last
argument to the compile command line, since I often need to edit it when
running crash reproduction scripts.

Differential Revision: https://reviews.llvm.org/D62270

llvm-svn: 361530
---
 clang/lib/Driver/ToolChains/Clang.cpp      |  50 +++++-----
 clang/test/Driver/cuda-options.cu          |   6 +-
 clang/test/Driver/hip-toolchain-no-rdc.hip |   4 +-
 clang/test/Driver/openmp-offload.c         | 102 +++++++++++----------
 4 files changed, 84 insertions(+), 78 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d0ec3289acc86..d76e175959835 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5215,30 +5215,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       isa<CompileJobAction>(JA))
     CmdArgs.push_back("-disable-llvm-passes");
 
-  if (Output.getType() == types::TY_Dependencies) {
-    // Handled with other dependency code.
-  } else if (Output.isFilename()) {
-    CmdArgs.push_back("-o");
-    CmdArgs.push_back(Output.getFilename());
-  } else {
-    assert(Output.isNothing() && "Invalid output.");
-  }
-
-  addDashXForInput(Args, Input, CmdArgs);
-
-  ArrayRef<InputInfo> FrontendInputs = Input;
-  if (IsHeaderModulePrecompile)
-    FrontendInputs = ModuleHeaderInputs;
-  else if (Input.isNothing())
-    FrontendInputs = {};
-
-  for (const InputInfo &Input : FrontendInputs) {
-    if (Input.isFilename())
-      CmdArgs.push_back(Input.getFilename());
-    else
-      Input.getInputArg().renderAsInput(Args, CmdArgs);
-  }
-
   Args.AddAllArgs(CmdArgs, options::OPT_undef);
 
   const char *Exec = D.getClangProgramPath();
@@ -5436,6 +5412,32 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                        TC.useIntegratedAs()))
     CmdArgs.push_back("-faddrsig");
 
+  // Add the "-o out -x type src.c" flags last. This is done primarily to make
+  // the -cc1 command easier to edit when reproducing compiler crashes.
+  if (Output.getType() == types::TY_Dependencies) {
+    // Handled with other dependency code.
+  } else if (Output.isFilename()) {
+    CmdArgs.push_back("-o");
+    CmdArgs.push_back(Output.getFilename());
+  } else {
+    assert(Output.isNothing() && "Invalid output.");
+  }
+
+  addDashXForInput(Args, Input, CmdArgs);
+
+  ArrayRef<InputInfo> FrontendInputs = Input;
+  if (IsHeaderModulePrecompile)
+    FrontendInputs = ModuleHeaderInputs;
+  else if (Input.isNothing())
+    FrontendInputs = {};
+
+  for (const InputInfo &Input : FrontendInputs) {
+    if (Input.isFilename())
+      CmdArgs.push_back(Input.getFilename());
+    else
+      Input.getInputArg().renderAsInput(Args, CmdArgs);
+  }
+
   // Finally add the compile command to the compilation.
   if (Args.hasArg(options::OPT__SLASH_fallback) &&
       Output.getType() == types::TY_Object &&
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 73190fd5741a1..175e4b877ce94 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -253,14 +253,14 @@
 // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 // HOST-NOT: "-fcuda-is-device"
-// HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
-// HOST-NOSAVE-SAME: "-x" "cuda"
-// HOST-SAVE-SAME: "-x" "cuda-cpp-output"
 // There is only one GPU binary after combining it with fatbinary!
 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
 // There is only one GPU binary after combining it with fatbinary.
 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
+// HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
+// HOST-NOSAVE-SAME: "-x" "cuda"
+// HOST-SAVE-SAME: "-x" "cuda-cpp-output"
 
 // Match external assembler that uses compilation output.
 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip
index d5e1e7dd87859..229839db6c85d 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -80,9 +80,9 @@
 // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-emit-obj"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
+// CHECK-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"
 // CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
-// CHECK-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"
 
 //
 // Compile device code in b.hip to code object for gfx803.
@@ -152,9 +152,9 @@
 // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-emit-obj"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
+// CHECK-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"
 // CHECK-SAME: {{.*}} "-o" [[B_OBJ_HOST:".*o"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
-// CHECK-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"
 
 //
 // Link host objects.
diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c
index aee7e0dbb1d18..413e24bce110f 100644
--- a/clang/test/Driver/openmp-offload.c
+++ b/clang/test/Driver/openmp-offload.c
@@ -294,26 +294,29 @@
 //
 // Generate host BC file.
 //
-// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-o" "
+// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"
+// CHK-COMMANDS-SAME: "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-COMMANDS-SAME: "-o" "
 // CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" "
-// CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" "
 // CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]"
-// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
 
 //
 // Compile for the powerpc device.
 //
-// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp"
+// CHK-COMMANDS-SAME: "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
 // CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-COMMANDS-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[T1PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
-// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
@@ -323,14 +326,15 @@
 //
 // Compile for the x86 device.
 //
-// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp"  {{.*}}"-o" "
-// CHK-COMMANDS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp"
+// CHK-COMMANDS-SAME: "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
 // CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-COMMANDS-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[T2PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
-// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-pic-level" "2" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
@@ -443,35 +447,35 @@
 // RUN:   | FileCheck -check-prefix=CHK-BUJOBS-ST %s
 
 // Create host BC.
-// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-BUJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" "
-// CHK-BUJOBS-SAME: [[INPUT:[^\\/]+\.c]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-BUJOBS-SAME: [[INPUT:[^\\/]+\.c]]"
 
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E"  {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-BUJOBS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" "
 // CHK-BUJOBS-ST-SAME: [[INPUT:[^\\/]+\.c]]"
-// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-BUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
+// CHK-BUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
 
 // Create target 1 object.
-// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-BUJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-BUJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-BUJOBS-ST-SAME: [[T1PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
-// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-BUJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-BUJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-BUJOBS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
 // CHK-BUJOBS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]"
 
 // Create target 2 object.
-// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-BUJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-BUJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-BUJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "c" "{{.*}}[[INPUT]]"
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-BUJOBS-ST-SAME: [[T2PP:[^\\/]+\.i]]" "-x" "c" "{{.*}}[[INPUT]]"
-// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-BUJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-BUJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-BUJOBS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
 // CHK-BUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
@@ -509,23 +513,23 @@
 // CHK-UBJOBS-SAME: [[HOSTPP:[^\\/]+\.i]],
 // CHK-UBJOBS-SAME: [[T1PP:[^\\/]+\.i]],
 // CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle"
-// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
 // CHK-UBJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs=
 // CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]],
 // CHK-UBJOBS-ST-SAME: [[T1PP:[^\\/,]+\.i]],
 // CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle"
-// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
 
 // Create target 1 object.
-// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
 // CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
-// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-UBJOBS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
@@ -534,12 +538,12 @@
 // CHK-UBJOBS-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]"
 
 // Create target 2 object.
-// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
 // CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
-// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-UBJOBS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
@@ -605,32 +609,32 @@
 // CHK-UBUJOBS-SAME: [[HOSTPP:[^\\/]+\.i]],
 // CHK-UBUJOBS-SAME: [[T1PP:[^\\/]+\.i]],
 // CHK-UBUJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle"
-// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBUJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
+// CHK-UBUJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
 
 // CHK-UBUJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBUJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs=
 // CHK-UBUJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]],
 // CHK-UBUJOBS-ST-SAME: [[T1PP:[^\\/,]+\.i]],
 // CHK-UBUJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle"
-// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
+// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
+// CHK-UBUJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
 
 // Create target 1 object.
-// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBUJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
-// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBUJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBUJOBS-SAME: [[T1OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
+// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBUJOBS-ST-SAME: [[T1BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T1PP]]"
 // CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-UBUJOBS-ST-SAME: [[T1ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T1BC]]"
 // CHK-UBUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-ibm-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
 // CHK-UBUJOBS-ST-SAME: [[T1OBJ:[^\\/]+\.o]]" "{{.*}}[[T1ASM]]"
 
 // Create target 2 object.
-// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBUJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
-// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBUJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]" "-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]"
+// CHK-UBUJOBS: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBUJOBS-SAME: [[T2OBJ:[^\\/]+\.o]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
+// CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
+// CHK-UBUJOBS-ST-SAME: [[T2BC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[T2PP]]"
 // CHK-UBUJOBS-ST: clang{{.*}}" "-cc1" "-triple" "x86_64-pc-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-UBUJOBS-ST-SAME: [[T2ASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[T2BC]]"
 // CHK-UBUJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "x86_64-pc-linux-gnu" "-filetype" "obj" {{.*}}"-o" "
@@ -654,4 +658,4 @@
 // RUN:   %clang -### -no-canonical-prefixes -target powerpc64le-linux -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s
 
-// CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le-unknown-linux" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path"
+// CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" {{.*}}.c"

From 517e3cb0a57d0df784a4d246edda36ec2cc2c254 Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Thu, 23 May 2019 18:35:54 +0000
Subject: [PATCH 0067/1176] Test commit access by removing a empty line

llvm-svn: 361531
---
 lldb/source/Core/ModuleList.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp
index e02c6e5af27b6..fd943d7fc1d63 100644
--- a/lldb/source/Core/ModuleList.cpp
+++ b/lldb/source/Core/ModuleList.cpp
@@ -114,7 +114,6 @@ bool ModuleListProperties::SetClangModulesCachePath(llvm::StringRef path) {
       nullptr, ePropertyClangModulesCachePath, path);
 }
 
-
 ModuleList::ModuleList()
     : m_modules(), m_modules_mutex(), m_notifier(nullptr) {}
 

From 4fb41a24bcf281afcbed45fff7fdd1129c3ea0d5 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Thu, 23 May 2019 18:43:19 +0000
Subject: [PATCH 0068/1176] [git] Be more specific when looking for llvm-svn

Summary:
A commit may, for some reason, have `llvm-svn:` in it multiple times. It may even take up the whole line and look identical to what gets added automatically when svn commits land in github.

To workaround this, make changes to both lookups:

1) When doing the git -> svn lookup, make sure to go through the whole message, and:
 a) Only look for llvm-svn starting at the beginning of the line (excluding the whitespace that `git log` adds).
 b) Take the last one (at the end of the commit message), if there are multiple matches.

2) When doing the svn -> git lookup, look through a sizeable but still reasonably small number of git commits (10k, about 4-5 months right now), and:
 a) Only consider commits with the '^llvm-svn: NNNNNN' we expect, and
 b) Only consider those that also follow the same git -> svn matching above. (Error if it's not exactly one commit).

Reviewers: jyknight

Reviewed By: jyknight

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60017

llvm-svn: 361532
---
 llvm/utils/git-svn/git-llvm | 62 +++++++++++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm
index 28a0a9bbf4d16..53abc538ccaf3 100755
--- a/llvm/utils/git-svn/git-llvm
+++ b/llvm/utils/git-svn/git-llvm
@@ -413,12 +413,22 @@ def cmd_push(args):
 
 
 def lookup_llvm_svn_id(git_commit_hash):
-    commit_msg = git('log', '-1', git_commit_hash, ignore_errors=True)
+    # Use --format=%b to get the raw commit message, without any extra
+    # whitespace.
+    commit_msg = git('log', '-1', '--format=%b', git_commit_hash,
+                     ignore_errors=True)
     if len(commit_msg) == 0:
       die("Can't find git commit " + git_commit_hash)
-    svn_match = re.search('llvm-svn: (\d{5,7})$', commit_msg)
+    # If a commit has multiple "llvm-svn:" lines (e.g. if the commit is
+    # reverting/quoting a previous commit), choose the last one, which should
+    # be the authoritative one.
+    svn_match_iter = re.finditer('^llvm-svn: (\d{5,7})$', commit_msg,
+                                 re.MULTILINE)
+    svn_match = None
+    for m in svn_match_iter:
+      svn_match = m.group(1)
     if svn_match:
-      return int(svn_match.group(1))
+      return int(svn_match)
     die("Can't find svn revision in git commit " + git_commit_hash)
 
 
@@ -437,6 +447,28 @@ def cmd_svn_lookup(args):
     log('r' + str(lookup_llvm_svn_id(args.git_commit_hash)))
 
 
+def git_hash_by_svn_rev(svn_rev):
+    '''Find the git hash for a given svn revision.
+
+    This check is paranoid: 'llvm-svn: NNNNNN' could exist on its own line
+    somewhere else in the commit message. Look in the full log message to see
+    if it's actually on the last line.
+
+    Since this check is expensive (we're searching every single commit), limit
+    to the past 10k commits (about 5 months).
+    '''
+    possible_hashes = git(
+        'log', '--format=%H', '--grep', '^llvm-svn: %d$' % svn_rev,
+        'HEAD~10000...HEAD').split('\n')
+    matching_hashes = [h for h in possible_hashes
+                       if lookup_llvm_svn_id(h) == svn_rev]
+    if len(matching_hashes) > 1:
+      die("svn revision r%d has ambiguous commits: %s" % (
+          svn_rev, ', '.join(matching_hashes)))
+    elif len(matching_hashes) < 1:
+      die("svn revision r%d matches no commits" % svn_rev)
+    return matching_hashes[0]
+
 def cmd_revert(args):
     '''Revert a commit by either SVN id (rNNNNNN) or git hash. This also
     populates the git commit message with both the SVN revision and git hash of
@@ -459,24 +491,28 @@ def cmd_revert(args):
     # the git commit.
     svn_match = re.match('^r(\d{5,7})$', args.revision)
     if svn_match:
-      svn_rev = svn_match.group(1)
+      # If the revision looks like rNNNNNN, use that as the svn revision, and
+      # grep through git commits to find which one corresponds to that svn
+      # revision.
+      svn_rev = int(svn_match.group(1))
+      git_hash = git_hash_by_svn_rev(svn_rev)
     else:
-      svn_rev = str(lookup_llvm_svn_id(args.revision))
+      # Otherwise, this looks like a git hash, so we just need to grab the svn
+      # revision from the end of the commit message.
+      # Get the actual git hash in case the revision is something like "HEAD~1"
+      git_hash = git('rev-parse', '--verify', args.revision + '^{commit}')
+      svn_rev = lookup_llvm_svn_id(git_hash)
 
-    oneline = git('log', '--all',  '-1', '--format=%H %s', '--grep',
-                  'llvm-svn: ' + svn_rev)
-    if len(oneline) == 0:
-      die("Can't find svn revision r" + svn_rev)
-    (git_hash, msg) = oneline.split(' ', 1)
+    msg = git('log', '-1', '--format=%s', git_hash)
 
-    log_verbose('Ready to revert r%s/%s: "%s"' % (svn_rev, git_hash, msg))
+    log_verbose('Ready to revert r%d (%s): "%s"' % (svn_rev, git_hash, msg))
 
     revert_args = ['revert', '--no-commit', git_hash]
     # TODO: Running --edit doesn't seem to work, with errors that stdin is not
     # a tty.
     commit_args = [
         'commit', '-m', 'Revert ' + msg,
-        '-m', 'This reverts r%s (git commit %s)' % (svn_rev, git_hash)]
+        '-m', 'This reverts r%d (git commit %s)' % (svn_rev, git_hash)]
     if args.dry_run:
       log("Would have run the following commands, if this weren't a dry run:\n"
           '1) git %s\n2) git %s' % (
@@ -487,7 +523,7 @@ def cmd_revert(args):
     git(*revert_args)
     commit_log = git(*commit_args)
 
-    log('Created revert of r%s: %s' % (svn_rev, commit_log))
+    log('Created revert of r%d: %s' % (svn_rev, commit_log))
     log("Run 'git llvm push -n' to inspect your changes and "
         "run 'git llvm push' when ready")
 

From 3249be1e03c672ffe68f5ad8e0e31cdcb8e441f8 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 23 May 2019 18:46:03 +0000
Subject: [PATCH 0069/1176] [InstCombine] be more careful when transforming a
 shuffle mask

This is reduced from a fuzzer test:
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=14890

Usually, demanded elements should be able to simplify shuffle
mask elements that are pointing to undef elements of its source
operands, but that doesn't happen in the test case.

llvm-svn: 361533
---
 .../InstCombine/InstCombineVectorOps.cpp      | 25 ++++++++++++++++---
 .../Transforms/InstCombine/vec_shuffle.ll     | 17 +++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index ecc4df179efd4..c2ea0733a48e4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1635,8 +1635,8 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
   // We limit this transform to power-of-2 types because we expect that the
   // backend can convert the simplified IR patterns to identical nodes as the
   // original IR.
-  // TODO: If we can verify that behavior for arbitrary types, the power-of-2
-  // checks can be removed.
+  // TODO: If we can verify the same behavior for arbitrary types, the
+  //       power-of-2 checks can be removed.
   Value *X = Shuffle0->getOperand(0);
   Value *Y = Shuffle1->getOperand(0);
   if (X->getType() != Y->getType() ||
@@ -1663,10 +1663,27 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
   for (int i = 0, e = Mask.size(); i != e; ++i) {
     if (Mask[i] == -1)
       continue;
-    if (Mask[i] < WideElts)
+
+    // If this shuffle is choosing an undef element from 1 of the sources, that
+    // element is undef.
+    if (Mask[i] < WideElts) {
+      if (Shuffle0->getMaskValue(Mask[i]) == -1)
+        continue;
+    } else {
+      if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
+        continue;
+    }
+
+    // If this shuffle is choosing from the 1st narrow op, the mask element is
+    // the same. If this shuffle is choosing from the 2nd narrow op, the mask
+    // element is offset down to adjust for the narrow vector widths.
+    if (Mask[i] < WideElts) {
+      assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
       NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
-    else
+    } else {
+      assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
       NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
+    }
   }
   return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
 }
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 3dcd7d7bf335f..65af43ea2a1fa 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1235,3 +1235,20 @@ define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> %
   %s3 = shufflevector <4 x double> %s2, <4 x double> %s1, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
   ret <4 x double> %s3
 }
+
+; Demanded vector elements may not be able to simplify a shuffle mask
+; before we try to narrow it. This used to crash.
+
+define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, <4 x float>* %p) {
+; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt(
+; CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    store <4 x float> [[I]], <4 x float>* [[P:%.*]], align 16
+; CHECK-NEXT:    ret <4 x float> [[WIDEN]]
+;
+  %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %ext2 = extractelement <2 x float> %x, i32 0
+  %I = insertelement <4 x float> %widen, float %ext2, i16 0
+  store <4 x float> %I, <4 x float>* %p
+  ret <4 x float> %widen
+}

From 267ac925fb4ed2dad94177807d9a38ba83f2f4f0 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Thu, 23 May 2019 18:51:02 +0000
Subject: [PATCH 0070/1176] [NewPassManager] Add tuning option:
 SLPVectorization [clang-change]

Summary:
NewPassManager is not using CodeGenOpts values before this patch.
[to be coupled with D61616]

Reviewers: chandlerc

Subscribers: jlebar, cfe-commits, llvm-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61617

llvm-svn: 361534
---
 clang/lib/CodeGen/BackendUtil.cpp   |  9 ++++++++-
 clang/test/CodeGen/loop-vectorize.c | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGen/loop-vectorize.c

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 32eb776a6d218..1dbeec1c17833 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1050,7 +1050,14 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
                           CodeGenOpts.DebugInfoForProfiling);
   }
 
-  PassBuilder PB(TM.get(), PipelineTuningOptions(), PGOOpt);
+  PipelineTuningOptions PTO;
+  // For historical reasons, loop interleaving is set to mirror setting for loop
+  // unrolling.
+  PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
+  PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
+  PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
+
+  PassBuilder PB(TM.get(), PTO, PGOOpt);
 
   // Attempt to load pass plugins and register their callbacks with PB.
   for (auto &PluginFN : CodeGenOpts.PassPlugins) {
diff --git a/clang/test/CodeGen/loop-vectorize.c b/clang/test/CodeGen/loop-vectorize.c
new file mode 100644
index 0000000000000..28bd50bbd2e92
--- /dev/null
+++ b/clang/test/CodeGen/loop-vectorize.c
@@ -0,0 +1,25 @@
+// RUN: %clang -target x86_64 -S -c -O1 -fvectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
+// RUN: %clang -target x86_64 -S -c -O1 -fno-vectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
+// RUN: %clang -target x86_64 -fexperimental-new-pass-manager -S -c -O1 -fvectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
+// RUN: %clang -target x86_64 -fexperimental-new-pass-manager -S -c -O1 -fno-vectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
+
+// CHECK-ENABLE-VECT-LABEL: @for_test()
+// CHECK-ENABLE-VECT: fmul <{{[0-9]+}} x double>
+
+// CHECK-DISABLE-VECT-LABEL: @for_test()
+// CHECK-DISABLE-VECT: fmul double
+// CHECK-DISABLE-VECT-NOT: fmul <{{[0-9]+}} x double>
+
+#include <stdio.h>
+
+void for_test() {
+  double A[1000], B[1000];
+  int L = 500;
+  for (int i = 0; i < L; i++) {
+    A[i] = i;
+  }
+  for (int i = 0; i < L; i++) {
+    B[i] = A[i]*5;
+  }
+  printf("%lf %lf\n", A[0], B[0]);
+}

From e836096f01f1b9d63359c5b781fcfb9d1e3c39d8 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Thu, 23 May 2019 18:51:52 +0000
Subject: [PATCH 0071/1176] [CMake] Fixing errors in r361513

Summary:
I somehow messed this up. libcxx appends the subdirectories itself, so we don't need to add them here.

Also, r361513 broke the "projects" build of libcxx because it always included the extra targets.

Reviewers: lebedev.ri, mclow.lists

Subscribers: mgorny, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62336

llvm-svn: 361535
---
 llvm/runtimes/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index b9436025f5cd1..9016efe600e79 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -209,7 +209,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
 
 else() # if this is included from LLVM's CMake
   include(LLVMExternalProjectUtils)
-  if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR)
+  if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR AND "libcxx" IN_LIST LLVM_ENABLE_RUNTIMES)
     set(LIBCXX_HEADER_DIR ${LLVM_INCLUDE_DIR}/c++/v1/)
     set(CXX_HEADER_TARGET runtime-libcxx-headers)
     add_subdirectory(${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}/include ${CXX_HEADER_TARGET})

From a8e0d49c0ce0f1015cc20d3fcac823facc69df66 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 23 May 2019 18:55:00 +0000
Subject: [PATCH 0072/1176] Fix unresolved symbols when linking
 tools/clang/unittests/Tooling/ToolingTests

Summary: Add correct cmake dependencies so that `ToolingTests` link
successfully.

Patch by Guanzhong Chen

Reviewers: tlively, aheejin

Reviewed By: tlively

Subscribers: mgorny, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62333

llvm-svn: 361536
---
 clang/unittests/Tooling/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index af8a35d92517d..111e07e8c907f 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -71,6 +71,7 @@ target_link_libraries(ToolingTests
   clangToolingCore
   clangToolingInclusions
   clangToolingRefactor
+  LLVMTestingSupport
   )
 
 
From 63729b0c49d29ea96cb89c54187f5a3d2e1a0602 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Thu, 23 May 2019 19:07:41 +0000
Subject: [PATCH 0073/1176] [SLPVectorizer] Set flag to previous default.

Summary:
The refactoring in r360276 moved the `RunSLPVectorization` flag and added the default explicitly. The default should have been `false`, as before.

The new pass manager used to have SLPVectorization on by default, now it's off in opt, and needs D61617 checked in to enable it in clang.

Reviewers: chandlerc

Subscribers: mehdi_amini, jlebar, eraman, steven_wu, dexonsmith, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61955

llvm-svn: 361537
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 llvm/test/Other/new-pm-defaults.ll              | 1 -
 llvm/test/Other/new-pm-thinlto-defaults.ll      | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5b40e5ef1f08a..c0c2c85b8f483 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -106,7 +106,7 @@ using namespace slpvectorizer;
 STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
 
 cl::opt<bool>
-    llvm::RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
+    llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden,
                               cl::desc("Run the SLP vectorization passes"));
 
 static cl::opt<int>
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 317bffcefdc12..d29c127d98453 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -243,7 +243,6 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-O-NEXT: Running pass: SLPVectorizerPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index 079cea9255e5b..8dc6bf7a239c2 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -217,7 +217,6 @@
 ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass

From cb64cd9b6041c2c8aaca6d01023e5d7963873701 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 19:15:05 +0000
Subject: [PATCH 0074/1176] [NFC] UpdateTestChecks: asm.py: fix whitespace
 issue

llvm-svn: 361538
---
 llvm/utils/UpdateTestChecks/asm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 459aa42381ab7..78da89d2730ca 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -29,7 +29,7 @@ class string:
 
 ASM_FUNCTION_AARCH64_RE = re.compile(
      r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n'
-     r'(?:[ \t]+.cfi_startproc\n)?'  # drop optional cfi noise 
+     r'(?:[ \t]+.cfi_startproc\n)?'  # drop optional cfi noise
      r'(?P<body>.*?)\n'
      # This list is incomplete
      r'.Lfunc_end[0-9]+:\n',

From a83bf477702e96f2ca9e1e21a59704b11f58f958 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 23 May 2019 19:32:46 +0000
Subject: [PATCH 0075/1176] [gdb-remote] Fix more issues with thread_result_t

More fixes needed to un-break the Windows bot.

llvm-svn: 361539
---
 .../Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp   | 6 +++---
 lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
index 9c3a02e77e58a..3886b6cfe009e 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
@@ -541,7 +541,7 @@ bool GDBRemoteCommunication::DecompressPacket() {
 #if defined(HAVE_LIBCOMPRESSION)
   if (m_compression_type == CompressionType::ZlibDeflate ||
       m_compression_type == CompressionType::LZFSE ||
-      m_compression_type == CompressionType::LZ4 || 
+      m_compression_type == CompressionType::LZ4 ||
       m_compression_type == CompressionType::LZMA) {
     compression_algorithm compression_type;
     if (m_compression_type == CompressionType::LZFSE)
@@ -578,7 +578,7 @@ bool GDBRemoteCommunication::DecompressPacket() {
     if (decompressed_bufsize != ULONG_MAX && decompressed_buffer != nullptr) {
       decompressed_bytes = compression_decode_buffer(
           decompressed_buffer, decompressed_bufsize,
-          (uint8_t *)unescaped_content.data(), unescaped_content.size(), 
+          (uint8_t *)unescaped_content.data(), unescaped_content.size(),
           m_decompression_scratch, compression_type);
     }
   }
@@ -925,7 +925,7 @@ GDBRemoteCommunication::ListenThread(lldb::thread_arg_t arg) {
         eConnectionStatusSuccess)
       comm->SetConnection(nullptr);
   }
-  return nullptr;
+  return {};
 }
 
 Status GDBRemoteCommunication::StartDebugserverProcess(
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 422b092895592..c4df4e716d0df 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -3913,7 +3913,7 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
                 ") thread exiting...",
                 __FUNCTION__, arg, process->GetID());
 
-  return nullptr;
+  return {};
 }
 
 // uint32_t

From e4b27869c60cb1311c4396e057c21573e19e62cd Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Thu, 23 May 2019 19:35:40 +0000
Subject: [PATCH 0076/1176] [NewPassManager] Add tuning option: LoopUnrolling
 [NFC].

Summary: Mirror tuning option from old pass manager in new pass manager.

Reviewers: chandlerc

Subscribers: jlebar, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61618

llvm-svn: 361540
---
 llvm/include/llvm/Passes/PassBuilder.h | 3 +++
 llvm/lib/Passes/PassBuilder.cpp        | 9 ++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 69756dd9b46f6..383f49e0d7585 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -85,6 +85,9 @@ class PipelineTuningOptions {
   /// is that of the flag: `vectorize-slp`.
   bool SLPVectorization;
 
+  /// Tuning option to enable/disable loop unrolling. Its default value is true.
+  bool LoopUnrolling;
+
   /// Tuning option to cap the number of calls to retrive clobbering accesses in
   /// MemorySSA, in LICM.
   unsigned LicmMssaOptCap;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c7976ce2702d2..1d17f91d5a8d3 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -217,6 +217,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = EnableLoopInterleaving;
   LoopVectorization = EnableLoopVectorization;
   SLPVectorization = RunSLPVectorization;
+  LoopUnrolling = true;
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
 }
@@ -459,8 +460,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
   // because it changes IR to makes profile annotation in back compile
   // inaccurate.
-  if (Phase != ThinLTOPhase::PreLink || !PGOOpt ||
-      PGOOpt->Action != PGOOptions::SampleUse)
+  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+       PGOOpt->Action != PGOOptions::SampleUse) &&
+      PTO.LoopUnrolling)
     LPM2.addPass(LoopFullUnrollPass(Level));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
@@ -907,7 +909,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
     OptimizePM.addPass(
         createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
   }
-  OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
+  if (PTO.LoopUnrolling)
+    OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
   OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());

From 5c714cbdd83166e10b27b8e5ea2700654da2e90b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 23 May 2019 19:38:14 +0000
Subject: [PATCH 0077/1176] AMDGPU: Correct maximum possible private allocation
 size

We were assuming a much larger possible per-wave visible stack
allocation than is possible:

https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/faa3ae51388517353afcdaf9c16621f879ef0a59/src/core/runtime/amd_gpu_agent.cpp#L70

Based on this, we can assume the high 15 bits of a frame index or sret
are 0. The frame index value is the per-lane offset, so the maximum
frame index value is MAX_WAVE_SCRATCH / wavesize.

Remove the corresponding subtarget feature and option that made
this configurable.

llvm-svn: 361541
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  7 -----
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp    |  1 -
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h      | 13 +++++---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 21 ++++---------
 .../CodeGen/AMDGPU/frame-index-elimination.ll |  6 ++--
 llvm/test/CodeGen/AMDGPU/function-returns.ll  | 20 ++++++++++++
 .../CodeGen/AMDGPU/huge-private-buffer.ll     | 31 +++++++++++++------
 7 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 341ef73a21c91..9938eeaa528eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -458,13 +458,6 @@ def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
 
-def FeatureEnableHugePrivateBuffer : SubtargetFeature<
-  "huge-private-buffer",
-  "EnableHugePrivateBuffer",
-  "true",
-  "Enable private/scratch buffer sizes greater than 128 GB"
->;
-
 def FeatureDumpCode : SubtargetFeature <"DumpCode",
   "DumpCode",
   "true",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a88218f68b543..09b806bd06a55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -190,7 +190,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     EnableCuMode(false),
     TrapHandler(false),
 
-    EnableHugePrivateBuffer(false),
     EnableLoadStoreOpt(false),
     EnableUnsafeDSOffsetFolding(false),
     EnableSIScheduler(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 1ef72622980bf..34166aacf41a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -299,7 +299,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
   bool TrapHandler;
 
   // Used as options.
-  bool EnableHugePrivateBuffer;
   bool EnableLoadStoreOpt;
   bool EnableUnsafeDSOffsetFolding;
   bool EnableSIScheduler;
@@ -377,6 +376,9 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
   SITargetLowering TLInfo;
   SIFrameLowering FrameLowering;
 
+  // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
+  static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
+
 public:
   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                const GCNTargetMachine &TM);
@@ -436,6 +438,11 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
     return Log2_32(WavefrontSize);
   }
 
+  /// Return the number of high bits known to be zero fror a frame index.
+  unsigned getKnownHighZeroBitsForFrameIndex() const {
+    return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+  }
+
   int getLDSBankCount() const {
     return LDSBankCount;
   }
@@ -526,10 +533,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
   }
 
-  bool enableHugePrivateBuffer() const {
-    return EnableHugePrivateBuffer;
-  }
-
   bool unsafeDSOffsetFoldingEnabled() const {
     return EnableUnsafeDSOffsetFolding;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c4c0e4047fcfb..c2cda5ef4d7ce 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -93,12 +93,6 @@ static cl::opt<bool> EnableVGPRIndexMode(
   cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
   cl::init(false));
 
-static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
-  "amdgpu-frame-index-zero-bits",
-  cl::desc("High bits of frame index assumed to be zero"),
-  cl::init(5),
-  cl::ReallyHidden);
-
 static cl::opt<bool> DisableLoopAlignment(
   "amdgpu-disable-loop-alignment",
   cl::desc("Do not align and prefetch loops"),
@@ -2059,13 +2053,14 @@ SDValue SITargetLowering::LowerFormalArguments(
     Reg = MF.addLiveIn(Reg, RC);
     SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
 
-    if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) {
+    if (Arg.Flags.isSRet()) {
       // The return object should be reasonably addressable.
 
       // FIXME: This helps when the return is a real sret. If it is a
       // automatically inserted sret (i.e. CanLowerReturn returns false), an
       // extra copy is inserted in SelectionDAGBuilder which obscures this.
-      unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits;
+      unsigned NumBits
+        = 32 - getSubtarget()->getKnownHighZeroBitsForFrameIndex();
       Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
         DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
     }
@@ -9970,14 +9965,10 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
   TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts,
                                                 DAG, Depth);
 
-  if (getSubtarget()->enableHugePrivateBuffer())
-    return;
-
-  // Technically it may be possible to have a dispatch with a single workitem
-  // that uses the full private memory size, but that's not really useful. We
-  // can't use vaddr in MUBUF instructions if we don't know the address
+  // Set the high bits to zero based on the maximum allowed scratch size per
+  // wave. We can't use vaddr in MUBUF instructions if we don't know the address
   // calculation won't overflow, so assume the sign bit is never set.
-  Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
+  Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
 }
 
 unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 28521af83e04d..92a255ceae6de 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -60,7 +60,7 @@ define void @func_add_constant_to_fi_i32() #0 {
 ; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6
 ; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
 
-; GCN-NEXT: v_mul_lo_u32 v0, v0, 9
+; GCN-NEXT: v_mul_u32_u24_e32 v0, 9, v0
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
 define void @func_other_fi_user_i32() #0 {
@@ -172,7 +172,7 @@ ret:
 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6
 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], s6, [[SCALED]]
 
-; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
+; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]]
 ; GCN: ds_write_b32 v0, [[VZ]]
 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
   %alloca0 = alloca [128 x i32], align 4, addrspace(5)
@@ -196,7 +196,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
 ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[DIFF]]
 ; GFX9: v_add_u32_e32 [[VZ:v[0-9]+]], [[OFFSET]], [[SCALED]]
 
-; GCN: v_mul_lo_u32 [[VZ]], [[VZ]], 9
+; GCN: v_mul_u32_u24_e32 [[VZ]], 9, [[VZ]]
 ; GCN: ds_write_b32 v0, [[VZ]]
 define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 {
   %alloca0 = alloca [128 x i32], align 4, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index f9631e615c9db..8e73ee3c1f105 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -570,4 +570,24 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
   ret { <3 x float>, i32 } %insert.4
 }
 
+; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
+; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
+; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]
+
+; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
+; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
+; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
+define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 {
+  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32
+
+  %lshr0 = lshr i32 %arg0.int, 16
+  %lshr1 = lshr i32 %arg0.int, 17
+  %lshr2 = lshr i32 %arg0.int, 18
+
+  store volatile i32 %lshr0, i32 addrspace(3)* undef
+  store volatile i32 %lshr1, i32 addrspace(3)* undef
+  store volatile i32 %lshr2, i32 addrspace(3)* undef
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
index 8e54dcbd16934..dfd75235f808e 100644
--- a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
@@ -1,31 +1,42 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
-; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_small:
+; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
+; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]]
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
+define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
+  %alloca = alloca i32, align 4, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  %toint = ptrtoint i32 addrspace(5)* %alloca to i32
+  %masked = and i32 %toint, 65535
+  store volatile i32 %masked, i32 addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
 ; GCN-NOT: [[FI]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
-define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 {
+define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
   %toint = ptrtoint i32 addrspace(5)* %alloca to i32
-  %masked = and i32 %toint, 2147483647
+  %masked = and i32 %toint, 131071
   store volatile i32 %masked, i32 addrspace(1)* undef
   ret void
 }
 
-; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_huge:
+; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18:
 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
-; GCN-DAG: buffer_store_dword
-; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]]
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
-define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 {
+; GCN-NOT: [[FI]]
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
+define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 {
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
   %toint = ptrtoint i32 addrspace(5)* %alloca to i32
-  %masked = and i32 %toint, 2147483647
+  %masked = and i32 %toint, 262143
   store volatile i32 %masked, i32 addrspace(1)* undef
   ret void
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "target-features"="+huge-private-buffer" }

From 9925ef78ce2c5d464a9148493892847ee505853a Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Thu, 23 May 2019 19:51:16 +0000
Subject: [PATCH 0078/1176] Update breaking test.

llvm-svn: 361542
---
 clang/test/CodeGen/loop-vectorize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/loop-vectorize.c b/clang/test/CodeGen/loop-vectorize.c
index 28bd50bbd2e92..8c81fbebecb8c 100644
--- a/clang/test/CodeGen/loop-vectorize.c
+++ b/clang/test/CodeGen/loop-vectorize.c
@@ -10,7 +10,7 @@
 // CHECK-DISABLE-VECT: fmul double
 // CHECK-DISABLE-VECT-NOT: fmul <{{[0-9]+}} x double>
 
-#include <stdio.h>
+int printf(const char * restrict format, ...);
 
 void for_test() {
   double A[1000], B[1000];

From f81ebfb045b8b85b460a51d48f652741cce303a3 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 23 May 2019 19:54:41 +0000
Subject: [PATCH 0079/1176] UpdateTestChecks: ppc32 triple support

Summary:
Appears identical to powerpc64{,le}.
Regenerate test that is being affected by upcoming patch.

Reviewers: RKSimon

Reviewed By: RKSimon

Subscribers: nemanjai, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62339

llvm-svn: 361543
---
 llvm/test/CodeGen/PowerPC/vec_splat.ll | 292 ++++++++++++++++++++-----
 llvm/utils/UpdateTestChecks/asm.py     |   7 +-
 2 files changed, 245 insertions(+), 54 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll
index 7829f6fbede34..7c048ff371085 100644
--- a/llvm/test/CodeGen/PowerPC/vec_splat.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll
@@ -1,71 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g3 | FileCheck %s --check-prefixes=ALL,G3
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | FileCheck %s --check-prefixes=ALL,G5
+
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g3 | \
-; RUN:    grep stfs | count 4
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 -o %t
-; RUN: grep vspltw %t | count 2
-; RUN: grep vsplti %t | count 3
-; RUN: grep vsplth %t | count 1
 
-        %f4 = type <4 x float>
-        %i4 = type <4 x i32>
+%f4 = type <4 x float>
+%i4 = type <4 x i32>
 
 define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
-        %tmp = insertelement %f4 undef, float %X, i32 0         ; <%f4> [#uses=1]
-        %tmp2 = insertelement %f4 %tmp, float %X, i32 1         ; <%f4> [#uses=1]
-        %tmp4 = insertelement %f4 %tmp2, float %X, i32 2                ; <%f4> [#uses=1]
-        %tmp6 = insertelement %f4 %tmp4, float %X, i32 3                ; <%f4> [#uses=1]
-        %q = load %f4, %f4* %Q               ; <%f4> [#uses=1]
-        %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
-        store %f4 %R, %f4* %P
-        ret void
+; G3-LABEL: splat:
+; G3:       # %bb.0:
+; G3-NEXT:    lfs 0, 0(4)
+; G3-NEXT:    lfs 2, 8(4)
+; G3-NEXT:    lfs 3, 4(4)
+; G3-NEXT:    lfs 4, 12(4)
+; G3-NEXT:    fadds 0, 0, 1
+; G3-NEXT:    fadds 2, 2, 1
+; G3-NEXT:    fadds 3, 3, 1
+; G3-NEXT:    fadds 1, 4, 1
+; G3-NEXT:    stfs 1, 12(3)
+; G3-NEXT:    stfs 2, 8(3)
+; G3-NEXT:    stfs 3, 4(3)
+; G3-NEXT:    stfs 0, 0(3)
+; G3-NEXT:    blr
+;
+; G5-LABEL: splat:
+; G5:       # %bb.0:
+; G5-NEXT:    stwu 1, -32(1)
+; G5-NEXT:    stfs 1, 16(1)
+; G5-NEXT:    addi 5, 1, 16
+; G5-NEXT:    lvx 2, 0, 5
+; G5-NEXT:    lvx 3, 0, 4
+; G5-NEXT:    vspltw 2, 2, 0
+; G5-NEXT:    vaddfp 2, 3, 2
+; G5-NEXT:    stvx 2, 0, 3
+; G5-NEXT:    addi 1, 1, 32
+; G5-NEXT:    blr
+  %tmp = insertelement %f4 undef, float %X, i32 0   ; <%f4> [#uses=1]
+  %tmp2 = insertelement %f4 %tmp, float %X, i32 1   ; <%f4> [#uses=1]
+  %tmp4 = insertelement %f4 %tmp2, float %X, i32 2    ; <%f4> [#uses=1]
+  %tmp6 = insertelement %f4 %tmp4, float %X, i32 3    ; <%f4> [#uses=1]
+  %q = load %f4, %f4* %Q         ; <%f4> [#uses=1]
+  %R = fadd %f4 %q, %tmp6    ; <%f4> [#uses=1]
+  store %f4 %R, %f4* %P
+  ret void
 }
 
 define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind {
-        %tmp = insertelement %i4 undef, i32 %X, i32 0           ; <%i4> [#uses=1]
-        %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1           ; <%i4> [#uses=1]
-        %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2          ; <%i4> [#uses=1]
-        %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3          ; <%i4> [#uses=1]
-        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
-        %R = add %i4 %q, %tmp6          ; <%i4> [#uses=1]
-        store %i4 %R, %i4* %P
-        ret void
+; G3-LABEL: splat_i4:
+; G3:       # %bb.0:
+; G3-NEXT:    lwz 6, 0(4)
+; G3-NEXT:    lwz 7, 8(4)
+; G3-NEXT:    lwz 8, 4(4)
+; G3-NEXT:    lwz 4, 12(4)
+; G3-NEXT:    add 6, 6, 5
+; G3-NEXT:    add 8, 8, 5
+; G3-NEXT:    add 7, 7, 5
+; G3-NEXT:    add 4, 4, 5
+; G3-NEXT:    stw 4, 12(3)
+; G3-NEXT:    stw 7, 8(3)
+; G3-NEXT:    stw 8, 4(3)
+; G3-NEXT:    stw 6, 0(3)
+; G3-NEXT:    blr
+;
+; G5-LABEL: splat_i4:
+; G5:       # %bb.0:
+; G5-NEXT:    stwu 1, -32(1)
+; G5-NEXT:    stw 5, 16(1)
+; G5-NEXT:    addi 5, 1, 16
+; G5-NEXT:    lvx 2, 0, 5
+; G5-NEXT:    lvx 3, 0, 4
+; G5-NEXT:    vspltw 2, 2, 0
+; G5-NEXT:    vadduwm 2, 3, 2
+; G5-NEXT:    stvx 2, 0, 3
+; G5-NEXT:    addi 1, 1, 32
+; G5-NEXT:    blr
+  %tmp = insertelement %i4 undef, i32 %X, i32 0     ; <%i4> [#uses=1]
+  %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1     ; <%i4> [#uses=1]
+  %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2    ; <%i4> [#uses=1]
+  %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3    ; <%i4> [#uses=1]
+  %q = load %i4, %i4* %Q         ; <%i4> [#uses=1]
+  %R = add %i4 %q, %tmp6    ; <%i4> [#uses=1]
+  store %i4 %R, %i4* %P
+  ret void
 }
 
 define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
-        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
-        %R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 >             ; <%i4> [#uses=1]
-        store %i4 %R, %i4* %P
-        ret void
+; G3-LABEL: splat_imm_i32:
+; G3:       # %bb.0:
+; G3-NEXT:    lwz 5, 0(4)
+; G3-NEXT:    lwz 6, 8(4)
+; G3-NEXT:    lwz 7, 4(4)
+; G3-NEXT:    lwz 4, 12(4)
+; G3-NEXT:    addi 5, 5, -1
+; G3-NEXT:    addi 7, 7, -1
+; G3-NEXT:    addi 6, 6, -1
+; G3-NEXT:    addi 4, 4, -1
+; G3-NEXT:    stw 4, 12(3)
+; G3-NEXT:    stw 6, 8(3)
+; G3-NEXT:    stw 7, 4(3)
+; G3-NEXT:    stw 5, 0(3)
+; G3-NEXT:    blr
+;
+; G5-LABEL: splat_imm_i32:
+; G5:       # %bb.0:
+; G5-NEXT:    lvx 2, 0, 4
+; G5-NEXT:    vspltisb 3, -1
+; G5-NEXT:    vadduwm 2, 2, 3
+; G5-NEXT:    stvx 2, 0, 3
+; G5-NEXT:    blr
+  %q = load %i4, %i4* %Q         ; <%i4> [#uses=1]
+  %R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 >       ; <%i4> [#uses=1]
+  store %i4 %R, %i4* %P
+  ret void
 }
 
 define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind {
-        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
-        %R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 >         ; <%i4> [#uses=1]
-        store %i4 %R, %i4* %P
-        ret void
+; G3-LABEL: splat_imm_i16:
+; G3:       # %bb.0:
+; G3-NEXT:    lwz 5, 0(4)
+; G3-NEXT:    lwz 6, 8(4)
+; G3-NEXT:    lwz 7, 4(4)
+; G3-NEXT:    lwz 4, 12(4)
+; G3-NEXT:    addi 5, 5, 1
+; G3-NEXT:    addi 7, 7, 1
+; G3-NEXT:    addi 6, 6, 1
+; G3-NEXT:    addi 4, 4, 1
+; G3-NEXT:    addis 5, 5, 1
+; G3-NEXT:    addis 7, 7, 1
+; G3-NEXT:    addis 6, 6, 1
+; G3-NEXT:    addis 4, 4, 1
+; G3-NEXT:    stw 4, 12(3)
+; G3-NEXT:    stw 6, 8(3)
+; G3-NEXT:    stw 7, 4(3)
+; G3-NEXT:    stw 5, 0(3)
+; G3-NEXT:    blr
+;
+; G5-LABEL: splat_imm_i16:
+; G5:       # %bb.0:
+; G5-NEXT:    lvx 2, 0, 4
+; G5-NEXT:    vspltish 3, 1
+; G5-NEXT:    vadduwm 2, 2, 3
+; G5-NEXT:    stvx 2, 0, 3
+; G5-NEXT:    blr
+  %q = load %i4, %i4* %Q         ; <%i4> [#uses=1]
+  %R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 >   ; <%i4> [#uses=1]
+  store %i4 %R, %i4* %P
+  ret void
 }
 
 define void @splat_h(i16 %tmp, <16 x i8>* %dst) nounwind {
-        %tmp.upgrd.1 = insertelement <8 x i16> undef, i16 %tmp, i32 0           
-        %tmp72 = insertelement <8 x i16> %tmp.upgrd.1, i16 %tmp, i32 1 
-        %tmp73 = insertelement <8 x i16> %tmp72, i16 %tmp, i32 2 
-        %tmp74 = insertelement <8 x i16> %tmp73, i16 %tmp, i32 3
-        %tmp75 = insertelement <8 x i16> %tmp74, i16 %tmp, i32 4 
-        %tmp76 = insertelement <8 x i16> %tmp75, i16 %tmp, i32 5
-        %tmp77 = insertelement <8 x i16> %tmp76, i16 %tmp, i32 6 
-        %tmp78 = insertelement <8 x i16> %tmp77, i16 %tmp, i32 7 
-        %tmp78.upgrd.2 = bitcast <8 x i16> %tmp78 to <16 x i8>  
-        store <16 x i8> %tmp78.upgrd.2, <16 x i8>* %dst
-        ret void
+; G3-LABEL: splat_h:
+; G3:       # %bb.0:
+; G3-NEXT:    sth 3, 14(4)
+; G3-NEXT:    sth 3, 12(4)
+; G3-NEXT:    sth 3, 10(4)
+; G3-NEXT:    sth 3, 8(4)
+; G3-NEXT:    sth 3, 6(4)
+; G3-NEXT:    sth 3, 4(4)
+; G3-NEXT:    sth 3, 2(4)
+; G3-NEXT:    sth 3, 0(4)
+; G3-NEXT:    blr
+;
+; G5-LABEL: splat_h:
+; G5:       # %bb.0:
+; G5-NEXT:    stwu 1, -32(1)
+; G5-NEXT:    sth 3, 16(1)
+; G5-NEXT:    addi 3, 1, 16
+; G5-NEXT:    lvx 2, 0, 3
+; G5-NEXT:    vsplth 2, 2, 0
+; G5-NEXT:    stvx 2, 0, 4
+; G5-NEXT:    addi 1, 1, 32
+; G5-NEXT:    blr
+  %tmp.upgrd.1 = insertelement <8 x i16> undef, i16 %tmp, i32 0
+  %tmp72 = insertelement <8 x i16> %tmp.upgrd.1, i16 %tmp, i32 1
+  %tmp73 = insertelement <8 x i16> %tmp72, i16 %tmp, i32 2
+  %tmp74 = insertelement <8 x i16> %tmp73, i16 %tmp, i32 3
+  %tmp75 = insertelement <8 x i16> %tmp74, i16 %tmp, i32 4
+  %tmp76 = insertelement <8 x i16> %tmp75, i16 %tmp, i32 5
+  %tmp77 = insertelement <8 x i16> %tmp76, i16 %tmp, i32 6
+  %tmp78 = insertelement <8 x i16> %tmp77, i16 %tmp, i32 7
+  %tmp78.upgrd.2 = bitcast <8 x i16> %tmp78 to <16 x i8>
+  store <16 x i8> %tmp78.upgrd.2, <16 x i8>* %dst
+  ret void
 }
 
 define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-        %tmp = load <16 x i8>, <16 x i8>* %B               ; <<16 x i8>> [#uses=1]
-        %tmp.s = bitcast <16 x i8> %tmp to <16 x i8>            ; <<16 x i8>> [#uses=1]
-        %tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16
- 15, i16 15, i16 15 > to <16 x i8>)             ; <<16 x i8>> [#uses=1]
-        %tmp4.u = bitcast <16 x i8> %tmp4 to <16 x i8>          ; <<16 x i8>> [#uses=1]
-        store <16 x i8> %tmp4.u, <16 x i8>* %A
-        ret void
+; G3-LABEL: spltish:
+; G3:       # %bb.0:
+; G3-NEXT:    stwu 1, -48(1)
+; G3-NEXT:    stw 25, 20(1) # 4-byte Folded Spill
+; G3-NEXT:    stw 26, 24(1) # 4-byte Folded Spill
+; G3-NEXT:    stw 27, 28(1) # 4-byte Folded Spill
+; G3-NEXT:    stw 28, 32(1) # 4-byte Folded Spill
+; G3-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; G3-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; G3-NEXT:    lbz 5, 5(4)
+; G3-NEXT:    lbz 6, 3(4)
+; G3-NEXT:    lbz 7, 1(4)
+; G3-NEXT:    lbz 8, 0(4)
+; G3-NEXT:    lbz 9, 2(4)
+; G3-NEXT:    lbz 10, 4(4)
+; G3-NEXT:    lbz 11, 6(4)
+; G3-NEXT:    lbz 12, 8(4)
+; G3-NEXT:    lbz 0, 10(4)
+; G3-NEXT:    addi 7, 7, -15
+; G3-NEXT:    lbz 30, 12(4)
+; G3-NEXT:    lbz 29, 14(4)
+; G3-NEXT:    lbz 28, 15(4)
+; G3-NEXT:    lbz 27, 13(4)
+; G3-NEXT:    lbz 26, 11(4)
+; G3-NEXT:    lbz 25, 9(4)
+; G3-NEXT:    addi 6, 6, -15
+; G3-NEXT:    lbz 4, 7(4)
+; G3-NEXT:    addi 5, 5, -15
+; G3-NEXT:    addi 25, 25, -15
+; G3-NEXT:    addi 26, 26, -15
+; G3-NEXT:    addi 4, 4, -15
+; G3-NEXT:    addi 27, 27, -15
+; G3-NEXT:    addi 28, 28, -15
+; G3-NEXT:    stb 29, 14(3)
+; G3-NEXT:    stb 30, 12(3)
+; G3-NEXT:    stb 0, 10(3)
+; G3-NEXT:    stb 12, 8(3)
+; G3-NEXT:    stb 11, 6(3)
+; G3-NEXT:    stb 10, 4(3)
+; G3-NEXT:    stb 9, 2(3)
+; G3-NEXT:    stb 8, 0(3)
+; G3-NEXT:    stb 28, 15(3)
+; G3-NEXT:    stb 27, 13(3)
+; G3-NEXT:    stb 26, 11(3)
+; G3-NEXT:    stb 25, 9(3)
+; G3-NEXT:    stb 4, 7(3)
+; G3-NEXT:    stb 5, 5(3)
+; G3-NEXT:    stb 6, 3(3)
+; G3-NEXT:    stb 7, 1(3)
+; G3-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; G3-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; G3-NEXT:    lwz 28, 32(1) # 4-byte Folded Reload
+; G3-NEXT:    lwz 27, 28(1) # 4-byte Folded Reload
+; G3-NEXT:    lwz 26, 24(1) # 4-byte Folded Reload
+; G3-NEXT:    lwz 25, 20(1) # 4-byte Folded Reload
+; G3-NEXT:    addi 1, 1, 48
+; G3-NEXT:    blr
+;
+; G5-LABEL: spltish:
+; G5:       # %bb.0:
+; G5-NEXT:    lvx 2, 0, 4
+; G5-NEXT:    vspltish 3, 15
+; G5-NEXT:    vsububm 2, 2, 3
+; G5-NEXT:    stvx 2, 0, 3
+; G5-NEXT:    blr
+  %tmp = load <16 x i8>, <16 x i8>* %B         ; <<16 x i8>> [#uses=1]
+  %tmp.s = bitcast <16 x i8> %tmp to <16 x i8>      ; <<16 x i8>> [#uses=1]
+  %tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16
+ 15, i16 15, i16 15 > to <16 x i8>)       ; <<16 x i8>> [#uses=1]
+  %tmp4.u = bitcast <16 x i8> %tmp4 to <16 x i8>    ; <<16 x i8>> [#uses=1]
+  store <16 x i8> %tmp4.u, <16 x i8>* %A
+  ret void
 }
-
diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 78da89d2730ca..07ba2644ef4bd 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -154,7 +154,7 @@ def scrub_asm_arm_eabi(asm, args):
   asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
   return asm
 
-def scrub_asm_powerpc64(asm, args):
+def scrub_asm_powerpc(asm, args):
   # Scrub runs of whitespace out of the assembly, but leave the leading
   # whitespace in place.
   asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
@@ -261,8 +261,9 @@ def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, pre
       'armv7eb': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
       'armv8a': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
       'mips': (scrub_asm_mips, ASM_FUNCTION_MIPS_RE),
-      'powerpc64': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
-      'powerpc64le': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
+      'ppc32': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE),
+      'powerpc64': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE),
+      'powerpc64le': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE),
       'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
       'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
       'sparc': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE),

From 266b65f8404fd2dbac6b17ebdb90507273418cb0 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 23 May 2019 20:05:21 +0000
Subject: [PATCH 0080/1176] [Utility] Avoid a few unnecessary copies (NFC)

Avoid unnecessary copies by either passing by const-reference or moving
the argument.

llvm-svn: 361544
---
 lldb/include/lldb/Utility/Broadcaster.h | 30 +++++++++++++------------
 lldb/include/lldb/Utility/Listener.h    |  7 +++---
 lldb/source/Utility/Broadcaster.cpp     |  9 ++++----
 lldb/source/Utility/Listener.cpp        | 10 ++++-----
 4 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/lldb/include/lldb/Utility/Broadcaster.h b/lldb/include/lldb/Utility/Broadcaster.h
index df1a6640244e3..1444282c7f7b0 100644
--- a/lldb/include/lldb/Utility/Broadcaster.h
+++ b/lldb/include/lldb/Utility/Broadcaster.h
@@ -51,7 +51,7 @@ class BroadcastEventSpec {
   // Tell whether this BroadcastEventSpec is contained in in_spec. That is: (a)
   // the two spec's share the same broadcaster class (b) the event bits of this
   // spec are wholly contained in those of in_spec.
-  bool IsContainedIn(BroadcastEventSpec in_spec) const {
+  bool IsContainedIn(const BroadcastEventSpec &in_spec) const {
     if (m_broadcaster_class != in_spec.GetBroadcasterClass())
       return false;
     uint32_t in_bits = in_spec.GetEventBits();
@@ -90,12 +90,13 @@ class BroadcasterManager
   ~BroadcasterManager() = default;
 
   uint32_t RegisterListenerForEvents(const lldb::ListenerSP &listener_sp,
-                                     BroadcastEventSpec event_spec);
+                                     const BroadcastEventSpec &event_spec);
 
   bool UnregisterListenerForEvents(const lldb::ListenerSP &listener_sp,
-                                   BroadcastEventSpec event_spec);
+                                   const BroadcastEventSpec &event_spec);
 
-  lldb::ListenerSP GetListenerForEventSpec(BroadcastEventSpec event_spec) const;
+  lldb::ListenerSP
+  GetListenerForEventSpec(const BroadcastEventSpec &event_spec) const;
 
   void SignUpListenersForBroadcaster(Broadcaster &broadcaster);
 
@@ -123,7 +124,7 @@ class BroadcasterManager
 
     ~BroadcasterClassMatches() = default;
 
-    bool operator()(const event_listener_key input) const {
+    bool operator()(const event_listener_key &input) const {
       return (input.first.GetBroadcasterClass() == m_broadcaster_class);
     }
 
@@ -133,12 +134,12 @@ class BroadcasterManager
 
   class BroadcastEventSpecMatches {
   public:
-    BroadcastEventSpecMatches(BroadcastEventSpec broadcaster_spec)
+    BroadcastEventSpecMatches(const BroadcastEventSpec &broadcaster_spec)
         : m_broadcaster_spec(broadcaster_spec) {}
 
     ~BroadcastEventSpecMatches() = default;
 
-    bool operator()(const event_listener_key input) const {
+    bool operator()(const event_listener_key &input) const {
       return (input.first.IsContainedIn(m_broadcaster_spec));
     }
 
@@ -148,13 +149,14 @@ class BroadcasterManager
 
   class ListenerMatchesAndSharedBits {
   public:
-    explicit ListenerMatchesAndSharedBits(BroadcastEventSpec broadcaster_spec,
-                                          const lldb::ListenerSP listener_sp)
+    explicit ListenerMatchesAndSharedBits(
+        const BroadcastEventSpec &broadcaster_spec,
+        const lldb::ListenerSP &listener_sp)
         : m_broadcaster_spec(broadcaster_spec), m_listener_sp(listener_sp) {}
 
     ~ListenerMatchesAndSharedBits() = default;
 
-    bool operator()(const event_listener_key input) const {
+    bool operator()(const event_listener_key &input) const {
       return (input.first.GetBroadcasterClass() ==
                   m_broadcaster_spec.GetBroadcasterClass() &&
               (input.first.GetEventBits() &
@@ -169,12 +171,12 @@ class BroadcasterManager
 
   class ListenerMatches {
   public:
-    explicit ListenerMatches(const lldb::ListenerSP in_listener_sp)
+    explicit ListenerMatches(const lldb::ListenerSP &in_listener_sp)
         : m_listener_sp(in_listener_sp) {}
 
     ~ListenerMatches() = default;
 
-    bool operator()(const event_listener_key input) const {
+    bool operator()(const event_listener_key &input) const {
       if (input.second == m_listener_sp)
         return true;
       else
@@ -192,14 +194,14 @@ class BroadcasterManager
 
     ~ListenerMatchesPointer() = default;
 
-    bool operator()(const event_listener_key input) const {
+    bool operator()(const event_listener_key &input) const {
       if (input.second.get() == m_listener)
         return true;
       else
         return false;
     }
 
-    bool operator()(const lldb::ListenerSP input) const {
+    bool operator()(const lldb::ListenerSP &input) const {
       if (input.get() == m_listener)
         return true;
       else
diff --git a/lldb/include/lldb/Utility/Listener.h b/lldb/include/lldb/Utility/Listener.h
index 8a8e2755272c4..17fc47880e8f8 100644
--- a/lldb/include/lldb/Utility/Listener.h
+++ b/lldb/include/lldb/Utility/Listener.h
@@ -58,10 +58,11 @@ class Listener : public std::enable_shared_from_this<Listener> {
 
   const char *GetName() { return m_name.c_str(); }
 
-  uint32_t StartListeningForEventSpec(lldb::BroadcasterManagerSP manager_sp,
-                                      const BroadcastEventSpec &event_spec);
+  uint32_t
+  StartListeningForEventSpec(const lldb::BroadcasterManagerSP &manager_sp,
+                             const BroadcastEventSpec &event_spec);
 
-  bool StopListeningForEventSpec(lldb::BroadcasterManagerSP manager_sp,
+  bool StopListeningForEventSpec(const lldb::BroadcasterManagerSP &manager_sp,
                                  const BroadcastEventSpec &event_spec);
 
   uint32_t StartListeningForEvents(Broadcaster *broadcaster,
diff --git a/lldb/source/Utility/Broadcaster.cpp b/lldb/source/Utility/Broadcaster.cpp
index b42af00b526eb..c0b8567558eb3 100644
--- a/lldb/source/Utility/Broadcaster.cpp
+++ b/lldb/source/Utility/Broadcaster.cpp
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <memory>
 #include <type_traits>
+#include <utility>
 
 #include <assert.h>
 #include <stddef.h>
@@ -27,7 +28,7 @@ using namespace lldb_private;
 
 Broadcaster::Broadcaster(BroadcasterManagerSP manager_sp, const char *name)
     : m_broadcaster_sp(std::make_shared<BroadcasterImpl>(*this)),
-      m_manager_sp(manager_sp), m_broadcaster_name(name) {
+      m_manager_sp(std::move(manager_sp)), m_broadcaster_name(name) {
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
   if (log)
     log->Printf("%p Broadcaster::Broadcaster(\"%s\")",
@@ -334,7 +335,7 @@ lldb::BroadcasterManagerSP BroadcasterManager::MakeBroadcasterManager() {
 }
 
 uint32_t BroadcasterManager::RegisterListenerForEvents(
-    const lldb::ListenerSP &listener_sp, BroadcastEventSpec event_spec) {
+    const lldb::ListenerSP &listener_sp, const BroadcastEventSpec &event_spec) {
   std::lock_guard<std::recursive_mutex> guard(m_manager_mutex);
 
   collection::iterator iter = m_event_map.begin(), end_iter = m_event_map.end();
@@ -359,7 +360,7 @@ uint32_t BroadcasterManager::RegisterListenerForEvents(
 }
 
 bool BroadcasterManager::UnregisterListenerForEvents(
-    const lldb::ListenerSP &listener_sp, BroadcastEventSpec event_spec) {
+    const lldb::ListenerSP &listener_sp, const BroadcastEventSpec &event_spec) {
   std::lock_guard<std::recursive_mutex> guard(m_manager_mutex);
   bool removed_some = false;
 
@@ -399,7 +400,7 @@ bool BroadcasterManager::UnregisterListenerForEvents(
 }
 
 ListenerSP BroadcasterManager::GetListenerForEventSpec(
-    BroadcastEventSpec event_spec) const {
+    const BroadcastEventSpec &event_spec) const {
   std::lock_guard<std::recursive_mutex> guard(m_manager_mutex);
 
   collection::const_iterator iter, end_iter = m_event_map.end();
diff --git a/lldb/source/Utility/Listener.cpp b/lldb/source/Utility/Listener.cpp
index 5b97fbe923dec..50c56406c2ca5 100644
--- a/lldb/source/Utility/Listener.cpp
+++ b/lldb/source/Utility/Listener.cpp
@@ -27,8 +27,8 @@ namespace {
 class BroadcasterManagerWPMatcher {
 public:
   BroadcasterManagerWPMatcher(BroadcasterManagerSP manager_sp)
-      : m_manager_sp(manager_sp) {}
-  bool operator()(const BroadcasterManagerWP input_wp) const {
+      : m_manager_sp(std::move(manager_sp)) {}
+  bool operator()(const BroadcasterManagerWP &input_wp) const {
     BroadcasterManagerSP input_sp = input_wp.lock();
     return (input_sp && input_sp == m_manager_sp);
   }
@@ -191,7 +191,7 @@ void Listener::BroadcasterManagerWillDestruct(BroadcasterManagerSP manager_sp) {
       end_iter = m_broadcaster_managers.end();
   BroadcasterManagerWP manager_wp;
 
-  BroadcasterManagerWPMatcher matcher(manager_sp);
+  BroadcasterManagerWPMatcher matcher(std::move(manager_sp));
   iter = std::find_if<broadcaster_manager_collection::iterator,
                       BroadcasterManagerWPMatcher>(
       m_broadcaster_managers.begin(), end_iter, matcher);
@@ -424,7 +424,7 @@ size_t Listener::HandleBroadcastEvent(EventSP &event_sp) {
 }
 
 uint32_t
-Listener::StartListeningForEventSpec(BroadcasterManagerSP manager_sp,
+Listener::StartListeningForEventSpec(const BroadcasterManagerSP &manager_sp,
                                      const BroadcastEventSpec &event_spec) {
   if (!manager_sp)
     return 0;
@@ -452,7 +452,7 @@ Listener::StartListeningForEventSpec(BroadcasterManagerSP manager_sp,
   return bits_acquired;
 }
 
-bool Listener::StopListeningForEventSpec(BroadcasterManagerSP manager_sp,
+bool Listener::StopListeningForEventSpec(const BroadcasterManagerSP &manager_sp,
                                          const BroadcastEventSpec &event_spec) {
   if (!manager_sp)
     return false;

From 30905a375ea2f7970cc896b89ec4ebf3c812cd4f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 23 May 2019 20:07:27 +0000
Subject: [PATCH 0081/1176] Fix sphinx unknown document error

llvm-svn: 361545
---
 clang-tools-extra/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 4c709f0370df9..e998fa1b189a0 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -177,7 +177,7 @@ Improvements to clang-tidy
   Rewrites function signatures to use a trailing return type.
 
 - The :doc:`misc-throw-by-value-catch-by-reference
-  <clang-tidy/misc-throw-by-value-catch-by-reference.rst>` now supports
+  <clang-tidy/checks/misc-throw-by-value-catch-by-reference.rst>` now supports
   `WarnOnLargeObject` and `MaxSize` options to warn on any large trivial
   object caught by value.
 

From 56d69ef8ca1f2826b1e7c620b871f5074926a43a Mon Sep 17 00:00:00 2001
From: Jorge Gorbe Moya <jgorbe@google.com>
Date: Thu, 23 May 2019 20:11:17 +0000
Subject: [PATCH 0082/1176] [lldb] Make sure RegularExpression constructors
 always initialize member variables

The copy constructor of RegularExpression doesn't initialize m_comp_err. This causes an use-of-initialized-value error when a RegularExpression is copied: the copy constructor calls Compile, which calls Free to free the existing regex if needed, which in turn reads m_comp_err to check if there's any regex to be freed.

This change calls the default constructor from the other constructors to make sure members are always initialized with sensible values. This also avoids duplicating init logic, like the RegularExpression(llvm:StringRef) constructor does, which is error prone.

Differential Revision: https://reviews.llvm.org/D62334

llvm-svn: 361546
---
 lldb/source/Utility/RegularExpression.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Utility/RegularExpression.cpp b/lldb/source/Utility/RegularExpression.cpp
index 71fe301c45117..0192e8b8a01a0 100644
--- a/lldb/source/Utility/RegularExpression.cpp
+++ b/lldb/source/Utility/RegularExpression.cpp
@@ -29,13 +29,12 @@ RegularExpression::RegularExpression() : m_re(), m_comp_err(1), m_preg() {
 // Constructor that compiles "re" using "flags" and stores the resulting
 // compiled regular expression into this object.
 RegularExpression::RegularExpression(llvm::StringRef str)
-    : m_re(), m_comp_err(1), m_preg() {
-  memset(&m_preg, 0, sizeof(m_preg));
+    : RegularExpression() {
   Compile(str);
 }
 
-RegularExpression::RegularExpression(const RegularExpression &rhs) {
-  memset(&m_preg, 0, sizeof(m_preg));
+RegularExpression::RegularExpression(const RegularExpression &rhs)
+    : RegularExpression() {
   Compile(rhs.GetText());
 }
 

From 7d6c0bce503fd92fa48db88a944b01fa2e7402b4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 23 May 2019 20:17:25 +0000
Subject: [PATCH 0083/1176] [DAGCombiner] make folds of binops safe for opcodes
 that produce >1 value

This is no-functional-change-intended currently because the definition
of isBinOp() only includes opcodes that produce 1 value. But if we
share that implementation with isCommutativeBinOp() as proposed in
D62191, then we need to make sure that the callers bail out for
opcodes that they are not prepared to handle correctly.

llvm-svn: 361547
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0aa481ff2e298..b5bb86580fb61 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1968,7 +1968,8 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
 }
 
 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
-  assert(TLI.isBinOp(BO->getOpcode()) && "Unexpected binary operator");
+  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+         "Unexpected binary operator");
 
   // Don't do this unless the old select is going away. We want to eliminate the
   // binary operator, not replace a binop with a select.
@@ -16172,7 +16173,8 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
   SDValue Vec = ExtElt->getOperand(0);
   SDValue Index = ExtElt->getOperand(1);
   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
-  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse())
+  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+      Vec.getNode()->getNumValues() != 1)
     return SDValue();
 
   // Targets may want to avoid this to prevent an expensive register transfer.
@@ -17412,7 +17414,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
                                               SelectionDAG &DAG) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue BinOp = Extract->getOperand(0);
-  if (!TLI.isBinOp(BinOp.getOpcode()))
+  if (!TLI.isBinOp(BinOp.getOpcode()) || BinOp.getNode()->getNumValues() != 1)
     return SDValue();
 
   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
@@ -17456,7 +17458,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   // feeding an extract subvector.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
-  if (!TLI.isBinOp(BinOp.getOpcode()))
+  if (!TLI.isBinOp(BinOp.getOpcode()) || BinOp.getNode()->getNumValues() != 1)
     return SDValue();
 
   // The binop must be a vector type, so we can extract some fraction of it.
@@ -18268,7 +18270,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
     int SplatIndex = SVN->getSplatIndex();
     if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
-        TLI.isBinOp(N0.getOpcode())) {
+        TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
       // splat (vector_bo L, R), Index -->
       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
       SDValue L = N0.getOperand(0), R = N0.getOperand(1);

From edb52e2e7d075b1fec13034deaa56d3c32d100ac Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 23 May 2019 20:25:49 +0000
Subject: [PATCH 0084/1176] [Process] Fix another thread_result_t & nullptr
 incompatibility.

llvm-svn: 361548
---
 lldb/source/Target/Process.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index b018a3115a026..7c668a216a3a2 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -3638,7 +3638,7 @@ void Process::ControlPrivateStateThread(uint32_t signal) {
     }
 
     if (signal == eBroadcastInternalStateControlStop) {
-      thread_result_t result = nullptr;
+      thread_result_t result = {};
       m_private_state_thread.Join(&result);
       m_private_state_thread.Reset();
     }
@@ -3913,7 +3913,7 @@ thread_result_t Process::RunPrivateStateThread(bool is_secondary_thread) {
   // it was doing yet, so don't try to change it on the way out.
   if (!is_secondary_thread)
     m_public_run_lock.SetStopped();
-  return nullptr;
+  return {};
 }
 
 // Process Event Data
@@ -4072,15 +4072,15 @@ void Process::ProcessEventData::DoOnRemoval(Event *event_ptr) {
         // public resume.
         process_sp->PrivateResume();
       } else {
-        bool hijacked = 
-          process_sp->IsHijackedForEvent(eBroadcastBitStateChanged)
-          && !process_sp->StateChangedIsHijackedForSynchronousResume();
+        bool hijacked =
+            process_sp->IsHijackedForEvent(eBroadcastBitStateChanged) &&
+            !process_sp->StateChangedIsHijackedForSynchronousResume();
 
         if (!hijacked) {
           // If we didn't restart, run the Stop Hooks here.
           // Don't do that if state changed events aren't hooked up to the
-          // public (or SyncResume) broadcasters.  StopHooks are just for 
-          // real public stops.  They might also restart the target, 
+          // public (or SyncResume) broadcasters.  StopHooks are just for
+          // real public stops.  They might also restart the target,
           // so watch for that.
           process_sp->GetTarget().RunStopHooks();
           if (process_sp->GetPrivateState() == eStateRunning)

From 14f4ff6e8972ddc7755c72f6bfc2ba372ac9638f Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Thu, 23 May 2019 20:26:41 +0000
Subject: [PATCH 0085/1176] [COFF] Move KeepUnique bit from Chunk to
 SectionChunk, NFC

The KeepUnique bit is used during ICF, which only operates on
SectionChunks, so only SectionChunks need it. This frees up a byte in
Chunk, which I plan to use in a follow-up change.

llvm-svn: 361549
---
 lld/COFF/Chunks.h   | 12 +++++-------
 lld/COFF/Driver.cpp |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 8525feedf2e67..d15638e7b0cc6 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -122,14 +122,9 @@ class Chunk {
 
 protected:
   Chunk(Kind K = OtherKind) : ChunkKind(K) {}
-  const Kind ChunkKind;
 
-public:
-  // Whether this section needs to be kept distinct from other sections during
-  // ICF. This is set by the driver using address-significance tables.
-  bool KeepUnique = false;
+  const Kind ChunkKind;
 
-protected:
   // The alignment of this chunk, stored in log2 form. The writer uses the
   // value.
   uint8_t P2Align = 0;
@@ -137,7 +132,6 @@ class Chunk {
   // The RVA of this chunk in the output. The writer sets a value.
   uint32_t RVA = 0;
 
-protected:
   // The output section for this chunk.
   OutputSection *Out = nullptr;
 };
@@ -283,6 +277,10 @@ class SectionChunk final : public Chunk {
   // Used by the garbage collector.
   bool Live;
 
+  // Whether this section needs to be kept distinct from other sections during
+  // ICF. This is set by the driver using address-significance tables.
+  bool KeepUnique = false;
+
   // The COMDAT selection if this is a COMDAT chunk.
   llvm::COFF::COMDATType Selection = (llvm::COFF::COMDATType)0;
 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 39d476c5079b6..006984309e184 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -858,7 +858,7 @@ static void parseOrderFile(StringRef Arg) {
 
 static void markAddrsig(Symbol *S) {
   if (auto *D = dyn_cast_or_null<Defined>(S))
-    if (Chunk *C = D->getChunk())
+    if (SectionChunk *C = dyn_cast_or_null<SectionChunk>(D->getChunk()))
       C->KeepUnique = true;
 }
 

From dab31924e9c790555f916d21e6575e7f1e1cd5b7 Mon Sep 17 00:00:00 2001
From: Tamas Zolnai <zolnaitamas2000@gmail.com>
Date: Thu, 23 May 2019 20:29:04 +0000
Subject: [PATCH 0086/1176] [clang-tidy]: Add cert-oop54-cpp alias for
 bugprone-unhandled-self-assignment

Summary:
Added WarnOnlyIfThisHasSuspiciousField option to allow
to catch any copy assignment operator independently from
the container class's fields.
Added the cert alias using this option.

Reviewers: aaron.ballman

Reviewed By: aaron.ballman

Subscribers: mgorny, Eugene.Zelenko, xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62192

llvm-svn: 361550
---
 .../bugprone/UnhandledSelfAssignmentCheck.cpp | 61 ++++++++++++-------
 .../bugprone/UnhandledSelfAssignmentCheck.h   |  8 ++-
 .../clang-tidy/cert/CERTTidyModule.cpp        |  4 ++
 .../clang-tidy/cert/CMakeLists.txt            |  1 +
 clang-tools-extra/docs/ReleaseNotes.rst       |  5 ++
 .../bugprone-unhandled-self-assignment.rst    | 10 ++-
 .../docs/clang-tidy/checks/cert-oop54-cpp.rst | 10 +++
 .../docs/clang-tidy/checks/list.rst           |  1 +
 ...warn-only-if-this-has-suspicious-field.cpp | 41 +++++++++++++
 .../test/clang-tidy/cert-oop54-cpp.cpp        | 16 +++++
 10 files changed, 131 insertions(+), 26 deletions(-)
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/cert-oop54-cpp.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment-warn-only-if-this-has-suspicious-field.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/cert-oop54-cpp.cpp

diff --git a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp
index b529f72ddae32..14f5e1532474b 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp
@@ -16,6 +16,18 @@ namespace clang {
 namespace tidy {
 namespace bugprone {
 
+UnhandledSelfAssignmentCheck::UnhandledSelfAssignmentCheck(
+    StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      WarnOnlyIfThisHasSuspiciousField(
+          Options.get("WarnOnlyIfThisHasSuspiciousField", true)) {}
+
+void UnhandledSelfAssignmentCheck::storeOptions(
+    ClangTidyOptions::OptionMap &Opts) {
+  Options.store(Opts, "WarnOnlyIfThisHasSuspiciousField",
+                WarnOnlyIfThisHasSuspiciousField);
+}
+
 void UnhandledSelfAssignmentCheck::registerMatchers(MatchFinder *Finder) {
   if (!getLangOpts().CPlusPlus)
     return;
@@ -61,29 +73,32 @@ void UnhandledSelfAssignmentCheck::registerMatchers(MatchFinder *Finder) {
       cxxMethodDecl(unless(hasDescendant(cxxMemberCallExpr(callee(cxxMethodDecl(
           hasName("operator="), ofClass(equalsBoundNode("class"))))))));
 
-  // Matcher for standard smart pointers.
-  const auto SmartPointerType = qualType(hasUnqualifiedDesugaredType(
-      recordType(hasDeclaration(classTemplateSpecializationDecl(
-          hasAnyName("::std::shared_ptr", "::std::unique_ptr",
-                     "::std::weak_ptr", "::std::auto_ptr"),
-          templateArgumentCountIs(1))))));
-
-  // We will warn only if the class has a pointer or a C array field which
-  // probably causes a problem during self-assignment (e.g. first resetting the
-  // pointer member, then trying to access the object pointed by the pointer, or
-  // memcpy overlapping arrays).
-  const auto ThisHasSuspiciousField = cxxMethodDecl(ofClass(cxxRecordDecl(
-      has(fieldDecl(anyOf(hasType(pointerType()), hasType(SmartPointerType),
-                          hasType(arrayType())))))));
-
-  Finder->addMatcher(
-      cxxMethodDecl(ofClass(cxxRecordDecl().bind("class")),
-                    isCopyAssignmentOperator(), IsUserDefined,
-                    HasReferenceParam, HasNoSelfCheck,
-                    unless(HasNonTemplateSelfCopy), unless(HasTemplateSelfCopy),
-                    HasNoNestedSelfAssign, ThisHasSuspiciousField)
-          .bind("copyAssignmentOperator"),
-      this);
+  DeclarationMatcher AdditionalMatcher = cxxMethodDecl();
+  if (WarnOnlyIfThisHasSuspiciousField) {
+    // Matcher for standard smart pointers.
+    const auto SmartPointerType = qualType(hasUnqualifiedDesugaredType(
+        recordType(hasDeclaration(classTemplateSpecializationDecl(
+            hasAnyName("::std::shared_ptr", "::std::unique_ptr",
+                       "::std::weak_ptr", "::std::auto_ptr"),
+            templateArgumentCountIs(1))))));
+
+    // We will warn only if the class has a pointer or a C array field which
+    // probably causes a problem during self-assignment (e.g. first resetting
+    // the pointer member, then trying to access the object pointed by the
+    // pointer, or memcpy overlapping arrays).
+    AdditionalMatcher = cxxMethodDecl(ofClass(cxxRecordDecl(
+        has(fieldDecl(anyOf(hasType(pointerType()), hasType(SmartPointerType),
+                            hasType(arrayType())))))));
+  }
+
+  Finder->addMatcher(cxxMethodDecl(ofClass(cxxRecordDecl().bind("class")),
+                                   isCopyAssignmentOperator(), IsUserDefined,
+                                   HasReferenceParam, HasNoSelfCheck,
+                                   unless(HasNonTemplateSelfCopy),
+                                   unless(HasTemplateSelfCopy),
+                                   HasNoNestedSelfAssign, AdditionalMatcher)
+                         .bind("copyAssignmentOperator"),
+                     this);
 }
 
 void UnhandledSelfAssignmentCheck::check(
diff --git a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h
index 1747246143552..d7a2b7c619ff8 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h
@@ -23,10 +23,14 @@ namespace bugprone {
 /// http://clang.llvm.org/extra/clang-tidy/checks/bugprone-unhandled-self-assignment.html
 class UnhandledSelfAssignmentCheck : public ClangTidyCheck {
 public:
-  UnhandledSelfAssignmentCheck(StringRef Name, ClangTidyContext *Context)
-      : ClangTidyCheck(Name, Context) {}
+  UnhandledSelfAssignmentCheck(StringRef Name, ClangTidyContext *Context);
+
+  void storeOptions(ClangTidyOptions::OptionMap &Opts) override;
   void registerMatchers(ast_matchers::MatchFinder *Finder) override;
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+  const bool WarnOnlyIfThisHasSuspiciousField;
 };
 
 } // namespace bugprone
diff --git a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
index cd8da0c663643..341968b6fa6b1 100644
--- a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
@@ -9,6 +9,7 @@
 #include "../ClangTidy.h"
 #include "../ClangTidyModule.h"
 #include "../ClangTidyModuleRegistry.h"
+#include "../bugprone/UnhandledSelfAssignmentCheck.h"
 #include "../google/UnnamedNamespaceInHeaderCheck.h"
 #include "../misc/NewDeleteOverloadsCheck.h"
 #include "../misc/NonCopyableObjects.h"
@@ -49,6 +50,8 @@ class CERTModule : public ClangTidyModule {
     // OOP
     CheckFactories.registerCheck<performance::MoveConstructorInitCheck>(
         "cert-oop11-cpp");
+    CheckFactories.registerCheck<bugprone::UnhandledSelfAssignmentCheck>(
+        "cert-oop54-cpp");
     // ERR
     CheckFactories.registerCheck<misc::ThrowByValueCatchByReferenceCheck>(
         "cert-err09-cpp");
@@ -85,6 +88,7 @@ class CERTModule : public ClangTidyModule {
     ClangTidyOptions Options;
     ClangTidyOptions::OptionMap &Opts = Options.CheckOptions;
     Opts["cert-dcl16-c.NewSuffixes"] = "L;LL;LU;LLU";
+    Opts["cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField"] = "0";
     return Options;
   }
 };
diff --git a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt
index b50ddf014c634..474d9356adfbf 100644
--- a/clang-tools-extra/clang-tidy/cert/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/cert/CMakeLists.txt
@@ -20,6 +20,7 @@ add_clang_library(clangTidyCERTModule
   clangBasic
   clangLex
   clangTidy
+  clangTidyBugproneModule
   clangTidyGoogleModule
   clangTidyMiscModule
   clangTidyPerformanceModule
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index e998fa1b189a0..22acfa33feec4 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -134,6 +134,11 @@ Improvements to clang-tidy
   subclasses of ``NSObject`` and recommends calling a superclass initializer
   instead.
 
+- New alias :doc:`cert-oop54-cpp
+  <clang-tidy/checks/cert-oop54-cpp>` to
+  :doc:`bugprone-unhandled-self-assignment
+  <clang-tidy/checks/bugprone-unhandled-self-assignment>` was added.
+
 - New alias :doc:`cppcoreguidelines-explicit-virtual-functions
   <clang-tidy/checks/cppcoreguidelines-explicit-virtual-functions>` to
   :doc:`modernize-use-override
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-unhandled-self-assignment.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-unhandled-self-assignment.rst
index 64412ba049437..c4ccdd9579d6a 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-unhandled-self-assignment.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-unhandled-self-assignment.rst
@@ -3,11 +3,14 @@
 bugprone-unhandled-self-assignment
 ==================================
 
+`cert-oop54-cpp` redirects here as an alias for this check. For the CERT alias,
+the `WarnOnlyIfThisHasSuspiciousField` option is set to `0`.
+
 Finds user-defined copy assignment operators which do not protect the code
 against self-assignment either by checking self-assignment explicitly or
 using the copy-and-swap or the copy-and-move method.
 
-This check now searches only those classes which have any pointer or C array field
+By default, this check searches only those classes which have any pointer or C array field
 to avoid false positives. In case of a pointer or a C array, it's likely that self-copy
 assignment breaks the object if the copy assignment operator was not written with care.
 
@@ -114,3 +117,8 @@ temporary object into ``this`` (needs a move assignment operator):
       return *this;
     }
   };
+
+.. option:: WarnOnlyIfThisHasSuspiciousField
+
+  When non-zero, the check will warn only if the container class of the copy assignment operator
+  has any suspicious fields (pointer or C array). This option is set to `1` by default.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert-oop54-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert-oop54-cpp.rst
new file mode 100644
index 0000000000000..fe5095211f046
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/cert-oop54-cpp.rst
@@ -0,0 +1,10 @@
+.. title:: clang-tidy - cert-oop54-cpp
+.. meta::
+   :http-equiv=refresh: 5;URL=bugprone-unhandled-self-assignment.html
+
+cert-oop54-cpp
+==============
+
+The cert-oop54-cpp check is an alias, please see
+`bugprone-unhandled-self-assignment <bugprone-unhandled-self-assignment.html>`_
+for more information.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 7a0ebc292e468..c860d6a4753c8 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -98,6 +98,7 @@ Clang-Tidy Checks
    cert-msc50-cpp
    cert-msc51-cpp
    cert-oop11-cpp (redirects to performance-move-constructor-init) <cert-oop11-cpp>
+   cert-oop54-cpp (redirects to bugprone-unhandled-self-assignment) <cert-oop54-cpp>
    cppcoreguidelines-avoid-c-arrays (redirects to modernize-avoid-c-arrays) <cppcoreguidelines-avoid-c-arrays>
    cppcoreguidelines-avoid-goto
    cppcoreguidelines-avoid-magic-numbers (redirects to readability-magic-numbers) <cppcoreguidelines-avoid-magic-numbers>
diff --git a/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment-warn-only-if-this-has-suspicious-field.cpp b/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment-warn-only-if-this-has-suspicious-field.cpp
new file mode 100644
index 0000000000000..0e6ee47478dc1
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/bugprone-unhandled-self-assignment-warn-only-if-this-has-suspicious-field.cpp
@@ -0,0 +1,41 @@
+// RUN: %check_clang_tidy %s bugprone-unhandled-self-assignment %t -- \
+// RUN:   -config="{CheckOptions: \
+// RUN:             [{key: bugprone-unhandled-self-assignment.WarnOnlyIfThisHasSuspiciousField, \
+// RUN:               value: 0}]}"
+
+// Classes with pointer field are still caught.
+class PtrField {
+public:
+  PtrField &operator=(const PtrField &object) {
+    // CHECK-MESSAGES: [[@LINE-1]]:13: warning: operator=() does not handle self-assignment properly [bugprone-unhandled-self-assignment]
+    return *this;
+  }
+
+private:
+  int *p;
+};
+
+// With the option, check catches classes with trivial fields.
+class TrivialFields {
+public:
+  TrivialFields &operator=(const TrivialFields &object) {
+    // CHECK-MESSAGES: [[@LINE-1]]:18: warning: operator=() does not handle self-assignment properly [bugprone-unhandled-self-assignment]
+    return *this;
+  }
+
+private:
+  int m;
+  float f;
+  double d;
+  bool b;
+};
+
+// The check warns also when there is no field at all.
+// In this case, user-defined copy assignment operator is useless anyway.
+class ClassWithoutFields {
+public:
+  ClassWithoutFields &operator=(const ClassWithoutFields &object) {
+    // CHECK-MESSAGES: [[@LINE-1]]:23: warning: operator=() does not handle self-assignment properly [bugprone-unhandled-self-assignment]
+    return *this;
+  }
+};
diff --git a/clang-tools-extra/test/clang-tidy/cert-oop54-cpp.cpp b/clang-tools-extra/test/clang-tidy/cert-oop54-cpp.cpp
new file mode 100644
index 0000000000000..f601e672820b1
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/cert-oop54-cpp.cpp
@@ -0,0 +1,16 @@
+// RUN: %check_clang_tidy %s cert-oop54-cpp %t
+
+// Test whether bugprone-unhandled-self-assignment.WarnOnlyIfThisHasSuspiciousField option is set correctly.
+class TrivialFields {
+public:
+  TrivialFields &operator=(const TrivialFields &object) {
+    // CHECK-MESSAGES: [[@LINE-1]]:18: warning: operator=() does not handle self-assignment properly [cert-oop54-cpp]
+    return *this;
+  }
+
+private:
+  int m;
+  float f;
+  double d;
+  bool b;
+};

From e8df27d9256b38ec1a2467a1b9c087b00ffd17cc Mon Sep 17 00:00:00 2001
From: Kristof Umann <kristof.umann@ericsson.com>
Date: Thu, 23 May 2019 20:47:28 +0000
Subject: [PATCH 0087/1176] [analyzer] Add a new frontend flag to display all
 checker options

Add the new frontend flag -analyzer-checker-option-help to display all
checker/package options.

Differential Revision: https://reviews.llvm.org/D57858

llvm-svn: 361552
---
 clang/include/clang/Driver/CC1Options.td      |  3 ++
 .../StaticAnalyzer/Core/AnalyzerOptions.h     | 34 +++++++++++--
 .../StaticAnalyzer/Frontend/CheckerRegistry.h |  1 +
 .../StaticAnalyzer/Frontend/FrontendActions.h |  4 ++
 clang/lib/Frontend/CompilerInvocation.cpp     |  1 +
 .../ExecuteCompilerInvocation.cpp             | 10 ++++
 .../StaticAnalyzer/Core/AnalyzerOptions.cpp   | 32 ++++++++++++
 .../Frontend/CheckerRegistration.cpp          | 51 ++++++-------------
 .../Frontend/CheckerRegistry.cpp              | 51 +++++++++++++++----
 .../Analysis/analyzer-checker-option-help.c   | 19 +++++++
 clang/test/Analysis/analyzer-list-configs.c   | 11 ++--
 clang/test/Analysis/checker-plugins.c         |  9 ++++
 12 files changed, 168 insertions(+), 58 deletions(-)
 create mode 100644 clang/test/Analysis/analyzer-checker-option-help.c

diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index b9b6677853fd4..f7da3746bd4f3 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -143,6 +143,9 @@ def analyzer_list_enabled_checkers : Flag<["-"], "analyzer-list-enabled-checkers
 def analyzer_config : Separate<["-"], "analyzer-config">,
   HelpText<"Choose analyzer options to enable">;
 
+def analyzer_checker_option_help : Flag<["-"], "analyzer-checker-option-help">,
+  HelpText<"Display the list of checker and package options">;
+
 def analyzer_config_compatibility_mode : Separate<["-"], "analyzer-config-compatibility-mode">,
   HelpText<"Don't emit errors on invalid analyzer-config inputs">;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 6db3a269a2db8..6a54e157e88e1 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -166,6 +166,29 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   static std::vector<StringRef>
   getRegisteredCheckers(bool IncludeExperimental = false);
 
+  /// Convenience function for printing options or checkers and their
+  /// description in a formatted manner. If \p MinLineWidth is set to 0, no line
+  /// breaks are introduced for the description.
+  ///
+  /// Format, depending whether the option name's length is less then
+  /// \p OptionWidth:
+  ///
+  ///   <padding>EntryName<padding>Description
+  ///   <---------padding--------->Description
+  ///   <---------padding--------->Description
+  ///
+  ///   <padding>VeryVeryLongOptionName
+  ///   <---------padding--------->Description
+  ///   <---------padding--------->Description
+  ///   ^~~~~~~~ InitialPad
+  ///   ^~~~~~~~~~~~~~~~~~~~~~~~~~ EntryWidth
+  ///   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~MinLineWidth
+  static void printFormattedEntry(
+      llvm::raw_ostream &Out,
+      std::pair<StringRef, StringRef> EntryDescPair,
+      size_t EntryWidth, size_t InitialPad, size_t MinLineWidth = 0);
+
+
   /// Pair of checker name and enable/disable.
   std::vector<std::pair<std::string, bool>> CheckersControlList;
 
@@ -199,6 +222,7 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   unsigned ShowCheckerHelp : 1;
   unsigned ShowCheckerHelpHidden : 1;
   unsigned ShowEnabledCheckerList : 1;
+  unsigned ShowCheckerOptionList : 1;
   unsigned ShowConfigOptionsList : 1;
   unsigned ShouldEmitErrorsOnInvalidConfigValue : 1;
   unsigned AnalyzeAll : 1;
@@ -262,11 +286,11 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   AnalyzerOptions()
       : DisableAllChecks(false), ShowCheckerHelp(false),
         ShowCheckerHelpHidden(false), ShowEnabledCheckerList(false),
-        ShowConfigOptionsList(false), AnalyzeAll(false),
-        AnalyzerDisplayProgress(false), AnalyzeNestedBlocks(false),
-        eagerlyAssumeBinOpBifurcation(false), TrimGraph(false),
-        visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false),
-        PrintStats(false), NoRetryExhausted(false) {
+        ShowCheckerOptionList(false), ShowConfigOptionsList(false),
+        AnalyzeAll(false), AnalyzerDisplayProgress(false),
+        AnalyzeNestedBlocks(false), eagerlyAssumeBinOpBifurcation(false),
+        TrimGraph(false), visualizeExplodedGraphWithGraphViz(false),
+        UnoptimizedCFG(false), PrintStats(false), NoRetryExhausted(false) {
     llvm::sort(AnalyzerConfigCmdFlags);
   }
 
diff --git a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
index 875cb8edb8ed0..3a05c928774c0 100644
--- a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
+++ b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
@@ -272,6 +272,7 @@ class CheckerRegistry {
   void printCheckerWithDescList(raw_ostream &Out,
                                 size_t MaxNameChars = 30) const;
   void printEnabledCheckerList(raw_ostream &Out) const;
+  void printCheckerOptionList(raw_ostream &Out) const;
 
 private:
   /// Collect all enabled checkers. The returned container preserves the order
diff --git a/clang/include/clang/StaticAnalyzer/Frontend/FrontendActions.h b/clang/include/clang/StaticAnalyzer/Frontend/FrontendActions.h
index 5f26a4893c6d2..878b65a1b143c 100644
--- a/clang/include/clang/StaticAnalyzer/Frontend/FrontendActions.h
+++ b/clang/include/clang/StaticAnalyzer/Frontend/FrontendActions.h
@@ -61,6 +61,10 @@ void printEnabledCheckerList(raw_ostream &OS, ArrayRef<std::string> plugins,
                              DiagnosticsEngine &diags,
                              const LangOptions &LangOpts);
 void printAnalyzerConfigList(raw_ostream &OS);
+void printCheckerConfigList(raw_ostream &OS, ArrayRef<std::string> plugins,
+                            AnalyzerOptions &opts,
+                            DiagnosticsEngine &diags,
+                            const LangOptions &LangOpts);
 
 } // end GR namespace
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7cdc050e4673d..877e70b6616cb 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -286,6 +286,7 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args,
 
   Opts.ShowCheckerHelp = Args.hasArg(OPT_analyzer_checker_help);
   Opts.ShowCheckerHelpHidden = Args.hasArg(OPT_analyzer_checker_help_hidden);
+  Opts.ShowCheckerOptionList = Args.hasArg(OPT_analyzer_checker_option_help);
   Opts.ShowConfigOptionsList = Args.hasArg(OPT_analyzer_config_help);
   Opts.ShowEnabledCheckerList = Args.hasArg(OPT_analyzer_list_enabled_checkers);
   Opts.ShouldEmitErrorsOnInvalidConfigValue =
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index f77a865efa70a..27690be777b8d 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -247,6 +247,16 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) {
     return true;
   }
 
+  // Honor -analyzer-checker-option-help.
+  if (Clang->getAnalyzerOpts()->ShowCheckerOptionList) {
+    ento::printCheckerConfigList(llvm::outs(),
+                                 Clang->getFrontendOpts().Plugins,
+                                 *Clang->getAnalyzerOpts(),
+                                 Clang->getDiagnostics(),
+                                 Clang->getLangOpts());
+    return true;
+  }
+
   // Honor -analyzer-list-enabled-checkers.
   if (AnOpts.ShowEnabledCheckerList) {
     ento::printEnabledCheckerList(llvm::outs(),
diff --git a/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp b/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
index 68b2c052305b5..71abe2ae6c0e8 100644
--- a/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
+++ b/clang/lib/StaticAnalyzer/Core/AnalyzerOptions.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstddef>
@@ -48,6 +49,37 @@ AnalyzerOptions::getRegisteredCheckers(bool IncludeExperimental /* = false */) {
   return Result;
 }
 
+void AnalyzerOptions::printFormattedEntry(
+    llvm::raw_ostream &Out,
+    std::pair<StringRef, StringRef> EntryDescPair,
+    size_t InitialPad, size_t EntryWidth, size_t MinLineWidth) {
+
+  llvm::formatted_raw_ostream FOut(Out);
+
+  const size_t PadForDesc = InitialPad + EntryWidth;
+
+  FOut.PadToColumn(InitialPad) << EntryDescPair.first;
+  // If the buffer's length is greater then PadForDesc, print a newline.
+  if (FOut.getColumn() > PadForDesc)
+    FOut << '\n';
+
+  FOut.PadToColumn(PadForDesc);
+
+  if (MinLineWidth == 0) {
+    FOut << EntryDescPair.second;
+    return;
+  }
+
+  for (char C : EntryDescPair.second) {
+    if (FOut.getColumn() > MinLineWidth && C == ' ') {
+      FOut << '\n';
+      FOut.PadToColumn(PadForDesc);
+      continue;
+    }
+    FOut << C;
+  }
+}
+
 ExplorationStrategyKind
 AnalyzerOptions::getExplorationStrategy() const {
   auto K =
diff --git a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp
index 4ad362fe1e34e..1e45ee96145ab 100644
--- a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistration.cpp
@@ -18,7 +18,6 @@
 #include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
 #include "clang/StaticAnalyzer/Frontend/FrontendActions.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>
 
@@ -65,17 +64,20 @@ void ento::printEnabledCheckerList(raw_ostream &out,
       .printEnabledCheckerList(out);
 }
 
+void ento::printCheckerConfigList(raw_ostream &OS,
+                                  ArrayRef<std::string> plugins,
+                                  AnalyzerOptions &opts,
+                                  DiagnosticsEngine &diags,
+                                  const LangOptions &LangOpts) {
+  CheckerRegistry(plugins, diags, opts, LangOpts)
+      .printCheckerOptionList(OS);
+}
+
 void ento::printAnalyzerConfigList(raw_ostream &out) {
   out << "OVERVIEW: Clang Static Analyzer -analyzer-config Option List\n\n";
-  out << "USAGE: clang -cc1 [CLANG_OPTIONS] -analyzer-config "
-                                        "<OPTION1=VALUE,OPTION2=VALUE,...>\n\n";
-  out << "       clang -cc1 [CLANG_OPTIONS] -analyzer-config OPTION1=VALUE, "
-                                      "-analyzer-config OPTION2=VALUE, ...\n\n";
-  out << "       clang [CLANG_OPTIONS] -Xclang -analyzer-config -Xclang"
-                                        "<OPTION1=VALUE,OPTION2=VALUE,...>\n\n";
-  out << "       clang [CLANG_OPTIONS] -Xclang -analyzer-config -Xclang "
-                              "OPTION1=VALUE, -Xclang -analyzer-config -Xclang "
-                              "OPTION2=VALUE, ...\n\n";
+  out << "USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>\n\n";
+  out << "       -analyzer-config OPTION1=VALUE, -analyzer-config "
+         "OPTION2=VALUE, ...\n\n";
   out << "OPTIONS:\n\n";
 
   using OptionAndDescriptionTy = std::pair<StringRef, std::string>;
@@ -109,31 +111,10 @@ void ento::printAnalyzerConfigList(raw_ostream &out) {
     return LHS.first < RHS.first;
   });
 
-  constexpr size_t MinLineWidth = 70;
-  constexpr size_t PadForOpt = 2;
-  constexpr size_t OptionWidth = 30;
-  constexpr size_t PadForDesc = PadForOpt + OptionWidth;
-  static_assert(MinLineWidth > PadForDesc, "MinLineWidth must be greater!");
-
-  llvm::formatted_raw_ostream FOut(out);
-
   for (const auto &Pair : PrintableOptions) {
-    FOut.PadToColumn(PadForOpt) << Pair.first;
-
-    // If the buffer's length is greater then PadForDesc, print a newline.
-    if (FOut.getColumn() > PadForDesc)
-      FOut << '\n';
-
-    FOut.PadToColumn(PadForDesc);
-
-    for (char C : Pair.second) {
-      if (FOut.getColumn() > MinLineWidth && C == ' ') {
-        FOut << '\n';
-        FOut.PadToColumn(PadForDesc);
-        continue;
-      }
-      FOut << C;
-    }
-    FOut << "\n\n";
+    AnalyzerOptions::printFormattedEntry(out, Pair, /*InitialPad*/ 2,
+                                         /*EntryWidth*/ 30,
+                                         /*MinLineWidth*/ 70);
+    out << "\n\n";
   }
 }
diff --git a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
index d41ca0a8f32f6..d405933ca65c5 100644
--- a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
@@ -518,17 +518,8 @@ void CheckerRegistry::printCheckerWithDescList(raw_ostream &Out,
     if (!AnOpts.ShowCheckerHelpHidden && Checker.IsHidden)
       continue;
 
-    Out.indent(InitialPad) << Checker.FullName;
-
-    int Pad = OptionFieldWidth - Checker.FullName.size();
-
-    // Break on long option names.
-    if (Pad < 0) {
-      Out << '\n';
-      Pad = OptionFieldWidth + InitialPad;
-    }
-    Out.indent(Pad + 2) << Checker.Desc;
-
+    AnalyzerOptions::printFormattedEntry(Out, {Checker.FullName, Checker.Desc},
+                                         InitialPad, OptionFieldWidth);
     Out << '\n';
   }
 }
@@ -540,3 +531,41 @@ void CheckerRegistry::printEnabledCheckerList(raw_ostream &Out) const {
   for (const auto *i : EnabledCheckers)
     Out << i->FullName << '\n';
 }
+
+void CheckerRegistry::printCheckerOptionList(raw_ostream &Out) const {
+  Out << "OVERVIEW: Clang Static Analyzer Checker and Package Option List\n\n";
+  Out << "USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>\n\n";
+  Out << "       -analyzer-config OPTION1=VALUE, -analyzer-config "
+         "OPTION2=VALUE, ...\n\n";
+  Out << "OPTIONS:\n\n";
+
+  std::multimap<StringRef, const CmdLineOption &> OptionMap;
+
+  for (const CheckerInfo &Checker : Checkers) {
+    for (const CmdLineOption &Option : Checker.CmdLineOptions) {
+      OptionMap.insert({Checker.FullName, Option});
+    }
+  }
+
+  for (const PackageInfo &Package : Packages) {
+    for (const CmdLineOption &Option : Package.CmdLineOptions) {
+      OptionMap.insert({Package.FullName, Option});
+    }
+  }
+
+  for (const std::pair<StringRef, const CmdLineOption &> &Entry : OptionMap) {
+    const CmdLineOption &Option = Entry.second;
+    std::string FullOption = (Entry.first + ":" + Option.OptionName).str();
+
+    std::string Desc =
+        ("(" + Option.OptionType + ") " + Option.Description + " (default: " +
+         (Option.DefaultValStr.empty() ? "\"\"" : Option.DefaultValStr) + ")")
+            .str();
+
+    AnalyzerOptions::printFormattedEntry(Out, {FullOption, Desc},
+                                         /*InitialPad*/ 2,
+                                         /*EntryWidth*/ 50,
+                                         /*MinLineWidth*/ 90);
+    Out << "\n\n";
+  }
+}
diff --git a/clang/test/Analysis/analyzer-checker-option-help.c b/clang/test/Analysis/analyzer-checker-option-help.c
new file mode 100644
index 0000000000000..f59d8515823cc
--- /dev/null
+++ b/clang/test/Analysis/analyzer-checker-option-help.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -analyzer-checker-option-help 2>&1 | FileCheck %s
+
+// CHECK: OVERVIEW: Clang Static Analyzer Checker and Package Option List
+//
+// CHECK: USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>
+//
+// CHECK:        -analyzer-config OPTION1=VALUE, -analyzer-config
+// CHECK-SAME:   OPTION2=VALUE, ...
+//
+// CHECK: OPTIONS:
+//
+// CHECK:   alpha.clone.CloneChecker:MinimumCloneComplexity
+// CHECK-SAME:   (int) Ensures that every clone has at least
+// CHECK:        the given complexity. Complexity is here
+// CHECK:        defined as the total amount of children
+// CHECK:        of a statement. This constraint assumes
+// CHECK:        the first statement in the group is representative
+// CHECK:        for all other statements in the group in
+// CHECK:        terms of complexity. (default: 50)
diff --git a/clang/test/Analysis/analyzer-list-configs.c b/clang/test/Analysis/analyzer-list-configs.c
index a02b2a9a85454..67fa906429bae 100644
--- a/clang/test/Analysis/analyzer-list-configs.c
+++ b/clang/test/Analysis/analyzer-list-configs.c
@@ -1,14 +1,11 @@
 // RUN: %clang_cc1 -analyzer-config-help 2>&1 | FileCheck %s
+
 // CHECK: OVERVIEW: Clang Static Analyzer -analyzer-config Option List
 //
-// CHECK: USAGE: clang -cc1 [CLANG_OPTIONS] -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>
-//
-// CHECK:      clang -cc1 [CLANG_OPTIONS] -analyzer-config OPTION1=VALUE, -analyzer-config OPTION2=VALUE, ...
-//
-// CHECK:      clang [CLANG_OPTIONS] -Xclang -analyzer-config -Xclang<OPTION1=VALUE,OPTION2=VALUE,...>
-//
-// CHECK:      clang [CLANG_OPTIONS] -Xclang -analyzer-config -Xclang OPTION1=VALUE, -Xclang -analyzer-config -Xclang OPTION2=VALUE, ...
+// CHECK: USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>
 //
+// CHECK:        -analyzer-config OPTION1=VALUE, -analyzer-config
+// CHECK-SAME:   OPTION2=VALUE, ...
 //
 // CHECK: OPTIONS:
 //
diff --git a/clang/test/Analysis/checker-plugins.c b/clang/test/Analysis/checker-plugins.c
index 2dbebfe29d7e0..b5444fa6cbf7f 100644
--- a/clang/test/Analysis/checker-plugins.c
+++ b/clang/test/Analysis/checker-plugins.c
@@ -104,3 +104,12 @@ void caller() {
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-CORRECTED-BOOL-VALUE
 
 // CHECK-CORRECTED-BOOL-VALUE: example.MyChecker:ExampleOption = false
+
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -load %llvmshlibdir/CheckerOptionHandlingAnalyzerPlugin%pluginext\
+// RUN:   -analyzer-checker=example.MyChecker \
+// RUN:   -analyzer-checker-option-help \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-CHECKER-OPTION-HELP
+
+// CHECK-CHECKER-OPTION-HELP: example.MyChecker:ExampleOption  (bool) This is an
+// CHECK-CHECKER-OPTION-HELP-SAME: example checker opt. (default: false)

From 987fdfd9a7197d4d1542817fd6c17b5fbb5856d7 Mon Sep 17 00:00:00 2001
From: Kit Barton <kbarton@ca.ibm.com>
Date: Thu, 23 May 2019 20:53:05 +0000
Subject: [PATCH 0088/1176] Revert     [LOOPINFO] Extend Loop object to add
 utilities to get the loop bounds, step, induction variable, and guard branch.

This reverts r361517 (git commit 2049e4dd8f61100f88f14db33bd95d197bcbfbbc)

llvm-svn: 361553
---
 llvm/include/llvm/Analysis/LoopInfo.h         | 162 ----
 llvm/lib/Analysis/LoopInfo.cpp                | 246 -----
 .../lib/Transforms/Scalar/LoopInterchange.cpp |  29 +-
 llvm/unittests/Analysis/LoopInfoTest.cpp      | 900 ------------------
 4 files changed, 28 insertions(+), 1309 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index dd789de493875..6b964cdf9eae5 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -54,12 +54,9 @@ namespace llvm {
 class DominatorTree;
 class LoopInfo;
 class Loop;
-class InductionDescriptor;
 class MDNode;
 class MemorySSAUpdater;
 class PHINode;
-class PostDominatorTree;
-class ScalarEvolution;
 class raw_ostream;
 template <class N, bool IsPostDom> class DominatorTreeBase;
 template <class N, class M> class LoopInfoBase;
@@ -532,165 +529,6 @@ class Loop : public LoopBase<BasicBlock, Loop> {
   bool getIncomingAndBackEdge(BasicBlock *&Incoming,
                               BasicBlock *&Backedge) const;
 
-  /// Below are some utilities to get loop bounds and induction variable, and
-  /// check if a given phinode is an auxiliary induction variable, as well as
-  /// checking if the loop is canonical.
-  ///
-  /// Here is an example:
-  /// \code
-  /// for (int i = lb; i < ub; i+=step)
-  ///   <loop body>
-  /// --- pseudo LLVMIR ---
-  /// beforeloop:
-  ///   guardcmp = (lb < ub)
-  ///   if (guardcmp) goto preheader; else goto afterloop
-  /// preheader:
-  /// loop:
-  ///   i_1 = phi[{lb, preheader}, {i_2, latch}]
-  ///   <loop body>
-  ///   i_2 = i_1 + step
-  /// latch:
-  ///   cmp = (i_2 < ub)
-  ///   if (cmp) goto loop
-  /// exit:
-  /// afterloop:
-  /// \endcode
-  ///
-  /// - getBounds
-  ///   - getInitialIVValue      --> lb
-  ///   - getStepInst            --> i_2 = i_1 + step
-  ///   - getStepValue           --> step
-  ///   - getFinalIVValue        --> ub
-  ///   - getCanonicalPredicate  --> '<'
-  ///   - getDirection           --> Increasing
-  ///
-  /// - getInductionVariable            --> i_1
-  /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
-  /// - isCanonical                     --> false
-  struct LoopBounds {
-    /// Return the LoopBounds object if
-    /// - the given \p IndVar is an induction variable
-    /// - the initial value of the induction variable can be found
-    /// - the step instruction of the induction variable can be found
-    /// - the final value of the induction variable can be found
-    ///
-    /// Else None.
-    static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
-                                                ScalarEvolution &SE);
-
-    /// Get the initial value of the loop induction variable.
-    Value &getInitialIVValue() const { return InitialIVValue; }
-
-    /// Get the instruction that updates the loop induction variable.
-    Instruction &getStepInst() const { return StepInst; }
-
-    /// Get the step that the loop induction variable gets updated by in each
-    /// loop iteration. Return nullptr if not found.
-    Value *getStepValue() const { return StepValue; }
-
-    /// Get the final value of the loop induction variable.
-    Value &getFinalIVValue() const { return FinalIVValue; }
-
-    /// Return the canonical predicate for the latch compare instruction, if
-    /// able to be calcuated. Else BAD_ICMP_PREDICATE.
-    ///
-    /// A predicate is considered as canonical if requirements below are all
-    /// satisfied:
-    /// 1. The first successor of the latch branch is the loop header
-    ///    If not, inverse the predicate.
-    /// 2. One of the operands of the latch comparison is StepInst
-    ///    If not, and
-    ///    - if the current calcuated predicate is not ne or eq, flip the
-    ///      predicate.
-    ///    - else if the loop is increasing, return slt
-    ///      (notice that it is safe to change from ne or eq to sign compare)
-    ///    - else if the loop is decreasing, return sgt
-    ///      (notice that it is safe to change from ne or eq to sign compare)
-    ///
-    /// Here is an example when both (1) and (2) are not satisfied:
-    /// \code
-    /// loop.header:
-    ///  %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
-    ///  %inc = add %iv, %step
-    ///  %cmp = slt %iv, %finaliv
-    ///  br %cmp, %loop.exit, %loop.header
-    /// loop.exit:
-    /// \endcode
-    /// - The second successor of the latch branch is the loop header instead
-    ///   of the first successor (slt -> sge)
-    /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
-    ///   instead of the StepInst (%inc) (sge -> sgt)
-    ///
-    /// The predicate would be sgt if both (1) and (2) are satisfied.
-    /// getCanonicalPredicate() returns sgt for this example.
-    /// Note: The IR is not changed.
-    ICmpInst::Predicate getCanonicalPredicate() const;
-
-    /// An enum for the direction of the loop
-    /// - for (int i = 0; i < ub; ++i)  --> Increasing
-    /// - for (int i = ub; i > 0; --i)  --> Descresing
-    /// - for (int i = x; i != y; i+=z) --> Unknown
-    enum class Direction { Increasing, Decreasing, Unknown };
-
-    /// Get the direction of the loop.
-    Direction getDirection() const;
-
-  private:
-    LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
-               ScalarEvolution &SE)
-        : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
-          FinalIVValue(F), SE(SE) {}
-
-    const Loop &L;
-
-    // The initial value of the loop induction variable
-    Value &InitialIVValue;
-
-    // The instruction that updates the loop induction variable
-    Instruction &StepInst;
-
-    // The value that the loop induction variable gets updated by in each loop
-    // iteration
-    Value *StepValue;
-
-    // The final value of the loop induction variable
-    Value &FinalIVValue;
-
-    ScalarEvolution &SE;
-  };
-
-  /// Return the struct LoopBounds collected if all struct members are found,
-  /// else None.
-  Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
-
-  /// Return the loop induction variable if found, else return nullptr.
-  /// An instruction is considered as the loop induction variable if
-  /// - it is an induction variable of the loop; and
-  /// - it is used to determine the condition of the branch in the loop latch
-  ///
-  /// Note: the induction variable doesn't need to be canonical, i.e. starts at
-  /// zero and increments by one each time through the loop (but it can be).
-  PHINode *getInductionVariable(ScalarEvolution &SE) const;
-
-  /// Get the loop induction descriptor for the loop induction variable. Return
-  /// true if the loop induction variable is found.
-  bool getInductionDescriptor(ScalarEvolution &SE,
-                              InductionDescriptor &IndDesc) const;
-
-  /// Return true if the given PHINode \p AuxIndVar is
-  /// - in the loop header
-  /// - not used outside of the loop
-  /// - incremented by a loop invariant step for each loop iteration
-  /// - step instruction opcode should be add or sub
-  /// Note: auxiliary induction variable is not required to be used in the
-  ///       conditional branch in the loop latch. (but it can be)
-  bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
-                                    ScalarEvolution &SE) const;
-
-  /// Return true if the loop induction variable starts at zero and increments
-  /// by one each time through the loop.
-  bool isCanonical(ScalarEvolution &SE) const;
-
   /// Return true if the Loop is in LCSSA form.
   bool isLCSSAForm(DominatorTree &DT) const;
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 50e08e994876a..aa933d98f249b 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -17,13 +17,10 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
@@ -167,249 +164,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
   return nullptr;
 }
 
-/// Return true if V1 and V2 have the same value ignoring bit width.
-static bool isEqualIgnoreBitwidth(Value &V1, Value &V2, ScalarEvolution &SE) {
-  const SCEV *S1 = SE.getSCEV(&V1);
-  const SCEV *S2 = SE.getSCEV(&V2);
-  Type *WiderType = SE.getWiderType(S1->getType(), S2->getType());
-  S1 = SE.getNoopOrAnyExtend(S1, WiderType);
-  S2 = SE.getNoopOrAnyExtend(S2, WiderType);
-  return SE.getMinusSCEV(S1, S2)->isZero();
-}
-
-/// Get the latch condition instruction.
-static ICmpInst *getLatchCmpInst(const Loop &L) {
-  if (BasicBlock *Latch = L.getLoopLatch())
-    if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
-      if (BI->isConditional())
-        return dyn_cast<ICmpInst>(BI->getCondition());
-
-  return nullptr;
-}
-
-/// Return the final value of the loop induction variable if found.
-static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
-                               const Instruction &StepInst) {
-  ICmpInst *LatchCmpInst = getLatchCmpInst(L);
-  if (!LatchCmpInst)
-    return nullptr;
-
-  Value *Op0 = LatchCmpInst->getOperand(0);
-  Value *Op1 = LatchCmpInst->getOperand(1);
-  if (Op0 == &IndVar || Op0 == &StepInst)
-    return Op1;
-
-  if (Op1 == &IndVar || Op1 == &StepInst)
-    return Op0;
-
-  return nullptr;
-}
-
-Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
-                                                       PHINode &IndVar,
-                                                       ScalarEvolution &SE) {
-  InductionDescriptor IndDesc;
-  if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
-    return None;
-
-  Value *InitialIVValue = IndDesc.getStartValue();
-  Instruction *StepInst = IndDesc.getInductionBinOp();
-  if (!InitialIVValue || !StepInst)
-    return None;
-
-  const SCEV *Step = IndDesc.getStep();
-  Value *StepInstOp1 = StepInst->getOperand(1);
-  Value *StepInstOp0 = StepInst->getOperand(0);
-  Value *StepValue = nullptr;
-  if (SE.getSCEV(StepInstOp1) == Step)
-    StepValue = StepInstOp1;
-  else if (SE.getSCEV(StepInstOp0) == Step)
-    StepValue = StepInstOp0;
-
-  Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
-  if (!FinalIVValue)
-    return None;
-
-  return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
-                    SE);
-}
-
-using Direction = Loop::LoopBounds::Direction;
-
-ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
-  BasicBlock *Latch = L.getLoopLatch();
-  assert(Latch && "Expecting valid latch");
-
-  BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
-  assert(BI && BI->isConditional() && "Expecting conditional latch branch");
-
-  ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
-  assert(LatchCmpInst &&
-         "Expecting the latch compare instruction to be a CmpInst");
-
-  // Need to inverse the predicate when first successor is not the loop
-  // header
-  ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
-                                 ? LatchCmpInst->getPredicate()
-                                 : LatchCmpInst->getInversePredicate();
-
-  if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
-    Pred = ICmpInst::getSwappedPredicate(Pred);
-
-  // Need to flip strictness of the predicate when the latch compare instruction
-  // is not using StepInst
-  if (LatchCmpInst->getOperand(0) == &getStepInst() ||
-      LatchCmpInst->getOperand(1) == &getStepInst())
-    return Pred;
-
-  // Cannot flip strictness of NE and EQ
-  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
-    return ICmpInst::getFlippedStrictnessPredicate(Pred);
-
-  Direction D = getDirection();
-  if (D == Direction::Increasing)
-    return ICmpInst::ICMP_SLT;
-
-  if (D == Direction::Decreasing)
-    return ICmpInst::ICMP_SGT;
-
-  // If cannot determine the direction, then unable to find the canonical
-  // predicate
-  return ICmpInst::BAD_ICMP_PREDICATE;
-}
-
-Direction Loop::LoopBounds::getDirection() const {
-  if (const SCEVAddRecExpr *StepAddRecExpr =
-          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
-    if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
-      if (SE.isKnownPositive(StepRecur))
-        return Direction::Increasing;
-      if (SE.isKnownNegative(StepRecur))
-        return Direction::Decreasing;
-    }
-
-  return Direction::Unknown;
-}
-
-Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
-  if (PHINode *IndVar = getInductionVariable(SE))
-    return LoopBounds::getBounds(*this, *IndVar, SE);
-
-  return None;
-}
-
-PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
-  if (!isLoopSimplifyForm())
-    return nullptr;
-
-  BasicBlock *Header = getHeader();
-  assert(Header && "Expected a valid loop header");
-  BasicBlock *Latch = getLoopLatch();
-  assert(Latch && "Expected a valid loop latch");
-  ICmpInst *CmpInst = getLatchCmpInst(*this);
-  if (!CmpInst)
-    return nullptr;
-
-  // case 1:
-  // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
-  // StepInst = IndVar + step
-  // cmp = StepInst < FinalValue
-  Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
-  Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
-  // Loop over all of the PHI nodes in loop header, store the PHI node that has
-  // incoming value from latch equals to the StepInst
-  BinaryOperator *StepInst = nullptr;
-  PHINode *IndVar = nullptr;
-  for (PHINode &PN : Header->phis()) {
-    Value *IncomingValue = PN.getIncomingValueForBlock(Latch);
-    assert(IncomingValue && "Expecting valid incoming value from latch");
-    if (IncomingValue == LatchCmpOp0 || IncomingValue == LatchCmpOp1) {
-      IndVar = &PN;
-      StepInst = dyn_cast<BinaryOperator>(IncomingValue);
-      if (StepInst)
-        if (isEqualIgnoreBitwidth(*StepInst->getOperand(0), *IndVar, SE) ||
-            isEqualIgnoreBitwidth(*StepInst->getOperand(1), *IndVar, SE))
-          return IndVar;
-    }
-  }
-
-  // case 2:
-  // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
-  // StepInst = IndVar + step
-  // cmp = IndVar < FinalValue
-  for (Value *Op : CmpInst->operands()) {
-    PHINode *IndVar = dyn_cast<PHINode>(Op);
-    if (!IndVar)
-      continue;
-
-    if (IndVar->getParent() != Header)
-      continue;
-
-    Value *IncomingValue = IndVar->getIncomingValueForBlock(Latch);
-    assert(IncomingValue && "Expecting valid incoming value from latch");
-    StepInst = dyn_cast<BinaryOperator>(IncomingValue);
-    if (StepInst)
-      if (StepInst->getOperand(0) == IndVar ||
-          StepInst->getOperand(1) == IndVar)
-        return IndVar;
-  }
-
-  return nullptr;
-}
-
-bool Loop::getInductionDescriptor(ScalarEvolution &SE,
-                                  InductionDescriptor &IndDesc) const {
-  if (PHINode *IndVar = getInductionVariable(SE))
-    return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
-
-  return false;
-}
-
-bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
-                                        ScalarEvolution &SE) const {
-  // Located in the loop header
-  BasicBlock *Header = getHeader();
-  if (AuxIndVar.getParent() != Header)
-    return false;
-
-  // No uses outside of the loop
-  for (User *U : AuxIndVar.users())
-    if (const Instruction *I = dyn_cast<Instruction>(U))
-      if (!contains(I))
-        return false;
-
-  InductionDescriptor IndDesc;
-  if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
-    return false;
-
-  // The step instruction opcode should be add or sub.
-  if (IndDesc.getInductionOpcode() != Instruction::Add &&
-      IndDesc.getInductionOpcode() != Instruction::Sub)
-    return false;
-
-  // Incremented by a loop invariant step for each loop iteration
-  return SE.isLoopInvariant(IndDesc.getStep(), this);
-}
-
-bool Loop::isCanonical(ScalarEvolution &SE) const {
-  InductionDescriptor IndDesc;
-  if (!getInductionDescriptor(SE, IndDesc))
-    return false;
-
-  ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
-  if (!Init || !Init->isZero())
-    return false;
-
-  if (IndDesc.getInductionOpcode() != Instruction::Add)
-    return false;
-
-  ConstantInt *Step = IndDesc.getConstIntStepValue();
-  if (!Step || !Step->isOne())
-    return false;
-
-  return true;
-}
-
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
                                DominatorTree &DT) {
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index ad7113cb0e9a0..bec5af584f438 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -292,6 +292,33 @@ static LoopVector populateWorklist(Loop &L) {
   return LoopList;
 }
 
+static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
+  PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
+  if (InnerIndexVar)
+    return InnerIndexVar;
+  if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
+    return nullptr;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PhiVar = cast<PHINode>(I);
+    Type *PhiTy = PhiVar->getType();
+    if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+        !PhiTy->isPointerTy())
+      return nullptr;
+    const SCEVAddRecExpr *AddRec =
+        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
+    if (!AddRec || !AddRec->isAffine())
+      continue;
+    const SCEV *Step = AddRec->getStepRecurrence(*SE);
+    if (!isa<SCEVConstant>(Step))
+      continue;
+    // Found the induction variable.
+    // FIXME: Handle loops with more than one induction variable. Note that,
+    // currently, legality makes sure we have only one induction variable.
+    return PhiVar;
+  }
+  return nullptr;
+}
+
 namespace {
 
 /// LoopInterchangeLegality checks if it is legal to interchange the loop.
@@ -1200,7 +1227,7 @@ bool LoopInterchangeTransform::transform() {
   if (InnerLoop->getSubLoops().empty()) {
     BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
     LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n");
-    PHINode *InductionPHI = InnerLoop->getInductionVariable(*SE);
+    PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
     if (!InductionPHI) {
       LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
       return false;
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 005e1dc405b75..483532a187527 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -7,10 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/SourceMgr.h"
@@ -30,26 +26,6 @@ runWithLoopInfo(Module &M, StringRef FuncName,
   Test(*F, LI);
 }
 
-/// Build the loop info and scalar evolution for the function and run the Test.
-static void runWithLoopInfoPlus(
-    Module &M, StringRef FuncName,
-    function_ref<void(Function &F, LoopInfo &LI, ScalarEvolution &SE,
-                      PostDominatorTree &PDT)>
-        Test) {
-  auto *F = M.getFunction(FuncName);
-  ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
-
-  TargetLibraryInfoImpl TLII;
-  TargetLibraryInfo TLI(TLII);
-  AssumptionCache AC(*F);
-  DominatorTree DT(*F);
-  LoopInfo LI(DT);
-  ScalarEvolution SE(*F, TLI, AC, DT, LI);
-
-  PostDominatorTree PDT(*F);
-  Test(*F, LI, SE, PDT);
-}
-
 static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
                                               const char *ModuleStr) {
   SMDiagnostic Err;
@@ -234,879 +210,3 @@ TEST(LoopInfoTest, PreorderTraversals) {
   EXPECT_EQ(&L_0_1, ReverseSiblingPreorder[6]);
   EXPECT_EQ(&L_0_0, ReverseSiblingPreorder[7]);
 }
-
-TEST(LoopInfoTest, CanonicalLoop) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithInverseGuardSuccs) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp sge i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.end, label %for.preheader\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithSwappedGuardCmp) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp sgt i32 %ub, 0\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp sge i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.exit, label %for.body\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithInverseLatchSuccs) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp sge i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.exit, label %for.body\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithLatchCmpNE) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp ne i32 %i, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithGuardCmpSLE) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %ubPlusOne = add i32 %ub, 1\n"
-      "  %guardcmp = icmp sle i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp ne i32 %i, %ubPlusOne\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ubPlusOne");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopNonConstantStep) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = zext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, %step\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(), Loop::LoopBounds::Direction::Unknown);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopUnsignedBounds) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp ult i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = zext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add i32 %i, 1\n"
-      "  %cmp = icmp ult i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_ULT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, DecreasingLoop) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ %ub, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = sub nsw i32 %i, 1\n"
-      "  %cmp = icmp sgt i32 %inc, 0\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        EXPECT_EQ(Bounds->getInitialIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_EQ(StepValue, nullptr);
-        ConstantInt *FinalIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getFinalIVValue());
-        EXPECT_TRUE(FinalIVValue && FinalIVValue->isZero());
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SGT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Decreasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, CannotFindDirection) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, %step\n"
-      "  %cmp = icmp ne i32 %i, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(*M, "foo",
-                      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-                          PostDominatorTree &PDT) {
-                        Function::iterator FI = F.begin();
-                        // First two basic block are entry and for.preheader
-                        // - skip them.
-                        ++FI;
-                        BasicBlock *Header = &*(++FI);
-                        assert(Header->getName() == "for.body");
-                        Loop *L = LI.getLoopFor(Header);
-                        EXPECT_NE(L, nullptr);
-
-                        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-                        EXPECT_NE(Bounds, None);
-                        ConstantInt *InitialIVValue =
-                            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-                        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-                        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-                        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
-                        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-                        EXPECT_EQ(Bounds->getCanonicalPredicate(),
-                                  ICmpInst::BAD_ICMP_PREDICATE);
-                        EXPECT_EQ(Bounds->getDirection(),
-                                  Loop::LoopBounds::Direction::Unknown);
-                        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-                      });
-}
-
-TEST(LoopInfoTest, ZextIndVar) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %for.body ]\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %wide.trip.count = zext i32 %ub to i64\n"
-      "  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count\n"
-      "  br i1 %exitcond, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "indvars.iv.next");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "wide.trip.count");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_NE);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "indvars.iv");
-      });
-}
-
-TEST(LoopInfoTest, UnguardedLoop) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First basic block is entry - skip it.
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, UnguardedLoopWithControlFlow) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub, i1 %cond) {\n"
-      "entry:\n"
-      "  br i1 %cond, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopNest) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.outer.preheader, label %for.end\n"
-      "for.outer.preheader:\n"
-      "  br label %for.outer\n"
-      "for.outer:\n"
-      "  %j = phi i32 [ 0, %for.outer.preheader ], [ %inc.outer, %for.outer.latch ]\n"
-      "  br i1 %guardcmp, label %for.inner.preheader, label %for.outer.latch\n"
-      "for.inner.preheader:\n"
-      "  br label %for.inner\n"
-      "for.inner:\n"
-      "  %i = phi i32 [ 0, %for.inner.preheader ], [ %inc, %for.inner ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.inner, label %for.inner.exit\n"
-      "for.inner.exit:\n"
-      "  br label %for.outer.latch\n"
-      "for.outer.latch:\n"
-      "  %inc.outer = add nsw i32 %j, 1\n"
-      "  %cmp.outer = icmp slt i32 %inc.outer, %ub\n"
-      "  br i1 %cmp.outer, label %for.outer, label %for.outer.exit\n"
-      "for.outer.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.outer.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.outer");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc.outer");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "j");
-
-        // Next two basic blocks are for.outer and for.inner.preheader - skip
-        // them.
-        ++FI;
-        Header = &*(++FI);
-        assert(Header->getName() == "for.inner");
-        L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> InnerBounds = L->getBounds(SE);
-        EXPECT_NE(InnerBounds, None);
-        InitialIVValue =
-            dyn_cast<ConstantInt>(&InnerBounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(InnerBounds->getStepInst().getName(), "inc");
-        StepValue = dyn_cast_or_null<ConstantInt>(InnerBounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(InnerBounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(InnerBounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(InnerBounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, AuxiliaryIV) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %aux = phi i32 [ 0, %for.preheader ], [ %auxinc, %for.body ]\n"
-      "  %loopvariant = phi i32 [ 0, %for.preheader ], [ %loopvariantinc, %for.body ]\n"
-      "  %usedoutside = phi i32 [ 0, %for.preheader ], [ %usedoutsideinc, %for.body ]\n"
-      "  %mulopcode = phi i32 [ 0, %for.preheader ], [ %mulopcodeinc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %mulopcodeinc = mul nsw i32 %mulopcode, 5\n"
-      "  %usedoutsideinc = add nsw i32 %usedoutside, 5\n"
-      "  %loopvariantinc = add nsw i32 %loopvariant, %i\n"
-      "  %auxinc = add nsw i32 %aux, 5\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  %lcssa = phi i32 [ %usedoutside, %for.body ]\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-        BasicBlock::iterator II = Header->begin();
-        PHINode &Instruction_i = cast<PHINode>(*(II));
-        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_i, SE));
-        PHINode &Instruction_aux = cast<PHINode>(*(++II));
-        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_aux, SE));
-        PHINode &Instruction_loopvariant = cast<PHINode>(*(++II));
-        EXPECT_FALSE(
-            L->isAuxiliaryInductionVariable(Instruction_loopvariant, SE));
-        PHINode &Instruction_usedoutside = cast<PHINode>(*(++II));
-        EXPECT_FALSE(
-            L->isAuxiliaryInductionVariable(Instruction_usedoutside, SE));
-        PHINode &Instruction_mulopcode = cast<PHINode>(*(++II));
-        EXPECT_FALSE(
-            L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE));
-      });
-}

From f53c502e0b5abb5a72bea5f7081baa45a8a99639 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 23 May 2019 21:04:01 +0000
Subject: [PATCH 0089/1176] [TTI] Fix some typos in comments. NFC

'implementaion' -> 'implementation'
'non-unform' -> 'non-uniform'
'mimimum' -> 'minimum'

Patch by Pavel Samolysov

Differential Revision: https://reviews.llvm.org/D62136

llvm-svn: 361554
---
 llvm/include/llvm/Analysis/TargetTransformInfo.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 75f07989785b5..5a94bfad7e037 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -80,7 +80,7 @@ class TargetTransformInfo {
   /// API below.
   ///
   /// This is used by targets to construct a TTI wrapping their target-specific
-  /// implementaion that encodes appropriate costs for their target.
+  /// implementation that encodes appropriate costs for their target.
   template <typename T> TargetTransformInfo(T Impl);
 
   /// Construct a baseline TTI object using a minimal implementation of
@@ -246,7 +246,7 @@ class TargetTransformInfo {
                        ArrayRef<const Value *> Arguments,
                        const User *U = nullptr) const;
 
-  /// \Return the expected cost of a memcpy, which could e.g. depend on the
+  /// \return the expected cost of a memcpy, which could e.g. depend on the
   /// source/destination type and alignment and the number of bytes copied.
   int getMemcpyCost(const Instruction *I) const;
 
@@ -304,7 +304,7 @@ class TargetTransformInfo {
 
   // Returns true for the target specific
   // set of operations which produce uniform result
-  // even taking non-unform arguments
+  // even taking non-uniform arguments
   bool isAlwaysUniform(const Value *V) const;
 
   /// Returns the address space ID for a target's 'flat' address space. Note
@@ -716,7 +716,7 @@ class TargetTransformInfo {
   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
 
   /// \return The minimum vectorization factor for types of given element
-  /// bit width, or 0 if there is no mimimum VF. The returned value only
+  /// bit width, or 0 if there is no minimum VF. The returned value only
   /// applies when shouldMaximizeVectorBandwidth returns true.
   unsigned getMinimumVF(unsigned ElemWidth) const;
 

From b4cb7d8045e38b7fcca3cf6da254b2a1ba7b5710 Mon Sep 17 00:00:00 2001
From: "J. Ryan Stinnett" <jryans@gmail.com>
Date: Thu, 23 May 2019 21:13:50 +0000
Subject: [PATCH 0090/1176] [NFC] Add blank line (test commit)

llvm-svn: 361555
---
 lldb/tools/debugserver/source/debugserver.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/tools/debugserver/source/debugserver.cpp b/lldb/tools/debugserver/source/debugserver.cpp
index 5a0681847a8e1..0698d69375f6c 100644
--- a/lldb/tools/debugserver/source/debugserver.cpp
+++ b/lldb/tools/debugserver/source/debugserver.cpp
@@ -490,6 +490,7 @@ RNBRunLoopMode HandleProcessStateChange(RNBRemote *remote, bool initialize) {
   // Catch all...
   return eRNBRunLoopModeExit;
 }
+
 // This function handles the case where our inferior program is stopped and
 // we are waiting for gdb remote protocol packets. When a packet occurs that
 // makes the inferior run, we need to leave this function with a new state

From ca6a8ae0bffe88f3f0974316b2408b2548ff6f77 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Thu, 23 May 2019 21:30:30 +0000
Subject: [PATCH 0091/1176] ELF: Remove a comparison against In.EhFrame. NFCI.

This won't work once we have multiple .eh_frame sections.

Differential Revision: https://reviews.llvm.org/D62280

llvm-svn: 361556
---
 lld/ELF/MapFile.cpp         | 9 +++++----
 lld/ELF/SyntheticSections.h | 4 ++++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 11720e196d3a9..2f1921ec9837d 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -106,7 +106,7 @@ getSymbolStrings(ArrayRef<Defined *> Syms) {
 // .eh_frame tend to contain a lot of section pieces that are contiguous
 // both in input file and output file. Such pieces are squashed before
 // being displayed to make output compact.
-static void printEhFrame(raw_ostream &OS, OutputSection *OSec) {
+static void printEhFrame(raw_ostream &OS, const EhFrameSection *Sec) {
   std::vector<EhSectionPiece> Pieces;
 
   auto Add = [&](const EhSectionPiece &P) {
@@ -123,13 +123,14 @@ static void printEhFrame(raw_ostream &OS, OutputSection *OSec) {
   };
 
   // Gather section pieces.
-  for (const CieRecord *Rec : In.EhFrame->getCieRecords()) {
+  for (const CieRecord *Rec : Sec->getCieRecords()) {
     Add(*Rec->Cie);
     for (const EhSectionPiece *Fde : Rec->Fdes)
       Add(*Fde);
   }
 
   // Print out section pieces.
+  const OutputSection *OSec = Sec->getOutputSection();
   for (EhSectionPiece &P : Pieces) {
     writeHeader(OS, OSec->Addr + P.OutputOff, OSec->getLMA() + P.OutputOff,
                 P.Size, 1);
@@ -179,8 +180,8 @@ void elf::writeMapFile() {
     for (BaseCommand *Base : OSec->SectionCommands) {
       if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) {
         for (InputSection *IS : ISD->Sections) {
-          if (IS == In.EhFrame) {
-            printEhFrame(OS, OSec);
+          if (auto *EhSec = dyn_cast<EhFrameSection>(IS)) {
+            printEhFrame(OS, EhSec);
             continue;
           }
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index e848d0d76ab6a..58a01ea6c54cb 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -69,6 +69,10 @@ class EhFrameSection final : public SyntheticSection {
   bool isNeeded() const override { return !Sections.empty(); }
   size_t getSize() const override { return Size; }
 
+  static bool classof(const SectionBase *D) {
+    return SyntheticSection::classof(D) && D->Name == ".eh_frame";
+  }
+
   template <class ELFT> void addSection(InputSectionBase *S);
 
   std::vector<EhInputSection *> Sections;

From a85c0fd918749fe88c3a3bbc5ca23061e88b4536 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 23 May 2019 21:34:36 +0000
Subject: [PATCH 0092/1176] [X86] Split multi-line chained assignments into
 single lines to avoid making clang-format create triangle shaped indentation.
 Simplify one if statement to remove a bunch of string matches. NFCI

We had an if statement that checked over every avx512* feature to see if it should enabled avx512f. Since they are all prefixed with avx512 just check for that instead.

llvm-svn: 361557
---
 clang/lib/Basic/Targets/X86.cpp | 50 ++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 44f5fbf5a7d14..7bef7ce9c66e2 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -451,7 +451,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
   if (Enabled) {
     switch (Level) {
     case AVX512F:
-      Features["avx512f"] = Features["fma"] = Features["f16c"] = true;
+      Features["avx512f"] = true;
+      Features["fma"] = true;
+      Features["f16c"] = true;
       LLVM_FALLTHROUGH;
     case AVX2:
       Features["avx2"] = true;
@@ -490,8 +492,8 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
     Features["sse"] = false;
     LLVM_FALLTHROUGH;
   case SSE2:
-    Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] =
-        Features["gfni"] = false;
+    Features["sse2"] = Features["pclmul"] = Features["aes"] = false;
+    Features["sha"] = Features["gfni"] = false;
     LLVM_FALLTHROUGH;
   case SSE3:
     Features["sse3"] = false;
@@ -507,21 +509,21 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
     Features["sse4.2"] = false;
     LLVM_FALLTHROUGH;
   case AVX:
-    Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
-        Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false;
+    Features["fma"] = Features["avx"] = Features["f16c"] = false;
+    Features["xsave"] = Features["xsaveopt"] = Features["vaes"] = false;
+    Features["vpclmulqdq"] = false;
     setXOPLevel(Features, FMA4, false);
     LLVM_FALLTHROUGH;
   case AVX2:
     Features["avx2"] = false;
     LLVM_FALLTHROUGH;
   case AVX512F:
-    Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
-        Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
-            Features["avx512vl"] = Features["avx512vbmi"] =
-                Features["avx512ifma"] = Features["avx512vpopcntdq"] =
-                    Features["avx512bitalg"] = Features["avx512vnni"] =
-                        Features["avx512vbmi2"] = false;
-                        Features["avx512bf16"] = false;
+    Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = false;
+    Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = false;
+    Features["avx512vl"] = Features["avx512vbmi"] = false;
+    Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
+    Features["avx512bitalg"] = Features["avx512vnni"] = false;
+    Features["avx512vbmi2"] = Features["avx512bf16"] = false;
     break;
   }
 }
@@ -649,24 +651,20 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
     setSSELevel(Features, AVX2, Enabled);
   } else if (Name == "avx512f") {
     setSSELevel(Features, AVX512F, Enabled);
-  } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
-             Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
-             Name == "avx512vbmi" || Name == "avx512ifma" ||
-             Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
-             Name == "avx512bf16" ||
-             Name == "avx512vnni" || Name == "avx512vbmi2") {
+  } else if (Name.startswith("avx512")) {
     if (Enabled)
       setSSELevel(Features, AVX512F, Enabled);
-    // Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled.
-    if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
+    // Enable BWI instruction if certain features are being enabled.
+    if ((Name == "avx512vbmi" || Name == "avx512vbmi2" ||
+         Name == "avx512bitalg" || Name == "avx512bf16") && Enabled)
       Features["avx512bw"] = true;
-    if (Name == "avx512bf16" && Enabled)
-      Features["avx512bw"] = true;
-    // Also disable VBMI/VBMI2/BITALG if BWI is being disabled.
-    if (Name == "avx512bw" && !Enabled)
-      Features["avx512vbmi"] = Features["avx512vbmi2"] =
-      Features["avx512bf16"] =
+    // Also disable some features if BWI is being disabled.
+    if (Name == "avx512bw" && !Enabled) {
+      Features["avx512vbmi"] = false;
+      Features["avx512vbmi2"] = false;
       Features["avx512bitalg"] = false;
+      Features["avx512bf16"] = false;
+    }
   } else if (Name == "fma") {
     if (Enabled)
       setSSELevel(Features, AVX, Enabled);

From 5bc40d9b188bb43e2aafafe58d8d169cc7c9b4f1 Mon Sep 17 00:00:00 2001
From: Kristof Umann <kristof.umann@ericsson.com>
Date: Thu, 23 May 2019 21:46:51 +0000
Subject: [PATCH 0093/1176] [analyzer] List checkers in 3 categories: released,
 alpha, developer

Previously, the only way to display the list of available checkers was
to invoke the analyzer with -analyzer-checker-help frontend flag. This
however wasn't really great from a maintainer standpoint: users came
across checkers meant strictly for development purposes that weren't to
be tinkered with, or those that were still in development. This patch
creates a clearer division in between these categories.

From now on, we'll have 3 flags to display the list checkers. These
lists are mutually exclusive and can be used in any combination (for
example to display both stable and alpha checkers).

-analyzer-checker-help: Displays the list for stable, production ready
                        checkers.

-analyzer-checker-help-alpha: Displays the list for in development
                              checkers. Enabling is discouraged
                              for non-development purposes.

-analyzer-checker-help-developer: Modeling and debug checkers. Modeling
                                  checkers shouldn't be enabled/disabled
                                  by hand, and debug checkers shouldn't
                                  be touched by users.

Differential Revision: https://reviews.llvm.org/D62093

llvm-svn: 361558
---
 clang/include/clang/Driver/CC1Options.td      | 11 +++-
 .../StaticAnalyzer/Checkers/CheckerBase.td    |  4 +-
 .../StaticAnalyzer/Core/AnalyzerOptions.h     | 16 ++---
 clang/lib/Frontend/CompilerInvocation.cpp     |  4 +-
 .../ExecuteCompilerInvocation.cpp             |  3 +-
 .../Frontend/CheckerRegistry.cpp              | 31 +++++++--
 clang/test/Analysis/show-checker-list.c       | 63 ++++++++++++++++---
 7 files changed, 107 insertions(+), 25 deletions(-)

diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index f7da3746bd4f3..7605b3fc131ac 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -130,9 +130,14 @@ def analyzer_disable_all_checks : Flag<["-"], "analyzer-disable-all-checks">,
 def analyzer_checker_help : Flag<["-"], "analyzer-checker-help">,
   HelpText<"Display the list of analyzer checkers that are available">;
 
-def analyzer_checker_help_hidden : Flag<["-"], "analyzer-checker-help-hidden">,
-  HelpText<"Display the list of analyzer checkers that are available, "
-           "including modeling checkers">;
+def analyzer_checker_help_alpha : Flag<["-"], "analyzer-checker-help-alpha">,
+  HelpText<"Display the list of in development analyzer checkers. These "
+           "are NOT considered safe, they are unstable and will emit incorrect "
+           "reports. Enable ONLY FOR DEVELOPMENT purposes">;
+
+def analyzer_checker_help_developer : Flag<["-"], "analyzer-checker-help-developer">,
+  HelpText<"Display the list of developer-only checkers such as modeling "
+           "and debug checkers">;
 
 def analyzer_config_help : Flag<["-"], "analyzer-config-help">,
   HelpText<"Display the list of -analyzer-config options">;
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td b/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
index c381d4b13ecd4..3c7c6fe9b2abc 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
@@ -111,6 +111,6 @@ class Dependencies<list<Checker> Deps = []> {
   list<Checker> Dependencies = Deps;
 }
 
-/// Marks a checker or a package hidden. Hidden entries won't be displayed in
-/// -analyzer-checker-help, which is desirable for alpha or modeling checkers.
+/// Marks a checker or a package hidden. Hidden entries are meant for developers
+/// only, and aren't exposed to end users.
 class Hidden { bit Hidden = 1; }
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 6a54e157e88e1..1c45ffdff89af 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -220,7 +220,8 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   unsigned DisableAllChecks : 1;
 
   unsigned ShowCheckerHelp : 1;
-  unsigned ShowCheckerHelpHidden : 1;
+  unsigned ShowCheckerHelpAlpha : 1;
+  unsigned ShowCheckerHelpDeveloper : 1;
   unsigned ShowEnabledCheckerList : 1;
   unsigned ShowCheckerOptionList : 1;
   unsigned ShowConfigOptionsList : 1;
@@ -285,12 +286,13 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
 
   AnalyzerOptions()
       : DisableAllChecks(false), ShowCheckerHelp(false),
-        ShowCheckerHelpHidden(false), ShowEnabledCheckerList(false),
-        ShowCheckerOptionList(false), ShowConfigOptionsList(false),
-        AnalyzeAll(false), AnalyzerDisplayProgress(false),
-        AnalyzeNestedBlocks(false), eagerlyAssumeBinOpBifurcation(false),
-        TrimGraph(false), visualizeExplodedGraphWithGraphViz(false),
-        UnoptimizedCFG(false), PrintStats(false), NoRetryExhausted(false) {
+        ShowCheckerHelpAlpha(false), ShowCheckerHelpDeveloper(false),
+        ShowEnabledCheckerList(false), ShowCheckerOptionList(false),
+        ShowConfigOptionsList(false), AnalyzeAll(false),
+        AnalyzerDisplayProgress(false), AnalyzeNestedBlocks(false),
+        eagerlyAssumeBinOpBifurcation(false), TrimGraph(false),
+        visualizeExplodedGraphWithGraphViz(false), UnoptimizedCFG(false),
+        PrintStats(false), NoRetryExhausted(false) {
     llvm::sort(AnalyzerConfigCmdFlags);
   }
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 877e70b6616cb..34693af8f4fd3 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -285,7 +285,9 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args,
   }
 
   Opts.ShowCheckerHelp = Args.hasArg(OPT_analyzer_checker_help);
-  Opts.ShowCheckerHelpHidden = Args.hasArg(OPT_analyzer_checker_help_hidden);
+  Opts.ShowCheckerHelpAlpha = Args.hasArg(OPT_analyzer_checker_help_alpha);
+  Opts.ShowCheckerHelpDeveloper =
+      Args.hasArg(OPT_analyzer_checker_help_developer);
   Opts.ShowCheckerOptionList = Args.hasArg(OPT_analyzer_checker_option_help);
   Opts.ShowConfigOptionsList = Args.hasArg(OPT_analyzer_config_help);
   Opts.ShowEnabledCheckerList = Args.hasArg(OPT_analyzer_list_enabled_checkers);
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index 27690be777b8d..ea720c83c2933 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -238,7 +238,8 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) {
 
   AnalyzerOptions &AnOpts = *Clang->getAnalyzerOpts();
   // Honor -analyzer-checker-help and -analyzer-checker-help-hidden.
-  if (AnOpts.ShowCheckerHelp || AnOpts.ShowCheckerHelpHidden) {
+  if (AnOpts.ShowCheckerHelp || AnOpts.ShowCheckerHelpAlpha ||
+      AnOpts.ShowCheckerHelpDeveloper) {
     ento::printCheckerHelp(llvm::outs(),
                            Clang->getFrontendOpts().Plugins,
                            AnOpts,
diff --git a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
index d405933ca65c5..5f96389e597b4 100644
--- a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
@@ -514,13 +514,36 @@ void CheckerRegistry::printCheckerWithDescList(raw_ostream &Out,
   }
 
   const size_t InitialPad = 2;
-  for (const auto &Checker : Checkers) {
-    if (!AnOpts.ShowCheckerHelpHidden && Checker.IsHidden)
-      continue;
 
-    AnalyzerOptions::printFormattedEntry(Out, {Checker.FullName, Checker.Desc},
+  auto Print = [=](llvm::raw_ostream &Out, const CheckerInfo &Checker,
+                   StringRef Description) {
+    AnalyzerOptions::printFormattedEntry(Out, {Checker.FullName, Description},
                                          InitialPad, OptionFieldWidth);
     Out << '\n';
+  };
+
+  for (const auto &Checker : Checkers) {
+    // The order of this if branches is significant, we wouldn't like to display
+    // developer checkers even in the alpha output. For example,
+    // alpha.cplusplus.IteratorModeling is a modeling checker, hence it's hidden
+    // by default, and users (even when the user is a developer of an alpha
+    // checker) shouldn't normally tinker with whether they should be enabled.
+
+    if (Checker.IsHidden) {
+      if (AnOpts.ShowCheckerHelpDeveloper)
+        Print(Out, Checker, Checker.Desc);
+      continue;
+    }
+
+    if (Checker.FullName.startswith("alpha")) {
+      if (AnOpts.ShowCheckerHelpAlpha)
+        Print(Out, Checker,
+              ("(Enable only for development!) " + Checker.Desc).str());
+      continue;
+    }
+
+    if (AnOpts.ShowCheckerHelp)
+        Print(Out, Checker, Checker.Desc);
   }
 }
 
diff --git a/clang/test/Analysis/show-checker-list.c b/clang/test/Analysis/show-checker-list.c
index 83ed6e4897943..3d354c338b9b3 100644
--- a/clang/test/Analysis/show-checker-list.c
+++ b/clang/test/Analysis/show-checker-list.c
@@ -1,11 +1,60 @@
 // RUN: %clang_cc1 -analyzer-checker-help \
-// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE
 
-// RUN: %clang_cc1 -analyzer-checker-help-hidden \
-// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-HIDDEN
+// RUN: %clang_cc1 -analyzer-checker-help-alpha \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-ALPHA
 
-// CHECK: core.DivideZero
-// CHECK-HIDDEN: core.DivideZero
+// RUN: %clang_cc1 -analyzer-checker-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-DEVELOPER
 
-// CHECK-NOT: unix.DynamicMemoryModeling
-// CHECK-HIDDEN: unix.DynamicMemoryModeling
+// RUN: %clang_cc1 -analyzer-checker-help-developer \
+// RUN:   -analyzer-checker-help-alpha \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-DEVELOPER-ALPHA
+
+// RUN: %clang_cc1 -analyzer-checker-help \
+// RUN:   -analyzer-checker-help-alpha \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE-ALPHA
+
+// RUN: %clang_cc1 -analyzer-checker-help \
+// RUN:   -analyzer-checker-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE-DEVELOPER
+
+// RUN: %clang_cc1 -analyzer-checker-help \
+// RUN:   -analyzer-checker-help-alpha \
+// RUN:   -analyzer-checker-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE-ALPHA-DEVELOPER
+
+// CHECK-STABLE-NOT:    alpha.unix.Chroot
+// CHECK-DEVELOPER-NOT: alpha.unix.Chroot
+// CHECK-ALPHA:         alpha.unix.Chroot
+
+// Note that alpha.cplusplus.IteratorModeling is not only an alpha, but also a
+// hidden checker. In this case, we'd only like to see it in the developer list.
+// CHECK-ALPHA-NOT: alpha.cplusplus.IteratorModeling
+// CHECK-DEVELOPER: alpha.cplusplus.IteratorModeling
+
+// CHECK-STABLE:        core.DivideZero
+// CHECK-DEVELOPER-NOT: core.DivideZero
+// CHECK-ALPHA-NOT:     core.DivideZero
+
+// CHECK-STABLE-NOT: debug.ConfigDumper
+// CHECK-DEVELOPER:  debug.ConfigDumper
+// CHECK-ALPHA-NOT:  debug.ConfigDumper
+
+
+// CHECK-STABLE-ALPHA:         alpha.unix.Chroot
+// CHECK-DEVELOPER-ALPHA:      alpha.unix.Chroot
+// CHECK-STABLE-DEVELOPER-NOT: alpha.unix.Chroot
+
+// CHECK-STABLE-ALPHA:        core.DivideZero
+// CHECK-DEVELOPER-ALPHA-NOT: core.DivideZero
+// CHECK-STABLE-DEVELOPER:    core.DivideZero
+
+// CHECK-STABLE-ALPHA-NOT: debug.ConfigDumper
+// CHECK-DEVELOPER-ALPHA:  debug.ConfigDumper
+// CHECK-STABLE-DEVELOPER: debug.ConfigDumper
+
+
+// CHECK-STABLE-ALPHA-DEVELOPER: alpha.unix.Chroot
+// CHECK-STABLE-ALPHA-DEVELOPER: core.DivideZero
+// CHECK-STABLE-ALPHA-DEVELOPER: debug.ConfigDumper

From e60cb7d1be4aac850c69486cf69f0c5fe250e3af Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 23 May 2019 21:49:47 +0000
Subject: [PATCH 0094/1176] [InstSimplify] insertelement V, undef, ? --> V

This was part of InstCombine, but it's better placed in
InstSimplify. InstCombine also had an unreachable but weaker
fold for insertelement with undef index, so that is deleted.

llvm-svn: 361559
---
 llvm/lib/Analysis/InstructionSimplify.cpp     |  5 ++++
 .../InstCombine/InstCombineVectorOps.cpp      |  4 ---
 .../Transforms/InstCombine/vec_insertelt.ll   |  8 -----
 .../Transforms/InstSimplify/insertelement.ll  | 30 +++++++++++++++----
 4 files changed, 30 insertions(+), 17 deletions(-)
 delete mode 100644 llvm/test/Transforms/InstCombine/vec_insertelt.ll

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index b71841a1607dc..6e421dcaa737f 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4011,6 +4011,11 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
   if (isa<UndefValue>(Idx))
     return UndefValue::get(Vec->getType());
 
+  // Inserting an undef scalar? Assume it is the same value as the existing
+  // vector element.
+  if (isa<UndefValue>(Val))
+    return Vec;
+
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c2ea0733a48e4..44130d3246b67 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -863,10 +863,6 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
           VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
     return replaceInstUsesWith(IE, V);
 
-  // Inserting an undef or into an undefined place, remove this.
-  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
-    replaceInstUsesWith(IE, VecOp);
-
   // If the vector and scalar are both bitcast from the same element type, do
   // the insert in that source type followed by bitcast.
   Value *VecSrc, *ScalarSrc;
diff --git a/llvm/test/Transforms/InstCombine/vec_insertelt.ll b/llvm/test/Transforms/InstCombine/vec_insertelt.ll
deleted file mode 100644
index 3b949209c4d76..0000000000000
--- a/llvm/test/Transforms/InstCombine/vec_insertelt.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: ret <4 x i32> %A
-
-; PR1286
-define <4 x i32> @test1(<4 x i32> %A) {
-	%B = insertelement <4 x i32> %A, i32 undef, i32 1
-	ret <4 x i32> %B
-}
diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll
index 3524f2145acb0..c7db869d056d2 100644
--- a/llvm/test/Transforms/InstSimplify/insertelement.ll
+++ b/llvm/test/Transforms/InstSimplify/insertelement.ll
@@ -1,31 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -instsimplify < %s | FileCheck %s
 
 define <4 x i32> @test1(<4 x i32> %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret <4 x i32> undef
+;
   %I = insertelement <4 x i32> %A, i32 5, i64 4294967296
-  ; CHECK: ret <4 x i32> undef
   ret <4 x i32> %I
 }
 
 define <4 x i32> @test2(<4 x i32> %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret <4 x i32> undef
+;
   %I = insertelement <4 x i32> %A, i32 5, i64 4
-  ; CHECK: ret <4 x i32> undef
   ret <4 x i32> %I
 }
 
 define <4 x i32> @test3(<4 x i32> %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 5, i64 1
+; CHECK-NEXT:    ret <4 x i32> [[I]]
+;
   %I = insertelement <4 x i32> %A, i32 5, i64 1
-  ; CHECK: ret <4 x i32> %I
   ret <4 x i32> %I
 }
 
 define <4 x i32> @test4(<4 x i32> %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    ret <4 x i32> undef
+;
   %I = insertelement <4 x i32> %A, i32 5, i128 100
-  ; CHECK: ret <4 x i32> undef
   ret <4 x i32> %I
 }
 
 define <4 x i32> @test5(<4 x i32> %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret <4 x i32> undef
+;
   %I = insertelement <4 x i32> %A, i32 5, i64 undef
-  ; CHECK: ret <4 x i32> undef
   ret <4 x i32> %I
 }
+
+define <4 x i32> @PR1286(<4 x i32> %A) {
+; CHECK-LABEL: @PR1286(
+; CHECK-NEXT:    ret <4 x i32> [[A:%.*]]
+;
+  %B = insertelement <4 x i32> %A, i32 undef, i32 1
+  ret <4 x i32> %B
+}

From d82ddfa7c37d4814c91f1391d42eff87bc75c9dc Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Thu, 23 May 2019 21:52:59 +0000
Subject: [PATCH 0095/1176] [NewPassManager] Add tuning option:
 ForgetAllSCEVInLoopUnroll [NFC].

Summary: Mirror tuning option from old pass manager in new pass manager.

Reviewers: chandlerc

Subscribers: mehdi_amini, jlebar, zzheng, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61612

llvm-svn: 361560
---
 llvm/include/llvm/Passes/PassBuilder.h        |  4 ++++
 .../llvm/Transforms/Scalar/LoopUnrollPass.h   | 24 +++++++++++++++----
 llvm/lib/Passes/PassBuilder.cpp               |  7 ++++--
 .../lib/Transforms/IPO/PassManagerBuilder.cpp |  7 +-----
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 10 ++++++--
 5 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 383f49e0d7585..5e6660599f934 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -88,6 +88,10 @@ class PipelineTuningOptions {
   /// Tuning option to enable/disable loop unrolling. Its default value is true.
   bool LoopUnrolling;
 
+  /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
+  /// is that of the flag: `-forget-scev-loop-unroll`.
+  bool ForgetAllSCEVInLoopUnroll;
+
   /// Tuning option to cap the number of calls to retrive clobbering accesses in
   /// MemorySSA, in LICM.
   unsigned LicmMssaOptCap;
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 1445373eb4f2d..a84d889a83ad9 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -15,6 +15,8 @@
 
 namespace llvm {
 
+extern cl::opt<bool> ForgetSCEVInLoopUnroll;
+
 class Function;
 class Loop;
 class LPMUpdater;
@@ -28,9 +30,16 @@ class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
   /// metadata are considered. All other loops are skipped.
   const bool OnlyWhenForced;
 
+  /// If true, forget all loops when unrolling. If false, forget top-most loop
+  /// of the currently processed loops, which removes one entry at a time from
+  /// the internal SCEV records. For large loops, the former is faster.
+  const bool ForgetSCEV;
+
 public:
-  explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false)
-      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+  explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+                              bool ForgetSCEV = false)
+      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+        ForgetSCEV(ForgetSCEV) {}
 
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
@@ -60,8 +69,15 @@ struct LoopUnrollOptions {
   /// metadata are considered. All other loops are skipped.
   bool OnlyWhenForced;
 
-  LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false)
-      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+  /// If true, forget all loops when unrolling. If false, forget top-most loop
+  /// of the currently processed loops, which removes one entry at a time from
+  /// the internal SCEV records. For large loops, the former is faster.
+  const bool ForgetSCEV;
+
+  LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false,
+                    bool ForgetSCEV = false)
+      : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+        ForgetSCEV(ForgetSCEV) {}
 
   /// Enables or disables partial unrolling. When disabled only full unrolling
   /// is allowed.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1d17f91d5a8d3..3a0d0c29466ab 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -218,6 +218,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
   LoopVectorization = EnableLoopVectorization;
   SLPVectorization = RunSLPVectorization;
   LoopUnrolling = true;
+  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
 }
@@ -463,7 +464,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
        PGOOpt->Action != PGOOptions::SampleUse) &&
       PTO.LoopUnrolling)
-    LPM2.addPass(LoopFullUnrollPass(Level));
+    LPM2.addPass(
+        LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll));
 
   for (auto &C : LoopOptimizerEndEPCallbacks)
     C(LPM2, Level);
@@ -910,7 +912,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
         createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
   }
   if (PTO.LoopUnrolling)
-    OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
+    OptimizePM.addPass(LoopUnrollPass(
+        LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll)));
   OptimizePM.addPass(WarnMissedTransformationsPass());
   OptimizePM.addPass(InstCombinePass());
   OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 298cf47ed83b3..6ae7c859379a3 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
 #include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Vectorize.h"
@@ -145,12 +146,6 @@ cl::opt<bool> EnableOrderFileInstrumentation(
     "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
     cl::desc("Enable order file instrumentation (default = off)"));
 
-cl::opt<bool> ForgetSCEVInLoopUnroll(
-    "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
-    cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
-             " the current top-most loop. This is somtimes preferred to reduce"
-             " compile time."));
-
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 7e9daf0b1fb6c..2fa7436213dd5 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -71,6 +71,12 @@ using namespace llvm;
 
 #define DEBUG_TYPE "loop-unroll"
 
+cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
+    "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
+    cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
+             " the current top-most loop. This is somtimes preferred to reduce"
+             " compile time."));
+
 static cl::opt<unsigned>
     UnrollThreshold("unroll-threshold", cl::Hidden,
                     cl::desc("The cost threshold for loop unrolling"));
@@ -1281,7 +1287,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
       tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
                       /*BFI*/ nullptr, /*PSI*/ nullptr,
                       /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
-                      /*ForgetAllSCEV*/ false, /*Count*/ None,
+                      ForgetSCEV, /*Count*/ None,
                       /*Threshold*/ None, /*AllowPartial*/ false,
                       /*Runtime*/ false, /*UpperBound*/ false,
                       /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@@ -1422,7 +1428,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
     LoopUnrollResult Result = tryToUnrollLoop(
         &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
         /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
-        /*ForgetAllSCEV*/ false, /*Count*/ None,
+        UnrollOpts.ForgetSCEV, /*Count*/ None,
         /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
         UnrollOpts.AllowUpperBound, LocalAllowPeeling);
     Changed |= Result != LoopUnrollResult::Unmodified;

From 7e55ed84d0598f6cbd282985855a6b810025fa93 Mon Sep 17 00:00:00 2001
From: Kristof Umann <kristof.umann@ericsson.com>
Date: Thu, 23 May 2019 22:07:16 +0000
Subject: [PATCH 0096/1176] [analyzer] Hide developer-only checker/package
 options by default

These options are now only visible under
-analyzer-checker-option-help-developer.

Differential Revision: https://reviews.llvm.org/D61839

llvm-svn: 361561
---
 clang/include/clang/Driver/CC1Options.td      |  4 ++
 .../StaticAnalyzer/Checkers/CheckerBase.td    | 17 +++--
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 57 ++++++++++-----
 .../StaticAnalyzer/Core/AnalyzerOptions.h     |  4 +-
 .../StaticAnalyzer/Frontend/CheckerRegistry.h | 10 +--
 clang/lib/Frontend/CompilerInvocation.cpp     |  2 +
 .../ExecuteCompilerInvocation.cpp             |  2 +-
 .../Frontend/CheckerRegistry.cpp              | 23 ++++---
 .../Analysis/analyzer-checker-option-help.c   |  6 ++
 .../utils/TableGen/ClangSACheckersEmitter.cpp | 69 ++++++++++---------
 10 files changed, 123 insertions(+), 71 deletions(-)

diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 7605b3fc131ac..6ebd679ec3c21 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -151,6 +151,10 @@ def analyzer_config : Separate<["-"], "analyzer-config">,
 def analyzer_checker_option_help : Flag<["-"], "analyzer-checker-option-help">,
   HelpText<"Display the list of checker and package options">;
 
+def analyzer_checker_option_help_developer : Flag<["-"], "analyzer-checker-option-help-developer">,
+  HelpText<"Display the list of checker and package options meant for "
+           "development purposes only">;
+
 def analyzer_config_compatibility_mode : Separate<["-"], "analyzer-config-compatibility-mode">,
   HelpText<"Don't emit errors on invalid analyzer-config inputs">;
 
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td b/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
index 3c7c6fe9b2abc..9fb3184cfe2a0 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
@@ -25,13 +25,22 @@ class Type<CmdLineOptionTypeEnum val> {
   bits<2> Type = val.Type;
 }
 
+/// Marks the entry hidden. Hidden entries won't be displayed in
+/// -analyzer-checker-option-help.
+class HiddenEnum<bit val> {
+  bit Val = val;
+}
+def DontHide : HiddenEnum<0>;
+def Hide : HiddenEnum<1>;
+
 /// Describes an option for a checker or a package.
 class CmdLineOption<CmdLineOptionTypeEnum type, string cmdFlag, string desc,
-                    string defaultVal> {
+                    string defaultVal, HiddenEnum isHidden = DontHide> {
   bits<2> Type = type.Type;
-  string CmdFlag = cmdFlag;
-  string Desc = desc;
-  string DefaultVal = defaultVal;
+  string  CmdFlag = cmdFlag;
+  string  Desc = desc;
+  string  DefaultVal = defaultVal;
+  bit     Hidden = isHidden.Val;
 }
 
 /// Describes a list of package options.
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 911edd8066cbc..7669d22310048 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -519,7 +519,8 @@ def UninitializedObjectChecker: Checker<"UninitializedObject">,
                   "for each uninitalized field, as opposed to emitting one "
                   "warning per constructor call, and listing the uninitialized "
                   "fields that belongs to it in notes.",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "CheckPointeeInitialization",
                   "If set to false, the checker will not analyze "
@@ -918,7 +919,8 @@ def RetainCountChecker : Checker<"RetainCount">,
                   "OSObject instances. By default, the checker only checks "
                   "retain-release rules for Objective-C NSObject instances "
                   "and CoreFoundation objects.",
-                  "true">,
+                  "true",
+                  Hide>,
     CmdLineOption<Boolean,
                   "TrackNSCFStartParam",
                   "Check not only that the code follows retain-release rules "
@@ -1046,7 +1048,8 @@ def NonLocalizedStringChecker : Checker<"NonLocalizedStringChecker">,
                   "(Aggressive) or NonLocalized only if it is not backed by a "
                   "SymRegion (Non-Aggressive), basically leaving only string "
                   "literals as NonLocalized.",
-                  "false">
+                  "false",
+                  Hide>
   ]>,
   Documentation<HasDocumentation>;
 
@@ -1110,67 +1113,83 @@ def AnalysisOrderChecker : Checker<"AnalysisOrder">,
     CmdLineOption<Boolean,
                   "PreStmtCastExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PostStmtCastExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PreStmtArraySubscriptExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PostStmtArraySubscriptExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PreStmtCXXNewExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PostStmtCXXNewExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PreStmtOffsetOfExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PostStmtOffsetOfExpr",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PreCall",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "PostCall",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "EndFunction",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "NewAllocator",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "Bind",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "LiveSymbols",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "RegionChanges",
                   "",
-                  "false">,
+                  "false",
+                  Hide>,
     CmdLineOption<Boolean,
                   "*",
                   "Enables all callbacks.",
-                  "false">
+                  "false",
+                  Hide>
   ]>,
   Documentation<NotDocumented>;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index 1c45ffdff89af..c83696d70bfde 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -224,6 +224,7 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   unsigned ShowCheckerHelpDeveloper : 1;
   unsigned ShowEnabledCheckerList : 1;
   unsigned ShowCheckerOptionList : 1;
+  unsigned ShowCheckerOptionDeveloperList : 1;
   unsigned ShowConfigOptionsList : 1;
   unsigned ShouldEmitErrorsOnInvalidConfigValue : 1;
   unsigned AnalyzeAll : 1;
@@ -287,7 +288,8 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   AnalyzerOptions()
       : DisableAllChecks(false), ShowCheckerHelp(false),
         ShowCheckerHelpAlpha(false), ShowCheckerHelpDeveloper(false),
-        ShowEnabledCheckerList(false), ShowCheckerOptionList(false),
+        ShowEnabledCheckerList(false),
+        ShowCheckerOptionList(false), ShowCheckerOptionDeveloperList(false),
         ShowConfigOptionsList(false), AnalyzeAll(false),
         AnalyzerDisplayProgress(false), AnalyzeNestedBlocks(false),
         eagerlyAssumeBinOpBifurcation(false), TrimGraph(false),
diff --git a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
index 3a05c928774c0..9af8f91e70277 100644
--- a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
+++ b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
@@ -98,11 +98,13 @@ class CheckerRegistry {
     StringRef OptionName;
     StringRef DefaultValStr;
     StringRef Description;
+    bool IsHidden;
 
     CmdLineOption(StringRef OptionType, StringRef OptionName,
-                  StringRef DefaultValStr, StringRef Description)
+                  StringRef DefaultValStr, StringRef Description, bool IsHidden)
         : OptionType(OptionType), OptionName(OptionName),
-          DefaultValStr(DefaultValStr), Description(Description) {
+          DefaultValStr(DefaultValStr), Description(Description),
+          IsHidden(IsHidden) {
 
       assert((OptionType == "bool" || OptionType == "string" ||
               OptionType == "int") &&
@@ -239,7 +241,7 @@ class CheckerRegistry {
   /// non-compatibility mode.
   void addCheckerOption(StringRef OptionType, StringRef CheckerFullName,
                         StringRef OptionName, StringRef DefaultValStr,
-                        StringRef Description);
+                        StringRef Description, bool IsHidden = false);
 
   /// Adds a package to the registry.
   void addPackage(StringRef FullName);
@@ -255,7 +257,7 @@ class CheckerRegistry {
   /// non-compatibility mode.
   void addPackageOption(StringRef OptionType, StringRef PackageFullName,
                         StringRef OptionName, StringRef DefaultValStr,
-                        StringRef Description);
+                        StringRef Description, bool IsHidden = false);
 
   // FIXME: This *really* should be added to the frontend flag descriptions.
   /// Initializes a CheckerManager by calling the initialization functions for
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 34693af8f4fd3..36519c6d487a8 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -289,6 +289,8 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args,
   Opts.ShowCheckerHelpDeveloper =
       Args.hasArg(OPT_analyzer_checker_help_developer);
   Opts.ShowCheckerOptionList = Args.hasArg(OPT_analyzer_checker_option_help);
+  Opts.ShowCheckerOptionDeveloperList =
+      Args.hasArg(OPT_analyzer_checker_option_help_developer);
   Opts.ShowConfigOptionsList = Args.hasArg(OPT_analyzer_config_help);
   Opts.ShowEnabledCheckerList = Args.hasArg(OPT_analyzer_list_enabled_checkers);
   Opts.ShouldEmitErrorsOnInvalidConfigValue =
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index ea720c83c2933..a5f51ab7ae293 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -249,7 +249,7 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) {
   }
 
   // Honor -analyzer-checker-option-help.
-  if (Clang->getAnalyzerOpts()->ShowCheckerOptionList) {
+  if (AnOpts.ShowCheckerOptionList || AnOpts.ShowCheckerOptionDeveloperList) {
     ento::printCheckerConfigList(llvm::outs(),
                                  Clang->getFrontendOpts().Plugins,
                                  *Clang->getAnalyzerOpts(),
diff --git a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
index 5f96389e597b4..f3a4765c9ee0f 100644
--- a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
@@ -180,12 +180,12 @@ CheckerRegistry::CheckerRegistry(
   addDependency(FULLNAME, DEPENDENCY);
 
 #define GET_CHECKER_OPTIONS
-#define CHECKER_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL)             \
-  addCheckerOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC);
+#define CHECKER_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL, IS_HIDDEN)  \
+  addCheckerOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC, IS_HIDDEN);
 
 #define GET_PACKAGE_OPTIONS
-#define PACKAGE_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL)             \
-  addPackageOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC);
+#define PACKAGE_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL, IS_HIDDEN)  \
+  addPackageOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC, IS_HIDDEN);
 
 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
 #undef CHECKER_DEPENDENCY
@@ -396,10 +396,10 @@ void CheckerRegistry::addPackageOption(StringRef OptionType,
                                        StringRef PackageFullName,
                                        StringRef OptionName,
                                        StringRef DefaultValStr,
-                                       StringRef Description) {
+                                       StringRef Description, bool IsHidden) {
   PackageOptions.emplace_back(
-      PackageFullName,
-      CmdLineOption{OptionType, OptionName, DefaultValStr, Description});
+      PackageFullName, CmdLineOption{OptionType, OptionName, DefaultValStr,
+                                     Description, IsHidden});
 }
 
 void CheckerRegistry::addChecker(InitializationFunction Rfn,
@@ -421,10 +421,10 @@ void CheckerRegistry::addCheckerOption(StringRef OptionType,
                                        StringRef CheckerFullName,
                                        StringRef OptionName,
                                        StringRef DefaultValStr,
-                                       StringRef Description) {
+                                       StringRef Description, bool IsHidden) {
   CheckerOptions.emplace_back(
-      CheckerFullName,
-      CmdLineOption{OptionType, OptionName, DefaultValStr, Description});
+      CheckerFullName, CmdLineOption{OptionType, OptionName, DefaultValStr,
+                                     Description, IsHidden});
 }
 
 void CheckerRegistry::initializeManager(CheckerManager &CheckerMgr) const {
@@ -577,6 +577,9 @@ void CheckerRegistry::printCheckerOptionList(raw_ostream &Out) const {
   }
 
   for (const std::pair<StringRef, const CmdLineOption &> &Entry : OptionMap) {
+    if (!AnOpts.ShowCheckerOptionDeveloperList && Entry.second.IsHidden)
+      continue;
+
     const CmdLineOption &Option = Entry.second;
     std::string FullOption = (Entry.first + ":" + Option.OptionName).str();
 
diff --git a/clang/test/Analysis/analyzer-checker-option-help.c b/clang/test/Analysis/analyzer-checker-option-help.c
index f59d8515823cc..bc8ed09d43e17 100644
--- a/clang/test/Analysis/analyzer-checker-option-help.c
+++ b/clang/test/Analysis/analyzer-checker-option-help.c
@@ -1,5 +1,8 @@
 // RUN: %clang_cc1 -analyzer-checker-option-help 2>&1 | FileCheck %s
 
+// RUN: %clang_cc1 -analyzer-checker-option-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-HIDDEN
+
 // CHECK: OVERVIEW: Clang Static Analyzer Checker and Package Option List
 //
 // CHECK: USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>
@@ -17,3 +20,6 @@
 // CHECK:        the first statement in the group is representative
 // CHECK:        for all other statements in the group in
 // CHECK:        terms of complexity. (default: 50)
+
+// CHECK-NOT:     optin.cplusplus.UninitializedObject:NotesAsWarnings
+// CHECK-HIDDEN:  optin.cplusplus.UninitializedObject:NotesAsWarnings
diff --git a/clang/utils/TableGen/ClangSACheckersEmitter.cpp b/clang/utils/TableGen/ClangSACheckersEmitter.cpp
index 428a5c81276b8..7c1827ec2e48a 100644
--- a/clang/utils/TableGen/ClangSACheckersEmitter.cpp
+++ b/clang/utils/TableGen/ClangSACheckersEmitter.cpp
@@ -113,6 +113,7 @@ static std::string getCheckerOptionType(const Record &R) {
 static bool isHidden(const Record *R) {
   if (R->getValueAsBit("Hidden"))
     return true;
+
   // Not declared as hidden, check the parent package if it is hidden.
   if (DefInit *DI = dyn_cast<DefInit>(R->getValueInit("ParentPackage")))
     return isHidden(DI->getDef());
@@ -121,21 +122,38 @@ static bool isHidden(const Record *R) {
 }
 
 static void printChecker(llvm::raw_ostream &OS, const Record &R) {
-    OS << "CHECKER(" << "\"";
-    OS.write_escaped(getCheckerFullName(&R)) << "\", ";
-    OS << R.getName() << ", ";
-    OS << "\"";
-    OS.write_escaped(getStringValue(R, "HelpText")) << "\", ";
-    OS << "\"";
-    OS.write_escaped(getCheckerDocs(R));
-    OS << "\", ";
-
-    if (!isHidden(&R))
-      OS << "false";
-    else
-      OS << "true";
+  OS << "CHECKER(" << "\"";
+  OS.write_escaped(getCheckerFullName(&R)) << "\", ";
+  OS << R.getName() << ", ";
+  OS << "\"";
+  OS.write_escaped(getStringValue(R, "HelpText")) << "\", ";
+  OS << "\"";
+  OS.write_escaped(getCheckerDocs(R));
+  OS << "\", ";
+
+  if (!isHidden(&R))
+    OS << "false";
+  else
+    OS << "true";
+
+  OS << ")\n";
+}
+
+static void printOption(llvm::raw_ostream &OS, StringRef FullName,
+                        const Record &R) {
+  OS << "\"";
+  OS.write_escaped(getCheckerOptionType(R)) << "\", \"";
+  OS.write_escaped(FullName) << "\", ";
+  OS << '\"' << getStringValue(R, "CmdFlag") << "\", ";
+  OS << '\"';
+  OS.write_escaped(getStringValue(R, "Desc")) << "\", ";
+  OS << '\"';
+  OS.write_escaped(getStringValue(R, "DefaultVal")) << "\", ";
 
-    OS << ")\n";
+  if (!R.getValueAsBit("Hidden"))
+    OS << "false";
+  else
+    OS << "true";
 }
 
 namespace clang {
@@ -196,14 +214,8 @@ void EmitClangSACheckers(RecordKeeper &Records, raw_ostream &OS) {
     std::vector<Record *> PackageOptions = Package
                                        ->getValueAsListOfDefs("PackageOptions");
     for (Record *PackageOpt : PackageOptions) {
-      OS << "PACKAGE_OPTION(\"";
-      OS.write_escaped(getCheckerOptionType(*PackageOpt)) << "\", \"";
-      OS.write_escaped(getPackageFullName(Package)) << "\", ";
-      OS << '\"' << getStringValue(*PackageOpt, "CmdFlag") << "\", ";
-      OS << '\"';
-      OS.write_escaped(getStringValue(*PackageOpt, "Desc")) << "\", ";
-      OS << '\"';
-      OS.write_escaped(getStringValue(*PackageOpt, "DefaultVal")) << "\"";
+      OS << "PACKAGE_OPTION(";
+      printOption(OS, getPackageFullName(Package), *PackageOpt);
       OS << ")\n";
     }
   }
@@ -277,16 +289,9 @@ void EmitClangSACheckers(RecordKeeper &Records, raw_ostream &OS) {
     std::vector<Record *> CheckerOptions = Checker
                                        ->getValueAsListOfDefs("CheckerOptions");
     for (Record *CheckerOpt : CheckerOptions) {
-      OS << "CHECKER_OPTION(\"";
-      OS << getCheckerOptionType(*CheckerOpt) << "\", \"";
-      OS.write_escaped(getCheckerFullName(Checker)) << "\", ";
-      OS << '\"' << getStringValue(*CheckerOpt, "CmdFlag") << "\", ";
-      OS << '\"';
-      OS.write_escaped(getStringValue(*CheckerOpt, "Desc")) << "\", ";
-      OS << '\"';
-      OS.write_escaped(getStringValue(*CheckerOpt, "DefaultVal")) << "\"";
-      OS << ")";
-      OS << '\n';
+      OS << "CHECKER_OPTION(";
+      printOption(OS, getCheckerFullName(Checker), *CheckerOpt);
+      OS << ")\n";
     }
   }
   OS << "#endif // GET_CHECKER_OPTIONS\n"

From f2e41dd6ed1ac052e89dac224e67218c479cf952 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Thu, 23 May 2019 22:07:37 +0000
Subject: [PATCH 0097/1176] Use clang_cc1 instead of clang in CodeGen test.

llvm-svn: 361562
---
 clang/test/CodeGen/loop-vectorize.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGen/loop-vectorize.c b/clang/test/CodeGen/loop-vectorize.c
index 8c81fbebecb8c..f1c5c0cee77c6 100644
--- a/clang/test/CodeGen/loop-vectorize.c
+++ b/clang/test/CodeGen/loop-vectorize.c
@@ -1,7 +1,7 @@
-// RUN: %clang -target x86_64 -S -c -O1 -fvectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
-// RUN: %clang -target x86_64 -S -c -O1 -fno-vectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
-// RUN: %clang -target x86_64 -fexperimental-new-pass-manager -S -c -O1 -fvectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
-// RUN: %clang -target x86_64 -fexperimental-new-pass-manager -S -c -O1 -fno-vectorize -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -S -O1 -vectorize-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -S -O1 -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -vectorize-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
 
 // CHECK-ENABLE-VECT-LABEL: @for_test()
 // CHECK-ENABLE-VECT: fmul <{{[0-9]+}} x double>

From 5554a5fcbdd7a889e133a80c064e4e553fa4fae4 Mon Sep 17 00:00:00 2001
From: Bob Haarman <llvm@inglorion.net>
Date: Thu, 23 May 2019 22:28:18 +0000
Subject: [PATCH 0098/1176] fix accidental implicit matches in
 elf-disassemble-symbol-labels-rel.test

llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
uses --implicit-check-not to verify that certain patterns do not occur
in llvm-objdump's output, except in places where they are explicitly
checked. Unfortunately, the patterns are generic enough that they may
be part of the file name which is also output by llvm-objdump. This
change matches the line with the filename explicitly so that the
implicit patterns are not applied to it.

llvm-svn: 361563
---
 .../llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test      | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
index d2377f44621f4..189803cbc0968 100644
--- a/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
+++ b/llvm/test/tools/llvm-objdump/X86/elf-disassemble-symbol-labels-rel.test
@@ -9,6 +9,7 @@
 # RUN:                                   --implicit-check-not=absolute \
 # RUN:                                   --implicit-check-not=other
 
+# CHECK: {{.*}}: file format {{.*}}
 # CHECK: Disassembly of section .text:
 # CHECK: 0000000000000000 first:
 # CHECK: 0000000000000001 second:

From e0ef04f8cb295a95c49e8436f59d5a4b58e92d15 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Thu, 23 May 2019 22:30:43 +0000
Subject: [PATCH 0099/1176] [OPENMP]Do not crash for const firstprivates.

If the variable is a firstprivate variable and it was not emitted beause
this a constant variable with the constant initializer, we can use the
initial value instead of the variable itself. It also fixes the problem
with the compiler crash in this case.

llvm-svn: 361564
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp            | 20 ++++++++++++++++++-
 .../OpenMP/parallel_firstprivate_codegen.cpp  |  7 +++++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 01194e3a60fa1..6a973cdb57e3a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -758,7 +758,25 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
         DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                         /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                         (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
-        LValue OriginalLVal = EmitLValue(&DRE);
+        LValue OriginalLVal;
+        if (!FD) {
+          // Check if the firstprivate variable is just a constant value.
+          ConstantEmission CE = tryEmitAsConstant(&DRE);
+          if (CE && !CE.isReference()) {
+            // Constant value, no need to create a copy.
+            ++IRef;
+            ++InitsRef;
+            continue;
+          }
+          if (CE && CE.isReference()) {
+            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
+          } else {
+            assert(!CE && "Expected non-constant firstprivate.");
+            OriginalLVal = EmitLValue(&DRE);
+          }
+        } else {
+          OriginalLVal = EmitLValue(&DRE);
+        }
         QualType Type = VD->getType();
         if (Type->isArrayType()) {
           // Emit VarDecl with copy init for arrays.
diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp
index 8aa64668f0525..2a306d1272f82 100644
--- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -335,8 +335,9 @@ int main() {
     s_arr[0] = var;
     sivar = 2;
   }
-#pragma omp parallel allocate(omp_default_mem_alloc: t_var) firstprivate(t_var)
-  {}
+  const int a = 0;
+#pragma omp parallel allocate(omp_default_mem_alloc: t_var) firstprivate(t_var, a)
+  { t_var = a; }
   return tmain<int>();
 #endif
 }
@@ -346,6 +347,7 @@ int main() {
 // CHECK: [[T_VAR:%.+]] = alloca i32,
 // CHECK: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]],
 // CHECK: [[SIVARCAST:%.+]] = alloca [[iz]],
+// CHECK: [[A:%.+]] = alloca i32,
 // CHECK: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]],
 // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
 // CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
@@ -420,6 +422,7 @@ int main() {
 // CHECK-32: [[T_VAR_VAL:%.+]] = load i32, i32* [[T_VAR_ADDR]],
 // CHECK-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]],
 // CHECK:    store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]],
+// CHECK:    store i32 0, i32* [[T_VAR_PRIV]],
 // CHECK:    call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*))
 // CHECK:    ret void
 

From 342571e8d6eb1afb151ae1103431798e3d24054f Mon Sep 17 00:00:00 2001
From: Konrad Kleine <kkleine@redhat.com>
Date: Thu, 23 May 2019 22:39:13 +0000
Subject: [PATCH 0100/1176] [lldb] followup fix for
 https://reviews.llvm.org/D62305

Summary:
Fixing this error on windows build bot:

```
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(21): error C2440: 'initializing': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(21): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(21): error C2439: 'lldb_private::HostNativeThreadBase::m_result': member could not be initialized
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\include\lldb/Host/HostNativeThreadBase.h(48): note: see declaration of 'lldb_private::HostNativeThreadBase::m_result'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(24): error C2440: 'initializing': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(24): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(24): error C2439: 'lldb_private::HostNativeThreadBase::m_result': member could not be initialized
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\include\lldb/Host/HostNativeThreadBase.h(48): note: see declaration of 'lldb_private::HostNativeThreadBase::m_result'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(40): error C2440: '=': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(40): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(50): error C2440: '=': cannot convert from 'nullptr' to 'lldb::thread_result_t'
E:\build_slave\lldb-x64-windows-ninja\llvm\tools\lldb\source\Host\common\HostNativeThreadBase.cpp(50): note: A native nullptr can only be converted to bool or, using reinterpret_cast, to an integral type
```

see http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/5050/steps/build/logs/stdio

Reviewers: stella.stamenova, JDevlieghere

Reviewed By: JDevlieghere

Subscribers: lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62337

llvm-svn: 361565
---
 lldb/source/Host/common/HostNativeThreadBase.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp
index a5f876a7232af..091e7b515a602 100644
--- a/lldb/source/Host/common/HostNativeThreadBase.cpp
+++ b/lldb/source/Host/common/HostNativeThreadBase.cpp
@@ -18,10 +18,10 @@ using namespace lldb;
 using namespace lldb_private;
 
 HostNativeThreadBase::HostNativeThreadBase()
-    : m_thread(LLDB_INVALID_HOST_THREAD), m_result(0) {}
+    : m_thread(LLDB_INVALID_HOST_THREAD), m_result({}) {}
 
 HostNativeThreadBase::HostNativeThreadBase(thread_t thread)
-    : m_thread(thread), m_result(0) {}
+    : m_thread(thread), m_result({}) {}
 
 lldb::thread_t HostNativeThreadBase::GetSystemHandle() const {
   return m_thread;
@@ -37,7 +37,7 @@ bool HostNativeThreadBase::IsJoinable() const {
 
 void HostNativeThreadBase::Reset() {
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = 0;
+  m_result = {};
 }
 
 bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
@@ -47,7 +47,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
 lldb::thread_t HostNativeThreadBase::Release() {
   lldb::thread_t result = m_thread;
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = 0;
+  m_result = {};
 
   return result;
 }

From ac95c86511342b5d42c2915f681fe1c15ab8aac9 Mon Sep 17 00:00:00 2001
From: Kristof Umann <kristof.umann@ericsson.com>
Date: Thu, 23 May 2019 22:52:09 +0000
Subject: [PATCH 0101/1176] [analyzer] List checker/plugin options in 3
 categories: released, alpha, developer

Same patch as D62093, but for checker/plugin options, the only
difference being that options for alpha checkers are implicitly marked
as alpha.

Differential Revision: https://reviews.llvm.org/D62093

llvm-svn: 361566
---
 clang/include/clang/Driver/CC1Options.td      |  5 +
 .../StaticAnalyzer/Checkers/CheckerBase.td    | 19 +++-
 .../clang/StaticAnalyzer/Checkers/Checkers.td | 67 ++++++++++---
 .../StaticAnalyzer/Core/AnalyzerOptions.h     |  9 +-
 .../StaticAnalyzer/Frontend/CheckerRegistry.h | 16 ++-
 clang/lib/Frontend/CompilerInvocation.cpp     |  4 +
 .../ExecuteCompilerInvocation.cpp             |  3 +-
 .../Frontend/CheckerRegistry.cpp              | 55 ++++++++---
 .../Analysis/analyzer-checker-option-help.c   | 98 +++++++++++++++----
 .../CheckerOptionHandling.cpp                 |  3 +-
 .../utils/TableGen/ClangSACheckersEmitter.cpp | 18 ++++
 11 files changed, 235 insertions(+), 62 deletions(-)

diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 6ebd679ec3c21..d2d471089e5e4 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -151,6 +151,11 @@ def analyzer_config : Separate<["-"], "analyzer-config">,
 def analyzer_checker_option_help : Flag<["-"], "analyzer-checker-option-help">,
   HelpText<"Display the list of checker and package options">;
 
+def analyzer_checker_option_help_alpha : Flag<["-"], "analyzer-checker-option-help-alpha">,
+  HelpText<"Display the list of in development checker and package options. "
+           "These are NOT considered safe, they are unstable and will emit "
+           "incorrect reports. Enable ONLY FOR DEVELOPMENT purposes">;
+
 def analyzer_checker_option_help_developer : Flag<["-"], "analyzer-checker-option-help-developer">,
   HelpText<"Display the list of checker and package options meant for "
            "development purposes only">;
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td b/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
index 9fb3184cfe2a0..6625d79559f56 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/CheckerBase.td
@@ -21,10 +21,21 @@ def Integer : CmdLineOptionTypeEnum<0>;
 def String : CmdLineOptionTypeEnum<1>;
 def Boolean : CmdLineOptionTypeEnum<2>;
 
-class Type<CmdLineOptionTypeEnum val> {
-  bits<2> Type = val.Type;
+/// Describes the state of the entry. We wouldn't like to display, for example,
+/// developer only entries for a list meant for end users.
+class DevelopmentStageEnum<bits<1> val> {
+  bits<1> Val = val;
 }
 
+/// Alpha entries are under development, might be incomplet, inkorrekt and
+/// unstable.
+def InAlpha : DevelopmentStageEnum<0>;
+
+/// Released entries are stable, produce minimal, if any false positives,
+/// and emits reports that explain the occurance of the bug understandably and
+/// thoroughly.
+def Released : DevelopmentStageEnum<1>;
+
 /// Marks the entry hidden. Hidden entries won't be displayed in
 /// -analyzer-checker-option-help.
 class HiddenEnum<bit val> {
@@ -35,11 +46,13 @@ def Hide : HiddenEnum<1>;
 
 /// Describes an option for a checker or a package.
 class CmdLineOption<CmdLineOptionTypeEnum type, string cmdFlag, string desc,
-                    string defaultVal, HiddenEnum isHidden = DontHide> {
+                    string defaultVal, DevelopmentStageEnum stage,
+                    HiddenEnum isHidden = DontHide> {
   bits<2> Type = type.Type;
   string  CmdFlag = cmdFlag;
   string  Desc = desc;
   string  DefaultVal = defaultVal;
+  bits<1> DevelopmentStage = stage.Val;
   bit     Hidden = isHidden.Val;
 }
 
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 7669d22310048..35beb51f0c47e 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -51,7 +51,8 @@ def Nullability : Package<"nullability">,
                   "concerned with your custom nullability annotations more "
                   "than with following nullability specifications of system "
                   "header functions.",
-                  "false">
+                  "false",
+                  Released>
   ]>;
 
 def Cplusplus : Package<"cplusplus">;
@@ -391,7 +392,8 @@ def DynamicMemoryModeling: Checker<"DynamicMemoryModeling">,
                   "If set to true, the checker assumes that all the "
                   "allocating and deallocating functions are annotated with "
                   "ownership_holds, ownership_takes and ownership_returns.",
-                  "false">
+                  "false",
+                  InAlpha>
   ]>,
   Dependencies<[CStringModeling]>,
   Documentation<NotDocumented>,
@@ -496,7 +498,8 @@ def MoveChecker: Checker<"Move">,
                   "user has intentionally asked us to completely eliminate "
                   "use-after-move in his code. Values: \"KnownsOnly\", "
                   "\"KnownsAndLocals\", \"All\".",
-                  "KnownsAndLocals">
+                  "KnownsAndLocals",
+                  Released>
   ]>,
   Documentation<HasDocumentation>;
 
@@ -512,7 +515,8 @@ def UninitializedObjectChecker: Checker<"UninitializedObject">,
                   "If set to false, the checker won't emit warnings "
                   "for objects that don't have at least one initialized "
                   "field.",
-                  "false">,
+                  "false",
+                  Released>,
     CmdLineOption<Boolean,
                   "NotesAsWarnings",
                   "If set to true, the checker will emit a warning "
@@ -520,25 +524,29 @@ def UninitializedObjectChecker: Checker<"UninitializedObject">,
                   "warning per constructor call, and listing the uninitialized "
                   "fields that belongs to it in notes.",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "CheckPointeeInitialization",
                   "If set to false, the checker will not analyze "
                   "the pointee of pointer/reference fields, and will only "
                   "check whether the object itself is initialized.",
-                  "false">,
+                  "false",
+                  InAlpha>,
     CmdLineOption<String,
                   "IgnoreRecordsWithField",
                   "If supplied, the checker will not analyze "
                   "structures that have a field with a name or type name that "
                   "matches the given pattern.",
-                  "\"\"">,
+                  "\"\"",
+                  Released>,
     CmdLineOption<Boolean,
                   "IgnoreGuardedFields",
                   "If set to true, the checker will analyze _syntactically_ "
                   "whether the found uninitialized object is used without a "
                   "preceding assert call. Defaults to false.",
-                  "false">
+                  "false",
+                  InAlpha>
   ]>,
   Documentation<HasAlphaDocumentation>;
 
@@ -548,7 +556,8 @@ def VirtualCallChecker : Checker<"VirtualCall">,
     CmdLineOption<Boolean,
                   "PureOnly",
                   "Whether to only report calls to pure virtual methods.",
-                  "false">
+                  "false",
+                  Released>
   ]>,
   Documentation<HasDocumentation>;
 
@@ -651,7 +660,8 @@ def PaddingChecker : Checker<"Padding">,
                   "AllowedPad",
                   "Reports are only generated if the excessive padding exceeds "
                   "'AllowedPad' in bytes.",
-                  "24">
+                  "24",
+                  Released>
   ]>,
   Documentation<NotDocumented>;
 
@@ -768,11 +778,13 @@ def MmapWriteExecChecker : Checker<"MmapWriteExec">,
     CmdLineOption<Integer,
                   "MmapProtExec",
                   "Specifies the value of PROT_EXEC",
-                  "0x04">,
+                  "0x04",
+                  Released>,
     CmdLineOption<Integer,
                   "MmapProtRead",
                   "Specifies the value of PROT_READ",
-                  "0x01">
+                  "0x01",
+                  Released>
   ]>,
   Documentation<HasAlphaDocumentation>;
 
@@ -819,7 +831,8 @@ def NumberObjectConversionChecker : Checker<"NumberObjectConversion">,
                   "Enables detection of more conversion patterns (which are "
                   "most likely more harmless, and therefore are more likely to "
                   "produce false positives).",
-                  "false">
+                  "false",
+                  Released>
   ]>,
   Documentation<NotDocumented>;
 
@@ -920,6 +933,7 @@ def RetainCountChecker : Checker<"RetainCount">,
                   "retain-release rules for Objective-C NSObject instances "
                   "and CoreFoundation objects.",
                   "true",
+                  InAlpha,
                   Hide>,
     CmdLineOption<Boolean,
                   "TrackNSCFStartParam",
@@ -928,7 +942,8 @@ def RetainCountChecker : Checker<"RetainCount">,
                   "elsewhere, but also that it fulfills its own retain count "
                   "specification with respect to objects that it receives as "
                   "arguments.",
-                  "false">
+                  "false",
+                  Released>
   ]>,
   Dependencies<[RetainCountBase]>,
   Documentation<HasDocumentation>;
@@ -1049,6 +1064,7 @@ def NonLocalizedStringChecker : Checker<"NonLocalizedStringChecker">,
                   "SymRegion (Non-Aggressive), basically leaving only string "
                   "literals as NonLocalized.",
                   "false",
+                  InAlpha,
                   Hide>
   ]>,
   Documentation<HasDocumentation>;
@@ -1114,81 +1130,97 @@ def AnalysisOrderChecker : Checker<"AnalysisOrder">,
                   "PreStmtCastExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PostStmtCastExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PreStmtArraySubscriptExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PostStmtArraySubscriptExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PreStmtCXXNewExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PostStmtCXXNewExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PreStmtOffsetOfExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PostStmtOffsetOfExpr",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PreCall",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "PostCall",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "EndFunction",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "NewAllocator",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "Bind",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "LiveSymbols",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "RegionChanges",
                   "",
                   "false",
+                  Released,
                   Hide>,
     CmdLineOption<Boolean,
                   "*",
                   "Enables all callbacks.",
                   "false",
+                  Released,
                   Hide>
   ]>,
   Documentation<NotDocumented>;
@@ -1272,16 +1304,19 @@ def CloneChecker : Checker<"CloneChecker">,
                   "of a statement. This constraint assumes the first statement "
                   "in the group is representative for all other statements in "
                   "the group in terms of complexity.",
-                  "50">,
+                  "50",
+                  Released>,
     CmdLineOption<Boolean,
                   "ReportNormalClones",
                   "Report all clones, even less suspicious ones.",
-                  "true">,
+                  "true",
+                  Released>,
     CmdLineOption<String,
                   "IgnoredFilesPattern",
                   "If supplied, the checker wont analyze files with a filename "
                   "that matches the given pattern.",
-                  "\"\"">
+                  "\"\"",
+                  Released>
   ]>,
   Documentation<HasAlphaDocumentation>;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
index c83696d70bfde..4d81f90961cb9 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -222,9 +222,12 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   unsigned ShowCheckerHelp : 1;
   unsigned ShowCheckerHelpAlpha : 1;
   unsigned ShowCheckerHelpDeveloper : 1;
-  unsigned ShowEnabledCheckerList : 1;
+
   unsigned ShowCheckerOptionList : 1;
+  unsigned ShowCheckerOptionAlphaList : 1;
   unsigned ShowCheckerOptionDeveloperList : 1;
+
+  unsigned ShowEnabledCheckerList : 1;
   unsigned ShowConfigOptionsList : 1;
   unsigned ShouldEmitErrorsOnInvalidConfigValue : 1;
   unsigned AnalyzeAll : 1;
@@ -288,8 +291,8 @@ class AnalyzerOptions : public RefCountedBase<AnalyzerOptions> {
   AnalyzerOptions()
       : DisableAllChecks(false), ShowCheckerHelp(false),
         ShowCheckerHelpAlpha(false), ShowCheckerHelpDeveloper(false),
-        ShowEnabledCheckerList(false),
-        ShowCheckerOptionList(false), ShowCheckerOptionDeveloperList(false),
+        ShowCheckerOptionList(false), ShowCheckerOptionAlphaList(false),
+        ShowCheckerOptionDeveloperList(false), ShowEnabledCheckerList(false),
         ShowConfigOptionsList(false), AnalyzeAll(false),
         AnalyzerDisplayProgress(false), AnalyzeNestedBlocks(false),
         eagerlyAssumeBinOpBifurcation(false), TrimGraph(false),
diff --git a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
index 9af8f91e70277..bc258160ada49 100644
--- a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
+++ b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
@@ -98,13 +98,15 @@ class CheckerRegistry {
     StringRef OptionName;
     StringRef DefaultValStr;
     StringRef Description;
+    StringRef DevelopmentStatus;
     bool IsHidden;
 
     CmdLineOption(StringRef OptionType, StringRef OptionName,
-                  StringRef DefaultValStr, StringRef Description, bool IsHidden)
+                  StringRef DefaultValStr, StringRef Description,
+                  StringRef DevelopmentStatus, bool IsHidden)
         : OptionType(OptionType), OptionName(OptionName),
           DefaultValStr(DefaultValStr), Description(Description),
-          IsHidden(IsHidden) {
+          DevelopmentStatus(DevelopmentStatus), IsHidden(IsHidden) {
 
       assert((OptionType == "bool" || OptionType == "string" ||
               OptionType == "int") &&
@@ -120,6 +122,10 @@ class CheckerRegistry {
              "Invalid value for integer command line option! Maybe incorrect "
              "parameters to the addCheckerOption or addPackageOption method?");
       (void)Tmp;
+
+      assert((DevelopmentStatus == "alpha" || DevelopmentStatus == "beta" ||
+              DevelopmentStatus == "released") &&
+             "Invalid development status!");
     }
   };
 
@@ -241,7 +247,8 @@ class CheckerRegistry {
   /// non-compatibility mode.
   void addCheckerOption(StringRef OptionType, StringRef CheckerFullName,
                         StringRef OptionName, StringRef DefaultValStr,
-                        StringRef Description, bool IsHidden = false);
+                        StringRef Description, StringRef DevelopmentStatus,
+                        bool IsHidden = false);
 
   /// Adds a package to the registry.
   void addPackage(StringRef FullName);
@@ -257,7 +264,8 @@ class CheckerRegistry {
   /// non-compatibility mode.
   void addPackageOption(StringRef OptionType, StringRef PackageFullName,
                         StringRef OptionName, StringRef DefaultValStr,
-                        StringRef Description, bool IsHidden = false);
+                        StringRef Description, StringRef DevelopmentStatus,
+                         bool IsHidden = false);
 
   // FIXME: This *really* should be added to the frontend flag descriptions.
   /// Initializes a CheckerManager by calling the initialization functions for
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 36519c6d487a8..96580804576d3 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -288,9 +288,13 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args,
   Opts.ShowCheckerHelpAlpha = Args.hasArg(OPT_analyzer_checker_help_alpha);
   Opts.ShowCheckerHelpDeveloper =
       Args.hasArg(OPT_analyzer_checker_help_developer);
+
   Opts.ShowCheckerOptionList = Args.hasArg(OPT_analyzer_checker_option_help);
+  Opts.ShowCheckerOptionAlphaList =
+      Args.hasArg(OPT_analyzer_checker_option_help_alpha);
   Opts.ShowCheckerOptionDeveloperList =
       Args.hasArg(OPT_analyzer_checker_option_help_developer);
+
   Opts.ShowConfigOptionsList = Args.hasArg(OPT_analyzer_config_help);
   Opts.ShowEnabledCheckerList = Args.hasArg(OPT_analyzer_list_enabled_checkers);
   Opts.ShouldEmitErrorsOnInvalidConfigValue =
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index a5f51ab7ae293..da7aa7b82a39c 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -249,7 +249,8 @@ bool ExecuteCompilerInvocation(CompilerInstance *Clang) {
   }
 
   // Honor -analyzer-checker-option-help.
-  if (AnOpts.ShowCheckerOptionList || AnOpts.ShowCheckerOptionDeveloperList) {
+  if (AnOpts.ShowCheckerOptionList || AnOpts.ShowCheckerOptionAlphaList ||
+      AnOpts.ShowCheckerOptionDeveloperList) {
     ento::printCheckerConfigList(llvm::outs(),
                                  Clang->getFrontendOpts().Plugins,
                                  *Clang->getAnalyzerOpts(),
diff --git a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
index f3a4765c9ee0f..3fd4c36947cbb 100644
--- a/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/CheckerRegistry.cpp
@@ -180,12 +180,12 @@ CheckerRegistry::CheckerRegistry(
   addDependency(FULLNAME, DEPENDENCY);
 
 #define GET_CHECKER_OPTIONS
-#define CHECKER_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL, IS_HIDDEN)  \
-  addCheckerOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC, IS_HIDDEN);
+#define CHECKER_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL, DEVELOPMENT_STATUS, IS_HIDDEN)  \
+  addCheckerOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC, DEVELOPMENT_STATUS, IS_HIDDEN);
 
 #define GET_PACKAGE_OPTIONS
-#define PACKAGE_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL, IS_HIDDEN)  \
-  addPackageOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC, IS_HIDDEN);
+#define PACKAGE_OPTION(TYPE, FULLNAME, CMDFLAG, DESC, DEFAULT_VAL, DEVELOPMENT_STATUS, IS_HIDDEN)  \
+  addPackageOption(TYPE, FULLNAME, CMDFLAG, DEFAULT_VAL, DESC, DEVELOPMENT_STATUS, IS_HIDDEN);
 
 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
 #undef CHECKER_DEPENDENCY
@@ -396,10 +396,12 @@ void CheckerRegistry::addPackageOption(StringRef OptionType,
                                        StringRef PackageFullName,
                                        StringRef OptionName,
                                        StringRef DefaultValStr,
-                                       StringRef Description, bool IsHidden) {
+                                       StringRef Description,
+                                       StringRef DevelopmentStatus,
+                                       bool IsHidden) {
   PackageOptions.emplace_back(
       PackageFullName, CmdLineOption{OptionType, OptionName, DefaultValStr,
-                                     Description, IsHidden});
+                                     Description, DevelopmentStatus, IsHidden});
 }
 
 void CheckerRegistry::addChecker(InitializationFunction Rfn,
@@ -421,10 +423,12 @@ void CheckerRegistry::addCheckerOption(StringRef OptionType,
                                        StringRef CheckerFullName,
                                        StringRef OptionName,
                                        StringRef DefaultValStr,
-                                       StringRef Description, bool IsHidden) {
+                                       StringRef Description,
+                                       StringRef DevelopmentStatus,
+                                       bool IsHidden) {
   CheckerOptions.emplace_back(
       CheckerFullName, CmdLineOption{OptionType, OptionName, DefaultValStr,
-                                     Description, IsHidden});
+                                     Description, DevelopmentStatus, IsHidden});
 }
 
 void CheckerRegistry::initializeManager(CheckerManager &CheckerMgr) const {
@@ -576,10 +580,14 @@ void CheckerRegistry::printCheckerOptionList(raw_ostream &Out) const {
     }
   }
 
+  auto Print = [] (llvm::raw_ostream &Out, StringRef FullOption, StringRef Desc) {
+    AnalyzerOptions::printFormattedEntry(Out, {FullOption, Desc},
+                                         /*InitialPad*/ 2,
+                                         /*EntryWidth*/ 50,
+                                         /*MinLineWidth*/ 90);
+    Out << "\n\n";
+  };
   for (const std::pair<StringRef, const CmdLineOption &> &Entry : OptionMap) {
-    if (!AnOpts.ShowCheckerOptionDeveloperList && Entry.second.IsHidden)
-      continue;
-
     const CmdLineOption &Option = Entry.second;
     std::string FullOption = (Entry.first + ":" + Option.OptionName).str();
 
@@ -588,10 +596,25 @@ void CheckerRegistry::printCheckerOptionList(raw_ostream &Out) const {
          (Option.DefaultValStr.empty() ? "\"\"" : Option.DefaultValStr) + ")")
             .str();
 
-    AnalyzerOptions::printFormattedEntry(Out, {FullOption, Desc},
-                                         /*InitialPad*/ 2,
-                                         /*EntryWidth*/ 50,
-                                         /*MinLineWidth*/ 90);
-    Out << "\n\n";
+    // The list of these if branches is significant, we wouldn't like to
+    // display hidden alpha checker options for
+    // -analyzer-checker-option-help-alpha.
+
+    if (Option.IsHidden) {
+      if (AnOpts.ShowCheckerOptionDeveloperList)
+        Print(Out, FullOption, Desc);
+      continue;
+    }
+
+    if (Option.DevelopmentStatus == "alpha" ||
+        Entry.first.startswith("alpha")) {
+      if (AnOpts.ShowCheckerOptionAlphaList)
+        Print(Out, FullOption,
+              llvm::Twine("(Enable only for development!) " + Desc).str());
+      continue;
+    }
+
+    if (AnOpts.ShowCheckerOptionList)
+      Print(Out, FullOption, Desc);
   }
 }
diff --git a/clang/test/Analysis/analyzer-checker-option-help.c b/clang/test/Analysis/analyzer-checker-option-help.c
index bc8ed09d43e17..5f95569e58498 100644
--- a/clang/test/Analysis/analyzer-checker-option-help.c
+++ b/clang/test/Analysis/analyzer-checker-option-help.c
@@ -1,25 +1,87 @@
-// RUN: %clang_cc1 -analyzer-checker-option-help 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -analyzer-checker-option-help \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE
+
+// RUN: %clang_cc1 -analyzer-checker-option-help-alpha \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-ALPHA
+
+// RUN: %clang_cc1 -analyzer-checker-option-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-DEVELOPER
 
 // RUN: %clang_cc1 -analyzer-checker-option-help-developer \
-// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-HIDDEN
+// RUN:   -analyzer-checker-option-help-alpha \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-DEVELOPER-ALPHA
+
+// RUN: %clang_cc1 -analyzer-checker-option-help \
+// RUN:   -analyzer-checker-option-help-alpha \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE-ALPHA
 
-// CHECK: OVERVIEW: Clang Static Analyzer Checker and Package Option List
+// RUN: %clang_cc1 -analyzer-checker-option-help \
+// RUN:   -analyzer-checker-option-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE-DEVELOPER
+
+// RUN: %clang_cc1 -analyzer-checker-option-help \
+// RUN:   -analyzer-checker-option-help-alpha \
+// RUN:   -analyzer-checker-option-help-developer \
+// RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-STABLE-ALPHA-DEVELOPER
+
+// CHECK-STABLE: OVERVIEW: Clang Static Analyzer Checker and Package Option List
 //
-// CHECK: USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>
+// CHECK-STABLE: USAGE: -analyzer-config <OPTION1=VALUE,OPTION2=VALUE,...>
 //
-// CHECK:        -analyzer-config OPTION1=VALUE, -analyzer-config
-// CHECK-SAME:   OPTION2=VALUE, ...
+// CHECK-STABLE:        -analyzer-config OPTION1=VALUE, -analyzer-config
+// CHECK-STABLE-SAME:   OPTION2=VALUE, ...
 //
-// CHECK: OPTIONS:
+// CHECK-STABLE: OPTIONS:
 //
-// CHECK:   alpha.clone.CloneChecker:MinimumCloneComplexity
-// CHECK-SAME:   (int) Ensures that every clone has at least
-// CHECK:        the given complexity. Complexity is here
-// CHECK:        defined as the total amount of children
-// CHECK:        of a statement. This constraint assumes
-// CHECK:        the first statement in the group is representative
-// CHECK:        for all other statements in the group in
-// CHECK:        terms of complexity. (default: 50)
-
-// CHECK-NOT:     optin.cplusplus.UninitializedObject:NotesAsWarnings
-// CHECK-HIDDEN:  optin.cplusplus.UninitializedObject:NotesAsWarnings
+// CHECK-STABLE:   cplusplus.Move:WarnOn
+// CHECK-STABLE-SAME:          (string) In non-aggressive mode, only warn
+// CHECK-STABLLE:              on use-after-move of local variables (or
+// CHECK-STABLLE:              local rvalue references) and of STL objects.
+// CHECK-STABLLE:              The former is possible because local variables
+// CHECK-STABLLE:              (or local rvalue references) are not tempting
+// CHECK-STABLLE:              their user to re-use the storage. The latter
+// CHECK-STABLLE:              is possible because STL objects are known
+// CHECK-STABLLE:              to end up in a valid but unspecified state
+// CHECK-STABLLE:              after the move and their state-reset methods
+// CHECK-STABLLE:              are also known, which allows us to predict
+// CHECK-STABLLE:              precisely when use-after-move is invalid.
+// CHECK-STABLLE:              Some STL objects are known to conform to
+// CHECK-STABLLE:              additional contracts after move, so they
+// CHECK-STABLLE:              are not tracked. However, smart pointers
+// CHECK-STABLLE:              specifically are tracked because we can
+// CHECK-STABLLE:              perform extra checking over them. In aggressive
+// CHECK-STABLLE:              mode, warn on any use-after-move because
+// CHECK-STABLLE:              the user has intentionally asked us to completely
+// CHECK-STABLLE:              eliminate use-after-move in his code. Values:
+// CHECK-STABLLE:              "KnownsOnly", "KnownsAndLocals", "All".
+// CHECK-STABLLE:              (default: KnownsAndLocals)
+
+// CHECK-STABLE-NOT: debug.AnalysisOrder:*
+// CHECK-DEVELOPER:  debug.AnalysisOrder:*
+// CHECK-ALPHA-NOT:  debug.AnalysisOrder:*
+
+// CHECK-STABLE-NOT:    optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+// CHECK-DEVELOPER-NOT: optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+// CHECK-ALPHA:         optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+
+// CHECK-STABLE:        optin.performance.Padding:AllowedPad
+// CHECK-DEVELOPER-NOT: optin.performance.Padding:AllowedPad
+// CHECK-ALPHA-NOT:     optin.performance.Padding:AllowedPad
+
+
+// CHECK-STABLE-ALPHA-NOT: debug.AnalysisOrder:*
+// CHECK-DEVELOPER-ALPHA:  debug.AnalysisOrder:*
+// CHECK-STABLE-DEVELOPER: debug.AnalysisOrder:*
+
+// CHECK-STABLE-ALPHA:         optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+// CHECK-DEVELOPER-ALPHA:      optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+// CHECK-STABLE-DEVELOPER-NOT: optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+
+// CHECK-STABLE-ALPHA:        optin.performance.Padding:AllowedPad
+// CHECK-DEVELOPER-ALPHA-NOT: optin.performance.Padding:AllowedPad
+// CHECK-STABLE-DEVELOPER:    optin.performance.Padding:AllowedPad
+
+
+// CHECK-STABLE-ALPHA-DEVELOPER: debug.AnalysisOrder:*
+// CHECK-STABLE-ALPHA-DEVELOPER: optin.cplusplus.UninitializedObject:IgnoreGuardedFields
+// CHECK-STABLE-ALPHA-DEVELOPER: optin.performance.Padding:AllowedPad
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp b/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
index c06a19df7dfe0..77de3630ae7ec 100644
--- a/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
+++ b/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
@@ -36,7 +36,8 @@ extern "C" void clang_registerCheckers(CheckerRegistry &registry) {
                             /*CheckerFullName*/ "example.MyChecker",
                             /*OptionName*/ "ExampleOption",
                             /*DefaultValStr*/ "false",
-                            /*Description*/ "This is an example checker opt.");
+                            /*Description*/ "This is an example checker opt.",
+                            /*DevelopmentStage*/ "released");
 }
 
 extern "C" const char clang_analyzerAPIVersionString[] =
diff --git a/clang/utils/TableGen/ClangSACheckersEmitter.cpp b/clang/utils/TableGen/ClangSACheckersEmitter.cpp
index 7c1827ec2e48a..7dd0895b76d45 100644
--- a/clang/utils/TableGen/ClangSACheckersEmitter.cpp
+++ b/clang/utils/TableGen/ClangSACheckersEmitter.cpp
@@ -110,6 +110,22 @@ static std::string getCheckerOptionType(const Record &R) {
   return "";
 }
 
+static std::string getDevelopmentStage(const Record &R) {
+  if (BitsInit *BI = R.getValueAsBitsInit("DevelopmentStage")) {
+    switch(getValueFromBitsInit(BI, R)) {
+    case 0:
+      return "alpha";
+    case 1:
+      return "released";
+    }
+  }
+
+  PrintFatalError(R.getLoc(),
+                  "unable to parse command line option type for "
+                  + getCheckerFullName(&R));
+  return "";
+}
+
 static bool isHidden(const Record *R) {
   if (R->getValueAsBit("Hidden"))
     return true;
@@ -149,6 +165,8 @@ static void printOption(llvm::raw_ostream &OS, StringRef FullName,
   OS.write_escaped(getStringValue(R, "Desc")) << "\", ";
   OS << '\"';
   OS.write_escaped(getStringValue(R, "DefaultVal")) << "\", ";
+  OS << '\"';
+  OS << getDevelopmentStage(R) << "\", ";
 
   if (!R.getValueAsBit("Hidden"))
     OS << "false";

From 3e4acaabb9d9d8f56cdbd0aff7d89165cab2320f Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel_l_sanders@apple.com>
Date: Thu, 23 May 2019 23:02:56 +0000
Subject: [PATCH 0102/1176] Break false dependencies on target libraries

Summary:
For the most part this consists of replacing ${LLVM_TARGETS_TO_BUILD} with
some combination of AllTargets* so that they depend on specific components
of a target backend rather than all of it. The overall effect of this is
that, for example, tools like opt no longer falsely depend on the
disassembler, while tools like llvm-ar no longer depend on the code
generator.

There's a couple quirks to point out here:
* AllTargetsCodeGens is a bit more prevalent than expected. Tools like dsymutil
  seem to need it which I was surprised by.
* llvm-xray linked to all the backends but doesn't seem to need any of them.
  It builds and passes the tests so that seems to be correct.
* I left gold out as it's not built when binutils is not available so I'm
  unable to test it

Reviewers: bogner, JDevlieghere

Reviewed By: bogner

Subscribers: mehdi_amini, mgorny, steven_wu, dexonsmith, rupprecht, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62331

llvm-svn: 361567
---
 llvm/cmake/modules/LLVM-Config.cmake   | 7 +++++++
 llvm/tools/bugpoint/CMakeLists.txt     | 3 ++-
 llvm/tools/dsymutil/CMakeLists.txt     | 5 ++++-
 llvm/tools/llc/CMakeLists.txt          | 3 ++-
 llvm/tools/llvm-ar/CMakeLists.txt      | 4 +++-
 llvm/tools/llvm-c-test/CMakeLists.txt  | 4 +++-
 llvm/tools/llvm-cxxdump/CMakeLists.txt | 2 +-
 llvm/tools/llvm-dwp/CMakeLists.txt     | 3 ++-
 llvm/tools/llvm-lto/CMakeLists.txt     | 3 ++-
 llvm/tools/llvm-lto2/CMakeLists.txt    | 4 +++-
 llvm/tools/llvm-rtdyld/CMakeLists.txt  | 4 +++-
 llvm/tools/llvm-xray/CMakeLists.txt    | 1 -
 llvm/tools/lto/CMakeLists.txt          | 6 +++++-
 llvm/tools/opt/CMakeLists.txt          | 4 +++-
 14 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/llvm/cmake/modules/LLVM-Config.cmake b/llvm/cmake/modules/LLVM-Config.cmake
index 474a874a4426e..57c02f170e18f 100644
--- a/llvm/cmake/modules/LLVM-Config.cmake
+++ b/llvm/cmake/modules/LLVM-Config.cmake
@@ -209,6 +209,13 @@ function(llvm_map_components_to_libnames out_libs)
       # already processed
     elseif( c STREQUAL "all" )
       list(APPEND expanded_components ${LLVM_AVAILABLE_LIBS})
+    elseif( c STREQUAL "AllTargetsCodeGens" )
+      # Link all the asm printers from all the targets
+      foreach(t ${LLVM_TARGETS_TO_BUILD})
+        if( TARGET LLVM${t}CodeGen)
+          list(APPEND expanded_components "LLVM${t}CodeGen")
+        endif()
+      endforeach(t)
     elseif( c STREQUAL "AllTargetsAsmPrinters" )
       # Link all the asm printers from all the targets
       foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/llvm/tools/bugpoint/CMakeLists.txt b/llvm/tools/bugpoint/CMakeLists.txt
index 654ecc496a919..6ed15a24a2d4d 100644
--- a/llvm/tools/bugpoint/CMakeLists.txt
+++ b/llvm/tools/bugpoint/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
   Analysis
   BitWriter
   CodeGen
diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt
index 480f78fb1888c..19865e3d20e14 100644
--- a/llvm/tools/dsymutil/CMakeLists.txt
+++ b/llvm/tools/dsymutil/CMakeLists.txt
@@ -1,5 +1,8 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmPrinters
+  AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsInfos
   AsmPrinter
   DebugInfoDWARF
   MC
diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt
index 4f8181a1b6577..863358b5e71c7 100644
--- a/llvm/tools/llc/CMakeLists.txt
+++ b/llvm/tools/llc/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
   Analysis
   AsmPrinter
   CodeGen
diff --git a/llvm/tools/llvm-ar/CMakeLists.txt b/llvm/tools/llvm-ar/CMakeLists.txt
index 191c684d5245e..602b4a46ea055 100644
--- a/llvm/tools/llvm-ar/CMakeLists.txt
+++ b/llvm/tools/llvm-ar/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsDescs
+  AllTargetsInfos
   BinaryFormat
   Core
   DlltoolDriver
diff --git a/llvm/tools/llvm-c-test/CMakeLists.txt b/llvm/tools/llvm-c-test/CMakeLists.txt
index bce0f4a5a4209..78eaafc3b5a40 100644
--- a/llvm/tools/llvm-c-test/CMakeLists.txt
+++ b/llvm/tools/llvm-c-test/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
+  AllTargetsDisassemblers
   BitReader
   Core
   MCDisassembler
diff --git a/llvm/tools/llvm-cxxdump/CMakeLists.txt b/llvm/tools/llvm-cxxdump/CMakeLists.txt
index ada886e888734..519e728e87b6f 100644
--- a/llvm/tools/llvm-cxxdump/CMakeLists.txt
+++ b/llvm/tools/llvm-cxxdump/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsInfos
   Object
   Support
   )
diff --git a/llvm/tools/llvm-dwp/CMakeLists.txt b/llvm/tools/llvm-dwp/CMakeLists.txt
index 1b5fbddc1f750..49f40b5c6397b 100644
--- a/llvm/tools/llvm-dwp/CMakeLists.txt
+++ b/llvm/tools/llvm-dwp/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
   AsmPrinter
   DebugInfoDWARF
   MC
diff --git a/llvm/tools/llvm-lto/CMakeLists.txt b/llvm/tools/llvm-lto/CMakeLists.txt
index f0b385b7993d2..d0222315d25f7 100644
--- a/llvm/tools/llvm-lto/CMakeLists.txt
+++ b/llvm/tools/llvm-lto/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
   BitReader
   BitWriter
   Core
diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
index 9be12d71ed793..233817a387f9d 100644
--- a/llvm/tools/llvm-lto2/CMakeLists.txt
+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
+  AllTargetsDescs
   BitReader
   Core
   Linker
diff --git a/llvm/tools/llvm-rtdyld/CMakeLists.txt b/llvm/tools/llvm-rtdyld/CMakeLists.txt
index c1acbe5846bdb..e1738633cfc39 100644
--- a/llvm/tools/llvm-rtdyld/CMakeLists.txt
+++ b/llvm/tools/llvm-rtdyld/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsDisassemblers
+  AllTargetsDescs
+  AllTargetsInfos
   DebugInfoDWARF
   ExecutionEngine
   MC
diff --git a/llvm/tools/llvm-xray/CMakeLists.txt b/llvm/tools/llvm-xray/CMakeLists.txt
index 4b056d10758f9..2551b97e550dc 100644
--- a/llvm/tools/llvm-xray/CMakeLists.txt
+++ b/llvm/tools/llvm-xray/CMakeLists.txt
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
   DebugInfoDWARF
   Object
   Support
diff --git a/llvm/tools/lto/CMakeLists.txt b/llvm/tools/lto/CMakeLists.txt
index 6e913519a809a..20118e029f827 100644
--- a/llvm/tools/lto/CMakeLists.txt
+++ b/llvm/tools/lto/CMakeLists.txt
@@ -1,5 +1,9 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsDisassemblers
+  AllTargetsInfos
   BitReader
   Core
   LTO
diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt
index f03d11516657f..2f9665c689807 100644
--- a/llvm/tools/opt/CMakeLists.txt
+++ b/llvm/tools/opt/CMakeLists.txt
@@ -1,5 +1,7 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
+  AllTargetsInfos
   AggressiveInstCombine
   Analysis
   BitWriter

From 4d4df6f144d93a901387c66d73183a72e198ee4f Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 23 May 2019 23:33:34 +0000
Subject: [PATCH 0103/1176] [InstCombine] remove redundant fold for
 insertelement; NFC

The out-of-bounds index pattern is handled by InstSimplify.

llvm-svn: 361569
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 44130d3246b67..063175b45cba7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -884,14 +884,10 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
       match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
                                        m_ConstantInt(ExtractedIdx)))) {
-    unsigned NumInsertVectorElts = IE.getType()->getNumElements();
     unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
     if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
       return replaceInstUsesWith(IE, VecOp);
 
-    if (InsertedIdx >= NumInsertVectorElts)  // Out of range insert.
-      return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
     // If we are extracting a value from a vector, then inserting it right
     // back into the same place, just use the input vector.
     if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)

From 093c922205fe53be35edcf99258aa5a00981c605 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 23 May 2019 23:33:38 +0000
Subject: [PATCH 0104/1176] [InstCombine] remove redundant fold for
 extractelement; NFC

The out-of-bounds index pattern is handled by InstSimplify,
so the extractelement should be eliminated next time it is
visited.

llvm-svn: 361570
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 063175b45cba7..c3fd612a9d007 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -884,10 +884,6 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
       match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
                                        m_ConstantInt(ExtractedIdx)))) {
-    unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
-    if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
-      return replaceInstUsesWith(IE, VecOp);
-
     // If we are extracting a value from a vector, then inserting it right
     // back into the same place, just use the input vector.
     if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)

From ffafdb9afc84126fe3156b8075bc3d7d3dad6dfe Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Thu, 23 May 2019 23:34:43 +0000
Subject: [PATCH 0105/1176] Fix hang during constant evaluation of union
 assignment.

HandleUnionActiveMemberChange forgot to walk over a nop implicit
conversion node and got stuck in the process.

As a cleanup I changed the declaration of `E` so it can't
be accidentally accessed after the loop.

llvm-svn: 361571
---
 clang/lib/AST/ExprConstant.cpp                   | 5 ++---
 clang/test/SemaCXX/constant-expression-cxx2a.cpp | 8 ++++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index dab40cc5754d6..ac21b63cc79bb 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -4994,9 +4994,8 @@ static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr,
   llvm::SmallVector<std::pair<unsigned, const FieldDecl*>, 4> UnionPathLengths;
   // C++ [class.union]p5:
   //   define the set S(E) of subexpressions of E as follows:
-  const Expr *E = LHSExpr;
   unsigned PathLength = LHS.Designator.Entries.size();
-  while (E) {
+  for (const Expr *E = LHSExpr; E != nullptr;) {
     //   -- If E is of the form A.B, S(E) contains the elements of S(A)...
     if (auto *ME = dyn_cast<MemberExpr>(E)) {
       auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
@@ -5026,6 +5025,7 @@ static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr,
 
     } else if (auto *ICE = dyn_cast<ImplicitCastExpr>(E)) {
       // Step over a derived-to-base conversion.
+      E = ICE->getSubExpr();
       if (ICE->getCastKind() == CK_NoOp)
         continue;
       if (ICE->getCastKind() != CK_DerivedToBase &&
@@ -5038,7 +5038,6 @@ static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr,
                                   LHS.Designator.Entries[PathLength]
                                       .getAsBaseOrMember().getPointer()));
       }
-      E = ICE->getSubExpr();
 
     //   -- Otherwise, S(E) is empty.
     } else {
diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
index a0f92691c27f9..aa534ce592e34 100644
--- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
@@ -513,4 +513,12 @@ namespace Union {
   static_assert(return_init_all().a.p == 7); // expected-error {{}} expected-note {{read of member 'p' of union with no active member}}
   static_assert(return_init_all().a.q == 8); // expected-error {{}} expected-note {{read of member 'q' of union with no active member}}
   constexpr B init_all = return_init_all();
+
+  constexpr bool test_no_member_change =  []{
+    union U { char dummy = {}; };
+    U u1;
+    U u2;
+    u1 = u2;
+    return true;
+  }();
 }

From ae02e8944807c7b611ca3645a983c62d464f27a4 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Thu, 23 May 2019 23:46:44 +0000
Subject: [PATCH 0106/1176] P0722R3:  Implement library support for destroying
 delete

Summary:
This provides the `std::destroying_delete_t` declaration in C++2a and after. (Even when the compiler doesn't support the language feature).

However, the feature test macro `__cpp_lib_destroying_delete` is only defined when we have both language support and  C++2a.


Reviewers: ldionne, ckennelly, serge-sans-paille, EricWF

Reviewed By: EricWF

Subscribers: dexonsmith, riccibruno, christof, jwakely, jdoerfert, mclow.lists, ldionne, libcxx-commits

Differential Revision: https://reviews.llvm.org/D55840

llvm-svn: 361572
---
 libcxx/docs/FeatureTestMacroTable.rst         |  2 +-
 libcxx/include/new                            | 15 +++++
 libcxx/include/version                        |  4 +-
 .../destroying_delete_t.pass.cpp              | 65 +++++++++++++++++++
 .../new.version.pass.cpp                      |  6 +-
 .../version.version.pass.cpp                  |  6 +-
 .../generate_feature_test_macro_components.py |  9 ++-
 7 files changed, 98 insertions(+), 9 deletions(-)
 create mode 100644 libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp

diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 0748c4c624d9b..2200a998aa4a1 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -182,7 +182,7 @@ Status
     ------------------------------------------------- -----------------
     ``__cpp_lib_constexpr_swap_algorithms``           *unimplemented*  
     ------------------------------------------------- -----------------
-    ``__cpp_lib_destroying_delete``                   *unimplemented*  
+    ``__cpp_lib_destroying_delete``                   ``201806L``      
     ------------------------------------------------- -----------------
     ``__cpp_lib_erase_if``                            ``201811L``      
     ------------------------------------------------- -----------------
diff --git a/libcxx/include/new b/libcxx/include/new
index 4cf4c4c1e7b98..85e4c4b3fcf2a 100644
--- a/libcxx/include/new
+++ b/libcxx/include/new
@@ -33,6 +33,12 @@ public:
 };
 
 enum class align_val_t : size_t {}; // C++17
+
+struct destroying_delete_t { // C++20
+  explicit destroying_delete_t() = default;
+};
+inline constexpr destroying_delete_t destroying_delete{}; // C++20
+
 struct nothrow_t {};
 extern const nothrow_t nothrow;
 typedef void (*new_handler)();
@@ -158,6 +164,15 @@ enum align_val_t { __zero = 0, __max = (size_t)-1 };
 #endif
 #endif
 
+#if _LIBCPP_STD_VER > 17
+// Enable the declaration even if the compiler doesn't support the language
+// feature.
+struct destroying_delete_t {
+  explicit destroying_delete_t() = default;
+};
+_LIBCPP_INLINE_VAR constexpr destroying_delete_t destroying_delete{};
+#endif // _LIBCPP_STD_VER > 17
+
 }  // std
 
 #if defined(_LIBCPP_CXX03_LANG)
diff --git a/libcxx/include/version b/libcxx/include/version
index 102da672e42dd..fe9cfed876b08 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -220,7 +220,9 @@ __cpp_lib_void_t                                        201411L <type_traits>
 // # define __cpp_lib_concepts                             201806L
 // # define __cpp_lib_constexpr_misc                       201811L
 // # define __cpp_lib_constexpr_swap_algorithms            201806L
-// # define __cpp_lib_destroying_delete                    201806L
+# if _LIBCPP_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L
+#   define __cpp_lib_destroying_delete                  201806L
+# endif
 # define __cpp_lib_erase_if                             201811L
 // # define __cpp_lib_generic_unordered_lookup             201811L
 # define __cpp_lib_interpolate                          201902L
diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
new file mode 100644
index 0000000000000..d544b0e7cab6e
--- /dev/null
+++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// struct destroying_delete_t {
+//   explicit destroying_delete_t() = default;
+// };
+// inline constexpr destroying_delete_t destroying_delete{};
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
+
+#include <new>
+
+#include <cassert>
+#include "test_macros.h"
+
+struct A {
+  void *data;
+  A();
+  ~A();
+
+  static A* New();
+  void operator delete(A*, std::destroying_delete_t);
+};
+
+bool A_constructed = false;
+bool A_destroyed = false;
+bool A_destroying_deleted = false;
+
+A::A() {
+  A_constructed = true;
+}
+
+A::~A() {
+  A_destroyed = true;
+}
+
+A* A::New() {
+  return new(::operator new(sizeof(A))) A();
+}
+
+void A::operator delete(A* a, std::destroying_delete_t) {
+  A_destroying_deleted = true;
+  ::operator delete(a);
+}
+
+#ifndef __cpp_lib_destroying_delete
+#error "Expected __cpp_lib_destroying_delete to be defined"
+#elif __cpp_lib_destroying_delete < 201806L
+#error "Unexpected value of __cpp_lib_destroying_delete"
+#endif
+
+int main() {
+  // Ensure that we call the destroying delete and not the destructor.
+  A* ap = A::New();
+  assert(A_constructed);
+  delete ap;
+  assert(!A_destroyed);
+  assert(A_destroying_deleted);
+}
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.pass.cpp
index 5f012cd55f6c2..294c29a739ff1 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/new.version.pass.cpp
@@ -72,16 +72,16 @@
 
 #elif TEST_STD_VER > 17
 
-# if !defined(_LIBCPP_VERSION)
+# if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L
 #   ifndef __cpp_lib_destroying_delete
 #     error "__cpp_lib_destroying_delete should be defined in c++2a"
 #   endif
 #   if __cpp_lib_destroying_delete != 201806L
 #     error "__cpp_lib_destroying_delete should have the value 201806L in c++2a"
 #   endif
-# else // _LIBCPP_VERSION
+# else
 #   ifdef __cpp_lib_destroying_delete
-#     error "__cpp_lib_destroying_delete should not be defined because it is unimplemented in libc++!"
+#     error "__cpp_lib_destroying_delete should not be defined when TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L is not defined!"
 #   endif
 # endif
 
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp
index 0ed0a512286dc..3503051afad7f 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.pass.cpp
@@ -1662,16 +1662,16 @@
 #   endif
 # endif
 
-# if !defined(_LIBCPP_VERSION)
+# if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L
 #   ifndef __cpp_lib_destroying_delete
 #     error "__cpp_lib_destroying_delete should be defined in c++2a"
 #   endif
 #   if __cpp_lib_destroying_delete != 201806L
 #     error "__cpp_lib_destroying_delete should have the value 201806L in c++2a"
 #   endif
-# else // _LIBCPP_VERSION
+# else
 #   ifdef __cpp_lib_destroying_delete
-#     error "__cpp_lib_destroying_delete should not be defined because it is unimplemented in libc++!"
+#     error "__cpp_lib_destroying_delete should not be defined when TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L is not defined!"
 #   endif
 # endif
 
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index f5e770de5bb22..0384d32558087 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -483,7 +483,14 @@ def add_version_header(tc):
      "c++2a": 201806L,
    },
    "headers": ["new"],
-   "unimplemented": True,
+   "depends":
+      "TEST_STD_VER > 17"
+      " && defined(__cpp_impl_destroying_delete)"
+      " && __cpp_impl_destroying_delete >= 201806L",
+   "internal_depends":
+      "_LIBCPP_STD_VER > 17"
+      " && defined(__cpp_impl_destroying_delete)"
+      " && __cpp_impl_destroying_delete >= 201806L",
    },
   {"name": "__cpp_lib_three_way_comparison",
    "values": {

From 11c141eb68531eec30af8ff8f82b8159de99e555 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 24 May 2019 00:02:00 +0000
Subject: [PATCH 0107/1176] [COFF] Remove finalizeContents virtual method from
 Chunk, NFC

This only needs to be done for MergeChunks, so just do that in a
separate pass in the Writer.

This is one small step towards eliminating the vtable in Chunk.

llvm-svn: 361573
---
 lld/COFF/Chunks.cpp | 15 ++++++++-------
 lld/COFF/Chunks.h   |  7 ++-----
 lld/COFF/Writer.cpp | 13 ++++++++++---
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 76b0c8301b904..b016ac1e86d35 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -873,14 +873,15 @@ void MergeChunk::addSection(SectionChunk *C) {
 }
 
 void MergeChunk::finalizeContents() {
-  if (!Finalized) {
-    for (SectionChunk *C : Sections)
-      if (C->Live)
-        Builder.add(toStringRef(C->getContents()));
-    Builder.finalize();
-    Finalized = true;
-  }
+  assert(!Finalized && "should only finalize once");
+  for (SectionChunk *C : Sections)
+    if (C->Live)
+      Builder.add(toStringRef(C->getContents()));
+  Builder.finalize();
+  Finalized = true;
+}
 
+void MergeChunk::assignSubsectionRVAs() {
   for (SectionChunk *C : Sections) {
     if (!C->Live)
       continue;
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index d15638e7b0cc6..672003b31f98f 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -78,10 +78,6 @@ class Chunk {
   // before calling this function.
   virtual void writeTo(uint8_t *Buf) const {}
 
-  // Called by the writer after an RVA is assigned, but before calling
-  // getSize().
-  virtual void finalizeContents() {}
-
   // The writer sets and uses the addresses. In practice, PE images cannot be
   // larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
   // can be stored with 32 bits.
@@ -320,7 +316,8 @@ class MergeChunk : public Chunk {
 public:
   MergeChunk(uint32_t Alignment);
   static void addSection(SectionChunk *C);
-  void finalizeContents() override;
+  void finalizeContents();
+  void assignSubsectionRVAs();
 
   uint32_t getOutputCharacteristics() const override;
   StringRef getSectionName() const override { return ".rdata"; }
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index f629715fbd2ac..d673fc8de8527 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -865,9 +865,12 @@ void Writer::createSections() {
 }
 
 void Writer::createMiscChunks() {
-  for (MergeChunk *P : MergeChunk::Instances)
-    if (P)
+  for (MergeChunk *P : MergeChunk::Instances) {
+    if (P) {
+      P->finalizeContents();
       RdataSec->addChunk(P);
+    }
+  }
 
   // Create thunks for locally-dllimported symbols.
   if (!Symtab->LocalImportChunks.empty()) {
@@ -1162,7 +1165,6 @@ void Writer::assignAddresses() {
         VirtualSize += Padding;
       VirtualSize = alignTo(VirtualSize, C->getAlignment());
       C->setRVA(RVA + VirtualSize);
-      C->finalizeContents();
       VirtualSize += C->getSize();
       if (C->hasData())
         RawSize = alignTo(VirtualSize, SectorSize);
@@ -1177,6 +1179,11 @@ void Writer::assignAddresses() {
     FileSize += alignTo(RawSize, SectorSize);
   }
   SizeOfImage = alignTo(RVA, PageSize);
+
+  // Assign addresses to sections in MergeChunks.
+  for (MergeChunk *MC : MergeChunk::Instances)
+    if (MC)
+      MC->assignSubsectionRVAs();
 }
 
 template <typename PEHeaderTy> void Writer::writeHeader() {

From 3c9c9ea2c98e341a4f39df448f508321ff317ac0 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Fri, 24 May 2019 00:10:33 +0000
Subject: [PATCH 0108/1176] Update C++2a status for destroying delete

llvm-svn: 361574
---
 libcxx/www/cxx2a_status.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html
index 9b06a4e493951..99438fffa0615 100644
--- a/libcxx/www/cxx2a_status.html
+++ b/libcxx/www/cxx2a_status.html
@@ -91,7 +91,7 @@ <h3>Paper Status</h3>
 	<tr><td><a href="https://wg21.link/P0556R3">P0556R3</a></td><td>LWG</td><td>Integral power-of-2 operations</td><td>Rapperswil</td><td><i>In Progress</i></td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0619R4">P0619R4</a></td><td>LWG</td><td>Reviewing Deprecated Facilities of C++17 for C++20</td><td>Rapperswil</td><td></td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0646R1">P0646R1</a></td><td>LWG</td><td>Improving the Return Value of Erase-Like Algorithms</td><td>Rapperswil</td><td></td><td></td></tr>
-	<tr><td><a href="https://wg21.link/P0722R3">P0722R3</a></td><td>CWG</td><td>Efficient sized delete for variable sized classes</td><td>Rapperswil</td><td></td><td></td></tr>
+	<tr><td><a href="https://wg21.link/P0722R3">P0722R3</a></td><td>CWG</td><td>Efficient sized delete for variable sized classes</td><td>Rapperswil</td><td>Complete</td><td>9.0</td></tr>
 	<tr><td><a href="https://wg21.link/P0758R1">P0758R1</a></td><td>LWG</td><td>Implicit conversion traits and utility functions</td><td>Rapperswil</td><td>Complete</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0759R1">P0759R1</a></td><td>LWG</td><td>fpos Requirements</td><td>Rapperswil</td><td></td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0769R2">P0769R2</a></td><td>LWG</td><td>Add shift to &lt;algorithm&gt;</td><td>Rapperswil</td><td></td><td></td></tr>

From 3e15f833819d6e18988c62e877637258e649808c Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 24 May 2019 00:11:23 +0000
Subject: [PATCH 0109/1176] [InstSimplify] add tests for insert-of-extract; NFC

llvm-svn: 361575
---
 .../Transforms/InstSimplify/insertelement.ll  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll
index c7db869d056d2..6e78ea78a1b98 100644
--- a/llvm/test/Transforms/InstSimplify/insertelement.ll
+++ b/llvm/test/Transforms/InstSimplify/insertelement.ll
@@ -49,3 +49,25 @@ define <4 x i32> @PR1286(<4 x i32> %A) {
   %B = insertelement <4 x i32> %A, i32 undef, i32 1
   ret <4 x i32> %B
 }
+
+define <8 x i8> @extract_insert_same_vec_and_index(<8 x i8> %in) {
+; CHECK-LABEL: @extract_insert_same_vec_and_index(
+; CHECK-NEXT:    [[VAL:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 5
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i8> [[IN]], i8 [[VAL]], i32 5
+; CHECK-NEXT:    ret <8 x i8> [[VEC]]
+;
+  %val = extractelement <8 x i8> %in, i32 5
+  %vec = insertelement <8 x i8> %in, i8 %val, i32 5
+  ret <8 x i8> %vec
+}
+
+define <8 x i8> @extract_insert_same_vec_and_index2(<8 x i8> %in, i32 %index) {
+; CHECK-LABEL: @extract_insert_same_vec_and_index2(
+; CHECK-NEXT:    [[VAL:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 [[INDEX:%.*]]
+; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i8> [[IN]], i8 [[VAL]], i32 [[INDEX]]
+; CHECK-NEXT:    ret <8 x i8> [[VEC]]
+;
+  %val = extractelement <8 x i8> %in, i32 %index
+  %vec = insertelement <8 x i8> %in, i8 %val, i32 %index
+  ret <8 x i8> %vec
+}

From 8869a98e82552ef698112df840575693780802a4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 24 May 2019 00:13:58 +0000
Subject: [PATCH 0110/1176] [InstSimplify] fold insertelement-of-extractelement

This was partly handled in InstCombine (only the constant
index case), so delete that and zap it more generally in
InstSimplify.

llvm-svn: 361576
---
 llvm/lib/Analysis/InstructionSimplify.cpp                | 6 ++++++
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 5 -----
 llvm/test/Transforms/InstSimplify/insertelement.ll       | 8 ++------
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 6e421dcaa737f..1f8245d30f6ff 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4016,6 +4016,12 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
   if (isa<UndefValue>(Val))
     return Vec;
 
+  // If we are extracting a value from a vector, then inserting it into the same
+  // place, that's the input vector:
+  // insertelt Vec, (extractelt Vec, Idx), Idx --> Vec
+  if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx))))
+    return Vec;
+
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c3fd612a9d007..308569395a9ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -884,11 +884,6 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
       match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
                                        m_ConstantInt(ExtractedIdx)))) {
-    // If we are extracting a value from a vector, then inserting it right
-    // back into the same place, just use the input vector.
-    if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
-      return replaceInstUsesWith(IE, VecOp);
-
     // TODO: Looking at the user(s) to determine if this insert is a
     // fold-to-shuffle opportunity does not match the usual instcombine
     // constraints. We should decide if the transform is worthy based only
diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll
index 6e78ea78a1b98..e487eeb96b06c 100644
--- a/llvm/test/Transforms/InstSimplify/insertelement.ll
+++ b/llvm/test/Transforms/InstSimplify/insertelement.ll
@@ -52,9 +52,7 @@ define <4 x i32> @PR1286(<4 x i32> %A) {
 
 define <8 x i8> @extract_insert_same_vec_and_index(<8 x i8> %in) {
 ; CHECK-LABEL: @extract_insert_same_vec_and_index(
-; CHECK-NEXT:    [[VAL:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 5
-; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i8> [[IN]], i8 [[VAL]], i32 5
-; CHECK-NEXT:    ret <8 x i8> [[VEC]]
+; CHECK-NEXT:    ret <8 x i8> [[IN:%.*]]
 ;
   %val = extractelement <8 x i8> %in, i32 5
   %vec = insertelement <8 x i8> %in, i8 %val, i32 5
@@ -63,9 +61,7 @@ define <8 x i8> @extract_insert_same_vec_and_index(<8 x i8> %in) {
 
 define <8 x i8> @extract_insert_same_vec_and_index2(<8 x i8> %in, i32 %index) {
 ; CHECK-LABEL: @extract_insert_same_vec_and_index2(
-; CHECK-NEXT:    [[VAL:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 [[INDEX:%.*]]
-; CHECK-NEXT:    [[VEC:%.*]] = insertelement <8 x i8> [[IN]], i8 [[VAL]], i32 [[INDEX]]
-; CHECK-NEXT:    ret <8 x i8> [[VEC]]
+; CHECK-NEXT:    ret <8 x i8> [[IN:%.*]]
 ;
   %val = extractelement <8 x i8> %in, i32 %index
   %vec = insertelement <8 x i8> %in, i8 %val, i32 %index

From 55229f6b10276526d76f7f6b900053e3c82b5bf7 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Fri, 24 May 2019 00:15:04 +0000
Subject: [PATCH 0111/1176] [WebAssembly] Expand more SIMD float ops

Summary: These were previously causing ISel failures.

Reviewers: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62354

llvm-svn: 361577
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   |   3 +-
 llvm/test/CodeGen/WebAssembly/libcalls.ll     |  17 +-
 .../CodeGen/WebAssembly/simd-unsupported.ll   | 180 +++++++++++++++++-
 3 files changed, 195 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 06aee6e80a04f..65db1ebf50fca 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -182,7 +182,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
 
     // Expand float operations supported for scalars but not SIMD
     for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
-                    ISD::FCOPYSIGN}) {
+                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+                    ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
       setOperationAction(Op, MVT::v4f32, Expand);
       if (Subtarget->hasUnimplementedSIMD128())
         setOperationAction(Op, MVT::v2f64, Expand);
diff --git a/llvm/test/CodeGen/WebAssembly/libcalls.ll b/llvm/test/CodeGen/WebAssembly/libcalls.ll
index 3849f1978a23f..bccb438f3c088 100644
--- a/llvm/test/CodeGen/WebAssembly/libcalls.ll
+++ b/llvm/test/CodeGen/WebAssembly/libcalls.ll
@@ -13,6 +13,11 @@ declare fp128 @llvm.pow.f128(fp128, fp128)
 
 declare double @llvm.cos.f64(double)
 declare double @llvm.log10.f64(double)
+declare double @llvm.pow.f64(double, double)
+declare double @llvm.log.f64(double)
+declare double @llvm.exp.f64(double)
+declare i32 @llvm.lround(double)
+
 
 
 ; CHECK-LABEL: fp128libcalls:
@@ -51,12 +56,20 @@ define i128 @i128libcalls(i128 %x, i128 %y) {
 }
 
 ; CHECK-LABEL: f64libcalls:
-define double @f64libcalls(double %x, double %y) {
+define i32 @f64libcalls(double %x, double %y) {
  ; CHECK: f64.call $push{{[0-9]}}=, cos
  %a = call double @llvm.cos.f64(double %x)
  ; CHECK: f64.call $push{{[0-9]}}=, log10
  %b = call double @llvm.log10.f64(double %a)
- ret double %b
+ ; CHECK: f64.call $push{{[0-9]}}=, pow
+ %c = call double @llvm.pow.f64(double %b, double %y)
+ ; CHECK: f64.call $push{{[0-9]}}=, log
+ %d = call double @llvm.log.f64(double %c)
+ ; CHECK: f64.call $push{{[0-9]}}=, exp
+ %e = call double @llvm.exp.f64(double %d)
+ ; CHECK: i32.call $push{{[0-9]}}=, lround
+ %f = call i32 @llvm.lround(double %e)
+ ret i32 %f
 }
 
 ; fcmp ord and unord (RTLIB::O_F32 / RTLIB::UO_F32 etc) are a special case (see
diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
index 97da90d8bb489..b539c885c087a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s
 
-; Test that operations that are supported by MVP but not SIMD are
-; properly unrolled.
+; Test that operations that are not supported by SIMD are properly
+; unrolled.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
@@ -405,6 +405,94 @@ define <4 x float> @copysign_v4f32(<4 x float> %x, <4 x float> %y) {
   ret <4 x float> %v
 }
 
+; CHECK-LABEL: sin_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, sinf
+declare <4 x float> @llvm.sin.v4f32(<4 x float>)
+define <4 x float> @sin_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: cos_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, cosf
+declare <4 x float> @llvm.cos.v4f32(<4 x float>)
+define <4 x float> @cos_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.cos.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: powi_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, __powisf2
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32)
+define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) {
+  %v = call <4 x float> @llvm.powi.v4f32(<4 x float> %x, i32 %y)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: pow_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, powf
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @pow_v4f32(<4 x float> %x, <4 x float> %y) {
+  %v = call <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: log_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, logf
+declare <4 x float> @llvm.log.v4f32(<4 x float>)
+define <4 x float> @log_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.log.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: log2_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, log2f
+declare <4 x float> @llvm.log2.v4f32(<4 x float>)
+define <4 x float> @log2_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.log2.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: log10_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, log10f
+declare <4 x float> @llvm.log10.v4f32(<4 x float>)
+define <4 x float> @log10_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.log10.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: exp_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, expf
+declare <4 x float> @llvm.exp.v4f32(<4 x float>)
+define <4 x float> @exp_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.exp.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: exp2_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, exp2f
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
+define <4 x float> @exp2_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.exp2.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: rint_v4f32:
+; CHECK: f32.nearest
+declare <4 x float> @llvm.rint.v4f32(<4 x float>)
+define <4 x float> @rint_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: round_v4f32:
+; CHECK: f32.call $push[[L:[0-9]+]]=, roundf
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
+define <4 x float> @round_v4f32(<4 x float> %x) {
+  %v = call <4 x float> @llvm.round.v4f32(<4 x float> %x)
+  ret <4 x float> %v
+}
+
 ; ==============================================================================
 ; 2 x f64
 ; ==============================================================================
@@ -448,3 +536,91 @@ define <2 x double> @copysign_v2f64(<2 x double> %x, <2 x double> %y) {
   %v = call <2 x double> @llvm.copysign.v2f64(<2 x double> %x, <2 x double> %y)
   ret <2 x double> %v
 }
+
+; CHECK-LABEL: sin_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, sin
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+define <2 x double> @sin_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.sin.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: cos_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, cos
+declare <2 x double> @llvm.cos.v2f64(<2 x double>)
+define <2 x double> @cos_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.cos.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: powi_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, __powidf2
+declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32)
+define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) {
+  %v = call <2 x double> @llvm.powi.v2f64(<2 x double> %x, i32 %y)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: pow_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, pow
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @pow_v2f64(<2 x double> %x, <2 x double> %y) {
+  %v = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: log_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, log
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+define <2 x double> @log_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.log.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: log2_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, log2
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+define <2 x double> @log2_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.log2.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: log10_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, log10
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+define <2 x double> @log10_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.log10.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: exp_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, exp
+declare <2 x double> @llvm.exp.v2f64(<2 x double>)
+define <2 x double> @exp_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.exp.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: exp2_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, exp2
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+define <2 x double> @exp2_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.exp2.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: rint_v2f64:
+; CHECK: f64.nearest
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+define <2 x double> @rint_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.rint.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: round_v2f64:
+; CHECK: f64.call $push[[L:[0-9]+]]=, round
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
+define <2 x double> @round_v2f64(<2 x double> %x) {
+  %v = call <2 x double> @llvm.round.v2f64(<2 x double> %x)
+  ret <2 x double> %v
+}

From ab09cca310e8de1b79766f93b3b83897eabb52dd Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Fri, 24 May 2019 00:21:46 +0000
Subject: [PATCH 0112/1176] llvm-objcopy: Change sectionWithinSegment() to use
 virtual addresses instead of file offsets for SHT_NOBITS sections.

Without this, sectionWithinSegment() will return the wrong answer for bss
sections. This doesn't seem to matter now (for non-broken ELF files), but
it will matter with a change that I'm working on.

Differential Revision: https://reviews.llvm.org/D58426

llvm-svn: 361578
---
 llvm/tools/llvm-objcopy/ELF/Object.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 0c80bad6c102d..85e7ffa6d8ecb 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -809,6 +809,20 @@ static bool sectionWithinSegment(const SectionBase &Section,
   // segments and ensures that the section "belongs" to the second segment and
   // not the first.
   uint64_t SecSize = Section.Size ? Section.Size : 1;
+
+  if (Section.Type == SHT_NOBITS) {
+    if (!(Section.Flags & SHF_ALLOC))
+      return false;
+
+    bool SectionIsTLS = Section.Flags & SHF_TLS;
+    bool SegmentIsTLS = Segment.Type == PT_TLS;
+    if (SectionIsTLS != SegmentIsTLS)
+      return false;
+
+    return Segment.VAddr <= Section.Addr &&
+           Segment.VAddr + Segment.MemSize >= Section.Addr + SecSize;
+  }
+
   return Segment.Offset <= Section.OriginalOffset &&
          Segment.Offset + Segment.FileSize >= Section.OriginalOffset + SecSize;
 }

From 060f4b48d55c761de2722c0b7387966d1e8ede0c Mon Sep 17 00:00:00 2001
From: Kostya Serebryany <kcc@google.com>
Date: Fri, 24 May 2019 00:43:52 +0000
Subject: [PATCH 0113/1176] [libFuzzer] when using data-flow-trace (DFT) only
 load the DFT for the files present in the corpus

llvm-svn: 361579
---
 compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp | 16 ++++++++++------
 compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h   |  4 +++-
 compiler-rt/lib/fuzzer/FuzzerLoop.cpp          |  7 ++++---
 compiler-rt/test/fuzzer/dataflow.test          |  2 +-
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
index c67238ec28e99..1fba3913c96a2 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp
@@ -100,6 +100,7 @@ void DataFlowTrace::ReadCoverage(const std::string &DirPath) {
   for (auto &SF : Files) {
     auto Name = Basename(SF.File);
     if (Name == kFunctionsTxt) continue;
+    if (!CorporaHashes.count(Name)) continue;
     std::ifstream IF(SF.File);
     Coverage.AppendCoverage(IF);
   }
@@ -154,9 +155,8 @@ static bool ParseDFTLine(const std::string &Line, size_t *FunctionNum,
   return true;
 }
 
-bool DataFlowTrace::Init(const std::string &DirPath,
-                         std::string *FocusFunction,
-                         Random &Rand) {
+bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
+                         Vector<SizedFile> &CorporaFiles, Random &Rand) {
   if (DirPath.empty()) return false;
   Printf("INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
   Vector<SizedFile> Files;
@@ -165,6 +165,10 @@ bool DataFlowTrace::Init(const std::string &DirPath,
   size_t FocusFuncIdx = SIZE_MAX;
   Vector<std::string> FunctionNames;
 
+  // Collect the hashes of the corpus files.
+  for (auto &SF : CorporaFiles)
+    CorporaHashes.insert(Hash(FileToVector(SF.File)));
+
   // Read functions.txt
   std::ifstream IF(DirPlusFile(DirPath, kFunctionsTxt));
   size_t NumFunctions = 0;
@@ -211,6 +215,7 @@ bool DataFlowTrace::Init(const std::string &DirPath,
   for (auto &SF : Files) {
     auto Name = Basename(SF.File);
     if (Name == kFunctionsTxt) continue;
+    if (!CorporaHashes.count(Name)) continue;  // not in the corpus.
     NumTraceFiles++;
     // Printf("=== %s\n", Name.c_str());
     std::ifstream IF(SF.File);
@@ -231,11 +236,10 @@ bool DataFlowTrace::Init(const std::string &DirPath,
       }
     }
   }
-  assert(NumTraceFiles == Files.size() - 1);
   Printf("INFO: DataFlowTrace: %zd trace files, %zd functions, "
          "%zd traces with focus function\n",
          NumTraceFiles, NumFunctions, NumTracesWithFocusFunction);
-  return true;
+  return NumTraceFiles > 0;
 }
 
 int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
@@ -311,7 +315,7 @@ int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
   }
   RemoveFile(Temp);
   // Write functions.txt if it's currently empty or doesn't exist.
-  auto FunctionsTxtPath = DirPlusFile(DirPath, "functions.txt");
+  auto FunctionsTxtPath = DirPlusFile(DirPath, kFunctionsTxt);
   if (FileToString(FunctionsTxtPath).empty()) {
     Command Cmd;
     Cmd.addArgument(DFTBinary);
diff --git a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
index cfb04ad3ad394..022e8543fd865 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
+++ b/compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.h
@@ -32,6 +32,7 @@
 #include "FuzzerIO.h"
 
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 #include <string>
 
@@ -112,7 +113,7 @@ class DataFlowTrace {
  public:
   void ReadCoverage(const std::string &DirPath);
   bool Init(const std::string &DirPath, std::string *FocusFunction,
-            Random &Rand);
+            Vector<SizedFile> &CorporaFiles, Random &Rand);
   void Clear() { Traces.clear(); }
   const Vector<uint8_t> *Get(const std::string &InputSha1) const {
     auto It = Traces.find(InputSha1);
@@ -125,6 +126,7 @@ class DataFlowTrace {
   // Input's sha1 => DFT for the FocusFunction.
   std::unordered_map<std::string, Vector<uint8_t> > Traces;
   BlockCoverage Coverage;
+  std::unordered_set<std::string> CorporaHashes;
 };
 }  // namespace fuzzer
 
diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index d7adc90c9961a..7081daa899066 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -157,9 +157,6 @@ Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
   AllocateCurrentUnitData();
   CurrentUnitSize = 0;
   memset(BaseSha1, 0, sizeof(BaseSha1));
-  auto FocusFunctionOrAuto = Options.FocusFunction;
-  DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto , MD.GetRand());
-  TPC.SetFocusFunction(FocusFunctionOrAuto);
 }
 
 Fuzzer::~Fuzzer() {}
@@ -789,6 +786,10 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
 }
 
 void Fuzzer::Loop(Vector<SizedFile> &CorporaFiles) {
+  auto FocusFunctionOrAuto = Options.FocusFunction;
+  DFT.Init(Options.DataFlowTrace, &FocusFunctionOrAuto, CorporaFiles,
+           MD.GetRand());
+  TPC.SetFocusFunction(FocusFunctionOrAuto);
   ReadAndExecuteSeedCorpora(CorporaFiles);
   DFT.Clear();  // No need for DFT any more.
   TPC.SetPrintNewPCs(Options.PrintNewCovPcs);
diff --git a/compiler-rt/test/fuzzer/dataflow.test b/compiler-rt/test/fuzzer/dataflow.test
index bc42c7d1dc5f8..9446fe4023f43 100644
--- a/compiler-rt/test/fuzzer/dataflow.test
+++ b/compiler-rt/test/fuzzer/dataflow.test
@@ -92,7 +92,7 @@ RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flo
 # Test that we can run collect_data_flow on the entire corpus dir
 RUN: rm -rf %t/OUT
 RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN
-RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
+RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 %t/IN 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE
 
 
 USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT

From 09ad8c8f73dbc64efada18e7ae2f9b64eb1be215 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Fri, 24 May 2019 00:44:33 +0000
Subject: [PATCH 0114/1176] Fix integer literals which are cast to bool

This change replaces built-in types that are implicitly converted to
booleans.

Differential revision: https://reviews.llvm.org/D62284

llvm-svn: 361580
---
 .../source/Commands/CommandObjectPlatform.cpp |  2 +-
 lldb/source/Commands/CommandObjectTarget.cpp  |  2 +-
 lldb/source/Commands/CommandObjectThread.cpp  |  2 +-
 lldb/source/Core/Address.cpp                  |  2 +-
 lldb/source/Host/macosx/objcxx/Host.mm        |  2 +-
 lldb/source/Interpreter/Options.cpp           |  6 ++--
 .../ABI/SysV-mips64/ABISysV_mips64.cpp        | 30 +++++++++----------
 .../Clang/ASTResultSynthesizer.cpp            |  2 +-
 .../ExpressionParser/Clang/ClangASTSource.cpp | 30 ++++++++++---------
 .../Clang/ClangExpressionDeclMap.cpp          |  4 +--
 .../ARM64/EmulateInstructionARM64.cpp         |  4 +--
 .../AppleObjCClassDescriptorV2.cpp            |  4 +--
 .../AppleObjCRuntime/AppleObjCDeclVendor.cpp  |  6 ++--
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     |  2 +-
 .../MacOSX-Kernel/CommunicationKDP.cpp        |  2 +-
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  |  4 +--
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      |  2 +-
 lldb/source/Symbol/ClangASTContext.cpp        |  6 ++--
 lldb/source/Symbol/ClangASTImporter.cpp       |  2 +-
 lldb/source/Symbol/CompilerType.cpp           |  2 +-
 lldb/source/Symbol/Function.cpp               |  2 +-
 lldb/source/Symbol/SymbolContext.cpp          |  2 +-
 lldb/source/Target/Process.cpp                |  2 +-
 lldb/source/Target/Target.cpp                 |  2 +-
 lldb/source/Target/Thread.cpp                 |  8 ++---
 lldb/source/Utility/JSON.cpp                  |  6 ++--
 lldb/source/Utility/SelectHelper.cpp          |  2 +-
 lldb/source/Utility/StructuredData.cpp        |  4 +--
 lldb/tools/debugserver/source/DNB.cpp         |  4 +--
 lldb/tools/debugserver/source/JSON.cpp        |  6 ++--
 .../source/MacOSX/MachThreadList.cpp          |  2 +-
 lldb/tools/debugserver/source/RNBRemote.cpp   |  6 ++--
 lldb/tools/debugserver/source/debugserver.cpp |  2 +-
 .../debugserver/source/libdebugserver.cpp     |  2 +-
 34 files changed, 84 insertions(+), 82 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp
index 0007ae66a85d9..53549cdeee326 100644
--- a/lldb/source/Commands/CommandObjectPlatform.cpp
+++ b/lldb/source/Commands/CommandObjectPlatform.cpp
@@ -235,7 +235,7 @@ class CommandObjectPlatformList : public CommandObjectParsed {
                  host_platform_sp->GetDescription());
 
     uint32_t idx;
-    for (idx = 0; 1; ++idx) {
+    for (idx = 0; true; ++idx) {
       const char *plugin_name =
           PluginManager::GetPlatformPluginNameAtIndex(idx);
       if (plugin_name == nullptr)
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index 53e7347d4f65f..c1f5ff0d283ca 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -3740,7 +3740,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
         break;
 
       case 'v':
-        m_verbose = 1;
+        m_verbose = true;
         break;
 
       case 'A':
diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
index 519a2865fe9d0..3c6088d6e192e 100644
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -482,7 +482,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
 
       case 'e':
         if (option_arg == "block") {
-          m_end_line_is_block_end = 1;
+          m_end_line_is_block_end = true;
           break;
         }
         if (option_arg.getAsInteger(0, m_end_line))
diff --git a/lldb/source/Core/Address.cpp b/lldb/source/Core/Address.cpp
index 9520e43697e8c..0da83eb98edb1 100644
--- a/lldb/source/Core/Address.cpp
+++ b/lldb/source/Core/Address.cpp
@@ -161,7 +161,7 @@ static bool ReadAddress(ExecutionContextScope *exe_scope,
 static bool DumpUInt(ExecutionContextScope *exe_scope, const Address &address,
                      uint32_t byte_size, Stream *strm) {
   if (exe_scope == nullptr || byte_size == 0)
-    return 0;
+    return false;
   std::vector<uint8_t> buf(byte_size, 0);
 
   if (ReadBytes(exe_scope, address, &buf[0], buf.size()) == buf.size()) {
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index 4994c8139780b..99b5601a9b3d1 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -547,7 +547,7 @@ DataExtractor data(arg_data.GetBytes(), arg_data_size,
                 match_info_ptr->GetNameMatchType(),
                 match_info_ptr->GetProcessInfo().GetName())) {
           // Skip NULLs
-          while (1) {
+          while (true) {
             const uint8_t *p = data.PeekData(offset, 1);
             if ((p == NULL) || (*p != '\0'))
               break;
diff --git a/lldb/source/Interpreter/Options.cpp b/lldb/source/Interpreter/Options.cpp
index 4ee9c4d7f5431..814998ec68fc3 100644
--- a/lldb/source/Interpreter/Options.cpp
+++ b/lldb/source/Interpreter/Options.cpp
@@ -1007,7 +1007,7 @@ llvm::Expected<Args> Options::ParseAlias(const Args &args,
   std::unique_lock<std::mutex> lock;
   OptionParser::Prepare(lock);
   int val;
-  while (1) {
+  while (true) {
     int long_options_index = -1;
     val = OptionParser::Parse(argv.size(), &*argv.begin(), sstr.GetString(),
                               long_options, &long_options_index);
@@ -1160,7 +1160,7 @@ OptionElementVector Options::ParseForCompletion(const Args &args,
   bool failed_once = false;
   uint32_t dash_dash_pos = -1;
 
-  while (1) {
+  while (true) {
     bool missing_argument = false;
     int long_options_index = -1;
 
@@ -1358,7 +1358,7 @@ llvm::Expected<Args> Options::Parse(const Args &args,
   std::unique_lock<std::mutex> lock;
   OptionParser::Prepare(lock);
   int val;
-  while (1) {
+  while (true) {
     int long_options_index = -1;
     val = OptionParser::Parse(argv.size(), &*argv.begin(), sstr.GetString(),
                               long_options, &long_options_index);
diff --git a/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp b/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp
index 8856c54d3c928..18011cfb6b9ea 100644
--- a/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp
+++ b/lldb/source/Plugins/ABI/SysV-mips64/ABISysV_mips64.cpp
@@ -917,15 +917,15 @@ ValueObjectSP ABISysV_mips64::GetReturnValueObjectImpl(
       uint32_t integer_bytes = 0;
 
       // True if return values are in FP return registers.
-      bool use_fp_regs = 0;
+      bool use_fp_regs = false;
       // True if we found any non floating point field in structure.
-      bool found_non_fp_field = 0;
+      bool found_non_fp_field = false;
       // True if return values are in r2 register.
-      bool use_r2 = 0;
+      bool use_r2 = false;
       // True if return values are in r3 register.
-      bool use_r3 = 0;
+      bool use_r3 = false;
       // True if the result is copied into our data buffer
-      bool sucess = 0;
+      bool sucess = false;
       std::string name;
       bool is_complex;
       uint32_t count;
@@ -943,9 +943,9 @@ ValueObjectSP ABISysV_mips64::GetReturnValueObjectImpl(
                                                    nullptr, nullptr);
 
           if (field_compiler_type.IsFloatingPointType(count, is_complex))
-            use_fp_regs = 1;
+            use_fp_regs = true;
           else
-            found_non_fp_field = 1;
+            found_non_fp_field = true;
         }
 
         if (use_fp_regs && !found_non_fp_field) {
@@ -1059,20 +1059,20 @@ ValueObjectSP ABISysV_mips64::GetReturnValueObjectImpl(
               // structure
               integer_bytes = integer_bytes + *field_byte_width +
                               padding; // Increase the consumed bytes.
-              use_r2 = 1;
+              use_r2 = true;
             } else {
               // There isn't enough space left in r2 for this field, so this
               // will be in r3.
               integer_bytes = integer_bytes + *field_byte_width +
                               padding; // Increase the consumed bytes.
-              use_r3 = 1;
+              use_r3 = true;
             }
           }
           // We already have consumed at-least 8 bytes that means r2 is done,
           // and this field will be in r3. Check if this field can fit in r3.
           else if (integer_bytes + *field_byte_width + padding <= 16) {
             integer_bytes = integer_bytes + *field_byte_width + padding;
-            use_r3 = 1;
+            use_r3 = true;
           } else {
             // There isn't any space left for this field, this should not
             // happen as we have already checked the overall size is not
@@ -1085,10 +1085,10 @@ ValueObjectSP ABISysV_mips64::GetReturnValueObjectImpl(
       // Vector types up to 16 bytes are returned in GP return registers
       if (type_flags & eTypeIsVector) {
         if (*byte_size <= 8)
-          use_r2 = 1;
+          use_r2 = true;
         else {
-          use_r2 = 1;
-          use_r3 = 1;
+          use_r2 = true;
+          use_r3 = true;
         }
       }
 
@@ -1100,7 +1100,7 @@ ValueObjectSP ABISysV_mips64::GetReturnValueObjectImpl(
             error);
         if (bytes_copied != r2_info->byte_size)
           return return_valobj_sp;
-        sucess = 1;
+        sucess = true;
       }
       if (use_r3) {
         reg_ctx->ReadRegister(r3_info, r3_value);
@@ -1110,7 +1110,7 @@ ValueObjectSP ABISysV_mips64::GetReturnValueObjectImpl(
 
         if (bytes_copied != r3_info->byte_size)
           return return_valobj_sp;
-        sucess = 1;
+        sucess = true;
       }
       if (sucess) {
         // The result is in our data buffer.  Create a variable object out of
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp
index 5dc39e8dac4d0..526ef90782ef3 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp
@@ -239,7 +239,7 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body,
       break;
 
     last_expr = implicit_cast->getSubExpr();
-  } while (0);
+  } while (false);
 
   // is_lvalue is used to record whether the expression returns an assignable
   // Lvalue or an Rvalue.  This is relevant because they are handled
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index 632594f1c460a..e9dd73c5fa640 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -101,7 +101,7 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context,
         break;
 
       sources.push_back(runtime_decl_vendor->GetImporterSource());
-    } while (0);
+    } while (false);
 
     do {
       DeclVendor *modules_decl_vendor =
@@ -111,7 +111,7 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context,
         break;
 
       sources.push_back(modules_decl_vendor->GetImporterSource());
-    } while (0);
+    } while (false);
 
     if (!is_shared_context) {
       // Update the scratch AST context's merger to reflect any new sources we
@@ -125,7 +125,9 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context,
       sources.push_back({*scratch_ast_context->getASTContext(),
                          *scratch_ast_context->getFileManager(),
                          scratch_ast_context->GetOriginMap()});
-    } while (0);
+    }
+    while (false)
+      ;
 
     m_merger_up =
         llvm::make_unique<clang::ExternalASTMerger>(target, sources);
@@ -934,7 +936,7 @@ void ClangASTSource::FindExternalVisibleDecls(
             context.m_found.type = true;
           }
         }
-      } while (0);
+      } while (false);
     }
 
     if (!context.m_found.type) {
@@ -985,10 +987,10 @@ void ClangASTSource::FindExternalVisibleDecls(
         }
 
         context.AddNamedDecl(copied_named_decl);
-      } while (0);
+      } while (false);
     }
 
-  } while (0);
+  } while (false);
 }
 
 template <class D> class TaggedASTDecl {
@@ -1173,7 +1175,7 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
     if (FindObjCMethodDeclsWithOrigin(current_id, context,
                                       original_interface_decl, "at origin"))
       return; // found it, no need to look any further
-  } while (0);
+  } while (false);
 
   StreamString ss;
 
@@ -1278,7 +1280,7 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
       if (*cursor == ' ' || *cursor == '(')
         sc_list.Append(candidate_sc);
     }
-  } while (0);
+  } while (false);
 
   if (sc_list.GetSize()) {
     // We found a good function symbol.  Use that.
@@ -1361,7 +1363,7 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
                                   "in debug info");
 
     return;
-  } while (0);
+  } while (false);
 
   do {
     // Check the modules only if the debug information didn't have a complete
@@ -1388,7 +1390,7 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
               current_id, context, interface_decl_from_modules, "in modules"))
         return;
     }
-  } while (0);
+  } while (false);
 
   do {
     // Check the runtime only if the debug information didn't have a complete
@@ -1425,7 +1427,7 @@ void ClangASTSource::FindObjCMethodDecls(NameSearchContext &context) {
 
     FindObjCMethodDeclsWithOrigin(current_id, context, runtime_interface_decl,
                                   "in runtime");
-  } while (0);
+  } while (false);
 }
 
 static bool FindObjCPropertyAndIvarDeclsWithOrigin(
@@ -1544,7 +1546,7 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
                                            complete_iface_decl);
 
     return;
-  } while (0);
+  } while (false);
 
   do {
     // Check the modules only if the debug information didn't have a complete
@@ -1580,7 +1582,7 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
     if (FindObjCPropertyAndIvarDeclsWithOrigin(current_id, context, *this,
                                                interface_decl_from_modules))
       return;
-  } while (0);
+  } while (false);
 
   do {
     // Check the runtime only if the debug information didn't have a complete
@@ -1625,7 +1627,7 @@ void ClangASTSource::FindObjCPropertyAndIvarDecls(NameSearchContext &context) {
     if (FindObjCPropertyAndIvarDeclsWithOrigin(
             current_id, context, *this, interface_decl_from_runtime))
       return;
-  } while (0);
+  } while (false);
 }
 
 typedef llvm::DenseMap<const FieldDecl *, uint64_t> FieldOffsetMap;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index 496d5b40e3e79..c2ebfe9ce4e2e 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -927,7 +927,7 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
                     name.GetCString());
 
       context.AddNamedDecl(parser_named_decl);
-    } while (0);
+    } while (false);
   }
 
   if (name.GetCString()[0] == '$' && !namespace_decl) {
@@ -1562,7 +1562,7 @@ void ClangExpressionDeclMap::FindExternalVisibleDecls(
             context.m_found.variable = true;
           }
         }
-      } while (0);
+      } while (false);
     }
 
     if (target && !context.m_found.variable && !namespace_decl) {
diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
index d835d62ad2e07..d7e8e04913426 100644
--- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
+++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
@@ -657,10 +657,10 @@ bool EmulateInstructionARM64::EmulateADDSUBImm(const uint32_t opcode) {
 
   if (sub_op) {
     operand2 = NOT(operand2);
-    carry_in = 1;
+    carry_in = true;
     imm = -imm; // For the Register plug offset context below
   } else {
-    carry_in = 0;
+    carry_in = false;
   }
 
   ProcState proc_state;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
index 0e26de569e888..93aa07f89165e 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.cpp
@@ -334,9 +334,9 @@ bool ClassDescriptorV2::Describe(
   std::unique_ptr<class_rw_t> class_rw;
 
   if (!Read_objc_class(process, objc_class))
-    return 0;
+    return false;
   if (!Read_class_row(process, *objc_class, class_ro, class_rw))
-    return 0;
+    return false;
 
   static ConstString NSObject_name("NSObject");
 
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
index b5cac92213b29..501114ad02810 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
@@ -62,7 +62,7 @@ class lldb_private::AppleObjCExternalASTSource
           non_const_interface_decl->lookup(name);
 
       return (result.size() != 0);
-    } while (0);
+    } while (false);
 
     SetNoExternalVisibleDeclsForName(decl_ctx, name);
     return false;
@@ -208,7 +208,7 @@ class ObjCRuntimeMethodType {
 
     uint32_t stepsLeft = 256;
 
-    while (1) {
+    while (true) {
       if (--stepsLeft == 0) {
         m_is_valid = false;
         return;
@@ -647,7 +647,7 @@ AppleObjCDeclVendor::FindDecls(ConstString name, bool append,
     decls.push_back(iface_decl);
     ret++;
     break;
-  } while (0);
+  } while (false);
 
   return ret;
 }
diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 62991dc2095e8..42c14aa9f50cb 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -5038,7 +5038,7 @@ ObjectFileMachO::GetArchitecture(const llvm::MachO::mach_header &header,
               triple.setEnvironmentName(os_env.environment);
             return arch;
           }
-        } while (0);
+        } while (false);
         offset = cmd_offset + load_cmd.cmdsize;
       }
 
diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/CommunicationKDP.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/CommunicationKDP.cpp
index ded0983fac794..1a75326f3b208 100644
--- a/lldb/source/Plugins/Process/MacOSX-Kernel/CommunicationKDP.cpp
+++ b/lldb/source/Plugins/Process/MacOSX-Kernel/CommunicationKDP.cpp
@@ -87,7 +87,7 @@ bool CommunicationKDP::SendRequestAndGetReply(
   for (uint32_t i = 0; i < num_retries; ++i) {
     if (SendRequestPacketNoLock(request_packet)) {
       const uint8_t request_sequence_id = (uint8_t)request_packet.GetData()[1];
-      while (1) {
+      while (true) {
         if (WaitForPacketWithTimeoutMicroSecondsNoLock(
                 reply_packet,
                 std::chrono::microseconds(GetPacketTimeout()).count())) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index def7eb2e1eb28..b3c9367861506 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2664,7 +2664,7 @@ bool DWARFASTParserClang::ParseChildMembers(
     DelayedPropertyList &delayed_properties, AccessType &default_accessibility,
     bool &is_a_class, ClangASTImporter::LayoutInfo &layout_info) {
   if (!parent_die)
-    return 0;
+    return false;
 
   // Get the parent byte size so we can verify any members will fit
   const uint64_t parent_byte_size =
@@ -2679,7 +2679,7 @@ bool DWARFASTParserClang::ParseChildMembers(
   ClangASTContext *ast =
       llvm::dyn_cast_or_null<ClangASTContext>(class_clang_type.GetTypeSystem());
   if (ast == nullptr)
-    return 0;
+    return false;
 
   for (DWARFDIE die = parent_die.GetFirstChild(); die.IsValid();
        die = die.GetSibling()) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index c692e8bf18d08..2871017baeccc 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -2104,7 +2104,7 @@ bool SymbolFileDWARF::ResolveFunction(const DWARFDIE &orig_die,
   if (die.Tag() == DW_TAG_inlined_subroutine) {
     inlined_die = die;
 
-    while (1) {
+    while (true) {
       die = die.GetParent();
 
       if (die) {
diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp
index 62321d135646e..55befb4bbcf3f 100644
--- a/lldb/source/Symbol/ClangASTContext.cpp
+++ b/lldb/source/Symbol/ClangASTContext.cpp
@@ -2457,7 +2457,7 @@ bool ClangASTContext::DeclsAreEquivalent(clang::Decl *lhs_decl,
       clang::DeclContext *lhs_decl_ctx = lhs_decl->getDeclContext();
       clang::DeclContext *rhs_decl_ctx = rhs_decl->getDeclContext();
       if (lhs_decl_ctx && rhs_decl_ctx) {
-        while (1) {
+        while (true) {
           if (lhs_decl_ctx && rhs_decl_ctx) {
             const clang::Decl::Kind lhs_decl_ctx_kind =
                 lhs_decl_ctx->getDeclKind();
@@ -2495,7 +2495,7 @@ bool ClangASTContext::DeclsAreEquivalent(clang::Decl *lhs_decl,
         // make sure the names match as well
         lhs_decl_ctx = lhs_decl->getDeclContext();
         rhs_decl_ctx = rhs_decl->getDeclContext();
-        while (1) {
+        while (true) {
           switch (lhs_decl_ctx->getDeclKind()) {
           case clang::Decl::TranslationUnit:
             // We don't care about the translation unit names
@@ -9629,7 +9629,7 @@ bool ClangASTContext::DumpTypeValue(
       break;
     }
   }
-  return 0;
+  return false;
 }
 
 void ClangASTContext::DumpSummary(lldb::opaque_compiler_type_t type,
diff --git a/lldb/source/Symbol/ClangASTImporter.cpp b/lldb/source/Symbol/ClangASTImporter.cpp
index 3a9a8f3c4cd83..32d0c47693b0e 100644
--- a/lldb/source/Symbol/ClangASTImporter.cpp
+++ b/lldb/source/Symbol/ClangASTImporter.cpp
@@ -1018,7 +1018,7 @@ void ClangASTImporter::ASTImporterDelegate::ImportDefinitionTo(
 
       to_objc_interface->setSuperClass(m_source_ctx->getTrivialTypeSourceInfo(
           m_source_ctx->getObjCInterfaceType(imported_from_superclass)));
-    } while (0);
+    } while (false);
   }
 }
 
diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp
index 98061d2d33439..bb9a1a642e422 100644
--- a/lldb/source/Symbol/CompilerType.cpp
+++ b/lldb/source/Symbol/CompilerType.cpp
@@ -144,7 +144,7 @@ bool CompilerType::IsBlockPointerType(
     CompilerType *function_pointer_type_ptr) const {
   if (IsValid())
     return m_type_system->IsBlockPointerType(m_type, function_pointer_type_ptr);
-  return 0;
+  return false;
 }
 
 bool CompilerType::IsIntegerType(bool &is_signed) const {
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index 7b3ea5b27102d..0538a9e351cb9 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -546,7 +546,7 @@ uint32_t Function::GetPrologueByteSize() {
 
         // Now calculate the offset to pass the subsequent line 0 entries.
         uint32_t first_non_zero_line = prologue_end_line_idx;
-        while (1) {
+        while (true) {
           LineEntry line_entry;
           if (line_table->GetLineEntryAtIndex(first_non_zero_line,
                                               line_entry)) {
diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp
index 1b7b3ce54865b..a0b35cf3d0b99 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -729,7 +729,7 @@ bool SymbolContext::GetAddressRangeFromHereToEndLine(uint32_t end_line,
 
   uint32_t line_index = 0;
   bool found = false;
-  while (1) {
+  while (true) {
     LineEntry this_line;
     line_index = comp_unit->FindLineEntry(line_index, line_entry.line, nullptr,
                                           false, &this_line);
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 7c668a216a3a2..871eae56d4b49 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -5350,7 +5350,7 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx,
 
             event_explanation = ts.GetData();
           }
-        } while (0);
+        } while (false);
 
         if (event_explanation)
           log->Printf("Process::RunThreadPlan(): execution interrupted: %s %s",
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index fd54d4062dfc9..14755f60c5522 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -1856,7 +1856,7 @@ size_t Target::ReadCStringFromMemory(const Address &addr, std::string &out_str,
   out_str.clear();
   addr_t curr_addr = addr.GetLoadAddress(this);
   Address address(addr);
-  while (1) {
+  while (true) {
     size_t length = ReadCStringFromMemory(address, buf, sizeof(buf), error);
     if (length == 0)
       break;
diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp
index a8b57c86f5974..39086529c114e 100644
--- a/lldb/source/Target/Thread.cpp
+++ b/lldb/source/Target/Thread.cpp
@@ -853,7 +853,7 @@ bool Thread::ShouldStop(Event *event_ptr) {
       // Otherwise, don't let the base plan override what the other plans say
       // to do, since presumably if there were other plans they would know what
       // to do...
-      while (1) {
+      while (true) {
         if (PlanIsBasePlan(current_plan))
           break;
 
@@ -978,7 +978,7 @@ Vote Thread::ShouldReportStop(Event *event_ptr) {
   } else {
     Vote thread_vote = eVoteNoOpinion;
     ThreadPlan *plan_ptr = GetCurrentPlan();
-    while (1) {
+    while (true) {
       if (plan_ptr->PlanExplainsStop(event_ptr)) {
         thread_vote = plan_ptr->ShouldReportStop(event_ptr);
         break;
@@ -1298,7 +1298,7 @@ void Thread::DiscardThreadPlans(bool force) {
     return;
   }
 
-  while (1) {
+  while (true) {
     int master_plan_idx;
     bool discard = true;
 
@@ -1677,7 +1677,7 @@ Status Thread::ReturnFromFrame(lldb::StackFrameSP frame_sp,
     // FIXME: ValueObject::Cast doesn't currently work correctly, at least not
     // for scalars.
     // Turn that back on when that works.
-    if (/* DISABLES CODE */ (0) && sc.function != nullptr) {
+    if (/* DISABLES CODE */ (false) && sc.function != nullptr) {
       Type *function_type = sc.function->GetType();
       if (function_type) {
         CompilerType return_type =
diff --git a/lldb/source/Utility/JSON.cpp b/lldb/source/Utility/JSON.cpp
index 54b87394505f8..2c3f6229eda11 100644
--- a/lldb/source/Utility/JSON.cpp
+++ b/lldb/source/Utility/JSON.cpp
@@ -238,7 +238,7 @@ JSONParser::Token JSONParser::GetToken(std::string &value) {
     break;
 
   case '"': {
-    while (1) {
+    while (true) {
       bool was_escaped = false;
       int escaped_ch = GetEscapedChar(was_escaped);
       if (escaped_ch == -1) {
@@ -453,7 +453,7 @@ JSONValue::SP JSONParser::ParseJSONObject() {
 
   std::string value;
   std::string key;
-  while (1) {
+  while (true) {
     JSONParser::Token token = GetToken(value);
 
     if (token == JSONParser::Token::String) {
@@ -484,7 +484,7 @@ JSONValue::SP JSONParser::ParseJSONArray() {
 
   std::string value;
   std::string key;
-  while (1) {
+  while (true) {
     JSONValue::SP value_sp = ParseJSONValue();
     if (value_sp)
       array_up->AppendObject(value_sp);
diff --git a/lldb/source/Utility/SelectHelper.cpp b/lldb/source/Utility/SelectHelper.cpp
index 5b60dea718416..ff21d99e400ab 100644
--- a/lldb/source/Utility/SelectHelper.cpp
+++ b/lldb/source/Utility/SelectHelper.cpp
@@ -192,7 +192,7 @@ lldb_private::Status SelectHelper::Select() {
   struct timeval *tv_ptr = nullptr;
   struct timeval tv = {0, 0};
 
-  while (1) {
+  while (true) {
     using namespace std::chrono;
     // Setup out relative timeout based on the end time if we have one
     if (m_end_time.hasValue()) {
diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp
index c486913d13d6e..0e203f9739d16 100644
--- a/lldb/source/Utility/StructuredData.cpp
+++ b/lldb/source/Utility/StructuredData.cpp
@@ -51,7 +51,7 @@ static StructuredData::ObjectSP ParseJSONObject(JSONParser &json_parser) {
 
   std::string value;
   std::string key;
-  while (1) {
+  while (true) {
     JSONParser::Token token = json_parser.GetToken(value);
 
     if (token == JSONParser::Token::String) {
@@ -82,7 +82,7 @@ static StructuredData::ObjectSP ParseJSONArray(JSONParser &json_parser) {
 
   std::string value;
   std::string key;
-  while (1) {
+  while (true) {
     StructuredData::ObjectSP value_sp = ParseJSONValue(json_parser);
     if (value_sp)
       array_up->AddItem(value_sp);
diff --git a/lldb/tools/debugserver/source/DNB.cpp b/lldb/tools/debugserver/source/DNB.cpp
index cb02ee0680728..32a734833792c 100644
--- a/lldb/tools/debugserver/source/DNB.cpp
+++ b/lldb/tools/debugserver/source/DNB.cpp
@@ -141,7 +141,7 @@ void *kqueue_thread(void *arg) {
 #endif
 
   struct kevent death_event;
-  while (1) {
+  while (true) {
     int n_events = kevent(kq_id, NULL, 0, &death_event, 1, NULL);
     if (n_events == -1) {
       if (errno == EINTR)
@@ -267,7 +267,7 @@ static void *waitpid_thread(void *arg) {
 #endif
 #endif
 
-  while (1) {
+  while (true) {
     pid_t child_pid = waitpid(pid, &status, 0);
     DNBLogThreadedIf(LOG_PROCESS, "waitpid_thread (): waitpid (pid = %i, "
                                   "&status, 0) => %i, status = %i, errno = %i",
diff --git a/lldb/tools/debugserver/source/JSON.cpp b/lldb/tools/debugserver/source/JSON.cpp
index 548ee14079527..1b37767256d7c 100644
--- a/lldb/tools/debugserver/source/JSON.cpp
+++ b/lldb/tools/debugserver/source/JSON.cpp
@@ -271,7 +271,7 @@ JSONParser::Token JSONParser::GetToken(std::string &value) {
     break;
 
   case '"': {
-    while (1) {
+    while (true) {
       bool was_escaped = false;
       int escaped_ch = GetEscapedChar(was_escaped);
       if (escaped_ch == -1) {
@@ -483,7 +483,7 @@ JSONValue::SP JSONParser::ParseJSONObject() {
 
   std::string value;
   std::string key;
-  while (1) {
+  while (true) {
     JSONParser::Token token = GetToken(value);
 
     if (token == JSONParser::Token::String) {
@@ -515,7 +515,7 @@ JSONValue::SP JSONParser::ParseJSONArray() {
 
   std::string value;
   std::string key;
-  while (1) {
+  while (true) {
     JSONValue::SP value_sp = ParseJSONValue();
     if (value_sp)
       array_up->AppendObject(value_sp);
diff --git a/lldb/tools/debugserver/source/MacOSX/MachThreadList.cpp b/lldb/tools/debugserver/source/MacOSX/MachThreadList.cpp
index 8d2165b0f2435..0fa4437843a62 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachThreadList.cpp
+++ b/lldb/tools/debugserver/source/MacOSX/MachThreadList.cpp
@@ -214,7 +214,7 @@ bool MachThreadList::RestoreRegisterState(nub_thread_t tid, uint32_t save_id) {
   MachThreadSP thread_sp(GetThreadByID(tid));
   if (thread_sp)
     return thread_sp->RestoreRegisterState(save_id);
-  return 0;
+  return false;
 }
 
 nub_size_t MachThreadList::NumThreads() const {
diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp
index 16c15f64c25ea..3a4035b0b9b3f 100644
--- a/lldb/tools/debugserver/source/RNBRemote.cpp
+++ b/lldb/tools/debugserver/source/RNBRemote.cpp
@@ -4263,7 +4263,7 @@ rnb_err_t RNBRemote::HandlePacket_SetEnableAsyncProfiling(const char *p) {
   }
 
   if (interval_usec == 0) {
-    enable = 0;
+    enable = false;
   }
 
   DNBProcessSetEnableAsyncProfiling(pid, enable, interval_usec, scan_type);
@@ -5174,7 +5174,7 @@ bool get_array_of_ints_value_for_key_name_from_json(
         while (*c != '\0' &&
                (*c == ' ' || *c == '\t' || *c == '\n' || *c == '\r'))
           c++;
-        while (1) {
+        while (true) {
           if (!isdigit(*c)) {
             return true;
           }
@@ -6109,7 +6109,7 @@ rnb_err_t RNBRemote::HandlePacket_qProcessInfo(const char *p) {
           cstr = data.GetCStr(&offset);
           if (cstr) {
             // Skip NULLs
-            while (1) {
+            while (true) {
               const char *p = data.PeekCStr(offset);
               if ((p == NULL) || (*p != '\0'))
                 break;
diff --git a/lldb/tools/debugserver/source/debugserver.cpp b/lldb/tools/debugserver/source/debugserver.cpp
index 0698d69375f6c..fa19bba58fa79 100644
--- a/lldb/tools/debugserver/source/debugserver.cpp
+++ b/lldb/tools/debugserver/source/debugserver.cpp
@@ -96,7 +96,7 @@ RNBRunLoopMode RNBRunLoopGetStartModeFromRemote(RNBRemote *remote) {
                           RNBContext::event_read_thread_exiting;
 
     // Spin waiting to get the A packet.
-    while (1) {
+    while (true) {
       DNBLogThreadedIf(LOG_RNB_MAX,
                        "%s ctx.Events().WaitForSetEvents( 0x%08x ) ...",
                        __FUNCTION__, event_mask);
diff --git a/lldb/tools/debugserver/source/libdebugserver.cpp b/lldb/tools/debugserver/source/libdebugserver.cpp
index 0b40dd9049fd0..0c53fa4039c55 100644
--- a/lldb/tools/debugserver/source/libdebugserver.cpp
+++ b/lldb/tools/debugserver/source/libdebugserver.cpp
@@ -69,7 +69,7 @@ RNBRunLoopMode RNBRunLoopGetStartModeFromRemote(RNBRemoteSP &remoteSP) {
     uint32_t event_mask = RNBContext::event_read_packet_available;
 
     // Spin waiting to get the A packet.
-    while (1) {
+    while (true) {
       DNBLogThreadedIf(LOG_RNB_MAX,
                        "%s ctx.Events().WaitForSetEvents( 0x%08x ) ...",
                        __FUNCTION__, event_mask);

From 052f87ae36163cbd2033617eba655af7f1438733 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Fri, 24 May 2019 01:03:51 +0000
Subject: [PATCH 0115/1176] Revert r361460

It regresses https://bugs.llvm.org/show_bug.cgi?id=38309 (represented
by the testcase test/Transforms/GlobalOpt/globalsra-multigep.ll).

llvm-svn: 361581
---
 llvm/lib/Transforms/IPO/GlobalOpt.cpp         | 27 +++----------------
 .../GlobalOpt/globalsra-multigep.ll           | 11 ++------
 .../Transforms/GlobalOpt/globalsra-struct.ll  | 18 -------------
 3 files changed, 6 insertions(+), 50 deletions(-)
 delete mode 100644 llvm/test/Transforms/GlobalOpt/globalsra-struct.ll

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index c4f268a6511d7..c4fb3ce77f6ee 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -184,7 +184,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
 /// This GV is a pointer root.  Loop over all users of the global and clean up
 /// any that obviously don't assign the global a value that isn't dynamically
 /// allocated.
-static bool CleanupPointerRootUsers(Value *V,
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
                                     const TargetLibraryInfo *TLI) {
   // A brief explanation of leak checkers.  The goal is to find bugs where
   // pointers are forgotten, causing an accumulating growth in memory
@@ -202,7 +202,7 @@ static bool CleanupPointerRootUsers(Value *V,
   SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
 
   // Constants can't be pointers to dynamically allocated memory.
-  for (Value::user_iterator UI = V->user_begin(), E = V->user_end();
+  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
        UI != E;) {
     User *U = *UI++;
     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
@@ -232,9 +232,6 @@ static bool CleanupPointerRootUsers(Value *V,
           Dead.push_back(std::make_pair(I, MTI));
       }
     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
-      if (CE->getOpcode() == Instruction::GetElementPtr) {
-        Changed |= CleanupPointerRootUsers(CE, TLI);
-      }
       if (CE->use_empty()) {
         CE->destroyConstant();
         Changed = true;
@@ -244,7 +241,7 @@ static bool CleanupPointerRootUsers(Value *V,
         C->destroyConstant();
         // This could have invalidated UI, start over from scratch.
         Dead.clear();
-        CleanupPointerRootUsers(V, TLI);
+        CleanupPointerRootUsers(GV, TLI);
         return true;
       }
     }
@@ -394,22 +391,6 @@ static bool isSafeSROAGEP(User *U) {
                       [](User *UU) { return isSafeSROAElementUse(UU); });
 }
 
-/// Return true if the specified GEP is a safe user of a derived
-/// expression from a global that we want to SROA.
-static bool isSafeSubSROAGEP(User *U) {
-
-  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
-  // don't like < 3 operand CE's, and we don't like non-constant integer
-  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
-  // value of C.
-  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
-      !cast<Constant>(U->getOperand(1))->isNullValue())
-    return false;
-
-  return llvm::all_of(U->users(),
-                      [](User *UU) { return isSafeSROAElementUse(UU); });
-}
-
 /// Return true if the specified instruction is a safe user of a derived
 /// expression from a global that we want to SROA.
 static bool isSafeSROAElementUse(Value *V) {
@@ -428,7 +409,7 @@ static bool isSafeSROAElementUse(Value *V) {
     return SI->getOperand(0) != V;
 
   // Otherwise, it must be a GEP. Check it and its users are safe to SRA.
-  return isa<GetElementPtrInst>(I) && isSafeSubSROAGEP(I);
+  return isa<GetElementPtrInst>(I) && isSafeSROAGEP(I);
 }
 
 /// Look at all uses of the global and decide whether it is safe for us to
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll b/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll
index c32a620c47624..87a8486d8818a 100644
--- a/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-multigep.ll
@@ -4,20 +4,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 @g_data = internal unnamed_addr global <{ [8 x i16], [8 x i16] }> <{ [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], [8 x i16] zeroinitializer }>, align 16
-; We normally cannot SRA here due to the second gep meaning the access to g_data may be to either element,
-; unless the value is always zero.
-; CHECK: @g_data.0 = internal unnamed_addr constant [8 x i16] [i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16], align 16
+; We cannot SRA here due to the second gep meaning the access to g_data may be to either element
+; CHECK: @g_data = internal unnamed_addr constant <{ [8 x i16], [8 x i16] }>
 
 define i16 @test(i64 %a1) {
 entry:
   %g1 = getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0
   %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* %g1, i64 0, i64 %a1
   %r = load i16, i16* %arrayidx.i, align 2
-
-; CHECK-NOT: getelementptr inbounds <{ [8 x i16], [8 x i16] }>, <{ [8 x i16], [8 x i16] }>* @g_data, i64 0, i32 0
-; CHECK:  %arrayidx.i = getelementptr inbounds [8 x i16], [8 x i16]* @g_data.0, i64 0, i64 %a1
-
   ret i16 %r
-
-
 }
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-struct.ll b/llvm/test/Transforms/GlobalOpt/globalsra-struct.ll
deleted file mode 100644
index 957fba810687a..0000000000000
--- a/llvm/test/Transforms/GlobalOpt/globalsra-struct.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -globalopt -S | FileCheck %s
-
-%struct.Expr = type { [1 x i32], i32 }
-
-@e = internal global %struct.Expr zeroinitializer, align 4
-; CHECK-NOT: @e = internal global %struct.Expr zeroinitializer, align 4
-
-define dso_local i32 @foo(i32 %i) {
-entry:
-  %i.addr = alloca i32, align 4
-  store i32 %i, i32* %i.addr, align 4
-  %0 = load i32, i32* %i.addr, align 4
-  %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* getelementptr inbounds (%struct.Expr, %struct.Expr* @e, i32 0, i32 0), i32 0, i32 %0
-  store i32 57005, i32* %arrayidx, align 4
-  %1 = load i32, i32* getelementptr inbounds (%struct.Expr, %struct.Expr* @e, i32 0, i32 1), align 4
-  ret i32 %1
-; CHECK:  ret i32 0
-}

From 79872a88a0662ee91cdb194a8ea477c79a824e9f Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Fri, 24 May 2019 01:05:52 +0000
Subject: [PATCH 0116/1176] dwarfdump: Add a bit more DWARF64 support

This test case was incorrect because it mixed DWARF32 and DWARF64 for a
single unit (DWARF32 unit referencing a DWARF64 str_offsets section). So
fix enough of the unit parsing for DWARF64 and make the test valid.

(not sure if anyone needs DWARF64 support though - support in
libDebugInfoDWARF has been added piecemeal and LLVM doesn't produce it
at all)

llvm-svn: 361582
---
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h   | 11 +++++++----
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp          | 12 ++++++++----
 llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp      | 13 +++++--------
 llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s |  7 ++++---
 4 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index c89451b20278b..4e92df2fdb14f 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -48,7 +48,7 @@ class DWARFUnitHeader {
   uint32_t Offset = 0;
   // Version, address size, and DWARF format.
   dwarf::FormParams FormParams;
-  uint32_t Length = 0;
+  uint64_t Length = 0;
   uint64_t AbbrOffset = 0;
 
   // For DWO units only.
@@ -82,7 +82,7 @@ class DWARFUnitHeader {
   uint8_t getDwarfOffsetByteSize() const {
     return FormParams.getDwarfOffsetByteSize();
   }
-  uint32_t getLength() const { return Length; }
+  uint64_t getLength() const { return Length; }
   uint64_t getAbbrOffset() const { return AbbrOffset; }
   Optional<uint64_t> getDWOId() const { return DWOId; }
   void setDWOId(uint64_t Id) {
@@ -97,8 +97,11 @@ class DWARFUnitHeader {
     return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
   }
   uint8_t getSize() const { return Size; }
-  // FIXME: Support DWARF64.
-  uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
+  uint32_t getNextUnitOffset() const {
+    return Offset + Length +
+           (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) +
+           FormParams.getDwarfOffsetByteSize();
+  }
 };
 
 const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 65b118f6091ac..7bc5221549063 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -242,16 +242,20 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
   if (!IndexEntry && Index)
     IndexEntry = Index->getFromOffset(*offset_ptr);
   Length = debug_info.getU32(offset_ptr);
-  // FIXME: Support DWARF64.
-  unsigned SizeOfLength = 4;
   FormParams.Format = DWARF32;
+  unsigned SizeOfLength = 4;
+  if (Length == 0xffffffff) {
+    Length = debug_info.getU64(offset_ptr);
+    FormParams.Format = DWARF64;
+    SizeOfLength = 8;
+  }
   FormParams.Version = debug_info.getU16(offset_ptr);
   if (FormParams.Version >= 5) {
     UnitType = debug_info.getU8(offset_ptr);
     FormParams.AddrSize = debug_info.getU8(offset_ptr);
-    AbbrOffset = debug_info.getU32(offset_ptr);
+    AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
   } else {
-    AbbrOffset = debug_info.getRelocatedValue(4, offset_ptr);
+    AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
     FormParams.AddrSize = debug_info.getU8(offset_ptr);
     // Fake a unit type based on the section type.  This isn't perfect,
     // but distinguishing compile and type units is generally enough.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 8fea97aa3c207..c2b3189514a85 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -100,7 +100,7 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
 bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
                                      uint32_t *Offset, unsigned UnitIndex,
                                      uint8_t &UnitType, bool &isUnitDWARF64) {
-  uint32_t AbbrOffset, Length;
+  uint64_t AbbrOffset, Length;
   uint8_t AddrSize = 0;
   uint16_t Version;
   bool Success = true;
@@ -114,22 +114,19 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
   uint32_t OffsetStart = *Offset;
   Length = DebugInfoData.getU32(Offset);
   if (Length == UINT32_MAX) {
+    Length = DebugInfoData.getU64(Offset);
     isUnitDWARF64 = true;
-    OS << format(
-        "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n",
-        UnitIndex);
-    return false;
   }
   Version = DebugInfoData.getU16(Offset);
 
   if (Version >= 5) {
     UnitType = DebugInfoData.getU8(Offset);
     AddrSize = DebugInfoData.getU8(Offset);
-    AbbrOffset = DebugInfoData.getU32(Offset);
+    AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
     ValidType = dwarf::isUnitType(UnitType);
   } else {
     UnitType = 0;
-    AbbrOffset = DebugInfoData.getU32(Offset);
+    AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
     AddrSize = DebugInfoData.getU8(Offset);
   }
 
@@ -157,7 +154,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
     if (!ValidAddrSize)
       note() << "The address size is unsupported.\n";
   }
-  *Offset = OffsetStart + Length + 4;
+  *Offset = OffsetStart + Length + (isUnitDWARF64 ? 12 : 4);
   return Success;
 }
 
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s
index 230c660850692..064061b5847f6 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s
+++ b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s
@@ -239,18 +239,19 @@ TypeDie:
 CU1_5_end:
 
 # DWARF v5 CU header
-        .long  CU2_5_end-CU2_5_version  # Length of Unit
+        .long 0xffffffff
+        .quad CU2_5_end-CU2_5_version  # Length of Unit
 CU2_5_version:
         .short 5               # DWARF version number
         .byte 1                # DWARF Unit Type
         .byte 8                # Address Size (in bytes)
-        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .quad .debug_abbrev    # Offset Into Abbrev. Section
 # The compile-unit DIE, which has a DW_AT_producer, DW_AT_name, 
 # DW_AT_str_offsets and DW_AT_compdir.
         .byte 1                # Abbreviation code
         .byte 0                # The index of the producer string
         .byte 1                # The index of the CU name string
-        .long .debug_str_offsets_base1
+        .quad .debug_str_offsets_base1
         .byte 2                # The index of the comp dir string
         .byte 0 # NULL
 CU2_5_end:

From ecd111533df82d6d39acc7595edcccbe1692d68b Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Fri, 24 May 2019 01:08:54 +0000
Subject: [PATCH 0117/1176] Revert "[lldb] followup fix for
 https://reviews.llvm.org/D62305"

This fails on the Windows bot:

cannot convert from 'initializer list' to 'lldb::thread_result_t'

llvm-svn: 361583
---
 lldb/source/Host/common/HostNativeThreadBase.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Host/common/HostNativeThreadBase.cpp b/lldb/source/Host/common/HostNativeThreadBase.cpp
index 091e7b515a602..a5f876a7232af 100644
--- a/lldb/source/Host/common/HostNativeThreadBase.cpp
+++ b/lldb/source/Host/common/HostNativeThreadBase.cpp
@@ -18,10 +18,10 @@ using namespace lldb;
 using namespace lldb_private;
 
 HostNativeThreadBase::HostNativeThreadBase()
-    : m_thread(LLDB_INVALID_HOST_THREAD), m_result({}) {}
+    : m_thread(LLDB_INVALID_HOST_THREAD), m_result(0) {}
 
 HostNativeThreadBase::HostNativeThreadBase(thread_t thread)
-    : m_thread(thread), m_result({}) {}
+    : m_thread(thread), m_result(0) {}
 
 lldb::thread_t HostNativeThreadBase::GetSystemHandle() const {
   return m_thread;
@@ -37,7 +37,7 @@ bool HostNativeThreadBase::IsJoinable() const {
 
 void HostNativeThreadBase::Reset() {
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = {};
+  m_result = 0;
 }
 
 bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
@@ -47,7 +47,7 @@ bool HostNativeThreadBase::EqualsThread(lldb::thread_t thread) const {
 lldb::thread_t HostNativeThreadBase::Release() {
   lldb::thread_t result = m_thread;
   m_thread = LLDB_INVALID_HOST_THREAD;
-  m_result = {};
+  m_result = 0;
 
   return result;
 }

From ed595e8627b37131d1f0146c24655a1825c5cf13 Mon Sep 17 00:00:00 2001
From: Serge Pavlov <sepavloff@gmail.com>
Date: Fri, 24 May 2019 01:20:34 +0000
Subject: [PATCH 0118/1176] [AArch64] Add nvcast patterns for v2f32 -> v1f64

Summary: Constant stores of f32 values can create such NvCast nodes.

Reviewers: t.p.northover

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62285

llvm-svn: 361584
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td |  1 +
 llvm/test/CodeGen/AArch64/arm64-nvcast.ll   | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f426da4f1c832..8b702901d51a9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6203,6 +6203,7 @@ def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
 def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
 
 // Natural vector casts (128 bit)
 def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
index d9486127bf11c..59b956c7d90c1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -47,3 +47,15 @@ entry:
   store <2 x float> <float 0xC7DFDFDFC0000000, float 0xC7DFDFDFC0000000>, <2 x float>* bitcast (%"st1"* @_gv to <2 x float>*), align 8
   ret void
 }
+
+%struct.Vector3 = type { float, float, float }
+
+define void @nvcast_v2f32_v1f64(%struct.Vector3*) {
+; CHECK-LABEL: _nvcast_v2f32_v1f64
+; CHECK: fmov.2s v[[REG:[0-9]+]], #1.00000000
+; CHECK: str d[[REG]], [x0]
+entry:
+  %a13 = bitcast %struct.Vector3* %0 to <1 x double>*
+  store <1 x double> <double 0x3F8000003F800000>, <1 x double>* %a13, align 8
+  ret void
+}

From b7a78c7dff18485e5ac85aa8c902571da4b06a33 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 24 May 2019 01:27:20 +0000
Subject: [PATCH 0119/1176] [AArch64] Preserve X8 for thunks ending in variadic
 musttail calls

Summary:
On Windows, X8 may be used to pass in the address of an aggregate that
is returned indirectly. Therefore, it should be forwarded to variadic
musttail calls and preserved in thunks.

Fixes PR41997

Reviewers: mgrang, efriedma

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62344

llvm-svn: 361585
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++++++
 llvm/test/CodeGen/AArch64/vararg-tallcall.ll    | 6 +++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4b027e9363346..ac656959bcbaa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3207,6 +3207,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
                                        FuncInfo->getForwardedMustTailRegParms();
       CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
                                                CC_AArch64_AAPCS);
+
+      // Conservatively forward X8, since it might be used for aggregate return.
+      if (!CCInfo.isAllocated(AArch64::X8)) {
+        unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+        Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2818222680335..56c56213af354 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -28,7 +28,7 @@ entry:
 attributes #1 = { noinline optnone "thunk" }
 
 ; CHECK: mov     v16.16b, v0.16b
-; CHECK: ldr     x8, [x0]
-; CHECK: ldr     x8, [x8]
+; CHECK: ldr     x9, [x0]
+; CHECK: ldr     x9, [x9]
 ; CHECK: mov     v0.16b, v16.16b
-; CHECK: br      x8
+; CHECK: br      x9

From 4e53032d9bdb2b24e465aa934e083fc507b58a61 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Fri, 24 May 2019 01:34:22 +0000
Subject: [PATCH 0120/1176] [CFG] NFC: Remove implicit conversion from
 CFGTerminator to Stmt *.

Turn it into a variant class instead. This conversion does indeed save some code
but there's a plan to add support for more kinds of terminators that aren't
necessarily based on statements, and with those in mind it becomes more and more
confusing to have CFGTerminators implicitly convertible to a Stmt *.

Differential Revision: https://reviews.llvm.org/D61814

llvm-svn: 361586
---
 clang/include/clang/Analysis/CFG.h            | 57 ++++++++++++-------
 clang/include/clang/Analysis/ProgramPoint.h   |  2 +-
 clang/lib/Analysis/CFG.cpp                    | 29 ++++++----
 clang/lib/Analysis/CFGStmtMap.cpp             |  2 +-
 clang/lib/Analysis/Consumed.cpp               |  2 +-
 clang/lib/Analysis/LiveVariables.cpp          |  2 +-
 clang/lib/Analysis/ProgramPoint.cpp           |  2 +-
 clang/lib/Analysis/ReachableCode.cpp          | 17 +++---
 clang/lib/Analysis/ThreadSafety.cpp           |  8 +--
 clang/lib/Analysis/UninitializedValues.cpp    |  2 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp      | 14 +++--
 .../Checkers/UnreachableCodeChecker.cpp       |  4 +-
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp |  4 +-
 .../Core/BugReporterVisitors.cpp              |  6 +-
 clang/lib/StaticAnalyzer/Core/CoreEngine.cpp  |  4 +-
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |  8 +--
 .../lib/StaticAnalyzer/Core/LoopUnrolling.cpp |  2 +-
 .../StaticAnalyzer/Core/PathDiagnostic.cpp    |  2 +-
 18 files changed, 95 insertions(+), 72 deletions(-)

diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index 5722cbee860dc..212fd1baef5d6 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -494,33 +494,44 @@ class CFGTemporaryDtor : public CFGImplicitDtor {
 
 /// Represents CFGBlock terminator statement.
 ///
-/// TemporaryDtorsBranch bit is set to true if the terminator marks a branch
-/// in control flow of destructors of temporaries. In this case terminator
-/// statement is the same statement that branches control flow in evaluation
-/// of matching full expression.
 class CFGTerminator {
-  llvm::PointerIntPair<Stmt *, 1> Data;
+public:
+  enum Kind {
+    /// A branch that corresponds to a statement in the code,
+    /// such as an if-statement.
+    StmtBranch,
+    /// A branch in control flow of destructors of temporaries. In this case
+    /// terminator statement is the same statement that branches control flow
+    /// in evaluation of matching full expression.
+    TemporaryDtorsBranch,
+
+    /// Number of different kinds, for sanity checks. We subtract 1 so that
+    /// to keep receiving compiler warnings when we don't cover all enum values
+    /// in a switch.
+    NumKindsMinusOne = TemporaryDtorsBranch
+  };
+
+private:
+  static constexpr int KindBits = 1;
+  static_assert((1 << KindBits) > NumKindsMinusOne,
+                "Not enough room for kind!");
+  llvm::PointerIntPair<Stmt *, KindBits> Data;
 
 public:
-  CFGTerminator() = default;
-  CFGTerminator(Stmt *S, bool TemporaryDtorsBranch = false)
-      : Data(S, TemporaryDtorsBranch) {}
+  CFGTerminator() { assert(!isValid()); }
+  CFGTerminator(Stmt *S, Kind K = StmtBranch) : Data(S, K) {}
 
+  bool isValid() const { return Data.getOpaqueValue() != nullptr; }
   Stmt *getStmt() { return Data.getPointer(); }
   const Stmt *getStmt() const { return Data.getPointer(); }
+  Kind getKind() const { return static_cast<Kind>(Data.getInt()); }
 
-  bool isTemporaryDtorsBranch() const { return Data.getInt(); }
-
-  operator Stmt *() { return getStmt(); }
-  operator const Stmt *() const { return getStmt(); }
-
-  Stmt *operator->() { return getStmt(); }
-  const Stmt *operator->() const { return getStmt(); }
-
-  Stmt &operator*() { return *getStmt(); }
-  const Stmt &operator*() const { return *getStmt(); }
-
-  explicit operator bool() const { return getStmt(); }
+  bool isStmtBranch() const {
+    return getKind() == StmtBranch;
+  }
+  bool isTemporaryDtorsBranch() const {
+    return getKind() == TemporaryDtorsBranch;
+  }
 };
 
 /// Represents a single basic block in a source-level CFG.
@@ -836,8 +847,10 @@ class CFGBlock {
   void setLoopTarget(const Stmt *loopTarget) { LoopTarget = loopTarget; }
   void setHasNoReturnElement() { HasNoReturnElement = true; }
 
-  CFGTerminator getTerminator() { return Terminator; }
-  const CFGTerminator getTerminator() const { return Terminator; }
+  CFGTerminator getTerminator() const { return Terminator; }
+
+  Stmt *getTerminatorStmt() { return Terminator.getStmt(); }
+  const Stmt *getTerminatorStmt() const { return Terminator.getStmt(); }
 
   Stmt *getTerminatorCondition(bool StripParens = true);
 
diff --git a/clang/include/clang/Analysis/ProgramPoint.h b/clang/include/clang/Analysis/ProgramPoint.h
index 5b554c150947d..ffc2a82d9e08e 100644
--- a/clang/include/clang/Analysis/ProgramPoint.h
+++ b/clang/include/clang/Analysis/ProgramPoint.h
@@ -257,7 +257,7 @@ class BlockExit : public ProgramPoint {
   }
 
   const Stmt *getTerminator() const {
-    return getBlock()->getTerminator();
+    return getBlock()->getTerminatorStmt();
   }
 
 private:
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 0928fa27866d6..915e5cc222f5b 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -1956,7 +1956,7 @@ void CFGBuilder::prependAutomaticObjDtorsWithTerminator(CFGBlock *Blk,
     = Blk->beginAutomaticObjDtorsInsert(Blk->end(), B.distance(E), C);
   for (LocalScope::const_iterator I = B; I != E; ++I)
     InsertPos = Blk->insertAutomaticObjDtor(InsertPos, *I,
-                                            Blk->getTerminator());
+                                            Blk->getTerminatorStmt());
 }
 
 /// prependAutomaticObjLifetimeWithTerminator - Prepend lifetime CFGElements for
@@ -1971,8 +1971,10 @@ void CFGBuilder::prependAutomaticObjLifetimeWithTerminator(
   BumpVectorContext &C = cfg->getBumpVectorContext();
   CFGBlock::iterator InsertPos =
       Blk->beginLifetimeEndsInsert(Blk->end(), B.distance(E), C);
-  for (LocalScope::const_iterator I = B; I != E; ++I)
-    InsertPos = Blk->insertLifetimeEnds(InsertPos, *I, Blk->getTerminator());
+  for (LocalScope::const_iterator I = B; I != E; ++I) {
+    InsertPos =
+        Blk->insertLifetimeEnds(InsertPos, *I, Blk->getTerminatorStmt());
+  }
 }
 
 /// prependAutomaticObjScopeEndWithTerminator - Prepend scope end CFGElements for
@@ -1991,7 +1993,7 @@ CFGBuilder::prependAutomaticObjScopeEndWithTerminator(
   LocalScope::const_iterator PlaceToInsert = B;
   for (LocalScope::const_iterator I = B; I != E; ++I)
     PlaceToInsert = I;
-  Blk->insertScopeEnd(InsertPos, *PlaceToInsert, Blk->getTerminator());
+  Blk->insertScopeEnd(InsertPos, *PlaceToInsert, Blk->getTerminatorStmt());
   return *PlaceToInsert;
 }
 
@@ -4612,7 +4614,8 @@ void CFGBuilder::InsertTempDtorDecisionBlock(const TempDtorContext &Context,
   }
   assert(Context.TerminatorExpr);
   CFGBlock *Decision = createBlock(false);
-  Decision->setTerminator(CFGTerminator(Context.TerminatorExpr, true));
+  Decision->setTerminator(CFGTerminator(Context.TerminatorExpr,
+                                        CFGTerminator::TemporaryDtorsBranch));
   addSuccessor(Decision, Block, !Context.KnownExecuted.isFalse());
   addSuccessor(Decision, FalseSucc ? FalseSucc : Context.Succ,
                !Context.KnownExecuted.isTrue());
@@ -4820,7 +4823,7 @@ bool CFGBlock::FilterEdge(const CFGBlock::FilterOptions &F,
     // If the 'To' has no label or is labeled but the label isn't a
     // CaseStmt then filter this edge.
     if (const SwitchStmt *S =
-        dyn_cast_or_null<SwitchStmt>(From->getTerminator().getStmt())) {
+        dyn_cast_or_null<SwitchStmt>(From->getTerminatorStmt())) {
       if (S->isAllEnumCasesCovered()) {
         const Stmt *L = To->getLabel();
         if (!L || !isa<CaseStmt>(L))
@@ -5055,9 +5058,15 @@ class CFGBlockTerminatorPrint
 
 public:
   void print(CFGTerminator T) {
-    if (T.isTemporaryDtorsBranch())
+    switch (T.getKind()) {
+    case CFGTerminator::StmtBranch:
+      Visit(T.getStmt());
+      break;
+    case CFGTerminator::TemporaryDtorsBranch:
       OS << "(Temp Dtor) ";
-    Visit(T.getStmt());
+      Visit(T.getStmt());
+      break;
+    }
   }
 };
 
@@ -5366,7 +5375,7 @@ static void print_block(raw_ostream &OS, const CFG* cfg,
   }
 
   // Print the terminator of this block.
-  if (B.getTerminator()) {
+  if (B.getTerminator().isValid()) {
     if (ShowColors)
       OS.changeColor(raw_ostream::GREEN);
 
@@ -5519,7 +5528,7 @@ void CFGBlock::printTerminator(raw_ostream &OS,
 }
 
 Stmt *CFGBlock::getTerminatorCondition(bool StripParens) {
-  Stmt *Terminator = this->Terminator;
+  Stmt *Terminator = getTerminatorStmt();
   if (!Terminator)
     return nullptr;
 
diff --git a/clang/lib/Analysis/CFGStmtMap.cpp b/clang/lib/Analysis/CFGStmtMap.cpp
index eab2fafb54699..d1c23e3c879b4 100644
--- a/clang/lib/Analysis/CFGStmtMap.cpp
+++ b/clang/lib/Analysis/CFGStmtMap.cpp
@@ -70,7 +70,7 @@ static void Accumulate(SMap &SM, CFGBlock *B) {
   // Finally, look at the terminator.  If the terminator was already added
   // because it is a block-level expression in another block, overwrite
   // that mapping.
-  if (Stmt *Term = B->getTerminator())
+  if (Stmt *Term = B->getTerminatorStmt())
     SM[Term] = B;
 }
 
diff --git a/clang/lib/Analysis/Consumed.cpp b/clang/lib/Analysis/Consumed.cpp
index 112ef5f91f337..eee36d9caf7f1 100644
--- a/clang/lib/Analysis/Consumed.cpp
+++ b/clang/lib/Analysis/Consumed.cpp
@@ -76,7 +76,7 @@ static SourceLocation getFirstStmtLoc(const CFGBlock *Block) {
 static SourceLocation getLastStmtLoc(const CFGBlock *Block) {
   // Find the source location of the last statement in the block, if the block
   // is not empty.
-  if (const Stmt *StmtNode = Block->getTerminator()) {
+  if (const Stmt *StmtNode = Block->getTerminatorStmt()) {
     return StmtNode->getBeginLoc();
   } else {
     for (CFGBlock::const_reverse_iterator BI = Block->rbegin(),
diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp
index e435ff2ee170c..2cd607d8a4932 100644
--- a/clang/lib/Analysis/LiveVariables.cpp
+++ b/clang/lib/Analysis/LiveVariables.cpp
@@ -501,7 +501,7 @@ LiveVariablesImpl::runOnBlock(const CFGBlock *block,
   TransferFunctions TF(*this, val, obs, block);
 
   // Visit the terminator (if any).
-  if (const Stmt *term = block->getTerminator())
+  if (const Stmt *term = block->getTerminatorStmt())
     TF.Visit(const_cast<Stmt*>(term));
 
   // Apply the transfer function for all Stmts in the block.
diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp
index 828388716ea0a..697d2e57cedb3 100644
--- a/clang/lib/Analysis/ProgramPoint.cpp
+++ b/clang/lib/Analysis/ProgramPoint.cpp
@@ -144,7 +144,7 @@ void ProgramPoint::print(StringRef CR, llvm::raw_ostream &Out) const {
     Out << "Edge: (B" << E.getSrc()->getBlockID() << ", B"
         << E.getDst()->getBlockID() << ')';
 
-    if (const Stmt *T = E.getSrc()->getTerminator()) {
+    if (const Stmt *T = E.getSrc()->getTerminatorStmt()) {
       SourceLocation SLoc = T->getBeginLoc();
 
       Out << "\\|Terminator: ";
diff --git a/clang/lib/Analysis/ReachableCode.cpp b/clang/lib/Analysis/ReachableCode.cpp
index f3bc0c7d8a531..2fea88ea2eff4 100644
--- a/clang/lib/Analysis/ReachableCode.cpp
+++ b/clang/lib/Analysis/ReachableCode.cpp
@@ -48,7 +48,7 @@ static bool isTrivialExpression(const Expr *Ex) {
 static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) {
   // Check if the block ends with a do...while() and see if 'S' is the
   // condition.
-  if (const Stmt *Term = B->getTerminator()) {
+  if (const Stmt *Term = B->getTerminatorStmt()) {
     if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) {
       const Expr *Cond = DS->getCond()->IgnoreParenCasts();
       return Cond == S && isTrivialExpression(Cond);
@@ -116,7 +116,7 @@ static bool isDeadReturn(const CFGBlock *B, const Stmt *S) {
       // the call to the destructor.
       assert(Current->succ_size() == 2);
       Current = *(Current->succ_begin() + 1);
-    } else if (!Current->getTerminator() && Current->succ_size() == 1) {
+    } else if (!Current->getTerminatorStmt() && Current->succ_size() == 1) {
       // If there is only one successor, we're not dealing with outgoing control
       // flow. Thus, look into the next block.
       Current = *Current->succ_begin();
@@ -292,7 +292,7 @@ static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP) {
 /// Returns true if we should always explore all successors of a block.
 static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B,
                                              Preprocessor &PP) {
-  if (const Stmt *Term = B->getTerminator()) {
+  if (const Stmt *Term = B->getTerminatorStmt()) {
     if (isa<SwitchStmt>(Term))
       return true;
     // Specially handle '||' and '&&'.
@@ -461,12 +461,11 @@ const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
         return S;
     }
 
-  if (CFGTerminator T = Block->getTerminator()) {
-    if (!T.isTemporaryDtorsBranch()) {
-      const Stmt *S = T.getStmt();
-      if (isValidDeadStmt(S))
-        return S;
-    }
+  CFGTerminator T = Block->getTerminator();
+  if (T.isStmtBranch()) {
+    const Stmt *S = T.getStmt();
+    if (S && isValidDeadStmt(S))
+      return S;
   }
 
   return nullptr;
diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp
index bd65ea711b9fb..c7b4c4455664a 100644
--- a/clang/lib/Analysis/ThreadSafety.cpp
+++ b/clang/lib/Analysis/ThreadSafety.cpp
@@ -815,7 +815,7 @@ static void findBlockLocations(CFG *CFGraph,
 
     // Find the source location of the last statement in the block, if the
     // block is not empty.
-    if (const Stmt *S = CurrBlock->getTerminator()) {
+    if (const Stmt *S = CurrBlock->getTerminatorStmt()) {
       CurrBlockInfo->EntryLoc = CurrBlockInfo->ExitLoc = S->getBeginLoc();
     } else {
       for (CFGBlock::const_reverse_iterator BI = CurrBlock->rbegin(),
@@ -1499,7 +1499,7 @@ void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result,
 
   const Stmt *Cond = PredBlock->getTerminatorCondition();
   // We don't acquire try-locks on ?: branches, only when its result is used.
-  if (!Cond || isa<ConditionalOperator>(PredBlock->getTerminator()))
+  if (!Cond || isa<ConditionalOperator>(PredBlock->getTerminatorStmt()))
     return;
 
   bool Negate = false;
@@ -2402,7 +2402,7 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
       // a difference in locksets is probably due to a bug in that block, rather
       // than in some other predecessor. In that case, keep the other
       // predecessor's lockset.
-      if (const Stmt *Terminator = (*PI)->getTerminator()) {
+      if (const Stmt *Terminator = (*PI)->getTerminatorStmt()) {
         if (isa<ContinueStmt>(Terminator) || isa<BreakStmt>(Terminator)) {
           SpecialBlocks.push_back(*PI);
           continue;
@@ -2441,7 +2441,7 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) {
         // it might also be part of a switch. Also, a subsequent destructor
         // might add to the lockset, in which case the real issue might be a
         // double lock on the other path.
-        const Stmt *Terminator = PrevBlock->getTerminator();
+        const Stmt *Terminator = PrevBlock->getTerminatorStmt();
         bool IsLoop = Terminator && isa<ContinueStmt>(Terminator);
 
         FactSet PrevLockset;
diff --git a/clang/lib/Analysis/UninitializedValues.cpp b/clang/lib/Analysis/UninitializedValues.cpp
index 96f4cc51b7a12..cea401ab5c3f7 100644
--- a/clang/lib/Analysis/UninitializedValues.cpp
+++ b/clang/lib/Analysis/UninitializedValues.cpp
@@ -651,7 +651,7 @@ class TransferFunctions : public StmtVisitor<TransferFunctions> {
     // uninitialized.
     for (const auto *Block : cfg) {
       unsigned BlockID = Block->getBlockID();
-      const Stmt *Term = Block->getTerminator();
+      const Stmt *Term = Block->getTerminatorStmt();
       if (SuccsVisited[BlockID] && SuccsVisited[BlockID] < Block->succ_size() &&
           Term) {
         // This block inevitably leads to the use. If we have an edge from here
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 6c95b6000380b..bac407b832e15 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -398,7 +398,8 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) {
     for (const auto *B : *cfg) {
       if (!live[B->getBlockID()]) {
         if (B->pred_begin() == B->pred_end()) {
-          if (B->getTerminator() && isa<CXXTryStmt>(B->getTerminator()))
+          const Stmt *Term = B->getTerminatorStmt();
+          if (Term && isa<CXXTryStmt>(Term))
             // When not adding EH edges from calls, catch clauses
             // can otherwise seem dead.  Avoid noting them as dead.
             count += reachable_code::ScanReachableFromBlock(B, live);
@@ -446,7 +447,8 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) {
 
     // No more CFGElements in the block?
     if (ri == re) {
-      if (B.getTerminator() && isa<CXXTryStmt>(B.getTerminator())) {
+      const Stmt *Term = B.getTerminatorStmt();
+      if (Term && isa<CXXTryStmt>(Term)) {
         HasAbnormalEdge = true;
         continue;
       }
@@ -1077,7 +1079,7 @@ namespace {
         BlockQueue.pop_front();
         if (!P) continue;
 
-        const Stmt *Term = P->getTerminator();
+        const Stmt *Term = P->getTerminatorStmt();
         if (Term && isa<SwitchStmt>(Term))
           continue; // Switch statement, good.
 
@@ -1175,7 +1177,7 @@ namespace {
     }
 
     static const Stmt *getLastStmt(const CFGBlock &B) {
-      if (const Stmt *Term = B.getTerminator())
+      if (const Stmt *Term = B.getTerminatorStmt())
         return Term;
       for (CFGBlock::const_reverse_iterator ElemIt = B.rbegin(),
                                             ElemEnd = B.rend();
@@ -1281,11 +1283,11 @@ static void DiagnoseSwitchLabelsFallthrough(Sema &S, AnalysisDeclContext &AC,
       if (L.isMacroID())
         continue;
       if (S.getLangOpts().CPlusPlus11) {
-        const Stmt *Term = B->getTerminator();
+        const Stmt *Term = B->getTerminatorStmt();
         // Skip empty cases.
         while (B->empty() && !Term && B->succ_size() == 1) {
           B = *B->succ_begin();
-          Term = B->getTerminator();
+          Term = B->getTerminatorStmt();
         }
         if (!(B->empty() && Term && isa<BreakStmt>(Term))) {
           Preprocessor &PP = S.getPreprocessor();
diff --git a/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
index 76854e0382e29..0b0bf8465c9dd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UnreachableCodeChecker.cpp
@@ -204,7 +204,7 @@ const Stmt *UnreachableCodeChecker::getUnreachableStmt(const CFGBlock *CB) {
         return S->getStmt();
     }
   }
-  if (const Stmt *S = CB->getTerminator())
+  if (const Stmt *S = CB->getTerminatorStmt())
     return S;
   else
     return nullptr;
@@ -250,7 +250,7 @@ bool UnreachableCodeChecker::isInvalidPath(const CFGBlock *CB,
 bool UnreachableCodeChecker::isEmptyCFGBlock(const CFGBlock *CB) {
   return CB->getLabel() == nullptr // No labels
       && CB->size() == 0           // No statements
-      && !CB->getTerminator();     // No terminator
+      && !CB->getTerminatorStmt(); // No terminator
 }
 
 void ento::registerUnreachableCodeChecker(CheckerManager &mgr) {
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 168050955f2ef..cc93675344e14 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -678,7 +678,7 @@ void generateMinimalDiagForBlockEdge(const ExplodedNode *N, BlockEdge BE,
   const LocationContext *LC = N->getLocationContext();
   const CFGBlock *Src = BE.getSrc();
   const CFGBlock *Dst = BE.getDst();
-  const Stmt *T = Src->getTerminator();
+  const Stmt *T = Src->getTerminatorStmt();
   if (!T)
     return;
 
@@ -1203,7 +1203,7 @@ static void generatePathDiagnosticsForNode(const ExplodedNode *N,
     const CFGBlock *BSrc = BE->getSrc();
     ParentMap &PM = PDB.getParentMap();
 
-    if (const Stmt *Term = BSrc->getTerminator()) {
+    if (const Stmt *Term = BSrc->getTerminatorStmt()) {
       // Are we jumping past the loop body without ever executing the
       // loop (because the condition was false)?
       if (isLoop(Term)) {
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index 0c48c430a2cec..bc34472020c40 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -1494,7 +1494,7 @@ SuppressInlineDefensiveChecksVisitor::VisitNode(const ExplodedNode *Succ,
         return nullptr;
 
       CFGStmtMap *Map = CurLC->getAnalysisDeclContext()->getCFGStmtMap();
-      CurTerminatorStmt = Map->getBlock(CurStmt)->getTerminator();
+      CurTerminatorStmt = Map->getBlock(CurStmt)->getTerminatorStmt();
     } else {
       return nullptr;
     }
@@ -1566,7 +1566,7 @@ static const Expr *peelOffOuterExpr(const Expr *Ex,
       ProgramPoint ProgPoint = NI->getLocation();
       if (Optional<BlockEdge> BE = ProgPoint.getAs<BlockEdge>()) {
         const CFGBlock *srcBlk = BE->getSrc();
-        if (const Stmt *term = srcBlk->getTerminator()) {
+        if (const Stmt *term = srcBlk->getTerminatorStmt()) {
           if (term == CO) {
             bool TookTrueBranch = (*(srcBlk->succ_begin()) == BE->getDst());
             if (TookTrueBranch)
@@ -1852,7 +1852,7 @@ ConditionBRVisitor::VisitNodeImpl(const ExplodedNode *N,
   // here by looking at the state transition.
   if (Optional<BlockEdge> BE = progPoint.getAs<BlockEdge>()) {
     const CFGBlock *srcBlk = BE->getSrc();
-    if (const Stmt *term = srcBlk->getTerminator())
+    if (const Stmt *term = srcBlk->getTerminatorStmt())
       return VisitTerminator(term, N, srcBlk, BE->getDst(), BR, BRC);
     return nullptr;
   }
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index cbe997669ba73..ca9a48ef9808c 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -275,14 +275,14 @@ void CoreEngine::HandleBlockEntrance(const BlockEntrance &L,
 }
 
 void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) {
-  if (const Stmt *Term = B->getTerminator()) {
+  if (const Stmt *Term = B->getTerminatorStmt()) {
     switch (Term->getStmtClass()) {
       default:
         llvm_unreachable("Analysis for this terminator not implemented.");
 
       case Stmt::CXXBindTemporaryExprClass:
         HandleCleanupTemporaryBranch(
-            cast<CXXBindTemporaryExpr>(B->getTerminator().getStmt()), B, Pred);
+            cast<CXXBindTemporaryExpr>(Term), B, Pred);
         return;
 
       // Model static initializers.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 1742921884d60..975af4743927c 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1862,7 +1862,7 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
   // other constraints) then consider completely unrolling it.
   if(AMgr.options.ShouldUnrollLoops) {
     unsigned maxBlockVisitOnPath = AMgr.options.maxBlockVisitOnPath;
-    const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminator();
+    const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
     if (Term) {
       ProgramStateRef NewState = updateLoopStack(Term, AMgr.getASTContext(),
                                                  Pred, maxBlockVisitOnPath);
@@ -1883,7 +1883,7 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
   unsigned int BlockCount = nodeBuilder.getContext().blockCount();
   if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 &&
       AMgr.options.ShouldWidenLoops) {
-    const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminator();
+    const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
     if (!(Term &&
           (isa<ForStmt>(Term) || isa<WhileStmt>(Term) || isa<DoStmt>(Term))))
       return;
@@ -2008,8 +2008,8 @@ static const Stmt *ResolveCondition(const Stmt *Condition,
   if (!BO || !BO->isLogicalOp())
     return Condition;
 
-  assert(!B->getTerminator().isTemporaryDtorsBranch() &&
-         "Temporary destructor branches handled by processBindTemporary.");
+  assert(B->getTerminator().isStmtBranch() &&
+         "Other kinds of branches are handled separately!");
 
   // For logical operations, we still have the case where some branches
   // use the traditional "merge" approach and others sink the branch
diff --git a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
index ae9e073416da6..9838249ae82ca 100644
--- a/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
+++ b/clang/lib/StaticAnalyzer/Core/LoopUnrolling.cpp
@@ -234,7 +234,7 @@ bool madeNewBranch(ExplodedNode *N, const Stmt *LoopStmt) {
 
     ProgramPoint P = N->getLocation();
     if (Optional<BlockEntrance> BE = P.getAs<BlockEntrance>())
-      S = BE->getBlock()->getTerminator();
+      S = BE->getBlock()->getTerminatorStmt();
 
     if (S == LoopStmt)
       return false;
diff --git a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index cc1e7e1798f54..9032068892100 100644
--- a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -794,7 +794,7 @@ const Stmt *PathDiagnosticLocation::getStmt(const ExplodedNode *N) {
   if (auto SP = P.getAs<StmtPoint>())
     return SP->getStmt();
   if (auto BE = P.getAs<BlockEdge>())
-    return BE->getSrc()->getTerminator();
+    return BE->getSrc()->getTerminatorStmt();
   if (auto CE = P.getAs<CallEnter>())
     return CE->getCallExpr();
   if (auto CEE = P.getAs<CallExitEnd>())

From 1293de8b1733e85d13960c150b86a1fe3813ae3f Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Fri, 24 May 2019 01:34:26 +0000
Subject: [PATCH 0121/1176] [CFG] NFC: Modernize a test file for constructor
 initializer CFGs.

Move FileCheck directives around so that it was easy to understand
what tests what and what effect do changes have.

Differential Revision: https://reviews.llvm.org/D61815

llvm-svn: 361587
---
 .../test/Analysis/initializers-cfg-output.cpp | 222 ++++++++++++------
 1 file changed, 151 insertions(+), 71 deletions(-)

diff --git a/clang/test/Analysis/initializers-cfg-output.cpp b/clang/test/Analysis/initializers-cfg-output.cpp
index 8d1039ddf34ed..a69e78faeda08 100644
--- a/clang/test/Analysis/initializers-cfg-output.cpp
+++ b/clang/test/Analysis/initializers-cfg-output.cpp
@@ -12,22 +12,84 @@
 
 class A {
 public:
+  // CHECK:       A()
+  // CHECK:        [B1 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   A() {}
+
+  // CHECK:       A(int i)
+  // CHECK:        [B1 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   A(int i) {}
 };
 
 class B : public virtual A {
 public:
+  // CHECK:       B()
+  // CHECK:        [B2 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B1]
+  // WARNINGS-NEXT:     1:  (CXXConstructExpr, class A)
+  // ANALYZER-NEXT:     1:  (CXXConstructExpr, A() (Base initializer), class A)
+  // CHECK-NEXT:     2: A([B1.1]) (Base initializer)
+  // CHECK-NEXT:     Preds (1): B2
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   B() {}
+
+  // CHECK:       B(int i)
+  // CHECK:        [B2 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B1]
+  // CHECK-NEXT:     1: i
+  // CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, LValueToRValue, int)
+  // WARNINGS-NEXT:     3: [B1.2] (CXXConstructExpr, class A)
+  // ANALYZER-NEXT:     3: [B1.2] (CXXConstructExpr, A([B1.2]) (Base initializer), class A)
+  // CHECK-NEXT:     4: A([B1.3]) (Base initializer)
+  // CHECK-NEXT:     Preds (1): B2
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   B(int i) : A(i) {}
 };
 
 class C : public virtual A {
 public:
+  // CHECK:       C()
+  // CHECK:        [B2 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B1]
+  // WARNINGS-NEXT:     1:  (CXXConstructExpr, class A)
+  // ANALYZER-NEXT:     1:  (CXXConstructExpr, A() (Base initializer), class A)
+  // CHECK-NEXT:     2: A([B1.1]) (Base initializer)
+  // CHECK-NEXT:     Preds (1): B2
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   C() {}
+
+  // CHECK:       C(int i)
+  // CHECK:        [B2 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B1]
+  // CHECK-NEXT:     1: i
+  // CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, LValueToRValue, int)
+  // WARNINGS-NEXT:     3: [B1.2] (CXXConstructExpr, class A)
+  // ANALYZER-NEXT:     3: [B1.2] (CXXConstructExpr, A([B1.2]) (Base initializer), class A)
+  // CHECK-NEXT:     4: A([B1.3]) (Base initializer)
+  // CHECK-NEXT:     Preds (1): B2
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   C(int i) : A(i) {}
 };
 
+
 class TestOrder : public C, public B, public A {
   int i;
   int& r;
@@ -35,6 +97,34 @@ class TestOrder : public C, public B, public A {
   TestOrder();
 };
 
+// CHECK:       TestOrder::TestOrder()
+// CHECK:        [B2 (ENTRY)]
+// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B1]
+// WARNINGS-NEXT:     1:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:     1:  (CXXConstructExpr, A() (Base initializer), class A)
+// CHECK-NEXT:     2: A([B1.1]) (Base initializer)
+// WARNINGS-NEXT:     3:  (CXXConstructExpr, class C)
+// ANALYZER-NEXT:     3:  (CXXConstructExpr, C() (Base initializer), class C)
+// CHECK-NEXT:     4: C([B1.3]) (Base initializer)
+// WARNINGS-NEXT:     5:  (CXXConstructExpr, class B)
+// ANALYZER-NEXT:     5:  (CXXConstructExpr, B() (Base initializer), class B)
+// CHECK-NEXT:     6: B([B1.5]) (Base initializer)
+// WARNINGS-NEXT:     7:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:     7:  (CXXConstructExpr, A() (Base initializer), class A)
+// CHECK-NEXT:     8: A([B1.7]) (Base initializer)
+// CHECK-NEXT:     9: /*implicit*/(int)0
+// CHECK-NEXT:    10: i([B1.9]) (Member initializer)
+// CHECK-NEXT:    11: this
+// CHECK-NEXT:    12: [B1.11]->i
+// CHECK-NEXT:    13: r([B1.12]) (Member initializer)
+// WARNINGS-NEXT:    14:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:    14:  (CXXConstructExpr, [B1.15], class A)
+// CHECK-NEXT:    15: A a;
+// CHECK-NEXT:     Preds (1): B2
+// CHECK-NEXT:     Succs (1): B0
+// CHECK:        [B0 (EXIT)]
+// CHECK-NEXT:     Preds (1): B1
 TestOrder::TestOrder()
   : r(i), B(), i(), C() {
   A a;
@@ -46,6 +136,37 @@ class TestControlFlow {
   TestControlFlow(bool b);
 };
 
+// CHECK:       TestControlFlow::TestControlFlow(bool b)
+// CHECK:        [B5 (ENTRY)]
+// CHECK-NEXT:     Succs (1): B4
+// CHECK:        [B1]
+// CHECK-NEXT:     1: [B4.4] ? [B2.1] : [B3.1]
+// CHECK-NEXT:     2: y([B1.1]) (Member initializer)
+// CHECK-NEXT:     3: this
+// CHECK-NEXT:     4: [B1.3]->y
+// CHECK-NEXT:     5: [B1.4] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:     6: z([B1.5]) (Member initializer)
+// CHECK-NEXT:     7: int v;
+// CHECK-NEXT:     Preds (2): B2 B3
+// CHECK-NEXT:     Succs (1): B0
+// CHECK:        [B2]
+// CHECK-NEXT:     1: 0
+// CHECK-NEXT:     Preds (1): B4
+// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B3]
+// CHECK-NEXT:     1: 1
+// CHECK-NEXT:     Preds (1): B4
+// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B4]
+// CHECK-NEXT:     1: 0
+// CHECK-NEXT:     2: x([B4.1]) (Member initializer)
+// CHECK-NEXT:     3: b
+// CHECK-NEXT:     4: [B4.3] (ImplicitCastExpr, LValueToRValue, _Bool)
+// CHECK-NEXT:     T: [B4.4] ? ... : ...
+// CHECK-NEXT:     Preds (1): B5
+// CHECK-NEXT:     Succs (2): B2 B3
+// CHECK:        [B0 (EXIT)]
+// CHECK-NEXT:     Preds (1): B1
 TestControlFlow::TestControlFlow(bool b)
   : y(b ? 0 : 1)
   , x(0)
@@ -55,77 +176,36 @@ TestControlFlow::TestControlFlow(bool b)
 
 class TestDelegating {
   int x, z;
- public:
+public:
+
+  // CHECK:       TestDelegating()
+  // CHECK:        [B2 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B1]
+  // CHECK-NEXT:     1: 2
+  // CHECK-NEXT:     2: 3
+  // WARNINGS-NEXT:     3: [B1.1], [B1.2] (CXXConstructExpr, class TestDelegating)
+  // ANALYZER-NEXT:     3: [B1.1], [B1.2] (CXXConstructExpr, TestDelegating([B1.1], [B1.2]) (Delegating initializer), class TestDelegating)
+  // CHECK-NEXT:     4: TestDelegating([B1.3]) (Delegating initializer)
+  // CHECK-NEXT:     Preds (1): B2
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   TestDelegating() : TestDelegating(2, 3) {}
+
+  // CHECK:       TestDelegating(int x, int z)
+  // CHECK:        [B2 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B1]
+  // CHECK-NEXT:     1: x
+  // CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, LValueToRValue, int)
+  // CHECK-NEXT:     3: x([B1.2]) (Member initializer)
+  // CHECK-NEXT:     4: z
+  // CHECK-NEXT:     5: [B1.4] (ImplicitCastExpr, LValueToRValue, int)
+  // CHECK-NEXT:     6: z([B1.5]) (Member initializer)
+  // CHECK-NEXT:     Preds (1): B2
+  // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B0 (EXIT)]
+  // CHECK-NEXT:     Preds (1): B1
   TestDelegating(int x, int z) : x(x), z(z) {}
 };
-
-// CHECK:  [B2 (ENTRY)]
-// CHECK:    Succs (1): B1
-// CHECK:  [B1]
-// WARNINGS:    1:  (CXXConstructExpr, class A)
-// ANALYZER:    1:  (CXXConstructExpr, A() (Base initializer), class A)
-// CHECK:    2: A([B1.1]) (Base initializer)
-// WARNINGS:    3:  (CXXConstructExpr, class C)
-// ANALYZER:    3:  (CXXConstructExpr, C() (Base initializer), class C)
-// CHECK:    4: C([B1.3]) (Base initializer)
-// WARNINGS:    5:  (CXXConstructExpr, class B)
-// ANALYZER:    5:  (CXXConstructExpr, B() (Base initializer), class B)
-// CHECK:    6: B([B1.5]) (Base initializer)
-// WARNINGS:    7:  (CXXConstructExpr, class A)
-// ANALYZER:    7:  (CXXConstructExpr, A() (Base initializer), class A)
-// CHECK:    8: A([B1.7]) (Base initializer)
-// CHECK:    9: /*implicit*/(int)0
-// CHECK:   10: i([B1.9]) (Member initializer)
-// CHECK:   11: this
-// CHECK:   12: [B1.11]->i
-// CHECK:   13: r([B1.12]) (Member initializer)
-// WARNINGS:   14:  (CXXConstructExpr, class A)
-// ANALYZER:   14:  (CXXConstructExpr, [B1.15], class A)
-// CHECK:   15: A a;
-// CHECK:    Preds (1): B2
-// CHECK:    Succs (1): B0
-// CHECK:  [B0 (EXIT)]
-// CHECK:    Preds (1): B1
-// CHECK:  [B5 (ENTRY)]
-// CHECK:    Succs (1): B4
-// CHECK:  [B1]
-// CHECK:    1: [B4.4] ? [B2.1] : [B3.1]
-// CHECK:    2: y([B1.1]) (Member initializer)
-// CHECK:    3: this
-// CHECK:    4: [B1.3]->y
-// CHECK:    5: [B1.4] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK:    6: z([B1.5]) (Member initializer)
-// CHECK:    7: int v;
-// CHECK:    Preds (2): B2 B3
-// CHECK:    Succs (1): B0
-// CHECK:  [B2]
-// CHECK:    1: 0
-// CHECK:    Preds (1): B4
-// CHECK:    Succs (1): B1
-// CHECK:  [B3]
-// CHECK:    1: 1
-// CHECK:    Preds (1): B4
-// CHECK:    Succs (1): B1
-// CHECK:  [B4]
-// CHECK:    1: 0
-// CHECK:    2: x([B4.1]) (Member initializer)
-// CHECK:    3: b
-// CHECK:    4: [B4.3] (ImplicitCastExpr, LValueToRValue, _Bool)
-// CHECK:    T: [B4.4] ? ... : ...
-// CHECK:    Preds (1): B5
-// CHECK:    Succs (2): B2 B3
-// CHECK:  [B0 (EXIT)]
-// CHECK:    Preds (1): B1
-// CHECK:  [B2 (ENTRY)]
-// CHECK:    Succs (1): B1
-// CHECK:  [B1]
-// CHECK:    1: 2
-// CHECK:    2: 3
-// WARNINGS:    3: [B1.1], [B1.2] (CXXConstructExpr, class TestDelegating)
-// ANALYZER:    3: [B1.1], [B1.2] (CXXConstructExpr, TestDelegating([B1.1], [B1.2]) (Delegating initializer), class TestDelegating)
-// CHECK:    4: TestDelegating([B1.3]) (Delegating initializer)
-// CHECK:    Preds (1): B2
-// CHECK:    Succs (1): B0
-// CHECK:  [B0 (EXIT)]
-// CHECK:    Preds (1): B1

From 8458c9ef42390b3042765c33b09b4ade0e9c22fc Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 24 May 2019 01:35:07 +0000
Subject: [PATCH 0122/1176] Factor out repeated code to build 'this'
 expressions and mark them referenced.

llvm-svn: 361588
---
 clang/include/clang/Sema/Sema.h   |  4 ++++
 clang/lib/Sema/SemaExprCXX.cpp    | 16 +++++++++++++---
 clang/lib/Sema/SemaExprMember.cpp |  6 ++----
 clang/lib/Sema/SemaOverload.cpp   |  6 ++----
 clang/lib/Sema/TreeTransform.h    |  8 ++++----
 5 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ddf393d46e21f..60480d98bebe3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5223,6 +5223,10 @@ class Sema {
   //// ActOnCXXThis -  Parse 'this' pointer.
   ExprResult ActOnCXXThis(SourceLocation loc);
 
+  /// Build a CXXThisExpr and mark it referenced in the current context.
+  Expr *BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit);
+  void MarkThisReferenced(CXXThisExpr *This);
+
   /// Try to retrieve the type of the 'this' pointer.
   ///
   /// \returns The type of 'this', if possible. Otherwise, returns a NULL type.
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 455a71bd0ac0a..e3286e8943f2a 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1298,10 +1298,20 @@ ExprResult Sema::ActOnCXXThis(SourceLocation Loc) {
   /// which the function is called.
 
   QualType ThisTy = getCurrentThisType();
-  if (ThisTy.isNull()) return Diag(Loc, diag::err_invalid_this_use);
+  if (ThisTy.isNull())
+    return Diag(Loc, diag::err_invalid_this_use);
+  return BuildCXXThisExpr(Loc, ThisTy, /*isImplicit=*/false);
+}
+
+Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type,
+                             bool IsImplicit) {
+  auto *This = new (Context) CXXThisExpr(Loc, Type, IsImplicit);
+  MarkThisReferenced(This);
+  return This;
+}
 
-  CheckCXXThisCapture(Loc);
-  return new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit=*/false);
+void Sema::MarkThisReferenced(CXXThisExpr *This) {
+  CheckCXXThisCapture(This->getExprLoc());
 }
 
 bool Sema::isThisOutsideMemberFunctionBody(QualType BaseType) {
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index b07bba5584bdd..3d7b8db2f6710 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -1092,8 +1092,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
     SourceLocation Loc = R.getNameLoc();
     if (SS.getRange().isValid())
       Loc = SS.getRange().getBegin();
-    CheckCXXThisCapture(Loc);
-    BaseExpr = new (Context) CXXThisExpr(Loc, BaseExprType,/*isImplicit=*/true);
+    BaseExpr = BuildCXXThisExpr(Loc, BaseExprType, /*isImplicit=*/true);
   }
 
   // Check the use of this member.
@@ -1836,8 +1835,7 @@ Sema::BuildImplicitMemberExpr(const CXXScopeSpec &SS,
     SourceLocation Loc = R.getNameLoc();
     if (SS.getRange().isValid())
       Loc = SS.getRange().getBegin();
-    CheckCXXThisCapture(Loc);
-    baseExpr = new (Context) CXXThisExpr(loc, ThisTy, /*isImplicit=*/true);
+    baseExpr = BuildCXXThisExpr(loc, ThisTy, /*isImplicit=*/true);
   }
 
   return BuildMemberReferenceExpr(baseExpr, ThisTy,
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index eadc01e5efb6b..e5cbd1d0a81c0 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -13910,10 +13910,8 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
         SourceLocation Loc = MemExpr->getMemberLoc();
         if (MemExpr->getQualifier())
           Loc = MemExpr->getQualifierLoc().getBeginLoc();
-        CheckCXXThisCapture(Loc);
-        Base = new (Context) CXXThisExpr(Loc,
-                                         MemExpr->getBaseType(),
-                                         /*isImplicit=*/true);
+        Base =
+            BuildCXXThisExpr(Loc, MemExpr->getBaseType(), /*isImplicit=*/true);
       }
     } else
       Base = MemExpr->getBase();
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index c653fb1d6e2c4..b5114eeef3011 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -2697,8 +2697,7 @@ class TreeTransform {
   ExprResult RebuildCXXThisExpr(SourceLocation ThisLoc,
                                 QualType ThisType,
                                 bool isImplicit) {
-    getSema().CheckCXXThisCapture(ThisLoc);
-    return new (getSema().Context) CXXThisExpr(ThisLoc, ThisType, isImplicit);
+    return getSema().BuildCXXThisExpr(ThisLoc, ThisType, isImplicit);
   }
 
   /// Build a new C++ throw expression.
@@ -10355,8 +10354,9 @@ TreeTransform<Derived>::TransformCXXThisExpr(CXXThisExpr *E) {
   QualType T = getSema().getCurrentThisType();
 
   if (!getDerived().AlwaysRebuild() && T == E->getType()) {
-    // Make sure that we capture 'this'.
-    getSema().CheckCXXThisCapture(E->getBeginLoc());
+    // Mark it referenced in the new context regardless.
+    // FIXME: this is a bit instantiation-specific.
+    getSema().MarkThisReferenced(E);
     return E;
   }
 

From fc302c2b7f1c224be03caba3a82282a943a31519 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Fri, 24 May 2019 01:41:58 +0000
Subject: [PATCH 0123/1176] dwarfdump: Deterministically... determine whether
 parsing a DWARF32 or DWARF64 str_offsets header

Rather than trying one and then the other - use the kind of the CU to
select which kind of header to parse.

llvm-svn: 361589
---
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp              | 13 ++++++++++---
 .../DebugInfo/X86/dwarfdump-str-offsets-macho.s     |  7 ++++---
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 7bc5221549063..fa165cf2d4022 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -811,12 +811,19 @@ DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
   auto Offset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base), 0);
   Optional<StrOffsetsContributionDescriptor> Descriptor;
   // Attempt to find a DWARF64 contribution 16 bytes before the base.
-  if (Offset >= 16)
+  switch (Header.getFormat()) {
+  case dwarf::DwarfFormat::DWARF64:
+    if (Offset < 16)
+      return None;
     Descriptor =
         parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
-  // Try to find a DWARF32 contribution 8 bytes before the base.
-  if (!Descriptor && Offset >= 8)
+    break;
+  case dwarf::DwarfFormat::DWARF32:
+    if (Offset < 8)
+      return None;
     Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+    break;
+  }
   return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
 }
 
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-macho.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-macho.s
index 1332a94ec3c71..10a810a350d19 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-macho.s
+++ b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-macho.s
@@ -160,18 +160,19 @@ CU1_5_version:
 CU1_5_end:
 
 # DWARF v5 CU header
-        .long  CU2_5_end-CU2_5_version  # Length of Unit
+        .long  0xffffffff
+        .quad  CU2_5_end-CU2_5_version  # Length of Unit
 CU2_5_version:
         .short 5               # DWARF version number
         .byte 1                # DWARF Unit Type
         .byte 8                # Address Size (in bytes)
-        .long 0                # Offset Into Abbrev. Section
+        .quad 0                # Offset Into Abbrev. Section
 # The compile-unit DIE, which has a DW_AT_producer, DW_AT_name,
 # DW_AT_str_offsets and DW_AT_compdir.
         .byte 1                # Abbreviation code
         .byte 0                # The index of the producer string
         .byte 1                # The index of the CU name string
-        .long Ldebug_str_offsets_base1-Ldebug_str_offsets
+        .quad Ldebug_str_offsets_base1-Ldebug_str_offsets
         .byte 2                # The index of the comp dir string
         .byte 0 # NULL
 CU2_5_end:

From 414da9d66a5469e46a804611a2fc2a6fad543484 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 24 May 2019 01:45:47 +0000
Subject: [PATCH 0124/1176] Clarify how musttail can be used to create
 forwarding thunks

llvm-svn: 361590
---
 llvm/docs/LangRef.rst | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 5ea27c976d2ae..6311f6f616369 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -10006,12 +10006,16 @@ This instruction requires several arguments:
       recursive cycle in the call graph.
    #. Arguments with the :ref:`inalloca <attr_inalloca>` attribute are
       forwarded in place.
+   #. If the musttail call appears in a function with the ``"thunk"`` attribute
+      and the caller and callee both have varargs, than any unprototyped
+      arguments in register or memory are forwarded to the callee. Similarly,
+      the return value of the callee is returned the the caller's caller, even
+      if a void return type is in use.
 
    Both markers imply that the callee does not access allocas from the caller.
    The ``tail`` marker additionally implies that the callee does not access
-   varargs from the caller, while ``musttail`` implies that varargs from the
-   caller are passed to the callee. Calls marked ``musttail`` must obey the
-   following additional  rules:
+   varargs from the caller. Calls marked ``musttail`` must obey the following
+   additional  rules:
 
    - The call must immediately precede a :ref:`ret <i_ret>` instruction,
      or a pointer bitcast followed by a ret instruction.

From 4cecdaa05f8069aa3d9449f44e1e7c1847850bae Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel_l_sanders@apple.com>
Date: Fri, 24 May 2019 02:15:27 +0000
Subject: [PATCH 0125/1176] Fix BUILD_SHARED_LIBS builds after r361567

Also fixed a comment I noticed while debugging this build

llvm-svn: 361591
---
 llvm/cmake/modules/LLVM-Config.cmake  | 2 +-
 llvm/tools/bugpoint/CMakeLists.txt    | 2 ++
 llvm/tools/llc/CMakeLists.txt         | 2 ++
 llvm/tools/llvm-c-test/CMakeLists.txt | 2 ++
 llvm/tools/llvm-dwp/CMakeLists.txt    | 2 ++
 llvm/tools/llvm-lto/CMakeLists.txt    | 2 ++
 llvm/tools/llvm-lto2/CMakeLists.txt   | 1 +
 llvm/tools/opt/CMakeLists.txt         | 1 +
 8 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/cmake/modules/LLVM-Config.cmake b/llvm/cmake/modules/LLVM-Config.cmake
index 57c02f170e18f..be28ca4f052ea 100644
--- a/llvm/cmake/modules/LLVM-Config.cmake
+++ b/llvm/cmake/modules/LLVM-Config.cmake
@@ -210,7 +210,7 @@ function(llvm_map_components_to_libnames out_libs)
     elseif( c STREQUAL "all" )
       list(APPEND expanded_components ${LLVM_AVAILABLE_LIBS})
     elseif( c STREQUAL "AllTargetsCodeGens" )
-      # Link all the asm printers from all the targets
+      # Link all the codegens from all the targets
       foreach(t ${LLVM_TARGETS_TO_BUILD})
         if( TARGET LLVM${t}CodeGen)
           list(APPEND expanded_components "LLVM${t}CodeGen")
diff --git a/llvm/tools/bugpoint/CMakeLists.txt b/llvm/tools/bugpoint/CMakeLists.txt
index 6ed15a24a2d4d..031f51480cce7 100644
--- a/llvm/tools/bugpoint/CMakeLists.txt
+++ b/llvm/tools/bugpoint/CMakeLists.txt
@@ -1,6 +1,8 @@
 set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsInfos
   Analysis
   BitWriter
   CodeGen
diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt
index 863358b5e71c7..130f0cd3cb9e8 100644
--- a/llvm/tools/llc/CMakeLists.txt
+++ b/llvm/tools/llc/CMakeLists.txt
@@ -1,6 +1,8 @@
 set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsInfos
   Analysis
   AsmPrinter
   CodeGen
diff --git a/llvm/tools/llvm-c-test/CMakeLists.txt b/llvm/tools/llvm-c-test/CMakeLists.txt
index 78eaafc3b5a40..939164e636216 100644
--- a/llvm/tools/llvm-c-test/CMakeLists.txt
+++ b/llvm/tools/llvm-c-test/CMakeLists.txt
@@ -1,7 +1,9 @@
 set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
+  AllTargetsDescs
   AllTargetsDisassemblers
+  AllTargetsInfos
   BitReader
   Core
   MCDisassembler
diff --git a/llvm/tools/llvm-dwp/CMakeLists.txt b/llvm/tools/llvm-dwp/CMakeLists.txt
index 49f40b5c6397b..bf40768ebd4a5 100644
--- a/llvm/tools/llvm-dwp/CMakeLists.txt
+++ b/llvm/tools/llvm-dwp/CMakeLists.txt
@@ -1,6 +1,8 @@
 set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsInfos
   AsmPrinter
   DebugInfoDWARF
   MC
diff --git a/llvm/tools/llvm-lto/CMakeLists.txt b/llvm/tools/llvm-lto/CMakeLists.txt
index d0222315d25f7..69868fb870c04 100644
--- a/llvm/tools/llvm-lto/CMakeLists.txt
+++ b/llvm/tools/llvm-lto/CMakeLists.txt
@@ -1,6 +1,8 @@
 set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsInfos
   BitReader
   BitWriter
   Core
diff --git a/llvm/tools/llvm-lto2/CMakeLists.txt b/llvm/tools/llvm-lto2/CMakeLists.txt
index 233817a387f9d..7f2db01c9c916 100644
--- a/llvm/tools/llvm-lto2/CMakeLists.txt
+++ b/llvm/tools/llvm-lto2/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
   AllTargetsDescs
+  AllTargetsInfos
   BitReader
   Core
   Linker
diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt
index 2f9665c689807..c9e44449dc2fb 100644
--- a/llvm/tools/opt/CMakeLists.txt
+++ b/llvm/tools/opt/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   AllTargetsAsmParsers
   AllTargetsCodeGens
+  AllTargetsDescs
   AllTargetsInfos
   AggressiveInstCombine
   Analysis

From 425e565783ddb7c440953ae74daf6d54f4ee9d74 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Fri, 24 May 2019 02:29:18 +0000
Subject: [PATCH 0126/1176] [analyzer] NFC: Prevent multi-file plist test from
 spamming up the build folder.

It was producing an HTML report with a random name on every tests run
and never cleaned those up.

llvm-svn: 361592
---
 .../Inputs/expected-plists/plist-multi-file.c.plist           | 4 ----
 clang/test/Analysis/diagnostics/plist-multi-file.c            | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-plists/plist-multi-file.c.plist b/clang/test/Analysis/diagnostics/Inputs/expected-plists/plist-multi-file.c.plist
index 86c203245d4e7..74ccc7903c1f8 100644
--- a/clang/test/Analysis/diagnostics/Inputs/expected-plists/plist-multi-file.c.plist
+++ b/clang/test/Analysis/diagnostics/Inputs/expected-plists/plist-multi-file.c.plist
@@ -184,10 +184,6 @@
    <key>col</key><integer>8</integer>
    <key>file</key><integer>1</integer>
   </dict>
-  <key>HTMLDiagnostics_files</key>
-  <array>
-   <string>report-288847.html</string>
-  </array>
   <key>ExecutedLines</key>
   <dict>
    <key>0</key>
diff --git a/clang/test/Analysis/diagnostics/plist-multi-file.c b/clang/test/Analysis/diagnostics/plist-multi-file.c
index a70c9aa93537f..f6ff8097ff094 100644
--- a/clang/test/Analysis/diagnostics/plist-multi-file.c
+++ b/clang/test/Analysis/diagnostics/plist-multi-file.c
@@ -1,5 +1,5 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core -analyzer-output=plist-html -o %t.plist -verify %s
-// RUN: tail -n +11 %t.plist | %diff_plist --ignore-matching-lines=report %S/Inputs/expected-plists/plist-multi-file.c.plist -
+// RUN: %clang_analyze_cc1 -analyzer-checker=core -analyzer-output=plist-multi-file -o %t.plist -verify %s
+// RUN: tail -n +11 %t.plist | %diff_plist %S/Inputs/expected-plists/plist-multi-file.c.plist -
 
 #include "plist-multi-file.h"
 

From e46721a153440646c5a2f0c59700a34e24c26be1 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Fri, 24 May 2019 02:46:34 +0000
Subject: [PATCH 0127/1176] fix destroying delete test with older apple
 compilers

llvm-svn: 361593
---
 .../destroying_delete_t.pass.cpp              |  2 +
 .../destroying_delete_t_declaration.pass.cpp  | 52 +++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp

diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
index d544b0e7cab6e..a2c9b8b5f42f5 100644
--- a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
@@ -14,6 +14,8 @@
 
 // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
 
+// UNSUPPORTED: apple-clang-9, apple-clang-10
+
 #include <new>
 
 #include <cassert>
diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp
new file mode 100644
index 0000000000000..b0e42ecfd58c0
--- /dev/null
+++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t_declaration.pass.cpp
@@ -0,0 +1,52 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// struct destroying_delete_t {
+//   explicit destroying_delete_t() = default;
+// };
+// inline constexpr destroying_delete_t destroying_delete{};
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
+
+// Test only the library parts of destroying delete in this test.
+// Verify that it's properly declared after C++17 and that it's constexpr.
+//
+// Other tests will check the language side of things -- but those are
+// limited to newer compilers.
+
+#include <new>
+
+#include <cassert>
+#include "test_macros.h"
+#include "test_convertible.hpp"
+
+#ifdef __cpp_impl_destroying_delete
+# ifndef __cpp_lib_destroying_delete
+#   error "Expected __cpp_lib_destroying_delete to be defined"
+#   elif __cpp_lib_destroying_delete < 201806L
+#     error "Unexpected value of __cpp_lib_destroying_delete"
+#   endif
+#else
+# ifdef __cpp_lib_destroying_delete
+#   error "__cpp_lib_destroying_delete should not be defined unless the compiler supports it"
+# endif
+#endif
+
+constexpr bool test_constexpr(std::destroying_delete_t) {
+  return true;
+}
+
+int main() {
+  static_assert(std::is_default_constructible<std::destroying_delete_t>::value, "");
+  static_assert(!test_convertible<std::destroying_delete_t>(), "");
+  constexpr std::destroying_delete_t dd{};
+  static_assert((dd, true), "");
+  static_assert(&dd != &std::destroying_delete, "");
+  static_assert(test_constexpr(std::destroying_delete), "");
+}

From a38ddc36fdc7a18795e067cb68ec770f3fc5a982 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Fri, 24 May 2019 03:15:32 +0000
Subject: [PATCH 0128/1176] fix test for older clang versions

llvm-svn: 361594
---
 .../support.dynamic/destroying_delete_t.pass.cpp                 | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
index a2c9b8b5f42f5..2ca6d19757316 100644
--- a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
@@ -15,6 +15,7 @@
 // UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
 
 // UNSUPPORTED: apple-clang-9, apple-clang-10
+// UNSUPPORTED: clang-6
 
 #include <new>
 

From 01d6173667f77722a31070fd186a50d3b3740207 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Fri, 24 May 2019 04:02:05 +0000
Subject: [PATCH 0129/1176] [llvm-nm] Fix Bug 41353 - unique symbols printed as
 D instead of u

Summary:
https://bugs.llvm.org/show_bug.cgi?id=41353

I'm new to LLVM and C++ so please do not hesitate to iterate with me on this fix.

Patch by Mike Pozulp!

Reviewers: rupprecht, zbrid, grimar, jhenderson

Reviewed By: rupprecht, jhenderson

Subscribers: jhenderson, chrisjackson, MaskRay, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61117

llvm-svn: 361595
---
 llvm/include/llvm/Object/ELFObjectFile.h | 11 ++++++
 llvm/test/tools/llvm-nm/X86/unique.test  | 50 ++++++++++++++++++++++++
 llvm/tools/llvm-nm/llvm-nm.cpp           | 12 ++++--
 3 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-nm/X86/unique.test

diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index ed54ad02ccff7..d5e9d3638dc3c 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -56,6 +56,7 @@ class ELFObjectFileBase : public ObjectFile {
 
   virtual uint16_t getEMachine() const = 0;
   virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
+  virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0;
   virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
   virtual uint8_t getSymbolELFType(DataRefImpl Symb) const = 0;
 
@@ -145,6 +146,10 @@ class ELFSymbolRef : public SymbolRef {
     return getObject()->getSymbolSize(getRawDataRefImpl());
   }
 
+  uint8_t getBinding() const {
+    return getObject()->getSymbolBinding(getRawDataRefImpl());
+  }
+
   uint8_t getOther() const {
     return getObject()->getSymbolOther(getRawDataRefImpl());
   }
@@ -252,6 +257,7 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase {
   uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
   uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
   uint32_t getSymbolFlags(DataRefImpl Symb) const override;
+  uint8_t getSymbolBinding(DataRefImpl Symb) const override;
   uint8_t getSymbolOther(DataRefImpl Symb) const override;
   uint8_t getSymbolELFType(DataRefImpl Symb) const override;
   Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
@@ -553,6 +559,11 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
   return getSymbol(Symb)->st_size;
 }
 
+template <class ELFT>
+uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
+  return getSymbol(Symb)->getBinding();
+}
+
 template <class ELFT>
 uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const {
   return getSymbol(Symb)->st_other;
diff --git a/llvm/test/tools/llvm-nm/X86/unique.test b/llvm/test/tools/llvm-nm/X86/unique.test
new file mode 100644
index 0000000000000..a8879d74cf9f2
--- /dev/null
+++ b/llvm/test/tools/llvm-nm/X86/unique.test
@@ -0,0 +1,50 @@
+## Check that we print 'u' for unique symbols
+## and 'U' for a unique symbol without a section.
+# RUN: yaml2obj %s | llvm-nm - | FileCheck %s
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:  .nobits
+    Type:  SHT_NOBITS
+  - Name:  .progbits
+    Type:  SHT_PROGBITS
+  - Name:  .progbits_alloc
+    Type:  SHT_PROGBITS
+    Flags: [SHF_ALLOC]
+  - Name:  .progbits_alloc_write
+    Type:  SHT_PROGBITS
+    Flags: [SHF_ALLOC, SHF_WRITE]
+  - Name:  .progbits_execinstr
+    Type:  SHT_PROGBITS
+    Flags: [SHF_EXECINSTR]
+Symbols:
+  - Name:    nosection
+    Binding: STB_GNU_UNIQUE
+  - Name:    nobits
+    Section: .nobits
+    Binding: STB_GNU_UNIQUE
+  - Name:    progbits
+    Section: .progbits
+    Binding: STB_GNU_UNIQUE
+  - Name:    progbits_alloc
+    Section: .progbits_alloc
+    Binding: STB_GNU_UNIQUE
+  - Name:    progbits_alloc_write
+    Section: .progbits_alloc_write
+    Binding: STB_GNU_UNIQUE
+  - Name:    progbits_execinstr
+    Section: .progbits_execinstr
+    Binding: STB_GNU_UNIQUE
+...
+
+# CHECK: 0000000000000000 u nobits
+# CHECK:                  U nosection
+# CHECK: 0000000000000000 u progbits
+# CHECK: 0000000000000000 u progbits_alloc
+# CHECK: 0000000000000000 u progbits_alloc_write
+# CHECK: 0000000000000000 u progbits_execinstr
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index f21978d50cc12..871ca638d9d75 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -894,6 +894,9 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
     return '?';
   }
 
+  if (SymI->getBinding() == ELF::STB_GNU_UNIQUE)
+    return 'u';
+
   elf_section_iterator SecI = *SecIOrErr;
   if (SecI != Obj.section_end()) {
     uint32_t Type = SecI->getType();
@@ -1119,10 +1122,13 @@ static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
   else
     Ret = getSymbolNMTypeChar(cast<ELFObjectFileBase>(Obj), I);
 
-  if (Symflags & object::SymbolRef::SF_Global)
-    Ret = toupper(Ret);
+  if (!(Symflags & object::SymbolRef::SF_Global))
+    return Ret;
+
+  if (Obj.isELF() && ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
+    return Ret;
 
-  return Ret;
+  return toupper(Ret);
 }
 
 // getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"

From c652b3455ec55704f2e7d152fa7e4f45283b3258 Mon Sep 17 00:00:00 2001
From: Yevgeny Rouban <yevgeny.rouban@azul.com>
Date: Fri, 24 May 2019 04:34:23 +0000
Subject: [PATCH 0130/1176] [NFC] SwitchInst: Introduce wrapper for prof
 branch_weights handling

This patch introduces a wrapper class that re-implements
several mutator methods of SwitchInst to handle changes
of prof branch_weights metadata along with remove/add
switch case methods.
Subsequent patches will use this wrapper to implement
prof branch_weights metadata handling for SwitchInst.

Reviewers: davidx, eraman, reames, chandlerc
Reviewed By: davidx
Differential Revision: https://reviews.llvm.org/D62122

llvm-svn: 361596
---
 llvm/include/llvm/IR/Instructions.h |  46 +++++++++++
 llvm/lib/IR/Instructions.cpp        | 120 ++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)

diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 3e0c6d803d209..82833658c4182 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -3435,6 +3435,52 @@ class SwitchInst : public Instruction {
   }
 };
 
+/// A wrapper class to simplify modification of SwitchInst cases along with
+/// their prof branch_weights metadata.
+class SwitchInstProfUpdateWrapper {
+  SwitchInst &SI;
+  Optional<SmallVector<uint32_t, 8> > Weights;
+  bool Changed = false;
+
+protected:
+  static MDNode *getProfBranchWeightsMD(const SwitchInst &SI);
+
+  MDNode *buildProfBranchWeightsMD();
+
+  Optional<SmallVector<uint32_t, 8> > getProfBranchWeights();
+
+public:
+  using CaseWeightOpt = Optional<uint32_t>;
+  SwitchInst *operator->() { return &SI; }
+  SwitchInst &operator*() { return SI; }
+  operator SwitchInst *() { return &SI; }
+
+  SwitchInstProfUpdateWrapper(SwitchInst &SI)
+      : SI(SI), Weights(getProfBranchWeights()) {}
+
+  ~SwitchInstProfUpdateWrapper() {
+    if (Changed)
+      SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD());
+  }
+
+  /// Delegate the call to the underlying SwitchInst::removeCase() and remove
+  /// correspondent branch weight.
+  SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I);
+
+  /// Delegate the call to the underlying SwitchInst::addCase() and set the
+  /// specified branch weight for the added case.
+  void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W);
+
+  /// Delegate the call to the underlying SwitchInst::eraseFromParent() and mark
+  /// this object to not touch the underlying SwitchInst in destructor.
+  SymbolTableList<Instruction>::iterator eraseFromParent();
+
+  void setSuccessorWeight(unsigned idx, CaseWeightOpt W);
+  CaseWeightOpt getSuccessorWeight(unsigned idx);
+
+  static CaseWeightOpt getSuccessorWeight(const SwitchInst &SI, unsigned idx);
+};
+
 template <>
 struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
 };
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 9dc753e960c5b..8812df35e26b2 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -3870,6 +3870,126 @@ void SwitchInst::growOperands() {
   growHungoffUses(ReservedSpace);
 }
 
+MDNode *
+SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) {
+  if (MDNode *ProfileData = SI.getMetadata(LLVMContext::MD_prof))
+    if (auto *MDName = dyn_cast<MDString>(ProfileData->getOperand(0)))
+      if (MDName->getString() == "branch_weights")
+        return ProfileData;
+  return nullptr;
+}
+
+MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
+  assert(Changed && "called only if metadata has changed");
+
+  if (!Weights)
+    return nullptr;
+
+  assert(SI.getNumSuccessors() == Weights->size() &&
+         "num of prof branch_weights must accord with num of successors");
+
+  bool AllZeroes =
+      all_of(Weights.getValue(), [](uint32_t W) { return W == 0; });
+
+  if (AllZeroes || Weights.getValue().size() < 2)
+    return nullptr;
+
+  return MDBuilder(SI.getParent()->getContext()).createBranchWeights(*Weights);
+}
+
+Optional<SmallVector<uint32_t, 8> >
+SwitchInstProfUpdateWrapper::getProfBranchWeights() {
+  MDNode *ProfileData = getProfBranchWeightsMD(SI);
+  if (!ProfileData)
+    return None;
+
+  SmallVector<uint32_t, 8> Weights;
+  for (unsigned CI = 1, CE = SI.getNumSuccessors(); CI <= CE; ++CI) {
+    ConstantInt *C = mdconst::extract<ConstantInt>(ProfileData->getOperand(CI));
+    uint32_t CW = C->getValue().getZExtValue();
+    Weights.push_back(CW);
+  }
+  return Weights;
+}
+
+SwitchInst::CaseIt
+SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
+  if (Weights) {
+    assert(SI.getNumSuccessors() == Weights->size() &&
+           "num of prof branch_weights must accord with num of successors");
+    Changed = true;
+    // Copy the last case to the place of the removed one and shrink.
+    // This is tightly coupled with the way SwitchInst::removeCase() removes
+    // the cases in SwitchInst::removeCase(CaseIt).
+    Weights.getValue()[I->getCaseIndex() + 1] = Weights.getValue().back();
+    Weights.getValue().pop_back();
+  }
+  return SI.removeCase(I);
+}
+
+void SwitchInstProfUpdateWrapper::addCase(
+    ConstantInt *OnVal, BasicBlock *Dest,
+    SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
+  SI.addCase(OnVal, Dest);
+
+  if (!Weights && W && *W) {
+    Changed = true;
+    Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
+    Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
+  } else if (Weights) {
+    Changed = true;
+    Weights.getValue().push_back(W ? *W : 0);
+  }
+  if (Weights)
+    assert(SI.getNumSuccessors() == Weights->size() &&
+           "num of prof branch_weights must accord with num of successors");
+}
+
+SymbolTableList<Instruction>::iterator
+SwitchInstProfUpdateWrapper::eraseFromParent() {
+  // Instruction is erased. Mark as unchanged to not touch it in the destructor.
+  Changed = false;
+
+  if (Weights)
+    Weights->resize(0);
+  return SI.eraseFromParent();
+}
+
+SwitchInstProfUpdateWrapper::CaseWeightOpt
+SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) {
+  if (!Weights)
+    return None;
+  return Weights.getValue()[idx];
+}
+
+void SwitchInstProfUpdateWrapper::setSuccessorWeight(
+    unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
+  if (!W)
+    return;
+
+  if (!Weights && *W)
+    Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
+
+  if (Weights) {
+    auto &OldW = Weights.getValue()[idx];
+    if (*W != OldW) {
+      Changed = true;
+      OldW = *W;
+    }
+  }
+}
+
+SwitchInstProfUpdateWrapper::CaseWeightOpt
+SwitchInstProfUpdateWrapper::getSuccessorWeight(const SwitchInst &SI,
+                                                unsigned idx) {
+  if (MDNode *ProfileData = getProfBranchWeightsMD(SI))
+    return mdconst::extract<ConstantInt>(ProfileData->getOperand(idx + 1))
+        ->getValue()
+        .getZExtValue();
+
+  return None;
+}
+
 //===----------------------------------------------------------------------===//
 //                        IndirectBrInst Implementation
 //===----------------------------------------------------------------------===//

From 0ee23c958bbcf6955568d5287d1495f485426800 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Fri, 24 May 2019 04:41:47 +0000
Subject: [PATCH 0131/1176] [Utility] Small improvements to the Broadcaster
 class (NFC)

I touched the Broadcaster class earlier today (r361544) and noticed a
few things that could be improved. This patch includes variety of small
fixes: use early returns, use LLDB_LOG macro, use doxygen comments and
finally format the class.

llvm-svn: 361597
---
 lldb/include/lldb/Utility/Broadcaster.h | 121 ++++++++++++------------
 lldb/source/Utility/Broadcaster.cpp     |  85 ++++++++---------
 2 files changed, 99 insertions(+), 107 deletions(-)

diff --git a/lldb/include/lldb/Utility/Broadcaster.h b/lldb/include/lldb/Utility/Broadcaster.h
index 1444282c7f7b0..fe4d1ca479b89 100644
--- a/lldb/include/lldb/Utility/Broadcaster.h
+++ b/lldb/include/lldb/Utility/Broadcaster.h
@@ -29,14 +29,14 @@ class Broadcaster;
 class EventData;
 class Listener;
 class Stream;
-}
+} // namespace lldb_private
 
 namespace lldb_private {
 
-// lldb::BroadcastEventSpec
-//
-// This class is used to specify a kind of event to register for.  The Debugger
-// maintains a list of BroadcastEventSpec's and when it is made
+/// lldb::BroadcastEventSpec
+///
+/// This class is used to specify a kind of event to register for.  The
+/// Debugger maintains a list of BroadcastEventSpec's and when it is made
 class BroadcastEventSpec {
 public:
   BroadcastEventSpec(ConstString broadcaster_class, uint32_t event_bits)
@@ -48,19 +48,19 @@ class BroadcastEventSpec {
 
   uint32_t GetEventBits() const { return m_event_bits; }
 
-  // Tell whether this BroadcastEventSpec is contained in in_spec. That is: (a)
-  // the two spec's share the same broadcaster class (b) the event bits of this
-  // spec are wholly contained in those of in_spec.
+  /// Tell whether this BroadcastEventSpec is contained in in_spec. That is:
+  /// (a) the two spec's share the same broadcaster class (b) the event bits of
+  /// this spec are wholly contained in those of in_spec.
   bool IsContainedIn(const BroadcastEventSpec &in_spec) const {
     if (m_broadcaster_class != in_spec.GetBroadcasterClass())
       return false;
     uint32_t in_bits = in_spec.GetEventBits();
     if (in_bits == m_event_bits)
       return true;
-    else {
-      if ((m_event_bits & in_bits) != 0 && (m_event_bits & ~in_bits) == 0)
-        return true;
-    }
+
+    if ((m_event_bits & in_bits) != 0 && (m_event_bits & ~in_bits) == 0)
+      return true;
+
     return false;
   }
 
@@ -81,10 +81,9 @@ class BroadcasterManager
   BroadcasterManager();
 
 public:
-  // Listeners hold onto weak pointers to their broadcaster managers.  So they
-  // must be made into shared pointers, which you do with
-  // MakeBroadcasterManager.
-
+  /// Listeners hold onto weak pointers to their broadcaster managers.  So they
+  /// must be made into shared pointers, which you do with
+  /// MakeBroadcasterManager.
   static lldb::BroadcasterManagerSP MakeBroadcasterManager();
 
   ~BroadcasterManager() = default;
@@ -179,8 +178,8 @@ class BroadcasterManager
     bool operator()(const event_listener_key &input) const {
       if (input.second == m_listener_sp)
         return true;
-      else
-        return false;
+
+      return false;
     }
 
   private:
@@ -197,15 +196,15 @@ class BroadcasterManager
     bool operator()(const event_listener_key &input) const {
       if (input.second.get() == m_listener)
         return true;
-      else
-        return false;
+
+      return false;
     }
 
     bool operator()(const lldb::ListenerSP &input) const {
       if (input.get() == m_listener)
         return true;
-      else
-        return false;
+
+      return false;
     }
 
   private:
@@ -413,32 +412,30 @@ class Broadcaster {
   }
 
   /// Restore the state of the Broadcaster from a previous hijack attempt.
-  ///
   void RestoreBroadcaster() { m_broadcaster_sp->RestoreBroadcaster(); }
 
-  // This needs to be filled in if you are going to register the broadcaster
-  // with the broadcaster manager and do broadcaster class matching.
-  // FIXME: Probably should make a ManagedBroadcaster subclass with all the bits
-  // needed to work
-  // with the BroadcasterManager, so that it is clearer how to add one.
+  /// This needs to be filled in if you are going to register the broadcaster
+  /// with the broadcaster manager and do broadcaster class matching.
+  /// FIXME: Probably should make a ManagedBroadcaster subclass with all the
+  /// bits needed to work with the BroadcasterManager, so that it is clearer
+  /// how to add one.
   virtual ConstString &GetBroadcasterClass() const;
 
   lldb::BroadcasterManagerSP GetManager();
 
 protected:
-  // BroadcasterImpl contains the actual Broadcaster implementation.  The
-  // Broadcaster makes a BroadcasterImpl which lives as long as it does.  The
-  // Listeners & the Events hold a weak pointer to the BroadcasterImpl, so that
-  // they can survive if a Broadcaster they were listening to is destroyed w/o
-  // their being able to unregister from it (which can happen if the
-  // Broadcasters & Listeners are being destroyed on separate threads
-  // simultaneously. The Broadcaster itself can't be shared out as a weak
-  // pointer, because some things that are broadcasters (e.g. the Target and
-  // the Process) are shared in their own right.
-  //
-  // For the most part, the Broadcaster functions dispatch to the
-  // BroadcasterImpl, and are documented in the public Broadcaster API above.
-
+  /// BroadcasterImpl contains the actual Broadcaster implementation.  The
+  /// Broadcaster makes a BroadcasterImpl which lives as long as it does.  The
+  /// Listeners & the Events hold a weak pointer to the BroadcasterImpl, so
+  /// that they can survive if a Broadcaster they were listening to is
+  /// destroyed w/o their being able to unregister from it (which can happen if
+  /// the Broadcasters & Listeners are being destroyed on separate threads
+  /// simultaneously. The Broadcaster itself can't be shared out as a weak
+  /// pointer, because some things that are broadcasters (e.g. the Target and
+  /// the Process) are shared in their own right.
+  ///
+  /// For the most part, the Broadcaster functions dispatch to the
+  /// BroadcasterImpl, and are documented in the public Broadcaster API above.
   class BroadcasterImpl {
     friend class Listener;
     friend class Broadcaster;
@@ -505,7 +502,6 @@ class Broadcaster {
 
     const char *GetHijackingListenerName();
 
-    //
     typedef llvm::SmallVector<std::pair<lldb::ListenerWP, uint32_t>, 4>
         collection;
     typedef std::map<uint32_t, std::string> event_names_map;
@@ -513,22 +509,28 @@ class Broadcaster {
     llvm::SmallVector<std::pair<lldb::ListenerSP, uint32_t &>, 4>
     GetListeners();
 
-    Broadcaster &m_broadcaster;    ///< The broadcaster that this implements
-    event_names_map m_event_names; ///< Optionally define event names for
-                                   ///readability and logging for each event bit
-    collection m_listeners; ///< A list of Listener / event_mask pairs that are
-                            ///listening to this broadcaster.
-    std::recursive_mutex
-        m_listeners_mutex; ///< A mutex that protects \a m_listeners.
-    std::vector<lldb::ListenerSP> m_hijacking_listeners; // A simple mechanism
-                                                         // to intercept events
-                                                         // from a broadcaster
-    std::vector<uint32_t> m_hijacking_masks; // At some point we may want to
-                                             // have a stack or Listener
-    // collections, but for now this is just for private hijacking.
+    /// The broadcaster that this implements.
+    Broadcaster &m_broadcaster;
+
+    /// Optionally define event names for readability and logging for each
+    /// event bit.
+    event_names_map m_event_names;
+
+    /// A list of Listener / event_mask pairs that are listening to this
+    /// broadcaster.
+    collection m_listeners;
+
+    /// A mutex that protects \a m_listeners.
+    std::recursive_mutex m_listeners_mutex;
+
+    /// A simple mechanism to intercept events from a broadcaster
+    std::vector<lldb::ListenerSP> m_hijacking_listeners;
+
+    /// At some point we may want to have a stack or Listener collections, but
+    /// for now this is just for private hijacking.
+    std::vector<uint32_t> m_hijacking_masks;
 
   private:
-    // For Broadcaster only
     DISALLOW_COPY_AND_ASSIGN(BroadcasterImpl);
   };
 
@@ -540,14 +542,13 @@ class Broadcaster {
   const char *GetHijackingListenerName() {
     return m_broadcaster_sp->GetHijackingListenerName();
   }
-  // Classes that inherit from Broadcaster can see and modify these
 
 private:
-  // For Broadcaster only
   BroadcasterImplSP m_broadcaster_sp;
   lldb::BroadcasterManagerSP m_manager_sp;
-  const ConstString
-      m_broadcaster_name; ///< The name of this broadcaster object.
+
+  /// The name of this broadcaster object.
+  const ConstString m_broadcaster_name;
 
   DISALLOW_COPY_AND_ASSIGN(Broadcaster);
 };
diff --git a/lldb/source/Utility/Broadcaster.cpp b/lldb/source/Utility/Broadcaster.cpp
index c0b8567558eb3..597888cfa0e2e 100644
--- a/lldb/source/Utility/Broadcaster.cpp
+++ b/lldb/source/Utility/Broadcaster.cpp
@@ -30,9 +30,8 @@ Broadcaster::Broadcaster(BroadcasterManagerSP manager_sp, const char *name)
     : m_broadcaster_sp(std::make_shared<BroadcasterImpl>(*this)),
       m_manager_sp(std::move(manager_sp)), m_broadcaster_name(name) {
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
-  if (log)
-    log->Printf("%p Broadcaster::Broadcaster(\"%s\")",
-                static_cast<void *>(this), GetBroadcasterName().AsCString());
+  LLDB_LOG(log, "{0} Broadcaster::Broadcaster(\"{1}\")",
+           static_cast<void *>(this), GetBroadcasterName().AsCString());
 }
 
 Broadcaster::BroadcasterImpl::BroadcasterImpl(Broadcaster &broadcaster)
@@ -41,9 +40,8 @@ Broadcaster::BroadcasterImpl::BroadcasterImpl(Broadcaster &broadcaster)
 
 Broadcaster::~Broadcaster() {
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
-  if (log)
-    log->Printf("%p Broadcaster::~Broadcaster(\"%s\")",
-                static_cast<void *>(this), m_broadcaster_name.AsCString());
+  LLDB_LOG(log, "{0} Broadcaster::~Broadcaster(\"{1}\")",
+           static_cast<void *>(this), GetBroadcasterName().AsCString());
 
   Clear();
 }
@@ -213,8 +211,7 @@ void Broadcaster::BroadcasterImpl::PrivateBroadcastEvent(EventSP &event_sp,
       hijacking_listener_sp.reset();
   }
 
-  Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_EVENTS));
-  if (log) {
+  if (Log *log = lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_EVENTS)) {
     StreamString event_description;
     event_sp->Dump(&event_description);
     log->Printf("%p Broadcaster(\"%s\")::BroadcastEvent (event_sp = {%s}, "
@@ -225,18 +222,16 @@ void Broadcaster::BroadcasterImpl::PrivateBroadcastEvent(EventSP &event_sp,
   }
 
   if (hijacking_listener_sp) {
-    if (unique &&
-        hijacking_listener_sp->PeekAtNextEventForBroadcasterWithType(
-            &m_broadcaster, event_type))
+    if (unique && hijacking_listener_sp->PeekAtNextEventForBroadcasterWithType(
+                      &m_broadcaster, event_type))
       return;
     hijacking_listener_sp->AddEvent(event_sp);
   } else {
     for (auto &pair : GetListeners()) {
       if (!(pair.second & event_type))
         continue;
-      if (unique &&
-          pair.first->PeekAtNextEventForBroadcasterWithType(&m_broadcaster,
-                                                            event_type))
+      if (unique && pair.first->PeekAtNextEventForBroadcasterWithType(
+                        &m_broadcaster, event_type))
         continue;
 
       pair.first->AddEvent(event_sp);
@@ -267,11 +262,11 @@ bool Broadcaster::BroadcasterImpl::HijackBroadcaster(
   std::lock_guard<std::recursive_mutex> guard(m_listeners_mutex);
 
   Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_EVENTS));
-  if (log)
-    log->Printf(
-        "%p Broadcaster(\"%s\")::HijackBroadcaster (listener(\"%s\")=%p)",
-        static_cast<void *>(this), GetBroadcasterName(),
-        listener_sp->m_name.c_str(), static_cast<void *>(listener_sp.get()));
+  LLDB_LOG(
+      log,
+      "{0} Broadcaster(\"{1}\")::HijackBroadcaster (listener(\"{2}\")={3})",
+      static_cast<void *>(this), GetBroadcasterName(),
+      listener_sp->m_name.c_str(), static_cast<void *>(listener_sp.get()));
   m_hijacking_listeners.push_back(listener_sp);
   m_hijacking_masks.push_back(event_mask);
   return true;
@@ -288,24 +283,22 @@ bool Broadcaster::BroadcasterImpl::IsHijackedForEvent(uint32_t event_mask) {
 const char *Broadcaster::BroadcasterImpl::GetHijackingListenerName() {
   if (m_hijacking_listeners.size()) {
     return m_hijacking_listeners.back()->GetName();
-  } else {
-    return nullptr;
   }
+  return nullptr;
 }
 
 void Broadcaster::BroadcasterImpl::RestoreBroadcaster() {
   std::lock_guard<std::recursive_mutex> guard(m_listeners_mutex);
 
   if (!m_hijacking_listeners.empty()) {
+    ListenerSP listener_sp = m_hijacking_listeners.back();
     Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_EVENTS));
-    if (log) {
-      ListenerSP listener_sp = m_hijacking_listeners.back();
-      log->Printf("%p Broadcaster(\"%s\")::RestoreBroadcaster (about to pop "
-                  "listener(\"%s\")=%p)",
-                  static_cast<void *>(this), GetBroadcasterName(),
-                  listener_sp->m_name.c_str(),
-                  static_cast<void *>(listener_sp.get()));
-    }
+    LLDB_LOG(log,
+             "{0} Broadcaster(\"{1}\")::RestoreBroadcaster (about to pop "
+             "listener(\"{2}\")={3})",
+             static_cast<void *>(this), GetBroadcasterName(),
+             listener_sp->m_name.c_str(),
+             static_cast<void *>(listener_sp.get()));
     m_hijacking_listeners.pop_back();
   }
   if (!m_hijacking_masks.empty())
@@ -320,9 +313,8 @@ ConstString &Broadcaster::GetBroadcasterClass() const {
 bool BroadcastEventSpec::operator<(const BroadcastEventSpec &rhs) const {
   if (GetBroadcasterClass() == rhs.GetBroadcasterClass()) {
     return GetEventBits() < rhs.GetEventBits();
-  } else {
-    return GetBroadcasterClass() < rhs.GetBroadcasterClass();
   }
+  return GetBroadcasterClass() < rhs.GetBroadcasterClass();
 }
 
 BroadcastEventSpec &BroadcastEventSpec::
@@ -378,17 +370,16 @@ bool BroadcasterManager::UnregisterListenerForEvents(
     iter = find_if(m_event_map.begin(), end_iter, predicate);
     if (iter == end_iter) {
       break;
-    } else {
-      uint32_t iter_event_bits = (*iter).first.GetEventBits();
-      removed_some = true;
-
-      if (event_bits_to_remove != iter_event_bits) {
-        uint32_t new_event_bits = iter_event_bits & ~event_bits_to_remove;
-        to_be_readded.push_back(BroadcastEventSpec(
-            event_spec.GetBroadcasterClass(), new_event_bits));
-      }
-      m_event_map.erase(iter);
     }
+    uint32_t iter_event_bits = (*iter).first.GetEventBits();
+    removed_some = true;
+
+    if (event_bits_to_remove != iter_event_bits) {
+      uint32_t new_event_bits = iter_event_bits & ~event_bits_to_remove;
+      to_be_readded.push_back(
+          BroadcastEventSpec(event_spec.GetBroadcasterClass(), new_event_bits));
+    }
+    m_event_map.erase(iter);
   }
 
   // Okay now add back the bits that weren't completely removed:
@@ -408,8 +399,8 @@ ListenerSP BroadcasterManager::GetListenerForEventSpec(
                  BroadcastEventSpecMatches(event_spec));
   if (iter != end_iter)
     return (*iter).second;
-  else
-    return nullptr;
+
+  return nullptr;
 }
 
 void BroadcasterManager::RemoveListener(Listener *listener) {
@@ -427,8 +418,8 @@ void BroadcasterManager::RemoveListener(Listener *listener) {
     iter = find_if(m_event_map.begin(), end_iter, predicate);
     if (iter == end_iter)
       break;
-    else
-      m_event_map.erase(iter);
+
+    m_event_map.erase(iter);
   }
 }
 
@@ -444,8 +435,8 @@ void BroadcasterManager::RemoveListener(const lldb::ListenerSP &listener_sp) {
     iter = find_if(m_event_map.begin(), end_iter, predicate);
     if (iter == end_iter)
       break;
-    else
-      m_event_map.erase(iter);
+
+    m_event_map.erase(iter);
   }
 }
 

From 0de4e935bb591b8c9ee41a9acbb5cda02838d8a6 Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin@accesssoftek.com>
Date: Fri, 24 May 2019 04:46:22 +0000
Subject: [PATCH 0132/1176] Do not resolve directory junctions for
 `-fdiagnostics-absolute-paths` on Windows.

If the source file path contains directory junctions, and we resolve them when
printing diagnostic messages, these paths look independent for an IDE.
For example, both Visual Studio and Visual Studio Code open separate editors
for such paths, which is not only inconvenient but might even result in losing
changes made in one of them.

Differential Revision: https://reviews.llvm.org/D59415

llvm-svn: 361598
---
 clang/lib/Frontend/TextDiagnostic.cpp         | 21 +++++++++++++++++++
 .../test/Frontend/absolute-paths-windows.test |  9 ++++++++
 clang/test/Frontend/lit.local.cfg             |  2 +-
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Frontend/absolute-paths-windows.test

diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index c6ebdcaf9a8fa..d0c91286250e7 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -765,7 +765,28 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
     const DirectoryEntry *Dir = SM.getFileManager().getDirectory(
         llvm::sys::path::parent_path(Filename));
     if (Dir) {
+      // We want to print a simplified absolute path, i. e. without "dots".
+      //
+      // The hardest part here are the paths like "<part1>/<link>/../<part2>".
+      // On Unix-like systems, we cannot just collapse "<link>/..", because
+      // paths are resolved sequentially, and, thereby, the path
+      // "<part1>/<part2>" may point to a different location. That is why
+      // we use FileManager::getCanonicalName(), which expands all indirections
+      // with llvm::sys::fs::real_path() and caches the result.
+      //
+      // On the other hand, it would be better to preserve as much of the
+      // original path as possible, because that helps a user to recognize it.
+      // real_path() expands all links, which sometimes too much. Luckily,
+      // on Windows we can just use llvm::sys::path::remove_dots(), because,
+      // on that system, both aforementioned paths point to the same place.
+#ifdef _WIN32
+      SmallString<4096> DirName = Dir->getName();
+      llvm::sys::fs::make_absolute(DirName);
+      llvm::sys::path::native(DirName);
+      llvm::sys::path::remove_dots(DirName, /* remove_dot_dot */ true);
+#else
       StringRef DirName = SM.getFileManager().getCanonicalName(Dir);
+#endif
       llvm::sys::path::append(AbsoluteFilename, DirName,
                               llvm::sys::path::filename(Filename));
       Filename = StringRef(AbsoluteFilename.data(), AbsoluteFilename.size());
diff --git a/clang/test/Frontend/absolute-paths-windows.test b/clang/test/Frontend/absolute-paths-windows.test
new file mode 100644
index 0000000000000..10741d56452b3
--- /dev/null
+++ b/clang/test/Frontend/absolute-paths-windows.test
@@ -0,0 +1,9 @@
+// REQUIRES: system-windows
+// RUN: rm -rf %t.dir
+// RUN: mkdir -p %t.dir\real
+// RUN: cmd /c mklink /j %t.dir\junc %t.dir\real
+// RUN: echo "wrong code" > %t.dir\real\foo.cpp
+// RUN: not %clang_cc1 -fsyntax-only -fdiagnostics-absolute-paths %t.dir\junc\foo.cpp 2>&1 | FileCheck %s
+
+// CHECK-NOT: .dir\real\foo.cpp
+// CHECK: .dir\junc\foo.cpp
diff --git a/clang/test/Frontend/lit.local.cfg b/clang/test/Frontend/lit.local.cfg
index 7a05c5dfd2597..835360be27551 100644
--- a/clang/test/Frontend/lit.local.cfg
+++ b/clang/test/Frontend/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.c', '.cpp', '.m', '.mm', '.ll', '.cl']
+config.suffixes = ['.c', '.cpp', '.m', '.mm', '.ll', '.cl', '.test']

From af0add6c39f7fcc641a2ae38753a9bc4eae47b28 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 24 May 2019 04:46:56 +0000
Subject: [PATCH 0133/1176] [X86] Add test case that was supposed to go with
 r360102.

Found in my working area. Guess I forgot 'git add' before committing.

llvm-svn: 361599
---
 .../CodeGen/X86/asm-reg-type-mismatch-avx512.ll   | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll

diff --git a/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll b/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
new file mode 100644
index 0000000000000..1c5e1ce8a6b99
--- /dev/null
+++ b/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s
+
+define i64 @test1() nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    vmovq {{.*#+}} xmm16 = mem[0],zero
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vmovq %xmm16, %rax
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 asm sideeffect "vmovq $1, $0", "={xmm16},*m,~{dirflag},~{fpsr},~{flags}"(i64* null) nounwind
+  ret i64 %0
+}

From 449bfdd1b02bf441f6862dac1169bb5208eaccbc Mon Sep 17 00:00:00 2001
From: QingShan Zhang <qshanz@cn.ibm.com>
Date: Fri, 24 May 2019 05:30:09 +0000
Subject: [PATCH 0134/1176] [Power9] Add a specific heuristic to schedule the
 addi before the load  When we are scheduling the load and addi, if all other
 heuristic didn't take effect,  we will try to schedule the addi before the
 load, to hide the latency, and avoid the  true dependency added by RA. And
 this only take effects for Power9.

Differential Revision: https://reviews.llvm.org/D61930

llvm-svn: 361600
---
 .../Target/PowerPC/PPCMachineScheduler.cpp    | 51 +++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCMachineScheduler.h |  7 +++
 .../CodeGen/PowerPC/schedule-addi-load.mir    | 19 ++++++-
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 19aa53d54f149..d57e38acef683 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -5,9 +5,60 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "PPC.h"
 #include "PPCMachineScheduler.h"
 using namespace llvm;
 
+static cl::opt<bool> 
+DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
+                         cl::desc("Disable scheduling addi instruction before" 
+                                  "load for ppc"), cl::Hidden);
+
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+                                                  SchedCandidate &TryCand,
+                                                  SchedBoundary &Zone) const {
+  if (DisableAddiLoadHeuristic)
+    return false;
+
+  auto isADDIInstr = [&] (const MachineInstr &Inst) {
+    return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
+  };
+
+  SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
+  SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
+  if (isADDIInstr(*FirstCand.SU->getInstr()) &&
+      SecondCand.SU->getInstr()->mayLoad()) {
+    TryCand.Reason = Stall;
+    return true;
+  }
+  if (FirstCand.SU->getInstr()->mayLoad() &&
+      isADDIInstr(*SecondCand.SU->getInstr())) {
+    TryCand.Reason = NoCand;
+    return true;
+  }
+
+  return false;
+}
+
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                         SchedCandidate &TryCand,
+                                         SchedBoundary *Zone) const {
+  GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+  if (!Cand.isValid() || !Zone)
+    return;
+
+  // Add powerpc specific heuristic only when TryCand isn't selected or
+  // selected as node order.
+  if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+    return;
+
+  // There are some benefits to schedule the ADDI before the load to hide the
+  // latency, as RA may create a true dependency between the load and addi.
+  if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+    return;
+}
+
 void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
   // Custom PPC PostRA specific behavior here.
   PostGenericScheduler::enterMBB(MBB);
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
index ea6d3ffbb262b..93532d9545a6e 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -22,6 +22,13 @@ class PPCPreRASchedStrategy : public GenericScheduler {
 public:
   PPCPreRASchedStrategy(const MachineSchedContext *C) :
     GenericScheduler(C) {}
+protected:
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+private:
+  bool biasAddiLoadCandidate(SchedCandidate &Cand,
+                             SchedCandidate &TryCand,
+                             SchedBoundary &Zone) const;
 };
 
 /// A MachineSchedStrategy implementation for PowerPC post RA scheduling.
diff --git a/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir b/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir
index f0c9ea66f6b01..f9820062cfdf1 100644
--- a/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir
+++ b/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir
@@ -1,4 +1,7 @@
 # RUN: llc -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu -start-before machine-scheduler -stop-after machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu -disable-ppc-sched-addi-load -start-before machine-scheduler -stop-after machine-scheduler \
+# RUN:   -verify-machineinstrs %s -o - | FileCheck --check-prefix=CHECK-DISABLE %s
+# RUN: llc -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu -start-before machine-scheduler -stop-after machine-scheduler -verify-machineinstrs %s -o - | FileCheck --check-prefix=CHECK-P8 %s
 
 # Test that if the scheduler moves the addi before the load.
 --- |
@@ -93,11 +96,25 @@ body:             |
     B %bb.2
     ; CHECK-LABEL: foo
     ; CHECK: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32
+    ; CHECK-NEXT: %9:g8rc = ADDI8 %5, 1
     ; CHECK-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx) 
     ; CHECK-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4) 
-    ; CHECK-NEXT: %9:g8rc = ADDI8 %5, 1
     ; CHECK-NEXT: %8:crrc = CMPLW %6, %7 
     ; CHECK-NEXT: BCC 76, %8
+    ; CHECK-DISABLE-LABEL: foo
+    ; CHECK-DISABLE: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32
+    ; CHECK-DISABLE-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx) 
+    ; CHECK-DISABLE-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4) 
+    ; CHECK-DISABLE-NEXT: %9:g8rc = ADDI8 %5, 1
+    ; CHECK-DISABLE-NEXT: %8:crrc = CMPLW %6, %7 
+    ; CHECK-DISABLE-NEXT: BCC 76, %8
+    ; CHECK-P8-LABEL: foo
+    ; CHECK-P8: %5:g8rc_and_g8rc_nox0 = RLDICL %0, 0, 32
+    ; CHECK-P8-NEXT: %6:gprc = LBZX %2, %5 :: (load 1 from %ir.arrayidx)
+    ; CHECK-P8-NEXT: %7:gprc = LBZX %3, %5 :: (load 1 from %ir.arrayidx4)
+    ; CHECK-P8-NEXT: %8:crrc = CMPLW %6, %7
+    ; CHECK-P8-NEXT: %9:g8rc = ADDI8 %5, 1
+    ; CHECK-P8-NEXT: BCC 76, %8
   
   bb.2.while.end:
     $x3 = COPY %0

From b087129b5d3bde08ddaefa021bbefb2f152ed409 Mon Sep 17 00:00:00 2001
From: Matthias Gehre <M.Gehre@gmx.de>
Date: Fri, 24 May 2019 05:46:57 +0000
Subject: [PATCH 0135/1176] [clang-tidy] Add option "LiteralInitializers" to
 cppcoreguidelines-pro-type-member-init

Differential Revision: D24892

llvm-svn: 361601
---
 .../ProTypeMemberInitCheck.cpp                | 56 ++++++++++++++++++-
 .../ProTypeMemberInitCheck.h                  |  5 ++
 clang-tools-extra/docs/ReleaseNotes.rst       |  5 ++
 ...cppcoreguidelines-pro-type-member-init.rst |  4 ++
 ...es-pro-type-member-init-use-assignment.cpp | 40 +++++++++++++
 5 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp

diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
index c6cfe5ec0002f..fc334fb5b3baf 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
@@ -250,7 +250,8 @@ void fixInitializerList(const ASTContext &Context, DiagnosticBuilder &Diag,
 ProTypeMemberInitCheck::ProTypeMemberInitCheck(StringRef Name,
                                                ClangTidyContext *Context)
     : ClangTidyCheck(Name, Context),
-      IgnoreArrays(Options.get("IgnoreArrays", false)) {}
+      IgnoreArrays(Options.get("IgnoreArrays", false)),
+      UseAssignment(Options.getLocalOrGlobal("UseAssignment", false)) {}
 
 void ProTypeMemberInitCheck::registerMatchers(MatchFinder *Finder) {
   if (!getLangOpts().CPlusPlus)
@@ -314,6 +315,7 @@ void ProTypeMemberInitCheck::check(const MatchFinder::MatchResult &Result) {
 
 void ProTypeMemberInitCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "IgnoreArrays", IgnoreArrays);
+  Options.store(Opts, "UseAssignment", UseAssignment);
 }
 
 // FIXME: Copied from clang/lib/Sema/SemaDeclCXX.cpp.
@@ -338,6 +340,56 @@ static bool isEmpty(ASTContext &Context, const QualType &Type) {
   return isIncompleteOrZeroLengthArrayType(Context, Type);
 }
 
+static const char *getInitializer(QualType QT, bool UseAssignment) {
+  const char *DefaultInitializer = "{}";
+  if (!UseAssignment)
+    return DefaultInitializer;
+
+  if (QT->isPointerType())
+    return " = nullptr";
+
+  const BuiltinType *BT =
+      dyn_cast<BuiltinType>(QT.getCanonicalType().getTypePtr());
+  if (!BT)
+    return DefaultInitializer;
+
+  switch (BT->getKind()) {
+  case BuiltinType::Bool:
+    return " = false";
+  case BuiltinType::Float:
+    return " = 0.0F";
+  case BuiltinType::Double:
+    return " = 0.0";
+  case BuiltinType::LongDouble:
+    return " = 0.0L";
+  case BuiltinType::SChar:
+  case BuiltinType::Char_S:
+  case BuiltinType::WChar_S:
+  case BuiltinType::Char16:
+  case BuiltinType::Char32:
+  case BuiltinType::Short:
+  case BuiltinType::Int:
+    return " = 0";
+  case BuiltinType::UChar:
+  case BuiltinType::Char_U:
+  case BuiltinType::WChar_U:
+  case BuiltinType::UShort:
+  case BuiltinType::UInt:
+    return " = 0U";
+  case BuiltinType::Long:
+    return " = 0L";
+  case BuiltinType::ULong:
+    return " = 0UL";
+  case BuiltinType::LongLong:
+    return " = 0LL";
+  case BuiltinType::ULongLong:
+    return " = 0ULL";
+
+  default:
+    return DefaultInitializer;
+  }
+}
+
 void ProTypeMemberInitCheck::checkMissingMemberInitializer(
     ASTContext &Context, const CXXRecordDecl &ClassDecl,
     const CXXConstructorDecl *Ctor) {
@@ -420,7 +472,7 @@ void ProTypeMemberInitCheck::checkMissingMemberInitializer(
     for (const FieldDecl *Field : FieldsToFix) {
       Diag << FixItHint::CreateInsertion(
           getLocationForEndOfToken(Context, Field->getSourceRange().getEnd()),
-          "{}");
+          getInitializer(Field->getType(), UseAssignment));
     }
   } else if (Ctor) {
     // Otherwise, rewrite the constructor's initializer list.
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h
index 807acfe3bb220..2ec8fb16342b5 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h
@@ -64,6 +64,11 @@ class ProTypeMemberInitCheck : public ClangTidyCheck {
 
   // Whether arrays need to be initialized or not. Default is false.
   bool IgnoreArrays;
+
+  // Whether fix-its for initialization of fundamental type use assignment
+  // instead of brace initalization. Only effective in C++11 mode. Default is
+  // false.
+  bool UseAssignment;
 };
 
 } // namespace cppcoreguidelines
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 22acfa33feec4..d14a4938fa55e 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -186,6 +186,11 @@ Improvements to clang-tidy
   `WarnOnLargeObject` and `MaxSize` options to warn on any large trivial
   object caught by value.
 
+- Added `UseAssignment` option to :doc:`cppcoreguidelines-pro-type-member-init`
+
+  If set to true, the check will provide fix-its with literal initializers
+  (``int i = 0;``) instead of curly braces (``int i{};``).
+
 Improvements to include-fixer
 -----------------------------
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst
index 2fdb4e3698ab5..5ac0465e5462f 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst
@@ -33,6 +33,10 @@ Options
    zero-initialized during construction. For performance critical code, it may
    be important to not initialize fixed-size array members. Default is `0`.
 
+.. option:: UseAssignment
+   If set to non-zero, the check will provide fix-its with literal initializers
+   (``int i = 0;``) instead of curly braces (``int i{};``).
+
 This rule is part of the "Type safety" profile of the C++ Core
 Guidelines, corresponding to rule Type.6. See
 https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Pro-type-memberinit.
diff --git a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
new file mode 100644
index 0000000000000..dd1f9ac4aa9a5
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
@@ -0,0 +1,40 @@
+// RUN: %check_clang_tidy %s cppcoreguidelines-pro-type-member-init %t -- -config="{CheckOptions: [{key: "cppcoreguidelines-pro-type-member-init.UseAssignment", value: 1}]}" -- -std=c++11
+
+struct T {
+  int i;
+};
+
+struct S {
+  bool b;
+  // CHECK-FIXES: bool b = false;
+  char c;
+  // CHECK-FIXES: char c = 0;
+  signed char sc;
+  // CHECK-FIXES: signed char sc = 0;
+  unsigned char uc;
+  // CHECK-FIXES: unsigned char uc = 0U;
+  int i;
+  // CHECK-FIXES: int i = 0;
+  unsigned u;
+  // CHECK-FIXES: unsigned u = 0U;
+  long l;
+  // CHECK-FIXES: long l = 0L;
+  unsigned long ul;
+  // CHECK-FIXES: unsigned long ul = 0UL;
+  long long ll;
+  // CHECK-FIXES: long long ll = 0LL;
+  unsigned long long ull;
+  // CHECK-FIXES: unsigned long long ull = 0ULL;
+  float f;
+  // CHECK-FIXES: float f = 0.0F;
+  double d;
+  // CHECK-FIXES: double d = 0.0;
+  long double ld;
+  // CHECK-FIXES: double ld = 0.0L;
+  int *ptr;
+  // CHECK-FIXES: int *ptr = nullptr;
+  T t;
+  // CHECK-FIXES: T t{};
+  S() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: constructor does not initialize these fields:
+};

From 8ac0bc9832a251837681b3a1cd53085ed8d594dc Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Fri, 24 May 2019 08:04:03 +0000
Subject: [PATCH 0136/1176] DWARFContext: Make loading of sections thread-safe

Summary:
SymbolFileDWARF used to load debug sections in a thread-safe manner.
When we moved to DWARFContext, we dropped the thread-safe part, because
we thought it was not necessary.

It turns out this was only mostly correct.

The "mostly" part is there because this is a problem only if we use the
manual index, as that is the only source of intra-module paralelism.
Also, this only seems to occur for extremely simple files (like the ones
I've been creating for tests lately), where we've managed to start
indexing before loading the debug_str section. Then, two threads start
to load the section simultaneously and produce wrong results.

On more complex files, something seems to be loading the debug_str section
before we start indexing, as I haven't been able to reproduce this
there, but I have not investigated what it is.

I've tried to come up with a test for this, but I haven't been able to
reproduce the problem reliably. Still, while doing so, I created a way
to generate many compile units on demand. Given that most of our tests
work with only one or two compile units, it seems like this could be
useful anyway.

Reviewers: aprantl, JDevlieghere, clayborg

Subscribers: arphaman, lldb-commits

Differential Revision: https://reviews.llvm.org/D62316

llvm-svn: 361602
---
 .../DWARF/parallel-indexing-stress.s          | 82 +++++++++++++++++++
 .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 56 ++++++-------
 .../Plugins/SymbolFile/DWARF/DWARFContext.h   | 31 ++++---
 3 files changed, 128 insertions(+), 41 deletions(-)
 create mode 100644 lldb/lit/SymbolFile/DWARF/parallel-indexing-stress.s

diff --git a/lldb/lit/SymbolFile/DWARF/parallel-indexing-stress.s b/lldb/lit/SymbolFile/DWARF/parallel-indexing-stress.s
new file mode 100644
index 0000000000000..46e4c5565fd1d
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/parallel-indexing-stress.s
@@ -0,0 +1,82 @@
+# Stress-test the parallel indexing of compile units.
+
+# RUN: llvm-mc -triple x86_64-pc-linux %s -o %t -filetype=obj
+# RUN: %lldb %t -o "target variable A" -b | FileCheck %s
+
+# CHECK-COUNT-256: A = 47
+
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"Hand-written DWARF"
+.Lname:
+	.asciz	"A"
+.Linfo_string4:
+	.asciz	"int"                   # string offset=95
+
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	37                      # DW_AT_producer
+	.byte	14                      # DW_FORM_strp
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	2                       # Abbreviation Code
+	.byte	52                      # DW_TAG_variable
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	73                      # DW_AT_type
+	.byte	19                      # DW_FORM_ref4
+	.byte	2                       # DW_AT_location
+	.byte	24                      # DW_FORM_exprloc
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	3                       # Abbreviation Code
+	.byte	36                      # DW_TAG_base_type
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	62                      # DW_AT_encoding
+	.byte	11                      # DW_FORM_data1
+	.byte	11                      # DW_AT_byte_size
+	.byte	11                      # DW_FORM_data1
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+
+.macro generate_unit
+	.data
+A\@:
+	.long	47
+
+	.section	.debug_str,"MS",@progbits,1
+
+	.section	.debug_info,"",@progbits
+.Lcu_begin\@:
+	.long	.Ldebug_info_end\@-.Ldebug_info_start\@ # Length of Unit
+.Ldebug_info_start\@:
+	.short	4                       # DWARF version number
+	.long	.debug_abbrev           # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] 0xb:0x30 DW_TAG_compile_unit
+	.long	.Linfo_string0          # DW_AT_producer
+	.byte	2                       # Abbrev [2] 0x1e:0x15 DW_TAG_variable
+	.long	.Lname                  # DW_AT_name
+	.long	.Ltype\@-.Lcu_begin\@   # DW_AT_type
+	.byte	9                       # DW_AT_location
+	.byte	3
+	.quad	A\@
+.Ltype\@:
+	.byte	3                       # Abbrev [3] 0x33:0x7 DW_TAG_base_type
+	.long	.Linfo_string4          # DW_AT_name
+	.byte	5                       # DW_AT_encoding
+	.byte	4                       # DW_AT_byte_size
+	.byte	0                       # End Of Children Mark
+.Ldebug_info_end\@:
+
+.endm
+
+.rept 256
+generate_unit
+.endr
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp
index e5a72f8846556..951c72543b924 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp
@@ -27,72 +27,66 @@ static DWARFDataExtractor LoadSection(SectionList *section_list,
   return data;
 }
 
-static const DWARFDataExtractor &
-LoadOrGetSection(SectionList *section_list, SectionType section_type,
-                 llvm::Optional<DWARFDataExtractor> &extractor) {
-  if (!extractor)
-    extractor = LoadSection(section_list, section_type);
-  return *extractor;
+const DWARFDataExtractor &
+DWARFContext::LoadOrGetSection(SectionType main_section_type,
+                               llvm::Optional<SectionType> dwo_section_type,
+                               SectionData &data) {
+  llvm::call_once(data.flag, [&] {
+    if (dwo_section_type && isDwo())
+      data.data = LoadSection(m_dwo_section_list, *dwo_section_type);
+    else
+      data.data = LoadSection(m_main_section_list, main_section_type);
+  });
+  return data.data;
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadAbbrevData() {
-  if (isDwo())
-    return LoadOrGetSection(m_dwo_section_list, eSectionTypeDWARFDebugAbbrevDwo,
-                            m_data_debug_abbrev);
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugAbbrev,
-                          m_data_debug_abbrev);
+  return LoadOrGetSection(eSectionTypeDWARFDebugAbbrev,
+                          eSectionTypeDWARFDebugAbbrevDwo, m_data_debug_abbrev);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadArangesData() {
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugAranges,
+  return LoadOrGetSection(eSectionTypeDWARFDebugAranges, llvm::None,
                           m_data_debug_aranges);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadAddrData() {
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugAddr,
+  return LoadOrGetSection(eSectionTypeDWARFDebugAddr, llvm::None,
                           m_data_debug_addr);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadDebugInfoData() {
-  if (isDwo())
-    return LoadOrGetSection(m_dwo_section_list, eSectionTypeDWARFDebugInfoDwo,
-                            m_data_debug_info);
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugInfo,
-                          m_data_debug_info);
+  return LoadOrGetSection(eSectionTypeDWARFDebugInfo,
+                          eSectionTypeDWARFDebugInfoDwo, m_data_debug_info);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadLineData() {
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugLine,
+  return LoadOrGetSection(eSectionTypeDWARFDebugLine, llvm::None,
                           m_data_debug_line);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadLineStrData() {
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugLineStr,
+  return LoadOrGetSection(eSectionTypeDWARFDebugLineStr, llvm::None,
                           m_data_debug_line_str);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadMacroData() {
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugMacro,
+  return LoadOrGetSection(eSectionTypeDWARFDebugMacro, llvm::None,
                           m_data_debug_macro);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadStrData() {
-  if (isDwo())
-    return LoadOrGetSection(m_dwo_section_list, eSectionTypeDWARFDebugStrDwo,
-                            m_data_debug_str);
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugStr,
-                          m_data_debug_str);
+  return LoadOrGetSection(eSectionTypeDWARFDebugStr,
+                          eSectionTypeDWARFDebugStrDwo, m_data_debug_str);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadStrOffsetsData() {
-  if (isDwo())
-    return LoadOrGetSection(m_dwo_section_list, eSectionTypeDWARFDebugStrOffsetsDwo,
-                            m_data_debug_str_offsets);
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugStrOffsets,
+  return LoadOrGetSection(eSectionTypeDWARFDebugStrOffsets,
+                          eSectionTypeDWARFDebugStrOffsetsDwo,
                           m_data_debug_str_offsets);
 }
 
 const DWARFDataExtractor &DWARFContext::getOrLoadDebugTypesData() {
-  return LoadOrGetSection(m_main_section_list, eSectionTypeDWARFDebugTypes,
+  return LoadOrGetSection(eSectionTypeDWARFDebugTypes, llvm::None,
                           m_data_debug_types);
 }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h
index b5ef2254e71ec..4234587fa97bc 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h
@@ -12,6 +12,7 @@
 #include "DWARFDataExtractor.h"
 #include "lldb/Core/Section.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/Support/Threading.h"
 #include <memory>
 
 namespace lldb_private {
@@ -20,19 +21,29 @@ class DWARFContext {
   SectionList *m_main_section_list;
   SectionList *m_dwo_section_list;
 
-  llvm::Optional<DWARFDataExtractor> m_data_debug_abbrev;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_addr;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_aranges;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_info;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_line;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_line_str;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_macro;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_str;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_str_offsets;
-  llvm::Optional<DWARFDataExtractor> m_data_debug_types;
+  struct SectionData {
+    llvm::once_flag flag;
+    DWARFDataExtractor data;
+  };
+
+  SectionData m_data_debug_abbrev;
+  SectionData m_data_debug_addr;
+  SectionData m_data_debug_aranges;
+  SectionData m_data_debug_info;
+  SectionData m_data_debug_line;
+  SectionData m_data_debug_line_str;
+  SectionData m_data_debug_macro;
+  SectionData m_data_debug_str;
+  SectionData m_data_debug_str_offsets;
+  SectionData m_data_debug_types;
 
   bool isDwo() { return m_dwo_section_list != nullptr; }
 
+  const DWARFDataExtractor &
+  LoadOrGetSection(lldb::SectionType main_section_type,
+                   llvm::Optional<lldb::SectionType> dwo_section_type,
+                   SectionData &data);
+
 public:
   explicit DWARFContext(SectionList *main_section_list,
                         SectionList *dwo_section_list)

From f750842c8b35c56b58402c1842869a885bfcff56 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Fri, 24 May 2019 08:11:12 +0000
Subject: [PATCH 0137/1176] DWARF: Implement DW_AT_signature lookup for type
 unit support

Summary:
This patch implements the main feature of type units. When completing a
type, if we encounter a DW_AT_signature attribute, we use it's value to
lookup the complete definition of the type in the relevant type unit.

To enable this lookup, we build up a map of all type units in a symbol
file when parsing the units. Then we consult this map when resolving the
DW_AT_signature attribute.

I include add a couple of tests which exercise the type lookup feature,
including one that ensure we do something reasonable in case we fail to
lookup the type.

A lot of the ideas in this patch have been taken from D32167 and D61505.

Reviewers: clayborg, JDevlieghere, aprantl, alexshap

Subscribers: mgrang, lldb-commits

Differential Revision: https://reviews.llvm.org/D62246

llvm-svn: 361603
---
 .../DWARF/Inputs/debug-types-basic.cpp        |  10 +-
 .../DWARF/Inputs/debug-types-expressions.cpp  |   8 ++
 .../SymbolFile/DWARF/debug-types-basic.test   |   8 ++
 .../DWARF/debug-types-expressions.test        |  15 +++
 .../DWARF/debug-types-missing-signature.test  |  26 ++++
 .../DWARF/debug-types-signature-loop.s        | 115 ++++++++++++++++++
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  |  32 ++---
 .../SymbolFile/DWARF/DWARFDebugInfo.cpp       |  35 ++++--
 .../Plugins/SymbolFile/DWARF/DWARFDebugInfo.h |   6 +
 .../SymbolFile/DWARF/DWARFFormValue.cpp       |   8 ++
 .../SymbolFile/DWARF/DWARFTypeUnit.cpp        |   1 -
 .../Plugins/SymbolFile/DWARF/DWARFTypeUnit.h  |   8 ++
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |  10 ++
 .../Plugins/SymbolFile/DWARF/DWARFUnit.h      |   8 ++
 14 files changed, 258 insertions(+), 32 deletions(-)
 create mode 100644 lldb/lit/SymbolFile/DWARF/debug-types-missing-signature.test
 create mode 100644 lldb/lit/SymbolFile/DWARF/debug-types-signature-loop.s

diff --git a/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-basic.cpp b/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-basic.cpp
index e28515269b414..defa8ba5c69e7 100644
--- a/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-basic.cpp
+++ b/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-basic.cpp
@@ -1,13 +1,15 @@
+enum E { e1, e2, e3 };
+enum class EC { e1, e2, e3 };
+
 struct A {
   int i;
   long l;
   float f;
   double d;
+  E e;
+  EC ec;
 };
 
-enum E { e1, e2, e3 };
-enum class EC { e1, e2, e3 };
-
-extern constexpr A a{42, 47l, 4.2f, 4.7};
+extern constexpr A a{42, 47l, 4.2f, 4.7, e1, EC::e3};
 extern constexpr E e(e2);
 extern constexpr EC ec(EC::e2);
diff --git a/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-expressions.cpp b/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-expressions.cpp
index 7bdd79d097359..fe728cf040dc8 100644
--- a/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-expressions.cpp
+++ b/lldb/lit/SymbolFile/DWARF/Inputs/debug-types-expressions.cpp
@@ -11,11 +11,19 @@ struct B: public A {
 namespace ns {
 struct A {
   int i = 147;
+  ::A getA();
   A();
 };
 A::A() = default;
+
+::A A::getA() {
+  ::A a;
+  a.i = i - 1;
+  return a;
 }
 
+} // namespace ns
+
 int foo(A *a) {
   return a->f();
 }
diff --git a/lldb/lit/SymbolFile/DWARF/debug-types-basic.test b/lldb/lit/SymbolFile/DWARF/debug-types-basic.test
index 468a4e8567cfc..bf2c9c69d1dbc 100644
--- a/lldb/lit/SymbolFile/DWARF/debug-types-basic.test
+++ b/lldb/lit/SymbolFile/DWARF/debug-types-basic.test
@@ -19,6 +19,8 @@ type lookup A
 # CHECK-NEXT:   long l;
 # CHECK-NEXT:   float f;
 # CHECK-NEXT:   double d;
+# CHECK-NEXT:   E e;
+# CHECK-NEXT:   EC ec;
 # CHECK-NEXT: }
 
 type lookup E
@@ -44,3 +46,9 @@ print (E) 1
 print (EC) 1
 # CHECK-LABEL: print (EC) 1
 # CHECK: (EC) $1 = e2
+
+target variable a e ec
+# CHECK-LABEL: target variable a e ec
+# CHECK: (const A) a = (i = 42, l = 47, f = 4.{{[12].*}}, d = 4.{{[67].*}}, e = e1, ec = e3)
+# CHECK: (const E) e = e2
+# CHECK: (const EC) ec = e2
diff --git a/lldb/lit/SymbolFile/DWARF/debug-types-expressions.test b/lldb/lit/SymbolFile/DWARF/debug-types-expressions.test
index db67f8a39586e..da40269461afe 100644
--- a/lldb/lit/SymbolFile/DWARF/debug-types-expressions.test
+++ b/lldb/lit/SymbolFile/DWARF/debug-types-expressions.test
@@ -19,6 +19,13 @@ frame variable a
 # CHECK-LABEL: frame variable a
 # CHECK: (B *) a =
 
+frame variable *a
+# CHECK-LABEL: frame variable *a
+# CHECK:      (B) *a = {
+# CHECK-NEXT:   A = (i = 47)
+# CHECK-NEXT:   j = 42
+# CHECK-NEXT: }
+
 print a->f()
 # CHECK-LABEL: print a->f()
 # CHECK: (int) $0 = 47
@@ -26,3 +33,11 @@ print a->f()
 print ns::A()
 # CHECK-LABEL: print ns::A()
 # CHECK: (ns::A) $1 = (i = 147)
+
+print ns::A().i + a->i
+# CHECK-LABEL: print ns::A().i + a->i
+# CHECK: (int) $2 = 194
+
+print ns::A().getA()
+# CHECK-LABEL: ns::A().getA()
+# CHECK: (A) $3 = (i = 146)
diff --git a/lldb/lit/SymbolFile/DWARF/debug-types-missing-signature.test b/lldb/lit/SymbolFile/DWARF/debug-types-missing-signature.test
new file mode 100644
index 0000000000000..ca5c759136b45
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/debug-types-missing-signature.test
@@ -0,0 +1,26 @@
+Create a dangling DW_AT_signature reference by stripping the debug_types
+section, and make sure lldb does something reasonable.
+RUN: %clangxx -target x86_64-pc-linux %S/Inputs/debug-types-basic.cpp \
+RUN:   -g -gdwarf-4 -fdebug-types-section -c -o %t.o
+RUN: llvm-objcopy --remove-section=.debug_types %t.o %t
+
+
+RUN: %lldb %t -b -o "type lookup A" | FileCheck --check-prefix=LOOKUPA %s
+LOOKUPA: no type was found matching 'A'
+
+RUN: %lldb %t -b -o "type lookup E" | FileCheck --check-prefix=LOOKUPE %s
+LOOKUPE: no type was found matching 'E'
+
+RUN: %lldb %t -b -o "type lookup EC" | FileCheck --check-prefix=LOOKUPEC %s
+LOOKUPEC: no type was found matching 'EC'
+
+RUN: %lldb %t -b -o "print (E) 1" 2>&1 | FileCheck --check-prefix=PRINTE %s
+PRINTE: use of undeclared identifier 'E'
+
+RUN: %lldb %t -b -o "print (EC) 1" 2>&1 | FileCheck --check-prefix=PRINTEC %s
+PRINTEC: use of undeclared identifier 'EC'
+
+RUN: %lldb %t -b -o "target variable a e ec" | FileCheck --check-prefix=VARS %s
+VARS: (const (anonymous struct)) a = {}
+VARS: (const (anonymous enum)) e = 1
+VARS: (const (anonymous enum)) ec = 1
diff --git a/lldb/lit/SymbolFile/DWARF/debug-types-signature-loop.s b/lldb/lit/SymbolFile/DWARF/debug-types-signature-loop.s
new file mode 100644
index 0000000000000..d0d0fd5705a45
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/debug-types-signature-loop.s
@@ -0,0 +1,115 @@
+# REQUIRES: lld
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux -o %t.o %s
+# RUN: ld.lld %t.o -o %t
+# RUN: %lldb %t -o "target variable e" -b | FileCheck %s
+
+# CHECK: e = <could not resolve type>
+
+        .type   e,@object               # @e
+        .section        .rodata,"a",@progbits
+        .globl  e
+        .p2align        2
+e:
+        .long   0                       # 0x0
+        .size   e, 4
+
+.Lstr_offsets_base0:
+        .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz  "Hand-written DWARF"
+.Linfo_string1:
+        .asciz  "a.cpp"            
+.Linfo_string3:
+        .asciz  "e"           
+.Linfo_string4:
+        .asciz  "unsigned int"
+.Linfo_string5:
+        .asciz  "e1"          
+.Linfo_string6:
+        .asciz  "E"           
+
+        .section        .debug_abbrev,"",@progbits
+        .byte   1                       # Abbreviation Code
+        .byte   65                      # DW_TAG_type_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   19                      # DW_AT_language
+        .byte   5                       # DW_FORM_data2
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   5                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   37                      # DW_AT_producer
+        .byte   14                      # DW_FORM_strp
+        .byte   19                      # DW_AT_language
+        .byte   5                       # DW_FORM_data2
+        .byte   3                       # DW_AT_name
+        .byte   14                      # DW_FORM_strp
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   6                       # Abbreviation Code
+        .byte   52                      # DW_TAG_variable
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   14                      # DW_FORM_strp
+        .byte   73                      # DW_AT_type
+        .byte   19                      # DW_FORM_ref4
+        .byte   2                       # DW_AT_location
+        .byte   24                      # DW_FORM_exprloc
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   8                       # Abbreviation Code
+        .byte   4                       # DW_TAG_enumeration_type
+        .byte   0                       # DW_CHILDREN_no
+        .byte   60                      # DW_AT_declaration
+        .byte   25                      # DW_FORM_flag_present
+        .byte   105                     # DW_AT_signature
+        .byte   32                      # DW_FORM_ref_sig8
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+
+        .section        .debug_info,"",@progbits
+.Ltu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                       # DWARF version number
+        .byte   2                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .quad   5390450678491038984     # Type Signature
+        .long   .LE-.Ltu_begin0         # Type DIE Offset
+        .byte   1                       # Abbrev [1] 0x18:0x1d DW_TAG_type_unit
+        .short  4                       # DW_AT_language
+.LE:
+        .byte   8                       # Abbrev [8] 0x23:0xd DW_TAG_enumeration_type
+                                        # DW_AT_declaration
+        .quad   5390450678491038984     # DW_AT_signature
+.Lbase:
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end0:
+
+.Lcu_begin0:
+        .long   .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit
+.Ldebug_info_start1:
+        .short  5                       # DWARF version number
+        .byte   1                       # DWARF Unit Type
+        .byte   8                       # Address Size (in bytes)
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   5                       # Abbrev [5] 0xc:0x2c DW_TAG_compile_unit
+        .long   .Linfo_string0          # DW_AT_producer
+        .short  4                       # DW_AT_language
+        .long   .Linfo_string1          # DW_AT_name
+        .byte   6                       # Abbrev [6] 0x1e:0xb DW_TAG_variable
+        .long   .Linfo_string3          # DW_AT_name
+        .long   .LE_sig-.Lcu_begin0     # DW_AT_type
+        .byte   9                       # DW_AT_location
+        .byte   3
+        .quad   e
+.LE_sig:
+        .byte   8                       # Abbrev [8] 0x2e:0x9 DW_TAG_enumeration_type
+                                        # DW_AT_declaration
+        .quad   5390450678491038984     # DW_AT_signature
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end1:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index b3c9367861506..1e7cc0468f8bb 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -237,7 +237,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   AccessType accessibility = eAccessNone;
   if (!die)
     return nullptr;
-
   SymbolFileDWARF *dwarf = die.GetDWARF();
   if (log) {
     DWARFDIE context_die;
@@ -252,11 +251,27 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         die.GetTagAsCString(), die.GetName());
   }
 
+
   Type *type_ptr = dwarf->GetDIEToType().lookup(die.GetDIE());
   if (type_ptr == DIE_IS_BEING_PARSED)
     return nullptr;
   if (type_ptr)
     return type_ptr->shared_from_this();
+  // Set a bit that lets us know that we are currently parsing this
+  dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
+
+  if (DWARFDIE signature_die =
+          die.GetAttributeValueAsReferenceDIE(DW_AT_signature)) {
+    if (TypeSP type_sp =
+            ParseTypeFromDWARF(sc, signature_die, log, type_is_new_ptr)) {
+      dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+      if (clang::DeclContext *decl_ctx =
+              GetCachedClangDeclContextForDIE(signature_die))
+        LinkDeclContextToDIE(decl_ctx, die);
+      return type_sp;
+    }
+    return nullptr;
+  }
 
   TypeList *type_list = dwarf->GetTypeList();
   if (type_is_new_ptr)
@@ -289,9 +304,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   case DW_TAG_restrict_type:
   case DW_TAG_volatile_type:
   case DW_TAG_unspecified_type: {
-    // Set a bit that lets us know that we are currently parsing this
-    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
     const size_t num_attributes = die.GetAttributes(attributes);
     uint32_t encoding = 0;
     DWARFFormValue encoding_uid;
@@ -540,9 +552,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   case DW_TAG_structure_type:
   case DW_TAG_union_type:
   case DW_TAG_class_type: {
-    // Set a bit that lets us know that we are currently parsing this
-    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
     LanguageType class_language = eLanguageTypeUnknown;
     bool is_complete_objc_class = false;
     size_t calling_convention = llvm::dwarf::CallingConvention::DW_CC_normal;
@@ -974,9 +983,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   } break;
 
   case DW_TAG_enumeration_type: {
-    // Set a bit that lets us know that we are currently parsing this
-    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
     bool is_scoped = false;
     DWARFFormValue encoding_form;
 
@@ -1136,9 +1142,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   case DW_TAG_inlined_subroutine:
   case DW_TAG_subprogram:
   case DW_TAG_subroutine_type: {
-    // Set a bit that lets us know that we are currently parsing this
-    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
     DWARFFormValue type_die_form;
     bool is_variadic = false;
     bool is_inline = false;
@@ -1658,9 +1661,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   } break;
 
   case DW_TAG_array_type: {
-    // Set a bit that lets us know that we are currently parsing this
-    dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
-
     DWARFFormValue type_die_form;
     uint32_t byte_stride = 0;
     uint32_t bit_stride = 0;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
index c81b6ce05f0f7..0810afc8b1a1d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.cpp
@@ -15,6 +15,7 @@
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Utility/RegularExpression.h"
 #include "lldb/Utility/Stream.h"
+#include "llvm/Support/Casting.h"
 
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
@@ -74,12 +75,14 @@ llvm::Expected<DWARFDebugAranges &> DWARFDebugInfo::GetCompileUnitAranges() {
   return *m_cu_aranges_up;
 }
 
-void Parse(SymbolFileDWARF *dwarf, const DWARFDataExtractor &data,
-           DIERef::Section section, std::vector<DWARFUnitSP> &units) {
+void DWARFDebugInfo::ParseUnitsFor(DIERef::Section section) {
+  DWARFDataExtractor data = section == DIERef::Section::DebugTypes
+                                ? m_context.getOrLoadDebugTypesData()
+                                : m_context.getOrLoadDebugInfoData();
   lldb::offset_t offset = 0;
   while (data.ValidOffset(offset)) {
-    llvm::Expected<DWARFUnitSP> unit_sp =
-        DWARFUnit::extract(dwarf, units.size(), data, section, &offset);
+    llvm::Expected<DWARFUnitSP> unit_sp = DWARFUnit::extract(
+        m_dwarf2Data, m_units.size(), data, section, &offset);
 
     if (!unit_sp) {
       // FIXME: Propagate this error up.
@@ -89,10 +92,13 @@ void Parse(SymbolFileDWARF *dwarf, const DWARFDataExtractor &data,
 
     // If it didn't return an error, then it should be returning a valid Unit.
     assert(*unit_sp);
-
-    units.push_back(*unit_sp);
-
+    m_units.push_back(*unit_sp);
     offset = (*unit_sp)->GetNextUnitOffset();
+
+    if (auto *type_unit = llvm::dyn_cast<DWARFTypeUnit>(unit_sp->get())) {
+      m_type_hash_to_unit_index.emplace_back(type_unit->GetTypeHash(),
+                                             unit_sp.get()->GetID());
+    }
   }
 }
 
@@ -102,10 +108,9 @@ void DWARFDebugInfo::ParseUnitHeadersIfNeeded() {
   if (!m_dwarf2Data)
     return;
 
-  Parse(m_dwarf2Data, m_context.getOrLoadDebugInfoData(),
-        DIERef::Section::DebugInfo, m_units);
-  Parse(m_dwarf2Data, m_context.getOrLoadDebugTypesData(),
-        DIERef::Section::DebugTypes, m_units);
+  ParseUnitsFor(DIERef::Section::DebugInfo);
+  ParseUnitsFor(DIERef::Section::DebugTypes);
+  llvm::sort(m_type_hash_to_unit_index, llvm::less_first());
 }
 
 size_t DWARFDebugInfo::GetNumUnits() {
@@ -169,6 +174,14 @@ DWARFDebugInfo::GetUnitContainingDIEOffset(DIERef::Section section,
   return result;
 }
 
+DWARFTypeUnit *DWARFDebugInfo::GetTypeUnitForHash(uint64_t hash) {
+  auto pos = llvm::lower_bound(m_type_hash_to_unit_index,
+                               std::make_pair(hash, 0u), llvm::less_first());
+  if (pos == m_type_hash_to_unit_index.end() || pos->first != hash)
+    return nullptr;
+  return llvm::cast<DWARFTypeUnit>(GetUnitAtIndex(pos->second));
+}
+
 DWARFDIE
 DWARFDebugInfo::GetDIEForDIEOffset(DIERef::Section section,
                                    dw_offset_t die_offset) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
index 215862c1aa7b4..0fcf72c0bf029 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include "DWARFDIE.h"
+#include "DWARFTypeUnit.h"
 #include "DWARFUnit.h"
 #include "SymbolFileDWARF.h"
 #include "lldb/Core/STLUtils.h"
@@ -46,6 +47,7 @@ class DWARFDebugInfo {
   DWARFUnit *GetUnitContainingDIEOffset(DIERef::Section section,
                                         dw_offset_t die_offset);
   DWARFUnit *GetUnit(const DIERef &die_ref);
+  DWARFTypeUnit *GetTypeUnitForHash(uint64_t hash);
   DWARFDIE GetDIEForDIEOffset(DIERef::Section section,
                               dw_offset_t die_offset);
   DWARFDIE GetDIE(const DIERef &die_ref);
@@ -69,11 +71,15 @@ class DWARFDebugInfo {
   std::unique_ptr<DWARFDebugAranges>
       m_cu_aranges_up; // A quick address to compile unit table
 
+  std::vector<std::pair<uint64_t, uint32_t>> m_type_hash_to_unit_index;
+
 private:
   // All parsing needs to be done partially any managed by this class as
   // accessors are called.
   void ParseUnitHeadersIfNeeded();
 
+  void ParseUnitsFor(DIERef::Section section);
+
   uint32_t FindUnitIndex(DIERef::Section section, dw_offset_t offset);
 
   DISALLOW_COPY_AND_ASSIGN(DWARFDebugInfo);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
index 9b5faa6ed9bd6..ee4759c547549 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
@@ -585,6 +585,14 @@ DWARFDIE DWARFFormValue::Reference() const {
     return ref_cu->GetDIE(value);
   }
 
+  case DW_FORM_ref_sig8: {
+    DWARFTypeUnit *tu =
+        m_unit->GetSymbolFileDWARF()->DebugInfo()->GetTypeUnitForHash(value);
+    if (!tu)
+      return {};
+    return tu->GetDIE(tu->GetTypeOffset());
+  }
+
   default:
     return {};
   }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp
index 45090039196ef..fcc031bf1ea0f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp
@@ -14,7 +14,6 @@
 using namespace lldb;
 using namespace lldb_private;
 
-
 void DWARFTypeUnit::Dump(Stream *s) const {
   s->Printf("0x%8.8x: Type Unit: length = 0x%8.8x, version = 0x%4.4x, "
             "abbr_offset = 0x%8.8x, addr_size = 0x%2.2x (next CU at "
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h
index e146e6bf50339..aaf94df96b387 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h
@@ -18,6 +18,14 @@ class DWARFTypeUnit : public DWARFUnit {
 
   void Dump(lldb_private::Stream *s) const override;
 
+  uint64_t GetTypeHash() { return m_header.GetTypeHash(); }
+
+  dw_offset_t GetTypeOffset() { return GetOffset() + m_header.GetTypeOffset(); }
+
+  static bool classof(const DWARFUnit *unit) {
+    return unit->GetUnitType() == DW_UT_type;
+  }
+
 private:
   DWARFTypeUnit(SymbolFileDWARF *dwarf, lldb::user_id_t uid,
                 const DWARFUnitHeader &header,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 65a1fbf74f822..7049fe7d280fd 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -715,9 +715,16 @@ DWARFUnitHeader::extract(const DWARFDataExtractor &data, DIERef::Section section
         section == DIERef::Section::DebugTypes ? DW_UT_type : DW_UT_compile;
   }
 
+  if (header.IsTypeUnit()) {
+    header.m_type_hash = data.GetU64(offset_ptr);
+    header.m_type_offset = data.GetDWARFOffset(offset_ptr);
+  }
+
   bool length_OK = data.ValidOffset(header.GetNextUnitOffset() - 1);
   bool version_OK = SymbolFileDWARF::SupportedVersion(header.m_version);
   bool addr_size_OK = (header.m_addr_size == 4) || (header.m_addr_size == 8);
+  bool type_offset_OK =
+      !header.IsTypeUnit() || (header.m_type_offset <= header.GetLength());
 
   if (!length_OK)
     return llvm::make_error<llvm::object::GenericBinaryError>(
@@ -728,6 +735,9 @@ DWARFUnitHeader::extract(const DWARFDataExtractor &data, DIERef::Section section
   if (!addr_size_OK)
     return llvm::make_error<llvm::object::GenericBinaryError>(
         "Invalid unit address size");
+  if (!type_offset_OK)
+    return llvm::make_error<llvm::object::GenericBinaryError>(
+        "Type offset out of range");
 
   return header;
 }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 927ca2ed8de8b..165f862ed46f9 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -41,6 +41,10 @@ class DWARFUnitHeader {
   dw_offset_t m_abbr_offset = 0;
   uint8_t m_unit_type = 0;
   uint8_t m_addr_size = 0;
+
+  uint64_t m_type_hash = 0;
+  uint32_t m_type_offset = 0;
+
   uint64_t m_dwo_id = 0;
 
   DWARFUnitHeader() = default;
@@ -52,6 +56,8 @@ class DWARFUnitHeader {
   dw_offset_t GetLength() const { return m_length; }
   dw_offset_t GetAbbrOffset() const { return m_abbr_offset; }
   uint8_t GetUnitType() const { return m_unit_type; }
+  uint64_t GetTypeHash() const { return m_type_hash; }
+  dw_offset_t GetTypeOffset() const { return m_type_offset; }
   bool IsTypeUnit() const {
     return m_unit_type == DW_UT_type || m_unit_type == DW_UT_split_type;
   }
@@ -205,6 +211,8 @@ class DWARFUnit : public lldb_private::UserID {
 
   DIERef::Section GetDebugSection() const { return m_section; }
 
+  uint8_t GetUnitType() const { return m_header.GetUnitType(); }
+
 protected:
   DWARFUnit(SymbolFileDWARF *dwarf, lldb::user_id_t uid,
             const DWARFUnitHeader &header,

From 937af54666c4f4f1cb332bb28cecc86d0e1c5de1 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Fri, 24 May 2019 08:25:02 +0000
Subject: [PATCH 0138/1176] [ARM] ARMExpandPseudoInsts: add debug messages

This pass wasn't printing any messages at all, which I find really inconvenient
while debugging/tracing things. It now dumps the before and after of expanded
instructions. It doesn't do this yet for all instructions, but this is a good
start I guess.

Differential Revision: https://reviews.llvm.org/D62297

llvm-svn: 361604
---
 llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 680142c228899..a27f7f157de81 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
@@ -469,6 +470,7 @@ static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
@@ -570,8 +572,8 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
 
   // Transfer memoperands.
   MIB.cloneMemRefs(MI);
-
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
 }
 
 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
@@ -579,6 +581,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
@@ -645,8 +648,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
 
   // Transfer memoperands.
   MIB.cloneMemRefs(MI);
-
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
 }
 
 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
@@ -654,6 +657,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
   assert(TableEntry && "NEONLdStTable lookup failed");
@@ -744,6 +748,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                                  unsigned Opc, bool IsExt) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
   unsigned OpIdx = 0;
@@ -773,6 +778,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
   MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
   TransferImpOps(MI, MIB, MIB);
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
 }
 
 static bool IsAnAddressOperand(const MachineOperand &MO) {
@@ -829,6 +835,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
   bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
   MachineInstrBuilder LO16, HI16;
+  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
 
   if (!STI->hasV6T2Ops() &&
       (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
@@ -910,6 +917,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
     LO16.add(makeImplicit(MI.getOperand(1)));
   TransferImpOps(MI, LO16, HI16);
   MI.eraseFromParent();
+  LLVM_DEBUG(dbgs() << "To:        "; LO16.getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "And:       "; HI16.getInstr()->dump(););
 }
 
 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
@@ -1929,11 +1938,16 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   TRI = STI->getRegisterInfo();
   AFI = MF.getInfo<ARMFunctionInfo>();
 
+  LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
+                    << "********** Function: " << MF.getName() << '\n');
+
   bool Modified = false;
   for (MachineBasicBlock &MBB : MF)
     Modified |= ExpandMBB(MBB);
   if (VerifyARMPseudo)
     MF.verify(this, "After expanding ARM pseudo instructions.");
+
+  LLVM_DEBUG(dbgs() << "***************************************************\n");
   return Modified;
 }
 

From d63a2bb35fb25ca17b3cb6e82bcb2050179381b0 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Fri, 24 May 2019 08:32:02 +0000
Subject: [PATCH 0139/1176] [DSE] Bugfix to avoid PartialStoreMerging involving
 non byte-sized stores

Summary:
The DeadStoreElimination pass now skips doing
PartialStoreMerging when stores overlap according to
OW_PartialEarlierWithFullLater and at least one of
the stores is having a store size that is different
from the size of the type being stored.

This solves problems seen in
  https://bugs.llvm.org/show_bug.cgi?id=41949
for which we in the past could end up with
mis-compiles or assertions.

The content and location of the padding bits is not
formally described (or undefined) in the LangRef
at the moment. So the solution is chosen based on
that we cannot assume anything about the padding bits
when having a store that clobbers more memory than
indicated by the type of the value that is stored
(such as storing an i6 using an 8-bit store instruction).

Fixes: https://bugs.llvm.org/show_bug.cgi?id=41949

Reviewers: spatel, efriedma, fhahn

Reviewed By: efriedma

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62250

llvm-svn: 361605
---
 llvm/include/llvm/IR/DataLayout.h             |  8 +++
 .../Scalar/DeadStoreElimination.cpp           |  5 ++
 .../DeadStoreElimination/PartialStore2.ll     | 55 +++++++++++++++++++
 3 files changed, 68 insertions(+)
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll

diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 05c72f10d3096..ac9770a15120d 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -453,6 +453,14 @@ class DataLayout {
     return 8 * getTypeStoreSize(Ty);
   }
 
+  /// Returns true if no extra padding bits are needed when storing the
+  /// specified type.
+  ///
+  /// For example, returns false for i19 that has a 24-bit store size.
+  bool typeSizeEqualsStoreSize(Type *Ty) const {
+    return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty);
+  }
+
   /// Returns the offset in bytes between successive objects of the
   /// specified type, including alignment padding.
   ///
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 75206c28dde34..a81645745b487 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1211,12 +1211,17 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
           auto *Earlier = dyn_cast<StoreInst>(DepWrite);
           auto *Later = dyn_cast<StoreInst>(Inst);
           if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+              DL.typeSizeEqualsStoreSize(
+                  Earlier->getValueOperand()->getType()) &&
               Later && isa<ConstantInt>(Later->getValueOperand()) &&
+              DL.typeSizeEqualsStoreSize(
+                  Later->getValueOperand()->getType()) &&
               memoryIsNotModifiedBetween(Earlier, Later, AA)) {
             // If the store we find is:
             //   a) partially overwritten by the store to 'Loc'
             //   b) the later store is fully contained in the earlier one and
             //   c) they both have a constant value
+            //   d) none of the two stores need padding
             // Merge the two stores, replacing the earlier store's value with a
             // merge of both values.
             // TODO: Deal with other constant types (vectors, etc), and probably
diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll b/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll
new file mode 100644
index 0000000000000..ebcb0c3808a15
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/PartialStore2.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s --data-layout "e" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-LE %s
+; RUN: opt < %s --data-layout "E" -dse -enable-dse-partial-store-merging=true -S | FileCheck --check-prefix CHECK --check-prefix CHECK-BE %s
+
+; This test used to hit an assertion (see PR41949).
+;
+; Better safe than sorry, do not assume anything about the padding for the
+; i28 store that has 32 bits as store size.
+define void @test1(i32* %p) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[A:%.*]] = alloca i32
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32* [[A]] to i28*
+; CHECK-NEXT:    [[C:%.*]] = bitcast i32* [[A]] to { i16, i16 }*
+; CHECK-NEXT:    [[C1:%.*]] = getelementptr inbounds { i16, i16 }, { i16, i16 }* [[C]], i32 0, i32 1
+; CHECK-NEXT:    store i28 10, i28* [[B]]
+; CHECK-NEXT:    store i16 20, i16* [[C1]]
+; CHECK-NEXT:    call void @test1(i32* [[A]])
+; CHECK-NEXT:    ret void
+;
+  %a = alloca i32
+  %b = bitcast i32* %a to i28*
+  %c = bitcast i32* %a to { i16, i16 }*
+  %c1 = getelementptr inbounds { i16, i16 }, { i16, i16 }* %c, i32 0, i32 1
+  store i28 10, i28* %b
+  store i16 20, i16* %c1
+
+  call void @test1(i32* %a)
+  ret void
+}
+
+
+; This test used to mis-compile (see PR41949).
+;
+; Better safe than sorry, do not assume anything about the padding for the
+; i12 store that has 16 bits as store size.
+define void @test2(i32* %p) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[U:%.*]] = alloca i32
+; CHECK-NEXT:    [[A:%.*]] = bitcast i32* [[U]] to i32*
+; CHECK-NEXT:    [[B:%.*]] = bitcast i32* [[U]] to i12*
+; CHECK-NEXT:    store i32 -1, i32* [[A]]
+; CHECK-NEXT:    store i12 20, i12* [[B]]
+; CHECK-NEXT:    call void @test2(i32* [[U]])
+; CHECK-NEXT:    ret void
+;
+  %u = alloca i32
+  %a = bitcast i32* %u to i32*
+  %b = bitcast i32* %u to i12*
+  store i32 -1, i32* %a
+  store i12 20, i12* %b
+
+  call void @test2(i32* %u)
+  ret void
+}
+

From c1b482f2a5d0b51770276d5bd6cd17b59b5d2dfd Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Fri, 24 May 2019 08:39:40 +0000
Subject: [PATCH 0140/1176] [mips] Always check that `shift and add`
 optimization is efficient.

The D45316 introduced the `shouldTransformMulToShiftsAddsSubs` function
to check that breaking down constant multiplications into a series
of shifts, adds, and subs is efficient. Unfortunately, this function
does not check maximum number of steps on all paths of the algorithm.
This patch fixes this bug.

Fix for PR41929.

Differential Revision: https://reviews.llvm.org/D62166

llvm-svn: 361606
---
 llvm/lib/Target/Mips/MipsSEISelLowering.cpp |  57 ++--
 llvm/test/CodeGen/Mips/const-mult.ll        | 311 ++++----------------
 2 files changed, 87 insertions(+), 281 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 6fbdba4c69190..2fe2f1694f109 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -719,8 +719,31 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                                SelectionDAG &DAG,
                                                const MipsSubtarget &Subtarget) {
   // Estimate the number of operations the below transform will turn a
-  // constant multiply into. The number is approximately how many powers
-  // of two summed together that the constant can be broken down into.
+  // constant multiply into. The number is approximately equal to the minimal
+  // number of powers of two that constant can be broken down to by adding
+  // or subtracting them.
+  //
+  // If we have taken more than 12[1] / 8[2] steps to attempt the
+  // optimization for a native sized value, it is more than likely that this
+  // optimization will make things worse.
+  //
+  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
+  //     multiplication requires at least 4 cycles, but another cycle (or two)
+  //     to retrieve the result from the HI/LO registers.
+  //
+  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
+  //     materialized in 2 instructions, multiplication requires at least 4
+  //     cycles, but another cycle (or two) to retrieve the result from the
+  //     HI/LO registers.
+  //
+  // TODO:
+  // - MaxSteps needs to consider the `VT` of the constant for the current
+  //   target.
+  // - Consider to perform this optimization after type legalization.
+  //   That allows to remove a workaround for types not supported natively.
+  // - Take in account `-Os, -Oz` flags because this optimization
+  //   increases code size.
+  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
 
   SmallVector<APInt, 16> WorkStack(1, C);
   unsigned Steps = 0;
@@ -732,6 +755,9 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
     if (Val == 0 || Val == 1)
       continue;
 
+    if (Steps >= MaxSteps)
+      return false;
+
     if (Val.isPowerOf2()) {
       ++Steps;
       continue;
@@ -740,36 +766,15 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
     APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
     APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                   : APInt(BitWidth, 1) << C.ceilLogBase2();
-
     if ((Val - Floor).ule(Ceil - Val)) {
       WorkStack.push_back(Floor);
       WorkStack.push_back(Val - Floor);
-      ++Steps;
-      continue;
+    } else {
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
     }
 
-    WorkStack.push_back(Ceil);
-    WorkStack.push_back(Ceil - Val);
     ++Steps;
-
-    // If we have taken more than 12[1] / 8[2] steps to attempt the
-    // optimization for a native sized value, it is more than likely that this
-    // optimization will make things worse.
-    //
-    // [1] MIPS64 requires 6 instructions at most to materialize any constant,
-    //     multiplication requires at least 4 cycles, but another cycle (or two)
-    //     to retrieve the result from the HI/LO registers.
-    //
-    // [2] For MIPS32, more than 8 steps is expensive as the constant could be
-    //     materialized in 2 instructions, multiplication requires at least 4
-    //     cycles, but another cycle (or two) to retrieve the result from the
-    //     HI/LO registers.
-
-    if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
-      return false;
-
-    if (Steps > 8 && Subtarget.isABI_O32())
-      return false;
   }
 
   // If the value being multiplied is not supported natively, we have to pay
diff --git a/llvm/test/CodeGen/Mips/const-mult.ll b/llvm/test/CodeGen/Mips/const-mult.ll
index cbb3c91299fa9..dd90971dcee0b 100644
--- a/llvm/test/CodeGen/Mips/const-mult.ll
+++ b/llvm/test/CodeGen/Mips/const-mult.ll
@@ -212,44 +212,18 @@ entry:
 define i32 @mul42949673_32(i32 %a) {
 ; MIPS32-LABEL: mul42949673_32:
 ; MIPS32:       # %bb.0:
-; MIPS32-NEXT:    sll $1, $4, 3
-; MIPS32-NEXT:    addu $1, $1, $4
-; MIPS32-NEXT:    sll $2, $4, 5
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 10
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 13
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 15
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 20
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 25
-; MIPS32-NEXT:    sll $3, $4, 23
-; MIPS32-NEXT:    addu $1, $3, $1
+; MIPS32-NEXT:    lui $1, 655
+; MIPS32-NEXT:    ori $1, $1, 23593
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $2, $2, $1
+; MIPS32-NEXT:    mul $2, $4, $1
 ;
 ; MIPS64-LABEL: mul42949673_32:
 ; MIPS64:       # %bb.0:
-; MIPS64-NEXT:    sll $1, $4, 0
-; MIPS64-NEXT:    sll $2, $1, 3
-; MIPS64-NEXT:    addu $2, $2, $1
-; MIPS64-NEXT:    sll $3, $1, 5
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 10
-; MIPS64-NEXT:    subu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 13
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 15
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 20
-; MIPS64-NEXT:    subu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 25
-; MIPS64-NEXT:    sll $1, $1, 23
-; MIPS64-NEXT:    addu $1, $1, $2
+; MIPS64-NEXT:    lui $1, 655
+; MIPS64-NEXT:    ori $1, $1, 23593
+; MIPS64-NEXT:    sll $2, $4, 0
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    addu $2, $3, $1
+; MIPS64-NEXT:    mul $2, $2, $1
   %b = mul i32 %a, 42949673
   ret i32 %b
 }
@@ -261,45 +235,18 @@ define i64 @mul42949673_64(i64 %a) {
 ; MIPS32-NEXT:    ori $1, $1, 23593
 ; MIPS32-NEXT:    multu $4, $1
 ; MIPS32-NEXT:    mflo $2
-; MIPS32-NEXT:    mfhi $1
-; MIPS32-NEXT:    sll $3, $5, 3
-; MIPS32-NEXT:    addu $3, $3, $5
-; MIPS32-NEXT:    sll $4, $5, 5
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 10
-; MIPS32-NEXT:    subu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 13
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 15
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 20
-; MIPS32-NEXT:    subu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 25
-; MIPS32-NEXT:    sll $5, $5, 23
-; MIPS32-NEXT:    addu $3, $5, $3
-; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    mfhi $3
+; MIPS32-NEXT:    mul $1, $5, $1
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $3, $1, $3
+; MIPS32-NEXT:    addu $3, $3, $1
 ;
 ; MIPS64-LABEL: mul42949673_64:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    dsll $1, $4, 3
-; MIPS64-NEXT:    daddu $1, $1, $4
-; MIPS64-NEXT:    dsll $2, $4, 5
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 10
-; MIPS64-NEXT:    dsubu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 13
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 15
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 20
-; MIPS64-NEXT:    dsubu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 25
-; MIPS64-NEXT:    dsll $3, $4, 23
-; MIPS64-NEXT:    daddu $1, $3, $1
+; MIPS64-NEXT:    lui $1, 655
+; MIPS64-NEXT:    ori $1, $1, 23593
+; MIPS64-NEXT:    dmult $4, $1
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    daddu $2, $2, $1
+; MIPS64-NEXT:    mflo $2
 entry:
   %b = mul i64 %a, 42949673
   ret i64 %b
@@ -308,54 +255,18 @@ entry:
 define i32 @mul22224078_32(i32 %a) {
 ; MIPS32-LABEL: mul22224078_32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $4, 1
-; MIPS32-NEXT:    sll $2, $4, 4
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 6
-; MIPS32-NEXT:    subu $1, $1, $2
-; MIPS32-NEXT:    sll $2, $4, 8
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 10
-; MIPS32-NEXT:    subu $1, $1, $2
-; MIPS32-NEXT:    sll $2, $4, 13
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 16
-; MIPS32-NEXT:    subu $1, $1, $2
-; MIPS32-NEXT:    sll $2, $4, 24
-; MIPS32-NEXT:    sll $3, $4, 22
-; MIPS32-NEXT:    sll $5, $4, 20
-; MIPS32-NEXT:    sll $4, $4, 18
-; MIPS32-NEXT:    addu $1, $4, $1
-; MIPS32-NEXT:    addu $1, $5, $1
-; MIPS32-NEXT:    addu $1, $3, $1
+; MIPS32-NEXT:    lui $1, 339
+; MIPS32-NEXT:    ori $1, $1, 7374
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $2, $2, $1
+; MIPS32-NEXT:    mul $2, $4, $1
 ;
 ; MIPS64-LABEL: mul22224078_32:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    sll $1, $4, 0
-; MIPS64-NEXT:    sll $2, $1, 1
-; MIPS64-NEXT:    sll $3, $1, 4
-; MIPS64-NEXT:    subu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 6
-; MIPS64-NEXT:    subu $2, $2, $3
-; MIPS64-NEXT:    sll $3, $1, 8
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 10
-; MIPS64-NEXT:    subu $2, $2, $3
-; MIPS64-NEXT:    sll $3, $1, 13
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 16
-; MIPS64-NEXT:    subu $2, $2, $3
-; MIPS64-NEXT:    sll $3, $1, 24
-; MIPS64-NEXT:    sll $4, $1, 22
-; MIPS64-NEXT:    sll $5, $1, 20
-; MIPS64-NEXT:    sll $1, $1, 18
-; MIPS64-NEXT:    addu $1, $1, $2
-; MIPS64-NEXT:    addu $1, $5, $1
-; MIPS64-NEXT:    addu $1, $4, $1
+; MIPS64-NEXT:    lui $1, 339
+; MIPS64-NEXT:    ori $1, $1, 7374
+; MIPS64-NEXT:    sll $2, $4, 0
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    addu $2, $3, $1
+; MIPS64-NEXT:    mul $2, $2, $1
 entry:
   %b = mul i32 %a, 22224078
   ret i32 %b
@@ -368,55 +279,18 @@ define i64 @mul22224078_64(i64 %a) {
 ; MIPS32-NEXT:    ori $1, $1, 7374
 ; MIPS32-NEXT:    multu $4, $1
 ; MIPS32-NEXT:    mflo $2
-; MIPS32-NEXT:    mfhi $1
-; MIPS32-NEXT:    sll $3, $5, 1
-; MIPS32-NEXT:    sll $4, $5, 4
-; MIPS32-NEXT:    subu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 6
-; MIPS32-NEXT:    subu $3, $3, $4
-; MIPS32-NEXT:    sll $4, $5, 8
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 10
-; MIPS32-NEXT:    subu $3, $3, $4
-; MIPS32-NEXT:    sll $4, $5, 13
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 16
-; MIPS32-NEXT:    subu $3, $3, $4
-; MIPS32-NEXT:    sll $4, $5, 24
-; MIPS32-NEXT:    sll $6, $5, 22
-; MIPS32-NEXT:    sll $7, $5, 20
-; MIPS32-NEXT:    sll $5, $5, 18
-; MIPS32-NEXT:    addu $3, $5, $3
-; MIPS32-NEXT:    addu $3, $7, $3
-; MIPS32-NEXT:    addu $3, $6, $3
-; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    mfhi $3
+; MIPS32-NEXT:    mul $1, $5, $1
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $3, $1, $3
+; MIPS32-NEXT:    addu $3, $3, $1
 ;
 ; MIPS64-LABEL: mul22224078_64:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    dsll $1, $4, 1
-; MIPS64-NEXT:    dsll $2, $4, 4
-; MIPS64-NEXT:    dsubu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 6
-; MIPS64-NEXT:    dsubu $1, $1, $2
-; MIPS64-NEXT:    dsll $2, $4, 8
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 10
-; MIPS64-NEXT:    dsubu $1, $1, $2
-; MIPS64-NEXT:    dsll $2, $4, 13
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 16
-; MIPS64-NEXT:    dsubu $1, $1, $2
-; MIPS64-NEXT:    dsll $2, $4, 24
-; MIPS64-NEXT:    dsll $3, $4, 22
-; MIPS64-NEXT:    dsll $5, $4, 20
-; MIPS64-NEXT:    dsll $4, $4, 18
-; MIPS64-NEXT:    daddu $1, $4, $1
-; MIPS64-NEXT:    daddu $1, $5, $1
-; MIPS64-NEXT:    daddu $1, $3, $1
+; MIPS64-NEXT:    lui $1, 339
+; MIPS64-NEXT:    ori $1, $1, 7374
+; MIPS64-NEXT:    dmult $4, $1
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    daddu $2, $2, $1
+; MIPS64-NEXT:    mflo $2
 entry:
   %b = mul i64 %a, 22224078
   ret i64 %b
@@ -425,36 +299,18 @@ entry:
 define i32 @mul22245375_32(i32 %a) {
 ; MIPS32-LABEL: mul22245375_32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $4, 12
-; MIPS32-NEXT:    addu $1, $1, $4
-; MIPS32-NEXT:    sll $2, $4, 15
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 18
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 20
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 22
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 24
+; MIPS32-NEXT:    lui $1, 339
+; MIPS32-NEXT:    ori $1, $1, 28671
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $2, $2, $1
+; MIPS32-NEXT:    mul $2, $4, $1
 ;
 ; MIPS64-LABEL: mul22245375_32:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    sll $1, $4, 0
-; MIPS64-NEXT:    sll $2, $1, 12
-; MIPS64-NEXT:    addu $2, $2, $1
-; MIPS64-NEXT:    sll $3, $1, 15
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 18
-; MIPS64-NEXT:    subu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 20
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 22
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $1, $1, 24
+; MIPS64-NEXT:    lui $1, 339
+; MIPS64-NEXT:    ori $1, $1, 28671
+; MIPS64-NEXT:    sll $2, $4, 0
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    addu $2, $1, $2
+; MIPS64-NEXT:    mul $2, $2, $1
 entry:
   %b = mul i32 %a, 22245375
   ret i32 %b
@@ -467,37 +323,18 @@ define i64 @mul22245375_64(i64 %a) {
 ; MIPS32-NEXT:    ori $1, $1, 28671
 ; MIPS32-NEXT:    multu $4, $1
 ; MIPS32-NEXT:    mflo $2
-; MIPS32-NEXT:    mfhi $1
-; MIPS32-NEXT:    sll $3, $5, 12
-; MIPS32-NEXT:    addu $3, $3, $5
-; MIPS32-NEXT:    sll $4, $5, 15
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 18
-; MIPS32-NEXT:    subu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 20
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 22
-; MIPS32-NEXT:    addu $3, $4, $3
-; MIPS32-NEXT:    sll $4, $5, 24
-; MIPS32-NEXT:    addu $3, $4, $3
+; MIPS32-NEXT:    mfhi $3
+; MIPS32-NEXT:    mul $1, $5, $1
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $3, $1, $3
+; MIPS32-NEXT:    addu $3, $3, $1
 ;
 ; MIPS64-LABEL: mul22245375_64:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    dsll $1, $4, 12
-; MIPS64-NEXT:    daddu $1, $1, $4
-; MIPS64-NEXT:    dsll $2, $4, 15
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 18
-; MIPS64-NEXT:    dsubu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 20
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 22
-; MIPS64-NEXT:    daddu $1, $2, $1
-; MIPS64-NEXT:    dsll $2, $4, 24
+; MIPS64-NEXT:    lui $1, 339
+; MIPS64-NEXT:    ori $1, $1, 28671
+; MIPS64-NEXT:    dmult $4, $1
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    daddu $2, $2, $1
+; MIPS64-NEXT:    mflo $2
 entry:
   %b = mul i64 %a, 22245375
   ret i64 %b
@@ -506,36 +343,18 @@ entry:
 define i32 @mul25165824_32(i32 %a) {
 ; MIPS32-LABEL: mul25165824_32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $4, 12
-; MIPS32-NEXT:    addu $1, $1, $4
-; MIPS32-NEXT:    sll $2, $4, 15
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 18
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 20
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 22
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 24
+; MIPS32-NEXT:    lui $1, 339
+; MIPS32-NEXT:    ori $1, $1, 28671
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $2, $2, $1
+; MIPS32-NEXT:    mul $2, $4, $1
 ;
 ; MIPS64-LABEL: mul25165824_32:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    sll $1, $4, 0
-; MIPS64-NEXT:    sll $2, $1, 12
-; MIPS64-NEXT:    addu $2, $2, $1
-; MIPS64-NEXT:    sll $3, $1, 15
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 18
-; MIPS64-NEXT:    subu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 20
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 22
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $1, $1, 24
+; MIPS64-NEXT:    lui $1, 339
+; MIPS64-NEXT:    ori $1, $1, 28671
+; MIPS64-NEXT:    sll $2, $4, 0
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    addu $2, $1, $2
+; MIPS64-NEXT:    mul $2, $2, $1
 entry:
   %b = mul i32 %a, 22245375
   ret i32 %b
@@ -572,36 +391,18 @@ entry:
 define i32 @mul33554432_32(i32 %a) {
 ; MIPS32-LABEL: mul33554432_32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $4, 12
-; MIPS32-NEXT:    addu $1, $1, $4
-; MIPS32-NEXT:    sll $2, $4, 15
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 18
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 20
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 22
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    sll $2, $4, 24
+; MIPS32-NEXT:    lui $1, 339
+; MIPS32-NEXT:    ori $1, $1, 28671
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    addu $2, $2, $1
+; MIPS32-NEXT:    mul $2, $4, $1
 ;
 ; MIPS64-LABEL: mul33554432_32:
 ; MIPS64:       # %bb.0: # %entry
-; MIPS64-NEXT:    sll $1, $4, 0
-; MIPS64-NEXT:    sll $2, $1, 12
-; MIPS64-NEXT:    addu $2, $2, $1
-; MIPS64-NEXT:    sll $3, $1, 15
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 18
-; MIPS64-NEXT:    subu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 20
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $3, $1, 22
-; MIPS64-NEXT:    addu $2, $3, $2
-; MIPS64-NEXT:    sll $1, $1, 24
+; MIPS64-NEXT:    lui $1, 339
+; MIPS64-NEXT:    ori $1, $1, 28671
+; MIPS64-NEXT:    sll $2, $4, 0
 ; MIPS64-NEXT:    jr $ra
-; MIPS64-NEXT:    addu $2, $1, $2
+; MIPS64-NEXT:    mul $2, $2, $1
 entry:
   %b = mul i32 %a, 22245375
   ret i32 %b

From 3d7a057b0d1d978da382c4ae71653b03f09efa54 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Fri, 24 May 2019 08:39:43 +0000
Subject: [PATCH 0141/1176] CodeGen: factor out swifterror value tracking.

llvm-svn: 361607
---
 .../llvm/CodeGen/FunctionLoweringInfo.h       |  42 ---
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |   2 +
 .../llvm/CodeGen/SwiftErrorValueTracking.h    | 109 ++++++
 llvm/lib/CodeGen/CMakeLists.txt               |   1 +
 .../SelectionDAG/FunctionLoweringInfo.cpp     |  50 ---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  36 +-
 .../SelectionDAG/SelectionDAGBuilder.h        |  10 +-
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 278 +---------------
 llvm/lib/CodeGen/SwiftErrorValueTracking.cpp  | 312 ++++++++++++++++++
 9 files changed, 457 insertions(+), 383 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h
 create mode 100644 llvm/lib/CodeGen/SwiftErrorValueTracking.cpp

diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index f5f37d1403a3a..b3077fcaabd4f 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -71,48 +71,6 @@ class FunctionLoweringInfo {
   /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
   DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
 
-  /// A map from swifterror value in a basic block to the virtual register it is
-  /// currently represented by.
-  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
-      SwiftErrorVRegDefMap;
-
-  /// A list of upward exposed vreg uses that need to be satisfied by either a
-  /// copy def or a phi node at the beginning of the basic block representing
-  /// the predecessor(s) swifterror value.
-  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
-      SwiftErrorVRegUpwardsUse;
-
-  /// A map from instructions that define/use a swifterror value to the virtual
-  /// register that represents that def/use.
-  llvm::DenseMap<PointerIntPair<const Instruction *, 1, bool>, unsigned>
-      SwiftErrorVRegDefUses;
-
-  /// The swifterror argument of the current function.
-  const Value *SwiftErrorArg;
-
-  using SwiftErrorValues = SmallVector<const Value*, 1>;
-  /// A function can only have a single swifterror argument. And if it does
-  /// have a swifterror argument, it must be the first entry in
-  /// SwiftErrorVals.
-  SwiftErrorValues SwiftErrorVals;
-
-  /// Get or create the swifterror value virtual register in
-  /// SwiftErrorVRegDefMap for this basic block.
-  unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *,
-                                     const Value *);
-
-  /// Set the swifterror virtual register in the SwiftErrorVRegDefMap for this
-  /// basic block.
-  void setCurrentSwiftErrorVReg(const MachineBasicBlock *MBB, const Value *,
-                                unsigned);
-
-  /// Get or create the swifterror value virtual register for a def of a
-  /// swifterror by an instruction.
-  std::pair<unsigned, bool> getOrCreateSwiftErrorVRegDefAt(const Instruction *);
-  std::pair<unsigned, bool>
-  getOrCreateSwiftErrorVRegUseAt(const Instruction *, const MachineBasicBlock *,
-                                 const Value *);
-
   /// ValueMap - Since we emit code for the function a basic block at a time,
   /// we must remember which virtual registers hold the values for
   /// cross-basic-block values.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 36bc6c0bd7398..147c325342fcc 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -34,6 +34,7 @@ namespace llvm {
   class TargetLibraryInfo;
   class FunctionLoweringInfo;
   class ScheduleHazardRecognizer;
+  class SwiftErrorValueTracking;
   class GCFunctionInfo;
   class ScheduleDAGSDNodes;
   class LoadInst;
@@ -45,6 +46,7 @@ class SelectionDAGISel : public MachineFunctionPass {
   TargetMachine &TM;
   const TargetLibraryInfo *LibInfo;
   FunctionLoweringInfo *FuncInfo;
+  SwiftErrorValueTracking *SwiftError;
   MachineFunction *MF;
   MachineRegisterInfo *RegInfo;
   SelectionDAG *CurDAG;
diff --git a/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h b/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h
new file mode 100644
index 0000000000000..52dccc714d653
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/SwiftErrorValueTracking.h
@@ -0,0 +1,109 @@
+//===- SwiftErrorValueTracking.h - Track swifterror VReg vals --*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swiftalloc from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SWIFTERRORVALUETRACKING_H
+#define SWIFTERRORVALUETRACKING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+
+namespace llvm {
+  class Function;
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class TargetInstrInfo;
+  class TargetLowering;
+
+class SwiftErrorValueTracking {
+  // Some useful objects to reduce the number of function arguments needed.
+  MachineFunction *MF;
+  const Function *Fn;
+  const TargetLowering *TLI;
+  const TargetInstrInfo *TII;
+
+  /// A map from swifterror value in a basic block to the virtual register it is
+  /// currently represented by.
+  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
+      VRegDefMap;
+
+  /// A list of upward exposed vreg uses that need to be satisfied by either a
+  /// copy def or a phi node at the beginning of the basic block representing
+  /// the predecessor(s) swifterror value.
+  DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
+      VRegUpwardsUse;
+
+  /// A map from instructions that define/use a swifterror value to the virtual
+  /// register that represents that def/use.
+  llvm::DenseMap<PointerIntPair<const Instruction *, 1, bool>, unsigned>
+      VRegDefUses;
+
+  /// The swifterror argument of the current function.
+  const Value *SwiftErrorArg;
+
+  using SwiftErrorValues = SmallVector<const Value*, 1>;
+  /// A function can only have a single swifterror argument. And if it does
+  /// have a swifterror argument, it must be the first entry in
+  /// SwiftErrorVals.
+  SwiftErrorValues SwiftErrorVals;
+
+public:
+  /// Initialize data structures for specified new function.
+  void setFunction(MachineFunction &MF);
+
+  /// Get the (unique) function argument that was marked swifterror, or nullptr
+  /// if this function has no swifterror args.
+  const Value *getFunctionArg() const {
+    return SwiftErrorArg;
+  }
+
+  /// Get or create the swifterror value virtual register in
+  /// VRegDefMap for this basic block.
+  unsigned getOrCreateVReg(const MachineBasicBlock *, const Value *);
+
+  /// Set the swifterror virtual register in the VRegDefMap for this
+  /// basic block.
+  void setCurrentVReg(const MachineBasicBlock *MBB, const Value *, unsigned);
+
+  /// Get or create the swifterror value virtual register for a def of a
+  /// swifterror by an instruction.
+  unsigned getOrCreateVRegDefAt(const Instruction *, const MachineBasicBlock *,
+                                const Value *);
+
+  /// Get or create the swifterror value virtual register for a use of a
+  /// swifterror by an instruction.
+  unsigned getOrCreateVRegUseAt(const Instruction *, const MachineBasicBlock *,
+                                const Value *);
+
+  /// Create initial definitions of swifterror values in the entry block of the
+  /// current function.
+  bool createEntriesInEntryBlock(DebugLoc DbgLoc);
+
+  /// Propagate assigned swifterror vregs through a function, synthesizing PHI
+  /// nodes when needed to maintain consistency.
+  void propagateVRegs();
+
+  void preassignVRegs(MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+                      BasicBlock::const_iterator End);
+};
+
+}
+
+#endif
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index e76f9f8ed4e7c..fedf04270d297 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -143,6 +143,7 @@ add_llvm_library(LLVMCodeGen
   StackMaps.cpp
   StackProtector.cpp
   StackSlotColoring.cpp
+  SwiftErrorValueTracking.cpp
   TailDuplication.cpp
   TailDuplicator.cpp
   TargetFrameLoweringImpl.cpp
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index ee2ca90e5d9fc..d8ef10f58aa7c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -519,56 +519,6 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
   return VReg;
 }
 
-unsigned
-FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB,
-                                                const Value *Val) {
-  auto Key = std::make_pair(MBB, Val);
-  auto It = SwiftErrorVRegDefMap.find(Key);
-  // If this is the first use of this swifterror value in this basic block,
-  // create a new virtual register.
-  // After we processed all basic blocks we will satisfy this "upwards exposed
-  // use" by inserting a copy or phi at the beginning of this block.
-  if (It == SwiftErrorVRegDefMap.end()) {
-    auto &DL = MF->getDataLayout();
-    const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-    auto VReg = MF->getRegInfo().createVirtualRegister(RC);
-    SwiftErrorVRegDefMap[Key] = VReg;
-    SwiftErrorVRegUpwardsUse[Key] = VReg;
-    return VReg;
-  } else return It->second;
-}
-
-void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
-    const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
-  SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) {
-  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
-  auto It = SwiftErrorVRegDefUses.find(Key);
-  if (It == SwiftErrorVRegDefUses.end()) {
-    auto &DL = MF->getDataLayout();
-    const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-    unsigned VReg =  MF->getRegInfo().createVirtualRegister(RC);
-    SwiftErrorVRegDefUses[Key] = VReg;
-    return std::make_pair(VReg, true);
-  }
-  return std::make_pair(It->second, false);
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
-  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
-  auto It = SwiftErrorVRegDefUses.find(Key);
-  if (It == SwiftErrorVRegDefUses.end()) {
-    unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val);
-    SwiftErrorVRegDefUses[Key] = VReg;
-    return std::make_pair(VReg, true);
-  }
-  return std::make_pair(It->second, false);
-}
-
 const Value *
 FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
   if (VirtReg2Value.empty()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5ac9d796f7823..76e5847ba111d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -54,6 +54,7 @@
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -1895,7 +1896,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   const Function *F = I.getParent()->getParent();
   if (TLI.supportSwiftError() &&
       F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
-    assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
+    assert(SwiftError.getFunctionArg() && "Need a swift error argument");
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
     Flags.setSwiftError();
     Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1904,8 +1905,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                   0 /*partOffs*/));
     // Create SDNode for the swifterror virtual register.
     OutVals.push_back(
-        DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
-                            &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+        DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+                            &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
                         EVT(TLI.getPointerTy(DL))));
   }
 
@@ -4146,15 +4147,13 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
 
   SDValue Src = getValue(SrcV);
   // Create a virtual register, then update the virtual register.
-  unsigned VReg; bool CreatedVReg;
-  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
+  unsigned VReg =
+      SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
   // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
   // Chain can be getRoot or getControlRoot.
   SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
                                       SDValue(Src.getNode(), Src.getResNo()));
   DAG.setRoot(CopyNode);
-  if (CreatedVReg)
-    FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
 }
 
 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -4187,8 +4186,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
   // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
   SDValue L = DAG.getCopyFromReg(
       getRoot(), getCurSDLoc(),
-      FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
-      ValueVTs[0]);
+      SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
 
   setValue(&I, L);
 }
@@ -7073,11 +7071,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
       SwiftErrorVal = V;
       // We find the virtual register for the actual swifterror argument.
       // Instead of using the Value, we use the virtual register instead.
-      Entry.Node = DAG.getRegister(FuncInfo
-                                       .getOrCreateSwiftErrorVRegUseAt(
-                                           CS.getInstruction(), FuncInfo.MBB, V)
-                                       .first,
-                                   EVT(TLI.getPointerTy(DL)));
+      Entry.Node = DAG.getRegister(
+          SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
+          EVT(TLI.getPointerTy(DL)));
     }
 
     Args.push_back(Entry);
@@ -7118,13 +7114,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   if (SwiftErrorVal && TLI.supportSwiftError()) {
     // Get the last element of InVals.
     SDValue Src = CLI.InVals.back();
-    unsigned VReg; bool CreatedVReg;
-    std::tie(VReg, CreatedVReg) =
-        FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
+    unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+        CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
     SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
-    // We update the virtual register for the actual swifterror argument.
-    if (CreatedVReg)
-      FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
     DAG.setRoot(CopyNode);
   }
 }
@@ -9761,8 +9753,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
       unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
       if (TargetRegisterInfo::isVirtualRegister(Reg))
-        FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
-                                           FuncInfo->SwiftErrorArg, Reg);
+        SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+                                   Reg);
     }
 
     // If this argument is live outside of the entry block, insert a copy from
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 176d726985d75..f68fcd1aa9fe9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -77,6 +77,7 @@ class ResumeInst;
 class ReturnInst;
 class SDDbgValue;
 class StoreInst;
+class SwiftErrorValueTracking;
 class SwitchInst;
 class TargetLibraryInfo;
 class TargetMachine;
@@ -613,6 +614,9 @@ class SelectionDAGBuilder {
   /// Information about the function as a whole.
   FunctionLoweringInfo &FuncInfo;
 
+  /// Information about the swifterror values used throughout the function.
+  SwiftErrorValueTracking &SwiftError;
+
   /// Garbage collection metadata for the function.
   GCFunctionInfo *GFI;
 
@@ -626,9 +630,9 @@ class SelectionDAGBuilder {
   LLVMContext *Context;
 
   SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
-                      CodeGenOpt::Level ol)
-    : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
-      FuncInfo(funcinfo) {}
+                      SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+      : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+        FuncInfo(funcinfo), SwiftError(swifterror) {}
 
   void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
             const TargetLibraryInfo *li);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index e1c6b18a1bd95..6c9a1cd646ef3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -49,6 +49,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -307,8 +308,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
                                    CodeGenOpt::Level OL) :
   MachineFunctionPass(ID), TM(tm),
   FuncInfo(new FunctionLoweringInfo()),
+  SwiftError(new SwiftErrorValueTracking()),
   CurDAG(new SelectionDAG(tm, OL)),
-  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
   AA(), GFI(),
   OptLevel(OL),
   DAGSize(0) {
@@ -324,6 +326,7 @@ SelectionDAGISel::~SelectionDAGISel() {
   delete SDB;
   delete CurDAG;
   delete FuncInfo;
+  delete SwiftError;
 }
 
 void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -446,6 +449,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   CurDAG->init(*MF, *ORE, this, LibInfo,
    getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
   FuncInfo->set(Fn, *MF, CurDAG);
+  SwiftError->setFunction(*MF);
 
   // Now get the optional analyzes if we want to.
   // This is based on the possibly changed OptLevel (after optnone is taken
@@ -1254,77 +1258,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
          !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
 }
 
-/// Set up SwiftErrorVals by going through the function. If the function has
-/// swifterror argument, it will be the first entry.
-static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
-                                FunctionLoweringInfo *FuncInfo) {
-  if (!TLI->supportSwiftError())
-    return;
-
-  FuncInfo->SwiftErrorVals.clear();
-  FuncInfo->SwiftErrorVRegDefMap.clear();
-  FuncInfo->SwiftErrorVRegUpwardsUse.clear();
-  FuncInfo->SwiftErrorVRegDefUses.clear();
-  FuncInfo->SwiftErrorArg = nullptr;
-
-  // Check if function has a swifterror argument.
-  bool HaveSeenSwiftErrorArg = false;
-  for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
-       AI != AE; ++AI)
-    if (AI->hasSwiftErrorAttr()) {
-      assert(!HaveSeenSwiftErrorArg &&
-             "Must have only one swifterror parameter");
-      (void)HaveSeenSwiftErrorArg; // silence warning.
-      HaveSeenSwiftErrorArg = true;
-      FuncInfo->SwiftErrorArg = &*AI;
-      FuncInfo->SwiftErrorVals.push_back(&*AI);
-    }
-
-  for (const auto &LLVMBB : Fn)
-    for (const auto &Inst : LLVMBB) {
-      if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
-        if (Alloca->isSwiftError())
-          FuncInfo->SwiftErrorVals.push_back(Alloca);
-    }
-}
-
-static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
-                                                FastISel *FastIS,
-                                                const TargetLowering *TLI,
-                                                const TargetInstrInfo *TII,
-                                                SelectionDAGBuilder *SDB) {
-  if (!TLI->supportSwiftError())
-    return;
-
-  // We only need to do this when we have swifterror parameter or swifterror
-  // alloc.
-  if (FuncInfo->SwiftErrorVals.empty())
-    return;
-
-  assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
-         "expected to insert into entry block");
-  auto &DL = FuncInfo->MF->getDataLayout();
-  auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-  for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
-    // We will always generate a copy from the argument. It is always used at
-    // least by the 'return' of the swifterror.
-    if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
-      continue;
-    unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
-    // Assign Undef to Vreg. We construct MI directly to make sure it works
-    // with FastISel.
-    BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
-            SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
-            VReg);
-
-    // Keep FastIS informed about the value we just inserted.
-    if (FastIS)
-      FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
-
-    FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
-  }
-}
-
 /// Collect llvm.dbg.declare information. This is done after argument lowering
 /// in case the declarations refer to arguments.
 static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
@@ -1370,195 +1303,6 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
   }
 }
 
-/// Propagate swifterror values through the machine function CFG.
-static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
-  auto *TLI = FuncInfo->TLI;
-  if (!TLI->supportSwiftError())
-    return;
-
-  // We only need to do this when we have swifterror parameter or swifterror
-  // alloc.
-  if (FuncInfo->SwiftErrorVals.empty())
-    return;
-
-  // For each machine basic block in reverse post order.
-  ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
-  for (MachineBasicBlock *MBB : RPOT) {
-    // For each swifterror value in the function.
-    for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
-      auto Key = std::make_pair(MBB, SwiftErrorVal);
-      auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
-      auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key);
-      bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end();
-      unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
-      bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end();
-      assert(!(UpwardsUse && !DownwardDef) &&
-             "We can't have an upwards use but no downwards def");
-
-      // If there is no upwards exposed use and an entry for the swifterror in
-      // the def map for this value we don't need to do anything: We already
-      // have a downward def for this basic block.
-      if (!UpwardsUse && DownwardDef)
-        continue;
-
-      // Otherwise we either have an upwards exposed use vreg that we need to
-      // materialize or need to forward the downward def from predecessors.
-
-      // Check whether we have a single vreg def from all predecessors.
-      // Otherwise we need a phi.
-      SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
-      SmallSet<const MachineBasicBlock*, 8> Visited;
-      for (auto *Pred : MBB->predecessors()) {
-        if (!Visited.insert(Pred).second)
-          continue;
-        VRegs.push_back(std::make_pair(
-            Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal)));
-        if (Pred != MBB)
-          continue;
-        // We have a self-edge.
-        // If there was no upwards use in this basic block there is now one: the
-        // phi needs to use it self.
-        if (!UpwardsUse) {
-          UpwardsUse = true;
-          UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
-          assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end());
-          UUseVReg = UUseIt->second;
-        }
-      }
-
-      // We need a phi node if we have more than one predecessor with different
-      // downward defs.
-      bool needPHI =
-          VRegs.size() >= 1 &&
-          std::find_if(
-              VRegs.begin(), VRegs.end(),
-              [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
-                  -> bool { return V.second != VRegs[0].second; }) !=
-              VRegs.end();
-
-      // If there is no upwards exposed used and we don't need a phi just
-      // forward the swifterror vreg from the predecessor(s).
-      if (!UpwardsUse && !needPHI) {
-        assert(!VRegs.empty() &&
-               "No predecessors? The entry block should bail out earlier");
-        // Just forward the swifterror vreg from the predecessor(s).
-        FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second);
-        continue;
-      }
-
-      auto DLoc = isa<Instruction>(SwiftErrorVal)
-                      ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
-                      : DebugLoc();
-      const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
-
-      // If we don't need a phi create a copy to the upward exposed vreg.
-      if (!needPHI) {
-        assert(UpwardsUse);
-        assert(!VRegs.empty() &&
-               "No predecessors?  Is the Calling Convention correct?");
-        unsigned DestReg = UUseVReg;
-        BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
-                DestReg)
-            .addReg(VRegs[0].second);
-        continue;
-      }
-
-      // We need a phi: if there is an upwards exposed use we already have a
-      // destination virtual register number otherwise we generate a new one.
-      auto &DL = FuncInfo->MF->getDataLayout();
-      auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
-      unsigned PHIVReg =
-          UpwardsUse ? UUseVReg
-                     : FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
-      MachineInstrBuilder SwiftErrorPHI =
-          BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
-                  TII->get(TargetOpcode::PHI), PHIVReg);
-      for (auto BBRegPair : VRegs) {
-        SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
-      }
-
-      // We did not have a definition in this block before: store the phi's vreg
-      // as this block downward exposed def.
-      if (!UpwardsUse)
-        FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg);
-    }
-  }
-}
-
-static void preassignSwiftErrorRegs(const TargetLowering *TLI,
-                                    FunctionLoweringInfo *FuncInfo,
-                                    BasicBlock::const_iterator Begin,
-                                    BasicBlock::const_iterator End) {
-  if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
-    return;
-
-  // Iterator over instructions and assign vregs to swifterror defs and uses.
-  for (auto It = Begin; It != End; ++It) {
-    ImmutableCallSite CS(&*It);
-    if (CS) {
-      // A call-site with a swifterror argument is both use and def.
-      const Value *SwiftErrorAddr = nullptr;
-      for (auto &Arg : CS.args()) {
-        if (!Arg->isSwiftError())
-          continue;
-        // Use of swifterror.
-        assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
-        SwiftErrorAddr = &*Arg;
-        assert(SwiftErrorAddr->isSwiftError() &&
-               "Must have a swifterror value argument");
-        unsigned VReg; bool CreatedReg;
-        std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
-          &*It, FuncInfo->MBB, SwiftErrorAddr);
-        assert(CreatedReg);
-      }
-      if (!SwiftErrorAddr)
-        continue;
-
-      // Def of swifterror.
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) =
-          FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
-      assert(CreatedReg);
-      FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
-    // A load is a use.
-    } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
-      const Value *V = LI->getOperand(0);
-      if (!V->isSwiftError())
-        continue;
-
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) =
-          FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V);
-      assert(CreatedReg);
-
-    // A store is a def.
-    } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
-      const Value *SwiftErrorAddr = SI->getOperand(1);
-      if (!SwiftErrorAddr->isSwiftError())
-        continue;
-
-      // Def of swifterror.
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) =
-          FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
-      assert(CreatedReg);
-      FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
-    // A return in a swiferror returning function is a use.
-    } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
-      const Function *F = R->getParent()->getParent();
-      if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
-        continue;
-
-      unsigned VReg; bool CreatedReg;
-      std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
-          R, FuncInfo->MBB, FuncInfo->SwiftErrorArg);
-      assert(CreatedReg);
-    }
-  }
-}
-
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   FastISelFailed = false;
   // Initialize the Fast-ISel state, if needed.
@@ -1568,8 +1312,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
   }
 
-  setupSwiftErrorVals(Fn, TLI, FuncInfo);
-
   ReversePostOrderTraversal<const Function*> RPOT(&Fn);
 
   // Lower arguments up front. An RPO iteration always visits the entry block
@@ -1615,7 +1357,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
     else
       FastIS->setLastLocalValue(nullptr);
   }
-  createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+  bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
+
+  if (FastIS && Inserted)
+    FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
 
   processDbgDeclares(FuncInfo);
 
@@ -1670,7 +1416,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       unsigned NumFastIselRemaining = std::distance(Begin, End);
 
       // Pre-assign swifterror vregs.
-      preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End);
+      SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
 
       // Do FastISel on as many instructions as possible.
       for (; BI != Begin; --BI) {
@@ -1826,7 +1572,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
 
   SP.copyToMachineFrameInfo(MF->getFrameInfo());
 
-  propagateSwiftErrorVRegs(FuncInfo);
+  SwiftError->propagateVRegs();
 
   delete FastIS;
   SDB->clearDanglingDebugInfo();
diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
new file mode 100644
index 0000000000000..0359053d8ab7a
--- /dev/null
+++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -0,0 +1,312 @@
+//===-- SwiftErrorValueTracking.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swiftalloc from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+unsigned SwiftErrorValueTracking::getOrCreateVReg(const MachineBasicBlock *MBB,
+                                                  const Value *Val) {
+  auto Key = std::make_pair(MBB, Val);
+  auto It = VRegDefMap.find(Key);
+  // If this is the first use of this swifterror value in this basic block,
+  // create a new virtual register.
+  // After we processed all basic blocks we will satisfy this "upwards exposed
+  // use" by inserting a copy or phi at the beginning of this block.
+  if (It == VRegDefMap.end()) {
+    auto &DL = MF->getDataLayout();
+    const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+    auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+    VRegDefMap[Key] = VReg;
+    VRegUpwardsUse[Key] = VReg;
+    return VReg;
+  } else
+    return It->second;
+}
+
+void SwiftErrorValueTracking::setCurrentVReg(const MachineBasicBlock *MBB,
+                                             const Value *Val, unsigned VReg) {
+  VRegDefMap[std::make_pair(MBB, Val)] = VReg;
+}
+
+unsigned SwiftErrorValueTracking::getOrCreateVRegDefAt(
+    const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
+  auto It = VRegDefUses.find(Key);
+  if (It != VRegDefUses.end())
+    return It->second;
+
+  auto &DL = MF->getDataLayout();
+  const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+  unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+  VRegDefUses[Key] = VReg;
+  setCurrentVReg(MBB, Val, VReg);
+  return VReg;
+}
+
+unsigned SwiftErrorValueTracking::getOrCreateVRegUseAt(
+    const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+  auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+  auto It = VRegDefUses.find(Key);
+  if (It != VRegDefUses.end())
+    return It->second;
+
+  unsigned VReg = getOrCreateVReg(MBB, Val);
+  VRegDefUses[Key] = VReg;
+  return VReg;
+}
+
+/// Set up SwiftErrorVals by going through the function. If the function has
+/// swifterror argument, it will be the first entry.
+void SwiftErrorValueTracking::setFunction(MachineFunction &mf) {
+  MF = &mf;
+  Fn = &MF->getFunction();
+  TLI = MF->getSubtarget().getTargetLowering();
+  TII = MF->getSubtarget().getInstrInfo();
+
+  if (!TLI->supportSwiftError())
+    return;
+
+  SwiftErrorVals.clear();
+  VRegDefMap.clear();
+  VRegUpwardsUse.clear();
+  VRegDefUses.clear();
+  SwiftErrorArg = nullptr;
+
+  // Check if function has a swifterror argument.
+  bool HaveSeenSwiftErrorArg = false;
+  for (Function::const_arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+       AI != AE; ++AI)
+    if (AI->hasSwiftErrorAttr()) {
+      assert(!HaveSeenSwiftErrorArg &&
+             "Must have only one swifterror parameter");
+      (void)HaveSeenSwiftErrorArg; // silence warning.
+      HaveSeenSwiftErrorArg = true;
+      SwiftErrorArg = &*AI;
+      SwiftErrorVals.push_back(&*AI);
+    }
+
+  for (const auto &LLVMBB : *Fn)
+    for (const auto &Inst : LLVMBB) {
+      if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+        if (Alloca->isSwiftError())
+          SwiftErrorVals.push_back(Alloca);
+    }
+}
+
+bool SwiftErrorValueTracking::createEntriesInEntryBlock(DebugLoc DbgLoc) {
+  if (!TLI->supportSwiftError())
+    return false;
+
+  // We only need to do this when we have swifterror parameter or swifterror
+  // alloc.
+  if (SwiftErrorVals.empty())
+    return false;
+
+  MachineBasicBlock *MBB = &*MF->begin();
+  auto &DL = MF->getDataLayout();
+  auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+  bool Inserted = false;
+  for (const auto *SwiftErrorVal : SwiftErrorVals) {
+    // We will always generate a copy from the argument. It is always used at
+    // least by the 'return' of the swifterror.
+    if (SwiftErrorArg && SwiftErrorArg == SwiftErrorVal)
+      continue;
+    unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+    // Assign Undef to Vreg. We construct MI directly to make sure it works
+    // with FastISel.
+    BuildMI(*MBB, MBB->getFirstNonPHI(), DbgLoc,
+            TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+
+    setCurrentVReg(MBB, SwiftErrorVal, VReg);
+    Inserted = true;
+  }
+
+  return Inserted;
+}
+
+/// Propagate swifterror values through the machine function CFG.
+void SwiftErrorValueTracking::propagateVRegs() {
+  if (!TLI->supportSwiftError())
+    return;
+
+  // We only need to do this when we have swifterror parameter or swifterror
+  // alloc.
+  if (SwiftErrorVals.empty())
+    return;
+
+  // For each machine basic block in reverse post order.
+  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+  for (MachineBasicBlock *MBB : RPOT) {
+    // For each swifterror value in the function.
+    for (const auto *SwiftErrorVal : SwiftErrorVals) {
+      auto Key = std::make_pair(MBB, SwiftErrorVal);
+      auto UUseIt = VRegUpwardsUse.find(Key);
+      auto VRegDefIt = VRegDefMap.find(Key);
+      bool UpwardsUse = UUseIt != VRegUpwardsUse.end();
+      unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
+      bool DownwardDef = VRegDefIt != VRegDefMap.end();
+      assert(!(UpwardsUse && !DownwardDef) &&
+             "We can't have an upwards use but no downwards def");
+
+      // If there is no upwards exposed use and an entry for the swifterror in
+      // the def map for this value we don't need to do anything: We already
+      // have a downward def for this basic block.
+      if (!UpwardsUse && DownwardDef)
+        continue;
+
+      // Otherwise we either have an upwards exposed use vreg that we need to
+      // materialize or need to forward the downward def from predecessors.
+
+      // Check whether we have a single vreg def from all predecessors.
+      // Otherwise we need a phi.
+      SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
+      SmallSet<const MachineBasicBlock *, 8> Visited;
+      for (auto *Pred : MBB->predecessors()) {
+        if (!Visited.insert(Pred).second)
+          continue;
+        VRegs.push_back(std::make_pair(
+            Pred, getOrCreateVReg(Pred, SwiftErrorVal)));
+        if (Pred != MBB)
+          continue;
+        // We have a self-edge.
+        // If there was no upwards use in this basic block there is now one: the
+        // phi needs to use it self.
+        if (!UpwardsUse) {
+          UpwardsUse = true;
+          UUseIt = VRegUpwardsUse.find(Key);
+          assert(UUseIt != VRegUpwardsUse.end());
+          UUseVReg = UUseIt->second;
+        }
+      }
+
+      // We need a phi node if we have more than one predecessor with different
+      // downward defs.
+      bool needPHI =
+          VRegs.size() >= 1 &&
+          std::find_if(
+              VRegs.begin(), VRegs.end(),
+              [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
+                  -> bool { return V.second != VRegs[0].second; }) !=
+              VRegs.end();
+
+      // If there is no upwards exposed used and we don't need a phi just
+      // forward the swifterror vreg from the predecessor(s).
+      if (!UpwardsUse && !needPHI) {
+        assert(!VRegs.empty() &&
+               "No predecessors? The entry block should bail out earlier");
+        // Just forward the swifterror vreg from the predecessor(s).
+        setCurrentVReg(MBB, SwiftErrorVal, VRegs[0].second);
+        continue;
+      }
+
+      auto DLoc = isa<Instruction>(SwiftErrorVal)
+                      ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+                      : DebugLoc();
+      const auto *TII = MF->getSubtarget().getInstrInfo();
+
+      // If we don't need a phi create a copy to the upward exposed vreg.
+      if (!needPHI) {
+        assert(UpwardsUse);
+        assert(!VRegs.empty() &&
+               "No predecessors?  Is the Calling Convention correct?");
+        unsigned DestReg = UUseVReg;
+        BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+                DestReg)
+            .addReg(VRegs[0].second);
+        continue;
+      }
+
+      // We need a phi: if there is an upwards exposed use we already have a
+      // destination virtual register number otherwise we generate a new one.
+      auto &DL = MF->getDataLayout();
+      auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+      unsigned PHIVReg =
+          UpwardsUse ? UUseVReg : MF->getRegInfo().createVirtualRegister(RC);
+      MachineInstrBuilder PHI =
+          BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+                  TII->get(TargetOpcode::PHI), PHIVReg);
+      for (auto BBRegPair : VRegs) {
+        PHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+      }
+
+      // We did not have a definition in this block before: store the phi's vreg
+      // as this block downward exposed def.
+      if (!UpwardsUse)
+        setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
+    }
+  }
+}
+
+void SwiftErrorValueTracking::preassignVRegs(
+    MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+    BasicBlock::const_iterator End) {
+  if (!TLI->supportSwiftError() || SwiftErrorVals.empty())
+    return;
+
+  // Iterator over instructions and assign vregs to swifterror defs and uses.
+  for (auto It = Begin; It != End; ++It) {
+    ImmutableCallSite CS(&*It);
+    if (CS) {
+      // A call-site with a swifterror argument is both use and def.
+      const Value *SwiftErrorAddr = nullptr;
+      for (auto &Arg : CS.args()) {
+        if (!Arg->isSwiftError())
+          continue;
+        // Use of swifterror.
+        assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+        SwiftErrorAddr = &*Arg;
+        assert(SwiftErrorAddr->isSwiftError() &&
+               "Must have a swifterror value argument");
+        getOrCreateVRegUseAt(&*It, MBB, SwiftErrorAddr);
+      }
+      if (!SwiftErrorAddr)
+        continue;
+
+      // Def of swifterror.
+      getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+      // A load is a use.
+    } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+      const Value *V = LI->getOperand(0);
+      if (!V->isSwiftError())
+        continue;
+
+      getOrCreateVRegUseAt(LI, MBB, V);
+
+      // A store is a def.
+    } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+      const Value *SwiftErrorAddr = SI->getOperand(1);
+      if (!SwiftErrorAddr->isSwiftError())
+        continue;
+
+      // Def of swifterror.
+      getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+      // A return in a swiferror returning function is a use.
+    } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+      const Function *F = R->getParent()->getParent();
+      if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+        continue;
+
+      getOrCreateVRegUseAt(R, MBB, SwiftErrorArg);
+    }
+  }
+}

From 3b2157aeed845b0cf70f38cf7d3b29da50291cf8 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Fri, 24 May 2019 08:40:13 +0000
Subject: [PATCH 0142/1176] GlobalISel: support swifterror attribute on
 AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608
---
 .../llvm/CodeGen/GlobalISel/CallLowering.h    |  56 +-
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    |   3 +
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |  14 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |  89 ++-
 .../Target/AArch64/AArch64CallLowering.cpp    |  16 +-
 llvm/lib/Target/AArch64/AArch64CallLowering.h |  14 +-
 .../AArch64/GlobalISel/arm64-fallback.ll      |  21 +-
 .../CodeGen/AArch64/GlobalISel/swifterror.ll  | 518 ++++++++++++++++++
 8 files changed, 684 insertions(+), 47 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 426906af34299..33152e9779ad4 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -147,16 +147,39 @@ class CallLowering {
   CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
   virtual ~CallLowering() = default;
 
+  /// \return true if the target is capable of handling swifterror values that
+  /// have been promoted to a specified register. The extended versions of
+  /// lowerReturn and lowerCall should be implemented.
+  virtual bool supportSwiftError() const {
+    return false;
+  }
+
   /// This hook must be implemented to lower outgoing return values, described
   /// by \p Val, into the specified virtual registers \p VRegs.
   /// This hook is used by GlobalISel.
   ///
+  /// \p SwiftErrorVReg is non-zero if the function has a swifterror parameter
+  /// that needs to be implicitly returned.
+  ///
   /// \return True if the lowering succeeds, false otherwise.
+  virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+                           ArrayRef<unsigned> VRegs,
+                           unsigned SwiftErrorVReg) const {
+    if (!supportSwiftError()) {
+      assert(SwiftErrorVReg == 0 && "attempt to use unsupported swifterror");
+      return lowerReturn(MIRBuilder, Val, VRegs);
+    }
+    return false;
+  }
+
+  /// This hook behaves as the extended lowerReturn function, but for targets
+  /// that do not support swifterror value promotion.
   virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
                            ArrayRef<unsigned> VRegs) const {
     return false;
   }
 
+
   /// This hook must be implemented to lower the incoming (formal)
   /// arguments, described by \p Args, for GlobalISel. Each argument
   /// must end up in the related virtual register described by VRegs.
@@ -180,18 +203,29 @@ class CallLowering {
   /// \p Callee is the destination of the call. It should be either a register,
   /// globaladdress, or externalsymbol.
   ///
-  /// \p ResTy is the type returned by the function
+  /// \p OrigRet is a descriptor for the return type of the function.
   ///
-  /// \p ResReg is the generic virtual register that the returned
-  /// value should be lowered into.
+  /// \p OrigArgs is a list of descriptors of the arguments passed to the
+  /// function.
   ///
-  /// \p ArgTys is a list of the types each member of \p ArgRegs has; used by
-  /// the target to decide which register/stack slot should be allocated.
-  ///
-  /// \p ArgRegs is a list of virtual registers containing each argument that
-  /// needs to be passed.
+  /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
+  /// parameter, and contains the vreg that the swifterror should be copied into
+  /// after the call.
   ///
   /// \return true if the lowering succeeded, false otherwise.
+  virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+                         const MachineOperand &Callee, const ArgInfo &OrigRet,
+                         ArrayRef<ArgInfo> OrigArgs,
+                         unsigned SwiftErrorVReg) const {
+    if (!supportSwiftError()) {
+      assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror");
+      return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs);
+    }
+    return false;
+  }
+
+  /// This hook behaves as the extended lowerCall function, but for targets that
+  /// do not support swifterror value promotion.
   virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                          const MachineOperand &Callee, const ArgInfo &OrigRet,
                          ArrayRef<ArgInfo> OrigArgs) const {
@@ -209,6 +243,10 @@ class CallLowering {
   /// \p ArgRegs is a list of virtual registers containing each argument that
   /// needs to be passed.
   ///
+  /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
+  /// parameter, and contains the vreg that the swifterror should be copied into
+  /// after the call.
+  ///
   /// \p GetCalleeReg is a callback to materialize a register for the callee if
   /// the target determines it cannot jump to the destination based purely on \p
   /// CI. This might be because \p CI is indirect, or because of the limited
@@ -217,7 +255,9 @@ class CallLowering {
   /// \return true if the lowering succeeded, false otherwise.
   bool lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
                  unsigned ResReg, ArrayRef<unsigned> ArgRegs,
+                 unsigned SwiftErrorVReg,
                  std::function<unsigned()> GetCalleeReg) const;
+
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index c75d823501acd..7433a4760151b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Types.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Allocator.h"
@@ -163,6 +164,8 @@ class IRTranslator : public MachineFunctionPass {
   /// this function.
   DenseMap<const AllocaInst *, int> FrameIndices;
 
+  SwiftErrorValueTracking SwiftError;
+
   /// \name Methods for translating form LLVM IR to MachineInstr.
   /// \see ::translate for general information on the translate methods.
   /// @{
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b6b1bef26007e..f144b18aa6358 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -26,9 +26,10 @@ using namespace llvm;
 
 void CallLowering::anchor() {}
 
-bool CallLowering::lowerCall(
-    MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
-    ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
+bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
+                             unsigned ResReg, ArrayRef<unsigned> ArgRegs,
+                             unsigned SwiftErrorVReg,
+                             std::function<unsigned()> GetCalleeReg) const {
   auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
 
   // First step is to marshall all the function's parameters into the correct
@@ -41,8 +42,8 @@ bool CallLowering::lowerCall(
     ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
                     i < NumFixedArgs};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
-    // We don't currently support swifterror or swiftself args.
-    if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf())
+    // We don't currently support swiftself args.
+    if (OrigArg.Flags.isSwiftSelf())
       return false;
     OrigArgs.push_back(OrigArg);
     ++i;
@@ -58,7 +59,8 @@ bool CallLowering::lowerCall(
   if (!OrigRet.Ty->isVoidTy())
     setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
 
-  return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
+  return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs,
+                   SwiftErrorVReg);
 }
 
 template <typename FuncInfoTy>
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4ac720181edcf..b1a53c540247a 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -354,11 +354,16 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   if (Ret)
     VRegs = getOrCreateVRegs(*Ret);
 
+  unsigned SwiftErrorVReg = 0;
+  if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
+    SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
+        &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
+  }
+
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-
-  return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
+  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
 }
 
 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -447,6 +452,14 @@ bool IRTranslator::translateIndirectBr(const User &U,
   return true;
 }
 
+static bool isSwiftError(const Value *V) {
+  if (auto Arg = dyn_cast<Argument>(V))
+    return Arg->hasSwiftErrorAttr();
+  if (auto AI = dyn_cast<AllocaInst>(V))
+    return AI->isSwiftError();
+  return false;
+}
+
 bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   const LoadInst &LI = cast<LoadInst>(U);
 
@@ -464,6 +477,15 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
 
+  if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
+    assert(Regs.size() == 1 && "swifterror should be single pointer");
+    unsigned VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
+                                                    LI.getPointerOperand());
+    MIRBuilder.buildCopy(Regs[0], VReg);
+    return true;
+  }
+
+
   for (unsigned i = 0; i < Regs.size(); ++i) {
     unsigned Addr = 0;
     MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
@@ -496,6 +518,15 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
   Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
 
+  if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
+    assert(Vals.size() == 1 && "swifterror should be single pointer");
+
+    unsigned VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
+                                                    SI.getPointerOperand());
+    MIRBuilder.buildCopy(VReg, Vals[0]);
+    return true;
+  }
+
   for (unsigned i = 0; i < Vals.size(); ++i) {
     unsigned Addr = 0;
     MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
@@ -1154,16 +1185,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
                                : getOrCreateVReg(CI);
 
     SmallVector<unsigned, 8> Args;
-    for (auto &Arg: CI.arg_operands())
+    unsigned SwiftErrorVReg = 0;
+    for (auto &Arg: CI.arg_operands()) {
+      if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+        LLT Ty = getLLTForType(*Arg->getType(), *DL);
+        unsigned InVReg = MRI->createGenericVirtualRegister(Ty);
+        MIRBuilder.buildCopy(InVReg, SwiftError.getOrCreateVRegUseAt(
+                                         &CI, &MIRBuilder.getMBB(), Arg));
+        Args.push_back(InVReg);
+        SwiftErrorVReg =
+            SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
+        continue;
+      }
       Args.push_back(packRegs(*Arg, MIRBuilder));
+    }
 
     MF->getFrameInfo().setHasCalls(true);
-    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
-      return getOrCreateVReg(*CI.getCalledValue());
-    });
+    bool Success =
+        CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
+                       [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
 
     if (IsSplitType)
       unpackRegs(CI, Res, MIRBuilder);
+
     return Success;
   }
 
@@ -1239,10 +1283,23 @@ bool IRTranslator::translateInvoke(const User &U,
   if (!I.getType()->isVoidTy())
     Res = MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
   SmallVector<unsigned, 8> Args;
-  for (auto &Arg: I.arg_operands())
+  unsigned SwiftErrorVReg = 0;
+  for (auto &Arg : I.arg_operands()) {
+    if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+      LLT Ty = getLLTForType(*Arg->getType(), *DL);
+      unsigned InVReg = MRI->createGenericVirtualRegister(Ty);
+      MIRBuilder.buildCopy(InVReg, SwiftError.getOrCreateVRegUseAt(
+                                       &I, &MIRBuilder.getMBB(), Arg));
+      Args.push_back(InVReg);
+      SwiftErrorVReg =
+          SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+      continue;
+    }
+
     Args.push_back(packRegs(*Arg, MIRBuilder));
+  }
 
-  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
                       [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
     return false;
 
@@ -1331,7 +1388,7 @@ bool IRTranslator::translateAlloca(const User &U,
   auto &AI = cast<AllocaInst>(U);
 
   if (AI.isSwiftError())
-    return false;
+    return true;
 
   if (AI.isStaticAlloca()) {
     unsigned Res = getOrCreateVReg(AI);
@@ -1776,6 +1833,10 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MF->push_back(EntryBB);
   EntryBuilder->setMBB(*EntryBB);
 
+  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
+  SwiftError.setFunction(CurMF);
+  SwiftError.createEntriesInEntryBlock(DbgLoc);
+
   // Create all blocks, in IR order, to preserve the layout.
   for (const BasicBlock &BB: F) {
     auto *&MBB = BBToMBB[&BB];
@@ -1797,14 +1858,18 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
       continue; // Don't handle zero sized types.
     VRegArgs.push_back(
         MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
+
+    if (Arg.hasSwiftErrorAttr())
+      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(),
+                                VRegArgs.back());
   }
 
   // We don't currently support translating swifterror or swiftself functions.
   for (auto &Arg : F.args()) {
-    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+    if (Arg.hasSwiftSelfAttr()) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  F.getSubprogram(), &F.getEntryBlock());
-      R << "unable to lower arguments due to swifterror/swiftself: "
+      R << "unable to lower arguments due to swiftself: "
         << ore::NV("Prototype", F.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
       return false;
@@ -1880,6 +1945,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 
   finishPendingPhis();
 
+  SwiftError.propagateVRegs();
+
   // Merge the argument lowering and constants block with its single
   // successor, the LLVM-IR entry block.  We want the basic block to
   // be maximal.
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index c568cd9b7c3e3..9d04dd8658347 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -232,7 +232,8 @@ void AArch64CallLowering::splitToValueTypes(
 
 bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
-                                      ArrayRef<unsigned> VRegs) const {
+                                      ArrayRef<unsigned> VRegs,
+                                      unsigned SwiftErrorVReg) const {
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
@@ -340,6 +341,11 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
     Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
   }
 
+  if (SwiftErrorVReg) {
+    MIB.addUse(AArch64::X21, RegState::Implicit);
+    MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
+  }
+
   MIRBuilder.insertInstr(MIB);
   return Success;
 }
@@ -420,7 +426,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                     CallingConv::ID CallConv,
                                     const MachineOperand &Callee,
                                     const ArgInfo &OrigRet,
-                                    ArrayRef<ArgInfo> OrigArgs) const {
+                                    ArrayRef<ArgInfo> OrigArgs,
+                                    unsigned SwiftErrorVReg) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -503,6 +510,11 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
   }
 
+  if (SwiftErrorVReg) {
+    MIB.addDef(AArch64::X21, RegState::Implicit);
+    MIRBuilder.buildCopy(SwiftErrorVReg, AArch64::X21);
+  }
+
   CallSeqStart.addImm(Handler.StackSize).addImm(0);
   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
       .addImm(Handler.StackSize)
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.h b/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 76337567c7a50..6aab6bd170317 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -34,14 +34,24 @@ class AArch64CallLowering: public CallLowering {
   AArch64CallLowering(const AArch64TargetLowering &TLI);
 
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
-                   ArrayRef<unsigned> VRegs) const override;
+                   ArrayRef<unsigned> VRegs,
+                   unsigned SwiftErrorVReg) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
                             ArrayRef<unsigned> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
-                 ArrayRef<ArgInfo> OrigArgs) const override;
+                 ArrayRef<ArgInfo> OrigArgs,
+                 unsigned SwiftErrorVReg) const override;
+
+  bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+                 const MachineOperand &Callee, const ArgInfo &OrigRet,
+                 ArrayRef<ArgInfo> OrigArgs) const override {
+    return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs, 0);
+  }
+
+  bool supportSwiftError() const override { return true; }
 
 private:
   using RegHandler = std::function<void(MachineIRBuilder &, Type *, unsigned,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index a0c3af5c1b56a..fa6f19a5cfa01 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -230,23 +230,8 @@ define void @nonpow2_vector_add_fewerelements() {
 
 %swift_error = type {i64, i8}
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower arguments due to swifterror/swiftself: void (%swift_error**)* (in function: swifterror_param)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for swifterror_param
-define void @swifterror_param(%swift_error** swifterror %error_ptr_ref) {
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower arguments due to swiftself: void (%swift_error**)* (in function: swiftself_param)
+; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for swiftself_param
+define void @swiftself_param(%swift_error** swiftself %error_ptr_ref) {
   ret void
 }
-
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: alloca: '  %error_ptr_ref = alloca swifterror %swift_error*' (in function: swifterror_alloca)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for swifterror_alloca
-; We can't currently test the call parameters being swifterror because the value
-; must come from a swifterror alloca or parameter, at which point we already
-; fallback. As long as those cases work however we should be fine.
-define void @swifterror_alloca(i8* %error_ref) {
-entry:
-  %error_ptr_ref = alloca swifterror %swift_error*
-  store %swift_error* null, %swift_error** %error_ptr_ref
-  call void @swifterror_param(%swift_error** swifterror %error_ptr_ref)
-  ret void
-}
-
-
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
new file mode 100644
index 0000000000000..83e48a6a504c7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
@@ -0,0 +1,518 @@
+; RUN: llc -verify-machineinstrs -frame-pointer=all -global-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck %s
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+%swift_error = type {i64, i8}
+
+; This tests the basic usage of a swifterror parameter. "foo" is the function
+; that takes a swifterror parameter and "caller" is the caller of "foo".
+define float @foo(%swift_error** swifterror %error_ptr_ref) {
+; CHECK-LABEL: foo:
+; CHECK: mov [[ID:w[0-9]+]], #1
+; CHECK: mov x0, #16
+; CHECK: malloc
+; CHECK: strb [[ID]], [x0, #8]
+; CHECK: mov x21, x0
+; CHECK-NOT: x21
+
+entry:
+  %call = call i8* @malloc(i64 16)
+  %call.0 = bitcast i8* %call to %swift_error*
+  store %swift_error* %call.0, %swift_error** %error_ptr_ref
+  %tmp = getelementptr inbounds i8, i8* %call, i64 8
+  store i8 1, i8* %tmp
+  ret float 1.0
+}
+
+; "caller" calls "foo" that takes a swifterror parameter.
+define float @caller(i8* %error_ref) {
+; CHECK-LABEL: caller:
+; CHECK: mov [[ID:x[0-9]+]], x0
+; CHECK: mov [[ZERO:x[0-9]+]], #0
+; CHECK: mov x21, #0
+; CHECK: bl {{.*}}foo
+; CHECK: mov x0, x21
+; CHECK: cmp x21, [[ZERO]]
+; CHECK: b.ne
+; Access part of the error object and save it to error_ref
+; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
+; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK: bl {{.*}}free
+
+entry:
+  %error_ptr_ref = alloca swifterror %swift_error*
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  %call = call float @foo(%swift_error** swifterror %error_ptr_ref)
+  %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+  %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+  %tmp = bitcast %swift_error* %error_from_foo to i8*
+  br i1 %had_error_from_foo, label %handler, label %cont
+cont:
+  %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+  %t = load i8, i8* %v1
+  store i8 %t, i8* %error_ref
+  br label %handler
+handler:
+  call void @free(i8* %tmp)
+  ret float 1.0
+}
+
+; "caller2" is the caller of "foo", it calls "foo" inside a loop.
+define float @caller2(i8* %error_ref) {
+; CHECK-LABEL: caller2:
+; CHECK: mov [[ID:x[0-9]+]], x0
+; CHECK: mov [[ZERO:x[0-9]+]], #0
+; CHECK: fmov [[CMP:s[0-9]+]], #1.0
+; CHECK: mov x21, #0
+; CHECK: bl {{.*}}foo
+; CHECK: cmp x21, [[ZERO]]
+; CHECK: b.ne
+; CHECK: fcmp s0, [[CMP]]
+; CHECK: b.le
+; Access part of the error object and save it to error_ref
+; CHECK: ldrb [[CODE:w[0-9]+]], [x21, #8]
+; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK: mov x0, x21
+; CHECK: bl {{.*}}free
+
+entry:
+  %error_ptr_ref = alloca swifterror %swift_error*
+  br label %bb_loop
+bb_loop:
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  %call = call float @foo(%swift_error** swifterror %error_ptr_ref)
+  %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+  %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+  %tmp = bitcast %swift_error* %error_from_foo to i8*
+  br i1 %had_error_from_foo, label %handler, label %cont
+cont:
+  %cmp = fcmp ogt float %call, 1.000000e+00
+  br i1 %cmp, label %bb_end, label %bb_loop
+bb_end:
+  %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+  %t = load i8, i8* %v1
+  store i8 %t, i8* %error_ref
+  br label %handler
+handler:
+  call void @free(i8* %tmp)
+  ret float 1.0
+}
+
+; "foo_if" is a function that takes a swifterror parameter, it sets swifterror
+; under a certain condition.
+define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
+; CHECK-LABEL: foo_if:
+; CHECK: cbz w0
+; CHECK: mov [[ID:w[0-9]+]], #1
+; CHECK: mov x0, #16
+; CHECK: malloc
+; CHECK: strb [[ID]], [x0, #8]
+; CHECK: mov x21, x0
+; CHECK-NOT: x21
+; CHECK: ret
+
+entry:
+  %cond = icmp ne i32 %cc, 0
+  br i1 %cond, label %gen_error, label %normal
+
+gen_error:
+  %call = call i8* @malloc(i64 16)
+  %call.0 = bitcast i8* %call to %swift_error*
+  store %swift_error* %call.0, %swift_error** %error_ptr_ref
+  %tmp = getelementptr inbounds i8, i8* %call, i64 8
+  store i8 1, i8* %tmp
+  ret float 1.0
+
+normal:
+  ret float 0.0
+}
+
+; "foo_loop" is a function that takes a swifterror parameter, it sets swifterror
+; under a certain condition inside a loop.
+define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float %cc2) {
+; CHECK-LABEL: foo_loop:
+; CHECK: cbz
+; CHECK: mov x0, #16
+; CHECK: malloc
+; CHECK: mov x21, x0
+; CHECK: strb w{{.*}}, [x0, #8]
+; CHECK: fcmp
+; CHECK: b.le
+; CHECK: ret
+
+entry:
+  br label %bb_loop
+
+bb_loop:
+  %cond = icmp ne i32 %cc, 0
+  br i1 %cond, label %gen_error, label %bb_cont
+
+gen_error:
+  %call = call i8* @malloc(i64 16)
+  %call.0 = bitcast i8* %call to %swift_error*
+  store %swift_error* %call.0, %swift_error** %error_ptr_ref
+  %tmp = getelementptr inbounds i8, i8* %call, i64 8
+  store i8 1, i8* %tmp
+  br label %bb_cont
+
+bb_cont:
+  %cmp = fcmp ogt float %cc2, 1.000000e+00
+  br i1 %cmp, label %bb_end, label %bb_loop
+bb_end:
+  ret float 0.0
+}
+
+%struct.S = type { i32, i32, i32, i32, i32, i32 }
+
+; "foo_sret" is a function that takes a swifterror parameter, it also has a sret
+; parameter.
+define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) {
+; CHECK-LABEL: foo_sret:
+; CHECK: mov [[SRET:x[0-9]+]], x8
+; CHECK: mov [[ID:w[0-9]+]], #1
+; CHECK: mov x0, #16
+; CHECK: malloc
+; CHECK: strb [[ID]], [x0, #8]
+; CHECK: str w{{.*}}, [{{.*}}[[SRET]], #4]
+; CHECK: mov x21, x0
+; CHECK-NOT: x21
+
+entry:
+  %call = call i8* @malloc(i64 16)
+  %call.0 = bitcast i8* %call to %swift_error*
+  store %swift_error* %call.0, %swift_error** %error_ptr_ref
+  %tmp = getelementptr inbounds i8, i8* %call, i64 8
+  store i8 1, i8* %tmp
+  %v2 = getelementptr inbounds %struct.S, %struct.S* %agg.result, i32 0, i32 1
+  store i32 %val1, i32* %v2
+  ret void
+}
+
+; "caller3" calls "foo_sret" that takes a swifterror parameter.
+define float @caller3(i8* %error_ref) {
+; CHECK-LABEL: caller3:
+; CHECK: mov [[ID:x[0-9]+]], x0
+; CHECK: mov [[ZERO:x[0-9]+]], #0
+; CHECK: mov x21, #0
+; CHECK: bl {{.*}}foo_sret
+; CHECK: mov x0, x21
+; CHECK: cmp x21, [[ZERO]]
+; CHECK: b.ne
+; Access part of the error object and save it to error_ref
+; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
+; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK: bl {{.*}}free
+
+entry:
+  %s = alloca %struct.S, align 8
+  %error_ptr_ref = alloca swifterror %swift_error*
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  call void @foo_sret(%struct.S* sret %s, i32 1, %swift_error** swifterror %error_ptr_ref)
+  %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+  %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+  %tmp = bitcast %swift_error* %error_from_foo to i8*
+  br i1 %had_error_from_foo, label %handler, label %cont
+cont:
+  %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+  %t = load i8, i8* %v1
+  store i8 %t, i8* %error_ref
+  br label %handler
+handler:
+  call void @free(i8* %tmp)
+  ret float 1.0
+}
+
+; "foo_vararg" is a function that takes a swifterror parameter, it also has
+; variable number of arguments.
+declare void @llvm.va_start(i8*) nounwind
+define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
+; CHECK-LABEL: foo_vararg:
+; CHECK-DAG: mov [[ID:w[0-9]+]], #1
+; CHECK: mov x0, #16
+; CHECK: malloc
+; CHECK-DAG: strb [[ID]], [x0, #8]
+
+; First vararg
+; CHECK: ldr {{w[0-9]+}}, [x[[ARG1:[0-9]+]]]
+; Second vararg
+; CHECK: mov [[EIGHT:x[0-9]+]], #8
+; CHECK: add x[[ARG2:[0-9]+]], x[[ARG1]], [[EIGHT]]
+; CHECK: ldr {{w[0-9]+}}, [x[[ARG2]]]
+; Third vararg
+; CHECK: add x[[ARG3:[0-9]+]], x[[ARG2]], [[EIGHT]]
+; CHECK: ldr {{w[0-9]+}}, [x[[ARG3]]]
+
+; CHECK: mov x21, x0
+; CHECK-NOT: x21
+entry:
+  %call = call i8* @malloc(i64 16)
+  %call.0 = bitcast i8* %call to %swift_error*
+  store %swift_error* %call.0, %swift_error** %error_ptr_ref
+  %tmp = getelementptr inbounds i8, i8* %call, i64 8
+  store i8 1, i8* %tmp
+
+  %args = alloca i8*, align 8
+  %a10 = alloca i32, align 4
+  %a11 = alloca i32, align 4
+  %a12 = alloca i32, align 4
+  %v10 = bitcast i8** %args to i8*
+  call void @llvm.va_start(i8* %v10)
+  %v11 = va_arg i8** %args, i32
+  store i32 %v11, i32* %a10, align 4
+  %v12 = va_arg i8** %args, i32
+  store i32 %v12, i32* %a11, align 4
+  %v13 = va_arg i8** %args, i32
+  store i32 %v13, i32* %a12, align 4
+
+  ret float 1.0
+}
+
+; "caller4" calls "foo_vararg" that takes a swifterror parameter.
+define float @caller4(i8* %error_ref) {
+; CHECK-LABEL: caller4:
+
+; CHECK: mov [[ID:x[0-9]+]], x0
+; CHECK: mov [[ZERO:x[0-9]+]], #0
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
+; CHECK: mov x21, #0
+; CHECK: str {{x[0-9]+}}, [sp, #16]
+
+; CHECK: bl {{.*}}foo_vararg
+; CHECK: mov x0, x21
+; CHECK: cmp x21, [[ZERO]]
+; CHECK: b.ne
+; Access part of the error object and save it to error_ref
+; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8]
+; CHECK: strb [[CODE]], [{{.*}}[[ID]]]
+; CHECK: bl {{.*}}free
+entry:
+  %error_ptr_ref = alloca swifterror %swift_error*
+  store %swift_error* null, %swift_error** %error_ptr_ref
+
+  %a10 = alloca i32, align 4
+  %a11 = alloca i32, align 4
+  %a12 = alloca i32, align 4
+  store i32 10, i32* %a10, align 4
+  store i32 11, i32* %a11, align 4
+  store i32 12, i32* %a12, align 4
+  %v10 = load i32, i32* %a10, align 4
+  %v11 = load i32, i32* %a11, align 4
+  %v12 = load i32, i32* %a12, align 4
+
+  %call = call float (%swift_error**, ...) @foo_vararg(%swift_error** swifterror %error_ptr_ref, i32 %v10, i32 %v11, i32 %v12)
+  %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref
+  %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null
+  %tmp = bitcast %swift_error* %error_from_foo to i8*
+  br i1 %had_error_from_foo, label %handler, label %cont
+
+cont:
+  %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1
+  %t = load i8, i8* %v1
+  store i8 %t, i8* %error_ref
+  br label %handler
+handler:
+  call void @free(i8* %tmp)
+  ret float 1.0
+}
+
+; Check that we don't blow up on tail calling swifterror argument functions.
+define float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  %0 = tail call float @tailcallswifterror(%swift_error** swifterror %error_ptr_ref)
+  ret float %0
+}
+define swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  %0 = tail call swiftcc float @tailcallswifterror_swiftcc(%swift_error** swifterror %error_ptr_ref)
+  ret float %0
+}
+
+; CHECK-LABEL: params_in_reg
+; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2.
+; CHECK:  stp     x28, x0, [sp
+; CHECK:  stp     x27, x26, [sp
+; CHECK:  stp     x25, x24, [sp
+; CHECK:  stp     x23, x22, [sp
+; CHECK:  stp     x20, x19, [sp
+; CHECK:  stp     x29, x30, [sp
+; Store argument registers.
+; CHECK:  mov      x20, x1
+; CHECK:  mov      x22, x2
+; CHECK:  mov      x23, x3
+; CHECK:  mov      x24, x4
+; CHECK:  mov      x25, x5
+; CHECK:  mov      x26, x6
+; CHECK:  mov      x27, x7
+; CHECK:  mov      x28, x21
+; Setup call.
+; CHECK:  mov     x8, #0
+; CHECK:  mov     x0, #1
+; CHECK:  mov     x1, #2
+; CHECK:  mov     x2, #3
+; CHECK:  mov     x3, #4
+; CHECK:  mov     x4, #5
+; CHECK:  mov     x5, #6
+; CHECK:  mov     x6, #7
+; CHECK:  mov     x7, #8
+; CHECK:  mov      x21, #0
+; CHECK:  bl      _params_in_reg2
+; Restore original arguments for next call.
+; CHECK:  ldr      x0, [sp
+; CHECK:  mov      x1, x20
+; CHECK:  mov      x2, x22
+; CHECK:  mov      x3, x23
+; CHECK:  mov      x4, x24
+; CHECK:  mov      x5, x25
+; CHECK:  mov      x6, x26
+; CHECK:  mov      x7, x27
+; Restore original swiftself argument and swifterror %err.
+; CHECK:  mov      x21, x28
+; CHECK:  bl      _params_in_reg2
+; Restore calle save registers but don't clober swifterror x21.
+; CHECK-NOT: x21
+; CHECK:  ldp     x29, x30, [sp
+; CHECK-NOT: x21
+; CHECK:  ldp     x20, x19, [sp
+; CHECK-NOT: x21
+; CHECK:  ldp     x23, x22, [sp
+; CHECK-NOT: x21
+; CHECK:  ldp     x25, x24, [sp
+; CHECK-NOT: x21
+; CHECK:  ldp     x27, x26, [sp
+; CHECK-NOT: x21
+; CHECK:  ldr     x28, [sp
+; CHECK-NOT: x21
+; CHECK:  ret
+define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8*, %swift_error** nocapture swifterror %err) {
+  %error_ptr_ref = alloca swifterror %swift_error*, align 8
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8*  null, %swift_error** nocapture swifterror %error_ptr_ref)
+  call swiftcc void @params_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i8*  %8, %swift_error** nocapture swifterror %err)
+  ret void
+}
+declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err)
+
+; CHECK-LABEL: params_and_return_in_reg
+; Store callee saved registers.
+; CHECK:  stp     x28, x0, [sp, #16
+; CHECK:  stp     x27, x26, [sp
+; CHECK:  stp     x25, x24, [sp
+; CHECK:  stp     x23, x22, [sp
+; CHECK:  stp     x20, x19, [sp
+; CHECK:  stp     x29, x30, [sp
+; Save original arguments.
+; CHECK:  mov      x20, x1
+; CHECK:  mov      x22, x2
+; CHECK:  mov      x23, x3
+; CHECK:  mov      x24, x4
+; CHECK:  mov      x25, x5
+; CHECK:  mov      x26, x6
+; CHECK:  mov      x27, x7
+; CHECK:  mov      x28, x21
+; Setup call arguments.
+; CHECK:  mov     x0, #1
+; CHECK:  mov     x1, #2
+; CHECK:  mov     x2, #3
+; CHECK:  mov     x3, #4
+; CHECK:  mov     x4, #5
+; CHECK:  mov     x5, #6
+; CHECK:  mov     x6, #7
+; CHECK:  mov     x7, #8
+; CHECK:  mov      x21, #0
+; CHECK:  bl      _params_in_reg2
+; Store swifterror %error_ptr_ref.
+; CHECK:  stp     {{x[0-9]+}}, x21, [sp]
+; Setup call arguments from original arguments.
+; CHECK:  ldr      x0, [sp, #24
+; CHECK:  mov      x1, x20
+; CHECK:  mov      x2, x22
+; CHECK:  mov      x3, x23
+; CHECK:  mov      x4, x24
+; CHECK:  mov      x5, x25
+; CHECK:  mov      x6, x26
+; CHECK:  mov      x7, x27
+; CHECK:  mov      x21, x28
+; CHECK:  bl      _params_and_return_in_reg2
+; Store return values.
+; CHECK:  mov      x20, x0
+; CHECK:  mov      x22, x1
+; CHECK:  mov      x23, x2
+; CHECK:  mov      x24, x3
+; CHECK:  mov      x25, x4
+; CHECK:  mov      x26, x5
+; CHECK:  mov      x27, x6
+; CHECK:  mov      x28, x7
+; Save swifterror %err.
+; CHECK:  mov      x19, x21
+; Setup call.
+; CHECK:  mov     x0, #1
+; CHECK:  mov     x1, #2
+; CHECK:  mov     x2, #3
+; CHECK:  mov     x3, #4
+; CHECK:  mov     x4, #5
+; CHECK:  mov     x5, #6
+; CHECK:  mov     x6, #7
+; CHECK:  mov     x7, #8
+; ... setup call with swiferror %error_ptr_ref.
+; CHECK:  ldr     x21, [sp, #8]
+; CHECK:  bl      _params_in_reg2
+; Restore return values for return from this function.
+; CHECK:  mov      x0, x20
+; CHECK:  mov      x1, x22
+; CHECK:  mov      x2, x23
+; CHECK:  mov      x3, x24
+; CHECK:  mov      x4, x25
+; CHECK:  mov      x5, x26
+; CHECK:  mov      x6, x27
+; CHECK:  mov      x7, x28
+; CHECK:  mov      x21, x19
+; Restore callee save registers.
+; CHECK:  ldp     x29, x30, [sp
+; CHECK:  ldp     x20, x19, [sp
+; CHECK:  ldp     x23, x22, [sp
+; CHECK:  ldp     x25, x24, [sp
+; CHECK:  ldp     x27, x26, [sp
+; CHECK:  ldr     x28, [sp
+; CHECK:  ret
+define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err) {
+  %error_ptr_ref = alloca swifterror %swift_error*, align 8
+  store %swift_error* null, %swift_error** %error_ptr_ref
+  call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8*  null, %swift_error** nocapture swifterror %error_ptr_ref)
+  %val = call swiftcc  { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i8*  %8, %swift_error** nocapture swifterror %err)
+  call swiftcc void @params_in_reg2(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i8*  null, %swift_error** nocapture swifterror %error_ptr_ref)
+  ret { i64, i64, i64, i64, i64, i64, i64, i64 } %val
+}
+
+declare swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* , %swift_error** nocapture swifterror %err)
+
+declare void @acallee(i8*)
+
+; Make sure we don't tail call if the caller returns a swifterror value. We
+; would have to move into the swifterror register before the tail call.
+; CHECK: tailcall_from_swifterror:
+; CHECK-NOT: b _acallee
+; CHECK: bl _acallee
+
+define swiftcc void @tailcall_from_swifterror(%swift_error** swifterror %error_ptr_ref) {
+entry:
+  tail call void @acallee(i8* null)
+  ret void
+}
+
+declare swiftcc void @foo2(%swift_error** swifterror)
+; CHECK-LABEL: testAssign
+; CHECK: mov      x21, #0
+; CHECK: bl      _foo2
+; CHECK: mov      x0, x21
+
+define swiftcc %swift_error* @testAssign(i8* %error_ref) {
+entry:
+  %error_ptr = alloca swifterror %swift_error*
+  store %swift_error* null, %swift_error** %error_ptr
+  call swiftcc void @foo2(%swift_error** swifterror %error_ptr)
+  br label %a
+
+a:
+  %error = load %swift_error*, %swift_error** %error_ptr
+  ret %swift_error* %error
+}

From d9bb7b69abe266ab8d3ba54186604af9cc8750ad Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 08:45:37 +0000
Subject: [PATCH 0143/1176] [AArch64][SVE2] Asm: fix overlapping bit

Summary:
Bit 20 in sve2_int_arith_pred TableGen class was overlapping. The
encodings are not affected as bit 20 is defined by the opc bits
and this was overwriting the earlier error of setting bit 20 to 0.

Raised by Momchil: https://reviews.llvm.org/D62130

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62292

llvm-svn: 361609
---
 llvm/lib/Target/AArch64/SVEInstrFormats.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 79f2dab932f9e..ac4d800197b70 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2070,7 +2070,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
   bits<5> Zdn;
   let Inst{31-24} = 0b01000100;
   let Inst{23-22} = sz;
-  let Inst{21-20} = 0b01;
+  let Inst{21}    = 0b0;
   let Inst{20-16} = opc{5-1};
   let Inst{15-14} = 0b10;
   let Inst{13}    = opc{0};

From 119c31ad9374210b5a950960a401582aac0c5333 Mon Sep 17 00:00:00 2001
From: Neil Henning <neil.henning@amd.com>
Date: Fri, 24 May 2019 08:59:17 +0000
Subject: [PATCH 0144/1176] StructurizeCFG: Relax uniformity checks.

This change relaxes the checks for hasOnlyUniformBranches such that our
region is uniform if:

1. All conditional branches that are direct children are uniform.
2. And either:
  a. All sub-regions are uniform.
  b. There is one or less conditional branches among the direct
     children.

Differential Revision: https://reviews.llvm.org/D62198

llvm-svn: 361610
---
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp |  33 +++++-
 .../StructurizeCFG/AMDGPU/uniform-regions.ll  | 108 +++++++++++++++++-
 2 files changed, 137 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index b3ef31cea0a88..42318e47dae4a 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -62,6 +62,11 @@ static cl::opt<bool> ForceSkipUniformRegions(
   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
   cl::init(false));
 
+static cl::opt<bool>
+    RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
+                          cl::desc("Allow relaxed uniform region checks"),
+                          cl::init(false));
+
 // Definition of the complex types used in this pass.
 
 using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -936,6 +941,11 @@ void StructurizeCFG::rebuildSSA() {
 
 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
                                    const LegacyDivergenceAnalysis &DA) {
+  // Bool for if all sub-regions are uniform.
+  bool SubRegionsAreUniform = true;
+  // Count of how many direct children are conditional.
+  unsigned ConditionalDirectChildren = 0;
+
   for (auto E : R->elements()) {
     if (!E->isSubRegion()) {
       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -944,6 +954,10 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
 
       if (!DA.isUniform(Br))
         return false;
+
+      // One of our direct children is conditional.
+      ConditionalDirectChildren++;
+
       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
                         << " has uniform terminator\n");
     } else {
@@ -961,12 +975,25 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
         if (!Br || !Br->isConditional())
           continue;
 
-        if (!Br->getMetadata(UniformMDKindID))
-          return false;
+        if (!Br->getMetadata(UniformMDKindID)) {
+          // Early exit if we cannot have relaxed uniform regions.
+          if (!RelaxedUniformRegions)
+            return false;
+
+          SubRegionsAreUniform = false;
+          break;
+        }
       }
     }
   }
-  return true;
+
+  // Our region is uniform if:
+  // 1. All conditional branches that are direct children are uniform (checked
+  // above).
+  // 2. And either:
+  //   a. All sub-regions are uniform.
+  //   b. There is one or less conditional branches among the direct children.
+  return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
 }
 
 /// Run the transformation for each region found
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
index 7c8c09b782b51..e91e73ceb2b60 100644
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions -structurizecfg-relaxed-uniform-regions < %s | FileCheck %s
 
 define amdgpu_cs void @uniform(i32 inreg %v) {
 ; CHECK-LABEL: @uniform(
@@ -79,4 +79,110 @@ end:
   ret void
 }
 
+define amdgpu_cs void @uniform_branch_to_nonuniform_subregions(i32 addrspace(4)* %ptr, i32 inreg %data) {
+; CHECK-LABEL: @uniform_branch_to_nonuniform_subregions(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i32 [[DATA:%.*]], 42
+; CHECK-NEXT:    br i1 [[C]], label [[UNIFORM_FOR_BODY:%.*]], label [[FOR_BODY:%.*]], !structurizecfg.uniform !0
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW1:%.*]] ]
+; CHECK-NEXT:    [[CC:%.*]] = icmp ult i32 [[I]], 4
+; CHECK-NEXT:    br i1 [[CC]], label [[MID_LOOP:%.*]], label [[FLOW1]]
+; CHECK:       mid.loop:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[CC2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[CC2]], label [[END_LOOP:%.*]], label [[FLOW2:%.*]]
+; CHECK:       Flow1:
+; CHECK-NEXT:    [[TMP0]] = phi i32 [ [[TMP2:%.*]], [[FLOW2]] ], [ undef, [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP3:%.*]], [[FLOW2]] ], [ true, [[FOR_BODY]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       end.loop:
+; CHECK-NEXT:    [[I_INC:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT:    br label [[FLOW2]]
+; CHECK:       Flow2:
+; CHECK-NEXT:    [[TMP2]] = phi i32 [ [[I_INC]], [[END_LOOP]] ], [ undef, [[MID_LOOP]] ]
+; CHECK-NEXT:    [[TMP3]] = phi i1 [ false, [[END_LOOP]] ], [ true, [[MID_LOOP]] ]
+; CHECK-NEXT:    br label [[FLOW1]]
+; CHECK:       for.end:
+; CHECK-NEXT:    br i1 [[CC]], label [[IF:%.*]], label [[FLOW:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       uniform.for.body:
+; CHECK-NEXT:    [[UNIFORM_I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP4:%.*]], [[FLOW4:%.*]] ]
+; CHECK-NEXT:    [[UNIFORM_CC:%.*]] = icmp ult i32 [[UNIFORM_I]], 4
+; CHECK-NEXT:    br i1 [[UNIFORM_CC]], label [[UNIFORM_MID_LOOP:%.*]], label [[FLOW4]]
+; CHECK:       uniform.mid.loop:
+; CHECK-NEXT:    [[UNIFORM_V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[UNIFORM_CC2:%.*]] = icmp eq i32 [[UNIFORM_V]], 0
+; CHECK-NEXT:    br i1 [[UNIFORM_CC2]], label [[UNIFORM_END_LOOP:%.*]], label [[FLOW5:%.*]]
+; CHECK:       Flow4:
+; CHECK-NEXT:    [[TMP4]] = phi i32 [ [[TMP6:%.*]], [[FLOW5]] ], [ undef, [[UNIFORM_FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW5]] ], [ true, [[UNIFORM_FOR_BODY]] ]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[UNIFORM_FOR_END:%.*]], label [[UNIFORM_FOR_BODY]]
+; CHECK:       uniform.end.loop:
+; CHECK-NEXT:    [[UNIFORM_I_INC:%.*]] = add i32 [[UNIFORM_I]], 1
+; CHECK-NEXT:    br label [[FLOW5]]
+; CHECK:       Flow5:
+; CHECK-NEXT:    [[TMP6]] = phi i32 [ [[UNIFORM_I_INC]], [[UNIFORM_END_LOOP]] ], [ undef, [[UNIFORM_MID_LOOP]] ]
+; CHECK-NEXT:    [[TMP7]] = phi i1 [ false, [[UNIFORM_END_LOOP]] ], [ true, [[UNIFORM_MID_LOOP]] ]
+; CHECK-NEXT:    br label [[FLOW4]]
+; CHECK:       uniform.for.end:
+; CHECK-NEXT:    br i1 [[UNIFORM_CC]], label [[UNIFORM_IF:%.*]], label [[FLOW3:%.*]]
+; CHECK:       uniform.if:
+; CHECK-NEXT:    br label [[FLOW3]]
+; CHECK:       Flow:
+; CHECK-NEXT:    br label [[END:%.*]]
+; CHECK:       Flow3:
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c = icmp eq i32 %data, 42
+  br i1 %c, label %uniform.for.body, label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %cc = icmp ult i32 %i, 4
+  br i1 %cc, label %mid.loop, label %for.end
+
+mid.loop:
+  %v = call i32 @llvm.amdgcn.workitem.id.x()
+  %cc2 = icmp eq i32 %v, 0
+  br i1 %cc2, label %end.loop, label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  br label %for.body
+
+for.end:
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+uniform.for.body:
+  %uniform.i = phi i32 [0, %entry], [%uniform.i.inc, %uniform.end.loop]
+  %uniform.cc = icmp ult i32 %uniform.i, 4
+  br i1 %uniform.cc, label %uniform.mid.loop, label %uniform.for.end
+
+uniform.mid.loop:
+  %uniform.v = call i32 @llvm.amdgcn.workitem.id.x()
+  %uniform.cc2 = icmp eq i32 %uniform.v, 0
+  br i1 %uniform.cc2, label %uniform.end.loop, label %uniform.for.end
+
+uniform.end.loop:
+  %uniform.i.inc = add i32 %uniform.i, 1
+  br label %uniform.for.body
+
+uniform.for.end:
+  br i1 %uniform.cc, label %uniform.if, label %end
+
+uniform.if:
+  br label %end
+
+end:
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()

From 6bca64fe5e9cf04cd39217e5ae2e148dc96b7dae Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 09:06:37 +0000
Subject: [PATCH 0145/1176] [AArch64][SVE2] Asm: add saturating add/sub
 instructions

Summary:
Patch adds support for the following instructions:

    * SQADD, UQADD, SUQADD, USQADD
    * SQSUB, UQSUB, SQSUBR, UQSUBR

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62130

llvm-svn: 361611
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++++
 llvm/test/MC/AArch64/SVE2/sqadd-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/sqadd.s             | 59 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqsub-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/sqsub.s             | 59 +++++++++++++++++++
 .../test/MC/AArch64/SVE2/sqsubr-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/sqsubr.s            | 59 +++++++++++++++++++
 .../test/MC/AArch64/SVE2/suqadd-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/suqadd.s            | 59 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uqadd-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/uqadd.s             | 59 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uqsub-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/uqsub.s             | 59 +++++++++++++++++++
 .../test/MC/AArch64/SVE2/uqsubr-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/uqsubr.s            | 59 +++++++++++++++++++
 .../test/MC/AArch64/SVE2/usqadd-diagnostics.s | 37 ++++++++++++
 llvm/test/MC/AArch64/SVE2/usqadd.s            | 59 +++++++++++++++++++
 17 files changed, 778 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqadd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqadd.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqsub-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqsub.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqsubr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqsubr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/suqadd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/suqadd.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqadd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqadd.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqsub-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqsub.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqsubr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqsubr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usqadd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usqadd.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 62dfdf1345aae..7e2b152395a43 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1140,6 +1140,16 @@ let Predicates = [HasSVE2] in {
   defm SQABS_ZPmZ   : sve2_int_un_pred_arit<0b100, "sqabs">;
   defm SQNEG_ZPmZ   : sve2_int_un_pred_arit<0b101, "sqneg">;
 
+  // SVE2 saturating add/subtract
+  defm SQADD_ZPmZ  : sve2_int_arith_pred<0b110000, "sqadd">;
+  defm UQADD_ZPmZ  : sve2_int_arith_pred<0b110010, "uqadd">;
+  defm SQSUB_ZPmZ  : sve2_int_arith_pred<0b110100, "sqsub">;
+  defm UQSUB_ZPmZ  : sve2_int_arith_pred<0b110110, "uqsub">;
+  defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
+  defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
+  defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
+  defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
+
   // SVE2 integer multiply long
   defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
   defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
diff --git a/llvm/test/MC/AArch64/SVE2/sqadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqadd-diagnostics.s
new file mode 100644
index 0000000000000..4ba83c1e42e1a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqadd-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqadd z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqadd z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqadd z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqadd z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqadd z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqadd z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqadd z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqadd z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqadd z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqadd z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqadd.s b/llvm/test/MC/AArch64/SVE2/sqadd.s
new file mode 100644
index 0000000000000..77c3ca9f499fe
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqadd.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x18,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 18 44 <unknown>
+
+sqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x58,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 58 44 <unknown>
+
+sqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x98,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 98 44 <unknown>
+
+sqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd8,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d8 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd8,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d8 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd8,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d8 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqsub-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqsub-diagnostics.s
new file mode 100644
index 0000000000000..9960dde0201ec
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqsub-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqsub z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqsub z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqsub z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqsub z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqsub z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqsub z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqsub z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqsub z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqsub z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqsub z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqsub.s b/llvm/test/MC/AArch64/SVE2/sqsub.s
new file mode 100644
index 0000000000000..9a3e4c73bfcec
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqsub.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqsub z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqsub z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x1a,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1a 44 <unknown>
+
+sqsub z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqsub z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x5a,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5a 44 <unknown>
+
+sqsub z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqsub z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x9a,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 9a 44 <unknown>
+
+sqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xda,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f da 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqsub z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqsub z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xda,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 da 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xda,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f da 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqsubr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqsubr-diagnostics.s
new file mode 100644
index 0000000000000..af7caaa67ab7d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqsubr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqsubr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqsubr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqsubr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqsubr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqsubr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqsubr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqsubr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqsubr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqsubr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqsubr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqsubr.s b/llvm/test/MC/AArch64/SVE2/sqsubr.s
new file mode 100644
index 0000000000000..a19d4fef39ecd
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqsubr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqsubr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqsubr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x1e,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1e 44 <unknown>
+
+sqsubr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqsubr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x5e,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5e 44 <unknown>
+
+sqsubr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqsubr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x9e,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 9e 44 <unknown>
+
+sqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xde,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f de 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqsubr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqsubr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xde,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 de 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xde,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f de 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/suqadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/suqadd-diagnostics.s
new file mode 100644
index 0000000000000..9faa706b69cd5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/suqadd-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+suqadd z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: suqadd z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+suqadd z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: suqadd z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+suqadd z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: suqadd z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+suqadd z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: suqadd z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+suqadd z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: suqadd z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/suqadd.s b/llvm/test/MC/AArch64/SVE2/suqadd.s
new file mode 100644
index 0000000000000..016275da2e0c2
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/suqadd.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+suqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: suqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x1c,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1c 44 <unknown>
+
+suqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: suqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x5c,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5c 44 <unknown>
+
+suqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: suqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x9c,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 9c 44 <unknown>
+
+suqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: suqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdc,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f dc 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+suqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: suqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xdc,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 dc 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+suqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: suqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdc,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f dc 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqadd-diagnostics.s
new file mode 100644
index 0000000000000..c08a0cc4b4639
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqadd-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqadd z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqadd z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqadd z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqadd z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqadd z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqadd z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqadd z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqadd z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqadd z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqadd z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqadd.s b/llvm/test/MC/AArch64/SVE2/uqadd.s
new file mode 100644
index 0000000000000..54d96163994ed
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqadd.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x19,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 19 44 <unknown>
+
+uqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x59,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 59 44 <unknown>
+
+uqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x99,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 99 44 <unknown>
+
+uqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd9,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d9 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd9,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d9 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd9,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d9 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqsub-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqsub-diagnostics.s
new file mode 100644
index 0000000000000..486c458d43e7a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqsub-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqsub z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqsub z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqsub z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqsub z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqsub z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqsub z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqsub z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqsub z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqsub z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqsub z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqsub.s b/llvm/test/MC/AArch64/SVE2/uqsub.s
new file mode 100644
index 0000000000000..0df03aa2f30d0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqsub.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqsub z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqsub z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x1b,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1b 44 <unknown>
+
+uqsub z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqsub z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x5b,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5b 44 <unknown>
+
+uqsub z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqsub z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x9b,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 9b 44 <unknown>
+
+uqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdb,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f db 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqsub z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqsub z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xdb,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 db 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqsub z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdb,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f db 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqsubr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqsubr-diagnostics.s
new file mode 100644
index 0000000000000..e6300a2e54bbb
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqsubr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqsubr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqsubr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqsubr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqsubr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqsubr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqsubr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqsubr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqsubr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqsubr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqsubr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqsubr.s b/llvm/test/MC/AArch64/SVE2/uqsubr.s
new file mode 100644
index 0000000000000..64f95c72d8612
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqsubr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqsubr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqsubr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x1f,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f 44 <unknown>
+
+uqsubr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqsubr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x5f,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5f 44 <unknown>
+
+uqsubr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqsubr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x9f,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 9f 44 <unknown>
+
+uqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdf,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f df 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqsubr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqsubr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xdf,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 df 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqsubr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdf,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f df 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/usqadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/usqadd-diagnostics.s
new file mode 100644
index 0000000000000..4c7271f983061
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usqadd-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+usqadd z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: usqadd z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+usqadd z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usqadd z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usqadd z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usqadd z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+usqadd z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: usqadd z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usqadd z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: usqadd z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/usqadd.s b/llvm/test/MC/AArch64/SVE2/usqadd.s
new file mode 100644
index 0000000000000..9bec555dcbd6b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usqadd.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+usqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: usqadd z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x1d,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1d 44 <unknown>
+
+usqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: usqadd z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x5d,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5d 44 <unknown>
+
+usqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: usqadd z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x9d,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 9d 44 <unknown>
+
+usqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: usqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdd,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f dd 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+usqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: usqadd z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xdd,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 dd 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+usqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: usqadd z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xdd,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f dd 44 <unknown>

From 968cb0e0499436cdbe4dd97e61b1ffb8abd5817c Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 09:17:23 +0000
Subject: [PATCH 0146/1176] [AArch64][SVE2] Asm: add various bitwise shift
 instructions

Summary:
This patch adds support for the SVE2 saturating/rounding bitwise shift
left (predicated) group of instructions:

    * SRSHL, URSHL, SRSHLR, URSHLR, SQSHL, UQSHL, SQRSHL, UQRSHL,
      SQSHLR, UQSHLR, SQRSHLR, UQRSHLR

Immediate forms of the SQSHL and UQSHL instructions are also added to
the existing SVE bitwise shift by immediate (predicated) group, as well
as three new instructions SRSHR/URSHR/SQSHLU. The new instructions in
this group are encoded similarly and are implemented using the same
TableGen class with a minimal change (1 bit in encoding).

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62140

llvm-svn: 361612
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  29 +++-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  14 +-
 .../test/MC/AArch64/SVE2/sqrshl-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/sqrshl.s            |  59 ++++++++
 .../MC/AArch64/SVE2/sqrshlr-diagnostics.s     |  37 +++++
 llvm/test/MC/AArch64/SVE2/sqrshlr.s           |  59 ++++++++
 llvm/test/MC/AArch64/SVE2/sqshl-diagnostics.s |  98 +++++++++++++
 llvm/test/MC/AArch64/SVE2/sqshl.s             | 131 ++++++++++++++++++
 .../test/MC/AArch64/SVE2/sqshlr-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/sqshlr.s            |  59 ++++++++
 .../test/MC/AArch64/SVE2/sqshlu-diagnostics.s |  78 +++++++++++
 llvm/test/MC/AArch64/SVE2/sqshlu.s            |  83 +++++++++++
 llvm/test/MC/AArch64/SVE2/srshl-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/srshl.s             |  59 ++++++++
 .../test/MC/AArch64/SVE2/srshlr-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/srshlr.s            |  59 ++++++++
 llvm/test/MC/AArch64/SVE2/srshr-diagnostics.s |  78 +++++++++++
 llvm/test/MC/AArch64/SVE2/srshr.s             |  84 +++++++++++
 .../test/MC/AArch64/SVE2/uqrshl-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/uqrshl.s            |  59 ++++++++
 .../MC/AArch64/SVE2/uqrshlr-diagnostics.s     |  37 +++++
 llvm/test/MC/AArch64/SVE2/uqrshlr.s           |  59 ++++++++
 llvm/test/MC/AArch64/SVE2/uqshl-diagnostics.s |  98 +++++++++++++
 llvm/test/MC/AArch64/SVE2/uqshl.s             | 131 ++++++++++++++++++
 .../test/MC/AArch64/SVE2/uqshlr-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/uqshlr.s            |  59 ++++++++
 llvm/test/MC/AArch64/SVE2/urshl-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/urshl.s             |  59 ++++++++
 .../test/MC/AArch64/SVE2/urshlr-diagnostics.s |  37 +++++
 llvm/test/MC/AArch64/SVE2/urshlr.s            |  59 ++++++++
 llvm/test/MC/AArch64/SVE2/urshr-diagnostics.s |  78 +++++++++++
 llvm/test/MC/AArch64/SVE2/urshr.s             |  84 +++++++++++
 32 files changed, 1935 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshlr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshlr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshlr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshlr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshlu-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshlu.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srshl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srshl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srshlr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srshlr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srshr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srshr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshlr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshlr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshlr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshlr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/urshl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/urshl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/urshlr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/urshlr.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/urshr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/urshr.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7e2b152395a43..9d775ec8d5f7f 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -875,10 +875,10 @@ let Predicates = [HasSVE] in {
   defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
 
   // Predicated shifts
-  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b000, "asr">;
-  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b001, "lsr">;
-  defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b011, "lsl">;
-  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b100, "asrd">;
+  defm ASR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
+  defm LSR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
+  defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
+  defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
 
   defm ASR_ZPmZ  : sve_int_bin_pred_shift<0b000, "asr">;
   defm LSR_ZPmZ  : sve_int_bin_pred_shift<0b001, "lsr">;
@@ -1150,6 +1150,20 @@ let Predicates = [HasSVE2] in {
   defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
   defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
 
+  // SVE2 saturating/rounding bitwise shift left (predicated)
+  defm SRSHL_ZPmZ   : sve2_int_arith_pred<0b000100, "srshl">;
+  defm URSHL_ZPmZ   : sve2_int_arith_pred<0b000110, "urshl">;
+  defm SRSHLR_ZPmZ  : sve2_int_arith_pred<0b001100, "srshlr">;
+  defm URSHLR_ZPmZ  : sve2_int_arith_pred<0b001110, "urshlr">;
+  defm SQSHL_ZPmZ   : sve2_int_arith_pred<0b010000, "sqshl">;
+  defm UQSHL_ZPmZ   : sve2_int_arith_pred<0b010010, "uqshl">;
+  defm SQRSHL_ZPmZ  : sve2_int_arith_pred<0b010100, "sqrshl">;
+  defm UQRSHL_ZPmZ  : sve2_int_arith_pred<0b010110, "uqrshl">;
+  defm SQSHLR_ZPmZ  : sve2_int_arith_pred<0b011000, "sqshlr">;
+  defm UQSHLR_ZPmZ  : sve2_int_arith_pred<0b011010, "uqshlr">;
+  defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
+  defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+
   // SVE2 integer multiply long
   defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
   defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
@@ -1157,4 +1171,11 @@ let Predicates = [HasSVE2] in {
   defm SMULLT_ZZZ   : sve2_wide_int_arith_long<0b11101, "smullt">;
   defm UMULLB_ZZZ   : sve2_wide_int_arith_long<0b11110, "umullb">;
   defm UMULLT_ZZZ   : sve2_wide_int_arith_long<0b11111, "umullt">;
+
+  // Predicated shifts
+  defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
+  defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
+  defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
+  defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
+  defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
 }
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ac4d800197b70..8b4c00935e524 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2926,9 +2926,9 @@ multiclass sve_int_index_rr<string asm> {
 //===----------------------------------------------------------------------===//
 // SVE Bitwise Shift - Predicated Group
 //===----------------------------------------------------------------------===//
-class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
-                               ZPRRegOp zprty, Operand immtype,
-                               ElementSizeEnum size>
+class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
+                                 ZPRRegOp zprty, Operand immtype,
+                                 ElementSizeEnum size>
 : I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
   asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
   "",
@@ -2938,8 +2938,8 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
   bits<6> imm;
   let Inst{31-24} = 0b00000100;
   let Inst{23-22} = tsz8_64{3-2};
-  let Inst{21-19} = 0b000;
-  let Inst{18-16} = opc;
+  let Inst{21-20} = 0b00;
+  let Inst{19-16} = opc;
   let Inst{15-13} = 0b100;
   let Inst{12-10} = Pg;
   let Inst{9-8}   = tsz8_64{1-0};
@@ -2951,7 +2951,7 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
   let ElementSize = size;
 }
 
-multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
   def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
                                       ElementSizeB>;
   def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
@@ -2969,7 +2969,7 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
   }
 }
 
-multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
   def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
                                       ElementSizeB>;
   def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqrshl-diagnostics.s
new file mode 100644
index 0000000000000..b56264a3cf070
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshl-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqrshl z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqrshl z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqrshl z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshl z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshl z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshl z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqrshl z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqrshl z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshl z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqrshl z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshl.s b/llvm/test/MC/AArch64/SVE2/sqrshl.s
new file mode 100644
index 0000000000000..048d952629674
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshl.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqrshl z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqrshl z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x0a,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 0a 44 <unknown>
+
+sqrshl z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqrshl z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x4a,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 4a 44 <unknown>
+
+sqrshl z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqrshl z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x8a,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 8a 44 <unknown>
+
+sqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xca,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f ca 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqrshl z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqrshl z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xca,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 ca 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xca,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f ca 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshlr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqrshlr-diagnostics.s
new file mode 100644
index 0000000000000..76f7d155f3dc4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshlr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqrshlr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqrshlr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqrshlr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshlr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshlr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshlr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqrshlr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqrshlr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshlr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqrshlr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshlr.s b/llvm/test/MC/AArch64/SVE2/sqrshlr.s
new file mode 100644
index 0000000000000..4566611ee14a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshlr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqrshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqrshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x0e,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 0e 44 <unknown>
+
+sqrshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqrshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x4e,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 4e 44 <unknown>
+
+sqrshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqrshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x8e,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 8e 44 <unknown>
+
+sqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xce,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f ce 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqrshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqrshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xce,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 ce 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xce,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f ce 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshl-diagnostics.s
new file mode 100644
index 0000000000000..565cbf19c7720
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshl-diagnostics.s
@@ -0,0 +1,98 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+sqshl z0.b, p0/m, z0.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sqshl z0.b, p0/m, z0.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.b, p0/m, z0.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sqshl z0.b, p0/m, z0.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.h, p0/m, z0.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sqshl z0.h, p0/m, z0.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.h, p0/m, z0.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sqshl z0.h, p0/m, z0.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.s, p0/m, z0.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sqshl z0.s, p0/m, z0.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.s, p0/m, z0.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sqshl z0.s, p0/m, z0.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.d, p0/m, z0.d, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: sqshl z0.d, p0/m, z0.d, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.d, p0/m, z0.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: sqshl z0.d, p0/m, z0.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqshl z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqshl z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.b, p0/m, z1.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqshl z0.b, p0/m, z1.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqshl z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshl z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshl z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.b, p0/m, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshl z0.b, p0/m, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.d, p0/m, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshl z0.d, p0/m, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqshl z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqshl z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqshl z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshl z0.b, p8/m, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqshl z0.b, p8/m, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshl.s b/llvm/test/MC/AArch64/SVE2/sqshl.s
new file mode 100644
index 0000000000000..c7065a50ccd53
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshl.s
@@ -0,0 +1,131 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshl z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqshl z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x08,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 08 44 <unknown>
+
+sqshl z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqshl z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x48,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 48 44 <unknown>
+
+sqshl z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqshl z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x88,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 88 44 <unknown>
+
+sqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc8,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c8 44 <unknown>
+
+sqshl z0.b, p0/m, z0.b, #0
+// CHECK-INST: sqshl z0.b, p0/m, z0.b, #0
+// CHECK-ENCODING: [0x00,0x81,0x06,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 81 06 04 <unknown>
+
+sqshl z31.b, p0/m, z31.b, #7
+// CHECK-INST: sqshl z31.b, p0/m, z31.b, #7
+// CHECK-ENCODING: [0xff,0x81,0x06,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 81 06 04 <unknown>
+
+sqshl z0.h, p0/m, z0.h, #0
+// CHECK-INST: sqshl z0.h, p0/m, z0.h, #0
+// CHECK-ENCODING: [0x00,0x82,0x06,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 82 06 04 <unknown>
+
+sqshl z31.h, p0/m, z31.h, #15
+// CHECK-INST: sqshl z31.h, p0/m, z31.h, #15
+// CHECK-ENCODING: [0xff,0x83,0x06,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 06 04 <unknown>
+
+sqshl z0.s, p0/m, z0.s, #0
+// CHECK-INST: sqshl z0.s, p0/m, z0.s, #0
+// CHECK-ENCODING: [0x00,0x80,0x46,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 46 04 <unknown>
+
+sqshl z31.s, p0/m, z31.s, #31
+// CHECK-INST: sqshl z31.s, p0/m, z31.s, #31
+// CHECK-ENCODING: [0xff,0x83,0x46,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 46 04 <unknown>
+
+sqshl z0.d, p0/m, z0.d, #0
+// CHECK-INST: sqshl z0.d, p0/m, z0.d, #0
+// CHECK-ENCODING: [0x00,0x80,0x86,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 86 04 <unknown>
+
+sqshl z31.d, p0/m, z31.d, #63
+// CHECK-INST: sqshl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc6,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 c6 04 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqshl z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqshl z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xc8,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 c8 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc8,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c8 44 <unknown>
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqshl z31.d, p0/m, z31.d, #63
+// CHECK-INST: sqshl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc6,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 c6 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqshl z31.d, p0/m, z31.d, #63
+// CHECK-INST: sqshl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc6,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 c6 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshlr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshlr-diagnostics.s
new file mode 100644
index 0000000000000..a33ea84e548c2
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshlr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqshlr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqshlr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqshlr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshlr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshlr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqshlr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqshlr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqshlr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshlr.s b/llvm/test/MC/AArch64/SVE2/sqshlr.s
new file mode 100644
index 0000000000000..36e95d7fd3b7d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshlr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: sqshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x0c,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 0c 44 <unknown>
+
+sqshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: sqshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x4c,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 4c 44 <unknown>
+
+sqshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: sqshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x8c,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 8c 44 <unknown>
+
+sqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcc,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cc 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: sqshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xcc,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 cc 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: sqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcc,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cc 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshlu-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshlu-diagnostics.s
new file mode 100644
index 0000000000000..0c2601a598a8c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshlu-diagnostics.s
@@ -0,0 +1,78 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+sqshlu z0.b, p0/m, z0.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sqshlu z0.b, p0/m, z0.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.b, p0/m, z0.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sqshlu z0.b, p0/m, z0.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.h, p0/m, z0.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sqshlu z0.h, p0/m, z0.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.h, p0/m, z0.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sqshlu z0.h, p0/m, z0.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.s, p0/m, z0.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sqshlu z0.s, p0/m, z0.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.s, p0/m, z0.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sqshlu z0.s, p0/m, z0.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.d, p0/m, z0.d, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: sqshlu z0.d, p0/m, z0.d, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.d, p0/m, z0.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: sqshlu z0.d, p0/m, z0.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqshlu z0.b, p0/m, z1.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqshlu z0.b, p0/m, z1.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sqshlu z0.b, p0/m, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshlu z0.b, p0/m, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.d, p0/m, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshlu z0.d, p0/m, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+sqshlu z0.b, p0/z, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: sqshlu z0.b, p0/z, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshlu z0.b, p8/m, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: sqshlu z0.b, p8/m, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshlu.s b/llvm/test/MC/AArch64/SVE2/sqshlu.s
new file mode 100644
index 0000000000000..196db03a26dfe
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshlu.s
@@ -0,0 +1,83 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshlu z0.b, p0/m, z0.b, #0
+// CHECK-INST: sqshlu z0.b, p0/m, z0.b, #0
+// CHECK-ENCODING: [0x00,0x81,0x0f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 81 0f 04 <unknown>
+
+sqshlu z31.b, p0/m, z31.b, #7
+// CHECK-INST: sqshlu z31.b, p0/m, z31.b, #7
+// CHECK-ENCODING: [0xff,0x81,0x0f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 81 0f 04 <unknown>
+
+sqshlu z0.h, p0/m, z0.h, #0
+// CHECK-INST: sqshlu z0.h, p0/m, z0.h, #0
+// CHECK-ENCODING: [0x00,0x82,0x0f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 82 0f 04 <unknown>
+
+sqshlu z31.h, p0/m, z31.h, #15
+// CHECK-INST: sqshlu z31.h, p0/m, z31.h, #15
+// CHECK-ENCODING: [0xff,0x83,0x0f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 0f 04 <unknown>
+
+sqshlu z0.s, p0/m, z0.s, #0
+// CHECK-INST: sqshlu z0.s, p0/m, z0.s, #0
+// CHECK-ENCODING: [0x00,0x80,0x4f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 4f 04 <unknown>
+
+sqshlu z31.s, p0/m, z31.s, #31
+// CHECK-INST: sqshlu z31.s, p0/m, z31.s, #31
+// CHECK-ENCODING: [0xff,0x83,0x4f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 4f 04 <unknown>
+
+sqshlu z0.d, p0/m, z0.d, #0
+// CHECK-INST: sqshlu z0.d, p0/m, z0.d, #0
+// CHECK-ENCODING: [0x00,0x80,0x8f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 8f 04 <unknown>
+
+sqshlu z31.d, p0/m, z31.d, #63
+// CHECK-INST: sqshlu z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xcf,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 cf 04 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+sqshlu z31.d, p0/m, z31.d, #63
+// CHECK-INST: sqshlu z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xcf,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 cf 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+sqshlu z31.d, p0/m, z31.d, #63
+// CHECK-INST: sqshlu z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xcf,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 cf 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/srshl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/srshl-diagnostics.s
new file mode 100644
index 0000000000000..6d599aa08df1a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srshl-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+srshl z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: srshl z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+srshl z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srshl z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshl z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srshl z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+srshl z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: srshl z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshl z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: srshl z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/srshl.s b/llvm/test/MC/AArch64/SVE2/srshl.s
new file mode 100644
index 0000000000000..ab0c8f2944522
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srshl.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+srshl z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: srshl z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x02,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 02 44 <unknown>
+
+srshl z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: srshl z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x42,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 42 44 <unknown>
+
+srshl z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: srshl z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x82,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 82 44 <unknown>
+
+srshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: srshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc2,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c2 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+srshl z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: srshl z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xc2,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 c2 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+srshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: srshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc2,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c2 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/srshlr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/srshlr-diagnostics.s
new file mode 100644
index 0000000000000..f1f760da9c69d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srshlr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+srshlr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: srshlr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+srshlr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srshlr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshlr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srshlr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+srshlr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: srshlr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshlr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: srshlr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/srshlr.s b/llvm/test/MC/AArch64/SVE2/srshlr.s
new file mode 100644
index 0000000000000..822a4b82e2fb8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srshlr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+srshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: srshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x06,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 06 44 <unknown>
+
+srshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: srshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x46,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 46 44 <unknown>
+
+srshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: srshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x86,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 86 44 <unknown>
+
+srshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: srshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc6,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c6 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+srshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: srshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xc6,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 c6 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+srshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: srshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc6,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c6 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/srshr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/srshr-diagnostics.s
new file mode 100644
index 0000000000000..0440f08a36446
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srshr-diagnostics.s
@@ -0,0 +1,78 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+srshr z18.b, p0/m, z18.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: srshr z18.b, p0/m, z18.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z1.b, p0/m, z1.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: srshr z1.b, p0/m, z1.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z21.h, p0/m, z21.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: srshr z21.h, p0/m, z21.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z14.h, p0/m, z14.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: srshr z14.h, p0/m, z14.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z6.s, p0/m, z6.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: srshr z6.s, p0/m, z6.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z23.s, p0/m, z23.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: srshr z23.s, p0/m, z23.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z3.d, p0/m, z3.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: srshr z3.d, p0/m, z3.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z25.d, p0/m, z25.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: srshr z25.d, p0/m, z25.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+srshr z0.b, p0/m, z1.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: srshr z0.b, p0/m, z1.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+srshr z0.b, p0/m, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srshr z0.b, p0/m, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z0.d, p0/m, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srshr z0.d, p0/m, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+srshr z0.b, p0/z, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: srshr z0.b, p0/z, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srshr z0.b, p8/m, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: srshr z0.b, p8/m, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/srshr.s b/llvm/test/MC/AArch64/SVE2/srshr.s
new file mode 100644
index 0000000000000..34955fed55d3e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srshr.s
@@ -0,0 +1,84 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+srshr    z0.b, p0/m, z0.b, #1
+// CHECK-INST: srshr	z0.b, p0/m, z0.b, #1
+// CHECK-ENCODING: [0xe0,0x81,0x0c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 81 0c 04 <unknown>
+
+srshr    z31.b, p0/m, z31.b, #8
+// CHECK-INST: srshr	z31.b, p0/m, z31.b, #8
+// CHECK-ENCODING: [0x1f,0x81,0x0c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 81 0c 04 <unknown>
+
+srshr    z0.h, p0/m, z0.h, #1
+// CHECK-INST: srshr	z0.h, p0/m, z0.h, #1
+// CHECK-ENCODING: [0xe0,0x83,0x0c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 83 0c 04 <unknown>
+
+srshr    z31.h, p0/m, z31.h, #16
+// CHECK-INST: srshr	z31.h, p0/m, z31.h, #16
+// CHECK-ENCODING: [0x1f,0x82,0x0c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 82 0c 04 <unknown>
+
+srshr    z0.s, p0/m, z0.s, #1
+// CHECK-INST: srshr	z0.s, p0/m, z0.s, #1
+// CHECK-ENCODING: [0xe0,0x83,0x4c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 83 4c 04 <unknown>
+
+srshr    z31.s, p0/m, z31.s, #32
+// CHECK-INST: srshr	z31.s, p0/m, z31.s, #32
+// CHECK-ENCODING: [0x1f,0x80,0x4c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 4c 04 <unknown>
+
+srshr    z0.d, p0/m, z0.d, #1
+// CHECK-INST: srshr	z0.d, p0/m, z0.d, #1
+// CHECK-ENCODING: [0xe0,0x83,0xcc,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 83 cc 04 <unknown>
+
+srshr    z31.d, p0/m, z31.d, #64
+// CHECK-INST: srshr	z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x8c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 8c 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx	z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+srshr    z31.d, p0/m, z31.d, #64
+// CHECK-INST: srshr	z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x8c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 8c 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx	z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+srshr    z31.d, p0/m, z31.d, #64
+// CHECK-INST: srshr	z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x8c,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 8c 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqrshl-diagnostics.s
new file mode 100644
index 0000000000000..120ad54deb055
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshl-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqrshl z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqrshl z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqrshl z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshl z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshl z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshl z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqrshl z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqrshl z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshl z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqrshl z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshl.s b/llvm/test/MC/AArch64/SVE2/uqrshl.s
new file mode 100644
index 0000000000000..8dde5efb5e244
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshl.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqrshl z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqrshl z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x0b,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 0b 44 <unknown>
+
+uqrshl z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqrshl z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x4b,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 4b 44 <unknown>
+
+uqrshl z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqrshl z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x8b,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 8b 44 <unknown>
+
+uqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcb,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cb 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqrshl z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqrshl z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xcb,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 cb 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqrshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcb,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cb 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshlr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqrshlr-diagnostics.s
new file mode 100644
index 0000000000000..b235ae8aaa55f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshlr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqrshlr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqrshlr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqrshlr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshlr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshlr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshlr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqrshlr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqrshlr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshlr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqrshlr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshlr.s b/llvm/test/MC/AArch64/SVE2/uqrshlr.s
new file mode 100644
index 0000000000000..eb6aabf6a014b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshlr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqrshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqrshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x0f,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 0f 44 <unknown>
+
+uqrshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqrshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x4f,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 4f 44 <unknown>
+
+uqrshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqrshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x8f,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 8f 44 <unknown>
+
+uqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcf,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cf 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqrshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqrshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xcf,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 cf 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqrshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcf,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cf 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqshl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqshl-diagnostics.s
new file mode 100644
index 0000000000000..e3a44cb05b945
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshl-diagnostics.s
@@ -0,0 +1,98 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+uqshl z0.b, p0/m, z0.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: uqshl z0.b, p0/m, z0.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.b, p0/m, z0.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: uqshl z0.b, p0/m, z0.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.h, p0/m, z0.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: uqshl z0.h, p0/m, z0.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.h, p0/m, z0.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: uqshl z0.h, p0/m, z0.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.s, p0/m, z0.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: uqshl z0.s, p0/m, z0.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.s, p0/m, z0.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: uqshl z0.s, p0/m, z0.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.d, p0/m, z0.d, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: uqshl z0.d, p0/m, z0.d, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.d, p0/m, z0.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: uqshl z0.d, p0/m, z0.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqshl z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqshl z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.b, p0/m, z1.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqshl z0.b, p0/m, z1.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqshl z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshl z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshl z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.b, p0/m, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshl z0.b, p0/m, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.d, p0/m, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshl z0.d, p0/m, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqshl z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqshl z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqshl z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshl z0.b, p8/m, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqshl z0.b, p8/m, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqshl.s b/llvm/test/MC/AArch64/SVE2/uqshl.s
new file mode 100644
index 0000000000000..36e8c6e11757a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshl.s
@@ -0,0 +1,131 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqshl z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqshl z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x09,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 09 44 <unknown>
+
+uqshl z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqshl z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x49,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 49 44 <unknown>
+
+uqshl z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqshl z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x89,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 89 44 <unknown>
+
+uqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc9,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c9 44 <unknown>
+
+uqshl z0.b, p0/m, z0.b, #0
+// CHECK-INST: uqshl z0.b, p0/m, z0.b, #0
+// CHECK-ENCODING: [0x00,0x81,0x07,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 81 07 04 <unknown>
+
+uqshl z31.b, p0/m, z31.b, #7
+// CHECK-INST: uqshl z31.b, p0/m, z31.b, #7
+// CHECK-ENCODING: [0xff,0x81,0x07,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 81 07 04 <unknown>
+
+uqshl z0.h, p0/m, z0.h, #0
+// CHECK-INST: uqshl z0.h, p0/m, z0.h, #0
+// CHECK-ENCODING: [0x00,0x82,0x07,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 82 07 04 <unknown>
+
+uqshl z31.h, p0/m, z31.h, #15
+// CHECK-INST: uqshl z31.h, p0/m, z31.h, #15
+// CHECK-ENCODING: [0xff,0x83,0x07,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 07 04 <unknown>
+
+uqshl z0.s, p0/m, z0.s, #0
+// CHECK-INST: uqshl z0.s, p0/m, z0.s, #0
+// CHECK-ENCODING: [0x00,0x80,0x47,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 47 04 <unknown>
+
+uqshl z31.s, p0/m, z31.s, #31
+// CHECK-INST: uqshl z31.s, p0/m, z31.s, #31
+// CHECK-ENCODING: [0xff,0x83,0x47,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 47 04 <unknown>
+
+uqshl z0.d, p0/m, z0.d, #0
+// CHECK-INST: uqshl z0.d, p0/m, z0.d, #0
+// CHECK-ENCODING: [0x00,0x80,0x87,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 87 04 <unknown>
+
+uqshl z31.d, p0/m, z31.d, #63
+// CHECK-INST: uqshl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc7,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 c7 04 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqshl z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqshl z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xc9,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 c9 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc9,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c9 44 <unknown>
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqshl z31.d, p0/m, z31.d, #63
+// CHECK-INST: uqshl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc7,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 c7 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqshl z31.d, p0/m, z31.d, #63
+// CHECK-INST: uqshl z31.d, p0/m, z31.d, #63
+// CHECK-ENCODING: [0xff,0x83,0xc7,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 83 c7 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqshlr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqshlr-diagnostics.s
new file mode 100644
index 0000000000000..c8006eab6f1fc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshlr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+uqshlr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: uqshlr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+uqshlr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshlr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshlr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshlr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+uqshlr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: uqshlr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshlr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: uqshlr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqshlr.s b/llvm/test/MC/AArch64/SVE2/uqshlr.s
new file mode 100644
index 0000000000000..eaadc0ba72157
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshlr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: uqshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x0d,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 0d 44 <unknown>
+
+uqshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: uqshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x4d,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 4d 44 <unknown>
+
+uqshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: uqshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x8d,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 8d 44 <unknown>
+
+uqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcd,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cd 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+uqshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: uqshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xcd,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 cd 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+uqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: uqshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xcd,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f cd 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/urshl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/urshl-diagnostics.s
new file mode 100644
index 0000000000000..d1ec794871d86
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/urshl-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+urshl z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: urshl z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+urshl z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urshl z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshl z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urshl z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+urshl z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: urshl z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshl z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: urshl z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/urshl.s b/llvm/test/MC/AArch64/SVE2/urshl.s
new file mode 100644
index 0000000000000..9e057fb261156
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/urshl.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+urshl z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: urshl z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x03,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 03 44 <unknown>
+
+urshl z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: urshl z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x43,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 43 44 <unknown>
+
+urshl z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: urshl z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x83,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 83 44 <unknown>
+
+urshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: urshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc3,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c3 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+urshl z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: urshl z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xc3,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 c3 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+urshl z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: urshl z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc3,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c3 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/urshlr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/urshlr-diagnostics.s
new file mode 100644
index 0000000000000..2241eb3885d02
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/urshlr-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+urshlr z0.b, p0/m, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: urshlr z0.b, p0/m, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+urshlr z0.b, p0/m, z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urshlr z0.b, p0/m, z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshlr z0.b, p0/m, z0.b, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urshlr z0.b, p0/m, z0.b, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+urshlr z0.b, p0/z, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: urshlr z0.b, p0/z, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshlr z0.b, p8/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: urshlr z0.b, p8/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/urshlr.s b/llvm/test/MC/AArch64/SVE2/urshlr.s
new file mode 100644
index 0000000000000..170eb4a7aeb64
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/urshlr.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+urshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-INST: urshlr z0.b, p0/m, z0.b, z1.b
+// CHECK-ENCODING: [0x20,0x80,0x07,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 07 44 <unknown>
+
+urshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: urshlr z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x47,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 47 44 <unknown>
+
+urshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-INST: urshlr z29.s, p7/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x9f,0x87,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 87 44 <unknown>
+
+urshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: urshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc7,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c7 44 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+urshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: urshlr z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xc7,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 c7 44 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+urshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: urshlr z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xc7,0x44]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f c7 44 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/urshr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/urshr-diagnostics.s
new file mode 100644
index 0000000000000..f021bb9de7aba
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/urshr-diagnostics.s
@@ -0,0 +1,78 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+urshr z18.b, p0/m, z18.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: urshr z18.b, p0/m, z18.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z1.b, p0/m, z1.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: urshr z1.b, p0/m, z1.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z21.h, p0/m, z21.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: urshr z21.h, p0/m, z21.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z14.h, p0/m, z14.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: urshr z14.h, p0/m, z14.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z6.s, p0/m, z6.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: urshr z6.s, p0/m, z6.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z23.s, p0/m, z23.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: urshr z23.s, p0/m, z23.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z3.d, p0/m, z3.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: urshr z3.d, p0/m, z3.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z25.d, p0/m, z25.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: urshr z25.d, p0/m, z25.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+urshr z0.b, p0/m, z1.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: urshr z0.b, p0/m, z1.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+urshr z0.b, p0/m, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urshr z0.b, p0/m, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z0.d, p0/m, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urshr z0.d, p0/m, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate
+
+urshr z0.b, p0/z, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: urshr z0.b, p0/z, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urshr z0.b, p8/m, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: urshr z0.b, p8/m, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/urshr.s b/llvm/test/MC/AArch64/SVE2/urshr.s
new file mode 100644
index 0000000000000..b06edfeb9b792
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/urshr.s
@@ -0,0 +1,84 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+urshr    z0.b, p0/m, z0.b, #1
+// CHECK-INST: urshr	z0.b, p0/m, z0.b, #1
+// CHECK-ENCODING: [0xe0,0x81,0x0d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 81 0d 04 <unknown>
+
+urshr    z31.b, p0/m, z31.b, #8
+// CHECK-INST: urshr	z31.b, p0/m, z31.b, #8
+// CHECK-ENCODING: [0x1f,0x81,0x0d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 81 0d 04 <unknown>
+
+urshr    z0.h, p0/m, z0.h, #1
+// CHECK-INST: urshr	z0.h, p0/m, z0.h, #1
+// CHECK-ENCODING: [0xe0,0x83,0x0d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 83 0d 04 <unknown>
+
+urshr    z31.h, p0/m, z31.h, #16
+// CHECK-INST: urshr	z31.h, p0/m, z31.h, #16
+// CHECK-ENCODING: [0x1f,0x82,0x0d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 82 0d 04 <unknown>
+
+urshr    z0.s, p0/m, z0.s, #1
+// CHECK-INST: urshr	z0.s, p0/m, z0.s, #1
+// CHECK-ENCODING: [0xe0,0x83,0x4d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 83 4d 04 <unknown>
+
+urshr    z31.s, p0/m, z31.s, #32
+// CHECK-INST: urshr	z31.s, p0/m, z31.s, #32
+// CHECK-ENCODING: [0x1f,0x80,0x4d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 4d 04 <unknown>
+
+urshr    z0.d, p0/m, z0.d, #1
+// CHECK-INST: urshr	z0.d, p0/m, z0.d, #1
+// CHECK-ENCODING: [0xe0,0x83,0xcd,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 83 cd 04 <unknown>
+
+urshr    z31.d, p0/m, z31.d, #64
+// CHECK-INST: urshr	z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x8d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 8d 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx	z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+urshr    z31.d, p0/m, z31.d, #64
+// CHECK-INST: urshr	z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x8d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 8d 04 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx	z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+urshr    z31.d, p0/m, z31.d, #64
+// CHECK-INST: urshr	z31.d, p0/m, z31.d, #64
+// CHECK-ENCODING: [0x1f,0x80,0x8d,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 80 8d 04 <unknown>

From b4771425f53c7c7efa911eac0399b4029073e7d0 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Fri, 24 May 2019 09:20:20 +0000
Subject: [PATCH 0147/1176] Use the DataLayout::typeSizeEqualsStoreSize helper.
 NFC

Just a minor refactoring to use the new helper method
DataLayout::typeSizeEqualsStoreSize(). This is done when
checking if getTypeSizeInBits is equal/non-equal to
getTypeStoreSizeInBits.

llvm-svn: 361613
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp                         | 5 ++---
 .../Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp   | 2 +-
 llvm/lib/Transforms/Scalar/SROA.cpp                         | 6 ++----
 llvm/lib/Transforms/Scalar/Scalarizer.cpp                   | 3 +--
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 76f82c27847d0..488cfe6b6c89c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6649,14 +6649,13 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
                                 const TargetLowering &TLI) {
   // Handle simple but common cases only.
   Type *StoreType = SI.getValueOperand()->getType();
-  if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+  if (!DL.typeSizeEqualsStoreSize(StoreType) ||
       DL.getTypeSizeInBits(StoreType) == 0)
     return false;
 
   unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
   Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
-  if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
-      DL.getTypeSizeInBits(SplitStoreType))
+  if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
     return false;
 
   // Don't split the store if it is volatile.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index dd07561d4646f..0df80e07e840a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -630,7 +630,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
   // infinite loop).
   if (!Ty->isIntegerTy() && Ty->isSized() &&
       DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
-      DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
+      DL.typeSizeEqualsStoreSize(Ty) &&
       !DL.isNonIntegralPointerType(Ty) &&
       !isMinMaxWithLoads(
           peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 9786614db0af3..790a16d2aff77 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2519,8 +2519,7 @@ class llvm::sroa::AllocaSliceRewriter
              "Only integer type loads and stores are split");
       assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
              "Split load isn't smaller than original load");
-      assert(LI.getType()->getIntegerBitWidth() ==
-                 DL.getTypeStoreSizeInBits(LI.getType()) &&
+      assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
              "Non-byte-multiple bit width");
       // Move the insertion point just past the load so that we can refer to it.
       IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
@@ -2615,8 +2614,7 @@ class llvm::sroa::AllocaSliceRewriter
       assert(!SI.isVolatile());
       assert(V->getType()->isIntegerTy() &&
              "Only integer type loads and stores are split");
-      assert(V->getType()->getIntegerBitWidth() ==
-                 DL.getTypeStoreSizeInBits(V->getType()) &&
+      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
              "Non-byte-multiple bit width");
       IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
       V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3d554f15af2cf..0bd0fff1aa59b 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -408,8 +408,7 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
 
   // Check that we're dealing with full-byte elements.
   Layout.ElemTy = Layout.VecTy->getElementType();
-  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
-      DL.getTypeStoreSizeInBits(Layout.ElemTy))
+  if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
     return false;
 
   if (Alignment)

From 7f1ff68a165a8a593dd96b4e77691a61a91612c5 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 24 May 2019 09:25:47 +0000
Subject: [PATCH 0148/1176] [ELF] Deleted unused forward declarations. NFC

llvm-svn: 361614
---
 lld/ELF/LinkerScript.h | 1 -
 lld/ELF/SymbolTable.h  | 8 --------
 2 files changed, 9 deletions(-)

diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index e5f464ff1fd5e..fe092b27d6f03 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -31,7 +31,6 @@ namespace elf {
 class Defined;
 class InputSection;
 class InputSectionBase;
-class InputSectionBase;
 class OutputSection;
 class SectionBase;
 class Symbol;
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index 8c9a8f8f2e760..6e93e0a144d9a 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -17,14 +17,6 @@
 namespace lld {
 namespace elf {
 
-class CommonSymbol;
-class Defined;
-class LazyArchive;
-class LazyObject;
-class SectionBase;
-class SharedSymbol;
-class Undefined;
-
 // SymbolTable is a bucket of all known symbols, including defined,
 // undefined, or lazy symbols (the last one is symbols in archive
 // files whose archive members are not yet loaded).

From 8bcea9daaa6236b92ccc4453e398ed46d1f32745 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 09:28:27 +0000
Subject: [PATCH 0149/1176] [AArch64][SVE2] Asm: add integer add/sub long/wide
 instructions

Summary:
Patch adds support for the following instructions:

SVE2 integer add/subtract long:
    * SADDLB, SADDLT, UADDLB, UADDLT, SSUBLB, SSUBLT, USUBLB, USUBLT,
      SABDLB, SABDLT, UABDLB, UABDLT

SVE2 integer add/subtract wide:
    * SADDWB, SADDWT, UADDWB, UADDWT, SSUBWB, SSUBWT, USUBWB, USUBWT

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62142

llvm-svn: 361615
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 24 +++++++++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  6 +++
 .../test/MC/AArch64/SVE2/sabdlb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/sabdlb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/sabdlt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/sabdlt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/saddlb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/saddlb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/saddlt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/saddlt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/saddwb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/saddwb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/saddwt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/saddwt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/ssublb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ssublb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/ssublt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ssublt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/ssubwb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ssubwb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/ssubwt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ssubwt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/uabdlb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uabdlb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/uabdlt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uabdlt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/uaddlb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uaddlb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/uaddlt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uaddlt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/uaddwb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uaddwb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/uaddwt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/uaddwt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/usublb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/usublb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/usublt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/usublt.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/usubwb-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/usubwb.s            | 27 +++++++++++++
 .../test/MC/AArch64/SVE2/usubwt-diagnostics.s | 40 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/usubwt.s            | 27 +++++++++++++
 42 files changed, 1370 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabdlb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabdlb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabdlt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabdlt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddlb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddlb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddlt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddlt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddwb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddwb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddwt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddwt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssublb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssublb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssublt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssublt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssubwb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssubwb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssubwt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssubwt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabdlb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabdlb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabdlt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabdlt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddlb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddlb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddlt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddlt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddwb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddwb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddwt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaddwt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usublb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usublb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usublt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usublt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usubwb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usubwb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usubwt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usubwt.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 9d775ec8d5f7f..dd14e4ba815e4 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1164,6 +1164,30 @@ let Predicates = [HasSVE2] in {
   defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
   defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
 
+  // SVE2 integer add/subtract long
+  defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
+  defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
+  defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb">;
+  defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt">;
+  defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb">;
+  defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt">;
+  defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb">;
+  defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt">;
+  defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb">;
+  defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt">;
+  defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb">;
+  defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt">;
+
+  // SVE2 integer add/subtract wide
+  defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
+  defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">;
+  defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">;
+  defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">;
+  defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">;
+  defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">;
+  defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">;
+  defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
+
   // SVE2 integer multiply long
   defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
   defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8b4c00935e524..b741f09886b55 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2180,6 +2180,12 @@ multiclass sve2_wide_int_arith_long<bits<5> opc, string asm> {
   def _D : sve2_wide_int_arith<0b11, opc, asm, ZPR64, ZPR32, ZPR32>;
 }
 
+multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm> {
+  def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>;
+  def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>;
+  def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Arithmetic - Unary Predicated Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/sabdlb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sabdlb-diagnostics.s
new file mode 100644
index 0000000000000..2e7d0f5388bfb
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabdlb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sabdlb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabdlb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabdlb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabdlb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sabdlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sabdlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+sabdlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sabdlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sabdlb.s b/llvm/test/MC/AArch64/SVE2/sabdlb.s
new file mode 100644
index 0000000000000..af2b358cea737
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabdlb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sabdlb z0.h, z1.b, z2.b
+// CHECK-INST: sabdlb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x30,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 30 42 45 <unknown>
+
+sabdlb z29.s, z30.h, z31.h
+// CHECK-INST: sabdlb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x33,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 33 9f 45 <unknown>
+
+sabdlb z31.d, z31.s, z31.s
+// CHECK-INST: sabdlb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x33,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 33 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sabdlt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sabdlt-diagnostics.s
new file mode 100644
index 0000000000000..b08ff54aec3f7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabdlt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sabdlt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabdlt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabdlt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabdlt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabdlt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+sabdlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sabdlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+sabdlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sabdlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sabdlt.s b/llvm/test/MC/AArch64/SVE2/sabdlt.s
new file mode 100644
index 0000000000000..f8819b5a388ae
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabdlt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sabdlt z0.h, z1.b, z2.b
+// CHECK-INST: sabdlt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x34,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 34 42 45 <unknown>
+
+sabdlt z29.s, z30.h, z31.h
+// CHECK-INST: sabdlt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x37,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 37 9f 45 <unknown>
+
+sabdlt z31.d, z31.s, z31.s
+// CHECK-INST: sabdlt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x37,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 37 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/saddlb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/saddlb-diagnostics.s
new file mode 100644
index 0000000000000..46d9bc408f18f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddlb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+saddlb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+saddlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+saddlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/saddlb.s b/llvm/test/MC/AArch64/SVE2/saddlb.s
new file mode 100644
index 0000000000000..d592ae8ec6a79
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddlb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+saddlb z0.h, z1.b, z2.b
+// CHECK-INST: saddlb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x00,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 00 42 45 <unknown>
+
+saddlb z29.s, z30.h, z31.h
+// CHECK-INST: saddlb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x03,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 03 9f 45 <unknown>
+
+saddlb z31.d, z31.s, z31.s
+// CHECK-INST: saddlb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x03,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 03 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/saddlt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/saddlt-diagnostics.s
new file mode 100644
index 0000000000000..437a146576452
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddlt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+saddlt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+saddlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+saddlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/saddlt.s b/llvm/test/MC/AArch64/SVE2/saddlt.s
new file mode 100644
index 0000000000000..047bc4fa2ba37
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddlt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+saddlt z0.h, z1.b, z2.b
+// CHECK-INST: saddlt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x04,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 04 42 45 <unknown>
+
+saddlt z29.s, z30.h, z31.h
+// CHECK-INST: saddlt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x07,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 07 9f 45 <unknown>
+
+saddlt z31.d, z31.s, z31.s
+// CHECK-INST: saddlt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x07,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 07 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/saddwb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/saddwb-diagnostics.s
new file mode 100644
index 0000000000000..b308fdd1042ec
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddwb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+saddwb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddwb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddwb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddwb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+saddwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+saddwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/saddwb.s b/llvm/test/MC/AArch64/SVE2/saddwb.s
new file mode 100644
index 0000000000000..2b9213de26070
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddwb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+saddwb z0.h, z1.h, z2.b
+// CHECK-INST: saddwb z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x40,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 40 42 45 <unknown>
+
+saddwb z29.s, z30.s, z31.h
+// CHECK-INST: saddwb z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x43,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 43 9f 45 <unknown>
+
+saddwb z31.d, z31.d, z31.s
+// CHECK-INST: saddwb z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x43,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 43 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/saddwt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/saddwt-diagnostics.s
new file mode 100644
index 0000000000000..133830d39bca4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddwt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+saddwt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddwt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddwt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddwt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddwt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+saddwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+saddwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/saddwt.s b/llvm/test/MC/AArch64/SVE2/saddwt.s
new file mode 100644
index 0000000000000..7521eb6f0133e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddwt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+saddwt z0.h, z1.h, z2.b
+// CHECK-INST: saddwt z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x44,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 44 42 45 <unknown>
+
+saddwt z29.s, z30.s, z31.h
+// CHECK-INST: saddwt z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x47,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 47 9f 45 <unknown>
+
+saddwt z31.d, z31.d, z31.s
+// CHECK-INST: saddwt z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x47,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 47 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssublb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssublb-diagnostics.s
new file mode 100644
index 0000000000000..d608194a9124a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssublb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+ssublb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+ssublb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssublb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ssublb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssublb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssublb.s b/llvm/test/MC/AArch64/SVE2/ssublb.s
new file mode 100644
index 0000000000000..3ccc3abf52dc1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssublb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+ssublb z0.h, z1.b, z2.b
+// CHECK-INST: ssublb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x10,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 10 42 45 <unknown>
+
+ssublb z29.s, z30.h, z31.h
+// CHECK-INST: ssublb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x13,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 13 9f 45 <unknown>
+
+ssublb z31.d, z31.s, z31.s
+// CHECK-INST: ssublb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x13,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 13 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssublt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssublt-diagnostics.s
new file mode 100644
index 0000000000000..5c7d7b952da89
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssublt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+ssublt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+ssublt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssublt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ssublt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssublt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssublt.s b/llvm/test/MC/AArch64/SVE2/ssublt.s
new file mode 100644
index 0000000000000..ffd7d54cab975
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssublt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+ssublt z0.h, z1.b, z2.b
+// CHECK-INST: ssublt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x14,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 14 42 45 <unknown>
+
+ssublt z29.s, z30.h, z31.h
+// CHECK-INST: ssublt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x17,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 17 9f 45 <unknown>
+
+ssublt z31.d, z31.s, z31.s
+// CHECK-INST: ssublt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x17,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 17 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssubwb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssubwb-diagnostics.s
new file mode 100644
index 0000000000000..94679b2d6db7f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssubwb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+ssubwb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubwb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubwb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubwb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+ssubwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssubwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ssubwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssubwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssubwb.s b/llvm/test/MC/AArch64/SVE2/ssubwb.s
new file mode 100644
index 0000000000000..be3a41e91322e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssubwb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+ssubwb z0.h, z1.h, z2.b
+// CHECK-INST: ssubwb z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x50,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 50 42 45 <unknown>
+
+ssubwb z29.s, z30.s, z31.h
+// CHECK-INST: ssubwb z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x53,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 53 9f 45 <unknown>
+
+ssubwb z31.d, z31.d, z31.s
+// CHECK-INST: ssubwb z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x53,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 53 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssubwt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssubwt-diagnostics.s
new file mode 100644
index 0000000000000..e1ba02da627c0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssubwt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+ssubwt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubwt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubwt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubwt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubwt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+ssubwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssubwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+ssubwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssubwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssubwt.s b/llvm/test/MC/AArch64/SVE2/ssubwt.s
new file mode 100644
index 0000000000000..1894548a6d3f9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssubwt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+ssubwt z0.h, z1.h, z2.b
+// CHECK-INST: ssubwt z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x54,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 54 42 45 <unknown>
+
+ssubwt z29.s, z30.s, z31.h
+// CHECK-INST: ssubwt z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x57,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 57 9f 45 <unknown>
+
+ssubwt z31.d, z31.d, z31.s
+// CHECK-INST: ssubwt z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x57,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 57 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uabdlb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uabdlb-diagnostics.s
new file mode 100644
index 0000000000000..a24e3b7c617d0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabdlb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uabdlb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabdlb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabdlb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabdlb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uabdlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uabdlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uabdlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uabdlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uabdlb.s b/llvm/test/MC/AArch64/SVE2/uabdlb.s
new file mode 100644
index 0000000000000..7782fdb9fec70
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabdlb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uabdlb z0.h, z1.b, z2.b
+// CHECK-INST: uabdlb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x38,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 38 42 45 <unknown>
+
+uabdlb z29.s, z30.h, z31.h
+// CHECK-INST: uabdlb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x3b,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 3b 9f 45 <unknown>
+
+uabdlb z31.d, z31.s, z31.s
+// CHECK-INST: uabdlb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x3b,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3b df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uabdlt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uabdlt-diagnostics.s
new file mode 100644
index 0000000000000..ed63851a2e92b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabdlt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uabdlt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabdlt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabdlt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabdlt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabdlt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uabdlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uabdlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uabdlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uabdlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uabdlt.s b/llvm/test/MC/AArch64/SVE2/uabdlt.s
new file mode 100644
index 0000000000000..118c286c18739
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabdlt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uabdlt z0.h, z1.b, z2.b
+// CHECK-INST: uabdlt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x3c,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 3c 42 45 <unknown>
+
+uabdlt z29.s, z30.h, z31.h
+// CHECK-INST: uabdlt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x3f,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 3f 9f 45 <unknown>
+
+uabdlt z31.d, z31.s, z31.s
+// CHECK-INST: uabdlt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x3f,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uaddlb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uaddlb-diagnostics.s
new file mode 100644
index 0000000000000..0272eb29a1de8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddlb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uaddlb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddlb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddlb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddlb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uaddlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uaddlb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddlb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uaddlb.s b/llvm/test/MC/AArch64/SVE2/uaddlb.s
new file mode 100644
index 0000000000000..6804540fdf0d7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddlb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uaddlb z0.h, z1.b, z2.b
+// CHECK-INST: uaddlb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x08,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 08 42 45 <unknown>
+
+uaddlb z29.s, z30.h, z31.h
+// CHECK-INST: uaddlb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x0b,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 0b 9f 45 <unknown>
+
+uaddlb z31.d, z31.s, z31.s
+// CHECK-INST: uaddlb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x0b,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0b df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uaddlt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uaddlt-diagnostics.s
new file mode 100644
index 0000000000000..84f68dd2c313e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddlt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uaddlt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddlt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddlt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddlt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddlt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uaddlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uaddlt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddlt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uaddlt.s b/llvm/test/MC/AArch64/SVE2/uaddlt.s
new file mode 100644
index 0000000000000..366f2c42ed4d6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddlt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uaddlt z0.h, z1.b, z2.b
+// CHECK-INST: uaddlt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x0c,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 0c 42 45 <unknown>
+
+uaddlt z29.s, z30.h, z31.h
+// CHECK-INST: uaddlt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x0f,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 0f 9f 45 <unknown>
+
+uaddlt z31.d, z31.s, z31.s
+// CHECK-INST: uaddlt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x0f,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0f df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uaddwb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uaddwb-diagnostics.s
new file mode 100644
index 0000000000000..f7f3084ce2d31
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddwb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uaddwb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddwb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddwb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddwb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uaddwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uaddwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uaddwb.s b/llvm/test/MC/AArch64/SVE2/uaddwb.s
new file mode 100644
index 0000000000000..d07d779b1c27c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddwb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uaddwb z0.h, z1.h, z2.b
+// CHECK-INST: uaddwb z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x48,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 48 42 45 <unknown>
+
+uaddwb z29.s, z30.s, z31.h
+// CHECK-INST: uaddwb z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x4b,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 4b 9f 45 <unknown>
+
+uaddwb z31.d, z31.d, z31.s
+// CHECK-INST: uaddwb z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x4b,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 4b df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uaddwt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uaddwt-diagnostics.s
new file mode 100644
index 0000000000000..b916ec6014f96
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddwt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uaddwt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddwt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddwt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uaddwt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaddwt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+uaddwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+uaddwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uaddwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uaddwt.s b/llvm/test/MC/AArch64/SVE2/uaddwt.s
new file mode 100644
index 0000000000000..6520bd449187f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaddwt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uaddwt z0.h, z1.h, z2.b
+// CHECK-INST: uaddwt z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x4c,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 4c 42 45 <unknown>
+
+uaddwt z29.s, z30.s, z31.h
+// CHECK-INST: uaddwt z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x4f,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 4f 9f 45 <unknown>
+
+uaddwt z31.d, z31.d, z31.s
+// CHECK-INST: uaddwt z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x4f,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 4f df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/usublb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/usublb-diagnostics.s
new file mode 100644
index 0000000000000..074235809097a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usublb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+usublb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usublb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usublb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usublb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+usublb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usublb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+usublb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usublb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/usublb.s b/llvm/test/MC/AArch64/SVE2/usublb.s
new file mode 100644
index 0000000000000..a985e38af659d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usublb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+usublb z0.h, z1.b, z2.b
+// CHECK-INST: usublb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x18,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 18 42 45 <unknown>
+
+usublb z29.s, z30.h, z31.h
+// CHECK-INST: usublb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x1b,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 1b 9f 45 <unknown>
+
+usublb z31.d, z31.s, z31.s
+// CHECK-INST: usublb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x1b,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1b df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/usublt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/usublt-diagnostics.s
new file mode 100644
index 0000000000000..2b7f444c10d9d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usublt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+usublt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usublt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usublt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usublt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usublt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+usublt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usublt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+usublt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usublt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/usublt.s b/llvm/test/MC/AArch64/SVE2/usublt.s
new file mode 100644
index 0000000000000..9a7f23d6a5427
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usublt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+usublt z0.h, z1.b, z2.b
+// CHECK-INST: usublt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x1c,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 1c 42 45 <unknown>
+
+usublt z29.s, z30.h, z31.h
+// CHECK-INST: usublt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x1f,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 1f 9f 45 <unknown>
+
+usublt z31.d, z31.s, z31.s
+// CHECK-INST: usublt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x1f,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1f df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/usubwb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/usubwb-diagnostics.s
new file mode 100644
index 0000000000000..61047157ca65f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usubwb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+usubwb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usubwb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usubwb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usubwb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+usubwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usubwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+usubwb z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usubwb z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/usubwb.s b/llvm/test/MC/AArch64/SVE2/usubwb.s
new file mode 100644
index 0000000000000..d1ebf20c725ce
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usubwb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+usubwb z0.h, z1.h, z2.b
+// CHECK-INST: usubwb z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x58,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 58 42 45 <unknown>
+
+usubwb z29.s, z30.s, z31.h
+// CHECK-INST: usubwb z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x5b,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 5b 9f 45 <unknown>
+
+usubwb z31.d, z31.d, z31.s
+// CHECK-INST: usubwb z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x5b,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 5b df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/usubwt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/usubwt-diagnostics.s
new file mode 100644
index 0000000000000..69f4c1865b31f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usubwt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+usubwt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usubwt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usubwt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usubwt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usubwt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+usubwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usubwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+usubwt z0.d, z1.d, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: usubwt z0.d, z1.d, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/usubwt.s b/llvm/test/MC/AArch64/SVE2/usubwt.s
new file mode 100644
index 0000000000000..2b3874ac89531
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usubwt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+usubwt z0.h, z1.h, z2.b
+// CHECK-INST: usubwt z0.h, z1.h, z2.b
+// CHECK-ENCODING: [0x20,0x5c,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 5c 42 45 <unknown>
+
+usubwt z29.s, z30.s, z31.h
+// CHECK-INST: usubwt z29.s, z30.s, z31.h
+// CHECK-ENCODING: [0xdd,0x5f,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 5f 9f 45 <unknown>
+
+usubwt z31.d, z31.d, z31.s
+// CHECK-INST: usubwt z31.d, z31.d, z31.s
+// CHECK-ENCODING: [0xff,0x5f,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 5f df 45 <unknown>

From c472f7b010de533cb33a21fad603a0de9cec65c1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 24 May 2019 09:31:32 +0000
Subject: [PATCH 0150/1176] Fix sphinx "Malformed option description" warning

llvm-svn: 361616
---
 .../checks/cppcoreguidelines-pro-type-member-init.rst           | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst
index 5ac0465e5462f..6df091b733e94 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-type-member-init.rst
@@ -35,7 +35,7 @@ Options
 
 .. option:: UseAssignment
    If set to non-zero, the check will provide fix-its with literal initializers
-   (``int i = 0;``) instead of curly braces (``int i{};``).
+   \( ``int i = 0;`` \) instead of curly braces \( ``int i{};`` \).
 
 This rule is part of the "Type safety" profile of the C++ Core
 Guidelines, corresponding to rule Type.6. See

From 7d4a67852da7ecce3ae43b61baa9f0cd068b05c6 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 24 May 2019 09:53:25 +0000
Subject: [PATCH 0151/1176] [ELF] Fix a doc typo. NFC

llvm-svn: 361617
---
 lld/ELF/MarkLive.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index d040a214aa6cf..5132bb4b72b9b 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -287,7 +287,7 @@ template <class ELFT> void elf::markLive() {
   // unconditionally make non-SHF_ALLOC sections alive except
   // SHF_LINK_ORDER and SHT_REL/SHT_RELA sections.
   //
-  // Usually, SHF_ALLOC sections are not removed even if they are
+  // Usually, non-SHF_ALLOC sections are not removed even if they are
   // unreachable through relocations because reachability is not
   // a good signal whether they are garbage or not (e.g. there is
   // usually no section referring to a .comment section, but we

From 1a0312ca0b20d16edb859065bbace75f6701c92e Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Fri, 24 May 2019 09:54:39 +0000
Subject: [PATCH 0152/1176] [FuncUnwinders] Use "symbol file" unwind plans for
 unwinding

Summary:
Previous patch (r360409) introduced the "symbol file unwind plan"
concept, but that plan wasn't used for unwinding yet. With this patch,
we start to consider the new plan as a possible strategy for both
synchronous and asynchronous unwinding. I also add a test that asserts
that unwinding via breakpad STACK CFI info works end-to-end.

Reviewers: jasonmolenda, clayborg

Subscribers: lldb-commits, amccarth, markmentovai

Differential Revision: https://reviews.llvm.org/D61853

llvm-svn: 361618
---
 lldb/include/lldb/Symbol/FuncUnwinders.h      |  2 +-
 .../Breakpad/Inputs/unwind-via-stack-cfi.syms |  6 +++
 .../Breakpad/Inputs/unwind-via-stack-cfi.yaml | 43 +++++++++++++++++++
 .../Breakpad/stack-cfi-parsing.test           |  4 ++
 .../Breakpad/unwind-via-stack-cfi.test        | 20 +++++++++
 lldb/source/Commands/CommandObjectTarget.cpp  |  2 +-
 .../Process/Utility/RegisterContextLLDB.cpp   | 14 +++---
 lldb/source/Symbol/FuncUnwinders.cpp          |  7 ++-
 8 files changed, 89 insertions(+), 9 deletions(-)
 create mode 100644 lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.syms
 create mode 100644 lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.yaml
 create mode 100644 lldb/lit/SymbolFile/Breakpad/unwind-via-stack-cfi.test

diff --git a/lldb/include/lldb/Symbol/FuncUnwinders.h b/lldb/include/lldb/Symbol/FuncUnwinders.h
index 1872e77fb82ba..cc767d4e1e82f 100644
--- a/lldb/include/lldb/Symbol/FuncUnwinders.h
+++ b/lldb/include/lldb/Symbol/FuncUnwinders.h
@@ -35,7 +35,7 @@ class FuncUnwinders {
 
   ~FuncUnwinders();
 
-  lldb::UnwindPlanSP GetUnwindPlanAtCallSite(Target &target);
+  lldb::UnwindPlanSP GetUnwindPlanAtCallSite(Target &target, Thread &thread);
 
   lldb::UnwindPlanSP GetUnwindPlanAtNonCallSite(Target &target,
                                                 lldb_private::Thread &thread);
diff --git a/lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.syms b/lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.syms
new file mode 100644
index 0000000000000..d4c8287e68a29
--- /dev/null
+++ b/lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.syms
@@ -0,0 +1,6 @@
+MODULE Linux x86_64 603FCF6CA7FF4BCC86AE8FF44DB2576A0 linux-x86_64_not_crashed
+INFO CODE_ID 6CCF3F60FFA7CC4B86AE8FF44DB2576A68983611
+PUBLIC 420 0 bar
+PUBLIC 450 0 foo
+PUBLIC 480 0 _start
+STACK CFI INIT 420 29 .cfa: $rbp ^ .ra: .cfa 8 + ^ $rsp: .cfa 16 + $rbp: .cfa ^
diff --git a/lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.yaml b/lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.yaml
new file mode 100644
index 0000000000000..0672f9e612360
--- /dev/null
+++ b/lldb/lit/SymbolFile/Breakpad/Inputs/unwind-via-stack-cfi.yaml
@@ -0,0 +1,43 @@
+--- !minidump
+Streams:         
+  - Type:            ThreadList
+    Threads:         
+      - Thread Id:       0x000074F3
+        Context:         0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000B001000000000006CAE000000006B7FC05A0000C81D415A0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000A2BF9E5A6B7F0000000000000000000000000000000000008850C14BFD7F00009850C14BFD7F00000100000000000000B04AC14BFD7F0000000000000000000060812D01000000000800000000000000B065E05A6B7F00008004400000000000E050C14BFD7F00000000000000000000000000000000000030044000000000007F03FFFF0000FFFFFFFFFFFF000000000000000000000000801F00006B7F00000400000000000000B84CC14BFD7F0000304D405A6B7F0000C84DC14BFD7F0000C0AA405A6B7F00004F033D0000000000B84DC14BFD7F0000E84DC14BFD7F0000000000000000000000000000000000000070E05A6B7F000078629E5A6B7F0000C81D415A6B7F0000804F9E5A6B7F00000000000001000000E603000001000000E093115A6B7F0000804EC14BFD7F0000584EC14BFD7F000099ADC05A6B7F00000100000000000000AAAAD77D0000000002000000000000000800000000000000B065E05A6B7F0000E6B7C05A6B7F0000010000006B7F0000884DC14BFD7F0000106F7C5A6B7F0000984EC14BFD7F0000488B7C5A6B7F0000C4A71CB90000000001000000000000000800000000000000B065E05A6B7F000048B6C05A6B7F0000702AE25A6B7F0000D84DC14BFD7F000030489E5A6B7F0000E84EC14BFD7F0000E05E9E5A6B7F00000991F0460000000001000000000000000800000000000000B065E05A6B7F000048B6C05A6B7F00000100000000000000284EC14BFD7F00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+        Stack:           
+          Start of Memory Range: 0x00007FFD4BC15080
+          Content:         30044000000000000000000000000000FFFFFFFF03000000B850C14BFD7F0000670440000000000000000000000000000000000001000000D850C14BFD7F0000970440000000000000000000000000000000000000000000000000000000000001000000000000009F67C14BFD7F00000000000000000000BA67C14BFD7F0000
+  - Type:            ModuleList
+    Modules:         
+      - Base of Image:   0x0000000000400000
+        Size of Image:   0x00001000
+        Module Name:     '/tmp/unwind-via-stack-cfi'
+        CodeView Record: 4C4570426CCF3F60FFA7CC4B86AE8FF44DB2576A68983611
+  - Type:            MemoryList
+    Memory Ranges:   
+      - Start of Memory Range: 0x00007FFD4BC15080
+        Content:         30044000000000000000000000000000FFFFFFFF03000000B850C14BFD7F0000670440000000000000000000000000000000000001000000D850C14BFD7F0000970440000000000000000000000000000000000000000000000000000000000001000000000000009F67C14BFD7F00000000000000000000BA67C14BFD7F0000
+  - Type:            SystemInfo
+    Processor Arch:  AMD64
+    Processor Level: 6
+    Processor Revision: 15876
+    Number of Processors: 40
+    Platform ID:     Linux
+    CSD Version:     'Linux 3.13.0-91-generic'
+    CPU:             
+      Vendor ID:       GenuineIntel
+      Version Info:    0x00000000
+      Feature Info:    0x00000000
+  - Type:            LinuxProcStatus
+    Text:             |
+      Name:	unwind-via-stack-cfi
+      State:	t (tracing stop)
+      Tgid:	29939
+      Ngid:	0
+      Pid:	29939
+      PPid:	29370
+      TracerPid:	29940
+      Uid:	1001	1001	1001	1001
+      Gid:	1001	1001	1001	1001
+
+...
diff --git a/lldb/lit/SymbolFile/Breakpad/stack-cfi-parsing.test b/lldb/lit/SymbolFile/Breakpad/stack-cfi-parsing.test
index c8d9694172c75..ffb64602c15e5 100644
--- a/lldb/lit/SymbolFile/Breakpad/stack-cfi-parsing.test
+++ b/lldb/lit/SymbolFile/Breakpad/stack-cfi-parsing.test
@@ -4,6 +4,8 @@
 
 image show-unwind -n func0
 # CHECK-LABEL: image show-unwind -n func0
+# CHECK: Asynchronous (not restricted to call-sites) UnwindPlan is 'breakpad STACK CFI'
+# CHECK: Synchronous (restricted to call-sites) UnwindPlan is 'breakpad STACK CFI'
 # CHECK:      Symbol file UnwindPlan:
 # CHECK-NEXT: This UnwindPlan originally sourced from breakpad STACK CFI
 # CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes.
@@ -42,6 +44,8 @@ image show-unwind -n func7
 # Finally, try an unwind plan with just a single row
 image show-unwind -n func9
 # CHECK-LABEL: image show-unwind -n func9
+# CHECK: Asynchronous (not restricted to call-sites) UnwindPlan is 'breakpad STACK CFI'
+# CHECK: Synchronous (restricted to call-sites) UnwindPlan is 'breakpad STACK CFI'
 # CHECK: Symbol file UnwindPlan:
 # CHECK: Address range of this UnwindPlan: [stack-cfi-parsing.out..module_image + 9-0x000000000000000a)
 # CHECK: row[0]:    0: CFA=DW_OP_breg6 +0 => rip=DW_OP_breg0 +0
diff --git a/lldb/lit/SymbolFile/Breakpad/unwind-via-stack-cfi.test b/lldb/lit/SymbolFile/Breakpad/unwind-via-stack-cfi.test
new file mode 100644
index 0000000000000..9a6c2166c1300
--- /dev/null
+++ b/lldb/lit/SymbolFile/Breakpad/unwind-via-stack-cfi.test
@@ -0,0 +1,20 @@
+# RUN: yaml2obj %S/Inputs/unwind-via-stack-cfi.yaml > %t
+# RUN: %lldb -c %t -o "target symbols add %S/Inputs/unwind-via-stack-cfi.syms" \
+# RUN:   -s %s -b | FileCheck %s
+
+image show-unwind -n bar
+# CHECK-LABEL: image show-unwind -n bar
+# CHECK: Asynchronous (not restricted to call-sites) UnwindPlan is 'breakpad STACK CFI'
+# CHECK: Synchronous (restricted to call-sites) UnwindPlan is 'breakpad STACK CFI'
+# CHECK:      Symbol file UnwindPlan:
+# CHECK-NEXT: This UnwindPlan originally sourced from breakpad STACK CFI
+# CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes.
+# CHECK-NEXT: This UnwindPlan is valid at all instruction locations: no.
+# CHECK-NEXT: Address range of this UnwindPlan: [unwind-via-stack-cfi..module_image + 1056-0x0000000000000449)
+# CHECK-NEXT: row[0]: 0: CFA=DW_OP_breg6 +0, DW_OP_deref => rbp=DW_OP_pick 0x00, DW_OP_deref rsp=DW_OP_pick 0x00, DW_OP_consts +16, DW_OP_plus rip=DW_OP_pick 0x00, DW_OP_consts +8, DW_OP_plus , DW_OP_deref
+
+thread backtrace
+# CHECK-LABEL: thread backtrace
+# CHECK: frame #0: 0x0000000000400430 unwind-via-stack-cfi`bar + 16
+# CHECK: frame #1: 0x0000000000400497 unwind-via-stack-cfi`_start + 23
+# CHECK-NOT: frame
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index c1f5ff0d283ca..764461ee92b84 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -3521,7 +3521,7 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {
             non_callsite_unwind_plan->GetSourceName().AsCString());
       }
       UnwindPlanSP callsite_unwind_plan =
-          func_unwinders_sp->GetUnwindPlanAtCallSite(*target);
+          func_unwinders_sp->GetUnwindPlanAtCallSite(*target, *thread);
       if (callsite_unwind_plan) {
         result.GetOutputStream().Printf(
             "Synchronous (restricted to call-sites) UnwindPlan is '%s'\n",
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp
index 4ec8bba098148..76646d8897d19 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextLLDB.cpp
@@ -244,8 +244,8 @@ void RegisterContextLLDB::InitializeZerothFrame() {
     }
 
     if (func_unwinders_sp.get() != nullptr)
-      call_site_unwind_plan =
-          func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget());
+      call_site_unwind_plan = func_unwinders_sp->GetUnwindPlanAtCallSite(
+          process->GetTarget(), m_thread);
 
     if (call_site_unwind_plan.get() != nullptr) {
       m_fallback_unwind_plan_sp = call_site_unwind_plan;
@@ -873,7 +873,8 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() {
         // location what helps in the most common cases when the instruction
         // emulation fails.
         UnwindPlanSP call_site_unwind_plan =
-            func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget());
+            func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget(),
+                                                       m_thread);
         if (call_site_unwind_plan &&
             call_site_unwind_plan.get() != unwind_plan_sp.get() &&
             call_site_unwind_plan->GetSourceName() !=
@@ -909,8 +910,8 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() {
   // Typically this is unwind info from an eh_frame section intended for
   // exception handling; only valid at call sites
   if (process) {
-    unwind_plan_sp =
-        func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget());
+    unwind_plan_sp = func_unwinders_sp->GetUnwindPlanAtCallSite(
+        process->GetTarget(), m_thread);
   }
   int valid_offset = -1;
   if (IsUnwindPlanValidForCurrentPC(unwind_plan_sp, valid_offset)) {
@@ -940,7 +941,8 @@ UnwindPlanSP RegisterContextLLDB::GetFullUnwindPlanForFrame() {
     // code it is often written in a way that it valid at all location what
     // helps in the most common cases when the instruction emulation fails.
     UnwindPlanSP call_site_unwind_plan =
-        func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget());
+        func_unwinders_sp->GetUnwindPlanAtCallSite(process->GetTarget(),
+                                                   m_thread);
     if (call_site_unwind_plan &&
         call_site_unwind_plan.get() != unwind_plan_sp.get() &&
         call_site_unwind_plan->GetSourceName() !=
diff --git a/lldb/source/Symbol/FuncUnwinders.cpp b/lldb/source/Symbol/FuncUnwinders.cpp
index bbdb5e1bbf79c..33b2e29598985 100644
--- a/lldb/source/Symbol/FuncUnwinders.cpp
+++ b/lldb/source/Symbol/FuncUnwinders.cpp
@@ -54,9 +54,12 @@ FuncUnwinders::FuncUnwinders(UnwindTable &unwind_table, AddressRange range)
 
 FuncUnwinders::~FuncUnwinders() {}
 
-UnwindPlanSP FuncUnwinders::GetUnwindPlanAtCallSite(Target &target) {
+UnwindPlanSP FuncUnwinders::GetUnwindPlanAtCallSite(Target &target,
+                                                    Thread &thread) {
   std::lock_guard<std::recursive_mutex> guard(m_mutex);
 
+  if (UnwindPlanSP plan_sp = GetSymbolFileUnwindPlan(thread))
+    return plan_sp;
   if (UnwindPlanSP plan_sp = GetEHFrameUnwindPlan(target))
     return plan_sp;
   if (UnwindPlanSP plan_sp = GetDebugFrameUnwindPlan(target))
@@ -357,6 +360,8 @@ UnwindPlanSP FuncUnwinders::GetUnwindPlanAtNonCallSite(Target &target,
     return eh_frame_sp;
   }
 
+  if (UnwindPlanSP plan_sp = GetSymbolFileUnwindPlan(thread))
+    return plan_sp;
   if (UnwindPlanSP plan_sp = GetEHFrameAugmentedUnwindPlan(target, thread))
     return plan_sp;
   if (UnwindPlanSP plan_sp = GetDebugFrameAugmentedUnwindPlan(target, thread))

From 980f7605156e76fe2310fccca1581e5860c1512e Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 09:56:23 +0000
Subject: [PATCH 0153/1176] [AArch64][SVE2] Asm: add PMULLB/PMULLT instructions

Summary:
This patch adds support for the polynomial multiplication instructions
PMULLB/PMULLT. The 64-bit source and 128-bit destination element
variants are enabled with crypto extensions (+sve2-aes), similar to the
NEON PMULL2 instruction. All other variants are enabled with +sve2.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62145

llvm-svn: 361619
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 +++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  5 +++
 .../MC/AArch64/SVE2/pmullb-128-diagnostics.s  | 25 +++++++++++
 llvm/test/MC/AArch64/SVE2/pmullb-128.s        | 15 +++++++
 .../test/MC/AArch64/SVE2/pmullb-diagnostics.s | 45 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/pmullb.s            | 21 +++++++++
 .../MC/AArch64/SVE2/pmullt-128-diagnostics.s  | 25 +++++++++++
 llvm/test/MC/AArch64/SVE2/pmullt-128.s        | 15 +++++++
 .../test/MC/AArch64/SVE2/pmullt-diagnostics.s | 45 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/pmullt.s            | 21 +++++++++
 10 files changed, 229 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullb-128-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullb-128.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullt-128-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullt-128.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/pmullt.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index dd14e4ba815e4..18f874fccb66c 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1195,6 +1195,8 @@ let Predicates = [HasSVE2] in {
   defm SMULLT_ZZZ   : sve2_wide_int_arith_long<0b11101, "smullt">;
   defm UMULLB_ZZZ   : sve2_wide_int_arith_long<0b11110, "umullb">;
   defm UMULLT_ZZZ   : sve2_wide_int_arith_long<0b11111, "umullt">;
+  defm PMULLB_ZZZ   : sve2_pmul_long<0b0, "pmullb">;
+  defm PMULLT_ZZZ   : sve2_pmul_long<0b1, "pmullt">;
 
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
@@ -1203,3 +1205,13 @@ let Predicates = [HasSVE2] in {
   defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
   defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
 }
+
+let Predicates = [HasSVE2AES] in {
+  // PMULLB and PMULLT instructions which operate with 64-bit source and
+  // 128-bit destination elements are enabled with crypto extensions, similar
+  // to NEON PMULL2 instruction.
+  def PMULLB_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11010, "pmullb",
+                                         ZPR128, ZPR64, ZPR64>;
+  def PMULLT_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11011, "pmullt",
+                                         ZPR128, ZPR64, ZPR64>;
+}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index b741f09886b55..333fa72500cf4 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2186,6 +2186,11 @@ multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm> {
   def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>;
 }
 
+multiclass sve2_pmul_long<bits<1> opc, string asm> {
+  def _H : sve2_wide_int_arith<0b01, {0b1101, opc}, asm, ZPR16, ZPR8, ZPR8>;
+  def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Arithmetic - Unary Predicated Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/pmullb-128-diagnostics.s b/llvm/test/MC/AArch64/SVE2/pmullb-128-diagnostics.s
new file mode 100644
index 0000000000000..001b40c69fc30
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullb-128-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+pmullb z0.q, z0.q, z0.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullb z0.q, z0.q, z0.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+pmullb z0.q, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullb z0.q, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+pmullb z0.q, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullb z0.q, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/pmullb-128.s b/llvm/test/MC/AArch64/SVE2/pmullb-128.s
new file mode 100644
index 0000000000000..021874e9e69a0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullb-128.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+pmullb z29.q, z30.d, z31.d
+// CHECK-INST: pmullb z29.q, z30.d, z31.d
+// CHECK-ENCODING: [0xdd,0x6b,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: dd 6b 1f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/pmullb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/pmullb-diagnostics.s
new file mode 100644
index 0000000000000..e4710e1f96f01
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullb-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+pmullb z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullb z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullb z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullb z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullb z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullb z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullb z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullb z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullb z0.q, z0.q, z0.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: pmullb z0.q, z0.q, z0.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+pmullb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+pmullb z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullb z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/pmullb.s b/llvm/test/MC/AArch64/SVE2/pmullb.s
new file mode 100644
index 0000000000000..5a9989ab3837e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullb.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+pmullb z0.h, z1.b, z2.b
+// CHECK-INST: pmullb z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x68,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 68 42 45 <unknown>
+
+pmullb z31.d, z31.s, z31.s
+// CHECK-INST: pmullb z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x6b,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 6b df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/pmullt-128-diagnostics.s b/llvm/test/MC/AArch64/SVE2/pmullt-128-diagnostics.s
new file mode 100644
index 0000000000000..301824e868172
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullt-128-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+pmullt z0.q, z0.q, z0.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullt z0.q, z0.q, z0.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+pmullt z0.q, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullt z0.q, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+pmullt z0.q, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullt z0.q, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/pmullt-128.s b/llvm/test/MC/AArch64/SVE2/pmullt-128.s
new file mode 100644
index 0000000000000..5b454b01f33b0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullt-128.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+pmullt z29.q, z30.d, z31.d
+// CHECK-INST: pmullt z29.q, z30.d, z31.d
+// CHECK-ENCODING: [0xdd,0x6f,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: dd 6f 1f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/pmullt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/pmullt-diagnostics.s
new file mode 100644
index 0000000000000..f068bd3bac1a5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullt-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+pmullt z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullt z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullt z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullt z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullt z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullt z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullt z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmullt z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmullt z0.q, z0.q, z0.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: pmullt z0.q, z0.q, z0.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+pmullt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+pmullt z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: pmullt z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/pmullt.s b/llvm/test/MC/AArch64/SVE2/pmullt.s
new file mode 100644
index 0000000000000..9493e2a1a8248
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/pmullt.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+pmullt z0.h, z1.b, z2.b
+// CHECK-INST: pmullt z0.h, z1.b, z2.b
+// CHECK-ENCODING: [0x20,0x6c,0x42,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 6c 42 45 <unknown>
+
+pmullt z31.d, z31.s, z31.s
+// CHECK-INST: pmullt z31.d, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x6f,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 6f df 45 <unknown>

From 95b8d9bbf852428fc738bb93c78ef7b00f39341f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 24 May 2019 10:03:11 +0000
Subject: [PATCH 0154/1176] [SelectionDAG] computeKnownBits - support constant
 pool values from target

This patch adds the overridable TargetLowering::getTargetConstantFromLoad function which allows targets to return any constant value loaded by a LoadSDNode node - only X86 makes use of this so far but everything should be in place for other targets.

computeKnownBits then uses this function to improve codegen, notably vector code after legalization.

A future commit will do the same for ComputeNumSignBits but computeKnownBits sees the bigger benefit.

This required a couple of fixes:
* SimplifyDemandedBits must early-out for getTargetConstantFromLoad cases to prevent infinite loops of constant regeneration (similar to what we already do for BUILD_VECTOR).
* Fix a DAGCombiner::visitTRUNCATE issue as we had trunc(shl(v8i32),v8i16) <-> shl(trunc(v8i16),v8i32) infinite loops after legalization on AVX512 targets.

Differential Revision: https://reviews.llvm.org/D61887

llvm-svn: 361620
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |    4 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |    2 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   55 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   12 +
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   16 +-
 llvm/lib/Target/X86/X86ISelLowering.h         |    2 +
 llvm/test/CodeGen/X86/avx512-vec-cmp.ll       |    5 +-
 llvm/test/CodeGen/X86/bitreverse.ll           |   18 +-
 llvm/test/CodeGen/X86/combine-bitreverse.ll   |   18 +-
 llvm/test/CodeGen/X86/combine-shl.ll          |   11 +-
 llvm/test/CodeGen/X86/movmsk-cmp.ll           |   16 -
 llvm/test/CodeGen/X86/vector-bitreverse.ll    |  553 +++-----
 llvm/test/CodeGen/X86/vector-pcmp.ll          |    3 +-
 .../CodeGen/X86/vector-reduce-mul-widen.ll    | 1147 +++++++----------
 llvm/test/CodeGen/X86/vector-reduce-mul.ll    | 1122 +++++++---------
 .../CodeGen/X86/vector-shift-lshr-sub128.ll   |   24 +-
 llvm/test/CodeGen/X86/vector-zext.ll          |    2 -
 17 files changed, 1193 insertions(+), 1817 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7f4b2bad803de..b1a64744f0642 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3119,6 +3119,10 @@ class TargetLowering : public TargetLoweringBase {
                                                  TargetLoweringOpt &TLO,
                                                  unsigned Depth = 0) const;
 
+  /// This method returns the constant pool value that will be loaded by LD.
+  /// NOTE: You must check for implicit extensions of the constant by LD.
+  virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
+
   /// If \p SNaN is false, \returns true if \p Op is known to never be any
   /// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
   /// NaN.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b5bb86580fb61..117654bc7a3f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10110,7 +10110,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
 
   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
-      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+      (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
     SDValue Amt = N0.getOperand(1);
     KnownBits Known = DAG.computeKnownBits(Amt);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 366b404b30446..553a46f6ec160 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2886,8 +2886,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   }
   case ISD::LOAD: {
     LoadSDNode *LD = cast<LoadSDNode>(Op);
-    // If this is a ZEXTLoad and we are looking at the loaded value.
-    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+    const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
+    if (ISD::isNON_EXTLoad(LD) && Cst) {
+      // Determine any common known bits from the loaded constant pool value.
+      Type *CstTy = Cst->getType();
+      if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+        // If its a vector splat, then we can (quickly) reuse the scalar path.
+        // NOTE: We assume all elements match and none are UNDEF.
+        if (CstTy->isVectorTy()) {
+          if (const Constant *Splat = Cst->getSplatValue()) {
+            Cst = Splat;
+            CstTy = Cst->getType();
+          }
+        }
+        // TODO - do we need to handle different bitwidths?
+        if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
+          // Iterate across all vector elements finding common known bits.
+          Known.One.setAllBits();
+          Known.Zero.setAllBits();
+          for (unsigned i = 0; i != NumElts; ++i) {
+            if (!DemandedElts[i])
+              continue;
+            if (Constant *Elt = Cst->getAggregateElement(i)) {
+              if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+                const APInt &Value = CInt->getValue();
+                Known.One &= Value;
+                Known.Zero &= ~Value;
+                continue;
+              }
+              if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+                APInt Value = CFP->getValueAPF().bitcastToAPInt();
+                Known.One &= Value;
+                Known.Zero &= ~Value;
+                continue;
+              }
+            }
+            Known.One.clearAllBits();
+            Known.Zero.clearAllBits();
+            break;
+          }
+        } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
+          if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+            const APInt &Value = CInt->getValue();
+            Known.One = Value;
+            Known.Zero = ~Value;
+          } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+            APInt Value = CFP->getValueAPF().bitcastToAPInt();
+            Known.One = Value;
+            Known.Zero = ~Value;
+          }
+        }
+      }
+    } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+      // If this is a ZEXTLoad and we are looking at the loaded value.
       EVT VT = LD->getMemoryVT();
       unsigned MemBits = VT.getScalarSizeInBits();
       Known.Zero.setBitsFrom(MemBits);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4d950984b29ce..d636e613363e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -659,6 +659,14 @@ bool TargetLowering::SimplifyDemandedBits(
       Known.Zero &= Known2.Zero;
     }
     return false; // Don't fall through, will infinitely loop.
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    if (getTargetConstantFromLoad(LD)) {
+      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+      return false; // Don't fall through, will infinitely loop.
+    }
+    break;
+  }
   case ISD::INSERT_VECTOR_ELT: {
     SDValue Vec = Op.getOperand(0);
     SDValue Scl = Op.getOperand(1);
@@ -2314,6 +2322,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
   return false;
 }
 
+const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
+  return nullptr;
+}
+
 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                   const SelectionDAG &DAG,
                                                   bool SNaN,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b5940b1d780eb..246e494de782c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5731,10 +5731,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
 }
 
-static const Constant *getTargetConstantFromNode(SDValue Op) {
-  Op = peekThroughBitcasts(Op);
-
-  auto *Load = dyn_cast<LoadSDNode>(Op);
+static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
   if (!Load)
     return nullptr;
 
@@ -5750,6 +5747,17 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
   return CNode->getConstVal();
 }
 
+static const Constant *getTargetConstantFromNode(SDValue Op) {
+  Op = peekThroughBitcasts(Op);
+  return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
+}
+
+const Constant *
+X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
+  assert(LD && "Unexpected null LoadSDNode");
+  return getTargetConstantFromNode(LD);
+}
+
 // Extract raw constant bits from constant pools.
 static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
                                           APInt &UndefElts,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 70bcbe3c8a5de..be3d29019db7c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -908,6 +908,8 @@ namespace llvm {
                                            TargetLoweringOpt &TLO,
                                            unsigned Depth) const override;
 
+    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
+
     SDValue unwrapAddress(SDValue N) const override;
 
     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 2c7d63d5ab959..a823e1b275b39 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -940,9 +940,8 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
 ; AVX512-LABEL: test46:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
-; AVX512-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; AVX512-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x14,0xc1]
-; AVX512-NEXT:    ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512-NEXT:    vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
+; AVX512-NEXT:    ## xmm0 = xmm0[0,1,1,3]
 ; AVX512-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
 ; AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
 ; AVX512-NEXT:    retq ## encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 5da95c574eb43..23056f9e802a5 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -61,31 +61,25 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
 ; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; X64-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
-; X64-NEXT:    packuswb %xmm2, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X64-NEXT:    movdqa %xmm1, %xmm2
-; X64-NEXT:    pand %xmm0, %xmm2
-; X64-NEXT:    psllw $4, %xmm2
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; X64-NEXT:    packuswb %xmm2, %xmm0
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:    psllw $4, %xmm1
 ; X64-NEXT:    pand {{.*}}(%rip), %xmm1
-; X64-NEXT:    psrlw $4, %xmm1
-; X64-NEXT:    pand %xmm0, %xmm1
-; X64-NEXT:    pandn %xmm2, %xmm0
+; X64-NEXT:    psrlw $4, %xmm0
+; X64-NEXT:    pand {{.*}}(%rip), %xmm0
 ; X64-NEXT:    por %xmm1, %xmm0
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; X64-NEXT:    pand %xmm0, %xmm1
 ; X64-NEXT:    psllw $2, %xmm1
-; X64-NEXT:    pand {{.*}}(%rip), %xmm1
 ; X64-NEXT:    pand {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psrlw $2, %xmm0
-; X64-NEXT:    pand {{.*}}(%rip), %xmm0
 ; X64-NEXT:    por %xmm1, %xmm0
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; X64-NEXT:    pand %xmm0, %xmm1
 ; X64-NEXT:    paddb %xmm1, %xmm1
 ; X64-NEXT:    pand {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psrlw $1, %xmm0
-; X64-NEXT:    pand {{.*}}(%rip), %xmm0
 ; X64-NEXT:    por %xmm1, %xmm0
 ; X64-NEXT:    psrlq $48, %xmm0
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll
index 29a0cdadfd41a..8d268ddd75ee3 100644
--- a/llvm/test/CodeGen/X86/combine-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll
@@ -47,31 +47,25 @@ define <4 x i32> @test_demandedbits_bitreverse(<4 x i32> %a0) nounwind {
 ; X86-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; X86-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; X86-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
-; X86-NEXT:    packuswb %xmm2, %xmm1
-; X86-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X86-NEXT:    movdqa %xmm1, %xmm2
-; X86-NEXT:    pand %xmm0, %xmm2
-; X86-NEXT:    psllw $4, %xmm2
+; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; X86-NEXT:    packuswb %xmm2, %xmm0
+; X86-NEXT:    movdqa %xmm0, %xmm1
+; X86-NEXT:    psllw $4, %xmm1
 ; X86-NEXT:    pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    psrlw $4, %xmm1
-; X86-NEXT:    pand %xmm0, %xmm1
-; X86-NEXT:    pandn %xmm2, %xmm0
+; X86-NEXT:    psrlw $4, %xmm0
+; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    por %xmm1, %xmm0
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; X86-NEXT:    pand %xmm0, %xmm1
 ; X86-NEXT:    psllw $2, %xmm1
-; X86-NEXT:    pand {{\.LCPI.*}}, %xmm1
 ; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    psrlw $2, %xmm0
-; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    por %xmm1, %xmm0
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; X86-NEXT:    pand %xmm0, %xmm1
 ; X86-NEXT:    paddb %xmm1, %xmm1
 ; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    psrlw $1, %xmm0
-; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    por %xmm1, %xmm0
 ; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index f9b1b93022442..3e54d29cef95d 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -268,16 +268,11 @@ define <8 x i32> @combine_vec_shl_ext_shl1(<8 x i16> %x) {
 ; SSE2-LABEL: combine_vec_shl_ext_shl1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pslld $29, %xmm2
-; SSE2-NEXT:    pslld $28, %xmm1
-; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
 ; SSE2-NEXT:    pslld $30, %xmm0
-; SSE2-NEXT:    xorpd %xmm2, %xmm2
-; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; SSE2-NEXT:    xorpd %xmm1, %xmm1
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: combine_vec_shl_ext_shl1:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 6f62787c950ac..6f7ec7d3a207f 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1309,7 +1309,6 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
 ; AVX1-NEXT:    vpmovmskb %xmm1, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
@@ -1368,7 +1367,6 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
 ; AVX1-NEXT:    vpmovmskb %xmm1, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
@@ -1432,8 +1430,6 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ; AVX1-LABEL: allones_v64i8_and1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpmovmskb %xmm2, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
@@ -1441,7 +1437,6 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
 ; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
@@ -1518,8 +1513,6 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
 ; AVX1-LABEL: allzeros_v64i8_and1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpmovmskb %xmm2, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
@@ -1527,7 +1520,6 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
 ; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
@@ -2728,7 +2720,6 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
 ; AVX1-NEXT:    vpmovmskb %xmm1, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
@@ -2787,7 +2778,6 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
 ; AVX1-NEXT:    vpmovmskb %xmm1, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
@@ -2851,8 +2841,6 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ; AVX1-LABEL: allones_v64i8_and4:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpmovmskb %xmm2, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
@@ -2860,7 +2848,6 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
 ; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
@@ -2937,8 +2924,6 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
 ; AVX1-LABEL: allzeros_v64i8_and4:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpmovmskb %xmm2, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
@@ -2946,7 +2931,6 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
 ; AVX1-NEXT:    shll $16, %ecx
 ; AVX1-NEXT:    orl %eax, %ecx
 ; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index a564bbc1743cc..bfbfda08cd0eb 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -237,29 +237,22 @@ define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
 ; SSE2-LABEL: test_bitreverse_v16i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    psllw $4, %xmm2
+; SSE2-NEXT:    psllw $4, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    psrlw $4, %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    psllw $2, %xmm1
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    paddb %xmm1, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $1, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -309,31 +302,25 @@ define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind {
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6]
-; SSE2-NEXT:    packuswb %xmm2, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    psllw $4, %xmm2
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT:    packuswb %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psllw $4, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    psrlw $4, %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    psllw $2, %xmm1
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    paddb %xmm1, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $1, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -385,31 +372,25 @@ define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind {
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    packuswb %xmm2, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    psllw $4, %xmm2
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    packuswb %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psllw $4, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    psrlw $4, %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    psllw $2, %xmm1
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    paddb %xmm1, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $1, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -463,31 +444,25 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    packuswb %xmm2, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    psllw $4, %xmm2
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    packuswb %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psllw $4, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $4, %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    pandn %xmm2, %xmm0
+; SSE2-NEXT:    psrlw $4, %xmm0
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    psllw $2, %xmm1
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    paddb %xmm1, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    psrlw $1, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -533,14 +508,11 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
 ; SSE2-LABEL: test_bitreverse_v32i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $4, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pandn %xmm3, %xmm4
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    por %xmm4, %xmm0
@@ -548,44 +520,33 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm4
 ; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm5, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm4, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    pand %xmm4, %xmm5
-; SSE2-NEXT:    psrlw $1, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm6, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; SSE2-NEXT:    movdqa %xmm0, %xmm6
+; SSE2-NEXT:    pand %xmm4, %xmm6
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
 ; SSE2-NEXT:    pand %xmm7, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
-; SSE2-NEXT:    por %xmm5, %xmm0
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
-; SSE2-NEXT:    pand %xmm1, %xmm5
-; SSE2-NEXT:    psllw $4, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm0
+; SSE2-NEXT:    por %xmm6, %xmm0
+; SSE2-NEXT:    movdqa %xmm2, %xmm6
+; SSE2-NEXT:    psllw $4, %xmm6
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm5, %xmm1
+; SSE2-NEXT:    pandn %xmm6, %xmm1
 ; SSE2-NEXT:    por %xmm2, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $2, %xmm3
-; SSE2-NEXT:    pand %xmm9, %xmm3
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm5, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm3, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm6, %xmm4
+; SSE2-NEXT:    paddb %xmm4, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    retq
 ;
@@ -695,14 +656,11 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $4, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm5
 ; SSE2-NEXT:    pandn %xmm3, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    por %xmm5, %xmm0
@@ -710,23 +668,17 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm5, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm5, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
 ; SSE2-NEXT:    pand %xmm7, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
 ; SSE2-NEXT:    movdqa %xmm2, %xmm6
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
@@ -737,25 +689,20 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    packuswb %xmm6, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm1, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
 ; SSE2-NEXT:    pandn %xmm4, %xmm1
 ; SSE2-NEXT:    por %xmm2, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $2, %xmm3
-; SSE2-NEXT:    pand %xmm9, %xmm3
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm3, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm5
-; SSE2-NEXT:    psrlw $1, %xmm5
-; SSE2-NEXT:    pand %xmm12, %xmm5
+; SSE2-NEXT:    paddb %xmm5, %xmm5
 ; SSE2-NEXT:    pand %xmm7, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm5, %xmm1
 ; SSE2-NEXT:    retq
 ;
@@ -873,14 +820,11 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $4, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm5
 ; SSE2-NEXT:    pandn %xmm3, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    por %xmm5, %xmm0
@@ -888,23 +832,17 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm5, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm5, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
 ; SSE2-NEXT:    pand %xmm7, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
 ; SSE2-NEXT:    movdqa %xmm2, %xmm6
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
@@ -915,25 +853,20 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm6, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm1, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
 ; SSE2-NEXT:    pandn %xmm4, %xmm1
 ; SSE2-NEXT:    por %xmm2, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $2, %xmm3
-; SSE2-NEXT:    pand %xmm9, %xmm3
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm3, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm5
-; SSE2-NEXT:    psrlw $1, %xmm5
-; SSE2-NEXT:    pand %xmm12, %xmm5
+; SSE2-NEXT:    paddb %xmm5, %xmm5
 ; SSE2-NEXT:    pand %xmm7, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm5, %xmm1
 ; SSE2-NEXT:    retq
 ;
@@ -1053,14 +986,11 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $4, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm5
 ; SSE2-NEXT:    pandn %xmm3, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    por %xmm5, %xmm0
@@ -1068,23 +998,17 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm5, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm5, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
 ; SSE2-NEXT:    pand %xmm7, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
 ; SSE2-NEXT:    movdqa %xmm2, %xmm6
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm4[8],xmm6[9],xmm4[9],xmm6[10],xmm4[10],xmm6[11],xmm4[11],xmm6[12],xmm4[12],xmm6[13],xmm4[13],xmm6[14],xmm4[14],xmm6[15],xmm4[15]
@@ -1097,25 +1021,20 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm6, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm1, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
 ; SSE2-NEXT:    pandn %xmm4, %xmm1
 ; SSE2-NEXT:    por %xmm2, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    psllw $2, %xmm3
-; SSE2-NEXT:    pand %xmm9, %xmm3
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm3, %xmm1
 ; SSE2-NEXT:    pand %xmm1, %xmm5
-; SSE2-NEXT:    psrlw $1, %xmm5
-; SSE2-NEXT:    pand %xmm12, %xmm5
+; SSE2-NEXT:    paddb %xmm5, %xmm5
 ; SSE2-NEXT:    pand %xmm7, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm5, %xmm1
 ; SSE2-NEXT:    retq
 ;
@@ -1223,15 +1142,12 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
 define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
 ; SSE2-LABEL: test_bitreverse_v64i8:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm3, %xmm14
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE2-NEXT:    movdqa %xmm3, %xmm10
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $4, %xmm5
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm5, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm9, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
@@ -1239,92 +1155,71 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm5, %xmm6
 ; SSE2-NEXT:    psllw $2, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm8, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm7
 ; SSE2-NEXT:    pand %xmm6, %xmm7
-; SSE2-NEXT:    psrlw $1, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm13 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; SSE2-NEXT:    pand %xmm13, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    paddb %xmm7, %xmm7
+; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    pand %xmm9, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm7
-; SSE2-NEXT:    pand %xmm3, %xmm7
 ; SSE2-NEXT:    psllw $4, %xmm7
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pandn %xmm7, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm1
 ; SSE2-NEXT:    psrlw $4, %xmm1
 ; SSE2-NEXT:    pand %xmm3, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm6, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
 ; SSE2-NEXT:    movdqa %xmm3, %xmm7
 ; SSE2-NEXT:    pandn %xmm4, %xmm7
-; SSE2-NEXT:    pand %xmm9, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm3, %xmm2
 ; SSE2-NEXT:    por %xmm7, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm2
+; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $2, %xmm2
-; SSE2-NEXT:    pand %xmm11, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm6, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm2
-; SSE2-NEXT:    paddb %xmm2, %xmm2
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
-; SSE2-NEXT:    movdqa %xmm14, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
+; SSE2-NEXT:    movdqa %xmm10, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm14
-; SSE2-NEXT:    psrlw $4, %xmm14
-; SSE2-NEXT:    pand %xmm3, %xmm14
+; SSE2-NEXT:    psrlw $4, %xmm10
+; SSE2-NEXT:    pand %xmm3, %xmm10
 ; SSE2-NEXT:    pandn %xmm4, %xmm3
-; SSE2-NEXT:    por %xmm14, %xmm3
+; SSE2-NEXT:    por %xmm10, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
-; SSE2-NEXT:    pand %xmm10, %xmm3
+; SSE2-NEXT:    pand %xmm8, %xmm3
 ; SSE2-NEXT:    psrlw $2, %xmm3
-; SSE2-NEXT:    pand %xmm11, %xmm3
 ; SSE2-NEXT:    por %xmm5, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    pand %xmm13, %xmm3
-; SSE2-NEXT:    paddb %xmm3, %xmm3
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    pand %xmm9, %xmm3
+; SSE2-NEXT:    psrlw $1, %xmm3
 ; SSE2-NEXT:    por %xmm6, %xmm3
 ; SSE2-NEXT:    retq
 ;
@@ -1490,24 +1385,21 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
 define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
 ; SSE2-LABEL: test_bitreverse_v32i16:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm3, %xmm15
-; SSE2-NEXT:    pxor %xmm14, %xmm14
+; SSE2-NEXT:    movdqa %xmm3, %xmm11
+; SSE2-NEXT:    pxor %xmm10, %xmm10
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm14[8],xmm3[9],xmm14[9],xmm3[10],xmm14[10],xmm3[11],xmm14[11],xmm3[12],xmm14[12],xmm3[13],xmm14[13],xmm3[14],xmm14[14],xmm3[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm10[8],xmm3[9],xmm10[9],xmm3[10],xmm10[10],xmm3[11],xmm10[11],xmm3[12],xmm10[12],xmm3[13],xmm10[13],xmm3[14],xmm10[14],xmm3[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3],xmm0[4],xmm14[4],xmm0[5],xmm14[5],xmm0[6],xmm14[6],xmm0[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $4, %xmm5
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm3, %xmm7
 ; SSE2-NEXT:    pandn %xmm5, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
@@ -1515,116 +1407,95 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm7
 ; SSE2-NEXT:    pand %xmm5, %xmm7
 ; SSE2-NEXT:    psllw $2, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm7, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm13 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; SSE2-NEXT:    pand %xmm13, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    pand %xmm9, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm14[8],xmm6[9],xmm14[9],xmm6[10],xmm14[10],xmm6[11],xmm14[11],xmm6[12],xmm14[12],xmm6[13],xmm14[13],xmm6[14],xmm14[14],xmm6[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm10[8],xmm6[9],xmm10[9],xmm6[10],xmm10[10],xmm6[11],xmm10[11],xmm6[12],xmm10[12],xmm6[13],xmm10[13],xmm6[14],xmm10[14],xmm6[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm6[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,4,7,6]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1],xmm1[2],xmm10[2],xmm1[3],xmm10[3],xmm1[4],xmm10[4],xmm1[5],xmm10[5],xmm1[6],xmm10[6],xmm1[7],xmm10[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    packuswb %xmm6, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    pand %xmm3, %xmm6
 ; SSE2-NEXT:    psllw $4, %xmm6
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pandn %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $4, %xmm1
 ; SSE2-NEXT:    pand %xmm3, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm14[8],xmm4[9],xmm14[9],xmm4[10],xmm14[10],xmm4[11],xmm14[11],xmm4[12],xmm14[12],xmm4[13],xmm14[13],xmm4[14],xmm14[14],xmm4[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm10[8],xmm4[9],xmm10[9],xmm4[10],xmm10[10],xmm4[11],xmm10[11],xmm4[12],xmm10[12],xmm4[13],xmm10[13],xmm4[14],xmm10[14],xmm4[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,4,7,6]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3],xmm2[4],xmm14[4],xmm2[5],xmm14[5],xmm2[6],xmm14[6],xmm2[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    packuswb %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
 ; SSE2-NEXT:    movdqa %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm4, %xmm6
-; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm3, %xmm2
 ; SSE2-NEXT:    por %xmm6, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm2
+; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $2, %xmm2
-; SSE2-NEXT:    pand %xmm11, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm2
-; SSE2-NEXT:    paddb %xmm2, %xmm2
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
-; SSE2-NEXT:    movdqa %xmm15, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm14[8],xmm4[9],xmm14[9],xmm4[10],xmm14[10],xmm4[11],xmm14[11],xmm4[12],xmm14[12],xmm4[13],xmm14[13],xmm4[14],xmm14[14],xmm4[15],xmm14[15]
+; SSE2-NEXT:    movdqa %xmm11, %xmm4
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm10[8],xmm4[9],xmm10[9],xmm4[10],xmm10[10],xmm4[11],xmm10[11],xmm4[12],xmm10[12],xmm4[13],xmm10[13],xmm4[14],xmm10[14],xmm4[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,4,7,6]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm15[1,0,3,2,4,5,6,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm11[1,0,3,2,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,4,7,6]
 ; SSE2-NEXT:    packuswb %xmm4, %xmm6
 ; SSE2-NEXT:    movdqa %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm6
 ; SSE2-NEXT:    psrlw $4, %xmm6
 ; SSE2-NEXT:    pand %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm4, %xmm3
 ; SSE2-NEXT:    por %xmm6, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    pand %xmm9, %xmm5
-; SSE2-NEXT:    pand %xmm10, %xmm3
+; SSE2-NEXT:    pand %xmm8, %xmm3
 ; SSE2-NEXT:    psrlw $2, %xmm3
-; SSE2-NEXT:    pand %xmm11, %xmm3
 ; SSE2-NEXT:    por %xmm5, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm7
-; SSE2-NEXT:    psrlw $1, %xmm7
-; SSE2-NEXT:    pand %xmm12, %xmm7
-; SSE2-NEXT:    pand %xmm13, %xmm3
-; SSE2-NEXT:    paddb %xmm3, %xmm3
+; SSE2-NEXT:    paddb %xmm7, %xmm7
+; SSE2-NEXT:    pand %xmm9, %xmm3
+; SSE2-NEXT:    psrlw $1, %xmm3
 ; SSE2-NEXT:    por %xmm7, %xmm3
 ; SSE2-NEXT:    retq
 ;
@@ -1808,24 +1679,21 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
 define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
 ; SSE2-LABEL: test_bitreverse_v16i32:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm3, %xmm15
-; SSE2-NEXT:    pxor %xmm14, %xmm14
+; SSE2-NEXT:    movdqa %xmm3, %xmm11
+; SSE2-NEXT:    pxor %xmm10, %xmm10
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm14[8],xmm3[9],xmm14[9],xmm3[10],xmm14[10],xmm3[11],xmm14[11],xmm3[12],xmm14[12],xmm3[13],xmm14[13],xmm3[14],xmm14[14],xmm3[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm10[8],xmm3[9],xmm10[9],xmm3[10],xmm10[10],xmm3[11],xmm10[11],xmm3[12],xmm10[12],xmm3[13],xmm10[13],xmm3[14],xmm10[14],xmm3[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3],xmm0[4],xmm14[4],xmm0[5],xmm14[5],xmm0[6],xmm14[6],xmm0[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $4, %xmm5
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm3, %xmm7
 ; SSE2-NEXT:    pandn %xmm5, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
@@ -1833,116 +1701,95 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm7
 ; SSE2-NEXT:    pand %xmm5, %xmm7
 ; SSE2-NEXT:    psllw $2, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm7, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm13 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; SSE2-NEXT:    pand %xmm13, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    pand %xmm9, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm14[8],xmm6[9],xmm14[9],xmm6[10],xmm14[10],xmm6[11],xmm14[11],xmm6[12],xmm14[12],xmm6[13],xmm14[13],xmm6[14],xmm14[14],xmm6[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm10[8],xmm6[9],xmm10[9],xmm6[10],xmm10[10],xmm6[11],xmm10[11],xmm6[12],xmm10[12],xmm6[13],xmm10[13],xmm6[14],xmm10[14],xmm6[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1],xmm1[2],xmm10[2],xmm1[3],xmm10[3],xmm1[4],xmm10[4],xmm1[5],xmm10[5],xmm1[6],xmm10[6],xmm1[7],xmm10[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm6, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    pand %xmm3, %xmm6
 ; SSE2-NEXT:    psllw $4, %xmm6
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pandn %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $4, %xmm1
 ; SSE2-NEXT:    pand %xmm3, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm14[8],xmm4[9],xmm14[9],xmm4[10],xmm14[10],xmm4[11],xmm14[11],xmm4[12],xmm14[12],xmm4[13],xmm14[13],xmm4[14],xmm14[14],xmm4[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm10[8],xmm4[9],xmm10[9],xmm4[10],xmm10[10],xmm4[11],xmm10[11],xmm4[12],xmm10[12],xmm4[13],xmm10[13],xmm4[14],xmm10[14],xmm4[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3],xmm2[4],xmm14[4],xmm2[5],xmm14[5],xmm2[6],xmm14[6],xmm2[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
 ; SSE2-NEXT:    movdqa %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm4, %xmm6
-; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm3, %xmm2
 ; SSE2-NEXT:    por %xmm6, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm2
+; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $2, %xmm2
-; SSE2-NEXT:    pand %xmm11, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm2
-; SSE2-NEXT:    paddb %xmm2, %xmm2
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
-; SSE2-NEXT:    movdqa %xmm15, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm14[8],xmm4[9],xmm14[9],xmm4[10],xmm14[10],xmm4[11],xmm14[11],xmm4[12],xmm14[12],xmm4[13],xmm14[13],xmm4[14],xmm14[14],xmm4[15],xmm14[15]
+; SSE2-NEXT:    movdqa %xmm11, %xmm4
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm10[8],xmm4[9],xmm10[9],xmm4[10],xmm10[10],xmm4[11],xmm10[11],xmm4[12],xmm10[12],xmm4[13],xmm10[13],xmm4[14],xmm10[14],xmm4[15],xmm10[15]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm15[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm11[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm4, %xmm6
 ; SSE2-NEXT:    movdqa %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm6
 ; SSE2-NEXT:    psrlw $4, %xmm6
 ; SSE2-NEXT:    pand %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm4, %xmm3
 ; SSE2-NEXT:    por %xmm6, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    pand %xmm9, %xmm5
-; SSE2-NEXT:    pand %xmm10, %xmm3
+; SSE2-NEXT:    pand %xmm8, %xmm3
 ; SSE2-NEXT:    psrlw $2, %xmm3
-; SSE2-NEXT:    pand %xmm11, %xmm3
 ; SSE2-NEXT:    por %xmm5, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm7
-; SSE2-NEXT:    psrlw $1, %xmm7
-; SSE2-NEXT:    pand %xmm12, %xmm7
-; SSE2-NEXT:    pand %xmm13, %xmm3
-; SSE2-NEXT:    paddb %xmm3, %xmm3
+; SSE2-NEXT:    paddb %xmm7, %xmm7
+; SSE2-NEXT:    pand %xmm9, %xmm3
+; SSE2-NEXT:    psrlw $1, %xmm3
 ; SSE2-NEXT:    por %xmm7, %xmm3
 ; SSE2-NEXT:    retq
 ;
@@ -2132,26 +1979,23 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
 define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
 ; SSE2-LABEL: test_bitreverse_v8i64:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm3, %xmm15
-; SSE2-NEXT:    pxor %xmm14, %xmm14
+; SSE2-NEXT:    movdqa %xmm3, %xmm11
+; SSE2-NEXT:    pxor %xmm10, %xmm10
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm14[8],xmm3[9],xmm14[9],xmm3[10],xmm14[10],xmm3[11],xmm14[11],xmm3[12],xmm14[12],xmm3[13],xmm14[13],xmm3[14],xmm14[14],xmm3[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm10[8],xmm3[9],xmm10[9],xmm3[10],xmm10[10],xmm3[11],xmm10[11],xmm3[12],xmm10[12],xmm3[13],xmm10[13],xmm3[14],xmm10[14],xmm3[15],xmm10[15]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3],xmm0[4],xmm14[4],xmm0[5],xmm14[5],xmm0[6],xmm14[6],xmm0[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $4, %xmm5
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT:    movdqa %xmm3, %xmm7
 ; SSE2-NEXT:    pandn %xmm5, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $4, %xmm0
 ; SSE2-NEXT:    pand %xmm3, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
@@ -2159,122 +2003,101 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm7
 ; SSE2-NEXT:    pand %xmm5, %xmm7
 ; SSE2-NEXT:    psllw $2, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; SSE2-NEXT:    pand %xmm9, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
-; SSE2-NEXT:    pand %xmm10, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [204,204,204,204,204,204,204,204,204,204,204,204,204,204,204,204]
+; SSE2-NEXT:    pand %xmm8, %xmm0
 ; SSE2-NEXT:    psrlw $2, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; SSE2-NEXT:    pand %xmm11, %xmm0
 ; SSE2-NEXT:    por %xmm7, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm7 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm6
 ; SSE2-NEXT:    pand %xmm7, %xmm6
-; SSE2-NEXT:    psrlw $1, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; SSE2-NEXT:    pand %xmm12, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm13 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; SSE2-NEXT:    pand %xmm13, %xmm0
-; SSE2-NEXT:    paddb %xmm0, %xmm0
+; SSE2-NEXT:    paddb %xmm6, %xmm6
+; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [170,170,170,170,170,170,170,170,170,170,170,170,170,170,170,170]
+; SSE2-NEXT:    pand %xmm9, %xmm0
+; SSE2-NEXT:    psrlw $1, %xmm0
 ; SSE2-NEXT:    por %xmm6, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm14[8],xmm6[9],xmm14[9],xmm6[10],xmm14[10],xmm6[11],xmm14[11],xmm6[12],xmm14[12],xmm6[13],xmm14[13],xmm6[14],xmm14[14],xmm6[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm10[8],xmm6[9],xmm10[9],xmm6[10],xmm10[10],xmm6[11],xmm10[11],xmm6[12],xmm10[12],xmm6[13],xmm10[13],xmm6[14],xmm10[14],xmm6[15],xmm10[15]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1],xmm1[2],xmm10[2],xmm1[3],xmm10[3],xmm1[4],xmm10[4],xmm1[5],xmm10[5],xmm1[6],xmm10[6],xmm1[7],xmm10[7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm6, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    pand %xmm3, %xmm6
 ; SSE2-NEXT:    psllw $4, %xmm6
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pandn %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $4, %xmm1
 ; SSE2-NEXT:    pand %xmm3, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm1
+; SSE2-NEXT:    pand %xmm8, %xmm1
 ; SSE2-NEXT:    psrlw $2, %xmm1
-; SSE2-NEXT:    pand %xmm11, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm1
-; SSE2-NEXT:    paddb %xmm1, %xmm1
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm1
+; SSE2-NEXT:    psrlw $1, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm14[8],xmm4[9],xmm14[9],xmm4[10],xmm14[10],xmm4[11],xmm14[11],xmm4[12],xmm14[12],xmm4[13],xmm14[13],xmm4[14],xmm14[14],xmm4[15],xmm14[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm10[8],xmm4[9],xmm10[9],xmm4[10],xmm10[10],xmm4[11],xmm10[11],xmm4[12],xmm10[12],xmm4[13],xmm10[13],xmm4[14],xmm10[14],xmm4[15],xmm10[15]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3],xmm2[4],xmm14[4],xmm2[5],xmm14[5],xmm2[6],xmm14[6],xmm2[7],xmm14[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
 ; SSE2-NEXT:    movdqa %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm4, %xmm6
-; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $4, %xmm2
 ; SSE2-NEXT:    pand %xmm3, %xmm2
 ; SSE2-NEXT:    por %xmm6, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm5, %xmm4
 ; SSE2-NEXT:    psllw $2, %xmm4
-; SSE2-NEXT:    pand %xmm9, %xmm4
-; SSE2-NEXT:    pand %xmm10, %xmm2
+; SSE2-NEXT:    pand %xmm8, %xmm2
 ; SSE2-NEXT:    psrlw $2, %xmm2
-; SSE2-NEXT:    pand %xmm11, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    pand %xmm7, %xmm4
-; SSE2-NEXT:    psrlw $1, %xmm4
-; SSE2-NEXT:    pand %xmm12, %xmm4
-; SSE2-NEXT:    pand %xmm13, %xmm2
-; SSE2-NEXT:    paddb %xmm2, %xmm2
+; SSE2-NEXT:    paddb %xmm4, %xmm4
+; SSE2-NEXT:    pand %xmm9, %xmm2
+; SSE2-NEXT:    psrlw $1, %xmm2
 ; SSE2-NEXT:    por %xmm4, %xmm2
-; SSE2-NEXT:    movdqa %xmm15, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm14[8],xmm4[9],xmm14[9],xmm4[10],xmm14[10],xmm4[11],xmm14[11],xmm4[12],xmm14[12],xmm4[13],xmm14[13],xmm4[14],xmm14[14],xmm4[15],xmm14[15]
+; SSE2-NEXT:    movdqa %xmm11, %xmm4
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm10[8],xmm4[9],xmm10[9],xmm4[10],xmm10[10],xmm4[11],xmm10[11],xmm4[12],xmm10[12],xmm4[13],xmm10[13],xmm4[14],xmm10[14],xmm4[15],xmm10[15]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,7,6,5,4]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm15[2,3,0,1]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm11[2,3,0,1]
 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm6 = xmm6[3,2,1,0,4,5,6,7]
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,7,6,5,4]
 ; SSE2-NEXT:    packuswb %xmm4, %xmm6
 ; SSE2-NEXT:    movdqa %xmm6, %xmm4
-; SSE2-NEXT:    pand %xmm3, %xmm4
 ; SSE2-NEXT:    psllw $4, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm6
 ; SSE2-NEXT:    psrlw $4, %xmm6
 ; SSE2-NEXT:    pand %xmm3, %xmm6
 ; SSE2-NEXT:    pandn %xmm4, %xmm3
 ; SSE2-NEXT:    por %xmm6, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm5
 ; SSE2-NEXT:    psllw $2, %xmm5
-; SSE2-NEXT:    pand %xmm9, %xmm5
-; SSE2-NEXT:    pand %xmm10, %xmm3
+; SSE2-NEXT:    pand %xmm8, %xmm3
 ; SSE2-NEXT:    psrlw $2, %xmm3
-; SSE2-NEXT:    pand %xmm11, %xmm3
 ; SSE2-NEXT:    por %xmm5, %xmm3
 ; SSE2-NEXT:    pand %xmm3, %xmm7
-; SSE2-NEXT:    psrlw $1, %xmm7
-; SSE2-NEXT:    pand %xmm12, %xmm7
-; SSE2-NEXT:    pand %xmm13, %xmm3
-; SSE2-NEXT:    paddb %xmm3, %xmm3
+; SSE2-NEXT:    paddb %xmm7, %xmm7
+; SSE2-NEXT:    pand %xmm9, %xmm3
+; SSE2-NEXT:    psrlw $1, %xmm3
 ; SSE2-NEXT:    por %xmm7, %xmm3
 ; SSE2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-pcmp.ll b/llvm/test/CodeGen/X86/vector-pcmp.ll
index 6e010f75ad26a..89eaad82fd227 100644
--- a/llvm/test/CodeGen/X86/vector-pcmp.ll
+++ b/llvm/test/CodeGen/X86/vector-pcmp.ll
@@ -414,10 +414,9 @@ define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE2-NEXT:    pand %xmm3, %xmm1
+; SSE2-NEXT:    pand %xmm2, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
index b2668ef75c51b..76ace68eb8c03 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
@@ -1658,20 +1658,14 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm0, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm2, %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,2,3,0]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    packuswb %xmm0, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrlw $8, %xmm1
+; SSE2-NEXT:    pmullw %xmm0, %xmm1
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
@@ -1744,29 +1738,27 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
 ; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    packuswb %xmm2, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
-; SSE2-NEXT:    packuswb %xmm2, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
+; SSE2-NEXT:    packuswb %xmm3, %xmm0
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
-; SSE2-NEXT:    packuswb %xmm2, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
-; SSE2-NEXT:    packuswb %xmm2, %xmm3
-; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
@@ -1831,7 +1823,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpackuswb %xmm0, %xmm0, %xmm2
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
@@ -1965,40 +1957,32 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm2, %xmm3
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm2, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    pmullw %xmm3, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packuswb %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packuswb %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packuswb %xmm1, %xmm3
-; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
@@ -2009,32 +1993,29 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; SSE41-NEXT:    pmullw %xmm1, %xmm0
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    movdqa %xmm3, %xmm0
+; SSE41-NEXT:    pmullw %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    pxor %xmm0, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm3, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm3, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pmullw %xmm3, %xmm0
+; SSE41-NEXT:    pmullw %xmm2, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2044,37 +2025,31 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -2086,16 +2061,15 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2182,16 +2156,15 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2222,33 +2195,25 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -2262,102 +2227,74 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-LABEL: test_v64i8:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm4, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm4, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm2, %xmm0
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    packuswb %xmm5, %xmm0
-; SSE2-NEXT:    movdqa %xmm3, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm5
-; SSE2-NEXT:    pand %xmm4, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm3, %xmm1
-; SSE2-NEXT:    pand %xmm4, %xmm1
-; SSE2-NEXT:    packuswb %xmm5, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm4, %xmm5
+; SSE2-NEXT:    movdqa %xmm2, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm6
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm4, %xmm6
+; SSE2-NEXT:    pmullw %xmm5, %xmm6
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm4, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pmullw %xmm3, %xmm1
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pmullw %xmm6, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm4, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    packuswb %xmm3, %xmm0
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm4, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
 ; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm4, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
 ; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pmullw %xmm5, %xmm4
-; SSE41-NEXT:    pand %xmm2, %xmm4
-; SSE41-NEXT:    packuswb %xmm0, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    pmullw %xmm0, %xmm5
-; SSE41-NEXT:    pand %xmm2, %xmm5
-; SSE41-NEXT:    packuswb %xmm1, %xmm5
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT:    pmullw %xmm2, %xmm0
+; SSE41-NEXT:    pmullw %xmm1, %xmm0
+; SSE41-NEXT:    pmullw %xmm3, %xmm6
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm1, %xmm6
+; SSE41-NEXT:    pmullw %xmm4, %xmm5
+; SSE41-NEXT:    pshufb %xmm1, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm1, %xmm4
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm5, %xmm4
-; SSE41-NEXT:    pand %xmm2, %xmm4
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    packuswb %xmm4, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm0, %xmm1
 ; SSE41-NEXT:    pand %xmm2, %xmm1
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
@@ -2383,59 +2320,48 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ;
 ; AVX1-LABEL: test_v64i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm4
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm3, %xmm5, %xmm3
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm4
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT:    vpmullw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -2457,16 +2383,15 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2508,16 +2433,15 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BW-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512BW-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2559,33 +2483,25 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BWVL-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512BWVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -2609,16 +2525,15 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2659,33 +2574,25 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -2699,298 +2606,211 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-LABEL: test_v128i8:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm5, %xmm8
+; SSE2-NEXT:    movdqa %xmm6, %xmm8
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm9
+; SSE2-NEXT:    movdqa %xmm2, %xmm9
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm8, %xmm9
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm8, %xmm9
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm5, %xmm1
-; SSE2-NEXT:    pand %xmm8, %xmm1
-; SSE2-NEXT:    packuswb %xmm9, %xmm1
-; SSE2-NEXT:    movdqa %xmm7, %xmm9
+; SSE2-NEXT:    movdqa %xmm4, %xmm8
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm0, %xmm10
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm10 = xmm10[8],xmm0[8],xmm10[9],xmm0[9],xmm10[10],xmm0[10],xmm10[11],xmm0[11],xmm10[12],xmm0[12],xmm10[13],xmm0[13],xmm10[14],xmm0[14],xmm10[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm8, %xmm10
+; SSE2-NEXT:    pmullw %xmm9, %xmm10
+; SSE2-NEXT:    movdqa %xmm7, %xmm8
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm3, %xmm9
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm3, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm9, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm7, %xmm3
-; SSE2-NEXT:    pand %xmm8, %xmm3
-; SSE2-NEXT:    packuswb %xmm5, %xmm3
-; SSE2-NEXT:    movdqa %xmm4, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm7
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm5, %xmm7
-; SSE2-NEXT:    pand %xmm8, %xmm7
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm4, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    packuswb %xmm7, %xmm0
-; SSE2-NEXT:    movdqa %xmm6, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm4, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
+; SSE2-NEXT:    pmullw %xmm8, %xmm9
+; SSE2-NEXT:    movdqa %xmm5, %xmm11
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm0[8],xmm11[9],xmm0[9],xmm11[10],xmm0[10],xmm11[11],xmm0[11],xmm11[12],xmm0[12],xmm11[13],xmm0[13],xmm11[14],xmm0[14],xmm11[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm1, %xmm8
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm11, %xmm8
+; SSE2-NEXT:    pmullw %xmm9, %xmm8
+; SSE2-NEXT:    pmullw %xmm10, %xmm8
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm6, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm5, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm4, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pmullw %xmm4, %xmm0
 ; SSE2-NEXT:    pmullw %xmm2, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    packuswb %xmm5, %xmm0
-; SSE2-NEXT:    movdqa %xmm3, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm7, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm5, %xmm1
 ; SSE2-NEXT:    pmullw %xmm3, %xmm1
-; SSE2-NEXT:    pand %xmm8, %xmm1
-; SSE2-NEXT:    packuswb %xmm4, %xmm1
+; SSE2-NEXT:    pmullw %xmm0, %xmm1
+; SSE2-NEXT:    pmullw %xmm8, %xmm1
+; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm8, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packuswb %xmm3, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm2, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
 ; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm9 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm8 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm5, %xmm1
-; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm5, %xmm1
-; SSE41-NEXT:    pmullw %xmm9, %xmm8
-; SSE41-NEXT:    pand %xmm5, %xmm8
-; SSE41-NEXT:    packuswb %xmm1, %xmm8
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm9 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm7, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
-; SSE41-NEXT:    pmullw %xmm9, %xmm1
-; SSE41-NEXT:    pand %xmm5, %xmm1
-; SSE41-NEXT:    packuswb %xmm3, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm4, %xmm0
-; SSE41-NEXT:    pand %xmm5, %xmm0
-; SSE41-NEXT:    pmullw %xmm7, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm8 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm9 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm6, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    pmullw %xmm0, %xmm4
-; SSE41-NEXT:    pand %xmm5, %xmm4
-; SSE41-NEXT:    packuswb %xmm2, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm10 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm4, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT:    pmullw %xmm4, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pand %xmm5, %xmm0
-; SSE41-NEXT:    packuswb %xmm3, %xmm0
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm11 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; SSE41-NEXT:    pmullw %xmm7, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero,xmm8[4],zero,xmm8[5],zero,xmm8[6],zero,xmm8[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm1, %xmm8
-; SSE41-NEXT:    pand %xmm5, %xmm8
-; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
-; SSE41-NEXT:    packuswb %xmm8, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; SSE41-NEXT:    pmullw %xmm5, %xmm1
+; SSE41-NEXT:    pmullw %xmm3, %xmm1
+; SSE41-NEXT:    pmullw %xmm0, %xmm1
+; SSE41-NEXT:    pmullw %xmm7, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    pmullw %xmm11, %xmm4
+; SSE41-NEXT:    pshufb %xmm0, %xmm4
+; SSE41-NEXT:    pmullw %xmm10, %xmm6
+; SSE41-NEXT:    pshufb %xmm0, %xmm6
+; SSE41-NEXT:    pmullw %xmm8, %xmm9
+; SSE41-NEXT:    pshufb %xmm0, %xmm9
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm5, %xmm6
+; SSE41-NEXT:    pshufb %xmm0, %xmm6
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmullw %xmm4, %xmm2
+; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm2, %xmm4
+; SSE41-NEXT:    pshufb %xmm0, %xmm4
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmullw %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm3, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    packuswb %xmm1, %xmm0
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm3, %xmm0
-; SSE41-NEXT:    pand %xmm5, %xmm0
-; SSE41-NEXT:    pmullw %xmm1, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm0, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm1, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm1, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pextrb $0, %xmm0, %eax
+; SSE41-NEXT:    pand %xmm3, %xmm0
+; SSE41-NEXT:    packuswb %xmm1, %xmm0
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm2, %xmm0
+; SSE41-NEXT:    pand %xmm3, %xmm0
+; SSE41-NEXT:    packuswb %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pmullw %xmm0, %xmm1
+; SSE41-NEXT:    pextrb $0, %xmm1, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v128i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; AVX1-NEXT:    vpmullw %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpackuswb %xmm7, %xmm5, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm9 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm9, %xmm7, %xmm7
-; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; AVX1-NEXT:    vpmullw %xmm6, %xmm5, %xmm5
-; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpackuswb %xmm7, %xmm5, %xmm6
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm5, %xmm7, %xmm5
-; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
+; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm9
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm8 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm11
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm11[8],xmm0[8],xmm11[9],xmm0[9],xmm11[10],xmm0[10],xmm11[11],xmm0[11],xmm11[12],xmm0[12],xmm11[13],xmm0[13],xmm11[14],xmm0[14],xmm11[15],xmm0[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm7, %xmm5, %xmm10
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm10, %xmm5, %xmm10
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm6 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm10, %xmm6, %xmm6
+; AVX1-NEXT:    vpmullw %xmm6, %xmm9, %xmm9
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; AVX1-NEXT:    vpmullw %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm11[0],zero,xmm11[1],zero,xmm11[2],zero,xmm11[3],zero,xmm11[4],zero,xmm11[5],zero,xmm11[6],zero,xmm11[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT:    vpmullw %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpshufb %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm5, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; AVX1-NEXT:    vpshufb %xmm7, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmullw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm7, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero,xmm8[4],zero,xmm8[5],zero,xmm8[6],zero,xmm8[7],zero
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm7, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; AVX1-NEXT:    vpmullw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpshufb %xmm7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpshufb %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm9, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm8, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -2998,48 +2818,37 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ;
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX2-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
-; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX2-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpand %ymm5, %ymm1, %ymm1
-; AVX2-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
-; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX2-NEXT:    vpmullw %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
 ; AVX2-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX2-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -3070,18 +2879,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
 ; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpandq %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BW-NEXT:    vpandq %zmm3, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpackuswb %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
 ; AVX512BW-NEXT:    vpmullw %zmm4, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq %zmm3, %zmm0, %zmm0
@@ -3090,16 +2897,15 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BW-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512BW-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -3130,18 +2936,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
 ; AVX512BWVL-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
 ; AVX512BWVL-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm2, %zmm2
+; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm2, %zmm4
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BWVL-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm0, %zmm1
+; AVX512BWVL-NEXT:    vpackuswb %zmm4, %zmm1, %zmm1
+; AVX512BWVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
 ; AVX512BWVL-NEXT:    vpmullw %zmm4, %zmm2, %zmm2
 ; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm2, %zmm2
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BWVL-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm0, %zmm0
@@ -3150,33 +2954,25 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BWVL-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512BWVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -3186,48 +2982,37 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ;
 ; AVX512DQ-LABEL: test_v128i8:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX512DQ-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512DQ-NEXT:    vpmullw %ymm1, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
 ; AVX512DQ-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX512DQ-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -3254,65 +3039,47 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ;
 ; AVX512DQVL-LABEL: test_v128i8:
 ; AVX512DQVL:       # %bb.0:
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX512DQVL-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512DQVL-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX512DQVL-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQVL-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX512DQVL-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm1, %ymm1
-; AVX512DQVL-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512DQVL-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512DQVL-NEXT:    vpmullw %ymm1, %ymm2, %ymm1
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
 ; AVX512DQVL-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX512DQVL-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0
 ; AVX512DQVL-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vpextrb $0, %xmm0, %eax
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 9bd16753332d7..662415bf1d2c5 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -1708,29 +1708,27 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
 ; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    packuswb %xmm2, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
-; SSE2-NEXT:    packuswb %xmm2, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
+; SSE2-NEXT:    packuswb %xmm3, %xmm0
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
-; SSE2-NEXT:    packuswb %xmm2, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm1, %xmm3
-; SSE2-NEXT:    packuswb %xmm2, %xmm3
-; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
@@ -1791,8 +1789,7 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ;
 ; AVX2-LABEL: test_v16i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
@@ -1924,40 +1921,32 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm2, %xmm3
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm2, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    pmullw %xmm3, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packuswb %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packuswb %xmm1, %xmm3
-; SSE2-NEXT:    movdqa %xmm3, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm3
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packuswb %xmm1, %xmm3
-; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
@@ -1968,32 +1957,29 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; SSE41-NEXT:    pmullw %xmm1, %xmm0
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm1, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm0, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm1, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    movdqa %xmm3, %xmm0
+; SSE41-NEXT:    pmullw %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    pxor %xmm0, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm3, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm3, %xmm2
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pmullw %xmm3, %xmm0
+; SSE41-NEXT:    pmullw %xmm2, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2004,36 +1990,28 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX1-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -2045,16 +2023,15 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2141,16 +2118,15 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2181,33 +2157,25 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -2221,102 +2189,74 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-LABEL: test_v64i8:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm4, %xmm5
-; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm4, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm2, %xmm0
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    packuswb %xmm5, %xmm0
-; SSE2-NEXT:    movdqa %xmm3, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm5
-; SSE2-NEXT:    pand %xmm4, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm3, %xmm1
-; SSE2-NEXT:    pand %xmm4, %xmm1
-; SSE2-NEXT:    packuswb %xmm5, %xmm1
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm4, %xmm5
+; SSE2-NEXT:    movdqa %xmm2, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm6
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm4, %xmm6
+; SSE2-NEXT:    pmullw %xmm5, %xmm6
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm4, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pmullw %xmm3, %xmm1
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm4, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pmullw %xmm6, %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm4, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    packuswb %xmm3, %xmm0
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm4, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
 ; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm4, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
 ; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm2, %xmm0
-; SSE41-NEXT:    pmullw %xmm5, %xmm4
-; SSE41-NEXT:    pand %xmm2, %xmm4
-; SSE41-NEXT:    packuswb %xmm0, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    pmullw %xmm0, %xmm5
-; SSE41-NEXT:    pand %xmm2, %xmm5
-; SSE41-NEXT:    packuswb %xmm1, %xmm5
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT:    pmullw %xmm2, %xmm0
+; SSE41-NEXT:    pmullw %xmm1, %xmm0
+; SSE41-NEXT:    pmullw %xmm3, %xmm6
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm1, %xmm6
+; SSE41-NEXT:    pmullw %xmm4, %xmm5
+; SSE41-NEXT:    pshufb %xmm1, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm3, %xmm4
+; SSE41-NEXT:    pshufb %xmm1, %xmm4
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm5, %xmm4
-; SSE41-NEXT:    pand %xmm2, %xmm4
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    packuswb %xmm4, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm0, %xmm1
 ; SSE41-NEXT:    pand %xmm2, %xmm1
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
@@ -2342,59 +2282,42 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ;
 ; AVX1-LABEL: test_v64i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm4
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm3, %xmm5, %xmm3
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm4
+; AVX1-NEXT:    vpmullw %xmm3, %xmm5, %xmm3
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT:    vpmullw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpmullw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -2416,16 +2339,15 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2467,16 +2389,15 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BW-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512BW-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2518,33 +2439,25 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BWVL-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512BWVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -2568,16 +2481,15 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -2618,33 +2530,25 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -2658,298 +2562,201 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-LABEL: test_v128i8:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm5, %xmm8
+; SSE2-NEXT:    movdqa %xmm6, %xmm8
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm9
+; SSE2-NEXT:    movdqa %xmm2, %xmm9
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm8, %xmm9
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm8, %xmm9
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm5, %xmm1
-; SSE2-NEXT:    pand %xmm8, %xmm1
-; SSE2-NEXT:    packuswb %xmm9, %xmm1
-; SSE2-NEXT:    movdqa %xmm7, %xmm9
+; SSE2-NEXT:    movdqa %xmm4, %xmm8
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm0, %xmm10
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm10 = xmm10[8],xmm0[8],xmm10[9],xmm0[9],xmm10[10],xmm0[10],xmm10[11],xmm0[11],xmm10[12],xmm0[12],xmm10[13],xmm0[13],xmm10[14],xmm0[14],xmm10[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm8, %xmm10
+; SSE2-NEXT:    pmullw %xmm9, %xmm10
+; SSE2-NEXT:    movdqa %xmm7, %xmm8
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm3, %xmm9
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm3, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm9, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm7, %xmm3
-; SSE2-NEXT:    pand %xmm8, %xmm3
-; SSE2-NEXT:    packuswb %xmm5, %xmm3
-; SSE2-NEXT:    movdqa %xmm4, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm7
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm5, %xmm7
-; SSE2-NEXT:    pand %xmm8, %xmm7
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm4, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    packuswb %xmm7, %xmm0
-; SSE2-NEXT:    movdqa %xmm6, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm4, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
+; SSE2-NEXT:    pmullw %xmm8, %xmm9
+; SSE2-NEXT:    movdqa %xmm5, %xmm11
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm0[8],xmm11[9],xmm0[9],xmm11[10],xmm0[10],xmm11[11],xmm0[11],xmm11[12],xmm0[12],xmm11[13],xmm0[13],xmm11[14],xmm0[14],xmm11[15],xmm0[15]
+; SSE2-NEXT:    movdqa %xmm1, %xmm8
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm11, %xmm8
+; SSE2-NEXT:    pmullw %xmm9, %xmm8
+; SSE2-NEXT:    pmullw %xmm10, %xmm8
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm6, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm5, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm5
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm4, %xmm5
-; SSE2-NEXT:    pand %xmm8, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pmullw %xmm4, %xmm0
 ; SSE2-NEXT:    pmullw %xmm2, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    packuswb %xmm5, %xmm0
-; SSE2-NEXT:    movdqa %xmm3, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm4
-; SSE2-NEXT:    pand %xmm8, %xmm4
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm7, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm5, %xmm1
 ; SSE2-NEXT:    pmullw %xmm3, %xmm1
-; SSE2-NEXT:    pand %xmm8, %xmm1
-; SSE2-NEXT:    packuswb %xmm4, %xmm1
+; SSE2-NEXT:    pmullw %xmm0, %xmm1
+; SSE2-NEXT:    pmullw %xmm8, %xmm1
+; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm8, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    packuswb %xmm3, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pand %xmm8, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,2,3,3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,2,3,3]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm8, %xmm2
-; SSE2-NEXT:    packuswb %xmm1, %xmm2
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packuswb %xmm3, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm2, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    psrlw $8, %xmm2
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
+; SSE2-NEXT:    pand %xmm0, %xmm2
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
 ; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm9 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm8 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm5, %xmm1
-; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm5, %xmm1
-; SSE41-NEXT:    pmullw %xmm9, %xmm8
-; SSE41-NEXT:    pand %xmm5, %xmm8
-; SSE41-NEXT:    packuswb %xmm1, %xmm8
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm9 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm7, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
-; SSE41-NEXT:    pmullw %xmm9, %xmm1
-; SSE41-NEXT:    pand %xmm5, %xmm1
-; SSE41-NEXT:    packuswb %xmm3, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm4, %xmm0
-; SSE41-NEXT:    pand %xmm5, %xmm0
-; SSE41-NEXT:    pmullw %xmm7, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
-; SSE41-NEXT:    packuswb %xmm0, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm8 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm9 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm6, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    pmullw %xmm0, %xmm4
-; SSE41-NEXT:    pand %xmm5, %xmm4
-; SSE41-NEXT:    packuswb %xmm2, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm10 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm4, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT:    pmullw %xmm4, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pand %xmm5, %xmm0
-; SSE41-NEXT:    packuswb %xmm3, %xmm0
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm11 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; SSE41-NEXT:    pmullw %xmm7, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero,xmm8[4],zero,xmm8[5],zero,xmm8[6],zero,xmm8[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm1, %xmm8
-; SSE41-NEXT:    pand %xmm5, %xmm8
-; SSE41-NEXT:    pmullw %xmm2, %xmm3
-; SSE41-NEXT:    pand %xmm5, %xmm3
-; SSE41-NEXT:    packuswb %xmm8, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; SSE41-NEXT:    pmullw %xmm5, %xmm1
+; SSE41-NEXT:    pmullw %xmm3, %xmm1
+; SSE41-NEXT:    pmullw %xmm0, %xmm1
+; SSE41-NEXT:    pmullw %xmm7, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    pmullw %xmm11, %xmm4
+; SSE41-NEXT:    pshufb %xmm0, %xmm4
+; SSE41-NEXT:    pmullw %xmm10, %xmm6
+; SSE41-NEXT:    pshufb %xmm0, %xmm6
+; SSE41-NEXT:    pmullw %xmm8, %xmm9
+; SSE41-NEXT:    pshufb %xmm0, %xmm9
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm5, %xmm6
+; SSE41-NEXT:    pshufb %xmm0, %xmm6
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmullw %xmm4, %xmm2
+; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm2, %xmm4
+; SSE41-NEXT:    pshufb %xmm0, %xmm4
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmullw %xmm1, %xmm0
+; SSE41-NEXT:    pand %xmm3, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    packuswb %xmm1, %xmm0
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm3, %xmm0
-; SSE41-NEXT:    pand %xmm5, %xmm0
-; SSE41-NEXT:    pmullw %xmm1, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; SSE41-NEXT:    pmullw %xmm0, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm1, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm1, %xmm2
-; SSE41-NEXT:    pand %xmm5, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pextrb $0, %xmm0, %eax
+; SSE41-NEXT:    pand %xmm3, %xmm0
+; SSE41-NEXT:    packuswb %xmm1, %xmm0
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm2, %xmm0
+; SSE41-NEXT:    pand %xmm3, %xmm0
+; SSE41-NEXT:    packuswb %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pmullw %xmm0, %xmm1
+; SSE41-NEXT:    pextrb $0, %xmm1, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v128i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; AVX1-NEXT:    vpmullw %xmm5, %xmm6, %xmm5
-; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpackuswb %xmm7, %xmm5, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm6
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm9 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm9, %xmm7, %xmm7
-; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; AVX1-NEXT:    vpmullw %xmm6, %xmm5, %xmm5
-; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpackuswb %xmm7, %xmm5, %xmm6
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm5, %xmm7, %xmm5
-; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm5
+; AVX1-NEXT:    vpmullw %xmm4, %xmm5, %xmm8
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm9 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm11
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm7 = xmm11[8],xmm0[8],xmm11[9],xmm0[9],xmm11[10],xmm0[10],xmm11[11],xmm0[11],xmm11[12],xmm0[12],xmm11[13],xmm0[13],xmm11[14],xmm0[14],xmm11[15],xmm0[15]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm7, %xmm5, %xmm10
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm10, %xmm5, %xmm10
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm6 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm10, %xmm6, %xmm6
+; AVX1-NEXT:    vpmullw %xmm6, %xmm8, %xmm6
+; AVX1-NEXT:    vpmullw %xmm6, %xmm9, %xmm6
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; AVX1-NEXT:    vpmullw %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm7 = xmm11[0],zero,xmm11[1],zero,xmm11[2],zero,xmm11[3],zero,xmm11[4],zero,xmm11[5],zero,xmm11[6],zero,xmm11[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT:    vpmullw %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm5, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX1-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm4
+; AVX1-NEXT:    vpmullw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm3
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; AVX1-NEXT:    vpmullw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm8[0],zero,xmm8[1],zero,xmm8[2],zero,xmm8[3],zero,xmm8[4],zero,xmm8[5],zero,xmm8[6],zero,xmm8[7],zero
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmullw %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpmullw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpmullw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -2957,48 +2764,37 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ;
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX2-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX2-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
-; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX2-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpand %ymm5, %ymm1, %ymm1
-; AVX2-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX2-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
-; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX2-NEXT:    vpmullw %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
 ; AVX2-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX2-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX2-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX2-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -3029,18 +2825,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
 ; AVX512BW-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm4
 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpandq %zmm3, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BW-NEXT:    vpandq %zmm3, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpackuswb %zmm4, %zmm1, %zmm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
 ; AVX512BW-NEXT:    vpmullw %zmm4, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpandq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BW-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq %zmm3, %zmm0, %zmm0
@@ -3049,16 +2843,15 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BW-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512BW-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512BW-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512BW-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -3089,18 +2882,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} zmm3 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
 ; AVX512BWVL-NEXT:    vpmullw %zmm2, %zmm3, %zmm2
 ; AVX512BWVL-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm2, %zmm2
+; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm2, %zmm4
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BWVL-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
-; AVX512BWVL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63]
+; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm0, %zmm1
+; AVX512BWVL-NEXT:    vpackuswb %zmm4, %zmm1, %zmm1
+; AVX512BWVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} zmm4 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63]
 ; AVX512BWVL-NEXT:    vpmullw %zmm4, %zmm2, %zmm2
 ; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm2, %zmm2
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} zmm1 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55]
 ; AVX512BWVL-NEXT:    vpmullw %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT:    vpandq %zmm3, %zmm0, %zmm0
@@ -3109,33 +2900,25 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512BWVL-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512BWVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512BWVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512BWVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512BWVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    vpextrb $0, %xmm0, %eax
@@ -3145,48 +2928,37 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ;
 ; AVX512DQ-LABEL: test_v128i8:
 ; AVX512DQ:       # %bb.0:
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX512DQ-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512DQ-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512DQ-NEXT:    vpmullw %ymm1, %ymm2, %ymm1
+; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
 ; AVX512DQ-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX512DQ-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0
 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT:    vpand %xmm3, %xmm2, %xmm4
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQ-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512DQ-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; AVX512DQ-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpand %xmm3, %xmm0, %xmm0
 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
@@ -3213,65 +2985,47 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ;
 ; AVX512DQVL-LABEL: test_v128i8:
 ; AVX512DQVL:       # %bb.0:
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX512DQVL-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512DQVL-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpmullw %ymm2, %ymm4, %ymm2
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23]
+; AVX512DQVL-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm5 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQVL-NEXT:    vpmullw %ymm4, %ymm5, %ymm4
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX512DQVL-NEXT:    vpmullw %ymm3, %ymm1, %ymm1
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm1, %ymm1
-; AVX512DQVL-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512DQVL-NEXT:    vpmullw %ymm2, %ymm3, %ymm2
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31]
+; AVX512DQVL-NEXT:    vpmullw %ymm1, %ymm2, %ymm1
+; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
 ; AVX512DQVL-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512DQVL-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT:    vpand %ymm1, %ymm4, %ymm1
+; AVX512DQVL-NEXT:    vpackuswb %ymm0, %ymm1, %ymm0
 ; AVX512DQVL-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrld $16, %xmm0, %xmm2
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX512DQVL-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm2, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX512DQVL-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512DQVL-NEXT:    vpand %xmm1, %xmm0, %xmm1
+; AVX512DQVL-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX512DQVL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; AVX512DQVL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
 ; AVX512DQVL-NEXT:    vpextrb $0, %xmm0, %eax
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index fc3bb0350afa7..546f723e68be6 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -339,9 +339,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; SSE2-LABEL: var_shift_v8i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    psllw $12, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    psraw $15, %xmm0
@@ -505,9 +503,7 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; X32-SSE-LABEL: var_shift_v8i8:
 ; X32-SSE:       # %bb.0:
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; X32-SSE-NEXT:    pand %xmm0, %xmm2
-; X32-SSE-NEXT:    pand %xmm0, %xmm1
+; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm2
 ; X32-SSE-NEXT:    psllw $12, %xmm1
 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
 ; X32-SSE-NEXT:    psraw $15, %xmm0
@@ -1122,11 +1118,9 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; SSE2-LABEL: splatvar_shift_v8i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT:    pand %xmm0, %xmm2
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
 ; SSE2-NEXT:    psllw $12, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    psraw $15, %xmm0
@@ -1287,11 +1281,9 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; X32-SSE-LABEL: splatvar_shift_v8i8:
 ; X32-SSE:       # %bb.0:
 ; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; X32-SSE-NEXT:    pand %xmm0, %xmm2
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm0, %xmm1
+; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm2
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
 ; X32-SSE-NEXT:    psllw $12, %xmm1
 ; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
 ; X32-SSE-NEXT:    psraw $15, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index da6923c2b83a8..7403eb5b81e42 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -1660,9 +1660,7 @@ define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
 ; SSE2:       # %bb.0: # %entry
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT:    packuswb %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]

From 8654b8c55b56fd2a14dbe504da865adfcce22d54 Mon Sep 17 00:00:00 2001
From: James Henderson <jh7370@my.bristol.ac.uk>
Date: Fri, 24 May 2019 10:07:24 +0000
Subject: [PATCH 0155/1176] [llvm-objdump][test] Fix for spurious matches
 against file paths

r361479 added tests that did --implicit-check-not=main, but a user found
that they failed on his machine, due to it having 'main' in a file path
printed earlier in the output. This test fixes this issue by making the
check pattern more explicit.

llvm-svn: 361621
---
 .../X86/source-interleave-missing-source.test    |  2 +-
 .../X86/source-interleave-no-debug-info.test     |  2 +-
 .../X86/source-interleave-relative-paths.test    | 16 ++++++++--------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test
index 166caeb41b50d..d987728472167 100644
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-missing-source.test
@@ -7,7 +7,7 @@
 # RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll
 # RUN: llc -o %t2.o -filetype=obj -mtriple=x86_64-pc-linux %t2.ll
 
-# RUN: llvm-objdump --source %t.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source %t.o | FileCheck %s --implicit-check-not='main()'
 # RUN: llvm-objdump --source %t2.o | FileCheck %s --check-prefixes=CHECK,SOURCE
 
 # CHECK:       0000000000000010 main:
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
index 4a0a34a9eae0e..15f7b7ecb222d 100644
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-no-debug-info.test
@@ -6,7 +6,7 @@
 # RUN: llvm-objcopy --strip-debug %t.o %t2.o
 
 # RUN: llvm-objdump --source %t.o | FileCheck %s --check-prefixes=CHECK,SOURCE
-# RUN: llvm-objdump --source %t2.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source %t2.o | FileCheck %s --implicit-check-not='main()'
 
 # CHECK:       0000000000000010 main:
 # SOURCE-NEXT: ; int main() {
diff --git a/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test b/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test
index f9c69dfc0c06a..baf4ec919c000 100644
--- a/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test
+++ b/llvm/test/tools/llvm-objdump/X86/source-interleave-relative-paths.test
@@ -17,18 +17,18 @@
 
 # RUN: cd %t
 # RUN: llvm-objdump --source a/a.o | FileCheck %s --check-prefixes=CHECK,SOURCE
-# RUN: llvm-objdump --source a/b.o | FileCheck %s --implicit-check-not=main
-# RUN: llvm-objdump --source a/c.o | FileCheck %s --implicit-check-not=main
-# RUN: llvm-objdump --source a/d.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source a/b.o | FileCheck %s --implicit-check-not='main()'
+# RUN: llvm-objdump --source a/c.o | FileCheck %s --implicit-check-not='main()'
+# RUN: llvm-objdump --source a/d.o | FileCheck %s --implicit-check-not='main()'
 # RUN: cd a
-# RUN: llvm-objdump --source a.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source a.o | FileCheck %s --implicit-check-not='main()'
 # RUN: llvm-objdump --source b.o | FileCheck %s --check-prefixes=CHECK,SOURCE
 # RUN: llvm-objdump --source c.o | FileCheck %s --check-prefixes=CHECK,SOURCE
-# RUN: llvm-objdump --source d.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source d.o | FileCheck %s --implicit-check-not='main()'
 # RUN: cd b
-# RUN: llvm-objdump --source ../a.o | FileCheck %s --implicit-check-not=main
-# RUN: llvm-objdump --source ../b.o | FileCheck %s --implicit-check-not=main
-# RUN: llvm-objdump --source ../c.o | FileCheck %s --implicit-check-not=main
+# RUN: llvm-objdump --source ../a.o | FileCheck %s --implicit-check-not='main()'
+# RUN: llvm-objdump --source ../b.o | FileCheck %s --implicit-check-not='main()'
+# RUN: llvm-objdump --source ../c.o | FileCheck %s --implicit-check-not='main()'
 # RUN: llvm-objdump --source ../d.o | FileCheck %s --check-prefixes=CHECK,SOURCE
 
 # CHECK:       0000000000000010 main:

From 5f04f0028209bb582076405b8e984d4f91335945 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 10:10:34 +0000
Subject: [PATCH 0156/1176] [AArch64][SVE2] Asm: support SVE2 Accumulate Group

Summary:
Patch adds support for the following instructions:

SVE2 bitwise shift and insert:
    * SRI, SLI

SVE2 bitwise shift right and accumulate:
    * SSRA, USRA, SRSRA, URSRA

SVE2 complex integer add:
    * CADD, SQCADD

SVE2 integer absolute difference and accumulate:
    * SABA, UABA

SVE2 integer absolute difference and accumulate long:
    * SABALB, SABALT, UABALB, UABALT

SVE2 integer add/subtract long with carry:
    * ADCLB, ADCLT, SBCLB, SBCLT

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62204

llvm-svn: 361622
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  30 ++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 156 ++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/adclb-diagnostics.s |  25 +++
 llvm/test/MC/AArch64/SVE2/adclb.s             |  36 ++++
 llvm/test/MC/AArch64/SVE2/adclt-diagnostics.s |  25 +++
 llvm/test/MC/AArch64/SVE2/adclt.s             |  36 ++++
 llvm/test/MC/AArch64/SVE2/cadd-diagnostics.s  |  38 +++++
 llvm/test/MC/AArch64/SVE2/cadd.s              |  72 ++++++++
 llvm/test/MC/AArch64/SVE2/saba-diagnostics.s  |  20 +++
 llvm/test/MC/AArch64/SVE2/saba.s              |  48 ++++++
 .../test/MC/AArch64/SVE2/sabalb-diagnostics.s |  34 ++++
 llvm/test/MC/AArch64/SVE2/sabalb.s            |  43 +++++
 .../test/MC/AArch64/SVE2/sabalt-diagnostics.s |  34 ++++
 llvm/test/MC/AArch64/SVE2/sabalt.s            |  43 +++++
 llvm/test/MC/AArch64/SVE2/sbclb-diagnostics.s |  25 +++
 llvm/test/MC/AArch64/SVE2/sbclb.s             |  36 ++++
 llvm/test/MC/AArch64/SVE2/sbclt-diagnostics.s |  25 +++
 llvm/test/MC/AArch64/SVE2/sbclt.s             |  36 ++++
 llvm/test/MC/AArch64/SVE2/sli-diagnostics.s   |  59 +++++++
 llvm/test/MC/AArch64/SVE2/sli.s               |  56 +++++++
 .../test/MC/AArch64/SVE2/sqcadd-diagnostics.s |  38 +++++
 llvm/test/MC/AArch64/SVE2/sqcadd.s            |  72 ++++++++
 llvm/test/MC/AArch64/SVE2/sri-diagnostics.s   |  60 +++++++
 llvm/test/MC/AArch64/SVE2/sri.s               |  56 +++++++
 llvm/test/MC/AArch64/SVE2/srsra-diagnostics.s |  60 +++++++
 llvm/test/MC/AArch64/SVE2/srsra.s             |  72 ++++++++
 llvm/test/MC/AArch64/SVE2/ssra-diagnostics.s  |  60 +++++++
 llvm/test/MC/AArch64/SVE2/ssra.s              |  72 ++++++++
 llvm/test/MC/AArch64/SVE2/uaba-diagnostics.s  |  20 +++
 llvm/test/MC/AArch64/SVE2/uaba.s              |  48 ++++++
 .../test/MC/AArch64/SVE2/uabalb-diagnostics.s |  34 ++++
 llvm/test/MC/AArch64/SVE2/uabalb.s            |  43 +++++
 .../test/MC/AArch64/SVE2/uabalt-diagnostics.s |  34 ++++
 llvm/test/MC/AArch64/SVE2/uabalt.s            |  43 +++++
 llvm/test/MC/AArch64/SVE2/ursra-diagnostics.s |  60 +++++++
 llvm/test/MC/AArch64/SVE2/ursra.s             |  72 ++++++++
 llvm/test/MC/AArch64/SVE2/usra-diagnostics.s  |  60 +++++++
 llvm/test/MC/AArch64/SVE2/usra.s              |  72 ++++++++
 38 files changed, 1853 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/adclb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/adclb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/adclt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/adclt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/cadd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/cadd.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saba-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saba.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabalb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabalb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabalt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sabalt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sbclb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sbclb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sbclt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sbclt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sli-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sli.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqcadd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqcadd.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sri-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sri.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srsra-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/srsra.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssra-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssra.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaba-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uaba.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabalb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabalb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabalt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uabalt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ursra-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ursra.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usra-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/usra.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 18f874fccb66c..3f48490bcc178 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1198,6 +1198,36 @@ let Predicates = [HasSVE2] in {
   defm PMULLB_ZZZ   : sve2_pmul_long<0b0, "pmullb">;
   defm PMULLT_ZZZ   : sve2_pmul_long<0b1, "pmullt">;
 
+  // SVE2 bitwise shift and insert
+  defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
+  defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
+
+  // SVE2 bitwise shift right and accumulate
+  defm SSRA_ZZI  : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
+  defm USRA_ZZI  : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
+  defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
+  defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
+
+  // SVE2 complex integer add
+  defm CADD_ZZI   : sve2_int_cadd<0b0, "cadd">;
+  defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
+
+  // SVE2 integer absolute difference and accumulate
+  defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
+  defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;
+
+  // SVE2 integer absolute difference and accumulate long
+  defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
+  defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
+  defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
+  defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;
+
+  // SVE2 integer add/subtract long with carry
+  defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
+  defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt">;
+  defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
+  defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 333fa72500cf4..90c8076d2907f 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2191,6 +2191,162 @@ multiclass sve2_pmul_long<bits<1> opc, string asm> {
   def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Accumulate Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
+                                  ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
+  asm, "\t$Zd, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<6> imm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = tsz8_64{3-2};
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-11} = 0b11110;
+  let Inst{10}    = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
+  def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+  def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+    let Inst{19} = imm{3};
+  }
+  def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+    let Inst{20-19} = imm{4-3};
+  }
+  def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+    let Inst{22}    = imm{5};
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
+  def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+  def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+    let Inst{19} = imm{3};
+  }
+  def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+    let Inst{20-19} = imm{4-3};
+  }
+  def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+    let Inst{22}    = imm{5};
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
+                                        ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
+  asm, "\t$Zda, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<6> imm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = tsz8_64{3-2};
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-12} = 0b1110;
+  let Inst{11-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
+  def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+  def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+    let Inst{19} = imm{3};
+  }
+  def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+    let Inst{20-19} = imm{4-3};
+  }
+  def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+    let Inst{22}    = imm{5};
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, complexrotateopodd:$rot),
+  asm, "\t$Zdn, $_Zdn, $Zm, $rot", "", []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zm;
+  bit rot;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21-17} = 0b00000;
+  let Inst{16}    = opc;
+  let Inst{15-11} = 0b11011;
+  let Inst{10}    = rot;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_cadd<bit opc, string asm> {
+  def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
+  def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
+  def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
+  def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
+}
+
+class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
+                             ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-14} = 0b11;
+  let Inst{13-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_absdiff_accum<bit opc, string asm> {
+  def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
+  def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
+  def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
+  def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> {
+  def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
+  def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
+  def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+}
+
+multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
+  def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
+                                  ZPR32, ZPR32>;
+  def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
+                                  ZPR64, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Arithmetic - Unary Predicated Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/adclb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/adclb-diagnostics.s
new file mode 100644
index 0000000000000..e2077dea9d591
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/adclb-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+adclb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: adclb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+adclb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: adclb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+adclb z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: adclb z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/adclb.s b/llvm/test/MC/AArch64/SVE2/adclb.s
new file mode 100644
index 0000000000000..bcc0297173d03
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/adclb.s
@@ -0,0 +1,36 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+adclb z0.s, z1.s, z31.s
+// CHECK-INST: adclb z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xd0,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d0 1f 45 <unknown>
+
+adclb z0.d, z1.d, z31.d
+// CHECK-INST: adclb z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd0,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d0 5f 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+adclb z0.d, z1.d, z31.d
+// CHECK-INST: adclb z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd0,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d0 5f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/adclt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/adclt-diagnostics.s
new file mode 100644
index 0000000000000..d5a226d9eef44
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/adclt-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+adclt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: adclt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+adclt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: adclt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+adclt z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: adclt z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/adclt.s b/llvm/test/MC/AArch64/SVE2/adclt.s
new file mode 100644
index 0000000000000..46de92fd5a542
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/adclt.s
@@ -0,0 +1,36 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+adclt z0.s, z1.s, z31.s
+// CHECK-INST: adclt z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xd4,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d4 1f 45 <unknown>
+
+adclt z0.d, z1.d, z31.d
+// CHECK-INST: adclt z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd4,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d4 5f 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+adclt z0.d, z1.d, z31.d
+// CHECK-INST: adclt z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd4,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d4 5f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/cadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/cadd-diagnostics.s
new file mode 100644
index 0000000000000..3537f7b788046
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/cadd-diagnostics.s
@@ -0,0 +1,38 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+cadd z0.d, z1.d, z2.d, #90
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: cadd z0.d, z1.d, z2.d, #90
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid rotation
+
+cadd z0.d, z0.d, z1.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
+// CHECK-NEXT: cadd z0.d, z0.d, z1.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cadd z0.d, z0.d, z1.d, #180
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
+// CHECK-NEXT: cadd z0.d, z0.d, z1.d, #180
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cadd z0.d, z0.d, z1.d, #450
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
+// CHECK-NEXT: cadd z0.d, z0.d, z1.d, #450
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+cadd z0.d, z0.d, z31.d, #90
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: cadd z0.d, z0.d, z31.d, #90
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/cadd.s b/llvm/test/MC/AArch64/SVE2/cadd.s
new file mode 100644
index 0000000000000..75d1deb29f0fc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/cadd.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+cadd   z0.b, z0.b, z0.b, #90
+// CHECK-INST: cadd   z0.b, z0.b, z0.b, #90
+// CHECK-ENCODING: [0x00,0xd8,0x00,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 00 45 <unknown>
+
+cadd   z0.h, z0.h, z0.h, #90
+// CHECK-INST: cadd   z0.h, z0.h, z0.h, #90
+// CHECK-ENCODING: [0x00,0xd8,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 40 45 <unknown>
+
+cadd   z0.s, z0.s, z0.s, #90
+// CHECK-INST: cadd   z0.s, z0.s, z0.s, #90
+// CHECK-ENCODING: [0x00,0xd8,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 80 45 <unknown>
+
+cadd   z0.d, z0.d, z0.d, #90
+// CHECK-INST: cadd   z0.d, z0.d, z0.d, #90
+// CHECK-ENCODING: [0x00,0xd8,0xc0,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 c0 45 <unknown>
+
+cadd   z31.b, z31.b, z31.b, #270
+// CHECK-INST: cadd   z31.b, z31.b, z31.b, #270
+// CHECK-ENCODING: [0xff,0xdf,0x00,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 00 45 <unknown>
+
+cadd   z31.h, z31.h, z31.h, #270
+// CHECK-INST: cadd   z31.h, z31.h, z31.h, #270
+// CHECK-ENCODING: [0xff,0xdf,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 40 45 <unknown>
+
+cadd   z31.s, z31.s, z31.s, #270
+// CHECK-INST: cadd   z31.s, z31.s, z31.s, #270
+// CHECK-ENCODING: [0xff,0xdf,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 80 45 <unknown>
+
+cadd   z31.d, z31.d, z31.d, #270
+// CHECK-INST: cadd   z31.d, z31.d, z31.d, #270
+// CHECK-ENCODING: [0xff,0xdf,0xc0,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df c0 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4, z6
+// CHECK-INST: movprfx	z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+cadd   z4.d, z4.d, z31.d, #270
+// CHECK-INST: cadd	z4.d, z4.d, z31.d, #270
+// CHECK-ENCODING: [0xe4,0xdf,0xc0,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e4 df c0 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/saba-diagnostics.s b/llvm/test/MC/AArch64/SVE2/saba-diagnostics.s
new file mode 100644
index 0000000000000..3d6a8347be168
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saba-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+saba z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saba z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+saba z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: saba z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/saba.s b/llvm/test/MC/AArch64/SVE2/saba.s
new file mode 100644
index 0000000000000..7d02e9816fbcb
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saba.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+saba z0.b, z1.b, z31.b
+// CHECK-INST: saba z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xf8,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 f8 1f 45 <unknown>
+
+saba z0.h, z1.h, z31.h
+// CHECK-INST: saba z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xf8,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 f8 5f 45 <unknown>
+
+saba z0.s, z1.s, z31.s
+// CHECK-INST: saba z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xf8,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 f8 9f 45 <unknown>
+
+saba z0.d, z1.d, z31.d
+// CHECK-INST: saba z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xf8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 f8 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+saba z0.d, z1.d, z31.d
+// CHECK-INST: saba z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xf8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 f8 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sabalb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sabalb-diagnostics.s
new file mode 100644
index 0000000000000..1f928af82df91
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabalb-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sabalb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabalb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabalb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabalb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sabalb  z0.d, z1.s, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sabalb  z0.d, z1.s, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sabalb.s b/llvm/test/MC/AArch64/SVE2/sabalb.s
new file mode 100644
index 0000000000000..450c005fb55ef
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabalb.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sabalb z0.h, z1.b, z31.b
+// CHECK-INST: sabalb	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xc0,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 5f 45 <unknown>
+
+sabalb z0.s, z1.h, z31.h
+// CHECK-INST: sabalb	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xc0,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 9f 45 <unknown>
+
+sabalb z0.d, z1.s, z31.s
+// CHECK-INST: sabalb	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xc0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+sabalb z21.d, z1.s, z31.s
+// CHECK-INST: sabalb	z21.d, z1.s, z31.s
+// CHECK-ENCODING: [0x35,0xc0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 c0 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sabalt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sabalt-diagnostics.s
new file mode 100644
index 0000000000000..a6a96abf34c4b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabalt-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sabalt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabalt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabalt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sabalt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sabalt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sabalt  z0.d, z1.s, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sabalt  z0.d, z1.s, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sabalt.s b/llvm/test/MC/AArch64/SVE2/sabalt.s
new file mode 100644
index 0000000000000..2653848efb172
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sabalt.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sabalt z0.h, z1.b, z31.b
+// CHECK-INST: sabalt	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xc4,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c4 5f 45 <unknown>
+
+sabalt z0.s, z1.h, z31.h
+// CHECK-INST: sabalt	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xc4,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c4 9f 45 <unknown>
+
+sabalt z0.d, z1.s, z31.s
+// CHECK-INST: sabalt	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xc4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c4 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+sabalt z21.d, z1.s, z31.s
+// CHECK-INST: sabalt	z21.d, z1.s, z31.s
+// CHECK-ENCODING: [0x35,0xc4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 c4 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sbclb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sbclb-diagnostics.s
new file mode 100644
index 0000000000000..f05f0fd9c4cf5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sbclb-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sbclb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sbclb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sbclb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sbclb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sbclb z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sbclb z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sbclb.s b/llvm/test/MC/AArch64/SVE2/sbclb.s
new file mode 100644
index 0000000000000..bec58658aa431
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sbclb.s
@@ -0,0 +1,36 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sbclb z0.s, z1.s, z31.s
+// CHECK-INST: sbclb z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xd0,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d0 9f 45 <unknown>
+
+sbclb z0.d, z1.d, z31.d
+// CHECK-INST: sbclb z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d0 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sbclb z0.d, z1.d, z31.d
+// CHECK-INST: sbclb z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d0 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sbclt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sbclt-diagnostics.s
new file mode 100644
index 0000000000000..ead1aae029dc6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sbclt-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sbclt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sbclt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sbclt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sbclt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sbclt z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sbclt z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sbclt.s b/llvm/test/MC/AArch64/SVE2/sbclt.s
new file mode 100644
index 0000000000000..e35499f72a31b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sbclt.s
@@ -0,0 +1,36 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sbclt z0.s, z1.s, z31.s
+// CHECK-INST: sbclt z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xd4,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d4 9f 45 <unknown>
+
+sbclt z0.d, z1.d, z31.d
+// CHECK-INST: sbclt z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d4 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+sbclt z0.d, z1.d, z31.d
+// CHECK-INST: sbclt z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xd4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 d4 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sli-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sli-diagnostics.s
new file mode 100644
index 0000000000000..150bf95a693a2
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sli-diagnostics.s
@@ -0,0 +1,59 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sli z18.b, z28.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sli z18.b, z28.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z1.b, z9.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sli z1.b, z9.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z21.h, z2.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sli z21.h, z2.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z14.h, z30.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sli z14.h, z30.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z6.s, z12.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sli z6.s, z12.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z23.s, z19.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sli z23.s, z19.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z3.d, z24.d, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: sli z3.d, z24.d, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sli z25.d, z16.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 63]
+// CHECK-NEXT: sli z25.d, z16.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sli z0.b, z0.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sli z0.b, z0.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sli     z31.d, z31.d, #63
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sli     z31.d, z31.d, #63
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sli.s b/llvm/test/MC/AArch64/SVE2/sli.s
new file mode 100644
index 0000000000000..e5100fd057874
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sli.s
@@ -0,0 +1,56 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sli     z0.b, z0.b, #0
+// CHECK-INST: sli	z0.b, z0.b, #0
+// CHECK-ENCODING: [0x00,0xf4,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f4 08 45 <unknown>
+
+sli     z31.b, z31.b, #7
+// CHECK-INST: sli	z31.b, z31.b, #7
+// CHECK-ENCODING: [0xff,0xf7,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f7 0f 45 <unknown>
+
+sli     z0.h, z0.h, #0
+// CHECK-INST: sli	z0.h, z0.h, #0
+// CHECK-ENCODING: [0x00,0xf4,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f4 10 45 <unknown>
+
+sli     z31.h, z31.h, #15
+// CHECK-INST: sli	z31.h, z31.h, #15
+// CHECK-ENCODING: [0xff,0xf7,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f7 1f 45 <unknown>
+
+sli     z0.s, z0.s, #0
+// CHECK-INST: sli	z0.s, z0.s, #0
+// CHECK-ENCODING: [0x00,0xf4,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f4 40 45 <unknown>
+
+sli     z31.s, z31.s, #31
+// CHECK-INST: sli	z31.s, z31.s, #31
+// CHECK-ENCODING: [0xff,0xf7,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f7 5f 45 <unknown>
+
+sli     z0.d, z0.d, #0
+// CHECK-INST: sli	z0.d, z0.d, #0
+// CHECK-ENCODING: [0x00,0xf4,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f4 80 45 <unknown>
+
+sli     z31.d, z31.d, #63
+// CHECK-INST: sli	z31.d, z31.d, #63
+// CHECK-ENCODING: [0xff,0xf7,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f7 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqcadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqcadd-diagnostics.s
new file mode 100644
index 0000000000000..e399f9e87cfe9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqcadd-diagnostics.s
@@ -0,0 +1,38 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sqcadd z0.d, z1.d, z2.d, #90
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sqcadd z0.d, z1.d, z2.d, #90
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid rotation
+
+sqcadd z0.d, z0.d, z1.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
+// CHECK-NEXT: sqcadd z0.d, z0.d, z1.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqcadd z0.d, z0.d, z1.d, #180
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
+// CHECK-NEXT: sqcadd z0.d, z0.d, z1.d, #180
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqcadd z0.d, z0.d, z1.d, #450
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: complex rotation must be 90 or 270.
+// CHECK-NEXT: sqcadd z0.d, z0.d, z1.d, #450
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+sqcadd z0.d, z0.d, z31.d, #90
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: sqcadd z0.d, z0.d, z31.d, #90
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqcadd.s b/llvm/test/MC/AArch64/SVE2/sqcadd.s
new file mode 100644
index 0000000000000..2784d1d15bd0b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqcadd.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqcadd   z0.b, z0.b, z0.b, #90
+// CHECK-INST: sqcadd   z0.b, z0.b, z0.b, #90
+// CHECK-ENCODING: [0x00,0xd8,0x01,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 01 45 <unknown>
+
+sqcadd   z0.h, z0.h, z0.h, #90
+// CHECK-INST: sqcadd   z0.h, z0.h, z0.h, #90
+// CHECK-ENCODING: [0x00,0xd8,0x41,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 41 45 <unknown>
+
+sqcadd   z0.s, z0.s, z0.s, #90
+// CHECK-INST: sqcadd   z0.s, z0.s, z0.s, #90
+// CHECK-ENCODING: [0x00,0xd8,0x81,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 81 45 <unknown>
+
+sqcadd   z0.d, z0.d, z0.d, #90
+// CHECK-INST: sqcadd   z0.d, z0.d, z0.d, #90
+// CHECK-ENCODING: [0x00,0xd8,0xc1,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 d8 c1 45 <unknown>
+
+sqcadd   z31.b, z31.b, z31.b, #270
+// CHECK-INST: sqcadd   z31.b, z31.b, z31.b, #270
+// CHECK-ENCODING: [0xff,0xdf,0x01,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 01 45 <unknown>
+
+sqcadd   z31.h, z31.h, z31.h, #270
+// CHECK-INST: sqcadd   z31.h, z31.h, z31.h, #270
+// CHECK-ENCODING: [0xff,0xdf,0x41,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 41 45 <unknown>
+
+sqcadd   z31.s, z31.s, z31.s, #270
+// CHECK-INST: sqcadd   z31.s, z31.s, z31.s, #270
+// CHECK-ENCODING: [0xff,0xdf,0x81,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 81 45 <unknown>
+
+sqcadd   z31.d, z31.d, z31.d, #270
+// CHECK-INST: sqcadd   z31.d, z31.d, z31.d, #270
+// CHECK-ENCODING: [0xff,0xdf,0xc1,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df c1 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4, z6
+// CHECK-INST: movprfx	z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+sqcadd   z4.d, z4.d, z31.d, #270
+// CHECK-INST: sqcadd	z4.d, z4.d, z31.d, #270
+// CHECK-ENCODING: [0xe4,0xdf,0xc1,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e4 df c1 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sri-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sri-diagnostics.s
new file mode 100644
index 0000000000000..da88d51642650
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sri-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sri z30.b, z10.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sri z30.b, z10.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z18.b, z27.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sri z18.b, z27.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z26.h, z4.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sri z26.h, z4.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z25.h, z10.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sri z25.h, z10.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z17.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sri z17.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z0.s, z15.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sri z0.s, z15.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z4.d, z13.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: sri z4.d, z13.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sri z26.d, z26.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: sri z26.d, z26.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+sri z0.b, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sri z0.b, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sri     z31.d, z31.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sri     z31.d, z31.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sri.s b/llvm/test/MC/AArch64/SVE2/sri.s
new file mode 100644
index 0000000000000..c06fa6bc4527e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sri.s
@@ -0,0 +1,56 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sri     z0.b, z0.b, #1
+// CHECK-INST: sri	z0.b, z0.b, #1
+// CHECK-ENCODING: [0x00,0xf0,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f0 0f 45 <unknown>
+
+sri     z31.b, z31.b, #8
+// CHECK-INST: sri	z31.b, z31.b, #8
+// CHECK-ENCODING: [0xff,0xf3,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f3 08 45 <unknown>
+
+sri     z0.h, z0.h, #1
+// CHECK-INST: sri	z0.h, z0.h, #1
+// CHECK-ENCODING: [0x00,0xf0,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f0 1f 45 <unknown>
+
+sri     z31.h, z31.h, #16
+// CHECK-INST: sri	z31.h, z31.h, #16
+// CHECK-ENCODING: [0xff,0xf3,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f3 10 45 <unknown>
+
+sri     z0.s, z0.s, #1
+// CHECK-INST: sri	z0.s, z0.s, #1
+// CHECK-ENCODING: [0x00,0xf0,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f0 5f 45 <unknown>
+
+sri     z31.s, z31.s, #32
+// CHECK-INST: sri	z31.s, z31.s, #32
+// CHECK-ENCODING: [0xff,0xf3,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f3 40 45 <unknown>
+
+sri     z0.d, z0.d, #1
+// CHECK-INST: sri	z0.d, z0.d, #1
+// CHECK-ENCODING: [0x00,0xf0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 f0 df 45 <unknown>
+
+sri     z31.d, z31.d, #64
+// CHECK-INST: sri	z31.d, z31.d, #64
+// CHECK-ENCODING: [0xff,0xf3,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff f3 80 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/srsra-diagnostics.s b/llvm/test/MC/AArch64/SVE2/srsra-diagnostics.s
new file mode 100644
index 0000000000000..ed21d5d01d149
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srsra-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+srsra z30.b, z10.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: srsra z30.b, z10.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z18.b, z27.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: srsra z18.b, z27.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z26.h, z4.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: srsra z26.h, z4.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z25.h, z10.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: srsra z25.h, z10.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z17.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: srsra z17.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z0.s, z15.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: srsra z0.s, z15.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z4.d, z13.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: srsra z4.d, z13.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+srsra z26.d, z26.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: srsra z26.d, z26.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+srsra z0.b, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: srsra z0.b, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+srsra     z0.d, z1.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: srsra     z0.d, z1.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/srsra.s b/llvm/test/MC/AArch64/SVE2/srsra.s
new file mode 100644
index 0000000000000..2ac7fa78736a3
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/srsra.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+srsra     z0.b, z0.b, #1
+// CHECK-INST: srsra	z0.b, z0.b, #1
+// CHECK-ENCODING: [0x00,0xe8,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e8 0f 45 <unknown>
+
+srsra     z31.b, z31.b, #8
+// CHECK-INST: srsra	z31.b, z31.b, #8
+// CHECK-ENCODING: [0xff,0xeb,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff eb 08 45 <unknown>
+
+srsra     z0.h, z0.h, #1
+// CHECK-INST: srsra	z0.h, z0.h, #1
+// CHECK-ENCODING: [0x00,0xe8,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e8 1f 45 <unknown>
+
+srsra     z31.h, z31.h, #16
+// CHECK-INST: srsra	z31.h, z31.h, #16
+// CHECK-ENCODING: [0xff,0xeb,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff eb 10 45 <unknown>
+
+srsra     z0.s, z0.s, #1
+// CHECK-INST: srsra	z0.s, z0.s, #1
+// CHECK-ENCODING: [0x00,0xe8,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e8 5f 45 <unknown>
+
+srsra     z31.s, z31.s, #32
+// CHECK-INST: srsra	z31.s, z31.s, #32
+// CHECK-ENCODING: [0xff,0xeb,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff eb 40 45 <unknown>
+
+srsra     z0.d, z0.d, #1
+// CHECK-INST: srsra	z0.d, z0.d, #1
+// CHECK-ENCODING: [0x00,0xe8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e8 df 45 <unknown>
+
+srsra     z31.d, z31.d, #64
+// CHECK-INST: srsra	z31.d, z31.d, #64
+// CHECK-ENCODING: [0xff,0xeb,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff eb 80 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+srsra     z0.d, z1.d, #1
+// CHECK-INST: srsra	z0.d, z1.d, #1
+// CHECK-ENCODING: [0x20,0xe8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 e8 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssra-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssra-diagnostics.s
new file mode 100644
index 0000000000000..17d8fdf0ced3d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssra-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+ssra z30.b, z10.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: ssra z30.b, z10.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z18.b, z27.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: ssra z18.b, z27.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z26.h, z4.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: ssra z26.h, z4.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z25.h, z10.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: ssra z25.h, z10.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z17.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: ssra z17.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z0.s, z15.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: ssra z0.s, z15.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z4.d, z13.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: ssra z4.d, z13.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssra z26.d, z26.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: ssra z26.d, z26.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+ssra z0.b, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssra z0.b, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ssra     z0.d, z1.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: ssra     z0.d, z1.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssra.s b/llvm/test/MC/AArch64/SVE2/ssra.s
new file mode 100644
index 0000000000000..5daeb9a667e3a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssra.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ssra     z0.b, z0.b, #1
+// CHECK-INST: ssra	z0.b, z0.b, #1
+// CHECK-ENCODING: [0x00,0xe0,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e0 0f 45 <unknown>
+
+ssra     z31.b, z31.b, #8
+// CHECK-INST: ssra	z31.b, z31.b, #8
+// CHECK-ENCODING: [0xff,0xe3,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e3 08 45 <unknown>
+
+ssra     z0.h, z0.h, #1
+// CHECK-INST: ssra	z0.h, z0.h, #1
+// CHECK-ENCODING: [0x00,0xe0,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e0 1f 45 <unknown>
+
+ssra     z31.h, z31.h, #16
+// CHECK-INST: ssra	z31.h, z31.h, #16
+// CHECK-ENCODING: [0xff,0xe3,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e3 10 45 <unknown>
+
+ssra     z0.s, z0.s, #1
+// CHECK-INST: ssra	z0.s, z0.s, #1
+// CHECK-ENCODING: [0x00,0xe0,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e0 5f 45 <unknown>
+
+ssra     z31.s, z31.s, #32
+// CHECK-INST: ssra	z31.s, z31.s, #32
+// CHECK-ENCODING: [0xff,0xe3,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e3 40 45 <unknown>
+
+ssra     z0.d, z0.d, #1
+// CHECK-INST: ssra	z0.d, z0.d, #1
+// CHECK-ENCODING: [0x00,0xe0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e0 df 45 <unknown>
+
+ssra     z31.d, z31.d, #64
+// CHECK-INST: ssra	z31.d, z31.d, #64
+// CHECK-ENCODING: [0xff,0xe3,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e3 80 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+ssra     z0.d, z1.d, #1
+// CHECK-INST: ssra	z0.d, z1.d, #1
+// CHECK-ENCODING: [0x20,0xe0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 e0 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uaba-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uaba-diagnostics.s
new file mode 100644
index 0000000000000..4a03e83e57624
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaba-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uaba z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uaba z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uaba z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uaba z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uaba.s b/llvm/test/MC/AArch64/SVE2/uaba.s
new file mode 100644
index 0000000000000..f714902fabe5f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uaba.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uaba z0.b, z1.b, z31.b
+// CHECK-INST: uaba z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xfc,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 fc 1f 45 <unknown>
+
+uaba z0.h, z1.h, z31.h
+// CHECK-INST: uaba z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xfc,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 fc 5f 45 <unknown>
+
+uaba z0.s, z1.s, z31.s
+// CHECK-INST: uaba z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xfc,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 fc 9f 45 <unknown>
+
+uaba z0.d, z1.d, z31.d
+// CHECK-INST: uaba z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xfc,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 fc df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+uaba z0.d, z1.d, z31.d
+// CHECK-INST: uaba z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xfc,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 fc df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uabalb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uabalb-diagnostics.s
new file mode 100644
index 0000000000000..c35bccc9a0156
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabalb-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uabalb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabalb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabalb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabalb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uabalb  z0.d, z1.s, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uabalb  z0.d, z1.s, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uabalb.s b/llvm/test/MC/AArch64/SVE2/uabalb.s
new file mode 100644
index 0000000000000..b2cda012973f6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabalb.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uabalb z0.h, z1.b, z31.b
+// CHECK-INST: uabalb	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xc8,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c8 5f 45 <unknown>
+
+uabalb z0.s, z1.h, z31.h
+// CHECK-INST: uabalb	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xc8,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c8 9f 45 <unknown>
+
+uabalb z0.d, z1.s, z31.s
+// CHECK-INST: uabalb	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xc8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c8 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+uabalb z21.d, z1.s, z31.s
+// CHECK-INST: uabalb	z21.d, z1.s, z31.s
+// CHECK-ENCODING: [0x35,0xc8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 c8 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uabalt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uabalt-diagnostics.s
new file mode 100644
index 0000000000000..16fe160620fd3
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabalt-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uabalt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabalt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabalt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uabalt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uabalt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+uabalt  z0.d, z1.s, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: uabalt  z0.d, z1.s, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uabalt.s b/llvm/test/MC/AArch64/SVE2/uabalt.s
new file mode 100644
index 0000000000000..f49dede472ae9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uabalt.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uabalt z0.h, z1.b, z31.b
+// CHECK-INST: uabalt	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xcc,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 cc 5f 45 <unknown>
+
+uabalt z0.s, z1.h, z31.h
+// CHECK-INST: uabalt	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xcc,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 cc 9f 45 <unknown>
+
+uabalt z0.d, z1.s, z31.s
+// CHECK-INST: uabalt	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xcc,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 cc df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+uabalt z21.d, z1.s, z31.s
+// CHECK-INST: uabalt	z21.d, z1.s, z31.s
+// CHECK-ENCODING: [0x35,0xcc,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 cc df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ursra-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ursra-diagnostics.s
new file mode 100644
index 0000000000000..323901b00f97e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ursra-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+ursra z30.b, z10.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: ursra z30.b, z10.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z18.b, z27.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: ursra z18.b, z27.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z26.h, z4.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: ursra z26.h, z4.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z25.h, z10.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: ursra z25.h, z10.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z17.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: ursra z17.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z0.s, z15.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: ursra z0.s, z15.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z4.d, z13.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: ursra z4.d, z13.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursra z26.d, z26.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: ursra z26.d, z26.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+ursra z0.b, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ursra z0.b, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ursra     z0.d, z1.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: ursra     z0.d, z1.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ursra.s b/llvm/test/MC/AArch64/SVE2/ursra.s
new file mode 100644
index 0000000000000..dfa1e8454962b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ursra.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ursra     z0.b, z0.b, #1
+// CHECK-INST: ursra	z0.b, z0.b, #1
+// CHECK-ENCODING: [0x00,0xec,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ec 0f 45 <unknown>
+
+ursra     z31.b, z31.b, #8
+// CHECK-INST: ursra	z31.b, z31.b, #8
+// CHECK-ENCODING: [0xff,0xef,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ef 08 45 <unknown>
+
+ursra     z0.h, z0.h, #1
+// CHECK-INST: ursra	z0.h, z0.h, #1
+// CHECK-ENCODING: [0x00,0xec,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ec 1f 45 <unknown>
+
+ursra     z31.h, z31.h, #16
+// CHECK-INST: ursra	z31.h, z31.h, #16
+// CHECK-ENCODING: [0xff,0xef,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ef 10 45 <unknown>
+
+ursra     z0.s, z0.s, #1
+// CHECK-INST: ursra	z0.s, z0.s, #1
+// CHECK-ENCODING: [0x00,0xec,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ec 5f 45 <unknown>
+
+ursra     z31.s, z31.s, #32
+// CHECK-INST: ursra	z31.s, z31.s, #32
+// CHECK-ENCODING: [0xff,0xef,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ef 40 45 <unknown>
+
+ursra     z0.d, z0.d, #1
+// CHECK-INST: ursra	z0.d, z0.d, #1
+// CHECK-ENCODING: [0x00,0xec,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ec df 45 <unknown>
+
+ursra     z31.d, z31.d, #64
+// CHECK-INST: ursra	z31.d, z31.d, #64
+// CHECK-ENCODING: [0xff,0xef,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ef 80 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+ursra     z0.d, z1.d, #1
+// CHECK-INST: ursra	z0.d, z1.d, #1
+// CHECK-ENCODING: [0x20,0xec,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 ec df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/usra-diagnostics.s b/llvm/test/MC/AArch64/SVE2/usra-diagnostics.s
new file mode 100644
index 0000000000000..bca811ec87740
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usra-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+usra z30.b, z10.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: usra z30.b, z10.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z18.b, z27.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: usra z18.b, z27.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z26.h, z4.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: usra z26.h, z4.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z25.h, z10.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: usra z25.h, z10.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z17.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: usra z17.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z0.s, z15.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: usra z0.s, z15.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z4.d, z13.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: usra z4.d, z13.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+usra z26.d, z26.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: usra z26.d, z26.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+usra z0.b, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: usra z0.b, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+usra     z0.d, z1.d, #64
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: usra     z0.d, z1.d, #64
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/usra.s b/llvm/test/MC/AArch64/SVE2/usra.s
new file mode 100644
index 0000000000000..6e7c35ad415f7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/usra.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+usra     z0.b, z0.b, #1
+// CHECK-INST: usra	z0.b, z0.b, #1
+// CHECK-ENCODING: [0x00,0xe4,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e4 0f 45 <unknown>
+
+usra     z31.b, z31.b, #8
+// CHECK-INST: usra	z31.b, z31.b, #8
+// CHECK-ENCODING: [0xff,0xe7,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e7 08 45 <unknown>
+
+usra     z0.h, z0.h, #1
+// CHECK-INST: usra	z0.h, z0.h, #1
+// CHECK-ENCODING: [0x00,0xe4,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e4 1f 45 <unknown>
+
+usra     z31.h, z31.h, #16
+// CHECK-INST: usra	z31.h, z31.h, #16
+// CHECK-ENCODING: [0xff,0xe7,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e7 10 45 <unknown>
+
+usra     z0.s, z0.s, #1
+// CHECK-INST: usra	z0.s, z0.s, #1
+// CHECK-ENCODING: [0x00,0xe4,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e4 5f 45 <unknown>
+
+usra     z31.s, z31.s, #32
+// CHECK-INST: usra	z31.s, z31.s, #32
+// CHECK-ENCODING: [0xff,0xe7,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e7 40 45 <unknown>
+
+usra     z0.d, z0.d, #1
+// CHECK-INST: usra	z0.d, z0.d, #1
+// CHECK-ENCODING: [0x00,0xe4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 e4 df 45 <unknown>
+
+usra     z31.d, z31.d, #64
+// CHECK-INST: usra	z31.d, z31.d, #64
+// CHECK-ENCODING: [0xff,0xe7,0x80,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff e7 80 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+usra     z0.d, z1.d, #1
+// CHECK-INST: usra	z0.d, z1.d, #1
+// CHECK-ENCODING: [0x20,0xe4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 e4 df 45 <unknown>

From cabab29af2d86ec03459cc3b57dd727b0a9c96d2 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Fri, 24 May 2019 10:18:39 +0000
Subject: [PATCH 0157/1176] [CodeComplete] Filter override completions by
 function name

Summary:
We put only part of the signature starting with a function name into "typed text"
chunks now, previously the whole signature was "typed text".

This leads to meaningful fuzzy match scores, giving better signals to
compare with other completion items.

Ideally, we would not display the result type to the user, but that requires adding
a new kind of completion chunk.

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62298

llvm-svn: 361623
---
 .../clangd/unittests/CodeCompleteTests.cpp    | 14 ++++--
 clang/lib/Sema/SemaCodeComplete.cpp           | 47 ++++++++++++-------
 clang/test/CodeCompletion/overrides.cpp       | 12 ++---
 3 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index e584597e7a90c..8f8376e25bc4c 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -49,6 +49,9 @@ class IgnoreDiagnostics : public DiagnosticsConsumer {
 
 // GMock helpers for matching completion items.
 MATCHER_P(Named, Name, "") { return arg.Name == Name; }
+MATCHER_P(NameStartsWith, Prefix, "") {
+  return llvm::StringRef(arg.Name).startswith(Prefix);
+}
 MATCHER_P(Scope, S, "") { return arg.Scope == S; }
 MATCHER_P(Qualifier, Q, "") { return arg.RequiredQualifier == Q; }
 MATCHER_P(Labeled, Label, "") {
@@ -1946,10 +1949,13 @@ TEST(CompletionTest, SuggestOverrides) {
   };
   )cpp");
   const auto Results = completions(Text);
-  EXPECT_THAT(Results.Completions,
-              AllOf(Contains(Labeled("void vfunc(bool param, int p) override")),
-                    Contains(Labeled("void ttt(bool param) const override")),
-                    Not(Contains(Labeled("void vfunc(bool param) override")))));
+  EXPECT_THAT(
+      Results.Completions,
+      AllOf(Contains(AllOf(Labeled("void vfunc(bool param, int p) override"),
+                           NameStartsWith("vfunc"))),
+            Contains(AllOf(Labeled("void ttt(bool param) const override"),
+                           NameStartsWith("ttt"))),
+            Not(Contains(Labeled("void vfunc(bool param) override")))));
 }
 
 TEST(CompletionTest, OverridesNonIdentName) {
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index e6c0b68b8d5fd..27e684252f51d 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/Path.h"
 #include <list>
 #include <map>
+#include <string>
 #include <vector>
 
 using namespace clang;
@@ -1828,19 +1829,6 @@ static void AddStaticAssertResult(CodeCompletionBuilder &Builder,
   Results.AddResult(CodeCompletionResult(Builder.TakeString()));
 }
 
-static void printOverrideString(llvm::raw_ostream &OS,
-                                CodeCompletionString *CCS) {
-  for (const auto &C : *CCS) {
-    if (C.Kind == CodeCompletionString::CK_Optional)
-      printOverrideString(OS, C.Optional);
-    else
-      OS << C.Text;
-    // Add a space after return type.
-    if (C.Kind == CodeCompletionString::CK_ResultType)
-      OS << ' ';
-  }
-}
-
 static void AddOverrideResults(ResultBuilder &Results,
                                const CodeCompletionContext &CCContext,
                                CodeCompletionBuilder &Builder) {
@@ -3162,19 +3150,42 @@ CodeCompletionString *CodeCompletionResult::CreateCodeCompletionString(
       PP, Ctx, Result, IncludeBriefComments, CCContext, Policy);
 }
 
+static void printOverrideString(const CodeCompletionString &CCS,
+                                std::string &BeforeName,
+                                std::string &NameAndSignature) {
+  bool SeenTypedChunk = false;
+  for (auto &Chunk : CCS) {
+    if (Chunk.Kind == CodeCompletionString::CK_Optional) {
+      assert(SeenTypedChunk && "optional parameter before name");
+      // Note that we put all chunks inside into NameAndSignature.
+      printOverrideString(*Chunk.Optional, NameAndSignature, NameAndSignature);
+      continue;
+    }
+    SeenTypedChunk |= Chunk.Kind == CodeCompletionString::CK_TypedText;
+    if (SeenTypedChunk)
+      NameAndSignature += Chunk.Text;
+    else
+      BeforeName += Chunk.Text;
+  }
+}
+
 CodeCompletionString *
 CodeCompletionResult::createCodeCompletionStringForOverride(
     Preprocessor &PP, ASTContext &Ctx, CodeCompletionBuilder &Result,
     bool IncludeBriefComments, const CodeCompletionContext &CCContext,
     PrintingPolicy &Policy) {
-  std::string OverrideSignature;
-  llvm::raw_string_ostream OS(OverrideSignature);
   auto *CCS = createCodeCompletionStringForDecl(PP, Ctx, Result,
                                                 /*IncludeBriefComments=*/false,
                                                 CCContext, Policy);
-  printOverrideString(OS, CCS);
-  OS << " override";
-  Result.AddTypedTextChunk(Result.getAllocator().CopyString(OS.str()));
+  std::string BeforeName;
+  std::string NameAndSignature;
+  // For overrides all chunks go into the result, none are informative.
+  printOverrideString(*CCS, BeforeName, NameAndSignature);
+  NameAndSignature += " override";
+
+  Result.AddTextChunk(Result.getAllocator().CopyString(BeforeName));
+  Result.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+  Result.AddTypedTextChunk(Result.getAllocator().CopyString(NameAndSignature));
   return Result.TakeString();
 }
 
diff --git a/clang/test/CodeCompletion/overrides.cpp b/clang/test/CodeCompletion/overrides.cpp
index 06cff6af4d12f..6645569d82a77 100644
--- a/clang/test/CodeCompletion/overrides.cpp
+++ b/clang/test/CodeCompletion/overrides.cpp
@@ -11,23 +11,23 @@ void vfunc(bool param, int p) override;
 class C : public B {
  public:
   void vfunc(bool param) override;
-  void
+  vf
 };
 
-// Runs completion at ^void.
+// Runs completion at ^vf
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:14:3 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
 // CHECK-CC1: COMPLETION: Pattern : int ttt(bool param, int x = 3) const override{{$}}
 // CHECK-CC1: COMPLETION: Pattern : void vfunc(bool param, int p) override{{$}}
 // CHECK-CC1-NOT: COMPLETION: Pattern : void vfunc(bool param) override{{$}}
 //
-// Runs completion at vo^id.
+// Runs completion at vf^
 // RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:14:5 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
-// CHECK-CC2: COMPLETION: Pattern : void vfunc(bool param, int p) override{{$}}
 // CHECK-CC2-NOT: COMPLETION: Pattern : int ttt(bool param, int x = 3) const override{{$}}
+// CHECK-CC2: COMPLETION: Pattern : void vfunc(bool param, int p) override{{$}}
 // CHECK-CC2-NOT: COMPLETION: Pattern : void vfunc(bool param) override{{$}}
 //
-// Runs completion at void ^.
-// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:14:8 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
+// Runs completion at void ^ on line 13.
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:13:8 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
 // CHECK-CC3-NOT: COMPLETION: Pattern : int ttt(bool param, int x = 3) const override{{$}}
 // CHECK-CC3-NOT: COMPLETION: Pattern : void vfunc(bool param, int p) override{{$}}
 // CHECK-CC3-NOT: COMPLETION: Pattern : void vfunc(bool param) override{{$}}

From adb1d74bf9e02952567c1a6e2149fb504cff2275 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 10:22:30 +0000
Subject: [PATCH 0158/1176] [AArch64][SVE2] Asm: support SVE2 Narrowing Group

Summary:
Patch adds support for the following instructions:

SVE2 bitwise shift right narrow:
    * SQSHRUNB, SQSHRUNT, SQRSHRUNB, SQRSHRUNT, SHRNB, SHRNT, RSHRNB, RSHRNT,
      SQSHRNB, SQSHRNT, SQRSHRNB, SQRSHRNT, UQSHRNB, UQSHRNT, UQRSHRNB,
      UQRSHRNT

SVE2 integer add/subtract narrow high part:
    * ADDHNB, ADDHNT, RADDHNB, RADDHNT, SUBHNB, SUBHNT, RSUBHNB, RSUBHNT

SVE2 saturating extract narrow:
    * SQXTNB, SQXTNT, UQXTNB, UQXTNT, SQXTUNB, SQXTUNT

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62205

llvm-svn: 361624
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 36 ++++++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 82 +++++++++++++++++++
 .../test/MC/AArch64/SVE2/addhnb-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/addhnb.s            | 27 ++++++
 .../test/MC/AArch64/SVE2/addhnt-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/addhnt.s            | 27 ++++++
 .../MC/AArch64/SVE2/raddhnb-diagnostics.s     | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/raddhnb.s           | 27 ++++++
 .../MC/AArch64/SVE2/raddhnt-diagnostics.s     | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/raddhnt.s           | 27 ++++++
 .../test/MC/AArch64/SVE2/rshrnb-diagnostics.s | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/rshrnb.s            | 44 ++++++++++
 .../test/MC/AArch64/SVE2/rshrnt-diagnostics.s | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/rshrnt.s            | 44 ++++++++++
 .../MC/AArch64/SVE2/rsubhnb-diagnostics.s     | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/rsubhnb.s           | 27 ++++++
 .../MC/AArch64/SVE2/rsubhnt-diagnostics.s     | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/rsubhnt.s           | 27 ++++++
 llvm/test/MC/AArch64/SVE2/shrnb-diagnostics.s | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/shrnb.s             | 44 ++++++++++
 llvm/test/MC/AArch64/SVE2/shrnt-diagnostics.s | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/shrnt.s             | 44 ++++++++++
 .../MC/AArch64/SVE2/sqrshrnb-diagnostics.s    | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqrshrnb.s          | 44 ++++++++++
 .../MC/AArch64/SVE2/sqrshrnt-diagnostics.s    | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqrshrnt.s          | 44 ++++++++++
 .../MC/AArch64/SVE2/sqrshrunb-diagnostics.s   | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqrshrunb.s         | 44 ++++++++++
 .../MC/AArch64/SVE2/sqrshrunt-diagnostics.s   | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqrshrunt.s         | 44 ++++++++++
 .../MC/AArch64/SVE2/sqshrnb-diagnostics.s     | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqshrnb.s           | 44 ++++++++++
 .../MC/AArch64/SVE2/sqshrnt-diagnostics.s     | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqshrnt.s           | 44 ++++++++++
 .../MC/AArch64/SVE2/sqshrunb-diagnostics.s    | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqshrunb.s          | 44 ++++++++++
 .../MC/AArch64/SVE2/sqshrunt-diagnostics.s    | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/sqshrunt.s          | 44 ++++++++++
 .../test/MC/AArch64/SVE2/sqxtnb-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/sqxtnb.s            | 27 ++++++
 .../test/MC/AArch64/SVE2/sqxtnt-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/sqxtnt.s            | 27 ++++++
 .../MC/AArch64/SVE2/sqxtunb-diagnostics.s     | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/sqxtunb.s           | 27 ++++++
 .../MC/AArch64/SVE2/sqxtunt-diagnostics.s     | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/sqxtunt.s           | 27 ++++++
 .../test/MC/AArch64/SVE2/subhnb-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/subhnb.s            | 27 ++++++
 .../test/MC/AArch64/SVE2/subhnt-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/subhnt.s            | 27 ++++++
 .../MC/AArch64/SVE2/uqrshrnb-diagnostics.s    | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/uqrshrnb.s          | 44 ++++++++++
 .../MC/AArch64/SVE2/uqrshrnt-diagnostics.s    | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/uqrshrnt.s          | 44 ++++++++++
 .../MC/AArch64/SVE2/uqshrnb-diagnostics.s     | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/uqshrnb.s           | 44 ++++++++++
 .../MC/AArch64/SVE2/uqshrnt-diagnostics.s     | 65 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/uqshrnt.s           | 44 ++++++++++
 .../test/MC/AArch64/SVE2/uqxtnb-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/uqxtnb.s            | 27 ++++++
 .../test/MC/AArch64/SVE2/uqxtnt-diagnostics.s | 40 +++++++++
 llvm/test/MC/AArch64/SVE2/uqxtnt.s            | 27 ++++++
 62 files changed, 2800 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/addhnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/addhnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/addhnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/addhnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/raddhnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/raddhnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/raddhnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/raddhnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rshrnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rshrnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rshrnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rshrnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rsubhnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rsubhnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rsubhnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rsubhnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/shrnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/shrnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/shrnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/shrnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrunb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrunb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrunt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqrshrunt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrunb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrunb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrunt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqshrunt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtunb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtunb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtunt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sqxtunt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/subhnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/subhnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/subhnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/subhnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshrnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshrnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshrnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqrshrnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshrnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshrnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshrnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqshrnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqxtnb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqxtnb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqxtnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/uqxtnt.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3f48490bcc178..3e17e687bdc0b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1228,6 +1228,42 @@ let Predicates = [HasSVE2] in {
   defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
   defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
 
+  // SVE2 bitwise shift right narrow
+  defm SQSHRUNB_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
+  defm SQSHRUNT_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
+  defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
+  defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
+  defm SHRNB_ZZI     : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
+  defm SHRNT_ZZI     : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
+  defm RSHRNB_ZZI    : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
+  defm RSHRNT_ZZI    : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
+  defm SQSHRNB_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
+  defm SQSHRNT_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
+  defm SQRSHRNB_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
+  defm SQRSHRNT_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
+  defm UQSHRNB_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
+  defm UQSHRNT_ZZI   : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
+  defm UQRSHRNB_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
+  defm UQRSHRNT_ZZI  : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
+
+  // SVE2 integer add/subtract narrow high part
+  defm ADDHNB_ZZZ  : sve2_int_addsub_narrow_high<0b000, "addhnb">;
+  defm ADDHNT_ZZZ  : sve2_int_addsub_narrow_high<0b001, "addhnt">;
+  defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
+  defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
+  defm SUBHNB_ZZZ  : sve2_int_addsub_narrow_high<0b100, "subhnb">;
+  defm SUBHNT_ZZZ  : sve2_int_addsub_narrow_high<0b101, "subhnt">;
+  defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
+  defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
+
+  // SVE2 saturating extract narrow
+  defm SQXTNB_ZZ  : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
+  defm SQXTNT_ZZ  : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
+  defm UQXTNB_ZZ  : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
+  defm UQXTNT_ZZ  : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
+  defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
+  defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 90c8076d2907f..c4c890a63ecd0 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2347,6 +2347,88 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
                                   ZPR64, ZPR64>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Narrowing Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
+                                         string asm, ZPRRegOp zprty1,
+                                         ZPRRegOp zprty2, Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
+  asm, "\t$Zd, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> imm;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = tsz8_64{2};
+  let Inst{21}    = 0b1;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-14} = 0b00;
+  let Inst{13-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
+  def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
+                                              vecshiftR8>;
+  def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
+                                              vecshiftR16> {
+    let Inst{19} = imm{3};
+  }
+  def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
+                                              vecshiftR32> {
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
+class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
+                                  ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b011;
+  let Inst{12-10} = opc; // S, R, T
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
+  def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
+  def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
+  def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
+                                  ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
+  asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = tsz8_64{2};
+  let Inst{21}    = 0b1;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-13} = 0b000010;
+  let Inst{12-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
+  def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
+  def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
+  def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Arithmetic - Unary Predicated Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/addhnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/addhnb-diagnostics.s
new file mode 100644
index 0000000000000..c85456ab8e319
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/addhnb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+addhnb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addhnb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addhnb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addhnb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+addhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: addhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+addhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: addhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/addhnb.s b/llvm/test/MC/AArch64/SVE2/addhnb.s
new file mode 100644
index 0000000000000..fa9e855634bac
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/addhnb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+addhnb z0.b, z1.h, z31.h
+// CHECK-INST: addhnb	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x60,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 60 7f 45 <unknown>
+
+addhnb z0.h, z1.s, z31.s
+// CHECK-INST: addhnb	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x60,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 60 bf 45 <unknown>
+
+addhnb z0.s, z1.d, z31.d
+// CHECK-INST: addhnb	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x60,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 60 ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/addhnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/addhnt-diagnostics.s
new file mode 100644
index 0000000000000..0bebf9236bf43
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/addhnt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+addhnt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addhnt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addhnt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+addhnt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: addhnt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+addhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: addhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+addhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: addhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/addhnt.s b/llvm/test/MC/AArch64/SVE2/addhnt.s
new file mode 100644
index 0000000000000..fe77e5abe713e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/addhnt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+addhnt z0.b, z1.h, z31.h
+// CHECK-INST: addhnt	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x64,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 64 7f 45 <unknown>
+
+addhnt z0.h, z1.s, z31.s
+// CHECK-INST: addhnt	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x64,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 64 bf 45 <unknown>
+
+addhnt z0.s, z1.d, z31.d
+// CHECK-INST: addhnt	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x64,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 64 ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/raddhnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/raddhnb-diagnostics.s
new file mode 100644
index 0000000000000..94a3cbb81dc4c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/raddhnb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+raddhnb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+raddhnb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+raddhnb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+raddhnb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+raddhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: raddhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+raddhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: raddhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/raddhnb.s b/llvm/test/MC/AArch64/SVE2/raddhnb.s
new file mode 100644
index 0000000000000..ff04d6b512c0f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/raddhnb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+raddhnb z0.b, z1.h, z31.h
+// CHECK-INST: raddhnb	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x68,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 68 7f 45 <unknown>
+
+raddhnb z0.h, z1.s, z31.s
+// CHECK-INST: raddhnb	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x68,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 68 bf 45 <unknown>
+
+raddhnb z0.s, z1.d, z31.d
+// CHECK-INST: raddhnb	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x68,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 68 ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/raddhnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/raddhnt-diagnostics.s
new file mode 100644
index 0000000000000..87ddd59464451
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/raddhnt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+raddhnt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+raddhnt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+raddhnt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+raddhnt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: raddhnt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+raddhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: raddhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+raddhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: raddhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/raddhnt.s b/llvm/test/MC/AArch64/SVE2/raddhnt.s
new file mode 100644
index 0000000000000..f6e79af85e5e7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/raddhnt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+raddhnt z0.b, z1.h, z31.h
+// CHECK-INST: raddhnt	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x6c,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 6c 7f 45 <unknown>
+
+raddhnt z0.h, z1.s, z31.s
+// CHECK-INST: raddhnt	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x6c,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 6c bf 45 <unknown>
+
+raddhnt z0.s, z1.d, z31.d
+// CHECK-INST: raddhnt	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x6c,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 6c ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/rshrnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/rshrnb-diagnostics.s
new file mode 100644
index 0000000000000..f8a1758c81923
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rshrnb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+rshrnb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: rshrnb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: rshrnb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: rshrnb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: rshrnb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: rshrnb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: rshrnb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+rshrnb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+rshrnb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rshrnb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/rshrnb.s b/llvm/test/MC/AArch64/SVE2/rshrnb.s
new file mode 100644
index 0000000000000..1d503099b1452
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rshrnb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+rshrnb     z0.b, z0.h, #1
+// CHECK-INST: rshrnb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x18,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 18 2f 45 <unknown>
+
+rshrnb     z31.b, z31.h, #8
+// CHECK-INST: rshrnb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x1b,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1b 28 45 <unknown>
+
+rshrnb     z0.h, z0.s, #1
+// CHECK-INST: rshrnb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x18,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 18 3f 45 <unknown>
+
+rshrnb     z31.h, z31.s, #16
+// CHECK-INST: rshrnb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x1b,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1b 30 45 <unknown>
+
+rshrnb     z0.s, z0.d, #1
+// CHECK-INST: rshrnb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x18,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 18 7f 45 <unknown>
+
+rshrnb     z31.s, z31.d, #32
+// CHECK-INST: rshrnb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x1b,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1b 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/rshrnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/rshrnt-diagnostics.s
new file mode 100644
index 0000000000000..820ff0f2f7f70
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rshrnt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+rshrnt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: rshrnt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: rshrnt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: rshrnt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: rshrnt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: rshrnt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: rshrnt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+rshrnt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rshrnt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rshrnt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+rshrnt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rshrnt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/rshrnt.s b/llvm/test/MC/AArch64/SVE2/rshrnt.s
new file mode 100644
index 0000000000000..4e997d841e7ec
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rshrnt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+rshrnt     z0.b, z0.h, #1
+// CHECK-INST: rshrnt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x1c,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 1c 2f 45 <unknown>
+
+rshrnt     z31.b, z31.h, #8
+// CHECK-INST: rshrnt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x1f,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1f 28 45 <unknown>
+
+rshrnt     z0.h, z0.s, #1
+// CHECK-INST: rshrnt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x1c,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 1c 3f 45 <unknown>
+
+rshrnt     z31.h, z31.s, #16
+// CHECK-INST: rshrnt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x1f,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1f 30 45 <unknown>
+
+rshrnt     z0.s, z0.d, #1
+// CHECK-INST: rshrnt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x1c,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 1c 7f 45 <unknown>
+
+rshrnt     z31.s, z31.d, #32
+// CHECK-INST: rshrnt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x1f,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1f 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/rsubhnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/rsubhnb-diagnostics.s
new file mode 100644
index 0000000000000..2b3dbd7592cf5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rsubhnb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+rsubhnb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rsubhnb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rsubhnb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rsubhnb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+rsubhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rsubhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+rsubhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rsubhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/rsubhnb.s b/llvm/test/MC/AArch64/SVE2/rsubhnb.s
new file mode 100644
index 0000000000000..55a93177d78e6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rsubhnb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+rsubhnb z0.b, z1.h, z31.h
+// CHECK-INST: rsubhnb	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x78,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 78 7f 45 <unknown>
+
+rsubhnb z0.h, z1.s, z31.s
+// CHECK-INST: rsubhnb	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x78,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 78 bf 45 <unknown>
+
+rsubhnb z0.s, z1.d, z31.d
+// CHECK-INST: rsubhnb	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x78,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 78 ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/rsubhnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/rsubhnt-diagnostics.s
new file mode 100644
index 0000000000000..305dc061852c6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rsubhnt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+rsubhnt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rsubhnt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rsubhnt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rsubhnt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rsubhnt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+rsubhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rsubhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+rsubhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rsubhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/rsubhnt.s b/llvm/test/MC/AArch64/SVE2/rsubhnt.s
new file mode 100644
index 0000000000000..16505e5e31787
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rsubhnt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+rsubhnt z0.b, z1.h, z31.h
+// CHECK-INST: rsubhnt	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x7c,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 7c 7f 45 <unknown>
+
+rsubhnt z0.h, z1.s, z31.s
+// CHECK-INST: rsubhnt	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x7c,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 7c bf 45 <unknown>
+
+rsubhnt z0.s, z1.d, z31.d
+// CHECK-INST: rsubhnt	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x7c,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 7c ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/shrnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/shrnb-diagnostics.s
new file mode 100644
index 0000000000000..b54b45a9e0eb6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/shrnb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+shrnb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: shrnb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: shrnb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: shrnb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: shrnb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: shrnb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: shrnb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+shrnb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+shrnb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: shrnb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/shrnb.s b/llvm/test/MC/AArch64/SVE2/shrnb.s
new file mode 100644
index 0000000000000..e59f35a275cd5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/shrnb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+shrnb     z0.b, z0.h, #1
+// CHECK-INST: shrnb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x10,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 10 2f 45 <unknown>
+
+shrnb     z31.b, z31.h, #8
+// CHECK-INST: shrnb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x13,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 13 28 45 <unknown>
+
+shrnb     z0.h, z0.s, #1
+// CHECK-INST: shrnb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x10,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 10 3f 45 <unknown>
+
+shrnb     z31.h, z31.s, #16
+// CHECK-INST: shrnb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x13,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 13 30 45 <unknown>
+
+shrnb     z0.s, z0.d, #1
+// CHECK-INST: shrnb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x10,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 10 7f 45 <unknown>
+
+shrnb     z31.s, z31.d, #32
+// CHECK-INST: shrnb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x13,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 13 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/shrnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/shrnt-diagnostics.s
new file mode 100644
index 0000000000000..bf1717bd7bf9b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/shrnt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+shrnt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: shrnt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: shrnt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: shrnt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: shrnt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: shrnt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: shrnt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+shrnt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+shrnt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: shrnt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+shrnt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: shrnt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/shrnt.s b/llvm/test/MC/AArch64/SVE2/shrnt.s
new file mode 100644
index 0000000000000..539f415a2de74
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/shrnt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+shrnt     z0.b, z0.h, #1
+// CHECK-INST: shrnt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x14,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 14 2f 45 <unknown>
+
+shrnt     z31.b, z31.h, #8
+// CHECK-INST: shrnt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x17,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 17 28 45 <unknown>
+
+shrnt     z0.h, z0.s, #1
+// CHECK-INST: shrnt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x14,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 14 3f 45 <unknown>
+
+shrnt     z31.h, z31.s, #16
+// CHECK-INST: shrnt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x17,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 17 30 45 <unknown>
+
+shrnt     z0.s, z0.d, #1
+// CHECK-INST: shrnt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x14,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 14 7f 45 <unknown>
+
+shrnt     z31.s, z31.d, #32
+// CHECK-INST: shrnt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x17,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 17 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqrshrnb-diagnostics.s
new file mode 100644
index 0000000000000..1748e77ae2cd6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrnb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqrshrnb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrnb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrnb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrnb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrnb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrnb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrnb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqrshrnb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqrshrnb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqrshrnb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrnb.s b/llvm/test/MC/AArch64/SVE2/sqrshrnb.s
new file mode 100644
index 0000000000000..d5c6e8cd85138
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrnb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqrshrnb     z0.b, z0.h, #1
+// CHECK-INST: sqrshrnb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x28,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 28 2f 45 <unknown>
+
+sqrshrnb     z31.b, z31.h, #8
+// CHECK-INST: sqrshrnb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x2b,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2b 28 45 <unknown>
+
+sqrshrnb     z0.h, z0.s, #1
+// CHECK-INST: sqrshrnb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x28,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 28 3f 45 <unknown>
+
+sqrshrnb     z31.h, z31.s, #16
+// CHECK-INST: sqrshrnb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x2b,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2b 30 45 <unknown>
+
+sqrshrnb     z0.s, z0.d, #1
+// CHECK-INST: sqrshrnb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x28,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 28 7f 45 <unknown>
+
+sqrshrnb     z31.s, z31.d, #32
+// CHECK-INST: sqrshrnb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x2b,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2b 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqrshrnt-diagnostics.s
new file mode 100644
index 0000000000000..7fef281c81d73
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrnt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqrshrnt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrnt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrnt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrnt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrnt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrnt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrnt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqrshrnt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrnt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrnt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqrshrnt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqrshrnt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrnt.s b/llvm/test/MC/AArch64/SVE2/sqrshrnt.s
new file mode 100644
index 0000000000000..f299e9db34804
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrnt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqrshrnt     z0.b, z0.h, #1
+// CHECK-INST: sqrshrnt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x2c,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 2c 2f 45 <unknown>
+
+sqrshrnt     z31.b, z31.h, #8
+// CHECK-INST: sqrshrnt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x2f,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f 28 45 <unknown>
+
+sqrshrnt     z0.h, z0.s, #1
+// CHECK-INST: sqrshrnt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x2c,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 2c 3f 45 <unknown>
+
+sqrshrnt     z31.h, z31.s, #16
+// CHECK-INST: sqrshrnt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x2f,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f 30 45 <unknown>
+
+sqrshrnt     z0.s, z0.d, #1
+// CHECK-INST: sqrshrnt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x2c,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 2c 7f 45 <unknown>
+
+sqrshrnt     z31.s, z31.d, #32
+// CHECK-INST: sqrshrnt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x2f,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrunb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqrshrunb-diagnostics.s
new file mode 100644
index 0000000000000..b4dc68109a61d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrunb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqrshrunb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrunb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrunb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrunb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrunb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrunb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrunb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqrshrunb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqrshrunb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqrshrunb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrunb.s b/llvm/test/MC/AArch64/SVE2/sqrshrunb.s
new file mode 100644
index 0000000000000..2380347f8c34b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrunb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqrshrunb     z0.b, z0.h, #1
+// CHECK-INST: sqrshrunb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x08,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 08 2f 45 <unknown>
+
+sqrshrunb     z31.b, z31.h, #8
+// CHECK-INST: sqrshrunb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x0b,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0b 28 45 <unknown>
+
+sqrshrunb     z0.h, z0.s, #1
+// CHECK-INST: sqrshrunb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x08,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 08 3f 45 <unknown>
+
+sqrshrunb     z31.h, z31.s, #16
+// CHECK-INST: sqrshrunb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x0b,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0b 30 45 <unknown>
+
+sqrshrunb     z0.s, z0.d, #1
+// CHECK-INST: sqrshrunb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x08,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 08 7f 45 <unknown>
+
+sqrshrunb     z31.s, z31.d, #32
+// CHECK-INST: sqrshrunb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x0b,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0b 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrunt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqrshrunt-diagnostics.s
new file mode 100644
index 0000000000000..4ff61510759a0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrunt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqrshrunt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrunt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqrshrunt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrunt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqrshrunt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrunt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqrshrunt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqrshrunt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqrshrunt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqrshrunt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqrshrunt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqrshrunt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqrshrunt.s b/llvm/test/MC/AArch64/SVE2/sqrshrunt.s
new file mode 100644
index 0000000000000..95f060db6231c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqrshrunt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqrshrunt     z0.b, z0.h, #1
+// CHECK-INST: sqrshrunt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x0c,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 0c 2f 45 <unknown>
+
+sqrshrunt     z31.b, z31.h, #8
+// CHECK-INST: sqrshrunt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x0f,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0f 28 45 <unknown>
+
+sqrshrunt     z0.h, z0.s, #1
+// CHECK-INST: sqrshrunt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x0c,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 0c 3f 45 <unknown>
+
+sqrshrunt     z31.h, z31.s, #16
+// CHECK-INST: sqrshrunt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x0f,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0f 30 45 <unknown>
+
+sqrshrunt     z0.s, z0.d, #1
+// CHECK-INST: sqrshrunt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x0c,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 0c 7f 45 <unknown>
+
+sqrshrunt     z31.s, z31.d, #32
+// CHECK-INST: sqrshrunt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x0f,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0f 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshrnb-diagnostics.s
new file mode 100644
index 0000000000000..09a98bcfe21a8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrnb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqshrnb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrnb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrnb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrnb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrnb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrnb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrnb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqshrnb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqshrnb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqshrnb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrnb.s b/llvm/test/MC/AArch64/SVE2/sqshrnb.s
new file mode 100644
index 0000000000000..e829f4995f0c6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrnb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshrnb     z0.b, z0.h, #1
+// CHECK-INST: sqshrnb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x20,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 20 2f 45 <unknown>
+
+sqshrnb     z31.b, z31.h, #8
+// CHECK-INST: sqshrnb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x23,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 23 28 45 <unknown>
+
+sqshrnb     z0.h, z0.s, #1
+// CHECK-INST: sqshrnb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x20,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 20 3f 45 <unknown>
+
+sqshrnb     z31.h, z31.s, #16
+// CHECK-INST: sqshrnb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x23,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 23 30 45 <unknown>
+
+sqshrnb     z0.s, z0.d, #1
+// CHECK-INST: sqshrnb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x20,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 20 7f 45 <unknown>
+
+sqshrnb     z31.s, z31.d, #32
+// CHECK-INST: sqshrnb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x23,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 23 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshrnt-diagnostics.s
new file mode 100644
index 0000000000000..199741bef6f4a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrnt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqshrnt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrnt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrnt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrnt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrnt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrnt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrnt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqshrnt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrnt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrnt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqshrnt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqshrnt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrnt.s b/llvm/test/MC/AArch64/SVE2/sqshrnt.s
new file mode 100644
index 0000000000000..f57b2c00369a8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrnt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshrnt     z0.b, z0.h, #1
+// CHECK-INST: sqshrnt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x24,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 24 2f 45 <unknown>
+
+sqshrnt     z31.b, z31.h, #8
+// CHECK-INST: sqshrnt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x27,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 27 28 45 <unknown>
+
+sqshrnt     z0.h, z0.s, #1
+// CHECK-INST: sqshrnt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x24,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 24 3f 45 <unknown>
+
+sqshrnt     z31.h, z31.s, #16
+// CHECK-INST: sqshrnt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x27,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 27 30 45 <unknown>
+
+sqshrnt     z0.s, z0.d, #1
+// CHECK-INST: sqshrnt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x24,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 24 7f 45 <unknown>
+
+sqshrnt     z31.s, z31.d, #32
+// CHECK-INST: sqshrnt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x27,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 27 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrunb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshrunb-diagnostics.s
new file mode 100644
index 0000000000000..d157918dbaa9e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrunb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqshrunb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrunb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrunb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrunb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrunb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrunb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrunb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqshrunb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqshrunb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqshrunb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrunb.s b/llvm/test/MC/AArch64/SVE2/sqshrunb.s
new file mode 100644
index 0000000000000..8275da0f16ff3
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrunb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshrunb     z0.b, z0.h, #1
+// CHECK-INST: sqshrunb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x00,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 00 2f 45 <unknown>
+
+sqshrunb     z31.b, z31.h, #8
+// CHECK-INST: sqshrunb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x03,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 03 28 45 <unknown>
+
+sqshrunb     z0.h, z0.s, #1
+// CHECK-INST: sqshrunb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x00,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 00 3f 45 <unknown>
+
+sqshrunb     z31.h, z31.s, #16
+// CHECK-INST: sqshrunb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x03,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 03 30 45 <unknown>
+
+sqshrunb     z0.s, z0.d, #1
+// CHECK-INST: sqshrunb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x00,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 00 7f 45 <unknown>
+
+sqshrunb     z31.s, z31.d, #32
+// CHECK-INST: sqshrunb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x03,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 03 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrunt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqshrunt-diagnostics.s
new file mode 100644
index 0000000000000..072870648467c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrunt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sqshrunt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrunt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: sqshrunt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrunt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: sqshrunt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrunt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: sqshrunt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sqshrunt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqshrunt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqshrunt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sqshrunt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqshrunt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqshrunt.s b/llvm/test/MC/AArch64/SVE2/sqshrunt.s
new file mode 100644
index 0000000000000..86d62285a6134
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqshrunt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sqshrunt     z0.b, z0.h, #1
+// CHECK-INST: sqshrunt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x04,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 04 2f 45 <unknown>
+
+sqshrunt     z31.b, z31.h, #8
+// CHECK-INST: sqshrunt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x07,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 07 28 45 <unknown>
+
+sqshrunt     z0.h, z0.s, #1
+// CHECK-INST: sqshrunt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x04,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 04 3f 45 <unknown>
+
+sqshrunt     z31.h, z31.s, #16
+// CHECK-INST: sqshrunt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x07,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 07 30 45 <unknown>
+
+sqshrunt     z0.s, z0.d, #1
+// CHECK-INST: sqshrunt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x04,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 04 7f 45 <unknown>
+
+sqshrunt     z31.s, z31.d, #32
+// CHECK-INST: sqshrunt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x07,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 07 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqxtnb-diagnostics.s
new file mode 100644
index 0000000000000..bfce6761aaafa
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtnb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sqxtnb z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnb z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtnb z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnb z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtnb z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnb z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtnb z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnb z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+sqxtnb  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtnb  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqxtnb  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtnb  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtnb.s b/llvm/test/MC/AArch64/SVE2/sqxtnb.s
new file mode 100644
index 0000000000000..7f0dcea20a3e8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtnb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sqxtnb z0.b, z31.h
+// CHECK-INST: sqxtnb	z0.b, z31.h
+// CHECK-ENCODING: [0xe0,0x43,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 43 28 45 <unknown>
+
+sqxtnb z0.h, z31.s
+// CHECK-INST: sqxtnb	z0.h, z31.s
+// CHECK-ENCODING: [0xe0,0x43,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 43 30 45 <unknown>
+
+sqxtnb z0.s, z31.d
+// CHECK-INST: sqxtnb	z0.s, z31.d
+// CHECK-ENCODING: [0xe0,0x43,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 43 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqxtnt-diagnostics.s
new file mode 100644
index 0000000000000..2a137d09eb81e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtnt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sqxtnt z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnt z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtnt z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnt z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtnt z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnt z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtnt z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtnt z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+sqxtnt  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtnt  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqxtnt  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtnt  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtnt.s b/llvm/test/MC/AArch64/SVE2/sqxtnt.s
new file mode 100644
index 0000000000000..9715b97b14f25
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtnt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sqxtnt z0.b, z31.h
+// CHECK-INST: sqxtnt	z0.b, z31.h
+// CHECK-ENCODING: [0xe0,0x47,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 47 28 45 <unknown>
+
+sqxtnt z0.h, z31.s
+// CHECK-INST: sqxtnt	z0.h, z31.s
+// CHECK-ENCODING: [0xe0,0x47,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 47 30 45 <unknown>
+
+sqxtnt z0.s, z31.d
+// CHECK-INST: sqxtnt	z0.s, z31.d
+// CHECK-ENCODING: [0xe0,0x47,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 47 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtunb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqxtunb-diagnostics.s
new file mode 100644
index 0000000000000..0f1be1a05243d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtunb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sqxtunb z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunb z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtunb z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunb z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtunb z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunb z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtunb z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunb z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+sqxtunb  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtunb  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqxtunb  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtunb  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtunb.s b/llvm/test/MC/AArch64/SVE2/sqxtunb.s
new file mode 100644
index 0000000000000..6db3885001226
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtunb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sqxtunb z0.b, z31.h
+// CHECK-INST: sqxtunb	z0.b, z31.h
+// CHECK-ENCODING: [0xe0,0x53,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 53 28 45 <unknown>
+
+sqxtunb z0.h, z31.s
+// CHECK-INST: sqxtunb	z0.h, z31.s
+// CHECK-ENCODING: [0xe0,0x53,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 53 30 45 <unknown>
+
+sqxtunb z0.s, z31.d
+// CHECK-INST: sqxtunb	z0.s, z31.d
+// CHECK-ENCODING: [0xe0,0x53,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 53 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtunt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqxtunt-diagnostics.s
new file mode 100644
index 0000000000000..42a7cca3f831e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtunt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sqxtunt z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunt z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtunt z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunt z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtunt z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunt z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqxtunt z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqxtunt z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+sqxtunt  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtunt  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sqxtunt  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqxtunt  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sqxtunt.s b/llvm/test/MC/AArch64/SVE2/sqxtunt.s
new file mode 100644
index 0000000000000..bd09163c08bcc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sqxtunt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sqxtunt z0.b, z31.h
+// CHECK-INST: sqxtunt	z0.b, z31.h
+// CHECK-ENCODING: [0xe0,0x57,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 57 28 45 <unknown>
+
+sqxtunt z0.h, z31.s
+// CHECK-INST: sqxtunt	z0.h, z31.s
+// CHECK-ENCODING: [0xe0,0x57,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 57 30 45 <unknown>
+
+sqxtunt z0.s, z31.d
+// CHECK-INST: sqxtunt	z0.s, z31.d
+// CHECK-ENCODING: [0xe0,0x57,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 57 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/subhnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/subhnb-diagnostics.s
new file mode 100644
index 0000000000000..3d8dd545fecb0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/subhnb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+subhnb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+subhnb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+subhnb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+subhnb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+subhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: subhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+subhnb  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: subhnb  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/subhnb.s b/llvm/test/MC/AArch64/SVE2/subhnb.s
new file mode 100644
index 0000000000000..4b90166a5b048
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/subhnb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+subhnb z0.b, z1.h, z31.h
+// CHECK-INST: subhnb	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x70,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 70 7f 45 <unknown>
+
+subhnb z0.h, z1.s, z31.s
+// CHECK-INST: subhnb	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x70,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 70 bf 45 <unknown>
+
+subhnb z0.s, z1.d, z31.d
+// CHECK-INST: subhnb	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x70,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 70 ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/subhnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/subhnt-diagnostics.s
new file mode 100644
index 0000000000000..2c0b2fd107384
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/subhnt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+subhnt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+subhnt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+subhnt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+subhnt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: subhnt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+subhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: subhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+subhnt  z0.s, z1.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: subhnt  z0.s, z1.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/subhnt.s b/llvm/test/MC/AArch64/SVE2/subhnt.s
new file mode 100644
index 0000000000000..624147e66b829
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/subhnt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+subhnt z0.b, z1.h, z31.h
+// CHECK-INST: subhnt	z0.b, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x74,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 74 7f 45 <unknown>
+
+subhnt z0.h, z1.s, z31.s
+// CHECK-INST: subhnt	z0.h, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x74,0xbf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 74 bf 45 <unknown>
+
+subhnt z0.s, z1.d, z31.d
+// CHECK-INST: subhnt	z0.s, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x74,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 74 ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshrnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqrshrnb-diagnostics.s
new file mode 100644
index 0000000000000..8e87ad29b2368
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshrnb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+uqrshrnb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqrshrnb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqrshrnb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqrshrnb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqrshrnb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqrshrnb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqrshrnb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+uqrshrnb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+uqrshrnb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqrshrnb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshrnb.s b/llvm/test/MC/AArch64/SVE2/uqrshrnb.s
new file mode 100644
index 0000000000000..a10b02f097b35
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshrnb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqrshrnb     z0.b, z0.h, #1
+// CHECK-INST: uqrshrnb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x38,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 38 2f 45 <unknown>
+
+uqrshrnb     z31.b, z31.h, #8
+// CHECK-INST: uqrshrnb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x3b,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3b 28 45 <unknown>
+
+uqrshrnb     z0.h, z0.s, #1
+// CHECK-INST: uqrshrnb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x38,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 38 3f 45 <unknown>
+
+uqrshrnb     z31.h, z31.s, #16
+// CHECK-INST: uqrshrnb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x3b,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3b 30 45 <unknown>
+
+uqrshrnb     z0.s, z0.d, #1
+// CHECK-INST: uqrshrnb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x38,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 38 7f 45 <unknown>
+
+uqrshrnb     z31.s, z31.d, #32
+// CHECK-INST: uqrshrnb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x3b,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3b 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshrnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqrshrnt-diagnostics.s
new file mode 100644
index 0000000000000..34f9829873dce
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshrnt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+uqrshrnt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqrshrnt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqrshrnt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqrshrnt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqrshrnt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqrshrnt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqrshrnt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+uqrshrnt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqrshrnt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqrshrnt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+uqrshrnt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqrshrnt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqrshrnt.s b/llvm/test/MC/AArch64/SVE2/uqrshrnt.s
new file mode 100644
index 0000000000000..6eaf48440f78a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqrshrnt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqrshrnt     z0.b, z0.h, #1
+// CHECK-INST: uqrshrnt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x3c,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 3c 2f 45 <unknown>
+
+uqrshrnt     z31.b, z31.h, #8
+// CHECK-INST: uqrshrnt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x3f,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 28 45 <unknown>
+
+uqrshrnt     z0.h, z0.s, #1
+// CHECK-INST: uqrshrnt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x3c,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 3c 3f 45 <unknown>
+
+uqrshrnt     z31.h, z31.s, #16
+// CHECK-INST: uqrshrnt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x3f,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 30 45 <unknown>
+
+uqrshrnt     z0.s, z0.d, #1
+// CHECK-INST: uqrshrnt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x3c,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 3c 7f 45 <unknown>
+
+uqrshrnt     z31.s, z31.d, #32
+// CHECK-INST: uqrshrnt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x3f,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqshrnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqshrnb-diagnostics.s
new file mode 100644
index 0000000000000..a5c6b10150860
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshrnb-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+uqshrnb z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqshrnb z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqshrnb z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqshrnb z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqshrnb z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqshrnb z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqshrnb z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+uqshrnb z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnb z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnb z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnb z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnb z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnb z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+uqshrnb     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqshrnb     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqshrnb.s b/llvm/test/MC/AArch64/SVE2/uqshrnb.s
new file mode 100644
index 0000000000000..9f04d2346da08
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshrnb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqshrnb     z0.b, z0.h, #1
+// CHECK-INST: uqshrnb	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x30,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 30 2f 45 <unknown>
+
+uqshrnb     z31.b, z31.h, #8
+// CHECK-INST: uqshrnb	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x33,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 33 28 45 <unknown>
+
+uqshrnb     z0.h, z0.s, #1
+// CHECK-INST: uqshrnb	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x30,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 30 3f 45 <unknown>
+
+uqshrnb     z31.h, z31.s, #16
+// CHECK-INST: uqshrnb	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x33,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 33 30 45 <unknown>
+
+uqshrnb     z0.s, z0.d, #1
+// CHECK-INST: uqshrnb	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x30,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 30 7f 45 <unknown>
+
+uqshrnb     z31.s, z31.d, #32
+// CHECK-INST: uqshrnb	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x33,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 33 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqshrnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqshrnt-diagnostics.s
new file mode 100644
index 0000000000000..562b8695f7665
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshrnt-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+uqshrnt z30.b, z10.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqshrnt z30.b, z10.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z18.b, z27.h, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: uqshrnt z18.b, z27.h, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z26.h, z4.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqshrnt z26.h, z4.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z25.h, z10.s, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: uqshrnt z25.h, z10.s, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z17.s, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqshrnt z17.s, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z0.s, z15.d, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: uqshrnt z0.s, z15.d, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+uqshrnt z0.b, z0.b, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnt z0.b, z0.b, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z0.h, z0.h, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnt z0.h, z0.h, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z0.s, z0.s, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnt z0.s, z0.s, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqshrnt z0.d, z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqshrnt z0.d, z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+uqshrnt     z31.s, z31.d, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqshrnt     z31.s, z31.d, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqshrnt.s b/llvm/test/MC/AArch64/SVE2/uqshrnt.s
new file mode 100644
index 0000000000000..dc151a2c786c7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqshrnt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+uqshrnt     z0.b, z0.h, #1
+// CHECK-INST: uqshrnt	z0.b, z0.h, #1
+// CHECK-ENCODING: [0x00,0x34,0x2f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 34 2f 45 <unknown>
+
+uqshrnt     z31.b, z31.h, #8
+// CHECK-INST: uqshrnt	z31.b, z31.h, #8
+// CHECK-ENCODING: [0xff,0x37,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 37 28 45 <unknown>
+
+uqshrnt     z0.h, z0.s, #1
+// CHECK-INST: uqshrnt	z0.h, z0.s, #1
+// CHECK-ENCODING: [0x00,0x34,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 34 3f 45 <unknown>
+
+uqshrnt     z31.h, z31.s, #16
+// CHECK-INST: uqshrnt	z31.h, z31.s, #16
+// CHECK-ENCODING: [0xff,0x37,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 37 30 45 <unknown>
+
+uqshrnt     z0.s, z0.d, #1
+// CHECK-INST: uqshrnt	z0.s, z0.d, #1
+// CHECK-ENCODING: [0x00,0x34,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 34 7f 45 <unknown>
+
+uqshrnt     z31.s, z31.d, #32
+// CHECK-INST: uqshrnt	z31.s, z31.d, #32
+// CHECK-ENCODING: [0xff,0x37,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 37 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqxtnb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqxtnb-diagnostics.s
new file mode 100644
index 0000000000000..5868197b94490
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqxtnb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uqxtnb z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnb z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqxtnb z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnb z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqxtnb z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnb z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqxtnb z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnb z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+uqxtnb  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqxtnb  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uqxtnb  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqxtnb  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqxtnb.s b/llvm/test/MC/AArch64/SVE2/uqxtnb.s
new file mode 100644
index 0000000000000..e0361addc8eec
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqxtnb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uqxtnb z0.b, z31.h
+// CHECK-INST: uqxtnb	z0.b, z31.h
+// CHECK-ENCODING: [0xe0,0x4b,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 4b 28 45 <unknown>
+
+uqxtnb z0.h, z31.s
+// CHECK-INST: uqxtnb	z0.h, z31.s
+// CHECK-ENCODING: [0xe0,0x4b,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 4b 30 45 <unknown>
+
+uqxtnb z0.s, z31.d
+// CHECK-INST: uqxtnb	z0.s, z31.d
+// CHECK-ENCODING: [0xe0,0x4b,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 4b 60 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/uqxtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/uqxtnt-diagnostics.s
new file mode 100644
index 0000000000000..d61fc764a374b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqxtnt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+uqxtnt z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnt z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqxtnt z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnt z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqxtnt z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnt z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uqxtnt z0.d, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uqxtnt z0.d, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+uqxtnt  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqxtnt  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uqxtnt  z0.s, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uqxtnt  z0.s, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/uqxtnt.s b/llvm/test/MC/AArch64/SVE2/uqxtnt.s
new file mode 100644
index 0000000000000..73dfc27a0fda0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/uqxtnt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+uqxtnt z0.b, z31.h
+// CHECK-INST: uqxtnt	z0.b, z31.h
+// CHECK-ENCODING: [0xe0,0x4f,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 4f 28 45 <unknown>
+
+uqxtnt z0.h, z31.s
+// CHECK-INST: uqxtnt	z0.h, z31.s
+// CHECK-ENCODING: [0xe0,0x4f,0x30,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 4f 30 45 <unknown>
+
+uqxtnt z0.s, z31.d
+// CHECK-INST: uqxtnt	z0.s, z31.d
+// CHECK-ENCODING: [0xe0,0x4f,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e0 4f 60 45 <unknown>

From 0f748e6e9b974a427d0b699d4d2534aa865ba9b6 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Fri, 24 May 2019 10:26:23 +0000
Subject: [PATCH 0159/1176] [clangd] Limit the size of synthesized fix message

Summary: A temporary workaround until we figure out a better way to present fixes.

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62372

llvm-svn: 361625
---
 clang-tools-extra/clangd/Diagnostics.cpp      | 38 ++++++++++++++++---
 .../clangd/unittests/DiagnosticsTests.cpp     | 32 +++++++++++++++-
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/clang-tools-extra/clangd/Diagnostics.cpp b/clang-tools-extra/clangd/Diagnostics.cpp
index 5f42841db7717..a7bc1f1dcdb86 100644
--- a/clang-tools-extra/clangd/Diagnostics.cpp
+++ b/clang-tools-extra/clangd/Diagnostics.cpp
@@ -25,7 +25,9 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cstddef>
 
 namespace clang {
 namespace clangd {
@@ -437,6 +439,21 @@ void StoreDiags::EndSourceFile() {
   LangOpts = None;
 }
 
+/// Sanitizes a piece for presenting it in a synthesized fix message. Ensures
+/// the result is not too large and does not contain newlines.
+static void writeCodeToFixMessage(llvm::raw_ostream &OS, llvm::StringRef Code) {
+  constexpr unsigned MaxLen = 50;
+
+  // Only show the first line if there are many.
+  llvm::StringRef R = Code.split('\n').first;
+  // Shorten the message if it's too long.
+  R = R.take_front(MaxLen);
+
+  OS << R;
+  if (R.size() != Code.size())
+    OS << "…";
+}
+
 void StoreDiags::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
                                   const clang::Diagnostic &Info) {
   DiagnosticConsumer::HandleDiagnostic(DiagLevel, Info);
@@ -494,12 +511,21 @@ void StoreDiags::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
       llvm::StringRef Insert = FixIt.CodeToInsert;
       if (!Invalid) {
         llvm::raw_svector_ostream M(Message);
-        if (!Remove.empty() && !Insert.empty())
-          M << "change '" << Remove << "' to '" << Insert << "'";
-        else if (!Remove.empty())
-          M << "remove '" << Remove << "'";
-        else if (!Insert.empty())
-          M << "insert '" << Insert << "'";
+        if (!Remove.empty() && !Insert.empty()) {
+          M << "change '";
+          writeCodeToFixMessage(M, Remove);
+          M << "' to '";
+          writeCodeToFixMessage(M, Insert);
+          M << "'";
+        } else if (!Remove.empty()) {
+          M << "remove '";
+          writeCodeToFixMessage(M, Remove);
+          M << "'";
+        } else if (!Insert.empty()) {
+          M << "insert '";
+          writeCodeToFixMessage(M, Insert);
+          M << "'";
+        }
         // Don't allow source code to inject newlines into diagnostics.
         std::replace(Message.begin(), Message.end(), '\n', ' ');
       }
diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp
index 9d0492aa84d20..02126833df65c 100644
--- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp
@@ -120,7 +120,7 @@ o]]();
                      "use of undeclared identifier 'goo'; did you mean 'foo'?"),
                 DiagSource(Diag::Clang), DiagName("undeclared_var_use_suggest"),
                 WithFix(
-                    Fix(Test.range("typo"), "foo", "change 'go\\ o' to 'foo'")),
+                    Fix(Test.range("typo"), "foo", "change 'go\\…' to 'foo'")),
                 // This is a pretty normal range.
                 WithNote(Diag(Test.range("decl"), "'foo' declared here"))),
           // This range is zero-width and insertion. Therefore make sure we are
@@ -247,6 +247,36 @@ TEST(DiagnosticTest, ClangTidyWarningAsError) {
           DiagSeverity(DiagnosticsEngine::Error))));
 }
 
+TEST(DiagnosticTest, LongFixMessages) {
+  // We limit the size of printed code.
+  Annotations Source(R"cpp(
+    int main() {
+      int somereallyreallyreallyreallyreallyreallyreallyreallylongidentifier;
+      [[omereallyreallyreallyreallyreallyreallyreallyreallylongidentifier]]= 10;
+    }
+  )cpp");
+  TestTU TU = TestTU::withCode(Source.code());
+  EXPECT_THAT(
+      TU.build().getDiagnostics(),
+      ElementsAre(WithFix(Fix(
+          Source.range(),
+          "somereallyreallyreallyreallyreallyreallyreallyreallylongidentifier",
+          "change 'omereallyreallyreallyreallyreallyreallyreallyreall…' to "
+          "'somereallyreallyreallyreallyreallyreallyreallyreal…'"))));
+  // Only show changes up to a first newline.
+  Source = Annotations(R"cpp(
+    int main() {
+      int ident;
+      [[ide\
+n]] = 10;
+    }
+  )cpp");
+  TU = TestTU::withCode(Source.code());
+  EXPECT_THAT(TU.build().getDiagnostics(),
+              ElementsAre(WithFix(
+                  Fix(Source.range(), "ident", "change 'ide\\…' to 'ident'"))));
+}
+
 TEST(DiagnosticTest, ClangTidyWarningAsErrorTrumpsSuppressionComment) {
   Annotations Main(R"cpp(
     int main() {

From e4f01ec50c467ebf743191ca6895ef4595762462 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Fri, 24 May 2019 10:26:48 +0000
Subject: [PATCH 0160/1176] [llvm-readobj][mips] Align GOT columns headers
 properly in 64-bit case

llvm-svn: 361626
---
 llvm/test/tools/llvm-readobj/mips-got.test | 10 +++++-----
 llvm/tools/llvm-readobj/ELFDumper.cpp      | 16 +++++++++++++---
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/mips-got.test b/llvm/test/tools/llvm-readobj/mips-got.test
index 021dcfb116d3c..b3424dbc6f1f4 100644
--- a/llvm/test/tools/llvm-readobj/mips-got.test
+++ b/llvm/test/tools/llvm-readobj/mips-got.test
@@ -14,15 +14,15 @@ RUN:   FileCheck %s -check-prefix GOT-STATIC
 RUN: not llvm-readelf --mips-plt-got %p/Inputs/relocs.obj.elf-mips 2>&1 | \
 RUN:   FileCheck %s -check-prefix GNU-GOT-OBJ
 RUN: llvm-readelf --mips-plt-got %p/Inputs/dynamic-table-exe.mips | \
-RUN:   FileCheck %s -check-prefix GNU-GOT-EXE
+RUN:   FileCheck %s --strict-whitespace -check-prefix GNU-GOT-EXE
 RUN: llvm-readelf --mips-plt-got %p/Inputs/dynamic-table-so.mips | \
-RUN:   FileCheck %s -check-prefix GNU-GOT-SO
+RUN:   FileCheck %s --strict-whitespace -check-prefix GNU-GOT-SO
 RUN: llvm-readelf --mips-plt-got %p/Inputs/got-tls.so.elf-mips64el | \
-RUN:   FileCheck %s -check-prefix GNU-GOT-TLS
+RUN:   FileCheck %s --strict-whitespace -check-prefix GNU-GOT-TLS
 RUN: llvm-readelf --mips-plt-got %p/Inputs/got-empty.exe.mipsel | \
-RUN:   FileCheck %s -check-prefix GNU-GOT-EMPTY
+RUN:   FileCheck %s --strict-whitespace -check-prefix GNU-GOT-EMPTY
 RUN: llvm-readelf --mips-plt-got %p/Inputs/got-static.exe.mips | \
-RUN:   FileCheck %s -check-prefix GNU-GOT-STATIC
+RUN:   FileCheck %s --strict-whitespace -check-prefix GNU-GOT-STATIC
 
 GOT-OBJ: error: Cannot find .got section
 
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 65abdbd6109f0..159e300de3058 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -4061,7 +4061,10 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
      << format_hex_no_prefix(Parser.getGp(), 8 + Bias) << "\n\n";
 
   OS << " Reserved entries:\n";
-  OS << "   Address     Access  Initial Purpose\n";
+  if (ELFT::Is64Bits)
+    OS << "           Address     Access          Initial Purpose\n";
+  else
+    OS << "   Address     Access  Initial Purpose\n";
   PrintEntry(Parser.getGotLazyResolver(), "Lazy resolver");
   if (Parser.getGotModulePointer())
     PrintEntry(Parser.getGotModulePointer(), "Module pointer (GNU extension)");
@@ -4069,7 +4072,10 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
   if (!Parser.getLocalEntries().empty()) {
     OS << "\n";
     OS << " Local entries:\n";
-    OS << "   Address     Access  Initial\n";
+    if (ELFT::Is64Bits)
+      OS << "           Address     Access          Initial\n";
+    else
+      OS << "   Address     Access  Initial\n";
     for (auto &E : Parser.getLocalEntries())
       PrintEntry(&E, "");
   }
@@ -4080,7 +4086,11 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
   if (!Parser.getGlobalEntries().empty()) {
     OS << "\n";
     OS << " Global entries:\n";
-    OS << "   Address     Access  Initial Sym.Val. Type    Ndx Name\n";
+    if (ELFT::Is64Bits)
+      OS << "           Address     Access          Initial         Sym.Val."
+         << " Type    Ndx Name\n";
+    else
+      OS << "   Address     Access  Initial Sym.Val. Type    Ndx Name\n";
     for (auto &E : Parser.getGlobalEntries()) {
       const Elf_Sym *Sym = Parser.getGotSym(&E);
       std::string SymName = this->dumper()->getFullSymbolName(

From b3e58df80c592b3ce3de77455b9b8cdfd66bb7c4 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 24 May 2019 10:32:01 +0000
Subject: [PATCH 0161/1176] [AArch64][SVE2] Asm: support SVE2 String Processing
 Group

Summary:
Patch adds support for the SVE2 character match instructions MATCH and NMATCH.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62206

llvm-svn: 361627
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  4 ++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 31 ++++++++++
 llvm/test/MC/AArch64/SVE2/match-diagnostics.s | 61 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/match.s             | 32 ++++++++++
 .../test/MC/AArch64/SVE2/nmatch-diagnostics.s | 61 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/nmatch.s            | 32 ++++++++++
 6 files changed, 221 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/match-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/match.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/nmatch-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/nmatch.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3e17e687bdc0b..da26b409a4570 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1264,6 +1264,10 @@ let Predicates = [HasSVE2] in {
   defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
   defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
 
+  // SVE2 character match
+  defm MATCH_PPzZZ  : sve2_char_match<0b0, "match">;
+  defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c4c890a63ecd0..a05533b18dae1 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5132,3 +5132,34 @@ multiclass sve_int_break_z<bits<3> opc, string asm> {
   def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 String Processing Group
+//===----------------------------------------------------------------------===//
+
+class sve2_char_match<bit sz, bit opc, string asm,
+                      PPRRegOp pprty, ZPRRegOp zprty>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+  asm, "\t$Pd, $Pg/z, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<4> Pd;
+  bits<3> Pg;
+  bits<5> Zm;
+  bits<5> Zn;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = sz;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4}     = opc;
+  let Inst{3-0}   = Pd;
+
+  let Defs = [NZCV];
+}
+
+multiclass sve2_char_match<bit opc, string asm> {
+  def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
+  def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+}
diff --git a/llvm/test/MC/AArch64/SVE2/match-diagnostics.s b/llvm/test/MC/AArch64/SVE2/match-diagnostics.s
new file mode 100644
index 0000000000000..349747fdeb45a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/match-diagnostics.s
@@ -0,0 +1,61 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Restricted predicate out of range.
+
+match p0.b, p8/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: match p0.b, p8/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+match p0.b, p0/m, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: match p0.b, p0/m, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid destination predicate register
+
+match p0.s, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: match p0.s, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+match p0.d, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: match p0.d, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+match p0.b, p0/z, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: match p0.b, p0/z, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+match p0.b, p0/z, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: match p0.b, p0/z, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+match p0.b, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: match p0.b, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+match p0.b, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: match p0.b, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/match.s b/llvm/test/MC/AArch64/SVE2/match.s
new file mode 100644
index 0000000000000..07008f0d506d1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/match.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+match p0.b, p0/z, z0.b, z0.b
+// CHECK-INST: match p0.b, p0/z, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0x80,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 20 45 <unknown>
+
+match p0.h, p0/z, z0.h, z0.h
+// CHECK-INST: match p0.h, p0/z, z0.h, z0.h
+// CHECK-ENCODING: [0x00,0x80,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 80 60 45 <unknown>
+
+match p15.b, p7/z, z30.b, z31.b
+// CHECK-INST: match p15.b, p7/z, z30.b, z31.b
+// CHECK-ENCODING: [0xcf,0x9f,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: cf 9f 3f 45 <unknown>
+
+match p15.h, p7/z, z30.h, z31.h
+// CHECK-INST: match p15.h, p7/z, z30.h, z31.h
+// CHECK-ENCODING: [0xcf,0x9f,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: cf 9f 7f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/nmatch-diagnostics.s b/llvm/test/MC/AArch64/SVE2/nmatch-diagnostics.s
new file mode 100644
index 0000000000000..e53b9e6299879
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/nmatch-diagnostics.s
@@ -0,0 +1,61 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Restricted predicate out of range.
+
+nmatch p0.b, p8/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: nmatch p0.b, p8/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+nmatch p0.b, p0/m, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: nmatch p0.b, p0/m, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid destination predicate register
+
+nmatch p0.s, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: nmatch p0.s, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+nmatch p0.d, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: nmatch p0.d, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+nmatch p0.b, p0/z, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: nmatch p0.b, p0/z, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+nmatch p0.b, p0/z, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: nmatch p0.b, p0/z, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+nmatch p0.b, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: nmatch p0.b, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+nmatch p0.b, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: nmatch p0.b, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/nmatch.s b/llvm/test/MC/AArch64/SVE2/nmatch.s
new file mode 100644
index 0000000000000..6121f5530c446
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/nmatch.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+nmatch p0.b, p0/z, z0.b, z0.b
+// CHECK-INST: nmatch p0.b, p0/z, z0.b, z0.b
+// CHECK-ENCODING: [0x10,0x80,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 10 80 20 45 <unknown>
+
+nmatch p0.h, p0/z, z0.h, z0.h
+// CHECK-INST: nmatch p0.h, p0/z, z0.h, z0.h
+// CHECK-ENCODING: [0x10,0x80,0x60,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 10 80 60 45 <unknown>
+
+nmatch p15.b, p7/z, z30.b, z31.b
+// CHECK-INST: nmatch p15.b, p7/z, z30.b, z31.b
+// CHECK-ENCODING: [0xdf,0x9f,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f 3f 45 <unknown>
+
+nmatch p15.h, p7/z, z30.h, z31.h
+// CHECK-INST: nmatch p15.h, p7/z, z30.h, z31.h
+// CHECK-ENCODING: [0xdf,0x9f,0x7f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f 7f 45 <unknown>

From f2d5b7a4fa01b076553db7a5fb0f3b773fe26693 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Fri, 24 May 2019 10:39:00 +0000
Subject: [PATCH 0162/1176] Made
 cppcoreguidelines-pro-type-member-init-use-assignment run in all language
 modes

llvm-svn: 361628
---
 .../cppcoreguidelines-pro-type-member-init-use-assignment.cpp   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
index dd1f9ac4aa9a5..6cf12b6e2db2b 100644
--- a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
+++ b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s cppcoreguidelines-pro-type-member-init %t -- -config="{CheckOptions: [{key: "cppcoreguidelines-pro-type-member-init.UseAssignment", value: 1}]}" -- -std=c++11
+// RUN: %check_clang_tidy %s cppcoreguidelines-pro-type-member-init %t -- -config="{CheckOptions: [{key: "cppcoreguidelines-pro-type-member-init.UseAssignment", value: 1}]}"
 
 struct T {
   int i;

From b7f2a2b4c0ec40e17371c5b15d4e92b39b196c5c Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Fri, 24 May 2019 10:50:15 +0000
Subject: [PATCH 0163/1176] Make
 cppcoreguidelines-pro-type-member-init-use-assignment.cpp pass on platforms
 where char is unsigned

The other options are to completely specify the triple (reduces test
coverage), or to specify a regex that allows either '0' or '0U' for char
initializers, however, that relaxes the test.

llvm-svn: 361629
---
 .../cppcoreguidelines-pro-type-member-init-use-assignment.cpp   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
index 6cf12b6e2db2b..d6570502c92d9 100644
--- a/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
+++ b/clang-tools-extra/test/clang-tidy/cppcoreguidelines-pro-type-member-init-use-assignment.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s cppcoreguidelines-pro-type-member-init %t -- -config="{CheckOptions: [{key: "cppcoreguidelines-pro-type-member-init.UseAssignment", value: 1}]}"
+// RUN: %check_clang_tidy %s cppcoreguidelines-pro-type-member-init %t -- -config="{CheckOptions: [{key: "cppcoreguidelines-pro-type-member-init.UseAssignment", value: 1}]}" -- -fsigned-char
 
 struct T {
   int i;

From f835fcf412c92eac20bf2110e13c49fc86d0c55c Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Fri, 24 May 2019 11:12:50 +0000
Subject: [PATCH 0164/1176] [llvm-readelf] - Allow dumping of the .dynamic
 section even if there is no PT_DYNAMIC header.

It is now possible after D61937 was landed and was discussed
in it's review comments. It is not consistent with GNU, which
does not output .dynamic section content in this case for
no visible reason.

Differential revision: https://reviews.llvm.org/D62179

llvm-svn: 361630
---
 .../elf-dynamic-no-pt-dynamic.test             | 15 +++++++++++----
 llvm/tools/llvm-readobj/ELFDumper.cpp          | 18 +++++++++---------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test b/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
index 3ef293714a897..5a03d04ab4d54 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
@@ -1,16 +1,23 @@
-# Show that no dumping occurs if there is no PT_DYNAMIC header.
+## Show that dumping occurs even if there is no PT_DYNAMIC header.
+## This is inconsistent with the GNU behavior, but seems to be more reasonable.
 # RUN: yaml2obj %s -o %t.no-phdr
 # RUN: llvm-readobj --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=LLVM
-# RUN: llvm-readelf --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=GNU --allow-empty
+# RUN: llvm-readelf --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=GNU
 
 # LLVM:      File: {{.*}}.no-phdr
 # LLVM-NEXT: Format: ELF64-x86-64
 # LLVM-NEXT: Arch: x86_64
 # LLVM-NEXT: AddressSize: 64bit
 # LLVM-NEXT: LoadName:{{ *}}
-# LLVM-NOT:  {{.}}
+# LLVM-NEXT: DynamicSection [ (1 entries)
+# LLVM-NEXT:   Tag                Type Name/Value
+# LLVM-NEXT:   0x0000000000000000 NULL 0x0
+# LLVM-NEXT: ]
 
-# GNU-NOT: {{.}}
+# GNU:      DynamicSection [ (1 entries)
+# GNU-NEXT:   Tag                Type Name/Value
+# GNU-NEXT:   0x0000000000000000 NULL 0x0
+# GNU-NEXT: ]
 
 --- !ELF
 FileHeader:
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 159e300de3058..b86a6c192cab3 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1331,6 +1331,7 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
 
 template <typename ELFT>
 void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+  // Try to locate the PT_DYNAMIC header.
   const Elf_Phdr *DynamicPhdr = nullptr;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
     if (Phdr.p_type != ELF::PT_DYNAMIC)
@@ -1339,11 +1340,6 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
     break;
   }
 
-  // We do not want to dump dynamic section if we have no PT_DYNAMIC header.
-  // This matches GNU's behavior.
-  if (!DynamicPhdr)
-    return;
-
   // Try to locate the .dynamic section in the sections header table.
   const Elf_Shdr *DynamicSec = nullptr;
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
@@ -1358,9 +1354,16 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
   // Ignore sh_entsize and use the expected value for entry size explicitly.
   // This allows us to dump the dynamic sections with a broken sh_entsize
   // field.
-  if (DynamicSec)
+  if (DynamicSec) {
     DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
                              DynamicSec->sh_size, sizeof(Elf_Dyn)});
+    parseDynamicTable();
+  }
+
+  // If we have a PT_DYNAMIC header, we will either check the found dynamic
+  // section or take the dynamic table data directly from the header.
+  if (!DynamicPhdr)
+    return;
 
   if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
       ObjF->getMemoryBufferRef().getBufferSize())
@@ -1374,7 +1377,6 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
   }
 
   StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec));
-
   if (DynamicSec->sh_addr + DynamicSec->sh_size >
           DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
       DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
@@ -1386,8 +1388,6 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
     reportWarning("The SHT_DYNAMIC section '" + Name +
                   "' is not at the start of "
                   "PT_DYNAMIC segment");
-
-  parseDynamicTable();
 }
 
 template <typename ELFT>

From 33bee053c39b46c6aa46abca6cc758979da44b14 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Fri, 24 May 2019 11:24:42 +0000
Subject: [PATCH 0165/1176] Revert r361630 "[llvm-readelf] - Allow dumping of
 the .dynamic section even if there is no PT_DYNAMIC header."

It broke BB:
http://lab.llvm.org:8011/builders/ppc64le-lld-multistage-test/builds/3748

llvm-svn: 361631
---
 .../elf-dynamic-no-pt-dynamic.test             | 15 ++++-----------
 llvm/tools/llvm-readobj/ELFDumper.cpp          | 18 +++++++++---------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test b/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
index 5a03d04ab4d54..3ef293714a897 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
@@ -1,23 +1,16 @@
-## Show that dumping occurs even if there is no PT_DYNAMIC header.
-## This is inconsistent with the GNU behavior, but seems to be more reasonable.
+# Show that no dumping occurs if there is no PT_DYNAMIC header.
 # RUN: yaml2obj %s -o %t.no-phdr
 # RUN: llvm-readobj --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=LLVM
-# RUN: llvm-readelf --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=GNU
+# RUN: llvm-readelf --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=GNU --allow-empty
 
 # LLVM:      File: {{.*}}.no-phdr
 # LLVM-NEXT: Format: ELF64-x86-64
 # LLVM-NEXT: Arch: x86_64
 # LLVM-NEXT: AddressSize: 64bit
 # LLVM-NEXT: LoadName:{{ *}}
-# LLVM-NEXT: DynamicSection [ (1 entries)
-# LLVM-NEXT:   Tag                Type Name/Value
-# LLVM-NEXT:   0x0000000000000000 NULL 0x0
-# LLVM-NEXT: ]
+# LLVM-NOT:  {{.}}
 
-# GNU:      DynamicSection [ (1 entries)
-# GNU-NEXT:   Tag                Type Name/Value
-# GNU-NEXT:   0x0000000000000000 NULL 0x0
-# GNU-NEXT: ]
+# GNU-NOT: {{.}}
 
 --- !ELF
 FileHeader:
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index b86a6c192cab3..159e300de3058 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1331,7 +1331,6 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
 
 template <typename ELFT>
 void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
-  // Try to locate the PT_DYNAMIC header.
   const Elf_Phdr *DynamicPhdr = nullptr;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
     if (Phdr.p_type != ELF::PT_DYNAMIC)
@@ -1340,6 +1339,11 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
     break;
   }
 
+  // We do not want to dump dynamic section if we have no PT_DYNAMIC header.
+  // This matches GNU's behavior.
+  if (!DynamicPhdr)
+    return;
+
   // Try to locate the .dynamic section in the sections header table.
   const Elf_Shdr *DynamicSec = nullptr;
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
@@ -1354,16 +1358,9 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
   // Ignore sh_entsize and use the expected value for entry size explicitly.
   // This allows us to dump the dynamic sections with a broken sh_entsize
   // field.
-  if (DynamicSec) {
+  if (DynamicSec)
     DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
                              DynamicSec->sh_size, sizeof(Elf_Dyn)});
-    parseDynamicTable();
-  }
-
-  // If we have a PT_DYNAMIC header, we will either check the found dynamic
-  // section or take the dynamic table data directly from the header.
-  if (!DynamicPhdr)
-    return;
 
   if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
       ObjF->getMemoryBufferRef().getBufferSize())
@@ -1377,6 +1374,7 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
   }
 
   StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec));
+
   if (DynamicSec->sh_addr + DynamicSec->sh_size >
           DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
       DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
@@ -1388,6 +1386,8 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
     reportWarning("The SHT_DYNAMIC section '" + Name +
                   "' is not at the start of "
                   "PT_DYNAMIC segment");
+
+  parseDynamicTable();
 }
 
 template <typename ELFT>

From 522307fa40da4b4d892e273c058418e112d844a4 Mon Sep 17 00:00:00 2001
From: Stefan Pintilie <stefanp@ca.ibm.com>
Date: Fri, 24 May 2019 12:05:37 +0000
Subject: [PATCH 0166/1176] [PowerPC] Remove CRBits Copy Of Unset/set CBit

For the situation, where we generate the following code:

       crxor 8, 8, 8
       < Some instructions>
.LBB0_1:
       < Some instructions>
       cror 1, 8, 8

cror (COPY of CRbit) depends on the result of the crxor instruction.
CR8 is known to be zero as crxor is equivalent to CRUNSET. We can simply use
crxor 1, 1, 1 instead to zero out CR1, which does not have any dependency on
any previous instruction.

This patch will optimize it to:

        < Some instructions>
.LBB0_1:
        < Some instructions>
        cror 1, 1, 1

Patch By: Victor Huang (NeHuang)

Differential Revision: https://reviews.llvm.org/D62044

llvm-svn: 361632
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |   2 +
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   2 +
 llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll   |  14 +-
 .../PowerPC/remove-copy-crunsetcrbit.mir      | 178 ++++++++++++++++++
 4 files changed, 192 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index a03742d74025c..25f4c9aa8ebd1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -341,6 +341,8 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   case PPC::V_SETALLONESB:
   case PPC::V_SETALLONESH:
   case PPC::V_SETALLONES:
+  case PPC::CRSET:
+  case PPC::CRUNSET:
     return true;
   }
   return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 64511a0c79e1c..14fe0cd87fa52 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2546,6 +2546,7 @@ def CRORC  : XLForm_1<19, 417, (outs crbitrc:$CRD),
                       [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
 
 let isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
               "creqv $dst, $dst, $dst", IIC_BrCR,
               [(set i1:$dst, 1)]>;
@@ -2553,6 +2554,7 @@ def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
 def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
               "crxor $dst, $dst, $dst", IIC_BrCR,
               [(set i1:$dst, 0)]>;
+}
 
 let Defs = [CR1EQ], CRD = 6 in {
 def CR6SET  : XLForm_1_ext<19, 289, (outs), (ins),
diff --git a/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll b/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll
index be05c57b83f21..f49a70325b5d4 100644
--- a/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll
+++ b/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:     -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 \
 ; RUN:     -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
 
 
@@ -16,13 +16,19 @@
 ; Function Attrs: nounwind
 define dso_local signext i32 @spillCRSET(i32 signext %p1, i32 signext %p2) {
 ; CHECK-LABEL: spillCRSET:
-; CHECK:       # %bb.0: # %entry
-; CHECK:        lis [[REG1:.*]], -32768
+; CHECK:        # %bb.2:
+; CHECK-DAG:    crnor [[CREG:.*]]*cr5+lt, eq, eq
+; CHECK-DAG:    mfocrf [[REG2:.*]], [[CREG]]
+; CHECK-DAG:    rlwinm [[REG2]], [[REG2]]
+; CHECK:        .LBB0_3:
 ; CHECK-DAG:    creqv [[CREG:.*]]*cr5+lt, [[CREG]]*cr5+lt, [[CREG]]*cr5+lt
+; CHECK:        lis [[REG1:.*]], -32768
+; CHECK:        .LBB0_4:
 ; CHECK-NOT:    mfocrf [[REG2:.*]], [[CREG]]
 ; CHECK-NOT:    rlwinm [[REG2]], [[REG2]]
 ; CHECK:        stw [[REG1]]
-; CHECK:  .LBB0_1: # %redo_first_pass
+; CHECK:        # %bb.5:
+
 entry:
   %tobool = icmp eq i32 %p2, 0
   %tobool2 = icmp eq i32 %p1, 0
diff --git a/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir b/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir
new file mode 100644
index 0000000000000..7ede66c32e4d1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir
@@ -0,0 +1,178 @@
+# RUN: llc -run-pass simple-register-coalescing %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i64:64-n32:64"
+  target triple = "powerpc64le-unknown-linux-gnu"
+
+  @b = common dso_local local_unnamed_addr global i32 0, align 4
+  @d = common dso_local local_unnamed_addr global i32 0, align 4
+  @e = common dso_local local_unnamed_addr global i32* null, align 8
+  @c = common dso_local local_unnamed_addr global i32 0, align 4
+  @a = common dso_local local_unnamed_addr global [1 x i32] zeroinitializer, align 4
+
+  ; Function Attrs: norecurse nounwind
+  define dso_local signext i32 @copycrunset() local_unnamed_addr #0 {
+  entry:
+    %0 = load i32, i32* @b, align 4
+    %tobool3 = icmp eq i32 %0, 0
+    br i1 %tobool3, label %while.end, label %while.body.preheader
+
+  while.body.preheader:                             ; preds = %entry
+    %.pre = load i32, i32* @d, align 4
+    %tobool1 = icmp eq i32 %.pre, 0
+    br label %while.body
+
+  while.body:                                       ; preds = %land.end, %while.body.preheader
+    br i1 %tobool1, label %land.end, label %land.rhs
+
+  land.rhs:                                         ; preds = %while.body
+    %1 = load i32*, i32** @e, align 8
+    %2 = load i32, i32* %1, align 4
+    %idxprom = sext i32 %2 to i64
+    %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 %idxprom
+    %3 = load i32, i32* %arrayidx, align 4
+    %tobool2 = icmp ne i32 %3, 0
+    br label %land.end
+
+  land.end:                                         ; preds = %land.rhs, %while.body
+    %4 = phi i1 [ false, %while.body ], [ %tobool2, %land.rhs ]
+    %land.ext = zext i1 %4 to i32
+    store i32 %land.ext, i32* @c, align 4
+    br label %while.body
+
+  while.end:                                        ; preds = %entry
+    ret i32 undef
+  }
+
+...
+---
+name:            copycrunset
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: crbitrc, preferred-register: '' }
+  - { id: 1, class: crbitrc, preferred-register: '' }
+  - { id: 2, class: crbitrc, preferred-register: '' }
+  - { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 4, class: gprc, preferred-register: '' }
+  - { id: 5, class: crrc, preferred-register: '' }
+  - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 7, class: gprc, preferred-register: '' }
+  - { id: 8, class: crrc, preferred-register: '' }
+  - { id: 9, class: crbitrc, preferred-register: '' }
+  - { id: 10, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 11, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 12, class: g8rc, preferred-register: '' }
+  - { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 15, class: g8rc, preferred-register: '' }
+  - { id: 16, class: gprc, preferred-register: '' }
+  - { id: 17, class: crrc, preferred-register: '' }
+  - { id: 18, class: crbitrc, preferred-register: '' }
+  - { id: 19, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 20, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 21, class: gprc, preferred-register: '' }
+  - { id: 22, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 23, class: g8rc, preferred-register: '' }
+  - { id: 24, class: crbitrc, preferred-register: '' }
+liveins:
+  - { reg: '$x2', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.5(0x30000000), %bb.1(0x50000000)
+    liveins: $x2
+
+    %3:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @b
+    %4:gprc = LWZ target-flags(ppc-toc-lo) @b, killed %3, implicit $x2 :: (dereferenceable load 4 from @b)
+    %5:crrc = CMPLWI killed %4, 0
+    BCC 76, killed %5, %bb.5
+    B %bb.1
+
+  bb.1.while.body.preheader:
+    successors: %bb.2(0x80000000)
+    liveins: $x2
+
+    %6:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @d
+    %7:gprc = LWZ target-flags(ppc-toc-lo) @d, killed %6, implicit $x2 :: (dereferenceable load 4 from @d)
+    %8:crrc = CMPWI killed %7, 0
+    %0:crbitrc = COPY killed %8.sub_eq
+    %9:crbitrc = CRUNSET
+    %19:gprc_and_gprc_nor0 = LI 0
+    %20:gprc_and_gprc_nor0 = LI 1
+    %22:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @c
+    %10:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @e
+    %13:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @a
+    %14:g8rc_and_g8rc_nox0 = ADDItocL killed %13, @a, implicit $x2
+
+  bb.2.while.body:
+    successors: %bb.4(0x30000000), %bb.3(0x50000000)
+    liveins: $x2
+
+    %24:crbitrc = COPY %9
+    BC %0, %bb.4
+    B %bb.3
+
+  bb.3.land.rhs:
+    successors: %bb.4(0x80000000)
+    liveins: $x2
+
+    %11:g8rc_and_g8rc_nox0 = LD target-flags(ppc-toc-lo) @e, %10, implicit $x2 :: (dereferenceable load 8 from @e)
+    %12:g8rc = LWA 0, killed %11 :: (load 4 from %ir.1)
+    %15:g8rc = RLDICR killed %12, 2, 61
+    %16:gprc = LWZX %14, killed %15 :: (load 4 from %ir.arrayidx)
+    %17:crrc = CMPWI killed %16, 0
+    %18:crbitrc = COPY killed %17.sub_eq
+    %1:crbitrc = CRNOR killed %18, %18
+    %24:crbitrc = COPY killed %1
+
+  bb.4.land.end:
+    successors: %bb.2(0x80000000)
+    liveins: $x2
+
+    %2:crbitrc = COPY killed %24
+    %21:gprc = ISEL %20, %19, killed %2
+    STW killed %21, target-flags(ppc-toc-lo) @c, %22, implicit $x2 :: (store 4 into @c)
+    B %bb.2
+
+  bb.5.while.end:
+    %23:g8rc = LI8 0
+    $x3 = COPY killed %23
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+
+...
+#Copy of CRUNSET should be removed in simple register coalescing pass
+#CHECK-LABEL: copycrunset
+#CHECK: bb.1.while.body.preheader:
+#CHECK-NOT: %9:crbitrc = CRUNSET
+#CHECK: bb.2.while.body:
+#CHECK-NOT: %24:crbitrc = COPY %9
+#CHECK: %24:crbitrc = CRUNSET
+#CHECK: B %bb.3

From 8362cbe13b3226c7048b8c523e2cc8363ace47c5 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Fri, 24 May 2019 12:22:53 +0000
Subject: [PATCH 0167/1176] [llvm-readobj] Implement GNU-style output for
 dynamic table

GNU readelf tool prints slightly different dynamic table "header" and
surrounds dynamic tag names by brackets. This patch implements the same
formatting for GNU-style output of the `llvm-readobj`.

LLVM
```
DynamicSection [ (13 entries)
  Tag        Type                 Name/Value
  0x00000006 SYMTAB               0x168
  ...
]
```

GNU
```
Dynamic section at offset 0x1d0 contains 13 entries:
  Tag        Type                 Name/Value
  0x00000006 (SYMTAB)             0x168
  ...
```

Differential Revision: https://reviews.llvm.org/D62256

llvm-svn: 361633
---
 lld/test/ELF/ppc64-dynamic-relocations.s      |   2 +-
 llvm/test/tools/llvm-readobj/dynamic.test     |  59 ++++++++
 .../llvm-readobj/elf-dynamic-malformed.test   |  19 ++-
 .../elf-dynamic-not-in-pt-dynamic.test        |  24 ++--
 .../llvm-readobj/elf-dynamic-table-dtnull.s   |  35 +++--
 .../elf-dynamic-tags-machine-specific.test    | 127 +++++++++---------
 .../tools/llvm-readobj/elf-dynamic-tags.test  | 125 +++++++++--------
 .../elf-non-dynamic-in-pt-dynamic.test        |  22 ++-
 .../tools/llvm-readobj/elf-versioninfo.test   |   6 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 115 ++++++++++------
 10 files changed, 327 insertions(+), 207 deletions(-)

diff --git a/lld/test/ELF/ppc64-dynamic-relocations.s b/lld/test/ELF/ppc64-dynamic-relocations.s
index a3ede240ee225..71a26137a7793 100644
--- a/lld/test/ELF/ppc64-dynamic-relocations.s
+++ b/lld/test/ELF/ppc64-dynamic-relocations.s
@@ -29,7 +29,7 @@
 // DIS:     .plt       00000018  0000000010030000 BSS
 
 // DT_PLTGOT should point to the start of the .plt section.
-// DT: 0x0000000000000003 PLTGOT               0x10030000
+// DT: 0x0000000000000003 (PLTGOT)             0x10030000
 
     .text
     .abiversion 2
diff --git a/llvm/test/tools/llvm-readobj/dynamic.test b/llvm/test/tools/llvm-readobj/dynamic.test
index 02ae622038ca2..5f3cdabad62ef 100644
--- a/llvm/test/tools/llvm-readobj/dynamic.test
+++ b/llvm/test/tools/llvm-readobj/dynamic.test
@@ -1,6 +1,8 @@
 // Check dynamic section tags in case of shared library file.
 RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-so.mips \
 RUN:     | FileCheck %s -check-prefix ELF-MIPS
+RUN: llvm-readelf --dynamic-table %p/Inputs/dynamic-table-so.mips \
+RUN:     | FileCheck %s --strict-whitespace -check-prefix ELF-MIPS-GNU
 
 ELF-MIPS: Format: ELF32-mips
 ELF-MIPS: Arch: mipsel
@@ -33,6 +35,32 @@ ELF-MIPS:   0x6FFFFFF0 VERSYM               0x4C0
 ELF-MIPS:   0x00000000 NULL                 0x0
 ELF-MIPS: ]
 
+ELF-MIPS-GNU:      Dynamic section at offset 0x{{.*}} contains 23 entries:
+ELF-MIPS-GNU-NEXT:   Tag        Type                 Name/Value
+ELF-MIPS-GNU-NEXT:   0x00000001 (NEEDED)             Shared library: [libc.so.6]
+ELF-MIPS-GNU-NEXT:   0x0000000c (INIT)               0x528
+ELF-MIPS-GNU-NEXT:   0x0000000d (FINI)               0x860
+ELF-MIPS-GNU-NEXT:   0x00000004 (HASH)               0x210
+ELF-MIPS-GNU-NEXT:   0x00000005 (STRTAB)             0x3d8
+ELF-MIPS-GNU-NEXT:   0x00000006 (SYMTAB)             0x2a8
+ELF-MIPS-GNU-NEXT:   0x0000000a (STRSZ)              231 (bytes)
+ELF-MIPS-GNU-NEXT:   0x0000000b (SYMENT)             16 (bytes)
+ELF-MIPS-GNU-NEXT:   0x00000003 (PLTGOT)             0x108e0
+ELF-MIPS-GNU-NEXT:   0x00000011 (REL)                0x518
+ELF-MIPS-GNU-NEXT:   0x00000012 (RELSZ)              16 (bytes)
+ELF-MIPS-GNU-NEXT:   0x00000013 (RELENT)             8 (bytes)
+ELF-MIPS-GNU-NEXT:   0x70000001 (MIPS_RLD_VERSION)   1
+ELF-MIPS-GNU-NEXT:   0x70000005 (MIPS_FLAGS)         NOTPOT
+ELF-MIPS-GNU-NEXT:   0x70000006 (MIPS_BASE_ADDRESS)  0x0
+ELF-MIPS-GNU-NEXT:   0x7000000a (MIPS_LOCAL_GOTNO)   10
+ELF-MIPS-GNU-NEXT:   0x70000011 (MIPS_SYMTABNO)      19
+ELF-MIPS-GNU-NEXT:   0x70000012 (MIPS_UNREFEXTNO)    26
+ELF-MIPS-GNU-NEXT:   0x70000013 (MIPS_GOTSYM)        0xd
+ELF-MIPS-GNU-NEXT:   0x6ffffffe (VERNEED)            0x4e8
+ELF-MIPS-GNU-NEXT:   0x6fffffff (VERNEEDNUM)         1
+ELF-MIPS-GNU-NEXT:   0x6ffffff0 (VERSYM)             0x4c0
+ELF-MIPS-GNU-NEXT:   0x00000000 (NULL)               0x0
+
 // Check dynamic section tags in case of non-pic executable file.
 RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-exe.mips \
 RUN:     | FileCheck %s -check-prefix ELF-MIPS-EXE
@@ -151,6 +179,8 @@ ELF-X86-SO:   0x0000000000000000 NULL                 0x0
 
 RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-so.aarch64 \
 RUN:     | FileCheck %s -check-prefix ELF-AARCH64-SO
+RUN: llvm-readelf --dynamic-table %p/Inputs/dynamic-table-so.aarch64 \
+RUN:     | FileCheck %s --strict-whitespace -check-prefix ELF-AARCH64-SO-GNU
 
 ELF-AARCH64-SO: Format: ELF64-aarch64-little
 ELF-AARCH64-SO: Arch: aarch64
@@ -185,3 +215,32 @@ ELF-AARCH64-SO:   0x000000006FFFFFF0 VERSYM               0x4F2
 ELF-AARCH64-SO:   0x000000006FFFFFF9 RELACOUNT            3
 ELF-AARCH64-SO:   0x0000000000000000 NULL                 0x0
 ELF-AARCH64-SO: ]
+
+ELF-AARCH64-SO-GNU:      Dynamic section at offset 0x{{.*}} contains 26 entries:
+ELF-AARCH64-SO-GNU-NEXT:   Tag                Type                 Name/Value
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000c (INIT)               0x660
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000d (FINI)               0x83c
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000019 (INIT_ARRAY)         0x10db8
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000001b (INIT_ARRAYSZ)       8 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000001a (FINI_ARRAY)         0x10dc0
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000001c (FINI_ARRAYSZ)       8 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffef5 (GNU_HASH)           0x1f0
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000005 (STRTAB)             0x420
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000006 (SYMTAB)             0x240
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000a (STRSZ)              210 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000b (SYMENT)             24 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000003 (PLTGOT)             0x10fe8
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000002 (PLTRELSZ)           96 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000014 (PLTREL)             RELA
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000017 (JMPREL)             0x600
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffef6 (TLSDESC_PLT)        0x6d0
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffef7 (TLSDESC_GOT)        0x10fe0
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000007 (RELA)               0x540
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000008 (RELASZ)             192 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000009 (RELAENT)            24 (bytes)
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffffe (VERNEED)            0x520
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006fffffff (VERNEEDNUM)         1
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffff0 (VERSYM)             0x4f2
+ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffff9 (RELACOUNT)          3
+ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000000 (NULL)               0x0
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
index 4e04423d7b18c..aaee340e1cca6 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
@@ -28,14 +28,19 @@ ProgramHeaders:
 
 # Test handling of a .dynamic section with an invalid entsize (i.e. not 2 * sizeof(Elf_Dyn)).
 # RUN: yaml2obj %s --docnum=2 -o %t.bad-entsize
-# RUN: llvm-readobj --dynamic-table %t.bad-entsize | FileCheck %s --check-prefix BAD-ENTSIZE
-# RUN: llvm-readelf --dynamic-table %t.bad-entsize | FileCheck %s --check-prefix BAD-ENTSIZE
+# RUN: llvm-readobj --dynamic-table %t.bad-entsize | FileCheck %s --check-prefix BAD-ENTSIZE-LLVM
+# RUN: llvm-readelf --dynamic-table %t.bad-entsize | FileCheck %s --check-prefix BAD-ENTSIZE-GNU
 
-# BAD-ENTSIZE:      DynamicSection [ (2 entries)
-# BAD-ENTSIZE-NEXT:   Tag                Type                 Name/Value
-# BAD-ENTSIZE-NEXT:   0x0000000000000015 DEBUG                0x0
-# BAD-ENTSIZE-NEXT:   0x0000000000000000 NULL                 0x0
-# BAD-ENTSIZE-NEXT: ]
+# BAD-ENTSIZE-LLVM:      DynamicSection [ (2 entries)
+# BAD-ENTSIZE-LLVM-NEXT:   Tag                Type                 Name/Value
+# BAD-ENTSIZE-LLVM-NEXT:   0x0000000000000015 DEBUG                0x0
+# BAD-ENTSIZE-LLVM-NEXT:   0x0000000000000000 NULL                 0x0
+# BAD-ENTSIZE-LLVM-NEXT: ]
+
+# BAD-ENTSIZE-GNU:      Dynamic section at offset 0x{{.*}} contains 2 entries:
+# BAD-ENTSIZE-GNU-NEXT:   Tag                Type                 Name/Value
+# BAD-ENTSIZE-GNU-NEXT:   0x0000000000000015 (DEBUG)              0x0
+# BAD-ENTSIZE-GNU-NEXT:   0x0000000000000000 (NULL)               0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-not-in-pt-dynamic.test b/llvm/test/tools/llvm-readobj/elf-dynamic-not-in-pt-dynamic.test
index d52ec7696fdfd..482d683f5c108 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-not-in-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-not-in-pt-dynamic.test
@@ -2,15 +2,23 @@
 ## section when it is not in a PT_DYNAMIC segment.
 
 # RUN: yaml2obj %s -o %t.o
-# RUN: llvm-readobj --dynamic-table %t.o 2>&1 | FileCheck %s
-# RUN: llvm-readelf --dynamic-table %t.o 2>&1 | FileCheck %s
+# RUN: llvm-readobj --dynamic-table %t.o 2>&1 \
+# RUN:   | FileCheck --check-prefixes=WARNING,LLVM %s
+# RUN: llvm-readelf --dynamic-table %t.o 2>&1 \
+# RUN:   | FileCheck --check-prefixes=WARNING,GNU %s
 
-# CHECK:      warning: The SHT_DYNAMIC section '.dynamic' is not contained within the PT_DYNAMIC segment
-# CHECK:      DynamicSection [ (2 entries)
-# CHECK-NEXT:   Tag                Type     Name/Value
-# CHECK-NEXT:   0x0000000000000018 BIND_NOW 0x1
-# CHECK-NEXT:   0x0000000000000000 NULL     0x0
-# CHECK-NEXT: ]
+# WARNING:   warning: The SHT_DYNAMIC section '.dynamic' is not contained within the PT_DYNAMIC segment
+
+# LLVM:      DynamicSection [ (2 entries)
+# LLVM-NEXT:   Tag                Type     Name/Value
+# LLVM-NEXT:   0x0000000000000018 BIND_NOW 0x1
+# LLVM-NEXT:   0x0000000000000000 NULL     0x0
+# LLVM-NEXT: ]
+
+# GNU:      Dynamic section at offset 0x{{.*}} contains 2 entries:
+# GNU-NEXT:   Tag                Type       Name/Value
+# GNU-NEXT:   0x0000000000000018 (BIND_NOW) 0x1
+# GNU-NEXT:   0x0000000000000000 (NULL)     0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-table-dtnull.s b/llvm/test/tools/llvm-readobj/elf-dynamic-table-dtnull.s
index 8bb8d055bfba9..b613e4137d125 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-table-dtnull.s
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-table-dtnull.s
@@ -1,13 +1,17 @@
 # Check we are able to dump the dynamic section without a DT_NULL entry correctly.
 
 # RUN: yaml2obj -docnum=1 %s -o %t.o
-# RUN: llvm-readobj --dynamic-table %t.o | FileCheck %s --check-prefix=NONULL
-# RUN: llvm-readelf --dynamic-table %t.o | FileCheck %s --check-prefix=NONULL
+# RUN: llvm-readobj --dynamic-table %t.o | FileCheck %s --check-prefix=NONULL-LLVM
+# RUN: llvm-readelf --dynamic-table %t.o | FileCheck %s --check-prefix=NONULL-GNU
 
-# NONULL:      DynamicSection [ (1 entries)
-# NONULL-NEXT:   Tag                Type   Name/Value
-# NONULL-NEXT:   0x0000000000000015 DEBUG  0x0
-# NONULL-NEXT: ]
+# NONULL-LLVM:      DynamicSection [ (1 entries)
+# NONULL-LLVM-NEXT:   Tag                Type   Name/Value
+# NONULL-LLVM-NEXT:   0x0000000000000015 DEBUG  0x0
+# NONULL-LLVM-NEXT: ]
+
+# NONULL-GNU:      Dynamic section at offset {{.*}} contains 1 entries:
+# NONULL-GNU-NEXT:   Tag                Type     Name/Value
+# NONULL-GNU-NEXT:   0x0000000000000015 (DEBUG)  0x0
 
 --- !ELF
 FileHeader:
@@ -39,14 +43,19 @@ ProgramHeaders:
 # past the DT_NULL entry, which works as a terminator.
 
 # RUN: yaml2obj -docnum=2 %s -o %t.o
-# RUN: llvm-readobj --dynamic-table %t.o | FileCheck %s --check-prefix=LONG
-# RUN: llvm-readelf --dynamic-table %t.o | FileCheck %s --check-prefix=LONG
+# RUN: llvm-readobj --dynamic-table %t.o | FileCheck %s --check-prefix=LONG-LLVM
+# RUN: llvm-readelf --dynamic-table %t.o | FileCheck %s --check-prefix=LONG-GNU
+
+# LONG-LLVM:      DynamicSection [ (2 entries)
+# LONG-LLVM-NEXT:   Tag                Type                 Name/Value
+# LONG-LLVM-NEXT:   0x0000000000000015 DEBUG                0x0
+# LONG-LLVM-NEXT:   0x0000000000000000 NULL                 0x0
+# LONG-LLVM-NEXT: ]
 
-# LONG:      DynamicSection [ (2 entries)
-# LONG-NEXT:   Tag                Type                 Name/Value
-# LONG-NEXT:   0x0000000000000015 DEBUG                0x0
-# LONG-NEXT:   0x0000000000000000 NULL                 0x0
-# LONG-NEXT: ]
+# LONG-GNU:      Dynamic section at offset {{.*}} contains 2 entries:
+# LONG-GNU-NEXT:   Tag                Type                 Name/Value
+# LONG-GNU-NEXT:   0x0000000000000015 (DEBUG)              0x0
+# LONG-GNU-NEXT:   0x0000000000000000 (NULL)               0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test b/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
index 95c008dd37437..06c8b6d3fbe70 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
@@ -13,15 +13,14 @@
 # LLVM-HEXAGON-NEXT:   0x0000000000000000 NULL                 0x0
 # LLVM-HEXAGON-NEXT: ]
 
-# GNU-HEXAGON:      DynamicSection [ (6 entries)
+# GNU-HEXAGON:      Dynamic section at offset {{.*}} contains 6 entries:
 # GNU-HEXAGON-NEXT:   Tag                Type                 Name/Value
-# GNU-HEXAGON-NEXT:   0x0000000000000004 HASH                 0x1000
-# GNU-HEXAGON-NEXT:   0x0000000070000000 HEXAGON_SYMSZ        0x10
-# GNU-HEXAGON-NEXT:   0x0000000070000001 HEXAGON_VER          4096
-# GNU-HEXAGON-NEXT:   0x0000000070000002 HEXAGON_PLT          0x1000
-# GNU-HEXAGON-NEXT:   0x000000001234abcd unknown              0x1
-# GNU-HEXAGON-NEXT:   0x0000000000000000 NULL                 0x0
-# GNU-HEXAGON-NEXT: ]
+# GNU-HEXAGON-NEXT:   0x0000000000000004 (HASH)               0x1000
+# GNU-HEXAGON-NEXT:   0x0000000070000000 (HEXAGON_SYMSZ)      0x10
+# GNU-HEXAGON-NEXT:   0x0000000070000001 (HEXAGON_VER)        4096
+# GNU-HEXAGON-NEXT:   0x0000000070000002 (HEXAGON_PLT)        0x1000
+# GNU-HEXAGON-NEXT:   0x000000001234abcd (unknown)            0x1
+# GNU-HEXAGON-NEXT:   0x0000000000000000 (NULL)               0x0
 
 
 # Test that MIPS machine-specific tags can be dumped.
@@ -81,57 +80,56 @@
 # LLVM-MIPS-NEXT:   0x0000000000000000 NULL                 0x0
 # LLVM-MIPS-NEXT: ]
 
-# GNU-MIPS:      DynamicSection [ (48 entries)
-# GNU-MIPS-NEXT:     Tag                Type                 Name/Value
-# GNU-MIPS-NEXT:   0x0000000000000004 HASH                 0x1000
-# GNU-MIPS-NEXT:   0x0000000070000001 MIPS_RLD_VERSION     305419896
-# GNU-MIPS-NEXT:   0x0000000070000002 MIPS_TIME_STAMP      0x11223344
-# GNU-MIPS-NEXT:   0x0000000070000003 MIPS_ICHECKSUM       0x11112222
-# GNU-MIPS-NEXT:   0x0000000070000004 MIPS_IVERSION        0x1
-# GNU-MIPS-NEXT:   0x0000000070000005 MIPS_FLAGS           QUICKSTART SGI_ONLY PIXIE CORD
-# GNU-MIPS-NEXT:   0x0000000070000006 MIPS_BASE_ADDRESS    0x87654321
-# GNU-MIPS-NEXT:   0x0000000070000007 MIPS_MSYM            0x1000
-# GNU-MIPS-NEXT:   0x0000000070000008 MIPS_CONFLICT        0x1000
-# GNU-MIPS-NEXT:   0x0000000070000009 MIPS_LIBLIST         0x1000
-# GNU-MIPS-NEXT:   0x000000007000000a MIPS_LOCAL_GOTNO     1
-# GNU-MIPS-NEXT:   0x000000007000000b MIPS_CONFLICTNO      0x1
-# GNU-MIPS-NEXT:   0x0000000070000010 MIPS_LIBLISTNO       0x1
-# GNU-MIPS-NEXT:   0x0000000070000011 MIPS_SYMTABNO        1
-# GNU-MIPS-NEXT:   0x0000000070000012 MIPS_UNREFEXTNO      0
-# GNU-MIPS-NEXT:   0x0000000070000013 MIPS_GOTSYM          0x0
-# GNU-MIPS-NEXT:   0x0000000070000014 MIPS_HIPAGENO        0x88776655
-# GNU-MIPS-NEXT:   0x0000000070000016 MIPS_RLD_MAP         0x1000
-# GNU-MIPS-NEXT:   0x0000000070000017 MIPS_DELTA_CLASS     0x1000
-# GNU-MIPS-NEXT:   0x0000000070000018 MIPS_DELTA_CLASS_NO  0x1
-# GNU-MIPS-NEXT:   0x0000000070000019 MIPS_DELTA_INSTANCE  0x1000
-# GNU-MIPS-NEXT:   0x000000007000001a MIPS_DELTA_INSTANCE_NO0x1
-# GNU-MIPS-NEXT:   0x000000007000001b MIPS_DELTA_RELOC     0x1000
-# GNU-MIPS-NEXT:   0x000000007000001c MIPS_DELTA_RELOC_NO  0x1
-# GNU-MIPS-NEXT:   0x000000007000001d MIPS_DELTA_SYM       0x1000
-# GNU-MIPS-NEXT:   0x000000007000001e MIPS_DELTA_SYM_NO    0x1
-# GNU-MIPS-NEXT:   0x0000000070000020 MIPS_DELTA_CLASSSYM  0x1000
-# GNU-MIPS-NEXT:   0x0000000070000021 MIPS_DELTA_CLASSSYM_NO0x1
-# GNU-MIPS-NEXT:   0x0000000070000022 MIPS_CXX_FLAGS       0x88887777
-# GNU-MIPS-NEXT:   0x0000000070000023 MIPS_PIXIE_INIT      0x1000
-# GNU-MIPS-NEXT:   0x0000000070000025 MIPS_LOCALPAGE_GOTIDX0x1
-# GNU-MIPS-NEXT:   0x0000000070000026 MIPS_LOCAL_GOTIDX    0x1
-# GNU-MIPS-NEXT:   0x0000000070000027 MIPS_HIDDEN_GOTIDX   0x1
-# GNU-MIPS-NEXT:   0x0000000070000028 MIPS_PROTECTED_GOTIDX0x1
-# GNU-MIPS-NEXT:   0x0000000070000029 MIPS_OPTIONS         0x1000
-# GNU-MIPS-NEXT:   0x000000007000002a MIPS_INTERFACE       0x1000
-# GNU-MIPS-NEXT:   0x000000007000002b MIPS_DYNSTR_ALIGN    0x88888888
-# GNU-MIPS-NEXT:   0x000000007000002c MIPS_INTERFACE_SIZE  0x10
-# GNU-MIPS-NEXT:   0x000000007000002d MIPS_RLD_TEXT_RESOLVE_ADDR0x8
-# GNU-MIPS-NEXT:   0x000000007000002e MIPS_PERF_SUFFIX     0x0
-# GNU-MIPS-NEXT:   0x000000007000002f MIPS_COMPACT_SIZE    0x10
-# GNU-MIPS-NEXT:   0x0000000070000030 MIPS_GP_VALUE        0x1
-# GNU-MIPS-NEXT:   0x0000000070000031 MIPS_AUX_DYNAMIC     0x1000
-# GNU-MIPS-NEXT:   0x0000000070000032 MIPS_PLTGOT          0x1000
-# GNU-MIPS-NEXT:   0x0000000070000034 MIPS_RWPLT           0x1000
-# GNU-MIPS-NEXT:   0x0000000070000035 MIPS_RLD_MAP_REL     0x1000
-# GNU-MIPS-NEXT:   0x000000001234abcd unknown              0x1
-# GNU-MIPS-NEXT:   0x0000000000000000 NULL                 0x0
-# GNU-MIPS-NEXT: ]
+# GNU-MIPS:      Dynamic section at offset {{.*}} contains 48 entries:
+# GNU-MIPS-NEXT:   Tag                Type                 Name/Value
+# GNU-MIPS-NEXT:   0x0000000000000004 (HASH)               0x1000
+# GNU-MIPS-NEXT:   0x0000000070000001 (MIPS_RLD_VERSION)   305419896
+# GNU-MIPS-NEXT:   0x0000000070000002 (MIPS_TIME_STAMP)    0x11223344
+# GNU-MIPS-NEXT:   0x0000000070000003 (MIPS_ICHECKSUM)     0x11112222
+# GNU-MIPS-NEXT:   0x0000000070000004 (MIPS_IVERSION)      0x1
+# GNU-MIPS-NEXT:   0x0000000070000005 (MIPS_FLAGS)         QUICKSTART SGI_ONLY PIXIE CORD
+# GNU-MIPS-NEXT:   0x0000000070000006 (MIPS_BASE_ADDRESS)  0x87654321
+# GNU-MIPS-NEXT:   0x0000000070000007 (MIPS_MSYM)          0x1000
+# GNU-MIPS-NEXT:   0x0000000070000008 (MIPS_CONFLICT)      0x1000
+# GNU-MIPS-NEXT:   0x0000000070000009 (MIPS_LIBLIST)       0x1000
+# GNU-MIPS-NEXT:   0x000000007000000a (MIPS_LOCAL_GOTNO)   1
+# GNU-MIPS-NEXT:   0x000000007000000b (MIPS_CONFLICTNO)    0x1
+# GNU-MIPS-NEXT:   0x0000000070000010 (MIPS_LIBLISTNO)     0x1
+# GNU-MIPS-NEXT:   0x0000000070000011 (MIPS_SYMTABNO)      1
+# GNU-MIPS-NEXT:   0x0000000070000012 (MIPS_UNREFEXTNO)    0
+# GNU-MIPS-NEXT:   0x0000000070000013 (MIPS_GOTSYM)        0x0
+# GNU-MIPS-NEXT:   0x0000000070000014 (MIPS_HIPAGENO)      0x88776655
+# GNU-MIPS-NEXT:   0x0000000070000016 (MIPS_RLD_MAP)       0x1000
+# GNU-MIPS-NEXT:   0x0000000070000017 (MIPS_DELTA_CLASS)   0x1000
+# GNU-MIPS-NEXT:   0x0000000070000018 (MIPS_DELTA_CLASS_NO) 0x1
+# GNU-MIPS-NEXT:   0x0000000070000019 (MIPS_DELTA_INSTANCE) 0x1000
+# GNU-MIPS-NEXT:   0x000000007000001a (MIPS_DELTA_INSTANCE_NO) 0x1
+# GNU-MIPS-NEXT:   0x000000007000001b (MIPS_DELTA_RELOC)   0x1000
+# GNU-MIPS-NEXT:   0x000000007000001c (MIPS_DELTA_RELOC_NO) 0x1
+# GNU-MIPS-NEXT:   0x000000007000001d (MIPS_DELTA_SYM)     0x1000
+# GNU-MIPS-NEXT:   0x000000007000001e (MIPS_DELTA_SYM_NO)  0x1
+# GNU-MIPS-NEXT:   0x0000000070000020 (MIPS_DELTA_CLASSSYM) 0x1000
+# GNU-MIPS-NEXT:   0x0000000070000021 (MIPS_DELTA_CLASSSYM_NO) 0x1
+# GNU-MIPS-NEXT:   0x0000000070000022 (MIPS_CXX_FLAGS)     0x88887777
+# GNU-MIPS-NEXT:   0x0000000070000023 (MIPS_PIXIE_INIT)    0x1000
+# GNU-MIPS-NEXT:   0x0000000070000025 (MIPS_LOCALPAGE_GOTIDX) 0x1
+# GNU-MIPS-NEXT:   0x0000000070000026 (MIPS_LOCAL_GOTIDX)  0x1
+# GNU-MIPS-NEXT:   0x0000000070000027 (MIPS_HIDDEN_GOTIDX) 0x1
+# GNU-MIPS-NEXT:   0x0000000070000028 (MIPS_PROTECTED_GOTIDX) 0x1
+# GNU-MIPS-NEXT:   0x0000000070000029 (MIPS_OPTIONS)       0x1000
+# GNU-MIPS-NEXT:   0x000000007000002a (MIPS_INTERFACE)     0x1000
+# GNU-MIPS-NEXT:   0x000000007000002b (MIPS_DYNSTR_ALIGN)  0x88888888
+# GNU-MIPS-NEXT:   0x000000007000002c (MIPS_INTERFACE_SIZE) 0x10
+# GNU-MIPS-NEXT:   0x000000007000002d (MIPS_RLD_TEXT_RESOLVE_ADDR) 0x8
+# GNU-MIPS-NEXT:   0x000000007000002e (MIPS_PERF_SUFFIX)   0x0
+# GNU-MIPS-NEXT:   0x000000007000002f (MIPS_COMPACT_SIZE)  0x10
+# GNU-MIPS-NEXT:   0x0000000070000030 (MIPS_GP_VALUE)      0x1
+# GNU-MIPS-NEXT:   0x0000000070000031 (MIPS_AUX_DYNAMIC)   0x1000
+# GNU-MIPS-NEXT:   0x0000000070000032 (MIPS_PLTGOT)        0x1000
+# GNU-MIPS-NEXT:   0x0000000070000034 (MIPS_RWPLT)         0x1000
+# GNU-MIPS-NEXT:   0x0000000070000035 (MIPS_RLD_MAP_REL)   0x1000
+# GNU-MIPS-NEXT:   0x000000001234abcd (unknown)            0x1
+# GNU-MIPS-NEXT:   0x0000000000000000 (NULL)               0x0
 
 
 # Test that PPC64 machine-specific tags can be dumped.
@@ -147,10 +145,9 @@
 # LLVM-PPC-NEXT:   0x0000000000000000 NULL                 0x0
 # LLVM-PPC-NEXT: ]
 
-# GNU-PPC:      DynamicSection [ (4 entries)
+# GNU-PPC:      Dynamic section at offset {{.*}} contains 4 entries:
 # GNU-PPC-NEXT:   Tag                Type                 Name/Value
-# GNU-PPC-NEXT:   0x0000000000000004 HASH                 0x1000
-# GNU-PPC-NEXT:   0x0000000070000000 PPC64_GLINK          0x1000
-# GNU-PPC-NEXT:   0x000000001234abcd unknown              0x1
-# GNU-PPC-NEXT:   0x0000000000000000 NULL                 0x0
-# GNU-PPC-NEXT: ]
+# GNU-PPC-NEXT:   0x0000000000000004 (HASH)               0x1000
+# GNU-PPC-NEXT:   0x0000000070000000 (PPC64_GLINK)        0x1000
+# GNU-PPC-NEXT:   0x000000001234abcd (unknown)            0x1
+# GNU-PPC-NEXT:   0x0000000000000000 (NULL)               0x0
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-tags.test b/llvm/test/tools/llvm-readobj/elf-dynamic-tags.test
index 15b8d678d189f..2e4f390920fda 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-tags.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-tags.test
@@ -68,70 +68,69 @@
 # LLVM-NEXT:   0x0000000000000000 NULL                 0x0
 # LLVM-NEXT: ]
 
-# GNU:      DynamicSection [ (61 entries)
+# GNU:      Dynamic section at offset {{.*}} contains 61 entries:
 # GNU-NEXT:   Tag                Type                 Name/Value
-# GNU-NEXT:   0x0000000000000001 NEEDED               Shared library: [D]
-# GNU-NEXT:   0x0000000000000002 PLTRELSZ             16 (bytes)
-# GNU-NEXT:   0x0000000000000003 PLTGOT               0x1000
-# GNU-NEXT:   0x0000000000000004 HASH                 0x1000
-# GNU-NEXT:   0x0000000000000005 STRTAB               0x1000
-# GNU-NEXT:   0x0000000000000006 SYMTAB               0x1000
-# GNU-NEXT:   0x0000000000000007 RELA                 0x1000
-# GNU-NEXT:   0x0000000000000008 RELASZ               16 (bytes)
-# GNU-NEXT:   0x0000000000000009 RELAENT              1929 (bytes)
-# GNU-NEXT:   0x000000000000000a STRSZ                16 (bytes)
-# GNU-NEXT:   0x000000000000000b SYMENT               2439 (bytes)
-# GNU-NEXT:   0x000000000000000c INIT                 0x1000
-# GNU-NEXT:   0x000000000000000d FINI                 0x1000
-# GNU-NEXT:   0x000000000000000e SONAME               Library soname: [U]
-# GNU-NEXT:   0x000000000000000f RPATH                f
-# GNU-NEXT:   0x0000000000000010 SYMBOLIC             0x1234567890abcdef
-# GNU-NEXT:   0x0000000000000011 REL                  0x1000
-# GNU-NEXT:   0x0000000000000012 RELSZ                16 (bytes)
-# GNU-NEXT:   0x0000000000000013 RELENT               291 (bytes)
-# GNU-NEXT:   0x0000000000000014 PLTREL               RELA
-# GNU-NEXT:   0x0000000000000015 DEBUG                0xfedcba0987654321
-# GNU-NEXT:   0x0000000000000016 TEXTREL              0x1122334455667788
-# GNU-NEXT:   0x0000000000000017 JMPREL               0x1000
-# GNU-NEXT:   0x0000000000000018 BIND_NOW             0x8877665544332211
-# GNU-NEXT:   0x0000000000000019 INIT_ARRAY           0x1000
-# GNU-NEXT:   0x000000000000001a FINI_ARRAY           0x1000
-# GNU-NEXT:   0x000000000000001b INIT_ARRAYSZ         16 (bytes)
-# GNU-NEXT:   0x000000000000001c FINI_ARRAYSZ         16 (bytes)
-# GNU-NEXT:   0x000000000000001d RUNPATH              w
-# GNU-NEXT:   0x000000000000001e FLAGS                ORIGIN SYMBOLIC TEXTREL BIND_NOW STATIC_TLS {{$}}
-# GNU-NEXT:   0x0000000000000020 PREINIT_ARRAY        0x1000
-# GNU-NEXT:   0x0000000000000021 PREINIT_ARRAYSZ      16 (bytes)
-# GNU-NEXT:   0x0000000000000022 SYMTAB_SHNDX         0x1000
-# GNU-NEXT:   0x0000000000000023 RELRSZ               0x10
-# GNU-NEXT:   0x0000000000000024 RELR                 0x1000
-# GNU-NEXT:   0x0000000000000025 RELRENT              0x4321
-# GNU-NEXT:   0x000000006000000f ANDROID_REL          0x1000
-# GNU-NEXT:   0x0000000060000010 ANDROID_RELSZ        16 (bytes)
-# GNU-NEXT:   0x0000000060000011 ANDROID_RELA         0x1000
-# GNU-NEXT:   0x0000000060000012 ANDROID_RELASZ       16 (bytes)
-# GNU-NEXT:   0x000000006fffe000 ANDROID_RELR         0x1000
-# GNU-NEXT:   0x000000006fffe001 ANDROID_RELRSZ       0x10
-# GNU-NEXT:   0x000000006fffe003 ANDROID_RELRENT      0x1234
-# GNU-NEXT:   0x000000006ffffef5 GNU_HASH             0x1000
-# GNU-NEXT:   0x000000006ffffef6 TLSDESC_PLT          0x1000
-# GNU-NEXT:   0x000000006ffffef7 TLSDESC_GOT          0x1000
-# GNU-NEXT:   0x000000006ffffff9 RELACOUNT            0
-# GNU-NEXT:   0x000000006ffffffa RELCOUNT             0
-# GNU-NEXT:   0x000000006ffffffb FLAGS_1              NOW GLOBAL GROUP NODELETE LOADFLTR INITFIRST NOOPEN ORIGIN DIRECT TRANS INTERPOSE NODEFLIB NODUMP CONFALT ENDFILTEE DISPRELDNE DISPRELPND NODIRECT IGNMULDEF NOKSYMS NOHDR EDITED NORELOC SYMINTPOSE GLOBAUDIT SINGLETON {{$}}
-# GNU-NEXT:   0x000000006ffffff0 VERSYM               0x1000
-# GNU-NEXT:   0x000000006ffffffc VERDEF               0x1000
-# GNU-NEXT:   0x000000006ffffffd VERDEFNUM            0
-# GNU-NEXT:   0x000000006ffffffe VERNEED              0x1000
-# GNU-NEXT:   0x000000006fffffff VERNEEDNUM           0
-# GNU-NEXT:   0x000000007ffffffd AUXILIARY            Auxiliary library: [D]
-# GNU-NEXT:   0x000000007ffffffe USED                 Not needed object: [U]
-# GNU-NEXT:   0x000000007fffffff FILTER               Filter library: [U]
-# GNU-NEXT:   0x0000000012345678 unknown              0x8765432187654321
-# GNU-NEXT:   0x000000006abcdef0 unknown              0x9988776655443322
-# GNU-NEXT:   0x0000000076543210 unknown              0x5555666677778888
-# GNU-NEXT:   0x0000000000000000 NULL                 0x0
-# GNU-NEXT: ]
+# GNU-NEXT:   0x0000000000000001 (NEEDED)             Shared library: [D]
+# GNU-NEXT:   0x0000000000000002 (PLTRELSZ)           16 (bytes)
+# GNU-NEXT:   0x0000000000000003 (PLTGOT)             0x1000
+# GNU-NEXT:   0x0000000000000004 (HASH)               0x1000
+# GNU-NEXT:   0x0000000000000005 (STRTAB)             0x1000
+# GNU-NEXT:   0x0000000000000006 (SYMTAB)             0x1000
+# GNU-NEXT:   0x0000000000000007 (RELA)               0x1000
+# GNU-NEXT:   0x0000000000000008 (RELASZ)             16 (bytes)
+# GNU-NEXT:   0x0000000000000009 (RELAENT)            1929 (bytes)
+# GNU-NEXT:   0x000000000000000a (STRSZ)              16 (bytes)
+# GNU-NEXT:   0x000000000000000b (SYMENT)             2439 (bytes)
+# GNU-NEXT:   0x000000000000000c (INIT)               0x1000
+# GNU-NEXT:   0x000000000000000d (FINI)               0x1000
+# GNU-NEXT:   0x000000000000000e (SONAME)             Library soname: [U]
+# GNU-NEXT:   0x000000000000000f (RPATH)              f
+# GNU-NEXT:   0x0000000000000010 (SYMBOLIC)           0x1234567890abcdef
+# GNU-NEXT:   0x0000000000000011 (REL)                0x1000
+# GNU-NEXT:   0x0000000000000012 (RELSZ)              16 (bytes)
+# GNU-NEXT:   0x0000000000000013 (RELENT)             291 (bytes)
+# GNU-NEXT:   0x0000000000000014 (PLTREL)             RELA
+# GNU-NEXT:   0x0000000000000015 (DEBUG)              0xfedcba0987654321
+# GNU-NEXT:   0x0000000000000016 (TEXTREL)            0x1122334455667788
+# GNU-NEXT:   0x0000000000000017 (JMPREL)             0x1000
+# GNU-NEXT:   0x0000000000000018 (BIND_NOW)           0x8877665544332211
+# GNU-NEXT:   0x0000000000000019 (INIT_ARRAY)         0x1000
+# GNU-NEXT:   0x000000000000001a (FINI_ARRAY)         0x1000
+# GNU-NEXT:   0x000000000000001b (INIT_ARRAYSZ)       16 (bytes)
+# GNU-NEXT:   0x000000000000001c (FINI_ARRAYSZ)       16 (bytes)
+# GNU-NEXT:   0x000000000000001d (RUNPATH)            w
+# GNU-NEXT:   0x000000000000001e (FLAGS)              ORIGIN SYMBOLIC TEXTREL BIND_NOW STATIC_TLS {{$}}
+# GNU-NEXT:   0x0000000000000020 (PREINIT_ARRAY)      0x1000
+# GNU-NEXT:   0x0000000000000021 (PREINIT_ARRAYSZ)    16 (bytes)
+# GNU-NEXT:   0x0000000000000022 (SYMTAB_SHNDX)       0x1000
+# GNU-NEXT:   0x0000000000000023 (RELRSZ)             0x10
+# GNU-NEXT:   0x0000000000000024 (RELR)               0x1000
+# GNU-NEXT:   0x0000000000000025 (RELRENT)            0x4321
+# GNU-NEXT:   0x000000006000000f (ANDROID_REL)        0x1000
+# GNU-NEXT:   0x0000000060000010 (ANDROID_RELSZ)      16 (bytes)
+# GNU-NEXT:   0x0000000060000011 (ANDROID_RELA)       0x1000
+# GNU-NEXT:   0x0000000060000012 (ANDROID_RELASZ)     16 (bytes)
+# GNU-NEXT:   0x000000006fffe000 (ANDROID_RELR)       0x1000
+# GNU-NEXT:   0x000000006fffe001 (ANDROID_RELRSZ)     0x10
+# GNU-NEXT:   0x000000006fffe003 (ANDROID_RELRENT)    0x1234
+# GNU-NEXT:   0x000000006ffffef5 (GNU_HASH)           0x1000
+# GNU-NEXT:   0x000000006ffffef6 (TLSDESC_PLT)        0x1000
+# GNU-NEXT:   0x000000006ffffef7 (TLSDESC_GOT)        0x1000
+# GNU-NEXT:   0x000000006ffffff9 (RELACOUNT)          0
+# GNU-NEXT:   0x000000006ffffffa (RELCOUNT)           0
+# GNU-NEXT:   0x000000006ffffffb (FLAGS_1)            NOW GLOBAL GROUP NODELETE LOADFLTR INITFIRST NOOPEN ORIGIN DIRECT TRANS INTERPOSE NODEFLIB NODUMP CONFALT ENDFILTEE DISPRELDNE DISPRELPND NODIRECT IGNMULDEF NOKSYMS NOHDR EDITED NORELOC SYMINTPOSE GLOBAUDIT SINGLETON {{$}}
+# GNU-NEXT:   0x000000006ffffff0 (VERSYM)             0x1000
+# GNU-NEXT:   0x000000006ffffffc (VERDEF)             0x1000
+# GNU-NEXT:   0x000000006ffffffd (VERDEFNUM)          0
+# GNU-NEXT:   0x000000006ffffffe (VERNEED)            0x1000
+# GNU-NEXT:   0x000000006fffffff (VERNEEDNUM)         0
+# GNU-NEXT:   0x000000007ffffffd (AUXILIARY)          Auxiliary library: [D]
+# GNU-NEXT:   0x000000007ffffffe (USED)               Not needed object: [U]
+# GNU-NEXT:   0x000000007fffffff (FILTER)             Filter library: [U]
+# GNU-NEXT:   0x0000000012345678 (unknown)            0x8765432187654321
+# GNU-NEXT:   0x000000006abcdef0 (unknown)            0x9988776655443322
+# GNU-NEXT:   0x0000000076543210 (unknown)            0x5555666677778888
+# GNU-NEXT:   0x0000000000000000 (NULL)               0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/llvm-readobj/elf-non-dynamic-in-pt-dynamic.test b/llvm/test/tools/llvm-readobj/elf-non-dynamic-in-pt-dynamic.test
index 610148f26ffc3..37c4ad538a426 100644
--- a/llvm/test/tools/llvm-readobj/elf-non-dynamic-in-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/elf-non-dynamic-in-pt-dynamic.test
@@ -5,15 +5,23 @@
 ## We check that we warn about this case.
 
 # RUN: yaml2obj --docnum=1 %s -o %t.o
-# RUN: llvm-readobj --dynamic-table %t.o 2>&1 | FileCheck %s --check-prefixes=WARNING,CHECK
-# RUN: llvm-readelf --dynamic-table %t.o 2>&1 | FileCheck %s --check-prefixes=WARNING,CHECK
+# RUN: llvm-readobj --dynamic-table %t.o 2>&1 \
+# RUN:   | FileCheck %s --check-prefixes=WARNING,LLVM
+# RUN: llvm-readelf --dynamic-table %t.o 2>&1 \
+# RUN:   | FileCheck %s --check-prefixes=WARNING,GNU
 
 # WARNING:    warning: The SHT_DYNAMIC section '.dynamic' is not at the start of PT_DYNAMIC segment
-# CHECK:      DynamicSection [ (2 entries)
-# CHECK-NEXT:   Tag                Type     Name/Value
-# CHECK-NEXT:   0x0000000000000018 BIND_NOW 0x1
-# CHECK-NEXT:   0x0000000000000000 NULL     0x0
-# CHECK-NEXT: ]
+
+# LLVM:      DynamicSection [ (2 entries)
+# LLVM-NEXT:   Tag                Type     Name/Value
+# LLVM-NEXT:   0x0000000000000018 BIND_NOW 0x1
+# LLVM-NEXT:   0x0000000000000000 NULL     0x0
+# LLVM-NEXT: ]
+
+# GNU:      Dynamic section at offset 0x{{.*}} contains 2 entries:
+# GNU-NEXT:   Tag                Type       Name/Value
+# GNU-NEXT:   0x0000000000000018 (BIND_NOW) 0x1
+# GNU-NEXT:   0x0000000000000000 (NULL)     0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
index 8138f09715fa3..28653b7608b76 100644
--- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test
+++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
@@ -73,9 +73,9 @@ LLVM-VERDEF-NEXT:     Predecessor: VERSION1
 LLVM-VERDEF-NEXT:   }
 LLVM-VERDEF-NEXT: }
 
-GNU-VERDEF: 0x000000006ffffff0 VERSYM               0x24c
-GNU-VERDEF: 0x000000006ffffffc VERDEF               0x25c
-GNU-VERDEF: 0x000000006ffffffd VERDEFNUM            3
+GNU-VERDEF: 0x000000006ffffff0 (VERSYM)             0x24c
+GNU-VERDEF: 0x000000006ffffffc (VERDEF)             0x25c
+GNU-VERDEF: 0x000000006ffffffd (VERDEFNUM)          3
 
 GNU-VERDEF:      Version symbols section '.gnu.version' contains 8 entries:
 GNU-VERDEF-NEXT:  Addr: 000000000000024c  Offset: 0x00024c  Link: 1 (.dynsym)
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 159e300de3058..48dd47d10824a 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -206,8 +206,6 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
   void loadDynamicTable(const ELFFile<ELFT> *Obj);
   void parseDynamicTable();
 
-  void printValue(uint64_t Type, uint64_t Value);
-
   StringRef getDynamicString(uint64_t Offset) const;
   StringRef getSymbolVersion(StringRef StrTab, const Elf_Sym *symb,
                              bool &IsDefault) const;
@@ -262,7 +260,18 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
 
 public:
   Elf_Dyn_Range dynamic_table() const {
-    return DynamicTable.getAsArrayRef<Elf_Dyn>();
+    // A valid .dynamic section contains an array of entries terminated
+    // with a DT_NULL entry. However, sometimes the section content may
+    // continue past the DT_NULL entry, so to dump the section correctly,
+    // we first find the end of the entries by iterating over them.
+    Elf_Dyn_Range Table = DynamicTable.getAsArrayRef<Elf_Dyn>();
+
+    size_t Size = 0;
+    while (Size < Table.size())
+      if (Table[Size++].getTag() == DT_NULL)
+        break;
+
+    return Table.slice(0, Size);
   }
 
   Elf_Sym_Range dynamic_symbols() const {
@@ -283,6 +292,8 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
                                     bool &IsDefault) const;
 
   void printSymbolsHelper(bool IsDynamic) const;
+  void printDynamicEntry(raw_ostream &OS, uint64_t Type, uint64_t Value) const;
+
   const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; }
   const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; }
   const Elf_Shdr *getDotAddrsigSec() const { return DotAddrsigSec; }
@@ -292,6 +303,7 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
   const DynRegionInfo &getDynRelaRegion() const { return DynRelaRegion; }
   const DynRegionInfo &getDynRelrRegion() const { return DynRelrRegion; }
   const DynRegionInfo &getDynPLTRelRegion() const { return DynPLTRelRegion; }
+  const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; }
   const Elf_Hash *getHashTable() const { return HashTable; }
   const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; }
 };
@@ -340,6 +352,7 @@ template <typename ELFT> class DumpStyle {
   virtual void printSymbols(const ELFFile<ELFT> *Obj, bool PrintSymbols,
                             bool PrintDynamicSymbols) = 0;
   virtual void printHashSymbols(const ELFFile<ELFT> *Obj) {}
+  virtual void printDynamic(const ELFFile<ELFT> *Obj) {}
   virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
   virtual void printSymtabMessage(const ELFFile<ELFT> *Obj, StringRef Name,
                                   size_t Offset) {}
@@ -384,6 +397,7 @@ template <typename ELFT> class GNUStyle : public DumpStyle<ELFT> {
   void printSymbols(const ELFO *Obj, bool PrintSymbols,
                     bool PrintDynamicSymbols) override;
   void printHashSymbols(const ELFO *Obj) override;
+  void printDynamic(const ELFFile<ELFT> *Obj) override;
   void printDynamicRelocations(const ELFO *Obj) override;
   void printSymtabMessage(const ELFO *Obj, StringRef Name,
                           size_t Offset) override;
@@ -488,6 +502,7 @@ template <typename ELFT> class LLVMStyle : public DumpStyle<ELFT> {
   void printSectionHeaders(const ELFO *Obj) override;
   void printSymbols(const ELFO *Obj, bool PrintSymbols,
                     bool PrintDynamicSymbols) override;
+  void printDynamic(const ELFFile<ELFT> *Obj) override;
   void printDynamicRelocations(const ELFO *Obj) override;
   void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
                            cl::boolOrDefault PrintSectionMapping) override;
@@ -1764,8 +1779,8 @@ static void printLibrary(raw_ostream &OS, const Twine &Tag, const Twine &Name) {
 }
 
 template <class ELFT>
-void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
-  raw_ostream &OS = W.getOStream();
+void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
+                                        uint64_t Value) const {
   const char *ConvChar =
       (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
   switch (Type) {
@@ -1883,41 +1898,7 @@ template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
 } // end anonymous namespace
 
 template <class ELFT> void ELFDumper<ELFT>::printDynamicTable() {
-  // A valid .dynamic section contains an array of entries terminated with
-  // a DT_NULL entry. However, sometimes the section content may continue
-  // past the DT_NULL entry, so to dump the section correctly, we first find
-  // the end of the entries by iterating over them.
-  size_t Size = 0;
-  Elf_Dyn_Range DynTableEntries = dynamic_table();
-  for (; Size < DynTableEntries.size();)
-    if (DynTableEntries[Size++].getTag() == DT_NULL)
-      break;
-
-  if (!Size)
-    return;
-
-  raw_ostream &OS = W.getOStream();
-  W.startLine() << "DynamicSection [ (" << Size << " entries)\n";
-
-  bool Is64 = ELFT::Is64Bits;
-  W.startLine() << "  Tag" << (Is64 ? "                " : "        ") << "Type"
-                << "                 "
-                << "Name/Value\n";
-  for (size_t I = 0; I < Size; ++I) {
-    const Elf_Dyn &Entry = DynTableEntries[I];
-    uintX_t Tag = Entry.getTag();
-    W.startLine() << "  "
-                  << format_hex(Tag, Is64 ? 18 : 10, opts::Output != opts::GNU)
-                  << " "
-                  << format(
-                         "%-21s",
-                         getTypeString(
-                             ObjF->getELFFile()->getHeader()->e_machine, Tag));
-    printValue(Tag, Entry.getVal());
-    OS << "\n";
-  }
-
-  W.startLine() << "]\n";
+  ELFDumperStyle->printDynamic(ObjF->getELFFile());
 }
 
 template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
@@ -3344,6 +3325,35 @@ void GNUStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela R,
   printRelocation(Obj, Sym, SymbolName, R, IsRela);
 }
 
+template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
+  Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+  if (Table.empty())
+    return;
+
+  const DynRegionInfo &DynamicTableRegion =
+      this->dumper()->getDynamicTableRegion();
+
+  OS << "Dynamic section at offset "
+     << format_hex(reinterpret_cast<const uint8_t *>(DynamicTableRegion.Addr) -
+                       Obj->base(),
+                   1)
+     << " contains " << Table.size() << " entries:\n";
+
+  bool Is64 = ELFT::Is64Bits;
+  if (Is64)
+    OS << "  Tag                Type                 Name/Value\n";
+  else
+    OS << "  Tag        Type                 Name/Value\n";
+  for (auto Entry : Table) {
+    uintX_t Tag = Entry.getTag();
+    std::string TypeString = getTypeString(Obj->getHeader()->e_machine, Tag);
+    OS << format("  0x%0*x %-20s ", Is64 ? 16 : 8, Tag,
+                 ("(" + TypeString + ")").c_str());
+    this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
+    OS << "\n";
+  }
+}
+
 template <class ELFT>
 void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
   const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion();
@@ -4502,6 +4512,31 @@ void LLVMStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
   this->dumper()->printSymbolsHelper(true);
 }
 
+template <class ELFT> void LLVMStyle<ELFT>::printDynamic(const ELFFile<ELFT> *Obj) {
+  Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+  if (Table.empty())
+    return;
+
+  raw_ostream &OS = W.getOStream();
+  W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n";
+
+  bool Is64 = ELFT::Is64Bits;
+  if (Is64)
+    W.startLine() << "  Tag                Type                 Name/Value\n";
+  else
+    W.startLine() << "  Tag        Type                 Name/Value\n";
+  for (auto Entry : Table) {
+    uintX_t Tag = Entry.getTag();
+    W.startLine() << "  " << format_hex(Tag, Is64 ? 18 : 10, true) << " "
+                  << format("%-21s",
+                            getTypeString(Obj->getHeader()->e_machine, Tag));
+    this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
+    OS << "\n";
+  }
+
+  W.startLine() << "]\n";
+}
+
 template <class ELFT>
 void LLVMStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
   const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion();

From 74de6203efd12c5f57561a44b248051652a3de2c Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Fri, 24 May 2019 12:42:36 +0000
Subject: [PATCH 0168/1176] [LLD][COFF] Implement /filealign parameter

Patch by Stefan Schmidt.

This adds the /filealign parameter to lld, which allows to specify the
section alignment in the output file (as it does on Microsoft's
link.exe).

This is required to be able to load dynamically linked libraries on the
original Xbox, where the debugger monitor expects the section alignment
in the file to be the same as in memory.

llvm-svn: 361634
---
 lld/COFF/Config.h            |  1 +
 lld/COFF/Driver.cpp          |  7 +++++
 lld/COFF/Options.td          |  1 +
 lld/COFF/Writer.cpp          | 11 ++++----
 lld/test/COFF/filealign.test | 51 ++++++++++++++++++++++++++++++++++++
 5 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 lld/test/COFF/filealign.test

diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index 3b2fe2769c6a9..ba2bbcdebaf5d 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -180,6 +180,7 @@ struct Configuration {
   std::string MapFile;
 
   uint64_t ImageBase = -1;
+  uint64_t FileAlign = 512;
   uint64_t StackReserve = 1024 * 1024;
   uint64_t StackCommit = 4096;
   uint64_t HeapReserve = 1024 * 1024;
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 006984309e184..df374f518d94d 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1197,6 +1197,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   if (auto *Arg = Args.getLastArg(OPT_base))
     parseNumbers(Arg->getValue(), &Config->ImageBase);
 
+  // Handle /filealign
+  if (auto *Arg = Args.getLastArg(OPT_filealign)) {
+    parseNumbers(Arg->getValue(), &Config->FileAlign);
+    if (!isPowerOf2_64(Config->FileAlign))
+      error("/filealign: not a power of two: " + Twine(Config->FileAlign));
+  }
+
   // Handle /stack
   if (auto *Arg = Args.getLastArg(OPT_stack))
     parseNumbers(Arg->getValue(), &Config->StackReserve, &Config->StackCommit);
diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td
index d87183c7e8096..f92349f27bd90 100644
--- a/lld/COFF/Options.td
+++ b/lld/COFF/Options.td
@@ -32,6 +32,7 @@ def errorlimit : P<"errorlimit",
 def export  : P<"export", "Export a function">;
 // No help text because /failifmismatch is not intended to be used by the user.
 def failifmismatch : P<"failifmismatch", "">;
+def filealign : P<"filealign", "Section alignment in the output file">;
 def functionpadmin : F<"functionpadmin">;
 def functionpadmin_opt : P<"functionpadmin", "Prepares an image for hotpatching">;
 def guard   : P<"guard", "Control flow guard">;
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index d673fc8de8527..19614058a2b69 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -73,7 +73,6 @@ static unsigned char DOSProgram[] = {
 static_assert(sizeof(DOSProgram) % 8 == 0,
               "DOSProgram size must be multiple of 8");
 
-static const int SectorSize = 512;
 static const int DOSStubSize = sizeof(dos_header) + sizeof(DOSProgram);
 static_assert(DOSStubSize % 8 == 0, "DOSStub size must be multiple of 8");
 
@@ -1100,7 +1099,7 @@ void Writer::createSymbolAndStringTable() {
   PointerToSymbolTable = FileOff;
   FileOff += OutputSymtab.size() * sizeof(coff_symbol16);
   FileOff += 4 + Strtab.size();
-  FileSize = alignTo(FileOff, SectorSize);
+  FileSize = alignTo(FileOff, Config->FileAlign);
 }
 
 void Writer::mergeSections() {
@@ -1142,7 +1141,7 @@ void Writer::assignAddresses() {
                   sizeof(coff_section) * OutputSections.size();
   SizeOfHeaders +=
       Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header);
-  SizeOfHeaders = alignTo(SizeOfHeaders, SectorSize);
+  SizeOfHeaders = alignTo(SizeOfHeaders, Config->FileAlign);
   uint64_t RVA = PageSize; // The first page is kept unmapped.
   FileSize = SizeOfHeaders;
 
@@ -1167,7 +1166,7 @@ void Writer::assignAddresses() {
       C->setRVA(RVA + VirtualSize);
       VirtualSize += C->getSize();
       if (C->hasData())
-        RawSize = alignTo(VirtualSize, SectorSize);
+        RawSize = alignTo(VirtualSize, Config->FileAlign);
     }
     if (VirtualSize > UINT32_MAX)
       error("section larger than 4 GiB: " + Sec->Name);
@@ -1176,7 +1175,7 @@ void Writer::assignAddresses() {
     if (RawSize != 0)
       Sec->Header.PointerToRawData = FileSize;
     RVA += alignTo(VirtualSize, PageSize);
-    FileSize += alignTo(RawSize, SectorSize);
+    FileSize += alignTo(RawSize, Config->FileAlign);
   }
   SizeOfImage = alignTo(RVA, PageSize);
 
@@ -1248,7 +1247,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
 
   PE->ImageBase = Config->ImageBase;
   PE->SectionAlignment = PageSize;
-  PE->FileAlignment = SectorSize;
+  PE->FileAlignment = Config->FileAlign;
   PE->MajorImageVersion = Config->MajorImageVersion;
   PE->MinorImageVersion = Config->MinorImageVersion;
   PE->MajorOperatingSystemVersion = Config->MajorOSVersion;
diff --git a/lld/test/COFF/filealign.test b/lld/test/COFF/filealign.test
new file mode 100644
index 0000000000000..d6535101c4a21
--- /dev/null
+++ b/lld/test/COFF/filealign.test
@@ -0,0 +1,51 @@
+# RUN: yaml2obj < %s > %t.obj
+
+# RUN: lld-link /out:%t.exe /entry:main %t.obj
+# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=DEFAULT-HEADER %s
+
+# DEFAULT-HEADER:    FileAlignment: 512
+
+# RUN: lld-link /out:%t.exe /entry:main %t.obj /filealign:4096
+# RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=FILEALIGN-HEADER %s
+
+# FILEALIGN-HEADER: FileAlignment: 4096
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: []
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       4096
+    SectionData:     0000000000000000
+    Relocations:
+      - VirtualAddress:  0
+        SymbolName:      __ImageBase
+        Type:            IMAGE_REL_AMD64_ADDR64
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          8
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            main
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            __ImageBase
+    Value:           0
+    SectionNumber:   0
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+...

From 35be7ff80c1be14d9f7077c0d5a63b4bee6dd6c1 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 24 May 2019 13:28:27 +0000
Subject: [PATCH 0169/1176] [WebAssembly] Add support for -emit-relocs

This can be useful for post-link tools and for testing.  Sometimes
it can be useful to produces a regular executable but with relocations
preserved.

Differential Revision: https://reviews.llvm.org/D62378

llvm-svn: 361635
---
 lld/test/wasm/emit-relocs.ll | 39 ++++++++++++++++++++++++++++++++++++
 lld/wasm/Config.h            |  1 +
 lld/wasm/Driver.cpp          |  1 +
 lld/wasm/Options.td          |  2 ++
 lld/wasm/SyntheticSections.h |  4 +++-
 lld/wasm/Writer.cpp          | 10 ++++-----
 6 files changed, 51 insertions(+), 6 deletions(-)
 create mode 100644 lld/test/wasm/emit-relocs.ll

diff --git a/lld/test/wasm/emit-relocs.ll b/lld/test/wasm/emit-relocs.ll
new file mode 100644
index 0000000000000..cdf492ab8fbde
--- /dev/null
+++ b/lld/test/wasm/emit-relocs.ll
@@ -0,0 +1,39 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o
+; RUN: wasm-ld --emit-relocs -o %t.wasm %t.o %t.ret32.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare i32 @ret32(float)
+
+define void @unused_function() {
+  ret void
+}
+
+define hidden void @_start() local_unnamed_addr #0 {
+entry:
+  call i32 @ret32(float 0.0)
+  ret void
+}
+
+; CHECK:        - Type:            CODE
+; CHECK-NEXT:     Relocations:
+; CHECK-NEXT:       - Type:            R_WASM_FUNCTION_INDEX_LEB
+; CHECK-NEXT:         Index:           1
+; CHECK-NEXT:         Offset:          0x00000009
+
+; CHECK:        - Type:            CUSTOM
+; CHECK-NEXT:     Name:            linking
+; CHECK-NEXT:     Version:         2
+; CHECK-NEXT:     SymbolTable:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Name:            _start
+; CHECK-NEXT:         Flags:           [  ]
+; CHECK-NEXT:         Function:        0
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Name:            ret32
+; CHECK-NEXT:         Flags:           [ VISIBILITY_HIDDEN ]
+; CHECK-NEXT:         Function:        1
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 7881d01502d8d..6650e9e89df5a 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -27,6 +27,7 @@ struct Configuration {
   bool CompressRelocations;
   bool Demangle;
   bool DisableVerify;
+  bool EmitRelocs;
   bool ExportAll;
   bool ExportDynamic;
   bool ExportTable;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 0bea11faf7dc7..06868f34f8e14 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -302,6 +302,7 @@ static void readConfigs(opt::InputArgList &Args) {
   Config->CompressRelocations = Args.hasArg(OPT_compress_relocations);
   Config->Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, true);
   Config->DisableVerify = Args.hasArg(OPT_disable_verify);
+  Config->EmitRelocs = Args.hasArg(OPT_emit_relocs);
   Config->Entry = getEntry(Args);
   Config->ExportAll = Args.hasArg(OPT_export_all);
   Config->ExportDynamic = Args.hasFlag(OPT_export_dynamic,
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index a7760e853afdd..b95d9ded5f64f 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -31,6 +31,8 @@ defm demangle: B<"demangle",
     "Demangle symbol names",
     "Do not demangle symbol names">;
 
+def emit_relocs: F<"emit-relocs">, HelpText<"Generate relocations in output">;
+
 defm export_dynamic: B<"export-dynamic",
     "Put symbols in the dynamic symbol table",
     "Do not put symbols in the dynamic symbol table (default)">;
diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h
index ccd66326a2461..9f5266188331a 100644
--- a/lld/wasm/SyntheticSections.h
+++ b/lld/wasm/SyntheticSections.h
@@ -246,7 +246,9 @@ class LinkingSection : public SyntheticSection {
                  const std::vector<OutputSegment *> &DataSegments)
       : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, "linking"),
         InitFunctions(InitFunctions), DataSegments(DataSegments) {}
-  bool isNeeded() const override { return Config->Relocatable; }
+  bool isNeeded() const override {
+    return Config->Relocatable || Config->EmitRelocs;
+  }
   void writeBody() override;
   void addToSymtab(Symbol *Sym);
 
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index f43191c70273d..5df364fb7706d 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -129,7 +129,7 @@ void Writer::createCustomSections() {
     LLVM_DEBUG(dbgs() << "createCustomSection: " << Name << "\n");
 
     OutputSection *Sec = make<CustomSection>(Name, Pair.second);
-    if (Config->Relocatable) {
+    if (Config->Relocatable || Config->EmitRelocs) {
       auto *Sym = make<OutputSectionSymbol>(Sec);
       Out.LinkingSec->addToSymtab(Sym);
       Sec->SectionSym = Sym;
@@ -330,7 +330,7 @@ void Writer::addSections() {
   createCustomSections();
 
   addSection(Out.LinkingSec);
-  if (Config->Relocatable) {
+  if (Config->EmitRelocs || Config->Relocatable) {
     createRelocSections();
   }
 
@@ -493,17 +493,17 @@ void Writer::calculateExports() {
 }
 
 void Writer::populateSymtab() {
-  if (!Config->Relocatable)
+  if (!Config->Relocatable && !Config->EmitRelocs)
     return;
 
   for (Symbol *Sym : Symtab->getSymbols())
-    if (Sym->IsUsedInRegularObj)
+    if (Sym->IsUsedInRegularObj && Sym->isLive())
       Out.LinkingSec->addToSymtab(Sym);
 
   for (ObjFile *File : Symtab->ObjectFiles) {
     LLVM_DEBUG(dbgs() << "Local symtab entries: " << File->getName() << "\n");
     for (Symbol *Sym : File->getSymbols())
-      if (Sym->isLocal() && !isa<SectionSymbol>(Sym))
+      if (Sym->isLocal() && !isa<SectionSymbol>(Sym) && Sym->isLive())
         Out.LinkingSec->addToSymtab(Sym);
   }
 }

From 7991b6828484ec6deb466809a36f7ab7b0af90ce Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 24 May 2019 13:29:17 +0000
Subject: [PATCH 0170/1176] [lld] Trace all references with lld --trace-symbol

Previously undefined symbol references were only traced if they were
seen before that definition.

Fixes https://bugs.llvm.org/show_bug.cgi?id=41878

Differential Revision: https://reviews.llvm.org/D61929

llvm-svn: 361636
---
 lld/ELF/Symbols.cpp           |  5 ++++-
 lld/ELF/Symbols.h             |  2 +-
 lld/test/ELF/trace-symbols.s  |  7 ++++++-
 lld/test/wasm/trace-symbol.ll | 19 ++++++++++++-------
 lld/wasm/SymbolTable.cpp      |  6 ++++++
 lld/wasm/Symbols.cpp          | 13 ++++++++++---
 lld/wasm/Symbols.h            |  1 +
 7 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index d44b24dd1b37a..45c545d532962 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -291,7 +291,7 @@ bool Symbol::includeInDynsym() const {
 }
 
 // Print out a log message for --trace-symbol.
-void elf::printTraceSymbol(Symbol *Sym) {
+void elf::printTraceSymbol(const Symbol *Sym) {
   std::string S;
   if (Sym->isUndefined())
     S = ": reference to ";
@@ -413,6 +413,9 @@ void Symbol::resolveUndefined(const Undefined &Other) {
     return;
   }
 
+  if (Traced)
+    printTraceSymbol(&Other);
+
   if (isShared() || isLazy() || (isUndefined() && Other.Binding != STB_WEAK))
     Binding = Other.Binding;
 
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 04c23b588f02f..64fa30db25205 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -464,7 +464,7 @@ static inline void assertSymbols() {
   AssertSymbol<LazyObject>();
 }
 
-void printTraceSymbol(Symbol *Sym);
+void printTraceSymbol(const Symbol *Sym);
 
 size_t Symbol::getSymbolSize() const {
   switch (kind()) {
diff --git a/lld/test/ELF/trace-symbols.s b/lld/test/ELF/trace-symbols.s
index dfcce16e6b169..b6f8bea79d120 100644
--- a/lld/test/ELF/trace-symbols.s
+++ b/lld/test/ELF/trace-symbols.s
@@ -28,10 +28,15 @@
 # OBJECTD1FOO: trace-symbols.s.tmp1: definition of foo
 # OBJECTD1FOO: trace-symbols.s.tmp2: definition of foo
 
+# RUN: ld.lld -y foo %t1 %t2 %t -o %t3 | FileCheck -check-prefix=REFLAST %s
+# REFLAST: trace-symbols.s.tmp1: definition of foo
+# REFLAST: trace-symbols.s.tmp2: definition of foo
+# REFLAST: trace-symbols.s.tmp: reference to foo
+
 # RUN: ld.lld -y foo -trace-symbol=common -trace-symbol=hsymbol \
 # RUN:   %t %t1 %t2 -o %t3 | FileCheck -check-prefix=OBJECTD2FOO %s
 # RUN: ld.lld -y foo -y common --trace-symbol=hsymbol \
-# RUN:   %t %t2 %t1 -o /dev/null | FileCheck -check-prefix=OBJECTD2FOO %s
+# RUN:   %t %t2 %t1 -o %t3 | FileCheck -check-prefix=OBJECTD2FOO %s
 # RUN: ld.lld -y foo -y common %t %t1.so %t2 -o %t3 | \
 # RUN:   FileCheck -check-prefix=OBJECTD2FOO %s
 # OBJECTD2FOO: trace-symbols.s.tmp2: definition of foo
diff --git a/lld/test/wasm/trace-symbol.ll b/lld/test/wasm/trace-symbol.ll
index 4d167014752b4..e589de0f6d43e 100644
--- a/lld/test/wasm/trace-symbol.ll
+++ b/lld/test/wasm/trace-symbol.ll
@@ -1,9 +1,10 @@
 ; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o
-; RUN: llc -filetype=obj -o %t.o %s
-; RUN: wasm-ld -o %t.wasm %t.o %t.ret32.o -y ret32 -y _start | FileCheck %s -check-prefix=BOTH
+; RUN: llc -filetype=obj -o %t.start.o %s
+; RUN: wasm-ld -o %t.wasm %t.start.o %t.ret32.o -y ret32 -y _start | FileCheck %s -check-prefix=BOTH
+; RUN: wasm-ld -o %t.wasm %t.ret32.o %t.start.o -y ret32 -y _start | FileCheck %s -check-prefix=REVERSED
 
 ; check alias
-; RUN: wasm-ld -o %t.wasm %t.o %t.ret32.o -trace-symbol=_start | FileCheck %s -check-prefixes=JUST-START
+; RUN: wasm-ld -o %t.wasm %t.start.o %t.ret32.o -trace-symbol=_start | FileCheck %s -check-prefixes=JUST-START
 
 target triple = "wasm32-unknown-unknown"
 
@@ -15,9 +16,13 @@ entry:
   ret void
 }
 
-; BOTH: .o: definition of _start
-; BOTH: .o: reference to ret32
-; BOTH: .ret32.o: definition of ret32
+; BOTH:          start.o: definition of _start
+; BOTH-NEXT:     start.o: reference to ret32
+; BOTH-NEXT:     ret32.o: definition of ret32
 
-; JUST-START: .o: definition of _start
+; REVERSED:      ret32.o: definition of ret32
+; REVERSED-NEXT: start.o: definition of _start
+; REVERSED-NEXT: start.o: reference to ret32
+
+; JUST-START: start.o: definition of _start
 ; JUST-START-NOT: ret32
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index ce1aa5132ba90..244e24eb996d6 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -389,6 +389,8 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
   Symbol *S;
   bool WasInserted;
   std::tie(S, WasInserted) = insert(Name, File);
+  if (S->Traced)
+    printTraceSymbolUndefined(Name, File);
 
   auto Replace = [&]() {
     replaceSymbol<UndefinedFunction>(S, Name, ImportName, ImportModule, Flags,
@@ -420,6 +422,8 @@ Symbol *SymbolTable::addUndefinedData(StringRef Name, uint32_t Flags,
   Symbol *S;
   bool WasInserted;
   std::tie(S, WasInserted) = insert(Name, File);
+  if (S->Traced)
+    printTraceSymbolUndefined(Name, File);
 
   if (WasInserted)
     replaceSymbol<UndefinedData>(S, Name, Flags, File);
@@ -439,6 +443,8 @@ Symbol *SymbolTable::addUndefinedGlobal(StringRef Name, StringRef ImportName,
   Symbol *S;
   bool WasInserted;
   std::tie(S, WasInserted) = insert(Name, File);
+  if (S->Traced)
+    printTraceSymbolUndefined(Name, File);
 
   if (WasInserted)
     replaceSymbol<UndefinedGlobal>(S, Name, ImportName, ImportModule, Flags,
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index ba5bb5d43cdd4..07b895a2e67d2 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -307,12 +307,19 @@ std::string lld::toString(wasm::Symbol::Kind Kind) {
   llvm_unreachable("invalid symbol kind");
 }
 
+
+void lld::wasm::printTraceSymbolUndefined(StringRef Name, const InputFile* File) {
+  message(toString(File) + ": reference to " + Name);
+}
+
 // Print out a log message for --trace-symbol.
 void lld::wasm::printTraceSymbol(Symbol *Sym) {
-  std::string S;
+  // Undefined symbols are traced via printTraceSymbolUndefined
   if (Sym->isUndefined())
-    S = ": reference to ";
-  else if (Sym->isLazy())
+    return;
+
+  std::string S;
+  if (Sym->isLazy())
     S = ": lazy definition of ";
   else
     S = ": definition of ";
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 8b8b8eb834e4b..0bac81ced05f6 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -457,6 +457,7 @@ union SymbolUnion {
 };
 
 void printTraceSymbol(Symbol *Sym);
+void printTraceSymbolUndefined(StringRef Name, const InputFile* File);
 
 template <typename T, typename... ArgT>
 T *replaceSymbol(Symbol *S, ArgT &&... Arg) {

From 21977d8e29f84f6dca6ee64205d229bf49d40973 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Fri, 24 May 2019 13:56:01 +0000
Subject: [PATCH 0171/1176] [MCA] Zero-initialize field CRD in InstructionBase.
 Also run clang-format on a couple of files. NFC

llvm-svn: 361637
---
 llvm/include/llvm/MCA/Instruction.h |  7 ++++---
 llvm/tools/llvm-mca/llvm-mca.cpp    | 13 +++++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index c4d0f6aace35a..9ac1fffb4430c 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -256,8 +256,8 @@ class ReadState {
 public:
   ReadState(const ReadDescriptor &Desc, unsigned RegID)
       : RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
-        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(),
-        IsReady(true), IsZero(false), IndependentFromDef(false) {}
+        CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true),
+        IsZero(false), IndependentFromDef(false) {}
 
   const ReadDescriptor &getDescriptor() const { return *RD; }
   unsigned getSchedClass() const { return RD->SchedClassID; }
@@ -409,7 +409,8 @@ class InstructionBase {
   CriticalRegDep CRD;
 
 public:
-  InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
+  InstructionBase(const InstrDesc &D)
+      : Desc(D), IsOptimizableMove(false), CRD() {}
 
   SmallVectorImpl<WriteState> &getDefs() { return Defs; }
   const ArrayRef<WriteState> getDefs() const { return Defs; }
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index e70c8f627eff4..8ce1d03c785e0 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -68,8 +68,9 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
                                            cl::value_desc("filename"));
 
 static cl::opt<std::string>
-    ArchName("march", cl::desc("Target architecture. "
-                               "See -version for available targets"),
+    ArchName("march",
+             cl::desc("Target architecture. "
+                      "See -version for available targets"),
              cl::cat(ToolOptions));
 
 static cl::opt<std::string>
@@ -441,8 +442,8 @@ int main(int argc, char **argv) {
                   WithColor::error() << IE.Message << '\n';
                   IP->printInst(&IE.Inst, SS, "", *STI);
                   SS.flush();
-                  WithColor::note() << "instruction: " << InstructionStr
-                                    << '\n';
+                  WithColor::note()
+                      << "instruction: " << InstructionStr << '\n';
                 })) {
           // Default case.
           WithColor::error() << toString(std::move(NewE));
@@ -482,8 +483,8 @@ int main(int argc, char **argv) {
     mca::PipelinePrinter Printer(*P);
 
     if (PrintSummaryView)
-      Printer.addView(llvm::make_unique<mca::SummaryView>(
-          SM, Insts, DispatchWidth));
+      Printer.addView(
+          llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
 
     if (EnableBottleneckAnalysis)
       Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(SM));

From e1947b84c12afe4207cab5e9365361822cc4a011 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Fri, 24 May 2019 14:06:47 +0000
Subject: [PATCH 0172/1176] Revert "[OPENMP][NVPTX]Fix barriers and parallel
 level counters, NFC."

This reverts commit r361421 to split the patch into 3 parts.

llvm-svn: 361638
---
 openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu       | 3 +--
 openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h | 2 +-
 openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu           | 4 +---
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
index 0cd9b57fd7cf5..d369da1cb7e73 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -31,8 +31,7 @@ __device__ omptarget_nvptx_SimpleMemoryManager
 __device__ __shared__ uint32_t usedMemIdx;
 __device__ __shared__ uint32_t usedSlotIdx;
 
-__device__ __shared__ volatile uint8_t
-    parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+__device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
 __device__ __shared__ uint16_t threadLimit;
 __device__ __shared__ uint16_t threadsInTeam;
 __device__ __shared__ uint16_t nThreads;
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index b85d0a750f2a9..cd51538ad795c 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -398,7 +398,7 @@ extern __device__ omptarget_nvptx_SimpleMemoryManager
     omptarget_nvptx_simpleMemoryManager;
 extern __device__ __shared__ uint32_t usedMemIdx;
 extern __device__ __shared__ uint32_t usedSlotIdx;
-extern __device__ __shared__ volatile uint8_t
+extern __device__ __shared__ uint8_t
     parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
 extern __device__ __shared__ uint16_t threadLimit;
 extern __device__ __shared__ uint16_t threadsInTeam;
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 191b046c9f46f..d81aa8f0f3acc 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -62,8 +62,6 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
         // Barrier #1 is for synchronization among active threads.
         named_sync(L1_BARRIER, threads);
       }
-    } else {
-      __kmpc_flush(loc_ref);
     } // numberOfActiveOMPThreads > 1
     PRINT0(LD_SYNC, "completed kmpc_barrier\n");
   }
@@ -132,7 +130,7 @@ EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
 
 EXTERN void __kmpc_flush(kmp_Ident *loc) {
   PRINT0(LD_IO, "call kmpc_flush\n");
-  __threadfence();
+  __threadfence_system();
 }
 
 ////////////////////////////////////////////////////////////////////////////////

From a5ca34e6b3c2508ee1504ff6e18776f8cef25af9 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 24 May 2019 14:14:25 +0000
Subject: [PATCH 0173/1176] [WebAssebmly] Add support for --wrap

The code for implementing this features is taken almost verbatim
from the ELF backend.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=41681

Differential Revision: https://reviews.llvm.org/D62380

llvm-svn: 361639
---
 lld/ELF/Driver.cpp            |  2 +-
 lld/ELF/InputFiles.h          |  2 +-
 lld/include/lld/Common/LLVM.h |  2 +
 lld/test/wasm/wrap.ll         | 40 +++++++++++++++++
 lld/wasm/Driver.cpp           | 85 +++++++++++++++++++++++++++++++++++
 lld/wasm/InputFiles.h         |  2 +
 lld/wasm/LTO.cpp              |  5 +++
 lld/wasm/Options.td           |  3 ++
 lld/wasm/SymbolTable.cpp      | 17 ++++++-
 lld/wasm/SymbolTable.h        | 15 ++++---
 lld/wasm/Symbols.h            | 10 ++++-
 11 files changed, 172 insertions(+), 11 deletions(-)
 create mode 100644 lld/test/wasm/wrap.ll

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 9a72876631665..416fbb12b65d5 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1522,7 +1522,7 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> Wrapped) {
 
   // Update pointers in input files.
   parallelForEach(ObjectFiles, [&](InputFile *File) {
-    std::vector<Symbol *> &Syms = File->getMutableSymbols();
+    MutableArrayRef<Symbol *> Syms = File->getMutableSymbols();
     for (size_t I = 0, E = Syms.size(); I != E; ++I)
       if (Symbol *S = Map.lookup(Syms[I]))
         Syms[I] = S;
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 648f5b51452dc..81ee0302da020 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -90,7 +90,7 @@ class InputFile {
   // function on files of other types.
   ArrayRef<Symbol *> getSymbols() { return getMutableSymbols(); }
 
-  std::vector<Symbol *> &getMutableSymbols() {
+  MutableArrayRef<Symbol *> getMutableSymbols() {
     assert(FileKind == BinaryKind || FileKind == ObjKind ||
            FileKind == BitcodeKind);
     return Symbols;
diff --git a/lld/include/lld/Common/LLVM.h b/lld/include/lld/Common/LLVM.h
index 944bb412a2e62..f7ed1d793ca7b 100644
--- a/lld/include/lld/Common/LLVM.h
+++ b/lld/include/lld/Common/LLVM.h
@@ -29,6 +29,7 @@ class Twine;
 class MemoryBuffer;
 class MemoryBufferRef;
 template <typename T> class ArrayRef;
+template <typename T> class MutableArrayRef;
 template <unsigned InternalLen> class SmallString;
 template <typename T, unsigned N> class SmallVector;
 template <typename T> class ErrorOr;
@@ -62,6 +63,7 @@ using llvm::isa;
 
 // ADT's.
 using llvm::ArrayRef;
+using llvm::MutableArrayRef;
 using llvm::Error;
 using llvm::ErrorOr;
 using llvm::Expected;
diff --git a/lld/test/wasm/wrap.ll b/lld/test/wasm/wrap.ll
new file mode 100644
index 0000000000000..ff850accfd596
--- /dev/null
+++ b/lld/test/wasm/wrap.ll
@@ -0,0 +1,40 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: wasm-ld -wrap nosuchsym -wrap foo -o %t.wasm %t.o
+; RUN: wasm-ld -emit-relocs -wrap foo -o %t.wasm %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define i32 @foo() {
+  ret i32 1
+}
+
+define void @_start() {
+entry:
+  call i32 @foo()
+  ret void
+}
+
+declare i32 @__real_foo()
+
+define i32 @__wrap_foo() {
+  %rtn = call i32 @__real_foo()
+  ret i32 %rtn
+}
+
+; CHECK:      - Type:            CODE
+; CHECK-NEXT:   Relocations:     
+; CHECK-NEXT:     - Type:            R_WASM_FUNCTION_INDEX_LEB
+; CHECK-NEXT:       Index:           2
+; CHECK-NEXT:       Offset:          0x00000009
+; CHECK-NEXT:     - Type:            R_WASM_FUNCTION_INDEX_LEB
+; CHECK-NEXT:       Index:           0
+; CHECK-NEXT:       Offset:          0x00000013
+
+; CHECK:        FunctionNames:
+; CHECK-NEXT:      - Index:           0
+; CHECK-NEXT:        Name:            foo
+; CHECK-NEXT:      - Index:           1
+; CHECK-NEXT:        Name:            _start
+; CHECK-NEXT:      - Index:           2
+; CHECK-NEXT:        Name:            __wrap_foo
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 06868f34f8e14..4ac5aff2494f4 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -535,6 +535,84 @@ static std::string createResponseFile(const opt::InputArgList &Args) {
   return Data.str();
 }
 
+// The --wrap option is a feature to rename symbols so that you can write
+// wrappers for existing functions. If you pass `-wrap=foo`, all
+// occurrences of symbol `foo` are resolved to `wrap_foo` (so, you are
+// expected to write `wrap_foo` function as a wrapper). The original
+// symbol becomes accessible as `real_foo`, so you can call that from your
+// wrapper.
+//
+// This data structure is instantiated for each -wrap option.
+struct WrappedSymbol {
+  Symbol *Sym;
+  Symbol *Real;
+  Symbol *Wrap;
+};
+
+static Symbol *addUndefined(StringRef Name) {
+  return Symtab->addUndefinedFunction(Name, "", "", 0, nullptr, nullptr);
+}
+
+// Handles -wrap option.
+//
+// This function instantiates wrapper symbols. At this point, they seem
+// like they are not being used at all, so we explicitly set some flags so
+// that LTO won't eliminate them.
+static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &Args) {
+  std::vector<WrappedSymbol> V;
+  DenseSet<StringRef> Seen;
+
+  for (auto *Arg : Args.filtered(OPT_wrap)) {
+    StringRef Name = Arg->getValue();
+    if (!Seen.insert(Name).second)
+      continue;
+
+    Symbol *Sym = Symtab->find(Name);
+    if (!Sym)
+      continue;
+
+    Symbol *Real = addUndefined(Saver.save("__real_" + Name));
+    Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));
+    V.push_back({Sym, Real, Wrap});
+
+    // We want to tell LTO not to inline symbols to be overwritten
+    // because LTO doesn't know the final symbol contents after renaming.
+    Real->CanInline = false;
+    Sym->CanInline = false;
+
+    // Tell LTO not to eliminate these symbols.
+    Sym->IsUsedInRegularObj = true;
+    Wrap->IsUsedInRegularObj = true;
+    Real->IsUsedInRegularObj = false;
+  }
+  return V;
+}
+
+// Do renaming for -wrap by updating pointers to symbols.
+//
+// When this function is executed, only InputFiles and symbol table
+// contain pointers to symbol objects. We visit them to replace pointers,
+// so that wrapped symbols are swapped as instructed by the command line.
+static void wrapSymbols(ArrayRef<WrappedSymbol> Wrapped) {
+  DenseMap<Symbol *, Symbol *> Map;
+  for (const WrappedSymbol &W : Wrapped) {
+    Map[W.Sym] = W.Wrap;
+    Map[W.Real] = W.Sym;
+  }
+
+  // Update pointers in input files.
+  parallelForEach(Symtab->ObjectFiles, [&](InputFile *File) {
+    MutableArrayRef<Symbol *> Syms = File->getMutableSymbols();
+    for (size_t I = 0, E = Syms.size(); I != E; ++I)
+      if (Symbol *S = Map.lookup(Syms[I]))
+        Syms[I] = S;
+  });
+
+  // Update pointers in the symbol table.
+  for (const WrappedSymbol &W : Wrapped)
+    Symtab->wrap(W.Sym, W.Real, W.Wrap);
+}
+
 void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   WasmOptTable Parser;
   opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
@@ -628,6 +706,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   for (auto *Arg : Args.filtered(OPT_export))
     handleUndefined(Arg->getValue());
 
+  // Create wrapped symbols for -wrap option.
+  std::vector<WrappedSymbol> Wrapped = addWrappedSymbols(Args);
+
   // Do link-time optimization if given files are LLVM bitcode files.
   // This compiles bitcode files into real object files.
   Symtab->addCombinedLTOObject();
@@ -640,6 +721,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   if (errorCount())
     return;
 
+  // Apply symbol renames for -wrap.
+  if (!Wrapped.empty())
+    wrapSymbols(Wrapped);
+
   for (auto *Arg : Args.filtered(OPT_export)) {
     Symbol *Sym = Symtab->find(Arg->getValue());
     if (Sym && Sym->isDefined())
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index aedcb15ca3e24..7c8601e29f2a8 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -61,6 +61,8 @@ class InputFile {
 
   ArrayRef<Symbol *> getSymbols() const { return Symbols; }
 
+  MutableArrayRef<Symbol *> getMutableSymbols() { return Symbols; }
+
 protected:
   InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {}
   MemoryBufferRef MB;
diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp
index 2b49bac5cf2f7..0c42e0e212333 100644
--- a/lld/wasm/LTO.cpp
+++ b/lld/wasm/LTO.cpp
@@ -108,6 +108,11 @@ void BitcodeCompiler::add(BitcodeFile &F) {
                             (R.Prevailing && Sym->isExported());
     if (R.Prevailing)
       undefine(Sym);
+
+    // We tell LTO to not apply interprocedural optimization for wrapped
+    // (with --wrap) symbols because otherwise LTO would inline them while
+    // their values are still not final.
+    R.LinkerRedefined = !Sym->CanInline;
   }
   checkError(LTOObj->add(std::move(F.Obj), Resols));
 }
diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td
index b95d9ded5f64f..b6fe25d5d25ca 100644
--- a/lld/wasm/Options.td
+++ b/lld/wasm/Options.td
@@ -112,6 +112,9 @@ def version: F<"version">, HelpText<"Display the version number and exit">;
 def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
   HelpText<"Linker option extensions">;
 
+defm wrap: Eq<"wrap", "Use wrapper functions for symbol">,
+  MetaVarName<"<symbol>=<symbol>">;
+
 // The follow flags are unique to wasm
 
 def allow_undefined: F<"allow-undefined">,
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 244e24eb996d6..5328e9e784523 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -65,7 +65,8 @@ void SymbolTable::addCombinedLTOObject() {
 }
 
 void SymbolTable::reportRemainingUndefines() {
-  for (Symbol *Sym : SymVector) {
+  for (const auto& Pair : SymMap) {
+    const Symbol *Sym = SymVector[Pair.second];
     if (!Sym->isUndefined() || Sym->isWeak())
       continue;
     if (Config->AllowUndefinedSymbols.count(Sym->getName()) != 0)
@@ -104,6 +105,7 @@ std::pair<Symbol *, bool> SymbolTable::insertName(StringRef Name) {
 
   Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
   Sym->IsUsedInRegularObj = false;
+  Sym->CanInline = true;
   Sym->Traced = Trace;
   SymVector.emplace_back(Sym);
   return {Sym, true};
@@ -539,6 +541,19 @@ void SymbolTable::trace(StringRef Name) {
   SymMap.insert({CachedHashStringRef(Name), -1});
 }
 
+void SymbolTable::wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap) {
+  // Swap symbols as instructed by -wrap.
+  int &OrigIdx = SymMap[CachedHashStringRef(Sym->getName())];
+  int &RealIdx= SymMap[CachedHashStringRef(Real->getName())];
+  int &WrapIdx = SymMap[CachedHashStringRef(Wrap->getName())];
+  LLVM_DEBUG(dbgs() << "wrap: " << Sym->getName() << "\n");
+
+  // Anyone looking up __real symbols should get the original
+  RealIdx = OrigIdx;
+  // Anyone looking up the original should get the __wrap symbol
+  OrigIdx = WrapIdx;
+}
+
 static const uint8_t UnreachableFn[] = {
     0x03 /* ULEB length */, 0x00 /* ULEB num locals */,
     0x00 /* opcode unreachable */, 0x0b /* opcode end */
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index a35140df136e1..d6cb13b43f8a3 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -35,14 +35,11 @@ class InputSegment;
 // There is one add* function per symbol type.
 class SymbolTable {
 public:
+  void wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap);
+
   void addFile(InputFile *File);
-  void addCombinedLTOObject();
 
-  std::vector<ObjFile *> ObjectFiles;
-  std::vector<SharedFile *> SharedFiles;
-  std::vector<BitcodeFile *> BitcodeFiles;
-  std::vector<InputFunction *> SyntheticFunctions;
-  std::vector<InputGlobal *> SyntheticGlobals;
+  void addCombinedLTOObject();
 
   void reportRemainingUndefines();
 
@@ -87,6 +84,12 @@ class SymbolTable {
   void handleSymbolVariants();
   void handleWeakUndefines();
 
+  std::vector<ObjFile *> ObjectFiles;
+  std::vector<SharedFile *> SharedFiles;
+  std::vector<BitcodeFile *> BitcodeFiles;
+  std::vector<InputFunction *> SyntheticFunctions;
+  std::vector<InputGlobal *> SyntheticGlobals;
+
 private:
   std::pair<Symbol *, bool> insert(StringRef Name, const InputFile *File);
   std::pair<Symbol *, bool> insertName(StringRef Name);
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 0bac81ced05f6..3691b3f48b43b 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -114,6 +114,11 @@ class Symbol {
   // command line flag)
   unsigned ForceExport : 1;
 
+  // False if LTO shouldn't inline whatever this symbol points to. If a symbol
+  // is overwritten after LTO, LTO shouldn't inline the symbol because it
+  // doesn't know the final contents of the symbol.
+  unsigned CanInline : 1;
+
   // True if this symbol is specified by --trace-symbol option.
   unsigned Traced : 1;
 
@@ -131,8 +136,8 @@ class Symbol {
 
 protected:
   Symbol(StringRef Name, Kind K, uint32_t Flags, InputFile *F)
-      : IsUsedInRegularObj(false), ForceExport(false), Traced(false),
-        Name(Name), SymbolKind(K), Flags(Flags), File(F),
+      : IsUsedInRegularObj(false), ForceExport(false), CanInline(false),
+        Traced(false), Name(Name), SymbolKind(K), Flags(Flags), File(F),
         Referenced(!Config->GcSections) {}
 
   StringRef Name;
@@ -474,6 +479,7 @@ T *replaceSymbol(Symbol *S, ArgT &&... Arg) {
   T *S2 = new (S) T(std::forward<ArgT>(Arg)...);
   S2->IsUsedInRegularObj = SymCopy.IsUsedInRegularObj;
   S2->ForceExport = SymCopy.ForceExport;
+  S2->CanInline = SymCopy.CanInline;
   S2->Traced = SymCopy.Traced;
 
   // Print out a log message if --trace-symbol was specified.

From c0ea5b2d514d33fe045087976b2009c0462a610c Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Fri, 24 May 2019 14:24:25 +0000
Subject: [PATCH 0174/1176] gn build: Merge r361607

llvm-svn: 361640
---
 llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index 222978cc2aff3..f91e00f9b87df 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -161,6 +161,7 @@ static_library("CodeGen") {
     "StackMaps.cpp",
     "StackProtector.cpp",
     "StackSlotColoring.cpp",
+    "SwiftErrorValueTracking.cpp",
     "TailDuplication.cpp",
     "TailDuplicator.cpp",
     "TargetFrameLoweringImpl.cpp",

From aa7754cc9038b2c8a2bf1fb0e3a148f94cf4e8c5 Mon Sep 17 00:00:00 2001
From: Kevin Petit <kevin.petit@arm.com>
Date: Fri, 24 May 2019 14:53:52 +0000
Subject: [PATCH 0175/1176] [OpenCL] Add support for the
 cl_arm_integer_dot_product extensions

The specification is available in the Khronos OpenCL registry:

https://www.khronos.org/registry/OpenCL/extensions/arm/cl_arm_integer_dot_product.txt

Signed-off-by: Kevin Petit <kevin.petit@arm.com>
llvm-svn: 361641
---
 .../include/clang/Basic/OpenCLExtensions.def  |  6 +++
 clang/lib/Headers/opencl-c.h                  | 28 ++++++++++++
 .../CodeGenOpenCL/arm-integer-dot-product.cl  | 38 ++++++++++++++++
 .../SemaOpenCL/arm-integer-dot-product.cl     | 43 +++++++++++++++++++
 4 files changed, 115 insertions(+)
 create mode 100644 clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
 create mode 100644 clang/test/SemaOpenCL/arm-integer-dot-product.cl

diff --git a/clang/include/clang/Basic/OpenCLExtensions.def b/clang/include/clang/Basic/OpenCLExtensions.def
index 40ac88f60d97b..5536a6e8e4dfc 100644
--- a/clang/include/clang/Basic/OpenCLExtensions.def
+++ b/clang/include/clang/Basic/OpenCLExtensions.def
@@ -81,6 +81,12 @@ OPENCLEXT_INTERNAL(cl_clang_storage_class_specifiers, 100, ~0U)
 OPENCLEXT_INTERNAL(cl_amd_media_ops, 100, ~0U)
 OPENCLEXT_INTERNAL(cl_amd_media_ops2, 100, ~0U)
 
+// ARM OpenCL extensions
+OPENCLEXT_INTERNAL(cl_arm_integer_dot_product_int8, 120, ~0U)
+OPENCLEXT_INTERNAL(cl_arm_integer_dot_product_accumulate_int8, 120, ~0U)
+OPENCLEXT_INTERNAL(cl_arm_integer_dot_product_accumulate_int16, 120, ~0U)
+OPENCLEXT_INTERNAL(cl_arm_integer_dot_product_accumulate_saturate_int8, 120, ~0U)
+
 // Intel OpenCL extensions
 OPENCLEXT_INTERNAL(cl_intel_subgroups, 120, ~0U)
 OPENCLEXT_INTERNAL(cl_intel_subgroups_short, 120, ~0U)
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 0a36a84deb857..ba98fa6eb6c61 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -17033,6 +17033,34 @@ uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2);
 uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2);
 #endif // cl_amd_media_ops2
 
+#if defined(cl_arm_integer_dot_product_int8)
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : begin
+uint __ovld arm_dot(uchar4 a, uchar4 b);
+int __ovld arm_dot(char4 a, char4 b);
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : end
+#endif // defined(cl_arm_integer_dot_product_int8)
+
+#if defined(cl_arm_integer_dot_product_accumulate_int8)
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : begin
+uint __ovld arm_dot_acc(uchar4 a, uchar4 b, uint c);
+int __ovld arm_dot_acc(char4 a, char4 b, int c);
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : end
+#endif // defined(cl_arm_integer_dot_product_accumulate_int8)
+
+#if defined(cl_arm_integer_dot_product_accumulate_int16)
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : begin
+uint __ovld arm_dot_acc(ushort2 a, ushort2 b, uint c);
+int __ovld arm_dot_acc(short2 a, short2 b, int c);
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : end
+#endif // defined(cl_arm_integer_dot_product_accumulate_int16)
+
+#if defined(cl_arm_integer_dot_product_accumulate_saturate_int8)
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : begin
+uint __ovld arm_dot_acc_sat(uchar4 a, uchar4 b, uint c);
+int __ovld arm_dot_acc_sat(char4 a, char4 b, int c);
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : end
+#endif // defined(cl_arm_integer_dot_product_accumulate_saturate_int8)
+
 // Disable any extensions we may have enabled previously.
 #pragma OPENCL EXTENSION all : disable
 
diff --git a/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
new file mode 100644
index 0000000000000..d1ab6aceac5cf
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/arm-integer-dot-product.cl
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -cl-std=CL1.2 -emit-llvm -o - -O0 | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
+void test_int8(uchar4 ua, uchar4 ub, char4 sa, char4 sb) {
+    uint ur = arm_dot(ua, ub);
+    // CHECK: call spir_func i32 @_Z7arm_dotDv4_hS_
+    int sr = arm_dot(sa, sb);
+    // CHECK: call spir_func i32 @_Z7arm_dotDv4_cS_
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
+void test_accumulate_int8(uchar4 ua, uchar4 ub, uint uc, char4 sa, char4 sb, int c) {
+    uint ur = arm_dot_acc(ua, ub, uc);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv4_hS_j
+    int sr = arm_dot_acc(sa, sb, c);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv4_cS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : enable
+void test_accumulate_int16(ushort2 ua, ushort2 ub, uint uc, short2 sa, short2 sb, int c) {
+    uint ur = arm_dot_acc(ua, ub, uc);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv2_tS_j
+    int sr = arm_dot_acc(sa, sb, c);
+    // CHECK: call spir_func i32 @_Z11arm_dot_accDv2_sS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : disable
+
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : enable
+void test_accumulate_saturate_int8(uchar4 ua, uchar4 ub, uint uc, char4 sa, char4 sb, int c) {
+    uint ur = arm_dot_acc_sat(ua, ub, uc);
+    // CHECK: call spir_func i32 @_Z15arm_dot_acc_satDv4_hS_j
+    int sr = arm_dot_acc_sat(sa, sb, c);
+    // CHECK: call spir_func i32 @_Z15arm_dot_acc_satDv4_cS_i
+}
+#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : disable
+
diff --git a/clang/test/SemaOpenCL/arm-integer-dot-product.cl b/clang/test/SemaOpenCL/arm-integer-dot-product.cl
new file mode 100644
index 0000000000000..d7219d7402a90
--- /dev/null
+++ b/clang/test/SemaOpenCL/arm-integer-dot-product.cl
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -finclude-default-header -verify -cl-std=CL1.2 -emit-llvm -o - -O0
+
+void test_negative() {
+    uchar4 ua8, ub8;
+    char4 sa8, sb8;
+    ushort2 ua16, ub16;
+    short2 sa16, sb16;
+    uint ur;
+    int sr;
+    ur = arm_dot(ua8, ub8); // expected-error{{no matching function for call to 'arm_dot'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_int8' to be enabled}}
+    sr = arm_dot(sa8, sb8); // expected-error{{no matching function for call to 'arm_dot'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_int8' to be enabled}}
+    ur = arm_dot_acc(ua8, ub8, ur); // expected-error{{no matching function for call to 'arm_dot_acc'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_accumulate_int8' to be enabled}}
+    sr = arm_dot_acc(sa8, sb8, sr); // expected-error{{no matching function for call to 'arm_dot_acc'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_accumulate_int8' to be enabled}}
+    ur = arm_dot_acc(ua16, ub16, ur); // expected-error{{no matching function for call to 'arm_dot_acc'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_accumulate_int16' to be enabled}}
+    sr = arm_dot_acc(sa16, sb16, sr); // expected-error{{no matching function for call to 'arm_dot_acc'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_accumulate_int16' to be enabled}}
+    ur = arm_dot_acc_sat(ua8, ub8, ur); // expected-error{{no matching function for call to 'arm_dot_acc_sat'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_accumulate_saturate_int8' to be enabled}}
+    sr = arm_dot_acc_sat(sa8, sb8, sr); // expected-error{{no matching function for call to 'arm_dot_acc_sat'}}
+    // expected-note@opencl-c.h:* {{candidate function not viable}}
+    // expected-note@opencl-c.h:* {{candidate unavailable as it requires OpenCL extension 'cl_arm_integer_dot_product_accumulate_saturate_int8' to be enabled}}
+}
+

From c1cc8d0eca459a5f49e8e6b99a93b0e1d05a13ba Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Fri, 24 May 2019 15:04:50 +0000
Subject: [PATCH 0176/1176] [llvm-objcopy] - Strip undefined symbols if they
 are no longer referenced following --only-section

This is https://bugs.llvm.org/show_bug.cgi?id=40004.

In this patch I teach llvm-objcopy to remove undefined symbols if
them are not used anymore after applying -j/--only-section option.

Differential revision: https://reviews.llvm.org/D62317

llvm-svn: 361642
---
 .../ELF/only-section-strip-undefined.test     | 60 +++++++++++++++++++
 llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp    |  8 ++-
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/only-section-strip-undefined.test

diff --git a/llvm/test/tools/llvm-objcopy/ELF/only-section-strip-undefined.test b/llvm/test/tools/llvm-objcopy/ELF/only-section-strip-undefined.test
new file mode 100644
index 0000000000000..c9d21ac8b2ee2
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/only-section-strip-undefined.test
@@ -0,0 +1,60 @@
+## Here we want to check that llvm-objcopy removes an undefined symbol
+## if all references to it have been stripped.
+
+# RUN: yaml2obj --docnum=1 %s -o %t.o
+# RUN: llvm-objcopy -j .other.section %t.o %t2.o
+# RUN: llvm-readobj --symbols %t2.o | FileCheck %s --implicit-check-not=bar
+
+# RUN: llvm-objcopy -j .text -j .rela.text1 %t.o %t2.o
+# RUN: llvm-readobj --symbols %t2.o | FileCheck %s --check-prefix=BAR
+
+# BAR: bar
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:        .text
+    Type:        SHT_PROGBITS
+  - Name:        .rela.text1
+    Type:        SHT_RELA
+    Relocations:
+      - Offset:  0x0000000000000001
+        Symbol:  bar
+        Type:    R_X86_64_32
+  - Name:        .rela.text2
+    Type:        SHT_RELA
+    Relocations:
+      - Offset:  0x0000000000000001
+        Symbol:  bar
+        Type:    R_X86_64_32
+  - Name:        .other.section
+    Type:        SHT_PROGBITS
+Symbols:
+  - Name:    bar
+    Binding: STB_GLOBAL
+...
+
+## Check we remove unreferenced undefined symbols, even if
+## they weren't previously referenced. This follows GNU.
+
+# RUN: yaml2obj --docnum=2 %s -o %t.o
+# RUN: llvm-objcopy -j .keep_me %t.o %t2.o
+# RUN: llvm-readobj --symbols %t2.o | FileCheck %s --implicit-check-not=bar
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .keep_me
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    bar
+    Binding: STB_GLOBAL
+...
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index b2e750d15f03f..be25bd5ee4391 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -387,7 +387,8 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
   // The purpose of this loop is to mark symbols referenced by sections
   // (like GroupSection or RelocationSection). This way, we know which
   // symbols are still 'needed' and which are not.
-  if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty()) {
+  if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() ||
+      !Config.OnlySection.empty()) {
     for (auto &Section : Obj.sections())
       Section.markSymbols();
   }
@@ -415,6 +416,11 @@ static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
         isUnneededSymbol(Sym))
       return true;
 
+    // We want to remove undefined symbols if all references have been stripped.
+    if (!Config.OnlySection.empty() && !Sym.Referenced &&
+        Sym.getShndx() == SHN_UNDEF)
+      return true;
+
     return false;
   };
 

From fab7205ac454cb1459216f0524294aa4beb1b720 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm@google.com>
Date: Fri, 24 May 2019 15:11:45 +0000
Subject: [PATCH 0177/1176] [LibTooling] Add Explanation parameter to
 `makeRule`.

Summary:
Conceptually, a single-case RewriteRule has a matcher, edit(s) and an (optional)
explanation. `makeRule` previously only took the matcher and edit(s). This
change adds (optional) support for the explanation.

Reviewers: ilya-biryukov

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62390

llvm-svn: 361643
---
 clang/include/clang/Tooling/Refactoring/Transformer.h | 8 +++++---
 clang/lib/Tooling/Refactoring/Transformer.cpp         | 8 ++++----
 clang/unittests/Tooling/TransformerTest.cpp           | 3 +--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/Tooling/Refactoring/Transformer.h b/clang/include/clang/Tooling/Refactoring/Transformer.h
index 4fecd2a4909e0..e66dd0550247e 100644
--- a/clang/include/clang/Tooling/Refactoring/Transformer.h
+++ b/clang/include/clang/Tooling/Refactoring/Transformer.h
@@ -125,14 +125,16 @@ struct RewriteRule {
 
 /// Convenience function for constructing a simple \c RewriteRule.
 RewriteRule makeRule(ast_matchers::internal::DynTypedMatcher M,
-                     SmallVector<ASTEdit, 1> Edits);
+                     SmallVector<ASTEdit, 1> Edits,
+                     TextGenerator Explanation = nullptr);
 
 /// Convenience overload of \c makeRule for common case of only one edit.
 inline RewriteRule makeRule(ast_matchers::internal::DynTypedMatcher M,
-                            ASTEdit Edit) {
+                            ASTEdit Edit,
+                            TextGenerator Explanation = nullptr) {
   SmallVector<ASTEdit, 1> Edits;
   Edits.emplace_back(std::move(Edit));
-  return makeRule(std::move(M), std::move(Edits));
+  return makeRule(std::move(M), std::move(Edits), std::move(Explanation));
 }
 
 /// Applies the first rule whose pattern matches; other rules are ignored.
diff --git a/clang/lib/Tooling/Refactoring/Transformer.cpp b/clang/lib/Tooling/Refactoring/Transformer.cpp
index bcbe00131074e..76573d6b59f69 100644
--- a/clang/lib/Tooling/Refactoring/Transformer.cpp
+++ b/clang/lib/Tooling/Refactoring/Transformer.cpp
@@ -96,10 +96,10 @@ ASTEdit tooling::change(RangeSelector S, TextGenerator Replacement) {
   return E;
 }
 
-RewriteRule tooling::makeRule(DynTypedMatcher M,
-                              SmallVector<ASTEdit, 1> Edits) {
-  return RewriteRule{
-      {RewriteRule::Case{std::move(M), std::move(Edits), nullptr}}};
+RewriteRule tooling::makeRule(DynTypedMatcher M, SmallVector<ASTEdit, 1> Edits,
+                              TextGenerator Explanation) {
+  return RewriteRule{{RewriteRule::Case{std::move(M), std::move(Edits),
+                                        std::move(Explanation)}}};
 }
 
 // Determines whether A is a base type of B in the class hierarchy, including
diff --git a/clang/unittests/Tooling/TransformerTest.cpp b/clang/unittests/Tooling/TransformerTest.cpp
index 0bf012a9fc552..41c7e7a28bd5a 100644
--- a/clang/unittests/Tooling/TransformerTest.cpp
+++ b/clang/unittests/Tooling/TransformerTest.cpp
@@ -147,8 +147,7 @@ static RewriteRule ruleStrlenSize() {
                                   on(expr(hasType(isOrPointsTo(StringType)))
                                          .bind(StringExpr)),
                                   callee(cxxMethodDecl(hasName("c_str")))))),
-      change(text("REPLACED")));
-  R.Cases[0].Explanation = text("Use size() method directly on string.");
+      change(text("REPLACED")), text("Use size() method directly on string."));
   return R;
 }
 

From dffedea014822de908c36b15fe3b784326bd6491 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <Alexander.Timofeev@amd.com>
Date: Fri, 24 May 2019 15:32:18 +0000
Subject: [PATCH 0178/1176] [AMDGPU] Divergence driven ISel. Assign register
 class for cross block values according to the divergence.

Details: To make instruction selection really divergence driven it is necessary to assign
         the correct register classes to the cross block values beforehand. For the divergent targets
         same value type requires different register classes dependent on the value divergence.

Reviewers: rampitec, nhaehnle

Differential Revision: https://reviews.llvm.org/D59990

llvm-svn: 361644
---
 .../llvm/CodeGen/FunctionLoweringInfo.h       |  11 +-
 llvm/include/llvm/CodeGen/SelectionDAG.h      |   1 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |  11 +-
 .../include/llvm/CodeGen/TargetRegisterInfo.h |   5 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   6 +-
 .../SelectionDAG/FunctionLoweringInfo.cpp     |  14 +-
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp |  33 ++--
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  |   2 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   4 +-
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp |   2 +-
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp    | 142 ++++++++----------
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  91 ++++++++++-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   5 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  13 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h       |   5 +
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   4 +-
 llvm/lib/Target/ARM/ARMISelLowering.h         |   3 +-
 llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll    |  12 +-
 llvm/test/CodeGen/AMDGPU/branch-relaxation.ll |   3 +-
 llvm/test/CodeGen/AMDGPU/branch-uniformity.ll |   4 +-
 .../AMDGPU/control-flow-fastregalloc.ll       |   7 +-
 .../divergent-branch-uniform-condition.ll     |  55 +++----
 .../AMDGPU/extract_subvector_vec4_vec3.ll     |   6 +-
 llvm/test/CodeGen/AMDGPU/fabs.ll              |  12 +-
 .../CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll   |  58 +++----
 llvm/test/CodeGen/AMDGPU/fmin_legacy.ll       |   8 +-
 llvm/test/CodeGen/AMDGPU/fneg-fabs.ll         |  16 +-
 llvm/test/CodeGen/AMDGPU/fsub.ll              |  12 +-
 llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll |  10 +-
 .../AMDGPU/i1-copy-phi-uniform-branch.ll      |   1 -
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll |   6 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll   |   2 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll |   8 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll     |   2 +
 .../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll     |   2 +-
 .../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll      |   2 +-
 llvm/test/CodeGen/AMDGPU/loop_break.ll        |   8 +-
 llvm/test/CodeGen/AMDGPU/madak.ll             |  12 +-
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |   5 +-
 llvm/test/CodeGen/AMDGPU/multilevel-break.ll  |   5 +-
 llvm/test/CodeGen/AMDGPU/select-opt.ll        |   4 +-
 llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll |   3 +-
 .../CodeGen/AMDGPU/si-fix-sgpr-copies.mir     |   2 +-
 llvm/test/CodeGen/AMDGPU/smrd.ll              |   1 -
 .../AMDGPU/subreg-coalescer-undef-use.ll      |  53 ++++---
 .../AMDGPU/uniform-loop-inside-nonuniform.ll  |   5 +-
 .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll |   9 +-
 llvm/test/CodeGen/AMDGPU/valu-i1.ll           |   6 +-
 ...vgpr-spill-emergency-stack-slot-compute.ll |   1 +
 49 files changed, 413 insertions(+), 279 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index b3077fcaabd4f..fb60191abd3a0 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -13,7 +13,6 @@
 
 #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
 #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
-
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
@@ -21,6 +20,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -57,6 +57,7 @@ class FunctionLoweringInfo {
   const TargetLowering *TLI;
   MachineRegisterInfo *RegInfo;
   BranchProbabilityInfo *BPI;
+  const LegacyDivergenceAnalysis *DA;
   /// CanLowerReturn - true iff the function's return value can be lowered to
   /// registers.
   bool CanLowerReturn;
@@ -198,9 +199,11 @@ class FunctionLoweringInfo {
     return ValueMap.count(V);
   }
 
-  unsigned CreateReg(MVT VT);
+  unsigned CreateReg(MVT VT, bool isDivergent = false);
+
+  unsigned CreateRegs(const Value *V);
 
-  unsigned CreateRegs(Type *Ty);
+  unsigned CreateRegs(Type *Ty, bool isDivergent = false);
 
   unsigned InitializeRegForValue(const Value *V) {
     // Tokens never live in vregs.
@@ -209,7 +212,7 @@ class FunctionLoweringInfo {
     unsigned &R = ValueMap[V];
     assert(R == 0 && "Already initialized this value register!");
     assert(VirtReg2Value.empty());
-    return R = CreateRegs(V->getType());
+    return R = CreateRegs(V);
   }
 
   /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 56dd1ccbb7309..8afd3b2df53c9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -406,6 +406,7 @@ class SelectionDAG {
   const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
   const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
   const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
+  const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
   LLVMContext *getContext() const {return Context; }
   OptimizationRemarkEmitter &getORE() const { return *ORE; }
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index b1a64744f0642..97537cf7d4b21 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -636,12 +636,21 @@ class TargetLoweringBase {
 
   /// Return the register class that should be used for the specified value
   /// type.
-  virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+  virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
+    (void)isDivergent;
     const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
     assert(RC && "This value type is not natively supported!");
     return RC;
   }
 
+  /// Allows target to decide about the register class of the
+  /// specific value that is live outside the defining block.
+  /// Returns true if the value needs uniform register class.
+  virtual bool requiresUniformRegister(MachineFunction &MF,
+                                       const Value *) const {
+    return false;
+  }
+
   /// Return the 'representative' register class for the specified value
   /// type.
   ///
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 5ed1e448575fc..7c65e7407d9e2 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -520,6 +520,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
   /// function.  Used by MachineRegisterInfo::isConstantPhysReg().
   virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
 
+  /// Returns true if the register class is considered divergent.
+  virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
+    return false;
+  }
+
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that
   /// have call sequences where a GOT register may be updated by the caller
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 117654bc7a3f7..d53ee3134d550 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13919,9 +13919,11 @@ struct LoadedSlice {
     assert(DAG && "Missing context");
     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
     EVT ResVT = Use->getValueType(0);
-    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+    const TargetRegisterClass *ResRC =
+        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
     const TargetRegisterClass *ArgRC =
-        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+                           Use->getOperand(0)->isDivergent());
     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
       return false;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d8ef10f58aa7c..8b405562904f3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
   RegInfo = &MF->getRegInfo();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   unsigned StackAlign = TFI->getStackAlignment();
+  DA = DAG->getDivergenceAnalysis();
 
   // Check whether the function can return without sret-demotion.
   SmallVector<ISD::OutputArg, 4> Outs;
@@ -345,9 +346,9 @@ void FunctionLoweringInfo::clear() {
 }
 
 /// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
   return RegInfo->createVirtualRegister(
-      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
 }
 
 /// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -357,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
 /// In the case that the given value has struct or array type, this function
 /// will assign registers for each member or element.
 ///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
 
   SmallVector<EVT, 4> ValueVTs;
@@ -370,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
 
     unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i) {
-      unsigned R = CreateReg(RegisterVT);
+      unsigned R = CreateReg(RegisterVT, isDivergent);
       if (!FirstReg) FirstReg = R;
     }
   }
   return FirstReg;
 }
 
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+  return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
+                                      DA->isDivergent(V));
+}
+
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
 /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
 /// the register's LiveOutInfo is for a smaller bit width, it is extended to
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 059e5f7c8dd33..4b78d1bb6b160 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -105,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
 
   // Stick to the preferred register classes for legal types.
   if (TLI->isTypeLegal(VT))
-    UseRC = TLI->getRegClassFor(VT);
+    UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
 
   if (!IsClone && !IsCloned)
     for (SDNode *User : Node->uses()) {
@@ -164,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
            "Incompatible phys register def and uses!");
     DstRC = UseRC;
   } else {
-    DstRC = TLI->getRegClassFor(VT);
+    DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
   }
 
   // If all uses are reading from the src physical register and copying the
@@ -225,8 +225,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
     // type correctly. For example, a 64-bit float (X86::FR64) can't live in
     // the 32-bit float super-class (X86::FR32).
     if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
-      const TargetRegisterClass *VTRC =
-        TLI->getRegClassFor(Node->getSimpleValueType(i));
+      const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+          Node->getSimpleValueType(i),
+          (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
       if (RC)
         VTRC = TRI->getCommonSubClass(RC, VTRC);
       if (VTRC)
@@ -289,8 +290,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
     // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
     if (!VReg) {
-      const TargetRegisterClass *RC =
-        TLI->getRegClassFor(Op.getSimpleValueType());
+      const TargetRegisterClass *RC = TLI->getRegClassFor(
+          Op.getSimpleValueType(), Op.getNode()->isDivergent());
       VReg = MRI->createVirtualRegister(RC);
     }
     BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -395,11 +396,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     unsigned VReg = R->getReg();
     MVT OpVT = Op.getSimpleValueType();
-    const TargetRegisterClass *OpRC =
-        TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
     const TargetRegisterClass *IIRC =
         II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
            : nullptr;
+    const TargetRegisterClass *OpRC =
+        TLI->isTypeLegal(OpVT)
+            ? TLI->getRegClassFor(OpVT,
+                                  Op.getNode()->isDivergent() ||
+                                      (IIRC && TRI->isDivergentRegClass(IIRC)))
+            : nullptr;
 
     if (OpRC && IIRC && OpRC != IIRC &&
         TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -464,7 +469,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
 }
 
 unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
-                                          MVT VT, const DebugLoc &DL) {
+                                          MVT VT, bool isDivergent, const DebugLoc &DL) {
   const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
   const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
 
@@ -479,7 +484,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
 
   // VReg couldn't be reasonably constrained.  Emit a COPY to a new virtual
   // register instead.
-  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
   assert(RC && "No legal register class for VT supports that SubIdx");
   unsigned NewReg = MRI->createVirtualRegister(RC);
   BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -514,7 +519,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     // classes.
     unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     const TargetRegisterClass *TRC =
-      TLI->getRegClassFor(Node->getSimpleValueType(0));
+      TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
 
     unsigned Reg;
     MachineInstr *DefMI;
@@ -548,8 +553,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
       if (TargetRegisterInfo::isVirtualRegister(Reg))
         Reg = ConstrainForSubReg(Reg, SubIdx,
                                  Node->getOperand(0).getSimpleValueType(),
-                                 Node->getDebugLoc());
-
+                                 Node->isDivergent(), Node->getDebugLoc());
       // Create the destreg if it is missing.
       if (VRBase == 0)
         VRBase = MRI->createVirtualRegister(TRC);
@@ -584,7 +588,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     //
     // There is no constraint on the %src register class.
     //
-    const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+    const TargetRegisterClass *SRC =
+        TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
     SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
     assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 3188c2678f1af..42f7846fe7c3a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
   /// supports SubIdx sub-registers.  Emit a copy if that isn't possible.
   /// Return the virtual register to use.
   unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
-                              const DebugLoc &DL);
+                              bool isDivergent, const DebugLoc &DL);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 76e5847ba111d..a5274877ecee4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9844,7 +9844,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
         unsigned &RegOut = ConstantsOut[C];
         if (RegOut == 0) {
-          RegOut = FuncInfo.CreateRegs(C->getType());
+          RegOut = FuncInfo.CreateRegs(C);
           CopyValueToVirtualRegister(C, RegOut);
         }
         Reg = RegOut;
@@ -9857,7 +9857,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
           assert(isa<AllocaInst>(PHIOp) &&
                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                  "Didn't codegen value into a register!??");
-          Reg = FuncInfo.CreateRegs(PHIOp->getType());
+          Reg = FuncInfo.CreateRegs(PHIOp);
           CopyValueToVirtualRegister(PHIOp, Reg);
         }
       }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c9a1cd646ef3..6f55f98c51fd4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1485,7 +1485,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
               !Inst->use_empty()) {
             unsigned &R = FuncInfo->ValueMap[Inst];
             if (!R)
-              R = FuncInfo->CreateRegs(Inst->getType());
+              R = FuncInfo->CreateRegs(Inst);
           }
 
           bool HadTailCall = false;
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 94b1e636c7b15..cb24d1fe32bb1 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,18 +302,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
-static bool phiHasVGPROperands(const MachineInstr &PHI,
-                               const MachineRegisterInfo &MRI,
-                               const SIRegisterInfo *TRI,
-                               const SIInstrInfo *TII) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
-      return true;
-  }
-  return false;
-}
-
 static bool phiHasBreakDef(const MachineInstr &PHI,
                            const MachineRegisterInfo &MRI,
                            SmallSet<unsigned, 8> &Visited) {
@@ -338,16 +326,6 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
   return false;
 }
 
-static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
-                                          const TargetRegisterInfo &TRI) {
-  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
-       E = MBB.end(); I != E; ++I) {
-    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
-      return true;
-  }
-  return false;
-}
-
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -409,12 +387,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
   return false;
 }
 
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
-                                        const TargetRegisterInfo *TRI) {
-  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
-           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -621,63 +593,73 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         break;
       }
       case AMDGPU::PHI: {
-        unsigned Reg = MI.getOperand(0).getReg();
-        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
-          break;
-
-        // We don't need to fix the PHI if the common dominator of the
-        // two incoming blocks terminates with a uniform branch.
-        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
-        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
-          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
-          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
-          if (!predsHasDivergentTerminator(MBB0, TRI) &&
-              !predsHasDivergentTerminator(MBB1, TRI)) {
-            LLVM_DEBUG(dbgs()
-                       << "Not fixing PHI for uniform branch: " << MI << '\n');
+        unsigned hasVGPRUses = 0;
+        SetVector<const MachineInstr *> worklist;
+        worklist.insert(&MI);
+        while (!worklist.empty()) {
+          const MachineInstr *Instr = worklist.pop_back_val();
+          unsigned Reg = Instr->getOperand(0).getReg();
+          for (const auto &Use : MRI.use_operands(Reg)) {
+            const MachineInstr *UseMI = Use.getParent();
+            if (UseMI->isCopy() || UseMI->isRegSequence()) {
+              if (UseMI->isCopy() &&
+                  TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
+                  !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
+                hasVGPRUses++;
+              }
+              worklist.insert(UseMI);
+              continue;
+            }
+
+            if (UseMI->isPHI()) {
+              if (!TRI->isSGPRReg(MRI, Use.getReg()))
+                hasVGPRUses++;
+              continue;
+            }
+
+            unsigned OpNo = UseMI->getOperandNo(&Use);
+            const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
+            if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
+              const TargetRegisterClass *OpRC =
+                  TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
+              if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
+                  OpRC != &AMDGPU::VS_64RegClass) {
+                hasVGPRUses++;
+              }
+            }
+          }
+        }
+        bool hasVGPRInput = false;
+        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+          unsigned InputReg = MI.getOperand(i).getReg();
+          MachineInstr *Def = MRI.getVRegDef(InputReg);
+          if (TRI->isVGPR(MRI, InputReg)) {
+            if (Def->isCopy()) {
+              unsigned SrcReg = Def->getOperand(1).getReg();
+              const TargetRegisterClass *RC =
+                  TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
+                                                 : TRI->getPhysRegClass(SrcReg);
+              if (TRI->isSGPRClass(RC))
+                continue;
+            }
+            hasVGPRInput = true;
+            break;
+          } else if (Def->isCopy() &&
+                     TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
+            hasVGPRInput = true;
             break;
           }
         }
+        unsigned PHIRes = MI.getOperand(0).getReg();
+        const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-        // If a PHI node defines an SGPR and any of its operands are VGPRs,
-        // then we need to move it to the VALU.
-        //
-        // Also, if a PHI node defines an SGPR and has all SGPR operands
-        // we must move it to the VALU, because the SGPR operands will
-        // all end up being assigned the same register, which means
-        // there is a potential for a conflict if different threads take
-        // different control flow paths.
-        //
-        // For Example:
-        //
-        // sgpr0 = def;
-        // ...
-        // sgpr1 = def;
-        // ...
-        // sgpr2 = PHI sgpr0, sgpr1
-        // use sgpr2;
-        //
-        // Will Become:
-        //
-        // sgpr2 = def;
-        // ...
-        // sgpr2 = def;
-        // ...
-        // use sgpr2
-        //
-        // The one exception to this rule is when one of the operands
-        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
-        // instruction.  In this case, there we know the program will
-        // never enter the second block (the loop) without entering
-        // the first block (where the condition is computed), so there
-        // is no chance for values to be over-written.
-
-        SmallSet<unsigned, 8> Visited;
-        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
-          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-          TII->moveToVALU(MI, MDT);
+        if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
+            (hasVGPRInput || hasVGPRUses > 1)) {
+          TII->moveToVALU(MI);
+        } else {
+          TII->legalizeOperands(MI, MDT);
         }
+
         break;
       }
       case AMDGPU::REG_SEQUENCE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c2cda5ef4d7ce..8f93c63046caf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9637,7 +9637,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
       break;
 
     MVT VT = Src0.getValueType().getSimpleVT();
-    const TargetRegisterClass *RC = getRegClassFor(VT);
+    const TargetRegisterClass *RC =
+        getRegClassFor(VT, Src0.getNode()->isDivergent());
 
     MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
     SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -10171,3 +10172,91 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 
   return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
 }
+
+const TargetRegisterClass *
+SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
+    return &AMDGPU::SReg_64RegClass;
+  if (!TRI->isSGPRClass(RC) && !isDivergent)
+    return TRI->getEquivalentSGPRClass(RC);
+  else if (TRI->isSGPRClass(RC) && isDivergent)
+    return TRI->getEquivalentVGPRClass(RC);
+
+  return RC;
+}
+
+static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
+  if (Visited.count(V))
+    return false;
+  Visited.insert(V);
+  bool Result = false;
+  for (auto U : V->users()) {
+    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
+      if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
+          (V == U->getOperand(1)))
+        Result = true;
+    } else {
+      Result = hasIfBreakUser(U, Visited);
+    }
+    if (Result)
+      break;
+  }
+  return Result;
+}
+
+bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
+                                               const Value *V) const {
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if_break:
+      return true;
+    }
+  }
+  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
+    if (const IntrinsicInst *Intrinsic =
+            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
+      switch (Intrinsic->getIntrinsicID()) {
+      default:
+        return false;
+      case Intrinsic::amdgcn_if:
+      case Intrinsic::amdgcn_else: {
+        ArrayRef<unsigned> Indices = ExtValue->getIndices();
+        if (Indices.size() == 1 && Indices[0] == 1) {
+          return true;
+        }
+      }
+      }
+    }
+  }
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (isa<InlineAsm>(CI->getCalledValue())) {
+      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+      ImmutableCallSite CS(CI);
+      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
+          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+      for (auto &TC : TargetConstraints) {
+        if (TC.Type == InlineAsm::isOutput) {
+          ComputeConstraintToUse(TC, SDValue());
+          unsigned AssignedReg;
+          const TargetRegisterClass *RC;
+          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
+              SIRI, TC.ConstraintCode,
+              getSimpleValueType(MF.getDataLayout(), CS.getType()));
+          if (RC) {
+            MachineRegisterInfo &MRI = MF.getRegInfo();
+            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
+              return true;
+            else if (SIRI->isSGPRClass(RC))
+              return true;
+          }
+        }
+      }
+    }
+  }
+  SetVector<const Value *> Visited;
+  return hasIfBreakUser(V, Visited);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 60a474f51e5c4..094a0b054e235 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,7 +367,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                     bool SNaN = false,
                                     unsigned Depth = 0) const override;
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-
+  virtual const TargetRegisterClass *
+  getRegClassFor(MVT VT, bool isDivergent) const override;
+  virtual bool requiresUniformRegister(MachineFunction &MF,
+                                       const Value *V) const override;
   unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
 };
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e42ed3505cf5c..14f5dbe6ad496 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2219,6 +2219,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       // These come before src2.
       removeModOperands(UseMI);
       UseMI.setDesc(get(NewOpc));
+      // It might happen that UseMI was commuted
+      // and we now have SGPR as SRC1. If so 2 inlined
+      // constant and SGPR are illegal.
+      legalizeOperands(UseMI);
 
       bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
       if (DeleteDef)
@@ -3913,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
     return;
 
   // Try to eliminate the copy if it is copying an immediate value.
-  if (Def->isMoveImmediate())
+  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
 }
 
@@ -4147,7 +4151,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
     if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
       if (!VRC) {
         assert(SRC);
-        VRC = RI.getEquivalentVGPRClass(SRC);
+       if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+          VRC = &AMDGPU::VReg_1RegClass;
+        } else
+          VRC = RI.getEquivalentVGPRClass(SRC);
       }
       RC = VRC;
     } else {
@@ -5309,7 +5316,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::INSERT_SUBREG:
   case AMDGPU::WQM:
   case AMDGPU::WWM:
-    if (RI.hasVGPRs(NewDstRC))
+    if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
       return nullptr;
 
     NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdc1ef9645de..e2df3ae5ea7e9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -195,6 +195,11 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
                                                unsigned Reg) const;
   bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
 
+  virtual bool
+  isDivergentRegClass(const TargetRegisterClass *RC) const override {
+    return !isSGPRClass(RC);
+  }
+
   bool isSGPRPressureSet(unsigned SetID) const {
     return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
   }
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 643d2806c521e..fc735ae5d95f6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1429,7 +1429,9 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 
 /// getRegClassFor - Return the register class that should be used for the
 /// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+const TargetRegisterClass *
+ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  (void)isDivergent;
   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   // load / store 4 to 8 consecutive D registers.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 3b94cb0dcb0fa..8e254d75b1c30 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -456,7 +456,8 @@ class VectorType;
 
     /// getRegClassFor - Return the register class that should be used for the
     /// specified value type.
-    const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+    const TargetRegisterClass *
+    getRegClassFor(MVT VT, bool isDivergent = false) const override;
 
     /// Returns true if a cast between SrcAS and DestAS is a noop.
     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
index 3d457fdd50e81..454c56cbca5d0 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
@@ -5,11 +5,12 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_lds:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    ds_read_b32 v2, v0
+; GCN-NEXT:    ds_read_b32 v1, v0
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB0_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    v_not_b32_e32 v1, v2
 ; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -17,7 +18,6 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB0_1
@@ -33,11 +33,12 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_global:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dword v3, v[0:1], off
+; GCN-NEXT:    global_load_dword v2, v[0:1], off
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB1_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -45,7 +46,6 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB1_1
@@ -61,11 +61,12 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_flat:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    flat_load_dword v3, v[0:1]
+; GCN-NEXT:    flat_load_dword v2, v[0:1]
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB2_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -74,7 +75,6 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB2_1
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 45ed056567c2e..a2facaafb41f9 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -99,7 +99,7 @@ bb3:
 
 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
 ; GCN: s_load_dword [[CND:s[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
+
 ; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
 ; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]]
 ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
@@ -117,6 +117,7 @@ bb3:
 ; GCN: v_nop_e64
 
 ; GCN: [[ENDBB]]:
+; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
 ; GCN: buffer_store_dword [[V_CND]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll b/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
index e6f684178035e..c9c801fb1911e 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
@@ -8,8 +8,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: ; %LOOP49
-; CHECK: v_cmp_ne_u32_e32 vcc,
-; CHECK: s_cbranch_vccnz
+; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; CHECK: s_cbranch_scc1
 ; CHECK: ; %ENDIF53
 define amdgpu_vs float @main(i32 %in) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 41ecdd403d736..15e807a3e0230 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -89,7 +89,7 @@ endif:
 }
 
 ; GCN-LABEL: {{^}}divergent_loop:
-; VGPR: workitem_private_segment_byte_size = 16{{$}}
+; VGPR: workitem_private_segment_byte_size = 12{{$}}
 
 ; GCN: {{^}}; %bb.0:
 
@@ -123,10 +123,9 @@ endif:
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
 ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
-; GCN: v_cmp_ne_u32_e32 vcc,
-; GCN: s_and_b64 vcc, exec, vcc
+; GCN: s_cmp_lg_u32
 ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN-NEXT: s_cbranch_vccnz [[LOOP]]
+; GCN-NEXT: s_cbranch_scc1 [[LOOP]]
 
 
 ; GCN: [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 8d21050ebee01..08a95ecbf5ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -13,55 +13,50 @@ define amdgpu_ps void @main(i32, float) {
 ; CHECK:       ; %bb.0: ; %start
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
 ; CHECK-NEXT:    s_mov_b32 m0, s0
-; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
 ; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
-; CHECK-NEXT:    v_cmp_nlt_f32_e64 s[0:1], 0, v0
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
+; CHECK-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; CHECK-NEXT:  BB0_1: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
-; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
-; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
-; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_cbranch_vccz BB0_5
+; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], exec
+; CHECK-NEXT:    s_cmp_lt_u32 s0, 32
+; CHECK-NEXT:    s_mov_b64 s[6:7], -1
+; CHECK-NEXT:    s_cbranch_scc0 BB0_5
 ; CHECK-NEXT:  ; %bb.2: ; %endif1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_mov_b64 s[6:7], -1
-; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
-; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
 ; CHECK-NEXT:    ; mask branch BB0_4
 ; CHECK-NEXT:  BB0_3: ; %endif2
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
+; CHECK-NEXT:    s_add_i32 s0, s0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], exec, -1
 ; CHECK-NEXT:  BB0_4: ; %Flow1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_branch BB0_6
-; CHECK-NEXT:  BB0_5: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    ; implicit-def: $vgpr1
-; CHECK-NEXT:  BB0_6: ; %Flow
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[6:7], 0
+; CHECK-NEXT:  BB0_5: ; %Flow
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
+; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
+; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[2:3]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[8:9]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz BB0_1
-; CHECK-NEXT:  ; %bb.7: ; %Flow2
+; CHECK-NEXT:  ; %bb.6: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; this is the divergent branch with the condition not marked as divergent
-; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[2:3]
-; CHECK-NEXT:    ; mask branch BB0_9
-; CHECK-NEXT:  BB0_8: ; %if1
+; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[6:7]
+; CHECK-NEXT:    ; mask branch BB0_8
+; CHECK-NEXT:  BB0_7: ; %if1
 ; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT:  BB0_9: ; %endloop
+; CHECK-NEXT:  BB0_8: ; %endloop
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
 ; CHECK-NEXT:    s_endpgm
+; this is the divergent branch with the condition not marked as divergent
 start:
   %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
   br label %loop
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index a39833455a153..fe8f31a0cd2ee 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -13,9 +13,9 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
   ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
   ; GCN:   [[DEF1:%[0-9]+]]:sreg_128 = IMPLICIT_DEF
   ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
-  ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
   ; GCN:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2
   ; GCN:   [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
   ; GCN:   [[DEF2:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index f96019dba6dcc..badaa16bbfcc5 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -48,8 +48,8 @@ define amdgpu_kernel void @s_fabs_f32(float addrspace(1)* %out, float %in) {
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
-; GCN: v_and_b32
-; GCN: v_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
 define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -62,10 +62,10 @@ define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
-; GCN: v_and_b32
-; GCN: v_and_b32
-; GCN: v_and_b32
-; GCN: v_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
 define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   store <4 x float> %fabs, <4 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
index a3f176b3ef025..01499e681eafa 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -85,15 +85,15 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg
 
 ; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
 ; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -121,15 +121,15 @@ define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
@@ -156,15 +156,15 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
@@ -194,15 +194,15 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
 ; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -231,8 +231,6 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
-; GCN-DAG:        s_mov_b32 [[L:s[0-9]+]], 0x6f800000
-; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -240,9 +238,12 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
+
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -273,8 +274,6 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
-; GCN-DAG:        s_mov_b32 [[L:s[0-9]+]], 0x6f800000
-; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -282,9 +281,12 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
+
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index ca80c4edbfb29..075115a2ee6cf 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -33,9 +33,13 @@ define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(
 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
 
-; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
+; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
 
-; SI-SAFE: v_min_legacy_f32_e64 {{v[0-9]+}}, [[VB]], s[[A]]
+; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
+
+; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
+
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]
 
 ; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index 0ff5d9652c104..a621b04a346c0 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -4,7 +4,7 @@
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
 ; SI-NOT: and
-; SI: v_sub_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{s[0-9]+}}|
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
 define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
   %fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
 ; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{s[0-9]+}}|
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
 ; SI-NOT: and
 define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
@@ -85,8 +85,8 @@ define amdgpu_kernel void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrs
 
 ; FIXME: In this case two uses of the constant should be folded
 ; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
 define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -96,10 +96,10 @@ define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x
 
 ; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
 ; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
 define amdgpu_kernel void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 48647a2cdb898..6e4635ec43877 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -27,8 +27,8 @@ define amdgpu_kernel void @s_fsub_f32(float addrspace(1)* %out, float %a, float
 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
 
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
   %sub = fsub <2 x float> %a, %b
   store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -55,10 +55,10 @@ define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x flo
 }
 
 ; FUNC-LABEL: {{^}}s_fsub_v4f32:
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: s_endpgm
 define amdgpu_kernel void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
   %result = fsub <4 x float> %a, %b
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
index ae78a1ecf3252..87c9a565f08b2 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -4,17 +4,11 @@
 ; SI-LABEL: {{^}}i1_copy_from_loop:
 ;
 ; SI: ; %for.body
-; SI:      v_cmp_gt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
-; SI-DAG:  s_andn2_b64       [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
-; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
-
-; SI: ; %Flow1
-; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], exec
+; SI:      v_cmp_lt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], s{{[0-9+]}}, 4
 
 ; SI: ; %Flow
 ; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
+; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
 ; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
 
 ; SI: ; %for.end
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
index 0aacbbfda182b..c65683d4fab61 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
@@ -7,7 +7,6 @@
 ; GCN:      s_cbranch_scc1  [[PREEXIT:BB[0-9_]+]]
 
 ; GCN: ; %blocka
-; GCN:      s_xor_b64       s[{{[0-9:]+}}], exec, -1
 ; GCN:      s_cmp_eq_u32    s1, 0
 ; GCN:      s_cbranch_scc1  [[EXIT:BB[0-9_]+]]
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 47e080a94baa4..2584f30573fdc 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -11,12 +11,12 @@
 
 ; GCN-LABEL: {{^}}insertelement_v4f32_0:
 ; GCN: s_load_dwordx4
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
+; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
+
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
-; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
 define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index 2a5e81a6dd6ae..60ec52c229bca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -387,7 +387,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)*
 
 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
 ; SI-NOT: v0
-; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, v0, v0, v0
+; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
   %result0 = extractvalue { float, i1 } %result, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
index c47d02f716bdb..05b074bfe2d41 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -53,8 +53,8 @@ define amdgpu_kernel void @test_fabs_fmed3(float addrspace(1)* %out, float %src0
 }
 
 ; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0:
-; GCN: s_brev_b32 [[NEG0:s[0-9]+]], 1
-; GCN: v_med3_f32 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
+; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
+; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
 define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float %src0, float %src1) #1 {
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
   %neg.med3 = fsub float -0.0, %med3
@@ -88,8 +88,8 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out,
 
 ; GCN-LABEL: {{^}}test_fneg_fmed3_r_inv2pi_0_foldable_user:
 ; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN-DAG: s_mov_b32 [[NEG_INV:s[0-9]+]], 0xbe22f983
-; GCN: v_med3_f32 [[MED3:v[0-9]+]], -v{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
+; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
+; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
 define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(float addrspace(1)* %out, float %src0, float %mul.arg) #1 {
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 18ede50f40c0a..a7fb618c23430 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -42,6 +42,8 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
 ; VI-OPT: s_mov_b32
 ; VI-OPT: s_mov_b32
 ; VI-NOOPT: s_waitcnt
+; VI-NOOPT-NEXT: v_mov_b32_e32
+; VI-NOOPT-NEXT: s_nop 0
 ; VI-NOOPT-NEXT: s_nop 0
 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
 ; VI-OPT: s_nop 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index bc04f6f28f608..83bc8b2347245 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index 2cab9c28db374..1f46613a8db0d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index f37b3a3637a43..5c2ec5021f1a9 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -26,10 +26,9 @@
 ; GCN:      s_mov_b64         [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
 ; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
-; GCN:      v_cmp_lt_i32_e32  vcc, -1
-; GCN:      s_and_b64         vcc, exec, vcc
-; GCN:      s_or_b64          [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
-; GCN:      s_cbranch_vccnz   [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN:     s_or_b64         [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
+; GCN:     s_cmp_gt_i32 s4, -1
+; GCN:     s_cbranch_scc1   [[FLOW:BB[0-9]+_[0-9]+]]
 
 ; GCN: ; %bb4
 ; GCN:      buffer_load_dword
@@ -39,6 +38,7 @@
 ; GCN:      s_or_b64          [[INNER_MASK]], [[INNER_MASK]], [[TMP0]]
 
 ; GCN: [[FLOW]]: ; %Flow
+; GCN:           ;   in Loop: Header=BB0_1 Depth=1
 ; GCN:      s_and_b64         [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
 ; GCN:      s_or_b64          [[TMP1]], [[TMP1]], [[OUTER_MASK]]
 ; GCN:      s_mov_b64         [[OUTER_MASK]], [[TMP1]]
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index eed0218766481..8e4b6806f98ae 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,MAD,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -17,6 +17,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
 ; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
 ; MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
+; GFX10-MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; FMA:   v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -79,6 +80,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
 ; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
 ; GCN: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
 ; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
+; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -106,6 +108,7 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
 ; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
 ; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
 ; MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
+; GFX10-MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; FMA:   v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -234,9 +237,12 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia
 ; On GFX10+ we can use two scalar operands.
 ; GCN-LABEL: {{^}}madak_constant_bus_violation:
 ; GCN:       s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
-; GCN:       v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+
 ; GCN:       {{buffer|flat|global}}_load_dword [[VGPR:v[0-9]+]]
-; MAD:       v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; MAD:       v_mov_b32_e32 [[MADAK:v[0-9]+]], 0x42280000
+; MAD:       v_mac_f32_e64 [[MADAK]], [[SGPR0]], 0.5
+; GFX10:     v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+; GFX10-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
 ; FMA:       v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
 ; GCN:       v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
 ; GFX6:      buffer_store_dword [[MUL]]
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index e8ecf5e25abce..4822818e901af 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -155,8 +155,9 @@ entry:
 ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]]
 ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
-
-; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]]
+; CHECK-O0: v_readlane_b32 s[[S1:[0-9]+]], v{{[0-9]+}}, 4
+; CHECK-O0: v_readlane_b32 s[[S2:[0-9]+]], v{{[0-9]+}}, 5
+; CHECK-O0: s_mov_b64 exec, s{{\[}}[[S1]]:[[S2]]{{\]}}
 ; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
 ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 4c1a769d59958..ddda7baef7412 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -96,7 +96,6 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN:      s_mov_b64          [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
 
 ; GCN: ; %LeafBlock1
-; GCN:      s_mov_b64
 ; GCN:      s_mov_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 
 ; GCN: ; %case1
@@ -109,8 +108,6 @@ ENDIF:                                            ; preds = %LOOP
 
 ; GCN:      s_mov_b64          [[BREAK]], -1{{$}}
 
-; GCN: [[FLOW]]: ; %Flow
-
 ; GCN: ; %case0
 ; GCN:      buffer_load_dword  [[LOAD1:v[0-9]+]],
 ; GCN-DAG:  s_andn2_b64        [[BREAK]], [[BREAK]], exec
@@ -118,7 +115,7 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN-DAG:  s_and_b64          [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
 ; GCN:      s_or_b64           [[BREAK]], [[BREAK]], [[TMP]]
 
-; GCN: ; %Flow4
+; GCN: [[FLOW]]: ; %Flow4
 ; GCN:      s_and_b64          [[BREAK]], exec, [[BREAK]]
 ; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT]]
 ; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll
index f773357976cce..24df126e4cafc 100644
--- a/llvm/test/CodeGen/AMDGPU/select-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll
@@ -135,8 +135,8 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, flo
 
 ; GCN-LABEL: {{^}}regression:
 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
-; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
-; GCN: v_cmp_eq_f32_e32 vcc, 0, v{{[0-9]+}}
+; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
+; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
 
 define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index e0971b8456fdc..3d5c3285cba71 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -104,7 +104,8 @@ endif:
 
 ; SI: ; %else
 ; SI:      buffer_load_dword  [[AVAL:v[0-9]+]]
-; SI:      v_cmp_gt_i32_e64   [[PHI:s\[[0-9]+:[0-9]+\]]], 0, [[AVAL]]
+; SI:      v_cmp_gt_i32_e32   vcc, 0, [[AVAL]]
+; SI:      s_and_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], vcc, exec
 
 ; SI: ; %if
 ; SI:      buffer_load_dword  [[AVAL:v[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index 3ec7a6678a9ed..03e81a0431c54 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -16,7 +16,7 @@ registers:
 
 body: |
   ; GCN-LABEL: name: phi_visit_order
-  ; GCN: V_ADD_I32
+  ; GCN: S_ADD_I32
   bb.0:
     liveins: $vgpr0
     %7 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index c83eb378a1e1f..904de8111fabf 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -571,7 +571,6 @@ main_body:
 ;
 ; TODO: we should keep the loop counter in an SGPR
 ;
-; GCN: v_readfirstlane_b32
 ; GCN: s_buffer_load_dword
 define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 80071e3407e9c..e7555a6703383 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -1,28 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
 ; Don't crash when the use of an undefined value is only detected by the
 ; register coalescer because it is hidden with subregister insert/extract.
 target triple="amdgcn--"
 
-; CHECK-LABEL: foobar:
-; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
-; CHECK-NEXT: s_mov_b32 s2, -1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
-
-; CHECK: BB0_1:
-; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
-; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-
-; CHECK: BB0_2:
-; CHECK: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_mov_b32 s3, 0xf000
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
-; CHECK-NEXT: s_endpgm
 define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
+; CHECK-LABEL: foobar:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CHECK-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    s_mov_b32 s2, -1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+
+; FIXME: The change related to the fact that
+; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis
+; and hence it cannot derive the fact that the vector element is unused.
+; Such a copies appear because the float4 vectors and their elements in the test are uniform
+; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
+
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    v_mov_b32_e32 v2, s6
+; CHECK-NEXT:    v_mov_b32_e32 v3, s7
+
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT:    ; mask branch BB0_2
+; CHECK-NEXT:  BB0_1: ; %ift
+; CHECK-NEXT:    s_mov_b32 s4, s5
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    v_mov_b32_e32 v2, s6
+; CHECK-NEXT:    v_mov_b32_e32 v3, s7
+; CHECK-NEXT:  BB0_2: ; %ife
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_mov_b32 s3, 0xf000
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; CHECK-NEXT:    s_endpgm
 entry:
   %v0 = insertelement <4 x float> undef, float %a0, i32 0
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index 82283f39792ee..a1cf6cf630048 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -7,10 +7,9 @@
 ; CHECK: s_and_saveexec_b64
 ; CHECK-NEXT: ; mask branch
 ; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
-; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
 
-; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
-; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]]
+; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: ; %loop_body
+; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]]
 
 ; CHECK: s_endpgm
 define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) {
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index 50cf85e28ae13..fbf7364bfc4bb 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -226,13 +226,12 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addr
 ; GCN-LABEL: {{^}}test_s0_s1_k_f32:
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN-DAG: s_mov_b32 [[SK0:s[0-9]+]], 0x44800000
+; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
 ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], s[[SGPR1]]
-; GCN-DAG: v_mov_b32_e32 [[VS0:v[0-9]+]], s[[SGPR0]]
 
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS0]], [[VS1]], [[SK0]]
-; GCN-DAG: s_mov_b32 [[SK1:s[0-9]+]], 0x45800000
-; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VS0]], [[VS1]], [[SK1]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK0]]
+; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK1]]
 
 ; GCN: buffer_store_dword [[RESULT0]]
 ; GCN: buffer_store_dword [[RESULT1]]
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 3a9970e78e38f..79a753cc046f9 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -165,8 +165,8 @@ exit:
 ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
 ; SI: buffer_load_dword
 ; SI-DAG: buffer_store_dword
-; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100
-; SI: s_cbranch_vccz [[LABEL_LOOP]]
+; SI-DAG: s_cmpk_eq_i32 s{{[0-9+]}}, 0x100
+; SI: s_cbranch_scc0 [[LABEL_LOOP]]
 ; SI: [[LABEL_EXIT]]:
 ; SI: s_endpgm
 
@@ -214,7 +214,7 @@ exit:
 ; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
 ; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
-; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
+; SI: ; mask branch [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
 
 ; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
 ; SI: buffer_store_dword
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index 0c52daca04738..b0e9171cbb007 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s

From 47fd4f06c55381e215b803850169377ac2bfe7a5 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Fri, 24 May 2019 16:16:15 +0000
Subject: [PATCH 0179/1176] [CodeComplete] Add whitespace around braces in
 lambda completions

This produces nicer output.
Trivial follow-up to r361461, so sending without review.

llvm-svn: 361645
---
 clang/lib/Sema/SemaCodeComplete.cpp   |  3 +++
 clang/test/CodeCompletion/lambdas.cpp | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 27e684252f51d..f5e5a84de7a40 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -4177,8 +4177,11 @@ static void AddLambdaCompletion(ResultBuilder &Results,
     };
     Completion.AddChunk(CodeCompletionString::CK_RightParen);
   }
+  Completion.AddChunk(clang::CodeCompletionString::CK_HorizontalSpace);
   Completion.AddChunk(CodeCompletionString::CK_LeftBrace);
+  Completion.AddChunk(CodeCompletionString::CK_HorizontalSpace);
   Completion.AddPlaceholderChunk("body");
+  Completion.AddChunk(CodeCompletionString::CK_HorizontalSpace);
   Completion.AddChunk(CodeCompletionString::CK_RightBrace);
 
   Results.AddResult(Completion.TakeString());
diff --git a/clang/test/CodeCompletion/lambdas.cpp b/clang/test/CodeCompletion/lambdas.cpp
index 1ab804ad818e6..05c47b8c2a4f9 100644
--- a/clang/test/CodeCompletion/lambdas.cpp
+++ b/clang/test/CodeCompletion/lambdas.cpp
@@ -9,17 +9,17 @@ void test() {
   function<void(int, double)> y = {};
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:7:28 %s -o - | FileCheck -check-prefix=CHECK-1 %s
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:9:35 %s -o - | FileCheck -check-prefix=CHECK-1 %s
-  // CHECK-1: COMPLETION: Pattern : [<#=#>](int <#parameter#>, double <#parameter#>){<#body#>}
+  // CHECK-1: COMPLETION: Pattern : [<#=#>](int <#parameter#>, double <#parameter#>) { <#body#> }
 
   // == Placeholders for suffix types must be placed properly.
   function<void(void(*)(int))> z = {};
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:15:36 %s -o - | FileCheck -check-prefix=CHECK-2 %s
-  // CHECK-2: COMPLETION: Pattern : [<#=#>](void (* <#parameter#>)(int)){<#body#>}
+  // CHECK-2: COMPLETION: Pattern : [<#=#>](void (* <#parameter#>)(int)) { <#body#> }
 
   // == No need for a parameter list if function has no parameters.
   function<void()> a = {};
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:20:24 %s -o - | FileCheck -check-prefix=CHECK-3 %s
-  // CHECK-3: COMPLETION: Pattern : [<#=#>]{<#body#>}
+  // CHECK-3: COMPLETION: Pattern : [<#=#>] { <#body#> }
 }
 
 template <class T, class Allocator = int>
@@ -33,7 +33,7 @@ void test2() {
   function_typedef b = {};
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:30:35 %s -o - | FileCheck -check-prefix=CHECK-4 %s
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:33:24 %s -o - | FileCheck -check-prefix=CHECK-4 %s
-  // CHECK-4: COMPLETION: Pattern : [<#=#>](vector<int> <#parameter#>){<#body#>}
+  // CHECK-4: COMPLETION: Pattern : [<#=#>](vector<int> <#parameter#>) { <#body#> }
 }
 
 // Check another common function wrapper name.
@@ -42,7 +42,7 @@ template <class T> struct unique_function {};
 void test3() {
   unique_function<void()> a = {};
   // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:43:31 %s -o - | FileCheck -check-prefix=CHECK-5 %s
-  // CHECK-5: COMPLETION: Pattern : [<#=#>]{<#body#>}
+  // CHECK-5: COMPLETION: Pattern : [<#=#>] { <#body#> }
 }
 
 template <class T, class U> struct weird_function {};

From 07745a131fa99931e83077e19cfaa4ae46e6c2bc Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Fri, 24 May 2019 16:21:38 +0000
Subject: [PATCH 0180/1176] [CMake] Fix issues building runtimes

This resolves two issues:
(1) LIBCXX_HEADER_DIR is a very misleadingly named variable because it shouldn't be set to the header directory, instead it needs to be the root binary dir.
(2) If you build runtimes without libcxx, we can't depend on the libcxx header target, so we should instaed refer to it by the variable name which will be unset if libcxx isn't present.

llvm-svn: 361646
---
 llvm/runtimes/CMakeLists.txt | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 9016efe600e79..acf4f3e3e57ae 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -210,7 +210,9 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
 else() # if this is included from LLVM's CMake
   include(LLVMExternalProjectUtils)
   if (LLVM_EXTERNAL_LIBCXX_SOURCE_DIR AND "libcxx" IN_LIST LLVM_ENABLE_RUNTIMES)
-    set(LIBCXX_HEADER_DIR ${LLVM_INCLUDE_DIR}/c++/v1/)
+    # This looks wrong, but libcxx's build actually wants the header dir to be
+    # the root build dir, not the include directory.
+    set(LIBCXX_HEADER_DIR ${LLVM_BINARY_DIR})
     set(CXX_HEADER_TARGET runtime-libcxx-headers)
     add_subdirectory(${LLVM_EXTERNAL_LIBCXX_SOURCE_DIR}/include ${CXX_HEADER_TARGET})
   endif()
@@ -363,7 +365,7 @@ else() # if this is included from LLVM's CMake
 
     llvm_ExternalProject_Add(runtimes
                              ${CMAKE_CURRENT_SOURCE_DIR}
-                             DEPENDS ${ARG_DEPENDS} runtime-libcxx-headers
+                             DEPENDS ${ARG_DEPENDS} ${CXX_HEADER_TARGET}
                              # Builtins were built separately above
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}
@@ -454,7 +456,7 @@ else() # if this is included from LLVM's CMake
 
     llvm_ExternalProject_Add(runtimes-${name}
                              ${CMAKE_CURRENT_SOURCE_DIR}
-                             DEPENDS ${${name}_deps} runtime-libcxx-headers
+                             DEPENDS ${${name}_deps} ${CXX_HEADER_TARGET}
                              # Builtins were built separately above
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}

From 5b33554319cb3aeb88c43ecc6acbbef06a779190 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm@google.com>
Date: Fri, 24 May 2019 16:32:03 +0000
Subject: [PATCH 0181/1176] [clang-tidy] In TransformerClangTidyCheck, require
 Explanation field.

Summary:
In general, the `Explanation` field is optional in `RewriteRule` cases. But,
because the primary purpose of clang-tidy checks is to provide users with
diagnostics, we assume that a missing explanation is a bug.  This change adds an
assertion that checks all cases for an explanation, and updates the code to rely
on that assertion correspondingly.

Reviewers: ilya-biryukov

Subscribers: xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62340

llvm-svn: 361647
---
 .../utils/TransformerClangTidyCheck.cpp       | 25 +++++++++++++------
 .../utils/TransformerClangTidyCheck.h         | 10 +++++---
 .../TransformerClangTidyCheckTest.cpp         | 15 ++++++-----
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
index f142de4644184..12be8a6dce732 100644
--- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
@@ -13,6 +13,17 @@ namespace tidy {
 namespace utils {
 using tooling::RewriteRule;
 
+TransformerClangTidyCheck::TransformerClangTidyCheck(tooling::RewriteRule R,
+                                                     StringRef Name,
+                                                     ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context), Rule(std::move(R)) {
+  for (const auto &Case : Rule.Cases) {
+    assert(Case.Explanation != nullptr &&
+           "clang-tidy checks must have an explanation by default;"
+           " explicitly provide an empty explanation if none is desired");
+  }
+}
+
 void TransformerClangTidyCheck::registerMatchers(
     ast_matchers::MatchFinder *Finder) {
   Finder->addDynamicMatcher(tooling::detail::buildMatcher(Rule), this);
@@ -44,15 +55,13 @@ void TransformerClangTidyCheck::check(
   if (Transformations->empty())
     return;
 
-  StringRef Message = "no explanation";
-  if (Case.Explanation) {
-    if (Expected<std::string> E = Case.Explanation(Result))
-      Message = *E;
-    else
-      llvm::errs() << "Error in explanation: " << llvm::toString(E.takeError())
-                   << "\n";
+  Expected<std::string> Explanation = Case.Explanation(Result);
+  if (!Explanation) {
+    llvm::errs() << "Error in explanation: "
+                 << llvm::toString(Explanation.takeError()) << "\n";
+    return;
   }
-  DiagnosticBuilder Diag = diag(RootLoc, Message);
+  DiagnosticBuilder Diag = diag(RootLoc, *Explanation);
   for (const auto &T : *Transformations) {
     Diag << FixItHint::CreateReplacement(T.Range, T.Replacement);
   }
diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
index 6d0f86795bfdc..faf946ceb0feb 100644
--- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
+++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h
@@ -31,10 +31,14 @@ namespace utils {
 // };
 class TransformerClangTidyCheck : public ClangTidyCheck {
 public:
+  // All cases in \p R must have a non-null \c Explanation, even though \c
+  // Explanation is optional for RewriteRule in general. Because the primary
+  // purpose of clang-tidy checks is to provide users with diagnostics, we
+  // assume that a missing explanation is a bug.  If no explanation is desired,
+  // indicate that explicitly (for example, by passing `text("no explanation")`
+  //  to `makeRule` as the `Explanation` argument).
   TransformerClangTidyCheck(tooling::RewriteRule R, StringRef Name,
-                            ClangTidyContext *Context)
-      : ClangTidyCheck(Name, Context), Rule(std::move(R)) {}
-
+                            ClangTidyContext *Context);
   void registerMatchers(ast_matchers::MatchFinder *Finder) final;
   void check(const ast_matchers::MatchFinder::MatchResult &Result) final;
 
diff --git a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp
index 1426dbbc09b75..6b8763810e981 100644
--- a/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp
@@ -26,15 +26,18 @@ RewriteRule invertIf() {
   using tooling::change;
   using tooling::node;
   using tooling::statement;
+  using tooling::text;
   using tooling::stencil::cat;
 
   StringRef C = "C", T = "T", E = "E";
-  return tooling::makeRule(ifStmt(hasCondition(expr().bind(C)),
-                                  hasThen(stmt().bind(T)),
-                                  hasElse(stmt().bind(E))),
-                           change(statement(RewriteRule::RootID),
-                                  cat("if(!(", node(C), ")) ", statement(E),
-                                      " else ", statement(T))));
+  RewriteRule Rule = tooling::makeRule(
+      ifStmt(hasCondition(expr().bind(C)), hasThen(stmt().bind(T)),
+             hasElse(stmt().bind(E))),
+      change(
+          statement(RewriteRule::RootID),
+          cat("if(!(", node(C), ")) ", statement(E), " else ", statement(T))),
+      text("negate condition and reverse `then` and `else` branches"));
+  return Rule;
 }
 
 class IfInverterCheck : public TransformerClangTidyCheck {

From 6f7734a1255975125c1ceb9b14adfd17ee7be177 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 24 May 2019 16:46:09 +0000
Subject: [PATCH 0182/1176] [LoopVectorize] update test to be independent of
 instcombine; NFC

This is a regression test for vectorization, so remove instcombine
from the RUN line and adjust the comparison predicates to show what
the vectorizer is creating rather than how instcombine cleans it up.

llvm-svn: 361648
---
 .../LoopVectorize/minmax_reduction.ll         | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index fc2f8dcec2f02..f5d7f5ba24743 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -244,7 +244,7 @@ for.end:
 ; SGE -> SLT
 ; Turn this into a min reduction (select inputs are reversed).
 ; CHECK-LABEL: @sge_min_red(
-; CHECK: icmp slt <2 x i32>
+; CHECK: icmp sge <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp slt <2 x i32>
@@ -273,7 +273,7 @@ for.end:
 ; SLE -> SGT
 ; Turn this into a max reduction (select inputs are reversed).
 ; CHECK-LABEL: @sle_min_red(
-; CHECK: icmp sgt <2 x i32>
+; CHECK: icmp sle <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp sgt <2 x i32>
@@ -302,7 +302,7 @@ for.end:
 ; UGE -> ULT
 ; Turn this into a min reduction (select inputs are reversed).
 ; CHECK-LABEL: @uge_min_red(
-; CHECK: icmp ult <2 x i32>
+; CHECK: icmp uge <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ult <2 x i32>
@@ -331,7 +331,7 @@ for.end:
 ; ULE -> UGT
 ; Turn this into a max reduction (select inputs are reversed).
 ; CHECK-LABEL: @ule_min_red(
-; CHECK: icmp ugt <2 x i32>
+; CHECK: icmp ule <2 x i32>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: icmp ugt <2 x i32>
@@ -516,7 +516,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_max_red_float(
-; CHECK: fcmp fast ole <2 x float>
+; CHECK: fcmp fast ugt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
@@ -542,7 +542,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_max_red_float_ge(
-; CHECK: fcmp fast olt <2 x float>
+; CHECK: fcmp fast uge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
@@ -568,7 +568,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_max_red_float(
-; CHECK: fcmp fast oge <2 x float>
+; CHECK: fcmp fast ult <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
@@ -594,7 +594,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_max_red_float_le(
-; CHECK: fcmp fast ogt <2 x float>
+; CHECK: fcmp fast ule <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast ogt <2 x float>
@@ -727,7 +727,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_min_red_float(
-; CHECK: fcmp fast oge <2 x float>
+; CHECK: fcmp fast ult <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
@@ -753,7 +753,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_min_red_float_le(
-; CHECK: fcmp fast ogt <2 x float>
+; CHECK: fcmp fast ule <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
@@ -779,7 +779,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_min_red_float(
-; CHECK: fcmp fast ole <2 x float>
+; CHECK: fcmp fast ugt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>
@@ -805,7 +805,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
-; CHECK: fcmp fast olt <2 x float>
+; CHECK: fcmp fast uge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp fast olt <2 x float>

From 0ff901fba0ae5ca950a23c3b993240b62877f42f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 24 May 2019 16:52:35 +0000
Subject: [PATCH 0183/1176] AMDGPU: Boost inline threshold with addrspacecasted
 alloca arguments

This was skipping GetUnderlyingObject for nonprivate addresses, but an
alloca could also be found through an addrspacecast if it's flat.

llvm-svn: 361649
---
 llvm/lib/Target/AMDGPU/AMDGPUInline.cpp       |  7 +-
 .../AMDGPU/amdgpu-inline-alloca-argument.ll   | 70 +++++++++++++++++++
 2 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 84419a147c82e..22c7c0d51f457 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -123,10 +123,11 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
   uint64_t AllocaSize = 0;
   SmallPtrSet<const AllocaInst *, 8> AIVisited;
   for (Value *PtrArg : CS.args()) {
-    Type *Ty = PtrArg->getType();
-    if (!Ty->isPointerTy() ||
-        Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+    PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
+    if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
+                Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
       continue;
+
     PtrArg = GetUnderlyingObject(PtrArg, DL);
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
       if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
diff --git a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll
new file mode 100644
index 0000000000000..e3154027c4661
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument.ll
@@ -0,0 +1,70 @@
+; RUN: opt -mtriple=amdgcn--amdhsa -S -amdgpu-inline -inline-threshold=0 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+
+define void @use_flat_ptr_arg(float* nocapture %p) {
+entry:
+  %tmp1 = load float, float* %p, align 4
+  %div = fdiv float 1.000000e+00, %tmp1
+  %add0 = fadd float %div, 1.0
+  %add1 = fadd float %add0, 1.0
+  %add2 = fadd float %add1, 1.0
+  %add3 = fadd float %add2, 1.0
+  %add4 = fadd float %add3, 1.0
+  %add5 = fadd float %add4, 1.0
+  %add6 = fadd float %add5, 1.0
+  %add7 = fadd float %add6, 1.0
+  %add8 = fadd float %add7, 1.0
+  %add9 = fadd float %add8, 1.0
+  %add10 = fadd float %add9, 1.0
+  store float %add10, float* %p, align 4
+  ret void
+}
+
+define void @use_private_ptr_arg(float addrspace(5)* nocapture %p) {
+entry:
+  %tmp1 = load float, float addrspace(5)* %p, align 4
+  %div = fdiv float 1.000000e+00, %tmp1
+  %add0 = fadd float %div, 1.0
+  %add1 = fadd float %add0, 1.0
+  %add2 = fadd float %add1, 1.0
+  %add3 = fadd float %add2, 1.0
+  %add4 = fadd float %add3, 1.0
+  %add5 = fadd float %add4, 1.0
+  %add6 = fadd float %add5, 1.0
+  %add7 = fadd float %add6, 1.0
+  %add8 = fadd float %add7, 1.0
+  %add9 = fadd float %add8, 1.0
+  %add10 = fadd float %add9, 1.0
+  store float %add10, float addrspace(5)* %p, align 4
+  ret void
+}
+
+; Test that the inline threshold is boosted if called with an
+; addrspacecasted' alloca.
+; CHECK-LABEL: @test_inliner_flat_ptr(
+; CHECK: call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NOT: call
+; CHECK-NOT: call
+define amdgpu_kernel void @test_inliner_flat_ptr(float addrspace(1)* nocapture %a, i32 %n) {
+entry:
+  %pvt_arr = alloca [64 x float], align 4, addrspace(5)
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
+  %tmp2 = load float, float addrspace(1)* %arrayidx, align 4
+  %add = add i32 %tid, 1
+  %arrayidx2 = getelementptr inbounds float, float addrspace(1)* %a, i32 %add
+  %tmp5 = load float, float addrspace(1)* %arrayidx2, align 4
+  %or = or i32 %tid, %n
+  %arrayidx5 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
+  %arrayidx7 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
+  %to.flat = addrspacecast float addrspace(5)* %arrayidx7 to float*
+  call void @use_private_ptr_arg(float addrspace(5)* %arrayidx7)
+  call void @use_flat_ptr_arg(float* %to.flat)
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { noinline }
+attributes #1 = { nounwind readnone }

From 468724eed2911168f6b6913e83fc4d50e214d51f Mon Sep 17 00:00:00 2001
From: Shafik Yaghmour <syaghmour@apple.com>
Date: Fri, 24 May 2019 16:53:44 +0000
Subject: [PATCH 0184/1176] [ASTImporter] Call to HandleNameConflict in
 VisitRecordDecl mistakeningly using Name instead of SearchName

Summary:
https://reviews.llvm.org/D51633 added error handling to the ASTNodeImporter::VisitRecordDecl for the conflicting names case. This could lead to erroneous return of an error in that case since we should have been using SearchName. Name may be empty in the case where we find the name via D->getTypedefNameForAnonDecl()->getDeclName().

This fix is very similar to https://reviews.llvm.org/D59665

Differential Revision: https://reviews.llvm.org/D62352

llvm-svn: 361650
---
 clang/lib/AST/ASTImporter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 74a1887753c57..2b7470410f720 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -2585,7 +2585,7 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
     } // for
 
     if (!ConflictingDecls.empty() && SearchName) {
-      Name = Importer.HandleNameConflict(Name, DC, IDNS,
+      Name = Importer.HandleNameConflict(SearchName, DC, IDNS,
                                          ConflictingDecls.data(),
                                          ConflictingDecls.size());
       if (!Name)

From cebce0d49a997cdda62ee349e2a8763ddfbb84fe Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 24 May 2019 17:36:07 +0000
Subject: [PATCH 0185/1176] [WebAssembly] Use "linker" as linker shortname.

This is in line with other platforms.

Also, move the single statement methods into the header (also
in line with other platform).

Differential Revision: https://reviews.llvm.org/D62406

llvm-svn: 361651
---
 clang/lib/Driver/ToolChains/WebAssembly.cpp | 7 -------
 clang/lib/Driver/ToolChains/WebAssembly.h   | 7 ++++---
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index 657f686f8ae12..af6e856b9f9a2 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -22,9 +22,6 @@ using namespace clang::driver::toolchains;
 using namespace clang;
 using namespace llvm::opt;
 
-wasm::Linker::Linker(const ToolChain &TC)
-    : GnuTool("wasm::Linker", "lld", TC) {}
-
 /// Following the conventions in https://wiki.debian.org/Multiarch/Tuples,
 /// we remove the vendor field to form the multiarch triple.
 static std::string getMultiarchTriple(const Driver &D,
@@ -34,10 +31,6 @@ static std::string getMultiarchTriple(const Driver &D,
             TargetTriple.getOSAndEnvironmentName()).str();
 }
 
-bool wasm::Linker::isLinkJob() const { return true; }
-
-bool wasm::Linker::hasIntegratedCPP() const { return false; }
-
 std::string wasm::Linker::getLinkerPath(const ArgList &Args) const {
   const ToolChain &ToolChain = getToolChain();
   if (const Arg* A = Args.getLastArg(options::OPT_fuse_ld_EQ)) {
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.h b/clang/lib/Driver/ToolChains/WebAssembly.h
index 75ae1fc5a09c4..8e4e545c98511 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.h
+++ b/clang/lib/Driver/ToolChains/WebAssembly.h
@@ -20,9 +20,10 @@ namespace wasm {
 
 class LLVM_LIBRARY_VISIBILITY Linker : public GnuTool {
 public:
-  explicit Linker(const ToolChain &TC);
-  bool isLinkJob() const override;
-  bool hasIntegratedCPP() const override;
+  explicit Linker(const ToolChain &TC)
+      : GnuTool("wasm::Linker", "linker", TC) {}
+  bool isLinkJob() const override { return true; }
+  bool hasIntegratedCPP() const override { return false; }
   std::string getLinkerPath(const llvm::opt::ArgList &Args) const;
   void ConstructJob(Compilation &C, const JobAction &JA,
                     const InputInfo &Output, const InputInfoList &Inputs,

From 4105882b87e5070d5918bec7b80157b9749180f8 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Fri, 24 May 2019 17:39:55 +0000
Subject: [PATCH 0186/1176] Add support for dumping Objective C AST declaration
 nodes to JSON.

llvm-svn: 361652
---
 clang/include/clang/AST/JSONNodeDumper.h |   13 +
 clang/lib/AST/JSONNodeDumper.cpp         |  159 +-
 clang/test/AST/ast-dump-decl-json.m      | 1704 ++++++++++++++++++++++
 3 files changed, 1872 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/AST/ast-dump-decl-json.m

diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index 8995a8aeb8054..8f7eabe114636 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -218,6 +218,19 @@ class JSONNodeDumper
   void VisitAccessSpecDecl(const AccessSpecDecl *ASD);
   void VisitFriendDecl(const FriendDecl *FD);
 
+  void VisitObjCIvarDecl(const ObjCIvarDecl *D);
+  void VisitObjCMethodDecl(const ObjCMethodDecl *D);
+  void VisitObjCTypeParamDecl(const ObjCTypeParamDecl *D);
+  void VisitObjCCategoryDecl(const ObjCCategoryDecl *D);
+  void VisitObjCCategoryImplDecl(const ObjCCategoryImplDecl *D);
+  void VisitObjCProtocolDecl(const ObjCProtocolDecl *D);
+  void VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D);
+  void VisitObjCImplementationDecl(const ObjCImplementationDecl *D);
+  void VisitObjCCompatibleAliasDecl(const ObjCCompatibleAliasDecl *D);
+  void VisitObjCPropertyDecl(const ObjCPropertyDecl *D);
+  void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D);
+  void VisitBlockDecl(const BlockDecl *D);
+
   void VisitDeclRefExpr(const DeclRefExpr *DRE);
   void VisitPredefinedExpr(const PredefinedExpr *PE);
   void VisitUnaryOperator(const UnaryOperator *UO);
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 4b207b606321c..524e94a516584 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -150,7 +150,15 @@ void JSONNodeDumper::Visit(const CXXCtorInitializer *Init) {
 }
 
 void JSONNodeDumper::Visit(const OMPClause *C) {}
-void JSONNodeDumper::Visit(const BlockDecl::Capture &C) {}
+
+void JSONNodeDumper::Visit(const BlockDecl::Capture &C) {
+  JOS.attribute("kind", "Capture");
+  attributeOnlyIfTrue("byref", C.isByRef());
+  attributeOnlyIfTrue("nested", C.isNested());
+  if (C.getVariable())
+    JOS.attribute("var", createBareDeclRef(C.getVariable()));
+}
+
 void JSONNodeDumper::Visit(const GenericSelectionExpr::ConstAssociation &A) {
   JOS.attribute("associationKind", A.getTypeSourceInfo() ? "case" : "default");
   attributeOnlyIfTrue("selected", A.isSelected());
@@ -215,9 +223,11 @@ llvm::json::Object JSONNodeDumper::createQualType(QualType QT, bool Desugar) {
 }
 
 llvm::json::Object JSONNodeDumper::createBareDeclRef(const Decl *D) {
-  llvm::json::Object Ret{
-      {"id", createPointerRepresentation(D)},
-      {"kind", (llvm::Twine(D->getDeclKindName()) + "Decl").str()}};
+  llvm::json::Object Ret{{"id", createPointerRepresentation(D)}};
+  if (!D)
+    return Ret;
+
+  Ret["kind"] = (llvm::Twine(D->getDeclKindName()) + "Decl").str();
   if (const auto *ND = dyn_cast<NamedDecl>(D))
     Ret["name"] = ND->getDeclName().getAsString();
   if (const auto *VD = dyn_cast<ValueDecl>(D))
@@ -645,6 +655,147 @@ void JSONNodeDumper::VisitFriendDecl(const FriendDecl *FD) {
     JOS.attribute("type", createQualType(T->getType()));
 }
 
+void JSONNodeDumper::VisitObjCIvarDecl(const ObjCIvarDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("type", createQualType(D->getType()));
+  attributeOnlyIfTrue("synthesized", D->getSynthesize());
+  switch (D->getAccessControl()) {
+  case ObjCIvarDecl::None: JOS.attribute("access", "none"); break;
+  case ObjCIvarDecl::Private: JOS.attribute("access", "private"); break;
+  case ObjCIvarDecl::Protected: JOS.attribute("access", "protected"); break;
+  case ObjCIvarDecl::Public: JOS.attribute("access", "public"); break;
+  case ObjCIvarDecl::Package: JOS.attribute("access", "package"); break;
+  }
+}
+
+void JSONNodeDumper::VisitObjCMethodDecl(const ObjCMethodDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("returnType", createQualType(D->getReturnType()));
+  JOS.attribute("instance", D->isInstanceMethod());
+  attributeOnlyIfTrue("variadic", D->isVariadic());
+}
+
+void JSONNodeDumper::VisitObjCTypeParamDecl(const ObjCTypeParamDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("type", createQualType(D->getUnderlyingType()));
+  attributeOnlyIfTrue("bounded", D->hasExplicitBound());
+  switch (D->getVariance()) {
+  case ObjCTypeParamVariance::Invariant:
+    break;
+  case ObjCTypeParamVariance::Covariant:
+    JOS.attribute("variance", "covariant");
+    break;
+  case ObjCTypeParamVariance::Contravariant:
+    JOS.attribute("variance", "contravariant");
+    break;
+  }
+}
+
+void JSONNodeDumper::VisitObjCCategoryDecl(const ObjCCategoryDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("interface", createBareDeclRef(D->getClassInterface()));
+  JOS.attribute("implementation", createBareDeclRef(D->getImplementation()));
+
+  llvm::json::Array Protocols;
+  for (const auto* P : D->protocols())
+    Protocols.push_back(createBareDeclRef(P));
+  if (!Protocols.empty())
+    JOS.attribute("protocols", std::move(Protocols));
+}
+
+void JSONNodeDumper::VisitObjCCategoryImplDecl(const ObjCCategoryImplDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("interface", createBareDeclRef(D->getClassInterface()));
+  JOS.attribute("categoryDecl", createBareDeclRef(D->getCategoryDecl()));
+}
+
+void JSONNodeDumper::VisitObjCProtocolDecl(const ObjCProtocolDecl *D) {
+  VisitNamedDecl(D);
+
+  llvm::json::Array Protocols;
+  for (const auto *P : D->protocols())
+    Protocols.push_back(createBareDeclRef(P));
+  if (!Protocols.empty())
+    JOS.attribute("protocols", std::move(Protocols));
+}
+
+void JSONNodeDumper::VisitObjCInterfaceDecl(const ObjCInterfaceDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("super", createBareDeclRef(D->getSuperClass()));
+  JOS.attribute("implementation", createBareDeclRef(D->getImplementation()));
+
+  llvm::json::Array Protocols;
+  for (const auto* P : D->protocols())
+    Protocols.push_back(createBareDeclRef(P));
+  if (!Protocols.empty())
+    JOS.attribute("protocols", std::move(Protocols));
+}
+
+void JSONNodeDumper::VisitObjCImplementationDecl(
+    const ObjCImplementationDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("super", createBareDeclRef(D->getSuperClass()));
+  JOS.attribute("interface", createBareDeclRef(D->getClassInterface()));
+}
+
+void JSONNodeDumper::VisitObjCCompatibleAliasDecl(
+    const ObjCCompatibleAliasDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("interface", createBareDeclRef(D->getClassInterface()));
+}
+
+void JSONNodeDumper::VisitObjCPropertyDecl(const ObjCPropertyDecl *D) {
+  VisitNamedDecl(D);
+  JOS.attribute("type", createQualType(D->getType()));
+
+  switch (D->getPropertyImplementation()) {
+  case ObjCPropertyDecl::None: break;
+  case ObjCPropertyDecl::Required: JOS.attribute("control", "required"); break;
+  case ObjCPropertyDecl::Optional: JOS.attribute("control", "optional"); break;
+  }
+  
+  ObjCPropertyDecl::PropertyAttributeKind Attrs = D->getPropertyAttributes();
+  if (Attrs != ObjCPropertyDecl::OBJC_PR_noattr) {
+    if (Attrs & ObjCPropertyDecl::OBJC_PR_getter)
+      JOS.attribute("getter", createBareDeclRef(D->getGetterMethodDecl()));
+    if (Attrs & ObjCPropertyDecl::OBJC_PR_setter)
+      JOS.attribute("setter", createBareDeclRef(D->getSetterMethodDecl()));
+    attributeOnlyIfTrue("readonly", Attrs & ObjCPropertyDecl::OBJC_PR_readonly);
+    attributeOnlyIfTrue("assign", Attrs & ObjCPropertyDecl::OBJC_PR_assign);
+    attributeOnlyIfTrue("readwrite",
+                        Attrs & ObjCPropertyDecl::OBJC_PR_readwrite);
+    attributeOnlyIfTrue("retain", Attrs & ObjCPropertyDecl::OBJC_PR_retain);
+    attributeOnlyIfTrue("copy", Attrs & ObjCPropertyDecl::OBJC_PR_copy);
+    attributeOnlyIfTrue("nonatomic",
+                        Attrs & ObjCPropertyDecl::OBJC_PR_nonatomic);
+    attributeOnlyIfTrue("atomic", Attrs & ObjCPropertyDecl::OBJC_PR_atomic);
+    attributeOnlyIfTrue("weak", Attrs & ObjCPropertyDecl::OBJC_PR_weak);
+    attributeOnlyIfTrue("strong", Attrs & ObjCPropertyDecl::OBJC_PR_strong);
+    attributeOnlyIfTrue("unsafe_unretained",
+                        Attrs & ObjCPropertyDecl::OBJC_PR_unsafe_unretained);
+    attributeOnlyIfTrue("class", Attrs & ObjCPropertyDecl::OBJC_PR_class);
+    attributeOnlyIfTrue("nullability",
+                        Attrs & ObjCPropertyDecl::OBJC_PR_nullability);
+    attributeOnlyIfTrue("null_resettable",
+                        Attrs & ObjCPropertyDecl::OBJC_PR_null_resettable);
+  }
+}
+
+void JSONNodeDumper::VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D) {
+  VisitNamedDecl(D->getPropertyDecl());
+  JOS.attribute("implKind", D->getPropertyImplementation() ==
+                                    ObjCPropertyImplDecl::Synthesize
+                                ? "synthesize"
+                                : "dynamic");
+  JOS.attribute("propertyDecl", createBareDeclRef(D->getPropertyDecl()));
+  JOS.attribute("ivarDecl", createBareDeclRef(D->getPropertyIvarDecl()));
+}
+
+void JSONNodeDumper::VisitBlockDecl(const BlockDecl *D) {
+  attributeOnlyIfTrue("variadic", D->isVariadic());
+  attributeOnlyIfTrue("capturesThis", D->capturesCXXThis());
+}
+
 void JSONNodeDumper::VisitDeclRefExpr(const DeclRefExpr *DRE) {
   JOS.attribute("referencedDecl", createBareDeclRef(DRE->getDecl()));
   if (DRE->getDecl() != DRE->getFoundDecl())
diff --git a/clang/test/AST/ast-dump-decl-json.m b/clang/test/AST/ast-dump-decl-json.m
new file mode 100644
index 0000000000000..1c816f3111bad
--- /dev/null
+++ b/clang/test/AST/ast-dump-decl-json.m
@@ -0,0 +1,1704 @@
+// RUN: %clang_cc1 -Wno-unused -fblocks -ast-dump=json -ast-dump-filter Test %s | FileCheck %s
+
+@protocol P
+@end
+
+@interface A
+@end
+
+@interface TestObjCIvarDecl : A
+@end
+
+@implementation TestObjCIvarDecl {
+  int varDefault;
+  @private int varPrivate;
+  @protected int varProtected;
+  @public int varPublic;
+  @package int varPackage;
+}
+@end
+
+@interface testObjCMethodDecl : A {
+}
+- (int) TestObjCMethodDecl: (int)i, ...;
+@end
+
+@implementation testObjCMethodDecl
+- (int) TestObjCMethodDecl: (int)i, ... {
+  return 0;
+}
+@end
+
+@protocol TestObjCProtocolDecl
+- (void) foo;
+@end
+
+@interface TestObjCClass : A <P>
+- (void) foo;
+@end
+
+@implementation TestObjCClass : A {
+  int i;
+}
+- (void) foo {
+}
+@end
+
+@interface TestObjCClass (TestObjCCategoryDecl) <P>
+- (void) bar;
+@end
+
+@interface TestGenericInterface<T> : A<P> {
+}
+@end
+
+@implementation TestObjCClass (TestObjCCategoryDecl)
+- (void) bar {
+}
+@end
+
+@compatibility_alias TestObjCCompatibleAliasDecl A;
+
+@interface TestObjCProperty: A
+@property(getter=getterFoo, setter=setterFoo:) int foo;
+@property int bar;
+@end
+
+@implementation TestObjCProperty {
+  int i;
+}
+@synthesize foo=i;
+@synthesize bar;
+@end
+
+void TestBlockDecl(int x) {
+  ^(int y, ...){ x; };
+}
+
+@interface B
++ (int) foo;
+@end
+
+void f() {
+  __typeof__(B.foo) Test;
+}
+
+// CHECK:  "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 12, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 9
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 9
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 2, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 10
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCIvarDecl", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "A"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "implementation": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCImplementationDecl", 
+// CHECK-NEXT:   "name": "TestObjCIvarDecl"
+// CHECK-NEXT:  }
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCImplementationDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 17, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 12
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 12
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 19
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCIvarDecl", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "interface": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "TestObjCIvarDecl"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 7, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 13
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 3, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 13
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 7, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 13
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "varDefault", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "private"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 16, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 14
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 12, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 14
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 16, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 14
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "varPrivate", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "private"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 18, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 15
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 14, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 15
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 18, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 15
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "varProtected", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "protected"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 15, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 16
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 11, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 16
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 15, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 16
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "varPublic", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "public"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 16, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 17
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 12, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 17
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 16, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 17
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "varPackage", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "package"
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 1, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 23
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 23
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 40, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 23
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCMethodDecl:", 
+// CHECK-NEXT:  "returnType": {
+// CHECK-NEXT:   "qualType": "int"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "instance": true, 
+// CHECK-NEXT:  "variadic": true, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ParmVarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 34, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 23
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 30, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 23
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 34, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 23
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "i", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 1, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 27
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 27
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 29
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCMethodDecl:", 
+// CHECK-NEXT:  "returnType": {
+// CHECK-NEXT:   "qualType": "int"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "instance": true, 
+// CHECK-NEXT:  "variadic": true, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:    "loc": {}, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {}, 
+// CHECK-NEXT:     "end": {}
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isImplicit": true, 
+// CHECK-NEXT:    "name": "self", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "testObjCMethodDecl *"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:    "loc": {}, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {}, 
+// CHECK-NEXT:     "end": {}
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isImplicit": true, 
+// CHECK-NEXT:    "name": "_cmd", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "desugaredQualType": "SEL *", 
+// CHECK-NEXT:     "qualType": "SEL"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ParmVarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 34, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 27
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 30, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 27
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 34, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 27
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "i", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "CompoundStmt", 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 41, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 27
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 29
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ReturnStmt", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 28
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 10, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 28
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "IntegerLiteral", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 10, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 28
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 10, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 28
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int"
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "valueCategory": "rvalue", 
+// CHECK-NEXT:        "value": "0"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCProtocolDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 11, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 32
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 32
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 2, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 34
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCProtocolDecl", 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 1, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 33
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 33
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 13, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 33
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "foo", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 12, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 36
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 36
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 2, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 38
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCClass", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "A"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "implementation": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCImplementationDecl", 
+// CHECK-NEXT:   "name": "TestObjCClass"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "protocols": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCProtocolDecl", 
+// CHECK-NEXT:    "name": "P"
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ], 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 1, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 37
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 37
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 13, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 37
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "foo", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCImplementationDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 17, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 40
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 40
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 45
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCClass", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "A"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "interface": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "TestObjCClass"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 7, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 41
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 3, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 41
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 7, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 41
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "i", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "private"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 1, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 43
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 43
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 44
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "foo", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:      "loc": {}, 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {}, 
+// CHECK-NEXT:       "end": {}
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "isImplicit": true, 
+// CHECK-NEXT:      "name": "self", 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "TestObjCClass *"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:      "loc": {}, 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {}, 
+// CHECK-NEXT:       "end": {}
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "isImplicit": true, 
+// CHECK-NEXT:      "name": "_cmd", 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "desugaredQualType": "SEL *", 
+// CHECK-NEXT:       "qualType": "SEL"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "CompoundStmt", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 14, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 43
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 1, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 44
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCCategoryDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 12, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 47
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 47
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 2, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 49
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCCategoryDecl", 
+// CHECK-NEXT:  "interface": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "TestObjCClass"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "implementation": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCCategoryImplDecl", 
+// CHECK-NEXT:   "name": "TestObjCCategoryDecl"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "protocols": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCProtocolDecl", 
+// CHECK-NEXT:    "name": "P"
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ], 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 1, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 48
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 48
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 13, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 48
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "bar", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 12, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 51
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 51
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 2, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 53
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestGenericInterface", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "A"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "implementation": {
+// CHECK-NEXT:   "id": "0x{{.*}}"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "protocols": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCProtocolDecl", 
+// CHECK-NEXT:    "name": "P"
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ], 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCTypeParamDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 33, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 51
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 33, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 51
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 33, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 51
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "T", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "desugaredQualType": "id", 
+// CHECK-NEXT:     "qualType": "id"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCCategoryImplDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 17, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 55
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 55
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 58
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCCategoryDecl", 
+// CHECK-NEXT:  "interface": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "TestObjCClass"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "categoryDecl": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCCategoryDecl", 
+// CHECK-NEXT:   "name": "TestObjCCategoryDecl"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 1, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 56
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 56
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 57
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "bar", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:      "loc": {}, 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {}, 
+// CHECK-NEXT:       "end": {}
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "isImplicit": true, 
+// CHECK-NEXT:      "name": "self", 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "TestObjCClass *"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:      "loc": {}, 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {}, 
+// CHECK-NEXT:       "end": {}
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "isImplicit": true, 
+// CHECK-NEXT:      "name": "_cmd", 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "desugaredQualType": "SEL *", 
+// CHECK-NEXT:       "qualType": "SEL"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "CompoundStmt", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 14, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 56
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 1, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 57
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCCompatibleAliasDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 1, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 60
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 60
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 60
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCCompatibleAliasDecl", 
+// CHECK-NEXT:  "interface": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "A"
+// CHECK-NEXT:  }
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 12, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 62
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 62
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 2, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 65
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCProperty", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "A"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "implementation": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCImplementationDecl", 
+// CHECK-NEXT:   "name": "TestObjCProperty"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCPropertyDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 52, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 63
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 63
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 52, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 63
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "foo", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "getter": {
+// CHECK-NEXT:     "id": "0x{{.*}}", 
+// CHECK-NEXT:     "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:     "name": "getterFoo"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "setter": {
+// CHECK-NEXT:     "id": "0x{{.*}}", 
+// CHECK-NEXT:     "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:     "name": "setterFoo:"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "assign": true, 
+// CHECK-NEXT:    "readwrite": true, 
+// CHECK-NEXT:    "atomic": true, 
+// CHECK-NEXT:    "unsafe_unretained": true
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCPropertyDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 15, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 64
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 64
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 15, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 64
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "bar", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "assign": true, 
+// CHECK-NEXT:    "readwrite": true, 
+// CHECK-NEXT:    "atomic": true, 
+// CHECK-NEXT:    "unsafe_unretained": true
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 52, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 63
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 52, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 63
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 52, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 63
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isImplicit": true, 
+// CHECK-NEXT:    "name": "getterFoo", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 52, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 63
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 52, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 63
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 52, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 63
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isImplicit": true, 
+// CHECK-NEXT:    "name": "setterFoo:", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ParmVarDecl", 
+// CHECK-NEXT:      "loc": {
+// CHECK-NEXT:       "col": 52, 
+// CHECK-NEXT:       "file": "{{.*}}", 
+// CHECK-NEXT:       "line": 63
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 52, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 63
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 52, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 63
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "name": "foo", 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "int"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 15, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 64
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 15, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 64
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 15, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 64
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isImplicit": true, 
+// CHECK-NEXT:    "name": "bar", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCMethodDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 15, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 64
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 15, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 64
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 15, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 64
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isImplicit": true, 
+// CHECK-NEXT:    "name": "setBar:", 
+// CHECK-NEXT:    "returnType": {
+// CHECK-NEXT:     "qualType": "void"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "instance": true, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ParmVarDecl", 
+// CHECK-NEXT:      "loc": {
+// CHECK-NEXT:       "col": 15, 
+// CHECK-NEXT:       "file": "{{.*}}", 
+// CHECK-NEXT:       "line": 64
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 15, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 64
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 15, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 64
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "name": "bar", 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "int"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "ObjCImplementationDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 17, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 67
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 67
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 72
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCProperty", 
+// CHECK-NEXT:  "super": {
+// CHECK-NEXT:   "id": "0x{{.*}}"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "interface": {
+// CHECK-NEXT:   "id": "0x{{.*}}", 
+// CHECK-NEXT:   "kind": "ObjCInterfaceDecl", 
+// CHECK-NEXT:   "name": "TestObjCProperty"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 7, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 68
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 3, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 68
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 7, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 68
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "i", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "access": "private"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCPropertyImplDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 13, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 70
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 70
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 17, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 70
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "foo", 
+// CHECK-NEXT:    "implKind": "synthesize", 
+// CHECK-NEXT:    "propertyDecl": {
+// CHECK-NEXT:     "id": "0x{{.*}}", 
+// CHECK-NEXT:     "kind": "ObjCPropertyDecl", 
+// CHECK-NEXT:     "name": "foo"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "ivarDecl": {
+// CHECK-NEXT:     "id": "0x{{.*}}", 
+// CHECK-NEXT:     "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:     "name": "i", 
+// CHECK-NEXT:     "type": {
+// CHECK-NEXT:      "qualType": "int"
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 13, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 71
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 13, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 71
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 13, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 71
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "bar", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "synthesized": true, 
+// CHECK-NEXT:    "access": "private"
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ObjCPropertyImplDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 13, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 71
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 71
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 13, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 71
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "name": "bar", 
+// CHECK-NEXT:    "implKind": "synthesize", 
+// CHECK-NEXT:    "propertyDecl": {
+// CHECK-NEXT:     "id": "0x{{.*}}", 
+// CHECK-NEXT:     "kind": "ObjCPropertyDecl", 
+// CHECK-NEXT:     "name": "bar"
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "ivarDecl": {
+// CHECK-NEXT:     "id": "0x{{.*}}", 
+// CHECK-NEXT:     "kind": "ObjCIvarDecl", 
+// CHECK-NEXT:     "name": "bar", 
+// CHECK-NEXT:     "type": {
+// CHECK-NEXT:      "qualType": "int"
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 74
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 74
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 76
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestBlockDecl", 
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void (int)"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ParmVarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 24, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 74
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 20, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 74
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 24, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 74
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isUsed": true, 
+// CHECK-NEXT:    "name": "x", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "CompoundStmt", 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 27, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 74
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 76
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ExprWithCleanups", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 75
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 21, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 75
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void (^)(int, ...)"
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "valueCategory": "rvalue", 
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "BlockExpr", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 75
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 21, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 75
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (^)(int, ...)"
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "valueCategory": "rvalue", 
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "BlockDecl", 
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3, 
+// CHECK-NEXT:           "file": "{{.*}}", 
+// CHECK-NEXT:           "line": 75
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 75
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 21, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 75
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "variadic": true, 
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}", 
+// CHECK-NEXT:            "kind": "ParmVarDecl", 
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 9, 
+// CHECK-NEXT:             "file": "{{.*}}", 
+// CHECK-NEXT:             "line": 75
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 75
+// CHECK-NEXT:             }, 
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 9, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 75
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "name": "y", 
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "kind": "Capture", 
+// CHECK-NEXT:            "var": {
+// CHECK-NEXT:             "id": "0x{{.*}}", 
+// CHECK-NEXT:             "kind": "ParmVarDecl", 
+// CHECK-NEXT:             "name": "x", 
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "int"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}", 
+// CHECK-NEXT:            "kind": "CompoundStmt", 
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 16, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 75
+// CHECK-NEXT:             }, 
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 21, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 75
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}", 
+// CHECK-NEXT:              "kind": "ImplicitCastExpr", 
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 18, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 75
+// CHECK-NEXT:               }, 
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 18, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 75
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "valueCategory": "rvalue", 
+// CHECK-NEXT:              "castKind": "LValueToRValue", 
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}", 
+// CHECK-NEXT:                "kind": "DeclRefExpr", 
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 18, 
+// CHECK-NEXT:                  "file": "{{.*}}", 
+// CHECK-NEXT:                  "line": 75
+// CHECK-NEXT:                 }, 
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 18, 
+// CHECK-NEXT:                  "file": "{{.*}}", 
+// CHECK-NEXT:                  "line": 75
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                }, 
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "const int"
+// CHECK-NEXT:                }, 
+// CHECK-NEXT:                "valueCategory": "lvalue", 
+// CHECK-NEXT:                "referencedDecl": {
+// CHECK-NEXT:                 "id": "0x{{.*}}", 
+// CHECK-NEXT:                 "kind": "ParmVarDecl", 
+// CHECK-NEXT:                 "name": "x", 
+// CHECK-NEXT:                 "type": {
+// CHECK-NEXT:                  "qualType": "int"
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                }
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}", 
+// CHECK-NEXT:            "kind": "ParmVarDecl", 
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 9, 
+// CHECK-NEXT:             "file": "{{.*}}", 
+// CHECK-NEXT:             "line": 75
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 75
+// CHECK-NEXT:             }, 
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 9, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 75
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "name": "y", 
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "VarDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 21, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 83
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 3, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 83
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 21, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 83
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "Test", 
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "desugaredQualType": "int", 
+// CHECK-NEXT:   "qualType": "typeof (B.foo)"
+// CHECK-NEXT:  }
+// CHECK-NEXT: }
+

From 21efe2afed7b743f37780f39b090af6145b4d527 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Fri, 24 May 2019 17:40:52 +0000
Subject: [PATCH 0187/1176] [NewPassManager] Add tuning option: LoopUnrolling
 [clang-change]

Summary:
Use CodeGenOpts's setting for loop unrolling.
[to be coupled with D61618]

Reviewers: chandlerc

Subscribers: jlebar, dmgreen, cfe-commits, llvm-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61620

llvm-svn: 361653
---
 clang/lib/CodeGen/BackendUtil.cpp |  1 +
 clang/test/CodeGen/loop-unroll.c  | 55 +++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 clang/test/CodeGen/loop-unroll.c

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 1dbeec1c17833..c5e56c7a06914 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1051,6 +1051,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   }
 
   PipelineTuningOptions PTO;
+  PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
   // For historical reasons, loop interleaving is set to mirror setting for loop
   // unrolling.
   PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
diff --git a/clang/test/CodeGen/loop-unroll.c b/clang/test/CodeGen/loop-unroll.c
new file mode 100644
index 0000000000000..c37411fa052bc
--- /dev/null
+++ b/clang/test/CodeGen/loop-unroll.c
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -S -O1 -funroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-UNROLL
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -S -O1 -fno-unroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-UNROLL
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -funroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-UNROLL
+// RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -fno-unroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-UNROLL
+
+// CHECK-ENABLE-UNROLL-LABEL: @for_test()
+// CHECK-ENABLE-UNROLL: br label %[[FORBODY:[a-z0-9_\.]+]]
+// CHECK-ENABLE-UNROLL: [[FORBODY]]:
+// CHECK-ENABLE-UNROLL: store
+// CHECK-ENABLE-UNROLL: store
+// CHECK-ENABLE-UNROLL: br i1 %[[EXITCOND:[a-z0-9_\.]+]], label %[[FORBODY5:[a-z0-9_\.]+]], label %[[FORBODY]]
+// CHECK-ENABLE-UNROLL: [[FORBODY5]]:
+// CHECK-ENABLE-UNROLL: fmul
+// CHECK-ENABLE-UNROLL: fadd
+// CHECK-ENABLE-UNROLL: store
+// CHECK-ENABLE-UNROLL: fmul
+// CHECK-ENABLE-UNROLL: fadd
+// CHECK-ENABLE-UNROLL: store
+// CHECK-ENABLE-UNROLL: fmul
+// CHECK-ENABLE-UNROLL: fadd
+// CHECK-ENABLE-UNROLL: store
+
+// CHECK-DISABLE-UNROLL-LABEL: @for_test()
+// CHECK-DISABLE-UNROLL: br label %[[FORBODY:[a-z0-9_\.]+]]
+// CHECK-DISABLE-UNROLL: [[FORBODY]]:
+// CHECK-DISABLE-UNROLL: store
+// CHECK-DISABLE-UNROLL-NOT: store
+// CHECK-DISABLE-UNROLL: br i1 %[[EXITCOND:[a-z0-9_\.]+]], label %[[FORBODY5:[a-z0-9_\.]+]], label %[[FORBODY]]
+// CHECK-DISABLE-UNROLL: [[FORBODY5]]:
+// CHECK-DISABLE-UNROLL: fmul
+// CHECK-DISABLE-UNROLL: fadd
+// CHECK-DISABLE-UNROLL: store
+// CHECK-DISABLE-UNROLL: fmul
+// CHECK-DISABLE-UNROLL: fadd
+// CHECK-DISABLE-UNROLL: store
+// CHECK-DISABLE-UNROLL-NOT: fmul
+// CHECK-DISABLE-UNROLL-NOT: fadd
+// CHECK-DISABLE-UNROLL-NOT: store
+
+int printf(const char * restrict format, ...);
+
+void for_test() {
+  double A[1000], B[1000];
+  int L = 500;
+  for (int i = 0; i < L; i++) {
+    A[i] = i;
+  }
+  for (int i = 0; i < L; i++) {
+    B[i] = A[i]*5;
+    B[i]++;
+    A[i] *= 7;
+    A[i]++;
+  }
+  printf("%lf %lf\n", A[0], B[0]);
+}

From 3d59e388ca252615beb573768015d32526fd1d56 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 24 May 2019 18:18:51 +0000
Subject: [PATCH 0188/1176] AMDGPU: Activate all lanes when spilling CSR VGPR
 for SGPR spills

If some lanes weren't active on entry to the function, this could
clobber their VGPR values.

llvm-svn: 361655
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    | 92 +++++++++++++------
 llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll |  2 +-
 .../AMDGPU/call-preserved-registers.ll        |  8 +-
 .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 26 ++++--
 .../AMDGPU/callee-special-input-vgprs.ll      |  2 +-
 .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 34 +++++--
 llvm/test/CodeGen/AMDGPU/nested-calls.ll      | 12 ++-
 llvm/test/CodeGen/AMDGPU/sibling-call.ll      | 13 ++-
 8 files changed, 135 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d2dd3491f8600..1eea77be6200e 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -523,22 +523,20 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
 // but we would then have to make sure that we were in fact saving at least one
 // callee-save register in the prologue, which is additional complexity that
 // doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
-  MachineFunction *MF = MBB.getParent();
-
-  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
+static unsigned findScratchNonCalleeSaveRegister(MachineFunction &MF,
+                                                 LivePhysRegs &LiveRegs,
+                                                 const TargetRegisterClass &RC) {
+  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
-  LivePhysRegs LiveRegs(TRI);
-  LiveRegs.addLiveIns(MBB);
 
   // Mark callee saved registers as used so we will not choose them.
-  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
+  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
   for (unsigned i = 0; CSRegs[i]; ++i)
     LiveRegs.addReg(CSRegs[i]);
 
-  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  for (unsigned Reg : AMDGPU::SReg_32_XM0RegClass) {
+  for (unsigned Reg : RC) {
     if (LiveRegs.available(MRI, Reg))
       return Reg;
   }
@@ -561,6 +559,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
 
   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
   unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
+  LivePhysRegs LiveRegs;
 
   MachineBasicBlock::iterator MBBI = MBB.begin();
   DebugLoc DL;
@@ -578,7 +577,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
 
     RoundedSize += Alignment;
 
-    unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(MBB);
+    LiveRegs.init(TRI);
+    LiveRegs.addLiveIns(MBB);
+
+    unsigned ScratchSPReg
+      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                         AMDGPU::SReg_32_XM0RegClass);
     assert(ScratchSPReg != AMDGPU::NoRegister);
 
     // s_add_u32 tmp_reg, s32, NumBytes
@@ -609,13 +613,33 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-         : FuncInfo->getSGPRSpillVGPRs()) {
-    if (!Reg.FI.hasValue())
-      continue;
-    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
-                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                             &TII->getRegisterInfo());
+  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
+    if (LiveRegs.empty()) {
+      LiveRegs.init(TRI);
+      LiveRegs.addLiveIns(MBB);
+    }
+
+    // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+    // turn on all lanes before doing the spill to memory.
+    unsigned ScratchExecCopy
+      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                         AMDGPU::SReg_64_XEXECRegClass);
+
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
+      .addImm(-1);
+
+    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+           : FuncInfo->getSGPRSpillVGPRs()) {
+      if (!Reg.FI.hasValue())
+        continue;
+      TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
+                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                               &TII->getRegisterInfo());
+    }
+
+    // FIXME: Split block and make terminator.
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+      .addReg(ScratchExecCopy);
   }
 }
 
@@ -628,14 +652,32 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc DL;
 
-  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-         : FuncInfo->getSGPRSpillVGPRs()) {
-    if (!Reg.FI.hasValue())
-      continue;
-    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
-                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                              &TII->getRegisterInfo());
+  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
+    // See emitPrologue
+    LivePhysRegs LiveRegs(*ST.getRegisterInfo());
+    LiveRegs.addLiveIns(MBB);
+
+    unsigned ScratchExecCopy
+      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                         AMDGPU::SReg_64_XEXECRegClass);
+
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
+      .addImm(-1);
+
+    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+           : FuncInfo->getSGPRSpillVGPRs()) {
+      if (!Reg.FI.hasValue())
+        continue;
+      TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
+                                Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                                &TII->getRegisterInfo());
+    }
+
+    // FIXME: Split block and make terminator.
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+      .addReg(ScratchExecCopy);
   }
 
   unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
@@ -645,8 +687,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   uint32_t NumBytes = MFI.getStackSize();
 
-  DebugLoc DL;
-
   // FIXME: Clarify distinction between no set SP and SP. For callee functions,
   // it's really whether we need SP to be accurate or not.
 
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index c4b2561a8f291..cd1ce13eb16b9 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -30,11 +30,11 @@ entry:
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
 ; GCN: s_mov_b32 s5, s32
+; GCN: s_add_u32 s32, s32, 0xc00{{$}}
 ; GCN-DAG: buffer_store_dword v32
 ; GCN-DAG: buffer_store_dword v33
 ; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
 ; GCN-DAG: v_writelane_b32
-; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
 ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
 ; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index ee18d322914bd..2fef190f8ccab 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -38,8 +38,8 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
 ; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN: v_readlane_b32 s37, v32, 4
+; GCN-DAG: s_mov_b32 s5, s33
+; GCN-DAG: v_readlane_b32 s37, v32, 4
 ; GCN: v_readlane_b32 s36, v32, 3
 ; GCN: v_readlane_b32 s35, v32, 2
 ; GCN: v_readlane_b32 s34, v32, 1
@@ -59,7 +59,7 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
 ; GCN-NEXT: s_mov_b32 s5, s33
 ; GCN-NEXT: s_mov_b32 s33, s5
 ; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s5, s33
+; GCN: s_mov_b32 s5, s33
 define void @test_func_call_external_void_funcx2() #0 {
   call void @external_void_func_void()
   call void @external_void_func_void()
@@ -175,7 +175,7 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_v32(i32 addrspace(
 ; GCN-NEXT: ; clobber
 ; GCN-NEXT: #ASMEND
 ; GCN-NEXT:	v_readlane_b32 s33, v0, 0
-; GCN-NEXT: s_setpc_b64
+; GCN: s_setpc_b64
 define hidden void @void_func_void_clobber_s33() #2 {
   call void asm sideeffect "; clobber", "~{s33}"() #0
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index daec4930e6795..ebd6f96a5b836 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -37,19 +37,19 @@ define void @callee_with_stack() #0 {
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt
 ; GCN: s_mov_b32 s5, s32
+; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8
 
 ; GCN-DAG: v_writelane_b32 v32, s33,
 ; GCN-DAG: v_writelane_b32 v32, s34,
 ; GCN-DAG: v_writelane_b32 v32, s35,
-; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
 ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
 ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
 ; GCN-DAG: s_mov_b32 s33, s5
 
 
 ; GCN: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN-DAG: s_mov_b32 s5, s33
 ; GCN-DAG: v_readlane_b32 s35,
 ; GCN-DAG: v_readlane_b32 s34,
 ; GCN-DAG: v_readlane_b32 s33,
@@ -72,7 +72,9 @@ define void @callee_with_stack_and_call() #0 {
 ; GCN-LABEL: {{^}}callee_no_stack_with_call:
 ; GCN: s_waitcnt
 ; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GCN-DAG: v_writelane_b32 v32, s33, 0
 ; GCN-DAG: v_writelane_b32 v32, s34, 1
 ; GCN: s_mov_b32 s33, s5
@@ -81,9 +83,12 @@ define void @callee_with_stack_and_call() #0 {
 
 ; GCN-DAG: v_readlane_b32 s34, v32, 1
 ; GCN-DAG: v_readlane_b32 s33, v32, 0
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
-; GCN: s_sub_u32 s32, s32, 0x400
 
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
+
+; GCN: s_sub_u32 s32, s32, 0x400
 ; GCN: s_setpc_b64
 define void @callee_no_stack_with_call() #0 {
   call void @external_void_func_void()
@@ -94,11 +99,18 @@ declare void @external_void_func_void() #0
 
 ; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and restored
 ; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
+
 ; GCN: v_writelane_b32 v32
 ; GCN: ;;#ASMSTART
 ; GCN: v_readlane_b32 s{{[0-9]+}}, v32
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
+
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 750a0203c9bfc..c63d96917d914 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -326,8 +326,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
 
 ; Requires loading and storing to stack slot.
 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
 ; GCN: s_add_u32 s32, s32, 0x400{{$}}
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
 
 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 10573461b3795..4b38fb8e68d92 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -28,10 +28,12 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
 ; GCN:       ; %bb.0: ; %bb0
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s5, s32
+; GCN-NEXT:    s_add_u32 s32, s32, 0x400
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    v_writelane_b32 v32, s33, 0
 ; GCN-NEXT:    v_writelane_b32 v32, s34, 1
-; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    v_writelane_b32 v32, s35, 2
 ; GCN-NEXT:    s_getpc_b64 s[6:7]
 ; GCN-NEXT:    s_add_u32 s6, s6, func_v2f32@rel32@lo+4
@@ -39,12 +41,14 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
 ; GCN-NEXT:    s_mov_b64 s[34:35], s[30:31]
 ; GCN-NEXT:    s_mov_b32 s33, s5
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    s_mov_b64 s[30:31], s[34:35]
 ; GCN-NEXT:    v_readlane_b32 s35, v32, 2
+; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    v_readlane_b32 s34, v32, 1
 ; GCN-NEXT:    v_readlane_b32 s33, v32, 0
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -62,10 +66,12 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
 ; GCN:       ; %bb.0: ; %bb0
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s5, s32
+; GCN-NEXT:    s_add_u32 s32, s32, 0x400
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    v_writelane_b32 v32, s33, 0
 ; GCN-NEXT:    v_writelane_b32 v32, s34, 1
-; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    v_writelane_b32 v32, s35, 2
 ; GCN-NEXT:    s_getpc_b64 s[6:7]
 ; GCN-NEXT:    s_add_u32 s6, s6, func_v3f32@rel32@lo+4
@@ -73,12 +79,14 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
 ; GCN-NEXT:    s_mov_b64 s[34:35], s[30:31]
 ; GCN-NEXT:    s_mov_b32 s33, s5
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    s_mov_b64 s[30:31], s[34:35]
 ; GCN-NEXT:    v_readlane_b32 s35, v32, 2
+; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    v_readlane_b32 s34, v32, 1
 ; GCN-NEXT:    v_readlane_b32 s33, v32, 0
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -96,10 +104,12 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
 ; GCN:       ; %bb.0: ; %bb0
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s5, s32
+; GCN-NEXT:    s_add_u32 s32, s32, 0x400
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    v_writelane_b32 v32, s33, 0
 ; GCN-NEXT:    v_writelane_b32 v32, s34, 1
-; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    v_writelane_b32 v32, s35, 2
 ; GCN-NEXT:    s_getpc_b64 s[6:7]
 ; GCN-NEXT:    s_add_u32 s6, s6, func_v4f16@rel32@lo+4
@@ -107,12 +117,14 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
 ; GCN-NEXT:    s_mov_b64 s[34:35], s[30:31]
 ; GCN-NEXT:    s_mov_b32 s33, s5
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    s_mov_b64 s[30:31], s[34:35]
 ; GCN-NEXT:    v_readlane_b32 s35, v32, 2
+; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    v_readlane_b32 s34, v32, 1
 ; GCN-NEXT:    v_readlane_b32 s33, v32, 0
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -130,10 +142,12 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
 ; GCN:       ; %bb.0: ; %bb0
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s5, s32
+; GCN-NEXT:    s_add_u32 s32, s32, 0x400
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    v_writelane_b32 v32, s33, 0
 ; GCN-NEXT:    v_writelane_b32 v32, s34, 1
-; GCN-NEXT:    s_add_u32 s32, s32, 0x400
 ; GCN-NEXT:    v_writelane_b32 v32, s35, 2
 ; GCN-NEXT:    s_getpc_b64 s[6:7]
 ; GCN-NEXT:    s_add_u32 s6, s6, func_struct@rel32@lo+4
@@ -141,13 +155,15 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
 ; GCN-NEXT:    s_mov_b64 s[34:35], s[30:31]
 ; GCN-NEXT:    s_mov_b32 s33, s5
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    s_mov_b64 s[30:31], s[34:35]
 ; GCN-NEXT:    v_readlane_b32 s35, v32, 2
+; GCN-NEXT:    s_mov_b32 s5, s33
 ; GCN-NEXT:    v_readlane_b32 s34, v32, 1
+; GCN-NEXT:    v_mov_b32_e32 v1, v4
 ; GCN-NEXT:    v_readlane_b32 s33, v32, 0
+; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
 ; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    v_mov_b32_e32 v1, v4
+; GCN-NEXT:    s_mov_b64 exec, s[6:7]
 ; GCN-NEXT:    s_sub_u32 s32, s32, 0x400
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index 7fbcb9706a891..66e6988fbe692 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -10,9 +10,12 @@ declare void @external_void_func_i32(i32) #0
 ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm:
 ; GCN: s_waitcnt
 ; GCN: s_mov_b32 s5, s32
-; Spill CSR VGPR used for SGPR spilling
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
 ; GCN-DAG: s_add_u32 s32, s32, 0x400
+; Spill CSR VGPR used for SGPR spilling
+; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
+
 ; GCN-DAG: v_writelane_b32 v32, s33, 0
 ; GCN-DAG: v_writelane_b32 v32, s34, 1
 ; GCN-DAG: v_writelane_b32 v32, s35, 2
@@ -22,7 +25,10 @@ declare void @external_void_func_i32(i32) #0
 ; GCN: v_readlane_b32 s35, v32, 2
 ; GCN: v_readlane_b32 s34, v32, 1
 ; GCN: v_readlane_b32 s33, v32, 0
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
+
 ; GCN: s_sub_u32 s32, s32, 0x400
 ; GCN: s_setpc_b64
 define void @test_func_call_external_void_func_i32_imm() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 1881b526dcda8..ba0acbc2573a9 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -207,13 +207,17 @@ entry:
 ; Have another non-tail in the function
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call:
 ; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:12
+; GCN: s_add_u32 s32, s32, 0x400
+
+; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
+; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s5 offset:12
+; GCN-NEXT: s_mov_b64 exec
+
 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
 ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
 ; GCN-DAG: v_writelane_b32 v34, s33, 0
 ; GCN-DAG: v_writelane_b32 v34, s34, 1
 ; GCN-DAG: v_writelane_b32 v34, s35, 2
-; GCN-DAG: s_add_u32 s32, s32, 0x400
 
 ; GCN-DAG: s_getpc_b64
 ; GCN: s_swappc_b64
@@ -228,7 +232,10 @@ entry:
 
 ; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:4
 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
-; GCN: buffer_load_dword v34, off, s[0:3], s5 offset:12
+; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
+; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:12
+; GCN-NEXT: s_mov_b64 exec
+
 ; GCN: s_sub_u32 s32, s32, 0x400
 ; GCN: s_setpc_b64 s[6:7]
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {

From 56bee1a90a71876cb5067b108bf5715fa1c4e843 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 24 May 2019 18:25:49 +0000
Subject: [PATCH 0189/1176] [COFF] Replace OutputSection* with uint16_t index
 in Chunk

Shaves another 8 bytes off of SectionChunk, the most commonly allocated
type in LLD.

These indices are only valid after we've assigned chunks to output
sections and removed empty sections, so do that in a new pass.

Reviewers: ruiu, aganea

Differential Revision: https://reviews.llvm.org/D62356

llvm-svn: 361657
---
 lld/COFF/Chunks.cpp         |  5 ++--
 lld/COFF/Chunks.h           | 12 ++++++----
 lld/COFF/PDB.cpp            |  2 +-
 lld/COFF/Writer.cpp         | 48 +++++++++++++++++++++++++++----------
 lld/test/COFF/strtab-size.s |  2 +-
 5 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index b016ac1e86d35..5af3b52eda93d 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -53,7 +53,7 @@ SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
 // SectionChunk is one of the most frequently allocated classes, so it is
 // important to keep it as compact as possible. As of this writing, the number
 // below is the size of this class on x64 platforms.
-static_assert(sizeof(SectionChunk) <= 104, "SectionChunk grew unexpectedly");
+static_assert(sizeof(SectionChunk) <= 96, "SectionChunk grew unexpectedly");
 
 static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); }
 static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); }
@@ -858,7 +858,7 @@ uint8_t Baserel::getDefaultType() {
 MergeChunk *MergeChunk::Instances[Log2MaxSectionAlignment + 1] = {};
 
 MergeChunk::MergeChunk(uint32_t Alignment)
-    : Builder(StringTableBuilder::RAW, Alignment) {
+    : Chunk(OtherKind), Builder(StringTableBuilder::RAW, Alignment) {
   setAlignment(Alignment);
 }
 
@@ -886,7 +886,6 @@ void MergeChunk::assignSubsectionRVAs() {
     if (!C->Live)
       continue;
     size_t Off = Builder.getOffset(toStringRef(C->getContents()));
-    C->setOutputSection(Out);
     C->setRVA(RVA + Off);
   }
 }
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 672003b31f98f..619a4886cd931 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -103,8 +103,9 @@ class Chunk {
 
   // An output section has pointers to chunks in the section, and each
   // chunk has a back pointer to an output section.
-  void setOutputSection(OutputSection *O) { Out = O; }
-  OutputSection *getOutputSection() const { return Out; }
+  void setOutputSectionIdx(uint16_t O) { OSIdx = O; }
+  uint16_t getOutputSectionIdx() const { return OSIdx; }
+  OutputSection *getOutputSection() const;
 
   // Windows-specific.
   // Collect all locations that contain absolute addresses for base relocations.
@@ -125,11 +126,12 @@ class Chunk {
   // value.
   uint8_t P2Align = 0;
 
+  // The output section index for this chunk. The first valid section number is
+  // one.
+  uint16_t OSIdx = 0;
+
   // The RVA of this chunk in the output. The writer sets a value.
   uint32_t RVA = 0;
-
-  // The output section for this chunk.
-  OutputSection *Out = nullptr;
 };
 
 // A chunk corresponding a section of an input file.
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index f237485c0bbe3..242235154d05f 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -1030,7 +1030,7 @@ void PDBLinker::mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap,
 static ArrayRef<uint8_t> relocateDebugChunk(BumpPtrAllocator &Alloc,
                                             SectionChunk &DebugChunk) {
   uint8_t *Buffer = Alloc.Allocate<uint8_t>(DebugChunk.getSize());
-  assert(DebugChunk.getOutputSection() == nullptr &&
+  assert(DebugChunk.getOutputSectionIdx() == 0 &&
          "debug sections should not be in output sections");
   DebugChunk.writeTo(Buffer);
   return makeArrayRef(Buffer, DebugChunk.getSize());
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 19614058a2b69..0e6cc9de853f6 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -78,6 +78,14 @@ static_assert(DOSStubSize % 8 == 0, "DOSStub size must be multiple of 8");
 
 static const int NumberOfDataDirectory = 16;
 
+// Global vector of all output sections. After output sections are finalized,
+// this can be indexed by Chunk::getOutputSection.
+static std::vector<OutputSection *> OutputSections;
+
+OutputSection *Chunk::getOutputSection() const {
+  return OSIdx == 0 ? nullptr : OutputSections[OSIdx - 1];
+}
+
 namespace {
 
 class DebugDirectoryChunk : public Chunk {
@@ -192,6 +200,7 @@ class Writer {
   void assignAddresses();
   void finalizeAddresses();
   void removeEmptySections();
+  void assignOutputSectionIndices();
   void createSymbolAndStringTable();
   void openFile(StringRef OutputPath);
   template <typename PEHeaderTy> void writeHeader();
@@ -225,7 +234,6 @@ class Writer {
 
   std::unique_ptr<FileOutputBuffer> &Buffer;
   std::map<PartialSectionKey, PartialSection *> PartialSections;
-  std::vector<OutputSection *> OutputSections;
   std::vector<char> Strtab;
   std::vector<llvm::object::coff_symbol16> OutputSymtab;
   IdataContents Idata;
@@ -284,12 +292,10 @@ void writeResult() { Writer().run(); }
 
 void OutputSection::addChunk(Chunk *C) {
   Chunks.push_back(C);
-  C->setOutputSection(this);
 }
 
 void OutputSection::insertChunkAtStart(Chunk *C) {
   Chunks.insert(Chunks.begin(), C);
-  C->setOutputSection(this);
 }
 
 void OutputSection::setPermissions(uint32_t C) {
@@ -298,8 +304,6 @@ void OutputSection::setPermissions(uint32_t C) {
 }
 
 void OutputSection::merge(OutputSection *Other) {
-  for (Chunk *C : Other->Chunks)
-    C->setOutputSection(this);
   Chunks.insert(Chunks.end(), Other->Chunks.begin(), Other->Chunks.end());
   Other->Chunks.clear();
   ContribSections.insert(ContribSections.end(), Other->ContribSections.begin(),
@@ -444,7 +448,6 @@ static bool createThunks(OutputSection *OS, int Margin) {
         Chunk *ThunkChunk = Thunk->getChunk();
         ThunkChunk->setRVA(
             ThunkInsertionRVA); // Estimate of where it will be located.
-        ThunkChunk->setOutputSection(OS);
         OS->Chunks.insert(OS->Chunks.begin() + ThunkInsertionSpot, ThunkChunk);
         ThunkInsertionSpot++;
         ThunksSize += ThunkChunk->getSize();
@@ -595,6 +598,7 @@ void Writer::run() {
   removeUnusedSections();
   finalizeAddresses();
   removeEmptySections();
+  assignOutputSectionIndices();
   setSectionPermissions();
   createSymbolAndStringTable();
 
@@ -1000,9 +1004,26 @@ void Writer::removeEmptySections() {
   OutputSections.erase(
       std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty),
       OutputSections.end());
+}
+
+void Writer::assignOutputSectionIndices() {
+  // Assign final output section indices, and assign each chunk to its output
+  // section.
   uint32_t Idx = 1;
-  for (OutputSection *Sec : OutputSections)
-    Sec->SectionIndex = Idx++;
+  for (OutputSection *OS : OutputSections) {
+    OS->SectionIndex = Idx;
+    for (Chunk *C : OS->Chunks)
+      C->setOutputSectionIdx(Idx);
+    ++Idx;
+  }
+
+  // Merge chunks are containers of chunks, so assign those an output section
+  // too.
+  for (MergeChunk *MC : MergeChunk::Instances)
+    if (MC)
+      for (SectionChunk *SC : MC->Sections)
+        if (SC && SC->Live)
+          SC->setOutputSectionIdx(MC->getOutputSectionIdx());
 }
 
 size_t Writer::addEntryToStringTable(StringRef Str) {
@@ -1463,9 +1484,9 @@ static void maybeAddAddressTakenFunction(SymbolRVASet &AddressTakenSyms,
     // section.
     auto *D = cast<DefinedRegular>(S);
     if (D->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
-      Chunk *RefChunk = D->getChunk();
-      OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr;
-      if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE)
+      SectionChunk *SC = dyn_cast<SectionChunk>(D->getChunk());
+      if (SC && SC->Live &&
+          SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE)
         addSymbolToRVASet(AddressTakenSyms, D);
     }
     break;
@@ -1744,8 +1765,9 @@ void Writer::sortExceptionTable() {
     return;
   // We assume .pdata contains function table entries only.
   auto BufAddr = [&](Chunk *C) {
-    return Buffer->getBufferStart() + C->getOutputSection()->getFileOff() +
-           C->getRVA() - C->getOutputSection()->getRVA();
+    OutputSection *OS = C->getOutputSection();
+    return Buffer->getBufferStart() + OS->getFileOff() + C->getRVA() -
+           OS->getRVA();
   };
   uint8_t *Begin = BufAddr(FirstPdata);
   uint8_t *End = BufAddr(LastPdata) + LastPdata->getSize();
diff --git a/lld/test/COFF/strtab-size.s b/lld/test/COFF/strtab-size.s
index 91e4c7e012dbe..574b1a0265ba6 100644
--- a/lld/test/COFF/strtab-size.s
+++ b/lld/test/COFF/strtab-size.s
@@ -13,7 +13,7 @@
 # or disk full, cannot seek to 0x1602").
 
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck %s
-# CHECK: SymbolCount: 199
+# CHECK: SymbolCount: 197
 
 .global main
 .text

From 93d2c8a646c50f5b8be4d212c23ecc4533465aa5 Mon Sep 17 00:00:00 2001
From: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Date: Fri, 24 May 2019 18:48:42 +0000
Subject: [PATCH 0190/1176] [OpenMP] Add test for requires and unified shared
 memory clause with declare target link

Summary:
This patch adds a test for requires with unified share memory clause when a declare target link is present.

This test needs to go in prior to changes to declare target link for comparison purposes.

Reviewers: ABataev, caomhin

Reviewed By: ABataev

Subscribers: guansong, jdoerfert, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62407

llvm-svn: 361658
---
 ..._target_requires_unified_shared_memory.cpp | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp

diff --git a/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp b/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
new file mode 100644
index 0000000000000..6ce2ffb45e3cc
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_target_requires_unified_shared_memory.cpp
@@ -0,0 +1,67 @@
+// Test declare target link under unified memory requirement.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+#define N 1000
+
+double var = 10.0;
+
+#pragma omp requires unified_shared_memory
+#pragma omp declare target link(var)
+
+int bar(int n){
+  double sum = 0;
+
+#pragma omp target
+  for(int i = 0; i < n; i++) {
+    sum += var;
+  }
+
+  return sum;
+}
+
+// CHECK: [[VAR:@.+]] = global double 1.000000e+01
+// CHECK: [[VAR_DECL_TGT_LINK_PTR:@.+]] = global double* [[VAR]]
+
+// CHECK: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [3 x i64] [i64 4, i64 8, i64 8]
+// CHECK: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [3 x i64] [i64 800, i64 800, i64 531]
+
+// CHECK: [[N_CASTED:%.+]] = alloca i64
+// CHECK: [[SUM_CASTED:%.+]] = alloca i64
+
+// CHECK: [[OFFLOAD_BASEPTRS:%.+]] = alloca [3 x i8*]
+// CHECK: [[OFFLOAD_PTRS:%.+]] = alloca [3 x i8*]
+
+// CHECK: [[LOAD1:%.+]] = load i64, i64* [[N_CASTED]]
+// CHECK: [[LOAD2:%.+]] = load i64, i64* [[SUM_CASTED]]
+
+// CHECK: [[BPTR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK: [[BCAST1:%.+]] = bitcast i8** [[BPTR1]] to i64*
+// CHECK: store i64 [[LOAD1]], i64* [[BCAST1]]
+// CHECK: [[BPTR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
+// CHECK: [[BCAST2:%.+]] = bitcast i8** [[BPTR2]] to i64*
+// CHECK: store i64 [[LOAD1]], i64* [[BCAST2]]
+
+// CHECK: [[BPTR3:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK: [[BCAST3:%.+]] = bitcast i8** [[BPTR3]] to i64*
+// CHECK: store i64 [[LOAD2]], i64* [[BCAST3]]
+// CHECK: [[BPTR4:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 1
+// CHECK: [[BCAST4:%.+]] = bitcast i8** [[BPTR4]] to i64*
+// CHECK: store i64 [[LOAD2]], i64* [[BCAST4]]
+
+// CHECK: [[BPTR5:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK: [[BCAST5:%.+]] = bitcast i8** [[BPTR5]] to double***
+// CHECK: store double** [[VAR_DECL_TGT_LINK_PTR]], double*** [[BCAST5]]
+// CHECK: [[BPTR6:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 2
+// CHECK: [[BCAST6:%.+]] = bitcast i8** [[BPTR6]] to double**
+// CHECK: store double* [[VAR]], double** [[BCAST6]]
+
+// CHECK: [[BPTR7:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK: [[BPTR8:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_PTRS]], i32 0, i32 0
+
+// CHECK: call i32 @__tgt_target(i64 -1, i8* @{{.*}}.region_id, i32 3, i8** [[BPTR7]], i8** [[BPTR8]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[OFFLOAD_SIZES]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[OFFLOAD_MAPTYPES]], i32 0, i32 0))
+
+#endif

From 9f7bd71cf581c67538bec0cf38ae33398edde83b Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 24 May 2019 18:58:21 +0000
Subject: [PATCH 0191/1176] [ARM] additionally check for ARM::INLINEASM_BR w/
 ARM::INLINEASM

Summary:
We were observing failures for arm32 allyesconfigs of the Linux kernel
with the asm goto Clang patch, where ldr's were being generated to
offsets too far away to encode in imm12.

It looks like since INLINEASM_BR was created off of INLINEASM, a few
checks for INLINEASM needed to be updated to check for either case.

pr/41999

Link: https://github.com/ClangBuiltLinux/linux/issues/490

Reviewers: peter.smith, kristof.beyls, ostannard, rengolin, t.p.northover

Reviewed By: peter.smith

Subscribers: jyu2, javed.absar, hiraditya, llvm-commits, nathanchance, craig.topper, kees, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62400

llvm-svn: 361659
---
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 20 ++++++++++----------
 llvm/lib/Target/ARM/Thumb2InstrInfo.cpp  |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index b55af47d7db11..22c53d9e26cf3 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -706,15 +706,7 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   if (MCID.getSize())
     return MCID.getSize();
 
-  // If this machine instr is an inline asm, measure it.
-  if (MI.getOpcode() == ARM::INLINEASM) {
-    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
-    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
-      Size = alignTo(Size, 4);
-    return Size;
-  }
-  unsigned Opc = MI.getOpcode();
-  switch (Opc) {
+  switch (MI.getOpcode()) {
   default:
     // pseudo-instruction sizes are zero.
     return 0;
@@ -751,6 +743,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     return 12;
   case ARM::SPACE:
     return MI.getOperand(1).getImm();
+  case ARM::INLINEASM:
+  case ARM::INLINEASM_BR: {
+    // If this machine instr is an inline asm, measure it.
+    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+      Size = alignTo(Size, 4);
+    return Size;
+  }
   }
 }
 
@@ -2392,7 +2392,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   bool isSub = false;
 
   // Memory operands in inline assembly always use AddrMode2.
-  if (Opcode == ARM::INLINEASM)
+  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
     AddrMode = ARMII::AddrMode2;
 
   if (Opcode == ARM::ADDri) {
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 6ddd71cf8d3e4..9a79089a960b1 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -477,7 +477,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   bool isSub = false;
 
   // Memory operands in inline assembly always use AddrModeT2_i12.
-  if (Opcode == ARM::INLINEASM)
+  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
     AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
 
   if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {

From fa643d70714a956489ee48424c59bf27641cbb5e Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Fri, 24 May 2019 18:58:29 +0000
Subject: [PATCH 0192/1176] Add JSON dumping tests for ObjC statements; add
 support for dumping @catch catch-all statements.

llvm-svn: 361660
---
 clang/include/clang/AST/JSONNodeDumper.h |   1 +
 clang/lib/AST/JSONNodeDumper.cpp         |   7 +
 clang/test/AST/ast-dump-stmt-json.m      | 719 +++++++++++++++++++++++
 3 files changed, 727 insertions(+)
 create mode 100644 clang/test/AST/ast-dump-stmt-json.m

diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index 8f7eabe114636..b966747f42399 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -260,6 +260,7 @@ class JSONNodeDumper
   void VisitLabelStmt(const LabelStmt *LS);
   void VisitGotoStmt(const GotoStmt *GS);
   void VisitWhileStmt(const WhileStmt *WS);
+  void VisitObjCAtCatchStmt(const ObjCAtCatchStmt *OACS);
 
   void visitTextComment(const comments::TextComment *C,
                         const comments::FullComment *);
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 524e94a516584..afe43e436dcf9 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -966,6 +966,13 @@ void JSONNodeDumper::VisitWhileStmt(const WhileStmt *WS) {
   attributeOnlyIfTrue("hasVar", WS->hasVarStorage());
 }
 
+void JSONNodeDumper::VisitObjCAtCatchStmt(const ObjCAtCatchStmt* OACS) {
+  // FIXME: it would be nice for the ASTNodeTraverser would handle the catch
+  // parameter the same way for C++ and ObjC rather. In this case, C++ gets a
+  // null child node and ObjC gets no child node.
+  attributeOnlyIfTrue("isCatchAll", OACS->getCatchParamDecl() == nullptr);
+}
+
 StringRef JSONNodeDumper::getCommentCommandName(unsigned CommandID) const {
   if (Traits)
     return Traits->getCommandInfo(CommandID)->Name;
diff --git a/clang/test/AST/ast-dump-stmt-json.m b/clang/test/AST/ast-dump-stmt-json.m
new file mode 100644
index 0000000000000..141b53b96e1cc
--- /dev/null
+++ b/clang/test/AST/ast-dump-stmt-json.m
@@ -0,0 +1,719 @@
+// RUN: %clang_cc1 -Wno-unused -fblocks -fobjc-exceptions -ast-dump=json -ast-dump-filter Test %s | FileCheck %s
+
+void TestBlockExpr(int x) {
+  ^{ x; };
+}
+
+void TestExprWithCleanup(int x) {
+  ^{ x; };
+}
+
+@interface A
+@end
+
+void TestObjCAtCatchStmt() {
+  @try {
+  } @catch(A *a) {
+  } @catch(...) {
+  } @finally {
+  }
+}
+
+
+// CHECK:  "kind": "FunctionDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 3
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 3
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 5
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestBlockExpr", 
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void (int)"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ParmVarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 24, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 3
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 20, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 3
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 24, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 3
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isUsed": true, 
+// CHECK-NEXT:    "name": "x", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "CompoundStmt", 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 27, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 3
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 5
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ExprWithCleanups", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 4
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 9, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 4
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void (^)(void)"
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "valueCategory": "rvalue", 
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "BlockExpr", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 4
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 4
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (^)(void)"
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "valueCategory": "rvalue", 
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "BlockDecl", 
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3, 
+// CHECK-NEXT:           "file": "{{.*}}", 
+// CHECK-NEXT:           "line": 4
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 4
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 4
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "kind": "Capture", 
+// CHECK-NEXT:            "var": {
+// CHECK-NEXT:             "id": "0x{{.*}}", 
+// CHECK-NEXT:             "kind": "ParmVarDecl", 
+// CHECK-NEXT:             "name": "x", 
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "int"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}", 
+// CHECK-NEXT:            "kind": "CompoundStmt", 
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 4
+// CHECK-NEXT:             }, 
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 9, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 4
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}", 
+// CHECK-NEXT:              "kind": "ImplicitCastExpr", 
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 6, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 4
+// CHECK-NEXT:               }, 
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 6, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 4
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "valueCategory": "rvalue", 
+// CHECK-NEXT:              "castKind": "LValueToRValue", 
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}", 
+// CHECK-NEXT:                "kind": "DeclRefExpr", 
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 6, 
+// CHECK-NEXT:                  "file": "{{.*}}", 
+// CHECK-NEXT:                  "line": 4
+// CHECK-NEXT:                 }, 
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 6, 
+// CHECK-NEXT:                  "file": "{{.*}}", 
+// CHECK-NEXT:                  "line": 4
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                }, 
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "const int"
+// CHECK-NEXT:                }, 
+// CHECK-NEXT:                "valueCategory": "lvalue", 
+// CHECK-NEXT:                "referencedDecl": {
+// CHECK-NEXT:                 "id": "0x{{.*}}", 
+// CHECK-NEXT:                 "kind": "ParmVarDecl", 
+// CHECK-NEXT:                 "name": "x", 
+// CHECK-NEXT:                 "type": {
+// CHECK-NEXT:                  "qualType": "int"
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                }
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 7
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 7
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 9
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestExprWithCleanup", 
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void (int)"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "ParmVarDecl", 
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 30, 
+// CHECK-NEXT:     "file": "{{.*}}", 
+// CHECK-NEXT:     "line": 7
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 26, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 7
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 30, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 7
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "isUsed": true, 
+// CHECK-NEXT:    "name": "x", 
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "CompoundStmt", 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 33, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 7
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 9
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ExprWithCleanups", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 8
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 9, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 8
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void (^)(void)"
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "valueCategory": "rvalue", 
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "BlockExpr", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 8
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 8
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (^)(void)"
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "valueCategory": "rvalue", 
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "BlockDecl", 
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3, 
+// CHECK-NEXT:           "file": "{{.*}}", 
+// CHECK-NEXT:           "line": 8
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 8
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 8
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "kind": "Capture", 
+// CHECK-NEXT:            "var": {
+// CHECK-NEXT:             "id": "0x{{.*}}", 
+// CHECK-NEXT:             "kind": "ParmVarDecl", 
+// CHECK-NEXT:             "name": "x", 
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "int"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}", 
+// CHECK-NEXT:            "kind": "CompoundStmt", 
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 8
+// CHECK-NEXT:             }, 
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 9, 
+// CHECK-NEXT:              "file": "{{.*}}", 
+// CHECK-NEXT:              "line": 8
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}", 
+// CHECK-NEXT:              "kind": "ImplicitCastExpr", 
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 6, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 8
+// CHECK-NEXT:               }, 
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 6, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 8
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "valueCategory": "rvalue", 
+// CHECK-NEXT:              "castKind": "LValueToRValue", 
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}", 
+// CHECK-NEXT:                "kind": "DeclRefExpr", 
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 6, 
+// CHECK-NEXT:                  "file": "{{.*}}", 
+// CHECK-NEXT:                  "line": 8
+// CHECK-NEXT:                 }, 
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 6, 
+// CHECK-NEXT:                  "file": "{{.*}}", 
+// CHECK-NEXT:                  "line": 8
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                }, 
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "const int"
+// CHECK-NEXT:                }, 
+// CHECK-NEXT:                "valueCategory": "lvalue", 
+// CHECK-NEXT:                "referencedDecl": {
+// CHECK-NEXT:                 "id": "0x{{.*}}", 
+// CHECK-NEXT:                 "kind": "ParmVarDecl", 
+// CHECK-NEXT:                 "name": "x", 
+// CHECK-NEXT:                 "type": {
+// CHECK-NEXT:                  "qualType": "int"
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                }
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl", 
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6, 
+// CHECK-NEXT:   "file": "{{.*}}", 
+// CHECK-NEXT:   "line": 14
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 14
+// CHECK-NEXT:   }, 
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1, 
+// CHECK-NEXT:    "file": "{{.*}}", 
+// CHECK-NEXT:    "line": 20
+// CHECK-NEXT:   }
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "name": "TestObjCAtCatchStmt", 
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void ()"
+// CHECK-NEXT:  }, 
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}", 
+// CHECK-NEXT:    "kind": "CompoundStmt", 
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 28, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 14
+// CHECK-NEXT:     }, 
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1, 
+// CHECK-NEXT:      "file": "{{.*}}", 
+// CHECK-NEXT:      "line": 20
+// CHECK-NEXT:     }
+// CHECK-NEXT:    }, 
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}", 
+// CHECK-NEXT:      "kind": "ObjCAtTryStmt", 
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 15
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 3, 
+// CHECK-NEXT:        "file": "{{.*}}", 
+// CHECK-NEXT:        "line": 19
+// CHECK-NEXT:       }
+// CHECK-NEXT:      }, 
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "CompoundStmt", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 8, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 15
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 16
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "ObjCAtCatchStmt", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 16
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 17
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "VarDecl", 
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 15, 
+// CHECK-NEXT:           "file": "{{.*}}", 
+// CHECK-NEXT:           "line": 16
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 12, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 16
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 15, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 16
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "name": "a", 
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "A *"
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "CompoundStmt", 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 18, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 16
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 17
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "ObjCAtCatchStmt", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 17
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 18
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "isCatchAll": true, 
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "CompoundStmt", 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 17, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 17
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 18
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }, 
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}", 
+// CHECK-NEXT:        "kind": "ObjCAtFinallyStmt", 
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 18
+// CHECK-NEXT:         }, 
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3, 
+// CHECK-NEXT:          "file": "{{.*}}", 
+// CHECK-NEXT:          "line": 19
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }, 
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}", 
+// CHECK-NEXT:          "kind": "CapturedStmt", 
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 14, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 18
+// CHECK-NEXT:           }, 
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3, 
+// CHECK-NEXT:            "file": "{{.*}}", 
+// CHECK-NEXT:            "line": 19
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }, 
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}", 
+// CHECK-NEXT:            "kind": "CapturedDecl", 
+// CHECK-NEXT:            "loc": {}, 
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {}, 
+// CHECK-NEXT:             "end": {}
+// CHECK-NEXT:            }, 
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}", 
+// CHECK-NEXT:              "kind": "CompoundStmt", 
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 14, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 18
+// CHECK-NEXT:               }, 
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 3, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 19
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }, 
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}", 
+// CHECK-NEXT:              "kind": "ImplicitParamDecl", 
+// CHECK-NEXT:              "loc": {
+// CHECK-NEXT:               "col": 14, 
+// CHECK-NEXT:               "file": "{{.*}}", 
+// CHECK-NEXT:               "line": 18
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 14, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 18
+// CHECK-NEXT:               }, 
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 14, 
+// CHECK-NEXT:                "file": "{{.*}}", 
+// CHECK-NEXT:                "line": 18
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }, 
+// CHECK-NEXT:              "isImplicit": true, 
+// CHECK-NEXT:              "name": "__context", 
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "struct (anonymous at {{.*}}:18:14) *"
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+

From 33bc64202b3262a6b3951f77eea3cb04309cc9c6 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 24 May 2019 19:00:13 +0000
Subject: [PATCH 0193/1176] [AArch64] check for INLINEASM_BR along w/ INLINEASM

Summary:
It looks like since INLINEASM_BR was created off of INLINEASM, a few
checks for INLINEASM needed to be updated to check for either case.

pr/41999

Reviewers: t.p.northover, peter.smith

Reviewed By: peter.smith

Subscribers: craig.topper, javed.absar, kristof.beyls, hiraditya, llvm-commits, peter.smith, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62402

llvm-svn: 361661
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 951a4ae0c8e6a..c1d3453a498b0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -76,8 +76,11 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   const MachineFunction *MF = MBB.getParent();
   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
 
-  if (MI.getOpcode() == AArch64::INLINEASM)
-    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+  {
+    auto Op = MI.getOpcode();
+    if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
+      return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+  }
 
   // FIXME: We currently only handle pseudoinstructions that don't get expanded
   //        before the assembly printer.

From 4934f95000d8b3a8cd12bec99acaae41ed82a4b1 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Fri, 24 May 2019 19:19:00 +0000
Subject: [PATCH 0194/1176] Adding an explicit triple to this test to appease
 build bots.

llvm-svn: 361662
---
 clang/test/AST/ast-dump-stmt-json.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/AST/ast-dump-stmt-json.m b/clang/test/AST/ast-dump-stmt-json.m
index 141b53b96e1cc..0aee71c244008 100644
--- a/clang/test/AST/ast-dump-stmt-json.m
+++ b/clang/test/AST/ast-dump-stmt-json.m
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -Wno-unused -fblocks -fobjc-exceptions -ast-dump=json -ast-dump-filter Test %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-pc-win32 -Wno-unused -fblocks -fobjc-exceptions -ast-dump=json -ast-dump-filter Test %s | FileCheck %s
 
 void TestBlockExpr(int x) {
   ^{ x; };

From 3e8b9d4a57ca4b5bdffa21a9101a790affe829ad Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Fri, 24 May 2019 19:21:21 +0000
Subject: [PATCH 0195/1176] [cmake] Remove old unused version of FindZ3.cmake
 from clang [NFC]

Summary: This file was moved to llvm in D54978, r356929, but the old
file was never removed.

Reviewed By: beanz

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62343

llvm-svn: 361663
---
 clang/cmake/modules/FindZ3.cmake | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 clang/cmake/modules/FindZ3.cmake

diff --git a/clang/cmake/modules/FindZ3.cmake b/clang/cmake/modules/FindZ3.cmake
deleted file mode 100644
index e69de29bb2d1d..0000000000000

From 0cdc5dddca009422ee6a1d72b487fb5c54a654db Mon Sep 17 00:00:00 2001
From: Mandeep Singh Grang <mgrang@quicinc.com>
Date: Fri, 24 May 2019 19:24:08 +0000
Subject: [PATCH 0196/1176] [Analyzer] Checker for non-determinism caused by
 iteration of unordered container of pointers

Summary: Added a checker for non-determinism caused by iterating unordered containers like std::unordered_set containing pointer elements.

Reviewers: NoQ, george.karpenkov, whisperity, Szelethus, baloghadamsoftware

Reviewed By: Szelethus

Subscribers: mgorny, xazax.hun, baloghadamsoftware, szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy, dkrupp, jdoerfert, Charusso, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D59279

llvm-svn: 361664
---
 clang/docs/analyzer/checkers.rst              |  18 +++-
 .../clang/StaticAnalyzer/Checkers/Checkers.td |   4 +
 .../StaticAnalyzer/Checkers/CMakeLists.txt    |   1 +
 .../Checkers/PointerIterationChecker.cpp      | 100 ++++++++++++++++++
 .../Inputs/system-header-simulator-cxx.h      |  61 +++++++++++
 clang/test/Analysis/ptr-iter.cpp              |  28 +++++
 clang/www/analyzer/alpha_checks.html          |  18 ++++
 7 files changed, 228 insertions(+), 2 deletions(-)
 create mode 100644 clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp
 create mode 100644 clang/test/Analysis/ptr-iter.cpp

diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index b13d4d04d4db9..6a266eb1d9e90 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -211,8 +211,8 @@ Check for uninitialized values being returned to the caller.
 .. _cplusplus-checkers:
 
 
-cpluslus
-^^^^^^^^
+cplusplus
+^^^^^^^^^
 
 C++ Checkers.
 
@@ -1951,6 +1951,20 @@ Check for out-of-bounds access in string functions; applies to:`` strncopy, strn
    int y = strlen((char *)&test); // warn
  }
 
+alpha.nondeterminism.PointerIteration (C++)
+"""""""""""""""""""""""""""""""""""""""""""
+Check for non-determinism caused by iterating unordered containers of pointers.
+
+.. code-block:: c
+
+ void test() {
+  int a = 1, b = 2;
+  std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
+
+  for (auto i : UnorderedPtrSet) // warn
+    f(i);
+ }
+
 alpha.nondeterminism.PointerSorting (C++)
 """""""""""""""""""""""""""""""""""""""""
 Check for non-determinism caused by sorting of pointers.
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 35beb51f0c47e..bc081498ac9e2 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1340,6 +1340,10 @@ def UnixAPIPortabilityChecker : Checker<"UnixAPI">,
 
 let ParentPackage = NonDeterminismAlpha in {
 
+def PointerIterationChecker : Checker<"PointerIteration">,
+  HelpText<"Checks for non-determinism caused by iteration of unordered containers of pointers">,
+  Documentation<HasDocumentation>;
+
 def PointerSortingChecker : Checker<"PointerSorting">,
   HelpText<"Check for non-determinism caused by sorting of pointers">,
   Documentation<HasDocumentation>;
diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
index f8201f33c48ef..df12fa5c9a11a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -75,6 +75,7 @@ add_clang_library(clangStaticAnalyzerCheckers
   OSObjectCStyleCast.cpp
   PaddingChecker.cpp
   PointerArithChecker.cpp
+  PointerIterationChecker.cpp
   PointerSortingChecker.cpp
   PointerSubChecker.cpp
   PthreadLockChecker.cpp
diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp
new file mode 100644
index 0000000000000..307e59b8eebc4
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp
@@ -0,0 +1,100 @@
+//== PointerIterationChecker.cpp ------------------------------- -*- C++ -*--=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines PointerIterationChecker which checks for non-determinism
+// caused due to iteration of unordered containers of pointer elements.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+using namespace ast_matchers;
+
+namespace {
+
+// ID of a node at which the diagnostic would be emitted.
+constexpr llvm::StringLiteral WarnAtNode = "iter";
+
+class PointerIterationChecker : public Checker<check::ASTCodeBody> {
+public:
+  void checkASTCodeBody(const Decl *D,
+                        AnalysisManager &AM,
+                        BugReporter &BR) const;
+};
+
+static void emitDiagnostics(const BoundNodes &Match, const Decl *D,
+                            BugReporter &BR, AnalysisManager &AM,
+                            const PointerIterationChecker *Checker) {
+  auto *ADC = AM.getAnalysisDeclContext(D);
+
+  const auto *MarkedStmt = Match.getNodeAs<Stmt>(WarnAtNode);
+  assert(MarkedStmt);
+
+  auto Range = MarkedStmt->getSourceRange();
+  auto Location = PathDiagnosticLocation::createBegin(MarkedStmt,
+                                                      BR.getSourceManager(),
+                                                      ADC);
+  std::string Diagnostics;
+  llvm::raw_string_ostream OS(Diagnostics);
+  OS << "Iteration of pointer-like elements "
+     << "can result in non-deterministic ordering";
+
+  BR.EmitBasicReport(ADC->getDecl(), Checker,
+                     "Iteration of pointer-like elements", "Non-determinism",
+                     OS.str(), Location, Range);
+}
+
+// Assumption: Iteration of ordered containers of pointers is deterministic.
+
+// TODO: Currently, we only check for std::unordered_set. Other unordered
+// containers like std::unordered_map also need to be handled.
+
+// TODO: Currently, we do not check what the for loop does with the iterated
+// pointer values. Not all iterations may cause non-determinism. For example,
+// counting or summing up the elements should not be non-deterministic.
+
+auto matchUnorderedIterWithPointers() -> decltype(decl()) {
+
+  auto UnorderedContainerM = declRefExpr(to(varDecl(hasType(
+                               recordDecl(hasName("std::unordered_set")
+                             )))));
+
+  auto PointerTypeM = varDecl(hasType(hasCanonicalType(pointerType())));
+
+  auto PointerIterM = stmt(cxxForRangeStmt(
+                             hasLoopVariable(PointerTypeM),
+                             hasRangeInit(UnorderedContainerM)
+                      )).bind(WarnAtNode);
+
+  return decl(forEachDescendant(PointerIterM));
+}
+
+void PointerIterationChecker::checkASTCodeBody(const Decl *D,
+                                             AnalysisManager &AM,
+                                             BugReporter &BR) const {
+  auto MatcherM = matchUnorderedIterWithPointers();
+
+  auto Matches = match(MatcherM, *D, AM.getASTContext());
+  for (const auto &Match : Matches)
+    emitDiagnostics(Match, D, BR, AM, this);
+}
+
+} // end of anonymous namespace
+
+void ento::registerPointerIterationChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker<PointerIterationChecker>();
+}
+
+bool ento::shouldRegisterPointerIterationChecker(const LangOptions &LO) {
+  return LO.CPlusPlus;
+}
diff --git a/clang/test/Analysis/Inputs/system-header-simulator-cxx.h b/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
index 3b3ac83b42721..5b37e96f60277 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-cxx.h
@@ -846,3 +846,64 @@ namespace std {
   template<class BidirIt, class UnaryPredicate>
   BidirIt stable_partition(BidirIt first, BidirIt last, UnaryPredicate p);
 }
+
+namespace std {
+
+template< class T = void >
+struct less;
+
+template< class T >
+struct allocator;
+
+template< class Key >
+struct hash;
+
+template<
+  class Key,
+  class Compare = std::less<Key>,
+  class Alloc = std::allocator<Key>
+> class set {
+  public:
+    set(initializer_list<Key> __list) {}
+
+    class iterator {
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+
+  public:
+    Key *val;
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); }
+};
+
+template<
+  class Key,
+  class Hash = std::hash<Key>,
+  class Compare = std::less<Key>,
+  class Alloc = std::allocator<Key>
+> class unordered_set {
+  public:
+    unordered_set(initializer_list<Key> __list) {}
+
+    class iterator {
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+
+  public:
+    Key *val;
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); }
+};
+}
diff --git a/clang/test/Analysis/ptr-iter.cpp b/clang/test/Analysis/ptr-iter.cpp
new file mode 100644
index 0000000000000..a35fae470a7ef
--- /dev/null
+++ b/clang/test/Analysis/ptr-iter.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_analyze_cc1 %s -analyzer-output=text -verify \
+// RUN: -analyzer-checker=core,alpha.nondeterminism.PointerIteration
+
+#include "Inputs/system-header-simulator-cxx.h"
+
+template<class T>
+void f(T x);
+
+void PointerIteration() {
+  int a = 1, b = 2;
+  std::set<int> OrderedIntSet = {a, b};
+  std::set<int *> OrderedPtrSet = {&a, &b};
+  std::unordered_set<int> UnorderedIntSet = {a, b};
+  std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
+
+  for (auto i : OrderedIntSet) // no-warning
+    f(i);
+
+  for (auto i : OrderedPtrSet) // no-warning
+    f(i);
+
+  for (auto i : UnorderedIntSet) // no-warning
+    f(i);
+
+  for (auto i : UnorderedPtrSet) // expected-warning {{Iteration of pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerIteration]
+// expected-note@-1 {{Iteration of pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerIteration]
+    f(i);
+}
diff --git a/clang/www/analyzer/alpha_checks.html b/clang/www/analyzer/alpha_checks.html
index d406b2c755b48..91ced375710f3 100644
--- a/clang/www/analyzer/alpha_checks.html
+++ b/clang/www/analyzer/alpha_checks.html
@@ -1067,6 +1067,24 @@ <h3 id="nondeterminism_alpha_checkers">Non-determinism Alpha Checkers</h3>
 <colgroup><col class="namedescr"><col class="example"></colgroup>
 <thead><tr><td>Name, Description</td><td>Example</td></tr></thead>
 
+<tbody>
+<tr><td><a id="alpha.nondeterminism.PointerIteration"><div class="namedescr expandable"><span class="name">
+alpha.nondeterminism.PointerIteration</span><span class="lang">
+(C++)</span><div class="descr">
+Check for non-determinism caused by iterating unordered containers of pointers.</div></div></a></td>
+<td><div class="exampleContainer expandable">
+<div class="example"><pre>
+// C++
+void test() {
+ int a = 1, b = 2;
+ std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
+
+ for (auto i : UnorderedPtrSet) // warn
+   f(i);
+}
+</pre></div></div></td></tr>
+</tbody></table>
+
 <tbody>
 <tr><td><a id="alpha.nondeterminism.PointerSorting"><div class="namedescr expandable"><span class="name">
 alpha.nondeterminism.PointerSorting</span><span class="lang">

From 56503865ed4d034d46efd84b274c42428fb61d9f Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Fri, 24 May 2019 19:35:25 +0000
Subject: [PATCH 0197/1176] [GlobalISel][AArch64] Improve register bank
 mappings for G_SELECT

The fcsel and csel instructions differ in only the register banks they work on.

So, they're entirely interchangeable otherwise.

With this in mind, this does two things:

- Teach AArch64RegisterBankInfo to consider the inputs to G_SELECT as well as
  the outputs.
- Teach it to choose the best register bank mapping based off the constraints
  of the inputs and outputs.

The "best" in this case means the one that requires the smallest number of
copies to properly emit a fcsel/csel.

For example, if the inputs are all already going to be on FPRs, we should
emit a fcsel, even if the output is a GPR. This costs one copy to produce the
result, but saves us from copying the inputs into GPRs.

Also update the regbank-select.mir to check that we end up with the right
select instruction.

Differential Revision: https://reviews.llvm.org/D62267

llvm-svn: 361665
---
 .../AArch64/AArch64RegisterBankInfo.cpp       |  55 +++++++-
 .../AArch64/GlobalISel/regbank-select.mir     | 130 ++++++++++++++++++
 2 files changed, 179 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 7fdcde44e514a..699343614cc42 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -679,15 +679,58 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // If the destination is FPR, preserve that.
     if (OpRegBankIdx[0] != PMI_FirstGPR)
       break;
+
+    // If we're taking in vectors, we have no choice but to put everything on
+    // FPRs.
     LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
-    if (SrcTy.isVector() ||
-        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
-               [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
-      // Set the register bank of every operand to FPR.
-      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
-           Idx < NumOperands; ++Idx)
+    if (SrcTy.isVector()) {
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
         OpRegBankIdx[Idx] = PMI_FirstFPR;
+      break;
+    }
+
+    // Try to minimize the number of copies. If we have more floating point
+    // constrained values than not, then we'll put everything on FPR. Otherwise,
+    // everything has to be on GPR.
+    unsigned NumFP = 0;
+
+    // Check if the uses of the result always produce floating point values.
+    //
+    // For example:
+    //
+    // %z = G_SELECT %cond %x %y
+    // fpr = G_FOO %z ...
+    if (any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return HasFPConstraints(MI); }))
+      ++NumFP;
+
+    // Check if the defs of the source values always produce floating point
+    // values.
+    //
+    // For example:
+    //
+    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+    // %z = G_SELECT %cond %x %y
+    //
+    // Also check whether or not the sources have already been decided to be
+    // FPR. Keep track of this.
+    //
+    // This doesn't check the condition, since it's just whatever is in NZCV.
+    // This isn't passed explicitly in a register to fcsel/csel.
+    for (unsigned Idx = 2; Idx < 4; ++Idx) {
+      unsigned VReg = MI.getOperand(Idx).getReg();
+      MachineInstr *DefMI = MRI.getVRegDef(VReg);
+      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
+          HasFPConstraints(*DefMI))
+        ++NumFP;
     }
+
+    // If we have more FP constraints than not, then move everything over to
+    // FPR.
+    if (NumFP >= 2)
+      for (unsigned Idx = 0; Idx < 4; ++Idx)
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+
     break;
   }
   case TargetOpcode::G_UNMERGE_VALUES: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
index 97b543442bd8d..99c69160e67a3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
@@ -58,3 +58,133 @@ body:             |
     %4:_(s64) = G_SELECT %0(s1), %1, %2
     $d0 = COPY %4(s64)
     RET_ReallyLR implicit $d0
+
+...
+---
+name:            two_fpr_inputs_gpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $w0
+    ; CHECK-LABEL: name: two_fpr_inputs_gpr_output
+    ; CHECK: liveins: $d0, $d1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+
+    ; Verify that the G_SELECT only has FPRs.
+    ; The only difference between fcsel and csel are the register banks. So,
+    ; if we have two FPR inputs and a GPR output, we should do a floating point
+    ; select anyway. This will cost one copy for the output, but that's less
+    ; than doing two to put the inputs on GPRs.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $d1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            one_fpr_input_fpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1, $w0
+    ; CHECK-LABEL: name: one_fpr_input_fpr_output
+    ; CHECK: liveins: $d0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr(s64) = COPY [[COPY2]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY4]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+
+    ; Same idea as the above test. If the output is an FPR, and one of the
+    ; inputs is an FPR, then it's fewer copies to just do a FCSEL.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            one_fpr_input_gpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1, $w0
+    ; CHECK-LABEL: name: one_fpr_input_gpr_output
+    ; CHECK: liveins: $d0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr(s64) = COPY [[COPY1]](s64)
+    ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY3]], [[COPY2]]
+    ; CHECK: $x0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+
+    ; Now we have more GPR registers on the G_SELECT. It's cheaper here to put
+    ; everything on GPR.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $d0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            two_gpr_input_fpr_output
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $w0
+    ; CHECK-LABEL: name: two_gpr_input_fpr_output
+    ; CHECK: liveins: $x0, $x1, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr(s64) = COPY $x0
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s64) = COPY $x1
+    ; CHECK: [[SELECT:%[0-9]+]]:gpr(s64) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]]
+    ; CHECK: $d0 = COPY [[SELECT]](s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+
+    ; Same as above. The G_SELECT should get all GPRS.
+
+    %3:_(s32) = COPY $w0
+    %0:_(s1) = G_TRUNC %3(s32)
+    %1:_(s64) = COPY $x0
+    %2:_(s64) = COPY $x1
+    %4:_(s64) = G_SELECT %0(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0

From 96f02a8db843daa5c016c097d0375e6dfa65bb4d Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Fri, 24 May 2019 19:39:50 +0000
Subject: [PATCH 0198/1176] [Process] Clean up some logic around
 LanguageRuntimes

llvm-svn: 361666
---
 lldb/source/Target/Process.cpp | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 871eae56d4b49..4162571fbd608 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1569,19 +1569,22 @@ CPPLanguageRuntime *Process::GetCPPLanguageRuntime(bool retry_if_null) {
   std::lock_guard<std::recursive_mutex> guard(m_language_runtimes_mutex);
   LanguageRuntime *runtime =
       GetLanguageRuntime(eLanguageTypeC_plus_plus, retry_if_null);
-  if (runtime != nullptr &&
-      runtime->GetLanguageType() == eLanguageTypeC_plus_plus)
-    return static_cast<CPPLanguageRuntime *>(runtime);
-  return nullptr;
+  if (!runtime)
+    return nullptr;
+
+  assert(runtime->GetLanguageType() == eLanguageTypeC_plus_plus);
+  return static_cast<CPPLanguageRuntime *>(runtime);
 }
 
 ObjCLanguageRuntime *Process::GetObjCLanguageRuntime(bool retry_if_null) {
   std::lock_guard<std::recursive_mutex> guard(m_language_runtimes_mutex);
   LanguageRuntime *runtime =
       GetLanguageRuntime(eLanguageTypeObjC, retry_if_null);
-  if (runtime != nullptr && runtime->GetLanguageType() == eLanguageTypeObjC)
-    return static_cast<ObjCLanguageRuntime *>(runtime);
-  return nullptr;
+  if (!runtime)
+    return nullptr;
+
+  assert(runtime->GetLanguageType() == eLanguageTypeObjC);
+  return static_cast<ObjCLanguageRuntime *>(runtime);
 }
 
 bool Process::IsPossibleDynamicValue(ValueObject &in_value) {

From a431dd7ae7dcf4bd8c6889291c35d14d7152caa1 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 24 May 2019 20:25:40 +0000
Subject: [PATCH 0199/1176] [COFF] De-virtualize Chunk and SectionChunk

Shaves another pointer off of SectionChunk, reducing the size from 96 to
88 bytes, down from 144 before I started working on this. Combined with
D62356, this reduced peak memory usage when linking chrome_child.dll
from 713MB to 675MB, or 5%.

Create NonSectionChunk to provide virtual dispatch to the rest of the
chunk types.

Reviewers: ruiu, aganea

Differential Revision: https://reviews.llvm.org/D62362

llvm-svn: 361667
---
 lld/COFF/Chunks.cpp |  17 ++--
 lld/COFF/Chunks.h   | 186 ++++++++++++++++++++++++++++++++++----------
 lld/COFF/DLL.cpp    |  33 ++++----
 lld/COFF/Writer.cpp |   4 +-
 4 files changed, 169 insertions(+), 71 deletions(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 5af3b52eda93d..688d69ba087ef 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -43,6 +43,8 @@ SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
 
   setAlignment(Header->getAlignment());
 
+  HasData = !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+
   // If linker GC is disabled, every chunk starts out alive.  If linker GC is
   // enabled, treat non-comdat sections as roots. Generally optimized object
   // files will be built with -ffunction-sections or /Gy, so most things worth
@@ -53,7 +55,7 @@ SectionChunk::SectionChunk(ObjFile *F, const coff_section *H)
 // SectionChunk is one of the most frequently allocated classes, so it is
 // important to keep it as compact as possible. As of this writing, the number
 // below is the size of this class on x64 platforms.
-static_assert(sizeof(SectionChunk) <= 96, "SectionChunk grew unexpectedly");
+static_assert(sizeof(SectionChunk) <= 88, "SectionChunk grew unexpectedly");
 
 static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); }
 static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); }
@@ -559,14 +561,6 @@ void SectionChunk::getRuntimePseudoRelocs(
   }
 }
 
-bool SectionChunk::hasData() const {
-  return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA);
-}
-
-uint32_t SectionChunk::getOutputCharacteristics() const {
-  return Header->Characteristics & (PermMask | TypeMask);
-}
-
 bool SectionChunk::isCOMDAT() const {
   return Header->Characteristics & IMAGE_SCN_LNK_COMDAT;
 }
@@ -578,7 +572,7 @@ void SectionChunk::printDiscardedMessage() const {
     message("Discarded " + Sym->getName());
 }
 
-StringRef SectionChunk::getDebugName() {
+StringRef SectionChunk::getDebugName() const {
   if (Sym)
     return Sym->getName();
   return "";
@@ -642,6 +636,7 @@ CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) {
   // than 32 bytes naturally, i.e. round the size up to the next power of two.
   // This is what MSVC link.exe does.
   setAlignment(std::min(32U, uint32_t(PowerOf2Ceil(Sym.getValue()))));
+  HasData = false;
 }
 
 uint32_t CommonChunk::getOutputCharacteristics() const {
@@ -858,7 +853,7 @@ uint8_t Baserel::getDefaultType() {
 MergeChunk *MergeChunk::Instances[Log2MaxSectionAlignment + 1] = {};
 
 MergeChunk::MergeChunk(uint32_t Alignment)
-    : Chunk(OtherKind), Builder(StringTableBuilder::RAW, Alignment) {
+    : Builder(StringTableBuilder::RAW, Alignment) {
   setAlignment(Alignment);
 }
 
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index 619a4886cd931..cfa71e3ff1c60 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -13,6 +13,7 @@
 #include "InputFiles.h"
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/MC/StringTableBuilder.h"
@@ -55,14 +56,16 @@ class Chunk {
 public:
   enum Kind : uint8_t { SectionKind, OtherKind };
   Kind kind() const { return ChunkKind; }
-  virtual ~Chunk() = default;
 
   // Returns the size of this chunk (even if this is a common or BSS.)
-  virtual size_t getSize() const = 0;
+  size_t getSize() const;
 
   // Returns chunk alignment in power of two form. Value values are powers of
   // two from 1 to 8192.
   uint32_t getAlignment() const { return 1U << P2Align; }
+
+  // Update the chunk section alignment measured in bytes. Internally alignment
+  // is stored in log2.
   void setAlignment(uint32_t Align) {
     // Treat zero byte alignment as 1 byte alignment.
     Align = Align ? Align : 1;
@@ -76,7 +79,7 @@ class Chunk {
   // beginning of the file. Because this function may use RVA values
   // of other chunks for relocations, you need to set them properly
   // before calling this function.
-  virtual void writeTo(uint8_t *Buf) const {}
+  void writeTo(uint8_t *Buf) const;
 
   // The writer sets and uses the addresses. In practice, PE images cannot be
   // larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
@@ -90,16 +93,14 @@ class Chunk {
   // Returns true if this has non-zero data. BSS chunks return
   // false. If false is returned, the space occupied by this chunk
   // will be filled with zeros.
-  virtual bool hasData() const { return true; }
+  bool hasData() const { return HasData; }
 
   // Returns readable/writable/executable bits.
-  virtual uint32_t getOutputCharacteristics() const { return 0; }
+  uint32_t getOutputCharacteristics() const;
 
   // Returns the section name if this is a section chunk.
   // It is illegal to call this function on non-section chunks.
-  virtual StringRef getSectionName() const {
-    llvm_unreachable("unimplemented getSectionName");
-  }
+  StringRef getSectionName() const;
 
   // An output section has pointers to chunks in the section, and each
   // chunk has a back pointer to an output section.
@@ -109,22 +110,29 @@ class Chunk {
 
   // Windows-specific.
   // Collect all locations that contain absolute addresses for base relocations.
-  virtual void getBaserels(std::vector<Baserel> *Res) {}
+  void getBaserels(std::vector<Baserel> *Res);
 
   // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
   // bytes, so this is used only for logging or debugging.
-  virtual StringRef getDebugName() { return ""; }
+  StringRef getDebugName() const;
 
-  virtual bool isHotPatchable() const { return false; }
+  // Return true if this file has the hotpatch flag set to true in the
+  // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
+  // synthesized by the linker.
+  bool isHotPatchable() const;
 
 protected:
-  Chunk(Kind K = OtherKind) : ChunkKind(K) {}
+  Chunk(Kind K = OtherKind) : ChunkKind(K), HasData(true), P2Align(0) {}
 
   const Kind ChunkKind;
 
+  // True if the section has data. Corresponds to the
+  // IMAGE_SCN_CNT_UNINITIALIZED_DATA section characteristic bit.
+  uint8_t HasData : 1;
+
   // The alignment of this chunk, stored in log2 form. The writer uses the
   // value.
-  uint8_t P2Align = 0;
+  uint8_t P2Align : 7;
 
   // The output section index for this chunk. The first valid section number is
   // one.
@@ -134,6 +142,46 @@ class Chunk {
   uint32_t RVA = 0;
 };
 
+class NonSectionChunk : public Chunk {
+public:
+  virtual ~NonSectionChunk() = default;
+
+  // Returns the size of this chunk (even if this is a common or BSS.)
+  virtual size_t getSize() const = 0;
+
+  virtual uint32_t getOutputCharacteristics() const { return 0; }
+
+  // Write this chunk to a mmap'ed file, assuming Buf is pointing to
+  // beginning of the file. Because this function may use RVA values
+  // of other chunks for relocations, you need to set them properly
+  // before calling this function.
+  virtual void writeTo(uint8_t *Buf) const {}
+
+  // Returns the section name if this is a section chunk.
+  // It is illegal to call this function on non-section chunks.
+  virtual StringRef getSectionName() const {
+    llvm_unreachable("unimplemented getSectionName");
+  }
+
+  // Windows-specific.
+  // Collect all locations that contain absolute addresses for base relocations.
+  virtual void getBaserels(std::vector<Baserel> *Res) {}
+
+  // Return true if this file has the hotpatch flag set to true in the
+  // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
+  // synthesized by the linker.
+  virtual bool isHotPatchable() const { return false; }
+
+  // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
+  // bytes, so this is used only for logging or debugging.
+  virtual StringRef getDebugName() const { return ""; }
+
+  static bool classof(const Chunk *C) { return C->kind() == OtherKind; }
+
+protected:
+  NonSectionChunk() : Chunk(OtherKind) {}
+};
+
 // A chunk corresponding a section of an input file.
 class SectionChunk final : public Chunk {
   // Identical COMDAT Folding feature accesses section internal data.
@@ -158,15 +206,17 @@ class SectionChunk final : public Chunk {
 
   SectionChunk(ObjFile *File, const coff_section *Header);
   static bool classof(const Chunk *C) { return C->kind() == SectionKind; }
-  size_t getSize() const override { return Header->SizeOfRawData; }
+  size_t getSize() const { return Header->SizeOfRawData; }
   ArrayRef<uint8_t> getContents() const;
-  void writeTo(uint8_t *Buf) const override;
-  bool hasData() const override;
-  uint32_t getOutputCharacteristics() const override;
-  StringRef getSectionName() const override {
+  void writeTo(uint8_t *Buf) const;
+
+  uint32_t getOutputCharacteristics() const {
+    return Header->Characteristics & (PermMask | TypeMask);
+  }
+  StringRef getSectionName() const {
     return StringRef(SectionNameData, SectionNameSize);
   }
-  void getBaserels(std::vector<Baserel> *Res) override;
+  void getBaserels(std::vector<Baserel> *Res);
   bool isCOMDAT() const;
   void applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S,
                    uint64_t P) const;
@@ -187,7 +237,7 @@ class SectionChunk final : public Chunk {
   // and its children are treated as a group by the garbage collector.
   void addAssociative(SectionChunk *Child);
 
-  StringRef getDebugName() override;
+  StringRef getDebugName() const;
 
   // True if this is a codeview debug info chunk. These will not be laid out in
   // the image. Instead they will end up in the PDB, if one is requested.
@@ -200,6 +250,8 @@ class SectionChunk final : public Chunk {
     return getSectionName().startswith(".debug_") || getSectionName() == ".eh_frame";
   }
 
+  bool isHotPatchable() const { return File->HotPatchable; }
+
   // Allow iteration over the bodies of this chunk's relocated symbols.
   llvm::iterator_range<symbol_iterator> symbols() const {
     return llvm::make_range(symbol_iterator(File, RelocsData),
@@ -257,8 +309,6 @@ class SectionChunk final : public Chunk {
   static SectionChunk *findByName(ArrayRef<SectionChunk *> Sections,
                                   StringRef Name);
 
-  bool isHotPatchable() const override { return File->HotPatchable; }
-
   // The file that this chunk was created from.
   ObjFile *File;
 
@@ -305,6 +355,58 @@ class SectionChunk final : public Chunk {
   uint32_t SectionNameSize = 0;
 };
 
+// Inline methods to implement faux-virtual dispatch for SectionChunk.
+
+inline size_t Chunk::getSize() const {
+  if (isa<SectionChunk>(this))
+    return static_cast<const SectionChunk *>(this)->getSize();
+  else
+    return static_cast<const NonSectionChunk *>(this)->getSize();
+}
+
+inline uint32_t Chunk::getOutputCharacteristics() const {
+  if (isa<SectionChunk>(this))
+    return static_cast<const SectionChunk *>(this)->getOutputCharacteristics();
+  else
+    return static_cast<const NonSectionChunk *>(this)
+        ->getOutputCharacteristics();
+}
+
+inline void Chunk::writeTo(uint8_t *Buf) const {
+  if (isa<SectionChunk>(this))
+    static_cast<const SectionChunk *>(this)->writeTo(Buf);
+  else
+    static_cast<const NonSectionChunk *>(this)->writeTo(Buf);
+}
+
+inline bool Chunk::isHotPatchable() const {
+  if (isa<SectionChunk>(this))
+    return static_cast<const SectionChunk *>(this)->isHotPatchable();
+  else
+    return static_cast<const NonSectionChunk *>(this)->isHotPatchable();
+}
+
+inline StringRef Chunk::getSectionName() const {
+  if (isa<SectionChunk>(this))
+    return static_cast<const SectionChunk *>(this)->getSectionName();
+  else
+    return static_cast<const NonSectionChunk *>(this)->getSectionName();
+}
+
+inline void Chunk::getBaserels(std::vector<Baserel> *Res) {
+  if (isa<SectionChunk>(this))
+    static_cast<SectionChunk *>(this)->getBaserels(Res);
+  else
+    static_cast<NonSectionChunk *>(this)->getBaserels(Res);
+}
+
+inline StringRef Chunk::getDebugName() const {
+  if (isa<SectionChunk>(this))
+    return static_cast<const SectionChunk *>(this)->getDebugName();
+  else
+    return static_cast<const NonSectionChunk *>(this)->getDebugName();
+}
+
 // This class is used to implement an lld-specific feature (not implemented in
 // MSVC) that minimizes the output size by finding string literals sharing tail
 // parts and merging them.
@@ -314,7 +416,7 @@ class SectionChunk final : public Chunk {
 // The MergeChunk then tail merges the strings using the StringTableBuilder
 // class and assigns RVAs and section offsets to each of the member chunks based
 // on the offsets assigned by the StringTableBuilder.
-class MergeChunk : public Chunk {
+class MergeChunk : public NonSectionChunk {
 public:
   MergeChunk(uint32_t Alignment);
   static void addSection(SectionChunk *C);
@@ -335,11 +437,10 @@ class MergeChunk : public Chunk {
 };
 
 // A chunk for common symbols. Common chunks don't have actual data.
-class CommonChunk : public Chunk {
+class CommonChunk : public NonSectionChunk {
 public:
   CommonChunk(const COFFSymbolRef Sym);
   size_t getSize() const override { return Sym.getValue(); }
-  bool hasData() const override { return false; }
   uint32_t getOutputCharacteristics() const override;
   StringRef getSectionName() const override { return ".bss"; }
 
@@ -348,7 +449,7 @@ class CommonChunk : public Chunk {
 };
 
 // A chunk for linker-created strings.
-class StringChunk : public Chunk {
+class StringChunk : public NonSectionChunk {
 public:
   explicit StringChunk(StringRef S) : Str(S) {}
   size_t getSize() const override { return Str.size() + 1; }
@@ -377,7 +478,7 @@ static const uint8_t ImportThunkARM64[] = {
 // Windows-specific.
 // A chunk for DLL import jump table entry. In a final output, its
 // contents will be a JMP instruction to some __imp_ symbol.
-class ImportThunkChunkX64 : public Chunk {
+class ImportThunkChunkX64 : public NonSectionChunk {
 public:
   explicit ImportThunkChunkX64(Defined *S);
   size_t getSize() const override { return sizeof(ImportThunkX86); }
@@ -389,9 +490,10 @@ class ImportThunkChunkX64 : public Chunk {
   Defined *ImpSymbol;
 };
 
-class ImportThunkChunkX86 : public Chunk {
+class ImportThunkChunkX86 : public NonSectionChunk {
 public:
-  explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {}
+  explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {
+  }
   size_t getSize() const override { return sizeof(ImportThunkX86); }
   void getBaserels(std::vector<Baserel> *Res) override;
   void writeTo(uint8_t *Buf) const override;
@@ -402,9 +504,10 @@ class ImportThunkChunkX86 : public Chunk {
   Defined *ImpSymbol;
 };
 
-class ImportThunkChunkARM : public Chunk {
+class ImportThunkChunkARM : public NonSectionChunk {
 public:
-  explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {}
+  explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {
+  }
   size_t getSize() const override { return sizeof(ImportThunkARM); }
   void getBaserels(std::vector<Baserel> *Res) override;
   void writeTo(uint8_t *Buf) const override;
@@ -415,9 +518,10 @@ class ImportThunkChunkARM : public Chunk {
   Defined *ImpSymbol;
 };
 
-class ImportThunkChunkARM64 : public Chunk {
+class ImportThunkChunkARM64 : public NonSectionChunk {
 public:
-  explicit ImportThunkChunkARM64(Defined *S) : ImpSymbol(S) {}
+  explicit ImportThunkChunkARM64(Defined *S) : ImpSymbol(S) {
+  }
   size_t getSize() const override { return sizeof(ImportThunkARM64); }
   void writeTo(uint8_t *Buf) const override;
 
@@ -427,7 +531,7 @@ class ImportThunkChunkARM64 : public Chunk {
   Defined *ImpSymbol;
 };
 
-class RangeExtensionThunkARM : public Chunk {
+class RangeExtensionThunkARM : public NonSectionChunk {
 public:
   explicit RangeExtensionThunkARM(Defined *T) : Target(T) {}
   size_t getSize() const override;
@@ -436,7 +540,7 @@ class RangeExtensionThunkARM : public Chunk {
   Defined *Target;
 };
 
-class RangeExtensionThunkARM64 : public Chunk {
+class RangeExtensionThunkARM64 : public NonSectionChunk {
 public:
   explicit RangeExtensionThunkARM64(Defined *T) : Target(T) {}
   size_t getSize() const override;
@@ -447,7 +551,7 @@ class RangeExtensionThunkARM64 : public Chunk {
 
 // Windows-specific.
 // See comments for DefinedLocalImport class.
-class LocalImportChunk : public Chunk {
+class LocalImportChunk : public NonSectionChunk {
 public:
   explicit LocalImportChunk(Defined *S) : Sym(S) {
     setAlignment(Config->Wordsize);
@@ -487,7 +591,7 @@ struct ChunkAndOffset {
 using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>;
 
 // Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
-class RVATableChunk : public Chunk {
+class RVATableChunk : public NonSectionChunk {
 public:
   explicit RVATableChunk(SymbolRVASet S) : Syms(std::move(S)) {}
   size_t getSize() const override { return Syms.size() * 4; }
@@ -500,7 +604,7 @@ class RVATableChunk : public Chunk {
 // Windows-specific.
 // This class represents a block in .reloc section.
 // See the PE/COFF spec 5.6 for details.
-class BaserelChunk : public Chunk {
+class BaserelChunk : public NonSectionChunk {
 public:
   BaserelChunk(uint32_t Page, Baserel *Begin, Baserel *End);
   size_t getSize() const override { return Data.size(); }
@@ -524,7 +628,7 @@ class Baserel {
 // specific place in a section, without any data. This is used for the MinGW
 // specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
 // of an empty chunk isn't MinGW specific.
-class EmptyChunk : public Chunk {
+class EmptyChunk : public NonSectionChunk {
 public:
   EmptyChunk() {}
   size_t getSize() const override { return 0; }
@@ -537,7 +641,7 @@ class EmptyChunk : public Chunk {
 // the reference didn't use the dllimport attribute. The MinGW runtime will
 // process this table after loading, before handling control over to user
 // code.
-class PseudoRelocTableChunk : public Chunk {
+class PseudoRelocTableChunk : public NonSectionChunk {
 public:
   PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &Relocs)
       : Relocs(std::move(Relocs)) {
@@ -568,7 +672,7 @@ class RuntimePseudoReloc {
 };
 
 // MinGW specific. A Chunk that contains one pointer-sized absolute value.
-class AbsolutePointerChunk : public Chunk {
+class AbsolutePointerChunk : public NonSectionChunk {
 public:
   AbsolutePointerChunk(uint64_t Value) : Value(Value) {
     setAlignment(getSize());
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index b2bf53285af2c..769f02e7d0d45 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -35,7 +35,7 @@ namespace {
 // Import table
 
 // A chunk for the import descriptor table.
-class HintNameChunk : public Chunk {
+class HintNameChunk : public NonSectionChunk {
 public:
   HintNameChunk(StringRef N, uint16_t H) : Name(N), Hint(H) {}
 
@@ -57,7 +57,7 @@ class HintNameChunk : public Chunk {
 };
 
 // A chunk for the import descriptor table.
-class LookupChunk : public Chunk {
+class LookupChunk : public NonSectionChunk {
 public:
   explicit LookupChunk(Chunk *C) : HintName(C) {
     setAlignment(Config->Wordsize);
@@ -77,7 +77,7 @@ class LookupChunk : public Chunk {
 // A chunk for the import descriptor table.
 // This chunk represent import-by-ordinal symbols.
 // See Microsoft PE/COFF spec 7.1. Import Header for details.
-class OrdinalOnlyChunk : public Chunk {
+class OrdinalOnlyChunk : public NonSectionChunk {
 public:
   explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) {
     setAlignment(Config->Wordsize);
@@ -98,7 +98,7 @@ class OrdinalOnlyChunk : public Chunk {
 };
 
 // A chunk for the import descriptor table.
-class ImportDirectoryChunk : public Chunk {
+class ImportDirectoryChunk : public NonSectionChunk {
 public:
   explicit ImportDirectoryChunk(Chunk *N) : DLLName(N) {}
   size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
@@ -119,10 +119,9 @@ class ImportDirectoryChunk : public Chunk {
 
 // A chunk representing null terminator in the import table.
 // Contents of this chunk is always null bytes.
-class NullChunk : public Chunk {
+class NullChunk : public NonSectionChunk {
 public:
-  explicit NullChunk(size_t N) : Size(N) {}
-  bool hasData() const override { return false; }
+  explicit NullChunk(size_t N) : Size(N) { HasData = false; }
   size_t getSize() const override { return Size; }
 
   void writeTo(uint8_t *Buf) const override {
@@ -162,7 +161,7 @@ binImports(const std::vector<DefinedImportData *> &Imports) {
 // See Microsoft PE/COFF spec 4.3 for details.
 
 // A chunk for the delay import descriptor table etnry.
-class DelayDirectoryChunk : public Chunk {
+class DelayDirectoryChunk : public NonSectionChunk {
 public:
   explicit DelayDirectoryChunk(Chunk *N) : DLLName(N) {}
 
@@ -274,7 +273,7 @@ static const uint8_t ThunkARM64[] = {
 };
 
 // A chunk for the delay import thunk.
-class ThunkChunkX64 : public Chunk {
+class ThunkChunkX64 : public NonSectionChunk {
 public:
   ThunkChunkX64(Defined *I, Chunk *D, Defined *H)
       : Imp(I), Desc(D), Helper(H) {}
@@ -293,7 +292,7 @@ class ThunkChunkX64 : public Chunk {
   Defined *Helper = nullptr;
 };
 
-class ThunkChunkX86 : public Chunk {
+class ThunkChunkX86 : public NonSectionChunk {
 public:
   ThunkChunkX86(Defined *I, Chunk *D, Defined *H)
       : Imp(I), Desc(D), Helper(H) {}
@@ -317,7 +316,7 @@ class ThunkChunkX86 : public Chunk {
   Defined *Helper = nullptr;
 };
 
-class ThunkChunkARM : public Chunk {
+class ThunkChunkARM : public NonSectionChunk {
 public:
   ThunkChunkARM(Defined *I, Chunk *D, Defined *H)
       : Imp(I), Desc(D), Helper(H) {}
@@ -341,7 +340,7 @@ class ThunkChunkARM : public Chunk {
   Defined *Helper = nullptr;
 };
 
-class ThunkChunkARM64 : public Chunk {
+class ThunkChunkARM64 : public NonSectionChunk {
 public:
   ThunkChunkARM64(Defined *I, Chunk *D, Defined *H)
       : Imp(I), Desc(D), Helper(H) {}
@@ -363,7 +362,7 @@ class ThunkChunkARM64 : public Chunk {
 };
 
 // A chunk for the import descriptor table.
-class DelayAddressChunk : public Chunk {
+class DelayAddressChunk : public NonSectionChunk {
 public:
   explicit DelayAddressChunk(Chunk *C) : Thunk(C) {
     setAlignment(Config->Wordsize);
@@ -393,7 +392,7 @@ class DelayAddressChunk : public Chunk {
 // Read Microsoft PE/COFF spec 5.3 for details.
 
 // A chunk for the export descriptor table.
-class ExportDirectoryChunk : public Chunk {
+class ExportDirectoryChunk : public NonSectionChunk {
 public:
   ExportDirectoryChunk(int I, int J, Chunk *D, Chunk *A, Chunk *N, Chunk *O)
       : MaxOrdinal(I), NameTabSize(J), DLLName(D), AddressTab(A), NameTab(N),
@@ -424,7 +423,7 @@ class ExportDirectoryChunk : public Chunk {
   Chunk *OrdinalTab;
 };
 
-class AddressTableChunk : public Chunk {
+class AddressTableChunk : public NonSectionChunk {
 public:
   explicit AddressTableChunk(size_t MaxOrdinal) : Size(MaxOrdinal + 1) {}
   size_t getSize() const override { return Size * 4; }
@@ -450,7 +449,7 @@ class AddressTableChunk : public Chunk {
   size_t Size;
 };
 
-class NamePointersChunk : public Chunk {
+class NamePointersChunk : public NonSectionChunk {
 public:
   explicit NamePointersChunk(std::vector<Chunk *> &V) : Chunks(V) {}
   size_t getSize() const override { return Chunks.size() * 4; }
@@ -466,7 +465,7 @@ class NamePointersChunk : public Chunk {
   std::vector<Chunk *> Chunks;
 };
 
-class ExportOrdinalChunk : public Chunk {
+class ExportOrdinalChunk : public NonSectionChunk {
 public:
   explicit ExportOrdinalChunk(size_t I) : Size(I) {}
   size_t getSize() const override { return Size * 2; }
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 0e6cc9de853f6..8415c38dcdc29 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -88,7 +88,7 @@ OutputSection *Chunk::getOutputSection() const {
 
 namespace {
 
-class DebugDirectoryChunk : public Chunk {
+class DebugDirectoryChunk : public NonSectionChunk {
 public:
   DebugDirectoryChunk(const std::vector<Chunk *> &R, bool WriteRepro)
       : Records(R), WriteRepro(WriteRepro) {}
@@ -143,7 +143,7 @@ class DebugDirectoryChunk : public Chunk {
   bool WriteRepro;
 };
 
-class CVDebugRecordChunk : public Chunk {
+class CVDebugRecordChunk : public NonSectionChunk {
 public:
   size_t getSize() const override {
     return sizeof(codeview::DebugInfo) + Config->PDBAltPath.size() + 1;

From 5c3b1fd9ef8967dde4a17f02b06ad813bf475f98 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 24 May 2019 20:42:25 +0000
Subject: [PATCH 0200/1176] Refactor use-marking to better match standard
 terminology. No functionality change intended.

llvm-svn: 361668
---
 clang/lib/Sema/SemaExpr.cpp | 406 ++++++++++++++++++++++--------------
 1 file changed, 244 insertions(+), 162 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 410d38cf5b6f8..8ebc9bcf025eb 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14730,55 +14730,84 @@ ExprResult Sema::HandleExprEvaluationContextForTypeof(Expr *E) {
   return TransformToPotentiallyEvaluated(E);
 }
 
-/// Are we within a context in which some evaluation could be performed (be it
-/// constant evaluation or runtime evaluation)? Sadly, this notion is not quite
-/// captured by C++'s idea of an "unevaluated context".
-static bool isEvaluatableContext(Sema &SemaRef) {
+/// Are we in a context that is potentially constant evaluated per C++20
+/// [expr.const]p12?
+static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) {
+  /// C++2a [expr.const]p12:
+  //   An expression or conversion is potentially constant evaluated if it is
   switch (SemaRef.ExprEvalContexts.back().Context) {
-    case Sema::ExpressionEvaluationContext::Unevaluated:
-    case Sema::ExpressionEvaluationContext::UnevaluatedAbstract:
-      // Expressions in this context are never evaluated.
-      return false;
-
-    case Sema::ExpressionEvaluationContext::UnevaluatedList:
     case Sema::ExpressionEvaluationContext::ConstantEvaluated:
+      // -- a manifestly constant-evaluated expression,
     case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
     case Sema::ExpressionEvaluationContext::DiscardedStatement:
-      // Expressions in this context could be evaluated.
+      // -- a potentially-evaluated expression,
+    case Sema::ExpressionEvaluationContext::UnevaluatedList:
+      // -- an immediate subexpression of a braced-init-list,
+
+      // -- [FIXME] an expression of the form & cast-expression that occurs
+      //    within a templated entity
+      // -- a subexpression of one of the above that is not a subexpression of
+      // a nested unevaluated operand.
       return true;
 
+    case Sema::ExpressionEvaluationContext::Unevaluated:
+    case Sema::ExpressionEvaluationContext::UnevaluatedAbstract:
+      // Expressions in this context are never evaluated.
+      return false;
+
     case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
-      // Referenced declarations will only be used if the construct in the
-      // containing expression is used, at which point we'll be given another
-      // turn to mark them.
+      // FIXME: This is wrong. Default arguemnts are potentially constant
+      // evaluated even if they are never used.
       return false;
   }
   llvm_unreachable("Invalid context");
 }
 
+namespace {
+enum class OdrUseContext {
+  /// Declarations in this context are not odr-used.
+  None,
+  /// Declarations in this context are formally odr-used, but this is a
+  /// dependent context.
+  Dependent,
+  /// Declarations in this context are odr-used but not actually used (yet).
+  FormallyOdrUsed,
+  /// Declarations in this context are used.
+  Used
+};
+}
+
 /// Are we within a context in which references to resolved functions or to
 /// variables result in odr-use?
-static bool isOdrUseContext(Sema &SemaRef, bool SkipDependentUses = true) {
-  // An expression in a template is not really an expression until it's been
-  // instantiated, so it doesn't trigger odr-use.
-  if (SkipDependentUses && SemaRef.CurContext->isDependentContext())
-    return false;
+static OdrUseContext isOdrUseContext(Sema &SemaRef) {
+  OdrUseContext Result;
 
   switch (SemaRef.ExprEvalContexts.back().Context) {
     case Sema::ExpressionEvaluationContext::Unevaluated:
     case Sema::ExpressionEvaluationContext::UnevaluatedList:
     case Sema::ExpressionEvaluationContext::UnevaluatedAbstract:
-    case Sema::ExpressionEvaluationContext::DiscardedStatement:
-      return false;
+      return OdrUseContext::None;
 
     case Sema::ExpressionEvaluationContext::ConstantEvaluated:
     case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
-      return true;
+      Result = OdrUseContext::Used;
+      break;
+
+    case Sema::ExpressionEvaluationContext::DiscardedStatement:
+      Result = OdrUseContext::FormallyOdrUsed;
+      break;
 
     case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
-      return false;
+      // A default argument formally results in odr-use, but doesn't actually
+      // result in a use in any real sense until it itself is used.
+      Result = OdrUseContext::FormallyOdrUsed;
+      break;
   }
-  llvm_unreachable("Invalid context");
+
+  if (SemaRef.CurContext->isDependentContext())
+    return OdrUseContext::Dependent;
+
+  return Result;
 }
 
 static bool isImplicitlyDefinableConstexprFunction(FunctionDecl *Func) {
@@ -14795,6 +14824,10 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
 
   Func->setReferenced();
 
+  // Recursive functions aren't really used until they're used from some other
+  // context.
+  bool IsRecursiveCall = CurContext == Func;
+
   // C++11 [basic.def.odr]p3:
   //   A function whose name appears as a potentially-evaluated expression is
   //   odr-used if it is the unique lookup result or the selected member of a
@@ -14802,7 +14835,18 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   //
   // We (incorrectly) mark overload resolution as an unevaluated context, so we
   // can just check that here.
-  bool OdrUse = MightBeOdrUse && isOdrUseContext(*this);
+  OdrUseContext OdrUse =
+      MightBeOdrUse ? isOdrUseContext(*this) : OdrUseContext::None;
+  if (IsRecursiveCall && OdrUse == OdrUseContext::Used)
+    OdrUse = OdrUseContext::FormallyOdrUsed;
+
+  // C++20 [expr.const]p12:
+  //   A function [...] is needed for constant evaluation if it is [...] a
+  //   constexpr function that is named by an expression that is potentially
+  //   constant evaluated
+  bool NeededForConstantEvaluation =
+      isPotentiallyConstantEvaluatedContext(*this) &&
+      isImplicitlyDefinableConstexprFunction(Func);
 
   // Determine whether we require a function definition to exist, per
   // C++11 [temp.inst]p3:
@@ -14810,12 +14854,23 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   //   instantiated or explicitly specialized, the function template
   //   specialization is implicitly instantiated when the specialization is
   //   referenced in a context that requires a function definition to exist.
+  // C++20 [temp.inst]p7:
+  //   The existence of a definition of a [...] function is considered to
+  //   affect the semantics of the program if the [...] function is needed for
+  //   constant evaluation by an expression
+  // C++20 [basic.def.odr]p10:
+  //   Every program shall contain exactly one definition of every non-inline
+  //   function or variable that is odr-used in that program outside of a
+  //   discarded statement
+  // C++20 [special]p1:
+  //   The implementation will implicitly define [defaulted special members]
+  //   if they are odr-used or needed for constant evaluation.
   //
-  // That is either when this is an odr-use, or when a usage of a constexpr
-  // function occurs within an evaluatable context.
-  bool NeedDefinition =
-      OdrUse || (isEvaluatableContext(*this) &&
-                 isImplicitlyDefinableConstexprFunction(Func));
+  // Note that we skip the implicit instantiation of templates that are only
+  // used in unused default arguments or by recursive calls to themselves.
+  // This is formally non-conforming, but seems reasonable in practice.
+  bool NeedDefinition = !IsRecursiveCall && (OdrUse == OdrUseContext::Used ||
+                                             NeededForConstantEvaluation);
 
   // C++14 [temp.expl.spec]p6:
   //   If a template [...] is explicitly specialized then that specialization
@@ -14843,127 +14898,121 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   if (getLangOpts().CUDA)
     CheckCUDACall(Loc, Func);
 
-  // If we don't need to mark the function as used, and we don't need to
-  // try to provide a definition, there's nothing more to do.
-  if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) &&
-      (!NeedDefinition || Func->getBody()))
-    return;
-
-  // Note that this declaration has been used.
-  if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Func)) {
-    Constructor = cast<CXXConstructorDecl>(Constructor->getFirstDecl());
-    if (Constructor->isDefaulted() && !Constructor->isDeleted()) {
-      if (Constructor->isDefaultConstructor()) {
-        if (Constructor->isTrivial() && !Constructor->hasAttr<DLLExportAttr>())
+  // If we need a definition, try to create one.
+  if (NeedDefinition && !Func->getBody()) {
+    if (CXXConstructorDecl *Constructor = dyn_cast<CXXConstructorDecl>(Func)) {
+      Constructor = cast<CXXConstructorDecl>(Constructor->getFirstDecl());
+      if (Constructor->isDefaulted() && !Constructor->isDeleted()) {
+        if (Constructor->isDefaultConstructor()) {
+          if (Constructor->isTrivial() &&
+              !Constructor->hasAttr<DLLExportAttr>())
+            return;
+          DefineImplicitDefaultConstructor(Loc, Constructor);
+        } else if (Constructor->isCopyConstructor()) {
+          DefineImplicitCopyConstructor(Loc, Constructor);
+        } else if (Constructor->isMoveConstructor()) {
+          DefineImplicitMoveConstructor(Loc, Constructor);
+        }
+      } else if (Constructor->getInheritedConstructor()) {
+        DefineInheritingConstructor(Loc, Constructor);
+      }
+    } else if (CXXDestructorDecl *Destructor =
+                   dyn_cast<CXXDestructorDecl>(Func)) {
+      Destructor = cast<CXXDestructorDecl>(Destructor->getFirstDecl());
+      if (Destructor->isDefaulted() && !Destructor->isDeleted()) {
+        if (Destructor->isTrivial() && !Destructor->hasAttr<DLLExportAttr>())
           return;
-        DefineImplicitDefaultConstructor(Loc, Constructor);
-      } else if (Constructor->isCopyConstructor()) {
-        DefineImplicitCopyConstructor(Loc, Constructor);
-      } else if (Constructor->isMoveConstructor()) {
-        DefineImplicitMoveConstructor(Loc, Constructor);
+        DefineImplicitDestructor(Loc, Destructor);
       }
-    } else if (Constructor->getInheritedConstructor()) {
-      DefineInheritingConstructor(Loc, Constructor);
-    }
-  } else if (CXXDestructorDecl *Destructor =
-                 dyn_cast<CXXDestructorDecl>(Func)) {
-    Destructor = cast<CXXDestructorDecl>(Destructor->getFirstDecl());
-    if (Destructor->isDefaulted() && !Destructor->isDeleted()) {
-      if (Destructor->isTrivial() && !Destructor->hasAttr<DLLExportAttr>())
-        return;
-      DefineImplicitDestructor(Loc, Destructor);
-    }
-    if (Destructor->isVirtual() && getLangOpts().AppleKext)
-      MarkVTableUsed(Loc, Destructor->getParent());
-  } else if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(Func)) {
-    if (MethodDecl->isOverloadedOperator() &&
-        MethodDecl->getOverloadedOperator() == OO_Equal) {
-      MethodDecl = cast<CXXMethodDecl>(MethodDecl->getFirstDecl());
-      if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) {
-        if (MethodDecl->isCopyAssignmentOperator())
-          DefineImplicitCopyAssignment(Loc, MethodDecl);
-        else if (MethodDecl->isMoveAssignmentOperator())
-          DefineImplicitMoveAssignment(Loc, MethodDecl);
+      if (Destructor->isVirtual() && getLangOpts().AppleKext)
+        MarkVTableUsed(Loc, Destructor->getParent());
+    } else if (CXXMethodDecl *MethodDecl = dyn_cast<CXXMethodDecl>(Func)) {
+      if (MethodDecl->isOverloadedOperator() &&
+          MethodDecl->getOverloadedOperator() == OO_Equal) {
+        MethodDecl = cast<CXXMethodDecl>(MethodDecl->getFirstDecl());
+        if (MethodDecl->isDefaulted() && !MethodDecl->isDeleted()) {
+          if (MethodDecl->isCopyAssignmentOperator())
+            DefineImplicitCopyAssignment(Loc, MethodDecl);
+          else if (MethodDecl->isMoveAssignmentOperator())
+            DefineImplicitMoveAssignment(Loc, MethodDecl);
+        }
+      } else if (isa<CXXConversionDecl>(MethodDecl) &&
+                 MethodDecl->getParent()->isLambda()) {
+        CXXConversionDecl *Conversion =
+            cast<CXXConversionDecl>(MethodDecl->getFirstDecl());
+        if (Conversion->isLambdaToBlockPointerConversion())
+          DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion);
+        else
+          DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion);
+      } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext)
+        MarkVTableUsed(Loc, MethodDecl->getParent());
+    }
+
+    // Implicit instantiation of function templates and member functions of
+    // class templates.
+    if (Func->isImplicitlyInstantiable()) {
+      TemplateSpecializationKind TSK =
+          Func->getTemplateSpecializationKindForInstantiation();
+      SourceLocation PointOfInstantiation = Func->getPointOfInstantiation();
+      bool FirstInstantiation = PointOfInstantiation.isInvalid();
+      if (FirstInstantiation) {
+        PointOfInstantiation = Loc;
+        Func->setTemplateSpecializationKind(TSK, PointOfInstantiation);
+      } else if (TSK != TSK_ImplicitInstantiation) {
+        // Use the point of use as the point of instantiation, instead of the
+        // point of explicit instantiation (which we track as the actual point
+        // of instantiation). This gives better backtraces in diagnostics.
+        PointOfInstantiation = Loc;
       }
-    } else if (isa<CXXConversionDecl>(MethodDecl) &&
-               MethodDecl->getParent()->isLambda()) {
-      CXXConversionDecl *Conversion =
-          cast<CXXConversionDecl>(MethodDecl->getFirstDecl());
-      if (Conversion->isLambdaToBlockPointerConversion())
-        DefineImplicitLambdaToBlockPointerConversion(Loc, Conversion);
-      else
-        DefineImplicitLambdaToFunctionPointerConversion(Loc, Conversion);
-    } else if (MethodDecl->isVirtual() && getLangOpts().AppleKext)
-      MarkVTableUsed(Loc, MethodDecl->getParent());
-  }
-
-  // Recursive functions should be marked when used from another function.
-  // FIXME: Is this really right?
-  if (CurContext == Func) return;
-
-  // Implicit instantiation of function templates and member functions of
-  // class templates.
-  if (Func->isImplicitlyInstantiable()) {
-    TemplateSpecializationKind TSK =
-        Func->getTemplateSpecializationKindForInstantiation();
-    SourceLocation PointOfInstantiation = Func->getPointOfInstantiation();
-    bool FirstInstantiation = PointOfInstantiation.isInvalid();
-    if (FirstInstantiation) {
-      PointOfInstantiation = Loc;
-      Func->setTemplateSpecializationKind(TSK, PointOfInstantiation);
-    } else if (TSK != TSK_ImplicitInstantiation) {
-      // Use the point of use as the point of instantiation, instead of the
-      // point of explicit instantiation (which we track as the actual point of
-      // instantiation). This gives better backtraces in diagnostics.
-      PointOfInstantiation = Loc;
-    }
-
-    if (FirstInstantiation || TSK != TSK_ImplicitInstantiation ||
-        Func->isConstexpr()) {
-      if (isa<CXXRecordDecl>(Func->getDeclContext()) &&
-          cast<CXXRecordDecl>(Func->getDeclContext())->isLocalClass() &&
-          CodeSynthesisContexts.size())
-        PendingLocalImplicitInstantiations.push_back(
-            std::make_pair(Func, PointOfInstantiation));
-      else if (Func->isConstexpr())
-        // Do not defer instantiations of constexpr functions, to avoid the
-        // expression evaluator needing to call back into Sema if it sees a
-        // call to such a function.
-        InstantiateFunctionDefinition(PointOfInstantiation, Func);
-      else {
-        Func->setInstantiationIsPending(true);
-        PendingInstantiations.push_back(std::make_pair(Func,
-                                                       PointOfInstantiation));
-        // Notify the consumer that a function was implicitly instantiated.
-        Consumer.HandleCXXImplicitFunctionInstantiation(Func);
+
+      if (FirstInstantiation || TSK != TSK_ImplicitInstantiation ||
+          Func->isConstexpr()) {
+        if (isa<CXXRecordDecl>(Func->getDeclContext()) &&
+            cast<CXXRecordDecl>(Func->getDeclContext())->isLocalClass() &&
+            CodeSynthesisContexts.size())
+          PendingLocalImplicitInstantiations.push_back(
+              std::make_pair(Func, PointOfInstantiation));
+        else if (Func->isConstexpr())
+          // Do not defer instantiations of constexpr functions, to avoid the
+          // expression evaluator needing to call back into Sema if it sees a
+          // call to such a function.
+          InstantiateFunctionDefinition(PointOfInstantiation, Func);
+        else {
+          Func->setInstantiationIsPending(true);
+          PendingInstantiations.push_back(
+              std::make_pair(Func, PointOfInstantiation));
+          // Notify the consumer that a function was implicitly instantiated.
+          Consumer.HandleCXXImplicitFunctionInstantiation(Func);
+        }
+      }
+    } else {
+      // Walk redefinitions, as some of them may be instantiable.
+      for (auto i : Func->redecls()) {
+        if (!i->isUsed(false) && i->isImplicitlyInstantiable())
+          MarkFunctionReferenced(Loc, i, MightBeOdrUse);
       }
-    }
-  } else {
-    // Walk redefinitions, as some of them may be instantiable.
-    for (auto i : Func->redecls()) {
-      if (!i->isUsed(false) && i->isImplicitlyInstantiable())
-        MarkFunctionReferenced(Loc, i, OdrUse);
     }
   }
 
-  if (!OdrUse) return;
-
-  // Keep track of used but undefined functions.
-  if (!Func->isDefined()) {
-    if (mightHaveNonExternalLinkage(Func))
-      UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
-    else if (Func->getMostRecentDecl()->isInlined() &&
-             !LangOpts.GNUInline &&
-             !Func->getMostRecentDecl()->hasAttr<GNUInlineAttr>())
-      UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
-    else if (isExternalWithNoLinkageType(Func))
-      UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
-  }
+  // If this is the first "real" use, act on that.
+  if (OdrUse == OdrUseContext::Used && !Func->isUsed(/*CheckUsedAttr=*/false)) {
+    // Keep track of used but undefined functions.
+    if (!Func->isDefined()) {
+      if (mightHaveNonExternalLinkage(Func))
+        UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
+      else if (Func->getMostRecentDecl()->isInlined() &&
+               !LangOpts.GNUInline &&
+               !Func->getMostRecentDecl()->hasAttr<GNUInlineAttr>())
+        UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
+      else if (isExternalWithNoLinkageType(Func))
+        UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
+    }
 
-  Func->markUsed(Context);
+    Func->markUsed(Context);
 
-  if (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)
-    checkOpenMPDeviceFunction(Loc, Func);
+    if (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)
+      checkOpenMPDeviceFunction(Loc, Func);
+  }
 }
 
 static void
@@ -15775,11 +15824,20 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
   TemplateSpecializationKind TSK = MSI ? MSI->getTemplateSpecializationKind()
                                        : Var->getTemplateSpecializationKind();
 
-  bool OdrUseContext = isOdrUseContext(SemaRef);
+  OdrUseContext OdrUse = isOdrUseContext(SemaRef);
   bool UsableInConstantExpr =
       Var->isUsableInConstantExpressions(SemaRef.Context);
+
+  // C++20 [expr.const]p12:
+  //   A variable [...] is needed for constant evaluation if it is [...] a
+  //   variable whose name appears as a potentially constant evaluated
+  //   expression that is either a contexpr variable or is of non-volatile
+  //   const-qualified integral type or of reference type
+  bool NeededForConstantEvaluation =
+      isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
+
   bool NeedDefinition =
-      OdrUseContext || (isEvaluatableContext(SemaRef) && UsableInConstantExpr);
+      OdrUse == OdrUseContext::Used || NeededForConstantEvaluation;
 
   VarTemplateSpecializationDecl *VarSpec =
       dyn_cast<VarTemplateSpecializationDecl>(Var);
@@ -15843,25 +15901,46 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
     }
   }
 
-  // Per C++11 [basic.def.odr], a variable is odr-used "unless it satisfies
-  // the requirements for appearing in a constant expression (5.19) and, if
-  // it is an object, the lvalue-to-rvalue conversion (4.1)
-  // is immediately applied."  We check the first part here, and
+  // C++20 [basic.def.odr]p4:
+  //   A variable x whose name appears as a potentially-evaluated expression e
+  //   is odr-used by e unless
+  //   -- x is a reference that is usable in constant expressions
+  //   -- x is a variable of non-reference type that is usable in constant
+  //      expressions and has no mutable subobjects [FIXME], and e is an
+  //      element of the set of potential results of an expression of
+  //      non-volatile-qualified non-class type to which the lvalue-to-rvalue
+  //      conversion is applied
+  //   -- x is a variable of non-reference type, and e is an element of the set
+  //      of potential results of a discarded-value expression to which the
+  //      lvalue-to-rvalue conversion is not applied [FIXME]
+  //
+  // We check the first part of the second bullet here, and
   // Sema::UpdateMarkingForLValueToRValue deals with the second part.
-  // Note that we use the C++11 definition everywhere because nothing in
-  // C++03 depends on whether we get the C++03 version correct. The second
-  // part does not apply to references, since they are not objects.
-  if (OdrUseContext && E &&
-      IsVariableAConstantExpression(Var, SemaRef.Context)) {
-    // A reference initialized by a constant expression can never be
-    // odr-used, so simply ignore it.
-    if (!Var->getType()->isReferenceType() ||
-        (SemaRef.LangOpts.OpenMP && SemaRef.isOpenMPCapturedDecl(Var)))
-      SemaRef.MaybeODRUseExprs.insert(E);
-  } else if (OdrUseContext) {
-    MarkVarDeclODRUsed(Var, Loc, SemaRef,
-                       /*MaxFunctionScopeIndex ptr*/ nullptr);
-  } else if (isOdrUseContext(SemaRef, /*SkipDependentUses*/false)) {
+  // FIXME: To get the third bullet right, we need to delay this even for
+  // variables that are not usable in constant expressions.
+  switch (OdrUse) {
+  case OdrUseContext::None:
+    break;
+
+  case OdrUseContext::FormallyOdrUsed:
+    // FIXME: Ignoring formal odr-uses results in incorrect lambda capture
+    // behavior.
+    break;
+
+  case OdrUseContext::Used:
+    if (E && IsVariableAConstantExpression(Var, SemaRef.Context)) {
+      // A reference initialized by a constant expression can never be
+      // odr-used, so simply ignore it.
+      if (!Var->getType()->isReferenceType() ||
+          (SemaRef.LangOpts.OpenMP && SemaRef.isOpenMPCapturedDecl(Var)))
+        SemaRef.MaybeODRUseExprs.insert(E);
+    } else {
+      MarkVarDeclODRUsed(Var, Loc, SemaRef,
+                         /*MaxFunctionScopeIndex ptr*/ nullptr);
+    }
+    break;
+
+  case OdrUseContext::Dependent:
     // If this is a dependent context, we don't need to mark variables as
     // odr-used, but we may still need to track them for lambda capture.
     // FIXME: Do we also need to do this inside dependent typeid expressions
@@ -15882,12 +15961,15 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
         // later (ActOnFinishFullExpr) for eventual capture and odr-use marking
         // unless the variable is a reference that was initialized by a constant
         // expression (this will never need to be captured or odr-used).
+        //
+        // FIXME: We can simplify this a lot after implementing P0588R1.
         assert(E && "Capture variable should be used in an expression.");
         if (!Var->getType()->isReferenceType() ||
             !IsVariableNonDependentAndAConstantExpression(Var, SemaRef.Context))
           LSI->addPotentialCapture(E->IgnoreParens());
       }
     }
+    break;
   }
 }
 

From 8e1d921bb37c57e75660e259f5751797c5bd0d9d Mon Sep 17 00:00:00 2001
From: Jason Liu <jasonliu.development@gmail.com>
Date: Fri, 24 May 2019 20:54:35 +0000
Subject: [PATCH 0201/1176] Implement call lowering without parameters on AIX

Summary:dd
This patch implements call lowering for calls without parameters
on AIX as initial support.

Reviewers: sfertile, hubert.reinterpretcast, aheejin, efriedma

Differential Revision: https://reviews.llvm.org/D61948

llvm-svn: 361669
---
 llvm/lib/Target/PowerPC/P9InstrResources.td  |  2 +-
 llvm/lib/Target/PowerPC/PPCCallingConv.td    | 14 +++
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp |  4 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp  | 98 +++++++++++++++++---
 llvm/lib/Target/PowerPC/PPCISelLowering.h    | 11 ++-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td      |  6 ++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp  |  4 +
 llvm/lib/Target/PowerPC/PPCSubtarget.h       |  3 +-
 llvm/lib/Target/TargetMachine.cpp            |  5 +
 llvm/test/CodeGen/PowerPC/test_call_aix.ll   | 40 ++++++++
 10 files changed, 168 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/test_call_aix.ll

diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 4443c469ae451..f7ee2a83e5d60 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1288,7 +1288,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
   (instregex "BCCTR(L)?(8)?(n)?$"),
   (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
   (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
-  (instregex "BL(_TLS)?$"),
+  (instregex "BL(_TLS|_NOP)?$"),
   (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
   (instregex "BLA(8|8_NOP)?$"),
   (instregex "BLR(8|L)?$"),
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 806c923897735..de8b2b0986b05 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -306,6 +306,13 @@ def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
 
 def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
 
+def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+                                     R21, R22, R23, R24, R25, R26, R27, R28,
+                                     R29, R30, R31, F14, F15, F16, F17, F18,
+                                     F19, F20, F21, F22, F23, F24, F25, F26,
+                                     F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                )>;
+
 def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
                                         X21, X22, X23, X24, X25, X26, X27, X28,
                                         X29, X30, X31, F14, F15, F16, F17, F18,
@@ -322,6 +329,13 @@ def CSR_SVR464   : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4
                                    )>;
 
+def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+                                     X21, X22, X23, X24, X25, X26, X27, X28,
+                                     X29, X30, X31, F14, F15, F16, F17, F18,
+                                     F19, F20, F21, F22, F23, F24, F25, F26,
+                                     F27, F28, F29, F30, F31, CR2, CR3, CR4
+                                )>;
+
 // CSRs that are handled by prologue, epilogue.
 def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
 
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 9ff8312d98897..fe2aa1c2b0dca 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -71,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
 }
 
 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
-  if (STI.isDarwinABI() || STI.isPPC64())
+  if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
 
-  // SVR4 ABI:
+  // 32-bit SVR4 ABI:
   return 8;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c5e4a41d8537c..b2381077fa9a8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5160,18 +5160,23 @@ SDValue PPCTargetLowering::FinishCall(
   }
 
   // Add a NOP immediately after the branch instruction when using the 64-bit
-  // SVR4 ABI. At link time, if caller and callee are in a different module and
+  // SVR4 or the AIX ABI.
+  // At link time, if caller and callee are in a different module and
   // thus have a different TOC, the call will be replaced with a call to a stub
   // function which saves the current TOC, loads the TOC of the callee and
   // branches to the callee. The NOP will be replaced with a load instruction
   // which restores the TOC of the caller from the TOC save slot of the current
   // stack frame. If caller and callee belong to the same module (and have the
-  // same TOC), the NOP will remain unchanged.
+  // same TOC), the NOP will remain unchanged, or become some other NOP.
 
   MachineFunction &MF = DAG.getMachineFunction();
-  if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
-      !isPatchPoint) {
+  if (!isTailCall && !isPatchPoint &&
+      ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
+       Subtarget.isAIXABI())) {
     if (CallOpc == PPCISD::BCTRL) {
+      if (Subtarget.isAIXABI())
+        report_fatal_error("Indirect call on AIX is not implemented.");
+
       // This is a call through a function pointer.
       // Restore the caller TOC from the save area into R2.
       // See PrepareCall() for more information about calls through function
@@ -5268,16 +5273,20 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       !isTailCall)
     Callee = LowerGlobalAddress(Callee, DAG);
 
-  if (Subtarget.isSVR4ABI()) {
-    if (Subtarget.isPPC64())
-      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
-                              dl, DAG, InVals, CS);
-    else
-      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
-                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
-                              dl, DAG, InVals, CS);
-  }
+  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+    return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                            dl, DAG, InVals, CS);
+
+  if (Subtarget.isSVR4ABI())
+    return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                            dl, DAG, InVals, CS);
+
+  if (Subtarget.isAIXABI())
+    return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
+                         isTailCall, isPatchPoint, Outs, OutVals, Ins,
+                         dl, DAG, InVals, CS);
 
   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                           isTailCall, isPatchPoint, Outs, OutVals, Ins,
@@ -6567,6 +6576,67 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
                     NumBytes, Ins, InVals, CS);
 }
 
+
+SDValue PPCTargetLowering::LowerCall_AIX(
+    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
+    bool isTailCall, bool isPatchPoint,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+    ImmutableCallSite CS) const {
+
+  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
+         "Unimplemented calling convention!");
+  if (isVarArg || isPatchPoint)
+    report_fatal_error("This call type is unimplemented on AIX.");
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  bool isPPC64 = PtrVT == MVT::i64;
+  unsigned PtrByteSize = isPPC64 ? 8 : 4;
+  unsigned NumOps = Outs.size();
+
+  if (NumOps != 0)
+    report_fatal_error("Call lowering with parameters is not implemented "
+                       "on AIX yet.");
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, parameter list area.
+  // On XCOFF, we start with 24/48, which is reserved space for
+  // [SP][CR][LR][2 x reserved][TOC].
+  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+  // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if the callee
+  // is variadic.
+  // Because we cannot tell if this is needed on the caller side, we have to
+  // conservatively assume that it is needed.  As such, make sure we have at
+  // least enough stack space for the caller to store the 8 GPRs.
+  unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog
+  // inserter pass.
+  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  SDValue CallSeqStart = Chain;
+
+  if (!isFunctionGlobalAddress(Callee) &&
+      !isa<ExternalSymbolSDNode>(Callee))
+    report_fatal_error("Handling of indirect call is unimplemented!");
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SDValue InFlag;
+
+  if (isTailCall)
+    report_fatal_error("Handling of tail call is unimplemented!");
+  int SPDiff = 0;
+
+  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+                    /* unused except on PPC64 ELFv1 */ false, DAG,
+                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+                    NumBytes, Ins, InVals, CS);
+}
+
 bool
 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                   MachineFunction &MF, bool isVarArg,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 758f958dddf3f..4ebb3b79b51c8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -160,7 +160,7 @@ namespace llvm {
 
       /// CALL - A direct function call.
       /// CALL_NOP is a call with the special NOP which follows 64-bit
-      /// SVR4 calls.
+      /// SVR4 calls and 32-bit/64-bit AIX calls.
       CALL, CALL_NOP,
 
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -1120,6 +1120,15 @@ namespace llvm {
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &InVals,
                              ImmutableCallSite CS) const;
+    SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+                          CallingConv::ID CallConv, bool isVarArg,
+                          bool isTailCall, bool isPatchPoint,
+                          const SmallVectorImpl<ISD::OutputArg> &Outs,
+                          const SmallVectorImpl<SDValue> &OutVals,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          const SDLoc &dl, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals,
+                          ImmutableCallSite CS) const;
 
     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 14fe0cd87fa52..c32357239d9a0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1469,6 +1469,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
       def BCLn : BForm_4<16, 4, 0, 1, (outs),
                          (ins crbitrc:$bi, condbrtarget:$dst),
                          "bcl 4, $bi, $dst">;
+      def BL_NOP  : IForm_and_DForm_4_zero<18, 0, 1, 24,
+                                           (outs), (ins calltarget:$func),
+                                           "bl $func\n\tnop", IIC_BrB, []>;
     }
   }
   let Uses = [CTR, RM] in {
@@ -3029,6 +3032,9 @@ def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
 // Calls
 def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
           (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i32 tglobaladdr:$dst)),
+          (BL_NOP tglobaladdr:$dst)>;
+
 def : Pat<(PPCcall (i32 texternalsym:$dst)),
           (BL texternalsym:$dst)>;
 
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 1f36d67b09f68..b4e375b5c9e6e 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -228,6 +228,10 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                   : CSR_Darwin64_RegMask)
                         : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
                                                   : CSR_Darwin32_RegMask);
+  if (Subtarget.isAIXABI()) {
+    assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
+    return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+  }
 
   if (CC == CallingConv::Cold) {
     return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 1e03726ba76d9..e1f4a9680da74 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -314,7 +314,8 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
 
   bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
-  bool isSVR4ABI() const { return !isDarwinABI(); }
+  bool isAIXABI() const { return TargetTriple.isOSAIX(); }
+  bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
   bool isELFv2ABI() const;
 
   /// Originally, this function return hasISEL(). Now we always enable it,
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index fd70b836421a6..634866d935707 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -173,6 +173,11 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
     return GV && GV->isStrongDefinitionForLinker();
   }
 
+  // Due to the AIX linkage model, any global with default visibility is
+  // considered non-local.
+  if (TT.isOSBinFormatXCOFF())
+    return false;
+
   assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
   assert(RM != Reloc::DynamicNoPIC);
 
diff --git a/llvm/test/CodeGen/PowerPC/test_call_aix.ll b/llvm/test/CodeGen/PowerPC/test_call_aix.ll
new file mode 100644
index 0000000000000..785d6d6e86fd5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/test_call_aix.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \
+; RUN: FileCheck --check-prefix=32BIT %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \
+; RUN: FileCheck --check-prefix=64BIT %s
+
+declare void @foo(...)
+
+define void @test_call() {
+entry:
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT: BL_NOP @foo, csr_aix32, implicit-def dead $lr, implicit $rm, implicit-def $r1
+; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT: BL8_NOP @foo, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+  call void bitcast (void (...)* @foo to void ()*)()
+  ret void
+}
+
+define hidden void @foo_local() {
+entry:
+  ret void
+}
+
+define void @test_local_call() {
+entry:
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
+; 32BIT: BL @foo_local, csr_aix32, implicit-def dead $lr, implicit $rm, implicit-def $r1
+; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
+
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+; 64BIT: BL8 @foo_local, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+
+  call void @foo_local()
+  ret void
+}

From de47d66191e50e8a741ed551c25d60bbf86da5e4 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 24 May 2019 21:08:12 +0000
Subject: [PATCH 0202/1176] Default arguments are potentially constant
 evaluated.

We need to eagerly instantiate constexpr functions used in them even if
the default argument is never actually used, because we might evaluate
portions of it when performing semantic checks.

llvm-svn: 361670
---
 clang/lib/Sema/SemaExpr.cpp     |  6 +-----
 clang/test/SemaCXX/default1.cpp | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8ebc9bcf025eb..0c04f03f06d6d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14739,6 +14739,7 @@ static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) {
     case Sema::ExpressionEvaluationContext::ConstantEvaluated:
       // -- a manifestly constant-evaluated expression,
     case Sema::ExpressionEvaluationContext::PotentiallyEvaluated:
+    case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
     case Sema::ExpressionEvaluationContext::DiscardedStatement:
       // -- a potentially-evaluated expression,
     case Sema::ExpressionEvaluationContext::UnevaluatedList:
@@ -14754,11 +14755,6 @@ static bool isPotentiallyConstantEvaluatedContext(Sema &SemaRef) {
     case Sema::ExpressionEvaluationContext::UnevaluatedAbstract:
       // Expressions in this context are never evaluated.
       return false;
-
-    case Sema::ExpressionEvaluationContext::PotentiallyEvaluatedIfUsed:
-      // FIXME: This is wrong. Default arguemnts are potentially constant
-      // evaluated even if they are never used.
-      return false;
   }
   llvm_unreachable("Invalid context");
 }
diff --git a/clang/test/SemaCXX/default1.cpp b/clang/test/SemaCXX/default1.cpp
index fcaa2c839d6cb..3bc6f832b686b 100644
--- a/clang/test/SemaCXX/default1.cpp
+++ b/clang/test/SemaCXX/default1.cpp
@@ -78,3 +78,21 @@ void PR20769(int = 2);
 
 void PR20769_b(int = 1);
 void PR20769_b() { void PR20769_b(int = 2); }
+
+#if __cplusplus >= 201103L
+template<typename T> constexpr int f1() { return 0; }
+// This is OK, but in order to see that we must instantiate f<int>, despite it
+// being in an unused default argument.
+void g1(char c = {f1<int>()}) {} // expected-warning {{braces around scalar}}
+
+// This is formally ill-formed, but we choose to not trigger instantiation here
+// (at least, not until g2 is actually called in a way that uses the default
+// argument).
+template<typename T> int f2() { return T::error; }
+void g2(int c = f2<int>()) {}
+
+// FIXME: Provide a note pointing at the first use of the default argument?
+template<typename T> int f3() { return T::error; } // expected-error {{no members}}
+void g3(int c = f3<int>()) {} // expected-note {{in instantiation of}}
+void use_g3() { g3(); }
+#endif

From 0da8160df354f50bffe339d6f2a83f9bf0049cd4 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Fri, 24 May 2019 21:11:28 +0000
Subject: [PATCH 0203/1176] [dwarfdump] Add flag to limit the number of parents
 DIEs

This adds `-parent-recurse-depth` which limits the number of parent DIEs
being dumped.

Differential revision: https://reviews.llvm.org/D62359

llvm-svn: 361671
---
 llvm/include/llvm/DebugInfo/DIContext.h      | 15 +++++++-----
 llvm/lib/DebugInfo/DWARF/DWARFDie.cpp        | 10 ++++----
 llvm/test/tools/llvm-dwarfdump/X86/enum.s    |  5 ++++
 llvm/test/tools/llvm-dwarfdump/cmdline.test  |  1 +
 llvm/tools/dsymutil/DwarfLinker.cpp          | 18 ++++++++-------
 llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 24 ++++++++++++--------
 6 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h
index 87c47dcf106bd..b3f773909844f 100644
--- a/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/llvm/include/llvm/DebugInfo/DIContext.h
@@ -97,11 +97,10 @@ class DIInliningInfo {
   void addFrame(const DILineInfo &Frame) {
     Frames.push_back(Frame);
   }
-  
+
   void resize(unsigned i) {
     Frames.resize(i);
   }
-  
 };
 
 /// Container for description of a global variable.
@@ -157,7 +156,8 @@ enum DIDumpType : unsigned {
 /// dumped.
 struct DIDumpOptions {
   unsigned DumpType = DIDT_All;
-  unsigned RecurseDepth = -1U;
+  unsigned ChildRecurseDepth = -1U;
+  unsigned ParentRecurseDepth = -1U;
   uint16_t Version = 0; // DWARF version to assume when extracting.
   uint8_t AddrSize = 4; // Address byte size to assume when extracting.
   bool ShowAddresses = true;
@@ -171,15 +171,18 @@ struct DIDumpOptions {
   /// Return default option set for printing a single DIE without children.
   static DIDumpOptions getForSingleDIE() {
     DIDumpOptions Opts;
-    Opts.RecurseDepth = 0;
+    Opts.ChildRecurseDepth = 0;
+    Opts.ParentRecurseDepth = 0;
     return Opts;
   }
 
   /// Return the options with RecurseDepth set to 0 unless explicitly required.
   DIDumpOptions noImplicitRecursion() const {
     DIDumpOptions Opts = *this;
-    if (RecurseDepth == -1U && !ShowChildren)
-      Opts.RecurseDepth = 0;
+    if (ChildRecurseDepth == -1U && !ShowChildren)
+      Opts.ChildRecurseDepth = 0;
+    if (ParentRecurseDepth == -1U && !ShowParents)
+      Opts.ParentRecurseDepth = 0;
     return Opts;
   }
 };
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 7d80f2eb75bde..6212842983669 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -553,10 +553,12 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
 
 /// Helper to dump a DIE with all of its parents, but no siblings.
 static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent,
-                                DIDumpOptions DumpOpts) {
+                                DIDumpOptions DumpOpts, unsigned Depth = 0) {
   if (!Die)
     return Indent;
-  Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts);
+  if (DumpOpts.ParentRecurseDepth > 0 && Depth >= DumpOpts.ParentRecurseDepth)
+    return Indent;
+  Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts, Depth + 1);
   Die.dump(OS, Indent, DumpOpts);
   return Indent + 2;
 }
@@ -604,8 +606,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
         }
 
         DWARFDie child = getFirstChild();
-        if (DumpOpts.ShowChildren && DumpOpts.RecurseDepth > 0 && child) {
-          DumpOpts.RecurseDepth--;
+        if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) {
+          DumpOpts.ChildRecurseDepth--;
           DIDumpOptions ChildDumpOpts = DumpOpts;
           ChildDumpOpts.ShowParents = false;
           while (child) {
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/enum.s b/llvm/test/tools/llvm-dwarfdump/X86/enum.s
index acd1947d89db2..25864b38f5c98 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/enum.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/enum.s
@@ -2,11 +2,16 @@
 # RUN: llvm-dwarfdump --debug-info=0x0000002a -p %t | FileCheck %s --check-prefix=PARENTS
 # RUN: llvm-dwarfdump --debug-info=0x0000002a -c %t | FileCheck %s --check-prefix=CHILDREN
 # RUN: llvm-dwarfdump --debug-info=0x0000002a -p -c %t | FileCheck %s --check-prefix=BOTH
+# RUN: llvm-dwarfdump  --debug-info=0x00000032 -p -parent-recurse-depth 1 -c %t | FileCheck %s --check-prefix=ONEPARENT
 
 # PARENTS: DW_TAG_compile_unit
 # PARENTS: DW_TAG_enumeration_type
 # PARENTS-NOT: DW_TAG_enumerator
 
+# ONEPARENT-NOT: DW_TAG_compile_unit
+# ONEPARENT: DW_TAG_enumeration_type
+# ONEPARENT: DW_TAG_enumerator
+
 # CHILDREN-NOT: DW_TAG_compile_unit
 # CHILDREN:   DW_TAG_enumerator
 # CHILDREN:     DW_AT_name	("first")
diff --git a/llvm/test/tools/llvm-dwarfdump/cmdline.test b/llvm/test/tools/llvm-dwarfdump/cmdline.test
index 5b5ea618c1a3b..bbcb14933dce2 100644
--- a/llvm/test/tools/llvm-dwarfdump/cmdline.test
+++ b/llvm/test/tools/llvm-dwarfdump/cmdline.test
@@ -13,6 +13,7 @@ HELP: -find
 HELP: -ignore-case
 HELP: -lookup
 HELP: -name
+HELP: -parent-recurse-depth=<N>
 HELP: -recurse-depth=<N>
 HELP: -regex
 HELP: -show-children
diff --git a/llvm/tools/dsymutil/DwarfLinker.cpp b/llvm/tools/dsymutil/DwarfLinker.cpp
index 79e3437a04d04..be8bcc648ad41 100644
--- a/llvm/tools/dsymutil/DwarfLinker.cpp
+++ b/llvm/tools/dsymutil/DwarfLinker.cpp
@@ -227,7 +227,7 @@ void DwarfLinker::reportWarning(const Twine &Warning, const DebugMapObject &DMO,
     return;
 
   DIDumpOptions DumpOpts;
-  DumpOpts.RecurseDepth = 0;
+  DumpOpts.ChildRecurseDepth = 0;
   DumpOpts.Verbose = Options.Verbose;
 
   WithColor::note() << "    in DIE:\n";
@@ -649,7 +649,7 @@ unsigned DwarfLinker::shouldKeepVariableDIE(RelocationManager &RelocMgr,
 
   if (Options.Verbose) {
     DIDumpOptions DumpOpts;
-    DumpOpts.RecurseDepth = 0;
+    DumpOpts.ChildRecurseDepth = 0;
     DumpOpts.Verbose = Options.Verbose;
     DIE.dump(outs(), 8 /* Indent */, DumpOpts);
   }
@@ -685,7 +685,7 @@ unsigned DwarfLinker::shouldKeepSubprogramDIE(
 
   if (Options.Verbose) {
     DIDumpOptions DumpOpts;
-    DumpOpts.RecurseDepth = 0;
+    DumpOpts.ChildRecurseDepth = 0;
     DumpOpts.Verbose = Options.Verbose;
     DIE.dump(outs(), 8 /* Indent */, DumpOpts);
   }
@@ -2096,8 +2096,10 @@ void DwarfLinker::DIECloner::copyAbbrev(
   Linker.AssignAbbrev(Copy);
 }
 
-uint32_t DwarfLinker::DIECloner::hashFullyQualifiedName(
-    DWARFDie DIE, CompileUnit &U, const DebugMapObject &DMO, int RecurseDepth) {
+uint32_t
+DwarfLinker::DIECloner::hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U,
+                                               const DebugMapObject &DMO,
+                                               int ChildRecurseDepth) {
   const char *Name = nullptr;
   DWARFUnit *OrigUnit = &U.getOrigUnit();
   CompileUnit *CU = &U;
@@ -2131,13 +2133,13 @@ uint32_t DwarfLinker::DIECloner::hashFullyQualifiedName(
       // FIXME: dsymutil-classic compatibility. Ignore modules.
       CU->getOrigUnit().getDIEAtIndex(CU->getInfo(Idx).ParentIdx).getTag() ==
           dwarf::DW_TAG_module)
-    return djbHash(Name ? Name : "", djbHash(RecurseDepth ? "" : "::"));
+    return djbHash(Name ? Name : "", djbHash(ChildRecurseDepth ? "" : "::"));
 
   DWARFDie Die = OrigUnit->getDIEAtIndex(CU->getInfo(Idx).ParentIdx);
   return djbHash(
       (Name ? Name : ""),
       djbHash((Name ? "::" : ""),
-              hashFullyQualifiedName(Die, *CU, DMO, ++RecurseDepth)));
+              hashFullyQualifiedName(Die, *CU, DMO, ++ChildRecurseDepth)));
 }
 
 static uint64_t getDwoId(const DWARFDie &CUDie, const DWARFUnit &Unit) {
@@ -2656,7 +2658,7 @@ bool DwarfLinker::link(const DebugMap &Map) {
       if (Options.Verbose) {
         outs() << "Input compilation unit:";
         DIDumpOptions DumpOpts;
-        DumpOpts.RecurseDepth = 0;
+        DumpOpts.ChildRecurseDepth = 0;
         DumpOpts.Verbose = Options.Verbose;
         CUDie.dump(outs(), 0, DumpOpts);
       }
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 603d801b8b541..34117f79a610a 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -192,13 +192,18 @@ static opt<bool>
              cat(DwarfDumpCategory));
 static alias ShowFormAlias("F", desc("Alias for -show-form."),
                            aliasopt(ShowForm), cat(DwarfDumpCategory));
-static opt<unsigned> RecurseDepth(
-    "recurse-depth",
-    desc("Only recurse to a depth of N when displaying debug info entries."),
-    cat(DwarfDumpCategory), init(-1U), value_desc("N"));
-static alias RecurseDepthAlias("r", desc("Alias for -recurse-depth."),
-                               aliasopt(RecurseDepth));
-
+static opt<unsigned>
+    ChildRecurseDepth("recurse-depth",
+                      desc("Only recurse to a depth of N when displaying "
+                           "children of debug info entries."),
+                      cat(DwarfDumpCategory), init(-1U), value_desc("N"));
+static alias ChildRecurseDepthAlias("r", desc("Alias for -recurse-depth."),
+                                    aliasopt(ChildRecurseDepth));
+static opt<unsigned>
+    ParentRecurseDepth("parent-recurse-depth",
+                       desc("Only recurse to a depth of N when displaying "
+                            "parents of debug info entries."),
+                       cat(DwarfDumpCategory), init(-1U), value_desc("N"));
 static opt<bool>
     SummarizeTypes("summarize-types",
                    desc("Abbreviate the description of type unit entries."),
@@ -233,7 +238,8 @@ static void error(StringRef Prefix, std::error_code EC) {
 static DIDumpOptions getDumpOpts() {
   DIDumpOptions DumpOpts;
   DumpOpts.DumpType = DumpType;
-  DumpOpts.RecurseDepth = RecurseDepth;
+  DumpOpts.ChildRecurseDepth = ChildRecurseDepth;
+  DumpOpts.ParentRecurseDepth = ParentRecurseDepth;
   DumpOpts.ShowAddresses = !Diff;
   DumpOpts.ShowChildren = ShowChildren;
   DumpOpts.ShowParents = ShowParents;
@@ -389,7 +395,7 @@ static bool lookup(ObjectFile &Obj, DWARFContext &DICtx, uint64_t Address,
     return false;
 
   DIDumpOptions DumpOpts = getDumpOpts();
-  DumpOpts.RecurseDepth = 0;
+  DumpOpts.ChildRecurseDepth = 0;
   DIEsForAddr.CompileUnit->dump(OS, DumpOpts);
   if (DIEsForAddr.FunctionDIE) {
     DIEsForAddr.FunctionDIE.dump(OS, 2, DumpOpts);

From 7c67dec9b80a3ddcdaa881a744740e35fa7abcb7 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Fri, 24 May 2019 21:26:30 +0000
Subject: [PATCH 0204/1176] [DWARFExpression] Remove commented-out code (NFC)

llvm-svn: 361672
---
 lldb/source/Expression/DWARFExpression.cpp | 119 ---------------------
 1 file changed, 119 deletions(-)

diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
index c86def753d923..d6fa41bfb74b7 100644
--- a/lldb/source/Expression/DWARFExpression.cpp
+++ b/lldb/source/Expression/DWARFExpression.cpp
@@ -483,23 +483,6 @@ void DWARFExpression::DumpLocation(Stream *s, lldb::offset_t offset,
     case DW_OP_call_ref: // 0x9a DWARF3 1 4- or 8-byte offset of DIE
       s->Printf("DW_OP_call_ref(0x%8.8" PRIx64 ")", m_data.GetAddress(&offset));
       break;
-    //      case DW_OP_call_frame_cfa: s << "call_frame_cfa"; break;
-    //      // 0x9c DWARF3
-    //      case DW_OP_bit_piece: // 0x9d DWARF3 2
-    //          s->Printf("DW_OP_bit_piece(0x%x, 0x%x)",
-    //          m_data.GetULEB128(&offset), m_data.GetULEB128(&offset));
-    //          break;
-    //      case DW_OP_lo_user:     s->PutCString("DW_OP_lo_user"); break;
-    //      // 0xe0
-    //      case DW_OP_hi_user:     s->PutCString("DW_OP_hi_user"); break;
-    //      // 0xff
-    //        case DW_OP_APPLE_extern:
-    //            s->Printf("DW_OP_APPLE_extern(%" PRIu64 ")",
-    //            m_data.GetULEB128(&offset));
-    //            break;
-    //        case DW_OP_APPLE_array_ref:
-    //            s->PutCString("DW_OP_APPLE_array_ref");
-    //            break;
     case DW_OP_form_tls_address:
       s->PutCString("DW_OP_form_tls_address"); // 0x9b
       break;
@@ -521,62 +504,6 @@ void DWARFExpression::DumpLocation(Stream *s, lldb::offset_t offset,
     case DW_OP_APPLE_uninit:
       s->PutCString("DW_OP_APPLE_uninit"); // 0xF0
       break;
-      //        case DW_OP_APPLE_assign:        // 0xF1 - pops value off and
-      //        assigns it to second item on stack (2nd item must have
-      //        assignable context)
-      //            s->PutCString("DW_OP_APPLE_assign");
-      //            break;
-      //        case DW_OP_APPLE_address_of:    // 0xF2 - gets the address of
-      //        the top stack item (top item must be a variable, or have
-      //        value_type that is an address already)
-      //            s->PutCString("DW_OP_APPLE_address_of");
-      //            break;
-      //        case DW_OP_APPLE_value_of:      // 0xF3 - pops the value off the
-      //        stack and pushes the value of that object (top item must be a
-      //        variable, or expression local)
-      //            s->PutCString("DW_OP_APPLE_value_of");
-      //            break;
-      //        case DW_OP_APPLE_deref_type:    // 0xF4 - gets the address of
-      //        the top stack item (top item must be a variable, or a clang
-      //        type)
-      //            s->PutCString("DW_OP_APPLE_deref_type");
-      //            break;
-      //        case DW_OP_APPLE_expr_local:    // 0xF5 - ULEB128 expression
-      //        local index
-      //            s->Printf("DW_OP_APPLE_expr_local(%" PRIu64 ")",
-      //            m_data.GetULEB128(&offset));
-      //            break;
-      //        case DW_OP_APPLE_constf:        // 0xF6 - 1 byte float size,
-      //        followed by constant float data
-      //            {
-      //                uint8_t float_length = m_data.GetU8(&offset);
-      //                s->Printf("DW_OP_APPLE_constf(<%u> ", float_length);
-      //                m_data.Dump(s, offset, eFormatHex, float_length, 1,
-      //                UINT32_MAX, DW_INVALID_ADDRESS, 0, 0);
-      //                s->PutChar(')');
-      //                // Consume the float data
-      //                m_data.GetData(&offset, float_length);
-      //            }
-      //            break;
-      //        case DW_OP_APPLE_scalar_cast:
-      //            s->Printf("DW_OP_APPLE_scalar_cast(%s)",
-      //            Scalar::GetValueTypeAsCString
-      //            ((Scalar::Type)m_data.GetU8(&offset)));
-      //            break;
-      //        case DW_OP_APPLE_clang_cast:
-      //            {
-      //                clang::Type *clang_type = (clang::Type
-      //                *)m_data.GetMaxU64(&offset, sizeof(void*));
-      //                s->Printf("DW_OP_APPLE_clang_cast(%p)", clang_type);
-      //            }
-      //            break;
-      //        case DW_OP_APPLE_clear:
-      //            s->PutCString("DW_OP_APPLE_clear");
-      //            break;
-      //        case DW_OP_APPLE_error:         // 0xFF - Stops expression
-      //        evaluation and returns an error (no args)
-      //            s->PutCString("DW_OP_APPLE_error");
-      //            break;
     }
   }
 }
@@ -691,52 +618,6 @@ static bool ReadRegisterValueAsScalar(RegisterContext *reg_ctx,
   return false;
 }
 
-// bool
-// DWARFExpression::LocationListContainsLoadAddress (Process* process, const
-// Address &addr) const
-//{
-//    return LocationListContainsLoadAddress(process,
-//    addr.GetLoadAddress(process));
-//}
-//
-// bool
-// DWARFExpression::LocationListContainsLoadAddress (Process* process, addr_t
-// load_addr) const
-//{
-//    if (load_addr == LLDB_INVALID_ADDRESS)
-//        return false;
-//
-//    if (IsLocationList())
-//    {
-//        lldb::offset_t offset = 0;
-//
-//        addr_t loc_list_base_addr = m_loclist_slide.GetLoadAddress(process);
-//
-//        if (loc_list_base_addr == LLDB_INVALID_ADDRESS)
-//            return false;
-//
-//        while (m_data.ValidOffset(offset))
-//        {
-//            // We need to figure out what the value is for the location.
-//            addr_t lo_pc = m_data.GetAddress(&offset);
-//            addr_t hi_pc = m_data.GetAddress(&offset);
-//            if (lo_pc == 0 && hi_pc == 0)
-//                break;
-//            else
-//            {
-//                lo_pc += loc_list_base_addr;
-//                hi_pc += loc_list_base_addr;
-//
-//                if (lo_pc <= load_addr && load_addr < hi_pc)
-//                    return true;
-//
-//                offset += m_data.GetU16(&offset);
-//            }
-//        }
-//    }
-//    return false;
-//}
-
 static offset_t GetOpcodeDataSize(const DataExtractor &data,
                                   const lldb::offset_t data_offset,
                                   const uint8_t op) {

From b77a60ce40d05592d67980df4dd180c57b8fec97 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Fri, 24 May 2019 21:27:37 +0000
Subject: [PATCH 0205/1176] [Target] Make Processes' GetLanguageRuntime
 non-virtual

llvm-svn: 361673
---
 lldb/include/lldb/Target/Process.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index b98975cbe3606..90118c87b2f73 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -2178,13 +2178,12 @@ class Process : public std::enable_shared_from_this<Process>,
 
   OperatingSystem *GetOperatingSystem() { return m_os_up.get(); }
 
-  virtual LanguageRuntime *GetLanguageRuntime(lldb::LanguageType language,
-                                              bool retry_if_null = true);
+  LanguageRuntime *GetLanguageRuntime(lldb::LanguageType language,
+                                      bool retry_if_null = true);
 
-  virtual CPPLanguageRuntime *GetCPPLanguageRuntime(bool retry_if_null = true);
+  CPPLanguageRuntime *GetCPPLanguageRuntime(bool retry_if_null = true);
 
-  virtual ObjCLanguageRuntime *
-  GetObjCLanguageRuntime(bool retry_if_null = true);
+  ObjCLanguageRuntime *GetObjCLanguageRuntime(bool retry_if_null = true);
 
   bool IsPossibleDynamicValue(ValueObject &in_value);
 

From b4c756dc1cde8a4abdef08f4f43ed28a494c5384 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Fri, 24 May 2019 21:49:27 +0000
Subject: [PATCH 0206/1176] Mark tests as x86.

llvm-svn: 361674
---
 clang/test/CodeGen/loop-unroll.c    | 1 +
 clang/test/CodeGen/loop-vectorize.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/clang/test/CodeGen/loop-unroll.c b/clang/test/CodeGen/loop-unroll.c
index c37411fa052bc..601bf43d82677 100644
--- a/clang/test/CodeGen/loop-unroll.c
+++ b/clang/test/CodeGen/loop-unroll.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -S -O1 -fno-unroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-UNROLL
 // RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -funroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-UNROLL
 // RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -fno-unroll-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-UNROLL
+// REQUIRES: x86-registered-target
 
 // CHECK-ENABLE-UNROLL-LABEL: @for_test()
 // CHECK-ENABLE-UNROLL: br label %[[FORBODY:[a-z0-9_\.]+]]
diff --git a/clang/test/CodeGen/loop-vectorize.c b/clang/test/CodeGen/loop-vectorize.c
index f1c5c0cee77c6..c4c8715ffe8a5 100644
--- a/clang/test/CodeGen/loop-vectorize.c
+++ b/clang/test/CodeGen/loop-vectorize.c
@@ -2,6 +2,7 @@
 // RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -S -O1 -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
 // RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -vectorize-loops -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-ENABLE-VECT
 // RUN: %clang_cc1 -triple x86_64 -target-cpu x86-64 -fexperimental-new-pass-manager -S -O1 -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-DISABLE-VECT
+// REQUIRES: x86-registered-target
 
 // CHECK-ENABLE-VECT-LABEL: @for_test()
 // CHECK-ENABLE-VECT: fmul <{{[0-9]+}} x double>

From 6aad81cd960a7113e5d0c4cfb43fbbd3f7273977 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Fri, 24 May 2019 22:08:50 +0000
Subject: [PATCH 0207/1176] Cleanup fixed form sizes.

The fix form sizes use to have two arrays: one for 4 byte addresses and in for 8 byte addresses. The table had an issue where DW_FORM_flag_present wasn't being represented as a fixed size form because its actual size _is_ zero and zero was used to indicate the form isn't fixed in size. Any code that needed to quickly access the DWARF had to get a FixedFormSizes instance using the address byte size.

This fix cleans things up by adding a DWARFFormValue::GetFixedSize() both as a static method and as a member function on DWARFFormValue. It correctly can indicate if a form size is zero. This cleanup is a precursor to a follow up patch where I hope to speed up DWARF parsing.

I verified performance doesn't regress by loading hundreds of DWARF files and setting a breakpoint by file and line and by name in files that do not have DWARF indexes. Performance remained consistent between the two approaches.


Differential Revision: https://reviews.llvm.org/D62416

llvm-svn: 361675
---
 .../xcshareddata/xcschemes/desktop.xcscheme   |   8 +-
 .../Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp |   6 +-
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  |  21 +--
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.h    |   2 -
 .../SymbolFile/DWARF/DWARFFormValue.cpp       | 139 +++++++-----------
 .../Plugins/SymbolFile/DWARF/DWARFFormValue.h |  23 +--
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |  13 +-
 .../Plugins/SymbolFile/DWARF/DWARFUnit.h      |   2 -
 .../SymbolFile/DWARF/ManualDWARFIndex.cpp     |   9 +-
 .../SymbolFile/DWARF/ManualDWARFIndex.h       |   1 -
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      |  25 +---
 11 files changed, 87 insertions(+), 162 deletions(-)

diff --git a/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme b/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme
index d31912ef4837b..51852371ca833 100644
--- a/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme
+++ b/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme
@@ -45,7 +45,7 @@
       buildConfiguration = "DebugClang"
       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
-      launchStyle = "1"
+      launchStyle = "0"
       useCustomWorkingDirectory = "NO"
       ignoresPersistentStateOnLaunch = "NO"
       debugDocumentVersioning = "YES"
@@ -61,6 +61,12 @@
             ReferencedContainer = "container:lldb.xcodeproj">
          </BuildableReference>
       </BuildableProductRunnable>
+      <CommandLineArguments>
+         <CommandLineArgument
+            argument = "~/Documents/src/args/a.out "
+            isEnabled = "YES">
+         </CommandLineArgument>
+      </CommandLineArguments>
       <AdditionalOptions>
       </AdditionalOptions>
    </LaunchAction>
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp
index dc85b97886e6b..0722e63a1c2c9 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.cpp
@@ -136,10 +136,8 @@ bool DWARFBaseDIE::Supports_DW_AT_APPLE_objc_complete_type() const {
 
 size_t DWARFBaseDIE::GetAttributes(DWARFAttributes &attributes,
                                uint32_t depth) const {
-  if (IsValid()) {
-    return m_die->GetAttributes(m_cu, m_cu->GetFixedFormSizes(), attributes,
-                                depth);
-  }
+  if (IsValid())
+    return m_die->GetAttributes(m_cu, attributes, depth);
   if (depth == 0)
     attributes.Clear();
   return 0;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index aba80705ba9da..87d1a4be2e902 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -33,7 +33,6 @@ extern int g_verbose;
 
 bool DWARFDebugInfoEntry::FastExtract(
     const DWARFDataExtractor &debug_info_data, const DWARFUnit *cu,
-    const DWARFFormValue::FixedFormSizes &fixed_form_sizes,
     lldb::offset_t *offset_ptr) {
   m_offset = *offset_ptr;
   m_parent_idx = 0;
@@ -69,9 +68,9 @@ bool DWARFDebugInfoEntry::FastExtract(
     for (i = 0; i < numAttributes; ++i) {
       form = abbrevDecl->GetFormByIndexUnchecked(i);
 
-      const uint8_t fixed_skip_size = fixed_form_sizes.GetSize(form);
+      llvm::Optional<uint8_t> fixed_skip_size = DWARFFormValue::GetFixedSize(form, cu);
       if (fixed_skip_size)
-        offset += fixed_skip_size;
+        offset += *fixed_skip_size;
       else {
         bool form_is_indirect = false;
         do {
@@ -723,8 +722,8 @@ void DWARFDebugInfoEntry::DumpAttribute(
 // results. Any duplicate attributes will have the first instance take
 // precedence (this can happen for declaration attributes).
 size_t DWARFDebugInfoEntry::GetAttributes(
-    const DWARFUnit *cu, DWARFFormValue::FixedFormSizes fixed_form_sizes,
-    DWARFAttributes &attributes, uint32_t curr_depth) const {
+    const DWARFUnit *cu, DWARFAttributes &attributes,
+    uint32_t curr_depth) const {
   const DWARFAbbreviationDeclaration *abbrevDecl = nullptr;
   lldb::offset_t offset = 0;
   if (cu)
@@ -733,10 +732,6 @@ size_t DWARFDebugInfoEntry::GetAttributes(
   if (abbrevDecl) {
     const DWARFDataExtractor &debug_info_data = cu->GetData();
 
-    if (fixed_form_sizes.Empty())
-      fixed_form_sizes = DWARFFormValue::GetFixedFormSizesForAddressSize(
-          cu->GetAddressByteSize());
-
     const uint32_t num_attributes = abbrevDecl->NumAttributes();
     for (uint32_t i = 0; i < num_attributes; ++i) {
       DWARFFormValue form_value(cu);
@@ -769,9 +764,9 @@ size_t DWARFDebugInfoEntry::GetAttributes(
             spec_die.GetAttributes(attributes, curr_depth + 1);
         }
       } else {
-        const uint8_t fixed_skip_size = fixed_form_sizes.GetSize(form);
+        llvm::Optional<uint8_t> fixed_skip_size = DWARFFormValue::GetFixedSize(form, cu);
         if (fixed_skip_size)
-          offset += fixed_skip_size;
+          offset += *fixed_skip_size;
         else
           DWARFFormValue::SkipValue(form, debug_info_data, &offset, cu);
       }
@@ -1120,7 +1115,7 @@ bool DWARFDebugInfoEntry::MatchesDWARFDeclContext(
 DWARFDIE
 DWARFDebugInfoEntry::GetParentDeclContextDIE(DWARFUnit *cu) const {
   DWARFAttributes attributes;
-  GetAttributes(cu, DWARFFormValue::FixedFormSizes(), attributes);
+  GetAttributes(cu, attributes);
   return GetParentDeclContextDIE(cu, attributes);
 }
 
@@ -1170,7 +1165,7 @@ DWARFDebugInfoEntry::GetParentDeclContextDIE(
 const char *DWARFDebugInfoEntry::GetQualifiedName(DWARFUnit *cu,
                                                   std::string &storage) const {
   DWARFAttributes attributes;
-  GetAttributes(cu, DWARFFormValue::FixedFormSizes(), attributes);
+  GetAttributes(cu, attributes);
   return GetQualifiedName(cu, attributes, storage);
 }
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
index caf2915cc74d1..7b7459ad5d14c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
@@ -69,7 +69,6 @@ class DWARFDebugInfoEntry {
 
   bool FastExtract(const lldb_private::DWARFDataExtractor &debug_info_data,
                    const DWARFUnit *cu,
-                   const DWARFFormValue::FixedFormSizes &fixed_form_sizes,
                    lldb::offset_t *offset_ptr);
 
   bool Extract(const DWARFUnit *cu, lldb::offset_t *offset_ptr);
@@ -79,7 +78,6 @@ class DWARFDebugInfoEntry {
                      DWARFDebugInfoEntry **block_die);
 
   size_t GetAttributes(const DWARFUnit *cu,
-                       DWARFFormValue::FixedFormSizes fixed_form_sizes,
                        DWARFAttributes &attrs,
                        uint32_t curr_depth = 0)
       const; // "curr_depth" for internal use only, don't set this yourself!!!
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
index ee4759c547549..b707c340a3fa8 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.cpp
@@ -21,92 +21,6 @@ class DWARFUnit;
 
 using namespace lldb_private;
 
-static uint8_t g_form_sizes_addr4[] = {
-    0, // 0x00 unused
-    4, // 0x01 DW_FORM_addr
-    0, // 0x02 unused
-    0, // 0x03 DW_FORM_block2
-    0, // 0x04 DW_FORM_block4
-    2, // 0x05 DW_FORM_data2
-    4, // 0x06 DW_FORM_data4
-    8, // 0x07 DW_FORM_data8
-    0, // 0x08 DW_FORM_string
-    0, // 0x09 DW_FORM_block
-    0, // 0x0a DW_FORM_block1
-    1, // 0x0b DW_FORM_data1
-    1, // 0x0c DW_FORM_flag
-    0, // 0x0d DW_FORM_sdata
-    4, // 0x0e DW_FORM_strp
-    0, // 0x0f DW_FORM_udata
-    0, // 0x10 DW_FORM_ref_addr (addr size for DWARF2 and earlier, 4 bytes for
-       // DWARF32, 8 bytes for DWARF32 in DWARF 3 and later
-    1, // 0x11 DW_FORM_ref1
-    2, // 0x12 DW_FORM_ref2
-    4, // 0x13 DW_FORM_ref4
-    8, // 0x14 DW_FORM_ref8
-    0, // 0x15 DW_FORM_ref_udata
-    0, // 0x16 DW_FORM_indirect
-    4, // 0x17 DW_FORM_sec_offset
-    0, // 0x18 DW_FORM_exprloc
-    0, // 0x19 DW_FORM_flag_present
-    0, // 0x1a
-    0, // 0x1b
-    0, // 0x1c
-    0, // 0x1d
-    0, // 0x1e
-    0, // 0x1f
-    8, // 0x20 DW_FORM_ref_sig8
-
-};
-
-static uint8_t g_form_sizes_addr8[] = {
-    0, // 0x00 unused
-    8, // 0x01 DW_FORM_addr
-    0, // 0x02 unused
-    0, // 0x03 DW_FORM_block2
-    0, // 0x04 DW_FORM_block4
-    2, // 0x05 DW_FORM_data2
-    4, // 0x06 DW_FORM_data4
-    8, // 0x07 DW_FORM_data8
-    0, // 0x08 DW_FORM_string
-    0, // 0x09 DW_FORM_block
-    0, // 0x0a DW_FORM_block1
-    1, // 0x0b DW_FORM_data1
-    1, // 0x0c DW_FORM_flag
-    0, // 0x0d DW_FORM_sdata
-    4, // 0x0e DW_FORM_strp
-    0, // 0x0f DW_FORM_udata
-    0, // 0x10 DW_FORM_ref_addr (addr size for DWARF2 and earlier, 4 bytes for
-       // DWARF32, 8 bytes for DWARF32 in DWARF 3 and later
-    1, // 0x11 DW_FORM_ref1
-    2, // 0x12 DW_FORM_ref2
-    4, // 0x13 DW_FORM_ref4
-    8, // 0x14 DW_FORM_ref8
-    0, // 0x15 DW_FORM_ref_udata
-    0, // 0x16 DW_FORM_indirect
-    4, // 0x17 DW_FORM_sec_offset
-    0, // 0x18 DW_FORM_exprloc
-    0, // 0x19 DW_FORM_flag_present
-    0, // 0x1a
-    0, // 0x1b
-    0, // 0x1c
-    0, // 0x1d
-    0, // 0x1e
-    0, // 0x1f
-    8, // 0x20 DW_FORM_ref_sig8
-};
-
-DWARFFormValue::FixedFormSizes
-DWARFFormValue::GetFixedFormSizesForAddressSize(uint8_t addr_size) {
-  switch (addr_size) {
-  case 4:
-    return FixedFormSizes(g_form_sizes_addr4, sizeof(g_form_sizes_addr4));
-  case 8:
-    return FixedFormSizes(g_form_sizes_addr8, sizeof(g_form_sizes_addr8));
-  }
-  return FixedFormSizes();
-}
-
 void DWARFFormValue::Clear() {
   m_unit = nullptr;
   m_form = 0;
@@ -231,6 +145,59 @@ bool DWARFFormValue::ExtractValue(const DWARFDataExtractor &data,
   return true;
 }
 
+struct FormSize {
+  uint8_t valid:1, size:7;
+};
+static FormSize g_form_sizes[] = {
+  {0,0}, // 0x00 unused
+  {0,0}, // 0x01 DW_FORM_addr
+  {0,0}, // 0x02 unused
+  {0,0}, // 0x03 DW_FORM_block2
+  {0,0}, // 0x04 DW_FORM_block4
+  {1,2}, // 0x05 DW_FORM_data2
+  {1,4}, // 0x06 DW_FORM_data4
+  {1,8}, // 0x07 DW_FORM_data8
+  {0,0}, // 0x08 DW_FORM_string
+  {0,0}, // 0x09 DW_FORM_block
+  {0,0}, // 0x0a DW_FORM_block1
+  {1,1}, // 0x0b DW_FORM_data1
+  {1,1}, // 0x0c DW_FORM_flag
+  {0,0}, // 0x0d DW_FORM_sdata
+  {1,4}, // 0x0e DW_FORM_strp
+  {0,0}, // 0x0f DW_FORM_udata
+  {0,0}, // 0x10 DW_FORM_ref_addr (addr size for DWARF2 and earlier, 4 bytes for
+         // DWARF32, 8 bytes for DWARF32 in DWARF 3 and later
+  {1,1}, // 0x11 DW_FORM_ref1
+  {1,2}, // 0x12 DW_FORM_ref2
+  {1,4}, // 0x13 DW_FORM_ref4
+  {1,8}, // 0x14 DW_FORM_ref8
+  {0,0}, // 0x15 DW_FORM_ref_udata
+  {0,0}, // 0x16 DW_FORM_indirect
+  {1,4}, // 0x17 DW_FORM_sec_offset
+  {0,0}, // 0x18 DW_FORM_exprloc
+  {1,0}, // 0x19 DW_FORM_flag_present
+  {0,0}, // 0x1a
+  {0,0}, // 0x1b
+  {0,0}, // 0x1c
+  {0,0}, // 0x1d
+  {0,0}, // 0x1e
+  {0,0}, // 0x1f
+  {1,8}, // 0x20 DW_FORM_ref_sig8
+};
+
+llvm::Optional<uint8_t>
+DWARFFormValue::GetFixedSize(dw_form_t form, const DWARFUnit *u) {
+  if (form <= DW_FORM_ref_sig8 && g_form_sizes[form].valid)
+    return g_form_sizes[form].size;
+  if (form == DW_FORM_addr && u)
+    return u->GetAddressByteSize();
+  return llvm::None;
+}
+
+llvm::Optional<uint8_t> DWARFFormValue::GetFixedSize() const {
+  return GetFixedSize(m_form, m_unit);
+}
+
 bool DWARFFormValue::SkipValue(const DWARFDataExtractor &debug_info_data,
                                lldb::offset_t *offset_ptr) const {
   return DWARFFormValue::SkipValue(m_form, debug_info_data, offset_ptr, m_unit);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
index 2143921587f5c..848db2990ded7 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFFormValue.h
@@ -11,6 +11,7 @@
 
 #include "DWARFDataExtractor.h"
 #include <stddef.h>
+#include "llvm/ADT/Optional.h"
 
 class DWARFUnit;
 class SymbolFileDWARF;
@@ -29,24 +30,6 @@ class DWARFFormValue {
     const uint8_t *data;
   } ValueType;
 
-  class FixedFormSizes {
-  public:
-    FixedFormSizes() : m_fix_sizes(nullptr), m_size(0) {}
-
-    FixedFormSizes(const uint8_t *fix_sizes, size_t size)
-        : m_fix_sizes(fix_sizes), m_size(size) {}
-
-    uint8_t GetSize(uint32_t index) const {
-      return index < m_size ? m_fix_sizes[index] : 0;
-    }
-
-    bool Empty() const { return m_size == 0; }
-
-  private:
-    const uint8_t *m_fix_sizes;
-    size_t m_size;
-  };
-
   enum {
     eValueTypeInvalid = 0,
     eValueTypeUnsigned,
@@ -71,6 +54,9 @@ class DWARFFormValue {
   bool ExtractValue(const lldb_private::DWARFDataExtractor &data,
                     lldb::offset_t *offset_ptr);
   const uint8_t *BlockData() const;
+  static llvm::Optional<uint8_t> GetFixedSize(dw_form_t form,
+                                              const DWARFUnit *u);
+  llvm::Optional<uint8_t> GetFixedSize() const;
   DWARFDIE Reference() const;
   uint64_t Reference(dw_offset_t offset) const;
   bool Boolean() const { return m_value.value.uval != 0; }
@@ -88,7 +74,6 @@ class DWARFFormValue {
                         lldb::offset_t *offset_ptr, const DWARFUnit *unit);
   static bool IsBlockForm(const dw_form_t form);
   static bool IsDataForm(const dw_form_t form);
-  static FixedFormSizes GetFixedFormSizesForAddressSize(uint8_t addr_size);
   static int Compare(const DWARFFormValue &a, const DWARFFormValue &b);
   void Clear();
   static bool FormIsSupported(dw_form_t form);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 7049fe7d280fd..cd8492356c38a 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -60,10 +60,8 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
   // We are in our compile unit, parse starting at the offset we were told to
   // parse
   const DWARFDataExtractor &data = GetData();
-  DWARFFormValue::FixedFormSizes fixed_form_sizes =
-      DWARFFormValue::GetFixedFormSizesForAddressSize(GetAddressByteSize());
   if (offset < GetNextUnitOffset() &&
-      m_first_die.FastExtract(data, this, fixed_form_sizes, &offset)) {
+      m_first_die.FastExtract(data, this, &offset)) {
     AddUnitDIE(m_first_die);
     return;
   }
@@ -167,10 +165,7 @@ void DWARFUnit::ExtractDIEsRWLocked() {
   die_index_stack.reserve(32);
   die_index_stack.push_back(0);
   bool prev_die_had_children = false;
-  DWARFFormValue::FixedFormSizes fixed_form_sizes =
-      DWARFFormValue::GetFixedFormSizesForAddressSize(GetAddressByteSize());
-  while (offset < next_cu_offset &&
-         die.FastExtract(data, this, fixed_form_sizes, &offset)) {
+  while (offset < next_cu_offset && die.FastExtract(data, this, &offset)) {
     const bool null_die = die.IsNULL();
     if (depth == 0) {
       assert(m_die_array.empty() && "Compile unit DIE already added");
@@ -415,10 +410,6 @@ TypeSystem *DWARFUnit::GetTypeSystem() {
     return nullptr;
 }
 
-DWARFFormValue::FixedFormSizes DWARFUnit::GetFixedFormSizes() {
-  return DWARFFormValue::GetFixedFormSizesForAddressSize(GetAddressByteSize());
-}
-
 void DWARFUnit::SetBaseAddress(dw_addr_t base_addr) { m_base_addr = base_addr; }
 
 // Compare function DWARFDebugAranges::Range structures
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 165f862ed46f9..da516aecd8ed1 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -157,8 +157,6 @@ class DWARFUnit : public lldb_private::UserID {
 
   const DWARFDebugAranges &GetFunctionAranges();
 
-  DWARFFormValue::FixedFormSizes GetFixedFormSizes();
-
   void SetBaseAddress(dw_addr_t base_addr);
 
   DWARFBaseDIE GetUnitDIEOnly() { return DWARFDIE(this, GetUnitDIEPtrOnly()); }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
index a6fae61a6a4fc..69d364803e2f5 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp
@@ -100,20 +100,18 @@ void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, IndexSet &set) {
   }
 
   const LanguageType cu_language = unit.GetLanguageType();
-  DWARFFormValue::FixedFormSizes fixed_form_sizes = unit.GetFixedFormSizes();
 
-  IndexUnitImpl(unit, cu_language, fixed_form_sizes, unit.GetOffset(), set);
+  IndexUnitImpl(unit, cu_language, unit.GetOffset(), set);
 
   SymbolFileDWARFDwo *dwo_symbol_file = unit.GetDwoSymbolFile();
   if (dwo_symbol_file && dwo_symbol_file->GetCompileUnit()) {
     IndexUnitImpl(*dwo_symbol_file->GetCompileUnit(), cu_language,
-                  fixed_form_sizes, unit.GetOffset(), set);
+                  unit.GetOffset(), set);
   }
 }
 
 void ManualDWARFIndex::IndexUnitImpl(
     DWARFUnit &unit, const LanguageType cu_language,
-    const DWARFFormValue::FixedFormSizes &fixed_form_sizes,
     const dw_offset_t cu_offset, IndexSet &set) {
   for (const DWARFDebugInfoEntry &die : unit.dies()) {
     const dw_tag_t tag = die.Tag();
@@ -150,8 +148,7 @@ void ManualDWARFIndex::IndexUnitImpl(
     bool is_global_or_static_variable = false;
 
     DWARFFormValue specification_die_form;
-    const size_t num_attributes =
-        die.GetAttributes(&unit, fixed_form_sizes, attributes);
+    const size_t num_attributes = die.GetAttributes(&unit, attributes);
     if (num_attributes > 0) {
       for (uint32_t i = 0; i < num_attributes; ++i) {
         dw_attr_t attr = attributes.AttributeAtIndex(i);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
index 5311b0c213cd0..590d228e87af4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h
@@ -59,7 +59,6 @@ class ManualDWARFIndex : public DWARFIndex {
 
   static void
   IndexUnitImpl(DWARFUnit &unit, const lldb::LanguageType cu_language,
-                const DWARFFormValue::FixedFormSizes &fixed_form_sizes,
                 const dw_offset_t cu_offset, IndexSet &set);
 
   /// Non-null value means we haven't built the index yet.
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 2871017baeccc..11a89db7aa5de 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -3166,13 +3166,11 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
                                         block_length);
               } else if (DWARFFormValue::IsDataForm(form_value.Form())) {
                 // Retrieve the value as a data expression.
-                DWARFFormValue::FixedFormSizes fixed_form_sizes =
-                    DWARFFormValue::GetFixedFormSizesForAddressSize(
-                        attributes.CompileUnitAtIndex(i)->GetAddressByteSize());
                 uint32_t data_offset = attributes.DIEOffsetAtIndex(i);
-                uint32_t data_length =
-                    fixed_form_sizes.GetSize(form_value.Form());
-                if (data_length == 0) {
+                if (auto data_length = form_value.GetFixedSize())
+                  location.CopyOpcodeData(module, debug_info_data, data_offset,
+                                          *data_length);
+                else {
                   const uint8_t *data_pointer = form_value.BlockData();
                   if (data_pointer) {
                     form_value.Unsigned();
@@ -3181,21 +3179,14 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
                     // create the variable
                     const_value = form_value;
                   }
-                } else
-                  location.CopyOpcodeData(module, debug_info_data, data_offset,
-                                          data_length);
+                }
               } else {
                 // Retrieve the value as a string expression.
                 if (form_value.Form() == DW_FORM_strp) {
-                  DWARFFormValue::FixedFormSizes fixed_form_sizes =
-                      DWARFFormValue::GetFixedFormSizesForAddressSize(
-                          attributes.CompileUnitAtIndex(i)
-                              ->GetAddressByteSize());
                   uint32_t data_offset = attributes.DIEOffsetAtIndex(i);
-                  uint32_t data_length =
-                      fixed_form_sizes.GetSize(form_value.Form());
-                  location.CopyOpcodeData(module, debug_info_data, data_offset,
-                                          data_length);
+                  if (auto data_length = form_value.GetFixedSize())
+                    location.CopyOpcodeData(module, debug_info_data,
+                                            data_offset, *data_length);
                 } else {
                   const char *str = form_value.AsCString();
                   uint32_t string_offset =

From 78a6ae738c1b0faa9d6944b0a8fa75e45b472884 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Fri, 24 May 2019 22:12:01 +0000
Subject: [PATCH 0208/1176] Revert Xcode scheme changes from 361675

llvm-svn: 361676
---
 .../xcshareddata/xcschemes/desktop.xcscheme               | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme b/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme
index 51852371ca833..d31912ef4837b 100644
--- a/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme
+++ b/lldb/lldb.xcodeproj/xcshareddata/xcschemes/desktop.xcscheme
@@ -45,7 +45,7 @@
       buildConfiguration = "DebugClang"
       selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
       selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
-      launchStyle = "0"
+      launchStyle = "1"
       useCustomWorkingDirectory = "NO"
       ignoresPersistentStateOnLaunch = "NO"
       debugDocumentVersioning = "YES"
@@ -61,12 +61,6 @@
             ReferencedContainer = "container:lldb.xcodeproj">
          </BuildableReference>
       </BuildableProductRunnable>
-      <CommandLineArguments>
-         <CommandLineArgument
-            argument = "~/Documents/src/args/a.out "
-            isEnabled = "YES">
-         </CommandLineArgument>
-      </CommandLineArguments>
       <AdditionalOptions>
       </AdditionalOptions>
    </LaunchAction>

From bede937b1678503215f7add86fb52aacefa280c3 Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Fri, 24 May 2019 22:12:21 +0000
Subject: [PATCH 0209/1176] [GlobalISel][AArch64] NFC: Factor out
 HasFPConstraints into a proper function

Factor it out into a function, and replace places where we had the same check
with the new function.

Differential Revision: https://reviews.llvm.org/D62421

llvm-svn: 361677
---
 .../AArch64/AArch64RegisterBankInfo.cpp       | 69 ++++++++-----------
 .../Target/AArch64/AArch64RegisterBankInfo.h  |  4 ++
 2 files changed, 32 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 699343614cc42..4dede2fe9f9f2 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -457,6 +457,25 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
                                getValueMapping(RBIdx, Size), NumOperands);
 }
 
+bool AArch64RegisterBankInfo::hasFPConstraints(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI,
+    const TargetRegisterInfo &TRI) const {
+  unsigned Op = MI.getOpcode();
+
+  // Do we have an explicit floating point instruction?
+  if (isPreISelGenericFloatingPointOpcode(Op))
+    return true;
+
+  // No. Check if we have a copy-like instruction. If we do, then we could
+  // still be fed by floating point instructions.
+  if (Op != TargetOpcode::COPY && !MI.isPHI())
+    return false;
+
+  // MI is copy-like. Return true if it outputs an FPR.
+  return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+         &AArch64::FPRRegBank;
+}
+
 const RegisterBankInfo::InstructionMapping &
 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const unsigned Opc = MI.getOpcode();
@@ -476,24 +495,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
 
-  // Helper lambda that returns true if MI has floating point constraints.
-  auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
-    unsigned Op = MI.getOpcode();
-
-    // Do we have an explicit floating point instruction?
-    if (isPreISelGenericFloatingPointOpcode(Op))
-      return true;
-
-    // No. Check if we have a copy-like instruction. If we do, then we could
-    // still be fed by floating point instructions.
-    if (Op != TargetOpcode::COPY && !MI.isPHI())
-      return false;
-
-    // MI is copy-like. Return true if it's using an FPR.
-    return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
-           &AArch64::FPRRegBank;
-  };
-
   switch (Opc) {
     // G_{F|S|U}REM are not listed because they are not legal.
     // Arithmetic ops.
@@ -641,15 +642,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         // assume this was a floating point load in the IR.
         // If it was not, we would have had a bitcast before
         // reaching that instruction.
-        unsigned UseOpc = UseMI.getOpcode();
-        if (isPreISelGenericFloatingPointOpcode(UseOpc) ||
-            // Check if we feed a copy-like instruction with
-            // floating point constraints. In that case, we are still
-            // feeding fp instructions, but indirectly
-            // (e.g., through ABI copies).
-            ((UseOpc == TargetOpcode::COPY || UseMI.isPHI()) &&
-             getRegBank(UseMI.getOperand(0).getReg(), MRI, TRI) ==
-                 &AArch64::FPRRegBank)) {
+        if (hasFPConstraints(UseMI, MRI, TRI)) {
           OpRegBankIdx[0] = PMI_FirstFPR;
           break;
         }
@@ -662,15 +655,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       if (!VReg)
         break;
       MachineInstr *DefMI = MRI.getVRegDef(VReg);
-      unsigned DefOpc = DefMI->getOpcode();
-      if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
-          // Check if we come from a copy-like instruction with
-          // floating point constraints. In that case, we are still
-          // fed by fp instructions, but indirectly
-          // (e.g., through ABI copies).
-          ((DefOpc == TargetOpcode::COPY || DefMI->isPHI()) &&
-           getRegBank(DefMI->getOperand(0).getReg(), MRI, TRI) ==
-               &AArch64::FPRRegBank))
+      if (hasFPConstraints(*DefMI, MRI, TRI))
         OpRegBankIdx[0] = PMI_FirstFPR;
       break;
     }
@@ -700,8 +685,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     //
     // %z = G_SELECT %cond %x %y
     // fpr = G_FOO %z ...
-    if (any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
-               [&](MachineInstr &MI) { return HasFPConstraints(MI); }))
+    if (any_of(
+            MRI.use_instructions(MI.getOperand(0).getReg()),
+            [&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); }))
       ++NumFP;
 
     // Check if the defs of the source values always produce floating point
@@ -721,7 +707,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       unsigned VReg = MI.getOperand(Idx).getReg();
       MachineInstr *DefMI = MRI.getVRegDef(VReg);
       if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
-          HasFPConstraints(*DefMI))
+          hasFPConstraints(*DefMI, MRI, TRI))
         ++NumFP;
     }
 
@@ -743,8 +729,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // UNMERGE into scalars from a vector should always use FPR.
     // Likewise if any of the uses are FP instructions.
     if (SrcTy.isVector() ||
-        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
-               [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
+        any_of(
+            MRI.use_instructions(MI.getOperand(0).getReg()),
+            [&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); })) {
       // Set the register bank of every operand to FPR.
       for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
            Idx < NumOperands; ++Idx)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 98a51c339360e..cdcde1ec1bcfa 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -113,6 +113,10 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   const InstructionMapping &
   getSameKindOfOperandsMapping(const MachineInstr &MI) const;
 
+  /// Returns true if the output of \p MI must be stored on a FPR register.
+  bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI) const;
+
 public:
   AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
 

From 59f959ff333bf42962287957b51d0da285087e41 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 24 May 2019 22:45:08 +0000
Subject: [PATCH 0210/1176] [WebAssembly] Relax signature checking for
 undefined functions that are not called directly

When function signatures don't match and the undefined function is not
called directly (i.e. only has its address taken) we don't issue a
warning or create a runtime thunk for the undefined function.

Instead in this case we simply use the defined version of the function.
This is possible since checking signatures of dynamic calls happens
at runtime so any invalid usage will still result in a runtime error.

This is needed to allow C++ programs to link without generating
warnings.  Its not uncommon in C++ for vtables to be populated by
function address whee the signature of the function is not known in the
compilation unit.  In this case clang declares the method as void(void)
and relies on the vtable caller casting the data back to the correct
signature.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=40412

Differential Revision: https://reviews.llvm.org/D62153

llvm-svn: 361678
---
 lld/test/wasm/signature-mismatch-unknown.ll | 19 ++++++++++++
 lld/wasm/Driver.cpp                         |  2 +-
 lld/wasm/InputFiles.cpp                     | 34 +++++++++++++--------
 lld/wasm/InputFiles.h                       |  9 +++++-
 lld/wasm/SymbolTable.cpp                    | 13 +++++---
 lld/wasm/SymbolTable.h                      |  3 +-
 lld/wasm/Symbols.h                          |  6 ++--
 7 files changed, 65 insertions(+), 21 deletions(-)
 create mode 100644 lld/test/wasm/signature-mismatch-unknown.ll

diff --git a/lld/test/wasm/signature-mismatch-unknown.ll b/lld/test/wasm/signature-mismatch-unknown.ll
new file mode 100644
index 0000000000000..65bb31511d1cf
--- /dev/null
+++ b/lld/test/wasm/signature-mismatch-unknown.ll
@@ -0,0 +1,19 @@
+; RUN: llc -filetype=obj %p/Inputs/ret32.ll -o %t.ret32.o
+; RUN: llc -filetype=obj %s -o %t.main.o
+; RUN: wasm-ld --fatal-warnings -o %t.wasm %t.ret32.o %t.main.o
+; RUN: wasm-ld --fatal-warnings -o %t.wasm %t.main.o %t.ret32.o
+
+target triple = "wasm32-unknown-unknown"
+
+; Function declartion with incorrect signature.
+declare dso_local void @ret32()
+
+; Simply taking the address of the function should *not* generate the
+; the signature mismatch warning.
+@ptr = dso_local global i8* bitcast (void ()* @ret32 to i8*), align 8
+
+define hidden void @_start() local_unnamed_addr {
+  %addr = load i32 ()*, i32 ()** bitcast (i8** @ptr to i32 ()**), align 8
+  call i32 %addr()
+  ret void
+}
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 4ac5aff2494f4..5d27ac4bec6d1 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -550,7 +550,7 @@ struct WrappedSymbol {
 };
 
 static Symbol *addUndefined(StringRef Name) {
-  return Symtab->addUndefinedFunction(Name, "", "", 0, nullptr, nullptr);
+  return Symtab->addUndefinedFunction(Name, "", "", 0, nullptr, nullptr, false);
 }
 
 // Handles -wrap option.
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 185a865dcab11..a9d6abff4abf0 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -271,14 +271,16 @@ void ObjFile::parse(bool IgnoreComdats) {
     }
   }
 
-  // Find the code and data sections.  Wasm objects can have at most one code
-  // and one data section.
   uint32_t SectionIndex = 0;
+  SymbolIsCalledDirectly.resize(WasmObj->getNumberOfSymbols(), false);
   for (const SectionRef &Sec : WasmObj->sections()) {
     const WasmSection &Section = WasmObj->getWasmSection(Sec);
+    // Wasm objects can have at most one code and one data section.
     if (Section.Type == WASM_SEC_CODE) {
+      assert(!CodeSection);
       CodeSection = &Section;
     } else if (Section.Type == WASM_SEC_DATA) {
+      assert(!DataSection);
       DataSection = &Section;
     } else if (Section.Type == WASM_SEC_CUSTOM) {
       CustomSections.emplace_back(make<InputSection>(Section, this));
@@ -286,6 +288,11 @@ void ObjFile::parse(bool IgnoreComdats) {
       CustomSectionsByIndex[SectionIndex] = CustomSections.back();
     }
     SectionIndex++;
+    // Scans relocations to dermine determine if a function symbol is called
+    // directly
+    for (const WasmRelocation &Reloc : Section.Relocations)
+      if (Reloc.Type == R_WASM_FUNCTION_INDEX_LEB)
+        SymbolIsCalledDirectly[Reloc.Index] = true;
   }
 
   TypeMap.resize(getWasmObj()->types().size());
@@ -326,10 +333,16 @@ void ObjFile::parse(bool IgnoreComdats) {
   Symbols.reserve(WasmObj->getNumberOfSymbols());
   for (const SymbolRef &Sym : WasmObj->symbols()) {
     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
-    if (Symbol *Sym = createDefined(WasmSym))
-      Symbols.push_back(Sym);
-    else
-      Symbols.push_back(createUndefined(WasmSym));
+    if (WasmSym.isDefined()) {
+      // createDefined may fail if the symbol is comdat excluded in which case
+      // we fall back to creating an undefined symbol
+      if (Symbol *D = createDefined(WasmSym)) {
+        Symbols.push_back(D);
+        continue;
+      }
+    }
+    size_t Idx = Symbols.size();
+    Symbols.push_back(createUndefined(WasmSym, SymbolIsCalledDirectly[Idx]));
   }
 }
 
@@ -361,9 +374,6 @@ DataSymbol *ObjFile::getDataSymbol(uint32_t Index) const {
 }
 
 Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
-  if (!Sym.isDefined())
-    return nullptr;
-
   StringRef Name = Sym.Info.Name;
   uint32_t Flags = Sym.Info.Flags;
 
@@ -417,7 +427,7 @@ Symbol *ObjFile::createDefined(const WasmSymbol &Sym) {
   llvm_unreachable("unknown symbol kind");
 }
 
-Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
+Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, bool IsCalledDirectly) {
   StringRef Name = Sym.Info.Name;
   uint32_t Flags = Sym.Info.Flags;
 
@@ -425,7 +435,7 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &Sym) {
   case WASM_SYMBOL_TYPE_FUNCTION:
     return Symtab->addUndefinedFunction(Name, Sym.Info.ImportName,
                                         Sym.Info.ImportModule, Flags, this,
-                                        Sym.Signature);
+                                        Sym.Signature, IsCalledDirectly);
   case WASM_SYMBOL_TYPE_DATA:
     return Symtab->addUndefinedData(Name, Flags, this);
   case WASM_SYMBOL_TYPE_GLOBAL:
@@ -499,7 +509,7 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
   if (ObjSym.isUndefined() || ExcludedByComdat) {
     if (ObjSym.isExecutable())
       return Symtab->addUndefinedFunction(Name, Name, DefaultModule, Flags, &F,
-                                          nullptr);
+                                          nullptr, true);
     return Symtab->addUndefinedData(Name, Flags, &F);
   }
 
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index 7c8601e29f2a8..64ac208daa6f7 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -69,6 +69,13 @@ class InputFile {
 
   // List of all symbols referenced or defined by this file.
   std::vector<Symbol *> Symbols;
+  // Bool for each symbol, true if called directly.  This allows us to implement
+  // a weaker form of signature checking where undefined functions that are not
+  // called directly (i.e. only address taken) don't have to match the defined
+  // function's signature.  We cannot do this for directly called functions
+  // because those signatures are checked at validation times.
+  // See https://bugs.llvm.org/show_bug.cgi?id=40412
+  std::vector<bool> SymbolIsCalledDirectly;
 
 private:
   const Kind FileKind;
@@ -138,7 +145,7 @@ class ObjFile : public InputFile {
 
 private:
   Symbol *createDefined(const WasmSymbol &Sym);
-  Symbol *createUndefined(const WasmSymbol &Sym);
+  Symbol *createUndefined(const WasmSymbol &Sym, bool IsCalledDirectly);
 
   bool isExcludedByComdat(InputChunk *Chunk) const;
 
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 5328e9e784523..ae424749ffc63 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -286,7 +286,11 @@ Symbol *SymbolTable::addDefinedFunction(StringRef Name, uint32_t Flags,
     return S;
   }
 
-  if (Function && !signatureMatches(ExistingFunction, &Function->Signature)) {
+  bool CheckSig = true;
+  if (auto UD = dyn_cast<UndefinedFunction>(ExistingFunction))
+    CheckSig = UD->IsCalledDirectly;
+
+  if (CheckSig && Function && !signatureMatches(ExistingFunction, &Function->Signature)) {
     Symbol* Variant;
     if (getFunctionVariant(S, &Function->Signature, File, &Variant))
       // New variant, always replace
@@ -384,7 +388,8 @@ Symbol *SymbolTable::addDefinedEvent(StringRef Name, uint32_t Flags,
 Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
                                           StringRef ImportModule,
                                           uint32_t Flags, InputFile *File,
-                                          const WasmSignature *Sig) {
+                                          const WasmSignature *Sig,
+                                          bool IsCalledDirectly) {
   LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name <<
              " [" << (Sig ? toString(*Sig) : "none") << "]\n");
 
@@ -396,7 +401,7 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
 
   auto Replace = [&]() {
     replaceSymbol<UndefinedFunction>(S, Name, ImportName, ImportModule, Flags,
-                                     File, Sig);
+                                     File, Sig, IsCalledDirectly);
   };
 
   if (WasInserted)
@@ -409,7 +414,7 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
       reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION);
       return S;
     }
-    if (!signatureMatches(ExistingFunction, Sig))
+    if (IsCalledDirectly && !signatureMatches(ExistingFunction, Sig))
       if (getFunctionVariant(S, Sig, File, &S))
         Replace();
   }
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index d6cb13b43f8a3..8c96c616330e2 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -63,7 +63,8 @@ class SymbolTable {
 
   Symbol *addUndefinedFunction(StringRef Name, StringRef ImportName,
                                StringRef ImportModule, uint32_t Flags,
-                               InputFile *File, const WasmSignature *Signature);
+                               InputFile *File, const WasmSignature *Signature,
+                               bool IsCalledDirectly);
   Symbol *addUndefinedData(StringRef Name, uint32_t Flags, InputFile *File);
   Symbol *addUndefinedGlobal(StringRef Name, StringRef ImportName,
                              StringRef ImportModule,  uint32_t Flags,
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 3691b3f48b43b..98ff155fc5a68 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -194,9 +194,10 @@ class UndefinedFunction : public FunctionSymbol {
   UndefinedFunction(StringRef Name, StringRef ImportName,
                     StringRef ImportModule, uint32_t Flags,
                     InputFile *File = nullptr,
-                    const WasmSignature *Type = nullptr)
+                    const WasmSignature *Type = nullptr,
+                    bool IsCalledDirectly = true)
       : FunctionSymbol(Name, UndefinedFunctionKind, Flags, File, Type),
-        ImportName(ImportName), ImportModule(ImportModule) {}
+        ImportName(ImportName), ImportModule(ImportModule), IsCalledDirectly(IsCalledDirectly) {}
 
   static bool classof(const Symbol *S) {
     return S->kind() == UndefinedFunctionKind;
@@ -204,6 +205,7 @@ class UndefinedFunction : public FunctionSymbol {
 
   StringRef ImportName;
   StringRef ImportModule;
+  bool IsCalledDirectly;
 };
 
 // Section symbols for output sections are different from those for input

From 97d668d70f526f6fc4406053193502b83eec688b Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Fri, 24 May 2019 23:08:45 +0000
Subject: [PATCH 0211/1176] [GlobalISel][AArch64] Make FP constraint checks
 consider possible use/def banks

In a few places in getInstrMapping, we check if use/def instructions for the
instruction we're mapping have floating point constraints.

We can improve this check and reduce the number of copies in GISel-compiled code
if we make a couple observations:

- For a def instruction, it only matters if the def instruction must always
  output a value stored on a FPR

- For a use instruction, it only matters if the use instruction must always
  only take in values stored in FPRs

This adds two new functions:

- onlyUsesFP
- onlyDefinesFP

Then we can use those when we're checking the uses/defs instead.

Without this patch, the load, unmerge, store, and select in the added test
would have unnecessary copies.

Differential Revision: https://reviews.llvm.org/D62426

llvm-svn: 361679
---
 .../AArch64/AArch64RegisterBankInfo.cpp       |  42 +++++--
 .../Target/AArch64/AArch64RegisterBankInfo.h  |   8 ++
 .../AArch64/GlobalISel/regbank-fp-use-def.mir | 104 ++++++++++++++++++
 3 files changed, 147 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir

diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 4dede2fe9f9f2..7c57d618f1a49 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -476,6 +476,35 @@ bool AArch64RegisterBankInfo::hasFPConstraints(
          &AArch64::FPRRegBank;
 }
 
+bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+                                         const MachineRegisterInfo &MRI,
+                                         const TargetRegisterInfo &TRI) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+  case TargetOpcode::G_FCMP:
+    return true;
+  default:
+    break;
+  }
+  return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool AArch64RegisterBankInfo::onlyDefinesFP(
+    const MachineInstr &MI, const MachineRegisterInfo &MRI,
+    const TargetRegisterInfo &TRI) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SITOFP:
+  case TargetOpcode::G_UITOFP:
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    return true;
+  default:
+    break;
+  }
+  return hasFPConstraints(MI, MRI, TRI);
+}
+
 const RegisterBankInfo::InstructionMapping &
 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   const unsigned Opc = MI.getOpcode();
@@ -642,7 +671,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
         // assume this was a floating point load in the IR.
         // If it was not, we would have had a bitcast before
         // reaching that instruction.
-        if (hasFPConstraints(UseMI, MRI, TRI)) {
+        if (onlyUsesFP(UseMI, MRI, TRI)) {
           OpRegBankIdx[0] = PMI_FirstFPR;
           break;
         }
@@ -655,7 +684,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       if (!VReg)
         break;
       MachineInstr *DefMI = MRI.getVRegDef(VReg);
-      if (hasFPConstraints(*DefMI, MRI, TRI))
+      if (onlyDefinesFP(*DefMI, MRI, TRI))
         OpRegBankIdx[0] = PMI_FirstFPR;
       break;
     }
@@ -687,7 +716,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // fpr = G_FOO %z ...
     if (any_of(
             MRI.use_instructions(MI.getOperand(0).getReg()),
-            [&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); }))
+            [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
       ++NumFP;
 
     // Check if the defs of the source values always produce floating point
@@ -707,7 +736,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       unsigned VReg = MI.getOperand(Idx).getReg();
       MachineInstr *DefMI = MRI.getVRegDef(VReg);
       if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
-          hasFPConstraints(*DefMI, MRI, TRI))
+          onlyDefinesFP(*DefMI, MRI, TRI))
         ++NumFP;
     }
 
@@ -729,9 +758,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // UNMERGE into scalars from a vector should always use FPR.
     // Likewise if any of the uses are FP instructions.
     if (SrcTy.isVector() ||
-        any_of(
-            MRI.use_instructions(MI.getOperand(0).getReg()),
-            [&](MachineInstr &MI) { return hasFPConstraints(MI, MRI, TRI); })) {
+        any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
       // Set the register bank of every operand to FPR.
       for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
            Idx < NumOperands; ++Idx)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
index cdcde1ec1bcfa..31bd36e971d27 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -117,6 +117,14 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
   bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                      const TargetRegisterInfo &TRI) const;
 
+  /// Returns true if the source registers of \p MI must all be FPRs.
+  bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                  const TargetRegisterInfo &TRI) const;
+
+  /// Returns true if the destination register of \p MI must be a FPR.
+  bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+                     const TargetRegisterInfo &TRI) const;
+
 public:
   AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
new file mode 100644
index 0000000000000..57ddcc9b04065
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -0,0 +1,104 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect %s -o - | FileCheck %s
+
+# Check that we correctly assign register banks based off of instructions which
+# only use or only define FPRs.
+#
+# For example, G_SITOFP takes in a GPR, but only ever produces values on FPRs.
+# Some instructions can have inputs/outputs on either FPRs or GPRs. If one of
+# those instructions takes in the result of a G_SITOFP as a source, we should
+# put that source on a FPR.
+#
+# Similarly, G_FPTOSI can only take in a value on a FPR. So, if the result of
+# an instruction is consumed by a G_FPTOSI, we should put the instruction on
+# FPRs.
+
+---
+name:            load_only_uses_fp
+legalized:       true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: load_only_uses_fp
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+    ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 2.000000e+00
+    ; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4)
+    ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[C]](s32), [[LOAD]]
+    ; CHECK: $w0 = COPY [[FCMP]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_FCONSTANT float 2.0
+    %2:_(s32) = G_LOAD %0 :: (load 4)
+    %3:_(s32) = G_FCMP floatpred(uno), %1, %2
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            unmerge_only_uses_fp
+
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: unmerge_only_uses_fp
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[UV:%[0-9]+]]:fpr(s32), [[UV1:%[0-9]+]]:fpr(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
+    ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(uno), [[UV]](s32), [[UV1]]
+    ; CHECK: $w0 = COPY [[FCMP]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(s64) = COPY $x0
+    %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64)
+    %3:_(s32) = G_FCMP floatpred(uno), %1, %2
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            store_defined_by_fp
+legalized:       true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $w1
+    ; CHECK-LABEL: name: store_defined_by_fp
+    ; CHECK: liveins: $x0, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1
+    ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
+    ; CHECK: G_STORE [[SITOFP]](s32), [[COPY]](p0) :: (store 4)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = COPY $w1
+    %2:_(s32) = G_SITOFP %1
+    G_STORE %2, %0 :: (store 4)
+
+...
+---
+name:            select_defined_by_fp_using_fp
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; CHECK-LABEL: name: select_defined_by_fp_using_fp
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+    ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC %2(s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr(s32) = COPY $w2
+    ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[COPY4:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[SITOFP]]
+    ; CHECK: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s1) = G_TRUNC %3(s32)
+    %2:_(s32) = COPY $w1
+    %3:_(s32) = COPY $w2
+    %4:_(s32) = G_SITOFP %2
+    %6:_(s32) = G_SELECT %1(s1), %3, %4
+    %8:_(s32) = G_FPTOSI %6

From fd42079255d04742e60954e98205a9212e2685ff Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 24 May 2019 23:26:07 +0000
Subject: [PATCH 0212/1176] Fix crash deserializing a CUDAKernelCallExpr with a
 +Asserts binary.

The assertion in setConfig read from the (uninitialized) CONFIG
expression.

llvm-svn: 361680
---
 clang/include/clang/AST/ExprCXX.h         | 16 ++--------------
 clang/lib/Serialization/ASTReaderStmt.cpp |  2 +-
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 4a6aa9014c9ec..551a677570ff9 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -216,6 +216,8 @@ class CXXMemberCallExpr final : public CallExpr {
 
 /// Represents a call to a CUDA kernel function.
 class CUDAKernelCallExpr final : public CallExpr {
+  friend class ASTStmtReader;
+
   enum { CONFIG, END_PREARG };
 
   // CUDAKernelCallExpr has some trailing objects belonging
@@ -241,20 +243,6 @@ class CUDAKernelCallExpr final : public CallExpr {
   }
   CallExpr *getConfig() { return cast_or_null<CallExpr>(getPreArg(CONFIG)); }
 
-  /// Sets the kernel configuration expression.
-  ///
-  /// Note that this method cannot be called if config has already been set to a
-  /// non-null value.
-  void setConfig(CallExpr *E) {
-    assert(!getConfig() &&
-           "Cannot call setConfig if config is not null");
-    setPreArg(CONFIG, E);
-    setInstantiationDependent(isInstantiationDependent() ||
-                              E->isInstantiationDependent());
-    setContainsUnexpandedParameterPack(containsUnexpandedParameterPack() ||
-                                       E->containsUnexpandedParameterPack());
-  }
-
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == CUDAKernelCallExprClass;
   }
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index e0647fc6b82bd..4d879b46e1a4a 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1904,7 +1904,7 @@ void ASTStmtReader::VisitSEHTryStmt(SEHTryStmt *S) {
 
 void ASTStmtReader::VisitCUDAKernelCallExpr(CUDAKernelCallExpr *E) {
   VisitCallExpr(E);
-  E->setConfig(cast<CallExpr>(Record.readSubExpr()));
+  E->setPreArg(CUDAKernelCallExpr::CONFIG, Record.readSubExpr());
 }
 
 //===----------------------------------------------------------------------===//

From 192a7474d6bc93918043d6f47cf1ad294c711dde Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Fri, 24 May 2019 23:37:08 +0000
Subject: [PATCH 0213/1176] [CFG] Add branch to skip vbase inits when they're
 handled by superclass.

This patch adds the run-time CFG branch that would skip initialization of
virtual base classes depending on whether the constructor is called from a
superclass constructor or not. Previously the Static Analyzer was already
skipping virtual base-class initializers in such constructors, but it wasn't
skipping their arguments and their potential side effects, which was causing
pr41300 (and was generally incorrect). The previous skipping behavior is
now replaced with a hard assertion that we're not even getting there due
to how our CFG works.

The new CFG element is under a CFG build option so that not to break other
consumers of the CFG by this change. Static Analyzer support for this change
is implemented.

Differential Revision: https://reviews.llvm.org/D61816

llvm-svn: 361681
---
 .../clang/Analysis/AnalysisDeclContext.h      |   1 +
 clang/include/clang/Analysis/CFG.h            |  23 ++-
 .../Core/PathSensitive/CoreEngine.h           |   2 +
 clang/lib/Analysis/AnalysisDeclContext.cpp    |   3 +-
 clang/lib/Analysis/CFG.cpp                    |  38 ++++-
 .../StaticAnalyzer/Core/AnalysisManager.cpp   |   4 +-
 clang/lib/StaticAnalyzer/Core/CoreEngine.cpp  |  28 ++++
 .../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp |  27 ++--
 clang/test/Analysis/initializer.cpp           |  91 +++++++++++
 .../test/Analysis/initializers-cfg-output.cpp | 152 ++++++++++++++----
 10 files changed, 308 insertions(+), 61 deletions(-)

diff --git a/clang/include/clang/Analysis/AnalysisDeclContext.h b/clang/include/clang/Analysis/AnalysisDeclContext.h
index d42432a28d06a..86f331d26a9bb 100644
--- a/clang/include/clang/Analysis/AnalysisDeclContext.h
+++ b/clang/include/clang/Analysis/AnalysisDeclContext.h
@@ -459,6 +459,7 @@ class AnalysisDeclContextManager {
                              bool addCXXNewAllocator = true,
                              bool addRichCXXConstructors = true,
                              bool markElidedCXXConstructors = true,
+                             bool addVirtualBaseBranches = true,
                              CodeInjector *injector = nullptr);
 
   AnalysisDeclContext *getContext(const Decl *D);
diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index 212fd1baef5d6..734c49881e3e5 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -504,15 +504,19 @@ class CFGTerminator {
     /// terminator statement is the same statement that branches control flow
     /// in evaluation of matching full expression.
     TemporaryDtorsBranch,
+    /// A shortcut around virtual base initializers. It gets taken when
+    /// virtual base classes have already been initialized by the constructor
+    /// of the most derived class while we're in the base class.
+    VirtualBaseBranch,
 
     /// Number of different kinds, for sanity checks. We subtract 1 so that
     /// to keep receiving compiler warnings when we don't cover all enum values
     /// in a switch.
-    NumKindsMinusOne = TemporaryDtorsBranch
+    NumKindsMinusOne = VirtualBaseBranch
   };
 
 private:
-  static constexpr int KindBits = 1;
+  static constexpr int KindBits = 2;
   static_assert((1 << KindBits) > NumKindsMinusOne,
                 "Not enough room for kind!");
   llvm::PointerIntPair<Stmt *, KindBits> Data;
@@ -532,6 +536,9 @@ class CFGTerminator {
   bool isTemporaryDtorsBranch() const {
     return getKind() == TemporaryDtorsBranch;
   }
+  bool isVirtualBaseBranch() const {
+    return getKind() == VirtualBaseBranch;
+  }
 };
 
 /// Represents a single basic block in a source-level CFG.
@@ -552,11 +559,12 @@ class CFGTerminator {
 /// Successors: the order in the set of successors is NOT arbitrary.  We
 ///  currently have the following orderings based on the terminator:
 ///
-///     Terminator       Successor Ordering
-///  -----------------------------------------------------
-///       if            Then Block;  Else Block
-///     ? operator      LHS expression;  RHS expression
-///     &&, ||          expression that uses result of && or ||, RHS
+///     Terminator     |   Successor Ordering
+///  ------------------|------------------------------------
+///       if           |  Then Block;  Else Block
+///     ? operator     |  LHS expression;  RHS expression
+///     logical and/or |  expression that consumes the op, RHS
+///     vbase inits    |  already handled by the most derived class; not yet
 ///
 /// But note that any of that may be NULL in case of optimized-out edges.
 class CFGBlock {
@@ -1039,6 +1047,7 @@ class CFG {
     bool AddCXXDefaultInitExprInCtors = false;
     bool AddRichCXXConstructors = false;
     bool MarkElidedCXXConstructors = false;
+    bool AddVirtualBaseBranches = false;
 
     BuildOptions() = default;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
index 310c2a43aa412..019acc0b7d9f8 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
@@ -116,6 +116,8 @@ class CoreEngine {
   void HandleStaticInit(const DeclStmt *DS, const CFGBlock *B,
                         ExplodedNode *Pred);
 
+  void HandleVirtualBaseBranch(const CFGBlock *B, ExplodedNode *Pred);
+
 private:
   ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
                                           const ReturnStmt *RS);
diff --git a/clang/lib/Analysis/AnalysisDeclContext.cpp b/clang/lib/Analysis/AnalysisDeclContext.cpp
index f32c9f903f4a1..750d9bb1202ae 100644
--- a/clang/lib/Analysis/AnalysisDeclContext.cpp
+++ b/clang/lib/Analysis/AnalysisDeclContext.cpp
@@ -70,7 +70,7 @@ AnalysisDeclContextManager::AnalysisDeclContextManager(
     bool addLoopExit, bool addScopes, bool synthesizeBodies,
     bool addStaticInitBranch, bool addCXXNewAllocator,
     bool addRichCXXConstructors, bool markElidedCXXConstructors,
-    CodeInjector *injector)
+    bool addVirtualBaseBranches, CodeInjector *injector)
     : Injector(injector), FunctionBodyFarm(ASTCtx, injector),
       SynthesizeBodies(synthesizeBodies) {
   cfgBuildOptions.PruneTriviallyFalseEdges = !useUnoptimizedCFG;
@@ -84,6 +84,7 @@ AnalysisDeclContextManager::AnalysisDeclContextManager(
   cfgBuildOptions.AddCXXNewAllocator = addCXXNewAllocator;
   cfgBuildOptions.AddRichCXXConstructors = addRichCXXConstructors;
   cfgBuildOptions.MarkElidedCXXConstructors = markElidedCXXConstructors;
+  cfgBuildOptions.AddVirtualBaseBranches = addVirtualBaseBranches;
 }
 
 void AnalysisDeclContextManager::clear() { Contexts.clear(); }
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 915e5cc222f5b..5d50cfb474e19 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -1431,13 +1431,41 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
   if (badCFG)
     return nullptr;
 
-  // For C++ constructor add initializers to CFG.
-  if (const CXXConstructorDecl *CD = dyn_cast_or_null<CXXConstructorDecl>(D)) {
+  // For C++ constructor add initializers to CFG. Constructors of virtual bases
+  // are ignored unless the object is of the most derived class.
+  //   class VBase { VBase() = default; VBase(int) {} };
+  //   class A : virtual public VBase { A() : VBase(0) {} };
+  //   class B : public A {};
+  //   B b; // Constructor calls in order: VBase(), A(), B().
+  //        // VBase(0) is ignored because A isn't the most derived class.
+  // This may result in the virtual base(s) being already initialized at this
+  // point, in which case we should jump right onto non-virtual bases and
+  // fields. To handle this, make a CFG branch. We only need to add one such
+  // branch per constructor, since the Standard states that all virtual bases
+  // shall be initialized before non-virtual bases and direct data members.
+  if (const auto *CD = dyn_cast_or_null<CXXConstructorDecl>(D)) {
+    CFGBlock *VBaseSucc = nullptr;
     for (auto *I : llvm::reverse(CD->inits())) {
+      if (BuildOpts.AddVirtualBaseBranches && !VBaseSucc &&
+          I->isBaseInitializer() && I->isBaseVirtual()) {
+        // We've reached the first virtual base init while iterating in reverse
+        // order. Make a new block for virtual base initializers so that we
+        // could skip them.
+        VBaseSucc = Succ = B ? B : &cfg->getExit();
+        Block = createBlock();
+      }
       B = addInitializer(I);
       if (badCFG)
         return nullptr;
     }
+    if (VBaseSucc) {
+      // Make a branch block for potentially skipping virtual base initializers.
+      Succ = VBaseSucc;
+      B = createBlock();
+      B->setTerminator(
+          CFGTerminator(nullptr, CFGTerminator::VirtualBaseBranch));
+      addSuccessor(B, Block, true);
+    }
   }
 
   if (B)
@@ -1769,6 +1797,9 @@ void CFGBuilder::addImplicitDtorsForDestructor(const CXXDestructorDecl *DD) {
 
   // At the end destroy virtual base objects.
   for (const auto &VI : RD->vbases()) {
+    // TODO: Add a VirtualBaseBranch to see if the most derived class
+    // (which is different from the current class) is responsible for
+    // destroying them.
     const CXXRecordDecl *CD = VI.getType()->getAsCXXRecordDecl();
     if (!CD->hasTrivialDestructor()) {
       autoCreateBlock();
@@ -5066,6 +5097,9 @@ class CFGBlockTerminatorPrint
       OS << "(Temp Dtor) ";
       Visit(T.getStmt());
       break;
+    case CFGTerminator::VirtualBaseBranch:
+      OS << "(See if most derived ctor has already initialized vbases)";
+      break;
     }
   }
 };
diff --git a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
index 2e69c2c43b625..95f2b703cdd65 100644
--- a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
@@ -35,7 +35,9 @@ AnalysisManager::AnalysisManager(ASTContext &ASTCtx, DiagnosticsEngine &diags,
           Options.ShouldConditionalizeStaticInitializers,
           /*addCXXNewAllocator=*/true,
           Options.ShouldIncludeRichConstructorsInCFG,
-          Options.ShouldElideConstructors, injector),
+          Options.ShouldElideConstructors,
+          /*addVirtualBaseBranches=*/true,
+          injector),
       Ctx(ASTCtx), Diags(diags), LangOpts(ASTCtx.getLangOpts()),
       PathConsumers(PDC), CreateStoreMgr(storemgr),
       CreateConstraintMgr(constraintmgr), CheckerMgr(checkerMgr),
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index ca9a48ef9808c..500995b053ef9 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -380,6 +380,11 @@ void CoreEngine::HandleBlockExit(const CFGBlock * B, ExplodedNode *Pred) {
     }
   }
 
+  if (B->getTerminator().isVirtualBaseBranch()) {
+    HandleVirtualBaseBranch(B, Pred);
+    return;
+  }
+
   assert(B->succ_size() == 1 &&
          "Blocks with no terminator should have at most 1 successor.");
 
@@ -439,6 +444,29 @@ void CoreEngine::HandlePostStmt(const CFGBlock *B, unsigned StmtIdx,
   }
 }
 
+void CoreEngine::HandleVirtualBaseBranch(const CFGBlock *B,
+                                         ExplodedNode *Pred) {
+  const LocationContext *LCtx = Pred->getLocationContext();
+  if (const auto *CallerCtor = dyn_cast_or_null<CXXConstructExpr>(
+          LCtx->getStackFrame()->getCallSite())) {
+    switch (CallerCtor->getConstructionKind()) {
+    case CXXConstructExpr::CK_NonVirtualBase:
+    case CXXConstructExpr::CK_VirtualBase: {
+      BlockEdge Loc(B, *B->succ_begin(), LCtx);
+      HandleBlockEdge(Loc, Pred);
+      return;
+    }
+    default:
+      break;
+    }
+  }
+
+  // We either don't see a parent stack frame because we're in the top frame,
+  // or the parent stack frame doesn't initialize our virtual bases.
+  BlockEdge Loc(B, *(B->succ_begin() + 1), LCtx);
+  HandleBlockEdge(Loc, Pred);
+}
+
 /// generateNode - Utility method to generate nodes, hook up successors,
 ///  and add nodes to the worklist.
 void CoreEngine::generateNode(const ProgramPoint &Loc,
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index 62699fb3186b5..1cbd09ea57932 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -428,25 +428,20 @@ void ExprEngine::VisitCXXConstructExpr(const CXXConstructExpr *CE,
         prepareForObjectConstruction(CE, State, LCtx, CC, CallOpts);
     break;
   }
-  case CXXConstructExpr::CK_VirtualBase:
+  case CXXConstructExpr::CK_VirtualBase: {
     // Make sure we are not calling virtual base class initializers twice.
     // Only the most-derived object should initialize virtual base classes.
-    if (const Stmt *Outer = LCtx->getStackFrame()->getCallSite()) {
-      const CXXConstructExpr *OuterCtor = dyn_cast<CXXConstructExpr>(Outer);
-      if (OuterCtor) {
-        switch (OuterCtor->getConstructionKind()) {
-        case CXXConstructExpr::CK_NonVirtualBase:
-        case CXXConstructExpr::CK_VirtualBase:
-          // Bail out!
-          destNodes.Add(Pred);
-          return;
-        case CXXConstructExpr::CK_Complete:
-        case CXXConstructExpr::CK_Delegating:
-          break;
-        }
-      }
-    }
+    const auto *OuterCtor = dyn_cast_or_null<CXXConstructExpr>(
+        LCtx->getStackFrame()->getCallSite());
+    assert(
+        (!OuterCtor ||
+         OuterCtor->getConstructionKind() == CXXConstructExpr::CK_Complete ||
+         OuterCtor->getConstructionKind() == CXXConstructExpr::CK_Delegating) &&
+        ("This virtual base should have already been initialized by "
+         "the most derived class!"));
+    (void)OuterCtor;
     LLVM_FALLTHROUGH;
+  }
   case CXXConstructExpr::CK_NonVirtualBase:
     // In C++17, classes with non-virtual bases may be aggregates, so they would
     // be initialized as aggregates without a constructor call, so we may have
diff --git a/clang/test/Analysis/initializer.cpp b/clang/test/Analysis/initializer.cpp
index 56b0a09d47455..16d7a348fdfb6 100644
--- a/clang/test/Analysis/initializer.cpp
+++ b/clang/test/Analysis/initializer.cpp
@@ -275,3 +275,94 @@ B foo_recursive() {
   B b { foo_recursive() };
 }
 } // namespace CXX17_transparent_init_list_exprs
+
+namespace skip_vbase_initializer_side_effects {
+int glob;
+struct S {
+  S() { ++glob; }
+};
+
+struct A {
+  A() {}
+  A(S s) {}
+};
+
+struct B : virtual A {
+  B() : A(S()) {}
+};
+
+struct C : B {
+  C() {}
+};
+
+void foo() {
+  glob = 0;
+  B b;
+  clang_analyzer_eval(glob == 1); // expected-warning{{TRUE}}
+  C c; // no-crash
+  clang_analyzer_eval(glob == 1); // expected-warning{{TRUE}}
+}
+} // namespace skip_vbase_initializer_side_effects
+
+namespace dont_skip_vbase_initializers_in_most_derived_class {
+struct A {
+  static int a;
+  A() { a = 0; }
+  A(int x) { a = x; }
+};
+
+struct B {
+  static int b;
+  B() { b = 0; }
+  B(int y) { b = y; }
+};
+
+struct C : virtual A {
+  C() : A(1) {}
+};
+struct D : C, virtual B {
+  D() : B(2) {}
+};
+
+void testD() {
+  D d;
+  clang_analyzer_eval(A::a == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(B::b == 2); // expected-warning{{TRUE}}
+}
+
+struct E : virtual B, C {
+  E() : B(2) {}
+};
+
+void testE() {
+  E e;
+  clang_analyzer_eval(A::a == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(B::b == 2); // expected-warning{{TRUE}}
+}
+
+struct F : virtual A, virtual B {
+  F() : A(1) {}
+};
+struct G : F {
+  G(): B(2) {}
+};
+
+void testG() {
+  G g;
+  clang_analyzer_eval(A::a == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(B::b == 2); // expected-warning{{TRUE}}
+}
+
+struct H : virtual B, virtual A {
+  H(): A(1) {}
+};
+struct I : H {
+  I(): B(2) {}
+};
+
+void testI() {
+  I i;
+  clang_analyzer_eval(A::a == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(B::b == 2); // expected-warning{{TRUE}}
+}
+} // namespace dont_skip_vbase_initializers_in_most_derived_class
diff --git a/clang/test/Analysis/initializers-cfg-output.cpp b/clang/test/Analysis/initializers-cfg-output.cpp
index a69e78faeda08..f83386492656a 100644
--- a/clang/test/Analysis/initializers-cfg-output.cpp
+++ b/clang/test/Analysis/initializers-cfg-output.cpp
@@ -30,21 +30,25 @@ class A {
 class B : public virtual A {
 public:
   // CHECK:       B()
-  // CHECK:        [B2 (ENTRY)]
-  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B3 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B2
   // CHECK:        [B1]
   // WARNINGS-NEXT:     1:  (CXXConstructExpr, class A)
   // ANALYZER-NEXT:     1:  (CXXConstructExpr, A() (Base initializer), class A)
   // CHECK-NEXT:     2: A([B1.1]) (Base initializer)
   // CHECK-NEXT:     Preds (1): B2
   // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B2]
+  // CHECK-NEXT:     T: (See if most derived ctor has already initialized vbases)
+  // CHECK-NEXT:     Preds (1): B3
+  // CHECK-NEXT:     Succs (2): B0 B1
   // CHECK:        [B0 (EXIT)]
-  // CHECK-NEXT:     Preds (1): B1
+  // CHECK-NEXT:     Preds (2): B1 B2
   B() {}
 
   // CHECK:       B(int i)
-  // CHECK:        [B2 (ENTRY)]
-  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B3 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B2
   // CHECK:        [B1]
   // CHECK-NEXT:     1: i
   // CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, LValueToRValue, int)
@@ -53,29 +57,37 @@ class B : public virtual A {
   // CHECK-NEXT:     4: A([B1.3]) (Base initializer)
   // CHECK-NEXT:     Preds (1): B2
   // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B2]
+  // CHECK-NEXT:     T: (See if most derived ctor has already initialized vbases)
+  // CHECK-NEXT:     Preds (1): B3
+  // CHECK-NEXT:     Succs (2): B0 B1
   // CHECK:        [B0 (EXIT)]
-  // CHECK-NEXT:     Preds (1): B1
+  // CHECK-NEXT:     Preds (2): B1 B2
   B(int i) : A(i) {}
 };
 
 class C : public virtual A {
 public:
   // CHECK:       C()
-  // CHECK:        [B2 (ENTRY)]
-  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B3 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B2
   // CHECK:        [B1]
   // WARNINGS-NEXT:     1:  (CXXConstructExpr, class A)
   // ANALYZER-NEXT:     1:  (CXXConstructExpr, A() (Base initializer), class A)
   // CHECK-NEXT:     2: A([B1.1]) (Base initializer)
   // CHECK-NEXT:     Preds (1): B2
   // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B2]
+  // CHECK-NEXT:     T: (See if most derived ctor has already initialized vbases)
+  // CHECK-NEXT:     Preds (1): B3
+  // CHECK-NEXT:     Succs (2): B0 B1
   // CHECK:        [B0 (EXIT)]
-  // CHECK-NEXT:     Preds (1): B1
+  // CHECK-NEXT:     Preds (2): B1 B2
   C() {}
 
   // CHECK:       C(int i)
-  // CHECK:        [B2 (ENTRY)]
-  // CHECK-NEXT:     Succs (1): B1
+  // CHECK:        [B3 (ENTRY)]
+  // CHECK-NEXT:     Succs (1): B2
   // CHECK:        [B1]
   // CHECK-NEXT:     1: i
   // CHECK-NEXT:     2: [B1.1] (ImplicitCastExpr, LValueToRValue, int)
@@ -84,8 +96,12 @@ class C : public virtual A {
   // CHECK-NEXT:     4: A([B1.3]) (Base initializer)
   // CHECK-NEXT:     Preds (1): B2
   // CHECK-NEXT:     Succs (1): B0
+  // CHECK:        [B2]
+  // CHECK-NEXT:     T: (See if most derived ctor has already initialized vbases)
+  // CHECK-NEXT:     Preds (1): B3
+  // CHECK-NEXT:     Succs (2): B0 B1
   // CHECK:        [B0 (EXIT)]
-  // CHECK-NEXT:     Preds (1): B1
+  // CHECK-NEXT:     Preds (2): B1 B2
   C(int i) : A(i) {}
 };
 
@@ -98,31 +114,38 @@ class TestOrder : public C, public B, public A {
 };
 
 // CHECK:       TestOrder::TestOrder()
-// CHECK:        [B2 (ENTRY)]
-// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B4 (ENTRY)]
+// CHECK-NEXT:     Succs (1): B3
 // CHECK:        [B1]
+// WARNINGS-NEXT:     1:  (CXXConstructExpr, class C)
+// ANALYZER-NEXT:     1:  (CXXConstructExpr, C() (Base initializer), class C)
+// CHECK-NEXT:     2: C([B1.1]) (Base initializer)
+// WARNINGS-NEXT:     3:  (CXXConstructExpr, class B)
+// ANALYZER-NEXT:     3:  (CXXConstructExpr, B() (Base initializer), class B)
+// CHECK-NEXT:     4: B([B1.3]) (Base initializer)
+// WARNINGS-NEXT:     5:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:     5:  (CXXConstructExpr, A() (Base initializer), class A)
+// CHECK-NEXT:     6: A([B1.5]) (Base initializer)
+// CHECK-NEXT:     7: /*implicit*/(int)0
+// CHECK-NEXT:     8: i([B1.7]) (Member initializer)
+// CHECK-NEXT:     9: this
+// CHECK-NEXT:    10: [B1.9]->i
+// CHECK-NEXT:    11: r([B1.10]) (Member initializer)
+// WARNINGS-NEXT:    12:  (CXXConstructExpr, class A)
+// ANALYZER-NEXT:    12:  (CXXConstructExpr, [B1.13], class A)
+// CHECK-NEXT:    13: A a;
+// CHECK-NEXT:     Preds (2): B2 B3
+// CHECK-NEXT:     Succs (1): B0
+// CHECK:        [B2]
 // WARNINGS-NEXT:     1:  (CXXConstructExpr, class A)
 // ANALYZER-NEXT:     1:  (CXXConstructExpr, A() (Base initializer), class A)
-// CHECK-NEXT:     2: A([B1.1]) (Base initializer)
-// WARNINGS-NEXT:     3:  (CXXConstructExpr, class C)
-// ANALYZER-NEXT:     3:  (CXXConstructExpr, C() (Base initializer), class C)
-// CHECK-NEXT:     4: C([B1.3]) (Base initializer)
-// WARNINGS-NEXT:     5:  (CXXConstructExpr, class B)
-// ANALYZER-NEXT:     5:  (CXXConstructExpr, B() (Base initializer), class B)
-// CHECK-NEXT:     6: B([B1.5]) (Base initializer)
-// WARNINGS-NEXT:     7:  (CXXConstructExpr, class A)
-// ANALYZER-NEXT:     7:  (CXXConstructExpr, A() (Base initializer), class A)
-// CHECK-NEXT:     8: A([B1.7]) (Base initializer)
-// CHECK-NEXT:     9: /*implicit*/(int)0
-// CHECK-NEXT:    10: i([B1.9]) (Member initializer)
-// CHECK-NEXT:    11: this
-// CHECK-NEXT:    12: [B1.11]->i
-// CHECK-NEXT:    13: r([B1.12]) (Member initializer)
-// WARNINGS-NEXT:    14:  (CXXConstructExpr, class A)
-// ANALYZER-NEXT:    14:  (CXXConstructExpr, [B1.15], class A)
-// CHECK-NEXT:    15: A a;
-// CHECK-NEXT:     Preds (1): B2
-// CHECK-NEXT:     Succs (1): B0
+// CHECK-NEXT:     2: A([B2.1]) (Base initializer)
+// CHECK-NEXT:     Preds (1): B3
+// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B3]
+// CHECK-NEXT:     T: (See if most derived ctor has already initialized vbases)
+// CHECK-NEXT:     Preds (1): B4
+// CHECK-NEXT:     Succs (2): B1 B2
 // CHECK:        [B0 (EXIT)]
 // CHECK-NEXT:     Preds (1): B1
 TestOrder::TestOrder()
@@ -209,3 +232,64 @@ class TestDelegating {
   // CHECK-NEXT:     Preds (1): B1
   TestDelegating(int x, int z) : x(x), z(z) {}
 };
+
+class TestMoreControlFlow : public virtual A {
+  A a;
+
+public:
+  TestMoreControlFlow(bool coin);
+};
+
+// CHECK:       TestMoreControlFlow::TestMoreControlFlow(bool coin)
+// CHECK:        [B10 (ENTRY)]
+// CHECK-NEXT:     Succs (1): B9
+// CHECK:        [B1]
+// CHECK-NEXT:     1: [B4.2] ? [B2.1] : [B3.1]
+// WARNINGS-NEXT:     2: [B1.1] (CXXConstructExpr, class A)
+// ANALYZER-NEXT:     2: [B1.1] (CXXConstructExpr, a([B1.1]) (Member initializer), class A)
+// CHECK-NEXT:     3: a([B1.2]) (Member initializer)
+// CHECK-NEXT:     Preds (2): B2 B3
+// CHECK-NEXT:     Succs (1): B0
+// CHECK:        [B2]
+// CHECK-NEXT:     1: 3
+// CHECK-NEXT:     Preds (1): B4
+// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B3]
+// CHECK-NEXT:     1: 4
+// CHECK-NEXT:     Preds (1): B4
+// CHECK-NEXT:     Succs (1): B1
+// CHECK:        [B4]
+// CHECK-NEXT:     1: coin
+// CHECK-NEXT:     2: [B4.1] (ImplicitCastExpr, LValueToRValue, _Bool)
+// CHECK-NEXT:     T: [B4.2] ? ... : ...
+// CHECK-NEXT:     Preds (2): B5 B9
+// CHECK-NEXT:     Succs (2): B2 B3
+// CHECK:        [B5]
+// CHECK-NEXT:     1: [B8.2] ? [B6.1] : [B7.1]
+// WARNINGS-NEXT:     2: [B5.1] (CXXConstructExpr, class A)
+// ANALYZER-NEXT:     2: [B5.1] (CXXConstructExpr, A([B5.1]) (Base initializer), class A)
+// CHECK-NEXT:     3: A([B5.2]) (Base initializer)
+// CHECK-NEXT:     Preds (2): B6 B7
+// CHECK-NEXT:     Succs (1): B4
+// CHECK:        [B6]
+// CHECK-NEXT:     1: 1
+// CHECK-NEXT:     Preds (1): B8
+// CHECK-NEXT:     Succs (1): B5
+// CHECK:        [B7]
+// CHECK-NEXT:     1: 2
+// CHECK-NEXT:     Preds (1): B8
+// CHECK-NEXT:     Succs (1): B5
+// CHECK:        [B8]
+// CHECK-NEXT:     1: coin
+// CHECK-NEXT:     2: [B8.1] (ImplicitCastExpr, LValueToRValue, _Bool)
+// CHECK-NEXT:     T: [B8.2] ? ... : ...
+// CHECK-NEXT:     Preds (1): B9
+// CHECK-NEXT:     Succs (2): B6 B7
+// CHECK:        [B9]
+// CHECK-NEXT:     T: (See if most derived ctor has already initialized vbases)
+// CHECK-NEXT:     Preds (1): B10
+// CHECK-NEXT:     Succs (2): B4 B8
+// CHECK:        [B0 (EXIT)]
+// CHECK-NEXT:     Preds (1): B1
+TestMoreControlFlow::TestMoreControlFlow(bool coin)
+    : A(coin ? 1 : 2), a(coin ? 3 : 4) {}

From ef0aab3138ac8a57c370623c32854f797713dda2 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Fri, 24 May 2019 23:37:11 +0000
Subject: [PATCH 0214/1176] [analyzer] Add a prunable note for skipping vbase
 inits in subclasses.

When initialization of virtual base classes is skipped, we now tell the user
about it, because this aspect of C++ isn't very well-known.

The implementation is based on the new "note tags" feature (r358781).
In order to make use of it, allow note tags to produce prunable notes,
and move the note tag factory to CoreEngine.

Differential Revision: https://reviews.llvm.org/D61817

llvm-svn: 361682
---
 .../Core/BugReporter/BugReporter.h            | 10 +++--
 .../Core/PathSensitive/CoreEngine.h           |  7 +++
 .../Core/PathSensitive/ExprEngine.h           |  4 +-
 .../Core/BugReporterVisitors.cpp              |  4 +-
 clang/lib/StaticAnalyzer/Core/CoreEngine.cpp  | 19 ++++++++
 .../StaticAnalyzer/Core/PathDiagnostic.cpp    | 10 ++++-
 .../test/Analysis/diagnostics/initializer.cpp | 44 +++++++++++++++++++
 7 files changed, 90 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Analysis/diagnostics/initializer.cpp

diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
index 46b15d0c6f78c..4cccb38ce24fa 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
@@ -604,8 +604,10 @@ class NoteTag : public ProgramPointTag {
   static int Kind;
 
   const Callback Cb;
+  const bool IsPrunable;
 
-  NoteTag(Callback &&Cb) : ProgramPointTag(&Kind), Cb(std::move(Cb)) {}
+  NoteTag(Callback &&Cb, bool IsPrunable)
+      : ProgramPointTag(&Kind), Cb(std::move(Cb)), IsPrunable(IsPrunable) {}
 
 public:
   static bool classof(const ProgramPointTag *T) {
@@ -628,15 +630,17 @@ class NoteTag : public ProgramPointTag {
     return "Note Tag";
   }
 
+  bool isPrunable() const { return IsPrunable; }
+
   // Manage memory for NoteTag objects.
   class Factory {
     std::vector<std::unique_ptr<NoteTag>> Tags;
 
   public:
-    const NoteTag *makeNoteTag(Callback &&Cb) {
+    const NoteTag *makeNoteTag(Callback &&Cb, bool IsPrunable = false) {
       // We cannot use make_unique because we cannot access the private
       // constructor from inside it.
-      std::unique_ptr<NoteTag> T(new NoteTag(std::move(Cb)));
+      std::unique_ptr<NoteTag> T(new NoteTag(std::move(Cb), IsPrunable));
       Tags.push_back(std::move(T));
       return Tags.back().get();
     }
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
index 019acc0b7d9f8..278193ef99ede 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h
@@ -19,6 +19,7 @@
 #include "clang/Analysis/CFG.h"
 #include "clang/Analysis/ProgramPoint.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
@@ -95,6 +96,10 @@ class CoreEngine {
   /// (This data is owned by AnalysisConsumer.)
   FunctionSummariesTy *FunctionSummaries;
 
+  /// Add path note tags along the path when we see that something interesting
+  /// is happening. This field is the allocator for such tags.
+  NoteTag::Factory NoteTags;
+
   void generateNode(const ProgramPoint &Loc,
                     ProgramStateRef State,
                     ExplodedNode *Pred);
@@ -194,6 +199,8 @@ class CoreEngine {
 
   /// Enqueue a single node created as a result of statement processing.
   void enqueueStmtNode(ExplodedNode *N, const CFGBlock *Block, unsigned Idx);
+
+  NoteTag::Factory &getNoteTags() { return NoteTags; }
 };
 
 // TODO: Turn into a class.
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index f0b01f182bf8c..8bc599a96a596 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -156,8 +156,6 @@ class ExprEngine : public SubEngine {
   /// The flag, which specifies the mode of inlining for the engine.
   InliningModes HowToInline;
 
-  NoteTag::Factory NoteTags;
-
 public:
   ExprEngine(cross_tu::CrossTranslationUnitContext &CTU, AnalysisManager &mgr,
              SetOfConstDecls *VisitedCalleesIn,
@@ -399,7 +397,7 @@ class ExprEngine : public SubEngine {
   SymbolManager &getSymbolManager() { return SymMgr; }
   MemRegionManager &getRegionManager() { return MRMgr; }
 
-  NoteTag::Factory &getNoteTags() { return NoteTags; }
+  NoteTag::Factory &getNoteTags() { return Engine.getNoteTags(); }
 
 
   // Functions for external checking of whether we have unfinished work
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index bc34472020c40..21320b1cdd884 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -2501,7 +2501,9 @@ TagVisitor::VisitNode(const ExplodedNode *N, BugReporterContext &BRC,
   if (Optional<std::string> Msg = T->generateMessage(BRC, R)) {
     PathDiagnosticLocation Loc =
         PathDiagnosticLocation::create(PP, BRC.getSourceManager());
-    return std::make_shared<PathDiagnosticEventPiece>(Loc, *Msg);
+    auto Piece = std::make_shared<PathDiagnosticEventPiece>(Loc, *Msg);
+    Piece->setPrunable(T->isPrunable());
+    return Piece;
   }
 
   return nullptr;
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index 500995b053ef9..431d07dab1e18 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -216,6 +216,25 @@ void CoreEngine::HandleBlockEdge(const BlockEdge &L, ExplodedNode *Pred) {
                                            LC->getDecl(),
                                            LC->getCFG()->getNumBlockIDs());
 
+  // Display a prunable path note to the user if it's a virtual bases branch
+  // and we're taking the path that skips virtual base constructors.
+  if (L.getSrc()->getTerminator().isVirtualBaseBranch() &&
+      L.getDst() == *L.getSrc()->succ_begin()) {
+    ProgramPoint P = L.withTag(getNoteTags().makeNoteTag(
+        [](BugReporterContext &, BugReport &) -> std::string {
+          // TODO: Just call out the name of the most derived class
+          // when we know it.
+          return "Virtual base initialization skipped because "
+                 "it has already been handled by the most derived class";
+        }, /*IsPrunable=*/true));
+    // Perform the transition.
+    ExplodedNodeSet Dst;
+    NodeBuilder Bldr(Pred, Dst, BuilderCtx);
+    Pred = Bldr.generateNode(P, Pred->getState(), Pred);
+    if (!Pred)
+      return;
+  }
+
   // Check if we are entering the EXIT block.
   if (Blk == &(L.getLocationContext()->getCFG()->getExit())) {
     assert(L.getLocationContext()->getCFG()->getExit().empty() &&
diff --git a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index 9032068892100..b3008479fe358 100644
--- a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -724,7 +724,15 @@ PathDiagnosticLocation::create(const ProgramPoint& P,
   const Stmt* S = nullptr;
   if (Optional<BlockEdge> BE = P.getAs<BlockEdge>()) {
     const CFGBlock *BSrc = BE->getSrc();
-    S = BSrc->getTerminatorCondition();
+    if (BSrc->getTerminator().isVirtualBaseBranch()) {
+      // TODO: VirtualBaseBranches should also appear for destructors.
+      // In this case we should put the diagnostic at the end of decl.
+      return PathDiagnosticLocation::createBegin(
+          P.getLocationContext()->getDecl(), SMng);
+
+    } else {
+      S = BSrc->getTerminatorCondition();
+    }
   } else if (Optional<StmtPoint> SP = P.getAs<StmtPoint>()) {
     S = SP->getStmt();
     if (P.getAs<PostStmtPurgeDeadSymbols>())
diff --git a/clang/test/Analysis/diagnostics/initializer.cpp b/clang/test/Analysis/diagnostics/initializer.cpp
new file mode 100644
index 0000000000000..db744efd8cfeb
--- /dev/null
+++ b/clang/test/Analysis/diagnostics/initializer.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_analyze_cc1 -w -analyzer-checker=core -analyzer-output=text \
+// RUN:   -verify %s
+
+namespace note_on_skipped_vbases {
+struct A {
+  int x;
+  A() : x(0) {} // expected-note{{The value 0 is assigned to 'c.x'}}
+  A(int x) : x(x) {}
+};
+
+struct B : virtual A {
+  int y;
+  // This note appears only once, when this constructor is called from C.
+  // When this constructor is called from D, this note is still correct but
+  // it doesn't appear because it's pruned out because it's irrelevant to the
+  // bug report.
+  B(): // expected-note{{Virtual base initialization skipped because it has already been handled by the most derived class}}
+    A(1),
+    y(1 / x) // expected-warning{{Division by zero}}
+             // expected-note@-1{{Division by zero}}
+  {}
+};
+
+struct C : B {
+  C(): // expected-note{{Calling default constructor for 'A'}}
+       // expected-note@-1{{Returning from default constructor for 'A'}}
+    B() // expected-note{{Calling default constructor for 'B'}}
+  {}
+};
+
+void test_note() {
+  C c; // expected-note{{Calling default constructor for 'C'}}
+}
+
+struct D: B {
+  D() : A(1), B() {}
+};
+
+void test_prunability() {
+  D d;
+  1 / 0; // expected-warning{{Division by zero}}
+         // expected-note@-1{{Division by zero}}
+}
+} // namespace note_on_skipped_vbases

From a17564c2f1d2720fa1adbdafeea4fb2e0de817ac Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Sat, 25 May 2019 00:07:22 +0000
Subject: [PATCH 0215/1176] llvm-dwarfdump: Don't error on mixed units
 using/not using str_offsets

This lead to errors when dumping binaries with v4 and v5 units linked
together (but could've also errored on v5 units that did/didn't use
str_offsets).

Also improves error handling and messages around invalid str_offsets
contributions.

llvm-svn: 361683
---
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h |   7 +-
 llvm/lib/DebugInfo/DWARF/DWARFContext.cpp     |   9 +-
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp        | 115 ++++++++-----
 .../X86/dwarfdump-str-offsets-invalid-1.s     |  37 -----
 .../X86/dwarfdump-str-offsets-invalid-2.s     |  39 -----
 .../X86/dwarfdump-str-offsets-invalid-3.s     |  93 -----------
 .../X86/dwarfdump-str-offsets-invalid-4.s     |  56 -------
 .../X86/dwarfdump-str-offsets-invalid-6.s     |   4 +-
 .../X86/dwarfdump-str-offsets-invalid.s       | 154 ++++++++++++++++++
 .../DebugInfo/X86/dwarfdump-str-offsets.s     |  21 +++
 10 files changed, 262 insertions(+), 273 deletions(-)
 delete mode 100644 llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-1.s
 delete mode 100644 llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-2.s
 delete mode 100644 llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-3.s
 delete mode 100644 llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-4.s
 create mode 100644 llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid.s

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 4e92df2fdb14f..f01b6ac03882c 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -175,6 +175,7 @@ struct StrOffsetsContributionDescriptor {
   StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
                                    uint8_t Version, dwarf::DwarfFormat Format)
       : Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
+  StrOffsetsContributionDescriptor() = default;
 
   uint8_t getVersion() const { return FormParams.Version; }
   dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
@@ -184,7 +185,7 @@ struct StrOffsetsContributionDescriptor {
   /// Determine whether a contribution to the string offsets table is
   /// consistent with the relevant section size and that its length is
   /// a multiple of the size of one of its entries.
-  Optional<StrOffsetsContributionDescriptor>
+  Expected<StrOffsetsContributionDescriptor>
   validateContributionSize(DWARFDataExtractor &DA);
 };
 
@@ -249,14 +250,14 @@ class DWARFUnit {
   /// Find the unit's contribution to the string offsets table and determine its
   /// length and form. The given offset is expected to be derived from the unit
   /// DIE's DW_AT_str_offsets_base attribute.
-  Optional<StrOffsetsContributionDescriptor>
+  Expected<Optional<StrOffsetsContributionDescriptor>>
   determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
 
   /// Find the unit's contribution to the string offsets table and determine its
   /// length and form. The given offset is expected to be 0 in a dwo file or,
   /// in a dwp file, the start of the unit's contribution to the string offsets
   /// table section (as determined by the index table).
-  Optional<StrOffsetsContributionDescriptor>
+  Expected<Optional<StrOffsetsContributionDescriptor>>
   determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
 
 public:
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 160a171176af0..09a42a66ed17e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -101,7 +101,8 @@ static ContributionCollection
 collectContributionData(DWARFContext::unit_iterator_range Units) {
   ContributionCollection Contributions;
   for (const auto &U : Units)
-    Contributions.push_back(U->getStringOffsetsTableContribution());
+    if (const auto &C = U->getStringOffsetsTableContribution())
+      Contributions.push_back(C);
   // Sort the contributions so that any invalid ones are placed at
   // the start of the contributions vector. This way they are reported
   // first.
@@ -157,9 +158,9 @@ static void dumpDWARFv5StringOffsetsSection(
 
     // Detect overlapping contributions.
     if (Offset > ContributionHeader) {
-      OS << "error: overlapping contributions to string offsets table in "
-            "section ."
-         << SectionName << ".\n";
+      WithColor::error()
+          << "overlapping contributions to string offsets table in section ."
+          << SectionName << ".\n";
       return;
     }
     // Report a gap in the table.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index fa165cf2d4022..94bfc8c148f68 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -435,12 +435,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
     // which may differ from the unit's format.
     DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
                           isLittleEndian, 0);
-    if (IsDWO)
-      StringOffsetsTableContribution =
-          determineStringOffsetsTableContributionDWO(DA);
-    else if (getVersion() >= 5)
-      StringOffsetsTableContribution =
-          determineStringOffsetsTableContribution(DA);
+    if (IsDWO || getVersion() >= 5) {
+      auto StringOffsetOrError =
+          IsDWO ? determineStringOffsetsTableContributionDWO(DA)
+                : determineStringOffsetsTableContribution(DA);
+      if (!StringOffsetOrError) {
+        WithColor::error() << "invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: "
+                           << toString(StringOffsetOrError.takeError()) << '\n';
+      } else {
+        StringOffsetsTableContribution = *StringOffsetOrError;
+      }
+    }
 
     // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
     // describe address ranges.
@@ -758,7 +763,7 @@ llvm::Optional<object::SectionedAddress> DWARFUnit::getBaseAddress() {
   return BaseAddr;
 }
 
-Optional<StrOffsetsContributionDescriptor>
+Expected<StrOffsetsContributionDescriptor>
 StrOffsetsContributionDescriptor::validateContributionSize(
     DWARFDataExtractor &DA) {
   uint8_t EntrySize = getDwarfOffsetByteSize();
@@ -769,65 +774,94 @@ StrOffsetsContributionDescriptor::validateContributionSize(
   if (ValidationSize >= Size)
     if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
       return *this;
-  return None;
+  return createStringError(errc::invalid_argument, "length exceeds section size");
 }
 
 // Look for a DWARF64-formatted contribution to the string offsets table
 // starting at a given offset and record it in a descriptor.
-static Optional<StrOffsetsContributionDescriptor>
+static Expected<StrOffsetsContributionDescriptor>
 parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
   if (!DA.isValidOffsetForDataOfSize(Offset, 16))
-    return None;
+    return createStringError(errc::invalid_argument, "section offset exceeds section size");
 
   if (DA.getU32(&Offset) != 0xffffffff)
-    return None;
+    return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit");
 
   uint64_t Size = DA.getU64(&Offset);
   uint8_t Version = DA.getU16(&Offset);
   (void)DA.getU16(&Offset); // padding
   // The encoded length includes the 2-byte version field and the 2-byte
   // padding, so we need to subtract them out when we populate the descriptor.
-  return {{Offset, Size - 4, Version, DWARF64}};
+  return StrOffsetsContributionDescriptor(Offset, Size - 4, Version, DWARF64);
 }
 
 // Look for a DWARF32-formatted contribution to the string offsets table
 // starting at a given offset and record it in a descriptor.
-static Optional<StrOffsetsContributionDescriptor>
+static Expected<StrOffsetsContributionDescriptor>
 parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
   if (!DA.isValidOffsetForDataOfSize(Offset, 8))
-    return None;
+    return createStringError(errc::invalid_argument, "section offset exceeds section size");
+
   uint32_t ContributionSize = DA.getU32(&Offset);
   if (ContributionSize >= 0xfffffff0)
-    return None;
+    return createStringError(errc::invalid_argument, "invalid length");
+
   uint8_t Version = DA.getU16(&Offset);
   (void)DA.getU16(&Offset); // padding
   // The encoded length includes the 2-byte version field and the 2-byte
   // padding, so we need to subtract them out when we populate the descriptor.
-  return {{Offset, ContributionSize - 4, Version, DWARF32}};
+  return StrOffsetsContributionDescriptor(Offset, ContributionSize - 4, Version,
+                                          DWARF32);
 }
 
-Optional<StrOffsetsContributionDescriptor>
-DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
-  auto Offset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base), 0);
-  Optional<StrOffsetsContributionDescriptor> Descriptor;
-  // Attempt to find a DWARF64 contribution 16 bytes before the base.
-  switch (Header.getFormat()) {
-  case dwarf::DwarfFormat::DWARF64:
+static Expected<StrOffsetsContributionDescriptor>
+parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA,
+                                   llvm::dwarf::DwarfFormat Format,
+                                   uint64_t Offset) {
+  StrOffsetsContributionDescriptor Desc;
+  switch (Format) {
+  case dwarf::DwarfFormat::DWARF64: {
     if (Offset < 16)
-      return None;
-    Descriptor =
-        parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
+      return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix");
+    auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
+    if (!DescOrError)
+      return DescOrError.takeError();
+    Desc = *DescOrError;
     break;
-  case dwarf::DwarfFormat::DWARF32:
+  }
+  case dwarf::DwarfFormat::DWARF32: {
     if (Offset < 8)
-      return None;
-    Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+      return createStringError(errc::invalid_argument, "insufficient space for 32 bit header prefix");
+    auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+    if (!DescOrError)
+      return DescOrError.takeError();
+    Desc = *DescOrError;
     break;
   }
-  return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
+  }
+  return Desc.validateContributionSize(DA);
 }
 
-Optional<StrOffsetsContributionDescriptor>
+Expected<Optional<StrOffsetsContributionDescriptor>>
+DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
+  uint64_t Offset;
+  if (IsDWO) {
+    Offset = 0;
+    if (DA.getData().data() == nullptr)
+      return None;
+  } else {
+    auto OptOffset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base));
+    if (!OptOffset)
+      return None;
+    Offset = *OptOffset;
+  }
+  auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+  if (!DescOrError)
+    return DescOrError.takeError();
+  return *DescOrError;
+}
+
+Expected<Optional<StrOffsetsContributionDescriptor>>
 DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
   uint64_t Offset = 0;
   auto IndexEntry = Header.getIndexEntry();
@@ -836,19 +870,24 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
   if (C)
     Offset = C->Offset;
   if (getVersion() >= 5) {
+    if (DA.getData().data() == nullptr)
+      return None;
+    Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
     // Look for a valid contribution at the given offset.
-    auto Descriptor =
-        parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset);
-    if (!Descriptor)
-      Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset);
-    return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
+    auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+    if (!DescOrError)
+      return DescOrError.takeError();
+    return *DescOrError;
   }
   // Prior to DWARF v5, we derive the contribution size from the
   // index table (in a package file). In a .dwo file it is simply
   // the length of the string offsets section.
   if (!IndexEntry)
-    return {{0, StringOffsetSection.Data.size(), 4, DWARF32}};
+    return {
+        Optional<StrOffsetsContributionDescriptor>(
+            {0, StringOffsetSection.Data.size(), 4, DWARF32})};
   if (C)
-    return {{C->Offset, C->Length, 4, DWARF32}};
+    return {Optional<StrOffsetsContributionDescriptor>(
+        {C->Offset, C->Length, 4, DWARF32})};
   return None;
 }
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-1.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-1.s
deleted file mode 100644
index 180029202c5d2..0000000000000
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-1.s
+++ /dev/null
@@ -1,37 +0,0 @@
-# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
-# RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=INVALIDCONTRIB %s
-#
-# Test object to verify that llvm-dwarfdump handles an invalid string offsets
-# table.
-#
-# A rudimentary abbrev section.
-        .section .debug_abbrev,"",@progbits
-        .byte 0x01  # Abbrev code
-        .byte 0x11  # DW_TAG_compile_unit
-        .byte 0x00  # DW_CHILDREN_no
-        .byte 0x00  # EOM(1)
-        .byte 0x00  # EOM(2)
-        .byte 0x00  # EOM(3)
-
-# A rudimentary compile unit to convince dwarfdump that we are dealing with a 
-# DWARF v5 string offsets table.
-        .section .debug_info,"",@progbits
-
-# DWARF v5 CU header.
-        .long  CU1_5_end-CU1_5_version  # Length of Unit
-CU1_5_version:
-        .short 5               # DWARF version number
-        .byte 1                # DWARF Unit Type
-        .byte 8                # Address Size (in bytes)
-        .long .debug_abbrev    # Offset Into Abbrev. Section
-# A compile-unit DIE, which has no attributes.
-        .byte 1                # Abbreviation code
-CU1_5_end:
-
-        .section .debug_str_offsets,"",@progbits
-# A degenerate section, not enough for a single contribution size.
-        .byte 2
-
-# INVALIDCONTRIB:            .debug_str_offsets contents:
-# INVALIDCONTRIB-NOT:        contents:
-# INVALIDCONTRIB:            error: invalid contribution to string offsets table in section .debug_str_offsets.
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-2.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-2.s
deleted file mode 100644
index e8819628f35b8..0000000000000
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-2.s
+++ /dev/null
@@ -1,39 +0,0 @@
-# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
-# RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=INVALIDCONTRIB %s
-#
-# Test object to verify that llvm-dwarfdump handles an invalid string offsets
-# table.
-#
-# A rudimentary abbrev section.
-        .section .debug_abbrev,"",@progbits
-        .byte 0x01  # Abbrev code
-        .byte 0x11  # DW_TAG_compile_unit
-        .byte 0x00  # DW_CHILDREN_no
-        .byte 0x00  # EOM(1)
-        .byte 0x00  # EOM(2)
-        .byte 0x00  # EOM(3)
-
-# A rudimentary compile unit to convince dwarfdump that we are dealing with a
-# DWARF v5 string offsets table.
-        .section .debug_info,"",@progbits
-
-# DWARF v5 CU header.
-        .long  CU1_5_end-CU1_5_version  # Length of Unit
-CU1_5_version:
-        .short 5               # DWARF version number
-        .byte 1                # DWARF Unit Type
-        .byte 8                # Address Size (in bytes)
-        .long .debug_abbrev    # Offset Into Abbrev. Section
-# A compile-unit DIE, which has no attributes.
-        .byte 1                # Abbreviation code
-CU1_5_end:
-
-        .section .debug_str_offsets,"",@progbits
-# A degenerate section with fewer bytes than required for a DWARF64 size.
-        .long 0xffffffff
-        .long 0
-        .short 4
-
-# INVALIDCONTRIB:            .debug_str_offsets contents:
-# INVALIDCONTRIB-NOT:        contents:
-# INVALIDCONTRIB:            error: invalid contribution to string offsets table in section .debug_str_offsets.
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-3.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-3.s
deleted file mode 100644
index 07c7cfde13f0e..0000000000000
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-3.s
+++ /dev/null
@@ -1,93 +0,0 @@
-# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
-# RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=INVALIDCONTRIB %s
-#
-# Test object to verify that llvm-dwarfdump handles an invalid string offsets
-# table.
-
-        .section .debug_str,"MS",@progbits,1
-str_producer:
-        .asciz "Handmade DWARF producer"
-str_CU1:
-        .asciz "Compile_Unit_1"
-str_CU1_dir:
-        .asciz "/home/test/CU1"
-str_CU2:
-        .asciz "Compile_Unit_2"
-str_CU2_dir:
-        .asciz "/home/test/CU2"
-str_TU:
-        .asciz "Type_Unit"
-str_TU_type:
-        .asciz "MyStruct"
-
-        .section .debug_str.dwo,"MS",@progbits,1
-dwo_str_CU_5_producer:
-        .asciz "Handmade split DWARF producer"
-dwo_str_CU_5_name:
-        .asciz "V5_split_compile_unit"
-dwo_str_CU_5_comp_dir:
-        .asciz "/home/test/splitCU"
-dwo_str_TU_5:
-        .asciz "V5_split_type_unit"
-dwo_str_TU_5_type:
-        .asciz "V5_split_Mystruct"
-
-# A rudimentary abbrev section.
-        .section .debug_abbrev,"",@progbits
-        .byte 0x01  # Abbrev code
-        .byte 0x11  # DW_TAG_compile_unit
-        .byte 0x00  # DW_CHILDREN_no
-        .byte 0x72  # DW_AT_str_offsets_base
-        .byte 0x17  # DW_FORM_sec_offset
-        .byte 0x00  # EOM(1)
-        .byte 0x00  # EOM(2)
-        .byte 0x00  # EOM(3)
-
-# A rudimentary compile unit to convince dwarfdump that we are dealing with a
-# DWARF v5 string offsets table.
-        .section .debug_info,"",@progbits
-
-# DWARF v5 CU header.
-        .long  CU1_5_end-CU1_5_version  # Length of Unit
-CU1_5_version:
-        .short 5               # DWARF version number
-        .byte 1                # DWARF Unit Type
-        .byte 8                # Address Size (in bytes)
-        .long .debug_abbrev    # Offset Into Abbrev. Section
-# A compile-unit DIE, which has no attributes.
-        .byte 1                # Abbreviation code
-        .long .debug_str_offsets_base0
-CU1_5_end:
-
-        .section .debug_str_offsets,"",@progbits
-# CU1's contribution
-# Invalid length
-        .long 0xfffffffe
-        .short 5    # DWARF version
-        .short 0    # Padding
-.debug_str_offsets_base0:
-        .long str_producer
-        .long str_CU1
-        .long str_CU1_dir
-.debug_str_offsets_segment0_end:
-# CU2's contribution
-        .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1+4
-        .short 5    # DWARF version
-        .short 0    # Padding
-.debug_str_offsets_base1:
-        .long str_producer
-        .long str_CU2
-        .long str_CU2_dir
-.debug_str_offsets_segment1_end:
-# The TU's contribution
-        .long .debug_str_offsets_segment2_end-.debug_str_offsets_base2+4
-        .short 5    # DWARF version
-        .short 0    # Padding
-.debug_str_offsets_base2:
-        .long str_TU
-        .long str_TU_type
-.debug_str_offsets_segment2_end:
-
-# INVALIDCONTRIB:            .debug_str_offsets contents:
-# INVALIDCONTRIB-NOT:        contents:
-# INVALIDCONTRIB:            error: invalid contribution to string offsets table in section .debug_str_offsets.
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-4.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-4.s
deleted file mode 100644
index d4d56577206cd..0000000000000
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-4.s
+++ /dev/null
@@ -1,56 +0,0 @@
-# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
-# RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=INVALIDLENGTH %s
-#
-# Test object to verify that llvm-dwarfdump handles an invalid string offsets
-# table.
-
-        .section .debug_str,"MS",@progbits,1
-str_producer:
-        .asciz "Handmade DWARF producer"
-str_CU1:
-        .asciz "Compile_Unit_1"
-
-# A rudimentary abbrev section.
-        .section .debug_abbrev,"",@progbits
-        .byte 0x01  # Abbrev code
-        .byte 0x11  # DW_TAG_compile_unit
-        .byte 0x00  # DW_CHILDREN_no
-        .byte 0x72  # DW_AT_str_offsets_base
-        .byte 0x17  # DW_FORM_sec_offset
-        .byte 0x00  # EOM(1)
-        .byte 0x00  # EOM(2)
-        .byte 0x00  # EOM(3)
-
-# A rudimentary compile unit to convince dwarfdump that we are dealing with a
-# DWARF v5 string offsets table.
-        .section .debug_info,"",@progbits
-
-# DWARF v5 CU header.
-        .long  CU1_5_end-CU1_5_version  # Length of Unit
-CU1_5_version:
-        .short 5               # DWARF version number
-        .byte 1                # DWARF Unit Type
-        .byte 8                # Address Size (in bytes)
-        .long .debug_abbrev    # Offset Into Abbrev. Section
-# A compile-unit DIE, which has no attributes.
-        .byte 1                # Abbreviation code
-        .long .debug_str_offsets_base0
-CU1_5_end:
-
-# Every unit contributes to the string_offsets table.
-        .section .debug_str_offsets,"",@progbits
-# CU1's contribution
-# The length is not a multiple of 4. Check that we don't read off the
-# end.
-        .long .debug_str_offsets_segment0_end-.debug_str_offsets_base0+4
-        .short 5    # DWARF version
-        .short 0    # Padding
-.debug_str_offsets_base0:
-        .long str_producer
-        .long str_CU1
-        .byte 0
-.debug_str_offsets_segment0_end:
-
-# INVALIDLENGTH:             .debug_str_offsets contents:
-# INVALIDLENGTH-NOT:         contents:
-# INVALIDLENGTH:             error: invalid contribution to string offsets table in section .debug_str_offsets.
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-6.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-6.s
index 0a35c5e93db31..03d70347a2f22 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-6.s
+++ b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid-6.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
-# RUN: llvm-dwarfdump -v %t.o | FileCheck --check-prefix=OVERLAP %s
+# RUN: llvm-dwarfdump -v %t.o 2>&1 | FileCheck --check-prefix=OVERLAP %s
 #
 # Test object to verify that llvm-dwarfdump handles an invalid string offsets
 # table with overlapping contributions.
@@ -89,6 +89,4 @@ CU2_5_end:
         .long str_CU2_dir
 .debug_str_offsets_segment1_end:
 
-# OVERLAP:            .debug_str_offsets contents:
-# OVERLAP-NOT:        contents:
 # OVERLAP:            error: overlapping contributions to string offsets table in section .debug_str_offsets.
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid.s
new file mode 100644
index 0000000000000..a4c22b961b332
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets-invalid.s
@@ -0,0 +1,154 @@
+# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.o
+# RUN: llvm-dwarfdump -v %t.o 2>&1 | FileCheck %s
+#
+# Test object to verify that llvm-dwarfdump handles an invalid string offsets
+# table.
+#
+# A rudimentary abbrev section.
+        .section .debug_abbrev,"",@progbits
+        .byte 0x01  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x72  # DW_AT_str_offsets_base
+        .byte 0x17  # DW_FORM_sec_offset
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x00  # EOM(3)
+
+# A rudimentary compile unit to convince dwarfdump that we are dealing with a 
+# DWARF v5 string offsets table.
+        .section .debug_info,"",@progbits
+
+# DWARF v5 32 bit CU header.
+        .long  CU1_end-CU1_begin  # Length of Unit
+CU1_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+	.long 0                # DW_AT_str_offsets_base
+CU1_end:
+
+# DWARF v5 64 bit CU header.
+	.long 0xffffffff
+        .quad  CU2_end-CU2_begin  # Length of Unit
+CU2_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .quad .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+	.quad 0                # DW_AT_str_offsets_base
+CU2_end:
+        .long  CU3_end-CU3_begin  # Length of Unit
+CU3_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+	.quad .str_off0        # DW_AT_str_offsets_base
+CU3_end:
+        .long  CU4_end-CU4_begin  # Length of Unit
+CU4_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+	.quad .str_off1        # DW_AT_str_offsets_base
+CU4_end:
+        .long  CU5_end-CU5_begin  # Length of Unit
+CU5_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+        .long .str_off2_begin  # DW_AT_str_offsets_base
+CU5_end:
+        .long  CU6_end-CU6_begin  # Length of Unit
+CU6_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+        .long .str_off3_begin  # DW_AT_str_offsets_base
+CU6_end:
+	.long 0xffffffff
+        .quad  CU7_end-CU7_begin  # Length of Unit
+CU7_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .quad .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+	.quad .str_off4_begin  # DW_AT_str_offsets_base
+CU7_end:
+        .long  CU8_end-CU8_begin  # Length of Unit
+CU8_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+        .long .str_off_end+16  # DW_AT_str_offsets_base
+CU8_end:
+	.long 0xffffffff
+        .quad  CU9_end-CU9_begin  # Length of Unit
+CU9_begin:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .quad .debug_abbrev    # Offset Into Abbrev. Section
+        .byte 1                # Abbreviation code: DW_TAG_compile_unit
+	.quad .str_off_end+8  # DW_AT_str_offsets_base
+CU9_end:
+
+        .section .debug_str_offsets,"",@progbits
+# Invalid length
+        .long 0xfffffff4
+        .short 5    # DWARF version
+        .short 0    # Padding
+.str_off0:
+        .long 0
+# Length beyond section bounds
+        .long .str_off_end-.str_off1+8
+        .short 5    # DWARF version
+        .short 0    # Padding
+.str_off1:
+        .long 0
+# Length intrudes on following unit
+        .long .str_off2_end-.str_off2_begin+8
+        .short 5    # DWARF version
+        .short 0    # Padding
+.str_off2_begin:
+        .long 0
+.str_off2_end:
+# Plain contribution, no errors here
+        .long .str_off3_end-.str_off3_begin
+        .short 5    # DWARF version
+        .short 0    # Padding
+.str_off3_begin:
+        .long 0
+.str_off3_end:
+# 32 bit contribution referenced from a 64 bit unit
+        .long .str_off4_end-.str_off4_begin
+        .short 5    # DWARF version
+        .short 0    # Padding
+.str_off4_begin:
+        .long 0
+.str_off4_end:
+.str_off_end:
+
+
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: insufficient space for 32 bit header prefix
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: insufficient space for 64 bit header prefix
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: invalid length
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: length exceeds section size
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: 32 bit contribution referenced from a 64 bit unit
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: section offset exceeds section size
+# CHECK: error: invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: section offset exceeds section size
+# CHECK: error: overlapping contributions to string offsets table in section .debug_str_offsets.
diff --git a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s
index 064061b5847f6..f6303f123ad3f 100644
--- a/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s
+++ b/llvm/test/DebugInfo/X86/dwarfdump-str-offsets.s
@@ -166,6 +166,11 @@ dwo_str_TU_5_type:
         .byte 0x0b  # DW_FORM_data1
         .byte 0x00  # EOM(1)
         .byte 0x00  # EOM(2)
+        .byte 0x09  # Abbrev code
+        .byte 0x11  # DW_TAG_compile_unit
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
         .byte 0x00  # EOM(3)
 
 # And a .dwo copy of a subset for the .dwo sections.
@@ -255,6 +260,22 @@ CU2_5_version:
         .byte 2                # The index of the comp dir string
         .byte 0 # NULL
 CU2_5_end:
+# DWARF v5 CU without str_offsets_base - this shouldn't produce an error/nor
+# prevent other str_offsets contributions from being dumped.
+	.long CU3_5_end-CU3_5_version  # Length of Unit
+CU3_5_version:
+        .short 5               # DWARF version number
+        .byte 1                # DWARF Unit Type
+        .byte 8                # Address Size (in bytes)
+        .long .debug_abbrev    # Offset Into Abbrev. Section
+# The compile-unit DIE with no attributes.
+        .byte 9                # Abbreviation code
+CU3_5_end:
+
+
+
+
+
 
         .section .debug_types,"",@progbits
 # DWARF v5 Type unit header.

From bab1d8edcf4b29c3529db3c6f665d2131fc917fa Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sat, 25 May 2019 00:27:19 +0000
Subject: [PATCH 0216/1176] Rename clangToolingRefactor to
 clangToolingRefactoring for consistency with its directory

See "[cfe-dev] The name of clang/lib/Tooling/Refactoring".

Differential Revision: https://reviews.llvm.org/D62420

llvm-svn: 361684
---
 clang-tools-extra/clang-apply-replacements/CMakeLists.txt       | 2 +-
 clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt  | 2 +-
 clang-tools-extra/clang-tidy/utils/CMakeLists.txt               | 2 +-
 clang-tools-extra/clangd/CMakeLists.txt                         | 2 +-
 clang-tools-extra/tool-template/CMakeLists.txt                  | 2 +-
 .../unittests/clang-apply-replacements/CMakeLists.txt           | 2 +-
 clang-tools-extra/unittests/clang-tidy/CMakeLists.txt           | 2 +-
 clang/lib/Tooling/Refactoring/CMakeLists.txt                    | 2 +-
 clang/tools/clang-refactor/CMakeLists.txt                       | 2 +-
 clang/tools/clang-rename/CMakeLists.txt                         | 2 +-
 clang/unittests/Rename/CMakeLists.txt                           | 2 +-
 clang/unittests/Tooling/CMakeLists.txt                          | 2 +-
 llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn  | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/clang-tools-extra/clang-apply-replacements/CMakeLists.txt b/clang-tools-extra/clang-apply-replacements/CMakeLists.txt
index 02da0851a72be..5bfdcb487e17a 100644
--- a/clang-tools-extra/clang-apply-replacements/CMakeLists.txt
+++ b/clang-tools-extra/clang-apply-replacements/CMakeLists.txt
@@ -10,7 +10,7 @@ add_clang_library(clangApplyReplacements
   clangBasic
   clangRewrite
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
 
 include_directories(
diff --git a/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt b/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt
index 945b486103b2e..26aa760c731d8 100644
--- a/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt
+++ b/clang-tools-extra/clang-apply-replacements/tool/CMakeLists.txt
@@ -12,7 +12,7 @@ target_link_libraries(clang-apply-replacements
   clangFormat
   clangRewrite
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
 
 install(TARGETS clang-apply-replacements
diff --git a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt
index e093215b8e8e8..5b2cc93296420 100644
--- a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt
@@ -23,5 +23,5 @@ add_clang_library(clangTidyUtils
   clangBasic
   clangLex
   clangTidy
-  clangToolingRefactor
+  clangToolingRefactoring
   )
diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 0a10b6d0d2039..55c7ee5b805a9 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -125,7 +125,7 @@ add_clang_library(clangDaemon
   clangTooling
   clangToolingCore
   clangToolingInclusions
-  clangToolingRefactor
+  clangToolingRefactoring
   ${LLVM_PTHREAD_LIB}
   ${CLANGD_ATOMIC_LIB}
   )
diff --git a/clang-tools-extra/tool-template/CMakeLists.txt b/clang-tools-extra/tool-template/CMakeLists.txt
index 6478157049290..9a304d4344ee8 100644
--- a/clang-tools-extra/tool-template/CMakeLists.txt
+++ b/clang-tools-extra/tool-template/CMakeLists.txt
@@ -13,5 +13,5 @@ target_link_libraries(tool-template
   clangBasic
   clangFrontend
   clangTooling
-  clangToolingRefactor
+  clangToolingRefactoring
   )
diff --git a/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt b/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt
index 9e5fac2a7bcef..d3200d76b0837 100644
--- a/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-apply-replacements/CMakeLists.txt
@@ -16,5 +16,5 @@ target_link_libraries(ClangApplyReplacementsTests
   clangApplyReplacements
   clangBasic
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
index e00b2a6eedc98..0d91c6e719966 100644
--- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt
@@ -37,5 +37,5 @@ target_link_libraries(ClangTidyTests
   clangTidyUtils
   clangTooling
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
diff --git a/clang/lib/Tooling/Refactoring/CMakeLists.txt b/clang/lib/Tooling/Refactoring/CMakeLists.txt
index 3b8290155b7bc..d1f092f261c9f 100644
--- a/clang/lib/Tooling/Refactoring/CMakeLists.txt
+++ b/clang/lib/Tooling/Refactoring/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(LLVM_LINK_COMPONENTS Support)
 
-add_clang_library(clangToolingRefactor
+add_clang_library(clangToolingRefactoring
   ASTSelection.cpp
   ASTSelectionRequirements.cpp
   AtomicChange.cpp
diff --git a/clang/tools/clang-refactor/CMakeLists.txt b/clang/tools/clang-refactor/CMakeLists.txt
index 48206e7306580..5340d7e9b2037 100644
--- a/clang/tools/clang-refactor/CMakeLists.txt
+++ b/clang/tools/clang-refactor/CMakeLists.txt
@@ -19,5 +19,5 @@ target_link_libraries(clang-refactor
   clangSerialization
   clangTooling
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
diff --git a/clang/tools/clang-rename/CMakeLists.txt b/clang/tools/clang-rename/CMakeLists.txt
index 45cbd763425c0..1abf7ed4e82b4 100644
--- a/clang/tools/clang-rename/CMakeLists.txt
+++ b/clang/tools/clang-rename/CMakeLists.txt
@@ -15,7 +15,7 @@ target_link_libraries(clang-rename
   clangSerialization
   clangTooling
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
 
 install(PROGRAMS clang-rename.py
diff --git a/clang/unittests/Rename/CMakeLists.txt b/clang/unittests/Rename/CMakeLists.txt
index f91021dd15457..4db6049e15120 100644
--- a/clang/unittests/Rename/CMakeLists.txt
+++ b/clang/unittests/Rename/CMakeLists.txt
@@ -24,5 +24,5 @@ target_link_libraries(ClangRenameTests
   clangSerialization
   clangTooling
   clangToolingCore
-  clangToolingRefactor
+  clangToolingRefactoring
   )
diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index 111e07e8c907f..a3d2fc2842f80 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -70,7 +70,7 @@ target_link_libraries(ToolingTests
   clangTooling
   clangToolingCore
   clangToolingInclusions
-  clangToolingRefactor
+  clangToolingRefactoring
   LLVMTestingSupport
   )
 
diff --git a/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn
index 1f5b0b52891b5..ef25bec41a0fd 100644
--- a/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Tooling/Refactoring/BUILD.gn
@@ -1,5 +1,5 @@
 static_library("Refactoring") {
-  output_name = "clangToolingRefactor"
+  output_name = "clangToolingRefactoring"
   configs += [ "//llvm/utils/gn/build:clang_code" ]
   deps = [
     "//clang/lib/AST",

From a846427ad0ac94506661a65f9949c627011a73a4 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Sat, 25 May 2019 00:50:03 +0000
Subject: [PATCH 0217/1176] Revert "[Analysis] Link library dependencies to
 Analysis plugins"

This reverts commit r361340. The following builder has been broken for
the past few days because of this commit:

http://green.lab.llvm.org/green/job/clang-stage2-cmake-RgSan/

Also revert r361399, which was committed to fix r361340.

llvm-svn: 361685
---
 .../plugins/CheckerDependencyHandling/CMakeLists.txt   |  3 +--
 .../plugins/CheckerOptionHandling/CMakeLists.txt       |  3 +--
 .../Analysis/plugins/SampleAnalyzer/CMakeLists.txt     |  3 +--
 llvm/cmake/modules/HandleLLVMOptions.cmake             | 10 +++-------
 4 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt b/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
index 149bccf1e0caa..80e2cdbd3a258 100644
--- a/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
+++ b/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
@@ -1,12 +1,11 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerDependencyHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE CheckerDependencyHandling.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS)
+if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
   target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
     clangAnalysis
     clangAST
     clangStaticAnalyzerCore
-    clangStaticAnalyzerFrontend
     LLVMSupport
     )
 endif()
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt b/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
index 2d6a2095091dc..6a1d5e8527941 100644
--- a/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
+++ b/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
@@ -1,12 +1,11 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerOptionHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE CheckerOptionHandling.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS)
+if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
   target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
     clangAnalysis
     clangAST
     clangStaticAnalyzerCore
-    clangStaticAnalyzerFrontend
     LLVMSupport
     )
 endif()
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
index d0a935c1676ef..7c7b2aec1988d 100644
--- a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
+++ b/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
@@ -1,12 +1,11 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
 add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS)
+if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
   target_link_libraries(SampleAnalyzerPlugin PRIVATE
     clangAnalysis
     clangAST
     clangStaticAnalyzerCore
-    clangStaticAnalyzerFrontend
     LLVMSupport
     )
 endif()
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index f172092508afc..cb9a01e1d39f7 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -914,14 +914,10 @@ endif()
 
 # Plugin support
 # FIXME: Make this configurable.
-if(WIN32 OR CYGWIN)
-  if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
-    set(LLVM_ENABLE_PLUGINS ON)
-  else()
-    set(LLVM_ENABLE_PLUGINS OFF)
-  endif()
+if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
+  set(LLVM_ENABLE_PLUGINS ON)
 else()
-  set(LLVM_ENABLE_PLUGINS ${LLVM_ENABLE_PIC})
+  set(LLVM_ENABLE_PLUGINS OFF)
 endif()
 
 # By default we should enable LLVM_ENABLE_IDE only for multi-configuration

From 0353e5a6cdc8df518ab0cfebcc0b92747265a55e Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Sat, 25 May 2019 01:04:17 +0000
Subject: [PATCH 0218/1176] Permit static local structured bindings to be named
 from arbitrary scopes inside their declaring scope.

llvm-svn: 361686
---
 clang/include/clang/AST/DeclCXX.h             | 12 ++++++++++++
 clang/lib/AST/DeclCXX.cpp                     |  6 ++++++
 clang/lib/Sema/SemaExpr.cpp                   |  8 +++++---
 clang/lib/Serialization/ASTReaderDecl.cpp     |  4 +++-
 clang/test/CodeGenCXX/cxx1z-decomposition.cpp | 12 +++++++++++-
 clang/test/SemaCXX/cxx1z-decomposition.cpp    |  9 +++++++--
 6 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index fc243de896eec..1f879c53f4776 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -63,6 +63,7 @@ class CXXDestructorDecl;
 class CXXFinalOverriderMap;
 class CXXIndirectPrimaryBaseSet;
 class CXXMethodDecl;
+class DecompositionDecl;
 class DiagnosticBuilder;
 class FriendDecl;
 class FunctionTemplateDecl;
@@ -3918,6 +3919,8 @@ class StaticAssertDecl : public Decl {
 /// x[0], x[1], and x[2] respectively, where x is the implicit
 /// DecompositionDecl of type 'int (&)[3]'.
 class BindingDecl : public ValueDecl {
+  /// The declaration that this binding binds to part of.
+  LazyDeclPtr Decomp;
   /// The binding represented by this declaration. References to this
   /// declaration are effectively equivalent to this expression (except
   /// that it is only evaluated once at the point of declaration of the
@@ -3941,6 +3944,10 @@ class BindingDecl : public ValueDecl {
   /// decomposition declaration, and when the initializer is type-dependent.
   Expr *getBinding() const { return Binding; }
 
+  /// Get the decomposition declaration that this binding represents a
+  /// decomposition of.
+  ValueDecl *getDecomposedDecl() const;
+
   /// Get the variable (if any) that holds the value of evaluating the binding.
   /// Only present for user-defined bindings for tuple-like types.
   VarDecl *getHoldingVar() const;
@@ -3953,6 +3960,9 @@ class BindingDecl : public ValueDecl {
     this->Binding = Binding;
   }
 
+  /// Set the decomposed variable for this BindingDecl.
+  void setDecomposedDecl(ValueDecl *Decomposed) { Decomp = Decomposed; }
+
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == Decl::Binding; }
 };
@@ -3980,6 +3990,8 @@ class DecompositionDecl final
         NumBindings(Bindings.size()) {
     std::uninitialized_copy(Bindings.begin(), Bindings.end(),
                             getTrailingObjects<BindingDecl *>());
+    for (auto *B : Bindings)
+      B->setDecomposedDecl(this);
   }
 
   void anchor() override;
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index f9f70ecb59041..941fd66c7bab5 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2929,6 +2929,12 @@ BindingDecl *BindingDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
   return new (C, ID) BindingDecl(nullptr, SourceLocation(), nullptr);
 }
 
+ValueDecl *BindingDecl::getDecomposedDecl() const {
+  ExternalASTSource *Source =
+      Decomp.isOffset() ? getASTContext().getExternalSource() : nullptr;
+  return cast_or_null<ValueDecl>(Decomp.get(Source));
+}
+
 VarDecl *BindingDecl::getHoldingVar() const {
   Expr *B = getBinding();
   if (!B)
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 0c04f03f06d6d..3a12c2dd84ffb 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3059,9 +3059,11 @@ ExprResult Sema::BuildDeclarationNameExpr(
       // FIXME: Support lambda-capture of BindingDecls, once CWG actually
       // decides how that's supposed to work.
       auto *BD = cast<BindingDecl>(VD);
-      if (BD->getDeclContext()->isFunctionOrMethod() &&
-          BD->getDeclContext() != CurContext)
-        diagnoseUncapturableValueReference(*this, Loc, BD, CurContext);
+      if (BD->getDeclContext() != CurContext) {
+        auto *DD = dyn_cast_or_null<VarDecl>(BD->getDecomposedDecl());
+        if (DD && DD->hasLocalStorage())
+          diagnoseUncapturableValueReference(*this, Loc, BD, CurContext);
+      }
       break;
     }
 
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 27fcc9e40b517..5d40b85b03df7 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1459,8 +1459,10 @@ void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) {
 void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) {
   VisitVarDecl(DD);
   auto **BDs = DD->getTrailingObjects<BindingDecl *>();
-  for (unsigned I = 0; I != DD->NumBindings; ++I)
+  for (unsigned I = 0; I != DD->NumBindings; ++I) {
     BDs[I] = ReadDeclAs<BindingDecl>();
+    BDs[I]->setDecomposedDecl(DD);
+  }
 }
 
 void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) {
diff --git a/clang/test/CodeGenCXX/cxx1z-decomposition.cpp b/clang/test/CodeGenCXX/cxx1z-decomposition.cpp
index 8e71b1230c4d5..31ade6f5fdb28 100644
--- a/clang/test/CodeGenCXX/cxx1z-decomposition.cpp
+++ b/clang/test/CodeGenCXX/cxx1z-decomposition.cpp
@@ -129,7 +129,7 @@ void test_static_simple() {
 }
 
 // CHECK-LABEL: define {{.*}}@_Z17test_static_tuple
-void test_static_tuple() {
+int test_static_tuple() {
   // Note that the desugaring specified for this construct requires three
   // separate guarded initializations. It is possible for an exception to be
   // thrown after the first initialization and before the second, and if that
@@ -162,4 +162,14 @@ void test_static_tuple() {
   // CHECK: store {{.*}}, {{.*}} @_ZGRZ17test_static_tuplevE2x2_
   // CHECK: store {{.*}} @_ZGRZ17test_static_tuplevE2x2_, {{.*}} @_ZZ17test_static_tuplevE2x2
   // CHECK: call void @__cxa_guard_release({{.*}} @_ZGVZ17test_static_tuplevE2x2)
+
+  struct Inner {
+    // CHECK-LABEL: define {{.*}}@_ZZ17test_static_tuplevEN5Inner1fEv(
+    // FIXME: This first load should be constant-folded to the _ZGV... temporary.
+    // CHECK: load {{.*}} @_ZZ17test_static_tuplevE2x2
+    // CHECK: load
+    // CHECK: ret
+    int f() { return x2; }
+  };
+  return Inner().f();
 }
diff --git a/clang/test/SemaCXX/cxx1z-decomposition.cpp b/clang/test/SemaCXX/cxx1z-decomposition.cpp
index f174c79e59573..d2dc939beb5df 100644
--- a/clang/test/SemaCXX/cxx1z-decomposition.cpp
+++ b/clang/test/SemaCXX/cxx1z-decomposition.cpp
@@ -37,14 +37,19 @@ constexpr bool g(S &&s) {
 }
 static_assert(g({1, 2}));
 
+auto [outer1, outer2] = S{1, 2};
 void enclosing() {
-  struct S { int a; };
+  struct S { int a = outer1; };
   auto [n] = S(); // expected-note 2{{'n' declared here}}
 
   struct Q { int f() { return n; } }; // expected-error {{reference to local binding 'n' declared in enclosing function}}
-  // FIXME: This is probably supposed to be valid, but we do not have clear rules on how it's supposed to work.
   (void) [&] { return n; }; // expected-error {{reference to local binding 'n' declared in enclosing function}}
   (void) [n] {}; // expected-error {{'n' in capture list does not name a variable}}
+
+  static auto [m] = S(); // expected-warning {{extension}}
+  struct R { int f() { return m; } };
+  (void) [&] { return m; };
+  (void) [m] {}; // expected-error {{'m' in capture list does not name a variable}}
 }
 
 void bitfield() {

From e6e038c322242393268a7ae26d7706db0d1c1e14 Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Sat, 25 May 2019 01:35:14 +0000
Subject: [PATCH 0219/1176] [clangd] tweaks: Add clangBasic dependency to
 LINK_LIBS

This is necessary to make builds with `-DBUILD_SHARED_LIBS=ON` work.

llvm-svn: 361687
---
 clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt b/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt
index 837853139689a..d8bb7bc4bf4cc 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt
+++ b/clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_library(clangDaemonTweaks OBJECT
 
   LINK_LIBS
   clangAST
+  clangBasic
   clangDaemon
   clangToolingCore
   )

From 3b9373744691bee20b71a66e8211c87fcc1d4c19 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Sat, 25 May 2019 01:52:38 +0000
Subject: [PATCH 0220/1176] Revert r361644, "[AMDGPU] Divergence driven ISel.
 Assign register class for cross block values according to the divergence."

Broke sanitizer bots:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux/builds/21694/steps/bootstrap%20clang/logs/stdio
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/32478/steps/check-llvm%20asan/logs/stdio

llvm-svn: 361688
---
 .../llvm/CodeGen/FunctionLoweringInfo.h       |  11 +-
 llvm/include/llvm/CodeGen/SelectionDAG.h      |   1 -
 llvm/include/llvm/CodeGen/TargetLowering.h    |  11 +-
 .../include/llvm/CodeGen/TargetRegisterInfo.h |   5 -
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   6 +-
 .../SelectionDAG/FunctionLoweringInfo.cpp     |  14 +-
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp |  33 ++--
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  |   2 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   4 +-
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp |   2 +-
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp    | 142 ++++++++++--------
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  91 +----------
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   5 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  13 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h       |   5 -
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   4 +-
 llvm/lib/Target/ARM/ARMISelLowering.h         |   3 +-
 llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll    |  12 +-
 llvm/test/CodeGen/AMDGPU/branch-relaxation.ll |   3 +-
 llvm/test/CodeGen/AMDGPU/branch-uniformity.ll |   4 +-
 .../AMDGPU/control-flow-fastregalloc.ll       |   7 +-
 .../divergent-branch-uniform-condition.ll     |  55 ++++---
 .../AMDGPU/extract_subvector_vec4_vec3.ll     |   6 +-
 llvm/test/CodeGen/AMDGPU/fabs.ll              |  12 +-
 .../CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll   |  58 ++++---
 llvm/test/CodeGen/AMDGPU/fmin_legacy.ll       |   8 +-
 llvm/test/CodeGen/AMDGPU/fneg-fabs.ll         |  16 +-
 llvm/test/CodeGen/AMDGPU/fsub.ll              |  12 +-
 llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll |  10 +-
 .../AMDGPU/i1-copy-phi-uniform-branch.ll      |   1 +
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll |   6 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll   |   2 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll |   8 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll     |   2 -
 .../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll     |   2 +-
 .../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll      |   2 +-
 llvm/test/CodeGen/AMDGPU/loop_break.ll        |   8 +-
 llvm/test/CodeGen/AMDGPU/madak.ll             |  12 +-
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |   5 +-
 llvm/test/CodeGen/AMDGPU/multilevel-break.ll  |   5 +-
 llvm/test/CodeGen/AMDGPU/select-opt.ll        |   4 +-
 llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll |   3 +-
 .../CodeGen/AMDGPU/si-fix-sgpr-copies.mir     |   2 +-
 llvm/test/CodeGen/AMDGPU/smrd.ll              |   1 +
 .../AMDGPU/subreg-coalescer-undef-use.ll      |  53 +++----
 .../AMDGPU/uniform-loop-inside-nonuniform.ll  |   5 +-
 .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll |   9 +-
 llvm/test/CodeGen/AMDGPU/valu-i1.ll           |   6 +-
 ...vgpr-spill-emergency-stack-slot-compute.ll |   1 -
 49 files changed, 279 insertions(+), 413 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index fb60191abd3a0..b3077fcaabd4f 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -13,6 +13,7 @@
 
 #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
 #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
+
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
@@ -20,7 +21,6 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -57,7 +57,6 @@ class FunctionLoweringInfo {
   const TargetLowering *TLI;
   MachineRegisterInfo *RegInfo;
   BranchProbabilityInfo *BPI;
-  const LegacyDivergenceAnalysis *DA;
   /// CanLowerReturn - true iff the function's return value can be lowered to
   /// registers.
   bool CanLowerReturn;
@@ -199,11 +198,9 @@ class FunctionLoweringInfo {
     return ValueMap.count(V);
   }
 
-  unsigned CreateReg(MVT VT, bool isDivergent = false);
-
-  unsigned CreateRegs(const Value *V);
+  unsigned CreateReg(MVT VT);
 
-  unsigned CreateRegs(Type *Ty, bool isDivergent = false);
+  unsigned CreateRegs(Type *Ty);
 
   unsigned InitializeRegForValue(const Value *V) {
     // Tokens never live in vregs.
@@ -212,7 +209,7 @@ class FunctionLoweringInfo {
     unsigned &R = ValueMap[V];
     assert(R == 0 && "Already initialized this value register!");
     assert(VirtReg2Value.empty());
-    return R = CreateRegs(V);
+    return R = CreateRegs(V->getType());
   }
 
   /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 8afd3b2df53c9..56dd1ccbb7309 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -406,7 +406,6 @@ class SelectionDAG {
   const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
   const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
   const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
-  const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
   LLVMContext *getContext() const {return Context; }
   OptimizationRemarkEmitter &getORE() const { return *ORE; }
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 97537cf7d4b21..b1a64744f0642 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -636,21 +636,12 @@ class TargetLoweringBase {
 
   /// Return the register class that should be used for the specified value
   /// type.
-  virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
-    (void)isDivergent;
+  virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
     const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
     assert(RC && "This value type is not natively supported!");
     return RC;
   }
 
-  /// Allows target to decide about the register class of the
-  /// specific value that is live outside the defining block.
-  /// Returns true if the value needs uniform register class.
-  virtual bool requiresUniformRegister(MachineFunction &MF,
-                                       const Value *) const {
-    return false;
-  }
-
   /// Return the 'representative' register class for the specified value
   /// type.
   ///
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 7c65e7407d9e2..5ed1e448575fc 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -520,11 +520,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
   /// function.  Used by MachineRegisterInfo::isConstantPhysReg().
   virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
 
-  /// Returns true if the register class is considered divergent.
-  virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
-    return false;
-  }
-
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that
   /// have call sequences where a GOT register may be updated by the caller
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d53ee3134d550..117654bc7a3f7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13919,11 +13919,9 @@ struct LoadedSlice {
     assert(DAG && "Missing context");
     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
     EVT ResVT = Use->getValueType(0);
-    const TargetRegisterClass *ResRC =
-        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
+    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
     const TargetRegisterClass *ArgRC =
-        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
-                           Use->getOperand(0)->isDivergent());
+        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
       return false;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8b405562904f3..d8ef10f58aa7c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,7 +85,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
   RegInfo = &MF->getRegInfo();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   unsigned StackAlign = TFI->getStackAlignment();
-  DA = DAG->getDivergenceAnalysis();
 
   // Check whether the function can return without sret-demotion.
   SmallVector<ISD::OutputArg, 4> Outs;
@@ -346,9 +345,9 @@ void FunctionLoweringInfo::clear() {
 }
 
 /// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
   return RegInfo->createVirtualRegister(
-      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
+      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
 }
 
 /// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -358,7 +357,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
 /// In the case that the given value has struct or array type, this function
 /// will assign registers for each member or element.
 ///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
 
   SmallVector<EVT, 4> ValueVTs;
@@ -371,18 +370,13 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
 
     unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i) {
-      unsigned R = CreateReg(RegisterVT, isDivergent);
+      unsigned R = CreateReg(RegisterVT);
       if (!FirstReg) FirstReg = R;
     }
   }
   return FirstReg;
 }
 
-unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
-  return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
-                                      DA->isDivergent(V));
-}
-
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
 /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
 /// the register's LiveOutInfo is for a smaller bit width, it is extended to
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4b78d1bb6b160..059e5f7c8dd33 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -105,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
 
   // Stick to the preferred register classes for legal types.
   if (TLI->isTypeLegal(VT))
-    UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+    UseRC = TLI->getRegClassFor(VT);
 
   if (!IsClone && !IsCloned)
     for (SDNode *User : Node->uses()) {
@@ -164,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
            "Incompatible phys register def and uses!");
     DstRC = UseRC;
   } else {
-    DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
+    DstRC = TLI->getRegClassFor(VT);
   }
 
   // If all uses are reading from the src physical register and copying the
@@ -225,9 +225,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
     // type correctly. For example, a 64-bit float (X86::FR64) can't live in
     // the 32-bit float super-class (X86::FR32).
     if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
-      const TargetRegisterClass *VTRC = TLI->getRegClassFor(
-          Node->getSimpleValueType(i),
-          (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
+      const TargetRegisterClass *VTRC =
+        TLI->getRegClassFor(Node->getSimpleValueType(i));
       if (RC)
         VTRC = TRI->getCommonSubClass(RC, VTRC);
       if (VTRC)
@@ -290,8 +289,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
     // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
     if (!VReg) {
-      const TargetRegisterClass *RC = TLI->getRegClassFor(
-          Op.getSimpleValueType(), Op.getNode()->isDivergent());
+      const TargetRegisterClass *RC =
+        TLI->getRegClassFor(Op.getSimpleValueType());
       VReg = MRI->createVirtualRegister(RC);
     }
     BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -396,15 +395,11 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     unsigned VReg = R->getReg();
     MVT OpVT = Op.getSimpleValueType();
+    const TargetRegisterClass *OpRC =
+        TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
     const TargetRegisterClass *IIRC =
         II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
            : nullptr;
-    const TargetRegisterClass *OpRC =
-        TLI->isTypeLegal(OpVT)
-            ? TLI->getRegClassFor(OpVT,
-                                  Op.getNode()->isDivergent() ||
-                                      (IIRC && TRI->isDivergentRegClass(IIRC)))
-            : nullptr;
 
     if (OpRC && IIRC && OpRC != IIRC &&
         TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -469,7 +464,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
 }
 
 unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
-                                          MVT VT, bool isDivergent, const DebugLoc &DL) {
+                                          MVT VT, const DebugLoc &DL) {
   const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
   const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
 
@@ -484,7 +479,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
 
   // VReg couldn't be reasonably constrained.  Emit a COPY to a new virtual
   // register instead.
-  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
+  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
   assert(RC && "No legal register class for VT supports that SubIdx");
   unsigned NewReg = MRI->createVirtualRegister(RC);
   BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -519,7 +514,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     // classes.
     unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     const TargetRegisterClass *TRC =
-      TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+      TLI->getRegClassFor(Node->getSimpleValueType(0));
 
     unsigned Reg;
     MachineInstr *DefMI;
@@ -553,7 +548,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
       if (TargetRegisterInfo::isVirtualRegister(Reg))
         Reg = ConstrainForSubReg(Reg, SubIdx,
                                  Node->getOperand(0).getSimpleValueType(),
-                                 Node->isDivergent(), Node->getDebugLoc());
+                                 Node->getDebugLoc());
+
       // Create the destreg if it is missing.
       if (VRBase == 0)
         VRBase = MRI->createVirtualRegister(TRC);
@@ -588,8 +584,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     //
     // There is no constraint on the %src register class.
     //
-    const TargetRegisterClass *SRC =
-        TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
+    const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
     SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
     assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 42f7846fe7c3a..3188c2678f1af 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
   /// supports SubIdx sub-registers.  Emit a copy if that isn't possible.
   /// Return the virtual register to use.
   unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
-                              bool isDivergent, const DebugLoc &DL);
+                              const DebugLoc &DL);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a5274877ecee4..76e5847ba111d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9844,7 +9844,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
         unsigned &RegOut = ConstantsOut[C];
         if (RegOut == 0) {
-          RegOut = FuncInfo.CreateRegs(C);
+          RegOut = FuncInfo.CreateRegs(C->getType());
           CopyValueToVirtualRegister(C, RegOut);
         }
         Reg = RegOut;
@@ -9857,7 +9857,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
           assert(isa<AllocaInst>(PHIOp) &&
                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                  "Didn't codegen value into a register!??");
-          Reg = FuncInfo.CreateRegs(PHIOp);
+          Reg = FuncInfo.CreateRegs(PHIOp->getType());
           CopyValueToVirtualRegister(PHIOp, Reg);
         }
       }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6f55f98c51fd4..6c9a1cd646ef3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1485,7 +1485,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
               !Inst->use_empty()) {
             unsigned &R = FuncInfo->ValueMap[Inst];
             if (!R)
-              R = FuncInfo->CreateRegs(Inst);
+              R = FuncInfo->CreateRegs(Inst->getType());
           }
 
           bool HadTailCall = false;
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index cb24d1fe32bb1..94b1e636c7b15 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,6 +302,18 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+
 static bool phiHasBreakDef(const MachineInstr &PHI,
                            const MachineRegisterInfo &MRI,
                            SmallSet<unsigned, 8> &Visited) {
@@ -326,6 +338,16 @@ static bool phiHasBreakDef(const MachineInstr &PHI,
   return false;
 }
 
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+                                          const TargetRegisterInfo &TRI) {
+  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+       E = MBB.end(); I != E; ++I) {
+    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+      return true;
+  }
+  return false;
+}
+
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -387,6 +409,12 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
   return false;
 }
 
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+                                        const TargetRegisterInfo *TRI) {
+  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -593,73 +621,63 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         break;
       }
       case AMDGPU::PHI: {
-        unsigned hasVGPRUses = 0;
-        SetVector<const MachineInstr *> worklist;
-        worklist.insert(&MI);
-        while (!worklist.empty()) {
-          const MachineInstr *Instr = worklist.pop_back_val();
-          unsigned Reg = Instr->getOperand(0).getReg();
-          for (const auto &Use : MRI.use_operands(Reg)) {
-            const MachineInstr *UseMI = Use.getParent();
-            if (UseMI->isCopy() || UseMI->isRegSequence()) {
-              if (UseMI->isCopy() &&
-                  TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
-                  !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
-                hasVGPRUses++;
-              }
-              worklist.insert(UseMI);
-              continue;
-            }
-
-            if (UseMI->isPHI()) {
-              if (!TRI->isSGPRReg(MRI, Use.getReg()))
-                hasVGPRUses++;
-              continue;
-            }
-
-            unsigned OpNo = UseMI->getOperandNo(&Use);
-            const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
-            if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
-              const TargetRegisterClass *OpRC =
-                  TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
-              if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
-                  OpRC != &AMDGPU::VS_64RegClass) {
-                hasVGPRUses++;
-              }
-            }
-          }
-        }
-        bool hasVGPRInput = false;
-        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
-          unsigned InputReg = MI.getOperand(i).getReg();
-          MachineInstr *Def = MRI.getVRegDef(InputReg);
-          if (TRI->isVGPR(MRI, InputReg)) {
-            if (Def->isCopy()) {
-              unsigned SrcReg = Def->getOperand(1).getReg();
-              const TargetRegisterClass *RC =
-                  TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
-                                                 : TRI->getPhysRegClass(SrcReg);
-              if (TRI->isSGPRClass(RC))
-                continue;
-            }
-            hasVGPRInput = true;
-            break;
-          } else if (Def->isCopy() &&
-                     TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
-            hasVGPRInput = true;
+        unsigned Reg = MI.getOperand(0).getReg();
+        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+          break;
+
+        // We don't need to fix the PHI if the common dominator of the
+        // two incoming blocks terminates with a uniform branch.
+        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
+        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
+          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+          if (!predsHasDivergentTerminator(MBB0, TRI) &&
+              !predsHasDivergentTerminator(MBB1, TRI)) {
+            LLVM_DEBUG(dbgs()
+                       << "Not fixing PHI for uniform branch: " << MI << '\n');
             break;
           }
         }
-        unsigned PHIRes = MI.getOperand(0).getReg();
-        const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-        if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
-            (hasVGPRInput || hasVGPRUses > 1)) {
-          TII->moveToVALU(MI);
-        } else {
-          TII->legalizeOperands(MI, MDT);
+        // If a PHI node defines an SGPR and any of its operands are VGPRs,
+        // then we need to move it to the VALU.
+        //
+        // Also, if a PHI node defines an SGPR and has all SGPR operands
+        // we must move it to the VALU, because the SGPR operands will
+        // all end up being assigned the same register, which means
+        // there is a potential for a conflict if different threads take
+        // different control flow paths.
+        //
+        // For Example:
+        //
+        // sgpr0 = def;
+        // ...
+        // sgpr1 = def;
+        // ...
+        // sgpr2 = PHI sgpr0, sgpr1
+        // use sgpr2;
+        //
+        // Will Become:
+        //
+        // sgpr2 = def;
+        // ...
+        // sgpr2 = def;
+        // ...
+        // use sgpr2
+        //
+        // The one exception to this rule is when one of the operands
+        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+        // instruction.  In this case, there we know the program will
+        // never enter the second block (the loop) without entering
+        // the first block (where the condition is computed), so there
+        // is no chance for values to be over-written.
+
+        SmallSet<unsigned, 8> Visited;
+        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
+          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
+          TII->moveToVALU(MI, MDT);
         }
-
         break;
       }
       case AMDGPU::REG_SEQUENCE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8f93c63046caf..c2cda5ef4d7ce 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9637,8 +9637,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
       break;
 
     MVT VT = Src0.getValueType().getSimpleVT();
-    const TargetRegisterClass *RC =
-        getRegClassFor(VT, Src0.getNode()->isDivergent());
+    const TargetRegisterClass *RC = getRegClassFor(VT);
 
     MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
     SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -10172,91 +10171,3 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 
   return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
 }
-
-const TargetRegisterClass *
-SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
-  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
-  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
-  if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
-    return &AMDGPU::SReg_64RegClass;
-  if (!TRI->isSGPRClass(RC) && !isDivergent)
-    return TRI->getEquivalentSGPRClass(RC);
-  else if (TRI->isSGPRClass(RC) && isDivergent)
-    return TRI->getEquivalentVGPRClass(RC);
-
-  return RC;
-}
-
-static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
-  if (Visited.count(V))
-    return false;
-  Visited.insert(V);
-  bool Result = false;
-  for (auto U : V->users()) {
-    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
-      if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
-          (V == U->getOperand(1)))
-        Result = true;
-    } else {
-      Result = hasIfBreakUser(U, Visited);
-    }
-    if (Result)
-      break;
-  }
-  return Result;
-}
-
-bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
-                                               const Value *V) const {
-  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
-    switch (Intrinsic->getIntrinsicID()) {
-    default:
-      return false;
-    case Intrinsic::amdgcn_if_break:
-      return true;
-    }
-  }
-  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
-    if (const IntrinsicInst *Intrinsic =
-            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
-      switch (Intrinsic->getIntrinsicID()) {
-      default:
-        return false;
-      case Intrinsic::amdgcn_if:
-      case Intrinsic::amdgcn_else: {
-        ArrayRef<unsigned> Indices = ExtValue->getIndices();
-        if (Indices.size() == 1 && Indices[0] == 1) {
-          return true;
-        }
-      }
-      }
-    }
-  }
-  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
-    if (isa<InlineAsm>(CI->getCalledValue())) {
-      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
-      ImmutableCallSite CS(CI);
-      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
-          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
-      for (auto &TC : TargetConstraints) {
-        if (TC.Type == InlineAsm::isOutput) {
-          ComputeConstraintToUse(TC, SDValue());
-          unsigned AssignedReg;
-          const TargetRegisterClass *RC;
-          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
-              SIRI, TC.ConstraintCode,
-              getSimpleValueType(MF.getDataLayout(), CS.getType()));
-          if (RC) {
-            MachineRegisterInfo &MRI = MF.getRegInfo();
-            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
-              return true;
-            else if (SIRI->isSGPRClass(RC))
-              return true;
-          }
-        }
-      }
-    }
-  }
-  SetVector<const Value *> Visited;
-  return hasIfBreakUser(V, Visited);
-}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 094a0b054e235..60a474f51e5c4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,10 +367,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                     bool SNaN = false,
                                     unsigned Depth = 0) const override;
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-  virtual const TargetRegisterClass *
-  getRegClassFor(MVT VT, bool isDivergent) const override;
-  virtual bool requiresUniformRegister(MachineFunction &MF,
-                                       const Value *V) const override;
+
   unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
 };
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 14f5dbe6ad496..e42ed3505cf5c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2219,10 +2219,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       // These come before src2.
       removeModOperands(UseMI);
       UseMI.setDesc(get(NewOpc));
-      // It might happen that UseMI was commuted
-      // and we now have SGPR as SRC1. If so 2 inlined
-      // constant and SGPR are illegal.
-      legalizeOperands(UseMI);
 
       bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
       if (DeleteDef)
@@ -3917,7 +3913,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
     return;
 
   // Try to eliminate the copy if it is copying an immediate value.
-  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
+  if (Def->isMoveImmediate())
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
 }
 
@@ -4151,10 +4147,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
     if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
       if (!VRC) {
         assert(SRC);
-       if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
-          VRC = &AMDGPU::VReg_1RegClass;
-        } else
-          VRC = RI.getEquivalentVGPRClass(SRC);
+        VRC = RI.getEquivalentVGPRClass(SRC);
       }
       RC = VRC;
     } else {
@@ -5316,7 +5309,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::INSERT_SUBREG:
   case AMDGPU::WQM:
   case AMDGPU::WWM:
-    if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+    if (RI.hasVGPRs(NewDstRC))
       return nullptr;
 
     NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index e2df3ae5ea7e9..bfdc1ef9645de 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -195,11 +195,6 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
                                                unsigned Reg) const;
   bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
 
-  virtual bool
-  isDivergentRegClass(const TargetRegisterClass *RC) const override {
-    return !isSGPRClass(RC);
-  }
-
   bool isSGPRPressureSet(unsigned SetID) const {
     return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
   }
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fc735ae5d95f6..643d2806c521e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1429,9 +1429,7 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 
 /// getRegClassFor - Return the register class that should be used for the
 /// specified value type.
-const TargetRegisterClass *
-ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
-  (void)isDivergent;
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   // load / store 4 to 8 consecutive D registers.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 8e254d75b1c30..3b94cb0dcb0fa 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -456,8 +456,7 @@ class VectorType;
 
     /// getRegClassFor - Return the register class that should be used for the
     /// specified value type.
-    const TargetRegisterClass *
-    getRegClassFor(MVT VT, bool isDivergent = false) const override;
+    const TargetRegisterClass *getRegClassFor(MVT VT) const override;
 
     /// Returns true if a cast between SrcAS and DestAS is a noop.
     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
index 454c56cbca5d0..3d457fdd50e81 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
@@ -5,12 +5,11 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_lds:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    ds_read_b32 v1, v0
+; GCN-NEXT:    ds_read_b32 v2, v0
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB0_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    v_not_b32_e32 v1, v2
 ; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -18,6 +17,7 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB0_1
@@ -33,12 +33,11 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_global:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dword v2, v[0:1], off
+; GCN-NEXT:    global_load_dword v3, v[0:1], off
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB1_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -46,6 +45,7 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB1_1
@@ -61,12 +61,11 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_flat:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    flat_load_dword v2, v[0:1]
+; GCN-NEXT:    flat_load_dword v3, v[0:1]
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB2_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -75,6 +74,7 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB2_1
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index a2facaafb41f9..45ed056567c2e 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -99,7 +99,7 @@ bb3:
 
 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
 ; GCN: s_load_dword [[CND:s[0-9]+]]
-
+; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
 ; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
 ; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]]
 ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
@@ -117,7 +117,6 @@ bb3:
 ; GCN: v_nop_e64
 
 ; GCN: [[ENDBB]]:
-; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
 ; GCN: buffer_store_dword [[V_CND]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll b/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
index c9c801fb1911e..e6f684178035e 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
@@ -8,8 +8,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: ; %LOOP49
-; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
-; CHECK: s_cbranch_scc1
+; CHECK: v_cmp_ne_u32_e32 vcc,
+; CHECK: s_cbranch_vccnz
 ; CHECK: ; %ENDIF53
 define amdgpu_vs float @main(i32 %in) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 15e807a3e0230..41ecdd403d736 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -89,7 +89,7 @@ endif:
 }
 
 ; GCN-LABEL: {{^}}divergent_loop:
-; VGPR: workitem_private_segment_byte_size = 12{{$}}
+; VGPR: workitem_private_segment_byte_size = 16{{$}}
 
 ; GCN: {{^}}; %bb.0:
 
@@ -123,9 +123,10 @@ endif:
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
 ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
-; GCN: s_cmp_lg_u32
+; GCN: v_cmp_ne_u32_e32 vcc,
+; GCN: s_and_b64 vcc, exec, vcc
 ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN-NEXT: s_cbranch_scc1 [[LOOP]]
+; GCN-NEXT: s_cbranch_vccnz [[LOOP]]
 
 
 ; GCN: [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 08a95ecbf5ad0..8d21050ebee01 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -13,50 +13,55 @@ define amdgpu_ps void @main(i32, float) {
 ; CHECK:       ; %bb.0: ; %start
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
 ; CHECK-NEXT:    s_mov_b32 m0, s0
-; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
 ; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
-; CHECK-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
-; CHECK-NEXT:    s_mov_b64 s[2:3], 0
-; CHECK-NEXT:    ; implicit-def: $sgpr4_sgpr5
+; CHECK-NEXT:    v_cmp_nlt_f32_e64 s[0:1], 0, v0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; CHECK-NEXT:  BB0_1: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], exec
-; CHECK-NEXT:    s_cmp_lt_u32 s0, 32
-; CHECK-NEXT:    s_mov_b64 s[6:7], -1
-; CHECK-NEXT:    s_cbranch_scc0 BB0_5
+; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
+; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
+; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
+; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT:    s_cbranch_vccz BB0_5
 ; CHECK-NEXT:  ; %bb.2: ; %endif1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_mov_b64 s[4:5], -1
-; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[6:7], -1
+; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
+; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
 ; CHECK-NEXT:    ; mask branch BB0_4
 ; CHECK-NEXT:  BB0_3: ; %endif2
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_add_i32 s0, s0, 1
-; CHECK-NEXT:    s_xor_b64 s[4:5], exec, -1
+; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
 ; CHECK-NEXT:  BB0_4: ; %Flow1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT:    s_mov_b64 s[6:7], 0
-; CHECK-NEXT:  BB0_5: ; %Flow
+; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
+; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT:    s_branch BB0_6
+; CHECK-NEXT:  BB0_5: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    ; implicit-def: $vgpr1
+; CHECK-NEXT:  BB0_6: ; %Flow
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
-; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[2:3]
-; CHECK-NEXT:    s_mov_b64 s[2:3], s[8:9]
+; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
+; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
+; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz BB0_1
-; CHECK-NEXT:  ; %bb.6: ; %Flow2
+; CHECK-NEXT:  ; %bb.7: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[6:7]
-; CHECK-NEXT:    ; mask branch BB0_8
-; CHECK-NEXT:  BB0_7: ; %if1
+; this is the divergent branch with the condition not marked as divergent
+; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[2:3]
+; CHECK-NEXT:    ; mask branch BB0_9
+; CHECK-NEXT:  BB0_8: ; %if1
 ; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT:  BB0_8: ; %endloop
+; CHECK-NEXT:  BB0_9: ; %endloop
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
 ; CHECK-NEXT:    s_endpgm
-; this is the divergent branch with the condition not marked as divergent
 start:
   %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
   br label %loop
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index fe8f31a0cd2ee..a39833455a153 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -13,9 +13,9 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
   ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
   ; GCN:   [[DEF1:%[0-9]+]]:sreg_128 = IMPLICIT_DEF
   ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+  ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
   ; GCN:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2
   ; GCN:   [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
   ; GCN:   [[DEF2:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index badaa16bbfcc5..f96019dba6dcc 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -48,8 +48,8 @@ define amdgpu_kernel void @s_fabs_f32(float addrspace(1)* %out, float %in) {
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
-; GCN: s_and_b32
-; GCN: s_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
 define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -62,10 +62,10 @@ define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
-; GCN: s_and_b32
-; GCN: s_and_b32
-; GCN: s_and_b32
-; GCN: s_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
 define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   store <4 x float> %fabs, <4 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
index 01499e681eafa..a3f176b3ef025 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -85,15 +85,15 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg
 
 ; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
 ; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -121,15 +121,15 @@ define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
@@ -156,15 +156,15 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
@@ -194,15 +194,15 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
 ; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -231,6 +231,8 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
+; GCN-DAG:        s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -238,12 +240,9 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
-; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
-; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -274,6 +273,8 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
+; GCN-DAG:        s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -281,12 +282,9 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
-; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
-; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 075115a2ee6cf..ca80c4edbfb29 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -33,13 +33,9 @@ define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(
 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
 
-; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
+; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
 
-; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
-
-; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
-
-; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]
+; SI-SAFE: v_min_legacy_f32_e64 {{v[0-9]+}}, [[VB]], s[[A]]
 
 ; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index a621b04a346c0..0ff5d9652c104 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -4,7 +4,7 @@
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
 ; SI-NOT: and
-; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{s[0-9]+}}|
 define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
   %fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
 ; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{s[0-9]+}}|
 ; SI-NOT: and
 define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
@@ -85,8 +85,8 @@ define amdgpu_kernel void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrs
 
 ; FIXME: In this case two uses of the constant should be folded
 ; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
 define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -96,10 +96,10 @@ define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x
 
 ; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
 ; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
 define amdgpu_kernel void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 6e4635ec43877..48647a2cdb898 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -27,8 +27,8 @@ define amdgpu_kernel void @s_fsub_f32(float addrspace(1)* %out, float %a, float
 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
 
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
   %sub = fsub <2 x float> %a, %b
   store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -55,10 +55,10 @@ define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x flo
 }
 
 ; FUNC-LABEL: {{^}}s_fsub_v4f32:
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: s_endpgm
 define amdgpu_kernel void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
   %result = fsub <4 x float> %a, %b
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
index 87c9a565f08b2..ae78a1ecf3252 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -4,11 +4,17 @@
 ; SI-LABEL: {{^}}i1_copy_from_loop:
 ;
 ; SI: ; %for.body
-; SI:      v_cmp_lt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], s{{[0-9+]}}, 4
+; SI:      v_cmp_gt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
+; SI-DAG:  s_andn2_b64       [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
+; SI-DAG:  s_and_b64         [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
+; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
+
+; SI: ; %Flow1
+; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], exec
 
 ; SI: ; %Flow
 ; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
+; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
 ; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
 
 ; SI: ; %for.end
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
index c65683d4fab61..0aacbbfda182b 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
@@ -7,6 +7,7 @@
 ; GCN:      s_cbranch_scc1  [[PREEXIT:BB[0-9_]+]]
 
 ; GCN: ; %blocka
+; GCN:      s_xor_b64       s[{{[0-9:]+}}], exec, -1
 ; GCN:      s_cmp_eq_u32    s1, 0
 ; GCN:      s_cbranch_scc1  [[EXIT:BB[0-9_]+]]
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 2584f30573fdc..47e080a94baa4 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -11,12 +11,12 @@
 
 ; GCN-LABEL: {{^}}insertelement_v4f32_0:
 ; GCN: s_load_dwordx4
-; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
-; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
-
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
+; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
 define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index 60ec52c229bca..2a5e81a6dd6ae 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -387,7 +387,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)*
 
 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
 ; SI-NOT: v0
-; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
+; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, v0, v0, v0
 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
   %result0 = extractvalue { float, i1 } %result, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
index 05b074bfe2d41..c47d02f716bdb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -53,8 +53,8 @@ define amdgpu_kernel void @test_fabs_fmed3(float addrspace(1)* %out, float %src0
 }
 
 ; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0:
-; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
+; GCN: s_brev_b32 [[NEG0:s[0-9]+]], 1
+; GCN: v_med3_f32 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
 define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float %src0, float %src1) #1 {
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
   %neg.med3 = fsub float -0.0, %med3
@@ -88,8 +88,8 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out,
 
 ; GCN-LABEL: {{^}}test_fneg_fmed3_r_inv2pi_0_foldable_user:
 ; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
-; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
+; GCN-DAG: s_mov_b32 [[NEG_INV:s[0-9]+]], 0xbe22f983
+; GCN: v_med3_f32 [[MED3:v[0-9]+]], -v{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
 define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(float addrspace(1)* %out, float %src0, float %mul.arg) #1 {
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index a7fb618c23430..18ede50f40c0a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -42,8 +42,6 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
 ; VI-OPT: s_mov_b32
 ; VI-OPT: s_mov_b32
 ; VI-NOOPT: s_waitcnt
-; VI-NOOPT-NEXT: v_mov_b32_e32
-; VI-NOOPT-NEXT: s_nop 0
 ; VI-NOOPT-NEXT: s_nop 0
 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
 ; VI-OPT: s_nop 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index 83bc8b2347245..bc04f6f28f608 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index 1f46613a8db0d..2cab9c28db374 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index 5c2ec5021f1a9..f37b3a3637a43 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -26,9 +26,10 @@
 ; GCN:      s_mov_b64         [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
 ; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
-; GCN:     s_or_b64         [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
-; GCN:     s_cmp_gt_i32 s4, -1
-; GCN:     s_cbranch_scc1   [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN:      v_cmp_lt_i32_e32  vcc, -1
+; GCN:      s_and_b64         vcc, exec, vcc
+; GCN:      s_or_b64          [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
+; GCN:      s_cbranch_vccnz   [[FLOW:BB[0-9]+_[0-9]+]]
 
 ; GCN: ; %bb4
 ; GCN:      buffer_load_dword
@@ -38,7 +39,6 @@
 ; GCN:      s_or_b64          [[INNER_MASK]], [[INNER_MASK]], [[TMP0]]
 
 ; GCN: [[FLOW]]: ; %Flow
-; GCN:           ;   in Loop: Header=BB0_1 Depth=1
 ; GCN:      s_and_b64         [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
 ; GCN:      s_or_b64          [[TMP1]], [[TMP1]], [[OUTER_MASK]]
 ; GCN:      s_mov_b64         [[OUTER_MASK]], [[TMP1]]
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 8e4b6806f98ae..eed0218766481 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,MAD,GFX10-MAD %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -17,7 +17,6 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
 ; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
 ; MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
-; GFX10-MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; FMA:   v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -80,7 +79,6 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
 ; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
 ; GCN: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
 ; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
-; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -108,7 +106,6 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
 ; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
 ; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
 ; MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
-; GFX10-MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; FMA:   v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -237,12 +234,9 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia
 ; On GFX10+ we can use two scalar operands.
 ; GCN-LABEL: {{^}}madak_constant_bus_violation:
 ; GCN:       s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
-
+; GCN:       v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
 ; GCN:       {{buffer|flat|global}}_load_dword [[VGPR:v[0-9]+]]
-; MAD:       v_mov_b32_e32 [[MADAK:v[0-9]+]], 0x42280000
-; MAD:       v_mac_f32_e64 [[MADAK]], [[SGPR0]], 0.5
-; GFX10:     v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
-; GFX10-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; MAD:       v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
 ; FMA:       v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
 ; GCN:       v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
 ; GFX6:      buffer_store_dword [[MUL]]
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 4822818e901af..e8ecf5e25abce 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -155,9 +155,8 @@ entry:
 ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]]
 ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
-; CHECK-O0: v_readlane_b32 s[[S1:[0-9]+]], v{{[0-9]+}}, 4
-; CHECK-O0: v_readlane_b32 s[[S2:[0-9]+]], v{{[0-9]+}}, 5
-; CHECK-O0: s_mov_b64 exec, s{{\[}}[[S1]]:[[S2]]{{\]}}
+
+; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]]
 ; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
 ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index ddda7baef7412..4c1a769d59958 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -96,6 +96,7 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN:      s_mov_b64          [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
 
 ; GCN: ; %LeafBlock1
+; GCN:      s_mov_b64
 ; GCN:      s_mov_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 
 ; GCN: ; %case1
@@ -108,6 +109,8 @@ ENDIF:                                            ; preds = %LOOP
 
 ; GCN:      s_mov_b64          [[BREAK]], -1{{$}}
 
+; GCN: [[FLOW]]: ; %Flow
+
 ; GCN: ; %case0
 ; GCN:      buffer_load_dword  [[LOAD1:v[0-9]+]],
 ; GCN-DAG:  s_andn2_b64        [[BREAK]], [[BREAK]], exec
@@ -115,7 +118,7 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN-DAG:  s_and_b64          [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
 ; GCN:      s_or_b64           [[BREAK]], [[BREAK]], [[TMP]]
 
-; GCN: [[FLOW]]: ; %Flow4
+; GCN: ; %Flow4
 ; GCN:      s_and_b64          [[BREAK]], exec, [[BREAK]]
 ; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT]]
 ; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll
index 24df126e4cafc..f773357976cce 100644
--- a/llvm/test/CodeGen/AMDGPU/select-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll
@@ -135,8 +135,8 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, flo
 
 ; GCN-LABEL: {{^}}regression:
 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
-; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
-; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
+; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
+; GCN: v_cmp_eq_f32_e32 vcc, 0, v{{[0-9]+}}
 
 define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index 3d5c3285cba71..e0971b8456fdc 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -104,8 +104,7 @@ endif:
 
 ; SI: ; %else
 ; SI:      buffer_load_dword  [[AVAL:v[0-9]+]]
-; SI:      v_cmp_gt_i32_e32   vcc, 0, [[AVAL]]
-; SI:      s_and_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], vcc, exec
+; SI:      v_cmp_gt_i32_e64   [[PHI:s\[[0-9]+:[0-9]+\]]], 0, [[AVAL]]
 
 ; SI: ; %if
 ; SI:      buffer_load_dword  [[AVAL:v[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index 03e81a0431c54..3ec7a6678a9ed 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -16,7 +16,7 @@ registers:
 
 body: |
   ; GCN-LABEL: name: phi_visit_order
-  ; GCN: S_ADD_I32
+  ; GCN: V_ADD_I32
   bb.0:
     liveins: $vgpr0
     %7 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index 904de8111fabf..c83eb378a1e1f 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -571,6 +571,7 @@ main_body:
 ;
 ; TODO: we should keep the loop counter in an SGPR
 ;
+; GCN: v_readfirstlane_b32
 ; GCN: s_buffer_load_dword
 define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index e7555a6703383..80071e3407e9c 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -1,43 +1,28 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
 ; Don't crash when the use of an undefined value is only detected by the
 ; register coalescer because it is hidden with subregister insert/extract.
 target triple="amdgcn--"
 
-define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
 ; CHECK-LABEL: foobar:
-; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; CHECK-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
-; CHECK-NEXT:    s_mov_b32 s2, -1
-; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-
-; FIXME: The change related to the fact that
-; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis
-; and hence it cannot derive the fact that the vector element is unused.
-; Such a copies appear because the float4 vectors and their elements in the test are uniform
-; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
-
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
-; CHECK-NEXT:    v_mov_b32_e32 v2, s6
-; CHECK-NEXT:    v_mov_b32_e32 v3, s7
-
-; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; CHECK-NEXT:    ; mask branch BB0_2
-; CHECK-NEXT:  BB0_1: ; %ift
-; CHECK-NEXT:    s_mov_b32 s4, s5
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
-; CHECK-NEXT:    v_mov_b32_e32 v2, s6
-; CHECK-NEXT:    v_mov_b32_e32 v3, s7
-; CHECK-NEXT:  BB0_2: ; %ife
-; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT:    s_mov_b32 s3, 0xf000
-; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], 0
-; CHECK-NEXT:    s_endpgm
+; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
+; CHECK-NEXT: s_mov_b32 s2, -1
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+
+; CHECK: BB0_1:
+; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
+; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
+
+; CHECK: BB0_2:
+; CHECK: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_mov_b32 s3, 0xf000
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; CHECK-NEXT: s_endpgm
+define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
 entry:
   %v0 = insertelement <4 x float> undef, float %a0, i32 0
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index a1cf6cf630048..82283f39792ee 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -7,9 +7,10 @@
 ; CHECK: s_and_saveexec_b64
 ; CHECK-NEXT: ; mask branch
 ; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
+; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
 
-; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: ; %loop_body
-; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]]
+; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
+; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]]
 
 ; CHECK: s_endpgm
 define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) {
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index fbf7364bfc4bb..50cf85e28ae13 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -226,12 +226,13 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addr
 ; GCN-LABEL: {{^}}test_s0_s1_k_f32:
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
+; GCN-DAG: s_mov_b32 [[SK0:s[0-9]+]], 0x44800000
 ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], s[[SGPR1]]
+; GCN-DAG: v_mov_b32_e32 [[VS0:v[0-9]+]], s[[SGPR0]]
 
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK0]]
-; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
-; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK1]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS0]], [[VS1]], [[SK0]]
+; GCN-DAG: s_mov_b32 [[SK1:s[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VS0]], [[VS1]], [[SK1]]
 
 ; GCN: buffer_store_dword [[RESULT0]]
 ; GCN: buffer_store_dword [[RESULT1]]
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 79a753cc046f9..3a9970e78e38f 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -165,8 +165,8 @@ exit:
 ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
 ; SI: buffer_load_dword
 ; SI-DAG: buffer_store_dword
-; SI-DAG: s_cmpk_eq_i32 s{{[0-9+]}}, 0x100
-; SI: s_cbranch_scc0 [[LABEL_LOOP]]
+; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100
+; SI: s_cbranch_vccz [[LABEL_LOOP]]
 ; SI: [[LABEL_EXIT]]:
 ; SI: s_endpgm
 
@@ -214,7 +214,7 @@ exit:
 ; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
 ; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
-; SI: ; mask branch [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
+; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
 
 ; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
 ; SI: buffer_store_dword
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index b0e9171cbb007..0c52daca04738 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s

From af6c9df163831b3a977d5dbaa25f2974baf13518 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 25 May 2019 04:47:42 +0000
Subject: [PATCH 0221/1176] [X86][llvm-mca] Add zero idiom tests for Intel
 CPUs. NFC

This pre-commits tests for D62360

llvm-svn: 361689
---
 .../llvm-mca/X86/Broadwell/zero-idioms.s      | 450 ++++++++++
 .../tools/llvm-mca/X86/Haswell/zero-idioms.s  | 492 +++++++++++
 .../llvm-mca/X86/SandyBridge/zero-idioms.s    | 128 ++-
 .../llvm-mca/X86/SkylakeClient/zero-idioms.s  | 492 +++++++++++
 .../llvm-mca/X86/SkylakeServer/zero-idioms.s  | 778 ++++++++++++++++++
 5 files changed, 2296 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
 create mode 100644 llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s
 create mode 100644 llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
 create mode 100644 llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s

diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
new file mode 100644
index 0000000000000..6e505bfb0626f
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
@@ -0,0 +1,450 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -timeline -register-file-stats -iterations=1 < %s | FileCheck %s
+
+# On BDW, renamer-based zeroing does not work for:
+#  - 16 and 8-bit GPRs
+#  - MMX
+#  - ANDN variants
+
+subl  %eax, %eax
+subq  %rax, %rax
+xorl  %eax, %eax
+xorq  %rax, %rax
+
+pcmpgtb   %mm2, %mm2
+pcmpgtd   %mm2, %mm2
+# pcmpgtq   %mm2, %mm2 # invalid operand for instruction
+pcmpgtw   %mm2, %mm2
+
+pcmpgtb   %xmm2, %xmm2
+pcmpgtd   %xmm2, %xmm2
+pcmpgtq   %xmm2, %xmm2
+pcmpgtw   %xmm2, %xmm2
+
+vpcmpgtb  %xmm3, %xmm3, %xmm3
+vpcmpgtd  %xmm3, %xmm3, %xmm3
+vpcmpgtq  %xmm3, %xmm3, %xmm3
+vpcmpgtw  %xmm3, %xmm3, %xmm3
+
+vpcmpgtb  %xmm3, %xmm3, %xmm5
+vpcmpgtd  %xmm3, %xmm3, %xmm5
+vpcmpgtq  %xmm3, %xmm3, %xmm5
+vpcmpgtw  %xmm3, %xmm3, %xmm5
+
+psubb   %mm2, %mm2
+psubd   %mm2, %mm2
+psubq   %mm2, %mm2
+psubw   %mm2, %mm2
+psubb   %xmm2, %xmm2
+psubd   %xmm2, %xmm2
+psubq   %xmm2, %xmm2
+psubw   %xmm2, %xmm2
+vpsubb  %xmm3, %xmm3, %xmm3
+vpsubd  %xmm3, %xmm3, %xmm3
+vpsubq  %xmm3, %xmm3, %xmm3
+vpsubw  %xmm3, %xmm3, %xmm3
+vpsubb  %ymm3, %ymm3, %ymm3
+vpsubd  %ymm3, %ymm3, %ymm3
+vpsubq  %ymm3, %ymm3, %ymm3
+vpsubw  %ymm3, %ymm3, %ymm3
+
+vpsubb  %xmm3, %xmm3, %xmm5
+vpsubd  %xmm3, %xmm3, %xmm5
+vpsubq  %xmm3, %xmm3, %xmm5
+vpsubw  %xmm3, %xmm3, %xmm5
+vpsubb  %ymm3, %ymm3, %ymm5
+vpsubd  %ymm3, %ymm3, %ymm5
+vpsubq  %ymm3, %ymm3, %ymm5
+vpsubw  %ymm3, %ymm3, %ymm5
+
+andnps  %xmm0, %xmm0
+andnpd  %xmm1, %xmm1
+vandnps %xmm2, %xmm2, %xmm2
+vandnpd %xmm1, %xmm1, %xmm1
+vandnps %ymm2, %ymm2, %ymm2
+vandnpd %ymm1, %ymm1, %ymm1
+pandn   %mm2, %mm2
+pandn   %xmm2, %xmm2
+vpandn  %xmm3, %xmm3, %xmm3
+vpandn  %ymm3, %ymm3, %ymm3
+
+vandnps %xmm2, %xmm2, %xmm5
+vandnpd %xmm1, %xmm1, %xmm5
+vpandn  %xmm3, %xmm3, %xmm5
+vandnps %ymm2, %ymm2, %ymm5
+vandnpd %ymm1, %ymm1, %ymm5
+vpandn  %ymm3, %ymm3, %ymm5
+
+xorps  %xmm0, %xmm0
+xorpd  %xmm1, %xmm1
+vxorps %xmm2, %xmm2, %xmm2
+vxorpd %xmm1, %xmm1, %xmm1
+vxorps %ymm2, %ymm2, %ymm2
+vxorpd %ymm1, %ymm1, %ymm1
+pxor   %mm2, %mm2
+pxor   %xmm2, %xmm2
+vpxor  %xmm3, %xmm3, %xmm3
+vpxor  %ymm3, %ymm3, %ymm3
+
+vxorps %xmm4, %xmm4, %xmm5
+vxorpd %xmm1, %xmm1, %xmm3
+vxorps %ymm4, %ymm4, %ymm5
+vxorpd %ymm1, %ymm1, %ymm3
+vpxor  %xmm3, %xmm3, %xmm5
+vpxor  %ymm3, %ymm3, %ymm5
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      75
+# CHECK-NEXT: Total Cycles:      41
+# CHECK-NEXT: Total uOps:        75
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.83
+# CHECK-NEXT: IPC:               1.83
+# CHECK-NEXT: Block RThroughput: 20.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      5     1.00                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     1.00                        andnps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     1.00                        andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.33                        pandn	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pandn	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     1.00                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     1.00                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.33                        pxor	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    79
+# CHECK-NEXT: Max number of mappings used:         58
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - BWDivider
+# CHECK-NEXT: [1]   - BWFPDivider
+# CHECK-NEXT: [2]   - BWPort0
+# CHECK-NEXT: [3]   - BWPort1
+# CHECK-NEXT: [4]   - BWPort2
+# CHECK-NEXT: [5]   - BWPort3
+# CHECK-NEXT: [6]   - BWPort4
+# CHECK-NEXT: [7]   - BWPort5
+# CHECK-NEXT: [8]   - BWPort6
+# CHECK-NEXT: [9]   - BWPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     14.00  24.00   -      -      -     35.00  2.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789          0123456789
+# CHECK-NEXT: Index     0123456789          0123456789          0
+
+# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .    .    .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .    .    .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .    .    .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     .DeE--R   .    .    .    .    .    .    .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     .D=eE-R   .    .    .    .    .    .    .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D==eER   .    .    .    .    .    .    .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .D=eE-R   .    .    .    .    .    .    .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     . D=eER   .    .    .    .    .    .    .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     . D==eeeeeER   .    .    .    .    .    .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    . D=======eER  .    .    .    .    .    .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    . D==eE-----R  .    .    .    .    .    .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    .  D==eE----R  .    .    .    .    .    .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    .  D===eeeeeER .    .    .    .    .    .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    .  D========eER.    .    .    .    .    .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    .  D=========eER    .    .    .    .    .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    .   D========eER    .    .    .    .    .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    .   D========eeeeeER.    .    .    .    .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .   D=========eE---R.    .    .    .    .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .   DeE------------R.    .    .    .    .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,20]    .    DeE-----------R.    .    .    .    .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,21]    .    D=eE----------R.    .    .    .    .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,22]    .    D==eE---------R.    .    .    .    .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,23]    .    D=====eE------R.    .    .    .    .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,24]    .    .D=====eE-----R.    .    .    .    .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,25]    .    .D=======eE---R.    .    .    .    .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,26]    .    .D========eE--R.    .    .    .    .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,27]    .    .D========eE--R.    .    .    .    .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,28]    .    . D========eE-R.    .    .    .    .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,29]    .    . D=========eER.    .    .    .    .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,30]    .    . D==========eER    .    .    .    .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,31]    .    . D===========eER   .    .    .    .   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,32]    .    .  D===========eER  .    .    .    .   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,33]    .    .  D============eER .    .    .    .   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,34]    .    .  D=============eER.    .    .    .   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,35]    .    .  D==============eER    .    .    .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,36]    .    .   D=============eER    .    .    .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,37]    .    .   D==============eER   .    .    .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,38]    .    .   D==============eER   .    .    .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,39]    .    .   D===============eER  .    .    .   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,40]    .    .    D==============eER  .    .    .   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,41]    .    .    D===============eER .    .    .   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,42]    .    .    D===============eER .    .    .   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,43]    .    .    DeE---------------R .    .    .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,44]    .    .    .D====eE----------R .    .    .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,45]    .    .    .D=====eE---------R .    .    .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,46]    .    .    .D=======eE-------R .    .    .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,47]    .    .    .D======eE--------R .    .    .   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,48]    .    .    . D=======eE------R .    .    .   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,49]    .    .    . D=eE------------R .    .    .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,50]    .    .    . D======eE-------R .    .    .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,51]    .    .    . D==========eE---R .    .    .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,52]    .    .    .  D==========eE--R .    .    .   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,53]    .    .    .  D=======eE-----R .    .    .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,54]    .    .    .  D========eE----R .    .    .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,55]    .    .    .  D===========eE-R .    .    .   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,56]    .    .    .   D============eER.    .    .   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,57]    .    .    .   D=============eER    .    .   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,58]    .    .    .   D===========eE--R    .    .   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,59]    .    .    .   D===============eER  .    .   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,60]    .    .    .    D=============eE-R  .    .   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,61]    .    .    .    D===============eER .    .   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,62]    .    .    .    D================eER.    .   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,63]    .    .    .    D==================eER   .   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,64]    .    .    .    .D================eE-R   .   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,65]    .    .    .    .DeE-----------------R   .   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,66]    .    .    .    .D==================eER  .   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,67]    .    .    .    .D==========eE--------R  .   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,68]    .    .    .    . D==========eE-------R  .   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,69]    .    .    .    . D==================eER .   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,70]    .    .    .    . D===================eER.   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,71]    .    .    .    . D====================eER   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,72]    .    .    .    .  D================eE---R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,73]    .    .    .    .  D=================eE--R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,74]    .    .    .    .  D=================eE--R   vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    2.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    1.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     3.0    0.0    0.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     2.0    2.0    1.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     2.0    0.0    0.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     3.0    0.0    0.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     8.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     3.0    3.0    5.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     3.0    0.0    4.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     9.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     10.0   0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     9.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     9.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     10.0   1.0    3.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     1.0    0.0    12.0      psubb	%mm2, %mm2
+# CHECK-NEXT: 20.    1     1.0    0.0    11.0      psubd	%mm2, %mm2
+# CHECK-NEXT: 21.    1     2.0    0.0    10.0      psubq	%mm2, %mm2
+# CHECK-NEXT: 22.    1     3.0    0.0    9.0       psubw	%mm2, %mm2
+# CHECK-NEXT: 23.    1     6.0    0.0    6.0       psubb	%xmm2, %xmm2
+# CHECK-NEXT: 24.    1     6.0    0.0    5.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 25.    1     8.0    1.0    3.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 26.    1     9.0    0.0    2.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 27.    1     9.0    2.0    2.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 28.    1     9.0    0.0    1.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 29.    1     10.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 30.    1     11.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 31.    1     12.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 32.    1     12.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 33.    1     13.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 34.    1     14.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 35.    1     15.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 36.    1     14.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 37.    1     15.0   1.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 38.    1     15.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 39.    1     16.0   2.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 40.    1     15.0   2.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 41.    1     16.0   3.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 42.    1     16.0   3.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 43.    1     1.0    1.0    15.0      andnps	%xmm0, %xmm0
+# CHECK-NEXT: 44.    1     5.0    5.0    10.0      andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 45.    1     6.0    1.0    9.0       vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 46.    1     8.0    2.0    7.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 47.    1     7.0    0.0    8.0       vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 48.    1     8.0    0.0    6.0       vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 49.    1     2.0    2.0    12.0      pandn	%mm2, %mm2
+# CHECK-NEXT: 50.    1     7.0    0.0    7.0       pandn	%xmm2, %xmm2
+# CHECK-NEXT: 51.    1     11.0   0.0    3.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 52.    1     11.0   0.0    2.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 53.    1     8.0    1.0    5.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 54.    1     9.0    1.0    4.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 55.    1     12.0   0.0    1.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 56.    1     13.0   7.0    0.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 57.    1     14.0   7.0    0.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 58.    1     12.0   1.0    2.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 59.    1     16.0   16.0   0.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 60.    1     14.0   8.0    1.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 61.    1     16.0   11.0   0.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 62.    1     17.0   2.0    0.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 63.    1     19.0   2.0    0.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 64.    1     17.0   0.0    1.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 65.    1     1.0    1.0    17.0      pxor	%mm2, %mm2
+# CHECK-NEXT: 66.    1     19.0   0.0    0.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 67.    1     11.0   2.0    8.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 68.    1     11.0   0.0    7.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 69.    1     19.0   19.0   0.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 70.    1     20.0   3.0    0.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 71.    1     21.0   21.0   0.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 72.    1     17.0   1.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 73.    1     18.0   0.0    2.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 74.    1     18.0   0.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s
new file mode 100644
index 0000000000000..3644f3d2f9989
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s
@@ -0,0 +1,492 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -register-file-stats -iterations=1 < %s | FileCheck %s
+
+# On HSW, renamer-based zeroing does not work for:
+#  - 16 and 8-bit GPRs
+#  - MMX
+#  - ANDN variants
+
+subl  %eax, %eax
+subq  %rax, %rax
+xorl  %eax, %eax
+xorq  %rax, %rax
+
+pcmpgtb   %mm2, %mm2
+pcmpgtd   %mm2, %mm2
+# pcmpgtq   %mm2, %mm2 # invalid operand for instruction
+pcmpgtw   %mm2, %mm2
+
+pcmpgtb   %xmm2, %xmm2
+pcmpgtd   %xmm2, %xmm2
+pcmpgtq   %xmm2, %xmm2
+pcmpgtw   %xmm2, %xmm2
+
+vpcmpgtb  %xmm3, %xmm3, %xmm3
+vpcmpgtd  %xmm3, %xmm3, %xmm3
+vpcmpgtq  %xmm3, %xmm3, %xmm3
+vpcmpgtw  %xmm3, %xmm3, %xmm3
+
+vpcmpgtb  %xmm3, %xmm3, %xmm5
+vpcmpgtd  %xmm3, %xmm3, %xmm5
+vpcmpgtq  %xmm3, %xmm3, %xmm5
+vpcmpgtw  %xmm3, %xmm3, %xmm5
+
+vpcmpgtb  %ymm3, %ymm3, %ymm3
+vpcmpgtd  %ymm3, %ymm3, %ymm3
+vpcmpgtq  %ymm3, %ymm3, %ymm3
+vpcmpgtw  %ymm3, %ymm3, %ymm3
+
+vpcmpgtb  %ymm3, %ymm3, %ymm5
+vpcmpgtd  %ymm3, %ymm3, %ymm5
+vpcmpgtq  %ymm3, %ymm3, %ymm5
+vpcmpgtw  %ymm3, %ymm3, %ymm5
+
+psubb   %mm2, %mm2
+psubd   %mm2, %mm2
+psubq   %mm2, %mm2
+psubw   %mm2, %mm2
+psubb   %xmm2, %xmm2
+psubd   %xmm2, %xmm2
+psubq   %xmm2, %xmm2
+psubw   %xmm2, %xmm2
+vpsubb  %xmm3, %xmm3, %xmm3
+vpsubd  %xmm3, %xmm3, %xmm3
+vpsubq  %xmm3, %xmm3, %xmm3
+vpsubw  %xmm3, %xmm3, %xmm3
+vpsubb  %ymm3, %ymm3, %ymm3
+vpsubd  %ymm3, %ymm3, %ymm3
+vpsubq  %ymm3, %ymm3, %ymm3
+vpsubw  %ymm3, %ymm3, %ymm3
+
+vpsubb  %xmm3, %xmm3, %xmm5
+vpsubd  %xmm3, %xmm3, %xmm5
+vpsubq  %xmm3, %xmm3, %xmm5
+vpsubw  %xmm3, %xmm3, %xmm5
+vpsubb  %ymm3, %ymm3, %ymm5
+vpsubd  %ymm3, %ymm3, %ymm5
+vpsubq  %ymm3, %ymm3, %ymm5
+vpsubw  %ymm3, %ymm3, %ymm5
+
+andnps  %xmm0, %xmm0
+andnpd  %xmm1, %xmm1
+vandnps %xmm2, %xmm2, %xmm2
+vandnpd %xmm1, %xmm1, %xmm1
+vandnps %ymm2, %ymm2, %ymm2
+vandnpd %ymm1, %ymm1, %ymm1
+pandn   %mm2, %mm2
+pandn   %xmm2, %xmm2
+vpandn  %xmm3, %xmm3, %xmm3
+vpandn  %ymm3, %ymm3, %ymm3
+
+vandnps %xmm2, %xmm2, %xmm5
+vandnpd %xmm1, %xmm1, %xmm5
+vpandn  %xmm3, %xmm3, %xmm5
+vandnps %ymm2, %ymm2, %ymm5
+vandnpd %ymm1, %ymm1, %ymm5
+vpandn  %ymm3, %ymm3, %ymm5
+
+xorps  %xmm0, %xmm0
+xorpd  %xmm1, %xmm1
+vxorps %xmm2, %xmm2, %xmm2
+vxorpd %xmm1, %xmm1, %xmm1
+vxorps %ymm2, %ymm2, %ymm2
+vxorpd %ymm1, %ymm1, %ymm1
+pxor   %mm2, %mm2
+pxor   %xmm2, %xmm2
+vpxor  %xmm3, %xmm3, %xmm3
+vpxor  %ymm3, %ymm3, %ymm3
+
+vxorps %xmm4, %xmm4, %xmm5
+vxorpd %xmm1, %xmm1, %xmm3
+vxorps %ymm4, %ymm4, %ymm5
+vxorpd %ymm1, %ymm1, %ymm3
+vpxor  %xmm3, %xmm3, %xmm5
+vpxor  %ymm3, %ymm3, %ymm5
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      83
+# CHECK-NEXT: Total Cycles:      45
+# CHECK-NEXT: Total uOps:        83
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    1.84
+# CHECK-NEXT: IPC:               1.84
+# CHECK-NEXT: Block RThroughput: 21.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      5     1.00                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     1.00                        andnps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     1.00                        andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.33                        pandn	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pandn	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     1.00                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     1.00                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.33                        pxor	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    87
+# CHECK-NEXT: Max number of mappings used:         62
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - HWDivider
+# CHECK-NEXT: [1]   - HWFPDivider
+# CHECK-NEXT: [2]   - HWPort0
+# CHECK-NEXT: [3]   - HWPort1
+# CHECK-NEXT: [4]   - HWPort2
+# CHECK-NEXT: [5]   - HWPort3
+# CHECK-NEXT: [6]   - HWPort4
+# CHECK-NEXT: [7]   - HWPort5
+# CHECK-NEXT: [8]   - HWPort6
+# CHECK-NEXT: [9]   - HWPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     15.00  28.00   -      -      -     38.00  2.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pxor	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789          0123456789
+# CHECK-NEXT: Index     0123456789          0123456789          01234
+
+# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .   .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .    .    .   .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .    .    .   .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .    .    .   .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     .DeE--R   .    .    .    .    .    .    .   .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     .D=eE-R   .    .    .    .    .    .    .   .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D==eER   .    .    .    .    .    .    .   .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .D=eE-R   .    .    .    .    .    .    .   .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     . D=eER   .    .    .    .    .    .    .   .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     . D==eeeeeER   .    .    .    .    .    .   .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    . D=======eER  .    .    .    .    .    .   .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    . D==eE-----R  .    .    .    .    .    .   .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    .  D==eE----R  .    .    .    .    .    .   .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    .  D===eeeeeER .    .    .    .    .    .   .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    .  D========eER.    .    .    .    .    .   .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    .  D=========eER    .    .    .    .    .   .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    .   D========eER    .    .    .    .    .   .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    .   D========eeeeeER.    .    .    .    .   .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .   D=========eE---R.    .    .    .    .   .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .   D=========eE---R.    .    .    .    .   .   vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20]    .    D=========eE--R.    .    .    .    .   .   vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21]    .    D==========eeeeeER  .    .    .    .   .   vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22]    .    D===============eER .    .    .    .   .   vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23]    .    D================eER.    .    .    .   .   vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24]    .    .D===============eER.    .    .    .   .   vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25]    .    .D===============eeeeeER .    .    .   .   vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26]    .    .D================eE---R .    .    .   .   vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27]    .    .DeE-------------------R .    .    .   .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,28]    .    . DeE------------------R .    .    .   .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,29]    .    . D=eE-----------------R .    .    .   .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,30]    .    . D==eE----------------R .    .    .   .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,31]    .    . D===eE---------------R .    .    .   .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,32]    .    .  D===eE--------------R .    .    .   .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,33]    .    .  D======eE-----------R .    .    .   .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,34]    .    .  D=======eE----------R .    .    .   .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,35]    .    .  D==============eE---R .    .    .   .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36]    .    .   D==============eE--R .    .    .   .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37]    .    .   D===============eE-R .    .    .   .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38]    .    .   D================eER .    .    .   .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39]    .    .   D=================eER.    .    .   .   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40]    .    .    D=================eER    .    .   .   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41]    .    .    D==================eER   .    .   .   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42]    .    .    D===================eER  .    .   .   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43]    .    .    D====================eER .    .   .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44]    .    .    .D===================eER .    .   .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    .    .D====================eER.    .   .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46]    .    .    .D====================eER.    .   .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    .    .D=====================eER    .   .   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48]    .    .    . D====================eER    .   .   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49]    .    .    . D=====================eER   .   .   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50]    .    .    . D=====================eER   .   .   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51]    .    .    . D====eE-----------------R   .   .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,52]    .    .    .  D====eE----------------R   .   .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,53]    .    .    .  D=====eE---------------R   .   .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,54]    .    .    .  D=======eE-------------R   .   .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,55]    .    .    .  D======eE--------------R   .   .   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,56]    .    .    .   D=========eE----------R   .   .   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,57]    .    .    .   DeE-------------------R   .   .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,58]    .    .    .   D======eE-------------R   .   .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,59]    .    .    .   D================eE---R   .   .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,60]    .    .    .    D================eE--R   .   .   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,61]    .    .    .    D=========eE---------R   .   .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,62]    .    .    .    D==========eE--------R   .   .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,63]    .    .    .    D=================eE-R   .   .   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,64]    .    .    .    .D==========eE-------R   .   .   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,65]    .    .    .    .D===========eE------R   .   .   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,66]    .    .    .    .D=================eER   .   .   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,67]    .    .    .    .D=============eE----R   .   .   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,68]    .    .    .    . D===========eE-----R   .   .   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,69]    .    .    .    . D=================eER  .   .   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,70]    .    .    .    . D==================eER .   .   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,71]    .    .    .    . D====================eER   .   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,72]    .    .    .    .  D==================eE-R   .   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,73]    .    .    .    .  DeE-------------------R   .   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,74]    .    .    .    .  D====================eER  .   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,75]    .    .    .    .  D================eE----R  .   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,76]    .    .    .    .   D================eE---R  .   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,77]    .    .    .    .   D====================eER .   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,78]    .    .    .    .   D=====================eER.   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,79]    .    .    .    .   D======================eER   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,80]    .    .    .    .    D==================eE---R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,81]    .    .    .    .    D===================eE--R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,82]    .    .    .    .    D===================eE--R   vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    2.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    1.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     3.0    0.0    0.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     2.0    2.0    1.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     2.0    0.0    0.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     3.0    0.0    0.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     8.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     3.0    3.0    5.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     3.0    0.0    4.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     9.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     10.0   0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     9.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     9.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     10.0   1.0    3.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     10.0   1.0    3.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20.    1     10.0   0.0    2.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21.    1     11.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22.    1     16.0   0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23.    1     17.0   0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24.    1     16.0   0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25.    1     16.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26.    1     17.0   1.0    3.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 27.    1     1.0    1.0    19.0      psubb	%mm2, %mm2
+# CHECK-NEXT: 28.    1     1.0    0.0    18.0      psubd	%mm2, %mm2
+# CHECK-NEXT: 29.    1     2.0    0.0    17.0      psubq	%mm2, %mm2
+# CHECK-NEXT: 30.    1     3.0    0.0    16.0      psubw	%mm2, %mm2
+# CHECK-NEXT: 31.    1     4.0    0.0    15.0      psubb	%xmm2, %xmm2
+# CHECK-NEXT: 32.    1     4.0    0.0    14.0      psubd	%xmm2, %xmm2
+# CHECK-NEXT: 33.    1     7.0    2.0    11.0      psubq	%xmm2, %xmm2
+# CHECK-NEXT: 34.    1     8.0    0.0    10.0      psubw	%xmm2, %xmm2
+# CHECK-NEXT: 35.    1     15.0   1.0    3.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 36.    1     15.0   0.0    2.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 37.    1     16.0   0.0    1.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 38.    1     17.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39.    1     18.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40.    1     18.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41.    1     19.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42.    1     20.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43.    1     21.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44.    1     20.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     21.0   1.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46.    1     21.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     22.0   2.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48.    1     21.0   2.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49.    1     22.0   3.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50.    1     22.0   3.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51.    1     5.0    5.0    17.0      andnps	%xmm0, %xmm0
+# CHECK-NEXT: 52.    1     5.0    5.0    16.0      andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 53.    1     6.0    2.0    15.0      vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 54.    1     8.0    2.0    13.0      vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 55.    1     7.0    0.0    14.0      vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 56.    1     10.0   2.0    10.0      vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 57.    1     1.0    1.0    19.0      pandn	%mm2, %mm2
+# CHECK-NEXT: 58.    1     7.0    0.0    13.0      pandn	%xmm2, %xmm2
+# CHECK-NEXT: 59.    1     17.0   0.0    3.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 60.    1     17.0   0.0    2.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 61.    1     10.0   3.0    9.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 62.    1     11.0   1.0    8.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 63.    1     18.0   0.0    1.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 64.    1     11.0   5.0    7.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 65.    1     12.0   3.0    6.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 66.    1     18.0   1.0    0.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 67.    1     14.0   12.0   4.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 68.    1     12.0   4.0    5.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 69.    1     18.0   13.0   0.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 70.    1     19.0   6.0    0.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 71.    1     21.0   2.0    0.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 72.    1     19.0   0.0    1.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 73.    1     1.0    1.0    19.0      pxor	%mm2, %mm2
+# CHECK-NEXT: 74.    1     21.0   0.0    0.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 75.    1     17.0   2.0    4.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 76.    1     17.0   0.0    3.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 77.    1     21.0   21.0   0.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 78.    1     22.0   3.0    0.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 79.    1     23.0   23.0   0.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 80.    1     19.0   1.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 81.    1     20.0   0.0    2.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 82.    1     20.0   0.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s
index b91af96481dd8..32932a05492fe 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s
@@ -53,6 +53,8 @@ andnps  %xmm0, %xmm0
 andnpd  %xmm1, %xmm1
 vandnps %xmm2, %xmm2, %xmm2
 vandnpd %xmm1, %xmm1, %xmm1
+vandnps %ymm2, %ymm2, %ymm2
+vandnpd %ymm1, %ymm1, %ymm1
 pandn   %mm2, %mm2
 pandn   %xmm2, %xmm2
 vpandn  %xmm3, %xmm3, %xmm3
@@ -60,28 +62,34 @@ vpandn  %xmm3, %xmm3, %xmm3
 vandnps %xmm2, %xmm2, %xmm5
 vandnpd %xmm1, %xmm1, %xmm5
 vpandn  %xmm3, %xmm3, %xmm5
+vandnps %ymm2, %ymm2, %ymm5
+vandnpd %ymm1, %ymm1, %ymm5
 
 xorps  %xmm0, %xmm0
 xorpd  %xmm1, %xmm1
 vxorps %xmm2, %xmm2, %xmm2
 vxorpd %xmm1, %xmm1, %xmm1
+vxorps %ymm2, %ymm2, %ymm2
+vxorpd %ymm1, %ymm1, %ymm1
 pxor   %mm2, %mm2
 pxor   %xmm2, %xmm2
 vpxor  %xmm3, %xmm3, %xmm3
 
 vxorps %xmm4, %xmm4, %xmm5
 vxorpd %xmm1, %xmm1, %xmm3
+vxorps %ymm4, %ymm4, %ymm5
+vxorpd %ymm1, %ymm1, %ymm3
 vpxor  %xmm3, %xmm3, %xmm5
 
 # CHECK:      Iterations:        1
-# CHECK-NEXT: Instructions:      55
+# CHECK-NEXT: Instructions:      63
 # CHECK-NEXT: Total Cycles:      27
-# CHECK-NEXT: Total uOps:        55
+# CHECK-NEXT: Total uOps:        63
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    2.04
-# CHECK-NEXT: IPC:               2.04
-# CHECK-NEXT: Block RThroughput: 13.8
+# CHECK-NEXT: uOps Per Cycle:    2.33
+# CHECK-NEXT: IPC:               2.33
+# CHECK-NEXT: Block RThroughput: 15.8
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -131,26 +139,34 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  1      1     1.00                        andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  1      1     1.00                        vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  1      1     0.33                        pandn	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.33                        pandn	%xmm2, %xmm2
 # CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm5
 # CHECK-NEXT:  1      1     1.00                        vandnpd	%xmm1, %xmm1, %xmm5
 # CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  1      0     0.25                        xorps	%xmm0, %xmm0
 # CHECK-NEXT:  1      0     0.25                        xorpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      0     0.25                        vxorps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  1      1     0.33                        pxor	%mm2, %mm2
 # CHECK-NEXT:  1      0     0.25                        pxor	%xmm2, %xmm2
 # CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  1      0     0.25                        vxorps	%xmm4, %xmm4, %xmm5
 # CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm3
 # CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Register File statistics:
-# CHECK-NEXT: Total number of mappings created:    59
-# CHECK-NEXT: Max number of mappings used:         42
+# CHECK-NEXT: Total number of mappings created:    67
+# CHECK-NEXT: Max number of mappings used:         43
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SBDivider
@@ -164,7 +180,7 @@ vpxor  %xmm3, %xmm3, %xmm5
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     4.00   8.00    -     6.00    -      -
+# CHECK-NEXT:  -      -     4.00   8.00    -     14.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -207,21 +223,29 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     pandn	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     pandn	%xmm2, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm5
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     xorpd	%xmm1, %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     pxor	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Timeline view:
@@ -266,23 +290,31 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: [0,35]    .    .  DeE-------------R..   andnps	%xmm0, %xmm0
 # CHECK-NEXT: [0,36]    .    .   DeE------------R..   andnpd	%xmm1, %xmm1
 # CHECK-NEXT: [0,37]    .    .   D=eE-----------R..   vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,38]    .    .   D==eE----------R..   vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,39]    .    .   D=============eER.   pandn	%mm2, %mm2
-# CHECK-NEXT: [0,40]    .    .    D=eE-----------R.   pandn	%xmm2, %xmm2
-# CHECK-NEXT: [0,41]    .    .    DeE------------R.   vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,42]    .    .    D==eE----------R.   vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,43]    .    .    D===eE---------R.   vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,44]    .    .    .DeE-----------R.   vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45]    .    .    .D-------------R.   xorps	%xmm0, %xmm0
-# CHECK-NEXT: [0,46]    .    .    .D=E-----------R.   xorpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,47]    .    .    .D=E-----------R.   vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,48]    .    .    . DE-----------R.   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,49]    .    .    . D===========eER   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,50]    .    .    . DE------------R   pxor	%xmm2, %xmm2
-# CHECK-NEXT: [0,51]    .    .    . D-------------R   vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,52]    .    .    .  D------------R   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,53]    .    .    .  D------------R   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,54]    .    .    .  D------------R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,38]    .    .   D===eE---------R..   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,39]    .    .   D==eE----------R..   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,40]    .    .    D===eE--------R..   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,41]    .    .    D============eER.   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,42]    .    .    D==eE----------R.   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,43]    .    .    DeE------------R.   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,44]    .    .    .D===eE--------R.   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,45]    .    .    .D====eE-------R.   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,46]    .    .    .DeE-----------R.   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    .    .D=====eE------R.   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,48]    .    .    . D=====eE-----R.   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,49]    .    .    . D------------R.   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,50]    .    .    . D==E---------R.   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,51]    .    .    . D=E----------R.   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,52]    .    .    .  D=E---------R.   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,53]    .    .    .  D======eE---R.   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,54]    .    .    .  D=====eE----R.   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,55]    .    .    .  D==========eER   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,56]    .    .    .   D======E----R   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,57]    .    .    .   D-----------R   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,58]    .    .    .   D-----------R   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,59]    .    .    .   D=====E-----R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,60]    .    .    .    D======eE--R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,61]    .    .    .    D=====eE---R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,62]    .    .    .    D======E---R   vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -329,20 +361,28 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 35.    1     1.0    1.0    13.0      andnps	%xmm0, %xmm0
 # CHECK-NEXT: 36.    1     1.0    1.0    12.0      andnpd	%xmm1, %xmm1
 # CHECK-NEXT: 37.    1     2.0    2.0    11.0      vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 38.    1     3.0    1.0    10.0      vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 39.    1     14.0   0.0    0.0       pandn	%mm2, %mm2
-# CHECK-NEXT: 40.    1     2.0    0.0    11.0      pandn	%xmm2, %xmm2
-# CHECK-NEXT: 41.    1     1.0    1.0    12.0      vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 42.    1     3.0    0.0    10.0      vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: 43.    1     4.0    1.0    9.0       vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: 44.    1     1.0    0.0    11.0      vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 45.    1     0.0    0.0    13.0      xorps	%xmm0, %xmm0
-# CHECK-NEXT: 46.    1     2.0    0.0    11.0      xorpd	%xmm1, %xmm1
-# CHECK-NEXT: 47.    1     2.0    0.0    11.0      vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 48.    1     1.0    0.0    11.0      vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 49.    1     12.0   0.0    0.0       pxor	%mm2, %mm2
-# CHECK-NEXT: 50.    1     1.0    0.0    12.0      pxor	%xmm2, %xmm2
-# CHECK-NEXT: 51.    1     0.0    0.0    13.0      vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 52.    1     0.0    0.0    12.0      vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 53.    1     0.0    0.0    12.0      vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 54.    1     0.0    0.0    12.0      vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 38.    1     4.0    2.0    9.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 39.    1     3.0    0.0    10.0      vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 40.    1     4.0    0.0    8.0       vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 41.    1     13.0   0.0    0.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 42.    1     3.0    0.0    10.0      pandn	%xmm2, %xmm2
+# CHECK-NEXT: 43.    1     1.0    1.0    12.0      vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 44.    1     4.0    1.0    8.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 45.    1     5.0    1.0    7.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 46.    1     1.0    0.0    11.0      vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     6.0    3.0    6.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 48.    1     6.0    3.0    5.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 49.    1     0.0    0.0    12.0      xorps	%xmm0, %xmm0
+# CHECK-NEXT: 50.    1     3.0    0.0    9.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 51.    1     2.0    0.0    10.0      vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 52.    1     2.0    0.0    9.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 53.    1     7.0    6.0    3.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 54.    1     6.0    4.0    4.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 55.    1     11.0   0.0    0.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 56.    1     7.0    0.0    4.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 57.    1     0.0    0.0    11.0      vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 58.    1     0.0    0.0    11.0      vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 59.    1     6.0    0.0    5.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 60.    1     7.0    7.0    2.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 61.    1     6.0    1.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 62.    1     7.0    0.0    3.0       vpxor	%xmm3, %xmm3, %xmm5
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
new file mode 100644
index 0000000000000..d10e7890e45b3
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
@@ -0,0 +1,492 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -register-file-stats -iterations=1 < %s | FileCheck %s
+
+# On SKL, renamer-based zeroing does not work for:
+#  - 16 and 8-bit GPRs
+#  - MMX
+#  - ANDN variants
+
+subl  %eax, %eax
+subq  %rax, %rax
+xorl  %eax, %eax
+xorq  %rax, %rax
+
+pcmpgtb   %mm2, %mm2
+pcmpgtd   %mm2, %mm2
+# pcmpgtq   %mm2, %mm2 # invalid operand for instruction
+pcmpgtw   %mm2, %mm2
+
+pcmpgtb   %xmm2, %xmm2
+pcmpgtd   %xmm2, %xmm2
+pcmpgtq   %xmm2, %xmm2
+pcmpgtw   %xmm2, %xmm2
+
+vpcmpgtb  %xmm3, %xmm3, %xmm3
+vpcmpgtd  %xmm3, %xmm3, %xmm3
+vpcmpgtq  %xmm3, %xmm3, %xmm3
+vpcmpgtw  %xmm3, %xmm3, %xmm3
+
+vpcmpgtb  %xmm3, %xmm3, %xmm5
+vpcmpgtd  %xmm3, %xmm3, %xmm5
+vpcmpgtq  %xmm3, %xmm3, %xmm5
+vpcmpgtw  %xmm3, %xmm3, %xmm5
+
+vpcmpgtb  %ymm3, %ymm3, %ymm3
+vpcmpgtd  %ymm3, %ymm3, %ymm3
+vpcmpgtq  %ymm3, %ymm3, %ymm3
+vpcmpgtw  %ymm3, %ymm3, %ymm3
+
+vpcmpgtb  %ymm3, %ymm3, %ymm5
+vpcmpgtd  %ymm3, %ymm3, %ymm5
+vpcmpgtq  %ymm3, %ymm3, %ymm5
+vpcmpgtw  %ymm3, %ymm3, %ymm5
+
+psubb   %mm2, %mm2
+psubd   %mm2, %mm2
+psubq   %mm2, %mm2
+psubw   %mm2, %mm2
+psubb   %xmm2, %xmm2
+psubd   %xmm2, %xmm2
+psubq   %xmm2, %xmm2
+psubw   %xmm2, %xmm2
+vpsubb  %xmm3, %xmm3, %xmm3
+vpsubd  %xmm3, %xmm3, %xmm3
+vpsubq  %xmm3, %xmm3, %xmm3
+vpsubw  %xmm3, %xmm3, %xmm3
+vpsubb  %ymm3, %ymm3, %ymm3
+vpsubd  %ymm3, %ymm3, %ymm3
+vpsubq  %ymm3, %ymm3, %ymm3
+vpsubw  %ymm3, %ymm3, %ymm3
+
+vpsubb  %xmm3, %xmm3, %xmm5
+vpsubd  %xmm3, %xmm3, %xmm5
+vpsubq  %xmm3, %xmm3, %xmm5
+vpsubw  %xmm3, %xmm3, %xmm5
+vpsubb  %ymm3, %ymm3, %ymm5
+vpsubd  %ymm3, %ymm3, %ymm5
+vpsubq  %ymm3, %ymm3, %ymm5
+vpsubw  %ymm3, %ymm3, %ymm5
+
+andnps  %xmm0, %xmm0
+andnpd  %xmm1, %xmm1
+vandnps %xmm2, %xmm2, %xmm2
+vandnpd %xmm1, %xmm1, %xmm1
+vandnps %ymm2, %ymm2, %ymm2
+vandnpd %ymm1, %ymm1, %ymm1
+pandn   %mm2, %mm2
+pandn   %xmm2, %xmm2
+vpandn  %xmm3, %xmm3, %xmm3
+vpandn  %ymm3, %ymm3, %ymm3
+
+vandnps %xmm2, %xmm2, %xmm5
+vandnpd %xmm1, %xmm1, %xmm5
+vpandn  %xmm3, %xmm3, %xmm5
+vandnps %ymm2, %ymm2, %ymm5
+vandnpd %ymm1, %ymm1, %ymm5
+vpandn  %ymm3, %ymm3, %ymm5
+
+xorps  %xmm0, %xmm0
+xorpd  %xmm1, %xmm1
+vxorps %xmm2, %xmm2, %xmm2
+vxorpd %xmm1, %xmm1, %xmm1
+vxorps %ymm2, %ymm2, %ymm2
+vxorpd %ymm1, %ymm1, %ymm1
+pxor   %mm2, %mm2
+pxor   %xmm2, %xmm2
+vpxor  %xmm3, %xmm3, %xmm3
+vpxor  %ymm3, %ymm3, %ymm3
+
+vxorps %xmm4, %xmm4, %xmm5
+vxorpd %xmm1, %xmm1, %xmm3
+vxorps %ymm4, %ymm4, %ymm5
+vxorpd %ymm1, %ymm1, %ymm3
+vpxor  %xmm3, %xmm3, %xmm5
+vpxor  %ymm3, %ymm3, %ymm5
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      83
+# CHECK-NEXT: Total Cycles:      34
+# CHECK-NEXT: Total uOps:        83
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    2.44
+# CHECK-NEXT: IPC:               2.44
+# CHECK-NEXT: Block RThroughput: 16.7
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      1     1.00                        pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  1      1     1.00                        pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  1      1     1.00                        pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      3     1.00                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        andnps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     0.33                        andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.50                        pandn	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pandn	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     0.33                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.50                        pxor	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    87
+# CHECK-NEXT: Max number of mappings used:         66
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SKLDivider
+# CHECK-NEXT: [1]   - SKLFPDivider
+# CHECK-NEXT: [2]   - SKLPort0
+# CHECK-NEXT: [3]   - SKLPort1
+# CHECK-NEXT: [4]   - SKLPort2
+# CHECK-NEXT: [5]   - SKLPort3
+# CHECK-NEXT: [6]   - SKLPort4
+# CHECK-NEXT: [7]   - SKLPort5
+# CHECK-NEXT: [8]   - SKLPort6
+# CHECK-NEXT: [9]   - SKLPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     28.00  26.00   -      -      -     27.00  2.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubq	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789          0123
+# CHECK-NEXT: Index     0123456789          0123456789
+
+# CHECK:      [0,0]     DeER .    .    .    .    .    .  .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .  .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .  .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .  .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     DeE---R   .    .    .    .    .  .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     D=eE--R   .    .    .    .    .  .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D=eE-R   .    .    .    .    .  .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .DeE--R   .    .    .    .    .  .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     .D=eE-R   .    .    .    .    .  .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     .D===eeeER.    .    .    .    .  .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    .D======eER    .    .    .    .  .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    .D==eE----R    .    .    .    .  .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    . D==eE---R    .    .    .    .  .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    . D===eeeER    .    .    .    .  .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    . D======eER   .    .    .    .  .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    . D=======eER  .    .    .    .  .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    . D=======eER  .    .    .    .  .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    . D=======eeeER.    .    .    .  .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .  D=======eE-R.    .    .    .  .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .  D=======eE-R.    .    .    .  .   vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20]    .  D========eER.    .    .    .  .   vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21]    .  D=========eeeER  .    .    .  .   vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22]    .  D============eER .    .    .  .   vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23]    .  D=============eER.    .    .  .   vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24]    .   D============eER.    .    .  .   vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25]    .   D============eeeER   .    .  .   vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26]    .   D=============eE-R   .    .  .   vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27]    .   D=eE-------------R   .    .  .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,28]    .   D==eE------------R   .    .  .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,29]    .   D===eE-----------R   .    .  .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,30]    .    D===eE----------R   .    .  .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,31]    .    D===eE----------R   .    .  .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,32]    .    D=====eE--------R   .    .  .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,33]    .    D======eE-------R   .    .  .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,34]    .    D=======eE------R   .    .  .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,35]    .    D============eE-R   .    .  .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36]    .    .D============eER   .    .  .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37]    .    .D=============eER  .    .  .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38]    .    .D==============eER .    .  .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39]    .    .D===============eER.    .  .   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40]    .    .D================eER    .  .   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41]    .    .D=================eER   .  .   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42]    .    . D=================eER  .  .   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43]    .    . D==================eER .  .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44]    .    . D==================eER .  .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    . D==================eER .  .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46]    .    . D===================eER.  .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    . D===================eER.  .   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48]    .    .  D==================eER.  .   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49]    .    .  D===================eER  .   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50]    .    .  D===================eER  .   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51]    .    .  D===eE----------------R  .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,52]    .    .  D====eE---------------R  .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,53]    .    .  D=====eE--------------R  .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,54]    .    .   D====eE--------------R  .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,55]    .    .   D=====eE-------------R  .   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,56]    .    .   D=====eE-------------R  .   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,57]    .    .   D====eE--------------R  .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,58]    .    .   D======eE------------R  .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,59]    .    .   D==================eER  .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,60]    .    .    D==================eER .   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,61]    .    .    D=======eE-----------R .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,62]    .    .    D=====eE-------------R .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,63]    .    .    D===================eER.   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,64]    .    .    D========eE-----------R.   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,65]    .    .    D========eE-----------R.   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,66]    .    .    .D==================eER.   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,67]    .    .    .D===eE---------------R.   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,68]    .    .    .D========eE----------R.   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,69]    .    .    .D========eE----------R.   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,70]    .    .    .D=========eE---------R.   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,71]    .    .    .D=========eE---------R.   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,72]    .    .    . D=========eE--------R.   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,73]    .    .    . D=========eE--------R.   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,74]    .    .    . D==========eE-------R.   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,75]    .    .    . D=================eER.   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,76]    .    .    . D==================eER   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,77]    .    .    . D===========eE-------R   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,78]    .    .    .  D==========eE-------R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,79]    .    .    .  D===========eE------R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,80]    .    .    .  D=========eE--------R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,81]    .    .    .  D===========eE------R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,82]    .    .    .  D===============eE--R   vpxor	%ymm3, %ymm3, %ymm5
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    3.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    2.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     2.0    0.0    1.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     1.0    1.0    2.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     2.0    0.0    1.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     4.0    1.0    0.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     7.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     3.0    3.0    4.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     3.0    0.0    3.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     7.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     8.0    0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     8.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     8.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     8.0    1.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     8.0    1.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20.    1     9.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21.    1     10.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22.    1     13.0   0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23.    1     14.0   0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24.    1     13.0   0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25.    1     13.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26.    1     14.0   1.0    1.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 27.    1     2.0    2.0    13.0      psubb	%mm2, %mm2
+# CHECK-NEXT: 28.    1     3.0    0.0    12.0      psubd	%mm2, %mm2
+# CHECK-NEXT: 29.    1     4.0    0.0    11.0      psubq	%mm2, %mm2
+# CHECK-NEXT: 30.    1     4.0    0.0    10.0      psubw	%mm2, %mm2
+# CHECK-NEXT: 31.    1     4.0    0.0    10.0      psubb	%xmm2, %xmm2
+# CHECK-NEXT: 32.    1     6.0    1.0    8.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 33.    1     7.0    0.0    7.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 34.    1     8.0    0.0    6.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 35.    1     13.0   1.0    1.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 36.    1     13.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 37.    1     14.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 38.    1     15.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39.    1     16.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40.    1     17.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41.    1     18.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42.    1     18.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43.    1     19.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44.    1     19.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     19.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46.    1     20.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     20.0   1.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48.    1     19.0   1.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49.    1     20.0   2.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50.    1     20.0   2.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51.    1     4.0    4.0    16.0      andnps	%xmm0, %xmm0
+# CHECK-NEXT: 52.    1     5.0    5.0    15.0      andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 53.    1     6.0    0.0    14.0      vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 54.    1     5.0    0.0    14.0      vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 55.    1     6.0    0.0    13.0      vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 56.    1     6.0    0.0    13.0      vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 57.    1     5.0    4.0    14.0      pandn	%mm2, %mm2
+# CHECK-NEXT: 58.    1     7.0    0.0    12.0      pandn	%xmm2, %xmm2
+# CHECK-NEXT: 59.    1     19.0   2.0    0.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 60.    1     19.0   0.0    0.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 61.    1     8.0    1.0    11.0      vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 62.    1     6.0    0.0    13.0      vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 63.    1     20.0   0.0    0.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 64.    1     9.0    2.0    11.0      vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 65.    1     9.0    3.0    11.0      vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 66.    1     19.0   0.0    0.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 67.    1     4.0    2.0    15.0      xorps	%xmm0, %xmm0
+# CHECK-NEXT: 68.    1     9.0    4.0    10.0      xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 69.    1     9.0    3.0    10.0      vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 70.    1     10.0   0.0    9.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 71.    1     10.0   0.0    9.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 72.    1     10.0   0.0    8.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 73.    1     10.0   7.0    8.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 74.    1     11.0   1.0    7.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 75.    1     18.0   0.0    0.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 76.    1     19.0   0.0    0.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 77.    1     12.0   12.0   7.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 78.    1     11.0   1.0    7.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 79.    1     12.0   12.0   6.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 80.    1     10.0   0.0    8.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 81.    1     12.0   1.0    6.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 82.    1     16.0   5.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s
new file mode 100644
index 0000000000000..d273667b7d8d3
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s
@@ -0,0 +1,778 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -register-file-stats -iterations=1 < %s | FileCheck %s
+
+# On SKX, renamer-based zeroing does not work for:
+#  - 16 and 8-bit GPRs
+#  - MMX
+#  - ANDN variants
+
+subl  %eax, %eax
+subq  %rax, %rax
+xorl  %eax, %eax
+xorq  %rax, %rax
+
+pcmpgtb   %mm2, %mm2
+pcmpgtd   %mm2, %mm2
+# pcmpgtq   %mm2, %mm2 # invalid operand for instruction
+pcmpgtw   %mm2, %mm2
+
+pcmpgtb   %xmm2, %xmm2
+pcmpgtd   %xmm2, %xmm2
+pcmpgtq   %xmm2, %xmm2
+pcmpgtw   %xmm2, %xmm2
+
+vpcmpgtb  %xmm3, %xmm3, %xmm3
+vpcmpgtd  %xmm3, %xmm3, %xmm3
+vpcmpgtq  %xmm3, %xmm3, %xmm3
+vpcmpgtw  %xmm3, %xmm3, %xmm3
+
+vpcmpgtb  %xmm3, %xmm3, %xmm5
+vpcmpgtd  %xmm3, %xmm3, %xmm5
+vpcmpgtq  %xmm3, %xmm3, %xmm5
+vpcmpgtw  %xmm3, %xmm3, %xmm5
+
+vpcmpgtb  %ymm3, %ymm3, %ymm3
+vpcmpgtd  %ymm3, %ymm3, %ymm3
+vpcmpgtq  %ymm3, %ymm3, %ymm3
+vpcmpgtw  %ymm3, %ymm3, %ymm3
+
+vpcmpgtb  %ymm3, %ymm3, %ymm5
+vpcmpgtd  %ymm3, %ymm3, %ymm5
+vpcmpgtq  %ymm3, %ymm3, %ymm5
+vpcmpgtw  %ymm3, %ymm3, %ymm5
+
+psubb   %mm2, %mm2
+psubd   %mm2, %mm2
+psubq   %mm2, %mm2
+psubw   %mm2, %mm2
+psubb   %xmm2, %xmm2
+psubd   %xmm2, %xmm2
+psubq   %xmm2, %xmm2
+psubw   %xmm2, %xmm2
+vpsubb  %xmm3, %xmm3, %xmm3
+vpsubd  %xmm3, %xmm3, %xmm3
+vpsubq  %xmm3, %xmm3, %xmm3
+vpsubw  %xmm3, %xmm3, %xmm3
+vpsubb  %ymm3, %ymm3, %ymm3
+vpsubd  %ymm3, %ymm3, %ymm3
+vpsubq  %ymm3, %ymm3, %ymm3
+vpsubw  %ymm3, %ymm3, %ymm3
+
+vpsubb  %xmm3, %xmm3, %xmm5
+vpsubd  %xmm3, %xmm3, %xmm5
+vpsubq  %xmm3, %xmm3, %xmm5
+vpsubw  %xmm3, %xmm3, %xmm5
+vpsubb  %ymm3, %ymm3, %ymm5
+vpsubd  %ymm3, %ymm3, %ymm5
+vpsubq  %ymm3, %ymm3, %ymm5
+vpsubw  %ymm3, %ymm3, %ymm5
+
+vpsubb  %xmm19, %xmm19, %xmm19
+vpsubd  %xmm19, %xmm19, %xmm19
+vpsubq  %xmm19, %xmm19, %xmm19
+vpsubw  %xmm19, %xmm19, %xmm19
+vpsubb  %ymm19, %ymm19, %ymm19
+vpsubd  %ymm19, %ymm19, %ymm19
+vpsubq  %ymm19, %ymm19, %ymm19
+vpsubw  %ymm19, %ymm19, %ymm19
+vpsubb  %zmm19, %zmm19, %zmm19
+vpsubd  %zmm19, %zmm19, %zmm19
+vpsubq  %zmm19, %zmm19, %zmm19
+vpsubw  %zmm19, %zmm19, %zmm19
+
+vpsubb  %xmm19, %xmm19, %xmm21
+vpsubd  %xmm19, %xmm19, %xmm21
+vpsubq  %xmm19, %xmm19, %xmm21
+vpsubw  %xmm19, %xmm19, %xmm21
+vpsubb  %ymm19, %ymm19, %ymm21
+vpsubd  %ymm19, %ymm19, %ymm21
+vpsubq  %ymm19, %ymm19, %ymm21
+vpsubw  %ymm19, %ymm19, %ymm21
+vpsubb  %zmm19, %zmm19, %zmm21
+vpsubd  %zmm19, %zmm19, %zmm21
+vpsubq  %zmm19, %zmm19, %zmm21
+vpsubw  %zmm19, %zmm19, %zmm21
+
+andnps  %xmm0, %xmm0
+andnpd  %xmm1, %xmm1
+vandnps %xmm2, %xmm2, %xmm2
+vandnpd %xmm1, %xmm1, %xmm1
+vandnps %ymm2, %ymm2, %ymm2
+vandnpd %ymm1, %ymm1, %ymm1
+vandnps %zmm2, %zmm2, %zmm2
+vandnpd %zmm1, %zmm1, %zmm1
+pandn   %mm2, %mm2
+pandn   %xmm2, %xmm2
+vpandn  %xmm3, %xmm3, %xmm3
+vpandn  %ymm3, %ymm3, %ymm3
+
+vpandnd  %xmm19, %xmm19, %xmm19
+vpandnq  %xmm19, %xmm19, %xmm19
+vpandnd  %ymm19, %ymm19, %ymm19
+vpandnq  %ymm19, %ymm19, %ymm19
+vpandnd  %zmm19, %zmm19, %zmm19
+vpandnq  %zmm19, %zmm19, %zmm19
+
+vandnps %xmm2, %xmm2, %xmm5
+vandnpd %xmm1, %xmm1, %xmm5
+vpandn  %xmm3, %xmm3, %xmm5
+vandnps %ymm2, %ymm2, %ymm5
+vandnpd %ymm1, %ymm1, %ymm5
+vpandn  %ymm3, %ymm3, %ymm5
+vandnps %zmm2, %zmm2, %zmm5
+vandnpd %zmm1, %zmm1, %zmm5
+
+vpandnd  %xmm19, %xmm19, %xmm21
+vpandnq  %xmm19, %xmm19, %xmm21
+vpandnd  %ymm19, %ymm19, %ymm21
+vpandnq  %ymm19, %ymm19, %ymm21
+vpandnd  %zmm19, %zmm19, %zmm21
+vpandnq  %zmm19, %zmm19, %zmm21
+
+xorps  %xmm0, %xmm0
+xorpd  %xmm1, %xmm1
+vxorps %xmm2, %xmm2, %xmm2
+vxorpd %xmm1, %xmm1, %xmm1
+vxorps %ymm2, %ymm2, %ymm2
+vxorpd %ymm1, %ymm1, %ymm1
+vxorps %zmm2, %zmm2, %zmm2
+vxorpd %zmm1, %zmm1, %zmm1
+pxor   %mm2, %mm2
+pxor   %xmm2, %xmm2
+vpxor  %xmm3, %xmm3, %xmm3
+vpxor  %ymm3, %ymm3, %ymm3
+
+vpxord  %xmm19, %xmm19, %xmm19
+vpxorq  %xmm19, %xmm19, %xmm19
+vpxord  %ymm19, %ymm19, %ymm19
+vpxorq  %ymm19, %ymm19, %ymm19
+vpxord  %zmm19, %zmm19, %zmm19
+vpxorq  %zmm19, %zmm19, %zmm19
+
+vxorps %xmm4, %xmm4, %xmm5
+vxorpd %xmm1, %xmm1, %xmm3
+vxorps %ymm4, %ymm4, %ymm5
+vxorpd %ymm1, %ymm1, %ymm3
+vxorps %zmm4, %zmm4, %zmm5
+vxorpd %zmm1, %zmm1, %zmm3
+vpxor  %xmm3, %xmm3, %xmm5
+vpxor  %ymm3, %ymm3, %ymm5
+
+vpxord  %xmm19, %xmm19, %xmm21
+vpxorq  %xmm19, %xmm19, %xmm21
+vpxord  %ymm19, %ymm19, %ymm21
+vpxorq  %ymm19, %ymm19, %ymm21
+vpxord  %zmm19, %zmm19, %zmm21
+vpxorq  %zmm19, %zmm19, %zmm21
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      139
+# CHECK-NEXT: Total Cycles:      53
+# CHECK-NEXT: Total uOps:        139
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    2.62
+# CHECK-NEXT: IPC:               2.62
+# CHECK-NEXT: Block RThroughput: 30.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      1     1.00                        pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  1      1     1.00                        pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  1      1     1.00                        pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      3     1.00                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.33                        vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.33                        vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.33                        andnps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     0.33                        andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.50                        vandnps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT:  1      1     0.50                        vandnpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT:  1      1     0.50                        pandn	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pandn	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpandnd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpandnq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.33                        vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.50                        vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.50                        vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.33                        vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.50                        vandnps	%zmm2, %zmm2, %zmm5
+# CHECK-NEXT:  1      1     0.50                        vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT:  1      1     0.33                        vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.33                        vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.50                        vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.50                        vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.33                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      1     0.33                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      1     0.50                        vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT:  1      1     0.50                        vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT:  1      1     0.50                        pxor	%mm2, %mm2
+# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      1     0.33                        vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      1     0.33                        vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.33                        vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      1     0.50                        vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.50                        vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      1     0.50                        vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT:  1      1     0.50                        vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      1     0.33                        vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      1     0.33                        vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.33                        vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      1     0.50                        vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      1     0.50                        vpxorq	%zmm19, %zmm19, %zmm21
+
+# CHECK:      Register File statistics:
+# CHECK-NEXT: Total number of mappings created:    143
+# CHECK-NEXT: Max number of mappings used:         91
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SKXDivider
+# CHECK-NEXT: [1]   - SKXFPDivider
+# CHECK-NEXT: [2]   - SKXPort0
+# CHECK-NEXT: [3]   - SKXPort1
+# CHECK-NEXT: [4]   - SKXPort2
+# CHECK-NEXT: [5]   - SKXPort3
+# CHECK-NEXT: [6]   - SKXPort4
+# CHECK-NEXT: [7]   - SKXPort5
+# CHECK-NEXT: [8]   - SKXPort6
+# CHECK-NEXT: [9]   - SKXPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     47.00  44.00   -      -      -     46.00  2.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubq	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%zmm2, %zmm2, %zmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pxor	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxorq	%zmm19, %zmm19, %zmm21
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0123456789          0123456789          012
+# CHECK-NEXT: Index     0123456789          0123456789          0123456789
+
+# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .    .    . .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .    .    .    .    . .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .    .    .    .    . .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .    .    .    .    . .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     DeE---R   .    .    .    .    .    .    .    .    . .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     D=eE--R   .    .    .    .    .    .    .    .    . .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D=eE-R   .    .    .    .    .    .    .    .    . .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .DeE--R   .    .    .    .    .    .    .    .    . .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     .D=eE-R   .    .    .    .    .    .    .    .    . .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     .D===eeeER.    .    .    .    .    .    .    .    . .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    .D======eER    .    .    .    .    .    .    .    . .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    .D==eE----R    .    .    .    .    .    .    .    . .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    . D==eE---R    .    .    .    .    .    .    .    . .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    . D===eeeER    .    .    .    .    .    .    .    . .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    . D======eER   .    .    .    .    .    .    .    . .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    . D=======eER  .    .    .    .    .    .    .    . .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    . D=======eER  .    .    .    .    .    .    .    . .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    . D=======eeeER.    .    .    .    .    .    .    . .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .  D=======eE-R.    .    .    .    .    .    .    . .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .  D=======eE-R.    .    .    .    .    .    .    . .   vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20]    .  D========eER.    .    .    .    .    .    .    . .   vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21]    .  D=========eeeER  .    .    .    .    .    .    . .   vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22]    .  D============eER .    .    .    .    .    .    . .   vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23]    .  D=============eER.    .    .    .    .    .    . .   vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24]    .   D============eER.    .    .    .    .    .    . .   vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25]    .   D============eeeER   .    .    .    .    .    . .   vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26]    .   D=============eE-R   .    .    .    .    .    . .   vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27]    .   D=eE-------------R   .    .    .    .    .    . .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,28]    .   D==eE------------R   .    .    .    .    .    . .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,29]    .   D===eE-----------R   .    .    .    .    .    . .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,30]    .    D===eE----------R   .    .    .    .    .    . .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,31]    .    D===eE----------R   .    .    .    .    .    . .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,32]    .    D=====eE--------R   .    .    .    .    .    . .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,33]    .    D======eE-------R   .    .    .    .    .    . .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,34]    .    D=======eE------R   .    .    .    .    .    . .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,35]    .    D============eE-R   .    .    .    .    .    . .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36]    .    .D============eER   .    .    .    .    .    . .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37]    .    .D=============eER  .    .    .    .    .    . .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38]    .    .D==============eER .    .    .    .    .    . .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39]    .    .D===============eER.    .    .    .    .    . .   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40]    .    .D================eER    .    .    .    .    . .   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41]    .    .D=================eER   .    .    .    .    . .   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42]    .    . D=================eER  .    .    .    .    . .   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43]    .    . D==================eER .    .    .    .    . .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44]    .    . D==================eER .    .    .    .    . .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    . D==================eER .    .    .    .    . .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46]    .    . D===================eER.    .    .    .    . .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    . D===================eER.    .    .    .    . .   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48]    .    .  D==================eER.    .    .    .    . .   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49]    .    .  D===================eER    .    .    .    . .   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50]    .    .  D===================eER    .    .    .    . .   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51]    .    .  D===eE----------------R    .    .    .    . .   vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,52]    .    .  D====eE---------------R    .    .    .    . .   vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,53]    .    .  D=====eE--------------R    .    .    .    . .   vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,54]    .    .   D=====eE-------------R    .    .    .    . .   vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,55]    .    .   D======eE------------R    .    .    .    . .   vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,56]    .    .   D========eE----------R    .    .    .    . .   vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,57]    .    .   D=========eE---------R    .    .    .    . .   vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,58]    .    .   D==========eE--------R    .    .    .    . .   vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,59]    .    .   D===========eE-------R    .    .    .    . .   vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,60]    .    .    D===========eE------R    .    .    .    . .   vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,61]    .    .    D============eE-----R    .    .    .    . .   vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,62]    .    .    D=============eE----R    .    .    .    . .   vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,63]    .    .    D==============eE---R    .    .    .    . .   vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,64]    .    .    D==============eE---R    .    .    .    . .   vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,65]    .    .    D=================eER    .    .    .    . .   vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,66]    .    .    .D=================eER   .    .    .    . .   vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,67]    .    .    .D=================eER   .    .    .    . .   vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,68]    .    .    .D=================eER   .    .    .    . .   vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,69]    .    .    .D==================eER  .    .    .    . .   vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,70]    .    .    .D==================eER  .    .    .    . .   vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,71]    .    .    .D==================eER  .    .    .    . .   vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,72]    .    .    . D==================eER .    .    .    . .   vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,73]    .    .    . D==================eER .    .    .    . .   vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,74]    .    .    . D==================eER .    .    .    . .   vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,75]    .    .    . D=eE-----------------R .    .    .    . .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,76]    .    .    . D=eE-----------------R .    .    .    . .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,77]    .    .    . D==eE----------------R .    .    .    . .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,78]    .    .    .  D=eE----------------R .    .    .    . .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,79]    .    .    .  D==eE---------------R .    .    .    . .   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,80]    .    .    .  D=======eE----------R .    .    .    . .   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,81]    .    .    .  D=====eE------------R .    .    .    . .   vandnps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: [0,82]    .    .    .  D========eE---------R .    .    .    . .   vandnpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: [0,83]    .    .    .  D=========eE--------R .    .    .    . .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,84]    .    .    .   D=====eE-----------R .    .    .    . .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,85]    .    .    .   D=================eER.    .    .    . .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,86]    .    .    .   D==================eER    .    .    . .   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,87]    .    .    .   D=================eE-R    .    .    . .   vpandnd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,88]    .    .    .   D==================eER    .    .    . .   vpandnq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,89]    .    .    .   D===================eER   .    .    . .   vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,90]    .    .    .    D===================eER  .    .    . .   vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,91]    .    .    .    D====================eER .    .    . .   vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,92]    .    .    .    D=====================eER.    .    . .   vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,93]    .    .    .    D========eE-------------R.    .    . .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,94]    .    .    .    D================eE-----R.    .    . .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,95]    .    .    .    D==================eE---R.    .    . .   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,96]    .    .    .    .D================eE----R.    .    . .   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,97]    .    .    .    .D=================eE---R.    .    . .   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,98]    .    .    .    .D==================eE--R.    .    . .   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,99]    .    .    .    .D==================eE--R.    .    . .   vandnps	%zmm2, %zmm2, %zmm5
+# CHECK-NEXT: [0,100]   .    .    .    .D===================eE-R.    .    . .   vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT: [0,101]   .    .    .    . D====================eER    .    . .   vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,102]   .    .    .    . D====================eER    .    . .   vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,103]   .    .    .    . D====================eER    .    . .   vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,104]   .    .    .    .  D====================eER   .    . .   vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,105]   .    .    .    .  D====================eER   .    . .   vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,106]   .    .    .    .  D=====================eER  .    . .   vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,107]   .    .    .    .   D=================eE---R  .    . .   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,108]   .    .    .    .   D================eE----R  .    . .   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,109]   .    .    .    .   D=================eE---R  .    . .   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,110]   .    .    .    .    D==================eE-R  .    . .   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,111]   .    .    .    .    D===================eER  .    . .   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,112]   .    .    .    .    D===================eER  .    . .   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,113]   .    .    .    .    .D===================eER .    . .   vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: [0,114]   .    .    .    .    .D===================eER .    . .   vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: [0,115]   .    .    .    .    .D====================eER.    . .   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,116]   .    .    .    .    . D===================eER.    . .   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,117]   .    .    .    .    . D==================eE-R.    . .   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,118]   .    .    .    .    . D====================eER    . .   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,119]   .    .    .    .    .  D==================eE-R    . .   vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,120]   .    .    .    .    .  D===================eER    . .   vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,121]   .    .    .    .    .  D====================eER   . .   vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,122]   .    .    .    .    .   D====================eER  . .   vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,123]   .    .    .    .    .   D=====================eER . .   vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,124]   .    .    .    .    .   D======================eER. .   vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,125]   .    .    .    .    .    D=================eE----R. .   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,126]   .    .    .    .    .    D==================eE---R. .   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,127]   .    .    .    .    .    D===================eE--R. .   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,128]   .    .    .    .    .    .D==================eE--R. .   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,129]   .    .    .    .    .    .D===================eE-R. .   vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT: [0,130]   .    .    .    .    .    .D=================eE---R. .   vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT: [0,131]   .    .    .    .    .    . D==================eE-R. .   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,132]   .    .    .    .    .    . D===================eER. .   vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,133]   .    .    .    .    .    . D====================eER .   vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,134]   .    .    .    .    .    .  D===================eER .   vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,135]   .    .    .    .    .    .  D===================eER .   vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,136]   .    .    .    .    .    .  D====================eER.   vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,137]   .    .    .    .    .    .   D===================eER.   vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,138]   .    .    .    .    .    .   D====================eER   vpxorq	%zmm19, %zmm19, %zmm21
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    3.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    2.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     2.0    0.0    1.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     1.0    1.0    2.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     2.0    0.0    1.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     4.0    1.0    0.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     7.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     3.0    3.0    4.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     3.0    0.0    3.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     7.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     8.0    0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     8.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     8.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     8.0    1.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     8.0    1.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20.    1     9.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21.    1     10.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22.    1     13.0   0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23.    1     14.0   0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24.    1     13.0   0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25.    1     13.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26.    1     14.0   1.0    1.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 27.    1     2.0    2.0    13.0      psubb	%mm2, %mm2
+# CHECK-NEXT: 28.    1     3.0    0.0    12.0      psubd	%mm2, %mm2
+# CHECK-NEXT: 29.    1     4.0    0.0    11.0      psubq	%mm2, %mm2
+# CHECK-NEXT: 30.    1     4.0    0.0    10.0      psubw	%mm2, %mm2
+# CHECK-NEXT: 31.    1     4.0    0.0    10.0      psubb	%xmm2, %xmm2
+# CHECK-NEXT: 32.    1     6.0    1.0    8.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 33.    1     7.0    0.0    7.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 34.    1     8.0    0.0    6.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 35.    1     13.0   1.0    1.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 36.    1     13.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 37.    1     14.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 38.    1     15.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39.    1     16.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40.    1     17.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41.    1     18.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42.    1     18.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43.    1     19.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44.    1     19.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     19.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46.    1     20.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     20.0   1.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48.    1     19.0   1.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49.    1     20.0   2.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50.    1     20.0   2.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51.    1     4.0    4.0    16.0      vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 52.    1     5.0    0.0    15.0      vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 53.    1     6.0    0.0    14.0      vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 54.    1     6.0    0.0    13.0      vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 55.    1     7.0    0.0    12.0      vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 56.    1     9.0    1.0    10.0      vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 57.    1     10.0   0.0    9.0       vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 58.    1     11.0   0.0    8.0       vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 59.    1     12.0   0.0    7.0       vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 60.    1     12.0   0.0    6.0       vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 61.    1     13.0   0.0    5.0       vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 62.    1     14.0   0.0    4.0       vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 63.    1     15.0   0.0    3.0       vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 64.    1     15.0   0.0    3.0       vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 65.    1     18.0   3.0    0.0       vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 66.    1     18.0   4.0    0.0       vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 67.    1     18.0   4.0    0.0       vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 68.    1     18.0   4.0    0.0       vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 69.    1     19.0   5.0    0.0       vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 70.    1     19.0   5.0    0.0       vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 71.    1     19.0   5.0    0.0       vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 72.    1     19.0   6.0    0.0       vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 73.    1     19.0   6.0    0.0       vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 74.    1     19.0   6.0    0.0       vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 75.    1     2.0    2.0    17.0      andnps	%xmm0, %xmm0
+# CHECK-NEXT: 76.    1     2.0    2.0    17.0      andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 77.    1     3.0    1.0    16.0      vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 78.    1     2.0    0.0    16.0      vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 79.    1     3.0    0.0    15.0      vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 80.    1     8.0    5.0    10.0      vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 81.    1     6.0    2.0    12.0      vandnps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: 82.    1     9.0    0.0    9.0       vandnpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: 83.    1     10.0   10.0   8.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 84.    1     6.0    0.0    11.0      pandn	%xmm2, %xmm2
+# CHECK-NEXT: 85.    1     18.0   6.0    0.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 86.    1     19.0   0.0    0.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 87.    1     18.0   7.0    1.0       vpandnd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 88.    1     19.0   0.0    0.0       vpandnq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 89.    1     20.0   0.0    0.0       vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 90.    1     20.0   0.0    0.0       vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 91.    1     21.0   0.0    0.0       vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 92.    1     22.0   0.0    0.0       vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 93.    1     9.0    3.0    13.0      vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 94.    1     17.0   9.0    5.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 95.    1     19.0   0.0    3.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 96.    1     17.0   12.0   4.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 97.    1     18.0   11.0   3.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 98.    1     19.0   1.0    2.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 99.    1     19.0   14.0   2.0       vandnps	%zmm2, %zmm2, %zmm5
+# CHECK-NEXT: 100.   1     20.0   13.0   1.0       vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT: 101.   1     21.0   0.0    0.0       vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 102.   1     21.0   0.0    0.0       vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 103.   1     21.0   0.0    0.0       vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 104.   1     21.0   1.0    0.0       vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 105.   1     21.0   1.0    0.0       vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 106.   1     22.0   2.0    0.0       vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 107.   1     18.0   18.0   3.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 108.   1     17.0   13.0   4.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 109.   1     18.0   16.0   3.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 110.   1     19.0   2.0    1.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 111.   1     20.0   2.0    0.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 112.   1     20.0   0.0    0.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 113.   1     20.0   0.0    0.0       vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: 114.   1     20.0   0.0    0.0       vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: 115.   1     21.0   18.0   0.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 116.   1     20.0   0.0    0.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 117.   1     19.0   7.0    1.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 118.   1     21.0   1.0    0.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 119.   1     19.0   4.0    1.0       vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 120.   1     20.0   0.0    0.0       vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 121.   1     21.0   0.0    0.0       vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 122.   1     21.0   0.0    0.0       vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 123.   1     22.0   0.0    0.0       vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 124.   1     23.0   0.0    0.0       vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 125.   1     18.0   18.0   4.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 126.   1     19.0   2.0    3.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 127.   1     20.0   20.0   2.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 128.   1     19.0   3.0    2.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 129.   1     20.0   20.0   1.0       vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT: 130.   1     18.0   2.0    3.0       vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT: 131.   1     19.0   1.0    1.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 132.   1     20.0   2.0    0.0       vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 133.   1     21.0   0.0    0.0       vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 134.   1     20.0   0.0    0.0       vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 135.   1     20.0   0.0    0.0       vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 136.   1     21.0   1.0    0.0       vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 137.   1     20.0   1.0    0.0       vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 138.   1     21.0   2.0    0.0       vpxorq	%zmm19, %zmm19, %zmm21

From 4b08fcdeb13c0d6ebb32688e0b7b0915a1e5c9bd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 25 May 2019 04:47:49 +0000
Subject: [PATCH 0222/1176] [X86] Add zero idioms to the haswell, broadwell,
 and skylake schedule models. Add 256-bit fp xor to sandybridge zero idioms

This copies the Sandy Bridge zero idiom support to later CPUs. Adding the AVX2 and AVX512F/VL instructions as appropriate.

Differential Revision: https://reviews.llvm.org/D62360

llvm-svn: 361690
---
 llvm/lib/Target/X86/X86SchedBroadwell.td      |   87 +-
 llvm/lib/Target/X86/X86SchedHaswell.td        |   87 +-
 llvm/lib/Target/X86/X86SchedSandyBridge.td    |   20 +-
 llvm/lib/Target/X86/X86SchedSkylakeClient.td  |  100 +-
 llvm/lib/Target/X86/X86SchedSkylakeServer.td  |  119 +-
 .../llvm-mca/X86/Broadwell/zero-idioms.s      |  534 ++++-----
 .../tools/llvm-mca/X86/Haswell/zero-idioms.s  |  600 +++++-----
 .../llvm-mca/X86/SandyBridge/zero-idioms.s    |   46 +-
 .../llvm-mca/X86/SkylakeClient/zero-idioms.s  |  612 +++++-----
 .../llvm-mca/X86/SkylakeServer/zero-idioms.s  | 1018 ++++++++---------
 10 files changed, 1800 insertions(+), 1423 deletions(-)

diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 60e2721c795ee..7574e4b8f8963 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -889,8 +889,7 @@ def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[BWWriteResGroup47], (instregex "(V?)PCMPGTQ(Y?)rr",
-                                            "MUL_(FPrST0|FST0r|FrST0)")>;
+def: InstRW<[BWWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
 
 def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
   let Latency = 5;
@@ -1600,6 +1599,90 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>;
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Intruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Haswell and Broadwell Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def BWWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def BWWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[BWWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                         XOR32rr, XOR64rr)>;
+
+def BWWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[BWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+                                          VXORPDrr)>;
+
+def BWWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[BWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def BWWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def BWWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def BWWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+                                              PSUBDrr, VPSUBDrr,
+                                              PSUBQrr, VPSUBQrr,
+                                              PSUBWrr, VPSUBWrr,
+                                              PCMPGTBrr, VPCMPGTBrr,
+                                              PCMPGTDrr, VPCMPGTDrr,
+                                              PCMPGTWrr, VPCMPGTWrr)>;
+
+def BWWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
+                                              VPSUBDYrr,
+                                              VPSUBQYrr,
+                                              VPSUBWYrr,
+                                              VPCMPGTBYrr,
+                                              VPCMPGTDYrr,
+                                              VPCMPGTWYrr)>;
+
+def BWWritePCMPGTQ : SchedWriteRes<[BWPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def BWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [BWWritePCMPGTQ]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                 VPCMPGTQYrr)>;
+
+
 // CMOVs that use both Z and C flag require an extra uop.
 def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> {
   let Latency = 2;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 6ddb542e41590..284d1567c5c64 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -1448,8 +1448,7 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> {
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
-                                            "MUL_(FPrST0|FST0r|FrST0)")>;
+def: InstRW<[HWWriteResGroup89], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
 
 def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
   let Latency = 11;
@@ -1853,6 +1852,90 @@ def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Intruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Haswell and Broadwell Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def HWWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def HWWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[HWWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                         XOR32rr, XOR64rr)>;
+
+def HWWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[HWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+                                          VXORPDrr)>;
+
+def HWWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[HWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def HWWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def HWWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def HWWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+                                              PSUBDrr, VPSUBDrr,
+                                              PSUBQrr, VPSUBQrr,
+                                              PSUBWrr, VPSUBWrr,
+                                              PCMPGTBrr, VPCMPGTBrr,
+                                              PCMPGTDrr, VPCMPGTDrr,
+                                              PCMPGTWrr, VPCMPGTWrr)>;
+
+def HWWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
+                                              VPSUBDYrr,
+                                              VPSUBQYrr,
+                                              VPSUBWYrr,
+                                              VPCMPGTBYrr,
+                                              VPCMPGTDYrr,
+                                              VPCMPGTWYrr)>;
+
+def HWWritePCMPGTQ : SchedWriteRes<[HWPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def HWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [HWWritePCMPGTQ]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                 VPCMPGTQYrr)>;
+
+
 // The 0x83 ADC/SBB opcodes have special support for immediate 0 to only require
 // a single uop. It does not apply to the GR8 encoding. And only applies to the
 // 8-bit immediate since using larger immediate for 0 would be silly.
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 842d67b5c8249..d40bdf728a48d 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -698,12 +698,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
 }
 def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
 
-def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
-  let Latency = 5;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-
 def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
   let Latency = 5;
   let NumMicroOps = 1;
@@ -1134,6 +1128,12 @@ def SBWriteFZeroIdiom : SchedWriteVariant<[
 def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
                                           VXORPDrr)>;
 
+def SBWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
 def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
     SchedVar<NoSchedPred,                          [WriteVecLogicX]>
@@ -1152,9 +1152,15 @@ def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                               PCMPGTDrr, VPCMPGTDrr,
                                               PCMPGTWrr, VPCMPGTWrr)>;
 
+def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
 def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
-    SchedVar<NoSchedPred,                          [SBWriteResGroup30]>
+    SchedVar<NoSchedPred,                          [SBWritePCMPGTQ]>
 ]>;
 def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
 
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 1119fd3fc11c9..8f3e4ae62d53a 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -659,8 +659,7 @@ def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr",
-                                            "VPBLENDD(Y?)rri",
-                                            "(V?)PSUB(B|D|Q|W)(Y?)rr")>;
+                                            "VPBLENDD(Y?)rri")>;
 
 def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
   let Latency = 1;
@@ -770,8 +769,7 @@ def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
   let ResourceCycles = [1];
 }
 def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
-                                             "VPBROADCAST(B|W)rr",
-                                             "(V?)PCMPGTQ(Y?)rr")>;
+                                             "VPBROADCAST(B|W)rr")>;
 
 def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
   let Latency = 3;
@@ -1742,6 +1740,100 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>;
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Intruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SKLWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def SKLWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[SKLWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                          XOR32rr, XOR64rr)>;
+
+def SKLWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[SKLWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+                                           VXORPDrr)>;
+
+def SKLWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[SKLWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def SKLWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def SKLWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def SKLWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+                                               PCMPGTDrr, VPCMPGTDrr,
+                                               PCMPGTWrr, VPCMPGTWrr)>;
+
+def SKLWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+                                               VPCMPGTDYrr,
+                                               VPCMPGTWYrr)>;
+
+def SKLWritePSUB : SchedWriteRes<[SKLPort015]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKLWriteVZeroIdiomPSUB : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKLWritePSUB]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr,
+                                               PSUBDrr, VPSUBDrr,
+                                               PSUBQrr, VPSUBQrr,
+                                               PSUBWrr, VPSUBWrr,
+                                               VPSUBBYrr,
+                                               VPSUBDYrr,
+                                               VPSUBQYrr,
+                                               VPSUBWYrr)>;
+
+def SKLWritePCMPGTQ : SchedWriteRes<[SKLPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKLWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKLWritePCMPGTQ]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                  VPCMPGTQYrr)>;
+
+
 // CMOVs that use both Z and C flag require an extra uop.
 def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> {
   let Latency = 2;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index e3456073de305..58caf1dacfcb4 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -680,8 +680,7 @@ def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
                                             "VPBLENDMD(Z128|Z256)rr",
                                             "VPBLENDMQ(Z128|Z256)rr",
                                             "VPBLENDMW(Z128|Z256)rr",
-                                            "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr",
-                                            "(V?)PSUB(B|D|Q|W)rr",
+                                            "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
                                             "VPTERNLOGD(Z|Z128|Z256)rri",
                                             "VPTERNLOGQ(Z|Z128|Z256)rri")>;
 
@@ -828,7 +827,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
                                              "VPCMPD(Z|Z128|Z256)rri",
                                              "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
                                              "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
-                                             "(V?)PCMPGTQ(Y?)rr",
                                              "VPCMPQ(Z|Z128|Z256)rri",
                                              "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
                                              "VPCMPW(Z|Z128|Z256)rri",
@@ -2458,6 +2456,121 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
 
 def: InstRW<[WriteZero], (instrs CLC)>;
 
+
+// Intruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SKXWriteZeroLatency : SchedWriteRes<[]> {
+  let Latency = 0;
+}
+
+def SKXWriteZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteALU]>
+]>;
+def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+                                          XOR32rr, XOR64rr)>;
+
+def SKXWriteFZeroIdiom : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogic]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+                                           XORPDrr, VXORPDrr,
+                                           VXORPSZ128rr,
+                                           VXORPDZ128rr)>;
+
+def SKXWriteFZeroIdiomY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicY]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+                                            VXORPSZ256rr, VXORPDZ256rr)>;
+
+def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicZ]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
+
+def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+                                                 VPXORDZ128rr, VPXORQZ128rr)>;
+
+def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
+                                                 VPXORDZ256rr, VPXORQZ256rr)>;
+
+def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecLogicZ]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
+
+def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUX]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+                                               PCMPGTDrr, VPCMPGTDrr,
+                                               PCMPGTWrr, VPCMPGTWrr)>;
+
+def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUY]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+                                               VPCMPGTDYrr,
+                                               VPCMPGTWYrr)>;
+
+def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKXWritePSUB]>
+]>;
+
+def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
+                                               PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
+                                               PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
+                                               PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
+                                               VPSUBBYrr, VPSUBBZ256rr,
+                                               VPSUBDYrr, VPSUBDZ256rr,
+                                               VPSUBQYrr, VPSUBQZ256rr,
+                                               VPSUBWYrr, VPSUBWZ256rr,
+                                               VPSUBBZrr,
+                                               VPSUBDZrr,
+                                               VPSUBQZrr,
+                                               VPSUBWZrr)>;
+def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+
+def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [SKXWritePCMPGTQ]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+                                                  VPCMPGTQYrr)>;
+
+
 // CMOVs that use both Z and C flag require an extra uop.
 def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
   let Latency = 2;
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
index 6e505bfb0626f..16a9ca4b51beb 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
@@ -95,13 +95,13 @@ vpxor  %ymm3, %ymm3, %ymm5
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      75
-# CHECK-NEXT: Total Cycles:      41
+# CHECK-NEXT: Total Cycles:      23
 # CHECK-NEXT: Total uOps:        75
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    1.83
-# CHECK-NEXT: IPC:               1.83
-# CHECK-NEXT: Block RThroughput: 20.0
+# CHECK-NEXT: uOps Per Cycle:    3.26
+# CHECK-NEXT: IPC:               3.26
+# CHECK-NEXT: Block RThroughput: 18.8
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -112,49 +112,49 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
-# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      0     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      0     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      0     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      0     0.25                        xorq	%rax, %rax
 # CHECK-NEXT:  1      1     0.50                        pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  1      5     1.00                        pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtw	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.50                        psubb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        psubd	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        psubq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        psubw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     1.00                        andnps	%xmm0, %xmm0
 # CHECK-NEXT:  1      1     1.00                        andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm2
@@ -171,26 +171,26 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm5
 # CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     1.00                        xorps	%xmm0, %xmm0
-# CHECK-NEXT:  1      1     1.00                        xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      0     0.25                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      0     0.25                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.25                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.25                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  1      1     0.33                        pxor	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    79
-# CHECK-NEXT: Max number of mappings used:         58
+# CHECK-NEXT: Max number of mappings used:         24
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - BWDivider
@@ -206,53 +206,53 @@ vpxor  %ymm3, %ymm3, %ymm5
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     14.00  24.00   -      -      -     35.00  2.00    -
+# CHECK-NEXT:  -      -     4.00   6.00    -      -      -     14.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     xorl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorq	%rax, %rax
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubd	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubq	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnps	%xmm0, %xmm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
@@ -260,111 +260,111 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm3
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorps	%xmm0, %xmm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789          0123456789
-# CHECK-NEXT: Index     0123456789          0123456789          0
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          012
 
-# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .   subl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .    .    .   subq	%rax, %rax
-# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .    .    .   xorl	%eax, %eax
-# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .    .    .   xorq	%rax, %rax
-# CHECK-NEXT: [0,4]     .DeE--R   .    .    .    .    .    .    .   pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: [0,5]     .D=eE-R   .    .    .    .    .    .    .   pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: [0,6]     .D==eER   .    .    .    .    .    .    .   pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: [0,7]     .D=eE-R   .    .    .    .    .    .    .   pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: [0,8]     . D=eER   .    .    .    .    .    .    .   pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: [0,9]     . D==eeeeeER   .    .    .    .    .    .   pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: [0,10]    . D=======eER  .    .    .    .    .    .   pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: [0,11]    . D==eE-----R  .    .    .    .    .    .   vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12]    .  D==eE----R  .    .    .    .    .    .   vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13]    .  D===eeeeeER .    .    .    .    .    .   vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14]    .  D========eER.    .    .    .    .    .   vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15]    .  D=========eER    .    .    .    .    .   vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16]    .   D========eER    .    .    .    .    .   vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17]    .   D========eeeeeER.    .    .    .    .   vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18]    .   D=========eE---R.    .    .    .    .   vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19]    .   DeE------------R.    .    .    .    .   psubb	%mm2, %mm2
-# CHECK-NEXT: [0,20]    .    DeE-----------R.    .    .    .    .   psubd	%mm2, %mm2
-# CHECK-NEXT: [0,21]    .    D=eE----------R.    .    .    .    .   psubq	%mm2, %mm2
-# CHECK-NEXT: [0,22]    .    D==eE---------R.    .    .    .    .   psubw	%mm2, %mm2
-# CHECK-NEXT: [0,23]    .    D=====eE------R.    .    .    .    .   psubb	%xmm2, %xmm2
-# CHECK-NEXT: [0,24]    .    .D=====eE-----R.    .    .    .    .   psubd	%xmm2, %xmm2
-# CHECK-NEXT: [0,25]    .    .D=======eE---R.    .    .    .    .   psubq	%xmm2, %xmm2
-# CHECK-NEXT: [0,26]    .    .D========eE--R.    .    .    .    .   psubw	%xmm2, %xmm2
-# CHECK-NEXT: [0,27]    .    .D========eE--R.    .    .    .    .   vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,28]    .    . D========eE-R.    .    .    .    .   vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,29]    .    . D=========eER.    .    .    .    .   vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,30]    .    . D==========eER    .    .    .    .   vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,31]    .    . D===========eER   .    .    .    .   vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,32]    .    .  D===========eER  .    .    .    .   vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,33]    .    .  D============eER .    .    .    .   vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,34]    .    .  D=============eER.    .    .    .   vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,35]    .    .  D==============eER    .    .    .   vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,36]    .    .   D=============eER    .    .    .   vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,37]    .    .   D==============eER   .    .    .   vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,38]    .    .   D==============eER   .    .    .   vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,39]    .    .   D===============eER  .    .    .   vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,40]    .    .    D==============eER  .    .    .   vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,41]    .    .    D===============eER .    .    .   vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,42]    .    .    D===============eER .    .    .   vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,43]    .    .    DeE---------------R .    .    .   andnps	%xmm0, %xmm0
-# CHECK-NEXT: [0,44]    .    .    .D====eE----------R .    .    .   andnpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,45]    .    .    .D=====eE---------R .    .    .   vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,46]    .    .    .D=======eE-------R .    .    .   vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,47]    .    .    .D======eE--------R .    .    .   vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,48]    .    .    . D=======eE------R .    .    .   vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,49]    .    .    . D=eE------------R .    .    .   pandn	%mm2, %mm2
-# CHECK-NEXT: [0,50]    .    .    . D======eE-------R .    .    .   pandn	%xmm2, %xmm2
-# CHECK-NEXT: [0,51]    .    .    . D==========eE---R .    .    .   vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,52]    .    .    .  D==========eE--R .    .    .   vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,53]    .    .    .  D=======eE-----R .    .    .   vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,54]    .    .    .  D========eE----R .    .    .   vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,55]    .    .    .  D===========eE-R .    .    .   vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,56]    .    .    .   D============eER.    .    .   vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: [0,57]    .    .    .   D=============eER    .    .   vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: [0,58]    .    .    .   D===========eE--R    .    .   vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,59]    .    .    .   D===============eER  .    .   xorps	%xmm0, %xmm0
-# CHECK-NEXT: [0,60]    .    .    .    D=============eE-R  .    .   xorpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,61]    .    .    .    D===============eER .    .   vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,62]    .    .    .    D================eER.    .   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,63]    .    .    .    D==================eER   .   vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,64]    .    .    .    .D================eE-R   .   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,65]    .    .    .    .DeE-----------------R   .   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,66]    .    .    .    .D==================eER  .   pxor	%xmm2, %xmm2
-# CHECK-NEXT: [0,67]    .    .    .    .D==========eE--------R  .   vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,68]    .    .    .    . D==========eE-------R  .   vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,69]    .    .    .    . D==================eER .   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,70]    .    .    .    . D===================eER.   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,71]    .    .    .    . D====================eER   vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,72]    .    .    .    .  D================eE---R   vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,73]    .    .    .    .  D=================eE--R   vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,74]    .    .    .    .  D=================eE--R   vpxor	%ymm3, %ymm3, %ymm5
+# CHECK:      [0,0]     DR   .    .    .    . .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     DR   .    .    .    . .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     DR   .    .    .    . .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     DR   .    .    .    . .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     .DeER.    .    .    . .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     .D=eER    .    .    . .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D==eER   .    .    . .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .D----R   .    .    . .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     . D---R   .    .    . .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     . D---R   .    .    . .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    . D---R   .    .    . .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    . D---R   .    .    . .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    .  D--R   .    .    . .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    .  D--R   .    .    . .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    .  D--R   .    .    . .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    .  D--R   .    .    . .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    .   D-R   .    .    . .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    .   D-R   .    .    . .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .   D-R   .    .    . .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .   DeER  .    .    . .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,20]    .    DeER .    .    . .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,21]    .    D=eER.    .    . .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,22]    .    D==eER    .    . .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,23]    .    D----R    .    . .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,24]    .    .D---R    .    . .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,25]    .    .D---R    .    . .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,26]    .    .D---R    .    . .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,27]    .    .D---R    .    . .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,28]    .    . D--R    .    . .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,29]    .    . D--R    .    . .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,30]    .    . D--R    .    . .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,31]    .    . D--R    .    . .   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,32]    .    .  D-R    .    . .   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,33]    .    .  D-R    .    . .   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,34]    .    .  D-R    .    . .   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,35]    .    .  D-R    .    . .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,36]    .    .   DR    .    . .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,37]    .    .   DR    .    . .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,38]    .    .   DR    .    . .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,39]    .    .   DR    .    . .   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,40]    .    .    DR   .    . .   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,41]    .    .    DR   .    . .   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,42]    .    .    DR   .    . .   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,43]    .    .    DeER .    . .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,44]    .    .    .DeER.    . .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,45]    .    .    .D=eER    . .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,46]    .    .    .D==eER   . .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,47]    .    .    .D===eER  . .   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,48]    .    .    . D===eER . .   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,49]    .    .    . DeE---R . .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,50]    .    .    . D===eER . .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,51]    .    .    . DeE---R . .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,52]    .    .    .  DeE--R . .   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,53]    .    .    .  D===eER. .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,54]    .    .    .  D====eER .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,55]    .    .    .  D=eE---R .   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,56]    .    .    .   D====eER.   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,57]    .    .    .   D=====eER   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,58]    .    .    .   DeE-----R   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,59]    .    .    .   D-------R   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,60]    .    .    .    D=E----R   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,61]    .    .    .    D=E----R   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,62]    .    .    .    D=E----R   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,63]    .    .    .    D=E----R   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,64]    .    .    .    .DE----R   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,65]    .    .    .    .DeE---R   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,66]    .    .    .    .DE----R   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,67]    .    .    .    .D-----R   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,68]    .    .    .    . D----R   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,69]    .    .    .    . D----R   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,70]    .    .    .    . D----R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,71]    .    .    .    . D----R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,72]    .    .    .    .  D---R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,73]    .    .    .    .  D---R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,74]    .    .    .    .  D---R   vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -373,78 +373,78 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
-# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
-# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
-# CHECK-NEXT: 4.     1     1.0    1.0    2.0       pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: 5.     1     2.0    0.0    1.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    0.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    0.0       pcmpgtd	%mm2, %mm2
 # CHECK-NEXT: 6.     1     3.0    0.0    0.0       pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: 7.     1     2.0    2.0    1.0       pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: 8.     1     2.0    0.0    0.0       pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: 9.     1     3.0    0.0    0.0       pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: 10.    1     8.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: 11.    1     3.0    3.0    5.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 12.    1     3.0    0.0    4.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 14.    1     9.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 15.    1     10.0   0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 16.    1     9.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 17.    1     9.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 18.    1     10.0   1.0    3.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 19.    1     1.0    0.0    12.0      psubb	%mm2, %mm2
-# CHECK-NEXT: 20.    1     1.0    0.0    11.0      psubd	%mm2, %mm2
-# CHECK-NEXT: 21.    1     2.0    0.0    10.0      psubq	%mm2, %mm2
-# CHECK-NEXT: 22.    1     3.0    0.0    9.0       psubw	%mm2, %mm2
-# CHECK-NEXT: 23.    1     6.0    0.0    6.0       psubb	%xmm2, %xmm2
-# CHECK-NEXT: 24.    1     6.0    0.0    5.0       psubd	%xmm2, %xmm2
-# CHECK-NEXT: 25.    1     8.0    1.0    3.0       psubq	%xmm2, %xmm2
-# CHECK-NEXT: 26.    1     9.0    0.0    2.0       psubw	%xmm2, %xmm2
-# CHECK-NEXT: 27.    1     9.0    2.0    2.0       vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 28.    1     9.0    0.0    1.0       vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 29.    1     10.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 30.    1     11.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 31.    1     12.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 32.    1     12.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 33.    1     13.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 34.    1     14.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 35.    1     15.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 36.    1     14.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 37.    1     15.0   1.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 38.    1     15.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 39.    1     16.0   2.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 40.    1     15.0   2.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 41.    1     16.0   3.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 42.    1     16.0   3.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 43.    1     1.0    1.0    15.0      andnps	%xmm0, %xmm0
-# CHECK-NEXT: 44.    1     5.0    5.0    10.0      andnpd	%xmm1, %xmm1
-# CHECK-NEXT: 45.    1     6.0    1.0    9.0       vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 46.    1     8.0    2.0    7.0       vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 47.    1     7.0    0.0    8.0       vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 48.    1     8.0    0.0    6.0       vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 49.    1     2.0    2.0    12.0      pandn	%mm2, %mm2
-# CHECK-NEXT: 50.    1     7.0    0.0    7.0       pandn	%xmm2, %xmm2
-# CHECK-NEXT: 51.    1     11.0   0.0    3.0       vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 52.    1     11.0   0.0    2.0       vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 53.    1     8.0    1.0    5.0       vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: 54.    1     9.0    1.0    4.0       vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: 55.    1     12.0   0.0    1.0       vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 56.    1     13.0   7.0    0.0       vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: 57.    1     14.0   7.0    0.0       vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: 58.    1     12.0   1.0    2.0       vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 59.    1     16.0   16.0   0.0       xorps	%xmm0, %xmm0
-# CHECK-NEXT: 60.    1     14.0   8.0    1.0       xorpd	%xmm1, %xmm1
-# CHECK-NEXT: 61.    1     16.0   11.0   0.0       vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 62.    1     17.0   2.0    0.0       vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 63.    1     19.0   2.0    0.0       vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 64.    1     17.0   0.0    1.0       vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 65.    1     1.0    1.0    17.0      pxor	%mm2, %mm2
-# CHECK-NEXT: 66.    1     19.0   0.0    0.0       pxor	%xmm2, %xmm2
-# CHECK-NEXT: 67.    1     11.0   2.0    8.0       vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 68.    1     11.0   0.0    7.0       vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 69.    1     19.0   19.0   0.0       vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 70.    1     20.0   3.0    0.0       vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 71.    1     21.0   21.0   0.0       vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: 72.    1     17.0   1.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: 73.    1     18.0   0.0    2.0       vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 74.    1     18.0   0.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 7.     1     0.0    0.0    4.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     0.0    0.0    3.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     0.0    0.0    3.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     0.0    0.0    3.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     0.0    0.0    3.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     0.0    0.0    2.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     0.0    0.0    2.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     0.0    0.0    2.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     0.0    0.0    2.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     0.0    0.0    1.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     0.0    0.0    1.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     0.0    0.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     1.0    0.0    0.0       psubb	%mm2, %mm2
+# CHECK-NEXT: 20.    1     1.0    0.0    0.0       psubd	%mm2, %mm2
+# CHECK-NEXT: 21.    1     2.0    0.0    0.0       psubq	%mm2, %mm2
+# CHECK-NEXT: 22.    1     3.0    0.0    0.0       psubw	%mm2, %mm2
+# CHECK-NEXT: 23.    1     0.0    0.0    4.0       psubb	%xmm2, %xmm2
+# CHECK-NEXT: 24.    1     0.0    0.0    3.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 25.    1     0.0    0.0    3.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 26.    1     0.0    0.0    3.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 27.    1     0.0    0.0    3.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 28.    1     0.0    0.0    2.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 29.    1     0.0    0.0    2.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 30.    1     0.0    0.0    2.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 31.    1     0.0    0.0    2.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 32.    1     0.0    0.0    1.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 33.    1     0.0    0.0    1.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 34.    1     0.0    0.0    1.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 35.    1     0.0    0.0    1.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 36.    1     0.0    0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 37.    1     0.0    0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 38.    1     0.0    0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 39.    1     0.0    0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 40.    1     0.0    0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 41.    1     0.0    0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 42.    1     0.0    0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 43.    1     1.0    1.0    0.0       andnps	%xmm0, %xmm0
+# CHECK-NEXT: 44.    1     1.0    1.0    0.0       andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 45.    1     2.0    2.0    0.0       vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 46.    1     3.0    1.0    0.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 47.    1     4.0    1.0    0.0       vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 48.    1     4.0    1.0    0.0       vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 49.    1     1.0    1.0    3.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 50.    1     4.0    0.0    0.0       pandn	%xmm2, %xmm2
+# CHECK-NEXT: 51.    1     1.0    1.0    3.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 52.    1     1.0    0.0    2.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 53.    1     4.0    0.0    0.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 54.    1     5.0    1.0    0.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 55.    1     2.0    0.0    3.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 56.    1     5.0    2.0    0.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 57.    1     6.0    3.0    0.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 58.    1     1.0    0.0    5.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 59.    1     0.0    0.0    7.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 60.    1     2.0    0.0    4.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 61.    1     2.0    0.0    4.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 62.    1     2.0    0.0    4.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 63.    1     2.0    0.0    4.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 64.    1     1.0    0.0    4.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 65.    1     1.0    1.0    3.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 66.    1     1.0    0.0    4.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 67.    1     0.0    0.0    5.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 68.    1     0.0    0.0    4.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 69.    1     0.0    0.0    4.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 70.    1     0.0    0.0    4.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 71.    1     0.0    0.0    4.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 72.    1     0.0    0.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 73.    1     0.0    0.0    3.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 74.    1     0.0    0.0    3.0       vpxor	%ymm3, %ymm3, %ymm5
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s
index 3644f3d2f9989..90592655067f8 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s
@@ -105,13 +105,13 @@ vpxor  %ymm3, %ymm3, %ymm5
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      83
-# CHECK-NEXT: Total Cycles:      45
+# CHECK-NEXT: Total Cycles:      25
 # CHECK-NEXT: Total uOps:        83
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    1.84
-# CHECK-NEXT: IPC:               1.84
-# CHECK-NEXT: Block RThroughput: 21.0
+# CHECK-NEXT: uOps Per Cycle:    3.32
+# CHECK-NEXT: IPC:               3.32
+# CHECK-NEXT: Block RThroughput: 20.8
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -122,57 +122,57 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
-# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      0     0.25                        subl	%eax, %eax
+# CHECK-NEXT:  1      0     0.25                        subq	%rax, %rax
+# CHECK-NEXT:  1      0     0.25                        xorl	%eax, %eax
+# CHECK-NEXT:  1      0     0.25                        xorq	%rax, %rax
 # CHECK-NEXT:  1      1     0.50                        pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  1      5     1.00                        pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      5     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpcmpgtw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.50                        psubb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        psubd	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        psubq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        psubw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vpsubw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     1.00                        andnps	%xmm0, %xmm0
 # CHECK-NEXT:  1      1     1.00                        andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      1     1.00                        vandnps	%xmm2, %xmm2, %xmm2
@@ -189,26 +189,26 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     1.00                        vandnps	%ymm2, %ymm2, %ymm5
 # CHECK-NEXT:  1      1     1.00                        vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     1.00                        xorps	%xmm0, %xmm0
-# CHECK-NEXT:  1      1     1.00                        xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      0     0.25                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      0     0.25                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.25                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.25                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  1      1     0.33                        pxor	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     1.00                        vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.25                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      0     0.25                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.25                        vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    87
-# CHECK-NEXT: Max number of mappings used:         62
+# CHECK-NEXT: Max number of mappings used:         24
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - HWDivider
@@ -224,61 +224,61 @@ vpxor  %ymm3, %ymm3, %ymm5
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     15.00  28.00   -      -      -     38.00  2.00    -
+# CHECK-NEXT:  -      -     4.00   6.00    -      -      -     14.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     xorl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorq	%rax, %rax
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubd	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubq	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnps	%xmm0, %xmm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
@@ -286,119 +286,119 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm3
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm5
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorps	%xmm0, %xmm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pxor	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789          0123456789
-# CHECK-NEXT: Index     0123456789          0123456789          01234
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          01234
 
-# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .   .   subl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .    .    .   .   subq	%rax, %rax
-# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .    .    .   .   xorl	%eax, %eax
-# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .    .    .   .   xorq	%rax, %rax
-# CHECK-NEXT: [0,4]     .DeE--R   .    .    .    .    .    .    .   .   pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: [0,5]     .D=eE-R   .    .    .    .    .    .    .   .   pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: [0,6]     .D==eER   .    .    .    .    .    .    .   .   pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: [0,7]     .D=eE-R   .    .    .    .    .    .    .   .   pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: [0,8]     . D=eER   .    .    .    .    .    .    .   .   pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: [0,9]     . D==eeeeeER   .    .    .    .    .    .   .   pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: [0,10]    . D=======eER  .    .    .    .    .    .   .   pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: [0,11]    . D==eE-----R  .    .    .    .    .    .   .   vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12]    .  D==eE----R  .    .    .    .    .    .   .   vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13]    .  D===eeeeeER .    .    .    .    .    .   .   vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14]    .  D========eER.    .    .    .    .    .   .   vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15]    .  D=========eER    .    .    .    .    .   .   vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16]    .   D========eER    .    .    .    .    .   .   vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17]    .   D========eeeeeER.    .    .    .    .   .   vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18]    .   D=========eE---R.    .    .    .    .   .   vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19]    .   D=========eE---R.    .    .    .    .   .   vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,20]    .    D=========eE--R.    .    .    .    .   .   vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,21]    .    D==========eeeeeER  .    .    .    .   .   vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,22]    .    D===============eER .    .    .    .   .   vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,23]    .    D================eER.    .    .    .   .   vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,24]    .    .D===============eER.    .    .    .   .   vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,25]    .    .D===============eeeeeER .    .    .   .   vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,26]    .    .D================eE---R .    .    .   .   vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,27]    .    .DeE-------------------R .    .    .   .   psubb	%mm2, %mm2
-# CHECK-NEXT: [0,28]    .    . DeE------------------R .    .    .   .   psubd	%mm2, %mm2
-# CHECK-NEXT: [0,29]    .    . D=eE-----------------R .    .    .   .   psubq	%mm2, %mm2
-# CHECK-NEXT: [0,30]    .    . D==eE----------------R .    .    .   .   psubw	%mm2, %mm2
-# CHECK-NEXT: [0,31]    .    . D===eE---------------R .    .    .   .   psubb	%xmm2, %xmm2
-# CHECK-NEXT: [0,32]    .    .  D===eE--------------R .    .    .   .   psubd	%xmm2, %xmm2
-# CHECK-NEXT: [0,33]    .    .  D======eE-----------R .    .    .   .   psubq	%xmm2, %xmm2
-# CHECK-NEXT: [0,34]    .    .  D=======eE----------R .    .    .   .   psubw	%xmm2, %xmm2
-# CHECK-NEXT: [0,35]    .    .  D==============eE---R .    .    .   .   vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,36]    .    .   D==============eE--R .    .    .   .   vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,37]    .    .   D===============eE-R .    .    .   .   vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,38]    .    .   D================eER .    .    .   .   vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,39]    .    .   D=================eER.    .    .   .   vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,40]    .    .    D=================eER    .    .   .   vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,41]    .    .    D==================eER   .    .   .   vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,42]    .    .    D===================eER  .    .   .   vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,43]    .    .    D====================eER .    .   .   vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,44]    .    .    .D===================eER .    .   .   vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45]    .    .    .D====================eER.    .   .   vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,46]    .    .    .D====================eER.    .   .   vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,47]    .    .    .D=====================eER    .   .   vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,48]    .    .    . D====================eER    .   .   vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,49]    .    .    . D=====================eER   .   .   vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,50]    .    .    . D=====================eER   .   .   vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,51]    .    .    . D====eE-----------------R   .   .   andnps	%xmm0, %xmm0
-# CHECK-NEXT: [0,52]    .    .    .  D====eE----------------R   .   .   andnpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,53]    .    .    .  D=====eE---------------R   .   .   vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,54]    .    .    .  D=======eE-------------R   .   .   vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,55]    .    .    .  D======eE--------------R   .   .   vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,56]    .    .    .   D=========eE----------R   .   .   vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,57]    .    .    .   DeE-------------------R   .   .   pandn	%mm2, %mm2
-# CHECK-NEXT: [0,58]    .    .    .   D======eE-------------R   .   .   pandn	%xmm2, %xmm2
-# CHECK-NEXT: [0,59]    .    .    .   D================eE---R   .   .   vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,60]    .    .    .    D================eE--R   .   .   vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,61]    .    .    .    D=========eE---------R   .   .   vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,62]    .    .    .    D==========eE--------R   .   .   vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,63]    .    .    .    D=================eE-R   .   .   vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,64]    .    .    .    .D==========eE-------R   .   .   vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: [0,65]    .    .    .    .D===========eE------R   .   .   vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: [0,66]    .    .    .    .D=================eER   .   .   vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,67]    .    .    .    .D=============eE----R   .   .   xorps	%xmm0, %xmm0
-# CHECK-NEXT: [0,68]    .    .    .    . D===========eE-----R   .   .   xorpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,69]    .    .    .    . D=================eER  .   .   vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,70]    .    .    .    . D==================eER .   .   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,71]    .    .    .    . D====================eER   .   vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,72]    .    .    .    .  D==================eE-R   .   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,73]    .    .    .    .  DeE-------------------R   .   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,74]    .    .    .    .  D====================eER  .   pxor	%xmm2, %xmm2
-# CHECK-NEXT: [0,75]    .    .    .    .  D================eE----R  .   vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,76]    .    .    .    .   D================eE---R  .   vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,77]    .    .    .    .   D====================eER .   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,78]    .    .    .    .   D=====================eER.   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,79]    .    .    .    .   D======================eER   vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,80]    .    .    .    .    D==================eE---R   vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,81]    .    .    .    .    D===================eE--R   vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,82]    .    .    .    .    D===================eE--R   vpxor	%ymm3, %ymm3, %ymm5
+# CHECK:      [0,0]     DR   .    .    .    .   .   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     DR   .    .    .    .   .   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     DR   .    .    .    .   .   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     DR   .    .    .    .   .   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     .DeER.    .    .    .   .   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     .D=eER    .    .    .   .   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D==eER   .    .    .   .   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .D----R   .    .    .   .   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     . D---R   .    .    .   .   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     . D---R   .    .    .   .   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    . D---R   .    .    .   .   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    . D---R   .    .    .   .   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    .  D--R   .    .    .   .   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    .  D--R   .    .    .   .   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    .  D--R   .    .    .   .   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    .  D--R   .    .    .   .   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    .   D-R   .    .    .   .   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    .   D-R   .    .    .   .   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .   D-R   .    .    .   .   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .   D-R   .    .    .   .   vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20]    .    DR   .    .    .   .   vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21]    .    DR   .    .    .   .   vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22]    .    DR   .    .    .   .   vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23]    .    DR   .    .    .   .   vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24]    .    .DR  .    .    .   .   vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25]    .    .DR  .    .    .   .   vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26]    .    .DR  .    .    .   .   vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27]    .    .DeER.    .    .   .   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,28]    .    . DeER    .    .   .   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,29]    .    . D=eER   .    .   .   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,30]    .    . D==eER  .    .   .   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,31]    .    . D----R  .    .   .   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,32]    .    .  D---R  .    .   .   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,33]    .    .  D---R  .    .   .   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,34]    .    .  D---R  .    .   .   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,35]    .    .  D---R  .    .   .   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36]    .    .   D--R  .    .   .   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37]    .    .   D--R  .    .   .   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38]    .    .   D--R  .    .   .   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39]    .    .   D--R  .    .   .   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40]    .    .    D-R  .    .   .   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41]    .    .    D-R  .    .   .   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42]    .    .    D-R  .    .   .   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43]    .    .    D-R  .    .   .   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44]    .    .    .DR  .    .   .   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    .    .DR  .    .   .   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46]    .    .    .DR  .    .   .   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    .    .DR  .    .   .   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48]    .    .    . DR .    .   .   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49]    .    .    . DR .    .   .   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50]    .    .    . DR .    .   .   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51]    .    .    . DeER    .   .   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,52]    .    .    .  DeER   .   .   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,53]    .    .    .  D=eER  .   .   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,54]    .    .    .  D==eER .   .   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,55]    .    .    .  D===eER.   .   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,56]    .    .    .   D===eER   .   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,57]    .    .    .   DeE---R   .   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,58]    .    .    .   D===eER   .   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,59]    .    .    .   DeE---R   .   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,60]    .    .    .    DeE--R   .   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,61]    .    .    .    D===eER  .   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,62]    .    .    .    D====eER .   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,63]    .    .    .    D=eE---R .   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,64]    .    .    .    .D====eER.   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,65]    .    .    .    .D=====eER   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,66]    .    .    .    .DeE-----R   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,67]    .    .    .    .D-------R   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,68]    .    .    .    . D=E----R   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,69]    .    .    .    . D=E----R   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,70]    .    .    .    . D=E----R   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,71]    .    .    .    . D=E----R   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,72]    .    .    .    .  DE----R   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,73]    .    .    .    .  DeE---R   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,74]    .    .    .    .  DE----R   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,75]    .    .    .    .  D-----R   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,76]    .    .    .    .   D----R   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,77]    .    .    .    .   D----R   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,78]    .    .    .    .   D----R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,79]    .    .    .    .   D----R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,80]    .    .    .    .    D---R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,81]    .    .    .    .    D---R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,82]    .    .    .    .    D---R   vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -407,86 +407,86 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
-# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
-# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
-# CHECK-NEXT: 4.     1     1.0    1.0    2.0       pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: 5.     1     2.0    0.0    1.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    0.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    0.0       pcmpgtd	%mm2, %mm2
 # CHECK-NEXT: 6.     1     3.0    0.0    0.0       pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: 7.     1     2.0    2.0    1.0       pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: 8.     1     2.0    0.0    0.0       pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: 9.     1     3.0    0.0    0.0       pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: 10.    1     8.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: 11.    1     3.0    3.0    5.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 12.    1     3.0    0.0    4.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 14.    1     9.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 15.    1     10.0   0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 16.    1     9.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 17.    1     9.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 18.    1     10.0   1.0    3.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 19.    1     10.0   1.0    3.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 20.    1     10.0   0.0    2.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 21.    1     11.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 22.    1     16.0   0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 23.    1     17.0   0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 24.    1     16.0   0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 25.    1     16.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 26.    1     17.0   1.0    3.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 27.    1     1.0    1.0    19.0      psubb	%mm2, %mm2
-# CHECK-NEXT: 28.    1     1.0    0.0    18.0      psubd	%mm2, %mm2
-# CHECK-NEXT: 29.    1     2.0    0.0    17.0      psubq	%mm2, %mm2
-# CHECK-NEXT: 30.    1     3.0    0.0    16.0      psubw	%mm2, %mm2
-# CHECK-NEXT: 31.    1     4.0    0.0    15.0      psubb	%xmm2, %xmm2
-# CHECK-NEXT: 32.    1     4.0    0.0    14.0      psubd	%xmm2, %xmm2
-# CHECK-NEXT: 33.    1     7.0    2.0    11.0      psubq	%xmm2, %xmm2
-# CHECK-NEXT: 34.    1     8.0    0.0    10.0      psubw	%xmm2, %xmm2
-# CHECK-NEXT: 35.    1     15.0   1.0    3.0       vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 36.    1     15.0   0.0    2.0       vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 37.    1     16.0   0.0    1.0       vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 38.    1     17.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 39.    1     18.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 40.    1     18.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 41.    1     19.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 42.    1     20.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 43.    1     21.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 44.    1     20.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 45.    1     21.0   1.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 46.    1     21.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 47.    1     22.0   2.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 48.    1     21.0   2.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 49.    1     22.0   3.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 50.    1     22.0   3.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 51.    1     5.0    5.0    17.0      andnps	%xmm0, %xmm0
-# CHECK-NEXT: 52.    1     5.0    5.0    16.0      andnpd	%xmm1, %xmm1
-# CHECK-NEXT: 53.    1     6.0    2.0    15.0      vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 54.    1     8.0    2.0    13.0      vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 55.    1     7.0    0.0    14.0      vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 56.    1     10.0   2.0    10.0      vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 57.    1     1.0    1.0    19.0      pandn	%mm2, %mm2
-# CHECK-NEXT: 58.    1     7.0    0.0    13.0      pandn	%xmm2, %xmm2
-# CHECK-NEXT: 59.    1     17.0   0.0    3.0       vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 60.    1     17.0   0.0    2.0       vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 61.    1     10.0   3.0    9.0       vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: 62.    1     11.0   1.0    8.0       vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: 63.    1     18.0   0.0    1.0       vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 64.    1     11.0   5.0    7.0       vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: 65.    1     12.0   3.0    6.0       vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: 66.    1     18.0   1.0    0.0       vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 67.    1     14.0   12.0   4.0       xorps	%xmm0, %xmm0
-# CHECK-NEXT: 68.    1     12.0   4.0    5.0       xorpd	%xmm1, %xmm1
-# CHECK-NEXT: 69.    1     18.0   13.0   0.0       vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 70.    1     19.0   6.0    0.0       vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 71.    1     21.0   2.0    0.0       vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 72.    1     19.0   0.0    1.0       vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 73.    1     1.0    1.0    19.0      pxor	%mm2, %mm2
-# CHECK-NEXT: 74.    1     21.0   0.0    0.0       pxor	%xmm2, %xmm2
-# CHECK-NEXT: 75.    1     17.0   2.0    4.0       vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 76.    1     17.0   0.0    3.0       vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 77.    1     21.0   21.0   0.0       vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 78.    1     22.0   3.0    0.0       vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 79.    1     23.0   23.0   0.0       vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: 80.    1     19.0   1.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: 81.    1     20.0   0.0    2.0       vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 82.    1     20.0   0.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 7.     1     0.0    0.0    4.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     0.0    0.0    3.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     0.0    0.0    3.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     0.0    0.0    3.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     0.0    0.0    3.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     0.0    0.0    2.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     0.0    0.0    2.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     0.0    0.0    2.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     0.0    0.0    2.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     0.0    0.0    1.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     0.0    0.0    1.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     0.0    0.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     0.0    0.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20.    1     0.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21.    1     0.0    0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22.    1     0.0    0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23.    1     0.0    0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24.    1     0.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25.    1     0.0    0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26.    1     0.0    0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 27.    1     1.0    1.0    0.0       psubb	%mm2, %mm2
+# CHECK-NEXT: 28.    1     1.0    0.0    0.0       psubd	%mm2, %mm2
+# CHECK-NEXT: 29.    1     2.0    0.0    0.0       psubq	%mm2, %mm2
+# CHECK-NEXT: 30.    1     3.0    0.0    0.0       psubw	%mm2, %mm2
+# CHECK-NEXT: 31.    1     0.0    0.0    4.0       psubb	%xmm2, %xmm2
+# CHECK-NEXT: 32.    1     0.0    0.0    3.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 33.    1     0.0    0.0    3.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 34.    1     0.0    0.0    3.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 35.    1     0.0    0.0    3.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 36.    1     0.0    0.0    2.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 37.    1     0.0    0.0    2.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 38.    1     0.0    0.0    2.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39.    1     0.0    0.0    2.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40.    1     0.0    0.0    1.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41.    1     0.0    0.0    1.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42.    1     0.0    0.0    1.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43.    1     0.0    0.0    1.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44.    1     0.0    0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     0.0    0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46.    1     0.0    0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     0.0    0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48.    1     0.0    0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49.    1     0.0    0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50.    1     0.0    0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51.    1     1.0    1.0    0.0       andnps	%xmm0, %xmm0
+# CHECK-NEXT: 52.    1     1.0    1.0    0.0       andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 53.    1     2.0    2.0    0.0       vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 54.    1     3.0    1.0    0.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 55.    1     4.0    1.0    0.0       vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 56.    1     4.0    1.0    0.0       vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 57.    1     1.0    1.0    3.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 58.    1     4.0    0.0    0.0       pandn	%xmm2, %xmm2
+# CHECK-NEXT: 59.    1     1.0    1.0    3.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 60.    1     1.0    0.0    2.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 61.    1     4.0    0.0    0.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 62.    1     5.0    1.0    0.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 63.    1     2.0    0.0    3.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 64.    1     5.0    2.0    0.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 65.    1     6.0    3.0    0.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 66.    1     1.0    0.0    5.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 67.    1     0.0    0.0    7.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 68.    1     2.0    0.0    4.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 69.    1     2.0    0.0    4.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 70.    1     2.0    0.0    4.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 71.    1     2.0    0.0    4.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 72.    1     1.0    0.0    4.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 73.    1     1.0    1.0    3.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 74.    1     1.0    0.0    4.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 75.    1     0.0    0.0    5.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 76.    1     0.0    0.0    4.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 77.    1     0.0    0.0    4.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 78.    1     0.0    0.0    4.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 79.    1     0.0    0.0    4.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 80.    1     0.0    0.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 81.    1     0.0    0.0    3.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 82.    1     0.0    0.0    3.0       vpxor	%ymm3, %ymm3, %ymm5
diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s
index 32932a05492fe..d100946031d12 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s
@@ -153,15 +153,15 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  1      0     0.25                        xorpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      0     0.25                        vxorps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      0     0.25                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  1      1     0.33                        pxor	%mm2, %mm2
 # CHECK-NEXT:  1      0     0.25                        pxor	%xmm2, %xmm2
 # CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  1      0     0.25                        vxorps	%xmm4, %xmm4, %xmm5
 # CHECK-NEXT:  1      0     0.25                        vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  1      1     1.00                        vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  1      1     1.00                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      0     0.25                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      0     0.25                        vxorpd	%ymm1, %ymm1, %ymm3
 # CHECK-NEXT:  1      0     0.25                        vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Register File statistics:
@@ -180,7 +180,7 @@ vpxor  %xmm3, %xmm3, %xmm5
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     4.00   8.00    -     14.00   -      -
+# CHECK-NEXT:  -      -     4.00   8.00    -     10.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -237,15 +237,15 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     xorpd	%xmm1, %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     pxor	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Timeline view:
@@ -305,16 +305,16 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: [0,50]    .    .    . D==E---------R.   xorpd	%xmm1, %xmm1
 # CHECK-NEXT: [0,51]    .    .    . D=E----------R.   vxorps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT: [0,52]    .    .    .  D=E---------R.   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,53]    .    .    .  D======eE---R.   vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,54]    .    .    .  D=====eE----R.   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,53]    .    .    .  DE----------R.   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,54]    .    .    .  D=E---------R.   vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT: [0,55]    .    .    .  D==========eER   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,56]    .    .    .   D======E----R   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,56]    .    .    .   D-----------R   pxor	%xmm2, %xmm2
 # CHECK-NEXT: [0,57]    .    .    .   D-----------R   vpxor	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT: [0,58]    .    .    .   D-----------R   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,59]    .    .    .   D=====E-----R   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,60]    .    .    .    D======eE--R   vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,61]    .    .    .    D=====eE---R   vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,62]    .    .    .    D======E---R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,59]    .    .    .   DE----------R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,60]    .    .    .    D----------R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,61]    .    .    .    D----------R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,62]    .    .    .    D----------R   vpxor	%xmm3, %xmm3, %xmm5
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -376,13 +376,13 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 50.    1     3.0    0.0    9.0       xorpd	%xmm1, %xmm1
 # CHECK-NEXT: 51.    1     2.0    0.0    10.0      vxorps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT: 52.    1     2.0    0.0    9.0       vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 53.    1     7.0    6.0    3.0       vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 54.    1     6.0    4.0    4.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 53.    1     1.0    0.0    10.0      vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 54.    1     2.0    0.0    9.0       vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT: 55.    1     11.0   0.0    0.0       pxor	%mm2, %mm2
-# CHECK-NEXT: 56.    1     7.0    0.0    4.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 56.    1     0.0    0.0    11.0      pxor	%xmm2, %xmm2
 # CHECK-NEXT: 57.    1     0.0    0.0    11.0      vpxor	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT: 58.    1     0.0    0.0    11.0      vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 59.    1     6.0    0.0    5.0       vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 60.    1     7.0    7.0    2.0       vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: 61.    1     6.0    1.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: 62.    1     7.0    0.0    3.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 59.    1     1.0    0.0    10.0      vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 60.    1     0.0    0.0    10.0      vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 61.    1     0.0    0.0    10.0      vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 62.    1     0.0    0.0    10.0      vpxor	%xmm3, %xmm3, %xmm5
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
index d10e7890e45b3..093d418b21dbf 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
@@ -105,13 +105,13 @@ vpxor  %ymm3, %ymm3, %ymm5
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      83
-# CHECK-NEXT: Total Cycles:      34
+# CHECK-NEXT: Total Cycles:      17
 # CHECK-NEXT: Total uOps:        83
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    2.44
-# CHECK-NEXT: IPC:               2.44
-# CHECK-NEXT: Block RThroughput: 16.7
+# CHECK-NEXT: uOps Per Cycle:    4.88
+# CHECK-NEXT: IPC:               4.88
+# CHECK-NEXT: Block RThroughput: 13.8
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -122,57 +122,57 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
-# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      0     0.17                        subl	%eax, %eax
+# CHECK-NEXT:  1      0     0.17                        subq	%rax, %rax
+# CHECK-NEXT:  1      0     0.17                        xorl	%eax, %eax
+# CHECK-NEXT:  1      0     0.17                        xorq	%rax, %rax
 # CHECK-NEXT:  1      1     1.00                        pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  1      1     1.00                        pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  1      1     1.00                        pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  1      3     1.00                        pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.33                        psubb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        psubd	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        psubq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        psubw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     0.33                        andnps	%xmm0, %xmm0
 # CHECK-NEXT:  1      1     0.33                        andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      1     0.33                        vandnps	%xmm2, %xmm2, %xmm2
@@ -189,26 +189,26 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     0.33                        vandnps	%ymm2, %ymm2, %ymm5
 # CHECK-NEXT:  1      1     0.33                        vandnpd	%ymm1, %ymm1, %ymm5
 # CHECK-NEXT:  1      1     0.33                        vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        xorps	%xmm0, %xmm0
-# CHECK-NEXT:  1      1     0.33                        xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      0     0.17                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      0     0.17                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.17                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.17                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  1      1     0.50                        pxor	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    87
-# CHECK-NEXT: Max number of mappings used:         66
+# CHECK-NEXT: Max number of mappings used:         30
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SKLDivider
@@ -224,181 +224,181 @@ vpxor  %ymm3, %ymm3, %ymm5
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     28.00  26.00   -      -      -     27.00  2.00    -
+# CHECK-NEXT:  -      -     10.00  6.00    -      -      -     8.00    -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%mm2, %mm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubq	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     andnps	%xmm0, %xmm0
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pandn	%xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pandn	%mm2, %mm2
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm3
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pxor	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pxor	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789          0123
-# CHECK-NEXT: Index     0123456789          0123456789
+# CHECK-NEXT:                     0123456
+# CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeER .    .    .    .    .    .  .   subl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .  .   subq	%rax, %rax
-# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .  .   xorl	%eax, %eax
-# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .  .   xorq	%rax, %rax
-# CHECK-NEXT: [0,4]     DeE---R   .    .    .    .    .  .   pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: [0,5]     D=eE--R   .    .    .    .    .  .   pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: [0,6]     .D=eE-R   .    .    .    .    .  .   pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: [0,7]     .DeE--R   .    .    .    .    .  .   pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: [0,8]     .D=eE-R   .    .    .    .    .  .   pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: [0,9]     .D===eeeER.    .    .    .    .  .   pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: [0,10]    .D======eER    .    .    .    .  .   pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: [0,11]    .D==eE----R    .    .    .    .  .   vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12]    . D==eE---R    .    .    .    .  .   vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13]    . D===eeeER    .    .    .    .  .   vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14]    . D======eER   .    .    .    .  .   vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15]    . D=======eER  .    .    .    .  .   vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16]    . D=======eER  .    .    .    .  .   vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17]    . D=======eeeER.    .    .    .  .   vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18]    .  D=======eE-R.    .    .    .  .   vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19]    .  D=======eE-R.    .    .    .  .   vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,20]    .  D========eER.    .    .    .  .   vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,21]    .  D=========eeeER  .    .    .  .   vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,22]    .  D============eER .    .    .  .   vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,23]    .  D=============eER.    .    .  .   vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,24]    .   D============eER.    .    .  .   vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,25]    .   D============eeeER   .    .  .   vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,26]    .   D=============eE-R   .    .  .   vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,27]    .   D=eE-------------R   .    .  .   psubb	%mm2, %mm2
-# CHECK-NEXT: [0,28]    .   D==eE------------R   .    .  .   psubd	%mm2, %mm2
-# CHECK-NEXT: [0,29]    .   D===eE-----------R   .    .  .   psubq	%mm2, %mm2
-# CHECK-NEXT: [0,30]    .    D===eE----------R   .    .  .   psubw	%mm2, %mm2
-# CHECK-NEXT: [0,31]    .    D===eE----------R   .    .  .   psubb	%xmm2, %xmm2
-# CHECK-NEXT: [0,32]    .    D=====eE--------R   .    .  .   psubd	%xmm2, %xmm2
-# CHECK-NEXT: [0,33]    .    D======eE-------R   .    .  .   psubq	%xmm2, %xmm2
-# CHECK-NEXT: [0,34]    .    D=======eE------R   .    .  .   psubw	%xmm2, %xmm2
-# CHECK-NEXT: [0,35]    .    D============eE-R   .    .  .   vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,36]    .    .D============eER   .    .  .   vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,37]    .    .D=============eER  .    .  .   vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,38]    .    .D==============eER .    .  .   vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,39]    .    .D===============eER.    .  .   vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,40]    .    .D================eER    .  .   vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,41]    .    .D=================eER   .  .   vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,42]    .    . D=================eER  .  .   vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,43]    .    . D==================eER .  .   vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,44]    .    . D==================eER .  .   vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45]    .    . D==================eER .  .   vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,46]    .    . D===================eER.  .   vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,47]    .    . D===================eER.  .   vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,48]    .    .  D==================eER.  .   vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,49]    .    .  D===================eER  .   vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,50]    .    .  D===================eER  .   vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,51]    .    .  D===eE----------------R  .   andnps	%xmm0, %xmm0
-# CHECK-NEXT: [0,52]    .    .  D====eE---------------R  .   andnpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,53]    .    .  D=====eE--------------R  .   vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,54]    .    .   D====eE--------------R  .   vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,55]    .    .   D=====eE-------------R  .   vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,56]    .    .   D=====eE-------------R  .   vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,57]    .    .   D====eE--------------R  .   pandn	%mm2, %mm2
-# CHECK-NEXT: [0,58]    .    .   D======eE------------R  .   pandn	%xmm2, %xmm2
-# CHECK-NEXT: [0,59]    .    .   D==================eER  .   vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,60]    .    .    D==================eER .   vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,61]    .    .    D=======eE-----------R .   vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,62]    .    .    D=====eE-------------R .   vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,63]    .    .    D===================eER.   vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,64]    .    .    D========eE-----------R.   vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: [0,65]    .    .    D========eE-----------R.   vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: [0,66]    .    .    .D==================eER.   vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,67]    .    .    .D===eE---------------R.   xorps	%xmm0, %xmm0
-# CHECK-NEXT: [0,68]    .    .    .D========eE----------R.   xorpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,69]    .    .    .D========eE----------R.   vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,70]    .    .    .D=========eE---------R.   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,71]    .    .    .D=========eE---------R.   vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,72]    .    .    . D=========eE--------R.   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,73]    .    .    . D=========eE--------R.   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,74]    .    .    . D==========eE-------R.   pxor	%xmm2, %xmm2
-# CHECK-NEXT: [0,75]    .    .    . D=================eER.   vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,76]    .    .    . D==================eER   vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,77]    .    .    . D===========eE-------R   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,78]    .    .    .  D==========eE-------R   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,79]    .    .    .  D===========eE------R   vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,80]    .    .    .  D=========eE--------R   vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,81]    .    .    .  D===========eE------R   vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,82]    .    .    .  D===============eE--R   vpxor	%ymm3, %ymm3, %ymm5
+# CHECK:      [0,0]     DR   .    .    ..   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     DR   .    .    ..   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     DR   .    .    ..   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     DR   .    .    ..   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     DeER .    .    ..   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     D=eER.    .    ..   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D=eER    .    ..   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .D---R    .    ..   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     .D---R    .    ..   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     .D---R    .    ..   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    .D---R    .    ..   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    .D---R    .    ..   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    . D--R    .    ..   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    . D--R    .    ..   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    . D--R    .    ..   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    . D--R    .    ..   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    . D--R    .    ..   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    . D--R    .    ..   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .  D-R    .    ..   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .  D-R    .    ..   vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20]    .  D-R    .    ..   vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21]    .  D-R    .    ..   vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22]    .  D-R    .    ..   vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23]    .  D-R    .    ..   vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24]    .   DR    .    ..   vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25]    .   DR    .    ..   vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26]    .   DR    .    ..   vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27]    .   DeER  .    ..   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,28]    .   D=eER .    ..   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,29]    .   D==eER.    ..   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,30]    .    D==eER    ..   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,31]    .    D----R    ..   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,32]    .    D----R    ..   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,33]    .    D----R    ..   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,34]    .    D----R    ..   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,35]    .    D----R    ..   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36]    .    .D---R    ..   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37]    .    .D---R    ..   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38]    .    .D---R    ..   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39]    .    .D---R    ..   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40]    .    .D---R    ..   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41]    .    .D---R    ..   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42]    .    . D--R    ..   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43]    .    . D--R    ..   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44]    .    . D--R    ..   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    . D--R    ..   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46]    .    . D--R    ..   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    . D--R    ..   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48]    .    .  D-R    ..   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49]    .    .  D-R    ..   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50]    .    .  D-R    ..   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51]    .    .  DeER   ..   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,52]    .    .  DeER   ..   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,53]    .    .  DeER   ..   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,54]    .    .   DeER  ..   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,55]    .    .   DeER  ..   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,56]    .    .   D=eER ..   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,57]    .    .   DeE-R ..   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,58]    .    .   D=eER ..   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,59]    .    .   D=eER ..   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,60]    .    .    D=eER..   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,61]    .    .    D=eER..   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,62]    .    .    D=eER..   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,63]    .    .    D==eER.   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,64]    .    .    D==eER.   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,65]    .    .    D==eER.   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,66]    .    .    .D==eER   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,67]    .    .    .D----R   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,68]    .    .    .DE---R   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,69]    .    .    .DE---R   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,70]    .    .    .DE---R   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,71]    .    .    .DE---R   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,72]    .    .    . D---R   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,73]    .    .    . D=eER   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,74]    .    .    . D---R   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,75]    .    .    . DE--R   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,76]    .    .    . DE--R   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,77]    .    .    . D---R   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,78]    .    .    .  D--R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,79]    .    .    .  D--R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,80]    .    .    .  D--R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,81]    .    .    .  D--R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,82]    .    .    .  D--R   vpxor	%ymm3, %ymm3, %ymm5
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -407,86 +407,86 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
-# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
-# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
-# CHECK-NEXT: 4.     1     1.0    1.0    3.0       pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: 5.     1     2.0    0.0    2.0       pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: 6.     1     2.0    0.0    1.0       pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: 7.     1     1.0    1.0    2.0       pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: 8.     1     2.0    0.0    1.0       pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: 9.     1     4.0    1.0    0.0       pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: 10.    1     7.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: 11.    1     3.0    3.0    4.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 12.    1     3.0    0.0    3.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 14.    1     7.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 15.    1     8.0    0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 16.    1     8.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 17.    1     8.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 18.    1     8.0    1.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 19.    1     8.0    1.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 20.    1     9.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 21.    1     10.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 22.    1     13.0   0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 23.    1     14.0   0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 24.    1     13.0   0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 25.    1     13.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 26.    1     14.0   1.0    1.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 27.    1     2.0    2.0    13.0      psubb	%mm2, %mm2
-# CHECK-NEXT: 28.    1     3.0    0.0    12.0      psubd	%mm2, %mm2
-# CHECK-NEXT: 29.    1     4.0    0.0    11.0      psubq	%mm2, %mm2
-# CHECK-NEXT: 30.    1     4.0    0.0    10.0      psubw	%mm2, %mm2
-# CHECK-NEXT: 31.    1     4.0    0.0    10.0      psubb	%xmm2, %xmm2
-# CHECK-NEXT: 32.    1     6.0    1.0    8.0       psubd	%xmm2, %xmm2
-# CHECK-NEXT: 33.    1     7.0    0.0    7.0       psubq	%xmm2, %xmm2
-# CHECK-NEXT: 34.    1     8.0    0.0    6.0       psubw	%xmm2, %xmm2
-# CHECK-NEXT: 35.    1     13.0   1.0    1.0       vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 36.    1     13.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 37.    1     14.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 38.    1     15.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 39.    1     16.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 40.    1     17.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 41.    1     18.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 42.    1     18.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 43.    1     19.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 44.    1     19.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 45.    1     19.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 46.    1     20.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 47.    1     20.0   1.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 48.    1     19.0   1.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 49.    1     20.0   2.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 50.    1     20.0   2.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 51.    1     4.0    4.0    16.0      andnps	%xmm0, %xmm0
-# CHECK-NEXT: 52.    1     5.0    5.0    15.0      andnpd	%xmm1, %xmm1
-# CHECK-NEXT: 53.    1     6.0    0.0    14.0      vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 54.    1     5.0    0.0    14.0      vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 55.    1     6.0    0.0    13.0      vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 56.    1     6.0    0.0    13.0      vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 57.    1     5.0    4.0    14.0      pandn	%mm2, %mm2
-# CHECK-NEXT: 58.    1     7.0    0.0    12.0      pandn	%xmm2, %xmm2
-# CHECK-NEXT: 59.    1     19.0   2.0    0.0       vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 60.    1     19.0   0.0    0.0       vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 61.    1     8.0    1.0    11.0      vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: 62.    1     6.0    0.0    13.0      vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: 63.    1     20.0   0.0    0.0       vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 64.    1     9.0    2.0    11.0      vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: 65.    1     9.0    3.0    11.0      vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: 66.    1     19.0   0.0    0.0       vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 67.    1     4.0    2.0    15.0      xorps	%xmm0, %xmm0
-# CHECK-NEXT: 68.    1     9.0    4.0    10.0      xorpd	%xmm1, %xmm1
-# CHECK-NEXT: 69.    1     9.0    3.0    10.0      vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 70.    1     10.0   0.0    9.0       vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 71.    1     10.0   0.0    9.0       vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 72.    1     10.0   0.0    8.0       vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 73.    1     10.0   7.0    8.0       pxor	%mm2, %mm2
-# CHECK-NEXT: 74.    1     11.0   1.0    7.0       pxor	%xmm2, %xmm2
-# CHECK-NEXT: 75.    1     18.0   0.0    0.0       vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 76.    1     19.0   0.0    0.0       vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 77.    1     12.0   12.0   7.0       vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 78.    1     11.0   1.0    7.0       vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 79.    1     12.0   12.0   6.0       vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: 80.    1     10.0   0.0    8.0       vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: 81.    1     12.0   1.0    6.0       vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 82.    1     16.0   5.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    0.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    0.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     2.0    0.0    0.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     0.0    0.0    3.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     0.0    0.0    3.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     0.0    0.0    3.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     0.0    0.0    3.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     0.0    0.0    3.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     0.0    0.0    2.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     0.0    0.0    2.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     0.0    0.0    2.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     0.0    0.0    2.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     0.0    0.0    2.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     0.0    0.0    2.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     0.0    0.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     0.0    0.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20.    1     0.0    0.0    1.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21.    1     0.0    0.0    1.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22.    1     0.0    0.0    1.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23.    1     0.0    0.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24.    1     0.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25.    1     0.0    0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26.    1     0.0    0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 27.    1     1.0    1.0    0.0       psubb	%mm2, %mm2
+# CHECK-NEXT: 28.    1     2.0    0.0    0.0       psubd	%mm2, %mm2
+# CHECK-NEXT: 29.    1     3.0    0.0    0.0       psubq	%mm2, %mm2
+# CHECK-NEXT: 30.    1     3.0    0.0    0.0       psubw	%mm2, %mm2
+# CHECK-NEXT: 31.    1     0.0    0.0    4.0       psubb	%xmm2, %xmm2
+# CHECK-NEXT: 32.    1     0.0    0.0    4.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 33.    1     0.0    0.0    4.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 34.    1     0.0    0.0    4.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 35.    1     0.0    0.0    4.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 36.    1     0.0    0.0    3.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 37.    1     0.0    0.0    3.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 38.    1     0.0    0.0    3.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39.    1     0.0    0.0    3.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40.    1     0.0    0.0    3.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41.    1     0.0    0.0    3.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42.    1     0.0    0.0    2.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43.    1     0.0    0.0    2.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44.    1     0.0    0.0    2.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     0.0    0.0    2.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46.    1     0.0    0.0    2.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     0.0    0.0    2.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48.    1     0.0    0.0    1.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49.    1     0.0    0.0    1.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50.    1     0.0    0.0    1.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51.    1     1.0    1.0    0.0       andnps	%xmm0, %xmm0
+# CHECK-NEXT: 52.    1     1.0    1.0    0.0       andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 53.    1     1.0    1.0    0.0       vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 54.    1     1.0    0.0    0.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 55.    1     1.0    0.0    0.0       vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 56.    1     2.0    0.0    0.0       vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 57.    1     1.0    1.0    1.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 58.    1     2.0    0.0    0.0       pandn	%xmm2, %xmm2
+# CHECK-NEXT: 59.    1     2.0    2.0    0.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 60.    1     2.0    0.0    0.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 61.    1     2.0    0.0    0.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 62.    1     2.0    0.0    0.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 63.    1     3.0    0.0    0.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 64.    1     3.0    1.0    0.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 65.    1     3.0    1.0    0.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 66.    1     3.0    1.0    0.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 67.    1     0.0    0.0    4.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 68.    1     1.0    0.0    3.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 69.    1     1.0    0.0    3.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 70.    1     1.0    0.0    3.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 71.    1     1.0    0.0    3.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 72.    1     0.0    0.0    3.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 73.    1     2.0    2.0    0.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 74.    1     0.0    0.0    3.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 75.    1     1.0    0.0    2.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 76.    1     1.0    0.0    2.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 77.    1     0.0    0.0    3.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 78.    1     0.0    0.0    2.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 79.    1     0.0    0.0    2.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 80.    1     0.0    0.0    2.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 81.    1     0.0    0.0    2.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 82.    1     0.0    0.0    2.0       vpxor	%ymm3, %ymm3, %ymm5
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s
index d273667b7d8d3..d4f5445e1fb65 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s
@@ -167,13 +167,13 @@ vpxorq  %zmm19, %zmm19, %zmm21
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      139
-# CHECK-NEXT: Total Cycles:      53
+# CHECK-NEXT: Total Cycles:      27
 # CHECK-NEXT: Total uOps:        139
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    2.62
-# CHECK-NEXT: IPC:               2.62
-# CHECK-NEXT: Block RThroughput: 30.0
+# CHECK-NEXT: uOps Per Cycle:    5.15
+# CHECK-NEXT: IPC:               5.15
+# CHECK-NEXT: Block RThroughput: 23.2
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -184,81 +184,81 @@ vpxorq  %zmm19, %zmm19, %zmm21
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        subl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        subq	%rax, %rax
-# CHECK-NEXT:  1      1     0.25                        xorl	%eax, %eax
-# CHECK-NEXT:  1      1     0.25                        xorq	%rax, %rax
+# CHECK-NEXT:  1      0     0.17                        subl	%eax, %eax
+# CHECK-NEXT:  1      0     0.17                        subq	%rax, %rax
+# CHECK-NEXT:  1      0     0.17                        xorl	%eax, %eax
+# CHECK-NEXT:  1      0     0.17                        xorq	%rax, %rax
 # CHECK-NEXT:  1      1     1.00                        pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  1      1     1.00                        pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  1      1     1.00                        pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  1      3     1.00                        pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      3     1.00                        vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpcmpgtw	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  1      1     0.50                        psubb	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubd	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubq	%mm2, %mm2
 # CHECK-NEXT:  1      1     0.50                        psubw	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.33                        psubb	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        psubd	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        psubq	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        psubw	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  1      1     0.33                        vpsubb	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  1      1     0.33                        vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      0     0.17                        psubb	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        psubd	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        psubq	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        psubw	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      0     0.17                        vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      0     0.17                        vpsubw	%zmm19, %zmm19, %zmm21
 # CHECK-NEXT:  1      1     0.33                        andnps	%xmm0, %xmm0
 # CHECK-NEXT:  1      1     0.33                        andnpd	%xmm1, %xmm1
 # CHECK-NEXT:  1      1     0.33                        vandnps	%xmm2, %xmm2, %xmm2
@@ -291,42 +291,42 @@ vpxorq  %zmm19, %zmm19, %zmm21
 # CHECK-NEXT:  1      1     0.33                        vpandnq	%ymm19, %ymm19, %ymm21
 # CHECK-NEXT:  1      1     0.50                        vpandnd	%zmm19, %zmm19, %zmm21
 # CHECK-NEXT:  1      1     0.50                        vpandnq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  1      1     0.33                        xorps	%xmm0, %xmm0
-# CHECK-NEXT:  1      1     0.33                        xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT:  1      1     0.50                        vxorps	%zmm2, %zmm2, %zmm2
-# CHECK-NEXT:  1      1     0.50                        vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT:  1      0     0.17                        xorps	%xmm0, %xmm0
+# CHECK-NEXT:  1      0     0.17                        xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.17                        vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  1      0     0.17                        vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  1      0     0.17                        vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%zmm1, %zmm1, %zmm1
 # CHECK-NEXT:  1      1     0.50                        pxor	%mm2, %mm2
-# CHECK-NEXT:  1      1     0.33                        pxor	%xmm2, %xmm2
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  1      1     0.33                        vpxord	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  1      1     0.33                        vpxorq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  1      1     0.33                        vpxord	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  1      1     0.33                        vpxorq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  1      1     0.50                        vpxord	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  1      1     0.50                        vpxorq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  1      1     0.33                        vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  1      1     0.33                        vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  1      1     0.50                        vxorps	%zmm4, %zmm4, %zmm5
-# CHECK-NEXT:  1      1     0.50                        vxorpd	%zmm1, %zmm1, %zmm3
-# CHECK-NEXT:  1      1     0.33                        vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  1      1     0.33                        vpxor	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  1      1     0.33                        vpxord	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  1      1     0.33                        vpxorq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  1      1     0.33                        vpxord	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  1      1     0.33                        vpxorq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  1      1     0.50                        vpxord	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  1      1     0.50                        vpxorq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      0     0.17                        pxor	%xmm2, %xmm2
+# CHECK-NEXT:  1      0     0.17                        vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      0     0.17                        vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  1      0     0.17                        vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      0     0.17                        vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  1      0     0.17                        vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      0     0.17                        vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  1      0     0.17                        vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  1      0     0.17                        vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  1      0     0.17                        vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT:  1      0     0.17                        vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT:  1      0     0.17                        vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  1      0     0.17                        vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  1      0     0.17                        vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      0     0.17                        vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  1      0     0.17                        vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      0     0.17                        vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  1      0     0.17                        vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  1      0     0.17                        vpxorq	%zmm19, %zmm19, %zmm21
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    143
-# CHECK-NEXT: Max number of mappings used:         91
+# CHECK-NEXT: Max number of mappings used:         47
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - SKXDivider
@@ -342,293 +342,293 @@ vpxorq  %zmm19, %zmm19, %zmm21
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     47.00  44.00   -      -      -     46.00  2.00    -
+# CHECK-NEXT:  -      -     16.00  10.00   -      -      -     14.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     subl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     subq	%rax, %rax
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     xorl	%eax, %eax
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     xorq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     subq	%rax, %rax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorl	%eax, %eax
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorq	%rax, %rax
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtb	%mm2, %mm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtd	%mm2, %mm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pcmpgtw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubb	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%mm2, %mm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%mm2, %mm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubq	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     psubw	%mm2, %mm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubb	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubd	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubq	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     psubw	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubb	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubb	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubw	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubb	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubw	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubb	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpsubd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpsubq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpsubw	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnps	%xmm0, %xmm0
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     andnpd	%xmm1, %xmm1
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     psubw	%mm2, %mm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubb	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubd	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubq	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     psubw	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     andnps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     andnpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%zmm2, %zmm2, %zmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%zmm1, %zmm1, %zmm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%mm2, %mm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     pandn	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pandn	%mm2, %mm2
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pandn	%xmm2, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%xmm3, %xmm3, %xmm3
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm3
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnd	%xmm19, %xmm19, %xmm19
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnq	%zmm19, %zmm19, %zmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnps	%ymm2, %ymm2, %ymm5
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandn	%ymm3, %ymm3, %ymm5
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnps	%zmm2, %zmm2, %zmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vandnpd	%zmm1, %zmm1, %zmm5
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     xorps	%xmm0, %xmm0
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     xorpd	%xmm1, %xmm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%zmm2, %zmm2, %zmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorps	%xmm0, %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     xorpd	%xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%zmm1, %zmm1, %zmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     pxor	%mm2, %mm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     pxor	%xmm2, %xmm2
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxord	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxorq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxord	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxorq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxord	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxorq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorps	%zmm4, %zmm4, %zmm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vxorpd	%zmm1, %zmm1, %zmm3
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxord	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxorq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vpxord	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxorq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpxord	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vpxorq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     pxor	%xmm2, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     vpxorq	%zmm19, %zmm19, %zmm21
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789          0123456789          012
-# CHECK-NEXT: Index     0123456789          0123456789          0123456789
-
-# CHECK:      [0,0]     DeER .    .    .    .    .    .    .    .    .    . .   subl	%eax, %eax
-# CHECK-NEXT: [0,1]     D=eER.    .    .    .    .    .    .    .    .    . .   subq	%rax, %rax
-# CHECK-NEXT: [0,2]     D==eER    .    .    .    .    .    .    .    .    . .   xorl	%eax, %eax
-# CHECK-NEXT: [0,3]     D===eER   .    .    .    .    .    .    .    .    . .   xorq	%rax, %rax
-# CHECK-NEXT: [0,4]     DeE---R   .    .    .    .    .    .    .    .    . .   pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: [0,5]     D=eE--R   .    .    .    .    .    .    .    .    . .   pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: [0,6]     .D=eE-R   .    .    .    .    .    .    .    .    . .   pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: [0,7]     .DeE--R   .    .    .    .    .    .    .    .    . .   pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: [0,8]     .D=eE-R   .    .    .    .    .    .    .    .    . .   pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: [0,9]     .D===eeeER.    .    .    .    .    .    .    .    . .   pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: [0,10]    .D======eER    .    .    .    .    .    .    .    . .   pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: [0,11]    .D==eE----R    .    .    .    .    .    .    .    . .   vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12]    . D==eE---R    .    .    .    .    .    .    .    . .   vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13]    . D===eeeER    .    .    .    .    .    .    .    . .   vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14]    . D======eER   .    .    .    .    .    .    .    . .   vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15]    . D=======eER  .    .    .    .    .    .    .    . .   vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16]    . D=======eER  .    .    .    .    .    .    .    . .   vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17]    . D=======eeeER.    .    .    .    .    .    .    . .   vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18]    .  D=======eE-R.    .    .    .    .    .    .    . .   vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19]    .  D=======eE-R.    .    .    .    .    .    .    . .   vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,20]    .  D========eER.    .    .    .    .    .    .    . .   vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,21]    .  D=========eeeER  .    .    .    .    .    .    . .   vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,22]    .  D============eER .    .    .    .    .    .    . .   vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,23]    .  D=============eER.    .    .    .    .    .    . .   vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,24]    .   D============eER.    .    .    .    .    .    . .   vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,25]    .   D============eeeER   .    .    .    .    .    . .   vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,26]    .   D=============eE-R   .    .    .    .    .    . .   vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,27]    .   D=eE-------------R   .    .    .    .    .    . .   psubb	%mm2, %mm2
-# CHECK-NEXT: [0,28]    .   D==eE------------R   .    .    .    .    .    . .   psubd	%mm2, %mm2
-# CHECK-NEXT: [0,29]    .   D===eE-----------R   .    .    .    .    .    . .   psubq	%mm2, %mm2
-# CHECK-NEXT: [0,30]    .    D===eE----------R   .    .    .    .    .    . .   psubw	%mm2, %mm2
-# CHECK-NEXT: [0,31]    .    D===eE----------R   .    .    .    .    .    . .   psubb	%xmm2, %xmm2
-# CHECK-NEXT: [0,32]    .    D=====eE--------R   .    .    .    .    .    . .   psubd	%xmm2, %xmm2
-# CHECK-NEXT: [0,33]    .    D======eE-------R   .    .    .    .    .    . .   psubq	%xmm2, %xmm2
-# CHECK-NEXT: [0,34]    .    D=======eE------R   .    .    .    .    .    . .   psubw	%xmm2, %xmm2
-# CHECK-NEXT: [0,35]    .    D============eE-R   .    .    .    .    .    . .   vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,36]    .    .D============eER   .    .    .    .    .    . .   vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,37]    .    .D=============eER  .    .    .    .    .    . .   vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,38]    .    .D==============eER .    .    .    .    .    . .   vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,39]    .    .D===============eER.    .    .    .    .    . .   vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,40]    .    .D================eER    .    .    .    .    . .   vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,41]    .    .D=================eER   .    .    .    .    . .   vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,42]    .    . D=================eER  .    .    .    .    . .   vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,43]    .    . D==================eER .    .    .    .    . .   vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,44]    .    . D==================eER .    .    .    .    . .   vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45]    .    . D==================eER .    .    .    .    . .   vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,46]    .    . D===================eER.    .    .    .    . .   vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,47]    .    . D===================eER.    .    .    .    . .   vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,48]    .    .  D==================eER.    .    .    .    . .   vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,49]    .    .  D===================eER    .    .    .    . .   vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,50]    .    .  D===================eER    .    .    .    . .   vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,51]    .    .  D===eE----------------R    .    .    .    . .   vpsubb	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,52]    .    .  D====eE---------------R    .    .    .    . .   vpsubd	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,53]    .    .  D=====eE--------------R    .    .    .    . .   vpsubq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,54]    .    .   D=====eE-------------R    .    .    .    . .   vpsubw	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,55]    .    .   D======eE------------R    .    .    .    . .   vpsubb	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,56]    .    .   D========eE----------R    .    .    .    . .   vpsubd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,57]    .    .   D=========eE---------R    .    .    .    . .   vpsubq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,58]    .    .   D==========eE--------R    .    .    .    . .   vpsubw	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,59]    .    .   D===========eE-------R    .    .    .    . .   vpsubb	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,60]    .    .    D===========eE------R    .    .    .    . .   vpsubd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,61]    .    .    D============eE-----R    .    .    .    . .   vpsubq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,62]    .    .    D=============eE----R    .    .    .    . .   vpsubw	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,63]    .    .    D==============eE---R    .    .    .    . .   vpsubb	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,64]    .    .    D==============eE---R    .    .    .    . .   vpsubd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,65]    .    .    D=================eER    .    .    .    . .   vpsubq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,66]    .    .    .D=================eER   .    .    .    . .   vpsubw	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,67]    .    .    .D=================eER   .    .    .    . .   vpsubb	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,68]    .    .    .D=================eER   .    .    .    . .   vpsubd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,69]    .    .    .D==================eER  .    .    .    . .   vpsubq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,70]    .    .    .D==================eER  .    .    .    . .   vpsubw	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,71]    .    .    .D==================eER  .    .    .    . .   vpsubb	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,72]    .    .    . D==================eER .    .    .    . .   vpsubd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,73]    .    .    . D==================eER .    .    .    . .   vpsubq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,74]    .    .    . D==================eER .    .    .    . .   vpsubw	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,75]    .    .    . D=eE-----------------R .    .    .    . .   andnps	%xmm0, %xmm0
-# CHECK-NEXT: [0,76]    .    .    . D=eE-----------------R .    .    .    . .   andnpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,77]    .    .    . D==eE----------------R .    .    .    . .   vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,78]    .    .    .  D=eE----------------R .    .    .    . .   vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,79]    .    .    .  D==eE---------------R .    .    .    . .   vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,80]    .    .    .  D=======eE----------R .    .    .    . .   vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,81]    .    .    .  D=====eE------------R .    .    .    . .   vandnps	%zmm2, %zmm2, %zmm2
-# CHECK-NEXT: [0,82]    .    .    .  D========eE---------R .    .    .    . .   vandnpd	%zmm1, %zmm1, %zmm1
-# CHECK-NEXT: [0,83]    .    .    .  D=========eE--------R .    .    .    . .   pandn	%mm2, %mm2
-# CHECK-NEXT: [0,84]    .    .    .   D=====eE-----------R .    .    .    . .   pandn	%xmm2, %xmm2
-# CHECK-NEXT: [0,85]    .    .    .   D=================eER.    .    .    . .   vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,86]    .    .    .   D==================eER    .    .    . .   vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,87]    .    .    .   D=================eE-R    .    .    . .   vpandnd	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,88]    .    .    .   D==================eER    .    .    . .   vpandnq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,89]    .    .    .   D===================eER   .    .    . .   vpandnd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,90]    .    .    .    D===================eER  .    .    . .   vpandnq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,91]    .    .    .    D====================eER .    .    . .   vpandnd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,92]    .    .    .    D=====================eER.    .    . .   vpandnq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,93]    .    .    .    D========eE-------------R.    .    . .   vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,94]    .    .    .    D================eE-----R.    .    . .   vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,95]    .    .    .    D==================eE---R.    .    . .   vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,96]    .    .    .    .D================eE----R.    .    . .   vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: [0,97]    .    .    .    .D=================eE---R.    .    . .   vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: [0,98]    .    .    .    .D==================eE--R.    .    . .   vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,99]    .    .    .    .D==================eE--R.    .    . .   vandnps	%zmm2, %zmm2, %zmm5
-# CHECK-NEXT: [0,100]   .    .    .    .D===================eE-R.    .    . .   vandnpd	%zmm1, %zmm1, %zmm5
-# CHECK-NEXT: [0,101]   .    .    .    . D====================eER    .    . .   vpandnd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,102]   .    .    .    . D====================eER    .    . .   vpandnq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,103]   .    .    .    . D====================eER    .    . .   vpandnd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,104]   .    .    .    .  D====================eER   .    . .   vpandnq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,105]   .    .    .    .  D====================eER   .    . .   vpandnd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,106]   .    .    .    .  D=====================eER  .    . .   vpandnq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,107]   .    .    .    .   D=================eE---R  .    . .   xorps	%xmm0, %xmm0
-# CHECK-NEXT: [0,108]   .    .    .    .   D================eE----R  .    . .   xorpd	%xmm1, %xmm1
-# CHECK-NEXT: [0,109]   .    .    .    .   D=================eE---R  .    . .   vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,110]   .    .    .    .    D==================eE-R  .    . .   vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,111]   .    .    .    .    D===================eER  .    . .   vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,112]   .    .    .    .    D===================eER  .    . .   vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,113]   .    .    .    .    .D===================eER .    . .   vxorps	%zmm2, %zmm2, %zmm2
-# CHECK-NEXT: [0,114]   .    .    .    .    .D===================eER .    . .   vxorpd	%zmm1, %zmm1, %zmm1
-# CHECK-NEXT: [0,115]   .    .    .    .    .D====================eER.    . .   pxor	%mm2, %mm2
-# CHECK-NEXT: [0,116]   .    .    .    .    . D===================eER.    . .   pxor	%xmm2, %xmm2
-# CHECK-NEXT: [0,117]   .    .    .    .    . D==================eE-R.    . .   vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,118]   .    .    .    .    . D====================eER    . .   vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,119]   .    .    .    .    .  D==================eE-R    . .   vpxord	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,120]   .    .    .    .    .  D===================eER    . .   vpxorq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: [0,121]   .    .    .    .    .  D====================eER   . .   vpxord	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,122]   .    .    .    .    .   D====================eER  . .   vpxorq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: [0,123]   .    .    .    .    .   D=====================eER . .   vpxord	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,124]   .    .    .    .    .   D======================eER. .   vpxorq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: [0,125]   .    .    .    .    .    D=================eE----R. .   vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,126]   .    .    .    .    .    D==================eE---R. .   vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,127]   .    .    .    .    .    D===================eE--R. .   vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,128]   .    .    .    .    .    .D==================eE--R. .   vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,129]   .    .    .    .    .    .D===================eE-R. .   vxorps	%zmm4, %zmm4, %zmm5
-# CHECK-NEXT: [0,130]   .    .    .    .    .    .D=================eE---R. .   vxorpd	%zmm1, %zmm1, %zmm3
-# CHECK-NEXT: [0,131]   .    .    .    .    .    . D==================eE-R. .   vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,132]   .    .    .    .    .    . D===================eER. .   vpxor	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,133]   .    .    .    .    .    . D====================eER .   vpxord	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,134]   .    .    .    .    .    .  D===================eER .   vpxorq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: [0,135]   .    .    .    .    .    .  D===================eER .   vpxord	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,136]   .    .    .    .    .    .  D====================eER.   vpxorq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: [0,137]   .    .    .    .    .    .   D===================eER.   vpxord	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: [0,138]   .    .    .    .    .    .   D====================eER   vpxorq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT:                     0123456789
+# CHECK-NEXT: Index     0123456789          0123456
+
+# CHECK:      [0,0]     DR   .    .    .    .    ..   subl	%eax, %eax
+# CHECK-NEXT: [0,1]     DR   .    .    .    .    ..   subq	%rax, %rax
+# CHECK-NEXT: [0,2]     DR   .    .    .    .    ..   xorl	%eax, %eax
+# CHECK-NEXT: [0,3]     DR   .    .    .    .    ..   xorq	%rax, %rax
+# CHECK-NEXT: [0,4]     DeER .    .    .    .    ..   pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: [0,5]     D=eER.    .    .    .    ..   pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: [0,6]     .D=eER    .    .    .    ..   pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: [0,7]     .D---R    .    .    .    ..   pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: [0,8]     .D---R    .    .    .    ..   pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: [0,9]     .D---R    .    .    .    ..   pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: [0,10]    .D---R    .    .    .    ..   pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: [0,11]    .D---R    .    .    .    ..   vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12]    . D--R    .    .    .    ..   vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13]    . D--R    .    .    .    ..   vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14]    . D--R    .    .    .    ..   vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15]    . D--R    .    .    .    ..   vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16]    . D--R    .    .    .    ..   vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17]    . D--R    .    .    .    ..   vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18]    .  D-R    .    .    .    ..   vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19]    .  D-R    .    .    .    ..   vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20]    .  D-R    .    .    .    ..   vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21]    .  D-R    .    .    .    ..   vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22]    .  D-R    .    .    .    ..   vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23]    .  D-R    .    .    .    ..   vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24]    .   DR    .    .    .    ..   vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25]    .   DR    .    .    .    ..   vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26]    .   DR    .    .    .    ..   vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27]    .   DeER  .    .    .    ..   psubb	%mm2, %mm2
+# CHECK-NEXT: [0,28]    .   D=eER .    .    .    ..   psubd	%mm2, %mm2
+# CHECK-NEXT: [0,29]    .   D==eER.    .    .    ..   psubq	%mm2, %mm2
+# CHECK-NEXT: [0,30]    .    D==eER    .    .    ..   psubw	%mm2, %mm2
+# CHECK-NEXT: [0,31]    .    D----R    .    .    ..   psubb	%xmm2, %xmm2
+# CHECK-NEXT: [0,32]    .    D----R    .    .    ..   psubd	%xmm2, %xmm2
+# CHECK-NEXT: [0,33]    .    D----R    .    .    ..   psubq	%xmm2, %xmm2
+# CHECK-NEXT: [0,34]    .    D----R    .    .    ..   psubw	%xmm2, %xmm2
+# CHECK-NEXT: [0,35]    .    D----R    .    .    ..   vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36]    .    .D---R    .    .    ..   vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37]    .    .D---R    .    .    ..   vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38]    .    .D---R    .    .    ..   vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39]    .    .D---R    .    .    ..   vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40]    .    .D---R    .    .    ..   vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41]    .    .D---R    .    .    ..   vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42]    .    . D--R    .    .    ..   vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43]    .    . D--R    .    .    ..   vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44]    .    . D--R    .    .    ..   vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45]    .    . D--R    .    .    ..   vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46]    .    . D--R    .    .    ..   vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47]    .    . D--R    .    .    ..   vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48]    .    .  D-R    .    .    ..   vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49]    .    .  D-R    .    .    ..   vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50]    .    .  D-R    .    .    ..   vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51]    .    .  D-R    .    .    ..   vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,52]    .    .  D-R    .    .    ..   vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,53]    .    .  D-R    .    .    ..   vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,54]    .    .   DR    .    .    ..   vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,55]    .    .   DR    .    .    ..   vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,56]    .    .   DR    .    .    ..   vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,57]    .    .   DR    .    .    ..   vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,58]    .    .   DR    .    .    ..   vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,59]    .    .   DR    .    .    ..   vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,60]    .    .    DR   .    .    ..   vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,61]    .    .    DR   .    .    ..   vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,62]    .    .    DR   .    .    ..   vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,63]    .    .    DR   .    .    ..   vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,64]    .    .    DR   .    .    ..   vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,65]    .    .    DR   .    .    ..   vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,66]    .    .    .DR  .    .    ..   vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,67]    .    .    .DR  .    .    ..   vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,68]    .    .    .DR  .    .    ..   vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,69]    .    .    .DR  .    .    ..   vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,70]    .    .    .DR  .    .    ..   vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,71]    .    .    .DR  .    .    ..   vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,72]    .    .    . DR .    .    ..   vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,73]    .    .    . DR .    .    ..   vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,74]    .    .    . DR .    .    ..   vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,75]    .    .    . DeER    .    ..   andnps	%xmm0, %xmm0
+# CHECK-NEXT: [0,76]    .    .    . DeER    .    ..   andnpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,77]    .    .    . DeER    .    ..   vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,78]    .    .    .  DeER   .    ..   vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,79]    .    .    .  DeER   .    ..   vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,80]    .    .    .  D=eER  .    ..   vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,81]    .    .    .  D=eER  .    ..   vandnps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: [0,82]    .    .    .  D==eER .    ..   vandnpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: [0,83]    .    .    .  DeE--R .    ..   pandn	%mm2, %mm2
+# CHECK-NEXT: [0,84]    .    .    .   D=eER .    ..   pandn	%xmm2, %xmm2
+# CHECK-NEXT: [0,85]    .    .    .   DeE-R .    ..   vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,86]    .    .    .   D=eER .    ..   vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,87]    .    .    .   D==eER.    ..   vpandnd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,88]    .    .    .   D===eER    ..   vpandnq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,89]    .    .    .   D====eER   ..   vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,90]    .    .    .    D====eER  ..   vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,91]    .    .    .    D=====eER ..   vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,92]    .    .    .    D======eER..   vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,93]    .    .    .    D=eE-----R..   vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,94]    .    .    .    D=eE-----R..   vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,95]    .    .    .    D==eE----R..   vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,96]    .    .    .    .D=eE----R..   vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,97]    .    .    .    .D==eE---R..   vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,98]    .    .    .    .D==eE---R..   vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,99]    .    .    .    .D===eE--R..   vandnps	%zmm2, %zmm2, %zmm5
+# CHECK-NEXT: [0,100]   .    .    .    .D===eE--R..   vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT: [0,101]   .    .    .    .D======eER.   vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,102]   .    .    .    . D=====eER.   vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,103]   .    .    .    . D=====eER.   vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,104]   .    .    .    . D======eER   vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,105]   .    .    .    . D======eER   vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,106]   .    .    .    . D======eER   vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,107]   .    .    .    . D--------R   xorps	%xmm0, %xmm0
+# CHECK-NEXT: [0,108]   .    .    .    .  D-------R   xorpd	%xmm1, %xmm1
+# CHECK-NEXT: [0,109]   .    .    .    .  D-------R   vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,110]   .    .    .    .  D-------R   vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,111]   .    .    .    .  D-------R   vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,112]   .    .    .    .  D-------R   vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,113]   .    .    .    .  D-------R   vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: [0,114]   .    .    .    .   D------R   vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: [0,115]   .    .    .    .   D=eE---R   pxor	%mm2, %mm2
+# CHECK-NEXT: [0,116]   .    .    .    .   D------R   pxor	%xmm2, %xmm2
+# CHECK-NEXT: [0,117]   .    .    .    .   D------R   vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,118]   .    .    .    .   D------R   vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,119]   .    .    .    .   D===E--R   vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,120]   .    .    .    .    D==E--R   vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: [0,121]   .    .    .    .    D==E--R   vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,122]   .    .    .    .    D==E--R   vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: [0,123]   .    .    .    .    D==E--R   vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,124]   .    .    .    .    D==E--R   vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: [0,125]   .    .    .    .    D-----R   vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,126]   .    .    .    .    .D----R   vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,127]   .    .    .    .    .D----R   vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,128]   .    .    .    .    .D----R   vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,129]   .    .    .    .    .D----R   vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT: [0,130]   .    .    .    .    .D----R   vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT: [0,131]   .    .    .    .    .D----R   vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,132]   .    .    .    .    . D---R   vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,133]   .    .    .    .    . DE--R   vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,134]   .    .    .    .    . DE--R   vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: [0,135]   .    .    .    .    . DE--R   vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,136]   .    .    .    .    . DE--R   vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: [0,137]   .    .    .    .    . DE--R   vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: [0,138]   .    .    .    .    .  D--R   vpxorq	%zmm19, %zmm19, %zmm21
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -637,142 +637,142 @@ vpxorq  %zmm19, %zmm19, %zmm21
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     1     1.0    1.0    0.0       subl	%eax, %eax
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       subq	%rax, %rax
-# CHECK-NEXT: 2.     1     3.0    0.0    0.0       xorl	%eax, %eax
-# CHECK-NEXT: 3.     1     4.0    0.0    0.0       xorq	%rax, %rax
-# CHECK-NEXT: 4.     1     1.0    1.0    3.0       pcmpgtb	%mm2, %mm2
-# CHECK-NEXT: 5.     1     2.0    0.0    2.0       pcmpgtd	%mm2, %mm2
-# CHECK-NEXT: 6.     1     2.0    0.0    1.0       pcmpgtw	%mm2, %mm2
-# CHECK-NEXT: 7.     1     1.0    1.0    2.0       pcmpgtb	%xmm2, %xmm2
-# CHECK-NEXT: 8.     1     2.0    0.0    1.0       pcmpgtd	%xmm2, %xmm2
-# CHECK-NEXT: 9.     1     4.0    1.0    0.0       pcmpgtq	%xmm2, %xmm2
-# CHECK-NEXT: 10.    1     7.0    0.0    0.0       pcmpgtw	%xmm2, %xmm2
-# CHECK-NEXT: 11.    1     3.0    3.0    4.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 12.    1     3.0    0.0    3.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 13.    1     4.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 14.    1     7.0    0.0    0.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 15.    1     8.0    0.0    0.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 16.    1     8.0    0.0    0.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 17.    1     8.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 18.    1     8.0    1.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 19.    1     8.0    1.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 20.    1     9.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 21.    1     10.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 22.    1     13.0   0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 23.    1     14.0   0.0    0.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 24.    1     13.0   0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 25.    1     13.0   0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 26.    1     14.0   1.0    1.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 27.    1     2.0    2.0    13.0      psubb	%mm2, %mm2
-# CHECK-NEXT: 28.    1     3.0    0.0    12.0      psubd	%mm2, %mm2
-# CHECK-NEXT: 29.    1     4.0    0.0    11.0      psubq	%mm2, %mm2
-# CHECK-NEXT: 30.    1     4.0    0.0    10.0      psubw	%mm2, %mm2
-# CHECK-NEXT: 31.    1     4.0    0.0    10.0      psubb	%xmm2, %xmm2
-# CHECK-NEXT: 32.    1     6.0    1.0    8.0       psubd	%xmm2, %xmm2
-# CHECK-NEXT: 33.    1     7.0    0.0    7.0       psubq	%xmm2, %xmm2
-# CHECK-NEXT: 34.    1     8.0    0.0    6.0       psubw	%xmm2, %xmm2
-# CHECK-NEXT: 35.    1     13.0   1.0    1.0       vpsubb	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 36.    1     13.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 37.    1     14.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 38.    1     15.0   0.0    0.0       vpsubw	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 39.    1     16.0   0.0    0.0       vpsubb	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 40.    1     17.0   0.0    0.0       vpsubd	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 41.    1     18.0   0.0    0.0       vpsubq	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 42.    1     18.0   0.0    0.0       vpsubw	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 43.    1     19.0   0.0    0.0       vpsubb	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 44.    1     19.0   0.0    0.0       vpsubd	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 45.    1     19.0   0.0    0.0       vpsubq	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 46.    1     20.0   1.0    0.0       vpsubw	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 47.    1     20.0   1.0    0.0       vpsubb	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 48.    1     19.0   1.0    0.0       vpsubd	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 49.    1     20.0   2.0    0.0       vpsubq	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 50.    1     20.0   2.0    0.0       vpsubw	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 51.    1     4.0    4.0    16.0      vpsubb	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 52.    1     5.0    0.0    15.0      vpsubd	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 53.    1     6.0    0.0    14.0      vpsubq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 54.    1     6.0    0.0    13.0      vpsubw	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 55.    1     7.0    0.0    12.0      vpsubb	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 56.    1     9.0    1.0    10.0      vpsubd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 57.    1     10.0   0.0    9.0       vpsubq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 58.    1     11.0   0.0    8.0       vpsubw	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 59.    1     12.0   0.0    7.0       vpsubb	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 60.    1     12.0   0.0    6.0       vpsubd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 61.    1     13.0   0.0    5.0       vpsubq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 62.    1     14.0   0.0    4.0       vpsubw	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 63.    1     15.0   0.0    3.0       vpsubb	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 64.    1     15.0   0.0    3.0       vpsubd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 65.    1     18.0   3.0    0.0       vpsubq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 66.    1     18.0   4.0    0.0       vpsubw	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 67.    1     18.0   4.0    0.0       vpsubb	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 68.    1     18.0   4.0    0.0       vpsubd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 69.    1     19.0   5.0    0.0       vpsubq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 70.    1     19.0   5.0    0.0       vpsubw	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 71.    1     19.0   5.0    0.0       vpsubb	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 72.    1     19.0   6.0    0.0       vpsubd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 73.    1     19.0   6.0    0.0       vpsubq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 74.    1     19.0   6.0    0.0       vpsubw	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 75.    1     2.0    2.0    17.0      andnps	%xmm0, %xmm0
-# CHECK-NEXT: 76.    1     2.0    2.0    17.0      andnpd	%xmm1, %xmm1
-# CHECK-NEXT: 77.    1     3.0    1.0    16.0      vandnps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 78.    1     2.0    0.0    16.0      vandnpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 79.    1     3.0    0.0    15.0      vandnps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 80.    1     8.0    5.0    10.0      vandnpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 81.    1     6.0    2.0    12.0      vandnps	%zmm2, %zmm2, %zmm2
-# CHECK-NEXT: 82.    1     9.0    0.0    9.0       vandnpd	%zmm1, %zmm1, %zmm1
-# CHECK-NEXT: 83.    1     10.0   10.0   8.0       pandn	%mm2, %mm2
-# CHECK-NEXT: 84.    1     6.0    0.0    11.0      pandn	%xmm2, %xmm2
-# CHECK-NEXT: 85.    1     18.0   6.0    0.0       vpandn	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 86.    1     19.0   0.0    0.0       vpandn	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 87.    1     18.0   7.0    1.0       vpandnd	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 88.    1     19.0   0.0    0.0       vpandnq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 89.    1     20.0   0.0    0.0       vpandnd	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 90.    1     20.0   0.0    0.0       vpandnq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 91.    1     21.0   0.0    0.0       vpandnd	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 92.    1     22.0   0.0    0.0       vpandnq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 93.    1     9.0    3.0    13.0      vandnps	%xmm2, %xmm2, %xmm5
-# CHECK-NEXT: 94.    1     17.0   9.0    5.0       vandnpd	%xmm1, %xmm1, %xmm5
-# CHECK-NEXT: 95.    1     19.0   0.0    3.0       vpandn	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 96.    1     17.0   12.0   4.0       vandnps	%ymm2, %ymm2, %ymm5
-# CHECK-NEXT: 97.    1     18.0   11.0   3.0       vandnpd	%ymm1, %ymm1, %ymm5
-# CHECK-NEXT: 98.    1     19.0   1.0    2.0       vpandn	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 99.    1     19.0   14.0   2.0       vandnps	%zmm2, %zmm2, %zmm5
-# CHECK-NEXT: 100.   1     20.0   13.0   1.0       vandnpd	%zmm1, %zmm1, %zmm5
-# CHECK-NEXT: 101.   1     21.0   0.0    0.0       vpandnd	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 102.   1     21.0   0.0    0.0       vpandnq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 103.   1     21.0   0.0    0.0       vpandnd	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 104.   1     21.0   1.0    0.0       vpandnq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 105.   1     21.0   1.0    0.0       vpandnd	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 106.   1     22.0   2.0    0.0       vpandnq	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 107.   1     18.0   18.0   3.0       xorps	%xmm0, %xmm0
-# CHECK-NEXT: 108.   1     17.0   13.0   4.0       xorpd	%xmm1, %xmm1
-# CHECK-NEXT: 109.   1     18.0   16.0   3.0       vxorps	%xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 110.   1     19.0   2.0    1.0       vxorpd	%xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 111.   1     20.0   2.0    0.0       vxorps	%ymm2, %ymm2, %ymm2
-# CHECK-NEXT: 112.   1     20.0   0.0    0.0       vxorpd	%ymm1, %ymm1, %ymm1
-# CHECK-NEXT: 113.   1     20.0   0.0    0.0       vxorps	%zmm2, %zmm2, %zmm2
-# CHECK-NEXT: 114.   1     20.0   0.0    0.0       vxorpd	%zmm1, %zmm1, %zmm1
-# CHECK-NEXT: 115.   1     21.0   18.0   0.0       pxor	%mm2, %mm2
-# CHECK-NEXT: 116.   1     20.0   0.0    0.0       pxor	%xmm2, %xmm2
-# CHECK-NEXT: 117.   1     19.0   7.0    1.0       vpxor	%xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 118.   1     21.0   1.0    0.0       vpxor	%ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 119.   1     19.0   4.0    1.0       vpxord	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 120.   1     20.0   0.0    0.0       vpxorq	%xmm19, %xmm19, %xmm19
-# CHECK-NEXT: 121.   1     21.0   0.0    0.0       vpxord	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 122.   1     21.0   0.0    0.0       vpxorq	%ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 123.   1     22.0   0.0    0.0       vpxord	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 124.   1     23.0   0.0    0.0       vpxorq	%zmm19, %zmm19, %zmm19
-# CHECK-NEXT: 125.   1     18.0   18.0   4.0       vxorps	%xmm4, %xmm4, %xmm5
-# CHECK-NEXT: 126.   1     19.0   2.0    3.0       vxorpd	%xmm1, %xmm1, %xmm3
-# CHECK-NEXT: 127.   1     20.0   20.0   2.0       vxorps	%ymm4, %ymm4, %ymm5
-# CHECK-NEXT: 128.   1     19.0   3.0    2.0       vxorpd	%ymm1, %ymm1, %ymm3
-# CHECK-NEXT: 129.   1     20.0   20.0   1.0       vxorps	%zmm4, %zmm4, %zmm5
-# CHECK-NEXT: 130.   1     18.0   2.0    3.0       vxorpd	%zmm1, %zmm1, %zmm3
-# CHECK-NEXT: 131.   1     19.0   1.0    1.0       vpxor	%xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 132.   1     20.0   2.0    0.0       vpxor	%ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 133.   1     21.0   0.0    0.0       vpxord	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 134.   1     20.0   0.0    0.0       vpxorq	%xmm19, %xmm19, %xmm21
-# CHECK-NEXT: 135.   1     20.0   0.0    0.0       vpxord	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 136.   1     21.0   1.0    0.0       vpxorq	%ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 137.   1     20.0   1.0    0.0       vpxord	%zmm19, %zmm19, %zmm21
-# CHECK-NEXT: 138.   1     21.0   2.0    0.0       vpxorq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       subl	%eax, %eax
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       subq	%rax, %rax
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       xorl	%eax, %eax
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       xorq	%rax, %rax
+# CHECK-NEXT: 4.     1     1.0    1.0    0.0       pcmpgtb	%mm2, %mm2
+# CHECK-NEXT: 5.     1     2.0    0.0    0.0       pcmpgtd	%mm2, %mm2
+# CHECK-NEXT: 6.     1     2.0    0.0    0.0       pcmpgtw	%mm2, %mm2
+# CHECK-NEXT: 7.     1     0.0    0.0    3.0       pcmpgtb	%xmm2, %xmm2
+# CHECK-NEXT: 8.     1     0.0    0.0    3.0       pcmpgtd	%xmm2, %xmm2
+# CHECK-NEXT: 9.     1     0.0    0.0    3.0       pcmpgtq	%xmm2, %xmm2
+# CHECK-NEXT: 10.    1     0.0    0.0    3.0       pcmpgtw	%xmm2, %xmm2
+# CHECK-NEXT: 11.    1     0.0    0.0    3.0       vpcmpgtb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 12.    1     0.0    0.0    2.0       vpcmpgtd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 13.    1     0.0    0.0    2.0       vpcmpgtq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 14.    1     0.0    0.0    2.0       vpcmpgtw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15.    1     0.0    0.0    2.0       vpcmpgtb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16.    1     0.0    0.0    2.0       vpcmpgtd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17.    1     0.0    0.0    2.0       vpcmpgtq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18.    1     0.0    0.0    1.0       vpcmpgtw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19.    1     0.0    0.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20.    1     0.0    0.0    1.0       vpcmpgtd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21.    1     0.0    0.0    1.0       vpcmpgtq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22.    1     0.0    0.0    1.0       vpcmpgtw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23.    1     0.0    0.0    1.0       vpcmpgtb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24.    1     0.0    0.0    0.0       vpcmpgtd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25.    1     0.0    0.0    0.0       vpcmpgtq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26.    1     0.0    0.0    0.0       vpcmpgtw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 27.    1     1.0    1.0    0.0       psubb	%mm2, %mm2
+# CHECK-NEXT: 28.    1     2.0    0.0    0.0       psubd	%mm2, %mm2
+# CHECK-NEXT: 29.    1     3.0    0.0    0.0       psubq	%mm2, %mm2
+# CHECK-NEXT: 30.    1     3.0    0.0    0.0       psubw	%mm2, %mm2
+# CHECK-NEXT: 31.    1     0.0    0.0    4.0       psubb	%xmm2, %xmm2
+# CHECK-NEXT: 32.    1     0.0    0.0    4.0       psubd	%xmm2, %xmm2
+# CHECK-NEXT: 33.    1     0.0    0.0    4.0       psubq	%xmm2, %xmm2
+# CHECK-NEXT: 34.    1     0.0    0.0    4.0       psubw	%xmm2, %xmm2
+# CHECK-NEXT: 35.    1     0.0    0.0    4.0       vpsubb	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 36.    1     0.0    0.0    3.0       vpsubd	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 37.    1     0.0    0.0    3.0       vpsubq	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 38.    1     0.0    0.0    3.0       vpsubw	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39.    1     0.0    0.0    3.0       vpsubb	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40.    1     0.0    0.0    3.0       vpsubd	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41.    1     0.0    0.0    3.0       vpsubq	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42.    1     0.0    0.0    2.0       vpsubw	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43.    1     0.0    0.0    2.0       vpsubb	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44.    1     0.0    0.0    2.0       vpsubd	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45.    1     0.0    0.0    2.0       vpsubq	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46.    1     0.0    0.0    2.0       vpsubw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47.    1     0.0    0.0    2.0       vpsubb	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48.    1     0.0    0.0    1.0       vpsubd	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49.    1     0.0    0.0    1.0       vpsubq	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50.    1     0.0    0.0    1.0       vpsubw	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51.    1     0.0    0.0    1.0       vpsubb	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 52.    1     0.0    0.0    1.0       vpsubd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 53.    1     0.0    0.0    1.0       vpsubq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 54.    1     0.0    0.0    0.0       vpsubw	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 55.    1     0.0    0.0    0.0       vpsubb	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 56.    1     0.0    0.0    0.0       vpsubd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 57.    1     0.0    0.0    0.0       vpsubq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 58.    1     0.0    0.0    0.0       vpsubw	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 59.    1     0.0    0.0    0.0       vpsubb	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 60.    1     0.0    0.0    0.0       vpsubd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 61.    1     0.0    0.0    0.0       vpsubq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 62.    1     0.0    0.0    0.0       vpsubw	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 63.    1     0.0    0.0    0.0       vpsubb	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 64.    1     0.0    0.0    0.0       vpsubd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 65.    1     0.0    0.0    0.0       vpsubq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 66.    1     0.0    0.0    0.0       vpsubw	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 67.    1     0.0    0.0    0.0       vpsubb	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 68.    1     0.0    0.0    0.0       vpsubd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 69.    1     0.0    0.0    0.0       vpsubq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 70.    1     0.0    0.0    0.0       vpsubw	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 71.    1     0.0    0.0    0.0       vpsubb	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 72.    1     0.0    0.0    0.0       vpsubd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 73.    1     0.0    0.0    0.0       vpsubq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 74.    1     0.0    0.0    0.0       vpsubw	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 75.    1     1.0    1.0    0.0       andnps	%xmm0, %xmm0
+# CHECK-NEXT: 76.    1     1.0    1.0    0.0       andnpd	%xmm1, %xmm1
+# CHECK-NEXT: 77.    1     1.0    1.0    0.0       vandnps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 78.    1     1.0    0.0    0.0       vandnpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 79.    1     1.0    0.0    0.0       vandnps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 80.    1     2.0    0.0    0.0       vandnpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 81.    1     2.0    0.0    0.0       vandnps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: 82.    1     3.0    0.0    0.0       vandnpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: 83.    1     1.0    1.0    2.0       pandn	%mm2, %mm2
+# CHECK-NEXT: 84.    1     2.0    0.0    0.0       pandn	%xmm2, %xmm2
+# CHECK-NEXT: 85.    1     1.0    1.0    1.0       vpandn	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 86.    1     2.0    0.0    0.0       vpandn	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 87.    1     3.0    3.0    0.0       vpandnd	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 88.    1     4.0    0.0    0.0       vpandnq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 89.    1     5.0    0.0    0.0       vpandnd	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 90.    1     5.0    0.0    0.0       vpandnq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 91.    1     6.0    0.0    0.0       vpandnd	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 92.    1     7.0    0.0    0.0       vpandnq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 93.    1     2.0    0.0    5.0       vandnps	%xmm2, %xmm2, %xmm5
+# CHECK-NEXT: 94.    1     2.0    0.0    5.0       vandnpd	%xmm1, %xmm1, %xmm5
+# CHECK-NEXT: 95.    1     3.0    1.0    4.0       vpandn	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 96.    1     2.0    1.0    4.0       vandnps	%ymm2, %ymm2, %ymm5
+# CHECK-NEXT: 97.    1     3.0    2.0    3.0       vandnpd	%ymm1, %ymm1, %ymm5
+# CHECK-NEXT: 98.    1     3.0    2.0    3.0       vpandn	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 99.    1     4.0    3.0    2.0       vandnps	%zmm2, %zmm2, %zmm5
+# CHECK-NEXT: 100.   1     4.0    3.0    2.0       vandnpd	%zmm1, %zmm1, %zmm5
+# CHECK-NEXT: 101.   1     7.0    0.0    0.0       vpandnd	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 102.   1     6.0    0.0    0.0       vpandnq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 103.   1     6.0    0.0    0.0       vpandnd	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 104.   1     7.0    1.0    0.0       vpandnq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 105.   1     7.0    1.0    0.0       vpandnd	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 106.   1     7.0    1.0    0.0       vpandnq	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 107.   1     0.0    0.0    8.0       xorps	%xmm0, %xmm0
+# CHECK-NEXT: 108.   1     0.0    0.0    7.0       xorpd	%xmm1, %xmm1
+# CHECK-NEXT: 109.   1     0.0    0.0    7.0       vxorps	%xmm2, %xmm2, %xmm2
+# CHECK-NEXT: 110.   1     0.0    0.0    7.0       vxorpd	%xmm1, %xmm1, %xmm1
+# CHECK-NEXT: 111.   1     0.0    0.0    7.0       vxorps	%ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 112.   1     0.0    0.0    7.0       vxorpd	%ymm1, %ymm1, %ymm1
+# CHECK-NEXT: 113.   1     0.0    0.0    7.0       vxorps	%zmm2, %zmm2, %zmm2
+# CHECK-NEXT: 114.   1     0.0    0.0    6.0       vxorpd	%zmm1, %zmm1, %zmm1
+# CHECK-NEXT: 115.   1     2.0    2.0    3.0       pxor	%mm2, %mm2
+# CHECK-NEXT: 116.   1     0.0    0.0    6.0       pxor	%xmm2, %xmm2
+# CHECK-NEXT: 117.   1     0.0    0.0    6.0       vpxor	%xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 118.   1     0.0    0.0    6.0       vpxor	%ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 119.   1     4.0    0.0    2.0       vpxord	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 120.   1     3.0    0.0    2.0       vpxorq	%xmm19, %xmm19, %xmm19
+# CHECK-NEXT: 121.   1     3.0    0.0    2.0       vpxord	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 122.   1     3.0    0.0    2.0       vpxorq	%ymm19, %ymm19, %ymm19
+# CHECK-NEXT: 123.   1     3.0    0.0    2.0       vpxord	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 124.   1     3.0    0.0    2.0       vpxorq	%zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 125.   1     0.0    0.0    5.0       vxorps	%xmm4, %xmm4, %xmm5
+# CHECK-NEXT: 126.   1     0.0    0.0    4.0       vxorpd	%xmm1, %xmm1, %xmm3
+# CHECK-NEXT: 127.   1     0.0    0.0    4.0       vxorps	%ymm4, %ymm4, %ymm5
+# CHECK-NEXT: 128.   1     0.0    0.0    4.0       vxorpd	%ymm1, %ymm1, %ymm3
+# CHECK-NEXT: 129.   1     0.0    0.0    4.0       vxorps	%zmm4, %zmm4, %zmm5
+# CHECK-NEXT: 130.   1     0.0    0.0    4.0       vxorpd	%zmm1, %zmm1, %zmm3
+# CHECK-NEXT: 131.   1     0.0    0.0    4.0       vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 132.   1     0.0    0.0    3.0       vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 133.   1     1.0    0.0    2.0       vpxord	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 134.   1     1.0    0.0    2.0       vpxorq	%xmm19, %xmm19, %xmm21
+# CHECK-NEXT: 135.   1     1.0    0.0    2.0       vpxord	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 136.   1     1.0    0.0    2.0       vpxorq	%ymm19, %ymm19, %ymm21
+# CHECK-NEXT: 137.   1     1.0    0.0    2.0       vpxord	%zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 138.   1     0.0    0.0    2.0       vpxorq	%zmm19, %zmm19, %zmm21

From 46e5052b8e2de46473959797ae310c3801a7cf17 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 25 May 2019 06:17:47 +0000
Subject: [PATCH 0223/1176] [X86FixupLEAs] Turn optIncDec into a generic two
 address LEA optimizer. Support LEA64_32r properly.

INC/DEC is really a special case of a more generic issue. We should also turn leas into add reg/reg or add reg/imm regardless of the slow lea flags.

This also supports LEA64_32 which has 64 bit input registers and 32 bit output registers. So we need to convert the 64 bit inputs to their 32 bit equivalents to check if they are equal to base reg.

One thing to note, the original code preserved the kill flags by adding operands to the new instruction instead of using addReg. But I think tied operands aren't supposed to have the kill flag set. I dropped the kill flags, but I could probably try to preserve it in the add reg/reg case if we think its important. Not sure which operand its supposed to go on for the LEA64_32r instruction due to the super reg implicit uses. Though I'm also not sure those are needed since they were probably just created by an INSERT_SUBREG from a 32-bit input.

Differential Revision: https://reviews.llvm.org/D61472

llvm-svn: 361691
---
 llvm/lib/Target/X86/X86FixupLEAs.cpp          | 154 ++++++++++++------
 llvm/test/CodeGen/X86/GlobalISel/add-ext.ll   |  12 +-
 .../CodeGen/X86/GlobalISel/callingconv.ll     |   2 +-
 llvm/test/CodeGen/X86/GlobalISel/gep.ll       |  14 +-
 .../CodeGen/X86/GlobalISel/memop-scalar.ll    |   2 +-
 .../CodeGen/X86/MergeConsecutiveStores.ll     |   4 +-
 llvm/test/CodeGen/X86/atomic-unordered.ll     |   2 +-
 .../X86/avx512vl-intrinsics-upgrade.ll        |  12 +-
 llvm/test/CodeGen/X86/bitreverse.ll           |   4 +-
 llvm/test/CodeGen/X86/bswap_tree2.ll          |   2 +-
 .../CodeGen/X86/bypass-slow-division-32.ll    |   2 +-
 llvm/test/CodeGen/X86/combine-srem.ll         |   2 +-
 llvm/test/CodeGen/X86/dagcombine-shifts.ll    |   4 +-
 llvm/test/CodeGen/X86/fixup-bw-copy.ll        |   2 +-
 llvm/test/CodeGen/X86/fixup-lea.ll            |   4 +-
 llvm/test/CodeGen/X86/imul.ll                 |  10 +-
 llvm/test/CodeGen/X86/leaFixup32.mir          |   2 +-
 llvm/test/CodeGen/X86/leaFixup64.mir          |   4 +-
 llvm/test/CodeGen/X86/mul-constant-i16.ll     |   2 +-
 llvm/test/CodeGen/X86/mul-constant-i32.ll     |   4 +-
 llvm/test/CodeGen/X86/mul-constant-i64.ll     |  26 +--
 llvm/test/CodeGen/X86/mul-constant-i8.ll      |   2 +-
 llvm/test/CodeGen/X86/popcnt.ll               |   6 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |   2 +-
 llvm/test/CodeGen/X86/reverse_branches.ll     |   2 +-
 llvm/test/CodeGen/X86/rotate-extract.ll       |   4 +-
 llvm/test/CodeGen/X86/sat-add.ll              |   6 +-
 llvm/test/CodeGen/X86/twoaddr-lea.ll          |   2 +-
 llvm/test/CodeGen/X86/vector-bitreverse.ll    |   4 +-
 llvm/test/CodeGen/X86/win_coreclr_chkstk.ll   |   2 +-
 .../LoopStrengthReduce/X86/ivchain-X86.ll     |   8 +-
 31 files changed, 176 insertions(+), 132 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index c8ef92f35c223..91ab4f8ab24a9 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -8,7 +8,7 @@
 //
 // This file defines the pass that finds instructions that can be
 // re-written as LEA instructions in order to reduce pipeline delays.
-// When optimizing for size it replaces suitable LEAs with INC or DEC.
+// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
 //
 //===----------------------------------------------------------------------===//
 
@@ -70,10 +70,11 @@ class FixupLEAPass : public MachineFunctionPass {
   MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
                                           MachineBasicBlock &MBB);
 
-  /// Look for LEAs that add 1 to reg or subtract 1 from reg
-  /// and convert them to INC or DEC respectively.
-  bool fixupIncDec(MachineBasicBlock::iterator &I,
-                   MachineBasicBlock &MBB) const;
+  /// Look for LEAs that are really two address LEAs that we might be able to
+  /// turn into regular ADD instructions.
+  bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
+                     MachineBasicBlock &MBB, bool OptIncDec,
+                     bool UseLEAForSP) const;
 
   /// Determine if an instruction references a machine register
   /// and, if so, whether it reads or writes the register.
@@ -114,7 +115,8 @@ class FixupLEAPass : public MachineFunctionPass {
 
 private:
   TargetSchedModel TSM;
-  const X86InstrInfo *TII; // Machine instruction info.
+  const X86InstrInfo *TII;
+  const X86RegisterInfo *TRI;
 };
 }
 
@@ -197,13 +199,11 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool LEAUsesAG = ST.LEAusesAG();
 
   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
-  bool OptLEA = LEAUsesAG || IsSlowLEA || IsSlow3OpsLEA;
-
-  if (!OptLEA && !OptIncDec)
-    return false;
+  bool UseLEAForSP = ST.useLeaForSP();
 
   TSM.init(&ST);
   TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
 
   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
   for (MachineBasicBlock &MBB : MF) {
@@ -212,7 +212,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
       if (!isLEA(I->getOpcode()))
         continue;
 
-      if (OptIncDec && fixupIncDec(I, MBB))
+      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
         continue;
 
       if (IsSlowLEA) {
@@ -323,8 +323,8 @@ static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
   default:
     llvm_unreachable("Unexpected LEA instruction");
   case X86::LEA32r:
-    return X86::ADD32rr;
   case X86::LEA64_32r:
+    return X86::ADD32rr;
   case X86::LEA64r:
     return X86::ADD64rr;
   }
@@ -344,48 +344,106 @@ static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
   }
 }
 
-/// isLEASimpleIncOrDec - Does this LEA have one these forms:
-/// lea  %reg, 1(%reg)
-/// lea  %reg, -1(%reg)
-static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
-  unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
-  unsigned DstReg = LEA.getOperand(0).getReg();
-  const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
-  return SrcReg == DstReg &&
-         LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
-         LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
-         AddrDisp.isImm() &&
-         (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
+static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
+  switch (LEAOpcode) {
+  default:
+    llvm_unreachable("Unexpected LEA instruction");
+  case X86::LEA32r:
+  case X86::LEA64_32r:
+    return IsINC ? X86::INC32r : X86::DEC32r;
+  case X86::LEA64r:
+    return IsINC ? X86::INC64r : X86::DEC64r;
+  }
 }
 
-bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
-                               MachineBasicBlock &MBB) const {
+bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
+                                 MachineBasicBlock &MBB, bool OptIncDec,
+                                 bool UseLEAForSP) const {
   MachineInstr &MI = *I;
 
-  if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(MBB, I)) {
-    unsigned NewOpcode;
-    bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
-    switch (MI.getOpcode()) {
-    default:
-      llvm_unreachable("Unexpected LEA instruction");
-    case X86::LEA32r:
-    case X86::LEA64_32r:
-      NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
-      break;
-    case X86::LEA64r:
-      NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
-      break;
-    }
+  const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
+  const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
+  const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
+  const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
+  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
 
-    MachineInstr *NewMI =
-        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode))
-            .add(MI.getOperand(0))
-            .add(MI.getOperand(1 + X86::AddrBaseReg));
-    MBB.erase(I);
-    I = static_cast<MachineBasicBlock::iterator>(NewMI);
-    return true;
+  if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
+      !TII->isSafeToClobberEFLAGS(MBB, I))
+    return false;
+
+  unsigned DestReg  = MI.getOperand(0).getReg();
+  unsigned BaseReg  = Base.getReg();
+  unsigned IndexReg = Index.getReg();
+
+  // Don't change stack adjustment LEAs.
+  if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
+    return false;
+
+  // LEA64_32 has 64-bit operands but 32-bit result.
+  if (MI.getOpcode() == X86::LEA64_32r) {
+    if (BaseReg != 0)
+      BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+    if (IndexReg != 0)
+      IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
   }
-  return false;
+
+  MachineInstr *NewMI = nullptr;
+
+  // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
+  // which can be turned into add %reg2, %reg1
+  if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
+      (DestReg == BaseReg || DestReg == IndexReg)) {
+    unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
+    if (DestReg != BaseReg)
+      std::swap(BaseReg, IndexReg);
+
+    if (MI.getOpcode() == X86::LEA64_32r) {
+      // TODO: Do we need the super register implicit use?
+      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+        .addReg(BaseReg).addReg(IndexReg)
+        .addReg(Base.getReg(), RegState::Implicit)
+        .addReg(Index.getReg(), RegState::Implicit);
+    } else {
+      NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+        .addReg(BaseReg).addReg(IndexReg);
+    }
+  } else if (DestReg == BaseReg && IndexReg == 0) {
+    // This is an LEA with only a base register and a displacement,
+    // We can use ADDri or INC/DEC.
+
+    // Does this LEA have one these forms:
+    // lea  %reg, 1(%reg)
+    // lea  %reg, -1(%reg)
+    if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
+      bool IsINC = Disp.getImm() == 1;
+      unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
+
+      if (MI.getOpcode() == X86::LEA64_32r) {
+        // TODO: Do we need the super register implicit use?
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
+      } else {
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg);
+      }
+    } else {
+      unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
+      if (MI.getOpcode() == X86::LEA64_32r) {
+        // TODO: Do we need the super register implicit use?
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg).addImm(Disp.getImm())
+          .addReg(Base.getReg(), RegState::Implicit);
+      } else {
+        NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+          .addReg(BaseReg).addImm(Disp.getImm());
+      }
+    }
+  } else
+    return false;
+
+  MBB.erase(I);
+  I = NewMI;
+  return true;
 }
 
 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll b/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
index da5a6f182ccf3..bf081b355ad4c 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
@@ -79,7 +79,7 @@ define i8* @gep8(i32 %i, i8* %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addl $5, %edi
 ; CHECK-NEXT:    movslq %edi, %rax
-; CHECK-NEXT:    leaq (%rsi,%rax), %rax
+; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    retq
 
   %add = add nsw i32 %i, 5
@@ -166,16 +166,16 @@ define void @PR20134(i32* %a, i32 %i) {
 ; CHECK-NEXT:    cltq
 ; CHECK-NEXT:    movq $4, %rcx
 ; CHECK-NEXT:    imulq %rcx, %rax
-; CHECK-NEXT:    leaq (%rdi,%rax), %rax
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    leal 2(%rsi), %edx
 ; CHECK-NEXT:    movslq %edx, %rdx
 ; CHECK-NEXT:    imulq %rcx, %rdx
-; CHECK-NEXT:    leaq (%rdi,%rdx), %rdx
+; CHECK-NEXT:    addq %rdi, %rdx
 ; CHECK-NEXT:    movl (%rdx), %edx
 ; CHECK-NEXT:    addl (%rax), %edx
 ; CHECK-NEXT:    movslq %esi, %rax
 ; CHECK-NEXT:    imulq %rcx, %rax
-; CHECK-NEXT:    leaq (%rdi,%rax), %rax
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    movl %edx, (%rax)
 ; CHECK-NEXT:    retq
 
@@ -204,10 +204,10 @@ define void @PR20134_zext(i32* %a, i32 %i) {
 ; CHECK-NEXT:    leal 1(%rsi), %eax
 ; CHECK-NEXT:    movq $4, %rcx
 ; CHECK-NEXT:    imulq %rcx, %rax
-; CHECK-NEXT:    leaq (%rdi,%rax), %rax
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    leal 2(%rsi), %edx
 ; CHECK-NEXT:    imulq %rcx, %rdx
-; CHECK-NEXT:    leaq (%rdi,%rdx), %rdx
+; CHECK-NEXT:    addq %rdi, %rdx
 ; CHECK-NEXT:    movl (%rdx), %edx
 ; CHECK-NEXT:    addl (%rax), %edx
 ; CHECK-NEXT:    imulq %rcx, %rsi
diff --git a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
index 33e16893473c7..b8deacdb19b39 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
@@ -409,7 +409,7 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
 ; X32-NEXT:    movl 4(%ecx), %ecx
 ; X32-NEXT:    movl %eax, (%esp)
 ; X32-NEXT:    movl $4, %eax
-; X32-NEXT:    leal (%esp,%eax), %eax
+; X32-NEXT:    addl %esp, %eax
 ; X32-NEXT:    movl %edx, {{[0-9]+}}(%esp)
 ; X32-NEXT:    movl %ecx, 4(%eax)
 ; X32-NEXT:    calll variadic_callee
diff --git a/llvm/test/CodeGen/X86/GlobalISel/gep.ll b/llvm/test/CodeGen/X86/GlobalISel/gep.ll
index e0e40810af202..20047fd7b0813 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/gep.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/gep.ll
@@ -12,7 +12,7 @@ define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
 ; X64_GISEL-NEXT:    sarq %cl, %rsi
 ; X64_GISEL-NEXT:    movq $4, %rax
 ; X64_GISEL-NEXT:    imulq %rsi, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i8:
@@ -29,7 +29,7 @@ define i32* @test_gep_i8_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i8_const:
 ; X64_GISEL:       # %bb.0:
 ; X64_GISEL-NEXT:    movq $80, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i8_const:
@@ -50,7 +50,7 @@ define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
 ; X64_GISEL-NEXT:    sarq %cl, %rsi
 ; X64_GISEL-NEXT:    movq $4, %rax
 ; X64_GISEL-NEXT:    imulq %rsi, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i16:
@@ -67,7 +67,7 @@ define i32* @test_gep_i16_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i16_const:
 ; X64_GISEL:       # %bb.0:
 ; X64_GISEL-NEXT:    movq $80, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i16_const:
@@ -100,7 +100,7 @@ define i32* @test_gep_i32_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i32_const:
 ; X64_GISEL:       # %bb.0:
 ; X64_GISEL-NEXT:    movq $20, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i32_const:
@@ -116,7 +116,7 @@ define i32* @test_gep_i64(i32 *%arr, i64 %ind) {
 ; X64_GISEL:       # %bb.0:
 ; X64_GISEL-NEXT:    movq $4, %rax
 ; X64_GISEL-NEXT:    imulq %rsi, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i64:
@@ -131,7 +131,7 @@ define i32* @test_gep_i64_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i64_const:
 ; X64_GISEL:       # %bb.0:
 ; X64_GISEL-NEXT:    movq $20, %rax
-; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
+; X64_GISEL-NEXT:    addq %rdi, %rax
 ; X64_GISEL-NEXT:    retq
 ;
 ; X64-LABEL: test_gep_i64_const:
diff --git a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
index 089263359ce97..b98d7ca38f9ca 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
@@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepIndex(i32* %arr, i32 %val) {
 ; ALL-LABEL: test_gep_folding_largeGepIndex:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    movabsq $228719476720, %rax # imm = 0x3540BE3FF0
-; ALL-NEXT:    leaq (%rdi,%rax), %rax
+; ALL-NEXT:    addq %rdi, %rax
 ; ALL-NEXT:    movl %esi, (%rax)
 ; ALL-NEXT:    movl (%rax), %eax
 ; ALL-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
index edec3fdd7f5a4..de74c8055834a 100644
--- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -632,7 +632,7 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
 ; BWON-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; BWON-NEXT:    movsbq (%rdi,%rcx), %rax
 ; BWON-NEXT:    movzbl (%rdx,%rax), %r9d
-; BWON-NEXT:    leal 1(%rax), %eax
+; BWON-NEXT:    incl %eax
 ; BWON-NEXT:    movsbq %al, %rax
 ; BWON-NEXT:    movzbl (%rdx,%rax), %eax
 ; BWON-NEXT:    movb %r9b, (%rsi,%rcx,2)
@@ -651,7 +651,7 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
 ; BWOFF-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
 ; BWOFF-NEXT:    movsbq (%rdi,%rcx), %rax
 ; BWOFF-NEXT:    movb (%rdx,%rax), %r9b
-; BWOFF-NEXT:    leal 1(%rax), %eax
+; BWOFF-NEXT:    incl %eax
 ; BWOFF-NEXT:    movsbq %al, %rax
 ; BWOFF-NEXT:    movb (%rdx,%rax), %al
 ; BWOFF-NEXT:    movb %r9b, (%rsi,%rcx,2)
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 316f124c79b5d..567e8b47c4ba6 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -772,7 +772,7 @@ define i64 @load_fold_sdiv1(i64* %p) {
 ; CHECK-O3-NEXT:    movq %rdx, %rax
 ; CHECK-O3-NEXT:    shrq $63, %rax
 ; CHECK-O3-NEXT:    sarq $3, %rdx
-; CHECK-O3-NEXT:    leaq (%rdx,%rax), %rax
+; CHECK-O3-NEXT:    addq %rdx, %rax
 ; CHECK-O3-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = sdiv i64 %v, 15
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 0159d9196daa7..fa82fe6a9c1c7 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -9268,7 +9268,7 @@ define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
 ; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
+; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
@@ -9327,7 +9327,7 @@ define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
 ; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
+; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
@@ -9359,7 +9359,7 @@ define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
 ; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
+; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -9391,7 +9391,7 @@ define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2
 ; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
+; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
@@ -9450,7 +9450,7 @@ define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2
 ; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
+; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
@@ -9482,7 +9482,7 @@ define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2
 ; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    leal (%rcx,%rax), %eax # encoding: [0x8d,0x04,0x01]
+; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT:    retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll
index 23056f9e802a5..5dd2b36bebd04 100644
--- a/llvm/test/CodeGen/X86/bitreverse.ll
+++ b/llvm/test/CodeGen/X86/bitreverse.ll
@@ -347,7 +347,7 @@ define i8 @test_bitreverse_i8(i8 %a) {
 ; X64-NEXT:    addb %al, %al
 ; X64-NEXT:    andb $-86, %dil
 ; X64-NEXT:    shrb %dil
-; X64-NEXT:    leal (%rdi,%rax), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %b = call i8 @llvm.bitreverse.i8(i8 %a)
@@ -391,7 +391,7 @@ define i4 @test_bitreverse_i4(i4 %a) {
 ; X64-NEXT:    addb %al, %al
 ; X64-NEXT:    andb $-96, %dil
 ; X64-NEXT:    shrb %dil
-; X64-NEXT:    leal (%rdi,%rax), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    shrb $4, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/bswap_tree2.ll b/llvm/test/CodeGen/X86/bswap_tree2.ll
index 3f64d6f94d804..7e06c8a11b81b 100644
--- a/llvm/test/CodeGen/X86/bswap_tree2.ll
+++ b/llvm/test/CodeGen/X86/bswap_tree2.ll
@@ -81,7 +81,7 @@ define i32 @test2(i32 %x) nounwind {
 ; CHECK64-NEXT:    andl $-16777216, %edi # imm = 0xFF000000
 ; CHECK64-NEXT:    andl $16711680, %eax # imm = 0xFF0000
 ; CHECK64-NEXT:    orl %edi, %eax
-; CHECK64-NEXT:    leal (%rax,%rcx), %eax
+; CHECK64-NEXT:    addl %ecx, %eax
 ; CHECK64-NEXT:    retq
   %byte1 = lshr i32 %x, 8
   %byte0 = shl  i32 %x, 8
diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-32.ll b/llvm/test/CodeGen/X86/bypass-slow-division-32.ll
index 1533a393cfbff..66aacf19cb8d5 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-32.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-32.ll
@@ -143,7 +143,7 @@ define i32 @Test_use_div_reg_imm(i32 %a) nounwind {
 ; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    shrl $31, %eax
 ; CHECK-NEXT:    sarl $3, %edx
-; CHECK-NEXT:    leal (%edx,%eax), %eax
+; CHECK-NEXT:    addl %edx, %eax
 ; CHECK-NEXT:    retl
   %resultdiv = sdiv i32 %a, 33
   ret i32 %resultdiv
diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll
index 4878d708e4815..ef338b371091b 100644
--- a/llvm/test/CodeGen/X86/combine-srem.ll
+++ b/llvm/test/CodeGen/X86/combine-srem.ll
@@ -61,7 +61,7 @@ define i32 @combine_srem_by_minsigned(i32 %x) {
 ; CHECK-NEXT:    shrl %eax
 ; CHECK-NEXT:    addl %edi, %eax
 ; CHECK-NEXT:    andl $-2147483648, %eax # imm = 0x80000000
-; CHECK-NEXT:    leal (%rax,%rdi), %eax
+; CHECK-NEXT:    addl %edi, %eax
 ; CHECK-NEXT:    retq
   %1 = srem i32 %x, -2147483648
   ret i32 %1
diff --git a/llvm/test/CodeGen/X86/dagcombine-shifts.ll b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
index d650bd18eafda..d8996251e9aa8 100644
--- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
@@ -161,7 +161,7 @@ define i64 @fun11(i16 zeroext %v) {
 ; CHECK-NEXT:    shrl $4, %edi
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shlq $4, %rax
-; CHECK-NEXT:    leaq (%rax,%rdi), %rax
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %shr = lshr i16 %v, 4
@@ -178,7 +178,7 @@ define i64 @fun12(i32 zeroext %v) {
 ; CHECK-NEXT:    shrl $4, %edi
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    shlq $4, %rax
-; CHECK-NEXT:    leaq (%rax,%rdi), %rax
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %shr = lshr i32 %v, 4
diff --git a/llvm/test/CodeGen/X86/fixup-bw-copy.ll b/llvm/test/CodeGen/X86/fixup-bw-copy.ll
index 9e434ef7333bb..ed15ec3b8a925 100644
--- a/llvm/test/CodeGen/X86/fixup-bw-copy.ll
+++ b/llvm/test/CodeGen/X86/fixup-bw-copy.ll
@@ -46,7 +46,7 @@ define i8 @test_movb_hreg(i16 %a0) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrl $8, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/fixup-lea.ll b/llvm/test/CodeGen/X86/fixup-lea.ll
index da9a1613fef85..8d8a1cd19f052 100644
--- a/llvm/test/CodeGen/X86/fixup-lea.ll
+++ b/llvm/test/CodeGen/X86/fixup-lea.ll
@@ -129,7 +129,7 @@ define void @foo_nosize(i32 inreg %dns) {
 ; FAST-NEXT:  .LBB4_1: # %for.body
 ; FAST-NEXT:    # =>This Inner Loop Header: Depth=1
 ; FAST-NEXT:    movzwl %cx, %edx
-; FAST-NEXT:    leal -1(%ecx), %ecx
+; FAST-NEXT:    addl $-1, %ecx
 ; FAST-NEXT:    cmpl %eax, %edx
 ; FAST-NEXT:    jl .LBB4_1
 ; FAST-NEXT:  # %bb.2: # %for.end
@@ -169,7 +169,7 @@ define void @bar_nosize(i32 inreg %dns) {
 ; FAST-NEXT:  .LBB5_1: # %for.body
 ; FAST-NEXT:    # =>This Inner Loop Header: Depth=1
 ; FAST-NEXT:    movzwl %cx, %edx
-; FAST-NEXT:    leal 1(%ecx), %ecx
+; FAST-NEXT:    addl $1, %ecx
 ; FAST-NEXT:    cmpl %eax, %edx
 ; FAST-NEXT:    jl .LBB5_1
 ; FAST-NEXT:  # %bb.2: # %for.end
diff --git a/llvm/test/CodeGen/X86/imul.ll b/llvm/test/CodeGen/X86/imul.ll
index d3ec8e975a1d7..450b19142dab1 100644
--- a/llvm/test/CodeGen/X86/imul.ll
+++ b/llvm/test/CodeGen/X86/imul.ll
@@ -220,7 +220,7 @@ define i32 @mul33_32(i32 %A) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: mul33_32:
@@ -349,7 +349,7 @@ define i32 @test2(i32 %a) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test2:
@@ -370,7 +370,7 @@ define i32 @test3(i32 %a) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $5, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    negl %eax
 ; X64-NEXT:    retq
 ;
@@ -448,7 +448,7 @@ define i64 @test6(i64 %a) {
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shlq $5, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
+; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test6:
@@ -471,7 +471,7 @@ define i64 @test7(i64 %a) {
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shlq $5, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
+; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    negq %rax
 ; X64-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/leaFixup32.mir b/llvm/test/CodeGen/X86/leaFixup32.mir
index 5928575306fa9..6d57cf2d97764 100644
--- a/llvm/test/CodeGen/X86/leaFixup32.mir
+++ b/llvm/test/CodeGen/X86/leaFixup32.mir
@@ -174,7 +174,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $eax, $ebp
-    ; CHECK: $ebp = ADD32rr $ebp, killed $eax
+    ; CHECK: $ebp = ADD32rr $ebp, $eax
  
     $ebp = LEA32r killed $ebp, 1, killed $eax, 0, $noreg
     RETQ $ebp
diff --git a/llvm/test/CodeGen/X86/leaFixup64.mir b/llvm/test/CodeGen/X86/leaFixup64.mir
index dccb99661f0c7..fa738adfd0656 100644
--- a/llvm/test/CodeGen/X86/leaFixup64.mir
+++ b/llvm/test/CodeGen/X86/leaFixup64.mir
@@ -247,7 +247,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $ebp = LEA64_32r killed $rax, 1, killed $rbp, 0
+    ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax
  
     $ebp = LEA64_32r killed $rbp, 1, killed $rax, 0, $noreg
     RETQ $ebp
@@ -351,7 +351,7 @@ frameInfo:
 body:             |
   bb.0 (%ir-block.0):
     liveins: $rax, $rbp
-    ; CHECK: $rbp = ADD64rr $rbp, killed $rax
+    ; CHECK: $rbp = ADD64rr $rbp, $rax
  
     $rbp = LEA64r killed $rbp, 1, killed $rax, 0, $noreg
     RETQ $ebp
diff --git a/llvm/test/CodeGen/X86/mul-constant-i16.ll b/llvm/test/CodeGen/X86/mul-constant-i16.ll
index c2950cda510ef..f127a0936e5ec 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i16.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i16.ll
@@ -321,7 +321,7 @@ define i16 @test_mul_by_17(i16 %x) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %mul = mul nsw i16 %x, 17
diff --git a/llvm/test/CodeGen/X86/mul-constant-i32.ll b/llvm/test/CodeGen/X86/mul-constant-i32.ll
index 3b2abf8c1f351..cd23c6424efb8 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i32.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i32.ll
@@ -490,7 +490,7 @@ define i32 @test_mul_by_17(i32 %x) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-NOOPT-LABEL: test_mul_by_17:
@@ -1183,7 +1183,7 @@ define i32 @test_mul_by_66(i32 %x) {
 ; X64-SLM-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-SLM-NEXT:    movl %edi, %eax
 ; X64-SLM-NEXT:    shll $6, %eax
-; X64-SLM-NEXT:    leal (%rax,%rdi), %eax
+; X64-SLM-NEXT:    addl %edi, %eax
 ; X64-SLM-NEXT:    addl %edi, %eax
 ; X64-SLM-NEXT:    retq
   %mul = mul nsw i32 %x, 66
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 4dd94de46367e..c875e084f4861 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -515,26 +515,12 @@ define i64 @test_mul_by_17(i64 %x) {
 ; X86-NOOPT-NEXT:    addl %ecx, %edx
 ; X86-NOOPT-NEXT:    retl
 ;
-; X64-HSW-LABEL: test_mul_by_17:
-; X64-HSW:       # %bb.0:
-; X64-HSW-NEXT:    movq %rdi, %rax
-; X64-HSW-NEXT:    shlq $4, %rax
-; X64-HSW-NEXT:    leaq (%rax,%rdi), %rax
-; X64-HSW-NEXT:    retq
-;
-; X64-JAG-LABEL: test_mul_by_17:
-; X64-JAG:       # %bb.0:
-; X64-JAG-NEXT:    movq %rdi, %rax
-; X64-JAG-NEXT:    shlq $4, %rax
-; X64-JAG-NEXT:    leaq (%rax,%rdi), %rax
-; X64-JAG-NEXT:    retq
-;
-; X64-SLM-LABEL: test_mul_by_17:
-; X64-SLM:       # %bb.0:
-; X64-SLM-NEXT:    movq %rdi, %rax
-; X64-SLM-NEXT:    shlq $4, %rax
-; X64-SLM-NEXT:    addq %rdi, %rax
-; X64-SLM-NEXT:    retq
+; X64-OPT-LABEL: test_mul_by_17:
+; X64-OPT:       # %bb.0:
+; X64-OPT-NEXT:    movq %rdi, %rax
+; X64-OPT-NEXT:    shlq $4, %rax
+; X64-OPT-NEXT:    addq %rdi, %rax
+; X64-OPT-NEXT:    retq
 ;
 ; X64-NOOPT-LABEL: test_mul_by_17:
 ; X64-NOOPT:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/mul-constant-i8.ll b/llvm/test/CodeGen/X86/mul-constant-i8.ll
index 5a33888e0a5ad..7cb245a2eeef8 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i8.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i8.ll
@@ -191,7 +191,7 @@ define i8 @test_mul_by_17(i8 %x) {
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %m = mul i8 %x, 17
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 46a56e93a7012..5c4f07e782c7b 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -39,7 +39,7 @@ define i8 @cnt8(i8 %x) nounwind readnone {
 ; X64-NEXT:    addb %al, %dil
 ; X64-NEXT:    movl %edi, %eax
 ; X64-NEXT:    shrb $4, %al
-; X64-NEXT:    leal (%rax,%rdi), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    andb $15, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -227,7 +227,7 @@ define i64 @cnt64(i64 %x) nounwind readnone {
 ; X64-NEXT:    addq %rcx, %rdi
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
+; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
 ; X64-NEXT:    andq %rax, %rcx
 ; X64-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
@@ -347,7 +347,7 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat  {
 ; X64-NEXT:    addq %rcx, %rdi
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
+; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
 ; X64-NEXT:    andq %rax, %rcx
 ; X64-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 449d23204f43c..c9a577dbaa92b 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -166,7 +166,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; CHECK-NEXT:    leal -324(%rax), %eax
+; CHECK-NEXT:    addl $-324, %eax ## imm = 0xFEBC
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
diff --git a/llvm/test/CodeGen/X86/reverse_branches.ll b/llvm/test/CodeGen/X86/reverse_branches.ll
index a0ca990415c4c..9f51a6313cca4 100644
--- a/llvm/test/CodeGen/X86/reverse_branches.ll
+++ b/llvm/test/CodeGen/X86/reverse_branches.ll
@@ -62,7 +62,7 @@ define i32 @test_branches_order() uwtable ssp {
 ; CHECK-NEXT:    ## in Loop: Header=BB0_3 Depth=2
 ; CHECK-NEXT:    addq $1002, %rbp ## imm = 0x3EA
 ; CHECK-NEXT:    movq %rbx, %rdi
-; CHECK-NEXT:    leaq 1001(%rbx), %rbx
+; CHECK-NEXT:    addq $1001, %rbx ## imm = 0x3E9
 ; CHECK-NEXT:    movl $1000, %edx ## imm = 0x3E8
 ; CHECK-NEXT:    movl $120, %esi
 ; CHECK-NEXT:    callq _memchr
diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
index a1babd1d3cc38..e5228d271e940 100644
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -156,7 +156,7 @@ define i64 @no_extract_shl(i64 %i) nounwind {
 ; X64-NEXT:    shlq $5, %rax
 ; X64-NEXT:    shlq $10, %rdi
 ; X64-NEXT:    shrq $57, %rax
-; X64-NEXT:    leaq (%rax,%rdi), %rax
+; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    retq
   %lhs_mul = shl i64 %i, 5
   %rhs_mul = shl i64 %i, 10
@@ -184,7 +184,7 @@ define i32 @no_extract_shrl(i32 %i) nounwind {
 ; X64-NEXT:    andl $-8, %eax
 ; X64-NEXT:    shll $25, %eax
 ; X64-NEXT:    shrl $9, %edi
-; X64-NEXT:    leal (%rdi,%rax), %eax
+; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    retq
   %lhs_div = lshr i32 %i, 3
   %rhs_div = lshr i32 %i, 9
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index 63c78fa7c72ce..70d102667ff6e 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -236,7 +236,7 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
 ; ANY-NEXT:    notl %eax
 ; ANY-NEXT:    cmpw %ax, %di
 ; ANY-NEXT:    cmovbl %edi, %eax
-; ANY-NEXT:    leal (%rax,%rsi), %eax
+; ANY-NEXT:    addl %esi, %eax
 ; ANY-NEXT:    # kill: def $ax killed $ax killed $eax
 ; ANY-NEXT:    retq
   %noty = xor i16 %y, -1
@@ -287,7 +287,7 @@ define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
 ; ANY-NEXT:    notl %eax
 ; ANY-NEXT:    cmpl %eax, %edi
 ; ANY-NEXT:    cmovbl %edi, %eax
-; ANY-NEXT:    leal (%rax,%rsi), %eax
+; ANY-NEXT:    addl %esi, %eax
 ; ANY-NEXT:    retq
   %noty = xor i32 %y, -1
   %c = icmp ult i32 %x, %noty
@@ -334,7 +334,7 @@ define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
 ; ANY-NEXT:    notq %rax
 ; ANY-NEXT:    cmpq %rax, %rdi
 ; ANY-NEXT:    cmovbq %rdi, %rax
-; ANY-NEXT:    leaq (%rax,%rsi), %rax
+; ANY-NEXT:    addq %rsi, %rax
 ; ANY-NEXT:    retq
   %noty = xor i64 %y, -1
   %c = icmp ult i64 %x, %noty
diff --git a/llvm/test/CodeGen/X86/twoaddr-lea.ll b/llvm/test/CodeGen/X86/twoaddr-lea.ll
index fdcd99adef531..077cf805bcb15 100644
--- a/llvm/test/CodeGen/X86/twoaddr-lea.ll
+++ b/llvm/test/CodeGen/X86/twoaddr-lea.ll
@@ -69,7 +69,7 @@ bb2:
 
 bb3:
 ; CHECK: subl %e[[REG0:[a-z0-9]+]],
-; CHECK: leaq 4({{%[a-z0-9]+}}), %r[[REG0]]
+; CHECK: addq $4, %r[[REG0]]
   %tmp14 = phi i64 [ %tmp15, %bb5 ], [ 0, %bb1 ]
   %tmp15 = add nuw i64 %tmp14, 4
   %tmp16 = trunc i64 %tmp14 to i32
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index bfbfda08cd0eb..e152785b08f6d 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -27,7 +27,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ; SSE-NEXT:    addb %al, %al
 ; SSE-NEXT:    andb $-86, %dil
 ; SSE-NEXT:    shrb %dil
-; SSE-NEXT:    leal (%rdi,%rax), %eax
+; SSE-NEXT:    addl %edi, %eax
 ; SSE-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE-NEXT:    retq
 ;
@@ -46,7 +46,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
 ; AVX-NEXT:    addb %al, %al
 ; AVX-NEXT:    andb $-86, %dil
 ; AVX-NEXT:    shrb %dil
-; AVX-NEXT:    leal (%rdi,%rax), %eax
+; AVX-NEXT:    addl %edi, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/win_coreclr_chkstk.ll b/llvm/test/CodeGen/X86/win_coreclr_chkstk.ll
index 54789dc32d25e..86aa295b7c87f 100644
--- a/llvm/test/CodeGen/X86/win_coreclr_chkstk.ll
+++ b/llvm/test/CodeGen/X86/win_coreclr_chkstk.ll
@@ -21,7 +21,7 @@ entry:
 ; WIN_X64:# %bb.1:
 ; WIN_X64:	andq	$-4096, %rdx
 ; WIN_X64:.LBB0_2:
-; WIN_X64:	leaq	-4096(%rcx), %rcx
+; WIN_X64:	addq	$-4096, %rcx
 ; WIN_X64:	movb	$0, (%rcx)
 ; WIN_X64:	cmpq	%rcx, %rdx
 ; WIN_X64:	jne	.LBB0_2
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index 0be39d3814a9c..c17f4a8a233bd 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -243,14 +243,14 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %
 ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X32-NEXT:    movl (%ebx,%esi), %ebp
 ; X32-NEXT:    addl (%ebx), %ebp
-; X32-NEXT:    leal (%ebx,%esi), %ebx
+; X32-NEXT:    addl %esi, %ebx
 ; X32-NEXT:    addl (%esi,%ebx), %ebp
-; X32-NEXT:    leal (%ebx,%esi), %ebx
+; X32-NEXT:    addl %esi, %ebx
 ; X32-NEXT:    addl (%esi,%ebx), %ebp
-; X32-NEXT:    leal (%ebx,%esi), %ebx
+; X32-NEXT:    addl %esi, %ebx
 ; X32-NEXT:    addl (%esi,%ebx), %ebp
 ; X32-NEXT:    movl %ebp, (%edx)
-; X32-NEXT:    leal (%ebx,%esi), %ebx
+; X32-NEXT:    addl %esi, %ebx
 ; X32-NEXT:    addl %edi, %ebx
 ; X32-NEXT:    addl %ecx, %edx
 ; X32-NEXT:    decl %eax

From 17367b0d895d9d886ea8cc647c8f732f41ce3d7a Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 09:53:37 +0000
Subject: [PATCH 0224/1176] [LVI] Extract helper for binary range calculations;
 NFC

llvm-svn: 361692
---
 llvm/lib/Analysis/LazyValueInfo.cpp | 51 ++++++++++++++++-------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 4f6a344170122..fab2bad16fff8 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -422,6 +422,10 @@ namespace {
                              BasicBlock *BB);
   Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
                                              BasicBlock *BB);
+  bool solveBlockValueBinaryOpImpl(
+      ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+      std::function<ConstantRange(const ConstantRange &,
+                                  const ConstantRange &)> OpFn);
   bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
                                BasicBlock *BB);
   bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
@@ -1040,6 +1044,26 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
   return true;
 }
 
+bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
+    ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+    std::function<ConstantRange(const ConstantRange &,
+                                const ConstantRange &)> OpFn) {
+  // Figure out the ranges of the operands.  If that fails, use a
+  // conservative range, but apply the transfer rule anyways.  This
+  // lets us pick up facts from expressions like "and i32 (call i32
+  // @foo()), 32"
+  Optional<ConstantRange> LHSRes = getRangeForOperand(0, I, BB);
+  Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB);
+  if (!LHSRes.hasValue() || !RHSRes.hasValue())
+    // More work to do before applying this transfer rule.
+    return false;
+
+  ConstantRange LHSRange = LHSRes.getValue();
+  ConstantRange RHSRange = RHSRes.getValue();
+  BBLV = ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+  return true;
+}
+
 bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
                                                 BinaryOperator *BO,
                                                 BasicBlock *BB) {
@@ -1060,8 +1084,10 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
   case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
-    // continue into the code below
-    break;
+    return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
+        [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.binaryOp(BO->getOpcode(), CR2);
+        });
   default:
     // Unhandled instructions are overdefined.
     LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -1069,27 +1095,6 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
     BBLV = ValueLatticeElement::getOverdefined();
     return true;
   };
-
-  // Figure out the ranges of the operands.  If that fails, use a
-  // conservative range, but apply the transfer rule anyways.  This
-  // lets us pick up facts from expressions like "and i32 (call i32
-  // @foo()), 32"
-  Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
-  Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
-
-  if (!LHSRes.hasValue() || !RHSRes.hasValue())
-    // More work to do before applying this transfer rule.
-    return false;
-
-  ConstantRange LHSRange = LHSRes.getValue();
-  ConstantRange RHSRange = RHSRes.getValue();
-
-  // NOTE: We're currently limited by the set of operations that ConstantRange
-  // can evaluate symbolically.  Enhancing that set will allows us to analyze
-  // more definitions.
-  Instruction::BinaryOps BinOp = BO->getOpcode();
-  BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange));
-  return true;
 }
 
 static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,

From 024b18aca7c57c15aa3faf0e650c52b2f7e21ae5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 09:53:45 +0000
Subject: [PATCH 0225/1176] [LVI][CVP] Calculate with.overflow result range

In LVI, calculate the range of extractvalue(op.with.overflow(%x, %y), 0)
as the range of op(%x, %y). This is mainly useful in conjunction with
D60650: If the result of the operation is extracted in a branch guarded
against overflow, then the value of %x will be appropriately constrained
and the result range of the operation will be calculated taking that
into account.

Differential Revision: https://reviews.llvm.org/D60656

llvm-svn: 361693
---
 llvm/lib/Analysis/LazyValueInfo.cpp           | 15 +++++++++++++
 .../overflow_predicate.ll                     | 21 +++++++------------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index fab2bad16fff8..4feff1c776439 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -430,6 +430,8 @@ namespace {
                                BasicBlock *BB);
   bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
                            BasicBlock *BB);
+  bool solveBlockValueOverflowIntrinsic(
+      ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
   void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
                                                      ValueLatticeElement &BBLV,
                                                      Instruction *BBI);
@@ -642,6 +644,11 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
 
     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
       return solveBlockValueBinaryOp(Res, BO, BB);
+
+    if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
+      if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+        if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
+          return solveBlockValueOverflowIntrinsic(Res, WO, BB);
   }
 
   LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -1097,6 +1104,14 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
   };
 }
 
+bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
+    ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB) {
+  return solveBlockValueBinaryOpImpl(BBLV, WO, BB,
+      [WO](const ConstantRange &CR1, const ConstantRange &CR2) {
+        return CR1.binaryOp(WO->getBinaryOp(), CR2);
+      });
+}
+
 static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
                                                      bool isTrueDest) {
   Value *LHS = ICI->getOperand(0);
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll
index e651237e65ae3..aff6e03553405 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll
@@ -470,8 +470,7 @@ define i1 @uadd_val(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp ugt i8 [[VAL]], 100
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp uge i8 [[VAL]], 100
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
@@ -506,8 +505,7 @@ define i1 @sadd_val(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[VAL]], -28
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp sge i8 [[VAL]], -28
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
@@ -542,8 +540,7 @@ define i1 @usub_val(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp ult i8 [[VAL]], -101
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp ule i8 [[VAL]], -101
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
@@ -578,8 +575,7 @@ define i1 @ssub_val(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[VAL]], 27
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp sle i8 [[VAL]], 27
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
@@ -614,8 +610,7 @@ define i1 @umul_val(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp ult i8 [[VAL]], -6
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp ule i8 [[VAL]], -6
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
@@ -650,8 +645,7 @@ define i1 @smul_val_bound1(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp slt i8 [[VAL]], 120
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp sle i8 [[VAL]], 120
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
@@ -686,8 +680,7 @@ define i1 @smul_val_bound2(i8 %x, i1* %pc) {
 ; CHECK:       split:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i8 [[VAL]], -120
 ; CHECK-NEXT:    store i1 [[C1]], i1* [[PC:%.*]]
-; CHECK-NEXT:    [[C2:%.*]] = icmp sge i8 [[VAL]], -120
-; CHECK-NEXT:    ret i1 [[C2]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       trap:
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable

From 9a33dc9fb82150f999c6fa02746c0f648fde1ce1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 09:53:51 +0000
Subject: [PATCH 0226/1176] [CVP] Add tests for saturating add/sub ranges; NFC

llvm-svn: 361694
---
 .../CorrelatedValuePropagation/basic.ll       | 97 +++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
index b768ebd85acc9..19b149a31b2a9 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -986,3 +986,100 @@ exit1:
 exit2:
   ret i1 %cmp2
 }
+
+define i1 @uadd_sat_unknown(i32 %a) {
+; CHECK-LABEL: @uadd_sat_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[A:%.*]], i32 100)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp uge i32 [[VAL]], 100
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[VAL]], 100
+; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+entry:
+  %val = call i32 @llvm.uadd.sat.i32(i32 %a, i32 100)
+  %cmp1 = icmp uge i32 %val, 100
+  %cmp2 = icmp ugt i32 %val, 100
+  br i1 undef, label %exit1, label %exit2
+exit1:
+  ret i1 %cmp1
+exit2:
+  ret i1 %cmp2
+}
+
+define i1 @usub_sat_unknown(i32 %a) {
+; CHECK-LABEL: @usub_sat_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A:%.*]], i32 100)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ule i32 [[VAL]], -101
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[VAL]], -101
+; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+entry:
+  %val = call i32 @llvm.usub.sat.i32(i32 %a, i32 100)
+  %cmp1 = icmp ule i32 %val, 4294967195
+  %cmp2 = icmp ult i32 %val, 4294967195
+  br i1 undef, label %exit1, label %exit2
+exit1:
+  ret i1 %cmp1
+exit2:
+  ret i1 %cmp2
+}
+
+define i1 @sadd_sat_unknown(i32 %a) {
+; CHECK-LABEL: @sadd_sat_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A:%.*]], i32 100)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[VAL]], -2147483548
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[VAL]], -2147483548
+; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+entry:
+  %val = call i32 @llvm.sadd.sat.i32(i32 %a, i32 100)
+  %cmp1 = icmp sge i32 %val, -2147483548
+  %cmp2 = icmp sgt i32 %val, -2147483548
+  br i1 undef, label %exit1, label %exit2
+exit1:
+  ret i1 %cmp1
+exit2:
+  ret i1 %cmp2
+}
+
+define i1 @ssub_sat_unknown(i32 %a) {
+; CHECK-LABEL: @ssub_sat_unknown(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[A:%.*]], i32 100)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sle i32 [[VAL]], 2147483547
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[VAL]], 2147483547
+; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
+; CHECK:       exit1:
+; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK:       exit2:
+; CHECK-NEXT:    ret i1 [[CMP2]]
+;
+entry:
+  %val = call i32 @llvm.ssub.sat.i32(i32 %a, i32 100)
+  %cmp1 = icmp sle i32 %val, 2147483547
+  %cmp2 = icmp slt i32 %val, 2147483547
+  br i1 undef, label %exit1, label %exit2
+exit1:
+  ret i1 %cmp1
+exit2:
+  ret i1 %cmp2
+}
+
+declare i32 @llvm.uadd.sat.i32(i32, i32)
+declare i32 @llvm.usub.sat.i32(i32, i32)
+declare i32 @llvm.sadd.sat.i32(i32, i32)
+declare i32 @llvm.ssub.sat.i32(i32, i32)

From bb76cf0f964d00d7673a3d673809618343f308cc Mon Sep 17 00:00:00 2001
From: David Bolvansky <david.bolvansky@gmail.com>
Date: Sat, 25 May 2019 13:11:22 +0000
Subject: [PATCH 0227/1176] [NFC] Update test checks

llvm-svn: 361695
---
 .../Analysis/ValueTracking/select-pattern.ll  |   1 +
 .../CallSiteSplitting/split-loop.ll           |  15 +-
 .../LoopVectorize/if-pred-stores.ll           | 518 +++++++++++++++---
 llvm/test/Transforms/SimplifyCFG/PR30210.ll   |  14 +-
 .../SimplifyCFG/UnreachableEliminate.ll       | 131 +++--
 .../SimplifyCFG/unreachable_assume.ll         |  49 ++
 6 files changed, 572 insertions(+), 156 deletions(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll

diff --git a/llvm/test/Analysis/ValueTracking/select-pattern.ll b/llvm/test/Analysis/ValueTracking/select-pattern.ll
index 455df00ef1218..4f19c292baedd 100644
--- a/llvm/test/Analysis/ValueTracking/select-pattern.ll
+++ b/llvm/test/Analysis/ValueTracking/select-pattern.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -simplifycfg < %s -S | FileCheck %s
 
 ; The dead code would cause a select that had itself
diff --git a/llvm/test/Transforms/CallSiteSplitting/split-loop.ll b/llvm/test/Transforms/CallSiteSplitting/split-loop.ll
index 3e49a73627f5b..b64a072a5836e 100644
--- a/llvm/test/Transforms/CallSiteSplitting/split-loop.ll
+++ b/llvm/test/Transforms/CallSiteSplitting/split-loop.ll
@@ -5,7 +5,6 @@ define i16 @test1() {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 undef, i16 1, i16 0
-; CHECK-NEXT:    call void @callee(i16 0)
 ; CHECK-NEXT:    br label [[FOR_COND12:%.*]]
 ; CHECK:       for.cond12:
 ; CHECK-NEXT:    call void @callee(i16 [[SPEC_SELECT]])
@@ -28,12 +27,11 @@ define i16 @test2() {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = select i1 undef, i16 1, i16 0
-; CHECK-NEXT:    call void @callee(i16 0)
 ; CHECK-NEXT:    br label [[FOR_COND12:%.*]]
 ; CHECK:       for.cond12:
+; CHECK-NEXT:    call void @callee(i16 [[S]])
 ; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[S]], 10
 ; CHECK-NEXT:    [[ADD2:%.*]] = add i16 [[S]], 10
-; CHECK-NEXT:    call void @callee(i16 [[S]])
 ; CHECK-NEXT:    br label [[FOR_COND12]]
 ;
 entry:
@@ -55,15 +53,12 @@ define i16 @test3() {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = select i1 undef, i16 1, i16 0
-; CHECK-NEXT:    call void @callee(i16 0)
 ; CHECK-NEXT:    br label [[FOR_COND12:%.*]]
 ; CHECK:       for.cond12:
+; CHECK-NEXT:    call void @callee(i16 [[S]])
 ; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[S]], 10
 ; CHECK-NEXT:    [[ADD2:%.*]] = add i16 [[ADD]], 10
-; CHECK-NEXT:    br i1 undef, label [[FOR_COND12_SPLIT:%.*]], label [[EXIT:%.*]]
-; CHECK:       for.cond12.split:
-; CHECK-NEXT:    call void @callee(i16 [[S]])
-; CHECK-NEXT:    br label [[FOR_COND12]]
+; CHECK-NEXT:    br i1 undef, label [[FOR_COND12]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i16 [[ADD2]]
 ;
@@ -85,6 +80,4 @@ exit:
   ret i16 %add2
 }
 
-define internal void @callee(i16 %flag) {
-  ret void
-}
+declare void @callee(i16 %flag)
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 61c05d3154c62..8dd12f5d30c38 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=VEC
@@ -6,62 +7,166 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; Test predication of stores.
 define i32 @test(i32* nocapture %f) #0 {
-entry:
-  br label %for.body
-
-; VEC-LABEL: test
-; VEC:   %[[v0:.+]] = add i64 %index, 0
-; VEC:   %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]]
-; VEC:   %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
-; VEC:   %[[v11:.+]] = extractelement <2 x i1> %[[v8]], i32 0
-; VEC:   br i1 %[[v11]], label %[[cond:.+]], label %[[else:.+]]
-;
-; VEC: [[cond]]:
-; VEC:   %[[v13:.+]] = extractelement <2 x i32> %wide.load, i32 0
-; VEC:   %[[v9a:.+]] = add nsw i32 %[[v13]], 20
-; VEC:   store i32 %[[v9a]], i32* %[[v2]], align 4
-; VEC:   br label %[[else:.+]]
-;
-; VEC: [[else]]:
-; VEC:   %[[v15:.+]] = extractelement <2 x i1> %[[v8]], i32 1
-; VEC:   br i1 %[[v15]], label %[[cond2:.+]], label %[[else2:.+]]
-;
-; VEC: [[cond2]]:
-; VEC:   %[[v17:.+]] = extractelement <2 x i32> %wide.load, i32 1
-; VEC:   %[[v9b:.+]] = add nsw i32 %[[v17]], 20
-; VEC:   %[[v1:.+]] = add i64 %index, 1
-; VEC:   %[[v4:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v1]]
-; VEC:   store i32 %[[v9b]], i32* %[[v4]], align 4
-; VEC:   br label %[[else2:.+]]
+; UNROLL-LABEL: @test(
+; UNROLL-NEXT:  entry:
+; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; UNROLL:       vector.body:
+; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
+; UNROLL-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[INDUCTION]]
+; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDUCTION1]]
+; UNROLL-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+; UNROLL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; UNROLL-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 100
+; UNROLL-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 100
+; UNROLL-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; UNROLL:       pred.store.if:
+; UNROLL-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP2]], 20
+; UNROLL-NEXT:    store i32 [[TMP6]], i32* [[TMP0]], align 4
+; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; UNROLL:       pred.store.continue:
+; UNROLL-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
+; UNROLL:       pred.store.if2:
+; UNROLL-NEXT:    [[TMP7:%.*]] = add nsw i32 [[TMP3]], 20
+; UNROLL-NEXT:    store i32 [[TMP7]], i32* [[TMP1]], align 4
+; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE3]]
+; UNROLL:       pred.store.continue3:
+; UNROLL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; UNROLL-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; UNROLL:       middle.block:
+; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
+; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
+; UNROLL:       for.body:
+; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
+; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; UNROLL-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; UNROLL-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100
+; UNROLL-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL:       if.then:
+; UNROLL-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP9]], 20
+; UNROLL-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; UNROLL-NEXT:    br label [[FOR_INC]]
+; UNROLL:       for.inc:
+; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
+; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
+; UNROLL:       for.end:
+; UNROLL-NEXT:    ret i32 0
 ;
-; VEC: [[else2]]:
-
-; UNROLL-LABEL: test
-; UNROLL: vector.body:
-; UNROLL:   %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
-; UNROLL:   %[[IND1:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 1
-; UNROLL:   %[[v0:[a-zA-Z0-9]+]] = getelementptr inbounds i32, i32* %f, i64 %[[IND]]
-; UNROLL:   %[[v1:[a-zA-Z0-9]+]] = getelementptr inbounds i32, i32* %f, i64 %[[IND1]]
-; UNROLL:   %[[v2:[a-zA-Z0-9]+]] = load i32, i32* %[[v0]], align 4
-; UNROLL:   %[[v3:[a-zA-Z0-9]+]] = load i32, i32* %[[v1]], align 4
-; UNROLL:   %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100
-; UNROLL:   %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
-; UNROLL:   br i1 %[[v4]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]]
+; UNROLL-NOSIMPLIFY-LABEL: @test(
+; UNROLL-NOSIMPLIFY-NEXT:  entry:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NOSIMPLIFY:       vector.ph:
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[VECTOR_BODY:%.*]]
+; UNROLL-NOSIMPLIFY:       vector.body:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[INDUCTION]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDUCTION1]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 100
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 100
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; UNROLL-NOSIMPLIFY:       pred.store.if:
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP2]], 20
+; UNROLL-NOSIMPLIFY-NEXT:    store i32 [[TMP6]], i32* [[TMP0]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; UNROLL-NOSIMPLIFY:       pred.store.continue:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
+; UNROLL-NOSIMPLIFY:       pred.store.if2:
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP7:%.*]] = add nsw i32 [[TMP3]], 20
+; UNROLL-NOSIMPLIFY-NEXT:    store i32 [[TMP7]], i32* [[TMP1]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[PRED_STORE_CONTINUE3]]
+; UNROLL-NOSIMPLIFY:       pred.store.continue3:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; UNROLL-NOSIMPLIFY:       middle.block:
+; UNROLL-NOSIMPLIFY-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; UNROLL-NOSIMPLIFY:       scalar.ph:
+; UNROLL-NOSIMPLIFY-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_BODY:%.*]]
+; UNROLL-NOSIMPLIFY:       for.body:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL-NOSIMPLIFY:       if.then:
+; UNROLL-NOSIMPLIFY-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP9]], 20
+; UNROLL-NOSIMPLIFY-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_INC]]
+; UNROLL-NOSIMPLIFY:       for.inc:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
+; UNROLL-NOSIMPLIFY:       for.end:
+; UNROLL-NOSIMPLIFY-NEXT:    ret i32 0
 ;
-; UNROLL: [[cond]]:
-; UNROLL:   %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
-; UNROLL:   store i32 %[[v6]], i32* %[[v0]], align 4
-; UNROLL:   br label %[[else]]
+; VEC-LABEL: @test(
+; VEC-NEXT:  entry:
+; VEC-NEXT:    br label [[VECTOR_BODY:%.*]]
+; VEC:       vector.body:
+; VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
+; VEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0
+; VEC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
+; VEC-NEXT:    [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
+; VEC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[TMP0]]
+; VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
+; VEC-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
+; VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
+; VEC-NEXT:    [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
+; VEC-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
+; VEC-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; VEC:       pred.store.if:
+; VEC-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
+; VEC-NEXT:    [[TMP7:%.*]] = add nsw i32 [[TMP6]], 20
+; VEC-NEXT:    store i32 [[TMP7]], i32* [[TMP1]], align 4
+; VEC-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; VEC:       pred.store.continue:
+; VEC-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
+; VEC-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
+; VEC:       pred.store.if1:
+; VEC-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
+; VEC-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 20
+; VEC-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; VEC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[TMP11]]
+; VEC-NEXT:    store i32 [[TMP10]], i32* [[TMP12]], align 4
+; VEC-NEXT:    br label [[PRED_STORE_CONTINUE2]]
+; VEC:       pred.store.continue2:
+; VEC-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; VEC-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; VEC-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; VEC:       middle.block:
+; VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
+; VEC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
+; VEC:       for.body:
+; VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
+; VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; VEC-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; VEC-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 100
+; VEC-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; VEC:       if.then:
+; VEC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], 20
+; VEC-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
+; VEC-NEXT:    br label [[FOR_INC]]
+; VEC:       for.inc:
+; VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; VEC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
+; VEC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
+; VEC:       for.end:
+; VEC-NEXT:    ret i32 0
 ;
-; UNROLL: [[else]]:
-; UNROLL:   br i1 %[[v5]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]]
-;
-; UNROLL: [[cond2]]:
-; UNROLL:   %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
-; UNROLL:   store i32 %[[v7]], i32* %[[v1]], align 4
-; UNROLL:   br label %[[else2]]
-;
-; UNROLL: [[else2]]:
+entry:
+  br label %for.body
+
+
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
@@ -89,11 +194,115 @@ for.end:
 ; vectorized loop body.
 ; PR18724
 
-; UNROLL-NOSIMPLIFY-LABEL: bug18724
-; UNROLL-NOSIMPLIFY: store i32
-; UNROLL-NOSIMPLIFY: store i32
-
 define void @bug18724() {
+; UNROLL-LABEL: @bug18724(
+; UNROLL-NEXT:  entry:
+; UNROLL-NEXT:    br label [[FOR_BODY14:%.*]]
+; UNROLL:       for.body14:
+; UNROLL-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; UNROLL-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
+; UNROLL-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
+; UNROLL-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
+; UNROLL-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
+; UNROLL:       if.then18:
+; UNROLL-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
+; UNROLL-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
+; UNROLL-NEXT:    br label [[FOR_INC23]]
+; UNROLL:       for.inc23:
+; UNROLL-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
+; UNROLL-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; UNROLL-NEXT:    br label [[FOR_BODY14]]
+;
+; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
+; UNROLL-NOSIMPLIFY-NEXT:  entry:
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_BODY9:%.*]]
+; UNROLL-NOSIMPLIFY:       for.body9:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 undef, label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]]
+; UNROLL-NOSIMPLIFY:       for.body14.preheader:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NOSIMPLIFY:       vector.ph:
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[VECTOR_BODY:%.*]]
+; UNROLL-NOSIMPLIFY:       vector.body:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; UNROLL-NOSIMPLIFY:       pred.store.if:
+; UNROLL-NOSIMPLIFY-NEXT:    store i32 2, i32* [[TMP0]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; UNROLL-NOSIMPLIFY:       pred.store.continue:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 undef, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
+; UNROLL-NOSIMPLIFY:       pred.store.if3:
+; UNROLL-NOSIMPLIFY-NEXT:    store i32 2, i32* [[TMP1]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; UNROLL-NOSIMPLIFY:       pred.store.continue4:
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP4:%.*]] = add nsw i32 [[VEC_PHI]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP5:%.*]] = add nsw i32 [[VEC_PHI2]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP4]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP5]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[OFFSET_IDX6:%.*]] = add i64 undef, [[INDEX]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX6]] to i32
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION7:%.*]] = add i32 [[TMP6]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION8:%.*]] = add i32 [[TMP6]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3
+; UNROLL-NOSIMPLIFY:       middle.block:
+; UNROLL-NOSIMPLIFY-NEXT:    [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1, 0
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP_N]], label [[FOR_INC26_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; UNROLL-NOSIMPLIFY:       scalar.ph:
+; UNROLL-NOSIMPLIFY-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY14_PREHEADER]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[FOR_BODY14_PREHEADER]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_BODY14:%.*]]
+; UNROLL-NOSIMPLIFY:       for.body14:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
+; UNROLL-NOSIMPLIFY:       if.then18:
+; UNROLL-NOSIMPLIFY-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
+; UNROLL-NOSIMPLIFY-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_INC23]]
+; UNROLL-NOSIMPLIFY:       for.inc23:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
+; UNROLL-NOSIMPLIFY-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], !llvm.loop !4
+; UNROLL-NOSIMPLIFY:       for.inc26.loopexit:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_INC26]]
+; UNROLL-NOSIMPLIFY:       for.inc26:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INEWCHUNKS_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY9]] ], [ [[INEWCHUNKS_2_LCSSA]], [[FOR_INC26_LOOPEXIT]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    unreachable
+;
+; VEC-LABEL: @bug18724(
+; VEC-NEXT:  entry:
+; VEC-NEXT:    br label [[FOR_BODY14:%.*]]
+; VEC:       for.body14:
+; VEC-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; VEC-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
+; VEC-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
+; VEC-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
+; VEC-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
+; VEC:       if.then18:
+; VEC-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
+; VEC-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
+; VEC-NEXT:    br label [[FOR_INC23]]
+; VEC:       for.inc23:
+; VEC-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
+; VEC-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; VEC-NEXT:    br label [[FOR_BODY14]]
+;
 entry:
   br label %for.body9
 
@@ -124,33 +333,184 @@ for.inc26:
   unreachable
 }
 
-; VEC-LABEL: @minimal_bit_widths(
-;
 ; In the test below, it's more profitable for the expression feeding the
 ; conditional store to remain scalar. Since we can only type-shrink vector
 ; types, we shouldn't try to represent the expression in a smaller type.
 ;
-; VEC: vector.body:
-; VEC:   %wide.load = load <2 x i8>, <2 x i8>* {{.*}}, align 1
-; VEC:   br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
-; VEC: [[IF0]]:
-; VEC:   %[[E0:.+]] = extractelement <2 x i8> %wide.load, i32 0
-; VEC:   %[[Z0:.+]] = zext i8 %[[E0]] to i32
-; VEC:   %[[T0:.+]] = trunc i32 %[[Z0]] to i8
-; VEC:   store i8 %[[T0]], i8* {{.*}}, align 1
-; VEC:   br label %[[CONT0]]
-; VEC: [[CONT0]]:
-; VEC:   br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
-; VEC: [[IF1]]:
-; VEC:   %[[E1:.+]] = extractelement <2 x i8> %wide.load, i32 1
-; VEC:   %[[Z1:.+]] = zext i8 %[[E1]] to i32
-; VEC:   %[[T1:.+]] = trunc i32 %[[Z1]] to i8
-; VEC:   store i8 %[[T1]], i8* {{.*}}, align 1
-; VEC:   br label %[[CONT1]]
-; VEC: [[CONT1]]:
-; VEC:   br i1 {{.*}}, label %middle.block, label %vector.body
-;
 define void @minimal_bit_widths(i1 %c) {
+; UNROLL-LABEL: @minimal_bit_widths(
+; UNROLL-NEXT:  entry:
+; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; UNROLL:       vector.body:
+; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]]
+; UNROLL-NEXT:    [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NEXT:    [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1
+; UNROLL-NEXT:    br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6]]
+; UNROLL:       pred.store.if:
+; UNROLL-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION]]
+; UNROLL-NEXT:    [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1
+; UNROLL-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; UNROLL-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; UNROLL-NEXT:    store i8 [[TMP3]], i8* [[TMP0]], align 1
+; UNROLL-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION2]]
+; UNROLL-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; UNROLL-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; UNROLL-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
+; UNROLL-NEXT:    store i8 [[TMP7]], i8* [[TMP4]], align 1
+; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; UNROLL:       pred.store.continue6:
+; UNROLL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
+; UNROLL-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3
+; UNROLL:       middle.block:
+; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
+; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
+; UNROLL:       for.body:
+; UNROLL-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ]
+; UNROLL-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ]
+; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; UNROLL-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
+; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL:       if.then:
+; UNROLL-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
+; UNROLL-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
+; UNROLL-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
+; UNROLL-NEXT:    br label [[FOR_INC]]
+; UNROLL:       for.inc:
+; UNROLL-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
+; UNROLL-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
+; UNROLL-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !4
+; UNROLL:       for.end:
+; UNROLL-NEXT:    ret void
+;
+; UNROLL-NOSIMPLIFY-LABEL: @minimal_bit_widths(
+; UNROLL-NOSIMPLIFY-NEXT:  entry:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NOSIMPLIFY:       vector.ph:
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[VECTOR_BODY:%.*]]
+; UNROLL-NOSIMPLIFY:       vector.body:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; UNROLL-NOSIMPLIFY:       pred.store.if:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; UNROLL-NOSIMPLIFY-NEXT:    store i8 [[TMP3]], i8* [[TMP0]], align 1
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; UNROLL-NOSIMPLIFY:       pred.store.continue:
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[C]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; UNROLL-NOSIMPLIFY:       pred.store.if5:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION2]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8
+; UNROLL-NOSIMPLIFY-NEXT:    store i8 [[TMP7]], i8* [[TMP4]], align 1
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; UNROLL-NOSIMPLIFY:       pred.store.continue6:
+; UNROLL-NOSIMPLIFY-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
+; UNROLL-NOSIMPLIFY:       middle.block:
+; UNROLL-NOSIMPLIFY-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; UNROLL-NOSIMPLIFY:       scalar.ph:
+; UNROLL-NOSIMPLIFY-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_BODY:%.*]]
+; UNROLL-NOSIMPLIFY:       for.body:
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL-NOSIMPLIFY:       if.then:
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
+; UNROLL-NOSIMPLIFY-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
+; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_INC]]
+; UNROLL-NOSIMPLIFY:       for.inc:
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; UNROLL-NOSIMPLIFY-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !6
+; UNROLL-NOSIMPLIFY:       for.end:
+; UNROLL-NOSIMPLIFY-NEXT:    ret void
+;
+; VEC-LABEL: @minimal_bit_widths(
+; VEC-NEXT:  entry:
+; VEC-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i1> undef, i1 [[C:%.*]], i32 0
+; VEC-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT5]], <2 x i1> undef, <2 x i32> zeroinitializer
+; VEC-NEXT:    br label [[VECTOR_BODY:%.*]]
+; VEC:       vector.body:
+; VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; VEC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0
+; VEC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
+; VEC-NEXT:    [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
+; VEC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]]
+; VEC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> undef, i64 [[OFFSET_IDX]], i32 0
+; VEC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> undef, <2 x i32> zeroinitializer
+; VEC-NEXT:    [[INDUCTION4:%.*]] = add <2 x i64> [[BROADCAST_SPLAT3]], <i64 0, i64 -1>
+; VEC-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
+; VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; VEC-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0
+; VEC-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <2 x i8>*
+; VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, <2 x i8>* [[TMP4]], align 1
+; VEC-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT6]], i32 0
+; VEC-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; VEC:       pred.store.if:
+; VEC-NEXT:    [[TMP6:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0
+; VEC-NEXT:    [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; VEC-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
+; VEC-NEXT:    store i8 [[TMP8]], i8* [[TMP2]], align 1
+; VEC-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; VEC:       pred.store.continue:
+; VEC-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT6]], i32 1
+; VEC-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; VEC:       pred.store.if7:
+; VEC-NEXT:    [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1
+; VEC-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; VEC-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
+; VEC-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; VEC-NEXT:    [[TMP14:%.*]] = getelementptr i8, i8* undef, i64 [[TMP13]]
+; VEC-NEXT:    store i8 [[TMP12]], i8* [[TMP14]], align 1
+; VEC-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; VEC:       pred.store.continue8:
+; VEC-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
+; VEC-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
+; VEC-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
+; VEC:       middle.block:
+; VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
+; VEC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
+; VEC:       for.body:
+; VEC-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ]
+; VEC-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ]
+; VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; VEC-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
+; VEC-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; VEC:       if.then:
+; VEC-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
+; VEC-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
+; VEC-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
+; VEC-NEXT:    br label [[FOR_INC]]
+; VEC:       for.inc:
+; VEC-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
+; VEC-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; VEC-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
+; VEC-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !5
+; VEC:       for.end:
+; VEC-NEXT:    ret void
+;
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/SimplifyCFG/PR30210.ll b/llvm/test/Transforms/SimplifyCFG/PR30210.ll
index a2aa825683150..d1b0a4cd4993c 100644
--- a/llvm/test/Transforms/SimplifyCFG/PR30210.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PR30210.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -5,6 +6,12 @@ target triple = "x86_64-unknown-linux-gnu"
 declare i32* @fn1(i32* returned)
 
 define i32 @test1(i1 %B) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND_US:%.*]]
+; CHECK:       for.cond.us:
+; CHECK-NEXT:    br label [[FOR_COND_US]]
+;
 entry:
   br label %for.cond.us
 
@@ -28,9 +35,4 @@ for.cond5:                                        ; preds = %for.cond5, %for.con
 for.end:                                          ; preds = %for.cond5
   %load = load i32, i32* %call, align 4
   br label %for.cond4
-}
-
-; CHECK-LABEL: define i32 @test1(
-; CHECK: br label %[[LABEL:.*]]
-; CHECK: [[LABEL]]:
-; CHECK: br label %[[LABEL]]
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index f994477d6ac2c..6bb38c3ed88e0 100644
--- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -1,53 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 define void @test1(i1 %C, i1* %BP) {
 ; CHECK-LABEL: @test1(
-; CHECK: entry:
-; CHECK-NEXT: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
 entry:
-        br i1 %C, label %T, label %F
+  br i1 %C, label %T, label %F
 T:
-        store i1 %C, i1* %BP
-        unreachable
+  store i1 %C, i1* %BP
+  unreachable
 F:
-        ret void
+  ret void
 }
 
 define void @test2() personality i32 (...)* @__gxx_personality_v0 {
 ; CHECK-LABEL: @test2(
-; CHECK: entry:
-; CHECK-NEXT: call void @test2()
-; CHECK-NEXT: ret void
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @test2()
+; CHECK-NEXT:    ret void
+;
 entry:
-        invoke void @test2( )
-                        to label %N unwind label %U
+  invoke void @test2( )
+  to label %N unwind label %U
 U:
   %res = landingpad { i8* }
-          cleanup
-        unreachable
+  cleanup
+  unreachable
 N:
-        ret void
+  ret void
 }
 
 declare i32 @__gxx_personality_v0(...)
 
 define i32 @test3(i32 %v) {
 ; CHECK-LABEL: @test3(
-; CHECK: entry:
-; CHECK-NEXT: [[CMP:%[A-Za-z0-9]+]] = icmp eq i32 %v, 2
-; CHECK-NEXT: select i1 [[CMP]], i32 2, i32 1
-; CHECK-NEXT: ret
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[V:%.*]], 2
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 2, i32 1
+; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+;
 entry:
-        switch i32 %v, label %default [
-                 i32 1, label %U
-                 i32 2, label %T
-        ]
+  switch i32 %v, label %default [
+  i32 1, label %U
+  i32 2, label %T
+  ]
 default:
-        ret i32 1
+  ret i32 1
 U:
-        unreachable
+  unreachable
 T:
-        ret i32 2
+  ret i32 2
 }
 
 
@@ -56,21 +60,20 @@ T:
 ;; the latter.
 
 define void @test5(i1 %cond, i8* %ptr) {
-
-; CHECK-LABEL: test5
-; CHECK: entry:
-; CHECK-NOT: select
-; CHECK:  store i8 2, i8* %ptr
-; CHECK:  ret
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i8 2, i8* [[PTR:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
 
 entry:
   br i1 %cond, label %bb1, label %bb3
 
 bb3:
- br label %bb2
+  br label %bb2
 
 bb1:
- br label %bb2
+  br label %bb2
 
 bb2:
   %ptr.2 = phi i8* [ %ptr, %bb3 ], [ null, %bb1 ]
@@ -79,20 +82,21 @@ bb2:
 }
 
 define void @test5_no_null_opt(i1 %cond, i8* %ptr) #0 {
-
-; CHECK-LABEL: test5_no_null_opt
-; CHECK: entry:
-; CHECK: %[[SEL:.*]] = select i1 %cond, i8* null, i8* %ptr
-; CHECK: store i8 2, i8* %[[SEL]]
+; CHECK-LABEL: @test5_no_null_opt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTPTR:%.*]] = select i1 [[COND:%.*]], i8* null, i8* [[PTR:%.*]]
+; CHECK-NEXT:    store i8 2, i8* [[DOTPTR]], align 8
+; CHECK-NEXT:    ret void
+;
 
 entry:
   br i1 %cond, label %bb1, label %bb3
 
 bb3:
- br label %bb2
+  br label %bb2
 
 bb1:
- br label %bb2
+  br label %bb2
 
 bb2:
   %ptr.2 = phi i8* [ %ptr, %bb3 ], [ null, %bb1 ]
@@ -100,13 +104,12 @@ bb2:
   ret void
 }
 
-; CHECK-LABEL: test6
-; CHECK: entry:
-; CHECK-NOT: select
-; CHECK:  store i8 2, i8* %ptr
-; CHECK:  ret
-
 define void @test6(i1 %cond, i8* %ptr) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i8 2, i8* [[PTR:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   br i1 %cond, label %bb1, label %bb2
 
@@ -119,12 +122,13 @@ bb2:
   ret void
 }
 
-; CHECK-LABEL: test6_no_null_opt
-; CHECK: entry:
-; CHECK: %[[SEL:.*]] = select i1 %cond, i8* null, i8* %ptr
-; CHECK: store i8 2, i8* %[[SEL]]
-
 define void @test6_no_null_opt(i1 %cond, i8* %ptr) #0 {
+; CHECK-LABEL: @test6_no_null_opt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND:%.*]], i8* null, i8* [[PTR:%.*]]
+; CHECK-NEXT:    store i8 2, i8* [[SPEC_SELECT]], align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   br i1 %cond, label %bb1, label %bb2
 
@@ -139,6 +143,10 @@ bb2:
 
 
 define i32 @test7(i1 %X) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %X, label %if, label %else
 
@@ -150,11 +158,13 @@ else:
   %phi = phi i32 [ 0, %entry ], [ 1, %if ]
   ret i32 %phi
 }
-; CHECK-LABEL: define i32 @test7(
-; CHECK-NOT: call
-; CHECK: ret i32 0
 
 define void @test8(i1 %X, void ()* %Y) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void [[Y:%.*]]()
+; CHECK-NEXT:    ret void
+;
 entry:
   br i1 %X, label %if, label %else
 
@@ -166,10 +176,14 @@ else:
   call void %phi()
   ret void
 }
-; CHECK-LABEL: define void @test8(
-; CHECK: call void %Y(
 
 define void @test8_no_null_opt(i1 %X, void ()* %Y) #0 {
+; CHECK-LABEL: @test8_no_null_opt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[X:%.*]], void ()* null, void ()* [[Y:%.*]]
+; CHECK-NEXT:    call void [[SPEC_SELECT]]()
+; CHECK-NEXT:    ret void
+;
 entry:
   br i1 %X, label %if, label %else
 
@@ -181,8 +195,5 @@ else:
   call void %phi()
   ret void
 }
-attributes #0 = { "null-pointer-is-valid"="true" }
 
-; CHECK-LABEL: define void @test8_no_null_opt(
-; CHECK: %[[SEL:.*]] = select i1 %X, void ()* null, void ()* %Y
-; CHECK: call void %[[SEL]]
+attributes #0 = { "null-pointer-is-valid"="true" }
\ No newline at end of file
diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll b/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll
new file mode 100644
index 0000000000000..cd254e4d05fb1
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -simplifycfg -instcombine -S | FileCheck %s
+
+; TODO: ABS call should be optimized away
+define i32 @assume1(i32 %p) {
+; CHECK-LABEL: @assume1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[P:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[P]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[NEG]], i32 [[P]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %cmp = icmp sle i32 %p, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  unreachable
+
+if.end:
+  %call = call i32 @abs(i32 %p)
+  ret i32 %call
+}
+
+
+define i32 @assume2(i32 %p) {
+; CHECK-LABEL: @assume2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[P:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[P]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[NEG]], i32 [[P]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  %cmp = icmp sgt i32 %p, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %if.end
+
+if.else:
+  unreachable
+
+if.end:
+  %call = call i32 @abs(i32 %p)
+  ret i32 %call
+}
+
+declare i32 @abs(i32)

From 91131b65000bd77c097f6356bf3f1668fddaa422 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 25 May 2019 13:48:07 +0000
Subject: [PATCH 0228/1176] [SelectionDAG] soften assertion when legalizing
 narrow vector FP ops

The test based on PR42010:
https://bugs.llvm.org/show_bug.cgi?id=42010
...may show an inaccuracy for PPC's target defs, but we should not
be so aggressive with an assert here. There's no telling what out-of-tree
targets look like.

llvm-svn: 361696
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 10 ++++----
 llvm/test/CodeGen/PowerPC/ftrunc-legalize.ll  | 24 +++++++++++++++++++
 2 files changed, 28 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/ftrunc-legalize.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8570f57616e4f..379ee00c90fbc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2743,13 +2743,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     // We're going to widen this vector op to a legal type by padding with undef
     // elements. If the wide vector op is eventually going to be expanded to
     // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
-    // libcalls on the undef elements. We are assuming that if the scalar op
-    // requires expanding, then the vector op needs expanding too.
+    // libcalls on the undef elements.
     EVT VT = N->getValueType(0);
-    if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
-      EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-      assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
-             "Target supports vector op, but scalar requires expansion?");
+    EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+    if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
       Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
       break;
     }
diff --git a/llvm/test/CodeGen/PowerPC/ftrunc-legalize.ll b/llvm/test/CodeGen/PowerPC/ftrunc-legalize.ll
new file mode 100644
index 0000000000000..4c27ad4c4341a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ftrunc-legalize.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64-- -mattr=altivec -verify-machineinstrs < %s | FileCheck %s
+
+; This would assert because the widened vector op is
+; legal/custom, but the scalar op is expanded.
+
+define i32 @PR42010(<2 x float> %x) {
+; CHECK-LABEL: PR42010:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi 3, 1, -32
+; CHECK-NEXT:    vrfiz 2, 2
+; CHECK-NEXT:    stvx 2, 0, 3
+; CHECK-NEXT:    lfs 0, -28(1)
+; CHECK-NEXT:    fctiwz 0, 0
+; CHECK-NEXT:    stfd 0, -8(1)
+; CHECK-NEXT:    lwz 3, -4(1)
+; CHECK-NEXT:    blr
+  %t0 = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)
+  %t1 = extractelement <2 x float> %t0, i32 1
+  %t2 = fptosi float %t1 to i32
+  ret i32 %t2
+}
+
+declare <2 x float> @llvm.trunc.v2f32(<2 x float>)

From 214981185478329ee6745cbfdd35783e7320e73c Mon Sep 17 00:00:00 2001
From: David Bolvansky <david.bolvansky@gmail.com>
Date: Sat, 25 May 2019 14:10:20 +0000
Subject: [PATCH 0229/1176] [NFC] Make tests more robust for new optimizations

llvm-svn: 361697
---
 llvm/test/CodeGen/ARM/crash-greedy.ll            |  4 ++--
 llvm/test/CodeGen/Hexagon/bit-visit-flowq.ll     |  6 +++---
 llvm/test/CodeGen/Hexagon/rdf-ignore-undef.ll    |  2 +-
 llvm/test/CodeGen/Hexagon/reg-scavengebug.ll     |  4 ++--
 .../CodeGen/Hexagon/regalloc-block-overlap.ll    |  4 ++--
 .../Transforms/LoopVectorize/if-pred-stores.ll   | 16 +++++++++++++---
 6 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/crash-greedy.ll b/llvm/test/CodeGen/ARM/crash-greedy.ll
index 444505f8786a8..bd0f85556b8f8 100644
--- a/llvm/test/CodeGen/ARM/crash-greedy.ll
+++ b/llvm/test/CodeGen/ARM/crash-greedy.ll
@@ -7,11 +7,11 @@ target triple = "thumbv7-apple-darwin"
 declare double @exp(double)
 
 ; CHECK: remat_subreg
-define void @remat_subreg(float* nocapture %x, i32* %y, i32 %n, i32 %z, float %c, float %lambda, float* nocapture %ret_f, float* nocapture %ret_df) nounwind {
+define void @remat_subreg(float* nocapture %x, i32* %y, i32 %n, i32 %z, float %c, float %lambda, float* nocapture %ret_f, float* nocapture %ret_df, i1 %cond) nounwind {
 entry:
   %conv16 = fpext float %lambda to double
   %mul17 = fmul double %conv16, -1.000000e+00
-  br i1 undef, label %cond.end.us, label %cond.end
+  br i1 %cond, label %cond.end.us, label %cond.end
 
 cond.end.us:                                      ; preds = %entry
   unreachable
diff --git a/llvm/test/CodeGen/Hexagon/bit-visit-flowq.ll b/llvm/test/CodeGen/Hexagon/bit-visit-flowq.ll
index b44847dee68ea..f0786da3bed83 100644
--- a/llvm/test/CodeGen/Hexagon/bit-visit-flowq.ll
+++ b/llvm/test/CodeGen/Hexagon/bit-visit-flowq.ll
@@ -9,7 +9,7 @@ target triple = "hexagon"
 @debug = external hidden unnamed_addr global i1, align 4
 
 ; Function Attrs: nounwind
-define void @foo() local_unnamed_addr #0 {
+define void @foo(i1 %cond) local_unnamed_addr #0 {
 entry:
   br label %if.end5
 
@@ -17,14 +17,14 @@ if.end5:                                          ; preds = %entry
   br i1 undef, label %if.then12, label %if.end13
 
 if.then12:                                        ; preds = %if.end5
-  unreachable
+  ret void
 
 if.end13:                                         ; preds = %if.end5
   br label %for.cond
 
 for.cond:                                         ; preds = %if.end13
   %or.cond288 = or i1 undef, undef
-  br i1 undef, label %if.then44, label %if.end51
+  br i1 %cond, label %if.then44, label %if.end51
 
 if.then44:                                        ; preds = %for.cond
   tail call void @bar() #0
diff --git a/llvm/test/CodeGen/Hexagon/rdf-ignore-undef.ll b/llvm/test/CodeGen/Hexagon/rdf-ignore-undef.ll
index 5d72318f420fe..d52676b0e87e4 100644
--- a/llvm/test/CodeGen/Hexagon/rdf-ignore-undef.ll
+++ b/llvm/test/CodeGen/Hexagon/rdf-ignore-undef.ll
@@ -49,7 +49,7 @@ if.end88.do.body_crit_edge:                       ; preds = %if.end88
   br label %do.body
 
 if.then124:                                       ; preds = %if.end88, %do.body
-  unreachable
+  ret i32 0
 }
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll b/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll
index 16e7cfe2a07ae..d53799bc4d191 100644
--- a/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll
+++ b/llvm/test/CodeGen/Hexagon/reg-scavengebug.ll
@@ -19,7 +19,7 @@ declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #0
 declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #0
 
 ; Function Attrs: nounwind
-define void @f0(i16* noalias nocapture %a0, i32* noalias nocapture readonly %a1, i32 %a2, i8* noalias nocapture readonly %a3) #1 {
+define void @f0(i16* noalias nocapture %a0, i32* noalias nocapture readonly %a1, i32 %a2, i8* noalias nocapture readonly %a3, i1 %cond) #1 {
 b0:
   %v0 = add nsw i32 %a2, 63
   %v1 = ashr i32 %v0, 6
@@ -40,7 +40,7 @@ b1:                                               ; preds = %b0
   %v13 = getelementptr inbounds i32, i32* %a1, i32 48
   %v14 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v12, <16 x i32> undef)
   %v15 = bitcast i32* %v13 to <16 x i32>*
-  br i1 undef, label %b2, label %b3
+  br i1 %cond, label %b2, label %b3
 
 b2:                                               ; preds = %b1
   %v16 = getelementptr inbounds <16 x i32>, <16 x i32>* %v15, i32 1
diff --git a/llvm/test/CodeGen/Hexagon/regalloc-block-overlap.ll b/llvm/test/CodeGen/Hexagon/regalloc-block-overlap.ll
index c4f490196bb98..90b37f2a0d728 100644
--- a/llvm/test/CodeGen/Hexagon/regalloc-block-overlap.ll
+++ b/llvm/test/CodeGen/Hexagon/regalloc-block-overlap.ll
@@ -16,7 +16,7 @@ declare <32 x i32> @llvm.hexagon.V6.vasrwhsat.128B(<32 x i32>, <32 x i32>, i32)
 declare <64 x i32> @llvm.hexagon.V6.vlutvwh.128B(<32 x i32>, <32 x i32>, i32) #1
 declare <64 x i32> @llvm.hexagon.V6.vlutvwh.oracc.128B(<64 x i32>, <32 x i32>, <32 x i32>, i32) #1
 
-define hidden void @fred(<32 x i32>* %a0, i32 %a1) #0 {
+define hidden void @fred(<32 x i32>* %a0, i32 %a1, i1 %cond) #0 {
 b0:
   %v1 = ashr i32 %a1, 7
   %v2 = shl nsw i32 %v1, 7
@@ -70,7 +70,7 @@ b15:                                              ; preds = %b14
   br label %b16
 
 b16:                                              ; preds = %b15
-  br i1 undef, label %b17, label %b18
+  br i1 %cond, label %b17, label %b18
 
 b17:                                              ; preds = %b16
   unreachable
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 8dd12f5d30c38..353087f66e537 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -194,9 +194,11 @@ for.end:
 ; vectorized loop body.
 ; PR18724
 
-define void @bug18724() {
+define void @bug18724(i1 %cond) {
 ; UNROLL-LABEL: @bug18724(
 ; UNROLL-NEXT:  entry:
+; UNROLL-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
+; UNROLL-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; UNROLL-NEXT:    br label [[FOR_BODY14:%.*]]
 ; UNROLL:       for.body14:
 ; UNROLL-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -211,13 +213,16 @@ define void @bug18724() {
 ; UNROLL:       for.inc23:
 ; UNROLL-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
 ; UNROLL-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
+; UNROLL-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
+; UNROLL-NEXT:    call void @llvm.assume(i1 [[CMP13]])
 ; UNROLL-NEXT:    br label [[FOR_BODY14]]
 ;
 ; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
 ; UNROLL-NOSIMPLIFY-NEXT:  entry:
 ; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_BODY9:%.*]]
 ; UNROLL-NOSIMPLIFY:       for.body9:
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 undef, label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[COND:%.*]], label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]]
 ; UNROLL-NOSIMPLIFY:       for.body14.preheader:
 ; UNROLL-NOSIMPLIFY-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NOSIMPLIFY:       vector.ph:
@@ -287,6 +292,8 @@ define void @bug18724() {
 ;
 ; VEC-LABEL: @bug18724(
 ; VEC-NEXT:  entry:
+; VEC-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
+; VEC-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; VEC-NEXT:    br label [[FOR_BODY14:%.*]]
 ; VEC:       for.body14:
 ; VEC-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -301,13 +308,16 @@ define void @bug18724() {
 ; VEC:       for.inc23:
 ; VEC-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
 ; VEC-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; VEC-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
+; VEC-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
+; VEC-NEXT:    call void @llvm.assume(i1 [[CMP13]])
 ; VEC-NEXT:    br label [[FOR_BODY14]]
 ;
 entry:
   br label %for.body9
 
 for.body9:
-  br i1 undef, label %for.inc26, label %for.body14
+  br i1 %cond, label %for.inc26, label %for.body14
 
 for.body14:
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ undef, %for.body9 ]

From 8b1fa076397555968ffa7dcda5ef91715cec5c8e Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 14:11:55 +0000
Subject: [PATCH 0230/1176] [CVP] Remove unnecessary checks for empty GNWR; NFC

The guaranteed no-wrap region is never empty, it always contains at
least zero, so these optimizations don't ever apply.

To make this more obviously true, replace the conversative return
in makeGNWR with an assertion.

llvm-svn: 361698
---
 llvm/lib/IR/ConstantRange.cpp                 |  3 +-
 .../Scalar/CorrelatedValuePropagation.cpp     | 38 ++++++-------------
 2 files changed, 12 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index 549886271ff87..0d44c3815b3b4 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -238,8 +238,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
 
   switch (BinOp) {
   default:
-    // Conservative answer: empty set
-    return getEmpty(BitWidth);
+    llvm_unreachable("Unsupported binary op");
 
   case Instruction::Add: {
     if (Unsigned)
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 2c31e4aa6cd16..4e4715be61aed 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -400,15 +400,12 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
 
 // See if we can prove that the given overflow intrinsic will not overflow.
 static bool willNotOverflow(WithOverflowInst *WO, LazyValueInfo *LVI) {
-  Value *RHS = WO->getRHS();
-  ConstantRange RRange = LVI->getConstantRange(RHS, WO->getParent(), WO);
+  ConstantRange LRange = LVI->getConstantRange(
+      WO->getLHS(), WO->getParent(), WO);
+  ConstantRange RRange = LVI->getConstantRange(
+      WO->getRHS(), WO->getParent(), WO);
   ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
       WO->getBinaryOp(), RRange, WO->getNoWrapKind());
-  // As an optimization, do not compute LRange if we do not need it.
-  if (NWRegion.isEmptySet())
-    return false;
-  Value *LHS = WO->getLHS();
-  ConstantRange LRange = LVI->getConstantRange(LHS, WO->getParent(), WO);
   return NWRegion.contains(LRange);
 }
 
@@ -626,36 +623,23 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
   Value *LHS = BinOp->getOperand(0);
   Value *RHS = BinOp->getOperand(1);
 
+  ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
   ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
 
-  // Initialize LRange only if we need it. If we know that guaranteed no wrap
-  // range for the given RHS range is empty don't spend time calculating the
-  // range for the LHS.
-  Optional<ConstantRange> LRange;
-  auto LazyLRange = [&] () {
-      if (!LRange)
-        LRange = LVI->getConstantRange(LHS, BB, BinOp);
-      return LRange.getValue();
-  };
-
   bool Changed = false;
   if (!NUW) {
     ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
         BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap);
-    if (!NUWRange.isEmptySet()) {
-      bool NewNUW = NUWRange.contains(LazyLRange());
-      BinOp->setHasNoUnsignedWrap(NewNUW);
-      Changed |= NewNUW;
-    }
+    bool NewNUW = NUWRange.contains(LRange);
+    BinOp->setHasNoUnsignedWrap(NewNUW);
+    Changed |= NewNUW;
   }
   if (!NSW) {
     ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
         BinOp->getOpcode(), RRange, OBO::NoSignedWrap);
-    if (!NSWRange.isEmptySet()) {
-      bool NewNSW = NSWRange.contains(LazyLRange());
-      BinOp->setHasNoSignedWrap(NewNSW);
-      Changed |= NewNSW;
-    }
+    bool NewNSW = NSWRange.contains(LRange);
+    BinOp->setHasNoSignedWrap(NewNSW);
+    Changed |= NewNSW;
   }
 
   return Changed;

From 3c7edb2de56e68bf83c8dde040a509f209798c16 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 14:33:30 +0000
Subject: [PATCH 0231/1176] [LoopVectorize] Fix test by regenerating checks

llvm-svn: 361699
---
 llvm/test/Transforms/LoopVectorize/if-pred-stores.ll | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 353087f66e537..f82311e1c120e 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -197,8 +197,6 @@ for.end:
 define void @bug18724(i1 %cond) {
 ; UNROLL-LABEL: @bug18724(
 ; UNROLL-NEXT:  entry:
-; UNROLL-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
-; UNROLL-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; UNROLL-NEXT:    br label [[FOR_BODY14:%.*]]
 ; UNROLL:       for.body14:
 ; UNROLL-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -213,9 +211,6 @@ define void @bug18724(i1 %cond) {
 ; UNROLL:       for.inc23:
 ; UNROLL-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
 ; UNROLL-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
-; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
-; UNROLL-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; UNROLL-NEXT:    call void @llvm.assume(i1 [[CMP13]])
 ; UNROLL-NEXT:    br label [[FOR_BODY14]]
 ;
 ; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
@@ -292,8 +287,6 @@ define void @bug18724(i1 %cond) {
 ;
 ; VEC-LABEL: @bug18724(
 ; VEC-NEXT:  entry:
-; VEC-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
-; VEC-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; VEC-NEXT:    br label [[FOR_BODY14:%.*]]
 ; VEC:       for.body14:
 ; VEC-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -308,9 +301,6 @@ define void @bug18724(i1 %cond) {
 ; VEC:       for.inc23:
 ; VEC-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
 ; VEC-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
-; VEC-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
-; VEC-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; VEC-NEXT:    call void @llvm.assume(i1 [[CMP13]])
 ; VEC-NEXT:    br label [[FOR_BODY14]]
 ;
 entry:

From c9de92ee76f6fa10fc79b3795f9f313ef8326c40 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 15:06:54 +0000
Subject: [PATCH 0232/1176] [X86] Add tests for min/maxnum with const operand;
 NFC

llvm-svn: 361700
---
 llvm/test/CodeGen/X86/fmaxnum.ll | 68 ++++++++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/fminnum.ll | 68 ++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)

diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll
index 91ca96e35ead0..cfe77f47db0fb 100644
--- a/llvm/test/CodeGen/X86/fmaxnum.ll
+++ b/llvm/test/CodeGen/X86/fmaxnum.ll
@@ -469,5 +469,73 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double
   ret <2 x double> %r
 }
 
+define float @test_maxnum_const_op1(float %x) {
+; SSE-LABEL: test_maxnum_const_op1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    cmpunordss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm3
+; SSE-NEXT:    andps %xmm2, %xmm3
+; SSE-NEXT:    maxss %xmm0, %xmm2
+; SSE-NEXT:    andnps %xmm2, %xmm1
+; SSE-NEXT:    orps %xmm3, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_maxnum_const_op1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_maxnum_const_op1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT:    vmaxss %xmm0, %xmm2, %xmm1
+; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %r = call float @llvm.maxnum.f32(float 1.0, float %x)
+  ret float %r
+}
+
+define float @test_maxnum_const_op2(float %x) {
+; SSE-LABEL: test_maxnum_const_op2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    cmpunordss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm3
+; SSE-NEXT:    andps %xmm2, %xmm3
+; SSE-NEXT:    maxss %xmm0, %xmm2
+; SSE-NEXT:    andnps %xmm2, %xmm1
+; SSE-NEXT:    orps %xmm3, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_maxnum_const_op2:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_maxnum_const_op2:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT:    vmaxss %xmm0, %xmm2, %xmm1
+; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %r = call float @llvm.maxnum.f32(float %x, float 1.0)
+  ret float %r
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }
 
diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll
index 8faddb4662f91..bbf48deeebcf1 100644
--- a/llvm/test/CodeGen/X86/fminnum.ll
+++ b/llvm/test/CodeGen/X86/fminnum.ll
@@ -469,5 +469,73 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float>
   ret <4 x float> %r
 }
 
+define float @test_minnum_const_op1(float %x) {
+; SSE-LABEL: test_minnum_const_op1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    cmpunordss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm3
+; SSE-NEXT:    andps %xmm2, %xmm3
+; SSE-NEXT:    minss %xmm0, %xmm2
+; SSE-NEXT:    andnps %xmm2, %xmm1
+; SSE-NEXT:    orps %xmm3, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_minnum_const_op1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vminss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_minnum_const_op1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT:    vminss %xmm0, %xmm2, %xmm1
+; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %r = call float @llvm.minnum.f32(float 1.0, float %x)
+  ret float %r
+}
+
+define float @test_minnum_const_op2(float %x) {
+; SSE-LABEL: test_minnum_const_op2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    cmpunordss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm3
+; SSE-NEXT:    andps %xmm2, %xmm3
+; SSE-NEXT:    minss %xmm0, %xmm2
+; SSE-NEXT:    andnps %xmm2, %xmm1
+; SSE-NEXT:    orps %xmm3, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test_minnum_const_op2:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vminss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test_minnum_const_op2:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT:    vminss %xmm0, %xmm2, %xmm1
+; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
+; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    retq
+  %r = call float @llvm.minnum.f32(float %x, float 1.0)
+  ret float %r
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }
 

From 3f0905e46f39358758fff2cac8dc597e5cd063b0 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 25 May 2019 15:28:55 +0000
Subject: [PATCH 0233/1176] [SelectionDAG] define binops as a superset of
 commutative binops

The test diffs show improved vector narrowing for integer min/max opcodes because
those were all absent from the list. I'm not sure if we can expose functional diffs
for all of the moved/added opcodes though.

It seems like we are missing an AVX512 opportunity to use 256-bit ops in place of
512-bit ops on some tests/targets, but I think that can be a follow-up.

Preliminary steps to make sure the callers are not misusing these queries:
rL361268
rL361547

Differential Revision: https://reviews.llvm.org/D62191

llvm-svn: 361701
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  59 ++++------
 .../CodeGen/X86/horizontal-reduce-smax.ll     |  44 +++----
 .../CodeGen/X86/horizontal-reduce-smin.ll     |  44 +++----
 .../CodeGen/X86/horizontal-reduce-umax.ll     |  44 +++----
 .../CodeGen/X86/horizontal-reduce-umin.ll     |  44 +++----
 .../CodeGen/X86/vector-reduce-smax-widen.ll   | 111 +++++++++++-------
 llvm/test/CodeGen/X86/vector-reduce-smax.ll   | 111 +++++++++++-------
 .../CodeGen/X86/vector-reduce-smin-widen.ll   | 111 +++++++++++-------
 llvm/test/CodeGen/X86/vector-reduce-smin.ll   | 111 +++++++++++-------
 .../CodeGen/X86/vector-reduce-umax-widen.ll   | 111 +++++++++++-------
 llvm/test/CodeGen/X86/vector-reduce-umax.ll   | 111 +++++++++++-------
 .../CodeGen/X86/vector-reduce-umin-widen.ll   | 111 +++++++++++-------
 llvm/test/CodeGen/X86/vector-reduce-umin.ll   | 111 +++++++++++-------
 13 files changed, 658 insertions(+), 465 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index b1a64744f0642..a7b7a7dd6f1ca 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2173,39 +2173,6 @@ class TargetLoweringBase {
     return false;
   }
 
-  /// Return true if the node is a math/logic binary operator.
-  virtual bool isBinOp(unsigned Opcode) const {
-    switch (Opcode) {
-    case ISD::ADD:
-    case ISD::SUB:
-    case ISD::MUL:
-    case ISD::AND:
-    case ISD::OR:
-    case ISD::XOR:
-    case ISD::SHL:
-    case ISD::SRL:
-    case ISD::SRA:
-    case ISD::SDIV:
-    case ISD::UDIV:
-    case ISD::SREM:
-    case ISD::UREM:
-    case ISD::FADD:
-    case ISD::FSUB:
-    case ISD::FMUL:
-    case ISD::FDIV:
-    case ISD::FREM:
-    case ISD::FMINNUM:
-    case ISD::FMAXNUM:
-    case ISD::FMINNUM_IEEE:
-    case ISD::FMAXNUM_IEEE:
-    case ISD::FMAXIMUM:
-    case ISD::FMINIMUM:
-      return true;
-    default:
-      return false;
-    }
-  }
-
   /// Returns true if the opcode is a commutative binary operation.
   virtual bool isCommutativeBinOp(unsigned Opcode) const {
     // FIXME: This should get its info from the td file.
@@ -2233,6 +2200,8 @@ class TargetLoweringBase {
     case ISD::UADDSAT:
     case ISD::FMINNUM:
     case ISD::FMAXNUM:
+    case ISD::FMINNUM_IEEE:
+    case ISD::FMAXNUM_IEEE:
     case ISD::FMINIMUM:
     case ISD::FMAXIMUM:
       return true;
@@ -2240,6 +2209,30 @@ class TargetLoweringBase {
     }
   }
 
+  /// Return true if the node is a math/logic binary operator.
+  virtual bool isBinOp(unsigned Opcode) const {
+    // A commutative binop must be a binop.
+    if (isCommutativeBinOp(Opcode))
+      return true;
+    // These are non-commutative binops.
+    switch (Opcode) {
+    case ISD::SUB:
+    case ISD::SHL:
+    case ISD::SRL:
+    case ISD::SRA:
+    case ISD::SDIV:
+    case ISD::UDIV:
+    case ISD::SREM:
+    case ISD::UREM:
+    case ISD::FSUB:
+    case ISD::FDIV:
+    case ISD::FREM:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   /// Return true if it's free to truncate a value of type FromTy to type
   /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
   /// by referencing its sub-register AX.
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index 55bac985d6874..f5328cf151d1c 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -557,9 +557,9 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v4i64:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -621,11 +621,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X86-AVX2-LABEL: test_reduce_v8i32:
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -677,11 +677,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX2-LABEL: test_reduce_v8i32:
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -689,11 +689,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v8i32:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1276,9 +1276,9 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1359,11 +1359,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -1431,11 +1431,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -1445,11 +1445,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index 7036d93759123..212467078d257 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -561,9 +561,9 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v4i64:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -625,11 +625,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X86-AVX2-LABEL: test_reduce_v8i32:
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -681,11 +681,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX2-LABEL: test_reduce_v8i32:
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -693,11 +693,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v8i32:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1280,9 +1280,9 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1363,11 +1363,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -1435,11 +1435,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -1449,11 +1449,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index a373794d218c5..076f4d7d1d268 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -666,9 +666,9 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v4i64:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -739,11 +739,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X86-AVX2-LABEL: test_reduce_v8i32:
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -804,11 +804,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX2-LABEL: test_reduce_v8i32:
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -816,11 +816,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v8i32:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1454,9 +1454,9 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1552,11 +1552,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -1639,11 +1639,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -1653,11 +1653,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 6ad9007cd016c..5678671042220 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -606,9 +606,9 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v4i64:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -679,11 +679,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X86-AVX2-LABEL: test_reduce_v8i32:
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -744,11 +744,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX2-LABEL: test_reduce_v8i32:
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -756,11 +756,11 @@ define i32 @test_reduce_v8i32(<8 x i32> %a0) {
 ; X64-AVX512-LABEL: test_reduce_v8i32:
 ; X64-AVX512:       ## %bb.0:
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1358,9 +1358,9 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
@@ -1456,11 +1456,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX2:       ## %bb.0:
 ; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX2-NEXT:    vzeroupper
 ; X86-AVX2-NEXT:    retl
@@ -1543,11 +1543,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX2:       ## %bb.0:
 ; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
@@ -1557,11 +1557,11 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; X64-AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX512-NEXT:    vzeroupper
 ; X64-AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
index 7be51758b1ca7..61cc52d8e2eab 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
@@ -186,9 +186,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -352,17 +352,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -647,18 +659,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -799,11 +824,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -811,11 +836,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -887,11 +912,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -901,11 +926,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1009,11 +1034,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1024,11 +1049,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 19ac789e269a6..8e21e169b8d4c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -186,9 +186,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -352,17 +352,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -647,18 +659,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -873,11 +898,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -885,11 +910,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -961,11 +986,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -975,11 +1000,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1083,11 +1108,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1098,11 +1123,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
index dcc522ad2881e..8359d7d5fbbc3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
@@ -185,9 +185,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -351,17 +351,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -646,18 +658,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -798,11 +823,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -810,11 +835,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -886,11 +911,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -900,11 +925,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1008,11 +1033,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpminsd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1023,11 +1048,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 0b09c944347b1..0661bf5b2d361 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -185,9 +185,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -351,17 +351,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -646,18 +658,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.smin.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -872,11 +897,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -884,11 +909,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -960,11 +985,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -974,11 +999,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1082,11 +1107,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpminsd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1097,11 +1122,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
index 46c95994b5199..932fbce3ffc2f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
@@ -199,9 +199,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -381,17 +381,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -704,18 +716,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -874,11 +899,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -886,11 +911,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -977,11 +1002,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -991,11 +1016,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1126,11 +1151,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1141,11 +1166,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index c56ca549fd7b7..f6a2a57e4b8ca 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -199,9 +199,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -381,17 +381,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -704,18 +716,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umax.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -923,11 +948,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -935,11 +960,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1026,11 +1051,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1040,11 +1065,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1175,11 +1200,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1190,11 +1215,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
index 9fac3bb10240d..2f95c7eb0c0b6 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
@@ -198,9 +198,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -380,17 +380,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -703,18 +715,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -873,11 +898,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -885,11 +910,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -976,11 +1001,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -990,11 +1015,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1125,11 +1150,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpminud %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1140,11 +1165,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index d7e9838be3008..f1d77e32f7fec 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -198,9 +198,9 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512VL-LABEL: test_v4i64:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512VL-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
@@ -380,17 +380,29 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v8i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v8i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v8i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v8i64(<8 x i64> %a0)
   ret i64 %1
 }
@@ -703,18 +715,31 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: test_v16i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    vzeroupper
-; AVX512-NEXT:    retq
+; AVX512BW-LABEL: test_v16i64:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512BW-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: test_v16i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512VL-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.umin.i64.v16i64(<16 x i64> %a0)
   ret i64 %1
 }
@@ -922,11 +947,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX2-LABEL: test_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -934,11 +959,11 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX512-LABEL: test_v8i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1025,11 +1050,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1039,11 +1064,11 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
@@ -1174,11 +1199,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-NEXT:    vpminud %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1189,11 +1214,11 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq

From 34d5a74b03ffde54be78cd78aac448c70cfb0228 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 25 May 2019 16:33:17 +0000
Subject: [PATCH 0234/1176] [X86][SSE] vector-sext - cleanup prefix lists

Add X32-SSE common prefix to merge some checks

llvm-svn: 361702
---
 llvm/test/CodeGen/X86/vector-sext-widen.ll | 70 ++++++++--------------
 llvm/test/CodeGen/X86/vector-sext.ll       | 70 ++++++++--------------
 2 files changed, 52 insertions(+), 88 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-sext-widen.ll b/llvm/test/CodeGen/X86/vector-sext-widen.ll
index e58b53fc8cf27..c22ffd186c47c 100644
--- a/llvm/test/CodeGen/X86/vector-sext-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-sext-widen.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 ;
 ; Just two 32-bit runs to make sure we do reasonable things there.
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X32-SSE,X32-SSE2
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X32-SSE,X32-SSE41
 
 define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
 ; SSE2-LABEL: sext_16i8_to_8i16:
@@ -5795,41 +5795,23 @@ define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: sext_32xi1_to_32xi8:
-; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    pushl %ebp
-; X32-SSE2-NEXT:    movl %esp, %ebp
-; X32-SSE2-NEXT:    andl $-16, %esp
-; X32-SSE2-NEXT:    subl $16, %esp
-; X32-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
-; X32-SSE2-NEXT:    pcmpeqw 40(%ebp), %xmm1
-; X32-SSE2-NEXT:    pcmpeqw 24(%ebp), %xmm0
-; X32-SSE2-NEXT:    packsswb %xmm1, %xmm0
-; X32-SSE2-NEXT:    pcmpeqw 72(%ebp), %xmm3
-; X32-SSE2-NEXT:    pcmpeqw 56(%ebp), %xmm2
-; X32-SSE2-NEXT:    packsswb %xmm3, %xmm2
-; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE2-NEXT:    movl %ebp, %esp
-; X32-SSE2-NEXT:    popl %ebp
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
-; X32-SSE41:       # %bb.0:
-; X32-SSE41-NEXT:    pushl %ebp
-; X32-SSE41-NEXT:    movl %esp, %ebp
-; X32-SSE41-NEXT:    andl $-16, %esp
-; X32-SSE41-NEXT:    subl $16, %esp
-; X32-SSE41-NEXT:    movdqa 8(%ebp), %xmm3
-; X32-SSE41-NEXT:    pcmpeqw 40(%ebp), %xmm1
-; X32-SSE41-NEXT:    pcmpeqw 24(%ebp), %xmm0
-; X32-SSE41-NEXT:    packsswb %xmm1, %xmm0
-; X32-SSE41-NEXT:    pcmpeqw 72(%ebp), %xmm3
-; X32-SSE41-NEXT:    pcmpeqw 56(%ebp), %xmm2
-; X32-SSE41-NEXT:    packsswb %xmm3, %xmm2
-; X32-SSE41-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE41-NEXT:    movl %ebp, %esp
-; X32-SSE41-NEXT:    popl %ebp
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: sext_32xi1_to_32xi8:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    pushl %ebp
+; X32-SSE-NEXT:    movl %esp, %ebp
+; X32-SSE-NEXT:    andl $-16, %esp
+; X32-SSE-NEXT:    subl $16, %esp
+; X32-SSE-NEXT:    movdqa 8(%ebp), %xmm3
+; X32-SSE-NEXT:    pcmpeqw 40(%ebp), %xmm1
+; X32-SSE-NEXT:    pcmpeqw 24(%ebp), %xmm0
+; X32-SSE-NEXT:    packsswb %xmm1, %xmm0
+; X32-SSE-NEXT:    pcmpeqw 72(%ebp), %xmm3
+; X32-SSE-NEXT:    pcmpeqw 56(%ebp), %xmm2
+; X32-SSE-NEXT:    packsswb %xmm3, %xmm2
+; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE-NEXT:    movl %ebp, %esp
+; X32-SSE-NEXT:    popl %ebp
+; X32-SSE-NEXT:    retl
   %a = icmp eq <32 x i16> %c1, %c2
   %b = sext <32 x i1> %a to <32 x i8>
   ret <32 x i8> %b
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index bea09bbad91a7..3a9dbaeb57a54 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 ;
 ; Just two 32-bit runs to make sure we do reasonable things there.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X32-SSE,X32-SSE2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X32-SSE,X32-SSE41
 
 define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
 ; SSE2-LABEL: sext_16i8_to_8i16:
@@ -5816,41 +5816,23 @@ define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: sext_32xi1_to_32xi8:
-; X32-SSE2:       # %bb.0:
-; X32-SSE2-NEXT:    pushl %ebp
-; X32-SSE2-NEXT:    movl %esp, %ebp
-; X32-SSE2-NEXT:    andl $-16, %esp
-; X32-SSE2-NEXT:    subl $16, %esp
-; X32-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
-; X32-SSE2-NEXT:    pcmpeqw 40(%ebp), %xmm1
-; X32-SSE2-NEXT:    pcmpeqw 24(%ebp), %xmm0
-; X32-SSE2-NEXT:    packsswb %xmm1, %xmm0
-; X32-SSE2-NEXT:    pcmpeqw 72(%ebp), %xmm3
-; X32-SSE2-NEXT:    pcmpeqw 56(%ebp), %xmm2
-; X32-SSE2-NEXT:    packsswb %xmm3, %xmm2
-; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE2-NEXT:    movl %ebp, %esp
-; X32-SSE2-NEXT:    popl %ebp
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
-; X32-SSE41:       # %bb.0:
-; X32-SSE41-NEXT:    pushl %ebp
-; X32-SSE41-NEXT:    movl %esp, %ebp
-; X32-SSE41-NEXT:    andl $-16, %esp
-; X32-SSE41-NEXT:    subl $16, %esp
-; X32-SSE41-NEXT:    movdqa 8(%ebp), %xmm3
-; X32-SSE41-NEXT:    pcmpeqw 40(%ebp), %xmm1
-; X32-SSE41-NEXT:    pcmpeqw 24(%ebp), %xmm0
-; X32-SSE41-NEXT:    packsswb %xmm1, %xmm0
-; X32-SSE41-NEXT:    pcmpeqw 72(%ebp), %xmm3
-; X32-SSE41-NEXT:    pcmpeqw 56(%ebp), %xmm2
-; X32-SSE41-NEXT:    packsswb %xmm3, %xmm2
-; X32-SSE41-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE41-NEXT:    movl %ebp, %esp
-; X32-SSE41-NEXT:    popl %ebp
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: sext_32xi1_to_32xi8:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    pushl %ebp
+; X32-SSE-NEXT:    movl %esp, %ebp
+; X32-SSE-NEXT:    andl $-16, %esp
+; X32-SSE-NEXT:    subl $16, %esp
+; X32-SSE-NEXT:    movdqa 8(%ebp), %xmm3
+; X32-SSE-NEXT:    pcmpeqw 40(%ebp), %xmm1
+; X32-SSE-NEXT:    pcmpeqw 24(%ebp), %xmm0
+; X32-SSE-NEXT:    packsswb %xmm1, %xmm0
+; X32-SSE-NEXT:    pcmpeqw 72(%ebp), %xmm3
+; X32-SSE-NEXT:    pcmpeqw 56(%ebp), %xmm2
+; X32-SSE-NEXT:    packsswb %xmm3, %xmm2
+; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE-NEXT:    movl %ebp, %esp
+; X32-SSE-NEXT:    popl %ebp
+; X32-SSE-NEXT:    retl
   %a = icmp eq <32 x i16> %c1, %c2
   %b = sext <32 x i1> %a to <32 x i8>
   ret <32 x i8> %b

From 6bb5041e9414abd2b16460717fdd7ed4d370bde1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 16:44:14 +0000
Subject: [PATCH 0235/1176] [LVI][CVP] Add support for saturating add/sub

Adds support for the uadd.sat family of intrinsics in LVI, based on
ConstantRange methods from D60946.

Differential Revision: https://reviews.llvm.org/D62447

llvm-svn: 361703
---
 llvm/lib/Analysis/LazyValueInfo.cpp           | 36 +++++++++++++++++++
 .../CorrelatedValuePropagation/basic.ll       |  8 ++---
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 4feff1c776439..280dd3ea6043d 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -432,6 +432,8 @@ namespace {
                            BasicBlock *BB);
   bool solveBlockValueOverflowIntrinsic(
       ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
+  bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
+                                BasicBlock *BB);
   void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
                                                      ValueLatticeElement &BBLV,
                                                      Instruction *BBI);
@@ -649,6 +651,9 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
       if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
         if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
           return solveBlockValueOverflowIntrinsic(Res, WO, BB);
+
+    if (auto *II = dyn_cast<IntrinsicInst>(BBI))
+      return solveBlockValueIntrinsic(Res, II, BB);
   }
 
   LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -1112,6 +1117,37 @@ bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
       });
 }
 
+bool LazyValueInfoImpl::solveBlockValueIntrinsic(
+    ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::uadd_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.uadd_sat(CR2);
+        });
+  case Intrinsic::usub_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.usub_sat(CR2);
+        });
+  case Intrinsic::sadd_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.sadd_sat(CR2);
+        });
+  case Intrinsic::ssub_sat:
+    return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+        [](const ConstantRange &CR1, const ConstantRange &CR2) {
+          return CR1.ssub_sat(CR2);
+        });
+  default:
+    LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+                      << "' - overdefined (unknown intrinsic).\n");
+    BBLV = ValueLatticeElement::getOverdefined();
+    return true;
+  }
+}
+
 static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
                                                      bool isTrueDest) {
   Value *LHS = ICI->getOperand(0);
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
index 19b149a31b2a9..a063d0cf5bd92 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -995,7 +995,7 @@ define i1 @uadd_sat_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[VAL]], 100
 ; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       exit1:
-; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       exit2:
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -1018,7 +1018,7 @@ define i1 @usub_sat_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult i32 [[VAL]], -101
 ; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       exit1:
-; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       exit2:
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -1041,7 +1041,7 @@ define i1 @sadd_sat_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[VAL]], -2147483548
 ; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       exit1:
-; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       exit2:
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
@@ -1064,7 +1064,7 @@ define i1 @ssub_sat_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[VAL]], 2147483547
 ; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       exit1:
-; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       exit2:
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;

From d87eceda0e6d5de6b2d58430a0124b6f7428695d Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 25 May 2019 16:44:29 +0000
Subject: [PATCH 0236/1176] [X86] Combine fminnum/fmaxnum with non-nan operand
 to fmin/fmax

If we have a known non-nan operand, place it in the second operand
of fmin/fmax that is returned if either operand is nan.

Differential Revision: https://reviews.llvm.org/D62448

llvm-svn: 361704
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++--
 llvm/test/CodeGen/X86/extract-fp.ll     | 20 +--------
 llvm/test/CodeGen/X86/fmaxnum.ll        | 60 +++++--------------------
 llvm/test/CodeGen/X86/fminnum.ll        | 60 +++++--------------------
 4 files changed, 29 insertions(+), 121 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 246e494de782c..e124b7d6c0795 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40511,9 +40511,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
-  // TODO: If an operand is already known to be a NaN or not a NaN, this
-  //       should be an optional swap and FMAX/FMIN.
-
   EVT VT = N->getValueType(0);
   if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
         (Subtarget.hasSSE2() && VT == MVT::f64) ||
@@ -40530,6 +40527,13 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
   if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
     return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
 
+  // If one of the operands is known non-NaN use the native min/max instructions
+  // with the non-NaN input as second operand.
+  if (DAG.isKnownNeverNaN(Op1))
+    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
+  if (DAG.isKnownNeverNaN(Op0))
+    return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
+
   // If we have to respect NaN inputs, this takes at least 3 instructions.
   // Favor a library call when operating on a scalar and minimizing code size.
   if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
diff --git a/llvm/test/CodeGen/X86/extract-fp.ll b/llvm/test/CodeGen/X86/extract-fp.ll
index ac5a43d046c1e..27430efa7822c 100644
--- a/llvm/test/CodeGen/X86/extract-fp.ll
+++ b/llvm/test/CodeGen/X86/extract-fp.ll
@@ -86,16 +86,8 @@ define float @ext_frem_v4f32_constant_op0(<4 x float> %x) {
 define float @ext_maxnum_v4f32(<4 x float> %x) nounwind {
 ; CHECK-LABEL: ext_maxnum_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT:    movaps %xmm0, %xmm1
-; CHECK-NEXT:    cmpunordss %xmm0, %xmm1
-; CHECK-NEXT:    movaps %xmm1, %xmm3
-; CHECK-NEXT:    andps %xmm2, %xmm3
-; CHECK-NEXT:    maxss %xmm0, %xmm2
-; CHECK-NEXT:    andnps %xmm2, %xmm1
-; CHECK-NEXT:    orps %xmm3, %xmm1
-; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    maxss {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>)
   %r = extractelement <4 x float> %v, i32 2
@@ -105,16 +97,8 @@ define float @ext_maxnum_v4f32(<4 x float> %x) nounwind {
 define double @ext_minnum_v2f64(<2 x double> %x) nounwind {
 ; CHECK-LABEL: ext_minnum_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT:    movapd %xmm0, %xmm1
-; CHECK-NEXT:    cmpunordsd %xmm0, %xmm1
-; CHECK-NEXT:    movapd %xmm1, %xmm3
-; CHECK-NEXT:    andpd %xmm2, %xmm3
-; CHECK-NEXT:    minsd %xmm0, %xmm2
-; CHECK-NEXT:    andnpd %xmm2, %xmm1
-; CHECK-NEXT:    orpd %xmm3, %xmm1
-; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    minsd {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0.0, double 1.0>, <2 x double> %x)
   %r = extractelement <2 x double> %v, i32 1
diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll
index cfe77f47db0fb..e308412f7cada 100644
--- a/llvm/test/CodeGen/X86/fmaxnum.ll
+++ b/llvm/test/CodeGen/X86/fmaxnum.ll
@@ -472,33 +472,13 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double
 define float @test_maxnum_const_op1(float %x) {
 ; SSE-LABEL: test_maxnum_const_op1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT:    movaps %xmm0, %xmm1
-; SSE-NEXT:    cmpunordss %xmm0, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm3
-; SSE-NEXT:    andps %xmm2, %xmm3
-; SSE-NEXT:    maxss %xmm0, %xmm2
-; SSE-NEXT:    andnps %xmm2, %xmm1
-; SSE-NEXT:    orps %xmm3, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    maxss {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: test_maxnum_const_op1:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX512-LABEL: test_maxnum_const_op1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT:    vmaxss %xmm0, %xmm2, %xmm1
-; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: test_maxnum_const_op1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmaxss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
   %r = call float @llvm.maxnum.f32(float 1.0, float %x)
   ret float %r
 }
@@ -506,33 +486,13 @@ define float @test_maxnum_const_op1(float %x) {
 define float @test_maxnum_const_op2(float %x) {
 ; SSE-LABEL: test_maxnum_const_op2:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT:    movaps %xmm0, %xmm1
-; SSE-NEXT:    cmpunordss %xmm0, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm3
-; SSE-NEXT:    andps %xmm2, %xmm3
-; SSE-NEXT:    maxss %xmm0, %xmm2
-; SSE-NEXT:    andnps %xmm2, %xmm1
-; SSE-NEXT:    orps %xmm3, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    maxss {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: test_maxnum_const_op2:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vmaxss %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX512-LABEL: test_maxnum_const_op2:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT:    vmaxss %xmm0, %xmm2, %xmm1
-; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: test_maxnum_const_op2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmaxss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
   %r = call float @llvm.maxnum.f32(float %x, float 1.0)
   ret float %r
 }
diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll
index bbf48deeebcf1..33accf2e49c1f 100644
--- a/llvm/test/CodeGen/X86/fminnum.ll
+++ b/llvm/test/CodeGen/X86/fminnum.ll
@@ -472,33 +472,13 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float>
 define float @test_minnum_const_op1(float %x) {
 ; SSE-LABEL: test_minnum_const_op1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT:    movaps %xmm0, %xmm1
-; SSE-NEXT:    cmpunordss %xmm0, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm3
-; SSE-NEXT:    andps %xmm2, %xmm3
-; SSE-NEXT:    minss %xmm0, %xmm2
-; SSE-NEXT:    andnps %xmm2, %xmm1
-; SSE-NEXT:    orps %xmm3, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    minss {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: test_minnum_const_op1:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vminss %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX512-LABEL: test_minnum_const_op1:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT:    vminss %xmm0, %xmm2, %xmm1
-; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: test_minnum_const_op1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
   %r = call float @llvm.minnum.f32(float 1.0, float %x)
   ret float %r
 }
@@ -506,33 +486,13 @@ define float @test_minnum_const_op1(float %x) {
 define float @test_minnum_const_op2(float %x) {
 ; SSE-LABEL: test_minnum_const_op2:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT:    movaps %xmm0, %xmm1
-; SSE-NEXT:    cmpunordss %xmm0, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm3
-; SSE-NEXT:    andps %xmm2, %xmm3
-; SSE-NEXT:    minss %xmm0, %xmm2
-; SSE-NEXT:    andnps %xmm2, %xmm1
-; SSE-NEXT:    orps %xmm3, %xmm1
-; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    minss {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: test_minnum_const_op2:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT:    vminss %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX512-LABEL: test_minnum_const_op2:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT:    vminss %xmm0, %xmm2, %xmm1
-; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
-; AVX512-NEXT:    vmovss %xmm2, %xmm0, %xmm1 {%k1}
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: test_minnum_const_op2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
   %r = call float @llvm.minnum.f32(float %x, float 1.0)
   ret float %r
 }

From b0fd12b689297dfb3bb70a6d5dfabb6bebf93ed1 Mon Sep 17 00:00:00 2001
From: Robert Widmann <devteam.codafi@gmail.com>
Date: Sat, 25 May 2019 16:47:27 +0000
Subject: [PATCH 0237/1176] [LLVM-C] Add Accessor for Mach-O Universal Binary
 Slices

Summary: Allow for retrieving an object file corresponding to an architecture-specific slice in a Mach-O universal binary file.

Reviewers: whitequark, deadalnix

Reviewed By: whitequark

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60378

llvm-svn: 361705
---
 llvm/include/llvm-c/Object.h | 16 ++++++++++++++++
 llvm/lib/Object/Object.cpp   | 15 +++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/llvm/include/llvm-c/Object.h b/llvm/include/llvm-c/Object.h
index a32f6514094d0..1e9b703a68ff1 100644
--- a/llvm/include/llvm-c/Object.h
+++ b/llvm/include/llvm-c/Object.h
@@ -102,6 +102,22 @@ LLVMMemoryBufferRef LLVMBinaryCopyMemoryBuffer(LLVMBinaryRef BR);
  */
 LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR);
 
+/*
+ * For a Mach-O universal binary file, retrieves the object file corresponding
+ * to the given architecture if it is present as a slice.
+ *
+ * If NULL is returned, the \p ErrorMessage parameter is populated with the
+ * error's description.  It is then the caller's responsibility to free this
+ * message by calling \c LLVMDisposeMessage.
+ *
+ * It is the responsiblity of the caller to free the returned object file by
+ * calling \c LLVMDisposeBinary.
+ */
+LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
+                                                        const char *Arch,
+                                                        size_t ArchLen,
+                                                        char **ErrorMessage);
+
 /**
  * Retrieve a copy of the section iterator for this object file.
  *
diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp
index e2511b7aed032..d84798cc6dd0f 100644
--- a/llvm/lib/Object/Object.cpp
+++ b/llvm/lib/Object/Object.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/MachOUniversal.h"
 
 using namespace llvm;
 using namespace object;
@@ -131,6 +132,20 @@ LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) {
   return BinaryTypeMapper::mapBinaryTypeToLLVMBinaryType(unwrap(BR)->getType());
 }
 
+LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
+                                                        const char *Arch,
+                                                        size_t ArchLen,
+                                                        char **ErrorMessage) {
+  auto universal = cast<MachOUniversalBinary>(unwrap(BR));
+  Expected<std::unique_ptr<ObjectFile>> ObjOrErr(
+      universal->getObjectForArch({Arch, ArchLen}));
+  if (!ObjOrErr) {
+    *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str());
+    return nullptr;
+  }
+  return wrap(ObjOrErr.get().release());
+}
+
 LLVMSectionIteratorRef LLVMObjectFileCopySectionIterator(LLVMBinaryRef BR) {
   auto OF = cast<ObjectFile>(unwrap(BR));
   auto sections = OF->sections();

From 40fa52b1749a6286331e993177043fc51812f8a1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 25 May 2019 18:02:17 +0000
Subject: [PATCH 0238/1176] [X86] lowerBuildVectorToBitOp - support
 build_vector(shift()) -> shift(build_vector(),C)

Commonly occurs in sign-extension cases

llvm-svn: 361706
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 20 ++++++++
 .../test/CodeGen/X86/rotate-extract-vector.ll |  9 ++--
 llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll |  9 ++--
 llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll | 25 ++++------
 llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll |  9 +---
 llvm/test/CodeGen/X86/vector-sext-widen.ll    | 50 +++++++------------
 llvm/test/CodeGen/X86/vector-sext.ll          | 50 +++++++------------
 7 files changed, 71 insertions(+), 101 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e124b7d6c0795..170e3cf33ba04 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8746,9 +8746,15 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
       return SDValue();
 
   // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
+  bool IsShift = false;
   switch (Opcode) {
   default:
     return SDValue();
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    IsShift = true;
+    break;
   case ISD::AND:
   case ISD::XOR:
   case ISD::OR:
@@ -8769,10 +8775,24 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
     // We expect the canonicalized RHS operand to be the constant.
     if (!isa<ConstantSDNode>(RHS))
       return SDValue();
+
+    // Extend shift amounts.
+    if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
+      if (!IsShift)
+        return SDValue();
+      RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
+    }
+
     LHSElts.push_back(LHS);
     RHSElts.push_back(RHS);
   }
 
+  // Limit to shifts by uniform immediates.
+  // TODO: Only accept vXi8/vXi64 special cases?
+  // TODO: Permit non-uniform XOP/AVX2/MULLO cases?
+  if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
+    return SDValue();
+
   SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
   SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
   return DAG.getNode(Opcode, DL, VT, LHS, RHS);
diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
index 6301f3bf747c7..4959de711720a 100644
--- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll
@@ -86,13 +86,12 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
 ; X64-NEXT:    vpextrq $1, %xmm0, %rax
 ; X64-NEXT:    movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rax
 ; X64-NEXT:    mulq %rcx
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    vprolq $57, %zmm0, %zmm0
 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; X64-NEXT:    vzeroupper
@@ -256,24 +255,22 @@ define <2 x i64> @no_extract_udiv(<2 x i64> %i) nounwind {
 ; X64-NEXT:    movabsq $-6148914691236517205, %rdi # imm = 0xAAAAAAAAAAAAAAAB
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rsi
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; X64-NEXT:    movabsq $-6180857105216966645, %rdi # imm = 0xAA392F35DC17F00B
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq $9, %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    shrq $9, %rdx
 ; X64-NEXT:    vmovq %rdx, %xmm2
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-NEXT:    vpsrlq $9, %xmm1, %xmm1
 ; X64-NEXT:    vpsllq $56, %xmm0, %xmm0
 ; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
index 62bcc54072b72..e599ceea7c9c6 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
@@ -19,7 +19,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE2-NEXT:    subq %rdx, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    addq %rdx, %rcx
-; SSE2-NEXT:    shrq $2, %rcx
 ; SSE2-NEXT:    movq %rcx, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rcx
@@ -28,9 +27,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE2-NEXT:    subq %rdx, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    addq %rdx, %rcx
-; SSE2-NEXT:    shrq $2, %rcx
 ; SSE2-NEXT:    movq %rcx, %xmm0
 ; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    psrlq $2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -43,7 +42,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE41-NEXT:    subq %rdx, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    addq %rdx, %rcx
-; SSE41-NEXT:    shrq $2, %rcx
 ; SSE41-NEXT:    movq %rcx, %xmm1
 ; SSE41-NEXT:    movq %xmm0, %rcx
 ; SSE41-NEXT:    movq %rcx, %rax
@@ -51,9 +49,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; SSE41-NEXT:    subq %rdx, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    addq %rdx, %rcx
-; SSE41-NEXT:    shrq $2, %rcx
 ; SSE41-NEXT:    movq %rcx, %xmm0
 ; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    psrlq $2, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_div7_2i64:
@@ -65,7 +63,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm1
 ; AVX-NEXT:    vmovq %xmm0, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -73,9 +70,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    vpsrlq $2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %res = udiv <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index f40a07935f501..198c6de8b0efb 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -10,32 +10,30 @@
 define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX1-LABEL: test_div7_4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT:    movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
-; AVX1-NEXT:    vmovq %rcx, %xmm2
-; AVX1-NEXT:    vmovq %xmm1, %rcx
+; AVX1-NEXT:    vmovq %rcx, %xmm1
+; AVX1-NEXT:    vmovq %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
-; AVX1-NEXT:    vmovq %rcx, %xmm1
-; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vmovq %rcx, %xmm2
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpsrlq $2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
 ; AVX1-NEXT:    mulq %rsi
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm2
 ; AVX1-NEXT:    vmovq %xmm0, %rcx
 ; AVX1-NEXT:    movq %rcx, %rax
@@ -43,10 +41,10 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX1-NEXT:    subq %rdx, %rcx
 ; AVX1-NEXT:    shrq %rcx
 ; AVX1-NEXT:    addq %rdx, %rcx
-; AVX1-NEXT:    shrq $2, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpsrlq $2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_div7_4i64:
@@ -59,7 +57,6 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm2
 ; AVX2-NEXT:    vmovq %xmm1, %rcx
 ; AVX2-NEXT:    movq %rcx, %rax
@@ -67,7 +64,6 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
@@ -76,7 +72,6 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm2
 ; AVX2-NEXT:    vmovq %xmm0, %rcx
 ; AVX2-NEXT:    movq %rcx, %rax
@@ -84,10 +79,10 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
 ; AVX2-NEXT:    subq %rdx, %rcx
 ; AVX2-NEXT:    shrq %rcx
 ; AVX2-NEXT:    addq %rdx, %rcx
-; AVX2-NEXT:    shrq $2, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlq $2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %res = udiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
index 18ecac073dfa8..495d35a0c8449 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
@@ -17,7 +17,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vmovq %xmm1, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -25,7 +24,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm1
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
@@ -35,7 +33,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm2, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -43,7 +40,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
@@ -54,7 +50,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm2, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -62,7 +57,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm2
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; AVX-NEXT:    vpextrq $1, %xmm0, %rcx
@@ -71,7 +65,6 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm3
 ; AVX-NEXT:    vmovq %xmm0, %rcx
 ; AVX-NEXT:    movq %rcx, %rax
@@ -79,11 +72,11 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
 ; AVX-NEXT:    subq %rdx, %rcx
 ; AVX-NEXT:    shrq %rcx
 ; AVX-NEXT:    addq %rdx, %rcx
-; AVX-NEXT:    shrq $2, %rcx
 ; AVX-NEXT:    vmovq %rcx, %xmm0
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; AVX-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
 ; AVX-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT:    vpsrlq $2, %zmm0, %zmm0
 ; AVX-NEXT:    retq
   %res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
   ret <8 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-sext-widen.ll b/llvm/test/CodeGen/X86/vector-sext-widen.ll
index c22ffd186c47c..c4a2286b8eac6 100644
--- a/llvm/test/CodeGen/X86/vector-sext-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-sext-widen.ll
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; SSE-NEXT:    movzbl (%rdi), %eax
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shlq $62, %rcx
-; SSE-NEXT:    sarq $63, %rcx
-; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    movq %rcx, %xmm0
 ; SSE-NEXT:    shlq $63, %rax
-; SSE-NEXT:    sarq $63, %rax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX1-NEXT:    movzbl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    shlq $63, %rax
-; AVX1-NEXT:    sarq $63, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX2-NEXT:    movzbl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movzbl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movzbl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $31, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $30, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm2
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movl %eax, %edx
 ; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
 ; X32-SSE41-NEXT:    movd %edx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $28, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -5990,22 +5980,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE2-NEXT:    movl 8(%eax), %eax
 ; X32-SSE2-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE2-NEXT:    shll $15, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    movl %edx, %eax
 ; X32-SSE2-NEXT:    shll $13, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE2-NEXT:    shll $15, %ecx
-; X32-SSE2-NEXT:    sarl $15, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    shll $15, %edx
-; X32-SSE2-NEXT:    sarl $15, %edx
 ; X32-SSE2-NEXT:    movd %edx, %xmm2
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $15, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6021,17 +6008,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE41-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE41-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE41-NEXT:    shll $15, %edx
-; X32-SSE41-NEXT:    sarl $15, %edx
 ; X32-SSE41-NEXT:    shll $15, %ecx
-; X32-SSE41-NEXT:    sarl $15, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %edx, %xmm0
 ; X32-SSE41-NEXT:    shll $13, %esi
-; X32-SSE41-NEXT:    sarl $15, %esi
 ; X32-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
 ; X32-SSE41-NEXT:    shll $15, %eax
-; X32-SSE41-NEXT:    sarl $15, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $15, %xmm0
 ; X32-SSE41-NEXT:    popl %esi
 ; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 3a9dbaeb57a54..50efdc10af6e1 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; SSE-NEXT:    movzbl (%rdi), %eax
 ; SSE-NEXT:    movq %rax, %rcx
 ; SSE-NEXT:    shlq $62, %rcx
-; SSE-NEXT:    sarq $63, %rcx
-; SSE-NEXT:    movq %rcx, %xmm1
+; SSE-NEXT:    movq %rcx, %xmm0
 ; SSE-NEXT:    shlq $63, %rax
-; SSE-NEXT:    sarq $63, %rax
-; SSE-NEXT:    movq %rax, %xmm0
-; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX1-NEXT:    movzbl (%rdi), %eax
 ; AVX1-NEXT:    movq %rax, %rcx
 ; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
 ; AVX1-NEXT:    vmovq %rcx, %xmm0
 ; AVX1-NEXT:    shlq $63, %rax
-; AVX1-NEXT:    sarq $63, %rax
 ; AVX1-NEXT:    vmovq %rax, %xmm1
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; AVX2-NEXT:    movzbl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movzbl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movzbl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $31, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $30, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE2-NEXT:    movl (%eax), %eax
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    movl %eax, %ecx
 ; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm2
 ; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $31, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
 ; X32-SSE41-NEXT:    movl (%eax), %eax
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    movl %eax, %edx
 ; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
 ; X32-SSE41-NEXT:    movd %edx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
 ; X32-SSE41-NEXT:    movl %eax, %ecx
 ; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
 ; X32-SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
 ; X32-SSE41-NEXT:    shll $28, %eax
-; X32-SSE41-NEXT:    sarl $31, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
 ; AVX2-NEXT:    movl (%rdi), %eax
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm0
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX2-NEXT:    movq %rax, %rcx
 ; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
 ; AVX2-NEXT:    vmovq %rcx, %xmm1
 ; AVX2-NEXT:    shlq $63, %rax
-; AVX2-NEXT:    sarq $63, %rax
 ; AVX2-NEXT:    vmovq %rax, %xmm2
 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -6008,22 +5998,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE2-NEXT:    movl 8(%eax), %eax
 ; X32-SSE2-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE2-NEXT:    shll $15, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm0
 ; X32-SSE2-NEXT:    movl %edx, %eax
 ; X32-SSE2-NEXT:    shll $13, %eax
-; X32-SSE2-NEXT:    sarl $15, %eax
 ; X32-SSE2-NEXT:    movd %eax, %xmm1
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE2-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE2-NEXT:    shll $15, %ecx
-; X32-SSE2-NEXT:    sarl $15, %ecx
 ; X32-SSE2-NEXT:    movd %ecx, %xmm0
 ; X32-SSE2-NEXT:    shll $15, %edx
-; X32-SSE2-NEXT:    sarl $15, %edx
 ; X32-SSE2-NEXT:    movd %edx, %xmm2
 ; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    psrad $15, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6039,17 +6026,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) {
 ; X32-SSE41-NEXT:    shldl $13, %edx, %eax
 ; X32-SSE41-NEXT:    shldl $15, %ecx, %edx
 ; X32-SSE41-NEXT:    shll $15, %edx
-; X32-SSE41-NEXT:    sarl $15, %edx
 ; X32-SSE41-NEXT:    shll $15, %ecx
-; X32-SSE41-NEXT:    sarl $15, %ecx
 ; X32-SSE41-NEXT:    movd %ecx, %xmm0
 ; X32-SSE41-NEXT:    pinsrd $1, %edx, %xmm0
 ; X32-SSE41-NEXT:    shll $13, %esi
-; X32-SSE41-NEXT:    sarl $15, %esi
 ; X32-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
 ; X32-SSE41-NEXT:    shll $15, %eax
-; X32-SSE41-NEXT:    sarl $15, %eax
 ; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    psrad $15, %xmm0
 ; X32-SSE41-NEXT:    popl %esi
 ; X32-SSE41-NEXT:    .cfi_def_cfa_offset 4
 ; X32-SSE41-NEXT:    retl

From 0290a77aa8609a99ba613efef40ec1626aec362d Mon Sep 17 00:00:00 2001
From: David Bolvansky <david.bolvansky@gmail.com>
Date: Sat, 25 May 2019 22:34:27 +0000
Subject: [PATCH 0239/1176] [SimplifyCFG] Added condition assumption for
 unreachable blocks

Summary: PR41688

Reviewers: spatel, efriedma, craig.topper, hfinkel, reames

Reviewed By: hfinkel

Subscribers: javed.absar, dmgreen, fhahn, hfinkel, reames, nikic, lebedev.ri, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61409

llvm-svn: 361707
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp         |  3 +++
 .../test/Analysis/ValueTracking/select-pattern.ll |  2 ++
 .../Transforms/CallSiteSplitting/split-loop.ll    |  9 +++++++++
 .../Transforms/LoopVectorize/if-pred-stores.ll    | 10 ++++++++++
 llvm/test/Transforms/SimplifyCFG/PR30210.ll       |  4 +++-
 .../SimplifyCFG/UnreachableEliminate.ll           | 12 +++++++++++-
 .../Transforms/SimplifyCFG/unreachable_assume.ll  | 15 ++++++---------
 7 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index d571648c99f42..90b552035af3d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4205,10 +4205,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
           Changed = true;
         }
       } else {
+        Value* Cond = BI->getCondition();
         if (BI->getSuccessor(0) == BB) {
+          Builder.CreateAssumption(Builder.CreateNot(Cond));
           Builder.CreateBr(BI->getSuccessor(1));
           EraseTerminatorAndDCECond(BI);
         } else if (BI->getSuccessor(1) == BB) {
+          Builder.CreateAssumption(Cond);
           Builder.CreateBr(BI->getSuccessor(0));
           EraseTerminatorAndDCECond(BI);
           Changed = true;
diff --git a/llvm/test/Analysis/ValueTracking/select-pattern.ll b/llvm/test/Analysis/ValueTracking/select-pattern.ll
index 4f19c292baedd..1ab4c1edd1315 100644
--- a/llvm/test/Analysis/ValueTracking/select-pattern.ll
+++ b/llvm/test/Analysis/ValueTracking/select-pattern.ll
@@ -8,6 +8,8 @@
 define void @PR36045(i1 %t, i32* %b) {
 ; CHECK-LABEL: @PR36045(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[T:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/CallSiteSplitting/split-loop.ll b/llvm/test/Transforms/CallSiteSplitting/split-loop.ll
index b64a072a5836e..1e71643b7b9e5 100644
--- a/llvm/test/Transforms/CallSiteSplitting/split-loop.ll
+++ b/llvm/test/Transforms/CallSiteSplitting/split-loop.ll
@@ -5,6 +5,9 @@ define i16 @test1() {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 undef, i16 1, i16 0
+; CHECK-NEXT:    [[TOBOOL18:%.*]] = icmp ne i16 [[SPEC_SELECT]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL18]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    br label [[FOR_COND12:%.*]]
 ; CHECK:       for.cond12:
 ; CHECK-NEXT:    call void @callee(i16 [[SPEC_SELECT]])
@@ -27,6 +30,9 @@ define i16 @test2() {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = select i1 undef, i16 1, i16 0
+; CHECK-NEXT:    [[TOBOOL18:%.*]] = icmp ne i16 [[S]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL18]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    br label [[FOR_COND12:%.*]]
 ; CHECK:       for.cond12:
 ; CHECK-NEXT:    call void @callee(i16 [[S]])
@@ -53,6 +59,9 @@ define i16 @test3() {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = select i1 undef, i16 1, i16 0
+; CHECK-NEXT:    [[TOBOOL18:%.*]] = icmp ne i16 [[S]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL18]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    br label [[FOR_COND12:%.*]]
 ; CHECK:       for.cond12:
 ; CHECK-NEXT:    call void @callee(i16 [[S]])
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index f82311e1c120e..353087f66e537 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -197,6 +197,8 @@ for.end:
 define void @bug18724(i1 %cond) {
 ; UNROLL-LABEL: @bug18724(
 ; UNROLL-NEXT:  entry:
+; UNROLL-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
+; UNROLL-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; UNROLL-NEXT:    br label [[FOR_BODY14:%.*]]
 ; UNROLL:       for.body14:
 ; UNROLL-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -211,6 +213,9 @@ define void @bug18724(i1 %cond) {
 ; UNROLL:       for.inc23:
 ; UNROLL-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
 ; UNROLL-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
+; UNROLL-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
+; UNROLL-NEXT:    call void @llvm.assume(i1 [[CMP13]])
 ; UNROLL-NEXT:    br label [[FOR_BODY14]]
 ;
 ; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
@@ -287,6 +292,8 @@ define void @bug18724(i1 %cond) {
 ;
 ; VEC-LABEL: @bug18724(
 ; VEC-NEXT:  entry:
+; VEC-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
+; VEC-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; VEC-NEXT:    br label [[FOR_BODY14:%.*]]
 ; VEC:       for.body14:
 ; VEC-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
@@ -301,6 +308,9 @@ define void @bug18724(i1 %cond) {
 ; VEC:       for.inc23:
 ; VEC-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
 ; VEC-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
+; VEC-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
+; VEC-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
+; VEC-NEXT:    call void @llvm.assume(i1 [[CMP13]])
 ; VEC-NEXT:    br label [[FOR_BODY14]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/PR30210.ll b/llvm/test/Transforms/SimplifyCFG/PR30210.ll
index d1b0a4cd4993c..bc422ddcd5f6a 100644
--- a/llvm/test/Transforms/SimplifyCFG/PR30210.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PR30210.ll
@@ -10,6 +10,8 @@ define i32 @test1(i1 %B) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND_US:%.*]]
 ; CHECK:       for.cond.us:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[B:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    br label [[FOR_COND_US]]
 ;
 entry:
@@ -35,4 +37,4 @@ for.cond5:                                        ; preds = %for.cond5, %for.con
 for.end:                                          ; preds = %for.cond5
   %load = load i32, i32* %call, align 4
   br label %for.cond4
-}
\ No newline at end of file
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index 6bb38c3ed88e0..36b44a25aa8ea 100644
--- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -4,6 +4,8 @@
 define void @test1(i1 %C, i1* %BP) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -62,6 +64,8 @@ T:
 define void @test5(i1 %cond, i8* %ptr) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    store i8 2, i8* [[PTR:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -107,6 +111,8 @@ bb2:
 define void @test6(i1 %cond, i8* %ptr) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    store i8 2, i8* [[PTR:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -145,6 +151,8 @@ bb2:
 define i32 @test7(i1 %X) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -162,6 +170,8 @@ else:
 define void @test8(i1 %X, void ()* %Y) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
 ; CHECK-NEXT:    call void [[Y:%.*]]()
 ; CHECK-NEXT:    ret void
 ;
@@ -196,4 +206,4 @@ else:
   ret void
 }
 
-attributes #0 = { "null-pointer-is-valid"="true" }
\ No newline at end of file
+attributes #0 = { "null-pointer-is-valid"="true" }
diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll b/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll
index cd254e4d05fb1..e0d1e27a99523 100644
--- a/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll
+++ b/llvm/test/Transforms/SimplifyCFG/unreachable_assume.ll
@@ -1,14 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -simplifycfg -instcombine -S | FileCheck %s
 
-; TODO: ABS call should be optimized away
 define i32 @assume1(i32 %p) {
 ; CHECK-LABEL: @assume1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[P:%.*]], 0
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[P]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[NEG]], i32 [[P]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[P:%.*]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 [[P]]
 ;
 entry:
   %cmp = icmp sle i32 %p, 0
@@ -26,10 +24,9 @@ if.end:
 define i32 @assume2(i32 %p) {
 ; CHECK-LABEL: @assume2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[P:%.*]], 0
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[P]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i32 [[NEG]], i32 [[P]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[P:%.*]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 [[P]]
 ;
 entry:
   %cmp = icmp sgt i32 %p, 0

From d4a9cae96500761f0933fb0eb0155a2848e70452 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith" <dexonsmith@apple.com>
Date: Sat, 25 May 2019 22:38:02 +0000
Subject: [PATCH 0240/1176] Add missing newline at end of file

llvm-svn: 361708
---
 clang/unittests/Tooling/Syntax/TokensTest.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index ac605063733fb..1d931faa70488 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -651,4 +651,4 @@ TEST_F(TokenBufferTest, TokensToFileRange) {
   // We don't test assertion failures because death tests are slow.
 }
 
-} // namespace
\ No newline at end of file
+} // namespace

From 2db79ef32c66035a89b5da409b8501945166c2a3 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Sun, 26 May 2019 03:39:07 +0000
Subject: [PATCH 0241/1176] [Driver] Update handling of c++ and runtime
 directories

This is a follow up to r361432 and r361504 which addresses issues
introduced by those changes. Specifically, it avoids duplicating
file and runtime paths in case when the effective triple is the
same as the cannonical one. Furthermore, it fixes the broken multilib
setup in the Fuchsia driver and deduplicates some of the code.

Differential Revision: https://reviews.llvm.org/D62442

llvm-svn: 361709
---
 clang/include/clang/Driver/ToolChain.h        |  6 ++
 clang/lib/Driver/ToolChain.cpp                | 65 ++++++++++++-------
 clang/lib/Driver/ToolChains/Fuchsia.cpp       | 33 ++++------
 .../lib/asan => basic_fuchsia_tree/bin}/.keep |  0
 .../lib/aarch64-fuchsia/c++/asan/libc++.so}   |  0
 .../lib/aarch64-fuchsia/c++/libc++.so}        |  0
 .../aarch64-fuchsia/c++/noexcept/libc++.so}   |  0
 .../lib/x86_64-fuchsia/c++/asan/libc++.so     |  0
 .../lib/x86_64-fuchsia/c++/libc++.so          |  0
 .../lib/x86_64-fuchsia/c++/noexcept/libc++.so |  0
 clang/test/Driver/fuchsia.c                   |  4 --
 clang/test/Driver/fuchsia.cpp                 | 21 +++---
 .../Driver/linux-per-target-runtime-dir.c     |  1 -
 13 files changed, 74 insertions(+), 56 deletions(-)
 rename clang/test/Driver/Inputs/{resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/asan => basic_fuchsia_tree/bin}/.keep (100%)
 rename clang/test/Driver/Inputs/{resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/noexcept/.keep => basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan/libc++.so} (100%)
 rename clang/test/Driver/Inputs/{resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/asan/.keep => basic_fuchsia_tree/lib/aarch64-fuchsia/c++/libc++.so} (100%)
 rename clang/test/Driver/Inputs/{resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/noexcept/.keep => basic_fuchsia_tree/lib/aarch64-fuchsia/c++/noexcept/libc++.so} (100%)
 create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan/libc++.so
 create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/libc++.so
 create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/noexcept/libc++.so

diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 4ccf8413ff7d8..7dd3db376c8c9 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -389,6 +389,12 @@ class ToolChain {
   getCompilerRTArgString(const llvm::opt::ArgList &Args, StringRef Component,
                          FileType Type = ToolChain::FT_Static) const;
 
+  // Returns target specific runtime path if it exists.
+  virtual Optional<std::string> getRuntimePath() const;
+
+  // Returns target specific C++ library path if it exists.
+  virtual Optional<std::string> getCXXStdlibPath() const;
+
   // Returns <ResourceDir>/lib/<OSName>/<arch>.  This is used by runtimes (such
   // as OpenMP) to find arch-specific libraries.
   std::string getArchSpecificLibPath() const;
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 08d1ebb75d7bc..01fb818c9c41c 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -73,29 +73,13 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
                      const ArgList &Args)
     : D(D), Triple(T), Args(Args), CachedRTTIArg(GetRTTIArgument(Args)),
       CachedRTTIMode(CalculateRTTIMode(Args, Triple, CachedRTTIArg)) {
-  SmallString<128> P;
-
   if (D.CCCIsCXX()) {
-    P.assign(D.Dir);
-    llvm::sys::path::append(P, "..", "lib", D.getTargetTriple(), "c++");
-    if (getVFS().exists(P))
-      getLibraryPaths().push_back(P.str());
-
-    P.assign(D.Dir);
-    llvm::sys::path::append(P, "..", "lib", Triple.str(), "c++");
-    if (getVFS().exists(P))
-      getLibraryPaths().push_back(P.str());
+    if (auto CXXStdlibPath = getCXXStdlibPath())
+      getFilePaths().push_back(*CXXStdlibPath);
   }
 
-  P.assign(D.ResourceDir);
-  llvm::sys::path::append(P, D.getTargetTriple(), "lib");
-  if (getVFS().exists(P))
-    getLibraryPaths().push_back(P.str());
-
-  P.assign(D.ResourceDir);
-  llvm::sys::path::append(P, Triple.str(), "lib");
-  if (getVFS().exists(P))
-    getLibraryPaths().push_back(P.str());
+  if (auto RuntimePath = getRuntimePath())
+    getLibraryPaths().push_back(*RuntimePath);
 
   std::string CandidateLibPath = getArchSpecificLibPath();
   if (getVFS().exists(CandidateLibPath))
@@ -421,6 +405,43 @@ const char *ToolChain::getCompilerRTArgString(const llvm::opt::ArgList &Args,
   return Args.MakeArgString(getCompilerRT(Args, Component, Type));
 }
 
+
+Optional<std::string> ToolChain::getRuntimePath() const {
+  SmallString<128> P;
+
+  // First try the triple passed to driver as --target=<triple>.
+  P.assign(D.ResourceDir);
+  llvm::sys::path::append(P, D.getTargetTriple(), "lib");
+  if (getVFS().exists(P))
+    return llvm::Optional<std::string>(P.str());
+
+  // Second try the normalized triple.
+  P.assign(D.ResourceDir);
+  llvm::sys::path::append(P, Triple.str(), "lib");
+  if (getVFS().exists(P))
+    return llvm::Optional<std::string>(P.str());
+
+  return None;
+}
+
+Optional<std::string> ToolChain::getCXXStdlibPath() const {
+  SmallString<128> P;
+
+  // First try the triple passed to driver as --target=<triple>.
+  P.assign(D.Dir);
+  llvm::sys::path::append(P, "..", "lib", D.getTargetTriple(), "c++");
+  if (getVFS().exists(P))
+    return llvm::Optional<std::string>(P.str());
+
+  // Second try the normalized triple.
+  P.assign(D.Dir);
+  llvm::sys::path::append(P, "..", "lib", Triple.str(), "c++");
+  if (getVFS().exists(P))
+    return llvm::Optional<std::string>(P.str());
+
+  return None;
+}
+
 std::string ToolChain::getArchSpecificLibPath() const {
   SmallString<128> Path(getDriver().ResourceDir);
   llvm::sys::path::append(Path, "lib", getOSLibName(),
@@ -833,10 +854,6 @@ void ToolChain::AddCXXStdlibLibArgs(const ArgList &Args,
 
 void ToolChain::AddFilePathLibArgs(const ArgList &Args,
                                    ArgStringList &CmdArgs) const {
-  for (const auto &LibPath : getLibraryPaths())
-    if(LibPath.length() > 0)
-      CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath));
-
   for (const auto &LibPath : getFilePaths())
     if(LibPath.length() > 0)
       CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibPath));
diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp
index 3249d4f1f7f6e..2344a69adb962 100644
--- a/clang/lib/Driver/ToolChains/Fuchsia.cpp
+++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp
@@ -172,21 +172,16 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple,
     getFilePaths().push_back(P.str());
   }
 
-  auto RuntimeDirs = [&](const Multilib &M) -> std::vector<std::string> {
-    SmallString<128> P;
-    std::vector<std::string> RD;
-
-    P.assign(D.ResourceDir);
-    llvm::sys::path::append(P, D.getTargetTriple(), "lib", M.gccSuffix());
-    if (getVFS().exists(P))
-      RD.push_back(P.str());
-
-    P.assign(D.ResourceDir);
-    llvm::sys::path::append(P, Triple.str(), "lib", M.gccSuffix());
-    if (getVFS().exists(P))
-      RD.push_back(P.str());
-
-    return RD;
+  auto FilePaths = [&](const Multilib &M) -> std::vector<std::string> {
+    std::vector<std::string> FP;
+    if (D.CCCIsCXX()) {
+      if (auto CXXStdlibPath = getCXXStdlibPath()) {
+        SmallString<128> P(*CXXStdlibPath);
+        llvm::sys::path::append(P, M.gccSuffix());
+        FP.push_back(P.str());
+      }
+    }
+    return FP;
   };
 
   Multilibs.push_back(Multilib());
@@ -198,7 +193,7 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple,
   Multilibs.push_back(Multilib("asan", {}, {}, 2)
                           .flag("+fsanitize=address"));
   Multilibs.FilterOut([&](const Multilib &M) {
-    std::vector<std::string> RD = RuntimeDirs(M);
+    std::vector<std::string> RD = FilePaths(M);
     return std::all_of(RD.begin(), RD.end(), [&](std::string P) {
       return !getVFS().exists(P);
     });
@@ -209,14 +204,14 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple,
       Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, true),
       "fexceptions", Flags);
   addMultilibFlag(getSanitizerArgs().needsAsanRt(), "fsanitize=address", Flags);
-  Multilibs.setFilePathsCallback(RuntimeDirs);
+  Multilibs.setFilePathsCallback(FilePaths);
 
   if (Multilibs.select(Flags, SelectedMultilib))
     if (!SelectedMultilib.isDefault())
       if (const auto &PathsCallback = Multilibs.filePathsCallback())
         for (const auto &Path : PathsCallback(SelectedMultilib))
-          // We need to prepend the multilib path to ensure it takes precedence.
-          getLibraryPaths().insert(getLibraryPaths().begin(), Path);
+          // Prepend the multilib path to ensure it takes the precedence.
+          getFilePaths().insert(getFilePaths().begin(), Path);
 }
 
 std::string Fuchsia::ComputeEffectiveClangTriple(const ArgList &Args,
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/asan/.keep b/clang/test/Driver/Inputs/basic_fuchsia_tree/bin/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/asan/.keep
rename to clang/test/Driver/Inputs/basic_fuchsia_tree/bin/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/noexcept/.keep b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan/libc++.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/noexcept/.keep
rename to clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan/libc++.so
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/asan/.keep b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/libc++.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/asan/.keep
rename to clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/libc++.so
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/noexcept/.keep b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/noexcept/libc++.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/noexcept/.keep
rename to clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/noexcept/libc++.so
diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/noexcept/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/noexcept/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c
index a012654946636..7147f2f45ce9d 100644
--- a/clang/test/Driver/fuchsia.c
+++ b/clang/test/Driver/fuchsia.c
@@ -93,8 +93,6 @@
 // CHECK-ASAN-X86: "-fsanitize=address"
 // CHECK-ASAN-X86: "-fsanitize-address-globals-dead-stripping"
 // CHECK-ASAN-X86: "-dynamic-linker" "asan/ld.so.1"
-// CHECK-ASAN-X86: "-L[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}asan"
-// CHECK-ASAN-X86: "-L[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib"
 // CHECK-ASAN-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan.so"
 // CHECK-ASAN-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan-preinit.a"
 
@@ -107,8 +105,6 @@
 // CHECK-ASAN-AARCH64: "-fsanitize=address"
 // CHECK-ASAN-AARCH64: "-fsanitize-address-globals-dead-stripping"
 // CHECK-ASAN-AARCH64: "-dynamic-linker" "asan/ld.so.1"
-// CHECK-ASAN-AARCH64: "-L[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}asan"
-// CHECK-ASAN-AARCH64: "-L[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib"
 // CHECK-ASAN-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan.so"
 // CHECK-ASAN-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan-preinit.a"
 
diff --git a/clang/test/Driver/fuchsia.cpp b/clang/test/Driver/fuchsia.cpp
index a6d9b8e343cc2..13c270de8e0b2 100644
--- a/clang/test/Driver/fuchsia.cpp
+++ b/clang/test/Driver/fuchsia.cpp
@@ -1,4 +1,5 @@
 // RUN: %clangxx %s -### -no-canonical-prefixes --target=x86_64-fuchsia \
+// RUN:     -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     --sysroot=%S/platform -fuse-ld=lld 2>&1 | FileCheck %s
 // CHECK: {{.*}}clang{{.*}}" "-cc1"
@@ -44,29 +45,33 @@
 // CHECK-STATIC: "--pop-state"
 // CHECK-STATIC: "-lc"
 
-// RUN: %clang %s -### --target=x86_64-fuchsia -nostdlib++ -fuse-ld=lld 2>&1 \
+// RUN: %clangxx %s -### --target=x86_64-fuchsia -nostdlib++ -fuse-ld=lld 2>&1 \
 // RUN:     | FileCheck %s -check-prefix=CHECK-NOSTDLIBXX
 // CHECK-NOSTDLIBXX-NOT: "-lc++"
 // CHECK-NOSTDLIBXX-NOT: "-lm"
 // CHECK-NOSTDLIBXX: "-lc"
 
-// RUN: %clang %s -### --target=x86_64-fuchsia \
+// RUN: %clangxx %s -### --target=x86_64-fuchsia \
+// RUN:     -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     -fuse-ld=lld 2>&1\
 // RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86
-// RUN: %clang %s -### --target=x86_64-fuchsia -fsanitize=address \
+// RUN: %clangxx %s -### --target=x86_64-fuchsia -fsanitize=address \
+// RUN:     -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     -fuse-ld=lld 2>&1\
 // RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-ASAN-X86
-// RUN: %clang %s -### --target=x86_64-fuchsia -fno-exceptions \
+// RUN: %clangxx %s -### --target=x86_64-fuchsia -fno-exceptions \
+// RUN:     -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     -fuse-ld=lld 2>&1\
 // RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-NOEXCEPT-X86
-// RUN: %clang %s -### --target=x86_64-fuchsia -fsanitize=address -fno-exceptions \
+// RUN: %clangxx %s -### --target=x86_64-fuchsia -fsanitize=address -fno-exceptions \
+// RUN:     -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     -fuse-ld=lld 2>&1\
 // RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-ASAN-X86
 // CHECK-MULTILIB-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
-// CHECK-MULTILIB-ASAN-X86: "-L[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}asan"
-// CHECK-MULTILIB-NOEXCEPT-X86: "-L[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}noexcept"
-// CHECK-MULTILIB-X86: "-L[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib"
+// CHECK-MULTILIB-ASAN-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}asan"
+// CHECK-MULTILIB-NOEXCEPT-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}noexcept"
+// CHECK-MULTILIB-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++"
diff --git a/clang/test/Driver/linux-per-target-runtime-dir.c b/clang/test/Driver/linux-per-target-runtime-dir.c
index 3d52342c394ec..97b433b296d15 100644
--- a/clang/test/Driver/linux-per-target-runtime-dir.c
+++ b/clang/test/Driver/linux-per-target-runtime-dir.c
@@ -13,7 +13,6 @@
 // CHECK-PER-TARGET-RUNTIME: "-internal-isystem" "[[SYSROOT]]/usr/local/include"
 // CHECK-PER-TARGET-RUNTIME: "--sysroot=[[SYSROOT]]"
 // CHECK-PER-TARGET-RUNTIME: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-linux-gnu{{/|\\\\}}c++"
-// CHECK-PER-TARGET-RUNTIME: "-L[[RESDIR]]{{/|\\\\}}x86_64-linux-gnu{{/|\\\\}}lib"
 
 // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name 2>&1 \
 // RUN:     --target=x86_64-linux-gnu \

From f29120658b2492fb6a7f2d16abf2b14334435682 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sun, 26 May 2019 07:43:45 +0000
Subject: [PATCH 0242/1176] [Driver][RISCV] Simplify. NFC

llvm-svn: 361710
---
 clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 44 +++++++---------------
 1 file changed, 13 insertions(+), 31 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index b5cee381e1f74..e74fe13e2d495 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -54,20 +54,14 @@ static bool isSupportedExtension(StringRef Ext) {
 static bool getExtensionVersion(const Driver &D, StringRef MArch,
                                 StringRef Ext, StringRef In,
                                 std::string &Major, std::string &Minor) {
-  auto I = In.begin();
-  auto E = In.end();
-
-  while (I != E && isDigit(*I))
-    Major.append(1, *I++);
-
+  Major = In.take_while(isDigit);
+  In = In.substr(Major.size());
   if (Major.empty())
     return true;
 
-  if (I != E && *I == 'p') {
-    ++I;
-
-    while (I != E && isDigit(*I))
-      Minor.append(1, *I++);
+  if (In.consume_front("p")) {
+    Minor = In.take_while(isDigit);
+    In = In.substr(Major.size());
 
     // Expected 'p' to be followed by minor version number.
     if (Minor.empty()) {
@@ -110,17 +104,13 @@ static void getExtensionFeatures(const Driver &D,
   SmallVector<StringRef, 8> Split;
   Exts.split(Split, StringRef("_"));
 
-  SmallVector<StringRef, 3> Prefix;
-  Prefix.push_back("x");
-  Prefix.push_back("s");
-  Prefix.push_back("sx");
+  SmallVector<StringRef, 3> Prefix{"x", "s", "sx"};
   auto I = Prefix.begin();
   auto E = Prefix.end();
 
   SmallVector<StringRef, 8> AllExts;
 
   for (StringRef Ext : Split) {
-
     if (Ext.empty()) {
       D.Diag(diag::err_drv_invalid_riscv_arch_name) << MArch
         << "extension name missing after separator '_'";
@@ -205,11 +195,9 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const ArgList &Args,
     StringRef MArch = A->getValue();
 
     // RISC-V ISA strings must be lowercase.
-    if (std::any_of(std::begin(MArch), std::end(MArch),
-                    [](char c) { return isupper(c); })) {
-
-      D.Diag(diag::err_drv_invalid_riscv_arch_name) << MArch
-        << "string must be lowercase";
+    if (llvm::any_of(MArch, [](char c) { return isupper(c); })) {
+      D.Diag(diag::err_drv_invalid_riscv_arch_name)
+          << MArch << "string must be lowercase";
       return;
     }
 
@@ -221,7 +209,7 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const ArgList &Args,
       return;
     }
 
-    bool HasRV64 = MArch.startswith("rv64") ? true : false;
+    bool HasRV64 = MArch.startswith("rv64");
 
     // The canonical order specified in ISA manual.
     // Ref: Table 22.1 in RISC-V User-Level ISA V2.2
@@ -365,16 +353,10 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const ArgList &Args,
   }
 
   // -mrelax is default, unless -mno-relax is specified.
-  bool Relax = true;
-  if (auto *A = Args.getLastArg(options::OPT_mrelax, options::OPT_mno_relax)) {
-    if (A->getOption().matches(options::OPT_mno_relax)) {
-      Relax = false;
-      Features.push_back("-relax");
-    }
-  }
-
-  if (Relax)
+  if (Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true))
     Features.push_back("+relax");
+  else
+    Features.push_back("-relax");
 
   // Now add any that the user explicitly requested on the command line,
   // which may override the defaults.

From 603ca511f9169df81e6332721a0f37ab30160df7 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sun, 26 May 2019 08:31:00 +0000
Subject: [PATCH 0243/1176] [PowerPC] Add missing R_PPC_* relocation types

While people mostly care about 64-bit, some systems need basic lib32
support. The plan is to make lld (see PR40888) capable of linking some
applications (PR40888).

llvm-svn: 361711
---
 .../llvm/BinaryFormat/ELFRelocs/PowerPC.def   | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
index e4f8ee0ebe2b8..7041896681cc7 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
@@ -27,9 +27,25 @@
 #undef R_PPC_GOT16_HI
 #undef R_PPC_GOT16_HA
 #undef R_PPC_PLTREL24
+#undef R_PPC_COPY
+#undef R_PPC_GLOB_DAT
 #undef R_PPC_JMP_SLOT
+#undef R_PPC_RELATIVE
 #undef R_PPC_LOCAL24PC
+#undef R_PPC_UADDR32
+#undef R_PPC_UADDR16
 #undef R_PPC_REL32
+#undef R_PPC_PLT32
+#undef R_PPC_PLTREL32
+#undef R_PPC_PLT16_LO
+#undef R_PPC_PLT16_HI
+#undef R_PPC_PLT16_HA
+#undef R_PPC_SDAREL16
+#undef R_PPC_SECTOFF
+#undef R_PPC_SECTOFF_LO
+#undef R_PPC_SECTOFF_HI
+#undef R_PPC_SECTOFF_HA
+#undef R_PPC_ADDR30
 #undef R_PPC_TLS
 #undef R_PPC_DTPMOD32
 #undef R_PPC_TPREL16
@@ -84,9 +100,25 @@ ELF_RELOC(R_PPC_GOT16_LO,               15)
 ELF_RELOC(R_PPC_GOT16_HI,               16)
 ELF_RELOC(R_PPC_GOT16_HA,               17)
 ELF_RELOC(R_PPC_PLTREL24,               18)
+ELF_RELOC(R_PPC_COPY,                   19)
+ELF_RELOC(R_PPC_GLOB_DAT,               20)
 ELF_RELOC(R_PPC_JMP_SLOT,               21)
+ELF_RELOC(R_PPC_RELATIVE,               22)
 ELF_RELOC(R_PPC_LOCAL24PC,              23)
+ELF_RELOC(R_PPC_UADDR32,                24)
+ELF_RELOC(R_PPC_UADDR16,                25)
 ELF_RELOC(R_PPC_REL32,                  26)
+ELF_RELOC(R_PPC_PLT32,                  27)
+ELF_RELOC(R_PPC_PLTREL32,               28)
+ELF_RELOC(R_PPC_PLT16_LO,               29)
+ELF_RELOC(R_PPC_PLT16_HI,               30)
+ELF_RELOC(R_PPC_PLT16_HA,               31)
+ELF_RELOC(R_PPC_SDAREL16,               32)
+ELF_RELOC(R_PPC_SECTOFF,                33)
+ELF_RELOC(R_PPC_SECTOFF_LO,             34)
+ELF_RELOC(R_PPC_SECTOFF_HI,             35)
+ELF_RELOC(R_PPC_SECTOFF_HA,             36)
+ELF_RELOC(R_PPC_ADDR30,                 37)
 ELF_RELOC(R_PPC_TLS,                    67)
 ELF_RELOC(R_PPC_DTPMOD32,               68)
 ELF_RELOC(R_PPC_TPREL16,                69)

From 1c1e2ca02216d957ddccf9aea85d70e4db8c0405 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 10:06:40 +0000
Subject: [PATCH 0244/1176] [ARM] Add some base fullfp16 tests. NFC

llvm-svn: 361712
---
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 458 +++++++++++++++++++++++++
 1 file changed, 458 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/fp16-fullfp16.ll

diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
new file mode 100644
index 0000000000000..5f14463bcee88
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -0,0 +1,458 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple armv8a-none-none-eabihf -mattr=fullfp16 -asm-verbose=false < %s | FileCheck %s
+
+define void @test_fadd(half* %p, half* %q) {
+; CHECK-LABEL: test_fadd:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vadd.f16 s0, s2, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = fadd half %a, %b
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_fsub(half* %p, half* %q) {
+; CHECK-LABEL: test_fsub:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vsub.f16 s0, s2, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = fsub half %a, %b
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_fmul(half* %p, half* %q) {
+; CHECK-LABEL: test_fmul:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vmul.f16 s0, s2, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = fmul half %a, %b
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_fdiv(half* %p, half* %q) {
+; CHECK-LABEL: test_fdiv:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vdiv.f16 s0, s2, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = fdiv half %a, %b
+  store half %r, half* %p
+  ret void
+}
+
+; FIXME
+;define void @test_frem(half* %p, half* %q) {
+;  %a = load half, half* %p, align 2
+;  %b = load half, half* %q, align 2
+;  %r = frem half %a, %b
+;  store half %r, half* %p
+;  ret void
+;}
+
+define void @test_load_store(half* %p, half* %q) {
+; CHECK-LABEL: test_load_store:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vstr.16 s0, [r1]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  store half %a, half* %q
+  ret void
+}
+
+define i32 @test_fptosi_i32(half* %p) {
+; CHECK-LABEL: test_fptosi_i32:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vcvt.s32.f16 s0, s0
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = fptosi half %a to i32
+  ret i32 %r
+}
+
+; FIXME
+;define i64 @test_fptosi_i64(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = fptosi half %a to i64
+;  ret i64 %r
+;}
+
+define i32 @test_fptoui_i32(half* %p) {
+; CHECK-LABEL: test_fptoui_i32:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vcvt.u32.f16 s0, s0
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = fptoui half %a to i32
+  ret i32 %r
+}
+
+; FIXME
+;define i64 @test_fptoui_i64(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = fptoui half %a to i64
+;  ret i64 %r
+;}
+
+define void @test_sitofp_i32(i32 %a, half* %p) {
+; CHECK-LABEL: test_sitofp_i32:
+; CHECK:         vmov s0, r0
+; CHECK-NEXT:    vcvt.f16.s32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r1]
+; CHECK-NEXT:    bx lr
+  %r = sitofp i32 %a to half
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_uitofp_i32(i32 %a, half* %p) {
+; CHECK-LABEL: test_uitofp_i32:
+; CHECK:         vmov s0, r0
+; CHECK-NEXT:    vcvt.f16.u32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r1]
+; CHECK-NEXT:    bx lr
+  %r = uitofp i32 %a to half
+  store half %r, half* %p
+  ret void
+}
+
+; FIXME
+;define void @test_sitofp_i64(i64 %a, half* %p) {
+;  %r = sitofp i64 %a to half
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_uitofp_i64(i64 %a, half* %p) {
+;  %r = uitofp i64 %a to half
+;  store half %r, half* %p
+;  ret void
+;}
+
+define void @test_fptrunc_float(float %f, half* %p) {
+; CHECK-LABEL: test_fptrunc_float:
+; CHECK:         vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = fptrunc float %f to half
+  store half %a, half* %p
+  ret void
+}
+
+define void @test_fptrunc_double(double %d, half* %p) {
+; CHECK-LABEL: test_fptrunc_double:
+; CHECK:         vcvtb.f16.f64 s0, d0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = fptrunc double %d to half
+  store half %a, half* %p
+  ret void
+}
+
+define float @test_fpextend_float(half* %p) {
+; CHECK-LABEL: test_fpextend_float:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = fpext half %a to float
+  ret float %r
+}
+
+define double @test_fpextend_double(half* %p) {
+; CHECK-LABEL: test_fpextend_double:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vcvtb.f64.f16 d0, s0
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = fpext half %a to double
+  ret double %r
+}
+
+define i16 @test_bitcast_halftoi16(half* %p) {
+; CHECK-LABEL: test_bitcast_halftoi16:
+; CHECK:         ldrh r0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = bitcast half %a to i16
+  ret i16 %r
+}
+
+define void @test_bitcast_i16tohalf(i16 %a, half* %p) {
+; CHECK-LABEL: test_bitcast_i16tohalf:
+; CHECK:         strh r0, [r1]
+; CHECK-NEXT:    bx lr
+  %r = bitcast i16 %a to half
+  store half %r, half* %p
+  ret void
+}
+
+; FIXME
+;define void @test_sqrt(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.sqrt.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_fpowi(half* %p, i32 %b) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.powi.f16(half %a, i32 %b)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_sin(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.sin.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_cos(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.cos.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_pow(half* %p, half* %q) {
+;  %a = load half, half* %p, align 2
+;  %b = load half, half* %q, align 2
+;  %r = call half @llvm.pow.f16(half %a, half %b)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_exp(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.exp.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_exp2(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.exp2.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_log(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.log.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_log10(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.log10.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_log2(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.log2.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_fma(half* %p, half* %q, half* %r) {
+;  %a = load half, half* %p, align 2
+;  %b = load half, half* %q, align 2
+;  %c = load half, half* %r, align 2
+;  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
+;  store half %v, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_fabs(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.fabs.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+define void @test_minnum(half* %p, half* %q) {
+; CHECK-LABEL: test_minnum:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vminnm.f16 s0, s2, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = call half @llvm.minnum.f16(half %a, half %b)
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_maxnum(half* %p, half* %q) {
+; CHECK-LABEL: test_maxnum:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vmaxnm.f16 s0, s2, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = call half @llvm.maxnum.f16(half %a, half %b)
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_minimum(half* %p) {
+; CHECK-LABEL: test_minimum:
+; CHECK:         vldr.16 s2, [r0]
+; CHECK-NEXT:    vmov.f16 s0, #1.000000e+00
+; CHECK-NEXT:    vmin.f16 d0, d1, d0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %c = fcmp ult half %a, 1.0
+  %r = select i1 %c, half %a, half 1.0
+  store half %r, half* %p
+  ret void
+}
+
+define void @test_maximum(half* %p) {
+; CHECK-LABEL: test_maximum:
+; CHECK:         vldr.16 s2, [r0]
+; CHECK-NEXT:    vmov.f16 s0, #1.000000e+00
+; CHECK-NEXT:    vmax.f16 d0, d1, d0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %c = fcmp ugt half %a, 1.0
+  %r = select i1 %c, half %a, half 1.0
+  store half %r, half* %p
+  ret void
+}
+
+; FIXME
+;define void @test_copysign(half* %p, half* %q) {
+;  %a = load half, half* %p, align 2
+;  %b = load half, half* %q, align 2
+;  %r = call half @llvm.copysign.f16(half %a, half %b)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_floor(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.floor.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_ceil(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.ceil.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_trunc(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.trunc.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_rint(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.rint.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_nearbyint(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.nearbyint.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+; FIXME
+;define void @test_round(half* %p) {
+;  %a = load half, half* %p, align 2
+;  %r = call half @llvm.round.f16(half %a)
+;  store half %r, half* %p
+;  ret void
+;}
+
+define void @test_fmuladd(half* %p, half* %q, half* %r) {
+; CHECK-LABEL: test_fmuladd:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vldr.16 s4, [r2]
+; CHECK-NEXT:    vmla.f16 s4, s2, s0
+; CHECK-NEXT:    vstr.16 s4, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %c = load half, half* %r, align 2
+  %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
+  store half %v, half* %p
+  ret void
+}
+
+declare half @llvm.sqrt.f16(half %a)
+declare half @llvm.powi.f16(half %a, i32 %b)
+declare half @llvm.sin.f16(half %a)
+declare half @llvm.cos.f16(half %a)
+declare half @llvm.pow.f16(half %a, half %b)
+declare half @llvm.exp.f16(half %a)
+declare half @llvm.exp2.f16(half %a)
+declare half @llvm.log.f16(half %a)
+declare half @llvm.log10.f16(half %a)
+declare half @llvm.log2.f16(half %a)
+declare half @llvm.fma.f16(half %a, half %b, half %c)
+declare half @llvm.fabs.f16(half %a)
+declare half @llvm.minnum.f16(half %a, half %b)
+declare half @llvm.maxnum.f16(half %a, half %b)
+declare half @llvm.copysign.f16(half %a, half %b)
+declare half @llvm.floor.f16(half %a)
+declare half @llvm.ceil.f16(half %a)
+declare half @llvm.trunc.f16(half %a)
+declare half @llvm.rint.f16(half %a)
+declare half @llvm.nearbyint.f16(half %a)
+declare half @llvm.round.f16(half %a)
+declare half @llvm.fmuladd.f16(half %a, half %b, half %c)

From caf8a11b656afdaf12d0a3566c7d42e302bd7539 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 10:30:22 +0000
Subject: [PATCH 0245/1176] [ARM] Promote fp16 frem

Promote fp16 frem operations on ARM to floats so they call fmodf.

Differential Revision: https://reviews.llvm.org/D62321

llvm-svn: 361713
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  5 +++++
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll  | 27 +++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 643d2806c521e..8e5c0767358c5 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1142,6 +1142,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     }
   }
 
+  // FP16 often need to be promoted to call lib functions
+  if (Subtarget->hasFullFP16()) {
+    setOperationAction(ISD::FREM, MVT::f16, Promote);
+  }
+
   if (Subtarget->hasNEON()) {
     // vmin and vmax aren't available in a scalar form, so we use
     // a NEON instruction with an undef lane instead.
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 5f14463bcee88..d5ded1febe413 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -57,14 +57,25 @@ define void @test_fdiv(half* %p, half* %q) {
   ret void
 }
 
-; FIXME
-;define void @test_frem(half* %p, half* %q) {
-;  %a = load half, half* %p, align 2
-;  %b = load half, half* %q, align 2
-;  %r = frem half %a, %b
-;  store half %r, half* %p
-;  ret void
-;}
+define arm_aapcs_vfpcc void @test_frem(half* %p, half* %q) {
+; CHECK-LABEL: test_frem:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s2, [r1]
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    vcvtb.f32.f16 s1, s2
+; CHECK-NEXT:    bl fmodf
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = frem half %a, %b
+  store half %r, half* %p
+  ret void
+}
 
 define void @test_load_store(half* %p, half* %q) {
 ; CHECK-LABEL: test_load_store:

From aeade651f35251602b8367e6715f0ac444bf94ba Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 10:42:24 +0000
Subject: [PATCH 0246/1176] [ARM] Select fp16 fsqrt

This adds a pattern for the sqrt intrinsic, the same as float and double.

Differential Revision: https://reviews.llvm.org/D62322

llvm-svn: 361714
---
 llvm/lib/Target/ARM/ARMInstrVFP.td     |  4 ++--
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 18 +++++++++++-------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 4478fce6ef904..17f954ef7ddb6 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1023,9 +1023,9 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
              Sched<[WriteFPSQRT32]>;
 
 def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sm),
                   IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
-                  []>;
+                  [(set HPR:$Sd, (fsqrt (f16 HPR:$Sm)))]>;
 
 let hasSideEffects = 0 in {
 let isMoveReg = 1 in {
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index d5ded1febe413..e4a8fa75504fa 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -217,13 +217,17 @@ define void @test_bitcast_i16tohalf(i16 %a, half* %p) {
   ret void
 }
 
-; FIXME
-;define void @test_sqrt(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.sqrt.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_sqrt(half* %p) {
+; CHECK-LABEL: test_sqrt:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vsqrt.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.sqrt.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
 ; FIXME
 ;define void @test_fpowi(half* %p, i32 %b) {

From 2881325b17af7b83037fef5436ea3f0dd412f4d7 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 10:51:58 +0000
Subject: [PATCH 0247/1176] [ARM] Select fp16 fabs

This adds a pattern for the fabs intrinsic, the same as float and double.

Differential Revision: https://reviews.llvm.org/D62324

llvm-svn: 361715
---
 llvm/lib/Target/ARM/ARMInstrVFP.td     |  4 ++--
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 18 +++++++++++-------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 17f954ef7ddb6..1f497f6d5bff8 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -595,9 +595,9 @@ def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
 }
 
 def VABSH  : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
-                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   (outs HPR:$Sd), (ins HPR:$Sm),
                    IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
-                   []>;
+                   [(set HPR:$Sd, (fabs (f16 HPR:$Sm)))]>;
 
 let Defs = [FPSCR_NZCV] in {
 def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index e4a8fa75504fa..7e8369be740f8 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -312,13 +312,17 @@ define void @test_sqrt(half* %p) {
 ;  ret void
 ;}
 
-; FIXME
-;define void @test_fabs(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.fabs.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_fabs(half* %p) {
+; CHECK-LABEL: test_fabs:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vabs.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.fabs.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
 define void @test_minnum(half* %p, half* %q) {
 ; CHECK-LABEL: test_minnum:

From 58a8541dcc3fb72803ac828b7730c4bd0fc5bf30 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 26 May 2019 10:54:23 +0000
Subject: [PATCH 0248/1176] [X86][AVX] combineBitcastvxi1 - peek through bitops
 to determine size of original vector

We were only testing for direct SETCC results - this allows us to peek through AND/OR/XOR combinations of the comparison results as well.

There's a missing SEXT(PACKSS) fold that I need to investigate for v8i1 cases before I can enable it there as well.

llvm-svn: 361716
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 20 +++++++++++--
 .../test/CodeGen/X86/bitcast-and-setcc-256.ll | 28 ++++++++-----------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 170e3cf33ba04..8b6edaa50ba6a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34126,6 +34126,21 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
                      EltNo);
 }
 
+// Helper to peek through bitops/setcc to determine size of source vector.
+// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
+static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
+  switch (Src.getOpcode()) {
+  case ISD::SETCC:
+    return Src.getOperand(0).getValueSizeInBits() == Size;
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR:
+    return checkBitcastSrcVectorSize(Src.getOperand(0), Size) &&
+           checkBitcastSrcVectorSize(Src.getOperand(1), Size);
+  }
+  return false;
+}
+
 // Try to match patterns such as
 // (i16 bitcast (v16i1 x))
 // ->
@@ -34174,10 +34189,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
     SExtVT = MVT::v4i32;
     // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
     // sign-extend to a 256-bit operation to avoid truncation.
-    if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
-        Src.getOperand(0).getValueType().is256BitVector()) {
+    if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
       SExtVT = MVT::v4i64;
-    }
     break;
   case MVT::v8i1:
     SExtVT = MVT::v8i16;
@@ -34186,6 +34199,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
     // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
     // 256-bit because the shuffle is cheaper than sign extending the result of
     // the compare.
+    // TODO : use checkBitcastSrcVectorSize
     if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
         (Src.getOperand(0).getValueType().is256BitVector() ||
          Src.getOperand(0).getValueType().is512BitVector())) {
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
index 85ae7c0c42101..b982cde2a957b 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -55,18 +55,18 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
 ;
 ; AVX1-LABEL: v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm1
-; AVX1-NEXT:    vpand %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm4, %xmm1
 ; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vmovmskps %xmm0, %eax
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vandpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
 ; AVX1-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -76,9 +76,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
 ; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vmovmskps %xmm0, %eax
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
 ; AVX2-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -126,9 +124,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double>
 ; AVX12-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX12-NEXT:    vcmpltpd %ymm2, %ymm3, %ymm1
 ; AVX12-NEXT:    vandpd %ymm1, %ymm0, %ymm0
-; AVX12-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX12-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT:    vmovmskps %xmm0, %eax
+; AVX12-NEXT:    vmovmskpd %ymm0, %eax
 ; AVX12-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX12-NEXT:    vzeroupper
 ; AVX12-NEXT:    retq

From c9f4b7d201cce6343939f741ffdb5f622b6768b0 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 10:59:21 +0000
Subject: [PATCH 0249/1176] [ARM] Promote various fp16 math intrinsics

Promote a number of fp16 math intrinsics to float, so that the relevant float
math routines can be used. Copysign is expanded so as to be handled in-place.

Differential Revision: https://reviews.llvm.org/D62325

llvm-svn: 361717
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  11 ++
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll  | 243 +++++++++++++++++-------
 2 files changed, 182 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8e5c0767358c5..24e67010908e8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1145,6 +1145,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   // FP16 often need to be promoted to call lib functions
   if (Subtarget->hasFullFP16()) {
     setOperationAction(ISD::FREM, MVT::f16, Promote);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+    setOperationAction(ISD::FSIN, MVT::f16, Promote);
+    setOperationAction(ISD::FCOS, MVT::f16, Promote);
+    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+    setOperationAction(ISD::FPOW, MVT::f16, Promote);
+    setOperationAction(ISD::FEXP, MVT::f16, Promote);
+    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
   }
 
   if (Subtarget->hasNEON()) {
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 7e8369be740f8..5584e7ef88c83 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -229,78 +229,162 @@ define void @test_sqrt(half* %p) {
   ret void
 }
 
-; FIXME
-;define void @test_fpowi(half* %p, i32 %b) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.powi.f16(half %a, i32 %b)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_fpowi(half* %p, i32 %b) {
+; CHECK-LABEL: test_fpowi:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl __powisf2
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.powi.f16(half %a, i32 %b)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_sin(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.sin.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_sin(half* %p) {
+; CHECK-LABEL: test_sin:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl sinf
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.sin.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_cos(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.cos.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_cos(half* %p) {
+; CHECK-LABEL: test_cos:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl cosf
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.cos.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_pow(half* %p, half* %q) {
-;  %a = load half, half* %p, align 2
-;  %b = load half, half* %q, align 2
-;  %r = call half @llvm.pow.f16(half %a, half %b)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_pow(half* %p, half* %q) {
+; CHECK-LABEL: test_pow:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s2, [r1]
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    vcvtb.f32.f16 s1, s2
+; CHECK-NEXT:    bl powf
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = call half @llvm.pow.f16(half %a, half %b)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_exp(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.exp.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_exp(half* %p) {
+; CHECK-LABEL: test_exp:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl expf
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.exp.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_exp2(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.exp2.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_exp2(half* %p) {
+; CHECK-LABEL: test_exp2:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl exp2f
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.exp2.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_log(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.log.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_log(half* %p) {
+; CHECK-LABEL: test_log:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl logf
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.log.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_log10(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.log10.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_log10(half* %p) {
+; CHECK-LABEL: test_log10:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl log10f
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.log10.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_log2(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.log2.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_log2(half* %p) {
+; CHECK-LABEL: test_log2:
+; CHECK:         .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vcvtb.f32.f16 s0, s0
+; CHECK-NEXT:    bl log2f
+; CHECK-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r4]
+; CHECK-NEXT:    pop {r4, pc}
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.log2.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
 ; FIXME
 ;define void @test_fma(half* %p, half* %q, half* %r) {
@@ -380,14 +464,29 @@ define void @test_maximum(half* %p) {
   ret void
 }
 
-; FIXME
-;define void @test_copysign(half* %p, half* %q) {
-;  %a = load half, half* %p, align 2
-;  %b = load half, half* %q, align 2
-;  %r = call half @llvm.copysign.f16(half %a, half %b)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_copysign(half* %p, half* %q) {
+; CHECK-LABEL: test_copysign:
+; CHECK:         .pad #4
+; CHECK-NEXT:    sub sp, sp, #4
+; CHECK-NEXT:    vldr.16 s0, [r1]
+; CHECK-NEXT:    vstr.16 s0, [sp]
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    ldrb r1, [sp, #1]
+; CHECK-NEXT:    ands r1, r1, #128
+; CHECK-NEXT:    vabs.f16 s0, s0
+; CHECK-NEXT:    movwne r1, #1
+; CHECK-NEXT:    vneg.f16 s2, s0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vseleq.f16 s0, s0, s2
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    add sp, sp, #4
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %r = call half @llvm.copysign.f16(half %a, half %b)
+  store half %r, half* %p
+  ret void
+}
 
 ; FIXME
 ;define void @test_floor(half* %p) {

From 21542cd6f4c62004dfa24b56766d9cd135eb98cd Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 11:13:00 +0000
Subject: [PATCH 0250/1176] [ARM] Select a number of fp16 rounding functions

This add patterns for fp16 round and ceil etc. Same as the float and double
patterns.

Differential Revision: https://reviews.llvm.org/D62326

llvm-svn: 361718
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp    |   2 +
 llvm/lib/Target/ARM/ARMInstrVFP.td         |   8 +-
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll     | 108 +++++++++++++--------
 llvm/test/CodeGen/ARM/fp16-instructions.ll |  13 ---
 4 files changed, 72 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 24e67010908e8..677e4d5b2e8b7 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1156,6 +1156,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FLOG, MVT::f16, Promote);
     setOperationAction(ISD::FLOG10, MVT::f16, Promote);
     setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+
+    setOperationAction(ISD::FROUND, MVT::f16, Legal);
   }
 
   if (Subtarget->hasNEON()) {
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 1f497f6d5bff8..e3d2a94778801 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -930,9 +930,9 @@ def VNEGH  : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
 
 multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
   def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
-               (outs SPR:$Sd), (ins SPR:$Sm),
+               (outs HPR:$Sd), (ins HPR:$Sm),
                NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
-               []>,
+               [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
                Requires<[HasFullFP16]> {
     let Inst{7} = op2;
     let Inst{16} = op;
@@ -975,9 +975,9 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
   let PostEncoderMethod = "", DecoderNamespace = "VFPV8",
       isUnpredicable = 1 in {
     def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
-                   (outs SPR:$Sd), (ins SPR:$Sm),
+                   (outs HPR:$Sd), (ins HPR:$Sm),
                    NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
-                   []>,
+                   [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
                    Requires<[HasFullFP16]> {
       let Inst{17-16} = rm;
     }
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 5584e7ef88c83..19afba05db645 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -488,53 +488,77 @@ define void @test_copysign(half* %p, half* %q) {
   ret void
 }
 
-; FIXME
-;define void @test_floor(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.floor.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_floor(half* %p) {
+; CHECK-LABEL: test_floor:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintm.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.floor.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_ceil(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.ceil.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_ceil(half* %p) {
+; CHECK-LABEL: test_ceil:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintp.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.ceil.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_trunc(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.trunc.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_trunc(half* %p) {
+; CHECK-LABEL: test_trunc:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintz.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.trunc.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_rint(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.rint.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_rint(half* %p) {
+; CHECK-LABEL: test_rint:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintx.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.rint.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_nearbyint(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.nearbyint.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_nearbyint(half* %p) {
+; CHECK-LABEL: test_nearbyint:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrintr.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.nearbyint.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
-; FIXME
-;define void @test_round(half* %p) {
-;  %a = load half, half* %p, align 2
-;  %r = call half @llvm.round.f16(half %a)
-;  store half %r, half* %p
-;  ret void
-;}
+define void @test_round(half* %p) {
+; CHECK-LABEL: test_round:
+; CHECK:         vldr.16 s0, [r0]
+; CHECK-NEXT:    vrinta.f16 s0, s0
+; CHECK-NEXT:    vstr.16 s0, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %r = call half @llvm.round.f16(half %a)
+  store half %r, half* %p
+  ret void
+}
 
 define void @test_fmuladd(half* %p, half* %q, half* %r) {
 ; CHECK-LABEL: test_fmuladd:
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index ef0d4834f5aa1..514d3c7ae0a81 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -43,8 +43,6 @@ entry:
 ; CHECK-HARDFP-FULLFP16:  {{.*}} lr
 }
 
-; 1. VABS: TODO
-
 ; 2. VADD
 define float @Add(float %a.coerce, float %b.coerce) {
 entry:
@@ -691,15 +689,6 @@ entry:
 ; CHECK-HARDFP-FULLFP16:       vnmul.f16  s0, s0, s1
 }
 
-; TODO:
-; 28. VRINTA
-; 29. VRINTM
-; 30. VRINTN
-; 31. VRINTP
-; 32. VRINTR
-; 33. VRINTX
-; 34. VRINTZ
-
 ; 35. VSELEQ
 define half @select_cc1(half* %a0)  {
   %1 = load half, half* %a0
@@ -955,8 +944,6 @@ entry:
 ; CHECK-SOFTFP-FP16-T32-NEXT:  vcvtb.f16.f32 s0, [[S4]]
 }
 
-; 39. VSQRT - TODO
-
 ; 40. VSUB
 define float @Sub(float %a.coerce, float %b.coerce) {
 entry:

From 0dbafe191e5dd3f74dcbb54a126d62e3b3266820 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sun, 26 May 2019 11:34:30 +0000
Subject: [PATCH 0251/1176] [ARM] Select fp16 fma

This adds a pattern for fma, similar to the float and double patterns.

Differential Revision: https://reviews.llvm.org/D62330

llvm-svn: 361719
---
 llvm/lib/Target/ARM/ARMInstrVFP.td     |  3 +++
 llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 24 +++++++++++++++---------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index e3d2a94778801..d1b32f531c05f 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2073,6 +2073,9 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
 def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
           (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
       Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, HPR:$Sdin)),
+          (VFMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+      Requires<[HasFullFP16]>;
 
 def VFMSD : ADbI<0b11101, 0b10, 1, 0,
                  (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
index 19afba05db645..a30b62acbac43 100644
--- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll
@@ -386,15 +386,21 @@ define void @test_log2(half* %p) {
   ret void
 }
 
-; FIXME
-;define void @test_fma(half* %p, half* %q, half* %r) {
-;  %a = load half, half* %p, align 2
-;  %b = load half, half* %q, align 2
-;  %c = load half, half* %r, align 2
-;  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
-;  store half %v, half* %p
-;  ret void
-;}
+define void @test_fma(half* %p, half* %q, half* %r) {
+; CHECK-LABEL: test_fma:
+; CHECK:         vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vldr.16 s4, [r2]
+; CHECK-NEXT:    vfma.f16 s4, s2, s0
+; CHECK-NEXT:    vstr.16 s4, [r0]
+; CHECK-NEXT:    bx lr
+  %a = load half, half* %p, align 2
+  %b = load half, half* %q, align 2
+  %c = load half, half* %r, align 2
+  %v = call half @llvm.fma.f16(half %a, half %b, half %c)
+  store half %v, half* %p
+  ret void
+}
 
 define void @test_fabs(half* %p) {
 ; CHECK-LABEL: test_fabs:

From 352f59879522c059f98c853e04f50fd7e46acee5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 26 May 2019 11:43:31 +0000
Subject: [PATCH 0252/1176] [InstCombine] Remove OverflowCheckFlavor; NFC

Instead pass binary op and signedness. The extra enum only makes
things more complicated in this case.

llvm-svn: 361720
---
 .../InstCombine/InstCombineCalls.cpp          | 11 ++----
 .../InstCombine/InstCombineCompares.cpp       | 31 +++++++---------
 .../InstCombine/InstCombineInternal.h         | 37 +------------------
 3 files changed, 20 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8d022617d8546..e2813f9d9d496 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1749,15 +1749,12 @@ static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
 }
 
 Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
-  OverflowCheckFlavor OCF =
-      IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
-  assert(OCF != OCF_INVALID && "unexpected!");
-
+  WithOverflowInst *WO = cast<WithOverflowInst>(II);
   Value *OperationResult = nullptr;
   Constant *OverflowResult = nullptr;
-  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
-                            *II, OperationResult, OverflowResult))
-    return CreateOverflowTuple(II, OperationResult, OverflowResult);
+  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
+                            WO->getRHS(), *WO, OperationResult, OverflowResult))
+    return CreateOverflowTuple(WO, OperationResult, OverflowResult);
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ce6b38c90ca40..d848aef2552a1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3936,9 +3936,9 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
   return BinaryOperator::CreateNot(Result);
 }
 
-bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
-                                         Value *RHS, Instruction &OrigI,
-                                         Value *&Result, Constant *&Overflow) {
+bool InstCombiner::OptimizeOverflowCheck(
+    Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS,
+    Instruction &OrigI, Value *&Result, Constant *&Overflow) {
   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
     std::swap(LHS, RHS);
 
@@ -3956,18 +3956,17 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
   // compare.
   Builder.SetInsertPoint(&OrigI);
 
-  switch (OCF) {
-  case OCF_INVALID:
-    llvm_unreachable("bad overflow check kind!");
+  switch (BinaryOp) {
+  default:
+    llvm_unreachable("unsupported binary op");
 
-  case OCF_UNSIGNED_ADD:
-  case OCF_SIGNED_ADD: {
+  case Instruction::Add: {
     // X + 0 -> {X, false}
     if (match(RHS, m_Zero()))
       return SetResult(LHS, Builder.getFalse(), false);
 
     OverflowResult OR;
-    if (OCF == OCF_UNSIGNED_ADD) {
+    if (!IsSigned) {
       OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
       if (OR == OverflowResult::NeverOverflows)
         return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
@@ -3984,14 +3983,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
     break;
   }
 
-  case OCF_UNSIGNED_SUB:
-  case OCF_SIGNED_SUB: {
+  case Instruction::Sub: {
     // X - 0 -> {X, false}
     if (match(RHS, m_Zero()))
       return SetResult(LHS, Builder.getFalse(), false);
 
     OverflowResult OR;
-    if (OCF == OCF_UNSIGNED_SUB) {
+    if (!IsSigned) {
       OR = computeOverflowForUnsignedSub(LHS, RHS, &OrigI);
       if (OR == OverflowResult::NeverOverflows)
         return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
@@ -4008,14 +4006,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
     break;
   }
 
-  case OCF_UNSIGNED_MUL:
-  case OCF_SIGNED_MUL: {
+  case Instruction::Mul: {
     // X * 1 -> {X, false}
     if (match(RHS, m_One()))
       return SetResult(LHS, Builder.getFalse(), false);
 
     OverflowResult OR;
-    if (OCF == OCF_UNSIGNED_MUL) {
+    if (!IsSigned) {
       OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
       if (OR == OverflowResult::NeverOverflows)
         return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
@@ -5053,8 +5050,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         isa<IntegerType>(A->getType())) {
       Value *Result;
       Constant *Overflow;
-      if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
-                                Overflow)) {
+      if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
+                                *AddI, Result, Overflow)) {
         replaceInstUsesWith(*AddI, Result);
         return replaceInstUsesWith(I, Overflow);
       }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index c34a71a2251b1..123025bbccf33 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -185,40 +185,6 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
   return false;
 }
 
-/// Specific patterns of overflow check idioms that we match.
-enum OverflowCheckFlavor {
-  OCF_UNSIGNED_ADD,
-  OCF_SIGNED_ADD,
-  OCF_UNSIGNED_SUB,
-  OCF_SIGNED_SUB,
-  OCF_UNSIGNED_MUL,
-  OCF_SIGNED_MUL,
-
-  OCF_INVALID
-};
-
-/// Returns the OverflowCheckFlavor corresponding to a overflow_with_op
-/// intrinsic.
-static inline OverflowCheckFlavor
-IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
-  switch (ID) {
-  default:
-    return OCF_INVALID;
-  case Intrinsic::uadd_with_overflow:
-    return OCF_UNSIGNED_ADD;
-  case Intrinsic::sadd_with_overflow:
-    return OCF_SIGNED_ADD;
-  case Intrinsic::usub_with_overflow:
-    return OCF_UNSIGNED_SUB;
-  case Intrinsic::ssub_with_overflow:
-    return OCF_SIGNED_SUB;
-  case Intrinsic::umul_with_overflow:
-    return OCF_UNSIGNED_MUL;
-  case Intrinsic::smul_with_overflow:
-    return OCF_SIGNED_MUL;
-  }
-}
-
 /// Some binary operators require special handling to avoid poison and undefined
 /// behavior. If a constant vector has undef elements, replace those undefs with
 /// identity constants if possible because those are always safe to execute.
@@ -469,7 +435,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   /// operation in OperationResult and result of the overflow check in
   /// OverflowResult, and return true.  If no simplification is possible,
   /// returns false.
-  bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+  bool OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, bool IsSigned,
+                             Value *LHS, Value *RHS,
                              Instruction &CtxI, Value *&OperationResult,
                              Constant *&OverflowResult);
 

From 39f2bebf415d2e8c8cc439aeb40fff5c23e9c8c1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 26 May 2019 11:43:37 +0000
Subject: [PATCH 0253/1176] [InstCombine] Refactor OptimizeOverflowCheck; NFCI

Extract method to compute overflow based on binop and signedness,
and then make the result handling code generic. This extends the
always-overflow handling to signed muls, but has currently no effect,
as we don't compute always overflow for them (thus NFC).

llvm-svn: 361721
---
 .../InstCombine/InstCombineCompares.cpp       | 138 ++++++++----------
 .../InstCombine/InstCombineInternal.h         |   6 +-
 2 files changed, 65 insertions(+), 79 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d848aef2552a1..ab2da177d7b2e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3936,100 +3936,82 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
   return BinaryOperator::CreateNot(Result);
 }
 
+static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
+  switch (BinaryOp) {
+    default:
+      llvm_unreachable("Unsupported binary op");
+    case Instruction::Add:
+    case Instruction::Sub:
+      return match(RHS, m_Zero());
+    case Instruction::Mul:
+      return match(RHS, m_One());
+  }
+}
+
+OverflowResult InstCombiner::computeOverflow(
+    Instruction::BinaryOps BinaryOp, bool IsSigned,
+    Value *LHS, Value *RHS, Instruction *CxtI) const {
+  switch (BinaryOp) {
+    default:
+      llvm_unreachable("Unsupported binary op");
+    case Instruction::Add:
+      if (IsSigned)
+        return computeOverflowForSignedAdd(LHS, RHS, CxtI);
+      else
+        return computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
+    case Instruction::Sub:
+      if (IsSigned)
+        return computeOverflowForSignedSub(LHS, RHS, CxtI);
+      else
+        return computeOverflowForUnsignedSub(LHS, RHS, CxtI);
+    case Instruction::Mul:
+      if (IsSigned)
+        return computeOverflowForSignedMul(LHS, RHS, CxtI);
+      else
+        return computeOverflowForUnsignedMul(LHS, RHS, CxtI);
+  }
+}
+
 bool InstCombiner::OptimizeOverflowCheck(
     Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS,
     Instruction &OrigI, Value *&Result, Constant *&Overflow) {
   if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
     std::swap(LHS, RHS);
 
-  auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
-    Result = OpResult;
-    Overflow = OverflowVal;
-    if (ReuseName)
-      Result->takeName(&OrigI);
-    return true;
-  };
-
   // If the overflow check was an add followed by a compare, the insertion point
   // may be pointing to the compare.  We want to insert the new instructions
   // before the add in case there are uses of the add between the add and the
   // compare.
   Builder.SetInsertPoint(&OrigI);
 
-  switch (BinaryOp) {
-  default:
-    llvm_unreachable("unsupported binary op");
-
-  case Instruction::Add: {
-    // X + 0 -> {X, false}
-    if (match(RHS, m_Zero()))
-      return SetResult(LHS, Builder.getFalse(), false);
-
-    OverflowResult OR;
-    if (!IsSigned) {
-      OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
-      if (OR == OverflowResult::NeverOverflows)
-        return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
-                         true);
-    } else {
-      OR = computeOverflowForSignedAdd(LHS, RHS, &OrigI);
-      if (OR == OverflowResult::NeverOverflows)
-        return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
-                         true);
-    }
-
-    if (OR == OverflowResult::AlwaysOverflows)
-      return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
-    break;
-  }
-
-  case Instruction::Sub: {
-    // X - 0 -> {X, false}
-    if (match(RHS, m_Zero()))
-      return SetResult(LHS, Builder.getFalse(), false);
-
-    OverflowResult OR;
-    if (!IsSigned) {
-      OR = computeOverflowForUnsignedSub(LHS, RHS, &OrigI);
-      if (OR == OverflowResult::NeverOverflows)
-        return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
-                         true);
-    } else {
-      OR = computeOverflowForSignedSub(LHS, RHS, &OrigI);
-      if (OR == OverflowResult::NeverOverflows)
-        return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
-                         true);
-    }
-
-    if (OR == OverflowResult::AlwaysOverflows)
-      return SetResult(Builder.CreateSub(LHS, RHS), Builder.getTrue(), true);
-    break;
+  if (isNeutralValue(BinaryOp, RHS)) {
+    Result = LHS;
+    Overflow = Builder.getFalse();
+    return true;
   }
 
-  case Instruction::Mul: {
-    // X * 1 -> {X, false}
-    if (match(RHS, m_One()))
-      return SetResult(LHS, Builder.getFalse(), false);
-
-    OverflowResult OR;
-    if (!IsSigned) {
-      OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
-      if (OR == OverflowResult::NeverOverflows)
-        return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
-                         true);
-      if (OR == OverflowResult::AlwaysOverflows)
-        return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
-    } else {
-      OR = computeOverflowForSignedMul(LHS, RHS, &OrigI);
-      if (OR == OverflowResult::NeverOverflows)
-        return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
-                         true);
-    }
-    break;
-  }
+  switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
+    case OverflowResult::MayOverflow:
+      return false;
+    case OverflowResult::AlwaysOverflows:
+      Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+      Result->takeName(&OrigI);
+      Overflow = Builder.getTrue();
+      return true;
+    case OverflowResult::NeverOverflows:
+      Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+      Result->takeName(&OrigI);
+      Overflow = Builder.getFalse();
+      if (auto *Inst = dyn_cast<Instruction>(Result)) {
+        if (IsSigned)
+          Inst->setHasNoSignedWrap();
+        else
+          Inst->setHasNoUnsignedWrap();
+      }
+      return true;
   }
 
-  return false;
+  llvm_unreachable("Unexpected overflow result");
 }
 
 /// Recognize and process idiom involving test for multiplication
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 123025bbccf33..434b0d5912157 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -692,7 +692,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   }
 
   OverflowResult computeOverflowForSignedMul(const Value *LHS,
-	                                         const Value *RHS,
+                                             const Value *RHS,
                                              const Instruction *CxtI) const {
     return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
   }
@@ -720,6 +720,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
     return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT);
   }
 
+  OverflowResult computeOverflow(
+      Instruction::BinaryOps BinaryOp, bool IsSigned,
+      Value *LHS, Value *RHS, Instruction *CxtI) const;
+
   /// Maximum size of array considered when transforming.
   uint64_t MaxArraySizeForCombine;
 

From 7228b50802c915aa29ad79b1dd3aa580fc63bd26 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 26 May 2019 13:06:48 +0000
Subject: [PATCH 0254/1176] gn build: Merge r361664

llvm-svn: 361722
---
 .../gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn      | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
index 1964ade17df5b..8d55f89e05bf5 100644
--- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
@@ -84,6 +84,7 @@ static_library("Checkers") {
     "ObjCUnusedIVarsChecker.cpp",
     "PaddingChecker.cpp",
     "PointerArithChecker.cpp",
+    "PointerIterationChecker.cpp",
     "PointerSortingChecker.cpp",
     "PointerSubChecker.cpp",
     "PthreadLockChecker.cpp",

From d0f13e618faf4a74b1c8c1e87f076f3b04690e74 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 26 May 2019 13:22:01 +0000
Subject: [PATCH 0255/1176] [ValueTracking] Base
 computeOverflowForUnsignedMul() on ConstantRange code; NFCI

The implementation in ValueTracking and ConstantRange are equally
powerful, reuse the one in ConstantRange, which will make this easier
to extend.

llvm-svn: 361723
---
 llvm/lib/Analysis/ValueTracking.cpp | 102 ++++++++++------------------
 1 file changed, 34 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 54575441b717d..d46ddc428b266 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3987,51 +3987,44 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
   return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
 }
 
+/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
+static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
+  switch (OR) {
+    case ConstantRange::OverflowResult::MayOverflow:
+      return OverflowResult::MayOverflow;
+    case ConstantRange::OverflowResult::AlwaysOverflows:
+      return OverflowResult::AlwaysOverflows;
+    case ConstantRange::OverflowResult::NeverOverflows:
+      return OverflowResult::NeverOverflows;
+  }
+  llvm_unreachable("Unknown OverflowResult");
+}
+
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+static ConstantRange computeConstantRangeIncludingKnownBits(
+    const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
+    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+    OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
+  KnownBits Known = computeKnownBits(
+      V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
+  ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
+  ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
+  ConstantRange::PreferredRangeType RangeType =
+      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
+  return CR1.intersectWith(CR2, RangeType);
+}
+
 OverflowResult llvm::computeOverflowForUnsignedMul(
     const Value *LHS, const Value *RHS, const DataLayout &DL,
     AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
     bool UseInstrInfo) {
-  // Multiplying n * m significant bits yields a result of n + m significant
-  // bits. If the total number of significant bits does not exceed the
-  // result bit width (minus 1), there is no overflow.
-  // This means if we have enough leading zero bits in the operands
-  // we can guarantee that the result does not overflow.
-  // Ref: "Hacker's Delight" by Henry Warren
-  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
-  KnownBits LHSKnown(BitWidth);
-  KnownBits RHSKnown(BitWidth);
-  computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
-                   UseInstrInfo);
-  computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
-                   UseInstrInfo);
-  // Note that underestimating the number of zero bits gives a more
-  // conservative answer.
-  unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
-                      RHSKnown.countMinLeadingZeros();
-  // First handle the easy case: if we have enough zero bits there's
-  // definitely no overflow.
-  if (ZeroBits >= BitWidth)
-    return OverflowResult::NeverOverflows;
-
-  // Get the largest possible values for each operand.
-  APInt LHSMax = ~LHSKnown.Zero;
-  APInt RHSMax = ~RHSKnown.Zero;
-
-  // We know the multiply operation doesn't overflow if the maximum values for
-  // each operand will not overflow after we multiply them together.
-  bool MaxOverflow;
-  (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
-  if (!MaxOverflow)
-    return OverflowResult::NeverOverflows;
-
-  // We know it always overflows if multiplying the smallest possible values for
-  // the operands also results in overflow.
-  bool MinOverflow;
-  (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
-  if (MinOverflow)
-    return OverflowResult::AlwaysOverflows;
-
-  return OverflowResult::MayOverflow;
+  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+                                        nullptr, UseInstrInfo);
+  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+                                        nullptr, UseInstrInfo);
+  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
+  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
+  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
 }
 
 OverflowResult
@@ -4077,33 +4070,6 @@ llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
   return OverflowResult::MayOverflow;
 }
 
-/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
-static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
-  switch (OR) {
-    case ConstantRange::OverflowResult::MayOverflow:
-      return OverflowResult::MayOverflow;
-    case ConstantRange::OverflowResult::AlwaysOverflows:
-      return OverflowResult::AlwaysOverflows;
-    case ConstantRange::OverflowResult::NeverOverflows:
-      return OverflowResult::NeverOverflows;
-  }
-  llvm_unreachable("Unknown OverflowResult");
-}
-
-/// Combine constant ranges from computeConstantRange() and computeKnownBits().
-static ConstantRange computeConstantRangeIncludingKnownBits(
-    const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
-    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
-    OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
-  KnownBits Known = computeKnownBits(
-      V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
-  ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
-  ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
-  ConstantRange::PreferredRangeType RangeType =
-      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
-  return CR1.intersectWith(CR2, RangeType);
-}
-
 OverflowResult llvm::computeOverflowForUnsignedAdd(
     const Value *LHS, const Value *RHS, const DataLayout &DL,
     AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,

From b7cc093db28cf6d20b35092c4108bc8aa15cc011 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 13:49:58 +0000
Subject: [PATCH 0256/1176] [Support] make countLeadingZeros() and
 countTrailingZeros() return unsigned

This matches countLeadingOnes() and countTrailingOnes(), and
APInt's countLeadingZeros() and countTrailingZeros().

(as well as __builtin_clzll())

llvm-svn: 361724
---
 llvm/include/llvm/Support/MathExtras.h      | 24 ++++++++++-----------
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp |  2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp   | 23 ++++++++++----------
 3 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index e902a725659ad..85d5a5ae4b903 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -51,14 +51,14 @@ enum ZeroBehavior {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static std::size_t count(T Val, ZeroBehavior) {
+  static unsigned count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
     if (Val & 0x1)
       return 0;
 
     // Bisection method.
-    std::size_t ZeroBits = 0;
+    unsigned ZeroBits = 0;
     T Shift = std::numeric_limits<T>::digits >> 1;
     T Mask = std::numeric_limits<T>::max() >> Shift;
     while (Shift) {
@@ -75,7 +75,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct TrailingZerosCounter<T, 4> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -91,7 +91,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct TrailingZerosCounter<T, 8> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -116,7 +116,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -125,12 +125,12 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static std::size_t count(T Val, ZeroBehavior) {
+  static unsigned count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
 
     // Bisection method.
-    std::size_t ZeroBits = 0;
+    unsigned ZeroBits = 0;
     for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
       T Tmp = Val >> Shift;
       if (Tmp)
@@ -144,7 +144,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct LeadingZerosCounter<T, 4> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -160,7 +160,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct LeadingZerosCounter<T, 8> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -185,7 +185,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -458,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -474,7 +474,7 @@ std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0e8a517d1d644..0b0dd0ae28b3f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1147,7 +1147,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
-  Out.kernarg_segment_alignment = std::max((size_t)4,
+  Out.kernarg_segment_alignment = std::max<size_t>(4,
       countTrailingZeros(MaxKernArgAlign));
 }
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 90b552035af3d..71d592e539d37 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5377,8 +5377,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   if (MinCaseVal->isNullValue())
     TableIndex = SI->getCondition();
   else
-    TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
-                                   "switch.tableidx");
+    TableIndex =
+        Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx");
 
   // Compute the maximum table size representable by the integer type we are
   // switching upon.
@@ -5512,7 +5512,8 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) {
   uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
   uint64_t Range = Diff + 1;
   uint64_t NumCases = Values.size();
-  // 40% is the default density for building a jump table in optsize/minsize mode.
+  // 40% is the default density for building a jump table in optsize/minsize
+  // mode.
   uint64_t MinDensity = 40;
 
   return NumCases * 100 >= Range * MinDensity;
@@ -5538,11 +5539,11 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
   if (SI->getNumCases() < 4)
     return false;
 
-  // This transform is agnostic to the signedness of the input or case values. We
-  // can treat the case values as signed or unsigned. We can optimize more common
-  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
-  // as signed.
-  SmallVector<int64_t,4> Values;
+  // This transform is agnostic to the signedness of the input or case values.
+  // We can treat the case values as signed or unsigned. We can optimize more
+  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
+  // case values as signed.
+  SmallVector<int64_t, 4> Values;
   for (auto &C : SI->cases())
     Values.push_back(C.getCaseValue()->getValue().getSExtValue());
   llvm::sort(Values);
@@ -5563,9 +5564,9 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
   for (auto &V : Values)
     GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
 
-  // This transform can be done speculatively because it is so cheap - it results
-  // in a single rotate operation being inserted. This can only happen if the
-  // factor extracted is a power of 2.
+  // This transform can be done speculatively because it is so cheap - it
+  // results in a single rotate operation being inserted. This can only happen
+  // if the factor extracted is a power of 2.
   // FIXME: If the GCD is an odd number we can multiply by the multiplicative
   // inverse of GCD and then perform this transform.
   // FIXME: It's possible that optimizing a switch on powers of two might also

From 50c73a044f22e97ea384c461bcdd2ea4fbed0156 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 13:52:41 +0000
Subject: [PATCH 0257/1176] [SimplifyCFG] NFC, update Switch tests to HEAD so I
 can see if my changes change anything

Also add baseline tests to show effect of later patches.

llvm-svn: 361725
---
 .../Transforms/SimplifyCFG/ARM/cttz-ctlz.ll   |   21 +-
 .../SimplifyCFG/ARM/select-trunc-i64.ll       |   17 +-
 .../switch-to-lookup-table-constant-expr.ll   |   24 +-
 .../SimplifyCFG/ARM/switch-to-lookup-table.ll |  124 +-
 .../SimplifyCFG/X86/disable-lookup-table.ll   |   49 +-
 .../SimplifyCFG/X86/speculate-cttz-ctlz.ll    |  477 ++++--
 .../SimplifyCFG/X86/switch-covered-bug.ll     |   32 +-
 .../SimplifyCFG/X86/switch-table-bug.ll       |   25 +-
 .../SimplifyCFG/X86/switch_to_lookup_table.ll | 1345 ++++++++++-------
 .../SimplifyCFG/switch-dead-default.ll        |  169 ++-
 .../SimplifyCFG/switch-masked-bits.ll         |   23 +-
 .../SimplifyCFG/switch-on-const-select.ll     |  191 +--
 .../SimplifyCFG/switch-range-to-icmp.ll       |   79 +-
 .../SimplifyCFG/switch_create-custom-dl.ll    |  592 +++++---
 .../Transforms/SimplifyCFG/switch_create.ll   |  591 +++++---
 15 files changed, 2469 insertions(+), 1290 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
index ffcf2175091f1..22f5e9f3cc1dc 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
@@ -1,11 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -mtriple=arm -mattr=+v6t2 < %s | FileCheck %s
 
 define i32 @ctlz(i32 %A) {
 ; CHECK-LABEL: @ctlz(
-; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
-; CHECK-NEXT: ret i32 [[SEL]]
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+;
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -21,10 +24,12 @@ cond.end:
 
 define i32 @cttz(i32 %A) {
 ; CHECK-LABEL: @cttz(
-; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
-; CHECK-NEXT: ret i32 [[SEL]]
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+;
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll b/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll
index 9484de77db487..9218ee185b1e4 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll
@@ -1,11 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ;RUN: opt -S -simplifycfg -mtriple=arm < %s | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
-; CHECK-LABEL: select_trunc_i64
-; CHECK-NOT: br
-; CHECK: select
-; CHECK: select
 define arm_aapcscc i32 @select_trunc_i64(i32 %a, i32 %b) {
+; CHECK-LABEL: @select_trunc_i64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[ADD]], 2147483647
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648
+; CHECK-NEXT:    [[EXTRACT_T:%.*]] = trunc i64 [[COND]] to i32
+; CHECK-NEXT:    [[COND8_OFF0:%.*]] = select i1 [[CMP]], i32 2147483647, i32 [[EXTRACT_T]]
+; CHECK-NEXT:    ret i32 [[COND8_OFF0]]
+;
 entry:
   %conv = sext i32 %a to i64
   %conv1 = sext i32 %b to i64
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll
index 453a76864032e..f677371734268 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -15,11 +16,28 @@ target triple = "armv7a--none-eabi"
 @g4 = external thread_local global i32, align 4
 
 define i32* @test3(i32 %n) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @g4, [[SW_DEFAULT]] ], [ getelementptr inbounds (i32, i32* inttoptr (i32 mul (i32 ptrtoint (i32* @g3 to i32), i32 2) to i32*), i32 1), [[SW_BB2]] ], [ @g2, [[SW_BB1]] ], [ @g1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32* [[RETVAL_0]]
+;
 entry:
   switch i32 %n, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
   ]
 
 sw.bb:
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
index 501bc31bd0dd8..a931e9084393e 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=static    < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
 ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=pic       < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
 ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=ropi      < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
@@ -22,11 +23,22 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "armv7a--none-eabi"
 
 define i32 @test1(i32 %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.test1, i32 0, i32 [[N]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 15498
+;
 entry:
   switch i32 %n, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
   ]
 
 sw.bb:
@@ -53,11 +65,39 @@ return:
 
 
 define i32* @test2(i32 %n) {
+; ENABLE-LABEL: @test2(
+; ENABLE-NEXT:  entry:
+; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3
+; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; ENABLE:       switch.lookup:
+; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.test2, i32 0, i32 [[N]]
+; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]]
+; ENABLE-NEXT:    ret i32* [[SWITCH_LOAD]]
+; ENABLE:       return:
+; ENABLE-NEXT:    ret i32* @c4
+;
+; DISABLE-LABEL: @test2(
+; DISABLE-NEXT:  entry:
+; DISABLE-NEXT:    switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [
+; DISABLE-NEXT:    i32 0, label [[RETURN:%.*]]
+; DISABLE-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; DISABLE-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; DISABLE-NEXT:    ]
+; DISABLE:       sw.bb1:
+; DISABLE-NEXT:    br label [[RETURN]]
+; DISABLE:       sw.bb2:
+; DISABLE-NEXT:    br label [[RETURN]]
+; DISABLE:       sw.default:
+; DISABLE-NEXT:    br label [[RETURN]]
+; DISABLE:       return:
+; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @c4, [[SW_DEFAULT]] ], [ @c3, [[SW_BB2]] ], [ @c2, [[SW_BB1]] ], [ @c1, [[ENTRY:%.*]] ]
+; DISABLE-NEXT:    ret i32* [[RETVAL_0]]
+;
 entry:
   switch i32 %n, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
   ]
 
 sw.bb:
@@ -83,11 +123,39 @@ return:
 @g4 = external global i32, align 4
 
 define i32* @test3(i32 %n) {
+; ENABLE-LABEL: @test3(
+; ENABLE-NEXT:  entry:
+; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3
+; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; ENABLE:       switch.lookup:
+; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.test3, i32 0, i32 [[N]]
+; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]]
+; ENABLE-NEXT:    ret i32* [[SWITCH_LOAD]]
+; ENABLE:       return:
+; ENABLE-NEXT:    ret i32* @g4
+;
+; DISABLE-LABEL: @test3(
+; DISABLE-NEXT:  entry:
+; DISABLE-NEXT:    switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [
+; DISABLE-NEXT:    i32 0, label [[RETURN:%.*]]
+; DISABLE-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; DISABLE-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; DISABLE-NEXT:    ]
+; DISABLE:       sw.bb1:
+; DISABLE-NEXT:    br label [[RETURN]]
+; DISABLE:       sw.bb2:
+; DISABLE-NEXT:    br label [[RETURN]]
+; DISABLE:       sw.default:
+; DISABLE-NEXT:    br label [[RETURN]]
+; DISABLE:       return:
+; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @g4, [[SW_DEFAULT]] ], [ @g3, [[SW_BB2]] ], [ @g2, [[SW_BB1]] ], [ @g1, [[ENTRY:%.*]] ]
+; DISABLE-NEXT:    ret i32* [[RETVAL_0]]
+;
 entry:
   switch i32 %n, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
   ]
 
 sw.bb:
@@ -114,6 +182,44 @@ declare i32 @f4(i32, i32)
 declare i32 @f5(i32, i32)
 
 define i32 @test4(i32 %a, i32 %b, i32 %c) {
+; ENABLE-LABEL: @test4(
+; ENABLE-NEXT:  entry:
+; ENABLE-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[A:%.*]], 1
+; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 3
+; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[COND_FALSE6:%.*]]
+; ENABLE:       cond.false6:
+; ENABLE-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[A]], 4
+; ENABLE-NEXT:    [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
+; ENABLE-NEXT:    br label [[COND_END11:%.*]]
+; ENABLE:       switch.lookup:
+; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32 (i32, i32)*], [3 x i32 (i32, i32)*]* @switch.table.test4, i32 0, i32 [[SWITCH_TABLEIDX]]
+; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32 (i32, i32)*, i32 (i32, i32)** [[SWITCH_GEP]]
+; ENABLE-NEXT:    br label [[COND_END11]]
+; ENABLE:       cond.end11:
+; ENABLE-NEXT:    [[COND12:%.*]] = phi i32 (i32, i32)* [ [[COND]], [[COND_FALSE6]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; ENABLE-NEXT:    [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]])
+; ENABLE-NEXT:    ret i32 [[CALL]]
+;
+; DISABLE-LABEL: @test4(
+; DISABLE-NEXT:  entry:
+; DISABLE-NEXT:    switch i32 [[A:%.*]], label [[COND_FALSE6:%.*]] [
+; DISABLE-NEXT:    i32 1, label [[COND_END11:%.*]]
+; DISABLE-NEXT:    i32 2, label [[COND_END11_FOLD_SPLIT:%.*]]
+; DISABLE-NEXT:    i32 3, label [[COND_END11_FOLD_SPLIT1:%.*]]
+; DISABLE-NEXT:    ]
+; DISABLE:       cond.false6:
+; DISABLE-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[A]], 4
+; DISABLE-NEXT:    [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
+; DISABLE-NEXT:    br label [[COND_END11]]
+; DISABLE:       cond.end11.fold.split:
+; DISABLE-NEXT:    br label [[COND_END11]]
+; DISABLE:       cond.end11.fold.split1:
+; DISABLE-NEXT:    br label [[COND_END11]]
+; DISABLE:       cond.end11:
+; DISABLE-NEXT:    [[COND12:%.*]] = phi i32 (i32, i32)* [ @f1, [[ENTRY:%.*]] ], [ [[COND]], [[COND_FALSE6]] ], [ @f2, [[COND_END11_FOLD_SPLIT]] ], [ @f3, [[COND_END11_FOLD_SPLIT1]] ]
+; DISABLE-NEXT:    [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]])
+; DISABLE-NEXT:    ret i32 [[CALL]]
+;
 entry:
   %cmp = icmp eq i32 %a, 1
   br i1 %cmp, label %cond.end11, label %cond.false
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
index a8758a789ec4d..adb0a9819dc64 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -switch-to-lookup -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; RUN: opt < %s -passes='simplify-cfg<switch-to-lookup>' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 
@@ -8,12 +9,32 @@
 ; CHECK-NOT: @switch.table.foo = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1]
 
 define i32 @foo(i32 %c) "no-jump-tables"="true" {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 42, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 43, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 44, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    i32 45, label [[SW_BB3:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb3:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 15, [[SW_DEFAULT]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 42, label %return
-    i32 43, label %sw.bb1
-    i32 44, label %sw.bb2
-    i32 45, label %sw.bb3
+  i32 42, label %return
+  i32 43, label %sw.bb1
+  i32 44, label %sw.bb2
+  i32 45, label %sw.bb3
   ]
 
 sw.bb1: br label %return
@@ -27,12 +48,24 @@ return:
 
 
 define i32 @bar(i32 %c) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 15
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 42, label %return
-    i32 43, label %sw.bb1
-    i32 44, label %sw.bb2
-    i32 45, label %sw.bb3
+  i32 42, label %return
+  i32 43, label %sw.bb1
+  i32 44, label %sw.bb2
+  i32 45, label %sw.bb3
   ]
 
 sw.bb1: br label %return
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
index bee80e6acce07..11ba3984f35d4 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -1,14 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=ALL --check-prefix=BMI
 ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+lzcnt < %s | FileCheck %s --check-prefix=ALL --check-prefix=LZCNT
 ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
 
 
 define i64 @test1(i64 %A) {
-; ALL-LABEL: @test1(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
-; ALL: ret
+; BMI-LABEL: @test1(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
+; BMI-NEXT:    ret i64 [[COND]]
+;
+; LZCNT-LABEL: @test1(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
+; LZCNT-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
+; LZCNT-NEXT:    ret i64 [[SPEC_SELECT]]
+;
+; GENERIC-LABEL: @test1(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
+; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
+; GENERIC-NEXT:    ret i64 [[COND]]
+;
 entry:
   %tobool = icmp eq i64 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -23,11 +40,27 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test2(i32 %A) {
-; ALL-LABEL: @test2(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
-; ALL: ret
+; BMI-LABEL: @test2(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; BMI-NEXT:    ret i32 [[COND]]
+;
+; LZCNT-LABEL: @test2(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
+; LZCNT-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; LZCNT-NEXT:    ret i32 [[SPEC_SELECT]]
+;
+; GENERIC-LABEL: @test2(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
+; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; GENERIC-NEXT:    ret i32 [[COND]]
+;
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -43,11 +76,27 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define signext i16 @test3(i16 signext %A) {
-; ALL-LABEL: @test3(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
-; ALL: ret
+; BMI-LABEL: @test3(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true)
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
+; BMI-NEXT:    ret i16 [[COND]]
+;
+; LZCNT-LABEL: @test3(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true)
+; LZCNT-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
+; LZCNT-NEXT:    ret i16 [[SPEC_SELECT]]
+;
+; GENERIC-LABEL: @test3(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true)
+; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
+; GENERIC-NEXT:    ret i16 [[COND]]
+;
 entry:
   %tobool = icmp eq i16 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -63,11 +112,27 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define i64 @test1b(i64 %A) {
-; ALL-LABEL: @test1b(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
-; ALL: ret
+; BMI-LABEL: @test1b(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
+; BMI-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
+; BMI-NEXT:    ret i64 [[SPEC_SELECT]]
+;
+; LZCNT-LABEL: @test1b(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
+; LZCNT-NEXT:    ret i64 [[COND]]
+;
+; GENERIC-LABEL: @test1b(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
+; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
+; GENERIC-NEXT:    ret i64 [[COND]]
+;
 entry:
   %tobool = icmp eq i64 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -83,11 +148,27 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define i32 @test2b(i32 %A) {
-; ALL-LABEL: @test2b(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
-; ALL: ret
+; BMI-LABEL: @test2b(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
+; BMI-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; BMI-NEXT:    ret i32 [[SPEC_SELECT]]
+;
+; LZCNT-LABEL: @test2b(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; LZCNT-NEXT:    ret i32 [[COND]]
+;
+; GENERIC-LABEL: @test2b(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
+; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
+; GENERIC-NEXT:    ret i32 [[COND]]
+;
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -103,11 +184,27 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define signext i16 @test3b(i16 signext %A) {
-; ALL-LABEL: @test3b(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
-; ALL: ret
+; BMI-LABEL: @test3b(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true)
+; BMI-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
+; BMI-NEXT:    ret i16 [[SPEC_SELECT]]
+;
+; LZCNT-LABEL: @test3b(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true)
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
+; LZCNT-NEXT:    ret i16 [[COND]]
+;
+; GENERIC-LABEL: @test3b(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true)
+; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
+; GENERIC-NEXT:    ret i16 [[COND]]
+;
 entry:
   %tobool = icmp eq i16 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -126,14 +223,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 ; for the target.
 
 define i64 @test1e(i32 %x) {
-; ALL-LABEL: @test1e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
-; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTTZ]] to i64
-; BMI-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test1e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; BMI-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
+; BMI-NEXT:    ret i64 [[COND]]
+;
+; LZCNT-LABEL: @test1e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; LZCNT:       cond.true:
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; LZCNT-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; LZCNT-NEXT:    br label [[COND_END]]
+; LZCNT:       cond.end:
+; LZCNT-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; LZCNT-NEXT:    ret i64 [[COND]]
+;
+; GENERIC-LABEL: @test1e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; GENERIC-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i64 [[COND]]
+;
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -149,14 +270,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test2e(i64 %x) {
-; ALL-LABEL: @test2e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i32
-; BMI-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test2e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
+; BMI-NEXT:    ret i32 [[COND]]
+;
+; LZCNT-LABEL: @test2e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; LZCNT:       cond.true:
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; LZCNT-NEXT:    br label [[COND_END]]
+; LZCNT:       cond.end:
+; LZCNT-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; LZCNT-NEXT:    ret i32 [[COND]]
+;
+; GENERIC-LABEL: @test2e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i32 [[COND]]
+;
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -172,14 +317,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i64 @test3e(i32 %x) {
-; ALL-LABEL: @test3e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTLZ]] to i64
-; LZCNT-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]]
-; BMI-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test3e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; BMI:       cond.true:
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; BMI-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; BMI-NEXT:    br label [[COND_END]]
+; BMI:       cond.end:
+; BMI-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; BMI-NEXT:    ret i64 [[COND]]
+;
+; LZCNT-LABEL: @test3e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; LZCNT-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
+; LZCNT-NEXT:    ret i64 [[COND]]
+;
+; GENERIC-LABEL: @test3e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; GENERIC-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i64 [[COND]]
+;
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -195,14 +364,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test4e(i64 %x) {
-; ALL-LABEL: @test4e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
-; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i32
-; LZCNT-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]]
-; BMI-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test4e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; BMI:       cond.true:
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; BMI-NEXT:    br label [[COND_END]]
+; BMI:       cond.end:
+; BMI-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; BMI-NEXT:    ret i32 [[COND]]
+;
+; LZCNT-LABEL: @test4e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
+; LZCNT-NEXT:    ret i32 [[COND]]
+;
+; GENERIC-LABEL: @test4e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i32 [[COND]]
+;
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -218,14 +411,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test5e(i64 %x) {
-; ALL-LABEL: @test5e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
-; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i16
-; LZCNT-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]]
-; BMI-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test5e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; BMI:       cond.true:
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; BMI-NEXT:    br label [[COND_END]]
+; BMI:       cond.end:
+; BMI-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; BMI-NEXT:    ret i16 [[COND]]
+;
+; LZCNT-LABEL: @test5e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
+; LZCNT-NEXT:    ret i16 [[COND]]
+;
+; GENERIC-LABEL: @test5e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i16 [[COND]]
+;
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -241,14 +458,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test6e(i32 %x) {
-; ALL-LABEL: @test6e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
-; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTLZ]] to i16
-; LZCNT-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]]
-; BMI-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test6e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; BMI:       cond.true:
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; BMI-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; BMI-NEXT:    br label [[COND_END]]
+; BMI:       cond.end:
+; BMI-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; BMI-NEXT:    ret i16 [[COND]]
+;
+; LZCNT-LABEL: @test6e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; LZCNT-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
+; LZCNT-NEXT:    ret i16 [[COND]]
+;
+; GENERIC-LABEL: @test6e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; GENERIC-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i16 [[COND]]
+;
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -264,14 +505,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test7e(i64 %x) {
-; ALL-LABEL: @test7e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i16
-; BMI-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test7e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
+; BMI-NEXT:    ret i16 [[COND]]
+;
+; LZCNT-LABEL: @test7e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; LZCNT:       cond.true:
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; LZCNT-NEXT:    br label [[COND_END]]
+; LZCNT:       cond.end:
+; LZCNT-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; LZCNT-NEXT:    ret i16 [[COND]]
+;
+; GENERIC-LABEL: @test7e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i16 [[COND]]
+;
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -287,14 +552,38 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test8e(i32 %x) {
-; ALL-LABEL: @test8e(
-; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
-; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
-; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTTZ]] to i16
-; BMI-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
-; ALL: ret
+; BMI-LABEL: @test8e(
+; BMI-NEXT:  entry:
+; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; BMI-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
+; BMI-NEXT:    ret i16 [[COND]]
+;
+; LZCNT-LABEL: @test8e(
+; LZCNT-NEXT:  entry:
+; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; LZCNT:       cond.true:
+; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; LZCNT-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; LZCNT-NEXT:    br label [[COND_END]]
+; LZCNT:       cond.end:
+; LZCNT-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; LZCNT-NEXT:    ret i16 [[COND]]
+;
+; GENERIC-LABEL: @test8e(
+; GENERIC-NEXT:  entry:
+; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
+; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
+; GENERIC:       cond.true:
+; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; GENERIC-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
+; GENERIC-NEXT:    br label [[COND_END]]
+; GENERIC:       cond.end:
+; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
+; GENERIC-NEXT:    ret i16 [[COND]]
+;
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index c42568ffa935e..b7bf8054a6f10 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 ; RUN: opt -S -passes='simplify-cfg<switch-to-lookup>' < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 
@@ -5,25 +6,24 @@
 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin12.0.0"
 
-; When we have a covered lookup table, make sure we don't delete PHINodes that
-; are cached in PHIs.
-; CHECK-LABEL: @test
-; CHECK: entry:
-; CHECK-NEXT: sub i3 %arg, -4
-; CHECK-NEXT: zext i3 %switch.tableidx to i4
-; CHECK-NEXT: getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 %switch.tableidx.zext
-; CHECK-NEXT: load i64, i64* %switch.gep
-; CHECK-NEXT: add i64
-; CHECK-NEXT: ret i64
 define i64 @test(i3 %arg) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], -4
+; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]]
+; CHECK-NEXT:    [[V3:%.*]] = add i64 [[SWITCH_LOAD]], 0
+; CHECK-NEXT:    ret i64 [[V3]]
+;
 entry:
   switch i3 %arg, label %Default [
-    i3 -2, label %Label6
-    i3 1, label %Label1
-    i3 2, label %Label2
-    i3 3, label %Label3
-    i3 -4, label %Label4
-    i3 -3, label %Label5
+  i3 -2, label %Label6
+  i3 1, label %Label1
+  i3 2, label %Label2
+  i3 3, label %Label3
+  i3 -4, label %Label4
+  i3 -3, label %Label5
   ]
 
 Default:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 0b9d6ebe82584..e50a913f9e50a 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 ; RUN: opt -S -passes='simplify-cfg<switch-to-lookup>' < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 
@@ -5,21 +6,21 @@
 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin12.0.0"
 
-; When tableindex can't fit into i2, we should extend the type to i3.
-; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si
-; CHECK: entry:
-; CHECK-NEXT: sub i2 %0, -2
-; CHECK-NEXT: zext i2 %switch.tableidx to i3
-; CHECK-NEXT: getelementptr inbounds [4 x i64], [4 x i64]* @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 %switch.tableidx.zext
-; CHECK-NEXT: load i64, i64* %switch.gep
-; CHECK-NEXT: ret i64 %switch.load
 define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
+; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TMP0:%.*]], -2
+; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i64 [[SWITCH_LOAD]]
+;
 entry:
   switch i2 %0, label %1 [
-    i2 0, label %2
-    i2 1, label %3
-    i2 -2, label %4
-    i2 -1, label %5
+  i2 0, label %2
+  i2 1, label %3
+  i2 -2, label %4
+  i2 -1, label %5
   ]
 
 ; <label>:1                                       ; preds = %entry
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 3128ce4afa60a..c77438974b2d2 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -switch-to-lookup=true -keep-loops=false -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; RUN: opt < %s -passes='simplify-cfg<no-keep-loops;switch-to-lookup>' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -33,15 +34,27 @@ target triple = "x86_64-unknown-linux-gnu"
 ; so we return early, directly from the lookup bb.
 
 define i32 @f(i32 %c) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 7
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 15
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 42, label %return
-    i32 43, label %sw.bb1
-    i32 44, label %sw.bb2
-    i32 45, label %sw.bb3
-    i32 46, label %sw.bb4
-    i32 47, label %sw.bb5
-    i32 48, label %sw.bb6
+  i32 42, label %return
+  i32 43, label %sw.bb1
+  i32 44, label %sw.bb2
+  i32 45, label %sw.bb3
+  i32 46, label %sw.bb4
+  i32 47, label %sw.bb5
+  i32 48, label %sw.bb6
   ]
 
 sw.bb1: br label %return
@@ -55,33 +68,34 @@ return:
   %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %retval.0
 
-; CHECK-LABEL: @f(
-; CHECK: entry:
-; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
-; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
-; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
-; CHECK-NEXT: ret i32 %switch.load
-; CHECK: return:
-; CHECK-NEXT: ret i32 15
 }
 
 ; Same thing, but with i8's
 
 define i8 @char(i32 %c) {
+; CHECK-LABEL: @char(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 9
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i8, i8* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i8 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8 15
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 42, label %return
-    i32 43, label %sw.bb1
-    i32 44, label %sw.bb2
-    i32 45, label %sw.bb3
-    i32 46, label %sw.bb4
-    i32 47, label %sw.bb5
-    i32 48, label %sw.bb6
-    i32 49, label %sw.bb7
-    i32 50, label %sw.bb8
+  i32 42, label %return
+  i32 43, label %sw.bb1
+  i32 44, label %sw.bb2
+  i32 45, label %sw.bb3
+  i32 46, label %sw.bb4
+  i32 47, label %sw.bb5
+  i32 48, label %sw.bb6
+  i32 49, label %sw.bb7
+  i32 50, label %sw.bb8
   ]
 
 sw.bb1: br label %return
@@ -97,29 +111,35 @@ return:
   %retval.0 = phi i8 [ 15, %sw.default ], [ 84, %sw.bb8 ], [ 33, %sw.bb7 ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i8 %retval.0
 
-; CHECK-LABEL: @char(
-; CHECK: entry:
-; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
-; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 9
-; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i8, i8* %switch.gep
-; CHECK-NEXT: ret i8 %switch.load
-; CHECK: return:
-; CHECK-NEXT: ret i8 15
 }
 
 ; A switch used to initialize two variables, an i8 and a float.
 
 declare void @dummy(i8 signext, float)
 define void @h(i32 %x) {
+; CHECK-LABEL: @h(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SW_EPILOG:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i32 [[X]], 8
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i32 89655594, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i32 [[SWITCH_DOWNSHIFT]] to i8
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x float], [4 x float]* @switch.table.h, i32 0, i32 [[X]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load float, float* [[SWITCH_GEP]]
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[A_0:%.*]] = phi i8 [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ 7, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[B_0:%.*]] = phi float [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 0x4023FAE140000000, [[ENTRY]] ]
+; CHECK-NEXT:    call void @dummy(i8 signext [[A_0]], float [[B_0]])
+; CHECK-NEXT:    ret void
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.epilog
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %sw.epilog
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %sw.epilog
@@ -133,22 +153,6 @@ sw.epilog:
   call void @dummy(i8 signext %a.0, float %b.0)
   ret void
 
-; CHECK-LABEL: @h(
-; CHECK: entry:
-; CHECK-NEXT: %0 = icmp ult i32 %x, 4
-; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.shiftamt = mul i32 %x, 8
-; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
-; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float], [4 x float]* @switch.table.h, i32 0, i32 %x
-; CHECK-NEXT: %switch.load = load float, float* %switch.gep
-; CHECK-NEXT: br label %sw.epilog
-; CHECK: sw.epilog:
-; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
-; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
-; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
-; CHECK-NEXT: ret void
 }
 
 
@@ -161,12 +165,23 @@ sw.epilog:
 @.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
 
 define i8* @foostring(i32 %x)  {
+; CHECK-LABEL: @foostring(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.foostring, i32 0, i32 [[X]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i8*, i8** [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i8* [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0)
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %return
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %return
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %return
@@ -176,32 +191,35 @@ sw.default: br label %return
 
 return:
   %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0), %sw.default ],
-                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
-                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
-                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
-                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), %entry ]
+  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), %entry ]
   ret i8* %retval.0
 
-; CHECK-LABEL: @foostring(
-; CHECK: entry:
-; CHECK-NEXT: %0 = icmp ult i32 %x, 4
-; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.foostring, i32 0, i32 %x
-; CHECK-NEXT: %switch.load = load i8*, i8** %switch.gep
-; CHECK-NEXT: ret i8* %switch.load
 }
 
 ; Switch used to initialize two values. The first value is returned, the second
 ; value is not used. This used to make the transformation generate illegal code.
 
 define i32 @earlyreturncrash(i32 %x)  {
+; CHECK-LABEL: @earlyreturncrash(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SW_EPILOG:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.earlyreturncrash, i32 0, i32 [[X]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    ret i32 7
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.epilog
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %sw.epilog
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %sw.epilog
@@ -214,13 +232,6 @@ sw.epilog:
   %b.0 = phi i32 [ 10, %sw.default ], [ 5, %sw.bb3 ], [ 1, %sw.bb2 ], [ 4, %sw.bb1 ], [ 3, %entry ]
   ret i32 %a.0
 
-; CHECK-LABEL: @earlyreturncrash(
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.earlyreturncrash, i32 0, i32 %x
-; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
-; CHECK-NEXT: ret i32 %switch.load
-; CHECK: sw.epilog:
-; CHECK-NEXT: ret i32 7
 }
 
 
@@ -229,21 +240,40 @@ sw.epilog:
 ; can be packed into a bitmap.
 
 define i32 @crud(i8 zeroext %c)  {
+; CHECK-LABEL: @crud(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i8 [[C]], 34
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 59
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[LOR_END]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = zext i8 [[SWITCH_TABLEIDX]] to i59
+; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i59 [[SWITCH_CAST]], 1
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i59 -288230375765830623, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i59 [[SWITCH_DOWNSHIFT]] to i1
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ false, [[SWITCH_EARLY_TEST]] ]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[LOR_EXT]]
+;
 entry:
   %cmp = icmp ult i8 %c, 33
   br i1 %cmp, label %lor.end, label %switch.early.test
 
 switch.early.test:
   switch i8 %c, label %lor.rhs [
-    i8 92, label %lor.end
-    i8 62, label %lor.end
-    i8 60, label %lor.end
-    i8 59, label %lor.end
-    i8 58, label %lor.end
-    i8 46, label %lor.end
-    i8 44, label %lor.end
-    i8 34, label %lor.end
-    i8 39, label %switch.edge
+  i8 92, label %lor.end
+  i8 62, label %lor.end
+  i8 60, label %lor.end
+  i8 59, label %lor.end
+  i8 58, label %lor.end
+  i8 46, label %lor.end
+  i8 44, label %lor.end
+  i8 34, label %lor.end
+  i8 39, label %switch.edge
   ]
 
 switch.edge: br label %lor.end
@@ -251,49 +281,49 @@ lor.rhs: br label %lor.end
 
 lor.end:
   %0 = phi i1 [ true, %switch.early.test ],
-              [ false, %lor.rhs ],
-              [ true, %entry ],
-              [ true, %switch.early.test ],
-              [ true, %switch.early.test ],
-              [ true, %switch.early.test ],
-              [ true, %switch.early.test ],
-              [ true, %switch.early.test ],
-              [ true, %switch.early.test ],
-              [ true, %switch.early.test ],
-              [ true, %switch.edge ]
+  [ false, %lor.rhs ],
+  [ true, %entry ],
+  [ true, %switch.early.test ],
+  [ true, %switch.early.test ],
+  [ true, %switch.early.test ],
+  [ true, %switch.early.test ],
+  [ true, %switch.early.test ],
+  [ true, %switch.early.test ],
+  [ true, %switch.early.test ],
+  [ true, %switch.edge ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
-; CHECK-LABEL: @crud(
-; CHECK: entry:
-; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
-; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
-; CHECK: switch.early.test:
-; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
-; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
-; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.cast = zext i8 %switch.tableidx to i59
-; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.cast, 1
-; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
-; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
-; CHECK-NEXT: br label %lor.end
-; CHECK: lor.end:
-; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
-; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
-; CHECK-NEXT: ret i32 %lor.ext
 }
 
 ; PR13946
 define i32 @overflow(i32 %type) {
+; CHECK-LABEL: @overflow(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[TYPE:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 3, label [[SW_BB3:%.*]]
+; CHECK-NEXT:    i32 -2147483645, label [[SW_BB3]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       sw.bb3:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[DIRENT_TYPE_0:%.*]] = phi i32 [ 6, [[SW_BB3]] ], [ 5, [[SW_BB2]] ], [ 0, [[SW_BB1]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[DIRENT_TYPE_0]]
+;
 entry:
   switch i32 %type, label %sw.default [
-    i32 -2147483648, label %sw.bb
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 -2147483645, label %sw.bb3
-    i32 3, label %sw.bb3
+  i32 -2147483648, label %sw.bb
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 -2147483645, label %sw.bb3
+  i32 3, label %sw.bb3
   ]
 
 sw.bb: br label %if.end
@@ -306,19 +336,29 @@ if.else: br label %if.end
 if.end:
   %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
   ret i32 %dirent_type.0
-; CHECK-LABEL: define i32 @overflow(
-; CHECK: switch
-; CHECK: phi
 }
 
 ; PR13985
 define i1 @undef(i32 %tmp) {
+; CHECK-LABEL: @undef(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[TMP:%.*]], 9
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[BB3:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = trunc i32 [[TMP]] to i9
+; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i9 [[SWITCH_CAST]], 1
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i9 3, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i9 [[SWITCH_DOWNSHIFT]] to i1
+; CHECK-NEXT:    ret i1 [[SWITCH_MASKED]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i1 undef
+;
 bb:
   switch i32 %tmp, label %bb3 [
-    i32 0, label %bb1
-    i32 1, label %bb1
-    i32 7, label %bb2
-    i32 8, label %bb2
+  i32 0, label %bb1
+  i32 1, label %bb1
+  i32 7, label %bb2
+  i32 8, label %bb2
   ]
 
 bb1: br label %bb3
@@ -327,16 +367,27 @@ bb2: br label %bb3
 bb3:
   %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ]
   ret i1 %tmp4
-; CHECK-LABEL: define i1 @undef(
-; CHECK: %switch.cast = trunc i32 %tmp to i9
-; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt
 }
 
 ; Also handle large switches that would be rejected by
 ; isValueEqualityComparison()
-; CHECK: large
-; CHECK-NOT: switch i32
+
 define i32 @large(i32 %x) {
+; CHECK-LABEL: @large(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[X]], -10
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP]], i32 [[MUL]], i32 [[X]]
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[SPEC_SELECT]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 199
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [199 x i32], [199 x i32]* @switch.table.large, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   %cmp = icmp slt i32 %x, 0
   br i1 %cmp, label %if.then, label %if.end
@@ -348,205 +399,205 @@ if.then:
 if.end:
   %x.addr.0 = phi i32 [ %mul, %if.then ], [ %x, %entry ]
   switch i32 %x.addr.0, label %return [
-    i32 199, label %sw.bb203
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
-    i32 4, label %sw.bb4
-    i32 5, label %sw.bb5
-    i32 6, label %sw.bb6
-    i32 7, label %sw.bb7
-    i32 8, label %sw.bb8
-    i32 9, label %sw.bb9
-    i32 10, label %sw.bb10
-    i32 11, label %sw.bb11
-    i32 12, label %sw.bb12
-    i32 13, label %sw.bb13
-    i32 14, label %sw.bb14
-    i32 15, label %sw.bb15
-    i32 16, label %sw.bb16
-    i32 17, label %sw.bb17
-    i32 18, label %sw.bb18
-    i32 19, label %sw.bb19
-    i32 20, label %sw.bb20
-    i32 21, label %sw.bb21
-    i32 22, label %sw.bb22
-    i32 23, label %sw.bb23
-    i32 24, label %sw.bb24
-    i32 25, label %sw.bb25
-    i32 26, label %sw.bb26
-    i32 27, label %sw.bb27
-    i32 28, label %sw.bb28
-    i32 29, label %sw.bb29
-    i32 30, label %sw.bb30
-    i32 31, label %sw.bb31
-    i32 32, label %sw.bb32
-    i32 33, label %sw.bb33
-    i32 34, label %sw.bb34
-    i32 35, label %sw.bb35
-    i32 36, label %sw.bb37
-    i32 37, label %sw.bb38
-    i32 38, label %sw.bb39
-    i32 39, label %sw.bb40
-    i32 40, label %sw.bb41
-    i32 41, label %sw.bb42
-    i32 42, label %sw.bb43
-    i32 43, label %sw.bb44
-    i32 44, label %sw.bb45
-    i32 45, label %sw.bb47
-    i32 46, label %sw.bb48
-    i32 47, label %sw.bb49
-    i32 48, label %sw.bb50
-    i32 49, label %sw.bb51
-    i32 50, label %sw.bb52
-    i32 51, label %sw.bb53
-    i32 52, label %sw.bb54
-    i32 53, label %sw.bb55
-    i32 54, label %sw.bb56
-    i32 55, label %sw.bb58
-    i32 56, label %sw.bb59
-    i32 57, label %sw.bb60
-    i32 58, label %sw.bb61
-    i32 59, label %sw.bb62
-    i32 60, label %sw.bb63
-    i32 61, label %sw.bb64
-    i32 62, label %sw.bb65
-    i32 63, label %sw.bb66
-    i32 64, label %sw.bb67
-    i32 65, label %sw.bb68
-    i32 66, label %sw.bb69
-    i32 67, label %sw.bb70
-    i32 68, label %sw.bb71
-    i32 69, label %sw.bb72
-    i32 70, label %sw.bb73
-    i32 71, label %sw.bb74
-    i32 72, label %sw.bb76
-    i32 73, label %sw.bb77
-    i32 74, label %sw.bb78
-    i32 75, label %sw.bb79
-    i32 76, label %sw.bb80
-    i32 77, label %sw.bb81
-    i32 78, label %sw.bb82
-    i32 79, label %sw.bb83
-    i32 80, label %sw.bb84
-    i32 81, label %sw.bb85
-    i32 82, label %sw.bb86
-    i32 83, label %sw.bb87
-    i32 84, label %sw.bb88
-    i32 85, label %sw.bb89
-    i32 86, label %sw.bb90
-    i32 87, label %sw.bb91
-    i32 88, label %sw.bb92
-    i32 89, label %sw.bb93
-    i32 90, label %sw.bb94
-    i32 91, label %sw.bb95
-    i32 92, label %sw.bb96
-    i32 93, label %sw.bb97
-    i32 94, label %sw.bb98
-    i32 95, label %sw.bb99
-    i32 96, label %sw.bb100
-    i32 97, label %sw.bb101
-    i32 98, label %sw.bb102
-    i32 99, label %sw.bb103
-    i32 100, label %sw.bb104
-    i32 101, label %sw.bb105
-    i32 102, label %sw.bb106
-    i32 103, label %sw.bb107
-    i32 104, label %sw.bb108
-    i32 105, label %sw.bb109
-    i32 106, label %sw.bb110
-    i32 107, label %sw.bb111
-    i32 108, label %sw.bb112
-    i32 109, label %sw.bb113
-    i32 110, label %sw.bb114
-    i32 111, label %sw.bb115
-    i32 112, label %sw.bb116
-    i32 113, label %sw.bb117
-    i32 114, label %sw.bb118
-    i32 115, label %sw.bb119
-    i32 116, label %sw.bb120
-    i32 117, label %sw.bb121
-    i32 118, label %sw.bb122
-    i32 119, label %sw.bb123
-    i32 120, label %sw.bb124
-    i32 121, label %sw.bb125
-    i32 122, label %sw.bb126
-    i32 123, label %sw.bb127
-    i32 124, label %sw.bb128
-    i32 125, label %sw.bb129
-    i32 126, label %sw.bb130
-    i32 127, label %sw.bb131
-    i32 128, label %sw.bb132
-    i32 129, label %sw.bb133
-    i32 130, label %sw.bb134
-    i32 131, label %sw.bb135
-    i32 132, label %sw.bb136
-    i32 133, label %sw.bb137
-    i32 134, label %sw.bb138
-    i32 135, label %sw.bb139
-    i32 136, label %sw.bb140
-    i32 137, label %sw.bb141
-    i32 138, label %sw.bb142
-    i32 139, label %sw.bb143
-    i32 140, label %sw.bb144
-    i32 141, label %sw.bb145
-    i32 142, label %sw.bb146
-    i32 143, label %sw.bb147
-    i32 144, label %sw.bb148
-    i32 145, label %sw.bb149
-    i32 146, label %sw.bb150
-    i32 147, label %sw.bb151
-    i32 148, label %sw.bb152
-    i32 149, label %sw.bb153
-    i32 150, label %sw.bb154
-    i32 151, label %sw.bb155
-    i32 152, label %sw.bb156
-    i32 153, label %sw.bb157
-    i32 154, label %sw.bb158
-    i32 155, label %sw.bb159
-    i32 156, label %sw.bb160
-    i32 157, label %sw.bb161
-    i32 158, label %sw.bb162
-    i32 159, label %sw.bb163
-    i32 160, label %sw.bb164
-    i32 161, label %sw.bb165
-    i32 162, label %sw.bb166
-    i32 163, label %sw.bb167
-    i32 164, label %sw.bb168
-    i32 165, label %sw.bb169
-    i32 166, label %sw.bb170
-    i32 167, label %sw.bb171
-    i32 168, label %sw.bb172
-    i32 169, label %sw.bb173
-    i32 170, label %sw.bb174
-    i32 171, label %sw.bb175
-    i32 172, label %sw.bb176
-    i32 173, label %sw.bb177
-    i32 174, label %sw.bb178
-    i32 175, label %sw.bb179
-    i32 176, label %sw.bb180
-    i32 177, label %sw.bb181
-    i32 178, label %sw.bb182
-    i32 179, label %sw.bb183
-    i32 180, label %sw.bb184
-    i32 181, label %sw.bb185
-    i32 182, label %sw.bb186
-    i32 183, label %sw.bb187
-    i32 184, label %sw.bb188
-    i32 185, label %sw.bb189
-    i32 186, label %sw.bb190
-    i32 187, label %sw.bb191
-    i32 188, label %sw.bb192
-    i32 189, label %sw.bb193
-    i32 190, label %sw.bb194
-    i32 191, label %sw.bb195
-    i32 192, label %sw.bb196
-    i32 193, label %sw.bb197
-    i32 194, label %sw.bb198
-    i32 195, label %sw.bb199
-    i32 196, label %sw.bb200
-    i32 197, label %sw.bb201
-    i32 198, label %sw.bb202
+  i32 199, label %sw.bb203
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
+  i32 4, label %sw.bb4
+  i32 5, label %sw.bb5
+  i32 6, label %sw.bb6
+  i32 7, label %sw.bb7
+  i32 8, label %sw.bb8
+  i32 9, label %sw.bb9
+  i32 10, label %sw.bb10
+  i32 11, label %sw.bb11
+  i32 12, label %sw.bb12
+  i32 13, label %sw.bb13
+  i32 14, label %sw.bb14
+  i32 15, label %sw.bb15
+  i32 16, label %sw.bb16
+  i32 17, label %sw.bb17
+  i32 18, label %sw.bb18
+  i32 19, label %sw.bb19
+  i32 20, label %sw.bb20
+  i32 21, label %sw.bb21
+  i32 22, label %sw.bb22
+  i32 23, label %sw.bb23
+  i32 24, label %sw.bb24
+  i32 25, label %sw.bb25
+  i32 26, label %sw.bb26
+  i32 27, label %sw.bb27
+  i32 28, label %sw.bb28
+  i32 29, label %sw.bb29
+  i32 30, label %sw.bb30
+  i32 31, label %sw.bb31
+  i32 32, label %sw.bb32
+  i32 33, label %sw.bb33
+  i32 34, label %sw.bb34
+  i32 35, label %sw.bb35
+  i32 36, label %sw.bb37
+  i32 37, label %sw.bb38
+  i32 38, label %sw.bb39
+  i32 39, label %sw.bb40
+  i32 40, label %sw.bb41
+  i32 41, label %sw.bb42
+  i32 42, label %sw.bb43
+  i32 43, label %sw.bb44
+  i32 44, label %sw.bb45
+  i32 45, label %sw.bb47
+  i32 46, label %sw.bb48
+  i32 47, label %sw.bb49
+  i32 48, label %sw.bb50
+  i32 49, label %sw.bb51
+  i32 50, label %sw.bb52
+  i32 51, label %sw.bb53
+  i32 52, label %sw.bb54
+  i32 53, label %sw.bb55
+  i32 54, label %sw.bb56
+  i32 55, label %sw.bb58
+  i32 56, label %sw.bb59
+  i32 57, label %sw.bb60
+  i32 58, label %sw.bb61
+  i32 59, label %sw.bb62
+  i32 60, label %sw.bb63
+  i32 61, label %sw.bb64
+  i32 62, label %sw.bb65
+  i32 63, label %sw.bb66
+  i32 64, label %sw.bb67
+  i32 65, label %sw.bb68
+  i32 66, label %sw.bb69
+  i32 67, label %sw.bb70
+  i32 68, label %sw.bb71
+  i32 69, label %sw.bb72
+  i32 70, label %sw.bb73
+  i32 71, label %sw.bb74
+  i32 72, label %sw.bb76
+  i32 73, label %sw.bb77
+  i32 74, label %sw.bb78
+  i32 75, label %sw.bb79
+  i32 76, label %sw.bb80
+  i32 77, label %sw.bb81
+  i32 78, label %sw.bb82
+  i32 79, label %sw.bb83
+  i32 80, label %sw.bb84
+  i32 81, label %sw.bb85
+  i32 82, label %sw.bb86
+  i32 83, label %sw.bb87
+  i32 84, label %sw.bb88
+  i32 85, label %sw.bb89
+  i32 86, label %sw.bb90
+  i32 87, label %sw.bb91
+  i32 88, label %sw.bb92
+  i32 89, label %sw.bb93
+  i32 90, label %sw.bb94
+  i32 91, label %sw.bb95
+  i32 92, label %sw.bb96
+  i32 93, label %sw.bb97
+  i32 94, label %sw.bb98
+  i32 95, label %sw.bb99
+  i32 96, label %sw.bb100
+  i32 97, label %sw.bb101
+  i32 98, label %sw.bb102
+  i32 99, label %sw.bb103
+  i32 100, label %sw.bb104
+  i32 101, label %sw.bb105
+  i32 102, label %sw.bb106
+  i32 103, label %sw.bb107
+  i32 104, label %sw.bb108
+  i32 105, label %sw.bb109
+  i32 106, label %sw.bb110
+  i32 107, label %sw.bb111
+  i32 108, label %sw.bb112
+  i32 109, label %sw.bb113
+  i32 110, label %sw.bb114
+  i32 111, label %sw.bb115
+  i32 112, label %sw.bb116
+  i32 113, label %sw.bb117
+  i32 114, label %sw.bb118
+  i32 115, label %sw.bb119
+  i32 116, label %sw.bb120
+  i32 117, label %sw.bb121
+  i32 118, label %sw.bb122
+  i32 119, label %sw.bb123
+  i32 120, label %sw.bb124
+  i32 121, label %sw.bb125
+  i32 122, label %sw.bb126
+  i32 123, label %sw.bb127
+  i32 124, label %sw.bb128
+  i32 125, label %sw.bb129
+  i32 126, label %sw.bb130
+  i32 127, label %sw.bb131
+  i32 128, label %sw.bb132
+  i32 129, label %sw.bb133
+  i32 130, label %sw.bb134
+  i32 131, label %sw.bb135
+  i32 132, label %sw.bb136
+  i32 133, label %sw.bb137
+  i32 134, label %sw.bb138
+  i32 135, label %sw.bb139
+  i32 136, label %sw.bb140
+  i32 137, label %sw.bb141
+  i32 138, label %sw.bb142
+  i32 139, label %sw.bb143
+  i32 140, label %sw.bb144
+  i32 141, label %sw.bb145
+  i32 142, label %sw.bb146
+  i32 143, label %sw.bb147
+  i32 144, label %sw.bb148
+  i32 145, label %sw.bb149
+  i32 146, label %sw.bb150
+  i32 147, label %sw.bb151
+  i32 148, label %sw.bb152
+  i32 149, label %sw.bb153
+  i32 150, label %sw.bb154
+  i32 151, label %sw.bb155
+  i32 152, label %sw.bb156
+  i32 153, label %sw.bb157
+  i32 154, label %sw.bb158
+  i32 155, label %sw.bb159
+  i32 156, label %sw.bb160
+  i32 157, label %sw.bb161
+  i32 158, label %sw.bb162
+  i32 159, label %sw.bb163
+  i32 160, label %sw.bb164
+  i32 161, label %sw.bb165
+  i32 162, label %sw.bb166
+  i32 163, label %sw.bb167
+  i32 164, label %sw.bb168
+  i32 165, label %sw.bb169
+  i32 166, label %sw.bb170
+  i32 167, label %sw.bb171
+  i32 168, label %sw.bb172
+  i32 169, label %sw.bb173
+  i32 170, label %sw.bb174
+  i32 171, label %sw.bb175
+  i32 172, label %sw.bb176
+  i32 173, label %sw.bb177
+  i32 174, label %sw.bb178
+  i32 175, label %sw.bb179
+  i32 176, label %sw.bb180
+  i32 177, label %sw.bb181
+  i32 178, label %sw.bb182
+  i32 179, label %sw.bb183
+  i32 180, label %sw.bb184
+  i32 181, label %sw.bb185
+  i32 182, label %sw.bb186
+  i32 183, label %sw.bb187
+  i32 184, label %sw.bb188
+  i32 185, label %sw.bb189
+  i32 186, label %sw.bb190
+  i32 187, label %sw.bb191
+  i32 188, label %sw.bb192
+  i32 189, label %sw.bb193
+  i32 190, label %sw.bb194
+  i32 191, label %sw.bb195
+  i32 192, label %sw.bb196
+  i32 193, label %sw.bb197
+  i32 194, label %sw.bb198
+  i32 195, label %sw.bb199
+  i32 196, label %sw.bb200
+  i32 197, label %sw.bb201
+  i32 198, label %sw.bb202
   ]
 
 sw.bb1: br label %return
@@ -755,15 +806,27 @@ return:
 }
 
 define i32 @cprop(i32 %x, i32 %y) {
+; CHECK-LABEL: @cprop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 7
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.cprop, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 123
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 1, label %return
-    i32 2, label %sw.bb1
-    i32 3, label %sw.bb2
-    i32 4, label %sw.bb2
-    i32 5, label %sw.bb2
-    i32 6, label %sw.bb3
-    i32 7, label %sw.bb3
+  i32 1, label %return
+  i32 2, label %sw.bb1
+  i32 3, label %sw.bb2
+  i32 4, label %sw.bb2
+  i32 5, label %sw.bb2
+  i32 6, label %sw.bb3
+  i32 7, label %sw.bb3
   ]
 
 sw.bb1: br label %return
@@ -790,23 +853,31 @@ return:
   %retval.0 = phi i32 [ 123, %sw.default ], [ %select, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
   ret i32 %retval.0
 
-; CHECK-LABEL: @cprop(
-; CHECK: switch.lookup:
-; CHECK: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.cprop, i32 0, i32 %switch.tableidx
 }
 
 define i32 @unreachable_case(i32 %x)  {
+; CHECK-LABEL: @unreachable_case(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 9
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.unreachable_case, i32 0, i32 [[X]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 2
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb
-    i32 3, label %sw.bb1
-    i32 4, label %sw.bb2
-    i32 5, label %sw.bb3
-    i32 6, label %sw.bb3
-    i32 7, label %sw.bb3
-    i32 8, label %sw.bb3
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb
+  i32 2, label %sw.bb
+  i32 3, label %sw.bb1
+  i32 4, label %sw.bb2
+  i32 5, label %sw.bb3
+  i32 6, label %sw.bb3
+  i32 7, label %sw.bb3
+  i32 8, label %sw.bb3
   ]
 
 sw.bb: br label %return
@@ -819,18 +890,21 @@ return:
   %retval.0 = phi i32 [ 1, %sw.bb3 ], [ -1, %sw.bb2 ], [ 0, %sw.bb ], [ 2, %sw.default ]
   ret i32 %retval.0
 
-; CHECK-LABEL: @unreachable_case(
-; CHECK: switch.lookup:
-; CHECK: getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.unreachable_case, i32 0, i32 %x
 }
 
 define i32 @unreachable_default(i32 %x)  {
+; CHECK-LABEL: @unreachable_default(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.unreachable_default, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+;
 entry:
   switch i32 %x, label %default [
-    i32 0, label %bb0
-    i32 1, label %bb1
-    i32 2, label %bb2
-    i32 3, label %bb3
+  i32 0, label %bb0
+  i32 1, label %bb1
+  i32 2, label %bb2
+  i32 3, label %bb3
   ]
 
 bb0: br label %return
@@ -843,24 +917,40 @@ return:
   %retval = phi i32 [ 42, %bb0 ], [ 52, %bb1 ], [ 1, %bb2 ], [ 2, %bb3 ]
   ret i32 %retval
 
-; CHECK-LABEL: @unreachable_default(
-; CHECK: entry:
-; CHECK-NOT: icmp
-; CHECK-NOT: br 1i
-; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.unreachable_default, i32 0, i32 %x
-; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
-; CHECK-NEXT: ret i32 %switch.load
 }
 
 ; Don't create a table with illegal type
 define i96 @illegaltype(i32 %c) {
+; CHECK-LABEL: @illegaltype(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 42, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 43, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 44, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    i32 45, label [[SW_BB3:%.*]]
+; CHECK-NEXT:    i32 46, label [[SW_BB4:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb3:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb4:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i96 [ 15, [[SW_DEFAULT]] ], [ 27, [[SW_BB4]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i96 [[RETVAL_0]]
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 42, label %return
-    i32 43, label %sw.bb1
-    i32 44, label %sw.bb2
-    i32 45, label %sw.bb3
-    i32 46, label %sw.bb4
+  i32 42, label %return
+  i32 43, label %sw.bb1
+  i32 44, label %sw.bb2
+  i32 45, label %sw.bb3
+  i32 46, label %sw.bb4
   ]
 
 sw.bb1: br label %return
@@ -872,75 +962,106 @@ return:
   %retval.0 = phi i96 [ 15, %sw.default ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i96 %retval.0
 
-; CHECK-LABEL: @illegaltype(
-; CHECK-NOT: @switch.table
-; CHECK: switch i32 %c
 }
 
 ; If we can build a lookup table without any holes, we don't need a default result.
 declare void @exit(i32)
 define i32 @nodefaultnoholes(i32 %c) {
+; CHECK-LABEL: @nodefaultnoholes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT:%.*]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    call void @exit(i32 1)
+; CHECK-NEXT:    unreachable
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.nodefaultnoholes, i32 0, i32 [[C]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 0, label %return
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %return
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %return
 sw.bb2: br label %return
 sw.bb3: br label %return
 sw.default: call void @exit(i32 1)
-            unreachable
+  unreachable
 return:
   %x = phi i32 [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %x
 
-; CHECK-LABEL: @nodefaultnoholes(
-; CHECK: @switch.table
-; CHECK-NOT: switch i32
 }
 
 ; This lookup table will have holes, so we need to test for the holes.
 define i32 @nodefaultwithholes(i32 %c) {
+; CHECK-LABEL: @nodefaultwithholes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 6
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[SW_DEFAULT:%.*]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    call void @exit(i32 1)
+; CHECK-NEXT:    unreachable
+; CHECK:       switch.hole_check:
+; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[C]] to i8
+; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 47, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @switch.table.nodefaultwithholes, i32 0, i32 [[C]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 0, label %return
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
-    i32 5, label %sw.bb3
+  i32 0, label %return
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
+  i32 5, label %sw.bb3
   ]
 
 sw.bb1: br label %return
 sw.bb2: br label %return
 sw.bb3: br label %return
 sw.default: call void @exit(i32 1)
-            unreachable
+  unreachable
 return:
   %x = phi i32 [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %x
 
-; CHECK-LABEL: @nodefaultwithholes(
-; CHECK: entry:
-; CHECK: br i1 %{{.*}}, label %switch.hole_check, label %sw.default
-; CHECK: switch.hole_check:
-; CHECK-NEXT: %switch.maskindex = trunc i32 %c to i8
-; CHECK-NEXT: %switch.shifted = lshr i8 47, %switch.maskindex
 ; The mask is binary 101111.
-; CHECK-NEXT: %switch.lobit = trunc i8 %switch.shifted to i1
-; CHECK-NEXT: br i1 %switch.lobit, label %switch.lookup, label %sw.default
-; CHECK-NOT: switch i32
 }
 
 ; We don't build lookup tables with holes for switches with less than four cases.
 define i32 @threecasesholes(i32 %c) {
+; CHECK-LABEL: @threecasesholes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 3, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[C]], [[SW_DEFAULT]] ], [ 5, [[SW_BB2]] ], [ 7, [[SW_BB1]] ], [ 9, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[X]]
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 0, label %return
-    i32 1, label %sw.bb1
-    i32 3, label %sw.bb2
+  i32 0, label %return
+  i32 1, label %sw.bb1
+  i32 3, label %sw.bb2
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -948,19 +1069,16 @@ sw.default: br label %return
 return:
   %x = phi i32 [ %c, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 9, %entry ]
   ret i32 %x
-; CHECK-LABEL: @threecasesholes(
-; CHECK: switch i32
-; CHECK-NOT: @switch.table
 }
 
 ; We build lookup tables for switches with three or more cases.
 define i32 @threecases(i32 %c) {
 ; CHECK-LABEL: @threecases(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 %c, 3
-; CHECK-NEXT:    br i1 [[TMP0]], label %switch.lookup, label %return
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 3
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.threecases, i32 0, i32 %c
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.threecases, i32 0, i32 [[C]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
@@ -987,9 +1105,9 @@ return:
 define i32 @twocases(i32 %c) {
 ; CHECK-LABEL: @twocases(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 %c, 1
-; CHECK-NEXT:    [[SWITCH_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP:%.*]], i32 7, i32 3
-; CHECK-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 %c, 0
+; CHECK-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 [[C:%.*]], 1
+; CHECK-NEXT:    [[SWITCH_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP]], i32 7, i32 3
+; CHECK-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 [[C]], 0
 ; CHECK-NEXT:    [[SWITCH_SELECT2:%.*]] = select i1 [[SWITCH_SELECTCMP1]], i32 9, i32 [[SWITCH_SELECT]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_SELECT2]]
 ;
@@ -1013,11 +1131,28 @@ return:
 @tls_c = thread_local global i32 0
 @tls_d = thread_local global i32 0
 define i32* @tls(i32 %x) {
+; CHECK-LABEL: @tls(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @tls_d, [[SW_DEFAULT]] ], [ @tls_c, [[SW_BB2]] ], [ @tls_b, [[SW_BB1]] ], [ @tls_a, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32* [[RETVAL_0]]
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %return
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
+  i32 0, label %return
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
   ]
 sw.bb1:
   br label %return
@@ -1028,9 +1163,6 @@ sw.default:
 return:
   %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ]
   ret i32* %retval.0
-; CHECK-LABEL: @tls(
-; CHECK: switch i32
-; CHECK-NOT: @switch.table
 }
 
 ; Don't build tables for switches with dllimport variables.
@@ -1039,11 +1171,28 @@ return:
 @dllimport_c = external dllimport global [3x i32]
 @dllimport_d = external dllimport global [3x i32]
 define i32* @dllimport(i32 %x) {
+; CHECK-LABEL: @dllimport(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_d, i32 0, i32 0), [[SW_DEFAULT]] ], [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_c, i32 0, i32 0), [[SW_BB2]] ], [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_b, i32 0, i32 0), [[SW_BB1]] ], [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_a, i32 0, i32 0), [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32* [[RETVAL_0]]
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %return
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
+  i32 0, label %return
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
   ]
 sw.bb1:
   br label %return
@@ -1053,23 +1202,33 @@ sw.default:
   br label %return
 return:
   %retval.0 = phi i32* [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_d, i32 0, i32 0), %sw.default ],
-                       [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_c, i32 0, i32 0), %sw.bb2 ],
-                       [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_b, i32 0, i32 0), %sw.bb1 ],
-                       [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_a, i32 0, i32 0), %entry ]
+  [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_c, i32 0, i32 0), %sw.bb2 ],
+  [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_b, i32 0, i32 0), %sw.bb1 ],
+  [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_a, i32 0, i32 0), %entry ]
   ret i32* %retval.0
-; CHECK-LABEL: @dllimport(
-; CHECK: switch i32
-; CHECK-NOT: @switch.table
 }
 
 ; We can use linear mapping.
 define i8 @linearmap1(i32 %c) {
+; CHECK-LABEL: @linearmap1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 10
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], -5
+; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i8 [[SWITCH_IDX_MULT]], 18
+; CHECK-NEXT:    ret i8 [[SWITCH_OFFSET]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8 3
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 10, label %return
-    i32 11, label %sw.bb1
-    i32 12, label %sw.bb2
-    i32 13, label %sw.bb3
+  i32 10, label %return
+  i32 11, label %sw.bb1
+  i32 12, label %sw.bb2
+  i32 13, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1078,24 +1237,28 @@ sw.default: br label %return
 return:
   %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 8, %sw.bb2 ], [ 13, %sw.bb1 ], [ 18, %entry ]
   ret i8 %x
-; CHECK-LABEL: @linearmap1(
-; CHECK: entry:
-; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
-; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, -5
-; CHECK-NEXT: %switch.offset = add i8 %switch.idx.mult, 18
-; CHECK-NEXT: ret i8 %switch.offset
 }
 
 ; Linear mapping in a different configuration.
 define i32 @linearmap2(i8 %c) {
+; CHECK-LABEL: @linearmap2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i8 [[C:%.*]], -13
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = zext i8 [[SWITCH_TABLEIDX]] to i32
+; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[SWITCH_IDX_CAST]], 18
+; CHECK-NEXT:    ret i32 [[SWITCH_OFFSET]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i32 3
+;
 entry:
   switch i8 %c, label %sw.default [
-    i8 -10, label %return
-    i8 -11, label %sw.bb1
-    i8 -12, label %sw.bb2
-    i8 -13, label %sw.bb3
+  i8 -10, label %return
+  i8 -11, label %sw.bb1
+  i8 -12, label %sw.bb2
+  i8 -13, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1104,23 +1267,28 @@ sw.default: br label %return
 return:
   %x = phi i32 [ 3, %sw.default ], [ 18, %sw.bb3 ], [ 19, %sw.bb2 ], [ 20, %sw.bb1 ], [ 21, %entry ]
   ret i32 %x
-; CHECK-LABEL: @linearmap2(
-; CHECK: entry:
-; CHECK-NEXT: %switch.tableidx = sub i8 %c, -13
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.idx.cast = zext i8 %switch.tableidx to i32
-; CHECK-NEXT: %switch.offset = add i32 %switch.idx.cast, 18
-; CHECK-NEXT: ret i32 %switch.offset
 }
 
 ; Linear mapping with overflows.
 define i8 @linearmap3(i32 %c) {
+; CHECK-LABEL: @linearmap3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 10
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], 100
+; CHECK-NEXT:    ret i8 [[SWITCH_IDX_MULT]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8 3
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 10, label %return
-    i32 11, label %sw.bb1
-    i32 12, label %sw.bb2
-    i32 13, label %sw.bb3
+  i32 10, label %return
+  i32 11, label %sw.bb1
+  i32 12, label %sw.bb2
+  i32 13, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1129,23 +1297,27 @@ sw.default: br label %return
 return:
   %x = phi i8 [ 3, %sw.default ], [ 44, %sw.bb3 ], [ -56, %sw.bb2 ], [ 100, %sw.bb1 ], [ 0, %entry ]
   ret i8 %x
-; CHECK-LABEL: @linearmap3(
-; CHECK: entry:
-; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
-; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, 100
-; CHECK-NEXT: ret i8 %switch.idx.mult
 }
 
 ; Linear mapping with with multiplier 1 and offset 0.
 define i8 @linearmap4(i32 %c) {
+; CHECK-LABEL: @linearmap4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], -2
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    ret i8 [[SWITCH_IDX_CAST]]
+; CHECK:       return:
+; CHECK-NEXT:    ret i8 3
+;
 entry:
   switch i32 %c, label %sw.default [
-    i32 -2, label %return
-    i32 -1, label %sw.bb1
-    i32 0, label %sw.bb2
-    i32 1, label %sw.bb3
+  i32 -2, label %return
+  i32 -1, label %sw.bb1
+  i32 0, label %sw.bb2
+  i32 1, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1154,22 +1326,26 @@ sw.default: br label %return
 return:
   %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %entry ]
   ret i8 %x
-; CHECK-LABEL: @linearmap4(
-; CHECK: entry:
-; CHECK-NEXT: %switch.tableidx = sub i32 %c, -2
-; CHECK: switch.lookup:
-; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
-; CHECK-NEXT: ret i8 %switch.idx.cast
 }
 
 ; Reuse the inverted table range compare.
 define i32 @reuse_cmp1(i32 %x) {
+; CHECK-LABEL: @reuse_cmp1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[INVERTED_CMP:%.*]] = xor i1 [[TMP0]], true
+; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
+; CHECK-NEXT:    [[R_0:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[R_0]], 0
+; CHECK-NEXT:    [[DOTR_0:%.*]] = select i1 [[INVERTED_CMP]], i32 100, i32 [[R_0]]
+; CHECK-NEXT:    ret i32 [[DOTR_0]]
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1185,22 +1361,24 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ]
   ret i32 %retval.0
-; CHECK-LABEL: @reuse_cmp1(
-; CHECK: entry:
-; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %x, 4
-; CHECK-NEXT: %inverted.cmp = xor i1 [[C]], true
-; CHECK:      [[R:%.+]] = select i1 %inverted.cmp, i32 100, i32 {{.*}}
-; CHECK-NEXT: ret i32 [[R]]
 }
 
 ; Reuse the table range compare.
 define i32 @reuse_cmp2(i32 %x) {
+; CHECK-LABEL: @reuse_cmp2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[X_:%.*]] = select i1 [[TMP0]], i32 [[X]], i32 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X_]], 4
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP0]], i32 [[X_]], i32 100
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1216,24 +1394,26 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ]
   ret i32 %retval.0
-; CHECK-LABEL: @reuse_cmp2(
-; CHECK: entry:
-; CHECK-NEXT: %0 = icmp ult i32 %x, 4
-; CHECK-NEXT: %x. = select i1 %0, i32 %x, i32 4
-; CHECK-NEXT: [[C:%.+]] = icmp ne i32 %x., 4
-; CHECK:      [[R:%.+]] = select i1 %0, i32 {{.*}}, i32 100
-; CHECK-NEXT: ret i32 [[R]]
 }
 
 ; Cannot reuse the table range compare, because the default value is the same
 ; as one of the case values.
 define i32 @no_reuse_cmp(i32 %x) {
+; CHECK-LABEL: @no_reuse_cmp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
+; CHECK-NEXT:    [[R_0:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 12
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[R_0]], 0
+; CHECK-NEXT:    [[R_0_:%.*]] = select i1 [[CMP]], i32 [[R_0]], i32 100
+; CHECK-NEXT:    ret i32 [[R_0_]]
+;
 entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1249,25 +1429,35 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ]
   ret i32 %retval.0
-; CHECK-LABEL: @no_reuse_cmp(
-; CHECK:  [[S:%.+]] = select
-; CHECK-NEXT:  %cmp = icmp ne i32 [[S]], 0
-; CHECK-NEXT:  [[R:%.+]] = select i1 %cmp, i32 [[S]], i32 100
-; CHECK-NEXT:  ret i32 [[R]]
 }
 
 ; Cannot reuse the table range compare, because the phi at the switch merge
 ; point is not dominated by the switch.
 define i32 @no_reuse_cmp2(i32 %x, i32 %y) {
+; CHECK-LABEL: @no_reuse_cmp2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EC:%.*]] = icmp ne i32 [[Y:%.*]], 0
+; CHECK-NEXT:    br i1 [[EC]], label [[SWITCH_ENTRY:%.*]], label [[SW_EPILOG:%.*]]
+; CHECK:       switch.entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 0
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[R_0:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[SWITCH_ENTRY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[R_0]], 0
+; CHECK-NEXT:    [[DOTR_0:%.*]] = select i1 [[CMP]], i32 100, i32 [[R_0]]
+; CHECK-NEXT:    ret i32 [[DOTR_0]]
+;
 entry:
   %ec = icmp ne i32 %y, 0
   br i1 %ec, label %switch.entry, label %sw.epilog
 switch.entry:
   switch i32 %x, label %sw.default [
-    i32 0, label %sw.bb
-    i32 1, label %sw.bb1
-    i32 2, label %sw.bb2
-    i32 3, label %sw.bb3
+  i32 0, label %sw.bb
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1283,28 +1473,49 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ]
   ret i32 %retval.0
-; CHECK-LABEL: @no_reuse_cmp2(
-; CHECK:  %r.0 = phi
-; CHECK-NEXT:  %cmp = icmp eq i32 %r.0, 0
-; CHECK-NEXT:  [[R:%.+]] = select i1 %cmp
-; CHECK-NEXT:  ret i32 [[R]]
 }
 
 define void @pr20210(i8 %x, i1 %y) {
 ; %z has uses outside of its BB or the phi it feeds into,
 ; so doing a table lookup and jumping directly to while.cond would
 ; cause %z to cease dominating all its uses.
-
+; CHECK-LABEL: @pr20210(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[SW:%.*]], label [[INTERMEDIATE:%.*]]
+; CHECK:       sw:
+; CHECK-NEXT:    switch i8 [[X:%.*]], label [[END:%.*]] [
+; CHECK-NEXT:    i8 7, label [[INTERMEDIATE]]
+; CHECK-NEXT:    i8 3, label [[INTERMEDIATE]]
+; CHECK-NEXT:    i8 2, label [[INTERMEDIATE]]
+; CHECK-NEXT:    i8 1, label [[INTERMEDIATE]]
+; CHECK-NEXT:    i8 0, label [[INTERMEDIATE]]
+; CHECK-NEXT:    ]
+; CHECK:       intermediate:
+; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[X]] to i32
+; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[Z]], [[INTERMEDIATE]] ], [ [[J:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[I]], 7
+; CHECK-NEXT:    br i1 [[B]], label [[WHILE_BODY]], label [[WHILE_END:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[J]] = add i32 [[I]], 1
+; CHECK-NEXT:    br label [[WHILE_COND]]
+; CHECK:       while.end:
+; CHECK-NEXT:    call void @exit(i32 [[Z]])
+; CHECK-NEXT:    unreachable
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
 entry:
   br i1 %y, label %sw, label %intermediate
 
 sw:
   switch i8 %x, label %end [
-    i8 7, label %intermediate
-    i8 3, label %intermediate
-    i8 2, label %intermediate
-    i8 1, label %intermediate
-    i8 0, label %intermediate
+  i8 7, label %intermediate
+  i8 3, label %intermediate
+  i8 2, label %intermediate
+  i8 1, label %intermediate
+  i8 0, label %intermediate
   ]
 
 intermediate:
@@ -1326,8 +1537,6 @@ while.end:
 
 end:
   ret void
-; CHECK-LABEL: @pr20210
-; CHECK: switch i8 %x
 }
 
 ; Make sure we do not crash due to trying to generate an unguarded
@@ -1335,12 +1544,28 @@ end:
 ; values) and simultaneously trying to generate a branch to deal with
 ; the fact that we have holes in the range.
 define i32 @covered_switch_with_bit_tests(i3) {
+; CHECK-LABEL: @covered_switch_with_bit_tests(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], -4
+; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 -61, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
+; CHECK-NEXT:    br label [[L6]]
+; CHECK:       l6:
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    ret i32 [[R]]
+;
 entry:
   switch i3 %0, label %l6 [
-    i3 -3, label %l5
-    i3 -4, label %l5
-    i3 3, label %l1
-    i3 2, label %l1
+  i3 -3, label %l5
+  i3 -4, label %l5
+  i3 3, label %l1
+  i3 2, label %l1
   ]
 
 l1: br label %l2
@@ -1354,18 +1579,17 @@ l5: br label %l2
 l6:
   %r = phi i32 [ %x, %l2 ], [ 0, %entry ]
   ret i32 %r
-; CHECK-LABEL: @covered_switch_with_bit_tests
-; CHECK: entry
-; CHECK-NEXT: switch
 }
 
 ; Speculation depth must be limited to avoid a zero-cost instruction cycle.
 
+define i32 @PR26308(i1 %B, i64 %load) {
 ; CHECK-LABEL: @PR26308(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[CLEANUP4:%.*]]
 ; CHECK:       cleanup4:
-; CHECK-NEXT:  br label %cleanup4
-
-define i32 @PR26308(i1 %B, i64 %load) {
+; CHECK-NEXT:    br label [[CLEANUP4]]
+;
 entry:
   br label %while.body
 
@@ -1393,9 +1617,32 @@ cleanup4:
 declare void @throw(i1)
 
 define void @wineh_test(i64 %val) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @wineh_test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    invoke void @throw(i1 false)
+; CHECK-NEXT:    to label [[UNREACHABLE:%.*]] unwind label [[CLEANUP1:%.*]]
+; CHECK:       unreachable:
+; CHECK-NEXT:    unreachable
+; CHECK:       cleanup1:
+; CHECK-NEXT:    [[CLEANUPPAD1:%.*]] = cleanuppad within none []
+; CHECK-NEXT:    switch i64 [[VAL:%.*]], label [[CLEANUPDONE2:%.*]] [
+; CHECK-NEXT:    i64 0, label [[CLEANUPDONE1:%.*]]
+; CHECK-NEXT:    i64 1, label [[CLEANUPDONE1]]
+; CHECK-NEXT:    i64 6, label [[CLEANUPDONE1]]
+; CHECK-NEXT:    ]
+; CHECK:       cleanupdone1:
+; CHECK-NEXT:    cleanupret from [[CLEANUPPAD1]] unwind label [[CLEANUP2:%.*]]
+; CHECK:       cleanupdone2:
+; CHECK-NEXT:    cleanupret from [[CLEANUPPAD1]] unwind label [[CLEANUP2]]
+; CHECK:       cleanup2:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i1 [ true, [[CLEANUPDONE1]] ], [ false, [[CLEANUPDONE2]] ]
+; CHECK-NEXT:    [[CLEANUPPAD2:%.*]] = cleanuppad within none []
+; CHECK-NEXT:    call void @throw(i1 [[PHI]]) [ "funclet"(token [[CLEANUPPAD2]]) ]
+; CHECK-NEXT:    unreachable
+;
 entry:
   invoke void @throw(i1 false)
-          to label %unreachable unwind label %cleanup1
+  to label %unreachable unwind label %cleanup1
 
 unreachable:
   unreachable
@@ -1403,9 +1650,9 @@ unreachable:
 cleanup1:
   %cleanuppad1 = cleanuppad within none []
   switch i64 %val, label %cleanupdone2 [
-    i64 0, label %cleanupdone1
-    i64 1, label %cleanupdone1
-    i64 6, label %cleanupdone1
+  i64 0, label %cleanupdone1
+  i64 1, label %cleanupdone1
+  i64 6, label %cleanupdone1
   ]
 
 cleanupdone1:
@@ -1421,32 +1668,4 @@ cleanup2:
   unreachable
 }
 
-; CHECK-LABEL: @wineh_test(
-; CHECK: entry:
-; CHECK:   invoke void @throw(i1 false)
-; CHECK:           to label %[[unreachable:.*]] unwind label %[[cleanup1:.*]]
-
-; CHECK: [[unreachable]]:
-; CHECK:   unreachable
-
-; CHECK: [[cleanup1]]:
-; CHECK:   %[[cleanuppad1:.*]] = cleanuppad within none []
-; CHECK:   switch i64 %val, label %[[cleanupdone2:.*]] [
-; CHECK:     i64 0, label %[[cleanupdone1:.*]]
-; CHECK:     i64 1, label %[[cleanupdone1]]
-; CHECK:     i64 6, label %[[cleanupdone1]]
-; CHECK:   ]
-
-; CHECK: [[cleanupdone1]]:
-; CHECK:   cleanupret from %[[cleanuppad1]] unwind label %[[cleanup2:.*]]
-
-; CHECK: [[cleanupdone2]]:
-; CHECK:   cleanupret from %[[cleanuppad1]] unwind label %[[cleanup2]]
-
-; CHECK: [[cleanup2]]:
-; CHECK:   %[[phi:.*]] = phi i1 [ true, %[[cleanupdone1]] ], [ false, %[[cleanupdone2]] ]
-; CHECK:   %[[cleanuppad2:.*]] = cleanuppad within none []
-; CHECK:   call void @throw(i1 %[[phi]]) [ "funclet"(token %[[cleanuppad2]]) ]
-; CHECK:   unreachable
-
 declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index e5c2ef65b3181..9a4b4fe263266 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -1,11 +1,20 @@
-; RUN: opt %s -S -simplifycfg | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -passes='simplify-cfg<switch-to-lookup>' | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 declare void @foo(i32)
 
 define void @test(i1 %a) {
-; CHECK-LABEL: @test
-; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    br i1 [[A:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       false:
+; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    ret void
+;
   switch i1 %a, label %default [i1 1, label %true
-                                i1 0, label %false]
+  i1 0, label %false]
 true:
   call void @foo(i32 1)
   ret void
@@ -15,14 +24,35 @@ false:
 default:
   call void @foo(i32 2)
   ret void
-}  
+}
 
 define void @test2(i2 %a) {
-; CHECK-LABEL: @test2
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    switch i2 [[A:%.*]], label [[DEFAULT1:%.*]] [
+; CHECK-NEXT:    i2 0, label [[CASE0:%.*]]
+; CHECK-NEXT:    i2 1, label [[CASE1:%.*]]
+; CHECK-NEXT:    i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT:    i2 -1, label [[CASE3:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       case0:
+; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+; CHECK:       case1:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       case2:
+; CHECK-NEXT:    tail call void @foo(i32 2)
+; CHECK-NEXT:    ret void
+; CHECK:       case3:
+; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    ret void
+; CHECK:       default1:
+; CHECK-NEXT:    unreachable
+;
   switch i2 %a, label %default [i2 0, label %case0
-                                i2 1, label %case1
-                                i2 2, label %case2
-                                i2 3, label %case3]
+  i2 1, label %case1
+  i2 2, label %case2
+  i2 3, label %case3]
 case0:
   call void @foo(i32 0)
   ret void
@@ -36,19 +66,35 @@ case3:
   call void @foo(i32 3)
   ret void
 default:
-; CHECK-LABEL: default1:
-; CHECK-NEXT: unreachable
   call void @foo(i32 4)
   ret void
-}  
+}
 
 ; This one is a negative test - we know the value of the default,
 ; but that's about it
 define void @test3(i2 %a) {
-; CHECK-LABEL: @test3
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    switch i2 [[A:%.*]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i2 0, label [[CASE0:%.*]]
+; CHECK-NEXT:    i2 1, label [[CASE1:%.*]]
+; CHECK-NEXT:    i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       case0:
+; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+; CHECK:       case1:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       case2:
+; CHECK-NEXT:    tail call void @foo(i32 2)
+; CHECK-NEXT:    ret void
+; CHECK:       default:
+; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+;
   switch i2 %a, label %default [i2 0, label %case0
-                                i2 1, label %case1
-                                i2 2, label %case2]
+  i2 1, label %case1
+  i2 2, label %case2]
 
 case0:
   call void @foo(i32 0)
@@ -60,18 +106,30 @@ case2:
   call void @foo(i32 2)
   ret void
 default:
-; CHECK-LABEL: default:
-; CHECK-NEXT: call void @foo
   call void @foo(i32 0)
   ret void
-}  
+}
 
 ; Negative test - check for possible overflow when computing
 ; number of possible cases.
 define void @test4(i128 %a) {
-; CHECK-LABEL: @test4
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    switch i128 [[A:%.*]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i128 0, label [[CASE0:%.*]]
+; CHECK-NEXT:    i128 1, label [[CASE1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       case0:
+; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+; CHECK:       case1:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       default:
+; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    ret void
+;
   switch i128 %a, label %default [i128 0, label %case0
-                                  i128 1, label %case1]
+  i128 1, label %case1]
 
 case0:
   call void @foo(i32 0)
@@ -80,20 +138,28 @@ case1:
   call void @foo(i32 1)
   ret void
 default:
-; CHECK-LABEL: default:
-; CHECK-NEXT: call void @foo
   call void @foo(i32 0)
   ret void
-}  
+}
 
 ; All but one bit known zero
 define void @test5(i8 %a) {
-; CHECK-LABEL: @test5
-; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
-  %cmp = icmp ult i8 %a, 2 
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[A:%.*]], 2
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp eq i8 [[A]], 1
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       false:
+; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    ret void
+;
+  %cmp = icmp ult i8 %a, 2
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 1, label %true
-                                i8 0, label %false]
+  i8 0, label %false]
 true:
   call void @foo(i32 1)
   ret void
@@ -103,18 +169,27 @@ false:
 default:
   call void @foo(i32 2)
   ret void
-} 
+}
 
 ;; All but one bit known one
 define void @test6(i8 %a) {
-; CHECK-LABEL: @test6
-; CHECK: @llvm.assume
-; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[A:%.*]], -3
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp eq i8 [[A]], -1
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       false:
+; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    ret void
+;
   %and = and i8 %a, 254
-  %cmp = icmp eq i8 %and, 254 
+  %cmp = icmp eq i8 %and, 254
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
-                                i8 254, label %false]
+  i8 254, label %false]
 true:
   call void @foo(i32 1)
   ret void
@@ -129,15 +204,24 @@ default:
 ; Check that we can eliminate both dead cases and dead defaults
 ; within a single run of simplify-cfg
 define void @test7(i8 %a) {
-; CHECK-LABEL: @test7
-; CHECK: @llvm.assume
-; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[A:%.*]], -3
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp eq i8 [[A]], -1
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK:       true:
+; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       false:
+; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    ret void
+;
   %and = and i8 %a, 254
-  %cmp = icmp eq i8 %and, 254 
+  %cmp = icmp eq i8 %and, 254
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
-                                i8 254, label %false
-                                i8 0, label %also_dead]
+  i8 254, label %false
+  i8 0, label %also_dead]
 true:
   call void @foo(i32 1)
   ret void
@@ -154,17 +238,18 @@ default:
 
 ;; All but one bit known undef
 ;; Note: This is currently testing an optimization which doesn't trigger. The
-;; case this is protecting against is that a bit could be assumed both zero 
+;; case this is protecting against is that a bit could be assumed both zero
 ;; *or* one given we know it's undef.  ValueTracking doesn't do this today,
 ;; but it doesn't hurt to confirm.
 define void @test8(i8 %a) {
 ; CHECK-LABEL: @test8(
-; CHECK: switch i8
+; CHECK-NEXT:    unreachable
+;
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, undef
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
-                                i8 254, label %false]
+  i8 254, label %false]
 true:
   call void @foo(i32 1)
   ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index 2d46aac23f61b..21cecc5c942b7 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -4,10 +4,10 @@
 define i32 @test1(i32 %x) nounwind {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  a:
-; CHECK-NEXT:    [[I:%.*]] = shl i32 %x, 1
+; CHECK-NEXT:    [[I:%.*]] = shl i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[I]], 24
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[COND]], i32 5, i32 0
-; CHECK-NEXT:    ret i32 [[DOT]]
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 5, i32 0
+; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
 ;
   %i = shl i32 %x, 1
   switch i32 %i, label %a [
@@ -48,12 +48,19 @@ c:
 
 define i1 @repeated_signbits(i8 %condition) {
 ; CHECK-LABEL: @repeated_signbits(
-; CHECK:         switch i32
-; CHECK-DAG:     i32 -128, label %a
-; CHECK-DAG:     i32 -1, label %a
-; CHECK-DAG:     i32  0, label %a
-; CHECK-DAG:     i32  127, label %a
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i8 [[CONDITION:%.*]] to i32
+; CHECK-NEXT:    switch i32 [[SEXT]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[A:%.*]]
+; CHECK-NEXT:    i32 127, label [[A]]
+; CHECK-NEXT:    i32 -128, label [[A]]
+; CHECK-NEXT:    i32 -1, label [[A]]
 ; CHECK-NEXT:    ]
+; CHECK:       a:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ], [ false, [[DEFAULT]] ]
+; CHECK-NEXT:    ret i1 [[MERGE]]
+; CHECK:       default:
+; CHECK-NEXT:    br label [[A]]
 ;
 entry:
   %sext = sext i8 %condition to i32
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 165e5b264aef7..98c434a5a0ec3 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -1,141 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -S | FileCheck -enable-var-scope %s
 
 ; Test basic folding to a conditional branch.
 define i32 @foo(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    br i1 [[EQ]], label [[B:%.*]], label [[SWITCH:%.*]]
+; CHECK:       switch:
+; CHECK-NEXT:    [[LT:%.*]] = icmp slt i64 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[LT]], label [[A:%.*]], label [[B]]
+; CHECK:       a:
+; CHECK-NEXT:    tail call void @bees.a() #0
+; CHECK-NEXT:    ret i32 1
+; CHECK:       b:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[SWITCH]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    tail call void @bees.b() #0
+; CHECK-NEXT:    ret i32 [[RETVAL]]
+;
 entry:
-    %eq = icmp eq i64 %x, %y
-    br i1 %eq, label %b, label %switch
+  %eq = icmp eq i64 %x, %y
+  br i1 %eq, label %b, label %switch
 switch:
-    %lt = icmp slt i64 %x, %y
-; CHECK: br i1 %lt, label %a, label %b
-    %qux = select i1 %lt, i32 0, i32 2
-    switch i32 %qux, label %bees [
-        i32 0, label %a
-        i32 1, label %b
-        i32 2, label %b
-    ]
+  %lt = icmp slt i64 %x, %y
+  %qux = select i1 %lt, i32 0, i32 2
+  switch i32 %qux, label %bees [
+  i32 0, label %a
+  i32 1, label %b
+  i32 2, label %b
+  ]
 a:
-    tail call void @bees.a() nounwind
-    ret i32 1
-; CHECK: b:
-; CHECK-NEXT: %retval = phi i32 [ 0, %switch ], [ 2, %entry ]
+  tail call void @bees.a() nounwind
+  ret i32 1
 b:
-    %retval = phi i32 [0, %switch], [0, %switch], [2, %entry]
-    tail call void @bees.b() nounwind
-    ret i32 %retval
-; CHECK-NOT: bees:
+  %retval = phi i32 [0, %switch], [0, %switch], [2, %entry]
+  tail call void @bees.b() nounwind
+  ret i32 %retval
 bees:
-    tail call void @llvm.trap() nounwind
-    unreachable
+  tail call void @llvm.trap() nounwind
+  unreachable
 }
 
 ; Test basic folding to an unconditional branch.
 define i32 @bar(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @bar(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @bees.a() #0
+; CHECK-NEXT:    ret i32 0
+;
 entry:
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() [[$NUW:#[0-9]+]]
-; CHECK-NEXT: ret i32 0
-    %lt = icmp slt i64 %x, %y
-    %qux = select i1 %lt, i32 0, i32 2
-    switch i32 %qux, label %bees [
-        i32 0, label %a
-        i32 1, label %b
-        i32 2, label %a
-    ]
+  %lt = icmp slt i64 %x, %y
+  %qux = select i1 %lt, i32 0, i32 2
+  switch i32 %qux, label %bees [
+  i32 0, label %a
+  i32 1, label %b
+  i32 2, label %a
+  ]
 a:
-    %retval = phi i32 [0, %entry], [0, %entry], [1, %b]
-    tail call void @bees.a() nounwind
-    ret i32 0
+  %retval = phi i32 [0, %entry], [0, %entry], [1, %b]
+  tail call void @bees.a() nounwind
+  ret i32 0
 b:
-    tail call void @bees.b() nounwind
-    br label %a
+  tail call void @bees.b() nounwind
+  br label %a
 bees:
-    tail call void @llvm.trap() nounwind
-    unreachable
+  tail call void @llvm.trap() nounwind
+  unreachable
 }
 
 ; Test the edge case where both values from the select are the default case.
 define void @bazz(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @bazz(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @bees.b() #0
+; CHECK-NEXT:    ret void
+;
 entry:
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.b() [[$NUW]]
-; CHECK-NEXT: ret void
-    %lt = icmp slt i64 %x, %y
-    %qux = select i1 %lt, i32 10, i32 12
-    switch i32 %qux, label %b [
-        i32 0, label %a
-        i32 1, label %bees
-        i32 2, label %bees
-    ]
+  %lt = icmp slt i64 %x, %y
+  %qux = select i1 %lt, i32 10, i32 12
+  switch i32 %qux, label %b [
+  i32 0, label %a
+  i32 1, label %bees
+  i32 2, label %bees
+  ]
 a:
-    tail call void @bees.a() nounwind
-    ret void
+  tail call void @bees.a() nounwind
+  ret void
 b:
-    tail call void @bees.b() nounwind
-    ret void
+  tail call void @bees.b() nounwind
+  ret void
 bees:
-    tail call void @llvm.trap()
-    unreachable
+  tail call void @llvm.trap()
+  unreachable
 }
 
 ; Test the edge case where both values from the select are equal.
 define void @quux(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @quux(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    tail call void @bees.a() #0
+; CHECK-NEXT:    ret void
+;
 entry:
-; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void @bees.a() [[$NUW]]
-; CHECK-NEXT: ret void
-    %lt = icmp slt i64 %x, %y
-    %qux = select i1 %lt, i32 0, i32 0
-    switch i32 %qux, label %b [
-        i32 0, label %a
-        i32 1, label %bees
-        i32 2, label %bees
-    ]
+  %lt = icmp slt i64 %x, %y
+  %qux = select i1 %lt, i32 0, i32 0
+  switch i32 %qux, label %b [
+  i32 0, label %a
+  i32 1, label %bees
+  i32 2, label %bees
+  ]
 a:
-    tail call void @bees.a() nounwind
-    ret void
+  tail call void @bees.a() nounwind
+  ret void
 b:
-    tail call void @bees.b() nounwind
-    ret void
+  tail call void @bees.b() nounwind
+  ret void
 bees:
-    tail call void @llvm.trap()
-    unreachable
+  tail call void @llvm.trap()
+  unreachable
 }
 
 ; A final test, for phi node munging.
 define i32 @xyzzy(i64 %x, i64 %y) {
 ; CHECK-LABEL: @xyzzy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[LT:%.*]] = icmp slt i64 [[X]], [[Y]]
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[LT]], i32 -1, i32 1
+; CHECK-NEXT:    [[VAL:%.*]] = select i1 [[EQ]], i32 0, i32 [[SPEC_SELECT]]
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
 entry:
-    %eq = icmp eq i64 %x, %y
-    br i1 %eq, label %r, label %cont
+  %eq = icmp eq i64 %x, %y
+  br i1 %eq, label %r, label %cont
 cont:
-; CHECK: %lt = icmp slt i64 %x, %y
-    %lt = icmp slt i64 %x, %y
-; CHECK-NEXT: select i1 %lt, i32 -1, i32 1
-    %qux = select i1 %lt, i32 0, i32 2
-    switch i32 %qux, label %bees [
-        i32 0, label %a
-        i32 1, label %r
-        i32 2, label %r
-    ]
+  %lt = icmp slt i64 %x, %y
+  %qux = select i1 %lt, i32 0, i32 2
+  switch i32 %qux, label %bees [
+  i32 0, label %a
+  i32 1, label %r
+  i32 2, label %r
+  ]
 r:
-    %val = phi i32 [0, %entry], [1, %cont], [1, %cont]
-    ret i32 %val
+  %val = phi i32 [0, %entry], [1, %cont], [1, %cont]
+  ret i32 %val
 a:
-    ret i32 -1
-; CHECK-NOT: bees:
+  ret i32 -1
 bees:
-    tail call void @llvm.trap()
-    unreachable
+  tail call void @llvm.trap()
+  unreachable
 }
 
 declare void @llvm.trap() nounwind noreturn
 declare void @bees.a() nounwind
 declare void @bees.b() nounwind
 
-; CHECK: attributes [[$NUW]] = { nounwind }
 ; CHECK: attributes #1 = { cold noreturn nounwind }
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
index a109b317c7320..c0f6a43fda375 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
@@ -1,18 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -simplifycfg -S | FileCheck %s
 
 declare i32 @f(i32)
 
 define i32 @basic(i32 %x) {
-; CHECK-LABEL: @basic
-; CHECK: x.off = add i32 %x, -5
-; CHECK: %switch = icmp ult i32 %x.off, 3
-; CHECK: br i1 %switch, label %a, label %default
+; CHECK-LABEL: @basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -5
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[A:%.*]], label [[DEFAULT:%.*]]
+; CHECK:       default:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @f(i32 0)
+; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK:       a:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @f(i32 1)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
 
 entry:
   switch i32 %x, label %default [
-    i32 5, label %a
-    i32 6, label %a
-    i32 7, label %a
+  i32 5, label %a
+  i32 6, label %a
+  i32 7, label %a
   ]
 default:
   %0 = call i32 @f(i32 0)
@@ -24,20 +33,28 @@ a:
 
 
 define i32 @unreachable(i32 %x) {
-; CHECK-LABEL: @unreachable
-; CHECK: x.off = add i32 %x, -5
-; CHECK: %switch = icmp ult i32 %x.off, 3
-; CHECK: br i1 %switch, label %a, label %b
+; CHECK-LABEL: @unreachable(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -5
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       a:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @f(i32 0)
+; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK:       b:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @f(i32 1)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
 
 entry:
   switch i32 %x, label %unreachable [
-    i32 5, label %a
-    i32 6, label %a
-    i32 7, label %a
-    i32 10, label %b
-    i32 20, label %b
-    i32 30, label %b
-    i32 40, label %b
+  i32 5, label %a
+  i32 6, label %a
+  i32 7, label %a
+  i32 10, label %b
+  i32 20, label %b
+  i32 30, label %b
+  i32 40, label %b
   ]
 unreachable:
   unreachable
@@ -51,20 +68,28 @@ b:
 
 
 define i32 @unreachable2(i32 %x) {
-; CHECK-LABEL: @unreachable2
-; CHECK: x.off = add i32 %x, -5
-; CHECK: %switch = icmp ult i32 %x.off, 3
-; CHECK: br i1 %switch, label %a, label %b
+; CHECK-LABEL: @unreachable2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -5
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       a:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @f(i32 0)
+; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK:       b:
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @f(i32 1)
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
 
 entry:
   ; Note: folding the most popular case destination into the default
   ; would prevent switch-to-icmp here.
   switch i32 %x, label %unreachable [
-    i32 5, label %a
-    i32 6, label %a
-    i32 7, label %a
-    i32 10, label %b
-    i32 20, label %b
+  i32 5, label %a
+  i32 6, label %a
+  i32 7, label %a
+  i32 10, label %b
+  i32 20, label %b
   ]
 unreachable:
   unreachable
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll
index 7dce54d75d697..083cfe1ee2996 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 target datalayout="p:40:64:64:32"
 
@@ -6,102 +7,151 @@ declare void @foo1()
 declare void @foo2()
 
 define void @test1(i32 %V) {
-        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
+; CHECK-NEXT:    i32 17, label [[T:%.*]]
+; CHECK-NEXT:    i32 4, label [[T]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test1(
-; CHECK:  switch i32 %V, label %F [
-; CHECK:    i32 17, label %T
-; CHECK:    i32 4, label %T
-; CHECK:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test1_ptr(i32* %V) {
-        %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
-        %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
-        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; CHECK-LABEL: @test1_ptr(
+; CHECK-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32* [[V:%.*]] to i32
+; CHECK-NEXT:    switch i32 [[MAGICPTR]], label [[F:%.*]] [
+; CHECK-NEXT:    i32 17, label [[T:%.*]]
+; CHECK-NEXT:    i32 4, label [[T]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
+  %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
+  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test1_ptr(
-; DL:  %magicptr = ptrtoint i32* %V to i32
-; DL:  switch i32 %magicptr, label %F [
-; DL:    i32 17, label %T
-; DL:    i32 4, label %T
-; DL:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test1_ptr_as1(i32 addrspace(1)* %V) {
-        %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
-        %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
-        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; CHECK-LABEL: @test1_ptr_as1(
+; CHECK-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32 addrspace(1)* [[V:%.*]] to i32
+; CHECK-NEXT:    switch i32 [[MAGICPTR]], label [[F:%.*]] [
+; CHECK-NEXT:    i32 17, label [[T:%.*]]
+; CHECK-NEXT:    i32 4, label [[T]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
+  %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
+  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test1_ptr_as1(
-; DL:  %magicptr = ptrtoint i32 addrspace(1)* %V to i16
-; DL:  switch i16 %magicptr, label %F [
-; DL:    i16 17, label %T
-; DL:    i16 4, label %T
-; DL:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test2(i32 %V) {
-        %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
-        %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
-        %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[T:%.*]] [
+; CHECK-NEXT:    i32 17, label [[F:%.*]]
+; CHECK-NEXT:    i32 4, label [[F]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
+  %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
+  %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test2(
-; CHECK:  switch i32 %V, label %T [
-; CHECK:    i32 17, label %F
-; CHECK:    i32 4, label %F
-; CHECK:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test3(i32 %V) {
-        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-        br i1 %C1, label %T, label %N
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
+; CHECK-NEXT:    i32 4, label [[T:%.*]]
+; CHECK-NEXT:    i32 17, label [[T]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+  br i1 %C1, label %T, label %N
 N:              ; preds = %0
-        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-        br i1 %C2, label %T, label %F
+  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+  br i1 %C2, label %T, label %F
 T:              ; preds = %N, %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %N
-        call void @foo2( )
-        ret void
+  call void @foo2( )
+  ret void
 
-; CHECK-LABEL: @test3(
-; CHECK: switch i32 %V, label %F [
-; CHECK:     i32 4, label %T
-; CHECK:     i32 17, label %T
-; CHECK:   ]
 }
 
 
 define i32 @test4(i8 zeroext %c) nounwind ssp noredzone {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
+; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
+; CHECK-NEXT:    i8 34, label [[LOR_END]]
+; CHECK-NEXT:    i8 92, label [[LOR_END]]
+; CHECK-NEXT:    ]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[LOR_EXT]]
+;
 entry:
   %cmp = icmp eq i8 %c, 62
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -119,20 +169,28 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
-; CHECK-LABEL: @test4(
-; CHECK:  switch i8 %c, label %lor.rhs [
-; CHECK:    i8 62, label %lor.end
-; CHECK:    i8 34, label %lor.end
-; CHECK:    i8 92, label %lor.end
-; CHECK:  ]
 }
 
 define i32 @test5(i8 zeroext %c) nounwind ssp noredzone {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
+; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
+; CHECK-NEXT:    i8 34, label [[LOR_END]]
+; CHECK-NEXT:    i8 92, label [[LOR_END]]
+; CHECK-NEXT:    ]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[LOR_EXT]]
+;
 entry:
   switch i8 %c, label %lor.rhs [
-    i8 62, label %lor.end
-    i8 34, label %lor.end
-    i8 92, label %lor.end
+  i8 62, label %lor.end
+  i8 34, label %lor.end
+  i8 92, label %lor.end
   ]
 
 lor.rhs:                                          ; preds = %entry
@@ -143,48 +201,63 @@ lor.end:                                          ; preds = %entry, %entry, %ent
   %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
-; CHECK-LABEL: @test5(
-; CHECK:  switch i8 %c, label %lor.rhs [
-; CHECK:    i8 62, label %lor.end
-; CHECK:    i8 34, label %lor.end
-; CHECK:    i8 92, label %lor.end
-; CHECK:  ]
 }
 
 
 define i1 @test6({ i32, i32 }* %I) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP_1_I:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[I:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP_2_I:%.*]] = load i32, i32* [[TMP_1_I]]
+; CHECK-NEXT:    [[TMP_2_I_OFF:%.*]] = add i32 [[TMP_2_I]], -14
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[TMP_2_I_OFF]], 6
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
+; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
+;
 entry:
-        %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-        %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
-        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
-        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
+  %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
+  %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
+  %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
+  br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
 shortcirc_next.0:               ; preds = %entry
-        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
-        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
+  %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
+  br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
 shortcirc_next.1:               ; preds = %shortcirc_next.0
-        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
-        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
+  %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
+  br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
 shortcirc_next.2:               ; preds = %shortcirc_next.1
-        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
-        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
+  %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
+  br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
 shortcirc_next.3:               ; preds = %shortcirc_next.2
-        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
-        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
+  %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
+  br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
 shortcirc_next.4:               ; preds = %shortcirc_next.3
-        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
-        br label %UnifiedReturnBlock
+  %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
+  br label %UnifiedReturnBlock
 shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
-        br label %UnifiedReturnBlock
+  br label %UnifiedReturnBlock
 UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
-        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
-        ret i1 %UnifiedRetVal
+  %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
+  ret i1 %UnifiedRetVal
 
-; CHECK-LABEL: @test6(
-; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
 }
 
 define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i8 99, label [[IF_THEN]]
+; CHECK-NEXT:    i8 97, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @foo1() #2
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp = icmp ult i32 %x, 32
   %cmp4 = icmp eq i8 %c, 97
@@ -200,17 +273,27 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret void
 
-; CHECK-LABEL: @test7(
-; CHECK:   %cmp = icmp ult i32 %x, 32
-; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
-; CHECK: switch.early.test:
-; CHECK:   switch i8 %c, label %if.end [
-; CHECK:     i8 99, label %if.then
-; CHECK:     i8 97, label %if.then
-; CHECK:   ]
 }
 
 define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[N:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       N:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i8 99, label [[IF_THEN]]
+; CHECK-NEXT:    i8 97, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 42, [[SWITCH_EARLY_TEST]] ], [ 42, [[N]] ], [ 42, [[SWITCH_EARLY_TEST]] ]
+; CHECK-NEXT:    tail call void @foo1() #2
+; CHECK-NEXT:    ret i32 [[A]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %C, label %N, label %if.then
 N:
@@ -229,17 +312,33 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret i32 0
 
-; CHECK-LABEL: @test8(
-; CHECK: switch.early.test:
-; CHECK:   switch i8 %c, label %if.end [
-; CHECK:     i8 99, label %if.then
-; CHECK:     i8 97, label %if.then
-; CHECK:   ]
-; CHECK:   %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ]
 }
 
 ;; This is "Example 7" from http://blog.regehr.org/archives/320
 define i32 @test9(i8 zeroext %c) nounwind ssp noredzone {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i8 [[C]], label [[LOR_RHS:%.*]] [
+; CHECK-NEXT:    i8 92, label [[LOR_END]]
+; CHECK-NEXT:    i8 62, label [[LOR_END]]
+; CHECK-NEXT:    i8 60, label [[LOR_END]]
+; CHECK-NEXT:    i8 59, label [[LOR_END]]
+; CHECK-NEXT:    i8 58, label [[LOR_END]]
+; CHECK-NEXT:    i8 46, label [[LOR_END]]
+; CHECK-NEXT:    i8 44, label [[LOR_END]]
+; CHECK-NEXT:    i8 34, label [[LOR_END]]
+; CHECK-NEXT:    i8 39, label [[LOR_END]]
+; CHECK-NEXT:    ]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[SWITCH_EARLY_TEST]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY:%.*]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ]
+; CHECK-NEXT:    [[CONV46:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV46]]
+;
 entry:
   %cmp = icmp ult i8 %c, 33
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -285,25 +384,23 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %conv46 = zext i1 %0 to i32
   ret i32 %conv46
 
-; CHECK-LABEL: @test9(
-; CHECK:   %cmp = icmp ult i8 %c, 33
-; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
-
-; CHECK: switch.early.test:
-; CHECK:   switch i8 %c, label %lor.rhs [
-; CHECK:     i8 92, label %lor.end
-; CHECK:     i8 62, label %lor.end
-; CHECK:     i8 60, label %lor.end
-; CHECK:     i8 59, label %lor.end
-; CHECK:     i8 58, label %lor.end
-; CHECK:     i8 46, label %lor.end
-; CHECK:     i8 44, label %lor.end
-; CHECK:     i8 34, label %lor.end
-; CHECK:     i8 39, label %lor.end
-; CHECK:   ]
+
 }
 
 define i32 @test10(i32 %mode, i1 %Cond) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH_EARLY_TEST:%.*]], label [[F:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i32 [[MODE:%.*]], label [[T:%.*]] [
+; CHECK-NEXT:    i32 51, label [[F]]
+; CHECK-NEXT:    i32 0, label [[F]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 123, [[SWITCH_EARLY_TEST]] ], [ 324, [[F]] ]
+; CHECK-NEXT:    ret i32 [[MERGE]]
+; CHECK:       F:
+; CHECK-NEXT:    br label [[T]]
+;
   %A = icmp ne i32 %mode, 0
   %B = icmp ne i32 %mode, 51
   %C = and i1 %A, %B
@@ -314,17 +411,27 @@ T:
 F:
   ret i32 324
 
-; CHECK-LABEL: @test10(
-; CHECK:  br i1 %Cond, label %switch.early.test, label %F
-; CHECK:switch.early.test:
-; CHECK:  switch i32 %mode, label %T [
-; CHECK:    i32 51, label %F
-; CHECK:    i32 0, label %F
-; CHECK:  ]
 }
 
 ; PR8780
 define i32 @test11(i32 %bar) nounwind {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[BAR:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 55, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 53, label [[RETURN]]
+; CHECK-NEXT:    i32 35, label [[RETURN]]
+; CHECK-NEXT:    i32 24, label [[RETURN]]
+; CHECK-NEXT:    i32 23, label [[RETURN]]
+; CHECK-NEXT:    i32 12, label [[RETURN]]
+; CHECK-NEXT:    i32 4, label [[RETURN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
 entry:
   %cmp = icmp eq i32 %bar, 4
   %cmp2 = icmp eq i32 %bar, 35
@@ -353,19 +460,21 @@ return:                                           ; preds = %if.end, %if.then
   %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ]
   ret i32 %retval.0
 
-; CHECK-LABEL: @test11(
-; CHECK: switch i32 %bar, label %if.end [
-; CHECK:   i32 55, label %return
-; CHECK:   i32 53, label %return
-; CHECK:   i32 35, label %return
-; CHECK:   i32 24, label %return
-; CHECK:   i32 23, label %return
-; CHECK:   i32 12, label %return
-; CHECK:   i32 4, label %return
-; CHECK: ]
 }
 
 define void @test12() nounwind {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_OLD:%.*]] = icmp eq i32 undef, undef
+; CHECK-NEXT:    br i1 [[A_OLD]], label [[BB55_US_US:%.*]], label [[MALFORMED:%.*]]
+; CHECK:       bb55.us.us:
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt i32 undef, undef
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 undef, undef
+; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[B]], [[A]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB55_US_US]], label [[MALFORMED]]
+; CHECK:       malformed:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %bb49.us.us
 
@@ -382,12 +491,26 @@ bb55.us.us:
 
 malformed:
   ret void
-; CHECK-LABEL: @test12(
 
 }
 
 ; test13 - handle switch formation with ult.
 define void @test13(i32 %x) nounwind ssp noredzone {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
+; CHECK-NEXT:    i32 4, label [[IF_THEN]]
+; CHECK-NEXT:    i32 3, label [[IF_THEN]]
+; CHECK-NEXT:    i32 1, label [[IF_THEN]]
+; CHECK-NEXT:    i32 0, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @foo1() #3
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp = icmp ult i32 %x, 2
   br i1 %cmp, label %if.then, label %lor.lhs.false3
@@ -410,18 +533,26 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
-; CHECK-LABEL: @test13(
-; CHECK:  switch i32 %x, label %if.end [
-; CHECK:     i32 6, label %if.then
-; CHECK:     i32 4, label %if.then
-; CHECK:     i32 3, label %if.then
-; CHECK:     i32 1, label %if.then
-; CHECK:     i32 0, label %if.then
-; CHECK:   ]
 }
 
 ; test14 - handle switch formation with ult.
 define void @test14(i32 %x) nounwind ssp noredzone {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
+; CHECK-NEXT:    i32 4, label [[IF_THEN]]
+; CHECK-NEXT:    i32 3, label [[IF_THEN]]
+; CHECK-NEXT:    i32 2, label [[IF_THEN]]
+; CHECK-NEXT:    i32 1, label [[IF_THEN]]
+; CHECK-NEXT:    i32 0, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @foo1() #3
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp = icmp ugt i32 %x, 2
   br i1 %cmp, label %lor.lhs.false3, label %if.then
@@ -444,18 +575,15 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
-; CHECK-LABEL: @test14(
-; CHECK:  switch i32 %x, label %if.end [
-; CHECK:     i32 6, label %if.then
-; CHECK:     i32 4, label %if.then
-; CHECK:     i32 3, label %if.then
-; CHECK:     i32 1, label %if.then
-; CHECK:     i32 0, label %if.then
-; CHECK:   ]
 }
 
 ; Don't crash on ginormous ranges.
 define void @test15(i128 %x) nounwind {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:  if.end:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i128 [[X:%.*]], 2
+; CHECK-NEXT:    ret void
+;
   %cmp = icmp ugt i128 %x, 2
   br i1 %cmp, label %if.end, label %lor.false
 
@@ -470,18 +598,19 @@ if.then:
 if.end:
   ret void
 
-; CHECK-LABEL: @test15(
-; CHECK-NOT: switch
-; CHECK: ret void
 }
 
 ; PR8675
 ; rdar://5134905
 define zeroext i1 @test16(i32 %x) nounwind {
-entry:
 ; CHECK-LABEL: @test16(
-; CHECK: %x.off = add i32 %x, -1
-; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
+; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
+;
+entry:
   %cmp.i = icmp eq i32 %x, 1
   br i1 %cmp.i, label %lor.end, label %lor.lhs.false
 
@@ -500,6 +629,17 @@ lor.end:
 
 ; Check that we don't turn an icmp into a switch where it's not useful.
 define void @test17(i32 %x, i32 %y) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[OR_COND775:%.*]] = or i1 [[CMP]], [[SWITCH]]
+; CHECK-NEXT:    br i1 [[OR_COND775]], label [[LOR_LHS_FALSE8:%.*]], label [[RETURN:%.*]]
+; CHECK:       lor.lhs.false8:
+; CHECK-NEXT:    tail call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
   %cmp = icmp ult i32 %x, 3
   %switch = icmp ult i32 %y, 2
   %or.cond775 = or i1 %cmp, %switch
@@ -512,13 +652,20 @@ lor.lhs.false8:
 return:
   ret void
 
-; CHECK-LABEL: @test17(
-; CHECK-NOT: switch.early.test
-; CHECK-NOT: switch i32
-; CHECK: ret void
 }
 
 define void @test18(i32 %arg) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ARG_OFF:%.*]] = add i32 [[ARG:%.*]], -8
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[ARG_OFF]], 11
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[BB19:%.*]], label [[BB20:%.*]]
+; CHECK:       bb19:
+; CHECK-NEXT:    tail call void @foo1()
+; CHECK-NEXT:    br label [[BB20]]
+; CHECK:       bb20:
+; CHECK-NEXT:    ret void
+;
 bb:
   %tmp = and i32 %arg, -2
   %tmp1 = icmp eq i32 %tmp, 8
@@ -550,12 +697,23 @@ bb19:                                             ; preds = %bb8, %bb
 bb20:                                             ; preds = %bb19, %bb8
   ret void
 
-; CHECK-LABEL: @test18(
-; CHECK: %arg.off = add i32 %arg, -8
-; CHECK: icmp ult i32 %arg.off, 11
 }
 
 define void @PR26323(i1 %tobool23, i32 %tmp3) {
+; CHECK-LABEL: @PR26323(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp ne i32 [[TMP3:%.*]], 0
+; CHECK-NEXT:    [[NEG14:%.*]] = and i32 [[TMP3]], -2
+; CHECK-NEXT:    [[CMP17:%.*]] = icmp ne i32 [[NEG14]], -1
+; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[TOBOOL5]], [[TOBOOL23:%.*]]
+; CHECK-NEXT:    [[OR_COND1:%.*]] = and i1 [[CMP17]], [[OR_COND]]
+; CHECK-NEXT:    br i1 [[OR_COND1]], label [[IF_END29:%.*]], label [[IF_THEN27:%.*]]
+; CHECK:       if.then27:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end29:
+; CHECK-NEXT:    ret void
+;
 entry:
   %tobool5 = icmp ne i32 %tmp3, 0
   %neg14 = and i32 %tmp3, -2
@@ -572,21 +730,19 @@ if.end29:                                         ; preds = %entry
   ret void
 }
 
-; CHECK-LABEL: define void @PR26323(
-; CHECK:  %tobool5 = icmp ne i32 %tmp3, 0
-; CHECK:  %neg14 = and i32 %tmp3, -2
-; CHECK:  %cmp17 = icmp ne i32 %neg14, -1
-; CHECK:  %or.cond = and i1 %tobool5, %tobool23
-; CHECK:  %or.cond1 = and i1 %cmp17, %or.cond
-; CHECK:  br i1 %or.cond1, label %if.end29, label %if.then27
-
-; Form a switch when and'ing a negated power of two
-; CHECK-LABEL: define void @test19
-; CHECK: switch i32 %arg, label %else [
-; CHECK: i32 32, label %if
-; CHECK: i32 13, label %if
-; CHECK: i32 12, label %if
 define void @test19(i32 %arg) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[ELSE:%.*]] [
+; CHECK-NEXT:    i32 32, label [[IF:%.*]]
+; CHECK-NEXT:    i32 13, label [[IF]]
+; CHECK-NEXT:    i32 12, label [[IF]]
+; CHECK-NEXT:    ]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 12
   %cmp2 = icmp eq i32 %arg, 32
@@ -602,10 +758,19 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
-; CHECK-LABEL: define void @test20
-; CHECK-NOT: switch
-; CHECK: ret void
 define void @test20(i32 %arg) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], -2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[AND]], 13
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[ARG]], 32
+; CHECK-NEXT:    [[PRED:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 13
   %cmp2 = icmp eq i32 %arg, 32
@@ -621,11 +786,19 @@ else:
 }
 
 ; Form a switch when or'ing a power of two
-; CHECK-LABEL: define void @test21
-; CHECK: i32 32, label %else
-; CHECK: i32 13, label %else
-; CHECK: i32 12, label %else
 define void @test21(i32 %arg) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[IF:%.*]] [
+; CHECK-NEXT:    i32 32, label [[ELSE:%.*]]
+; CHECK-NEXT:    i32 13, label [[ELSE]]
+; CHECK-NEXT:    i32 12, label [[ELSE]]
+; CHECK-NEXT:    ]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 13
   %cmp2 = icmp ne i32 %arg, 32
@@ -641,10 +814,19 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
-; CHECK-LABEL: define void @test22
-; CHECK-NOT: switch
-; CHECK: ret void
 define void @test22(i32 %arg) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    [[AND:%.*]] = or i32 [[ARG:%.*]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[AND]], 12
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[ARG]], 32
+; CHECK-NEXT:    [[PRED:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 12
   %cmp2 = icmp ne i32 %arg, 32
@@ -657,4 +839,4 @@ if:
 
 else:
   ret void
-}
\ No newline at end of file
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
index c752636ae83da..3314fc982ae77 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 ; RUN: opt -S -data-layout="p:32:32-p1:16:16" -simplifycfg < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s
 
@@ -6,102 +7,151 @@ declare void @foo1()
 declare void @foo2()
 
 define void @test1(i32 %V) {
-        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
+; CHECK-NEXT:    i32 17, label [[T:%.*]]
+; CHECK-NEXT:    i32 4, label [[T]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test1(
-; CHECK:  switch i32 %V, label %F [
-; CHECK:    i32 17, label %T
-; CHECK:    i32 4, label %T
-; CHECK:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test1_ptr(i32* %V) {
-        %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
-        %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
-        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; DL-LABEL: @test1_ptr(
+; DL-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32* [[V:%.*]] to i32
+; DL-NEXT:    switch i32 [[MAGICPTR]], label [[F:%.*]] [
+; DL-NEXT:    i32 17, label [[T:%.*]]
+; DL-NEXT:    i32 4, label [[T]]
+; DL-NEXT:    ]
+; DL:       T:
+; DL-NEXT:    call void @foo1()
+; DL-NEXT:    ret void
+; DL:       F:
+; DL-NEXT:    call void @foo2()
+; DL-NEXT:    ret void
+;
+  %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
+  %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
+  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test1_ptr(
-; DL:  %magicptr = ptrtoint i32* %V to i32
-; DL:  switch i32 %magicptr, label %F [
-; DL:    i32 17, label %T
-; DL:    i32 4, label %T
-; DL:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test1_ptr_as1(i32 addrspace(1)* %V) {
-        %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
-        %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
-        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; DL-LABEL: @test1_ptr_as1(
+; DL-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32 addrspace(1)* [[V:%.*]] to i16
+; DL-NEXT:    switch i16 [[MAGICPTR]], label [[F:%.*]] [
+; DL-NEXT:    i16 17, label [[T:%.*]]
+; DL-NEXT:    i16 4, label [[T]]
+; DL-NEXT:    ]
+; DL:       T:
+; DL-NEXT:    call void @foo1()
+; DL-NEXT:    ret void
+; DL:       F:
+; DL-NEXT:    call void @foo2()
+; DL-NEXT:    ret void
+;
+  %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
+  %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
+  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test1_ptr_as1(
-; DL:  %magicptr = ptrtoint i32 addrspace(1)* %V to i16
-; DL:  switch i16 %magicptr, label %F [
-; DL:    i16 17, label %T
-; DL:    i16 4, label %T
-; DL:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test2(i32 %V) {
-        %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
-        %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
-        %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
-        br i1 %CN, label %T, label %F
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[T:%.*]] [
+; CHECK-NEXT:    i32 17, label [[F:%.*]]
+; CHECK-NEXT:    i32 4, label [[F]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
+  %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
+  %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
+  br i1 %CN, label %T, label %F
 T:              ; preds = %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %0
-        call void @foo2( )
-        ret void
-; CHECK-LABEL: @test2(
-; CHECK:  switch i32 %V, label %T [
-; CHECK:    i32 17, label %F
-; CHECK:    i32 4, label %F
-; CHECK:  ]
+  call void @foo2( )
+  ret void
 }
 
 define void @test3(i32 %V) {
-        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-        br i1 %C1, label %T, label %N
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
+; CHECK-NEXT:    i32 4, label [[T:%.*]]
+; CHECK-NEXT:    i32 17, label [[T]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       F:
+; CHECK-NEXT:    call void @foo2()
+; CHECK-NEXT:    ret void
+;
+  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+  br i1 %C1, label %T, label %N
 N:              ; preds = %0
-        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-        br i1 %C2, label %T, label %F
+  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+  br i1 %C2, label %T, label %F
 T:              ; preds = %N, %0
-        call void @foo1( )
-        ret void
+  call void @foo1( )
+  ret void
 F:              ; preds = %N
-        call void @foo2( )
-        ret void
+  call void @foo2( )
+  ret void
 
-; CHECK-LABEL: @test3(
-; CHECK: switch i32 %V, label %F [
-; CHECK:     i32 4, label %T
-; CHECK:     i32 17, label %T
-; CHECK:   ]
 }
 
 
 define i32 @test4(i8 zeroext %c) nounwind ssp noredzone {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
+; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
+; CHECK-NEXT:    i8 34, label [[LOR_END]]
+; CHECK-NEXT:    i8 92, label [[LOR_END]]
+; CHECK-NEXT:    ]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[LOR_EXT]]
+;
 entry:
   %cmp = icmp eq i8 %c, 62
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -119,20 +169,28 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
-; CHECK-LABEL: @test4(
-; CHECK:  switch i8 %c, label %lor.rhs [
-; CHECK:    i8 62, label %lor.end
-; CHECK:    i8 34, label %lor.end
-; CHECK:    i8 92, label %lor.end
-; CHECK:  ]
 }
 
 define i32 @test5(i8 zeroext %c) nounwind ssp noredzone {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
+; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
+; CHECK-NEXT:    i8 34, label [[LOR_END]]
+; CHECK-NEXT:    i8 92, label [[LOR_END]]
+; CHECK-NEXT:    ]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[LOR_EXT]]
+;
 entry:
   switch i8 %c, label %lor.rhs [
-    i8 62, label %lor.end
-    i8 34, label %lor.end
-    i8 92, label %lor.end
+  i8 62, label %lor.end
+  i8 34, label %lor.end
+  i8 92, label %lor.end
   ]
 
 lor.rhs:                                          ; preds = %entry
@@ -143,48 +201,63 @@ lor.end:                                          ; preds = %entry, %entry, %ent
   %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
-; CHECK-LABEL: @test5(
-; CHECK:  switch i8 %c, label %lor.rhs [
-; CHECK:    i8 62, label %lor.end
-; CHECK:    i8 34, label %lor.end
-; CHECK:    i8 92, label %lor.end
-; CHECK:  ]
 }
 
 
 define i1 @test6({ i32, i32 }* %I) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP_1_I:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[I:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP_2_I:%.*]] = load i32, i32* [[TMP_1_I]]
+; CHECK-NEXT:    [[TMP_2_I_OFF:%.*]] = add i32 [[TMP_2_I]], -14
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[TMP_2_I_OFF]], 6
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
+; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
+;
 entry:
-        %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-        %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
-        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
-        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
+  %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
+  %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
+  %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
+  br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
 shortcirc_next.0:               ; preds = %entry
-        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
-        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
+  %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
+  br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
 shortcirc_next.1:               ; preds = %shortcirc_next.0
-        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
-        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
+  %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
+  br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
 shortcirc_next.2:               ; preds = %shortcirc_next.1
-        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
-        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
+  %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
+  br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
 shortcirc_next.3:               ; preds = %shortcirc_next.2
-        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
-        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
+  %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
+  br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
 shortcirc_next.4:               ; preds = %shortcirc_next.3
-        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
-        br label %UnifiedReturnBlock
+  %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
+  br label %UnifiedReturnBlock
 shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
-        br label %UnifiedReturnBlock
+  br label %UnifiedReturnBlock
 UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
-        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
-        ret i1 %UnifiedRetVal
+  %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
+  ret i1 %UnifiedRetVal
 
-; CHECK-LABEL: @test6(
-; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
 }
 
 define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i8 99, label [[IF_THEN]]
+; CHECK-NEXT:    i8 97, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @foo1() #2
+; CHECK-NEXT:    ret void
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp = icmp ult i32 %x, 32
   %cmp4 = icmp eq i8 %c, 97
@@ -200,17 +273,27 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret void
 
-; CHECK-LABEL: @test7(
-; CHECK:   %cmp = icmp ult i32 %x, 32
-; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
-; CHECK: switch.early.test:
-; CHECK:   switch i8 %c, label %if.end [
-; CHECK:     i8 99, label %if.then
-; CHECK:     i8 97, label %if.then
-; CHECK:   ]
 }
 
 define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[N:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       N:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i8 99, label [[IF_THEN]]
+; CHECK-NEXT:    i8 97, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 42, [[SWITCH_EARLY_TEST]] ], [ 42, [[N]] ], [ 42, [[SWITCH_EARLY_TEST]] ]
+; CHECK-NEXT:    tail call void @foo1() #2
+; CHECK-NEXT:    ret i32 [[A]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret i32 0
+;
 entry:
   br i1 %C, label %N, label %if.then
 N:
@@ -229,17 +312,33 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret i32 0
 
-; CHECK-LABEL: @test8(
-; CHECK: switch.early.test:
-; CHECK:   switch i8 %c, label %if.end [
-; CHECK:     i8 99, label %if.then
-; CHECK:     i8 97, label %if.then
-; CHECK:   ]
-; CHECK:   %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ]
 }
 
 ;; This is "Example 7" from http://blog.regehr.org/archives/320
 define i32 @test9(i8 zeroext %c) nounwind ssp noredzone {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i8 [[C]], label [[LOR_RHS:%.*]] [
+; CHECK-NEXT:    i8 92, label [[LOR_END]]
+; CHECK-NEXT:    i8 62, label [[LOR_END]]
+; CHECK-NEXT:    i8 60, label [[LOR_END]]
+; CHECK-NEXT:    i8 59, label [[LOR_END]]
+; CHECK-NEXT:    i8 58, label [[LOR_END]]
+; CHECK-NEXT:    i8 46, label [[LOR_END]]
+; CHECK-NEXT:    i8 44, label [[LOR_END]]
+; CHECK-NEXT:    i8 34, label [[LOR_END]]
+; CHECK-NEXT:    i8 39, label [[LOR_END]]
+; CHECK-NEXT:    ]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br label [[LOR_END]]
+; CHECK:       lor.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[SWITCH_EARLY_TEST]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY:%.*]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ]
+; CHECK-NEXT:    [[CONV46:%.*]] = zext i1 [[TMP0]] to i32
+; CHECK-NEXT:    ret i32 [[CONV46]]
+;
 entry:
   %cmp = icmp ult i8 %c, 33
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -285,25 +384,23 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %conv46 = zext i1 %0 to i32
   ret i32 %conv46
 
-; CHECK-LABEL: @test9(
-; CHECK:   %cmp = icmp ult i8 %c, 33
-; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
-
-; CHECK: switch.early.test:
-; CHECK:   switch i8 %c, label %lor.rhs [
-; CHECK:     i8 92, label %lor.end
-; CHECK:     i8 62, label %lor.end
-; CHECK:     i8 60, label %lor.end
-; CHECK:     i8 59, label %lor.end
-; CHECK:     i8 58, label %lor.end
-; CHECK:     i8 46, label %lor.end
-; CHECK:     i8 44, label %lor.end
-; CHECK:     i8 34, label %lor.end
-; CHECK:     i8 39, label %lor.end
-; CHECK:   ]
+
 }
 
 define i32 @test10(i32 %mode, i1 %Cond) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH_EARLY_TEST:%.*]], label [[F:%.*]]
+; CHECK:       switch.early.test:
+; CHECK-NEXT:    switch i32 [[MODE:%.*]], label [[T:%.*]] [
+; CHECK-NEXT:    i32 51, label [[F]]
+; CHECK-NEXT:    i32 0, label [[F]]
+; CHECK-NEXT:    ]
+; CHECK:       T:
+; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 123, [[SWITCH_EARLY_TEST]] ], [ 324, [[F]] ]
+; CHECK-NEXT:    ret i32 [[MERGE]]
+; CHECK:       F:
+; CHECK-NEXT:    br label [[T]]
+;
   %A = icmp ne i32 %mode, 0
   %B = icmp ne i32 %mode, 51
   %C = and i1 %A, %B
@@ -314,17 +411,27 @@ T:
 F:
   ret i32 324
 
-; CHECK-LABEL: @test10(
-; CHECK:  br i1 %Cond, label %switch.early.test, label %F
-; CHECK:switch.early.test:
-; CHECK:  switch i32 %mode, label %T [
-; CHECK:    i32 51, label %F
-; CHECK:    i32 0, label %F
-; CHECK:  ]
 }
 
 ; PR8780
 define i32 @test11(i32 %bar) nounwind {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[BAR:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 55, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 53, label [[RETURN]]
+; CHECK-NEXT:    i32 35, label [[RETURN]]
+; CHECK-NEXT:    i32 24, label [[RETURN]]
+; CHECK-NEXT:    i32 23, label [[RETURN]]
+; CHECK-NEXT:    i32 12, label [[RETURN]]
+; CHECK-NEXT:    i32 4, label [[RETURN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
 entry:
   %cmp = icmp eq i32 %bar, 4
   %cmp2 = icmp eq i32 %bar, 35
@@ -353,19 +460,21 @@ return:                                           ; preds = %if.end, %if.then
   %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ]
   ret i32 %retval.0
 
-; CHECK-LABEL: @test11(
-; CHECK: switch i32 %bar, label %if.end [
-; CHECK:   i32 55, label %return
-; CHECK:   i32 53, label %return
-; CHECK:   i32 35, label %return
-; CHECK:   i32 24, label %return
-; CHECK:   i32 23, label %return
-; CHECK:   i32 12, label %return
-; CHECK:   i32 4, label %return
-; CHECK: ]
 }
 
 define void @test12() nounwind {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_OLD:%.*]] = icmp eq i32 undef, undef
+; CHECK-NEXT:    br i1 [[A_OLD]], label [[BB55_US_US:%.*]], label [[MALFORMED:%.*]]
+; CHECK:       bb55.us.us:
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt i32 undef, undef
+; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 undef, undef
+; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[B]], [[A]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB55_US_US]], label [[MALFORMED]]
+; CHECK:       malformed:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %bb49.us.us
 
@@ -382,12 +491,26 @@ bb55.us.us:
 
 malformed:
   ret void
-; CHECK-LABEL: @test12(
 
 }
 
 ; test13 - handle switch formation with ult.
 define void @test13(i32 %x) nounwind ssp noredzone {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
+; CHECK-NEXT:    i32 4, label [[IF_THEN]]
+; CHECK-NEXT:    i32 3, label [[IF_THEN]]
+; CHECK-NEXT:    i32 1, label [[IF_THEN]]
+; CHECK-NEXT:    i32 0, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @foo1() #3
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp = icmp ult i32 %x, 2
   br i1 %cmp, label %if.then, label %lor.lhs.false3
@@ -410,18 +533,26 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
-; CHECK-LABEL: @test13(
-; CHECK:  switch i32 %x, label %if.end [
-; CHECK:     i32 6, label %if.then
-; CHECK:     i32 4, label %if.then
-; CHECK:     i32 3, label %if.then
-; CHECK:     i32 1, label %if.then
-; CHECK:     i32 0, label %if.then
-; CHECK:   ]
 }
 
 ; test14 - handle switch formation with ult.
 define void @test14(i32 %x) nounwind ssp noredzone {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
+; CHECK-NEXT:    i32 4, label [[IF_THEN]]
+; CHECK-NEXT:    i32 3, label [[IF_THEN]]
+; CHECK-NEXT:    i32 2, label [[IF_THEN]]
+; CHECK-NEXT:    i32 1, label [[IF_THEN]]
+; CHECK-NEXT:    i32 0, label [[IF_THEN]]
+; CHECK-NEXT:    ]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @foo1() #3
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp = icmp ugt i32 %x, 2
   br i1 %cmp, label %lor.lhs.false3, label %if.then
@@ -444,18 +575,15 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
-; CHECK-LABEL: @test14(
-; CHECK:  switch i32 %x, label %if.end [
-; CHECK:     i32 6, label %if.then
-; CHECK:     i32 4, label %if.then
-; CHECK:     i32 3, label %if.then
-; CHECK:     i32 1, label %if.then
-; CHECK:     i32 0, label %if.then
-; CHECK:   ]
 }
 
 ; Don't crash on ginormous ranges.
 define void @test15(i128 %x) nounwind {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:  if.end:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i128 [[X:%.*]], 2
+; CHECK-NEXT:    ret void
+;
   %cmp = icmp ugt i128 %x, 2
   br i1 %cmp, label %if.end, label %lor.false
 
@@ -470,18 +598,19 @@ if.then:
 if.end:
   ret void
 
-; CHECK-LABEL: @test15(
-; CHECK-NOT: switch
-; CHECK: ret void
 }
 
 ; PR8675
 ; rdar://5134905
 define zeroext i1 @test16(i32 %x) nounwind {
-entry:
 ; CHECK-LABEL: @test16(
-; CHECK: %x.off = add i32 %x, -1
-; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
+; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
+;
+entry:
   %cmp.i = icmp eq i32 %x, 1
   br i1 %cmp.i, label %lor.end, label %lor.lhs.false
 
@@ -500,6 +629,17 @@ lor.end:
 
 ; Check that we don't turn an icmp into a switch where it's not useful.
 define void @test17(i32 %x, i32 %y) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[Y:%.*]], 2
+; CHECK-NEXT:    [[OR_COND775:%.*]] = or i1 [[CMP]], [[SWITCH]]
+; CHECK-NEXT:    br i1 [[OR_COND775]], label [[LOR_LHS_FALSE8:%.*]], label [[RETURN:%.*]]
+; CHECK:       lor.lhs.false8:
+; CHECK-NEXT:    tail call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
   %cmp = icmp ult i32 %x, 3
   %switch = icmp ult i32 %y, 2
   %or.cond775 = or i1 %cmp, %switch
@@ -512,13 +652,20 @@ lor.lhs.false8:
 return:
   ret void
 
-; CHECK-LABEL: @test17(
-; CHECK-NOT: switch.early.test
-; CHECK-NOT: switch i32
-; CHECK: ret void
 }
 
 define void @test18(i32 %arg) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ARG_OFF:%.*]] = add i32 [[ARG:%.*]], -8
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[ARG_OFF]], 11
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[BB19:%.*]], label [[BB20:%.*]]
+; CHECK:       bb19:
+; CHECK-NEXT:    tail call void @foo1()
+; CHECK-NEXT:    br label [[BB20]]
+; CHECK:       bb20:
+; CHECK-NEXT:    ret void
+;
 bb:
   %tmp = and i32 %arg, -2
   %tmp1 = icmp eq i32 %tmp, 8
@@ -550,12 +697,23 @@ bb19:                                             ; preds = %bb8, %bb
 bb20:                                             ; preds = %bb19, %bb8
   ret void
 
-; CHECK-LABEL: @test18(
-; CHECK: %arg.off = add i32 %arg, -8
-; CHECK: icmp ult i32 %arg.off, 11
 }
 
 define void @PR26323(i1 %tobool23, i32 %tmp3) {
+; CHECK-LABEL: @PR26323(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp ne i32 [[TMP3:%.*]], 0
+; CHECK-NEXT:    [[NEG14:%.*]] = and i32 [[TMP3]], -2
+; CHECK-NEXT:    [[CMP17:%.*]] = icmp ne i32 [[NEG14]], -1
+; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[TOBOOL5]], [[TOBOOL23:%.*]]
+; CHECK-NEXT:    [[OR_COND1:%.*]] = and i1 [[CMP17]], [[OR_COND]]
+; CHECK-NEXT:    br i1 [[OR_COND1]], label [[IF_END29:%.*]], label [[IF_THEN27:%.*]]
+; CHECK:       if.then27:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end29:
+; CHECK-NEXT:    ret void
+;
 entry:
   %tobool5 = icmp ne i32 %tmp3, 0
   %neg14 = and i32 %tmp3, -2
@@ -572,21 +730,20 @@ if.end29:                                         ; preds = %entry
   ret void
 }
 
-; CHECK-LABEL: define void @PR26323(
-; CHECK:  %tobool5 = icmp ne i32 %tmp3, 0
-; CHECK:  %neg14 = and i32 %tmp3, -2
-; CHECK:  %cmp17 = icmp ne i32 %neg14, -1
-; CHECK:  %or.cond = and i1 %tobool5, %tobool23
-; CHECK:  %or.cond1 = and i1 %cmp17, %or.cond
-; CHECK:  br i1 %or.cond1, label %if.end29, label %if.then27
-
 ; Form a switch when and'ing a negated power of two
-; CHECK-LABEL: define void @test19
-; CHECK: switch i32 %arg, label %else [
-; CHECK: i32 32, label %if
-; CHECK: i32 13, label %if
-; CHECK: i32 12, label %if
 define void @test19(i32 %arg) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[ELSE:%.*]] [
+; CHECK-NEXT:    i32 32, label [[IF:%.*]]
+; CHECK-NEXT:    i32 13, label [[IF]]
+; CHECK-NEXT:    i32 12, label [[IF]]
+; CHECK-NEXT:    ]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 12
   %cmp2 = icmp eq i32 %arg, 32
@@ -602,10 +759,19 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
-; CHECK-LABEL: define void @test20
-; CHECK-NOT: switch
-; CHECK: ret void
 define void @test20(i32 %arg) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], -2
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[AND]], 13
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[ARG]], 32
+; CHECK-NEXT:    [[PRED:%.*]] = or i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 13
   %cmp2 = icmp eq i32 %arg, 32
@@ -621,11 +787,19 @@ else:
 }
 
 ; Form a switch when or'ing a power of two
-; CHECK-LABEL: define void @test21
-; CHECK: i32 32, label %else
-; CHECK: i32 13, label %else
-; CHECK: i32 12, label %else
 define void @test21(i32 %arg) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[IF:%.*]] [
+; CHECK-NEXT:    i32 32, label [[ELSE:%.*]]
+; CHECK-NEXT:    i32 13, label [[ELSE]]
+; CHECK-NEXT:    i32 12, label [[ELSE]]
+; CHECK-NEXT:    ]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 13
   %cmp2 = icmp ne i32 %arg, 32
@@ -641,10 +815,19 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
-; CHECK-LABEL: define void @test22
-; CHECK-NOT: switch
-; CHECK: ret void
 define void @test22(i32 %arg) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    [[AND:%.*]] = or i32 [[ARG:%.*]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[AND]], 12
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[ARG]], 32
+; CHECK-NEXT:    [[PRED:%.*]] = and i1 [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    call void @foo1()
+; CHECK-NEXT:    ret void
+; CHECK:       else:
+; CHECK-NEXT:    ret void
+;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 12
   %cmp2 = icmp ne i32 %arg, 32
@@ -657,4 +840,4 @@ if:
 
 else:
   ret void
-}
\ No newline at end of file
+}

From 444eaaf1cce248b886c4208a29c5ee0f4c8383cc Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 13:54:04 +0000
Subject: [PATCH 0258/1176] [SimpligyCFG] NFC, remove GCD that was only used
 for powers of two

and replace with an equilivent countTrailingZeros.

GCD is much more expensive than this, with repeated division.

This depends on D60823

llvm-svn: 361726
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 71d592e539d37..83f98d022cac0 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5560,25 +5560,23 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
   // Now we have signed numbers that have been shifted so that, given enough
   // precision, there are no negative values. Since the rest of the transform
   // is bitwise only, we switch now to an unsigned representation.
-  uint64_t GCD = 0;
-  for (auto &V : Values)
-    GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
 
   // This transform can be done speculatively because it is so cheap - it
-  // results in a single rotate operation being inserted. This can only happen
-  // if the factor extracted is a power of 2.
-  // FIXME: If the GCD is an odd number we can multiply by the multiplicative
-  // inverse of GCD and then perform this transform.
+  // results in a single rotate operation being inserted.
   // FIXME: It's possible that optimizing a switch on powers of two might also
   // be beneficial - flag values are often powers of two and we could use a CLZ
   // as the key function.
-  if (GCD <= 1 || !isPowerOf2_64(GCD))
-    // No common divisor found or too expensive to compute key function.
-    return false;
 
-  unsigned Shift = Log2_64(GCD);
+  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
+  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
+  // less than 64.
+  unsigned Shift = 64;
   for (auto &V : Values)
-    V = (int64_t)((uint64_t)V >> Shift);
+    Shift = std::min(Shift, countTrailingZeros((uint64_t)V);
+  assert(Shift < 64);
+  if (Shift > 0)
+    for (auto &V : Values)
+      V = (int64_t)((uint64_t)V >> Shift);
 
   if (!isSwitchDense(Values))
     // Transform didn't create a dense switch.

From 30111c786f7cf49197fdd9db01e3a6def57b3cef Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 13:55:14 +0000
Subject: [PATCH 0259/1176] [SimplifyCFG] Run ReduceSwitchRange
 unconditionally, generalize

Rather than gating on "isSwitchDense" (resulting in necessesarily
sparse lookup tables even when they were generated), always run
this quite cheap transform.

This transform is useful not just for generating tables.
LowerSwitch also wants this: read LowerSwitch.cpp:257.

Be careful to not generate worse code, by introducing a
SubThreshold heuristic.

Instead of just sorting by signed, generalize the finding of the
best base.

And now that it is run unconditionally, do not replicate its
functionality in SwitchToLookupTable (which could use a Sub
when having a hole is smaller, hence the SubThreshold
heuristic located in a single place).
This simplifies SwitchToLookupTable, and fixes
some ugly corner cases due to the use of signed numbers,
such as a table containing i16 32768 and 32769, of which
32769 would be interpreted as -32768, and now the code thinks
the table is size 65536.

(We still use unconditional subtraction when building a single-register mask,
but I think this whole block should go when the more general sparse
map is added, which doesn't leave empty holes in the table.)

And the reason test4 and test5 did not trigger was documented wrong:
it was because they were not considered sufficiently "dense".

Also, fix generation of invalid LLVM-IR: shl by bit-width.

llvm-svn: 361727
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 122 ++++++++++--------
 .../SimplifyCFG/X86/disable-lookup-table.ll   |  20 +--
 .../SimplifyCFG/X86/switch-covered-bug.ll     |  13 +-
 .../SimplifyCFG/X86/switch-table-bug.ll       |   7 +-
 .../SimplifyCFG/X86/switch_to_lookup_table.ll | 116 +++++++++--------
 .../Transforms/SimplifyCFG/rangereduce.ll     |  39 +++---
 .../SimplifyCFG/switch-dead-default.ll        |  80 +++++++-----
 7 files changed, 219 insertions(+), 178 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 83f98d022cac0..e5925545acaa2 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5309,9 +5309,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
 
   for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
     ConstantInt *CaseVal = CI->getCaseValue();
-    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+    if (CaseVal->getValue().ult(MinCaseVal->getValue()))
       MinCaseVal = CaseVal;
-    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+    if (CaseVal->getValue().ugt(MaxCaseVal->getValue()))
       MaxCaseVal = CaseVal;
 
     // Resulting value at phi nodes for this case value.
@@ -5337,8 +5337,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   }
 
   uint64_t NumResults = ResultLists[PHIs[0]].size();
-  APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
-  uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+  uint64_t TableSize = MaxCaseVal->getValue().getLimitedValue() + 1;
   bool TableHasHoles = (NumResults < TableSize);
 
   // If the table has holes, we need a constant result for the default case
@@ -5373,12 +5372,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
 
   // Compute the table index value.
   Builder.SetInsertPoint(SI);
-  Value *TableIndex;
-  if (MinCaseVal->isNullValue())
-    TableIndex = SI->getCondition();
-  else
-    TableIndex =
-        Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx");
+  Value *TableIndex = SI->getCondition();
 
   // Compute the maximum table size representable by the integer type we are
   // switching upon.
@@ -5418,6 +5412,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
     LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                   CommonDest->getParent(), CommonDest);
 
+    // When doing the register-sized hole-check, unconditionally use a
+    // subtraction.
+    TableIndex = Builder.CreateSub(TableIndex, MinCaseVal);
+
     // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
     // unnecessary illegal types.
     uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
@@ -5461,8 +5459,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
     // If using a bitmask, use any value to fill the lookup table holes.
     Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
     StringRef FuncName = Fn->getName();
-    SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
-                            FuncName);
+    // Base is 0 unless using a hole check
+    ConstantInt *Base =
+        NeedMask ? MinCaseVal
+                 : ConstantInt::get(Mod.getContext(), APInt(CaseSize, 0));
+    SwitchLookupTable Table(Mod, TableSize, Base, ResultList, DV, DL, FuncName);
 
     Value *Result = Table.BuildLookup(TableIndex, Builder);
 
@@ -5507,18 +5508,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   return true;
 }
 
-static bool isSwitchDense(ArrayRef<int64_t> Values) {
-  // See also SelectionDAGBuilder::isDense(), which this function was based on.
-  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
-  uint64_t Range = Diff + 1;
-  uint64_t NumCases = Values.size();
-  // 40% is the default density for building a jump table in optsize/minsize
-  // mode.
-  uint64_t MinDensity = 40;
-
-  return NumCases * 100 >= Range * MinDensity;
-}
-
 /// Try to transform a switch that has "holes" in it to a contiguous sequence
 /// of cases.
 ///
@@ -5530,32 +5519,47 @@ static bool isSwitchDense(ArrayRef<int64_t> Values) {
 static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                               const DataLayout &DL,
                               const TargetTransformInfo &TTI) {
+  // The number of cases that need to be removed by a subtraction operation
+  // to make it worth using.
+  const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8);
   auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
-  if (CondTy->getIntegerBitWidth() > 64 ||
-      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+  unsigned BitWidth = CondTy->getIntegerBitWidth();
+  if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth))
     return false;
   // Only bother with this optimization if there are more than 3 switch cases;
   // SDAG will only bother creating jump tables for 4 or more cases.
+  // This is also useful when using the LowerSwitch transform, but not with
+  // so few cases.
   if (SI->getNumCases() < 4)
     return false;
 
-  // This transform is agnostic to the signedness of the input or case values.
-  // We can treat the case values as signed or unsigned. We can optimize more
-  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
-  // case values as signed.
-  SmallVector<int64_t, 4> Values;
+  // We organize the range to start from 0, if it is not already close.
+  SmallVector<uint64_t, 4> Values;
   for (auto &C : SI->cases())
-    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
+    Values.push_back(C.getCaseValue()->getValue().getLimitedValue());
   llvm::sort(Values);
 
-  // If the switch is already dense, there's nothing useful to do here.
-  if (isSwitchDense(Values))
-    return false;
-
-  // First, transform the values such that they start at zero and ascend.
-  int64_t Base = Values[0];
-  for (auto &V : Values)
-    V -= (uint64_t)(Base);
+  bool MadeChanges = false;
+  // We must first look find the best start point, for example if we have a
+  // series that crosses zero: -2, -1, 0, 1, 2.
+  uint64_t BestDistance =
+      APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() -
+      Values.back() + Values.front() + 1;
+  unsigned BestIndex = 0;
+  for (unsigned I = 1, E = Values.size(); I != E; I++) {
+    if (Values[I] - Values[I - 1] > BestDistance) {
+      BestIndex = I;
+      BestDistance = Values[I] - Values[I - 1];
+    }
+  }
+  uint64_t Base = 0;
+  // Now transform the values such that they start at zero and ascend.
+  if (Values[BestIndex] >= SubThreshold) {
+    Base = Values[BestIndex];
+    MadeChanges = true;
+    for (auto &V : Values)
+      V = (APInt(BitWidth, V) - Base).getLimitedValue();
+  }
 
   // Now we have signed numbers that have been shifted so that, given enough
   // precision, there are no negative values. Since the rest of the transform
@@ -5572,14 +5576,16 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
   // less than 64.
   unsigned Shift = 64;
   for (auto &V : Values)
-    Shift = std::min(Shift, countTrailingZeros((uint64_t)V);
+    Shift = std::min(Shift, countTrailingZeros(V));
   assert(Shift < 64);
-  if (Shift > 0)
+  if (Shift > 0) {
+    MadeChanges = true;
     for (auto &V : Values)
-      V = (int64_t)((uint64_t)V >> Shift);
+      V >>= Shift;
+  }
 
-  if (!isSwitchDense(Values))
-    // Transform didn't create a dense switch.
+  if (!MadeChanges)
+    // We didn't do anything.
     return false;
 
   // The obvious transform is to shift the switch condition right and emit a
@@ -5594,18 +5600,22 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
 
   auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
   Builder.SetInsertPoint(SI);
-  auto *ShiftC = ConstantInt::get(Ty, Shift);
-  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
-  auto *LShr = Builder.CreateLShr(Sub, ShiftC);
-  auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
-  auto *Rot = Builder.CreateOr(LShr, Shl);
-  SI->replaceUsesOfWith(SI->getCondition(), Rot);
+  Value *Key = SI->getCondition();
+  if (Base > 0)
+    Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base));
+  if (Shift > 0) {
+    // FIXME replace with fshr?
+    auto *ShiftC = ConstantInt::get(Ty, Shift);
+    auto *LShr = Builder.CreateLShr(Key, ShiftC);
+    auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift);
+    Key = Builder.CreateOr(LShr, Shl);
+  }
+  SI->replaceUsesOfWith(SI->getCondition(), Key);
 
   for (auto Case : SI->cases()) {
     auto *Orig = Case.getCaseValue();
     auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
-    Case.setValue(
-        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
   }
   return true;
 }
@@ -5646,6 +5656,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
     return requestResimplify();
 
+  if (ReduceSwitchRange(SI, Builder, DL, TTI))
+    return requestResimplify();
+
   // The conversion from switch to lookup tables results in difficult-to-analyze
   // code and makes pruning branches much harder. This is a problem if the
   // switch expression itself can still be restricted as a result of inlining or
@@ -5655,9 +5668,6 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       SwitchToLookupTable(SI, Builder, DL, TTI))
     return requestResimplify();
 
-  if (ReduceSwitchRange(SI, Builder, DL, TTI))
-    return requestResimplify();
-
   return false;
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
index adb0a9819dc64..735a97f305e20 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
@@ -2,6 +2,7 @@
 ; RUN: opt < %s -simplifycfg -switch-to-lookup -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; RUN: opt < %s -passes='simplify-cfg<switch-to-lookup>' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; In the presence of "-no-jump-tables"="true", simplifycfg should not convert switches to lookup tables.
 
 ; CHECK: @switch.table.bar = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1]
@@ -11,11 +12,12 @@
 define i32 @foo(i32 %c) "no-jump-tables"="true" {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 42, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 43, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 44, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    i32 45, label [[SW_BB3:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    i32 3, label [[SW_BB3:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb1:
 ; CHECK-NEXT:    br label [[RETURN]]
@@ -50,11 +52,11 @@ return:
 define i32 @bar(i32 %c) {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index b7bf8054a6f10..08d266c7c9122 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -9,11 +9,16 @@ target triple = "x86_64-apple-darwin12.0.0"
 define i64 @test(i3 %arg) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i3 [[ARG:%.*]], -4
-; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i3 [[ARG:%.*]], -1
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[ARG]] to i4
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]]
-; CHECK-NEXT:    [[V3:%.*]] = add i64 [[SWITCH_LOAD]], 0
+; CHECK-NEXT:    br label [[DEFAULT]]
+; CHECK:       Default:
+; CHECK-NEXT:    [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    [[V3:%.*]] = add i64 [[V1]], 0
 ; CHECK-NEXT:    ret i64 [[V3]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index e50a913f9e50a..5d2297f58d404 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -9,11 +9,8 @@ target triple = "x86_64-apple-darwin12.0.0"
 define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
 ; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TMP0:%.*]], -2
-; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i64], [4 x i64]* @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i64 [[SWITCH_LOAD]]
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TMP0:%.*]] to i64
+; CHECK-NEXT:    ret i64 [[SWITCH_IDX_CAST]]
 ;
 entry:
   switch i2 %0, label %1 [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index c77438974b2d2..f1c550cf94f20 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -36,11 +36,11 @@ target triple = "x86_64-unknown-linux-gnu"
 define i32 @f(i32 %c) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 7
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
@@ -75,11 +75,11 @@ return:
 define i8 @char(i32 %c) {
 ; CHECK-LABEL: @char(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 9
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 9
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i8, i8* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i8 [[SWITCH_LOAD]]
 ; CHECK:       return:
@@ -245,18 +245,18 @@ define i32 @crud(i8 zeroext %c)  {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
 ; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
 ; CHECK:       switch.early.test:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i8 [[C]], 34
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 59
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[LOR_END]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 [[C]], 34
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 59
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[LOR_END]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = zext i8 [[SWITCH_TABLEIDX]] to i59
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = zext i8 [[TMP0]] to i59
 ; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i59 [[SWITCH_CAST]], 1
 ; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i59 -288230375765830623, [[SWITCH_SHIFTAMT]]
 ; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i59 [[SWITCH_DOWNSHIFT]] to i1
 ; CHECK-NEXT:    br label [[LOR_END]]
 ; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ false, [[SWITCH_EARLY_TEST]] ]
-; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ false, [[SWITCH_EARLY_TEST]] ]
+; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[LOR_EXT]]
 ;
 entry:
@@ -300,11 +300,12 @@ lor.end:
 define i32 @overflow(i32 %type) {
 ; CHECK-LABEL: @overflow(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[TYPE:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 3, label [[SW_BB3:%.*]]
-; CHECK-NEXT:    i32 -2147483645, label [[SW_BB3]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[TYPE:%.*]], -2147483645
+; CHECK-NEXT:    switch i32 [[TMP0]], label [[IF_END:%.*]] [
+; CHECK-NEXT:    i32 -2147483648, label [[SW_BB3:%.*]]
+; CHECK-NEXT:    i32 0, label [[SW_BB3]]
+; CHECK-NEXT:    i32 2147483646, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2147483647, label [[SW_BB2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb1:
 ; CHECK-NEXT:    br label [[IF_END]]
@@ -378,11 +379,10 @@ define i32 @large(i32 %x) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[X]], -10
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP]], i32 [[MUL]], i32 [[X]]
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[SPEC_SELECT]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 199
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SPEC_SELECT]], 200
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [199 x i32], [199 x i32]* @switch.table.large, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [200 x i32], [200 x i32]* @switch.table.large, i32 0, i32 [[SPEC_SELECT]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
@@ -808,11 +808,10 @@ return:
 define i32 @cprop(i32 %x, i32 %y) {
 ; CHECK-LABEL: @cprop(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 7
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 8
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.cprop, i32 0, i32 [[SWITCH_TABLEIDX]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @switch.table.cprop, i32 0, i32 [[X]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
@@ -923,12 +922,13 @@ return:
 define i96 @illegaltype(i32 %c) {
 ; CHECK-LABEL: @illegaltype(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 42, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 43, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 44, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    i32 45, label [[SW_BB3:%.*]]
-; CHECK-NEXT:    i32 46, label [[SW_BB4:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
+; CHECK-NEXT:    switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    i32 3, label [[SW_BB3:%.*]]
+; CHECK-NEXT:    i32 4, label [[SW_BB4:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb1:
 ; CHECK-NEXT:    br label [[RETURN]]
@@ -1008,12 +1008,13 @@ define i32 @nodefaultwithholes(i32 %c) {
 ; CHECK-NEXT:    call void @exit(i32 1)
 ; CHECK-NEXT:    unreachable
 ; CHECK:       switch.hole_check:
-; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[C]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[C]], 0
+; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP1]] to i8
 ; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 47, [[SWITCH_MASKINDEX]]
 ; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
 ; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @switch.table.nodefaultwithholes, i32 0, i32 [[C]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @switch.table.nodefaultwithholes, i32 0, i32 [[TMP1]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ;
@@ -1212,11 +1213,11 @@ return:
 define i8 @linearmap1(i32 %c) {
 ; CHECK-LABEL: @linearmap1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 10
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 10
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8
 ; CHECK-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], -5
 ; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i8 [[SWITCH_IDX_MULT]], 18
 ; CHECK-NEXT:    ret i8 [[SWITCH_OFFSET]]
@@ -1243,11 +1244,11 @@ return:
 define i32 @linearmap2(i8 %c) {
 ; CHECK-LABEL: @linearmap2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i8 [[C:%.*]], -13
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i8 [[SWITCH_TABLEIDX]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 [[C:%.*]], -13
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = zext i8 [[SWITCH_TABLEIDX]] to i32
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = zext i8 [[TMP0]] to i32
 ; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[SWITCH_IDX_CAST]], 18
 ; CHECK-NEXT:    ret i32 [[SWITCH_OFFSET]]
 ; CHECK:       return:
@@ -1273,11 +1274,11 @@ return:
 define i8 @linearmap3(i32 %c) {
 ; CHECK-LABEL: @linearmap3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], 10
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 10
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8
 ; CHECK-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], 100
 ; CHECK-NEXT:    ret i8 [[SWITCH_IDX_MULT]]
 ; CHECK:       return:
@@ -1303,11 +1304,11 @@ return:
 define i8 @linearmap4(i32 %c) {
 ; CHECK-LABEL: @linearmap4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[C:%.*]], -2
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], -2
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[SWITCH_TABLEIDX]] to i8
+; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8
 ; CHECK-NEXT:    ret i8 [[SWITCH_IDX_CAST]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret i8 3
@@ -1546,18 +1547,21 @@ end:
 define i32 @covered_switch_with_bit_tests(i3) {
 ; CHECK-LABEL: @covered_switch_with_bit_tests(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], -4
-; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i8
-; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 -61, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i3 [[TMP0:%.*]], -2
+; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_HOLE_CHECK:%.*]], label [[L6:%.*]]
+; CHECK:       switch.hole_check:
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i3 [[TMP0]], 2
+; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = zext i3 [[TMP2]] to i8
+; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 15, [[SWITCH_MASKINDEX]]
 ; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]]
+; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6]]
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i4
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[TMP2]] to i4
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    br label [[L6]]
 ; CHECK:       l6:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[SWITCH_HOLE_CHECK]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
index 849f55f6f392b..e9be26e94e4e1 100644
--- a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
+++ b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
@@ -119,11 +119,12 @@ three:
 ; Optimization shouldn't trigger; not an arithmetic progression
 define i32 @test4(i32 %a) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    switch i32 [[A:%.*]], label [[DEF:%.*]] [
-; CHECK-NEXT:    i32 97, label [[ONE:%.*]]
-; CHECK-NEXT:    i32 102, label [[TWO:%.*]]
-; CHECK-NEXT:    i32 105, label [[THREE:%.*]]
-; CHECK-NEXT:    i32 109, label [[THREE]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
+; CHECK-NEXT:    switch i32 [[TMP1]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 0, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 5, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 8, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 12, label [[THREE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       def:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
@@ -156,11 +157,12 @@ three:
 ; Optimization shouldn't trigger; not a power of two
 define i32 @test5(i32 %a) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    switch i32 [[A:%.*]], label [[DEF:%.*]] [
-; CHECK-NEXT:    i32 97, label [[ONE:%.*]]
-; CHECK-NEXT:    i32 102, label [[TWO:%.*]]
-; CHECK-NEXT:    i32 107, label [[THREE:%.*]]
-; CHECK-NEXT:    i32 112, label [[THREE]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
+; CHECK-NEXT:    switch i32 [[TMP1]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 0, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 5, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 10, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 15, label [[THREE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       def:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
@@ -307,15 +309,14 @@ three:
 
 define i32 @test9(i32 %a) {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], 6
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 31
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    switch i32 [[TMP4]], label [[DEF:%.*]] [
-; CHECK-NEXT:    i32 6, label [[ONE:%.*]]
-; CHECK-NEXT:    i32 7, label [[TWO:%.*]]
-; CHECK-NEXT:    i32 0, label [[THREE:%.*]]
-; CHECK-NEXT:    i32 2, label [[THREE]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[A]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    switch i32 [[TMP3]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 9, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 10, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 3, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 5, label [[THREE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       def:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index 9a4b4fe263266..6b74eb2221f48 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -5,12 +5,14 @@ declare void @foo(i32)
 
 define void @test(i1 %a) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    br i1 [[A:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i1 [[A:%.*]], true
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i1 [[A_OFF]], true
+; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       false:
-; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    call void @foo(i32 3)
 ; CHECK-NEXT:    ret void
 ;
   switch i1 %a, label %default [i1 1, label %true
@@ -35,16 +37,16 @@ define void @test2(i2 %a) {
 ; CHECK-NEXT:    i2 -1, label [[CASE3:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       case0:
-; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    call void @foo(i32 0)
 ; CHECK-NEXT:    ret void
 ; CHECK:       case1:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       case2:
-; CHECK-NEXT:    tail call void @foo(i32 2)
+; CHECK-NEXT:    call void @foo(i32 2)
 ; CHECK-NEXT:    ret void
 ; CHECK:       case3:
-; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    call void @foo(i32 3)
 ; CHECK-NEXT:    ret void
 ; CHECK:       default1:
 ; CHECK-NEXT:    unreachable
@@ -80,16 +82,16 @@ define void @test3(i2 %a) {
 ; CHECK-NEXT:    i2 -2, label [[CASE2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       case0:
-; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    call void @foo(i32 0)
 ; CHECK-NEXT:    ret void
 ; CHECK:       case1:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       case2:
-; CHECK-NEXT:    tail call void @foo(i32 2)
+; CHECK-NEXT:    call void @foo(i32 2)
 ; CHECK-NEXT:    ret void
 ; CHECK:       default:
-; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    call void @foo(i32 0)
 ; CHECK-NEXT:    ret void
 ;
   switch i2 %a, label %default [i2 0, label %case0
@@ -119,13 +121,13 @@ define void @test4(i128 %a) {
 ; CHECK-NEXT:    i128 1, label [[CASE1:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       case0:
-; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    call void @foo(i32 0)
 ; CHECK-NEXT:    ret void
 ; CHECK:       case1:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       default:
-; CHECK-NEXT:    tail call void @foo(i32 0)
+; CHECK-NEXT:    call void @foo(i32 0)
 ; CHECK-NEXT:    ret void
 ;
   switch i128 %a, label %default [i128 0, label %case0
@@ -146,14 +148,15 @@ default:
 define void @test5(i8 %a) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[A:%.*]], 2
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp eq i8 [[A]], 1
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i8 [[A]], -1
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
 ; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       false:
-; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    call void @foo(i32 3)
 ; CHECK-NEXT:    ret void
 ;
   %cmp = icmp ult i8 %a, 2
@@ -174,15 +177,17 @@ default:
 ;; All but one bit known one
 define void @test6(i8 %a) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[A:%.*]], -3
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp eq i8 [[A]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[A:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], -2
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i8 [[A]], 1
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
 ; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       false:
-; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    call void @foo(i32 3)
 ; CHECK-NEXT:    ret void
 ;
   %and = and i8 %a, 254
@@ -205,15 +210,17 @@ default:
 ; within a single run of simplify-cfg
 define void @test7(i8 %a) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[A:%.*]], -3
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp eq i8 [[A]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[A:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], -2
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i8 [[A]], 1
+; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
 ; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    tail call void @foo(i32 1)
+; CHECK-NEXT:    call void @foo(i32 1)
 ; CHECK-NEXT:    ret void
 ; CHECK:       false:
-; CHECK-NEXT:    tail call void @foo(i32 3)
+; CHECK-NEXT:    call void @foo(i32 3)
 ; CHECK-NEXT:    ret void
 ;
   %and = and i8 %a, 254
@@ -243,7 +250,22 @@ default:
 ;; but it doesn't hurt to confirm.
 define void @test8(i8 %a) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    unreachable
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[A:%.*]], -2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], undef
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    switch i8 [[A]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i8 -1, label [[TRUE:%.*]]
+; CHECK-NEXT:    i8 -2, label [[FALSE:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       true:
+; CHECK-NEXT:    call void @foo(i32 1)
+; CHECK-NEXT:    ret void
+; CHECK:       false:
+; CHECK-NEXT:    call void @foo(i32 3)
+; CHECK-NEXT:    ret void
+; CHECK:       default:
+; CHECK-NEXT:    call void @foo(i32 2)
+; CHECK-NEXT:    ret void
 ;
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, undef

From fa91ab85d9f84f8f8691aa5d625c7b3ec4467e21 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 13:55:52 +0000
Subject: [PATCH 0260/1176] [SimplifyCFG] ReduceSwitchRange: Improve on the
 case where the SubThreshold doesn't trigger

llvm-svn: 361728
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 38 ++++++++++++-------
 .../Transforms/SimplifyCFG/rangereduce.ll     |  8 ++--
 2 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index e5925545acaa2..524c3708e7bd1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5552,18 +5552,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
       BestDistance = Values[I] - Values[I - 1];
     }
   }
-  uint64_t Base = 0;
-  // Now transform the values such that they start at zero and ascend.
-  if (Values[BestIndex] >= SubThreshold) {
-    Base = Values[BestIndex];
-    MadeChanges = true;
-    for (auto &V : Values)
-      V = (APInt(BitWidth, V) - Base).getLimitedValue();
-  }
-
-  // Now we have signed numbers that have been shifted so that, given enough
-  // precision, there are no negative values. Since the rest of the transform
-  // is bitwise only, we switch now to an unsigned representation.
 
   // This transform can be done speculatively because it is so cheap - it
   // results in a single rotate operation being inserted.
@@ -5575,8 +5563,10 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
   // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
   // less than 64.
   unsigned Shift = 64;
+  // We need to store this from _before_ the transform
+  uint64_t BestIndexXor = Values[BestIndex];
   for (auto &V : Values)
-    Shift = std::min(Shift, countTrailingZeros(V));
+    Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor));
   assert(Shift < 64);
   if (Shift > 0) {
     MadeChanges = true;
@@ -5584,6 +5574,26 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
       V >>= Shift;
   }
 
+  // We Xor against Values[] (any element will do) because the if we do not
+  // start at zero, but also don't meet the SubThreshold, then we still might
+  // share common rights bits, and if this transform succeeds
+  // then we should insert the subtraction anyways, because the rotate trick
+  // below to avoid a branch needs the shifted away bits to be zero.
+
+  // Now transform the values such that they start at zero and ascend. Do not
+  // do this if the shift reduces the lowest value to less than SubThreshold,
+  // or if the subtraction is less than SubThreshold and it does not enable a
+  // rotate.
+  uint64_t Base = 0;
+  if ((BestIndexXor >= SubThreshold && Shift == 0) ||
+      (Shift > countTrailingZeros(BestIndexXor) &&
+       Values[BestIndex] >= SubThreshold)) {
+    Base = BestIndexXor;
+    MadeChanges = true;
+    for (auto &V : Values)
+      V = (APInt(BitWidth, V) - Base).getLimitedValue();
+  }
+
   if (!MadeChanges)
     // We didn't do anything.
     return false;
@@ -5614,7 +5624,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
 
   for (auto Case : SI->cases()) {
     auto *Orig = Case.getCaseValue();
-    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
+    auto Sub = Orig->getValue() - Base;
     Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
   }
   return true;
diff --git a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
index e9be26e94e4e1..804882fe76138 100644
--- a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
+++ b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
@@ -234,10 +234,10 @@ three:
 
 define i8 @test7(i8 %a) optsize {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 [[A:%.*]], -36
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = shl i8 [[TMP1]], 6
-; CHECK-NEXT:    [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[A:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[A]], 6
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = sub i8 [[TMP3]], 55
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 4
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]]
 ; CHECK:       switch.lookup:

From 9317963920a0d9b55b701326ee4f0c5e392d6aec Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 26 May 2019 14:03:50 +0000
Subject: [PATCH 0261/1176] [InstCombine] prevent crashing with invalid
 extractelement index

This was found/reduced from a fuzzer report:
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=14956

llvm-svn: 361729
---
 .../InstCombine/InstCombineVectorOps.cpp      |  5 +++--
 .../Transforms/InstCombine/extractelement.ll  | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 308569395a9ec..d812c5b83d18e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -878,12 +878,13 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   }
 
   // If the inserted element was extracted from some other vector and both
-  // indexes are constant, try to turn this into a shuffle.
+  // indexes are valid constants, try to turn this into a shuffle.
   uint64_t InsertedIdx, ExtractedIdx;
   Value *ExtVecOp;
   if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
       match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
-                                       m_ConstantInt(ExtractedIdx)))) {
+                                       m_ConstantInt(ExtractedIdx))) &&
+      ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
     // TODO: Looking at the user(s) to determine if this insert is a
     // fold-to-shuffle opportunity does not match the usual instcombine
     // constraints. We should decide if the transform is worthy based only
diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll
index 5d6a3a1c355eb..b1f57060b02bd 100644
--- a/llvm/test/Transforms/InstCombine/extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/extractelement.ll
@@ -310,3 +310,22 @@ define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) {
   ret float %r
 }
 
+; This would crash/assert because the logic for collectShuffleElements()
+; does not consider the possibility of invalid insert/extract operands.
+
+define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, double* %p) {
+; ANY-LABEL: @invalid_extractelement(
+; ANY-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; ANY-NEXT:    [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> <i32 undef, i32 1, i32 4, i32 3>
+; ANY-NEXT:    [[E:%.*]] = extractelement <4 x double> [[B]], i32 1
+; ANY-NEXT:    store double [[E]], double* [[P:%.*]], align 8
+; ANY-NEXT:    ret <4 x double> [[T4]]
+;
+  %t3 = extractelement <2 x double> %a, i32 0
+  %t4 = insertelement <4 x double> %b, double %t3, i32 2
+  %e = extractelement <4 x double> %t4, i32 1
+  store double %e, double* %p
+  %e1 = extractelement <2 x double> %a, i32 4 ; invalid index
+  %r = insertelement <4 x double> %t4, double %e1, i64 0
+  ret <4 x double> %r
+}

From 927fe7328dff0f2df8d2179b9e915612e711f27b Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 14:44:14 +0000
Subject: [PATCH 0262/1176] [SimplifyCFG] NFC, fix failing tests from last
 patches.

No problems with the transforms.

llvm-svn: 361730
---
 .../SimplifyCFG/CoveredLookupTable.ll         | 35 +++++++++++--------
 .../SimplifyCFG/X86/disable-lookup-table.ll   |  4 ---
 .../SimplifyCFG/X86/switch_to_lookup_table.ll |  4 +--
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll b/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
index e558956d50265..79a6d01d0f3f3 100644
--- a/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -simplifycfg -switch-to-lookup -S %s | FileCheck %s
 ; RUN: opt -passes='simplify-cfg<switch-to-lookup>' -S %s | FileCheck %s
 ; rdar://15268442
@@ -5,24 +6,28 @@
 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin12.0.0"
 
-; CHECK-LABEL: define i3 @coveredswitch_test(
-; CHECK: entry:
-; CHECK-NEXT: sub i3 %input, -4
-; CHECK-NEXT: zext i3 %switch.tableidx to i24
-; CHECK-NEXT: mul i24 %switch.cast, 3
-; CHECK-NEXT: lshr i24 7507338, %switch.shiftamt
-; CHECK-NEXT: trunc i24 %switch.downshift to i3
-; CHECK-NEXT: ret i3 %switch.masked
-
 define i3 @coveredswitch_test(i3 %input) {
+; CHECK-LABEL: @coveredswitch_test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i3 [[INPUT:%.*]], -2
+; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[BB8:%.*]]
+; CHECK:       switch.lookup:
+; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = zext i3 [[INPUT]] to i18
+; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i18 [[SWITCH_CAST]], 3
+; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i18 42792, [[SWITCH_SHIFTAMT]]
+; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i18 [[SWITCH_DOWNSHIFT]] to i3
+; CHECK-NEXT:    ret i3 [[SWITCH_MASKED]]
+; CHECK:       bb8:
+; CHECK-NEXT:    ret i3 -2
+;
 entry:
   switch i3 %input, label %bb8 [
-    i3 0, label %bb7
-    i3 1, label %bb
-    i3 2, label %bb3
-    i3 3, label %bb4
-    i3 4, label %bb5
-    i3 5, label %bb6
+  i3 0, label %bb7
+  i3 1, label %bb
+  i3 2, label %bb3
+  i3 3, label %bb4
+  i3 4, label %bb5
+  i3 5, label %bb6
   ]
 
 bb:                                               ; preds = %entry
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
index 735a97f305e20..05e5e8639374c 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
@@ -5,10 +5,6 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; In the presence of "-no-jump-tables"="true", simplifycfg should not convert switches to lookup tables.
 
-; CHECK: @switch.table.bar = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1]
-; CHECK-LABEL: foo
-; CHECK-NOT: @switch.table.foo = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1]
-
 define i32 @foo(i32 %c) "no-jump-tables"="true" {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index f1c550cf94f20..2cdc7257fa932 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -20,10 +20,10 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: @switch.table.earlyreturncrash = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5], align 4
 
 ; The table for @large
-; CHECK: @switch.table.large = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9,
+; CHECK: @switch.table.large = private unnamed_addr constant [200 x i32] [i32 0, i32 1, i32 4, i32 9,
 
 ; The table for @cprop
-; CHECK: @switch.table.cprop = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7], align 4
+; CHECK: @switch.table.cprop = private unnamed_addr constant [8 x i32] [i32 123, i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7], align 4
 
 ; The table for @unreachable_case
 ; CHECK: @switch.table.unreachable_case = private unnamed_addr constant [9 x i32] [i32 0, i32 0, i32 0, i32 2, i32 -1, i32 1, i32 1, i32 1, i32 1], align 4

From aabe7781a5070f677b12b83699dfe09a18cd73ba Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 26 May 2019 15:05:12 +0000
Subject: [PATCH 0263/1176] [LLParser] Fix uninitialized variable warnings.
 NFCI.

These 3 variables cause quite a few warnings in the scan-build report on llvm.

llvm-svn: 361731
---
 llvm/lib/AsmParser/LLParser.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f16f6a2af492f..e8b1970db8d51 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8484,13 +8484,13 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
     return true;
 
   do {
-    unsigned Flag;
+    unsigned Flag = 0;
     switch (Lex.getKind()) {
     case lltok::kw_linkage:
       Lex.Lex();
       if (ParseToken(lltok::colon, "expected ':'"))
         return true;
-      bool HasLinkage;
+      bool HasLinkage = false;
       GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
       assert(HasLinkage && "Linkage not optional in summary entry");
       Lex.Lex();
@@ -8536,7 +8536,7 @@ bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
   assert(Lex.getKind() == lltok::kw_varFlags);
   Lex.Lex();
 
-  unsigned Flag;
+  unsigned Flag = 0;
   if (ParseToken(lltok::colon, "expected ':' here") ||
       ParseToken(lltok::lparen, "expected '(' here") ||
       ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||

From e434368a67c0a1086c8310341650d60286b6ab82 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 26 May 2019 15:08:45 +0000
Subject: [PATCH 0264/1176] Revert rL361731 : [LLParser] Fix uninitialized
 variable warnings. NFCI.

These 3 variables cause quite a few warnings in the scan-build report on llvm.
........
Revert accidental commit.

llvm-svn: 361732
---
 llvm/lib/AsmParser/LLParser.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index e8b1970db8d51..f16f6a2af492f 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8484,13 +8484,13 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
     return true;
 
   do {
-    unsigned Flag = 0;
+    unsigned Flag;
     switch (Lex.getKind()) {
     case lltok::kw_linkage:
       Lex.Lex();
       if (ParseToken(lltok::colon, "expected ':'"))
         return true;
-      bool HasLinkage = false;
+      bool HasLinkage;
       GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
       assert(HasLinkage && "Linkage not optional in summary entry");
       Lex.Lex();
@@ -8536,7 +8536,7 @@ bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
   assert(Lex.getKind() == lltok::kw_varFlags);
   Lex.Lex();
 
-  unsigned Flag = 0;
+  unsigned Flag;
   if (ParseToken(lltok::colon, "expected ':' here") ||
       ParseToken(lltok::lparen, "expected '(' here") ||
       ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||

From 7b883b7ed0555527f6c1facc249fc7bd61062d7f Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 15:29:10 +0000
Subject: [PATCH 0265/1176] [SimplifyCFG] NFC, one more fixed test from
 previous push.

The old test was checking for a stupid subtract one that is a transform that
makes the code woorse.

The constant-islands-jump-table.ll test wants the code a specific way,
that makes sense, so I will submit code to fix that one.

Sorry that I really didn't know how to run the test suite before this.

llvm-svn: 361733
---
 .../SimplifyCFG/ARM/switch-to-lookup-table.ll | 56 +++++++------------
 1 file changed, 19 insertions(+), 37 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
index a931e9084393e..b6573de49754c 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
@@ -182,43 +182,25 @@ declare i32 @f4(i32, i32)
 declare i32 @f5(i32, i32)
 
 define i32 @test4(i32 %a, i32 %b, i32 %c) {
-; ENABLE-LABEL: @test4(
-; ENABLE-NEXT:  entry:
-; ENABLE-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[A:%.*]], 1
-; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 3
-; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[COND_FALSE6:%.*]]
-; ENABLE:       cond.false6:
-; ENABLE-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[A]], 4
-; ENABLE-NEXT:    [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
-; ENABLE-NEXT:    br label [[COND_END11:%.*]]
-; ENABLE:       switch.lookup:
-; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32 (i32, i32)*], [3 x i32 (i32, i32)*]* @switch.table.test4, i32 0, i32 [[SWITCH_TABLEIDX]]
-; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32 (i32, i32)*, i32 (i32, i32)** [[SWITCH_GEP]]
-; ENABLE-NEXT:    br label [[COND_END11]]
-; ENABLE:       cond.end11:
-; ENABLE-NEXT:    [[COND12:%.*]] = phi i32 (i32, i32)* [ [[COND]], [[COND_FALSE6]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
-; ENABLE-NEXT:    [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]])
-; ENABLE-NEXT:    ret i32 [[CALL]]
-;
-; DISABLE-LABEL: @test4(
-; DISABLE-NEXT:  entry:
-; DISABLE-NEXT:    switch i32 [[A:%.*]], label [[COND_FALSE6:%.*]] [
-; DISABLE-NEXT:    i32 1, label [[COND_END11:%.*]]
-; DISABLE-NEXT:    i32 2, label [[COND_END11_FOLD_SPLIT:%.*]]
-; DISABLE-NEXT:    i32 3, label [[COND_END11_FOLD_SPLIT1:%.*]]
-; DISABLE-NEXT:    ]
-; DISABLE:       cond.false6:
-; DISABLE-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[A]], 4
-; DISABLE-NEXT:    [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
-; DISABLE-NEXT:    br label [[COND_END11]]
-; DISABLE:       cond.end11.fold.split:
-; DISABLE-NEXT:    br label [[COND_END11]]
-; DISABLE:       cond.end11.fold.split1:
-; DISABLE-NEXT:    br label [[COND_END11]]
-; DISABLE:       cond.end11:
-; DISABLE-NEXT:    [[COND12:%.*]] = phi i32 (i32, i32)* [ @f1, [[ENTRY:%.*]] ], [ [[COND]], [[COND_FALSE6]] ], [ @f2, [[COND_END11_FOLD_SPLIT]] ], [ @f3, [[COND_END11_FOLD_SPLIT1]] ]
-; DISABLE-NEXT:    [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]])
-; DISABLE-NEXT:    ret i32 [[CALL]]
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    switch i32 [[A:%.*]], label [[COND_FALSE6:%.*]] [
+; CHECK-NEXT:    i32 1, label [[COND_END11:%.*]]
+; CHECK-NEXT:    i32 2, label [[COND_END11_FOLD_SPLIT:%.*]]
+; CHECK-NEXT:    i32 3, label [[COND_END11_FOLD_SPLIT1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       cond.false6:
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[A]], 4
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
+; CHECK-NEXT:    br label [[COND_END11]]
+; CHECK:       cond.end11.fold.split:
+; CHECK-NEXT:    br label [[COND_END11]]
+; CHECK:       cond.end11.fold.split1:
+; CHECK-NEXT:    br label [[COND_END11]]
+; CHECK:       cond.end11:
+; CHECK-NEXT:    [[COND12:%.*]] = phi i32 (i32, i32)* [ @f1, [[ENTRY:%.*]] ], [ [[COND]], [[COND_FALSE6]] ], [ @f2, [[COND_END11_FOLD_SPLIT]] ], [ @f3, [[COND_END11_FOLD_SPLIT1]] ]
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]])
+; CHECK-NEXT:    ret i32 [[CALL]]
 ;
 entry:
   %cmp = icmp eq i32 %a, 1

From a044410f37e9fbef56370bb4fab64b60e13b4ca2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 26 May 2019 16:00:35 +0000
Subject: [PATCH 0266/1176] [X86][SSE] Add shuffle combining support for
 ISD::ANY_EXTEND_VECTOR_INREG

Reuses what we already have in place for ISD::ZERO_EXTEND_VECTOR_INREG just with a different sentinel

llvm-svn: 361734
---
 .../X86/MCTargetDesc/X86InstComments.cpp      | 18 ++++---
 .../lib/Target/X86/Utils/X86ShuffleDecode.cpp |  5 +-
 llvm/lib/Target/X86/Utils/X86ShuffleDecode.h  |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 11 ++--
 llvm/test/CodeGen/X86/shrink_vmul.ll          | 52 +++++++++----------
 5 files changed, 47 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index b1af31067ae09..fd2b4e2800394 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1202,7 +1202,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXBW, m)
-    DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), ShuffleMask);
+    DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
@@ -1210,7 +1211,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXBD, m)
-    DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+    DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
@@ -1218,7 +1220,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXBQ, m)
-    DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
@@ -1226,7 +1229,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXWD, m)
-    DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+    DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
@@ -1234,7 +1238,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXWQ, m)
-    DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
 
@@ -1242,7 +1247,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
     LLVM_FALLTHROUGH;
   CASE_PMOVZX(PMOVZXDQ, m)
-    DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+    DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false,
+                         ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
   }
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 524e72318910b..48fd3e0b7ab9f 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -383,7 +383,8 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
 }
 
 void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
-                          unsigned NumDstElts, SmallVectorImpl<int> &Mask) {
+                          unsigned NumDstElts, bool IsAnyExtend,
+                          SmallVectorImpl<int> &Mask) {
   unsigned Scale = DstScalarBits / SrcScalarBits;
   assert(SrcScalarBits < DstScalarBits &&
          "Expected zero extension mask to increase scalar size");
@@ -391,7 +392,7 @@ void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
   for (unsigned i = 0; i != NumDstElts; i++) {
     Mask.push_back(i);
     for (unsigned j = 1; j != Scale; j++)
-      Mask.push_back(SM_SentinelZero);
+      Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero);
   }
 }
 
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index d1678d57a8f91..f52785063071a 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -136,7 +136,7 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
 
 /// Decode a zero extension instruction as a shuffle mask.
 void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
-                          unsigned NumDstElts,
+                          unsigned NumDstElts, bool IsAnyExtend,
                           SmallVectorImpl<int> &ShuffleMask);
 
 /// Decode a move lower and zero upper instruction as a shuffle mask.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8b6edaa50ba6a..47b8e9eac2d21 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6849,17 +6849,20 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case ISD::ZERO_EXTEND:
-  case ISD::ZERO_EXTEND_VECTOR_INREG: {
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+  case ISD::ANY_EXTEND_VECTOR_INREG: {
     SDValue Src = N.getOperand(0);
     EVT SrcVT = Src.getValueType();
 
-    // Zero-extended source must be a simple vector.
+    // Extended source must be a simple vector.
     if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
         (SrcVT.getScalarSizeInBits() % 8) != 0)
       return false;
 
     unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
-    DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, Mask);
+    bool IsAnyExtend = (ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
+    DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend,
+                         Mask);
 
     if (NumSizeInBits != SrcVT.getSizeInBits()) {
       assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
@@ -43259,7 +43262,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
 
   // Attempt to combine as a shuffle.
   // TODO: SSE41 support
-  if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+  if (Subtarget.hasAVX()) {
     SDValue Op(N, 0);
     if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
       if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index f693a57c732f5..0c8949f246177 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -479,11 +479,10 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
+; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
 ; X86-AVX-NEXT:    retl
@@ -503,11 +502,10 @@ define void @mul_2xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX-LABEL: mul_2xi16:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
 entry:
@@ -1167,10 +1165,9 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl c, %esi
 ; X86-AVX-NEXT:    vpmovsxwq (%edx,%ecx), %xmm0
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X86-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X86-AVX-NEXT:    vmovq %xmm0, (%esi,%ecx,4)
 ; X86-AVX-NEXT:    popl %esi
 ; X86-AVX-NEXT:    retl
@@ -1195,10 +1192,9 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
 ; X64-AVX-NEXT:    vpmovsxwq (%rdi,%rdx), %xmm0
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-AVX-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rdx,4)
 ; X64-AVX-NEXT:    retq
 entry:
@@ -1813,9 +1809,9 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT:    retl
 ;
@@ -1834,9 +1830,9 @@ define void @mul_2xi16_varconst1(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi16_varconst1:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
 entry:
@@ -1941,9 +1937,9 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX-NEXT:    movl c, %edx
-; X86-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X86-AVX-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X86-AVX-NEXT:    vpmuludq {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-AVX-NEXT:    vmovq %xmm0, (%edx,%eax,4)
 ; X86-AVX-NEXT:    retl
 ;
@@ -1962,9 +1958,9 @@ define void @mul_2xi16_varconst3(i8* nocapture readonly %a, i64 %index) {
 ; X64-AVX-LABEL: mul_2xi16_varconst3:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; X64-AVX-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X64-AVX-NEXT:    vmovq %xmm0, (%rax,%rsi,4)
 ; X64-AVX-NEXT:    retq
 entry:

From bd324fa2273778430a4fdf8371fec5d64d2231bb Mon Sep 17 00:00:00 2001
From: Mads Ravn <madsravn@gmail.com>
Date: Sun, 26 May 2019 17:00:38 +0000
Subject: [PATCH 0267/1176] DeleteNullPointerCheck now deletes until the end
 brace of the condition.

Patch by Jonathan Camilleri

Differential Revision https://reviews.llvm.org/D61861

llvm-svn: 361735
---
 .../clang-tidy/readability/DeleteNullPointerCheck.cpp    | 9 ++++++---
 .../test/clang-tidy/readability-delete-null-pointer.cpp  | 9 +++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp
index 0c5eacef2e5c6..303833d73ec1d 100644
--- a/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DeleteNullPointerCheck.h"
+#include "../utils/LexerUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Lex/Lexer.h"
@@ -62,9 +63,11 @@ void DeleteNullPointerCheck::check(const MatchFinder::MatchResult &Result) {
 
   Diag << FixItHint::CreateRemoval(CharSourceRange::getTokenRange(
       IfWithDelete->getBeginLoc(),
-      Lexer::getLocForEndOfToken(IfWithDelete->getCond()->getEndLoc(), 0,
-                                 *Result.SourceManager,
-                                 Result.Context->getLangOpts())));
+      utils::lexer::getPreviousToken(IfWithDelete->getThen()->getBeginLoc(),
+                                     *Result.SourceManager,
+                                     Result.Context->getLangOpts())
+          .getLocation()));
+
   if (Compound) {
     Diag << FixItHint::CreateRemoval(
         CharSourceRange::getTokenRange(Compound->getLBracLoc()));
diff --git a/clang-tools-extra/test/clang-tidy/readability-delete-null-pointer.cpp b/clang-tools-extra/test/clang-tidy/readability-delete-null-pointer.cpp
index b46e52a754b76..5a7ccae4d52fd 100644
--- a/clang-tools-extra/test/clang-tidy/readability-delete-null-pointer.cpp
+++ b/clang-tools-extra/test/clang-tidy/readability-delete-null-pointer.cpp
@@ -3,6 +3,15 @@
 #define NULL 0
 
 void f() {
+  int *ps = 0;
+  if (ps /**/) // #0
+    delete ps;
+  // CHECK-MESSAGES: :[[@LINE-2]]:3: warning: 'if' statement is unnecessary; deleting null pointer has no effect [readability-delete-null-pointer]
+
+  // CHECK-FIXES: int *ps = 0;
+  // CHECK-FIXES-NEXT: {{^  }}// #0
+  // CHECK-FIXES-NEXT: delete ps;
+
   int *p = 0;
 
   // #1

From 343578759e20958b9c41ab9dedc8b4719e90fdda Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 26 May 2019 18:15:51 +0000
Subject: [PATCH 0268/1176] [SimplifyCFG] back out all SwitchInst commits

They caused the sanitizer builds to fail.

My suspicion is the change the countLeadingZeros().

llvm-svn: 361736
---
 llvm/include/llvm/Support/MathExtras.h        |   24 +-
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |    2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  161 +-
 .../Transforms/SimplifyCFG/ARM/cttz-ctlz.ll   |   21 +-
 .../SimplifyCFG/ARM/select-trunc-i64.ll       |   17 +-
 .../switch-to-lookup-table-constant-expr.ll   |   24 +-
 .../SimplifyCFG/ARM/switch-to-lookup-table.ll |  106 +-
 .../SimplifyCFG/CoveredLookupTable.ll         |   35 +-
 .../SimplifyCFG/X86/disable-lookup-table.ll   |   55 +-
 .../SimplifyCFG/X86/speculate-cttz-ctlz.ll    |  477 ++----
 .../SimplifyCFG/X86/switch-covered-bug.ll     |   37 +-
 .../SimplifyCFG/X86/switch-table-bug.ll       |   22 +-
 .../SimplifyCFG/X86/switch_to_lookup_table.ll | 1353 +++++++----------
 .../Transforms/SimplifyCFG/rangereduce.ll     |   47 +-
 .../SimplifyCFG/switch-dead-default.ll        |  191 +--
 .../SimplifyCFG/switch-masked-bits.ll         |   23 +-
 .../SimplifyCFG/switch-on-const-select.ll     |  191 ++-
 .../SimplifyCFG/switch-range-to-icmp.ll       |   79 +-
 .../SimplifyCFG/switch_create-custom-dl.ll    |  592 +++-----
 .../Transforms/SimplifyCFG/switch_create.ll   |  591 +++----
 20 files changed, 1418 insertions(+), 2630 deletions(-)

diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 85d5a5ae4b903..e902a725659ad 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -51,14 +51,14 @@ enum ZeroBehavior {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static unsigned count(T Val, ZeroBehavior) {
+  static std::size_t count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
     if (Val & 0x1)
       return 0;
 
     // Bisection method.
-    unsigned ZeroBits = 0;
+    std::size_t ZeroBits = 0;
     T Shift = std::numeric_limits<T>::digits >> 1;
     T Mask = std::numeric_limits<T>::max() >> Shift;
     while (Shift) {
@@ -75,7 +75,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct TrailingZerosCounter<T, 4> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
+  static std::size_t count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -91,7 +91,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct TrailingZerosCounter<T, 8> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
+  static std::size_t count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -116,7 +116,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -125,12 +125,12 @@ unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static unsigned count(T Val, ZeroBehavior) {
+  static std::size_t count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
 
     // Bisection method.
-    unsigned ZeroBits = 0;
+    std::size_t ZeroBits = 0;
     for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
       T Tmp = Val >> Shift;
       if (Tmp)
@@ -144,7 +144,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct LeadingZerosCounter<T, 4> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
+  static std::size_t count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -160,7 +160,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct LeadingZerosCounter<T, 8> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
+  static std::size_t count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -185,7 +185,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -458,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -474,7 +474,7 @@ unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0b0dd0ae28b3f..0e8a517d1d644 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1147,7 +1147,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
-  Out.kernarg_segment_alignment = std::max<size_t>(4,
+  Out.kernarg_segment_alignment = std::max((size_t)4,
       countTrailingZeros(MaxKernArgAlign));
 }
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 524c3708e7bd1..90b552035af3d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5309,9 +5309,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
 
   for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
     ConstantInt *CaseVal = CI->getCaseValue();
-    if (CaseVal->getValue().ult(MinCaseVal->getValue()))
+    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
       MinCaseVal = CaseVal;
-    if (CaseVal->getValue().ugt(MaxCaseVal->getValue()))
+    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
       MaxCaseVal = CaseVal;
 
     // Resulting value at phi nodes for this case value.
@@ -5337,7 +5337,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   }
 
   uint64_t NumResults = ResultLists[PHIs[0]].size();
-  uint64_t TableSize = MaxCaseVal->getValue().getLimitedValue() + 1;
+  APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+  uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
   bool TableHasHoles = (NumResults < TableSize);
 
   // If the table has holes, we need a constant result for the default case
@@ -5372,7 +5373,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
 
   // Compute the table index value.
   Builder.SetInsertPoint(SI);
-  Value *TableIndex = SI->getCondition();
+  Value *TableIndex;
+  if (MinCaseVal->isNullValue())
+    TableIndex = SI->getCondition();
+  else
+    TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+                                   "switch.tableidx");
 
   // Compute the maximum table size representable by the integer type we are
   // switching upon.
@@ -5412,10 +5418,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
     LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
                                   CommonDest->getParent(), CommonDest);
 
-    // When doing the register-sized hole-check, unconditionally use a
-    // subtraction.
-    TableIndex = Builder.CreateSub(TableIndex, MinCaseVal);
-
     // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
     // unnecessary illegal types.
     uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
@@ -5459,11 +5461,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
     // If using a bitmask, use any value to fill the lookup table holes.
     Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
     StringRef FuncName = Fn->getName();
-    // Base is 0 unless using a hole check
-    ConstantInt *Base =
-        NeedMask ? MinCaseVal
-                 : ConstantInt::get(Mod.getContext(), APInt(CaseSize, 0));
-    SwitchLookupTable Table(Mod, TableSize, Base, ResultList, DV, DL, FuncName);
+    SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
+                            FuncName);
 
     Value *Result = Table.BuildLookup(TableIndex, Builder);
 
@@ -5508,6 +5507,17 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
   return true;
 }
 
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+  // See also SelectionDAGBuilder::isDense(), which this function was based on.
+  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+  uint64_t Range = Diff + 1;
+  uint64_t NumCases = Values.size();
+  // 40% is the default density for building a jump table in optsize/minsize mode.
+  uint64_t MinDensity = 40;
+
+  return NumCases * 100 >= Range * MinDensity;
+}
+
 /// Try to transform a switch that has "holes" in it to a contiguous sequence
 /// of cases.
 ///
@@ -5519,83 +5529,58 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
 static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                               const DataLayout &DL,
                               const TargetTransformInfo &TTI) {
-  // The number of cases that need to be removed by a subtraction operation
-  // to make it worth using.
-  const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8);
   auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
-  unsigned BitWidth = CondTy->getIntegerBitWidth();
-  if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth))
+  if (CondTy->getIntegerBitWidth() > 64 ||
+      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
     return false;
   // Only bother with this optimization if there are more than 3 switch cases;
   // SDAG will only bother creating jump tables for 4 or more cases.
-  // This is also useful when using the LowerSwitch transform, but not with
-  // so few cases.
   if (SI->getNumCases() < 4)
     return false;
 
-  // We organize the range to start from 0, if it is not already close.
-  SmallVector<uint64_t, 4> Values;
+  // This transform is agnostic to the signedness of the input or case values. We
+  // can treat the case values as signed or unsigned. We can optimize more common
+  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
+  // as signed.
+  SmallVector<int64_t,4> Values;
   for (auto &C : SI->cases())
-    Values.push_back(C.getCaseValue()->getValue().getLimitedValue());
+    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
   llvm::sort(Values);
 
-  bool MadeChanges = false;
-  // We must first look find the best start point, for example if we have a
-  // series that crosses zero: -2, -1, 0, 1, 2.
-  uint64_t BestDistance =
-      APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() -
-      Values.back() + Values.front() + 1;
-  unsigned BestIndex = 0;
-  for (unsigned I = 1, E = Values.size(); I != E; I++) {
-    if (Values[I] - Values[I - 1] > BestDistance) {
-      BestIndex = I;
-      BestDistance = Values[I] - Values[I - 1];
-    }
-  }
+  // If the switch is already dense, there's nothing useful to do here.
+  if (isSwitchDense(Values))
+    return false;
+
+  // First, transform the values such that they start at zero and ascend.
+  int64_t Base = Values[0];
+  for (auto &V : Values)
+    V -= (uint64_t)(Base);
 
-  // This transform can be done speculatively because it is so cheap - it
-  // results in a single rotate operation being inserted.
+  // Now we have signed numbers that have been shifted so that, given enough
+  // precision, there are no negative values. Since the rest of the transform
+  // is bitwise only, we switch now to an unsigned representation.
+  uint64_t GCD = 0;
+  for (auto &V : Values)
+    GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
+
+  // This transform can be done speculatively because it is so cheap - it results
+  // in a single rotate operation being inserted. This can only happen if the
+  // factor extracted is a power of 2.
+  // FIXME: If the GCD is an odd number we can multiply by the multiplicative
+  // inverse of GCD and then perform this transform.
   // FIXME: It's possible that optimizing a switch on powers of two might also
   // be beneficial - flag values are often powers of two and we could use a CLZ
   // as the key function.
+  if (GCD <= 1 || !isPowerOf2_64(GCD))
+    // No common divisor found or too expensive to compute key function.
+    return false;
 
-  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
-  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
-  // less than 64.
-  unsigned Shift = 64;
-  // We need to store this from _before_ the transform
-  uint64_t BestIndexXor = Values[BestIndex];
+  unsigned Shift = Log2_64(GCD);
   for (auto &V : Values)
-    Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor));
-  assert(Shift < 64);
-  if (Shift > 0) {
-    MadeChanges = true;
-    for (auto &V : Values)
-      V >>= Shift;
-  }
-
-  // We Xor against Values[] (any element will do) because the if we do not
-  // start at zero, but also don't meet the SubThreshold, then we still might
-  // share common rights bits, and if this transform succeeds
-  // then we should insert the subtraction anyways, because the rotate trick
-  // below to avoid a branch needs the shifted away bits to be zero.
-
-  // Now transform the values such that they start at zero and ascend. Do not
-  // do this if the shift reduces the lowest value to less than SubThreshold,
-  // or if the subtraction is less than SubThreshold and it does not enable a
-  // rotate.
-  uint64_t Base = 0;
-  if ((BestIndexXor >= SubThreshold && Shift == 0) ||
-      (Shift > countTrailingZeros(BestIndexXor) &&
-       Values[BestIndex] >= SubThreshold)) {
-    Base = BestIndexXor;
-    MadeChanges = true;
-    for (auto &V : Values)
-      V = (APInt(BitWidth, V) - Base).getLimitedValue();
-  }
-
-  if (!MadeChanges)
-    // We didn't do anything.
+    V = (int64_t)((uint64_t)V >> Shift);
+
+  if (!isSwitchDense(Values))
+    // Transform didn't create a dense switch.
     return false;
 
   // The obvious transform is to shift the switch condition right and emit a
@@ -5610,22 +5595,18 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
 
   auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
   Builder.SetInsertPoint(SI);
-  Value *Key = SI->getCondition();
-  if (Base > 0)
-    Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base));
-  if (Shift > 0) {
-    // FIXME replace with fshr?
-    auto *ShiftC = ConstantInt::get(Ty, Shift);
-    auto *LShr = Builder.CreateLShr(Key, ShiftC);
-    auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift);
-    Key = Builder.CreateOr(LShr, Shl);
-  }
-  SI->replaceUsesOfWith(SI->getCondition(), Key);
+  auto *ShiftC = ConstantInt::get(Ty, Shift);
+  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+  auto *LShr = Builder.CreateLShr(Sub, ShiftC);
+  auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
+  auto *Rot = Builder.CreateOr(LShr, Shl);
+  SI->replaceUsesOfWith(SI->getCondition(), Rot);
 
   for (auto Case : SI->cases()) {
     auto *Orig = Case.getCaseValue();
-    auto Sub = Orig->getValue() - Base;
-    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
+    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
+    Case.setValue(
+        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
   }
   return true;
 }
@@ -5666,9 +5647,6 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
     return requestResimplify();
 
-  if (ReduceSwitchRange(SI, Builder, DL, TTI))
-    return requestResimplify();
-
   // The conversion from switch to lookup tables results in difficult-to-analyze
   // code and makes pruning branches much harder. This is a problem if the
   // switch expression itself can still be restricted as a result of inlining or
@@ -5678,6 +5656,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       SwitchToLookupTable(SI, Builder, DL, TTI))
     return requestResimplify();
 
+  if (ReduceSwitchRange(SI, Builder, DL, TTI))
+    return requestResimplify();
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
index 22f5e9f3cc1dc..ffcf2175091f1 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
@@ -1,14 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -mtriple=arm -mattr=+v6t2 < %s | FileCheck %s
 
 define i32 @ctlz(i32 %A) {
 ; CHECK-LABEL: @ctlz(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
-;
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -24,12 +21,10 @@ cond.end:
 
 define i32 @cttz(i32 %A) {
 ; CHECK-LABEL: @cttz(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
-;
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll b/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll
index 9218ee185b1e4..9484de77db487 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll
@@ -1,20 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ;RUN: opt -S -simplifycfg -mtriple=arm < %s | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
+; CHECK-LABEL: select_trunc_i64
+; CHECK-NOT: br
+; CHECK: select
+; CHECK: select
 define arm_aapcscc i32 @select_trunc_i64(i32 %a, i32 %b) {
-; CHECK-LABEL: @select_trunc_i64(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[ADD]], 2147483647
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648
-; CHECK-NEXT:    [[EXTRACT_T:%.*]] = trunc i64 [[COND]] to i32
-; CHECK-NEXT:    [[COND8_OFF0:%.*]] = select i1 [[CMP]], i32 2147483647, i32 [[EXTRACT_T]]
-; CHECK-NEXT:    ret i32 [[COND8_OFF0]]
-;
 entry:
   %conv = sext i32 %a to i64
   %conv1 = sext i32 %b to i64
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll
index f677371734268..453a76864032e 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -16,28 +15,11 @@ target triple = "armv7a--none-eabi"
 @g4 = external thread_local global i32, align 4
 
 define i32* @test3(i32 %n) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @g4, [[SW_DEFAULT]] ], [ getelementptr inbounds (i32, i32* inttoptr (i32 mul (i32 ptrtoint (i32* @g3 to i32), i32 2) to i32*), i32 1), [[SW_BB2]] ], [ @g2, [[SW_BB1]] ], [ @g1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32* [[RETVAL_0]]
-;
 entry:
   switch i32 %n, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
   ]
 
 sw.bb:
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
index b6573de49754c..501bc31bd0dd8 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=static    < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
 ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=pic       < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
 ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=ropi      < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
@@ -23,22 +22,11 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "armv7a--none-eabi"
 
 define i32 @test1(i32 %n) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.test1, i32 0, i32 [[N]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 15498
-;
 entry:
   switch i32 %n, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
   ]
 
 sw.bb:
@@ -65,39 +53,11 @@ return:
 
 
 define i32* @test2(i32 %n) {
-; ENABLE-LABEL: @test2(
-; ENABLE-NEXT:  entry:
-; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3
-; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; ENABLE:       switch.lookup:
-; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.test2, i32 0, i32 [[N]]
-; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]]
-; ENABLE-NEXT:    ret i32* [[SWITCH_LOAD]]
-; ENABLE:       return:
-; ENABLE-NEXT:    ret i32* @c4
-;
-; DISABLE-LABEL: @test2(
-; DISABLE-NEXT:  entry:
-; DISABLE-NEXT:    switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [
-; DISABLE-NEXT:    i32 0, label [[RETURN:%.*]]
-; DISABLE-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; DISABLE-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; DISABLE-NEXT:    ]
-; DISABLE:       sw.bb1:
-; DISABLE-NEXT:    br label [[RETURN]]
-; DISABLE:       sw.bb2:
-; DISABLE-NEXT:    br label [[RETURN]]
-; DISABLE:       sw.default:
-; DISABLE-NEXT:    br label [[RETURN]]
-; DISABLE:       return:
-; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @c4, [[SW_DEFAULT]] ], [ @c3, [[SW_BB2]] ], [ @c2, [[SW_BB1]] ], [ @c1, [[ENTRY:%.*]] ]
-; DISABLE-NEXT:    ret i32* [[RETVAL_0]]
-;
 entry:
   switch i32 %n, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
   ]
 
 sw.bb:
@@ -123,39 +83,11 @@ return:
 @g4 = external global i32, align 4
 
 define i32* @test3(i32 %n) {
-; ENABLE-LABEL: @test3(
-; ENABLE-NEXT:  entry:
-; ENABLE-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3
-; ENABLE-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; ENABLE:       switch.lookup:
-; ENABLE-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.test3, i32 0, i32 [[N]]
-; ENABLE-NEXT:    [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]]
-; ENABLE-NEXT:    ret i32* [[SWITCH_LOAD]]
-; ENABLE:       return:
-; ENABLE-NEXT:    ret i32* @g4
-;
-; DISABLE-LABEL: @test3(
-; DISABLE-NEXT:  entry:
-; DISABLE-NEXT:    switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [
-; DISABLE-NEXT:    i32 0, label [[RETURN:%.*]]
-; DISABLE-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; DISABLE-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; DISABLE-NEXT:    ]
-; DISABLE:       sw.bb1:
-; DISABLE-NEXT:    br label [[RETURN]]
-; DISABLE:       sw.bb2:
-; DISABLE-NEXT:    br label [[RETURN]]
-; DISABLE:       sw.default:
-; DISABLE-NEXT:    br label [[RETURN]]
-; DISABLE:       return:
-; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @g4, [[SW_DEFAULT]] ], [ @g3, [[SW_BB2]] ], [ @g2, [[SW_BB1]] ], [ @g1, [[ENTRY:%.*]] ]
-; DISABLE-NEXT:    ret i32* [[RETVAL_0]]
-;
 entry:
   switch i32 %n, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
   ]
 
 sw.bb:
@@ -182,26 +114,6 @@ declare i32 @f4(i32, i32)
 declare i32 @f5(i32, i32)
 
 define i32 @test4(i32 %a, i32 %b, i32 %c) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[A:%.*]], label [[COND_FALSE6:%.*]] [
-; CHECK-NEXT:    i32 1, label [[COND_END11:%.*]]
-; CHECK-NEXT:    i32 2, label [[COND_END11_FOLD_SPLIT:%.*]]
-; CHECK-NEXT:    i32 3, label [[COND_END11_FOLD_SPLIT1:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       cond.false6:
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[A]], 4
-; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5
-; CHECK-NEXT:    br label [[COND_END11]]
-; CHECK:       cond.end11.fold.split:
-; CHECK-NEXT:    br label [[COND_END11]]
-; CHECK:       cond.end11.fold.split1:
-; CHECK-NEXT:    br label [[COND_END11]]
-; CHECK:       cond.end11:
-; CHECK-NEXT:    [[COND12:%.*]] = phi i32 (i32, i32)* [ @f1, [[ENTRY:%.*]] ], [ [[COND]], [[COND_FALSE6]] ], [ @f2, [[COND_END11_FOLD_SPLIT]] ], [ @f3, [[COND_END11_FOLD_SPLIT1]] ]
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]])
-; CHECK-NEXT:    ret i32 [[CALL]]
-;
 entry:
   %cmp = icmp eq i32 %a, 1
   br i1 %cmp, label %cond.end11, label %cond.false
diff --git a/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll b/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
index 79a6d01d0f3f3..e558956d50265 100644
--- a/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -simplifycfg -switch-to-lookup -S %s | FileCheck %s
 ; RUN: opt -passes='simplify-cfg<switch-to-lookup>' -S %s | FileCheck %s
 ; rdar://15268442
@@ -6,28 +5,24 @@
 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin12.0.0"
 
+; CHECK-LABEL: define i3 @coveredswitch_test(
+; CHECK: entry:
+; CHECK-NEXT: sub i3 %input, -4
+; CHECK-NEXT: zext i3 %switch.tableidx to i24
+; CHECK-NEXT: mul i24 %switch.cast, 3
+; CHECK-NEXT: lshr i24 7507338, %switch.shiftamt
+; CHECK-NEXT: trunc i24 %switch.downshift to i3
+; CHECK-NEXT: ret i3 %switch.masked
+
 define i3 @coveredswitch_test(i3 %input) {
-; CHECK-LABEL: @coveredswitch_test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i3 [[INPUT:%.*]], -2
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[BB8:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = zext i3 [[INPUT]] to i18
-; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i18 [[SWITCH_CAST]], 3
-; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i18 42792, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i18 [[SWITCH_DOWNSHIFT]] to i3
-; CHECK-NEXT:    ret i3 [[SWITCH_MASKED]]
-; CHECK:       bb8:
-; CHECK-NEXT:    ret i3 -2
-;
 entry:
   switch i3 %input, label %bb8 [
-  i3 0, label %bb7
-  i3 1, label %bb
-  i3 2, label %bb3
-  i3 3, label %bb4
-  i3 4, label %bb5
-  i3 5, label %bb6
+    i3 0, label %bb7
+    i3 1, label %bb
+    i3 2, label %bb3
+    i3 3, label %bb4
+    i3 4, label %bb5
+    i3 5, label %bb6
   ]
 
 bb:                                               ; preds = %entry
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
index 05e5e8639374c..a8758a789ec4d 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll
@@ -1,38 +1,19 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -switch-to-lookup -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; RUN: opt < %s -passes='simplify-cfg<switch-to-lookup>' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; In the presence of "-no-jump-tables"="true", simplifycfg should not convert switches to lookup tables.
 
+; CHECK: @switch.table.bar = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1]
+; CHECK-LABEL: foo
+; CHECK-NOT: @switch.table.foo = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1]
+
 define i32 @foo(i32 %c) "no-jump-tables"="true" {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    i32 3, label [[SW_BB3:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb3:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 15, [[SW_DEFAULT]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 42, label %return
-  i32 43, label %sw.bb1
-  i32 44, label %sw.bb2
-  i32 45, label %sw.bb3
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
   ]
 
 sw.bb1: br label %return
@@ -46,24 +27,12 @@ return:
 
 
 define i32 @bar(i32 %c) {
-; CHECK-LABEL: @bar(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[TMP0]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 15
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 42, label %return
-  i32 43, label %sw.bb1
-  i32 44, label %sw.bb2
-  i32 45, label %sw.bb3
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
   ]
 
 sw.bb1: br label %return
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
index 11ba3984f35d4..bee80e6acce07 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -1,31 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=ALL --check-prefix=BMI
 ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+lzcnt < %s | FileCheck %s --check-prefix=ALL --check-prefix=LZCNT
 ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
 
 
 define i64 @test1(i64 %A) {
-; BMI-LABEL: @test1(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
-; BMI-NEXT:    ret i64 [[COND]]
-;
-; LZCNT-LABEL: @test1(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
-; LZCNT-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
-; LZCNT-NEXT:    ret i64 [[SPEC_SELECT]]
-;
-; GENERIC-LABEL: @test1(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true)
-; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
-; GENERIC-NEXT:    ret i64 [[COND]]
-;
+; ALL-LABEL: @test1(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
+; ALL: ret
 entry:
   %tobool = icmp eq i64 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -40,27 +23,11 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test2(i32 %A) {
-; BMI-LABEL: @test2(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; BMI-NEXT:    ret i32 [[COND]]
-;
-; LZCNT-LABEL: @test2(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
-; LZCNT-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; LZCNT-NEXT:    ret i32 [[SPEC_SELECT]]
-;
-; GENERIC-LABEL: @test2(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true)
-; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; GENERIC-NEXT:    ret i32 [[COND]]
-;
+; ALL-LABEL: @test2(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
+; ALL: ret
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -76,27 +43,11 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define signext i16 @test3(i16 signext %A) {
-; BMI-LABEL: @test3(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true)
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test3(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true)
-; LZCNT-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
-; LZCNT-NEXT:    ret i16 [[SPEC_SELECT]]
-;
-; GENERIC-LABEL: @test3(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true)
-; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
-; GENERIC-NEXT:    ret i16 [[COND]]
-;
+; ALL-LABEL: @test3(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
+; ALL: ret
 entry:
   %tobool = icmp eq i16 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -112,27 +63,11 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define i64 @test1b(i64 %A) {
-; BMI-LABEL: @test1b(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
-; BMI-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
-; BMI-NEXT:    ret i64 [[SPEC_SELECT]]
-;
-; LZCNT-LABEL: @test1b(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
-; LZCNT-NEXT:    ret i64 [[COND]]
-;
-; GENERIC-LABEL: @test1b(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true)
-; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]]
-; GENERIC-NEXT:    ret i64 [[COND]]
-;
+; ALL-LABEL: @test1b(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
+; ALL: ret
 entry:
   %tobool = icmp eq i64 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -148,27 +83,11 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define i32 @test2b(i32 %A) {
-; BMI-LABEL: @test2b(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
-; BMI-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; BMI-NEXT:    ret i32 [[SPEC_SELECT]]
-;
-; LZCNT-LABEL: @test2b(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; LZCNT-NEXT:    ret i32 [[COND]]
-;
-; GENERIC-LABEL: @test2b(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true)
-; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]]
-; GENERIC-NEXT:    ret i32 [[COND]]
-;
+; ALL-LABEL: @test2b(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
+; ALL: ret
 entry:
   %tobool = icmp eq i32 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -184,27 +103,11 @@ cond.end:                                         ; preds = %entry, %cond.true
 
 
 define signext i16 @test3b(i16 signext %A) {
-; BMI-LABEL: @test3b(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true)
-; BMI-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
-; BMI-NEXT:    ret i16 [[SPEC_SELECT]]
-;
-; LZCNT-LABEL: @test3b(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true)
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test3b(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true)
-; GENERIC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]]
-; GENERIC-NEXT:    ret i16 [[COND]]
-;
+; ALL-LABEL: @test3b(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
+; ALL: ret
 entry:
   %tobool = icmp eq i16 %A, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -223,38 +126,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 ; for the target.
 
 define i64 @test1e(i32 %x) {
-; BMI-LABEL: @test1e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
-; BMI-NEXT:    ret i64 [[COND]]
-;
-; LZCNT-LABEL: @test1e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i64 [[COND]]
-;
-; GENERIC-LABEL: @test1e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i64 [[COND]]
-;
+; ALL-LABEL: @test1e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTTZ]] to i64
+; BMI-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -270,38 +149,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test2e(i64 %x) {
-; BMI-LABEL: @test2e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
-; BMI-NEXT:    ret i32 [[COND]]
-;
-; LZCNT-LABEL: @test2e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i32 [[COND]]
-;
-; GENERIC-LABEL: @test2e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i32 [[COND]]
-;
+; ALL-LABEL: @test2e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i32
+; BMI-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -317,38 +172,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i64 @test3e(i32 %x) {
-; BMI-LABEL: @test3e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i64 [[COND]]
-;
-; LZCNT-LABEL: @test3e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]]
-; LZCNT-NEXT:    ret i64 [[COND]]
-;
-; GENERIC-LABEL: @test3e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i64 [[COND]]
-;
+; ALL-LABEL: @test3e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTLZ]] to i64
+; LZCNT-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -364,38 +195,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i32 @test4e(i64 %x) {
-; BMI-LABEL: @test4e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i32 [[COND]]
-;
-; LZCNT-LABEL: @test4e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]]
-; LZCNT-NEXT:    ret i32 [[COND]]
-;
-; GENERIC-LABEL: @test4e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i32
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i32 [[COND]]
-;
+; ALL-LABEL: @test4e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i32
+; LZCNT-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -411,38 +218,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test5e(i64 %x) {
-; BMI-LABEL: @test5e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test5e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test5e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
-;
+; ALL-LABEL: @test5e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i16
+; LZCNT-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -458,38 +241,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test6e(i32 %x) {
-; BMI-LABEL: @test6e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; BMI:       cond.true:
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; BMI-NEXT:    br label [[COND_END]]
-; BMI:       cond.end:
-; BMI-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test6e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; LZCNT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test6e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
-;
+; ALL-LABEL: @test6e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTLZ]] to i16
+; LZCNT-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -505,38 +264,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test7e(i64 %x) {
-; BMI-LABEL: @test7e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test7e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test7e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i64 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
-;
+; ALL-LABEL: @test7e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i16
+; BMI-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i64 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
@@ -552,38 +287,14 @@ cond.end:                                         ; preds = %entry, %cond.true
 }
 
 define i16 @test8e(i32 %x) {
-; BMI-LABEL: @test8e(
-; BMI-NEXT:  entry:
-; BMI-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; BMI-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; BMI-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; BMI-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]]
-; BMI-NEXT:    ret i16 [[COND]]
-;
-; LZCNT-LABEL: @test8e(
-; LZCNT-NEXT:  entry:
-; LZCNT-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; LZCNT-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; LZCNT:       cond.true:
-; LZCNT-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; LZCNT-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; LZCNT-NEXT:    br label [[COND_END]]
-; LZCNT:       cond.end:
-; LZCNT-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; LZCNT-NEXT:    ret i16 [[COND]]
-;
-; GENERIC-LABEL: @test8e(
-; GENERIC-NEXT:  entry:
-; GENERIC-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0
-; GENERIC-NEXT:    br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]]
-; GENERIC:       cond.true:
-; GENERIC-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; GENERIC-NEXT:    [[CAST:%.*]] = trunc i32 [[TMP0]] to i16
-; GENERIC-NEXT:    br label [[COND_END]]
-; GENERIC:       cond.end:
-; GENERIC-NEXT:    [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ]
-; GENERIC-NEXT:    ret i16 [[COND]]
-;
+; ALL-LABEL: @test8e(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTTZ]] to i16
+; BMI-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %cond.end, label %cond.true
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index 08d266c7c9122..c42568ffa935e 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 ; RUN: opt -S -passes='simplify-cfg<switch-to-lookup>' < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 
@@ -6,29 +5,25 @@
 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin12.0.0"
 
+; When we have a covered lookup table, make sure we don't delete PHINodes that
+; are cached in PHIs.
+; CHECK-LABEL: @test
+; CHECK: entry:
+; CHECK-NEXT: sub i3 %arg, -4
+; CHECK-NEXT: zext i3 %switch.tableidx to i4
+; CHECK-NEXT: getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 %switch.tableidx.zext
+; CHECK-NEXT: load i64, i64* %switch.gep
+; CHECK-NEXT: add i64
+; CHECK-NEXT: ret i64
 define i64 @test(i3 %arg) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i3 [[ARG:%.*]], -1
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[ARG]] to i4
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]]
-; CHECK-NEXT:    br label [[DEFAULT]]
-; CHECK:       Default:
-; CHECK-NEXT:    [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
-; CHECK-NEXT:    [[V3:%.*]] = add i64 [[V1]], 0
-; CHECK-NEXT:    ret i64 [[V3]]
-;
 entry:
   switch i3 %arg, label %Default [
-  i3 -2, label %Label6
-  i3 1, label %Label1
-  i3 2, label %Label2
-  i3 3, label %Label3
-  i3 -4, label %Label4
-  i3 -3, label %Label5
+    i3 -2, label %Label6
+    i3 1, label %Label1
+    i3 2, label %Label2
+    i3 3, label %Label3
+    i3 -4, label %Label4
+    i3 -3, label %Label5
   ]
 
 Default:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 5d2297f58d404..0b9d6ebe82584 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 ; RUN: opt -S -passes='simplify-cfg<switch-to-lookup>' < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
 
@@ -6,18 +5,21 @@
 target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin12.0.0"
 
+; When tableindex can't fit into i2, we should extend the type to i3.
+; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si
+; CHECK: entry:
+; CHECK-NEXT: sub i2 %0, -2
+; CHECK-NEXT: zext i2 %switch.tableidx to i3
+; CHECK-NEXT: getelementptr inbounds [4 x i64], [4 x i64]* @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 %switch.tableidx.zext
+; CHECK-NEXT: load i64, i64* %switch.gep
+; CHECK-NEXT: ret i64 %switch.load
 define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
-; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TMP0:%.*]] to i64
-; CHECK-NEXT:    ret i64 [[SWITCH_IDX_CAST]]
-;
 entry:
   switch i2 %0, label %1 [
-  i2 0, label %2
-  i2 1, label %3
-  i2 -2, label %4
-  i2 -1, label %5
+    i2 0, label %2
+    i2 1, label %3
+    i2 -2, label %4
+    i2 -1, label %5
   ]
 
 ; <label>:1                                       ; preds = %entry
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 2cdc7257fa932..3128ce4afa60a 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -switch-to-lookup=true -keep-loops=false -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 ; RUN: opt < %s -passes='simplify-cfg<no-keep-loops;switch-to-lookup>' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -20,10 +19,10 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: @switch.table.earlyreturncrash = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5], align 4
 
 ; The table for @large
-; CHECK: @switch.table.large = private unnamed_addr constant [200 x i32] [i32 0, i32 1, i32 4, i32 9,
+; CHECK: @switch.table.large = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9,
 
 ; The table for @cprop
-; CHECK: @switch.table.cprop = private unnamed_addr constant [8 x i32] [i32 123, i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7], align 4
+; CHECK: @switch.table.cprop = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7], align 4
 
 ; The table for @unreachable_case
 ; CHECK: @switch.table.unreachable_case = private unnamed_addr constant [9 x i32] [i32 0, i32 0, i32 0, i32 2, i32 -1, i32 1, i32 1, i32 1, i32 1], align 4
@@ -34,27 +33,15 @@ target triple = "x86_64-unknown-linux-gnu"
 ; so we return early, directly from the lookup bb.
 
 define i32 @f(i32 %c) {
-; CHECK-LABEL: @f(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 [[TMP0]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 15
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 42, label %return
-  i32 43, label %sw.bb1
-  i32 44, label %sw.bb2
-  i32 45, label %sw.bb3
-  i32 46, label %sw.bb4
-  i32 47, label %sw.bb5
-  i32 48, label %sw.bb6
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
+    i32 46, label %sw.bb4
+    i32 47, label %sw.bb5
+    i32 48, label %sw.bb6
   ]
 
 sw.bb1: br label %return
@@ -68,34 +55,33 @@ return:
   %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %retval.0
 
+; CHECK-LABEL: @f(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.f, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
+; CHECK: return:
+; CHECK-NEXT: ret i32 15
 }
 
 ; Same thing, but with i8's
 
 define i8 @char(i32 %c) {
-; CHECK-LABEL: @char(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 9
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 [[TMP0]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i8, i8* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i8 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i8 15
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 42, label %return
-  i32 43, label %sw.bb1
-  i32 44, label %sw.bb2
-  i32 45, label %sw.bb3
-  i32 46, label %sw.bb4
-  i32 47, label %sw.bb5
-  i32 48, label %sw.bb6
-  i32 49, label %sw.bb7
-  i32 50, label %sw.bb8
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
+    i32 46, label %sw.bb4
+    i32 47, label %sw.bb5
+    i32 48, label %sw.bb6
+    i32 49, label %sw.bb7
+    i32 50, label %sw.bb8
   ]
 
 sw.bb1: br label %return
@@ -111,35 +97,29 @@ return:
   %retval.0 = phi i8 [ 15, %sw.default ], [ 84, %sw.bb8 ], [ 33, %sw.bb7 ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i8 %retval.0
 
+; CHECK-LABEL: @char(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 9
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [9 x i8], [9 x i8]* @switch.table.char, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i8, i8* %switch.gep
+; CHECK-NEXT: ret i8 %switch.load
+; CHECK: return:
+; CHECK-NEXT: ret i8 15
 }
 
 ; A switch used to initialize two variables, an i8 and a float.
 
 declare void @dummy(i8 signext, float)
 define void @h(i32 %x) {
-; CHECK-LABEL: @h(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SW_EPILOG:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i32 [[X]], 8
-; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i32 89655594, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i32 [[SWITCH_DOWNSHIFT]] to i8
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x float], [4 x float]* @switch.table.h, i32 0, i32 [[X]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load float, float* [[SWITCH_GEP]]
-; CHECK-NEXT:    br label [[SW_EPILOG]]
-; CHECK:       sw.epilog:
-; CHECK-NEXT:    [[A_0:%.*]] = phi i8 [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ 7, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[B_0:%.*]] = phi float [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ 0x4023FAE140000000, [[ENTRY]] ]
-; CHECK-NEXT:    call void @dummy(i8 signext [[A_0]], float [[B_0]])
-; CHECK-NEXT:    ret void
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.epilog
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %sw.epilog
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %sw.epilog
@@ -153,6 +133,22 @@ sw.epilog:
   call void @dummy(i8 signext %a.0, float %b.0)
   ret void
 
+; CHECK-LABEL: @h(
+; CHECK: entry:
+; CHECK-NEXT: %0 = icmp ult i32 %x, 4
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.shiftamt = mul i32 %x, 8
+; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float], [4 x float]* @switch.table.h, i32 0, i32 %x
+; CHECK-NEXT: %switch.load = load float, float* %switch.gep
+; CHECK-NEXT: br label %sw.epilog
+; CHECK: sw.epilog:
+; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
+; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
+; CHECK-NEXT: ret void
 }
 
 
@@ -165,23 +161,12 @@ sw.epilog:
 @.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
 
 define i8* @foostring(i32 %x)  {
-; CHECK-LABEL: @foostring(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.foostring, i32 0, i32 [[X]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i8*, i8** [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i8* [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0)
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %return
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %return
@@ -191,35 +176,32 @@ sw.default: br label %return
 
 return:
   %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str4, i64 0, i64 0), %sw.default ],
-  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
-  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
-  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
-  [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), %entry ]
+                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+                      [ getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), %entry ]
   ret i8* %retval.0
 
+; CHECK-LABEL: @foostring(
+; CHECK: entry:
+; CHECK-NEXT: %0 = icmp ult i32 %x, 4
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table.foostring, i32 0, i32 %x
+; CHECK-NEXT: %switch.load = load i8*, i8** %switch.gep
+; CHECK-NEXT: ret i8* %switch.load
 }
 
 ; Switch used to initialize two values. The first value is returned, the second
 ; value is not used. This used to make the transformation generate illegal code.
 
 define i32 @earlyreturncrash(i32 %x)  {
-; CHECK-LABEL: @earlyreturncrash(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SW_EPILOG:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.earlyreturncrash, i32 0, i32 [[X]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       sw.epilog:
-; CHECK-NEXT:    ret i32 7
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.epilog
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %sw.epilog
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %sw.epilog
@@ -232,6 +214,13 @@ sw.epilog:
   %b.0 = phi i32 [ 10, %sw.default ], [ 5, %sw.bb3 ], [ 1, %sw.bb2 ], [ 4, %sw.bb1 ], [ 3, %entry ]
   ret i32 %a.0
 
+; CHECK-LABEL: @earlyreturncrash(
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.earlyreturncrash, i32 0, i32 %x
+; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
+; CHECK: sw.epilog:
+; CHECK-NEXT: ret i32 7
 }
 
 
@@ -240,40 +229,21 @@ sw.epilog:
 ; can be packed into a bitmap.
 
 define i32 @crud(i8 zeroext %c)  {
-; CHECK-LABEL: @crud(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 [[C]], 34
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 59
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[LOR_END]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = zext i8 [[TMP0]] to i59
-; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i59 [[SWITCH_CAST]], 1
-; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i59 -288230375765830623, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i59 [[SWITCH_DOWNSHIFT]] to i1
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[SWITCH_MASKED]], [[SWITCH_LOOKUP]] ], [ false, [[SWITCH_EARLY_TEST]] ]
-; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP2]] to i32
-; CHECK-NEXT:    ret i32 [[LOR_EXT]]
-;
 entry:
   %cmp = icmp ult i8 %c, 33
   br i1 %cmp, label %lor.end, label %switch.early.test
 
 switch.early.test:
   switch i8 %c, label %lor.rhs [
-  i8 92, label %lor.end
-  i8 62, label %lor.end
-  i8 60, label %lor.end
-  i8 59, label %lor.end
-  i8 58, label %lor.end
-  i8 46, label %lor.end
-  i8 44, label %lor.end
-  i8 34, label %lor.end
-  i8 39, label %switch.edge
+    i8 92, label %lor.end
+    i8 62, label %lor.end
+    i8 60, label %lor.end
+    i8 59, label %lor.end
+    i8 58, label %lor.end
+    i8 46, label %lor.end
+    i8 44, label %lor.end
+    i8 34, label %lor.end
+    i8 39, label %switch.edge
   ]
 
 switch.edge: br label %lor.end
@@ -281,50 +251,49 @@ lor.rhs: br label %lor.end
 
 lor.end:
   %0 = phi i1 [ true, %switch.early.test ],
-  [ false, %lor.rhs ],
-  [ true, %entry ],
-  [ true, %switch.early.test ],
-  [ true, %switch.early.test ],
-  [ true, %switch.early.test ],
-  [ true, %switch.early.test ],
-  [ true, %switch.early.test ],
-  [ true, %switch.early.test ],
-  [ true, %switch.early.test ],
-  [ true, %switch.edge ]
+              [ false, %lor.rhs ],
+              [ true, %entry ],
+              [ true, %switch.early.test ],
+              [ true, %switch.early.test ],
+              [ true, %switch.early.test ],
+              [ true, %switch.early.test ],
+              [ true, %switch.early.test ],
+              [ true, %switch.early.test ],
+              [ true, %switch.early.test ],
+              [ true, %switch.edge ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
+; CHECK-LABEL: @crud(
+; CHECK: entry:
+; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
+; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
+; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.cast = zext i8 %switch.tableidx to i59
+; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.cast, 1
+; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
+; CHECK-NEXT: br label %lor.end
+; CHECK: lor.end:
+; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
+; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
+; CHECK-NEXT: ret i32 %lor.ext
 }
 
 ; PR13946
 define i32 @overflow(i32 %type) {
-; CHECK-LABEL: @overflow(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[TYPE:%.*]], -2147483645
-; CHECK-NEXT:    switch i32 [[TMP0]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 -2147483648, label [[SW_BB3:%.*]]
-; CHECK-NEXT:    i32 0, label [[SW_BB3]]
-; CHECK-NEXT:    i32 2147483646, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2147483647, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       sw.bb3:
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[DIRENT_TYPE_0:%.*]] = phi i32 [ 6, [[SW_BB3]] ], [ 5, [[SW_BB2]] ], [ 0, [[SW_BB1]] ], [ 3, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[DIRENT_TYPE_0]]
-;
 entry:
   switch i32 %type, label %sw.default [
-  i32 -2147483648, label %sw.bb
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 -2147483645, label %sw.bb3
-  i32 3, label %sw.bb3
+    i32 -2147483648, label %sw.bb
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 -2147483645, label %sw.bb3
+    i32 3, label %sw.bb3
   ]
 
 sw.bb: br label %if.end
@@ -337,29 +306,19 @@ if.else: br label %if.end
 if.end:
   %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
   ret i32 %dirent_type.0
+; CHECK-LABEL: define i32 @overflow(
+; CHECK: switch
+; CHECK: phi
 }
 
 ; PR13985
 define i1 @undef(i32 %tmp) {
-; CHECK-LABEL: @undef(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[TMP:%.*]], 9
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[BB3:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_CAST:%.*]] = trunc i32 [[TMP]] to i9
-; CHECK-NEXT:    [[SWITCH_SHIFTAMT:%.*]] = mul i9 [[SWITCH_CAST]], 1
-; CHECK-NEXT:    [[SWITCH_DOWNSHIFT:%.*]] = lshr i9 3, [[SWITCH_SHIFTAMT]]
-; CHECK-NEXT:    [[SWITCH_MASKED:%.*]] = trunc i9 [[SWITCH_DOWNSHIFT]] to i1
-; CHECK-NEXT:    ret i1 [[SWITCH_MASKED]]
-; CHECK:       bb3:
-; CHECK-NEXT:    ret i1 undef
-;
 bb:
   switch i32 %tmp, label %bb3 [
-  i32 0, label %bb1
-  i32 1, label %bb1
-  i32 7, label %bb2
-  i32 8, label %bb2
+    i32 0, label %bb1
+    i32 1, label %bb1
+    i32 7, label %bb2
+    i32 8, label %bb2
   ]
 
 bb1: br label %bb3
@@ -368,26 +327,16 @@ bb2: br label %bb3
 bb3:
   %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ]
   ret i1 %tmp4
+; CHECK-LABEL: define i1 @undef(
+; CHECK: %switch.cast = trunc i32 %tmp to i9
+; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt
 }
 
 ; Also handle large switches that would be rejected by
 ; isValueEqualityComparison()
-
+; CHECK: large
+; CHECK-NOT: switch i32
 define i32 @large(i32 %x) {
-; CHECK-LABEL: @large(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
-; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[X]], -10
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP]], i32 [[MUL]], i32 [[X]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SPEC_SELECT]], 200
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [200 x i32], [200 x i32]* @switch.table.large, i32 0, i32 [[SPEC_SELECT]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 0
-;
 entry:
   %cmp = icmp slt i32 %x, 0
   br i1 %cmp, label %if.then, label %if.end
@@ -399,205 +348,205 @@ if.then:
 if.end:
   %x.addr.0 = phi i32 [ %mul, %if.then ], [ %x, %entry ]
   switch i32 %x.addr.0, label %return [
-  i32 199, label %sw.bb203
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
-  i32 4, label %sw.bb4
-  i32 5, label %sw.bb5
-  i32 6, label %sw.bb6
-  i32 7, label %sw.bb7
-  i32 8, label %sw.bb8
-  i32 9, label %sw.bb9
-  i32 10, label %sw.bb10
-  i32 11, label %sw.bb11
-  i32 12, label %sw.bb12
-  i32 13, label %sw.bb13
-  i32 14, label %sw.bb14
-  i32 15, label %sw.bb15
-  i32 16, label %sw.bb16
-  i32 17, label %sw.bb17
-  i32 18, label %sw.bb18
-  i32 19, label %sw.bb19
-  i32 20, label %sw.bb20
-  i32 21, label %sw.bb21
-  i32 22, label %sw.bb22
-  i32 23, label %sw.bb23
-  i32 24, label %sw.bb24
-  i32 25, label %sw.bb25
-  i32 26, label %sw.bb26
-  i32 27, label %sw.bb27
-  i32 28, label %sw.bb28
-  i32 29, label %sw.bb29
-  i32 30, label %sw.bb30
-  i32 31, label %sw.bb31
-  i32 32, label %sw.bb32
-  i32 33, label %sw.bb33
-  i32 34, label %sw.bb34
-  i32 35, label %sw.bb35
-  i32 36, label %sw.bb37
-  i32 37, label %sw.bb38
-  i32 38, label %sw.bb39
-  i32 39, label %sw.bb40
-  i32 40, label %sw.bb41
-  i32 41, label %sw.bb42
-  i32 42, label %sw.bb43
-  i32 43, label %sw.bb44
-  i32 44, label %sw.bb45
-  i32 45, label %sw.bb47
-  i32 46, label %sw.bb48
-  i32 47, label %sw.bb49
-  i32 48, label %sw.bb50
-  i32 49, label %sw.bb51
-  i32 50, label %sw.bb52
-  i32 51, label %sw.bb53
-  i32 52, label %sw.bb54
-  i32 53, label %sw.bb55
-  i32 54, label %sw.bb56
-  i32 55, label %sw.bb58
-  i32 56, label %sw.bb59
-  i32 57, label %sw.bb60
-  i32 58, label %sw.bb61
-  i32 59, label %sw.bb62
-  i32 60, label %sw.bb63
-  i32 61, label %sw.bb64
-  i32 62, label %sw.bb65
-  i32 63, label %sw.bb66
-  i32 64, label %sw.bb67
-  i32 65, label %sw.bb68
-  i32 66, label %sw.bb69
-  i32 67, label %sw.bb70
-  i32 68, label %sw.bb71
-  i32 69, label %sw.bb72
-  i32 70, label %sw.bb73
-  i32 71, label %sw.bb74
-  i32 72, label %sw.bb76
-  i32 73, label %sw.bb77
-  i32 74, label %sw.bb78
-  i32 75, label %sw.bb79
-  i32 76, label %sw.bb80
-  i32 77, label %sw.bb81
-  i32 78, label %sw.bb82
-  i32 79, label %sw.bb83
-  i32 80, label %sw.bb84
-  i32 81, label %sw.bb85
-  i32 82, label %sw.bb86
-  i32 83, label %sw.bb87
-  i32 84, label %sw.bb88
-  i32 85, label %sw.bb89
-  i32 86, label %sw.bb90
-  i32 87, label %sw.bb91
-  i32 88, label %sw.bb92
-  i32 89, label %sw.bb93
-  i32 90, label %sw.bb94
-  i32 91, label %sw.bb95
-  i32 92, label %sw.bb96
-  i32 93, label %sw.bb97
-  i32 94, label %sw.bb98
-  i32 95, label %sw.bb99
-  i32 96, label %sw.bb100
-  i32 97, label %sw.bb101
-  i32 98, label %sw.bb102
-  i32 99, label %sw.bb103
-  i32 100, label %sw.bb104
-  i32 101, label %sw.bb105
-  i32 102, label %sw.bb106
-  i32 103, label %sw.bb107
-  i32 104, label %sw.bb108
-  i32 105, label %sw.bb109
-  i32 106, label %sw.bb110
-  i32 107, label %sw.bb111
-  i32 108, label %sw.bb112
-  i32 109, label %sw.bb113
-  i32 110, label %sw.bb114
-  i32 111, label %sw.bb115
-  i32 112, label %sw.bb116
-  i32 113, label %sw.bb117
-  i32 114, label %sw.bb118
-  i32 115, label %sw.bb119
-  i32 116, label %sw.bb120
-  i32 117, label %sw.bb121
-  i32 118, label %sw.bb122
-  i32 119, label %sw.bb123
-  i32 120, label %sw.bb124
-  i32 121, label %sw.bb125
-  i32 122, label %sw.bb126
-  i32 123, label %sw.bb127
-  i32 124, label %sw.bb128
-  i32 125, label %sw.bb129
-  i32 126, label %sw.bb130
-  i32 127, label %sw.bb131
-  i32 128, label %sw.bb132
-  i32 129, label %sw.bb133
-  i32 130, label %sw.bb134
-  i32 131, label %sw.bb135
-  i32 132, label %sw.bb136
-  i32 133, label %sw.bb137
-  i32 134, label %sw.bb138
-  i32 135, label %sw.bb139
-  i32 136, label %sw.bb140
-  i32 137, label %sw.bb141
-  i32 138, label %sw.bb142
-  i32 139, label %sw.bb143
-  i32 140, label %sw.bb144
-  i32 141, label %sw.bb145
-  i32 142, label %sw.bb146
-  i32 143, label %sw.bb147
-  i32 144, label %sw.bb148
-  i32 145, label %sw.bb149
-  i32 146, label %sw.bb150
-  i32 147, label %sw.bb151
-  i32 148, label %sw.bb152
-  i32 149, label %sw.bb153
-  i32 150, label %sw.bb154
-  i32 151, label %sw.bb155
-  i32 152, label %sw.bb156
-  i32 153, label %sw.bb157
-  i32 154, label %sw.bb158
-  i32 155, label %sw.bb159
-  i32 156, label %sw.bb160
-  i32 157, label %sw.bb161
-  i32 158, label %sw.bb162
-  i32 159, label %sw.bb163
-  i32 160, label %sw.bb164
-  i32 161, label %sw.bb165
-  i32 162, label %sw.bb166
-  i32 163, label %sw.bb167
-  i32 164, label %sw.bb168
-  i32 165, label %sw.bb169
-  i32 166, label %sw.bb170
-  i32 167, label %sw.bb171
-  i32 168, label %sw.bb172
-  i32 169, label %sw.bb173
-  i32 170, label %sw.bb174
-  i32 171, label %sw.bb175
-  i32 172, label %sw.bb176
-  i32 173, label %sw.bb177
-  i32 174, label %sw.bb178
-  i32 175, label %sw.bb179
-  i32 176, label %sw.bb180
-  i32 177, label %sw.bb181
-  i32 178, label %sw.bb182
-  i32 179, label %sw.bb183
-  i32 180, label %sw.bb184
-  i32 181, label %sw.bb185
-  i32 182, label %sw.bb186
-  i32 183, label %sw.bb187
-  i32 184, label %sw.bb188
-  i32 185, label %sw.bb189
-  i32 186, label %sw.bb190
-  i32 187, label %sw.bb191
-  i32 188, label %sw.bb192
-  i32 189, label %sw.bb193
-  i32 190, label %sw.bb194
-  i32 191, label %sw.bb195
-  i32 192, label %sw.bb196
-  i32 193, label %sw.bb197
-  i32 194, label %sw.bb198
-  i32 195, label %sw.bb199
-  i32 196, label %sw.bb200
-  i32 197, label %sw.bb201
-  i32 198, label %sw.bb202
+    i32 199, label %sw.bb203
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+    i32 4, label %sw.bb4
+    i32 5, label %sw.bb5
+    i32 6, label %sw.bb6
+    i32 7, label %sw.bb7
+    i32 8, label %sw.bb8
+    i32 9, label %sw.bb9
+    i32 10, label %sw.bb10
+    i32 11, label %sw.bb11
+    i32 12, label %sw.bb12
+    i32 13, label %sw.bb13
+    i32 14, label %sw.bb14
+    i32 15, label %sw.bb15
+    i32 16, label %sw.bb16
+    i32 17, label %sw.bb17
+    i32 18, label %sw.bb18
+    i32 19, label %sw.bb19
+    i32 20, label %sw.bb20
+    i32 21, label %sw.bb21
+    i32 22, label %sw.bb22
+    i32 23, label %sw.bb23
+    i32 24, label %sw.bb24
+    i32 25, label %sw.bb25
+    i32 26, label %sw.bb26
+    i32 27, label %sw.bb27
+    i32 28, label %sw.bb28
+    i32 29, label %sw.bb29
+    i32 30, label %sw.bb30
+    i32 31, label %sw.bb31
+    i32 32, label %sw.bb32
+    i32 33, label %sw.bb33
+    i32 34, label %sw.bb34
+    i32 35, label %sw.bb35
+    i32 36, label %sw.bb37
+    i32 37, label %sw.bb38
+    i32 38, label %sw.bb39
+    i32 39, label %sw.bb40
+    i32 40, label %sw.bb41
+    i32 41, label %sw.bb42
+    i32 42, label %sw.bb43
+    i32 43, label %sw.bb44
+    i32 44, label %sw.bb45
+    i32 45, label %sw.bb47
+    i32 46, label %sw.bb48
+    i32 47, label %sw.bb49
+    i32 48, label %sw.bb50
+    i32 49, label %sw.bb51
+    i32 50, label %sw.bb52
+    i32 51, label %sw.bb53
+    i32 52, label %sw.bb54
+    i32 53, label %sw.bb55
+    i32 54, label %sw.bb56
+    i32 55, label %sw.bb58
+    i32 56, label %sw.bb59
+    i32 57, label %sw.bb60
+    i32 58, label %sw.bb61
+    i32 59, label %sw.bb62
+    i32 60, label %sw.bb63
+    i32 61, label %sw.bb64
+    i32 62, label %sw.bb65
+    i32 63, label %sw.bb66
+    i32 64, label %sw.bb67
+    i32 65, label %sw.bb68
+    i32 66, label %sw.bb69
+    i32 67, label %sw.bb70
+    i32 68, label %sw.bb71
+    i32 69, label %sw.bb72
+    i32 70, label %sw.bb73
+    i32 71, label %sw.bb74
+    i32 72, label %sw.bb76
+    i32 73, label %sw.bb77
+    i32 74, label %sw.bb78
+    i32 75, label %sw.bb79
+    i32 76, label %sw.bb80
+    i32 77, label %sw.bb81
+    i32 78, label %sw.bb82
+    i32 79, label %sw.bb83
+    i32 80, label %sw.bb84
+    i32 81, label %sw.bb85
+    i32 82, label %sw.bb86
+    i32 83, label %sw.bb87
+    i32 84, label %sw.bb88
+    i32 85, label %sw.bb89
+    i32 86, label %sw.bb90
+    i32 87, label %sw.bb91
+    i32 88, label %sw.bb92
+    i32 89, label %sw.bb93
+    i32 90, label %sw.bb94
+    i32 91, label %sw.bb95
+    i32 92, label %sw.bb96
+    i32 93, label %sw.bb97
+    i32 94, label %sw.bb98
+    i32 95, label %sw.bb99
+    i32 96, label %sw.bb100
+    i32 97, label %sw.bb101
+    i32 98, label %sw.bb102
+    i32 99, label %sw.bb103
+    i32 100, label %sw.bb104
+    i32 101, label %sw.bb105
+    i32 102, label %sw.bb106
+    i32 103, label %sw.bb107
+    i32 104, label %sw.bb108
+    i32 105, label %sw.bb109
+    i32 106, label %sw.bb110
+    i32 107, label %sw.bb111
+    i32 108, label %sw.bb112
+    i32 109, label %sw.bb113
+    i32 110, label %sw.bb114
+    i32 111, label %sw.bb115
+    i32 112, label %sw.bb116
+    i32 113, label %sw.bb117
+    i32 114, label %sw.bb118
+    i32 115, label %sw.bb119
+    i32 116, label %sw.bb120
+    i32 117, label %sw.bb121
+    i32 118, label %sw.bb122
+    i32 119, label %sw.bb123
+    i32 120, label %sw.bb124
+    i32 121, label %sw.bb125
+    i32 122, label %sw.bb126
+    i32 123, label %sw.bb127
+    i32 124, label %sw.bb128
+    i32 125, label %sw.bb129
+    i32 126, label %sw.bb130
+    i32 127, label %sw.bb131
+    i32 128, label %sw.bb132
+    i32 129, label %sw.bb133
+    i32 130, label %sw.bb134
+    i32 131, label %sw.bb135
+    i32 132, label %sw.bb136
+    i32 133, label %sw.bb137
+    i32 134, label %sw.bb138
+    i32 135, label %sw.bb139
+    i32 136, label %sw.bb140
+    i32 137, label %sw.bb141
+    i32 138, label %sw.bb142
+    i32 139, label %sw.bb143
+    i32 140, label %sw.bb144
+    i32 141, label %sw.bb145
+    i32 142, label %sw.bb146
+    i32 143, label %sw.bb147
+    i32 144, label %sw.bb148
+    i32 145, label %sw.bb149
+    i32 146, label %sw.bb150
+    i32 147, label %sw.bb151
+    i32 148, label %sw.bb152
+    i32 149, label %sw.bb153
+    i32 150, label %sw.bb154
+    i32 151, label %sw.bb155
+    i32 152, label %sw.bb156
+    i32 153, label %sw.bb157
+    i32 154, label %sw.bb158
+    i32 155, label %sw.bb159
+    i32 156, label %sw.bb160
+    i32 157, label %sw.bb161
+    i32 158, label %sw.bb162
+    i32 159, label %sw.bb163
+    i32 160, label %sw.bb164
+    i32 161, label %sw.bb165
+    i32 162, label %sw.bb166
+    i32 163, label %sw.bb167
+    i32 164, label %sw.bb168
+    i32 165, label %sw.bb169
+    i32 166, label %sw.bb170
+    i32 167, label %sw.bb171
+    i32 168, label %sw.bb172
+    i32 169, label %sw.bb173
+    i32 170, label %sw.bb174
+    i32 171, label %sw.bb175
+    i32 172, label %sw.bb176
+    i32 173, label %sw.bb177
+    i32 174, label %sw.bb178
+    i32 175, label %sw.bb179
+    i32 176, label %sw.bb180
+    i32 177, label %sw.bb181
+    i32 178, label %sw.bb182
+    i32 179, label %sw.bb183
+    i32 180, label %sw.bb184
+    i32 181, label %sw.bb185
+    i32 182, label %sw.bb186
+    i32 183, label %sw.bb187
+    i32 184, label %sw.bb188
+    i32 185, label %sw.bb189
+    i32 186, label %sw.bb190
+    i32 187, label %sw.bb191
+    i32 188, label %sw.bb192
+    i32 189, label %sw.bb193
+    i32 190, label %sw.bb194
+    i32 191, label %sw.bb195
+    i32 192, label %sw.bb196
+    i32 193, label %sw.bb197
+    i32 194, label %sw.bb198
+    i32 195, label %sw.bb199
+    i32 196, label %sw.bb200
+    i32 197, label %sw.bb201
+    i32 198, label %sw.bb202
   ]
 
 sw.bb1: br label %return
@@ -806,26 +755,15 @@ return:
 }
 
 define i32 @cprop(i32 %x, i32 %y) {
-; CHECK-LABEL: @cprop(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 8
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @switch.table.cprop, i32 0, i32 [[X]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 123
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 1, label %return
-  i32 2, label %sw.bb1
-  i32 3, label %sw.bb2
-  i32 4, label %sw.bb2
-  i32 5, label %sw.bb2
-  i32 6, label %sw.bb3
-  i32 7, label %sw.bb3
+    i32 1, label %return
+    i32 2, label %sw.bb1
+    i32 3, label %sw.bb2
+    i32 4, label %sw.bb2
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb3
+    i32 7, label %sw.bb3
   ]
 
 sw.bb1: br label %return
@@ -852,31 +790,23 @@ return:
   %retval.0 = phi i32 [ 123, %sw.default ], [ %select, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
   ret i32 %retval.0
 
+; CHECK-LABEL: @cprop(
+; CHECK: switch.lookup:
+; CHECK: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table.cprop, i32 0, i32 %switch.tableidx
 }
 
 define i32 @unreachable_case(i32 %x)  {
-; CHECK-LABEL: @unreachable_case(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 9
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.unreachable_case, i32 0, i32 [[X]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 2
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb
-  i32 2, label %sw.bb
-  i32 3, label %sw.bb1
-  i32 4, label %sw.bb2
-  i32 5, label %sw.bb3
-  i32 6, label %sw.bb3
-  i32 7, label %sw.bb3
-  i32 8, label %sw.bb3
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb
+    i32 3, label %sw.bb1
+    i32 4, label %sw.bb2
+    i32 5, label %sw.bb3
+    i32 6, label %sw.bb3
+    i32 7, label %sw.bb3
+    i32 8, label %sw.bb3
   ]
 
 sw.bb: br label %return
@@ -889,21 +819,18 @@ return:
   %retval.0 = phi i32 [ 1, %sw.bb3 ], [ -1, %sw.bb2 ], [ 0, %sw.bb ], [ 2, %sw.default ]
   ret i32 %retval.0
 
+; CHECK-LABEL: @unreachable_case(
+; CHECK: switch.lookup:
+; CHECK: getelementptr inbounds [9 x i32], [9 x i32]* @switch.table.unreachable_case, i32 0, i32 %x
 }
 
 define i32 @unreachable_default(i32 %x)  {
-; CHECK-LABEL: @unreachable_default(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.unreachable_default, i32 0, i32 [[X:%.*]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-;
 entry:
   switch i32 %x, label %default [
-  i32 0, label %bb0
-  i32 1, label %bb1
-  i32 2, label %bb2
-  i32 3, label %bb3
+    i32 0, label %bb0
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
   ]
 
 bb0: br label %return
@@ -916,41 +843,24 @@ return:
   %retval = phi i32 [ 42, %bb0 ], [ 52, %bb1 ], [ 1, %bb2 ], [ 2, %bb3 ]
   ret i32 %retval
 
+; CHECK-LABEL: @unreachable_default(
+; CHECK: entry:
+; CHECK-NOT: icmp
+; CHECK-NOT: br 1i
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.unreachable_default, i32 0, i32 %x
+; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
 }
 
 ; Don't create a table with illegal type
 define i96 @illegaltype(i32 %c) {
-; CHECK-LABEL: @illegaltype(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 42
-; CHECK-NEXT:    switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    i32 3, label [[SW_BB3:%.*]]
-; CHECK-NEXT:    i32 4, label [[SW_BB4:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb3:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb4:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i96 [ 15, [[SW_DEFAULT]] ], [ 27, [[SW_BB4]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i96 [[RETVAL_0]]
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 42, label %return
-  i32 43, label %sw.bb1
-  i32 44, label %sw.bb2
-  i32 45, label %sw.bb3
-  i32 46, label %sw.bb4
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
+    i32 46, label %sw.bb4
   ]
 
 sw.bb1: br label %return
@@ -962,107 +872,75 @@ return:
   %retval.0 = phi i96 [ 15, %sw.default ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i96 %retval.0
 
+; CHECK-LABEL: @illegaltype(
+; CHECK-NOT: @switch.table
+; CHECK: switch i32 %c
 }
 
 ; If we can build a lookup table without any holes, we don't need a default result.
 declare void @exit(i32)
 define i32 @nodefaultnoholes(i32 %c) {
-; CHECK-LABEL: @nodefaultnoholes(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 4
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT:%.*]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    call void @exit(i32 1)
-; CHECK-NEXT:    unreachable
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.nodefaultnoholes, i32 0, i32 [[C]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 0, label %return
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 
 sw.bb1: br label %return
 sw.bb2: br label %return
 sw.bb3: br label %return
 sw.default: call void @exit(i32 1)
-  unreachable
+            unreachable
 return:
   %x = phi i32 [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %x
 
+; CHECK-LABEL: @nodefaultnoholes(
+; CHECK: @switch.table
+; CHECK-NOT: switch i32
 }
 
 ; This lookup table will have holes, so we need to test for the holes.
 define i32 @nodefaultwithholes(i32 %c) {
-; CHECK-LABEL: @nodefaultwithholes(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 6
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[SW_DEFAULT:%.*]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    call void @exit(i32 1)
-; CHECK-NEXT:    unreachable
-; CHECK:       switch.hole_check:
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[C]], 0
-; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP1]] to i8
-; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 47, [[SWITCH_MASKINDEX]]
-; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @switch.table.nodefaultwithholes, i32 0, i32 [[TMP1]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 0, label %return
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
-  i32 5, label %sw.bb3
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+    i32 5, label %sw.bb3
   ]
 
 sw.bb1: br label %return
 sw.bb2: br label %return
 sw.bb3: br label %return
 sw.default: call void @exit(i32 1)
-  unreachable
+            unreachable
 return:
   %x = phi i32 [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
   ret i32 %x
 
+; CHECK-LABEL: @nodefaultwithholes(
+; CHECK: entry:
+; CHECK: br i1 %{{.*}}, label %switch.hole_check, label %sw.default
+; CHECK: switch.hole_check:
+; CHECK-NEXT: %switch.maskindex = trunc i32 %c to i8
+; CHECK-NEXT: %switch.shifted = lshr i8 47, %switch.maskindex
 ; The mask is binary 101111.
+; CHECK-NEXT: %switch.lobit = trunc i8 %switch.shifted to i1
+; CHECK-NEXT: br i1 %switch.lobit, label %switch.lookup, label %sw.default
+; CHECK-NOT: switch i32
 }
 
 ; We don't build lookup tables with holes for switches with less than four cases.
 define i32 @threecasesholes(i32 %c) {
-; CHECK-LABEL: @threecasesholes(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 3, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[C]], [[SW_DEFAULT]] ], [ 5, [[SW_BB2]] ], [ 7, [[SW_BB1]] ], [ 9, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[X]]
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 0, label %return
-  i32 1, label %sw.bb1
-  i32 3, label %sw.bb2
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 3, label %sw.bb2
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1070,16 +948,19 @@ sw.default: br label %return
 return:
   %x = phi i32 [ %c, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 9, %entry ]
   ret i32 %x
+; CHECK-LABEL: @threecasesholes(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
 }
 
 ; We build lookup tables for switches with three or more cases.
 define i32 @threecases(i32 %c) {
 ; CHECK-LABEL: @threecases(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 3
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 %c, 3
+; CHECK-NEXT:    br i1 [[TMP0]], label %switch.lookup, label %return
 ; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.threecases, i32 0, i32 [[C]]
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.threecases, i32 0, i32 %c
 ; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_LOAD]]
 ; CHECK:       return:
@@ -1106,9 +987,9 @@ return:
 define i32 @twocases(i32 %c) {
 ; CHECK-LABEL: @twocases(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 [[C:%.*]], 1
-; CHECK-NEXT:    [[SWITCH_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP]], i32 7, i32 3
-; CHECK-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 [[C]], 0
+; CHECK-NEXT:    [[SWITCH_SELECTCMP:%.*]] = icmp eq i32 %c, 1
+; CHECK-NEXT:    [[SWITCH_SELECT:%.*]] = select i1 [[SWITCH_SELECTCMP:%.*]], i32 7, i32 3
+; CHECK-NEXT:    [[SWITCH_SELECTCMP1:%.*]] = icmp eq i32 %c, 0
 ; CHECK-NEXT:    [[SWITCH_SELECT2:%.*]] = select i1 [[SWITCH_SELECTCMP1]], i32 9, i32 [[SWITCH_SELECT]]
 ; CHECK-NEXT:    ret i32 [[SWITCH_SELECT2]]
 ;
@@ -1132,28 +1013,11 @@ return:
 @tls_c = thread_local global i32 0
 @tls_d = thread_local global i32 0
 define i32* @tls(i32 %x) {
-; CHECK-LABEL: @tls(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ @tls_d, [[SW_DEFAULT]] ], [ @tls_c, [[SW_BB2]] ], [ @tls_b, [[SW_BB1]] ], [ @tls_a, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32* [[RETVAL_0]]
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %return
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
   ]
 sw.bb1:
   br label %return
@@ -1164,6 +1028,9 @@ sw.default:
 return:
   %retval.0 = phi i32* [ @tls_d, %sw.default ], [ @tls_c, %sw.bb2 ], [ @tls_b, %sw.bb1 ], [ @tls_a, %entry ]
   ret i32* %retval.0
+; CHECK-LABEL: @tls(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
 }
 
 ; Don't build tables for switches with dllimport variables.
@@ -1172,28 +1039,11 @@ return:
 @dllimport_c = external dllimport global [3x i32]
 @dllimport_d = external dllimport global [3x i32]
 define i32* @dllimport(i32 %x) {
-; CHECK-LABEL: @dllimport(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       sw.bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       sw.default:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_d, i32 0, i32 0), [[SW_DEFAULT]] ], [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_c, i32 0, i32 0), [[SW_BB2]] ], [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_b, i32 0, i32 0), [[SW_BB1]] ], [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_a, i32 0, i32 0), [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32* [[RETVAL_0]]
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %return
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
+    i32 0, label %return
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
   ]
 sw.bb1:
   br label %return
@@ -1203,33 +1053,23 @@ sw.default:
   br label %return
 return:
   %retval.0 = phi i32* [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_d, i32 0, i32 0), %sw.default ],
-  [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_c, i32 0, i32 0), %sw.bb2 ],
-  [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_b, i32 0, i32 0), %sw.bb1 ],
-  [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_a, i32 0, i32 0), %entry ]
+                       [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_c, i32 0, i32 0), %sw.bb2 ],
+                       [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_b, i32 0, i32 0), %sw.bb1 ],
+                       [ getelementptr inbounds ([3 x i32], [3 x i32]* @dllimport_a, i32 0, i32 0), %entry ]
   ret i32* %retval.0
+; CHECK-LABEL: @dllimport(
+; CHECK: switch i32
+; CHECK-NOT: @switch.table
 }
 
 ; We can use linear mapping.
 define i8 @linearmap1(i32 %c) {
-; CHECK-LABEL: @linearmap1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 10
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], -5
-; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i8 [[SWITCH_IDX_MULT]], 18
-; CHECK-NEXT:    ret i8 [[SWITCH_OFFSET]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i8 3
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 10, label %return
-  i32 11, label %sw.bb1
-  i32 12, label %sw.bb2
-  i32 13, label %sw.bb3
+    i32 10, label %return
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb2
+    i32 13, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1238,28 +1078,24 @@ sw.default: br label %return
 return:
   %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 8, %sw.bb2 ], [ 13, %sw.bb1 ], [ 18, %entry ]
   ret i8 %x
+; CHECK-LABEL: @linearmap1(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, -5
+; CHECK-NEXT: %switch.offset = add i8 %switch.idx.mult, 18
+; CHECK-NEXT: ret i8 %switch.offset
 }
 
 ; Linear mapping in a different configuration.
 define i32 @linearmap2(i8 %c) {
-; CHECK-LABEL: @linearmap2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i8 [[C:%.*]], -13
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], 4
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = zext i8 [[TMP0]] to i32
-; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[SWITCH_IDX_CAST]], 18
-; CHECK-NEXT:    ret i32 [[SWITCH_OFFSET]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i32 3
-;
 entry:
   switch i8 %c, label %sw.default [
-  i8 -10, label %return
-  i8 -11, label %sw.bb1
-  i8 -12, label %sw.bb2
-  i8 -13, label %sw.bb3
+    i8 -10, label %return
+    i8 -11, label %sw.bb1
+    i8 -12, label %sw.bb2
+    i8 -13, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1268,28 +1104,23 @@ sw.default: br label %return
 return:
   %x = phi i32 [ 3, %sw.default ], [ 18, %sw.bb3 ], [ 19, %sw.bb2 ], [ 20, %sw.bb1 ], [ 21, %entry ]
   ret i32 %x
+; CHECK-LABEL: @linearmap2(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, -13
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = zext i8 %switch.tableidx to i32
+; CHECK-NEXT: %switch.offset = add i32 %switch.idx.cast, 18
+; CHECK-NEXT: ret i32 %switch.offset
 }
 
 ; Linear mapping with overflows.
 define i8 @linearmap3(i32 %c) {
-; CHECK-LABEL: @linearmap3(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], 10
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul i8 [[SWITCH_IDX_CAST]], 100
-; CHECK-NEXT:    ret i8 [[SWITCH_IDX_MULT]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i8 3
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 10, label %return
-  i32 11, label %sw.bb1
-  i32 12, label %sw.bb2
-  i32 13, label %sw.bb3
+    i32 10, label %return
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb2
+    i32 13, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1298,27 +1129,23 @@ sw.default: br label %return
 return:
   %x = phi i8 [ 3, %sw.default ], [ 44, %sw.bb3 ], [ -56, %sw.bb2 ], [ 100, %sw.bb1 ], [ 0, %entry ]
   ret i8 %x
+; CHECK-LABEL: @linearmap3(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, 100
+; CHECK-NEXT: ret i8 %switch.idx.mult
 }
 
 ; Linear mapping with with multiplier 1 and offset 0.
 define i8 @linearmap4(i32 %c) {
-; CHECK-LABEL: @linearmap4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[C:%.*]], -2
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT:    ret i8 [[SWITCH_IDX_CAST]]
-; CHECK:       return:
-; CHECK-NEXT:    ret i8 3
-;
 entry:
   switch i32 %c, label %sw.default [
-  i32 -2, label %return
-  i32 -1, label %sw.bb1
-  i32 0, label %sw.bb2
-  i32 1, label %sw.bb3
+    i32 -2, label %return
+    i32 -1, label %sw.bb1
+    i32 0, label %sw.bb2
+    i32 1, label %sw.bb3
   ]
 sw.bb1: br label %return
 sw.bb2: br label %return
@@ -1327,26 +1154,22 @@ sw.default: br label %return
 return:
   %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %entry ]
   ret i8 %x
+; CHECK-LABEL: @linearmap4(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, -2
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: ret i8 %switch.idx.cast
 }
 
 ; Reuse the inverted table range compare.
 define i32 @reuse_cmp1(i32 %x) {
-; CHECK-LABEL: @reuse_cmp1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    [[INVERTED_CMP:%.*]] = xor i1 [[TMP0]], true
-; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
-; CHECK-NEXT:    [[R_0:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 0
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[R_0]], 0
-; CHECK-NEXT:    [[DOTR_0:%.*]] = select i1 [[INVERTED_CMP]], i32 100, i32 [[R_0]]
-; CHECK-NEXT:    ret i32 [[DOTR_0]]
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1362,24 +1185,22 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ]
   ret i32 %retval.0
+; CHECK-LABEL: @reuse_cmp1(
+; CHECK: entry:
+; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %x, 4
+; CHECK-NEXT: %inverted.cmp = xor i1 [[C]], true
+; CHECK:      [[R:%.+]] = select i1 %inverted.cmp, i32 100, i32 {{.*}}
+; CHECK-NEXT: ret i32 [[R]]
 }
 
 ; Reuse the table range compare.
 define i32 @reuse_cmp2(i32 %x) {
-; CHECK-LABEL: @reuse_cmp2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    [[X_:%.*]] = select i1 [[TMP0]], i32 [[X]], i32 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[X_]], 4
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP0]], i32 [[X_]], i32 100
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1395,26 +1216,24 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ]
   ret i32 %retval.0
+; CHECK-LABEL: @reuse_cmp2(
+; CHECK: entry:
+; CHECK-NEXT: %0 = icmp ult i32 %x, 4
+; CHECK-NEXT: %x. = select i1 %0, i32 %x, i32 4
+; CHECK-NEXT: [[C:%.+]] = icmp ne i32 %x., 4
+; CHECK:      [[R:%.+]] = select i1 %0, i32 {{.*}}, i32 100
+; CHECK-NEXT: ret i32 [[R]]
 }
 
 ; Cannot reuse the table range compare, because the default value is the same
 ; as one of the case values.
 define i32 @no_reuse_cmp(i32 %x) {
-; CHECK-LABEL: @no_reuse_cmp(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
-; CHECK-NEXT:    [[R_0:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 12
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[R_0]], 0
-; CHECK-NEXT:    [[R_0_:%.*]] = select i1 [[CMP]], i32 [[R_0]], i32 100
-; CHECK-NEXT:    ret i32 [[R_0_]]
-;
 entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1430,35 +1249,25 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ]
   ret i32 %retval.0
+; CHECK-LABEL: @no_reuse_cmp(
+; CHECK:  [[S:%.+]] = select
+; CHECK-NEXT:  %cmp = icmp ne i32 [[S]], 0
+; CHECK-NEXT:  [[R:%.+]] = select i1 %cmp, i32 [[S]], i32 100
+; CHECK-NEXT:  ret i32 [[R]]
 }
 
 ; Cannot reuse the table range compare, because the phi at the switch merge
 ; point is not dominated by the switch.
 define i32 @no_reuse_cmp2(i32 %x, i32 %y) {
-; CHECK-LABEL: @no_reuse_cmp2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EC:%.*]] = icmp ne i32 [[Y:%.*]], 0
-; CHECK-NEXT:    br i1 [[EC]], label [[SWITCH_ENTRY:%.*]], label [[SW_EPILOG:%.*]]
-; CHECK:       switch.entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 4
-; CHECK-NEXT:    [[SWITCH_OFFSET:%.*]] = add i32 [[X]], 10
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 0
-; CHECK-NEXT:    br label [[SW_EPILOG]]
-; CHECK:       sw.epilog:
-; CHECK-NEXT:    [[R_0:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[SWITCH_ENTRY]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[R_0]], 0
-; CHECK-NEXT:    [[DOTR_0:%.*]] = select i1 [[CMP]], i32 100, i32 [[R_0]]
-; CHECK-NEXT:    ret i32 [[DOTR_0]]
-;
 entry:
   %ec = icmp ne i32 %y, 0
   br i1 %ec, label %switch.entry, label %sw.epilog
 switch.entry:
   switch i32 %x, label %sw.default [
-  i32 0, label %sw.bb
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
   ]
 sw.bb: br label %sw.epilog
 sw.bb1: br label %sw.epilog
@@ -1474,49 +1283,28 @@ if.end: br label %return
 return:
   %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ]
   ret i32 %retval.0
+; CHECK-LABEL: @no_reuse_cmp2(
+; CHECK:  %r.0 = phi
+; CHECK-NEXT:  %cmp = icmp eq i32 %r.0, 0
+; CHECK-NEXT:  [[R:%.+]] = select i1 %cmp
+; CHECK-NEXT:  ret i32 [[R]]
 }
 
 define void @pr20210(i8 %x, i1 %y) {
 ; %z has uses outside of its BB or the phi it feeds into,
 ; so doing a table lookup and jumping directly to while.cond would
 ; cause %z to cease dominating all its uses.
-; CHECK-LABEL: @pr20210(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[Y:%.*]], label [[SW:%.*]], label [[INTERMEDIATE:%.*]]
-; CHECK:       sw:
-; CHECK-NEXT:    switch i8 [[X:%.*]], label [[END:%.*]] [
-; CHECK-NEXT:    i8 7, label [[INTERMEDIATE]]
-; CHECK-NEXT:    i8 3, label [[INTERMEDIATE]]
-; CHECK-NEXT:    i8 2, label [[INTERMEDIATE]]
-; CHECK-NEXT:    i8 1, label [[INTERMEDIATE]]
-; CHECK-NEXT:    i8 0, label [[INTERMEDIATE]]
-; CHECK-NEXT:    ]
-; CHECK:       intermediate:
-; CHECK-NEXT:    [[Z:%.*]] = zext i8 [[X]] to i32
-; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
-; CHECK:       while.cond:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[Z]], [[INTERMEDIATE]] ], [ [[J:%.*]], [[WHILE_BODY:%.*]] ]
-; CHECK-NEXT:    [[B:%.*]] = icmp ne i32 [[I]], 7
-; CHECK-NEXT:    br i1 [[B]], label [[WHILE_BODY]], label [[WHILE_END:%.*]]
-; CHECK:       while.body:
-; CHECK-NEXT:    [[J]] = add i32 [[I]], 1
-; CHECK-NEXT:    br label [[WHILE_COND]]
-; CHECK:       while.end:
-; CHECK-NEXT:    call void @exit(i32 [[Z]])
-; CHECK-NEXT:    unreachable
-; CHECK:       end:
-; CHECK-NEXT:    ret void
-;
+
 entry:
   br i1 %y, label %sw, label %intermediate
 
 sw:
   switch i8 %x, label %end [
-  i8 7, label %intermediate
-  i8 3, label %intermediate
-  i8 2, label %intermediate
-  i8 1, label %intermediate
-  i8 0, label %intermediate
+    i8 7, label %intermediate
+    i8 3, label %intermediate
+    i8 2, label %intermediate
+    i8 1, label %intermediate
+    i8 0, label %intermediate
   ]
 
 intermediate:
@@ -1538,6 +1326,8 @@ while.end:
 
 end:
   ret void
+; CHECK-LABEL: @pr20210
+; CHECK: switch i8 %x
 }
 
 ; Make sure we do not crash due to trying to generate an unguarded
@@ -1545,31 +1335,12 @@ end:
 ; values) and simultaneously trying to generate a branch to deal with
 ; the fact that we have holes in the range.
 define i32 @covered_switch_with_bit_tests(i3) {
-; CHECK-LABEL: @covered_switch_with_bit_tests(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i3 [[TMP0:%.*]], -2
-; CHECK-NEXT:    br i1 [[TMP1]], label [[SWITCH_HOLE_CHECK:%.*]], label [[L6:%.*]]
-; CHECK:       switch.hole_check:
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i3 [[TMP0]], 2
-; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = zext i3 [[TMP2]] to i8
-; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 15, [[SWITCH_MASKINDEX]]
-; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT:    br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6]]
-; CHECK:       switch.lookup:
-; CHECK-NEXT:    [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[TMP2]] to i4
-; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], [6 x i32]* @switch.table.covered_switch_with_bit_tests, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]]
-; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]]
-; CHECK-NEXT:    br label [[L6]]
-; CHECK:       l6:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[SWITCH_HOLE_CHECK]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
-; CHECK-NEXT:    ret i32 [[R]]
-;
 entry:
   switch i3 %0, label %l6 [
-  i3 -3, label %l5
-  i3 -4, label %l5
-  i3 3, label %l1
-  i3 2, label %l1
+    i3 -3, label %l5
+    i3 -4, label %l5
+    i3 3, label %l1
+    i3 2, label %l1
   ]
 
 l1: br label %l2
@@ -1583,17 +1354,18 @@ l5: br label %l2
 l6:
   %r = phi i32 [ %x, %l2 ], [ 0, %entry ]
   ret i32 %r
+; CHECK-LABEL: @covered_switch_with_bit_tests
+; CHECK: entry
+; CHECK-NEXT: switch
 }
 
 ; Speculation depth must be limited to avoid a zero-cost instruction cycle.
 
-define i32 @PR26308(i1 %B, i64 %load) {
 ; CHECK-LABEL: @PR26308(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[CLEANUP4:%.*]]
 ; CHECK:       cleanup4:
-; CHECK-NEXT:    br label [[CLEANUP4]]
-;
+; CHECK-NEXT:  br label %cleanup4
+
+define i32 @PR26308(i1 %B, i64 %load) {
 entry:
   br label %while.body
 
@@ -1621,32 +1393,9 @@ cleanup4:
 declare void @throw(i1)
 
 define void @wineh_test(i64 %val) personality i32 (...)* @__CxxFrameHandler3 {
-; CHECK-LABEL: @wineh_test(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    invoke void @throw(i1 false)
-; CHECK-NEXT:    to label [[UNREACHABLE:%.*]] unwind label [[CLEANUP1:%.*]]
-; CHECK:       unreachable:
-; CHECK-NEXT:    unreachable
-; CHECK:       cleanup1:
-; CHECK-NEXT:    [[CLEANUPPAD1:%.*]] = cleanuppad within none []
-; CHECK-NEXT:    switch i64 [[VAL:%.*]], label [[CLEANUPDONE2:%.*]] [
-; CHECK-NEXT:    i64 0, label [[CLEANUPDONE1:%.*]]
-; CHECK-NEXT:    i64 1, label [[CLEANUPDONE1]]
-; CHECK-NEXT:    i64 6, label [[CLEANUPDONE1]]
-; CHECK-NEXT:    ]
-; CHECK:       cleanupdone1:
-; CHECK-NEXT:    cleanupret from [[CLEANUPPAD1]] unwind label [[CLEANUP2:%.*]]
-; CHECK:       cleanupdone2:
-; CHECK-NEXT:    cleanupret from [[CLEANUPPAD1]] unwind label [[CLEANUP2]]
-; CHECK:       cleanup2:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i1 [ true, [[CLEANUPDONE1]] ], [ false, [[CLEANUPDONE2]] ]
-; CHECK-NEXT:    [[CLEANUPPAD2:%.*]] = cleanuppad within none []
-; CHECK-NEXT:    call void @throw(i1 [[PHI]]) [ "funclet"(token [[CLEANUPPAD2]]) ]
-; CHECK-NEXT:    unreachable
-;
 entry:
   invoke void @throw(i1 false)
-  to label %unreachable unwind label %cleanup1
+          to label %unreachable unwind label %cleanup1
 
 unreachable:
   unreachable
@@ -1654,9 +1403,9 @@ unreachable:
 cleanup1:
   %cleanuppad1 = cleanuppad within none []
   switch i64 %val, label %cleanupdone2 [
-  i64 0, label %cleanupdone1
-  i64 1, label %cleanupdone1
-  i64 6, label %cleanupdone1
+    i64 0, label %cleanupdone1
+    i64 1, label %cleanupdone1
+    i64 6, label %cleanupdone1
   ]
 
 cleanupdone1:
@@ -1672,4 +1421,32 @@ cleanup2:
   unreachable
 }
 
+; CHECK-LABEL: @wineh_test(
+; CHECK: entry:
+; CHECK:   invoke void @throw(i1 false)
+; CHECK:           to label %[[unreachable:.*]] unwind label %[[cleanup1:.*]]
+
+; CHECK: [[unreachable]]:
+; CHECK:   unreachable
+
+; CHECK: [[cleanup1]]:
+; CHECK:   %[[cleanuppad1:.*]] = cleanuppad within none []
+; CHECK:   switch i64 %val, label %[[cleanupdone2:.*]] [
+; CHECK:     i64 0, label %[[cleanupdone1:.*]]
+; CHECK:     i64 1, label %[[cleanupdone1]]
+; CHECK:     i64 6, label %[[cleanupdone1]]
+; CHECK:   ]
+
+; CHECK: [[cleanupdone1]]:
+; CHECK:   cleanupret from %[[cleanuppad1]] unwind label %[[cleanup2:.*]]
+
+; CHECK: [[cleanupdone2]]:
+; CHECK:   cleanupret from %[[cleanuppad1]] unwind label %[[cleanup2]]
+
+; CHECK: [[cleanup2]]:
+; CHECK:   %[[phi:.*]] = phi i1 [ true, %[[cleanupdone1]] ], [ false, %[[cleanupdone2]] ]
+; CHECK:   %[[cleanuppad2:.*]] = cleanuppad within none []
+; CHECK:   call void @throw(i1 %[[phi]]) [ "funclet"(token %[[cleanuppad2]]) ]
+; CHECK:   unreachable
+
 declare i32 @__CxxFrameHandler3(...)
diff --git a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
index 804882fe76138..849f55f6f392b 100644
--- a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
+++ b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll
@@ -119,12 +119,11 @@ three:
 ; Optimization shouldn't trigger; not an arithmetic progression
 define i32 @test4(i32 %a) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
-; CHECK-NEXT:    switch i32 [[TMP1]], label [[DEF:%.*]] [
-; CHECK-NEXT:    i32 0, label [[ONE:%.*]]
-; CHECK-NEXT:    i32 5, label [[TWO:%.*]]
-; CHECK-NEXT:    i32 8, label [[THREE:%.*]]
-; CHECK-NEXT:    i32 12, label [[THREE]]
+; CHECK-NEXT:    switch i32 [[A:%.*]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 97, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 102, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 105, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 109, label [[THREE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       def:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
@@ -157,12 +156,11 @@ three:
 ; Optimization shouldn't trigger; not a power of two
 define i32 @test5(i32 %a) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], 97
-; CHECK-NEXT:    switch i32 [[TMP1]], label [[DEF:%.*]] [
-; CHECK-NEXT:    i32 0, label [[ONE:%.*]]
-; CHECK-NEXT:    i32 5, label [[TWO:%.*]]
-; CHECK-NEXT:    i32 10, label [[THREE:%.*]]
-; CHECK-NEXT:    i32 15, label [[THREE]]
+; CHECK-NEXT:    switch i32 [[A:%.*]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 97, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 102, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 107, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 112, label [[THREE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       def:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
@@ -234,10 +232,10 @@ three:
 
 define i8 @test7(i8 %a) optsize {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[A:%.*]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[A]], 6
-; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = sub i8 [[TMP3]], 55
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 [[A:%.*]], -36
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i8 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i8 [[TMP1]], 6
+; CHECK-NEXT:    [[TMP4:%.*]] = or i8 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP4]], 4
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[SWITCH_LOOKUP:%.*]], label [[DEF:%.*]]
 ; CHECK:       switch.lookup:
@@ -309,14 +307,15 @@ three:
 
 define i32 @test9(i32 %a) {
 ; CHECK-LABEL: @test9(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[A]], 31
-; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    switch i32 [[TMP3]], label [[DEF:%.*]] [
-; CHECK-NEXT:    i32 9, label [[ONE:%.*]]
-; CHECK-NEXT:    i32 10, label [[TWO:%.*]]
-; CHECK-NEXT:    i32 3, label [[THREE:%.*]]
-; CHECK-NEXT:    i32 5, label [[THREE]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], 6
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 31
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    switch i32 [[TMP4]], label [[DEF:%.*]] [
+; CHECK-NEXT:    i32 6, label [[ONE:%.*]]
+; CHECK-NEXT:    i32 7, label [[TWO:%.*]]
+; CHECK-NEXT:    i32 0, label [[THREE:%.*]]
+; CHECK-NEXT:    i32 2, label [[THREE]]
 ; CHECK-NEXT:    ]
 ; CHECK:       def:
 ; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 8867, [[TMP0:%.*]] ], [ 11984, [[ONE]] ], [ 1143, [[TWO]] ], [ 99783, [[THREE]] ]
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index 6b74eb2221f48..e5c2ef65b3181 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -1,22 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -S -passes='simplify-cfg<switch-to-lookup>' | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: opt %s -S -simplifycfg | FileCheck %s
 declare void @foo(i32)
 
 define void @test(i1 %a) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[A_OFF:%.*]] = add i1 [[A:%.*]], true
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i1 [[A_OFF]], true
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; CHECK:       true:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       false:
-; CHECK-NEXT:    call void @foo(i32 3)
-; CHECK-NEXT:    ret void
-;
+; CHECK-LABEL: @test
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   switch i1 %a, label %default [i1 1, label %true
-  i1 0, label %false]
+                                i1 0, label %false]
 true:
   call void @foo(i32 1)
   ret void
@@ -26,35 +15,14 @@ false:
 default:
   call void @foo(i32 2)
   ret void
-}
+}  
 
 define void @test2(i2 %a) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    switch i2 [[A:%.*]], label [[DEFAULT1:%.*]] [
-; CHECK-NEXT:    i2 0, label [[CASE0:%.*]]
-; CHECK-NEXT:    i2 1, label [[CASE1:%.*]]
-; CHECK-NEXT:    i2 -2, label [[CASE2:%.*]]
-; CHECK-NEXT:    i2 -1, label [[CASE3:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       case0:
-; CHECK-NEXT:    call void @foo(i32 0)
-; CHECK-NEXT:    ret void
-; CHECK:       case1:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       case2:
-; CHECK-NEXT:    call void @foo(i32 2)
-; CHECK-NEXT:    ret void
-; CHECK:       case3:
-; CHECK-NEXT:    call void @foo(i32 3)
-; CHECK-NEXT:    ret void
-; CHECK:       default1:
-; CHECK-NEXT:    unreachable
-;
+; CHECK-LABEL: @test2
   switch i2 %a, label %default [i2 0, label %case0
-  i2 1, label %case1
-  i2 2, label %case2
-  i2 3, label %case3]
+                                i2 1, label %case1
+                                i2 2, label %case2
+                                i2 3, label %case3]
 case0:
   call void @foo(i32 0)
   ret void
@@ -68,35 +36,19 @@ case3:
   call void @foo(i32 3)
   ret void
 default:
+; CHECK-LABEL: default1:
+; CHECK-NEXT: unreachable
   call void @foo(i32 4)
   ret void
-}
+}  
 
 ; This one is a negative test - we know the value of the default,
 ; but that's about it
 define void @test3(i2 %a) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    switch i2 [[A:%.*]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i2 0, label [[CASE0:%.*]]
-; CHECK-NEXT:    i2 1, label [[CASE1:%.*]]
-; CHECK-NEXT:    i2 -2, label [[CASE2:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       case0:
-; CHECK-NEXT:    call void @foo(i32 0)
-; CHECK-NEXT:    ret void
-; CHECK:       case1:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       case2:
-; CHECK-NEXT:    call void @foo(i32 2)
-; CHECK-NEXT:    ret void
-; CHECK:       default:
-; CHECK-NEXT:    call void @foo(i32 0)
-; CHECK-NEXT:    ret void
-;
+; CHECK-LABEL: @test3
   switch i2 %a, label %default [i2 0, label %case0
-  i2 1, label %case1
-  i2 2, label %case2]
+                                i2 1, label %case1
+                                i2 2, label %case2]
 
 case0:
   call void @foo(i32 0)
@@ -108,30 +60,18 @@ case2:
   call void @foo(i32 2)
   ret void
 default:
+; CHECK-LABEL: default:
+; CHECK-NEXT: call void @foo
   call void @foo(i32 0)
   ret void
-}
+}  
 
 ; Negative test - check for possible overflow when computing
 ; number of possible cases.
 define void @test4(i128 %a) {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:    switch i128 [[A:%.*]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i128 0, label [[CASE0:%.*]]
-; CHECK-NEXT:    i128 1, label [[CASE1:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       case0:
-; CHECK-NEXT:    call void @foo(i32 0)
-; CHECK-NEXT:    ret void
-; CHECK:       case1:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       default:
-; CHECK-NEXT:    call void @foo(i32 0)
-; CHECK-NEXT:    ret void
-;
+; CHECK-LABEL: @test4
   switch i128 %a, label %default [i128 0, label %case0
-  i128 1, label %case1]
+                                  i128 1, label %case1]
 
 case0:
   call void @foo(i32 0)
@@ -140,29 +80,20 @@ case1:
   call void @foo(i32 1)
   ret void
 default:
+; CHECK-LABEL: default:
+; CHECK-NEXT: call void @foo
   call void @foo(i32 0)
   ret void
-}
+}  
 
 ; All but one bit known zero
 define void @test5(i8 %a) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[A:%.*]], 2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[A_OFF:%.*]] = add i8 [[A]], -1
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; CHECK:       true:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       false:
-; CHECK-NEXT:    call void @foo(i32 3)
-; CHECK-NEXT:    ret void
-;
-  %cmp = icmp ult i8 %a, 2
+; CHECK-LABEL: @test5
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+  %cmp = icmp ult i8 %a, 2 
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 1, label %true
-  i8 0, label %false]
+                                i8 0, label %false]
 true:
   call void @foo(i32 1)
   ret void
@@ -172,29 +103,18 @@ false:
 default:
   call void @foo(i32 2)
   ret void
-}
+} 
 
 ;; All but one bit known one
 define void @test6(i8 %a) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[A:%.*]], -2
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], -2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[A_OFF:%.*]] = add i8 [[A]], 1
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; CHECK:       true:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       false:
-; CHECK-NEXT:    call void @foo(i32 3)
-; CHECK-NEXT:    ret void
-;
+; CHECK-LABEL: @test6
+; CHECK: @llvm.assume
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %and = and i8 %a, 254
-  %cmp = icmp eq i8 %and, 254
+  %cmp = icmp eq i8 %and, 254 
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
-  i8 254, label %false]
+                                i8 254, label %false]
 true:
   call void @foo(i32 1)
   ret void
@@ -209,26 +129,15 @@ default:
 ; Check that we can eliminate both dead cases and dead defaults
 ; within a single run of simplify-cfg
 define void @test7(i8 %a) {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[A:%.*]], -2
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], -2
-; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[A_OFF:%.*]] = add i8 [[A]], 1
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i8 [[A_OFF]], 1
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; CHECK:       true:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       false:
-; CHECK-NEXT:    call void @foo(i32 3)
-; CHECK-NEXT:    ret void
-;
+; CHECK-LABEL: @test7
+; CHECK: @llvm.assume
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
   %and = and i8 %a, 254
-  %cmp = icmp eq i8 %and, 254
+  %cmp = icmp eq i8 %and, 254 
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
-  i8 254, label %false
-  i8 0, label %also_dead]
+                                i8 254, label %false
+                                i8 0, label %also_dead]
 true:
   call void @foo(i32 1)
   ret void
@@ -245,33 +154,17 @@ default:
 
 ;; All but one bit known undef
 ;; Note: This is currently testing an optimization which doesn't trigger. The
-;; case this is protecting against is that a bit could be assumed both zero
+;; case this is protecting against is that a bit could be assumed both zero 
 ;; *or* one given we know it's undef.  ValueTracking doesn't do this today,
 ;; but it doesn't hurt to confirm.
 define void @test8(i8 %a) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[AND:%.*]] = and i8 [[A:%.*]], -2
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], undef
-; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    switch i8 [[A]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i8 -1, label [[TRUE:%.*]]
-; CHECK-NEXT:    i8 -2, label [[FALSE:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       true:
-; CHECK-NEXT:    call void @foo(i32 1)
-; CHECK-NEXT:    ret void
-; CHECK:       false:
-; CHECK-NEXT:    call void @foo(i32 3)
-; CHECK-NEXT:    ret void
-; CHECK:       default:
-; CHECK-NEXT:    call void @foo(i32 2)
-; CHECK-NEXT:    ret void
-;
+; CHECK: switch i8
   %and = and i8 %a, 254
   %cmp = icmp eq i8 %and, undef
   call void @llvm.assume(i1 %cmp)
   switch i8 %a, label %default [i8 255, label %true
-  i8 254, label %false]
+                                i8 254, label %false]
 true:
   call void @foo(i32 1)
   ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index 21cecc5c942b7..2d46aac23f61b 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -4,10 +4,10 @@
 define i32 @test1(i32 %x) nounwind {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  a:
-; CHECK-NEXT:    [[I:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[I:%.*]] = shl i32 %x, 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[I]], 24
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 5, i32 0
-; CHECK-NEXT:    ret i32 [[SPEC_SELECT]]
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[COND]], i32 5, i32 0
+; CHECK-NEXT:    ret i32 [[DOT]]
 ;
   %i = shl i32 %x, 1
   switch i32 %i, label %a [
@@ -48,19 +48,12 @@ c:
 
 define i1 @repeated_signbits(i8 %condition) {
 ; CHECK-LABEL: @repeated_signbits(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i8 [[CONDITION:%.*]] to i32
-; CHECK-NEXT:    switch i32 [[SEXT]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT:    i32 0, label [[A:%.*]]
-; CHECK-NEXT:    i32 127, label [[A]]
-; CHECK-NEXT:    i32 -128, label [[A]]
-; CHECK-NEXT:    i32 -1, label [[A]]
+; CHECK:         switch i32
+; CHECK-DAG:     i32 -128, label %a
+; CHECK-DAG:     i32 -1, label %a
+; CHECK-DAG:     i32  0, label %a
+; CHECK-DAG:     i32  127, label %a
 ; CHECK-NEXT:    ]
-; CHECK:       a:
-; CHECK-NEXT:    [[MERGE:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ], [ false, [[DEFAULT]] ]
-; CHECK-NEXT:    ret i1 [[MERGE]]
-; CHECK:       default:
-; CHECK-NEXT:    br label [[A]]
 ;
 entry:
   %sext = sext i8 %condition to i32
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 98c434a5a0ec3..165e5b264aef7 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -1,158 +1,141 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -simplifycfg -S | FileCheck -enable-var-scope %s
 
 ; Test basic folding to a conditional branch.
 define i32 @foo(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    br i1 [[EQ]], label [[B:%.*]], label [[SWITCH:%.*]]
-; CHECK:       switch:
-; CHECK-NEXT:    [[LT:%.*]] = icmp slt i64 [[X]], [[Y]]
-; CHECK-NEXT:    br i1 [[LT]], label [[A:%.*]], label [[B]]
-; CHECK:       a:
-; CHECK-NEXT:    tail call void @bees.a() #0
-; CHECK-NEXT:    ret i32 1
-; CHECK:       b:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[SWITCH]] ], [ 2, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    tail call void @bees.b() #0
-; CHECK-NEXT:    ret i32 [[RETVAL]]
-;
 entry:
-  %eq = icmp eq i64 %x, %y
-  br i1 %eq, label %b, label %switch
+    %eq = icmp eq i64 %x, %y
+    br i1 %eq, label %b, label %switch
 switch:
-  %lt = icmp slt i64 %x, %y
-  %qux = select i1 %lt, i32 0, i32 2
-  switch i32 %qux, label %bees [
-  i32 0, label %a
-  i32 1, label %b
-  i32 2, label %b
-  ]
+    %lt = icmp slt i64 %x, %y
+; CHECK: br i1 %lt, label %a, label %b
+    %qux = select i1 %lt, i32 0, i32 2
+    switch i32 %qux, label %bees [
+        i32 0, label %a
+        i32 1, label %b
+        i32 2, label %b
+    ]
 a:
-  tail call void @bees.a() nounwind
-  ret i32 1
+    tail call void @bees.a() nounwind
+    ret i32 1
+; CHECK: b:
+; CHECK-NEXT: %retval = phi i32 [ 0, %switch ], [ 2, %entry ]
 b:
-  %retval = phi i32 [0, %switch], [0, %switch], [2, %entry]
-  tail call void @bees.b() nounwind
-  ret i32 %retval
+    %retval = phi i32 [0, %switch], [0, %switch], [2, %entry]
+    tail call void @bees.b() nounwind
+    ret i32 %retval
+; CHECK-NOT: bees:
 bees:
-  tail call void @llvm.trap() nounwind
-  unreachable
+    tail call void @llvm.trap() nounwind
+    unreachable
 }
 
 ; Test basic folding to an unconditional branch.
 define i32 @bar(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @bar(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @bees.a() #0
-; CHECK-NEXT:    ret i32 0
-;
 entry:
-  %lt = icmp slt i64 %x, %y
-  %qux = select i1 %lt, i32 0, i32 2
-  switch i32 %qux, label %bees [
-  i32 0, label %a
-  i32 1, label %b
-  i32 2, label %a
-  ]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @bees.a() [[$NUW:#[0-9]+]]
+; CHECK-NEXT: ret i32 0
+    %lt = icmp slt i64 %x, %y
+    %qux = select i1 %lt, i32 0, i32 2
+    switch i32 %qux, label %bees [
+        i32 0, label %a
+        i32 1, label %b
+        i32 2, label %a
+    ]
 a:
-  %retval = phi i32 [0, %entry], [0, %entry], [1, %b]
-  tail call void @bees.a() nounwind
-  ret i32 0
+    %retval = phi i32 [0, %entry], [0, %entry], [1, %b]
+    tail call void @bees.a() nounwind
+    ret i32 0
 b:
-  tail call void @bees.b() nounwind
-  br label %a
+    tail call void @bees.b() nounwind
+    br label %a
 bees:
-  tail call void @llvm.trap() nounwind
-  unreachable
+    tail call void @llvm.trap() nounwind
+    unreachable
 }
 
 ; Test the edge case where both values from the select are the default case.
 define void @bazz(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @bazz(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @bees.b() #0
-; CHECK-NEXT:    ret void
-;
 entry:
-  %lt = icmp slt i64 %x, %y
-  %qux = select i1 %lt, i32 10, i32 12
-  switch i32 %qux, label %b [
-  i32 0, label %a
-  i32 1, label %bees
-  i32 2, label %bees
-  ]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @bees.b() [[$NUW]]
+; CHECK-NEXT: ret void
+    %lt = icmp slt i64 %x, %y
+    %qux = select i1 %lt, i32 10, i32 12
+    switch i32 %qux, label %b [
+        i32 0, label %a
+        i32 1, label %bees
+        i32 2, label %bees
+    ]
 a:
-  tail call void @bees.a() nounwind
-  ret void
+    tail call void @bees.a() nounwind
+    ret void
 b:
-  tail call void @bees.b() nounwind
-  ret void
+    tail call void @bees.b() nounwind
+    ret void
 bees:
-  tail call void @llvm.trap()
-  unreachable
+    tail call void @llvm.trap()
+    unreachable
 }
 
 ; Test the edge case where both values from the select are equal.
 define void @quux(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: @quux(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    tail call void @bees.a() #0
-; CHECK-NEXT:    ret void
-;
 entry:
-  %lt = icmp slt i64 %x, %y
-  %qux = select i1 %lt, i32 0, i32 0
-  switch i32 %qux, label %b [
-  i32 0, label %a
-  i32 1, label %bees
-  i32 2, label %bees
-  ]
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call void @bees.a() [[$NUW]]
+; CHECK-NEXT: ret void
+    %lt = icmp slt i64 %x, %y
+    %qux = select i1 %lt, i32 0, i32 0
+    switch i32 %qux, label %b [
+        i32 0, label %a
+        i32 1, label %bees
+        i32 2, label %bees
+    ]
 a:
-  tail call void @bees.a() nounwind
-  ret void
+    tail call void @bees.a() nounwind
+    ret void
 b:
-  tail call void @bees.b() nounwind
-  ret void
+    tail call void @bees.b() nounwind
+    ret void
 bees:
-  tail call void @llvm.trap()
-  unreachable
+    tail call void @llvm.trap()
+    unreachable
 }
 
 ; A final test, for phi node munging.
 define i32 @xyzzy(i64 %x, i64 %y) {
 ; CHECK-LABEL: @xyzzy(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[LT:%.*]] = icmp slt i64 [[X]], [[Y]]
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[LT]], i32 -1, i32 1
-; CHECK-NEXT:    [[VAL:%.*]] = select i1 [[EQ]], i32 0, i32 [[SPEC_SELECT]]
-; CHECK-NEXT:    ret i32 [[VAL]]
-;
 entry:
-  %eq = icmp eq i64 %x, %y
-  br i1 %eq, label %r, label %cont
+    %eq = icmp eq i64 %x, %y
+    br i1 %eq, label %r, label %cont
 cont:
-  %lt = icmp slt i64 %x, %y
-  %qux = select i1 %lt, i32 0, i32 2
-  switch i32 %qux, label %bees [
-  i32 0, label %a
-  i32 1, label %r
-  i32 2, label %r
-  ]
+; CHECK: %lt = icmp slt i64 %x, %y
+    %lt = icmp slt i64 %x, %y
+; CHECK-NEXT: select i1 %lt, i32 -1, i32 1
+    %qux = select i1 %lt, i32 0, i32 2
+    switch i32 %qux, label %bees [
+        i32 0, label %a
+        i32 1, label %r
+        i32 2, label %r
+    ]
 r:
-  %val = phi i32 [0, %entry], [1, %cont], [1, %cont]
-  ret i32 %val
+    %val = phi i32 [0, %entry], [1, %cont], [1, %cont]
+    ret i32 %val
 a:
-  ret i32 -1
+    ret i32 -1
+; CHECK-NOT: bees:
 bees:
-  tail call void @llvm.trap()
-  unreachable
+    tail call void @llvm.trap()
+    unreachable
 }
 
 declare void @llvm.trap() nounwind noreturn
 declare void @bees.a() nounwind
 declare void @bees.b() nounwind
 
+; CHECK: attributes [[$NUW]] = { nounwind }
 ; CHECK: attributes #1 = { cold noreturn nounwind }
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
index c0f6a43fda375..a109b317c7320 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
@@ -1,27 +1,18 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt %s -simplifycfg -S | FileCheck %s
 
 declare i32 @f(i32)
 
 define i32 @basic(i32 %x) {
-; CHECK-LABEL: @basic(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -5
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[A:%.*]], label [[DEFAULT:%.*]]
-; CHECK:       default:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @f(i32 0)
-; CHECK-NEXT:    ret i32 [[TMP0]]
-; CHECK:       a:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @f(i32 1)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
+; CHECK-LABEL: @basic
+; CHECK: x.off = add i32 %x, -5
+; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK: br i1 %switch, label %a, label %default
 
 entry:
   switch i32 %x, label %default [
-  i32 5, label %a
-  i32 6, label %a
-  i32 7, label %a
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
   ]
 default:
   %0 = call i32 @f(i32 0)
@@ -33,28 +24,20 @@ a:
 
 
 define i32 @unreachable(i32 %x) {
-; CHECK-LABEL: @unreachable(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -5
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @f(i32 0)
-; CHECK-NEXT:    ret i32 [[TMP0]]
-; CHECK:       b:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @f(i32 1)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
+; CHECK-LABEL: @unreachable
+; CHECK: x.off = add i32 %x, -5
+; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK: br i1 %switch, label %a, label %b
 
 entry:
   switch i32 %x, label %unreachable [
-  i32 5, label %a
-  i32 6, label %a
-  i32 7, label %a
-  i32 10, label %b
-  i32 20, label %b
-  i32 30, label %b
-  i32 40, label %b
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
+    i32 10, label %b
+    i32 20, label %b
+    i32 30, label %b
+    i32 40, label %b
   ]
 unreachable:
   unreachable
@@ -68,28 +51,20 @@ b:
 
 
 define i32 @unreachable2(i32 %x) {
-; CHECK-LABEL: @unreachable2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -5
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[A:%.*]], label [[B:%.*]]
-; CHECK:       a:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @f(i32 0)
-; CHECK-NEXT:    ret i32 [[TMP0]]
-; CHECK:       b:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @f(i32 1)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
+; CHECK-LABEL: @unreachable2
+; CHECK: x.off = add i32 %x, -5
+; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK: br i1 %switch, label %a, label %b
 
 entry:
   ; Note: folding the most popular case destination into the default
   ; would prevent switch-to-icmp here.
   switch i32 %x, label %unreachable [
-  i32 5, label %a
-  i32 6, label %a
-  i32 7, label %a
-  i32 10, label %b
-  i32 20, label %b
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
+    i32 10, label %b
+    i32 20, label %b
   ]
 unreachable:
   unreachable
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll
index 083cfe1ee2996..7dce54d75d697 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_create-custom-dl.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 target datalayout="p:40:64:64:32"
 
@@ -7,151 +6,102 @@ declare void @foo1()
 declare void @foo2()
 
 define void @test1(i32 %V) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
-; CHECK-NEXT:    i32 17, label [[T:%.*]]
-; CHECK-NEXT:    i32 4, label [[T]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1(
+; CHECK:  switch i32 %V, label %F [
+; CHECK:    i32 17, label %T
+; CHECK:    i32 4, label %T
+; CHECK:  ]
 }
 
 define void @test1_ptr(i32* %V) {
-; CHECK-LABEL: @test1_ptr(
-; CHECK-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32* [[V:%.*]] to i32
-; CHECK-NEXT:    switch i32 [[MAGICPTR]], label [[F:%.*]] [
-; CHECK-NEXT:    i32 17, label [[T:%.*]]
-; CHECK-NEXT:    i32 4, label [[T]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
-  %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
-  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
+        %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr(
+; DL:  %magicptr = ptrtoint i32* %V to i32
+; DL:  switch i32 %magicptr, label %F [
+; DL:    i32 17, label %T
+; DL:    i32 4, label %T
+; DL:  ]
 }
 
 define void @test1_ptr_as1(i32 addrspace(1)* %V) {
-; CHECK-LABEL: @test1_ptr_as1(
-; CHECK-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32 addrspace(1)* [[V:%.*]] to i32
-; CHECK-NEXT:    switch i32 [[MAGICPTR]], label [[F:%.*]] [
-; CHECK-NEXT:    i32 17, label [[T:%.*]]
-; CHECK-NEXT:    i32 4, label [[T]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
-  %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
-  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
+        %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr_as1(
+; DL:  %magicptr = ptrtoint i32 addrspace(1)* %V to i16
+; DL:  switch i16 %magicptr, label %F [
+; DL:    i16 17, label %T
+; DL:    i16 4, label %T
+; DL:  ]
 }
 
 define void @test2(i32 %V) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    switch i32 [[V:%.*]], label [[T:%.*]] [
-; CHECK-NEXT:    i32 17, label [[F:%.*]]
-; CHECK-NEXT:    i32 4, label [[F]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
-  %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
-  %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
+        %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
+        %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test2(
+; CHECK:  switch i32 %V, label %T [
+; CHECK:    i32 17, label %F
+; CHECK:    i32 4, label %F
+; CHECK:  ]
 }
 
 define void @test3(i32 %V) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
-; CHECK-NEXT:    i32 4, label [[T:%.*]]
-; CHECK-NEXT:    i32 17, label [[T]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-  br i1 %C1, label %T, label %N
+        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+        br i1 %C1, label %T, label %N
 N:              ; preds = %0
-  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-  br i1 %C2, label %T, label %F
+        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+        br i1 %C2, label %T, label %F
 T:              ; preds = %N, %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %N
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
 
+; CHECK-LABEL: @test3(
+; CHECK: switch i32 %V, label %F [
+; CHECK:     i32 4, label %T
+; CHECK:     i32 17, label %T
+; CHECK:   ]
 }
 
 
 define i32 @test4(i8 zeroext %c) nounwind ssp noredzone {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
-; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
-; CHECK-NEXT:    i8 34, label [[LOR_END]]
-; CHECK-NEXT:    i8 92, label [[LOR_END]]
-; CHECK-NEXT:    ]
-; CHECK:       lor.rhs:
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
-; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
-; CHECK-NEXT:    ret i32 [[LOR_EXT]]
-;
 entry:
   %cmp = icmp eq i8 %c, 62
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -169,28 +119,20 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
+; CHECK-LABEL: @test4(
+; CHECK:  switch i8 %c, label %lor.rhs [
+; CHECK:    i8 62, label %lor.end
+; CHECK:    i8 34, label %lor.end
+; CHECK:    i8 92, label %lor.end
+; CHECK:  ]
 }
 
 define i32 @test5(i8 zeroext %c) nounwind ssp noredzone {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
-; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
-; CHECK-NEXT:    i8 34, label [[LOR_END]]
-; CHECK-NEXT:    i8 92, label [[LOR_END]]
-; CHECK-NEXT:    ]
-; CHECK:       lor.rhs:
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
-; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
-; CHECK-NEXT:    ret i32 [[LOR_EXT]]
-;
 entry:
   switch i8 %c, label %lor.rhs [
-  i8 62, label %lor.end
-  i8 34, label %lor.end
-  i8 92, label %lor.end
+    i8 62, label %lor.end
+    i8 34, label %lor.end
+    i8 92, label %lor.end
   ]
 
 lor.rhs:                                          ; preds = %entry
@@ -201,63 +143,48 @@ lor.end:                                          ; preds = %entry, %entry, %ent
   %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
+; CHECK-LABEL: @test5(
+; CHECK:  switch i8 %c, label %lor.rhs [
+; CHECK:    i8 62, label %lor.end
+; CHECK:    i8 34, label %lor.end
+; CHECK:    i8 92, label %lor.end
+; CHECK:  ]
 }
 
 
 define i1 @test6({ i32, i32 }* %I) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP_1_I:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[I:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP_2_I:%.*]] = load i32, i32* [[TMP_1_I]]
-; CHECK-NEXT:    [[TMP_2_I_OFF:%.*]] = add i32 [[TMP_2_I]], -14
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[TMP_2_I_OFF]], 6
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
-; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
-;
 entry:
-  %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-  %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
-  %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
-  br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
+        %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
+        %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
+        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
+        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
 shortcirc_next.0:               ; preds = %entry
-  %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
-  br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
+        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
+        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
 shortcirc_next.1:               ; preds = %shortcirc_next.0
-  %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
-  br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
+        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
+        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
 shortcirc_next.2:               ; preds = %shortcirc_next.1
-  %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
-  br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
+        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
+        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
 shortcirc_next.3:               ; preds = %shortcirc_next.2
-  %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
-  br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
+        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
+        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
 shortcirc_next.4:               ; preds = %shortcirc_next.3
-  %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
-  br label %UnifiedReturnBlock
+        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
+        br label %UnifiedReturnBlock
 shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
-  br label %UnifiedReturnBlock
+        br label %UnifiedReturnBlock
 UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
-  %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
-  ret i1 %UnifiedRetVal
+        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
+        ret i1 %UnifiedRetVal
 
+; CHECK-LABEL: @test6(
+; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
+; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
 }
 
 define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i8 99, label [[IF_THEN]]
-; CHECK-NEXT:    i8 97, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @foo1() #2
-; CHECK-NEXT:    ret void
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
 entry:
   %cmp = icmp ult i32 %x, 32
   %cmp4 = icmp eq i8 %c, 97
@@ -273,27 +200,17 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret void
 
+; CHECK-LABEL: @test7(
+; CHECK:   %cmp = icmp ult i32 %x, 32
+; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %if.end [
+; CHECK:     i8 99, label %if.then
+; CHECK:     i8 97, label %if.then
+; CHECK:   ]
 }
 
 define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[N:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       N:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i8 99, label [[IF_THEN]]
-; CHECK-NEXT:    i8 97, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 42, [[SWITCH_EARLY_TEST]] ], [ 42, [[N]] ], [ 42, [[SWITCH_EARLY_TEST]] ]
-; CHECK-NEXT:    tail call void @foo1() #2
-; CHECK-NEXT:    ret i32 [[A]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret i32 0
-;
 entry:
   br i1 %C, label %N, label %if.then
 N:
@@ -312,33 +229,17 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret i32 0
 
+; CHECK-LABEL: @test8(
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %if.end [
+; CHECK:     i8 99, label %if.then
+; CHECK:     i8 97, label %if.then
+; CHECK:   ]
+; CHECK:   %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ]
 }
 
 ;; This is "Example 7" from http://blog.regehr.org/archives/320
 define i32 @test9(i8 zeroext %c) nounwind ssp noredzone {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i8 [[C]], label [[LOR_RHS:%.*]] [
-; CHECK-NEXT:    i8 92, label [[LOR_END]]
-; CHECK-NEXT:    i8 62, label [[LOR_END]]
-; CHECK-NEXT:    i8 60, label [[LOR_END]]
-; CHECK-NEXT:    i8 59, label [[LOR_END]]
-; CHECK-NEXT:    i8 58, label [[LOR_END]]
-; CHECK-NEXT:    i8 46, label [[LOR_END]]
-; CHECK-NEXT:    i8 44, label [[LOR_END]]
-; CHECK-NEXT:    i8 34, label [[LOR_END]]
-; CHECK-NEXT:    i8 39, label [[LOR_END]]
-; CHECK-NEXT:    ]
-; CHECK:       lor.rhs:
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[SWITCH_EARLY_TEST]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY:%.*]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ]
-; CHECK-NEXT:    [[CONV46:%.*]] = zext i1 [[TMP0]] to i32
-; CHECK-NEXT:    ret i32 [[CONV46]]
-;
 entry:
   %cmp = icmp ult i8 %c, 33
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -384,23 +285,25 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %conv46 = zext i1 %0 to i32
   ret i32 %conv46
 
-
+; CHECK-LABEL: @test9(
+; CHECK:   %cmp = icmp ult i8 %c, 33
+; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
+
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %lor.rhs [
+; CHECK:     i8 92, label %lor.end
+; CHECK:     i8 62, label %lor.end
+; CHECK:     i8 60, label %lor.end
+; CHECK:     i8 59, label %lor.end
+; CHECK:     i8 58, label %lor.end
+; CHECK:     i8 46, label %lor.end
+; CHECK:     i8 44, label %lor.end
+; CHECK:     i8 34, label %lor.end
+; CHECK:     i8 39, label %lor.end
+; CHECK:   ]
 }
 
 define i32 @test10(i32 %mode, i1 %Cond) {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH_EARLY_TEST:%.*]], label [[F:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i32 [[MODE:%.*]], label [[T:%.*]] [
-; CHECK-NEXT:    i32 51, label [[F]]
-; CHECK-NEXT:    i32 0, label [[F]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 123, [[SWITCH_EARLY_TEST]] ], [ 324, [[F]] ]
-; CHECK-NEXT:    ret i32 [[MERGE]]
-; CHECK:       F:
-; CHECK-NEXT:    br label [[T]]
-;
   %A = icmp ne i32 %mode, 0
   %B = icmp ne i32 %mode, 51
   %C = and i1 %A, %B
@@ -411,27 +314,17 @@ T:
 F:
   ret i32 324
 
+; CHECK-LABEL: @test10(
+; CHECK:  br i1 %Cond, label %switch.early.test, label %F
+; CHECK:switch.early.test:
+; CHECK:  switch i32 %mode, label %T [
+; CHECK:    i32 51, label %F
+; CHECK:    i32 0, label %F
+; CHECK:  ]
 }
 
 ; PR8780
 define i32 @test11(i32 %bar) nounwind {
-; CHECK-LABEL: @test11(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[BAR:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 55, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 53, label [[RETURN]]
-; CHECK-NEXT:    i32 35, label [[RETURN]]
-; CHECK-NEXT:    i32 24, label [[RETURN]]
-; CHECK-NEXT:    i32 23, label [[RETURN]]
-; CHECK-NEXT:    i32 12, label [[RETURN]]
-; CHECK-NEXT:    i32 4, label [[RETURN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.end:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
-;
 entry:
   %cmp = icmp eq i32 %bar, 4
   %cmp2 = icmp eq i32 %bar, 35
@@ -460,21 +353,19 @@ return:                                           ; preds = %if.end, %if.then
   %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ]
   ret i32 %retval.0
 
+; CHECK-LABEL: @test11(
+; CHECK: switch i32 %bar, label %if.end [
+; CHECK:   i32 55, label %return
+; CHECK:   i32 53, label %return
+; CHECK:   i32 35, label %return
+; CHECK:   i32 24, label %return
+; CHECK:   i32 23, label %return
+; CHECK:   i32 12, label %return
+; CHECK:   i32 4, label %return
+; CHECK: ]
 }
 
 define void @test12() nounwind {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_OLD:%.*]] = icmp eq i32 undef, undef
-; CHECK-NEXT:    br i1 [[A_OLD]], label [[BB55_US_US:%.*]], label [[MALFORMED:%.*]]
-; CHECK:       bb55.us.us:
-; CHECK-NEXT:    [[B:%.*]] = icmp ugt i32 undef, undef
-; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 undef, undef
-; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[B]], [[A]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB55_US_US]], label [[MALFORMED]]
-; CHECK:       malformed:
-; CHECK-NEXT:    ret void
-;
 entry:
   br label %bb49.us.us
 
@@ -491,26 +382,12 @@ bb55.us.us:
 
 malformed:
   ret void
+; CHECK-LABEL: @test12(
 
 }
 
 ; test13 - handle switch formation with ult.
 define void @test13(i32 %x) nounwind ssp noredzone {
-; CHECK-LABEL: @test13(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
-; CHECK-NEXT:    i32 4, label [[IF_THEN]]
-; CHECK-NEXT:    i32 3, label [[IF_THEN]]
-; CHECK-NEXT:    i32 1, label [[IF_THEN]]
-; CHECK-NEXT:    i32 0, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    call void @foo1() #3
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
 entry:
   %cmp = icmp ult i32 %x, 2
   br i1 %cmp, label %if.then, label %lor.lhs.false3
@@ -533,26 +410,18 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
+; CHECK-LABEL: @test13(
+; CHECK:  switch i32 %x, label %if.end [
+; CHECK:     i32 6, label %if.then
+; CHECK:     i32 4, label %if.then
+; CHECK:     i32 3, label %if.then
+; CHECK:     i32 1, label %if.then
+; CHECK:     i32 0, label %if.then
+; CHECK:   ]
 }
 
 ; test14 - handle switch formation with ult.
 define void @test14(i32 %x) nounwind ssp noredzone {
-; CHECK-LABEL: @test14(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
-; CHECK-NEXT:    i32 4, label [[IF_THEN]]
-; CHECK-NEXT:    i32 3, label [[IF_THEN]]
-; CHECK-NEXT:    i32 2, label [[IF_THEN]]
-; CHECK-NEXT:    i32 1, label [[IF_THEN]]
-; CHECK-NEXT:    i32 0, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    call void @foo1() #3
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
 entry:
   %cmp = icmp ugt i32 %x, 2
   br i1 %cmp, label %lor.lhs.false3, label %if.then
@@ -575,15 +444,18 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
+; CHECK-LABEL: @test14(
+; CHECK:  switch i32 %x, label %if.end [
+; CHECK:     i32 6, label %if.then
+; CHECK:     i32 4, label %if.then
+; CHECK:     i32 3, label %if.then
+; CHECK:     i32 1, label %if.then
+; CHECK:     i32 0, label %if.then
+; CHECK:   ]
 }
 
 ; Don't crash on ginormous ranges.
 define void @test15(i128 %x) nounwind {
-; CHECK-LABEL: @test15(
-; CHECK-NEXT:  if.end:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i128 [[X:%.*]], 2
-; CHECK-NEXT:    ret void
-;
   %cmp = icmp ugt i128 %x, 2
   br i1 %cmp, label %if.end, label %lor.false
 
@@ -598,19 +470,18 @@ if.then:
 if.end:
   ret void
 
+; CHECK-LABEL: @test15(
+; CHECK-NOT: switch
+; CHECK: ret void
 }
 
 ; PR8675
 ; rdar://5134905
 define zeroext i1 @test16(i32 %x) nounwind {
-; CHECK-LABEL: @test16(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
-; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
-;
 entry:
+; CHECK-LABEL: @test16(
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
   %cmp.i = icmp eq i32 %x, 1
   br i1 %cmp.i, label %lor.end, label %lor.lhs.false
 
@@ -629,17 +500,6 @@ lor.end:
 
 ; Check that we don't turn an icmp into a switch where it's not useful.
 define void @test17(i32 %x, i32 %y) {
-; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[Y:%.*]], 2
-; CHECK-NEXT:    [[OR_COND775:%.*]] = or i1 [[CMP]], [[SWITCH]]
-; CHECK-NEXT:    br i1 [[OR_COND775]], label [[LOR_LHS_FALSE8:%.*]], label [[RETURN:%.*]]
-; CHECK:       lor.lhs.false8:
-; CHECK-NEXT:    tail call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       return:
-; CHECK-NEXT:    ret void
-;
   %cmp = icmp ult i32 %x, 3
   %switch = icmp ult i32 %y, 2
   %or.cond775 = or i1 %cmp, %switch
@@ -652,20 +512,13 @@ lor.lhs.false8:
 return:
   ret void
 
+; CHECK-LABEL: @test17(
+; CHECK-NOT: switch.early.test
+; CHECK-NOT: switch i32
+; CHECK: ret void
 }
 
 define void @test18(i32 %arg) {
-; CHECK-LABEL: @test18(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[ARG_OFF:%.*]] = add i32 [[ARG:%.*]], -8
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[ARG_OFF]], 11
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[BB19:%.*]], label [[BB20:%.*]]
-; CHECK:       bb19:
-; CHECK-NEXT:    tail call void @foo1()
-; CHECK-NEXT:    br label [[BB20]]
-; CHECK:       bb20:
-; CHECK-NEXT:    ret void
-;
 bb:
   %tmp = and i32 %arg, -2
   %tmp1 = icmp eq i32 %tmp, 8
@@ -697,23 +550,12 @@ bb19:                                             ; preds = %bb8, %bb
 bb20:                                             ; preds = %bb19, %bb8
   ret void
 
+; CHECK-LABEL: @test18(
+; CHECK: %arg.off = add i32 %arg, -8
+; CHECK: icmp ult i32 %arg.off, 11
 }
 
 define void @PR26323(i1 %tobool23, i32 %tmp3) {
-; CHECK-LABEL: @PR26323(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp ne i32 [[TMP3:%.*]], 0
-; CHECK-NEXT:    [[NEG14:%.*]] = and i32 [[TMP3]], -2
-; CHECK-NEXT:    [[CMP17:%.*]] = icmp ne i32 [[NEG14]], -1
-; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[TOBOOL5]], [[TOBOOL23:%.*]]
-; CHECK-NEXT:    [[OR_COND1:%.*]] = and i1 [[CMP17]], [[OR_COND]]
-; CHECK-NEXT:    br i1 [[OR_COND1]], label [[IF_END29:%.*]], label [[IF_THEN27:%.*]]
-; CHECK:       if.then27:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    unreachable
-; CHECK:       if.end29:
-; CHECK-NEXT:    ret void
-;
 entry:
   %tobool5 = icmp ne i32 %tmp3, 0
   %neg14 = and i32 %tmp3, -2
@@ -730,19 +572,21 @@ if.end29:                                         ; preds = %entry
   ret void
 }
 
+; CHECK-LABEL: define void @PR26323(
+; CHECK:  %tobool5 = icmp ne i32 %tmp3, 0
+; CHECK:  %neg14 = and i32 %tmp3, -2
+; CHECK:  %cmp17 = icmp ne i32 %neg14, -1
+; CHECK:  %or.cond = and i1 %tobool5, %tobool23
+; CHECK:  %or.cond1 = and i1 %cmp17, %or.cond
+; CHECK:  br i1 %or.cond1, label %if.end29, label %if.then27
+
+; Form a switch when and'ing a negated power of two
+; CHECK-LABEL: define void @test19
+; CHECK: switch i32 %arg, label %else [
+; CHECK: i32 32, label %if
+; CHECK: i32 13, label %if
+; CHECK: i32 12, label %if
 define void @test19(i32 %arg) {
-; CHECK-LABEL: @test19(
-; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[ELSE:%.*]] [
-; CHECK-NEXT:    i32 32, label [[IF:%.*]]
-; CHECK-NEXT:    i32 13, label [[IF]]
-; CHECK-NEXT:    i32 12, label [[IF]]
-; CHECK-NEXT:    ]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 12
   %cmp2 = icmp eq i32 %arg, 32
@@ -758,19 +602,10 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
+; CHECK-LABEL: define void @test20
+; CHECK-NOT: switch
+; CHECK: ret void
 define void @test20(i32 %arg) {
-; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], -2
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[AND]], 13
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[ARG]], 32
-; CHECK-NEXT:    [[PRED:%.*]] = or i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 13
   %cmp2 = icmp eq i32 %arg, 32
@@ -786,19 +621,11 @@ else:
 }
 
 ; Form a switch when or'ing a power of two
+; CHECK-LABEL: define void @test21
+; CHECK: i32 32, label %else
+; CHECK: i32 13, label %else
+; CHECK: i32 12, label %else
 define void @test21(i32 %arg) {
-; CHECK-LABEL: @test21(
-; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[IF:%.*]] [
-; CHECK-NEXT:    i32 32, label [[ELSE:%.*]]
-; CHECK-NEXT:    i32 13, label [[ELSE]]
-; CHECK-NEXT:    i32 12, label [[ELSE]]
-; CHECK-NEXT:    ]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 13
   %cmp2 = icmp ne i32 %arg, 32
@@ -814,19 +641,10 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
+; CHECK-LABEL: define void @test22
+; CHECK-NOT: switch
+; CHECK: ret void
 define void @test22(i32 %arg) {
-; CHECK-LABEL: @test22(
-; CHECK-NEXT:    [[AND:%.*]] = or i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[AND]], 12
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[ARG]], 32
-; CHECK-NEXT:    [[PRED:%.*]] = and i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 12
   %cmp2 = icmp ne i32 %arg, 32
@@ -839,4 +657,4 @@ if:
 
 else:
   ret void
-}
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
index 3314fc982ae77..c752636ae83da 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -simplifycfg < %s | FileCheck %s
 ; RUN: opt -S -data-layout="p:32:32-p1:16:16" -simplifycfg < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s
 
@@ -7,151 +6,102 @@ declare void @foo1()
 declare void @foo2()
 
 define void @test1(i32 %V) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
-; CHECK-NEXT:    i32 17, label [[T:%.*]]
-; CHECK-NEXT:    i32 4, label [[T]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1(
+; CHECK:  switch i32 %V, label %F [
+; CHECK:    i32 17, label %T
+; CHECK:    i32 4, label %T
+; CHECK:  ]
 }
 
 define void @test1_ptr(i32* %V) {
-; DL-LABEL: @test1_ptr(
-; DL-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32* [[V:%.*]] to i32
-; DL-NEXT:    switch i32 [[MAGICPTR]], label [[F:%.*]] [
-; DL-NEXT:    i32 17, label [[T:%.*]]
-; DL-NEXT:    i32 4, label [[T]]
-; DL-NEXT:    ]
-; DL:       T:
-; DL-NEXT:    call void @foo1()
-; DL-NEXT:    ret void
-; DL:       F:
-; DL-NEXT:    call void @foo2()
-; DL-NEXT:    ret void
-;
-  %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
-  %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
-  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp eq i32* %V, inttoptr (i32 4 to i32*)
+        %C2 = icmp eq i32* %V, inttoptr (i32 17 to i32*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr(
+; DL:  %magicptr = ptrtoint i32* %V to i32
+; DL:  switch i32 %magicptr, label %F [
+; DL:    i32 17, label %T
+; DL:    i32 4, label %T
+; DL:  ]
 }
 
 define void @test1_ptr_as1(i32 addrspace(1)* %V) {
-; DL-LABEL: @test1_ptr_as1(
-; DL-NEXT:    [[MAGICPTR:%.*]] = ptrtoint i32 addrspace(1)* [[V:%.*]] to i16
-; DL-NEXT:    switch i16 [[MAGICPTR]], label [[F:%.*]] [
-; DL-NEXT:    i16 17, label [[T:%.*]]
-; DL-NEXT:    i16 4, label [[T]]
-; DL-NEXT:    ]
-; DL:       T:
-; DL-NEXT:    call void @foo1()
-; DL-NEXT:    ret void
-; DL:       F:
-; DL-NEXT:    call void @foo2()
-; DL-NEXT:    ret void
-;
-  %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
-  %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
-  %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 4 to i32 addrspace(1)*)
+        %C2 = icmp eq i32 addrspace(1)* %V, inttoptr (i32 17 to i32 addrspace(1)*)
+        %CN = or i1 %C1, %C2            ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test1_ptr_as1(
+; DL:  %magicptr = ptrtoint i32 addrspace(1)* %V to i16
+; DL:  switch i16 %magicptr, label %F [
+; DL:    i16 17, label %T
+; DL:    i16 4, label %T
+; DL:  ]
 }
 
 define void @test2(i32 %V) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    switch i32 [[V:%.*]], label [[T:%.*]] [
-; CHECK-NEXT:    i32 17, label [[F:%.*]]
-; CHECK-NEXT:    i32 4, label [[F]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
-  %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
-  %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
-  br i1 %CN, label %T, label %F
+        %C1 = icmp ne i32 %V, 4         ; <i1> [#uses=1]
+        %C2 = icmp ne i32 %V, 17                ; <i1> [#uses=1]
+        %CN = and i1 %C1, %C2           ; <i1> [#uses=1]
+        br i1 %CN, label %T, label %F
 T:              ; preds = %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %0
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
+; CHECK-LABEL: @test2(
+; CHECK:  switch i32 %V, label %T [
+; CHECK:    i32 17, label %F
+; CHECK:    i32 4, label %F
+; CHECK:  ]
 }
 
 define void @test3(i32 %V) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    switch i32 [[V:%.*]], label [[F:%.*]] [
-; CHECK-NEXT:    i32 4, label [[T:%.*]]
-; CHECK-NEXT:    i32 17, label [[T]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       F:
-; CHECK-NEXT:    call void @foo2()
-; CHECK-NEXT:    ret void
-;
-  %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
-  br i1 %C1, label %T, label %N
+        %C1 = icmp eq i32 %V, 4         ; <i1> [#uses=1]
+        br i1 %C1, label %T, label %N
 N:              ; preds = %0
-  %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
-  br i1 %C2, label %T, label %F
+        %C2 = icmp eq i32 %V, 17                ; <i1> [#uses=1]
+        br i1 %C2, label %T, label %F
 T:              ; preds = %N, %0
-  call void @foo1( )
-  ret void
+        call void @foo1( )
+        ret void
 F:              ; preds = %N
-  call void @foo2( )
-  ret void
+        call void @foo2( )
+        ret void
 
+; CHECK-LABEL: @test3(
+; CHECK: switch i32 %V, label %F [
+; CHECK:     i32 4, label %T
+; CHECK:     i32 17, label %T
+; CHECK:   ]
 }
 
 
 define i32 @test4(i8 zeroext %c) nounwind ssp noredzone {
-; CHECK-LABEL: @test4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
-; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
-; CHECK-NEXT:    i8 34, label [[LOR_END]]
-; CHECK-NEXT:    i8 92, label [[LOR_END]]
-; CHECK-NEXT:    ]
-; CHECK:       lor.rhs:
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
-; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
-; CHECK-NEXT:    ret i32 [[LOR_EXT]]
-;
 entry:
   %cmp = icmp eq i8 %c, 62
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -169,28 +119,20 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
 
+; CHECK-LABEL: @test4(
+; CHECK:  switch i8 %c, label %lor.rhs [
+; CHECK:    i8 62, label %lor.end
+; CHECK:    i8 34, label %lor.end
+; CHECK:    i8 92, label %lor.end
+; CHECK:  ]
 }
 
 define i32 @test5(i8 zeroext %c) nounwind ssp noredzone {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[LOR_RHS:%.*]] [
-; CHECK-NEXT:    i8 62, label [[LOR_END:%.*]]
-; CHECK-NEXT:    i8 34, label [[LOR_END]]
-; CHECK-NEXT:    i8 92, label [[LOR_END]]
-; CHECK-NEXT:    ]
-; CHECK:       lor.rhs:
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY]] ], [ true, [[ENTRY]] ]
-; CHECK-NEXT:    [[LOR_EXT:%.*]] = zext i1 [[TMP0]] to i32
-; CHECK-NEXT:    ret i32 [[LOR_EXT]]
-;
 entry:
   switch i8 %c, label %lor.rhs [
-  i8 62, label %lor.end
-  i8 34, label %lor.end
-  i8 92, label %lor.end
+    i8 62, label %lor.end
+    i8 34, label %lor.end
+    i8 92, label %lor.end
   ]
 
 lor.rhs:                                          ; preds = %entry
@@ -201,63 +143,48 @@ lor.end:                                          ; preds = %entry, %entry, %ent
   %0 = phi i1 [ true, %entry ], [ %V, %lor.rhs ], [ true, %entry ], [ true, %entry ]
   %lor.ext = zext i1 %0 to i32
   ret i32 %lor.ext
+; CHECK-LABEL: @test5(
+; CHECK:  switch i8 %c, label %lor.rhs [
+; CHECK:    i8 62, label %lor.end
+; CHECK:    i8 34, label %lor.end
+; CHECK:    i8 92, label %lor.end
+; CHECK:  ]
 }
 
 
 define i1 @test6({ i32, i32 }* %I) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP_1_I:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[I:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP_2_I:%.*]] = load i32, i32* [[TMP_1_I]]
-; CHECK-NEXT:    [[TMP_2_I_OFF:%.*]] = add i32 [[TMP_2_I]], -14
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[TMP_2_I_OFF]], 6
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
-; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
-;
 entry:
-  %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-  %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
-  %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
-  br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
+        %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
+        %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
+        %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
+        br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
 shortcirc_next.0:               ; preds = %entry
-  %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
-  br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
+        %tmp.6 = icmp eq i32 %tmp.2.i, 15               ; <i1> [#uses=1]
+        br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
 shortcirc_next.1:               ; preds = %shortcirc_next.0
-  %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
-  br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
+        %tmp.11 = icmp eq i32 %tmp.2.i, 16              ; <i1> [#uses=1]
+        br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
 shortcirc_next.2:               ; preds = %shortcirc_next.1
-  %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
-  br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
+        %tmp.16 = icmp eq i32 %tmp.2.i, 17              ; <i1> [#uses=1]
+        br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
 shortcirc_next.3:               ; preds = %shortcirc_next.2
-  %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
-  br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
+        %tmp.21 = icmp eq i32 %tmp.2.i, 18              ; <i1> [#uses=1]
+        br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
 shortcirc_next.4:               ; preds = %shortcirc_next.3
-  %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
-  br label %UnifiedReturnBlock
+        %tmp.26 = icmp eq i32 %tmp.2.i, 19              ; <i1> [#uses=1]
+        br label %UnifiedReturnBlock
 shortcirc_done.4:               ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
-  br label %UnifiedReturnBlock
+        br label %UnifiedReturnBlock
 UnifiedReturnBlock:             ; preds = %shortcirc_done.4, %shortcirc_next.4
-  %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
-  ret i1 %UnifiedRetVal
+        %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ]             ; <i1> [#uses=1]
+        ret i1 %UnifiedRetVal
 
+; CHECK-LABEL: @test6(
+; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
+; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
 }
 
 define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i8 99, label [[IF_THEN]]
-; CHECK-NEXT:    i8 97, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @foo1() #2
-; CHECK-NEXT:    ret void
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
 entry:
   %cmp = icmp ult i32 %x, 32
   %cmp4 = icmp eq i8 %c, 97
@@ -273,27 +200,17 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret void
 
+; CHECK-LABEL: @test7(
+; CHECK:   %cmp = icmp ult i32 %x, 32
+; CHECK:   br i1 %cmp, label %if.then, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %if.end [
+; CHECK:     i8 99, label %if.then
+; CHECK:     i8 97, label %if.then
+; CHECK:   ]
 }
 
 define i32 @test8(i8 zeroext %c, i32 %x, i1 %C) nounwind ssp noredzone {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[N:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       N:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 32
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i8 [[C:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i8 99, label [[IF_THEN]]
-; CHECK-NEXT:    i8 97, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 42, [[SWITCH_EARLY_TEST]] ], [ 42, [[N]] ], [ 42, [[SWITCH_EARLY_TEST]] ]
-; CHECK-NEXT:    tail call void @foo1() #2
-; CHECK-NEXT:    ret i32 [[A]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret i32 0
-;
 entry:
   br i1 %C, label %N, label %if.then
 N:
@@ -312,33 +229,17 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry
   ret i32 0
 
+; CHECK-LABEL: @test8(
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %if.end [
+; CHECK:     i8 99, label %if.then
+; CHECK:     i8 97, label %if.then
+; CHECK:   ]
+; CHECK:   %A = phi i32 [ 0, %entry ], [ 42, %switch.early.test ], [ 42, %N ], [ 42, %switch.early.test ]
 }
 
 ;; This is "Example 7" from http://blog.regehr.org/archives/320
 define i32 @test9(i8 zeroext %c) nounwind ssp noredzone {
-; CHECK-LABEL: @test9(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[C:%.*]], 33
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOR_END:%.*]], label [[SWITCH_EARLY_TEST:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i8 [[C]], label [[LOR_RHS:%.*]] [
-; CHECK-NEXT:    i8 92, label [[LOR_END]]
-; CHECK-NEXT:    i8 62, label [[LOR_END]]
-; CHECK-NEXT:    i8 60, label [[LOR_END]]
-; CHECK-NEXT:    i8 59, label [[LOR_END]]
-; CHECK-NEXT:    i8 58, label [[LOR_END]]
-; CHECK-NEXT:    i8 46, label [[LOR_END]]
-; CHECK-NEXT:    i8 44, label [[LOR_END]]
-; CHECK-NEXT:    i8 34, label [[LOR_END]]
-; CHECK-NEXT:    i8 39, label [[LOR_END]]
-; CHECK-NEXT:    ]
-; CHECK:       lor.rhs:
-; CHECK-NEXT:    br label [[LOR_END]]
-; CHECK:       lor.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, [[SWITCH_EARLY_TEST]] ], [ false, [[LOR_RHS]] ], [ true, [[ENTRY:%.*]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ], [ true, [[SWITCH_EARLY_TEST]] ]
-; CHECK-NEXT:    [[CONV46:%.*]] = zext i1 [[TMP0]] to i32
-; CHECK-NEXT:    ret i32 [[CONV46]]
-;
 entry:
   %cmp = icmp ult i8 %c, 33
   br i1 %cmp, label %lor.end, label %lor.lhs.false
@@ -384,23 +285,25 @@ lor.end:                                          ; preds = %lor.rhs, %lor.lhs.f
   %conv46 = zext i1 %0 to i32
   ret i32 %conv46
 
-
+; CHECK-LABEL: @test9(
+; CHECK:   %cmp = icmp ult i8 %c, 33
+; CHECK:   br i1 %cmp, label %lor.end, label %switch.early.test
+
+; CHECK: switch.early.test:
+; CHECK:   switch i8 %c, label %lor.rhs [
+; CHECK:     i8 92, label %lor.end
+; CHECK:     i8 62, label %lor.end
+; CHECK:     i8 60, label %lor.end
+; CHECK:     i8 59, label %lor.end
+; CHECK:     i8 58, label %lor.end
+; CHECK:     i8 46, label %lor.end
+; CHECK:     i8 44, label %lor.end
+; CHECK:     i8 34, label %lor.end
+; CHECK:     i8 39, label %lor.end
+; CHECK:   ]
 }
 
 define i32 @test10(i32 %mode, i1 %Cond) {
-; CHECK-LABEL: @test10(
-; CHECK-NEXT:    br i1 [[COND:%.*]], label [[SWITCH_EARLY_TEST:%.*]], label [[F:%.*]]
-; CHECK:       switch.early.test:
-; CHECK-NEXT:    switch i32 [[MODE:%.*]], label [[T:%.*]] [
-; CHECK-NEXT:    i32 51, label [[F]]
-; CHECK-NEXT:    i32 0, label [[F]]
-; CHECK-NEXT:    ]
-; CHECK:       T:
-; CHECK-NEXT:    [[MERGE:%.*]] = phi i32 [ 123, [[SWITCH_EARLY_TEST]] ], [ 324, [[F]] ]
-; CHECK-NEXT:    ret i32 [[MERGE]]
-; CHECK:       F:
-; CHECK-NEXT:    br label [[T]]
-;
   %A = icmp ne i32 %mode, 0
   %B = icmp ne i32 %mode, 51
   %C = and i1 %A, %B
@@ -411,27 +314,17 @@ T:
 F:
   ret i32 324
 
+; CHECK-LABEL: @test10(
+; CHECK:  br i1 %Cond, label %switch.early.test, label %F
+; CHECK:switch.early.test:
+; CHECK:  switch i32 %mode, label %T [
+; CHECK:    i32 51, label %F
+; CHECK:    i32 0, label %F
+; CHECK:  ]
 }
 
 ; PR8780
 define i32 @test11(i32 %bar) nounwind {
-; CHECK-LABEL: @test11(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[BAR:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 55, label [[RETURN:%.*]]
-; CHECK-NEXT:    i32 53, label [[RETURN]]
-; CHECK-NEXT:    i32 35, label [[RETURN]]
-; CHECK-NEXT:    i32 24, label [[RETURN]]
-; CHECK-NEXT:    i32 23, label [[RETURN]]
-; CHECK-NEXT:    i32 12, label [[RETURN]]
-; CHECK-NEXT:    i32 4, label [[RETURN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.end:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ]
-; CHECK-NEXT:    ret i32 [[RETVAL_0]]
-;
 entry:
   %cmp = icmp eq i32 %bar, 4
   %cmp2 = icmp eq i32 %bar, 35
@@ -460,21 +353,19 @@ return:                                           ; preds = %if.end, %if.then
   %retval.0 = phi i32 [ 1, %if.then ], [ 0, %if.end ]
   ret i32 %retval.0
 
+; CHECK-LABEL: @test11(
+; CHECK: switch i32 %bar, label %if.end [
+; CHECK:   i32 55, label %return
+; CHECK:   i32 53, label %return
+; CHECK:   i32 35, label %return
+; CHECK:   i32 24, label %return
+; CHECK:   i32 23, label %return
+; CHECK:   i32 12, label %return
+; CHECK:   i32 4, label %return
+; CHECK: ]
 }
 
 define void @test12() nounwind {
-; CHECK-LABEL: @test12(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_OLD:%.*]] = icmp eq i32 undef, undef
-; CHECK-NEXT:    br i1 [[A_OLD]], label [[BB55_US_US:%.*]], label [[MALFORMED:%.*]]
-; CHECK:       bb55.us.us:
-; CHECK-NEXT:    [[B:%.*]] = icmp ugt i32 undef, undef
-; CHECK-NEXT:    [[A:%.*]] = icmp eq i32 undef, undef
-; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[B]], [[A]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB55_US_US]], label [[MALFORMED]]
-; CHECK:       malformed:
-; CHECK-NEXT:    ret void
-;
 entry:
   br label %bb49.us.us
 
@@ -491,26 +382,12 @@ bb55.us.us:
 
 malformed:
   ret void
+; CHECK-LABEL: @test12(
 
 }
 
 ; test13 - handle switch formation with ult.
 define void @test13(i32 %x) nounwind ssp noredzone {
-; CHECK-LABEL: @test13(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
-; CHECK-NEXT:    i32 4, label [[IF_THEN]]
-; CHECK-NEXT:    i32 3, label [[IF_THEN]]
-; CHECK-NEXT:    i32 1, label [[IF_THEN]]
-; CHECK-NEXT:    i32 0, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    call void @foo1() #3
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
 entry:
   %cmp = icmp ult i32 %x, 2
   br i1 %cmp, label %if.then, label %lor.lhs.false3
@@ -533,26 +410,18 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
+; CHECK-LABEL: @test13(
+; CHECK:  switch i32 %x, label %if.end [
+; CHECK:     i32 6, label %if.then
+; CHECK:     i32 4, label %if.then
+; CHECK:     i32 3, label %if.then
+; CHECK:     i32 1, label %if.then
+; CHECK:     i32 0, label %if.then
+; CHECK:   ]
 }
 
 ; test14 - handle switch formation with ult.
 define void @test14(i32 %x) nounwind ssp noredzone {
-; CHECK-LABEL: @test14(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT:    i32 6, label [[IF_THEN:%.*]]
-; CHECK-NEXT:    i32 4, label [[IF_THEN]]
-; CHECK-NEXT:    i32 3, label [[IF_THEN]]
-; CHECK-NEXT:    i32 2, label [[IF_THEN]]
-; CHECK-NEXT:    i32 1, label [[IF_THEN]]
-; CHECK-NEXT:    i32 0, label [[IF_THEN]]
-; CHECK-NEXT:    ]
-; CHECK:       if.then:
-; CHECK-NEXT:    call void @foo1() #3
-; CHECK-NEXT:    br label [[IF_END]]
-; CHECK:       if.end:
-; CHECK-NEXT:    ret void
-;
 entry:
   %cmp = icmp ugt i32 %x, 2
   br i1 %cmp, label %lor.lhs.false3, label %if.then
@@ -575,15 +444,18 @@ if.then:                                          ; preds = %lor.lhs.false9, %lo
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false9
   ret void
+; CHECK-LABEL: @test14(
+; CHECK:  switch i32 %x, label %if.end [
+; CHECK:     i32 6, label %if.then
+; CHECK:     i32 4, label %if.then
+; CHECK:     i32 3, label %if.then
+; CHECK:     i32 1, label %if.then
+; CHECK:     i32 0, label %if.then
+; CHECK:   ]
 }
 
 ; Don't crash on ginormous ranges.
 define void @test15(i128 %x) nounwind {
-; CHECK-LABEL: @test15(
-; CHECK-NEXT:  if.end:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i128 [[X:%.*]], 2
-; CHECK-NEXT:    ret void
-;
   %cmp = icmp ugt i128 %x, 2
   br i1 %cmp, label %if.end, label %lor.false
 
@@ -598,19 +470,18 @@ if.then:
 if.end:
   ret void
 
+; CHECK-LABEL: @test15(
+; CHECK-NOT: switch
+; CHECK: ret void
 }
 
 ; PR8675
 ; rdar://5134905
 define zeroext i1 @test16(i32 %x) nounwind {
-; CHECK-LABEL: @test16(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[X_OFF]], 3
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[SWITCH]], i1 true, i1 false
-; CHECK-NEXT:    ret i1 [[SPEC_SELECT]]
-;
 entry:
+; CHECK-LABEL: @test16(
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
   %cmp.i = icmp eq i32 %x, 1
   br i1 %cmp.i, label %lor.end, label %lor.lhs.false
 
@@ -629,17 +500,6 @@ lor.end:
 
 ; Check that we don't turn an icmp into a switch where it's not useful.
 define void @test17(i32 %x, i32 %y) {
-; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 3
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[Y:%.*]], 2
-; CHECK-NEXT:    [[OR_COND775:%.*]] = or i1 [[CMP]], [[SWITCH]]
-; CHECK-NEXT:    br i1 [[OR_COND775]], label [[LOR_LHS_FALSE8:%.*]], label [[RETURN:%.*]]
-; CHECK:       lor.lhs.false8:
-; CHECK-NEXT:    tail call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       return:
-; CHECK-NEXT:    ret void
-;
   %cmp = icmp ult i32 %x, 3
   %switch = icmp ult i32 %y, 2
   %or.cond775 = or i1 %cmp, %switch
@@ -652,20 +512,13 @@ lor.lhs.false8:
 return:
   ret void
 
+; CHECK-LABEL: @test17(
+; CHECK-NOT: switch.early.test
+; CHECK-NOT: switch i32
+; CHECK: ret void
 }
 
 define void @test18(i32 %arg) {
-; CHECK-LABEL: @test18(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[ARG_OFF:%.*]] = add i32 [[ARG:%.*]], -8
-; CHECK-NEXT:    [[SWITCH:%.*]] = icmp ult i32 [[ARG_OFF]], 11
-; CHECK-NEXT:    br i1 [[SWITCH]], label [[BB19:%.*]], label [[BB20:%.*]]
-; CHECK:       bb19:
-; CHECK-NEXT:    tail call void @foo1()
-; CHECK-NEXT:    br label [[BB20]]
-; CHECK:       bb20:
-; CHECK-NEXT:    ret void
-;
 bb:
   %tmp = and i32 %arg, -2
   %tmp1 = icmp eq i32 %tmp, 8
@@ -697,23 +550,12 @@ bb19:                                             ; preds = %bb8, %bb
 bb20:                                             ; preds = %bb19, %bb8
   ret void
 
+; CHECK-LABEL: @test18(
+; CHECK: %arg.off = add i32 %arg, -8
+; CHECK: icmp ult i32 %arg.off, 11
 }
 
 define void @PR26323(i1 %tobool23, i32 %tmp3) {
-; CHECK-LABEL: @PR26323(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp ne i32 [[TMP3:%.*]], 0
-; CHECK-NEXT:    [[NEG14:%.*]] = and i32 [[TMP3]], -2
-; CHECK-NEXT:    [[CMP17:%.*]] = icmp ne i32 [[NEG14]], -1
-; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[TOBOOL5]], [[TOBOOL23:%.*]]
-; CHECK-NEXT:    [[OR_COND1:%.*]] = and i1 [[CMP17]], [[OR_COND]]
-; CHECK-NEXT:    br i1 [[OR_COND1]], label [[IF_END29:%.*]], label [[IF_THEN27:%.*]]
-; CHECK:       if.then27:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    unreachable
-; CHECK:       if.end29:
-; CHECK-NEXT:    ret void
-;
 entry:
   %tobool5 = icmp ne i32 %tmp3, 0
   %neg14 = and i32 %tmp3, -2
@@ -730,20 +572,21 @@ if.end29:                                         ; preds = %entry
   ret void
 }
 
+; CHECK-LABEL: define void @PR26323(
+; CHECK:  %tobool5 = icmp ne i32 %tmp3, 0
+; CHECK:  %neg14 = and i32 %tmp3, -2
+; CHECK:  %cmp17 = icmp ne i32 %neg14, -1
+; CHECK:  %or.cond = and i1 %tobool5, %tobool23
+; CHECK:  %or.cond1 = and i1 %cmp17, %or.cond
+; CHECK:  br i1 %or.cond1, label %if.end29, label %if.then27
+
 ; Form a switch when and'ing a negated power of two
+; CHECK-LABEL: define void @test19
+; CHECK: switch i32 %arg, label %else [
+; CHECK: i32 32, label %if
+; CHECK: i32 13, label %if
+; CHECK: i32 12, label %if
 define void @test19(i32 %arg) {
-; CHECK-LABEL: @test19(
-; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[ELSE:%.*]] [
-; CHECK-NEXT:    i32 32, label [[IF:%.*]]
-; CHECK-NEXT:    i32 13, label [[IF]]
-; CHECK-NEXT:    i32 12, label [[IF]]
-; CHECK-NEXT:    ]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 12
   %cmp2 = icmp eq i32 %arg, 32
@@ -759,19 +602,10 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
+; CHECK-LABEL: define void @test20
+; CHECK-NOT: switch
+; CHECK: ret void
 define void @test20(i32 %arg) {
-; CHECK-LABEL: @test20(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], -2
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[AND]], 13
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[ARG]], 32
-; CHECK-NEXT:    [[PRED:%.*]] = or i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = and i32 %arg, -2
   %cmp1 = icmp eq i32 %and, 13
   %cmp2 = icmp eq i32 %arg, 32
@@ -787,19 +621,11 @@ else:
 }
 
 ; Form a switch when or'ing a power of two
+; CHECK-LABEL: define void @test21
+; CHECK: i32 32, label %else
+; CHECK: i32 13, label %else
+; CHECK: i32 12, label %else
 define void @test21(i32 %arg) {
-; CHECK-LABEL: @test21(
-; CHECK-NEXT:    switch i32 [[ARG:%.*]], label [[IF:%.*]] [
-; CHECK-NEXT:    i32 32, label [[ELSE:%.*]]
-; CHECK-NEXT:    i32 13, label [[ELSE]]
-; CHECK-NEXT:    i32 12, label [[ELSE]]
-; CHECK-NEXT:    ]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 13
   %cmp2 = icmp ne i32 %arg, 32
@@ -815,19 +641,10 @@ else:
 }
 
 ; Since %cmp1 is always false, a switch is never formed
+; CHECK-LABEL: define void @test22
+; CHECK-NOT: switch
+; CHECK: ret void
 define void @test22(i32 %arg) {
-; CHECK-LABEL: @test22(
-; CHECK-NEXT:    [[AND:%.*]] = or i32 [[ARG:%.*]], 1
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[AND]], 12
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[ARG]], 32
-; CHECK-NEXT:    [[PRED:%.*]] = and i1 [[CMP1]], [[CMP2]]
-; CHECK-NEXT:    br i1 [[PRED]], label [[IF:%.*]], label [[ELSE:%.*]]
-; CHECK:       if:
-; CHECK-NEXT:    call void @foo1()
-; CHECK-NEXT:    ret void
-; CHECK:       else:
-; CHECK-NEXT:    ret void
-;
   %and = or i32 %arg, 1
   %cmp1 = icmp ne i32 %and, 12
   %cmp2 = icmp ne i32 %arg, 32
@@ -840,4 +657,4 @@ if:
 
 else:
   ret void
-}
+}
\ No newline at end of file

From a549dd25607d2c4bf2d3d15576595e0c017b385e Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Sun, 26 May 2019 18:41:35 +0000
Subject: [PATCH 0269/1176] [MCA] Refactor the logic that computes the critical
 memory dependency info. NFCI

CriticalRegDep has been renamed CriticalDependency, and it is now used by class
Instruction to store information about the critical register dependency and the
critical memory dependency. No functional change intendend.

llvm-svn: 361737
---
 llvm/include/llvm/MCA/Instruction.h      | 52 +++++++++--------
 llvm/lib/MCA/HardwareUnits/Scheduler.cpp | 72 ++++++++++++++++++++----
 llvm/lib/MCA/Instruction.cpp             | 24 ++++----
 llvm/lib/MCA/Stages/DispatchStage.cpp    |  3 -
 4 files changed, 103 insertions(+), 48 deletions(-)

diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 9ac1fffb4430c..74be06e3c1c08 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -80,11 +80,10 @@ struct ReadDescriptor {
 
 class ReadState;
 
-/// Longest register dependency.
+/// A critical data dependency descriptor.
 ///
-/// Used internally by WriteState/ReadState/InstructionBase to help with the
-/// computation of the longest register dependency for an instruction.
-struct CriticalRegDep {
+/// Field RegID is set to the invalid register for memory dependencies.
+struct CriticalDependency {
   unsigned IID;
   unsigned RegID;
   unsigned Cycles;
@@ -136,7 +135,7 @@ class WriteState {
   unsigned DependentWriteCyclesLeft;
 
   // Critical register dependency for this write.
-  CriticalRegDep CRD;
+  CriticalDependency CRD;
 
   // A list of dependent reads. Users is a set of dependent
   // reads. A dependent read is added to the set only if CyclesLeft
@@ -166,7 +165,7 @@ class WriteState {
     return DependentWriteCyclesLeft;
   }
   const WriteState *getDependentWrite() const { return DependentWrite; }
-  const CriticalRegDep &getCriticalRegDep() const { return CRD; }
+  const CriticalDependency &getCriticalRegDep() const { return CRD; }
 
   // This method adds Use to the set of data dependent reads. IID is the
   // instruction identifier associated with this write. ReadAdvance is the
@@ -244,7 +243,7 @@ class ReadState {
   // propagated to field CyclesLeft.
   unsigned TotalCycles;
   // Longest register dependency.
-  CriticalRegDep CRD;
+  CriticalDependency CRD;
   // This field is set to true only if there are no dependent writes, and
   // there are no `CyclesLeft' to wait.
   bool IsReady;
@@ -263,7 +262,7 @@ class ReadState {
   unsigned getSchedClass() const { return RD->SchedClassID; }
   unsigned getRegisterID() const { return RegisterID; }
   unsigned getRegisterFileID() const { return PRFID; }
-  const CriticalRegDep &getCriticalRegDep() const { return CRD; }
+  const CriticalDependency &getCriticalRegDep() const { return CRD; }
 
   bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; }
   bool isReady() const { return IsReady; }
@@ -405,12 +404,8 @@ class InstructionBase {
   // One entry per each implicit and explicit register use.
   SmallVector<ReadState, 4> Uses;
 
-  // Critical register dependency.
-  CriticalRegDep CRD;
-
 public:
-  InstructionBase(const InstrDesc &D)
-      : Desc(D), IsOptimizableMove(false), CRD() {}
+  InstructionBase(const InstrDesc &D) : Desc(D), IsOptimizableMove(false) {}
 
   SmallVectorImpl<WriteState> &getDefs() { return Defs; }
   const ArrayRef<WriteState> getDefs() const { return Defs; }
@@ -420,9 +415,6 @@ class InstructionBase {
 
   unsigned getLatency() const { return Desc.MaxLatency; }
 
-  const CriticalRegDep &getCriticalRegDep() const { return CRD; }
-  const CriticalRegDep &computeCriticalRegDep();
-
   bool hasDependentUsers() const {
     return any_of(Defs,
                   [](const WriteState &Def) { return Def.getNumUsers() > 0; });
@@ -466,14 +458,19 @@ class Instruction : public InstructionBase {
   // Retire Unit token ID for this instruction.
   unsigned RCUTokenID;
 
+  // Critical register dependency.
+  CriticalDependency CriticalRegDep;
+
+  // Critical memory dependency.
+  CriticalDependency CriticalMemDep;
+
   // A bitmask of busy processor resource units.
   // This field is set to zero only if execution is not delayed during this
   // cycle because of unavailable pipeline resources.
   uint64_t CriticalResourceMask;
 
-  // An instruction identifier. This field is only set if execution is delayed
-  // by a memory dependency.
-  unsigned CriticalMemDep;
+  // Used internally by the logic that computes the critical memory dependency.
+  const Instruction *CurrentMemDep;
 
   // True if this instruction has been optimized at register renaming stage.
   bool IsEliminated;
@@ -481,8 +478,8 @@ class Instruction : public InstructionBase {
 public:
   Instruction(const InstrDesc &D)
       : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
-        RCUTokenID(0), CriticalResourceMask(0), CriticalMemDep(0),
-        IsEliminated(false) {}
+        RCUTokenID(0), CriticalRegDep(), CriticalMemDep(),
+        CriticalResourceMask(0), CurrentMemDep(nullptr), IsEliminated(false) {}
 
   unsigned getRCUTokenID() const { return RCUTokenID; }
   int getCyclesLeft() const { return CyclesLeft; }
@@ -523,12 +520,21 @@ class Instruction : public InstructionBase {
     Stage = IS_RETIRED;
   }
 
+  const CriticalDependency &getCriticalRegDep() const { return CriticalRegDep; }
+  const CriticalDependency &getCriticalMemDep() const { return CriticalMemDep; }
+  const CriticalDependency &computeCriticalRegDep();
+
+  void setCriticalMemDep(unsigned IID, unsigned Cycles) {
+    CriticalMemDep.IID = IID;
+    CriticalMemDep.Cycles = Cycles;
+  }
+  const Instruction *getCurrentMemDep() const { return CurrentMemDep; }
+  void setCurrentMemDep(const Instruction *CMD) { CurrentMemDep = CMD; }
+
   uint64_t getCriticalResourceMask() const { return CriticalResourceMask; }
-  unsigned getCriticalMemDep() const { return CriticalMemDep; }
   void setCriticalResourceMask(uint64_t ResourceMask) {
     CriticalResourceMask = ResourceMask;
   }
-  void setCriticalMemDep(unsigned IID) { CriticalMemDep = IID; }
 
   void cycleEvent();
 };
diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index bf48d9288994c..b2928ed1b1223 100644
--- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -82,6 +82,8 @@ void Scheduler::issueInstructionImpl(
   // This updates the internal state of each write.
   IS->execute(IR.getSourceIndex());
 
+  IS->computeCriticalRegDep();
+
   if (IS->isExecuting())
     IssuedSet.emplace_back(IR);
   else if (IS->isExecuted())
@@ -107,6 +109,59 @@ void Scheduler::issueInstruction(
     promoteToReadySet(ReadyInstructions);
 }
 
+static bool initializeCriticalMemDepInfo(InstRef &IR, const LSUnit &LSU) {
+  Instruction &IS = *IR.getInstruction();
+  assert(IS.isMemOp() && "Not a memory operation!");
+
+  // Check if this instruction depends on another memory operation.
+  InstRef DependentMemOp = LSU.isReady(IR);
+  const Instruction *MemOp = DependentMemOp.getInstruction();
+  IS.setCurrentMemDep(MemOp);
+
+  // Initialize the CriticalMemDep structure.
+  unsigned Cycles = 0;
+  if (MemOp->isExecuting())
+    Cycles = static_cast<unsigned>(MemOp->getCyclesLeft());
+  IS.setCriticalMemDep(DependentMemOp.getSourceIndex(), Cycles);
+  return IR.getSourceIndex() == DependentMemOp.getSourceIndex();
+}
+
+static bool updateMemoryDependencyInfo(InstRef &IR, const LSUnit &LSU) {
+  Instruction &IS = *IR.getInstruction();
+  assert(IS.isMemOp() && "Not a memory operation!");
+
+  const Instruction *MemOp = IS.getCurrentMemDep();
+  if (!MemOp && initializeCriticalMemDepInfo(IR, LSU))
+    return true;
+
+  MemOp = IS.getCurrentMemDep();
+  if (MemOp == IR.getInstruction())
+    return true;
+
+  const CriticalDependency &CMD = IS.getCriticalMemDep();
+  if (MemOp->isExecuting() && !CMD.Cycles) {
+    // Update the critical memory dependency info.
+    IS.setCriticalMemDep(CMD.IID, MemOp->getCyclesLeft());
+    return false;
+  }
+
+  if (!MemOp->isExecuted() && !MemOp->isRetired())
+    return false;
+
+  // Check if there are still unsolved memory dependencies.
+  InstRef DependentMemOp = LSU.isReady(IR);
+  MemOp = DependentMemOp.getInstruction();
+  IS.setCurrentMemDep(MemOp);
+  if (DependentMemOp == IR)
+    return true;
+
+  unsigned Cycles = 0;
+  if (MemOp->isExecuting())
+    Cycles = static_cast<unsigned>(MemOp->getCyclesLeft());
+  IS.setCriticalMemDep(DependentMemOp.getSourceIndex(), Cycles);
+  return false;
+}
+
 bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
   // Scan the set of waiting instructions and promote them to the
   // ready set if operands are all ready.
@@ -116,19 +171,14 @@ bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
     if (!IR)
       break;
 
-    // Check if there are still unsolved memory dependencies.
+    // Check if there are unsolved memory dependencies.
     Instruction &IS = *IR.getInstruction();
-    if (IS.isMemOp()) {
-      const InstRef &CriticalMemDep = LSU.isReady(IR);
-      if (CriticalMemDep != IR) {
-        IS.setCriticalMemDep(CriticalMemDep.getSourceIndex());
-        ++I;
-        continue;
-      }
+    if (IS.isMemOp() && !updateMemoryDependencyInfo(IR, LSU)) {
+      ++I;
+      continue;
     }
 
-    // Check if this instruction is now ready. In case, force
-    // a transition in state using method 'update()'.
+    // Check if there are unsolved register dependencies.
     if (!IS.isReady() && !IS.updatePending()) {
       ++I;
       continue;
@@ -301,7 +351,7 @@ bool Scheduler::dispatch(const InstRef &IR) {
   }
 
   // Memory operations that are not in a ready state are initially assigned to
-  // the WaitSet. 
+  // the WaitSet.
   if (!IS.isReady() || (IS.isMemOp() && LSU.isReady(IR) != IR)) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
     WaitSet.push_back(IR);
diff --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp
index 5e2fb771e4f5f..001842bca3185 100644
--- a/llvm/lib/MCA/Instruction.cpp
+++ b/llvm/lib/MCA/Instruction.cpp
@@ -18,7 +18,8 @@
 namespace llvm {
 namespace mca {
 
-void WriteState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
+void WriteState::writeStartEvent(unsigned IID, unsigned RegID,
+                                 unsigned Cycles) {
   CRD.IID = IID;
   CRD.RegID = RegID;
   CRD.Cycles = Cycles;
@@ -134,23 +135,24 @@ void WriteRef::dump() const {
 }
 #endif
 
-const CriticalRegDep &InstructionBase::computeCriticalRegDep() {
-  if (CRD.Cycles || (Defs.empty() && Uses.empty()))
-    return CRD;
+const CriticalDependency &Instruction::computeCriticalRegDep() {
+  if (CriticalRegDep.Cycles)
+    return CriticalRegDep;
+
   unsigned MaxLatency = 0;
-  for (const WriteState &WS : Defs) {
-    const CriticalRegDep &WriteCRD = WS.getCriticalRegDep();
+  for (const WriteState &WS : getDefs()) {
+    const CriticalDependency &WriteCRD = WS.getCriticalRegDep();
     if (WriteCRD.Cycles > MaxLatency)
-      CRD = WriteCRD;
+      CriticalRegDep = WriteCRD;
   }
 
-  for (const ReadState &RS : Uses) {
-    const CriticalRegDep &ReadCRD = RS.getCriticalRegDep();
+  for (const ReadState &RS : getUses()) {
+    const CriticalDependency &ReadCRD = RS.getCriticalRegDep();
     if (ReadCRD.Cycles > MaxLatency)
-      CRD = ReadCRD;
+      CriticalRegDep = ReadCRD;
   }
 
-  return CRD;
+  return CriticalRegDep;
 }
 
 void Instruction::dispatch(unsigned RCUToken) {
diff --git a/llvm/lib/MCA/Stages/DispatchStage.cpp b/llvm/lib/MCA/Stages/DispatchStage.cpp
index 80d6da09b5e96..7334a268e9a6f 100644
--- a/llvm/lib/MCA/Stages/DispatchStage.cpp
+++ b/llvm/lib/MCA/Stages/DispatchStage.cpp
@@ -102,9 +102,6 @@ Error DispatchStage::dispatch(InstRef IR) {
       IS.setEliminated();
   }
 
-  if (IS.isMemOp())
-    IS.setCriticalMemDep(IR.getSourceIndex());
-
   // A dependency-breaking instruction doesn't have to wait on the register
   // input operands, and it is often optimized at register renaming stage.
   // Update RAW dependencies if this instruction is not a dependency-breaking

From 2916b9e28cabf6a101eef1f17ac8a767d9301d84 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 26 May 2019 18:43:44 +0000
Subject: [PATCH 0270/1176] [SelectionDAG] MaskedValueIsZero - add demanded
 elements implementation

Will be used in an upcoming patch but I've updated the original implementation to call this to ensure test coverage.

llvm-svn: 361738
---
 llvm/include/llvm/CodeGen/SelectionDAG.h       | 10 ++++++++--
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 17 +++++++++++++++--
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 56dd1ccbb7309..9395fdbcf1ed9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1448,8 +1448,14 @@ class SelectionDAG {
   /// Return true if 'Op & Mask' is known to be zero.  We
   /// use this predicate to simplify operations downstream.  Op and Mask are
   /// known to be the same type.
-  bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth = 0)
-    const;
+  bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
+                         unsigned Depth = 0) const;
+
+  /// Return true if 'Op & Mask' is known to be zero in DemandedElts.  We
+  /// use this predicate to simplify operations downstream.  Op and Mask are
+  /// known to be the same type.
+  bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
+                         const APInt &DemandedElts, unsigned Depth = 0) const;
 
   /// Determine which bits of Op are known to be either zero or one and return
   /// them in Known. For vectors, the known bits are those that are shared by
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 553a46f6ec160..88cbfd1d69f74 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2207,9 +2207,22 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
 /// this predicate to simplify operations downstream.  Mask is known to be zero
 /// for bits that V cannot have.
-bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
                                      unsigned Depth) const {
-  return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+  EVT VT = V.getValueType();
+  APInt DemandedElts = VT.isVector()
+                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
+                           : APInt(1, 1);
+  return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
+/// DemandedElts.  We use this predicate to simplify operations downstream.
+/// Mask is known to be zero for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+                                     const APInt &DemandedElts,
+                                     unsigned Depth) const {
+  return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
 }
 
 /// isSplatValue - Return true if the vector V has the same value

From 06e02856ab5f9e6ce2a4173b4df6736e6cc8f969 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 26 May 2019 18:58:14 +0000
Subject: [PATCH 0271/1176] [SelectionDAG] GetDemandedBits - cleanup to more
 closely match SimplifyDemandedBits. NFCI.

Prep work before adding demanded elts support.

llvm-svn: 361739
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 37 +++++++++++--------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 88cbfd1d69f74..ad534ab497b4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2123,15 +2123,17 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
 
 /// See if the specified operand can be simplified with the knowledge that only
 /// the bits specified by Mask are used.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
   switch (V.getOpcode()) {
   default:
     break;
   case ISD::Constant: {
-    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+    auto *CV = cast<ConstantSDNode>(V.getNode());
     assert(CV && "Const value should be ConstSDNode.");
     const APInt &CVal = CV->getAPIntValue();
-    APInt NewVal = CVal & Mask;
+    APInt NewVal = CVal & DemandedBits;
     if (NewVal != CVal)
       return getConstant(NewVal, SDLoc(V), V.getValueType());
     break;
@@ -2139,24 +2141,25 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
   case ISD::OR:
   case ISD::XOR:
     // If the LHS or RHS don't contribute bits to the or, drop them.
-    if (MaskedValueIsZero(V.getOperand(0), Mask))
+    if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
       return V.getOperand(1);
-    if (MaskedValueIsZero(V.getOperand(1), Mask))
+    if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
       return V.getOperand(0);
     break;
   case ISD::SRL:
     // Only look at single-use SRLs.
     if (!V.getNode()->hasOneUse())
       break;
-    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+    if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
       // See if we can recursively simplify the LHS.
       unsigned Amt = RHSC->getZExtValue();
 
       // Watch out for shift count overflow though.
-      if (Amt >= Mask.getBitWidth())
+      if (Amt >= DemandedBits.getBitWidth())
         break;
-      APInt NewMask = Mask << Amt;
-      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+      APInt SrcDemandedBits = DemandedBits << Amt;
+      if (SDValue SimplifyLHS =
+              GetDemandedBits(V.getOperand(0), SrcDemandedBits))
         return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
                        V.getOperand(1));
     }
@@ -2166,8 +2169,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
     // Also handle the case where masked out bits in X are known to be zero.
     if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
       const APInt &AndVal = RHSC->getAPIntValue();
-      if (Mask.isSubsetOf(AndVal) ||
-          Mask.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | AndVal))
+      if (DemandedBits.isSubsetOf(AndVal) ||
+          DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
+                                  AndVal))
         return V.getOperand(0);
     }
     break;
@@ -2176,11 +2180,12 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
     SDValue Src = V.getOperand(0);
     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
     // Being conservative here - only peek through if we only demand bits in the
-    // non-extended source (even though the extended bits are technically undef).
-    if (Mask.getActiveBits() > SrcBitWidth)
+    // non-extended source (even though the extended bits are technically
+    // undef).
+    if (DemandedBits.getActiveBits() > SrcBitWidth)
       break;
-    APInt SrcMask = Mask.trunc(SrcBitWidth);
-    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+    APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
+    if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
       return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
     break;
   }
@@ -2189,7 +2194,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
     unsigned ExVTBits = ExVT.getScalarSizeInBits();
 
     // If none of the extended bits are demanded, eliminate the sextinreg.
-    if (Mask.getActiveBits() <= ExVTBits)
+    if (DemandedBits.getActiveBits() <= ExVTBits)
       return V.getOperand(0);
 
     break;

From c2493ce4a40be025054087fde59dd0f339baf6c0 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Sun, 26 May 2019 19:50:31 +0000
Subject: [PATCH 0272/1176] [MCA][Scheduler] Improved critical memory
 dependency computation.

This fixes a problem where back-pressure increases caused by register
dependencies were not correctly notified if execution was also delayed by memory
dependencies.

llvm-svn: 361740
---
 llvm/lib/MCA/HardwareUnits/Scheduler.cpp      | 22 ++++++++++++++-----
 .../llvm-mca/X86/BtVer2/bottleneck-hints-3.s  |  2 +-
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index b2928ed1b1223..6b3448fbe82cd 100644
--- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -105,7 +105,13 @@ void Scheduler::issueInstruction(
   // other dependent instructions. Dependent instructions may be issued during
   // this same cycle if operands have ReadAdvance entries.  Promote those
   // instructions to the ReadySet and notify the caller that those are ready.
-  if (HasDependentUsers && promoteToPendingSet(PendingInstructions))
+  // If IR is a memory operation, then always call method `promoteToReadySet()`
+  // to notify any dependent memory operations that IR started execution.
+  bool ShouldPromoteInstructions = Inst.isMemOp();
+  if (HasDependentUsers)
+    ShouldPromoteInstructions |= promoteToPendingSet(PendingInstructions);
+
+  if (ShouldPromoteInstructions)
     promoteToReadySet(ReadyInstructions);
 }
 
@@ -287,15 +293,19 @@ uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) {
 void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
                                         SmallVectorImpl<InstRef> &MemDeps) {
   const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet;
-  for (InstRef &IR : make_range(PendingSet.begin(), EndIt)) {
-    Instruction &IS = *IR.getInstruction();
+  for (const InstRef &IR : make_range(PendingSet.begin(), EndIt)) {
+    const Instruction &IS = *IR.getInstruction();
     if (Resources->checkAvailability(IS.getDesc()))
       continue;
 
-    if (IS.isReady() || (IS.isMemOp() && LSU.isReady(IR) != IR))
-      MemDeps.emplace_back(IR);
-    else
+    const CriticalDependency &CMD = IS.getCriticalMemDep();
+    if (IS.isMemOp() && IS.getCurrentMemDep() != &IS && !CMD.Cycles)
+      continue;
+
+    if (IS.isPending())
       RegDeps.emplace_back(IR);
+    if (CMD.Cycles)
+      MemDeps.emplace_back(IR);
   }
 }
 
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s b/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
index 6cd613a52fc2e..bedfef1d95ff7 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
@@ -24,7 +24,7 @@ vmovaps %xmm0, 48(%rdi)
 # CHECK-NEXT: Throughput Bottlenecks:
 # CHECK-NEXT:   Resource Pressure       [ 0.00% ]
 # CHECK-NEXT:   Data Dependencies:      [ 99.89% ]
-# CHECK-NEXT:   - Register Dependencies [ 0.00% ]
+# CHECK-NEXT:   - Register Dependencies [ 83.24% ]
 # CHECK-NEXT:   - Memory Dependencies   [ 99.89% ]
 
 # CHECK:      Instruction Info:

From ba447bae7448435c9986eece0811da1423972fdd Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <Alexander.Timofeev@amd.com>
Date: Sun, 26 May 2019 20:33:26 +0000
Subject: [PATCH 0273/1176]     [AMDGPU] Divergence driven ISel. Assign
 register class for cross block values according to the divergence.

    Details: To make instruction selection really divergence driven it is necessary to assign
             the correct register classes to the cross block values beforehand. For the divergent targets
             same value type requires different register classes dependent on the value divergence.

    Reviewers: rampitec, nhaehnle

    Differential Revision: https://reviews.llvm.org/D59990

    This commit was reverted because of the build failure.
    The reason was mlformed patch.
    Build failure fixed.

llvm-svn: 361741
---
 .../llvm/CodeGen/FunctionLoweringInfo.h       |  11 +-
 llvm/include/llvm/CodeGen/SelectionDAG.h      |   1 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |  11 +-
 .../include/llvm/CodeGen/TargetRegisterInfo.h |   5 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   6 +-
 .../SelectionDAG/FunctionLoweringInfo.cpp     |  14 +-
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp |  33 ++--
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  |   2 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   4 +-
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp |   2 +-
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp    | 166 +++++++-----------
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  91 +++++++++-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   5 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  13 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h       |   5 +
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   4 +-
 llvm/lib/Target/ARM/ARMISelLowering.h         |   3 +-
 llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll    |  12 +-
 llvm/test/CodeGen/AMDGPU/branch-relaxation.ll |   3 +-
 llvm/test/CodeGen/AMDGPU/branch-uniformity.ll |   4 +-
 .../AMDGPU/control-flow-fastregalloc.ll       |   7 +-
 .../divergent-branch-uniform-condition.ll     |  55 +++---
 .../AMDGPU/extract_subvector_vec4_vec3.ll     |   6 +-
 llvm/test/CodeGen/AMDGPU/fabs.ll              |  12 +-
 .../CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll   |  58 +++---
 llvm/test/CodeGen/AMDGPU/fmin_legacy.ll       |   8 +-
 llvm/test/CodeGen/AMDGPU/fneg-fabs.ll         |  16 +-
 llvm/test/CodeGen/AMDGPU/fsub.ll              |  12 +-
 llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll |  10 +-
 .../AMDGPU/i1-copy-phi-uniform-branch.ll      |   1 -
 llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll |   6 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll   |   2 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll |   8 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll     |   2 +
 .../AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll     |   2 +-
 .../AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll      |   2 +-
 llvm/test/CodeGen/AMDGPU/loop_break.ll        |   8 +-
 llvm/test/CodeGen/AMDGPU/madak.ll             |  12 +-
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |   5 +-
 llvm/test/CodeGen/AMDGPU/multilevel-break.ll  |   5 +-
 llvm/test/CodeGen/AMDGPU/select-opt.ll        |   4 +-
 llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll |   3 +-
 .../CodeGen/AMDGPU/si-fix-sgpr-copies.mir     |   2 +-
 llvm/test/CodeGen/AMDGPU/smrd.ll              |   1 -
 .../AMDGPU/subreg-coalescer-undef-use.ll      |  53 ++++--
 .../AMDGPU/uniform-loop-inside-nonuniform.ll  |   5 +-
 .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll |   9 +-
 llvm/test/CodeGen/AMDGPU/valu-i1.ll           |   6 +-
 ...vgpr-spill-emergency-stack-slot-compute.ll |   1 +
 49 files changed, 413 insertions(+), 303 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index b3077fcaabd4f..fb60191abd3a0 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -13,7 +13,6 @@
 
 #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
 #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
-
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
@@ -21,6 +20,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -57,6 +57,7 @@ class FunctionLoweringInfo {
   const TargetLowering *TLI;
   MachineRegisterInfo *RegInfo;
   BranchProbabilityInfo *BPI;
+  const LegacyDivergenceAnalysis *DA;
   /// CanLowerReturn - true iff the function's return value can be lowered to
   /// registers.
   bool CanLowerReturn;
@@ -198,9 +199,11 @@ class FunctionLoweringInfo {
     return ValueMap.count(V);
   }
 
-  unsigned CreateReg(MVT VT);
+  unsigned CreateReg(MVT VT, bool isDivergent = false);
+
+  unsigned CreateRegs(const Value *V);
 
-  unsigned CreateRegs(Type *Ty);
+  unsigned CreateRegs(Type *Ty, bool isDivergent = false);
 
   unsigned InitializeRegForValue(const Value *V) {
     // Tokens never live in vregs.
@@ -209,7 +212,7 @@ class FunctionLoweringInfo {
     unsigned &R = ValueMap[V];
     assert(R == 0 && "Already initialized this value register!");
     assert(VirtReg2Value.empty());
-    return R = CreateRegs(V->getType());
+    return R = CreateRegs(V);
   }
 
   /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 9395fdbcf1ed9..08333870467a4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -406,6 +406,7 @@ class SelectionDAG {
   const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
   const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
   const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
+  const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
   LLVMContext *getContext() const {return Context; }
   OptimizationRemarkEmitter &getORE() const { return *ORE; }
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a7b7a7dd6f1ca..cabca9cb22101 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -636,12 +636,21 @@ class TargetLoweringBase {
 
   /// Return the register class that should be used for the specified value
   /// type.
-  virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+  virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
+    (void)isDivergent;
     const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
     assert(RC && "This value type is not natively supported!");
     return RC;
   }
 
+  /// Allows target to decide about the register class of the
+  /// specific value that is live outside the defining block.
+  /// Returns true if the value needs uniform register class.
+  virtual bool requiresUniformRegister(MachineFunction &MF,
+                                       const Value *) const {
+    return false;
+  }
+
   /// Return the 'representative' register class for the specified value
   /// type.
   ///
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 5ed1e448575fc..7c65e7407d9e2 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -520,6 +520,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
   /// function.  Used by MachineRegisterInfo::isConstantPhysReg().
   virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
 
+  /// Returns true if the register class is considered divergent.
+  virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
+    return false;
+  }
+
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that
   /// have call sequences where a GOT register may be updated by the caller
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 117654bc7a3f7..d53ee3134d550 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13919,9 +13919,11 @@ struct LoadedSlice {
     assert(DAG && "Missing context");
     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
     EVT ResVT = Use->getValueType(0);
-    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+    const TargetRegisterClass *ResRC =
+        TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
     const TargetRegisterClass *ArgRC =
-        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+                           Use->getOperand(0)->isDivergent());
     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
       return false;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index d8ef10f58aa7c..8b405562904f3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
   RegInfo = &MF->getRegInfo();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   unsigned StackAlign = TFI->getStackAlignment();
+  DA = DAG->getDivergenceAnalysis();
 
   // Check whether the function can return without sret-demotion.
   SmallVector<ISD::OutputArg, 4> Outs;
@@ -345,9 +346,9 @@ void FunctionLoweringInfo::clear() {
 }
 
 /// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
   return RegInfo->createVirtualRegister(
-      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+      MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
 }
 
 /// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -357,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
 /// In the case that the given value has struct or array type, this function
 /// will assign registers for each member or element.
 ///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
 
   SmallVector<EVT, 4> ValueVTs;
@@ -370,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
 
     unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i) {
-      unsigned R = CreateReg(RegisterVT);
+      unsigned R = CreateReg(RegisterVT, isDivergent);
       if (!FirstReg) FirstReg = R;
     }
   }
   return FirstReg;
 }
 
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+  return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
+                                      DA->isDivergent(V));
+}
+
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
 /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
 /// the register's LiveOutInfo is for a smaller bit width, it is extended to
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 059e5f7c8dd33..4b78d1bb6b160 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -105,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
 
   // Stick to the preferred register classes for legal types.
   if (TLI->isTypeLegal(VT))
-    UseRC = TLI->getRegClassFor(VT);
+    UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
 
   if (!IsClone && !IsCloned)
     for (SDNode *User : Node->uses()) {
@@ -164,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
            "Incompatible phys register def and uses!");
     DstRC = UseRC;
   } else {
-    DstRC = TLI->getRegClassFor(VT);
+    DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
   }
 
   // If all uses are reading from the src physical register and copying the
@@ -225,8 +225,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
     // type correctly. For example, a 64-bit float (X86::FR64) can't live in
     // the 32-bit float super-class (X86::FR32).
     if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
-      const TargetRegisterClass *VTRC =
-        TLI->getRegClassFor(Node->getSimpleValueType(i));
+      const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+          Node->getSimpleValueType(i),
+          (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
       if (RC)
         VTRC = TRI->getCommonSubClass(RC, VTRC);
       if (VTRC)
@@ -289,8 +290,8 @@ unsigned InstrEmitter::getVR(SDValue Op,
     // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
     if (!VReg) {
-      const TargetRegisterClass *RC =
-        TLI->getRegClassFor(Op.getSimpleValueType());
+      const TargetRegisterClass *RC = TLI->getRegClassFor(
+          Op.getSimpleValueType(), Op.getNode()->isDivergent());
       VReg = MRI->createVirtualRegister(RC);
     }
     BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
@@ -395,11 +396,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     unsigned VReg = R->getReg();
     MVT OpVT = Op.getSimpleValueType();
-    const TargetRegisterClass *OpRC =
-        TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
     const TargetRegisterClass *IIRC =
         II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
            : nullptr;
+    const TargetRegisterClass *OpRC =
+        TLI->isTypeLegal(OpVT)
+            ? TLI->getRegClassFor(OpVT,
+                                  Op.getNode()->isDivergent() ||
+                                      (IIRC && TRI->isDivergentRegClass(IIRC)))
+            : nullptr;
 
     if (OpRC && IIRC && OpRC != IIRC &&
         TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -464,7 +469,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
 }
 
 unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
-                                          MVT VT, const DebugLoc &DL) {
+                                          MVT VT, bool isDivergent, const DebugLoc &DL) {
   const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
   const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
 
@@ -479,7 +484,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
 
   // VReg couldn't be reasonably constrained.  Emit a COPY to a new virtual
   // register instead.
-  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+  RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
   assert(RC && "No legal register class for VT supports that SubIdx");
   unsigned NewReg = MRI->createVirtualRegister(RC);
   BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -514,7 +519,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     // classes.
     unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     const TargetRegisterClass *TRC =
-      TLI->getRegClassFor(Node->getSimpleValueType(0));
+      TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
 
     unsigned Reg;
     MachineInstr *DefMI;
@@ -548,8 +553,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
       if (TargetRegisterInfo::isVirtualRegister(Reg))
         Reg = ConstrainForSubReg(Reg, SubIdx,
                                  Node->getOperand(0).getSimpleValueType(),
-                                 Node->getDebugLoc());
-
+                                 Node->isDivergent(), Node->getDebugLoc());
       // Create the destreg if it is missing.
       if (VRBase == 0)
         VRBase = MRI->createVirtualRegister(TRC);
@@ -584,7 +588,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
     //
     // There is no constraint on the %src register class.
     //
-    const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+    const TargetRegisterClass *SRC =
+        TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
     SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
     assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 3188c2678f1af..42f7846fe7c3a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -83,7 +83,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
   /// supports SubIdx sub-registers.  Emit a copy if that isn't possible.
   /// Return the virtual register to use.
   unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
-                              const DebugLoc &DL);
+                              bool isDivergent, const DebugLoc &DL);
 
   /// EmitSubregNode - Generate machine code for subreg nodes.
   ///
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 76e5847ba111d..a5274877ecee4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9844,7 +9844,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
         unsigned &RegOut = ConstantsOut[C];
         if (RegOut == 0) {
-          RegOut = FuncInfo.CreateRegs(C->getType());
+          RegOut = FuncInfo.CreateRegs(C);
           CopyValueToVirtualRegister(C, RegOut);
         }
         Reg = RegOut;
@@ -9857,7 +9857,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
           assert(isa<AllocaInst>(PHIOp) &&
                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
                  "Didn't codegen value into a register!??");
-          Reg = FuncInfo.CreateRegs(PHIOp->getType());
+          Reg = FuncInfo.CreateRegs(PHIOp);
           CopyValueToVirtualRegister(PHIOp, Reg);
         }
       }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 6c9a1cd646ef3..6f55f98c51fd4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1485,7 +1485,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
               !Inst->use_empty()) {
             unsigned &R = FuncInfo->ValueMap[Inst];
             if (!R)
-              R = FuncInfo->CreateRegs(Inst->getType());
+              R = FuncInfo->CreateRegs(Inst);
           }
 
           bool HadTailCall = false;
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 94b1e636c7b15..8ad7a52c92bfd 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -302,52 +302,6 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
-static bool phiHasVGPROperands(const MachineInstr &PHI,
-                               const MachineRegisterInfo &MRI,
-                               const SIRegisterInfo *TRI,
-                               const SIInstrInfo *TII) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
-      return true;
-  }
-  return false;
-}
-
-static bool phiHasBreakDef(const MachineInstr &PHI,
-                           const MachineRegisterInfo &MRI,
-                           SmallSet<unsigned, 8> &Visited) {
-  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
-    unsigned Reg = PHI.getOperand(i).getReg();
-    if (Visited.count(Reg))
-      continue;
-
-    Visited.insert(Reg);
-
-    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-    switch (DefInstr->getOpcode()) {
-    default:
-      break;
-    case AMDGPU::SI_IF_BREAK:
-      return true;
-    case AMDGPU::PHI:
-      if (phiHasBreakDef(*DefInstr, MRI, Visited))
-        return true;
-    }
-  }
-  return false;
-}
-
-static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
-                                          const TargetRegisterInfo &TRI) {
-  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
-       E = MBB.end(); I != E; ++I) {
-    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
-      return true;
-  }
-  return false;
-}
-
 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                     const MachineInstr *MoveImm,
                                     const SIInstrInfo *TII,
@@ -409,12 +363,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB,
   return false;
 }
 
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
-                                        const TargetRegisterInfo *TRI) {
-  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
-           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
-}
-
 // Checks if there is potential path From instruction To instruction.
 // If CutOff is specified and it sits in between of that path we ignore
 // a higher portion of the path and report it is not reachable.
@@ -621,63 +569,73 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
         break;
       }
       case AMDGPU::PHI: {
-        unsigned Reg = MI.getOperand(0).getReg();
-        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
-          break;
-
-        // We don't need to fix the PHI if the common dominator of the
-        // two incoming blocks terminates with a uniform branch.
-        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
-        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
-          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
-          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
-
-          if (!predsHasDivergentTerminator(MBB0, TRI) &&
-              !predsHasDivergentTerminator(MBB1, TRI)) {
-            LLVM_DEBUG(dbgs()
-                       << "Not fixing PHI for uniform branch: " << MI << '\n');
+        unsigned hasVGPRUses = 0;
+        SetVector<const MachineInstr *> worklist;
+        worklist.insert(&MI);
+        while (!worklist.empty()) {
+          const MachineInstr *Instr = worklist.pop_back_val();
+          unsigned Reg = Instr->getOperand(0).getReg();
+          for (const auto &Use : MRI.use_operands(Reg)) {
+            const MachineInstr *UseMI = Use.getParent();
+            if (UseMI->isCopy() || UseMI->isRegSequence()) {
+              if (UseMI->isCopy() &&
+                  TRI->isPhysicalRegister(UseMI->getOperand(0).getReg()) &&
+                  !TRI->isSGPRReg(MRI, UseMI->getOperand(0).getReg())) {
+                hasVGPRUses++;
+              }
+              worklist.insert(UseMI);
+              continue;
+            }
+
+            if (UseMI->isPHI()) {
+              if (!TRI->isSGPRReg(MRI, Use.getReg()))
+                hasVGPRUses++;
+              continue;
+            }
+
+            unsigned OpNo = UseMI->getOperandNo(&Use);
+            const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
+            if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
+              const TargetRegisterClass *OpRC =
+                  TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
+              if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&
+                  OpRC != &AMDGPU::VS_64RegClass) {
+                hasVGPRUses++;
+              }
+            }
+          }
+        }
+        bool hasVGPRInput = false;
+        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+          unsigned InputReg = MI.getOperand(i).getReg();
+          MachineInstr *Def = MRI.getVRegDef(InputReg);
+          if (TRI->isVGPR(MRI, InputReg)) {
+            if (Def->isCopy()) {
+              unsigned SrcReg = Def->getOperand(1).getReg();
+              const TargetRegisterClass *RC =
+                  TRI->isVirtualRegister(SrcReg) ? MRI.getRegClass(SrcReg)
+                                                 : TRI->getPhysRegClass(SrcReg);
+              if (TRI->isSGPRClass(RC))
+                continue;
+            }
+            hasVGPRInput = true;
+            break;
+          } else if (Def->isCopy() &&
+                     TRI->isVGPR(MRI, Def->getOperand(1).getReg())) {
+            hasVGPRInput = true;
             break;
           }
         }
+        unsigned PHIRes = MI.getOperand(0).getReg();
+        const TargetRegisterClass *RC0 = MRI.getRegClass(PHIRes);
 
-        // If a PHI node defines an SGPR and any of its operands are VGPRs,
-        // then we need to move it to the VALU.
-        //
-        // Also, if a PHI node defines an SGPR and has all SGPR operands
-        // we must move it to the VALU, because the SGPR operands will
-        // all end up being assigned the same register, which means
-        // there is a potential for a conflict if different threads take
-        // different control flow paths.
-        //
-        // For Example:
-        //
-        // sgpr0 = def;
-        // ...
-        // sgpr1 = def;
-        // ...
-        // sgpr2 = PHI sgpr0, sgpr1
-        // use sgpr2;
-        //
-        // Will Become:
-        //
-        // sgpr2 = def;
-        // ...
-        // sgpr2 = def;
-        // ...
-        // use sgpr2
-        //
-        // The one exception to this rule is when one of the operands
-        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
-        // instruction.  In this case, there we know the program will
-        // never enter the second block (the loop) without entering
-        // the first block (where the condition is computed), so there
-        // is no chance for values to be over-written.
-
-        SmallSet<unsigned, 8> Visited;
-        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
-          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
-          TII->moveToVALU(MI, MDT);
+        if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
+            (hasVGPRInput || hasVGPRUses > 1)) {
+          TII->moveToVALU(MI);
+        } else {
+          TII->legalizeOperands(MI, MDT);
         }
+
         break;
       }
       case AMDGPU::REG_SEQUENCE:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c2cda5ef4d7ce..8f93c63046caf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9637,7 +9637,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
       break;
 
     MVT VT = Src0.getValueType().getSimpleVT();
-    const TargetRegisterClass *RC = getRegClassFor(VT);
+    const TargetRegisterClass *RC =
+        getRegClassFor(VT, Src0.getNode()->isDivergent());
 
     MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
     SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -10171,3 +10172,91 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
 
   return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
 }
+
+const TargetRegisterClass *
+SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
+  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+  if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
+    return &AMDGPU::SReg_64RegClass;
+  if (!TRI->isSGPRClass(RC) && !isDivergent)
+    return TRI->getEquivalentSGPRClass(RC);
+  else if (TRI->isSGPRClass(RC) && isDivergent)
+    return TRI->getEquivalentVGPRClass(RC);
+
+  return RC;
+}
+
+static bool hasIfBreakUser(const Value *V, SetVector<const Value *> &Visited) {
+  if (Visited.count(V))
+    return false;
+  Visited.insert(V);
+  bool Result = false;
+  for (auto U : V->users()) {
+    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
+      if ((Intrinsic->getIntrinsicID() == Intrinsic::amdgcn_if_break) &&
+          (V == U->getOperand(1)))
+        Result = true;
+    } else {
+      Result = hasIfBreakUser(U, Visited);
+    }
+    if (Result)
+      break;
+  }
+  return Result;
+}
+
+bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
+                                               const Value *V) const {
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if_break:
+      return true;
+    }
+  }
+  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
+    if (const IntrinsicInst *Intrinsic =
+            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
+      switch (Intrinsic->getIntrinsicID()) {
+      default:
+        return false;
+      case Intrinsic::amdgcn_if:
+      case Intrinsic::amdgcn_else: {
+        ArrayRef<unsigned> Indices = ExtValue->getIndices();
+        if (Indices.size() == 1 && Indices[0] == 1) {
+          return true;
+        }
+      }
+      }
+    }
+  }
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (isa<InlineAsm>(CI->getCalledValue())) {
+      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+      ImmutableCallSite CS(CI);
+      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
+          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
+      for (auto &TC : TargetConstraints) {
+        if (TC.Type == InlineAsm::isOutput) {
+          ComputeConstraintToUse(TC, SDValue());
+          unsigned AssignedReg;
+          const TargetRegisterClass *RC;
+          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
+              SIRI, TC.ConstraintCode,
+              getSimpleValueType(MF.getDataLayout(), CS.getType()));
+          if (RC) {
+            MachineRegisterInfo &MRI = MF.getRegInfo();
+            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
+              return true;
+            else if (SIRI->isSGPRClass(RC))
+              return true;
+          }
+        }
+      }
+    }
+  }
+  SetVector<const Value *> Visited;
+  return hasIfBreakUser(V, Visited);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 60a474f51e5c4..094a0b054e235 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -367,7 +367,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                     bool SNaN = false,
                                     unsigned Depth = 0) const override;
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
-
+  virtual const TargetRegisterClass *
+  getRegClassFor(MVT VT, bool isDivergent) const override;
+  virtual bool requiresUniformRegister(MachineFunction &MF,
+                                       const Value *V) const override;
   unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
 };
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e42ed3505cf5c..14f5dbe6ad496 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2219,6 +2219,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       // These come before src2.
       removeModOperands(UseMI);
       UseMI.setDesc(get(NewOpc));
+      // It might happen that UseMI was commuted
+      // and we now have SGPR as SRC1. If so 2 inlined
+      // constant and SGPR are illegal.
+      legalizeOperands(UseMI);
 
       bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
       if (DeleteDef)
@@ -3913,7 +3917,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
     return;
 
   // Try to eliminate the copy if it is copying an immediate value.
-  if (Def->isMoveImmediate())
+  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
     FoldImmediate(*Copy, *Def, OpReg, &MRI);
 }
 
@@ -4147,7 +4151,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
     if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
       if (!VRC) {
         assert(SRC);
-        VRC = RI.getEquivalentVGPRClass(SRC);
+       if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
+          VRC = &AMDGPU::VReg_1RegClass;
+        } else
+          VRC = RI.getEquivalentVGPRClass(SRC);
       }
       RC = VRC;
     } else {
@@ -5309,7 +5316,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::INSERT_SUBREG:
   case AMDGPU::WQM:
   case AMDGPU::WWM:
-    if (RI.hasVGPRs(NewDstRC))
+    if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
       return nullptr;
 
     NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bfdc1ef9645de..e2df3ae5ea7e9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -195,6 +195,11 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
                                                unsigned Reg) const;
   bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
 
+  virtual bool
+  isDivergentRegClass(const TargetRegisterClass *RC) const override {
+    return !isSGPRClass(RC);
+  }
+
   bool isSGPRPressureSet(unsigned SetID) const {
     return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
   }
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 677e4d5b2e8b7..88d318e7bb32b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1447,7 +1447,9 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
 
 /// getRegClassFor - Return the register class that should be used for the
 /// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+const TargetRegisterClass *
+ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+  (void)isDivergent;
   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   // load / store 4 to 8 consecutive D registers.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 3b94cb0dcb0fa..8e254d75b1c30 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -456,7 +456,8 @@ class VectorType;
 
     /// getRegClassFor - Return the register class that should be used for the
     /// specified value type.
-    const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+    const TargetRegisterClass *
+    getRegClassFor(MVT VT, bool isDivergent = false) const override;
 
     /// Returns true if a cast between SrcAS and DestAS is a noop.
     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
index 3d457fdd50e81..454c56cbca5d0 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-nand.ll
@@ -5,11 +5,12 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_lds:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    ds_read_b32 v2, v0
+; GCN-NEXT:    ds_read_b32 v1, v0
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB0_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    v_not_b32_e32 v1, v2
 ; GCN-NEXT:    v_or_b32_e32 v1, -5, v1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -17,7 +18,6 @@ define i32 @atomic_nand_i32_lds(i32 addrspace(3)* %ptr) nounwind {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GCN-NEXT:    v_mov_b32_e32 v2, v1
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB0_1
@@ -33,11 +33,12 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_global:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    global_load_dword v3, v[0:1], off
+; GCN-NEXT:    global_load_dword v2, v[0:1], off
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB1_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -45,7 +46,6 @@ define i32 @atomic_nand_i32_global(i32 addrspace(1)* %ptr) nounwind {
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB1_1
@@ -61,11 +61,12 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
 ; GCN-LABEL: atomic_nand_i32_flat:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    flat_load_dword v3, v[0:1]
+; GCN-NEXT:    flat_load_dword v2, v[0:1]
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:  BB2_1: ; %atomicrmw.start
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    v_not_b32_e32 v2, v3
 ; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
@@ -74,7 +75,6 @@ define i32 @atomic_nand_i32_flat(i32* %ptr) nounwind {
 ; GCN-NEXT:    buffer_wbinvl1_vol
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v2
 ; GCN-NEXT:    s_or_b64 s[6:7], vcc, s[6:7]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_cbranch_execnz BB2_1
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 45ed056567c2e..a2facaafb41f9 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -99,7 +99,7 @@ bb3:
 
 ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch:
 ; GCN: s_load_dword [[CND:s[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
+
 ; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0
 ; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]]
 ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
@@ -117,6 +117,7 @@ bb3:
 ; GCN: v_nop_e64
 
 ; GCN: [[ENDBB]]:
+; GCN: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]]
 ; GCN: buffer_store_dword [[V_CND]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(float addrspace(1)* %arg, float %cnd) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll b/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
index e6f684178035e..c9c801fb1911e 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-uniformity.ll
@@ -8,8 +8,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: ; %LOOP49
-; CHECK: v_cmp_ne_u32_e32 vcc,
-; CHECK: s_cbranch_vccnz
+; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
+; CHECK: s_cbranch_scc1
 ; CHECK: ; %ENDIF53
 define amdgpu_vs float @main(i32 %in) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 41ecdd403d736..15e807a3e0230 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -89,7 +89,7 @@ endif:
 }
 
 ; GCN-LABEL: {{^}}divergent_loop:
-; VGPR: workitem_private_segment_byte_size = 16{{$}}
+; VGPR: workitem_private_segment_byte_size = 12{{$}}
 
 ; GCN: {{^}}; %bb.0:
 
@@ -123,10 +123,9 @@ endif:
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
 ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
 ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
-; GCN: v_cmp_ne_u32_e32 vcc,
-; GCN: s_and_b64 vcc, exec, vcc
+; GCN: s_cmp_lg_u32
 ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN-NEXT: s_cbranch_vccnz [[LOOP]]
+; GCN-NEXT: s_cbranch_scc1 [[LOOP]]
 
 
 ; GCN: [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 8d21050ebee01..08a95ecbf5ad0 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -13,55 +13,50 @@ define amdgpu_ps void @main(i32, float) {
 ; CHECK:       ; %bb.0: ; %start
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
 ; CHECK-NEXT:    s_mov_b32 m0, s0
-; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
 ; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
-; CHECK-NEXT:    v_cmp_nlt_f32_e64 s[0:1], 0, v0
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
+; CHECK-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
+; CHECK-NEXT:    s_mov_b64 s[2:3], 0
+; CHECK-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; CHECK-NEXT:  BB0_1: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
-; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
-; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
-; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_cbranch_vccz BB0_5
+; CHECK-NEXT:    s_or_b64 s[4:5], s[4:5], exec
+; CHECK-NEXT:    s_cmp_lt_u32 s0, 32
+; CHECK-NEXT:    s_mov_b64 s[6:7], -1
+; CHECK-NEXT:    s_cbranch_scc0 BB0_5
 ; CHECK-NEXT:  ; %bb.2: ; %endif1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_mov_b64 s[6:7], -1
-; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
-; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
+; CHECK-NEXT:    s_mov_b64 s[4:5], -1
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
 ; CHECK-NEXT:    ; mask branch BB0_4
 ; CHECK-NEXT:  BB0_3: ; %endif2
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
-; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
+; CHECK-NEXT:    s_add_i32 s0, s0, 1
+; CHECK-NEXT:    s_xor_b64 s[4:5], exec, -1
 ; CHECK-NEXT:  BB0_4: ; %Flow1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
-; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT:    s_branch BB0_6
-; CHECK-NEXT:  BB0_5: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    ; implicit-def: $vgpr1
-; CHECK-NEXT:  BB0_6: ; %Flow
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_mov_b64 s[6:7], 0
+; CHECK-NEXT:  BB0_5: ; %Flow
 ; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
-; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[4:5]
-; CHECK-NEXT:    s_mov_b64 s[4:5], s[8:9]
+; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
+; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], s[2:3]
+; CHECK-NEXT:    s_mov_b64 s[2:3], s[8:9]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_cbranch_execnz BB0_1
-; CHECK-NEXT:  ; %bb.7: ; %Flow2
+; CHECK-NEXT:  ; %bb.6: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; this is the divergent branch with the condition not marked as divergent
-; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[2:3]
-; CHECK-NEXT:    ; mask branch BB0_9
-; CHECK-NEXT:  BB0_8: ; %if1
+; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[6:7]
+; CHECK-NEXT:    ; mask branch BB0_8
+; CHECK-NEXT:  BB0_7: ; %if1
 ; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT:  BB0_9: ; %endloop
+; CHECK-NEXT:  BB0_8: ; %endloop
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
 ; CHECK-NEXT:    s_endpgm
+; this is the divergent branch with the condition not marked as divergent
 start:
   %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
   br label %loop
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index a39833455a153..fe8f31a0cd2ee 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -13,9 +13,9 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
   ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
   ; GCN:   [[DEF1:%[0-9]+]]:sreg_128 = IMPLICIT_DEF
   ; GCN:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
-  ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
+  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
+  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
   ; GCN:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_96 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY2]], %subreg.sub1, killed [[COPY1]], %subreg.sub2
   ; GCN:   [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
   ; GCN:   [[DEF2:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index f96019dba6dcc..badaa16bbfcc5 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -48,8 +48,8 @@ define amdgpu_kernel void @s_fabs_f32(float addrspace(1)* %out, float %in) {
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
-; GCN: v_and_b32
-; GCN: v_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
 define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -62,10 +62,10 @@ define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
 
-; GCN: v_and_b32
-; GCN: v_and_b32
-; GCN: v_and_b32
-; GCN: v_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
+; GCN: s_and_b32
 define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   store <4 x float> %fabs, <4 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
index a3f176b3ef025..01499e681eafa 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -85,15 +85,15 @@ define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg
 
 ; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
 ; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -121,15 +121,15 @@ define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
@@ -156,15 +156,15 @@ define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
@@ -194,15 +194,15 @@ define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
 ; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
-; GCN-DENORM-DAG: s_mov_b32 [[L:s[0-9]+]], 0x6f800000
+; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
 ; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -231,8 +231,6 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
-; GCN-DAG:        s_mov_b32 [[L:s[0-9]+]], 0x6f800000
-; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -240,9 +238,12 @@ define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
+
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -273,8 +274,6 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
-; GCN-DAG:        s_mov_b32 [[L:s[0-9]+]], 0x6f800000
-; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
 ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
@@ -282,9 +281,12 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
+; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
+
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
-; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |v{{[0-9]+}}|, [[L]]
+; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
 ; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
 
 ; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index ca80c4edbfb29..075115a2ee6cf 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -33,9 +33,13 @@ define amdgpu_kernel void @s_test_fmin_legacy_subreg_inputs_f32(float addrspace(
 ; FUNC-LABEL: {{^}}s_test_fmin_legacy_ule_f32:
 ; GCN-DAG: s_load_dwordx2 s{{\[}}[[A:[0-9]+]]:[[B:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
 
-; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
+; SI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
 
-; SI-SAFE: v_min_legacy_f32_e64 {{v[0-9]+}}, [[VB]], s[[A]]
+; GCN-NONAN: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
+
+; VI-SAFE: v_mov_b32_e32 [[VB:v[0-9]+]], s[[B]]
+
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, s[[B]], [[VA]]
 
 ; VI-SAFE: v_mov_b32_e32 [[VA:v[0-9]+]], s[[A]]
 ; VI-SAFE: v_cmp_ngt_f32_e32 vcc, s[[A]], [[VB]]
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index 0ff5d9652c104..a621b04a346c0 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -4,7 +4,7 @@
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
 ; SI-NOT: and
-; SI: v_sub_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{s[0-9]+}}|
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
 define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
   %fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@ define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x
 
 ; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
 ; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{s[0-9]+}}|
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
 ; SI-NOT: and
 define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
@@ -85,8 +85,8 @@ define amdgpu_kernel void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrs
 
 ; FIXME: In this case two uses of the constant should be folded
 ; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
 define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
   %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -96,10 +96,10 @@ define amdgpu_kernel void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x
 
 ; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
 ; SI: s_brev_b32 [[SIGNBITK:s[0-9]+]], 1{{$}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
-; SI: v_or_b32_e32 v{{[0-9]+}}, [[SIGNBITK]], v{{[0-9]+}}
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[SIGNBITK]]
 define amdgpu_kernel void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
   %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
   %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 48647a2cdb898..6e4635ec43877 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -27,8 +27,8 @@ define amdgpu_kernel void @s_fsub_f32(float addrspace(1)* %out, float %a, float
 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
 ; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
 
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
   %sub = fsub <2 x float> %a, %b
   store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -55,10 +55,10 @@ define amdgpu_kernel void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x flo
 }
 
 ; FUNC-LABEL: {{^}}s_fsub_v4f32:
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: s_endpgm
 define amdgpu_kernel void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
   %result = fsub <4 x float> %a, %b
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
index ae78a1ecf3252..87c9a565f08b2 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -4,17 +4,11 @@
 ; SI-LABEL: {{^}}i1_copy_from_loop:
 ;
 ; SI: ; %for.body
-; SI:      v_cmp_gt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
-; SI-DAG:  s_andn2_b64       [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
-; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
-
-; SI: ; %Flow1
-; SI:      s_or_b64          [[CC_ACCUM]], [[CC_ACCUM]], exec
+; SI:      v_cmp_lt_u32_e64  [[CC_SREG:s\[[0-9]+:[0-9]+\]]], s{{[0-9+]}}, 4
 
 ; SI: ; %Flow
 ; SI-DAG:  s_andn2_b64       [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
-; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
+; SI-DAG:  s_and_b64         [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
 ; SI:      s_or_b64          [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
 
 ; SI: ; %for.end
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
index 0aacbbfda182b..c65683d4fab61 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
@@ -7,7 +7,6 @@
 ; GCN:      s_cbranch_scc1  [[PREEXIT:BB[0-9_]+]]
 
 ; GCN: ; %blocka
-; GCN:      s_xor_b64       s[{{[0-9:]+}}], exec, -1
 ; GCN:      s_cmp_eq_u32    s1, 0
 ; GCN:      s_cbranch_scc1  [[EXIT:BB[0-9_]+]]
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 47e080a94baa4..2584f30573fdc 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -11,12 +11,12 @@
 
 ; GCN-LABEL: {{^}}insertelement_v4f32_0:
 ; GCN: s_load_dwordx4
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
+; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
+
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
-; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
 define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index 2a5e81a6dd6ae..60ec52c229bca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -387,7 +387,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)*
 
 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
 ; SI-NOT: v0
-; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, v0, v0, v0
+; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
   %result0 = extractvalue { float, i1 } %result, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
index c47d02f716bdb..05b074bfe2d41 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -53,8 +53,8 @@ define amdgpu_kernel void @test_fabs_fmed3(float addrspace(1)* %out, float %src0
 }
 
 ; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0:
-; GCN: s_brev_b32 [[NEG0:s[0-9]+]], 1
-; GCN: v_med3_f32 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
+; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
+; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
 define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float %src0, float %src1) #1 {
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
   %neg.med3 = fsub float -0.0, %med3
@@ -88,8 +88,8 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out,
 
 ; GCN-LABEL: {{^}}test_fneg_fmed3_r_inv2pi_0_foldable_user:
 ; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
-; GCN-DAG: s_mov_b32 [[NEG_INV:s[0-9]+]], 0xbe22f983
-; GCN: v_med3_f32 [[MED3:v[0-9]+]], -v{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
+; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
+; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
 define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(float addrspace(1)* %out, float %src0, float %mul.arg) #1 {
   %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 18ede50f40c0a..a7fb618c23430 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -42,6 +42,8 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
 ; VI-OPT: s_mov_b32
 ; VI-OPT: s_mov_b32
 ; VI-NOOPT: s_waitcnt
+; VI-NOOPT-NEXT: v_mov_b32_e32
+; VI-NOOPT-NEXT: s_nop 0
 ; VI-NOOPT-NEXT: s_nop 0
 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
 ; VI-OPT: s_nop 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index bc04f6f28f608..83bc8b2347245 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
-; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index 2cab9c28db374..1f46613a8db0d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -4,7 +4,7 @@
 declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
-; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index f37b3a3637a43..5c2ec5021f1a9 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -26,10 +26,9 @@
 ; GCN:      s_mov_b64         [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
 
 ; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
-; GCN:      v_cmp_lt_i32_e32  vcc, -1
-; GCN:      s_and_b64         vcc, exec, vcc
-; GCN:      s_or_b64          [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
-; GCN:      s_cbranch_vccnz   [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN:     s_or_b64         [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
+; GCN:     s_cmp_gt_i32 s4, -1
+; GCN:     s_cbranch_scc1   [[FLOW:BB[0-9]+_[0-9]+]]
 
 ; GCN: ; %bb4
 ; GCN:      buffer_load_dword
@@ -39,6 +38,7 @@
 ; GCN:      s_or_b64          [[INNER_MASK]], [[INNER_MASK]], [[TMP0]]
 
 ; GCN: [[FLOW]]: ; %Flow
+; GCN:           ;   in Loop: Header=BB0_1 Depth=1
 ; GCN:      s_and_b64         [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
 ; GCN:      s_or_b64          [[TMP1]], [[TMP1]], [[OUTER_MASK]]
 ; GCN:      s_mov_b64         [[OUTER_MASK]], [[TMP1]]
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index eed0218766481..8e4b6806f98ae 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,MAD,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -17,6 +17,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
 ; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
 ; MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
+; GFX10-MAD:   v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 ; FMA:   v_fmaak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
 define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -79,6 +80,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo
 ; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
 ; GCN: {{buffer|flat|global}}_load_dword [[VA:v[0-9]+]]
 ; MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
+; GFX10-MAD: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 ; FMA: v_fmaak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
 define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -106,6 +108,7 @@ define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %o
 ; GFX10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
 ; GFX10: {{flat|global}}_load_dword [[VB:v[0-9]+]]
 ; MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
+; GFX10-MAD:   v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 ; FMA:   v_fma_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
 define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -234,9 +237,12 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia
 ; On GFX10+ we can use two scalar operands.
 ; GCN-LABEL: {{^}}madak_constant_bus_violation:
 ; GCN:       s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
-; GCN:       v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+
 ; GCN:       {{buffer|flat|global}}_load_dword [[VGPR:v[0-9]+]]
-; MAD:       v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; MAD:       v_mov_b32_e32 [[MADAK:v[0-9]+]], 0x42280000
+; MAD:       v_mac_f32_e64 [[MADAK]], [[SGPR0]], 0.5
+; GFX10:     v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
+; GFX10-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
 ; FMA:       v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
 ; GCN:       v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
 ; GFX6:      buffer_store_dword [[MUL]]
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index e8ecf5e25abce..4822818e901af 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -155,8 +155,9 @@ entry:
 ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]]
 ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
-
-; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]]
+; CHECK-O0: v_readlane_b32 s[[S1:[0-9]+]], v{{[0-9]+}}, 4
+; CHECK-O0: v_readlane_b32 s[[S2:[0-9]+]], v{{[0-9]+}}, 5
+; CHECK-O0: s_mov_b64 exec, s{{\[}}[[S1]]:[[S2]]{{\]}}
 ; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
 ; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 4c1a769d59958..ddda7baef7412 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -96,7 +96,6 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN:      s_mov_b64          [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]]
 
 ; GCN: ; %LeafBlock1
-; GCN:      s_mov_b64
 ; GCN:      s_mov_b64          [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 
 ; GCN: ; %case1
@@ -109,8 +108,6 @@ ENDIF:                                            ; preds = %LOOP
 
 ; GCN:      s_mov_b64          [[BREAK]], -1{{$}}
 
-; GCN: [[FLOW]]: ; %Flow
-
 ; GCN: ; %case0
 ; GCN:      buffer_load_dword  [[LOAD1:v[0-9]+]],
 ; GCN-DAG:  s_andn2_b64        [[BREAK]], [[BREAK]], exec
@@ -118,7 +115,7 @@ ENDIF:                                            ; preds = %LOOP
 ; GCN-DAG:  s_and_b64          [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec
 ; GCN:      s_or_b64           [[BREAK]], [[BREAK]], [[TMP]]
 
-; GCN: ; %Flow4
+; GCN: [[FLOW]]: ; %Flow4
 ; GCN:      s_and_b64          [[BREAK]], exec, [[BREAK]]
 ; GCN:      s_or_b64           [[LEFT]], [[BREAK]], [[OLD_LEFT]]
 ; GCN:      s_andn2_b64        exec, exec, [[LEFT]]
diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll
index f773357976cce..24df126e4cafc 100644
--- a/llvm/test/CodeGen/AMDGPU/select-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll
@@ -135,8 +135,8 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, flo
 
 ; GCN-LABEL: {{^}}regression:
 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
-; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
-; GCN: v_cmp_eq_f32_e32 vcc, 0, v{{[0-9]+}}
+; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
+; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
 
 define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index e0971b8456fdc..3d5c3285cba71 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -104,7 +104,8 @@ endif:
 
 ; SI: ; %else
 ; SI:      buffer_load_dword  [[AVAL:v[0-9]+]]
-; SI:      v_cmp_gt_i32_e64   [[PHI:s\[[0-9]+:[0-9]+\]]], 0, [[AVAL]]
+; SI:      v_cmp_gt_i32_e32   vcc, 0, [[AVAL]]
+; SI:      s_and_b64 [[PHI:s\[[0-9]+:[0-9]+\]]], vcc, exec
 
 ; SI: ; %if
 ; SI:      buffer_load_dword  [[AVAL:v[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
index 3ec7a6678a9ed..03e81a0431c54 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir
@@ -16,7 +16,7 @@ registers:
 
 body: |
   ; GCN-LABEL: name: phi_visit_order
-  ; GCN: V_ADD_I32
+  ; GCN: S_ADD_I32
   bb.0:
     liveins: $vgpr0
     %7 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index c83eb378a1e1f..904de8111fabf 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -571,7 +571,6 @@ main_body:
 ;
 ; TODO: we should keep the loop counter in an SGPR
 ;
-; GCN: v_readfirstlane_b32
 ; GCN: s_buffer_load_dword
 define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 80071e3407e9c..e7555a6703383 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -1,28 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
 ; Don't crash when the use of an undefined value is only detected by the
 ; register coalescer because it is hidden with subregister insert/extract.
 target triple="amdgcn--"
 
-; CHECK-LABEL: foobar:
-; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
-; CHECK-NEXT: s_mov_b32 s2, -1
-; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
-
-; CHECK: BB0_1:
-; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec
-; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
-
-; CHECK: BB0_2:
-; CHECK: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_mov_b32 s3, 0xf000
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
-; CHECK-NEXT: s_endpgm
 define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
+; CHECK-LABEL: foobar:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CHECK-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    s_mov_b32 s2, -1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+
+; FIXME: The change related to the fact that
+; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis
+; and hence it cannot derive the fact that the vector element is unused.
+; Such a copies appear because the float4 vectors and their elements in the test are uniform
+; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
+
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    v_mov_b32_e32 v2, s6
+; CHECK-NEXT:    v_mov_b32_e32 v3, s7
+
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT:    ; mask branch BB0_2
+; CHECK-NEXT:  BB0_1: ; %ift
+; CHECK-NEXT:    s_mov_b32 s4, s5
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    v_mov_b32_e32 v2, s6
+; CHECK-NEXT:    v_mov_b32_e32 v3, s7
+; CHECK-NEXT:  BB0_2: ; %ife
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    s_mov_b32 s3, 0xf000
+; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], 0
+; CHECK-NEXT:    s_endpgm
 entry:
   %v0 = insertelement <4 x float> undef, float %a0, i32 0
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index 82283f39792ee..a1cf6cf630048 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -7,10 +7,9 @@
 ; CHECK: s_and_saveexec_b64
 ; CHECK-NEXT: ; mask branch
 ; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
-; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader
 
-; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]:
-; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]]
+; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: ; %loop_body
+; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]]
 
 ; CHECK: s_endpgm
 define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) {
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index 50cf85e28ae13..fbf7364bfc4bb 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -226,13 +226,12 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addr
 ; GCN-LABEL: {{^}}test_s0_s1_k_f32:
 ; SI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; VI-DAG: s_load_dwordx2 s{{\[}}[[SGPR0:[0-9]+]]:[[SGPR1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; GCN-DAG: s_mov_b32 [[SK0:s[0-9]+]], 0x44800000
+; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
 ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], s[[SGPR1]]
-; GCN-DAG: v_mov_b32_e32 [[VS0:v[0-9]+]], s[[SGPR0]]
 
-; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS0]], [[VS1]], [[SK0]]
-; GCN-DAG: s_mov_b32 [[SK1:s[0-9]+]], 0x45800000
-; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VS0]], [[VS1]], [[SK1]]
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK0]]
+; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], s[[SGPR0]], [[VS1]], [[VK1]]
 
 ; GCN: buffer_store_dword [[RESULT0]]
 ; GCN: buffer_store_dword [[RESULT1]]
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 3a9970e78e38f..79a753cc046f9 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -165,8 +165,8 @@ exit:
 ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
 ; SI: buffer_load_dword
 ; SI-DAG: buffer_store_dword
-; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100
-; SI: s_cbranch_vccz [[LABEL_LOOP]]
+; SI-DAG: s_cmpk_eq_i32 s{{[0-9+]}}, 0x100
+; SI: s_cbranch_scc0 [[LABEL_LOOP]]
 ; SI: [[LABEL_EXIT]]:
 ; SI: s_endpgm
 
@@ -214,7 +214,7 @@ exit:
 ; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
 ; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
-; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
+; SI: ; mask branch [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
 
 ; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
 ; SI: buffer_store_dword
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index 0c52daca04738..b0e9171cbb007 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s

From e698958ad8031e0f17202e06f5de53989852bb66 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sun, 26 May 2019 21:26:06 +0000
Subject: [PATCH 0274/1176] [BPF] generate R_BPF_NONE relocation for BTF
 DataSec variables

The variables in BTF DataSec type encode in-section offset.
R_BPF_NONE should be generated instead of R_BPF_64_32.

Signed-off-by: Yonghong Song <yhs@fb.com>

Differential Revision: https://reviews.llvm.org/D62460

llvm-svn: 361742
---
 .../BPF/MCTargetDesc/BPFELFObjectWriter.cpp   | 32 ++++++----
 llvm/test/CodeGen/BPF/reloc-btf-2.ll          | 60 +++++++++++++++++++
 2 files changed, 82 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/reloc-btf-2.ll

diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 1d7a3ec68a8df..057bbf5c3b06a 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -50,21 +50,33 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
   case FK_Data_8:
     return ELF::R_BPF_64_64;
   case FK_Data_4:
-    // .BTF.ext generates FK_Data_4 relocations for
-    // insn offset by creating temporary labels.
-    // The insn offset is within the code section and
-    // already been fulfilled by applyFixup(). No
-    // further relocation is needed.
     if (const MCSymbolRefExpr *A = Target.getSymA()) {
-      if (A->getSymbol().isTemporary()) {
-        MCSection &Section = A->getSymbol().getSection();
+      const MCSymbol &Sym = A->getSymbol();
+
+      if (Sym.isDefined()) {
+        MCSection &Section = Sym.getSection();
         const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
         assert(SectionELF && "Null section for reloc symbol");
 
-        // The reloc symbol should be in text section.
         unsigned Flags = SectionELF->getFlags();
-        if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
-          return ELF::R_BPF_NONE;
+
+        if (Sym.isTemporary()) {
+          // .BTF.ext generates FK_Data_4 relocations for
+          // insn offset by creating temporary labels.
+          // The insn offset is within the code section and
+          // already been fulfilled by applyFixup(). No
+          // further relocation is needed.
+          // The reloc symbol should be in text section.
+          if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+            return ELF::R_BPF_NONE;
+        } else {
+          // .BTF generates FK_Data_4 relocations for variable
+          // offset in DataSec kind. Similar to the above .BTF.ext
+          // insn offset, no further relocation is needed.
+          // The reloc symbol should be in data section.
+          if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_WRITE))
+            return ELF::R_BPF_NONE;
+        }
       }
     }
     return ELF::R_BPF_64_32;
diff --git a/llvm/test/CodeGen/BPF/reloc-btf-2.ll b/llvm/test/CodeGen/BPF/reloc-btf-2.ll
new file mode 100644
index 0000000000000..feabb08915309
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/reloc-btf-2.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=bpfel -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -march=bpfeb -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+
+; source code:
+;   int g __attribute__((section("ids"))) = 4;
+;   static volatile int s = 0;
+;   int test() {
+;     return g + s;
+;   }
+; compilation flag:
+;   clang -target bpf -g -O2 -emit-llvm -S test.c
+
+@g = dso_local local_unnamed_addr global i32 4, section "ids", align 4, !dbg !0
+@s = internal global i32 0, align 4, !dbg !6
+
+; Function Attrs: norecurse nounwind
+define dso_local i32 @test() local_unnamed_addr #0 !dbg !14 {
+  %1 = load i32, i32* @g, align 4, !dbg !17, !tbaa !18
+  %2 = load volatile i32, i32* @s, align 4, !dbg !22, !tbaa !18
+  %3 = add nsw i32 %2, %1, !dbg !23
+  ret i32 %3, !dbg !24
+}
+
+; CHECK-RELOC: file format ELF64-BPF
+; CHECK-RELOC: RELOCATION RECORDS FOR [.BTF]:
+; CHECK-RELOC: R_BPF_NONE .bss
+; CHECK-RELOC: R_BPF_NONE g
+; CHECK-RELOC: RELOCATION RECORDS FOR [.BTF.ext]:
+
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 1, type: !9, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (trunk 360739) (llvm/trunk 360747)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: None)
+!3 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/llvm/relocation")
+!4 = !{}
+!5 = !{!0, !6}
+!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression())
+!7 = distinct !DIGlobalVariable(name: "s", scope: !2, file: !3, line: 2, type: !8, isLocal: true, isDefinition: true)
+!8 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i32 1, !"wchar_size", i32 4}
+!13 = !{!"clang version 9.0.0 (trunk 360739) (llvm/trunk 360747)"}
+!14 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 3, type: !15, scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!9}
+!17 = !DILocation(line: 4, column: 10, scope: !14)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !DILocation(line: 4, column: 14, scope: !14)
+!23 = !DILocation(line: 4, column: 12, scope: !14)
+!24 = !DILocation(line: 4, column: 3, scope: !14)

From 11b2f4fe50dae42d070bfdd798cc29a70dad320a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sun, 26 May 2019 23:38:25 +0000
Subject: [PATCH 0275/1176] [LoopInterchange] Fix handling of LCSSA nodes
 defined in headers and latches.

The code to preserve LCSSA PHIs currently only properly supports
reduction PHIs and PHIs for values defined outside the latches.

This patch improves the LCSSA PHI handling to cover PHIs for values
defined in the latches.

Fixes PR41725.

Reviewers: efriedma, mcrosier, davide, jdoerfert

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D61576

llvm-svn: 361743
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp |  86 ++++++---
 .../LoopInterchange/perserve-lcssa.ll         | 181 ++++++++++++++++++
 2 files changed, 245 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index bec5af584f438..3dbb1ebebd7c1 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1301,8 +1301,41 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
 }
 
 // Move Lcssa PHIs to the right place.
-static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
-                          BasicBlock *OuterLatch) {
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
+                          BasicBlock *InnerLatch, BasicBlock *OuterHeader,
+                          BasicBlock *OuterLatch, BasicBlock *OuterExit) {
+
+  // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
+  // defined either in the header or latch. Those blocks will become header and
+  // latch of the new outer loop, and the only possible users can PHI nodes
+  // in the exit block of the loop nest or the outer loop header (reduction
+  // PHIs, in that case, the incoming value must be defined in the inner loop
+  // header). We can just substitute the user with the incoming value and remove
+  // the PHI.
+  for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
+    assert(P.getNumIncomingValues() == 1 &&
+           "Only loops with a single exit are supported!");
+
+    // Incoming values are guaranteed be instructions currently.
+    auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
+    // Skip phis with incoming values from the inner loop body, excluding the
+    // header and latch.
+    if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
+      continue;
+
+    assert(all_of(P.users(),
+                  [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
+                    return (cast<PHINode>(U)->getParent() == OuterHeader &&
+                            IncI->getParent() == InnerHeader) ||
+                           cast<PHINode>(U)->getParent() == OuterExit;
+                  }) &&
+           "Can only replace phis iff the uses are in the loop nest exit or "
+           "the incoming value is defined in the inner header (it will "
+           "dominate all loop blocks after interchanging)");
+    P.replaceAllUsesWith(IncI);
+    P.eraseFromParent();
+  }
+
   SmallVector<PHINode *, 8> LcssaInnerExit;
   for (PHINode &P : InnerExit->phis())
     LcssaInnerExit.push_back(&P);
@@ -1315,31 +1348,39 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
   // If a PHI node has users outside of InnerExit, it has a use outside the
   // interchanged loop and we have to preserve it. We move these to
   // InnerLatch, which will become the new exit block for the innermost
-  // loop after interchanging. For PHIs only used in InnerExit, we can just
-  // replace them with the incoming value.
-  for (PHINode *P : LcssaInnerExit) {
-    bool hasUsersOutside = false;
-    for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
-      Use &U = *UI;
-      ++UI;
-      auto *Usr = cast<Instruction>(U.getUser());
-      if (Usr->getParent() != InnerExit) {
-        hasUsersOutside = true;
-        continue;
-      }
-      U.set(P->getIncomingValueForBlock(InnerLatch));
-    }
-    if (hasUsersOutside)
-      P->moveBefore(InnerLatch->getFirstNonPHI());
-    else
-      P->eraseFromParent();
-  }
+  // loop after interchanging.
+  for (PHINode *P : LcssaInnerExit)
+    P->moveBefore(InnerLatch->getFirstNonPHI());
 
   // If the inner loop latch contains LCSSA PHIs, those come from a child loop
   // and we have to move them to the new inner latch.
   for (PHINode *P : LcssaInnerLatch)
     P->moveBefore(InnerExit->getFirstNonPHI());
 
+  // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
+  // incoming values from the outer latch or header, we have to add a new PHI
+  // in the inner loop latch, which became the exit block of the outer loop,
+  // after interchanging.
+  if (OuterExit) {
+    for (PHINode &P : OuterExit->phis()) {
+      if (P.getNumIncomingValues() != 1)
+        continue;
+      // Skip Phis with incoming values not defined in the outer loop's header
+      // and latch. Also skip incoming phis defined in the latch. Those should
+      // already have been updated.
+      auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
+      if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
+                 I->getParent() != OuterHeader))
+        continue;
+
+      PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
+      NewPhi->setIncomingValue(0, P.getIncomingValue(0));
+      NewPhi->setIncomingBlock(0, OuterLatch);
+      NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
+      P.setIncomingValue(0, NewPhi);
+    }
+  }
+
   // Now adjust the incoming blocks for the LCSSA PHIs.
   // For PHIs moved from Inner's exit block, we need to replace Inner's latch
   // with the new latch.
@@ -1442,7 +1483,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
                    OuterLoopPreHeader);
 
-  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
+  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
+                OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
   // For PHIs in the exit block of the outer loop, outer's latch has been
   // replaced by Inners'.
   OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
diff --git a/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll b/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
new file mode 100644
index 0000000000000..af61709873c02
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/perserve-lcssa.ll
@@ -0,0 +1,181 @@
+; RUN: opt < %s -loop-interchange -loop-interchange-threshold=-100 -verify-loop-lcssa -S | FileCheck %s
+
+; Test case for PR41725. The induction variables in the latches escape the
+; loops and we must move some PHIs around.
+
+@a = common dso_local global i64 0, align 4
+@b = common dso_local global i64 0, align 4
+@c = common dso_local global [10 x [1 x i32 ]] zeroinitializer, align 16
+
+
+define void @test_lcssa_indvars1()  {
+; CHECK-LABEL: @test_lcssa_indvars1()
+; CHECK-LABEL: inner.body.split:
+; CHECK-NEXT:    %0 = phi i64 [ %iv.outer.next, %outer.latch ]
+; CHECK-NEXT:    %iv.inner.next = add nsw i64 %iv.inner, -1
+
+; CHECK-LABEL: exit:
+; CHECK-NEXT:    %v4.lcssa = phi i64 [ %0, %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT:    store i64 %v8.lcssa.lcssa, i64* @b, align 4
+; CHECK-NEXT:    store i64 %v4.lcssa, i64* @a, align 4
+
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.latch, %entry
+  %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %outer.latch ]
+  br label %inner.body
+
+inner.body:                                       ; preds = %inner.body, %outer.header
+  %iv.inner = phi i64 [ 5, %outer.header ], [ %iv.inner.next, %inner.body ]
+  %v7 = getelementptr inbounds [10 x [1 x i32]], [10 x [1 x i32]]* @c, i64 0, i64 %iv.inner, i64 %iv.outer
+  store i32 0, i32* %v7, align 4
+  %iv.inner.next = add nsw i64 %iv.inner, -1
+  %v9 = icmp eq i64 %iv.inner, 0
+  br i1 %v9, label %outer.latch, label %inner.body
+
+outer.latch:                                      ; preds = %inner.body
+  %v8.lcssa = phi i64 [ %iv.inner.next, %inner.body ]
+  %iv.outer.next = add nuw nsw i64 %iv.outer, 1
+  %v5 = icmp ult i64 %iv.outer, 2
+  br i1 %v5, label %outer.header, label %exit
+
+exit:                                             ; preds = %outer.latch
+  %v4.lcssa = phi i64 [ %iv.outer.next, %outer.latch ]
+  %v8.lcssa.lcssa = phi i64 [ %v8.lcssa, %outer.latch ]
+  store i64 %v8.lcssa.lcssa, i64* @b, align 4
+  store i64 %v4.lcssa, i64* @a, align 4
+  ret void
+}
+
+
+define void @test_lcssa_indvars2()  {
+; CHECK-LABEL: @test_lcssa_indvars2()
+; CHECK-LABEL: inner.body.split:
+; CHECK-NEXT:    %0 = phi i64 [ %iv.outer, %outer.latch ]
+; CHECK-NEXT:    %iv.inner.next = add nsw i64 %iv.inner, -1
+
+; CHECK-LABEL: exit:
+; CHECK-NEXT:    %v4.lcssa = phi i64 [ %0, %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %iv.inner, %inner.body.split ]
+; CHECK-NEXT:    store i64 %v8.lcssa.lcssa, i64* @b, align 4
+; CHECK-NEXT:    store i64 %v4.lcssa, i64* @a, align 4
+
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.latch, %entry
+  %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %outer.latch ]
+  br label %inner.body
+
+inner.body:                                       ; preds = %inner.body, %outer.header
+  %iv.inner = phi i64 [ 5, %outer.header ], [ %iv.inner.next, %inner.body ]
+  %v7 = getelementptr inbounds [10 x [1 x i32]], [10 x [1 x i32]]* @c, i64 0, i64 %iv.inner, i64 %iv.outer
+  store i32 0, i32* %v7, align 4
+  %iv.inner.next = add nsw i64 %iv.inner, -1
+  %v9 = icmp eq i64 %iv.inner.next, 0
+  br i1 %v9, label %outer.latch, label %inner.body
+
+outer.latch:                                      ; preds = %inner.body
+  %v8.lcssa = phi i64 [ %iv.inner, %inner.body ]
+  %iv.outer.next = add nuw nsw i64 %iv.outer, 1
+  %v5 = icmp ult i64 %iv.outer.next, 2
+  br i1 %v5, label %outer.header, label %exit
+
+exit:                                             ; preds = %outer.latch
+  %v4.lcssa = phi i64 [ %iv.outer, %outer.latch ]
+  %v8.lcssa.lcssa = phi i64 [ %v8.lcssa, %outer.latch ]
+  store i64 %v8.lcssa.lcssa, i64* @b, align 4
+  store i64 %v4.lcssa, i64* @a, align 4
+  ret void
+}
+
+define void @test_lcssa_indvars3()  {
+; CHECK-LABEL: @test_lcssa_indvars3()
+; CHECK-LABEL: inner.body.split:
+; CHECK-NEXT:    %0 = phi i64 [ %iv.outer.next, %outer.latch ]
+; CHECK-NEXT:    %iv.inner.next = add nsw i64 %iv.inner, -1
+
+; CHECK-LABEL: exit:
+; CHECK-NEXT:    %v4.lcssa = phi i64 [ %0, %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT:    %v8.lcssa.lcssa.2 = phi i64 [ %iv.inner.next, %inner.body.split ]
+; CHECK-NEXT:    %r1 = add i64 %v8.lcssa.lcssa, %v8.lcssa.lcssa.2
+; CHECK-NEXT:    store i64 %r1, i64* @b, align 4
+; CHECK-NEXT:    store i64 %v4.lcssa, i64* @a, align 4
+
+
+entry:
+  br label %outer.header
+
+outer.header:                                     ; preds = %outer.latch, %entry
+  %iv.outer = phi i64 [ 0, %entry ], [ %iv.outer.next, %outer.latch ]
+  br label %inner.body
+
+inner.body:                                       ; preds = %inner.body, %outer.header
+  %iv.inner = phi i64 [ 5, %outer.header ], [ %iv.inner.next, %inner.body ]
+  %v7 = getelementptr inbounds [10 x [1 x i32]], [10 x [1 x i32]]* @c, i64 0, i64 %iv.inner, i64 %iv.outer
+  store i32 0, i32* %v7, align 4
+  %iv.inner.next = add nsw i64 %iv.inner, -1
+  %v9 = icmp eq i64 %iv.inner, 0
+  br i1 %v9, label %outer.latch, label %inner.body
+
+outer.latch:                                      ; preds = %inner.body
+  %v8.lcssa = phi i64 [ %iv.inner.next, %inner.body ]
+  ;%const.lcssa = phi i64 [ 111, %inner.body ]
+  %iv.outer.next = add nuw nsw i64 %iv.outer, 1
+  %v5 = icmp ult i64 %iv.outer, 2
+  br i1 %v5, label %outer.header, label %exit
+
+exit:                                             ; preds = %outer.latch
+  %v4.lcssa = phi i64 [ %iv.outer.next, %outer.latch ]
+  %v8.lcssa.lcssa = phi i64 [ %v8.lcssa, %outer.latch ]
+  %v8.lcssa.lcssa.2 = phi i64 [ %v8.lcssa, %outer.latch ]
+  %r1 = add i64 %v8.lcssa.lcssa, %v8.lcssa.lcssa.2
+  store i64 %r1, i64* @b, align 4
+  store i64 %v4.lcssa, i64* @a, align 4
+  ret void
+}
+
+
+; Make sure we do not crash for loops without reachable exits.
+define void @no_reachable_exits() {
+; Check we interchanged.
+; CHECK-LABEL: @no_reachable_exits() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label %inner.ph
+; CHECK-LABEL: outer.ph:
+; CHECK-NEXT:    br label %outer.header
+; CHECK-LABEL: inner.ph:
+; CHECK-NEXT:    br label %inner.body
+; CHECK-LABEL: inner.body:
+; CHECK-NEXT:    %tmp31 = phi i32 [ 0, %inner.ph ], [ %tmp6, %inner.body.split ]
+; CHECK-NEXT:    br label %outer.ph
+
+bb:
+  br label %outer.ph
+
+outer.ph:                              ; preds = %bb
+  br label %outer.header
+
+outer.header:                                    ; preds = %outer.ph, %outer.latch
+  %tmp2 = phi i32 [ 0, %outer.ph ], [ %tmp8, %outer.latch ]
+  br i1 undef, label %inner.ph, label %outer.latch
+
+inner.ph:                                        ; preds = %outer.header
+  br label %inner.body
+
+inner.body:                                              ; preds = %inner.ph, %inner.body
+  %tmp31 = phi i32 [ 0, %inner.ph ], [ %tmp6, %inner.body]
+  %tmp5 = load i32*, i32** undef, align 8
+  %tmp6 = add nsw i32 %tmp31, 1
+  br i1 undef, label %inner.body, label %outer.latch
+
+outer.latch:                                              ; preds = %inner.body, %outer.header
+  %tmp8 = add nsw i32 %tmp2, 1
+  br i1 undef, label %outer.header, label %exit
+
+exit:                                              ; preds = %outer.latch
+  unreachable
+}

From cfe08bc7d68712d8bdb2c205981bbbf15968a19e Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 27 May 2019 00:48:59 +0000
Subject: [PATCH 0276/1176] llvm-undname: Make demangling of MD5 names more
 robust

Demangler::parse() for MD5 names would:

1. Put all remaining text into the MD5 name sight unseen
2. Not modify MangledName

This meant that if the demangler recursively called parse() (e.g. in
demangleLocallyScopedNamePiece()), every recursive call that started on
an MD5 name would add all remaining bytes to the output buffer but
only advance the input by a byte.  For valid inputs, MD5 types are
never (well, see comments for 2 exceptions) nested, but for invalid
input this could cause memory use quadratic in the input size.

llvm-svn: 361744
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp | 28 ++++++++++++++++++++++---
 llvm/test/Demangle/ms-md5.test          | 16 ++++++++++++--
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 041d327596b48..c1e6e14fd498a 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -747,16 +747,38 @@ SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
 
 // Parser entry point.
 SymbolNode *Demangler::parse(StringView &MangledName) {
-  // We can't demangle MD5 names, just output them as-is.
-  // Also, MSVC-style mangled symbols must start with '?'.
   if (MangledName.startsWith("??@")) {
     // This is an MD5 mangled name.  We can't demangle it, just return the
     // mangled name.
+    // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
+    size_t MD5Last = MangledName.find('@', strlen("??@"));
+    if (MD5Last == StringView::npos) {
+      Error = true;
+      return nullptr;
+    }
+    const char* Start = MangledName.begin();
+    MangledName = MangledName.dropFront(MD5Last + 1);
+
+    // There are two additional special cases for MD5 names:
+    // 1. For complete object locators where the object name is long enough
+    //    for the object to have an MD5 name, the complete object locator is
+    //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
+    //    leading "??_R4". This is handled here.
+    // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
+    //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
+    //    instead of_CT??@...@8 with just one MD5 name. Since we don't yet
+    //    demangle catchable types anywhere, this isn't handled for MD5 names
+    //    either.
+    MangledName.consumeFront("??_R4@");
+
+    StringView MD5(Start, MangledName.begin());
     SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
-    S->Name = synthesizeQualifiedName(Arena, MangledName);
+    S->Name = synthesizeQualifiedName(Arena, MD5);
+
     return S;
   }
 
+  // MSVC-style mangled symbols must start with '?'.
   if (!MangledName.startsWith('?')) {
     Error = true;
     return nullptr;
diff --git a/llvm/test/Demangle/ms-md5.test b/llvm/test/Demangle/ms-md5.test
index 1fe2ecbcb68d0..bb084e7ae4429 100644
--- a/llvm/test/Demangle/ms-md5.test
+++ b/llvm/test/Demangle/ms-md5.test
@@ -1,4 +1,4 @@
-; These tests are based on clang/test/CodeGenCXX/mangle-ms-cxx11.cpp
+; These tests are based on clang/test/CodeGenCXX/mangle-ms-md5.cpp
 
 ; RUN: llvm-undname < %s | FileCheck %s
 
@@ -8,4 +8,16 @@
 ; two check lines here since the tool echos the input.
 ??@a6a285da2eea70dba6b578022be61d81@
 ; CHECK: ??@a6a285da2eea70dba6b578022be61d81@
-; CHECK-NEXT: ??@a6a285da2eea70dba6b578022be61d81@
\ No newline at end of file
+; CHECK-NEXT: ??@a6a285da2eea70dba6b578022be61d81@
+
+; Don't include trailing garbage:
+??@a6a285da2eea70dba6b578022be61d81@asdf
+; CHECK: ??@a6a285da2eea70dba6b578022be61d81@asdf
+; CHECK-NEXT: ??@a6a285da2eea70dba6b578022be61d81@
+
+; The complete object locator special case:
+; FIXME: This should probably print
+; ??@a6a285da2eea70dba6b578022be61d81@::`RTTI Complete Object Locator' instead.
+??@a6a285da2eea70dba6b578022be61d81@??_R4@
+; CHECK: ??@a6a285da2eea70dba6b578022be61d81@??_R4@
+; CHECK-NEXT: ??@a6a285da2eea70dba6b578022be61d81@??_R4@

From ba883e980a9c0ce0edfddb0737b2a30a1dec0ef7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 27 May 2019 05:27:57 +0000
Subject: [PATCH 0277/1176] [X86] Add test cases for D62444. NFC

llvm-svn: 361745
---
 llvm/test/Analysis/CostModel/X86/arith-fp.ll | 171 +++++++++++++++++++
 1 file changed, 171 insertions(+)

diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index bce7ca8d59f27..1e95e9a5116d2 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -240,6 +240,177 @@ define i32 @fsub(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @fneg_idiom(i32 %arg) {
+; SSE1-LABEL: 'fneg_idiom'
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fneg_idiom'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float -0.000000e+00, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fneg_idiom'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX1-LABEL: 'fneg_idiom'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'fneg_idiom'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fneg_idiom'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fneg_idiom'
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; GLM-LABEL: 'fneg_idiom'
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; BTVER2-LABEL: 'fneg_idiom'
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  %F32 = fsub float -0.0, undef
+  %V4F32 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, undef
+  %V8F32 = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef
+  %V16F32 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef
+
+  %F64 = fsub double -0.0, undef
+  %V2F64 = fsub <2 x double> <double -0.0, double -0.0>, undef
+  %V4F64 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, undef
+  %V8F64 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, undef
+
+  ret i32 undef
+}
+
+define i32 @fneg(i32 %arg) {
+; CHECK-LABEL: 'fneg'
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
+; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SLM-LABEL: 'fneg'
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
+; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; GLM-LABEL: 'fneg'
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
+; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; BTVER2-LABEL: 'fneg'
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
+; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+  %F32 = fneg float undef
+  %V4F32 = fneg <4 x float> undef
+  %V8F32 = fneg <8 x float> undef
+  %V16F32 = fneg <16 x float> undef
+
+  %F64 = fneg double undef
+  %V2F64 = fneg <2 x double> undef
+  %V4F64 = fneg <4 x double> undef
+  %V8F64 = fneg <8 x double> undef
+
+  ret i32 undef
+}
+
 define i32 @fmul(i32 %arg) {
 ; SSE1-LABEL: 'fmul'
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef

From 0ff41b8a5afbb07bde8eaada42b1f7ea3a508101 Mon Sep 17 00:00:00 2001
From: "David L. Jones" <dlj@google.com>
Date: Mon, 27 May 2019 06:00:00 +0000
Subject: [PATCH 0278/1176] Revert r361356: "[MIR] Add simple PRE pass to
 MachineCSE"

This is problematic on buildbots, as discussed here: https://reviews.llvm.org/rL361356

It seems like the plan already was to revert, but that hasn't happened yet.

llvm-svn: 361746
---
 llvm/lib/CodeGen/MachineCSE.cpp               |  122 +-
 llvm/test/CodeGen/Mips/internalfunc.ll        |    3 +-
 llvm/test/CodeGen/X86/avx2-masked-gather.ll   |   48 +-
 llvm/test/CodeGen/X86/masked_compressstore.ll | 1013 ++++++++++-------
 llvm/test/CodeGen/X86/masked_gather.ll        |   94 +-
 llvm/test/CodeGen/X86/masked_store.ll         |  722 +++++++-----
 llvm/test/CodeGen/X86/masked_store_trunc.ll   |  531 +++++----
 .../CodeGen/X86/masked_store_trunc_ssat.ll    |  527 +++++----
 .../CodeGen/X86/masked_store_trunc_usat.ll    |  535 +++++----
 9 files changed, 2084 insertions(+), 1511 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index aa45c267b418d..ff15875af9d8b 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -19,7 +19,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -50,8 +49,6 @@ using namespace llvm;
 
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
-STATISTIC(NumPREs,      "Number of partial redundant expression"
-                        " transformed to fully redundant");
 STATISTIC(NumPhysCSEs,
           "Number of physreg referencing common subexpr eliminated");
 STATISTIC(NumCrossBBCSEs,
@@ -87,7 +84,6 @@ namespace {
 
     void releaseMemory() override {
       ScopeMap.clear();
-      PREMap.clear();
       Exps.clear();
     }
 
@@ -102,7 +98,6 @@ namespace {
 
     unsigned LookAheadLimit = 0;
     DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
-    DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait> PREMap;
     ScopedHTType VNT;
     SmallVector<MachineInstr *, 64> Exps;
     unsigned CurrVN = 0;
@@ -121,17 +116,13 @@ namespace {
                           PhysDefVector &PhysDefs, bool &NonLocal) const;
     bool isCSECandidate(MachineInstr *MI);
     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
-                           MachineBasicBlock *CSBB, MachineInstr *MI);
+                           MachineInstr *CSMI, MachineInstr *MI);
     void EnterScope(MachineBasicBlock *MBB);
     void ExitScope(MachineBasicBlock *MBB);
-    bool ProcessBlockCSE(MachineBasicBlock *MBB);
+    bool ProcessBlock(MachineBasicBlock *MBB);
     void ExitScopeIfDone(MachineDomTreeNode *Node,
                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
     bool PerformCSE(MachineDomTreeNode *Node);
-
-    bool isPRECandidate(MachineInstr *MI);
-    bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
-    bool PerformSimplePRE(MachineDominatorTree *DT);
   };
 
 } // end anonymous namespace
@@ -414,10 +405,9 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
 }
 
 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
-/// common expression that defines Reg. CSBB is basic block where CSReg is
-/// defined.
+/// common expression that defines Reg.
 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
-                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
+                                   MachineInstr *CSMI, MachineInstr *MI) {
   // FIXME: Heuristics that works around the lack the live range splitting.
 
   // If CSReg is used at all uses of Reg, CSE should not increase register
@@ -443,6 +433,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
   // an immediate predecessor. We don't want to increase register pressure and
   // end up causing other computation to be spilled.
   if (TII->isAsCheapAsAMove(*MI)) {
+    MachineBasicBlock *CSBB = CSMI->getParent();
     MachineBasicBlock *BB = MI->getParent();
     if (CSBB != BB && !CSBB->isSuccessor(BB))
       return false;
@@ -497,7 +488,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
   ScopeMap.erase(SI);
 }
 
-bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
   bool Changed = false;
 
   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
@@ -607,7 +598,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
              TargetRegisterInfo::isVirtualRegister(NewReg) &&
              "Do not CSE physical register defs!");
 
-      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
+      if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
         LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
         DoCSE = false;
         break;
@@ -747,7 +738,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
   for (MachineDomTreeNode *Node : Scopes) {
     MachineBasicBlock *MBB = Node->getBlock();
     EnterScope(MBB);
-    Changed |= ProcessBlockCSE(MBB);
+    Changed |= ProcessBlock(MBB);
     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
     ExitScopeIfDone(Node, OpenChildren);
   }
@@ -755,98 +746,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
   return Changed;
 }
 
-// We use stronger checks for PRE candidate rather than for CSE ones to embrace
-// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
-// to exclude instrs created by PRE that won't be CSEed later.
-bool MachineCSE::isPRECandidate(MachineInstr *MI) {
-  if (!isCSECandidate(MI) ||
-      MI->isNotDuplicable() ||
-      MI->isAsCheapAsAMove() ||
-      MI->getNumDefs() != 1 ||
-      MI->getNumExplicitDefs() != 1)
-    return false;
-
-  for (auto def: MI->defs())
-    if (!TRI->isVirtualRegister(def.getReg()))
-      return false;
-
-  for (auto use: MI->uses())
-    if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
-      return false;
-
-  return true;
-}
-
-bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, MachineBasicBlock *MBB) {
-  bool Changed = false;
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
-    MachineInstr *MI = &*I;
-    ++I;
-
-    if (!isPRECandidate(MI))
-      continue;
-
-    if (!PREMap.count(MI)) {
-      PREMap[MI] = MBB;
-      continue;
-    }
-
-    auto MBB1 = PREMap[MI];
-    assert(!DT->properlyDominates(MBB, MBB1) &&
-           "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
-    auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
-
-    // Two instrs are partial redundant if their basic blocks are reachable
-    // from one to another but one doesn't dominate another.
-    if (CMBB != MBB1) {
-      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
-      if (BB != nullptr && BB1 != nullptr &&
-          (isPotentiallyReachable(BB1, BB) ||
-           isPotentiallyReachable(BB, BB1))) {
-
-        assert(MI->getOperand(0).isDef() &&
-               "First operand of instr with one explicit def must be this def");
-        unsigned VReg = MI->getOperand(0).getReg();
-        unsigned NewReg = MRI->cloneVirtualRegister(VReg);
-        if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
-          continue;
-        MachineInstr &NewMI = TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
-        NewMI.getOperand(0).setReg(NewReg);
-
-        PREMap[MI] = CMBB;
-        ++NumPREs;
-        Changed = true;
-      }
-    }
-  }
-  return Changed;
-}
-
-// This simple PRE (partial redundancy elimination) pass doesn't actually
-// eliminate partial redundancy but transforms it to full redundancy,
-// anticipating that the next CSE step will eliminate this created redundancy.
-// If CSE doesn't eliminate this, than created instruction will remain dead
-// and eliminated later by Remove Dead Machine Instructions pass.
-bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
-  SmallVector<MachineDomTreeNode*, 32> BBs;
-
-  PREMap.clear();
-  bool Changed = false;
-  BBs.push_back(DT->getRootNode());
-  do {
-    auto Node = BBs.pop_back_val();
-    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
-    for (MachineDomTreeNode *Child : Children)
-      BBs.push_back(Child);
-
-    MachineBasicBlock *MBB = Node->getBlock();
-    Changed |= ProcessBlockPRE(DT, MBB);
-
-  } while (!BBs.empty());
-
-  return Changed;
-}
-
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -857,8 +756,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   DT = &getAnalysis<MachineDominatorTree>();
   LookAheadLimit = TII->getMachineCSELookAheadLimit();
-  bool ChangedPRE, ChangedCSE;
-  ChangedPRE = PerformSimplePRE(DT);
-  ChangedCSE = PerformCSE(DT->getRootNode());
-  return ChangedPRE || ChangedCSE;
+  return PerformCSE(DT->getRootNode());
 }
diff --git a/llvm/test/CodeGen/Mips/internalfunc.ll b/llvm/test/CodeGen/Mips/internalfunc.ll
index 1816a70df01e4..b6b1c96c5f3be 100644
--- a/llvm/test/CodeGen/Mips/internalfunc.ll
+++ b/llvm/test/CodeGen/Mips/internalfunc.ll
@@ -27,7 +27,8 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %entry, %if.then
 ; CHECK: lw  $[[R2:[0-9]+]], %got(sf2)
 ; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
-; CHECK: sw  ${{[0-9]+}}, %lo(caller.sf1)($[[R1]])
+; CHECK: lw  $[[R3:[0-9]+]], %got(caller.sf1)
+; CHECK: sw  ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
   %tobool3 = icmp ne i32 %a0, 0
   %tmp4 = load void (...)*, void (...)** @gf1, align 4
   %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)
diff --git a/llvm/test/CodeGen/X86/avx2-masked-gather.ll b/llvm/test/CodeGen/X86/avx2-masked-gather.ll
index eff624f75e071..3a831a88aa679 100644
--- a/llvm/test/CodeGen/X86/avx2-masked-gather.ll
+++ b/llvm/test/CodeGen/X86/avx2-masked-gather.ll
@@ -236,17 +236,18 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ptrs, <4 x i1> %masks, <4 x i3
 ; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm2, %xmm2
 ; NOGATHER-NEXT:  .LBB4_4: # %else2
 ; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB4_6
 ; NOGATHER-NEXT:  # %bb.5: # %cond.load4
-; NOGATHER-NEXT:    vmovq %xmm0, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; NOGATHER-NEXT:    vmovq %xmm3, %rax
 ; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm2, %xmm2
 ; NOGATHER-NEXT:  .LBB4_6: # %else5
 ; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB4_8
 ; NOGATHER-NEXT:  # %bb.7: # %cond.load7
+; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
 ; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm2
 ; NOGATHER-NEXT:  .LBB4_8: # %else8
@@ -294,17 +295,18 @@ define <4 x float> @masked_gather_v4float(<4 x float*> %ptrs, <4 x i1> %masks, <
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
 ; NOGATHER-NEXT:  .LBB5_4: # %else2
 ; NOGATHER-NEXT:    vpextrb $8, %xmm1, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB5_6
 ; NOGATHER-NEXT:  # %bb.5: # %cond.load4
-; NOGATHER-NEXT:    vmovq %xmm0, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; NOGATHER-NEXT:    vmovq %xmm3, %rax
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; NOGATHER-NEXT:  .LBB5_6: # %else5
 ; NOGATHER-NEXT:    vpextrb $12, %xmm1, %eax
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB5_8
 ; NOGATHER-NEXT:  # %bb.7: # %cond.load7
+; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; NOGATHER-NEXT:  .LBB5_8: # %else8
@@ -364,11 +366,11 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
 ; NOGATHER-NEXT:  .LBB6_4: # %else2
 ; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB6_6
 ; NOGATHER-NEXT:  # %bb.5: # %cond.load4
-; NOGATHER-NEXT:    vmovq %xmm3, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; NOGATHER-NEXT:    vmovq %xmm4, %rax
 ; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm1, %xmm4
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
 ; NOGATHER-NEXT:  .LBB6_6: # %else5
@@ -376,6 +378,7 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB6_8
 ; NOGATHER-NEXT:  # %bb.7: # %cond.load7
+; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
 ; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm1, %xmm3
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
@@ -399,11 +402,11 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; NOGATHER-NEXT:  .LBB6_12: # %else14
 ; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB6_14
 ; NOGATHER-NEXT:  # %bb.13: # %cond.load16
-; NOGATHER-NEXT:    vmovq %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT:    vmovq %xmm3, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; NOGATHER-NEXT:    vpinsrd $2, (%rax), %xmm3, %xmm3
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
@@ -412,7 +415,8 @@ define <8 x i32> @masked_gather_v8i32(<8 x i32*>* %ptr, <8 x i1> %masks, <8 x i3
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB6_16
 ; NOGATHER-NEXT:  # %bb.15: # %cond.load19
-; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm0, %xmm0
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
@@ -473,11 +477,11 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
 ; NOGATHER-NEXT:  .LBB7_4: # %else2
 ; NOGATHER-NEXT:    vpextrb $4, %xmm0, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB7_6
 ; NOGATHER-NEXT:  # %bb.5: # %cond.load4
-; NOGATHER-NEXT:    vmovq %xmm3, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; NOGATHER-NEXT:    vmovq %xmm4, %rax
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm4 = xmm1[0,1],mem[0],xmm1[3]
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3],ymm1[4,5,6,7]
 ; NOGATHER-NEXT:  .LBB7_6: # %else5
@@ -485,6 +489,7 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB7_8
 ; NOGATHER-NEXT:  # %bb.7: # %cond.load7
+; NOGATHER-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; NOGATHER-NEXT:    vpextrq $1, %xmm3, %rax
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],mem[0]
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
@@ -509,11 +514,11 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; NOGATHER-NEXT:  .LBB7_12: # %else14
 ; NOGATHER-NEXT:    vpextrb $12, %xmm0, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB7_14
 ; NOGATHER-NEXT:  # %bb.13: # %cond.load16
-; NOGATHER-NEXT:    vmovq %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT:    vmovq %xmm3, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
@@ -522,7 +527,8 @@ define <8 x float> @masked_gather_v8float(<8 x float*>* %ptr, <8 x i1> %masks, <
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB7_16
 ; NOGATHER-NEXT:  # %bb.15: # %cond.load19
-; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
@@ -577,11 +583,11 @@ define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i6
 ; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
 ; NOGATHER-NEXT:  .LBB8_4: # %else2
 ; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB8_6
 ; NOGATHER-NEXT:  # %bb.5: # %cond.load4
-; NOGATHER-NEXT:    vmovq %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT:    vmovq %xmm3, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; NOGATHER-NEXT:    vpinsrq $0, (%rax), %xmm3, %xmm3
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
@@ -590,7 +596,8 @@ define <4 x i64> @masked_gather_v4i64(<4 x i64*>* %ptr, <4 x i1> %masks, <4 x i6
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB8_8
 ; NOGATHER-NEXT:  # %bb.7: # %cond.load7
-; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm0, %xmm0
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
@@ -645,11 +652,11 @@ define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks
 ; NOGATHER-NEXT:    vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2,3]
 ; NOGATHER-NEXT:  .LBB9_4: # %else2
 ; NOGATHER-NEXT:    vpextrb $8, %xmm0, %eax
-; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm2
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB9_6
 ; NOGATHER-NEXT:  # %bb.5: # %cond.load4
-; NOGATHER-NEXT:    vmovq %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; NOGATHER-NEXT:    vmovq %xmm3, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm3
 ; NOGATHER-NEXT:    vmovlpd {{.*#+}} xmm3 = mem[0],xmm3[1]
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
@@ -658,7 +665,8 @@ define <4 x double> @masked_gather_v4double(<4 x double*>* %ptr, <4 x i1> %masks
 ; NOGATHER-NEXT:    testb $1, %al
 ; NOGATHER-NEXT:    je .LBB9_8
 ; NOGATHER-NEXT:  # %bb.7: # %cond.load7
-; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
+; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
 ; NOGATHER-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; NOGATHER-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; NOGATHER-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll
index 0806ccd86ef2f..6ee8779f82caa 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -130,67 +130,133 @@ define void @compressstore_v8f64_v8i1(double* %base, <8 x double> %V, <8 x i1> %
 ; SSE42-NEXT:  LBB0_16: ## %else20
 ; SSE42-NEXT:    retq
 ;
-; AVX1OR2-LABEL: compressstore_v8f64_v8i1:
-; AVX1OR2:       ## %bb.0:
-; AVX1OR2-NEXT:    vpextrb $0, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB0_2
-; AVX1OR2-NEXT:  ## %bb.1: ## %cond.store
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_2: ## %else
-; AVX1OR2-NEXT:    vpextrb $2, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB0_4
-; AVX1OR2-NEXT:  ## %bb.3: ## %cond.store1
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_4: ## %else2
-; AVX1OR2-NEXT:    vpextrb $4, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1OR2-NEXT:    je LBB0_6
-; AVX1OR2-NEXT:  ## %bb.5: ## %cond.store4
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_6: ## %else5
-; AVX1OR2-NEXT:    vpextrb $6, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB0_8
-; AVX1OR2-NEXT:  ## %bb.7: ## %cond.store7
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_8: ## %else8
-; AVX1OR2-NEXT:    vpextrb $8, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB0_10
-; AVX1OR2-NEXT:  ## %bb.9: ## %cond.store10
-; AVX1OR2-NEXT:    vmovlpd %xmm1, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_10: ## %else11
-; AVX1OR2-NEXT:    vpextrb $10, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB0_12
-; AVX1OR2-NEXT:  ## %bb.11: ## %cond.store13
-; AVX1OR2-NEXT:    vmovhpd %xmm1, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_12: ## %else14
-; AVX1OR2-NEXT:    vpextrb $12, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm1, %xmm0
-; AVX1OR2-NEXT:    je LBB0_14
-; AVX1OR2-NEXT:  ## %bb.13: ## %cond.store16
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB0_14: ## %else17
-; AVX1OR2-NEXT:    vpextrb $14, %xmm2, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB0_16
-; AVX1OR2-NEXT:  ## %bb.15: ## %cond.store19
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:  LBB0_16: ## %else20
-; AVX1OR2-NEXT:    vzeroupper
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: compressstore_v8f64_v8i1:
+; AVX1:       ## %bb.0:
+; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_2
+; AVX1-NEXT:  ## %bb.1: ## %cond.store
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_2: ## %else
+; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_4
+; AVX1-NEXT:  ## %bb.3: ## %cond.store1
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_4: ## %else2
+; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_6
+; AVX1-NEXT:  ## %bb.5: ## %cond.store4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vmovlps %xmm3, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_6: ## %else5
+; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_8
+; AVX1-NEXT:  ## %bb.7: ## %cond.store7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_8: ## %else8
+; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_10
+; AVX1-NEXT:  ## %bb.9: ## %cond.store10
+; AVX1-NEXT:    vmovlpd %xmm1, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_10: ## %else11
+; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_12
+; AVX1-NEXT:  ## %bb.11: ## %cond.store13
+; AVX1-NEXT:    vmovhpd %xmm1, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_12: ## %else14
+; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_14
+; AVX1-NEXT:  ## %bb.13: ## %cond.store16
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB0_14: ## %else17
+; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB0_16
+; AVX1-NEXT:  ## %bb.15: ## %cond.store19
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:  LBB0_16: ## %else20
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: compressstore_v8f64_v8i1:
+; AVX2:       ## %bb.0:
+; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_2
+; AVX2-NEXT:  ## %bb.1: ## %cond.store
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_2: ## %else
+; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_4
+; AVX2-NEXT:  ## %bb.3: ## %cond.store1
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_4: ## %else2
+; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_6
+; AVX2-NEXT:  ## %bb.5: ## %cond.store4
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vmovlpd %xmm3, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_6: ## %else5
+; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_8
+; AVX2-NEXT:  ## %bb.7: ## %cond.store7
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_8: ## %else8
+; AVX2-NEXT:    vpextrb $8, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_10
+; AVX2-NEXT:  ## %bb.9: ## %cond.store10
+; AVX2-NEXT:    vmovlpd %xmm1, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_10: ## %else11
+; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_12
+; AVX2-NEXT:  ## %bb.11: ## %cond.store13
+; AVX2-NEXT:    vmovhpd %xmm1, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_12: ## %else14
+; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_14
+; AVX2-NEXT:  ## %bb.13: ## %cond.store16
+; AVX2-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB0_14: ## %else17
+; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB0_16
+; AVX2-NEXT:  ## %bb.15: ## %cond.store19
+; AVX2-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:  LBB0_16: ## %else20
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: compressstore_v8f64_v8i1:
 ; AVX512F:       ## %bb.0:
@@ -457,125 +523,253 @@ define void @compressstore_v16f64_v16i1(double* %base, <16 x double> %V, <16 x i
 ; SSE42-NEXT:  LBB1_32: ## %else44
 ; SSE42-NEXT:    retq
 ;
-; AVX1OR2-LABEL: compressstore_v16f64_v16i1:
-; AVX1OR2:       ## %bb.0:
-; AVX1OR2-NEXT:    vpextrb $0, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_2
-; AVX1OR2-NEXT:  ## %bb.1: ## %cond.store
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_2: ## %else
-; AVX1OR2-NEXT:    vpextrb $1, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_4
-; AVX1OR2-NEXT:  ## %bb.3: ## %cond.store1
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_4: ## %else2
-; AVX1OR2-NEXT:    vpextrb $2, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1OR2-NEXT:    je LBB1_6
-; AVX1OR2-NEXT:  ## %bb.5: ## %cond.store4
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_6: ## %else5
-; AVX1OR2-NEXT:    vpextrb $3, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_8
-; AVX1OR2-NEXT:  ## %bb.7: ## %cond.store7
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_8: ## %else8
-; AVX1OR2-NEXT:    vpextrb $4, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_10
-; AVX1OR2-NEXT:  ## %bb.9: ## %cond.store10
-; AVX1OR2-NEXT:    vmovlpd %xmm1, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_10: ## %else11
-; AVX1OR2-NEXT:    vpextrb $5, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_12
-; AVX1OR2-NEXT:  ## %bb.11: ## %cond.store13
-; AVX1OR2-NEXT:    vmovhpd %xmm1, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_12: ## %else14
-; AVX1OR2-NEXT:    vpextrb $6, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm1, %xmm0
-; AVX1OR2-NEXT:    je LBB1_14
-; AVX1OR2-NEXT:  ## %bb.13: ## %cond.store16
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_14: ## %else17
-; AVX1OR2-NEXT:    vpextrb $7, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_16
-; AVX1OR2-NEXT:  ## %bb.15: ## %cond.store19
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_16: ## %else20
-; AVX1OR2-NEXT:    vpextrb $8, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_18
-; AVX1OR2-NEXT:  ## %bb.17: ## %cond.store22
-; AVX1OR2-NEXT:    vmovlpd %xmm2, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_18: ## %else23
-; AVX1OR2-NEXT:    vpextrb $9, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_20
-; AVX1OR2-NEXT:  ## %bb.19: ## %cond.store25
-; AVX1OR2-NEXT:    vmovhpd %xmm2, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_20: ## %else26
-; AVX1OR2-NEXT:    vpextrb $10, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm2, %xmm0
-; AVX1OR2-NEXT:    je LBB1_22
-; AVX1OR2-NEXT:  ## %bb.21: ## %cond.store28
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_22: ## %else29
-; AVX1OR2-NEXT:    vpextrb $11, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_24
-; AVX1OR2-NEXT:  ## %bb.23: ## %cond.store31
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_24: ## %else32
-; AVX1OR2-NEXT:    vpextrb $12, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_26
-; AVX1OR2-NEXT:  ## %bb.25: ## %cond.store34
-; AVX1OR2-NEXT:    vmovlpd %xmm3, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_26: ## %else35
-; AVX1OR2-NEXT:    vpextrb $13, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_28
-; AVX1OR2-NEXT:  ## %bb.27: ## %cond.store37
-; AVX1OR2-NEXT:    vmovhpd %xmm3, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_28: ## %else38
-; AVX1OR2-NEXT:    vpextrb $14, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm3, %xmm0
-; AVX1OR2-NEXT:    je LBB1_30
-; AVX1OR2-NEXT:  ## %bb.29: ## %cond.store40
-; AVX1OR2-NEXT:    vmovlpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:    addq $8, %rdi
-; AVX1OR2-NEXT:  LBB1_30: ## %else41
-; AVX1OR2-NEXT:    vpextrb $15, %xmm4, %eax
-; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    je LBB1_32
-; AVX1OR2-NEXT:  ## %bb.31: ## %cond.store43
-; AVX1OR2-NEXT:    vmovhpd %xmm0, (%rdi)
-; AVX1OR2-NEXT:  LBB1_32: ## %else44
-; AVX1OR2-NEXT:    vzeroupper
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: compressstore_v16f64_v16i1:
+; AVX1:       ## %bb.0:
+; AVX1-NEXT:    vpextrb $0, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_2
+; AVX1-NEXT:  ## %bb.1: ## %cond.store
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_2: ## %else
+; AVX1-NEXT:    vpextrb $1, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_4
+; AVX1-NEXT:  ## %bb.3: ## %cond.store1
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_4: ## %else2
+; AVX1-NEXT:    vpextrb $2, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_6
+; AVX1-NEXT:  ## %bb.5: ## %cond.store4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vmovlps %xmm5, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_6: ## %else5
+; AVX1-NEXT:    vpextrb $3, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_8
+; AVX1-NEXT:  ## %bb.7: ## %cond.store7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_8: ## %else8
+; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_10
+; AVX1-NEXT:  ## %bb.9: ## %cond.store10
+; AVX1-NEXT:    vmovlpd %xmm1, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_10: ## %else11
+; AVX1-NEXT:    vpextrb $5, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_12
+; AVX1-NEXT:  ## %bb.11: ## %cond.store13
+; AVX1-NEXT:    vmovhpd %xmm1, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_12: ## %else14
+; AVX1-NEXT:    vpextrb $6, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_14
+; AVX1-NEXT:  ## %bb.13: ## %cond.store16
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_14: ## %else17
+; AVX1-NEXT:    vpextrb $7, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_16
+; AVX1-NEXT:  ## %bb.15: ## %cond.store19
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_16: ## %else20
+; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_18
+; AVX1-NEXT:  ## %bb.17: ## %cond.store22
+; AVX1-NEXT:    vmovlpd %xmm2, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_18: ## %else23
+; AVX1-NEXT:    vpextrb $9, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_20
+; AVX1-NEXT:  ## %bb.19: ## %cond.store25
+; AVX1-NEXT:    vmovhpd %xmm2, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_20: ## %else26
+; AVX1-NEXT:    vpextrb $10, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_22
+; AVX1-NEXT:  ## %bb.21: ## %cond.store28
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_22: ## %else29
+; AVX1-NEXT:    vpextrb $11, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_24
+; AVX1-NEXT:  ## %bb.23: ## %cond.store31
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_24: ## %else32
+; AVX1-NEXT:    vpextrb $12, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_26
+; AVX1-NEXT:  ## %bb.25: ## %cond.store34
+; AVX1-NEXT:    vmovlpd %xmm3, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_26: ## %else35
+; AVX1-NEXT:    vpextrb $13, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_28
+; AVX1-NEXT:  ## %bb.27: ## %cond.store37
+; AVX1-NEXT:    vmovhpd %xmm3, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_28: ## %else38
+; AVX1-NEXT:    vpextrb $14, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_30
+; AVX1-NEXT:  ## %bb.29: ## %cond.store40
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
+; AVX1-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX1-NEXT:    addq $8, %rdi
+; AVX1-NEXT:  LBB1_30: ## %else41
+; AVX1-NEXT:    vpextrb $15, %xmm4, %eax
+; AVX1-NEXT:    testb $1, %al
+; AVX1-NEXT:    je LBB1_32
+; AVX1-NEXT:  ## %bb.31: ## %cond.store43
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
+; AVX1-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX1-NEXT:  LBB1_32: ## %else44
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: compressstore_v16f64_v16i1:
+; AVX2:       ## %bb.0:
+; AVX2-NEXT:    vpextrb $0, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_2
+; AVX2-NEXT:  ## %bb.1: ## %cond.store
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_2: ## %else
+; AVX2-NEXT:    vpextrb $1, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_4
+; AVX2-NEXT:  ## %bb.3: ## %cond.store1
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_4: ## %else2
+; AVX2-NEXT:    vpextrb $2, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_6
+; AVX2-NEXT:  ## %bb.5: ## %cond.store4
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX2-NEXT:    vmovlpd %xmm5, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_6: ## %else5
+; AVX2-NEXT:    vpextrb $3, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_8
+; AVX2-NEXT:  ## %bb.7: ## %cond.store7
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_8: ## %else8
+; AVX2-NEXT:    vpextrb $4, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_10
+; AVX2-NEXT:  ## %bb.9: ## %cond.store10
+; AVX2-NEXT:    vmovlpd %xmm1, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_10: ## %else11
+; AVX2-NEXT:    vpextrb $5, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_12
+; AVX2-NEXT:  ## %bb.11: ## %cond.store13
+; AVX2-NEXT:    vmovhpd %xmm1, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_12: ## %else14
+; AVX2-NEXT:    vpextrb $6, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_14
+; AVX2-NEXT:  ## %bb.13: ## %cond.store16
+; AVX2-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_14: ## %else17
+; AVX2-NEXT:    vpextrb $7, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_16
+; AVX2-NEXT:  ## %bb.15: ## %cond.store19
+; AVX2-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_16: ## %else20
+; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_18
+; AVX2-NEXT:  ## %bb.17: ## %cond.store22
+; AVX2-NEXT:    vmovlpd %xmm2, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_18: ## %else23
+; AVX2-NEXT:    vpextrb $9, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_20
+; AVX2-NEXT:  ## %bb.19: ## %cond.store25
+; AVX2-NEXT:    vmovhpd %xmm2, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_20: ## %else26
+; AVX2-NEXT:    vpextrb $10, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_22
+; AVX2-NEXT:  ## %bb.21: ## %cond.store28
+; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_22: ## %else29
+; AVX2-NEXT:    vpextrb $11, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_24
+; AVX2-NEXT:  ## %bb.23: ## %cond.store31
+; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_24: ## %else32
+; AVX2-NEXT:    vpextrb $12, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_26
+; AVX2-NEXT:  ## %bb.25: ## %cond.store34
+; AVX2-NEXT:    vmovlpd %xmm3, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_26: ## %else35
+; AVX2-NEXT:    vpextrb $13, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_28
+; AVX2-NEXT:  ## %bb.27: ## %cond.store37
+; AVX2-NEXT:    vmovhpd %xmm3, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_28: ## %else38
+; AVX2-NEXT:    vpextrb $14, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_30
+; AVX2-NEXT:  ## %bb.29: ## %cond.store40
+; AVX2-NEXT:    vextractf128 $1, %ymm3, %xmm0
+; AVX2-NEXT:    vmovlpd %xmm0, (%rdi)
+; AVX2-NEXT:    addq $8, %rdi
+; AVX2-NEXT:  LBB1_30: ## %else41
+; AVX2-NEXT:    vpextrb $15, %xmm4, %eax
+; AVX2-NEXT:    testb $1, %al
+; AVX2-NEXT:    je LBB1_32
+; AVX2-NEXT:  ## %bb.31: ## %cond.store43
+; AVX2-NEXT:    vextractf128 $1, %ymm3, %xmm0
+; AVX2-NEXT:    vmovhpd %xmm0, (%rdi)
+; AVX2-NEXT:  LBB1_32: ## %else44
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: compressstore_v16f64_v16i1:
 ; AVX512F:       ## %bb.0:
@@ -1062,30 +1256,33 @@ define void @compressstore_v8f32_v8i1(float* %base, <8 x float> %V, <8 x i1> %ma
 ; AVX1OR2-NEXT:  LBB4_8: ## %else8
 ; AVX1OR2-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX1OR2-NEXT:    testb $1, %al
-; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1OR2-NEXT:    je LBB4_10
 ; AVX1OR2-NEXT:  ## %bb.9: ## %cond.store10
-; AVX1OR2-NEXT:    vmovss %xmm0, (%rdi)
+; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1OR2-NEXT:    vmovss %xmm2, (%rdi)
 ; AVX1OR2-NEXT:    addq $4, %rdi
 ; AVX1OR2-NEXT:  LBB4_10: ## %else11
 ; AVX1OR2-NEXT:    vpextrb $10, %xmm1, %eax
 ; AVX1OR2-NEXT:    testb $1, %al
 ; AVX1OR2-NEXT:    je LBB4_12
 ; AVX1OR2-NEXT:  ## %bb.11: ## %cond.store13
-; AVX1OR2-NEXT:    vextractps $1, %xmm0, (%rdi)
+; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1OR2-NEXT:    vextractps $1, %xmm2, (%rdi)
 ; AVX1OR2-NEXT:    addq $4, %rdi
 ; AVX1OR2-NEXT:  LBB4_12: ## %else14
 ; AVX1OR2-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX1OR2-NEXT:    testb $1, %al
 ; AVX1OR2-NEXT:    je LBB4_14
 ; AVX1OR2-NEXT:  ## %bb.13: ## %cond.store16
-; AVX1OR2-NEXT:    vextractps $2, %xmm0, (%rdi)
+; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1OR2-NEXT:    vextractps $2, %xmm2, (%rdi)
 ; AVX1OR2-NEXT:    addq $4, %rdi
 ; AVX1OR2-NEXT:  LBB4_14: ## %else17
 ; AVX1OR2-NEXT:    vpextrb $14, %xmm1, %eax
 ; AVX1OR2-NEXT:    testb $1, %al
 ; AVX1OR2-NEXT:    je LBB4_16
 ; AVX1OR2-NEXT:  ## %bb.15: ## %cond.store19
+; AVX1OR2-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1OR2-NEXT:    vextractps $3, %xmm0, (%rdi)
 ; AVX1OR2-NEXT:  LBB4_16: ## %else20
 ; AVX1OR2-NEXT:    vzeroupper
@@ -1848,10 +2045,10 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX1-NEXT:    vpacksswb %xmm0, %xmm4, %xmm4
 ; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm10
 ; AVX1-NEXT:    je LBB6_10
 ; AVX1-NEXT:  ## %bb.9: ## %cond.store10
-; AVX1-NEXT:    vmovss %xmm10, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vmovd %xmm4, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_10: ## %else11
 ; AVX1-NEXT:    vpackssdw %xmm9, %xmm0, %xmm4
@@ -1860,27 +2057,30 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_12
 ; AVX1-NEXT:  ## %bb.11: ## %cond.store13
-; AVX1-NEXT:    vextractps $1, %xmm10, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrd $1, %xmm4, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_12: ## %else14
 ; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm8, %xmm4
-; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm8, %xmm8
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm0, %xmm4
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrb $6, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_14
 ; AVX1-NEXT:  ## %bb.13: ## %cond.store16
-; AVX1-NEXT:    vextractps $2, %xmm10, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrd $2, %xmm4, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_14: ## %else17
-; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm0, %xmm4
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrb $7, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_16
 ; AVX1-NEXT:  ## %bb.15: ## %cond.store19
-; AVX1-NEXT:    vextractps $3, %xmm10, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_16: ## %else20
 ; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
@@ -1925,45 +2125,48 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX1-NEXT:    vextractps $3, %xmm1, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_24: ## %else32
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm4
-; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm4, %xmm5
-; AVX1-NEXT:    vpackssdw %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm0
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm0, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm5
+; AVX1-NEXT:    vpacksswb %xmm5, %xmm0, %xmm5
+; AVX1-NEXT:    vpextrb $12, %xmm5, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    je LBB6_26
 ; AVX1-NEXT:  ## %bb.25: ## %cond.store34
-; AVX1-NEXT:    vmovd %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vmovd %xmm5, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_26: ## %else35
-; AVX1-NEXT:    vpackssdw %xmm5, %xmm0, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm4
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
+; AVX1-NEXT:    vpextrb $13, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_28
 ; AVX1-NEXT:  ## %bb.27: ## %cond.store37
-; AVX1-NEXT:    vpextrd $1, %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpextrd $1, %xmm4, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_28: ## %else38
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm4
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm4
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX1-NEXT:    vpextrb $14, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_30
 ; AVX1-NEXT:  ## %bb.29: ## %cond.store40
-; AVX1-NEXT:    vpextrd $2, %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpextrd $2, %xmm4, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_30: ## %else41
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_32
 ; AVX1-NEXT:  ## %bb.31: ## %cond.store43
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_32: ## %else44
@@ -2009,45 +2212,48 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX1-NEXT:    vextractps $3, %xmm2, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_40: ## %else56
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm1
-; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm4
-; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm4
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm4, %xmm4
+; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
 ; AVX1-NEXT:    je LBB6_42
 ; AVX1-NEXT:  ## %bb.41: ## %cond.store58
-; AVX1-NEXT:    vmovd %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vmovd %xmm4, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_42: ## %else59
-; AVX1-NEXT:    vpackssdw %xmm4, %xmm0, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_44
 ; AVX1-NEXT:  ## %bb.43: ## %cond.store61
-; AVX1-NEXT:    vpextrd $1, %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT:    vpextrd $1, %xmm1, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_44: ## %else62
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
+; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_46
 ; AVX1-NEXT:  ## %bb.45: ## %cond.store64
-; AVX1-NEXT:    vpextrd $2, %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT:    vpextrd $2, %xmm1, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_46: ## %else65
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT:    vpextrb $7, %xmm1, %eax
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_48
 ; AVX1-NEXT:  ## %bb.47: ## %cond.store67
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
 ; AVX1-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_48: ## %else68
@@ -2093,45 +2299,48 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX1-NEXT:    vextractps $3, %xmm3, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_56: ## %else80
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm1
-; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
 ; AVX1-NEXT:    je LBB6_58
 ; AVX1-NEXT:  ## %bb.57: ## %cond.store82
-; AVX1-NEXT:    vmovd %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT:    vmovd %xmm2, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_58: ## %else83
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_60
 ; AVX1-NEXT:  ## %bb.59: ## %cond.store85
-; AVX1-NEXT:    vpextrd $1, %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT:    vpextrd $1, %xmm1, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_60: ## %else86
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_62
 ; AVX1-NEXT:  ## %bb.61: ## %cond.store88
-; AVX1-NEXT:    vpextrd $2, %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
+; AVX1-NEXT:    vpextrd $2, %xmm1, (%rdi)
 ; AVX1-NEXT:    addq $4, %rdi
 ; AVX1-NEXT:  LBB6_62: ## %else89
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
+; AVX1-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB6_64
 ; AVX1-NEXT:  ## %bb.63: ## %cond.store91
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm0
 ; AVX1-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX1-NEXT:  LBB6_64: ## %else92
 ; AVX1-NEXT:    vzeroupper
@@ -2186,40 +2395,43 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX2-NEXT:    vpacksswb %xmm0, %xmm9, %xmm9
 ; AVX2-NEXT:    vpextrb $4, %xmm9, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm9
 ; AVX2-NEXT:    je LBB6_10
 ; AVX2-NEXT:  ## %bb.9: ## %cond.store10
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm9
 ; AVX2-NEXT:    vmovd %xmm9, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_10: ## %else11
-; AVX2-NEXT:    vpackssdw %xmm8, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
+; AVX2-NEXT:    vpackssdw %xmm8, %xmm0, %xmm8
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm8, %xmm8
+; AVX2-NEXT:    vpextrb $5, %xmm8, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_12
 ; AVX2-NEXT:  ## %bb.11: ## %cond.store13
-; AVX2-NEXT:    vpextrd $1, %xmm9, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm8
+; AVX2-NEXT:    vpextrd $1, %xmm8, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_12: ## %else14
-; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
+; AVX2-NEXT:    vpxor %xmm8, %xmm8, %xmm8
+; AVX2-NEXT:    vpcmpeqd %ymm8, %ymm4, %ymm4
+; AVX2-NEXT:    vextracti128 $1, %ymm4, %xmm8
+; AVX2-NEXT:    vpackssdw %xmm8, %xmm0, %xmm4
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm4, %xmm4
+; AVX2-NEXT:    vpextrb $6, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_14
 ; AVX2-NEXT:  ## %bb.13: ## %cond.store16
-; AVX2-NEXT:    vpextrd $2, %xmm9, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrd $2, %xmm4, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_14: ## %else17
-; AVX2-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
+; AVX2-NEXT:    vpackssdw %xmm8, %xmm0, %xmm4
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm4, %xmm4
+; AVX2-NEXT:    vpextrb $7, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_16
 ; AVX2-NEXT:  ## %bb.15: ## %cond.store19
-; AVX2-NEXT:    vpextrd $3, %xmm9, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_16: ## %else20
 ; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
@@ -2264,44 +2476,47 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX2-NEXT:  LBB6_24: ## %else32
 ; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm5, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT:    vpackssdw %xmm4, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm4
+; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
+; AVX2-NEXT:    vpextrb $12, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX2-NEXT:    je LBB6_26
 ; AVX2-NEXT:  ## %bb.25: ## %cond.store34
-; AVX2-NEXT:    vmovd %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
+; AVX2-NEXT:    vmovd %xmm4, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_26: ## %else35
-; AVX2-NEXT:    vpackssdw %xmm4, %xmm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_28
 ; AVX2-NEXT:  ## %bb.27: ## %cond.store37
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX2-NEXT:    vpextrd $1, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_28: ## %else38
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm4
+; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm5, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm4
 ; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX2-NEXT:    vpextrb $14, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_30
 ; AVX2-NEXT:  ## %bb.29: ## %cond.store40
-; AVX2-NEXT:    vpextrd $2, %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm4
+; AVX2-NEXT:    vpextrd $2, %xmm4, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_30: ## %else41
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_32
 ; AVX2-NEXT:  ## %bb.31: ## %cond.store43
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX2-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_32: ## %else44
@@ -2347,44 +2562,47 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX2-NEXT:  LBB6_40: ## %else56
 ; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
+; AVX2-NEXT:    vpextrb $4, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm0
 ; AVX2-NEXT:    je LBB6_42
 ; AVX2-NEXT:  ## %bb.41: ## %cond.store58
-; AVX2-NEXT:    vmovd %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX2-NEXT:    vmovd %xmm1, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_42: ## %else59
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
-; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_44
 ; AVX2-NEXT:  ## %bb.43: ## %cond.store61
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm0
 ; AVX2-NEXT:    vpextrd $1, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_44: ## %else62
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm6, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm2, %xmm2
-; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
+; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
+; AVX2-NEXT:    vpextrb $6, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_46
 ; AVX2-NEXT:  ## %bb.45: ## %cond.store64
-; AVX2-NEXT:    vpextrd $2, %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX2-NEXT:    vpextrd $2, %xmm1, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_46: ## %else65
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
-; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_48
 ; AVX2-NEXT:  ## %bb.47: ## %cond.store67
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm0
 ; AVX2-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_48: ## %else68
@@ -2430,44 +2648,47 @@ define void @compressstore_v32f32_v32i32(float* %base, <32 x float> %V, <32 x i3
 ; AVX2-NEXT:  LBB6_56: ## %else80
 ; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm7, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm0
 ; AVX2-NEXT:    je LBB6_58
 ; AVX2-NEXT:  ## %bb.57: ## %cond.store82
-; AVX2-NEXT:    vmovd %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm1
+; AVX2-NEXT:    vmovd %xmm1, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_58: ## %else83
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_60
 ; AVX2-NEXT:  ## %bb.59: ## %cond.store85
+; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm0
 ; AVX2-NEXT:    vpextrd $1, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_60: ## %else86
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm7, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
-; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
+; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm7, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_62
 ; AVX2-NEXT:  ## %bb.61: ## %cond.store88
-; AVX2-NEXT:    vpextrd $2, %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm1
+; AVX2-NEXT:    vpextrd $2, %xmm1, (%rdi)
 ; AVX2-NEXT:    addq $4, %rdi
 ; AVX2-NEXT:  LBB6_62: ## %else89
-; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
+; AVX2-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB6_64
 ; AVX2-NEXT:  ## %bb.63: ## %cond.store91
+; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm0
 ; AVX2-NEXT:    vpextrd $3, %xmm0, (%rdi)
 ; AVX2-NEXT:  LBB6_64: ## %else92
 ; AVX2-NEXT:    vzeroupper
@@ -2670,16 +2891,17 @@ define void @compressstore_v4i64_v4i1(i64* %base, <4 x i64> %V, <4 x i1> %mask)
 ; AVX1-NEXT:  LBB8_4: ## %else2
 ; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je LBB8_6
 ; AVX1-NEXT:  ## %bb.5: ## %cond.store4
-; AVX1-NEXT:    vmovq %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovlps %xmm2, (%rdi)
 ; AVX1-NEXT:    addq $8, %rdi
 ; AVX1-NEXT:  LBB8_6: ## %else5
 ; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB8_8
 ; AVX1-NEXT:  ## %bb.7: ## %cond.store7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, (%rdi)
 ; AVX1-NEXT:  LBB8_8: ## %else8
 ; AVX1-NEXT:    vzeroupper
@@ -2703,16 +2925,17 @@ define void @compressstore_v4i64_v4i1(i64* %base, <4 x i64> %V, <4 x i1> %mask)
 ; AVX2-NEXT:  LBB8_4: ## %else2
 ; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je LBB8_6
 ; AVX2-NEXT:  ## %bb.5: ## %cond.store4
-; AVX2-NEXT:    vmovq %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vmovq %xmm2, (%rdi)
 ; AVX2-NEXT:    addq $8, %rdi
 ; AVX2-NEXT:  LBB8_6: ## %else5
 ; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB8_8
 ; AVX2-NEXT:  ## %bb.7: ## %cond.store7
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, (%rdi)
 ; AVX2-NEXT:  LBB8_8: ## %else8
 ; AVX2-NEXT:    vzeroupper
@@ -2889,16 +3112,17 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX1-NEXT:  LBB9_4: ## %else2
 ; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je LBB9_6
 ; AVX1-NEXT:  ## %bb.5: ## %cond.store4
-; AVX1-NEXT:    vmovq %xmm0, (%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vmovlps %xmm3, (%rdi)
 ; AVX1-NEXT:    addq $8, %rdi
 ; AVX1-NEXT:  LBB9_6: ## %else5
 ; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB9_8
 ; AVX1-NEXT:  ## %bb.7: ## %cond.store7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, (%rdi)
 ; AVX1-NEXT:    addq $8, %rdi
 ; AVX1-NEXT:  LBB9_8: ## %else8
@@ -2918,9 +3142,9 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX1-NEXT:  LBB9_12: ## %else14
 ; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    je LBB9_14
 ; AVX1-NEXT:  ## %bb.13: ## %cond.store16
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vmovq %xmm0, (%rdi)
 ; AVX1-NEXT:    addq $8, %rdi
 ; AVX1-NEXT:  LBB9_14: ## %else17
@@ -2928,6 +3152,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB9_16
 ; AVX1-NEXT:  ## %bb.15: ## %cond.store19
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, (%rdi)
 ; AVX1-NEXT:  LBB9_16: ## %else20
 ; AVX1-NEXT:    vzeroupper
@@ -2951,16 +3176,17 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX2-NEXT:  LBB9_4: ## %else2
 ; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je LBB9_6
 ; AVX2-NEXT:  ## %bb.5: ## %cond.store4
-; AVX2-NEXT:    vmovq %xmm0, (%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vmovq %xmm3, (%rdi)
 ; AVX2-NEXT:    addq $8, %rdi
 ; AVX2-NEXT:  LBB9_6: ## %else5
 ; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB9_8
 ; AVX2-NEXT:  ## %bb.7: ## %cond.store7
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $8, %rdi
 ; AVX2-NEXT:  LBB9_8: ## %else8
@@ -2980,9 +3206,9 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX2-NEXT:  LBB9_12: ## %else14
 ; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX2-NEXT:    je LBB9_14
 ; AVX2-NEXT:  ## %bb.13: ## %cond.store16
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX2-NEXT:    vmovq %xmm0, (%rdi)
 ; AVX2-NEXT:    addq $8, %rdi
 ; AVX2-NEXT:  LBB9_14: ## %else17
@@ -2990,6 +3216,7 @@ define void @compressstore_v8i64_v8i1(i64* %base, <8 x i64> %V, <8 x i1> %mask)
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB9_16
 ; AVX2-NEXT:  ## %bb.15: ## %cond.store19
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, (%rdi)
 ; AVX2-NEXT:  LBB9_16: ## %else20
 ; AVX2-NEXT:    vzeroupper
@@ -3618,141 +3845,149 @@ define void @compressstore_v16i8_v16i8(i8* %base, <16 x i8> %V, <16 x i8> %trigg
 ; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    movd %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_2
 ; SSE2-NEXT:  ## %bb.1: ## %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_2: ## %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_4
 ; SSE2-NEXT:  ## %bb.3: ## %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_4: ## %else2
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    movd %xmm2, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je LBB12_6
 ; SSE2-NEXT:  ## %bb.5: ## %cond.store4
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_6: ## %else5
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_8
 ; SSE2-NEXT:  ## %bb.7: ## %cond.store7
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_8: ## %else8
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    pextrw $2, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_10
 ; SSE2-NEXT:  ## %bb.9: ## %cond.store10
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_10: ## %else11
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_12
 ; SSE2-NEXT:  ## %bb.11: ## %cond.store13
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_12: ## %else14
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    pextrw $3, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_14
 ; SSE2-NEXT:  ## %bb.13: ## %cond.store16
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_14: ## %else17
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_16
 ; SSE2-NEXT:  ## %bb.15: ## %cond.store19
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_16: ## %else20
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    pextrw $4, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_18
 ; SSE2-NEXT:  ## %bb.17: ## %cond.store22
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_18: ## %else23
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_20
 ; SSE2-NEXT:  ## %bb.19: ## %cond.store25
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_20: ## %else26
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    pextrw $5, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_22
 ; SSE2-NEXT:  ## %bb.21: ## %cond.store28
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_22: ## %else29
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_24
 ; SSE2-NEXT:  ## %bb.23: ## %cond.store31
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_24: ## %else32
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
-; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    pextrw $6, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_26
 ; SSE2-NEXT:  ## %bb.25: ## %cond.store34
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_26: ## %else35
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_28
 ; SSE2-NEXT:  ## %bb.27: ## %cond.store37
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_28: ## %else38
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $7, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    pextrw $7, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_30
 ; SSE2-NEXT:  ## %bb.29: ## %cond.store40
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:    incq %rdi
 ; SSE2-NEXT:  LBB12_30: ## %else41
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB12_32
 ; SSE2-NEXT:  ## %bb.31: ## %cond.store43
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, (%rdi)
 ; SSE2-NEXT:  LBB12_32: ## %else44
 ; SSE2-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/masked_gather.ll b/llvm/test/CodeGen/X86/masked_gather.ll
index cee52a0d01e71..356ec1f00b539 100644
--- a/llvm/test/CodeGen/X86/masked_gather.ll
+++ b/llvm/test/CodeGen/X86/masked_gather.ll
@@ -70,16 +70,17 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB0_6
 ; AVX1-NEXT:  # %bb.5: # %cond.load4
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vmovq %xmm3, %rax
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX1-NEXT:  .LBB0_6: # %else5
 ; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB0_8
 ; AVX1-NEXT:  # %bb.7: # %cond.load7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX1-NEXT:  .LBB0_8: # %else8
@@ -110,16 +111,17 @@ define <4 x float> @gather_v4f32_ptr_v4i32(<4 x float*> %ptr, <4 x i32> %trigger
 ; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
 ; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB0_6
 ; AVX2-NEXT:  # %bb.5: # %cond.load4
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vmovq %xmm3, %rax
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX2-NEXT:  .LBB0_6: # %else5
 ; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB0_8
 ; AVX2-NEXT:  # %bb.7: # %cond.load7
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX2-NEXT:  .LBB0_8: # %else8
@@ -225,16 +227,17 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB1_6
 ; AVX1-NEXT:  # %bb.5: # %cond.load4
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vmovq %xmm3, %rax
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX1-NEXT:  .LBB1_6: # %else5
 ; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB1_8
 ; AVX1-NEXT:  # %bb.7: # %cond.load7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX1-NEXT:  .LBB1_8: # %else8
@@ -270,16 +273,17 @@ define <4 x float> @gather_v4f32_v4i32_v4i32(float* %base, <4 x i32> %idx, <4 x
 ; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
 ; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB1_6
 ; AVX2-NEXT:  # %bb.5: # %cond.load4
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vmovq %xmm3, %rax
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX2-NEXT:  .LBB1_6: # %else5
 ; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB1_8
 ; AVX2-NEXT:  # %bb.7: # %cond.load7
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX2-NEXT:  .LBB1_8: # %else8
@@ -384,16 +388,17 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
 ; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB2_6
 ; AVX1-NEXT:  # %bb.5: # %cond.load4
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vmovq %xmm3, %rax
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX1-NEXT:  .LBB2_6: # %else5
 ; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB2_8
 ; AVX1-NEXT:  # %bb.7: # %cond.load7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX1-NEXT:  .LBB2_8: # %else8
@@ -428,16 +433,17 @@ define <4 x float> @gather_v4f32_v4i64_v4i32(float* %base, <4 x i64> %idx, <4 x
 ; AVX2-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
 ; AVX2-NEXT:    vpextrb $8, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB2_6
 ; AVX2-NEXT:  # %bb.5: # %cond.load4
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vmovq %xmm3, %rax
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; AVX2-NEXT:  .LBB2_6: # %else5
 ; AVX2-NEXT:    vpextrb $12, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB2_8
 ; AVX2-NEXT:  # %bb.7: # %cond.load7
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
 ; AVX2-NEXT:  .LBB2_8: # %else8
@@ -656,15 +662,15 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    vpinsrb $1, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_4: # %else2
 ; AVX1-NEXT:    vpmovsxdq %xmm7, %xmm6
-; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm8
 ; AVX1-NEXT:    vpxor %xmm7, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpeqb %xmm7, %xmm2, %xmm7
 ; AVX1-NEXT:    vpextrb $2, %xmm7, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB3_6
 ; AVX1-NEXT:  # %bb.5: # %cond.load4
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vmovq %xmm5, %rax
 ; AVX1-NEXT:    vpinsrb $2, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_6: # %else5
 ; AVX1-NEXT:    vpaddq %xmm6, %xmm4, %xmm6
@@ -672,10 +678,11 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB3_8
 ; AVX1-NEXT:  # %bb.7: # %cond.load7
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vpinsrb $3, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_8: # %else8
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm8, %ymm0
 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm2, %xmm5
 ; AVX1-NEXT:    vpextrb $4, %xmm5, %eax
@@ -695,15 +702,15 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    vpinsrb $5, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_12: # %else14
 ; AVX1-NEXT:    vpmovsxdq %xmm6, %xmm6
-; AVX1-NEXT:    vpaddq %xmm7, %xmm4, %xmm5
+; AVX1-NEXT:    vpaddq %xmm7, %xmm4, %xmm8
 ; AVX1-NEXT:    vpxor %xmm7, %xmm7, %xmm7
 ; AVX1-NEXT:    vpcmpeqb %xmm7, %xmm2, %xmm7
 ; AVX1-NEXT:    vpextrb $6, %xmm7, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB3_14
 ; AVX1-NEXT:  # %bb.13: # %cond.load16
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT:    vmovq %xmm5, %rax
 ; AVX1-NEXT:    vpinsrb $6, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_14: # %else17
 ; AVX1-NEXT:    vpaddq %xmm6, %xmm4, %xmm6
@@ -711,11 +718,12 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB3_16
 ; AVX1-NEXT:  # %bb.15: # %cond.load19
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vpinsrb $7, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_16: # %else20
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm8, %ymm0
 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm2, %xmm5
 ; AVX1-NEXT:    vpextrb $8, %xmm5, %eax
@@ -740,10 +748,10 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm6
 ; AVX1-NEXT:    vpextrb $10, %xmm6, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB3_22
 ; AVX1-NEXT:  # %bb.21: # %cond.load28
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT:    vmovq %xmm7, %rax
 ; AVX1-NEXT:    vpinsrb $10, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_22: # %else29
 ; AVX1-NEXT:    vpaddq %xmm5, %xmm4, %xmm4
@@ -751,6 +759,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB3_24
 ; AVX1-NEXT:  # %bb.23: # %cond.load31
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vpinsrb $11, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_24: # %else32
@@ -775,16 +784,17 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB3_30
 ; AVX1-NEXT:  # %bb.29: # %cond.load40
-; AVX1-NEXT:    vmovq %xmm0, %rax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovq %xmm2, %rax
 ; AVX1-NEXT:    vpinsrb $14, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_30: # %else41
 ; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB3_32
 ; AVX1-NEXT:  # %bb.31: # %cond.load43
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vpinsrb $15, (%rax), %xmm3, %xmm3
 ; AVX1-NEXT:  .LBB3_32: # %else44
@@ -819,10 +829,10 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm6
 ; AVX2-NEXT:    vpextrb $2, %xmm6, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm5
 ; AVX2-NEXT:    je .LBB3_6
 ; AVX2-NEXT:  # %bb.5: # %cond.load4
-; AVX2-NEXT:    vmovq %xmm5, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm7
+; AVX2-NEXT:    vmovq %xmm7, %rax
 ; AVX2-NEXT:    vpinsrb $2, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_6: # %else5
 ; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
@@ -830,6 +840,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB3_8
 ; AVX2-NEXT:  # %bb.7: # %cond.load7
+; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm5
 ; AVX2-NEXT:    vpextrq $1, %xmm5, %rax
 ; AVX2-NEXT:    vpinsrb $3, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_8: # %else8
@@ -854,10 +865,10 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    vpcmpeqb %xmm5, %xmm2, %xmm5
 ; AVX2-NEXT:    vpextrb $6, %xmm5, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB3_14
 ; AVX2-NEXT:  # %bb.13: # %cond.load16
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm6
+; AVX2-NEXT:    vmovq %xmm6, %rax
 ; AVX2-NEXT:    vpinsrb $6, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_14: # %else17
 ; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm6
@@ -865,6 +876,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB3_16
 ; AVX2-NEXT:  # %bb.15: # %cond.load19
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vpinsrb $7, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_16: # %else20
@@ -890,10 +902,10 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    vpcmpeqb %xmm5, %xmm2, %xmm5
 ; AVX2-NEXT:    vpextrb $10, %xmm5, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB3_22
 ; AVX2-NEXT:  # %bb.21: # %cond.load28
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm6
+; AVX2-NEXT:    vmovq %xmm6, %rax
 ; AVX2-NEXT:    vpinsrb $10, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_22: # %else29
 ; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
@@ -901,6 +913,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB3_24
 ; AVX2-NEXT:  # %bb.23: # %cond.load31
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vpinsrb $11, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_24: # %else32
@@ -925,16 +938,17 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpextrb $14, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB3_30
 ; AVX2-NEXT:  # %bb.29: # %cond.load40
-; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vmovq %xmm2, %rax
 ; AVX2-NEXT:    vpinsrb $14, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_30: # %else41
 ; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB3_32
 ; AVX2-NEXT:  # %bb.31: # %cond.load43
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vpinsrb $15, (%rax), %xmm3, %xmm3
 ; AVX2-NEXT:  .LBB3_32: # %else44
@@ -995,9 +1009,9 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    kshiftrw $4, %k0, %k1
 ; AVX512-NEXT:    kmovw %k1, %eax
 ; AVX512-NEXT:    testb $1, %al
-; AVX512-NEXT:    vextracti32x4 $2, %zmm4, %xmm5
 ; AVX512-NEXT:    je .LBB3_10
 ; AVX512-NEXT:  # %bb.9: # %cond.load10
+; AVX512-NEXT:    vextracti32x4 $2, %zmm4, %xmm5
 ; AVX512-NEXT:    vmovq %xmm5, %rax
 ; AVX512-NEXT:    vpinsrb $4, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_10: # %else11
@@ -1006,6 +1020,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    testb $1, %al
 ; AVX512-NEXT:    je .LBB3_12
 ; AVX512-NEXT:  # %bb.11: # %cond.load13
+; AVX512-NEXT:    vextracti32x4 $2, %zmm4, %xmm5
 ; AVX512-NEXT:    vpextrq $1, %xmm5, %rax
 ; AVX512-NEXT:    vpinsrb $5, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_12: # %else14
@@ -1017,10 +1032,10 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    kshiftrw $6, %k0, %k1
 ; AVX512-NEXT:    kmovw %k1, %eax
 ; AVX512-NEXT:    testb $1, %al
-; AVX512-NEXT:    vextracti32x4 $3, %zmm4, %xmm4
 ; AVX512-NEXT:    je .LBB3_14
 ; AVX512-NEXT:  # %bb.13: # %cond.load16
-; AVX512-NEXT:    vmovq %xmm4, %rax
+; AVX512-NEXT:    vextracti32x4 $3, %zmm4, %xmm5
+; AVX512-NEXT:    vmovq %xmm5, %rax
 ; AVX512-NEXT:    vpinsrb $6, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_14: # %else17
 ; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
@@ -1029,6 +1044,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    testb $1, %al
 ; AVX512-NEXT:    je .LBB3_16
 ; AVX512-NEXT:  # %bb.15: # %cond.load19
+; AVX512-NEXT:    vextracti32x4 $3, %zmm4, %xmm4
 ; AVX512-NEXT:    vpextrq $1, %xmm4, %rax
 ; AVX512-NEXT:    vpinsrb $7, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_16: # %else20
@@ -1082,9 +1098,9 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    kshiftrw $12, %k0, %k1
 ; AVX512-NEXT:    kmovw %k1, %eax
 ; AVX512-NEXT:    testb $1, %al
-; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512-NEXT:    je .LBB3_26
 ; AVX512-NEXT:  # %bb.25: # %cond.load34
+; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512-NEXT:    vmovq %xmm3, %rax
 ; AVX512-NEXT:    vpinsrb $12, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_26: # %else35
@@ -1093,6 +1109,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    testb $1, %al
 ; AVX512-NEXT:    je .LBB3_28
 ; AVX512-NEXT:  # %bb.27: # %cond.load37
+; AVX512-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; AVX512-NEXT:    vpextrq $1, %xmm3, %rax
 ; AVX512-NEXT:    vpinsrb $13, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_28: # %else38
@@ -1103,10 +1120,10 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    kshiftrw $14, %k0, %k1
 ; AVX512-NEXT:    kmovw %k1, %eax
 ; AVX512-NEXT:    testb $1, %al
-; AVX512-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
 ; AVX512-NEXT:    je .LBB3_30
 ; AVX512-NEXT:  # %bb.29: # %cond.load40
-; AVX512-NEXT:    vmovq %xmm0, %rax
+; AVX512-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
+; AVX512-NEXT:    vmovq %xmm1, %rax
 ; AVX512-NEXT:    vpinsrb $14, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_30: # %else41
 ; AVX512-NEXT:    kshiftrw $15, %k0, %k0
@@ -1114,6 +1131,7 @@ define <16 x i8> @gather_v16i8_v16i32_v16i8(i8* %base, <16 x i32> %idx, <16 x i8
 ; AVX512-NEXT:    testb $1, %al
 ; AVX512-NEXT:    je .LBB3_32
 ; AVX512-NEXT:  # %bb.31: # %cond.load43
+; AVX512-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
 ; AVX512-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512-NEXT:    vpinsrb $15, (%rax), %xmm2, %xmm2
 ; AVX512-NEXT:  .LBB3_32: # %else44
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 07a4fd96b2f1f..efbb1ef8cc667 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -2151,62 +2151,69 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX1-NEXT:  ## %bb.15: ## %cond.store13
 ; AVX1-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
 ; AVX1-NEXT:  LBB14_16: ## %else14
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqw %xmm0, %xmm2, %xmm3
-; AVX1-NEXT:    vpextrb $0, %xmm3, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    je LBB14_18
 ; AVX1-NEXT:  ## %bb.17: ## %cond.store15
-; AVX1-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrw $0, %xmm3, 16(%rdi)
 ; AVX1-NEXT:  LBB14_18: ## %else16
-; AVX1-NEXT:    vpextrb $2, %xmm3, %eax
+; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_20
 ; AVX1-NEXT:  ## %bb.19: ## %cond.store17
-; AVX1-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
 ; AVX1-NEXT:  LBB14_20: ## %else18
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $4, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_22
 ; AVX1-NEXT:  ## %bb.21: ## %cond.store19
-; AVX1-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
 ; AVX1-NEXT:  LBB14_22: ## %else20
-; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_24
 ; AVX1-NEXT:  ## %bb.23: ## %cond.store21
-; AVX1-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
 ; AVX1-NEXT:  LBB14_24: ## %else22
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_26
 ; AVX1-NEXT:  ## %bb.25: ## %cond.store23
-; AVX1-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  LBB14_26: ## %else24
-; AVX1-NEXT:    vpextrb $10, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_28
 ; AVX1-NEXT:  ## %bb.27: ## %cond.store25
-; AVX1-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX1-NEXT:  LBB14_28: ## %else26
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_30
 ; AVX1-NEXT:  ## %bb.29: ## %cond.store27
-; AVX1-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX1-NEXT:  LBB14_30: ## %else28
-; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB14_32
 ; AVX1-NEXT:  ## %bb.31: ## %cond.store29
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX1-NEXT:  LBB14_32: ## %else30
 ; AVX1-NEXT:    vzeroupper
@@ -2275,16 +2282,17 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm2
 ; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
 ; AVX2-NEXT:    je LBB14_18
 ; AVX2-NEXT:  ## %bb.17: ## %cond.store15
-; AVX2-NEXT:    vpextrw $0, %xmm1, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrw $0, %xmm3, 16(%rdi)
 ; AVX2-NEXT:  LBB14_18: ## %else16
 ; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_20
 ; AVX2-NEXT:  ## %bb.19: ## %cond.store17
-; AVX2-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
 ; AVX2-NEXT:  LBB14_20: ## %else18
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2293,13 +2301,15 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_22
 ; AVX2-NEXT:  ## %bb.21: ## %cond.store19
-; AVX2-NEXT:    vpextrw $2, %xmm1, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
 ; AVX2-NEXT:  LBB14_22: ## %else20
 ; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_24
 ; AVX2-NEXT:  ## %bb.23: ## %cond.store21
-; AVX2-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
 ; AVX2-NEXT:  LBB14_24: ## %else22
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2308,13 +2318,15 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_26
 ; AVX2-NEXT:  ## %bb.25: ## %cond.store23
-; AVX2-NEXT:    vpextrw $4, %xmm1, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
 ; AVX2-NEXT:  LBB14_26: ## %else24
 ; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_28
 ; AVX2-NEXT:  ## %bb.27: ## %cond.store25
-; AVX2-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX2-NEXT:  LBB14_28: ## %else26
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
@@ -2323,13 +2335,15 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_30
 ; AVX2-NEXT:  ## %bb.29: ## %cond.store27
-; AVX2-NEXT:    vpextrw $6, %xmm1, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX2-NEXT:  LBB14_30: ## %else28
 ; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB14_32
 ; AVX2-NEXT:  ## %bb.31: ## %cond.store29
-; AVX2-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX2-NEXT:  LBB14_32: ## %else30
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -2414,17 +2428,18 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
 ; AVX512F-NEXT:    je LBB14_18
 ; AVX512F-NEXT:  ## %bb.17: ## %cond.store15
-; AVX512F-NEXT:    vpextrw $0, %xmm1, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrw $0, %xmm2, 16(%rdi)
 ; AVX512F-NEXT:  LBB14_18: ## %else16
 ; AVX512F-NEXT:    kshiftrw $9, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_20
 ; AVX512F-NEXT:  ## %bb.19: ## %cond.store17
-; AVX512F-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
 ; AVX512F-NEXT:  LBB14_20: ## %else18
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2435,14 +2450,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_22
 ; AVX512F-NEXT:  ## %bb.21: ## %cond.store19
-; AVX512F-NEXT:    vpextrw $2, %xmm1, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrw $2, %xmm2, 20(%rdi)
 ; AVX512F-NEXT:  LBB14_22: ## %else20
 ; AVX512F-NEXT:    kshiftrw $11, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_24
 ; AVX512F-NEXT:  ## %bb.23: ## %cond.store21
-; AVX512F-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
 ; AVX512F-NEXT:  LBB14_24: ## %else22
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2453,14 +2470,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_26
 ; AVX512F-NEXT:  ## %bb.25: ## %cond.store23
-; AVX512F-NEXT:    vpextrw $4, %xmm1, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
 ; AVX512F-NEXT:  LBB14_26: ## %else24
 ; AVX512F-NEXT:    kshiftrw $13, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_28
 ; AVX512F-NEXT:  ## %bb.27: ## %cond.store25
-; AVX512F-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX512F-NEXT:  LBB14_28: ## %else26
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
@@ -2471,14 +2490,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_30
 ; AVX512F-NEXT:  ## %bb.29: ## %cond.store27
-; AVX512F-NEXT:    vpextrw $6, %xmm1, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
 ; AVX512F-NEXT:  LBB14_30: ## %else28
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB14_32
 ; AVX512F-NEXT:  ## %bb.31: ## %cond.store29
-; AVX512F-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX512F-NEXT:  LBB14_32: ## %else30
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
@@ -2563,17 +2584,18 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512VLDQ-NEXT:    kshiftrw $8, %k0, %k1
 ; AVX512VLDQ-NEXT:    kmovw %k1, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
-; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
 ; AVX512VLDQ-NEXT:    je LBB14_18
 ; AVX512VLDQ-NEXT:  ## %bb.17: ## %cond.store15
-; AVX512VLDQ-NEXT:    vpextrw $0, %xmm1, 16(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrw $0, %xmm2, 16(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_18: ## %else16
 ; AVX512VLDQ-NEXT:    kshiftrw $9, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_20
 ; AVX512VLDQ-NEXT:  ## %bb.19: ## %cond.store17
-; AVX512VLDQ-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_20: ## %else18
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2584,14 +2606,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_22
 ; AVX512VLDQ-NEXT:  ## %bb.21: ## %cond.store19
-; AVX512VLDQ-NEXT:    vpextrw $2, %xmm1, 20(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrw $2, %xmm2, 20(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_22: ## %else20
 ; AVX512VLDQ-NEXT:    kshiftrw $11, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_24
 ; AVX512VLDQ-NEXT:  ## %bb.23: ## %cond.store21
-; AVX512VLDQ-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_24: ## %else22
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2602,14 +2626,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_26
 ; AVX512VLDQ-NEXT:  ## %bb.25: ## %cond.store23
-; AVX512VLDQ-NEXT:    vpextrw $4, %xmm1, 24(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_26: ## %else24
 ; AVX512VLDQ-NEXT:    kshiftrw $13, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_28
 ; AVX512VLDQ-NEXT:  ## %bb.27: ## %cond.store25
-; AVX512VLDQ-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_28: ## %else26
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
@@ -2620,14 +2646,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_30
 ; AVX512VLDQ-NEXT:  ## %bb.29: ## %cond.store27
-; AVX512VLDQ-NEXT:    vpextrw $6, %xmm1, 28(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512VLDQ-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_30: ## %else28
 ; AVX512VLDQ-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB14_32
 ; AVX512VLDQ-NEXT:  ## %bb.31: ## %cond.store29
-; AVX512VLDQ-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512VLDQ-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX512VLDQ-NEXT:  LBB14_32: ## %else30
 ; AVX512VLDQ-NEXT:    vzeroupper
 ; AVX512VLDQ-NEXT:    retq
@@ -2652,126 +2680,134 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, <16 x i8>* %addr, <16 x i8> %
 ; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    movd %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_2
 ; SSE2-NEXT:  ## %bb.1: ## %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  LBB15_2: ## %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_4
 ; SSE2-NEXT:  ## %bb.3: ## %cond.store1
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  LBB15_4: ## %else2
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    movd %xmm2, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je LBB15_6
 ; SSE2-NEXT:  ## %bb.5: ## %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm1, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  LBB15_6: ## %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_8
 ; SSE2-NEXT:  ## %bb.7: ## %cond.store5
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  LBB15_8: ## %else6
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    pextrw $2, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_10
 ; SSE2-NEXT:  ## %bb.9: ## %cond.store7
-; SSE2-NEXT:    movb %al, 4(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  LBB15_10: ## %else8
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_12
 ; SSE2-NEXT:  ## %bb.11: ## %cond.store9
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  LBB15_12: ## %else10
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm1, %eax
+; SSE2-NEXT:    pextrw $3, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_14
 ; SSE2-NEXT:  ## %bb.13: ## %cond.store11
-; SSE2-NEXT:    movb %al, 6(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  LBB15_14: ## %else12
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_16
 ; SSE2-NEXT:  ## %bb.15: ## %cond.store13
+; SSE2-NEXT:    pextrw $3, %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  LBB15_16: ## %else14
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm1, %eax
+; SSE2-NEXT:    pextrw $4, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_18
 ; SSE2-NEXT:  ## %bb.17: ## %cond.store15
-; SSE2-NEXT:    movb %al, 8(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  LBB15_18: ## %else16
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_20
 ; SSE2-NEXT:  ## %bb.19: ## %cond.store17
+; SSE2-NEXT:    pextrw $4, %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  LBB15_20: ## %else18
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm1, %eax
+; SSE2-NEXT:    pextrw $5, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_22
 ; SSE2-NEXT:  ## %bb.21: ## %cond.store19
-; SSE2-NEXT:    movb %al, 10(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  LBB15_22: ## %else20
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_24
 ; SSE2-NEXT:  ## %bb.23: ## %cond.store21
+; SSE2-NEXT:    pextrw $5, %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  LBB15_24: ## %else22
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm1, %eax
+; SSE2-NEXT:    pextrw $6, %xmm2, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_26
 ; SSE2-NEXT:  ## %bb.25: ## %cond.store23
-; SSE2-NEXT:    movb %al, 12(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  LBB15_26: ## %else24
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_28
 ; SSE2-NEXT:  ## %bb.27: ## %cond.store25
+; SSE2-NEXT:    pextrw $6, %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  LBB15_28: ## %else26
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm1, %eax
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_30
 ; SSE2-NEXT:  ## %bb.29: ## %cond.store27
-; SSE2-NEXT:    movb %al, 14(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm1, %ecx
+; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  LBB15_30: ## %else28
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB15_32
 ; SSE2-NEXT:  ## %bb.31: ## %cond.store29
+; SSE2-NEXT:    pextrw $7, %xmm1, %eax
 ; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  LBB15_32: ## %else30
 ; SSE2-NEXT:    retq
@@ -3319,250 +3355,266 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; SSE2:       ## %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    movd %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    movd %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_2
 ; SSE2-NEXT:  ## %bb.1: ## %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  LBB16_2: ## %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_4
 ; SSE2-NEXT:  ## %bb.3: ## %cond.store1
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  LBB16_4: ## %else2
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    movd %xmm4, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm4, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je LBB16_6
 ; SSE2-NEXT:  ## %bb.5: ## %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  LBB16_6: ## %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_8
 ; SSE2-NEXT:  ## %bb.7: ## %cond.store5
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  LBB16_8: ## %else6
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    pextrw $2, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm2, %eax
+; SSE2-NEXT:    pextrw $2, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_10
 ; SSE2-NEXT:  ## %bb.9: ## %cond.store7
-; SSE2-NEXT:    movb %al, 4(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  LBB16_10: ## %else8
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_12
 ; SSE2-NEXT:  ## %bb.11: ## %cond.store9
+; SSE2-NEXT:    pextrw $2, %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  LBB16_12: ## %else10
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    pextrw $3, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm2, %eax
+; SSE2-NEXT:    pextrw $3, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_14
 ; SSE2-NEXT:  ## %bb.13: ## %cond.store11
-; SSE2-NEXT:    movb %al, 6(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  LBB16_14: ## %else12
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_16
 ; SSE2-NEXT:  ## %bb.15: ## %cond.store13
+; SSE2-NEXT:    pextrw $3, %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  LBB16_16: ## %else14
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    pextrw $4, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm2, %eax
+; SSE2-NEXT:    pextrw $4, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_18
 ; SSE2-NEXT:  ## %bb.17: ## %cond.store15
-; SSE2-NEXT:    movb %al, 8(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  LBB16_18: ## %else16
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_20
 ; SSE2-NEXT:  ## %bb.19: ## %cond.store17
+; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  LBB16_20: ## %else18
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    pextrw $5, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm2, %eax
+; SSE2-NEXT:    pextrw $5, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_22
 ; SSE2-NEXT:  ## %bb.21: ## %cond.store19
-; SSE2-NEXT:    movb %al, 10(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  LBB16_22: ## %else20
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_24
 ; SSE2-NEXT:  ## %bb.23: ## %cond.store21
+; SSE2-NEXT:    pextrw $5, %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  LBB16_24: ## %else22
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT:    pextrw $6, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm2, %eax
+; SSE2-NEXT:    pextrw $6, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_26
 ; SSE2-NEXT:  ## %bb.25: ## %cond.store23
-; SSE2-NEXT:    movb %al, 12(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  LBB16_26: ## %else24
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_28
 ; SSE2-NEXT:  ## %bb.27: ## %cond.store25
+; SSE2-NEXT:    pextrw $6, %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  LBB16_28: ## %else26
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm0
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm2, %eax
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_30
 ; SSE2-NEXT:  ## %bb.29: ## %cond.store27
-; SSE2-NEXT:    movb %al, 14(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  LBB16_30: ## %else28
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_32
 ; SSE2-NEXT:  ## %bb.31: ## %cond.store29
+; SSE2-NEXT:    pextrw $7, %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  LBB16_32: ## %else30
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_34
 ; SSE2-NEXT:  ## %bb.33: ## %cond.store31
-; SSE2-NEXT:    movb %al, 16(%rdi)
+; SSE2-NEXT:    movd %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 16(%rdi)
 ; SSE2-NEXT:  LBB16_34: ## %else32
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_36
 ; SSE2-NEXT:  ## %bb.35: ## %cond.store33
+; SSE2-NEXT:    movd %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 17(%rdi)
 ; SSE2-NEXT:  LBB16_36: ## %else34
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je LBB16_38
 ; SSE2-NEXT:  ## %bb.37: ## %cond.store35
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 18(%rdi)
+; SSE2-NEXT:    movd %xmm3, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 18(%rdi)
 ; SSE2-NEXT:  LBB16_38: ## %else36
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_40
 ; SSE2-NEXT:  ## %bb.39: ## %cond.store37
+; SSE2-NEXT:    movd %xmm3, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 19(%rdi)
 ; SSE2-NEXT:  LBB16_40: ## %else38
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm3, %eax
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_42
 ; SSE2-NEXT:  ## %bb.41: ## %cond.store39
-; SSE2-NEXT:    movb %al, 20(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 20(%rdi)
 ; SSE2-NEXT:  LBB16_42: ## %else40
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_44
 ; SSE2-NEXT:  ## %bb.43: ## %cond.store41
+; SSE2-NEXT:    pextrw $2, %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 21(%rdi)
 ; SSE2-NEXT:  LBB16_44: ## %else42
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm3, %eax
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_46
 ; SSE2-NEXT:  ## %bb.45: ## %cond.store43
-; SSE2-NEXT:    movb %al, 22(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 22(%rdi)
 ; SSE2-NEXT:  LBB16_46: ## %else44
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_48
 ; SSE2-NEXT:  ## %bb.47: ## %cond.store45
+; SSE2-NEXT:    pextrw $3, %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 23(%rdi)
 ; SSE2-NEXT:  LBB16_48: ## %else46
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm3, %eax
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_50
 ; SSE2-NEXT:  ## %bb.49: ## %cond.store47
-; SSE2-NEXT:    movb %al, 24(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 24(%rdi)
 ; SSE2-NEXT:  LBB16_50: ## %else48
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_52
 ; SSE2-NEXT:  ## %bb.51: ## %cond.store49
+; SSE2-NEXT:    pextrw $4, %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 25(%rdi)
 ; SSE2-NEXT:  LBB16_52: ## %else50
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm3, %eax
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_54
 ; SSE2-NEXT:  ## %bb.53: ## %cond.store51
-; SSE2-NEXT:    movb %al, 26(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 26(%rdi)
 ; SSE2-NEXT:  LBB16_54: ## %else52
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_56
 ; SSE2-NEXT:  ## %bb.55: ## %cond.store53
+; SSE2-NEXT:    pextrw $5, %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 27(%rdi)
 ; SSE2-NEXT:  LBB16_56: ## %else54
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm3, %eax
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_58
 ; SSE2-NEXT:  ## %bb.57: ## %cond.store55
-; SSE2-NEXT:    movb %al, 28(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 28(%rdi)
 ; SSE2-NEXT:  LBB16_58: ## %else56
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_60
 ; SSE2-NEXT:  ## %bb.59: ## %cond.store57
+; SSE2-NEXT:    pextrw $6, %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 29(%rdi)
 ; SSE2-NEXT:  LBB16_60: ## %else58
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT:    pextrw $7, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm3, %eax
+; SSE2-NEXT:    pextrw $7, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_62
 ; SSE2-NEXT:  ## %bb.61: ## %cond.store59
-; SSE2-NEXT:    movb %al, 30(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm3, %ecx
+; SSE2-NEXT:    movb %cl, 30(%rdi)
 ; SSE2-NEXT:  LBB16_62: ## %else60
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je LBB16_64
 ; SSE2-NEXT:  ## %bb.63: ## %cond.store61
+; SSE2-NEXT:    pextrw $7, %xmm3, %eax
 ; SSE2-NEXT:    movb %ah, 31(%rdi)
 ; SSE2-NEXT:  LBB16_64: ## %else62
 ; SSE2-NEXT:    retq
@@ -3917,118 +3969,133 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX1-NEXT:  ## %bb.31: ## %cond.store29
 ; AVX1-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
 ; AVX1-NEXT:  LBB16_32: ## %else30
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqb %xmm0, %xmm2, %xmm3
-; AVX1-NEXT:    vpextrb $0, %xmm3, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    je LBB16_34
 ; AVX1-NEXT:  ## %bb.33: ## %cond.store31
-; AVX1-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX1-NEXT:  LBB16_34: ## %else32
-; AVX1-NEXT:    vpextrb $1, %xmm3, %eax
+; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_36
 ; AVX1-NEXT:  ## %bb.35: ## %cond.store33
-; AVX1-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX1-NEXT:  LBB16_36: ## %else34
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $2, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_38
 ; AVX1-NEXT:  ## %bb.37: ## %cond.store35
-; AVX1-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX1-NEXT:  LBB16_38: ## %else36
-; AVX1-NEXT:    vpextrb $3, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_40
 ; AVX1-NEXT:  ## %bb.39: ## %cond.store37
-; AVX1-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX1-NEXT:  LBB16_40: ## %else38
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $4, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_42
 ; AVX1-NEXT:  ## %bb.41: ## %cond.store39
-; AVX1-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX1-NEXT:  LBB16_42: ## %else40
-; AVX1-NEXT:    vpextrb $5, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_44
 ; AVX1-NEXT:  ## %bb.43: ## %cond.store41
-; AVX1-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX1-NEXT:  LBB16_44: ## %else42
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $6, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_46
 ; AVX1-NEXT:  ## %bb.45: ## %cond.store43
-; AVX1-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX1-NEXT:  LBB16_46: ## %else44
-; AVX1-NEXT:    vpextrb $7, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_48
 ; AVX1-NEXT:  ## %bb.47: ## %cond.store45
-; AVX1-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX1-NEXT:  LBB16_48: ## %else46
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $8, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_50
 ; AVX1-NEXT:  ## %bb.49: ## %cond.store47
-; AVX1-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  LBB16_50: ## %else48
-; AVX1-NEXT:    vpextrb $9, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_52
 ; AVX1-NEXT:  ## %bb.51: ## %cond.store49
-; AVX1-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX1-NEXT:  LBB16_52: ## %else50
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $10, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_54
 ; AVX1-NEXT:  ## %bb.53: ## %cond.store51
-; AVX1-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX1-NEXT:  LBB16_54: ## %else52
-; AVX1-NEXT:    vpextrb $11, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_56
 ; AVX1-NEXT:  ## %bb.55: ## %cond.store53
-; AVX1-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX1-NEXT:  LBB16_56: ## %else54
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $12, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_58
 ; AVX1-NEXT:  ## %bb.57: ## %cond.store55
-; AVX1-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX1-NEXT:  LBB16_58: ## %else56
-; AVX1-NEXT:    vpextrb $13, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_60
 ; AVX1-NEXT:  ## %bb.59: ## %cond.store57
-; AVX1-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX1-NEXT:  LBB16_60: ## %else58
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpextrb $14, %xmm1, %eax
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_62
 ; AVX1-NEXT:  ## %bb.61: ## %cond.store59
-; AVX1-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX1-NEXT:  LBB16_62: ## %else60
-; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
+; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je LBB16_64
 ; AVX1-NEXT:  ## %bb.63: ## %cond.store61
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX1-NEXT:  LBB16_64: ## %else62
 ; AVX1-NEXT:    vzeroupper
@@ -4153,16 +4220,17 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm2
 ; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
 ; AVX2-NEXT:    je LBB16_34
 ; AVX2-NEXT:  ## %bb.33: ## %cond.store31
-; AVX2-NEXT:    vpextrb $0, %xmm1, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX2-NEXT:  LBB16_34: ## %else32
 ; AVX2-NEXT:    vpextrb $1, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_36
 ; AVX2-NEXT:  ## %bb.35: ## %cond.store33
-; AVX2-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX2-NEXT:  LBB16_36: ## %else34
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4171,13 +4239,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_38
 ; AVX2-NEXT:  ## %bb.37: ## %cond.store35
-; AVX2-NEXT:    vpextrb $2, %xmm1, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX2-NEXT:  LBB16_38: ## %else36
 ; AVX2-NEXT:    vpextrb $3, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_40
 ; AVX2-NEXT:  ## %bb.39: ## %cond.store37
-; AVX2-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX2-NEXT:  LBB16_40: ## %else38
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4186,13 +4256,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_42
 ; AVX2-NEXT:  ## %bb.41: ## %cond.store39
-; AVX2-NEXT:    vpextrb $4, %xmm1, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX2-NEXT:  LBB16_42: ## %else40
 ; AVX2-NEXT:    vpextrb $5, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_44
 ; AVX2-NEXT:  ## %bb.43: ## %cond.store41
-; AVX2-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX2-NEXT:  LBB16_44: ## %else42
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4201,13 +4273,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_46
 ; AVX2-NEXT:  ## %bb.45: ## %cond.store43
-; AVX2-NEXT:    vpextrb $6, %xmm1, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX2-NEXT:  LBB16_46: ## %else44
 ; AVX2-NEXT:    vpextrb $7, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_48
 ; AVX2-NEXT:  ## %bb.47: ## %cond.store45
-; AVX2-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX2-NEXT:  LBB16_48: ## %else46
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4216,13 +4290,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_50
 ; AVX2-NEXT:  ## %bb.49: ## %cond.store47
-; AVX2-NEXT:    vpextrb $8, %xmm1, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX2-NEXT:  LBB16_50: ## %else48
 ; AVX2-NEXT:    vpextrb $9, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_52
 ; AVX2-NEXT:  ## %bb.51: ## %cond.store49
-; AVX2-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX2-NEXT:  LBB16_52: ## %else50
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4231,13 +4307,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_54
 ; AVX2-NEXT:  ## %bb.53: ## %cond.store51
-; AVX2-NEXT:    vpextrb $10, %xmm1, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX2-NEXT:  LBB16_54: ## %else52
 ; AVX2-NEXT:    vpextrb $11, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_56
 ; AVX2-NEXT:  ## %bb.55: ## %cond.store53
-; AVX2-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX2-NEXT:  LBB16_56: ## %else54
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4246,13 +4324,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_58
 ; AVX2-NEXT:  ## %bb.57: ## %cond.store55
-; AVX2-NEXT:    vpextrb $12, %xmm1, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX2-NEXT:  LBB16_58: ## %else56
 ; AVX2-NEXT:    vpextrb $13, %xmm2, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_60
 ; AVX2-NEXT:  ## %bb.59: ## %cond.store57
-; AVX2-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX2-NEXT:  LBB16_60: ## %else58
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
@@ -4261,13 +4341,15 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_62
 ; AVX2-NEXT:  ## %bb.61: ## %cond.store59
-; AVX2-NEXT:    vpextrb $14, %xmm1, 30(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX2-NEXT:  LBB16_62: ## %else60
 ; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je LBB16_64
 ; AVX2-NEXT:  ## %bb.63: ## %cond.store61
-; AVX2-NEXT:    vpextrb $15, %xmm1, 31(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX2-NEXT:  LBB16_64: ## %else62
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -4440,17 +4522,18 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
 ; AVX512F-NEXT:    je LBB16_34
 ; AVX512F-NEXT:  ## %bb.33: ## %cond.store31
-; AVX512F-NEXT:    vpextrb $0, %xmm1, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
 ; AVX512F-NEXT:  LBB16_34: ## %else32
 ; AVX512F-NEXT:    kshiftrw $1, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_36
 ; AVX512F-NEXT:  ## %bb.35: ## %cond.store33
-; AVX512F-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX512F-NEXT:  LBB16_36: ## %else34
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4462,14 +4545,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_38
 ; AVX512F-NEXT:  ## %bb.37: ## %cond.store35
-; AVX512F-NEXT:    vpextrb $2, %xmm1, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
 ; AVX512F-NEXT:  LBB16_38: ## %else36
 ; AVX512F-NEXT:    kshiftrw $3, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_40
 ; AVX512F-NEXT:  ## %bb.39: ## %cond.store37
-; AVX512F-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX512F-NEXT:  LBB16_40: ## %else38
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4481,14 +4566,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_42
 ; AVX512F-NEXT:  ## %bb.41: ## %cond.store39
-; AVX512F-NEXT:    vpextrb $4, %xmm1, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
 ; AVX512F-NEXT:  LBB16_42: ## %else40
 ; AVX512F-NEXT:    kshiftrw $5, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_44
 ; AVX512F-NEXT:  ## %bb.43: ## %cond.store41
-; AVX512F-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX512F-NEXT:  LBB16_44: ## %else42
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4500,14 +4587,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_46
 ; AVX512F-NEXT:  ## %bb.45: ## %cond.store43
-; AVX512F-NEXT:    vpextrb $6, %xmm1, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
 ; AVX512F-NEXT:  LBB16_46: ## %else44
 ; AVX512F-NEXT:    kshiftrw $7, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_48
 ; AVX512F-NEXT:  ## %bb.47: ## %cond.store45
-; AVX512F-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX512F-NEXT:  LBB16_48: ## %else46
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4519,14 +4608,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_50
 ; AVX512F-NEXT:  ## %bb.49: ## %cond.store47
-; AVX512F-NEXT:    vpextrb $8, %xmm1, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
 ; AVX512F-NEXT:  LBB16_50: ## %else48
 ; AVX512F-NEXT:    kshiftrw $9, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_52
 ; AVX512F-NEXT:  ## %bb.51: ## %cond.store49
-; AVX512F-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX512F-NEXT:  LBB16_52: ## %else50
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4538,14 +4629,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_54
 ; AVX512F-NEXT:  ## %bb.53: ## %cond.store51
-; AVX512F-NEXT:    vpextrb $10, %xmm1, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
 ; AVX512F-NEXT:  LBB16_54: ## %else52
 ; AVX512F-NEXT:    kshiftrw $11, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_56
 ; AVX512F-NEXT:  ## %bb.55: ## %cond.store53
-; AVX512F-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX512F-NEXT:  LBB16_56: ## %else54
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4557,14 +4650,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_58
 ; AVX512F-NEXT:  ## %bb.57: ## %cond.store55
-; AVX512F-NEXT:    vpextrb $12, %xmm1, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
 ; AVX512F-NEXT:  LBB16_58: ## %else56
 ; AVX512F-NEXT:    kshiftrw $13, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_60
 ; AVX512F-NEXT:  ## %bb.59: ## %cond.store57
-; AVX512F-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX512F-NEXT:  LBB16_60: ## %else58
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
@@ -4576,14 +4671,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_62
 ; AVX512F-NEXT:  ## %bb.61: ## %cond.store59
-; AVX512F-NEXT:    vpextrb $14, %xmm1, 30(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
 ; AVX512F-NEXT:  LBB16_62: ## %else60
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je LBB16_64
 ; AVX512F-NEXT:  ## %bb.63: ## %cond.store61
-; AVX512F-NEXT:    vpextrb $15, %xmm1, 31(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX512F-NEXT:  LBB16_64: ## %else62
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
@@ -4756,17 +4853,18 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    vpmovd2m %zmm2, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
-; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
 ; AVX512VLDQ-NEXT:    je LBB16_34
 ; AVX512VLDQ-NEXT:  ## %bb.33: ## %cond.store31
-; AVX512VLDQ-NEXT:    vpextrb $0, %xmm1, 16(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_34: ## %else32
 ; AVX512VLDQ-NEXT:    kshiftrw $1, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_36
 ; AVX512VLDQ-NEXT:  ## %bb.35: ## %cond.store33
-; AVX512VLDQ-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_36: ## %else34
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4778,14 +4876,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_38
 ; AVX512VLDQ-NEXT:  ## %bb.37: ## %cond.store35
-; AVX512VLDQ-NEXT:    vpextrb $2, %xmm1, 18(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_38: ## %else36
 ; AVX512VLDQ-NEXT:    kshiftrw $3, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_40
 ; AVX512VLDQ-NEXT:  ## %bb.39: ## %cond.store37
-; AVX512VLDQ-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_40: ## %else38
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4797,14 +4897,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_42
 ; AVX512VLDQ-NEXT:  ## %bb.41: ## %cond.store39
-; AVX512VLDQ-NEXT:    vpextrb $4, %xmm1, 20(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_42: ## %else40
 ; AVX512VLDQ-NEXT:    kshiftrw $5, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_44
 ; AVX512VLDQ-NEXT:  ## %bb.43: ## %cond.store41
-; AVX512VLDQ-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_44: ## %else42
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4816,14 +4918,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_46
 ; AVX512VLDQ-NEXT:  ## %bb.45: ## %cond.store43
-; AVX512VLDQ-NEXT:    vpextrb $6, %xmm1, 22(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_46: ## %else44
 ; AVX512VLDQ-NEXT:    kshiftrw $7, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_48
 ; AVX512VLDQ-NEXT:  ## %bb.47: ## %cond.store45
-; AVX512VLDQ-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_48: ## %else46
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4835,14 +4939,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_50
 ; AVX512VLDQ-NEXT:  ## %bb.49: ## %cond.store47
-; AVX512VLDQ-NEXT:    vpextrb $8, %xmm1, 24(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_50: ## %else48
 ; AVX512VLDQ-NEXT:    kshiftrw $9, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_52
 ; AVX512VLDQ-NEXT:  ## %bb.51: ## %cond.store49
-; AVX512VLDQ-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_52: ## %else50
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4854,14 +4960,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_54
 ; AVX512VLDQ-NEXT:  ## %bb.53: ## %cond.store51
-; AVX512VLDQ-NEXT:    vpextrb $10, %xmm1, 26(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_54: ## %else52
 ; AVX512VLDQ-NEXT:    kshiftrw $11, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_56
 ; AVX512VLDQ-NEXT:  ## %bb.55: ## %cond.store53
-; AVX512VLDQ-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_56: ## %else54
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4873,14 +4981,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_58
 ; AVX512VLDQ-NEXT:  ## %bb.57: ## %cond.store55
-; AVX512VLDQ-NEXT:    vpextrb $12, %xmm1, 28(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_58: ## %else56
 ; AVX512VLDQ-NEXT:    kshiftrw $13, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_60
 ; AVX512VLDQ-NEXT:  ## %bb.59: ## %cond.store57
-; AVX512VLDQ-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
+; AVX512VLDQ-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_60: ## %else58
 ; AVX512VLDQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
@@ -4892,14 +5002,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_62
 ; AVX512VLDQ-NEXT:  ## %bb.61: ## %cond.store59
-; AVX512VLDQ-NEXT:    vpextrb $14, %xmm1, 30(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512VLDQ-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_62: ## %else60
 ; AVX512VLDQ-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512VLDQ-NEXT:    kmovw %k0, %eax
 ; AVX512VLDQ-NEXT:    testb $1, %al
 ; AVX512VLDQ-NEXT:    je LBB16_64
 ; AVX512VLDQ-NEXT:  ## %bb.63: ## %cond.store61
-; AVX512VLDQ-NEXT:    vpextrb $15, %xmm1, 31(%rdi)
+; AVX512VLDQ-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX512VLDQ-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX512VLDQ-NEXT:  LBB16_64: ## %else62
 ; AVX512VLDQ-NEXT:    vzeroupper
 ; AVX512VLDQ-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll
index 5b3dc6a2e7585..b9a3dc2b29882 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -663,17 +663,18 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; SSE2-NEXT:    movd %xmm7, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    je .LBB2_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB2_2: # %else
 ; SSE2-NEXT:    psrlq $16, %xmm6
-; SSE2-NEXT:    movd %xmm6, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    movd %xmm6, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB2_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB2_4: # %else2
@@ -2297,10 +2298,10 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB9_18
 ; AVX1-NEXT:  # %bb.17: # %cond.store15
-; AVX1-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrw $0, %xmm4, 16(%rdi)
 ; AVX1-NEXT:  .LBB9_18: # %else16
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
@@ -2310,7 +2311,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_20
 ; AVX1-NEXT:  # %bb.19: # %cond.store17
-; AVX1-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX1-NEXT:  .LBB9_20: # %else18
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm4
@@ -2322,7 +2324,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_22
 ; AVX1-NEXT:  # %bb.21: # %cond.store19
-; AVX1-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrw $2, %xmm4, 20(%rdi)
 ; AVX1-NEXT:  .LBB9_22: # %else20
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
@@ -2332,7 +2335,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_24
 ; AVX1-NEXT:  # %bb.23: # %cond.store21
-; AVX1-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX1-NEXT:  .LBB9_24: # %else22
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -2345,7 +2349,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_26
 ; AVX1-NEXT:  # %bb.25: # %cond.store23
-; AVX1-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  .LBB9_26: # %else24
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
@@ -2353,7 +2358,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_28
 ; AVX1-NEXT:  # %bb.27: # %cond.store25
-; AVX1-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX1-NEXT:  .LBB9_28: # %else26
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
@@ -2365,7 +2371,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_30
 ; AVX1-NEXT:  # %bb.29: # %cond.store27
-; AVX1-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX1-NEXT:  .LBB9_30: # %else28
 ; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -2373,6 +2380,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_32
 ; AVX1-NEXT:  # %bb.31: # %cond.store29
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX1-NEXT:  .LBB9_32: # %else30
 ; AVX1-NEXT:    vzeroupper
@@ -2478,10 +2486,10 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB9_18
 ; AVX2-NEXT:  # %bb.17: # %cond.store15
-; AVX2-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrw $0, %xmm4, 16(%rdi)
 ; AVX2-NEXT:  .LBB9_18: # %else16
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm0, %xmm1, %xmm1
@@ -2490,7 +2498,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_20
 ; AVX2-NEXT:  # %bb.19: # %cond.store17
-; AVX2-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX2-NEXT:  .LBB9_20: # %else18
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -2502,7 +2511,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_22
 ; AVX2-NEXT:  # %bb.21: # %cond.store19
-; AVX2-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrw $2, %xmm4, 20(%rdi)
 ; AVX2-NEXT:  .LBB9_22: # %else20
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm0, %xmm1, %xmm1
@@ -2511,7 +2521,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_24
 ; AVX2-NEXT:  # %bb.23: # %cond.store21
-; AVX2-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX2-NEXT:  .LBB9_24: # %else22
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -2524,7 +2535,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_26
 ; AVX2-NEXT:  # %bb.25: # %cond.store23
-; AVX2-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
 ; AVX2-NEXT:  .LBB9_26: # %else24
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -2532,7 +2544,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_28
 ; AVX2-NEXT:  # %bb.27: # %cond.store25
-; AVX2-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
 ; AVX2-NEXT:  .LBB9_28: # %else26
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -2545,7 +2558,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_30
 ; AVX2-NEXT:  # %bb.29: # %cond.store27
-; AVX2-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX2-NEXT:  .LBB9_30: # %else28
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -2553,6 +2567,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_32
 ; AVX2-NEXT:  # %bb.31: # %cond.store29
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX2-NEXT:  .LBB9_32: # %else30
 ; AVX2-NEXT:    vzeroupper
@@ -2620,58 +2635,65 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    je .LBB9_18
 ; AVX512F-NEXT:  # %bb.17: # %cond.store15
-; AVX512F-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $0, %xmm1, 16(%rdi)
 ; AVX512F-NEXT:  .LBB9_18: # %else16
 ; AVX512F-NEXT:    kshiftrw $9, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_20
 ; AVX512F-NEXT:  # %bb.19: # %cond.store17
-; AVX512F-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX512F-NEXT:  .LBB9_20: # %else18
 ; AVX512F-NEXT:    kshiftrw $10, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_22
 ; AVX512F-NEXT:  # %bb.21: # %cond.store19
-; AVX512F-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $2, %xmm1, 20(%rdi)
 ; AVX512F-NEXT:  .LBB9_22: # %else20
 ; AVX512F-NEXT:    kshiftrw $11, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_24
 ; AVX512F-NEXT:  # %bb.23: # %cond.store21
-; AVX512F-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX512F-NEXT:  .LBB9_24: # %else22
 ; AVX512F-NEXT:    kshiftrw $12, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_26
 ; AVX512F-NEXT:  # %bb.25: # %cond.store23
-; AVX512F-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $4, %xmm1, 24(%rdi)
 ; AVX512F-NEXT:  .LBB9_26: # %else24
 ; AVX512F-NEXT:    kshiftrw $13, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_28
 ; AVX512F-NEXT:  # %bb.27: # %cond.store25
-; AVX512F-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
 ; AVX512F-NEXT:  .LBB9_28: # %else26
 ; AVX512F-NEXT:    kshiftrw $14, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_30
 ; AVX512F-NEXT:  # %bb.29: # %cond.store27
-; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $6, %xmm1, 28(%rdi)
 ; AVX512F-NEXT:  .LBB9_30: # %else28
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_32
 ; AVX512F-NEXT:  # %bb.31: # %cond.store29
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX512F-NEXT:  .LBB9_32: # %else30
 ; AVX512F-NEXT:    vzeroupper
@@ -2712,36 +2734,38 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm8, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB10_2: # %else
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm8
-; SSE2-NEXT:    pextrw $2, %xmm8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB10_4: # %else2
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
-; SSE2-NEXT:    pextrw $4, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $4, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    movb %cl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    movb %al, 2(%rdi)
 ; SSE2-NEXT:  .LBB10_6: # %else4
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm4
-; SSE2-NEXT:    pextrw $6, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB10_8: # %else6
@@ -2750,17 +2774,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 4(%rdi)
 ; SSE2-NEXT:  .LBB10_10: # %else8
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB10_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -2768,17 +2793,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm5, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 6(%rdi)
 ; SSE2-NEXT:  .LBB10_14: # %else12
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm5
-; SSE2-NEXT:    pextrw $6, %xmm5, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm5, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB10_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -2786,17 +2812,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 8(%rdi)
 ; SSE2-NEXT:  .LBB10_18: # %else16
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB10_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -2804,17 +2831,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm6, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 10(%rdi)
 ; SSE2-NEXT:  .LBB10_22: # %else20
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm6
-; SSE2-NEXT:    pextrw $6, %xmm6, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm6, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB10_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -2822,17 +2850,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 12(%rdi)
 ; SSE2-NEXT:  .LBB10_26: # %else24
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB10_28: # %else26
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -2840,17 +2869,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm7, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 14(%rdi)
 ; SSE2-NEXT:  .LBB10_30: # %else28
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm0, %xmm7
-; SSE2-NEXT:    pextrw $6, %xmm7, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT:    pxor %xmm1, %xmm7
+; SSE2-NEXT:    pextrw $6, %xmm7, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB10_32: # %else30
 ; SSE2-NEXT:    retq
@@ -3920,17 +3950,18 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-NEXT:    movd %xmm5, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    je .LBB12_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB12_2: # %else
 ; SSE2-NEXT:    psrlq $16, %xmm4
-; SSE2-NEXT:    movd %xmm4, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    movd %xmm4, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB12_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB12_4: # %else2
@@ -4669,40 +4700,42 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    pand %xmm6, %xmm1
 ; SSE2-NEXT:    pand %xmm6, %xmm0
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm7, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm7, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB15_2: # %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB15_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  .LBB15_6: # %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB15_8: # %else6
@@ -4712,9 +4745,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  .LBB15_10: # %else8
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4722,7 +4755,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB15_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -4730,9 +4764,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  .LBB15_14: # %else12
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4740,7 +4774,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB15_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -4748,9 +4783,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  .LBB15_18: # %else16
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4758,7 +4793,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB15_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -4766,9 +4802,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  .LBB15_22: # %else20
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4776,7 +4812,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB15_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -4784,9 +4821,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  .LBB15_26: # %else24
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4794,65 +4831,69 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB15_28: # %else26
-; SSE2-NEXT:    pand %xmm6, %xmm3
-; SSE2-NEXT:    pand %xmm6, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm4
 ; SSE2-NEXT:    pextrw $7, %xmm4, %eax
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  .LBB15_30: # %else28
-; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pand %xmm6, %xmm3
+; SSE2-NEXT:    pand %xmm6, %xmm2
 ; SSE2-NEXT:    shrl $8, %eax
 ; SSE2-NEXT:    notb %al
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB15_32: # %else30
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_34
 ; SSE2-NEXT:  # %bb.33: # %cond.store31
-; SSE2-NEXT:    movb %al, 16(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 16(%rdi)
 ; SSE2-NEXT:  .LBB15_34: # %else32
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_36
 ; SSE2-NEXT:  # %bb.35: # %cond.store33
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 17(%rdi)
 ; SSE2-NEXT:  .LBB15_36: # %else34
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_38
 ; SSE2-NEXT:  # %bb.37: # %cond.store35
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 18(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 18(%rdi)
 ; SSE2-NEXT:  .LBB15_38: # %else36
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_40
 ; SSE2-NEXT:  # %bb.39: # %cond.store37
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 19(%rdi)
 ; SSE2-NEXT:  .LBB15_40: # %else38
@@ -4862,9 +4903,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_42
 ; SSE2-NEXT:  # %bb.41: # %cond.store39
+; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 20(%rdi)
 ; SSE2-NEXT:  .LBB15_42: # %else40
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4872,7 +4913,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_44
 ; SSE2-NEXT:  # %bb.43: # %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 21(%rdi)
 ; SSE2-NEXT:  .LBB15_44: # %else42
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -4880,9 +4922,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_46
 ; SSE2-NEXT:  # %bb.45: # %cond.store43
+; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 22(%rdi)
 ; SSE2-NEXT:  .LBB15_46: # %else44
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4890,7 +4932,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_48
 ; SSE2-NEXT:  # %bb.47: # %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 23(%rdi)
 ; SSE2-NEXT:  .LBB15_48: # %else46
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -4898,9 +4941,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_50
 ; SSE2-NEXT:  # %bb.49: # %cond.store47
+; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 24(%rdi)
 ; SSE2-NEXT:  .LBB15_50: # %else48
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4908,7 +4951,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_52
 ; SSE2-NEXT:  # %bb.51: # %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 25(%rdi)
 ; SSE2-NEXT:  .LBB15_52: # %else50
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -4916,9 +4960,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_54
 ; SSE2-NEXT:  # %bb.53: # %cond.store51
+; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 26(%rdi)
 ; SSE2-NEXT:  .LBB15_54: # %else52
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4926,7 +4970,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_56
 ; SSE2-NEXT:  # %bb.55: # %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 27(%rdi)
 ; SSE2-NEXT:  .LBB15_56: # %else54
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -4934,9 +4979,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_58
 ; SSE2-NEXT:  # %bb.57: # %cond.store55
+; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 28(%rdi)
 ; SSE2-NEXT:  .LBB15_58: # %else56
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4944,7 +4989,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_60
 ; SSE2-NEXT:  # %bb.59: # %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 29(%rdi)
 ; SSE2-NEXT:  .LBB15_60: # %else58
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm5
@@ -4952,9 +4998,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_62
 ; SSE2-NEXT:  # %bb.61: # %cond.store59
+; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 30(%rdi)
 ; SSE2-NEXT:  .LBB15_62: # %else60
 ; SSE2-NEXT:    shrl $8, %eax
@@ -4962,7 +5008,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_64
 ; SSE2-NEXT:  # %bb.63: # %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 31(%rdi)
 ; SSE2-NEXT:  .LBB15_64: # %else62
 ; SSE2-NEXT:    retq
 ;
@@ -5394,16 +5441,17 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB15_34
 ; AVX1-NEXT:  # %bb.33: # %cond.store31
-; AVX1-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX1-NEXT:  .LBB15_34: # %else32
 ; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_36
 ; AVX1-NEXT:  # %bb.35: # %cond.store33
-; AVX1-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX1-NEXT:  .LBB15_36: # %else34
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -5413,13 +5461,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_38
 ; AVX1-NEXT:  # %bb.37: # %cond.store35
-; AVX1-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX1-NEXT:  .LBB15_38: # %else36
 ; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_40
 ; AVX1-NEXT:  # %bb.39: # %cond.store37
-; AVX1-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX1-NEXT:  .LBB15_40: # %else38
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -5429,13 +5479,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_42
 ; AVX1-NEXT:  # %bb.41: # %cond.store39
-; AVX1-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX1-NEXT:  .LBB15_42: # %else40
 ; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_44
 ; AVX1-NEXT:  # %bb.43: # %cond.store41
-; AVX1-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX1-NEXT:  .LBB15_44: # %else42
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -5445,13 +5497,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_46
 ; AVX1-NEXT:  # %bb.45: # %cond.store43
-; AVX1-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX1-NEXT:  .LBB15_46: # %else44
 ; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_48
 ; AVX1-NEXT:  # %bb.47: # %cond.store45
-; AVX1-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX1-NEXT:  .LBB15_48: # %else46
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -5461,13 +5515,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_50
 ; AVX1-NEXT:  # %bb.49: # %cond.store47
-; AVX1-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  .LBB15_50: # %else48
 ; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_52
 ; AVX1-NEXT:  # %bb.51: # %cond.store49
-; AVX1-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX1-NEXT:  .LBB15_52: # %else50
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -5477,13 +5533,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_54
 ; AVX1-NEXT:  # %bb.53: # %cond.store51
-; AVX1-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX1-NEXT:  .LBB15_54: # %else52
 ; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_56
 ; AVX1-NEXT:  # %bb.55: # %cond.store53
-; AVX1-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX1-NEXT:  .LBB15_56: # %else54
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -5493,13 +5551,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_58
 ; AVX1-NEXT:  # %bb.57: # %cond.store55
-; AVX1-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX1-NEXT:  .LBB15_58: # %else56
 ; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_60
 ; AVX1-NEXT:  # %bb.59: # %cond.store57
-; AVX1-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX1-NEXT:  .LBB15_60: # %else58
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
@@ -5509,12 +5569,14 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_62
 ; AVX1-NEXT:  # %bb.61: # %cond.store59
-; AVX1-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX1-NEXT:  .LBB15_62: # %else60
 ; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_64
 ; AVX1-NEXT:  # %bb.63: # %cond.store61
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX1-NEXT:  .LBB15_64: # %else62
 ; AVX1-NEXT:    vzeroupper
@@ -5663,17 +5725,18 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    vpextrb $0, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB15_34
 ; AVX2-NEXT:  # %bb.33: # %cond.store31
-; AVX2-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX2-NEXT:  .LBB15_34: # %else32
 ; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_36
 ; AVX2-NEXT:  # %bb.35: # %cond.store33
-; AVX2-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
 ; AVX2-NEXT:  .LBB15_36: # %else34
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5683,14 +5746,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_38
 ; AVX2-NEXT:  # %bb.37: # %cond.store35
-; AVX2-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX2-NEXT:  .LBB15_38: # %else36
 ; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_40
 ; AVX2-NEXT:  # %bb.39: # %cond.store37
-; AVX2-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
 ; AVX2-NEXT:  .LBB15_40: # %else38
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5700,14 +5765,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_42
 ; AVX2-NEXT:  # %bb.41: # %cond.store39
-; AVX2-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX2-NEXT:  .LBB15_42: # %else40
 ; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_44
 ; AVX2-NEXT:  # %bb.43: # %cond.store41
-; AVX2-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
 ; AVX2-NEXT:  .LBB15_44: # %else42
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5717,14 +5784,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_46
 ; AVX2-NEXT:  # %bb.45: # %cond.store43
-; AVX2-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX2-NEXT:  .LBB15_46: # %else44
 ; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_48
 ; AVX2-NEXT:  # %bb.47: # %cond.store45
-; AVX2-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
 ; AVX2-NEXT:  .LBB15_48: # %else46
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5734,14 +5803,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_50
 ; AVX2-NEXT:  # %bb.49: # %cond.store47
-; AVX2-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX2-NEXT:  .LBB15_50: # %else48
 ; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_52
 ; AVX2-NEXT:  # %bb.51: # %cond.store49
-; AVX2-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
 ; AVX2-NEXT:  .LBB15_52: # %else50
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5751,14 +5822,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_54
 ; AVX2-NEXT:  # %bb.53: # %cond.store51
-; AVX2-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX2-NEXT:  .LBB15_54: # %else52
 ; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_56
 ; AVX2-NEXT:  # %bb.55: # %cond.store53
-; AVX2-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
 ; AVX2-NEXT:  .LBB15_56: # %else54
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5768,14 +5841,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_58
 ; AVX2-NEXT:  # %bb.57: # %cond.store55
-; AVX2-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX2-NEXT:  .LBB15_58: # %else56
 ; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_60
 ; AVX2-NEXT:  # %bb.59: # %cond.store57
-; AVX2-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
 ; AVX2-NEXT:  .LBB15_60: # %else58
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -5785,13 +5860,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_62
 ; AVX2-NEXT:  # %bb.61: # %cond.store59
-; AVX2-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX2-NEXT:  .LBB15_62: # %else60
 ; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_64
 ; AVX2-NEXT:  # %bb.63: # %cond.store61
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX2-NEXT:  .LBB15_64: # %else62
 ; AVX2-NEXT:    vzeroupper
@@ -5996,10 +6073,10 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    je .LBB15_34
 ; AVX512F-NEXT:  # %bb.33: # %cond.store31
-; AVX512F-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX512F-NEXT:  .LBB15_34: # %else32
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6009,7 +6086,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_36
 ; AVX512F-NEXT:  # %bb.35: # %cond.store33
-; AVX512F-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
 ; AVX512F-NEXT:  .LBB15_36: # %else34
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6023,7 +6101,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_38
 ; AVX512F-NEXT:  # %bb.37: # %cond.store35
-; AVX512F-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX512F-NEXT:  .LBB15_38: # %else36
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6033,7 +6112,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_40
 ; AVX512F-NEXT:  # %bb.39: # %cond.store37
-; AVX512F-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
 ; AVX512F-NEXT:  .LBB15_40: # %else38
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6047,7 +6127,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_42
 ; AVX512F-NEXT:  # %bb.41: # %cond.store39
-; AVX512F-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX512F-NEXT:  .LBB15_42: # %else40
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6057,7 +6138,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_44
 ; AVX512F-NEXT:  # %bb.43: # %cond.store41
-; AVX512F-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
 ; AVX512F-NEXT:  .LBB15_44: # %else42
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6071,7 +6153,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_46
 ; AVX512F-NEXT:  # %bb.45: # %cond.store43
-; AVX512F-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX512F-NEXT:  .LBB15_46: # %else44
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6081,7 +6164,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_48
 ; AVX512F-NEXT:  # %bb.47: # %cond.store45
-; AVX512F-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
 ; AVX512F-NEXT:  .LBB15_48: # %else46
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6095,7 +6179,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_50
 ; AVX512F-NEXT:  # %bb.49: # %cond.store47
-; AVX512F-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX512F-NEXT:  .LBB15_50: # %else48
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6105,7 +6190,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_52
 ; AVX512F-NEXT:  # %bb.51: # %cond.store49
-; AVX512F-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
 ; AVX512F-NEXT:  .LBB15_52: # %else50
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6119,7 +6205,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_54
 ; AVX512F-NEXT:  # %bb.53: # %cond.store51
-; AVX512F-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX512F-NEXT:  .LBB15_54: # %else52
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6129,7 +6216,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_56
 ; AVX512F-NEXT:  # %bb.55: # %cond.store53
-; AVX512F-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
 ; AVX512F-NEXT:  .LBB15_56: # %else54
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6143,7 +6231,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_58
 ; AVX512F-NEXT:  # %bb.57: # %cond.store55
-; AVX512F-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX512F-NEXT:  .LBB15_58: # %else56
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6153,7 +6242,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_60
 ; AVX512F-NEXT:  # %bb.59: # %cond.store57
-; AVX512F-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
 ; AVX512F-NEXT:  .LBB15_60: # %else58
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6167,7 +6257,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_62
 ; AVX512F-NEXT:  # %bb.61: # %cond.store59
-; AVX512F-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX512F-NEXT:  .LBB15_62: # %else60
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6177,6 +6268,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_64
 ; AVX512F-NEXT:  # %bb.63: # %cond.store61
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX512F-NEXT:  .LBB15_64: # %else62
 ; AVX512F-NEXT:    vzeroupper
@@ -6211,40 +6303,42 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    pand %xmm4, %xmm1
 ; SSE2-NEXT:    pand %xmm4, %xmm0
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm3, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB16_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB16_2: # %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB16_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB16_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  .LBB16_6: # %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB16_8: # %else6
@@ -6254,9 +6348,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  .LBB16_10: # %else8
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6264,7 +6358,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB16_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -6272,9 +6367,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  .LBB16_14: # %else12
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6282,7 +6377,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB16_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -6290,9 +6386,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  .LBB16_18: # %else16
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6300,7 +6396,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB16_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -6308,9 +6405,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  .LBB16_22: # %else20
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6318,7 +6415,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB16_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -6326,9 +6424,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  .LBB16_26: # %else24
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6336,7 +6434,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB16_28: # %else26
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
@@ -6344,9 +6443,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  .LBB16_30: # %else28
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6354,7 +6453,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB16_32: # %else30
 ; SSE2-NEXT:    retq
 ;
@@ -6986,17 +7086,18 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE2-NEXT:    pcmpeqw %xmm1, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
 ; SSE2-NEXT:    pxor %xmm2, %xmm3
-; SSE2-NEXT:    movd %xmm3, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB17_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB17_2: # %else
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB17_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB17_4: # %else2
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index 50c2d619e0bb5..057ccd553cc30 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -1112,17 +1112,18 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-NEXT:    movd %xmm8, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    je .LBB2_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB2_2: # %else
 ; SSE2-NEXT:    psrlq $16, %xmm9
-; SSE2-NEXT:    movd %xmm9, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    movd %xmm9, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB2_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB2_4: # %else2
@@ -3378,10 +3379,10 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB9_18
 ; AVX1-NEXT:  # %bb.17: # %cond.store15
-; AVX1-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrw $0, %xmm4, 16(%rdi)
 ; AVX1-NEXT:  .LBB9_18: # %else16
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
@@ -3391,7 +3392,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_20
 ; AVX1-NEXT:  # %bb.19: # %cond.store17
-; AVX1-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX1-NEXT:  .LBB9_20: # %else18
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm4
@@ -3403,7 +3405,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_22
 ; AVX1-NEXT:  # %bb.21: # %cond.store19
-; AVX1-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrw $2, %xmm4, 20(%rdi)
 ; AVX1-NEXT:  .LBB9_22: # %else20
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
@@ -3413,7 +3416,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_24
 ; AVX1-NEXT:  # %bb.23: # %cond.store21
-; AVX1-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX1-NEXT:  .LBB9_24: # %else22
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -3426,7 +3430,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_26
 ; AVX1-NEXT:  # %bb.25: # %cond.store23
-; AVX1-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  .LBB9_26: # %else24
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
@@ -3434,7 +3439,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_28
 ; AVX1-NEXT:  # %bb.27: # %cond.store25
-; AVX1-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX1-NEXT:  .LBB9_28: # %else26
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
@@ -3446,7 +3452,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_30
 ; AVX1-NEXT:  # %bb.29: # %cond.store27
-; AVX1-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX1-NEXT:  .LBB9_30: # %else28
 ; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -3454,6 +3461,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_32
 ; AVX1-NEXT:  # %bb.31: # %cond.store29
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX1-NEXT:  .LBB9_32: # %else30
 ; AVX1-NEXT:    vzeroupper
@@ -3555,10 +3563,10 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB9_18
 ; AVX2-NEXT:  # %bb.17: # %cond.store15
-; AVX2-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrw $0, %xmm4, 16(%rdi)
 ; AVX2-NEXT:  .LBB9_18: # %else16
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm0, %xmm1, %xmm1
@@ -3567,7 +3575,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_20
 ; AVX2-NEXT:  # %bb.19: # %cond.store17
-; AVX2-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX2-NEXT:  .LBB9_20: # %else18
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -3579,7 +3588,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_22
 ; AVX2-NEXT:  # %bb.21: # %cond.store19
-; AVX2-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrw $2, %xmm4, 20(%rdi)
 ; AVX2-NEXT:  .LBB9_22: # %else20
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm0, %xmm1, %xmm1
@@ -3588,7 +3598,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_24
 ; AVX2-NEXT:  # %bb.23: # %cond.store21
-; AVX2-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX2-NEXT:  .LBB9_24: # %else22
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -3601,7 +3612,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_26
 ; AVX2-NEXT:  # %bb.25: # %cond.store23
-; AVX2-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
 ; AVX2-NEXT:  .LBB9_26: # %else24
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -3609,7 +3621,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_28
 ; AVX2-NEXT:  # %bb.27: # %cond.store25
-; AVX2-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
 ; AVX2-NEXT:  .LBB9_28: # %else26
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -3622,7 +3635,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_30
 ; AVX2-NEXT:  # %bb.29: # %cond.store27
-; AVX2-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX2-NEXT:  .LBB9_30: # %else28
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -3630,6 +3644,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_32
 ; AVX2-NEXT:  # %bb.31: # %cond.store29
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX2-NEXT:  .LBB9_32: # %else30
 ; AVX2-NEXT:    vzeroupper
@@ -3697,58 +3712,65 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    je .LBB9_18
 ; AVX512F-NEXT:  # %bb.17: # %cond.store15
-; AVX512F-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $0, %xmm1, 16(%rdi)
 ; AVX512F-NEXT:  .LBB9_18: # %else16
 ; AVX512F-NEXT:    kshiftrw $9, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_20
 ; AVX512F-NEXT:  # %bb.19: # %cond.store17
-; AVX512F-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX512F-NEXT:  .LBB9_20: # %else18
 ; AVX512F-NEXT:    kshiftrw $10, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_22
 ; AVX512F-NEXT:  # %bb.21: # %cond.store19
-; AVX512F-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $2, %xmm1, 20(%rdi)
 ; AVX512F-NEXT:  .LBB9_22: # %else20
 ; AVX512F-NEXT:    kshiftrw $11, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_24
 ; AVX512F-NEXT:  # %bb.23: # %cond.store21
-; AVX512F-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX512F-NEXT:  .LBB9_24: # %else22
 ; AVX512F-NEXT:    kshiftrw $12, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_26
 ; AVX512F-NEXT:  # %bb.25: # %cond.store23
-; AVX512F-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $4, %xmm1, 24(%rdi)
 ; AVX512F-NEXT:  .LBB9_26: # %else24
 ; AVX512F-NEXT:    kshiftrw $13, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_28
 ; AVX512F-NEXT:  # %bb.27: # %cond.store25
-; AVX512F-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
 ; AVX512F-NEXT:  .LBB9_28: # %else26
 ; AVX512F-NEXT:    kshiftrw $14, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_30
 ; AVX512F-NEXT:  # %bb.29: # %cond.store27
-; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $6, %xmm1, 28(%rdi)
 ; AVX512F-NEXT:  .LBB9_30: # %else28
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_32
 ; AVX512F-NEXT:  # %bb.31: # %cond.store29
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX512F-NEXT:  .LBB9_32: # %else30
 ; AVX512F-NEXT:    vzeroupper
@@ -3792,36 +3814,38 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm8, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB10_2: # %else
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm8
-; SSE2-NEXT:    pextrw $2, %xmm8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB10_4: # %else2
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
-; SSE2-NEXT:    pextrw $4, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $4, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    movb %cl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    movb %al, 2(%rdi)
 ; SSE2-NEXT:  .LBB10_6: # %else4
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm4
-; SSE2-NEXT:    pextrw $6, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB10_8: # %else6
@@ -3830,17 +3854,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 4(%rdi)
 ; SSE2-NEXT:  .LBB10_10: # %else8
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB10_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -3848,17 +3873,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm5, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 6(%rdi)
 ; SSE2-NEXT:  .LBB10_14: # %else12
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm5
-; SSE2-NEXT:    pextrw $6, %xmm5, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm5, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB10_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -3866,17 +3892,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 8(%rdi)
 ; SSE2-NEXT:  .LBB10_18: # %else16
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB10_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -3884,17 +3911,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm6, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 10(%rdi)
 ; SSE2-NEXT:  .LBB10_22: # %else20
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm1, %xmm6
-; SSE2-NEXT:    pextrw $6, %xmm6, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm6, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB10_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -3902,17 +3930,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 12(%rdi)
 ; SSE2-NEXT:  .LBB10_26: # %else24
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pextrw $2, %xmm1, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm1, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB10_28: # %else26
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -3920,17 +3949,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm7, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    je .LBB10_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    movb %al, 14(%rdi)
 ; SSE2-NEXT:  .LBB10_30: # %else28
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm0, %xmm7
-; SSE2-NEXT:    pextrw $6, %xmm7, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT:    pxor %xmm1, %xmm7
+; SSE2-NEXT:    pextrw $6, %xmm7, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB10_32: # %else30
 ; SSE2-NEXT:    retq
@@ -5015,17 +5045,18 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT:    packssdw %xmm6, %xmm0
 ; SSE2-NEXT:    movd %xmm5, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    je .LBB12_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB12_2: # %else
 ; SSE2-NEXT:    psrlq $16, %xmm4
-; SSE2-NEXT:    movd %xmm4, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    movd %xmm4, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB12_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB12_4: # %else2
@@ -5923,40 +5954,42 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    pxor %xmm6, %xmm6
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm6
 ; SSE2-NEXT:    packsswb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm6, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm6, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB15_2: # %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB15_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  .LBB15_6: # %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB15_8: # %else6
@@ -5966,9 +5999,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  .LBB15_10: # %else8
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5976,7 +6009,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB15_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -5984,9 +6018,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  .LBB15_14: # %else12
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5994,7 +6028,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB15_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -6002,9 +6037,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  .LBB15_18: # %else16
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6012,7 +6047,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB15_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -6020,9 +6056,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  .LBB15_22: # %else20
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6030,7 +6066,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB15_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -6038,9 +6075,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  .LBB15_26: # %else24
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6048,7 +6085,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB15_28: # %else26
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm4
@@ -6056,55 +6094,58 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  .LBB15_30: # %else28
-; SSE2-NEXT:    packsswb %xmm3, %xmm2
 ; SSE2-NEXT:    shrl $8, %eax
 ; SSE2-NEXT:    notb %al
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB15_32: # %else30
+; SSE2-NEXT:    packsswb %xmm3, %xmm2
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_34
 ; SSE2-NEXT:  # %bb.33: # %cond.store31
-; SSE2-NEXT:    movb %al, 16(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 16(%rdi)
 ; SSE2-NEXT:  .LBB15_34: # %else32
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_36
 ; SSE2-NEXT:  # %bb.35: # %cond.store33
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 17(%rdi)
 ; SSE2-NEXT:  .LBB15_36: # %else34
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_38
 ; SSE2-NEXT:  # %bb.37: # %cond.store35
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 18(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 18(%rdi)
 ; SSE2-NEXT:  .LBB15_38: # %else36
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_40
 ; SSE2-NEXT:  # %bb.39: # %cond.store37
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 19(%rdi)
 ; SSE2-NEXT:  .LBB15_40: # %else38
@@ -6114,9 +6155,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_42
 ; SSE2-NEXT:  # %bb.41: # %cond.store39
+; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 20(%rdi)
 ; SSE2-NEXT:  .LBB15_42: # %else40
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6124,7 +6165,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_44
 ; SSE2-NEXT:  # %bb.43: # %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 21(%rdi)
 ; SSE2-NEXT:  .LBB15_44: # %else42
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -6132,9 +6174,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_46
 ; SSE2-NEXT:  # %bb.45: # %cond.store43
+; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 22(%rdi)
 ; SSE2-NEXT:  .LBB15_46: # %else44
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6142,7 +6184,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_48
 ; SSE2-NEXT:  # %bb.47: # %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 23(%rdi)
 ; SSE2-NEXT:  .LBB15_48: # %else46
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -6150,9 +6193,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_50
 ; SSE2-NEXT:  # %bb.49: # %cond.store47
+; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 24(%rdi)
 ; SSE2-NEXT:  .LBB15_50: # %else48
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6160,7 +6203,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_52
 ; SSE2-NEXT:  # %bb.51: # %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 25(%rdi)
 ; SSE2-NEXT:  .LBB15_52: # %else50
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -6168,9 +6212,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_54
 ; SSE2-NEXT:  # %bb.53: # %cond.store51
+; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 26(%rdi)
 ; SSE2-NEXT:  .LBB15_54: # %else52
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6178,7 +6222,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_56
 ; SSE2-NEXT:  # %bb.55: # %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 27(%rdi)
 ; SSE2-NEXT:  .LBB15_56: # %else54
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -6186,9 +6231,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_58
 ; SSE2-NEXT:  # %bb.57: # %cond.store55
+; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 28(%rdi)
 ; SSE2-NEXT:  .LBB15_58: # %else56
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6196,7 +6241,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_60
 ; SSE2-NEXT:  # %bb.59: # %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 29(%rdi)
 ; SSE2-NEXT:  .LBB15_60: # %else58
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm5
@@ -6204,9 +6250,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_62
 ; SSE2-NEXT:  # %bb.61: # %cond.store59
+; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 30(%rdi)
 ; SSE2-NEXT:  .LBB15_62: # %else60
 ; SSE2-NEXT:    shrl $8, %eax
@@ -6214,7 +6260,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_64
 ; SSE2-NEXT:  # %bb.63: # %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 31(%rdi)
 ; SSE2-NEXT:  .LBB15_64: # %else62
 ; SSE2-NEXT:    retq
 ;
@@ -6638,16 +6685,17 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB15_34
 ; AVX1-NEXT:  # %bb.33: # %cond.store31
-; AVX1-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX1-NEXT:  .LBB15_34: # %else32
 ; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_36
 ; AVX1-NEXT:  # %bb.35: # %cond.store33
-; AVX1-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX1-NEXT:  .LBB15_36: # %else34
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6657,13 +6705,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_38
 ; AVX1-NEXT:  # %bb.37: # %cond.store35
-; AVX1-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX1-NEXT:  .LBB15_38: # %else36
 ; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_40
 ; AVX1-NEXT:  # %bb.39: # %cond.store37
-; AVX1-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX1-NEXT:  .LBB15_40: # %else38
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6673,13 +6723,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_42
 ; AVX1-NEXT:  # %bb.41: # %cond.store39
-; AVX1-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX1-NEXT:  .LBB15_42: # %else40
 ; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_44
 ; AVX1-NEXT:  # %bb.43: # %cond.store41
-; AVX1-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX1-NEXT:  .LBB15_44: # %else42
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6689,13 +6741,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_46
 ; AVX1-NEXT:  # %bb.45: # %cond.store43
-; AVX1-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX1-NEXT:  .LBB15_46: # %else44
 ; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_48
 ; AVX1-NEXT:  # %bb.47: # %cond.store45
-; AVX1-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX1-NEXT:  .LBB15_48: # %else46
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6705,13 +6759,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_50
 ; AVX1-NEXT:  # %bb.49: # %cond.store47
-; AVX1-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  .LBB15_50: # %else48
 ; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_52
 ; AVX1-NEXT:  # %bb.51: # %cond.store49
-; AVX1-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX1-NEXT:  .LBB15_52: # %else50
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6721,13 +6777,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_54
 ; AVX1-NEXT:  # %bb.53: # %cond.store51
-; AVX1-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX1-NEXT:  .LBB15_54: # %else52
 ; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_56
 ; AVX1-NEXT:  # %bb.55: # %cond.store53
-; AVX1-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX1-NEXT:  .LBB15_56: # %else54
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6737,13 +6795,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_58
 ; AVX1-NEXT:  # %bb.57: # %cond.store55
-; AVX1-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX1-NEXT:  .LBB15_58: # %else56
 ; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_60
 ; AVX1-NEXT:  # %bb.59: # %cond.store57
-; AVX1-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX1-NEXT:  .LBB15_60: # %else58
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
@@ -6753,12 +6813,14 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_62
 ; AVX1-NEXT:  # %bb.61: # %cond.store59
-; AVX1-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX1-NEXT:  .LBB15_62: # %else60
 ; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_64
 ; AVX1-NEXT:  # %bb.63: # %cond.store61
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX1-NEXT:  .LBB15_64: # %else62
 ; AVX1-NEXT:    vzeroupper
@@ -6902,17 +6964,18 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    vpextrb $0, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB15_34
 ; AVX2-NEXT:  # %bb.33: # %cond.store31
-; AVX2-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX2-NEXT:  .LBB15_34: # %else32
 ; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_36
 ; AVX2-NEXT:  # %bb.35: # %cond.store33
-; AVX2-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
 ; AVX2-NEXT:  .LBB15_36: # %else34
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6922,14 +6985,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_38
 ; AVX2-NEXT:  # %bb.37: # %cond.store35
-; AVX2-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX2-NEXT:  .LBB15_38: # %else36
 ; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_40
 ; AVX2-NEXT:  # %bb.39: # %cond.store37
-; AVX2-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
 ; AVX2-NEXT:  .LBB15_40: # %else38
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6939,14 +7004,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_42
 ; AVX2-NEXT:  # %bb.41: # %cond.store39
-; AVX2-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX2-NEXT:  .LBB15_42: # %else40
 ; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_44
 ; AVX2-NEXT:  # %bb.43: # %cond.store41
-; AVX2-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
 ; AVX2-NEXT:  .LBB15_44: # %else42
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6956,14 +7023,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_46
 ; AVX2-NEXT:  # %bb.45: # %cond.store43
-; AVX2-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX2-NEXT:  .LBB15_46: # %else44
 ; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_48
 ; AVX2-NEXT:  # %bb.47: # %cond.store45
-; AVX2-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
 ; AVX2-NEXT:  .LBB15_48: # %else46
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6973,14 +7042,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_50
 ; AVX2-NEXT:  # %bb.49: # %cond.store47
-; AVX2-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX2-NEXT:  .LBB15_50: # %else48
 ; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_52
 ; AVX2-NEXT:  # %bb.51: # %cond.store49
-; AVX2-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
 ; AVX2-NEXT:  .LBB15_52: # %else50
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6990,14 +7061,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_54
 ; AVX2-NEXT:  # %bb.53: # %cond.store51
-; AVX2-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX2-NEXT:  .LBB15_54: # %else52
 ; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_56
 ; AVX2-NEXT:  # %bb.55: # %cond.store53
-; AVX2-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
 ; AVX2-NEXT:  .LBB15_56: # %else54
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7007,14 +7080,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_58
 ; AVX2-NEXT:  # %bb.57: # %cond.store55
-; AVX2-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX2-NEXT:  .LBB15_58: # %else56
 ; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_60
 ; AVX2-NEXT:  # %bb.59: # %cond.store57
-; AVX2-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
 ; AVX2-NEXT:  .LBB15_60: # %else58
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7024,13 +7099,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_62
 ; AVX2-NEXT:  # %bb.61: # %cond.store59
-; AVX2-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX2-NEXT:  .LBB15_62: # %else60
 ; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_64
 ; AVX2-NEXT:  # %bb.63: # %cond.store61
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX2-NEXT:  .LBB15_64: # %else62
 ; AVX2-NEXT:    vzeroupper
@@ -7241,10 +7318,10 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    je .LBB15_34
 ; AVX512F-NEXT:  # %bb.33: # %cond.store31
-; AVX512F-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX512F-NEXT:  .LBB15_34: # %else32
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7254,7 +7331,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_36
 ; AVX512F-NEXT:  # %bb.35: # %cond.store33
-; AVX512F-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
 ; AVX512F-NEXT:  .LBB15_36: # %else34
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7268,7 +7346,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_38
 ; AVX512F-NEXT:  # %bb.37: # %cond.store35
-; AVX512F-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX512F-NEXT:  .LBB15_38: # %else36
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7278,7 +7357,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_40
 ; AVX512F-NEXT:  # %bb.39: # %cond.store37
-; AVX512F-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
 ; AVX512F-NEXT:  .LBB15_40: # %else38
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7292,7 +7372,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_42
 ; AVX512F-NEXT:  # %bb.41: # %cond.store39
-; AVX512F-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX512F-NEXT:  .LBB15_42: # %else40
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7302,7 +7383,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_44
 ; AVX512F-NEXT:  # %bb.43: # %cond.store41
-; AVX512F-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
 ; AVX512F-NEXT:  .LBB15_44: # %else42
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7316,7 +7398,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_46
 ; AVX512F-NEXT:  # %bb.45: # %cond.store43
-; AVX512F-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX512F-NEXT:  .LBB15_46: # %else44
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7326,7 +7409,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_48
 ; AVX512F-NEXT:  # %bb.47: # %cond.store45
-; AVX512F-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
 ; AVX512F-NEXT:  .LBB15_48: # %else46
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7340,7 +7424,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_50
 ; AVX512F-NEXT:  # %bb.49: # %cond.store47
-; AVX512F-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX512F-NEXT:  .LBB15_50: # %else48
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7350,7 +7435,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_52
 ; AVX512F-NEXT:  # %bb.51: # %cond.store49
-; AVX512F-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
 ; AVX512F-NEXT:  .LBB15_52: # %else50
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7364,7 +7450,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_54
 ; AVX512F-NEXT:  # %bb.53: # %cond.store51
-; AVX512F-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX512F-NEXT:  .LBB15_54: # %else52
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7374,7 +7461,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_56
 ; AVX512F-NEXT:  # %bb.55: # %cond.store53
-; AVX512F-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
 ; AVX512F-NEXT:  .LBB15_56: # %else54
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7388,7 +7476,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_58
 ; AVX512F-NEXT:  # %bb.57: # %cond.store55
-; AVX512F-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX512F-NEXT:  .LBB15_58: # %else56
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7398,7 +7487,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_60
 ; AVX512F-NEXT:  # %bb.59: # %cond.store57
-; AVX512F-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
 ; AVX512F-NEXT:  .LBB15_60: # %else58
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7412,7 +7502,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_62
 ; AVX512F-NEXT:  # %bb.61: # %cond.store59
-; AVX512F-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX512F-NEXT:  .LBB15_62: # %else60
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7422,6 +7513,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_64
 ; AVX512F-NEXT:  # %bb.63: # %cond.store61
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX512F-NEXT:  .LBB15_64: # %else62
 ; AVX512F-NEXT:    vzeroupper
@@ -7461,40 +7553,42 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    pxor %xmm3, %xmm3
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm3
 ; SSE2-NEXT:    packsswb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm3, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB16_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB16_2: # %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB16_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB16_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  .LBB16_6: # %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB16_8: # %else6
@@ -7504,9 +7598,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  .LBB16_10: # %else8
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7514,7 +7608,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB16_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7522,9 +7617,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  .LBB16_14: # %else12
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7532,7 +7627,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB16_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7540,9 +7636,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  .LBB16_18: # %else16
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7550,7 +7646,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB16_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7558,9 +7655,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  .LBB16_22: # %else20
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7568,7 +7665,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB16_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7576,9 +7674,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  .LBB16_26: # %else24
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7586,7 +7684,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB16_28: # %else26
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
@@ -7594,9 +7693,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  .LBB16_30: # %else28
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7604,7 +7703,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB16_32: # %else30
 ; SSE2-NEXT:    retq
 ;
@@ -8242,17 +8342,18 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE2-NEXT:    pxor %xmm2, %xmm3
 ; SSE2-NEXT:    pminsw {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    pmaxsw {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    movd %xmm3, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB17_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB17_2: # %else
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB17_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB17_4: # %else2
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
index 062478dc9f5b3..f28929589b6cb 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -919,17 +919,18 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT:    packuswb %xmm0, %xmm6
 ; SSE2-NEXT:    movd %xmm9, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm6, %eax
 ; SSE2-NEXT:    je .LBB2_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm6, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB2_2: # %else
 ; SSE2-NEXT:    psrlq $16, %xmm11
-; SSE2-NEXT:    movd %xmm11, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    movd %xmm11, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB2_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm6, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB2_4: # %else2
@@ -3011,10 +3012,10 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB9_18
 ; AVX1-NEXT:  # %bb.17: # %cond.store15
-; AVX1-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrw $0, %xmm4, 16(%rdi)
 ; AVX1-NEXT:  .LBB9_18: # %else16
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
@@ -3024,7 +3025,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_20
 ; AVX1-NEXT:  # %bb.19: # %cond.store17
-; AVX1-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX1-NEXT:  .LBB9_20: # %else18
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm4
@@ -3036,7 +3038,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_22
 ; AVX1-NEXT:  # %bb.21: # %cond.store19
-; AVX1-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpextrw $2, %xmm4, 20(%rdi)
 ; AVX1-NEXT:  .LBB9_22: # %else20
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
@@ -3046,7 +3049,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_24
 ; AVX1-NEXT:  # %bb.23: # %cond.store21
-; AVX1-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX1-NEXT:  .LBB9_24: # %else22
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -3059,7 +3063,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_26
 ; AVX1-NEXT:  # %bb.25: # %cond.store23
-; AVX1-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  .LBB9_26: # %else24
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm2
@@ -3067,7 +3072,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_28
 ; AVX1-NEXT:  # %bb.27: # %cond.store25
-; AVX1-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
 ; AVX1-NEXT:  .LBB9_28: # %else26
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
@@ -3079,7 +3085,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_30
 ; AVX1-NEXT:  # %bb.29: # %cond.store27
-; AVX1-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX1-NEXT:  .LBB9_30: # %else28
 ; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -3087,6 +3094,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB9_32
 ; AVX1-NEXT:  # %bb.31: # %cond.store29
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX1-NEXT:  .LBB9_32: # %else30
 ; AVX1-NEXT:    vzeroupper
@@ -3191,10 +3199,10 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    vpacksswb %xmm4, %xmm0, %xmm4
 ; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB9_18
 ; AVX2-NEXT:  # %bb.17: # %cond.store15
-; AVX2-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrw $0, %xmm4, 16(%rdi)
 ; AVX2-NEXT:  .LBB9_18: # %else16
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm0, %xmm1, %xmm1
@@ -3203,7 +3211,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_20
 ; AVX2-NEXT:  # %bb.19: # %cond.store17
-; AVX2-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX2-NEXT:  .LBB9_20: # %else18
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -3215,7 +3224,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_22
 ; AVX2-NEXT:  # %bb.21: # %cond.store19
-; AVX2-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
+; AVX2-NEXT:    vpextrw $2, %xmm4, 20(%rdi)
 ; AVX2-NEXT:  .LBB9_22: # %else20
 ; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpackssdw %xmm0, %xmm1, %xmm1
@@ -3224,7 +3234,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_24
 ; AVX2-NEXT:  # %bb.23: # %cond.store21
-; AVX2-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX2-NEXT:  .LBB9_24: # %else22
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -3237,7 +3248,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_26
 ; AVX2-NEXT:  # %bb.25: # %cond.store23
-; AVX2-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
 ; AVX2-NEXT:  .LBB9_26: # %else24
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -3245,7 +3257,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_28
 ; AVX2-NEXT:  # %bb.27: # %cond.store25
-; AVX2-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
 ; AVX2-NEXT:  .LBB9_28: # %else26
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm3, %ymm1
@@ -3258,7 +3271,8 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_30
 ; AVX2-NEXT:  # %bb.29: # %cond.store27
-; AVX2-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
 ; AVX2-NEXT:  .LBB9_30: # %else28
 ; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
@@ -3266,6 +3280,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB9_32
 ; AVX2-NEXT:  # %bb.31: # %cond.store29
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX2-NEXT:  .LBB9_32: # %else30
 ; AVX2-NEXT:    vzeroupper
@@ -3333,58 +3348,65 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %p, <16 x i32>
 ; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    je .LBB9_18
 ; AVX512F-NEXT:  # %bb.17: # %cond.store15
-; AVX512F-NEXT:    vpextrw $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $0, %xmm1, 16(%rdi)
 ; AVX512F-NEXT:  .LBB9_18: # %else16
 ; AVX512F-NEXT:    kshiftrw $9, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_20
 ; AVX512F-NEXT:  # %bb.19: # %cond.store17
-; AVX512F-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $1, %xmm1, 18(%rdi)
 ; AVX512F-NEXT:  .LBB9_20: # %else18
 ; AVX512F-NEXT:    kshiftrw $10, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_22
 ; AVX512F-NEXT:  # %bb.21: # %cond.store19
-; AVX512F-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $2, %xmm1, 20(%rdi)
 ; AVX512F-NEXT:  .LBB9_22: # %else20
 ; AVX512F-NEXT:    kshiftrw $11, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_24
 ; AVX512F-NEXT:  # %bb.23: # %cond.store21
-; AVX512F-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $3, %xmm1, 22(%rdi)
 ; AVX512F-NEXT:  .LBB9_24: # %else22
 ; AVX512F-NEXT:    kshiftrw $12, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_26
 ; AVX512F-NEXT:  # %bb.25: # %cond.store23
-; AVX512F-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $4, %xmm1, 24(%rdi)
 ; AVX512F-NEXT:  .LBB9_26: # %else24
 ; AVX512F-NEXT:    kshiftrw $13, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_28
 ; AVX512F-NEXT:  # %bb.27: # %cond.store25
-; AVX512F-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $5, %xmm1, 26(%rdi)
 ; AVX512F-NEXT:  .LBB9_28: # %else26
 ; AVX512F-NEXT:    kshiftrw $14, %k0, %k1
 ; AVX512F-NEXT:    kmovw %k1, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_30
 ; AVX512F-NEXT:  # %bb.29: # %cond.store27
-; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrw $6, %xmm1, 28(%rdi)
 ; AVX512F-NEXT:  .LBB9_30: # %else28
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB9_32
 ; AVX512F-NEXT:  # %bb.31: # %cond.store29
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
 ; AVX512F-NEXT:  .LBB9_32: # %else30
 ; AVX512F-NEXT:    vzeroupper
@@ -3453,36 +3475,38 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm8, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm12, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB10_2: # %else
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT:    pxor %xmm0, %xmm8
-; SSE2-NEXT:    pextrw $2, %xmm8, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm8, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB10_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm4
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    movb %cl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm12, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    movb %al, 2(%rdi)
 ; SSE2-NEXT:  .LBB10_6: # %else4
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT:    pxor %xmm0, %xmm4
-; SSE2-NEXT:    pextrw $6, %xmm4, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm4, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm12, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB10_8: # %else6
@@ -3491,17 +3515,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $2, %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm12, %eax
 ; SSE2-NEXT:    movb %al, 4(%rdi)
 ; SSE2-NEXT:  .LBB10_10: # %else8
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
+; SSE2-NEXT:    pextrw $2, %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB10_12: # %else10
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
@@ -3509,17 +3534,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm5, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $3, %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm12, %eax
 ; SSE2-NEXT:    movb %al, 6(%rdi)
 ; SSE2-NEXT:  .LBB10_14: # %else12
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT:    pxor %xmm0, %xmm5
-; SSE2-NEXT:    pextrw $6, %xmm5, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm5, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
+; SSE2-NEXT:    pextrw $3, %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB10_16: # %else14
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
@@ -3527,17 +3553,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $4, %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm12, %eax
 ; SSE2-NEXT:    movb %al, 8(%rdi)
 ; SSE2-NEXT:  .LBB10_18: # %else16
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
+; SSE2-NEXT:    pextrw $4, %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB10_20: # %else18
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
@@ -3545,17 +3572,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm6, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $5, %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm12, %eax
 ; SSE2-NEXT:    movb %al, 10(%rdi)
 ; SSE2-NEXT:  .LBB10_22: # %else20
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT:    pxor %xmm0, %xmm6
-; SSE2-NEXT:    pextrw $6, %xmm6, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm6, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
+; SSE2-NEXT:    pextrw $5, %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB10_24: # %else22
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
@@ -3563,17 +3591,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    notl %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $6, %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm12, %eax
 ; SSE2-NEXT:    movb %al, 12(%rdi)
 ; SSE2-NEXT:  .LBB10_26: # %else24
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
+; SSE2-NEXT:    pextrw $6, %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB10_28: # %else26
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
@@ -3581,17 +3610,18 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %p, <16 x i32> %m
 ; SSE2-NEXT:    pxor %xmm7, %xmm2
 ; SSE2-NEXT:    pextrw $4, %xmm2, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    pextrw $7, %xmm12, %eax
 ; SSE2-NEXT:    je .LBB10_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm12, %eax
 ; SSE2-NEXT:    movb %al, 14(%rdi)
 ; SSE2-NEXT:  .LBB10_30: # %else28
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE2-NEXT:    pxor %xmm0, %xmm7
-; SSE2-NEXT:    pextrw $6, %xmm7, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    pextrw $6, %xmm7, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB10_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
+; SSE2-NEXT:    pextrw $7, %xmm12, %eax
 ; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB10_32: # %else30
 ; SSE2-NEXT:    retq
@@ -4697,17 +4727,18 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask)
 ; SSE2-NEXT:    packuswb %xmm4, %xmm10
 ; SSE2-NEXT:    movd %xmm9, %eax
 ; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm10, %eax
 ; SSE2-NEXT:    je .LBB12_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
+; SSE2-NEXT:    movd %xmm10, %eax
 ; SSE2-NEXT:    movb %al, (%rdi)
 ; SSE2-NEXT:  .LBB12_2: # %else
 ; SSE2-NEXT:    psrlq $16, %xmm5
-; SSE2-NEXT:    movd %xmm5, %ecx
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    movd %xmm5, %eax
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB12_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm10, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB12_4: # %else2
@@ -5566,40 +5597,42 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    pminsw %xmm8, %xmm0
 ; SSE2-NEXT:    pxor %xmm6, %xmm0
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm7, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm7, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB15_2: # %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB15_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  .LBB15_6: # %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB15_8: # %else6
@@ -5609,9 +5642,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  .LBB15_10: # %else8
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5619,7 +5652,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB15_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -5627,9 +5661,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  .LBB15_14: # %else12
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5637,7 +5671,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB15_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -5645,9 +5680,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  .LBB15_18: # %else16
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5655,7 +5690,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB15_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
@@ -5663,9 +5699,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  .LBB15_22: # %else20
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5673,87 +5709,92 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB15_24: # %else22
-; SSE2-NEXT:    pxor %xmm6, %xmm3
-; SSE2-NEXT:    pxor %xmm6, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
 ; SSE2-NEXT:    pextrw $6, %xmm1, %eax
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  .LBB15_26: # %else24
-; SSE2-NEXT:    pminsw %xmm8, %xmm3
-; SSE2-NEXT:    pminsw %xmm8, %xmm2
+; SSE2-NEXT:    pxor %xmm6, %xmm3
+; SSE2-NEXT:    pxor %xmm6, %xmm2
 ; SSE2-NEXT:    shrl $8, %eax
 ; SSE2-NEXT:    notb %al
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB15_28: # %else26
-; SSE2-NEXT:    pxor %xmm6, %xmm3
-; SSE2-NEXT:    pxor %xmm6, %xmm2
+; SSE2-NEXT:    pminsw %xmm8, %xmm3
+; SSE2-NEXT:    pminsw %xmm8, %xmm2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm4
 ; SSE2-NEXT:    pextrw $7, %xmm4, %eax
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB15_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  .LBB15_30: # %else28
-; SSE2-NEXT:    packuswb %xmm3, %xmm2
+; SSE2-NEXT:    pxor %xmm6, %xmm3
+; SSE2-NEXT:    pxor %xmm6, %xmm2
 ; SSE2-NEXT:    shrl $8, %eax
 ; SSE2-NEXT:    notb %al
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB15_32: # %else30
+; SSE2-NEXT:    packuswb %xmm3, %xmm2
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm2, %eax
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_34
 ; SSE2-NEXT:  # %bb.33: # %cond.store31
-; SSE2-NEXT:    movb %al, 16(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    movb %cl, 16(%rdi)
 ; SSE2-NEXT:  .LBB15_34: # %else32
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_36
 ; SSE2-NEXT:  # %bb.35: # %cond.store33
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    movb %ah, 17(%rdi)
 ; SSE2-NEXT:  .LBB15_36: # %else34
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB15_38
 ; SSE2-NEXT:  # %bb.37: # %cond.store35
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 18(%rdi)
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 18(%rdi)
 ; SSE2-NEXT:  .LBB15_38: # %else36
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_40
 ; SSE2-NEXT:  # %bb.39: # %cond.store37
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 19(%rdi)
 ; SSE2-NEXT:  .LBB15_40: # %else38
@@ -5763,9 +5804,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_42
 ; SSE2-NEXT:  # %bb.41: # %cond.store39
+; SSE2-NEXT:    pextrw $2, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 20(%rdi)
 ; SSE2-NEXT:  .LBB15_42: # %else40
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5773,7 +5814,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_44
 ; SSE2-NEXT:  # %bb.43: # %cond.store41
-; SSE2-NEXT:    movb %ch, 21(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 21(%rdi)
 ; SSE2-NEXT:  .LBB15_44: # %else42
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -5781,9 +5823,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_46
 ; SSE2-NEXT:  # %bb.45: # %cond.store43
+; SSE2-NEXT:    pextrw $3, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 22(%rdi)
 ; SSE2-NEXT:  .LBB15_46: # %else44
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5791,7 +5833,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_48
 ; SSE2-NEXT:  # %bb.47: # %cond.store45
-; SSE2-NEXT:    movb %ch, 23(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 23(%rdi)
 ; SSE2-NEXT:  .LBB15_48: # %else46
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -5799,9 +5842,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_50
 ; SSE2-NEXT:  # %bb.49: # %cond.store47
+; SSE2-NEXT:    pextrw $4, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 24(%rdi)
 ; SSE2-NEXT:  .LBB15_50: # %else48
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5809,7 +5852,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_52
 ; SSE2-NEXT:  # %bb.51: # %cond.store49
-; SSE2-NEXT:    movb %ch, 25(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 25(%rdi)
 ; SSE2-NEXT:  .LBB15_52: # %else50
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -5817,9 +5861,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_54
 ; SSE2-NEXT:  # %bb.53: # %cond.store51
+; SSE2-NEXT:    pextrw $5, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 26(%rdi)
 ; SSE2-NEXT:  .LBB15_54: # %else52
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5827,7 +5871,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_56
 ; SSE2-NEXT:  # %bb.55: # %cond.store53
-; SSE2-NEXT:    movb %ch, 27(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 27(%rdi)
 ; SSE2-NEXT:  .LBB15_56: # %else54
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm5, %xmm0
@@ -5835,9 +5880,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_58
 ; SSE2-NEXT:  # %bb.57: # %cond.store55
+; SSE2-NEXT:    pextrw $6, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 28(%rdi)
 ; SSE2-NEXT:  .LBB15_58: # %else56
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5845,7 +5890,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_60
 ; SSE2-NEXT:  # %bb.59: # %cond.store57
-; SSE2-NEXT:    movb %ch, 29(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 29(%rdi)
 ; SSE2-NEXT:  .LBB15_60: # %else58
 ; SSE2-NEXT:    pxor %xmm0, %xmm0
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm5
@@ -5853,9 +5899,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
 ; SSE2-NEXT:    je .LBB15_62
 ; SSE2-NEXT:  # %bb.61: # %cond.store59
+; SSE2-NEXT:    pextrw $7, %xmm2, %ecx
 ; SSE2-NEXT:    movb %cl, 30(%rdi)
 ; SSE2-NEXT:  .LBB15_62: # %else60
 ; SSE2-NEXT:    shrl $8, %eax
@@ -5863,7 +5909,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB15_64
 ; SSE2-NEXT:  # %bb.63: # %cond.store61
-; SSE2-NEXT:    movb %ch, 31(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm2, %eax
+; SSE2-NEXT:    movb %ah, 31(%rdi)
 ; SSE2-NEXT:  .LBB15_64: # %else62
 ; SSE2-NEXT:    retq
 ;
@@ -6297,16 +6344,17 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    je .LBB15_34
 ; AVX1-NEXT:  # %bb.33: # %cond.store31
-; AVX1-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX1-NEXT:  .LBB15_34: # %else32
 ; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_36
 ; AVX1-NEXT:  # %bb.35: # %cond.store33
-; AVX1-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
 ; AVX1-NEXT:  .LBB15_36: # %else34
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6316,13 +6364,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_38
 ; AVX1-NEXT:  # %bb.37: # %cond.store35
-; AVX1-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX1-NEXT:  .LBB15_38: # %else36
 ; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_40
 ; AVX1-NEXT:  # %bb.39: # %cond.store37
-; AVX1-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
 ; AVX1-NEXT:  .LBB15_40: # %else38
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6332,13 +6382,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_42
 ; AVX1-NEXT:  # %bb.41: # %cond.store39
-; AVX1-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX1-NEXT:  .LBB15_42: # %else40
 ; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_44
 ; AVX1-NEXT:  # %bb.43: # %cond.store41
-; AVX1-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
 ; AVX1-NEXT:  .LBB15_44: # %else42
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6348,13 +6400,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_46
 ; AVX1-NEXT:  # %bb.45: # %cond.store43
-; AVX1-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX1-NEXT:  .LBB15_46: # %else44
 ; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_48
 ; AVX1-NEXT:  # %bb.47: # %cond.store45
-; AVX1-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
 ; AVX1-NEXT:  .LBB15_48: # %else46
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6364,13 +6418,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_50
 ; AVX1-NEXT:  # %bb.49: # %cond.store47
-; AVX1-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX1-NEXT:  .LBB15_50: # %else48
 ; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_52
 ; AVX1-NEXT:  # %bb.51: # %cond.store49
-; AVX1-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
 ; AVX1-NEXT:  .LBB15_52: # %else50
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6380,13 +6436,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_54
 ; AVX1-NEXT:  # %bb.53: # %cond.store51
-; AVX1-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX1-NEXT:  .LBB15_54: # %else52
 ; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_56
 ; AVX1-NEXT:  # %bb.55: # %cond.store53
-; AVX1-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
 ; AVX1-NEXT:  .LBB15_56: # %else54
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm2
@@ -6396,13 +6454,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_58
 ; AVX1-NEXT:  # %bb.57: # %cond.store55
-; AVX1-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX1-NEXT:  .LBB15_58: # %else56
 ; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_60
 ; AVX1-NEXT:  # %bb.59: # %cond.store57
-; AVX1-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
 ; AVX1-NEXT:  .LBB15_60: # %else58
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
@@ -6412,12 +6472,14 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_62
 ; AVX1-NEXT:  # %bb.61: # %cond.store59
-; AVX1-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX1-NEXT:  .LBB15_62: # %else60
 ; AVX1-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX1-NEXT:    testb $1, %al
 ; AVX1-NEXT:    je .LBB15_64
 ; AVX1-NEXT:  # %bb.63: # %cond.store61
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX1-NEXT:  .LBB15_64: # %else62
 ; AVX1-NEXT:    vzeroupper
@@ -6564,17 +6626,18 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    vpextrb $0, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    je .LBB15_34
 ; AVX2-NEXT:  # %bb.33: # %cond.store31
-; AVX2-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX2-NEXT:  .LBB15_34: # %else32
 ; AVX2-NEXT:    vpextrb $1, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_36
 ; AVX2-NEXT:  # %bb.35: # %cond.store33
-; AVX2-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
 ; AVX2-NEXT:  .LBB15_36: # %else34
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6584,14 +6647,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_38
 ; AVX2-NEXT:  # %bb.37: # %cond.store35
-; AVX2-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX2-NEXT:  .LBB15_38: # %else36
 ; AVX2-NEXT:    vpextrb $3, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_40
 ; AVX2-NEXT:  # %bb.39: # %cond.store37
-; AVX2-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
 ; AVX2-NEXT:  .LBB15_40: # %else38
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6601,14 +6666,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_42
 ; AVX2-NEXT:  # %bb.41: # %cond.store39
-; AVX2-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX2-NEXT:  .LBB15_42: # %else40
 ; AVX2-NEXT:    vpextrb $5, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_44
 ; AVX2-NEXT:  # %bb.43: # %cond.store41
-; AVX2-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
 ; AVX2-NEXT:  .LBB15_44: # %else42
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6618,14 +6685,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_46
 ; AVX2-NEXT:  # %bb.45: # %cond.store43
-; AVX2-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX2-NEXT:  .LBB15_46: # %else44
 ; AVX2-NEXT:    vpextrb $7, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_48
 ; AVX2-NEXT:  # %bb.47: # %cond.store45
-; AVX2-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
 ; AVX2-NEXT:  .LBB15_48: # %else46
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6635,14 +6704,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_50
 ; AVX2-NEXT:  # %bb.49: # %cond.store47
-; AVX2-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX2-NEXT:  .LBB15_50: # %else48
 ; AVX2-NEXT:    vpextrb $9, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_52
 ; AVX2-NEXT:  # %bb.51: # %cond.store49
-; AVX2-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
 ; AVX2-NEXT:  .LBB15_52: # %else50
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6652,14 +6723,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_54
 ; AVX2-NEXT:  # %bb.53: # %cond.store51
-; AVX2-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX2-NEXT:  .LBB15_54: # %else52
 ; AVX2-NEXT:    vpextrb $11, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_56
 ; AVX2-NEXT:  # %bb.55: # %cond.store53
-; AVX2-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
 ; AVX2-NEXT:  .LBB15_56: # %else54
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6669,14 +6742,16 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_58
 ; AVX2-NEXT:  # %bb.57: # %cond.store55
-; AVX2-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX2-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX2-NEXT:  .LBB15_58: # %else56
 ; AVX2-NEXT:    vpextrb $13, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_60
 ; AVX2-NEXT:  # %bb.59: # %cond.store57
-; AVX2-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
 ; AVX2-NEXT:  .LBB15_60: # %else58
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6686,13 +6761,15 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_62
 ; AVX2-NEXT:  # %bb.61: # %cond.store59
-; AVX2-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX2-NEXT:  .LBB15_62: # %else60
 ; AVX2-NEXT:    vpextrb $15, %xmm1, %eax
 ; AVX2-NEXT:    notb %al
 ; AVX2-NEXT:    testb $1, %al
 ; AVX2-NEXT:    je .LBB15_64
 ; AVX2-NEXT:  # %bb.63: # %cond.store61
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX2-NEXT:  .LBB15_64: # %else62
 ; AVX2-NEXT:    vzeroupper
@@ -6900,10 +6977,10 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb $1, %al
-; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    je .LBB15_34
 ; AVX512F-NEXT:  # %bb.33: # %cond.store31
-; AVX512F-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $0, %xmm3, 16(%rdi)
 ; AVX512F-NEXT:  .LBB15_34: # %else32
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6913,7 +6990,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_36
 ; AVX512F-NEXT:  # %bb.35: # %cond.store33
-; AVX512F-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $1, %xmm1, 17(%rdi)
 ; AVX512F-NEXT:  .LBB15_36: # %else34
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6927,7 +7005,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_38
 ; AVX512F-NEXT:  # %bb.37: # %cond.store35
-; AVX512F-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $2, %xmm3, 18(%rdi)
 ; AVX512F-NEXT:  .LBB15_38: # %else36
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6937,7 +7016,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_40
 ; AVX512F-NEXT:  # %bb.39: # %cond.store37
-; AVX512F-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $3, %xmm1, 19(%rdi)
 ; AVX512F-NEXT:  .LBB15_40: # %else38
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6951,7 +7031,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_42
 ; AVX512F-NEXT:  # %bb.41: # %cond.store39
-; AVX512F-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $4, %xmm3, 20(%rdi)
 ; AVX512F-NEXT:  .LBB15_42: # %else40
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6961,7 +7042,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_44
 ; AVX512F-NEXT:  # %bb.43: # %cond.store41
-; AVX512F-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $5, %xmm1, 21(%rdi)
 ; AVX512F-NEXT:  .LBB15_44: # %else42
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6975,7 +7057,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_46
 ; AVX512F-NEXT:  # %bb.45: # %cond.store43
-; AVX512F-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $6, %xmm3, 22(%rdi)
 ; AVX512F-NEXT:  .LBB15_46: # %else44
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -6985,7 +7068,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_48
 ; AVX512F-NEXT:  # %bb.47: # %cond.store45
-; AVX512F-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $7, %xmm1, 23(%rdi)
 ; AVX512F-NEXT:  .LBB15_48: # %else46
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -6999,7 +7083,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_50
 ; AVX512F-NEXT:  # %bb.49: # %cond.store47
-; AVX512F-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $8, %xmm3, 24(%rdi)
 ; AVX512F-NEXT:  .LBB15_50: # %else48
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7009,7 +7094,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_52
 ; AVX512F-NEXT:  # %bb.51: # %cond.store49
-; AVX512F-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $9, %xmm1, 25(%rdi)
 ; AVX512F-NEXT:  .LBB15_52: # %else50
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7023,7 +7109,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_54
 ; AVX512F-NEXT:  # %bb.53: # %cond.store51
-; AVX512F-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $10, %xmm3, 26(%rdi)
 ; AVX512F-NEXT:  .LBB15_54: # %else52
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7033,7 +7120,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_56
 ; AVX512F-NEXT:  # %bb.55: # %cond.store53
-; AVX512F-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $11, %xmm1, 27(%rdi)
 ; AVX512F-NEXT:  .LBB15_56: # %else54
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7047,7 +7135,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_58
 ; AVX512F-NEXT:  # %bb.57: # %cond.store55
-; AVX512F-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpextrb $12, %xmm3, 28(%rdi)
 ; AVX512F-NEXT:  .LBB15_58: # %else56
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7057,7 +7146,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_60
 ; AVX512F-NEXT:  # %bb.59: # %cond.store57
-; AVX512F-NEXT:    vpextrb $13, %xmm0, 29(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
 ; AVX512F-NEXT:  .LBB15_60: # %else58
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm2, %ymm1
@@ -7071,7 +7161,8 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_62
 ; AVX512F-NEXT:  # %bb.61: # %cond.store59
-; AVX512F-NEXT:    vpextrb $14, %xmm0, 30(%rdi)
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
 ; AVX512F-NEXT:  .LBB15_62: # %else60
 ; AVX512F-NEXT:    vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
@@ -7081,6 +7172,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
 ; AVX512F-NEXT:    testb $1, %al
 ; AVX512F-NEXT:    je .LBB15_64
 ; AVX512F-NEXT:  # %bb.63: # %cond.store61
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
 ; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
 ; AVX512F-NEXT:  .LBB15_64: # %else62
 ; AVX512F-NEXT:    vzeroupper
@@ -7124,40 +7216,42 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    pminsw %xmm5, %xmm0
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm3, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    notb %al
-; SSE2-NEXT:    testb $1, %al
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB16_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB16_2: # %else
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $8, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    movb %ah, 1(%rdi)
 ; SSE2-NEXT:  .LBB16_4: # %else2
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
-; SSE2-NEXT:    movd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    notb %dl
-; SSE2-NEXT:    testb $1, %dl
+; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movl %eax, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    notb %cl
+; SSE2-NEXT:    testb $1, %cl
 ; SSE2-NEXT:    je .LBB16_6
 ; SSE2-NEXT:  # %bb.5: # %cond.store3
-; SSE2-NEXT:    movl %eax, %edx
-; SSE2-NEXT:    shrl $16, %edx
-; SSE2-NEXT:    movb %dl, 2(%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    shrl $16, %ecx
+; SSE2-NEXT:    movb %cl, 2(%rdi)
 ; SSE2-NEXT:  .LBB16_6: # %else4
-; SSE2-NEXT:    shrl $24, %ecx
-; SSE2-NEXT:    notb %cl
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $24, %eax
+; SSE2-NEXT:    notb %al
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_8
 ; SSE2-NEXT:  # %bb.7: # %cond.store5
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $24, %eax
 ; SSE2-NEXT:    movb %al, 3(%rdi)
 ; SSE2-NEXT:  .LBB16_8: # %else6
@@ -7167,9 +7261,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_10
 ; SSE2-NEXT:  # %bb.9: # %cond.store7
+; SSE2-NEXT:    pextrw $2, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 4(%rdi)
 ; SSE2-NEXT:  .LBB16_10: # %else8
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7177,7 +7271,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_12
 ; SSE2-NEXT:  # %bb.11: # %cond.store9
-; SSE2-NEXT:    movb %ch, 5(%rdi)
+; SSE2-NEXT:    pextrw $2, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 5(%rdi)
 ; SSE2-NEXT:  .LBB16_12: # %else10
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7185,9 +7280,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_14
 ; SSE2-NEXT:  # %bb.13: # %cond.store11
+; SSE2-NEXT:    pextrw $3, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 6(%rdi)
 ; SSE2-NEXT:  .LBB16_14: # %else12
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7195,7 +7290,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_16
 ; SSE2-NEXT:  # %bb.15: # %cond.store13
-; SSE2-NEXT:    movb %ch, 7(%rdi)
+; SSE2-NEXT:    pextrw $3, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 7(%rdi)
 ; SSE2-NEXT:  .LBB16_16: # %else14
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7203,9 +7299,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_18
 ; SSE2-NEXT:  # %bb.17: # %cond.store15
+; SSE2-NEXT:    pextrw $4, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 8(%rdi)
 ; SSE2-NEXT:  .LBB16_18: # %else16
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7213,7 +7309,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_20
 ; SSE2-NEXT:  # %bb.19: # %cond.store17
-; SSE2-NEXT:    movb %ch, 9(%rdi)
+; SSE2-NEXT:    pextrw $4, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 9(%rdi)
 ; SSE2-NEXT:  .LBB16_20: # %else18
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7221,9 +7318,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_22
 ; SSE2-NEXT:  # %bb.21: # %cond.store19
+; SSE2-NEXT:    pextrw $5, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 10(%rdi)
 ; SSE2-NEXT:  .LBB16_22: # %else20
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7231,7 +7328,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_24
 ; SSE2-NEXT:  # %bb.23: # %cond.store21
-; SSE2-NEXT:    movb %ch, 11(%rdi)
+; SSE2-NEXT:    pextrw $5, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 11(%rdi)
 ; SSE2-NEXT:  .LBB16_24: # %else22
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -7239,9 +7337,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_26
 ; SSE2-NEXT:  # %bb.25: # %cond.store23
+; SSE2-NEXT:    pextrw $6, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 12(%rdi)
 ; SSE2-NEXT:  .LBB16_26: # %else24
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7249,7 +7347,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_28
 ; SSE2-NEXT:  # %bb.27: # %cond.store25
-; SSE2-NEXT:    movb %ch, 13(%rdi)
+; SSE2-NEXT:    pextrw $6, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 13(%rdi)
 ; SSE2-NEXT:  .LBB16_28: # %else26
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm1, %xmm2
@@ -7257,9 +7356,9 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    movl %eax, %ecx
 ; SSE2-NEXT:    notb %cl
 ; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    je .LBB16_30
 ; SSE2-NEXT:  # %bb.29: # %cond.store27
+; SSE2-NEXT:    pextrw $7, %xmm0, %ecx
 ; SSE2-NEXT:    movb %cl, 14(%rdi)
 ; SSE2-NEXT:  .LBB16_30: # %else28
 ; SSE2-NEXT:    shrl $8, %eax
@@ -7267,7 +7366,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
 ; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB16_32
 ; SSE2-NEXT:  # %bb.31: # %cond.store29
-; SSE2-NEXT:    movb %ch, 15(%rdi)
+; SSE2-NEXT:    pextrw $7, %xmm0, %eax
+; SSE2-NEXT:    movb %ah, 15(%rdi)
 ; SSE2-NEXT:  .LBB16_32: # %else30
 ; SSE2-NEXT:    retq
 ;
@@ -7909,17 +8009,18 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask)
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
 ; SSE2-NEXT:    pminsw {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    movd %xmm3, %ecx
-; SSE2-NEXT:    testb $1, %cl
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movd %xmm3, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB17_2
 ; SSE2-NEXT:  # %bb.1: # %cond.store
-; SSE2-NEXT:    movb %al, (%rdi)
+; SSE2-NEXT:    movd %xmm0, %ecx
+; SSE2-NEXT:    movb %cl, (%rdi)
 ; SSE2-NEXT:  .LBB17_2: # %else
-; SSE2-NEXT:    shrl $16, %ecx
-; SSE2-NEXT:    testb $1, %cl
+; SSE2-NEXT:    shrl $16, %eax
+; SSE2-NEXT:    testb $1, %al
 ; SSE2-NEXT:    je .LBB17_4
 ; SSE2-NEXT:  # %bb.3: # %cond.store1
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    shrl $16, %eax
 ; SSE2-NEXT:    movb %al, 1(%rdi)
 ; SSE2-NEXT:  .LBB17_4: # %else2

From 76737f4d19f7d9d653b232b810bbc917d9a41ac1 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Mon, 27 May 2019 07:26:13 +0000
Subject: [PATCH 0279/1176] Remove elf::createSharedFile and move its code to
 SharedFile's ctor. NFC.

llvm-svn: 361747
---
 lld/ELF/Driver.cpp     |   2 +-
 lld/ELF/InputFiles.cpp | 100 ++++++++++++++++++++---------------------
 lld/ELF/InputFiles.h   |   1 -
 3 files changed, 49 insertions(+), 54 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 416fbb12b65d5..37465b501c802 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -250,7 +250,7 @@ void LinkerDriver::addFile(StringRef Path, bool WithLOption) {
     // significant, as a user did not specify it. This behavior is
     // compatible with GNU.
     Files.push_back(
-        createSharedFile(MBRef, WithLOption ? path::filename(Path) : Path));
+        make<SharedFile>(MBRef, WithLOption ? path::filename(Path) : Path));
     return;
   case file_magic::bitcode:
   case file_magic::elf_relocatable:
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 8a8bf6061e7d8..0581f35893ba0 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -48,6 +48,36 @@ std::vector<SharedFile *> elf::SharedFiles;
 
 std::unique_ptr<TarWriter> elf::Tar;
 
+static ELFKind getELFKind(MemoryBufferRef MB, StringRef ArchiveName) {
+  unsigned char Size;
+  unsigned char Endian;
+  std::tie(Size, Endian) = getElfArchType(MB.getBuffer());
+
+  auto Fatal = [&](StringRef Msg) {
+    StringRef Filename = MB.getBufferIdentifier();
+    if (ArchiveName.empty())
+      fatal(Filename + ": " + Msg);
+    else
+      fatal(ArchiveName + "(" + Filename + "): " + Msg);
+  };
+
+  if (!MB.getBuffer().startswith(ElfMagic))
+    Fatal("not an ELF file");
+  if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB)
+    Fatal("corrupted ELF file: invalid data encoding");
+  if (Size != ELFCLASS32 && Size != ELFCLASS64)
+    Fatal("corrupted ELF file: invalid file class");
+
+  size_t BufSize = MB.getBuffer().size();
+  if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) ||
+      (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr)))
+    Fatal("corrupted ELF file: file is too short");
+
+  if (Size == ELFCLASS32)
+    return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
+  return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
+}
+
 InputFile::InputFile(Kind K, MemoryBufferRef M)
     : MB(M), GroupId(NextGroupId), FileKind(K) {
   // All files within the same --{start,end}-group get the same group ID.
@@ -1038,7 +1068,24 @@ unsigned SharedFile::VernauxNum;
 
 SharedFile::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
     : ELFFileBase(SharedKind, M), SoName(DefaultSoName),
-      IsNeeded(!Config->AsNeeded) {}
+      IsNeeded(!Config->AsNeeded) {
+  switch (getELFKind(MB, "")) {
+  case ELF32LEKind:
+    parseHeader<ELF32LE>();
+    break;
+  case ELF32BEKind:
+    parseHeader<ELF32BE>();
+    break;
+  case ELF64LEKind:
+    parseHeader<ELF64LE>();
+    break;
+  case ELF64BEKind:
+    parseHeader<ELF64BE>();
+    break;
+  default:
+    llvm_unreachable("getELFKind");
+  }
+}
 
 // Parse the version definitions in the object file if present, and return a
 // vector whose nth element contains a pointer to the Elf_Verdef for version
@@ -1376,36 +1423,6 @@ void BitcodeFile::parse(
     addDependentLibrary(L, this);
 }
 
-static ELFKind getELFKind(MemoryBufferRef MB, StringRef ArchiveName) {
-  unsigned char Size;
-  unsigned char Endian;
-  std::tie(Size, Endian) = getElfArchType(MB.getBuffer());
-
-  auto Fatal = [&](StringRef Msg) {
-    StringRef Filename = MB.getBufferIdentifier();
-    if (ArchiveName.empty())
-      fatal(Filename + ": " + Msg);
-    else
-      fatal(ArchiveName + "(" + Filename + "): " + Msg);
-  };
-
-  if (!MB.getBuffer().startswith(ElfMagic))
-    Fatal("not an ELF file");
-  if (Endian != ELFDATA2LSB && Endian != ELFDATA2MSB)
-    Fatal("corrupted ELF file: invalid data encoding");
-  if (Size != ELFCLASS32 && Size != ELFCLASS64)
-    Fatal("corrupted ELF file: invalid file class");
-
-  size_t BufSize = MB.getBuffer().size();
-  if ((Size == ELFCLASS32 && BufSize < sizeof(Elf32_Ehdr)) ||
-      (Size == ELFCLASS64 && BufSize < sizeof(Elf64_Ehdr)))
-    Fatal("corrupted ELF file: file is too short");
-
-  if (Size == ELFCLASS32)
-    return (Endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
-  return (Endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
-}
-
 void BinaryFile::parse() {
   ArrayRef<uint8_t> Data = arrayRefFromStringRef(MB.getBuffer());
   auto *Section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
@@ -1448,27 +1465,6 @@ InputFile *elf::createObjectFile(MemoryBufferRef MB, StringRef ArchiveName,
   }
 }
 
-InputFile *elf::createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName) {
-  auto *F = make<SharedFile>(MB, DefaultSoName);
-  switch (getELFKind(MB, "")) {
-  case ELF32LEKind:
-    F->parseHeader<ELF32LE>();
-    break;
-  case ELF32BEKind:
-    F->parseHeader<ELF32BE>();
-    break;
-  case ELF64LEKind:
-    F->parseHeader<ELF64LE>();
-    break;
-  case ELF64BEKind:
-    F->parseHeader<ELF64BE>();
-    break;
-  default:
-    llvm_unreachable("getELFKind");
-  }
-  return F;
-}
-
 void LazyObjFile::fetch() {
   if (MB.getBuffer().empty())
     return;
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 81ee0302da020..a51ba64aac3ca 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -380,7 +380,6 @@ class BinaryFile : public InputFile {
 
 InputFile *createObjectFile(MemoryBufferRef MB, StringRef ArchiveName = "",
                             uint64_t OffsetInArchive = 0);
-InputFile *createSharedFile(MemoryBufferRef MB, StringRef DefaultSoName);
 
 inline bool isBitcode(MemoryBufferRef MB) {
   return identify_magic(MB.getBuffer()) == llvm::file_magic::bitcode;

From 1d28030f99791c043917faf6066ceba208504627 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic@rt-rk.com>
Date: Mon, 27 May 2019 07:48:28 +0000
Subject: [PATCH 0280/1176] [test commit] Add my name to the CREDITS.TXT

This is my test commit. (NFC)

llvm-svn: 361748
---
 llvm/CREDITS.TXT | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/CREDITS.TXT b/llvm/CREDITS.TXT
index 527229d4ba7c0..dab633c7e3b11 100644
--- a/llvm/CREDITS.TXT
+++ b/llvm/CREDITS.TXT
@@ -533,3 +533,7 @@ D: PowerPC Backend Developer
 N: Qiu Chaofan
 E: qiucf@cn.ibm.com
 D: PowerPC Backend Developer
+
+N: Djordje Todorovic
+E: djordje.todorovic@rt-rk.com
+D: Debug Information

From c11de5eada2decd0a495ea02676b6f4838cd54fb Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Mon, 27 May 2019 08:09:02 +0000
Subject: [PATCH 0281/1176] [clang-tidy] Fix unused-variable warning after
 r361647.

Summary:
A range-for was added in r361647 where the range variable was only used in an
assertion.  As a result, it warned for Release builds. This revision
restructures the assertion to avoid the problem.

Patch by Yitzhak Mandelbaum.

Reviewers: ilya-biryukov

Reviewed By: ilya-biryukov

Subscribers: xazax.hun, cfe-commits

Tags: #clang-tools-extra, #clang

Differential Revision: https://reviews.llvm.org/D62412

llvm-svn: 361749
---
 .../clang-tidy/utils/TransformerClangTidyCheck.cpp  | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
index 12be8a6dce732..80a829808681e 100644
--- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
+++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp
@@ -7,21 +7,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "TransformerClangTidyCheck.h"
+#include "llvm/ADT/STLExtras.h"
 
 namespace clang {
 namespace tidy {
 namespace utils {
 using tooling::RewriteRule;
 
-TransformerClangTidyCheck::TransformerClangTidyCheck(tooling::RewriteRule R,
+TransformerClangTidyCheck::TransformerClangTidyCheck(RewriteRule R,
                                                      StringRef Name,
                                                      ClangTidyContext *Context)
     : ClangTidyCheck(Name, Context), Rule(std::move(R)) {
-  for (const auto &Case : Rule.Cases) {
-    assert(Case.Explanation != nullptr &&
-           "clang-tidy checks must have an explanation by default;"
-           " explicitly provide an empty explanation if none is desired");
-  }
+  assert(llvm::all_of(Rule.Cases, [](const RewriteRule::Case &C) {
+                       return C.Explanation != nullptr;
+                     }) &&
+         "clang-tidy checks must have an explanation by default;"
+         " explicitly provide an empty explanation if none is desired");
 }
 
 void TransformerClangTidyCheck::registerMatchers(

From 6e379e2b68a02a4e65a695ae1ebadf762d1e59b6 Mon Sep 17 00:00:00 2001
From: Serge Guelton <sguelton@redhat.com>
Date: Mon, 27 May 2019 08:24:06 +0000
Subject: [PATCH 0282/1176] Make llvm-as --help great again

This is a follow-up to https://reviews.llvm.org/D60411, but for llvm-as.

New output:

    OVERVIEW: llvm .ll -> .bc assembler

    USAGE: llvm-as [options] <input .llvm file>

    OPTIONS:

    Generic Options:

      -help                        - Display available options (-help-hidden for more)
      -help-list                   - Display list of available options (-help-list-hidden for more)
      -version                     - Display the version of this program

    llvm-as Options:

      -data-layout=<layout-string> - data layout string to use
      -disable-output              - Disable output
      -f                           - Enable binary output on terminals
      -module-hash                 - Emit module hash
      -o=<filename>                - Override output filename

Differential Revision: https://reviews.llvm.org/D60603

llvm-svn: 361750
---
 llvm/test/tools/llvm-as/help.test     |  3 +++
 llvm/test/tools/llvm-as/lit.local.cfg |  4 ++++
 llvm/tools/llvm-as/llvm-as.cpp        | 22 ++++++++++++++--------
 3 files changed, 21 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/tools/llvm-as/help.test
 create mode 100644 llvm/test/tools/llvm-as/lit.local.cfg

diff --git a/llvm/test/tools/llvm-as/help.test b/llvm/test/tools/llvm-as/help.test
new file mode 100644
index 0000000000000..8c65b350f3655
--- /dev/null
+++ b/llvm/test/tools/llvm-as/help.test
@@ -0,0 +1,3 @@
+RUN: llvm-as --help 2>&1 | FileCheck --implicit-check-not="General Options:" %s
+CHECK: Generic Options:
+CHECK: llvm-as Options:
diff --git a/llvm/test/tools/llvm-as/lit.local.cfg b/llvm/test/tools/llvm-as/lit.local.cfg
new file mode 100644
index 0000000000000..1fc0bea084cad
--- /dev/null
+++ b/llvm/test/tools/llvm-as/lit.local.cfg
@@ -0,0 +1,4 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+
+config.suffixes = ['.ll', '.s', '.test', '.yaml']
diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp
index 8f8d10ff57280..234fef907a385 100644
--- a/llvm/tools/llvm-as/llvm-as.cpp
+++ b/llvm/tools/llvm-as/llvm-as.cpp
@@ -30,38 +30,43 @@
 #include <memory>
 using namespace llvm;
 
+cl::OptionCategory AsCat("llvm-as Options");
+
 static cl::opt<std::string> InputFilename(cl::Positional,
                                           cl::desc("<input .llvm file>"),
                                           cl::init("-"));
 
 static cl::opt<std::string> OutputFilename("o",
                                            cl::desc("Override output filename"),
-                                           cl::value_desc("filename"));
+                                           cl::value_desc("filename"),
+                                           cl::cat(AsCat));
 
-static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"));
+static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
+                           cl::cat(AsCat));
 
 static cl::opt<bool> DisableOutput("disable-output", cl::desc("Disable output"),
-                                   cl::init(false));
+                                   cl::init(false), cl::cat(AsCat));
 
 static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
-                                    cl::init(false));
+                                    cl::init(false), cl::cat(AsCat));
 
 static cl::opt<bool> DumpAsm("d", cl::desc("Print assembly as parsed"),
-                             cl::Hidden);
+                             cl::Hidden, cl::cat(AsCat));
 
 static cl::opt<bool>
     DisableVerify("disable-verify", cl::Hidden,
-                  cl::desc("Do not run verifier on input LLVM (dangerous!)"));
+                  cl::desc("Do not run verifier on input LLVM (dangerous!)"),
+                  cl::cat(AsCat));
 
 static cl::opt<bool> PreserveBitcodeUseListOrder(
     "preserve-bc-uselistorder",
     cl::desc("Preserve use-list order when writing LLVM bitcode."),
-    cl::init(true), cl::Hidden);
+    cl::init(true), cl::Hidden, cl::cat(AsCat));
 
 static cl::opt<std::string> ClDataLayout("data-layout",
                                          cl::desc("data layout string to use"),
                                          cl::value_desc("layout-string"),
-                                         cl::init(""));
+                                         cl::init(""), cl::cat(AsCat));
 
 static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
   // Infer the output filename if needed.
@@ -109,6 +114,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   LLVMContext Context;
+  cl::HideUnrelatedOptions(AsCat);
   cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n");
 
   // Parse the file now...

From 0f40585d2d532718a89972bf22ff7743c1d512bc Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Mon, 27 May 2019 09:03:00 +0000
Subject: [PATCH 0283/1176] Cmake: allow using LLVM_EXTERNAL_PROJECTS with
 LLVM_ENABLE_PROJECTS

The current code iterates over the combination of LLVM_EXTERNAL_PROJECTS
and LLVM_ENABLE_PROJECTS, but then disables projects that are only in
the former. If a project is in LLVM_EXTERNAL_PROJECTS, it should be
enabled.

See also llvm-commits thread on r354060.

Differential revision: https://reviews.llvm.org/D62289

llvm-svn: 361751
---
 llvm/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 0f8ddd87e02cc..895f9ab7189d1 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -145,6 +145,9 @@ if (LLVM_ENABLE_PROJECTS_USED OR NOT LLVM_ENABLE_PROJECTS STREQUAL "")
         message(FATAL_ERROR "LLVM_ENABLE_PROJECTS requests ${proj} but directory not found: ${PROJ_DIR}")
       endif()
       set(LLVM_EXTERNAL_${upper_proj}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${proj}" CACHE STRING "")
+    elseif ("${proj}" IN_LIST LLVM_EXTERNAL_PROJECTS)
+      message(STATUS "${proj} project is enabled")
+      set(SHOULD_ENABLE_PROJECT TRUE)
     else()
       message(STATUS "${proj} project is disabled")
       set(SHOULD_ENABLE_PROJECT FALSE)

From c8272195cd2d02cb4d626a3f64144a916bd81d95 Mon Sep 17 00:00:00 2001
From: Balazs Keri <1.int32@gmail.com>
Date: Mon, 27 May 2019 09:36:00 +0000
Subject: [PATCH 0284/1176] [ASTImporter] Added visibility context check for
 CXXRecordDecl.

Summary:
ASTImporter makes now difference between classes with same name in different
translation units if these are not visible outside. These classes are not linked
into one decl chain.

Reviewers: martong, a.sidorin, shafik

Reviewed By: shafik

Subscribers: rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62312

llvm-svn: 361752
---
 clang/lib/AST/ASTImporter.cpp                 |  3 ++
 .../AST/ASTImporterVisibilityTest.cpp         | 37 ++++++++++++++++++-
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 2b7470410f720..2e4c304b3de20 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -2559,6 +2559,9 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
           if (!IsStructuralMatch(D, FoundRecord, false))
             continue;
 
+        if (!hasSameVisibilityContext(FoundRecord, D))
+          continue;
+
         if (IsStructuralMatch(D, FoundRecord)) {
           RecordDecl *FoundDef = FoundRecord->getDefinition();
           if (D->isThisDeclarationADefinition() && FoundDef) {
diff --git a/clang/unittests/AST/ASTImporterVisibilityTest.cpp b/clang/unittests/AST/ASTImporterVisibilityTest.cpp
index a4d242d3bbf1d..95b7c4c920abb 100644
--- a/clang/unittests/AST/ASTImporterVisibilityTest.cpp
+++ b/clang/unittests/AST/ASTImporterVisibilityTest.cpp
@@ -31,6 +31,10 @@ struct GetVarPattern {
   using DeclTy = VarDecl;
   BindableMatcher<Decl> operator()() { return varDecl(hasName("v")); }
 };
+struct GetClassPattern {
+  using DeclTy = CXXRecordDecl;
+  BindableMatcher<Decl> operator()() { return cxxRecordDecl(hasName("X")); }
+};
 
 // Values for the value-parameterized test fixtures.
 // FunctionDecl:
@@ -41,6 +45,9 @@ const auto *AnonF = "namespace { void f(); }";
 const auto *ExternV = "extern int v;";
 const auto *StaticV = "static int v;";
 const auto *AnonV = "namespace { extern int v; }";
+// CXXRecordDecl:
+const auto *ExternC = "class X;";
+const auto *AnonC = "namespace { class X; }";
 
 // First value in tuple: Compile options.
 // Second value in tuple: Source code to be used in the test.
@@ -84,14 +91,19 @@ class ImportVisibilityChain
 // Manual instantiation of the fixture with each type.
 using ImportFunctionsVisibilityChain = ImportVisibilityChain<GetFunPattern>;
 using ImportVariablesVisibilityChain = ImportVisibilityChain<GetVarPattern>;
-// Value-parameterized test for the first type.
+using ImportClassesVisibilityChain = ImportVisibilityChain<GetClassPattern>;
+// Value-parameterized test for functions.
 TEST_P(ImportFunctionsVisibilityChain, ImportChain) {
   TypedTest_ImportChain();
 }
-// Value-parameterized test for the second type.
+// Value-parameterized test for variables.
 TEST_P(ImportVariablesVisibilityChain, ImportChain) {
   TypedTest_ImportChain();
 }
+// Value-parameterized test for classes.
+TEST_P(ImportClassesVisibilityChain, ImportChain) {
+  TypedTest_ImportChain();
+}
 
 // Automatic instantiation of the value-parameterized tests.
 INSTANTIATE_TEST_CASE_P(ParameterizedTests, ImportFunctionsVisibilityChain,
@@ -110,6 +122,11 @@ INSTANTIATE_TEST_CASE_P(
         // provided but they must have the same linkage.  See also the test
         // ImportVariableChainInC which test for this special C Lang case.
         ::testing::Values(ExternV, AnonV)), );
+INSTANTIATE_TEST_CASE_P(
+    ParameterizedTests, ImportClassesVisibilityChain,
+    ::testing::Combine(
+        DefaultTestValuesForRunOptions,
+        ::testing::Values(ExternC, AnonC)), );
 
 // First value in tuple: Compile options.
 // Second value in tuple: Tuple with informations for the test.
@@ -169,6 +186,7 @@ class ImportVisibility
 };
 using ImportFunctionsVisibility = ImportVisibility<GetFunPattern>;
 using ImportVariablesVisibility = ImportVisibility<GetVarPattern>;
+using ImportClassesVisibility = ImportVisibility<GetClassPattern>;
 
 // FunctionDecl.
 TEST_P(ImportFunctionsVisibility, ImportAfter) {
@@ -184,6 +202,13 @@ TEST_P(ImportVariablesVisibility, ImportAfter) {
 TEST_P(ImportVariablesVisibility, ImportAfterImport) {
   TypedTest_ImportAfterImport();
 }
+// CXXRecordDecl.
+TEST_P(ImportClassesVisibility, ImportAfter) {
+  TypedTest_ImportAfter();
+}
+TEST_P(ImportClassesVisibility, ImportAfterImport) {
+  TypedTest_ImportAfterImport();
+}
 
 const bool ExpectLink = true;
 const bool ExpectNotLink = false;
@@ -214,6 +239,14 @@ INSTANTIATE_TEST_CASE_P(
                           std::make_tuple(AnonV, ExternV, ExpectNotLink),
                           std::make_tuple(AnonV, StaticV, ExpectNotLink),
                           std::make_tuple(AnonV, AnonV, ExpectNotLink))), );
+INSTANTIATE_TEST_CASE_P(
+    ParameterizedTests, ImportClassesVisibility,
+    ::testing::Combine(
+        DefaultTestValuesForRunOptions,
+        ::testing::Values(std::make_tuple(ExternC, ExternC, ExpectLink),
+                          std::make_tuple(ExternC, AnonC, ExpectNotLink),
+                          std::make_tuple(AnonC, ExternC, ExpectNotLink),
+                          std::make_tuple(AnonC, AnonC, ExpectNotLink))), );
 
 } // end namespace ast_matchers
 } // end namespace clang

From 32497f57dfee25ae75e5e9f906eeb1a2d87d9e88 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Mon, 27 May 2019 09:52:09 +0000
Subject: [PATCH 0285/1176] [CodeComplete] Complete 'return true/false' in
 boolean functions

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62391

llvm-svn: 361753
---
 clang/lib/Sema/SemaCodeComplete.cpp    | 37 ++++++++++++++++++--------
 clang/test/CodeCompletion/patterns.cpp | 17 ++++++++++--
 2 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index f5e5a84de7a40..16e7308681439 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -2168,23 +2168,38 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
       Results.AddResult(Result(Builder.TakeString()));
     }
 
-    // "return expression ;" or "return ;", depending on whether we
-    // know the function is void or not.
-    bool isVoid = false;
+    // "return expression ;" or "return ;", depending on the return type.
+    QualType ReturnType;
     if (const auto *Function = dyn_cast<FunctionDecl>(SemaRef.CurContext))
-      isVoid = Function->getReturnType()->isVoidType();
+      ReturnType = Function->getReturnType();
     else if (const auto *Method = dyn_cast<ObjCMethodDecl>(SemaRef.CurContext))
-      isVoid = Method->getReturnType()->isVoidType();
+      ReturnType = Method->getReturnType();
     else if (SemaRef.getCurBlock() &&
              !SemaRef.getCurBlock()->ReturnType.isNull())
-      isVoid = SemaRef.getCurBlock()->ReturnType->isVoidType();
-    Builder.AddTypedTextChunk("return");
-    if (!isVoid) {
-      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+      ReturnType = SemaRef.getCurBlock()->ReturnType;;
+    if (ReturnType.isNull() || ReturnType->isVoidType()) {
+      Builder.AddTypedTextChunk("return");
+      Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+      Results.AddResult(Result(Builder.TakeString()));
+    } else {
+      assert(!ReturnType.isNull());
+      // "return expression ;"
+      Builder.AddTypedTextChunk("return");
+      Builder.AddChunk(clang::CodeCompletionString::CK_HorizontalSpace);
       Builder.AddPlaceholderChunk("expression");
+      Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+      Results.AddResult(Result(Builder.TakeString()));
+      // When boolean, also add 'return true;' and 'return false;'.
+      if (ReturnType->isBooleanType()) {
+        Builder.AddTypedTextChunk("return true");
+        Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+        Results.AddResult(Result(Builder.TakeString()));
+
+        Builder.AddTypedTextChunk("return false");
+        Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+        Results.AddResult(Result(Builder.TakeString()));
+      }
     }
-    Builder.AddChunk(CodeCompletionString::CK_SemiColon);
-    Results.AddResult(Result(Builder.TakeString()));
 
     // goto identifier ;
     Builder.AddTypedTextChunk("goto");
diff --git a/clang/test/CodeCompletion/patterns.cpp b/clang/test/CodeCompletion/patterns.cpp
index 1958529687249..596fe829af43e 100644
--- a/clang/test/CodeCompletion/patterns.cpp
+++ b/clang/test/CodeCompletion/patterns.cpp
@@ -30,10 +30,23 @@ int value_return() {
 void void_return() {
   // line 31
 }
+bool bool_return() {
+  // line 34
+}
 // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:28:1 %s -o - | FileCheck -check-prefix=RETURN-VAL %s
-// RETURN-VAL-NOT: COMPLETION: Pattern : return;{{$}}
+// RETURN-VAL-NOT: COMPLETION: Pattern : return;
+// RETURN-VAL-NOT: COMPLETION: Pattern : return false;
+// RETURN-VAL-NOT: COMPLETION: Pattern : return true;
 // RETURN-VAL: COMPLETION: Pattern : return <#expression#>;{{$}}
 
 // RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:31:1 %s -o - | FileCheck -check-prefix=RETURN-VOID %s
-// RETURN-VOID-NOT: COMPLETION: Pattern : return <#expression#>;{{$}}
+// RETURN-VOID-NOT: COMPLETION: Pattern : return false;
+// RETURN-VOID-NOT: COMPLETION: Pattern : return true;
+// RETURN-VOID-NOT: COMPLETION: Pattern : return <#expression#>;
 // RETURN-VOID: COMPLETION: Pattern : return;{{$}}
+
+// RUN: %clang_cc1 -fsyntax-only -code-completion-patterns -code-completion-at=%s:34:1 %s -o - | FileCheck -check-prefix=RETURN-BOOL %s
+// RETURN-BOOL-NOT: COMPLETION: Pattern : return;
+// RETURN-BOOL: COMPLETION: Pattern : return <#expression#>;{{$}}
+// RETURN-BOOL: COMPLETION: Pattern : return false;{{$}}
+// RETURN-BOOL: COMPLETION: Pattern : return true;{{$}}

From 519ef6afdf18ae88f5a2e0ef7499a1aaeb74e995 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Mon, 27 May 2019 10:10:59 +0000
Subject: [PATCH 0286/1176] DWARF: Remove cu_idx variables from parsing
 functions

These variables were useful when looking up the compile unit index
required a binary search. Now that we can look up a compile unit index
in constant time, they are no longer needed.

llvm-svn: 361754
---
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      | 61 ++++++++-----------
 .../SymbolFile/DWARF/SymbolFileDWARF.h        |  7 +--
 .../SymbolFile/DWARF/SymbolFileDWARFDwo.cpp   |  6 +-
 .../SymbolFile/DWARF/SymbolFileDWARFDwo.h     |  3 +-
 4 files changed, 31 insertions(+), 46 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 11a89db7aa5de..463af1af9a1cf 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -640,8 +640,7 @@ const DWARFDebugRangesBase *SymbolFileDWARF::DebugRanges() const {
   return m_ranges.get();
 }
 
-lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFUnit *dwarf_cu,
-                                                   uint32_t cu_idx) {
+lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFUnit *dwarf_cu) {
   CompUnitSP cu_sp;
   if (dwarf_cu) {
     CompileUnit *comp_unit = (CompileUnit *)dwarf_cu->GetUserData();
@@ -650,8 +649,7 @@ lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFUnit *dwarf_cu,
       cu_sp = comp_unit->shared_from_this();
     } else {
       if (dwarf_cu->GetSymbolFileDWARF() != this) {
-        return dwarf_cu->GetSymbolFileDWARF()->ParseCompileUnit(dwarf_cu,
-                                                                cu_idx);
+        return dwarf_cu->GetSymbolFileDWARF()->ParseCompileUnit(dwarf_cu);
       } else if (dwarf_cu->GetOffset() == 0 && GetDebugMapSymfile()) {
         // Let the debug map create the compile unit
         cu_sp = m_debug_map_symfile->GetCompileUnit(this);
@@ -681,29 +679,24 @@ lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFUnit *dwarf_cu,
             cu_sp = std::make_shared<CompileUnit>(
                 module_sp, dwarf_cu, cu_file_spec, dwarf_cu->GetID(),
                 cu_language, is_optimized ? eLazyBoolYes : eLazyBoolNo);
-            if (cu_sp) {
-              // If we just created a compile unit with an invalid file spec,
-              // try and get the first entry in the supports files from the
-              // line table as that should be the compile unit.
-              if (!cu_file_spec) {
-                cu_file_spec = cu_sp->GetSupportFiles().GetFileSpecAtIndex(1);
-                if (cu_file_spec) {
-                  (FileSpec &)(*cu_sp) = cu_file_spec;
-                  // Also fix the invalid file spec which was copied from the
-                  // compile unit.
-                  cu_sp->GetSupportFiles().Replace(0, cu_file_spec);
-                }
-              }
 
-              dwarf_cu->SetUserData(cu_sp.get());
+            // If we just created a compile unit with an invalid file spec,
+            // try and get the first entry in the supports files from the
+            // line table as that should be the compile unit.
+            if (!cu_file_spec) {
+              cu_file_spec = cu_sp->GetSupportFiles().GetFileSpecAtIndex(1);
+              if (cu_file_spec) {
+                (FileSpec &)(*cu_sp) = cu_file_spec;
+                // Also fix the invalid file spec which was copied from the
+                // compile unit.
+                cu_sp->GetSupportFiles().Replace(0, cu_file_spec);
+              }
+            }
 
-              // Figure out the compile unit index if we weren't given one
-              if (cu_idx == UINT32_MAX)
-                cu_idx = dwarf_cu->GetID();
+            dwarf_cu->SetUserData(cu_sp.get());
 
-              m_obj_file->GetModule()->GetSymbolVendor()->SetCompileUnitAtIndex(
-                  cu_idx, cu_sp);
-            }
+            m_obj_file->GetModule()->GetSymbolVendor()->SetCompileUnitAtIndex(
+                dwarf_cu->GetID(), cu_sp);
           }
         }
       }
@@ -726,7 +719,7 @@ CompUnitSP SymbolFileDWARF::ParseCompileUnitAtIndex(uint32_t cu_idx) {
   if (info) {
     DWARFUnit *dwarf_cu = info->GetUnitAtIndex(cu_idx);
     if (dwarf_cu)
-      cu_sp = ParseCompileUnit(dwarf_cu, cu_idx);
+      cu_sp = ParseCompileUnit(dwarf_cu);
   }
   return cu_sp;
 }
@@ -1405,14 +1398,12 @@ Type *SymbolFileDWARF::ResolveType(const DWARFDIE &die,
   return nullptr;
 }
 
-CompileUnit *
-SymbolFileDWARF::GetCompUnitForDWARFCompUnit(DWARFUnit *dwarf_cu,
-                                             uint32_t cu_idx) {
+CompileUnit *SymbolFileDWARF::GetCompUnitForDWARFCompUnit(DWARFUnit *dwarf_cu) {
   // Check if the symbol vendor already knows about this compile unit?
   if (dwarf_cu->GetUserData() == nullptr) {
     // The symbol vendor doesn't know about this compile unit, we need to parse
     // and add it to the symbol vendor object.
-    return ParseCompileUnit(dwarf_cu, cu_idx).get();
+    return ParseCompileUnit(dwarf_cu).get();
   }
   return (CompileUnit *)dwarf_cu->GetUserData();
 }
@@ -1429,7 +1420,7 @@ bool SymbolFileDWARF::GetFunction(const DWARFDIE &die, SymbolContext &sc) {
 
   if (die) {
     // Check if the symbol vendor already knows about this compile unit?
-    sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU(), UINT32_MAX);
+    sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU());
 
     sc.function = sc.comp_unit->FindFunctionByUID(die.GetID()).get();
     if (sc.function == nullptr)
@@ -1691,7 +1682,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const Address &so_addr,
         DWARFUnit *dwarf_cu = debug_info->GetUnitAtOffset(DIERef::Section::DebugInfo,
                                                           cu_offset, &cu_idx);
         if (dwarf_cu) {
-          sc.comp_unit = GetCompUnitForDWARFCompUnit(dwarf_cu, cu_idx);
+          sc.comp_unit = GetCompUnitForDWARFCompUnit(dwarf_cu);
           if (sc.comp_unit) {
             resolved |= eSymbolContextCompUnit;
 
@@ -1792,13 +1783,13 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec,
       for (cu_idx = 0;
            (dwarf_cu = debug_info->GetUnitAtIndex(cu_idx)) != nullptr;
            ++cu_idx) {
-        CompileUnit *dc_cu = GetCompUnitForDWARFCompUnit(dwarf_cu, cu_idx);
+        CompileUnit *dc_cu = GetCompUnitForDWARFCompUnit(dwarf_cu);
         const bool full_match = (bool)file_spec.GetDirectory();
         bool file_spec_matches_cu_file_spec =
             dc_cu != nullptr && FileSpec::Equal(file_spec, *dc_cu, full_match);
         if (check_inlines || file_spec_matches_cu_file_spec) {
           SymbolContext sc(m_obj_file->GetModule());
-          sc.comp_unit = GetCompUnitForDWARFCompUnit(dwarf_cu, cu_idx);
+          sc.comp_unit = GetCompUnitForDWARFCompUnit(dwarf_cu);
           if (sc.comp_unit) {
             uint32_t file_idx = UINT32_MAX;
 
@@ -1989,7 +1980,7 @@ uint32_t SymbolFileDWARF::FindGlobalVariables(
           break;
 
         case DW_TAG_variable: {
-          sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU(), UINT32_MAX);
+          sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU());
 
           if (parent_decl_ctx) {
             DWARFASTParser *dwarf_ast = die.GetDWARFParser();
@@ -2071,7 +2062,7 @@ uint32_t SymbolFileDWARF::FindGlobalVariables(const RegularExpression &regex,
       DWARFDIE die = GetDIE(die_ref);
 
       if (die) {
-        sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU(), UINT32_MAX);
+        sc.comp_unit = GetCompUnitForDWARFCompUnit(die.GetCU());
 
         ParseVariables(sc, die, LLDB_INVALID_ADDRESS, false, false, &variables);
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index d3272454d0a5e..28cfe1a9c695f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -239,9 +239,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
   bool
   HasForwardDeclForClangType(const lldb_private::CompilerType &compiler_type);
 
-  lldb_private::CompileUnit *
-  GetCompUnitForDWARFCompUnit(DWARFUnit *dwarf_cu,
-                              uint32_t cu_idx = UINT32_MAX);
+  lldb_private::CompileUnit *GetCompUnitForDWARFCompUnit(DWARFUnit *dwarf_cu);
 
   virtual size_t GetObjCMethodDIEOffsets(lldb_private::ConstString class_name,
                                          DIEArray &method_die_offsets);
@@ -252,8 +250,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
 
   static DWARFDIE GetParentSymbolContextDIE(const DWARFDIE &die);
 
-  virtual lldb::CompUnitSP ParseCompileUnit(DWARFUnit *dwarf_cu,
-                                            uint32_t cu_idx);
+  virtual lldb::CompUnitSP ParseCompileUnit(DWARFUnit *dwarf_cu);
 
   virtual lldb_private::DWARFExpression::LocationListFormat
   GetLocationListFormat() const;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
index c9a1fc61aca4c..3e35ff1579e42 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp
@@ -45,13 +45,11 @@ void SymbolFileDWARFDwo::LoadSectionData(lldb::SectionType sect_type,
   SymbolFileDWARF::LoadSectionData(sect_type, data);
 }
 
-lldb::CompUnitSP
-SymbolFileDWARFDwo::ParseCompileUnit(DWARFUnit *dwarf_cu,
-                                     uint32_t cu_idx) {
+lldb::CompUnitSP SymbolFileDWARFDwo::ParseCompileUnit(DWARFUnit *dwarf_cu) {
   assert(GetCompileUnit() == dwarf_cu && "SymbolFileDWARFDwo::ParseCompileUnit "
                                          "called with incompatible compile "
                                          "unit");
-  return GetBaseSymbolFile()->ParseCompileUnit(m_base_dwarf_cu, UINT32_MAX);
+  return GetBaseSymbolFile()->ParseCompileUnit(m_base_dwarf_cu);
 }
 
 DWARFUnit *SymbolFileDWARFDwo::GetCompileUnit() {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
index e5369d5d8afcb..5b5d73da75344 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h
@@ -17,8 +17,7 @@ class SymbolFileDWARFDwo : public SymbolFileDWARF {
 
   ~SymbolFileDWARFDwo() override = default;
 
-  lldb::CompUnitSP ParseCompileUnit(DWARFUnit *dwarf_cu,
-                                    uint32_t cu_idx) override;
+  lldb::CompUnitSP ParseCompileUnit(DWARFUnit *dwarf_cu) override;
 
   DWARFUnit *GetCompileUnit();
 

From 68b20c589c4890c28fbabe6e2f4636f41d47fd0b Mon Sep 17 00:00:00 2001
From: Diana Picus <diana.picus@linaro.org>
Date: Mon, 27 May 2019 10:30:33 +0000
Subject: [PATCH 0287/1176] [ARM GlobalISel] Cleanup CallLowering a bit

We never actually use the Offsets produced by ComputeValueVTs, so remove
them until we need them.

llvm-svn: 361755
---
 llvm/lib/Target/ARM/ARMCallLowering.cpp | 33 +++++++++----------------
 llvm/lib/Target/ARM/ARMCallLowering.h   |  2 +-
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 5229064032a02..bfdf7f0b667e2 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -192,8 +192,7 @@ void ARMCallLowering::splitToValueTypes(
   const Function &F = MF.getFunction();
 
   SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0);
 
   if (SplitVTs.size() == 1) {
     // Even if there is no splitting to do, we still want to replace the
@@ -206,7 +205,6 @@ void ARMCallLowering::splitToValueTypes(
     return;
   }
 
-  unsigned FirstRegIdx = SplitArgs.size();
   for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
     EVT SplitVT = SplitVTs[i];
     Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
@@ -224,13 +222,11 @@ void ARMCallLowering::splitToValueTypes(
         Flags.setInConsecutiveRegsLast();
     }
 
-    SplitArgs.push_back(
-        ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
-                SplitTy, Flags, OrigArg.IsFixed});
+    unsigned PartReg =
+        MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL));
+    SplitArgs.push_back(ArgInfo{PartReg, SplitTy, Flags, OrigArg.IsFixed});
+    PerformArgSplit(PartReg);
   }
-
-  for (unsigned i = 0; i < Offsets.size(); ++i)
-    PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
 }
 
 /// Lower the return value for the already existing \p Ret. This assumes that
@@ -262,9 +258,8 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
     setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
 
     SmallVector<unsigned, 4> Regs;
-    splitToValueTypes(
-        CurArgInfo, SplitVTs, MF,
-        [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
+    splitToValueTypes(CurArgInfo, SplitVTs, MF,
+                      [&](unsigned Reg) { Regs.push_back(Reg); });
     if (Regs.size() > 1)
       MIRBuilder.buildUnmerge(Regs, VRegs[i]);
   }
@@ -466,9 +461,8 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
 
     SplitRegs.clear();
 
-    splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
-      SplitRegs.push_back(Reg);
-    });
+    splitToValueTypes(AInfo, ArgInfos, MF,
+                      [&](unsigned Reg) { SplitRegs.push_back(Reg); });
 
     if (!SplitRegs.empty())
       MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
@@ -575,9 +569,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       return false;
 
     SmallVector<unsigned, 8> Regs;
-    splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
-      Regs.push_back(Reg);
-    });
+    splitToValueTypes(Arg, ArgInfos, MF,
+                      [&](unsigned Reg) { Regs.push_back(Reg); });
 
     if (Regs.size() > 1)
       MIRBuilder.buildUnmerge(Regs, Arg.Reg);
@@ -598,9 +591,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     ArgInfos.clear();
     SmallVector<unsigned, 8> SplitRegs;
     splitToValueTypes(OrigRet, ArgInfos, MF,
-                      [&](unsigned Reg, uint64_t Offset) {
-                        SplitRegs.push_back(Reg);
-                      });
+                      [&](unsigned Reg) { SplitRegs.push_back(Reg); });
 
     auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg);
     CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h b/llvm/lib/Target/ARM/ARMCallLowering.h
index d395a42179cdc..63760efceb918 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -47,7 +47,7 @@ class ARMCallLowering : public CallLowering {
                       ArrayRef<unsigned> VRegs,
                       MachineInstrBuilder &Ret) const;
 
-  using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
+  using SplitArgTy = std::function<void(unsigned Reg)>;
 
   /// Split an argument into one or more arguments that the CC lowering can cope
   /// with (e.g. replace pointers with integers).

From c675215f679979400ac856a98034ba2907c1a063 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana.picus@linaro.org>
Date: Mon, 27 May 2019 10:32:34 +0000
Subject: [PATCH 0288/1176] [ARM GlobalISel] Un-XFAIL some tests. NFC

It turns out we support big endian now (probably since r332449, but I
haven't bisected to confirm).

llvm-svn: 361756
---
 llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll   | 1 -
 llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index 1d7ca322ce9be..2677a4cfdb557 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE
 ; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG
-; XFAIL: armeb
 
 define void @test_void_return() {
 ; CHECK-LABEL: name: test_void_return
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
index cff38c0339c49..0ce23be65bdbb 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -O0 -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=ARM -check-prefix=LITTLE
 ; RUN: llc -O0 -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=ARM -check-prefix=BIG
-; XFAIL: armeb
 ; RUN: llc -O0 -mtriple thumb-unknown -mattr=+vfp2,+v6t2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -check-prefix=THUMB
 
 declare arm_aapcscc i32* @simple_reg_params_target(i32, i32*)

From a53d48b7f45d45c7ffbfe9cffcf6be078cab2f66 Mon Sep 17 00:00:00 2001
From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Mon, 27 May 2019 11:19:07 +0000
Subject: [PATCH 0289/1176] [OpenCL] Fix file-scope const sampler variable for
 2.0

OpenCL spec v2.0 s6.13.14:

Samplers can also be declared as global constants in the program
source using the following syntax.

   const sampler_t <sampler name> = <value>
This works fine for OpenCL 1.2 but fails for 2.0, because clang duduces
address space of file-scope const sampler variable to be in global address
space whereas spec v2.0 s6.9.b forbids file-scope sampler variable to be
in global address space.

The fix is not to deduce address space for file-scope sampler variables.

Differential Revision: https://reviews.llvm.org/D62197

llvm-svn: 361757
---
 clang/lib/Sema/SemaType.cpp         | 16 +++++++++++++++-
 clang/test/CodeGenOpenCL/sampler.cl | 15 +++++++++++++--
 clang/test/SemaOpenCL/sampler_t.cl  |  7 +++++++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 6ba4bdcf1def9..91743bb59fef9 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7363,7 +7363,21 @@ static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State,
       T->isDependentType() ||
       // Do not deduce addr space of decltype because it will be taken from
       // its argument.
-      T->isDecltypeType())
+      T->isDecltypeType() ||
+      // OpenCL spec v2.0 s6.9.b:
+      // The sampler type cannot be used with the __local and __global address
+      // space qualifiers.
+      // OpenCL spec v2.0 s6.13.14:
+      // Samplers can also be declared as global constants in the program
+      // source using the following syntax.
+      //   const sampler_t <sampler name> = <value>
+      // In codegen, file-scope sampler type variable has special handing and
+      // does not rely on address space qualifier. On the other hand, deducing
+      // address space of const sampler file-scope variable as global address
+      // space causes spurious diagnostic about __global address space
+      // qualifier, therefore do not deduce address space of file-scope sampler
+      // type variable.
+      (D.getContext() == DeclaratorContext::FileContext && T->isSamplerT()))
     return;
 
   LangAS ImpAddr = LangAS::Default;
diff --git a/clang/test/CodeGenOpenCL/sampler.cl b/clang/test/CodeGenOpenCL/sampler.cl
index 22976c57665f0..74b6d55d5d37e 100644
--- a/clang/test/CodeGenOpenCL/sampler.cl
+++ b/clang/test/CodeGenOpenCL/sampler.cl
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
 //
 // This test covers 5 cases of sampler initialzation:
 //   1. function argument passing
@@ -6,8 +7,9 @@
 //      1b. argument is a function-scope variable
 //      1c. argument is one of caller function's parameters
 //   2. variable initialization
-//      2a. initializing a file-scope variable
+//      2a. initializing a file-scope variable with constant addr space qualifier
 //      2b. initializing a function-scope variable
+//      2c. initializing a file-scope variable with const qualifier
 
 #define CLK_ADDRESS_CLAMP_TO_EDGE       2
 #define CLK_NORMALIZED_COORDS_TRUE      1
@@ -20,6 +22,10 @@
 constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
 // CHECK-NOT: glb_smp
 
+// Case 2c
+const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+// CHECK-NOT: glb_smp_const
+
 int get_sampler_initializer(void);
 
 void fnc4smp(sampler_t s) {}
@@ -47,11 +53,16 @@ kernel void foo(sampler_t smp_par) {
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
   // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
 
-  // Case 1a
+  // Case 1a/2a
   fnc4smp(glb_smp);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
   // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
 
+  // Case 1a/2c
+  fnc4smp(glb_smp_const);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+
   // Case 1c
   fnc4smp(smp_par);
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]]
diff --git a/clang/test/SemaOpenCL/sampler_t.cl b/clang/test/SemaOpenCL/sampler_t.cl
index 8473fa33631a1..28e7a0ad27ecf 100644
--- a/clang/test/SemaOpenCL/sampler_t.cl
+++ b/clang/test/SemaOpenCL/sampler_t.cl
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -DCHECK_SAMPLER_VALUE -Wspir-compat -triple amdgcn--amdhsa
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -DCHECK_SAMPLER_VALUE -triple spir-unknown-unknown
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only -DCHECK_SAMPLER_VALUE -Wspir-compat -triple amdgcn--amdhsa
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -verify -pedantic -fsyntax-only -DCHECK_SAMPLER_VALUE -triple spir-unknown-unknown
 
 #define CLK_ADDRESS_CLAMP_TO_EDGE       2
 #define CLK_NORMALIZED_COORDS_TRUE      1
@@ -55,7 +58,11 @@ void kernel ker(sampler_t argsmp) {
   sampler_t sa[] = {argsmp, glb_smp}; // expected-error {{array of 'sampler_t' type is invalid in OpenCL}}
 }
 
+#if __OPENCL_C_VERSION__ == 200
+void bad(sampler_t*); // expected-error{{pointer to type '__generic sampler_t' is invalid in OpenCL}}
+#else
 void bad(sampler_t*); // expected-error{{pointer to type 'sampler_t' is invalid in OpenCL}}
+#endif
 
 void bar() {
   sampler_t smp1 = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;

From ae4ec62cc9a027ed017f640305be9c3d630f5775 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Mon, 27 May 2019 11:53:24 +0000
Subject: [PATCH 0290/1176] FuncUnwinders: prefer debug_frame over eh_frame

The two sections usually contain the same information, and we rarely
have both kinds of entries for a single function. However, in theory the
debug_frame plan can be more complete, whereas eh_frame is only required
to be correct at places where exceptions can be thrown.

Reviewers: jasonmolenda, clayborg

Subscribers: lldb-commits

Differential Revision: https://reviews.llvm.org/D62374

llvm-svn: 361758
---
 .../Inputs/prefer-debug-over-eh-frame.s       | 38 +++++++++++++++++++
 .../Unwind/prefer-debug-over-eh-frame.test    | 23 +++++++++++
 lldb/source/Symbol/FuncUnwinders.cpp          |  8 ++--
 3 files changed, 65 insertions(+), 4 deletions(-)
 create mode 100644 lldb/lit/Unwind/Inputs/prefer-debug-over-eh-frame.s
 create mode 100644 lldb/lit/Unwind/prefer-debug-over-eh-frame.test

diff --git a/lldb/lit/Unwind/Inputs/prefer-debug-over-eh-frame.s b/lldb/lit/Unwind/Inputs/prefer-debug-over-eh-frame.s
new file mode 100644
index 0000000000000..c9b7a785c3410
--- /dev/null
+++ b/lldb/lit/Unwind/Inputs/prefer-debug-over-eh-frame.s
@@ -0,0 +1,38 @@
+        .cfi_sections .eh_frame, .debug_frame
+        .text
+        .globl  bar
+bar:
+        .cfi_startproc
+        leal    (%edi, %edi), %eax
+        ret
+        .cfi_endproc
+
+        .globl  foo
+foo:
+        .cfi_startproc
+        pushq   %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset %rbp, -16
+        movq    %rsp, %rbp
+        .cfi_def_cfa_register %rbp
+        call    bar
+        addl    $1, %eax
+        popq    %rbp
+        ret
+        .cfi_endproc
+
+        .globl  asm_main
+asm_main:
+        .cfi_startproc
+        pushq   %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset 6, -16
+        movq    %rsp, %rbp
+        .cfi_def_cfa_register 6
+        movl    $47, %edi
+
+        call foo
+        popq    %rbp
+        .cfi_def_cfa 7, 8
+        ret
+        .cfi_endproc
diff --git a/lldb/lit/Unwind/prefer-debug-over-eh-frame.test b/lldb/lit/Unwind/prefer-debug-over-eh-frame.test
new file mode 100644
index 0000000000000..49113466b6074
--- /dev/null
+++ b/lldb/lit/Unwind/prefer-debug-over-eh-frame.test
@@ -0,0 +1,23 @@
+# Test that we prefer debug_frame over eh_frame unwind plans. They usually
+# contain the same information, and we rarely have both kinds of entries for a
+# single function. However, in theory the debug_frame plan can be more complete,
+# whereas eh_frame is only required to be correct at places where exceptions can
+# be thrown.
+
+# UNSUPPORTED: system-windows
+# REQUIRES: target-x86_64, native
+
+# RUN: %clang %p/Inputs/call-asm.c %p/Inputs/prefer-debug-over-eh-frame.s -o %t
+# RUN: %lldb %t -s %s -o exit | FileCheck %s
+
+breakpoint set -n bar
+# CHECK: Breakpoint 1: where = {{.*}}`bar
+
+process launch
+# CHECK: stop reason = breakpoint 1.1
+
+target modules show-unwind -n foo
+# CHECK: Asynchronous (not restricted to call-sites) UnwindPlan is 'DWARF CFI plus augmentation from assembly parsing'
+# CHECK: Synchronous (restricted to call-sites) UnwindPlan is 'DWARF CFI'
+# CHECK: eh_frame UnwindPlan:
+# CHECK: debug_frame UnwindPlan:
diff --git a/lldb/source/Symbol/FuncUnwinders.cpp b/lldb/source/Symbol/FuncUnwinders.cpp
index 33b2e29598985..09cb9b00aaf3b 100644
--- a/lldb/source/Symbol/FuncUnwinders.cpp
+++ b/lldb/source/Symbol/FuncUnwinders.cpp
@@ -60,10 +60,10 @@ UnwindPlanSP FuncUnwinders::GetUnwindPlanAtCallSite(Target &target,
 
   if (UnwindPlanSP plan_sp = GetSymbolFileUnwindPlan(thread))
     return plan_sp;
-  if (UnwindPlanSP plan_sp = GetEHFrameUnwindPlan(target))
-    return plan_sp;
   if (UnwindPlanSP plan_sp = GetDebugFrameUnwindPlan(target))
     return plan_sp;
+  if (UnwindPlanSP plan_sp = GetEHFrameUnwindPlan(target))
+    return plan_sp;
   if (UnwindPlanSP plan_sp = GetCompactUnwindUnwindPlan(target))
     return plan_sp;
   if (UnwindPlanSP plan_sp = GetArmUnwindUnwindPlan(target))
@@ -362,10 +362,10 @@ UnwindPlanSP FuncUnwinders::GetUnwindPlanAtNonCallSite(Target &target,
 
   if (UnwindPlanSP plan_sp = GetSymbolFileUnwindPlan(thread))
     return plan_sp;
-  if (UnwindPlanSP plan_sp = GetEHFrameAugmentedUnwindPlan(target, thread))
-    return plan_sp;
   if (UnwindPlanSP plan_sp = GetDebugFrameAugmentedUnwindPlan(target, thread))
     return plan_sp;
+  if (UnwindPlanSP plan_sp = GetEHFrameAugmentedUnwindPlan(target, thread))
+    return plan_sp;
 
   return assembly_sp;
 }

From 2b5f340bcba3ac00244aa1aacbb027c142ca187f Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Mon, 27 May 2019 13:23:23 +0000
Subject: [PATCH 0291/1176] DWARF: Add a simple test exercising debug_loc
 parsing

llvm-svn: 361759
---
 lldb/lit/SymbolFile/DWARF/debug_loc.s | 117 ++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 lldb/lit/SymbolFile/DWARF/debug_loc.s

diff --git a/lldb/lit/SymbolFile/DWARF/debug_loc.s b/lldb/lit/SymbolFile/DWARF/debug_loc.s
new file mode 100644
index 0000000000000..01533cc9b3b01
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/debug_loc.s
@@ -0,0 +1,117 @@
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t
+# RUN: lldb %t -o "image lookup -v -a 0" -o "image lookup -v -a 2" -o exit \
+# RUN:   | FileCheck %s
+
+# CHECK-LABEL: image lookup -v -a 0
+# CHECK: Variable: {{.*}}, name = "x", type = "int", location = rdi,
+
+# CHECK-LABEL: image lookup -v -a 2
+# CHECK: Variable: {{.*}}, name = "x", type = "int", location = rax,
+
+        .type   f,@function
+f:                                      # @f
+.Lfunc_begin0:
+        movl    %edi, %eax
+.Ltmp0:
+        retq
+.Ltmp1:
+.Lfunc_end0:
+        .size   f, .Lfunc_end0-f
+
+        .section        .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz  "Hand-written DWARF"
+.Linfo_string3:
+        .asciz  "f"
+.Linfo_string4:
+        .asciz  "int"
+.Linfo_string5:
+        .asciz  "x"
+
+        .section        .debug_loc,"",@progbits
+.Ldebug_loc0:
+        .quad   .Lfunc_begin0-.Lfunc_begin0
+        .quad   .Ltmp0-.Lfunc_begin0
+        .short  1                       # Loc expr size
+        .byte   85                      # super-register DW_OP_reg5
+        .quad   .Ltmp0-.Lfunc_begin0
+        .quad   .Lfunc_end0-.Lfunc_begin0
+        .short  1                       # Loc expr size
+        .byte   80                      # super-register DW_OP_reg0
+        .quad   0
+        .quad   0
+
+        .section        .debug_abbrev,"",@progbits
+        .byte   1                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   37                      # DW_AT_producer
+        .byte   14                      # DW_FORM_strp
+        .byte   19                      # DW_AT_language
+        .byte   5                       # DW_FORM_data2
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   2                       # Abbreviation Code
+        .byte   46                      # DW_TAG_subprogram
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   17                      # DW_AT_low_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   18                      # DW_AT_high_pc
+        .byte   6                       # DW_FORM_data4
+        .byte   3                       # DW_AT_name
+        .byte   14                      # DW_FORM_strp
+        .byte   73                      # DW_AT_type
+        .byte   19                      # DW_FORM_ref4
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   3                       # Abbreviation Code
+        .byte   5                       # DW_TAG_formal_parameter
+        .byte   0                       # DW_CHILDREN_no
+        .byte   2                       # DW_AT_location
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   3                       # DW_AT_name
+        .byte   14                      # DW_FORM_strp
+        .byte   73                      # DW_AT_type
+        .byte   19                      # DW_FORM_ref4
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   4                       # Abbreviation Code
+        .byte   36                      # DW_TAG_base_type
+        .byte   0                       # DW_CHILDREN_no
+        .byte   3                       # DW_AT_name
+        .byte   14                      # DW_FORM_strp
+        .byte   62                      # DW_AT_encoding
+        .byte   11                      # DW_FORM_data1
+        .byte   11                      # DW_AT_byte_size
+        .byte   11                      # DW_FORM_data1
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+
+        .section        .debug_info,"",@progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  4                       # DWARF version number
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   8                       # Address Size (in bytes)
+        .byte   1                       # Abbrev [1] 0xb:0x50 DW_TAG_compile_unit
+        .long   .Linfo_string0          # DW_AT_producer
+        .short  12                      # DW_AT_language
+        .byte   2                       # Abbrev [2] 0x2a:0x29 DW_TAG_subprogram
+        .quad   .Lfunc_begin0           # DW_AT_low_pc
+        .long   .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .long   .Linfo_string3          # DW_AT_name
+        .long   83                      # DW_AT_type
+        .byte   3                       # Abbrev [3] 0x43:0xf DW_TAG_formal_parameter
+        .long   .Ldebug_loc0            # DW_AT_location
+        .long   .Linfo_string5          # DW_AT_name
+        .long   .Lint-.Lcu_begin0       # DW_AT_type
+        .byte   0                       # End Of Children Mark
+.Lint:
+        .byte   4                       # Abbrev [4] 0x53:0x7 DW_TAG_base_type
+        .long   .Linfo_string4          # DW_AT_name
+        .byte   5                       # DW_AT_encoding
+        .byte   4                       # DW_AT_byte_size
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end0:

From 5a500fd2c50a70c98981de5f2b7fb1b7be615127 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Mon, 27 May 2019 13:43:01 +0000
Subject: [PATCH 0292/1176] XFAIL prefer-debug-over-eh-frame.test on darwin

debug_frame does not seem to work on darwin, so there is nothing to
prefer.

Adding `-g` to the compiler command line is enough to get the
__debug_frame section added to the dsym file. Though lldb then finds the
section, and correctly assigns the section type to it, this does not
seem to be enough to get lldb to actually use this section for
unwinding.

llvm-svn: 361760
---
 lldb/lit/Unwind/prefer-debug-over-eh-frame.test | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/lit/Unwind/prefer-debug-over-eh-frame.test b/lldb/lit/Unwind/prefer-debug-over-eh-frame.test
index 49113466b6074..f023a1e75fad1 100644
--- a/lldb/lit/Unwind/prefer-debug-over-eh-frame.test
+++ b/lldb/lit/Unwind/prefer-debug-over-eh-frame.test
@@ -5,9 +5,10 @@
 # be thrown.
 
 # UNSUPPORTED: system-windows
+# XFAIL: system-darwin
 # REQUIRES: target-x86_64, native
 
-# RUN: %clang %p/Inputs/call-asm.c %p/Inputs/prefer-debug-over-eh-frame.s -o %t
+# RUN: %clang -g %p/Inputs/call-asm.c %p/Inputs/prefer-debug-over-eh-frame.s -o %t
 # RUN: %lldb %t -s %s -o exit | FileCheck %s
 
 breakpoint set -n bar

From 441ad62531308e48599c97a2867d370a8fb4b417 Mon Sep 17 00:00:00 2001
From: Nikola Prica <nikola.prica@rt-rk.com>
Date: Mon, 27 May 2019 13:51:30 +0000
Subject: [PATCH 0293/1176] Test commit (NFC)

Add blank line.

llvm-svn: 361761
---
 llvm/lib/CodeGen/LiveDebugValues.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp
index c56ba124a8de3..2ac3fe20fffb2 100644
--- a/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -288,6 +288,7 @@ class LiveDebugValues : public MachineFunctionPass {
                            const VarLocMap &VarLocIDs);
   bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
                               VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+
   bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
                VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
                TransferMap &Transfers, bool transferChanges);

From 3860aad6e7f0c261512ece251a6796dd71450e90 Mon Sep 17 00:00:00 2001
From: Xing Xue <xingxue@outlook.com>
Date: Mon, 27 May 2019 13:57:28 +0000
Subject: [PATCH 0294/1176] [MustExecute] Improve MustExecute to correctly
 handle loop nest

Summary:
for.outer:
  br for.inner
for.inner:
  LI <loop invariant load instruction>
for.inner.latch:
  br for.inner, for.outer.latch
for.outer.latch:
  br for.outer, for.outer.exit

LI is a loop invariant load instruction that post dominate for.outer, so LI should be able to move out of the loop nest. However, there is a bug in allLoopPathsLeadToBlock().

Current algorithm of allLoopPathsLeadToBlock()

  1. get all the transitive predecessors of the basic block LI belongs to (for.inner) ==> for.outer, for.inner.latch
  2. if any successors of any of the predecessors are not for.inner or for.inner's predecessors, then return false
  3. return true

Although for.inner.latch is for.inner's predecessor, but for.inner dominates for.inner.latch, which means if for.inner.latch is ever executed, for.inner should be as well. It should not return false for cases like this.

Author: Whitney (committed by xingxue)

Reviewers: kbarton, jdoerfert, Meinersbur, hfinkel, fhahn

Reviewed By: jdoerfert

Subscribers: hiraditya, jsji, llvm-commits, etiotto, bmahjour

Tags: #LLVM

Differential Revision: https://reviews.llvm.org/D62418

llvm-svn: 361762
---
 llvm/lib/Analysis/MustExecute.cpp             | 9 ++++++++-
 llvm/test/Analysis/MustExecute/loop-header.ll | 8 +++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
index e22831cd6a927..b616cd6f762be 100644
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -193,7 +193,8 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
   SmallPtrSet<const BasicBlock *, 4> Predecessors;
   collectTransitivePredecessors(CurLoop, BB, Predecessors);
 
-  // Make sure that all successors of all predecessors of BB are either:
+  // Make sure that all successors of, all predecessors of BB which are not
+  // dominated by BB, are either:
   // 1) BB,
   // 2) Also predecessors of BB,
   // 3) Exit blocks which are not taken on 1st iteration.
@@ -203,6 +204,12 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
     // Predecessor block may throw, so it has a side exit.
     if (blockMayThrow(Pred))
       return false;
+
+    // BB dominates Pred, so if Pred runs, BB must run.
+    // This is true when Pred is a loop latch.
+    if (DT->dominates(BB, Pred))
+      continue;
+
     for (auto *Succ : successors(Pred))
       if (CheckedSuccessors.insert(Succ).second &&
           Succ != BB && !Predecessors.count(Succ))
diff --git a/llvm/test/Analysis/MustExecute/loop-header.ll b/llvm/test/Analysis/MustExecute/loop-header.ll
index d0ec5fa687254..bc75ed683cb9a 100644
--- a/llvm/test/Analysis/MustExecute/loop-header.ll
+++ b/llvm/test/Analysis/MustExecute/loop-header.ll
@@ -83,17 +83,15 @@ exit:
   ret i1 false
 }
 
-; FIXME: everything in inner loop header should be must execute
-; for outer as well
 define i1 @nested_no_throw(i32* noalias %p, i32 %high) {
 ; CHECK-LABEL: @nested_no_throw
 ; CHECK-LABEL: loop:                                             ; preds = %next
 ; CHECK:         %iv = phi i32 [ 0, %entry ], [ %iv.next, %next ]	; (mustexec in: loop)
 ; CHECK:         br label %inner_loop	; (mustexec in: loop)
 ; CHECK-LABEL: inner_loop:
-; CHECK:         %v = load i32, i32* %p	; (mustexec in: inner_loop)
-; CHECK:         %inner.test = icmp eq i32 %v, 0	; (mustexec in: inner_loop)
-; CHECK:         br i1 %inner.test, label %inner_loop, label %next	; (mustexec in: inner_loop)
+; CHECK:         %v = load i32, i32* %p	; (mustexec in 2 loops: inner_loop, loop)
+; CHECK:         %inner.test = icmp eq i32 %v, 0	; (mustexec in 2 loops: inner_loop, loop)
+; CHECK:         br i1 %inner.test, label %inner_loop, label %next	; (mustexec in 2 loops: inner_loop, loop)
 ; CHECK-LABEL: next:
 ; CHECK:         %iv.next = add nuw nsw i32 %iv, 1 ; (mustexec in: loop)
 ; CHECK:         %exit.test = icmp slt i32 %iv, %high ; (mustexec in: loop)

From b79af7930cac33ce23aa27a06a5cc2d3e2740028 Mon Sep 17 00:00:00 2001
From: Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>
Date: Mon, 27 May 2019 14:08:43 +0000
Subject: [PATCH 0295/1176] [AMDGPU][MC] Enabled constant expressions as
 operands of s_waitcnt

See bug 40820: https://bugs.llvm.org/show_bug.cgi?id=40820

Reviewers: artem.tamazov, arsenm

Differential Revision: https://reviews.llvm.org/D61017

llvm-svn: 361763
---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      | 64 ++++++++-----------
 llvm/test/MC/AMDGPU/sopp-err.s                | 28 +++++++-
 llvm/test/MC/AMDGPU/sopp.s                    | 24 +++++++
 3 files changed, 78 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b55facadf4965..bc7068ef7569b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4391,20 +4391,18 @@ encodeCnt(
 }
 
 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
-  StringRef CntName = Parser.getTok().getString();
-  int64_t CntVal;
 
-  Parser.Lex();
-  if (getLexer().isNot(AsmToken::LParen))
-    return true;
+  SMLoc CntLoc = getLoc();
+  StringRef CntName = getTokenStr();
 
-  Parser.Lex();
-  if (getLexer().isNot(AsmToken::Integer))
-    return true;
+  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
+      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
+    return false;
 
-  SMLoc ValLoc = Parser.getTok().getLoc();
-  if (getParser().parseAbsoluteExpression(CntVal))
-    return true;
+  int64_t CntVal;
+  SMLoc ValLoc = getLoc();
+  if (!parseExpr(CntVal))
+    return false;
 
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
 
@@ -4417,49 +4415,43 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
+  } else {
+    Error(CntLoc, "invalid counter name " + CntName);
+    return false;
   }
 
   if (Failed) {
     Error(ValLoc, "too large value for " + CntName);
-    return true;
+    return false;
   }
 
-  if (getLexer().isNot(AsmToken::RParen)) {
-    return true;
-  }
+  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
+    return false;
 
-  Parser.Lex();
-  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
-    const AsmToken NextToken = getLexer().peekTok();
-    if (NextToken.is(AsmToken::Identifier)) {
-      Parser.Lex();
+  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
+    if (isToken(AsmToken::EndOfStatement)) {
+      Error(getLoc(), "expected a counter name");
+      return false;
     }
   }
 
-  return false;
+  return true;
 }
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
   int64_t Waitcnt = getWaitcntBitMask(ISA);
-  SMLoc S = Parser.getTok().getLoc();
-
-  switch(getLexer().getKind()) {
-    default: return MatchOperand_ParseFail;
-    case AsmToken::Integer:
-      // The operand can be an integer value.
-      if (getParser().parseAbsoluteExpression(Waitcnt))
-        return MatchOperand_ParseFail;
-      break;
+  SMLoc S = getLoc();
 
-    case AsmToken::Identifier:
-      do {
-        if (parseCnt(Waitcnt))
-          return MatchOperand_ParseFail;
-      } while(getLexer().isNot(AsmToken::EndOfStatement));
-      break;
+  // If parse failed, do not return error code
+  // to avoid excessive error messages.
+  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
+    while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
+  } else {
+    parseExpr(Waitcnt);
   }
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
   return MatchOperand_Success;
 }
diff --git a/llvm/test/MC/AMDGPU/sopp-err.s b/llvm/test/MC/AMDGPU/sopp-err.s
index be655c818052e..c6df9df0090cf 100644
--- a/llvm/test/MC/AMDGPU/sopp-err.s
+++ b/llvm/test/MC/AMDGPU/sopp-err.s
@@ -100,7 +100,31 @@ s_waitcnt vmcnt(0xFFFFFFFFFFFF0000)
 // GCN: error: too large value for vmcnt
 
 s_waitcnt vmcnt(0), expcnt(0), lgkmcnt(0),
-// GCN: error: failed parsing operand
+// GCN: error: expected a counter name
 
 s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)&
-// GCN: error: failed parsing operand
+// GCN: error: expected a counter name
+
+s_waitcnt vmcnt(0) & expcnt(0) & x
+// GCN: error: expected a left parenthesis
+
+s_waitcnt vmcnt(0) & expcnt(0) x
+// GCN: error: expected a left parenthesis
+
+s_waitcnt vmcnt(0) & expcnt(0) & 1
+// GCN: error: expected a counter name
+
+s_waitcnt vmcnt(0) & expcnt(0) 1
+// GCN: error: expected a counter name
+
+s_waitcnt vmcnt(0) & expcnt(0) x(0)
+// GCN: error: invalid counter name x
+
+s_waitcnt vmcnt(x)
+// GCN: error: expected absolute expression
+
+s_waitcnt x
+// GCN: error: expected absolute expression
+
+s_waitcnt vmcnt(0
+// GCN: error: expected a closing parenthesis
diff --git a/llvm/test/MC/AMDGPU/sopp.s b/llvm/test/MC/AMDGPU/sopp.s
index 3c426c08a0bcf..807a6d469a5d5 100644
--- a/llvm/test/MC/AMDGPU/sopp.s
+++ b/llvm/test/MC/AMDGPU/sopp.s
@@ -131,6 +131,30 @@ s_waitcnt lgkmcnt_sat(15)
 s_waitcnt lgkmcnt_sat(16)
 // GCN: s_waitcnt ; encoding: [0x7f,0x0f,0x8c,0xbf]
 
+x=1
+s_waitcnt lgkmcnt_sat(x+1)
+// GCN: s_waitcnt lgkmcnt(2)            ; encoding: [0x7f,0x02,0x8c,0xbf]
+
+s_waitcnt lgkmcnt_sat(1+x)
+// GCN: s_waitcnt lgkmcnt(2)            ; encoding: [0x7f,0x02,0x8c,0xbf]
+
+s_waitcnt x+1
+// GCN: s_waitcnt vmcnt(2) expcnt(0) lgkmcnt(0) ; encoding: [0x02,0x00,0x8c,0xbf]
+
+s_waitcnt 1+x
+// GCN: s_waitcnt vmcnt(2) expcnt(0) lgkmcnt(0) ; encoding: [0x02,0x00,0x8c,0xbf]
+
+lgkmcnt_sat=1
+s_waitcnt lgkmcnt_sat
+// GCN: s_waitcnt vmcnt(1) expcnt(0) lgkmcnt(0) ; encoding: [0x01,0x00,0x8c,0xbf]
+
+s_waitcnt lgkmcnt_sat+1
+// GCN: s_waitcnt vmcnt(2) expcnt(0) lgkmcnt(0) ; encoding: [0x02,0x00,0x8c,0xbf]
+
+//===----------------------------------------------------------------------===//
+// misc sopp instructions
+//===----------------------------------------------------------------------===//
+
 s_sethalt 9
 // GCN: s_sethalt 9 ; encoding: [0x09,0x00,0x8d,0xbf]
 

From 80d5d168fdccede55457a3efdce40afa5f62b107 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Mon, 27 May 2019 14:12:48 +0000
Subject: [PATCH 0296/1176] Add test cases for dumping AST expression nodes to
 JSON; NFC.

llvm-svn: 361764
---
 clang/test/AST/ast-dump-expr-json.cpp | 8992 +++++++++++++++++++++++++
 1 file changed, 8992 insertions(+)
 create mode 100644 clang/test/AST/ast-dump-expr-json.cpp

diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
new file mode 100644
index 0000000000000..81c3e66c39523
--- /dev/null
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -0,0 +1,8992 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -Wno-unused-value -fcxx-exceptions -std=gnu++17 -ast-dump=json -ast-dump-filter Test %s | FileCheck %s
+
+namespace std {
+using size_t = decltype(sizeof(0));
+
+class type_info {
+public:
+  virtual ~type_info();
+  bool operator==(const type_info& rhs) const noexcept;
+  bool operator!=(const type_info& rhs) const noexcept;
+  type_info(const type_info& rhs) = delete; // cannot be copied
+  type_info& operator=(const type_info& rhs) = delete; // cannot be copied
+};
+
+class bad_typeid {
+public:
+  bad_typeid() noexcept;
+  bad_typeid(const bad_typeid&) noexcept;
+  virtual ~bad_typeid();
+  bad_typeid& operator=(const bad_typeid&) noexcept;
+  const char* what() const noexcept;
+};
+} // namespace std
+void *operator new(std::size_t, void *ptr);
+
+struct S {
+  virtual ~S() = default;
+
+  void func(int);
+  template <typename Ty>
+  Ty foo();
+
+  int i;
+};
+
+struct T : S {};
+
+template <typename>
+struct U {};
+
+void TestThrow() {
+  throw 12;
+  throw;
+}
+
+void TestPointerToMember(S obj1, S *obj2, int S::* data, void (S::*call)(int)) {
+  obj1.*data;
+  obj2->*data;
+  (obj1.*call)(12);
+  (obj2->*call)(12);
+}
+
+void TestCasting(const S *s) {
+  const_cast<S *>(s);
+  static_cast<const T *>(s);
+  dynamic_cast<const T *>(s);
+  reinterpret_cast<const int *>(s);
+}
+
+template <typename... Ts>
+void TestUnaryExpressions(int *p) {
+  sizeof...(Ts);
+  noexcept(p - p);
+
+  ::new int;
+  new (int);
+  new int{12};
+  new int[2];
+  new int[2]{1, 2};
+  new (p) int;
+  new (p) int{12};
+
+  ::delete p;
+  delete [] p;
+}
+
+void TestPostfixExpressions(S a, S *p, U<int> *r) {
+  a.func(0);
+  p->func(0);
+  p->template foo<int>();
+  a.template foo<float>();
+  p->~S();
+  a.~S();
+  a.~decltype(a)();
+  p->::S::~S();
+  r->template U<int>::~U();
+  typeid(a);
+  typeid(S);
+  typeid(const volatile S);
+}
+
+template <typename... Ts>
+void TestPrimaryExpressions(Ts... a) {
+  struct V {
+    void f() {
+      this;
+
+      [this]{};
+      [*this]{};
+    }
+  };
+
+  int b, c;
+
+  [](){};
+  [](int a, ...){};
+  [a...]{};
+  [=]{};
+  [=] { return b; };
+  [&]{};
+  [&] { return c; };
+  [b, &c]{ return b + c; };
+  [a..., x = 12]{};
+  []() constexpr {};
+  []() mutable {};
+  []() noexcept {};
+  []() -> int { return 0; };
+
+  (a + ...);
+  (... + a);
+  (a + ... + b);
+}
+
+namespace NS {
+struct X {};
+void f(X);
+void y(...);
+} // namespace NS
+
+void TestADLCall() {
+  NS::X x;
+  f(x);
+  y(x);
+}
+
+void TestNonADLCall() {
+  NS::X x;
+  NS::f(x);
+}
+
+void TestNonADLCall2() {
+  NS::X x;
+  using NS::f;
+  f(x);
+  y(x);
+}
+
+namespace test_adl_call_three {
+using namespace NS;
+void TestNonADLCall3() {
+  X x;
+  f(x);
+}
+} // namespace test_adl_call_three
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 41
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 41
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 44
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestThrow",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void ()"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 18,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 41
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 44
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXThrowExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 42
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 9,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 42
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "IntegerLiteral",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 42
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 42
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "value": "12"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXThrowExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 43
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 43
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue"
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 46
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 46
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 51
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestPointerToMember",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void (S, S *, int S::*, void (S::*)(int))"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 28,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 46
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 26,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 28,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "obj1",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "S"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 37,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 46
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 34,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 37,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "obj2",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "S *"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 52,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 46
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 43,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 52,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "data",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "int S::*"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 68,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 46
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 58,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 77,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "call",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "void (S::*)(int)"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 80,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 46
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 51
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "BinaryOperator",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 47
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 9,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 47
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "int"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "lvalue",
+// CHECK-NEXT:      "opcode": ".*",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "DeclRefExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 47
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 47
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "S"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "lvalue",
+// CHECK-NEXT:        "referencedDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "ParmVarDecl",
+// CHECK-NEXT:         "name": "obj1",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "S"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 47
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 47
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int S::*"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 47
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 47
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int S::*"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "data",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "int S::*"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "BinaryOperator",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 48
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 10,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 48
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "int"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "lvalue",
+// CHECK-NEXT:      "opcode": "->*",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 48
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 48
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "S *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 48
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 48
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "obj2",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "S *"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 48
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 48
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int S::*"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 48
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 48
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int S::*"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "data",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "int S::*"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 49
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 18,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 49
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ParenExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 49
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 14,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 49
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "BinaryOperator",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 49
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 49
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "<bound member function type>"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "opcode": ".*",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 49
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 49
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "S"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "obj1",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "S"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 49
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 49
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "void (S::*)(int)"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 10,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 49
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 10,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 49
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "void (S::*)(int)"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "call",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "void (S::*)(int)"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "IntegerLiteral",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 16,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 49
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 16,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 49
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "value": "12"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 50
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 19,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 50
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ParenExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 50
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 15,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 50
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "BinaryOperator",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 50
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 11,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 50
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "<bound member function type>"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "opcode": "->*",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 50
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 50
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "S *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 4,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 50
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 4,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 50
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "S *"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "obj2",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "S *"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 50
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 50
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "void (S::*)(int)"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 11,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 50
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 11,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 50
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "void (S::*)(int)"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "call",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "void (S::*)(int)"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "IntegerLiteral",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 17,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 50
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 17,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 50
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "value": "12"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 53
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 53
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 58
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestCasting",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void (const S *)"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 27,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 53
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 18,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 53
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 27,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 53
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "s",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "const S *"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 30,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 53
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 58
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXConstCastExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 54
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 20,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 54
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "S *"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "castKind": "NoOp",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 19,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 54
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 19,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 54
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "const S *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "isPartOfExplicitCast": true,
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 19,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 54
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 19,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 54
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "const S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "s",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "const S *"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXStaticCastExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 55
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 27,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 55
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "const T *"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "castKind": "BaseToDerived",
+// CHECK-NEXT:      "path": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "name": "S"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ],
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 26,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 55
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 26,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 55
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "const S *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "isPartOfExplicitCast": true,
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 26,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 55
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 26,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 55
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "const S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "s",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "const S *"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXDynamicCastExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 56
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 28,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 56
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "const T *"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "castKind": "Dynamic",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 27,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 56
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 27,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 56
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "const S *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "isPartOfExplicitCast": true,
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 27,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 56
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 27,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 56
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "const S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "s",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "const S *"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXReinterpretCastExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 57
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 34,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 57
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "const int *"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "castKind": "BitCast",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 33,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 57
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 33,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 57
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "const S *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "LValueToRValue",
+// CHECK-NEXT:        "isPartOfExplicitCast": true,
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 33,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 57
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 33,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 57
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "const S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "s",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "const S *"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionTemplateDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 61
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 60
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 75
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestUnaryExpressions",
+// CHECK-NEXT:  "templateParams": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "TemplateTypeParmDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 23,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 60
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 11,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 60
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 23,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 60
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isReferenced": true,
+// CHECK-NEXT:    "name": "Ts",
+// CHECK-NEXT:    "tagUsed": "typename",
+// CHECK-NEXT:    "depth": 0,
+// CHECK-NEXT:    "index": 0,
+// CHECK-NEXT:    "isParameterPack": true
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ],
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "FunctionDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 6,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 61
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 61
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 75
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "name": "TestUnaryExpressions",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "void (int *)"
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "ParmVarDecl",
+// CHECK-NEXT:      "loc": {
+// CHECK-NEXT:       "col": 32,
+// CHECK-NEXT:       "file": "{{.*}}",
+// CHECK-NEXT:       "line": 61
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 27,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 61
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 32,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 61
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "isReferenced": true,
+// CHECK-NEXT:      "name": "p",
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "int *"
+// CHECK-NEXT:      }
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CompoundStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 35,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 61
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 1,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 75
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "SizeOfPackExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 62
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 15,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 62
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "unsigned long"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue"
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNoexceptExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 63
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 17,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 63
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "bool"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "BinaryOperator",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 12,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 63
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 16,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 63
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "long"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "opcode": "-",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 12,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 63
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 12,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 63
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 12,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 63
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 12,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 63
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int *"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "p",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "int *"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 63
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 63
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 63
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 63
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int *"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "p",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "int *"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 65
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 65
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isGlobal": true,
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(unsigned long)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 66
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 66
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(unsigned long)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 67
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 13,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 67
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "initStyle": "list",
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(unsigned long)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "InitListExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 67
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 13,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 67
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "IntegerLiteral",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 67
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 67
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "value": "12"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 68
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 12,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 68
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArray": true,
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new[]",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(unsigned long)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 11,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 68
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 11,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 68
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "unsigned long"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "IntegralCast",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "IntegerLiteral",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 68
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 68
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "value": "2"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 69
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 18,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 69
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArray": true,
+// CHECK-NEXT:        "initStyle": "list",
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new[]",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(unsigned long)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 11,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 69
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 11,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 69
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "unsigned long"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "IntegralCast",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "IntegerLiteral",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 69
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 69
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "value": "2"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "InitListExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 13,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 69
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 18,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 69
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int [2]"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "IntegerLiteral",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 14,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 69
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 14,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 69
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "value": "1"
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "IntegerLiteral",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 17,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 69
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 17,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 69
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "value": "2"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 70
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 70
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isPlacement": true,
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(std::size_t, void *)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 70
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 70
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "BitCast",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 70
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 70
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 8,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 70
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 8,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 70
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int *"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "p",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "int *"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXNewExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 71
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 17,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 71
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int *"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isPlacement": true,
+// CHECK-NEXT:        "initStyle": "list",
+// CHECK-NEXT:        "operatorNewDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator new",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void *(std::size_t, void *)"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "InitListExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 14,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 71
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 17,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 71
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "IntegerLiteral",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 15,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 71
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 15,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 71
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "value": "12"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 71
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 71
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "BitCast",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ImplicitCastExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 71
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 71
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "rvalue",
+// CHECK-NEXT:            "castKind": "LValueToRValue",
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 8,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 71
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 8,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 71
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int *"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "ParmVarDecl",
+// CHECK-NEXT:               "name": "p",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "int *"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXDeleteExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 73
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 12,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 73
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isGlobal": true,
+// CHECK-NEXT:        "operatorDeleteDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator delete",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void (void *) noexcept"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 12,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 73
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 12,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 73
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 12,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 73
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 12,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 73
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "p",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "int *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXDeleteExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 74
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 13,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 74
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArray": true,
+// CHECK-NEXT:        "isArrayAsWritten": true,
+// CHECK-NEXT:        "operatorDeleteDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "FunctionDecl",
+// CHECK-NEXT:         "name": "operator delete[]",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "void (void *) noexcept"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 13,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 74
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 13,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 74
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 13,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 74
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 13,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 74
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "p",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "int *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 77
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 77
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 90
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestPostfixExpressions",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void (S, S *, U<int> *)"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 31,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 77
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 29,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 31,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "a",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "S"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 37,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 77
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 34,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 37,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "p",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "S *"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "ParmVarDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 48,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 77
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 40,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 48,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isUsed": true,
+// CHECK-NEXT:    "name": "r",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "U<int> *"
+// CHECK-NEXT:    }
+// CHECK-NEXT:   },
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 51,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 77
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 90
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 78
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 11,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 78
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 78
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 78
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": false,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 78
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 78
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "S"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "IntegerLiteral",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 78
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 78
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "value": "0"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 79
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 12,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 79
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 79
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 6,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 79
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": true,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 79
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 79
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 79
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 79
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "S *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "p",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "S *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "IntegerLiteral",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 79
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 79
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "int"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "value": "0"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 80
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 24,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 80
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "desugaredQualType": "int",
+// CHECK-NEXT:       "qualType": "int"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 80
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 22,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 80
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": true,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 80
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 80
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 80
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 80
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "S *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "p",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "S *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 81
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 25,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 81
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "desugaredQualType": "float",
+// CHECK-NEXT:       "qualType": "float"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 81
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 23,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 81
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": false,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 81
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 81
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "S"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 82
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 9,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 82
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 82
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 7,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 82
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": true,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 82
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 82
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 82
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 82
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "S *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "p",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "S *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 83
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 8,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 83
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 83
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 6,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 83
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": false,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 83
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 83
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "S"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 84
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 18,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 84
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 84
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 84
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": false,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 84
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 84
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "S"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 85
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 14,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 85
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 85
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 12,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 85
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": true,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 85
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 85
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "S *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 85
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 85
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "S *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "p",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "S *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXMemberCallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 86
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 26,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 86
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "MemberExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 86
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 24,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 86
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<bound member function type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "isArrow": true,
+// CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 86
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 86
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "U<int> *"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 86
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 86
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "U<int> *"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "r",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "U<int> *"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXTypeidExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 87
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 11,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 87
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "const std::type_info"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "lvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "DeclRefExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 87
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 87
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "S"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "lvalue",
+// CHECK-NEXT:        "referencedDecl": {
+// CHECK-NEXT:         "id": "0x{{.*}}",
+// CHECK-NEXT:         "kind": "ParmVarDecl",
+// CHECK-NEXT:         "name": "a",
+// CHECK-NEXT:         "type": {
+// CHECK-NEXT:          "qualType": "S"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        }
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXTypeidExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 88
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 11,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 88
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "const std::type_info"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "lvalue"
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CXXTypeidExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 89
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 26,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 89
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "const std::type_info"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "lvalue"
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionTemplateDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 93
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 92
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 122
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestPrimaryExpressions",
+// CHECK-NEXT:  "templateParams": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "TemplateTypeParmDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 23,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 92
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 11,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 92
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 23,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 92
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "isReferenced": true,
+// CHECK-NEXT:    "name": "Ts",
+// CHECK-NEXT:    "tagUsed": "typename",
+// CHECK-NEXT:    "depth": 0,
+// CHECK-NEXT:    "index": 0,
+// CHECK-NEXT:    "isParameterPack": true
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ],
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "FunctionDecl",
+// CHECK-NEXT:    "loc": {
+// CHECK-NEXT:     "col": 6,
+// CHECK-NEXT:     "file": "{{.*}}",
+// CHECK-NEXT:     "line": 93
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 93
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 122
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "name": "TestPrimaryExpressions",
+// CHECK-NEXT:    "type": {
+// CHECK-NEXT:     "qualType": "void (Ts...)"
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "ParmVarDecl",
+// CHECK-NEXT:      "loc": {
+// CHECK-NEXT:       "col": 35,
+// CHECK-NEXT:       "file": "{{.*}}",
+// CHECK-NEXT:       "line": 93
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 29,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 93
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 35,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 93
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "isReferenced": true,
+// CHECK-NEXT:      "name": "a",
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "Ts..."
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "isParameterPack": true
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CompoundStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 38,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 93
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 1,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 122
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "DeclStmt",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 94
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 4,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 101
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 10,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 94
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 94
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 101
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "name": "V",
+// CHECK-NEXT:          "tagUsed": "struct",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true,
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "isConstexpr": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "hasConstexprNonCopyMoveConstructor": true,
+// CHECK-NEXT:           "isAggregate": true,
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isPOD": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTrivial": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXRecordDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 10,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 94
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 94
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 94
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "V",
+// CHECK-NEXT:            "tagUsed": "struct"
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 10,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 95
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 95
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 100
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "f",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "void ()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 14,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 95
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 5,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 100
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "CXXThisExpr",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 7,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 96
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 7,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 96
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "V *"
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "valueCategory": "rvalue"
+// CHECK-NEXT:               },
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "LambdaExpr",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 7,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 98
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 14,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 98
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "(lambda at {{.*}}:98:7)"
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "valueCategory": "rvalue",
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "CXXRecordDecl",
+// CHECK-NEXT:                  "loc": {
+// CHECK-NEXT:                   "col": 7,
+// CHECK-NEXT:                   "file": "{{.*}}",
+// CHECK-NEXT:                   "line": 98
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 7,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 7,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "isImplicit": true,
+// CHECK-NEXT:                  "tagUsed": "class",
+// CHECK-NEXT:                  "completeDefinition": true,
+// CHECK-NEXT:                  "definitionData": {
+// CHECK-NEXT:                   "canConstDefaultInit": true,
+// CHECK-NEXT:                   "copyAssign": {
+// CHECK-NEXT:                    "hasConstParam": true,
+// CHECK-NEXT:                    "implicitHasConstParam": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "copyCtor": {
+// CHECK-NEXT:                    "hasConstParam": true,
+// CHECK-NEXT:                    "implicitHasConstParam": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "simple": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "defaultCtor": {},
+// CHECK-NEXT:                   "dtor": {
+// CHECK-NEXT:                    "irrelevant": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "simple": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "isLambda": true,
+// CHECK-NEXT:                   "isStandardLayout": true,
+// CHECK-NEXT:                   "isTriviallyCopyable": true,
+// CHECK-NEXT:                   "moveAssign": {},
+// CHECK-NEXT:                   "moveCtor": {
+// CHECK-NEXT:                    "exists": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "simple": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "inner": [
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "FieldDecl",
+// CHECK-NEXT:                    "loc": {
+// CHECK-NEXT:                     "col": 8,
+// CHECK-NEXT:                     "file": "{{.*}}",
+// CHECK-NEXT:                     "line": 98
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 98
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 98
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "isImplicit": true,
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "V *"
+// CHECK-NEXT:                    }
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "CXXMethodDecl",
+// CHECK-NEXT:                    "loc": {
+// CHECK-NEXT:                     "col": 7,
+// CHECK-NEXT:                     "file": "{{.*}}",
+// CHECK-NEXT:                     "line": 98
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 12,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 98
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 14,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 98
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "name": "operator()",
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "auto () const -> auto"
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "inline": true,
+// CHECK-NEXT:                    "inner": [
+// CHECK-NEXT:                     {
+// CHECK-NEXT:                      "id": "0x{{.*}}",
+// CHECK-NEXT:                      "kind": "CompoundStmt",
+// CHECK-NEXT:                      "range": {
+// CHECK-NEXT:                       "begin": {
+// CHECK-NEXT:                        "col": 13,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 98
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "end": {
+// CHECK-NEXT:                        "col": 14,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 98
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      }
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    ]
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  ]
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "CXXThisExpr",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 8,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 8,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "V *"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "rvalue",
+// CHECK-NEXT:                  "implicit": true
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "CompoundStmt",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 13,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 14,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  }
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               },
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "LambdaExpr",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 7,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 99
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 15,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 99
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "(lambda at {{.*}}:99:7)"
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "valueCategory": "rvalue",
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "CXXRecordDecl",
+// CHECK-NEXT:                  "loc": {
+// CHECK-NEXT:                   "col": 7,
+// CHECK-NEXT:                   "file": "{{.*}}",
+// CHECK-NEXT:                   "line": 99
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 7,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 99
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 7,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 99
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "isImplicit": true,
+// CHECK-NEXT:                  "tagUsed": "class",
+// CHECK-NEXT:                  "completeDefinition": true,
+// CHECK-NEXT:                  "definitionData": {
+// CHECK-NEXT:                   "canConstDefaultInit": true,
+// CHECK-NEXT:                   "copyAssign": {
+// CHECK-NEXT:                    "hasConstParam": true,
+// CHECK-NEXT:                    "implicitHasConstParam": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "copyCtor": {
+// CHECK-NEXT:                    "hasConstParam": true,
+// CHECK-NEXT:                    "implicitHasConstParam": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "simple": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "defaultCtor": {
+// CHECK-NEXT:                    "defaultedIsConstexpr": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "dtor": {
+// CHECK-NEXT:                    "irrelevant": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "simple": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "isLambda": true,
+// CHECK-NEXT:                   "isStandardLayout": true,
+// CHECK-NEXT:                   "isTriviallyCopyable": true,
+// CHECK-NEXT:                   "moveAssign": {},
+// CHECK-NEXT:                   "moveCtor": {
+// CHECK-NEXT:                    "exists": true,
+// CHECK-NEXT:                    "needsImplicit": true,
+// CHECK-NEXT:                    "simple": true,
+// CHECK-NEXT:                    "trivial": true
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "inner": [
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "FieldDecl",
+// CHECK-NEXT:                    "loc": {
+// CHECK-NEXT:                     "col": 8,
+// CHECK-NEXT:                     "file": "{{.*}}",
+// CHECK-NEXT:                     "line": 99
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "isImplicit": true,
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "V"
+// CHECK-NEXT:                    }
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "CXXMethodDecl",
+// CHECK-NEXT:                    "loc": {
+// CHECK-NEXT:                     "col": 7,
+// CHECK-NEXT:                     "file": "{{.*}}",
+// CHECK-NEXT:                     "line": 99
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 13,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 15,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "name": "operator()",
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "auto () const -> auto"
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "inline": true,
+// CHECK-NEXT:                    "inner": [
+// CHECK-NEXT:                     {
+// CHECK-NEXT:                      "id": "0x{{.*}}",
+// CHECK-NEXT:                      "kind": "CompoundStmt",
+// CHECK-NEXT:                      "range": {
+// CHECK-NEXT:                       "begin": {
+// CHECK-NEXT:                        "col": 14,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 99
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "end": {
+// CHECK-NEXT:                        "col": 15,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 99
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      }
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    ]
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  ]
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "ParenListExpr",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 8,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 99
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 8,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 99
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "NULL TYPE"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "rvalue",
+// CHECK-NEXT:                  "inner": [
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "UnaryOperator",
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "<dependent type>"
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "valueCategory": "rvalue",
+// CHECK-NEXT:                    "isPostfix": false,
+// CHECK-NEXT:                    "opcode": "*",
+// CHECK-NEXT:                    "canOverflow": false,
+// CHECK-NEXT:                    "inner": [
+// CHECK-NEXT:                     {
+// CHECK-NEXT:                      "id": "0x{{.*}}",
+// CHECK-NEXT:                      "kind": "CXXThisExpr",
+// CHECK-NEXT:                      "range": {
+// CHECK-NEXT:                       "begin": {
+// CHECK-NEXT:                        "col": 8,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 99
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "end": {
+// CHECK-NEXT:                        "col": 8,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 99
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      },
+// CHECK-NEXT:                      "type": {
+// CHECK-NEXT:                       "qualType": "V *"
+// CHECK-NEXT:                      },
+// CHECK-NEXT:                      "valueCategory": "rvalue",
+// CHECK-NEXT:                      "implicit": true
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    ]
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  ]
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "CompoundStmt",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 14,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 99
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 15,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 99
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  }
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "DeclStmt",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 103
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 103
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "VarDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 7,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 103
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 103
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 103
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isReferenced": true,
+// CHECK-NEXT:          "name": "b",
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          }
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "VarDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 10,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 103
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 103
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 103
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isReferenced": true,
+// CHECK-NEXT:          "name": "c",
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 105
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 8,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 105
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:105:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 105
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 105
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 105
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 105
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 6,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 105
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 105
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 7,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 105
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 8,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 105
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXConversionDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 105
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 105
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 105
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator auto (*)()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (*() const noexcept)()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 105
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 105
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 105
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "__invoke",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto ()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "storageClass": "static",
+// CHECK-NEXT:            "inline": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 105
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 105
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 106
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 18,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 106
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:106:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 106
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 106
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 106
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 106
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 106
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 106
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (int, ...) const"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "ParmVarDecl",
+// CHECK-NEXT:              "loc": {
+// CHECK-NEXT:               "col": 10,
+// CHECK-NEXT:               "file": "{{.*}}",
+// CHECK-NEXT:               "line": 106
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 6,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 106
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 10,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 106
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "name": "a",
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              }
+// CHECK-NEXT:             },
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 17,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 106
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 18,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 106
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXConversionDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 106
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 106
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 106
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator auto (*)(int, ...)",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (*() const noexcept)(int, ...)"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 106
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 106
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 106
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "__invoke",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (int, ...)"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "storageClass": "static",
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "ParmVarDecl",
+// CHECK-NEXT:              "loc": {
+// CHECK-NEXT:               "col": 10,
+// CHECK-NEXT:               "file": "{{.*}}",
+// CHECK-NEXT:               "line": 106
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 6,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 106
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 10,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 106
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "name": "a",
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 17,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 106
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 18,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 106
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 107
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 10,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 107
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:107:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 107
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 107
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 107
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {},
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 4,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 107
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "Ts..."
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 107
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 9,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 107
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 10,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 107
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ParenListExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 107
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 107
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "NULL TYPE"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "Ts..."
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "a",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "Ts..."
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 107
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 107
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 108
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 7,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 108
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:108:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 108
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 108
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 108
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 108
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 108
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 7,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 108
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 6,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 108
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 7,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 108
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 6,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 108
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 108
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 109
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 19,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 109
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:109:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 109
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 109
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 109
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 109
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 109
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 19,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 109
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 7,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 109
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 19,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 109
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "ReturnStmt",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 9,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 109
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 16,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 109
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "DeclRefExpr",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 16,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 109
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 16,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 109
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "const int"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "lvalue",
+// CHECK-NEXT:                  "referencedDecl": {
+// CHECK-NEXT:                   "id": "0x{{.*}}",
+// CHECK-NEXT:                   "kind": "VarDecl",
+// CHECK-NEXT:                   "name": "b",
+// CHECK-NEXT:                   "type": {
+// CHECK-NEXT:                    "qualType": "int"
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  }
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 109
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 19,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 109
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ReturnStmt",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 9,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 109
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 109
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 109
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 109
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "const int"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "VarDecl",
+// CHECK-NEXT:               "name": "b",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "int"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 110
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 7,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 110
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:110:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 110
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 110
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 110
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 110
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 110
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 7,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 110
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 6,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 110
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 7,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 110
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 6,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 110
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 110
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 111
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 19,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 111
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:111:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 111
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 111
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 111
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 111
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 111
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 19,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 111
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 7,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 111
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 19,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 111
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "ReturnStmt",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 9,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 111
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 16,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 111
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "DeclRefExpr",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 16,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 111
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 16,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 111
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "int"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "lvalue",
+// CHECK-NEXT:                  "referencedDecl": {
+// CHECK-NEXT:                   "id": "0x{{.*}}",
+// CHECK-NEXT:                   "kind": "VarDecl",
+// CHECK-NEXT:                   "name": "c",
+// CHECK-NEXT:                   "type": {
+// CHECK-NEXT:                    "qualType": "int"
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  }
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 111
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 19,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 111
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ReturnStmt",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 9,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 111
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 111
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "DeclRefExpr",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 111
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 111
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "lvalue",
+// CHECK-NEXT:              "referencedDecl": {
+// CHECK-NEXT:               "id": "0x{{.*}}",
+// CHECK-NEXT:               "kind": "VarDecl",
+// CHECK-NEXT:               "name": "c",
+// CHECK-NEXT:               "type": {
+// CHECK-NEXT:                "qualType": "int"
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 112
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 26,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 112
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:112:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 112
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {},
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 4,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 112
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 8,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 112
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int &"
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 112
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 9,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 26,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 10,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 112
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 26,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 112
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "ReturnStmt",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 12,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 112
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 23,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 112
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "BinaryOperator",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 19,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 112
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 23,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 112
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "int"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "rvalue",
+// CHECK-NEXT:                  "opcode": "+",
+// CHECK-NEXT:                  "inner": [
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "ImplicitCastExpr",
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 19,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 112
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 19,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 112
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "int"
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "valueCategory": "rvalue",
+// CHECK-NEXT:                    "castKind": "LValueToRValue",
+// CHECK-NEXT:                    "inner": [
+// CHECK-NEXT:                     {
+// CHECK-NEXT:                      "id": "0x{{.*}}",
+// CHECK-NEXT:                      "kind": "DeclRefExpr",
+// CHECK-NEXT:                      "range": {
+// CHECK-NEXT:                       "begin": {
+// CHECK-NEXT:                        "col": 19,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 112
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "end": {
+// CHECK-NEXT:                        "col": 19,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 112
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      },
+// CHECK-NEXT:                      "type": {
+// CHECK-NEXT:                       "qualType": "const int"
+// CHECK-NEXT:                      },
+// CHECK-NEXT:                      "valueCategory": "lvalue",
+// CHECK-NEXT:                      "referencedDecl": {
+// CHECK-NEXT:                       "id": "0x{{.*}}",
+// CHECK-NEXT:                       "kind": "VarDecl",
+// CHECK-NEXT:                       "name": "b",
+// CHECK-NEXT:                       "type": {
+// CHECK-NEXT:                        "qualType": "int"
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      }
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    ]
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "ImplicitCastExpr",
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 23,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 112
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 23,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 112
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "int"
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "valueCategory": "rvalue",
+// CHECK-NEXT:                    "castKind": "LValueToRValue",
+// CHECK-NEXT:                    "inner": [
+// CHECK-NEXT:                     {
+// CHECK-NEXT:                      "id": "0x{{.*}}",
+// CHECK-NEXT:                      "kind": "DeclRefExpr",
+// CHECK-NEXT:                      "range": {
+// CHECK-NEXT:                       "begin": {
+// CHECK-NEXT:                        "col": 23,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 112
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "end": {
+// CHECK-NEXT:                        "col": 23,
+// CHECK-NEXT:                        "file": "{{.*}}",
+// CHECK-NEXT:                        "line": 112
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      },
+// CHECK-NEXT:                      "type": {
+// CHECK-NEXT:                       "qualType": "int"
+// CHECK-NEXT:                      },
+// CHECK-NEXT:                      "valueCategory": "lvalue",
+// CHECK-NEXT:                      "referencedDecl": {
+// CHECK-NEXT:                       "id": "0x{{.*}}",
+// CHECK-NEXT:                       "kind": "VarDecl",
+// CHECK-NEXT:                       "name": "c",
+// CHECK-NEXT:                       "type": {
+// CHECK-NEXT:                        "qualType": "int"
+// CHECK-NEXT:                       }
+// CHECK-NEXT:                      }
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    ]
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  ]
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "castKind": "LValueToRValue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "b",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "int"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 8,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "VarDecl",
+// CHECK-NEXT:           "name": "c",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "int"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 26,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 112
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ReturnStmt",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 12,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 23,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "BinaryOperator",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 19,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 112
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 23,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 112
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "rvalue",
+// CHECK-NEXT:              "opcode": "+",
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "ImplicitCastExpr",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 19,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 112
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 19,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 112
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "int"
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "valueCategory": "rvalue",
+// CHECK-NEXT:                "castKind": "LValueToRValue",
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "DeclRefExpr",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 19,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 112
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 19,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 112
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "const int"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "lvalue",
+// CHECK-NEXT:                  "referencedDecl": {
+// CHECK-NEXT:                   "id": "0x{{.*}}",
+// CHECK-NEXT:                   "kind": "VarDecl",
+// CHECK-NEXT:                   "name": "b",
+// CHECK-NEXT:                   "type": {
+// CHECK-NEXT:                    "qualType": "int"
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  }
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               },
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "ImplicitCastExpr",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 23,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 112
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 23,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 112
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "type": {
+// CHECK-NEXT:                 "qualType": "int"
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "valueCategory": "rvalue",
+// CHECK-NEXT:                "castKind": "LValueToRValue",
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "DeclRefExpr",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 23,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 112
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 23,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 112
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "int"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "lvalue",
+// CHECK-NEXT:                  "referencedDecl": {
+// CHECK-NEXT:                   "id": "0x{{.*}}",
+// CHECK-NEXT:                   "kind": "VarDecl",
+// CHECK-NEXT:                   "name": "c",
+// CHECK-NEXT:                   "type": {
+// CHECK-NEXT:                    "qualType": "int"
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  }
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 113
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 18,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 113
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:113:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 113
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {},
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 4,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 113
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "Ts..."
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 10,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 113
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "desugaredQualType": "int",
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 113
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 17,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 113
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 18,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 113
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ParenListExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "NULL TYPE"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 113
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "Ts..."
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "ParmVarDecl",
+// CHECK-NEXT:             "name": "a",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "Ts..."
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "IntegerLiteral",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 14,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 14,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue",
+// CHECK-NEXT:          "value": "12"
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 17,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 18,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 113
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 114
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 19,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 114
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:114:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 114
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 114
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 114
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 114
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 114
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 19,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 114
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 18,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 114
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 19,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 114
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXConversionDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 114
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 114
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 19,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 114
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator auto (*)()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (*() const noexcept)()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 114
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 114
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 19,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 114
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "__invoke",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto ()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "storageClass": "static",
+// CHECK-NEXT:            "inline": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 18,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 114
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 19,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 114
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 115
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 17,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 115
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:115:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 115
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 115
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 115
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 115
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 115
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 17,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 115
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto ()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 16,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 115
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 17,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 115
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXConversionDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 115
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 115
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 17,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 115
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator auto (*)()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (*() const noexcept)()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 115
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 115
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 17,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 115
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "__invoke",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto ()"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "storageClass": "static",
+// CHECK-NEXT:            "inline": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 16,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 115
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 17,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 115
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 116
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 18,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 116
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:116:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 116
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 116
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 116
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 116
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 116
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 116
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const noexcept"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 17,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 116
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 18,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 116
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXConversionDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 116
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 116
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 116
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator auto (*)() noexcept",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (*() const noexcept)() noexcept"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 116
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 116
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 116
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "__invoke",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () noexcept"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "storageClass": "static",
+// CHECK-NEXT:            "inline": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 17,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 116
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 18,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 116
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "LambdaExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 117
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 27,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 117
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "(lambda at {{.*}}:117:3)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXRecordDecl",
+// CHECK-NEXT:          "loc": {
+// CHECK-NEXT:           "col": 3,
+// CHECK-NEXT:           "file": "{{.*}}",
+// CHECK-NEXT:           "line": 117
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 117
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 117
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "isImplicit": true,
+// CHECK-NEXT:          "tagUsed": "class",
+// CHECK-NEXT:          "completeDefinition": true,
+// CHECK-NEXT:          "definitionData": {
+// CHECK-NEXT:           "canConstDefaultInit": true,
+// CHECK-NEXT:           "copyAssign": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "copyCtor": {
+// CHECK-NEXT:            "hasConstParam": true,
+// CHECK-NEXT:            "implicitHasConstParam": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "defaultCtor": {
+// CHECK-NEXT:            "defaultedIsConstexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "dtor": {
+// CHECK-NEXT:            "irrelevant": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "isEmpty": true,
+// CHECK-NEXT:           "isLambda": true,
+// CHECK-NEXT:           "isLiteral": true,
+// CHECK-NEXT:           "isStandardLayout": true,
+// CHECK-NEXT:           "isTriviallyCopyable": true,
+// CHECK-NEXT:           "moveAssign": {},
+// CHECK-NEXT:           "moveCtor": {
+// CHECK-NEXT:            "exists": true,
+// CHECK-NEXT:            "needsImplicit": true,
+// CHECK-NEXT:            "simple": true,
+// CHECK-NEXT:            "trivial": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 117
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 11,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 27,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () const -> int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 15,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 117
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 27,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 117
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "inner": [
+// CHECK-NEXT:               {
+// CHECK-NEXT:                "id": "0x{{.*}}",
+// CHECK-NEXT:                "kind": "ReturnStmt",
+// CHECK-NEXT:                "range": {
+// CHECK-NEXT:                 "begin": {
+// CHECK-NEXT:                  "col": 17,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 117
+// CHECK-NEXT:                 },
+// CHECK-NEXT:                 "end": {
+// CHECK-NEXT:                  "col": 24,
+// CHECK-NEXT:                  "file": "{{.*}}",
+// CHECK-NEXT:                  "line": 117
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                },
+// CHECK-NEXT:                "inner": [
+// CHECK-NEXT:                 {
+// CHECK-NEXT:                  "id": "0x{{.*}}",
+// CHECK-NEXT:                  "kind": "IntegerLiteral",
+// CHECK-NEXT:                  "range": {
+// CHECK-NEXT:                   "begin": {
+// CHECK-NEXT:                    "col": 24,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 117
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "end": {
+// CHECK-NEXT:                    "col": 24,
+// CHECK-NEXT:                    "file": "{{.*}}",
+// CHECK-NEXT:                    "line": 117
+// CHECK-NEXT:                   }
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "type": {
+// CHECK-NEXT:                   "qualType": "int"
+// CHECK-NEXT:                  },
+// CHECK-NEXT:                  "valueCategory": "rvalue",
+// CHECK-NEXT:                  "value": "0"
+// CHECK-NEXT:                 }
+// CHECK-NEXT:                ]
+// CHECK-NEXT:               }
+// CHECK-NEXT:              ]
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXConversionDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 117
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 27,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator int (*)()",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto (*() const noexcept)() -> int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "constexpr": true
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 117
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 3,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 27,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "__invoke",
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "auto () -> int"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "storageClass": "static",
+// CHECK-NEXT:            "inline": true
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CompoundStmt",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 15,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 117
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 27,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 117
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "ReturnStmt",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 17,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 24,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 117
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "IntegerLiteral",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 24,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 117
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 24,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 117
+// CHECK-NEXT:               }
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "type": {
+// CHECK-NEXT:               "qualType": "int"
+// CHECK-NEXT:              },
+// CHECK-NEXT:              "valueCategory": "rvalue",
+// CHECK-NEXT:              "value": "0"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXFoldExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 119
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 119
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<dependent type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 119
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 119
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "Ts..."
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "Ts..."
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {}
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXFoldExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 120
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 11,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 120
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<dependent type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {},
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 120
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 10,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 120
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "Ts..."
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "Ts..."
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXFoldExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 121
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 15,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 121
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "<dependent type>"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 121
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 4,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 121
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "Ts..."
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "ParmVarDecl",
+// CHECK-NEXT:           "name": "a",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "Ts..."
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         },
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 14,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 121
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 14,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 121
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "int"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "VarDecl",
+// CHECK-NEXT:           "name": "b",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "int"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 130
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 130
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 134
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestADLCall",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void ()"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 20,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 130
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 134
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "DeclStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 131
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 10,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 131
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "VarDecl",
+// CHECK-NEXT:        "loc": {
+// CHECK-NEXT:         "col": 9,
+// CHECK-NEXT:         "file": "{{.*}}",
+// CHECK-NEXT:         "line": 131
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 131
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 131
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "isUsed": true,
+// CHECK-NEXT:        "name": "x",
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "desugaredQualType": "NS::X",
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "init": "call",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXConstructExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 131
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 131
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "NS::X",
+// CHECK-NEXT:           "qualType": "NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 132
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 6,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 132
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "adl": true,
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 132
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 132
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (*)(NS::X)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "FunctionToPointerDecay",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 132
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 132
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void (NS::X)"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "FunctionDecl",
+// CHECK-NEXT:           "name": "f",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "void (NS::X)"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXConstructExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 132
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 132
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 132
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 132
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "const NS::X",
+// CHECK-NEXT:           "qualType": "const NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "castKind": "NoOp",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 132
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 132
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "desugaredQualType": "NS::X",
+// CHECK-NEXT:             "qualType": "NS::X"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "x",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "desugaredQualType": "NS::X",
+// CHECK-NEXT:              "qualType": "NS::X"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 133
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 6,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 133
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "adl": true,
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 133
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 133
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (*)(...)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "FunctionToPointerDecay",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 133
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 133
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void (...)"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "FunctionDecl",
+// CHECK-NEXT:           "name": "y",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "void (...)"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXConstructExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 133
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 133
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "desugaredQualType": "NS::X",
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 133
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 133
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "const NS::X",
+// CHECK-NEXT:           "qualType": "const NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "castKind": "NoOp",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 133
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 133
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "desugaredQualType": "NS::X",
+// CHECK-NEXT:             "qualType": "NS::X"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "x",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "desugaredQualType": "NS::X",
+// CHECK-NEXT:              "qualType": "NS::X"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 136
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 136
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 139
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestNonADLCall",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void ()"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 23,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 136
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 139
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "DeclStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 137
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 10,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 137
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "VarDecl",
+// CHECK-NEXT:        "loc": {
+// CHECK-NEXT:         "col": 9,
+// CHECK-NEXT:         "file": "{{.*}}",
+// CHECK-NEXT:         "line": 137
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 137
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 137
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "isUsed": true,
+// CHECK-NEXT:        "name": "x",
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "desugaredQualType": "NS::X",
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "init": "call",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXConstructExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 137
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 137
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "NS::X",
+// CHECK-NEXT:           "qualType": "NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 138
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 10,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 138
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 138
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 7,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 138
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (*)(NS::X)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "FunctionToPointerDecay",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 138
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 7,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 138
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void (NS::X)"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "FunctionDecl",
+// CHECK-NEXT:           "name": "f",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "void (NS::X)"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXConstructExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 138
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 138
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 138
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 138
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "const NS::X",
+// CHECK-NEXT:           "qualType": "const NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "castKind": "NoOp",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 9,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 138
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 9,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 138
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "desugaredQualType": "NS::X",
+// CHECK-NEXT:             "qualType": "NS::X"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "x",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "desugaredQualType": "NS::X",
+// CHECK-NEXT:              "qualType": "NS::X"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 141
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 141
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 146
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestNonADLCall2",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void ()"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 24,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 141
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 146
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "DeclStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 142
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 10,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 142
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "VarDecl",
+// CHECK-NEXT:        "loc": {
+// CHECK-NEXT:         "col": 9,
+// CHECK-NEXT:         "file": "{{.*}}",
+// CHECK-NEXT:         "line": 142
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 142
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 9,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 142
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "isUsed": true,
+// CHECK-NEXT:        "name": "x",
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "desugaredQualType": "NS::X",
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "init": "call",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXConstructExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 142
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 9,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 142
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "NS::X",
+// CHECK-NEXT:           "qualType": "NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "DeclStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 143
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 14,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 143
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "UsingDecl",
+// CHECK-NEXT:        "loc": {
+// CHECK-NEXT:         "col": 13,
+// CHECK-NEXT:         "file": "{{.*}}",
+// CHECK-NEXT:         "line": 143
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 143
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 13,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 143
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "name": "NS::f"
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 144
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 6,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 144
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 144
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 144
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (*)(NS::X)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "FunctionToPointerDecay",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 144
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 144
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void (NS::X)"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "FunctionDecl",
+// CHECK-NEXT:           "name": "f",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "void (NS::X)"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "foundReferencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "UsingShadowDecl",
+// CHECK-NEXT:           "name": "f"
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXConstructExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 144
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 144
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 144
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 144
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "const NS::X",
+// CHECK-NEXT:           "qualType": "const NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "castKind": "NoOp",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 144
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 144
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "desugaredQualType": "NS::X",
+// CHECK-NEXT:             "qualType": "NS::X"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "x",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "desugaredQualType": "NS::X",
+// CHECK-NEXT:              "qualType": "NS::X"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 145
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 6,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 145
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "adl": true,
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 145
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 145
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (*)(...)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "FunctionToPointerDecay",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 145
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 145
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void (...)"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "FunctionDecl",
+// CHECK-NEXT:           "name": "y",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "void (...)"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXConstructExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 145
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 145
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "desugaredQualType": "NS::X",
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 145
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 145
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "desugaredQualType": "const NS::X",
+// CHECK-NEXT:           "qualType": "const NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "castKind": "NoOp",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 145
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 145
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "desugaredQualType": "NS::X",
+// CHECK-NEXT:             "qualType": "NS::X"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "x",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "desugaredQualType": "NS::X",
+// CHECK-NEXT:              "qualType": "NS::X"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }
+
+
+// CHECK:  "kind": "FunctionDecl",
+// CHECK-NEXT:  "loc": {
+// CHECK-NEXT:   "col": 6,
+// CHECK-NEXT:   "file": "{{.*}}",
+// CHECK-NEXT:   "line": 150
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "range": {
+// CHECK-NEXT:   "begin": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 150
+// CHECK-NEXT:   },
+// CHECK-NEXT:   "end": {
+// CHECK-NEXT:    "col": 1,
+// CHECK-NEXT:    "file": "{{.*}}",
+// CHECK-NEXT:    "line": 153
+// CHECK-NEXT:   }
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "name": "TestNonADLCall3",
+// CHECK-NEXT:  "type": {
+// CHECK-NEXT:   "qualType": "void ()"
+// CHECK-NEXT:  },
+// CHECK-NEXT:  "inner": [
+// CHECK-NEXT:   {
+// CHECK-NEXT:    "id": "0x{{.*}}",
+// CHECK-NEXT:    "kind": "CompoundStmt",
+// CHECK-NEXT:    "range": {
+// CHECK-NEXT:     "begin": {
+// CHECK-NEXT:      "col": 24,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 150
+// CHECK-NEXT:     },
+// CHECK-NEXT:     "end": {
+// CHECK-NEXT:      "col": 1,
+// CHECK-NEXT:      "file": "{{.*}}",
+// CHECK-NEXT:      "line": 153
+// CHECK-NEXT:     }
+// CHECK-NEXT:    },
+// CHECK-NEXT:    "inner": [
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "DeclStmt",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 151
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 6,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 151
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "VarDecl",
+// CHECK-NEXT:        "loc": {
+// CHECK-NEXT:         "col": 5,
+// CHECK-NEXT:         "file": "{{.*}}",
+// CHECK-NEXT:         "line": 151
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 151
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 151
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "isUsed": true,
+// CHECK-NEXT:        "name": "x",
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "init": "call",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "CXXConstructExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 151
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 151
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "rvalue"
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     },
+// CHECK-NEXT:     {
+// CHECK-NEXT:      "id": "0x{{.*}}",
+// CHECK-NEXT:      "kind": "CallExpr",
+// CHECK-NEXT:      "range": {
+// CHECK-NEXT:       "begin": {
+// CHECK-NEXT:        "col": 3,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 152
+// CHECK-NEXT:       },
+// CHECK-NEXT:       "end": {
+// CHECK-NEXT:        "col": 6,
+// CHECK-NEXT:        "file": "{{.*}}",
+// CHECK-NEXT:        "line": 152
+// CHECK-NEXT:       }
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "type": {
+// CHECK-NEXT:       "qualType": "void"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "valueCategory": "rvalue",
+// CHECK-NEXT:      "inner": [
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "ImplicitCastExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 152
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 3,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 152
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "void (*)(NS::X)"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "castKind": "FunctionToPointerDecay",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "DeclRefExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 152
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 3,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 152
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "void (NS::X)"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "referencedDecl": {
+// CHECK-NEXT:           "id": "0x{{.*}}",
+// CHECK-NEXT:           "kind": "FunctionDecl",
+// CHECK-NEXT:           "name": "f",
+// CHECK-NEXT:           "type": {
+// CHECK-NEXT:            "qualType": "void (NS::X)"
+// CHECK-NEXT:           }
+// CHECK-NEXT:          }
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       },
+// CHECK-NEXT:       {
+// CHECK-NEXT:        "id": "0x{{.*}}",
+// CHECK-NEXT:        "kind": "CXXConstructExpr",
+// CHECK-NEXT:        "range": {
+// CHECK-NEXT:         "begin": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 152
+// CHECK-NEXT:         },
+// CHECK-NEXT:         "end": {
+// CHECK-NEXT:          "col": 5,
+// CHECK-NEXT:          "file": "{{.*}}",
+// CHECK-NEXT:          "line": 152
+// CHECK-NEXT:         }
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "type": {
+// CHECK-NEXT:         "qualType": "NS::X"
+// CHECK-NEXT:        },
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "inner": [
+// CHECK-NEXT:         {
+// CHECK-NEXT:          "id": "0x{{.*}}",
+// CHECK-NEXT:          "kind": "ImplicitCastExpr",
+// CHECK-NEXT:          "range": {
+// CHECK-NEXT:           "begin": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 152
+// CHECK-NEXT:           },
+// CHECK-NEXT:           "end": {
+// CHECK-NEXT:            "col": 5,
+// CHECK-NEXT:            "file": "{{.*}}",
+// CHECK-NEXT:            "line": 152
+// CHECK-NEXT:           }
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "type": {
+// CHECK-NEXT:           "qualType": "const NS::X"
+// CHECK-NEXT:          },
+// CHECK-NEXT:          "valueCategory": "lvalue",
+// CHECK-NEXT:          "castKind": "NoOp",
+// CHECK-NEXT:          "inner": [
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "DeclRefExpr",
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 152
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 5,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 152
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "NS::X"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "valueCategory": "lvalue",
+// CHECK-NEXT:            "referencedDecl": {
+// CHECK-NEXT:             "id": "0x{{.*}}",
+// CHECK-NEXT:             "kind": "VarDecl",
+// CHECK-NEXT:             "name": "x",
+// CHECK-NEXT:             "type": {
+// CHECK-NEXT:              "qualType": "NS::X"
+// CHECK-NEXT:             }
+// CHECK-NEXT:            }
+// CHECK-NEXT:           }
+// CHECK-NEXT:          ]
+// CHECK-NEXT:         }
+// CHECK-NEXT:        ]
+// CHECK-NEXT:       }
+// CHECK-NEXT:      ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:    ]
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
+// CHECK-NEXT: }

From 11e05491454c143474a1b261bc1c2b7fc6ef51c4 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Mon, 27 May 2019 14:16:15 +0000
Subject: [PATCH 0297/1176] DWARFDebugArangeSet: Remove references to
 SymbolFileDWARF

This class does not depend on SymbolFileDWARF. Instead, include more
appropriate low-level headers.

llvm-svn: 361765
---
 .../source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp | 6 ++----
 lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h  | 5 ++---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp
index 2f878b8e8549c..86ce3b329b25b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp
@@ -7,11 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "DWARFDebugArangeSet.h"
-
-#include "SymbolFileDWARF.h"
-#include "lldb/Utility/Stream.h"
+#include "DWARFDataExtractor.h"
 #include "llvm/Object/Error.h"
-#include <assert.h>
+#include <cassert>
 
 using namespace lldb_private;
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h
index 2a01cdd13e683..db0cf22a3f45e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h
@@ -9,11 +9,10 @@
 #ifndef SymbolFileDWARF_DWARFDebugArangeSet_h_
 #define SymbolFileDWARF_DWARFDebugArangeSet_h_
 
-#include "SymbolFileDWARF.h"
+#include "lldb/Core/dwarf.h"
+#include <cstdint>
 #include <vector>
 
-class SymbolFileDWARF;
-
 class DWARFDebugArangeSet {
 public:
   struct Header {

From bcc0cedf77054dc0ab7aac9d652118def4f5d8e7 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Mon, 27 May 2019 14:17:32 +0000
Subject: [PATCH 0298/1176] When dumping the AST to JSON, dump the argument
 name to a sizeof pack expression.

llvm-svn: 361766
---
 clang/include/clang/AST/JSONNodeDumper.h | 1 +
 clang/lib/AST/JSONNodeDumper.cpp         | 4 ++++
 clang/test/AST/ast-dump-expr-json.cpp    | 3 ++-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index b966747f42399..dfad90b16d791 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -244,6 +244,7 @@ class JSONNodeDumper
   void VisitImplicitCastExpr(const ImplicitCastExpr *ICE);
   void VisitCallExpr(const CallExpr *CE);
   void VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *TTE);
+  void VisitSizeOfPackExpr(const SizeOfPackExpr *SOPE);
   void VisitUnresolvedLookupExpr(const UnresolvedLookupExpr *ULE);
   void VisitAddrLabelExpr(const AddrLabelExpr *ALE);
 
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index afe43e436dcf9..a6600c0c84deb 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -894,6 +894,10 @@ void JSONNodeDumper::VisitUnaryExprOrTypeTraitExpr(
     JOS.attribute("argType", createQualType(TTE->getArgumentType()));
 }
 
+void JSONNodeDumper::VisitSizeOfPackExpr(const SizeOfPackExpr *SOPE) {
+  VisitNamedDecl(SOPE->getPack());
+}
+
 void JSONNodeDumper::VisitUnresolvedLookupExpr(
     const UnresolvedLookupExpr *ULE) {
   JOS.attribute("usesADL", ULE->requiresADL());
diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index 81c3e66c39523..0c541666d26b5 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -1505,7 +1505,8 @@ void TestNonADLCall3() {
 // CHECK-NEXT:        "type": {
 // CHECK-NEXT:         "qualType": "unsigned long"
 // CHECK-NEXT:        },
-// CHECK-NEXT:        "valueCategory": "rvalue"
+// CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "Ts"
 // CHECK-NEXT:       },
 // CHECK-NEXT:       {
 // CHECK-NEXT:        "id": "0x{{.*}}",

From e7b3b80fb1254d1599ed9add544a4cf045d7b588 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Mon, 27 May 2019 14:25:04 +0000
Subject: [PATCH 0299/1176] When dumping the AST to JSON, dump the declared
 name of a MemberExpr operand.

llvm-svn: 361767
---
 clang/lib/AST/JSONNodeDumper.cpp      | 5 +++--
 clang/test/AST/ast-dump-expr-json.c   | 2 ++
 clang/test/AST/ast-dump-expr-json.cpp | 9 +++++++++
 clang/test/AST/ast-dump-stmt-json.cpp | 2 ++
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index a6600c0c84deb..760ea338121c3 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -829,9 +829,10 @@ void JSONNodeDumper::VisitCompoundAssignOperator(
 void JSONNodeDumper::VisitMemberExpr(const MemberExpr *ME) {
   // Note, we always write this Boolean field because the information it conveys
   // is critical to understanding the AST node.
+  ValueDecl *VD = ME->getMemberDecl();
+  JOS.attribute("name", VD && VD->getDeclName() ? VD->getNameAsString() : "");
   JOS.attribute("isArrow", ME->isArrow());
-  JOS.attribute("referencedMemberDecl",
-                createPointerRepresentation(ME->getMemberDecl()));
+  JOS.attribute("referencedMemberDecl", createPointerRepresentation(VD));
 }
 
 void JSONNodeDumper::VisitCXXNewExpr(const CXXNewExpr *NE) {
diff --git a/clang/test/AST/ast-dump-expr-json.c b/clang/test/AST/ast-dump-expr-json.c
index a2d6aab142b81..77183be2f9138 100644
--- a/clang/test/AST/ast-dump-expr-json.c
+++ b/clang/test/AST/ast-dump-expr-json.c
@@ -4433,6 +4433,7 @@ void PrimaryExpressions(int a) {
 // CHECK-NEXT:         "qualType": "int"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "lvalue",
+// CHECK-NEXT:        "name": "a",
 // CHECK-NEXT:        "isArrow": false,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -4510,6 +4511,7 @@ void PrimaryExpressions(int a) {
 // CHECK-NEXT:         "qualType": "int"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "lvalue",
+// CHECK-NEXT:        "name": "a",
 // CHECK-NEXT:        "isArrow": true,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index 0c541666d26b5..15a93a08a8166 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -2584,6 +2584,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "func",
 // CHECK-NEXT:        "isArrow": false,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -2679,6 +2680,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "func",
 // CHECK-NEXT:        "isArrow": true,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -2798,6 +2800,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "foo",
 // CHECK-NEXT:        "isArrow": true,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -2896,6 +2899,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "foo",
 // CHECK-NEXT:        "isArrow": false,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -2970,6 +2974,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "~S",
 // CHECK-NEXT:        "isArrow": true,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -3067,6 +3072,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "~S",
 // CHECK-NEXT:        "isArrow": false,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -3141,6 +3147,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "~S",
 // CHECK-NEXT:        "isArrow": false,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -3215,6 +3222,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "~S",
 // CHECK-NEXT:        "isArrow": true,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
@@ -3312,6 +3320,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:         "qualType": "<bound member function type>"
 // CHECK-NEXT:        },
 // CHECK-NEXT:        "valueCategory": "rvalue",
+// CHECK-NEXT:        "name": "~U",
 // CHECK-NEXT:        "isArrow": true,
 // CHECK-NEXT:        "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:        "inner": [
diff --git a/clang/test/AST/ast-dump-stmt-json.cpp b/clang/test/AST/ast-dump-stmt-json.cpp
index 448857ea3e698..883ac59409e9f 100644
--- a/clang/test/AST/ast-dump-stmt-json.cpp
+++ b/clang/test/AST/ast-dump-stmt-json.cpp
@@ -4653,6 +4653,7 @@ void TestIteration() {
 // CHECK-NEXT:               "qualType": "<bound member function type>"
 // CHECK-NEXT:              },
 // CHECK-NEXT:              "valueCategory": "rvalue",
+// CHECK-NEXT:              "name": "begin",
 // CHECK-NEXT:              "isArrow": false,
 // CHECK-NEXT:              "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:              "inner": [
@@ -4799,6 +4800,7 @@ void TestIteration() {
 // CHECK-NEXT:               "qualType": "<bound member function type>"
 // CHECK-NEXT:              },
 // CHECK-NEXT:              "valueCategory": "rvalue",
+// CHECK-NEXT:              "name": "end",
 // CHECK-NEXT:              "isArrow": false,
 // CHECK-NEXT:              "referencedMemberDecl": "0x{{.*}}",
 // CHECK-NEXT:              "inner": [

From 1b0ae8f05f1a5f701f14e19bee165ba93cd157ef Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Mon, 27 May 2019 14:29:10 +0000
Subject: [PATCH 0300/1176] When dumping the AST to JSON, dump whether a
 function is variadic or not.

llvm-svn: 361768
---
 clang/lib/AST/JSONNodeDumper.cpp      | 2 ++
 clang/test/AST/ast-dump-expr-json.cpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 760ea338121c3..08f63fba98c5f 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -572,6 +572,8 @@ void JSONNodeDumper::VisitFunctionDecl(const FunctionDecl *FD) {
   attributeOnlyIfTrue("pure", FD->isPure());
   attributeOnlyIfTrue("explicitlyDeleted", FD->isDeletedAsWritten());
   attributeOnlyIfTrue("constexpr", FD->isConstexpr());
+  attributeOnlyIfTrue("variadic", FD->isVariadic());
+
   if (FD->isDefaulted())
     JOS.attribute("explicitlyDefaulted",
                   FD->isDeleted() ? "deleted" : "default");
diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index 15a93a08a8166..c6ccc950e2cef 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -4574,6 +4574,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:             "qualType": "auto (int, ...) const"
 // CHECK-NEXT:            },
 // CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "variadic": true,
 // CHECK-NEXT:            "inner": [
 // CHECK-NEXT:             {
 // CHECK-NEXT:              "id": "0x{{.*}}",
@@ -4673,6 +4674,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:            },
 // CHECK-NEXT:            "storageClass": "static",
 // CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "variadic": true,
 // CHECK-NEXT:            "inner": [
 // CHECK-NEXT:             {
 // CHECK-NEXT:              "id": "0x{{.*}}",

From e091ab1b2df75b73815c22d0869e9f041679c09a Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Mon, 27 May 2019 14:34:31 +0000
Subject: [PATCH 0301/1176] When dumping the AST to JSON, dump the type
 information from a typeid expression with a type operand.

llvm-svn: 361769
---
 clang/include/clang/AST/JSONNodeDumper.h | 12 +++++++-----
 clang/lib/AST/ASTDumper.cpp              |  4 ++--
 clang/lib/AST/JSONNodeDumper.cpp         | 10 ++++++++++
 clang/test/AST/ast-dump-expr-json.cpp    | 13 +++++++++++--
 4 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h
index dfad90b16d791..9408a2ef4cbfe 100644
--- a/clang/include/clang/AST/JSONNodeDumper.h
+++ b/clang/include/clang/AST/JSONNodeDumper.h
@@ -121,6 +121,7 @@ class JSONNodeDumper
   friend class JSONDumper;
 
   const SourceManager &SM;
+  ASTContext& Ctx;
   PrintingPolicy PrintPolicy;
   const comments::CommandTraits *Traits;
 
@@ -172,11 +173,11 @@ class JSONNodeDumper
   StringRef getCommentCommandName(unsigned CommandID) const;
 
 public:
-  JSONNodeDumper(raw_ostream &OS, const SourceManager &SrcMgr,
+  JSONNodeDumper(raw_ostream &OS, const SourceManager &SrcMgr, ASTContext &Ctx,
                  const PrintingPolicy &PrintPolicy,
                  const comments::CommandTraits *Traits)
-      : NodeStreamer(OS), SM(SrcMgr), PrintPolicy(PrintPolicy), Traits(Traits) {
-  }
+      : NodeStreamer(OS), SM(SrcMgr), Ctx(Ctx), PrintPolicy(PrintPolicy),
+        Traits(Traits) {}
 
   void Visit(const Attr *A);
   void Visit(const Stmt *Node);
@@ -247,6 +248,7 @@ class JSONNodeDumper
   void VisitSizeOfPackExpr(const SizeOfPackExpr *SOPE);
   void VisitUnresolvedLookupExpr(const UnresolvedLookupExpr *ULE);
   void VisitAddrLabelExpr(const AddrLabelExpr *ALE);
+  void VisitCXXTypeidExpr(const CXXTypeidExpr *CTE);
 
   void VisitIntegerLiteral(const IntegerLiteral *IL);
   void VisitCharacterLiteral(const CharacterLiteral *CL);
@@ -360,10 +362,10 @@ class JSONDumper : public ASTNodeTraverser<JSONDumper, JSONNodeDumper> {
   }
 
 public:
-  JSONDumper(raw_ostream &OS, const SourceManager &SrcMgr,
+  JSONDumper(raw_ostream &OS, const SourceManager &SrcMgr, ASTContext &Ctx,
              const PrintingPolicy &PrintPolicy,
              const comments::CommandTraits *Traits)
-      : NodeDumper(OS, SrcMgr, PrintPolicy, Traits) {}
+      : NodeDumper(OS, SrcMgr, Ctx, PrintPolicy, Traits) {}
 
   JSONNodeDumper &doGetNodeDelegate() { return NodeDumper; }
 
diff --git a/clang/lib/AST/ASTDumper.cpp b/clang/lib/AST/ASTDumper.cpp
index 0e0ddcdf8da6e..22196a1a26004 100644
--- a/clang/lib/AST/ASTDumper.cpp
+++ b/clang/lib/AST/ASTDumper.cpp
@@ -180,11 +180,11 @@ LLVM_DUMP_METHOD void Decl::dump() const { dump(llvm::errs()); }
 
 LLVM_DUMP_METHOD void Decl::dump(raw_ostream &OS, bool Deserialize,
                                  ASTDumpOutputFormat Format) const {
-  const ASTContext &Ctx = getASTContext();
+  ASTContext &Ctx = getASTContext();
   const SourceManager &SM = Ctx.getSourceManager();
 
   if (ADOF_JSON == Format) {
-    JSONDumper P(OS, SM, Ctx.getPrintingPolicy(),
+    JSONDumper P(OS, SM, Ctx, Ctx.getPrintingPolicy(),
                  &Ctx.getCommentCommandTraits());
     (void)Deserialize; // FIXME?
     P.Visit(this);
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 08f63fba98c5f..43cad2bf26ee0 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -917,6 +917,16 @@ void JSONNodeDumper::VisitAddrLabelExpr(const AddrLabelExpr *ALE) {
   JOS.attribute("labelDeclId", createPointerRepresentation(ALE->getLabel()));
 }
 
+void JSONNodeDumper::VisitCXXTypeidExpr(const CXXTypeidExpr *CTE) {
+  if (CTE->isTypeOperand()) {
+    QualType Adjusted = CTE->getTypeOperand(Ctx);
+    QualType Unadjusted = CTE->getTypeOperandSourceInfo()->getType();
+    JOS.attribute("typeArg", createQualType(Unadjusted));
+    if (Adjusted != Unadjusted)
+      JOS.attribute("adjustedTypeArg", createQualType(Adjusted));
+  }
+}
+
 void JSONNodeDumper::VisitIntegerLiteral(const IntegerLiteral *IL) {
   JOS.attribute("value",
                 IL->getValue().toString(
diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index c6ccc950e2cef..fa6d6ed381575 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -3447,7 +3447,10 @@ void TestNonADLCall3() {
 // CHECK-NEXT:      "type": {
 // CHECK-NEXT:       "qualType": "const std::type_info"
 // CHECK-NEXT:      },
-// CHECK-NEXT:      "valueCategory": "lvalue"
+// CHECK-NEXT:      "valueCategory": "lvalue",
+// CHECK-NEXT:      "typeArg": {
+// CHECK-NEXT:       "qualType": "S"
+// CHECK-NEXT:      }
 // CHECK-NEXT:     },
 // CHECK-NEXT:     {
 // CHECK-NEXT:      "id": "0x{{.*}}",
@@ -3467,7 +3470,13 @@ void TestNonADLCall3() {
 // CHECK-NEXT:      "type": {
 // CHECK-NEXT:       "qualType": "const std::type_info"
 // CHECK-NEXT:      },
-// CHECK-NEXT:      "valueCategory": "lvalue"
+// CHECK-NEXT:      "valueCategory": "lvalue",
+// CHECK-NEXT:      "typeArg": {
+// CHECK-NEXT:       "qualType": "const volatile S"
+// CHECK-NEXT:      },
+// CHECK-NEXT:      "adjustedTypeArg": {
+// CHECK-NEXT:       "qualType": "S"
+// CHECK-NEXT:      }
 // CHECK-NEXT:     }
 // CHECK-NEXT:    ]
 // CHECK-NEXT:   }

From 4a7c4069aeebaf98e6f0508732594383dcd9f051 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <Alexander.Timofeev@amd.com>
Date: Mon, 27 May 2019 15:03:29 +0000
Subject: [PATCH 0302/1176]     [AMDGPU] Fix for the address sanitizer failure
 caused by the ifollowing commit:

    1a8b2ea611cf4ca7cb09562e0238cfefa27c05b5  Divergence driven ISel. Assign register class for cross block values according to the divergence.

llvm-svn: 361770
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 8ad7a52c92bfd..4fd28fc6d81e3 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -595,7 +595,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 
             unsigned OpNo = UseMI->getOperandNo(&Use);
             const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
-            if (Desc.OpInfo && Desc.OpInfo[OpNo].RegClass != -1) {
+            if (!Desc.isPseudo() && Desc.OpInfo &&
+                OpNo <= Desc.getNumOperands() &&
+                Desc.OpInfo[OpNo].RegClass != -1) {
               const TargetRegisterClass *OpRC =
                   TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);
               if (!TRI->isSGPRClass(OpRC) && OpRC != &AMDGPU::VS_32RegClass &&

From 20c3c4fe5a82ada9c854eec2f4a4419edbad5cb4 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Mon, 27 May 2019 16:20:45 +0000
Subject: [PATCH 0303/1176] [clang] Respect TerseOutput when printing lambdas

Reviewers: ilya-biryukov, hokein, sammccall

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62487

llvm-svn: 361771
---
 clang/lib/AST/StmtPrinter.cpp           |  5 ++++-
 clang/unittests/AST/StmtPrinterTest.cpp | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index ea0b472d98b8b..b06edb4b6db15 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1950,7 +1950,10 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) {
 
   // Print the body.
   OS << ' ';
-  PrintRawCompoundStmt(Node->getBody());
+  if (Policy.TerseOutput)
+    OS << "{}";
+  else
+    PrintRawCompoundStmt(Node->getBody());
 }
 
 void StmtPrinter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *Node) {
diff --git a/clang/unittests/AST/StmtPrinterTest.cpp b/clang/unittests/AST/StmtPrinterTest.cpp
index 0d383d547a2ec..080c18b0737b2 100644
--- a/clang/unittests/AST/StmtPrinterTest.cpp
+++ b/clang/unittests/AST/StmtPrinterTest.cpp
@@ -231,3 +231,17 @@ class A {
   ASSERT_TRUE(PrintedStmtObjCMatches(ObjCSource, returnStmt().bind("id"),
                                      "return self->ivar;\n"));
 }
+
+TEST(StmtPrinter, TerseOutputWithLambdas) {
+  const char *CPPSource = "auto lamb = []{ return 0; };";
+
+  // body is printed when TerseOutput is off(default).
+  ASSERT_TRUE(PrintedStmtCXXMatches(StdVer::CXX11, CPPSource,
+                                    lambdaExpr(anything()).bind("id"),
+                                    "[] {\n    return 0;\n}"));
+
+  // body not printed when TerseOutput is on.
+  ASSERT_TRUE(PrintedStmtCXXMatches(
+      StdVer::CXX11, CPPSource, lambdaExpr(anything()).bind("id"), "[] {}",
+      PolicyAdjusterType([](PrintingPolicy &PP) { PP.TerseOutput = true; })));
+}

From d99f9373d33282ba9928657392fd5da773dff02f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 27 May 2019 16:33:15 +0000
Subject: [PATCH 0304/1176] [LLParser] Fix uninitialized flag variable
 warnings. NFCI.

Fixes a large number of warnings in the scan-build report on llvm builds.

llvm-svn: 361772
---
 llvm/lib/AsmParser/LLParser.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f16f6a2af492f..6af084edbd8c9 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8484,7 +8484,7 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
     return true;
 
   do {
-    unsigned Flag;
+    unsigned Flag = 0;
     switch (Lex.getKind()) {
     case lltok::kw_linkage:
       Lex.Lex();
@@ -8536,7 +8536,7 @@ bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
   assert(Lex.getKind() == lltok::kw_varFlags);
   Lex.Lex();
 
-  unsigned Flag;
+  unsigned Flag = 0;
   if (ParseToken(lltok::colon, "expected ':' here") ||
       ParseToken(lltok::lparen, "expected '(' here") ||
       ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||

From ebb053b139e51270dd8e7f0c1fb8ea5e69535529 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 27 May 2019 16:39:25 +0000
Subject: [PATCH 0305/1176] [SelectionDAG] GetDemandedBits - add demanded
 elements wrapper implementation

The DemandedElts variable is pretty much inert at the moment - the original GetDemandedBits implementation calls it with an 'all ones' DemandedElts value so the function is active and behaves exactly as it used to.

llvm-svn: 361773
---
 llvm/include/llvm/CodeGen/SelectionDAG.h       | 18 ++++++++++++++----
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 +++++++++++++++-
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 08333870467a4..070c644e337fd 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1434,13 +1434,23 @@ class SelectionDAG {
   SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond,
                     const SDLoc &dl);
 
-  /// See if the specified operand can be simplified with the knowledge that only
-  /// the bits specified by Mask are used.  If so, return the simpler operand,
-  /// otherwise return a null SDValue.
+  /// See if the specified operand can be simplified with the knowledge that
+  /// only the bits specified by DemandedBits are used.  If so, return the
+  /// simpler operand, otherwise return a null SDValue.
   ///
   /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
   /// simplify nodes with multiple uses more aggressively.)
-  SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+  SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits);
+
+  /// See if the specified operand can be simplified with the knowledge that
+  /// only the bits specified by DemandedBits are used in the elements specified
+  /// by DemandedElts.  If so, return the simpler operand, otherwise return a
+  /// null SDValue.
+  ///
+  /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+  /// simplify nodes with multiple uses more aggressively.)
+  SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits,
+                          const APInt &DemandedElts);
 
   /// Return true if the sign bit of Op is known to be zero.
   /// We use this predicate to simplify operations downstream.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ad534ab497b4d..46474b9741221 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2122,10 +2122,24 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
 }
 
 /// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used.
+/// the bits specified by DemandedBits are used.
 /// TODO: really we should be making this into the DAG equivalent of
 /// SimplifyMultipleUseDemandedBits and not generate any new nodes.
 SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
+  EVT VT = V.getValueType();
+  APInt DemandedElts = VT.isVector()
+                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
+                           : APInt(1, 1);
+  return GetDemandedBits(V, DemandedBits, DemandedElts);
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by DemandedBits are used in the elements specified by
+/// DemandedElts.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
+                                      const APInt &DemandedElts) {
   switch (V.getOpcode()) {
   default:
     break;

From 5379f1a6c586d6723efcc47ae6d20114c255a4f8 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 27 May 2019 17:03:57 +0000
Subject: [PATCH 0306/1176] Include what you use in AArch64AsmBackend.cpp

AArch64AsmBackend.cpp was not using any APIs from AArch64.h, and was
only including it for transitive dependencies.  Doing so is problematic
from include-what-you-use perspective, but it is also a layering issue
(it creates a dependency cycle between the primary AArch64 target
library and the MCTargetDesc library).

llvm-svn: 361774
---
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index df94c3187eb85..6418211a4f55b 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -6,9 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AArch64.h"
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -21,8 +22,10 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCTargetOptions.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
 namespace {

From 00a538a230e38c033f3d997fe0edadf2ea03dc42 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Mon, 27 May 2019 17:38:41 +0000
Subject: [PATCH 0307/1176] NFC: Change usage of 'DenseSet' to 'DenseSetImpl'
 in DenseSetImpl::ConstIterator.

Summary:
Change usage of 'DenseSet' to 'DenseSetImpl' in a friend declaration within DenseSetImpl::ConstIterator. 'ConstIterator' was never updated when DenseSet was split into an impl when adding support for DenseSetImpl.

This fixes build errors on MSVC when forward declaring DenseSet as this friend decl does not declare the template arguments as well.

Reviewers: jpienaar

Reviewed By: jpienaar

Subscribers: jpienaar, lebedev.ri, dexonsmith, kristina, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62467

llvm-svn: 361775
---
 llvm/include/llvm/ADT/DenseSet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h
index 5eaaf2773ac11..9afb715ae1db5 100644
--- a/llvm/include/llvm/ADT/DenseSet.h
+++ b/llvm/include/llvm/ADT/DenseSet.h
@@ -130,7 +130,7 @@ class DenseSetImpl {
 
   class ConstIterator {
     typename MapTy::const_iterator I;
-    friend class DenseSet;
+    friend class DenseSetImpl;
     friend class Iterator;
 
   public:

From f4040a0dd81b1bb4d1a4704492d1642c09190f56 Mon Sep 17 00:00:00 2001
From: Alexander Timofeev <Alexander.Timofeev@amd.com>
Date: Mon, 27 May 2019 18:17:21 +0000
Subject: [PATCH 0308/1176] [AMDGPU] Fix for the address sanitizer failure.
 Fixing typo

llvm-svn: 361776
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 4fd28fc6d81e3..d20910baed307 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -596,7 +596,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
             unsigned OpNo = UseMI->getOperandNo(&Use);
             const MCInstrDesc &Desc = TII->get(UseMI->getOpcode());
             if (!Desc.isPseudo() && Desc.OpInfo &&
-                OpNo <= Desc.getNumOperands() &&
+                OpNo < Desc.getNumOperands() &&
                 Desc.OpInfo[OpNo].RegClass != -1) {
               const TargetRegisterClass *OpRC =
                   TRI->getRegClass(Desc.OpInfo[OpNo].RegClass);

From 9c70c574b4fec75f4c8a530891e6e412e7ad77be Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Mon, 27 May 2019 18:26:29 +0000
Subject: [PATCH 0309/1176] [SelectionDAG] Enhance the simplification of
 `copyto` from `implicit-def`.

Summary:
- The current implementation simplifies the case where the source of
  `copyto` is `implicit-def`ed. However, it only works when that
  `implicit-def` is single-used since it detects that from
  `implicit-def` and cannot determine which destination vreg should be
  used if there are multiple uses.
- This patch changes that detection when `copyto` is being emitted. If
  that `copyto`'s source is defined from `implicit-def`, it simplifies
  it. Hence, it works even that `implicit-def` is multi-used.
- Except it simplifies the internal IR, it won't improve the quality of
  code generation. However, it helps to detect 'implicit-def` in a
  straight-forward manner in some passes, such as `si-i1-copies`. A test
  case is added.

Reviewers: sunfish, nhaehnle

Subscribers: jvesely, hiraditya, asbirlea, llvm-commits, yaxunl

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62342

llvm-svn: 361777
---
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 40 +++++++------------
 llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  |  5 ---
 llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll       | 19 +++++++++
 llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll | 22 ++++++++++
 4 files changed, 55 insertions(+), 31 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4b78d1bb6b160..8533a94c48a6a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -186,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
   assert(isNew && "Node emitted out of order - early");
 }
 
-/// getDstOfCopyToRegUse - If the only use of the specified result number of
-/// node is a CopyToReg, return its destination register. Return 0 otherwise.
-unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
-                                                unsigned ResNo) const {
-  if (!Node->hasOneUse())
-    return 0;
-
-  SDNode *User = *Node->use_begin();
-  if (User->getOpcode() == ISD::CopyToReg &&
-      User->getOperand(2).getNode() == Node &&
-      User->getOperand(2).getResNo() == ResNo) {
-    unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg))
-      return Reg;
-  }
-  return 0;
-}
-
 void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
                                        MachineInstrBuilder &MIB,
                                        const MCInstrDesc &II,
@@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op,
   if (Op.isMachineOpcode() &&
       Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
     // Add an IMPLICIT_DEF instruction before every use.
-    unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
     // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
     // does not include operand register class info.
-    if (!VReg) {
-      const TargetRegisterClass *RC = TLI->getRegClassFor(
-          Op.getSimpleValueType(), Op.getNode()->isDivergent());
-      VReg = MRI->createVirtualRegister(RC);
-    }
+    const TargetRegisterClass *RC = TLI->getRegClassFor(
+        Op.getSimpleValueType(), Op.getNode()->isDivergent());
+    unsigned VReg = MRI->createVirtualRegister(RC);
     BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
             TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
     return VReg;
@@ -1011,14 +990,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
   case ISD::TokenFactor: // fall thru
     break;
   case ISD::CopyToReg: {
-    unsigned SrcReg;
+    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     SDValue SrcVal = Node->getOperand(2);
+    if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+        SrcVal.isMachineOpcode() &&
+        SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+      // Instead building a COPY to that vreg destination, build an
+      // IMPLICIT_DEF instruction instead.
+      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+              TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+      break;
+    }
+    unsigned SrcReg;
     if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
       SrcReg = R->getReg();
     else
       SrcReg = getVR(SrcVal, VRBaseMap);
 
-    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
     if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
       break;
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 42f7846fe7c3a..cfe99dd977b5b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -42,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
                        unsigned SrcReg,
                        DenseMap<SDValue, unsigned> &VRBaseMap);
 
-  /// getDstOfCopyToRegUse - If the only use of the specified result number of
-  /// node is a CopyToReg, return its destination register. Return 0 otherwise.
-  unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
-                                    unsigned ResNo) const;
-
   void CreateVirtualRegisters(SDNode *Node,
                               MachineInstrBuilder &MIB,
                               const MCInstrDesc &II,
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
index 5b25271ce1717..7286d9785ed9e 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -37,3 +37,22 @@ bb6:                                              ; preds = %bb4, %bb3
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
+
+; Make sure this won't crash.
+; SI-LABEL: {{^}}vcopy_i1_undef
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) {
+entry:
+  br i1 undef, label %exit, label %false
+
+false:
+  %x = load <2 x float>, <2 x float> addrspace(1)* %p
+  %cmp = fcmp one <2 x float> %x, zeroinitializer
+  br label %exit
+
+exit:
+  %c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ]
+  %ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0>
+  ret <2 x float> %ret
+}
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
new file mode 100644
index 0000000000000..fb540c3a6baba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/implicit-def-muse.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel -verify-machineinstrs -o - %s | FileCheck %s
+
+; CHECK-LABEL: vcopy_i1_undef
+; CHECK: IMPLICIT_DEF
+; CHECK-NOT: COPY
+; CHECK: IMPLICIT_DEF
+; CHECK-NOT: COPY
+; CHECK: .false:
+define <2 x float> @vcopy_i1_undef(<2 x float> addrspace(1)* %p) {
+entry:
+  br i1 undef, label %exit, label %false
+
+false:
+  %x = load <2 x float>, <2 x float> addrspace(1)* %p
+  %cmp = fcmp one <2 x float> %x, zeroinitializer
+  br label %exit
+
+exit:
+  %c = phi <2 x i1> [ undef, %entry ], [ %cmp, %false ]
+  %ret = select <2 x i1> %c, <2 x float> <float 2.0, float 2.0>, <2 x float> <float 4.0, float 4.0>
+  ret <2 x float> %ret
+}

From e13ae3e4d82b95ca1086f658de00402ffb6b128e Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 27 May 2019 18:26:43 +0000
Subject: [PATCH 0310/1176] [SelectionDAG] fix formatting and redundant
 comments; NFC

There's a possible missing fold here for extracting from the
same source vector. It's similar to a check that we use to
squash a build vector with all extracted elements from the
same source vector.

llvm-svn: 361778
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 46474b9741221..2a4b709858ec9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4138,7 +4138,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
   return SDValue();
 }
 
-static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
                                   ArrayRef<SDValue> Ops,
                                   SelectionDAG &DAG) {
   assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
@@ -4989,9 +4991,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
     SDValue Ops[] = {N1, N2};
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   }
@@ -5409,9 +5410,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     break;
   }
   case ISD::CONCAT_VECTORS: {
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
     SDValue Ops[] = {N1, N2, N3};
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   }
@@ -7135,8 +7135,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       return V;
     break;
   case ISD::CONCAT_VECTORS:
-    // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
-    if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+    if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
       return V;
     break;
   case ISD::SELECT_CC:

From e32ff096858578f526b6d05ab97c8f083f2e1834 Mon Sep 17 00:00:00 2001
From: Volodymyr Sapsai <vsapsai@apple.com>
Date: Mon, 27 May 2019 19:15:30 +0000
Subject: [PATCH 0311/1176] [Preprocessor] Fix crash emitting note with
 framework location for "file not found" error.

A filename can be remapped with a header map to point to a framework
header and we can find the corresponding framework without the header.
But if the original filename doesn't have a remapped framework name,
we'll fail to find its location and will dereference a null pointer
during diagnostics emission.

Fix by tracking remappings better and emit the note only if a framework
is found before any of the remappings.

rdar://problem/48883447

Reviewers: arphaman, erik.pilkington, jkorous

Reviewed By: arphaman

Subscribers: dexonsmith, cfe-commits

Differential Revision: https://reviews.llvm.org/D61707

llvm-svn: 361779
---
 clang/include/clang/Lex/HeaderSearch.h        |  5 +++--
 clang/lib/Lex/HeaderSearch.cpp                |  5 ++++-
 .../TestFramework.hmap.json                   |  7 +++++++
 ...ader-missing-in-framework-with-headermap.c | 20 +++++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/Preprocessor/Inputs/include-header-missing-in-framework/TestFramework.hmap.json
 create mode 100644 clang/test/Preprocessor/include-header-missing-in-framework-with-headermap.c

diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 7488d9e0e3d78..d6602360728e9 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -392,8 +392,9 @@ class HeaderSearch {
   /// true.
   ///
   /// \param IsFrameworkFound If non-null, will be set to true if a framework is
-  /// found in any of searched SearchDirs. Doesn't guarantee the requested file
-  /// is found.
+  /// found in any of searched SearchDirs. Will be set to false if a framework
+  /// is found only through header maps. Doesn't guarantee the requested file is
+  /// found.
   const FileEntry *LookupFile(
       StringRef Filename, SourceLocation IncludeLoc, bool isAngled,
       const DirectoryLookup *FromDir, const DirectoryLookup *&CurDir,
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index af763059ea9a0..16a53bf634a71 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -869,7 +869,10 @@ const FileEntry *HeaderSearch::LookupFile(
         *IsMapped = true;
     }
     if (IsFrameworkFound)
-      *IsFrameworkFound |= IsFrameworkFoundInDir;
+      // Because we keep a filename remapped for subsequent search directory
+      // lookups, ignore IsFrameworkFoundInDir after the first remapping and not
+      // just for remapping in a current search directory.
+      *IsFrameworkFound |= (IsFrameworkFoundInDir && !CacheLookup.MappedName);
     if (!FE) continue;
 
     CurDir = &SearchDirs[i];
diff --git a/clang/test/Preprocessor/Inputs/include-header-missing-in-framework/TestFramework.hmap.json b/clang/test/Preprocessor/Inputs/include-header-missing-in-framework/TestFramework.hmap.json
new file mode 100644
index 0000000000000..193c7a779c61d
--- /dev/null
+++ b/clang/test/Preprocessor/Inputs/include-header-missing-in-framework/TestFramework.hmap.json
@@ -0,0 +1,7 @@
+{
+  "mappings" :
+    {
+     "RemappedHeader.h" : "TestFramework/RemappedHeader.h",
+     "TestFramework/BeforeRemapping.h" : "TestFramework/AfterRemapping.h"
+    }
+}
diff --git a/clang/test/Preprocessor/include-header-missing-in-framework-with-headermap.c b/clang/test/Preprocessor/include-header-missing-in-framework-with-headermap.c
new file mode 100644
index 0000000000000..da8d50cf915c9
--- /dev/null
+++ b/clang/test/Preprocessor/include-header-missing-in-framework-with-headermap.c
@@ -0,0 +1,20 @@
+// RUN: rm -f %t.hmap
+// RUN: %hmaptool write %S/Inputs/include-header-missing-in-framework/TestFramework.hmap.json %t.hmap
+// RUN: %clang_cc1 -fsyntax-only -F %S/Inputs -I %t.hmap -verify %s -DLATE_REMAPPING
+// RUN: %clang_cc1 -fsyntax-only -I %t.hmap -F %S/Inputs -verify %s
+
+// The test is similar to 'include-header-missing-in-framework.c' but covers
+// the case when a header is remapped to a framework-like path with a .hmap
+// file. And we can find the framework but not the header.
+
+#ifdef LATE_REMAPPING
+// Framework is found before remapping.
+#include <TestFramework/BeforeRemapping.h>
+// expected-error@-1 {{'TestFramework/BeforeRemapping.h' file not found}}
+// expected-note@-2 {{did not find header 'BeforeRemapping.h' in framework 'TestFramework' (loaded from}}
+
+#else
+// Framework is found after remapping.
+#include "RemappedHeader.h"
+// expected-error@-1 {{'RemappedHeader.h' file not found}}
+#endif // LATE_REMAPPING

From 2f99d009c1f7557ca5ee8eb55a02dd605710976d Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 27 May 2019 20:26:21 +0000
Subject: [PATCH 0312/1176] [SelectionDAG] fold concat of extract subvectors

This is derived from the related fold for build vectors.
We also have a version of this in DAGCombiner. The benefit of
having this fold at node creation time is (1) efficiency and
(2) preventing infinite looping from creating patterns that
should not exist in the first place.

Currently, the inf-loop could happen with MergeConsecutiveStores()
because it naively creates concat of extracts when forming a wider
vector store. That could fight with target-specific store narrowing.

llvm-svn: 361780
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a4b709858ec9..367b480c2114e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4160,6 +4160,31 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
   if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
     return DAG.getUNDEF(VT);
 
+  // Scan the operands and look for extract operations from a single source
+  // that correspond to insertion at the same location via this concatenation:
+  // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+  SDValue IdentitySrc;
+  bool IsIdentity = true;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    SDValue Op = Ops[i];
+    unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+        Op.getOperand(0).getValueType() != VT ||
+        (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+        !isa<ConstantSDNode>(Op.getOperand(1)) ||
+        Op.getConstantOperandVal(1) != IdentityIndex) {
+      IsIdentity = false;
+      break;
+    }
+    assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+           "Unexpected identity source vector for concat of extracts");
+    IdentitySrc = Op.getOperand(0);
+  }
+  if (IsIdentity) {
+    assert(IdentitySrc && "Failed to set source vector of extracts");
+    return IdentitySrc;
+  }
+
   // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
   // simplified to one big BUILD_VECTOR.
   // FIXME: Add support for SCALAR_TO_VECTOR as well.

From ca84c4be4b443df7e49202bb6ca42f831b524245 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 27 May 2019 20:37:31 +0000
Subject: [PATCH 0313/1176] RegAllocFast: Set MayLiveAcrossBlocks when
 allocating uses

Setting mayLiveOut based only on use instructions after allocating the
def block did not work if the use block was allocated before the def
block, since the virtual register uses were already removed.

Fixes bug 41973.

llvm-svn: 361781
---
 llvm/lib/CodeGen/RegAllocFast.cpp             | 28 +++++++-
 .../regalloc-fast-missing-live-out-spill.mir  | 66 +++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 4da0912508d20..2ffa5e389f89e 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -226,6 +226,7 @@ namespace {
                 MCPhysReg PhysReg);
 
     bool mayLiveOut(unsigned VirtReg);
+    bool mayLiveIn(unsigned VirtReg);
 
     void dumpState();
   };
@@ -270,8 +271,10 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
 
   // If this block loops back to itself, it would be necessary to check whether
   // the use comes after the def.
-  if (MBB->isSuccessor(MBB))
+  if (MBB->isSuccessor(MBB)) {
+    MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
     return true;
+  }
 
   // See if the first \p Limit uses of the register are all in the current
   // block.
@@ -288,6 +291,24 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
   return false;
 }
 
+/// Returns false if \p VirtReg is known to not be live into the current block.
+bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+  if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg)))
+    return !MBB->pred_empty();
+
+  // See if the first \p Limit def of the register are all in the current block.
+  static const unsigned Limit = 8;
+  unsigned C = 0;
+  for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+    if (DefInst.getParent() != MBB || ++C >= Limit) {
+      MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+      return !MBB->pred_empty();
+    }
+  }
+
+  return false;
+}
+
 /// Insert spill instruction for \p AssignedReg before \p Before. Update
 /// DBG_VALUEs with \p VirtReg operands with the stack slot.
 void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
@@ -1083,6 +1104,11 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
         // There is no need to allocate a register for an undef use.
         continue;
       }
+
+      // Populate MayLiveAcrossBlocks in case the use block is allocated before
+      // the def block (removing the vreg uses).
+      mayLiveIn(Reg);
+
       LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
       MCPhysReg PhysReg = LR.PhysReg;
       CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
diff --git a/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
new file mode 100644
index 0000000000000..0fe9f60897fd1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=x86_64-grtev4-linux-gnu -run-pass=regallocfast -o - %s | FileCheck %s
+
+# Bug 41973. Make sure %12 is detected as live out of %bb.0, even
+# though the use is allocated before the def block %bb.3. Previously
+# mayLiveOut only recorded on defs, and would not find the virtual
+# register use if it had already been replace with a physical
+# register.
+
+---
+name:            main
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: main
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   liveins: $edi, $rsi
+  ; CHECK:   MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rsi :: (store 8 into %stack.0)
+  ; CHECK:   JMP_1 %bb.3
+  ; CHECK: bb.1:
+  ; CHECK:   successors:
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
+  ; CHECK:   renamable $ecx = MOV32r0 implicit-def $eflags
+  ; CHECK:   renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
+  ; CHECK:   MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8)
+  ; CHECK:   MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0)
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
+  ; CHECK:   renamable $ecx = MOV32r0 implicit-def dead $eflags
+  ; CHECK:   renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit
+  ; CHECK:   MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.1)
+  ; CHECK:   JMP64r killed renamable $rax
+  bb.0:
+    liveins: $edi, $rsi
+
+    %4:gr64 = COPY $rsi
+    %2:gr32 = COPY $edi
+    %3:gr32 = COPY killed %2
+    %5:gr64 = COPY killed %4
+    %13:gr64 = COPY %5
+    JMP_1 %bb.3
+
+  bb.1:
+    successors:
+
+
+  bb.2:
+    %0:gr64 = COPY %12
+    %10:gr32 = MOV32r0 implicit-def $eflags
+    %11:gr64 = SUBREG_TO_REG 0, %10, %subreg.sub_32bit
+    MOV64mi32 %0, 1, $noreg, 0, $noreg, 0 :: (volatile store 8)
+    %13:gr64 = COPY %11
+
+  bb.3:
+    successors: %bb.2, %bb.1
+
+    %1:gr64 = COPY %13
+    %9:gr32 = MOV32r0 implicit-def dead $eflags
+    %8:gr64 = SUBREG_TO_REG 0, killed %9, %subreg.sub_32bit
+    %12:gr64 = COPY %8
+    JMP64r %1
+
+...

From 23343c5d90bb6a047506a9977a36d225a934d023 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Mon, 27 May 2019 21:34:31 +0000
Subject: [PATCH 0314/1176] [RuntimeDyld][ARM] Fix an incorrect assertion
 condition.

Fixes https://llvm.org/PR42036

llvm-svn: 361782
---
 .../ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index b7649e921c28e..3bec8b979f7df 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -224,7 +224,7 @@ class RuntimeDyldMachOARM
       HighInsn = (HighInsn & 0xf800) | ((Value >> 12) & 0x7ff);
 
       uint16_t LowInsn = readBytesUnaligned(LocalAddress + 2, 2);
-      assert((LowInsn & 0xf800) != 0xf8000 &&
+      assert((LowInsn & 0xf800) == 0xf800 &&
              "Unrecognized thumb branch encoding (BR22 low bits)");
       LowInsn = (LowInsn & 0xf800) | ((Value >> 1) & 0x7ff);
 

From 82dc06c340f248e5e5530f607fead15ccfcfde43 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 27 May 2019 23:10:42 +0000
Subject: [PATCH 0315/1176] llvm-undname: Extract demangleMD5Name() method; no
 behavior change

llvm-svn: 361783
---
 .../include/llvm/Demangle/MicrosoftDemangle.h |  1 +
 llvm/lib/Demangle/MicrosoftDemangle.cpp       | 64 ++++++++++---------
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
index 7ecf72775e34c..6b3c7c9967fbd 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -157,6 +157,7 @@ class Demangler {
   SymbolNode *demangleEncodedSymbol(StringView &MangledName,
                                     QualifiedNameNode *QN);
   SymbolNode *demangleDeclarator(StringView &MangledName);
+  SymbolNode *demangleMD5Name(StringView &MangledName);
 
   VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
                                                StorageClass SC);
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index c1e6e14fd498a..912ca64d76d2b 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -745,38 +745,42 @@ SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
   return Symbol;
 }
 
+SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
+  assert(MangledName.startsWith("??@"));
+  // This is an MD5 mangled name.  We can't demangle it, just return the
+  // mangled name.
+  // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
+  size_t MD5Last = MangledName.find('@', strlen("??@"));
+  if (MD5Last == StringView::npos) {
+    Error = true;
+    return nullptr;
+  }
+  const char *Start = MangledName.begin();
+  MangledName = MangledName.dropFront(MD5Last + 1);
+
+  // There are two additional special cases for MD5 names:
+  // 1. For complete object locators where the object name is long enough
+  //    for the object to have an MD5 name, the complete object locator is
+  //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
+  //    leading "??_R4". This is handled here.
+  // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
+  //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
+  //    instead of_CT??@...@8 with just one MD5 name. Since we don't yet
+  //    demangle catchable types anywhere, this isn't handled for MD5 names
+  //    either.
+  MangledName.consumeFront("??_R4@");
+
+  StringView MD5(Start, MangledName.begin());
+  SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
+  S->Name = synthesizeQualifiedName(Arena, MD5);
+
+  return S;
+}
+
 // Parser entry point.
 SymbolNode *Demangler::parse(StringView &MangledName) {
-  if (MangledName.startsWith("??@")) {
-    // This is an MD5 mangled name.  We can't demangle it, just return the
-    // mangled name.
-    // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
-    size_t MD5Last = MangledName.find('@', strlen("??@"));
-    if (MD5Last == StringView::npos) {
-      Error = true;
-      return nullptr;
-    }
-    const char* Start = MangledName.begin();
-    MangledName = MangledName.dropFront(MD5Last + 1);
-
-    // There are two additional special cases for MD5 names:
-    // 1. For complete object locators where the object name is long enough
-    //    for the object to have an MD5 name, the complete object locator is
-    //    called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
-    //    leading "??_R4". This is handled here.
-    // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
-    //    2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
-    //    instead of_CT??@...@8 with just one MD5 name. Since we don't yet
-    //    demangle catchable types anywhere, this isn't handled for MD5 names
-    //    either.
-    MangledName.consumeFront("??_R4@");
-
-    StringView MD5(Start, MangledName.begin());
-    SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
-    S->Name = synthesizeQualifiedName(Arena, MD5);
-
-    return S;
-  }
+  if (MangledName.startsWith("??@"))
+    return demangleMD5Name(MangledName);
 
   // MSVC-style mangled symbols must start with '?'.
   if (!MangledName.startsWith('?')) {

From 61a5e2833d3c00e0491cf334852ef56aaa7bc33f Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Mon, 27 May 2019 23:23:50 +0000
Subject: [PATCH 0316/1176] [Driver] Change layout of per-target runtimes to
 resemble multiarch

This is a follow up to r361432, changing the layout of per-target
runtimes to more closely resemble multiarch. While before, we used
the following layout:

[RESOURCE_DIR]/<target>/lib/libclang_rt.<runtime>.<ext>

Now we use the following layout:

[RESOURCE_DIR]/lib/<target>/libclang_rt.<runtime>.<ext>

This also more closely resembles the existing "non-per-target" layout:

[RESOURCE_DIR]/lib/<os>/libclang_rt.<runtime>-<arch>.<ext>

This change will enable further simplification of the driver logic
in follow up changes.

Differential Revision: https://reviews.llvm.org/D62469

llvm-svn: 361784
---
 clang/lib/Driver/ToolChain.cpp                |  4 +-
 .../lib => lib/aarch64-fuchsia}/.keep         |  0
 .../lib => lib/aarch64-fuchsia/asan}/.keep    |  0
 .../libclang_rt.asan-preinit.a                |  0
 .../aarch64-fuchsia}/libclang_rt.asan.so      |  0
 .../aarch64-fuchsia}/libclang_rt.builtins.a   |  0
 .../aarch64-fuchsia}/libclang_rt.fuzzer.a     |  0
 .../aarch64-fuchsia}/libclang_rt.scudo.so     |  0
 .../aarch64-fuchsia}/libclang_rt.xray-basic.a |  0
 .../aarch64-fuchsia}/libclang_rt.xray.a       |  0
 .../aarch64-fuchsia/noexcept}/.keep           |  0
 .../lib => lib/i386-linux-gnu}/.keep          |  0
 .../i386-linux-gnu}/libclang_rt.builtins.a    |  0
 .../x86_64-fuchsia/.keep}                     |  0
 .../x86_64-fuchsia/asan/.keep}                |  0
 .../libclang_rt.asan-preinit.a}               |  0
 .../x86_64-fuchsia/libclang_rt.asan.so}       |  0
 .../x86_64-fuchsia}/libclang_rt.builtins.a    |  0
 .../x86_64-fuchsia/libclang_rt.fuzzer.a}      |  0
 .../x86_64-fuchsia/libclang_rt.scudo.so}      |  0
 .../x86_64-fuchsia/libclang_rt.xray-basic.a}  |  0
 .../lib/x86_64-fuchsia/libclang_rt.xray.a     |  0
 .../lib/x86_64-fuchsia/noexcept/.keep         |  0
 .../lib/x86_64-linux-gnu/.keep                |  0
 .../x86_64-linux-gnu/libclang_rt.builtins.a   |  0
 clang/test/Driver/fuchsia.c                   | 42 +++++++++----------
 clang/test/Driver/fuchsia.cpp                 |  2 +-
 .../Driver/linux-per-target-runtime-dir.c     |  4 +-
 .../cmake/Modules/CompilerRTUtils.cmake       |  4 +-
 29 files changed, 28 insertions(+), 28 deletions(-)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/.keep (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{i386-linux-gnu/lib => lib/aarch64-fuchsia/asan}/.keep (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.asan-preinit.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.asan.so (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.builtins.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.fuzzer.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.scudo.so (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.xray-basic.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{aarch64-fuchsia/lib => lib/aarch64-fuchsia}/libclang_rt.xray.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib => lib/aarch64-fuchsia/noexcept}/.keep (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-linux-gnu/lib => lib/i386-linux-gnu}/.keep (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{i386-linux-gnu/lib => lib/i386-linux-gnu}/libclang_rt.builtins.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.asan-preinit.a => lib/x86_64-fuchsia/.keep} (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.asan.so => lib/x86_64-fuchsia/asan/.keep} (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.builtins.a => lib/x86_64-fuchsia/libclang_rt.asan-preinit.a} (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.fuzzer.a => lib/x86_64-fuchsia/libclang_rt.asan.so} (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-linux-gnu/lib => lib/x86_64-fuchsia}/libclang_rt.builtins.a (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.scudo.so => lib/x86_64-fuchsia/libclang_rt.fuzzer.a} (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.xray-basic.a => lib/x86_64-fuchsia/libclang_rt.scudo.so} (100%)
 rename clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/{x86_64-fuchsia/lib/libclang_rt.xray.a => lib/x86_64-fuchsia/libclang_rt.xray-basic.a} (100%)
 create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.xray.a
 create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/noexcept/.keep
 create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-linux-gnu/.keep
 create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-linux-gnu/libclang_rt.builtins.a

diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 01fb818c9c41c..6f5e1916e5887 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -411,13 +411,13 @@ Optional<std::string> ToolChain::getRuntimePath() const {
 
   // First try the triple passed to driver as --target=<triple>.
   P.assign(D.ResourceDir);
-  llvm::sys::path::append(P, D.getTargetTriple(), "lib");
+  llvm::sys::path::append(P, "lib", D.getTargetTriple());
   if (getVFS().exists(P))
     return llvm::Optional<std::string>(P.str());
 
   // Second try the normalized triple.
   P.assign(D.ResourceDir);
-  llvm::sys::path::append(P, Triple.str(), "lib");
+  llvm::sys::path::append(P, "lib", Triple.str());
   if (getVFS().exists(P))
     return llvm::Optional<std::string>(P.str());
 
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/.keep b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/.keep
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/i386-linux-gnu/lib/.keep b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/asan/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/i386-linux-gnu/lib/.keep
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/asan/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.asan-preinit.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.asan-preinit.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.asan-preinit.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.asan-preinit.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.asan.so b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.asan.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.asan.so
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.asan.so
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.builtins.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.builtins.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.builtins.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.fuzzer.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.fuzzer.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.fuzzer.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.fuzzer.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.scudo.so b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.scudo.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.scudo.so
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.scudo.so
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.xray-basic.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.xray-basic.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.xray-basic.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.xray-basic.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.xray.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.xray.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/aarch64-fuchsia/lib/libclang_rt.xray.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/libclang_rt.xray.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/.keep b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/noexcept/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/.keep
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/aarch64-fuchsia/noexcept/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-linux-gnu/lib/.keep b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/i386-linux-gnu/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-linux-gnu/lib/.keep
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/i386-linux-gnu/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/i386-linux-gnu/lib/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/i386-linux-gnu/libclang_rt.builtins.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/i386-linux-gnu/lib/libclang_rt.builtins.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/i386-linux-gnu/libclang_rt.builtins.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.asan-preinit.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.asan-preinit.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.asan.so b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/asan/.keep
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.asan.so
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/asan/.keep
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.asan-preinit.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.builtins.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.asan-preinit.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.fuzzer.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.asan.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.fuzzer.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.asan.so
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-linux-gnu/lib/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.builtins.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-linux-gnu/lib/libclang_rt.builtins.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.builtins.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.scudo.so b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.fuzzer.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.scudo.so
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.fuzzer.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.xray-basic.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.scudo.so
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.xray-basic.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.scudo.so
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.xray.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.xray-basic.a
similarity index 100%
rename from clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/x86_64-fuchsia/lib/libclang_rt.xray.a
rename to clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.xray-basic.a
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.xray.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/libclang_rt.xray.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/noexcept/.keep b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-fuchsia/noexcept/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-linux-gnu/.keep b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-linux-gnu/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-linux-gnu/libclang_rt.builtins.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/x86_64-linux-gnu/libclang_rt.builtins.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/fuchsia.c b/clang/test/Driver/fuchsia.c
index 7147f2f45ce9d..5cee88cd1e532 100644
--- a/clang/test/Driver/fuchsia.c
+++ b/clang/test/Driver/fuchsia.c
@@ -26,8 +26,8 @@
 // CHECK-NOT: crti.o
 // CHECK-NOT: crtbegin.o
 // CHECK: "-L[[SYSROOT]]{{/|\\\\}}lib"
-// CHECK-X86_64: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.builtins.a"
-// CHECK-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-X86_64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.builtins.a"
 // CHECK: "-lc"
 // CHECK-NOT: crtend.o
 // CHECK-NOT: crtn.o
@@ -57,21 +57,21 @@
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     | FileCheck %s -check-prefix=CHECK-NODEFAULTLIBS
 // CHECK-NODEFAULTLIBS: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
-// CHECK-NODEFAULTLIBS-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-NODEFAULTLIBS-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.builtins.a"
 // CHECK-NODEFAULTLIBS-NOT: "-lc"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia -nostdlib -fuse-ld=lld 2>&1 \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     | FileCheck %s -check-prefix=CHECK-NOSTDLIB
 // CHECK-NOSTDLIB: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
-// CHECK-NOSTDLIB-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-NOSTDLIB-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.builtins.a"
 // CHECK-NOSTDLIB-NOT: "-lc"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia -nolibc -fuse-ld=lld 2>&1 \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     | FileCheck %s -check-prefix=CHECK-NOLIBC
 // CHECK-NOLIBC: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
-// CHECK-NOLIBC: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-NOLIBC: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.builtins.a"
 // CHECK-NOLIBC-NOT: "-lc"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
@@ -81,7 +81,7 @@
 // RUN:     | FileCheck %s -check-prefix=CHECK-SAFESTACK
 // CHECK-SAFESTACK: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-SAFESTACK: "-fsanitize=safe-stack"
-// CHECK-SAFESTACK-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.safestack.a"
+// CHECK-SAFESTACK-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.safestack.a"
 // CHECK-SAFESTACK-NOT: "__safestack_init"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
@@ -93,8 +93,8 @@
 // CHECK-ASAN-X86: "-fsanitize=address"
 // CHECK-ASAN-X86: "-fsanitize-address-globals-dead-stripping"
 // CHECK-ASAN-X86: "-dynamic-linker" "asan/ld.so.1"
-// CHECK-ASAN-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan.so"
-// CHECK-ASAN-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan-preinit.a"
+// CHECK-ASAN-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.asan.so"
+// CHECK-ASAN-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.asan-preinit.a"
 
 // RUN: %clang %s -### --target=aarch64-fuchsia \
 // RUN:     -fsanitize=address 2>&1 \
@@ -105,8 +105,8 @@
 // CHECK-ASAN-AARCH64: "-fsanitize=address"
 // CHECK-ASAN-AARCH64: "-fsanitize-address-globals-dead-stripping"
 // CHECK-ASAN-AARCH64: "-dynamic-linker" "asan/ld.so.1"
-// CHECK-ASAN-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan.so"
-// CHECK-ASAN-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan-preinit.a"
+// CHECK-ASAN-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.asan.so"
+// CHECK-ASAN-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.asan-preinit.a"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
 // RUN:     -fsanitize=address -fPIC -shared 2>&1 \
@@ -116,8 +116,8 @@
 // CHECK-ASAN-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-ASAN-SHARED: "-fsanitize=address"
 // CHECK-ASAN-SHARED: "-fsanitize-address-globals-dead-stripping"
-// CHECK-ASAN-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan.so"
-// CHECK-ASAN-SHARED-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.asan-preinit.a"
+// CHECK-ASAN-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.asan.so"
+// CHECK-ASAN-SHARED-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.asan-preinit.a"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
 // RUN:     -fsanitize=fuzzer 2>&1 \
@@ -126,7 +126,7 @@
 // RUN:     | FileCheck %s -check-prefix=CHECK-FUZZER-X86
 // CHECK-FUZZER-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-FUZZER-X86: "-fsanitize=fuzzer,fuzzer-no-link,safe-stack"
-// CHECK-FUZZER-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.fuzzer.a"
+// CHECK-FUZZER-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.fuzzer.a"
 
 // RUN: %clang %s -### --target=aarch64-fuchsia \
 // RUN:     -fsanitize=fuzzer 2>&1 \
@@ -135,7 +135,7 @@
 // RUN:     | FileCheck %s -check-prefix=CHECK-FUZZER-AARCH64
 // CHECK-FUZZER-AARCH64: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-FUZZER-AARCH64: "-fsanitize=fuzzer,fuzzer-no-link,safe-stack"
-// CHECK-FUZZER-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.fuzzer.a"
+// CHECK-FUZZER-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.fuzzer.a"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
 // RUN:     -fsanitize=scudo 2>&1 \
@@ -145,7 +145,7 @@
 // CHECK-SCUDO-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-SCUDO-X86: "-fsanitize=safe-stack,scudo"
 // CHECK-SCUDO-X86: "-pie"
-// CHECK-SCUDO-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.scudo.so"
+// CHECK-SCUDO-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.scudo.so"
 
 // RUN: %clang %s -### --target=aarch64-fuchsia \
 // RUN:     -fsanitize=scudo 2>&1 \
@@ -155,7 +155,7 @@
 // CHECK-SCUDO-AARCH64: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-SCUDO-AARCH64: "-fsanitize=safe-stack,scudo"
 // CHECK-SCUDO-AARCH64: "-pie"
-// CHECK-SCUDO-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.scudo.so"
+// CHECK-SCUDO-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.scudo.so"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
 // RUN:     -fsanitize=scudo -fPIC -shared 2>&1 \
@@ -164,7 +164,7 @@
 // RUN:     | FileCheck %s -check-prefix=CHECK-SCUDO-SHARED
 // CHECK-SCUDO-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-SCUDO-SHARED: "-fsanitize=safe-stack,scudo"
-// CHECK-SCUDO-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.scudo.so"
+// CHECK-SCUDO-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.scudo.so"
 
 // RUN: %clang %s -### --target=x86_64-fuchsia \
 // RUN:     -fxray-instrument -fxray-modes=xray-basic \
@@ -173,8 +173,8 @@
 // RUN:     | FileCheck %s -check-prefix=CHECK-XRAY-X86
 // CHECK-XRAY-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-XRAY-X86: "-fxray-instrument"
-// CHECK-XRAY-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.xray.a"
-// CHECK-XRAY-X86: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.xray-basic.a"
+// CHECK-XRAY-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.xray.a"
+// CHECK-XRAY-X86: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.xray-basic.a"
 
 // RUN: %clang %s -### --target=aarch64-fuchsia \
 // RUN:     -fxray-instrument -fxray-modes=xray-basic \
@@ -183,8 +183,8 @@
 // RUN:     | FileCheck %s -check-prefix=CHECK-XRAY-AARCH64
 // CHECK-XRAY-AARCH64: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-XRAY-AARCH64: "-fxray-instrument"
-// CHECK-XRAY-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.xray.a"
-// CHECK-XRAY-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.xray-basic.a"
+// CHECK-XRAY-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.xray.a"
+// CHECK-XRAY-AARCH64: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}aarch64-fuchsia{{/|\\\\}}libclang_rt.xray-basic.a"
 
 // RUN: %clang %s -### --target=aarch64-fuchsia \
 // RUN:     -O3 -flto -mcpu=cortex-a53 2>&1 \
diff --git a/clang/test/Driver/fuchsia.cpp b/clang/test/Driver/fuchsia.cpp
index 13c270de8e0b2..823ded4b91544 100644
--- a/clang/test/Driver/fuchsia.cpp
+++ b/clang/test/Driver/fuchsia.cpp
@@ -23,7 +23,7 @@
 // CHECK: "-lc++"
 // CHECK: "-lm"
 // CHECK: "--pop-state"
-// CHECK: "[[RESOURCE_DIR]]{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}lib{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}libclang_rt.builtins.a"
 // CHECK: "-lc"
 // CHECK-NOT: crtend.o
 // CHECK-NOT: crtn.o
diff --git a/clang/test/Driver/linux-per-target-runtime-dir.c b/clang/test/Driver/linux-per-target-runtime-dir.c
index 97b433b296d15..96fa86e118543 100644
--- a/clang/test/Driver/linux-per-target-runtime-dir.c
+++ b/clang/test/Driver/linux-per-target-runtime-dir.c
@@ -18,10 +18,10 @@
 // RUN:     --target=x86_64-linux-gnu \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-X8664 %s
-// CHECK-CLANGRT-X8664: x86_64-linux-gnu{{/|\\}}lib{{/|\\}}libclang_rt.builtins.a
+// CHECK-CLANGRT-X8664: lib{{/|\\}}x86_64-linux-gnu{{/|\\}}libclang_rt.builtins.a
 
 // RUN: %clang -rtlib=compiler-rt -print-file-name=libclang_rt.builtins.a 2>&1 \
 // RUN:     --target=x86_64-linux-gnu \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:   | FileCheck --check-prefix=CHECK-FILE-NAME-X8664 %s
-// CHECK-FILE-NAME-X8664: x86_64-linux-gnu{{/|\\}}lib{{/|\\}}libclang_rt.builtins.a
+// CHECK-FILE-NAME-X8664: lib{{/|\\}}x86_64-linux-gnu{{/|\\}}libclang_rt.builtins.a
diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
index f1543f5666d70..4fa14512036ae 100644
--- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
+++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake
@@ -365,7 +365,7 @@ endfunction()
 function(get_compiler_rt_install_dir arch install_dir)
   if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
     get_compiler_rt_target(${arch} target)
-    set(${install_dir} ${COMPILER_RT_INSTALL_PATH}/${target}/lib PARENT_SCOPE)
+    set(${install_dir} ${COMPILER_RT_INSTALL_PATH}/lib/${target} PARENT_SCOPE)
   else()
     set(${install_dir} ${COMPILER_RT_LIBRARY_INSTALL_DIR} PARENT_SCOPE)
   endif()
@@ -374,7 +374,7 @@ endfunction()
 function(get_compiler_rt_output_dir arch output_dir)
   if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
     get_compiler_rt_target(${arch} target)
-    set(${output_dir} ${COMPILER_RT_OUTPUT_DIR}/${target}/lib PARENT_SCOPE)
+    set(${output_dir} ${COMPILER_RT_OUTPUT_DIR}/lib/${target} PARENT_SCOPE)
   else()
     set(${output_dir} ${COMPILER_RT_LIBRARY_OUTPUT_DIR} PARENT_SCOPE)
   endif()

From 165663aeeb643ee834271ca09b3e094052298b92 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 27 May 2019 23:56:41 +0000
Subject: [PATCH 0317/1176] [x86] add test to show volatile store splitting;
 NFC

From the LangRef:
"the backend should never split or merge target-legal
volatile load/store instructions."

See also:
D62498

llvm-svn: 361785
---
 llvm/test/CodeGen/X86/avx-load-store.ll | 44 ++++++++++++++++++-------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 7b18b9cdf07c2..1fd4e07961dbe 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -184,6 +184,26 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp
   ret void
 }
 
+define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind {
+; CHECK-LABEL: double_save_volatile:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovaps %xmm1, 16(%rdi)
+; CHECK-NEXT:    vmovaps %xmm0, (%rdi)
+; CHECK-NEXT:    retq
+;
+; CHECK_O0-LABEL: double_save_volatile:
+; CHECK_O0:       # %bb.0:
+; CHECK_O0-NEXT:    # implicit-def: $ymm2
+; CHECK_O0-NEXT:    vmovaps %xmm0, %xmm2
+; CHECK_O0-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm2
+; CHECK_O0-NEXT:    vmovdqu %ymm2, (%rdi)
+; CHECK_O0-NEXT:    vzeroupper
+; CHECK_O0-NEXT:    retq
+  %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store volatile <8 x i32> %Z, <8 x i32>* %P, align 16
+  ret void
+}
+
 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
 
 define void @f_f() nounwind {
@@ -191,38 +211,38 @@ define void @f_f() nounwind {
 ; CHECK:       # %bb.0: # %allocas
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB8_2
+; CHECK-NEXT:    jne .LBB9_2
 ; CHECK-NEXT:  # %bb.1: # %cif_mask_all
-; CHECK-NEXT:  .LBB8_2: # %cif_mask_mixed
+; CHECK-NEXT:  .LBB9_2: # %cif_mask_mixed
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB8_4
+; CHECK-NEXT:    jne .LBB9_4
 ; CHECK-NEXT:  # %bb.3: # %cif_mixed_test_all
 ; CHECK-NEXT:    movl $-1, %eax
 ; CHECK-NEXT:    vmovd %eax, %xmm0
 ; CHECK-NEXT:    vmaskmovps %ymm0, %ymm0, (%rax)
-; CHECK-NEXT:  .LBB8_4: # %cif_mixed_test_any_check
+; CHECK-NEXT:  .LBB9_4: # %cif_mixed_test_any_check
 ;
 ; CHECK_O0-LABEL: f_f:
 ; CHECK_O0:       # %bb.0: # %allocas
 ; CHECK_O0-NEXT:    # implicit-def: $al
 ; CHECK_O0-NEXT:    testb $1, %al
-; CHECK_O0-NEXT:    jne .LBB8_1
-; CHECK_O0-NEXT:    jmp .LBB8_2
-; CHECK_O0-NEXT:  .LBB8_1: # %cif_mask_all
-; CHECK_O0-NEXT:  .LBB8_2: # %cif_mask_mixed
+; CHECK_O0-NEXT:    jne .LBB9_1
+; CHECK_O0-NEXT:    jmp .LBB9_2
+; CHECK_O0-NEXT:  .LBB9_1: # %cif_mask_all
+; CHECK_O0-NEXT:  .LBB9_2: # %cif_mask_mixed
 ; CHECK_O0-NEXT:    # implicit-def: $al
 ; CHECK_O0-NEXT:    testb $1, %al
-; CHECK_O0-NEXT:    jne .LBB8_3
-; CHECK_O0-NEXT:    jmp .LBB8_4
-; CHECK_O0-NEXT:  .LBB8_3: # %cif_mixed_test_all
+; CHECK_O0-NEXT:    jne .LBB9_3
+; CHECK_O0-NEXT:    jmp .LBB9_4
+; CHECK_O0-NEXT:  .LBB9_3: # %cif_mixed_test_all
 ; CHECK_O0-NEXT:    movl $-1, %eax
 ; CHECK_O0-NEXT:    vmovd %eax, %xmm0
 ; CHECK_O0-NEXT:    vmovaps %xmm0, %xmm1
 ; CHECK_O0-NEXT:    # implicit-def: $rcx
 ; CHECK_O0-NEXT:    # implicit-def: $ymm2
 ; CHECK_O0-NEXT:    vmaskmovps %ymm2, %ymm1, (%rcx)
-; CHECK_O0-NEXT:  .LBB8_4: # %cif_mixed_test_any_check
+; CHECK_O0-NEXT:  .LBB9_4: # %cif_mixed_test_any_check
 allocas:
   br i1 undef, label %cif_mask_all, label %cif_mask_mixed
 

From f83c39e53f5636c43aac7a19e0b8f0784a6bc59f Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 28 May 2019 01:20:36 +0000
Subject: [PATCH 0318/1176] llvm-undname: Remove unreachable statement

llvm-svn: 361786
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 912ca64d76d2b..e596f0cea1c8e 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -674,7 +674,6 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
       return Arena.alloc<IntrinsicFunctionIdentifierNode>(
           translateIntrinsicFunctionCode(CH, Group));
     }
-    break;
   case FunctionIdentifierCodeGroup::Under:
     return Arena.alloc<IntrinsicFunctionIdentifierNode>(
         translateIntrinsicFunctionCode(MangledName.popFront(), Group));

From 50d502826bf65666c39d4a2b47f3e2a12cf4960d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 28 May 2019 04:09:18 +0000
Subject: [PATCH 0319/1176] [CostModel] Add really basic support for being able
 to query the cost of the FNeg instruction.

Summary:
This reuses the getArithmeticInstrCost, but passes dummy values of the second
operand flags.

The X86 costs are wrong and can be improved in a follow up. I just wanted to
stop it from reporting an unknown cost first.

Reviewers: RKSimon, spatel, andrew.w.kaylor, cameron.mcinally

Reviewed By: spatel

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62444

llvm-svn: 361788
---
 llvm/lib/Analysis/TargetTransformInfo.cpp    |  10 ++
 llvm/test/Analysis/CostModel/X86/arith-fp.ll | 112 +++++++++++++------
 2 files changed, 88 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2f9f1e069f8f3..53dd2bf230459 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1043,6 +1043,16 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands);
   }
+  case Instruction::FNeg: {
+    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+    Op2VK = OK_AnyValue;
+    Op2VP = OP_None;
+    SmallVector<const Value *, 2> Operands(I->operand_values());
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+                                  Op1VP, Op2VP, Operands);
+  }
   case Instruction::Select: {
     const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index 1e95e9a5116d2..d1cffde7b7462 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -354,48 +354,92 @@ define i32 @fneg_idiom(i32 %arg) {
 }
 
 define i32 @fneg(i32 %arg) {
-; CHECK-LABEL: 'fneg'
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
-; CHECK-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE1-LABEL: 'fneg'
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE2-LABEL: 'fneg'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; SSE42-LABEL: 'fneg'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX-LABEL: 'fneg'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+;
+; AVX512-LABEL: 'fneg'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fneg <16 x float> undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fneg <8 x double> undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fneg'
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
-; SLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'fneg'
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
-; GLM-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fneg <8 x float> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = fneg <16 x float> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fneg <4 x double> undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fneg <8 x double> undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'fneg'
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %F32 = fneg float undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V4F32 = fneg <4 x float> undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V8F32 = fneg <8 x float> undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V16F32 = fneg <16 x float> undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %F64 = fneg double undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V2F64 = fneg <2 x double> undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V4F64 = fneg <4 x double> undef
-; BTVER2-NEXT:  Cost Model: Unknown cost for instruction: %V8F64 = fneg <8 x double> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fneg float undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fneg <4 x float> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fneg <16 x float> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fneg double undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fneg <2 x double> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fneg <8 x double> undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fneg float undef

From 92069605bf24c7f5b3ae5287af306575c29c2e72 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Tue, 28 May 2019 05:17:21 +0000
Subject: [PATCH 0320/1176] Merge ELFFileBase::{initSymtab,parseHeader} as
 ELFFileBase:init. NFC.

This patch simplifies ELFFile instance initialization by merging
two similar functions into a single function and call it from the
ctor.

llvm-svn: 361789
---
 lld/ELF/InputFiles.cpp | 130 +++++++++++++++++++----------------------
 lld/ELF/InputFiles.h   |  18 +++---
 2 files changed, 70 insertions(+), 78 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 0581f35893ba0..2b89533191a19 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -368,39 +368,66 @@ std::string lld::toString(const InputFile *F) {
   return F->ToStringCache;
 }
 
-ELFFileBase::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {}
+ELFFileBase::ELFFileBase(Kind K, MemoryBufferRef MB) : InputFile(K, MB) {
+  EKind = getELFKind(MB, "");
 
-template <class ELFT> void ELFFileBase::parseHeader() {
-  if (ELFT::TargetEndianness == support::little)
-    EKind = ELFT::Is64Bits ? ELF64LEKind : ELF32LEKind;
-  else
-    EKind = ELFT::Is64Bits ? ELF64BEKind : ELF32BEKind;
+  switch (EKind) {
+  case ELF32LEKind:
+    init<ELF32LE>();
+    break;
+  case ELF32BEKind:
+    init<ELF32BE>();
+    break;
+  case ELF64LEKind:
+    init<ELF64LE>();
+    break;
+  case ELF64BEKind:
+    init<ELF64BE>();
+    break;
+  default:
+    llvm_unreachable("getELFKind");
+  }
+}
 
-  EMachine = getObj<ELFT>().getHeader()->e_machine;
-  OSABI = getObj<ELFT>().getHeader()->e_ident[llvm::ELF::EI_OSABI];
-  ABIVersion = getObj<ELFT>().getHeader()->e_ident[llvm::ELF::EI_ABIVERSION];
+template <typename Elf_Shdr>
+static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> Sections, uint32_t Type) {
+  for (const Elf_Shdr &Sec : Sections)
+    if (Sec.sh_type == Type)
+      return &Sec;
+  return nullptr;
 }
 
-template <class ELFT>
-void ELFFileBase::initSymtab(ArrayRef<typename ELFT::Shdr> Sections,
-                             const typename ELFT::Shdr *Symtab) {
-  FirstGlobal = Symtab->sh_info;
-  ArrayRef<typename ELFT::Sym> ELFSyms =
-      CHECK(getObj<ELFT>().symbols(Symtab), this);
-  if (FirstGlobal == 0 || FirstGlobal > ELFSyms.size())
-    fatal(toString(this) + ": invalid sh_info in symbol table");
-  this->ELFSyms = reinterpret_cast<const void *>(ELFSyms.data());
-  this->NumELFSyms = ELFSyms.size();
+template <class ELFT> void ELFFileBase::init() {
+  using Elf_Shdr = typename ELFT::Shdr;
+  using Elf_Sym = typename ELFT::Sym;
 
-  StringTable =
-      CHECK(getObj<ELFT>().getStringTableForSymtab(*Symtab, Sections), this);
-}
+  // Initialize trivial attributes.
+  const ELFFile<ELFT> &Obj = getObj<ELFT>();
+  EMachine = Obj.getHeader()->e_machine;
+  OSABI = Obj.getHeader()->e_ident[llvm::ELF::EI_OSABI];
+  ABIVersion = Obj.getHeader()->e_ident[llvm::ELF::EI_ABIVERSION];
 
-template <class ELFT>
-ObjFile<ELFT>::ObjFile(MemoryBufferRef M, StringRef ArchiveName)
-    : ELFFileBase(ObjKind, M) {
-  parseHeader<ELFT>();
-  this->ArchiveName = ArchiveName;
+  ArrayRef<Elf_Shdr> Sections = CHECK(Obj.sections(), this);
+
+  // Find a symbol table.
+  bool IsDSO =
+      (identify_magic(MB.getBuffer()) == file_magic::elf_shared_object);
+  const Elf_Shdr *SymtabSec =
+      findSection(Sections, IsDSO ? SHT_DYNSYM : SHT_SYMTAB);
+
+  if (!SymtabSec)
+    return;
+
+  // Initialize members corresponding to a symbol table.
+  FirstGlobal = SymtabSec->sh_info;
+
+  ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(SymtabSec), this);
+  if (FirstGlobal == 0 || FirstGlobal > ESyms.size())
+    fatal(toString(this) + ": invalid sh_info in symbol table");
+
+  ELFSyms = reinterpret_cast<const void *>(ESyms.data());
+  NumELFSyms = ESyms.size();
+  StringTable = CHECK(Obj.getStringTableForSymtab(*SymtabSec, Sections), this);
 }
 
 template <class ELFT>
@@ -439,12 +466,6 @@ void ObjFile<ELFT>::parse(
 template <class ELFT>
 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
                                               const Elf_Shdr &Sec) {
-  // Group signatures are stored as symbol names in object files.
-  // sh_info contains a symbol index, so we fetch a symbol and read its name.
-  if (this->getELFSyms<ELFT>().empty())
-    this->initSymtab<ELFT>(
-        Sections, CHECK(object::getSection<ELFT>(Sections, Sec.sh_link), this));
-
   const Elf_Sym *Sym =
       CHECK(object::getSymbol<ELFT>(this->getELFSyms<ELFT>(), Sec.sh_info), this);
   StringRef Signature = CHECK(Sym->getName(this->StringTable), this);
@@ -515,15 +536,8 @@ template <class ELFT> bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &Sec) {
 // When the option is given, we link "just symbols". The section table is
 // initialized with null pointers.
 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
-  ArrayRef<Elf_Shdr> ObjSections = CHECK(this->getObj().sections(), this);
-  this->Sections.resize(ObjSections.size());
-
-  for (const Elf_Shdr &Sec : ObjSections) {
-    if (Sec.sh_type != SHT_SYMTAB)
-      continue;
-    this->initSymtab<ELFT>(ObjSections, &Sec);
-    return;
-  }
+  ArrayRef<Elf_Shdr> Sections = CHECK(this->getObj().sections(), this);
+  this->Sections.resize(Sections.size());
 }
 
 // An ELF object file may contain a `.deplibs` section. If it exists, the
@@ -625,12 +639,10 @@ void ObjFile<ELFT>::initializeSections(
       }
       break;
     }
-    case SHT_SYMTAB:
-      this->initSymtab<ELFT>(ObjSections, &Sec);
-      break;
     case SHT_SYMTAB_SHNDX:
       ShndxTable = CHECK(Obj.getSHNDXTable(Sec, ObjSections), this);
       break;
+    case SHT_SYMTAB:
     case SHT_STRTAB:
     case SHT_NULL:
       break;
@@ -1066,27 +1078,6 @@ void ArchiveFile::fetch(const Archive::Symbol &Sym) {
 
 unsigned SharedFile::VernauxNum;
 
-SharedFile::SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
-    : ELFFileBase(SharedKind, M), SoName(DefaultSoName),
-      IsNeeded(!Config->AsNeeded) {
-  switch (getELFKind(MB, "")) {
-  case ELF32LEKind:
-    parseHeader<ELF32LE>();
-    break;
-  case ELF32BEKind:
-    parseHeader<ELF32BE>();
-    break;
-  case ELF64LEKind:
-    parseHeader<ELF64LE>();
-    break;
-  case ELF64BEKind:
-    parseHeader<ELF64BE>();
-    break;
-  default:
-    llvm_unreachable("getELFKind");
-  }
-}
-
 // Parse the version definitions in the object file if present, and return a
 // vector whose nth element contains a pointer to the Elf_Verdef for version
 // identifier n. Version identifiers that are not definitions map to nullptr.
@@ -1165,9 +1156,6 @@ template <class ELFT> void SharedFile::parse() {
     switch (Sec.sh_type) {
     default:
       continue;
-    case SHT_DYNSYM:
-      this->initSymtab<ELFT>(Sections, &Sec);
-      break;
     case SHT_DYNAMIC:
       DynamicTags =
           CHECK(Obj.template getSectionContentsAsArray<Elf_Dyn>(&Sec), this);
@@ -1181,7 +1169,7 @@ template <class ELFT> void SharedFile::parse() {
     }
   }
 
-  if (VersymSec && this->getELFSyms<ELFT>().empty()) {
+  if (VersymSec && NumELFSyms == 0) {
     error("SHT_GNU_versym should be associated with symbol table");
     return;
   }
@@ -1221,7 +1209,7 @@ template <class ELFT> void SharedFile::parse() {
   // Parse ".gnu.version" section which is a parallel array for the symbol
   // table. If a given file doesn't have a ".gnu.version" section, we use
   // VER_NDX_GLOBAL.
-  size_t Size = this->getELFSyms<ELFT>().size() - this->FirstGlobal;
+  size_t Size = NumELFSyms - FirstGlobal;
   std::vector<uint32_t> Versyms(Size, VER_NDX_GLOBAL);
   if (VersymSec) {
     ArrayRef<Elf_Versym> Versym =
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index a51ba64aac3ca..e506b9eb87b66 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -152,7 +152,6 @@ class InputFile {
 class ELFFileBase : public InputFile {
 public:
   ELFFileBase(Kind K, MemoryBufferRef M);
-  template <typename ELFT> void parseHeader();
   static bool classof(const InputFile *F) { return F->isElf(); }
 
   template <typename ELFT> llvm::object::ELFFile<ELFT> getObj() const {
@@ -170,13 +169,13 @@ class ELFFileBase : public InputFile {
   }
 
 protected:
+  // Initializes this class's member variables.
+  template <typename ELFT> void init();
+
   const void *ELFSyms = nullptr;
   size_t NumELFSyms = 0;
   uint32_t FirstGlobal = 0;
   StringRef StringTable;
-  template <typename ELFT>
-  void initSymtab(ArrayRef<typename ELFT::Shdr> Sections,
-                  const typename ELFT::Shdr *Symtab);
 };
 
 // .o file.
@@ -198,7 +197,10 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   ArrayRef<Symbol *> getLocalSymbols();
   ArrayRef<Symbol *> getGlobalSymbols();
 
-  ObjFile(MemoryBufferRef M, StringRef ArchiveName);
+  ObjFile(MemoryBufferRef M, StringRef ArchiveName) : ELFFileBase(ObjKind, M) {
+    this->ArchiveName = ArchiveName;
+  }
+
   void parse(llvm::DenseMap<llvm::CachedHashStringRef, const InputFile *>
                  &ComdatGroups);
 
@@ -345,6 +347,10 @@ class BitcodeFile : public InputFile {
 // .so file.
 class SharedFile : public ELFFileBase {
 public:
+  SharedFile(MemoryBufferRef M, StringRef DefaultSoName)
+      : ELFFileBase(SharedKind, M), SoName(DefaultSoName),
+        IsNeeded(!Config->AsNeeded) {}
+
   // This is actually a vector of Elf_Verdef pointers.
   std::vector<const void *> Verdefs;
 
@@ -360,8 +366,6 @@ class SharedFile : public ELFFileBase {
 
   static bool classof(const InputFile *F) { return F->kind() == SharedKind; }
 
-  SharedFile(MemoryBufferRef M, StringRef DefaultSoName);
-
   template <typename ELFT> void parse();
 
   // Used for --no-allow-shlib-undefined.

From fe5eaab2b5b4523886bd63aebcfea8cfce586fa1 Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Tue, 28 May 2019 06:26:58 +0000
Subject: [PATCH 0321/1176] [test] Fix plugin tests

Summary:
The following changes were required to fix these tests:

1) Change LLVM_ENABLE_PLUGINS to an option and move it to
   llvm/CMakeLists.txt with an appropriate default -- which matches
   the original default behavior.

2) Move the plugins directory from clang/test/Analysis
   clang/lib/Analysis.  It's not enough to add an exclude to the
   lit.local.cfg file because add_lit_testsuites recurses the tree and
   automatically adds the appropriate `check-` targets, which don't
   make sense for the plugins because they aren't tests and don't
   have `RUN` statements.

   Here's a list of the `clang-check-anlysis*` targets with this
   change:

```
  $ ninja -t targets all| sed -n "s/.*\/\(check[^:]*\):.*/\1/p" | sort -u | grep clang-analysis
  check-clang-analysis
  check-clang-analysis-checkers
  check-clang-analysis-copypaste
  check-clang-analysis-diagnostics
  check-clang-analysis-engine
  check-clang-analysis-exploration_order
  check-clang-analysis-html_diagnostics
  check-clang-analysis-html_diagnostics-relevant_lines
  check-clang-analysis-inlining
  check-clang-analysis-objc
  check-clang-analysis-unified-sources
  check-clang-analysis-z3
```

3) Simplify the logic and only include the subdirectories under
   clang/lib/Analysis/plugins if LLVM_ENABLE_PLUGINS is set.

Reviewed By: NoQ

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62445

llvm-svn: 361790
---
 clang/lib/Analysis/CMakeLists.txt             |  2 ++
 clang/lib/Analysis/plugins/CMakeLists.txt     |  5 ++++
 .../CheckerDependencyHandling/CMakeLists.txt  | 15 +++++------
 .../CheckerDependencyHandling.cpp             |  0
 ...erDependencyHandlingAnalyzerPlugin.exports |  0
 .../CheckerOptionHandling/CMakeLists.txt      | 15 +++++------
 .../CheckerOptionHandling.cpp                 |  0
 ...heckerOptionHandlingAnalyzerPlugin.exports |  0
 .../plugins/SampleAnalyzer/CMakeLists.txt     | 10 +++++++
 .../SampleAnalyzer/MainCallChecker.cpp        |  0
 .../SampleAnalyzerPlugin.exports              |  0
 clang/test/Analysis/lit.local.cfg             |  2 --
 clang/test/Analysis/plugins/CMakeLists.txt    | 12 ---------
 .../plugins/SampleAnalyzer/CMakeLists.txt     | 11 --------
 clang/test/CMakeLists.txt                     | 26 +++++--------------
 llvm/CMakeLists.txt                           | 11 ++++++++
 llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 ------
 17 files changed, 48 insertions(+), 69 deletions(-)
 create mode 100644 clang/lib/Analysis/plugins/CMakeLists.txt
 rename clang/{test => lib}/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt (51%)
 rename clang/{test => lib}/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp (100%)
 rename clang/{test => lib}/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports (100%)
 rename clang/{test => lib}/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt (50%)
 rename clang/{test => lib}/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp (100%)
 rename clang/{test => lib}/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports (100%)
 create mode 100644 clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
 rename clang/{test => lib}/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp (100%)
 rename clang/{test => lib}/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports (100%)
 delete mode 100644 clang/test/Analysis/plugins/CMakeLists.txt
 delete mode 100644 clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt

diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt
index 940a3dfe6f60d..92717143467d5 100644
--- a/clang/lib/Analysis/CMakeLists.txt
+++ b/clang/lib/Analysis/CMakeLists.txt
@@ -34,3 +34,5 @@ add_clang_library(clangAnalysis
   clangBasic
   clangLex
   )
+
+add_subdirectory(plugins)
diff --git a/clang/lib/Analysis/plugins/CMakeLists.txt b/clang/lib/Analysis/plugins/CMakeLists.txt
new file mode 100644
index 0000000000000..f7dbc936952cc
--- /dev/null
+++ b/clang/lib/Analysis/plugins/CMakeLists.txt
@@ -0,0 +1,5 @@
+if(LLVM_ENABLE_PLUGINS)
+  add_subdirectory(SampleAnalyzer)
+  add_subdirectory(CheckerDependencyHandling)
+  add_subdirectory(CheckerOptionHandling)
+endif()
diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
similarity index 51%
rename from clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
rename to clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
index 80e2cdbd3a258..0a8ff48755f17 100644
--- a/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
@@ -1,11 +1,10 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerDependencyHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE CheckerDependencyHandling.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
-  target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
-    clangAnalysis
-    clangAST
-    clangStaticAnalyzerCore
-    LLVMSupport
-    )
-endif()
+target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
+  clangAnalysis
+  clangAST
+  clangStaticAnalyzerCore
+  clangStaticAnalyzerFrontend
+  LLVMSupport
+  )
diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
rename to clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
rename to clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
similarity index 50%
rename from clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
rename to clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
index 6a1d5e8527941..6e289933c2dd4 100644
--- a/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
@@ -1,11 +1,10 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerOptionHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE CheckerOptionHandling.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
-  target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
-    clangAnalysis
-    clangAST
-    clangStaticAnalyzerCore
-    LLVMSupport
-    )
-endif()
+target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
+  clangAnalysis
+  clangAST
+  clangStaticAnalyzerCore
+  clangStaticAnalyzerFrontend
+  LLVMSupport
+  )
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp b/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
rename to clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports b/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
rename to clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
new file mode 100644
index 0000000000000..639a97f253112
--- /dev/null
+++ b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
+add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
+
+target_link_libraries(SampleAnalyzerPlugin PRIVATE
+  clangAnalysis
+  clangAST
+  clangStaticAnalyzerCore
+  clangStaticAnalyzerFrontend
+  LLVMSupport
+  )
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp b/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
similarity index 100%
rename from clang/test/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
rename to clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports b/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
similarity index 100%
rename from clang/test/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
rename to clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
diff --git a/clang/test/Analysis/lit.local.cfg b/clang/test/Analysis/lit.local.cfg
index b77cae8ecebec..84f7569152c9f 100644
--- a/clang/test/Analysis/lit.local.cfg
+++ b/clang/test/Analysis/lit.local.cfg
@@ -18,7 +18,5 @@ config.substitutions.append(('%diff_plist',
 config.substitutions.append(('%diff_sarif',
     '''diff -U1 -w -I ".*file:.*%basename_t" -I '"version":' -I "2\.0\.0\-csd\.[0-9]*\.beta\."'''))
 
-config.excludes.add('plugins')
-
 if not config.root.clang_staticanalyzer:
     config.unsupported = True
diff --git a/clang/test/Analysis/plugins/CMakeLists.txt b/clang/test/Analysis/plugins/CMakeLists.txt
deleted file mode 100644
index 8d4333f99a4d3..0000000000000
--- a/clang/test/Analysis/plugins/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-add_subdirectory(SampleAnalyzer)
-add_subdirectory(CheckerDependencyHandling)
-add_subdirectory(CheckerOptionHandling)
-
-set(CLANG_ANALYZER_PLUGIN_DEPS
-  SampleAnalyzerPlugin
-  CheckerDependencyHandlingAnalyzerPlugin
-  CheckerOptionHandlingAnalyzerPlugin
-  )
-
-add_custom_target(clang-analyzer-plugin
-  DEPENDS ${CLANG_ANALYZER_PLUGIN_DEPS})
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
deleted file mode 100644
index 7c7b2aec1988d..0000000000000
--- a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
-add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
-
-if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
-  target_link_libraries(SampleAnalyzerPlugin PRIVATE
-    clangAnalysis
-    clangAST
-    clangStaticAnalyzerCore
-    LLVMSupport
-    )
-endif()
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 339f637847deb..32fe571afaad6 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -126,27 +126,13 @@ if( NOT CLANG_BUILT_STANDALONE )
 endif()
 
 if (CLANG_ENABLE_STATIC_ANALYZER)
-  add_subdirectory(Analysis/plugins)
-  list(APPEND CLANG_TEST_DEPS clang-analyzer-plugin)
-
-  # check-all would launch those tests via check-clang.
-  set(EXCLUDE_FROM_ALL ON)
-
-  add_lit_testsuite(check-clang-analyzer "Running the Clang analyzer tests"
-    ${CMAKE_CURRENT_BINARY_DIR}/Analysis
-    PARAMS ${ANALYZER_TEST_PARAMS}
-    DEPENDS ${CLANG_TEST_DEPS})
-  set_target_properties(check-clang-analyzer PROPERTIES FOLDER "Clang tests")
-
-  if (LLVM_WITH_Z3)
-    add_lit_testsuite(check-clang-analyzer-z3 "Running the Clang analyzer tests, using Z3 as a solver"
-      ${CMAKE_CURRENT_BINARY_DIR}/Analysis
-      PARAMS ${ANALYZER_TEST_PARAMS_Z3}
-      DEPENDS ${CLANG_TEST_DEPS})
-    set_target_properties(check-clang-analyzer-z3 PROPERTIES FOLDER "Clang tests")
+  if (LLVM_ENABLE_PLUGINS)
+    set(CLANG_ANALYZER_PLUGIN_DEPS
+      SampleAnalyzerPlugin
+      CheckerDependencyHandlingAnalyzerPlugin
+      CheckerOptionHandlingAnalyzerPlugin
+      )
   endif()
-
-  set(EXCLUDE_FROM_ALL OFF)
 endif()
 
 add_custom_target(clang-test-depends DEPENDS ${CLANG_TEST_DEPS})
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 895f9ab7189d1..6ca7b6da6dc0d 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -672,6 +672,17 @@ set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}")
 message(STATUS "LLVM host triple: ${LLVM_HOST_TRIPLE}")
 message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}")
 
+if(WIN32 OR CYGWIN)
+  if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
+    set(LLVM_ENABLE_PLUGINS_default ON)
+  else()
+    set(LLVM_ENABLE_PLUGINS_default OFF)
+  endif()
+else()
+  set(LLVM_ENABLE_PLUGINS_default ON)
+endif()
+option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default})
+
 include(HandleLLVMOptions)
 
 # Verify that we can find a Python 2 interpreter.  Python 3 is unsupported.
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index cb9a01e1d39f7..8e7c93c9a9314 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -912,14 +912,6 @@ if(LLVM_LINK_LLVM_DYLIB AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)
   message(FATAL_ERROR "LLVM_LINK_LLVM_DYLIB not compatible with LLVM_EXPORT_SYMBOLS_FOR_PLUGINS")
 endif()
 
-# Plugin support
-# FIXME: Make this configurable.
-if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
-  set(LLVM_ENABLE_PLUGINS ON)
-else()
-  set(LLVM_ENABLE_PLUGINS OFF)
-endif()
-
 # By default we should enable LLVM_ENABLE_IDE only for multi-configuration
 # generators. This option disables optional build system features that make IDEs
 # less usable.

From d8f8abbd4a2823f223bd7bc56445541fb221b512 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Tue, 28 May 2019 06:33:06 +0000
Subject: [PATCH 0322/1176] Use SymbolTable::insert() to implement --trace.

Differential Revision: https://reviews.llvm.org/D62381

llvm-svn: 361791
---
 lld/ELF/Driver.cpp      | 21 +++++++++++----------
 lld/ELF/LTO.cpp         |  4 ++--
 lld/ELF/MarkLive.cpp    | 10 ++++++----
 lld/ELF/SymbolTable.cpp | 19 +++----------------
 lld/ELF/SymbolTable.h   | 10 +++++++---
 lld/ELF/Writer.cpp      | 17 +++++++++++------
 6 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 37465b501c802..ba92d2cfe7346 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1337,10 +1337,10 @@ static void handleLibcall(StringRef Name) {
 // result, the passes after the symbol resolution won't see any
 // symbols of type CommonSymbol.
 static void replaceCommonSymbols() {
-  for (Symbol *Sym : Symtab->getSymbols()) {
+  Symtab->forEachSymbol([](Symbol *Sym) {
     auto *S = dyn_cast<CommonSymbol>(Sym);
     if (!S)
-      continue;
+      return;
 
     auto *Bss = make<BssSection>("COMMON", S->Size, S->Alignment);
     Bss->File = S->File;
@@ -1348,7 +1348,7 @@ static void replaceCommonSymbols() {
     InputSections.push_back(Bss);
     S->replace(Defined{S->File, S->getName(), S->Binding, S->StOther, S->Type,
                        /*Value=*/0, S->Size, Bss});
-  }
+  });
 }
 
 // If all references to a DSO happen to be weak, the DSO is not added
@@ -1356,15 +1356,15 @@ static void replaceCommonSymbols() {
 // created from the DSO. Otherwise, they become dangling references
 // that point to a non-existent DSO.
 static void demoteSharedSymbols() {
-  for (Symbol *Sym : Symtab->getSymbols()) {
+  Symtab->forEachSymbol([](Symbol *Sym) {
     auto *S = dyn_cast<SharedSymbol>(Sym);
     if (!S || S->getFile().IsNeeded)
-      continue;
+      return;
 
     bool Used = S->Used;
     S->replace(Undefined{nullptr, S->getName(), STB_WEAK, S->StOther, S->Type});
     S->Used = Used;
-  }
+  });
 }
 
 // The section referred to by S is considered address-significant. Set the
@@ -1400,9 +1400,10 @@ static void findKeepUniqueSections(opt::InputArgList &Args) {
 
   // Symbols in the dynsym could be address-significant in other executables
   // or DSOs, so we conservatively mark them as address-significant.
-  for (Symbol *S : Symtab->getSymbols())
-    if (S->includeInDynsym())
-      markAddrsig(S);
+  Symtab->forEachSymbol([&](Symbol *Sym) {
+    if (Sym->includeInDynsym())
+      markAddrsig(Sym);
+  });
 
   // Visit the address-significance table in each object file and mark each
   // referenced symbol as address-significant.
@@ -1575,7 +1576,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
 
   // Handle --trace-symbol.
   for (auto *Arg : Args.filtered(OPT_trace_symbol))
-    Symtab->trace(Arg->getValue());
+    Symtab->insert(Arg->getValue())->Traced = true;
 
   // Add all files to the symbol table. This will add almost all
   // symbols that we need to the symbol table. This process might
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index dad52d8b03fc9..eb0f75e6cc400 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -142,12 +142,12 @@ BitcodeCompiler::BitcodeCompiler() {
                                        Config->LTOPartitions);
 
   // Initialize UsedStartStop.
-  for (Symbol *Sym : Symtab->getSymbols()) {
+  Symtab->forEachSymbol([&](Symbol *Sym) {
     StringRef S = Sym->getName();
     for (StringRef Prefix : {"__start_", "__stop_"})
       if (S.startswith(Prefix))
         UsedStartStop.insert(S.substr(Prefix.size()));
-  }
+  });
 }
 
 BitcodeCompiler::~BitcodeCompiler() = default;
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 5132bb4b72b9b..431915c27ba4e 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -213,9 +213,10 @@ template <class ELFT> void MarkLive<ELFT>::run() {
 
   // Preserve externally-visible symbols if the symbols defined by this
   // file can interrupt other ELF file's symbols at runtime.
-  for (Symbol *S : Symtab->getSymbols())
-    if (S->includeInDynsym())
-      markSymbol(S);
+  Symtab->forEachSymbol([&](Symbol *Sym) {
+    if (Sym->includeInDynsym())
+      markSymbol(Sym);
+  });
 
   // Preserve special sections and those which are specified in linker
   // script KEEP command.
@@ -273,10 +274,11 @@ template <class ELFT> void elf::markLive() {
       Sec->Live = true;
 
     // If a DSO defines a symbol referenced in a regular object, it is needed.
-    for (Symbol *Sym : Symtab->getSymbols())
+    Symtab->forEachSymbol([](Symbol *Sym) {
       if (auto *S = dyn_cast<SharedSymbol>(Sym))
         if (S->IsUsedInRegularObj && !S->isWeak())
           S->getFile().IsNeeded = true;
+    });
     return;
   }
 
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 9fc05d98366aa..c4d52b1168e02 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -32,12 +32,6 @@ using namespace lld::elf;
 
 SymbolTable *elf::Symtab;
 
-// Set a flag for --trace-symbol so that we can print out a log message
-// if a new symbol with the same name is inserted into the symbol table.
-void SymbolTable::trace(StringRef Name) {
-  SymMap.insert({CachedHashStringRef(Name), -1});
-}
-
 void SymbolTable::wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap) {
   // Swap symbols as instructed by -wrap.
   int &Idx1 = SymMap[CachedHashStringRef(Sym->getName())];
@@ -70,13 +64,6 @@ Symbol *SymbolTable::insert(StringRef Name) {
   auto P = SymMap.insert({CachedHashStringRef(Name), (int)SymVector.size()});
   int &SymIndex = P.first->second;
   bool IsNew = P.second;
-  bool Traced = false;
-
-  if (SymIndex == -1) {
-    SymIndex = SymVector.size();
-    IsNew = true;
-    Traced = true;
-  }
 
   if (!IsNew)
     return SymVector[SymIndex];
@@ -91,7 +78,6 @@ Symbol *SymbolTable::insert(StringRef Name) {
   Sym->IsUsedInRegularObj = false;
   Sym->ExportDynamic = false;
   Sym->CanInline = true;
-  Sym->Traced = Traced;
   Sym->ScriptDefined = false;
   return Sym;
 }
@@ -106,9 +92,10 @@ Symbol *SymbolTable::find(StringRef Name) {
   auto It = SymMap.find(CachedHashStringRef(Name));
   if (It == SymMap.end())
     return nullptr;
-  if (It->second == -1)
+  Symbol *Sym = SymVector[It->second];
+  if (Sym->isPlaceholder())
     return nullptr;
-  return SymVector[It->second];
+  return Sym;
 }
 
 // Initialize DemangledSyms with a map from demangled symbols to symbol
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index 6e93e0a144d9a..68568d25b73c4 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -10,9 +10,11 @@
 #define LLD_ELF_SYMBOL_TABLE_H
 
 #include "InputFiles.h"
+#include "Symbols.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 
 namespace lld {
 namespace elf {
@@ -33,7 +35,11 @@ class SymbolTable {
 public:
   void wrap(Symbol *Sym, Symbol *Real, Symbol *Wrap);
 
-  ArrayRef<Symbol *> getSymbols() const { return SymVector; }
+  void forEachSymbol(llvm::function_ref<void(Symbol *)> Fn) {
+    for (Symbol *Sym : SymVector)
+      if (!Sym->isPlaceholder())
+        Fn(Sym);
+  }
 
   Symbol *insert(StringRef Name);
 
@@ -43,8 +49,6 @@ class SymbolTable {
 
   Symbol *find(StringRef Name);
 
-  void trace(StringRef Name);
-
   void handleDynamicList();
 
   // Set of .so files to not link the same shared object file more than once.
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 27ffd9269b00f..58fc6fab7c9fb 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1169,9 +1169,11 @@ static DenseMap<const InputSectionBase *, int> buildSectionOrder() {
 
   // We want both global and local symbols. We get the global ones from the
   // symbol table and iterate the object files for the local ones.
-  for (Symbol *Sym : Symtab->getSymbols())
+  Symtab->forEachSymbol([&](Symbol *Sym) {
     if (!Sym->isLazy())
       AddSym(*Sym);
+  });
+
   for (InputFile *File : ObjectFiles)
     for (Symbol *Sym : File->getSymbols())
       if (Sym->isLocal())
@@ -1609,9 +1611,10 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
   // earlier.
   finalizeSynthetic(In.EhFrame);
 
-  for (Symbol *S : Symtab->getSymbols())
+  Symtab->forEachSymbol([](Symbol *S) {
     if (!S->IsPreemptible)
       S->IsPreemptible = computeIsPreemptible(*S);
+  });
 
   // Scan relocations. This must be done after every symbol is declared so that
   // we can correctly decide if a dynamic relocation is needed.
@@ -1638,18 +1641,20 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
           llvm::all_of(File->DtNeeded, [&](StringRef Needed) {
             return Symtab->SoNames.count(Needed);
           });
-    for (Symbol *Sym : Symtab->getSymbols())
+
+    Symtab->forEachSymbol([](Symbol *Sym) {
       if (Sym->isUndefined() && !Sym->isWeak())
         if (auto *F = dyn_cast_or_null<SharedFile>(Sym->File))
           if (F->AllNeededIsKnown)
             error(toString(F) + ": undefined reference to " + toString(*Sym));
+    });
   }
 
   // Now that we have defined all possible global symbols including linker-
   // synthesized ones. Visit all symbols to give the finishing touches.
-  for (Symbol *Sym : Symtab->getSymbols()) {
+  Symtab->forEachSymbol([](Symbol *Sym) {
     if (!includeInSymtab(*Sym))
-      continue;
+      return;
     if (In.SymTab)
       In.SymTab->addSymbol(Sym);
 
@@ -1659,7 +1664,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
         if (File->IsNeeded && !Sym->isUndefined())
           addVerneed(Sym);
     }
-  }
+  });
 
   // Do not proceed if there was an undefined symbol.
   if (errorCount())

From cfca5095df0209c60109696d6cc368d49e2c5939 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 28 May 2019 06:34:52 +0000
Subject: [PATCH 0323/1176] [ELF] Error on relocations to STT_SECTION symbols
 if the sections were discarded

This is implemented by creating Undefined (instead of Defined) for such
local STT_SECTION symbols. It allows us to catch errors when there are
relocations to such discarded sections (e.g. in PR41693, ld.bfd and gold
error but we don't). Updated comdat-discarded-error.s checks we emit
friendly error message.

For relocatable-eh-frame.s, ld.lld -r a.o a.o will now error
"STT_SECTION symbol should be defined" because the section .eh_frame
refers to is now an Undefined instead of a Defined.
So I have to change `error()` to `warn()` to retain the output.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D61583

llvm-svn: 361792
---
 lld/ELF/InputFiles.cpp                         |  3 +++
 lld/ELF/InputSection.cpp                       |  3 ++-
 lld/ELF/Relocations.cpp                        | 14 +++++++++++---
 lld/test/ELF/comdat-discarded-error.s          | 12 +++++++++++-
 lld/test/ELF/comdat-discarded-reloc.s          |  2 +-
 lld/test/ELF/comdat.s                          |  4 +---
 lld/test/ELF/invalid-undef-section-symbol.test |  2 +-
 lld/test/ELF/relocatable-eh-frame.s            |  4 +++-
 8 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 2b89533191a19..16991421fe6ce 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -997,6 +997,9 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
 
       if (ESym.st_shndx == SHN_UNDEF)
         this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
+      else if (Sec == &InputSection::Discarded)
+        this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type,
+                                           /*DiscardedSecIdx=*/SecIdx);
       else
         this->Symbols[I] =
             make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 07a30ed57c476..74878931afb40 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -438,7 +438,8 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
       // hopefully creates a frame that is ignored at runtime.
       auto *D = dyn_cast<Defined>(&Sym);
       if (!D) {
-        error("STT_SECTION symbol should be defined");
+        warn("STT_SECTION symbol should be defined");
+        P->setSymbolAndType(0, 0, false);
         continue;
       }
       SectionBase *Section = D->Section->Repl;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 12c38c70dcc41..a8ed792164bc8 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -681,9 +681,17 @@ static std::string maybeReportDiscarded(Undefined &Sym, InputSectionBase &Sec,
     return "";
   ArrayRef<Elf_Shdr_Impl<ELFT>> ObjSections =
       CHECK(File->getObj().sections(), File);
-  std::string Msg =
-      "relocation refers to a symbol in a discarded section: " + toString(Sym) +
-      "\n>>> defined in " + toString(File);
+
+  std::string Msg;
+  if (Sym.Type == ELF::STT_SECTION) {
+    Msg = "relocation refers to a discarded section: ";
+    Msg += CHECK(
+        File->getObj().getSectionName(&ObjSections[Sym.DiscardedSecIdx]), File);
+  } else {
+    Msg = "relocation refers to a symbol in a discarded section: " +
+          toString(Sym);
+  }
+  Msg += "\n>>> defined in " + toString(File);
 
   Elf_Shdr_Impl<ELFT> ELFSec = ObjSections[Sym.DiscardedSecIdx - 1];
   if (ELFSec.sh_type != SHT_GROUP)
diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s
index 3584783cde09d..0f6b417b0fa4a 100644
--- a/lld/test/ELF/comdat-discarded-error.s
+++ b/lld/test/ELF/comdat-discarded-error.s
@@ -5,7 +5,7 @@
 # RUN: echo '.section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' | \
 # RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o
 
-# RUN: not ld.lld %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
 
 # CHECK:      error: relocation refers to a symbol in a discarded section: bar
 # CHECK-NEXT: >>> defined in {{.*}}3.o
@@ -13,6 +13,16 @@
 # CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
 # CHECK-NEXT: >>> referenced by {{.*}}1.o:(.text+0x1)
 
+# CHECK:      error: relocation refers to a discarded section: .text.foo
+# CHECK-NEXT: >>> defined in {{.*}}1.o
+# CHECK-NEXT: >>> section group signature: foo
+# CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
+# CHECK-NEXT: >>> referenced by {{.*}}1.o:(.data+0x0)
+
 .globl _start
 _start:
   jmp bar
+
+.section .text.foo,"axG",@progbits,foo,comdat
+.data
+  .quad .text.foo
diff --git a/lld/test/ELF/comdat-discarded-reloc.s b/lld/test/ELF/comdat-discarded-reloc.s
index d23baf386e92d..d12732cd3569b 100644
--- a/lld/test/ELF/comdat-discarded-reloc.s
+++ b/lld/test/ELF/comdat-discarded-reloc.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat-discarded-reloc.s -o %t2.o
-# RUN: ld.lld -gc-sections %t.o %t2.o -o %t
+# RUN: ld.lld -gc-sections --noinhibit-exec %t.o %t2.o -o /dev/null
 
 ## ELF spec doesn't allow a relocation to point to a deduplicated
 ## COMDAT section. Unfortunately this happens in practice (e.g. .eh_frame)
diff --git a/lld/test/ELF/comdat.s b/lld/test/ELF/comdat.s
index 86103e5d9eb75..9e3f5a81d300e 100644
--- a/lld/test/ELF/comdat.s
+++ b/lld/test/ELF/comdat.s
@@ -1,7 +1,7 @@
 // REQUIRES: x86
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat.s -o %t2.o
-// RUN: ld.lld -shared %t.o %t.o %t2.o -o %t
+// RUN: ld.lld -shared %t.o %t2.o -o %t
 // RUN: llvm-objdump -d %t | FileCheck %s
 // RUN: llvm-readobj -S --symbols %t | FileCheck --check-prefix=READ %s
 
@@ -31,9 +31,7 @@ foo:
 // CHECK-EMPTY:
 // CHECK-NEXT: bar:
 // 0x1000 - 0x1001 - 5 = -6
-// 0      - 0x1006 - 5 = -4107
 // CHECK-NEXT:   1001:	{{.*}}  callq  -6
-// CHECK-NEXT:   1006:	{{.*}}  callq  -4107
 
         .section .text3,"axG",@progbits,zed,comdat,unique,0
 
diff --git a/lld/test/ELF/invalid-undef-section-symbol.test b/lld/test/ELF/invalid-undef-section-symbol.test
index 1d66885eadf8e..80e5a1464d740 100644
--- a/lld/test/ELF/invalid-undef-section-symbol.test
+++ b/lld/test/ELF/invalid-undef-section-symbol.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj %s -o %t.o
-# RUN: not ld.lld -r %t.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld -r --fatal-warnings %t.o -o /dev/null 2>&1 | FileCheck %s
 
 # We used to crash at this.
 # CHECK: STT_SECTION symbol should be defined
diff --git a/lld/test/ELF/relocatable-eh-frame.s b/lld/test/ELF/relocatable-eh-frame.s
index dee906acb87fb..6172dd355db4a 100644
--- a/lld/test/ELF/relocatable-eh-frame.s
+++ b/lld/test/ELF/relocatable-eh-frame.s
@@ -1,10 +1,12 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: ld.lld -r %t.o %t.o -o %t
+# RUN: ld.lld -r %t.o %t.o -o %t 2>&1 | FileCheck --check-prefix=WARN %s
 # RUN: llvm-readobj -r %t | FileCheck %s
 # RUN: ld.lld %t -o %t.so -shared
 # RUN: llvm-objdump -h %t.so | FileCheck --check-prefix=DSO %s
 
+# WARN: STT_SECTION symbol should be defined
+
 # DSO: .eh_frame     00000034
 
 # CHECK:      Relocations [

From 102b4b2486cad450fcfa317156a772586278bd2c Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Tue, 28 May 2019 06:38:16 +0000
Subject: [PATCH 0324/1176] Revert [test] Fix plugin tests

This reverts r361790 (git commit fe5eaab2b5b4523886bd63aebcfea8cfce586fa1)

It's causing buildbot breakage, so reverting while I investigate.

llvm-svn: 361793
---
 clang/lib/Analysis/CMakeLists.txt             |  2 --
 clang/lib/Analysis/plugins/CMakeLists.txt     |  5 ----
 .../plugins/SampleAnalyzer/CMakeLists.txt     | 10 -------
 clang/test/Analysis/lit.local.cfg             |  2 ++
 clang/test/Analysis/plugins/CMakeLists.txt    | 12 +++++++++
 .../CheckerDependencyHandling/CMakeLists.txt  | 15 ++++++-----
 .../CheckerDependencyHandling.cpp             |  0
 ...erDependencyHandlingAnalyzerPlugin.exports |  0
 .../CheckerOptionHandling/CMakeLists.txt      | 15 ++++++-----
 .../CheckerOptionHandling.cpp                 |  0
 ...heckerOptionHandlingAnalyzerPlugin.exports |  0
 .../plugins/SampleAnalyzer/CMakeLists.txt     | 11 ++++++++
 .../SampleAnalyzer/MainCallChecker.cpp        |  0
 .../SampleAnalyzerPlugin.exports              |  0
 clang/test/CMakeLists.txt                     | 26 ++++++++++++++-----
 llvm/CMakeLists.txt                           | 11 --------
 llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 ++++++
 17 files changed, 69 insertions(+), 48 deletions(-)
 delete mode 100644 clang/lib/Analysis/plugins/CMakeLists.txt
 delete mode 100644 clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
 create mode 100644 clang/test/Analysis/plugins/CMakeLists.txt
 rename clang/{lib => test}/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt (51%)
 rename clang/{lib => test}/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp (100%)
 rename clang/{lib => test}/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports (100%)
 rename clang/{lib => test}/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt (50%)
 rename clang/{lib => test}/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp (100%)
 rename clang/{lib => test}/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports (100%)
 create mode 100644 clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
 rename clang/{lib => test}/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp (100%)
 rename clang/{lib => test}/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports (100%)

diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt
index 92717143467d5..940a3dfe6f60d 100644
--- a/clang/lib/Analysis/CMakeLists.txt
+++ b/clang/lib/Analysis/CMakeLists.txt
@@ -34,5 +34,3 @@ add_clang_library(clangAnalysis
   clangBasic
   clangLex
   )
-
-add_subdirectory(plugins)
diff --git a/clang/lib/Analysis/plugins/CMakeLists.txt b/clang/lib/Analysis/plugins/CMakeLists.txt
deleted file mode 100644
index f7dbc936952cc..0000000000000
--- a/clang/lib/Analysis/plugins/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-if(LLVM_ENABLE_PLUGINS)
-  add_subdirectory(SampleAnalyzer)
-  add_subdirectory(CheckerDependencyHandling)
-  add_subdirectory(CheckerOptionHandling)
-endif()
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
deleted file mode 100644
index 639a97f253112..0000000000000
--- a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
-add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
-
-target_link_libraries(SampleAnalyzerPlugin PRIVATE
-  clangAnalysis
-  clangAST
-  clangStaticAnalyzerCore
-  clangStaticAnalyzerFrontend
-  LLVMSupport
-  )
diff --git a/clang/test/Analysis/lit.local.cfg b/clang/test/Analysis/lit.local.cfg
index 84f7569152c9f..b77cae8ecebec 100644
--- a/clang/test/Analysis/lit.local.cfg
+++ b/clang/test/Analysis/lit.local.cfg
@@ -18,5 +18,7 @@ config.substitutions.append(('%diff_plist',
 config.substitutions.append(('%diff_sarif',
     '''diff -U1 -w -I ".*file:.*%basename_t" -I '"version":' -I "2\.0\.0\-csd\.[0-9]*\.beta\."'''))
 
+config.excludes.add('plugins')
+
 if not config.root.clang_staticanalyzer:
     config.unsupported = True
diff --git a/clang/test/Analysis/plugins/CMakeLists.txt b/clang/test/Analysis/plugins/CMakeLists.txt
new file mode 100644
index 0000000000000..8d4333f99a4d3
--- /dev/null
+++ b/clang/test/Analysis/plugins/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_subdirectory(SampleAnalyzer)
+add_subdirectory(CheckerDependencyHandling)
+add_subdirectory(CheckerOptionHandling)
+
+set(CLANG_ANALYZER_PLUGIN_DEPS
+  SampleAnalyzerPlugin
+  CheckerDependencyHandlingAnalyzerPlugin
+  CheckerOptionHandlingAnalyzerPlugin
+  )
+
+add_custom_target(clang-analyzer-plugin
+  DEPENDS ${CLANG_ANALYZER_PLUGIN_DEPS})
diff --git a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt b/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
similarity index 51%
rename from clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
rename to clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
index 0a8ff48755f17..80e2cdbd3a258 100644
--- a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
+++ b/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
@@ -1,10 +1,11 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerDependencyHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE CheckerDependencyHandling.cpp PLUGIN_TOOL clang)
 
-target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
-  clangAnalysis
-  clangAST
-  clangStaticAnalyzerCore
-  clangStaticAnalyzerFrontend
-  LLVMSupport
-  )
+if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
+  target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
+    clangAnalysis
+    clangAST
+    clangStaticAnalyzerCore
+    LLVMSupport
+    )
+endif()
diff --git a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp b/clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
similarity index 100%
rename from clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
rename to clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
diff --git a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports b/clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
similarity index 100%
rename from clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
rename to clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
diff --git a/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt b/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
similarity index 50%
rename from clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
rename to clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
index 6e289933c2dd4..6a1d5e8527941 100644
--- a/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
+++ b/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
@@ -1,10 +1,11 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerOptionHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE CheckerOptionHandling.cpp PLUGIN_TOOL clang)
 
-target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
-  clangAnalysis
-  clangAST
-  clangStaticAnalyzerCore
-  clangStaticAnalyzerFrontend
-  LLVMSupport
-  )
+if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
+  target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
+    clangAnalysis
+    clangAST
+    clangStaticAnalyzerCore
+    LLVMSupport
+    )
+endif()
diff --git a/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp b/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
similarity index 100%
rename from clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
rename to clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
diff --git a/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports b/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
similarity index 100%
rename from clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
rename to clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
new file mode 100644
index 0000000000000..7c7b2aec1988d
--- /dev/null
+++ b/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
+add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
+
+if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
+  target_link_libraries(SampleAnalyzerPlugin PRIVATE
+    clangAnalysis
+    clangAST
+    clangStaticAnalyzerCore
+    LLVMSupport
+    )
+endif()
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp b/clang/test/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
similarity index 100%
rename from clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
rename to clang/test/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports b/clang/test/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
similarity index 100%
rename from clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
rename to clang/test/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 32fe571afaad6..339f637847deb 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -126,13 +126,27 @@ if( NOT CLANG_BUILT_STANDALONE )
 endif()
 
 if (CLANG_ENABLE_STATIC_ANALYZER)
-  if (LLVM_ENABLE_PLUGINS)
-    set(CLANG_ANALYZER_PLUGIN_DEPS
-      SampleAnalyzerPlugin
-      CheckerDependencyHandlingAnalyzerPlugin
-      CheckerOptionHandlingAnalyzerPlugin
-      )
+  add_subdirectory(Analysis/plugins)
+  list(APPEND CLANG_TEST_DEPS clang-analyzer-plugin)
+
+  # check-all would launch those tests via check-clang.
+  set(EXCLUDE_FROM_ALL ON)
+
+  add_lit_testsuite(check-clang-analyzer "Running the Clang analyzer tests"
+    ${CMAKE_CURRENT_BINARY_DIR}/Analysis
+    PARAMS ${ANALYZER_TEST_PARAMS}
+    DEPENDS ${CLANG_TEST_DEPS})
+  set_target_properties(check-clang-analyzer PROPERTIES FOLDER "Clang tests")
+
+  if (LLVM_WITH_Z3)
+    add_lit_testsuite(check-clang-analyzer-z3 "Running the Clang analyzer tests, using Z3 as a solver"
+      ${CMAKE_CURRENT_BINARY_DIR}/Analysis
+      PARAMS ${ANALYZER_TEST_PARAMS_Z3}
+      DEPENDS ${CLANG_TEST_DEPS})
+    set_target_properties(check-clang-analyzer-z3 PROPERTIES FOLDER "Clang tests")
   endif()
+
+  set(EXCLUDE_FROM_ALL OFF)
 endif()
 
 add_custom_target(clang-test-depends DEPENDS ${CLANG_TEST_DEPS})
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 6ca7b6da6dc0d..895f9ab7189d1 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -672,17 +672,6 @@ set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}")
 message(STATUS "LLVM host triple: ${LLVM_HOST_TRIPLE}")
 message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}")
 
-if(WIN32 OR CYGWIN)
-  if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
-    set(LLVM_ENABLE_PLUGINS_default ON)
-  else()
-    set(LLVM_ENABLE_PLUGINS_default OFF)
-  endif()
-else()
-  set(LLVM_ENABLE_PLUGINS_default ON)
-endif()
-option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default})
-
 include(HandleLLVMOptions)
 
 # Verify that we can find a Python 2 interpreter.  Python 3 is unsupported.
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 8e7c93c9a9314..cb9a01e1d39f7 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -912,6 +912,14 @@ if(LLVM_LINK_LLVM_DYLIB AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)
   message(FATAL_ERROR "LLVM_LINK_LLVM_DYLIB not compatible with LLVM_EXPORT_SYMBOLS_FOR_PLUGINS")
 endif()
 
+# Plugin support
+# FIXME: Make this configurable.
+if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
+  set(LLVM_ENABLE_PLUGINS ON)
+else()
+  set(LLVM_ENABLE_PLUGINS OFF)
+endif()
+
 # By default we should enable LLVM_ENABLE_IDE only for multi-configuration
 # generators. This option disables optional build system features that make IDEs
 # less usable.

From ab53c5e5ab42c9456bc7eb48532f41aebb2f4a40 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 28 May 2019 07:25:27 +0000
Subject: [PATCH 0325/1176] [InlineCost] Fix a couple comments. NFC

Replace "unary operator" with "unary instruction" in visitUnaryInstruction since
we now have a UnaryOperator class which might needs its own visit function.

Fix a copy/paste in visitCastInst that appears to have been copied from
visitPtrToInt.

llvm-svn: 361794
---
 llvm/lib/Analysis/InlineCost.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 7fcfc76ea62cc..ced30d6e3b91c 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -708,7 +708,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
 }
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
-  // Propagate constants through ptrtoint.
+  // Propagate constants through casts.
   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
         return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
       }))
@@ -744,7 +744,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
       }))
     return true;
 
-  // Disable any SROA on the argument to arbitrary unary operators.
+  // Disable any SROA on the argument to arbitrary unary instructions.
   disableSROA(Operand);
 
   return false;

From 7d9cac5bbac26bed73b7dc4ab6c5815d8aa60b68 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Tue, 28 May 2019 08:42:22 +0000
Subject: [PATCH 0326/1176] [AArch64][SVE2] Asm: support SVE2 Misc Group

Summary:
Patch adds support for the following instructions:

SVE2 bitwise exclusive-or interleaved:
    * EORBT, EORTB

SVE2 bitwise permute:
    * BEXT, BDEP, BGRP

SVE2 bitwise shift left long:
    * SSHLLB, SSHLLT, USHLLB, USHLLT

SVE2 integer add/subtract interleaved long:
    * SADDLBT, SSUBLBT, SSUBLTB

BDEP, BEXT and BGRP are enabled with SVE2 feature +bitperm, all other
instructions in this group are enabled with +sve2.

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62304

llvm-svn: 361795
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 22 ++++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 76 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/bdep-diagnostics.s  | 26 +++++++
 llvm/test/MC/AArch64/SVE2/bdep.s              | 32 ++++++++
 llvm/test/MC/AArch64/SVE2/bext-diagnostics.s  | 26 +++++++
 llvm/test/MC/AArch64/SVE2/bext.s              | 32 ++++++++
 llvm/test/MC/AArch64/SVE2/bgrp-diagnostics.s  | 26 +++++++
 llvm/test/MC/AArch64/SVE2/bgrp.s              | 32 ++++++++
 llvm/test/MC/AArch64/SVE2/eorbt-diagnostics.s | 20 +++++
 llvm/test/MC/AArch64/SVE2/eorbt.s             | 48 ++++++++++++
 llvm/test/MC/AArch64/SVE2/eortb-diagnostics.s | 20 +++++
 llvm/test/MC/AArch64/SVE2/eortb.s             | 48 ++++++++++++
 .../MC/AArch64/SVE2/saddlbt-diagnostics.s     | 40 ++++++++++
 llvm/test/MC/AArch64/SVE2/saddlbt.s           | 27 +++++++
 .../test/MC/AArch64/SVE2/sshllb-diagnostics.s | 71 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/sshllb.s            | 44 +++++++++++
 .../test/MC/AArch64/SVE2/sshllt-diagnostics.s | 71 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/sshllt.s            | 44 +++++++++++
 .../MC/AArch64/SVE2/ssublbt-diagnostics.s     | 40 ++++++++++
 llvm/test/MC/AArch64/SVE2/ssublbt.s           | 27 +++++++
 .../MC/AArch64/SVE2/ssubltb-diagnostics.s     | 40 ++++++++++
 llvm/test/MC/AArch64/SVE2/ssubltb.s           | 27 +++++++
 .../test/MC/AArch64/SVE2/ushllb-diagnostics.s | 71 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ushllb.s            | 44 +++++++++++
 .../test/MC/AArch64/SVE2/ushllt-diagnostics.s | 71 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ushllt.s            | 44 +++++++++++
 26 files changed, 1069 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/bdep-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bdep.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bext-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bext.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bgrp-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bgrp.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/eorbt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/eorbt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/eortb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/eortb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddlbt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/saddlbt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sshllb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sshllb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sshllt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sshllt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssublbt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssublbt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssubltb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ssubltb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ushllb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ushllb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ushllt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ushllt.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index da26b409a4570..739fb2a73b1ad 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1268,6 +1268,21 @@ let Predicates = [HasSVE2] in {
   defm MATCH_PPzZZ  : sve2_char_match<0b0, "match">;
   defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
 
+  // SVE2 bitwise exclusive-or interleaved
+  defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt">;
+  defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">;
+
+  // SVE2 bitwise shift left long
+  defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">;
+  defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">;
+  defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">;
+  defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
+
+  // SVE2 integer add/subtract interleaved long
+  defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
+  defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
+  defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
@@ -1285,3 +1300,10 @@ let Predicates = [HasSVE2AES] in {
   def PMULLT_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11011, "pmullt",
                                          ZPR128, ZPR64, ZPR64>;
 }
+
+let Predicates = [HasSVE2BitPerm] in {
+  // SVE2 bitwise permute
+  defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext">;
+  defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep">;
+  defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp">;
+}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a05533b18dae1..61155c96c27d3 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2191,6 +2191,82 @@ multiclass sve2_pmul_long<bits<1> opc, string asm> {
   def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve2_misc<bits<2> sz, bits<4> opc, string asm,
+                ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
+  asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-24} = 0b01000101;
+  let Inst{23-22} = sz;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Zm;
+  let Inst{15-14} = 0b10;
+  let Inst{13-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
+  def _B : sve2_misc<0b00, opc, asm, ZPR8, ZPR8>;
+  def _H : sve2_misc<0b01, opc, asm, ZPR16, ZPR16>;
+  def _S : sve2_misc<0b10, opc, asm, ZPR32, ZPR32>;
+  def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
+  let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
+    def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8,  ZPR8>;
+    def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
+    def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
+    def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
+  }
+}
+
+multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
+  def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
+  def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
+  def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+}
+
+class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
+                                   ZPRRegOp zprty1, ZPRRegOp zprty2,
+                                   Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
+  asm, "\t$Zd, $Zn, $imm",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> imm;
+  let Inst{31-23} = 0b010001010;
+  let Inst{22}    = tsz8_64{2};
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-12} = 0b1010;
+  let Inst{11-10} = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
+  def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm,
+                                        ZPR16, ZPR8, vecshiftL8>;
+  def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm,
+                                        ZPR32, ZPR16, vecshiftL16> {
+    let Inst{19} = imm{3};
+  }
+  def _D : sve2_bitwise_shift_left_long<{1,?,?}, opc, asm,
+                                        ZPR64, ZPR32, vecshiftL32> {
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // SVE2 Accumulate Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/bdep-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bdep-diagnostics.s
new file mode 100644
index 0000000000000..f6dce79586728
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bdep-diagnostics.s
@@ -0,0 +1,26 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+bitperm  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bdep z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bdep z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+bdep z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bdep z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+bdep z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bdep z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bdep.s b/llvm/test/MC/AArch64/SVE2/bdep.s
new file mode 100644
index 0000000000000..9d68b5a673c88
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bdep.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+bitperm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+bitperm < %s \
+// RUN:        | llvm-objdump -d -mattr=+bitperm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+bitperm < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bdep z0.b, z1.b, z31.b
+// CHECK-INST: bdep z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xb4,0x1f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b4 1f 45 <unknown>
+
+bdep z0.h, z1.h, z31.h
+// CHECK-INST: bdep z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xb4,0x5f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b4 5f 45 <unknown>
+
+bdep z0.s, z1.s, z31.s
+// CHECK-INST: bdep z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xb4,0x9f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b4 9f 45 <unknown>
+
+bdep z0.d, z1.d, z31.d
+// CHECK-INST: bdep z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xb4,0xdf,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b4 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/bext-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bext-diagnostics.s
new file mode 100644
index 0000000000000..7ffe1449cc2ce
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bext-diagnostics.s
@@ -0,0 +1,26 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+bitperm  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bext z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bext z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+bext z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bext z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+bext z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bext z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bext.s b/llvm/test/MC/AArch64/SVE2/bext.s
new file mode 100644
index 0000000000000..2c23bd9b2af43
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bext.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+bitperm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+bitperm < %s \
+// RUN:        | llvm-objdump -d -mattr=+bitperm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+bitperm < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bext z0.b, z1.b, z31.b
+// CHECK-INST: bext z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xb0,0x1f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b0 1f 45 <unknown>
+
+bext z0.h, z1.h, z31.h
+// CHECK-INST: bext z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xb0,0x5f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b0 5f 45 <unknown>
+
+bext z0.s, z1.s, z31.s
+// CHECK-INST: bext z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xb0,0x9f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b0 9f 45 <unknown>
+
+bext z0.d, z1.d, z31.d
+// CHECK-INST: bext z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xb0,0xdf,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b0 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/bgrp-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bgrp-diagnostics.s
new file mode 100644
index 0000000000000..9c05a0c6918d7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bgrp-diagnostics.s
@@ -0,0 +1,26 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+bitperm  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bgrp z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bgrp z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+bgrp z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bgrp z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.d, p0/z, z7.d
+bgrp z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bgrp z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bgrp.s b/llvm/test/MC/AArch64/SVE2/bgrp.s
new file mode 100644
index 0000000000000..b2e7f98a43030
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bgrp.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+bitperm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+bitperm < %s \
+// RUN:        | llvm-objdump -d -mattr=+bitperm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+bitperm < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bgrp z0.b, z1.b, z31.b
+// CHECK-INST: bgrp z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xb8,0x1f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b8 1f 45 <unknown>
+
+bgrp z0.h, z1.h, z31.h
+// CHECK-INST: bgrp z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0xb8,0x5f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b8 5f 45 <unknown>
+
+bgrp z0.s, z1.s, z31.s
+// CHECK-INST: bgrp z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xb8,0x9f,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b8 9f 45 <unknown>
+
+bgrp z0.d, z1.d, z31.d
+// CHECK-INST: bgrp z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xb8,0xdf,0x45]
+// CHECK-ERROR: instruction requires: bitperm
+// CHECK-UNKNOWN: 20 b8 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/eorbt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/eorbt-diagnostics.s
new file mode 100644
index 0000000000000..dcdefc08122f5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/eorbt-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+eorbt z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: eorbt z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+eorbt z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: eorbt z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/eorbt.s b/llvm/test/MC/AArch64/SVE2/eorbt.s
new file mode 100644
index 0000000000000..6c88b63bb6ee6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/eorbt.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+eorbt z0.b, z1.b, z31.b
+// CHECK-INST: eorbt z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0x90,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 90 1f 45 <unknown>
+
+eorbt z0.h, z1.h, z31.h
+// CHECK-INST: eorbt z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x90,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 90 5f 45 <unknown>
+
+eorbt z0.s, z1.s, z31.s
+// CHECK-INST: eorbt z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x90,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 90 9f 45 <unknown>
+
+eorbt z0.d, z1.d, z31.d
+// CHECK-INST: eorbt z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x90,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 90 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+eorbt z0.d, z1.d, z31.d
+// CHECK-INST: eorbt z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x90,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 90 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/eortb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/eortb-diagnostics.s
new file mode 100644
index 0000000000000..ea97bb34ba5fe
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/eortb-diagnostics.s
@@ -0,0 +1,20 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+eortb z0.b, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: eortb z0.b, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+eortb z0.d, z1.d, z7.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: eortb z0.d, z1.d, z7.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/eortb.s b/llvm/test/MC/AArch64/SVE2/eortb.s
new file mode 100644
index 0000000000000..2fe781c1fd675
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/eortb.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+eortb z0.b, z1.b, z31.b
+// CHECK-INST: eortb z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0x94,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 94 1f 45 <unknown>
+
+eortb z0.h, z1.h, z31.h
+// CHECK-INST: eortb z0.h, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x94,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 94 5f 45 <unknown>
+
+eortb z0.s, z1.s, z31.s
+// CHECK-INST: eortb z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x94,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 94 9f 45 <unknown>
+
+eortb z0.d, z1.d, z31.d
+// CHECK-INST: eortb z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x94,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 94 df 45 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z0, z7
+// CHECK-INST: movprfx	z0, z7
+// CHECK-ENCODING: [0xe0,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e0 bc 20 04 <unknown>
+
+eortb z0.d, z1.d, z31.d
+// CHECK-INST: eortb z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0x94,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 94 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/saddlbt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/saddlbt-diagnostics.s
new file mode 100644
index 0000000000000..693b3ebdd50c8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddlbt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+saddlbt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlbt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlbt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlbt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlbt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlbt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+saddlbt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: saddlbt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+saddlbt z0.h, z1.b, z7.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddlbt z0.h, z1.b, z7.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+saddlbt z0.h, z1.b, z7.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: saddlbt z0.h, z1.b, z7.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/saddlbt.s b/llvm/test/MC/AArch64/SVE2/saddlbt.s
new file mode 100644
index 0000000000000..5dcec3bc13c19
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/saddlbt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+saddlbt z0.h, z1.b, z31.b
+// CHECK-INST: saddlbt	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0x80,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 5f 45 <unknown>
+
+saddlbt z0.s, z1.h, z31.h
+// CHECK-INST: saddlbt	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x80,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 9f 45 <unknown>
+
+saddlbt z0.d, z1.s, z31.s
+// CHECK-INST: saddlbt	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x80,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sshllb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sshllb-diagnostics.s
new file mode 100644
index 0000000000000..8a05c9103d4e9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sshllb-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sshllb z18.h, z28.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sshllb z18.h, z28.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z1.h, z9.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sshllb z1.h, z9.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z21.s, z2.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sshllb z21.s, z2.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z14.s, z30.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sshllb z14.s, z30.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z6.d, z12.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sshllb z6.d, z12.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z23.d, z19.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sshllb z23.d, z19.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sshllb z0.b, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllb z0.b, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z0.h, z0.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllb z0.h, z0.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z0.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllb z0.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllb z0.d, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllb z0.d, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sshllb     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sshllb     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/m, z6.d
+sshllb     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sshllb     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sshllb.s b/llvm/test/MC/AArch64/SVE2/sshllb.s
new file mode 100644
index 0000000000000..14824c583f5cd
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sshllb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sshllb     z0.h, z0.b, #0
+// CHECK-INST: sshllb	z0.h, z0.b, #0
+// CHECK-ENCODING: [0x00,0xa0,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a0 08 45 <unknown>
+
+sshllb     z31.h, z31.b, #7
+// CHECK-INST: sshllb	z31.h, z31.b, #7
+// CHECK-ENCODING: [0xff,0xa3,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff a3 0f 45 <unknown>
+
+sshllb     z0.s, z0.h, #0
+// CHECK-INST: sshllb	z0.s, z0.h, #0
+// CHECK-ENCODING: [0x00,0xa0,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a0 10 45 <unknown>
+
+sshllb     z31.s, z31.h, #15
+// CHECK-INST: sshllb	z31.s, z31.h, #15
+// CHECK-ENCODING: [0xff,0xa3,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff a3 1f 45 <unknown>
+
+sshllb     z0.d, z0.s, #0
+// CHECK-INST: sshllb	z0.d, z0.s, #0
+// CHECK-ENCODING: [0x00,0xa0,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a0 40 45 <unknown>
+
+sshllb     z31.d, z31.s, #31
+// CHECK-INST: sshllb	z31.d, z31.s, #31
+// CHECK-ENCODING: [0xff,0xa3,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff a3 5f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sshllt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sshllt-diagnostics.s
new file mode 100644
index 0000000000000..4b443ae6f566d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sshllt-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+sshllt z18.h, z28.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sshllt z18.h, z28.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z1.h, z9.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: sshllt z1.h, z9.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z21.s, z2.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sshllt z21.s, z2.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z14.s, z30.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: sshllt z14.s, z30.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z6.d, z12.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sshllt z6.d, z12.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z23.d, z19.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: sshllt z23.d, z19.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sshllt z0.b, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllt z0.b, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z0.h, z0.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllt z0.h, z0.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z0.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllt z0.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sshllt z0.d, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sshllt z0.d, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+sshllt     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sshllt     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/m, z6.d
+sshllt     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sshllt     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sshllt.s b/llvm/test/MC/AArch64/SVE2/sshllt.s
new file mode 100644
index 0000000000000..88bf9dee6fbcb
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sshllt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sshllt     z0.h, z0.b, #0
+// CHECK-INST: sshllt	z0.h, z0.b, #0
+// CHECK-ENCODING: [0x00,0xa4,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a4 08 45 <unknown>
+
+sshllt     z31.h, z31.b, #7
+// CHECK-INST: sshllt	z31.h, z31.b, #7
+// CHECK-ENCODING: [0xff,0xa7,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff a7 0f 45 <unknown>
+
+sshllt     z0.s, z0.h, #0
+// CHECK-INST: sshllt	z0.s, z0.h, #0
+// CHECK-ENCODING: [0x00,0xa4,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a4 10 45 <unknown>
+
+sshllt     z31.s, z31.h, #15
+// CHECK-INST: sshllt	z31.s, z31.h, #15
+// CHECK-ENCODING: [0xff,0xa7,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff a7 1f 45 <unknown>
+
+sshllt     z0.d, z0.s, #0
+// CHECK-INST: sshllt	z0.d, z0.s, #0
+// CHECK-ENCODING: [0x00,0xa4,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a4 40 45 <unknown>
+
+sshllt     z31.d, z31.s, #31
+// CHECK-INST: sshllt	z31.d, z31.s, #31
+// CHECK-ENCODING: [0xff,0xa7,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff a7 5f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssublbt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssublbt-diagnostics.s
new file mode 100644
index 0000000000000..ceb3d8079107a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssublbt-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+ssublbt z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublbt z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublbt z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublbt z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublbt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublbt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssublbt z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssublbt z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+ssublbt z0.h, z1.b, z7.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssublbt z0.h, z1.b, z7.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+ssublbt z0.h, z1.b, z7.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssublbt z0.h, z1.b, z7.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssublbt.s b/llvm/test/MC/AArch64/SVE2/ssublbt.s
new file mode 100644
index 0000000000000..0dff839ce0a77
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssublbt.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+ssublbt z0.h, z1.b, z31.b
+// CHECK-INST: ssublbt	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0x88,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 88 5f 45 <unknown>
+
+ssublbt z0.s, z1.h, z31.h
+// CHECK-INST: ssublbt	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x88,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 88 9f 45 <unknown>
+
+ssublbt z0.d, z1.s, z31.s
+// CHECK-INST: ssublbt	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x88,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 88 df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ssubltb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ssubltb-diagnostics.s
new file mode 100644
index 0000000000000..77c4d0bbe9dab
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssubltb-diagnostics.s
@@ -0,0 +1,40 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+ssubltb z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubltb z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubltb z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubltb z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubltb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubltb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ssubltb z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ssubltb z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0, z7
+ssubltb z0.h, z1.b, z7.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssubltb z0.h, z1.b, z7.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+ssubltb z0.h, z1.b, z7.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ssubltb z0.h, z1.b, z7.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ssubltb.s b/llvm/test/MC/AArch64/SVE2/ssubltb.s
new file mode 100644
index 0000000000000..590fc44acf801
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ssubltb.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+ssubltb z0.h, z1.b, z31.b
+// CHECK-INST: ssubltb	z0.h, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0x8c,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 8c 5f 45 <unknown>
+
+ssubltb z0.s, z1.h, z31.h
+// CHECK-INST: ssubltb	z0.s, z1.h, z31.h
+// CHECK-ENCODING: [0x20,0x8c,0x9f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 8c 9f 45 <unknown>
+
+ssubltb z0.d, z1.s, z31.s
+// CHECK-INST: ssubltb	z0.d, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0x8c,0xdf,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 8c df 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ushllb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ushllb-diagnostics.s
new file mode 100644
index 0000000000000..282558415140c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ushllb-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+ushllb z18.h, z28.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: ushllb z18.h, z28.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z1.h, z9.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: ushllb z1.h, z9.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z21.s, z2.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: ushllb z21.s, z2.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z14.s, z30.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: ushllb z14.s, z30.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z6.d, z12.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: ushllb z6.d, z12.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z23.d, z19.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: ushllb z23.d, z19.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+ushllb z0.b, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllb z0.b, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z0.h, z0.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllb z0.h, z0.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z0.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllb z0.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllb z0.d, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllb z0.d, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+ushllb     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ushllb     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/m, z6.d
+ushllb     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ushllb     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ushllb.s b/llvm/test/MC/AArch64/SVE2/ushllb.s
new file mode 100644
index 0000000000000..c67581a597e51
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ushllb.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ushllb     z0.h, z0.b, #0
+// CHECK-INST: ushllb	z0.h, z0.b, #0
+// CHECK-ENCODING: [0x00,0xa8,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a8 08 45 <unknown>
+
+ushllb     z31.h, z31.b, #7
+// CHECK-INST: ushllb	z31.h, z31.b, #7
+// CHECK-ENCODING: [0xff,0xab,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ab 0f 45 <unknown>
+
+ushllb     z0.s, z0.h, #0
+// CHECK-INST: ushllb	z0.s, z0.h, #0
+// CHECK-ENCODING: [0x00,0xa8,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a8 10 45 <unknown>
+
+ushllb     z31.s, z31.h, #15
+// CHECK-INST: ushllb	z31.s, z31.h, #15
+// CHECK-ENCODING: [0xff,0xab,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ab 1f 45 <unknown>
+
+ushllb     z0.d, z0.s, #0
+// CHECK-INST: ushllb	z0.d, z0.s, #0
+// CHECK-ENCODING: [0x00,0xa8,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a8 40 45 <unknown>
+
+ushllb     z31.d, z31.s, #31
+// CHECK-INST: ushllb	z31.d, z31.s, #31
+// CHECK-ENCODING: [0xff,0xab,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff ab 5f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ushllt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ushllt-diagnostics.s
new file mode 100644
index 0000000000000..52dc68cdc9934
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ushllt-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+ushllt z18.h, z28.b, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: ushllt z18.h, z28.b, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z1.h, z9.b, #8
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]
+// CHECK-NEXT: ushllt z1.h, z9.b, #8
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z21.s, z2.h, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: ushllt z21.s, z2.h, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z14.s, z30.h, #16
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 15]
+// CHECK-NEXT: ushllt z14.s, z30.h, #16
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z6.d, z12.s, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: ushllt z6.d, z12.s, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z23.d, z19.s, #32
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 31]
+// CHECK-NEXT: ushllt z23.d, z19.s, #32
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+ushllt z0.b, z0.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllt z0.b, z0.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z0.h, z0.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllt z0.h, z0.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z0.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllt z0.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ushllt z0.d, z0.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ushllt z0.d, z0.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+ushllt     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ushllt     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.d, p0/m, z6.d
+ushllt     z31.d, z31.s, #31
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ushllt     z31.d, z31.s, #31
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ushllt.s b/llvm/test/MC/AArch64/SVE2/ushllt.s
new file mode 100644
index 0000000000000..a3be73226c5cb
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ushllt.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ushllt     z0.h, z0.b, #0
+// CHECK-INST: ushllt	z0.h, z0.b, #0
+// CHECK-ENCODING: [0x00,0xac,0x08,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ac 08 45 <unknown>
+
+ushllt     z31.h, z31.b, #7
+// CHECK-INST: ushllt	z31.h, z31.b, #7
+// CHECK-ENCODING: [0xff,0xaf,0x0f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff af 0f 45 <unknown>
+
+ushllt     z0.s, z0.h, #0
+// CHECK-INST: ushllt	z0.s, z0.h, #0
+// CHECK-ENCODING: [0x00,0xac,0x10,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ac 10 45 <unknown>
+
+ushllt     z31.s, z31.h, #15
+// CHECK-INST: ushllt	z31.s, z31.h, #15
+// CHECK-ENCODING: [0xff,0xaf,0x1f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff af 1f 45 <unknown>
+
+ushllt     z0.d, z0.s, #0
+// CHECK-INST: ushllt	z0.d, z0.s, #0
+// CHECK-ENCODING: [0x00,0xac,0x40,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 ac 40 45 <unknown>
+
+ushllt     z31.d, z31.s, #31
+// CHECK-INST: ushllt	z31.d, z31.s, #31
+// CHECK-ENCODING: [0xff,0xaf,0x5f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff af 5f 45 <unknown>

From c4ed601bd9f5c7d6138330317bfc0ce519198085 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Tue, 28 May 2019 08:51:59 +0000
Subject: [PATCH 0327/1176] [AArch64][SVE2] Asm: support SVE2 Histogram
 Computation Groups

Summary:
Patch adds support for the following instructions:

SVE2 histogram generation (segment):
    * HISTSEG

SVE2 histogram generation (vector):
    * HISTCNT

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62306

llvm-svn: 361796
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  6 +++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 47 ++++++++++++++++++
 .../MC/AArch64/SVE2/histcnt-diagnostics.s     | 49 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/histcnt.s           | 21 ++++++++
 .../MC/AArch64/SVE2/histseg-diagnostics.s     | 36 ++++++++++++++
 llvm/test/MC/AArch64/SVE2/histseg.s           | 15 ++++++
 6 files changed, 174 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/histcnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/histcnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/histseg-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/histseg.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 739fb2a73b1ad..05470f5e364ef 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1283,6 +1283,12 @@ let Predicates = [HasSVE2] in {
   defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
   defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
 
+  // SVE2 histogram generation (segment)
+  def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
+
+  // SVE2 histogram generation (vector)
+  defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 61155c96c27d3..4d9c5a8262a69 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5239,3 +5239,50 @@ multiclass sve2_char_match<bit opc, string asm> {
   def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
   def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
 }
+
+//===----------------------------------------------------------------------===//
+// SVE2 Histogram Computation - Segment Group
+//===----------------------------------------------------------------------===//
+
+class sve2_hist_gen_segment<string asm>
+: I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-21} = 0b01000101001;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b101000;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Histogram Computation - Vector Group
+//===----------------------------------------------------------------------===//
+
+class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Pg/z, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<3> Pg;
+  bits<5> Zm;
+  let Inst{31-23} = 0b010001011;
+  let Inst{22}    = sz;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-13} = 0b110;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_hist_gen_vector<string asm> {
+  def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>;
+  def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>;
+}
diff --git a/llvm/test/MC/AArch64/SVE2/histcnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/histcnt-diagnostics.s
new file mode 100644
index 0000000000000..9bbbd6b927c27
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/histcnt-diagnostics.s
@@ -0,0 +1,49 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+histcnt z0.b, p0/z, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: histcnt z0.b, p0/z, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+histcnt z0.h, p0/z, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: histcnt z0.h, p0/z, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+histcnt z0.s, p0/m, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: histcnt z0.s, p0/m, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+histcnt z0.s, p8/z, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: histcnt z0.s, p8/z, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+histcnt z0.s, p7/z, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: histcnt z0.s, p7/z, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+histcnt z0.s, p7/z, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: histcnt z0.s, p7/z, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/histcnt.s b/llvm/test/MC/AArch64/SVE2/histcnt.s
new file mode 100644
index 0000000000000..e5ac009e2b642
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/histcnt.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+histcnt z0.s, p0/z, z1.s, z2.s
+// CHECK-INST: histcnt z0.s, p0/z, z1.s, z2.s
+// CHECK-ENCODING: [0x20,0xc0,0xa2,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 a2 45 <unknown>
+
+histcnt z29.d, p7/z, z30.d, z31.d
+// CHECK-INST: histcnt z29.d, p7/z, z30.d, z31.d
+// CHECK-ENCODING: [0xdd,0xdf,0xff,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd df ff 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/histseg-diagnostics.s b/llvm/test/MC/AArch64/SVE2/histseg-diagnostics.s
new file mode 100644
index 0000000000000..ce445160e8ab4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/histseg-diagnostics.s
@@ -0,0 +1,36 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+histseg z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: histseg z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+histseg z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: histseg z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+histseg z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: histseg z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+histseg z31.b, z30.b, z29.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: histseg z31.b, z30.b, z29.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.b, p0/m, z6.b
+histseg z31.b, z30.b, z29.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: histseg z31.b, z30.b, z29.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/histseg.s b/llvm/test/MC/AArch64/SVE2/histseg.s
new file mode 100644
index 0000000000000..be670d01b3104
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/histseg.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+histseg z0.b, z1.b, z31.b
+// CHECK-INST: histseg z0.b, z1.b, z31.b
+// CHECK-ENCODING: [0x20,0xa0,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 3f 45 <unknown>

From 8e91dd7934659df87e67e2ac5c78d583fce15fe0 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Tue, 28 May 2019 09:13:17 +0000
Subject: [PATCH 0328/1176] [AArch64][SVE2] Asm: support SVE2 Crypto Extensions
 Group

Summary:
Patch adds support for the following instructions:

SVE2 crypto constructive binary operations:
    * SM4EKEY, RAX1

SVE2 crypto destructive binary operations:
    * AESE, AESD, SM4E

SVE2 crypto unary operations:
    * AESMC, AESIMC

AESE, AESD, AESMC and AESIMC are enabled with +sve2-aes.  SM4E and
SM4EKEY are enabled with +sve2-sm4. RAX1 is enabled with +sve2-sha3.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62307

llvm-svn: 361797
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 20 ++++++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 51 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/aesd-diagnostics.s  | 45 ++++++++++++++++
 llvm/test/MC/AArch64/SVE2/aesd.s              | 15 ++++++
 llvm/test/MC/AArch64/SVE2/aese-diagnostics.s  | 45 ++++++++++++++++
 llvm/test/MC/AArch64/SVE2/aese.s              | 15 ++++++
 .../test/MC/AArch64/SVE2/aesimc-diagnostics.s | 45 ++++++++++++++++
 llvm/test/MC/AArch64/SVE2/aesimc.s            | 21 ++++++++
 llvm/test/MC/AArch64/SVE2/aesmc-diagnostics.s | 45 ++++++++++++++++
 llvm/test/MC/AArch64/SVE2/aesmc.s             | 21 ++++++++
 llvm/test/MC/AArch64/SVE2/rax1-diagnostics.s  | 36 +++++++++++++
 llvm/test/MC/AArch64/SVE2/rax1.s              | 15 ++++++
 llvm/test/MC/AArch64/SVE2/sm4e-diagnostics.s  | 45 ++++++++++++++++
 llvm/test/MC/AArch64/SVE2/sm4e.s              | 15 ++++++
 .../MC/AArch64/SVE2/sm4ekey-diagnostics.s     | 36 +++++++++++++
 llvm/test/MC/AArch64/SVE2/sm4ekey.s           | 15 ++++++
 16 files changed, 485 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/aesd-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aesd.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aese-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aese.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aesimc-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aesimc.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aesmc-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/aesmc.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rax1-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/rax1.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sm4e-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sm4e.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sm4ekey-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/sm4ekey.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 05470f5e364ef..1635a602539ce 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1298,6 +1298,14 @@ let Predicates = [HasSVE2] in {
 }
 
 let Predicates = [HasSVE2AES] in {
+  // SVE2 crypto destructive binary operations
+  def AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8>;
+  def AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8>;
+
+  // SVE2 crypto unary operations
+  def AESMC_ZZ_B  : sve2_crypto_unary_op<0b0, "aesmc">;
+  def AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc">;
+
   // PMULLB and PMULLT instructions which operate with 64-bit source and
   // 128-bit destination elements are enabled with crypto extensions, similar
   // to NEON PMULL2 instruction.
@@ -1307,6 +1315,18 @@ let Predicates = [HasSVE2AES] in {
                                          ZPR128, ZPR64, ZPR64>;
 }
 
+let Predicates = [HasSVE2SM4] in {
+  // SVE2 crypto constructive binary operations
+  def SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32>;
+  // SVE2 crypto destructive binary operations
+  def SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32>;
+}
+
+let Predicates = [HasSVE2SHA3] in {
+  // SVE2 crypto constructive binary operations
+  def RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1",    ZPR64>;
+}
+
 let Predicates = [HasSVE2BitPerm] in {
   // SVE2 bitwise permute
   defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 4d9c5a8262a69..25a25e7d38ce2 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5286,3 +5286,54 @@ multiclass sve2_hist_gen_vector<string asm> {
   def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>;
   def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>;
 }
+
+//===----------------------------------------------------------------------===//
+// SVE2 Crypto Extensions Group
+//===----------------------------------------------------------------------===//
+
+class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-21} = 0b01000101001;
+  let Inst{20-16} = Zm;
+  let Inst{15-11} = 0b11110;
+  let Inst{10}    = opc;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm),
+  asm, "\t$Zdn, $_Zdn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zm;
+  let Inst{31-17} = 0b010001010010001;
+  let Inst{16}    = opc{1};
+  let Inst{15-11} = 0b11100;
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+}
+
+class sve2_crypto_unary_op<bit opc, string asm>
+: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn),
+  asm, "\t$Zdn, $_Zdn",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  let Inst{31-11} = 0b010001010010000011100;
+  let Inst{10}    = opc;
+  let Inst{9-5}   = 0b00000;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+}
diff --git a/llvm/test/MC/AArch64/SVE2/aesd-diagnostics.s b/llvm/test/MC/AArch64/SVE2/aesd-diagnostics.s
new file mode 100644
index 0000000000000..ad90480c3b6e9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aesd-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+aesd z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: aesd z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+aesd z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesd z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aesd z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesd z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aesd z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesd z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+aesd z0.b, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aesd z0.b, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+aesd z0.b, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aesd z0.b, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/aesd.s b/llvm/test/MC/AArch64/SVE2/aesd.s
new file mode 100644
index 0000000000000..656664e900cd4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aesd.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+aesd z0.b, z0.b, z31.b
+// CHECK-INST: aesd z0.b, z0.b, z31.b
+// CHECK-ENCODING: [0xe0,0xe7,0x22,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: e0 e7 22 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/aese-diagnostics.s b/llvm/test/MC/AArch64/SVE2/aese-diagnostics.s
new file mode 100644
index 0000000000000..05114774c1864
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aese-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+aese z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: aese z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+aese z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aese z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aese z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aese z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aese z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aese z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+aese z0.b, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aese z0.b, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+aese z0.b, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aese z0.b, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/aese.s b/llvm/test/MC/AArch64/SVE2/aese.s
new file mode 100644
index 0000000000000..5def632228be4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aese.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+aese z0.b, z0.b, z31.b
+// CHECK-INST: aese z0.b, z0.b, z31.b
+// CHECK-ENCODING: [0xe0,0xe3,0x22,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: e0 e3 22 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/aesimc-diagnostics.s b/llvm/test/MC/AArch64/SVE2/aesimc-diagnostics.s
new file mode 100644
index 0000000000000..1e334863e121e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aesimc-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+aesimc z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: aesimc z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+aesimc z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesimc z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aesimc z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesimc z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aesimc z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesimc z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+aesimc z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aesimc z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+aesimc z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aesimc z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/aesimc.s b/llvm/test/MC/AArch64/SVE2/aesimc.s
new file mode 100644
index 0000000000000..224d56bcfc8ff
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aesimc.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+aesimc z0.b, z0.b
+// CHECK-INST: aesimc z0.b, z0.b
+// CHECK-ENCODING: [0x00,0xe4,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: 00 e4 20 45 <unknown>
+
+aesimc z31.b, z31.b
+// CHECK-INST: aesimc z31.b, z31.b
+// CHECK-ENCODING: [0x1f,0xe4,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: 1f e4 20 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/aesmc-diagnostics.s b/llvm/test/MC/AArch64/SVE2/aesmc-diagnostics.s
new file mode 100644
index 0000000000000..7a8bd78acdc01
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aesmc-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+aesmc z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: aesmc z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+aesmc z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesmc z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aesmc z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesmc z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+aesmc z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: aesmc z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.b, p0/z, z7.b
+aesmc z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aesmc z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+aesmc z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: aesmc z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/aesmc.s b/llvm/test/MC/AArch64/SVE2/aesmc.s
new file mode 100644
index 0000000000000..c7951d63edfb9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/aesmc.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-aes - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-aes < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+aesmc z0.b, z0.b
+// CHECK-INST: aesmc z0.b, z0.b
+// CHECK-ENCODING: [0x00,0xe0,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: 00 e0 20 45 <unknown>
+
+aesmc z31.b, z31.b
+// CHECK-INST: aesmc z31.b, z31.b
+// CHECK-ENCODING: [0x1f,0xe0,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2-aes
+// CHECK-UNKNOWN: 1f e0 20 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/rax1-diagnostics.s b/llvm/test/MC/AArch64/SVE2/rax1-diagnostics.s
new file mode 100644
index 0000000000000..0a99c22759e46
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rax1-diagnostics.s
@@ -0,0 +1,36 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-sha3  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+rax1 z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rax1 z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rax1 z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rax1 z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+rax1 z0.s, z0.s, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: rax1 z0.s, z0.s, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+rax1 z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rax1 z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+rax1 z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: rax1 z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/rax1.s b/llvm/test/MC/AArch64/SVE2/rax1.s
new file mode 100644
index 0000000000000..44634d45bde2e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/rax1.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-sha3 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sha3 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-sha3 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sha3 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+rax1 z0.d, z1.d, z31.d
+// CHECK-INST: rax1 z0.d, z1.d, z31.d
+// CHECK-ENCODING: [0x20,0xf4,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2-sha3
+// CHECK-UNKNOWN: 20 f4 3f 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sm4e-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sm4e-diagnostics.s
new file mode 100644
index 0000000000000..521fba458e8db
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sm4e-diagnostics.s
@@ -0,0 +1,45 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-sm4  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+sm4e z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: sm4e z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sm4e z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sm4e z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sm4e z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sm4e z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sm4e z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sm4e z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+sm4e z0.s, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sm4e z0.s, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sm4e z0.s, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sm4e z0.s, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sm4e.s b/llvm/test/MC/AArch64/SVE2/sm4e.s
new file mode 100644
index 0000000000000..c11cee5081ba5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sm4e.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-sm4 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sm4 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-sm4 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sm4 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sm4e z0.s, z0.s, z31.s
+// CHECK-INST: sm4e z0.s, z0.s, z31.s
+// CHECK-ENCODING: [0xe0,0xe3,0x23,0x45]
+// CHECK-ERROR: instruction requires: sve2-sm4
+// CHECK-UNKNOWN: e0 e3 23 45 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sm4ekey-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sm4ekey-diagnostics.s
new file mode 100644
index 0000000000000..f14b577767840
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sm4ekey-diagnostics.s
@@ -0,0 +1,36 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-sm4  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+sm4ekey z0.b, z0.b, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sm4ekey z0.b, z0.b, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sm4ekey z0.h, z0.h, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sm4ekey z0.h, z0.h, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sm4ekey z0.d, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sm4ekey z0.d, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+sm4ekey z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sm4ekey z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sm4ekey z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sm4ekey z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/sm4ekey.s b/llvm/test/MC/AArch64/SVE2/sm4ekey.s
new file mode 100644
index 0000000000000..555eb09da748a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/sm4ekey.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2-sm4 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sm4 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2-sm4 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2-sm4 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+sm4ekey z0.s, z1.s, z31.s
+// CHECK-INST: sm4ekey z0.s, z1.s, z31.s
+// CHECK-ENCODING: [0x20,0xf0,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2-sm4
+// CHECK-UNKNOWN: 20 f0 3f 45 <unknown>

From 536a62d00784ffdd897285b9e31152bb22d3893b Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 09:20:57 +0000
Subject: [PATCH 0329/1176] [clangd] Rename -run-synchronously to -sync

llvm-svn: 361798
---
 .../delimited-input-comment-at-the-end.test   |  2 +-
 .../clangd/test/input-mirror.test             |  2 +-
 clang-tools-extra/clangd/test/protocol.test   |  4 +--
 .../test/spaces-in-delimited-input.test       |  4 +--
 clang-tools-extra/clangd/test/too_large.test  |  2 +-
 clang-tools-extra/clangd/tool/ClangdMain.cpp  | 31 +++++++++----------
 6 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/clang-tools-extra/clangd/test/delimited-input-comment-at-the-end.test b/clang-tools-extra/clangd/test/delimited-input-comment-at-the-end.test
index 34a248dfde6d0..bbbd72f8c59f6 100644
--- a/clang-tools-extra/clangd/test/delimited-input-comment-at-the-end.test
+++ b/clang-tools-extra/clangd/test/delimited-input-comment-at-the-end.test
@@ -1,4 +1,4 @@
-# RUN: clangd -input-style=delimited -run-synchronously -input-mirror-file %t < %s
+# RUN: clangd -input-style=delimited -sync -input-mirror-file %t < %s
 # RUN: grep '{"jsonrpc":"2.0","id":3,"method":"exit"}' %t
 #
 # RUN: clangd -lit-test -input-mirror-file %t < %s
diff --git a/clang-tools-extra/clangd/test/input-mirror.test b/clang-tools-extra/clangd/test/input-mirror.test
index 52845621e9e57..a34a4a08cf60c 100644
--- a/clang-tools-extra/clangd/test/input-mirror.test
+++ b/clang-tools-extra/clangd/test/input-mirror.test
@@ -1,4 +1,4 @@
-# RUN: clangd -pretty -run-synchronously -input-mirror-file %t < %s
+# RUN: clangd -pretty -sync -input-mirror-file %t < %s
 # Note that we have to use '-b' as -input-mirror-file does not have a newline at the end of file.
 # RUN: diff -b %t %s
 # It is absolutely vital that this file has CRLF line endings.
diff --git a/clang-tools-extra/clangd/test/protocol.test b/clang-tools-extra/clangd/test/protocol.test
index c218763de206e..3e16c9ec9b334 100644
--- a/clang-tools-extra/clangd/test/protocol.test
+++ b/clang-tools-extra/clangd/test/protocol.test
@@ -1,5 +1,5 @@
-# RUN: not clangd -pretty -run-synchronously -enable-test-uri-scheme < %s | FileCheck -strict-whitespace %s
-# RUN: not clangd -pretty -run-synchronously -enable-test-uri-scheme < %s 2>&1 | FileCheck -check-prefix=STDERR %s
+# RUN: not clangd -pretty -sync -enable-test-uri-scheme < %s | FileCheck -strict-whitespace %s
+# RUN: not clangd -pretty -sync -enable-test-uri-scheme < %s 2>&1 | FileCheck -check-prefix=STDERR %s
 # vim: fileformat=dos
 # It is absolutely vital that this file has CRLF line endings.
 #
diff --git a/clang-tools-extra/clangd/test/spaces-in-delimited-input.test b/clang-tools-extra/clangd/test/spaces-in-delimited-input.test
index 9636425ea373e..dc2e2f5ea0f64 100644
--- a/clang-tools-extra/clangd/test/spaces-in-delimited-input.test
+++ b/clang-tools-extra/clangd/test/spaces-in-delimited-input.test
@@ -1,5 +1,5 @@
-# RUN: clangd -input-style=delimited -run-synchronously < %s 2>&1 | FileCheck %s
-# RUN: clangd -lit-test -run-synchronously < %s 2>&1 | FileCheck %s
+# RUN: clangd -input-style=delimited -sync < %s 2>&1 | FileCheck %s
+# RUN: clangd -lit-test -sync < %s 2>&1 | FileCheck %s
 #
 {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
 
diff --git a/clang-tools-extra/clangd/test/too_large.test b/clang-tools-extra/clangd/test/too_large.test
index 7b846c37f0804..7df981e794207 100644
--- a/clang-tools-extra/clangd/test/too_large.test
+++ b/clang-tools-extra/clangd/test/too_large.test
@@ -1,4 +1,4 @@
-# RUN: not clangd -run-synchronously < %s 2>&1 | FileCheck -check-prefix=STDERR %s
+# RUN: not clangd -sync < %s 2>&1 | FileCheck -check-prefix=STDERR %s
 # vim: fileformat=dos
 # It is absolutely vital that this file has CRLF line endings.
 #
diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
index 44ca8a3363a29..106b7d6a3b9c1 100644
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -96,8 +96,8 @@ static llvm::cl::opt<Logger::Level> LogLevel(
 
 static llvm::cl::opt<bool>
     Test("lit-test",
-         llvm::cl::desc("Abbreviation for -input-style=delimited -pretty "
-                        "-run-synchronously -enable-test-scheme -log=verbose. "
+         llvm::cl::desc("Abbreviation for -input-style=delimited -pretty -sync "
+                        "-enable-test-scheme -log=verbose."
                         "Intended to simplify lit tests."),
          llvm::cl::init(false), llvm::cl::Hidden);
 
@@ -122,10 +122,9 @@ static llvm::cl::opt<int> LimitResults(
                    "0 means no limit. (default=100)"),
     llvm::cl::init(100));
 
-static llvm::cl::opt<bool> RunSynchronously(
-    "run-synchronously",
-    llvm::cl::desc("Parse on main thread. If set, -j is ignored"),
-    llvm::cl::init(false), llvm::cl::Hidden);
+static llvm::cl::opt<bool>
+    Sync("sync", llvm::cl::desc("Parse on main thread. If set, -j is ignored"),
+         llvm::cl::init(false), llvm::cl::Hidden);
 
 static llvm::cl::opt<Path>
     ResourceDir("resource-dir",
@@ -229,10 +228,10 @@ static llvm::cl::opt<std::string> ClangTidyChecks(
         ".clang-tidy files). Only meaningful when -clang-tidy flag is on."),
     llvm::cl::init(""));
 
-static llvm::cl::opt<bool> EnableClangTidy(
-    "clang-tidy",
-    llvm::cl::desc("Enable clang-tidy diagnostics."),
-    llvm::cl::init(true));
+static llvm::cl::opt<bool>
+    EnableClangTidy("clang-tidy",
+                    llvm::cl::desc("Enable clang-tidy diagnostics."),
+                    llvm::cl::init(true));
 
 static llvm::cl::opt<std::string>
     FallbackStyle("fallback-style",
@@ -343,7 +342,7 @@ int main(int argc, char *argv[]) {
       "\n\thttps://clang.llvm.org/extra/clangd.html"
       "\n\thttps://microsoft.github.io/language-server-protocol/");
   if (Test) {
-    RunSynchronously = true;
+    Sync = true;
     InputStyle = JSONStreamStyle::Delimited;
     LogLevel = Logger::Verbose;
     PrettyPrint = true;
@@ -355,15 +354,15 @@ int main(int argc, char *argv[]) {
         "test", "Test scheme for clangd lit tests.");
   }
 
-  if (!RunSynchronously && WorkerThreadsCount == 0) {
+  if (!Sync && WorkerThreadsCount == 0) {
     llvm::errs() << "A number of worker threads cannot be 0. Did you mean to "
-                    "specify -run-synchronously?";
+                    "specify -sync?";
     return 1;
   }
 
-  if (RunSynchronously) {
+  if (Sync) {
     if (WorkerThreadsCount.getNumOccurrences())
-      llvm::errs() << "Ignoring -j because -run-synchronously is set.\n";
+      llvm::errs() << "Ignoring -j because -sync is set.\n";
     WorkerThreadsCount = 0;
   }
   if (FallbackStyle.getNumOccurrences())
@@ -461,7 +460,7 @@ int main(int argc, char *argv[]) {
       if (auto Idx = loadIndex(IndexFile, /*UseDex=*/true))
         Placeholder->reset(std::move(Idx));
     });
-    if (RunSynchronously)
+    if (Sync)
       AsyncIndexLoad.wait();
   }
   Opts.StaticIndex = StaticIdx.get();

From a3388e5f9e1f60a2d82b0cf3121e1dacdfd766b3 Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Tue, 28 May 2019 09:29:05 +0000
Subject: [PATCH 0330/1176] [CMake] Folder structure for generated Xcode
 project to cover more targets

llvm-svn: 361799
---
 lldb/CMakeLists.txt                                       | 2 ++
 lldb/cmake/modules/AddLLDB.cmake                          | 6 +++++-
 lldb/cmake/modules/LLDBConfig.cmake                       | 2 +-
 lldb/cmake/modules/LLDBStandalone.cmake                   | 8 ++++++++
 lldb/lit/CMakeLists.txt                                   | 2 +-
 lldb/source/API/CMakeLists.txt                            | 1 +
 lldb/test/CMakeLists.txt                                  | 4 ++--
 lldb/tools/debugserver/source/CMakeLists.txt              | 1 +
 lldb/tools/debugserver/source/MacOSX/CMakeLists.txt       | 2 ++
 .../debugserver/source/MacOSX/DarwinLog/CMakeLists.txt    | 2 ++
 lldb/tools/driver/CMakeLists.txt                          | 2 ++
 lldb/unittests/CMakeLists.txt                             | 2 +-
 lldb/unittests/tools/lldb-mi/utils/CMakeLists.txt         | 1 +
 lldb/unittests/tools/lldb-server/CMakeLists.txt           | 1 +
 lldb/utils/lit-cpuid/CMakeLists.txt                       | 1 +
 lldb/utils/lldb-dotest/CMakeLists.txt                     | 1 +
 16 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index dc7e1a0ae5349..afcf6bb5fb2bd 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -153,6 +153,7 @@ if(LLDB_INCLUDE_TESTS)
 
   add_custom_target(lldb-test-deps)
   add_dependencies(lldb-test-deps ${LLDB_TEST_DEPS})
+  set_target_properties(lldb-test-deps PROPERTIES FOLDER "lldb misc")
 
   add_subdirectory(test)
   add_subdirectory(unittests)
@@ -193,6 +194,7 @@ if (NOT LLDB_DISABLE_PYTHON)
       set(readline_dep readline)
     endif()
     add_dependencies(finish_swig swig_wrapper liblldb lldb-argdumper ${readline_dep})
+    set_target_properties(finish_swig swig_wrapper PROPERTIES FOLDER "lldb misc")
 
     # Ensure we do the python post-build step when building lldb.
     add_dependencies(lldb finish_swig)
diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake
index 3e82200f2065a..e35fc9e9be584 100644
--- a/lldb/cmake/modules/AddLLDB.cmake
+++ b/lldb/cmake/modules/AddLLDB.cmake
@@ -100,7 +100,11 @@ function(add_lldb_library name)
   # Add in any extra C++ compilation flags for this library.
   target_compile_options(${name} PRIVATE ${PARAM_EXTRA_CXXFLAGS})
 
-  set_target_properties(${name} PROPERTIES FOLDER "lldb libraries")
+  if(PARAM_PLUGIN)
+    set_target_properties(${name} PROPERTIES FOLDER "lldb plugins")
+  else()
+    set_target_properties(${name} PROPERTIES FOLDER "lldb libraries")
+  endif()
 endfunction(add_lldb_library)
 
 function(add_lldb_executable name)
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index 6c5f0366d0d37..23182fd154f57 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -364,7 +364,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
     )
 
   add_custom_target(lldb-headers)
-  set_target_properties(lldb-headers PROPERTIES FOLDER "Misc")
+  set_target_properties(lldb-headers PROPERTIES FOLDER "lldb misc")
 
   if (NOT CMAKE_CONFIGURATION_TYPES)
     add_llvm_install_targets(install-lldb-headers
diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 6accd66c43b95..604544a01ee6b 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -88,6 +88,14 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
   set(LLVM_INCLUDE_TESTS ON CACHE INTERNAL "")
 
+  option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON)
+  if(LLVM_USE_FOLDERS)
+    set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+  endif()
+
+  set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "lldb misc")
+  set_target_properties(intrinsics_gen PROPERTIES FOLDER "lldb misc")
+
   set(CMAKE_INCLUDE_CURRENT_DIR ON)
   include_directories(
     "${CMAKE_BINARY_DIR}/include"
diff --git a/lldb/lit/CMakeLists.txt b/lldb/lit/CMakeLists.txt
index 8e8d173d827b8..f7bb423b60fb0 100644
--- a/lldb/lit/CMakeLists.txt
+++ b/lldb/lit/CMakeLists.txt
@@ -68,7 +68,7 @@ add_lit_testsuite(check-lldb-lit "Running lldb lit test suite"
   DEPENDS ${LLDB_TEST_DEPS}
   )
 
-set_target_properties(check-lldb-lit PROPERTIES FOLDER "LLDB tests")
+set_target_properties(check-lldb-lit PROPERTIES FOLDER "lldb tests")
 
 # If we're building with an in-tree clang, then list clang as a dependency
 # to run tests.
diff --git a/lldb/source/API/CMakeLists.txt b/lldb/source/API/CMakeLists.txt
index bb17f76d7fa1f..8d2b7f06bef0d 100644
--- a/lldb/source/API/CMakeLists.txt
+++ b/lldb/source/API/CMakeLists.txt
@@ -144,6 +144,7 @@ if (NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
     MESSAGE("-- Symbols (liblldb): exporting all symbols from the lldb and lldb_private namespaces")
     add_llvm_symbol_exports(liblldb ${CMAKE_CURRENT_SOURCE_DIR}/liblldb-private.exports)
   endif()
+  set_target_properties(liblldb_exports PROPERTIES FOLDER "lldb misc")
 endif()
 
 if ( CMAKE_SYSTEM_NAME MATCHES "Windows" )
diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt
index ea9ba2e80089a..037222e04477b 100644
--- a/lldb/test/CMakeLists.txt
+++ b/lldb/test/CMakeLists.txt
@@ -110,13 +110,13 @@ add_python_test_target(check-lldb-single
   "--no-multiprocess;${LLDB_DOTEST_ARGS}"
   "Testing LLDB with args: ${LLDB_DOTEST_ARGS}"
   )
+set_target_properties(check-lldb-single PROPERTIES FOLDER "lldb misc")
 
 # If tests crash cause LLDB to crash, or things are otherwise unstable, or if machine-parsable
 # output is desired (i.e. in continuous integration contexts) check-lldb-single is a better target.
 add_custom_target(check-lldb)
-
-# Make check-lldb depend on all test dependencies.
 add_dependencies(check-lldb lldb-test-deps)
+set_target_properties(check-lldb PROPERTIES FOLDER "lldb misc")
 
 # If we're building with an in-tree clang, then list clang as a dependency
 # to run tests.
diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt
index ad7a3af915ce9..2b8e737d536b9 100644
--- a/lldb/tools/debugserver/source/CMakeLists.txt
+++ b/lldb/tools/debugserver/source/CMakeLists.txt
@@ -236,6 +236,7 @@ if(build_and_sign_debugserver)
     ${DEBUGSERVER_VERS_GENERATED_FILE})
 
   add_library(lldbDebugserverCommon ${lldbDebugserverCommonSources})
+  set_target_properties(lldbDebugserverCommon PROPERTIES FOLDER "lldb libraries/debugserver")
 
   target_link_libraries(lldbDebugserverCommon
                         INTERFACE ${COCOA_LIBRARY}
diff --git a/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt b/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt
index 28877d122d943..7ad4a06a4d361 100644
--- a/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt
+++ b/lldb/tools/debugserver/source/MacOSX/CMakeLists.txt
@@ -21,3 +21,5 @@ include_directories(${LLDB_SOURCE_DIR}/tools/debugserver/source)
 add_library(lldbDebugserverArchSupport
   ${SOURCES}
   )
+
+set_target_properties(lldbDebugserverArchSupport PROPERTIES FOLDER "lldb libraries/debugserver")
diff --git a/lldb/tools/debugserver/source/MacOSX/DarwinLog/CMakeLists.txt b/lldb/tools/debugserver/source/MacOSX/DarwinLog/CMakeLists.txt
index dffa357f1e680..71abb36358aaf 100644
--- a/lldb/tools/debugserver/source/MacOSX/DarwinLog/CMakeLists.txt
+++ b/lldb/tools/debugserver/source/MacOSX/DarwinLog/CMakeLists.txt
@@ -13,3 +13,5 @@ add_library(lldbDebugserverDarwin_DarwinLog
   LogMessage.cpp
   LogMessageOsLog.cpp
   )
+
+set_target_properties(lldbDebugserverDarwin_DarwinLog PROPERTIES FOLDER "lldb libraries/debugserver")
diff --git a/lldb/tools/driver/CMakeLists.txt b/lldb/tools/driver/CMakeLists.txt
index e6740d8dd5343..1f8c469e08c86 100644
--- a/lldb/tools/driver/CMakeLists.txt
+++ b/lldb/tools/driver/CMakeLists.txt
@@ -28,6 +28,8 @@ add_dependencies(lldb
   ${tablegen_deps}
 )
 
+set_target_properties(LLDBOptionsTableGen PROPERTIES FOLDER "lldb misc")
+
 if(LLDB_BUILD_FRAMEWORK)
   lldb_setup_framework_rpaths_in_tool(lldb)
 endif()
diff --git a/lldb/unittests/CMakeLists.txt b/lldb/unittests/CMakeLists.txt
index 4e4eee6fcceaa..311f47b1b0ec7 100644
--- a/lldb/unittests/CMakeLists.txt
+++ b/lldb/unittests/CMakeLists.txt
@@ -1,5 +1,5 @@
 add_custom_target(LLDBUnitTests)
-set_target_properties(LLDBUnitTests PROPERTIES FOLDER "LLDB tests")
+set_target_properties(LLDBUnitTests PROPERTIES FOLDER "lldb tests")
 
 include_directories(${LLDB_SOURCE_ROOT})
 include_directories(${LLDB_PROJECT_ROOT}/unittests)
diff --git a/lldb/unittests/tools/lldb-mi/utils/CMakeLists.txt b/lldb/unittests/tools/lldb-mi/utils/CMakeLists.txt
index d6fb56abe5865..909c005dcab72 100644
--- a/lldb/unittests/tools/lldb-mi/utils/CMakeLists.txt
+++ b/lldb/unittests/tools/lldb-mi/utils/CMakeLists.txt
@@ -10,3 +10,4 @@ add_lldb_unittest(LLDBMiUtilTests
   )
 
 target_sources(LLDBMiUtilTests PRIVATE $<TARGET_OBJECTS:lldb-mi-utils>)
+set_target_properties(lldb-mi-utils PROPERTIES FOLDER "lldb libraries")
diff --git a/lldb/unittests/tools/lldb-server/CMakeLists.txt b/lldb/unittests/tools/lldb-server/CMakeLists.txt
index 60616c93153fd..3bae69bfa4a12 100644
--- a/lldb/unittests/tools/lldb-server/CMakeLists.txt
+++ b/lldb/unittests/tools/lldb-server/CMakeLists.txt
@@ -7,6 +7,7 @@ function(add_lldb_test_executable test_name)
   set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir})
   list(APPEND ALL_LLDB_TEST_EXECUTABLES ${test_name})
   set(ALL_LLDB_TEST_EXECUTABLES ${ALL_LLDB_TEST_EXECUTABLES} PARENT_SCOPE)
+  set_target_properties(${test_name} PROPERTIES FOLDER "lldb tests")
 endfunction()
 
 add_lldb_test_executable(thread_inferior inferior/thread_inferior.cpp)
diff --git a/lldb/utils/lit-cpuid/CMakeLists.txt b/lldb/utils/lit-cpuid/CMakeLists.txt
index b3af817f3e318..bc9d31309e56a 100644
--- a/lldb/utils/lit-cpuid/CMakeLists.txt
+++ b/lldb/utils/lit-cpuid/CMakeLists.txt
@@ -3,3 +3,4 @@ add_llvm_utility(lit-cpuid
   )
 
 target_link_libraries(lit-cpuid PRIVATE LLVMSupport)
+set_target_properties(lit-cpuid PROPERTIES FOLDER "lldb utils")
diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt
index f1d85a4a6c81f..d36d1a7e93dee 100644
--- a/lldb/utils/lldb-dotest/CMakeLists.txt
+++ b/lldb/utils/lldb-dotest/CMakeLists.txt
@@ -1,6 +1,7 @@
 # Make lldb-dotest a custom target.
 add_custom_target(lldb-dotest)
 add_dependencies(lldb-dotest ${LLDB_TEST_DEPS})
+set_target_properties(lldb-dotest PROPERTIES FOLDER "lldb utils")
 
 get_property(LLDB_DOTEST_ARGS GLOBAL PROPERTY LLDB_DOTEST_ARGS_PROPERTY)
 

From d12f48beda022da99e3033cc166ee53a4ebee772 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 09:31:27 +0000
Subject: [PATCH 0331/1176] [clangd] Never end command-line flag description
 with a period. NFC

For consistency. Not having a period at the end is much more common and
seems to be the preferred style for command-line options.

llvm-svn: 361800
---
 clang-tools-extra/clangd/tool/ClangdMain.cpp | 40 ++++++++++----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
index 106b7d6a3b9c1..90e00e0a26764 100644
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -37,14 +37,14 @@ namespace clangd {
 // FIXME: remove this option when Dex is cheap enough.
 static llvm::cl::opt<bool>
     UseDex("use-dex-index",
-           llvm::cl::desc("Use experimental Dex dynamic index."),
+           llvm::cl::desc("Use experimental Dex dynamic index"),
            llvm::cl::init(true), llvm::cl::Hidden);
 
 static llvm::cl::opt<Path> CompileCommandsDir(
     "compile-commands-dir",
     llvm::cl::desc("Specify a path to look for compile_commands.json. If path "
                    "is invalid, clangd will look in the current directory and "
-                   "parent paths of each source file."));
+                   "parent paths of each source file"));
 
 static llvm::cl::opt<unsigned>
     WorkerThreadsCount("j",
@@ -59,10 +59,10 @@ static llvm::cl::opt<CompletionStyleFlag> CompletionStyle(
     llvm::cl::values(
         clEnumValN(Detailed, "detailed",
                    "One completion item for each semantically distinct "
-                   "completion, with full type information."),
+                   "completion, with full type information"),
         clEnumValN(Bundled, "bundled",
                    "Similar completion items (e.g. function overloads) are "
-                   "combined. Type information shown where possible.")),
+                   "combined. Type information shown where possible")),
     llvm::cl::init(Detailed));
 
 // FIXME: Flags are the wrong mechanism for user preferences.
@@ -98,12 +98,12 @@ static llvm::cl::opt<bool>
     Test("lit-test",
          llvm::cl::desc("Abbreviation for -input-style=delimited -pretty -sync "
                         "-enable-test-scheme -log=verbose."
-                        "Intended to simplify lit tests."),
+                        "Intended to simplify lit tests"),
          llvm::cl::init(false), llvm::cl::Hidden);
 
 static llvm::cl::opt<bool> EnableTestScheme(
     "enable-test-uri-scheme",
-    llvm::cl::desc("Enable 'test:' URI scheme. Only use in lit tests."),
+    llvm::cl::desc("Enable 'test:' URI scheme. Only use in lit tests"),
     llvm::cl::init(false), llvm::cl::Hidden);
 
 enum PCHStorageFlag { Disk, Memory };
@@ -119,7 +119,7 @@ static llvm::cl::opt<PCHStorageFlag> PCHStorage(
 static llvm::cl::opt<int> LimitResults(
     "limit-results",
     llvm::cl::desc("Limit the number of results returned by clangd. "
-                   "0 means no limit. (default=100)"),
+                   "0 means no limit (default=100)"),
     llvm::cl::init(100));
 
 static llvm::cl::opt<bool>
@@ -134,7 +134,7 @@ static llvm::cl::opt<Path>
 static llvm::cl::opt<Path> InputMirrorFile(
     "input-mirror-file",
     llvm::cl::desc(
-        "Mirror all LSP input to the specified file. Useful for debugging."),
+        "Mirror all LSP input to the specified file. Useful for debugging"),
     llvm::cl::init(""), llvm::cl::Hidden);
 
 static llvm::cl::opt<bool> EnableIndex(
@@ -142,7 +142,7 @@ static llvm::cl::opt<bool> EnableIndex(
     llvm::cl::desc(
         "Enable index-based features. By default, clangd maintains an index "
         "built from symbols in opened files. Global index support needs to "
-        "enabled separatedly."),
+        "enabled separatedly"),
     llvm::cl::init(true), llvm::cl::Hidden);
 
 static llvm::cl::opt<bool> AllScopesCompletion(
@@ -151,7 +151,7 @@ static llvm::cl::opt<bool> AllScopesCompletion(
         "If set to true, code completion will include index symbols that are "
         "not defined in the scopes (e.g. "
         "namespaces) visible from the code completion point. Such completions "
-        "can insert scope qualifiers."),
+        "can insert scope qualifiers"),
     llvm::cl::init(true));
 
 static llvm::cl::opt<bool> ShowOrigins(
@@ -167,7 +167,7 @@ static llvm::cl::opt<CodeCompleteOptions::IncludeInsertion> HeaderInsertion(
                    "Include what you use. "
                    "Insert the owning header for top-level symbols, unless the "
                    "header is already directly included or the symbol is "
-                   "forward-declared."),
+                   "forward-declared"),
         clEnumValN(
             CodeCompleteOptions::NeverInsert, "never",
             "Never insert #include directives as part of code completion")));
@@ -176,16 +176,16 @@ static llvm::cl::opt<bool> HeaderInsertionDecorators(
     "header-insertion-decorators",
     llvm::cl::desc("Prepend a circular dot or space before the completion "
                    "label, depending on whether "
-                   "an include line will be inserted or not."),
+                   "an include line will be inserted or not"),
     llvm::cl::init(true));
 
 static llvm::cl::opt<Path> IndexFile(
     "index-file",
     llvm::cl::desc(
         "Index file to build the static index. The file must have been created "
-        "by a compatible clangd-indexer.\n"
+        "by a compatible clangd-indexer\n"
         "WARNING: This option is experimental only, and will be removed "
-        "eventually. Don't rely on it."),
+        "eventually. Don't rely on it"),
     llvm::cl::init(""), llvm::cl::Hidden);
 
 static llvm::cl::opt<bool> EnableBackgroundIndex(
@@ -200,7 +200,7 @@ static llvm::cl::opt<int> BackgroundIndexRebuildPeriod(
     llvm::cl::desc(
         "If set to non-zero, the background index rebuilds the symbol index "
         "periodically every X milliseconds; otherwise, the "
-        "symbol index will be updated for each indexed file."),
+        "symbol index will be updated for each indexed file"),
     llvm::cl::init(5000), llvm::cl::Hidden);
 
 enum CompileArgsFrom { LSPCompileArgs, FilesystemCompileArgs };
@@ -218,19 +218,19 @@ static llvm::cl::opt<bool> EnableFunctionArgSnippets(
     "function-arg-placeholders",
     llvm::cl::desc("When disabled, completions contain only parentheses for "
                    "function calls. When enabled, completions also contain "
-                   "placeholders for method parameters."),
+                   "placeholders for method parameters"),
     llvm::cl::init(CodeCompleteOptions().EnableFunctionArgSnippets));
 
 static llvm::cl::opt<std::string> ClangTidyChecks(
     "clang-tidy-checks",
     llvm::cl::desc(
         "List of clang-tidy checks to run (this will override "
-        ".clang-tidy files). Only meaningful when -clang-tidy flag is on."),
+        ".clang-tidy files). Only meaningful when -clang-tidy flag is on"),
     llvm::cl::init(""));
 
 static llvm::cl::opt<bool>
     EnableClangTidy("clang-tidy",
-                    llvm::cl::desc("Enable clang-tidy diagnostics."),
+                    llvm::cl::desc("Enable clang-tidy diagnostics"),
                     llvm::cl::init(true));
 
 static llvm::cl::opt<std::string>
@@ -242,13 +242,13 @@ static llvm::cl::opt<std::string>
 static llvm::cl::opt<bool> SuggestMissingIncludes(
     "suggest-missing-includes",
     llvm::cl::desc("Attempts to fix diagnostic errors caused by missing "
-                   "includes using index."),
+                   "includes using index"),
     llvm::cl::init(true));
 
 static llvm::cl::opt<OffsetEncoding> ForceOffsetEncoding(
     "offset-encoding",
     llvm::cl::desc("Force the offsetEncoding used for character positions. "
-                   "This bypasses negotiation via client capabilities."),
+                   "This bypasses negotiation via client capabilities"),
     llvm::cl::values(clEnumValN(OffsetEncoding::UTF8, "utf-8",
                                 "Offsets are in UTF-8 bytes"),
                      clEnumValN(OffsetEncoding::UTF16, "utf-16",

From f57bd6bd23d6224a77b08cb37ee717f804c970d5 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Tue, 28 May 2019 09:36:52 +0000
Subject: [PATCH 0332/1176] [AArch64][SVE2] Asm: support SVE2 Floating Point
 Convert Group

Summary:
Patch adds support for the following intructions:

SVE2 floating-point convert precision:
    * FCVTXNT, FCVTNT, FCVTLT

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62382

llvm-svn: 361801
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  5 ++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 37 ++++++++++
 .../test/MC/AArch64/SVE2/fcvtlt-diagnostics.s | 69 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fcvtlt.s            | 21 ++++++
 .../test/MC/AArch64/SVE2/fcvtnt-diagnostics.s | 69 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fcvtnt.s            | 21 ++++++
 .../MC/AArch64/SVE2/fcvtxnt-diagnostics.s     | 74 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fcvtxnt.s           | 21 ++++++
 8 files changed, 317 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtlt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtlt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtnt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtxnt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtxnt.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1635a602539ce..4095c6d95822d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1289,6 +1289,11 @@ let Predicates = [HasSVE2] in {
   // SVE2 histogram generation (vector)
   defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
 
+  // SVE2 floating-point convert precision
+  defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
+  defm FCVTNT_ZPmZ  : sve2_fp_convert_down_narrow<"fcvtnt">;
+  defm FCVTLT_ZPmZ  : sve2_fp_convert_up_long<"fcvtlt">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 25a25e7d38ce2..699c21d867b72 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1435,6 +1435,43 @@ multiclass sve_fp_fcadd<string asm> {
   def _D : sve_fp_fcadd<0b11, asm, ZPR64>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Convert Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_convert_precision<bits<4> opc, string asm,
+                                ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
+  asm, "\t$Zd, $Pg/m, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<3> Pg;
+  let Inst{31-24} = 0b01100100;
+  let Inst{23-22} = opc{3-2};
+  let Inst{21-18} = 0b0010;
+  let Inst{17-16} = opc{1-0};
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_fp_convert_down_narrow<string asm> {
+  def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
+  def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+}
+
+multiclass sve2_fp_convert_up_long<string asm> {
+  def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
+  def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+}
+
+multiclass sve2_fp_convert_down_odd_rounding<string asm> {
+  def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Stack Allocation Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtlt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtlt-diagnostics.s
new file mode 100644
index 0000000000000..de9a1b9c9475c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtlt-diagnostics.s
@@ -0,0 +1,69 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fcvtlt z0.b, p0/m, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtlt z0.b, p0/m, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtlt z0.h, p0/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtlt z0.h, p0/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtlt z0.s, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtlt z0.s, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtlt z0.d, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtlt z0.d, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtlt z0.h, p0/m, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtlt z0.h, p0/m, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtlt z0.q, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtlt z0.q, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fcvtlt z0.s, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fcvtlt z0.s, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fcvtlt z0.s, p8/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fcvtlt z0.s, p8/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/m, z7.s
+fcvtlt z0.s, p7/m, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtlt z0.s, p7/m, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvtlt z0.s, p7/m, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtlt z0.s, p7/m, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtlt.s b/llvm/test/MC/AArch64/SVE2/fcvtlt.s
new file mode 100644
index 0000000000000..d5942ad447244
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtlt.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fcvtlt z0.s, p0/m, z1.h
+// CHECK-INST: fcvtlt z0.s, p0/m, z1.h
+// CHECK-ENCODING: [0x20,0xa0,0x89,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 89 64 <unknown>
+
+fcvtlt z30.d, p7/m, z31.s
+// CHECK-INST: fcvtlt z30.d, p7/m, z31.s
+// CHECK-ENCODING: [0xfe,0xbf,0xcb,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe bf cb 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s
new file mode 100644
index 0000000000000..b01caf0059036
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtnt-diagnostics.s
@@ -0,0 +1,69 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fcvtnt z0.b, p0/m, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtnt z0.b, p0/m, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtnt z0.h, p0/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtnt z0.h, p0/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtnt z0.s, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtnt z0.s, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtnt z0.d, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtnt z0.d, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtnt z0.b, p0/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtnt z0.b, p0/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtnt z0.d, p0/m, z0.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtnt z0.d, p0/m, z0.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fcvtnt z0.h, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fcvtnt z0.h, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fcvtnt z0.h, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fcvtnt z0.h, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/m, z7.s
+fcvtnt z0.s, p7/m, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtnt z0.s, p7/m, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvtnt z0.s, p7/m, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtnt z0.s, p7/m, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtnt.s b/llvm/test/MC/AArch64/SVE2/fcvtnt.s
new file mode 100644
index 0000000000000..9de9ea7a82f0e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtnt.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fcvtnt z0.h, p0/m, z1.s
+// CHECK-INST: fcvtnt z0.h, p0/m, z1.s
+// CHECK-ENCODING: [0x20,0xa0,0x88,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 88 64 <unknown>
+
+fcvtnt z30.s, p7/m, z31.d
+// CHECK-INST: fcvtnt z30.s, p7/m, z31.d
+// CHECK-ENCODING: [0xfe,0xbf,0xca,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe bf ca 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtxnt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtxnt-diagnostics.s
new file mode 100644
index 0000000000000..60f85f4df68f5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtxnt-diagnostics.s
@@ -0,0 +1,74 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fcvtxnt z0.b, p0/m, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.b, p0/m, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtxnt z0.h, p0/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.h, p0/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtxnt z0.s, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.s, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtxnt z0.d, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.d, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtxnt z0.h, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.h, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtxnt z0.b, p0/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.b, p0/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtxnt z0.d, p0/m, z0.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtxnt z0.d, p0/m, z0.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fcvtxnt z0.s, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fcvtxnt z0.s, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fcvtxnt z0.s, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fcvtxnt z0.s, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/m, z7.s
+fcvtxnt z0.s, p7/m, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtxnt z0.s, p7/m, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvtxnt z0.s, p7/m, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtxnt z0.s, p7/m, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtxnt.s b/llvm/test/MC/AArch64/SVE2/fcvtxnt.s
new file mode 100644
index 0000000000000..25bb1672d0a4c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtxnt.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fcvtxnt z0.s, p0/m, z1.d
+// CHECK-INST: fcvtxnt z0.s, p0/m, z1.d
+// CHECK-ENCODING: [0x20,0xa0,0x0a,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 0a 64 <unknown>
+
+fcvtxnt z30.s, p7/m, z31.d
+// CHECK-INST: fcvtxnt z30.s, p7/m, z31.d
+// CHECK-ENCODING: [0xfe,0xbf,0x0a,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe bf 0a 64 <unknown>

From 173a68f1fb7958398f8366e6345cb0cda81757dc Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 28 May 2019 10:12:06 +0000
Subject: [PATCH 0333/1176] [ELF] Replace two addSymbol() call sites with
 Symbol::resolve(). NFC

If we have a handle of the symbol, insert() called by addSymbol() is
redundant. Just call resolve().

llvm-svn: 361802
---
 lld/ELF/Writer.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 58fc6fab7c9fb..dd2cae8e8618a 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -180,9 +180,9 @@ static Defined *addOptionalRegular(StringRef Name, SectionBase *Sec,
   if (!S || S->isDefined())
     return nullptr;
 
-  return cast<Defined>(Symtab->addSymbol(
-      Defined{/*File=*/nullptr, Name, Binding, StOther, STT_NOTYPE, Val,
-              /*Size=*/0, Sec}));
+  S->resolve(Defined{/*File=*/nullptr, Name, Binding, StOther, STT_NOTYPE, Val,
+                     /*Size=*/0, Sec});
+  return cast<Defined>(S);
 }
 
 static Defined *addAbsolute(StringRef Name) {
@@ -239,9 +239,8 @@ void elf::addReservedSymbols() {
     if (Config->EMachine == EM_PPC || Config->EMachine == EM_PPC64)
       GotOff = 0x8000;
 
-    Symtab->addSymbol(Defined{/*File=*/nullptr, GotSymName, STB_GLOBAL,
-                              STV_HIDDEN, STT_NOTYPE, GotOff, /*Size=*/0,
-                              Out::ElfHeader});
+    S->resolve(Defined{/*File=*/nullptr, GotSymName, STB_GLOBAL, STV_HIDDEN,
+                       STT_NOTYPE, GotOff, /*Size=*/0, Out::ElfHeader});
     ElfSym::GlobalOffsetTable = cast<Defined>(S);
   }
 

From c6578eefdd57101d954d990a97dca10551c8cb64 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Tue, 28 May 2019 10:29:58 +0000
Subject: [PATCH 0334/1176] [clangd] Introduce a structured hover response

Summary:
Change ClangdServer layer to output a structured response for Hover,
which can be rendered by client according to their needs.

Reviewers: sammccall, ilya-biryukov

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61497

llvm-svn: 361803
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp  |  15 +-
 clang-tools-extra/clangd/ClangdServer.cpp     |  11 +-
 clang-tools-extra/clangd/ClangdServer.h       |   2 +-
 clang-tools-extra/clangd/Protocol.cpp         |   1 +
 clang-tools-extra/clangd/XRefs.cpp            | 424 +++++++++++++-----
 clang-tools-extra/clangd/XRefs.h              |  70 ++-
 clang-tools-extra/clangd/test/hover.test      |  33 ++
 clang-tools-extra/clangd/unittests/TestTU.cpp |   3 +-
 .../clangd/unittests/XRefsTests.cpp           | 353 +++++++++++++--
 9 files changed, 769 insertions(+), 143 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 6bc8499730f55..c599e6c4d54fe 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -842,7 +842,20 @@ void ClangdLSPServer::onDocumentHighlight(
 void ClangdLSPServer::onHover(const TextDocumentPositionParams &Params,
                               Callback<llvm::Optional<Hover>> Reply) {
   Server->findHover(Params.textDocument.uri.file(), Params.position,
-                    std::move(Reply));
+                    Bind(
+                        [](decltype(Reply) Reply,
+                           llvm::Expected<llvm::Optional<HoverInfo>> HIorErr) {
+                          if (!HIorErr)
+                            return Reply(HIorErr.takeError());
+                          const auto &HI = HIorErr.get();
+                          if (!HI)
+                            return Reply(llvm::None);
+                          Hover H;
+                          H.range = HI->SymRange;
+                          H.contents = HI->render();
+                          return Reply(H);
+                        },
+                        std::move(Reply)));
 }
 
 void ClangdLSPServer::onTypeHierarchy(
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 449c0c980991d..9989f610d8b46 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -461,15 +461,18 @@ void ClangdServer::findDocumentHighlights(
 }
 
 void ClangdServer::findHover(PathRef File, Position Pos,
-                             Callback<llvm::Optional<Hover>> CB) {
-  auto Action = [Pos](Callback<llvm::Optional<Hover>> CB,
+                             Callback<llvm::Optional<HoverInfo>> CB) {
+  auto Action = [Pos](Callback<llvm::Optional<HoverInfo>> CB, Path File,
                       llvm::Expected<InputsAndAST> InpAST) {
     if (!InpAST)
       return CB(InpAST.takeError());
-    CB(clangd::getHover(InpAST->AST, Pos));
+    format::FormatStyle Style = getFormatStyleForFile(
+        File, InpAST->Inputs.Contents, InpAST->Inputs.FS.get());
+    CB(clangd::getHover(InpAST->AST, Pos, std::move(Style)));
   };
 
-  WorkScheduler.runWithAST("Hover", File, Bind(Action, std::move(CB)));
+  WorkScheduler.runWithAST("Hover", File,
+                           Bind(Action, std::move(CB), File.str()));
 }
 
 void ClangdServer::typeHierarchy(PathRef File, Position Pos, int Resolve,
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index 1038982dc7a4a..62c0c9a09ff3e 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -186,7 +186,7 @@ class ClangdServer {
 
   /// Get code hover for a given position.
   void findHover(PathRef File, Position Pos,
-                 Callback<llvm::Optional<Hover>> CB);
+                 Callback<llvm::Optional<HoverInfo>> CB);
 
   /// Get information about type hierarchy for a given position.
   void typeHierarchy(PathRef File, Position Pos, int Resolve,
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index a8b1c43732d33..04c26637b1dcf 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/JSON.h"
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index c51631ad1934b..e5e137df21f1b 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -7,21 +7,37 @@
 //===----------------------------------------------------------------------===//
 #include "XRefs.h"
 #include "AST.h"
+#include "CodeCompletionStrings.h"
 #include "FindSymbols.h"
 #include "Logger.h"
+#include "Protocol.h"
 #include "SourceCode.h"
 #include "URI.h"
 #include "index/Merge.h"
 #include "index/SymbolCollector.h"
 #include "index/SymbolLocation.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Index/IndexDataConsumer.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Index/IndexingAction.h"
 #include "clang/Index/USRGeneration.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 namespace clangd {
@@ -241,17 +257,17 @@ IdentifiedSymbol getSymbolAtPosition(ParsedAST &AST, SourceLocation Pos) {
   return {DeclMacrosFinder.getFoundDecls(), DeclMacrosFinder.takeMacroInfos()};
 }
 
-Range getTokenRange(ParsedAST &AST, SourceLocation TokLoc) {
-  const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-  SourceLocation LocEnd = Lexer::getLocForEndOfToken(
-      TokLoc, 0, SourceMgr, AST.getASTContext().getLangOpts());
+Range getTokenRange(ASTContext &AST, SourceLocation TokLoc) {
+  const SourceManager &SourceMgr = AST.getSourceManager();
+  SourceLocation LocEnd =
+      Lexer::getLocForEndOfToken(TokLoc, 0, SourceMgr, AST.getLangOpts());
   return {sourceLocToPosition(SourceMgr, TokLoc),
           sourceLocToPosition(SourceMgr, LocEnd)};
 }
 
-llvm::Optional<Location> makeLocation(ParsedAST &AST, SourceLocation TokLoc,
+llvm::Optional<Location> makeLocation(ASTContext &AST, SourceLocation TokLoc,
                                       llvm::StringRef TUPath) {
-  const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
+  const SourceManager &SourceMgr = AST.getSourceManager();
   const FileEntry *F = SourceMgr.getFileEntryForID(SourceMgr.getFileID(TokLoc));
   if (!F)
     return None;
@@ -299,8 +315,8 @@ std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
   // As a consequence, there's no need to look them up in the index either.
   std::vector<LocatedSymbol> Result;
   for (auto M : Symbols.Macros) {
-    if (auto Loc =
-            makeLocation(AST, M.Info->getDefinitionLoc(), *MainFilePath)) {
+    if (auto Loc = makeLocation(AST.getASTContext(), M.Info->getDefinitionLoc(),
+                                *MainFilePath)) {
       LocatedSymbol Macro;
       Macro.Name = M.Name;
       Macro.PreferredDeclaration = *Loc;
@@ -320,7 +336,7 @@ std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
 
   // Emit all symbol locations (declaration or definition) from AST.
   for (const Decl *D : Symbols.Decls) {
-    auto Loc = makeLocation(AST, findNameLoc(D), *MainFilePath);
+    auto Loc = makeLocation(AST.getASTContext(), findNameLoc(D), *MainFilePath);
     if (!Loc)
       continue;
 
@@ -453,7 +469,7 @@ std::vector<DocumentHighlight> findDocumentHighlights(ParsedAST &AST,
   std::vector<DocumentHighlight> Result;
   for (const auto &Ref : References) {
     DocumentHighlight DH;
-    DH.range = getTokenRange(AST, Ref.Loc);
+    DH.range = getTokenRange(AST.getASTContext(), Ref.Loc);
     if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Write))
       DH.kind = DocumentHighlightKind::Write;
     else if (Ref.Role & index::SymbolRoleSet(index::SymbolRole::Read))
@@ -477,102 +493,238 @@ static PrintingPolicy printingPolicyForDecls(PrintingPolicy Base) {
   return Policy;
 }
 
-/// Return a string representation (e.g. "class MyNamespace::MyClass") of
-/// the type declaration \p TD.
-static std::string typeDeclToString(const TypeDecl *TD) {
-  QualType Type = TD->getASTContext().getTypeDeclType(TD);
+/// Given a declaration \p D, return a human-readable string representing the
+/// local scope in which it is declared, i.e. class(es) and method name. Returns
+/// an empty string if it is not local.
+static std::string getLocalScope(const Decl *D) {
+  std::vector<std::string> Scopes;
+  const DeclContext *DC = D->getDeclContext();
+  auto GetName = [](const Decl *D) {
+    const NamedDecl *ND = dyn_cast<NamedDecl>(D);
+    std::string Name = ND->getNameAsString();
+    if (!Name.empty())
+      return Name;
+    if (auto RD = dyn_cast<RecordDecl>(D))
+      return ("(anonymous " + RD->getKindName() + ")").str();
+    return std::string("");
+  };
+  while (DC) {
+    if (const TypeDecl *TD = dyn_cast<TypeDecl>(DC))
+      Scopes.push_back(GetName(TD));
+    else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DC))
+      Scopes.push_back(FD->getNameAsString());
+    DC = DC->getParent();
+  }
 
-  PrintingPolicy Policy =
-      printingPolicyForDecls(TD->getASTContext().getPrintingPolicy());
+  return llvm::join(llvm::reverse(Scopes), "::");
+}
 
-  std::string Name;
-  llvm::raw_string_ostream Stream(Name);
-  Type.print(Stream, Policy);
+/// Returns the human-readable representation for namespace containing the
+/// declaration \p D. Returns empty if it is contained global namespace.
+static std::string getNamespaceScope(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+
+  if (const TypeDecl *TD = dyn_cast<TypeDecl>(DC))
+    return getNamespaceScope(TD);
+  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DC))
+    return getNamespaceScope(FD);
+  if (const NamedDecl *ND = dyn_cast<NamedDecl>(DC))
+    return ND->getQualifiedNameAsString();
 
-  return Stream.str();
+  return "";
 }
 
-/// Return a string representation (e.g. "namespace ns1::ns2") of
-/// the named declaration \p ND.
-static std::string namedDeclQualifiedName(const NamedDecl *ND,
-                                          llvm::StringRef Prefix) {
+static std::string printDefinition(const Decl *D) {
+  std::string Definition;
+  llvm::raw_string_ostream OS(Definition);
   PrintingPolicy Policy =
-      printingPolicyForDecls(ND->getASTContext().getPrintingPolicy());
-
-  std::string Name;
-  llvm::raw_string_ostream Stream(Name);
-  Stream << Prefix << ' ';
-  ND->printQualifiedName(Stream, Policy);
+      printingPolicyForDecls(D->getASTContext().getPrintingPolicy());
+  Policy.IncludeTagDefinition = false;
+  D->print(OS, Policy);
+  return Definition;
+}
 
-  return Stream.str();
+static void printParams(llvm::raw_ostream &OS,
+                        const std::vector<HoverInfo::Param> &Params) {
+  for (size_t I = 0, E = Params.size(); I != E; ++I) {
+    if (I)
+      OS << ", ";
+    OS << Params.at(I);
+  }
 }
 
-/// Given a declaration \p D, return a human-readable string representing the
-/// scope in which it is declared.  If the declaration is in the global scope,
-/// return the string "global namespace".
-static llvm::Optional<std::string> getScopeName(const Decl *D) {
-  const DeclContext *DC = D->getDeclContext();
+static std::vector<HoverInfo::Param>
+fetchTemplateParameters(const TemplateParameterList *Params,
+                        const PrintingPolicy &PP) {
+  assert(Params);
+  std::vector<HoverInfo::Param> TempParameters;
+
+  for (const Decl *Param : *Params) {
+    HoverInfo::Param P;
+    P.Type.emplace();
+    if (const auto TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
+      P.Type = TTP->wasDeclaredWithTypename() ? "typename" : "class";
+      if (TTP->isParameterPack())
+        *P.Type += "...";
+
+      if (!TTP->getName().empty())
+        P.Name = TTP->getNameAsString();
+      if (TTP->hasDefaultArgument())
+        P.Default = TTP->getDefaultArgument().getAsString(PP);
+    } else if (const auto NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
+      if (IdentifierInfo *II = NTTP->getIdentifier())
+        P.Name = II->getName().str();
+
+      llvm::raw_string_ostream Out(*P.Type);
+      NTTP->getType().print(Out, PP);
+      if (NTTP->isParameterPack())
+        Out << "...";
+
+      if (NTTP->hasDefaultArgument()) {
+        P.Default.emplace();
+        llvm::raw_string_ostream Out(*P.Default);
+        NTTP->getDefaultArgument()->printPretty(Out, nullptr, PP);
+      }
+    } else if (const auto TTPD = dyn_cast<TemplateTemplateParmDecl>(Param)) {
+      llvm::raw_string_ostream OS(*P.Type);
+      OS << "template <";
+      printParams(OS,
+                  fetchTemplateParameters(TTPD->getTemplateParameters(), PP));
+      OS << "> class"; // FIXME: TemplateTemplateParameter doesn't store the
+                       // info on whether this param was a "typename" or
+                       // "class".
+      if (!TTPD->getName().empty())
+        P.Name = TTPD->getNameAsString();
+      if (TTPD->hasDefaultArgument()) {
+        P.Default.emplace();
+        llvm::raw_string_ostream Out(*P.Default);
+        TTPD->getDefaultArgument().getArgument().print(PP, Out);
+      }
+    }
+    TempParameters.push_back(std::move(P));
+  }
 
-  if (isa<TranslationUnitDecl>(DC))
-    return std::string("global namespace");
-  if (const TypeDecl *TD = dyn_cast<TypeDecl>(DC))
-    return typeDeclToString(TD);
-  else if (const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(DC))
-    return namedDeclQualifiedName(ND, "namespace");
-  else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DC))
-    return namedDeclQualifiedName(FD, "function");
+  return TempParameters;
+}
 
-  return None;
+static llvm::Optional<Range> getTokenRange(SourceLocation Loc,
+                                           const ASTContext &Ctx) {
+  if (!Loc.isValid())
+    return llvm::None;
+  SourceLocation End = Lexer::getLocForEndOfToken(
+      Loc, 0, Ctx.getSourceManager(), Ctx.getLangOpts());
+  if (!End.isValid())
+    return llvm::None;
+  return halfOpenToRange(Ctx.getSourceManager(),
+                         CharSourceRange::getCharRange(Loc, End));
 }
 
 /// Generate a \p Hover object given the declaration \p D.
-static Hover getHoverContents(const Decl *D) {
-  Hover H;
-  llvm::Optional<std::string> NamedScope = getScopeName(D);
-
-  // Generate the "Declared in" section.
-  if (NamedScope) {
-    assert(!NamedScope->empty());
-
-    H.contents.value += "Declared in ";
-    H.contents.value += *NamedScope;
-    H.contents.value += "\n\n";
+static HoverInfo getHoverContents(const Decl *D) {
+  HoverInfo HI;
+  const ASTContext &Ctx = D->getASTContext();
+
+  HI.NamespaceScope = getNamespaceScope(D);
+  if (!HI.NamespaceScope->empty())
+    HI.NamespaceScope->append("::");
+  HI.LocalScope = getLocalScope(D);
+  if (!HI.LocalScope.empty())
+    HI.LocalScope.append("::");
+
+  PrintingPolicy Policy = printingPolicyForDecls(Ctx.getPrintingPolicy());
+  if (const NamedDecl *ND = llvm::dyn_cast<NamedDecl>(D)) {
+    HI.Documentation = getDeclComment(Ctx, *ND);
+    HI.Name = printName(Ctx, *ND);
   }
 
-  // We want to include the template in the Hover.
-  if (TemplateDecl *TD = D->getDescribedTemplate())
-    D = TD;
-
-  std::string DeclText;
-  llvm::raw_string_ostream OS(DeclText);
-
-  PrintingPolicy Policy =
-      printingPolicyForDecls(D->getASTContext().getPrintingPolicy());
+  HI.Kind = indexSymbolKindToSymbolKind(index::getSymbolInfo(D).Kind);
 
-  D->print(OS, Policy);
+  // Fill in template params.
+  if (const TemplateDecl *TD = D->getDescribedTemplate()) {
+    HI.TemplateParameters =
+        fetchTemplateParameters(TD->getTemplateParameters(), Policy);
+    D = TD;
+  } else if (const FunctionDecl *FD = D->getAsFunction()) {
+    if (const auto FTD = FD->getDescribedTemplate()) {
+      HI.TemplateParameters =
+          fetchTemplateParameters(FTD->getTemplateParameters(), Policy);
+      D = FTD;
+    }
+  }
 
-  OS.flush();
+  // Fill in types and params.
+  if (const FunctionDecl *FD = D->getAsFunction()) {
+    HI.ReturnType.emplace();
+    llvm::raw_string_ostream OS(*HI.ReturnType);
+    FD->getReturnType().print(OS, Policy);
+
+    HI.Type.emplace();
+    llvm::raw_string_ostream TypeOS(*HI.Type);
+    FD->getReturnType().print(TypeOS, Policy);
+    TypeOS << '(';
+
+    HI.Parameters.emplace();
+    for (const ParmVarDecl *PVD : FD->parameters()) {
+      if (HI.Parameters->size())
+        TypeOS << ", ";
+      HI.Parameters->emplace_back();
+      auto &P = HI.Parameters->back();
+      if (!PVD->getType().isNull()) {
+        P.Type.emplace();
+        llvm::raw_string_ostream OS(*P.Type);
+        PVD->getType().print(OS, Policy);
+        PVD->getType().print(TypeOS, Policy);
+      } else {
+        std::string Param;
+        llvm::raw_string_ostream OS(Param);
+        PVD->dump(OS);
+        OS.flush();
+        elog("Got param with null type: {0}", Param);
+      }
+      if (!PVD->getName().empty())
+        P.Name = PVD->getNameAsString();
+      if (PVD->hasDefaultArg()) {
+        P.Default.emplace();
+        llvm::raw_string_ostream Out(*P.Default);
+        PVD->getDefaultArg()->printPretty(Out, nullptr, Policy);
+      }
+    }
+    TypeOS << ')';
+    // FIXME: handle variadics.
+  } else if (const auto *VD = dyn_cast<ValueDecl>(D)) {
+    // FIXME: Currently lambdas are also handled as ValueDecls, they should be
+    // more similar to functions.
+    HI.Type.emplace();
+    llvm::raw_string_ostream OS(*HI.Type);
+    VD->getType().print(OS, Policy);
+  }
 
-  H.contents.value += DeclText;
-  return H;
+  HI.Definition = printDefinition(D);
+  return HI;
 }
 
 /// Generate a \p Hover object given the type \p T.
-static Hover getHoverContents(QualType T, ASTContext &ASTCtx) {
-  Hover H;
-  std::string TypeText;
-  llvm::raw_string_ostream OS(TypeText);
+static HoverInfo getHoverContents(QualType T, const Decl *D,
+                                  ASTContext &ASTCtx) {
+  HoverInfo HI;
+  llvm::raw_string_ostream OS(HI.Name);
   PrintingPolicy Policy = printingPolicyForDecls(ASTCtx.getPrintingPolicy());
   T.print(OS, Policy);
-  OS.flush();
-  H.contents.value += TypeText;
-  return H;
+
+  if (D)
+    HI.Kind = indexSymbolKindToSymbolKind(index::getSymbolInfo(D).Kind);
+  return HI;
 }
 
 /// Generate a \p Hover object given the macro \p MacroDecl.
-static Hover getHoverContents(MacroDecl Decl, ParsedAST &AST) {
+static HoverInfo getHoverContents(MacroDecl Decl, ParsedAST &AST) {
+  HoverInfo HI;
   SourceManager &SM = AST.getASTContext().getSourceManager();
-  std::string Definition = Decl.Name;
+  HI.Name = Decl.Name;
+  HI.Kind = indexSymbolKindToSymbolKind(
+      index::getSymbolInfoForMacro(*Decl.Info).Kind);
+  // FIXME: Populate documentation
+  // FIXME: Pupulate parameters
 
   // Try to get the full definition, not just the name
   SourceLocation StartLoc = Decl.Info->getDefinitionLoc();
@@ -586,14 +738,12 @@ static Hover getHoverContents(MacroDecl Decl, ParsedAST &AST) {
       unsigned StartOffset = SM.getFileOffset(StartLoc);
       unsigned EndOffset = SM.getFileOffset(EndLoc);
       if (EndOffset <= Buffer.size() && StartOffset < EndOffset)
-        Definition = Buffer.substr(StartOffset, EndOffset - StartOffset).str();
+        HI.Definition =
+            ("#define " + Buffer.substr(StartOffset, EndOffset - StartOffset))
+                .str();
     }
   }
-
-  Hover H;
-  H.contents.kind = MarkupKind::PlainText;
-  H.contents.value = "#define " + Definition;
-  return H;
+  return HI;
 }
 
 namespace {
@@ -607,14 +757,11 @@ namespace {
 /// a deduced type set. The AST should be improved to simplify this scenario.
 class DeducedTypeVisitor : public RecursiveASTVisitor<DeducedTypeVisitor> {
   SourceLocation SearchedLocation;
-  llvm::Optional<QualType> DeducedType;
 
 public:
   DeducedTypeVisitor(SourceLocation SearchedLocation)
       : SearchedLocation(SearchedLocation) {}
 
-  llvm::Optional<QualType> getDeducedType() { return DeducedType; }
-
   // Handle auto initializers:
   //- auto i = 1;
   //- decltype(auto) i = 1;
@@ -626,8 +773,10 @@ class DeducedTypeVisitor : public RecursiveASTVisitor<DeducedTypeVisitor> {
       return true;
 
     if (auto *AT = D->getType()->getContainedAutoType()) {
-      if (!AT->getDeducedType().isNull())
+      if (!AT->getDeducedType().isNull()) {
         DeducedType = AT->getDeducedType();
+        this->D = D;
+      }
     }
     return true;
   }
@@ -655,13 +804,17 @@ class DeducedTypeVisitor : public RecursiveASTVisitor<DeducedTypeVisitor> {
     const AutoType *AT = D->getReturnType()->getContainedAutoType();
     if (AT && !AT->getDeducedType().isNull()) {
       DeducedType = AT->getDeducedType();
+      this->D = D;
     } else if (auto DT = dyn_cast<DecltypeType>(D->getReturnType())) {
       // auto in a trailing return type just points to a DecltypeType and
       // getContainedAutoType does not unwrap it.
-      if (!DT->getUnderlyingType().isNull())
+      if (!DT->getUnderlyingType().isNull()) {
         DeducedType = DT->getUnderlyingType();
+        this->D = D;
+      }
     } else if (!D->getReturnType().isNull()) {
       DeducedType = D->getReturnType();
+      this->D = D;
     }
     return true;
   }
@@ -680,16 +833,19 @@ class DeducedTypeVisitor : public RecursiveASTVisitor<DeducedTypeVisitor> {
     const DecltypeType *DT = dyn_cast<DecltypeType>(TL.getTypePtr());
     while (DT && !DT->getUnderlyingType().isNull()) {
       DeducedType = DT->getUnderlyingType();
-      DT = dyn_cast<DecltypeType>(DeducedType->getTypePtr());
+      D = DT->getAsTagDecl();
+      DT = dyn_cast<DecltypeType>(DeducedType.getTypePtr());
     }
     return true;
   }
+
+  QualType DeducedType;
+  const Decl *D = nullptr;
 };
 } // namespace
 
 /// Retrieves the deduced type at a given location (auto, decltype).
-llvm::Optional<QualType> getDeducedType(ParsedAST &AST,
-                                        SourceLocation SourceLocationBeg) {
+bool hasDeducedType(ParsedAST &AST, SourceLocation SourceLocationBeg) {
   Token Tok;
   auto &ASTCtx = AST.getASTContext();
   // Only try to find a deduced type if the token is auto or decltype.
@@ -697,18 +853,17 @@ llvm::Optional<QualType> getDeducedType(ParsedAST &AST,
       Lexer::getRawToken(SourceLocationBeg, Tok, ASTCtx.getSourceManager(),
                          ASTCtx.getLangOpts(), false) ||
       !Tok.is(tok::raw_identifier)) {
-    return {};
+    return false;
   }
   AST.getPreprocessor().LookUpIdentifierInfo(Tok);
   if (!(Tok.is(tok::kw_auto) || Tok.is(tok::kw_decltype)))
-    return {};
-
-  DeducedTypeVisitor V(SourceLocationBeg);
-  V.TraverseAST(AST.getASTContext());
-  return V.getDeducedType();
+    return false;
+  return true;
 }
 
-llvm::Optional<Hover> getHover(ParsedAST &AST, Position Pos) {
+llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos,
+                                   format::FormatStyle Style) {
+  llvm::Optional<HoverInfo> HI;
   const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
   SourceLocation SourceLocationBeg =
       getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
@@ -716,16 +871,28 @@ llvm::Optional<Hover> getHover(ParsedAST &AST, Position Pos) {
   auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg);
 
   if (!Symbols.Macros.empty())
-    return getHoverContents(Symbols.Macros[0], AST);
-
-  if (!Symbols.Decls.empty())
-    return getHoverContents(Symbols.Decls[0]);
+    HI = getHoverContents(Symbols.Macros[0], AST);
+  else if (!Symbols.Decls.empty())
+    HI = getHoverContents(Symbols.Decls[0]);
+  else {
+    if (!hasDeducedType(AST, SourceLocationBeg))
+      return None;
+
+    DeducedTypeVisitor V(SourceLocationBeg);
+    V.TraverseAST(AST.getASTContext());
+    if (V.DeducedType.isNull())
+      return None;
+    HI = getHoverContents(V.DeducedType, V.D, AST.getASTContext());
+  }
 
-  auto DeducedType = getDeducedType(AST, SourceLocationBeg);
-  if (DeducedType && !DeducedType->isNull())
-    return getHoverContents(*DeducedType, AST.getASTContext());
+  auto Replacements = format::reformat(
+      Style, HI->Definition, tooling::Range(0, HI->Definition.size()));
+  if (auto Formatted =
+          tooling::applyAllReplacements(HI->Definition, Replacements))
+    HI->Definition = *Formatted;
 
-  return None;
+  HI->SymRange = getTokenRange(SourceLocationBeg, AST.getASTContext());
+  return HI;
 }
 
 std::vector<Location> findReferences(ParsedAST &AST, Position Pos,
@@ -748,7 +915,7 @@ std::vector<Location> findReferences(ParsedAST &AST, Position Pos,
   auto MainFileRefs = findRefs(Symbols.Decls, AST);
   for (const auto &Ref : MainFileRefs) {
     Location Result;
-    Result.range = getTokenRange(AST, Ref.Loc);
+    Result.range = getTokenRange(AST.getASTContext(), Ref.Loc);
     Result.uri = URIForFile::canonicalize(*MainFilePath, *MainFilePath);
     Results.push_back(std::move(Result));
   }
@@ -991,5 +1158,46 @@ getTypeHierarchy(ParsedAST &AST, Position Pos, int ResolveLevels,
   return Result;
 }
 
+MarkupContent HoverInfo::render() const {
+  MarkupContent Content;
+  Content.kind = MarkupKind::PlainText;
+  std::vector<std::string> Output;
+
+  if (NamespaceScope) {
+    llvm::raw_string_ostream Out(Content.value);
+    Out << "Declared in ";
+    // Drop trailing "::".
+    if (!LocalScope.empty())
+      Out << *NamespaceScope << llvm::StringRef(LocalScope).drop_back(2);
+    else if (NamespaceScope->empty())
+      Out << "global namespace";
+    else
+      Out << llvm::StringRef(*NamespaceScope).drop_back(2);
+    Out << "\n\n";
+  }
+
+  if (!Definition.empty()) {
+    Output.push_back(Definition);
+  } else {
+    // Builtin types
+    Output.push_back(Name);
+  }
+  Content.value += llvm::join(Output, " ");
+  return Content;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              const HoverInfo::Param &P) {
+  std::vector<llvm::StringRef> Output;
+  if (P.Type)
+    Output.push_back(*P.Type);
+  if (P.Name)
+    Output.push_back(*P.Name);
+  OS << llvm::join(Output, " ");
+  if (P.Default)
+    OS << " = " << *P.Default;
+  return OS;
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h
index 008bba50a0920..3695989e7e894 100644
--- a/clang-tools-extra/clangd/XRefs.h
+++ b/clang-tools-extra/clangd/XRefs.h
@@ -16,7 +16,10 @@
 #include "ClangdUnit.h"
 #include "Protocol.h"
 #include "index/Index.h"
+#include "index/SymbolLocation.h"
+#include "clang/Index/IndexSymbol.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 
 namespace clang {
@@ -46,8 +49,73 @@ std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
 std::vector<DocumentHighlight> findDocumentHighlights(ParsedAST &AST,
                                                       Position Pos);
 
+/// Contains detailed information about a Symbol. Especially useful when
+/// generating hover responses. It can be rendered as a hover panel, or
+/// embedding clients can use the structured information to provide their own
+/// UI.
+struct HoverInfo {
+  /// Represents parameters of a function, a template or a macro.
+  /// For example:
+  /// - void foo(ParamType Name = DefaultValue)
+  /// - #define FOO(Name)
+  /// - template <ParamType Name = DefaultType> class Foo {};
+  struct Param {
+    /// The pretty-printed parameter type, e.g. "int", or "typename" (in
+    /// TemplateParameters)
+    llvm::Optional<std::string> Type;
+    /// None for unnamed parameters.
+    llvm::Optional<std::string> Name;
+    /// None if no default is provided.
+    llvm::Optional<std::string> Default;
+  };
+
+  /// For a variable named Bar, declared in clang::clangd::Foo::getFoo the
+  /// following fields will hold:
+  /// - NamespaceScope: clang::clangd::
+  /// - LocalScope: Foo::getFoo::
+  /// - Name: Bar
+
+  /// Scopes might be None in cases where they don't make sense, e.g. macros and
+  /// auto/decltype.
+  /// Contains all of the enclosing namespaces, empty string means global
+  /// namespace.
+  llvm::Optional<std::string> NamespaceScope;
+  /// Remaining named contexts in symbol's qualified name, empty string means
+  /// symbol is not local.
+  std::string LocalScope;
+  /// Name of the symbol, does not contain any "::".
+  std::string Name;
+  llvm::Optional<Range> SymRange;
+  /// Scope containing the symbol. e.g, "global namespace", "function x::Y"
+  /// - None for deduced types, e.g "auto", "decltype" keywords.
+  SymbolKind Kind;
+  std::string Documentation;
+  /// Source code containing the definition of the symbol.
+  std::string Definition;
+
+  /// Pretty-printed variable type.
+  /// Set only for variables.
+  llvm::Optional<std::string> Type;
+  /// Set for functions and lambadas.
+  llvm::Optional<std::string> ReturnType;
+  /// Set for functions, lambdas and macros with parameters.
+  llvm::Optional<std::vector<Param>> Parameters;
+  /// Set for all templates(function, class, variable).
+  llvm::Optional<std::vector<Param>> TemplateParameters;
+
+  /// Lower to LSP struct.
+  MarkupContent render() const;
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const HoverInfo::Param &);
+inline bool operator==(const HoverInfo::Param &LHS,
+                       const HoverInfo::Param &RHS) {
+  return std::tie(LHS.Type, LHS.Name, LHS.Default) ==
+         std::tie(RHS.Type, RHS.Name, RHS.Default);
+}
+
 /// Get the hover information when hovering at \p Pos.
-llvm::Optional<Hover> getHover(ParsedAST &AST, Position Pos);
+llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos,
+                                   format::FormatStyle Style);
 
 /// Returns reference locations of the symbol at a specified \p Pos.
 /// \p Limit limits the number of results returned (0 means no limit).
diff --git a/clang-tools-extra/clangd/test/hover.test b/clang-tools-extra/clangd/test/hover.test
index 8f1ead055954f..e45164b346ea5 100644
--- a/clang-tools-extra/clangd/test/hover.test
+++ b/clang-tools-extra/clangd/test/hover.test
@@ -10,6 +10,16 @@
 # CHECK-NEXT:    "contents": {
 # CHECK-NEXT:      "kind": "plaintext",
 # CHECK-NEXT:      "value": "Declared in global namespace\n\nvoid foo()"
+# CHECK-NEXT:    },
+# CHECK-NEXT:    "range": {
+# CHECK-NEXT:      "end": {
+# CHECK-NEXT:        "character": 28,
+# CHECK-NEXT:        "line": 0
+# CHECK-NEXT:      },
+# CHECK-NEXT:      "start": {
+# CHECK-NEXT:        "character": 25,
+# CHECK-NEXT:        "line": 0
+# CHECK-NEXT:      }
 # CHECK-NEXT:    }
 # CHECK-NEXT:  }
 # CHECK-NEXT:}
@@ -19,6 +29,29 @@
 # CHECK-NEXT:  "jsonrpc": "2.0",
 # CHECK-NEXT:  "result": null
 ---
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main2.cpp","languageId":"cpp","version":1,"text":"enum foo{}; int main() { foo f; }\n"}}}
+---
+{"jsonrpc":"2.0","id":1,"method":"textDocument/hover","params":{"textDocument":{"uri":"test:///main2.cpp"},"position":{"line":0,"character":27}}}
+#      CHECK:  "id": 1,
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": {
+# CHECK-NEXT:    "contents": {
+# CHECK-NEXT:      "kind": "plaintext",
+# CHECK-NEXT:      "value": "Declared in global namespace\n\nenum foo {}"
+# CHECK-NEXT:    },
+# CHECK-NEXT:    "range": {
+# CHECK-NEXT:      "end": {
+# CHECK-NEXT:        "character": 28,
+# CHECK-NEXT:        "line": 0
+# CHECK-NEXT:      },
+# CHECK-NEXT:      "start": {
+# CHECK-NEXT:        "character": 25,
+# CHECK-NEXT:        "line": 0
+# CHECK-NEXT:      }
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:}
+---
 {"jsonrpc":"2.0","id":3,"method":"shutdown"}
 ---
 {"jsonrpc":"2.0","method":"exit"}
diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp
index cccb71c39a502..2c7b595af4821 100644
--- a/clang-tools-extra/clangd/unittests/TestTU.cpp
+++ b/clang-tools-extra/clangd/unittests/TestTU.cpp
@@ -59,8 +59,7 @@ ParsedAST TestTU::build() const {
                     /*OldPreamble=*/nullptr,
                     /*OldCompileCommand=*/Inputs.CompileCommand, Inputs,
                     /*StoreInMemory=*/true, /*PreambleCallback=*/nullptr);
-  auto AST = buildAST(FullFilename, createInvocationFromCommandLine(Cmd),
-                      Inputs, Preamble);
+  auto AST = buildAST(FullFilename, std::move(CI), Inputs, Preamble);
   if (!AST.hasValue()) {
     ADD_FAILURE() << "Failed to build code:\n" << Code;
     llvm_unreachable("Failed to build TestTU!");
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 2badcffd04c87..677f3f426c552 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -9,6 +9,7 @@
 #include "ClangdUnit.h"
 #include "Compiler.h"
 #include "Matchers.h"
+#include "Protocol.h"
 #include "SyncAPI.h"
 #include "TestFS.h"
 #include "TestTU.h"
@@ -16,6 +17,7 @@
 #include "index/FileIndex.h"
 #include "index/SymbolCollector.h"
 #include "clang/Index/IndexingAction.h"
+#include "llvm/ADT/None.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
@@ -569,6 +571,306 @@ int [[bar_not_preamble]];
                               HeaderNotInPreambleAnnotations.range())));
 }
 
+TEST(Hover, Structured) {
+  struct {
+    const char *const Code;
+    const std::function<void(HoverInfo &)> ExpectedBuilder;
+  } Cases[] = {
+      // Global scope.
+      {R"cpp(
+          // Best foo ever.
+          void [[fo^o]]() {}
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.Name = "foo";
+         HI.Kind = SymbolKind::Function;
+         HI.Documentation = "Best foo ever.";
+         HI.Definition = "void foo()";
+         HI.ReturnType = "void";
+         HI.Type = "void()";
+         HI.Parameters.emplace();
+       }},
+      // Inside namespace
+      {R"cpp(
+          namespace ns1 { namespace ns2 {
+            /// Best foo ever.
+            void [[fo^o]]() {}
+          }}
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "ns1::ns2::";
+         HI.Name = "foo";
+         HI.Kind = SymbolKind::Function;
+         HI.Documentation = "Best foo ever.";
+         HI.Definition = "void foo()";
+         HI.ReturnType = "void";
+         HI.Type = "void()";
+         HI.Parameters.emplace();
+       }},
+      // Field
+      {R"cpp(
+          namespace ns1 { namespace ns2 {
+            struct Foo {
+              int [[b^ar]];
+            };
+          }}
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "ns1::ns2::";
+         HI.LocalScope = "Foo::";
+         HI.Name = "bar";
+         HI.Kind = SymbolKind::Field;
+         HI.Definition = "int bar";
+         HI.Type = "int";
+       }},
+      // Local to class method.
+      {R"cpp(
+          namespace ns1 { namespace ns2 {
+            struct Foo {
+              void foo() {
+                int [[b^ar]];
+              }
+            };
+          }}
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "ns1::ns2::";
+         HI.LocalScope = "Foo::foo::";
+         HI.Name = "bar";
+         HI.Kind = SymbolKind::Variable;
+         HI.Definition = "int bar";
+         HI.Type = "int";
+       }},
+      // Anon namespace and local scope.
+      {R"cpp(
+          namespace ns1 { namespace {
+            struct {
+              int [[b^ar]];
+            } T;
+          }}
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "ns1::(anonymous)::";
+         HI.LocalScope = "(anonymous struct)::";
+         HI.Name = "bar";
+         HI.Kind = SymbolKind::Field;
+         HI.Definition = "int bar";
+         HI.Type = "int";
+       }},
+      // Variable with template type
+      {R"cpp(
+          template <typename T, class... Ts> class Foo {};
+          Foo<int, char, bool> [[fo^o]];
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.Name = "foo";
+         HI.Kind = SymbolKind::Variable;
+         HI.Definition = "Foo<int, char, bool> foo";
+         HI.Type = "Foo<int, char, bool>";
+       }},
+      // Implicit template instantiation
+      {R"cpp(
+          template <typename T> class vector{};
+          [[vec^tor]]<int> foo;
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.Name = "vector";
+         HI.Kind = SymbolKind::Class;
+         HI.Definition = "template <typename T> class vector {}";
+         HI.TemplateParameters = {
+             {std::string("typename"), std::string("T"), llvm::None},
+         };
+       }},
+      // Class template
+      {R"cpp(
+          template <template<typename, bool...> class C,
+                    typename = char,
+                    int = 0,
+                    bool Q = false,
+                    class... Ts> class Foo {};
+          template <template<typename, bool...> class T>
+          [[F^oo]]<T> foo;
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.Name = "Foo";
+         HI.Kind = SymbolKind::Class;
+         HI.Definition =
+             R"cpp(template <template <typename, bool...> class C, typename = char, int = 0,
+          bool Q = false, class... Ts>
+class Foo {})cpp";
+         HI.TemplateParameters = {
+             {std::string("template <typename, bool...> class"),
+              std::string("C"), llvm::None},
+             {std::string("typename"), llvm::None, std::string("char")},
+             {std::string("int"), llvm::None, std::string("0")},
+             {std::string("bool"), std::string("Q"), std::string("false")},
+             {std::string("class..."), std::string("Ts"), llvm::None},
+         };
+       }},
+      // Function template
+      {R"cpp(
+          template <template<typename, bool...> class C,
+                    typename = char,
+                    int = 0,
+                    bool Q = false,
+                    class... Ts> void foo();
+          template<typename, bool...> class Foo;
+
+          void bar() {
+            [[fo^o]]<Foo>();
+          }
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.Name = "foo";
+         HI.Kind = SymbolKind::Function;
+         HI.Definition =
+             R"cpp(template <template <typename, bool...> class C, typename = char, int = 0,
+          bool Q = false, class... Ts>
+void foo())cpp";
+         HI.ReturnType = "void";
+         HI.Type = "void()";
+         HI.Parameters.emplace();
+         HI.TemplateParameters = {
+             {std::string("template <typename, bool...> class"),
+              std::string("C"), llvm::None},
+             {std::string("typename"), llvm::None, std::string("char")},
+             {std::string("int"), llvm::None, std::string("0")},
+             {std::string("bool"), std::string("Q"), std::string("false")},
+             {std::string("class..."), std::string("Ts"), llvm::None},
+         };
+       }},
+      // Function decl
+      {R"cpp(
+          template<typename, bool...> class Foo {};
+          Foo<bool, true, false> foo(int, bool T = false);
+
+          void bar() {
+            [[fo^o]](3);
+          }
+          )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.Name = "foo";
+         HI.Kind = SymbolKind::Function;
+         HI.Definition = "Foo<bool, true, false> foo(int, bool T = false)";
+         HI.ReturnType = "Foo<bool, true, false>";
+         HI.Type = "Foo<bool, true, false>(int, bool)";
+         HI.Parameters = {
+             {std::string("int"), llvm::None, llvm::None},
+             {std::string("bool"), std::string("T"), std::string("false")},
+         };
+       }},
+      // Lambda variable
+      {R"cpp(
+        void foo() {
+          int bar = 5;
+          auto lamb = [&bar](int T, bool B) -> bool { return T && B && bar; };
+          bool res = [[lam^b]](bar, false);
+        }
+        )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.LocalScope = "foo::";
+         HI.Name = "lamb";
+         HI.Kind = SymbolKind::Variable;
+         HI.Definition = "auto lamb = [&bar](int T, bool B) -> bool {}";
+         HI.Type = std::string("class (lambda)");
+         return HI;
+       }},
+      // Local variable in lambda
+      {R"cpp(
+        void foo() {
+          auto lamb = []{int [[te^st]];};
+        }
+        )cpp",
+       [](HoverInfo &HI) {
+         HI.NamespaceScope = "";
+         HI.LocalScope = "foo::(anonymous class)::operator()::";
+         HI.Name = "test";
+         HI.Kind = SymbolKind::Variable;
+         HI.Definition = "int test";
+         HI.Type = "int";
+       }},
+
+      // auto on lambda
+      {R"cpp(
+        void foo() {
+          [[au^to]] lamb = []{};
+        }
+        )cpp",
+       [](HoverInfo &HI) {
+         HI.Name = "class (lambda)";
+         HI.Kind = SymbolKind::Variable;
+       }},
+      // auto on template instantiation
+      {R"cpp(
+        template<typename T> class Foo{};
+        void foo() {
+          [[au^to]] x = Foo<int>();
+        }
+        )cpp",
+       [](HoverInfo &HI) {
+         HI.Name = "class Foo<int>";
+         HI.Kind = SymbolKind::Variable;
+       }},
+      // auto on specialized template
+      {R"cpp(
+        template<typename T> class Foo{};
+        template<> class Foo<int>{};
+        void foo() {
+          [[au^to]] x = Foo<int>();
+        }
+        )cpp",
+       [](HoverInfo &HI) {
+         HI.Name = "class Foo<int>";
+         HI.Kind = SymbolKind::Variable;
+       }},
+
+      // macro
+      {R"cpp(
+        // Best MACRO ever.
+        #define MACRO(x,y,z) void foo(x, y, z);
+        [[MAC^RO]](int, double d, bool z = false);
+        )cpp",
+       [](HoverInfo &HI) {
+         HI.Name = "MACRO", HI.Kind = SymbolKind::String,
+         HI.Definition = "#define MACRO(x, y, z) void foo(x, y, z);";
+       }},
+  };
+  for (const auto &Case : Cases) {
+    SCOPED_TRACE(Case.Code);
+
+    Annotations T(Case.Code);
+    TestTU TU = TestTU::withCode(T.code());
+    TU.ExtraArgs.push_back("-std=c++17");
+    auto AST = TU.build();
+    ASSERT_TRUE(AST.getDiagnostics().empty());
+
+    auto H = getHover(AST, T.point(), format::getLLVMStyle());
+    ASSERT_TRUE(H);
+    HoverInfo Expected;
+    Expected.SymRange = T.range();
+    Case.ExpectedBuilder(Expected);
+
+    EXPECT_EQ(H->NamespaceScope, Expected.NamespaceScope);
+    EXPECT_EQ(H->LocalScope, Expected.LocalScope);
+    EXPECT_EQ(H->Name, Expected.Name);
+    EXPECT_EQ(H->Kind, Expected.Kind);
+    EXPECT_EQ(H->Documentation, Expected.Documentation);
+    EXPECT_EQ(H->Definition, Expected.Definition);
+    EXPECT_EQ(H->Type, Expected.Type);
+    EXPECT_EQ(H->ReturnType, Expected.ReturnType);
+    EXPECT_EQ(H->Parameters, Expected.Parameters);
+    EXPECT_EQ(H->TemplateParameters, Expected.TemplateParameters);
+    EXPECT_EQ(H->SymRange, Expected.SymRange);
+  }
+} // namespace clang
+
 TEST(Hover, All) {
   struct OneTest {
     StringRef Input;
@@ -591,7 +893,7 @@ TEST(Hover, All) {
               int test1 = bonjour;
             }
           )cpp",
-          "Declared in function main\n\nint bonjour",
+          "Declared in main\n\nint bonjour",
       },
       {
           R"cpp(// Local variable in method
@@ -602,7 +904,7 @@ TEST(Hover, All) {
               }
             };
           )cpp",
-          "Declared in function s::method\n\nint bonjour",
+          "Declared in s::method\n\nint bonjour",
       },
       {
           R"cpp(// Struct
@@ -613,7 +915,7 @@ TEST(Hover, All) {
               ns1::My^Class* Params;
             }
           )cpp",
-          "Declared in namespace ns1\n\nstruct MyClass {}",
+          "Declared in ns1\n\nstruct MyClass {}",
       },
       {
           R"cpp(// Class
@@ -624,7 +926,7 @@ TEST(Hover, All) {
               ns1::My^Class* Params;
             }
           )cpp",
-          "Declared in namespace ns1\n\nclass MyClass {}",
+          "Declared in ns1\n\nclass MyClass {}",
       },
       {
           R"cpp(// Union
@@ -635,7 +937,7 @@ TEST(Hover, All) {
               ns1::My^Union Params;
             }
           )cpp",
-          "Declared in namespace ns1\n\nunion MyUnion {}",
+          "Declared in ns1\n\nunion MyUnion {}",
       },
       {
           R"cpp(// Function definition via pointer
@@ -663,7 +965,7 @@ TEST(Hover, All) {
               bar.^x;
             }
           )cpp",
-          "Declared in struct Foo\n\nint x",
+          "Declared in Foo\n\nint x",
       },
       {
           R"cpp(// Field with initialization
@@ -673,7 +975,7 @@ TEST(Hover, All) {
               bar.^x;
             }
           )cpp",
-          "Declared in struct Foo\n\nint x = 5",
+          "Declared in Foo\n\nint x = 5",
       },
       {
           R"cpp(// Static field
@@ -682,7 +984,7 @@ TEST(Hover, All) {
               Foo::^x;
             }
           )cpp",
-          "Declared in struct Foo\n\nstatic int x",
+          "Declared in Foo\n\nstatic int x",
       },
       {
           R"cpp(// Field, member initializer
@@ -691,7 +993,7 @@ TEST(Hover, All) {
               Foo() : ^x(0) {}
             };
           )cpp",
-          "Declared in struct Foo\n\nint x",
+          "Declared in Foo\n\nint x",
       },
       {
           R"cpp(// Field, GNU old-style field designator
@@ -700,7 +1002,7 @@ TEST(Hover, All) {
               Foo bar = { ^x : 1 };
             }
           )cpp",
-          "Declared in struct Foo\n\nint x",
+          "Declared in Foo\n\nint x",
       },
       {
           R"cpp(// Field, field designator
@@ -709,7 +1011,7 @@ TEST(Hover, All) {
               Foo bar = { .^x = 2 };
             }
           )cpp",
-          "Declared in struct Foo\n\nint x",
+          "Declared in Foo\n\nint x",
       },
       {
           R"cpp(// Method call
@@ -719,7 +1021,7 @@ TEST(Hover, All) {
               bar.^x();
             }
           )cpp",
-          "Declared in struct Foo\n\nint x()",
+          "Declared in Foo\n\nint x()",
       },
       {
           R"cpp(// Static method call
@@ -728,7 +1030,7 @@ TEST(Hover, All) {
               Foo::^x();
             }
           )cpp",
-          "Declared in struct Foo\n\nstatic int x()",
+          "Declared in Foo\n\nstatic int x()",
       },
       {
           R"cpp(// Typedef
@@ -746,7 +1048,7 @@ TEST(Hover, All) {
             } // namespace ns
             int main() { ^ns::Foo::bar(); }
           )cpp",
-          "Declared in global namespace\n\nnamespace ns {\n}",
+          "Declared in global namespace\n\nnamespace ns {}",
       },
       {
           R"cpp(// Anonymous namespace
@@ -757,7 +1059,7 @@ TEST(Hover, All) {
             } // namespace ns
             int main() { ns::f^oo++; }
           )cpp",
-          "Declared in namespace ns::(anonymous)\n\nint foo",
+          "Declared in ns::(anonymous)\n\nint foo",
       },
       {
           R"cpp(// Macro
@@ -783,9 +1085,8 @@ TEST(Hover, All) {
             }
             int main() ^MACRO
           )cpp",
-          R"cpp(#define MACRO {\
-              return 0;\
-            })cpp",
+          "#define MACRO                                                       "
+          "           \\\n  { return 0; }",
       },
       {
           R"cpp(// Forward class declaration
@@ -812,7 +1113,7 @@ TEST(Hover, All) {
               Hel^lo hello = ONE;
             }
           )cpp",
-          "Declared in global namespace\n\nenum Hello {\n}",
+          "Declared in global namespace\n\nenum Hello {}",
       },
       {
           R"cpp(// Enumerator
@@ -823,7 +1124,7 @@ TEST(Hover, All) {
               Hello hello = O^NE;
             }
           )cpp",
-          "Declared in enum Hello\n\nONE",
+          "Declared in Hello\n\nONE",
       },
       {
           R"cpp(// Enumerator in anonymous enum
@@ -834,7 +1135,7 @@ TEST(Hover, All) {
               int hello = O^NE;
             }
           )cpp",
-          "Declared in enum (anonymous)\n\nONE",
+          "Declared in global namespace\n\nONE",
       },
       {
           R"cpp(// Global variable
@@ -854,7 +1155,7 @@ TEST(Hover, All) {
               ns1::he^y++;
             }
           )cpp",
-          "Declared in namespace ns1\n\nstatic int hey = 10",
+          "Declared in ns1\n\nstatic int hey = 10",
       },
       {
           R"cpp(// Field in anonymous struct
@@ -865,7 +1166,7 @@ TEST(Hover, All) {
               s.he^llo++;
             }
           )cpp",
-          "Declared in struct (anonymous)\n\nint hello",
+          "Declared in (anonymous struct)\n\nint hello",
       },
       {
           R"cpp(// Templated function
@@ -886,7 +1187,7 @@ TEST(Hover, All) {
             };
             void g() { struct outer o; o.v.d^ef++; }
           )cpp",
-          "Declared in union outer::(anonymous)\n\nint def",
+          "Declared in outer::(anonymous union)\n\nint def",
       },
       {
           R"cpp(// Nothing
@@ -1194,9 +1495,9 @@ TEST(Hover, All) {
     TestTU TU = TestTU::withCode(T.code());
     TU.ExtraArgs.push_back("-std=c++17");
     auto AST = TU.build();
-    if (auto H = getHover(AST, T.point())) {
+    if (auto H = getHover(AST, T.point(), format::getLLVMStyle())) {
       EXPECT_NE("", Test.ExpectedHover) << Test.Input;
-      EXPECT_EQ(H->contents.value, Test.ExpectedHover.str()) << Test.Input;
+      EXPECT_EQ(H->render().value, Test.ExpectedHover.str()) << Test.Input;
     } else
       EXPECT_EQ("", Test.ExpectedHover.str()) << Test.Input;
   }

From 4b48aa0e30f14e4c4da7a8f162a8c7e1e843809f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 28 May 2019 10:53:23 +0000
Subject: [PATCH 0335/1176] [X86] X86CmovConverterPass::collectCmovCandidates -
 fix uninitialized variable warnings. NFCI.

llvm-svn: 361804
---
 llvm/lib/Target/X86/X86CmovConversion.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 3585c39df5124..ab7b31d8aa703 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -280,7 +280,8 @@ bool X86CmovConverterPass::collectCmovCandidates(
     Group.clear();
     // Condition code of first CMOV instruction current processed range and its
     // opposite condition code.
-    X86::CondCode FirstCC, FirstOppCC, MemOpCC;
+    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
+                  MemOpCC = X86::COND_INVALID;
     // Indicator of a non CMOVrr instruction in the current processed range.
     bool FoundNonCMOVInst = false;
     // Indicator for current processed CMOV-group if it should be skipped.

From 48c8bdad2afb627a3a3ef7e6f450b97b6a4a6b9c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 28 May 2019 11:10:56 +0000
Subject: [PATCH 0336/1176] [SLPVectorizer][X86] Add broadcast test case from
 D62427

llvm-svn: 361805
---
 .../Transforms/SLPVectorizer/X86/broadcast.ll | 124 ++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
new file mode 100644
index 0000000000000..79525cff5a3ca
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-threshold=-999 < %s | FileCheck %s
+
+
+; S[0] = %v1 + %v2
+; S[1] = %v2 + %v1
+; S[2] = %v2 + %v1
+; S[3] = %v1 + %v2
+;
+; TODO: We should broadcast %v1 and %v2
+;
+define void @bcast_vals(i64 *%A, i64 *%B, i64 *%S) {
+; CHECK-LABEL: @bcast_vals(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP5]], i32 1
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0
+; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 1
+; CHECK-NEXT:    [[IDXS2:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 2
+; CHECK-NEXT:    [[IDXS3:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 3
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>*
+; CHECK-NEXT:    store <4 x i64> [[TMP7]], <4 x i64>* [[TMP8]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A0 = load i64, i64 *%A, align 8
+  %B0 = load i64, i64 *%B, align 8
+
+  %v1 = sub i64 %A0, 1
+  %v2 = sub i64 %B0, 1
+
+  %Add0 = add i64 %v1, %v2
+  %Add1 = add i64 %v2, %v1
+  %Add2 = add i64 %v2, %v1
+  %Add3 = add i64 %v1, %v2
+
+  %idxS0 = getelementptr inbounds i64, i64* %S, i64 0
+  %idxS1 = getelementptr inbounds i64, i64* %S, i64 1
+  %idxS2 = getelementptr inbounds i64, i64* %S, i64 2
+  %idxS3 = getelementptr inbounds i64, i64* %S, i64 3
+
+  store i64 %Add0, i64 *%idxS0, align 8
+  store i64 %Add1, i64 *%idxS1, align 8
+  store i64 %Add2, i64 *%idxS2, align 8
+  store i64 %Add3, i64 *%idxS3, align 8
+  ret void
+}
+
+; S[0] = %v1 + %v2
+; S[1] = %v3 + %v1
+; S[2] = %v5 + %v1
+; S[3] = %v1 + %v4
+;
+; TODO: We should broadcast %v1.
+;
+define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
+; CHECK-LABEL: @bcast_vals2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A0:%.*]] = load i16, i16* [[A:%.*]], align 8
+; CHECK-NEXT:    [[B0:%.*]] = load i16, i16* [[B:%.*]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = load i16, i16* [[C:%.*]], align 8
+; CHECK-NEXT:    [[D0:%.*]] = load i16, i16* [[D:%.*]], align 8
+; CHECK-NEXT:    [[E0:%.*]] = load i16, i16* [[E:%.*]], align 8
+; CHECK-NEXT:    [[V1:%.*]] = sext i16 [[A0]] to i32
+; CHECK-NEXT:    [[V2:%.*]] = sext i16 [[B0]] to i32
+; CHECK-NEXT:    [[V3:%.*]] = sext i16 [[C0]] to i32
+; CHECK-NEXT:    [[V4:%.*]] = sext i16 [[D0]] to i32
+; CHECK-NEXT:    [[V5:%.*]] = sext i16 [[E0]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[V1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[V3]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V5]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[V1]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[V2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[V1]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V4]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP3]], [[TMP7]]
+; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
+; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1
+; CHECK-NEXT:    [[IDXS2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2
+; CHECK-NEXT:    [[IDXS3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %A0 = load i16, i16 *%A, align 8
+  %B0 = load i16, i16 *%B, align 8
+  %C0 = load i16, i16 *%C, align 8
+  %D0 = load i16, i16 *%D, align 8
+  %E0 = load i16, i16 *%E, align 8
+
+  %v1 = sext i16 %A0 to i32
+  %v2 = sext i16 %B0 to i32
+  %v3 = sext i16 %C0 to i32
+  %v4 = sext i16 %D0 to i32
+  %v5 = sext i16 %E0 to i32
+
+  %Add0 = add i32 %v1, %v2
+  %Add1 = add i32 %v3, %v1
+  %Add2 = add i32 %v5, %v1
+  %Add3 = add i32 %v1, %v4
+
+  %idxS0 = getelementptr inbounds i32, i32* %S, i64 0
+  %idxS1 = getelementptr inbounds i32, i32* %S, i64 1
+  %idxS2 = getelementptr inbounds i32, i32* %S, i64 2
+  %idxS3 = getelementptr inbounds i32, i32* %S, i64 3
+
+  store i32 %Add0, i32 *%idxS0, align 8
+  store i32 %Add1, i32 *%idxS1, align 8
+  store i32 %Add2, i32 *%idxS2, align 8
+  store i32 %Add3, i32 *%idxS3, align 8
+  ret void
+}

From 241dcb386e7dfd4f775f79dab5386d0803911dda Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Tue, 28 May 2019 11:21:59 +0000
Subject: [PATCH 0337/1176] Revert [ELF] Error on relocations to STT_SECTION
 symbols if the sections were discarded

This reverts r361792 (git commit cfca5095df0209c60109696d6cc368d49e2c5939), the
revision causes link errors internally, will share more details with the
author.

llvm-svn: 361806
---
 lld/ELF/InputFiles.cpp                         |  3 ---
 lld/ELF/InputSection.cpp                       |  3 +--
 lld/ELF/Relocations.cpp                        | 14 +++-----------
 lld/test/ELF/comdat-discarded-error.s          | 12 +-----------
 lld/test/ELF/comdat-discarded-reloc.s          |  2 +-
 lld/test/ELF/comdat.s                          |  4 +++-
 lld/test/ELF/invalid-undef-section-symbol.test |  2 +-
 lld/test/ELF/relocatable-eh-frame.s            |  4 +---
 8 files changed, 11 insertions(+), 33 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 16991421fe6ce..2b89533191a19 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -997,9 +997,6 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
 
       if (ESym.st_shndx == SHN_UNDEF)
         this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
-      else if (Sec == &InputSection::Discarded)
-        this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type,
-                                           /*DiscardedSecIdx=*/SecIdx);
       else
         this->Symbols[I] =
             make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 74878931afb40..07a30ed57c476 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -438,8 +438,7 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
       // hopefully creates a frame that is ignored at runtime.
       auto *D = dyn_cast<Defined>(&Sym);
       if (!D) {
-        warn("STT_SECTION symbol should be defined");
-        P->setSymbolAndType(0, 0, false);
+        error("STT_SECTION symbol should be defined");
         continue;
       }
       SectionBase *Section = D->Section->Repl;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index a8ed792164bc8..12c38c70dcc41 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -681,17 +681,9 @@ static std::string maybeReportDiscarded(Undefined &Sym, InputSectionBase &Sec,
     return "";
   ArrayRef<Elf_Shdr_Impl<ELFT>> ObjSections =
       CHECK(File->getObj().sections(), File);
-
-  std::string Msg;
-  if (Sym.Type == ELF::STT_SECTION) {
-    Msg = "relocation refers to a discarded section: ";
-    Msg += CHECK(
-        File->getObj().getSectionName(&ObjSections[Sym.DiscardedSecIdx]), File);
-  } else {
-    Msg = "relocation refers to a symbol in a discarded section: " +
-          toString(Sym);
-  }
-  Msg += "\n>>> defined in " + toString(File);
+  std::string Msg =
+      "relocation refers to a symbol in a discarded section: " + toString(Sym) +
+      "\n>>> defined in " + toString(File);
 
   Elf_Shdr_Impl<ELFT> ELFSec = ObjSections[Sym.DiscardedSecIdx - 1];
   if (ELFSec.sh_type != SHT_GROUP)
diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s
index 0f6b417b0fa4a..3584783cde09d 100644
--- a/lld/test/ELF/comdat-discarded-error.s
+++ b/lld/test/ELF/comdat-discarded-error.s
@@ -5,7 +5,7 @@
 # RUN: echo '.section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' | \
 # RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o
 
-# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
 
 # CHECK:      error: relocation refers to a symbol in a discarded section: bar
 # CHECK-NEXT: >>> defined in {{.*}}3.o
@@ -13,16 +13,6 @@
 # CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
 # CHECK-NEXT: >>> referenced by {{.*}}1.o:(.text+0x1)
 
-# CHECK:      error: relocation refers to a discarded section: .text.foo
-# CHECK-NEXT: >>> defined in {{.*}}1.o
-# CHECK-NEXT: >>> section group signature: foo
-# CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
-# CHECK-NEXT: >>> referenced by {{.*}}1.o:(.data+0x0)
-
 .globl _start
 _start:
   jmp bar
-
-.section .text.foo,"axG",@progbits,foo,comdat
-.data
-  .quad .text.foo
diff --git a/lld/test/ELF/comdat-discarded-reloc.s b/lld/test/ELF/comdat-discarded-reloc.s
index d12732cd3569b..d23baf386e92d 100644
--- a/lld/test/ELF/comdat-discarded-reloc.s
+++ b/lld/test/ELF/comdat-discarded-reloc.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat-discarded-reloc.s -o %t2.o
-# RUN: ld.lld -gc-sections --noinhibit-exec %t.o %t2.o -o /dev/null
+# RUN: ld.lld -gc-sections %t.o %t2.o -o %t
 
 ## ELF spec doesn't allow a relocation to point to a deduplicated
 ## COMDAT section. Unfortunately this happens in practice (e.g. .eh_frame)
diff --git a/lld/test/ELF/comdat.s b/lld/test/ELF/comdat.s
index 9e3f5a81d300e..86103e5d9eb75 100644
--- a/lld/test/ELF/comdat.s
+++ b/lld/test/ELF/comdat.s
@@ -1,7 +1,7 @@
 // REQUIRES: x86
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat.s -o %t2.o
-// RUN: ld.lld -shared %t.o %t2.o -o %t
+// RUN: ld.lld -shared %t.o %t.o %t2.o -o %t
 // RUN: llvm-objdump -d %t | FileCheck %s
 // RUN: llvm-readobj -S --symbols %t | FileCheck --check-prefix=READ %s
 
@@ -31,7 +31,9 @@ foo:
 // CHECK-EMPTY:
 // CHECK-NEXT: bar:
 // 0x1000 - 0x1001 - 5 = -6
+// 0      - 0x1006 - 5 = -4107
 // CHECK-NEXT:   1001:	{{.*}}  callq  -6
+// CHECK-NEXT:   1006:	{{.*}}  callq  -4107
 
         .section .text3,"axG",@progbits,zed,comdat,unique,0
 
diff --git a/lld/test/ELF/invalid-undef-section-symbol.test b/lld/test/ELF/invalid-undef-section-symbol.test
index 80e5a1464d740..1d66885eadf8e 100644
--- a/lld/test/ELF/invalid-undef-section-symbol.test
+++ b/lld/test/ELF/invalid-undef-section-symbol.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj %s -o %t.o
-# RUN: not ld.lld -r --fatal-warnings %t.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld -r %t.o -o /dev/null 2>&1 | FileCheck %s
 
 # We used to crash at this.
 # CHECK: STT_SECTION symbol should be defined
diff --git a/lld/test/ELF/relocatable-eh-frame.s b/lld/test/ELF/relocatable-eh-frame.s
index 6172dd355db4a..dee906acb87fb 100644
--- a/lld/test/ELF/relocatable-eh-frame.s
+++ b/lld/test/ELF/relocatable-eh-frame.s
@@ -1,12 +1,10 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: ld.lld -r %t.o %t.o -o %t 2>&1 | FileCheck --check-prefix=WARN %s
+# RUN: ld.lld -r %t.o %t.o -o %t
 # RUN: llvm-readobj -r %t | FileCheck %s
 # RUN: ld.lld %t -o %t.so -shared
 # RUN: llvm-objdump -h %t.so | FileCheck --check-prefix=DSO %s
 
-# WARN: STT_SECTION symbol should be defined
-
 # DSO: .eh_frame     00000034
 
 # CHECK:      Relocations [

From 5b86163f62d18040614df8ad445012635029eba2 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Tue, 28 May 2019 11:24:20 +0000
Subject: [PATCH 0338/1176] Fix some llvm-readelf tests after r361633

They were failing on 32-bit Windows. In the cases where I've changed
test expectations, I've checked that they match the output of GNU
readelf.

llvm-svn: 361807
---
 llvm/test/tools/llvm-readobj/mips-got.test |  2 +-
 llvm/test/tools/llvm-readobj/mips-plt.test |  2 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp      | 13 +++++++------
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/mips-got.test b/llvm/test/tools/llvm-readobj/mips-got.test
index b3424dbc6f1f4..ee5ba2bfdc2ed 100644
--- a/llvm/test/tools/llvm-readobj/mips-got.test
+++ b/llvm/test/tools/llvm-readobj/mips-got.test
@@ -405,7 +405,7 @@ GNU-GOT-EXE:      PLT GOT:
 GNU-GOT-EXE:       Reserved entries:
 GNU-GOT-EXE-NEXT:    Address  Initial Purpose
 GNU-GOT-EXE-NEXT:   00410854 00000000 PLT lazy resolver
-GNU-GOT-EXE-NEXT:   00410894 80000000 Module pointer
+GNU-GOT-EXE-NEXT:   00410858 00000000 Module pointer
 
 GNU-GOT-EXE:       Entries:
 GNU-GOT-EXE-NEXT:    Address  Initial Sym.Val. Type    Ndx Name
diff --git a/llvm/test/tools/llvm-readobj/mips-plt.test b/llvm/test/tools/llvm-readobj/mips-plt.test
index 64c1f785d03ce..b130a67d0443f 100644
--- a/llvm/test/tools/llvm-readobj/mips-plt.test
+++ b/llvm/test/tools/llvm-readobj/mips-plt.test
@@ -56,7 +56,7 @@ GNU-NEXT: PLT GOT:
 GNU:       Reserved entries:
 GNU-NEXT:    Address  Initial Purpose
 GNU-NEXT:   00410814 00000000 PLT lazy resolver
-GNU-NEXT:   00410854 80000000 Module pointer
+GNU-NEXT:   00410818 00000000 Module pointer
 
 GNU:       Entries:
 GNU-NEXT:    Address  Initial Sym.Val. Type    Ndx Name
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 48dd47d10824a..8bc339eb37956 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3346,9 +3346,8 @@ template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
     OS << "  Tag        Type                 Name/Value\n";
   for (auto Entry : Table) {
     uintX_t Tag = Entry.getTag();
-    std::string TypeString = getTypeString(Obj->getHeader()->e_machine, Tag);
-    OS << format("  0x%0*x %-20s ", Is64 ? 16 : 8, Tag,
-                 ("(" + TypeString + ")").c_str());
+    std::string TypeString = std::string("(") + getTypeString(Obj->getHeader()->e_machine, Tag) + ")";
+    OS << "  " << format_hex(Tag, Is64 ? 18 : 10) << format(" %-20s ", TypeString.c_str());
     this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
     OS << "\n";
   }
@@ -4134,7 +4133,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
   size_t Bias = ELFT::Is64Bits ? 8 : 0;
   auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
     OS.PadToColumn(2);
-    OS << format_hex_no_prefix(Parser.getGotAddress(E), 8 + Bias);
+    OS << format_hex_no_prefix(Parser.getPltAddress(E), 8 + Bias);
     OS.PadToColumn(11 + Bias);
     OS << format_hex_no_prefix(*E, 8 + Bias);
     OS.PadToColumn(20 + 2 * Bias);
@@ -4147,7 +4146,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
   OS << "   Address  Initial Purpose\n";
   PrintEntry(Parser.getPltLazyResolver(), "PLT lazy resolver");
   if (Parser.getPltModulePointer())
-    PrintEntry(Parser.getGotModulePointer(), "Module pointer");
+    PrintEntry(Parser.getPltModulePointer(), "Module pointer");
 
   if (!Parser.getPltEntries().empty()) {
     OS << "\n";
@@ -4159,7 +4158,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
           Sym, this->dumper()->getDynamicStringTable(), false);
 
       OS.PadToColumn(2);
-      OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias));
+      OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias));
       OS.PadToColumn(11 + Bias);
       OS << to_string(format_hex_no_prefix(E, 8 + Bias));
       OS.PadToColumn(20 + 2 * Bias);
@@ -4919,6 +4918,7 @@ void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
 template <class ELFT>
 void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
   auto PrintEntry = [&](const Elf_Addr *E) {
+		// XXX: here?
     W.printHex("Address", Parser.getGotAddress(E));
     W.printNumber("Access", Parser.getGotOffset(E));
     W.printHex("Initial", *E);
@@ -4983,6 +4983,7 @@ void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
 template <class ELFT>
 void LLVMStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
   auto PrintEntry = [&](const Elf_Addr *E) {
+		// XXX: here? no.
     W.printHex("Address", Parser.getPltAddress(E));
     W.printHex("Initial", *E);
   };

From 53f2f3286572cb879b3861d7c15480e4d830dd3b Mon Sep 17 00:00:00 2001
From: Yevgeny Rouban <yevgeny.rouban@azul.com>
Date: Tue, 28 May 2019 11:33:50 +0000
Subject: [PATCH 0339/1176] [CorrelatedValuePropagation] Fix prof
 branch_weights metadata handling for SwitchInst

This patch fixes the CorrelatedValuePropagation pass to keep
prof branch_weights metadata of SwitchInst consistent.
It makes use of SwitchInstProfUpdateWrapper.
New tests are added.

Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D62126

llvm-svn: 361808
---
 .../Scalar/CorrelatedValuePropagation.cpp     | 117 ++++++++---------
 .../CorrelatedValuePropagation/profmd.ll      | 119 ++++++++++++++++++
 2 files changed, 180 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 4e4715be61aed..4cb4d21754a10 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -306,11 +306,11 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
 /// that cannot fire no matter what the incoming edge can safely be removed. If
 /// a case fires on every incoming edge then the entire switch can be removed
 /// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
                           DominatorTree *DT) {
   DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
-  Value *Cond = SI->getCondition();
-  BasicBlock *BB = SI->getParent();
+  Value *Cond = I->getCondition();
+  BasicBlock *BB = I->getParent();
 
   // If the condition was defined in same block as the switch then LazyValueInfo
   // currently won't say anything useful about it, though in theory it could.
@@ -327,67 +327,72 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
   for (auto *Succ : successors(BB))
     SuccessorsCount[Succ]++;
 
-  for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
-    ConstantInt *Case = CI->getCaseValue();
-
-    // Check to see if the switch condition is equal to/not equal to the case
-    // value on every incoming edge, equal/not equal being the same each time.
-    LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
-    for (pred_iterator PI = PB; PI != PE; ++PI) {
-      // Is the switch condition equal to the case value?
-      LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
-                                                              Cond, Case, *PI,
-                                                              BB, SI);
-      // Give up on this case if nothing is known.
-      if (Value == LazyValueInfo::Unknown) {
-        State = LazyValueInfo::Unknown;
-        break;
+  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
+    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
+    SwitchInstProfUpdateWrapper SI(*I);
+
+    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+      ConstantInt *Case = CI->getCaseValue();
+
+      // Check to see if the switch condition is equal to/not equal to the case
+      // value on every incoming edge, equal/not equal being the same each time.
+      LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+      for (pred_iterator PI = PB; PI != PE; ++PI) {
+        // Is the switch condition equal to the case value?
+        LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+                                                                Cond, Case, *PI,
+                                                                BB, SI);
+        // Give up on this case if nothing is known.
+        if (Value == LazyValueInfo::Unknown) {
+          State = LazyValueInfo::Unknown;
+          break;
+        }
+
+        // If this was the first edge to be visited, record that all other edges
+        // need to give the same result.
+        if (PI == PB) {
+          State = Value;
+          continue;
+        }
+
+        // If this case is known to fire for some edges and known not to fire for
+        // others then there is nothing we can do - give up.
+        if (Value != State) {
+          State = LazyValueInfo::Unknown;
+          break;
+        }
       }
 
-      // If this was the first edge to be visited, record that all other edges
-      // need to give the same result.
-      if (PI == PB) {
-        State = Value;
+      if (State == LazyValueInfo::False) {
+        // This case never fires - remove it.
+        BasicBlock *Succ = CI->getCaseSuccessor();
+        Succ->removePredecessor(BB);
+        CI = SI.removeCase(CI);
+        CE = SI->case_end();
+
+        // The condition can be modified by removePredecessor's PHI simplification
+        // logic.
+        Cond = SI->getCondition();
+
+        ++NumDeadCases;
+        Changed = true;
+        if (--SuccessorsCount[Succ] == 0)
+          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
         continue;
       }
-
-      // If this case is known to fire for some edges and known not to fire for
-      // others then there is nothing we can do - give up.
-      if (Value != State) {
-        State = LazyValueInfo::Unknown;
+      if (State == LazyValueInfo::True) {
+        // This case always fires.  Arrange for the switch to be turned into an
+        // unconditional branch by replacing the switch condition with the case
+        // value.
+        SI->setCondition(Case);
+        NumDeadCases += SI->getNumCases();
+        Changed = true;
         break;
       }
-    }
 
-    if (State == LazyValueInfo::False) {
-      // This case never fires - remove it.
-      BasicBlock *Succ = CI->getCaseSuccessor();
-      Succ->removePredecessor(BB);
-      CI = SI->removeCase(CI);
-      CE = SI->case_end();
-
-      // The condition can be modified by removePredecessor's PHI simplification
-      // logic.
-      Cond = SI->getCondition();
-
-      ++NumDeadCases;
-      Changed = true;
-      if (--SuccessorsCount[Succ] == 0)
-        DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
-      continue;
+      // Increment the case iterator since we didn't delete it.
+      ++CI;
     }
-    if (State == LazyValueInfo::True) {
-      // This case always fires.  Arrange for the switch to be turned into an
-      // unconditional branch by replacing the switch condition with the case
-      // value.
-      SI->setCondition(Case);
-      NumDeadCases += SI->getNumCases();
-      Changed = true;
-      break;
-    }
-
-    // Increment the case iterator since we didn't delete it.
-    ++CI;
   }
 
   if (Changed)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll b/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll
new file mode 100644
index 0000000000000..493b4c2273e21
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; Removed several cases from switch.
+define i32 @switch1(i32 %s) {
+; CHECK-LABEL: @switch1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
+;
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+; CHECK:       negative:
+; CHECK-NEXT:    switch i32 [[S]], label [[OUT]] [
+; CHECK-NEXT:    i32 -2, label [[NEXT:%.*]]
+; CHECK-NEXT:    i32 -1, label [[NEXT]]
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 1, label %out
+  i32 -1, label %next
+  i32 -2, label %next
+  i32 2, label %out
+  i32 3, label %out
+; CHECK-NEXT: !prof ![[MD0:[0-9]+]]
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6}
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %negative ], [ 0, %negative ]
+  ret i32 %q
+}
+
+; Removed all cases from switch.
+define i32 @switch2(i32 %s) {
+; CHECK-LABEL: @switch2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[OUT:%.*]]
+;
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 -1, label %next
+  i32 -2, label %next
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3}
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+; Change switch into conditional branch.
+define i32 @switch3(i32 %s) {
+; CHECK-LABEL: @switch3(
+;
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+; CHECK:      positive:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %s, 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEXT:%.*]], label [[OUT:%.*]], !prof ![[MD1:[0-9]+]]
+  switch i32 %s, label %out [
+  i32 1, label %next
+  i32 -1, label %next
+  i32 -2, label %next
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3}
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+; Removed all cases from switch.
+define i32 @switch4(i32 %s) {
+; CHECK-LABEL: @switch4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
+;
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+; CHECK:       negative:
+; CHECK-NEXT:    br label %out
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 1, label %out
+  i32 2, label %out
+  i32 3, label %out
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3, i32 4}
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+}
+
+; CHECK: ![[MD0]] = !{!"branch_weights", i32 99, i32 4, i32 3}
+; CHECK: ![[MD1]] = !{!"branch_weights", i32 1, i32 99}

From 2255b31cec4206769630ae6f9801491317bd11ec Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Tue, 28 May 2019 11:54:01 +0000
Subject: [PATCH 0340/1176] [clang-tidy] Fix null pointer dereference in
 readability-identifier-naming

Summary:
readability-identifier-naming causes a null pointer dereference when checking an identifier introduced by a structured binding whose right hand side is an undeclared identifier.

Running the check on a file that is just the following results in a crash:
```
auto [left] = right;
```

Patch by Mark Stegeman!

Reviewers: alexfh, hokein, aaron.ballman, JonasToth

Reviewed By: hokein, aaron.ballman

Subscribers: madsravn, xazax.hun, cfe-commits

Tags: #clang-tools-extra, #clang

Differential Revision: https://reviews.llvm.org/D62404

llvm-svn: 361809
---
 .../clang-tidy/readability/IdentifierNamingCheck.cpp     | 9 +++++----
 .../clang-tidy/readability-identifier-naming-bugfix.cpp  | 5 +++++
 2 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 clang-tools-extra/test/clang-tidy/readability-identifier-naming-bugfix.cpp

diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
index 7e56fe16d9b38..1bdfe2124e90c 100644
--- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp
@@ -800,10 +800,11 @@ void IdentifierNamingCheck::check(const MatchFinder::MatchResult &Result) {
 
     // Fix type aliases in value declarations
     if (const auto *Value = Result.Nodes.getNodeAs<ValueDecl>("decl")) {
-      if (const auto *Typedef =
-              Value->getType().getTypePtr()->getAs<TypedefType>()) {
-        addUsage(NamingCheckFailures, Typedef->getDecl(),
-                 Value->getSourceRange());
+      if (const auto *TypePtr = Value->getType().getTypePtrOrNull()) {
+        if (const auto *Typedef = TypePtr->getAs<TypedefType>()) {
+          addUsage(NamingCheckFailures, Typedef->getDecl(),
+                   Value->getSourceRange());
+        }
       }
     }
 
diff --git a/clang-tools-extra/test/clang-tidy/readability-identifier-naming-bugfix.cpp b/clang-tools-extra/test/clang-tidy/readability-identifier-naming-bugfix.cpp
new file mode 100644
index 0000000000000..9f892ebceccf2
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/readability-identifier-naming-bugfix.cpp
@@ -0,0 +1,5 @@
+// RUN: %check_clang_tidy -expect-clang-tidy-error %s readability-identifier-naming %t
+
+// This used to cause a null pointer dereference.
+auto [left] = right;
+// CHECK-MESSAGES: :[[@LINE-1]]:15: error: use of undeclared identifier 'right'

From 4df2baadd28225310f16d41e61093a5290fb94b4 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Tue, 28 May 2019 12:06:26 +0000
Subject: [PATCH 0341/1176] [ARM] Use CHECK-NEXT in CodeGen/ARM/O3-pipeline.ll.
 NFC.

Use CHECK-NEXT, like in other pipeline tests, so that we actually
notice when the pipeline is changed.

llvm-svn: 361810
---
 llvm/test/CodeGen/ARM/O3-pipeline.ll | 290 +++++++++++++--------------
 1 file changed, 145 insertions(+), 145 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 20bb06ed4be29..094dd6d0644bc 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -2,148 +2,148 @@
 
 ; REQUIRES: asserts
 
-; CHECK:  ModulePass Manager
-; CHECK:    Pre-ISel Intrinsic Lowering
-; CHECK:    FunctionPass Manager
-; CHECK:      Expand Atomic instructions
-; CHECK:      Simplify the CFG
-; CHECK:      Dominator Tree Construction
-; CHECK:      Basic Alias Analysis (stateless AA impl)
-; CHECK:      Module Verifier
-; CHECK:      Natural Loop Information
-; CHECK:      Canonicalize natural loops
-; CHECK:      Scalar Evolution Analysis
-; CHECK:      Loop Pass Manager
-; CHECK:        Induction Variable Users
-; CHECK:        Loop Strength Reduction
-; CHECK:      Basic Alias Analysis (stateless AA impl)
-; CHECK:      Function Alias Analysis Results
-; CHECK:      Merge contiguous icmps into a memcmp
-; CHECK:      Expand memcmp() to load/stores
-; CHECK:      Lower Garbage Collection Instructions
-; CHECK:      Shadow Stack GC Lowering
-; CHECK:      Remove unreachable blocks from the CFG
-; CHECK:      Dominator Tree Construction
-; CHECK:      Natural Loop Information
-; CHECK:      Branch Probability Analysis
-; CHECK:      Block Frequency Analysis
-; CHECK:      Constant Hoisting
-; CHECK:      Partially inline calls to library functions
-; CHECK:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
-; CHECK:      Scalarize Masked Memory Intrinsics
-; CHECK:      Expand reduction intrinsics
-; CHECK:      Dominator Tree Construction
-; CHECK:      Natural Loop Information
-; CHECK:      Scalar Evolution Analysis
-; CHECK:      Basic Alias Analysis (stateless AA impl)
-; CHECK:      Function Alias Analysis Results
-; CHECK:      Loop Pass Manager
-; CHECK:        Transform loops to use DSP intrinsics
-; CHECK:      Interleaved Access Pass
-; CHECK:      ARM IR optimizations
-; CHECK:      Dominator Tree Construction
-; CHECK:      Natural Loop Information
-; CHECK:      CodeGen Prepare
-; CHECK:    Rewrite Symbols
-; CHECK:    FunctionPass Manager
-; CHECK:      Dominator Tree Construction
-; CHECK:      Exception handling preparation
-; CHECK:      Merge internal globals
-; CHECK:      Safe Stack instrumentation pass
-; CHECK:      Insert stack protectors
-; CHECK:      Module Verifier
-; CHECK:      Dominator Tree Construction
-; CHECK:      Basic Alias Analysis (stateless AA impl)
-; CHECK:      Function Alias Analysis Results
-; CHECK:      Natural Loop Information
-; CHECK:      Branch Probability Analysis
-; CHECK:      ARM Instruction Selection
-; CHECK:      Expand ISel Pseudo-instructions
-; CHECK:      Early Tail Duplication
-; CHECK:      Optimize machine instruction PHIs
-; CHECK:      Slot index numbering
-; CHECK:      Merge disjoint stack slots
-; CHECK:      Local Stack Slot Allocation
-; CHECK:      Remove dead machine instructions
-; CHECK:      MachineDominator Tree Construction
-; CHECK:      Machine Natural Loop Construction
-; CHECK:      Early Machine Loop Invariant Code Motion
-; CHECK:      Machine Common Subexpression Elimination
-; CHECK:      MachinePostDominator Tree Construction
-; CHECK:      Machine Block Frequency Analysis
-; CHECK:      Machine code sinking
-; CHECK:      Peephole Optimizations
-; CHECK:      Remove dead machine instructions
-; CHECK:      ARM MLA / MLS expansion pass
-; CHECK:      ARM pre- register allocation load / store optimization pass
-; CHECK:      ARM A15 S->D optimizer
-; CHECK:      Detect Dead Lanes
-; CHECK:      Process Implicit Definitions
-; CHECK:      Remove unreachable machine basic blocks
-; CHECK:      Live Variable Analysis
-; CHECK:      MachineDominator Tree Construction
-; CHECK:      Machine Natural Loop Construction
-; CHECK:      Eliminate PHI nodes for register allocation
-; CHECK:      Two-Address instruction pass
-; CHECK:      Slot index numbering
-; CHECK:      Live Interval Analysis
-; CHECK:      Simple Register Coalescing
-; CHECK:      Rename Disconnected Subregister Components
-; CHECK:      Machine Instruction Scheduler
-; CHECK:      Machine Block Frequency Analysis
-; CHECK:      Debug Variable Analysis
-; CHECK:      Live Stack Slot Analysis
-; CHECK:      Virtual Register Map
-; CHECK:      Live Register Matrix
-; CHECK:      Bundle Machine CFG Edges
-; CHECK:      Spill Code Placement Analysis
-; CHECK:      Lazy Machine Block Frequency Analysis
-; CHECK:      Machine Optimization Remark Emitter
-; CHECK:      Greedy Register Allocator
-; CHECK:      Virtual Register Rewriter
-; CHECK:      Stack Slot Coloring
-; CHECK:      Machine Copy Propagation Pass
-; CHECK:      Machine Loop Invariant Code Motion
-; CHECK:      PostRA Machine Sink
-; CHECK:      Machine Block Frequency Analysis
-; CHECK:      MachinePostDominator Tree Construction
-; CHECK:      Lazy Machine Block Frequency Analysis
-; CHECK:      Machine Optimization Remark Emitter
-; CHECK:      Shrink Wrapping analysis
-; CHECK:      Prologue/Epilogue Insertion & Frame Finalization
-; CHECK:      Control Flow Optimizer
-; CHECK:      Tail Duplication
-; CHECK:      Machine Copy Propagation Pass
-; CHECK:      Post-RA pseudo instruction expansion pass
-; CHECK:      ARM load / store optimization pass
-; CHECK:      ReachingDefAnalysis
-; CHECK:      ARM Execution Domain Fix
-; CHECK:      BreakFalseDeps
-; CHECK:      ARM pseudo instruction expansion pass
-; CHECK:      Thumb2 instruction size reduce pass
-; CHECK:      MachineDominator Tree Construction
-; CHECK:      Machine Natural Loop Construction
-; CHECK:      Machine Block Frequency Analysis
-; CHECK:      If Converter
-; CHECK:      Thumb IT blocks insertion pass
-; CHECK:      MachineDominator Tree Construction
-; CHECK:      Machine Natural Loop Construction
-; CHECK:      Post RA top-down list latency scheduler
-; CHECK:      Analyze Machine Code For Garbage Collection
-; CHECK:      Machine Block Frequency Analysis
-; CHECK:      MachinePostDominator Tree Construction
-; CHECK:      Branch Probability Basic Block Placement
-; CHECK:      Thumb2 instruction size reduce pass
-; CHECK:      Unpack machine instruction bundles
-; CHECK:      optimise barriers pass
-; CHECK:      ARM constant island placement and branch shortening pass
-; CHECK:      Contiguously Lay Out Funclets
-; CHECK:      StackMap Liveness Analysis
-; CHECK:      Live DEBUG_VALUE analysis
-; CHECK:      Insert fentry calls
-; CHECK:      Insert XRay ops
-; CHECK:      Implement the 'patchable-function' attribute
-; CHECK:      Lazy Machine Block Frequency Analysis
-; CHECK:      Machine Optimization Remark Emitter
-; CHECK:      ARM Assembly Printer
-; CHECK:      Free MachineFunction
+; CHECK:       ModulePass Manager
+; CHECK-NEXT:    Pre-ISel Intrinsic Lowering
+; CHECK-NEXT:    FunctionPass Manager
+; CHECK-NEXT:      Expand Atomic instructions
+; CHECK-NEXT:      Simplify the CFG
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:      Module Verifier
+; CHECK-NEXT:      Natural Loop Information
+; CHECK-NEXT:      Canonicalize natural loops
+; CHECK-NEXT:      Scalar Evolution Analysis
+; CHECK-NEXT:      Loop Pass Manager
+; CHECK-NEXT:        Induction Variable Users
+; CHECK-NEXT:        Loop Strength Reduction
+; CHECK-NEXT:      Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:      Function Alias Analysis Results
+; CHECK-NEXT:      Merge contiguous icmps into a memcmp
+; CHECK-NEXT:      Expand memcmp() to load/stores
+; CHECK-NEXT:      Lower Garbage Collection Instructions
+; CHECK-NEXT:      Shadow Stack GC Lowering
+; CHECK-NEXT:      Remove unreachable blocks from the CFG
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Natural Loop Information
+; CHECK-NEXT:      Branch Probability Analysis
+; CHECK-NEXT:      Block Frequency Analysis
+; CHECK-NEXT:      Constant Hoisting
+; CHECK-NEXT:      Partially inline calls to library functions
+; CHECK-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; CHECK-NEXT:      Scalarize Masked Memory Intrinsics
+; CHECK-NEXT:      Expand reduction intrinsics
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Natural Loop Information
+; CHECK-NEXT:      Scalar Evolution Analysis
+; CHECK-NEXT:      Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:      Function Alias Analysis Results
+; CHECK-NEXT:      Loop Pass Manager
+; CHECK-NEXT:        Transform loops to use DSP intrinsics
+; CHECK-NEXT:      Interleaved Access Pass
+; CHECK-NEXT:      ARM IR optimizations
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Natural Loop Information
+; CHECK-NEXT:      CodeGen Prepare
+; CHECK-NEXT:    Rewrite Symbols
+; CHECK-NEXT:    FunctionPass Manager
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Exception handling preparation
+; CHECK-NEXT:      Merge internal globals
+; CHECK-NEXT:      Safe Stack instrumentation pass
+; CHECK-NEXT:      Insert stack protectors
+; CHECK-NEXT:      Module Verifier
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Basic Alias Analysis (stateless AA impl)
+; CHECK-NEXT:      Function Alias Analysis Results
+; CHECK-NEXT:      Natural Loop Information
+; CHECK-NEXT:      Branch Probability Analysis
+; CHECK-NEXT:      ARM Instruction Selection
+; CHECK-NEXT:      Expand ISel Pseudo-instructions
+; CHECK-NEXT:      Early Tail Duplication
+; CHECK-NEXT:      Optimize machine instruction PHIs
+; CHECK-NEXT:      Slot index numbering
+; CHECK-NEXT:      Merge disjoint stack slots
+; CHECK-NEXT:      Local Stack Slot Allocation
+; CHECK-NEXT:      Remove dead machine instructions
+; CHECK-NEXT:      MachineDominator Tree Construction
+; CHECK-NEXT:      Machine Natural Loop Construction
+; CHECK-NEXT:      Early Machine Loop Invariant Code Motion
+; CHECK-NEXT:      Machine Common Subexpression Elimination
+; CHECK-NEXT:      MachinePostDominator Tree Construction
+; CHECK-NEXT:      Machine Block Frequency Analysis
+; CHECK-NEXT:      Machine code sinking
+; CHECK-NEXT:      Peephole Optimizations
+; CHECK-NEXT:      Remove dead machine instructions
+; CHECK-NEXT:      ARM MLA / MLS expansion pass
+; CHECK-NEXT:      ARM pre- register allocation load / store optimization pass
+; CHECK-NEXT:      ARM A15 S->D optimizer
+; CHECK-NEXT:      Detect Dead Lanes
+; CHECK-NEXT:      Process Implicit Definitions
+; CHECK-NEXT:      Remove unreachable machine basic blocks
+; CHECK-NEXT:      Live Variable Analysis
+; CHECK-NEXT:      MachineDominator Tree Construction
+; CHECK-NEXT:      Machine Natural Loop Construction
+; CHECK-NEXT:      Eliminate PHI nodes for register allocation
+; CHECK-NEXT:      Two-Address instruction pass
+; CHECK-NEXT:      Slot index numbering
+; CHECK-NEXT:      Live Interval Analysis
+; CHECK-NEXT:      Simple Register Coalescing
+; CHECK-NEXT:      Rename Disconnected Subregister Components
+; CHECK-NEXT:      Machine Instruction Scheduler
+; CHECK-NEXT:      Machine Block Frequency Analysis
+; CHECK-NEXT:      Debug Variable Analysis
+; CHECK-NEXT:      Live Stack Slot Analysis
+; CHECK-NEXT:      Virtual Register Map
+; CHECK-NEXT:      Live Register Matrix
+; CHECK-NEXT:      Bundle Machine CFG Edges
+; CHECK-NEXT:      Spill Code Placement Analysis
+; CHECK-NEXT:      Lazy Machine Block Frequency Analysis
+; CHECK-NEXT:      Machine Optimization Remark Emitter
+; CHECK-NEXT:      Greedy Register Allocator
+; CHECK-NEXT:      Virtual Register Rewriter
+; CHECK-NEXT:      Stack Slot Coloring
+; CHECK-NEXT:      Machine Copy Propagation Pass
+; CHECK-NEXT:      Machine Loop Invariant Code Motion
+; CHECK-NEXT:      PostRA Machine Sink
+; CHECK-NEXT:      Machine Block Frequency Analysis
+; CHECK-NEXT:      MachinePostDominator Tree Construction
+; CHECK-NEXT:      Lazy Machine Block Frequency Analysis
+; CHECK-NEXT:      Machine Optimization Remark Emitter
+; CHECK-NEXT:      Shrink Wrapping analysis
+; CHECK-NEXT:      Prologue/Epilogue Insertion & Frame Finalization
+; CHECK-NEXT:      Control Flow Optimizer
+; CHECK-NEXT:      Tail Duplication
+; CHECK-NEXT:      Machine Copy Propagation Pass
+; CHECK-NEXT:      Post-RA pseudo instruction expansion pass
+; CHECK-NEXT:      ARM load / store optimization pass
+; CHECK-NEXT:      ReachingDefAnalysis
+; CHECK-NEXT:      ARM Execution Domain Fix
+; CHECK-NEXT:      BreakFalseDeps
+; CHECK-NEXT:      ARM pseudo instruction expansion pass
+; CHECK-NEXT:      Thumb2 instruction size reduce pass
+; CHECK-NEXT:      MachineDominator Tree Construction
+; CHECK-NEXT:      Machine Natural Loop Construction
+; CHECK-NEXT:      Machine Block Frequency Analysis
+; CHECK-NEXT:      If Converter
+; CHECK-NEXT:      Thumb IT blocks insertion pass
+; CHECK-NEXT:      MachineDominator Tree Construction
+; CHECK-NEXT:      Machine Natural Loop Construction
+; CHECK-NEXT:      Post RA top-down list latency scheduler
+; CHECK-NEXT:      Analyze Machine Code For Garbage Collection
+; CHECK-NEXT:      Machine Block Frequency Analysis
+; CHECK-NEXT:      MachinePostDominator Tree Construction
+; CHECK-NEXT:      Branch Probability Basic Block Placement
+; CHECK-NEXT:      Thumb2 instruction size reduce pass
+; CHECK-NEXT:      Unpack machine instruction bundles
+; CHECK-NEXT:      optimise barriers pass
+; CHECK-NEXT:      ARM constant island placement and branch shortening pass
+; CHECK-NEXT:      Contiguously Lay Out Funclets
+; CHECK-NEXT:      StackMap Liveness Analysis
+; CHECK-NEXT:      Live DEBUG_VALUE analysis
+; CHECK-NEXT:      Insert fentry calls
+; CHECK-NEXT:      Insert XRay ops
+; CHECK-NEXT:      Implement the 'patchable-function' attribute
+; CHECK-NEXT:      Lazy Machine Block Frequency Analysis
+; CHECK-NEXT:      Machine Optimization Remark Emitter
+; CHECK-NEXT:      ARM Assembly Printer
+; CHECK-NEXT:      Free MachineFunction

From d936e40575c4f770853efd681d8066411b3e34ff Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Tue, 28 May 2019 12:19:38 +0000
Subject: [PATCH 0342/1176] Re-commit r357452 (take 2): "SimplifyCFG
 SinkCommonCodeFromPredecessors: Also sink function calls without used results
 (PR41259)"

This was reverted in r360086 as it was supected of causing mysterious test
failures internally. However, it was never concluded that this patch was the
root cause.

> The code was previously checking that candidates for sinking had exactly
> one use or were a store instruction (which can't have uses). This meant
> we could sink call instructions only if they had a use.
>
> That limitation seemed a bit arbitrary, so this patch changes it to
> "instruction has zero or one use" which seems more natural and removes
> the need to special-case stores.
>
> Differential revision: https://reviews.llvm.org/D59936

llvm-svn: 361811
---
 clang/test/CodeGenCXX/nrvo.cpp                |  1 -
 .../CodeGenCXX/stack-reuse-exceptions.cpp     |  2 +-
 clang/test/CodeGenObjC/exceptions.m           |  2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 29 ++++++-----
 llvm/test/CodeGen/AArch64/max-jump-table.ll   | 48 ++++++++---------
 llvm/test/CodeGen/AArch64/min-jump-table.ll   | 30 +++++------
 llvm/test/CodeGen/AArch64/win64-jumptable.ll  | 52 +++++++++----------
 llvm/test/CodeGen/ARM/cmpxchg-idioms.ll       |  4 +-
 .../SimplifyCFG/sink-common-code.ll           | 44 ++++++++++++++++
 9 files changed, 128 insertions(+), 84 deletions(-)

diff --git a/clang/test/CodeGenCXX/nrvo.cpp b/clang/test/CodeGenCXX/nrvo.cpp
index 0f359b9c90007..221857402988a 100644
--- a/clang/test/CodeGenCXX/nrvo.cpp
+++ b/clang/test/CodeGenCXX/nrvo.cpp
@@ -60,7 +60,6 @@ X test2(bool B) {
   // CHECK-NEXT: call void @llvm.lifetime.start
   // CHECK-NEXT: call {{.*}} @_ZN1XC1Ev
   // CHECK: call {{.*}} @_ZN1XC1ERKS_
-  // CHECK: call {{.*}} @_ZN1XC1ERKS_
   // CHECK: call {{.*}} @_ZN1XD1Ev
   // CHECK-NEXT: call void @llvm.lifetime.end
   // CHECK: call {{.*}} @_ZN1XD1Ev
diff --git a/clang/test/CodeGenCXX/stack-reuse-exceptions.cpp b/clang/test/CodeGenCXX/stack-reuse-exceptions.cpp
index de870c5305048..2d968db3fdbd7 100644
--- a/clang/test/CodeGenCXX/stack-reuse-exceptions.cpp
+++ b/clang/test/CodeGenCXX/stack-reuse-exceptions.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -o - -emit-llvm -O1 \
-// RUN:     -fexceptions -fcxx-exceptions | FileCheck %s
+// RUN:     -fexceptions -fcxx-exceptions -mllvm -simplifycfg-sink-common=false | FileCheck %s
 //
 // We should emit lifetime.ends for these temporaries in both the 'exception'
 // and 'normal' paths in functions.
diff --git a/clang/test/CodeGenObjC/exceptions.m b/clang/test/CodeGenObjC/exceptions.m
index 439b9401485f0..741f8a8191586 100644
--- a/clang/test/CodeGenObjC/exceptions.m
+++ b/clang/test/CodeGenObjC/exceptions.m
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fobjc-runtime=macosx-fragile-10.5 -emit-llvm -fobjc-exceptions -O2 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fobjc-runtime=macosx-fragile-10.5 -emit-llvm -fobjc-exceptions -mllvm -simplifycfg-sink-common=false -O2 -o - %s | FileCheck %s
 //
 // <rdar://problem/7471679> [irgen] [eh] Exception code built with clang (x86_64) crashes
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 90b552035af3d..69df6549cb1f1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1445,9 +1445,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
 static bool canSinkInstructions(
     ArrayRef<Instruction *> Insts,
     DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
-  // Prune out obviously bad instructions to move. Any non-store instruction
-  // must have exactly one use, and we check later that use is by a single,
-  // common PHI instruction in the successor.
+  // Prune out obviously bad instructions to move. Each instruction must have
+  // exactly zero or one use, and we check later that use is by a single, common
+  // PHI instruction in the successor.
+  bool HasUse = !Insts.front()->user_empty();
   for (auto *I : Insts) {
     // These instructions may change or break semantics if moved.
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
@@ -1461,9 +1462,10 @@ static bool canSinkInstructions(
       if (C->isInlineAsm())
         return false;
 
-    // Everything must have only one use too, apart from stores which
-    // have no uses.
-    if (!isa<StoreInst>(I) && !I->hasOneUse())
+    // Each instruction must have zero or one use.
+    if (HasUse && !I->hasOneUse())
+      return false;
+    if (!HasUse && !I->user_empty())
       return false;
   }
 
@@ -1472,11 +1474,11 @@ static bool canSinkInstructions(
     if (!I->isSameOperationAs(I0))
       return false;
 
-  // All instructions in Insts are known to be the same opcode. If they aren't
-  // stores, check the only user of each is a PHI or in the same block as the
-  // instruction, because if a user is in the same block as an instruction
-  // we're contemplating sinking, it must already be determined to be sinkable.
-  if (!isa<StoreInst>(I0)) {
+  // All instructions in Insts are known to be the same opcode. If they have a
+  // use, check that the only user is a PHI or in the same block as the
+  // instruction, because if a user is in the same block as an instruction we're
+  // contemplating sinking, it must already be determined to be sinkable.
+  if (HasUse) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
     if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
@@ -1554,7 +1556,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
   // it is slightly over-aggressive - it gets confused by commutative instructions
   // so double-check it here.
   Instruction *I0 = Insts.front();
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
           auto *U = cast<Instruction>(*I->user_begin());
@@ -1612,11 +1614,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
       I0->andIRFlags(I);
     }
 
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     // canSinkLastInstruction checked that all instructions were used by
     // one and only one PHI node. Find that now, RAUW it to our common
     // instruction and nuke it.
-    assert(I0->hasOneUse());
     auto *PN = cast<PHINode>(*I0->user_begin());
     PN->replaceAllUsesWith(I0);
     PN->eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/max-jump-table.ll b/llvm/test/CodeGen/AArch64/max-jump-table.ll
index 44dde7b1cd066..f309efe95b5b6 100644
--- a/llvm/test/CodeGen/AArch64/max-jump-table.ll
+++ b/llvm/test/CodeGen/AArch64/max-jump-table.ll
@@ -4,7 +4,7 @@
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
 
-declare void @ext(i32)
+declare void @ext(i32, i32)
 
 define i32 @jt1(i32 %a, i32 %b) {
 entry:
@@ -45,23 +45,23 @@ entry:
 ; CHECKM3-NEXT: %jump-table.0:
 ; CHECKM3-NOT: %jump-table.1:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
-bb5: tail call void @ext(i32 8) br label %return
-bb6: tail call void @ext(i32 10) br label %return
-bb7: tail call void @ext(i32 12) br label %return
-bb8: tail call void @ext(i32 14) br label %return
-bb9: tail call void @ext(i32 16) br label %return
-bb10: tail call void @ext(i32 18) br label %return
-bb11: tail call void @ext(i32 20) br label %return
-bb12: tail call void @ext(i32 22) br label %return
-bb13: tail call void @ext(i32 24) br label %return
-bb14: tail call void @ext(i32 26) br label %return
-bb15: tail call void @ext(i32 28) br label %return
-bb16: tail call void @ext(i32 30) br label %return
-bb17: tail call void @ext(i32 32) br label %return
+bb1: tail call void  @ext(i32 1, i32 0) br label %return
+bb2: tail call void  @ext(i32 2, i32 2) br label %return
+bb3: tail call void  @ext(i32 3, i32 4) br label %return
+bb4: tail call void  @ext(i32 4, i32 6) br label %return
+bb5: tail call void  @ext(i32 5, i32 8) br label %return
+bb6: tail call void  @ext(i32 6, i32 10) br label %return
+bb7: tail call void  @ext(i32 7, i32 12) br label %return
+bb8: tail call void  @ext(i32 8, i32 14) br label %return
+bb9: tail call void  @ext(i32 9, i32 16) br label %return
+bb10: tail call void @ext(i32 1, i32 18) br label %return
+bb11: tail call void @ext(i32 2, i32 20) br label %return
+bb12: tail call void @ext(i32 3, i32 22) br label %return
+bb13: tail call void @ext(i32 4, i32 24) br label %return
+bb14: tail call void @ext(i32 5, i32 26) br label %return
+bb15: tail call void @ext(i32 6, i32 28) br label %return
+bb16: tail call void @ext(i32 7, i32 30) br label %return
+bb17: tail call void @ext(i32 8, i32 32) br label %return
 
 return: ret i32 %b
 }
@@ -91,11 +91,11 @@ entry:
 ; CHECKM3-NOT: %jump-table.1
 ; CHECK-DAG: End machine code for function jt2.
 
-bb1: tail call void @ext(i32 1) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 3) br label %return
-bb4: tail call void @ext(i32 4) br label %return
-bb5: tail call void @ext(i32 5) br label %return
-bb6: tail call void @ext(i32 6) br label %return
+bb1: tail call void @ext(i32 6, i32 1) br label %return
+bb2: tail call void @ext(i32 5, i32 2) br label %return
+bb3: tail call void @ext(i32 4, i32 3) br label %return
+bb4: tail call void @ext(i32 3, i32 4) br label %return
+bb5: tail call void @ext(i32 2, i32 5) br label %return
+bb6: tail call void @ext(i32 1, i32 6) br label %return
 return: ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/min-jump-table.ll b/llvm/test/CodeGen/AArch64/min-jump-table.ll
index 7d6d26259af00..8d16a4d9d6ad5 100644
--- a/llvm/test/CodeGen/AArch64/min-jump-table.ll
+++ b/llvm/test/CodeGen/AArch64/min-jump-table.ll
@@ -2,7 +2,7 @@
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
 
-declare void @ext(i32)
+declare void @ext(i32, i32)
 
 define i32 @jt2(i32 %a, i32 %b) {
 entry:
@@ -17,8 +17,8 @@ entry:
 ; CHECK4-NOT: {{^}}Jump Tables:
 ; CHECK8-NOT: {{^}}Jump Tables:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
 
 return: ret i32 %b
 }
@@ -40,10 +40,10 @@ entry:
 ; CHECK4-NOT: %jump-table.1:
 ; CHECK8-NOT: {{^}}Jump Tables:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 3, i32 2) br label %return
+bb3: tail call void @ext(i32 4, i32 4) br label %return
+bb4: tail call void @ext(i32 5, i32 6) br label %return
 
 return: ret i32 %b
 }
@@ -65,14 +65,14 @@ entry:
 ; CHECK-NEXT: %jump-table.0:
 ; CHECK-NOT: %jump-table.1:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
-bb5: tail call void @ext(i32 8) br label %return
-bb6: tail call void @ext(i32 10) br label %return
-bb7: tail call void @ext(i32 12) br label %return
-bb8: tail call void @ext(i32 14) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
+bb3: tail call void @ext(i32 3, i32 4) br label %return
+bb4: tail call void @ext(i32 4, i32 6) br label %return
+bb5: tail call void @ext(i32 5, i32 8) br label %return
+bb6: tail call void @ext(i32 6, i32 10) br label %return
+bb7: tail call void @ext(i32 7, i32 12) br label %return
+bb8: tail call void @ext(i32 8, i32 14) br label %return
 
 return: ret i32 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/win64-jumptable.ll b/llvm/test/CodeGen/AArch64/win64-jumptable.ll
index 4eb86ea663fcd..6a9752687aaf5 100644
--- a/llvm/test/CodeGen/AArch64/win64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/win64-jumptable.ll
@@ -10,43 +10,43 @@ entry:
     i32 3, label %sw.bb3
   ]
 
-sw.bb:                                            ; preds = %entry
-  tail call void @g(i32 0) #2
+sw.bb:
+  tail call void @g(i32 0, i32 4)
   br label %sw.epilog
 
-sw.bb1:                                           ; preds = %entry
-  tail call void @g(i32 1) #2
+sw.bb1:
+  tail call void @g(i32 1, i32 5)
   br label %sw.epilog
 
-sw.bb2:                                           ; preds = %entry
-  tail call void @g(i32 2) #2
+sw.bb2:
+  tail call void @g(i32 2, i32 6)
   br label %sw.epilog
 
-sw.bb3:                                           ; preds = %entry
-  tail call void @g(i32 3) #2
+sw.bb3:
+  tail call void @g(i32 3, i32 7)
   br label %sw.epilog
 
-sw.epilog:                                        ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
-  tail call void @g(i32 10) #2
+sw.epilog:
+  tail call void @g(i32 10, i32 8)
   ret void
 }
 
-declare void @g(i32)
-
-; CHECK:		.text
-; CHECK:		f:
-; CHECK:		.seh_proc f
-; CHECK:		b	g
-; CHECK-NEXT:	.p2align	2
-; CHECK-NEXT:	.LJTI0_0:
-; CHECK:		.word	.LBB0_2-.LJTI0_0
-; CHECK:		.word	.LBB0_3-.LJTI0_0
-; CHECK:		.word	.LBB0_4-.LJTI0_0
-; CHECK:		.word	.LBB0_5-.LJTI0_0
-; CHECK:		.section	.xdata,"dr"
-; CHECK:		.seh_handlerdata
-; CHECK:		.text
-; CHECK:		.seh_endproc
+declare void @g(i32, i32)
+
+; CHECK:    .text
+; CHECK:    f:
+; CHECK:    .seh_proc f
+; CHECK:    b g
+; CHECK-NEXT: .p2align  2
+; CHECK-NEXT: .LJTI0_0:
+; CHECK:    .word .LBB0_2-.LJTI0_0
+; CHECK:    .word .LBB0_3-.LJTI0_0
+; CHECK:    .word .LBB0_4-.LJTI0_0
+; CHECK:    .word .LBB0_5-.LJTI0_0
+; CHECK:    .section  .xdata,"dr"
+; CHECK:    .seh_handlerdata
+; CHECK:    .text
+; CHECK:    .seh_endproc
 
 ; Check that we can emit an object file with correct unwind info.
 ; UNWIND: FunctionLength: {{[1-9][0-9]*}}
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
index 283202f0cc1f6..1af80e7d0c871 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -20,14 +20,14 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 ; CHECK: [[FAILED]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
 ; CHECK: clrex
-; CHECK: dmb ish
 ; CHECK: movs r0, #0
+; CHECK: dmb ish
 ; CHECK: bx lr
 
 ; CHECK: [[SUCCESS]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
 ; CHECK: movs r0, #1
+; CHECK: dmb ish
 ; CHECK: bx lr
 
   %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
index 02c29bd354024..12a3e59cd3776 100644
--- a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -843,6 +843,50 @@ if.end:
 ; CHECK: insertvalue
 ; CHECK-NOT: insertvalue
 
+
+declare void @baz(i32)
+
+define void @test_sink_void_calls(i32 %x) {
+entry:
+  switch i32 %x, label %default [
+    i32 0, label %bb0
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+  ]
+bb0:
+  call void @baz(i32 12)
+  br label %return
+bb1:
+  call void @baz(i32 34)
+  br label %return
+bb2:
+  call void @baz(i32 56)
+  br label %return
+bb3:
+  call void @baz(i32 78)
+  br label %return
+bb4:
+  call void @baz(i32 90)
+  br label %return
+default:
+  unreachable
+return:
+  ret void
+
+; Check that the calls get sunk to the return block.
+; We would previously not sink calls without uses, see PR41259.
+; CHECK-LABEL: @test_sink_void_calls
+; CHECK-NOT: call
+; CHECK-LABEL: return:
+; CHECK: phi
+; CHECK: call
+; CHECK-NOT: call
+; CHECK: ret
+}
+
+
 ; CHECK: ![[$TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0}
 ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]}
 ; CHECK: ![[TEXT]] = !{!"an example type tree"}

From 80343a348b8efeced7a8190765caba4753ef6d3f Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Tue, 28 May 2019 12:30:35 +0000
Subject: [PATCH 0343/1176] Cleanups for r361807 that I somehow failed to
 commit

llvm-svn: 361812
---
 llvm/tools/llvm-readobj/ELFDumper.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 8bc339eb37956..150e98df8bb0e 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3346,8 +3346,11 @@ template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
     OS << "  Tag        Type                 Name/Value\n";
   for (auto Entry : Table) {
     uintX_t Tag = Entry.getTag();
-    std::string TypeString = std::string("(") + getTypeString(Obj->getHeader()->e_machine, Tag) + ")";
-    OS << "  " << format_hex(Tag, Is64 ? 18 : 10) << format(" %-20s ", TypeString.c_str());
+    std::string TypeString = std::string("(") +
+                             getTypeString(Obj->getHeader()->e_machine, Tag) +
+                             ")";
+    OS << "  " << format_hex(Tag, Is64 ? 18 : 10)
+       << format(" %-20s ", TypeString.c_str());
     this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
     OS << "\n";
   }
@@ -4918,7 +4921,6 @@ void LLVMStyle<ELFT>::printELFLinkerOptions(const ELFFile<ELFT> *Obj) {
 template <class ELFT>
 void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
   auto PrintEntry = [&](const Elf_Addr *E) {
-		// XXX: here?
     W.printHex("Address", Parser.getGotAddress(E));
     W.printNumber("Access", Parser.getGotOffset(E));
     W.printHex("Initial", *E);
@@ -4983,7 +4985,6 @@ void LLVMStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
 template <class ELFT>
 void LLVMStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
   auto PrintEntry = [&](const Elf_Addr *E) {
-		// XXX: here? no.
     W.printHex("Address", Parser.getPltAddress(E));
     W.printHex("Initial", *E);
   };

From 19e91253c0a5e021697f9271c299d6816cbab642 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter@arm.com>
Date: Tue, 28 May 2019 12:36:39 +0000
Subject: [PATCH 0344/1176] [NFC] Test commit, delete trailing whitespace

llvm-svn: 361813
---
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 77cc6cf80135f..a4b78f2a7d6b0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -666,7 +666,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   assert(Factor >= 2 && "Invalid interleave factor");
   assert(isa<VectorType>(VecTy) && "Expect a vector type");
 
-  if (!UseMaskForCond && !UseMaskForGaps && 
+  if (!UseMaskForCond && !UseMaskForGaps &&
       Factor <= TLI->getMaxSupportedInterleaveFactor()) {
     unsigned NumElts = VecTy->getVectorNumElements();
     auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

From 57e267a2e92a7744df043c740cb946952c05ede8 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Tue, 28 May 2019 12:52:57 +0000
Subject: [PATCH 0345/1176] [X86] Custom lower CONCAT_VECTORS of v2i1

The generic legalizer cannot handle this. Add an assert instead of
silently miscompiling vectors with elements smaller than 8 bits.

llvm-svn: 361814
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   1 +
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   9 +-
 .../test/CodeGen/X86/avx512-insert-extract.ll | 104 ++++++++++++++++++
 llvm/test/CodeGen/X86/vec_saddo.ll            |   8 +-
 llvm/test/CodeGen/X86/vec_smulo.ll            |  46 ++++----
 llvm/test/CodeGen/X86/vec_ssubo.ll            |   8 +-
 llvm/test/CodeGen/X86/vec_uaddo.ll            |   8 +-
 llvm/test/CodeGen/X86/vec_umulo.ll            |   8 +-
 llvm/test/CodeGen/X86/vec_usubo.ll            |   8 +-
 9 files changed, 154 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 52ae1e01a9ef8..19baf178f121d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1415,6 +1415,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   // Emit a store of each element to the stack slot.
   SmallVector<SDValue, 8> Stores;
   unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
   // Store (in the right endianness) the elements to memory.
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
     // Ignore undef elements.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47b8e9eac2d21..0bc31d5d516bf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1357,19 +1357,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SSUBSAT,          VT, Custom);
 
       setOperationAction(ISD::BUILD_VECTOR,     VT, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS,   VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE,   VT,  Custom);
       setOperationAction(ISD::VSELECT,          VT,  Expand);
     }
 
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1, Custom);
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1,  Custom);
-    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v2i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v4i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1,  Custom);
-    setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v16i1, Custom);
     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
   }
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index b81c829052c6a..8bdd7dc2c1df8 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -2252,3 +2252,107 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
   %t4 = bitcast <128 x i1> %t3 to i128
   ret i128 %t4
 }
+
+define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
+; KNL-LABEL: test_concat_v2i1:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    movswl (%rdi), %eax
+; KNL-NEXT:    vmovd %eax, %xmm0
+; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0
+; KNL-NEXT:    movswl 2(%rdi), %eax
+; KNL-NEXT:    vmovd %eax, %xmm1
+; KNL-NEXT:    vcvtph2ps %xmm1, %xmm1
+; KNL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; KNL-NEXT:    vucomiss %xmm2, %xmm1
+; KNL-NEXT:    setb %al
+; KNL-NEXT:    kmovw %eax, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k0
+; KNL-NEXT:    vucomiss %xmm2, %xmm0
+; KNL-NEXT:    setb %al
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    korw %k0, %k1, %k0
+; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; KNL-NEXT:    vucomiss %xmm2, %xmm1
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    kshiftlw $1, %k1, %k1
+; KNL-NEXT:    vucomiss %xmm2, %xmm0
+; KNL-NEXT:    seta %al
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    kmovw %eax, %k2
+; KNL-NEXT:    korw %k1, %k2, %k1
+; KNL-NEXT:    kandw %k1, %k0, %k1
+; KNL-NEXT:    kshiftrw $1, %k1, %k2
+; KNL-NEXT:    movswl (%rsi), %eax
+; KNL-NEXT:    vmovd %eax, %xmm0
+; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0
+; KNL-NEXT:    movswl 2(%rsi), %eax
+; KNL-NEXT:    vmovd %eax, %xmm1
+; KNL-NEXT:    vcvtph2ps %xmm1, %xmm1
+; KNL-NEXT:    vmovss %xmm1, %xmm0, %xmm1 {%k2} {z}
+; KNL-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; KNL-NEXT:    vmovd %xmm0, %eax
+; KNL-NEXT:    movw %ax, (%rdx)
+; KNL-NEXT:    vcvtps2ph $4, %xmm1, %xmm0
+; KNL-NEXT:    vmovd %xmm0, %eax
+; KNL-NEXT:    movw %ax, 2(%rdx)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_concat_v2i1:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    movswl (%rdi), %eax
+; SKX-NEXT:    vmovd %eax, %xmm0
+; SKX-NEXT:    vcvtph2ps %xmm0, %xmm0
+; SKX-NEXT:    movswl 2(%rdi), %eax
+; SKX-NEXT:    vmovd %eax, %xmm1
+; SKX-NEXT:    vcvtph2ps %xmm1, %xmm1
+; SKX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SKX-NEXT:    vucomiss %xmm2, %xmm1
+; SKX-NEXT:    setb %al
+; SKX-NEXT:    kmovd %eax, %k0
+; SKX-NEXT:    kshiftlb $1, %k0, %k0
+; SKX-NEXT:    vucomiss %xmm2, %xmm0
+; SKX-NEXT:    setb %al
+; SKX-NEXT:    kmovd %eax, %k1
+; SKX-NEXT:    kshiftlb $7, %k1, %k1
+; SKX-NEXT:    kshiftrb $7, %k1, %k1
+; SKX-NEXT:    korw %k0, %k1, %k0
+; SKX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; SKX-NEXT:    vucomiss %xmm2, %xmm1
+; SKX-NEXT:    seta %al
+; SKX-NEXT:    kmovd %eax, %k1
+; SKX-NEXT:    kshiftlb $1, %k1, %k1
+; SKX-NEXT:    vucomiss %xmm2, %xmm0
+; SKX-NEXT:    seta %al
+; SKX-NEXT:    kmovd %eax, %k2
+; SKX-NEXT:    kshiftlb $7, %k2, %k2
+; SKX-NEXT:    kshiftrb $7, %k2, %k2
+; SKX-NEXT:    korw %k1, %k2, %k1
+; SKX-NEXT:    kandw %k1, %k0, %k1
+; SKX-NEXT:    kshiftrb $1, %k1, %k2
+; SKX-NEXT:    movswl (%rsi), %eax
+; SKX-NEXT:    vmovd %eax, %xmm0
+; SKX-NEXT:    vcvtph2ps %xmm0, %xmm0
+; SKX-NEXT:    movswl 2(%rsi), %eax
+; SKX-NEXT:    vmovd %eax, %xmm1
+; SKX-NEXT:    vcvtph2ps %xmm1, %xmm1
+; SKX-NEXT:    vmovss %xmm1, %xmm0, %xmm1 {%k2} {z}
+; SKX-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; SKX-NEXT:    vmovd %xmm0, %eax
+; SKX-NEXT:    movw %ax, (%rdx)
+; SKX-NEXT:    vcvtps2ph $4, %xmm1, %xmm0
+; SKX-NEXT:    vmovd %xmm0, %eax
+; SKX-NEXT:    movw %ax, 2(%rdx)
+; SKX-NEXT:    retq
+  %tmp = load <2 x half>, <2 x half>* %arg, align 8
+  %tmp3 = fcmp fast olt <2 x half> %tmp, <half 0xH4600, half 0xH4600>
+  %tmp4 = fcmp fast ogt <2 x half> %tmp, zeroinitializer
+  %tmp5 = and <2 x i1> %tmp3, %tmp4
+  %tmp6 = load <2 x half>, <2 x half>* %arg1, align 8
+  %tmp7 = select <2 x i1> %tmp5, <2 x half> %tmp6, <2 x half> zeroinitializer
+  store <2 x half> %tmp7, <2 x half>* %arg2, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index a9e5697d9325e..aeb1951fbef87 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1871,7 +1871,8 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    cmpb %al, %cl
 ; AVX512-NEXT:    sete %al
 ; AVX512-NEXT:    andb %bl, %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    testq %r9, %r9
 ; AVX512-NEXT:    setns %al
 ; AVX512-NEXT:    testq %rsi, %rsi
@@ -1884,8 +1885,9 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    cmpb %bl, %cl
 ; AVX512-NEXT:    setne %cl
 ; AVX512-NEXT:    andb %al, %cl
-; AVX512-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT:    andl $1, %ecx
+; AVX512-NEXT:    kmovw %ecx, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    movq %rdx, 16(%r10)
 ; AVX512-NEXT:    movq %rdi, (%r10)
 ; AVX512-NEXT:    movq %r14, 24(%r10)
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index 8b716b7f1ecad..ab97c51df410b 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -2706,44 +2706,42 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    pushq %r13
 ; AVX512-NEXT:    pushq %r12
 ; AVX512-NEXT:    pushq %rbx
-; AVX512-NEXT:    subq $40, %rsp
-; AVX512-NEXT:    movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512-NEXT:    movq %r8, %r15
-; AVX512-NEXT:    movq %rdx, %rax
-; AVX512-NEXT:    movq %rsi, %r12
-; AVX512-NEXT:    movq %rdi, %rbx
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r14
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r9
+; AVX512-NEXT:    subq $24, %rsp
+; AVX512-NEXT:    movq %r8, %rax
+; AVX512-NEXT:    movq %rcx, %r14
+; AVX512-NEXT:    movq %rdx, %rbx
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r15
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r12
 ; AVX512-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
-; AVX512-NEXT:    movq %rax, %rdi
-; AVX512-NEXT:    movq %rcx, %rsi
+; AVX512-NEXT:    movq %rax, %rdx
 ; AVX512-NEXT:    movq %r9, %rcx
 ; AVX512-NEXT:    callq __muloti4
 ; AVX512-NEXT:    movq %rax, %r13
 ; AVX512-NEXT:    movq %rdx, %rbp
-; AVX512-NEXT:    cmpq $0, {{[0-9]+}}(%rsp)
-; AVX512-NEXT:    setne %al
-; AVX512-NEXT:    movb %al, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; AVX512-NEXT:    movq %rbx, %rdi
-; AVX512-NEXT:    movq %r12, %rsi
-; AVX512-NEXT:    movq %r15, %rdx
-; AVX512-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX512-NEXT:    movq %r14, %rsi
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; AVX512-NEXT:    movq %r12, %rcx
 ; AVX512-NEXT:    callq __muloti4
 ; AVX512-NEXT:    cmpq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    setne %cl
-; AVX512-NEXT:    movb %cl, {{[0-9]+}}(%rsp)
-; AVX512-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512-NEXT:    movq %rbp, 24(%r14)
-; AVX512-NEXT:    movq %r13, 16(%r14)
-; AVX512-NEXT:    movq %rdx, 8(%r14)
-; AVX512-NEXT:    movq %rax, (%r14)
+; AVX512-NEXT:    kmovd %ecx, %k0
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
+; AVX512-NEXT:    cmpq $0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    setne %cl
+; AVX512-NEXT:    andl $1, %ecx
+; AVX512-NEXT:    kmovw %ecx, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
+; AVX512-NEXT:    movq %rdx, 24(%r15)
+; AVX512-NEXT:    movq %rax, 16(%r15)
+; AVX512-NEXT:    movq %rbp, 8(%r15)
+; AVX512-NEXT:    movq %r13, (%r15)
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    addq $40, %rsp
+; AVX512-NEXT:    addq $24, %rsp
 ; AVX512-NEXT:    popq %rbx
 ; AVX512-NEXT:    popq %r12
 ; AVX512-NEXT:    popq %r13
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 51192ed45d457..15c0531d67a75 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1910,7 +1910,8 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    cmpb %al, %cl
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    andb %bl, %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    testq %r9, %r9
 ; AVX512-NEXT:    setns %al
 ; AVX512-NEXT:    testq %rsi, %rsi
@@ -1923,8 +1924,9 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    cmpb %bl, %cl
 ; AVX512-NEXT:    setne %cl
 ; AVX512-NEXT:    andb %al, %cl
-; AVX512-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT:    andl $1, %ecx
+; AVX512-NEXT:    kmovw %ecx, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    movq %rdx, 16(%r10)
 ; AVX512-NEXT:    movq %rdi, (%r10)
 ; AVX512-NEXT:    movq %r14, 24(%r10)
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 36dc931173105..41a0e258e3d12 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -1336,12 +1336,14 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
 ; AVX512-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
 ; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    addq %r8, %rdi
 ; AVX512-NEXT:    adcq %r9, %rsi
 ; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT:    andl $1, %eax
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    movq %rdx, 16(%r10)
 ; AVX512-NEXT:    movq %rdi, (%r10)
 ; AVX512-NEXT:    movq %rcx, 24(%r10)
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 6f37183c05371..0c95b73853e96 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -2575,7 +2575,8 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    setb %al
 ; AVX512-NEXT:    orb %cl, %al
 ; AVX512-NEXT:    orb %r13b, %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    testq %r9, %r9
 ; AVX512-NEXT:    setne %al
 ; AVX512-NEXT:    testq %rsi, %rsi
@@ -2597,8 +2598,9 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    setb %sil
 ; AVX512-NEXT:    orb %bl, %sil
 ; AVX512-NEXT:    orb %cl, %sil
-; AVX512-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT:    andl $1, %esi
+; AVX512-NEXT:    kmovw %esi, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    movq %r10, 16(%r14)
 ; AVX512-NEXT:    movq %rax, (%r14)
 ; AVX512-NEXT:    movq %r15, 24(%r14)
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index 5c843dc504ca7..b662ac45caf60 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -1378,12 +1378,14 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
 ; AVX512-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
 ; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    subq %r8, %rdi
 ; AVX512-NEXT:    sbbq %r9, %rsi
 ; AVX512-NEXT:    setb %al
-; AVX512-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT:    andl $1, %eax
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    movq %rdx, 16(%r10)
 ; AVX512-NEXT:    movq %rdi, (%r10)
 ; AVX512-NEXT:    movq %rcx, 24(%r10)

From 6bf4ca9d2e1c7a239832114d9f8521e8d9ff3b96 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 28 May 2019 12:58:07 +0000
Subject: [PATCH 0346/1176] [x86] fix 256-bit vector store splitting to honor
 'volatile'

Forking this out of the discussion in D62498
(and assuming that will be committed later, so adding the helper function here).
The LangRef says:
"the backend should never split or merge target-legal volatile load/store instructions."

Differential Revision: https://reviews.llvm.org/D62506

llvm-svn: 361815
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 44 +++++++++++++++++--------
 llvm/test/CodeGen/X86/avx-load-store.ll |  6 ++--
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0bc31d5d516bf..7b4ce08b57860 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21022,6 +21022,35 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
 }
 
+/// Change a 256-bit vector store into a pair of 128-bit vector stores.
+static SDValue split256BitStore(StoreSDNode *Store, SelectionDAG &DAG) {
+  SDValue StoredVal = Store->getValue();
+  assert(StoredVal.getValueType().is256BitVector() && "Expecting 256-bit op");
+
+  // Splitting volatile memory ops is not allowed unless the operation was not
+  // legal to begin with. We are assuming the input op is legal (this transform
+  // is only used for targets with AVX).
+  if (Store->isVolatile())
+    return SDValue();
+
+  MVT StoreVT = StoredVal.getSimpleValueType();
+  unsigned NumElems = StoreVT.getVectorNumElements();
+  SDLoc DL(Store);
+  SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, DL);
+  SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, DL);
+  SDValue Ptr0 = Store->getBasePtr();
+  SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, DL);
+  unsigned Alignment = Store->getAlignment();
+  SDValue Ch0 =
+      DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
+                   Alignment, Store->getMemOperand()->getFlags());
+  SDValue Ch1 =
+      DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
+                   Store->getPointerInfo().getWithOffset(16),
+                   MinAlign(Alignment, 16), Store->getMemOperand()->getFlags());
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
+}
+
 static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
   StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
@@ -39345,20 +39374,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     if (NumElems < 2)
       return SDValue();
 
-    SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl);
-    SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl);
-
-    SDValue Ptr0 = St->getBasePtr();
-    SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl);
-
-    SDValue Ch0 =
-        DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
-                     Alignment, St->getMemOperand()->getFlags());
-    SDValue Ch1 =
-        DAG.getStore(St->getChain(), dl, Value1, Ptr1,
-                     St->getPointerInfo().getWithOffset(16),
-                     MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+    return split256BitStore(St, DAG);
   }
 
   // Optimize trunc store (of multiple scalars) to shuffle and store.
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 1fd4e07961dbe..7bd39f4d1d379 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -187,8 +187,10 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp
 define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind {
 ; CHECK-LABEL: double_save_volatile:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovaps %xmm1, 16(%rdi)
-; CHECK-NEXT:    vmovaps %xmm0, (%rdi)
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vmovups %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
 ;
 ; CHECK_O0-LABEL: double_save_volatile:

From 14857814343a27ba48221cd2edbe38c52b1b1c85 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Tue, 28 May 2019 13:00:52 +0000
Subject: [PATCH 0347/1176] [IRBuilder] Add CreateUnOp(...) to the IRBuilder to
 support unary FNeg

Also update UnaryOperator to support isa, cast, and dyn_cast.

Differential Revision: https://reviews.llvm.org/D62417

llvm-svn: 361816
---
 llvm/include/llvm/Analysis/TargetFolder.h |  4 ++++
 llvm/include/llvm/IR/ConstantFolder.h     |  4 ++++
 llvm/include/llvm/IR/IRBuilder.h          | 11 +++++++++++
 llvm/include/llvm/IR/InstrTypes.h         | 11 ++++++++++-
 llvm/include/llvm/IR/NoFolder.h           |  4 ++++
 llvm/unittests/IR/IRBuilderTest.cpp       | 12 ++++++++++++
 6 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index b8047a46b93d9..7ab6562be4404 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -124,6 +124,10 @@ class TargetFolder {
     return Fold(ConstantExpr::getNot(C));
   }
 
+  Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+    return Fold(ConstantExpr::get(Opc, C));
+  }
+
   //===--------------------------------------------------------------------===//
   // Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index 1971cb854e340..5a5cabfd02064 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -134,6 +134,10 @@ class ConstantFolder {
     return ConstantExpr::getNot(C);
   }
 
+  Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+    return ConstantExpr::get(Opc, C);
+  }
+
   //===--------------------------------------------------------------------===//
   // Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index c2090a2186ae6..d052666354f79 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1372,6 +1372,17 @@ class IRBuilder : public IRBuilderBase, public Inserter {
     return Insert(BinaryOperator::CreateNot(V), Name);
   }
 
+  Value *CreateUnOp(Instruction::UnaryOps Opc,
+                    Value *V, const Twine &Name = "",
+                    MDNode *FPMathTag = nullptr) {
+    if (auto *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateUnOp(Opc, VC), Name);
+    Instruction *UnOp = UnaryOperator::Create(Opc, V);
+    if (isa<FPMathOperator>(UnOp))
+      UnOp = setFPAttrs(UnOp, FPMathTag, FMF);
+    return Insert(UnOp, Name);
+  }
+
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index a595707f3a614..7ffa7a6f60e8f 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -77,7 +77,8 @@ class UnaryInstruction : public Instruction {
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
-    return I->getOpcode() == Instruction::Alloca ||
+    return I->isUnaryOp() ||
+           I->getOpcode() == Instruction::Alloca ||
            I->getOpcode() == Instruction::Load ||
            I->getOpcode() == Instruction::VAArg ||
            I->getOpcode() == Instruction::ExtractValue ||
@@ -156,6 +157,14 @@ class UnaryOperator : public UnaryInstruction {
   UnaryOps getOpcode() const {
     return static_cast<UnaryOps>(Instruction::getOpcode());
   }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const Instruction *I) {
+    return I->isUnaryOp();
+  }
+  static bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index 7fd303933c228..0e3c19f4947f4 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -203,6 +203,10 @@ class NoFolder {
     return BinaryOperator::CreateNot(C);
   }
 
+  Instruction *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+    return UnaryOperator::Create(Opc, C);
+  }
+
   //===--------------------------------------------------------------------===//
   // Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp
index 80744061e0ced..51397ec745fb3 100644
--- a/llvm/unittests/IR/IRBuilderTest.cpp
+++ b/llvm/unittests/IR/IRBuilderTest.cpp
@@ -202,6 +202,18 @@ TEST_F(IRBuilderTest, GetIntTy) {
   delete DL;
 }
 
+TEST_F(IRBuilderTest, UnaryOperators) {
+  IRBuilder<NoFolder> Builder(BB);
+  Value *V = Builder.CreateLoad(GV->getValueType(), GV);
+
+  // Test CreateUnOp
+  Value *U = Builder.CreateUnOp(Instruction::FNeg, V);
+  ASSERT_TRUE(isa<Instruction>(U));
+  ASSERT_TRUE(isa<FPMathOperator>(U));
+  ASSERT_TRUE(isa<UnaryOperator>(U));
+  ASSERT_FALSE(isa<BinaryOperator>(U));
+}
+
 TEST_F(IRBuilderTest, FastMathFlags) {
   IRBuilder<> Builder(BB);
   Value *F, *FC;

From c0f43bee37f94df5384378d5ed99bd89c767871c Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Tue, 28 May 2019 13:04:47 +0000
Subject: [PATCH 0348/1176] Follow up of r361810: test case fix attempt for
 Windows builder

llvm-svn: 361817
---
 llvm/test/CodeGen/ARM/O3-pipeline.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 094dd6d0644bc..501877fb3f72d 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc -mtriple=arm -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v "Verify generated machine code" | FileCheck %s
 
 ; REQUIRES: asserts
 

From 9ed4b316d13f887b64bb4c129d462374e208d29e Mon Sep 17 00:00:00 2001
From: Adam Balogh <adam.balogh@ericsson.com>
Date: Tue, 28 May 2019 13:07:09 +0000
Subject: [PATCH 0349/1176] [Analyzer] Replace `CXXSelfAssignmentBRVisitor`
 with `NoteTags`

The `cplusplus.SelfAssignment` checker has a visitor that is added
to every `BugReport` to mark the to branch of the self assignment
operator with e.g. `rhs == *this` and `rhs != *this`. With the new
`NoteTag` feature this visitor is not needed anymore. Instead the
checker itself marks the two branches using the `NoteTag`s.

Differential Revision: https://reviews.llvm.org/D62479

llvm-svn: 361818
---
 .../Core/BugReporter/BugReporterVisitors.h    | 14 -----
 .../Checkers/CXXSelfAssignmentChecker.cpp     | 20 ++++++-
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp |  1 -
 .../Core/BugReporterVisitors.cpp              | 53 -------------------
 .../StaticAnalyzer/Core/PathDiagnostic.cpp    |  9 ++++
 5 files changed, 27 insertions(+), 70 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
index e624d0fc026c4..1a09714a65b25 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -294,20 +294,6 @@ class SuppressInlineDefensiveChecksVisitor final : public BugReporterVisitor {
                                                  BugReport &BR) override;
 };
 
-class CXXSelfAssignmentBRVisitor final : public BugReporterVisitor {
-  bool Satisfied = false;
-
-public:
-  CXXSelfAssignmentBRVisitor() = default;
-
-  void Profile(llvm::FoldingSetNodeID &ID) const override {}
-
-  std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *Succ,
-                                                 BugReporterContext &BRC,
-                                                 BugReport &BR) override;
-};
-
-
 /// The bug visitor will walk all the nodes in a path and collect all the
 /// constraints. When it reaches the root node, will create a refutation
 /// manager and check if the constraints are satisfiable
diff --git a/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp
index 1233849b1733c..01f5b9c889e32 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CXXSelfAssignmentChecker.cpp
@@ -50,10 +50,26 @@ void CXXSelfAssignmentChecker::checkBeginFunction(CheckerContext &C) const {
       State->getSVal(SVB.getCXXThis(MD, LCtx->getStackFrame()));
   auto Param = SVB.makeLoc(State->getRegion(MD->getParamDecl(0), LCtx));
   auto ParamVal = State->getSVal(Param);
+
   ProgramStateRef SelfAssignState = State->bindLoc(Param, ThisVal, LCtx);
-  C.addTransition(SelfAssignState);
+  const NoteTag *SelfAssignTag =
+    C.getNoteTag([MD](BugReport &BR) -> std::string {
+        SmallString<256> Msg;
+        llvm::raw_svector_ostream Out(Msg);
+        Out << "Assuming " << MD->getParamDecl(0)->getName() << " == *this";
+        return Out.str();
+      });
+  C.addTransition(SelfAssignState, SelfAssignTag);
+
   ProgramStateRef NonSelfAssignState = State->bindLoc(Param, ParamVal, LCtx);
-  C.addTransition(NonSelfAssignState);
+  const NoteTag *NonSelfAssignTag =
+    C.getNoteTag([MD](BugReport &BR) -> std::string {
+        SmallString<256> Msg;
+        llvm::raw_svector_ostream Out(Msg);
+        Out << "Assuming " << MD->getParamDecl(0)->getName() << " != *this";
+        return Out.str();
+      });
+  C.addTransition(NonSelfAssignState, NonSelfAssignTag);
 }
 
 void ento::registerCXXSelfAssignmentChecker(CheckerManager &Mgr) {
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index cc93675344e14..738ad9a062fc3 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -2610,7 +2610,6 @@ std::pair<BugReport*, std::unique_ptr<VisitorsDiagnosticsTy>> findValidReport(
     // Register additional node visitors.
     R->addVisitor(llvm::make_unique<NilReceiverBRVisitor>());
     R->addVisitor(llvm::make_unique<ConditionBRVisitor>());
-    R->addVisitor(llvm::make_unique<CXXSelfAssignmentBRVisitor>());
     R->addVisitor(llvm::make_unique<TagVisitor>());
 
     BugReporterContext BRC(Reporter, ErrorGraph.BackMap);
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index 21320b1cdd884..d11ecd95802a3 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -2371,59 +2371,6 @@ UndefOrNullArgVisitor::VisitNode(const ExplodedNode *N,
   return nullptr;
 }
 
-std::shared_ptr<PathDiagnosticPiece>
-CXXSelfAssignmentBRVisitor::VisitNode(const ExplodedNode *Succ,
-                                      BugReporterContext &BRC, BugReport &) {
-  if (Satisfied)
-    return nullptr;
-
-  const auto Edge = Succ->getLocation().getAs<BlockEdge>();
-  if (!Edge.hasValue())
-    return nullptr;
-
-  auto Tag = Edge->getTag();
-  if (!Tag)
-    return nullptr;
-
-  if (Tag->getTagDescription() != "cplusplus.SelfAssignment")
-    return nullptr;
-
-  Satisfied = true;
-
-  const auto *Met =
-      dyn_cast<CXXMethodDecl>(Succ->getCodeDecl().getAsFunction());
-  assert(Met && "Not a C++ method.");
-  assert((Met->isCopyAssignmentOperator() || Met->isMoveAssignmentOperator()) &&
-         "Not a copy/move assignment operator.");
-
-  const auto *LCtx = Edge->getLocationContext();
-
-  const auto &State = Succ->getState();
-  auto &SVB = State->getStateManager().getSValBuilder();
-
-  const auto Param =
-      State->getSVal(State->getRegion(Met->getParamDecl(0), LCtx));
-  const auto This =
-      State->getSVal(SVB.getCXXThis(Met, LCtx->getStackFrame()));
-
-  auto L = PathDiagnosticLocation::create(Met, BRC.getSourceManager());
-
-  if (!L.isValid() || !L.asLocation().isValid())
-    return nullptr;
-
-  SmallString<256> Buf;
-  llvm::raw_svector_ostream Out(Buf);
-
-  Out << "Assuming " << Met->getParamDecl(0)->getName() <<
-    ((Param == This) ? " == " : " != ") << "*this";
-
-  auto Piece = std::make_shared<PathDiagnosticEventPiece>(L, Out.str());
-  Piece->addRange(Met->getSourceRange());
-
-  return std::move(Piece);
-}
-
-
 FalsePositiveRefutationBRVisitor::FalsePositiveRefutationBRVisitor()
     : Constraints(ConstraintRangeTy::Factory().getEmptyMap()) {}
 
diff --git a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index b3008479fe358..5889a979661ce 100644
--- a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -732,6 +732,15 @@ PathDiagnosticLocation::create(const ProgramPoint& P,
 
     } else {
       S = BSrc->getTerminatorCondition();
+      if (!S) {
+        // If the BlockEdge has no terminator condition statement but its
+        // source is the entry of the CFG (e.g. a checker crated the branch at
+        // the beginning of a function), use the function's declaration instead.
+        assert(BSrc == &BSrc->getParent()->getEntry() && "CFGBlock has no "
+               "TerminatorCondition and is not the enrty block of the CFG");
+        return PathDiagnosticLocation::createBegin(
+            P.getLocationContext()->getDecl(), SMng);
+      }
     }
   } else if (Optional<StmtPoint> SP = P.getAs<StmtPoint>()) {
     S = SP->getStmt();

From d3ed418ad3c4bd233b926e586888e177d4fa82be Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 28 May 2019 13:08:31 +0000
Subject: [PATCH 0350/1176] MIR: Fix printer crashing on dead CSR frame indexes

llvm-svn: 361819
---
 llvm/lib/CodeGen/MIRPrinter.cpp               |  3 ++
 .../CodeGen/AMDGPU/mir-print-dead-csr-fi.mir  | 28 +++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/mir-print-dead-csr-fi.mir

diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 86e3f53608c68..1d95187d597c5 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -403,6 +403,9 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
   }
 
   for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+    if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx()))
+      continue;
+
     yaml::StringValue Reg;
     printRegMIR(CSInfo.getReg(), Reg, TRI);
     if (!CSInfo.isSpilledToReg()) {
diff --git a/llvm/test/CodeGen/AMDGPU/mir-print-dead-csr-fi.mir b/llvm/test/CodeGen/AMDGPU/mir-print-dead-csr-fi.mir
new file mode 100644
index 0000000000000..cccf2c113ebe5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mir-print-dead-csr-fi.mir
@@ -0,0 +1,28 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=prologepilog -o - %s | FileCheck %s
+
+# Make sure the MIR printer doesn't crash when there are dead frame indexes. The
+# CSR SGPR frame indexes are inserted, but deleted.
+
+# CHECK-LABEL: name: csr_sgpr
+# CHECK: fixedStack: []
+# CHECK: stack: []
+---
+name:            csr_sgpr
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr30_sgpr31' }
+frameInfo:
+  maxAlignment:    4
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  scratchWaveOffsetReg: '$sgpr4'
+  frameOffsetReg:  '$sgpr5'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+
+    INLINEASM &"; clobber s42", 1, 12, implicit-def dead early-clobber $sgpr42
+    S_SETPC_B64_return $sgpr30_sgpr31
+
+...

From 5d0e6b6755da38084809d38bf527164a7f285ecd Mon Sep 17 00:00:00 2001
From: David Stenberg <david.stenberg@ericsson.com>
Date: Tue, 28 May 2019 13:23:25 +0000
Subject: [PATCH 0351/1176] Stop undef fragments from closing non-overlapping
 fragments

Summary:
When DwarfDebug::buildLocationList() encountered an undef debug value,
it would truncate all open values, regardless if they were overlapping or
not. This patch fixes so that it only does that for overlapping fragments.

This change unearthed a bug that I had introduced in D57511,
which I have fixed in this patch. The code in DebugHandlerBase that
changes labels for parameter debug values could break DwarfDebug's
assumption that the labels for the entries in the debug value history
are monotonically increasing. Before this patch, that bug could result
in location list entries whose ending address was lower than the
beginning address, and with the changes for undef debug values that this
patch introduces it could trigger an assertion, due to attempting to
emit location list entries with empty ranges. A reproducer for the bug
is added in param-reg-const-mix.mir.

Reviewers: aprantl, jmorse, probinson

Reviewed By: aprantl

Subscribers: javed.absar, llvm-commits

Tags: #debug-info, #llvm

Differential Revision: https://reviews.llvm.org/D62379

llvm-svn: 361820
---
 llvm/include/llvm/CodeGen/MachineInstr.h      |  6 ++
 .../CodeGen/AsmPrinter/DebugHandlerBase.cpp   |  9 +-
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp    | 22 ++---
 .../DebugInfo/MIR/ARM/param-reg-const-mix.mir | 96 +++++++++++++++++++
 llvm/test/DebugInfo/X86/undef-fragment.ll     | 78 +++++++++++++++
 5 files changed, 197 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/DebugInfo/MIR/ARM/param-reg-const-mix.mir
 create mode 100644 llvm/test/DebugInfo/X86/undef-fragment.ll

diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 221510f47a0c5..42889dc31682d 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1005,6 +1005,12 @@ class MachineInstr
       && getOperand(1).isImm();
   }
 
+  /// Return true if the instruction is a debug value which describes a part of
+  /// a variable as unavailable.
+  bool isUndefDebugValue() const {
+    return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg();
+  }
+
   bool isPHI() const {
     return getOpcode() == TargetOpcode::PHI ||
            getOpcode() == TargetOpcode::G_PHI;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 22c28cccd8983..22f458e4b03e5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -246,8 +246,13 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
                                        Pred.getInstr()->getDebugExpression());
                           }))
             break;
-          if (!IsDescribedByReg(I->getInstr()))
-            LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin();
+          // The code that generates location lists for DWARF assumes that the
+          // entries' start labels are monotonically increasing, and since we
+          // don't change the label for fragments that are described by
+          // registers, we must bail out when encountering such a fragment.
+          if (IsDescribedByReg(I->getInstr()))
+            break;
+          LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin();
         }
       }
     }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 070b1b64a36bb..f5501a1f0ef1d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1139,16 +1139,6 @@ void DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
   for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
     const MachineInstr *Instr = EI->getInstr();
 
-    if (EI->isDbgValue()) {
-      // Check if a variable is inaccessible in this range.
-      // TODO: This should only truncate open ranges that are overlapping.
-      if (Instr->getNumOperands() > 1 &&
-          Instr->getOperand(0).isReg() && !Instr->getOperand(0).getReg()) {
-        OpenRanges.clear();
-        continue;
-      }
-    }
-
     // Remove all values that are no longer live.
     size_t Index = std::distance(EB, EI);
     auto Last =
@@ -1177,8 +1167,16 @@ void DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
     // If this history map entry has a debug value, add that to the list of
     // open ranges.
     if (EI->isDbgValue()) {
-      auto Value = getDebugLocValue(Instr);
-      OpenRanges.emplace_back(EI->getEndIndex(), Value);
+      // Do not add undef debug values, as they are redundant information in
+      // the location list entries. An undef debug results in an empty location
+      // description. If there are any non-undef fragments then padding pieces
+      // with empty location descriptions will automatically be inserted, and if
+      // all fragments are undef then the whole location list entry is
+      // redundant.
+      if (!Instr->isUndefDebugValue()) {
+        auto Value = getDebugLocValue(Instr);
+        OpenRanges.emplace_back(EI->getEndIndex(), Value);
+      }
     }
 
     // Location list entries with empty location descriptions are redundant
diff --git a/llvm/test/DebugInfo/MIR/ARM/param-reg-const-mix.mir b/llvm/test/DebugInfo/MIR/ARM/param-reg-const-mix.mir
new file mode 100644
index 0000000000000..99511280f223e
--- /dev/null
+++ b/llvm/test/DebugInfo/MIR/ARM/param-reg-const-mix.mir
@@ -0,0 +1,96 @@
+# RUN: llc -mtriple=armv4t-unknown-unknown -start-after=livedebugvalues -filetype=obj -o - %s | llvm-dwarfdump - | FileCheck %s
+
+# This reproducer is based on the following C code:
+#
+# struct S0 { int f1; int f2; int f3; };
+#
+# int a;
+#
+# void fn1(struct S0 p1) {
+#   a = p1.f1 >= fn2(p1.f2);
+# }
+#
+# and was generated using the following commands:
+# $ clang -O1 -g --target=armv4t -S -emit-llvm
+# $ llc -O1 -stop-after=livedebugvalues
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "armv4t-unknown-unknown"
+
+  ; Function Attrs: nounwind
+  define arm_aapcscc i32 @fn1([3 x i32] %p1.coerce) !dbg !7 {
+  entry:
+    %p1.coerce.fca.0.extract = extractvalue [3 x i32] %p1.coerce, 0
+    call void @llvm.dbg.value(metadata i32 %p1.coerce.fca.0.extract, metadata !17, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !18
+    %p1.coerce.fca.1.extract = extractvalue [3 x i32] %p1.coerce, 1
+    call void @llvm.dbg.value(metadata i32 %p1.coerce.fca.1.extract, metadata !17, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32)), !dbg !18
+    call void @llvm.dbg.value(metadata i32 undef, metadata !17, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32)), !dbg !18
+    %call = tail call arm_aapcscc i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %p1.coerce.fca.1.extract), !dbg !19
+    %cmp = icmp sge i32 %p1.coerce.fca.0.extract, %call, !dbg !19
+    %conv = zext i1 %cmp to i32, !dbg !19
+    ret i32 %conv, !dbg !19
+  }
+
+  declare arm_aapcscc i32 @fn2(...)
+
+  ; Function Attrs: nounwind readnone speculatable
+  declare void @llvm.dbg.value(metadata, metadata, metadata) #0
+
+  attributes #0 = { nounwind readnone speculatable }
+
+  !llvm.dbg.cu = !{!0}
+  !llvm.module.flags = !{!3, !4, !5}
+  !llvm.ident = !{!6}
+
+  !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+  !1 = !DIFile(filename: "test.c", directory: "/")
+  !2 = !{}
+  !3 = !{i32 2, !"Dwarf Version", i32 4}
+  !4 = !{i32 2, !"Debug Info Version", i32 3}
+  !5 = !{i32 1, !"min_enum_size", i32 4}
+  !6 = !{!"clang version 9.0.0"}
+  !7 = distinct !DISubprogram(name: "fn1", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+  !8 = !DISubroutineType(types: !9)
+  !9 = !{!10, !11}
+  !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+  !11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "S0", file: !1, line: 1, size: 96, elements: !12)
+  !12 = !{!13, !14, !15}
+  !13 = !DIDerivedType(tag: DW_TAG_member, name: "f1", scope: !11, file: !1, line: 1, baseType: !10, size: 32)
+  !14 = !DIDerivedType(tag: DW_TAG_member, name: "f2", scope: !11, file: !1, line: 1, baseType: !10, size: 32, offset: 32)
+  !15 = !DIDerivedType(tag: DW_TAG_member, name: "f3", scope: !11, file: !1, line: 1, baseType: !10, size: 32, offset: 64)
+  !16 = !{!17}
+  !17 = !DILocalVariable(name: "p1", arg: 1, scope: !7, file: !1, line: 3, type: !11)
+  !18 = !DILocation(line: 3, scope: !7)
+  !19 = !DILocation(line: 4, scope: !7)
+
+...
+---
+name:            fn1
+tracksRegLiveness: false
+body:             |
+  bb.0.entry:
+    $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r4, killed $lr
+    $r4 = MOVr $r0, 14, $noreg, $noreg
+    DBG_VALUE $r1, $noreg, !17, !DIExpression(DW_OP_LLVM_fragment, 32, 32), debug-location !18
+    DBG_VALUE $noreg, $noreg, !17, !DIExpression(DW_OP_LLVM_fragment, 64, 32), debug-location !18
+    DBG_VALUE $r4, $noreg, !17, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !18
+    $r0 = MOVr killed $r1, 14, $noreg, $noreg, debug-location !19
+    BL @fn2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp, implicit-def $r0, debug-location !19
+    renamable $r1 = MOVi 0, 14, $noreg, $noreg
+    CMPrr killed renamable $r4, killed renamable $r0, 14, $noreg, implicit-def $cpsr, debug-location !19
+    $r1 = MOVi 1, 10, killed $cpsr, $noreg, implicit killed renamable $r1, debug-location !19
+    $r0 = MOVr killed $r1, 14, $noreg, $noreg, debug-location !19
+    $sp = LDMIA_UPD $sp, 14, $noreg, def $r4, def $lr, debug-location !19
+    BX_RET 14, $noreg, implicit $r0, debug-location !19
+
+...
+
+# Verify that the addresses in the location list for the parameter are
+# monotonically increasing, and that the undef debug value fragment does not
+# terminate the non-overlapping fragment that is described by $r1.
+
+# CHECK: DW_AT_location (0x00000000
+# CHECK-NEXT: [0x00000008, 0x00000010): DW_OP_reg4 R4, DW_OP_piece 0x4, DW_OP_reg1 R1, DW_OP_piece 0x4
+# CHECK-NEXT: [0x00000010, 0x00000024): DW_OP_reg4 R4, DW_OP_piece 0x4)
+# CHECK-NEXT: DW_AT_name ("p1")
diff --git a/llvm/test/DebugInfo/X86/undef-fragment.ll b/llvm/test/DebugInfo/X86/undef-fragment.ll
new file mode 100644
index 0000000000000..e8c381a200e4d
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/undef-fragment.ll
@@ -0,0 +1,78 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -O2 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
+
+; This reproducer is based on the following C code:
+;
+; typedef struct { int a; int b; } S;
+;
+; extern int ext(int);
+;
+; int foo() {
+;   S s = {123, 456};
+;   ext(1);
+;   s.a = ext(2);
+;   ext(3);
+;   s.a = 789;
+;   return s.b;
+; }
+;
+; and was generated using -O2 -g -fno-inline.
+;
+; As a small note, the third dbg.value's value has been changed from %call1 to
+; undef (it would have become undef either way, but this was done to make the
+; intention of the test a bit more clear).
+
+; Verify that a location list entry describing s.b is started at the
+; non-overlapping undef debug value.
+
+; CHECK: DW_AT_location (0x00000000
+; CHECK-NEXT: {{0x[0-9a-f]+}}, [[ADDR1:0x[0-9a-f]+]]): DW_OP_constu 0x7b, DW_OP_stack_value, DW_OP_piece 0x4, DW_OP_constu 0x1c8, DW_OP_stack_value, DW_OP_piece 0x4
+; CHECK-NEXT: [[ADDR1]], [[ADDR2:0x[0-9a-f]+]]): DW_OP_piece 0x4, DW_OP_constu 0x1c8, DW_OP_stack_value, DW_OP_piece 0x4
+; CHECK-NEXT: [[ADDR2]], {{0x[0-9a-f]+}}): DW_OP_constu 0x315, DW_OP_stack_value, DW_OP_piece 0x4, DW_OP_constu 0x1c8, DW_OP_stack_value, DW_OP_piece 0x4)
+; CHECK-NEXT: DW_AT_name    ("s")
+
+; Function Attrs: noinline nounwind uwtable
+define i32 @main() !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata i32 123, metadata !12, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !18
+  call void @llvm.dbg.value(metadata i32 456, metadata !12, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32)), !dbg !18
+  %call = tail call i32 @ext(i32 1), !dbg !19
+  %call1 = tail call i32 @ext(i32 2), !dbg !20
+  call void @llvm.dbg.value(metadata i32 undef, metadata !12, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !18
+  %call2 = tail call i32 @ext(i32 3), !dbg !21
+  call void @llvm.dbg.value(metadata i32 789, metadata !12, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !18
+  ret i32 456, !dbg !22
+}
+
+declare i32 @ext(i32)
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #0
+
+attributes #0 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "undef.c", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = !{!"clang version 9.0.0"}
+!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !8, scopeLine: 5, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !{!12}
+!12 = !DILocalVariable(name: "s", scope: !7, file: !1, line: 6, type: !13)
+!13 = !DIDerivedType(tag: DW_TAG_typedef, name: "S", file: !1, line: 1, baseType: !14)
+!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !1, line: 1, size: 64, elements: !15)
+!15 = !{!16, !17}
+!16 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !14, file: !1, line: 1, baseType: !10, size: 32)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !14, file: !1, line: 1, baseType: !10, size: 32, offset: 32)
+!18 = !DILocation(line: 6, scope: !7)
+!19 = !DILocation(line: 7, scope: !7)
+!20 = !DILocation(line: 8, scope: !7)
+!21 = !DILocation(line: 9, scope: !7)
+!22 = !DILocation(line: 11, scope: !7)

From 9cd9624fb68db36835229ba5b08f9797b2f9d16b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 28 May 2019 13:46:26 +0000
Subject: [PATCH 0352/1176] [DAG] LegalizeVectorTypes - reduce scope of local
 variables. NFCI.

Move the element index/count variables into the block where they are actually used - appeases cppcheck and helps avoid shadow variable warnings.

llvm-svn: 361821
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 379ee00c90fbc..aefc2aabf64b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2812,14 +2812,13 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
 
   SDLoc dl(ConcatOps[0]);
   EVT WidenEltVT = WidenVT.getVectorElementType();
-  int Idx = 0;
 
   // while (Some element of ConcatOps is not of type MaxVT) {
   //   From the end of ConcatOps, collect elements of the same type and put
   //   them into an op of the next larger supported type
   // }
   while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
-    Idx = ConcatEnd - 1;
+    int Idx = ConcatEnd - 1;
     VT = ConcatOps[Idx--].getValueType();
     while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
       Idx--;
@@ -4378,10 +4377,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
   SDValue Index = MSC->getIndex();
   SDValue Scale = MSC->getScale();
 
-  unsigned NumElts;
   if (OpNo == 1) {
     DataOp = GetWidenedVector(DataOp);
-    NumElts = DataOp.getValueType().getVectorNumElements();
+    unsigned NumElts = DataOp.getValueType().getVectorNumElements();
 
     // Widen index.
     EVT IndexVT = Index.getValueType();

From d5a8637072f4c556b88156bd2f6237a2ead47d31 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 28 May 2019 13:54:17 +0000
Subject: [PATCH 0353/1176] [x86] split 256-bit store of concatenated vectors

This shows up as a side issue to the main problem for the AVX target example from PR37428:
https://bugs.llvm.org/show_bug.cgi?id=37428 - https://godbolt.org/z/7tpRa3

But as we can see in the pile of existing test diffs, it's actually a widespread problem
that affects any AVX or later target. Apart from a couple of oddballs, I think these are
all improvements for the reasons stated in the code comment: we do not want to enable YMM
unnecessarily (avoid vzeroupper and frequency throttling) and some cores split 256-bit
stores anyway.

We could say that MergeConsecutiveStores() is going overboard on some of these examples,
but that won't solve the problem completely. But that is the reason I'm proposing this as
a lowering rather than a combine: we will infinite loop fighting the merge code if we try
this earlier.

Differential Revision: https://reviews.llvm.org/D62498

llvm-svn: 361822
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  11 +
 llvm/test/CodeGen/X86/avg.ll                  | 402 +++++++++---------
 .../CodeGen/X86/avx-intrinsics-x86-upgrade.ll |  24 +-
 llvm/test/CodeGen/X86/avx-intrinsics-x86.ll   |  12 +-
 llvm/test/CodeGen/X86/avx512-trunc-widen.ll   |  16 +-
 llvm/test/CodeGen/X86/avx512-trunc.ll         |  16 +-
 llvm/test/CodeGen/X86/nontemporal-2.ll        |  40 +-
 llvm/test/CodeGen/X86/oddsubvector.ll         |  15 +-
 llvm/test/CodeGen/X86/pmovsx-inreg.ll         |  72 ++--
 llvm/test/CodeGen/X86/shrink_vmul-widen.ll    | 124 +++---
 llvm/test/CodeGen/X86/shrink_vmul.ll          | 124 +++---
 .../CodeGen/X86/shuffle-vs-trunc-512-widen.ll |  18 +-
 llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll |  18 +-
 llvm/test/CodeGen/X86/subvector-broadcast.ll  |  68 +--
 llvm/test/CodeGen/X86/vec_fptrunc.ll          |  10 +-
 llvm/test/CodeGen/X86/vec_saddo.ll            |  68 +--
 llvm/test/CodeGen/X86/vec_smulo.ll            |  84 ++--
 llvm/test/CodeGen/X86/vec_ssubo.ll            |  84 ++--
 llvm/test/CodeGen/X86/vec_uaddo.ll            |  24 +-
 llvm/test/CodeGen/X86/vec_umulo.ll            |  26 +-
 llvm/test/CodeGen/X86/vec_usubo.ll            |  24 +-
 llvm/test/CodeGen/X86/vector-gep.ll           | 134 +++---
 llvm/test/CodeGen/X86/vector-trunc-widen.ll   |  72 ++--
 llvm/test/CodeGen/X86/vector-trunc.ll         |  72 ++--
 .../CodeGen/X86/x86-interleaved-access.ll     |  73 ++--
 25 files changed, 786 insertions(+), 845 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7b4ce08b57860..73976f30374c2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1287,6 +1287,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);
       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
+      setOperationAction(ISD::STORE,              VT, Custom);
     }
 
     if (HasInt256)
@@ -21080,7 +21081,17 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   if (St->isTruncatingStore())
     return SDValue();
 
+  // If this is a 256-bit store of concatenated ops, we are better off splitting
+  // that store into two 128-bit stores. This avoids spurious use of 256-bit ops
+  // and each half can execute independently. Some cores would split the op into
+  // halves anyway, so the concat (vinsertf128) is purely an extra op.
   MVT StoreVT = StoredVal.getSimpleValueType();
+  if (StoreVT.is256BitVector()) {
+    if (StoredVal.getOpcode() != ISD::CONCAT_VECTORS || !StoredVal.hasOneUse())
+      return SDValue();
+    return split256BitStore(St, DAG);
+  }
+
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
          "Unexpected VT");
   if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index cfa9f11a9c73e..22a6daa999d71 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -102,11 +102,10 @@ define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8:
@@ -267,8 +266,8 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-LABEL: avg_v48i8:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
-; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm4
+; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm4
+; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[3,3,0,1]
@@ -279,10 +278,10 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm15 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[3,3,0,1]
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm10 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm11 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm14 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm12 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm13 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -297,52 +296,52 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm4
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm3
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm2, %xmm5, %xmm13
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[3,3,0,1]
+; AVX1-NEXT:    vpaddd %xmm2, %xmm5, %xmm12
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm11
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,3]
+; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm10
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd %xmm6, %xmm7, %xmm9
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm8
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm4, %xmm15, %xmm15
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm8
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd %xmm3, %xmm15, %xmm15
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm7, %xmm10, %xmm7
+; AVX1-NEXT:    vpaddd %xmm7, %xmm11, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd %xmm2, %xmm14, %xmm14
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm0, %xmm12, %xmm12
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
+; AVX1-NEXT:    vpaddd %xmm0, %xmm13, %xmm13
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 16-byte Folded Reload
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[3,3,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm6 # 16-byte Folded Reload
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm3[1,1,2,3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm4[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpsubd %xmm4, %xmm13, %xmm10
-; AVX1-NEXT:    vpsubd %xmm4, %xmm11, %xmm11
-; AVX1-NEXT:    vpsubd %xmm4, %xmm9, %xmm9
-; AVX1-NEXT:    vpsubd %xmm4, %xmm8, %xmm8
-; AVX1-NEXT:    vpsubd %xmm4, %xmm15, %xmm13
-; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpsubd %xmm4, %xmm14, %xmm0
-; AVX1-NEXT:    vpsubd %xmm4, %xmm12, %xmm2
-; AVX1-NEXT:    vpsubd %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpsubd %xmm4, %xmm6, %xmm6
-; AVX1-NEXT:    vpsubd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4, %xmm4 # 16-byte Folded Reload
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubd %xmm3, %xmm12, %xmm11
+; AVX1-NEXT:    vpsubd %xmm3, %xmm10, %xmm10
+; AVX1-NEXT:    vpsubd %xmm3, %xmm9, %xmm9
+; AVX1-NEXT:    vpsubd %xmm3, %xmm8, %xmm8
+; AVX1-NEXT:    vpsubd %xmm3, %xmm15, %xmm12
+; AVX1-NEXT:    vpsubd %xmm3, %xmm7, %xmm7
+; AVX1-NEXT:    vpsubd %xmm3, %xmm14, %xmm0
+; AVX1-NEXT:    vpsubd %xmm3, %xmm13, %xmm2
+; AVX1-NEXT:    vpsubd %xmm3, %xmm5, %xmm5
+; AVX1-NEXT:    vpsubd %xmm3, %xmm6, %xmm6
+; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubd %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrld $1, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
@@ -353,13 +352,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpsrld $1, %xmm7, %xmm2
-; AVX1-NEXT:    vpsrld $1, %xmm13, %xmm4
+; AVX1-NEXT:    vpsrld $1, %xmm12, %xmm4
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpsrld $1, %xmm8, %xmm4
 ; AVX1-NEXT:    vpsrld $1, %xmm9, %xmm5
 ; AVX1-NEXT:    vpackusdw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpsrld $1, %xmm11, %xmm5
-; AVX1-NEXT:    vpsrld $1, %xmm10, %xmm6
+; AVX1-NEXT:    vpsrld $1, %xmm10, %xmm5
+; AVX1-NEXT:    vpsrld $1, %xmm11, %xmm6
 ; AVX1-NEXT:    vpackusdw %xmm5, %xmm6, %xmm5
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
@@ -368,13 +367,12 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm4, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v48i8:
@@ -449,13 +447,12 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm2
-; AVX512F-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm1
+; AVX512F-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX512F-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
-; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX512F-NEXT:    vmovdqu %xmm2, (%rax)
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: avg_v48i8:
@@ -507,15 +504,14 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgb 48(%rdi), %xmm3, %xmm1
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb 32(%rdi), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb 48(%rdi), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8:
@@ -628,11 +624,10 @@ define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16:
@@ -685,15 +680,14 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgw 48(%rdi), %xmm3, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw 32(%rdi), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 48(%rdi), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16:
@@ -834,11 +828,10 @@ define void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8_2:
@@ -893,13 +886,12 @@ define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
 ; AVX1-NEXT:    vpavgb %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgb %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgb %xmm2, %xmm2, %xmm1
-; AVX1-NEXT:    vpavgb %xmm3, %xmm3, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpavgb %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8_2:
@@ -1013,11 +1005,10 @@ define void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16_2:
@@ -1070,15 +1061,14 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
-; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgw 48(%rsi), %xmm3, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw 32(%rsi), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 48(%rsi), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16_2:
@@ -1206,11 +1196,10 @@ define void @avg_v32i8_const(<32 x i8>* %a) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = [7.9499288951273625E-275,7.9499288951273625E-275]
 ; AVX1-NEXT:    # xmm0 = mem[0,0]
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8_const:
@@ -1258,15 +1247,14 @@ define void @avg_v64i8_const(<64 x i8>* %a) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = [7.9499288951273625E-275,7.9499288951273625E-275]
 ; AVX1-NEXT:    # xmm0 = mem[0,0]
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vpavgb 48(%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vpavgb 32(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vpavgb 32(%rdi), %xmm0, %xmm3
+; AVX1-NEXT:    vpavgb 48(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8_const:
@@ -1365,11 +1353,10 @@ define void @avg_v16i16_const(<16 x i16>* %a) nounwind {
 ; AVX1-LABEL: avg_v16i16_const:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16_const:
@@ -1416,15 +1403,14 @@ define void @avg_v32i16_const(<32 x i16>* %a) nounwind {
 ; AVX1-LABEL: avg_v32i16_const:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vpavgw 48(%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vpavgw 32(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vpavgw 32(%rdi), %xmm0, %xmm3
+; AVX1-NEXT:    vpavgw 48(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16_const:
@@ -1665,100 +1651,96 @@ define <512 x i8> @avg_v512i8_3(<512 x i8> %a, <512 x i8> %b) nounwind {
 ; AVX1-NEXT:    pushq %rbp
 ; AVX1-NEXT:    movq %rsp, %rbp
 ; AVX1-NEXT:    andq $-32, %rsp
-; AVX1-NEXT:    subq $96, %rsp
+; AVX1-NEXT:    subq $32, %rsp
 ; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    vpavgb 272(%rbp), %xmm0, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpavgb 288(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm8, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT:    vpavgb 304(%rbp), %xmm1, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpavgb 320(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm8, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, (%rsp) # 32-byte Spill
-; AVX1-NEXT:    vpavgb 336(%rbp), %xmm2, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vpavgb 352(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm8, %ymm13
-; AVX1-NEXT:    vpavgb 368(%rbp), %xmm3, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT:    vpavgb 384(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm8, %ymm14
-; AVX1-NEXT:    vpavgb 400(%rbp), %xmm4, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
-; AVX1-NEXT:    vpavgb 416(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm8, %ymm15
-; AVX1-NEXT:    vpavgb 432(%rbp), %xmm5, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
-; AVX1-NEXT:    vpavgb 448(%rbp), %xmm5, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm8, %ymm12
-; AVX1-NEXT:    vpavgb 464(%rbp), %xmm6, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
-; AVX1-NEXT:    vpavgb 480(%rbp), %xmm6, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm8, %ymm6
-; AVX1-NEXT:    vpavgb 496(%rbp), %xmm7, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
-; AVX1-NEXT:    vpavgb 512(%rbp), %xmm7, %xmm7
-; AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm8, %ymm7
-; AVX1-NEXT:    vmovdqa 16(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 32(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 528(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 544(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm8
-; AVX1-NEXT:    vmovdqa 48(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 64(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 560(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 576(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm9
-; AVX1-NEXT:    vmovdqa 80(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 96(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 592(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 608(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm10
-; AVX1-NEXT:    vmovdqa 112(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 128(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 624(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 640(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovdqa 144(%rbp), %xmm1
-; AVX1-NEXT:    vmovdqa 160(%rbp), %xmm2
-; AVX1-NEXT:    vpavgb 656(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vpavgb 672(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovdqa 176(%rbp), %xmm2
-; AVX1-NEXT:    vmovdqa 192(%rbp), %xmm3
-; AVX1-NEXT:    vpavgb 688(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vpavgb 704(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX1-NEXT:    vmovdqa 208(%rbp), %xmm3
-; AVX1-NEXT:    vmovdqa 224(%rbp), %xmm4
-; AVX1-NEXT:    vpavgb 720(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vpavgb 736(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vmovdqa 240(%rbp), %xmm4
-; AVX1-NEXT:    vpavgb 752(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vmovdqa 256(%rbp), %xmm11
-; AVX1-NEXT:    vpavgb 768(%rbp), %xmm11, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vmovaps %ymm4, 480(%rdi)
-; AVX1-NEXT:    vmovaps %ymm3, 448(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, 416(%rdi)
-; AVX1-NEXT:    vmovaps %ymm1, 384(%rdi)
-; AVX1-NEXT:    vmovaps %ymm0, 352(%rdi)
-; AVX1-NEXT:    vmovaps %ymm10, 320(%rdi)
-; AVX1-NEXT:    vmovaps %ymm9, 288(%rdi)
-; AVX1-NEXT:    vmovaps %ymm8, 256(%rdi)
-; AVX1-NEXT:    vmovaps %ymm7, 224(%rdi)
-; AVX1-NEXT:    vmovaps %ymm6, 192(%rdi)
-; AVX1-NEXT:    vmovaps %ymm12, 160(%rdi)
-; AVX1-NEXT:    vmovaps %ymm15, 128(%rdi)
-; AVX1-NEXT:    vmovaps %ymm14, 96(%rdi)
-; AVX1-NEXT:    vmovaps %ymm13, 64(%rdi)
-; AVX1-NEXT:    vmovaps (%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
-; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovdqa 256(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 768(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 496(%rdi)
+; AVX1-NEXT:    vmovdqa 240(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 752(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 480(%rdi)
+; AVX1-NEXT:    vmovdqa 224(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 736(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 464(%rdi)
+; AVX1-NEXT:    vmovdqa 208(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 720(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 448(%rdi)
+; AVX1-NEXT:    vmovdqa 192(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 704(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 432(%rdi)
+; AVX1-NEXT:    vmovdqa 176(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 688(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 416(%rdi)
+; AVX1-NEXT:    vmovdqa 160(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 672(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 400(%rdi)
+; AVX1-NEXT:    vmovdqa 144(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 656(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 384(%rdi)
+; AVX1-NEXT:    vmovdqa 128(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 640(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 368(%rdi)
+; AVX1-NEXT:    vmovdqa 112(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 624(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 352(%rdi)
+; AVX1-NEXT:    vmovdqa 96(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 608(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 336(%rdi)
+; AVX1-NEXT:    vmovdqa 80(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 592(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 320(%rdi)
+; AVX1-NEXT:    vmovdqa 64(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 576(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 304(%rdi)
+; AVX1-NEXT:    vmovdqa 48(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 560(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 288(%rdi)
+; AVX1-NEXT:    vmovdqa 32(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 544(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 272(%rdi)
+; AVX1-NEXT:    vmovdqa 16(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 528(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 256(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
+; AVX1-NEXT:    vpavgb 512(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 240(%rdi)
+; AVX1-NEXT:    vpavgb 496(%rbp), %xmm7, %xmm7
+; AVX1-NEXT:    vmovdqa %xmm7, 224(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
+; AVX1-NEXT:    vpavgb 480(%rbp), %xmm7, %xmm7
+; AVX1-NEXT:    vmovdqa %xmm7, 208(%rdi)
+; AVX1-NEXT:    vpavgb 464(%rbp), %xmm6, %xmm6
+; AVX1-NEXT:    vmovdqa %xmm6, 192(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT:    vpavgb 448(%rbp), %xmm6, %xmm6
+; AVX1-NEXT:    vmovdqa %xmm6, 176(%rdi)
+; AVX1-NEXT:    vpavgb 432(%rbp), %xmm5, %xmm5
+; AVX1-NEXT:    vmovdqa %xmm5, 160(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
+; AVX1-NEXT:    vpavgb 416(%rbp), %xmm5, %xmm5
+; AVX1-NEXT:    vmovdqa %xmm5, 144(%rdi)
+; AVX1-NEXT:    vpavgb 400(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa %xmm4, 128(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vpavgb 384(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa %xmm4, 112(%rdi)
+; AVX1-NEXT:    vpavgb 368(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa %xmm3, 96(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpavgb 352(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa %xmm3, 80(%rdi)
+; AVX1-NEXT:    vpavgb 336(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm2, 64(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpavgb 320(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm2, 48(%rdi)
+; AVX1-NEXT:    vpavgb 304(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa %xmm1, 32(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpavgb 288(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX1-NEXT:    vpavgb 272(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
 ; AVX1-NEXT:    movq %rbp, %rsp
 ; AVX1-NEXT:    popq %rbp
 ; AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index 8f0ec5030eb03..9706bf3455fef 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -725,12 +725,12 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
 ; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
-; X86-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
-; X86-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
-; X86-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
-; X86-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
-; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
+; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
+; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
+; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
+; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
+; X86-AVX-NEXT:    vmovdqu %xmm0, 16(%eax) # encoding: [0xc5,0xfa,0x7f,0x40,0x10]
+; X86-AVX-NEXT:    vmovdqu %xmm2, (%eax) # encoding: [0xc5,0xfa,0x7f,0x10]
 ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
@@ -745,12 +745,12 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
 ;
 ; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
-; X64-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
-; X64-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
-; X64-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
-; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
-; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
+; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
+; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
+; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
+; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
+; X64-AVX-NEXT:    vmovdqu %xmm0, 16(%rdi) # encoding: [0xc5,0xfa,0x7f,0x47,0x10]
+; X64-AVX-NEXT:    vmovdqu %xmm2, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x17]
 ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 2fd2b863859c7..8e48289c1042e 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -916,8 +916,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X86-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
-; X86-AVX-NEXT:    vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
-; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-AVX-NEXT:    vmovntdq %xmm0, (%eax) # encoding: [0xc5,0xf9,0xe7,0x00]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: movnt_dq:
@@ -925,24 +924,21 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X86-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
-; X86-AVX512VL-NEXT:    vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
-; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-AVX512VL-NEXT:    vmovntdq %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x00]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: movnt_dq:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X64-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
-; X64-AVX-NEXT:    vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
-; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-AVX-NEXT:    vmovntdq %xmm0, (%rdi) # encoding: [0xc5,0xf9,0xe7,0x07]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: movnt_dq:
 ; X64-AVX512VL:       # %bb.0:
 ; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X64-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
-; X64-AVX512VL-NEXT:    vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
-; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-AVX512VL-NEXT:    vmovntdq %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x07]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
   %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-widen.ll b/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
index ba451973faa04..1ce08c01773d1 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
@@ -462,12 +462,10 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
 ; KNL-LABEL: trunc_wb_512_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqa %ymm0, (%rdi)
+; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
+; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -672,8 +670,8 @@ define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -952,8 +950,8 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
index c15d33222ca0e..263f7c90441d4 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -458,12 +458,10 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
 ; KNL-LABEL: trunc_wb_512_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqa %ymm0, (%rdi)
+; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
+; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -667,8 +665,8 @@ define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -948,8 +946,8 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
index 5b39cb16afec7..aa3e7cda18c0a 100644
--- a/llvm/test/CodeGen/X86/nontemporal-2.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -1061,12 +1061,12 @@ define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1126,12 +1126,12 @@ define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1164,12 +1164,12 @@ define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1202,12 +1202,12 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index 69ea53e7e9c82..9bc6c0f380a07 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -116,13 +116,14 @@ define void @PR40815(%struct.Mat4* nocapture readonly dereferenceable(64), %stru
 ;
 ; AVX-LABEL: PR40815:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovaps 16(%rdi), %xmm0
-; AVX-NEXT:    vmovaps 48(%rdi), %xmm1
-; AVX-NEXT:    vinsertf128 $1, 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0
-; AVX-NEXT:    vmovups %ymm1, (%rsi)
-; AVX-NEXT:    vmovups %ymm0, 32(%rsi)
-; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    vmovaps (%rdi), %xmm0
+; AVX-NEXT:    vmovaps 16(%rdi), %xmm1
+; AVX-NEXT:    vmovaps 32(%rdi), %xmm2
+; AVX-NEXT:    vmovaps 48(%rdi), %xmm3
+; AVX-NEXT:    vmovaps %xmm2, 16(%rsi)
+; AVX-NEXT:    vmovaps %xmm3, (%rsi)
+; AVX-NEXT:    vmovaps %xmm0, 48(%rsi)
+; AVX-NEXT:    vmovaps %xmm1, 32(%rsi)
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: PR40815:
diff --git a/llvm/test/CodeGen/X86/pmovsx-inreg.ll b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
index 9ab6917966b38..f89223fa45834 100644
--- a/llvm/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
@@ -53,12 +53,12 @@ define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test2:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbq 2(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbq (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbq (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbq 2(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -134,12 +134,12 @@ define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
 ;
 ; AVX1-LABEL: test4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -215,12 +215,12 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
 ;
 ; AVX1-LABEL: test6:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -296,12 +296,12 @@ define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwq 4(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxwq (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxwq (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwq 4(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -377,12 +377,12 @@ define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
 ;
 ; AVX1-LABEL: test10:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -458,12 +458,12 @@ define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test12:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index 0ed79ea4af70b..7599858007407 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -215,10 +215,9 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi8:
@@ -261,9 +260,8 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi8:
@@ -349,12 +347,11 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi8:
@@ -417,11 +414,10 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi8:
@@ -648,10 +644,9 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi16:
@@ -693,9 +688,8 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi16:
@@ -780,12 +774,11 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16:
@@ -847,11 +840,10 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16:
@@ -1284,24 +1276,23 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX1-NEXT:    movl c, %esi
-; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm2
-; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm3
-; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16_sext:
@@ -1351,23 +1342,22 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X64-AVX1-LABEL: mul_16xi16_sext:
 ; X64-AVX1:       # %bb.0: # %entry
 ; X64-AVX1-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm3
 ; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16_sext:
@@ -2201,8 +2191,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    divl 32(%ecx)
 ; X86-AVX1-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm3
-; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X86-AVX1-NEXT:    xorl %edx, %edx
 ; X86-AVX1-NEXT:    divl %ecx
@@ -2255,14 +2245,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm1, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
 ; X86-AVX1-NEXT:    popl %esi
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR34947:
@@ -2435,8 +2424,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    divl 32(%rsi)
 ; X64-AVX1-NEXT:    movl %edx, %r8d
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm3
-; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm3
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X64-AVX1-NEXT:    xorl %edx, %edx
 ; X64-AVX1-NEXT:    divl %ecx
@@ -2486,13 +2475,12 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    vpinsrd $2, %r10d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpinsrd $3, %r9d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    imull $8199, %r8d, %eax # imm = 0x2007
 ; X64-AVX1-NEXT:    movl %eax, (%rax)
-; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm1, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
 ; X64-AVX1-NEXT:    popq %rbp
-; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 0c8949f246177..5e952472f7577 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -209,10 +209,9 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi8:
@@ -255,9 +254,8 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi8:
@@ -343,12 +341,11 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi8:
@@ -411,11 +408,10 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi8:
@@ -640,10 +636,9 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi16:
@@ -685,9 +680,8 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi16:
@@ -772,12 +766,11 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16:
@@ -839,11 +832,10 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16:
@@ -1258,24 +1250,23 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX1-NEXT:    movl c, %esi
-; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm2
-; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm3
-; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16_sext:
@@ -1325,23 +1316,22 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X64-AVX1-LABEL: mul_16xi16_sext:
 ; X64-AVX1:       # %bb.0: # %entry
 ; X64-AVX1-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm3
 ; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16_sext:
@@ -2157,8 +2147,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    divl 32(%ecx)
 ; X86-AVX1-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm3
-; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X86-AVX1-NEXT:    xorl %edx, %edx
 ; X86-AVX1-NEXT:    divl %ecx
@@ -2211,14 +2201,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm1, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
 ; X86-AVX1-NEXT:    popl %esi
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR34947:
@@ -2391,8 +2380,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    divl 32(%rsi)
 ; X64-AVX1-NEXT:    movl %edx, %r8d
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm3
-; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm3
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X64-AVX1-NEXT:    xorl %edx, %edx
 ; X64-AVX1-NEXT:    divl %ecx
@@ -2442,13 +2431,12 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    vpinsrd $2, %r10d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpinsrd $3, %r9d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    imull $8199, %r8d, %eax # imm = 0x2007
 ; X64-AVX1-NEXT:    movl %eax, (%rax)
-; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm1, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
 ; X64-AVX1-NEXT:    popq %rbp
-; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
index 1a6bdd3aaa407..737925eca0440 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
@@ -88,23 +88,21 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ; AVX512F-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512F-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512VL-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
index 19031bbb2c0f8..6f94e0c608683 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -88,23 +88,21 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ; AVX512F-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512F-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512VL-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 3ce584eff2a9e..7ecfac5151f2e 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -835,24 +835,24 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>*
 define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
 ; X32-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X32-AVX1:       # %bb.0: # %entry
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [3,0,4,0]
-; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,0,2,0]
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,0,2,0]
+; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm4
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [3,0,4,0]
 ; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [1,0,2,0,3,0,4,0]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm6
-; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm4
-; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; X32-AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
-; X32-AVX1-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-AVX1-NEXT:    vmovups %ymm0, ga4
+; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm6 = [1,0,2,0,3,0,4,0]
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm7
+; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm2, %ymm2
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
+; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm5
+; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; X32-AVX1-NEXT:    vandps %ymm6, %ymm1, %ymm1
+; X32-AVX1-NEXT:    vandps %ymm6, %ymm2, %ymm2
+; X32-AVX1-NEXT:    vmovdqu %xmm0, ga4+16
+; X32-AVX1-NEXT:    vmovdqu %xmm4, ga4
 ; X32-AVX1-NEXT:    vmovups %ymm2, gb4+32
 ; X32-AVX1-NEXT:    vmovups %ymm1, gb4
 ; X32-AVX1-NEXT:    vzeroupper
@@ -886,24 +886,24 @@ define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
 ;
 ; X64-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X64-AVX1:       # %bb.0: # %entry
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [3,4]
-; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm3, %xmm3
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2]
+; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2]
+; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm4
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [3,4]
 ; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [1,2,3,4]
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm6
-; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm2, %xmm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm4
-; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; X64-AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
-; X64-AVX1-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vmovups %ymm0, {{.*}}(%rip)
+; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm6 = [1,2,3,4]
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm7
+; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
+; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm5
+; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; X64-AVX1-NEXT:    vandps %ymm6, %ymm1, %ymm1
+; X64-AVX1-NEXT:    vandps %ymm6, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vmovdqu %xmm0, ga4+{{.*}}(%rip)
+; X64-AVX1-NEXT:    vmovdqu %xmm4, {{.*}}(%rip)
 ; X64-AVX1-NEXT:    vmovups %ymm2, gb4+{{.*}}(%rip)
 ; X64-AVX1-NEXT:    vmovups %ymm1, {{.*}}(%rip)
 ; X64-AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec_fptrunc.ll b/llvm/test/CodeGen/X86/vec_fptrunc.ll
index bb6be6cd9e84b..e7318d9d69723 100644
--- a/llvm/test/CodeGen/X86/vec_fptrunc.ll
+++ b/llvm/test/CodeGen/X86/vec_fptrunc.ll
@@ -99,9 +99,8 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
 ; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
-; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
-; X32-AVX-NEXT:    vzeroupper
+; X32-AVX-NEXT:    vmovupd %xmm1, 16(%eax)
+; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
 ; X32-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: fptrunc_frommem8:
@@ -120,9 +119,8 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
 ; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
-; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
-; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    vmovupd %xmm1, 16(%rsi)
+; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
 ; X64-AVX-NEXT:    retq
 entry:
   %0 = load <8 x double>, <8 x double>* %in
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index aeb1951fbef87..d37795b55cdc2 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -693,8 +693,8 @@ define <8 x i32> @saddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm8, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: saddo_v8i32:
@@ -824,48 +824,48 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
 ; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm10
-; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpandn %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm8
+; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm1
+; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm7
 ; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm7
+; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm1
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm1, %xmm7
-; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm7
 ; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm7, %xmm4
-; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm2
-; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
-; AVX1-NEXT:    vpandn %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm10, %ymm3
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm7, %xmm3
+; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm0
+; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm7, %xmm0
+; AVX1-NEXT:    vpandn %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm3, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm9, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm10, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: saddo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index ab97c51df410b..3f53f9f2250cc 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -973,8 +973,8 @@ define <8 x i32> @smulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm5, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: smulo_v8i32:
@@ -1266,59 +1266,59 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpmuldq %xmm4, %xmm6, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT:    vpmulld %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vpsrad $31, %xmm4, %xmm6
+; AVX1-NEXT:    vpmulld %xmm4, %xmm6, %xmm8
+; AVX1-NEXT:    vpsrad $31, %xmm8, %xmm6
 ; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
-; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; AVX1-NEXT:    vpmuldq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; AVX1-NEXT:    vpmuldq %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT:    vpmuldq %xmm3, %xmm1, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm5, %xmm3
-; AVX1-NEXT:    vpxor %xmm8, %xmm3, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm9
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm7[0,1],xmm4[2,3],xmm7[4,5],xmm4[6,7]
+; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm9
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
-; AVX1-NEXT:    vpmuldq %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpmuldq %xmm5, %xmm7, %xmm6
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
+; AVX1-NEXT:    vpmuldq %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpmuldq %xmm4, %xmm7, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm6[0,1],xmm3[2,3],xmm6[4,5],xmm3[6,7]
-; AVX1-NEXT:    vpmulld %xmm5, %xmm7, %xmm5
-; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm8, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm6[0,1],xmm1[2,3],xmm6[4,5],xmm1[6,7]
+; AVX1-NEXT:    vpmulld %xmm4, %xmm7, %xmm4
+; AVX1-NEXT:    vpsrad $31, %xmm4, %xmm6
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
 ; AVX1-NEXT:    vpmuldq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpmuldq %xmm2, %xmm0, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3],xmm7[4,5],xmm6[6,7]
-; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm6, %xmm2
-; AVX1-NEXT:    vpxor %xmm8, %xmm2, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm9, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm4
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm6, %xmm0
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm9, %xmm0, %xmm1
+; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm4, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm3, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm8, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: smulo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 15c0531d67a75..3dc73e3b4ba1e 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -714,8 +714,8 @@ define <8 x i32> @ssubo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm8, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm6, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ssubo_v8i32:
@@ -850,52 +850,52 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm9, %xmm4
 ; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
-; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm9, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpandn %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm8
+; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm9, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpandn %xmm1, %xmm7, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm6
-; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm7
-; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpandn %xmm3, %xmm6, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm6
+; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm1, %xmm6
+; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm7
+; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm4
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpandn %xmm1, %xmm6, %xmm1
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm4
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm7
-; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm7, %xmm6
-; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm2
-; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
-; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpandn %xmm2, %xmm6, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm10, %ymm1, %ymm4
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm6, %xmm4
+; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm0
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm6, %xmm0
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpandn %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm4, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm3, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm10, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm7, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ssubo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 41a0e258e3d12..4e9cd2efb74b2 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -501,8 +501,8 @@ define <8 x i32> @uaddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uaddo_v8i32:
@@ -633,19 +633,19 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm4, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm5, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uaddo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 0c95b73853e96..0bcaacc21dfe4 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -843,10 +843,10 @@ define <8 x i32> @umulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm6, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm5, %ymm2
-; AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm1
+; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
 ; AVX1-NEXT:    vmovaps %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1111,23 +1111,23 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm9, %xmm5, %xmm5
 ; AVX1-NEXT:    vpackssdw %xmm13, %xmm5, %xmm5
 ; AVX1-NEXT:    vpacksswb %xmm11, %xmm5, %xmm5
+; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpmulld %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm2
-; AVX1-NEXT:    vpmulld %xmm10, %xmm12, %xmm0
-; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm3
+; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpmulld %xmm10, %xmm12, %xmm6
 ; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[1,1,2,3]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[2,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm5[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[3,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm6, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umulo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index b662ac45caf60..c5a7b19cf14dd 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -525,8 +525,8 @@ define <8 x i32> @usubo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: usubo_v8i32:
@@ -671,19 +671,19 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm4, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm5, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: usubo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vector-gep.ll b/llvm/test/CodeGen/X86/vector-gep.ll
index 8f62fe5382564..693380a48ee22 100644
--- a/llvm/test/CodeGen/X86/vector-gep.ll
+++ b/llvm/test/CodeGen/X86/vector-gep.ll
@@ -122,74 +122,88 @@ define <64 x i16*> @AGEP9(i16* %param, <64 x i32> %off) nounwind {
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-32, %esp
-; CHECK-NEXT:    subl $96, %esp
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm4
-; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm3
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
+; CHECK-NEXT:    subl $160, %esp
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm3
+; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm5
+; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
+; CHECK-NEXT:    vmovdqa %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; CHECK-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, (%esp) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm4
 ; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
+; CHECK-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
+; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm1
 ; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
-; CHECK-NEXT:    vmovaps %ymm0, (%esp) # 32-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
-; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
-; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
-; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm4
-; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm5
-; CHECK-NEXT:    vpaddd %xmm5, %xmm5, %xmm5
-; CHECK-NEXT:    vpaddd %xmm5, %xmm3, %xmm5
-; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
-; CHECK-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm5
-; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm3, %xmm6
-; CHECK-NEXT:    vpaddd %xmm5, %xmm5, %xmm5
-; CHECK-NEXT:    vpaddd %xmm5, %xmm3, %xmm5
-; CHECK-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
-; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm6
-; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
+; CHECK-NEXT:    vpaddd %xmm1, %xmm5, %xmm1
+; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm6
 ; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm3, %xmm6
-; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm6, %ymm6
+; CHECK-NEXT:    vpaddd %xmm6, %xmm5, %xmm6
+; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm2
+; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpaddd %xmm2, %xmm5, %xmm2
 ; CHECK-NEXT:    vmovdqa 152(%ebp), %xmm7
 ; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
-; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm7, %xmm5, %xmm7
+; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm0
 ; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm0, %ymm0
-; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
-; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
 ; CHECK-NEXT:    movl 8(%ebp), %eax
-; CHECK-NEXT:    vmovaps %ymm1, 224(%eax)
-; CHECK-NEXT:    vmovaps %ymm0, 192(%eax)
-; CHECK-NEXT:    vmovaps %ymm6, 160(%eax)
-; CHECK-NEXT:    vmovaps %ymm5, 128(%eax)
-; CHECK-NEXT:    vmovaps %ymm4, 96(%eax)
-; CHECK-NEXT:    vmovaps %ymm2, 64(%eax)
-; CHECK-NEXT:    vmovaps (%esp), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    vmovaps %ymm0, 32(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    vmovaps %ymm0, (%eax)
+; CHECK-NEXT:    vmovdqa %xmm3, 240(%eax)
+; CHECK-NEXT:    vmovdqa %xmm0, 224(%eax)
+; CHECK-NEXT:    vmovdqa %xmm7, 208(%eax)
+; CHECK-NEXT:    vmovdqa %xmm2, 192(%eax)
+; CHECK-NEXT:    vmovdqa %xmm6, 176(%eax)
+; CHECK-NEXT:    vmovdqa %xmm1, 160(%eax)
+; CHECK-NEXT:    vmovdqa %xmm4, 144(%eax)
+; CHECK-NEXT:    vmovaps (%esp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 128(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 112(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 96(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 80(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 64(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 48(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 32(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 16(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, (%eax)
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index c6b36e4aae783..327cc7917e912 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -668,14 +668,14 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX1-LABEL: trunc16i32_16i16:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -686,8 +686,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -717,16 +717,16 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -792,16 +792,16 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_lshr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1291,14 +1291,14 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX1-LABEL: trunc32i16_32i8:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1317,23 +1317,19 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ;
 ; AVX512F-LABEL: trunc32i16_32i8:
 ; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc32i16_32i8:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 56e86a6bc95fd..0027fbe2657de 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -678,14 +678,14 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX1-LABEL: trunc16i32_16i16:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -696,8 +696,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -727,16 +727,16 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -802,16 +802,16 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_lshr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1301,14 +1301,14 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX1-LABEL: trunc32i16_32i8:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1327,23 +1327,19 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ;
 ; AVX512F-LABEL: trunc32i16_32i8:
 ; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc32i16_32i8:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index bff39467c1eb2..8cd01b631d601 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -341,11 +341,10 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vmovdqa %xmm0, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_store_vf16_i8_stride4:
@@ -358,11 +357,10 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm4, %ymm0
-; AVX2-NEXT:    vmovdqa %ymm0, 32(%rdi)
-; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vmovdqa %xmm0, 48(%rdi)
+; AVX2-NEXT:    vmovdqa %xmm4, 32(%rdi)
+; AVX2-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX2-NEXT:    vmovdqa %xmm3, (%rdi)
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_store_vf16_i8_stride4:
@@ -888,37 +886,20 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) {
 }
 
 define void @interleaved_store_vf8_i8_stride4(<8 x i8> %x1, <8 x i8> %x2, <8 x i8> %x3, <8 x i8> %x4, <32 x i8>* %p) {
-; AVX1-LABEL: interleaved_store_vf8_i8_stride4:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
-; AVX1-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2OR512-LABEL: interleaved_store_vf8_i8_stride4:
-; AVX2OR512:       # %bb.0:
-; AVX2OR512-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX2OR512-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
-; AVX2OR512-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX2OR512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2OR512-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2OR512-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
-; AVX2OR512-NEXT:    vmovdqa %ymm0, (%rdi)
-; AVX2OR512-NEXT:    vzeroupper
-; AVX2OR512-NEXT:    retq
+; AVX-LABEL: interleaved_store_vf8_i8_stride4:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
+; AVX-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX-NEXT:    retq
 %v1 = shufflevector <8 x i8> %x1, <8 x i8> %x2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %v2 = shufflevector <8 x i8> %x3, <8 x i8> %x4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %interleaved.vec = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0,i32 8,i32 16,i32 24,i32 1,i32 9,i32 17,i32 25,i32 2,i32 10,i32 18,i32 26,i32 3,i32 11,i32 19,i32 27,i32 4,i32 12,i32 20,i32 28,i32 5,i32 13,i32 21,i32 29,i32 6,i32 14,i32 22,i32 30,i32 7,i32 15,i32 23,i32 31>
@@ -1096,10 +1077,9 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovdqu %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rdi)
 ; AVX1-NEXT:    vmovdqu %xmm2, 32(%rdi)
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_store_vf16_i8_stride3:
@@ -1116,10 +1096,9 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vmovdqu %xmm0, 16(%rdi)
+; AVX2-NEXT:    vmovdqu %xmm1, (%rdi)
 ; AVX2-NEXT:    vmovdqu %xmm2, 32(%rdi)
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_store_vf16_i8_stride3:

From b4a394506c9a0335030b6ea0f91a13e65f680d74 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 13:56:21 +0000
Subject: [PATCH 0354/1176] [clangd] Compute expected type for templates

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62515

llvm-svn: 361823
---
 clang-tools-extra/clangd/ExpectedTypes.cpp    | 10 +++++-
 .../clangd/unittests/ExpectedTypeTest.cpp     | 32 ++++++++++++++++---
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clangd/ExpectedTypes.cpp b/clang-tools-extra/clangd/ExpectedTypes.cpp
index 886b5db0cd0c6..3b0779ea66bc6 100644
--- a/clang-tools-extra/clangd/ExpectedTypes.cpp
+++ b/clang-tools-extra/clangd/ExpectedTypes.cpp
@@ -8,9 +8,11 @@
 
 #include "ExpectedTypes.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Type.h"
 #include "clang/Index/USRGeneration.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
 
 namespace clang {
@@ -41,7 +43,13 @@ static const Type *toEquivClass(ASTContext &Ctx, QualType T) {
 
 static llvm::Optional<QualType>
 typeOfCompletion(const CodeCompletionResult &R) {
-  auto *VD = dyn_cast_or_null<ValueDecl>(R.Declaration);
+  const NamedDecl *D = R.Declaration;
+  if (!D)
+    return llvm::None;
+  // Templates do not have a type on their own, look at the templated decl.
+  if (auto *Template = dyn_cast<TemplateDecl>(D))
+    D = Template->getTemplatedDecl();
+  auto *VD = dyn_cast<ValueDecl>(D);
   if (!VD)
     return llvm::None; // We handle only variables and functions below.
   auto T = VD->getType();
diff --git a/clang-tools-extra/clangd/unittests/ExpectedTypeTest.cpp b/clang-tools-extra/clangd/unittests/ExpectedTypeTest.cpp
index 8d2d60ebe5547..0315f4de7482b 100644
--- a/clang-tools-extra/clangd/unittests/ExpectedTypeTest.cpp
+++ b/clang-tools-extra/clangd/unittests/ExpectedTypeTest.cpp
@@ -11,6 +11,7 @@
 #include "TestTU.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/StringRef.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -31,16 +32,14 @@ class ExpectedTypeConversionTest : public ::testing::Test {
     AST = TestTU::withCode(Code).build();
   }
 
-  const ValueDecl *decl(llvm::StringRef Name) {
-    return &cast<ValueDecl>(findDecl(*AST, Name));
-  }
+  const NamedDecl *decl(llvm::StringRef Name) { return &findDecl(*AST, Name); }
 
   QualType typeOf(llvm::StringRef Name) {
-    return decl(Name)->getType().getCanonicalType();
+    return cast<ValueDecl>(decl(Name))->getType().getCanonicalType();
   }
 
   /// An overload for convenience.
-  llvm::Optional<OpaqueType> fromCompletionResult(const ValueDecl *D) {
+  llvm::Optional<OpaqueType> fromCompletionResult(const NamedDecl *D) {
     return OpaqueType::fromCompletionResult(
         ASTCtx(), CodeCompletionResult(D, CCP_Declaration));
   }
@@ -148,6 +147,29 @@ TEST_F(ExpectedTypeConversionTest, FunctionReturns) {
   EXPECT_EQ(fromCompletionResult(decl("returns_ptr")), IntPtrTy);
 }
 
+TEST_F(ExpectedTypeConversionTest, Templates) {
+  build(R"cpp(
+template <class T>
+int* returns_not_dependent();
+template <class T>
+T* returns_dependent();
+
+template <class T>
+int* var_not_dependent = nullptr;
+template <class T>
+T* var_dependent = nullptr;
+
+int* int_ptr_;
+  )cpp");
+
+  auto IntPtrTy = *OpaqueType::fromType(ASTCtx(), typeOf("int_ptr_"));
+  EXPECT_EQ(fromCompletionResult(decl("returns_not_dependent")), IntPtrTy);
+  EXPECT_EQ(fromCompletionResult(decl("returns_dependent")), llvm::None);
+
+  EXPECT_EQ(fromCompletionResult(decl("var_not_dependent")), IntPtrTy);
+  EXPECT_EQ(fromCompletionResult(decl("var_dependent")), llvm::None);
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang

From 833c5abbce5bf8766b87e6b20d280e60badbce63 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 28 May 2019 14:04:48 +0000
Subject: [PATCH 0355/1176] Add release note entries for recent typo correction
 changes

Differential Revision: https://reviews.llvm.org/D62523

llvm-svn: 361824
---
 clang/docs/ReleaseNotes.rst | 7 +++++--
 lld/docs/ReleaseNotes.rst   | 6 ++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1f09655027fc4..a13c454083150 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -75,7 +75,7 @@ future versions of Clang.
 Modified Compiler Flags
 -----------------------
 
-- `clang -dumpversion` now returns the version of Clang itself.
+- ``clang -dumpversion`` now returns the version of Clang itself.
 
 - ...
 
@@ -92,7 +92,10 @@ Attribute Changes in Clang
 Windows Support
 ---------------
 
-- ...
+- clang-cl now treats non-existent files as possible typos for flags,
+  ``clang-cl /diagnostic:caret /c test.cc`` for example now produces
+  ``clang: error: no such file or directory: '/diagnostic:caret'; did you mean '/diagnostics:caret'?``
+
 
 
 C Language Changes in Clang
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 250210279e648..76207fec11acb 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -24,11 +24,17 @@ Non-comprehensive list of changes in this release
 ELF Improvements
 ----------------
 
+* ld.lld now has typo suggestions for flags:
+  ``$ ld.lld --call-shared`` now prints
+  ``unknown argument '--call-shared', did you mean '--call_shared'``.
+
 * ...
 
 COFF Improvements
 -----------------
 
+* Like the ELF driver, lld-link now has typo suggestions for flags.
+
 * lld-link now correctly reports duplicate symbol errors for obj files
   that were compiled with /Gy.
 

From a815cbb0105276a2258d592970c9322748d3ad49 Mon Sep 17 00:00:00 2001
From: Michal Gorny <mgorny@gentoo.org>
Date: Tue, 28 May 2019 14:10:47 +0000
Subject: [PATCH 0356/1176] [openmp] [test] Skip kernel-breaking tests on
 NetBSD

The omp_taskloop_num_tasks and omp_taskwait have deadlooped
on the NetBSD buildbot previously, practically hanging the host running
it.  Disable them until we can find a good solution, or make the kernel
less fragile.

llvm-svn: 361825
---
 openmp/runtime/test/lit.cfg                          | 3 +++
 openmp/runtime/test/tasking/omp_taskloop_num_tasks.c | 6 ++++++
 openmp/runtime/test/tasking/omp_taskwait.c           | 4 ++++
 3 files changed, 13 insertions(+)

diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg
index 6316ae7481bcc..ac5f446952565 100644
--- a/openmp/runtime/test/lit.cfg
+++ b/openmp/runtime/test/lit.cfg
@@ -103,6 +103,9 @@ if config.libomp_omp_version >= 40:
 if 'Linux' in config.operating_system:
     config.available_features.add("linux")
 
+if config.operating_system == 'NetBSD':
+    config.available_features.add("netbsd")
+
 if config.operating_system in ['Linux', 'Windows']:
     config.available_features.add('affinity')
 
diff --git a/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c b/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
index 75efea6e1c641..bed2c5b207edc 100644
--- a/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
+++ b/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
@@ -1,3 +1,5 @@
+// This test is known to be fragile on NetBSD kernel at the moment.
+// UNSUPPORTED: netbsd
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
 // REQUIRES: openmp-4.5
@@ -5,6 +7,10 @@
 // These compilers don't support the taskloop construct
 // UNSUPPORTED: gcc-4, gcc-5, icc-16
 
+// This test is known to be fragile on NetBSD kernel at the moment,
+// https://bugs.llvm.org/show_bug.cgi?id=42020.
+// UNSUPPORTED: netbsd
+
 /*
  * Test for taskloop
  * Method: caculate how many times the iteration space is dispatched
diff --git a/openmp/runtime/test/tasking/omp_taskwait.c b/openmp/runtime/test/tasking/omp_taskwait.c
index c3a0ea7ee600a..584eceb58075c 100644
--- a/openmp/runtime/test/tasking/omp_taskwait.c
+++ b/openmp/runtime/test/tasking/omp_taskwait.c
@@ -1,4 +1,8 @@
 // RUN: %libomp-compile-and-run
+
+// This test is known to be fragile on NetBSD kernel at the moment,
+// https://bugs.llvm.org/show_bug.cgi?id=42020.
+// UNSUPPORTED: netbsd
 #include <stdio.h>
 #include <math.h>
 #include "omp_testsuite.h"

From c4eee4054aa2fc94133e45318828f4e39f4c3fbd Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 14:14:48 +0000
Subject: [PATCH 0357/1176] [CMake] Default options for faster executables on
 MSVC

Differential Revision: https://reviews.llvm.org/D55056

llvm-svn: 361826
---
 llvm/CMakeLists.txt                        |  4 ++++
 llvm/cmake/modules/ChooseMSVCCRT.cmake     | 26 +++++++++++++++++++++-
 llvm/cmake/modules/HandleLLVMOptions.cmake |  8 +++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 895f9ab7189d1..5862644dff118 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -435,6 +435,10 @@ option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF)
 option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
 
+if (MSVC)
+   option(LLVM_ENABLE_INCREMENTAL_LINK "Link incrementally. Enabling it might produce slower executables." OFF)
+endif()
+
 option(LLVM_ENABLE_DUMP "Enable dump functions even when assertions are disabled" OFF)
 
 if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
diff --git a/llvm/cmake/modules/ChooseMSVCCRT.cmake b/llvm/cmake/modules/ChooseMSVCCRT.cmake
index 0e6e1aa55254e..50bddf83ef549 100644
--- a/llvm/cmake/modules/ChooseMSVCCRT.cmake
+++ b/llvm/cmake/modules/ChooseMSVCCRT.cmake
@@ -50,6 +50,17 @@ macro(set_flag_in_var flagsvar regex flag)
   set(${flagsvar} "${${flagsvar}}" CACHE STRING "${flagsvar_docs}" FORCE)
 endmacro(set_flag_in_var)
 
+macro(disable_MT_if_LLDB build message)
+  if (LLVM_TOOL_LLDB_BUILD)
+    if ((NOT ${build} STREQUAL "DEBUG") AND (LLVM_USE_CRT_${build} STREQUAL "MT"))
+      if (LLVM_TOOL_CLANG_BUILD OR LLVM_TOOL_LLD_BUILD)
+        set(performance " This might impact runtime performance for Clang or LLD. Preferably build them separately.")
+      endif()
+      message(WARNING "${message}.${performance}")
+      set(LLVM_USE_CRT_${build} "MD")
+    endif()
+  endif()
+endmacro(disable_MT_if_LLDB)
 
 macro(choose_msvc_crt MSVC_CRT)
   if(LLVM_USE_CRT)
@@ -66,13 +77,26 @@ variables (LLVM_USE_CRT_DEBUG, etc) instead.")
       get_current_crt(LLVM_USE_CRT_${build}
         MSVC_CRT_REGEX
         CMAKE_CXX_FLAGS_${build})
+
+      # Make /MT the default in Release builds to make them faster
+      # and avoid the DLL function thunking.
+      if ((${build} STREQUAL "MINSIZEREL") OR
+          (${build} STREQUAL "RELEASE") OR
+          (${build} STREQUAL "RELWITHDEBINFO"))
+          set(LLVM_USE_CRT_${build} "MT")
+      endif()
+
+      disable_MT_if_LLDB(${build} "Using /MD as required by LLDB")
+
       set(LLVM_USE_CRT_${build}
         "${LLVM_USE_CRT_${build}}"
         CACHE STRING "Specify VC++ CRT to use for ${build_type} configurations."
         FORCE)
       set_property(CACHE LLVM_USE_CRT_${build}
         PROPERTY STRINGS ;${${MSVC_CRT}})
-    endif(NOT LLVM_USE_CRT_${build})
+    else()
+      disable_MT_if_LLDB(${build} "Disabling /MT as required by LLDB")
+    endif()
   endforeach(build_type)
 
   foreach(build_type ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE})
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index cb9a01e1d39f7..3154c1487fa29 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -418,6 +418,14 @@ if( MSVC )
   # "Enforce type conversion rules".
   append("/Zc:rvalueCast" CMAKE_CXX_FLAGS)
 
+  if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" AND NOT LLVM_ENABLE_INCREMENTAL_LINK)
+    foreach(CONFIG RELEASE RELWITHDEBINFO MINSIZEREL)
+      foreach(FLAG EXE MODULE SHARED STATIC)
+        string(REGEX REPLACE "[-/](INCREMENTAL:YES|INCREMENTAL:NO|INCREMENTAL)" "/INCREMENTAL:NO" CMAKE_${FLAG}_LINKER_FLAGS_${CONFIG} "${CMAKE_${FLAG}_LINKER_FLAGS_${CONFIG}}")
+      endforeach()
+    endforeach()
+  endif()
+
   if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT LLVM_ENABLE_LTO)
     # clang-cl and cl by default produce non-deterministic binaries because
     # link.exe /incremental requires a timestamp in the .obj file.  clang-cl

From 6d458fa86631523d19fe07af07b126910b74e3fc Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Tue, 28 May 2019 14:17:48 +0000
Subject: [PATCH 0358/1176] Add constrained intrinsic tests for powerpc64 and
 powerpc64le.

Submitted by:	Drew Wock
Reviewed by:	Hal Finkel
Approved by:	Hal Finkel
Differential Revision:	https://reviews.llvm.org/D62388

llvm-svn: 361827
---
 .../vector-constrained-fp-intrinsics.ll       | 10810 ++++++++++++++++
 1 file changed, 10810 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
new file mode 100644
index 0000000000000..94d2f94ddd401
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -0,0 +1,10810 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu < %s | FileCheck --check-prefix=PC64LE %s
+; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s | FileCheck --check-prefix=PC64LE9 %s
+; RUN: llc -O3 -mtriple=powerpc64-linux-gnu < %s | FileCheck --check-prefix=PC64 %s
+
+define <1 x float> @constrained_vector_fdiv_v1f32() {
+; PC64LE-LABEL: constrained_vector_fdiv_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI0_1@toc@l(4)
+; PC64LE-NEXT:    xsdivsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI0_1@toc@l(3)
+; PC64LE9-NEXT:    xsdivsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fdiv_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI0_1@toc@l(3)
+; PC64-NEXT:    fdivs 1, 1, 0
+; PC64-NEXT:    blr
+entry:
+  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %div
+}
+
+define <2 x double> @constrained_vector_fdiv_v2f64() {
+; PC64LE-LABEL: constrained_vector_fdiv_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI1_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvdivdp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvdivdp 34, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fdiv_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI1_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI1_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI1_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI1_2@toc@l(3)
+; PC64-NEXT:    fdiv 1, 1, 0
+; PC64-NEXT:    fdiv 2, 2, 0
+; PC64-NEXT:    blr
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+define <3 x float> @constrained_vector_fdiv_v3f32() {
+; PC64LE-LABEL: constrained_vector_fdiv_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI2_3@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI2_2@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI2_3@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI2_2@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; PC64LE-NEXT:    xsdivsp 1, 1, 0
+; PC64LE-NEXT:    lfs 3, .LCPI2_1@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
+; PC64LE-NEXT:    xsdivsp 2, 2, 0
+; PC64LE-NEXT:    addi 3, 3, .LCPI2_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xsdivsp 0, 3, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI2_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_2@toc@ha
+; PC64LE9-NEXT:    xsdivsp 1, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI2_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_3@toc@ha
+; PC64LE9-NEXT:    lfs 3, .LCPI2_3@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI2_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    xsdivsp 2, 2, 0
+; PC64LE9-NEXT:    xsdivsp 0, 3, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fdiv_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI2_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI2_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI2_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI2_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI2_3@toc@l(3)
+; PC64-NEXT:    fdivs 1, 1, 0
+; PC64-NEXT:    fdivs 2, 2, 0
+; PC64-NEXT:    fdivs 3, 3, 0
+; PC64-NEXT:    blr
+entry:
+  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %div
+}
+
+define <3 x double> @constrained_vector_fdiv_v3f64() {
+; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI3_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI3_2@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI3_3@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
+; PC64LE-NEXT:    lfs 3, .LCPI3_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvdivdp 2, 1, 0
+; PC64LE-NEXT:    lfs 0, .LCPI3_1@toc@l(4)
+; PC64LE-NEXT:    xsdivdp 3, 0, 3
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI3_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI3_2@toc@l
+; PC64LE9-NEXT:    xsdivdp 3, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI3_3@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvdivdp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fdiv_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI3_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI3_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI3_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI3_3@toc@l(3)
+; PC64-NEXT:    fdiv 1, 1, 0
+; PC64-NEXT:    fdiv 2, 2, 0
+; PC64-NEXT:    fdiv 3, 3, 0
+; PC64-NEXT:    blr
+entry:
+  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %div
+}
+
+define <4 x double> @constrained_vector_fdiv_v4f64() {
+; PC64LE-LABEL: constrained_vector_fdiv_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI4_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI4_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 4, .LCPI4_1@toc@l
+; PC64LE-NEXT:    addi 4, 5, .LCPI4_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvdivdp 34, 1, 0
+; PC64LE-NEXT:    xvdivdp 35, 2, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI4_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI4_2@toc@l
+; PC64LE9-NEXT:    xvdivdp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvdivdp 35, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fdiv_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI4_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI4_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI4_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI4_3@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI4_4@toc@ha
+; PC64-NEXT:    fdiv 1, 1, 0
+; PC64-NEXT:    lfs 4, .LCPI4_4@toc@l(3)
+; PC64-NEXT:    fdiv 2, 2, 0
+; PC64-NEXT:    fdiv 3, 3, 0
+; PC64-NEXT:    fdiv 4, 4, 0
+; PC64-NEXT:    blr
+entry:
+  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %div
+}
+
+define <1 x float> @constrained_vector_frem_v1f32() {
+; PC64LE-LABEL: constrained_vector_frem_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI5_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI5_1@toc@l(4)
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI5_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_frem_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI5_1@toc@l(3)
+; PC64-NEXT:    bl fmodf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %rem
+}
+
+define <2 x double> @constrained_vector_frem_v2f64() {
+; PC64LE-LABEL: constrained_vector_frem_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI6_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI6_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI6_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_frem_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI6_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %rem
+}
+
+define <3 x float> @constrained_vector_frem_v3f32() {
+; PC64LE-LABEL: constrained_vector_frem_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI7_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI7_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_4@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI7_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI7_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI7_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_frem_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI7_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmodf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmodf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmodf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %rem
+}
+
+define <3 x double> @constrained_vector_frem_v3f64() {
+; PC64LE-LABEL: constrained_vector_frem_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI8_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI8_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI8_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_frem_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI8_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %rem
+}
+
+define <4 x double> @constrained_vector_frem_v4f64() {
+; PC64LE-LABEL: constrained_vector_frem_v4f64:
+; PC64LE:       # %bb.0:
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI9_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI9_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v4f64:
+; PC64LE9:       # %bb.0:
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI9_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_frem_v4f64:
+; PC64:       # %bb.0:
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f28, -32
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 28, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI9_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
+; PC64-NEXT:    fmr 28, 1
+; PC64-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmod
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    fmr 3, 28
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 28, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %rem
+}
+
+define <1 x float> @constrained_vector_fmul_v1f32() {
+; PC64LE-LABEL: constrained_vector_fmul_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI10_1@toc@l(4)
+; PC64LE-NEXT:    xsmulsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI10_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI10_1@toc@l(3)
+; PC64LE9-NEXT:    xsmulsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fmul_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI10_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI10_1@toc@l(3)
+; PC64-NEXT:    fmuls 1, 1, 0
+; PC64-NEXT:    blr
+entry:
+  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 2.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %mul
+}
+
+define <2 x double> @constrained_vector_fmul_v2f64() {
+; PC64LE-LABEL: constrained_vector_fmul_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI11_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvmuldp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI11_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmuldp 34, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fmul_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI11_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI11_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI11_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI11_2@toc@l(3)
+; PC64-NEXT:    fmul 1, 0, 1
+; PC64-NEXT:    fmul 2, 0, 2
+; PC64-NEXT:    blr
+entry:
+  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %mul
+}
+
+define <3 x float> @constrained_vector_fmul_v3f32() {
+; PC64LE-LABEL: constrained_vector_fmul_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI12_3@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI12_2@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI12_1@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI12_3@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI12_2@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; PC64LE-NEXT:    xsmulsp 1, 0, 1
+; PC64LE-NEXT:    lfs 3, .LCPI12_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI12_4@toc@ha
+; PC64LE-NEXT:    xsmulsp 2, 0, 2
+; PC64LE-NEXT:    addi 3, 3, .LCPI12_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xsmulsp 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI12_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
+; PC64LE9-NEXT:    xsmulsp 0, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_3@toc@ha
+; PC64LE9-NEXT:    lfs 3, .LCPI12_3@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI12_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    xsmulsp 2, 1, 2
+; PC64LE9-NEXT:    xsmulsp 1, 1, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 2
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fmul_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI12_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI12_3@toc@ha
+; PC64-NEXT:    lfs 4, .LCPI12_3@toc@l(3)
+; PC64-NEXT:    fmuls 1, 3, 0
+; PC64-NEXT:    fmuls 2, 3, 2
+; PC64-NEXT:    fmuls 3, 3, 4
+; PC64-NEXT:    blr
+entry:
+  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
+           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
+                        float 0x7FF0000000000000>,
+           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %mul
+}
+
+define <3 x double> @constrained_vector_fmul_v3f64() {
+; PC64LE-LABEL: constrained_vector_fmul_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI13_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI13_2@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI13_3@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI13_1@toc@ha
+; PC64LE-NEXT:    lfd 3, .LCPI13_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvmuldp 2, 1, 0
+; PC64LE-NEXT:    lfs 0, .LCPI13_1@toc@l(4)
+; PC64LE-NEXT:    xsmuldp 3, 3, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI13_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI13_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI13_2@toc@l
+; PC64LE9-NEXT:    xsmuldp 3, 0, 1
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI13_3@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmuldp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fmul_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI13_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI13_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI13_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI13_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI13_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI13_3@toc@l(3)
+; PC64-NEXT:    fmul 1, 0, 1
+; PC64-NEXT:    fmul 2, 0, 2
+; PC64-NEXT:    fmul 3, 0, 3
+; PC64-NEXT:    blr
+entry:
+  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %mul
+}
+
+define <4 x double> @constrained_vector_fmul_v4f64() {
+; PC64LE-LABEL: constrained_vector_fmul_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI14_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI14_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI14_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 5, .LCPI14_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvmuldp 34, 1, 0
+; PC64LE-NEXT:    xvmuldp 35, 1, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI14_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI14_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI14_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI14_2@toc@l
+; PC64LE9-NEXT:    xvmuldp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvmuldp 35, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fmul_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI14_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI14_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI14_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI14_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI14_3@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI14_4@toc@ha
+; PC64-NEXT:    fmul 1, 0, 1
+; PC64-NEXT:    lfs 4, .LCPI14_4@toc@l(3)
+; PC64-NEXT:    fmul 2, 0, 2
+; PC64-NEXT:    fmul 3, 0, 3
+; PC64-NEXT:    fmul 4, 0, 4
+; PC64-NEXT:    blr
+entry:
+  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 2.000000e+00, double 3.000000e+00,
+                         double 4.000000e+00, double 5.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %mul
+}
+
+define <1 x float> @constrained_vector_fadd_v1f32() {
+; PC64LE-LABEL: constrained_vector_fadd_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI15_1@toc@l(4)
+; PC64LE-NEXT:    xsaddsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI15_1@toc@l(3)
+; PC64LE9-NEXT:    xsaddsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fadd_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI15_1@toc@l(3)
+; PC64-NEXT:    fadds 1, 1, 0
+; PC64-NEXT:    blr
+entry:
+  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %add
+}
+
+define <2 x double> @constrained_vector_fadd_v2f64() {
+; PC64LE-LABEL: constrained_vector_fadd_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI16_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI16_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI16_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvadddp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI16_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI16_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI16_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvadddp 34, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fadd_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI16_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI16_1@toc@ha
+; PC64-NEXT:    lfd 1, .LCPI16_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI16_2@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI16_2@toc@l(3)
+; PC64-NEXT:    fadd 2, 1, 0
+; PC64-NEXT:    fadd 1, 1, 3
+; PC64-NEXT:    blr
+entry:
+  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %add
+}
+
+define <3 x float> @constrained_vector_fadd_v3f32() {
+; PC64LE-LABEL: constrained_vector_fadd_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI17_2@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI17_1@toc@ha
+; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI17_2@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI17_1@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI17_3@toc@l
+; PC64LE-NEXT:    xsaddsp 1, 0, 1
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xsaddsp 2, 0, 2
+; PC64LE-NEXT:    xsaddsp 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI17_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_2@toc@ha
+; PC64LE9-NEXT:    xsaddsp 2, 0, 2
+; PC64LE9-NEXT:    lfs 3, .LCPI17_2@toc@l(3)
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    xsaddsp 1, 0, 1
+; PC64LE9-NEXT:    xsaddsp 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI17_3@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fadd_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI17_1@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI17_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI17_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI17_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
+; PC64-NEXT:    lfs 4, .LCPI17_3@toc@l(3)
+; PC64-NEXT:    fadds 1, 3, 0
+; PC64-NEXT:    fadds 2, 3, 2
+; PC64-NEXT:    fadds 3, 3, 4
+; PC64-NEXT:    blr
+entry:
+  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %add
+}
+
+define <3 x double> @constrained_vector_fadd_v3f64() {
+; PC64LE-LABEL: constrained_vector_fadd_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI18_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI18_1@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI18_2@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
+; PC64LE-NEXT:    lfd 3, .LCPI18_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvadddp 2, 1, 0
+; PC64LE-NEXT:    xxlxor 0, 0, 0
+; PC64LE-NEXT:    xsadddp 3, 3, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI18_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI18_1@toc@l
+; PC64LE9-NEXT:    xsadddp 3, 0, 1
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI18_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI18_2@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvadddp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fadd_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI18_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI18_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI18_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI18_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI18_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI18_3@toc@l(3)
+; PC64-NEXT:    fadd 1, 0, 1
+; PC64-NEXT:    fadd 2, 0, 2
+; PC64-NEXT:    fadd 3, 0, 3
+; PC64-NEXT:    blr
+entry:
+  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> <double 2.0, double 1.0, double 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %add
+}
+
+define <4 x double> @constrained_vector_fadd_v4f64() {
+; PC64LE-LABEL: constrained_vector_fadd_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI19_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI19_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI19_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI19_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 5, .LCPI19_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvadddp 34, 1, 0
+; PC64LE-NEXT:    xvadddp 35, 1, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI19_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI19_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI19_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI19_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI19_2@toc@l
+; PC64LE9-NEXT:    xvadddp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvadddp 35, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fadd_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI19_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI19_1@toc@ha
+; PC64-NEXT:    lfd 3, .LCPI19_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI19_2@toc@ha
+; PC64-NEXT:    lfd 1, .LCPI19_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI19_3@toc@ha
+; PC64-NEXT:    lfs 5, .LCPI19_3@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI19_4@toc@ha
+; PC64-NEXT:    fadd 2, 3, 0
+; PC64-NEXT:    lfs 6, .LCPI19_4@toc@l(3)
+; PC64-NEXT:    fadd 4, 3, 1
+; PC64-NEXT:    fadd 1, 3, 5
+; PC64-NEXT:    fadd 3, 3, 6
+; PC64-NEXT:    blr
+entry:
+  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %add
+}
+
+define <1 x float> @constrained_vector_fsub_v1f32() {
+; PC64LE-LABEL: constrained_vector_fsub_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI20_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI20_1@toc@l(4)
+; PC64LE-NEXT:    xssubsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI20_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI20_1@toc@l(3)
+; PC64LE9-NEXT:    xssubsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fsub_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI20_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI20_1@toc@l(3)
+; PC64-NEXT:    fsubs 1, 1, 0
+; PC64-NEXT:    blr
+entry:
+  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %sub
+}
+
+define <2 x double> @constrained_vector_fsub_v2f64() {
+; PC64LE-LABEL: constrained_vector_fsub_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI21_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI21_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI21_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvsubdp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI21_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI21_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI21_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvsubdp 34, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fsub_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI21_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI21_1@toc@ha
+; PC64-NEXT:    lfd 1, .LCPI21_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI21_2@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI21_2@toc@l(3)
+; PC64-NEXT:    fsub 2, 1, 0
+; PC64-NEXT:    fsub 1, 1, 3
+; PC64-NEXT:    blr
+entry:
+  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sub
+}
+
+define <3 x float> @constrained_vector_fsub_v3f32() {
+; PC64LE-LABEL: constrained_vector_fsub_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI22_2@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI22_1@toc@ha
+; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI22_2@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI22_1@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI22_3@toc@l
+; PC64LE-NEXT:    xssubsp 1, 0, 1
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xssubsp 2, 0, 2
+; PC64LE-NEXT:    xssubsp 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI22_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_2@toc@ha
+; PC64LE9-NEXT:    xssubsp 2, 0, 2
+; PC64LE9-NEXT:    lfs 3, .LCPI22_2@toc@l(3)
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    xssubsp 1, 0, 1
+; PC64LE9-NEXT:    xssubsp 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI22_3@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fsub_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
+; PC64-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI22_1@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI22_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI22_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI22_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
+; PC64-NEXT:    lfs 4, .LCPI22_3@toc@l(3)
+; PC64-NEXT:    fsubs 1, 3, 0
+; PC64-NEXT:    fsubs 2, 3, 2
+; PC64-NEXT:    fsubs 3, 3, 4
+; PC64-NEXT:    blr
+entry:
+  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %sub
+}
+
+define <3 x double> @constrained_vector_fsub_v3f64() {
+; PC64LE-LABEL: constrained_vector_fsub_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI23_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI23_1@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI23_2@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
+; PC64LE-NEXT:    lfd 3, .LCPI23_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvsubdp 2, 1, 0
+; PC64LE-NEXT:    xxlxor 0, 0, 0
+; PC64LE-NEXT:    xssubdp 3, 3, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI23_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI23_1@toc@l
+; PC64LE9-NEXT:    xssubdp 3, 0, 1
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI23_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI23_2@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvsubdp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fsub_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI23_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI23_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI23_2@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI23_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI23_3@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI23_3@toc@l(3)
+; PC64-NEXT:    fsub 1, 0, 1
+; PC64-NEXT:    fsub 2, 0, 2
+; PC64-NEXT:    fsub 3, 0, 3
+; PC64-NEXT:    blr
+entry:
+  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
+           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF>,
+           <3 x double> <double 2.0, double 1.0, double 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %sub
+}
+
+define <4 x double> @constrained_vector_fsub_v4f64() {
+; PC64LE-LABEL: constrained_vector_fsub_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI24_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI24_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI24_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI24_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 5, .LCPI24_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvsubdp 34, 1, 0
+; PC64LE-NEXT:    xvsubdp 35, 1, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI24_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI24_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI24_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI24_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI24_2@toc@l
+; PC64LE9-NEXT:    xvsubdp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsubdp 35, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fsub_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI24_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI24_1@toc@ha
+; PC64-NEXT:    lfd 3, .LCPI24_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI24_2@toc@ha
+; PC64-NEXT:    lfd 1, .LCPI24_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI24_3@toc@ha
+; PC64-NEXT:    lfs 5, .LCPI24_3@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI24_4@toc@ha
+; PC64-NEXT:    fsub 2, 3, 0
+; PC64-NEXT:    lfs 6, .LCPI24_4@toc@l(3)
+; PC64-NEXT:    fsub 4, 3, 1
+; PC64-NEXT:    fsub 1, 3, 5
+; PC64-NEXT:    fsub 3, 3, 6
+; PC64-NEXT:    blr
+entry:
+  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
+           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %sub
+}
+
+define <1 x float> @constrained_vector_sqrt_v1f32() {
+; PC64LE-LABEL: constrained_vector_sqrt_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI25_0@toc@l(3)
+; PC64LE-NEXT:    xssqrtsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI25_0@toc@l(3)
+; PC64LE9-NEXT:    xssqrtsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sqrt_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI25_0@toc@l(3)
+; PC64-NEXT:    bl sqrtf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
+                              <1 x float> <float 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %sqrt
+}
+
+define <2 x double> @constrained_vector_sqrt_v2f64() {
+; PC64LE-LABEL: constrained_vector_sqrt_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI26_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvsqrtdp 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI26_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 34, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sqrt_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI26_0@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI26_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI26_1@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %sqrt
+}
+
+define <3 x float> @constrained_vector_sqrt_v3f32() {
+; PC64LE-LABEL: constrained_vector_sqrt_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI27_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI27_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI27_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
+; PC64LE-NEXT:    xssqrtsp 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI27_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI27_3@toc@ha
+; PC64LE-NEXT:    xssqrtsp 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI27_3@toc@l
+; PC64LE-NEXT:    xssqrtsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI27_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI27_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
+; PC64LE9-NEXT:    xssqrtsp 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI27_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI27_3@toc@l
+; PC64LE9-NEXT:    xssqrtsp 1, 1
+; PC64LE9-NEXT:    xssqrtsp 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sqrt_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI27_0@toc@l(3)
+; PC64-NEXT:    bl sqrtf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI27_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI27_1@toc@l(3)
+; PC64-NEXT:    bl sqrtf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI27_2@toc@l(3)
+; PC64-NEXT:    bl sqrtf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sqrt
+}
+
+define <3 x double> @constrained_vector_sqrt_v3f64() {
+; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI28_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI28_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xssqrtdp 3, 1
+; PC64LE-NEXT:    xvsqrtdp 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI28_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI28_1@toc@l
+; PC64LE9-NEXT:    xssqrtdp 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sqrt_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI28_0@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI28_1@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI28_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI28_2@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %sqrt
+}
+
+define <4 x double> @constrained_vector_sqrt_v4f64() {
+; PC64LE-LABEL: constrained_vector_sqrt_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI29_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI29_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI29_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvsqrtdp 34, 0
+; PC64LE-NEXT:    xvsqrtdp 35, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI29_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI29_1@toc@l
+; PC64LE9-NEXT:    xvsqrtdp 34, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 35, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sqrt_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI29_0@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI29_1@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI29_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI29_2@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI29_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI29_3@toc@l(3)
+; PC64-NEXT:    bl sqrt
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+ entry:
+  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %sqrt
+}
+
+define <1 x float> @constrained_vector_pow_v1f32() {
+; PC64LE-LABEL: constrained_vector_pow_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI30_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI30_1@toc@l(4)
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI30_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI30_1@toc@l(3)
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_pow_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI30_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI30_1@toc@l(3)
+; PC64-NEXT:    bl powf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
+                             <1 x float> <float 42.0>,
+                             <1 x float> <float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %pow
+}
+
+define <2 x double> @constrained_vector_pow_v2f64() {
+; PC64LE-LABEL: constrained_vector_pow_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI31_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI31_1@toc@l(4)
+; PC64LE-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI31_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_pow_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI31_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
+                             <2 x double> <double 42.1, double 42.2>,
+                             <2 x double> <double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %pow
+}
+
+define <3 x float> @constrained_vector_pow_v3f32() {
+; PC64LE-LABEL: constrained_vector_pow_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI32_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI32_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_4@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI32_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI32_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI32_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_pow_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI32_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI32_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl powf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl powf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl powf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
+                             <3 x float> <float 42.0, float 43.0, float 44.0>,
+                             <3 x float> <float 3.0, float 3.0, float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <3 x float> %pow
+}
+
+define <3 x double> @constrained_vector_pow_v3f64() {
+; PC64LE-LABEL: constrained_vector_pow_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI33_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI33_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI33_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_pow_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI33_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI33_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %pow
+}
+
+define <4 x double> @constrained_vector_pow_v4f64() {
+; PC64LE-LABEL: constrained_vector_pow_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI34_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI34_1@toc@l(4)
+; PC64LE-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI34_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_pow_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f28, -32
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 28, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI34_1@toc@ha
+; PC64-NEXT:    lfs 31, .LCPI34_1@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
+; PC64-NEXT:    fmr 28, 1
+; PC64-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl pow
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    fmr 3, 28
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 28, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
+                             <4 x double> <double 42.1, double 42.2,
+                                           double 42.3, double 42.4>,
+                             <4 x double> <double 3.0, double 3.0,
+                                           double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %pow
+}
+
+define <1 x float> @constrained_vector_powi_v1f32() {
+; PC64LE-LABEL: constrained_vector_powi_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_powi_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
+; PC64-NEXT:    bl __powisf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
+                              <1 x float> <float 42.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %powi
+}
+
+define <2 x double> @constrained_vector_powi_v2f64() {
+; PC64LE-LABEL: constrained_vector_powi_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_powi_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
+                              <2 x double> <double 42.1, double 42.2>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %powi
+}
+
+define <3 x float> @constrained_vector_powi_v3f32() {
+;
+;
+; PC64LE-LABEL: constrained_vector_powi_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI37_3@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI37_3@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_powi_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    bl __powisf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powisf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powisf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %powi
+}
+
+define <3 x double> @constrained_vector_powi_v3f64() {
+; PC64LE-LABEL: constrained_vector_powi_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI38_0@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI38_1@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI38_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI38_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_powi_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    lfs 1, .LCPI38_0@toc@l(3)
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI38_1@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          i32 3,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %powi
+}
+
+define <4 x double> @constrained_vector_powi_v4f64() {
+; PC64LE-LABEL: constrained_vector_powi_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_powi_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
+; PC64-NEXT:    li 4, 3
+; PC64-NEXT:    bl __powidf2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %powi
+}
+
+define <1 x float> @constrained_vector_sin_v1f32() {
+; PC64LE-LABEL: constrained_vector_sin_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sin_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
+; PC64-NEXT:    bl sinf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %sin
+}
+
+define <2 x double> @constrained_vector_sin_v2f64() {
+; PC64LE-LABEL: constrained_vector_sin_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI41_0@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI41_1@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI41_0@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI41_1@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sin_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI41_0@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI41_1@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %sin
+}
+
+define <3 x float> @constrained_vector_sin_v3f32() {
+; PC64LE-LABEL: constrained_vector_sin_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI42_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI42_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sin_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
+; PC64-NEXT:    bl sinf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
+; PC64-NEXT:    bl sinf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
+; PC64-NEXT:    bl sinf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sin
+}
+
+define <3 x double> @constrained_vector_sin_v3f64() {
+; PC64LE-LABEL: constrained_vector_sin_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI43_0@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI43_1@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI43_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI43_1@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sin_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI43_0@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI43_1@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %sin
+}
+
+define <4 x double> @constrained_vector_sin_v4f64() {
+; PC64LE-LABEL: constrained_vector_sin_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI44_0@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI44_3@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI44_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI44_3@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_sin_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI44_0@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI44_3@toc@l(3)
+; PC64-NEXT:    bl sin
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %sin
+}
+
+define <1 x float> @constrained_vector_cos_v1f32() {
+; PC64LE-LABEL: constrained_vector_cos_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_cos_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
+; PC64-NEXT:    bl cosf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %cos
+}
+
+define <2 x double> @constrained_vector_cos_v2f64() {
+; PC64LE-LABEL: constrained_vector_cos_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI46_0@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI46_1@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI46_0@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI46_1@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_cos_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI46_0@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI46_1@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %cos
+}
+
+define <3 x float> @constrained_vector_cos_v3f32() {
+; PC64LE-LABEL: constrained_vector_cos_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI47_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI47_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_cos_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
+; PC64-NEXT:    bl cosf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
+; PC64-NEXT:    bl cosf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
+; PC64-NEXT:    bl cosf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %cos
+}
+
+define <3 x double> @constrained_vector_cos_v3f64() {
+; PC64LE-LABEL: constrained_vector_cos_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_cos_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI48_0@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI48_1@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %cos
+}
+
+define <4 x double> @constrained_vector_cos_v4f64() {
+; PC64LE-LABEL: constrained_vector_cos_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI49_0@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI49_3@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI49_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI49_3@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_cos_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI49_0@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI49_3@toc@l(3)
+; PC64-NEXT:    bl cos
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %cos
+}
+
+define <1 x float> @constrained_vector_exp_v1f32() {
+; PC64LE-LABEL: constrained_vector_exp_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
+; PC64-NEXT:    bl expf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp
+}
+
+define <2 x double> @constrained_vector_exp_v2f64() {
+; PC64LE-LABEL: constrained_vector_exp_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI51_0@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI51_1@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI51_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI51_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI51_0@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI51_1@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %exp
+}
+
+define <3 x float> @constrained_vector_exp_v3f32() {
+; PC64LE-LABEL: constrained_vector_exp_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI52_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI52_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
+; PC64-NEXT:    bl expf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
+; PC64-NEXT:    bl expf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
+; PC64-NEXT:    bl expf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp
+}
+
+define <3 x double> @constrained_vector_exp_v3f64() {
+; PC64LE-LABEL: constrained_vector_exp_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI53_0@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI53_1@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI53_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI53_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI53_0@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI53_1@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %exp
+}
+
+define <4 x double> @constrained_vector_exp_v4f64() {
+; PC64LE-LABEL: constrained_vector_exp_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI54_0@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI54_3@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI54_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI54_3@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI54_0@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI54_3@toc@l(3)
+; PC64-NEXT:    bl exp
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %exp
+}
+
+define <1 x float> @constrained_vector_exp2_v1f32() {
+; PC64LE-LABEL: constrained_vector_exp2_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp2_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
+; PC64-NEXT:    bl exp2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp2
+}
+
+define <2 x double> @constrained_vector_exp2_v2f64() {
+; PC64LE-LABEL: constrained_vector_exp2_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp2_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
+                              <2 x double> <double 42.1, double 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %exp2
+}
+
+define <3 x float> @constrained_vector_exp2_v3f32() {
+; PC64LE-LABEL: constrained_vector_exp2_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI57_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI57_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp2_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
+; PC64-NEXT:    bl exp2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
+; PC64-NEXT:    bl exp2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
+; PC64-NEXT:    bl exp2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp2
+}
+
+define <3 x double> @constrained_vector_exp2_v3f64() {
+; PC64LE-LABEL: constrained_vector_exp2_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI58_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI58_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI58_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI58_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp2_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI58_0@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI58_1@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %exp2
+}
+
+define <4 x double> @constrained_vector_exp2_v4f64() {
+; PC64LE-LABEL: constrained_vector_exp2_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_exp2_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
+; PC64-NEXT:    bl exp2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %exp2
+}
+
+define <1 x float> @constrained_vector_log_v1f32() {
+; PC64LE-LABEL: constrained_vector_log_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
+; PC64-NEXT:    bl logf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log
+}
+
+define <2 x double> @constrained_vector_log_v2f64() {
+; PC64LE-LABEL: constrained_vector_log_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI61_0@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI61_1@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI61_0@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI61_1@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI61_0@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI61_1@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %log
+}
+
+define <3 x float> @constrained_vector_log_v3f32() {
+; PC64LE-LABEL: constrained_vector_log_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI62_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI62_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
+; PC64-NEXT:    bl logf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
+; PC64-NEXT:    bl logf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
+; PC64-NEXT:    bl logf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log
+}
+
+define <3 x double> @constrained_vector_log_v3f64() {
+; PC64LE-LABEL: constrained_vector_log_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI63_0@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI63_1@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI63_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI63_1@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI63_0@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI63_1@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %log
+}
+
+define <4 x double> @constrained_vector_log_v4f64() {
+; PC64LE-LABEL: constrained_vector_log_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI64_0@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI64_3@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI64_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI64_3@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI64_0@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI64_3@toc@l(3)
+; PC64-NEXT:    bl log
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %log
+}
+
+define <1 x float> @constrained_vector_log10_v1f32() {
+; PC64LE-LABEL: constrained_vector_log10_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log10_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
+; PC64-NEXT:    bl log10f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log10
+}
+
+define <2 x double> @constrained_vector_log10_v2f64() {
+; PC64LE-LABEL: constrained_vector_log10_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI66_0@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI66_1@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI66_0@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI66_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log10_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI66_0@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI66_1@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
+                               <2 x double> <double 42.0, double 42.1>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <2 x double> %log10
+}
+
+define <3 x float> @constrained_vector_log10_v3f32() {
+; PC64LE-LABEL: constrained_vector_log10_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI67_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI67_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log10_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
+; PC64-NEXT:    bl log10f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
+; PC64-NEXT:    bl log10f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
+; PC64-NEXT:    bl log10f
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log10
+}
+
+define <3 x double> @constrained_vector_log10_v3f64() {
+; PC64LE-LABEL: constrained_vector_log10_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI68_0@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI68_1@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI68_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI68_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log10_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI68_0@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI68_1@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %log10
+}
+
+define <4 x double> @constrained_vector_log10_v4f64() {
+; PC64LE-LABEL: constrained_vector_log10_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI69_0@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI69_3@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI69_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI69_3@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log10_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI69_0@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI69_3@toc@l(3)
+; PC64-NEXT:    bl log10
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
+                               <4 x double> <double 42.0, double 42.1,
+                                             double 42.2, double 42.3>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <4 x double> %log10
+}
+
+define <1 x float> @constrained_vector_log2_v1f32() {
+; PC64LE-LABEL: constrained_vector_log2_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log2_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
+; PC64-NEXT:    bl log2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log2
+}
+
+define <2 x double> @constrained_vector_log2_v2f64() {
+; PC64LE-LABEL: constrained_vector_log2_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI71_0@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI71_1@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI71_0@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI71_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log2_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI71_0@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI71_1@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %log2
+}
+
+define <3 x float> @constrained_vector_log2_v3f32() {
+; PC64LE-LABEL: constrained_vector_log2_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI72_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI72_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log2_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
+; PC64-NEXT:    bl log2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
+; PC64-NEXT:    bl log2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
+; PC64-NEXT:    bl log2f
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log2
+}
+
+define <3 x double> @constrained_vector_log2_v3f64() {
+; PC64LE-LABEL: constrained_vector_log2_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI73_0@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI73_1@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI73_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI73_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log2_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI73_0@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI73_1@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %log2
+}
+
+define <4 x double> @constrained_vector_log2_v4f64() {
+; PC64LE-LABEL: constrained_vector_log2_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI74_0@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI74_3@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI74_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI74_3@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_log2_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI74_0@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI74_3@toc@l(3)
+; PC64-NEXT:    bl log2
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %log2
+}
+
+define <1 x float> @constrained_vector_rint_v1f32() {
+; PC64LE-LABEL: constrained_vector_rint_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_rint_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
+; PC64-NEXT:    bl rintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %rint
+}
+
+define <2 x double> @constrained_vector_rint_v2f64() {
+; PC64LE-LABEL: constrained_vector_rint_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_rint_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> <double 42.1, double 42.0>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32() {
+; PC64LE-LABEL: constrained_vector_rint_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI77_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI77_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_rint_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
+; PC64-NEXT:    bl rintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
+; PC64-NEXT:    bl rintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
+; PC64-NEXT:    bl rintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+ entry:
+  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %rint
+}
+
+define <3 x double> @constrained_vector_rint_v3f64() {
+; PC64LE-LABEL: constrained_vector_rint_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI78_0@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI78_1@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI78_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI78_1@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_rint_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI78_0@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI78_1@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %rint
+}
+
+define <4 x double> @constrained_vector_rint_v4f64() {
+; PC64LE-LABEL: constrained_vector_rint_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_rint_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
+; PC64-NEXT:    bl rint
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+                        <4 x double> <double 42.1, double 42.2,
+                                      double 42.3, double 42.4>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_nearbyint_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64-NEXT:    bl nearbyintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
+                               <1 x float> <float 42.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %nearby
+}
+
+define <2 x double> @constrained_vector_nearbyint_v2f64() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI81_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpic 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI81_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 34, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_nearbyint_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI81_0@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI81_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI81_1@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                                <2 x double> <double 42.1, double 42.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI82_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI82_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_nearbyint_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64-NEXT:    bl nearbyintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64-NEXT:    bl nearbyintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64-NEXT:    bl nearbyintf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %nearby
+}
+
+define <3 x double> @constrained_vector_nearby_v3f64() {
+; PC64LE-LABEL: constrained_vector_nearby_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI83_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpic 2, 0
+; PC64LE-NEXT:    xxswapd 0, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    fmr 1, 0
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI83_1@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 2, 0
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_nearby_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI83_0@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI83_1@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI83_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI83_2@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %nearby
+}
+
+define <4 x double> @constrained_vector_nearbyint_v4f64() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI84_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI84_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 4, .LCPI84_1@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvrdpic 34, 0
+; PC64LE-NEXT:    xvrdpic 35, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI84_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI84_1@toc@l
+; PC64LE9-NEXT:    xvrdpic 34, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 35, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_nearbyint_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI84_0@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI84_1@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI84_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfd 1, .LCPI84_2@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI84_3@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfd 1, .LCPI84_3@toc@l(3)
+; PC64-NEXT:    bl nearbyint
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+                                <4 x double> <double 42.1, double 42.2,
+                                              double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %nearby
+}
+
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; PC64LE-LABEL: constrained_vector_maxnum_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI85_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI85_1@toc@l(4)
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI85_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI85_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_maxnum_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI85_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI85_1@toc@l(3)
+; PC64-NEXT:    bl fmaxf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; PC64LE-LABEL: constrained_vector_maxnum_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI86_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI86_1@toc@l(4)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI86_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI86_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI86_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI86_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_maxnum_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI86_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI86_1@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI86_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI86_3@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; PC64LE-LABEL: constrained_vector_maxnum_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI87_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI87_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI87_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI87_3@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI87_3@toc@l(4)
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
+; PC64LE-NEXT:    fmr 1, 31
+; PC64LE-NEXT:    lfs 2, .LCPI87_4@toc@l(3)
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_5@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI87_5@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI87_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI87_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI87_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI87_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    fmr 1, 31
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_5@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI87_5@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_maxnum_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 31, .LCPI87_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI87_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI87_1@toc@l(3)
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    bl fmaxf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI87_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI87_3@toc@l(3)
+; PC64-NEXT:    bl fmaxf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI87_4@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fmaxf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %max
+}
+
+define <3 x double> @constrained_vector_max_v3f64() {
+; PC64LE-LABEL: constrained_vector_max_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_max_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_max_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI88_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI88_1@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI88_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI88_3@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI88_5@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI88_5@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+                          <3 x double> <double 43.0, double 44.0, double 45.0>,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %max
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; PC64LE-LABEL: constrained_vector_maxnum_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_7@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_7@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_7@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_7@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_maxnum_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI89_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI89_1@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI89_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI89_3@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI89_5@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI89_5@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI89_7@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI89_7@toc@l(3)
+; PC64-NEXT:    bl fmax
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; PC64LE-LABEL: constrained_vector_minnum_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI90_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI90_1@toc@l(4)
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI90_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI90_1@toc@l(3)
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_minnum_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI90_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI90_1@toc@l(3)
+; PC64-NEXT:    bl fminf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+ entry:
+  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; PC64LE-LABEL: constrained_vector_minnum_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI91_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI91_1@toc@l(4)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI91_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI91_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_minnum_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; PC64LE-LABEL: constrained_vector_minnum_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI92_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI92_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI92_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI92_3@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI92_3@toc@l(4)
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
+; PC64LE-NEXT:    fmr 1, 31
+; PC64LE-NEXT:    lfs 2, .LCPI92_4@toc@l(3)
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_5@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI92_5@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI92_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI92_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI92_3@toc@l(3)
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI92_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    fmr 1, 31
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_5@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI92_5@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_minnum_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 31, .LCPI92_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI92_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI92_1@toc@l(3)
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    bl fminf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI92_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI92_3@toc@l(3)
+; PC64-NEXT:    bl fminf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI92_4@toc@l(3)
+; PC64-NEXT:    fmr 2, 31
+; PC64-NEXT:    bl fminf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 30
+; PC64-NEXT:    fmr 2, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %min
+}
+
+define <3 x double> @constrained_vector_min_v3f64() {
+; PC64LE-LABEL: constrained_vector_min_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_min_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_min_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+ %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+                          <3 x double> <double 43.0, double 44.0, double 45.0>,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %min
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; PC64LE-LABEL: constrained_vector_minnum_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_7@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_7@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_minnum_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -144(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 144
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f29, -24
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64-NEXT:    fmr 29, 1
+; PC64-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
+; PC64-NEXT:    bl fmin
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 4, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    fmr 3, 29
+; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 144
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %min
+}
+
+define <1 x float> @constrained_vector_fptrunc_v1f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v1f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
+; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v1f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fptrunc_v1f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
+; PC64-NEXT:    frsp 1, 0
+; PC64-NEXT:    blr
+entry:
+  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
+                                <1 x double><double 42.1>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %result
+}
+
+define <2 x float> @constrained_vector_fptrunc_v2f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI96_1@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
+; PC64LE-NEXT:    lfd 1, .LCPI96_1@toc@l(4)
+; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    frsp 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI96_1@toc@ha
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    lfd 0, .LCPI96_1@toc@l(3)
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fptrunc_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI96_1@toc@ha
+; PC64-NEXT:    lfd 2, .LCPI96_1@toc@l(3)
+; PC64-NEXT:    frsp 1, 0
+; PC64-NEXT:    frsp 2, 2
+; PC64-NEXT:    blr
+entry:
+  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x float> %result
+}
+
+define <3 x float> @constrained_vector_fptrunc_v3f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI97_1@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
+; PC64LE-NEXT:    lfd 1, .LCPI97_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
+; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    lfd 2, .LCPI97_3@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
+; PC64LE-NEXT:    frsp 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI97_2@toc@l
+; PC64LE-NEXT:    frsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_1@toc@ha
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    lfd 0, .LCPI97_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI97_2@toc@l
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI97_3@toc@l(3)
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fptrunc_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI97_1@toc@ha
+; PC64-NEXT:    lfd 2, .LCPI97_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
+; PC64-NEXT:    lfd 3, .LCPI97_2@toc@l(3)
+; PC64-NEXT:    frsp 1, 0
+; PC64-NEXT:    frsp 2, 2
+; PC64-NEXT:    frsp 3, 3
+; PC64-NEXT:    blr
+entry:
+  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
+                                <3 x double><double 42.1, double 42.2,
+                                             double 42.3>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <3 x float> %result
+}
+
+define <4 x float> @constrained_vector_fptrunc_v4f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI98_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI98_2@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI98_3@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
+; PC64LE-NEXT:    lfd 1, .LCPI98_1@toc@l(4)
+; PC64LE-NEXT:    lfd 2, .LCPI98_2@toc@l(5)
+; PC64LE-NEXT:    lfd 3, .LCPI98_3@toc@l(6)
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    xxmrghd 1, 3, 2
+; PC64LE-NEXT:    xvcvdpsp 34, 0
+; PC64LE-NEXT:    xvcvdpsp 35, 1
+; PC64LE-NEXT:    vmrgew 2, 3, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_1@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI98_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
+; PC64LE9-NEXT:    xxmrghd 0, 1, 0
+; PC64LE9-NEXT:    xvcvdpsp 34, 0
+; PC64LE9-NEXT:    lfd 0, .LCPI98_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_3@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI98_3@toc@l(3)
+; PC64LE9-NEXT:    xxmrghd 0, 1, 0
+; PC64LE9-NEXT:    xvcvdpsp 35, 0
+; PC64LE9-NEXT:    vmrgew 2, 3, 2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fptrunc_v4f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
+; PC64-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI98_1@toc@ha
+; PC64-NEXT:    lfd 2, .LCPI98_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
+; PC64-NEXT:    lfd 3, .LCPI98_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI98_3@toc@ha
+; PC64-NEXT:    frsp 1, 0
+; PC64-NEXT:    lfd 4, .LCPI98_3@toc@l(3)
+; PC64-NEXT:    frsp 2, 2
+; PC64-NEXT:    frsp 3, 3
+; PC64-NEXT:    frsp 4, 4
+; PC64-NEXT:    blr
+entry:
+  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x float> %result
+}
+
+define <1 x double> @constrained_vector_fpext_v1f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI99_0@toc@l(3)
+; PC64LE-NEXT:    xxspltd 34, 0, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI99_0@toc@l(3)
+; PC64LE9-NEXT:    xxspltd 34, 0, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fpext_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI99_0@toc@l(3)
+; PC64-NEXT:    blr
+entry:
+  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
+                                <1 x float><float 42.0>,
+                                metadata !"fpexcept.strict")
+  ret <1 x double> %result
+}
+
+define <2 x double> @constrained_vector_fpext_v2f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v2f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI100_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI100_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI100_1@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v2f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI100_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI100_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI100_1@toc@l(3)
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fpext_v2f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI100_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI100_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI100_1@toc@l(3)
+; PC64-NEXT:    blr
+entry:
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %result
+}
+
+define <3 x double> @constrained_vector_fpext_v3f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI101_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI101_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI101_1@toc@l(4)
+; PC64LE-NEXT:    lfs 3, .LCPI101_2@toc@l(5)
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI101_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI101_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI101_2@toc@ha
+; PC64LE9-NEXT:    lfs 3, .LCPI101_2@toc@l(3)
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fpext_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI101_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI101_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI101_2@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI101_2@toc@l(3)
+; PC64-NEXT:    blr
+entry:
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
+                                <3 x float><float 42.0, float 43.0,
+                                            float 44.0>,
+                                metadata !"fpexcept.strict")
+  ret <3 x double> %result
+}
+
+define <4 x double> @constrained_vector_fpext_v4f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v4f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI102_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI102_2@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI102_3@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI102_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI102_1@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI102_2@toc@l(5)
+; PC64LE-NEXT:    lfs 3, .LCPI102_3@toc@l(6)
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 35, 3, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v4f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI102_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI102_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_2@toc@ha
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    lfs 0, .LCPI102_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_3@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI102_3@toc@l(3)
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_fpext_v4f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI102_0@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI102_1@toc@ha
+; PC64-NEXT:    lfs 2, .LCPI102_1@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI102_2@toc@ha
+; PC64-NEXT:    lfs 3, .LCPI102_2@toc@l(3)
+; PC64-NEXT:    addis 3, 2, .LCPI102_3@toc@ha
+; PC64-NEXT:    lfs 4, .LCPI102_3@toc@l(3)
+; PC64-NEXT:    blr
+entry:
+  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %result
+}
+
+define <1 x float> @constrained_vector_ceil_v1f32() {
+; PC64LE-LABEL: constrained_vector_ceil_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI103_0@toc@l(3)
+; PC64LE-NEXT:    frip 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI103_0@toc@l(3)
+; PC64LE9-NEXT:    frip 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_ceil_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI103_0@toc@l(3)
+; PC64-NEXT:    bl ceilf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %ceil
+}
+
+define <2 x double> @constrained_vector_ceil_v2f64() {
+; PC64LE-LABEL: constrained_vector_ceil_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI104_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpip 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI104_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpip 34, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_ceil_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI104_0@toc@l(3)
+; PC64-NEXT:    bl ceil
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI104_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI104_1@toc@l(3)
+; PC64-NEXT:    bl ceil
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %ceil
+}
+
+define <3 x float> @constrained_vector_ceil_v3f32() {
+; PC64LE-LABEL: constrained_vector_ceil_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI105_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI105_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI105_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64LE-NEXT:    frip 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI105_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_3@toc@ha
+; PC64LE-NEXT:    frip 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI105_3@toc@l
+; PC64LE-NEXT:    frip 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI105_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI105_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
+; PC64LE9-NEXT:    frip 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI105_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI105_3@toc@l
+; PC64LE9-NEXT:    frip 1, 1
+; PC64LE9-NEXT:    frip 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_ceil_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI105_0@toc@l(3)
+; PC64-NEXT:    bl ceilf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI105_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI105_1@toc@l(3)
+; PC64-NEXT:    bl ceilf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI105_2@toc@l(3)
+; PC64-NEXT:    bl ceilf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %ceil
+}
+
+define <3 x double> @constrained_vector_ceil_v3f64() {
+; PC64LE-LABEL: constrained_vector_ceil_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI106_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI106_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpip 3, 1
+; PC64LE-NEXT:    xvrdpip 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI106_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI106_1@toc@l
+; PC64LE9-NEXT:    xsrdpip 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpip 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_ceil_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI106_0@toc@l(3)
+; PC64-NEXT:    bl ceil
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI106_1@toc@l(3)
+; PC64-NEXT:    bl ceil
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI106_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI106_2@toc@l(3)
+; PC64-NEXT:    bl ceil
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %ceil
+}
+
+define <1 x float> @constrained_vector_floor_v1f32() {
+; PC64LE-LABEL: constrained_vector_floor_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI107_0@toc@l(3)
+; PC64LE-NEXT:    frim 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI107_0@toc@l(3)
+; PC64LE9-NEXT:    frim 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_floor_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI107_0@toc@l(3)
+; PC64-NEXT:    bl floorf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %floor
+}
+
+
+define <2 x double> @constrained_vector_floor_v2f64() {
+; PC64LE-LABEL: constrained_vector_floor_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI108_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpim 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI108_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpim 34, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_floor_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI108_0@toc@l(3)
+; PC64-NEXT:    bl floor
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI108_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI108_1@toc@l(3)
+; PC64-NEXT:    bl floor
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %floor
+}
+
+define <3 x float> @constrained_vector_floor_v3f32() {
+; PC64LE-LABEL: constrained_vector_floor_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI109_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI109_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI109_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
+; PC64LE-NEXT:    frim 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI109_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI109_3@toc@ha
+; PC64LE-NEXT:    frim 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI109_3@toc@l
+; PC64LE-NEXT:    frim 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI109_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI109_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
+; PC64LE9-NEXT:    frim 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI109_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI109_3@toc@l
+; PC64LE9-NEXT:    frim 1, 1
+; PC64LE9-NEXT:    frim 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_floor_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI109_0@toc@l(3)
+; PC64-NEXT:    bl floorf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI109_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI109_1@toc@l(3)
+; PC64-NEXT:    bl floorf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI109_2@toc@l(3)
+; PC64-NEXT:    bl floorf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %floor
+}
+
+define <3 x double> @constrained_vector_floor_v3f64() {
+; PC64LE-LABEL: constrained_vector_floor_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI110_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI110_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpim 3, 1
+; PC64LE-NEXT:    xvrdpim 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI110_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI110_1@toc@l
+; PC64LE9-NEXT:    xsrdpim 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpim 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_floor_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI110_0@toc@l(3)
+; PC64-NEXT:    bl floor
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI110_1@toc@l(3)
+; PC64-NEXT:    bl floor
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI110_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI110_2@toc@l(3)
+; PC64-NEXT:    bl floor
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %floor
+}
+
+define <1 x float> @constrained_vector_round_v1f32() {
+; PC64LE-LABEL: constrained_vector_round_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI111_0@toc@l(3)
+; PC64LE-NEXT:    frin 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI111_0@toc@l(3)
+; PC64LE9-NEXT:    frin 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_round_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI111_0@toc@l(3)
+; PC64-NEXT:    bl roundf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %round
+}
+
+define <2 x double> @constrained_vector_round_v2f64() {
+; PC64LE-LABEL: constrained_vector_round_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI112_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpi 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI112_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpi 34, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_round_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI112_0@toc@l(3)
+; PC64-NEXT:    bl round
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI112_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI112_1@toc@l(3)
+; PC64-NEXT:    bl round
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %round
+}
+
+define <3 x float> @constrained_vector_round_v3f32() {
+; PC64LE-LABEL: constrained_vector_round_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI113_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI113_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI113_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64LE-NEXT:    frin 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI113_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_3@toc@ha
+; PC64LE-NEXT:    frin 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI113_3@toc@l
+; PC64LE-NEXT:    frin 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI113_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI113_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
+; PC64LE9-NEXT:    frin 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI113_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI113_3@toc@l
+; PC64LE9-NEXT:    frin 1, 1
+; PC64LE9-NEXT:    frin 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_round_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI113_0@toc@l(3)
+; PC64-NEXT:    bl roundf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI113_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI113_1@toc@l(3)
+; PC64-NEXT:    bl roundf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI113_2@toc@l(3)
+; PC64-NEXT:    bl roundf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %round
+}
+
+
+define <3 x double> @constrained_vector_round_v3f64() {
+; PC64LE-LABEL: constrained_vector_round_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI114_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI114_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpi 3, 1
+; PC64LE-NEXT:    xvrdpi 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI114_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI114_1@toc@l
+; PC64LE9-NEXT:    xsrdpi 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpi 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_round_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI114_0@toc@l(3)
+; PC64-NEXT:    bl round
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI114_1@toc@l(3)
+; PC64-NEXT:    bl round
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI114_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI114_2@toc@l(3)
+; PC64-NEXT:    bl round
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %round
+}
+
+define <1 x float> @constrained_vector_trunc_v1f32() {
+; PC64LE-LABEL: constrained_vector_trunc_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI115_0@toc@l(3)
+; PC64LE-NEXT:    friz 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI115_0@toc@l(3)
+; PC64LE9-NEXT:    friz 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_trunc_v1f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -112(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 112
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
+; PC64-NEXT:    lfs 1, .LCPI115_0@toc@l(3)
+; PC64-NEXT:    bl truncf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addi 1, 1, 112
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %trunc
+}
+
+define <2 x double> @constrained_vector_trunc_v2f64() {
+; PC64LE-LABEL: constrained_vector_trunc_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI116_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpiz 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI116_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpiz 34, 0
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_trunc_v2f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI116_0@toc@l(3)
+; PC64-NEXT:    bl trunc
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI116_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI116_1@toc@l(3)
+; PC64-NEXT:    bl trunc
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 2, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %trunc
+}
+
+define <3 x float> @constrained_vector_trunc_v3f32() {
+; PC64LE-LABEL: constrained_vector_trunc_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI117_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI117_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI117_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
+; PC64LE-NEXT:    friz 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI117_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI117_3@toc@ha
+; PC64LE-NEXT:    friz 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI117_3@toc@l
+; PC64LE-NEXT:    friz 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI117_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI117_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
+; PC64LE9-NEXT:    friz 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI117_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI117_3@toc@l
+; PC64LE9-NEXT:    friz 1, 1
+; PC64LE9-NEXT:    friz 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_trunc_v3f32:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfs 1, .LCPI117_0@toc@l(3)
+; PC64-NEXT:    bl truncf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI117_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfs 1, .LCPI117_1@toc@l(3)
+; PC64-NEXT:    bl truncf
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI117_2@toc@l(3)
+; PC64-NEXT:    bl truncf
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %trunc
+}
+
+define <3 x double> @constrained_vector_trunc_v3f64() {
+; PC64LE-LABEL: constrained_vector_trunc_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI118_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI118_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpiz 3, 1
+; PC64LE-NEXT:    xvrdpiz 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI118_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI118_1@toc@l
+; PC64LE9-NEXT:    xsrdpiz 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpiz 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+;
+; PC64-LABEL: constrained_vector_trunc_v3f64:
+; PC64:       # %bb.0: # %entry
+; PC64-NEXT:    mflr 0
+; PC64-NEXT:    std 0, 16(1)
+; PC64-NEXT:    stdu 1, -128(1)
+; PC64-NEXT:    .cfi_def_cfa_offset 128
+; PC64-NEXT:    .cfi_offset lr, 16
+; PC64-NEXT:    .cfi_offset f30, -16
+; PC64-NEXT:    .cfi_offset f31, -8
+; PC64-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
+; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
+; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
+; PC64-NEXT:    lfd 1, .LCPI118_0@toc@l(3)
+; PC64-NEXT:    bl trunc
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
+; PC64-NEXT:    fmr 31, 1
+; PC64-NEXT:    lfd 1, .LCPI118_1@toc@l(3)
+; PC64-NEXT:    bl trunc
+; PC64-NEXT:    nop
+; PC64-NEXT:    addis 3, 2, .LCPI118_2@toc@ha
+; PC64-NEXT:    fmr 30, 1
+; PC64-NEXT:    lfs 1, .LCPI118_2@toc@l(3)
+; PC64-NEXT:    bl trunc
+; PC64-NEXT:    nop
+; PC64-NEXT:    fmr 3, 1
+; PC64-NEXT:    fmr 1, 31
+; PC64-NEXT:    fmr 2, 30
+; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
+; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
+; PC64-NEXT:    addi 1, 1, 128
+; PC64-NEXT:    ld 0, 16(1)
+; PC64-NEXT:    mtlr 0
+; PC64-NEXT:    blr
+entry:
+  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %trunc
+}
+
+
+; Single width declarations
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+
+; Scalar width declarations
+declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
+declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+
+; Illegal width declarations
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
+declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+
+; Double width declarations
+declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)

From 6a17102731b0d5ff4705bc0212eb38a5658bb89c Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 14:23:00 +0000
Subject: [PATCH 0359/1176] gn build: make clangd depend on clang resource
 headers

Summary:
clangd needs them to function properly, even though they are not
strictly required for the build.

Reviewers: thakis

Reviewed By: thakis

Subscribers: MaskRay, jkorous, arphaman, llvm-commits, kadircet

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62480

llvm-svn: 361828
---
 llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn
index 02d12d42cdb8a..d1832b3797cf9 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/tool/BUILD.gn
@@ -11,6 +11,7 @@ executable("clangd") {
     "//clang/lib/Basic",
     "//clang/lib/Format",
     "//clang/lib/Frontend",
+    "//clang/lib/Headers",
     "//clang/lib/Sema",
     "//clang/lib/Tooling",
     "//clang/lib/Tooling/Core",

From 49e432d030d804fada7452460d6871722799f682 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 14:33:16 +0000
Subject: [PATCH 0360/1176] [CodeComplete] Consistently break after '{' in
 multi-line patterns

Summary:
Completion can return multi-line patterns in some cases, e.g.

    for (<#init#>; <#cond#>; <#inc#>) {
    <#body#>
    }

However, most patterns break the line only before closing brace,
resulting in code like:

    namespace <#name#> { <#decls#>
    }

While some (e.g. the 'for' example above) are breaking lines after the
opening brace too.

This change ensures all patterns consistently break after the opening
brace, this leads to nicer UX when using those in an actual editor.

Reviewers: gribozavr

Reviewed By: gribozavr

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62405

llvm-svn: 361829
---
 clang/lib/Sema/SemaCodeComplete.cpp           |  8 +++++
 .../CodeCompletion/ordinary-name-cxx11.cpp    | 30 ++++++++++++++-----
 clang/test/CodeCompletion/ordinary-name.cpp   | 30 ++++++++++++++-----
 3 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 16e7308681439..3682dd0f4d4c9 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1899,6 +1899,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
         Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
         Builder.AddPlaceholderChunk("identifier");
         Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
+        Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
         Builder.AddPlaceholderChunk("declarations");
         Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
         Builder.AddChunk(CodeCompletionString::CK_RightBrace);
@@ -2046,6 +2047,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
         SemaRef.getLangOpts().CXXExceptions) {
       Builder.AddTypedTextChunk("try");
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
+      Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
@@ -2054,6 +2056,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
       Builder.AddPlaceholderChunk("declaration");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
+      Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
@@ -2072,6 +2075,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
         Builder.AddPlaceholderChunk("expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
+      Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
@@ -2087,6 +2091,8 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
+      Builder.AddPlaceholderChunk("cases");
+      Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
       Results.AddResult(Result(Builder.TakeString()));
     }
@@ -2117,6 +2123,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
         Builder.AddPlaceholderChunk("expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
+      Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
@@ -2125,6 +2132,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
       // do { statements } while ( expression );
       Builder.AddTypedTextChunk("do");
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
+      Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
diff --git a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
index 7816243e8f4db..f78265b5b1ba9 100644
--- a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
+++ b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
@@ -14,7 +14,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-CC1: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : do{<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : do{
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: double
   // CHECK-CC1-NEXT: COMPLETION: Pattern : dynamic_cast<<#type#>>(<#expression#>)
   // CHECK-CC1-NEXT: COMPLETION: enum
@@ -24,7 +26,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: foo : [#void#]foo()
   // CHECK-CC1-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
   // CHECK-CC1: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : if(<#condition#>){<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: int
   // CHECK-CC1-NEXT: COMPLETION: long
   // CHECK-CC1-NEXT: COMPLETION: Pattern : new <#type#>(<#expressions#>)
@@ -47,7 +51,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: thread_local
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]throw <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#bool#]true
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : try{<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : try{
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: TYPEDEF : TYPEDEF
   // CHECK-CC1-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#std::type_info#]typeid(<#expression-or-type#>)
@@ -60,7 +66,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: void
   // CHECK-CC1-NEXT: COMPLETION: volatile
   // CHECK-CC1-NEXT: COMPLETION: wchar_t
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : while(<#condition#>){<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : while(<#condition#>){
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: X : X
   // CHECK-CC1-NEXT: COMPLETION: y : [#int#]y
   // CHECK-CC1-NEXT: COMPLETION: z : [#void#]z(<#int#>)
@@ -83,7 +91,9 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: inline
   // CHECK-CC2-NEXT: COMPLETION: int
   // CHECK-CC2-NEXT: COMPLETION: long
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#>{<#declarations#>
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#>{
+  // CHECK-CC2-NEXT: <#declarations#>
+  // CHECK-CC2-NEXT: }
   // CHECK-CC2: COMPLETION: Pattern : namespace <#name#> = <#namespace#>;
   // CHECK-CC2-NEXT: COMPLETION: operator
   // CHECK-CC2-NEXT: COMPLETION: short
@@ -209,7 +219,9 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do{<#statements#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do{
+  // CHECK-NO-RTTI-NEXT: <#statements#>
+  // CHECK-NO-RTTI-NEXT: }
   // CHECK-NO-RTTI: COMPLETION: double
   // CHECK-NO-RTTI-NOT: dynamic_cast
   // CHECK-NO-RTTI: COMPLETION: enum
@@ -219,7 +231,9 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: foo : [#void#]foo()
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
   // CHECK-NO-RTTI: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if(<#condition#>){<#statements#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: <#statements#>
+  // CHECK-NO-RTTI-NEXT: }
   // CHECK-NO-RTTI: COMPLETION: int
   // CHECK-NO-RTTI-NEXT: COMPLETION: long
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : new <#type#>(<#expressions#>)
@@ -254,7 +268,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: void
   // CHECK-NO-RTTI-NEXT: COMPLETION: volatile
   // CHECK-NO-RTTI-NEXT: COMPLETION: wchar_t
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while(<#condition#>){<#statements#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while(<#condition#>){
   // CHECK-NO-RTTI: COMPLETION: X : X
   // CHECK-NO-RTTI-NEXT: COMPLETION: y : [#int#]y
   // CHECK-NO-RTTI-NEXT: COMPLETION: z : [#void#]z(<#int#>)
diff --git a/clang/test/CodeCompletion/ordinary-name.cpp b/clang/test/CodeCompletion/ordinary-name.cpp
index 90f9a6ff3b20e..1ddd6e1bcccb0 100644
--- a/clang/test/CodeCompletion/ordinary-name.cpp
+++ b/clang/test/CodeCompletion/ordinary-name.cpp
@@ -12,7 +12,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-CC1: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : do{<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : do{
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: double
   // CHECK-CC1-NEXT: COMPLETION: Pattern : dynamic_cast<<#type#>>(<#expression#>)
   // CHECK-CC1-NEXT: COMPLETION: enum
@@ -21,8 +23,12 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: float
   // CHECK-CC1-NEXT: COMPLETION: foo : [#void#]foo()
   // CHECK-CC1-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
+  // CHECK-CC1-NEXT: <#statements#>{{$}}
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : if(<#condition#>){<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-CC1-NEXT: <#statements#>{{$}}
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: int
   // CHECK-CC1-NEXT: COMPLETION: long
   // CHECK-CC1-NEXT: COMPLETION: Pattern : new <#type#>(<#expressions#>)
@@ -40,7 +46,11 @@ void foo() {
   // CHECK-CC1: COMPLETION: t : t
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]throw <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#bool#]true
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : try{<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : try{
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }catch(<#declaration#>){
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: TYPEDEF : TYPEDEF
   // CHECK-CC1-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#std::type_info#]typeid(<#expression-or-type#>)
@@ -53,7 +63,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: void
   // CHECK-CC1-NEXT: COMPLETION: volatile
   // CHECK-CC1-NEXT: COMPLETION: wchar_t
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : while(<#condition#>){<#statements#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : while(<#condition#>){
+  // CHECK-CC1-NEXT: <#statements#>
+  // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: X : X
   // CHECK-CC1-NEXT: COMPLETION: y : [#int#]y
   // CHECK-CC1-NEXT: COMPLETION: z : [#void#]z(<#int#>)
@@ -71,7 +83,9 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: inline
   // CHECK-CC2-NEXT: COMPLETION: int
   // CHECK-CC2-NEXT: COMPLETION: long
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#>{<#declarations#>
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#>{
+  // CHECK-CC2-NEXT: <#declarations#>
+  // CHECK-CC2-NEXT: }
   // CHECK-CC2: COMPLETION: Pattern : namespace <#name#> = <#namespace#>;
   // CHECK-CC2-NEXT: COMPLETION: operator
   // CHECK-CC2-NEXT: COMPLETION: short
@@ -181,7 +195,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do{<#statements#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do{
   // CHECK-NO-RTTI: COMPLETION: double
   // CHECK-NO-RTTI-NOT: dynamic_cast
   // CHECK-NO-RTTI: COMPLETION: enum
@@ -191,7 +205,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: foo : [#void#]foo()
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
   // CHECK-NO-RTTI: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if(<#condition#>){<#statements#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if(<#condition#>){
   // CHECK-NO-RTTI: COMPLETION: int
   // CHECK-NO-RTTI-NEXT: COMPLETION: long
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : new <#type#>(<#expressions#>)
@@ -222,7 +236,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: void
   // CHECK-NO-RTTI-NEXT: COMPLETION: volatile
   // CHECK-NO-RTTI-NEXT: COMPLETION: wchar_t
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while(<#condition#>){<#statements#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while(<#condition#>){
   // CHECK-NO-RTTI: COMPLETION: X : X
   // CHECK-NO-RTTI-NEXT: COMPLETION: y : [#int#]y
   // CHECK-NO-RTTI-NEXT: COMPLETION: z : [#void#]z(<#int#>)

From 5d3b3188f722456a6470c7effcacf17656406429 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 28 May 2019 14:34:28 +0000
Subject: [PATCH 0361/1176] Reland D61583 [ELF] Error on relocations to
 STT_SECTION symbols if the sections were discarded

This is implemented by creating Undefined (instead of Defined) for such
local STT_SECTION symbols. It allows us to catch errors when there are
relocations to such discarded sections (e.g. in PR41693, ld.bfd and gold
error but we don't). Updated comdat-discarded-error.s checks we emit
friendly error message.

For relocatable-eh-frame.s, ld.lld -r a.o a.o will now error
"STT_SECTION symbol should be defined" because the section .eh_frame
refers to is now an Undefined instead of a Defined.
So I have to change `error()` to `warn()` to retain the output.

rLLD361144 inadvertently enabled the error for --gdb-index
(in LLDDwarfObj<ELFT>::findAux()).

Relocations from .debug_info (not in comdat) to .text.* (in comdat) for
DW_AT_low_pc are common. If an .text.* was discarded, rLLD361144 would error,
which was unexpected. (Note, if we don't error as this patch does,
InputSection::relocateNonAlloc() will resolve such relocations).

llvm-svn: 361830
---
 lld/ELF/DWARF.cpp                             |  5 +-
 lld/ELF/InputFiles.cpp                        |  3 +
 lld/ELF/InputSection.cpp                      |  3 +-
 lld/ELF/Relocations.cpp                       | 14 ++++-
 lld/test/ELF/comdat-discarded-error.s         | 12 +++-
 lld/test/ELF/comdat-discarded-gdb-index.s     | 63 +++++++++++++++++++
 lld/test/ELF/comdat-discarded-reloc.s         |  2 +-
 lld/test/ELF/comdat.s                         |  4 +-
 .../ELF/invalid-undef-section-symbol.test     |  2 +-
 lld/test/ELF/relocatable-eh-frame.s           |  4 +-
 10 files changed, 100 insertions(+), 12 deletions(-)
 create mode 100644 lld/test/ELF/comdat-discarded-gdb-index.s

diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index 6b90a038e8730..6ebe7247e9044 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -93,8 +93,11 @@ LLDDwarfObj<ELFT>::findAux(const InputSectionBase &Sec, uint64_t Pos,
   uint32_t SecIndex = File->getSectionIndex(Sym);
 
   // Broken debug info can point to a non-Defined symbol.
-  auto *DR = dyn_cast<Defined>(&File->getRelocTargetSym(Rel));
+  Symbol &S = File->getRelocTargetSym(Rel);
+  auto *DR = dyn_cast<Defined>(&S);
   if (!DR) {
+    if (S.isSection())
+      return None;
     RelType Type = Rel.getType(Config->IsMips64EL);
     if (Type != Target->NoneRel)
       error(toString(File) + ": relocation " + lld::toString(Type) + " at 0x" +
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 2b89533191a19..16991421fe6ce 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -997,6 +997,9 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
 
       if (ESym.st_shndx == SHN_UNDEF)
         this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
+      else if (Sec == &InputSection::Discarded)
+        this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type,
+                                           /*DiscardedSecIdx=*/SecIdx);
       else
         this->Symbols[I] =
             make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 07a30ed57c476..74878931afb40 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -438,7 +438,8 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
       // hopefully creates a frame that is ignored at runtime.
       auto *D = dyn_cast<Defined>(&Sym);
       if (!D) {
-        error("STT_SECTION symbol should be defined");
+        warn("STT_SECTION symbol should be defined");
+        P->setSymbolAndType(0, 0, false);
         continue;
       }
       SectionBase *Section = D->Section->Repl;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 12c38c70dcc41..a8ed792164bc8 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -681,9 +681,17 @@ static std::string maybeReportDiscarded(Undefined &Sym, InputSectionBase &Sec,
     return "";
   ArrayRef<Elf_Shdr_Impl<ELFT>> ObjSections =
       CHECK(File->getObj().sections(), File);
-  std::string Msg =
-      "relocation refers to a symbol in a discarded section: " + toString(Sym) +
-      "\n>>> defined in " + toString(File);
+
+  std::string Msg;
+  if (Sym.Type == ELF::STT_SECTION) {
+    Msg = "relocation refers to a discarded section: ";
+    Msg += CHECK(
+        File->getObj().getSectionName(&ObjSections[Sym.DiscardedSecIdx]), File);
+  } else {
+    Msg = "relocation refers to a symbol in a discarded section: " +
+          toString(Sym);
+  }
+  Msg += "\n>>> defined in " + toString(File);
 
   Elf_Shdr_Impl<ELFT> ELFSec = ObjSections[Sym.DiscardedSecIdx - 1];
   if (ELFSec.sh_type != SHT_GROUP)
diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s
index 3584783cde09d..0f6b417b0fa4a 100644
--- a/lld/test/ELF/comdat-discarded-error.s
+++ b/lld/test/ELF/comdat-discarded-error.s
@@ -5,7 +5,7 @@
 # RUN: echo '.section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' | \
 # RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o
 
-# RUN: not ld.lld %t1.o %t2.o %t3.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
 
 # CHECK:      error: relocation refers to a symbol in a discarded section: bar
 # CHECK-NEXT: >>> defined in {{.*}}3.o
@@ -13,6 +13,16 @@
 # CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
 # CHECK-NEXT: >>> referenced by {{.*}}1.o:(.text+0x1)
 
+# CHECK:      error: relocation refers to a discarded section: .text.foo
+# CHECK-NEXT: >>> defined in {{.*}}1.o
+# CHECK-NEXT: >>> section group signature: foo
+# CHECK-NEXT: >>> prevailing definition is in {{.*}}2.o
+# CHECK-NEXT: >>> referenced by {{.*}}1.o:(.data+0x0)
+
 .globl _start
 _start:
   jmp bar
+
+.section .text.foo,"axG",@progbits,foo,comdat
+.data
+  .quad .text.foo
diff --git a/lld/test/ELF/comdat-discarded-gdb-index.s b/lld/test/ELF/comdat-discarded-gdb-index.s
new file mode 100644
index 0000000000000..43505960498a2
--- /dev/null
+++ b/lld/test/ELF/comdat-discarded-gdb-index.s
@@ -0,0 +1,63 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: ld.lld --gdb-index %t.o %t.o -o %t
+
+## .debug_info has a relocation to .text.foo . The second %t.o is discarded.
+## Check we don't error on the relocation.
+# CHECK: .rela.debug_info {
+# CHECK-NEXT: 0xC R_X86_64_64 .text.foo 0x0
+
+.section .text.foo,"axG",@progbits,foo,comdat
+.globl foo
+.Lfunc_begin0:
+foo:
+  ret
+.Lfunc_end0:
+
+.section .debug_abbrev,"",@progbits
+  .byte   1       # Abbreviation Code
+  .byte   17      # DW_TAG_compile_unit
+  .byte   1       # DW_CHILDREN_yes
+  .byte   17      # DW_AT_low_pc
+  .byte   1       # DW_FORM_addr
+  .byte   18      # DW_AT_high_pc
+  .byte   6       # DW_FORM_data4
+  .ascii  "\264B" # DW_AT_GNU_pubnames
+  .byte   25      # DW_FORM_flag_present
+  .byte   0       # EOM(1)
+  .byte   0       # EOM(2)
+  .byte   2       # Abbreviation Code
+  .byte   46      # DW_TAG_subprogram
+  .byte   0       # DW_CHILDREN_no
+  .byte   3       # DW_AT_name
+  .byte   8       # DW_FORM_string
+  .byte   0       # EOM(1)
+  .byte   0       # EOM(2)
+  .byte   0
+
+.section .debug_info,"",@progbits
+.Lcu_begin0:
+  .long   .Lcu_end0 - .Lcu_begin0 - 4
+  .short  4              # DWARF version number
+  .long   0              # Offset Into Abbrev. Section
+  .byte   4              # Address Size
+.Ldie0:
+  .byte   1              # Abbrev [1] DW_TAG_compile_unit
+  .quad   .Lfunc_begin0  # DW_AT_low_pc
+  .long   .Lfunc_end0 - .Lfunc_begin0 # DW_AT_high_pc
+  .byte   2              # Abbrev [2] DW_TAG_subprogram
+  .asciz  "foo"          # DW_AT_name
+  .byte   0
+.Lcu_end0:
+
+.section .debug_gnu_pubnames,"",@progbits
+  .long   .LpubNames_end0 - .LpubNames_begin0
+.LpubNames_begin0:
+  .short  2              # Version
+  .long   .Lcu_begin0    # CU Offset
+  .long   .Lcu_end0 - .Lcu_begin0
+  .long   .Ldie0 - .Lcu_begin0
+  .byte   48             # Attributes: FUNCTION, EXTERNAL
+  .asciz  "foo"          # External Name
+  .long   0
+.LpubNames_end0:
diff --git a/lld/test/ELF/comdat-discarded-reloc.s b/lld/test/ELF/comdat-discarded-reloc.s
index d23baf386e92d..d12732cd3569b 100644
--- a/lld/test/ELF/comdat-discarded-reloc.s
+++ b/lld/test/ELF/comdat-discarded-reloc.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat-discarded-reloc.s -o %t2.o
-# RUN: ld.lld -gc-sections %t.o %t2.o -o %t
+# RUN: ld.lld -gc-sections --noinhibit-exec %t.o %t2.o -o /dev/null
 
 ## ELF spec doesn't allow a relocation to point to a deduplicated
 ## COMDAT section. Unfortunately this happens in practice (e.g. .eh_frame)
diff --git a/lld/test/ELF/comdat.s b/lld/test/ELF/comdat.s
index 86103e5d9eb75..9e3f5a81d300e 100644
--- a/lld/test/ELF/comdat.s
+++ b/lld/test/ELF/comdat.s
@@ -1,7 +1,7 @@
 // REQUIRES: x86
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat.s -o %t2.o
-// RUN: ld.lld -shared %t.o %t.o %t2.o -o %t
+// RUN: ld.lld -shared %t.o %t2.o -o %t
 // RUN: llvm-objdump -d %t | FileCheck %s
 // RUN: llvm-readobj -S --symbols %t | FileCheck --check-prefix=READ %s
 
@@ -31,9 +31,7 @@ foo:
 // CHECK-EMPTY:
 // CHECK-NEXT: bar:
 // 0x1000 - 0x1001 - 5 = -6
-// 0      - 0x1006 - 5 = -4107
 // CHECK-NEXT:   1001:	{{.*}}  callq  -6
-// CHECK-NEXT:   1006:	{{.*}}  callq  -4107
 
         .section .text3,"axG",@progbits,zed,comdat,unique,0
 
diff --git a/lld/test/ELF/invalid-undef-section-symbol.test b/lld/test/ELF/invalid-undef-section-symbol.test
index 1d66885eadf8e..80e5a1464d740 100644
--- a/lld/test/ELF/invalid-undef-section-symbol.test
+++ b/lld/test/ELF/invalid-undef-section-symbol.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj %s -o %t.o
-# RUN: not ld.lld -r %t.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld -r --fatal-warnings %t.o -o /dev/null 2>&1 | FileCheck %s
 
 # We used to crash at this.
 # CHECK: STT_SECTION symbol should be defined
diff --git a/lld/test/ELF/relocatable-eh-frame.s b/lld/test/ELF/relocatable-eh-frame.s
index dee906acb87fb..6172dd355db4a 100644
--- a/lld/test/ELF/relocatable-eh-frame.s
+++ b/lld/test/ELF/relocatable-eh-frame.s
@@ -1,10 +1,12 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: ld.lld -r %t.o %t.o -o %t
+# RUN: ld.lld -r %t.o %t.o -o %t 2>&1 | FileCheck --check-prefix=WARN %s
 # RUN: llvm-readobj -r %t | FileCheck %s
 # RUN: ld.lld %t -o %t.so -shared
 # RUN: llvm-objdump -h %t.so | FileCheck --check-prefix=DSO %s
 
+# WARN: STT_SECTION symbol should be defined
+
 # DSO: .eh_frame     00000034
 
 # CHECK:      Relocations [

From 71f8f745b412500c6a9207c56f55fa2d6e8114b1 Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Tue, 28 May 2019 14:37:45 +0000
Subject: [PATCH 0362/1176] Revert 361827. It broke the bots.

llvm-svn: 361831
---
 .../vector-constrained-fp-intrinsics.ll       | 10810 ----------------
 1 file changed, 10810 deletions(-)
 delete mode 100644 llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
deleted file mode 100644
index 94d2f94ddd401..0000000000000
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ /dev/null
@@ -1,10810 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu < %s | FileCheck --check-prefix=PC64LE %s
-; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s | FileCheck --check-prefix=PC64LE9 %s
-; RUN: llc -O3 -mtriple=powerpc64-linux-gnu < %s | FileCheck --check-prefix=PC64 %s
-
-define <1 x float> @constrained_vector_fdiv_v1f32() {
-; PC64LE-LABEL: constrained_vector_fdiv_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI0_1@toc@l(4)
-; PC64LE-NEXT:    xsdivsp 0, 1, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fdiv_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI0_1@toc@l(3)
-; PC64LE9-NEXT:    xsdivsp 0, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fdiv_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI0_1@toc@l(3)
-; PC64-NEXT:    fdivs 1, 1, 0
-; PC64-NEXT:    blr
-entry:
-  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
-           <1 x float> <float 1.000000e+00>,
-           <1 x float> <float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %div
-}
-
-define <2 x double> @constrained_vector_fdiv_v2f64() {
-; PC64LE-LABEL: constrained_vector_fdiv_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI1_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvdivdp 34, 1, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fdiv_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI1_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI1_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvdivdp 34, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fdiv_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI1_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI1_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI1_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI1_2@toc@l(3)
-; PC64-NEXT:    fdiv 1, 1, 0
-; PC64-NEXT:    fdiv 2, 2, 0
-; PC64-NEXT:    blr
-entry:
-  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
-           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
-           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %div
-}
-
-define <3 x float> @constrained_vector_fdiv_v3f32() {
-; PC64LE-LABEL: constrained_vector_fdiv_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI2_3@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI2_2@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI2_3@toc@l(4)
-; PC64LE-NEXT:    lfs 2, .LCPI2_2@toc@l(5)
-; PC64LE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; PC64LE-NEXT:    xsdivsp 1, 1, 0
-; PC64LE-NEXT:    lfs 3, .LCPI2_1@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
-; PC64LE-NEXT:    xsdivsp 2, 2, 0
-; PC64LE-NEXT:    addi 3, 3, .LCPI2_4@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xsdivsp 0, 3, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fdiv_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI2_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI2_2@toc@ha
-; PC64LE9-NEXT:    xsdivsp 1, 1, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI2_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI2_3@toc@ha
-; PC64LE9-NEXT:    lfs 3, .LCPI2_3@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI2_4@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xsdivsp 2, 2, 0
-; PC64LE9-NEXT:    xsdivsp 0, 3, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fdiv_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI2_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI2_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI2_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI2_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI2_3@toc@l(3)
-; PC64-NEXT:    fdivs 1, 1, 0
-; PC64-NEXT:    fdivs 2, 2, 0
-; PC64-NEXT:    fdivs 3, 3, 0
-; PC64-NEXT:    blr
-entry:
-  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
-           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
-           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %div
-}
-
-define <3 x double> @constrained_vector_fdiv_v3f64() {
-; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI3_3@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI3_2@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI3_3@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
-; PC64LE-NEXT:    lfs 3, .LCPI3_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvdivdp 2, 1, 0
-; PC64LE-NEXT:    lfs 0, .LCPI3_1@toc@l(4)
-; PC64LE-NEXT:    xsdivdp 3, 0, 3
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI3_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI3_2@toc@l
-; PC64LE9-NEXT:    xsdivdp 3, 1, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI3_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI3_3@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvdivdp 2, 1, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fdiv_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI3_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI3_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI3_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI3_3@toc@l(3)
-; PC64-NEXT:    fdiv 1, 1, 0
-; PC64-NEXT:    fdiv 2, 2, 0
-; PC64-NEXT:    fdiv 3, 3, 0
-; PC64-NEXT:    blr
-entry:
-  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
-           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
-           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x double> %div
-}
-
-define <4 x double> @constrained_vector_fdiv_v4f64() {
-; PC64LE-LABEL: constrained_vector_fdiv_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI4_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI4_2@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI4_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 4, .LCPI4_1@toc@l
-; PC64LE-NEXT:    addi 4, 5, .LCPI4_2@toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 3
-; PC64LE-NEXT:    lxvd2x 2, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xvdivdp 34, 1, 0
-; PC64LE-NEXT:    xvdivdp 35, 2, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fdiv_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI4_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI4_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI4_2@toc@l
-; PC64LE9-NEXT:    xvdivdp 34, 1, 0
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvdivdp 35, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fdiv_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI4_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI4_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI4_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI4_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI4_3@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI4_4@toc@ha
-; PC64-NEXT:    fdiv 1, 1, 0
-; PC64-NEXT:    lfs 4, .LCPI4_4@toc@l(3)
-; PC64-NEXT:    fdiv 2, 2, 0
-; PC64-NEXT:    fdiv 3, 3, 0
-; PC64-NEXT:    fdiv 4, 4, 0
-; PC64-NEXT:    blr
-entry:
-  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
-           <4 x double> <double 1.000000e+00, double 2.000000e+00,
-                         double 3.000000e+00, double 4.000000e+00>,
-           <4 x double> <double 1.000000e+01, double 1.000000e+01,
-                         double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %div
-}
-
-define <1 x float> @constrained_vector_frem_v1f32() {
-; PC64LE-LABEL: constrained_vector_frem_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI5_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI5_1@toc@l(4)
-; PC64LE-NEXT:    bl fmodf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_frem_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI5_1@toc@l(3)
-; PC64LE9-NEXT:    bl fmodf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_frem_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI5_1@toc@l(3)
-; PC64-NEXT:    bl fmodf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
-           <1 x float> <float 1.000000e+00>,
-           <1 x float> <float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %rem
-}
-
-define <2 x double> @constrained_vector_frem_v2f64() {
-; PC64LE-LABEL: constrained_vector_frem_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    addis 4, 2, .LCPI6_1@toc@ha
-; PC64LE-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI6_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_frem_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
-; PC64LE9-NEXT:    lfs 31, .LCPI6_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_frem_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI6_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
-           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
-           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %rem
-}
-
-define <3 x float> @constrained_vector_frem_v3f32() {
-; PC64LE-LABEL: constrained_vector_frem_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f29, -24
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    addis 4, 2, .LCPI7_1@toc@ha
-; PC64LE-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI7_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl fmodf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
-; PC64LE-NEXT:    bl fmodf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    fmr 29, 1
-; PC64LE-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
-; PC64LE-NEXT:    bl fmodf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
-; PC64LE-NEXT:    addis 3, 2, .LCPI7_4@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI7_4@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_frem_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f29, -24
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
-; PC64LE9-NEXT:    lfs 31, .LCPI7_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmodf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmodf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
-; PC64LE9-NEXT:    fmr 29, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmodf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    addis 3, 2, .LCPI7_4@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI7_4@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_frem_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI7_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmodf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmodf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmodf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
-           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
-           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %rem
-}
-
-define <3 x double> @constrained_vector_frem_v3f64() {
-; PC64LE-LABEL: constrained_vector_frem_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -96(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 96
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    .cfi_offset v31, -32
-; PC64LE-NEXT:    addis 4, 2, .LCPI8_1@toc@ha
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI8_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 96
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_frem_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -80(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    .cfi_offset v31, -32
-; PC64LE9-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    lfs 31, .LCPI8_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 80
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_frem_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI8_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
-           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
-           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x double> %rem
-}
-
-define <4 x double> @constrained_vector_frem_v4f64() {
-; PC64LE-LABEL: constrained_vector_frem_v4f64:
-; PC64LE:       # %bb.0:
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -96(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 96
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    .cfi_offset v31, -32
-; PC64LE-NEXT:    addis 4, 2, .LCPI9_1@toc@ha
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI9_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
-; PC64LE-NEXT:    bl fmod
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 96
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_frem_v4f64:
-; PC64LE9:       # %bb.0:
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -80(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    .cfi_offset v31, -32
-; PC64LE9-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    lfs 31, .LCPI9_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmod
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 80
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_frem_v4f64:
-; PC64:       # %bb.0:
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f28, -32
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 28, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI9_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
-; PC64-NEXT:    fmr 28, 1
-; PC64-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmod
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    fmr 3, 28
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 28, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
-           <4 x double> <double 1.000000e+00, double 2.000000e+00,
-                         double 3.000000e+00, double 4.000000e+00>,
-           <4 x double> <double 1.000000e+01, double 1.000000e+01,
-                         double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %rem
-}
-
-define <1 x float> @constrained_vector_fmul_v1f32() {
-; PC64LE-LABEL: constrained_vector_fmul_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI10_1@toc@l(4)
-; PC64LE-NEXT:    xsmulsp 0, 1, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fmul_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI10_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI10_1@toc@l(3)
-; PC64LE9-NEXT:    xsmulsp 0, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fmul_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI10_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI10_1@toc@l(3)
-; PC64-NEXT:    fmuls 1, 1, 0
-; PC64-NEXT:    blr
-entry:
-  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
-           <1 x float> <float 0x7FF0000000000000>,
-           <1 x float> <float 2.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %mul
-}
-
-define <2 x double> @constrained_vector_fmul_v2f64() {
-; PC64LE-LABEL: constrained_vector_fmul_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI11_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvmuldp 34, 1, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fmul_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI11_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI11_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmuldp 34, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fmul_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI11_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI11_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI11_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI11_2@toc@l(3)
-; PC64-NEXT:    fmul 1, 0, 1
-; PC64-NEXT:    fmul 2, 0, 2
-; PC64-NEXT:    blr
-entry:
-  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
-           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %mul
-}
-
-define <3 x float> @constrained_vector_fmul_v3f32() {
-; PC64LE-LABEL: constrained_vector_fmul_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI12_3@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI12_2@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI12_1@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI12_3@toc@l(4)
-; PC64LE-NEXT:    lfs 2, .LCPI12_2@toc@l(5)
-; PC64LE-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; PC64LE-NEXT:    xsmulsp 1, 0, 1
-; PC64LE-NEXT:    lfs 3, .LCPI12_0@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI12_4@toc@ha
-; PC64LE-NEXT:    xsmulsp 2, 0, 2
-; PC64LE-NEXT:    addi 3, 3, .LCPI12_4@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xsmulsp 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fmul_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI12_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
-; PC64LE9-NEXT:    xsmulsp 0, 1, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI12_3@toc@ha
-; PC64LE9-NEXT:    lfs 3, .LCPI12_3@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI12_4@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI12_4@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    xsmulsp 2, 1, 2
-; PC64LE9-NEXT:    xsmulsp 1, 1, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 1, 1, 1
-; PC64LE9-NEXT:    xscvdpspn 1, 2
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fmul_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI12_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI12_3@toc@ha
-; PC64-NEXT:    lfs 4, .LCPI12_3@toc@l(3)
-; PC64-NEXT:    fmuls 1, 3, 0
-; PC64-NEXT:    fmuls 2, 3, 2
-; PC64-NEXT:    fmuls 3, 3, 4
-; PC64-NEXT:    blr
-entry:
-  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
-           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
-                        float 0x7FF0000000000000>,
-           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %mul
-}
-
-define <3 x double> @constrained_vector_fmul_v3f64() {
-; PC64LE-LABEL: constrained_vector_fmul_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI13_3@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI13_2@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI13_3@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI13_1@toc@ha
-; PC64LE-NEXT:    lfd 3, .LCPI13_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvmuldp 2, 1, 0
-; PC64LE-NEXT:    lfs 0, .LCPI13_1@toc@l(4)
-; PC64LE-NEXT:    xsmuldp 3, 3, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI13_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI13_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI13_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI13_2@toc@l
-; PC64LE9-NEXT:    xsmuldp 3, 0, 1
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI13_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI13_3@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvmuldp 2, 1, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fmul_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI13_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI13_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI13_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI13_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI13_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI13_3@toc@l(3)
-; PC64-NEXT:    fmul 1, 0, 1
-; PC64-NEXT:    fmul 2, 0, 2
-; PC64-NEXT:    fmul 3, 0, 3
-; PC64-NEXT:    blr
-entry:
-  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
-           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF>,
-           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x double> %mul
-}
-
-define <4 x double> @constrained_vector_fmul_v4f64() {
-; PC64LE-LABEL: constrained_vector_fmul_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI14_2@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI14_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI14_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 5, .LCPI14_2@toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xvmuldp 34, 1, 0
-; PC64LE-NEXT:    xvmuldp 35, 1, 2
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fmul_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI14_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI14_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI14_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI14_2@toc@l
-; PC64LE9-NEXT:    xvmuldp 34, 1, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvmuldp 35, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fmul_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI14_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI14_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI14_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI14_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI14_3@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI14_4@toc@ha
-; PC64-NEXT:    fmul 1, 0, 1
-; PC64-NEXT:    lfs 4, .LCPI14_4@toc@l(3)
-; PC64-NEXT:    fmul 2, 0, 2
-; PC64-NEXT:    fmul 3, 0, 3
-; PC64-NEXT:    fmul 4, 0, 4
-; PC64-NEXT:    blr
-entry:
-  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
-           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <4 x double> <double 2.000000e+00, double 3.000000e+00,
-                         double 4.000000e+00, double 5.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %mul
-}
-
-define <1 x float> @constrained_vector_fadd_v1f32() {
-; PC64LE-LABEL: constrained_vector_fadd_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI15_1@toc@l(4)
-; PC64LE-NEXT:    xsaddsp 0, 1, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fadd_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI15_1@toc@l(3)
-; PC64LE9-NEXT:    xsaddsp 0, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fadd_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI15_1@toc@l(3)
-; PC64-NEXT:    fadds 1, 1, 0
-; PC64-NEXT:    blr
-entry:
-  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
-           <1 x float> <float 0x7FF0000000000000>,
-           <1 x float> <float 1.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %add
-}
-
-define <2 x double> @constrained_vector_fadd_v2f64() {
-; PC64LE-LABEL: constrained_vector_fadd_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI16_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI16_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI16_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvadddp 34, 1, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fadd_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI16_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI16_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI16_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvadddp 34, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fadd_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI16_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI16_1@toc@ha
-; PC64-NEXT:    lfd 1, .LCPI16_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI16_2@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI16_2@toc@l(3)
-; PC64-NEXT:    fadd 2, 1, 0
-; PC64-NEXT:    fadd 1, 1, 3
-; PC64-NEXT:    blr
-entry:
-  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
-           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %add
-}
-
-define <3 x float> @constrained_vector_fadd_v3f32() {
-; PC64LE-LABEL: constrained_vector_fadd_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI17_2@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI17_1@toc@ha
-; PC64LE-NEXT:    xxlxor 3, 3, 3
-; PC64LE-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI17_2@toc@l(4)
-; PC64LE-NEXT:    lfs 2, .LCPI17_1@toc@l(5)
-; PC64LE-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI17_3@toc@l
-; PC64LE-NEXT:    xsaddsp 1, 0, 1
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xsaddsp 2, 0, 2
-; PC64LE-NEXT:    xsaddsp 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fadd_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI17_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI17_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI17_2@toc@ha
-; PC64LE9-NEXT:    xsaddsp 2, 0, 2
-; PC64LE9-NEXT:    lfs 3, .LCPI17_2@toc@l(3)
-; PC64LE9-NEXT:    xxlxor 1, 1, 1
-; PC64LE9-NEXT:    xsaddsp 1, 0, 1
-; PC64LE9-NEXT:    xsaddsp 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI17_3@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fadd_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI17_1@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI17_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI17_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI17_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
-; PC64-NEXT:    lfs 4, .LCPI17_3@toc@l(3)
-; PC64-NEXT:    fadds 1, 3, 0
-; PC64-NEXT:    fadds 2, 3, 2
-; PC64-NEXT:    fadds 3, 3, 4
-; PC64-NEXT:    blr
-entry:
-  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
-           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
-                        float 0xFFFFFFFFE0000000>,
-           <3 x float> <float 2.0, float 1.0, float 0.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %add
-}
-
-define <3 x double> @constrained_vector_fadd_v3f64() {
-; PC64LE-LABEL: constrained_vector_fadd_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI18_2@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI18_1@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI18_2@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
-; PC64LE-NEXT:    lfd 3, .LCPI18_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvadddp 2, 1, 0
-; PC64LE-NEXT:    xxlxor 0, 0, 0
-; PC64LE-NEXT:    xsadddp 3, 3, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI18_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
-; PC64LE9-NEXT:    xxlxor 1, 1, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI18_1@toc@l
-; PC64LE9-NEXT:    xsadddp 3, 0, 1
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI18_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI18_2@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvadddp 2, 1, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fadd_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI18_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI18_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI18_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI18_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI18_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI18_3@toc@l(3)
-; PC64-NEXT:    fadd 1, 0, 1
-; PC64-NEXT:    fadd 2, 0, 2
-; PC64-NEXT:    fadd 3, 0, 3
-; PC64-NEXT:    blr
-entry:
-  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
-           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF>,
-           <3 x double> <double 2.0, double 1.0, double 0.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x double> %add
-}
-
-define <4 x double> @constrained_vector_fadd_v4f64() {
-; PC64LE-LABEL: constrained_vector_fadd_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI19_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI19_2@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI19_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI19_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 5, .LCPI19_2@toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xvadddp 34, 1, 0
-; PC64LE-NEXT:    xvadddp 35, 1, 2
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fadd_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI19_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI19_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI19_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI19_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI19_2@toc@l
-; PC64LE9-NEXT:    xvadddp 34, 1, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvadddp 35, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fadd_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI19_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI19_1@toc@ha
-; PC64-NEXT:    lfd 3, .LCPI19_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI19_2@toc@ha
-; PC64-NEXT:    lfd 1, .LCPI19_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI19_3@toc@ha
-; PC64-NEXT:    lfs 5, .LCPI19_3@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI19_4@toc@ha
-; PC64-NEXT:    fadd 2, 3, 0
-; PC64-NEXT:    lfs 6, .LCPI19_4@toc@l(3)
-; PC64-NEXT:    fadd 4, 3, 1
-; PC64-NEXT:    fadd 1, 3, 5
-; PC64-NEXT:    fadd 3, 3, 6
-; PC64-NEXT:    blr
-entry:
-  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
-           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <4 x double> <double 1.000000e+00, double 1.000000e-01,
-                         double 2.000000e+00, double 2.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %add
-}
-
-define <1 x float> @constrained_vector_fsub_v1f32() {
-; PC64LE-LABEL: constrained_vector_fsub_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI20_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI20_1@toc@l(4)
-; PC64LE-NEXT:    xssubsp 0, 1, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fsub_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI20_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI20_1@toc@l(3)
-; PC64LE9-NEXT:    xssubsp 0, 1, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fsub_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI20_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI20_1@toc@l(3)
-; PC64-NEXT:    fsubs 1, 1, 0
-; PC64-NEXT:    blr
-entry:
-  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
-           <1 x float> <float 0x7FF0000000000000>,
-           <1 x float> <float 1.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %sub
-}
-
-define <2 x double> @constrained_vector_fsub_v2f64() {
-; PC64LE-LABEL: constrained_vector_fsub_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI21_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI21_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI21_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvsubdp 34, 1, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fsub_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI21_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI21_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI21_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvsubdp 34, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fsub_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI21_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI21_1@toc@ha
-; PC64-NEXT:    lfd 1, .LCPI21_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI21_2@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI21_2@toc@l(3)
-; PC64-NEXT:    fsub 2, 1, 0
-; PC64-NEXT:    fsub 1, 1, 3
-; PC64-NEXT:    blr
-entry:
-  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
-           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
-           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %sub
-}
-
-define <3 x float> @constrained_vector_fsub_v3f32() {
-; PC64LE-LABEL: constrained_vector_fsub_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI22_2@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI22_1@toc@ha
-; PC64LE-NEXT:    xxlxor 3, 3, 3
-; PC64LE-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI22_2@toc@l(4)
-; PC64LE-NEXT:    lfs 2, .LCPI22_1@toc@l(5)
-; PC64LE-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI22_3@toc@l
-; PC64LE-NEXT:    xssubsp 1, 0, 1
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xssubsp 2, 0, 2
-; PC64LE-NEXT:    xssubsp 0, 0, 3
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xscvdpspn 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
-; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fsub_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI22_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI22_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI22_2@toc@ha
-; PC64LE9-NEXT:    xssubsp 2, 0, 2
-; PC64LE9-NEXT:    lfs 3, .LCPI22_2@toc@l(3)
-; PC64LE9-NEXT:    xxlxor 1, 1, 1
-; PC64LE9-NEXT:    xssubsp 1, 0, 1
-; PC64LE9-NEXT:    xssubsp 0, 0, 3
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI22_3@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fsub_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
-; PC64-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI22_1@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI22_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI22_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI22_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
-; PC64-NEXT:    lfs 4, .LCPI22_3@toc@l(3)
-; PC64-NEXT:    fsubs 1, 3, 0
-; PC64-NEXT:    fsubs 2, 3, 2
-; PC64-NEXT:    fsubs 3, 3, 4
-; PC64-NEXT:    blr
-entry:
-  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
-           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
-                        float 0xFFFFFFFFE0000000>,
-           <3 x float> <float 2.0, float 1.0, float 0.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %sub
-}
-
-define <3 x double> @constrained_vector_fsub_v3f64() {
-; PC64LE-LABEL: constrained_vector_fsub_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI23_2@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI23_1@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI23_2@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
-; PC64LE-NEXT:    lfd 3, .LCPI23_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvsubdp 2, 1, 0
-; PC64LE-NEXT:    xxlxor 0, 0, 0
-; PC64LE-NEXT:    xssubdp 3, 3, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI23_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
-; PC64LE9-NEXT:    xxlxor 1, 1, 1
-; PC64LE9-NEXT:    addi 3, 3, .LCPI23_1@toc@l
-; PC64LE9-NEXT:    xssubdp 3, 0, 1
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI23_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI23_2@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    xvsubdp 2, 1, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fsub_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI23_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI23_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI23_2@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI23_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI23_3@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI23_3@toc@l(3)
-; PC64-NEXT:    fsub 1, 0, 1
-; PC64-NEXT:    fsub 2, 0, 2
-; PC64-NEXT:    fsub 3, 0, 3
-; PC64-NEXT:    blr
-entry:
-  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
-           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
-                         double 0xFFEFFFFFFFFFFFFF>,
-           <3 x double> <double 2.0, double 1.0, double 0.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x double> %sub
-}
-
-define <4 x double> @constrained_vector_fsub_v4f64() {
-; PC64LE-LABEL: constrained_vector_fsub_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI24_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI24_2@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI24_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI24_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 5, .LCPI24_2@toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    lxvd2x 2, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xxswapd 2, 2
-; PC64LE-NEXT:    xvsubdp 34, 1, 0
-; PC64LE-NEXT:    xvsubdp 35, 1, 2
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fsub_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI24_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI24_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI24_1@toc@l
-; PC64LE9-NEXT:    lxvx 1, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI24_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI24_2@toc@l
-; PC64LE9-NEXT:    xvsubdp 34, 1, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvsubdp 35, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fsub_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI24_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI24_1@toc@ha
-; PC64-NEXT:    lfd 3, .LCPI24_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI24_2@toc@ha
-; PC64-NEXT:    lfd 1, .LCPI24_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI24_3@toc@ha
-; PC64-NEXT:    lfs 5, .LCPI24_3@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI24_4@toc@ha
-; PC64-NEXT:    fsub 2, 3, 0
-; PC64-NEXT:    lfs 6, .LCPI24_4@toc@l(3)
-; PC64-NEXT:    fsub 4, 3, 1
-; PC64-NEXT:    fsub 1, 3, 5
-; PC64-NEXT:    fsub 3, 3, 6
-; PC64-NEXT:    blr
-entry:
-  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
-           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
-                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
-           <4 x double> <double 1.000000e+00, double 1.000000e-01,
-                         double 2.000000e+00, double 2.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %sub
-}
-
-define <1 x float> @constrained_vector_sqrt_v1f32() {
-; PC64LE-LABEL: constrained_vector_sqrt_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI25_0@toc@l(3)
-; PC64LE-NEXT:    xssqrtsp 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sqrt_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI25_0@toc@l(3)
-; PC64LE9-NEXT:    xssqrtsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sqrt_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI25_0@toc@l(3)
-; PC64-NEXT:    bl sqrtf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
-                              <1 x float> <float 42.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <1 x float> %sqrt
-}
-
-define <2 x double> @constrained_vector_sqrt_v2f64() {
-; PC64LE-LABEL: constrained_vector_sqrt_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI26_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvsqrtdp 34, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sqrt_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI26_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvsqrtdp 34, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sqrt_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI26_0@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI26_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI26_1@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
-                              <2 x double> <double 42.0, double 42.1>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %sqrt
-}
-
-define <3 x float> @constrained_vector_sqrt_v3f32() {
-; PC64LE-LABEL: constrained_vector_sqrt_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI27_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI27_2@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI27_1@toc@l(4)
-; PC64LE-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
-; PC64LE-NEXT:    xssqrtsp 0, 0
-; PC64LE-NEXT:    lfs 2, .LCPI27_0@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI27_3@toc@ha
-; PC64LE-NEXT:    xssqrtsp 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI27_3@toc@l
-; PC64LE-NEXT:    xssqrtsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sqrt_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI27_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI27_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI27_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
-; PC64LE9-NEXT:    xssqrtsp 0, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI27_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI27_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI27_3@toc@l
-; PC64LE9-NEXT:    xssqrtsp 1, 1
-; PC64LE9-NEXT:    xssqrtsp 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sqrt_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI27_0@toc@l(3)
-; PC64-NEXT:    bl sqrtf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI27_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI27_1@toc@l(3)
-; PC64-NEXT:    bl sqrtf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI27_2@toc@l(3)
-; PC64-NEXT:    bl sqrtf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %sqrt
-}
-
-define <3 x double> @constrained_vector_sqrt_v3f64() {
-; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI28_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI28_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xssqrtdp 3, 1
-; PC64LE-NEXT:    xvsqrtdp 2, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI28_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI28_1@toc@l
-; PC64LE9-NEXT:    xssqrtdp 3, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvsqrtdp 2, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sqrt_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI28_0@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI28_1@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI28_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI28_2@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %sqrt
-}
-
-define <4 x double> @constrained_vector_sqrt_v4f64() {
-; PC64LE-LABEL: constrained_vector_sqrt_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI29_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI29_0@toc@l
-; PC64LE-NEXT:    addi 4, 4, .LCPI29_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    lxvd2x 1, 0, 4
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvsqrtdp 34, 0
-; PC64LE-NEXT:    xvsqrtdp 35, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sqrt_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI29_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI29_1@toc@l
-; PC64LE9-NEXT:    xvsqrtdp 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvsqrtdp 35, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sqrt_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI29_0@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI29_1@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI29_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI29_2@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI29_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI29_3@toc@l(3)
-; PC64-NEXT:    bl sqrt
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
- entry:
-  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
-                              <4 x double> <double 42.0, double 42.1,
-                                            double 42.2, double 42.3>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %sqrt
-}
-
-define <1 x float> @constrained_vector_pow_v1f32() {
-; PC64LE-LABEL: constrained_vector_pow_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI30_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI30_1@toc@l(4)
-; PC64LE-NEXT:    bl powf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_pow_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI30_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI30_1@toc@l(3)
-; PC64LE9-NEXT:    bl powf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_pow_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI30_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI30_1@toc@l(3)
-; PC64-NEXT:    bl powf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
-                             <1 x float> <float 42.0>,
-                             <1 x float> <float 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %pow
-}
-
-define <2 x double> @constrained_vector_pow_v2f64() {
-; PC64LE-LABEL: constrained_vector_pow_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    addis 4, 2, .LCPI31_1@toc@ha
-; PC64LE-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI31_1@toc@l(4)
-; PC64LE-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_pow_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
-; PC64LE9-NEXT:    lfs 31, .LCPI31_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_pow_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI31_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
-                             <2 x double> <double 42.1, double 42.2>,
-                             <2 x double> <double 3.0, double 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %pow
-}
-
-define <3 x float> @constrained_vector_pow_v3f32() {
-; PC64LE-LABEL: constrained_vector_pow_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f29, -24
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    addis 4, 2, .LCPI32_1@toc@ha
-; PC64LE-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI32_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl powf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
-; PC64LE-NEXT:    bl powf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    fmr 29, 1
-; PC64LE-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
-; PC64LE-NEXT:    bl powf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
-; PC64LE-NEXT:    addis 3, 2, .LCPI32_4@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI32_4@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_pow_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f29, -24
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI32_1@toc@ha
-; PC64LE9-NEXT:    lfs 31, .LCPI32_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl powf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl powf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
-; PC64LE9-NEXT:    fmr 29, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl powf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    addis 3, 2, .LCPI32_4@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI32_4@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_pow_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI32_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI32_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl powf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl powf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl powf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
-                             <3 x float> <float 42.0, float 43.0, float 44.0>,
-                             <3 x float> <float 3.0, float 3.0, float 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <3 x float> %pow
-}
-
-define <3 x double> @constrained_vector_pow_v3f64() {
-; PC64LE-LABEL: constrained_vector_pow_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -96(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 96
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    .cfi_offset v31, -32
-; PC64LE-NEXT:    addis 4, 2, .LCPI33_1@toc@ha
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI33_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 96
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_pow_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -80(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    .cfi_offset v31, -32
-; PC64LE9-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI33_1@toc@ha
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    lfs 31, .LCPI33_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 80
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_pow_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI33_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI33_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          <3 x double> <double 3.0, double 3.0, double 3.0>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %pow
-}
-
-define <4 x double> @constrained_vector_pow_v4f64() {
-; PC64LE-LABEL: constrained_vector_pow_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -96(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 96
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    .cfi_offset v31, -32
-; PC64LE-NEXT:    addis 4, 2, .LCPI34_1@toc@ha
-; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI34_1@toc@l(4)
-; PC64LE-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
-; PC64LE-NEXT:    bl pow
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 96
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_pow_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -80(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    .cfi_offset v31, -32
-; PC64LE9-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI34_1@toc@ha
-; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    lfs 31, .LCPI34_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl pow
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 80
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_pow_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f28, -32
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 28, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI34_1@toc@ha
-; PC64-NEXT:    lfs 31, .LCPI34_1@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
-; PC64-NEXT:    fmr 28, 1
-; PC64-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl pow
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    fmr 3, 28
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 28, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
-                             <4 x double> <double 42.1, double 42.2,
-                                           double 42.3, double 42.4>,
-                             <4 x double> <double 3.0, double 3.0,
-                                           double 3.0, double 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %pow
-}
-
-define <1 x float> @constrained_vector_powi_v1f32() {
-; PC64LE-LABEL: constrained_vector_powi_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
-; PC64LE-NEXT:    bl __powisf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_powi_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powisf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_powi_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
-; PC64-NEXT:    bl __powisf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
-                              <1 x float> <float 42.0>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <1 x float> %powi
-}
-
-define <2 x double> @constrained_vector_powi_v2f64() {
-; PC64LE-LABEL: constrained_vector_powi_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_powi_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_powi_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
-                              <2 x double> <double 42.1, double 42.2>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %powi
-}
-
-define <3 x float> @constrained_vector_powi_v3f32() {
-;
-;
-; PC64LE-LABEL: constrained_vector_powi_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
-; PC64LE-NEXT:    bl __powisf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
-; PC64LE-NEXT:    bl __powisf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
-; PC64LE-NEXT:    bl __powisf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI37_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI37_3@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_powi_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powisf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powisf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powisf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI37_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI37_3@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_powi_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    bl __powisf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powisf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powisf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %powi
-}
-
-define <3 x double> @constrained_vector_powi_v3f64() {
-; PC64LE-LABEL: constrained_vector_powi_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI38_0@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI38_1@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_powi_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI38_0@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI38_1@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_powi_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    lfs 1, .LCPI38_0@toc@l(3)
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI38_1@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          i32 3,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %powi
-}
-
-define <4 x double> @constrained_vector_powi_v4f64() {
-; PC64LE-LABEL: constrained_vector_powi_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    li 4, 3
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
-; PC64LE-NEXT:    bl __powidf2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_powi_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
-; PC64LE9-NEXT:    li 4, 3
-; PC64LE9-NEXT:    bl __powidf2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_powi_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
-; PC64-NEXT:    li 4, 3
-; PC64-NEXT:    bl __powidf2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
-                              <4 x double> <double 42.1, double 42.2,
-                                            double 42.3, double 42.4>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %powi
-}
-
-define <1 x float> @constrained_vector_sin_v1f32() {
-; PC64LE-LABEL: constrained_vector_sin_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
-; PC64LE-NEXT:    bl sinf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sin_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
-; PC64LE9-NEXT:    bl sinf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sin_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
-; PC64-NEXT:    bl sinf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %sin
-}
-
-define <2 x double> @constrained_vector_sin_v2f64() {
-; PC64LE-LABEL: constrained_vector_sin_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI41_0@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI41_1@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sin_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI41_0@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI41_1@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sin_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI41_0@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI41_1@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %sin
-}
-
-define <3 x float> @constrained_vector_sin_v3f32() {
-; PC64LE-LABEL: constrained_vector_sin_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
-; PC64LE-NEXT:    bl sinf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
-; PC64LE-NEXT:    bl sinf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
-; PC64LE-NEXT:    bl sinf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI42_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI42_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sin_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
-; PC64LE9-NEXT:    bl sinf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
-; PC64LE9-NEXT:    bl sinf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
-; PC64LE9-NEXT:    bl sinf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI42_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI42_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sin_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
-; PC64-NEXT:    bl sinf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
-; PC64-NEXT:    bl sinf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
-; PC64-NEXT:    bl sinf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %sin
-}
-
-define <3 x double> @constrained_vector_sin_v3f64() {
-; PC64LE-LABEL: constrained_vector_sin_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI43_0@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI43_1@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sin_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI43_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI43_1@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sin_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI43_0@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI43_1@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %sin
-}
-
-define <4 x double> @constrained_vector_sin_v4f64() {
-; PC64LE-LABEL: constrained_vector_sin_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI44_0@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI44_3@toc@l(3)
-; PC64LE-NEXT:    bl sin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 3, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_sin_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI44_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI44_3@toc@l(3)
-; PC64LE9-NEXT:    bl sin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 3, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_sin_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI44_0@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI44_3@toc@l(3)
-; PC64-NEXT:    bl sin
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %sin
-}
-
-define <1 x float> @constrained_vector_cos_v1f32() {
-; PC64LE-LABEL: constrained_vector_cos_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
-; PC64LE-NEXT:    bl cosf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_cos_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
-; PC64LE9-NEXT:    bl cosf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_cos_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
-; PC64-NEXT:    bl cosf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %cos
-}
-
-define <2 x double> @constrained_vector_cos_v2f64() {
-; PC64LE-LABEL: constrained_vector_cos_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI46_0@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI46_1@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_cos_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI46_0@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI46_1@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_cos_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI46_0@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI46_1@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %cos
-}
-
-define <3 x float> @constrained_vector_cos_v3f32() {
-; PC64LE-LABEL: constrained_vector_cos_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
-; PC64LE-NEXT:    bl cosf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
-; PC64LE-NEXT:    bl cosf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
-; PC64LE-NEXT:    bl cosf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI47_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI47_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_cos_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
-; PC64LE9-NEXT:    bl cosf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
-; PC64LE9-NEXT:    bl cosf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
-; PC64LE9-NEXT:    bl cosf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI47_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI47_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_cos_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
-; PC64-NEXT:    bl cosf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
-; PC64-NEXT:    bl cosf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
-; PC64-NEXT:    bl cosf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %cos
-}
-
-define <3 x double> @constrained_vector_cos_v3f64() {
-; PC64LE-LABEL: constrained_vector_cos_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_cos_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_cos_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI48_0@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI48_1@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %cos
-}
-
-define <4 x double> @constrained_vector_cos_v4f64() {
-; PC64LE-LABEL: constrained_vector_cos_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI49_0@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI49_3@toc@l(3)
-; PC64LE-NEXT:    bl cos
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 3, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_cos_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI49_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI49_3@toc@l(3)
-; PC64LE9-NEXT:    bl cos
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 3, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_cos_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI49_0@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI49_3@toc@l(3)
-; PC64-NEXT:    bl cos
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %cos
-}
-
-define <1 x float> @constrained_vector_exp_v1f32() {
-; PC64LE-LABEL: constrained_vector_exp_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
-; PC64LE-NEXT:    bl expf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
-; PC64LE9-NEXT:    bl expf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
-; PC64-NEXT:    bl expf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %exp
-}
-
-define <2 x double> @constrained_vector_exp_v2f64() {
-; PC64LE-LABEL: constrained_vector_exp_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI51_0@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI51_1@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI51_0@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI51_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI51_0@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI51_1@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %exp
-}
-
-define <3 x float> @constrained_vector_exp_v3f32() {
-; PC64LE-LABEL: constrained_vector_exp_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
-; PC64LE-NEXT:    bl expf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
-; PC64LE-NEXT:    bl expf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
-; PC64LE-NEXT:    bl expf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI52_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI52_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
-; PC64LE9-NEXT:    bl expf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
-; PC64LE9-NEXT:    bl expf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
-; PC64LE9-NEXT:    bl expf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI52_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI52_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
-; PC64-NEXT:    bl expf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
-; PC64-NEXT:    bl expf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
-; PC64-NEXT:    bl expf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %exp
-}
-
-define <3 x double> @constrained_vector_exp_v3f64() {
-; PC64LE-LABEL: constrained_vector_exp_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI53_0@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI53_1@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI53_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI53_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI53_0@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI53_1@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %exp
-}
-
-define <4 x double> @constrained_vector_exp_v4f64() {
-; PC64LE-LABEL: constrained_vector_exp_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI54_0@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI54_3@toc@l(3)
-; PC64LE-NEXT:    bl exp
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 3, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI54_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI54_3@toc@l(3)
-; PC64LE9-NEXT:    bl exp
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 3, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI54_0@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI54_3@toc@l(3)
-; PC64-NEXT:    bl exp
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %exp
-}
-
-define <1 x float> @constrained_vector_exp2_v1f32() {
-; PC64LE-LABEL: constrained_vector_exp2_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
-; PC64LE-NEXT:    bl exp2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp2_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
-; PC64LE9-NEXT:    bl exp2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp2_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
-; PC64-NEXT:    bl exp2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %exp2
-}
-
-define <2 x double> @constrained_vector_exp2_v2f64() {
-; PC64LE-LABEL: constrained_vector_exp2_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp2_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp2_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
-                              <2 x double> <double 42.1, double 42.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %exp2
-}
-
-define <3 x float> @constrained_vector_exp2_v3f32() {
-; PC64LE-LABEL: constrained_vector_exp2_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
-; PC64LE-NEXT:    bl exp2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
-; PC64LE-NEXT:    bl exp2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
-; PC64LE-NEXT:    bl exp2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI57_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI57_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp2_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
-; PC64LE9-NEXT:    bl exp2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
-; PC64LE9-NEXT:    bl exp2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI57_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI57_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp2_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
-; PC64-NEXT:    bl exp2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
-; PC64-NEXT:    bl exp2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
-; PC64-NEXT:    bl exp2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %exp2
-}
-
-define <3 x double> @constrained_vector_exp2_v3f64() {
-; PC64LE-LABEL: constrained_vector_exp2_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI58_0@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI58_1@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp2_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI58_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI58_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp2_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI58_0@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI58_1@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %exp2
-}
-
-define <4 x double> @constrained_vector_exp2_v4f64() {
-; PC64LE-LABEL: constrained_vector_exp2_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
-; PC64LE-NEXT:    bl exp2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_exp2_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
-; PC64LE9-NEXT:    bl exp2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_exp2_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
-; PC64-NEXT:    bl exp2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
-                              <4 x double> <double 42.1, double 42.2,
-                                            double 42.3, double 42.4>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %exp2
-}
-
-define <1 x float> @constrained_vector_log_v1f32() {
-; PC64LE-LABEL: constrained_vector_log_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
-; PC64LE-NEXT:    bl logf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
-; PC64LE9-NEXT:    bl logf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
-; PC64-NEXT:    bl logf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %log
-}
-
-define <2 x double> @constrained_vector_log_v2f64() {
-; PC64LE-LABEL: constrained_vector_log_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI61_0@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI61_1@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI61_0@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI61_1@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI61_0@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI61_1@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %log
-}
-
-define <3 x float> @constrained_vector_log_v3f32() {
-; PC64LE-LABEL: constrained_vector_log_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
-; PC64LE-NEXT:    bl logf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
-; PC64LE-NEXT:    bl logf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
-; PC64LE-NEXT:    bl logf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI62_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI62_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
-; PC64LE9-NEXT:    bl logf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
-; PC64LE9-NEXT:    bl logf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
-; PC64LE9-NEXT:    bl logf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI62_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI62_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
-; PC64-NEXT:    bl logf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
-; PC64-NEXT:    bl logf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
-; PC64-NEXT:    bl logf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %log
-}
-
-define <3 x double> @constrained_vector_log_v3f64() {
-; PC64LE-LABEL: constrained_vector_log_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI63_0@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI63_1@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI63_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI63_1@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI63_0@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI63_1@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %log
-}
-
-define <4 x double> @constrained_vector_log_v4f64() {
-; PC64LE-LABEL: constrained_vector_log_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI64_0@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI64_3@toc@l(3)
-; PC64LE-NEXT:    bl log
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 3, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI64_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI64_3@toc@l(3)
-; PC64LE9-NEXT:    bl log
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 3, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI64_0@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI64_3@toc@l(3)
-; PC64-NEXT:    bl log
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %log
-}
-
-define <1 x float> @constrained_vector_log10_v1f32() {
-; PC64LE-LABEL: constrained_vector_log10_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
-; PC64LE-NEXT:    bl log10f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log10_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
-; PC64LE9-NEXT:    bl log10f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log10_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
-; PC64-NEXT:    bl log10f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %log10
-}
-
-define <2 x double> @constrained_vector_log10_v2f64() {
-; PC64LE-LABEL: constrained_vector_log10_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI66_0@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI66_1@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log10_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI66_0@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI66_1@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log10_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI66_0@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI66_1@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
-                               <2 x double> <double 42.0, double 42.1>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <2 x double> %log10
-}
-
-define <3 x float> @constrained_vector_log10_v3f32() {
-; PC64LE-LABEL: constrained_vector_log10_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
-; PC64LE-NEXT:    bl log10f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
-; PC64LE-NEXT:    bl log10f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
-; PC64LE-NEXT:    bl log10f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI67_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI67_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log10_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
-; PC64LE9-NEXT:    bl log10f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
-; PC64LE9-NEXT:    bl log10f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
-; PC64LE9-NEXT:    bl log10f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI67_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI67_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log10_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
-; PC64-NEXT:    bl log10f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
-; PC64-NEXT:    bl log10f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
-; PC64-NEXT:    bl log10f
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %log10
-}
-
-define <3 x double> @constrained_vector_log10_v3f64() {
-; PC64LE-LABEL: constrained_vector_log10_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI68_0@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI68_1@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log10_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI68_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI68_1@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log10_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI68_0@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI68_1@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %log10
-}
-
-define <4 x double> @constrained_vector_log10_v4f64() {
-; PC64LE-LABEL: constrained_vector_log10_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI69_0@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI69_3@toc@l(3)
-; PC64LE-NEXT:    bl log10
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 3, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log10_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI69_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI69_3@toc@l(3)
-; PC64LE9-NEXT:    bl log10
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 3, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log10_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI69_0@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI69_3@toc@l(3)
-; PC64-NEXT:    bl log10
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
-                               <4 x double> <double 42.0, double 42.1,
-                                             double 42.2, double 42.3>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <4 x double> %log10
-}
-
-define <1 x float> @constrained_vector_log2_v1f32() {
-; PC64LE-LABEL: constrained_vector_log2_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
-; PC64LE-NEXT:    bl log2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log2_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
-; PC64LE9-NEXT:    bl log2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log2_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
-; PC64-NEXT:    bl log2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %log2
-}
-
-define <2 x double> @constrained_vector_log2_v2f64() {
-; PC64LE-LABEL: constrained_vector_log2_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI71_0@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI71_1@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log2_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI71_0@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI71_1@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log2_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI71_0@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI71_1@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
-                              <2 x double> <double 42.0, double 42.1>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %log2
-}
-
-define <3 x float> @constrained_vector_log2_v3f32() {
-; PC64LE-LABEL: constrained_vector_log2_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
-; PC64LE-NEXT:    bl log2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
-; PC64LE-NEXT:    bl log2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
-; PC64LE-NEXT:    bl log2f
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI72_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI72_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log2_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
-; PC64LE9-NEXT:    bl log2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
-; PC64LE9-NEXT:    bl log2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
-; PC64LE9-NEXT:    bl log2f
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI72_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI72_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log2_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
-; PC64-NEXT:    bl log2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
-; PC64-NEXT:    bl log2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
-; PC64-NEXT:    bl log2f
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %log2
-}
-
-define <3 x double> @constrained_vector_log2_v3f64() {
-; PC64LE-LABEL: constrained_vector_log2_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI73_0@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI73_1@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log2_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI73_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI73_1@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log2_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI73_0@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI73_1@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %log2
-}
-
-define <4 x double> @constrained_vector_log2_v4f64() {
-; PC64LE-LABEL: constrained_vector_log2_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI74_0@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI74_3@toc@l(3)
-; PC64LE-NEXT:    bl log2
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 3, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_log2_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI74_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI74_3@toc@l(3)
-; PC64LE9-NEXT:    bl log2
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 3, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_log2_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI74_0@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI74_3@toc@l(3)
-; PC64-NEXT:    bl log2
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
-                              <4 x double> <double 42.0, double 42.1,
-                                            double 42.2, double 42.3>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %log2
-}
-
-define <1 x float> @constrained_vector_rint_v1f32() {
-; PC64LE-LABEL: constrained_vector_rint_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
-; PC64LE-NEXT:    bl rintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_rint_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
-; PC64LE9-NEXT:    bl rintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_rint_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
-; PC64-NEXT:    bl rintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %rint
-}
-
-define <2 x double> @constrained_vector_rint_v2f64() {
-; PC64LE-LABEL: constrained_vector_rint_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_rint_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_rint_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
-                        <2 x double> <double 42.1, double 42.0>,
-                        metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
-  ret <2 x double> %rint
-}
-
-define <3 x float> @constrained_vector_rint_v3f32() {
-; PC64LE-LABEL: constrained_vector_rint_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
-; PC64LE-NEXT:    bl rintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
-; PC64LE-NEXT:    bl rintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
-; PC64LE-NEXT:    bl rintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI77_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI77_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_rint_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
-; PC64LE9-NEXT:    bl rintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
-; PC64LE9-NEXT:    bl rintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
-; PC64LE9-NEXT:    bl rintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI77_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI77_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_rint_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
-; PC64-NEXT:    bl rintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
-; PC64-NEXT:    bl rintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
-; PC64-NEXT:    bl rintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
- entry:
-  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %rint
-}
-
-define <3 x double> @constrained_vector_rint_v3f64() {
-; PC64LE-LABEL: constrained_vector_rint_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI78_0@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI78_1@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 0, 1
-; PC64LE-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_rint_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI78_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI78_1@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 0, 1
-; PC64LE9-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_rint_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI78_0@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI78_1@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %rint
-}
-
-define <4 x double> @constrained_vector_rint_v4f64() {
-; PC64LE-LABEL: constrained_vector_rint_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
-; PC64LE-NEXT:    bl rint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_rint_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
-; PC64LE9-NEXT:    bl rint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_rint_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
-; PC64-NEXT:    bl rint
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
-                        <4 x double> <double 42.1, double 42.2,
-                                      double 42.3, double 42.4>,
-                        metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
-  ret <4 x double> %rint
-}
-
-define <1 x float> @constrained_vector_nearbyint_v1f32() {
-; PC64LE-LABEL: constrained_vector_nearbyint_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
-; PC64LE-NEXT:    bl nearbyintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
-; PC64LE9-NEXT:    bl nearbyintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_nearbyint_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
-; PC64-NEXT:    bl nearbyintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
-                               <1 x float> <float 42.0>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %nearby
-}
-
-define <2 x double> @constrained_vector_nearbyint_v2f64() {
-; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI81_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpic 34, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI81_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 34, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_nearbyint_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI81_0@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI81_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI81_1@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
-                                <2 x double> <double 42.1, double 42.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %nearby
-}
-
-define <3 x float> @constrained_vector_nearbyint_v3f32() {
-; PC64LE-LABEL: constrained_vector_nearbyint_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -48(1)
-; PC64LE-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
-; PC64LE-NEXT:    bl nearbyintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
-; PC64LE-NEXT:    fmr 31, 1
-; PC64LE-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
-; PC64LE-NEXT:    bl nearbyintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
-; PC64LE-NEXT:    bl nearbyintf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI82_3@toc@l
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 31
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    addi 1, 1, 48
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
-; PC64LE9-NEXT:    bl nearbyintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
-; PC64LE9-NEXT:    fmr 31, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
-; PC64LE9-NEXT:    bl nearbyintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
-; PC64LE9-NEXT:    bl nearbyintf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 31
-; PC64LE9-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI82_3@toc@l
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_nearbyint_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
-; PC64-NEXT:    bl nearbyintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
-; PC64-NEXT:    bl nearbyintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
-; PC64-NEXT:    bl nearbyintf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %nearby
-}
-
-define <3 x double> @constrained_vector_nearby_v3f64() {
-; PC64LE-LABEL: constrained_vector_nearby_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
-; PC64LE-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
-; PC64LE-NEXT:    bl nearbyint
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI83_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpic 2, 0
-; PC64LE-NEXT:    xxswapd 0, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    fmr 1, 0
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
-; PC64LE9-NEXT:    bl nearbyint
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI83_1@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 2, 0
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_nearby_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI83_0@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI83_1@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI83_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI83_2@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %nearby
-}
-
-define <4 x double> @constrained_vector_nearbyint_v4f64() {
-; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI84_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI84_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 4, .LCPI84_1@toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvrdpic 34, 0
-; PC64LE-NEXT:    xvrdpic 35, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI84_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI84_1@toc@l
-; PC64LE9-NEXT:    xvrdpic 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 35, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_nearbyint_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI84_0@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI84_1@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI84_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfd 1, .LCPI84_2@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI84_3@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfd 1, .LCPI84_3@toc@l(3)
-; PC64-NEXT:    bl nearbyint
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
-                                <4 x double> <double 42.1, double 42.2,
-                                              double 42.3, double 42.4>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %nearby
-}
-
-define <1 x float> @constrained_vector_maxnum_v1f32() {
-; PC64LE-LABEL: constrained_vector_maxnum_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI85_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI85_1@toc@l(4)
-; PC64LE-NEXT:    bl fmaxf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_maxnum_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI85_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI85_1@toc@l(3)
-; PC64LE9-NEXT:    bl fmaxf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_maxnum_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI85_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI85_1@toc@l(3)
-; PC64-NEXT:    bl fmaxf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
-                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %max
-}
-
-define <2 x double> @constrained_vector_maxnum_v2f64() {
-; PC64LE-LABEL: constrained_vector_maxnum_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI86_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI86_1@toc@l(4)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI86_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI86_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_maxnum_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI86_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI86_1@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI86_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI86_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_maxnum_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI86_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI86_1@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI86_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI86_3@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
-                                <2 x double> <double 43.0, double 42.0>,
-                                <2 x double> <double 41.0, double 40.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %max
-}
-
-define <3 x float> @constrained_vector_maxnum_v3f32() {
-; PC64LE-LABEL: constrained_vector_maxnum_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f29, -24
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    addis 4, 2, .LCPI87_1@toc@ha
-; PC64LE-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI87_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI87_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl fmaxf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI87_3@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI87_3@toc@l(4)
-; PC64LE-NEXT:    bl fmaxf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    fmr 29, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
-; PC64LE-NEXT:    fmr 1, 31
-; PC64LE-NEXT:    lfs 2, .LCPI87_4@toc@l(3)
-; PC64LE-NEXT:    bl fmaxf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
-; PC64LE-NEXT:    addis 3, 2, .LCPI87_5@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI87_5@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_maxnum_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f29, -24
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI87_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI87_1@toc@ha
-; PC64LE9-NEXT:    lfs 31, .LCPI87_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fmaxf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI87_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI87_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmaxf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI87_4@toc@l(3)
-; PC64LE9-NEXT:    fmr 29, 1
-; PC64LE9-NEXT:    fmr 1, 31
-; PC64LE9-NEXT:    bl fmaxf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    addis 3, 2, .LCPI87_5@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI87_5@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_maxnum_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 31, .LCPI87_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI87_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI87_1@toc@l(3)
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    bl fmaxf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI87_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI87_3@toc@l(3)
-; PC64-NEXT:    bl fmaxf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI87_4@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fmaxf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
-                              <3 x float> <float 43.0, float 44.0, float 45.0>,
-                              <3 x float> <float 41.0, float 42.0, float 43.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %max
-}
-
-define <3 x double> @constrained_vector_max_v3f64() {
-; PC64LE-LABEL: constrained_vector_max_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI88_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI88_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI88_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI88_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI88_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI88_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_max_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI88_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI88_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI88_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI88_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_max_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI88_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI88_1@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI88_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI88_3@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI88_5@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI88_5@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
-                          <3 x double> <double 43.0, double 44.0, double 45.0>,
-                          <3 x double> <double 40.0, double 41.0, double 42.0>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %max
-}
-
-define <4 x double> @constrained_vector_maxnum_v4f64() {
-; PC64LE-LABEL: constrained_vector_maxnum_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI89_7@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI89_7@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
-; PC64LE-NEXT:    bl fmax
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_maxnum_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI89_7@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI89_7@toc@l(3)
-; PC64LE9-NEXT:    bl fmax
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_maxnum_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI89_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI89_1@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI89_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI89_3@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI89_5@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI89_5@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI89_7@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI89_7@toc@l(3)
-; PC64-NEXT:    bl fmax
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
-                                <4 x double> <double 44.0, double 45.0,
-                                              double 46.0, double 47.0>,
-                                <4 x double> <double 40.0, double 41.0,
-                                              double 42.0, double 43.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %max
-}
-
-define <1 x float> @constrained_vector_minnum_v1f32() {
-; PC64LE-LABEL: constrained_vector_minnum_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -32(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI90_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI90_1@toc@l(4)
-; PC64LE-NEXT:    bl fminf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    addi 1, 1, 32
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_minnum_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -32(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI90_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI90_1@toc@l(3)
-; PC64LE9-NEXT:    bl fminf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    addi 1, 1, 32
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_minnum_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI90_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI90_1@toc@l(3)
-; PC64-NEXT:    bl fminf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
- entry:
-  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
-                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %min
-}
-
-define <2 x double> @constrained_vector_minnum_v2f64() {
-; PC64LE-LABEL: constrained_vector_minnum_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI91_1@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI91_1@toc@l(4)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI91_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI91_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_minnum_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -48(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 48
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_minnum_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
-                                <2 x double> <double 43.0, double 42.0>,
-                                <2 x double> <double 41.0, double 40.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %min
-}
-
-define <3 x float> @constrained_vector_minnum_v3f32() {
-; PC64LE-LABEL: constrained_vector_minnum_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset f29, -24
-; PC64LE-NEXT:    .cfi_offset f30, -16
-; PC64LE-NEXT:    .cfi_offset f31, -8
-; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -64(1)
-; PC64LE-NEXT:    addis 4, 2, .LCPI92_1@toc@ha
-; PC64LE-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
-; PC64LE-NEXT:    lfs 31, .LCPI92_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI92_0@toc@l(3)
-; PC64LE-NEXT:    fmr 2, 31
-; PC64LE-NEXT:    bl fminf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI92_3@toc@ha
-; PC64LE-NEXT:    fmr 30, 1
-; PC64LE-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI92_3@toc@l(4)
-; PC64LE-NEXT:    bl fminf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    fmr 29, 1
-; PC64LE-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
-; PC64LE-NEXT:    fmr 1, 31
-; PC64LE-NEXT:    lfs 2, .LCPI92_4@toc@l(3)
-; PC64LE-NEXT:    bl fminf
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    xscvdpspn 0, 29
-; PC64LE-NEXT:    addis 3, 2, .LCPI92_5@toc@ha
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI92_5@toc@l
-; PC64LE-NEXT:    lvx 4, 0, 3
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 30
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 2, 3
-; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 3, 2, 4
-; PC64LE-NEXT:    addi 1, 1, 64
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_minnum_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset f29, -24
-; PC64LE9-NEXT:    .cfi_offset f30, -16
-; PC64LE9-NEXT:    .cfi_offset f31, -8
-; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI92_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI92_1@toc@ha
-; PC64LE9-NEXT:    lfs 31, .LCPI92_1@toc@l(3)
-; PC64LE9-NEXT:    fmr 2, 31
-; PC64LE9-NEXT:    bl fminf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
-; PC64LE9-NEXT:    fmr 30, 1
-; PC64LE9-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI92_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI92_3@toc@l(3)
-; PC64LE9-NEXT:    bl fminf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI92_4@toc@l(3)
-; PC64LE9-NEXT:    fmr 29, 1
-; PC64LE9-NEXT:    fmr 1, 31
-; PC64LE9-NEXT:    bl fminf
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    xscvdpspn 0, 1
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 29
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    xscvdpspn 0, 30
-; PC64LE9-NEXT:    addis 3, 2, .LCPI92_5@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI92_5@toc@l
-; PC64LE9-NEXT:    lxvx 36, 0, 3
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 3, 2, 4
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_minnum_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 31, .LCPI92_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI92_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI92_1@toc@l(3)
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    bl fminf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI92_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI92_3@toc@l(3)
-; PC64-NEXT:    bl fminf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI92_4@toc@l(3)
-; PC64-NEXT:    fmr 2, 31
-; PC64-NEXT:    bl fminf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 30
-; PC64-NEXT:    fmr 2, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
-                              <3 x float> <float 43.0, float 44.0, float 45.0>,
-                              <3 x float> <float 41.0, float 42.0, float 43.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %min
-}
-
-define <3 x double> @constrained_vector_min_v3f64() {
-; PC64LE-LABEL: constrained_vector_min_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI93_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI93_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI93_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI93_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI93_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI93_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    fmr 3, 1
-; PC64LE-NEXT:    xxlor 1, 63, 63
-; PC64LE-NEXT:    xxlor 2, 63, 63
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_min_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    fmr 3, 1
-; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
-; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_min_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
- %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
-                          <3 x double> <double 43.0, double 44.0, double 45.0>,
-                          <3 x double> <double 40.0, double 41.0, double 42.0>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %min
-}
-
-define <4 x double> @constrained_vector_minnum_v4f64() {
-; PC64LE-LABEL: constrained_vector_minnum_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    mflr 0
-; PC64LE-NEXT:    std 0, 16(1)
-; PC64LE-NEXT:    stdu 1, -80(1)
-; PC64LE-NEXT:    .cfi_def_cfa_offset 80
-; PC64LE-NEXT:    .cfi_offset lr, 16
-; PC64LE-NEXT:    .cfi_offset v31, -16
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_1@toc@ha
-; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_1@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_3@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_3@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_5@toc@ha
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_5@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 63, 1, 0
-; PC64LE-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    addis 4, 2, .LCPI94_7@toc@ha
-; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
-; PC64LE-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
-; PC64LE-NEXT:    lfs 2, .LCPI94_7@toc@l(4)
-; PC64LE-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
-; PC64LE-NEXT:    bl fmin
-; PC64LE-NEXT:    nop
-; PC64LE-NEXT:    li 3, 48
-; PC64LE-NEXT:    vmr 2, 31
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    li 3, 64
-; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
-; PC64LE-NEXT:    xxmrghd 35, 1, 0
-; PC64LE-NEXT:    addi 1, 1, 80
-; PC64LE-NEXT:    ld 0, 16(1)
-; PC64LE-NEXT:    mtlr 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_minnum_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    mflr 0
-; PC64LE9-NEXT:    std 0, 16(1)
-; PC64LE9-NEXT:    stdu 1, -64(1)
-; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
-; PC64LE9-NEXT:    .cfi_offset lr, 16
-; PC64LE9-NEXT:    .cfi_offset v31, -16
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
-; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 63, 1, 0
-; PC64LE9-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
-; PC64LE9-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
-; PC64LE9-NEXT:    bl fmin
-; PC64LE9-NEXT:    nop
-; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    vmr 2, 31
-; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    addi 1, 1, 64
-; PC64LE9-NEXT:    ld 0, 16(1)
-; PC64LE9-NEXT:    mtlr 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_minnum_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -144(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 144
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f29, -24
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
-; PC64-NEXT:    stfd 29, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 136(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
-; PC64-NEXT:    fmr 29, 1
-; PC64-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
-; PC64-NEXT:    bl fmin
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 4, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    fmr 3, 29
-; PC64-NEXT:    lfd 31, 136(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 29, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 144
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
-                                <4 x double> <double 44.0, double 45.0,
-                                              double 46.0, double 47.0>,
-                                <4 x double> <double 40.0, double 41.0,
-                                              double 42.0, double 43.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %min
-}
-
-define <1 x float> @constrained_vector_fptrunc_v1f64() {
-; PC64LE-LABEL: constrained_vector_fptrunc_v1f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
-; PC64LE-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
-; PC64LE-NEXT:    frsp 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fptrunc_v1f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
-; PC64LE9-NEXT:    frsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fptrunc_v1f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
-; PC64-NEXT:    frsp 1, 0
-; PC64-NEXT:    blr
-entry:
-  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
-                                <1 x double><double 42.1>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <1 x float> %result
-}
-
-define <2 x float> @constrained_vector_fptrunc_v2f64() {
-; PC64LE-LABEL: constrained_vector_fptrunc_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI96_1@toc@ha
-; PC64LE-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
-; PC64LE-NEXT:    lfd 1, .LCPI96_1@toc@l(4)
-; PC64LE-NEXT:    frsp 0, 0
-; PC64LE-NEXT:    frsp 1, 1
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fptrunc_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI96_1@toc@ha
-; PC64LE9-NEXT:    frsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    lfd 0, .LCPI96_1@toc@l(3)
-; PC64LE9-NEXT:    frsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fptrunc_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI96_1@toc@ha
-; PC64-NEXT:    lfd 2, .LCPI96_1@toc@l(3)
-; PC64-NEXT:    frsp 1, 0
-; PC64-NEXT:    frsp 2, 2
-; PC64-NEXT:    blr
-entry:
-  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
-                                <2 x double><double 42.1, double 42.2>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x float> %result
-}
-
-define <3 x float> @constrained_vector_fptrunc_v3f64() {
-; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI97_1@toc@ha
-; PC64LE-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
-; PC64LE-NEXT:    lfd 1, .LCPI97_1@toc@l(4)
-; PC64LE-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
-; PC64LE-NEXT:    frsp 0, 0
-; PC64LE-NEXT:    lfd 2, .LCPI97_3@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
-; PC64LE-NEXT:    frsp 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI97_2@toc@l
-; PC64LE-NEXT:    frsp 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI97_1@toc@ha
-; PC64LE9-NEXT:    frsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    lfd 0, .LCPI97_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI97_2@toc@l
-; PC64LE9-NEXT:    frsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI97_3@toc@l(3)
-; PC64LE9-NEXT:    frsp 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fptrunc_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI97_1@toc@ha
-; PC64-NEXT:    lfd 2, .LCPI97_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
-; PC64-NEXT:    lfd 3, .LCPI97_2@toc@l(3)
-; PC64-NEXT:    frsp 1, 0
-; PC64-NEXT:    frsp 2, 2
-; PC64-NEXT:    frsp 3, 3
-; PC64-NEXT:    blr
-entry:
-  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
-                                <3 x double><double 42.1, double 42.2,
-                                             double 42.3>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <3 x float> %result
-}
-
-define <4 x float> @constrained_vector_fptrunc_v4f64() {
-; PC64LE-LABEL: constrained_vector_fptrunc_v4f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI98_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI98_2@toc@ha
-; PC64LE-NEXT:    addis 6, 2, .LCPI98_3@toc@ha
-; PC64LE-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
-; PC64LE-NEXT:    lfd 1, .LCPI98_1@toc@l(4)
-; PC64LE-NEXT:    lfd 2, .LCPI98_2@toc@l(5)
-; PC64LE-NEXT:    lfd 3, .LCPI98_3@toc@l(6)
-; PC64LE-NEXT:    xxmrghd 0, 1, 0
-; PC64LE-NEXT:    xxmrghd 1, 3, 2
-; PC64LE-NEXT:    xvcvdpsp 34, 0
-; PC64LE-NEXT:    xvcvdpsp 35, 1
-; PC64LE-NEXT:    vmrgew 2, 3, 2
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fptrunc_v4f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI98_1@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI98_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
-; PC64LE9-NEXT:    xxmrghd 0, 1, 0
-; PC64LE9-NEXT:    xvcvdpsp 34, 0
-; PC64LE9-NEXT:    lfd 0, .LCPI98_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI98_3@toc@ha
-; PC64LE9-NEXT:    lfd 1, .LCPI98_3@toc@l(3)
-; PC64LE9-NEXT:    xxmrghd 0, 1, 0
-; PC64LE9-NEXT:    xvcvdpsp 35, 0
-; PC64LE9-NEXT:    vmrgew 2, 3, 2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fptrunc_v4f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
-; PC64-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI98_1@toc@ha
-; PC64-NEXT:    lfd 2, .LCPI98_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
-; PC64-NEXT:    lfd 3, .LCPI98_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI98_3@toc@ha
-; PC64-NEXT:    frsp 1, 0
-; PC64-NEXT:    lfd 4, .LCPI98_3@toc@l(3)
-; PC64-NEXT:    frsp 2, 2
-; PC64-NEXT:    frsp 3, 3
-; PC64-NEXT:    frsp 4, 4
-; PC64-NEXT:    blr
-entry:
-  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
-                                <4 x double><double 42.1, double 42.2,
-                                             double 42.3, double 42.4>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x float> %result
-}
-
-define <1 x double> @constrained_vector_fpext_v1f32() {
-; PC64LE-LABEL: constrained_vector_fpext_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI99_0@toc@l(3)
-; PC64LE-NEXT:    xxspltd 34, 0, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fpext_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI99_0@toc@l(3)
-; PC64LE9-NEXT:    xxspltd 34, 0, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fpext_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI99_0@toc@l(3)
-; PC64-NEXT:    blr
-entry:
-  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
-                                <1 x float><float 42.0>,
-                                metadata !"fpexcept.strict")
-  ret <1 x double> %result
-}
-
-define <2 x double> @constrained_vector_fpext_v2f32() {
-; PC64LE-LABEL: constrained_vector_fpext_v2f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI100_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI100_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI100_1@toc@l(4)
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fpext_v2f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI100_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI100_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI100_1@toc@l(3)
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fpext_v2f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI100_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI100_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI100_1@toc@l(3)
-; PC64-NEXT:    blr
-entry:
-  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
-                                <2 x float><float 42.0, float 43.0>,
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %result
-}
-
-define <3 x double> @constrained_vector_fpext_v3f32() {
-; PC64LE-LABEL: constrained_vector_fpext_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI101_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI101_2@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
-; PC64LE-NEXT:    lfs 2, .LCPI101_1@toc@l(4)
-; PC64LE-NEXT:    lfs 3, .LCPI101_2@toc@l(5)
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fpext_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI101_1@toc@ha
-; PC64LE9-NEXT:    lfs 2, .LCPI101_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI101_2@toc@ha
-; PC64LE9-NEXT:    lfs 3, .LCPI101_2@toc@l(3)
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fpext_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI101_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI101_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI101_2@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI101_2@toc@l(3)
-; PC64-NEXT:    blr
-entry:
-  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
-                                <3 x float><float 42.0, float 43.0,
-                                            float 44.0>,
-                                metadata !"fpexcept.strict")
-  ret <3 x double> %result
-}
-
-define <4 x double> @constrained_vector_fpext_v4f32() {
-; PC64LE-LABEL: constrained_vector_fpext_v4f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI102_1@toc@ha
-; PC64LE-NEXT:    addis 5, 2, .LCPI102_2@toc@ha
-; PC64LE-NEXT:    addis 6, 2, .LCPI102_3@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI102_0@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI102_1@toc@l(4)
-; PC64LE-NEXT:    lfs 2, .LCPI102_2@toc@l(5)
-; PC64LE-NEXT:    lfs 3, .LCPI102_3@toc@l(6)
-; PC64LE-NEXT:    xxmrghd 34, 1, 0
-; PC64LE-NEXT:    xxmrghd 35, 3, 2
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_fpext_v4f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI102_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI102_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI102_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI102_2@toc@ha
-; PC64LE9-NEXT:    xxmrghd 34, 1, 0
-; PC64LE9-NEXT:    lfs 0, .LCPI102_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI102_3@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI102_3@toc@l(3)
-; PC64LE9-NEXT:    xxmrghd 35, 1, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_fpext_v4f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI102_0@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI102_1@toc@ha
-; PC64-NEXT:    lfs 2, .LCPI102_1@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI102_2@toc@ha
-; PC64-NEXT:    lfs 3, .LCPI102_2@toc@l(3)
-; PC64-NEXT:    addis 3, 2, .LCPI102_3@toc@ha
-; PC64-NEXT:    lfs 4, .LCPI102_3@toc@l(3)
-; PC64-NEXT:    blr
-entry:
-  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
-                                <4 x float><float 42.0, float 43.0,
-                                            float 44.0, float 45.0>,
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %result
-}
-
-define <1 x float> @constrained_vector_ceil_v1f32() {
-; PC64LE-LABEL: constrained_vector_ceil_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE-NEXT:    frip 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE9-NEXT:    frip 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_ceil_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI103_0@toc@l(3)
-; PC64-NEXT:    bl ceilf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %ceil
-}
-
-define <2 x double> @constrained_vector_ceil_v2f64() {
-; PC64LE-LABEL: constrained_vector_ceil_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI104_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpip 34, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_ceil_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI104_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpip 34, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_ceil_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI104_0@toc@l(3)
-; PC64-NEXT:    bl ceil
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI104_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI104_1@toc@l(3)
-; PC64-NEXT:    bl ceil
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %ceil
-}
-
-define <3 x float> @constrained_vector_ceil_v3f32() {
-; PC64LE-LABEL: constrained_vector_ceil_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI105_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI105_2@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI105_1@toc@l(4)
-; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE-NEXT:    frip 0, 0
-; PC64LE-NEXT:    lfs 2, .LCPI105_0@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE-NEXT:    frip 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI105_3@toc@l
-; PC64LE-NEXT:    frip 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI105_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI105_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI105_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE9-NEXT:    frip 0, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI105_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI105_3@toc@l
-; PC64LE9-NEXT:    frip 1, 1
-; PC64LE9-NEXT:    frip 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_ceil_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI105_0@toc@l(3)
-; PC64-NEXT:    bl ceilf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI105_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI105_1@toc@l(3)
-; PC64-NEXT:    bl ceilf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI105_2@toc@l(3)
-; PC64-NEXT:    bl ceilf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %ceil
-}
-
-define <3 x double> @constrained_vector_ceil_v3f64() {
-; PC64LE-LABEL: constrained_vector_ceil_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI106_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI106_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xsrdpip 3, 1
-; PC64LE-NEXT:    xvrdpip 2, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI106_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI106_1@toc@l
-; PC64LE9-NEXT:    xsrdpip 3, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpip 2, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_ceil_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI106_0@toc@l(3)
-; PC64-NEXT:    bl ceil
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI106_1@toc@l(3)
-; PC64-NEXT:    bl ceil
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI106_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI106_2@toc@l(3)
-; PC64-NEXT:    bl ceil
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %ceil
-}
-
-define <1 x float> @constrained_vector_floor_v1f32() {
-; PC64LE-LABEL: constrained_vector_floor_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE-NEXT:    frim 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_floor_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE9-NEXT:    frim 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_floor_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI107_0@toc@l(3)
-; PC64-NEXT:    bl floorf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %floor
-}
-
-
-define <2 x double> @constrained_vector_floor_v2f64() {
-; PC64LE-LABEL: constrained_vector_floor_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI108_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpim 34, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_floor_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI108_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpim 34, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_floor_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI108_0@toc@l(3)
-; PC64-NEXT:    bl floor
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI108_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI108_1@toc@l(3)
-; PC64-NEXT:    bl floor
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %floor
-}
-
-define <3 x float> @constrained_vector_floor_v3f32() {
-; PC64LE-LABEL: constrained_vector_floor_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI109_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI109_2@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI109_1@toc@l(4)
-; PC64LE-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE-NEXT:    frim 0, 0
-; PC64LE-NEXT:    lfs 2, .LCPI109_0@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE-NEXT:    frim 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI109_3@toc@l
-; PC64LE-NEXT:    frim 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_floor_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI109_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI109_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI109_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE9-NEXT:    frim 0, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI109_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI109_3@toc@l
-; PC64LE9-NEXT:    frim 1, 1
-; PC64LE9-NEXT:    frim 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_floor_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI109_0@toc@l(3)
-; PC64-NEXT:    bl floorf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI109_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI109_1@toc@l(3)
-; PC64-NEXT:    bl floorf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI109_2@toc@l(3)
-; PC64-NEXT:    bl floorf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %floor
-}
-
-define <3 x double> @constrained_vector_floor_v3f64() {
-; PC64LE-LABEL: constrained_vector_floor_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI110_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI110_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xsrdpim 3, 1
-; PC64LE-NEXT:    xvrdpim 2, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_floor_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI110_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI110_1@toc@l
-; PC64LE9-NEXT:    xsrdpim 3, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpim 2, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_floor_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI110_0@toc@l(3)
-; PC64-NEXT:    bl floor
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI110_1@toc@l(3)
-; PC64-NEXT:    bl floor
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI110_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI110_2@toc@l(3)
-; PC64-NEXT:    bl floor
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %floor
-}
-
-define <1 x float> @constrained_vector_round_v1f32() {
-; PC64LE-LABEL: constrained_vector_round_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE-NEXT:    frin 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_round_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE9-NEXT:    frin 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_round_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI111_0@toc@l(3)
-; PC64-NEXT:    bl roundf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %round
-}
-
-define <2 x double> @constrained_vector_round_v2f64() {
-; PC64LE-LABEL: constrained_vector_round_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI112_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpi 34, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_round_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI112_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpi 34, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_round_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI112_0@toc@l(3)
-; PC64-NEXT:    bl round
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI112_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI112_1@toc@l(3)
-; PC64-NEXT:    bl round
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %round
-}
-
-define <3 x float> @constrained_vector_round_v3f32() {
-; PC64LE-LABEL: constrained_vector_round_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI113_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI113_2@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI113_1@toc@l(4)
-; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE-NEXT:    frin 0, 0
-; PC64LE-NEXT:    lfs 2, .LCPI113_0@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE-NEXT:    frin 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI113_3@toc@l
-; PC64LE-NEXT:    frin 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_round_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI113_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI113_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI113_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE9-NEXT:    frin 0, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI113_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI113_3@toc@l
-; PC64LE9-NEXT:    frin 1, 1
-; PC64LE9-NEXT:    frin 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_round_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI113_0@toc@l(3)
-; PC64-NEXT:    bl roundf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI113_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI113_1@toc@l(3)
-; PC64-NEXT:    bl roundf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI113_2@toc@l(3)
-; PC64-NEXT:    bl roundf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %round
-}
-
-
-define <3 x double> @constrained_vector_round_v3f64() {
-; PC64LE-LABEL: constrained_vector_round_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI114_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI114_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xsrdpi 3, 1
-; PC64LE-NEXT:    xvrdpi 2, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_round_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI114_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI114_1@toc@l
-; PC64LE9-NEXT:    xsrdpi 3, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpi 2, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_round_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI114_0@toc@l(3)
-; PC64-NEXT:    bl round
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI114_1@toc@l(3)
-; PC64-NEXT:    bl round
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI114_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI114_2@toc@l(3)
-; PC64-NEXT:    bl round
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %round
-}
-
-define <1 x float> @constrained_vector_trunc_v1f32() {
-; PC64LE-LABEL: constrained_vector_trunc_v1f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE-NEXT:    friz 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE9-NEXT:    friz 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_trunc_v1f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -112(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 112
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
-; PC64-NEXT:    lfs 1, .LCPI115_0@toc@l(3)
-; PC64-NEXT:    bl truncf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addi 1, 1, 112
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %trunc
-}
-
-define <2 x double> @constrained_vector_trunc_v2f64() {
-; PC64LE-LABEL: constrained_vector_trunc_v2f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI116_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpiz 34, 0
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_trunc_v2f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI116_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpiz 34, 0
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_trunc_v2f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI116_0@toc@l(3)
-; PC64-NEXT:    bl trunc
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI116_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI116_1@toc@l(3)
-; PC64-NEXT:    bl trunc
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 2, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %trunc
-}
-
-define <3 x float> @constrained_vector_trunc_v3f32() {
-; PC64LE-LABEL: constrained_vector_trunc_v3f32:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI117_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI117_2@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI117_1@toc@l(4)
-; PC64LE-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE-NEXT:    friz 0, 0
-; PC64LE-NEXT:    lfs 2, .LCPI117_0@toc@l(3)
-; PC64LE-NEXT:    addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE-NEXT:    friz 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI117_3@toc@l
-; PC64LE-NEXT:    friz 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
-; PC64LE-NEXT:    xscvdpspn 1, 1
-; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
-; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
-; PC64LE-NEXT:    lvx 3, 0, 3
-; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT:    vperm 2, 4, 2, 3
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI117_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI117_1@toc@ha
-; PC64LE9-NEXT:    lfs 1, .LCPI117_1@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE9-NEXT:    friz 0, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI117_2@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI117_3@toc@l
-; PC64LE9-NEXT:    friz 1, 1
-; PC64LE9-NEXT:    friz 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT:    vmrglw 2, 3, 2
-; PC64LE9-NEXT:    lxvx 35, 0, 3
-; PC64LE9-NEXT:    vperm 2, 4, 2, 3
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_trunc_v3f32:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfs 1, .LCPI117_0@toc@l(3)
-; PC64-NEXT:    bl truncf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI117_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfs 1, .LCPI117_1@toc@l(3)
-; PC64-NEXT:    bl truncf
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI117_2@toc@l(3)
-; PC64-NEXT:    bl truncf
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %trunc
-}
-
-define <3 x double> @constrained_vector_trunc_v3f64() {
-; PC64LE-LABEL: constrained_vector_trunc_v3f64:
-; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI118_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
-; PC64LE-NEXT:    lfs 1, .LCPI118_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xsrdpiz 3, 1
-; PC64LE-NEXT:    xvrdpiz 2, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE-NEXT:    blr
-;
-; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
-; PC64LE9:       # %bb.0: # %entry
-; PC64LE9-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI118_0@toc@l(3)
-; PC64LE9-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI118_1@toc@l
-; PC64LE9-NEXT:    xsrdpiz 3, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpiz 2, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
-; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE9-NEXT:    blr
-;
-; PC64-LABEL: constrained_vector_trunc_v3f64:
-; PC64:       # %bb.0: # %entry
-; PC64-NEXT:    mflr 0
-; PC64-NEXT:    std 0, 16(1)
-; PC64-NEXT:    stdu 1, -128(1)
-; PC64-NEXT:    .cfi_def_cfa_offset 128
-; PC64-NEXT:    .cfi_offset lr, 16
-; PC64-NEXT:    .cfi_offset f30, -16
-; PC64-NEXT:    .cfi_offset f31, -8
-; PC64-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
-; PC64-NEXT:    stfd 30, 112(1) # 8-byte Folded Spill
-; PC64-NEXT:    stfd 31, 120(1) # 8-byte Folded Spill
-; PC64-NEXT:    lfd 1, .LCPI118_0@toc@l(3)
-; PC64-NEXT:    bl trunc
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
-; PC64-NEXT:    fmr 31, 1
-; PC64-NEXT:    lfd 1, .LCPI118_1@toc@l(3)
-; PC64-NEXT:    bl trunc
-; PC64-NEXT:    nop
-; PC64-NEXT:    addis 3, 2, .LCPI118_2@toc@ha
-; PC64-NEXT:    fmr 30, 1
-; PC64-NEXT:    lfs 1, .LCPI118_2@toc@l(3)
-; PC64-NEXT:    bl trunc
-; PC64-NEXT:    nop
-; PC64-NEXT:    fmr 3, 1
-; PC64-NEXT:    fmr 1, 31
-; PC64-NEXT:    fmr 2, 30
-; PC64-NEXT:    lfd 31, 120(1) # 8-byte Folded Reload
-; PC64-NEXT:    lfd 30, 112(1) # 8-byte Folded Reload
-; PC64-NEXT:    addi 1, 1, 128
-; PC64-NEXT:    ld 0, 16(1)
-; PC64-NEXT:    mtlr 0
-; PC64-NEXT:    blr
-entry:
-  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %trunc
-}
-
-
-; Single width declarations
-declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
-declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
-
-; Scalar width declarations
-declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
-declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
-declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
-
-; Illegal width declarations
-declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
-declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
-
-; Double width declarations
-declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
-declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)

From 9212206d253b858d761cbad8a71ec71189d6b588 Mon Sep 17 00:00:00 2001
From: Jason Liu <jasonliu.development@gmail.com>
Date: Tue, 28 May 2019 14:37:59 +0000
Subject: [PATCH 0363/1176] [XCOFF] Implement parsing symbol table for
 xcoffobjfile and output as yaml format

Summary:
This patch implement parsing symbol table for xcoffobjfile and
output as yaml format. Parsing auxiliary entries of a symbol
will be in a separate patch.

The XCOFF object file (aix_xcoff.o) used in the test comes from
-bash-4.2$ cat test.c
extern int i;
extern int TestforXcoff;
int main()
{
i++;
TestforXcoff--;
}

Patch by DiggerLin

Reviewers: sfertile, hubert.reinterpretcast, MaskRay, daltenty

Differential Revision: https://reviews.llvm.org/D61532

llvm-svn: 361832
---
 llvm/include/llvm/BinaryFormat/XCOFF.h      |  74 ++++++-
 llvm/include/llvm/Object/XCOFFObjectFile.h  |  61 +++++-
 llvm/include/llvm/ObjectYAML/XCOFFYAML.h    |  25 ++-
 llvm/lib/Object/XCOFFObjectFile.cpp         | 202 ++++++++++++++++----
 llvm/lib/ObjectYAML/XCOFFYAML.cpp           |  67 +++++++
 llvm/test/tools/obj2yaml/Inputs/aix_xcoff.o | Bin 588 -> 740 bytes
 llvm/test/tools/obj2yaml/aix_xcoff.test     |  83 +++++++-
 llvm/tools/llvm-readobj/XCOFFDumper.cpp     |   2 +-
 llvm/tools/obj2yaml/xcoff2yaml.cpp          |  45 ++++-
 9 files changed, 500 insertions(+), 59 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h
index 32c0232f6e50c..9c17559b40d79 100644
--- a/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -13,11 +13,14 @@
 #ifndef LLVM_BINARYFORMAT_XCOFF_H
 #define LLVM_BINARYFORMAT_XCOFF_H
 
+#include <cstdint>
+
 namespace llvm {
 namespace XCOFF {
 
 // Constants used in the XCOFF definition.
-enum { SectionNameSize = 8 };
+enum { SectionNameSize = 8, SymbolNameSize = 8 };
+enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
 
 // Flags for defining the section type. Used for the s_flags field of
 // the section header structure. Defined in the system header `scnhdr.h`.
@@ -37,6 +40,75 @@ enum SectionTypeFlags {
   STYP_OVRFLO = 0x8000
 };
 
+// STORAGE CLASSES, n_sclass field of syment.
+// The values come from `storclass.h` and `dbxstclass.h`.
+enum StorageClass : uint8_t {
+  // Storage classes used for symbolic debugging symbols.
+  C_FILE = 103,  // File name
+  C_BINCL = 108, // Beginning of include file
+  C_EINCL = 109, // Ending of include file
+  C_GSYM = 128,  // Global variable
+  C_STSYM = 133, // Statically allocated symbol
+  C_BCOMM = 135, // Beginning of common block
+  C_ECOMM = 137, // End of common block
+  C_ENTRY = 141, // Alternate entry
+  C_BSTAT = 143, // Beginning of static block
+  C_ESTAT = 144, // End of static block
+  C_GTLS = 145,  // Global thread-local variable
+  C_STTLS = 146, // Static thread-local variable
+
+  // Storage classes used for DWARF symbols.
+  C_DWARF = 112, // DWARF section symbol
+
+  // Storage classes used for absolute symbols.
+  C_LSYM = 129,  // Automatic variable allocated on stack
+  C_PSYM = 130,  // Argument to subroutine allocated on stack
+  C_RSYM = 131,  // Register variable
+  C_RPSYM = 132, // Argument to function or procedure stored in register
+  C_ECOML = 136, // Local member of common block
+  C_FUN = 142,   // Function or procedure
+
+  // Storage classes used for undefined external symbols or
+  // symbols of general sections.
+  C_EXT = 2,       // External symbol
+  C_WEAKEXT = 111, // Weak external symbol
+
+  // Storage classes used for symbols of general sections.
+  C_NULL = 0,
+  C_STAT = 3,     // Static
+  C_BLOCK = 100,  // ".bb" or ".eb"
+  C_FCN = 101,    // ".bf" or ".ef"
+  C_HIDEXT = 107, // Un-named external symbol
+  C_INFO = 110,   // Comment string in .info section
+  C_DECL = 140,   // Declaration of object (type)
+
+  // Storage classes - Obsolete/Undocumented.
+  C_AUTO = 1,     // Automatic variable
+  C_REG = 4,      // Register variable
+  C_EXTDEF = 5,   // External definition
+  C_LABEL = 6,    // Label
+  C_ULABEL = 7,   // Undefined label
+  C_MOS = 8,      // Member of structure
+  C_ARG = 9,      // Function argument
+  C_STRTAG = 10,  // Structure tag
+  C_MOU = 11,     // Member of union
+  C_UNTAG = 12,   // Union tag
+  C_TPDEF = 13,   // Type definition
+  C_USTATIC = 14, // Undefined static
+  C_ENTAG = 15,   // Enumeration tag
+  C_MOE = 16,     // Member of enumeration
+  C_REGPARM = 17, // Register parameter
+  C_FIELD = 18,   // Bit field
+  C_EOS = 102,    // End of structure
+  C_LINE = 104,
+  C_ALIAS = 105,  // Duplicate tag
+  C_HIDDEN = 106, // Special storage class for external
+  C_EFCN = 255,   // Physical end of function
+
+  // Storage classes - reserved
+  C_TCSYM = 134 // Reserved
+};
+
 } // end namespace XCOFF
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h
index 33a13bc2c1012..d9f9d2724f15c 100644
--- a/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -61,16 +61,54 @@ struct XCOFFSectionHeader {
   support::big32_t Flags;
 };
 
+struct XCOFFSymbolEntry {
+  enum { NAME_IN_STR_TBL_MAGIC = 0x0 };
+  typedef struct {
+    support::big32_t Magic; // Zero indicates name in string table.
+    support::ubig32_t Offset;
+  } NameInStrTblType;
+
+  typedef struct {
+    uint8_t LanguageId;
+    uint8_t CpuTypeId;
+  } CFileLanguageIdAndTypeIdType;
+
+  union {
+    char SymbolName[XCOFF::SymbolNameSize];
+    NameInStrTblType NameInStrTbl;
+  };
+
+  support::ubig32_t Value; // Symbol value; storage class-dependent.
+  support::big16_t SectionNumber;
+
+  union {
+    support::ubig16_t SymbolType;
+    CFileLanguageIdAndTypeIdType CFileLanguageIdAndTypeId;
+  };
+
+  XCOFF::StorageClass StorageClass;
+  uint8_t NumberOfAuxEntries;
+};
+
+struct XCOFFStringTable {
+  uint32_t Size;
+  const char *Data;
+};
+
 class XCOFFObjectFile : public ObjectFile {
 private:
   const XCOFFFileHeader *FileHdrPtr = nullptr;
   const XCOFFSectionHeader *SectionHdrTablePtr = nullptr;
+  const XCOFFSymbolEntry *SymbolTblPtr = nullptr;
+  XCOFFStringTable StringTable = {0, nullptr};
 
   size_t getFileHeaderSize() const;
   size_t getSectionHeaderSize() const;
 
   const XCOFFSectionHeader *toSection(DataRefImpl Ref) const;
-
+  static bool isReservedSectionNumber(int16_t SectionNumber);
+  std::error_code getSectionByNum(int16_t Num,
+                                  const XCOFFSectionHeader *&Result) const;
 
 public:
   void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -121,18 +159,27 @@ class XCOFFObjectFile : public ObjectFile {
   XCOFFObjectFile(MemoryBufferRef Object, std::error_code &EC);
 
   const XCOFFFileHeader *getFileHeader() const { return FileHdrPtr; }
+  const XCOFFSymbolEntry *getPointerToSymbolTable() const {
+    return SymbolTblPtr;
+  }
+
+  Expected<StringRef>
+  getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const;
 
+  const XCOFFSymbolEntry *toSymbolEntry(DataRefImpl Ref) const;
   uint16_t getMagic() const;
   uint16_t getNumberOfSections() const;
-  int32_t  getTimeStamp() const;
-  uint32_t  getSymbolTableOffset() const;
+  int32_t getTimeStamp() const;
+  uint32_t getSymbolTableOffset() const;
 
-  // Note that this value is signed and might return a negative value. Negative
-  // values are reserved for future use.
-  int32_t  getNumberOfSymbolTableEntries() const;
+  // Returns the value as encoded in the object file.
+  // Negative values are reserved for future use.
+  int32_t getRawNumberOfSymbolTableEntries() const;
 
+  // Returns a sanitized value, useable as an index into the symbol table.
+  uint32_t getLogicalNumberOfSymbolTableEntries() const;
   uint16_t getOptionalHeaderSize() const;
-  uint16_t getFlags() const;
+  uint16_t getFlags() const { return FileHdrPtr->Flags; };
 }; // XCOFFObjectFile
 
 } // namespace object
diff --git a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
index 078ab756695e4..f99004e69762f 100644
--- a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
@@ -12,8 +12,9 @@
 #ifndef LLVM_OBJECTYAML_XCOFFYAML_H
 #define LLVM_OBJECTYAML_XCOFFYAML_H
 
+#include "llvm/BinaryFormat/XCOFF.h"
 #include "llvm/ObjectYAML/YAML.h"
-#include <cstdint>
+#include <vector>
 
 namespace llvm {
 namespace XCOFFYAML {
@@ -28,14 +29,30 @@ struct FileHeader {
   llvm::yaml::Hex16 Flags;
 };
 
+struct Symbol {
+  StringRef SymbolName;
+  llvm::yaml::Hex32 Value; // Symbol value; storage class-dependent.
+  StringRef SectionName;
+  llvm::yaml::Hex16 Type;
+  XCOFF::StorageClass StorageClass;
+  uint8_t NumberOfAuxEntries; // Number of auxiliary entries
+};
+
 struct Object {
   FileHeader Header;
+  std::vector<Symbol> Symbols;
   Object();
 };
 } // namespace XCOFFYAML
-
+} // namespace llvm
+LLVM_YAML_IS_SEQUENCE_VECTOR(XCOFFYAML::Symbol)
+namespace llvm {
 namespace yaml {
 
+template <> struct ScalarEnumerationTraits<XCOFF::StorageClass> {
+  static void enumeration(IO &IO, XCOFF::StorageClass &Value);
+};
+
 template <> struct MappingTraits<XCOFFYAML::FileHeader> {
   static void mapping(IO &IO, XCOFFYAML::FileHeader &H);
 };
@@ -44,6 +61,10 @@ template <> struct MappingTraits<XCOFFYAML::Object> {
   static void mapping(IO &IO, XCOFFYAML::Object &Obj);
 };
 
+template <> struct MappingTraits<XCOFFYAML::Symbol> {
+  static void mapping(IO &IO, XCOFFYAML::Symbol &S);
+};
+
 } // namespace yaml
 } // namespace llvm
 
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index db57fbad00235..9a3e1783ab2c9 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -26,6 +26,17 @@ enum { XCOFF32FileHeaderSize = 20 };
 static_assert(sizeof(XCOFFFileHeader) == XCOFF32FileHeaderSize,
               "Wrong size for XCOFF file header.");
 
+// Sets EC and returns false if there is less than 'Size' bytes left in the
+// buffer at 'Offset'.
+static bool checkSize(MemoryBufferRef M, std::error_code &EC, uint64_t Offset,
+                      uint64_t Size) {
+  if (M.getBufferSize() < Offset + Size) {
+    EC = object_error::unexpected_eof;
+    return false;
+  }
+  return true;
+}
+
 // Sets Obj unless any bytes in [addr, addr + size) fall outsize of m.
 // Returns unexpected_eof on error.
 template <typename T>
@@ -43,6 +54,12 @@ template <typename T> static const T *viewAs(uintptr_t in) {
   return reinterpret_cast<const T *>(in);
 }
 
+static StringRef generateStringRef(const char *Name, uint64_t Size) {
+  auto NulCharPtr = static_cast<const char *>(memchr(Name, '\0', Size));
+  return NulCharPtr ? StringRef(Name, NulCharPtr - Name)
+                    : StringRef(Name, Size);
+}
+
 const XCOFFSectionHeader *XCOFFObjectFile::toSection(DataRefImpl Ref) const {
   auto Sec = viewAs<XCOFFSectionHeader>(Ref.p);
 #ifndef NDEBUG
@@ -58,6 +75,12 @@ const XCOFFSectionHeader *XCOFFObjectFile::toSection(DataRefImpl Ref) const {
   return Sec;
 }
 
+const XCOFFSymbolEntry *XCOFFObjectFile::toSymbolEntry(DataRefImpl Ref) const {
+  assert(Ref.p != 0 && "Symbol table pointer can not be nullptr!");
+  auto SymEntPtr = viewAs<XCOFFSymbolEntry>(Ref.p);
+  return SymEntPtr;
+}
+
 // The next 2 functions are not exactly necessary yet, but they are useful to
 // abstract over the size difference between XCOFF32 and XCOFF64 structure
 // definitions.
@@ -69,15 +92,40 @@ size_t XCOFFObjectFile::getSectionHeaderSize() const {
   return sizeof(XCOFFSectionHeader);
 }
 
+uint16_t XCOFFObjectFile::getMagic() const { return FileHdrPtr->Magic; }
+
 void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
-  llvm_unreachable("Not yet implemented!");
-  return;
+  const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+
+  SymEntPtr += SymEntPtr->NumberOfAuxEntries + 1;
+  Symb.p = reinterpret_cast<uintptr_t>(SymEntPtr);
 }
 
 Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
-  StringRef Result;
-  llvm_unreachable("Not yet implemented!");
-  return Result;
+  const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+
+  if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC)
+    return generateStringRef(SymEntPtr->SymbolName, XCOFF::SymbolNameSize);
+
+  // A storage class value with the high-order bit on indicates that the name is
+  // a symbolic debugger stabstring.
+  if (SymEntPtr->StorageClass & 0x80)
+    return StringRef("Unimplemented Debug Name");
+
+  uint32_t Offset = SymEntPtr->NameInStrTbl.Offset;
+  // The byte offset is relative to the start of the string table
+  // or .debug section. A byte offset value of 0 is a null or zero-length symbol
+  // name. A byte offset in the range 1 to 3 (inclusive) points into the length
+  // field; as a soft-error recovery mechanism, we treat such cases as having an
+  // offset of 0.
+  if (Offset < 4)
+    return StringRef(nullptr, 0);
+
+  if (StringTable.Data != nullptr && StringTable.Size > Offset)
+    return (StringTable.Data + Offset);
+
+  return make_error<GenericBinaryError>("Symbol Name parse failed",
+                                        object_error::parse_failed);
 }
 
 Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
@@ -87,9 +135,7 @@ Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
 }
 
 uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
-  uint64_t Result = 0;
-  llvm_unreachable("Not yet implemented!");
-  return Result;
+  return toSymbolEntry(Symb)->Value;
 }
 
 uint64_t XCOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
@@ -106,8 +152,20 @@ XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
 
 Expected<section_iterator>
 XCOFFObjectFile::getSymbolSection(DataRefImpl Symb) const {
-  llvm_unreachable("Not yet implemented!");
-  return section_iterator(SectionRef());
+  const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+  int16_t SectNum = SymEntPtr->SectionNumber;
+
+  if (isReservedSectionNumber(SectNum))
+    return section_end();
+
+  const XCOFFSectionHeader *Sec;
+  if (std::error_code EC = getSectionByNum(SectNum, Sec))
+    return errorCodeToError(EC);
+
+  DataRefImpl SecDRI;
+  SecDRI.p = reinterpret_cast<uintptr_t>(Sec);
+
+  return section_iterator(SectionRef(SecDRI, this));
 }
 
 void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const {
@@ -219,13 +277,16 @@ uint32_t XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
 }
 
 basic_symbol_iterator XCOFFObjectFile::symbol_begin() const {
-  llvm_unreachable("Not yet implemented!");
-  return basic_symbol_iterator(SymbolRef());
+  DataRefImpl SymDRI;
+  SymDRI.p = reinterpret_cast<uintptr_t>(SymbolTblPtr);
+  return basic_symbol_iterator(SymbolRef(SymDRI, this));
 }
 
 basic_symbol_iterator XCOFFObjectFile::symbol_end() const {
-  llvm_unreachable("Not yet implemented!");
-  return basic_symbol_iterator(SymbolRef());
+  DataRefImpl SymDRI;
+  SymDRI.p = reinterpret_cast<uintptr_t>(
+      SymbolTblPtr + getLogicalNumberOfSymbolTableEntries());
+  return basic_symbol_iterator(SymbolRef(SymDRI, this));
 }
 
 section_iterator XCOFFObjectFile::section_begin() const {
@@ -243,7 +304,7 @@ section_iterator XCOFFObjectFile::section_end() const {
 
 uint8_t XCOFFObjectFile::getBytesInAddress() const {
   // Only support 32-bit object files for now ...
-  assert(getFileHeaderSize() ==  XCOFF32FileHeaderSize);
+  assert(getFileHeaderSize() == XCOFF32FileHeaderSize);
   return 4;
 }
 
@@ -274,6 +335,67 @@ Expected<uint64_t> XCOFFObjectFile::getStartAddress() const {
   return 0;
 }
 
+std::error_code
+XCOFFObjectFile::getSectionByNum(int16_t Num,
+                                 const XCOFFSectionHeader *&Result) const {
+  if (Num > 0 && static_cast<uint16_t>(Num) <= getNumberOfSections()) {
+    Result = SectionHdrTablePtr + (Num - 1);
+    return std::error_code();
+  }
+
+  return object_error::invalid_section_index;
+}
+
+Expected<StringRef>
+XCOFFObjectFile::getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const {
+  int16_t SectionNum = SymEntPtr->SectionNumber;
+
+  switch (SectionNum) {
+  case XCOFF::N_DEBUG:
+    return "N_DEBUG";
+  case XCOFF::N_ABS:
+    return "N_ABS";
+  case XCOFF::N_UNDEF:
+    return "N_UNDEF";
+  default: {
+    const XCOFFSectionHeader *SectHeaderPtr;
+    std::error_code EC;
+    if ((EC = getSectionByNum(SectionNum, SectHeaderPtr)))
+      return errorCodeToError(EC);
+    else
+      return generateStringRef(SectHeaderPtr->Name, XCOFF::SectionNameSize);
+  }
+  }
+}
+
+bool XCOFFObjectFile::isReservedSectionNumber(int16_t SectionNumber) {
+  return (SectionNumber <= 0 && SectionNumber >= -2);
+}
+
+uint16_t XCOFFObjectFile::getNumberOfSections() const {
+  return FileHdrPtr->NumberOfSections;
+}
+
+int32_t XCOFFObjectFile::getTimeStamp() const { return FileHdrPtr->TimeStamp; }
+
+uint32_t XCOFFObjectFile::getSymbolTableOffset() const {
+  return FileHdrPtr->SymbolTableOffset;
+}
+
+int32_t XCOFFObjectFile::getRawNumberOfSymbolTableEntries() const {
+  return FileHdrPtr->NumberOfSymTableEntries;
+}
+
+uint32_t XCOFFObjectFile::getLogicalNumberOfSymbolTableEntries() const {
+  return (FileHdrPtr->NumberOfSymTableEntries >= 0
+              ? FileHdrPtr->NumberOfSymTableEntries
+              : 0);
+}
+
+uint16_t XCOFFObjectFile::getOptionalHeaderSize() const {
+  return FileHdrPtr->AuxHeaderSize;
+}
+
 XCOFFObjectFile::XCOFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
     : ObjectFile(Binary::ID_XCOFF32, Object) {
 
@@ -293,37 +415,39 @@ XCOFFObjectFile::XCOFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
                         getNumberOfSections() * getSectionHeaderSize())))
       return;
   }
-}
 
-uint16_t XCOFFObjectFile::getMagic() const {
-  return FileHdrPtr->Magic;
-}
+  if (getLogicalNumberOfSymbolTableEntries() == 0)
+    return;
 
-uint16_t XCOFFObjectFile::getNumberOfSections() const {
-  return FileHdrPtr->NumberOfSections;
-}
+  // Get pointer to the symbol table.
+  CurPtr = FileHdrPtr->SymbolTableOffset;
+  uint64_t SymbolTableSize = (uint64_t)(sizeof(XCOFFSymbolEntry)) *
+                             getLogicalNumberOfSymbolTableEntries();
 
-int32_t XCOFFObjectFile::getTimeStamp() const {
-  return FileHdrPtr->TimeStamp;
-}
+  if ((EC = getObject(SymbolTblPtr, Data, base() + CurPtr, SymbolTableSize)))
+    return;
 
-uint32_t XCOFFObjectFile::getSymbolTableOffset() const {
-  return FileHdrPtr->SymbolTableOffset;
-}
+  // Move pointer to the string table.
+  CurPtr += SymbolTableSize;
 
-int32_t XCOFFObjectFile::getNumberOfSymbolTableEntries() const {
-  // As far as symbol table size is concerned, if this field is negative it is
-  // to be treated as a 0. However since this field is also used for printing we
-  // don't want to truncate any negative values.
-  return FileHdrPtr->NumberOfSymTableEntries;
-}
+  if (CurPtr + 4 > Data.getBufferSize())
+    return;
 
-uint16_t XCOFFObjectFile::getOptionalHeaderSize() const {
-  return FileHdrPtr->AuxHeaderSize;
-}
+  StringTable.Size = support::endian::read32be(base() + CurPtr);
+
+  if (StringTable.Size <= 4)
+    return;
+
+  // Check for whether the String table has the size indicated by length
+  // field
+  if (!checkSize(Data, EC, CurPtr, StringTable.Size))
+    return;
 
-uint16_t XCOFFObjectFile::getFlags() const {
-  return FileHdrPtr->Flags;
+  StringTable.Data = reinterpret_cast<const char *>(base() + CurPtr);
+  if (StringTable.Data[StringTable.Size - 1] != '\0') {
+    EC = object_error::string_table_non_null_end;
+    return;
+  }
 }
 
 Expected<std::unique_ptr<ObjectFile>>
diff --git a/llvm/lib/ObjectYAML/XCOFFYAML.cpp b/llvm/lib/ObjectYAML/XCOFFYAML.cpp
index c3fe1bb868c59..982e6aecbb987 100644
--- a/llvm/lib/ObjectYAML/XCOFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFYAML.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ObjectYAML/XCOFFYAML.h"
+#include "llvm/BinaryFormat/XCOFF.h"
 #include <string.h>
 
 namespace llvm {
@@ -22,6 +23,62 @@ Object::Object() { memset(&Header, 0, sizeof(Header)); }
 
 namespace yaml {
 
+void ScalarEnumerationTraits<XCOFF::StorageClass>::enumeration(
+    IO &IO, XCOFF::StorageClass &Value) {
+#define ECase(X) IO.enumCase(Value, #X, XCOFF::X)
+  ECase(C_NULL);
+  ECase(C_AUTO);
+  ECase(C_EXT);
+  ECase(C_STAT);
+  ECase(C_REG);
+  ECase(C_EXTDEF);
+  ECase(C_LABEL);
+  ECase(C_ULABEL);
+  ECase(C_MOS);
+  ECase(C_ARG);
+  ECase(C_STRTAG);
+  ECase(C_MOU);
+  ECase(C_UNTAG);
+  ECase(C_TPDEF);
+  ECase(C_USTATIC);
+  ECase(C_ENTAG);
+  ECase(C_MOE);
+  ECase(C_REGPARM);
+  ECase(C_FIELD);
+  ECase(C_BLOCK);
+  ECase(C_FCN);
+  ECase(C_EOS);
+  ECase(C_FILE);
+  ECase(C_LINE);
+  ECase(C_ALIAS);
+  ECase(C_HIDDEN);
+  ECase(C_HIDEXT);
+  ECase(C_BINCL);
+  ECase(C_EINCL);
+  ECase(C_INFO);
+  ECase(C_WEAKEXT);
+  ECase(C_DWARF);
+  ECase(C_GSYM);
+  ECase(C_LSYM);
+  ECase(C_PSYM);
+  ECase(C_RSYM);
+  ECase(C_RPSYM);
+  ECase(C_STSYM);
+  ECase(C_TCSYM);
+  ECase(C_BCOMM);
+  ECase(C_ECOML);
+  ECase(C_ECOMM);
+  ECase(C_DECL);
+  ECase(C_ENTRY);
+  ECase(C_FUN);
+  ECase(C_BSTAT);
+  ECase(C_ESTAT);
+  ECase(C_GTLS);
+  ECase(C_STTLS);
+  ECase(C_EFCN);
+#undef ECase
+}
+
 void MappingTraits<XCOFFYAML::FileHeader>::mapping(
     IO &IO, XCOFFYAML::FileHeader &FileHdr) {
   IO.mapRequired("MagicNumber", FileHdr.Magic);
@@ -33,9 +90,19 @@ void MappingTraits<XCOFFYAML::FileHeader>::mapping(
   IO.mapRequired("Flags", FileHdr.Flags);
 }
 
+void MappingTraits<XCOFFYAML::Symbol>::mapping(IO &IO, XCOFFYAML::Symbol &S) {
+  IO.mapRequired("Name", S.SymbolName);
+  IO.mapRequired("Value", S.Value);
+  IO.mapRequired("Section", S.SectionName);
+  IO.mapRequired("Type", S.Type);
+  IO.mapRequired("StorageClass", S.StorageClass);
+  IO.mapRequired("NumberOfAuxEntries", S.NumberOfAuxEntries);
+}
+
 void MappingTraits<XCOFFYAML::Object>::mapping(IO &IO, XCOFFYAML::Object &Obj) {
   IO.mapTag("!XCOFF", true);
   IO.mapRequired("FileHeader", Obj.Header);
+  IO.mapRequired("Symbols", Obj.Symbols);
 }
 
 } // namespace yaml
diff --git a/llvm/test/tools/obj2yaml/Inputs/aix_xcoff.o b/llvm/test/tools/obj2yaml/Inputs/aix_xcoff.o
index 3712f7f853e506d8c0bcf2922e7f61c8b63cffee..84383c4ce24c3b247b05fc0cbdfa2353e620975d 100644
GIT binary patch
literal 740
zcmZuuyH3ME5F9%u3J(#C5CuZS2^EcNLK7q!A|z;1P*6BZVifU61TI{F<Oqo#!6(u2
z5rqFhnAy9F;}BNb-MP8Fxw~EK6ZXaPM*>&}05<9Hqh1^dm|8&CrH|==qoU_`+fiFV
z1We-F^ml5Nn1KZq4`091@3(3?erPPmqZ%;U1L{p!f?t|k*WhR_O$7^A_khRUlX$-Y
z?}_J66Of@TdO>2rB07e1)9&9^6wQ!-axfv8SLy5J0^?SOcr!K>=VvG&Dk(Mj>6$`A
zqM|~gZYvZMITiR}|GGygmj0k{X+^#J$nR)I$9{f6Y|FbRrn@+|`mbhYBGkANoj@OT
zje;-DFj)Gdbc&j=4WBCsKvL<f=uLM?=f5>d$L!fA=O#bQbj*LMdg^GM9hqA>naN#-
lamyOy`ZKLDI<8D?YVyrE)0wZ?X|em^;O@LL2t&*)egS#hN}2!w

delta 262
zcmaFDdWJ=S@je4njK7u%0|O%m0|SH5L;-PgA0Q(Ih|d5q0|OJ7RM1OFEJ*}289?$N
zECa+FfcOCrGcd4Bj8mI9Lpk1rfh9LFGfy5Y2qZuzAp?+35a0u10e)s6DFtVN<k-ME
z1Ar_(Fe?GbVga*2CNWL6VDy^2j8T-)XYw9KWk#0C4;UqZ>|cz^sy-mMGBE;GfxH3)
YOk5Be$?XY~otTsvQzoZ@$#qN;0B}7ZzyJUM

diff --git a/llvm/test/tools/obj2yaml/aix_xcoff.test b/llvm/test/tools/obj2yaml/aix_xcoff.test
index 1ad89e9c5d3c4..b3e60dfd0580e 100644
--- a/llvm/test/tools/obj2yaml/aix_xcoff.test
+++ b/llvm/test/tools/obj2yaml/aix_xcoff.test
@@ -2,10 +2,79 @@
 # Test that we can parse the XCOFF object file correctly.
 # CHECK: --- !XCOFF
 # CHECK-NEXT: FileHeader:
-# CHECK-NEXT: MagicNumber:     0x01DF
-# CHECK-NEXT: NumberOfSections: 2
-# CHECK-NEXT: CreationTime:    1548692020
-# CHECK-NEXT: OffsetToSymbolTable: 0x00000108
-# CHECK-NEXT: EntriesInSymbolTable: 18
-# CHECK-NEXT: AuxiliaryHeaderSize: 0
-# CHECK-NEXT: Flags:           0x0000
+# CHECK-NEXT:   MagicNumber:     0x01DF
+# CHECK-NEXT:   NumberOfSections: 2
+# CHECK-NEXT:   CreationTime:    1552337792
+# CHECK-NEXT:   OffsetToSymbolTable: 0x0000013A
+# CHECK-NEXT:   EntriesInSymbolTable: 22
+# CHECK-NEXT:   AuxiliaryHeaderSize: 0
+# CHECK-NEXT:   Flags:           0x0000
+
+# CHECK: Symbols:
+# CHECK-NEXT:   - Name:      .file
+# CHECK-NEXT:     Value:     0x00000000
+# CHECK-NEXT:     Section: N_DEBUG
+# CHECK-NEXT:     Type:      0x0003
+# CHECK-NEXT:     StorageClass:    C_FILE
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      i
+# CHECK-NEXT:     Value:     0x00000000
+# CHECK-NEXT:     Section: N_UNDEF
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_EXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      TestforXcoff
+# CHECK-NEXT:     Value:     0x00000000
+# CHECK-NEXT:     Section: N_UNDEF
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_EXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      .text
+# CHECK-NEXT:     Value:     0x00000000
+# CHECK-NEXT:     Section: .text
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_HIDEXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      .main
+# CHECK-NEXT:     Value:     0x00000000
+# CHECK-NEXT:     Section: .text
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_EXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      main
+# CHECK-NEXT:     Value:     0x00000060
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_HIDEXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      main
+# CHECK-NEXT:     Value:     0x00000060
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_EXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      .data
+# CHECK-NEXT:     Value:     0x00000070
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_HIDEXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      TOC
+# CHECK-NEXT:     Value:     0x00000074
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_HIDEXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      i
+# CHECK-NEXT:     Value:     0x00000074
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_HIDEXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT:   - Name:      TestforXcoff
+# CHECK-NEXT:     Value:     0x00000078
+# CHECK-NEXT:     Section: .data
+# CHECK-NEXT:     Type:      0x0000
+# CHECK-NEXT:     StorageClass:    C_HIDEXT
+# CHECK-NEXT:     NumberOfAuxEntries: 1
+# CHECK-NEXT: ...
diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index 009b99c1f7bf0..59293fe80eedb 100644
--- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -66,7 +66,7 @@ void XCOFFDumper::printFileHeaders() {
   }
 
   W.printHex("SymbolTableOffset", Obj.getSymbolTableOffset());
-  int32_t SymTabEntries = Obj.getNumberOfSymbolTableEntries();
+  int32_t SymTabEntries = Obj.getRawNumberOfSymbolTableEntries();
   if (SymTabEntries >= 0)
     W.printNumber("SymbolTableEntries", SymTabEntries);
   else
diff --git a/llvm/tools/obj2yaml/xcoff2yaml.cpp b/llvm/tools/obj2yaml/xcoff2yaml.cpp
index 5ee9eafbcbef7..bcacd2b8d551f 100644
--- a/llvm/tools/obj2yaml/xcoff2yaml.cpp
+++ b/llvm/tools/obj2yaml/xcoff2yaml.cpp
@@ -19,15 +19,20 @@ class XCOFFDumper {
   const object::XCOFFObjectFile &Obj;
   XCOFFYAML::Object YAMLObj;
   void dumpHeader();
+  std::error_code dumpSymbols();
 
 public:
-  XCOFFDumper(const object::XCOFFObjectFile &obj);
+  XCOFFDumper(const object::XCOFFObjectFile &obj) : Obj(obj) {}
+  std::error_code dump();
   XCOFFYAML::Object &getYAMLObj() { return YAMLObj; }
 };
 } // namespace
 
-XCOFFDumper::XCOFFDumper(const object::XCOFFObjectFile &obj) : Obj(obj) {
+std::error_code XCOFFDumper::dump() {
+  std::error_code EC;
   dumpHeader();
+  EC = dumpSymbols();
+  return EC;
 }
 
 void XCOFFDumper::dumpHeader() {
@@ -42,9 +47,45 @@ void XCOFFDumper::dumpHeader() {
   YAMLObj.Header.Flags = FileHdrPtr->Flags;
 }
 
+std::error_code XCOFFDumper::dumpSymbols() {
+  std::vector<XCOFFYAML::Symbol> &Symbols = YAMLObj.Symbols;
+
+  for (const SymbolRef &S : Obj.symbols()) {
+    DataRefImpl SymbolDRI = S.getRawDataRefImpl();
+    const XCOFFSymbolEntry *SymbolEntPtr = Obj.toSymbolEntry(SymbolDRI);
+    XCOFFYAML::Symbol Sym;
+
+    Expected<StringRef> SymNameRefOrErr = Obj.getSymbolName(SymbolDRI);
+    if (!SymNameRefOrErr) {
+      return errorToErrorCode(SymNameRefOrErr.takeError());
+    }
+    Sym.SymbolName = SymNameRefOrErr.get();
+
+    Sym.Value = SymbolEntPtr->Value;
+
+    Expected<StringRef> SectionNameRefOrErr =
+        Obj.getSymbolSectionName(SymbolEntPtr);
+    if (!SectionNameRefOrErr)
+      return errorToErrorCode(SectionNameRefOrErr.takeError());
+
+    Sym.SectionName = SectionNameRefOrErr.get();
+
+    Sym.Type = SymbolEntPtr->SymbolType;
+    Sym.StorageClass = SymbolEntPtr->StorageClass;
+    Sym.NumberOfAuxEntries = SymbolEntPtr->NumberOfAuxEntries;
+    Symbols.push_back(Sym);
+  }
+
+  return std::error_code();
+}
+
 std::error_code xcoff2yaml(raw_ostream &Out,
                            const object::XCOFFObjectFile &Obj) {
   XCOFFDumper Dumper(Obj);
+
+  if (std::error_code EC = Dumper.dump())
+    return EC;
+
   yaml::Output Yout(Out);
   Yout << Dumper.getYAMLObj();
 

From dfc0ca0c80cc05ef5c8ded8e88d4367f2fd344a2 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Tue, 28 May 2019 14:47:39 +0000
Subject: [PATCH 0364/1176] [clang-tidy] Verify fix description for
 misc-unused-using-decl.

Reviewers: gribozavr

Subscribers: xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62522

llvm-svn: 361833
---
 clang-tools-extra/test/clang-tidy/misc-unused-using-decls.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang-tools-extra/test/clang-tidy/misc-unused-using-decls.cpp b/clang-tools-extra/test/clang-tidy/misc-unused-using-decls.cpp
index 65ef0dac22b14..eed0baeb3ebf3 100644
--- a/clang-tools-extra/test/clang-tidy/misc-unused-using-decls.cpp
+++ b/clang-tools-extra/test/clang-tidy/misc-unused-using-decls.cpp
@@ -79,6 +79,7 @@ T ff() { T t; return t; }
 // eol-comments aren't removed (yet)
 using n::A; // A
 // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: using decl 'A' is unused
+// CHECK-MESSAGES: :[[@LINE-2]]:10: note: remove the using
 // CHECK-FIXES: {{^}}// A
 using n::B;
 using n::C;

From eb006d3268cb8e2c2d005b45402204d6a203101b Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Tue, 28 May 2019 14:50:42 +0000
Subject: [PATCH 0365/1176] [clang-tidy] Fix description for
 misc-definitions-in-headers.

Reviewers: gribozavr

Subscribers: xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62524

llvm-svn: 361834
---
 .../clang-tidy/misc/DefinitionsInHeadersCheck.cpp        | 9 ++++++---
 .../test/clang-tidy/misc-definitions-in-headers.hpp      | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp
index a36f307b1bf93..a496e3b292fde 100644
--- a/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp
@@ -124,13 +124,16 @@ void DefinitionsInHeadersCheck::check(const MatchFinder::MatchResult &Result) {
       }
     }
 
-    bool is_full_spec = FD->getTemplateSpecializationKind() != TSK_Undeclared;
+    bool IsFullSpec = FD->getTemplateSpecializationKind() != TSK_Undeclared;
     diag(FD->getLocation(),
          "%select{function|full function template specialization}0 %1 defined "
          "in a header file; function definitions in header files can lead to "
          "ODR violations")
-        << is_full_spec << FD << FixItHint::CreateInsertion(
-                     FD->getReturnTypeSourceRange().getBegin(), "inline ");
+        << IsFullSpec << FD;
+    diag(FD->getLocation(), /*FixDescription=*/"make as 'inline'",
+         DiagnosticIDs::Note)
+        << FixItHint::CreateInsertion(FD->getReturnTypeSourceRange().getBegin(),
+                                      "inline ");
   } else if (const auto *VD = dyn_cast<VarDecl>(ND)) {
     // Static data members of a class template are allowed.
     if (VD->getDeclContext()->isDependentContext() && VD->isStaticDataMember())
diff --git a/clang-tools-extra/test/clang-tidy/misc-definitions-in-headers.hpp b/clang-tools-extra/test/clang-tidy/misc-definitions-in-headers.hpp
index 5e83e68e8462b..662b0619cdc06 100644
--- a/clang-tools-extra/test/clang-tidy/misc-definitions-in-headers.hpp
+++ b/clang-tools-extra/test/clang-tidy/misc-definitions-in-headers.hpp
@@ -2,6 +2,7 @@
 
 int f() {
 // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: function 'f' defined in a header file; function definitions in header files can lead to ODR violations [misc-definitions-in-headers]
+// CHECK-MESSAGES: :[[@LINE-2]]:5: note: make as 'inline'
 // CHECK-FIXES: inline int f() {
   return 1;
 }

From 88ab281b4d57d2ae965b335afb74d73407c233c7 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 28 May 2019 14:54:49 +0000
Subject: [PATCH 0366/1176] llvm-undname: Add support for local static thread
 guards

llvm-svn: 361835
---
 llvm/include/llvm/Demangle/MicrosoftDemangle.h      | 2 +-
 llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 1 +
 llvm/lib/Demangle/MicrosoftDemangle.cpp             | 7 +++++--
 llvm/lib/Demangle/MicrosoftDemangleNodes.cpp        | 5 ++++-
 llvm/test/Demangle/ms-operators.test                | 3 +++
 5 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
index 6b3c7c9967fbd..423fc2eac8588 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -223,7 +223,7 @@ class Demangler {
   demangleSpecialTableSymbolNode(StringView &MangledName,
                                  SpecialIntrinsicKind SIK);
   LocalStaticGuardVariableNode *
-  demangleLocalStaticGuard(StringView &MangledName);
+  demangleLocalStaticGuard(StringView &MangledName, bool IsThread);
   VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
                                               StringView &MangledName,
                                               StringView VariableName);
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index dde9ae7bca636..63111bc559100 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -408,6 +408,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode {
 
   void output(OutputStream &OS, OutputFlags Flags) const override;
 
+  bool IsThread = false;
   uint32_t ScopeIndex = 0;
 };
 
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index e596f0cea1c8e..4abb48e575733 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -293,9 +293,10 @@ Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
 }
 
 LocalStaticGuardVariableNode *
-Demangler::demangleLocalStaticGuard(StringView &MangledName) {
+Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
   LocalStaticGuardIdentifierNode *LSGI =
       Arena.alloc<LocalStaticGuardIdentifierNode>();
+  LSGI->IsThread = IsThread;
   QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
   LocalStaticGuardVariableNode *LSGVN =
       Arena.alloc<LocalStaticGuardVariableNode>();
@@ -443,7 +444,9 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
   case SpecialIntrinsicKind::VcallThunk:
     return demangleVcallThunkNode(MangledName);
   case SpecialIntrinsicKind::LocalStaticGuard:
-    return demangleLocalStaticGuard(MangledName);
+    return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
+  case SpecialIntrinsicKind::LocalStaticThreadGuard:
+    return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
   case SpecialIntrinsicKind::RttiTypeDescriptor: {
     TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
     if (Error)
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index a3ee5f9d91d4d..2c1e5f44f4227 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -349,7 +349,10 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
 
 void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
                                             OutputFlags Flags) const {
-  OS << "`local static guard'";
+  if (IsThread)
+    OS << "`local static thread guard'";
+  else
+    OS << "`local static guard'";
   if (ScopeIndex > 0)
     OS << "{" << ScopeIndex << "}";
 }
diff --git a/llvm/test/Demangle/ms-operators.test b/llvm/test/Demangle/ms-operators.test
index d18c2794d2d41..6a22ebda17145 100644
--- a/llvm/test/Demangle/ms-operators.test
+++ b/llvm/test/Demangle/ms-operators.test
@@ -230,6 +230,9 @@
 ??__F_decisionToDFA@XPathLexer@@0V?$vector@VDFA@dfa@antlr4@@V?$allocator@VDFA@dfa@antlr4@@@std@@@std@@A@YAXXZ
 ; CHECK: void __cdecl `dynamic atexit destructor for `private: static class std::vector<class antlr4::dfa::DFA, class std::allocator<class antlr4::dfa::DFA>> XPathLexer::_decisionToDFA''(void)
 
+??__J?1??f@@YAAAUS@@XZ@51
+; CHECK: `struct S & __cdecl f(void)'::`2'::`local static thread guard'{2}
+
 ??__K_deg@@YAHO@Z
 ; CHECK: int __cdecl operator ""_deg(long double)
 

From 9bf766c57343f14320c1ad6db456ce127f4d548c Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 15:04:39 +0000
Subject: [PATCH 0367/1176] Revert r361826, as it still breaks LLDB.

llvm-svn: 361837
---
 llvm/CMakeLists.txt                        |  4 ----
 llvm/cmake/modules/ChooseMSVCCRT.cmake     | 26 +---------------------
 llvm/cmake/modules/HandleLLVMOptions.cmake |  8 -------
 3 files changed, 1 insertion(+), 37 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 5862644dff118..895f9ab7189d1 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -435,10 +435,6 @@ option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF)
 option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
 
-if (MSVC)
-   option(LLVM_ENABLE_INCREMENTAL_LINK "Link incrementally. Enabling it might produce slower executables." OFF)
-endif()
-
 option(LLVM_ENABLE_DUMP "Enable dump functions even when assertions are disabled" OFF)
 
 if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
diff --git a/llvm/cmake/modules/ChooseMSVCCRT.cmake b/llvm/cmake/modules/ChooseMSVCCRT.cmake
index 50bddf83ef549..0e6e1aa55254e 100644
--- a/llvm/cmake/modules/ChooseMSVCCRT.cmake
+++ b/llvm/cmake/modules/ChooseMSVCCRT.cmake
@@ -50,17 +50,6 @@ macro(set_flag_in_var flagsvar regex flag)
   set(${flagsvar} "${${flagsvar}}" CACHE STRING "${flagsvar_docs}" FORCE)
 endmacro(set_flag_in_var)
 
-macro(disable_MT_if_LLDB build message)
-  if (LLVM_TOOL_LLDB_BUILD)
-    if ((NOT ${build} STREQUAL "DEBUG") AND (LLVM_USE_CRT_${build} STREQUAL "MT"))
-      if (LLVM_TOOL_CLANG_BUILD OR LLVM_TOOL_LLD_BUILD)
-        set(performance " This might impact runtime performance for Clang or LLD. Preferably build them separately.")
-      endif()
-      message(WARNING "${message}.${performance}")
-      set(LLVM_USE_CRT_${build} "MD")
-    endif()
-  endif()
-endmacro(disable_MT_if_LLDB)
 
 macro(choose_msvc_crt MSVC_CRT)
   if(LLVM_USE_CRT)
@@ -77,26 +66,13 @@ variables (LLVM_USE_CRT_DEBUG, etc) instead.")
       get_current_crt(LLVM_USE_CRT_${build}
         MSVC_CRT_REGEX
         CMAKE_CXX_FLAGS_${build})
-
-      # Make /MT the default in Release builds to make them faster
-      # and avoid the DLL function thunking.
-      if ((${build} STREQUAL "MINSIZEREL") OR
-          (${build} STREQUAL "RELEASE") OR
-          (${build} STREQUAL "RELWITHDEBINFO"))
-          set(LLVM_USE_CRT_${build} "MT")
-      endif()
-
-      disable_MT_if_LLDB(${build} "Using /MD as required by LLDB")
-
       set(LLVM_USE_CRT_${build}
         "${LLVM_USE_CRT_${build}}"
         CACHE STRING "Specify VC++ CRT to use for ${build_type} configurations."
         FORCE)
       set_property(CACHE LLVM_USE_CRT_${build}
         PROPERTY STRINGS ;${${MSVC_CRT}})
-    else()
-      disable_MT_if_LLDB(${build} "Disabling /MT as required by LLDB")
-    endif()
+    endif(NOT LLVM_USE_CRT_${build})
   endforeach(build_type)
 
   foreach(build_type ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE})
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 3154c1487fa29..cb9a01e1d39f7 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -418,14 +418,6 @@ if( MSVC )
   # "Enforce type conversion rules".
   append("/Zc:rvalueCast" CMAKE_CXX_FLAGS)
 
-  if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" AND NOT LLVM_ENABLE_INCREMENTAL_LINK)
-    foreach(CONFIG RELEASE RELWITHDEBINFO MINSIZEREL)
-      foreach(FLAG EXE MODULE SHARED STATIC)
-        string(REGEX REPLACE "[-/](INCREMENTAL:YES|INCREMENTAL:NO|INCREMENTAL)" "/INCREMENTAL:NO" CMAKE_${FLAG}_LINKER_FLAGS_${CONFIG} "${CMAKE_${FLAG}_LINKER_FLAGS_${CONFIG}}")
-      endforeach()
-    endforeach()
-  endif()
-
   if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT LLVM_ENABLE_LTO)
     # clang-cl and cl by default produce non-deterministic binaries because
     # link.exe /incremental requires a timestamp in the .obj file.  clang-cl

From b1296faee050c65ca1e31632c2dc881984c97ad7 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 15:21:03 +0000
Subject: [PATCH 0368/1176] [CodeComplete] Set preferred type for qualified-id

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62514

llvm-svn: 361838
---
 clang/include/clang/Sema/Sema.h           |  4 ++--
 clang/lib/Parse/ParseExprCXX.cpp          |  5 ++++-
 clang/lib/Sema/SemaCodeComplete.cpp       | 16 +++++++++-----
 clang/unittests/Sema/CodeCompleteTest.cpp | 26 +++++++++++++++++++++++
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 60480d98bebe3..a1a7f5f6ea942 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10646,8 +10646,8 @@ class Sema {
   void CodeCompleteInitializer(Scope *S, Decl *D);
   void CodeCompleteAfterIf(Scope *S);
 
-  void CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
-                               bool EnteringContext, QualType BaseType);
+  void CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS, bool EnteringContext,
+                               QualType BaseType, QualType PreferredType);
   void CodeCompleteUsing(Scope *S);
   void CodeCompleteUsingDirective(Scope *S);
   void CodeCompleteNamespaceDecl(Scope *S);
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 6173a6cd63ed5..da39e2e2237b9 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -232,13 +232,16 @@ bool Parser::ParseOptionalCXXScopeSpecifier(CXXScopeSpec &SS,
     HasScopeSpecifier = true;
   }
 
+  // Preferred type might change when parsing qualifiers, we need the original.
+  auto SavedType = PreferredType;
   while (true) {
     if (HasScopeSpecifier) {
       if (Tok.is(tok::code_completion)) {
         // Code completion for a nested-name-specifier, where the code
         // completion token follows the '::'.
         Actions.CodeCompleteQualifiedId(getCurScope(), SS, EnteringContext,
-                                        ObjectType.get());
+                                        ObjectType.get(),
+                                        SavedType.get(SS.getBeginLoc()));
         // Include code completion token into the range of the scope otherwise
         // when we try to annotate the scope tokens the dangling code completion
         // token will cause assertion in
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 3682dd0f4d4c9..4f474032f6066 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -5215,7 +5215,8 @@ void Sema::CodeCompleteAfterIf(Scope *S) {
 }
 
 void Sema::CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
-                                   bool EnteringContext, QualType BaseType) {
+                                   bool EnteringContext, QualType BaseType,
+                                   QualType PreferredType) {
   if (SS.isEmpty() || !CodeCompleter)
     return;
 
@@ -5224,13 +5225,15 @@ void Sema::CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
   // it can be useful for global code completion which have information about
   // contexts/symbols that are not in the AST.
   if (SS.isInvalid()) {
-    CodeCompletionContext CC(CodeCompletionContext::CCC_Symbol);
+    CodeCompletionContext CC(CodeCompletionContext::CCC_Symbol, PreferredType);
     CC.setCXXScopeSpecifier(SS);
     // As SS is invalid, we try to collect accessible contexts from the current
     // scope with a dummy lookup so that the completion consumer can try to
     // guess what the specified scope is.
     ResultBuilder DummyResults(*this, CodeCompleter->getAllocator(),
                                CodeCompleter->getCodeCompletionTUInfo(), CC);
+    if (!PreferredType.isNull())
+      DummyResults.setPreferredType(PreferredType);
     if (S->getEntity()) {
       CodeCompletionDeclConsumer Consumer(DummyResults, S->getEntity(),
                                           BaseType);
@@ -5253,9 +5256,12 @@ void Sema::CodeCompleteQualifiedId(Scope *S, CXXScopeSpec &SS,
   if (!isDependentScopeSpecifier(SS) && RequireCompleteDeclContext(SS, Ctx))
     return;
 
-  ResultBuilder Results(*this, CodeCompleter->getAllocator(),
-                        CodeCompleter->getCodeCompletionTUInfo(),
-                        CodeCompletionContext::CCC_Symbol);
+  ResultBuilder Results(
+      *this, CodeCompleter->getAllocator(),
+      CodeCompleter->getCodeCompletionTUInfo(),
+      CodeCompletionContext(CodeCompletionContext::CCC_Symbol, PreferredType));
+  if (!PreferredType.isNull())
+    Results.setPreferredType(PreferredType);
   Results.EnterNewScope();
 
   // The "template" keyword can follow "::" in the grammar, but only
diff --git a/clang/unittests/Sema/CodeCompleteTest.cpp b/clang/unittests/Sema/CodeCompleteTest.cpp
index 4e1068f4a3945..1d0e732384916 100644
--- a/clang/unittests/Sema/CodeCompleteTest.cpp
+++ b/clang/unittests/Sema/CodeCompleteTest.cpp
@@ -454,5 +454,31 @@ TEST(PreferredTypeTest, FunctionArguments) {
     }
   )cpp";
   EXPECT_THAT(collectPreferredTypes(Code), Each("volatile double *"));
+
+  Code = R"cpp(
+    namespace ns {
+      struct vector {
+      };
+    }
+    void accepts_vector(ns::vector);
+
+    void test() {
+      accepts_vector(^::^ns::^vector());
+    }
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(Code), Each("ns::vector"));
+
+  Code = R"cpp(
+    template <class T>
+    struct vector { using self = vector; };
+
+    void accepts_vector(vector<int>);
+    int foo(int);
+
+    void test() {
+      accepts_vector(^::^vector<decltype(foo(1))>::^self);
+    }
+  )cpp";
+  EXPECT_THAT(collectPreferredTypes(Code), Each("vector<int>"));
 }
 } // namespace

From a2ca6e7803b7e5583bbc38d68002d32976a6f4b0 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 28 May 2019 15:30:04 +0000
Subject: [PATCH 0369/1176] llvm-undname: Support demangling char8_t

Ports clang's mangling support added in r354633 to llvm-undname.

llvm-svn: 361839
---
 llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 1 +
 llvm/lib/Demangle/MicrosoftDemangle.cpp             | 2 ++
 llvm/lib/Demangle/MicrosoftDemangleNodes.cpp        | 1 +
 llvm/test/Demangle/ms-cxx20.test                    | 3 +++
 4 files changed, 7 insertions(+)

diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 63111bc559100..df384e7362a72 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -82,6 +82,7 @@ enum class PrimitiveKind {
   Char,
   Schar,
   Uchar,
+  Char8,
   Char16,
   Char32,
   Short,
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 4abb48e575733..425c7d3fd19bc 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -1944,6 +1944,8 @@ PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
     case 'W':
       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
+    case 'Q':
+      return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
     case 'S':
       return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
     case 'U':
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 2c1e5f44f4227..c26151c5b19f1 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -130,6 +130,7 @@ void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char, "char");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Schar, "signed char");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uchar, "unsigned char");
+    OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char8, "char8_t");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char16, "char16_t");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char32, "char32_t");
     OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Short, "short");
diff --git a/llvm/test/Demangle/ms-cxx20.test b/llvm/test/Demangle/ms-cxx20.test
index 7e116f1ebcd49..587f41aa49bae 100644
--- a/llvm/test/Demangle/ms-cxx20.test
+++ b/llvm/test/Demangle/ms-cxx20.test
@@ -7,3 +7,6 @@
 
 ??__MS@@QEAA?AVstrong_ordering@std@@AEBU0@@Z'
 ; CHECK: class std::strong_ordering __cdecl S::operator<=>(struct S const &)
+
+?f@@YAX_Q@Z
+; CHECK: void __cdecl f(char8_t)

From 756565d47079044b31b88bbcb8f71518c9526bd8 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 15:32:11 +0000
Subject: [PATCH 0370/1176] Fix 'warning: comparison is always true due to
 limited range of data type [-Wtype-limits]' with GCC 7.3

llvm-svn: 361840
---
 lld/COFF/Chunks.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 688d69ba087ef..901333e041706 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -860,7 +860,7 @@ MergeChunk::MergeChunk(uint32_t Alignment)
 void MergeChunk::addSection(SectionChunk *C) {
   assert(isPowerOf2_32(C->getAlignment()));
   uint8_t P2Align = llvm::Log2_32(C->getAlignment());
-  assert(P2Align >= 0 && P2Align < array_lengthof(Instances));
+  assert(P2Align < array_lengthof(Instances));
   auto *&MC = Instances[P2Align];
   if (!MC)
     MC = make<MergeChunk>(C->getAlignment());

From 8534675cefb427f33c4fa083d2751ef09acccaf5 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 15:33:37 +0000
Subject: [PATCH 0371/1176] [clangd] Place cursor better after completing
 patterns

Summary:
By producing the $0 marker in the snippets at the last placeholder.
This produces nicer results in most cases, e.g. for
   namespace <#name#> {
     <#decls#>
   }

we now produce ${0:decls} instead of ${2:decls} and the final cursor
placement is more convenient.

Reviewers: hokein

Reviewed By: hokein

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62389

llvm-svn: 361841
---
 clang-tools-extra/clangd/CodeComplete.cpp     |  3 ++-
 .../clangd/CodeCompletionStrings.cpp          | 27 ++++++++++++++++---
 .../clangd/CodeCompletionStrings.h            |  8 ++++--
 .../clangd/unittests/CodeCompleteTests.cpp    | 22 +++++++++++++++
 .../unittests/CodeCompletionStringsTests.cpp  | 25 +++++++++++++++--
 5 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 7f811c31de5bc..4328a647f4bbe 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -394,8 +394,9 @@ struct CodeCompletionBuilder {
     Bundled.emplace_back();
     BundledEntry &S = Bundled.back();
     if (C.SemaResult) {
+      bool IsPattern = C.SemaResult->Kind == CodeCompletionResult::RK_Pattern;
       getSignature(*SemaCCS, &S.Signature, &S.SnippetSuffix,
-                   &Completion.RequiredQualifier);
+                   &Completion.RequiredQualifier, IsPattern);
       S.ReturnType = getReturnType(*SemaCCS);
     } else if (C.IndexResult) {
       S.Signature = C.IndexResult->Signature;
diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.cpp b/clang-tools-extra/clangd/CodeCompletionStrings.cpp
index 586be67e92c30..bf3cabc269820 100644
--- a/clang-tools-extra/clangd/CodeCompletionStrings.cpp
+++ b/clang-tools-extra/clangd/CodeCompletionStrings.cpp
@@ -11,6 +11,8 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/RawCommentList.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Sema/CodeCompleteConsumer.h"
+#include <limits>
 #include <utility>
 
 namespace clang {
@@ -73,8 +75,23 @@ std::string getDeclComment(const ASTContext &Ctx, const NamedDecl &Decl) {
 }
 
 void getSignature(const CodeCompletionString &CCS, std::string *Signature,
-                  std::string *Snippet, std::string *RequiredQualifiers) {
-  unsigned ArgCount = 0;
+                  std::string *Snippet, std::string *RequiredQualifiers,
+                  bool CompletingPattern) {
+  // Placeholder with this index will be ${0:…} to mark final cursor position.
+  // Usually we do not add $0, so the cursor is placed at end of completed text.
+  unsigned CursorSnippetArg = std::numeric_limits<unsigned>::max();
+  if (CompletingPattern) {
+    // In patterns, it's best to place the cursor at the last placeholder, to
+    // handle cases like
+    //    namespace ${1:name} {
+    //      ${0:decls}
+    //    }
+    CursorSnippetArg =
+        llvm::count_if(CCS, [](const CodeCompletionString::Chunk &C) {
+          return C.Kind == CodeCompletionString::CK_Placeholder;
+        });
+  }
+  unsigned SnippetArg = 0;
   bool HadObjCArguments = false;
   for (const auto &Chunk : CCS) {
     // Informative qualifier chunks only clutter completion results, skip
@@ -124,8 +141,10 @@ void getSignature(const CodeCompletionString &CCS, std::string *Signature,
       break;
     case CodeCompletionString::CK_Placeholder:
       *Signature += Chunk.Text;
-      ++ArgCount;
-      *Snippet += "${" + std::to_string(ArgCount) + ':';
+      ++SnippetArg;
+      *Snippet +=
+          "${" +
+          std::to_string(SnippetArg == CursorSnippetArg ? 0 : SnippetArg) + ':';
       appendEscapeSnippet(Chunk.Text, Snippet);
       *Snippet += '}';
       break;
diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.h b/clang-tools-extra/clangd/CodeCompletionStrings.h
index 153e0af1189e6..6733d0231df49 100644
--- a/clang-tools-extra/clangd/CodeCompletionStrings.h
+++ b/clang-tools-extra/clangd/CodeCompletionStrings.h
@@ -38,12 +38,16 @@ std::string getDeclComment(const ASTContext &Ctx, const NamedDecl &D);
 /// Formats the signature for an item, as a display string and snippet.
 /// e.g. for const_reference std::vector<T>::at(size_type) const, this returns:
 ///   *Signature = "(size_type) const"
-///   *Snippet = "(${0:size_type})"
+///   *Snippet = "(${1:size_type})"
 /// If set, RequiredQualifiers is the text that must be typed before the name.
 /// e.g "Base::" when calling a base class member function that's hidden.
+///
+/// When \p CompletingPattern is true, the last placeholder will be of the form
+/// ${0:…}, indicating the cursor should stay there.
 void getSignature(const CodeCompletionString &CCS, std::string *Signature,
                   std::string *Snippet,
-                  std::string *RequiredQualifiers = nullptr);
+                  std::string *RequiredQualifiers = nullptr,
+                  bool CompletingPattern = false);
 
 /// Assembles formatted documentation for a completion result. This includes
 /// documentation comments and other relevant information like annotations.
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 8f8376e25bc4c..4dcb8701a3594 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -2382,6 +2382,28 @@ TEST(CompletionTest, ObjectiveCMethodTwoArgumentsFromMiddle) {
   EXPECT_THAT(C, ElementsAre(SnippetSuffix("${1:(unsigned int)}")));
 }
 
+TEST(CompletionTest, CursorInSnippets) {
+  clangd::CodeCompleteOptions Options;
+  Options.EnableSnippets = true;
+  auto Results = completions(
+      R"cpp(
+    void while_foo(int a, int b);
+    void test() {
+      whil^
+    })cpp",
+      /*IndexSymbols=*/{}, Options);
+
+  // Last placeholder in code patterns should be $0 to put the cursor there.
+  EXPECT_THAT(
+      Results.Completions,
+      Contains(AllOf(Named("while"),
+                     SnippetSuffix("(${1:condition}){${0:statements}\n}"))));
+  // However, snippets for functions must *not* end with $0.
+  EXPECT_THAT(Results.Completions,
+              Contains(AllOf(Named("while_foo"),
+                             SnippetSuffix("(${1:int a}, ${2:int b})"))));
+}
+
 TEST(CompletionTest, WorksWithNullType) {
   auto R = completions(R"cpp(
     int main() {
diff --git a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
index 43429c864655c..83b3826f6fc2b 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
@@ -22,10 +22,12 @@ class CompletionStringTest : public ::testing::Test {
         CCTUInfo(Allocator), Builder(*Allocator, CCTUInfo) {}
 
 protected:
-  void computeSignature(const CodeCompletionString &CCS) {
+  void computeSignature(const CodeCompletionString &CCS,
+                        bool CompletingPattern = false) {
     Signature.clear();
     Snippet.clear();
-    getSignature(CCS, &Signature, &Snippet);
+    getSignature(CCS, &Signature, &Snippet, /*RequiredQualifier=*/nullptr,
+                 CompletingPattern);
   }
 
   std::shared_ptr<clang::GlobalCodeCompletionAllocator> Allocator;
@@ -99,6 +101,25 @@ TEST_F(CompletionStringTest, EscapeSnippet) {
   EXPECT_EQ(Snippet, "(${1:\\$p\\}1\\\\})");
 }
 
+TEST_F(CompletionStringTest, SnippetsInPatterns) {
+  auto MakeCCS = [this]() -> const CodeCompletionString & {
+    CodeCompletionBuilder Builder(*Allocator, CCTUInfo);
+    Builder.AddTypedTextChunk("namespace");
+    Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
+    Builder.AddPlaceholderChunk("name");
+    Builder.AddChunk(CodeCompletionString::CK_Equal);
+    Builder.AddPlaceholderChunk("target");
+    Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+    return *Builder.TakeString();
+  };
+  computeSignature(MakeCCS(), /*CompletingPattern=*/false);
+  EXPECT_EQ(Snippet, " ${1:name} = ${2:target};");
+
+  // When completing a pattern, the last placeholder holds the cursor position.
+  computeSignature(MakeCCS(), /*CompletingPattern=*/true);
+  EXPECT_EQ(Snippet, " ${1:name} = ${0:target};");
+}
+
 TEST_F(CompletionStringTest, IgnoreInformativeQualifier) {
   Builder.AddTypedTextChunk("X");
   Builder.AddInformativeChunk("info ok");

From ebe22a1774ed433534a63af0bf5fdc5b5bd821b4 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 15:35:23 +0000
Subject: [PATCH 0372/1176] [LLD][COFF] Early load PDB type server files

We need to have all input files ready before doing debuginfo type merging.
This patch is moving the late PDB type server discovery much earlier in the process, when the explicit inputs (OBJs, LIBs) are loaded.
The short term goal is to parallelize type merging.

Differential Revision: https://reviews.llvm.org/D60095

llvm-svn: 361842
---
 lld/COFF/DebugTypes.cpp                       | 213 ++++++++++++++++--
 lld/COFF/DebugTypes.h                         |  29 ++-
 lld/COFF/Driver.cpp                           |   4 +
 lld/COFF/Driver.h                             |   4 +-
 lld/COFF/PDB.cpp                              | 120 +---------
 .../pdb-type-server-valid-signature.yaml      |   2 +-
 .../pdb-type-server-invalid-signature.yaml    |   4 +-
 7 files changed, 235 insertions(+), 141 deletions(-)

diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
index 34f32ea085ca3..4e215194b70c3 100644
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -7,8 +7,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "DebugTypes.h"
+#include "Driver.h"
 #include "InputFiles.h"
+#include "lld/Common/ErrorHandler.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/Support/Path.h"
 
 using namespace lld;
 using namespace lld::coff;
@@ -16,14 +23,44 @@ using namespace llvm;
 using namespace llvm::codeview;
 
 namespace {
+// The TypeServerSource class represents a PDB type server, a file referenced by
+// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
+// files, therefore there must be only once instance per OBJ lot. The file path
+// is discovered from the dependent OBJ's debug type stream. The
+// TypeServerSource object is then queued and loaded by the COFF Driver. The
+// debug type stream for such PDB files will be merged first in the final PDB,
+// before any dependent OBJ.
 class TypeServerSource : public TpiSource {
 public:
-  TypeServerSource(ObjFile *F) : TpiSource(PDB, F) {}
+  explicit TypeServerSource(MemoryBufferRef M, llvm::pdb::NativeSession *S)
+      : TpiSource(PDB, nullptr), Session(S), MB(M) {}
+
+  // Queue a PDB type server for loading in the COFF Driver
+  static void enqueue(const ObjFile *DependentFile,
+                      const TypeServer2Record &TS);
+
+  // Create an instance
+  static Expected<TypeServerSource *> getInstance(MemoryBufferRef M);
+
+  // Fetch the PDB instance loaded for a corresponding dependent OBJ.
+  static Expected<TypeServerSource *>
+  findFromFile(const ObjFile *DependentFile);
+
+  static std::map<std::string, std::pair<std::string, TypeServerSource *>>
+      Instances;
+
+  // The interface to the PDB (if it was opened successfully)
+  std::unique_ptr<llvm::pdb::NativeSession> Session;
+
+private:
+  MemoryBufferRef MB;
 };
 
+// This class represents the debug type stream of an OBJ file that depends on a
+// PDB type server (see TypeServerSource).
 class UseTypeServerSource : public TpiSource {
 public:
-  UseTypeServerSource(ObjFile *F, TypeServer2Record *TS)
+  UseTypeServerSource(const ObjFile *F, const TypeServer2Record *TS)
       : TpiSource(UsingPDB, F), TypeServerDependency(*TS) {}
 
   // Information about the PDB type server dependency, that needs to be loaded
@@ -31,14 +68,20 @@ class UseTypeServerSource : public TpiSource {
   TypeServer2Record TypeServerDependency;
 };
 
+// This class represents the debug type stream of a Microsoft precompiled
+// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
+// PDB, before any other OBJs that depend on this. Note that only MSVC generate
+// such files, clang does not.
 class PrecompSource : public TpiSource {
 public:
-  PrecompSource(ObjFile *F) : TpiSource(PCH, F) {}
+  PrecompSource(const ObjFile *F) : TpiSource(PCH, F) {}
 };
 
+// This class represents the debug type stream of an OBJ file that depends on a
+// Microsoft precompiled headers OBJ (see PrecompSource).
 class UsePrecompSource : public TpiSource {
 public:
-  UsePrecompSource(ObjFile *F, PrecompRecord *Precomp)
+  UsePrecompSource(const ObjFile *F, const PrecompRecord *Precomp)
       : TpiSource(UsingPCH, F), PrecompDependency(*Precomp) {}
 
   // Information about the Precomp OBJ dependency, that needs to be loaded in
@@ -49,40 +92,176 @@ class UsePrecompSource : public TpiSource {
 
 static std::vector<std::unique_ptr<TpiSource>> GC;
 
-TpiSource::TpiSource(TpiKind K, ObjFile *F) : Kind(K), File(F) {
+TpiSource::TpiSource(TpiKind K, const ObjFile *F) : Kind(K), File(F) {
   GC.push_back(std::unique_ptr<TpiSource>(this));
 }
 
-TpiSource *coff::makeTpiSource(ObjFile *F) {
+TpiSource *lld::coff::makeTpiSource(const ObjFile *F) {
   return new TpiSource(TpiSource::Regular, F);
 }
 
-TpiSource *coff::makeTypeServerSource(ObjFile *F) {
-  return new TypeServerSource(F);
-}
-
-TpiSource *coff::makeUseTypeServerSource(ObjFile *F, TypeServer2Record *TS) {
+TpiSource *lld::coff::makeUseTypeServerSource(const ObjFile *F,
+                                              const TypeServer2Record *TS) {
+  TypeServerSource::enqueue(F, *TS);
   return new UseTypeServerSource(F, TS);
 }
 
-TpiSource *coff::makePrecompSource(ObjFile *F) { return new PrecompSource(F); }
+TpiSource *lld::coff::makePrecompSource(const ObjFile *F) {
+  return new PrecompSource(F);
+}
 
-TpiSource *coff::makeUsePrecompSource(ObjFile *F, PrecompRecord *Precomp) {
+TpiSource *lld::coff::makeUsePrecompSource(const ObjFile *F,
+                                           const PrecompRecord *Precomp) {
   return new UsePrecompSource(F, Precomp);
 }
 
 namespace lld {
 namespace coff {
 template <>
-const PrecompRecord &retrieveDependencyInfo(TpiSource *Source) {
+const PrecompRecord &retrieveDependencyInfo(const TpiSource *Source) {
   assert(Source->Kind == TpiSource::UsingPCH);
-  return ((UsePrecompSource *)Source)->PrecompDependency;
+  return ((const UsePrecompSource *)Source)->PrecompDependency;
 }
 
 template <>
-const TypeServer2Record &retrieveDependencyInfo(TpiSource *Source) {
+const TypeServer2Record &retrieveDependencyInfo(const TpiSource *Source) {
   assert(Source->Kind == TpiSource::UsingPDB);
-  return ((UseTypeServerSource *)Source)->TypeServerDependency;
+  return ((const UseTypeServerSource *)Source)->TypeServerDependency;
 }
 } // namespace coff
 } // namespace lld
+
+std::map<std::string, std::pair<std::string, TypeServerSource *>>
+    TypeServerSource::Instances;
+
+// Make a PDB path assuming the PDB is in the same folder as the OBJ
+static std::string getPdbBaseName(const ObjFile *File, StringRef TSPath) {
+  StringRef LocalPath =
+      !File->ParentName.empty() ? File->ParentName : File->getName();
+  std::string Path = sys::path::parent_path(LocalPath);
+
+  // Currently, type server PDBs are only created by MSVC cl, which only runs
+  // on Windows, so we can assume type server paths are Windows style.
+  return Path + sys::path::filename(TSPath, sys::path::Style::windows).str();
+}
+
+// The casing of the PDB path stamped in the OBJ can differ from the actual path
+// on disk. With this, we ensure to always use lowercase as a key for the
+// PDBInputFile::Instances map, at least on Windows.
+static std::string normalizePdbPath(StringRef path) {
+#if defined(_WIN32)
+  return path.lower();
+#else // LINUX
+  return path;
+#endif
+}
+
+// If existing, return the actual PDB path on disk.
+static Optional<std::string> findPdbPath(StringRef PDBPath,
+                                         const ObjFile *DependentFile) {
+  // Ensure the file exists before anything else. In some cases, if the path
+  // points to a removable device, Driver::enqueuePath() would fail with an
+  // error (EAGAIN, "resource unavailable try again") which we want to skip
+  // silently.
+  if (llvm::sys::fs::exists(PDBPath))
+    return normalizePdbPath(PDBPath);
+  std::string Ret = getPdbBaseName(DependentFile, PDBPath);
+  if (llvm::sys::fs::exists(Ret))
+    return normalizePdbPath(Ret);
+  return None;
+}
+
+// Fetch the PDB instance that was already loaded by the COFF Driver.
+Expected<TypeServerSource *>
+TypeServerSource::findFromFile(const ObjFile *DependentFile) {
+  const TypeServer2Record &TS =
+      retrieveDependencyInfo<TypeServer2Record>(DependentFile->DebugTypesObj);
+
+  Optional<std::string> P = findPdbPath(TS.Name, DependentFile);
+  if (!P)
+    return createFileError(TS.Name, errorCodeToError(std::error_code(
+                                        ENOENT, std::generic_category())));
+
+  auto It = TypeServerSource::Instances.find(*P);
+  // The PDB file exists on disk, at this point we expect it to have been
+  // inserted in the map by TypeServerSource::loadPDB()
+  assert(It != TypeServerSource::Instances.end());
+
+  std::pair<std::string, TypeServerSource *> &PDB = It->second;
+
+  if (!PDB.second)
+    return createFileError(
+        *P, createStringError(inconvertibleErrorCode(), PDB.first.c_str()));
+
+  pdb::PDBFile &PDBFile = (PDB.second)->Session->getPDBFile();
+  pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
+
+  // Just because a file with a matching name was found doesn't mean it can be
+  // used. The GUID and Age must match between the PDB header and the OBJ
+  // TypeServer2 record. The 'Age' is used by MSVC incremental compilation.
+  if (Info.getGuid() != TS.getGuid() || Info.getAge() != TS.getAge())
+    return createFileError(
+        TS.Name,
+        make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
+
+  return PDB.second;
+}
+
+// FIXME: Temporary interface until PDBLinker::maybeMergeTypeServerPDB() is
+// moved here.
+Expected<llvm::pdb::NativeSession *>
+lld::coff::findTypeServerSource(const ObjFile *F) {
+  Expected<TypeServerSource *> TS = TypeServerSource::findFromFile(F);
+  if (!TS)
+    return TS.takeError();
+  return TS.get()->Session.get();
+}
+
+// Queue a PDB type server for loading in the COFF Driver
+void TypeServerSource::enqueue(const ObjFile *DependentFile,
+                               const TypeServer2Record &TS) {
+  // Start by finding where the PDB is located (either the record path or next
+  // to the OBJ file)
+  Optional<std::string> P = findPdbPath(TS.Name, DependentFile);
+  if (!P)
+    return;
+  auto It = TypeServerSource::Instances.emplace(
+      *P, std::pair<std::string, TypeServerSource *>{});
+  if (!It.second)
+    return; // another OBJ already scheduled this PDB for load
+
+  Driver->enqueuePath(*P, false);
+}
+
+// Create an instance of TypeServerSource or an error string if the PDB couldn't
+// be loaded. The error message will be displayed later, when the referring OBJ
+// will be merged in. NOTE - a PDB load failure is not a link error: some
+// debug info will simply be missing from the final PDB - that is the default
+// accepted behavior.
+void lld::coff::loadTypeServerSource(llvm::MemoryBufferRef M) {
+  std::string Path = normalizePdbPath(M.getBufferIdentifier());
+
+  Expected<TypeServerSource *> TS = TypeServerSource::getInstance(M);
+  if (!TS)
+    TypeServerSource::Instances[Path] = {toString(TS.takeError()), nullptr};
+  else
+    TypeServerSource::Instances[Path] = {{}, *TS};
+}
+
+Expected<TypeServerSource *> TypeServerSource::getInstance(MemoryBufferRef M) {
+  std::unique_ptr<llvm::pdb::IPDBSession> ISession;
+  Error Err = pdb::NativeSession::createFromPdb(
+      MemoryBuffer::getMemBuffer(M, false), ISession);
+  if (Err)
+    return std::move(Err);
+
+  std::unique_ptr<llvm::pdb::NativeSession> Session(
+      static_cast<pdb::NativeSession *>(ISession.release()));
+
+  pdb::PDBFile &PDBFile = Session->getPDBFile();
+  Expected<pdb::InfoStream &> Info = PDBFile.getPDBInfoStream();
+  // All PDB Files should have an Info stream.
+  if (!Info)
+    return Info.takeError();
+  return new TypeServerSource(M, Session.release());
+}
diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h
index 0505a354257af..cb03aba5b0d2d 100644
--- a/lld/COFF/DebugTypes.h
+++ b/lld/COFF/DebugTypes.h
@@ -10,12 +10,16 @@
 #define LLD_COFF_DEBUGTYPES_H
 
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 namespace codeview {
 class PrecompRecord;
 class TypeServer2Record;
 } // namespace codeview
+namespace pdb {
+class NativeSession;
+}
 } // namespace llvm
 
 namespace lld {
@@ -27,23 +31,28 @@ class TpiSource {
 public:
   enum TpiKind { Regular, PCH, UsingPCH, PDB, UsingPDB };
 
-  TpiSource(TpiKind K, ObjFile *F);
+  TpiSource(TpiKind K, const ObjFile *F);
   virtual ~TpiSource() {}
 
   const TpiKind Kind;
-  ObjFile *File;
+  const ObjFile *File;
 };
 
-TpiSource *makeTpiSource(ObjFile *F);
-TpiSource *makeTypeServerSource(ObjFile *F);
-TpiSource *makeUseTypeServerSource(ObjFile *F,
-                                   llvm::codeview::TypeServer2Record *TS);
-TpiSource *makePrecompSource(ObjFile *F);
-TpiSource *makeUsePrecompSource(ObjFile *F,
-                                llvm::codeview::PrecompRecord *Precomp);
+TpiSource *makeTpiSource(const ObjFile *F);
+TpiSource *makeUseTypeServerSource(const ObjFile *F,
+                                   const llvm::codeview::TypeServer2Record *TS);
+TpiSource *makePrecompSource(const ObjFile *F);
+TpiSource *makeUsePrecompSource(const ObjFile *F,
+                                const llvm::codeview::PrecompRecord *Precomp);
+
+void loadTypeServerSource(llvm::MemoryBufferRef M);
 
 // Temporary interface to get the dependency
-template <typename T> const T &retrieveDependencyInfo(TpiSource *Source);
+template <typename T> const T &retrieveDependencyInfo(const TpiSource *Source);
+
+// Temporary interface until we move PDBLinker::maybeMergeTypeServerPDB here
+llvm::Expected<llvm::pdb::NativeSession *>
+findTypeServerSource(const ObjFile *F);
 
 } // namespace coff
 } // namespace lld
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index df374f518d94d..7f7fde12980d6 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "DebugTypes.h"
 #include "Driver.h"
 #include "Config.h"
 #include "ICF.h"
@@ -181,6 +182,9 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB,
   case file_magic::coff_import_library:
     Symtab->addFile(make<ObjFile>(MBRef));
     break;
+  case file_magic::pdb:
+    loadTypeServerSource(MBRef);
+    break;
   case file_magic::coff_cl_gl_object:
     error(Filename + ": is not a native COFF file. Recompile without /GL");
     break;
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index f9448bd2d3b16..f0c2ee6a7728d 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -77,6 +77,8 @@ class LinkerDriver {
 
   MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB);
 
+  void enqueuePath(StringRef Path, bool WholeArchive);
+
 private:
   std::unique_ptr<llvm::TarWriter> Tar; // for /linkrepro
 
@@ -120,8 +122,6 @@ class LinkerDriver {
   void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
                         StringRef ParentName, uint64_t OffsetInArchive);
 
-  void enqueuePath(StringRef Path, bool WholeArchive);
-
   void enqueueTask(std::function<void()> Task);
   bool run();
 
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 242235154d05f..39244b1736ddc 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -175,11 +175,6 @@ class PDBLinker {
 
   llvm::SmallString<128> NativePath;
 
-  /// A list of other PDBs which are loaded during the linking process and which
-  /// we need to keep around since the linking operation may reference pointers
-  /// inside of these PDBs.
-  llvm::SmallVector<std::unique_ptr<pdb::NativeSession>, 2> LoadedPDBs;
-
   std::vector<pdb::SecMapEntry> SectionMap;
 
   /// Type index mappings of type server PDBs that we've loaded so far.
@@ -189,10 +184,6 @@ class PDBLinker {
   /// far.
   std::map<uint32_t, CVIndexMap> PrecompTypeIndexMappings;
 
-  /// List of TypeServer PDBs which cannot be loaded.
-  /// Cached to prevent repeated load attempts.
-  std::map<codeview::GUID, std::string> MissingTypeServerPDBs;
-
   // For statistics
   uint64_t GlobalSymbols = 0;
   uint64_t ModuleSymbols = 0;
@@ -416,115 +407,26 @@ PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) {
   return *ObjectIndexMap;
 }
 
-static Expected<std::unique_ptr<pdb::NativeSession>>
-tryToLoadPDB(const codeview::GUID &GuidFromObj, StringRef TSPath) {
-  // Ensure the file exists before anything else. We want to return ENOENT,
-  // "file not found", even if the path points to a removable device (in which
-  // case the return message would be EAGAIN, "resource unavailable try again")
-  if (!llvm::sys::fs::exists(TSPath))
-    return errorCodeToError(std::error_code(ENOENT, std::generic_category()));
-
-  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(
-      TSPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
-  if (!MBOrErr)
-    return errorCodeToError(MBOrErr.getError());
-
-  std::unique_ptr<pdb::IPDBSession> ThisSession;
-  if (auto EC = pdb::NativeSession::createFromPdb(
-          MemoryBuffer::getMemBuffer(Driver->takeBuffer(std::move(*MBOrErr)),
-                                     /*RequiresNullTerminator=*/false),
-          ThisSession))
-    return std::move(EC);
-
-  std::unique_ptr<pdb::NativeSession> NS(
-      static_cast<pdb::NativeSession *>(ThisSession.release()));
-  pdb::PDBFile &File = NS->getPDBFile();
-  auto ExpectedInfo = File.getPDBInfoStream();
-  // All PDB Files should have an Info stream.
-  if (!ExpectedInfo)
-    return ExpectedInfo.takeError();
-
-  // Just because a file with a matching name was found and it was an actual
-  // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
-  // must match the GUID specified in the TypeServer2 record.
-  if (ExpectedInfo->getGuid() != GuidFromObj)
-    return make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date);
-
-  return std::move(NS);
-}
-
 Expected<const CVIndexMap &> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File) {
-  const TypeServer2Record &TS =
-      retrieveDependencyInfo<TypeServer2Record>(File->DebugTypesObj);
+  Expected<llvm::pdb::NativeSession *> PDBSession = findTypeServerSource(File);
+  if (!PDBSession)
+    return PDBSession.takeError();
 
-  const codeview::GUID &TSId = TS.getGuid();
-  StringRef TSPath = TS.getName();
+  pdb::PDBFile &PDBFile = PDBSession.get()->getPDBFile();
+  pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
 
-  // First, check if the PDB has previously failed to load.
-  auto PrevErr = MissingTypeServerPDBs.find(TSId);
-  if (PrevErr != MissingTypeServerPDBs.end())
-    return createFileError(
-        TSPath,
-        make_error<StringError>(PrevErr->second, inconvertibleErrorCode()));
-
-  // Second, check if we already loaded a PDB with this GUID. Return the type
-  // index mapping if we have it.
-  auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()});
-  CVIndexMap &IndexMap = Insertion.first->second;
-  if (!Insertion.second)
-    return IndexMap;
+  auto It = TypeServerIndexMappings.emplace(Info.getGuid(), CVIndexMap());
+  CVIndexMap &IndexMap = It.first->second;
+  if (!It.second)
+    return IndexMap; // already merged
 
   // Mark this map as a type server map.
   IndexMap.IsTypeServerMap = true;
 
-  // Check for a PDB at:
-  // 1. The given file path
-  // 2. Next to the object file or archive file
-  auto ExpectedSession = handleExpected(
-      tryToLoadPDB(TSId, TSPath),
-      [&]() {
-        StringRef LocalPath =
-            !File->ParentName.empty() ? File->ParentName : File->getName();
-        SmallString<128> Path = sys::path::parent_path(LocalPath);
-        // Currently, type server PDBs are only created by cl, which only runs
-        // on Windows, so we can assume type server paths are Windows style.
-        sys::path::append(
-            Path, sys::path::filename(TSPath, sys::path::Style::windows));
-        return tryToLoadPDB(TSId, Path);
-      },
-      [&](std::unique_ptr<ECError> EC) -> Error {
-        auto SysErr = EC->convertToErrorCode();
-        // Only re-try loading if the previous error was "No such file or
-        // directory"
-        if (SysErr.category() == std::generic_category() &&
-            SysErr.value() == ENOENT)
-          return Error::success();
-        return Error(std::move(EC));
-      });
-
-  if (auto E = ExpectedSession.takeError()) {
-    TypeServerIndexMappings.erase(TSId);
-
-    // Flatten the error to a string, for later display, if the error occurs
-    // again on the same PDB.
-    std::string ErrMsg;
-    raw_string_ostream S(ErrMsg);
-    S << E;
-    MissingTypeServerPDBs.emplace(TSId, S.str());
-
-    return createFileError(TSPath, std::move(E));
-  }
-
-  pdb::NativeSession *Session = ExpectedSession->get();
-
-  // Keep a strong reference to this PDB, so that it's safe to hold pointers
-  // into the file.
-  LoadedPDBs.push_back(std::move(*ExpectedSession));
-
-  auto ExpectedTpi = Session->getPDBFile().getPDBTpiStream();
+  Expected<pdb::TpiStream &> ExpectedTpi = PDBFile.getPDBTpiStream();
   if (auto E = ExpectedTpi.takeError())
     fatal("Type server does not have TPI stream: " + toString(std::move(E)));
-  auto ExpectedIpi = Session->getPDBFile().getPDBIpiStream();
+  Expected<pdb::TpiStream &> ExpectedIpi = PDBFile.getPDBIpiStream();
   if (auto E = ExpectedIpi.takeError())
     fatal("Type server does not have TPI stream: " + toString(std::move(E)));
 
diff --git a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
index dd95a3df8893c..23656d1807cff 100644
--- a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
+++ b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
@@ -69,7 +69,7 @@ sections:
       - Kind:            LF_TYPESERVER2
         TypeServer2:
           Guid:            '{8DABD2A0-28FF-CB43-9BAF-175B77B76414}'
-          Age:             18
+          Age:             1
           Name:            'pdb-diff-cl.pdb'
   - Name:            '.text$mn'
     Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
diff --git a/lld/test/COFF/pdb-type-server-invalid-signature.yaml b/lld/test/COFF/pdb-type-server-invalid-signature.yaml
index 247e00096235c..efac72df5bd61 100644
--- a/lld/test/COFF/pdb-type-server-invalid-signature.yaml
+++ b/lld/test/COFF/pdb-type-server-invalid-signature.yaml
@@ -4,7 +4,7 @@
 # RUN: lld-link %t.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s
 # RUN: cd %S
 
-# CHECK: warning: Cannot use debug info for {{.*}}.obj
+# CHECK: warning: Cannot use debug info for '{{.*}}.obj'
 # CHECK-NEXT: The signature does not match; the file(s) might be out of date
 
 # Also test a valid match
@@ -14,7 +14,7 @@
 # RUN: lld-link %t2.obj -out:%t2.exe -debug -pdb:%t2.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=VALID-SIGNATURE -allow-empty
 # RUN: cd %S
 
-# VALID-SIGNATURE-NOT: warning: Cannot use debug info for {{.*}}.obj
+# VALID-SIGNATURE-NOT: warning: Cannot use debug info for '{{.*}}.obj'
 # VALID-SIGNATURE-NOT: The signature does not match; the file(s) might be out of date
 
 --- !COFF

From 561fcc0d63caca46e46a746db482a1c6895b2ac4 Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Tue, 28 May 2019 15:37:01 +0000
Subject: [PATCH 0373/1176] [X86-64] Fix 256-bit SET0 lowering for non-VLX
 targets

If we don't have VLX then 256-bit SET0 should be lowered
to VPXOR with ZMM registers.  This restores functionality
accidentally removed by r309926.

Differential Revision: https://reviews.llvm.org/D62415

llvm-svn: 361843
---
 llvm/lib/Target/X86/X86InstrInfo.cpp       |  6 ++
 llvm/test/CodeGen/X86/avx512f-256-set0.mir | 66 ++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/avx512f-256-set0.mir

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4ec80d90d9a0b..20d3cf0d92788 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3932,6 +3932,12 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
       MIB.addReg(SrcReg, RegState::ImplicitDefine);
       return true;
     }
+    if (MI.getOpcode() == X86::AVX512_256_SET0) {
+      // No VLX so we must reference a zmm.
+      unsigned ZReg =
+        TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+      MIB->getOperand(0).setReg(ZReg);
+    }
     return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
   }
   case X86::V_SETALLONES:
diff --git a/llvm/test/CodeGen/X86/avx512f-256-set0.mir b/llvm/test/CodeGen/X86/avx512f-256-set0.mir
new file mode 100644
index 0000000000000..6ba37b0360c72
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512f-256-set0.mir
@@ -0,0 +1,66 @@
+# Test that we emit VPXORD with ZMM registers instead of YMM
+# registers when we do not have VLX.
+#
+# RUN: llc -mtriple=x86_64-- -mattr=+avx512f -o - %s | FileCheck %s
+# CHECK: vpxord %zmm16, %zmm16, %zmm16
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.ll"
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  @tst_ = common global [4 x i64] zeroinitializer, align 64
+
+  define void @main() #0 {
+  bb0:
+    %gep1 = bitcast [4 x i64]* @tst_ to [4 x i64]*
+    %lsr.iv1 = bitcast [4 x i64]* %gep1 to <4 x i64>*
+    store <4 x i64> zeroinitializer, <4 x i64>* %lsr.iv1, align 16
+    ret void
+  }
+
+  attributes #0 = { "target-features"="+avx512f" }
+
+...
+---
+name:            main
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.bb0:
+    renamable $ymm16 = AVX512_256_SET0
+    VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store 32 into %ir.lsr.iv1, align 64)
+    RET 0
+
+...

From 448a79d123f34ed873174102bdfb4ab936c7fe36 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 28 May 2019 16:11:56 +0000
Subject: [PATCH 0374/1176] [AArch64] Delete unused VariantKind in
 AArch64MCExpr

llvm-svn: 361844
---
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 3 +--
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h   | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 20676ebb61a28..0a529321edc8f 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -79,8 +79,7 @@ StringRef AArch64MCExpr::getVariantKindName() const {
 }
 
 void AArch64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
-  if (getKind() != VK_NONE)
-    OS << getVariantKindName();
+  OS << getVariantKindName();
   Expr->print(OS, MAI);
 }
 
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index 246060ace034b..ec9c959116283 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -22,8 +22,6 @@ namespace llvm {
 class AArch64MCExpr : public MCTargetExpr {
 public:
   enum VariantKind {
-    VK_NONE     = 0x000,
-
     // Symbol locations specifying (roughly speaking) what calculation should be
     // performed to construct the final address for the relocated
     // symbol. E.g. direct, via the GOT, ...

From 760df47b778a530e9368a4b8706940ba103d57ba Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Tue, 28 May 2019 16:13:20 +0000
Subject: [PATCH 0375/1176] [ARM] Replace fp-only-sp and d16 with fp64 and d32.

Those two subtarget features were awkward because their semantics are
reversed: each one indicates the _lack_ of support for something in
the architecture, rather than the presence. As a consequence, you
don't get the behavior you want if you combine two sets of feature
bits.

Each SubtargetFeature for an FP architecture version now comes in four
versions, one for each combination of those options. So you can still
say (for example) '+vfp2' in a feature string and it will mean what
it's always meant, but there's a new string '+vfp2d16sp' meaning the
version without those extra options.

A lot of this change is just mechanically replacing positive checks
for the old features with negative checks for the new ones. But one
more interesting change is that I've rearranged getFPUFeatures() so
that the main FPU feature is appended to the output list *before*
rather than after the features derived from the Restriction field, so
that -fp64 and -d32 can override defaults added by the main feature.

Reviewers: dmgreen, samparker, SjoerdMeijer

Subscribers: srhines, javed.absar, eraman, kristof.beyls, hiraditya, zzheng, Petar.Avramovic, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D60691

llvm-svn: 361845
---
 clang/lib/Basic/Targets/ARM.cpp               | 18 ++--
 clang/test/CodeGen/arm-target-features.c      | 28 +++---
 clang/test/Driver/arm-mfpu.c                  | 49 ++++++-----
 llvm/include/llvm/MC/MCSubtargetInfo.h        |  4 +
 llvm/lib/MC/MCSubtargetInfo.cpp               | 17 ++++
 llvm/lib/Object/ELFObjectFile.cpp             |  6 +-
 llvm/lib/Support/ARMTargetParser.cpp          | 39 +++++----
 llvm/lib/Target/ARM/ARM.td                    | 86 ++++++++++---------
 llvm/lib/Target/ARM/ARMAsmPrinter.cpp         |  4 +-
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      |  9 +-
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp   |  4 +-
 llvm/lib/Target/ARM/ARMFastISel.cpp           | 40 +++++----
 llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp       |  6 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       | 70 +++++++--------
 llvm/lib/Target/ARM/ARMInstrInfo.td           | 24 +++---
 .../lib/Target/ARM/ARMInstructionSelector.cpp | 11 +--
 llvm/lib/Target/ARM/ARMLegalizerInfo.cpp      |  4 +-
 llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp   |  2 +-
 llvm/lib/Target/ARM/ARMSubtarget.h            | 35 +++++---
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h  |  2 +-
 .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 31 +++----
 .../ARM/Disassembler/ARMDisassembler.cpp      |  4 +-
 .../ARM/MCTargetDesc/ARMTargetStreamer.cpp    | 46 +++++-----
 .../GlobalISel/arm-legalize-load-store.mir    |  2 +-
 llvm/test/CodeGen/ARM/arm-storebytesmerge.ll  |  2 +-
 llvm/test/CodeGen/ARM/arm32-rounding.ll       |  4 +-
 llvm/test/CodeGen/ARM/build-attributes.ll     | 38 ++++----
 llvm/test/CodeGen/ARM/fast-isel-call.ll       |  6 +-
 llvm/test/CodeGen/ARM/float-helpers.s         |  4 +-
 llvm/test/CodeGen/ARM/fp-only-sp.ll           |  2 +-
 llvm/test/CodeGen/ARM/fp16-instructions.ll    | 16 ++--
 llvm/test/CodeGen/ARM/fp16-promote.ll         |  2 +-
 llvm/test/CodeGen/ARM/fpconv.ll               |  2 +-
 llvm/test/CodeGen/ARM/half.ll                 |  2 +-
 .../CodeGen/ARM/inlineasm-X-allocation.ll     |  2 +-
 .../ARM/inlineasm-operand-implicit-cast.ll    |  4 +-
 llvm/test/CodeGen/ARM/no-fpu.ll               |  4 +-
 llvm/test/CodeGen/Thumb2/aapcs.ll             |  2 +-
 .../CodeGen/Thumb2/float-intrinsics-double.ll |  2 +-
 .../CodeGen/Thumb2/float-intrinsics-float.ll  |  2 +-
 llvm/test/CodeGen/Thumb2/t2sizereduction.mir  |  2 +-
 llvm/test/MC/ARM/armv8.3a-js.s                |  4 +-
 llvm/test/MC/ARM/d16.s                        |  4 +-
 llvm/test/MC/ARM/invalid-neon-v8.s            |  2 +-
 llvm/test/MC/ARM/single-precision-fp.s        |  2 +-
 llvm/test/MC/ARM/vldm-vstm-diags.s            |  2 +-
 llvm/test/Transforms/Inline/ARM/inline-fp.ll  |  2 +-
 .../LoopUnroll/runtime-epilog-debuginfo.ll    |  2 +-
 48 files changed, 353 insertions(+), 302 deletions(-)

diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 55c0d371598dc..18ad466afe709 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -400,8 +400,7 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   HasFloat16 = true;
 
   // This does not diagnose illegal cases like having both
-  // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp".
-  uint32_t HW_FP_remove = 0;
+  // "+vfpv2" and "+vfpv3" or having "+neon" and "-fp64".
   for (const auto &Feature : Features) {
     if (Feature == "+soft-float") {
       SoftFloat = true;
@@ -409,19 +408,19 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       SoftFloatABI = true;
     } else if (Feature == "+vfp2") {
       FPU |= VFP2FPU;
-      HW_FP |= HW_FP_SP | HW_FP_DP;
+      HW_FP |= HW_FP_SP;
     } else if (Feature == "+vfp3") {
       FPU |= VFP3FPU;
-      HW_FP |= HW_FP_SP | HW_FP_DP;
+      HW_FP |= HW_FP_SP;
     } else if (Feature == "+vfp4") {
       FPU |= VFP4FPU;
-      HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
+      HW_FP |= HW_FP_SP | HW_FP_HP;
     } else if (Feature == "+fp-armv8") {
       FPU |= FPARMV8;
-      HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
+      HW_FP |= HW_FP_SP | HW_FP_HP;
     } else if (Feature == "+neon") {
       FPU |= NeonFPU;
-      HW_FP |= HW_FP_SP | HW_FP_DP;
+      HW_FP |= HW_FP_SP;
     } else if (Feature == "+hwdiv") {
       HWDiv |= HWDivThumb;
     } else if (Feature == "+hwdiv-arm") {
@@ -432,8 +431,8 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       Crypto = 1;
     } else if (Feature == "+dsp") {
       DSP = 1;
-    } else if (Feature == "+fp-only-sp") {
-      HW_FP_remove |= HW_FP_DP;
+    } else if (Feature == "+fp64") {
+      HW_FP |= HW_FP_DP;
     } else if (Feature == "+8msecext") {
       if (CPUProfile != "M" || ArchVersion != 8) {
         Diags.Report(diag::err_target_unsupported_mcmse) << CPU;
@@ -449,7 +448,6 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       DotProd = true;
     }
   }
-  HW_FP &= ~HW_FP_remove;
 
   switch (ArchVersion) {
   case 6:
diff --git a/clang/test/CodeGen/arm-target-features.c b/clang/test/CodeGen/arm-target-features.c
index f58d37824aafc..d62cea65ed056 100644
--- a/clang/test/CodeGen/arm-target-features.c
+++ b/clang/test/CodeGen/arm-target-features.c
@@ -1,23 +1,23 @@
 // REQUIRES: arm-registered-target
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3
-// CHECK-VFP3: "target-features"="+armv7-a,+dsp,+neon,+thumb-mode,+vfp3"
+// CHECK-VFP3: "target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp3"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4
-// CHECK-VFP4: "target-features"="+armv7-a,+dsp,+neon,+thumb-mode,+vfp4"
+// CHECK-VFP4: "target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-a12 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN: %clang_cc1 -triple thumbv7s-linux-gnueabi -target-cpu swift -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV-2
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu krait -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
-// CHECK-VFP4-DIV: "target-features"="+armv7-a,+dsp,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp4"
-// CHECK-VFP4-DIV-2: "target-features"="+armv7s,+dsp,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp4"
+// CHECK-VFP4-DIV: "target-features"="+armv7-a,+d32,+dsp,+fp64,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp4"
+// CHECK-VFP4-DIV-2: "target-features"="+armv7s,+d32,+dsp,+fp64,+hwdiv,+hwdiv-arm,+neon,+thumb-mode,+vfp4"
 
 // RUN: %clang_cc1 -triple armv7-linux-gnueabihf -target-cpu cortex-a15 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV-ARM
 // RUN: %clang_cc1 -triple armv7-linux-gnueabihf -target-cpu cortex-a17 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV-ARM
-// CHECK-VFP4-DIV-ARM: "target-features"="+armv7-a,+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4,-thumb-mode"
+// CHECK-VFP4-DIV-ARM: "target-features"="+armv7-a,+d32,+dsp,+fp64,+hwdiv,+hwdiv-arm,+neon,+vfp4,-thumb-mode"
 
 // RUN: %clang_cc1 -triple thumbv7s-apple-ios7.0 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
@@ -28,34 +28,34 @@
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m2 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m3 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8
-// CHECK-BASIC-V8: "target-features"="+armv8-a,+crc,+crypto,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,+thumb-mode"
+// CHECK-BASIC-V8: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp64,+hwdiv,+hwdiv-arm,+neon,+thumb-mode"
 
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
-// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+dotprod,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode"
+// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp64,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode"
 
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM
-// CHECK-BASIC-V8-ARM: "target-features"="+armv8-a,+crc,+crypto,+dsp,+fp-armv8,+hwdiv,+hwdiv-arm,+neon,-thumb-mode"
+// CHECK-BASIC-V8-ARM: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp64,+hwdiv,+hwdiv-arm,+neon,-thumb-mode"
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-DIV
-// CHECK-VFP3-D16-DIV: "target-features"="+armv7-r,+d16,+dsp,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp3"
+// CHECK-VFP3-D16-DIV: "target-features"="+armv7-r,+dsp,+fp64,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp3"
 
 
 // RUN: %clang_cc1 -triple armv7-linux-gnueabi -target-cpu cortex-r4f -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-THUMB-DIV
-// CHECK-VFP3-D16-THUMB-DIV: "target-features"="+armv7-r,+d16,+dsp,+hwdiv,+vfp3,-thumb-mode"
+// CHECK-VFP3-D16-THUMB-DIV: "target-features"="+armv7-r,+dsp,+fp64,+hwdiv,+vfp3,-thumb-mode"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-FP16-DIV
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-r8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3-D16-FP16-DIV
-// CHECK-VFP3-D16-FP16-DIV: "target-features"="+armv7-r,+d16,+dsp,+fp16,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp3"
+// CHECK-VFP3-D16-FP16-DIV: "target-features"="+armv7-r,+dsp,+fp16,+fp64,+hwdiv,+hwdiv-arm,+thumb-mode,+vfp3"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-D16-SP-THUMB-DIV
-// CHECK-VFP4-D16-SP-THUMB-DIV: "target-features"="+armv7e-m,+d16,+dsp,+fp-only-sp,+hwdiv,+thumb-mode,+vfp4"
+// CHECK-VFP4-D16-SP-THUMB-DIV: "target-features"="+armv7e-m,+dsp,+hwdiv,+thumb-mode,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabi -target-cpu cortex-m7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP5-D16-THUMB-DIV
-// CHECK-VFP5-D16-THUMB-DIV: "target-features"="+armv7e-m,+d16,+dsp,+fp-armv8,+hwdiv,+thumb-mode"
+// CHECK-VFP5-D16-THUMB-DIV: "target-features"="+armv7e-m,+dsp,+fp-armv8,+fp64,+hwdiv,+thumb-mode"
 
 
 // RUN: %clang_cc1 -triple armv7-linux-gnueabi -target-cpu cortex-r4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-DIV
@@ -107,6 +107,6 @@
 // CHECK-ARMV8M-M23-LINUX: "target-features"="+armv8-m.base,+hwdiv,+thumb-mode"
 
 // RUN: %clang_cc1 -triple thumb-linux-gnueabi -target-cpu cortex-m33 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ARMV8M-MAIN-LINUX 
-// CHECK-ARMV8M-MAIN-LINUX: "target-features"="+armv8-m.main,+d16,+dsp,+fp-armv8,+fp-only-sp,+hwdiv,+thumb-mode"
+// CHECK-ARMV8M-MAIN-LINUX: "target-features"="+armv8-m.main,+dsp,+fp-armv8,+hwdiv,+thumb-mode"
 
 void foo() {}
diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c
index 135207784875e..33cad80bf1492 100644
--- a/clang/test/Driver/arm-mfpu.c
+++ b/clang/test/Driver/arm-mfpu.c
@@ -6,7 +6,6 @@
 // CHECK-DEFAULT: "-target-feature" "+soft-float-abi"
 // CHECK-DEFAULT-NOT: "-target-feature" "+vfp2"
 // CHECK-DEFAULT-NOT: "-target-feature" "+vfp3"
-// CHECK-DEFAULT-NOT: "-target-feature" "+d16"
 // CHECK-DEFAULT-NOT: "-target-feature" "+neon"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=fpa %s -### -o %t.o 2>&1 \
@@ -64,12 +63,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
 // CHECK-VFP3-FP16-NOT: "-target-feature" "+soft-float"
 // CHECK-VFP3-FP16: "-target-feature" "+soft-float-abi"
-// CHECK-VFP3-FP16: "-target-feature" "-fp-only-sp"
-// CHECK-VFP3-FP16: "-target-feature" "-d16"
 // CHECK-VFP3-FP16: "-target-feature" "+vfp3"
 // CHECK-VFP3-FP16: "-target-feature" "+fp16"
 // CHECK-VFP3-FP16: "-target-feature" "-vfp4"
 // CHECK-VFP3-FP16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3-FP16: "-target-feature" "+fp64"
+// CHECK-VFP3-FP16: "-target-feature" "+d32"
 // CHECK-VFP3-FP16: "-target-feature" "-neon"
 // CHECK-VFP3-FP16: "-target-feature" "-crypto"
 
@@ -81,11 +80,11 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
 // CHECK-VFP3-D16-NOT: "-target-feature" "+soft-float"
 // CHECK-VFP3-D16: "-target-feature" "+soft-float-abi"
-// CHECK-VFP3-D16: "-target-feature" "-fp-only-sp"
-// CHECK-VFP3-D16: "-target-feature" "+d16"
 // CHECK-VFP3-D16: "-target-feature" "+vfp3"
 // CHECK-VFP3-D16: "-target-feature" "-vfp4"
 // CHECK-VFP3-D16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3-D16: "-target-feature" "+fp64"
+// CHECK-VFP3-D16-NOT: "-target-feature" "+d32"
 // CHECK-VFP3-D16: "-target-feature" "-neon"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=vfpv3-d16-fp16 %s -### -o %t.o 2>&1 \
@@ -94,12 +93,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
 // CHECK-VFP3-D16-FP16-NOT: "-target-feature" "+soft-float"
 // CHECK-VFP3-D16-FP16: "-target-feature" "+soft-float-abi"
-// CHECK-VFP3-D16-FP16: "-target-feature" "-fp-only-sp"
-// CHECK-VFP3-D16-FP16: "-target-feature" "+d16"
 // CHECK-VFP3-D16-FP16: "-target-feature" "+vfp3"
 // CHECK-VFP3-D16-FP16: "-target-feature" "+fp16"
 // CHECK-VFP3-D16-FP16: "-target-feature" "-vfp4"
 // CHECK-VFP3-D16-FP16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3-D16-FP16: "-target-feature" "+fp64"
+// CHECK-VFP3-D16-FP16-NOT: "-target-feature" "+d32"
 // CHECK-VFP3-D16-FP16: "-target-feature" "-neon"
 // CHECK-VFP3-D16-FP16: "-target-feature" "-crypto"
 
@@ -109,8 +108,8 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
 // CHECK-VFP3XD-NOT: "-target-feature" "+soft-float"
 // CHECK-VFP3XD: "-target-feature" "+soft-float-abi"
-// CHECK-VFP3XD: "-target-feature" "+fp-only-sp"
-// CHECK-VFP3XD: "-target-feature" "+d16"
+// CHECK-VFP3XD-NOT: "-target-feature" "+fp64"
+// CHECK-VFP3XD-NOT: "-target-feature" "+d32"
 // CHECK-VFP3XD: "-target-feature" "+vfp3"
 // CHECK-VFP3XD: "-target-feature" "-fp16"
 // CHECK-VFP3XD: "-target-feature" "-vfp4"
@@ -124,12 +123,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-3 %s
 // CHECK-VFP3XD-FP16-NOT: "-target-feature" "+soft-float"
 // CHECK-VFP3XD-FP16: "-target-feature" "+soft-float-abi"
-// CHECK-VFP3XD-FP16: "-target-feature" "+fp-only-sp"
-// CHECK-VFP3XD-FP16: "-target-feature" "+d16"
 // CHECK-VFP3XD-FP16: "-target-feature" "+vfp3"
 // CHECK-VFP3XD-FP16: "-target-feature" "+fp16"
 // CHECK-VFP3XD-FP16: "-target-feature" "-vfp4"
 // CHECK-VFP3XD-FP16: "-target-feature" "-fp-armv8"
+// CHECK-VFP3XD-FP16-NOT: "-target-feature" "+fp64"
+// CHECK-VFP3XD-FP16-NOT: "-target-feature" "+d32"
 // CHECK-VFP3XD-FP16: "-target-feature" "-neon"
 // CHECK-VFP3XD-FP16: "-target-feature" "-crypto"
 
@@ -160,10 +159,10 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-4 %s
 // CHECK-VFP4-D16-NOT: "-target-feature" "+soft-float"
 // CHECK-VFP4-D16: "-target-feature" "+soft-float-abi"
-// CHECK-VFP4-D16: "-target-feature" "-fp-only-sp"
-// CHECK-VFP4-D16: "-target-feature" "+d16"
 // CHECK-VFP4-D16: "-target-feature" "+vfp4"
 // CHECK-VFP4-D16: "-target-feature" "-fp-armv8"
+// CHECK-VFP4-D16: "-target-feature" "+fp64"
+// CHECK-VFP4-D16-NOT: "-target-feature" "+d32"
 // CHECK-VFP4-D16: "-target-feature" "-neon"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=fp4-sp-d16 %s -### -o %t.o 2>&1 \
@@ -174,10 +173,10 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-4 %s
 // CHECK-FP4-SP-D16-NOT: "-target-feature" "+soft-float"
 // CHECK-FP4-SP-D16: "-target-feature" "+soft-float-abi"
-// CHECK-FP4-SP-D16: "-target-feature" "+fp-only-sp"
-// CHECK-FP4-SP-D16: "-target-feature" "+d16"
 // CHECK-FP4-SP-D16: "-target-feature" "+vfp4"
 // CHECK-FP4-SP-D16: "-target-feature" "-fp-armv8"
+// CHECK-FP4-SP-D16-NOT: "-target-feature" "+fp64"
+// CHECK-FP4-SP-D16-NOT: "-target-feature" "+d32"
 // CHECK-FP4-SP-D16: "-target-feature" "-neon"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=fp5-sp-d16 %s -### -o %t.o 2>&1 \
@@ -188,10 +187,10 @@
 // RUN:   2>&1 | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s
 // CHECK-FP5-SP-D16-NOT: "-target-feature" "+soft-float"
 // CHECK-FP5-SP-D16: "-target-feature" "+soft-float-abi"
-// CHECK-FP5-SP-D16: "-target-feature" "+fp-only-sp"
-// CHECK-FP5-SP-D16: "-target-feature" "+d16"
 // CHECK-FP5-SP-D16: "-target-feature" "+fp-armv8"
 // CHECK-FP5-SP-D16: "-target-feature" "-neon"
+// CHECK-FP5-SP-D16-NOT: "-target-feature" "+fp64"
+// CHECK-FP5-SP-D16-NOT: "-target-feature" "+d32"
 // CHECK-FP5-SP-D16: "-target-feature" "-crypto"
 
 // RUN: %clang -target arm-linux-eabi -mfpu=fp5-dp-d16 %s -### -o %t.o 2>&1 \
@@ -202,9 +201,9 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-5 %s
 // CHECK-FP5-DP-D16-NOT: "-target-feature" "+soft-float"
 // CHECK-FP5-DP-D16: "-target-feature" "+soft-float-abi"
-// CHECK-FP5-DP-D16: "-target-feature" "-fp-only-sp"
-// CHECK-FP5-DP-D16: "-target-feature" "+d16"
 // CHECK-FP5-DP-D16: "-target-feature" "+fp-armv8"
+// CHECK-FP5-DP-D16: "-target-feature" "+fp64"
+// CHECK-FP5-DP-D16-NOT: "-target-feature" "+d32"
 // CHECK-FP5-DP-D16: "-target-feature" "-neon"
 // CHECK-FP5-DP-D16: "-target-feature" "-crypto"
 // CHECK-SOFT-ABI-FP-5: "-target-feature" "+soft-float"
@@ -236,12 +235,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-SOFT-ABI-FP-6 %s
 // CHECK-NEON-FP16-NOT: "-target-feature" "+soft-float"
 // CHECK-NEON-FP16: "-target-feature" "+soft-float-abi"
-// CHECK-NEON-FP16: "-target-feature" "-fp-only-sp"
-// CHECK-NEON-FP16: "-target-feature" "-d16"
 // CHECK-NEON-FP16: "-target-feature" "+vfp3"
 // CHECK-NEON-FP16: "-target-feature" "+fp16"
 // CHECK-NEON-FP16: "-target-feature" "-vfp4"
 // CHECK-NEON-FP16: "-target-feature" "-fp-armv8"
+// CHECK-NEON-FP16: "-target-feature" "+fp64"
+// CHECK-NEON-FP16: "-target-feature" "+d32"
 // CHECK-NEON-FP16: "-target-feature" "+neon"
 // CHECK-NEON-FP16: "-target-feature" "-crypto"
 
@@ -319,12 +318,12 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FP %s
 // CHECK-NO-FP-NOT: "-target-feature" "+soft-float"
 // CHECK-NO-FP: "-target-feature" "+soft-float-abi"
-// CHECK-NO-FP: "-target-feature" "-fp-only-sp"
-// CHECK-NO-FP: "-target-feature" "-d16"
 // CHECK-NO-FP: "-target-feature" "-vfp2"
 // CHECK-NO-FP: "-target-feature" "-vfp3"
 // CHECK-NO-FP: "-target-feature" "-vfp4"
 // CHECK-NO-FP: "-target-feature" "-fp-armv8"
+// CHECK-NO-FP-NOT: "-target-feature" "+fp64"
+// CHECK-NO-FP-NOT: "-target-feature" "+d32"
 // CHECK-NO-FP: "-target-feature" "-neon"
 // CHECK-NO-FP: "-target-feature" "-crypto"
 
@@ -369,7 +368,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-ARM5-ANDROID-FP-DEFAULT %s
 // CHECK-ARM5-ANDROID-FP-DEFAULT: "-target-feature" "+soft-float"
 // CHECK-ARM5-ANDROID-FP-DEFAULT: "-target-feature" "+soft-float-abi"
-// CHECK-ARM5-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+d16"
+// CHECK-ARM5-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+d32"
 // CHECK-ARM5-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+vfp3"
 // CHECK-ARM5-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+vfp4"
 // CHECK-ARM5-ANDROID-FP-DEFAULT-NOT: "-target-feature" "+fp-armv8"
@@ -390,7 +389,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-ARM7-ANDROID-FP-D16 %s
 // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+soft-float"
 // CHECK-ARM7-ANDROID-FP-D16: "-target-feature" "+soft-float-abi"
-// CHECK-ARM7-ANDROID-FP-D16: "-target-feature" "+d16"
+// CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+d32"
 // CHECK-ARM7-ANDROID-FP-D16: "-target-feature" "+vfp3"
 // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+vfp4"
 // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+fp-armv8"
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index 767a7abcbaf3f..9490a6ecedadb 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -141,6 +141,10 @@ class MCSubtargetInfo {
   /// all feature bits implied by the flag.
   FeatureBitset ApplyFeatureFlag(StringRef FS);
 
+  /// Set/clear additional feature bits, including all other bits they imply.
+  FeatureBitset SetFeatureBitsTransitively(const FeatureBitset& FB);
+  FeatureBitset ClearFeatureBitsTransitively(const FeatureBitset &FB);
+
   /// Check whether the subtarget features are enabled/disabled as per
   /// the provided string, ignoring all other features.
   bool checkFeatures(StringRef FS) const;
diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp
index 2af8a5559b56e..9b73800978cc7 100644
--- a/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -195,6 +195,23 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
   return FeatureBits;
 }
 
+FeatureBitset MCSubtargetInfo::SetFeatureBitsTransitively(
+  const FeatureBitset &FB) {
+  SetImpliedBits(FeatureBits, FB, ProcFeatures);
+  return FeatureBits;
+}
+
+FeatureBitset MCSubtargetInfo::ClearFeatureBitsTransitively(
+  const FeatureBitset &FB) {
+  for (unsigned I = 0, E = FB.size(); I < E; I++) {
+    if (FB[I]) {
+      FeatureBits.reset(I);
+      ClearImpliedBits(FeatureBits, I, ProcFeatures);
+    }
+  }
+  return FeatureBits;
+}
+
 FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef Feature) {
   // Find feature in table.
   const SubtargetFeatureKV *FeatureEntry =
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index c0ac7a357f822..1c3469b5971af 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -194,9 +194,9 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
     default:
       break;
     case ARMBuildAttrs::Not_Allowed:
-      Features.AddFeature("vfp2", false);
-      Features.AddFeature("vfp3", false);
-      Features.AddFeature("vfp4", false);
+      Features.AddFeature("vfp2d16sp", false);
+      Features.AddFeature("vfp3d16sp", false);
+      Features.AddFeature("vfp4d16sp", false);
       break;
     case ARMBuildAttrs::AllowFPv2:
       Features.AddFeature("vfp2");
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index c57da4cb20218..02f0d95ff2784 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -159,23 +159,6 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
   if (FPUKind >= FK_LAST || FPUKind == FK_INVALID)
     return false;
 
-  // fp-only-sp and d16 subtarget features are independent of each other, so we
-  // must enable/disable both.
-  switch (FPUNames[FPUKind].Restriction) {
-  case FPURestriction::SP_D16:
-    Features.push_back("+fp-only-sp");
-    Features.push_back("+d16");
-    break;
-  case FPURestriction::D16:
-    Features.push_back("-fp-only-sp");
-    Features.push_back("+d16");
-    break;
-  case FPURestriction::None:
-    Features.push_back("-fp-only-sp");
-    Features.push_back("-d16");
-    break;
-  }
-
   // FPU version subtarget features are inclusive of lower-numbered ones, so
   // enable the one corresponding to this version and disable all that are
   // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
@@ -216,6 +199,28 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
     break;
   }
 
+  // fp64 and d32 subtarget features are independent of each other, so we
+  // must disable/enable both.
+  if (FPUKind == FK_NONE) {
+    Features.push_back("-fp64");
+    Features.push_back("-d32");
+  } else {
+    switch (FPUNames[FPUKind].Restriction) {
+    case FPURestriction::SP_D16:
+      Features.push_back("-fp64");
+      Features.push_back("-d32");
+      break;
+    case FPURestriction::D16:
+      Features.push_back("+fp64");
+      Features.push_back("-d32");
+      break;
+    case FPURestriction::None:
+      Features.push_back("+fp64");
+      Features.push_back("+d32");
+      break;
+    }
+  }
+
   // crypto includes neon, so we handle this similarly to FPU version.
   switch (FPUNames[FPUKind].NeonSupport) {
   case NeonSupportLevel::Crypto:
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 48eba2246c577..20a61d343b383 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -32,12 +32,40 @@ def ModeSoftFloat         : SubtargetFeature<"soft-float","UseSoftFloat",
 //
 
 // Floating Point, HW Division and Neon Support
-def FeatureVFP2           : SubtargetFeature<"vfp2", "HasVFPv2", "true",
-                                             "Enable VFP2 instructions">;
+def FeatureFP64           : SubtargetFeature<"fp64", "HasFP64", "true",
+                                             "Floating point unit supports "
+                                             "double precision">;
+
+def FeatureD32            : SubtargetFeature<"d32", "HasD32", "true",
+                                             "Extend FP to 32 double registers">;
+
+multiclass VFPver<string name, string query, string description,
+                  list<SubtargetFeature> prev = [],
+                  list<SubtargetFeature> otherimplies = []> {
+  def _D16_SP: SubtargetFeature<
+    name#"d16sp", query#"D16SP", "true",
+    description#" with only 16 d-registers and no double precision",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>;
+  def _SP: SubtargetFeature<
+    name#"sp", query#"SP", "true",
+    description#" with no double precision",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) #
+      otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+  def _D16: SubtargetFeature<
+    name#"d16", query#"D16", "true",
+    description#" with only 16 d-registers",
+    !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) #
+      otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+  def "": SubtargetFeature<
+    name, query, "true", description,
+    prev # otherimplies # [
+        !cast<SubtargetFeature>(NAME # "_D16"),
+        !cast<SubtargetFeature>(NAME # "_SP")]>;
+}
 
-def FeatureVFP3           : SubtargetFeature<"vfp3", "HasVFPv3", "true",
-                                             "Enable VFP3 instructions",
-                                             [FeatureVFP2]>;
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">;
+defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
+                         [FeatureVFP2]>;
 
 def FeatureNEON           : SubtargetFeature<"neon", "HasNEON", "true",
                                              "Enable NEON instructions",
@@ -47,31 +75,22 @@ def FeatureFP16           : SubtargetFeature<"fp16", "HasFP16", "true",
                                              "Enable half-precision "
                                              "floating point">;
 
-def FeatureVFP4           : SubtargetFeature<"vfp4", "HasVFPv4", "true",
-                                             "Enable VFP4 instructions",
-                                             [FeatureVFP3, FeatureFP16]>;
+defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
+                         [FeatureVFP3], [FeatureFP16]>;
 
-def FeatureFPARMv8        : SubtargetFeature<"fp-armv8", "HasFPARMv8",
-                                             "true", "Enable ARMv8 FP",
-                                             [FeatureVFP4]>;
+defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
+                         [FeatureVFP4]>;
 
 def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                              "Enable full half-precision "
                                              "floating point",
-                                             [FeatureFPARMv8]>;
+                                             [FeatureFPARMv8_D16_SP]>;
 
 def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
                                              "Enable full half-precision "
                                              "floating point fml instructions",
                                              [FeatureFullFP16]>;
 
-def FeatureVFPOnlySP      : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
-                                             "Floating point unit supports "
-                                             "single precision only">;
-
-def FeatureD16            : SubtargetFeature<"d16", "HasD16", "true",
-                                             "Restrict FP to 16 double registers">;
-
 def FeatureHWDivThumb     : SubtargetFeature<"hwdiv",
                                              "HasHardwareDivideInThumb", "true",
                                              "Enable divide instructions in Thumb">;
@@ -943,14 +962,12 @@ def : ProcessorModel<"cortex-r4f",  CortexA8Model,      [ARMv7r, ProcR4,
                                                          FeatureHasRetAddrStack,
                                                          FeatureSlowFPBrcc,
                                                          FeatureHasSlowFPVMLx,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureAvoidPartialCPSR]>;
 
 def : ProcessorModel<"cortex-r5",   CortexA8Model,      [ARMv7r, ProcR5,
                                                          FeatureHasRetAddrStack,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureSlowFPBrcc,
                                                          FeatureHWDivARM,
                                                          FeatureHasSlowFPVMLx,
@@ -958,8 +975,7 @@ def : ProcessorModel<"cortex-r5",   CortexA8Model,      [ARMv7r, ProcR5,
 
 def : ProcessorModel<"cortex-r7",   CortexA8Model,      [ARMv7r, ProcR7,
                                                          FeatureHasRetAddrStack,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureFP16,
                                                          FeatureMP,
                                                          FeatureSlowFPBrcc,
@@ -969,8 +985,7 @@ def : ProcessorModel<"cortex-r7",   CortexA8Model,      [ARMv7r, ProcR7,
 
 def : ProcessorModel<"cortex-r8",   CortexA8Model,      [ARMv7r,
                                                          FeatureHasRetAddrStack,
-                                                         FeatureVFP3,
-                                                         FeatureD16,
+                                                         FeatureVFP3_D16,
                                                          FeatureFP16,
                                                          FeatureMP,
                                                          FeatureSlowFPBrcc,
@@ -991,10 +1006,8 @@ def : ProcessorModel<"sc300",       CortexM4Model,      [ARMv7m,
                                                          FeatureUseAA,
                                                          FeatureHasNoBranchPredictor]>;
 
-def : ProcessorModel<"cortex-m4",   CortexM4Model,      [ARMv7em,
-                                                         FeatureVFP4,
-                                                         FeatureVFPOnlySP,
-                                                         FeatureD16,
+def : ProcessorModel<"cortex-m4", CortexM4Model,        [ARMv7em,
+                                                         FeatureVFP4_D16_SP,
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureUseMISched,
@@ -1002,17 +1015,14 @@ def : ProcessorModel<"cortex-m4",   CortexM4Model,      [ARMv7em,
                                                          FeatureHasNoBranchPredictor]>;
 
 def : ProcNoItin<"cortex-m7",                           [ARMv7em,
-                                                         FeatureFPARMv8,
-                                                         FeatureD16]>;
+                                                         FeatureFPARMv8_D16]>;
 
 def : ProcNoItin<"cortex-m23",                          [ARMv8mBaseline,
                                                          FeatureNoMovt]>;
 
 def : ProcessorModel<"cortex-m33", CortexM4Model,       [ARMv8mMainline,
                                                          FeatureDSP,
-                                                         FeatureFPARMv8,
-                                                         FeatureD16,
-                                                         FeatureVFPOnlySP,
+                                                         FeatureFPARMv8_D16_SP,
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureUseMISched,
@@ -1021,9 +1031,7 @@ def : ProcessorModel<"cortex-m33", CortexM4Model,       [ARMv8mMainline,
 
 def : ProcessorModel<"cortex-m35p", CortexM4Model,      [ARMv8mMainline,
                                                          FeatureDSP,
-                                                         FeatureFPARMv8,
-                                                         FeatureD16,
-                                                         FeatureVFPOnlySP,
+                                                         FeatureFPARMv8_D16_SP,
                                                          FeaturePrefLoopAlign32,
                                                          FeatureHasSlowFPVMLx,
                                                          FeatureUseMISched,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 239b95ffb8e5f..6bede80adaa7b 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -657,7 +657,7 @@ void ARMAsmPrinter::emitAttributes() {
     ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                       ARMBuildAttrs::IEEEDenormals);
   else {
-    if (!STI.hasVFP2()) {
+    if (!STI.hasVFP2Base()) {
       // When the target doesn't have an FPU (by design or
       // intention), the assumptions made on the software support
       // mirror that of the equivalent hardware support *if it
@@ -667,7 +667,7 @@ void ARMAsmPrinter::emitAttributes() {
       if (STI.hasV7Ops())
         ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                           ARMBuildAttrs::PreserveFPSign);
-    } else if (STI.hasVFP3()) {
+    } else if (STI.hasVFP3Base()) {
       // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
       // the sign bit of the zero matches the sign bit of the input or
       // result that is being flushed to zero.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 22c53d9e26cf3..fbef5d790a440 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -133,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {
-  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
     return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
   return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
@@ -830,7 +830,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = ARM::VMOVRS;
   else if (SPRDest && GPRSrc)
     Opc = ARM::VMOVSR;
-  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
     Opc = ARM::VMOVD;
   else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
     Opc = ARM::VORRq;
@@ -890,7 +890,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     BeginIdx = ARM::dsub_0;
     SubRegs = 4;
     Spacing = 2;
-  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
+             !Subtarget.hasFP64()) {
     Opc = ARM::VMOVS;
     BeginIdx = ARM::ssub_0;
     SubRegs = 2;
@@ -1481,7 +1482,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
   // widened to VMOVD.  We prefer the VMOVD when possible because it may be
   // changed into a VORR that can go down the NEON pipeline.
-  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
+  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
     return false;
 
   // Look for a copy between even S-registers.  That is where we keep floats
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 1870e4c0b7f08..96200a0910917 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -149,7 +149,7 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
 const uint32_t *
 ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
   const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-  if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+  if (!STI.useSoftFloat() && STI.hasVFP2Base() && !STI.isThumb1Only())
     return CSR_NoRegs_RegMask;
   else
     return CSR_FPRegs_RegMask;
@@ -193,7 +193,7 @@ getReservedRegs(const MachineFunction &MF) const {
   if (STI.isR9Reserved())
     markSuperRegs(Reserved, ARM::R9);
   // Reserve D16-D31 if the subtarget doesn't support them.
-  if (!STI.hasVFP3() || STI.hasD16()) {
+  if (!STI.hasD32()) {
     static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
     for (unsigned R = 0; R < 16; ++R)
       markSuperRegs(Reserved, ARM::D16 + R);
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index cd01b70b378b4..6e274d269bf29 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -441,7 +441,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
   }
 
   // Require VFP2 for loading fp constants.
-  if (!Subtarget->hasVFP2()) return false;
+  if (!Subtarget->hasVFP2Base()) return false;
 
   // MachineConstantPool wants an explicit alignment.
   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
@@ -969,7 +969,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
       break;
     case MVT::f32:
-      if (!Subtarget->hasVFP2()) return false;
+      if (!Subtarget->hasVFP2Base()) return false;
       // Unaligned loads need special handling. Floats require word-alignment.
       if (Alignment && Alignment < 4) {
         needVMOV = true;
@@ -982,7 +982,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       }
       break;
     case MVT::f64:
-      if (!Subtarget->hasVFP2()) return false;
+      // Can load and store double precision even without FeatureFP64
+      if (!Subtarget->hasVFP2Base()) return false;
       // FIXME: Unaligned loads need special handling.  Doublewords require
       // word-alignment.
       if (Alignment && Alignment < 4)
@@ -1107,7 +1108,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
       }
       break;
     case MVT::f32:
-      if (!Subtarget->hasVFP2()) return false;
+      if (!Subtarget->hasVFP2Base()) return false;
       // Unaligned stores need special handling. Floats require word-alignment.
       if (Alignment && Alignment < 4) {
         unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
@@ -1122,7 +1123,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
       }
       break;
     case MVT::f64:
-      if (!Subtarget->hasVFP2()) return false;
+      // Can load and store double precision even without FeatureFP64
+      if (!Subtarget->hasVFP2Base()) return false;
       // FIXME: Unaligned stores need special handling.  Doublewords require
       // word-alignment.
       if (Alignment && Alignment < 4)
@@ -1353,10 +1355,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   if (!SrcEVT.isSimple()) return false;
   MVT SrcVT = SrcEVT.getSimpleVT();
 
-  if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
     return false;
 
-  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
     return false;
 
   // Check to see if the 2nd operand is a constant that we can encode directly
@@ -1506,7 +1508,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
 
 bool ARMFastISel::SelectFPExt(const Instruction *I) {
   // Make sure we have VFP and that we're extending float to double.
-  if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
 
   Value *V = I->getOperand(0);
   if (!I->getType()->isDoubleTy() ||
@@ -1525,7 +1527,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
 
 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
   // Make sure we have VFP and that we're truncating double to float.
-  if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+  if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
 
   Value *V = I->getOperand(0);
   if (!(I->getType()->isFloatTy() &&
@@ -1544,7 +1546,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
 
 bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
   // Make sure we have VFP.
-  if (!Subtarget->hasVFP2()) return false;
+  if (!Subtarget->hasVFP2Base()) return false;
 
   MVT DstVT;
   Type *Ty = I->getType();
@@ -1576,7 +1578,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
 
   unsigned Opc;
   if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
-  else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
     Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
   else return false;
 
@@ -1589,7 +1591,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
 
 bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
   // Make sure we have VFP.
-  if (!Subtarget->hasVFP2()) return false;
+  if (!Subtarget->hasVFP2Base()) return false;
 
   MVT DstVT;
   Type *RetTy = I->getType();
@@ -1602,7 +1604,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
   unsigned Opc;
   Type *OpTy = I->getOperand(0)->getType();
   if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
-  else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
     Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
   else return false;
 
@@ -1808,9 +1810,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
   // if we have them.
   // FIXME: It'd be nice to use NEON instructions.
   Type *Ty = I->getType();
-  if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
     return false;
-  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
     return false;
 
   unsigned Opc;
@@ -1852,7 +1854,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   default:
     report_fatal_error("Unsupported calling convention");
   case CallingConv::Fast:
-    if (Subtarget->hasVFP2() && !isVarArg) {
+    if (Subtarget->hasVFP2Base() && !isVarArg) {
       if (!Subtarget->isAAPCS_ABI())
         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
       // For AAPCS ABI targets, just use VFP variant of the calling convention.
@@ -1863,7 +1865,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::CXX_FAST_TLS:
     // Use target triple & subtarget features to do actual dispatch.
     if (Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2() &&
+      if (Subtarget->hasVFP2Base() &&
           TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
       else
@@ -1932,11 +1934,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       case MVT::i32:
         break;
       case MVT::f32:
-        if (!Subtarget->hasVFP2())
+        if (!Subtarget->hasVFP2Base())
           return false;
         break;
       case MVT::f64:
-        if (!Subtarget->hasVFP2())
+        if (!Subtarget->hasVFP2Base())
           return false;
         break;
       }
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index cb66d16a19432..492c83c2bf7a0 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -4043,9 +4043,9 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
 
   // If an opcode was found then we can lower the read to a VFP instruction.
   if (Opcode) {
-    if (!Subtarget->hasVFP2())
+    if (!Subtarget->hasVFP2Base())
       return false;
-    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
       return false;
 
     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
@@ -4154,7 +4154,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
                     .Default(0);
 
   if (Opcode) {
-    if (!Subtarget->hasVFP2())
+    if (!Subtarget->hasVFP2Base())
       return false;
     Ops = { N->getOperand(2), getAL(CurDAG, DL),
             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 88d318e7bb32b..7dd2fef89ee79 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -241,7 +241,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   if (Subtarget->isTargetMachO()) {
     // Uses VFP for Thumb libfuncs if available.
-    if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+    if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
         Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
       static const struct {
         const RTLIB::Libcall Op;
@@ -510,7 +510,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   else
     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
@@ -698,7 +698,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
 
     // NEON only has FMA instructions as of VFP4.
-    if (!Subtarget->hasVFP4()) {
+    if (!Subtarget->hasVFP4Base()) {
       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
     }
@@ -732,7 +732,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget->isFPOnlySP()) {
+  if (!Subtarget->hasFP64()) {
     // When targeting a floating-point unit with only single-precision
     // operations, f64 is legal for the few double-precision instructions which
     // are present However, no double-precision operations other than moves,
@@ -1030,7 +1030,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
@@ -1080,7 +1080,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
   setOperationAction(ISD::FREM,      MVT::f64, Expand);
   setOperationAction(ISD::FREM,      MVT::f32, Expand);
-  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+  if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
       !Subtarget->isThumb1Only()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
@@ -1088,7 +1088,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 
-  if (!Subtarget->hasVFP4()) {
+  if (!Subtarget->hasVFP4Base()) {
     setOperationAction(ISD::FMA, MVT::f64, Expand);
     setOperationAction(ISD::FMA, MVT::f32, Expand);
   }
@@ -1096,7 +1096,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   // Various VFP goodness
   if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
     // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
-    if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
+    if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
       setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
       setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
     }
@@ -1116,7 +1116,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
 
   // FP-ARMv8 implements a lot of rounding-like FP operations.
-  if (Subtarget->hasFPARMv8()) {
+  if (Subtarget->hasFPARMv8Base()) {
     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
     setOperationAction(ISD::FCEIL, MVT::f32, Legal);
     setOperationAction(ISD::FROUND, MVT::f32, Legal);
@@ -1130,7 +1130,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
 
-    if (!Subtarget->isFPOnlySP()) {
+    if (Subtarget->hasFP64()) {
       setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
       setOperationAction(ISD::FCEIL, MVT::f64, Legal);
       setOperationAction(ISD::FROUND, MVT::f64, Legal);
@@ -1202,7 +1202,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
   if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
-      !Subtarget->hasVFP2() || Subtarget->hasMinSize())
+      !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
     setSchedulingPreference(Sched::RegPressure);
   else
     setSchedulingPreference(Sched::Hybrid);
@@ -1637,7 +1637,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::C:
     if (!Subtarget->isAAPCS_ABI())
       return CallingConv::ARM_APCS;
-    else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
+    else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
              !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;
@@ -1646,10 +1646,11 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::Fast:
   case CallingConv::CXX_FAST_TLS:
     if (!Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+      if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
         return CallingConv::Fast;
       return CallingConv::ARM_APCS;
-    } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+    } else if (Subtarget->hasVFP2Base() &&
+               !Subtarget->isThumb1Only() && !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;
     else
       return CallingConv::ARM_AAPCS;
@@ -3912,7 +3913,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
 SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
                                      SelectionDAG &DAG, const SDLoc &dl,
                                      bool InvalidOnQNaN) const {
-  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
+  assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
   SDValue Cmp;
   SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
   if (!isFloatingPointZero(RHS))
@@ -4225,7 +4226,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
                                    SDValue TrueVal, SDValue ARMcc, SDValue CCR,
                                    SDValue Cmp, SelectionDAG &DAG) const {
-  if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+  if (!Subtarget->hasFP64() && VT == MVT::f64) {
     FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
                            DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
     TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
@@ -4474,7 +4475,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
 
-  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
     DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
                                                     dl);
 
@@ -4497,9 +4498,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     // inverting the compare condition, swapping 'less' and 'greater') and
     // sometimes need to swap the operands to the VSEL (which inverts the
     // condition in the sense of firing whenever the previous condition didn't)
-    if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f16 ||
-                                    TrueVal.getValueType() == MVT::f32 ||
-                                    TrueVal.getValueType() == MVT::f64)) {
+    if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
+                                        TrueVal.getValueType() == MVT::f32 ||
+                                        TrueVal.getValueType() == MVT::f64)) {
       ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
       if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
           CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
@@ -4522,7 +4523,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
   // must use VSEL (limited condition codes), due to not having conditional f16
   // moves.
-  if (Subtarget->hasFPARMv8() &&
+  if (Subtarget->hasFPARMv8Base() &&
       !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
       (TrueVal.getValueType() == MVT::f16 ||
        TrueVal.getValueType() == MVT::f32 ||
@@ -4715,7 +4716,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
 
-  if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
+  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
     DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
                                                     dl);
 
@@ -4862,7 +4863,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
-  if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+  if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::FP_TO_SINT)
       LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -4926,7 +4927,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
-  if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+  if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::SINT_TO_FP)
       LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
@@ -5909,12 +5910,12 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
     }
   }
 
-  if (!ST->hasVFP3())
+  if (!ST->hasVFP3Base())
     return SDValue();
 
   // Use the default (constant pool) lowering for double constants when we have
   // an SP-only FPU
-  if (IsDouble && Subtarget->isFPOnlySP())
+  if (IsDouble && !Subtarget->hasFP64())
     return SDValue();
 
   // Try splatting with a VMOV.f32...
@@ -11356,7 +11357,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
                                      const ARMSubtarget *Subtarget) {
   // vmovrrd(vmovdrr x, y) -> x,y
   SDValue InDouble = N->getOperand(0);
-  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
+  if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
 
   // vmovrrd(load f64) -> (load i32), (load i32)
@@ -13303,7 +13304,7 @@ static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
   unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U);
 
   // VLDR and LDRD: 4 * imm8
-  if ((VT.isFloatingPoint() && Subtarget->hasVFP2()) || NumBytes == 8)
+  if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
     return isShiftedUInt<8, 2>(V);
 
   if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
@@ -13347,7 +13348,7 @@ static bool isLegalAddressImmediate(int64_t V, EVT VT,
     return isUInt<8>(V);
   case MVT::f32:
   case MVT::f64:
-    if (!Subtarget->hasVFP2()) // FIXME: NEON?
+    if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
       return false;
     return isShiftedUInt<8, 2>(V);
   }
@@ -13910,7 +13911,7 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   // Although we are correct (we are free to emit anything, without
   // constraints), we might break use cases that would expect us to be more
   // efficient and emit something else.
-  if (!Subtarget->hasVFP2())
+  if (!Subtarget->hasVFP2Base())
     return "r";
   if (ConstraintVT.isFloatingPoint())
     return "w";
@@ -14392,7 +14393,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
 }
 
 SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+  assert(Op.getValueType() == MVT::f64 && !Subtarget->hasFP64() &&
          "Unexpected type for custom-lowering FP_EXTEND");
 
   RTLIB::Libcall LC;
@@ -14404,8 +14405,7 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
-  assert(Op.getOperand(0).getValueType() == MVT::f64 &&
-         Subtarget->isFPOnlySP() &&
+  assert(Op.getOperand(0).getValueType() == MVT::f64 && !Subtarget->hasFP64() &&
          "Unexpected type for custom-lowering FP_ROUND");
 
   RTLIB::Libcall LC;
@@ -14468,13 +14468,13 @@ bool ARM::isBitFieldInvertedMask(unsigned v) {
 /// materialize the FP immediate as a load from a constant pool.
 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                      bool ForCodeSize) const {
-  if (!Subtarget->hasVFP3())
+  if (!Subtarget->hasVFP3Base())
     return false;
   if (VT == MVT::f16 && Subtarget->hasFullFP16())
     return ARM_AM::getFP16Imm(Imm) != -1;
   if (VT == MVT::f32)
     return ARM_AM::getFP32Imm(Imm) != -1;
-  if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
+  if (VT == MVT::f64 && Subtarget->hasFP64())
     return ARM_AM::getFP64Imm(Imm) != -1;
   return false;
 }
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f55e73abbd7f7..d0821b94477a6 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -258,18 +258,18 @@ def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                                  AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
 def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                  AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
-def NoVFP            : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2          : Predicate<"Subtarget->hasVFP2()">,
-                                 AssemblerPredicate<"FeatureVFP2", "VFP2">;
-def HasVFP3          : Predicate<"Subtarget->hasVFP3()">,
-                                 AssemblerPredicate<"FeatureVFP3", "VFP3">;
-def HasVFP4          : Predicate<"Subtarget->hasVFP4()">,
-                                 AssemblerPredicate<"FeatureVFP4", "VFP4">;
-def HasDPVFP         : Predicate<"!Subtarget->isFPOnlySP()">,
-                                 AssemblerPredicate<"!FeatureVFPOnlySP",
+def NoVFP            : Predicate<"!Subtarget->hasVFP2Base()">;
+def HasVFP2          : Predicate<"Subtarget->hasVFP2Base()">,
+                                 AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
+def HasVFP3          : Predicate<"Subtarget->hasVFP3Base()">,
+                                 AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
+def HasVFP4          : Predicate<"Subtarget->hasVFP4Base()">,
+                                 AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
+def HasDPVFP         : Predicate<"Subtarget->hasFP64()">,
+                                 AssemblerPredicate<"FeatureFP64",
                                                     "double precision VFP">;
-def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
-                                 AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
+def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8Base()">,
+                                 AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
 def HasNEON          : Predicate<"Subtarget->hasNEON()">,
                                  AssemblerPredicate<"FeatureNEON", "NEON">;
 def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
@@ -371,7 +371,7 @@ def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
 // Do not use them for Darwin platforms.
 def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
                                  " FPOpFusion::Fast && "
-                                 " Subtarget->hasVFP4()) && "
+                                 " Subtarget->hasVFP4Base()) && "
                                  "!Subtarget->isTargetDarwin() &&"
                                  "Subtarget->useFPVMLx()">;
 
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index b97924cf975a4..4485a474a6dfc 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -232,7 +232,7 @@ static bool selectMergeValues(MachineInstrBuilder &MIB,
                               MachineRegisterInfo &MRI,
                               const TargetRegisterInfo &TRI,
                               const RegisterBankInfo &RBI) {
-  assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
+  assert(TII.getSubtarget().hasVFP2Base() && "Can't select merge without VFP");
 
   // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
   // into one DPR.
@@ -263,7 +263,8 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
                                 MachineRegisterInfo &MRI,
                                 const TargetRegisterInfo &TRI,
                                 const RegisterBankInfo &RBI) {
-  assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
+  assert(TII.getSubtarget().hasVFP2Base() &&
+         "Can't select unmerge without VFP");
 
   // We only support G_UNMERGE_VALUES as a way to break up one DPR into two
   // GPRs.
@@ -1036,12 +1037,12 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     return selectCmp(Helper, MIB, MRI);
   }
   case G_FCMP: {
-    assert(STI.hasVFP2() && "Can't select fcmp without VFP");
+    assert(STI.hasVFP2Base() && "Can't select fcmp without VFP");
 
     unsigned OpReg = I.getOperand(2).getReg();
     unsigned Size = MRI.getType(OpReg).getSizeInBits();
 
-    if (Size == 64 && STI.isFPOnlySP()) {
+    if (Size == 64 && !STI.hasFP64()) {
       LLVM_DEBUG(dbgs() << "Subtarget only supports single precision");
       return false;
     }
@@ -1087,7 +1088,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
     LLT ValTy = MRI.getType(Reg);
     const auto ValSize = ValTy.getSizeInBits();
 
-    assert((ValSize != 64 || STI.hasVFP2()) &&
+    assert((ValSize != 64 || STI.hasVFP2Base()) &&
            "Don't know how to load/store 64-bit value without VFP");
 
     const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 8f2029312d24f..458cafdc7a583 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -157,7 +157,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
 
   getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
 
-  if (!ST.useSoftFloat() && ST.hasVFP2()) {
+  if (!ST.useSoftFloat() && ST.hasVFP2Base()) {
     getActionDefinitionsBuilder(
         {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG})
         .legalFor({s32, s64});
@@ -208,7 +208,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
         .libcallForCartesianProduct({s32, s64}, {s32});
   }
 
-  if (!ST.useSoftFloat() && ST.hasVFP4())
+  if (!ST.useSoftFloat() && ST.hasVFP4Base())
     getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64});
   else
     getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64});
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index d03b482043eed..4566ac2c9dd0b 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -453,7 +453,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     for (const auto &Mapping : OperandsMapping[i]) {
       assert(
           (Mapping.RegBank->getID() != ARM::FPRRegBankID ||
-           MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+           MF.getSubtarget<ARMSubtarget>().hasVFP2Base()) &&
           "Trying to use floating point register bank on target without vfp");
     }
   }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 9500a9faf4e10..abedc6f6d81d3 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -166,6 +166,21 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool HasFPARMv8 = false;
   bool HasNEON = false;
 
+  /// Versions of the VFP flags restricted to single precision, or to
+  /// 16 d-registers, or both.
+  bool HasVFPv2SP = false;
+  bool HasVFPv3SP = false;
+  bool HasVFPv4SP = false;
+  bool HasFPARMv8SP = false;
+  bool HasVFPv2D16 = false;
+  bool HasVFPv3D16 = false;
+  bool HasVFPv4D16 = false;
+  bool HasFPARMv8D16 = false;
+  bool HasVFPv2D16SP = false;
+  bool HasVFPv3D16SP = false;
+  bool HasVFPv4D16SP = false;
+  bool HasFPARMv8D16SP = false;
+
   /// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
   bool HasDotProd = false;
 
@@ -232,9 +247,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// HasFP16FML - True if subtarget supports half-precision FP fml operations
   bool HasFP16FML = false;
 
-  /// HasD16 - True if subtarget is limited to 16 double precision
+  /// HasD32 - True if subtarget has the full 32 double precision
   /// FP registers for VFPv3.
-  bool HasD16 = false;
+  bool HasD32 = false;
 
   /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
   bool HasHardwareDivideInThumb = false;
@@ -291,9 +306,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// extension.
   bool HasVirtualization = false;
 
-  /// FPOnlySP - If true, the floating point unit only supports single
+  /// HasFP64 - If true, the floating point unit supports double
   /// precision.
-  bool FPOnlySP = false;
+  bool HasFP64 = false;
 
   /// If true, the processor supports the Performance Monitor Extensions. These
   /// include a generic cycle-counter as well as more fine-grained (often
@@ -569,10 +584,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
 
   bool hasARMOps() const { return !NoARM; }
 
-  bool hasVFP2() const { return HasVFPv2; }
-  bool hasVFP3() const { return HasVFPv3; }
-  bool hasVFP4() const { return HasVFPv4; }
-  bool hasFPARMv8() const { return HasFPARMv8; }
+  bool hasVFP2Base() const { return HasVFPv2D16SP; }
+  bool hasVFP3Base() const { return HasVFPv3D16SP; }
+  bool hasVFP4Base() const { return HasVFPv4D16SP; }
+  bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
   bool hasNEON() const { return HasNEON;  }
   bool hasSHA2() const { return HasSHA2; }
   bool hasAES() const { return HasAES; }
@@ -601,7 +616,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool useFPVMLx() const { return !SlowFPVMLx; }
   bool hasVMLxForwarding() const { return HasVMLxForwarding; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
-  bool isFPOnlySP() const { return FPOnlySP; }
+  bool hasFP64() const { return HasFP64; }
   bool hasPerfMon() const { return HasPerfMon; }
   bool hasTrustZone() const { return HasTrustZone; }
   bool has8MSecExt() const { return Has8MSecExt; }
@@ -638,7 +653,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool genExecuteOnly() const { return GenExecuteOnly; }
 
   bool hasFP16() const { return HasFP16; }
-  bool hasD16() const { return HasD16; }
+  bool hasD32() const { return HasD32; }
   bool hasFullFP16() const { return HasFullFP16; }
   bool hasFP16FML() const { return HasFP16FML; }
 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 2fbcd8b2ba6ee..882a63c33a579 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -48,7 +48,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
   const ARMTargetLowering *TLI;
 
   // Currently the following features are excluded from InlineFeatureWhitelist.
-  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
+  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
   // Depending on whether they are set or unset, different
   // instructions/registers are available. For example, inlining a callee with
   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f4af747f3ee4c..f8a00f713e4d0 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -492,8 +492,8 @@ class ARMAsmParser : public MCTargetAsmParser {
     return getSTI().getFeatureBits()[ARM::FeatureDSP];
   }
 
-  bool hasD16() const {
-    return getSTI().getFeatureBits()[ARM::FeatureD16];
+  bool hasD32() const {
+    return getSTI().getFeatureBits()[ARM::FeatureD32];
   }
 
   bool hasV8_1aOps() const {
@@ -3424,7 +3424,7 @@ int ARMAsmParser::tryParseRegister() {
   }
 
   // Some FPUs only have 16 D registers, so D16-D31 are invalid
-  if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+  if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
     return -1;
 
   Parser.Lex(); // Eat identifier token.
@@ -10415,11 +10415,11 @@ ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) {
                       : "operand must be a register in range [r0, r12] or r14";
   // DPR contains 16 registers for some FPUs, and 32 for others.
   case Match_DPR:
-    return hasD16() ? "operand must be a register in range [d0, d15]"
-                    : "operand must be a register in range [d0, d31]";
+    return hasD32() ? "operand must be a register in range [d0, d31]"
+                    : "operand must be a register in range [d0, d15]";
   case Match_DPR_RegList:
-    return hasD16() ? "operand must be a list of registers in range [d0, d15]"
-                    : "operand must be a list of registers in range [d0, d31]";
+    return hasD32() ? "operand must be a list of registers in range [d0, d31]"
+                    : "operand must be a list of registers in range [d0, d15]";
 
   // For all other diags, use the static string from tablegen.
   default:
@@ -10621,14 +10621,15 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
     { ARM::AEK_CRC, {Feature_HasV8Bit}, {ARM::FeatureCRC} },
     { ARM::AEK_CRYPTO,  {Feature_HasV8Bit},
       {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
-    { ARM::AEK_FP, {Feature_HasV8Bit}, {ARM::FeatureFPARMv8} },
+    { ARM::AEK_FP, {Feature_HasV8Bit},
+      {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
     { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM),
       {Feature_HasV7Bit, Feature_IsNotMClassBit},
       {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
     { ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit},
       {ARM::FeatureMP} },
     { ARM::AEK_SIMD, {Feature_HasV8Bit},
-      {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+      {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
     { ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} },
     // FIXME: Only available in A-class, isel not predicated
     { ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} },
@@ -10678,12 +10679,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
                                "allowed for the current base architecture");
 
     MCSubtargetInfo &STI = copySTI();
-    FeatureBitset ToggleFeatures = EnableFeature
-      ? (~STI.getFeatureBits() & Extension.Features)
-      : ( STI.getFeatureBits() & Extension.Features);
-
-    FeatureBitset Features =
-        ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+    if (EnableFeature) {
+      STI.SetFeatureBitsTransitively(Extension.Features);
+    } else {
+      STI.ClearFeatureBitsTransitively(Extension.Features);
+    }
+    FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
     setAvailableFeatures(Features);
     return false;
   }
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index d4b2be7d381c3..6948f7af4693f 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1043,9 +1043,9 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
   const FeatureBitset &featureBits =
     ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
 
-  bool hasD16 = featureBits[ARM::FeatureD16];
+  bool hasD32 = featureBits[ARM::FeatureD32];
 
-  if (RegNo > 31 || (hasD16 && RegNo > 15))
+  if (RegNo > 31 || (!hasD32 && RegNo > 15))
     return MCDisassembler::Fail;
 
   unsigned Register = DPRDecoderTable[RegNo];
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 8f9c66507a47a..9502a5d7c393b 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -222,37 +222,37 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
                         ? ARMBuildAttrs::AllowNeonARMv8_1a
                         : ARMBuildAttrs::AllowNeonARMv8);
   } else {
-    if (STI.hasFeature(ARM::FeatureFPARMv8))
+    if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP))
       // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
       // FPU, but there are two different names for it depending on the CPU.
-      emitFPU(STI.hasFeature(ARM::FeatureD16)
-                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
-                                                           : ARM::FK_FPV5_D16)
-                  : ARM::FK_FP_ARMV8);
-    else if (STI.hasFeature(ARM::FeatureVFP4))
-      emitFPU(STI.hasFeature(ARM::FeatureD16)
-                  ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
-                                                           : ARM::FK_VFPV4_D16)
-                  : ARM::FK_VFPV4);
-    else if (STI.hasFeature(ARM::FeatureVFP3))
+      emitFPU(STI.hasFeature(ARM::FeatureD32)
+                  ? ARM::FK_FP_ARMV8
+                  : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
+                                                      : ARM::FK_FPV5_SP_D16));
+    else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
+      emitFPU(STI.hasFeature(ARM::FeatureD32)
+                  ? ARM::FK_VFPV4
+                  : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16
+                                                      : ARM::FK_FPV4_SP_D16));
+    else if (STI.hasFeature(ARM::FeatureVFP3_D16_SP))
       emitFPU(
-          STI.hasFeature(ARM::FeatureD16)
-              // +d16
-              ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
-                     ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
-                                                         : ARM::FK_VFPV3XD)
-                     : (STI.hasFeature(ARM::FeatureFP16)
+          STI.hasFeature(ARM::FeatureD32)
+              // +d32
+              ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+                                                  : ARM::FK_VFPV3)
+              // -d32
+              : (STI.hasFeature(ARM::FeatureFP64)
+                     ? (STI.hasFeature(ARM::FeatureFP16)
                             ? ARM::FK_VFPV3_D16_FP16
-                            : ARM::FK_VFPV3_D16))
-              // -d16
-              : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
-                                                  : ARM::FK_VFPV3));
-    else if (STI.hasFeature(ARM::FeatureVFP2))
+                            : ARM::FK_VFPV3_D16)
+                     : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+                                                         : ARM::FK_VFPV3XD)));
+    else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP))
       emitFPU(ARM::FK_VFPV2);
   }
 
   // ABI_HardFP_use attribute to indicate single precision FP.
-  if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+  if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64))
     emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
                   ARMBuildAttrs::HardFPSinglePrecision);
 
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
index fd3e20c40c0b0..63137071ed683 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir
@@ -11,7 +11,7 @@
   define void @test_gep_s16() { ret void }
 
   attributes #0 = { "target-features"="+vfp2" }
-  attributes #1 = { "target-features"="-vfp2" }
+  attributes #1 = { "target-features"="-vfp2d16sp" }
 ...
 ---
 name:            test_legal_loads_stores
diff --git a/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll b/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll
index 00c5914b34b8b..c159ca49c4420 100644
--- a/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll
+++ b/llvm/test/CodeGen/ARM/arm-storebytesmerge.ll
@@ -337,5 +337,5 @@ define arm_aapcs_vfpcc void @test(i8* %v50) #0 {
   ret void
   }
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="+d16,+dsp,+fp-armv8,+hwdiv,+thumb-mode,-crc,-crypto,-dotprod,-fp-only-sp,-fullfp16,-hwdiv-arm,-neon,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="-d32,+dsp,+fp-armv8,+hwdiv,+thumb-mode,-crc,-crypto,-dotprod,-fullfp16,-hwdiv-arm,-neon,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
diff --git a/llvm/test/CodeGen/ARM/arm32-rounding.ll b/llvm/test/CodeGen/ARM/arm32-rounding.ll
index f247648d814a5..b0a9f54e42404 100644
--- a/llvm/test/CodeGen/ARM/arm32-rounding.ll
+++ b/llvm/test/CodeGen/ARM/arm32-rounding.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 | FileCheck --check-prefix=CHECK --check-prefix=DP %s
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,+d16,+fp-only-sp | FileCheck --check-prefix=SP %s
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,+d16 | FileCheck --check-prefix=DP %s
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,-d32,-fp64 | FileCheck --check-prefix=SP %s
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,-d32 | FileCheck --check-prefix=DP %s
 
 ; CHECK-LABEL: test1
 ; CHECK: vrintm.f32
diff --git a/llvm/test/CodeGen/ARM/build-attributes.ll b/llvm/test/CodeGen/ARM/build-attributes.ll
index 32ffa457388ca..f349530fb4893 100644
--- a/llvm/test/CodeGen/ARM/build-attributes.ll
+++ b/llvm/test/CodeGen/ARM/build-attributes.ll
@@ -33,9 +33,9 @@
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,+d16 | FileCheck %s --check-prefix=CORTEX-A5-NONEON
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A5-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,-d32 | FileCheck %s --check-prefix=CORTEX-A5-NONEON
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2d16sp | FileCheck %s --check-prefix=CORTEX-A5-NOFPU
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2d16sp  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-NOFPU-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A8-SOFT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=soft  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A8-SOFT-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A8-HARD
@@ -50,16 +50,16 @@
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT-FAST
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A12-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2d16sp | FileCheck %s --check-prefix=CORTEX-A12-NOFPU
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2d16sp  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-NOFPU-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A15-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 | FileCheck %s --check-prefix=CORTEX-A17-DEFAULT
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2d16sp | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2d16sp  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
 
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-no-trapping-fp-math | FileCheck %s --check-prefix=NO-TRAPPING-MATH
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -denormal-fp-math=ieee | FileCheck %s --check-prefix=DENORMAL-IEEE
@@ -67,9 +67,9 @@
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -denormal-fp-math=positive-zero | FileCheck %s --check-prefix=DENORMAL-POSITIVE-ZERO
 
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-FP16
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,-d32,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,-fp64,-d32 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,-fp64,-d32,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16
 
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -96,10 +96,10 @@
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-HARD-FAST
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SOFT
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-NOFPU-FAST
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SINGLE
-; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2d16sp | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SOFT
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2d16sp  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-NOFPU-FAST
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SINGLE
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-fp64  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
 ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE
 ; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -mcpu=cortex-m23 | FileCheck %s --check-prefix=CORTEX-M23
@@ -157,12 +157,12 @@
 ; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s  --check-prefix=CORTEX-A7-CHECK
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s  --check-prefix=CORTEX-A7-CHECK-FAST
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon,-fp16 | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon,-fp16  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2d16sp,-vfp3,-vfp4,-neon,-fp16 | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2d16sp,-vfp3,-vfp4,-neon,-fp16  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon  -enable-unsafe-fp-math -frame-pointer=all -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-FPUV4-FAST
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,-d32,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=RELOC-OTHER
@@ -230,8 +230,8 @@
 ; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
 
 ; ARMv8-R
-; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 -mattr=-vfp2,-fp16 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-NOFPU
-; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 -mattr=-neon,+fp-only-sp,+d16 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-SP
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 -mattr=-vfp2d16sp,-fp16 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-NOFPU
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 -mattr=-neon,-fp64,-d32 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-SP
 ; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=ARMv8R --check-prefix=ARMv8R-NEON
 
 ; ARMv8-M
diff --git a/llvm/test/CodeGen/ARM/fast-isel-call.ll b/llvm/test/CodeGen/ARM/fast-isel-call.ll
index e6094cb63a1bf..3e5c79dc63308 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-call.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-call.ll
@@ -4,9 +4,9 @@
 ; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-MACHO
 ; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+long-calls | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-ELF
 ; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=+long-calls | FileCheck %s --check-prefix=THUMB-LONG
-; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
+; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2d16sp | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2d16sp | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc -fast-isel-sink-local-values < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2d16sp | FileCheck %s --check-prefix=THUMB-NOVFP
 
 ; Note that some of these tests assume that relocations are either
 ; movw/movt or constant pool loads. Different platforms will select
diff --git a/llvm/test/CodeGen/ARM/float-helpers.s b/llvm/test/CodeGen/ARM/float-helpers.s
index 42ab56084d45c..d5388a372b887 100644
--- a/llvm/test/CodeGen/ARM/float-helpers.s
+++ b/llvm/test/CodeGen/ARM/float-helpers.s
@@ -5,8 +5,8 @@
 ; RUN: llc -asm-verbose=false -mattr=+vfp3 -meabi=gnu -mtriple=arm-eabi < %s | FileCheck %s -check-prefix=CHECK-SOFTFP
 ; RUN: llc -asm-verbose=false -mattr=+vfp3 -float-abi=hard -mtriple=arm-eabi < %s | FileCheck %s -check-prefix=CHECK-HARDFP-SP -check-prefix=CHECK-HARDFP-DP
 ; RUN: llc -asm-verbose=false -mattr=+vfp3 -float-abi=hard -meabi=gnu -mtriple=arm-eabi < %s | FileCheck %s -check-prefix=CHECK-HARDFP-SP -check-prefix=CHECK-HARDFP-DP
-; RUN: llc -asm-verbose=false -mattr=+vfp3,+fp-only-sp -float-abi=hard -mtriple=arm-eabi < %s | FileCheck %s -check-prefix=CHECK-HARDFP-SP -check-prefix=CHECK-HARDFP-SPONLY
-; RUN: llc -asm-verbose=false -mattr=+vfp3,+fp-only-sp -float-abi=hard -mtriple=arm-eabi -meabi=gnu < %s | FileCheck %s -check-prefix=CHECK-HARDFP-SP -check-prefix=CHECK-HARDFP-SPONLY
+; RUN: llc -asm-verbose=false -mattr=+vfp3,-fp64 -float-abi=hard -mtriple=arm-eabi < %s | FileCheck %s -check-prefix=CHECK-HARDFP-SP -check-prefix=CHECK-HARDFP-SPONLY
+; RUN: llc -asm-verbose=false -mattr=+vfp3,-fp64 -float-abi=hard -mtriple=arm-eabi -meabi=gnu < %s | FileCheck %s -check-prefix=CHECK-HARDFP-SP -check-prefix=CHECK-HARDFP-SPONLY
 
 ; The Runtime ABI for the ARM Architecture IHI0043 section 4.1.2 The
 ; floating-point helper functions to always use the base AAPCS (soft-float)
diff --git a/llvm/test/CodeGen/ARM/fp-only-sp.ll b/llvm/test/CodeGen/ARM/fp-only-sp.ll
index 2c7b2acbde9c5..ebfa41a8294de 100644
--- a/llvm/test/CodeGen/ARM/fp-only-sp.ll
+++ b/llvm/test/CodeGen/ARM/fp-only-sp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=thumbv7em-apple-macho -mcpu=cortex-m4 %s -o - -O0 | FileCheck %s
 ; RUN: llc -mtriple=thumbv7em-apple-macho -mcpu=cortex-m4 %s -o - | FileCheck %s
 
-; Note: vldr and vstr really do have 64-bit variants even with fp-only-sp
+; Note: vldr and vstr really do have 64-bit variants even with -fp64
 define void @test_load_store(double* %addr) {
 ; CHECK-LABEL: test_load_store:
 ; CHECK: vldr [[TMP:d[0-9]+]], [r0]
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index 514d3c7ae0a81..e27631ce16429 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -5,28 +5,28 @@
 ; SOFTFP:
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
-; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
 
 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
 ; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
-; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
+; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
 
 ; Test fast-isel
-; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
-; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
+; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16,+fp64 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
 
 ; HARD:
 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
-; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
 
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,fp64  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
 
 ; FP-CONTRACT=FAST
-; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16,+fp64 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
 
 ; TODO: we can't pass half-precision arguments as "half" types yet. We do
 ; that for the time being by passing "float %f.coerce" and the necessary
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index d7eaddc9e408a..855f8d55dcb87 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FP16  --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
 ; RUN: llc -asm-verbose=false < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
-; RUN: llc -asm-verbose=false < %s -mattr=-vfp2 | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=-vfp2d16sp | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "armv7---eabihf"
diff --git a/llvm/test/CodeGen/ARM/fpconv.ll b/llvm/test/CodeGen/ARM/fpconv.ll
index 8d740d88fc4ac..929da5f18c813 100644
--- a/llvm/test/CodeGen/ARM/fpconv.ll
+++ b/llvm/test/CodeGen/ARM/fpconv.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - | FileCheck %s --check-prefix=CHECK-VFP
 ; RUN: llc -mtriple=arm-apple-darwin %s -o - | FileCheck %s
 ; RUN: llc -mtriple=armv8r-none-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-VFP
-; RUN: llc -mtriple=armv8r-none-none-eabi -mattr=+fp-only-sp %s -o - | FileCheck %s --check-prefix=CHECK-VFP-SP
+; RUN: llc -mtriple=armv8r-none-none-eabi -mattr=-fp64 %s -o - | FileCheck %s --check-prefix=CHECK-VFP-SP
 
 define float @f1(double %x) {
 ;CHECK-VFP-LABEL: f1:
diff --git a/llvm/test/CodeGen/ARM/half.ll b/llvm/test/CodeGen/ARM/half.ll
index a334adc379168..6759a0576e75c 100644
--- a/llvm/test/CodeGen/ARM/half.ll
+++ b/llvm/test/CodeGen/ARM/half.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=thumbv7s-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-F16
 ; RUN: llc < %s -mtriple=thumbv8-apple-ios7.0 | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8
 ; RUN: llc < %s -mtriple=armv8r-none-none-eabi | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8
-; RUN: llc < %s -mtriple=armv8r-none-none-eabi -mattr=+fp-only-sp | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8-SP
+; RUN: llc < %s -mtriple=armv8r-none-none-eabi -mattr=-fp64 | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8-SP
 
 define void @test_load_store(half* %in, half* %out) {
 ; CHECK-LABEL: test_load_store:
diff --git a/llvm/test/CodeGen/ARM/inlineasm-X-allocation.ll b/llvm/test/CodeGen/ARM/inlineasm-X-allocation.ll
index b2cb932f90552..ff8dba6e38c01 100644
--- a/llvm/test/CodeGen/ARM/inlineasm-X-allocation.ll
+++ b/llvm/test/CodeGen/ARM/inlineasm-X-allocation.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=armv7-none-eabi -mattr=-neon,-vfp2 %s -o - | FileCheck %s  -check-prefixes=COMMON,NOVFP
+; RUN: llc -mtriple=armv7-none-eabi -mattr=-neon,-vfp2d16sp %s -o - | FileCheck %s  -check-prefixes=COMMON,NOVFP
 ; RUN: llc -mtriple=armv7-none-eabi -mattr=+neon %s -float-abi=hard -o - | FileCheck %s -check-prefixes=COMMON,VFP
 
 ; The intent here is to test "X", which says that any operand whatsoever is allowed.
diff --git a/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll b/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll
index 7b98f0f0de314..8ae9f704fb9f4 100644
--- a/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll
+++ b/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll
@@ -134,7 +134,7 @@ define arm_aapcscc double @dbl_gprs_matching_spec_reg_in_op_soft(double %d1, dou
   ret double %add
 }
 
-attributes #0 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="true" }
+attributes #0 = { nounwind "target-features"="-d32,+vfp2,+vfp3" "use-soft-float"="true" }
 
 
 ; Check support for returning a float in GPR with hard float ABI
@@ -304,4 +304,4 @@ define %struct.twodouble @dbl_gprs_matching_spec_reg_in_op_hard(double %d1, doub
   ret %struct.twodouble %res
 }
 
-attributes #1 = { nounwind "target-features"="+d16,+vfp2,+vfp3,-fp-only-sp" "use-soft-float"="false" }
+attributes #1 = { nounwind "target-features"="-d32,+vfp2,+vfp3" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/ARM/no-fpu.ll b/llvm/test/CodeGen/ARM/no-fpu.ll
index c5d1f1951d7c1..13da7190a9fdf 100644
--- a/llvm/test/CodeGen/ARM/no-fpu.ll
+++ b/llvm/test/CodeGen/ARM/no-fpu.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,-vfp2 | FileCheck --check-prefix=NONEON-NOVFP %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,-vfp2d16sp | FileCheck --check-prefix=NONEON-NOVFP %s
 ; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon | FileCheck --check-prefix=NONEON %s
-; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-vfp2 | FileCheck --check-prefix=NOVFP %s
+; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-vfp2d16sp | FileCheck --check-prefix=NOVFP %s
 ; RUN: llc < %s -mtriple=armv7-none-gnueabi -mattr=-neon,+vfp2 | FileCheck --check-prefix=NONEON-VFP %s
 
 ; Check no NEON instructions are selected when feature is disabled.
diff --git a/llvm/test/CodeGen/Thumb2/aapcs.ll b/llvm/test/CodeGen/Thumb2/aapcs.ll
index 179c35c052a4f..651b9945825c7 100644
--- a/llvm/test/CodeGen/Thumb2/aapcs.ll
+++ b/llvm/test/CodeGen/Thumb2/aapcs.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m4 -mattr=-vfp2             | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3             | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
 
 define float @float_in_reg(float %a, float %b) {
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
index 657d1b172da98..05d303adb5565 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3                    | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4                    | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP  -check-prefix=FP-ARMv8
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8
 
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
index 8ee2af03eca7d..ec81164b422b1 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=NO-VMLA
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m33                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=NO-VMLA
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP  -check-prefix=FP-ARMv8  -check-prefix=VMLA
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 -check-prefix=NO-VMLA
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8 -check-prefix=VMLA
 
diff --git a/llvm/test/CodeGen/Thumb2/t2sizereduction.mir b/llvm/test/CodeGen/Thumb2/t2sizereduction.mir
index 6b05f7f42a5d2..aa92b19dbdcb6 100644
--- a/llvm/test/CodeGen/Thumb2/t2sizereduction.mir
+++ b/llvm/test/CodeGen/Thumb2/t2sizereduction.mir
@@ -29,7 +29,7 @@
     br i1 %exitcond, label %for.cond.cleanup, label %for.body
   }
 
-  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="+d16,+dsp,+fp-armv8,+fp-only-sp,+hwdiv,+strict-align,+thumb-mode,-crc,-dotprod,-hwdiv-arm,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="-d32,+dsp,+fp-armv8,-fp64,+hwdiv,+strict-align,+thumb-mode,-crc,-dotprod,-hwdiv-arm,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 ...
 ---
diff --git a/llvm/test/MC/ARM/armv8.3a-js.s b/llvm/test/MC/ARM/armv8.3a-js.s
index fbbdd981864ae..9a2ab9cfe5285 100644
--- a/llvm/test/MC/ARM/armv8.3a-js.s
+++ b/llvm/test/MC/ARM/armv8.3a-js.s
@@ -1,7 +1,7 @@
 // RUN:     llvm-mc -triple   arm-none-none-eabi -show-encoding -mattr=+v8.3a,+fp-armv8 < %s 2>&1 | FileCheck %s --check-prefix=ARM
 // RUN:     llvm-mc -triple thumb-none-none-eabi -show-encoding -mattr=+v8.3a,+fp-armv8 < %s 2>&1 | FileCheck %s --check-prefix=THUMB
 // RUN: not llvm-mc -triple   arm-none-none-eabi -show-encoding -mattr=+v8.2a,+fp-armv8 < %s 2>&1 | FileCheck --check-prefix=REQ-V83 %s
-// RUN: not llvm-mc -triple   arm-none-none-eabi -show-encoding -mattr=+v8.3a,-fp-armv8 < %s 2>&1 | FileCheck --check-prefix=REQ-FP %s
+// RUN: not llvm-mc -triple   arm-none-none-eabi -show-encoding -mattr=+v8.3a,-fp-armv8d16fp < %s 2>&1 | FileCheck --check-prefix=REQ-FP %s
 
   vjcvt.s32.f64 s1, d2
 // ARM: vjcvt.s32.f64 s1, d2    @ encoding: [0xc2,0x0b,0xf9,0xee]
@@ -13,4 +13,4 @@
 // ARM: vjcvt.s32.f64 s17, d18    @ encoding: [0xe2,0x8b,0xf9,0xee]
 // THUMB: vjcvt.s32.f64 s17, d18    @ encoding: [0xf9,0xee,0xe2,0x8b]
 // REQ-V83: error: instruction requires: armv8.3a
-// REQ-FP: error: instruction requires: FPARMv8
+// REQ-FP: error: invalid instruction
diff --git a/llvm/test/MC/ARM/d16.s b/llvm/test/MC/ARM/d16.s
index 648992e9a7b90..67b5095a13282 100644
--- a/llvm/test/MC/ARM/d16.s
+++ b/llvm/test/MC/ARM/d16.s
@@ -1,5 +1,5 @@
-@ RUN:     llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+vfp4,-d16 2>&1 | FileCheck %s --check-prefix=D32
-@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+vfp4,+d16 2>&1 | FileCheck %s --check-prefix=D16
+@ RUN:     llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+vfp4,+d32 2>&1 | FileCheck %s --check-prefix=D32
+@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+vfp4,-d32 2>&1 | FileCheck %s --check-prefix=D16
 
 @ D32-NOT: error:
 
diff --git a/llvm/test/MC/ARM/invalid-neon-v8.s b/llvm/test/MC/ARM/invalid-neon-v8.s
index cae1fb331cf50..ff0873995956f 100644
--- a/llvm/test/MC/ARM/invalid-neon-v8.s
+++ b/llvm/test/MC/ARM/invalid-neon-v8.s
@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc -triple armv8 -mattr=-fp-armv8 -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple armv8 -mattr=-fp-armv8d16sp -show-encoding < %s 2>&1 | FileCheck %s
 
 vmaxnm.f32 s4, d5, q1
 @ CHECK: error: invalid instruction
diff --git a/llvm/test/MC/ARM/single-precision-fp.s b/llvm/test/MC/ARM/single-precision-fp.s
index f658e712319be..9de4b10183737 100644
--- a/llvm/test/MC/ARM/single-precision-fp.s
+++ b/llvm/test/MC/ARM/single-precision-fp.s
@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=+fp-only-sp,-neon 2> %t > %t2
+@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-neon 2> %t > %t2
 @ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
 @ RUN:     FileCheck %s < %t2
 
diff --git a/llvm/test/MC/ARM/vldm-vstm-diags.s b/llvm/test/MC/ARM/vldm-vstm-diags.s
index 854d5c55f2aa8..acefa71ccaf3f 100644
--- a/llvm/test/MC/ARM/vldm-vstm-diags.s
+++ b/llvm/test/MC/ARM/vldm-vstm-diags.s
@@ -1,5 +1,5 @@
 @ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null             %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-D32
-@ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null -mattr=+d16 %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-D16
+@ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null -mattr=-d32 %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-D16
 
   // First operand must be a GPR
   vldm s0, {s1, s2}
diff --git a/llvm/test/Transforms/Inline/ARM/inline-fp.ll b/llvm/test/Transforms/Inline/ARM/inline-fp.ll
index be3dd2a93fd6b..fdc066c9ba685 100644
--- a/llvm/test/Transforms/Inline/ARM/inline-fp.ll
+++ b/llvm/test/Transforms/Inline/ARM/inline-fp.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -S -inline -mtriple=arm-eabi -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=NOFP
 ; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=FULLFP
-; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2,+fp-only-sp -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2,-fp64 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP
 ; Make sure that soft float implementations are calculated as being more expensive
 ; to the inliner.
 
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll b/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
index dc6adfb8e2496..7bbfb7ef17ade 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll
@@ -66,7 +66,7 @@ lee1.exit:                                        ; preds = %lee1.exit.loopexit,
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+neon,+strict-align,+vfp3,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+neon,+strict-align,+vfp3,-crypto,-fp-armv8,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}

From 800db530d9fa1ed03a4facbb9e058413f4eca42c Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 28 May 2019 16:28:27 +0000
Subject: [PATCH 0376/1176] [clangd] Fix test output for r361841

llvm-svn: 361846
---
 clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 4dcb8701a3594..a7aa16569bfe6 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -2397,7 +2397,7 @@ TEST(CompletionTest, CursorInSnippets) {
   EXPECT_THAT(
       Results.Completions,
       Contains(AllOf(Named("while"),
-                     SnippetSuffix("(${1:condition}){${0:statements}\n}"))));
+                     SnippetSuffix("(${1:condition}){\n${0:statements}\n}"))));
   // However, snippets for functions must *not* end with $0.
   EXPECT_THAT(Results.Completions,
               Contains(AllOf(Named("while_foo"),

From 7166843f1e10efbdd3a24fccb15ad33bfb6f0f70 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 28 May 2019 16:29:39 +0000
Subject: [PATCH 0377/1176] [AMDGPU] Fix the mis-handling of `vreg_1` copied
 from scalar register.

Summary:
- Don't treat the use of a scalar register as `vreg_1` an VGPR usage.
  Otherwise, that promotes that scalar register into vector one, which
  breaks the assumption that scalar register holds the lane mask.
- The issue is triggered in a complicated case, where if the uses of
  that (lane mask) scalar register is legalized firstly before its
  definition, e.g., due to the mismatch block placement and its
  topological order or loop. In that cases, the legalization of PHI
  introduces the use of that scalar register as `vreg_1`.

Reviewers: rampitec, nhaehnle, arsenm, alex-t

Subscribers: kzhuravl, jvesely, wdng, dstuttard, tpr, t-tye, hiraditya, llvm-commits, yaxunl

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62492

llvm-svn: 361847
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp   |  6 ++-
 llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir | 44 ++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index d20910baed307..fb151b4ffdc15 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -588,7 +588,9 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
             }
 
             if (UseMI->isPHI()) {
-              if (!TRI->isSGPRReg(MRI, Use.getReg()))
+              const TargetRegisterClass *UseRC = MRI.getRegClass(Use.getReg());
+              if (!TRI->isSGPRReg(MRI, Use.getReg()) &&
+                  UseRC != &AMDGPU::VReg_1RegClass)
                 hasVGPRUses++;
               continue;
             }
@@ -633,8 +635,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 
         if ((!TRI->isVGPR(MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
             (hasVGPRInput || hasVGPRUses > 1)) {
+          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
           TII->moveToVALU(MI);
         } else {
+          LLVM_DEBUG(dbgs() << "Legalizing PHI: " << MI);
           TII->legalizeOperands(MI, MDT);
         }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir
index 3d6e05cb2c9b1..306e62a430920 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir
@@ -16,3 +16,47 @@ body:               |
     %6:sreg_32 = S_ADD_I32 %2:sreg_32, %5:sreg_32, implicit-def $scc
     %7:sreg_32 = S_ADDC_U32 %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $scc
 ...
+
+# Test to ensure i1 phi copies from scalar registers through another phi won't
+# be promoted into vector ones.
+# GCN-LABEL: name: fix-sgpr-i1-phi-copies
+# GCN: .8:
+# GCN-NOT: vreg_64 = PHI
+---
+name: fix-sgpr-i1-phi-copies
+tracksRegLiveness: true
+body:               |
+  bb.9:
+    S_BRANCH %bb.0
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.6, implicit undef $scc
+
+  bb.5:
+    %3:vreg_1 = IMPLICIT_DEF
+
+  bb.6:
+    %4:vreg_1 = PHI %2:sreg_64, %bb.4, %3:vreg_1, %bb.5
+
+  bb.7:
+    %5:vreg_1 = PHI %2:sreg_64, %bb.3, %4:vreg_1, %bb.6
+    S_BRANCH %bb.8
+
+  bb.0:
+    S_CBRANCH_SCC1 %bb.2, implicit undef $scc
+
+  bb.1:
+    %0:sreg_64 = S_MOV_B64 0
+    S_BRANCH %bb.3
+
+  bb.2:
+    %1:sreg_64 = S_MOV_B64 -1
+    S_BRANCH %bb.3
+
+  bb.3:
+    %2:sreg_64 = PHI %0:sreg_64, %bb.1, %1:sreg_64, %bb.2
+    S_CBRANCH_SCC1 %bb.7, implicit undef $scc
+    S_BRANCH %bb.4
+
+  bb.8:
+...

From 24e80b8d042a1bcf8a3dd6aeb6275c697f83c659 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 28 May 2019 16:46:02 +0000
Subject: [PATCH 0378/1176] AMDGPU: Don't enable all lanes with non-CSR VGPR
 spills

If the only VGPRs used for SGPR spilling were not CSRs, this was
enabling all laness and immediately restoring exec. This is the usual
situation in leaf functions.

llvm-svn: 361848
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    | 88 +++++++++++--------
 .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 16 ++++
 2 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 1eea77be6200e..e333154f83bfd 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -613,30 +613,36 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    if (LiveRegs.empty()) {
-      LiveRegs.init(TRI);
-      LiveRegs.addLiveIns(MBB);
-    }
+  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+  // turn on all lanes before doing the spill to memory.
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
 
-    // To avoid clobbering VGPRs in lanes that weren't active on function entry,
-    // turn on all lanes before doing the spill to memory.
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
-                               Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                               &TII->getRegisterInfo());
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
+
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      if (LiveRegs.empty()) {
+        LiveRegs.init(TRI);
+        LiveRegs.addLiveIns(MBB);
+      }
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
+              ScratchExecCopy)
+        .addImm(-1);
     }
 
+    TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
+                             Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                             &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
@@ -654,27 +660,31 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc DL;
 
-  if (!FuncInfo->getSGPRSpillVGPRs().empty()) {
-    // See emitPrologue
-    LivePhysRegs LiveRegs(*ST.getRegisterInfo());
-    LiveRegs.addLiveIns(MBB);
+  unsigned ScratchExecCopy = AMDGPU::NoRegister;
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+         : FuncInfo->getSGPRSpillVGPRs()) {
+    if (!Reg.FI.hasValue())
+      continue;
 
-    unsigned ScratchExecCopy
-      = findScratchNonCalleeSaveRegister(MF, LiveRegs,
-                                         AMDGPU::SReg_64_XEXECRegClass);
-
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
-      .addImm(-1);
-
-    for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-           : FuncInfo->getSGPRSpillVGPRs()) {
-      if (!Reg.FI.hasValue())
-        continue;
-      TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
-                                Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
-                                &TII->getRegisterInfo());
+    if (ScratchExecCopy == AMDGPU::NoRegister) {
+      // See emitPrologue
+      LivePhysRegs LiveRegs(*ST.getRegisterInfo());
+      LiveRegs.addLiveIns(MBB);
+
+      ScratchExecCopy
+        = findScratchNonCalleeSaveRegister(MF, LiveRegs,
+                                           AMDGPU::SReg_64_XEXECRegClass);
+
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy)
+        .addImm(-1);
     }
 
+    TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
+                              Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
+                              &TII->getRegisterInfo());
+  }
+
+  if (ScratchExecCopy != AMDGPU::NoRegister) {
     // FIXME: Split block and make terminator.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
       .addReg(ScratchExecCopy);
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index ebd6f96a5b836..bc9160772e2cb 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -135,5 +135,21 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
   ret void
 }
 
+; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
+; enable all lanes and restore.
+
+; GCN-LABEL: {{^}}spill_only_csr_sgpr:
+; GCN: s_waitcnt
+; GCN-NEXT: v_writelane_b32 v0, s42, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ; clobber s42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s42, v0, 0
+; GCN-NEXT: s_setpc_b64
+define void @spill_only_csr_sgpr() {
+  call void asm sideeffect "; clobber s42", "~{s42}"()
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "no-frame-pointer-elim"="true" }

From 04a087ace786b81711a1eaf8f7f092bb31d6dd25 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 28 May 2019 17:34:05 +0000
Subject: [PATCH 0379/1176] [DWARFExpression] Remove ctor that takes just a
 compile unit.

Like many of our DWARF classes, the DWARFExpression can be initialized
in several ways. One such way was through a constructor that takes just
the compile unit. This constructor is used to initialize both empty
DWARFExpressions, and DWARFExpression that will be populated later.

To make the distinction more clear, I changed the constructor to a
default constructor and updated its call sites. Where the
DWARFExpression was being populated later, I replaced that with a call
to the copy assignment constructor.

Differential revision: https://reviews.llvm.org/D62425

llvm-svn: 361849
---
 .../include/lldb/Expression/DWARFExpression.h | 104 +++++-------------
 lldb/source/Expression/DWARFExpression.cpp    |  61 +++-------
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  |   4 +-
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  |   9 +-
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      |  35 +++---
 .../NativePDB/DWARFLocationExpression.cpp     |   4 +-
 .../PDB/PDBLocationToDWARFExpression.cpp      |  14 +--
 lldb/source/Symbol/Function.cpp               |   2 +-
 8 files changed, 73 insertions(+), 160 deletions(-)

diff --git a/lldb/include/lldb/Expression/DWARFExpression.h b/lldb/include/lldb/Expression/DWARFExpression.h
index 0a7d919cc688a..21830a5628004 100644
--- a/lldb/include/lldb/Expression/DWARFExpression.h
+++ b/lldb/include/lldb/Expression/DWARFExpression.h
@@ -43,8 +43,7 @@ class DWARFExpression {
                             // (.debug_loclists/.debug_loclists.dwo).
   };
 
-  /// Constructor
-  explicit DWARFExpression(DWARFUnit *dwarf_cu);
+  DWARFExpression();
 
   /// Constructor
   ///
@@ -58,7 +57,7 @@ class DWARFExpression {
   /// \param[in] data_length
   ///     The byte length of the location expression.
   DWARFExpression(lldb::ModuleSP module, const DataExtractor &data,
-                  DWARFUnit *dwarf_cu, lldb::offset_t data_offset,
+                  const DWARFUnit *dwarf_cu, lldb::offset_t data_offset,
                   lldb::offset_t data_length);
 
   /// Destructor
@@ -132,6 +131,9 @@ class DWARFExpression {
 
   bool Update_DW_OP_addr(lldb::addr_t file_addr);
 
+  void UpdateValue(uint64_t const_value, lldb::offset_t const_value_byte_size,
+                   uint8_t addr_byte_size);
+
   void SetModule(const lldb::ModuleSP &module) { m_module_wp = module; }
 
   bool ContainsThreadLocalStorage() const;
@@ -141,66 +143,6 @@ class DWARFExpression {
       std::function<lldb::addr_t(lldb::addr_t file_addr)> const
           &link_address_callback);
 
-  /// Make the expression parser read its location information from a given
-  /// data source.  Does not change the offset and length
-  ///
-  /// \param[in] data
-  ///     A data extractor configured to read the DWARF location expression's
-  ///     bytecode.
-  void SetOpcodeData(const DataExtractor &data);
-
-  /// Make the expression parser read its location information from a given
-  /// data source
-  ///
-  /// \param[in] module_sp
-  ///     The module that defines the DWARF expression.
-  ///
-  /// \param[in] data
-  ///     A data extractor configured to read the DWARF location expression's
-  ///     bytecode.
-  ///
-  /// \param[in] data_offset
-  ///     The offset of the location expression in the extractor.
-  ///
-  /// \param[in] data_length
-  ///     The byte length of the location expression.
-  void SetOpcodeData(lldb::ModuleSP module_sp, const DataExtractor &data,
-                     lldb::offset_t data_offset, lldb::offset_t data_length);
-
-  /// Copy the DWARF location expression into a local buffer.
-  ///
-  /// It is a good idea to copy the data so we don't keep the entire object
-  /// file worth of data around just for a few bytes of location expression.
-  /// LLDB typically will mmap the entire contents of debug information files,
-  /// and if we use SetOpcodeData, it will get a shared reference to all of
-  /// this data for the and cause the object file to have to stay around. Even
-  /// worse, a very very large ".a" that contains one or more .o files could
-  /// end up being referenced. Location lists are typically small so even
-  /// though we are copying the data, it shouldn't amount to that much for the
-  /// variables we end up parsing.
-  ///
-  /// \param[in] module_sp
-  ///     The module that defines the DWARF expression.
-  ///
-  /// \param[in] data
-  ///     A data extractor configured to read and copy the DWARF
-  ///     location expression's bytecode.
-  ///
-  /// \param[in] data_offset
-  ///     The offset of the location expression in the extractor.
-  ///
-  /// \param[in] data_length
-  ///     The byte length of the location expression.
-  void CopyOpcodeData(lldb::ModuleSP module_sp, const DataExtractor &data,
-                      lldb::offset_t data_offset, lldb::offset_t data_length);
-
-  void CopyOpcodeData(const void *data, lldb::offset_t data_length,
-                      lldb::ByteOrder byte_order, uint8_t addr_byte_size);
-
-  void CopyOpcodeData(uint64_t const_value,
-                      lldb::offset_t const_value_byte_size,
-                      uint8_t addr_byte_size);
-
   /// Tells the expression that it refers to a location list.
   ///
   /// \param[in] slide
@@ -294,7 +236,7 @@ class DWARFExpression {
   ///     details of the failure are provided through it.
   static bool Evaluate(ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
                        lldb::ModuleSP opcode_ctx, const DataExtractor &opcodes,
-                       DWARFUnit *dwarf_cu, const lldb::offset_t offset,
+                       const DWARFUnit *dwarf_cu, const lldb::offset_t offset,
                        const lldb::offset_t length,
                        const lldb::RegisterKind reg_set,
                        const Value *initial_value_ptr,
@@ -324,7 +266,7 @@ class DWARFExpression {
 
   bool MatchesOperand(StackFrame &frame, const Instruction::Operand &op);
 
-protected:
+private:
   /// Pretty-prints the location expression to a stream
   ///
   /// \param[in] stream
@@ -355,20 +297,24 @@ class DWARFExpression {
   bool GetOpAndEndOffsets(StackFrame &frame, lldb::offset_t &op_offset,
                           lldb::offset_t &end_offset);
 
-  /// Classes that inherit from DWARFExpression can see and modify these
-
-  lldb::ModuleWP m_module_wp; ///< Module which defined this expression.
-  DataExtractor m_data; ///< A data extractor capable of reading opcode bytes
-  DWARFUnit *m_dwarf_cu; ///< The DWARF compile unit this expression
-                                ///belongs to. It is used
-  ///< to evaluate values indexing into the .debug_addr section (e.g.
-  ///< DW_OP_GNU_addr_index, DW_OP_GNU_const_index)
-  lldb::RegisterKind
-      m_reg_kind; ///< One of the defines that starts with LLDB_REGKIND_
-  lldb::addr_t m_loclist_slide; ///< A value used to slide the location list
-                                ///offsets so that
-  ///< they are relative to the object that owns the location list
-  ///< (the function for frame base and variable location lists)
+  /// Module which defined this expression.
+  lldb::ModuleWP m_module_wp;
+
+  /// A data extractor capable of reading opcode bytes
+  DataExtractor m_data;
+
+  /// The DWARF compile unit this expression belongs to. It is used to evaluate
+  /// values indexing into the .debug_addr section (e.g. DW_OP_GNU_addr_index,
+  /// DW_OP_GNU_const_index)
+  const DWARFUnit *m_dwarf_cu;
+
+  /// One of the defines that starts with LLDB_REGKIND_
+  lldb::RegisterKind m_reg_kind;
+
+  /// A value used to slide the location list offsets so that m_c they are
+  /// relative to the object that owns the location list (the function for
+  /// frame base and variable location lists)
+  lldb::addr_t m_loclist_slide;
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp
index d6fa41bfb74b7..50750c430332a 100644
--- a/lldb/source/Expression/DWARFExpression.cpp
+++ b/lldb/source/Expression/DWARFExpression.cpp
@@ -53,13 +53,13 @@ ReadAddressFromDebugAddrSection(const DWARFUnit *dwarf_cu,
 }
 
 // DWARFExpression constructor
-DWARFExpression::DWARFExpression(DWARFUnit *dwarf_cu)
-    : m_module_wp(), m_data(), m_dwarf_cu(dwarf_cu),
+DWARFExpression::DWARFExpression()
+    : m_module_wp(), m_data(), m_dwarf_cu(nullptr),
       m_reg_kind(eRegisterKindDWARF), m_loclist_slide(LLDB_INVALID_ADDRESS) {}
 
 DWARFExpression::DWARFExpression(lldb::ModuleSP module_sp,
                                  const DataExtractor &data,
-                                 DWARFUnit *dwarf_cu,
+                                 const DWARFUnit *dwarf_cu,
                                  lldb::offset_t data_offset,
                                  lldb::offset_t data_length)
     : m_module_wp(), m_data(data, data_offset, data_length),
@@ -74,51 +74,16 @@ DWARFExpression::~DWARFExpression() {}
 
 bool DWARFExpression::IsValid() const { return m_data.GetByteSize() > 0; }
 
-void DWARFExpression::SetOpcodeData(const DataExtractor &data) {
-  m_data = data;
-}
-
-void DWARFExpression::CopyOpcodeData(lldb::ModuleSP module_sp,
-                                     const DataExtractor &data,
-                                     lldb::offset_t data_offset,
-                                     lldb::offset_t data_length) {
-  const uint8_t *bytes = data.PeekData(data_offset, data_length);
-  if (bytes) {
-    m_module_wp = module_sp;
-    m_data.SetData(DataBufferSP(new DataBufferHeap(bytes, data_length)));
-    m_data.SetByteOrder(data.GetByteOrder());
-    m_data.SetAddressByteSize(data.GetAddressByteSize());
-  }
-}
-
-void DWARFExpression::CopyOpcodeData(const void *data,
-                                     lldb::offset_t data_length,
-                                     ByteOrder byte_order,
-                                     uint8_t addr_byte_size) {
-  if (data && data_length) {
-    m_data.SetData(DataBufferSP(new DataBufferHeap(data, data_length)));
-    m_data.SetByteOrder(byte_order);
-    m_data.SetAddressByteSize(addr_byte_size);
-  }
-}
-
-void DWARFExpression::CopyOpcodeData(uint64_t const_value,
-                                     lldb::offset_t const_value_byte_size,
-                                     uint8_t addr_byte_size) {
-  if (const_value_byte_size) {
-    m_data.SetData(
-        DataBufferSP(new DataBufferHeap(&const_value, const_value_byte_size)));
-    m_data.SetByteOrder(endian::InlHostByteOrder());
-    m_data.SetAddressByteSize(addr_byte_size);
-  }
-}
+void DWARFExpression::UpdateValue(uint64_t const_value,
+                                  lldb::offset_t const_value_byte_size,
+                                  uint8_t addr_byte_size) {
+  if (!const_value_byte_size)
+    return;
 
-void DWARFExpression::SetOpcodeData(lldb::ModuleSP module_sp,
-                                    const DataExtractor &data,
-                                    lldb::offset_t data_offset,
-                                    lldb::offset_t data_length) {
-  m_module_wp = module_sp;
-  m_data.SetData(data, data_offset, data_length);
+  m_data.SetData(
+      DataBufferSP(new DataBufferHeap(&const_value, const_value_byte_size)));
+  m_data.SetByteOrder(endian::InlHostByteOrder());
+  m_data.SetAddressByteSize(addr_byte_size);
 }
 
 void DWARFExpression::DumpLocation(Stream *s, lldb::offset_t offset,
@@ -1191,7 +1156,7 @@ bool DWARFExpression::Evaluate(ExecutionContext *exe_ctx,
 bool DWARFExpression::Evaluate(
     ExecutionContext *exe_ctx, RegisterContext *reg_ctx,
     lldb::ModuleSP module_sp, const DataExtractor &opcodes,
-    DWARFUnit *dwarf_cu, const lldb::offset_t opcodes_offset,
+    const DWARFUnit *dwarf_cu, const lldb::offset_t opcodes_offset,
     const lldb::offset_t opcodes_length, const lldb::RegisterKind reg_kind,
     const Value *initial_value_ptr, const Value *object_address_ptr,
     Value &result, Status *error_ptr) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 1e7cc0468f8bb..b1e826582a833 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2553,7 +2553,7 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
   int call_file = 0;
   int call_line = 0;
   int call_column = 0;
-  DWARFExpression frame_base(die.GetCU());
+  DWARFExpression frame_base;
 
   const dw_tag_t tag = die.Tag();
 
@@ -2692,7 +2692,6 @@ bool DWARFASTParserClang::ParseChildMembers(
       const size_t num_attributes = die.GetAttributes(attributes);
       if (num_attributes > 0) {
         Declaration decl;
-        // DWARFExpression location;
         const char *name = nullptr;
         const char *prop_name = nullptr;
         const char *prop_getter_name = nullptr;
@@ -3172,7 +3171,6 @@ bool DWARFASTParserClang::ParseChildMembers(
       const size_t num_attributes = die.GetAttributes(attributes);
       if (num_attributes > 0) {
         Declaration decl;
-        DWARFExpression location(die.GetCU());
         DWARFFormValue encoding_form;
         AccessType accessibility = default_accessibility;
         bool is_virtual = false;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index 87d1a4be2e902..b0412f92c1c29 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -504,8 +504,8 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
               uint32_t block_offset =
                   form_value.BlockData() - debug_info_data.GetDataStart();
               uint32_t block_length = form_value.Unsigned();
-              frame_base->SetOpcodeData(module, debug_info_data, block_offset,
-                                        block_length);
+              *frame_base = DWARFExpression(module, debug_info_data, cu,
+                                            block_offset, block_length);
             } else {
               const DWARFDataExtractor &debug_loc_data =
                   dwarf2Data->DebugLocData();
@@ -514,8 +514,9 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
               size_t loc_list_length = DWARFExpression::LocationListSize(
                   cu, debug_loc_data, debug_loc_offset);
               if (loc_list_length > 0) {
-                frame_base->SetOpcodeData(module, debug_loc_data,
-                                          debug_loc_offset, loc_list_length);
+                *frame_base =
+                    DWARFExpression(module, debug_loc_data, cu,
+                                    debug_loc_offset, loc_list_length);
                 if (lo_pc != LLDB_INVALID_ADDRESS) {
                   assert(lo_pc >= cu->GetBaseAddress());
                   frame_base->SetLocationListSlide(lo_pc -
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 463af1af9a1cf..8836de4ac5ee1 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -3102,7 +3102,7 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
       Declaration decl;
       uint32_t i;
       DWARFFormValue type_die_form;
-      DWARFExpression location(die.GetCU());
+      DWARFExpression location;
       bool is_external = false;
       bool is_artificial = false;
       bool location_is_const_value_data = false;
@@ -3153,14 +3153,15 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
                 uint32_t block_offset =
                     form_value.BlockData() - debug_info_data.GetDataStart();
                 uint32_t block_length = form_value.Unsigned();
-                location.CopyOpcodeData(module, debug_info_data, block_offset,
-                                        block_length);
+                location = DWARFExpression(module, debug_info_data, die.GetCU(),
+                                           block_offset, block_length);
               } else if (DWARFFormValue::IsDataForm(form_value.Form())) {
                 // Retrieve the value as a data expression.
                 uint32_t data_offset = attributes.DIEOffsetAtIndex(i);
                 if (auto data_length = form_value.GetFixedSize())
-                  location.CopyOpcodeData(module, debug_info_data, data_offset,
-                                          *data_length);
+                  location =
+                      DWARFExpression(module, debug_info_data, die.GetCU(),
+                                      data_offset, *data_length);
                 else {
                   const uint8_t *data_pointer = form_value.BlockData();
                   if (data_pointer) {
@@ -3176,15 +3177,17 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
                 if (form_value.Form() == DW_FORM_strp) {
                   uint32_t data_offset = attributes.DIEOffsetAtIndex(i);
                   if (auto data_length = form_value.GetFixedSize())
-                    location.CopyOpcodeData(module, debug_info_data,
-                                            data_offset, *data_length);
+                    location =
+                        DWARFExpression(module, debug_info_data, die.GetCU(),
+                                        data_offset, *data_length);
                 } else {
                   const char *str = form_value.AsCString();
                   uint32_t string_offset =
                       str - (const char *)debug_info_data.GetDataStart();
                   uint32_t string_length = strlen(str) + 1;
-                  location.CopyOpcodeData(module, debug_info_data,
-                                          string_offset, string_length);
+                  location =
+                      DWARFExpression(module, debug_info_data, die.GetCU(),
+                                      string_offset, string_length);
                 }
               }
             }
@@ -3198,7 +3201,8 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
               uint32_t block_offset =
                   form_value.BlockData() - data.GetDataStart();
               uint32_t block_length = form_value.Unsigned();
-              location.CopyOpcodeData(module, data, block_offset, block_length);
+              location = DWARFExpression(module, data, die.GetCU(),
+                                         block_offset, block_length);
             } else {
               const DWARFDataExtractor &debug_loc_data = DebugLocData();
               const dw_offset_t debug_loc_offset = form_value.Unsigned();
@@ -3206,8 +3210,8 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
               size_t loc_list_length = DWARFExpression::LocationListSize(
                   die.GetCU(), debug_loc_data, debug_loc_offset);
               if (loc_list_length > 0) {
-                location.CopyOpcodeData(module, debug_loc_data,
-                                        debug_loc_offset, loc_list_length);
+                location = DWARFExpression(module, debug_loc_data, die.GetCU(),
+                                           debug_loc_offset, loc_list_length);
                 assert(func_low_pc != LLDB_INVALID_ADDRESS);
                 location.SetLocationListSlide(
                     func_low_pc -
@@ -3444,10 +3448,9 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
             new SymbolFileType(*this, GetUID(DIERef(type_die_form))));
 
         if (const_value.Form() && type_sp && type_sp->GetType())
-          location.CopyOpcodeData(
-              const_value.Unsigned(),
-              type_sp->GetType()->GetByteSize().getValueOr(0),
-              die.GetCU()->GetAddressByteSize());
+          location.UpdateValue(const_value.Unsigned(),
+                               type_sp->GetType()->GetByteSize().getValueOr(0),
+                               die.GetCU()->GetAddressByteSize());
 
         var_sp = std::make_shared<Variable>(
             die.GetID(), name, mangled, type_sp, scope, symbol_context_scope,
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp
index 5db7de6f7c73a..3d8bfb0587217 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.cpp
@@ -111,13 +111,13 @@ static DWARFExpression MakeLocationExpressionInternal(lldb::ModuleSP module,
   uint32_t address_size = architecture.GetAddressByteSize();
   uint32_t byte_size = architecture.GetDataByteSize();
   if (byte_order == eByteOrderInvalid || address_size == 0)
-    return DWARFExpression(nullptr);
+    return DWARFExpression();
 
   RegisterKind register_kind = eRegisterKindDWARF;
   StreamBuffer<32> stream(Stream::eBinary, address_size, byte_order);
 
   if (!writer(stream, register_kind))
-    return DWARFExpression(nullptr);
+    return DWARFExpression();
 
   DataBufferSP buffer =
       std::make_shared<DataBufferHeap>(stream.GetData(), stream.GetSize());
diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp
index c7cf8b235bf26..1c17bf6563b36 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp
+++ b/lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp
@@ -69,7 +69,7 @@ DWARFExpression ConvertPDBLocationToDWARFExpression(
   is_constant = true;
 
   if (!module)
-    return DWARFExpression(nullptr);
+    return DWARFExpression();
 
   const ArchSpec &architecture = module->GetArchitecture();
   llvm::Triple::ArchType arch_type = architecture.GetMachine();
@@ -77,7 +77,7 @@ DWARFExpression ConvertPDBLocationToDWARFExpression(
   uint32_t address_size = architecture.GetAddressByteSize();
   uint32_t byte_size = architecture.GetDataByteSize();
   if (byte_order == eByteOrderInvalid || address_size == 0)
-    return DWARFExpression(nullptr);
+    return DWARFExpression();
 
   RegisterKind register_kind = eRegisterKindDWARF;
   StreamBuffer<32> stream(Stream::eBinary, address_size, byte_order);
@@ -88,13 +88,13 @@ DWARFExpression ConvertPDBLocationToDWARFExpression(
 
     SectionList *section_list = module->GetSectionList();
     if (!section_list)
-      return DWARFExpression(nullptr);
+      return DWARFExpression();
 
     uint32_t section_id = symbol.getAddressSection();
 
     auto section = section_list->FindSectionByID(section_id);
     if (!section)
-      return DWARFExpression(nullptr);
+      return DWARFExpression();
 
     uint32_t offset = symbol.getAddressOffset();
     stream.PutMaxHex64(section->GetFileAddress() + offset, address_size,
@@ -129,7 +129,7 @@ DWARFExpression ConvertPDBLocationToDWARFExpression(
       register_kind = eRegisterKindLLDB;
       reg_num = GetLLDBRegisterNumber(arch_type, reg_id);
       if (reg_num == LLDB_INVALID_REGNUM)
-        return DWARFExpression(nullptr);
+        return DWARFExpression();
     }
 
     if (reg_num > 31) {
@@ -149,7 +149,7 @@ DWARFExpression ConvertPDBLocationToDWARFExpression(
     register_kind = eRegisterKindLLDB;
     uint32_t reg_num = GetLLDBRegisterNumber(arch_type, symbol.getRegisterId());
     if (reg_num == LLDB_INVALID_REGNUM)
-      return DWARFExpression(nullptr);
+      return DWARFExpression();
 
     if (reg_num > 31) {
       stream.PutHex8(DW_OP_regx);
@@ -168,7 +168,7 @@ DWARFExpression ConvertPDBLocationToDWARFExpression(
     break;
   }
   default:
-    return DWARFExpression(nullptr);
+    return DWARFExpression();
   }
 
   DataBufferSP buffer =
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index 0538a9e351cb9..83350e7123fde 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -184,7 +184,7 @@ Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid,
                    const AddressRange &range)
     : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid),
       m_type(type), m_mangled(mangled), m_block(func_uid), m_range(range),
-      m_frame_base(nullptr), m_flags(), m_prologue_byte_size(0) {
+      m_frame_base(), m_flags(), m_prologue_byte_size(0) {
   m_block.SetParentScope(this);
   assert(comp_unit != nullptr);
 }

From f7980e727f2c08660b8bb23f80e131306faceca8 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 28 May 2019 17:37:58 +0000
Subject: [PATCH 0380/1176] Revert "[x86] split 256-bit store of concatenated
 vectors"

This reverts commit d5a8637072f4c556b88156bd2f6237a2ead47d31.

Most likely suspect for this bot failure:
http://lab.llvm.org:8011/builders/clang-cmake-x86_64-avx2-linux/builds/9684

llvm-svn: 361850
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  11 -
 llvm/test/CodeGen/X86/avg.ll                  | 402 +++++++++---------
 .../CodeGen/X86/avx-intrinsics-x86-upgrade.ll |  24 +-
 llvm/test/CodeGen/X86/avx-intrinsics-x86.ll   |  12 +-
 llvm/test/CodeGen/X86/avx512-trunc-widen.ll   |  16 +-
 llvm/test/CodeGen/X86/avx512-trunc.ll         |  16 +-
 llvm/test/CodeGen/X86/nontemporal-2.ll        |  40 +-
 llvm/test/CodeGen/X86/oddsubvector.ll         |  15 +-
 llvm/test/CodeGen/X86/pmovsx-inreg.ll         |  72 ++--
 llvm/test/CodeGen/X86/shrink_vmul-widen.ll    | 124 +++---
 llvm/test/CodeGen/X86/shrink_vmul.ll          | 124 +++---
 .../CodeGen/X86/shuffle-vs-trunc-512-widen.ll |  18 +-
 llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll |  18 +-
 llvm/test/CodeGen/X86/subvector-broadcast.ll  |  68 +--
 llvm/test/CodeGen/X86/vec_fptrunc.ll          |  10 +-
 llvm/test/CodeGen/X86/vec_saddo.ll            |  68 +--
 llvm/test/CodeGen/X86/vec_smulo.ll            |  84 ++--
 llvm/test/CodeGen/X86/vec_ssubo.ll            |  84 ++--
 llvm/test/CodeGen/X86/vec_uaddo.ll            |  24 +-
 llvm/test/CodeGen/X86/vec_umulo.ll            |  26 +-
 llvm/test/CodeGen/X86/vec_usubo.ll            |  24 +-
 llvm/test/CodeGen/X86/vector-gep.ll           | 134 +++---
 llvm/test/CodeGen/X86/vector-trunc-widen.ll   |  72 ++--
 llvm/test/CodeGen/X86/vector-trunc.ll         |  72 ++--
 .../CodeGen/X86/x86-interleaved-access.ll     |  73 ++--
 25 files changed, 845 insertions(+), 786 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 73976f30374c2..7b4ce08b57860 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1287,7 +1287,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);
       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
-      setOperationAction(ISD::STORE,              VT, Custom);
     }
 
     if (HasInt256)
@@ -21081,17 +21080,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   if (St->isTruncatingStore())
     return SDValue();
 
-  // If this is a 256-bit store of concatenated ops, we are better off splitting
-  // that store into two 128-bit stores. This avoids spurious use of 256-bit ops
-  // and each half can execute independently. Some cores would split the op into
-  // halves anyway, so the concat (vinsertf128) is purely an extra op.
   MVT StoreVT = StoredVal.getSimpleValueType();
-  if (StoreVT.is256BitVector()) {
-    if (StoredVal.getOpcode() != ISD::CONCAT_VECTORS || !StoredVal.hasOneUse())
-      return SDValue();
-    return split256BitStore(St, DAG);
-  }
-
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
          "Unexpected VT");
   if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 22a6daa999d71..cfa9f11a9c73e 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -102,10 +102,11 @@ define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8:
@@ -266,8 +267,8 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-LABEL: avg_v48i8:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
-; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm4
-; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
+; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
+; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm4
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[3,3,0,1]
@@ -278,10 +279,10 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm15 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[3,3,0,1]
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm11 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm10 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm14 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm13 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm12 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -296,52 +297,52 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm4
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm3
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm2, %xmm5, %xmm12
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[3,3,0,1]
+; AVX1-NEXT:    vpaddd %xmm2, %xmm5, %xmm13
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm10
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[1,1,2,3]
+; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm11
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd %xmm6, %xmm7, %xmm9
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm8
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm3, %xmm15, %xmm15
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm8
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd %xmm4, %xmm15, %xmm15
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm7, %xmm11, %xmm7
+; AVX1-NEXT:    vpaddd %xmm7, %xmm10, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd %xmm2, %xmm14, %xmm14
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm0, %xmm13, %xmm13
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
+; AVX1-NEXT:    vpaddd %xmm0, %xmm12, %xmm12
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 16-byte Folded Reload
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[3,3,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm6 # 16-byte Folded Reload
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm4[1,1,2,3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm3[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4, %xmm4 # 16-byte Folded Reload
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpsubd %xmm3, %xmm12, %xmm11
-; AVX1-NEXT:    vpsubd %xmm3, %xmm10, %xmm10
-; AVX1-NEXT:    vpsubd %xmm3, %xmm9, %xmm9
-; AVX1-NEXT:    vpsubd %xmm3, %xmm8, %xmm8
-; AVX1-NEXT:    vpsubd %xmm3, %xmm15, %xmm12
-; AVX1-NEXT:    vpsubd %xmm3, %xmm7, %xmm7
-; AVX1-NEXT:    vpsubd %xmm3, %xmm14, %xmm0
-; AVX1-NEXT:    vpsubd %xmm3, %xmm13, %xmm2
-; AVX1-NEXT:    vpsubd %xmm3, %xmm5, %xmm5
-; AVX1-NEXT:    vpsubd %xmm3, %xmm6, %xmm6
-; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpsubd %xmm4, %xmm13, %xmm10
+; AVX1-NEXT:    vpsubd %xmm4, %xmm11, %xmm11
+; AVX1-NEXT:    vpsubd %xmm4, %xmm9, %xmm9
+; AVX1-NEXT:    vpsubd %xmm4, %xmm8, %xmm8
+; AVX1-NEXT:    vpsubd %xmm4, %xmm15, %xmm13
+; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm7
+; AVX1-NEXT:    vpsubd %xmm4, %xmm14, %xmm0
+; AVX1-NEXT:    vpsubd %xmm4, %xmm12, %xmm2
+; AVX1-NEXT:    vpsubd %xmm4, %xmm5, %xmm5
+; AVX1-NEXT:    vpsubd %xmm4, %xmm6, %xmm6
+; AVX1-NEXT:    vpsubd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubd %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrld $1, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
@@ -352,13 +353,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpsrld $1, %xmm7, %xmm2
-; AVX1-NEXT:    vpsrld $1, %xmm12, %xmm4
+; AVX1-NEXT:    vpsrld $1, %xmm13, %xmm4
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpsrld $1, %xmm8, %xmm4
 ; AVX1-NEXT:    vpsrld $1, %xmm9, %xmm5
 ; AVX1-NEXT:    vpackusdw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpsrld $1, %xmm10, %xmm5
-; AVX1-NEXT:    vpsrld $1, %xmm11, %xmm6
+; AVX1-NEXT:    vpsrld $1, %xmm11, %xmm5
+; AVX1-NEXT:    vpsrld $1, %xmm10, %xmm6
 ; AVX1-NEXT:    vpackusdw %xmm5, %xmm6, %xmm5
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
@@ -367,12 +368,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm4, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v48i8:
@@ -447,12 +449,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm2
-; AVX512F-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
 ; AVX512F-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
-; AVX512F-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm2
+; AVX512F-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm1
 ; AVX512F-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX512F-NEXT:    vmovdqu %xmm0, (%rax)
-; AVX512F-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: avg_v48i8:
@@ -504,14 +507,15 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpavgb 48(%rdi), %xmm3, %xmm1
 ; AVX1-NEXT:    vpavgb 32(%rdi), %xmm2, %xmm2
-; AVX1-NEXT:    vpavgb 48(%rdi), %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vmovups %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8:
@@ -624,10 +628,11 @@ define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16:
@@ -680,14 +685,15 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpavgw 48(%rdi), %xmm3, %xmm1
 ; AVX1-NEXT:    vpavgw 32(%rdi), %xmm2, %xmm2
-; AVX1-NEXT:    vpavgw 48(%rdi), %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vmovups %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16:
@@ -828,10 +834,11 @@ define void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8_2:
@@ -886,12 +893,13 @@ define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
 ; AVX1-NEXT:    vpavgb %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgb %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpavgb %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpavgb %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpavgb %xmm2, %xmm2, %xmm1
+; AVX1-NEXT:    vpavgb %xmm3, %xmm3, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovups %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8_2:
@@ -1005,10 +1013,11 @@ define void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16_2:
@@ -1061,14 +1070,15 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
-; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpavgw 48(%rsi), %xmm3, %xmm1
 ; AVX1-NEXT:    vpavgw 32(%rsi), %xmm2, %xmm2
-; AVX1-NEXT:    vpavgw 48(%rsi), %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vmovups %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16_2:
@@ -1196,10 +1206,11 @@ define void @avg_v32i8_const(<32 x i8>* %a) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = [7.9499288951273625E-275,7.9499288951273625E-275]
 ; AVX1-NEXT:    # xmm0 = mem[0,0]
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8_const:
@@ -1247,14 +1258,15 @@ define void @avg_v64i8_const(<64 x i8>* %a) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = [7.9499288951273625E-275,7.9499288951273625E-275]
 ; AVX1-NEXT:    # xmm0 = mem[0,0]
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vpavgb 32(%rdi), %xmm0, %xmm3
-; AVX1-NEXT:    vpavgb 48(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vpavgb 48(%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vpavgb 32(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vmovups %ymm1, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8_const:
@@ -1353,10 +1365,11 @@ define void @avg_v16i16_const(<16 x i16>* %a) nounwind {
 ; AVX1-LABEL: avg_v16i16_const:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16_const:
@@ -1403,14 +1416,15 @@ define void @avg_v32i16_const(<32 x i16>* %a) nounwind {
 ; AVX1-LABEL: avg_v32i16_const:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vpavgw 32(%rdi), %xmm0, %xmm3
-; AVX1-NEXT:    vpavgw 48(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT:    vpavgw 48(%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vpavgw 32(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vmovups %ymm1, (%rax)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16_const:
@@ -1651,96 +1665,100 @@ define <512 x i8> @avg_v512i8_3(<512 x i8> %a, <512 x i8> %b) nounwind {
 ; AVX1-NEXT:    pushq %rbp
 ; AVX1-NEXT:    movq %rsp, %rbp
 ; AVX1-NEXT:    andq $-32, %rsp
-; AVX1-NEXT:    subq $32, %rsp
+; AVX1-NEXT:    subq $96, %rsp
 ; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    vmovdqa 256(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 768(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 496(%rdi)
-; AVX1-NEXT:    vmovdqa 240(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 752(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 480(%rdi)
-; AVX1-NEXT:    vmovdqa 224(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 736(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 464(%rdi)
-; AVX1-NEXT:    vmovdqa 208(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 720(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 448(%rdi)
-; AVX1-NEXT:    vmovdqa 192(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 704(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 432(%rdi)
-; AVX1-NEXT:    vmovdqa 176(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 688(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 416(%rdi)
-; AVX1-NEXT:    vmovdqa 160(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 672(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 400(%rdi)
-; AVX1-NEXT:    vmovdqa 144(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 656(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 384(%rdi)
-; AVX1-NEXT:    vmovdqa 128(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 640(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 368(%rdi)
-; AVX1-NEXT:    vmovdqa 112(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 624(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 352(%rdi)
-; AVX1-NEXT:    vmovdqa 96(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 608(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 336(%rdi)
-; AVX1-NEXT:    vmovdqa 80(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 592(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 320(%rdi)
-; AVX1-NEXT:    vmovdqa 64(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 576(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 304(%rdi)
-; AVX1-NEXT:    vmovdqa 48(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 560(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 288(%rdi)
-; AVX1-NEXT:    vmovdqa 32(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 544(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 272(%rdi)
-; AVX1-NEXT:    vmovdqa 16(%rbp), %xmm8
-; AVX1-NEXT:    vpavgb 528(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 256(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
-; AVX1-NEXT:    vpavgb 512(%rbp), %xmm8, %xmm8
-; AVX1-NEXT:    vmovdqa %xmm8, 240(%rdi)
-; AVX1-NEXT:    vpavgb 496(%rbp), %xmm7, %xmm7
-; AVX1-NEXT:    vmovdqa %xmm7, 224(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
-; AVX1-NEXT:    vpavgb 480(%rbp), %xmm7, %xmm7
-; AVX1-NEXT:    vmovdqa %xmm7, 208(%rdi)
-; AVX1-NEXT:    vpavgb 464(%rbp), %xmm6, %xmm6
-; AVX1-NEXT:    vmovdqa %xmm6, 192(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
-; AVX1-NEXT:    vpavgb 448(%rbp), %xmm6, %xmm6
-; AVX1-NEXT:    vmovdqa %xmm6, 176(%rdi)
-; AVX1-NEXT:    vpavgb 432(%rbp), %xmm5, %xmm5
-; AVX1-NEXT:    vmovdqa %xmm5, 160(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
-; AVX1-NEXT:    vpavgb 416(%rbp), %xmm5, %xmm5
-; AVX1-NEXT:    vmovdqa %xmm5, 144(%rdi)
-; AVX1-NEXT:    vpavgb 400(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vmovdqa %xmm4, 128(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT:    vpavgb 384(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vmovdqa %xmm4, 112(%rdi)
-; AVX1-NEXT:    vpavgb 368(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa %xmm3, 96(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT:    vpavgb 352(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa %xmm3, 80(%rdi)
-; AVX1-NEXT:    vpavgb 336(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vmovdqa %xmm2, 64(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpavgb 320(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vmovdqa %xmm2, 48(%rdi)
-; AVX1-NEXT:    vpavgb 304(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa %xmm1, 32(%rdi)
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpavgb 288(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
-; AVX1-NEXT:    vpavgb 272(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX1-NEXT:    vpavgb 272(%rbp), %xmm0, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpavgb 288(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm8, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT:    vpavgb 304(%rbp), %xmm1, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpavgb 320(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm8, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, (%rsp) # 32-byte Spill
+; AVX1-NEXT:    vpavgb 336(%rbp), %xmm2, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vpavgb 352(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm8, %ymm13
+; AVX1-NEXT:    vpavgb 368(%rbp), %xmm3, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vpavgb 384(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm8, %ymm14
+; AVX1-NEXT:    vpavgb 400(%rbp), %xmm4, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
+; AVX1-NEXT:    vpavgb 416(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm8, %ymm15
+; AVX1-NEXT:    vpavgb 432(%rbp), %xmm5, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT:    vpavgb 448(%rbp), %xmm5, %xmm5
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm8, %ymm12
+; AVX1-NEXT:    vpavgb 464(%rbp), %xmm6, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
+; AVX1-NEXT:    vpavgb 480(%rbp), %xmm6, %xmm6
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm8, %ymm6
+; AVX1-NEXT:    vpavgb 496(%rbp), %xmm7, %xmm8
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT:    vpavgb 512(%rbp), %xmm7, %xmm7
+; AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm8, %ymm7
+; AVX1-NEXT:    vmovdqa 16(%rbp), %xmm0
+; AVX1-NEXT:    vmovdqa 32(%rbp), %xmm1
+; AVX1-NEXT:    vpavgb 528(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vpavgb 544(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm8
+; AVX1-NEXT:    vmovdqa 48(%rbp), %xmm0
+; AVX1-NEXT:    vmovdqa 64(%rbp), %xmm1
+; AVX1-NEXT:    vpavgb 560(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vpavgb 576(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm9
+; AVX1-NEXT:    vmovdqa 80(%rbp), %xmm0
+; AVX1-NEXT:    vmovdqa 96(%rbp), %xmm1
+; AVX1-NEXT:    vpavgb 592(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vpavgb 608(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm10
+; AVX1-NEXT:    vmovdqa 112(%rbp), %xmm0
+; AVX1-NEXT:    vmovdqa 128(%rbp), %xmm1
+; AVX1-NEXT:    vpavgb 624(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vpavgb 640(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovdqa 144(%rbp), %xmm1
+; AVX1-NEXT:    vmovdqa 160(%rbp), %xmm2
+; AVX1-NEXT:    vpavgb 656(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vpavgb 672(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovdqa 176(%rbp), %xmm2
+; AVX1-NEXT:    vmovdqa 192(%rbp), %xmm3
+; AVX1-NEXT:    vpavgb 688(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vpavgb 704(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT:    vmovdqa 208(%rbp), %xmm3
+; AVX1-NEXT:    vmovdqa 224(%rbp), %xmm4
+; AVX1-NEXT:    vpavgb 720(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vpavgb 736(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT:    vmovdqa 240(%rbp), %xmm4
+; AVX1-NEXT:    vpavgb 752(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa 256(%rbp), %xmm11
+; AVX1-NEXT:    vpavgb 768(%rbp), %xmm11, %xmm5
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
+; AVX1-NEXT:    vmovaps %ymm4, 480(%rdi)
+; AVX1-NEXT:    vmovaps %ymm3, 448(%rdi)
+; AVX1-NEXT:    vmovaps %ymm2, 416(%rdi)
+; AVX1-NEXT:    vmovaps %ymm1, 384(%rdi)
+; AVX1-NEXT:    vmovaps %ymm0, 352(%rdi)
+; AVX1-NEXT:    vmovaps %ymm10, 320(%rdi)
+; AVX1-NEXT:    vmovaps %ymm9, 288(%rdi)
+; AVX1-NEXT:    vmovaps %ymm8, 256(%rdi)
+; AVX1-NEXT:    vmovaps %ymm7, 224(%rdi)
+; AVX1-NEXT:    vmovaps %ymm6, 192(%rdi)
+; AVX1-NEXT:    vmovaps %ymm12, 160(%rdi)
+; AVX1-NEXT:    vmovaps %ymm15, 128(%rdi)
+; AVX1-NEXT:    vmovaps %ymm14, 96(%rdi)
+; AVX1-NEXT:    vmovaps %ymm13, 64(%rdi)
+; AVX1-NEXT:    vmovaps (%rsp), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
 ; AVX1-NEXT:    movq %rbp, %rsp
 ; AVX1-NEXT:    popq %rbp
 ; AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index 9706bf3455fef..8f0ec5030eb03 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -725,12 +725,12 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
 ; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
-; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
-; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
-; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
-; X86-AVX-NEXT:    vmovdqu %xmm0, 16(%eax) # encoding: [0xc5,0xfa,0x7f,0x40,0x10]
-; X86-AVX-NEXT:    vmovdqu %xmm2, (%eax) # encoding: [0xc5,0xfa,0x7f,0x10]
+; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
+; X86-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
+; X86-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
+; X86-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
+; X86-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
+; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
 ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
@@ -745,12 +745,12 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
 ;
 ; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
-; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
-; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
-; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
-; X64-AVX-NEXT:    vmovdqu %xmm0, 16(%rdi) # encoding: [0xc5,0xfa,0x7f,0x47,0x10]
-; X64-AVX-NEXT:    vmovdqu %xmm2, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x17]
+; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
+; X64-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
+; X64-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
+; X64-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
+; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
+; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
 ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 8e48289c1042e..2fd2b863859c7 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -916,7 +916,8 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X86-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
-; X86-AVX-NEXT:    vmovntdq %xmm0, (%eax) # encoding: [0xc5,0xf9,0xe7,0x00]
+; X86-AVX-NEXT:    vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
+; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: movnt_dq:
@@ -924,21 +925,24 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X86-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
-; X86-AVX512VL-NEXT:    vmovntdq %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x00]
+; X86-AVX512VL-NEXT:    vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
+; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: movnt_dq:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X64-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
-; X64-AVX-NEXT:    vmovntdq %xmm0, (%rdi) # encoding: [0xc5,0xf9,0xe7,0x07]
+; X64-AVX-NEXT:    vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
+; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: movnt_dq:
 ; X64-AVX512VL:       # %bb.0:
 ; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X64-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
-; X64-AVX512VL-NEXT:    vmovntdq %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x07]
+; X64-AVX512VL-NEXT:    vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
+; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
   %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-widen.ll b/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
index 1ce08c01773d1..ba451973faa04 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
@@ -462,10 +462,12 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
 ; KNL-LABEL: trunc_wb_512_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
+; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; KNL-NEXT:    vpmovdb %zmm1, %xmm1
+; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa %ymm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -670,8 +672,8 @@ define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -950,8 +952,8 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
index 263f7c90441d4..c15d33222ca0e 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -458,10 +458,12 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
 ; KNL-LABEL: trunc_wb_512_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
+; KNL-NEXT:    vpmovdb %zmm0, %xmm0
+; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; KNL-NEXT:    vpmovdb %zmm1, %xmm1
+; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    vmovdqa %ymm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -665,8 +667,8 @@ define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -946,8 +948,8 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
index aa3e7cda18c0a..5b39cb16afec7 100644
--- a/llvm/test/CodeGen/X86/nontemporal-2.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -1061,12 +1061,12 @@ define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
-; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1126,12 +1126,12 @@ define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
-; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1164,12 +1164,12 @@ define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
-; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1202,12 +1202,12 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
-; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index 9bc6c0f380a07..69ea53e7e9c82 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -116,14 +116,13 @@ define void @PR40815(%struct.Mat4* nocapture readonly dereferenceable(64), %stru
 ;
 ; AVX-LABEL: PR40815:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovaps (%rdi), %xmm0
-; AVX-NEXT:    vmovaps 16(%rdi), %xmm1
-; AVX-NEXT:    vmovaps 32(%rdi), %xmm2
-; AVX-NEXT:    vmovaps 48(%rdi), %xmm3
-; AVX-NEXT:    vmovaps %xmm2, 16(%rsi)
-; AVX-NEXT:    vmovaps %xmm3, (%rsi)
-; AVX-NEXT:    vmovaps %xmm0, 48(%rsi)
-; AVX-NEXT:    vmovaps %xmm1, 32(%rsi)
+; AVX-NEXT:    vmovaps 16(%rdi), %xmm0
+; AVX-NEXT:    vmovaps 48(%rdi), %xmm1
+; AVX-NEXT:    vinsertf128 $1, 32(%rdi), %ymm1, %ymm1
+; AVX-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0
+; AVX-NEXT:    vmovups %ymm1, (%rsi)
+; AVX-NEXT:    vmovups %ymm0, 32(%rsi)
+; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: PR40815:
diff --git a/llvm/test/CodeGen/X86/pmovsx-inreg.ll b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
index f89223fa45834..9ab6917966b38 100644
--- a/llvm/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
@@ -53,12 +53,12 @@ define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test2:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbq (%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbq 2(%rdi), %xmm1
-; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vmovups %ymm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbq 2(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbq (%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -134,12 +134,12 @@ define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
 ;
 ; AVX1-LABEL: test4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm1
-; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vmovups %ymm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -215,12 +215,12 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
 ;
 ; AVX1-LABEL: test6:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm1
-; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vmovups %ymm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -296,12 +296,12 @@ define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwq (%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxwq 4(%rdi), %xmm1
-; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vmovups %ymm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
+; AVX1-NEXT:    vpmovsxwq 4(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwq (%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -377,12 +377,12 @@ define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
 ;
 ; AVX1-LABEL: test10:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm1
-; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vmovups %ymm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
+; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -458,12 +458,12 @@ define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test12:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm1
-; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vmovups %ymm2, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
+; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
+; AVX1-NEXT:    vmovups %ymm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index 7599858007407..0ed79ea4af70b 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -215,9 +215,10 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi8:
@@ -260,8 +261,9 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi8:
@@ -347,11 +349,12 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi8:
@@ -414,10 +417,11 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi8:
@@ -644,9 +648,10 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi16:
@@ -688,8 +693,9 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi16:
@@ -774,11 +780,12 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16:
@@ -840,10 +847,11 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16:
@@ -1276,23 +1284,24 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX1-NEXT:    movl c, %esi
-; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm2
-; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm3
-; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16_sext:
@@ -1342,22 +1351,23 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X64-AVX1-LABEL: mul_16xi16_sext:
 ; X64-AVX1:       # %bb.0: # %entry
 ; X64-AVX1-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm3
 ; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16_sext:
@@ -2191,8 +2201,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    divl 32(%ecx)
 ; X86-AVX1-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm1
-; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm3
+; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm3
+; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm1
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X86-AVX1-NEXT:    xorl %edx, %edx
 ; X86-AVX1-NEXT:    divl %ecx
@@ -2245,13 +2255,14 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vmovdqa %xmm1, (%eax)
-; X86-AVX1-NEXT:    vmovdqa %xmm0, (%eax)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
 ; X86-AVX1-NEXT:    popl %esi
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR34947:
@@ -2424,8 +2435,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    divl 32(%rsi)
 ; X64-AVX1-NEXT:    movl %edx, %r8d
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm1
-; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm3
+; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm3
+; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X64-AVX1-NEXT:    xorl %edx, %edx
 ; X64-AVX1-NEXT:    divl %ecx
@@ -2475,12 +2486,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    vpinsrd $2, %r10d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpinsrd $3, %r9d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    imull $8199, %r8d, %eax # imm = 0x2007
 ; X64-AVX1-NEXT:    movl %eax, (%rax)
-; X64-AVX1-NEXT:    vmovdqa %xmm1, (%rax)
-; X64-AVX1-NEXT:    vmovdqa %xmm0, (%rax)
+; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
 ; X64-AVX1-NEXT:    popq %rbp
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 5e952472f7577..0c8949f246177 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -209,9 +209,10 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi8:
@@ -254,8 +255,9 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi8:
@@ -341,11 +343,12 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi8:
@@ -408,10 +411,11 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi8:
@@ -636,9 +640,10 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi16:
@@ -680,8 +685,9 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi16:
@@ -766,11 +772,12 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16:
@@ -832,10 +839,11 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16:
@@ -1250,23 +1258,24 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX1-NEXT:    movl c, %esi
-; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm2
-; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm3
-; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16_sext:
@@ -1316,22 +1325,23 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X64-AVX1-LABEL: mul_16xi16_sext:
 ; X64-AVX1:       # %bb.0: # %entry
 ; X64-AVX1-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm3
 ; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16_sext:
@@ -2147,8 +2157,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    divl 32(%ecx)
 ; X86-AVX1-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm1
-; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm3
+; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm3
+; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm1
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X86-AVX1-NEXT:    xorl %edx, %edx
 ; X86-AVX1-NEXT:    divl %ecx
@@ -2201,13 +2211,14 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vmovdqa %xmm1, (%eax)
-; X86-AVX1-NEXT:    vmovdqa %xmm0, (%eax)
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
 ; X86-AVX1-NEXT:    popl %esi
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
+; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR34947:
@@ -2380,8 +2391,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    divl 32(%rsi)
 ; X64-AVX1-NEXT:    movl %edx, %r8d
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm1
-; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm3
+; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm3
+; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X64-AVX1-NEXT:    xorl %edx, %edx
 ; X64-AVX1-NEXT:    divl %ecx
@@ -2431,12 +2442,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    vpinsrd $2, %r10d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpinsrd $3, %r9d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    imull $8199, %r8d, %eax # imm = 0x2007
 ; X64-AVX1-NEXT:    movl %eax, (%rax)
-; X64-AVX1-NEXT:    vmovdqa %xmm1, (%rax)
-; X64-AVX1-NEXT:    vmovdqa %xmm0, (%rax)
+; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
 ; X64-AVX1-NEXT:    popq %rbp
+; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
index 737925eca0440..1a6bdd3aaa407 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
@@ -88,21 +88,23 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ; AVX512F-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, 16(%rsi)
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, (%rsi)
+; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vmovdqa %ymm0, (%rsi)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, 16(%rsi)
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, (%rsi)
+; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa %ymm0, (%rsi)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
index 6f94e0c608683..19031bbb2c0f8 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -88,21 +88,23 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ; AVX512F-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, 16(%rsi)
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, (%rsi)
+; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vmovdqa %ymm0, (%rsi)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, 16(%rsi)
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, (%rsi)
+; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqa %ymm0, (%rsi)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 7ecfac5151f2e..3ce584eff2a9e 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -835,24 +835,24 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>*
 define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
 ; X32-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X32-AVX1:       # %bb.0: # %entry
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,0,2,0]
-; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [3,0,4,0]
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [3,0,4,0]
+; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm3, %xmm3
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,0,2,0]
 ; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm6 = [1,0,2,0,3,0,4,0]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm7
-; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm2, %ymm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
-; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm5
-; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
-; X32-AVX1-NEXT:    vandps %ymm6, %ymm1, %ymm1
-; X32-AVX1-NEXT:    vandps %ymm6, %ymm2, %ymm2
-; X32-AVX1-NEXT:    vmovdqu %xmm0, ga4+16
-; X32-AVX1-NEXT:    vmovdqu %xmm4, ga4
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [1,0,2,0,3,0,4,0]
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
+; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm6
+; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm2, %xmm2
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
+; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm4
+; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; X32-AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
+; X32-AVX1-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X32-AVX1-NEXT:    vmovups %ymm0, ga4
 ; X32-AVX1-NEXT:    vmovups %ymm2, gb4+32
 ; X32-AVX1-NEXT:    vmovups %ymm1, gb4
 ; X32-AVX1-NEXT:    vzeroupper
@@ -886,24 +886,24 @@ define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
 ;
 ; X64-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X64-AVX1:       # %bb.0: # %entry
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2]
-; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm4
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [3,4]
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [3,4]
+; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm3, %xmm3
+; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2]
 ; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm6 = [1,2,3,4]
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
-; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm7
-; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
-; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm5
-; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
-; X64-AVX1-NEXT:    vandps %ymm6, %ymm1, %ymm1
-; X64-AVX1-NEXT:    vandps %ymm6, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vmovdqu %xmm0, ga4+{{.*}}(%rip)
-; X64-AVX1-NEXT:    vmovdqu %xmm4, {{.*}}(%rip)
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [1,2,3,4]
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
+; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm6
+; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm2, %xmm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
+; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm4
+; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; X64-AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
+; X64-AVX1-NEXT:    vandps %ymm3, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vmovups %ymm0, {{.*}}(%rip)
 ; X64-AVX1-NEXT:    vmovups %ymm2, gb4+{{.*}}(%rip)
 ; X64-AVX1-NEXT:    vmovups %ymm1, {{.*}}(%rip)
 ; X64-AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec_fptrunc.ll b/llvm/test/CodeGen/X86/vec_fptrunc.ll
index e7318d9d69723..bb6be6cd9e84b 100644
--- a/llvm/test/CodeGen/X86/vec_fptrunc.ll
+++ b/llvm/test/CodeGen/X86/vec_fptrunc.ll
@@ -99,8 +99,9 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
 ; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
-; X32-AVX-NEXT:    vmovupd %xmm1, 16(%eax)
-; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
+; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
+; X32-AVX-NEXT:    vzeroupper
 ; X32-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: fptrunc_frommem8:
@@ -119,8 +120,9 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
 ; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
-; X64-AVX-NEXT:    vmovupd %xmm1, 16(%rsi)
-; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
+; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
 entry:
   %0 = load <8 x double>, <8 x double>* %in
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index d37795b55cdc2..aeb1951fbef87 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -693,8 +693,8 @@ define <8 x i32> @saddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm8, %ymm0
-; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: saddo_v8i32:
@@ -824,48 +824,48 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
 ; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm10
-; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm1
-; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
+; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpandn %xmm7, %xmm3, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm7
 ; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm5, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm7
-; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm1
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm1, %xmm7
+; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm4
+; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm4
+; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm7
 ; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm0
-; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm7, %xmm0
-; AVX1-NEXT:    vpandn %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm7, %xmm4
+; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm2
+; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
+; AVX1-NEXT:    vpandn %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm10, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm3, %xmm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm3, %xmm3
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vmovdqa %xmm9, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm10, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
+; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: saddo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index 3f53f9f2250cc..ab97c51df410b 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -973,8 +973,8 @@ define <8 x i32> @smulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm5, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: smulo_v8i32:
@@ -1266,59 +1266,59 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpmuldq %xmm4, %xmm6, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT:    vpmulld %xmm4, %xmm6, %xmm8
-; AVX1-NEXT:    vpsrad $31, %xmm8, %xmm6
+; AVX1-NEXT:    vpmulld %xmm4, %xmm6, %xmm4
+; AVX1-NEXT:    vpsrad $31, %xmm4, %xmm6
 ; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
+; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; AVX1-NEXT:    vpmuldq %xmm7, %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; AVX1-NEXT:    vpmuldq %xmm7, %xmm5, %xmm5
 ; AVX1-NEXT:    vpmuldq %xmm3, %xmm1, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm7[0,1],xmm4[2,3],xmm7[4,5],xmm4[6,7]
-; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm3
-; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm9
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
+; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm8, %xmm3, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm9
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
-; AVX1-NEXT:    vpmuldq %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpmuldq %xmm4, %xmm7, %xmm6
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
+; AVX1-NEXT:    vpmuldq %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpmuldq %xmm5, %xmm7, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm6[0,1],xmm1[2,3],xmm6[4,5],xmm1[6,7]
-; AVX1-NEXT:    vpmulld %xmm4, %xmm7, %xmm4
-; AVX1-NEXT:    vpsrad $31, %xmm4, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm6[0,1],xmm3[2,3],xmm6[4,5],xmm3[6,7]
+; AVX1-NEXT:    vpmulld %xmm5, %xmm7, %xmm5
+; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm6
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm8, %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
 ; AVX1-NEXT:    vpmuldq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpmuldq %xmm2, %xmm0, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3],xmm7[4,5],xmm6[6,7]
-; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm6, %xmm0
-; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm9, %xmm0, %xmm1
-; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
+; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm6, %xmm2
+; AVX1-NEXT:    vpxor %xmm8, %xmm2, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpacksswb %xmm9, %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm4
+; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
-; AVX1-NEXT:    vmovdqa %xmm8, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm4, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm3, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: smulo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 3dc73e3b4ba1e..15c0531d67a75 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -714,8 +714,8 @@ define <8 x i32> @ssubo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm8, %ymm0
-; AVX1-NEXT:    vmovdqa %xmm6, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ssubo_v8i32:
@@ -850,52 +850,52 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm9, %xmm4
 ; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
-; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm9, %xmm1
-; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
-; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpandn %xmm1, %xmm7, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm8
+; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm9, %xmm3
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpandn %xmm3, %xmm7, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm1
-; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm1, %xmm6
-; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm4
-; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT:    vpandn %xmm1, %xmm6, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm4
-; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm6
+; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm3
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm6
+; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpandn %xmm3, %xmm6, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm0
-; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm6, %xmm0
-; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpandn %xmm0, %xmm4, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm1
-; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
+; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm7
+; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm7, %xmm6
+; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm2
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
+; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpandn %xmm2, %xmm6, %xmm2
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm10, %ymm1, %ymm4
+; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovdqa %xmm10, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm7, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm4, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm3, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ssubo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 4e9cd2efb74b2..41a0e258e3d12 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -501,8 +501,8 @@ define <8 x i32> @uaddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uaddo_v8i32:
@@ -633,19 +633,19 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
-; AVX1-NEXT:    vmovdqa %xmm4, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm5, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
+; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uaddo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 0bcaacc21dfe4..0c95b73853e96 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -843,10 +843,10 @@ define <8 x i32> @umulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm6, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm5, %ymm2
+; AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm1
-; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
 ; AVX1-NEXT:    vmovaps %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1111,23 +1111,23 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm9, %xmm5, %xmm5
 ; AVX1-NEXT:    vpackssdw %xmm13, %xmm5, %xmm5
 ; AVX1-NEXT:    vpacksswb %xmm11, %xmm5, %xmm5
-; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpmulld %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm3
-; AVX1-NEXT:    vpmulld %xmm10, %xmm12, %xmm6
+; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm2
+; AVX1-NEXT:    vpmulld %xmm10, %xmm12, %xmm0
+; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[1,1,2,3]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[2,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
-; AVX1-NEXT:    vmovdqa %xmm6, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm5[3,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umulo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index c5a7b19cf14dd..b662ac45caf60 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -525,8 +525,8 @@ define <8 x i32> @usubo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: usubo_v8i32:
@@ -671,19 +671,19 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
-; AVX1-NEXT:    vmovdqa %xmm4, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm5, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
+; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: usubo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vector-gep.ll b/llvm/test/CodeGen/X86/vector-gep.ll
index 693380a48ee22..8f62fe5382564 100644
--- a/llvm/test/CodeGen/X86/vector-gep.ll
+++ b/llvm/test/CodeGen/X86/vector-gep.ll
@@ -122,88 +122,74 @@ define <64 x i16*> @AGEP9(i16* %param, <64 x i32> %off) nounwind {
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-32, %esp
-; CHECK-NEXT:    subl $160, %esp
-; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm3
-; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm5
-; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
-; CHECK-NEXT:    vmovdqa %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    subl $96, %esp
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm4
+; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm3
+; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
 ; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm3
-; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa %xmm0, (%esp) # 16-byte Spill
-; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm4
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; CHECK-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm4
 ; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
-; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm1
+; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
 ; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm5, %xmm1
-; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm5, %xmm6
-; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm2
+; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
+; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
+; CHECK-NEXT:    vmovaps %ymm0, (%esp) # 32-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
+; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
 ; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpaddd %xmm2, %xmm5, %xmm2
+; CHECK-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
+; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm4
+; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm5
+; CHECK-NEXT:    vpaddd %xmm5, %xmm5, %xmm5
+; CHECK-NEXT:    vpaddd %xmm5, %xmm3, %xmm5
+; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
+; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
+; CHECK-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
+; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm5
+; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm6
+; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
+; CHECK-NEXT:    vpaddd %xmm6, %xmm3, %xmm6
+; CHECK-NEXT:    vpaddd %xmm5, %xmm5, %xmm5
+; CHECK-NEXT:    vpaddd %xmm5, %xmm3, %xmm5
+; CHECK-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm6
+; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm7
+; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
+; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
+; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
+; CHECK-NEXT:    vpaddd %xmm6, %xmm3, %xmm6
+; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm6, %ymm6
 ; CHECK-NEXT:    vmovdqa 152(%ebp), %xmm7
 ; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm5, %xmm7
-; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
+; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm0
 ; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
-; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm3
-; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
-; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm0, %ymm0
+; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm7
+; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
+; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
+; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm1
+; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
+; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm1, %ymm1
 ; CHECK-NEXT:    movl 8(%ebp), %eax
-; CHECK-NEXT:    vmovdqa %xmm3, 240(%eax)
-; CHECK-NEXT:    vmovdqa %xmm0, 224(%eax)
-; CHECK-NEXT:    vmovdqa %xmm7, 208(%eax)
-; CHECK-NEXT:    vmovdqa %xmm2, 192(%eax)
-; CHECK-NEXT:    vmovdqa %xmm6, 176(%eax)
-; CHECK-NEXT:    vmovdqa %xmm1, 160(%eax)
-; CHECK-NEXT:    vmovdqa %xmm4, 144(%eax)
-; CHECK-NEXT:    vmovaps (%esp), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 128(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 112(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 96(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 80(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 64(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 48(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 32(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, 16(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    vmovaps %xmm0, (%eax)
+; CHECK-NEXT:    vmovaps %ymm1, 224(%eax)
+; CHECK-NEXT:    vmovaps %ymm0, 192(%eax)
+; CHECK-NEXT:    vmovaps %ymm6, 160(%eax)
+; CHECK-NEXT:    vmovaps %ymm5, 128(%eax)
+; CHECK-NEXT:    vmovaps %ymm4, 96(%eax)
+; CHECK-NEXT:    vmovaps %ymm2, 64(%eax)
+; CHECK-NEXT:    vmovaps (%esp), %ymm0 # 32-byte Reload
+; CHECK-NEXT:    vmovaps %ymm0, 32(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT:    vmovaps %ymm0, (%eax)
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index 327cc7917e912..c6b36e4aae783 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -668,14 +668,14 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX1-LABEL: trunc16i32_16i16:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -686,8 +686,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -717,16 +717,16 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -792,16 +792,16 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_lshr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1291,14 +1291,14 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX1-LABEL: trunc32i16_32i8:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1317,19 +1317,23 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ;
 ; AVX512F-LABEL: trunc32i16_32i8:
 ; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc32i16_32i8:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 0027fbe2657de..56e86a6bc95fd 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -678,14 +678,14 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX1-LABEL: trunc16i32_16i16:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -696,8 +696,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -727,16 +727,16 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -802,16 +802,16 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_lshr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1301,14 +1301,14 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX1-LABEL: trunc32i16_32i8:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovups %ymm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1327,19 +1327,23 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ;
 ; AVX512F-LABEL: trunc32i16_32i8:
 ; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc32i16_32i8:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 8cd01b631d601..bff39467c1eb2 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -341,10 +341,11 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX1-NEXT:    vmovdqa %xmm0, 48(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm4, 32(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
-; AVX1-NEXT:    vmovdqa %xmm3, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
+; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_store_vf16_i8_stride4:
@@ -357,10 +358,11 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX2-NEXT:    vmovdqa %xmm0, 48(%rdi)
-; AVX2-NEXT:    vmovdqa %xmm4, 32(%rdi)
-; AVX2-NEXT:    vmovdqa %xmm1, 16(%rdi)
-; AVX2-NEXT:    vmovdqa %xmm3, (%rdi)
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm4, %ymm0
+; AVX2-NEXT:    vmovdqa %ymm0, 32(%rdi)
+; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
+; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_store_vf16_i8_stride4:
@@ -886,20 +888,37 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) {
 }
 
 define void @interleaved_store_vf8_i8_stride4(<8 x i8> %x1, <8 x i8> %x2, <8 x i8> %x3, <8 x i8> %x4, <32 x i8>* %p) {
-; AVX-LABEL: interleaved_store_vf8_i8_stride4:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
-; AVX-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
-; AVX-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
-; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
-; AVX-NEXT:    vmovdqa %xmm2, (%rdi)
-; AVX-NEXT:    retq
+; AVX1-LABEL: interleaved_store_vf8_i8_stride4:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
+; AVX1-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2OR512-LABEL: interleaved_store_vf8_i8_stride4:
+; AVX2OR512:       # %bb.0:
+; AVX2OR512-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
+; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
+; AVX2OR512-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
+; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
+; AVX2OR512-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; AVX2OR512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2OR512-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2OR512-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
+; AVX2OR512-NEXT:    vmovdqa %ymm0, (%rdi)
+; AVX2OR512-NEXT:    vzeroupper
+; AVX2OR512-NEXT:    retq
 %v1 = shufflevector <8 x i8> %x1, <8 x i8> %x2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %v2 = shufflevector <8 x i8> %x3, <8 x i8> %x4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %interleaved.vec = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0,i32 8,i32 16,i32 24,i32 1,i32 9,i32 17,i32 25,i32 2,i32 10,i32 18,i32 26,i32 3,i32 11,i32 19,i32 27,i32 4,i32 12,i32 20,i32 28,i32 5,i32 13,i32 21,i32 29,i32 6,i32 14,i32 22,i32 30,i32 7,i32 15,i32 23,i32 31>
@@ -1077,9 +1096,10 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vmovdqu %xmm0, 16(%rdi)
-; AVX1-NEXT:    vmovdqu %xmm1, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vmovdqu %xmm2, 32(%rdi)
+; AVX1-NEXT:    vmovups %ymm0, (%rdi)
+; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_store_vf16_i8_stride3:
@@ -1096,9 +1116,10 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vmovdqu %xmm0, 16(%rdi)
-; AVX2-NEXT:    vmovdqu %xmm1, (%rdi)
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vmovdqu %xmm2, 32(%rdi)
+; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_store_vf16_i8_stride3:

From f612b18720aa6da2843c562a266d0850039418dd Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 28 May 2019 17:38:04 +0000
Subject: [PATCH 0381/1176] [COFF] Add ImportChunkThunk, simplify, deduplicate

Removes the isHotPatchable faux-virtual and virtual methods.  Follow-up to
D62362.

Reviewers: aganea

Differential Revision: https://reviews.llvm.org/D62422

llvm-svn: 361851
---
 lld/COFF/Chunks.cpp |  2 +-
 lld/COFF/Chunks.h   | 78 ++++++++++++++++++---------------------------
 2 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 901333e041706..ebdbc40ed6da2 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -649,7 +649,7 @@ void StringChunk::writeTo(uint8_t *Buf) const {
   Buf[Str.size()] = '\0';
 }
 
-ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) {
+ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImportThunkChunk(S) {
   // Intel Optimization Manual says that all branch targets
   // should be 16-byte aligned. MSVC linker does this too.
   setAlignment(16);
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index cfa71e3ff1c60..0df168be77127 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -54,7 +54,7 @@ enum : unsigned { Log2MaxSectionAlignment = 13 };
 // doesn't even have actual data (if common or bss).
 class Chunk {
 public:
-  enum Kind : uint8_t { SectionKind, OtherKind };
+  enum Kind : uint8_t { SectionKind, OtherKind, ImportThunkKind };
   Kind kind() const { return ChunkKind; }
 
   // Returns the size of this chunk (even if this is a common or BSS.)
@@ -167,19 +167,14 @@ class NonSectionChunk : public Chunk {
   // Collect all locations that contain absolute addresses for base relocations.
   virtual void getBaserels(std::vector<Baserel> *Res) {}
 
-  // Return true if this file has the hotpatch flag set to true in the
-  // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
-  // synthesized by the linker.
-  virtual bool isHotPatchable() const { return false; }
-
   // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
   // bytes, so this is used only for logging or debugging.
   virtual StringRef getDebugName() const { return ""; }
 
-  static bool classof(const Chunk *C) { return C->kind() == OtherKind; }
+  static bool classof(const Chunk *C) { return C->kind() != SectionKind; }
 
 protected:
-  NonSectionChunk() : Chunk(OtherKind) {}
+  NonSectionChunk(Kind K = OtherKind) : Chunk(K) {}
 };
 
 // A chunk corresponding a section of an input file.
@@ -250,8 +245,6 @@ class SectionChunk final : public Chunk {
     return getSectionName().startswith(".debug_") || getSectionName() == ".eh_frame";
   }
 
-  bool isHotPatchable() const { return File->HotPatchable; }
-
   // Allow iteration over the bodies of this chunk's relocated symbols.
   llvm::iterator_range<symbol_iterator> symbols() const {
     return llvm::make_range(symbol_iterator(File, RelocsData),
@@ -379,13 +372,6 @@ inline void Chunk::writeTo(uint8_t *Buf) const {
     static_cast<const NonSectionChunk *>(this)->writeTo(Buf);
 }
 
-inline bool Chunk::isHotPatchable() const {
-  if (isa<SectionChunk>(this))
-    return static_cast<const SectionChunk *>(this)->isHotPatchable();
-  else
-    return static_cast<const NonSectionChunk *>(this)->isHotPatchable();
-}
-
 inline StringRef Chunk::getSectionName() const {
   if (isa<SectionChunk>(this))
     return static_cast<const SectionChunk *>(this)->getSectionName();
@@ -478,57 +464,44 @@ static const uint8_t ImportThunkARM64[] = {
 // Windows-specific.
 // A chunk for DLL import jump table entry. In a final output, its
 // contents will be a JMP instruction to some __imp_ symbol.
-class ImportThunkChunkX64 : public NonSectionChunk {
+class ImportThunkChunk : public NonSectionChunk {
+public:
+  ImportThunkChunk(Defined *S)
+      : NonSectionChunk(ImportThunkKind), ImpSymbol(S) {}
+  static bool classof(const Chunk *C) { return C->kind() == ImportThunkKind; }
+
+protected:
+  Defined *ImpSymbol;
+};
+
+class ImportThunkChunkX64 : public ImportThunkChunk {
 public:
   explicit ImportThunkChunkX64(Defined *S);
   size_t getSize() const override { return sizeof(ImportThunkX86); }
   void writeTo(uint8_t *Buf) const override;
-
-  bool isHotPatchable() const override { return true; }
-
-private:
-  Defined *ImpSymbol;
 };
 
-class ImportThunkChunkX86 : public NonSectionChunk {
+class ImportThunkChunkX86 : public ImportThunkChunk {
 public:
-  explicit ImportThunkChunkX86(Defined *S) : ImpSymbol(S) {
-  }
+  explicit ImportThunkChunkX86(Defined *S) : ImportThunkChunk(S) {}
   size_t getSize() const override { return sizeof(ImportThunkX86); }
   void getBaserels(std::vector<Baserel> *Res) override;
   void writeTo(uint8_t *Buf) const override;
-
-  bool isHotPatchable() const override { return true; }
-
-private:
-  Defined *ImpSymbol;
 };
 
-class ImportThunkChunkARM : public NonSectionChunk {
+class ImportThunkChunkARM : public ImportThunkChunk {
 public:
-  explicit ImportThunkChunkARM(Defined *S) : ImpSymbol(S) {
-  }
+  explicit ImportThunkChunkARM(Defined *S) : ImportThunkChunk(S) {}
   size_t getSize() const override { return sizeof(ImportThunkARM); }
   void getBaserels(std::vector<Baserel> *Res) override;
   void writeTo(uint8_t *Buf) const override;
-
-  bool isHotPatchable() const override { return true; }
-
-private:
-  Defined *ImpSymbol;
 };
 
-class ImportThunkChunkARM64 : public NonSectionChunk {
+class ImportThunkChunkARM64 : public ImportThunkChunk {
 public:
-  explicit ImportThunkChunkARM64(Defined *S) : ImpSymbol(S) {
-  }
+  explicit ImportThunkChunkARM64(Defined *S) : ImportThunkChunk(S) {}
   size_t getSize() const override { return sizeof(ImportThunkARM64); }
   void writeTo(uint8_t *Buf) const override;
-
-  bool isHotPatchable() const override { return true; }
-
-private:
-  Defined *ImpSymbol;
 };
 
 class RangeExtensionThunkARM : public NonSectionChunk {
@@ -684,6 +657,17 @@ class AbsolutePointerChunk : public NonSectionChunk {
   uint64_t Value;
 };
 
+// Return true if this file has the hotpatch flag set to true in the S_COMPILE3
+// record in codeview debug info. Also returns true for some thunks synthesized
+// by the linker.
+inline bool Chunk::isHotPatchable() const {
+  if (auto *SC = dyn_cast<SectionChunk>(this))
+    return SC->File->HotPatchable;
+  else if (isa<ImportThunkChunk>(this))
+    return true;
+  return false;
+}
+
 void applyMOV32T(uint8_t *Off, uint32_t V);
 void applyBranch24T(uint8_t *Off, int32_t V);
 

From 19f51ec04ac1edd0b278e23b8b5cb3ff49c2dd08 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 17:53:43 +0000
Subject: [PATCH 0382/1176] [DAGCombiner][X86][AArch64][AMDGPU] (x + C) - y  ->
  (x - y) + C  fold

Summary:
The main motivation is shown by all these `neg` instructions that are now created.
In particular, the `@reg32_lshr_by_negated_unfolded_sub_b` test.

AArch64 test changes all look good (`neg` created), or neutral.

X86 changes look neutral (vectors), or good (`neg` / `xor eax, eax` created).

I'm not sure about `X86/ragreedy-hoist-spill.ll`, it looks like the spill
is now hoisted into preheader (which should still be good?),
2 4-byte reloads become 1 8-byte reload, and are elsewhere,
but i'm not sure how that affects that loop.

I'm unable to interpret AMDGPU change, looks neutral-ish?

This is hopefully a step towards solving [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]].

https://rise4fun.com/Alive/pkdq (we are missing more patterns, i'll submit them later)

Reviewers: craig.topper, RKSimon, spatel, arsenm

Reviewed By: RKSimon

Subscribers: bjope, qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62223

llvm-svn: 361852
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 ++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  6 +-
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 16 ++--
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 21 ++---
 llvm/test/CodeGen/X86/combine-add.ll          |  4 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 83 ++++++++++---------
 llvm/test/CodeGen/X86/shift-amount-mod.ll     |  9 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 20 ++---
 llvm/test/CodeGen/X86/zext-sext.ll            | 21 ++---
 9 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d53ee3134d550..06c2daa90bf10 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2923,6 +2923,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+  }
+
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index 6daef644761b5..d349eb09f7353 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -486,8 +486,7 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
+; CHECK-NEXT:    neg w8, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -500,8 +499,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
+; CHECK-NEXT:    neg x8, x1
 ; CHECK-NEXT:    sub x8, x8, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index c571dac94b81e..8886954623f7c 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 20c84c5b63277..71c8f6926c1f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -15,10 +15,11 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_add_i32 s2, s2, -1
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VARIANT0-NEXT:    s_waitcnt expcnt(0)
+; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -36,12 +37,12 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_add_i32 s2, s2, -1
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
+; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -59,8 +60,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    s_add_i32 s0, s0, -1
-; VARIANT2-NEXT:    v_sub_u32_e32 v3, s0, v0
+; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
+; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -82,8 +83,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    s_add_i32 s0, s0, -1
-; VARIANT3-NEXT:    v_sub_u32_e32 v3, s0, v0
+; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
+; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 6f5f1370e6b4e..1d20fcf33d742 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
 ; SSE-LABEL: combine_vec_add_sub_sub:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> %a, %b
   %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c9a577dbaa92b..fd3d83ed2cbec 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT:    movq %rdx, %r14
-; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:    movq %rdi, %rbp
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
@@ -78,10 +78,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $32, %esi
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    imulq $1040, %r14, %rax ## imm = 0x410
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT:    imulq $1040, %rbx, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@{{.*}}(%rip), %rcx
 ; CHECK-NEXT:    leaq 8(%rcx,%rax), %rbx
-; CHECK-NEXT:    movl $1, %r14d
+; CHECK-NEXT:    movl $1, %r15d
 ; CHECK-NEXT:    movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -91,48 +92,47 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    xorl %r12d, %r12d
-; CHECK-NEXT:    testb %r12b, %r12b
+; CHECK-NEXT:    xorl %r14d, %r14d
+; CHECK-NEXT:    testb %r14b, %r14b
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%r12), %eax
+; CHECK-NEXT:    leal 1(%r14), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $-1, %ecx
+; CHECK-NEXT:    movl $-1, %r13d
 ; CHECK-NEXT:    movslq (%rsi,%rax,4), %rax
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    movl $1, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r13
+; CHECK-NEXT:    ## implicit-def: $r12
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r13
+; CHECK-NEXT:    ## implicit-def: $r12
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    js LBB0_55
 ; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    movq %rax, %r13
+; CHECK-NEXT:    movq %rax, %r12
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
@@ -157,16 +157,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    je LBB0_34
 ; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r13), %rax
+; CHECK-NEXT:    leaq 1(%r12), %rax
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_29
 ; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %r13
+; CHECK-NEXT:    incq %r12
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; CHECK-NEXT:    addl $-324, %eax ## imm = 0xFEBC
+; CHECK-NEXT:    leal -324(%r13), %eax
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
@@ -176,11 +175,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_38
 ; CHECK-NEXT:  LBB0_35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl $11, %r13d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.36: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl $24, %r13d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.37: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -195,8 +194,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movb $0, (%r13)
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movb $0, (%r12)
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
@@ -208,22 +207,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_55
 ; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %ecx ## imm = 0x10C
+; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    movl $2, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_40: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %ecx
+; CHECK-NEXT:    movl $20, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r12), %eax
+; CHECK-NEXT:    leal -268(%r14), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.56: ## %while.body200
@@ -233,12 +232,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_20: ## %sw.bb256
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r14d
-; CHECK-NEXT:    testl %r14d, %r14d
-; CHECK-NEXT:    movl %ecx, %r12d
+; CHECK-NEXT:    decl %r15d
+; CHECK-NEXT:    testl %r15d, %r15d
+; CHECK-NEXT:    movl %r13d, %r14d
 ; CHECK-NEXT:    jg LBB0_13
 ; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -255,27 +254,28 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_11:
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:  LBB0_22: ## %while.end1465
-; CHECK-NEXT:    incl %ecx
-; CHECK-NEXT:    cmpl $16, %ecx
+; CHECK-NEXT:    incl %r13d
+; CHECK-NEXT:    cmpl $16, %r13d
 ; CHECK-NEXT:    ja LBB0_50
 ; CHECK-NEXT:  ## %bb.23: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %ecx, %eax
+; CHECK-NEXT:    btl %r13d, %eax
 ; CHECK-NEXT:    jae LBB0_50
 ; CHECK-NEXT:  ## %bb.24:
-; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
 ; CHECK-NEXT:  LBB0_48: ## %if.then1477
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
-; CHECK-NEXT:    addq $8189, %r15 ## imm = 0x1FFD
-; CHECK-NEXT:    subq %rbx, %r15
-; CHECK-NEXT:    addq _syHistory@{{.*}}(%rip), %r15
+; CHECK-NEXT:    subq %rbp, %rbx
+; CHECK-NEXT:    movq _syHistory@{{.*}}(%rip), %rax
+; CHECK-NEXT:    leaq 8189(%rbx,%rax), %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_49: ## %for.body1723
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    decq %r15
+; CHECK-NEXT:    decq %rax
 ; CHECK-NEXT:    jmp LBB0_49
 ; CHECK-NEXT:  LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT:    movl $512, %eax ## imm = 0x200
@@ -302,7 +302,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_55: ## %if.then.i
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT:    movq %r15, %rbx
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT:    movq %rbx, %rbp
 ; CHECK-NEXT:    jmp LBB0_48
 ; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
 ; CHECK-NEXT:    xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 6c268d8a27f42..e8af5f66d36c9 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
-; X64-NEXT:    subl %esi, %ecx
+; X64-NEXT:    negl %ecx
 ; X64-NEXT:    subl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
@@ -1139,9 +1139,10 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 2ffbfcb56b2f7..37a3dcbd0e4a3 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal 32(%rdi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal -32(%rdi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    subl %edx, %esi
 ; X64-NEXT:    leal 32(%rsi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    psubd %xmm2, %xmm0
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psubd %xmm2, %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    psubd %xmm2, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    psubd %xmm2, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 7034378a880b5..84096e3b6805d 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -15,30 +15,27 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
 ; CHECK-NEXT:    subq %rax, %rsi
 ; CHECK-NEXT:    movq (%rdx), %rax
 ; CHECK-NEXT:    movswl 8(%rdi), %edx
-; CHECK-NEXT:    movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
 ; CHECK-NEXT:    movswl (%rax,%rsi,2), %eax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    imull %edx, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    addl $2138875574, %eax # imm = 0x7F7CA6B6
 ; CHECK-NEXT:    cmpl $-8608074, %eax # imm = 0xFF7CA6B6
-; CHECK-NEXT:    movslq %eax, %r8
+; CHECK-NEXT:    movslq %eax, %rdi
 ; CHECK-NEXT:    setl %dl
 ; CHECK-NEXT:    cmpl $2138875573, %eax # imm = 0x7F7CA6B5
-; CHECK-NEXT:    movq %r8, %r9
+; CHECK-NEXT:    movq %rdi, %r8
 ; CHECK-NEXT:    leal -1(%rdx,%rdx), %edx
 ; CHECK-NEXT:    cmovlel %edx, %esi
-; CHECK-NEXT:    subq %rax, %r9
-; CHECK-NEXT:    addq %r8, %rdi
+; CHECK-NEXT:    subq %rax, %r8
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, %esi
-; CHECK-NEXT:    cmovneq %rax, %r9
-; CHECK-NEXT:    testl %r8d, %r8d
-; CHECK-NEXT:    cmovnsq %rax, %r9
-; CHECK-NEXT:    movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
-; CHECK-NEXT:    subq %r9, %rdi
-; CHECK-NEXT:    addq (%rcx), %rdi
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    cmovneq %rax, %r8
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovnsq %rax, %r8
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    subq %r8, %rdi
+; CHECK-NEXT:    leaq -2138875574(%rax,%rdi), %rax
 ; CHECK-NEXT:    movq %rax, (%rcx)
 ; CHECK-NEXT:    retq
 entry:

From 1499f65ac1702d0858c1f7a5406929040a2e6e1e Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 17:53:54 +0000
Subject: [PATCH 0383/1176] [DAGCombiner][X86][AArch64][SPARC][SystemZ] y - (x
 + C)  ->  (y - x) - C  fold

Summary:
Direct sibling of D62223 patch.
While i don't have a direct motivational pattern for this,
it would seem to make sense to handle both patterns (or none),
for symmetry?

The aarch64 changes look neutral;
sparc and systemz look like improvement (one less instruction each);
x86 changes - 32bit case improves, 64bit case shows that LEA no longer
gets constructed, which may be because that whole test is `-mattr=+slow-lea,+slow-3ops-lea`

https://rise4fun.com/Alive/ffh

Reviewers: RKSimon, craig.topper, spatel, t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, jyknight, javed.absar, kristof.beyls, fedor.sergeev, jrtc27, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62252

llvm-svn: 361853
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  4 +--
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 12 ++++----
 .../CodeGen/SPARC/2013-05-17-CallFrame.ll     |  5 ++--
 llvm/test/CodeGen/SystemZ/alloca-03.ll        | 11 ++++----
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 28 +++++++++----------
 6 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 06c2daa90bf10..51f9c34e7ee82 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2929,6 +2929,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
   }
+  // y - (x + C)  ->  (y - x) - C
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N1.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index d349eb09f7353..c91700436bb96 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -513,7 +513,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    add w8, w2, w1
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 0, %a
@@ -525,7 +525,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    add x8, x2, x1
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 8886954623f7c..167ca6a10ec13 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w2, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w8, w2, w8
+; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI19_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
diff --git a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
index 1a97e4e317e57..274e99b114c32 100644
--- a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
+++ b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
@@ -15,10 +15,9 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
 ; V8-NEXT:    .cfi_register 15, 31
 ; V8-NEXT:    add %i0, 7, %i0
 ; V8-NEXT:    and %i0, -8, %i0
-; V8-NEXT:    add %i0, 8, %i0
 ; V8-NEXT:    sub %sp, %i0, %i0
-; V8-NEXT:    add %i0, 96, %o0
-; V8-NEXT:    mov %i0, %sp
+; V8-NEXT:    add %i0, -8, %sp
+; V8-NEXT:    add %i0, 88, %o0
 ; V8-NEXT:    add %sp, -16, %sp
 ; V8-NEXT:    st %o0, [%sp+104]
 ; V8-NEXT:    st %o0, [%sp+100]
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index 343071211b751..cac569ff41fa3 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -75,13 +75,12 @@ define void @f3(i64 %len) {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    sllg %r2, %r2, 3
-; CHECK-NEXT:    la %r0, 120(%r2)
+; CHECK-NEXT:    sllg %r0, %r2, 3
 ; CHECK-NEXT:    sgr %r1, %r0
-; CHECK-NEXT:    la %r2, 280(%r1)
-; CHECK-NEXT:    nill %r2, 65408
-; CHECK-NEXT:    lgr %r15, %r1
-; CHECK-NEXT:    mvghi 0(%r2), 10
+; CHECK-NEXT:    lay %r15, -120(%r1)
+; CHECK-NEXT:    la %r1, 160(%r1)
+; CHECK-NEXT:    nill %r1, 65408
+; CHECK-NEXT:    mvghi 0(%r1), 10
 ; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
 ; CHECK-NEXT:    br %r14
   %x = alloca i64, i64 %len, align 128
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 37a3dcbd0e4a3..59a42ad9e3926 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl $32, %ecx
-; X32-NEXT:    subl %ecx, %eax
+; X32-NEXT:    addl %ecx, %eax
+; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    addl $32, %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal -32(%rdx,%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    psubd %xmm0, %xmm2
-; X32-NEXT:    movdqa %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd %xmm2, %xmm1
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm0
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd %xmm2, %xmm1
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS

From 6a24c9b9abd0f0f46161a93aa908ce4b32190db8 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 17:54:04 +0000
Subject: [PATCH 0384/1176] [DAGCombiner][X86][AArch64] (x - C) + y  ->  (x +
 y) - C  fold

Summary:
Only vector tests are being affected here,
since subtraction by scalar constant is rewritten
as addition by negated constant.

No surprising test changes.

https://rise4fun.com/Alive/pbT

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62257

llvm-svn: 361854
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 ++++++++
 .../CodeGen/AArch64/sink-addsub-of-const.ll   |  6 +++---
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 12 +++++------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 20 +++++++++----------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 16 +++++++--------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 12 +++++------
 6 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 51f9c34e7ee82..df842c27e27af 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2454,6 +2454,14 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
+  // Hoist one-use subtraction by constant:  (x - C) + y  ->  (x + y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+  }
+
   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
   // add (sext i1 Y), X --> sub X, (zext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 167ca6a10ec13..7c9ae43db134d 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI23_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI23_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 59a42ad9e3926..3bca6d1d199e3 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add0:
 ; X32:       # %bb.0:
+; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add0:
 ; X64:       # %bb.0:
+; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add1:
 ; X32:       # %bb.0:
+; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add1:
 ; X64:       # %bb.0:
+; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd %xmm2, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 2c41ee31a101d..58c972164d8f1 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    psubb %xmm2, %xmm0
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm0, %xmm1
+; SSE2-NEXT:    psubb %xmm2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    psubb %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm0, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2NOBW-LABEL: test_div7_16i8:
@@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vzeroupper
 ; AVX2NOBW-NEXT:    retq
 ;
@@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
   %res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
@@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm3, %xmm2
-; SSE2-NEXT:    psubb %xmm3, %xmm2
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm2, %xmm1
+; SSE2-NEXT:    psubb %xmm3, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psllw $3, %xmm2
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm3, %xmm2
-; SSE41-NEXT:    psubb %xmm3, %xmm2
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm2, %xmm1
+; SSE41-NEXT:    psubb %xmm3, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    psllw $3, %xmm2
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index d612d73448754..eda349005cda7 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm2
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
@@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_32i8:
@@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512BW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %res
@@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm4
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
@@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm3
 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
@@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
-; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
@@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512BW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 86c706c03a70c..961bec56e5d3c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
@@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpand %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
@@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res
@@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm4
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX512F-NEXT:    vpand %ymm8, %ymm4, %ymm4
@@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm3
 ; AVX512F-NEXT:    vpand %ymm8, %ymm3, %ymm3
 ; AVX512F-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
@@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsubb %zmm3, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm3, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsllw $3, %zmm1, %zmm2
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1

From 8c9b3e4e4a6b4c22293e13e81bee497e182ddd87 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 17:54:13 +0000
Subject: [PATCH 0385/1176] [DAGCombine][X86][AArch64][AMDGPU] (x - y) + -1  ->
  add (xor y, -1), x  fold

Summary:
This prevents regressions in next patch,
and somewhat recovers from the regression to AMDGPU test in D62223.

It is indeed not great that we leave vector decrement,
don't transform it into vector add all-ones..

https://rise4fun.com/Alive/ZRl

Reviewers: RKSimon, craig.topper, spatel, arsenm

Reviewed By: RKSimon, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62263

llvm-svn: 361855
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++
 llvm/test/CodeGen/AArch64/xor.ll              | 18 +++---
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 19 +++---
 llvm/test/CodeGen/X86/xor.ll                  | 62 ++++++++++---------
 4 files changed, 62 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index df842c27e27af..43899ecb8e60a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2303,6 +2303,13 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     }
   }
 
+  // (x - y) + -1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isAllOnesOrAllOnesSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
@@ -2931,6 +2938,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // (x - y) - 1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
       isConstantOrConstantVector(N0.getOperand(1))) {
diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll
index 1dca55a971308..ca6c0dfabba48 100644
--- a/llvm/test/CodeGen/AArch64/xor.ll
+++ b/llvm/test/CodeGen/AArch64/xor.ll
@@ -18,8 +18,8 @@ define i32 @PR39657(i8* %p, i64 %x) {
 define i32 @add_of_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -29,8 +29,8 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -40,9 +40,8 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -52,9 +51,8 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 71c8f6926c1f9..2dd7e20c00ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -9,17 +9,16 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0:       ; %bb.0: ; %entry
 ; VARIANT0-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT0-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT0-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT0-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT0-NEXT:    s_mov_b32 s6, 0
 ; VARIANT0-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_waitcnt expcnt(0)
-; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -31,18 +30,18 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1:       ; %bb.0: ; %entry
 ; VARIANT1-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT1-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT1-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT1-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT1-NEXT:    s_mov_b32 s6, 0
 ; VARIANT1-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
-; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -60,8 +59,7 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT2-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -83,8 +81,7 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT3-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 5ef5999be95f4..654382f7b73e9 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -532,22 +532,24 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -558,22 +560,24 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not_decrement:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -583,24 +587,23 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -610,24 +613,23 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not_decrement:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>

From 76696654322bd502cfd4592e48fc6a1782de5694 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 17:54:21 +0000
Subject: [PATCH 0386/1176] [DAGCombine] (x - C) - y  ->  (x - y) - C  fold

Summary:
Again only vectors affected. Frustrating. Let me take a look into that..

https://rise4fun.com/Alive/AAq

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, JDevlieghere, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62294

llvm-svn: 361856
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 7 +++++++
 llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll | 2 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll     | 4 ++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 43899ecb8e60a..efac27a1b69be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2957,6 +2957,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
   }
+  // (x - C) - y  ->  (x - y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 7c9ae43db134d..0e1a426c77f29 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -314,8 +314,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 3bca6d1d199e3..4544707d07a2f 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -460,15 +460,15 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    psubd %xmm2, %xmm0
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psubd %xmm2, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>

From 2fb0a820df9c9884b9f42efdd0919b309e2b1204 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 28 May 2019 18:08:06 +0000
Subject: [PATCH 0387/1176] [IR] Add SaturatingInst and BinaryOpIntrinsic
 classes

Based on the suggestion in D62447, this adds a SaturatingInst class
that represents the saturating add/sub family of intrinsics. It
exposes the same interface as WithOverflowInst, for this reason I
have also added a common base class BinaryOpIntrinsic that holds the
actual implementation code and will be useful in some places handling
both overflowing and saturating math.

Differential Revision: https://reviews.llvm.org/D62466

llvm-svn: 361857
---
 llvm/include/llvm/IR/IntrinsicInst.h | 49 ++++++++++++++++++++++++++--
 llvm/lib/IR/IntrinsicInst.cpp        | 12 +++++--
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index da9823b88c584..9b816b0a224d1 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -267,8 +267,9 @@ namespace llvm {
     }
   };
 
-  /// This class represents a op.with.overflow intrinsic.
-  class WithOverflowInst : public IntrinsicInst {
+  /// This class represents an intrinsic that is based on a binary operation.
+  /// This includes op.with.overflow and saturating add/sub intrinsics.
+  class BinaryOpIntrinsic : public IntrinsicInst {
   public:
     static bool classof(const IntrinsicInst *I) {
       switch (I->getIntrinsicID()) {
@@ -278,6 +279,10 @@ namespace llvm {
       case Intrinsic::ssub_with_overflow:
       case Intrinsic::umul_with_overflow:
       case Intrinsic::smul_with_overflow:
+      case Intrinsic::uadd_sat:
+      case Intrinsic::sadd_sat:
+      case Intrinsic::usub_sat:
+      case Intrinsic::ssub_sat:
         return true;
       default:
         return false;
@@ -300,6 +305,46 @@ namespace llvm {
     unsigned getNoWrapKind() const;
   };
 
+  /// Represents an op.with.overflow intrinsic.
+  class WithOverflowInst : public BinaryOpIntrinsic {
+  public:
+    static bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::uadd_with_overflow:
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::usub_with_overflow:
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::umul_with_overflow:
+      case Intrinsic::smul_with_overflow:
+        return true;
+      default:
+        return false;
+      }
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// Represents a saturating add/sub intrinsic.
+  class SaturatingInst : public BinaryOpIntrinsic {
+  public:
+    static bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::uadd_sat:
+      case Intrinsic::sadd_sat:
+      case Intrinsic::usub_sat:
+      case Intrinsic::ssub_sat:
+        return true;
+      default:
+        return false;
+      }
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   /// Common base class for all memory intrinsics. Simply provides
   /// common methods.
   /// Written as CRTP to avoid a common base class amongst the
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 7ff8631c76f17..793e2895dce6d 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -171,13 +171,17 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
   }
 }
 
-Instruction::BinaryOps WithOverflowInst::getBinaryOp() const {
+Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
   switch (getIntrinsicID()) {
     case Intrinsic::uadd_with_overflow:
     case Intrinsic::sadd_with_overflow:
+    case Intrinsic::uadd_sat:
+    case Intrinsic::sadd_sat:
       return Instruction::Add;
     case Intrinsic::usub_with_overflow:
     case Intrinsic::ssub_with_overflow:
+    case Intrinsic::usub_sat:
+    case Intrinsic::ssub_sat:
       return Instruction::Sub;
     case Intrinsic::umul_with_overflow:
     case Intrinsic::smul_with_overflow:
@@ -187,18 +191,20 @@ Instruction::BinaryOps WithOverflowInst::getBinaryOp() const {
   }
 }
 
-bool WithOverflowInst::isSigned() const {
+bool BinaryOpIntrinsic::isSigned() const {
   switch (getIntrinsicID()) {
     case Intrinsic::sadd_with_overflow:
     case Intrinsic::ssub_with_overflow:
     case Intrinsic::smul_with_overflow:
+    case Intrinsic::sadd_sat:
+    case Intrinsic::ssub_sat:
       return true;
     default:
       return false;
   }
 }
 
-unsigned WithOverflowInst::getNoWrapKind() const {
+unsigned BinaryOpIntrinsic::getNoWrapKind() const {
   if (isSigned())
     return OverflowingBinaryOperator::NoSignedWrap;
   else

From 332c10056227d5da5557f50e8c64dc8814ca56f0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 28 May 2019 18:08:31 +0000
Subject: [PATCH 0388/1176] [ValueTracking][ConstantRange] Distinguish low/high
 always overflow

In order to fold an always overflowing signed saturating add/sub,
we need to know in which direction the always overflow occurs.
This patch splits up AlwaysOverflows into AlwaysOverflowsLow and
AlwaysOverflowsHigh to pass through this information (but it is
not used yet).

Differential Revision: https://reviews.llvm.org/D62463

llvm-svn: 361858
---
 llvm/include/llvm/Analysis/ValueTracking.h    | 11 +++-
 llvm/include/llvm/IR/ConstantRange.h          | 11 +++-
 llvm/lib/Analysis/ValueTracking.cpp           |  6 +-
 llvm/lib/IR/ConstantRange.cpp                 | 18 ++---
 .../InstCombine/InstCombineCalls.cpp          |  4 +-
 .../InstCombine/InstCombineCompares.cpp       |  3 +-
 llvm/unittests/IR/ConstantRangeTest.cpp       | 65 ++++++++++++-------
 7 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index bc6be5833070a..8e03b7773e8ec 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -412,7 +412,16 @@ class Value;
   bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI,
                                const DominatorTree *DT = nullptr);
 
-  enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
+  enum class OverflowResult {
+    /// Always overflows in the direction of signed/unsigned min value.
+    AlwaysOverflowsLow,
+    /// Always overflows in the direction of signed/unsigned max value.
+    AlwaysOverflowsHigh,
+    /// May or may not overflow.
+    MayOverflow,
+    /// Never overflows.
+    NeverOverflows,
+  };
 
   OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
                                                const Value *RHS,
diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index f8d4e0a4a9a8c..0b176747f7c0f 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -416,7 +416,16 @@ class LLVM_NODISCARD ConstantRange {
 
   /// Represents whether an operation on the given constant range is known to
   /// always or never overflow.
-  enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
+  enum class OverflowResult {
+    /// Always overflows in the direction of signed/unsigned min value.
+    AlwaysOverflowsLow,
+    /// Always overflows in the direction of signed/unsigned max value.
+    AlwaysOverflowsHigh,
+    /// May or may not overflow.
+    MayOverflow,
+    /// Never overflows.
+    NeverOverflows,
+  };
 
   /// Return whether unsigned add of the two ranges always/never overflows.
   OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const;
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index d46ddc428b266..640063700e884 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3992,8 +3992,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
   switch (OR) {
     case ConstantRange::OverflowResult::MayOverflow:
       return OverflowResult::MayOverflow;
-    case ConstantRange::OverflowResult::AlwaysOverflows:
-      return OverflowResult::AlwaysOverflows;
+    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+      return OverflowResult::AlwaysOverflowsLow;
+    case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+      return OverflowResult::AlwaysOverflowsHigh;
     case ConstantRange::OverflowResult::NeverOverflows:
       return OverflowResult::NeverOverflows;
   }
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index 0d44c3815b3b4..30b6a27078c2c 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -1208,9 +1208,9 @@ ConstantRange::OverflowResult ConstantRange::unsignedAddMayOverflow(
   APInt Min = getUnsignedMin(), Max = getUnsignedMax();
   APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
 
-  // a u+ b overflows iff a u> ~b.
+  // a u+ b overflows high iff a u> ~b.
   if (Min.ugt(~OtherMin))
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsHigh;
   if (Max.ugt(~OtherMax))
     return OverflowResult::MayOverflow;
   return OverflowResult::NeverOverflows;
@@ -1231,10 +1231,10 @@ ConstantRange::OverflowResult ConstantRange::signedAddMayOverflow(
   // a s+ b overflows low iff a s< 0 && b s< 0 && a s< smin - b.
   if (Min.isNonNegative() && OtherMin.isNonNegative() &&
       Min.sgt(SignedMax - OtherMin))
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsHigh;
   if (Max.isNegative() && OtherMax.isNegative() &&
       Max.slt(SignedMin - OtherMax))
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsLow;
 
   if (Max.isNonNegative() && OtherMax.isNonNegative() &&
       Max.sgt(SignedMax - OtherMax))
@@ -1254,9 +1254,9 @@ ConstantRange::OverflowResult ConstantRange::unsignedSubMayOverflow(
   APInt Min = getUnsignedMin(), Max = getUnsignedMax();
   APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
 
-  // a u- b overflows iff a u< b.
+  // a u- b overflows low iff a u< b.
   if (Max.ult(OtherMin))
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsLow;
   if (Min.ult(OtherMax))
     return OverflowResult::MayOverflow;
   return OverflowResult::NeverOverflows;
@@ -1277,10 +1277,10 @@ ConstantRange::OverflowResult ConstantRange::signedSubMayOverflow(
   // a s- b overflows low iff a s< 0 && b s>= 0 && a s< smin + b.
   if (Min.isNonNegative() && OtherMax.isNegative() &&
       Min.sgt(SignedMax + OtherMax))
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsHigh;
   if (Max.isNegative() && OtherMin.isNonNegative() &&
       Max.slt(SignedMin + OtherMin))
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsLow;
 
   if (Max.isNonNegative() && OtherMin.isNegative() &&
       Max.sgt(SignedMax + OtherMin))
@@ -1303,7 +1303,7 @@ ConstantRange::OverflowResult ConstantRange::unsignedMulMayOverflow(
 
   (void) Min.umul_ov(OtherMin, Overflow);
   if (Overflow)
-    return OverflowResult::AlwaysOverflows;
+    return OverflowResult::AlwaysOverflowsHigh;
 
   (void) Max.umul_ov(OtherMax, Overflow);
   if (Overflow)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e2813f9d9d496..a18043ef33f8c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2064,7 +2064,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
       if (OR == OverflowResult::NeverOverflows)
         return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
-      if (OR == OverflowResult::AlwaysOverflows)
+      if (OR == OverflowResult::AlwaysOverflowsHigh)
         return replaceInstUsesWith(*II,
                                    ConstantInt::getAllOnesValue(II->getType()));
       break;
@@ -2072,7 +2072,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
       if (OR == OverflowResult::NeverOverflows)
         return BinaryOperator::CreateNUWSub(Arg0, Arg1);
-      if (OR == OverflowResult::AlwaysOverflows)
+      if (OR == OverflowResult::AlwaysOverflowsLow)
         return replaceInstUsesWith(*II,
                                    ConstantInt::getNullValue(II->getType()));
       break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ab2da177d7b2e..b3eb75ea8a8b7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3993,7 +3993,8 @@ bool InstCombiner::OptimizeOverflowCheck(
   switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
     case OverflowResult::MayOverflow:
       return false;
-    case OverflowResult::AlwaysOverflows:
+    case OverflowResult::AlwaysOverflowsLow:
+    case OverflowResult::AlwaysOverflowsHigh:
       Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
       Result->takeName(&OrigI);
       Overflow = Builder.getTrue();
diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp
index 4709baad346ea..eeebe2e73ae24 100644
--- a/llvm/unittests/IR/ConstantRangeTest.cpp
+++ b/llvm/unittests/IR/ConstantRangeTest.cpp
@@ -1487,8 +1487,10 @@ TEST(ConstantRange, MakeGuaranteedNoWrapRegionMulSignedRange) {
 
 #define EXPECT_MAY_OVERFLOW(op) \
   EXPECT_EQ(ConstantRange::OverflowResult::MayOverflow, (op))
-#define EXPECT_ALWAYS_OVERFLOWS(op) \
-  EXPECT_EQ(ConstantRange::OverflowResult::AlwaysOverflows, (op))
+#define EXPECT_ALWAYS_OVERFLOWS_LOW(op) \
+  EXPECT_EQ(ConstantRange::OverflowResult::AlwaysOverflowsLow, (op))
+#define EXPECT_ALWAYS_OVERFLOWS_HIGH(op) \
+  EXPECT_EQ(ConstantRange::OverflowResult::AlwaysOverflowsHigh, (op))
 #define EXPECT_NEVER_OVERFLOWS(op) \
   EXPECT_EQ(ConstantRange::OverflowResult::NeverOverflows, (op))
 
@@ -1521,9 +1523,9 @@ TEST_F(ConstantRangeTest, UnsignedAddOverflow) {
   ConstantRange C1(APInt(16, 0x0299), APInt(16, 0x0400));
   ConstantRange C2(APInt(16, 0x0300), APInt(16, 0x0400));
   EXPECT_MAY_OVERFLOW(A.unsignedAddMayOverflow(C1));
-  EXPECT_ALWAYS_OVERFLOWS(A.unsignedAddMayOverflow(C2));
+  EXPECT_ALWAYS_OVERFLOWS_HIGH(A.unsignedAddMayOverflow(C2));
   EXPECT_MAY_OVERFLOW(C1.unsignedAddMayOverflow(A));
-  EXPECT_ALWAYS_OVERFLOWS(C2.unsignedAddMayOverflow(A));
+  EXPECT_ALWAYS_OVERFLOWS_HIGH(C2.unsignedAddMayOverflow(A));
 }
 
 TEST_F(ConstantRangeTest, UnsignedSubOverflow) {
@@ -1548,7 +1550,7 @@ TEST_F(ConstantRangeTest, UnsignedSubOverflow) {
   ConstantRange A(APInt(16, 0x0000), APInt(16, 0x0100));
   ConstantRange B(APInt(16, 0x0100), APInt(16, 0x0200));
   EXPECT_NEVER_OVERFLOWS(B.unsignedSubMayOverflow(A));
-  EXPECT_ALWAYS_OVERFLOWS(A.unsignedSubMayOverflow(B));
+  EXPECT_ALWAYS_OVERFLOWS_LOW(A.unsignedSubMayOverflow(B));
 
   ConstantRange A1(APInt(16, 0x0000), APInt(16, 0x0101));
   ConstantRange B1(APInt(16, 0x0100), APInt(16, 0x0201));
@@ -1591,7 +1593,7 @@ TEST_F(ConstantRangeTest, SignedAddOverflow) {
   ConstantRange B5(APInt(16, 0x0299), APInt(16, 0x0400));
   ConstantRange B6(APInt(16, 0x0300), APInt(16, 0x0400));
   EXPECT_MAY_OVERFLOW(A.signedAddMayOverflow(B5));
-  EXPECT_ALWAYS_OVERFLOWS(A.signedAddMayOverflow(B6));
+  EXPECT_ALWAYS_OVERFLOWS_HIGH(A.signedAddMayOverflow(B6));
 
   ConstantRange C(APInt(16, 0x8200), APInt(16, 0x8300));
   ConstantRange D1(APInt(16, 0xfe00), APInt(16, 0xff00));
@@ -1605,7 +1607,7 @@ TEST_F(ConstantRangeTest, SignedAddOverflow) {
   ConstantRange D5(APInt(16, 0xfc00), APInt(16, 0xfd02));
   ConstantRange D6(APInt(16, 0xfc00), APInt(16, 0xfd01));
   EXPECT_MAY_OVERFLOW(C.signedAddMayOverflow(D5));
-  EXPECT_ALWAYS_OVERFLOWS(C.signedAddMayOverflow(D6));
+  EXPECT_ALWAYS_OVERFLOWS_LOW(C.signedAddMayOverflow(D6));
 
   ConstantRange E(APInt(16, 0xff00), APInt(16, 0x0100));
   EXPECT_NEVER_OVERFLOWS(E.signedAddMayOverflow(E));
@@ -1637,7 +1639,7 @@ TEST_F(ConstantRangeTest, SignedSubOverflow) {
   ConstantRange B3(APInt(16, 0xfc00), APInt(16, 0xfd02));
   ConstantRange B4(APInt(16, 0xfc00), APInt(16, 0xfd01));
   EXPECT_MAY_OVERFLOW(A.signedSubMayOverflow(B3));
-  EXPECT_ALWAYS_OVERFLOWS(A.signedSubMayOverflow(B4));
+  EXPECT_ALWAYS_OVERFLOWS_HIGH(A.signedSubMayOverflow(B4));
 
   ConstantRange C(APInt(16, 0x8200), APInt(16, 0x8300));
   ConstantRange D1(APInt(16, 0x0100), APInt(16, 0x0201));
@@ -1647,7 +1649,7 @@ TEST_F(ConstantRangeTest, SignedSubOverflow) {
   ConstantRange D3(APInt(16, 0x0299), APInt(16, 0x0400));
   ConstantRange D4(APInt(16, 0x0300), APInt(16, 0x0400));
   EXPECT_MAY_OVERFLOW(C.signedSubMayOverflow(D3));
-  EXPECT_ALWAYS_OVERFLOWS(C.signedSubMayOverflow(D4));
+  EXPECT_ALWAYS_OVERFLOWS_LOW(C.signedSubMayOverflow(D4));
 
   ConstantRange E(APInt(16, 0xff00), APInt(16, 0x0100));
   EXPECT_NEVER_OVERFLOWS(E.signedSubMayOverflow(E));
@@ -1663,25 +1665,39 @@ static void TestOverflowExhaustive(Fn1 OverflowFn, Fn2 MayOverflowFn) {
                                        const ConstantRange &CR2) {
     // Loop over all N1 in CR1 and N2 in CR2 and check whether any of the
     // operations have overflow / have no overflow.
-    bool RangeHasOverflow = false;
+    bool RangeHasOverflowLow = false;
+    bool RangeHasOverflowHigh = false;
     bool RangeHasNoOverflow = false;
     ForeachNumInConstantRange(CR1, [&](const APInt &N1) {
       ForeachNumInConstantRange(CR2, [&](const APInt &N2) {
-        if (OverflowFn(N1, N2))
-          RangeHasOverflow = true;
-        else
+        bool IsOverflowHigh;
+        if (!OverflowFn(IsOverflowHigh, N1, N2)) {
           RangeHasNoOverflow = true;
+          return;
+        }
+
+        if (IsOverflowHigh)
+          RangeHasOverflowHigh = true;
+        else
+          RangeHasOverflowLow = true;
       });
     });
 
     ConstantRange::OverflowResult OR = MayOverflowFn(CR1, CR2);
     switch (OR) {
-    case ConstantRange::OverflowResult::AlwaysOverflows:
-      EXPECT_TRUE(RangeHasOverflow);
+    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+      EXPECT_TRUE(RangeHasOverflowLow);
+      EXPECT_FALSE(RangeHasOverflowHigh);
+      EXPECT_FALSE(RangeHasNoOverflow);
+      break;
+    case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+      EXPECT_TRUE(RangeHasOverflowHigh);
+      EXPECT_FALSE(RangeHasOverflowLow);
       EXPECT_FALSE(RangeHasNoOverflow);
       break;
     case ConstantRange::OverflowResult::NeverOverflows:
-      EXPECT_FALSE(RangeHasOverflow);
+      EXPECT_FALSE(RangeHasOverflowLow);
+      EXPECT_FALSE(RangeHasOverflowHigh);
       EXPECT_TRUE(RangeHasNoOverflow);
       break;
     case ConstantRange::OverflowResult::MayOverflow:
@@ -1691,7 +1707,7 @@ static void TestOverflowExhaustive(Fn1 OverflowFn, Fn2 MayOverflowFn) {
       if (CR1.isEmptySet() || CR2.isEmptySet())
         break;
 
-      EXPECT_TRUE(RangeHasOverflow);
+      EXPECT_TRUE(RangeHasOverflowLow || RangeHasOverflowHigh);
       EXPECT_TRUE(RangeHasNoOverflow);
       break;
     }
@@ -1700,9 +1716,10 @@ static void TestOverflowExhaustive(Fn1 OverflowFn, Fn2 MayOverflowFn) {
 
 TEST_F(ConstantRangeTest, UnsignedAddOverflowExhaustive) {
   TestOverflowExhaustive(
-      [](const APInt &N1, const APInt &N2) {
+      [](bool &IsOverflowHigh, const APInt &N1, const APInt &N2) {
         bool Overflow;
         (void) N1.uadd_ov(N2, Overflow);
+        IsOverflowHigh = true;
         return Overflow;
       },
       [](const ConstantRange &CR1, const ConstantRange &CR2) {
@@ -1712,9 +1729,10 @@ TEST_F(ConstantRangeTest, UnsignedAddOverflowExhaustive) {
 
 TEST_F(ConstantRangeTest, UnsignedSubOverflowExhaustive) {
   TestOverflowExhaustive(
-      [](const APInt &N1, const APInt &N2) {
+      [](bool &IsOverflowHigh, const APInt &N1, const APInt &N2) {
         bool Overflow;
         (void) N1.usub_ov(N2, Overflow);
+        IsOverflowHigh = false;
         return Overflow;
       },
       [](const ConstantRange &CR1, const ConstantRange &CR2) {
@@ -1724,9 +1742,10 @@ TEST_F(ConstantRangeTest, UnsignedSubOverflowExhaustive) {
 
 TEST_F(ConstantRangeTest, UnsignedMulOverflowExhaustive) {
   TestOverflowExhaustive(
-      [](const APInt &N1, const APInt &N2) {
+      [](bool &IsOverflowHigh, const APInt &N1, const APInt &N2) {
         bool Overflow;
         (void) N1.umul_ov(N2, Overflow);
+        IsOverflowHigh = true;
         return Overflow;
       },
       [](const ConstantRange &CR1, const ConstantRange &CR2) {
@@ -1736,9 +1755,10 @@ TEST_F(ConstantRangeTest, UnsignedMulOverflowExhaustive) {
 
 TEST_F(ConstantRangeTest, SignedAddOverflowExhaustive) {
   TestOverflowExhaustive(
-      [](const APInt &N1, const APInt &N2) {
+      [](bool &IsOverflowHigh, const APInt &N1, const APInt &N2) {
         bool Overflow;
         (void) N1.sadd_ov(N2, Overflow);
+        IsOverflowHigh = N1.isNonNegative();
         return Overflow;
       },
       [](const ConstantRange &CR1, const ConstantRange &CR2) {
@@ -1748,9 +1768,10 @@ TEST_F(ConstantRangeTest, SignedAddOverflowExhaustive) {
 
 TEST_F(ConstantRangeTest, SignedSubOverflowExhaustive) {
   TestOverflowExhaustive(
-      [](const APInt &N1, const APInt &N2) {
+      [](bool &IsOverflowHigh, const APInt &N1, const APInt &N2) {
         bool Overflow;
         (void) N1.ssub_ov(N2, Overflow);
+        IsOverflowHigh = N1.isNonNegative();
         return Overflow;
       },
       [](const ConstantRange &CR1, const ConstantRange &CR2) {

From 09c2625108c8d16da32d282d8a10061509c68a9e Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Tue, 28 May 2019 18:26:00 +0000
Subject: [PATCH 0389/1176] Driver: support `/Zc:char8_t` and `/Zc:char8_t-`

Update the `cl` emulation to support the `/Zc:char8_t[-]?` options as per the
MSVC 2019.1 toolset.  These are aliases for `-fchar8_t` and `-fno-char8_t`.

llvm-svn: 361859
---
 clang/docs/UsersManual.rst                    | 2 ++
 clang/include/clang/Driver/CLCompatOptions.td | 6 ++++++
 clang/test/Driver/cl-options.c                | 7 +++++++
 3 files changed, 15 insertions(+)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 88cb72c9b61f1..c9195e908276c 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -3051,6 +3051,8 @@ Execute ``clang-cl /?`` to see a list of supported options:
       /Yc<filename>           Generate a pch file for all code up to and including <filename>
       /Yu<filename>           Load a pch file and use it instead of all code up to and including <filename>
       /Z7                     Enable CodeView debug information in object files
+      /Zc:char8_t             Enable C++2a char8_t type
+      /Zc:char8_t-            Disable C++2a char8_t type
       /Zc:dllexportInlines-   Don't dllexport/dllimport inline member functions of dllexport/import classes
       /Zc:dllexportInlines    dllexport/dllimport inline member functions of dllexport/import classes (default)
       /Zc:sizedDealloc-       Disable C++14 sized global deallocation functions
diff --git a/clang/include/clang/Driver/CLCompatOptions.td b/clang/include/clang/Driver/CLCompatOptions.td
index fb02d856a253c..577a66786ad20 100644
--- a/clang/include/clang/Driver/CLCompatOptions.td
+++ b/clang/include/clang/Driver/CLCompatOptions.td
@@ -212,6 +212,12 @@ def _SLASH_Zc_alignedNew : CLFlag<"Zc:alignedNew">,
 def _SLASH_Zc_alignedNew_ : CLFlag<"Zc:alignedNew-">,
   HelpText<"Disable C++17 aligned allocation functions">,
   Alias<fno_aligned_allocation>;
+def _SLASH_Zc_char8_t : CLFlag<"Zc:char8_t">,
+  HelpText<"Enable char8_t from C++2a">,
+  Alias<fchar8__t>;
+def _SLASH_Zc_char8_t_ : CLFlag<"Zc:char8_t-">,
+  HelpText<"Disable char8_t from c++2a">,
+  Alias<fno_char8__t>;
 def _SLASH_Zc_strictStrings : CLFlag<"Zc:strictStrings">,
   HelpText<"Treat string literals as const">, Alias<W_Joined>,
   AliasArgs<["error=c++11-compat-deprecated-writable-strings"]>;
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index 0f64060d107e1..4b41720441a9d 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -326,6 +326,13 @@
 // RUN: %clang_cl -c /Zc:twoPhase -### -- %s 2>&1 | FileCheck -check-prefix=DELAYEDOFF %s
 // DELAYEDOFF-NOT: "-fdelayed-template-parsing"
 
+// RUN: %clang_cl -c -### /std:c++latest -- %s 2>&1 | FileCheck -check-prefix CHECK-LATEST-CHAR8_T %s
+// CHECK-LATEST-CHAR8_T-NOT: "-fchar8_t"
+// RUN: %clang_cl -c -### /Zc:char8_t -- %s 2>&1 | FileCheck -check-prefix CHECK-CHAR8_T %s
+// CHECK-CHAR8_T: "-fchar8_t"
+// RUN: %clang_cl -c -### /Zc:char8_t- -- %s 2>&1 | FileCheck -check-prefix CHECK-CHAR8_T_ %s
+// CHECK-CHAR8_T_: "-fno-char8_t"
+
 // For some warning ids, we can map from MSVC warning to Clang warning.
 // RUN: %clang_cl -wd4005 -wd4100 -wd4910 -wd4996 -### -- %s 2>&1 | FileCheck -check-prefix=Wno %s
 // Wno: "-cc1"

From caeec8501e1f69aa64954048950c9454f82c02bc Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 18:31:36 +0000
Subject: [PATCH 0390/1176] [NFC][MIPS] Autogenerater madd-msub.ll test

Being affected by upcoming patch

llvm-svn: 361860
---
 llvm/test/CodeGen/Mips/madd-msub.ll | 651 +++++++++++++++++++---------
 1 file changed, 435 insertions(+), 216 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/madd-msub.ll b/llvm/test/CodeGen/Mips/madd-msub.ll
index c5f7af1de20d5..8a1010e45f66f 100644
--- a/llvm/test/CodeGen/Mips/madd-msub.ll
+++ b/llvm/test/CodeGen/Mips/madd-msub.ll
@@ -1,57 +1,76 @@
-; RUN: llc -march=mips -mcpu=mips32 < %s \
-; RUN:   | FileCheck %s -check-prefixes=ALL,32
-; RUN: llc -march=mips -mcpu=mips32r2 < %s \
-; RUN:   | FileCheck %s -check-prefixes=ALL,32
-; RUN: llc -march=mips -mcpu=mips32r6 < %s \
-; RUN:   | FileCheck %s -check-prefixes=ALL,32R6
-; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dsp < %s \
-; RUN:   | FileCheck %s -check-prefix=DSP
-; RUN: llc -march=mips -mcpu=mips64   -target-abi n64 < %s \
-; RUN:   | FileCheck %s -check-prefixes=ALL,64
-; RUN: llc -march=mips -mcpu=mips64r2 -target-abi n64 < %s \
-; RUN:   | FileCheck %s -check-prefixes=ALL,64
-; RUN: llc -march=mips -mcpu=mips64r6 -target-abi n64 < %s \
-; RUN:   | FileCheck %s -check-prefixes=ALL,64R6
-
-; FIXME: The MIPS16 test should check its output
-; RUN: llc -march=mips -mattr=mips16 < %s
-
-; ALL-LABEL: madd1:
-
-; 32-DAG:        sra $[[T0:[0-9]+]], $6, 31
-; 32-DAG:        mtlo $6
-; 32-DAG:        [[m:m]]add ${{[45]}}, ${{[45]}}
-; 32-DAG:        [[m]]fhi $2
-; 32-DAG:        [[m]]flo $3
-
-; DSP-DAG:       sra $[[T0:[0-9]+]], $6, 31
-; DSP-DAG:       mtlo $6, $[[AC:ac[0-3]+]]
-; DSP-DAG:       madd $[[AC]], ${{[45]}}, ${{[45]}}
-; DSP-DAG:       mfhi $2, $[[AC]]
-; DSP-DAG:       mflo $3, $[[AC]]
-
-; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $6
-; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; 32R6-DAG:      muh  $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      sra  $[[T4:[0-9]+]], $6, 31
-; 32R6-DAG:      addu $[[T5:[0-9]+]], $[[T3]], $[[T4]]
-; 32R6-DAG:      addu $2, $[[T5]], $[[T2]]
-
-; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
-; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
-; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
-; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
-; 64-DAG:        sll $[[T3:[0-9]+]], $6, 0
-; 64-DAG:        daddu $2, $[[T2]], $[[T3]]
-
-; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
-; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
-; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; 64R6-DAG:      sll $[[T3:[0-9]+]], $6, 0
-; 64R6-DAG:      daddu $2, $[[T2]], $[[T3]]
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck %s -check-prefixes=ALL,32
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefixes=ALL,32
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefixes=ALL,32R6
+; RUN: llc -march=mips -mcpu=mips32r2 -mattr=dsp < %s | FileCheck %s -check-prefix=DSP
+; RUN: llc -march=mips -mcpu=mips64   -target-abi n64 < %s | FileCheck %s -check-prefixes=ALL,64
+; RUN: llc -march=mips -mcpu=mips64r2 -target-abi n64 < %s | FileCheck %s -check-prefixes=ALL,64
+; RUN: llc -march=mips -mcpu=mips64r6 -target-abi n64 < %s | FileCheck %s -check-prefixes=ALL,64R6
+; RUN: llc -march=mips -mattr=mips16 < %s | FileCheck %s -check-prefixes=ALL,16
 
 define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+; 32-LABEL: madd1:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    sra $1, $6, 31
+; 32-NEXT:    mtlo $6
+; 32-NEXT:    mthi $1
+; 32-NEXT:    madd $5, $4
+; 32-NEXT:    mfhi $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    mflo $3
+;
+; 32R6-LABEL: madd1:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    addu $3, $1, $6
+; 32R6-NEXT:    sltu $1, $3, $1
+; 32R6-NEXT:    muh $2, $5, $4
+; 32R6-NEXT:    sra $4, $6, 31
+; 32R6-NEXT:    addu $2, $2, $4
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    addu $2, $2, $1
+;
+; DSP-LABEL: madd1:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    sra $1, $6, 31
+; DSP-NEXT:    mtlo $6, $ac0
+; DSP-NEXT:    mthi $1, $ac0
+; DSP-NEXT:    madd $ac0, $5, $4
+; DSP-NEXT:    mfhi $2, $ac0
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    mflo $3, $ac0
+;
+; 64-LABEL: madd1:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $4, 0
+; 64-NEXT:    sll $2, $5, 0
+; 64-NEXT:    dmult $2, $1
+; 64-NEXT:    mflo $1
+; 64-NEXT:    sll $2, $6, 0
+; 64-NEXT:    jr $ra
+; 64-NEXT:    daddu $2, $1, $2
+;
+; 64R6-LABEL: madd1:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $4, 0
+; 64R6-NEXT:    sll $2, $5, 0
+; 64R6-NEXT:    dmul $1, $2, $1
+; 64R6-NEXT:    sll $2, $6, 0
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    daddu $2, $1, $2
+;
+; 16-LABEL: madd1:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $3
+; 16-NEXT:    sra $4, $6, 31
+; 16-NEXT:    addu $4, $3, $4
+; 16-NEXT:    addu $3, $2, $6
+; 16-NEXT:    sltu $3, $2
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    addu $2, $4, $2
+; 16-NEXT:    jrc $ra
 entry:
   %conv = sext i32 %a to i64
   %conv2 = sext i32 %b to i64
@@ -61,36 +80,59 @@ entry:
   ret i64 %add
 }
 
-; ALL-LABEL: madd2:
-
-; FIXME: We don't really need this instruction
-; 32-DAG:        addiu $[[T0:[0-9]+]], $zero, 0
-; 32-DAG:        mtlo $6
-; 32-DAG:        [[m:m]]addu ${{[45]}}, ${{[45]}}
-; 32-DAG:        [[m]]fhi $2
-; 32-DAG:        [[m]]flo $3
-
-; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
-; DSP-DAG:       mtlo $6, $[[AC:ac[0-3]+]]
-; DSP-DAG:       maddu $[[AC]], ${{[45]}}, ${{[45]}}
-; DSP-DAG:       mfhi $2, $[[AC]]
-; DSP-DAG:       mflo $3, $[[AC]]
-
-; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $6
-; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; FIXME: There's a redundant move here. We should remove it
-; 32R6-DAG:      muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      addu $2, $[[T3]], $[[T2]]
-
-; 64-DAG:        d[[m:m]]ult $5, $4
-; 64-DAG:        [[m]]flo $[[T0:[0-9]+]]
-; 64-DAG:        daddu $2, $[[T0]], $6
-
-; 64R6-DAG:      dmul $[[T0:[0-9]+]], $5, $4
-; 64R6-DAG:      daddu $2, $[[T0]], $6
-
 define i64 @madd2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
+; 32-LABEL: madd2:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    addiu $1, $zero, 0
+; 32-NEXT:    mtlo $6
+; 32-NEXT:    mthi $1
+; 32-NEXT:    maddu $5, $4
+; 32-NEXT:    mfhi $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    mflo $3
+;
+; 32R6-LABEL: madd2:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    addu $3, $1, $6
+; 32R6-NEXT:    sltu $1, $3, $1
+; 32R6-NEXT:    muhu $2, $5, $4
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    addu $2, $2, $1
+;
+; DSP-LABEL: madd2:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    addiu $1, $zero, 0
+; DSP-NEXT:    mtlo $6, $ac0
+; DSP-NEXT:    mthi $1, $ac0
+; DSP-NEXT:    maddu $ac0, $5, $4
+; DSP-NEXT:    mfhi $2, $ac0
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    mflo $3, $ac0
+;
+; 64-LABEL: madd2:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    dmult $5, $4
+; 64-NEXT:    mflo $1
+; 64-NEXT:    jr $ra
+; 64-NEXT:    daddu $2, $1, $6
+;
+; 64R6-LABEL: madd2:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    dmul $1, $5, $4
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    daddu $2, $1, $6
+;
+; 16-LABEL: madd2:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    multu $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $4
+; 16-NEXT:    addu $3, $2, $6
+; 16-NEXT:    sltu $3, $2
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    addu $2, $4, $2
+; 16-NEXT:    jrc $ra
 entry:
   %conv = zext i32 %a to i64
   %conv2 = zext i32 %b to i64
@@ -100,39 +142,63 @@ entry:
   ret i64 %add
 }
 
-; ALL-LABEL: madd3:
-
-; 32-DAG:        mthi $6
-; 32-DAG:        mtlo $7
-; 32-DAG:        [[m:m]]add ${{[45]}}, ${{[45]}}
-; 32-DAG:        [[m]]fhi $2
-; 32-DAG:        [[m]]flo $3
-
-; DSP-DAG:       mthi $6, $[[AC:ac[0-3]+]]
-; DSP-DAG:       mtlo $7, $[[AC]]
-; DSP-DAG:       madd $[[AC]], ${{[45]}}, ${{[45]}}
-; DSP-DAG:       mfhi $2, $[[AC]]
-; DSP-DAG:       mflo $3, $[[AC]]
-
-; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      addu $[[T1:[0-9]+]], $[[T0]], $7
-; 32R6-DAG:      sltu $[[T2:[0-9]+]], $[[T1]], $1
-; 32R6-DAG:      muh  $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      addu $[[T4:[0-9]+]], $[[T3]], $6
-; 32R6-DAG:      addu $2, $[[T4]], $[[T2]]
-
-; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
-; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
-; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
-; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
-; 64-DAG:        daddu $2, $[[T2]], $6
-
-; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
-; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
-; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; 64R6-DAG:      daddu $2, $[[T2]], $6
-
 define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone {
+; 32-LABEL: madd3:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    mtlo $7
+; 32-NEXT:    mthi $6
+; 32-NEXT:    madd $5, $4
+; 32-NEXT:    mfhi $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    mflo $3
+;
+; 32R6-LABEL: madd3:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    addu $3, $1, $7
+; 32R6-NEXT:    sltu $1, $3, $1
+; 32R6-NEXT:    muh $2, $5, $4
+; 32R6-NEXT:    addu $2, $2, $6
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    addu $2, $2, $1
+;
+; DSP-LABEL: madd3:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    mtlo $7, $ac0
+; DSP-NEXT:    mthi $6, $ac0
+; DSP-NEXT:    madd $ac0, $5, $4
+; DSP-NEXT:    mfhi $2, $ac0
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    mflo $3, $ac0
+;
+; 64-LABEL: madd3:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $4, 0
+; 64-NEXT:    sll $2, $5, 0
+; 64-NEXT:    dmult $2, $1
+; 64-NEXT:    mflo $1
+; 64-NEXT:    jr $ra
+; 64-NEXT:    daddu $2, $1, $6
+;
+; 64R6-LABEL: madd3:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $4, 0
+; 64R6-NEXT:    sll $2, $5, 0
+; 64R6-NEXT:    dmul $1, $2, $1
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    daddu $2, $1, $6
+;
+; 16-LABEL: madd3:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $3
+; 16-NEXT:    addu $4, $3, $6
+; 16-NEXT:    addu $3, $2, $7
+; 16-NEXT:    sltu $3, $2
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    addu $2, $4, $2
+; 16-NEXT:    jrc $ra
 entry:
   %conv = sext i32 %a to i64
   %conv2 = sext i32 %b to i64
@@ -141,10 +207,49 @@ entry:
   ret i64 %add
 }
 
-; ALL-LABEL: madd4
-; ALL-NOT: madd ${{[0-9]+}}, ${{[0-9]+}}
-
 define i32 @madd4(i32 %a, i32 %b, i32 %c) {
+; 32-LABEL: madd4:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    mul $1, $4, $5
+; 32-NEXT:    jr $ra
+; 32-NEXT:    addu $2, $6, $1
+;
+; 32R6-LABEL: madd4:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $4, $5
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    addu $2, $6, $1
+;
+; DSP-LABEL: madd4:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    mul $1, $4, $5
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    addu $2, $6, $1
+;
+; 64-LABEL: madd4:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $5, 0
+; 64-NEXT:    sll $2, $4, 0
+; 64-NEXT:    mul $1, $2, $1
+; 64-NEXT:    sll $2, $6, 0
+; 64-NEXT:    jr $ra
+; 64-NEXT:    addu $2, $2, $1
+;
+; 64R6-LABEL: madd4:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $5, 0
+; 64R6-NEXT:    sll $2, $4, 0
+; 64R6-NEXT:    mul $1, $2, $1
+; 64R6-NEXT:    sll $2, $6, 0
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    addu $2, $2, $1
+;
+; 16-LABEL: madd4:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $4, $5
+; 16-NEXT:    mflo $2
+; 16-NEXT:    addu $2, $6, $2
+; 16-NEXT:    jrc $ra
 entry:
   %mul = mul nsw i32 %a, %b
   %add = add nsw i32 %c, %mul
@@ -152,42 +257,69 @@ entry:
   ret i32 %add
 }
 
-; ALL-LABEL: msub1:
-
-; 32-DAG:        sra $[[T0:[0-9]+]], $6, 31
-; 32-DAG:        mtlo $6
-; 32-DAG:        [[m:m]]sub ${{[45]}}, ${{[45]}}
-; 32-DAG:        [[m]]fhi $2
-; 32-DAG:        [[m]]flo $3
-
-; DSP-DAG:       sra $[[T0:[0-9]+]], $6, 31
-; DSP-DAG:       mtlo $6, $[[AC:ac[0-3]+]]
-; DSP-DAG:       msub $[[AC]], ${{[45]}}, ${{[45]}}
-; DSP-DAG:       mfhi $2, $[[AC]]
-; DSP-DAG:       mflo $3, $[[AC]]
-
-; 32R6-DAG:      mul  $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      sltu $[[T1:[0-9]+]], $6, $[[T0]]
-; 32R6-DAG:      muh  $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      sra  $[[T3:[0-9]+]], $6, 31
-; 32R6-DAG:      subu $[[T4:[0-9]+]], $[[T3]], $[[T2]]
-; 32R6-DAG:      subu $2, $[[T4]], $[[T1]]
-; 32R6-DAG:      subu $3, $6, $[[T0]]
-
-; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
-; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
-; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
-; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
-; 64-DAG:        sll $[[T3:[0-9]+]], $6, 0
-; 64-DAG:        dsubu $2, $[[T3]], $[[T2]]
-
-; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
-; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
-; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; 64R6-DAG:      sll $[[T3:[0-9]+]], $6, 0
-; 64R6-DAG:      dsubu $2, $[[T3]], $[[T2]]
-
 define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+; 32-LABEL: msub1:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    sra $1, $6, 31
+; 32-NEXT:    mtlo $6
+; 32-NEXT:    mthi $1
+; 32-NEXT:    msub $5, $4
+; 32-NEXT:    mfhi $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    mflo $3
+;
+; 32R6-LABEL: msub1:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    sltu $2, $6, $1
+; 32R6-NEXT:    muh $3, $5, $4
+; 32R6-NEXT:    sra $4, $6, 31
+; 32R6-NEXT:    subu $3, $4, $3
+; 32R6-NEXT:    subu $2, $3, $2
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    subu $3, $6, $1
+;
+; DSP-LABEL: msub1:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    sra $1, $6, 31
+; DSP-NEXT:    mtlo $6, $ac0
+; DSP-NEXT:    mthi $1, $ac0
+; DSP-NEXT:    msub $ac0, $5, $4
+; DSP-NEXT:    mfhi $2, $ac0
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    mflo $3, $ac0
+;
+; 64-LABEL: msub1:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $4, 0
+; 64-NEXT:    sll $2, $5, 0
+; 64-NEXT:    dmult $2, $1
+; 64-NEXT:    mflo $1
+; 64-NEXT:    sll $2, $6, 0
+; 64-NEXT:    jr $ra
+; 64-NEXT:    dsubu $2, $2, $1
+;
+; 64R6-LABEL: msub1:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $4, 0
+; 64R6-NEXT:    sll $2, $5, 0
+; 64R6-NEXT:    dmul $1, $2, $1
+; 64R6-NEXT:    sll $2, $6, 0
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    dsubu $2, $2, $1
+;
+; 16-LABEL: msub1:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $4
+; 16-NEXT:    subu $3, $6, $2
+; 16-NEXT:    sltu $6, $2
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    sra $5, $6, 31
+; 16-NEXT:    subu $4, $5, $4
+; 16-NEXT:    subu $2, $4, $2
+; 16-NEXT:    jrc $ra
 entry:
   %conv = sext i32 %c to i64
   %conv2 = sext i32 %a to i64
@@ -197,36 +329,61 @@ entry:
   ret i64 %sub
 }
 
-; ALL-LABEL: msub2:
-
-; FIXME: We don't really need this instruction
-; 32-DAG:        addiu $[[T0:[0-9]+]], $zero, 0
-; 32-DAG:        mtlo $6
-; 32-DAG:        [[m:m]]subu ${{[45]}}, ${{[45]}}
-; 32-DAG:        [[m]]fhi $2
-; 32-DAG:        [[m]]flo $3
-
-; DSP-DAG:       addiu $[[T0:[0-9]+]], $zero, 0
-; DSP-DAG:       mtlo $6, $[[AC:ac[0-3]+]]
-; DSP-DAG:       msubu $[[AC]], ${{[45]}}, ${{[45]}}
-; DSP-DAG:       mfhi $2, $[[AC]]
-; DSP-DAG:       mflo $3, $[[AC]]
-
-; 32R6-DAG:      mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      sltu $[[T1:[0-9]+]], $6, $[[T0]]
-; 32R6-DAG:      muhu $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      negu $[[T3:[0-9]+]], $[[T2]]
-; 32R6-DAG:      subu $2, $[[T3]], $[[T1]]
-; 32R6-DAG:      subu $3, $6, $[[T0]]
-
-; 64-DAG:        d[[m:m]]ult $5, $4
-; 64-DAG:        [[m]]flo $[[T0:[0-9]+]]
-; 64-DAG:        dsubu $2, $6, $[[T0]]
-
-; 64R6-DAG:      dmul $[[T0:[0-9]+]], $5, $4
-; 64R6-DAG:      dsubu $2, $6, $[[T0]]
-
 define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
+; 32-LABEL: msub2:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    addiu $1, $zero, 0
+; 32-NEXT:    mtlo $6
+; 32-NEXT:    mthi $1
+; 32-NEXT:    msubu $5, $4
+; 32-NEXT:    mfhi $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    mflo $3
+;
+; 32R6-LABEL: msub2:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    sltu $2, $6, $1
+; 32R6-NEXT:    muhu $3, $5, $4
+; 32R6-NEXT:    negu $3, $3
+; 32R6-NEXT:    subu $2, $3, $2
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    subu $3, $6, $1
+;
+; DSP-LABEL: msub2:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    addiu $1, $zero, 0
+; DSP-NEXT:    mtlo $6, $ac0
+; DSP-NEXT:    mthi $1, $ac0
+; DSP-NEXT:    msubu $ac0, $5, $4
+; DSP-NEXT:    mfhi $2, $ac0
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    mflo $3, $ac0
+;
+; 64-LABEL: msub2:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    dmult $5, $4
+; 64-NEXT:    mflo $1
+; 64-NEXT:    jr $ra
+; 64-NEXT:    dsubu $2, $6, $1
+;
+; 64R6-LABEL: msub2:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    dmul $1, $5, $4
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    dsubu $2, $6, $1
+;
+; 16-LABEL: msub2:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    multu $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $4
+; 16-NEXT:    subu $3, $6, $2
+; 16-NEXT:    sltu $6, $2
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    neg $4, $4
+; 16-NEXT:    subu $2, $4, $2
+; 16-NEXT:    jrc $ra
 entry:
   %conv = zext i32 %c to i64
   %conv2 = zext i32 %a to i64
@@ -236,40 +393,63 @@ entry:
   ret i64 %sub
 }
 
-; ALL-LABEL: msub3:
-
-; FIXME: We don't really need this instruction
-; 32-DAG:        mthi $6
-; 32-DAG:        mtlo $7
-; 32-DAG:        [[m:m]]sub ${{[45]}}, ${{[45]}}
-; 32-DAG:        [[m]]fhi $2
-; 32-DAG:        [[m]]flo $3
-
-; DSP-DAG:       mtlo $7, $[[AC:ac[0-3]+]]
-; DSP-DAG:       mthi $6, $[[AC]]
-; DSP-DAG:       msub $[[AC]], ${{[45]}}, ${{[45]}}
-; DSP-DAG:       mfhi $2, $[[AC]]
-; DSP-DAG:       mflo $3, $[[AC]]
-
-; 32R6-DAG:      mul $[[T0:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      sltu $[[T1:[0-9]+]], $7, $[[T0]]
-; 32R6-DAG:      muh $[[T2:[0-9]+]], ${{[45]}}, ${{[45]}}
-; 32R6-DAG:      subu $[[T3:[0-9]+]], $6, $[[T2]]
-; 32R6-DAG:      subu $2, $[[T3]], $[[T1]]
-; 32R6-DAG:      subu $3, $7, $[[T0]]
-
-; 64-DAG:        sll $[[T0:[0-9]+]], $4, 0
-; 64-DAG:        sll $[[T1:[0-9]+]], $5, 0
-; 64-DAG:        d[[m:m]]ult $[[T1]], $[[T0]]
-; 64-DAG:        [[m]]flo $[[T2:[0-9]+]]
-; 64-DAG:        dsubu $2, $6, $[[T2]]
-
-; 64R6-DAG:      sll $[[T0:[0-9]+]], $4, 0
-; 64R6-DAG:      sll $[[T1:[0-9]+]], $5, 0
-; 64R6-DAG:      dmul $[[T2:[0-9]+]], $[[T1]], $[[T0]]
-; 64R6-DAG:      dsubu $2, $6, $[[T2]]
-
 define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone {
+; 32-LABEL: msub3:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    mtlo $7
+; 32-NEXT:    mthi $6
+; 32-NEXT:    msub $5, $4
+; 32-NEXT:    mfhi $2
+; 32-NEXT:    jr $ra
+; 32-NEXT:    mflo $3
+;
+; 32R6-LABEL: msub3:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $5, $4
+; 32R6-NEXT:    sltu $2, $7, $1
+; 32R6-NEXT:    muh $3, $5, $4
+; 32R6-NEXT:    subu $3, $6, $3
+; 32R6-NEXT:    subu $2, $3, $2
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    subu $3, $7, $1
+;
+; DSP-LABEL: msub3:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    mtlo $7, $ac0
+; DSP-NEXT:    mthi $6, $ac0
+; DSP-NEXT:    msub $ac0, $5, $4
+; DSP-NEXT:    mfhi $2, $ac0
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    mflo $3, $ac0
+;
+; 64-LABEL: msub3:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $4, 0
+; 64-NEXT:    sll $2, $5, 0
+; 64-NEXT:    dmult $2, $1
+; 64-NEXT:    mflo $1
+; 64-NEXT:    jr $ra
+; 64-NEXT:    dsubu $2, $6, $1
+;
+; 64R6-LABEL: msub3:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $4, 0
+; 64R6-NEXT:    sll $2, $5, 0
+; 64R6-NEXT:    dmul $1, $2, $1
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    dsubu $2, $6, $1
+;
+; 16-LABEL: msub3:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $5, $4
+; 16-NEXT:    mflo $2
+; 16-NEXT:    mfhi $4
+; 16-NEXT:    subu $3, $7, $2
+; 16-NEXT:    sltu $7, $2
+; 16-NEXT:    move $2, $24
+; 16-NEXT:    subu $4, $6, $4
+; 16-NEXT:    subu $2, $4, $2
+; 16-NEXT:    jrc $ra
 entry:
   %conv = sext i32 %a to i64
   %conv3 = sext i32 %b to i64
@@ -278,10 +458,49 @@ entry:
   ret i64 %sub
 }
 
-; ALL-LABEL: msub4
-; ALL-NOT: msub ${{[0-9]+}}, ${{[0-9]+}}
-
 define i32 @msub4(i32 %a, i32 %b, i32 %c) {
+; 32-LABEL: msub4:
+; 32:       # %bb.0: # %entry
+; 32-NEXT:    mul $1, $4, $5
+; 32-NEXT:    jr $ra
+; 32-NEXT:    subu $2, $6, $1
+;
+; 32R6-LABEL: msub4:
+; 32R6:       # %bb.0: # %entry
+; 32R6-NEXT:    mul $1, $4, $5
+; 32R6-NEXT:    jr $ra
+; 32R6-NEXT:    subu $2, $6, $1
+;
+; DSP-LABEL: msub4:
+; DSP:       # %bb.0: # %entry
+; DSP-NEXT:    mul $1, $4, $5
+; DSP-NEXT:    jr $ra
+; DSP-NEXT:    subu $2, $6, $1
+;
+; 64-LABEL: msub4:
+; 64:       # %bb.0: # %entry
+; 64-NEXT:    sll $1, $5, 0
+; 64-NEXT:    sll $2, $4, 0
+; 64-NEXT:    mul $1, $2, $1
+; 64-NEXT:    sll $2, $6, 0
+; 64-NEXT:    jr $ra
+; 64-NEXT:    subu $2, $2, $1
+;
+; 64R6-LABEL: msub4:
+; 64R6:       # %bb.0: # %entry
+; 64R6-NEXT:    sll $1, $5, 0
+; 64R6-NEXT:    sll $2, $4, 0
+; 64R6-NEXT:    mul $1, $2, $1
+; 64R6-NEXT:    sll $2, $6, 0
+; 64R6-NEXT:    jr $ra
+; 64R6-NEXT:    subu $2, $2, $1
+;
+; 16-LABEL: msub4:
+; 16:       # %bb.0: # %entry
+; 16-NEXT:    mult $4, $5
+; 16-NEXT:    mflo $2
+; 16-NEXT:    subu $2, $6, $2
+; 16-NEXT:    jrc $ra
 entry:
   %mul = mul nsw i32 %a, %b
   %sub = sub nsw i32 %c, %mul

From cf950b46c8ca5fc32cac6cc9bf8f542246eb6a5b Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 18:36:11 +0000
Subject: [PATCH 0391/1176] Fix 'warning: format specifies type 'int' but the
 argument has type 'MIuint' (aka 'unsigned long long') [-Wformat]' with Clang
 8.0

llvm-svn: 361861
---
 lldb/tools/lldb-mi/MICmnLLDBDebuggerHandleEvents.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/tools/lldb-mi/MICmnLLDBDebuggerHandleEvents.cpp b/lldb/tools/lldb-mi/MICmnLLDBDebuggerHandleEvents.cpp
index 58a7c7de5009d..60cfd3563dae2 100644
--- a/lldb/tools/lldb-mi/MICmnLLDBDebuggerHandleEvents.cpp
+++ b/lldb/tools/lldb-mi/MICmnLLDBDebuggerHandleEvents.cpp
@@ -951,7 +951,7 @@ bool CMICmnLLDBDebuggerHandleEvents::HandleProcessEventStateSuspended(
   } else {
     const MIuint nTargetIndex = rDebugger.GetIndexOfTarget(target);
     if (nTargetIndex != UINT_MAX)
-      streamOut.Printf("Target %d: (", nTargetIndex);
+      streamOut.Printf("Target %" PRIu64 ": (", (uint64_t)nTargetIndex);
     else
       streamOut.Printf("Target <unknown index>: (");
     target.GetDescription(streamOut, lldb::eDescriptionLevelBrief);

From 2076fb28f1801f1fd3981e09014817b0e25b582c Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 18:36:35 +0000
Subject: [PATCH 0392/1176] =?UTF-8?q?Fix=20'warning:=20suggest=20explicit?=
 =?UTF-8?q?=20braces=20to=20avoid=20ambiguous=20=E2=80=98else=E2=80=99=20[?=
 =?UTF-8?q?-Wdangling-else]'=20with=20GCC=207.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See: https://github.com/google/googletest/issues/1119
llvm-svn: 361862
---
 .../ARM64/TestArm64InstEmulation.cpp          | 30 ++++++++++++-------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp b/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp
index 9fc01a432699d..d853e6fb43c0d 100644
--- a/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp
+++ b/lldb/unittests/UnwindAssembly/ARM64/TestArm64InstEmulation.cpp
@@ -648,24 +648,34 @@ TEST_F(TestArm64InstEmulation, TestRegisterDoubleSpills) {
   EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
   EXPECT_EQ(0, row_sp->GetCFAValue().GetOffset());
 
-  if (row_sp->GetRegisterInfo(fpu_d8_arm64, regloc))
+  if (row_sp->GetRegisterInfo(fpu_d8_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d9_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d9_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d10_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d10_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d11_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d11_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d12_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d12_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d13_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d13_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d14_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d14_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(fpu_d15_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(fpu_d15_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(gpr_x27_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(gpr_x27_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
-  if (row_sp->GetRegisterInfo(gpr_x28_arm64, regloc))
+  }
+  if (row_sp->GetRegisterInfo(gpr_x28_arm64, regloc)) {
     EXPECT_TRUE(regloc.IsSame());
+  }
 }

From c51cdacab962a4d3a8a9a0c6327d44148f73c57d Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 28 May 2019 18:59:21 +0000
Subject: [PATCH 0393/1176] [InstCombine] Clean up saturing math overflow
 optimizations; NFC

Reduce duplication and make it easier to handle signed
always-overflows conditions in the future.

llvm-svn: 361863
---
 .../InstCombine/InstCombineCalls.cpp          | 49 ++++++++-----------
 1 file changed, 20 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a18043ef33f8c..9c6297e4c1721 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2052,38 +2052,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     LLVM_FALLTHROUGH;
   case Intrinsic::usub_sat:
   case Intrinsic::ssub_sat: {
-    Value *Arg0 = II->getArgOperand(0);
-    Value *Arg1 = II->getArgOperand(1);
+    SaturatingInst *SI = cast<SaturatingInst>(II);
+    Value *Arg0 = SI->getLHS();
+    Value *Arg1 = SI->getRHS();
 
     // Make use of known overflow information.
-    OverflowResult OR;
-    switch (IID) {
-    default:
-      llvm_unreachable("Unexpected intrinsic!");
-    case Intrinsic::uadd_sat:
-      OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
-      if (OR == OverflowResult::NeverOverflows)
-        return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
-      if (OR == OverflowResult::AlwaysOverflowsHigh)
-        return replaceInstUsesWith(*II,
-                                   ConstantInt::getAllOnesValue(II->getType()));
-      break;
-    case Intrinsic::usub_sat:
-      OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
-      if (OR == OverflowResult::NeverOverflows)
-        return BinaryOperator::CreateNUWSub(Arg0, Arg1);
-      if (OR == OverflowResult::AlwaysOverflowsLow)
-        return replaceInstUsesWith(*II,
+    OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
+                                        Arg0, Arg1, SI);
+    switch (OR) {
+      case OverflowResult::MayOverflow:
+        break;
+      case OverflowResult::NeverOverflows:
+        if (SI->isSigned())
+          return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
+        else
+          return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
+      case OverflowResult::AlwaysOverflowsLow:
+        if (SI->isSigned()) break; // TODO: Support signed.
+        return replaceInstUsesWith(*SI,
                                    ConstantInt::getNullValue(II->getType()));
-      break;
-    case Intrinsic::sadd_sat:
-      if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
-        return BinaryOperator::CreateNSWAdd(Arg0, Arg1);
-      break;
-    case Intrinsic::ssub_sat:
-      if (willNotOverflowSignedSub(Arg0, Arg1, *II))
-        return BinaryOperator::CreateNSWSub(Arg0, Arg1);
-      break;
+      case OverflowResult::AlwaysOverflowsHigh:
+        if (SI->isSigned()) break; // TODO: Support signed.
+        return replaceInstUsesWith(*SI,
+                                   ConstantInt::getAllOnesValue(II->getType()));
     }
 
     // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN

From 2941eb68643885612b1cc305f878dd255156a59a Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 28 May 2019 18:59:28 +0000
Subject: [PATCH 0394/1176] [InstCombine] Add tests for signed saturating
 always overflow; NFC

llvm-svn: 361864
---
 .../InstCombine/saturating-add-sub.ll         | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 8b50eb6426271..26374a501e744 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -339,6 +339,32 @@ define <2 x i8> @test_vector_sadd_neg_neg(<2 x i8> %a) {
   ret <2 x i8> %r
 }
 
+define i8 @test_scalar_sadd_always_overflows_low(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_always_overflows_low(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], -120
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i8 [[A]], i8 -120
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[MIN]], i8 -10)
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp slt i8 %a, -120
+  %min = select i1 %cmp, i8 %a, i8 -120
+  %r = call i8 @llvm.sadd.sat.i8(i8 %min, i8 -10)
+  ret i8 %r
+}
+
+define i8 @test_scalar_sadd_always_overflows_high(i8 %a) {
+; CHECK-LABEL: @test_scalar_sadd_always_overflows_high(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A:%.*]], 120
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i8 [[A]], i8 120
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[MAX]], i8 10)
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sgt i8 %a, 120
+  %max = select i1 %cmp, i8 %a, i8 120
+  %r = call i8 @llvm.sadd.sat.i8(i8 %max, i8 10)
+  ret i8 %r
+}
+
 ; While this is a no-overflow condition, the nuw flag gets lost due to
 ; canonicalization and we can no longer determine this
 define i8 @test_scalar_uadd_sub_nuw_lost_no_ov(i8 %a) {
@@ -801,6 +827,32 @@ define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) {
   ret <2 x i8> %r
 }
 
+define i8 @test_scalar_ssub_always_overflows_low(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_always_overflows_low(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A:%.*]], 120
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i8 [[A]], i8 120
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 -10, i8 [[MAX]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp sgt i8 %a, 120
+  %max = select i1 %cmp, i8 %a, i8 120
+  %r = call i8 @llvm.ssub.sat.i8(i8 -10, i8 %max)
+  ret i8 %r
+}
+
+define i8 @test_scalar_ssub_always_overflows_high(i8 %a) {
+; CHECK-LABEL: @test_scalar_ssub_always_overflows_high(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], -120
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i8 [[A]], i8 -120
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 10, i8 [[MIN]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %cmp = icmp slt i8 %a, -120
+  %min = select i1 %cmp, i8 %a, i8 -120
+  %r = call i8 @llvm.ssub.sat.i8(i8 10, i8 %min)
+  ret i8 %r
+}
+
 define i8 @test_scalar_usub_add_nuw_no_ov(i8 %a) {
 ; CHECK-LABEL: @test_scalar_usub_add_nuw_no_ov(
 ; CHECK-NEXT:    [[R:%.*]] = add i8 [[A:%.*]], 1

From 272d70c3665f1fdc57ddab2d555c0bec6944f966 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 19:04:21 +0000
Subject: [PATCH 0395/1176] Revert DAGCombine "hoist binop with const" folds

Appear to introduce test-suite compile-time hang.

http://lab.llvm.org:8011/builders/clang-cmake-x86_64-sde-avx512-linux/builds/22825

This reverts r361852,r361853,r361854,r361855,r361856

llvm-svn: 361865
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 ----------
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll | 10 ++-
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 36 ++++----
 llvm/test/CodeGen/AArch64/xor.ll              | 18 ++--
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 14 ++--
 .../CodeGen/SPARC/2013-05-17-CallFrame.ll     |  5 +-
 llvm/test/CodeGen/SystemZ/alloca-03.ll        | 11 +--
 llvm/test/CodeGen/X86/combine-add.ll          |  4 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 83 +++++++++----------
 llvm/test/CodeGen/X86/shift-amount-mod.ll     |  9 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 64 +++++++-------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 20 ++---
 llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 16 ++--
 llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 12 +--
 llvm/test/CodeGen/X86/xor.ll                  | 62 +++++++-------
 llvm/test/CodeGen/X86/zext-sext.ll            | 21 +++--
 16 files changed, 196 insertions(+), 231 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index efac27a1b69be..d53ee3134d550 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2303,13 +2303,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     }
   }
 
-  // (x - y) + -1  ->  add (xor y, -1), x
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isAllOnesOrAllOnesSplat(N1)) {
-    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
-    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
-  }
-
   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
@@ -2461,14 +2454,6 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
-  // Hoist one-use subtraction by constant:  (x - C) + y  ->  (x + y) - C
-  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
-    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
-    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
-  }
-
   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
   // add (sext i1 Y), X --> sub X, (zext i1 Y)
@@ -2938,33 +2923,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
-  // (x - y) - 1  ->  add (xor y, -1), x
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
-    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
-                              DAG.getAllOnesConstant(DL, VT));
-    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
-  }
-
-  // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
-    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
-  }
-  // y - (x + C)  ->  (y - x) - C
-  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
-      isConstantOrConstantVector(N1.getOperand(1))) {
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
-    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
-  }
-  // (x - C) - y  ->  (x - y) - C
-  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
-    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
-  }
-
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index c91700436bb96..6daef644761b5 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -486,7 +486,8 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w1
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -499,7 +500,8 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg x8, x1
+; CHECK-NEXT:    mov w8, #64
+; CHECK-NEXT:    sub x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -513,7 +515,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w2, w1
+; CHECK-NEXT:    add w8, w1, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 0, %a
@@ -525,7 +527,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x2, x1
+; CHECK-NEXT:    add x8, x1, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 0e1a426c77f29..c571dac94b81e 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    add w0, w8, #32 // =32
+; CHECK-NEXT:    add w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w8, w2
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w2, w8
-; CHECK-NEXT:    sub w0, w8, #32 // =32
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    add w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w2, w8
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    sub w0, w8, #32 // =32
+; CHECK-NEXT:    sub w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w8, w2
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    add w0, w8, #32 // =32
+; CHECK-NEXT:    add w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w8, w2
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI19_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -314,8 +314,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
@@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI23_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI23_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll
index ca6c0dfabba48..1dca55a971308 100644
--- a/llvm/test/CodeGen/AArch64/xor.ll
+++ b/llvm/test/CodeGen/AArch64/xor.ll
@@ -18,8 +18,8 @@ define i32 @PR39657(i8* %p, i64 %x) {
 define i32 @add_of_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w1
-; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    sub w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -29,8 +29,8 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w1
-; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    sub w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -40,8 +40,9 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -51,8 +52,9 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 2dd7e20c00ccf..20c84c5b63277 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -9,16 +9,16 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0:       ; %bb.0: ; %entry
 ; VARIANT0-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT0-NEXT:    s_load_dword s2, s[0:1], 0xb
-; VARIANT0-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT0-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT0-NEXT:    s_mov_b32 s6, 0
 ; VARIANT0-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT:    s_add_i32 s2, s2, -1
 ; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; VARIANT0-NEXT:    s_barrier
+; VARIANT0-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -30,15 +30,15 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1:       ; %bb.0: ; %entry
 ; VARIANT1-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT1-NEXT:    s_load_dword s2, s[0:1], 0xb
-; VARIANT1-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT1-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT1-NEXT:    s_mov_b32 s6, 0
 ; VARIANT1-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT1-NEXT:    s_add_i32 s2, s2, -1
 ; VARIANT1-NEXT:    s_barrier
+; VARIANT1-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT1-NEXT:    s_waitcnt expcnt(0)
@@ -59,7 +59,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    v_xad_u32 v3, v0, -1, s0
+; VARIANT2-NEXT:    s_add_i32 s0, s0, -1
+; VARIANT2-NEXT:    v_sub_u32_e32 v3, s0, v0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -81,7 +82,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    v_xad_u32 v3, v0, -1, s0
+; VARIANT3-NEXT:    s_add_i32 s0, s0, -1
+; VARIANT3-NEXT:    v_sub_u32_e32 v3, s0, v0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
index 274e99b114c32..1a97e4e317e57 100644
--- a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
+++ b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
@@ -15,9 +15,10 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
 ; V8-NEXT:    .cfi_register 15, 31
 ; V8-NEXT:    add %i0, 7, %i0
 ; V8-NEXT:    and %i0, -8, %i0
+; V8-NEXT:    add %i0, 8, %i0
 ; V8-NEXT:    sub %sp, %i0, %i0
-; V8-NEXT:    add %i0, -8, %sp
-; V8-NEXT:    add %i0, 88, %o0
+; V8-NEXT:    add %i0, 96, %o0
+; V8-NEXT:    mov %i0, %sp
 ; V8-NEXT:    add %sp, -16, %sp
 ; V8-NEXT:    st %o0, [%sp+104]
 ; V8-NEXT:    st %o0, [%sp+100]
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index cac569ff41fa3..343071211b751 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -75,12 +75,13 @@ define void @f3(i64 %len) {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    sllg %r0, %r2, 3
+; CHECK-NEXT:    sllg %r2, %r2, 3
+; CHECK-NEXT:    la %r0, 120(%r2)
 ; CHECK-NEXT:    sgr %r1, %r0
-; CHECK-NEXT:    lay %r15, -120(%r1)
-; CHECK-NEXT:    la %r1, 160(%r1)
-; CHECK-NEXT:    nill %r1, 65408
-; CHECK-NEXT:    mvghi 0(%r1), 10
+; CHECK-NEXT:    la %r2, 280(%r1)
+; CHECK-NEXT:    nill %r2, 65408
+; CHECK-NEXT:    lgr %r15, %r1
+; CHECK-NEXT:    mvghi 0(%r2), 10
 ; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
 ; CHECK-NEXT:    br %r14
   %x = alloca i64, i64 %len, align 128
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 1d20fcf33d742..6f5f1370e6b4e 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
 ; SSE-LABEL: combine_vec_add_sub_sub:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    psubd %xmm1, %xmm0
-; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub:
 ; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> %a, %b
   %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index fd3d83ed2cbec..c9a577dbaa92b 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:    movq %rdi, %rbp
+; CHECK-NEXT:    movq %rdx, %r14
+; CHECK-NEXT:    movq %rdi, %r15
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
@@ -78,11 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $32, %esi
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    imulq $1040, %rbx, %rax ## imm = 0x410
+; CHECK-NEXT:    imulq $1040, %r14, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@{{.*}}(%rip), %rcx
 ; CHECK-NEXT:    leaq 8(%rcx,%rax), %rbx
-; CHECK-NEXT:    movl $1, %r15d
+; CHECK-NEXT:    movl $1, %r14d
 ; CHECK-NEXT:    movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -92,47 +91,48 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    testb %r14b, %r14b
+; CHECK-NEXT:    xorl %r12d, %r12d
+; CHECK-NEXT:    testb %r12b, %r12b
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %r13d, %r13d
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%r14), %eax
+; CHECK-NEXT:    leal 1(%r12), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $-1, %r13d
+; CHECK-NEXT:    movl $-1, %ecx
 ; CHECK-NEXT:    movslq (%rsi,%rax,4), %rax
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %r13d
+; CHECK-NEXT:    movl $1, %ecx
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movl %r12d, %ecx
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r12
+; CHECK-NEXT:    ## implicit-def: $r13
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r12
+; CHECK-NEXT:    ## implicit-def: $r13
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    js LBB0_55
 ; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    movq %rax, %r12
+; CHECK-NEXT:    movq %rax, %r13
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
@@ -157,15 +157,16 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    je LBB0_34
 ; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r12), %rax
+; CHECK-NEXT:    leaq 1(%r13), %rax
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_29
 ; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %r12
+; CHECK-NEXT:    incq %r13
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal -324(%r13), %eax
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-NEXT:    addl $-324, %eax ## imm = 0xFEBC
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
@@ -175,11 +176,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_38
 ; CHECK-NEXT:  LBB0_35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, %r13d
+; CHECK-NEXT:    cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.36: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, %r13d
+; CHECK-NEXT:    cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.37: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -194,8 +195,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movb $0, (%r12)
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movb $0, (%r13)
+; CHECK-NEXT:    movl %r12d, %ecx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
@@ -207,22 +208,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_55
 ; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
+; CHECK-NEXT:    movl $268, %ecx ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %r13d
+; CHECK-NEXT:    movl $2, %ecx
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_40: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %r13d
+; CHECK-NEXT:    movl $20, %ecx
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r14), %eax
+; CHECK-NEXT:    leal -268(%r12), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.56: ## %while.body200
@@ -232,12 +233,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_20: ## %sw.bb256
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movl %r12d, %ecx
 ; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r15d
-; CHECK-NEXT:    testl %r15d, %r15d
-; CHECK-NEXT:    movl %r13d, %r14d
+; CHECK-NEXT:    decl %r14d
+; CHECK-NEXT:    testl %r14d, %r14d
+; CHECK-NEXT:    movl %ecx, %r12d
 ; CHECK-NEXT:    jg LBB0_13
 ; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -254,28 +255,27 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_11:
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %r13d, %r13d
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:  LBB0_22: ## %while.end1465
-; CHECK-NEXT:    incl %r13d
-; CHECK-NEXT:    cmpl $16, %r13d
+; CHECK-NEXT:    incl %ecx
+; CHECK-NEXT:    cmpl $16, %ecx
 ; CHECK-NEXT:    ja LBB0_50
 ; CHECK-NEXT:  ## %bb.23: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %r13d, %eax
+; CHECK-NEXT:    btl %ecx, %eax
 ; CHECK-NEXT:    jae LBB0_50
 ; CHECK-NEXT:  ## %bb.24:
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:  LBB0_48: ## %if.then1477
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
-; CHECK-NEXT:    subq %rbp, %rbx
-; CHECK-NEXT:    movq _syHistory@{{.*}}(%rip), %rax
-; CHECK-NEXT:    leaq 8189(%rbx,%rax), %rax
+; CHECK-NEXT:    addq $8189, %r15 ## imm = 0x1FFD
+; CHECK-NEXT:    subq %rbx, %r15
+; CHECK-NEXT:    addq _syHistory@{{.*}}(%rip), %r15
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_49: ## %for.body1723
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    decq %rax
+; CHECK-NEXT:    decq %r15
 ; CHECK-NEXT:    jmp LBB0_49
 ; CHECK-NEXT:  LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT:    movl $512, %eax ## imm = 0x200
@@ -302,8 +302,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_55: ## %if.then.i
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
-; CHECK-NEXT:    movq %rbx, %rbp
+; CHECK-NEXT:    movq %r15, %rbx
 ; CHECK-NEXT:    jmp LBB0_48
 ; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
 ; CHECK-NEXT:    xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index e8af5f66d36c9..6c268d8a27f42 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    movl $32, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %ecx
+; X64-NEXT:    movl $32, %ecx
+; X64-NEXT:    subl %esi, %ecx
 ; X64-NEXT:    subl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
@@ -1139,10 +1139,9 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    movl $64, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 4544707d07a2f..2ffbfcb56b2f7 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal 32(%rdi), %eax
+; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    addl $-32, %eax
+; X32-NEXT:    addl $32, %ecx
+; X32-NEXT:    subl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    leal -32(%rdx,%rsi), %eax
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    addl $32, %edi
+; X64-NEXT:    subl %edi, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $-32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal -32(%rdi), %eax
+; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    subl %edx, %esi
 ; X64-NEXT:    leal 32(%rsi), %eax
+; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add0:
 ; X32:       # %bb.0:
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add1:
 ; X32:       # %bb.0:
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd %xmm2, %xmm1
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    psubd %xmm1, %xmm0
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    psubd %xmm0, %xmm2
+; X32-NEXT:    movdqa %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd %xmm2, %xmm1
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
-; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    psubd %xmm1, %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd %xmm0, %xmm2
+; X64-NEXT:    movdqa %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -460,15 +460,15 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    psubd %xmm2, %xmm1
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    psubd %xmm2, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    psubd %xmm2, %xmm1
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    psubd %xmm2, %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
@@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 58c972164d8f1..2c41ee31a101d 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psubb %xmm2, %xmm0
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm0, %xmm1
-; SSE2-NEXT:    psubb %xmm2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    psubb %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm0, %xmm1
-; SSE41-NEXT:    psubb %xmm2, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2NOBW-LABEL: test_div7_16i8:
@@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
-; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vzeroupper
 ; AVX2NOBW-NEXT:    retq
 ;
@@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
-; AVX512BW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
   %res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
@@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm3, %xmm2
+; SSE2-NEXT:    psubb %xmm3, %xmm2
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm2, %xmm1
-; SSE2-NEXT:    psubb %xmm3, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psllw $3, %xmm2
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm3, %xmm2
+; SSE41-NEXT:    psubb %xmm3, %xmm2
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm2, %xmm1
-; SSE41-NEXT:    psubb %xmm3, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    psllw $3, %xmm2
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
+; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
-; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
-; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index eda349005cda7..d612d73448754 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm2
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
@@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
+; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
-; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_32i8:
@@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
-; AVX512BW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %res
@@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm4
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
@@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
+; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm3
 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
@@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
+; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
-; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
@@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
-; AVX512BW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 961bec56e5d3c..86c706c03a70c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX512F-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
+; AVX512F-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
@@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpand %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
+; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
@@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
-; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res
@@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpaddb %ymm4, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
+; AVX512F-NEXT:    vpaddb %ymm4, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm4
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX512F-NEXT:    vpand %ymm8, %ymm4, %ymm4
@@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpaddb %ymm3, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
+; AVX512F-NEXT:    vpaddb %ymm3, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm3
 ; AVX512F-NEXT:    vpand %ymm8, %ymm3, %ymm3
 ; AVX512F-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
@@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT:    vpsubb %zmm3, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
-; AVX512BW-NEXT:    vpsubb %zmm3, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsllw $3, %zmm1, %zmm2
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 654382f7b73e9..5ef5999be95f4 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -532,24 +532,22 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    decl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    notl %esi
-; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
+; X64-LIN-NEXT:    subl %esi, %edi
+; X64-LIN-NEXT:    leal -1(%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    notl %edx
-; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
+; X64-WIN-NEXT:    subl %edx, %ecx
+; X64-WIN-NEXT:    leal -1(%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -560,24 +558,22 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not_decrement:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    decl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    notl %esi
-; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
+; X64-LIN-NEXT:    subl %esi, %edi
+; X64-LIN-NEXT:    leal -1(%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    notl %edx
-; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
+; X64-WIN-NEXT:    subl %edx, %ecx
+; X64-WIN-NEXT:    leal -1(%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -587,23 +583,24 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-NEXT:    pxor %xmm1, %xmm2
-; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
-; X64-LIN-NEXT:    pxor %xmm1, %xmm2
-; X64-LIN-NEXT:    paddd %xmm2, %xmm0
+; X64-LIN-NEXT:    psubd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-LIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
+; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    pxor (%rdx), %xmm0
-; X64-WIN-NEXT:    paddd (%rcx), %xmm0
+; X64-WIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -613,23 +610,24 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not_decrement:
 ; X32:       # %bb.0:
-; X32-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-NEXT:    pxor %xmm1, %xmm2
-; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
-; X64-LIN-NEXT:    pxor %xmm1, %xmm2
-; X64-LIN-NEXT:    paddd %xmm2, %xmm0
+; X64-LIN-NEXT:    psubd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-LIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
+; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    pxor (%rdx), %xmm0
-; X64-WIN-NEXT:    paddd (%rcx), %xmm0
+; X64-WIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 84096e3b6805d..7034378a880b5 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -15,27 +15,30 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
 ; CHECK-NEXT:    subq %rax, %rsi
 ; CHECK-NEXT:    movq (%rdx), %rax
 ; CHECK-NEXT:    movswl 8(%rdi), %edx
+; CHECK-NEXT:    movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
 ; CHECK-NEXT:    movswl (%rax,%rsi,2), %eax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    imull %edx, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    addl $2138875574, %eax # imm = 0x7F7CA6B6
 ; CHECK-NEXT:    cmpl $-8608074, %eax # imm = 0xFF7CA6B6
-; CHECK-NEXT:    movslq %eax, %rdi
+; CHECK-NEXT:    movslq %eax, %r8
 ; CHECK-NEXT:    setl %dl
 ; CHECK-NEXT:    cmpl $2138875573, %eax # imm = 0x7F7CA6B5
-; CHECK-NEXT:    movq %rdi, %r8
+; CHECK-NEXT:    movq %r8, %r9
 ; CHECK-NEXT:    leal -1(%rdx,%rdx), %edx
 ; CHECK-NEXT:    cmovlel %edx, %esi
-; CHECK-NEXT:    subq %rax, %r8
+; CHECK-NEXT:    subq %rax, %r9
+; CHECK-NEXT:    addq %r8, %rdi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, %esi
-; CHECK-NEXT:    cmovneq %rax, %r8
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovnsq %rax, %r8
-; CHECK-NEXT:    movq (%rcx), %rax
-; CHECK-NEXT:    subq %r8, %rdi
-; CHECK-NEXT:    leaq -2138875574(%rax,%rdi), %rax
+; CHECK-NEXT:    cmovneq %rax, %r9
+; CHECK-NEXT:    testl %r8d, %r8d
+; CHECK-NEXT:    cmovnsq %rax, %r9
+; CHECK-NEXT:    movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
+; CHECK-NEXT:    subq %r9, %rdi
+; CHECK-NEXT:    addq (%rcx), %rdi
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    movq %rax, (%rcx)
 ; CHECK-NEXT:    retq
 entry:

From 363079fe73eb6f179ecdde81e6f0d526d4ba1cf8 Mon Sep 17 00:00:00 2001
From: Shoaib Meenai <smeenai@fb.com>
Date: Tue, 28 May 2019 19:09:17 +0000
Subject: [PATCH 0396/1176] [compiler-rt] Fix cmake warnings

 - Fix cmake BOOL misspellings
 - Set cmake policy for CMP0075 to NEW

As requested by smeenai I've compared CMAkeCache.txt in master with and
without this patch and the only changes are to the variable types I fixed:

     $ diff build-b1-master/CMakeCache.txt build-b1-compiler-rt-fix-cmake-warnings/CMakeCache.txt
     503c503
     < COMPILER_RT_BAREMETAL_BUILD:STRING=OFF
     ---
     > COMPILER_RT_BAREMETAL_BUILD:BOOL=OFF
     550c550
     < COMPILER_RT_HWASAN_WITH_INTERCEPTORS:STRING=ON
     ---
     > COMPILER_RT_HWASAN_WITH_INTERCEPTORS:BOOL=ON

Patch by Wink Saville <wink@saville.com>.

Differential Revision: https://reviews.llvm.org/D61203

llvm-svn: 361866
---
 compiler-rt/CMakeLists.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 800304c07638b..14f514a96ab72 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -5,6 +5,10 @@
 
 cmake_minimum_required(VERSION 3.4.3)
 
+if(POLICY CMP0075)
+  cmake_policy(SET CMP0075 NEW)
+endif()
+
 # Check if compiler-rt is built as a standalone project.
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
   project(CompilerRT C CXX ASM)
@@ -61,10 +65,10 @@ if (NOT COMPILER_RT_ASAN_SHADOW_SCALE STREQUAL "")
       -D${COMPILER_RT_ASAN_SHADOW_SCALE_DEFINITION})
 endif()
 
-set(COMPILER_RT_HWASAN_WITH_INTERCEPTORS ON CACHE BOOLEAN
+set(COMPILER_RT_HWASAN_WITH_INTERCEPTORS ON CACHE BOOL
     "Enable libc interceptors in HWASan (testing mode)")
 
-set(COMPILER_RT_BAREMETAL_BUILD OFF CACHE BOOLEAN
+set(COMPILER_RT_BAREMETAL_BUILD OFF CACHE BOOL
   "Build for a bare-metal target.")
 
 if (COMPILER_RT_STANDALONE_BUILD)

From 91f8066d1de5df00e29ac1c517d2e7353dcb3a60 Mon Sep 17 00:00:00 2001
From: "Joel E. Denny" <jdenny.ornl@gmail.com>
Date: Tue, 28 May 2019 19:27:19 +0000
Subject: [PATCH 0397/1176] [OpenMP] Set pragma start loc to `#pragma` loc

This patch adjusts `PragmaOpenMPHandler` to set the location of
`tok::annot_pragma_openmp` to the `#pragma` location instead of the
`omp` location so that the former becomes the start location of the
OpenMP AST node.  This can be useful when, for example, rewriting a
directive using Clang's Rewrite facility.  Most of this patch updates
tests for changes to locations in diagnostics and `-ast-dump` output.

Reviewed By: ABataev, lebedev.ri, Meinersbur, aaron.ballman

Differential Revision: https://reviews.llvm.org/D61509

llvm-svn: 361867
---
 .../clang-tidy/openmp-use-default-none.cpp    |  20 +-
 clang/lib/Parse/ParsePragma.cpp               |   2 +-
 clang/test/AST/ast-dump-openmp-atomic.c       |   4 +-
 clang/test/AST/ast-dump-openmp-barrier.c      |   2 +-
 clang/test/AST/ast-dump-openmp-cancel.c       |  10 +-
 .../AST/ast-dump-openmp-cancellation-point.c  |  10 +-
 clang/test/AST/ast-dump-openmp-critical.c     |   4 +-
 ...dump-openmp-distribute-parallel-for-simd.c |  60 +-
 .../ast-dump-openmp-distribute-parallel-for.c |  60 +-
 .../AST/ast-dump-openmp-distribute-simd.c     |  20 +-
 clang/test/AST/ast-dump-openmp-distribute.c   |  20 +-
 clang/test/AST/ast-dump-openmp-flush.c        |   2 +-
 clang/test/AST/ast-dump-openmp-for-simd.c     |  20 +-
 clang/test/AST/ast-dump-openmp-for.c          |  20 +-
 clang/test/AST/ast-dump-openmp-master.c       |   4 +-
 clang/test/AST/ast-dump-openmp-ordered.c      |  14 +-
 .../AST/ast-dump-openmp-parallel-for-simd.c   |  40 +-
 clang/test/AST/ast-dump-openmp-parallel-for.c |  40 +-
 .../AST/ast-dump-openmp-parallel-sections.c   |   8 +-
 clang/test/AST/ast-dump-openmp-parallel.c     |   8 +-
 clang/test/AST/ast-dump-openmp-section.c      |  12 +-
 clang/test/AST/ast-dump-openmp-sections.c     |   4 +-
 clang/test/AST/ast-dump-openmp-simd.c         |  20 +-
 clang/test/AST/ast-dump-openmp-single.c       |   4 +-
 clang/test/AST/ast-dump-openmp-target-data.c  |   4 +-
 .../AST/ast-dump-openmp-target-enter-data.c   |  18 +-
 .../AST/ast-dump-openmp-target-exit-data.c    |  18 +-
 ...ast-dump-openmp-target-parallel-for-simd.c | 240 +++---
 .../AST/ast-dump-openmp-target-parallel-for.c | 240 +++---
 .../AST/ast-dump-openmp-target-parallel.c     |  48 +-
 clang/test/AST/ast-dump-openmp-target-simd.c  | 100 +--
 ...arget-teams-distribute-parallel-for-simd.c | 680 ++++++++---------
 ...nmp-target-teams-distribute-parallel-for.c | 680 ++++++++---------
 ...dump-openmp-target-teams-distribute-simd.c | 240 +++---
 .../ast-dump-openmp-target-teams-distribute.c | 240 +++---
 clang/test/AST/ast-dump-openmp-target-teams.c |  48 +-
 .../test/AST/ast-dump-openmp-target-update.c  |  18 +-
 clang/test/AST/ast-dump-openmp-target.c       |  20 +-
 clang/test/AST/ast-dump-openmp-task.c         |  14 +-
 clang/test/AST/ast-dump-openmp-taskgroup.c    |   4 +-
 .../test/AST/ast-dump-openmp-taskloop-simd.c  | 120 +--
 clang/test/AST/ast-dump-openmp-taskloop.c     | 120 +--
 clang/test/AST/ast-dump-openmp-taskwait.c     |   2 +-
 clang/test/AST/ast-dump-openmp-taskyield.c    |   2 +-
 ...penmp-teams-distribute-parallel-for-simd.c | 720 +++++++++---------
 ...ump-openmp-teams-distribute-parallel-for.c | 720 +++++++++---------
 .../ast-dump-openmp-teams-distribute-simd.c   | 280 +++----
 .../AST/ast-dump-openmp-teams-distribute.c    | 280 +++----
 clang/test/AST/ast-dump-openmp-teams.c        |  56 +-
 clang/test/AST/dump.cpp                       |   8 +-
 clang/test/OpenMP/parallel_codegen.cpp        |   4 +-
 clang/test/OpenMP/threadprivate_codegen.cpp   |   8 +-
 .../PCH/stmt-openmp_structured_block-bit.cpp  |   8 +-
 53 files changed, 2674 insertions(+), 2674 deletions(-)

diff --git a/clang-tools-extra/test/clang-tidy/openmp-use-default-none.cpp b/clang-tools-extra/test/clang-tidy/openmp-use-default-none.cpp
index 1e388ed0860b1..35d2d17b1e0e8 100644
--- a/clang-tools-extra/test/clang-tidy/openmp-use-default-none.cpp
+++ b/clang-tools-extra/test/clang-tidy/openmp-use-default-none.cpp
@@ -23,7 +23,7 @@ void n0(const int a) {
 void p0_0() {
 #pragma omp parallel
   ;
-  // CHECK-NOTES: :[[@LINE-2]]:9: warning: OpenMP directive 'parallel' does not specify 'default' clause, consider specifying 'default(none)' clause
+  // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'parallel' does not specify 'default' clause, consider specifying 'default(none)' clause
 }
 
 // 'parallel' directive can have 'default' clause, and said clause specified,
@@ -38,7 +38,7 @@ void p0_1() {
 void p0_2() {
 #pragma omp parallel default(shared)
   ;
-  // CHECK-NOTES: :[[@LINE-2]]:9: warning: OpenMP directive 'parallel' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
+  // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'parallel' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
   // CHECK-NOTES: :[[@LINE-3]]:22: note: existing 'default' clause specified here
 }
 
@@ -49,7 +49,7 @@ void p0_2() {
 void p1_0() {
 #pragma omp task
   ;
-  // CHECK-NOTES: :[[@LINE-2]]:9: warning: OpenMP directive 'task' does not specify 'default' clause, consider specifying 'default(none)' clause
+  // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'task' does not specify 'default' clause, consider specifying 'default(none)' clause
 }
 
 // 'task' directive can have 'default' clause, and said clause specified,
@@ -64,7 +64,7 @@ void p1_1() {
 void p1_2() {
 #pragma omp task default(shared)
   ;
-  // CHECK-NOTES: :[[@LINE-2]]:9: warning: OpenMP directive 'task' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
+  // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'task' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
   // CHECK-NOTES: :[[@LINE-3]]:18: note: existing 'default' clause specified here
 }
 
@@ -76,7 +76,7 @@ void p2_0() {
 #pragma omp target
 #pragma omp teams
   ;
-  // CHECK-NOTES: :[[@LINE-2]]:9: warning: OpenMP directive 'teams' does not specify 'default' clause, consider specifying 'default(none)' clause
+  // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'teams' does not specify 'default' clause, consider specifying 'default(none)' clause
 }
 
 // 'teams' directive can have 'default' clause, and said clause specified,
@@ -93,7 +93,7 @@ void p2_2() {
 #pragma omp target
 #pragma omp teams default(shared)
   ;
-  // CHECK-NOTES: :[[@LINE-2]]:9: warning: OpenMP directive 'teams' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
+  // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'teams' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
   // CHECK-NOTES: :[[@LINE-3]]:19: note: existing 'default' clause specified here
 }
 
@@ -105,7 +105,7 @@ void p3_0(const int a) {
 #pragma omp taskloop
   for (int b = 0; b < a; b++)
     ;
-  // CHECK-NOTES: :[[@LINE-3]]:9: warning: OpenMP directive 'taskloop' does not specify 'default' clause, consider specifying 'default(none)' clause
+  // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'taskloop' does not specify 'default' clause, consider specifying 'default(none)' clause
 }
 
 // 'taskloop' directive can have 'default' clause, and said clause specified,
@@ -122,7 +122,7 @@ void p3_2(const int a) {
 #pragma omp taskloop default(shared)
   for (int b = 0; b < a; b++)
     ;
-  // CHECK-NOTES: :[[@LINE-3]]:9: warning: OpenMP directive 'taskloop' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
+  // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'taskloop' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
   // CHECK-NOTES: :[[@LINE-4]]:22: note: existing 'default' clause specified here
 }
 
@@ -138,7 +138,7 @@ void p4_0(const int a) {
 #pragma omp parallel for
   for (int b = 0; b < a; b++)
     ;
-  // CHECK-NOTES: :[[@LINE-3]]:9: warning: OpenMP directive 'parallel for' does not specify 'default' clause, consider specifying 'default(none)' clause
+  // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'parallel for' does not specify 'default' clause, consider specifying 'default(none)' clause
 }
 
 // 'parallel' directive can have 'default' clause, and said clause specified,
@@ -155,6 +155,6 @@ void p4_2(const int a) {
 #pragma omp parallel for default(shared)
   for (int b = 0; b < a; b++)
     ;
-  // CHECK-NOTES: :[[@LINE-3]]:9: warning: OpenMP directive 'parallel for' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
+  // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'parallel for' specifies 'default(shared)' clause, consider using 'default(none)' clause instead
   // CHECK-NOTES: :[[@LINE-4]]:26: note: existing 'default' clause specified here
 }
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index 6cf313e300931..5b9749c80aa6d 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -2217,7 +2217,7 @@ void PragmaOpenMPHandler::HandlePragma(Preprocessor &PP,
   Token Tok;
   Tok.startToken();
   Tok.setKind(tok::annot_pragma_openmp);
-  Tok.setLocation(FirstTok.getLocation());
+  Tok.setLocation(Introducer.Loc);
 
   while (Tok.isNot(tok::eod) && Tok.isNot(tok::eof)) {
     Pragma.push_back(Tok);
diff --git a/clang/test/AST/ast-dump-openmp-atomic.c b/clang/test/AST/ast-dump-openmp-atomic.c
index f95ef2ffb996d..de9526e6301c2 100644
--- a/clang/test/AST/ast-dump-openmp-atomic.c
+++ b/clang/test/AST/ast-dump-openmp-atomic.c
@@ -9,10 +9,10 @@ void test(int i) {
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-atomic.c:3:1, line:6:1> line:3:6 test 'void (int)'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:11, col:15> col:15 used i 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:18, line:6:1>
-// CHECK-NEXT:     `-OMPAtomicDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT:     `-OMPAtomicDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3, col:5>
 // CHECK-NEXT:         |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:         | |-UnaryOperator {{.*}} <col:3, col:5> openmp_structured_block 'int' prefix '++'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <col:5> 'int' lvalue ParmVar {{.*}} 'i' 'int'
-// CHECK-NEXT:         | `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-atomic.c:4:9) *const restrict'
+// CHECK-NEXT:         | `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-atomic.c:4:1) *const restrict'
 // CHECK-NEXT:         `-DeclRefExpr {{.*}} <line:5:5> 'int' lvalue ParmVar {{.*}} 'i' 'int'
diff --git a/clang/test/AST/ast-dump-openmp-barrier.c b/clang/test/AST/ast-dump-openmp-barrier.c
index 1173a0e6f2a78..23f3ecbb62aec 100644
--- a/clang/test/AST/ast-dump-openmp-barrier.c
+++ b/clang/test/AST/ast-dump-openmp-barrier.c
@@ -7,4 +7,4 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-barrier.c:3:1, line:5:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:5:1>
-// CHECK-NEXT:     `-OMPBarrierDirective {{.*}} <line:4:9, col:20> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPBarrierDirective {{.*}} <line:4:1, col:20> openmp_standalone_directive
diff --git a/clang/test/AST/ast-dump-openmp-cancel.c b/clang/test/AST/ast-dump-openmp-cancel.c
index f7f0fcd3c73cd..ee26353d77597 100644
--- a/clang/test/AST/ast-dump-openmp-cancel.c
+++ b/clang/test/AST/ast-dump-openmp-cancel.c
@@ -10,11 +10,11 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-cancel.c:3:1, line:8:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:8:1>
-// CHECK-NEXT:     `-OMPParallelDirective {{.*}} <line:4:9, col:21>
+// CHECK-NEXT:     `-OMPParallelDirective {{.*}} <line:4:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3, line:7:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-CompoundStmt {{.*}} <line:5:3, line:7:3> openmp_structured_block
-// CHECK-NEXT:           | `-OMPCancelDirective {{.*}} <line:6:9, col:28> openmp_standalone_directive
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-cancel.c:4:9) *const restrict'
+// CHECK-NEXT:           | `-OMPCancelDirective {{.*}} <line:6:1, col:28> openmp_standalone_directive
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-cancel.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-cancellation-point.c b/clang/test/AST/ast-dump-openmp-cancellation-point.c
index 36baa73bd1de9..709009039369c 100644
--- a/clang/test/AST/ast-dump-openmp-cancellation-point.c
+++ b/clang/test/AST/ast-dump-openmp-cancellation-point.c
@@ -10,11 +10,11 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-cancellation-point.c:3:1, line:8:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:8:1>
-// CHECK-NEXT:     `-OMPParallelDirective {{.*}} <line:4:9, col:21>
+// CHECK-NEXT:     `-OMPParallelDirective {{.*}} <line:4:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3, line:7:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-CompoundStmt {{.*}} <line:5:3, line:7:3> openmp_structured_block
-// CHECK-NEXT:           | `-OMPCancellationPointDirective {{.*}} <line:6:9, col:40> openmp_standalone_directive
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-cancellation-point.c:4:9) *const restrict'
+// CHECK-NEXT:           | `-OMPCancellationPointDirective {{.*}} <line:6:1, col:40> openmp_standalone_directive
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-cancellation-point.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-critical.c b/clang/test/AST/ast-dump-openmp-critical.c
index c618c40de593a..44ad1ed2e3542 100644
--- a/clang/test/AST/ast-dump-openmp-critical.c
+++ b/clang/test/AST/ast-dump-openmp-critical.c
@@ -8,8 +8,8 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-critical.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPCriticalDirective {{.*}} <line:4:9, col:21>
+// CHECK-NEXT:     `-OMPCriticalDirective {{.*}} <line:4:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-critical.c:4:9) *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-critical.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c
index 472474c121968..d882e296a398d 100644
--- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:4:9, col:41>
+// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:4:1, col:41>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,11 +55,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -67,7 +67,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:10:9, col:41>
+// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:10:1, col:41>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -95,11 +95,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -110,7 +110,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:17:9, col:53>
+// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:17:1, col:53>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:42, col:52>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:51> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:51> 'int' 1
@@ -141,11 +141,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -156,7 +156,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:24:9, col:53>
+// CHECK-NEXT: |   `-OMPDistributeParallelForSimdDirective {{.*}} <line:24:1, col:53>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:42, col:52>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:51> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:51> 'int' 2
@@ -187,11 +187,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -203,7 +203,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPDistributeParallelForSimdDirective {{.*}} <line:31:9, col:53>
+// CHECK-NEXT:     `-OMPDistributeParallelForSimdDirective {{.*}} <line:31:1, col:53>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:42, col:52>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:51> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:51> 'int' 2
@@ -246,11 +246,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
index 25279198ddb8d..0f2dceb0fcb01 100644
--- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
+++ b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:4:9, col:36>
+// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:4:1, col:36>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,11 +55,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -67,7 +67,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:10:9, col:36>
+// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:10:1, col:36>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -95,11 +95,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -110,7 +110,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:17:9, col:48>
+// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:17:1, col:48>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:37, col:47>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:46> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:46> 'int' 1
@@ -141,11 +141,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -156,7 +156,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:24:9, col:48>
+// CHECK-NEXT: |   `-OMPDistributeParallelForDirective {{.*}} <line:24:1, col:48>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:37, col:47>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:46> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:46> 'int' 2
@@ -187,11 +187,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -203,7 +203,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPDistributeParallelForDirective {{.*}} <line:31:9, col:48>
+// CHECK-NEXT:     `-OMPDistributeParallelForDirective {{.*}} <line:31:1, col:48>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:37, col:47>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:46> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:46> 'int' 2
@@ -246,11 +246,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-distribute-simd.c b/clang/test/AST/ast-dump-openmp-distribute-simd.c
index cf0adbe7be025..3029373a20ce8 100644
--- a/clang/test/AST/ast-dump-openmp-distribute-simd.c
+++ b/clang/test/AST/ast-dump-openmp-distribute-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:4:9, col:28>
+// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:4:1, col:28>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,7 +55,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -63,7 +63,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:10:9, col:28>
+// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:10:1, col:28>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -91,7 +91,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -102,7 +102,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:17:9, col:40>
+// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:17:1, col:40>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:29, col:39>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:38> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:38> 'int' 1
@@ -133,7 +133,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -144,7 +144,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:24:9, col:40>
+// CHECK-NEXT: |   `-OMPDistributeSimdDirective {{.*}} <line:24:1, col:40>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:29, col:39>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:38> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:38> 'int' 2
@@ -175,7 +175,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -187,7 +187,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPDistributeSimdDirective {{.*}} <line:31:9, col:40>
+// CHECK-NEXT:     `-OMPDistributeSimdDirective {{.*}} <line:31:1, col:40>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:29, col:39>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:38> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:38> 'int' 2
@@ -230,7 +230,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-distribute.c b/clang/test/AST/ast-dump-openmp-distribute.c
index 6e08745b4cbc1..5ece4060bd3ce 100644
--- a/clang/test/AST/ast-dump-openmp-distribute.c
+++ b/clang/test/AST/ast-dump-openmp-distribute.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:4:9, col:23>
+// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:4:1, col:23>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,7 +55,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -63,7 +63,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:10:9, col:23>
+// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:10:1, col:23>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -91,7 +91,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -102,7 +102,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:17:9, col:35>
+// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:17:1, col:35>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:24, col:34>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:33> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:33> 'int' 1
@@ -133,7 +133,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -144,7 +144,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:24:9, col:35>
+// CHECK-NEXT: |   `-OMPDistributeDirective {{.*}} <line:24:1, col:35>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:24, col:34>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:33> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:33> 'int' 2
@@ -175,7 +175,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -187,7 +187,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPDistributeDirective {{.*}} <line:31:9, col:35>
+// CHECK-NEXT:     `-OMPDistributeDirective {{.*}} <line:31:1, col:35>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:24, col:34>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:33> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:33> 'int' 2
@@ -230,7 +230,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-distribute.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-flush.c b/clang/test/AST/ast-dump-openmp-flush.c
index 74b8c159c024e..d5312e4363576 100644
--- a/clang/test/AST/ast-dump-openmp-flush.c
+++ b/clang/test/AST/ast-dump-openmp-flush.c
@@ -7,4 +7,4 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-flush.c:3:1, line:5:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:5:1>
-// CHECK-NEXT:     `-OMPFlushDirective {{.*}} <line:4:9, col:18> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPFlushDirective {{.*}} <line:4:1, col:18> openmp_standalone_directive
diff --git a/clang/test/AST/ast-dump-openmp-for-simd.c b/clang/test/AST/ast-dump-openmp-for-simd.c
index fc7d8519f2b5d..d799b18670b57 100644
--- a/clang/test/AST/ast-dump-openmp-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-for-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:4:9, col:21>
+// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:4:1, col:21>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,7 +55,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -63,7 +63,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:10:9, col:21>
+// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:10:1, col:21>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -91,7 +91,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -102,7 +102,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:17:9, col:33>
+// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:17:1, col:33>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:22, col:32>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:31> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:31> 'int' 1
@@ -133,7 +133,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -144,7 +144,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:24:9, col:33>
+// CHECK-NEXT: |   `-OMPForSimdDirective {{.*}} <line:24:1, col:33>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:22, col:32>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:31> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:31> 'int' 2
@@ -175,7 +175,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -187,7 +187,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPForSimdDirective {{.*}} <line:31:9, col:33>
+// CHECK-NEXT:     `-OMPForSimdDirective {{.*}} <line:31:1, col:33>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:22, col:32>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:31> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:31> 'int' 2
@@ -230,7 +230,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-for.c b/clang/test/AST/ast-dump-openmp-for.c
index 7294f794a3656..557424d1926e3 100644
--- a/clang/test/AST/ast-dump-openmp-for.c
+++ b/clang/test/AST/ast-dump-openmp-for.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl 0x{{.*}} <{{.*}}ast-dump-openmp-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:4:9, col:16>
+// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:4:1, col:16>
 // CHECK-NEXT: |     `-CapturedStmt 0x{{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt 0x{{.*}} <line:5:3, line:6:5>
@@ -55,7 +55,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator 0x{{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr 0x{{.*}} <col:26> 'int' lvalue Var 0x{{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt 0x{{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr 0x{{.*}} <col:3> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
@@ -63,7 +63,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:10:9, col:16>
+// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:10:1, col:16>
 // CHECK-NEXT: |     `-CapturedStmt 0x{{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt 0x{{.*}} <line:11:3, line:13:7>
@@ -91,7 +91,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt 0x{{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl 0x{{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -102,7 +102,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:17:9, col:28>
+// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:17:1, col:28>
 // CHECK-NEXT: |     |-OMPCollapseClause 0x{{.*}} <col:17, col:27>
 // CHECK-NEXT: |     | `-ConstantExpr 0x{{.*}} <col:26> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral 0x{{.*}} <col:26> 'int' 1
@@ -133,7 +133,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt 0x{{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl 0x{{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -144,7 +144,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:24:9, col:28>
+// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:24:1, col:28>
 // CHECK-NEXT: |     |-OMPCollapseClause 0x{{.*}} <col:17, col:27>
 // CHECK-NEXT: |     | `-ConstantExpr 0x{{.*}} <col:26> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral 0x{{.*}} <col:26> 'int' 2
@@ -175,7 +175,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt 0x{{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl 0x{{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -187,7 +187,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl 0x{{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl 0x{{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt 0x{{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPForDirective 0x{{.*}} <line:31:9, col:28>
+// CHECK-NEXT:     `-OMPForDirective 0x{{.*}} <line:31:1, col:28>
 // CHECK-NEXT:       |-OMPCollapseClause 0x{{.*}} <col:17, col:27>
 // CHECK-NEXT:       | `-ConstantExpr 0x{{.*}} <col:26> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral 0x{{.*}} <col:26> 'int' 2
@@ -230,7 +230,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator 0x{{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr 0x{{.*}} <col:30> 'int' lvalue Var 0x{{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt 0x{{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl 0x{{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl 0x{{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl 0x{{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl 0x{{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-master.c b/clang/test/AST/ast-dump-openmp-master.c
index 9325e59cbf07b..5943a7e0a8c80 100644
--- a/clang/test/AST/ast-dump-openmp-master.c
+++ b/clang/test/AST/ast-dump-openmp-master.c
@@ -8,8 +8,8 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-master.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPMasterDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT:     `-OMPMasterDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-master.c:4:9) *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-master.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-ordered.c b/clang/test/AST/ast-dump-openmp-ordered.c
index 0cce4525d6ca8..9f39be3e6d851 100644
--- a/clang/test/AST/ast-dump-openmp-ordered.c
+++ b/clang/test/AST/ast-dump-openmp-ordered.c
@@ -21,15 +21,15 @@ void test_three(int x) {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-ordered.c:3:1, line:6:1> line:3:6 test_one 'void ()'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:17, line:6:1>
-// CHECK-NEXT: |   `-OMPOrderedDirective {{.*}} <line:4:9, col:20>
+// CHECK-NEXT: |   `-OMPOrderedDirective {{.*}} <line:4:1, col:20>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT: |       `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |         |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT: |         `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-ordered.c:4:9) *const restrict'
+// CHECK-NEXT: |         `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-ordered.c:4:1) *const restrict'
 // CHECK-NEXT: |-FunctionDecl {{.*}} <line:8:1, line:12:1> line:8:6 test_two 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:12:1>
-// CHECK-NEXT: |   `-OMPForDirective {{.*}} <line:9:9, col:24>
+// CHECK-NEXT: |   `-OMPForDirective {{.*}} <line:9:1, col:24>
 // CHECK-NEXT: |     |-OMPOrderedClause {{.*}} <col:17, col:24>
 // CHECK-NEXT: |     | `-<<<NULL>>>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:10:3, line:11:5>
@@ -47,14 +47,14 @@ void test_three(int x) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:11:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:9:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-ordered.c:9:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:9:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-ordered.c:9:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:10:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: `-FunctionDecl {{.*}} <line:14:1, line:19:1> line:14:6 test_three 'void (int)'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:24, line:19:1>
-// CHECK-NEXT:     `-OMPForDirective {{.*}} <line:15:9, col:27>
+// CHECK-NEXT:     `-OMPForDirective {{.*}} <line:15:1, col:27>
 // CHECK-NEXT:       |-OMPOrderedClause {{.*}} <col:17, col:26>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:25> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:25> 'int' 1
@@ -73,10 +73,10 @@ void test_three(int x) {
 // CHECK-NEXT:         | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT:         | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | `-CompoundStmt {{.*}} <col:31, line:18:3> openmp_structured_block
-// CHECK-NEXT:         | |   `-OMPOrderedDirective {{.*}} <line:17:9, col:35> openmp_standalone_directive
+// CHECK-NEXT:         | |   `-OMPOrderedDirective {{.*}} <line:17:1, col:35> openmp_standalone_directive
 // CHECK-NEXT:         | |     |-OMPDependClause {{.*}} <col:21, <invalid sloc>>
 // CHECK-NEXT:         | |     `-<<<NULL>>>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:15:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-ordered.c:15:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:15:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-ordered.c:15:1) *const restrict'
 // CHECK-NEXT:         | `-VarDecl {{.*}} <line:16:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
diff --git a/clang/test/AST/ast-dump-openmp-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-parallel-for-simd.c
index 0c22b9f0bca86..81d075161db91 100644
--- a/clang/test/AST/ast-dump-openmp-parallel-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-parallel-for-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:4:9, col:30>
+// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:4:1, col:30>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,9 +55,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -65,7 +65,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:10:9, col:30>
+// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:10:1, col:30>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -93,9 +93,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -106,7 +106,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:17:9, col:42>
+// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:17:1, col:42>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:31, col:41>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:40> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:40> 'int' 1
@@ -137,9 +137,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -150,7 +150,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:24:9, col:42>
+// CHECK-NEXT: |   `-OMPParallelForSimdDirective {{.*}} <line:24:1, col:42>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:31, col:41>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:40> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:40> 'int' 2
@@ -181,9 +181,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -195,7 +195,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPParallelForSimdDirective {{.*}} <line:31:9, col:42>
+// CHECK-NEXT:     `-OMPParallelForSimdDirective {{.*}} <line:31:1, col:42>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:31, col:41>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:40> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:40> 'int' 2
@@ -238,9 +238,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-parallel-for.c b/clang/test/AST/ast-dump-openmp-parallel-for.c
index bebd54568c802..15e572bdee46d 100644
--- a/clang/test/AST/ast-dump-openmp-parallel-for.c
+++ b/clang/test/AST/ast-dump-openmp-parallel-for.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:4:9, col:25>
+// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:4:1, col:25>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,9 +55,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -65,7 +65,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:10:9, col:25>
+// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:10:1, col:25>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -93,9 +93,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -106,7 +106,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:17:9, col:37>
+// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:17:1, col:37>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:26, col:36>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:35> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:35> 'int' 1
@@ -137,9 +137,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -150,7 +150,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:24:9, col:37>
+// CHECK-NEXT: |   `-OMPParallelForDirective {{.*}} <line:24:1, col:37>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:26, col:36>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:35> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:35> 'int' 2
@@ -181,9 +181,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -195,7 +195,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPParallelForDirective {{.*}} <line:31:9, col:37>
+// CHECK-NEXT:     `-OMPParallelForDirective {{.*}} <line:31:1, col:37>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:26, col:36>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:35> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:35> 'int' 2
@@ -238,9 +238,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-parallel-sections.c b/clang/test/AST/ast-dump-openmp-parallel-sections.c
index d5bf5e84b1906..d90dd7847d3ce 100644
--- a/clang/test/AST/ast-dump-openmp-parallel-sections.c
+++ b/clang/test/AST/ast-dump-openmp-parallel-sections.c
@@ -15,11 +15,11 @@ void test_one() {
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:18, line:6:1>
 // CHECK-NEXT: `-FunctionDecl {{.*}} <line:8:1, line:11:1> line:8:6 test_one 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:17, line:11:1>
-// CHECK-NEXT:     `-OMPParallelSectionsDirective {{.*}} <line:9:9, col:30>
+// CHECK-NEXT:     `-OMPParallelSectionsDirective {{.*}} <line:9:1, col:30>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:10:3, col:7>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-CompoundStmt {{.*}} <col:3, col:7> openmp_structured_block
 // CHECK-NEXT:           | `-NullStmt {{.*}} <col:5>
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:9:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-sections.c:9:9) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:9:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel-sections.c:9:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-parallel.c b/clang/test/AST/ast-dump-openmp-parallel.c
index 389566b052e74..ba33546681b5d 100644
--- a/clang/test/AST/ast-dump-openmp-parallel.c
+++ b/clang/test/AST/ast-dump-openmp-parallel.c
@@ -8,10 +8,10 @@ void test() {
 // CHECK: TranslationUnitDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl 0x{{.*}} <{{.*}}ast-dump-openmp-parallel.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt 0x{{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPParallelDirective 0x{{.*}} <line:4:9, col:21>
+// CHECK-NEXT:     `-OMPParallelDirective 0x{{.*}} <line:4:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt 0x{{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-NullStmt 0x{{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           `-ImplicitParamDecl 0x{{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel.c:4:9) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl 0x{{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-section.c b/clang/test/AST/ast-dump-openmp-section.c
index 1268a0919d90d..adc9df1973553 100644
--- a/clang/test/AST/ast-dump-openmp-section.c
+++ b/clang/test/AST/ast-dump-openmp-section.c
@@ -11,18 +11,18 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-section.c:3:1, line:9:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:9:1>
-// CHECK-NEXT:     `-OMPSectionsDirective {{.*}} <line:4:9, col:21>
+// CHECK-NEXT:     `-OMPSectionsDirective {{.*}} <line:4:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3, line:8:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-CompoundStmt {{.*}} <line:5:3, line:8:3> openmp_structured_block
-// CHECK-NEXT:           | `-OMPSectionDirective {{.*}} <line:6:9, col:20>
+// CHECK-NEXT:           | `-OMPSectionDirective {{.*}} <line:6:1, col:20>
 // CHECK-NEXT:           |   `-CapturedStmt {{.*}} <line:7:5>
 // CHECK-NEXT:           |     `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |       |-NullStmt {{.*}} <col:5> openmp_structured_block
-// CHECK-NEXT:           |       `-ImplicitParamDecl {{.*}} <line:6:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-section.c:6:9) *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-section.c:4:9) *const restrict'
-// CHECK-NEXT:           |-RecordDecl {{.*}} <line:6:9> col:9 implicit struct definition
+// CHECK-NEXT:           |       `-ImplicitParamDecl {{.*}} <line:6:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-section.c:6:1) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-section.c:4:1) *const restrict'
+// CHECK-NEXT:           |-RecordDecl {{.*}} <line:6:1> col:1 implicit struct definition
 // CHECK-NEXT:           | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:             |-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT:             `-ImplicitParamDecl {{.*}} <line:6:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-section.c:6:9) *const restrict'
+// CHECK-NEXT:             `-ImplicitParamDecl {{.*}} <line:6:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-section.c:6:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-sections.c b/clang/test/AST/ast-dump-openmp-sections.c
index d932cdd5b48ea..330732bde1e25 100644
--- a/clang/test/AST/ast-dump-openmp-sections.c
+++ b/clang/test/AST/ast-dump-openmp-sections.c
@@ -15,9 +15,9 @@ void test_one() {
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:18, line:6:1>
 // CHECK-NEXT: `-FunctionDecl {{.*}} <line:8:1, line:11:1> line:8:6 test_one 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:17, line:11:1>
-// CHECK-NEXT:     `-OMPSectionsDirective {{.*}} <line:9:9, col:21>
+// CHECK-NEXT:     `-OMPSectionsDirective {{.*}} <line:9:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:10:3, col:7>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-CompoundStmt {{.*}} <col:3, col:7> openmp_structured_block
 // CHECK-NEXT:           | `-NullStmt {{.*}} <col:5>
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:9:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-sections.c:9:9) *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:9:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-sections.c:9:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-simd.c b/clang/test/AST/ast-dump-openmp-simd.c
index 5ba69e4e7d486..0f12df1ec6ebf 100644
--- a/clang/test/AST/ast-dump-openmp-simd.c
+++ b/clang/test/AST/ast-dump-openmp-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:4:9, col:17>
+// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:4:1, col:17>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
@@ -55,7 +55,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -63,7 +63,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:10:9, col:17>
+// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:10:1, col:17>
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
@@ -91,7 +91,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -102,7 +102,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:17:9, col:29>
+// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:17:1, col:29>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:18, col:28>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:27> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:27> 'int' 1
@@ -133,7 +133,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -144,7 +144,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:24:9, col:29>
+// CHECK-NEXT: |   `-OMPSimdDirective {{.*}} <line:24:1, col:29>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:18, col:28>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:27> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:27> 'int' 2
@@ -175,7 +175,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -187,7 +187,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPSimdDirective {{.*}} <line:31:9, col:29>
+// CHECK-NEXT:     `-OMPSimdDirective {{.*}} <line:31:1, col:29>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:18, col:28>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:27> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:27> 'int' 2
@@ -230,7 +230,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-single.c b/clang/test/AST/ast-dump-openmp-single.c
index b0c47b8e9b0e0..33b28bc1849d4 100644
--- a/clang/test/AST/ast-dump-openmp-single.c
+++ b/clang/test/AST/ast-dump-openmp-single.c
@@ -8,8 +8,8 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-single.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPSingleDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT:     `-OMPSingleDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-single.c:4:9) *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-single.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target-data.c b/clang/test/AST/ast-dump-openmp-target-data.c
index 230b7be15045a..4d2754536edfe 100644
--- a/clang/test/AST/ast-dump-openmp-target-data.c
+++ b/clang/test/AST/ast-dump-openmp-target-data.c
@@ -9,10 +9,10 @@ void test(int x) {
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-data.c:3:1, line:6:1> line:3:6 test 'void (int)'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:11, col:15> col:15 used x 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:18, line:6:1>
-// CHECK-NEXT:     `-OMPTargetDataDirective {{.*}} <line:4:9, col:31>
+// CHECK-NEXT:     `-OMPTargetDataDirective {{.*}} <line:4:1, col:31>
 // CHECK-NEXT:       |-OMPMapClause {{.*}} <col:25, col:30>
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <col:29> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-data.c:4:9) *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-data.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target-enter-data.c b/clang/test/AST/ast-dump-openmp-target-enter-data.c
index b1c579b95a198..8abfeb8661e5d 100644
--- a/clang/test/AST/ast-dump-openmp-target-enter-data.c
+++ b/clang/test/AST/ast-dump-openmp-target-enter-data.c
@@ -9,16 +9,16 @@ void test(int x) {
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-enter-data.c:3:1, line:6:1> line:3:6 test 'void (int)'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:11, col:15> col:15 used x 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:18, line:6:1>
-// CHECK-NEXT:     `-OMPTargetEnterDataDirective {{.*}} <line:4:9, line:5:39> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPTargetEnterDataDirective {{.*}} <line:4:1, line:5:39> openmp_standalone_directive
 // CHECK-NEXT:       |-OMPMapClause {{.*}} <line:4:31, line:5:38>
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <col:37> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:4:9>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:4:1>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:           |-CompoundStmt {{.*}} <col:9>
+// CHECK-NEXT:           |-CompoundStmt {{.*}} <col:1>
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-enter-data.c:4:9) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-enter-data.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target-exit-data.c b/clang/test/AST/ast-dump-openmp-target-exit-data.c
index 55d69715a6d8f..777c6c019cb09 100644
--- a/clang/test/AST/ast-dump-openmp-target-exit-data.c
+++ b/clang/test/AST/ast-dump-openmp-target-exit-data.c
@@ -9,16 +9,16 @@ void test(int x) {
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-exit-data.c:3:1, line:6:1> line:3:6 test 'void (int)'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:11, col:15> col:15 used x 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:18, line:6:1>
-// CHECK-NEXT:     `-OMPTargetExitDataDirective {{.*}} <line:4:9, line:5:38> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPTargetExitDataDirective {{.*}} <line:4:1, line:5:38> openmp_standalone_directive
 // CHECK-NEXT:       |-OMPMapClause {{.*}} <line:4:30, line:5:37>
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <col:36> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:4:9>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:4:1>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:           |-CompoundStmt {{.*}} <col:9>
+// CHECK-NEXT:           |-CompoundStmt {{.*}} <col:1>
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-exit-data.c:4:9) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-exit-data.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c
index d3f076aa46384..739a750c7a0f3 100644
--- a/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:4:9, col:37>
+// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:4:1, col:37>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -61,14 +61,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -86,20 +86,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -119,14 +119,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -144,9 +144,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -154,7 +154,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:10:9, col:37>
+// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:10:1, col:37>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -189,17 +189,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -231,9 +231,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -241,13 +241,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -281,17 +281,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -323,9 +323,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -336,7 +336,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:17:9, col:49>
+// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:17:1, col:49>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:38, col:48>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:47> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:47> 'int' 1
@@ -374,17 +374,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -416,9 +416,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -426,13 +426,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -466,17 +466,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -508,9 +508,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -521,7 +521,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:24:9, col:49>
+// CHECK-NEXT: |   `-OMPTargetParallelForSimdDirective {{.*}} <line:24:1, col:49>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:38, col:48>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:47> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:47> 'int' 2
@@ -559,17 +559,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -601,9 +601,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -611,13 +611,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -651,17 +651,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -693,9 +693,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -707,7 +707,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetParallelForSimdDirective {{.*}} <line:31:9, col:49>
+// CHECK-NEXT:     `-OMPTargetParallelForSimdDirective {{.*}} <line:31:1, col:49>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:38, col:48>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:47> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:47> 'int' 2
@@ -758,9 +758,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -770,8 +770,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -817,9 +817,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -830,13 +830,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -884,9 +884,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -896,8 +896,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -943,9 +943,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |     |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |     |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-parallel-for.c b/clang/test/AST/ast-dump-openmp-target-parallel-for.c
index aaad0e9c5d8a2..c8379602edf35 100644
--- a/clang/test/AST/ast-dump-openmp-target-parallel-for.c
+++ b/clang/test/AST/ast-dump-openmp-target-parallel-for.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:4:9, col:32>
+// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:4:1, col:32>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -61,14 +61,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -86,20 +86,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -119,14 +119,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -144,9 +144,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -154,7 +154,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:10:9, col:32>
+// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:10:1, col:32>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -189,17 +189,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -231,9 +231,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -241,13 +241,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -281,17 +281,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -323,9 +323,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -336,7 +336,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:17:9, col:44>
+// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:17:1, col:44>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:33, col:43>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:42> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:42> 'int' 1
@@ -374,17 +374,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -416,9 +416,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -426,13 +426,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -466,17 +466,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -508,9 +508,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -521,7 +521,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:24:9, col:44>
+// CHECK-NEXT: |   `-OMPTargetParallelForDirective {{.*}} <line:24:1, col:44>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:33, col:43>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:42> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:42> 'int' 2
@@ -559,17 +559,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -601,9 +601,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -611,13 +611,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -651,17 +651,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -693,9 +693,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -707,7 +707,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetParallelForDirective {{.*}} <line:31:9, col:44>
+// CHECK-NEXT:     `-OMPTargetParallelForDirective {{.*}} <line:31:1, col:44>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:33, col:43>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:42> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:42> 'int' 2
@@ -758,9 +758,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -770,8 +770,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -817,9 +817,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -830,13 +830,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -884,9 +884,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -896,8 +896,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -943,9 +943,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |     |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |     |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-parallel.c b/clang/test/AST/ast-dump-openmp-target-parallel.c
index 2a1232df0d26b..c376b98e511c9 100644
--- a/clang/test/AST/ast-dump-openmp-target-parallel.c
+++ b/clang/test/AST/ast-dump-openmp-target-parallel.c
@@ -8,7 +8,7 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-parallel.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPTargetParallelDirective {{.*}} <line:4:9, col:28>
+// CHECK-NEXT:     `-OMPTargetParallelDirective {{.*}} <line:4:1, col:28>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-CapturedStmt {{.*}} <col:3>
@@ -16,38 +16,38 @@ void test() {
 // CHECK-NEXT:           |   |-CapturedStmt {{.*}} <col:3>
 // CHECK-NEXT:           |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |   |   |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           |   |   `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
-// CHECK-NEXT:           |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
-// CHECK-NEXT:           |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           |   |   `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
+// CHECK-NEXT:           |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
+// CHECK-NEXT:           |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:           |   | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           |   `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |     |-NullStmt {{.*}} <line:5:3> openmp_structured_block
-// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           |     `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
+// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           |     `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
-// CHECK-NEXT:           |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
+// CHECK-NEXT:           |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:           | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:             |-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:             | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:             |   |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:             |   `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
-// CHECK-NEXT:             |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
-// CHECK-NEXT:             |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:             |   `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
+// CHECK-NEXT:             |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
+// CHECK-NEXT:             |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:             | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:             `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:               |-NullStmt {{.*}} <line:5:3> openmp_structured_block
-// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:               `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:9) *const restrict'
+// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:               `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target-simd.c b/clang/test/AST/ast-dump-openmp-target-simd.c
index 430d6b2ca4bd3..be33e4b01612c 100644
--- a/clang/test/AST/ast-dump-openmp-target-simd.c
+++ b/clang/test/AST/ast-dump-openmp-target-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:4:9, col:24>
+// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:4:1, col:24>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -59,18 +59,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -88,7 +88,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -96,7 +96,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:10:9, col:24>
+// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:10:1, col:24>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -129,7 +129,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -137,13 +137,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -175,7 +175,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -186,7 +186,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:17:9, col:36>
+// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:17:1, col:36>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:25, col:35>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:34> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:34> 'int' 1
@@ -222,7 +222,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -230,13 +230,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -268,7 +268,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -279,7 +279,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:24:9, col:36>
+// CHECK-NEXT: |   `-OMPTargetSimdDirective {{.*}} <line:24:1, col:36>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:25, col:35>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:34> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:34> 'int' 2
@@ -315,7 +315,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -323,13 +323,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -361,7 +361,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -373,7 +373,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetSimdDirective {{.*}} <line:31:9, col:36>
+// CHECK-NEXT:     `-OMPTargetSimdDirective {{.*}} <line:31:1, col:36>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:25, col:35>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:34> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:34> 'int' 2
@@ -422,7 +422,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -433,13 +433,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -485,7 +485,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
index 20e326764fcce..567cd39b9ed28 100644
--- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:4:9, col:54>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:4:1, col:54>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -63,18 +63,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -92,16 +92,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -121,18 +121,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -150,22 +150,22 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |       `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -187,18 +187,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -216,16 +216,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -245,18 +245,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |     |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -274,11 +274,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |       | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |         `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -286,7 +286,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:10:9, col:54>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:10:1, col:54>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -323,21 +323,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -369,19 +369,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -415,21 +415,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | |   | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -461,11 +461,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |     |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -473,13 +473,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -515,21 +515,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -561,19 +561,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -607,21 +607,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |     | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -653,11 +653,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |       |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |       |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -668,7 +668,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:17:9, col:66>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:17:1, col:66>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:55, col:65>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:64> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:64> 'int' 1
@@ -708,21 +708,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -754,19 +754,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -800,21 +800,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | |   | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -846,11 +846,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |     |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -858,13 +858,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -900,21 +900,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -946,19 +946,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -992,21 +992,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |     | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |     | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1038,11 +1038,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |       |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |       |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -1053,7 +1053,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:24:9, col:66>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:24:1, col:66>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:55, col:65>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:64> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:64> 'int' 2
@@ -1093,21 +1093,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1139,19 +1139,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1185,21 +1185,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | |   | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1231,11 +1231,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |     |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -1243,13 +1243,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1285,21 +1285,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1331,19 +1331,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1377,21 +1377,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |     | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |     | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1423,11 +1423,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |       |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |       |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -1439,7 +1439,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:31:9, col:66>
+// CHECK-NEXT:     `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} <line:31:1, col:66>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:55, col:65>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:64> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:64> 'int' 2
@@ -1492,11 +1492,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1506,10 +1506,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1555,11 +1555,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1569,8 +1569,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1618,11 +1618,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |   | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |   | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |   | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |   | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1632,10 +1632,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | |   | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |   | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1681,11 +1681,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |     |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |     |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |     |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |     |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |     |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1696,13 +1696,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1752,11 +1752,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1766,10 +1766,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1815,11 +1815,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1829,8 +1829,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1878,11 +1878,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |     | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |     | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |     | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |     | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1892,10 +1892,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |     | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |     | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |     | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1941,11 +1941,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |       |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |       |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |       |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |       |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |       |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c
index 4e2696716d3f7..7110793b46a79 100644
--- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c
+++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:4:9, col:49>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:4:1, col:49>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -63,18 +63,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -92,16 +92,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -121,18 +121,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -150,22 +150,22 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |       `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -187,18 +187,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -216,16 +216,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -245,18 +245,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |     |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -274,11 +274,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |       | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict'
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |         `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -286,7 +286,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:10:9, col:49>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:10:1, col:49>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -323,21 +323,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -369,19 +369,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -415,21 +415,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | |   | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -461,11 +461,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |     |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -473,13 +473,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -515,21 +515,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -561,19 +561,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -607,21 +607,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       |     | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |     | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -653,11 +653,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |       |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict'
 // CHECK-NEXT: |       |       |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -668,7 +668,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:17:9, col:61>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:17:1, col:61>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:50, col:60>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:59> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:59> 'int' 1
@@ -708,21 +708,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -754,19 +754,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -800,21 +800,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | |   | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -846,11 +846,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |     |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -858,13 +858,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -900,21 +900,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -946,19 +946,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -992,21 +992,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       |     | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |     | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1038,11 +1038,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |       |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict'
 // CHECK-NEXT: |       |       |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -1053,7 +1053,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:24:9, col:61>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:24:1, col:61>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:50, col:60>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:59> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:59> 'int' 2
@@ -1093,21 +1093,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1139,19 +1139,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1185,21 +1185,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |   | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | |   | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | |   | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |   | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1231,11 +1231,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |     |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |     |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |     `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -1243,13 +1243,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1285,21 +1285,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1331,19 +1331,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1377,21 +1377,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       |     | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |     | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |     | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |     | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
-// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |     | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1423,11 +1423,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |       |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |       |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |       |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:9) *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict'
 // CHECK-NEXT: |       |       |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |       `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -1439,7 +1439,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:31:9, col:61>
+// CHECK-NEXT:     `-OMPTargetTeamsDistributeParallelForDirective {{.*}} <line:31:1, col:61>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:50, col:60>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:59> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:59> 'int' 2
@@ -1492,11 +1492,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1506,10 +1506,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1555,11 +1555,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1569,8 +1569,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1618,11 +1618,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |   | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |   | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |   | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |   | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1632,10 +1632,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | |   | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         | | |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | | |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |   | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1681,11 +1681,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |     |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |     |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |     |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |     |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |     |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1696,13 +1696,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1752,11 +1752,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1766,10 +1766,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         |   | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         |   | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         |   | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1815,11 +1815,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1829,8 +1829,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1878,11 +1878,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |     | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |     | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |     | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         |     | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |     | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |     | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -1892,10 +1892,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |     | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |     | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
-// CHECK-NEXT:         |     |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT:         |     |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |     | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |     | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         |     | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1941,11 +1941,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |       |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |       |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |       |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:9) *const restrict'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |       |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict'
 // CHECK-NEXT:         |       |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |       | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |       |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c
index d738ce8f2b47e..2c7695f03a854 100644
--- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c
+++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:4:9, col:41>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:4:1, col:41>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -61,14 +61,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -86,20 +86,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -119,14 +119,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:5:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -144,9 +144,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -154,7 +154,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:10:9, col:41>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:10:1, col:41>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -189,17 +189,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -231,9 +231,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -241,13 +241,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -281,17 +281,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:11:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -323,9 +323,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -336,7 +336,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:17:9, col:53>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:17:1, col:53>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:42, col:52>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:51> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:51> 'int' 1
@@ -374,17 +374,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -416,9 +416,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -426,13 +426,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -466,17 +466,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:18:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:18:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -508,9 +508,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -521,7 +521,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:24:9, col:53>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:24:1, col:53>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:42, col:52>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:51> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:51> 'int' 2
@@ -559,17 +559,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -601,9 +601,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -611,13 +611,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -651,17 +651,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:25:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:26:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:25:23> col:23 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -693,9 +693,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -707,7 +707,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:31:9, col:53>
+// CHECK-NEXT:     `-OMPTargetTeamsDistributeSimdDirective {{.*}} <line:31:1, col:53>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:42, col:52>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:51> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:51> 'int' 2
@@ -758,9 +758,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -770,8 +770,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -817,9 +817,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -830,13 +830,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -884,9 +884,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -896,8 +896,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:32:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:33:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:32:23> col:23 implicit 'int'
 // CHECK-NEXT:         |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -943,9 +943,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |     |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |     |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute.c
index e83e80e534d89..948c3cc99f787 100644
--- a/clang/test/AST/ast-dump-openmp-target-teams-distribute.c
+++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:4:9, col:36>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:4:1, col:36>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -61,14 +61,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -86,20 +86,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -119,14 +119,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:5:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -144,9 +144,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |     | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict'
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |       `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -154,7 +154,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:10:9, col:36>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:10:1, col:36>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -189,17 +189,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -231,9 +231,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -241,13 +241,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -281,17 +281,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:11:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -323,9 +323,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:13:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -336,7 +336,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:17:9, col:48>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:17:1, col:48>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:37, col:47>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:46> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:46> 'int' 1
@@ -374,17 +374,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -416,9 +416,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -426,13 +426,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -466,17 +466,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:18:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -508,9 +508,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:20:7>
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -521,7 +521,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:24:9, col:48>
+// CHECK-NEXT: |   `-OMPTargetTeamsDistributeDirective {{.*}} <line:24:1, col:48>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:37, col:47>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:46> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:46> 'int' 2
@@ -559,17 +559,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | | | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -601,9 +601,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | |   |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
 // CHECK-NEXT: |       | | |   |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |   `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -611,13 +611,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -651,17 +651,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
 // CHECK-NEXT: |       |   | | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:25:3> col:3 implicit 'int'
 // CHECK-NEXT: |       |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -693,9 +693,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |     |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |     |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |     |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:9) *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict'
 // CHECK-NEXT: |       |     |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |     `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -707,7 +707,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTargetTeamsDistributeDirective {{.*}} <line:31:9, col:48>
+// CHECK-NEXT:     `-OMPTargetTeamsDistributeDirective {{.*}} <line:31:1, col:48>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:37, col:47>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:46> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:46> 'int' 2
@@ -758,9 +758,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -770,8 +770,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -817,9 +817,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | |   |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |   |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -830,13 +830,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -884,9 +884,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
@@ -896,8 +896,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:32:3> col:3 implicit 'int'
 // CHECK-NEXT:         |   | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -943,9 +943,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |     |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |     |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |     |     `-NullStmt {{.*}} <line:35:9>
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:9) *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict'
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |     | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |     |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-target-teams.c b/clang/test/AST/ast-dump-openmp-target-teams.c
index 6d3d60ca7a1be..9910af83de312 100644
--- a/clang/test/AST/ast-dump-openmp-target-teams.c
+++ b/clang/test/AST/ast-dump-openmp-target-teams.c
@@ -8,7 +8,7 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPTargetTeamsDirective {{.*}} <line:4:9, col:25>
+// CHECK-NEXT:     `-OMPTargetTeamsDirective {{.*}} <line:4:1, col:25>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-CapturedStmt {{.*}} <col:3>
@@ -16,38 +16,38 @@ void test() {
 // CHECK-NEXT:           |   |-CapturedStmt {{.*}} <col:3>
 // CHECK-NEXT:           |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |   |   |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           |   |   `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
-// CHECK-NEXT:           |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
-// CHECK-NEXT:           |   |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           |   |   `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
+// CHECK-NEXT:           |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
+// CHECK-NEXT:           |   |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:           |   | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           |   `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |     |-NullStmt {{.*}} <line:5:3> openmp_structured_block
-// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           |     `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
+// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           |     `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
-// CHECK-NEXT:           |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
+// CHECK-NEXT:           |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:           | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:             |-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:             | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:             |   |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:             |   `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
-// CHECK-NEXT:             |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
-// CHECK-NEXT:             |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:             |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:             |   `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
+// CHECK-NEXT:             |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
+// CHECK-NEXT:             |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:             | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:             `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:               |-NullStmt {{.*}} <line:5:3> openmp_structured_block
-// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:               `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:9) *const restrict'
+// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:               `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target-update.c b/clang/test/AST/ast-dump-openmp-target-update.c
index f1ca902213c75..27516efb9b904 100644
--- a/clang/test/AST/ast-dump-openmp-target-update.c
+++ b/clang/test/AST/ast-dump-openmp-target-update.c
@@ -8,16 +8,16 @@ void test(int x) {
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-update.c:3:1, line:5:1> line:3:6 test 'void (int)'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:11, col:15> col:15 used x 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:18, line:5:1>
-// CHECK-NEXT:     `-OMPTargetUpdateDirective {{.*}} <line:4:9, col:32> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPTargetUpdateDirective {{.*}} <line:4:1, col:32> openmp_standalone_directive
 // CHECK-NEXT:       |-OMPToClause {{.*}} <col:27, col:31>
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <col:9>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <col:1>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:           |-CompoundStmt {{.*}} <col:9>
+// CHECK-NEXT:           |-CompoundStmt {{.*}} <col:1>
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-update.c:4:9) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target-update.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-target.c b/clang/test/AST/ast-dump-openmp-target.c
index b0f219e1d8636..880fc8a814da8 100644
--- a/clang/test/AST/ast-dump-openmp-target.c
+++ b/clang/test/AST/ast-dump-openmp-target.c
@@ -8,22 +8,22 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-CapturedStmt {{.*}} <col:3>
 // CHECK-NEXT:           | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |   |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |   `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target.c:4:9) *const restrict'
+// CHECK-NEXT:           |   `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target.c:4:1) *const restrict'
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target.c:4:9) *const restrict'
-// CHECK-NEXT:           |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target.c:4:1) *const restrict'
+// CHECK-NEXT:           |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:           | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:             |-NullStmt {{.*}} <line:5:3> openmp_structured_block
-// CHECK-NEXT:             `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target.c:4:9) *const restrict'
+// CHECK-NEXT:             `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-target.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-task.c b/clang/test/AST/ast-dump-openmp-task.c
index 3a2dc25f265c6..2640844342fa4 100644
--- a/clang/test/AST/ast-dump-openmp-task.c
+++ b/clang/test/AST/ast-dump-openmp-task.c
@@ -8,14 +8,14 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-task.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPTaskDirective {{.*}} <line:4:9, col:17>
+// CHECK-NEXT:     `-OMPTaskDirective {{.*}} <line:4:1, col:17>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-task.c:4:9) *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-task.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-taskgroup.c b/clang/test/AST/ast-dump-openmp-taskgroup.c
index 616f1ed9b9fde..c6a9d81e70819 100644
--- a/clang/test/AST/ast-dump-openmp-taskgroup.c
+++ b/clang/test/AST/ast-dump-openmp-taskgroup.c
@@ -8,8 +8,8 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskgroup.c:3:1, line:6:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPTaskgroupDirective {{.*}} <line:4:9, col:22>
+// CHECK-NEXT:     `-OMPTaskgroupDirective {{.*}} <line:4:1, col:22>
 // CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskgroup.c:4:9) *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskgroup.c:4:1) *const restrict'
diff --git a/clang/test/AST/ast-dump-openmp-taskloop-simd.c b/clang/test/AST/ast-dump-openmp-taskloop-simd.c
index 6e6d11fc94031..77a2fb4077ff7 100644
--- a/clang/test/AST/ast-dump-openmp-taskloop-simd.c
+++ b/clang/test/AST/ast-dump-openmp-taskloop-simd.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskloop-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:4:9, col:26>
+// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:4:1, col:26>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -58,17 +58,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -76,7 +76,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:10:9, col:26>
+// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:10:1, col:26>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -108,17 +108,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -129,7 +129,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:17:9, col:38>
+// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:17:1, col:38>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:27, col:37>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:36> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:36> 'int' 1
@@ -164,17 +164,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -185,7 +185,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:24:9, col:38>
+// CHECK-NEXT: |   `-OMPTaskLoopSimdDirective {{.*}} <line:24:1, col:38>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:27, col:37>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:36> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:36> 'int' 2
@@ -220,17 +220,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -242,7 +242,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTaskLoopSimdDirective {{.*}} <line:31:9, col:38>
+// CHECK-NEXT:     `-OMPTaskLoopSimdDirective {{.*}} <line:31:1, col:38>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:27, col:37>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:36> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:36> 'int' 2
@@ -290,17 +290,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop-simd.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-taskloop.c b/clang/test/AST/ast-dump-openmp-taskloop.c
index ca831625844be..b780c9a1c0562 100644
--- a/clang/test/AST/ast-dump-openmp-taskloop.c
+++ b/clang/test/AST/ast-dump-openmp-taskloop.c
@@ -39,7 +39,7 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskloop.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:4:9, col:21>
+// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:4:1, col:21>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:5:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     `-CapturedStmt {{.*}} <col:3, line:6:5>
@@ -58,17 +58,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:4:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:4:1) *const restrict'
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
@@ -76,7 +76,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:10:9, col:21>
+// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:10:1, col:21>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:11:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
@@ -108,17 +108,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:10:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:10:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
@@ -129,7 +129,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:17:9, col:33>
+// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:17:1, col:33>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:22, col:32>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:31> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:31> 'int' 1
@@ -164,17 +164,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:17:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:17:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
@@ -185,7 +185,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:24:9, col:33>
+// CHECK-NEXT: |   `-OMPTaskLoopDirective {{.*}} <line:24:1, col:33>
 // CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:22, col:32>
 // CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:31> 'int'
 // CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:31> 'int' 2
@@ -220,17 +220,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:24:9) *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:24:1) *const restrict'
 // CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
@@ -242,7 +242,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPTaskLoopDirective {{.*}} <line:31:9, col:33>
+// CHECK-NEXT:     `-OMPTaskLoopDirective {{.*}} <line:31:1, col:33>
 // CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:22, col:32>
 // CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:31> 'int'
 // CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:31> 'int' 2
@@ -290,17 +290,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .lb. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .ub. 'const unsigned long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .st. 'const long'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .liter. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .reductions. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:31:9) *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .lb. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .ub. 'const unsigned long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .st. 'const long'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .liter. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .reductions. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-taskloop.c:31:1) *const restrict'
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-taskwait.c b/clang/test/AST/ast-dump-openmp-taskwait.c
index 3e3e0479835d5..61831ef79340e 100644
--- a/clang/test/AST/ast-dump-openmp-taskwait.c
+++ b/clang/test/AST/ast-dump-openmp-taskwait.c
@@ -7,4 +7,4 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskwait.c:3:1, line:5:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:5:1>
-// CHECK-NEXT:     `-OMPTaskwaitDirective {{.*}} <line:4:9, col:21> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPTaskwaitDirective {{.*}} <line:4:1, col:21> openmp_standalone_directive
diff --git a/clang/test/AST/ast-dump-openmp-taskyield.c b/clang/test/AST/ast-dump-openmp-taskyield.c
index a316d7ef625c3..7465e23f701fa 100644
--- a/clang/test/AST/ast-dump-openmp-taskyield.c
+++ b/clang/test/AST/ast-dump-openmp-taskyield.c
@@ -7,4 +7,4 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskyield.c:3:1, line:5:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:5:1>
-// CHECK-NEXT:     `-OMPTaskyieldDirective {{.*}} <line:4:9, col:22> openmp_standalone_directive
+// CHECK-NEXT:     `-OMPTaskyieldDirective {{.*}} <line:4:1, col:22> openmp_standalone_directive
diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c
index 5e019f9e89eb1..d751267d85158 100644
--- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c
@@ -44,14 +44,14 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:3:1, line:8:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:8:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:6:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:9, col:47>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:1, col:47>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:47>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:47>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:9, col:47> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:1, col:47> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-CapturedStmt {{.*}} <line:6:3, line:7:5>
@@ -69,18 +69,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -97,16 +97,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -125,18 +125,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -153,11 +153,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -179,18 +179,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <<invalid sloc>> 'int' 1
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:5:9, col:47> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:5:1, col:47> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-CapturedStmt {{.*}} <line:6:3, line:7:5>
@@ -208,18 +208,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -236,16 +236,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -264,18 +264,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -292,11 +292,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -321,15 +321,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:16:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:9, col:47>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:1, col:47>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:47>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:47>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:9, col:47> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:1, col:47> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-CapturedStmt {{.*}} <line:13:3, line:15:7>
@@ -359,21 +359,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   | | |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -403,19 +403,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -447,21 +447,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -491,11 +491,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -520,20 +520,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:12:9, col:47> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:12:1, col:47> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-CapturedStmt {{.*}} <line:13:3, line:15:7>
@@ -563,21 +563,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   | | |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -607,19 +607,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -651,21 +651,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -695,11 +695,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -727,15 +727,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:24:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:9, col:59>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:1, col:59>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:59>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:59>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:9, col:59> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:1, col:59> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:48, col:58>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:57> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:57> 'int' 1
@@ -768,21 +768,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   | | |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -812,19 +812,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -856,21 +856,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -900,11 +900,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -929,20 +929,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:20:9, col:59> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:20:1, col:59> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:48, col:58>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:57> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:57> 'int' 1
@@ -975,21 +975,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   | | |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -1019,19 +1019,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -1063,21 +1063,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -1107,11 +1107,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -1139,15 +1139,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:32:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:9, col:59>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:1, col:59>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:59>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:59>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:9, col:59> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:1, col:59> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:48, col:58>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:57> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:57> 'int' 2
@@ -1180,21 +1180,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   | | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -1224,19 +1224,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -1268,21 +1268,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -1312,11 +1312,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -1359,20 +1359,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:28:9, col:59> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:28:1, col:59> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:48, col:58>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:57> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:57> 'int' 2
@@ -1405,21 +1405,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   | | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -1449,19 +1449,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -1493,21 +1493,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -1537,11 +1537,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -1588,16 +1588,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:41:1>
-// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:9, col:19>
+// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:1, col:19>
 // CHECK-NEXT:       |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:9, col:59>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:1, col:59>
 // CHECK-NEXT:         |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:9, col:59>
+// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:1, col:59>
 // CHECK-NEXT:         | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:9, col:59> openmp_structured_block
+// CHECK-NEXT:         | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <col:1, col:59> openmp_structured_block
 // CHECK-NEXT:         | | | | |-OMPCollapseClause {{.*}} <col:48, col:58>
 // CHECK-NEXT:         | | | | | `-ConstantExpr {{.*}} <col:57> 'int'
 // CHECK-NEXT:         | | | | |   `-IntegerLiteral {{.*}} <col:57> 'int' 2
@@ -1642,11 +1642,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1656,10 +1656,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   | | |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | |   | | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | |   | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
-// CHECK-NEXT:         | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
+// CHECK-NEXT:         | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | |   | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         | | | |   | | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -1702,11 +1702,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1716,8 +1716,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -1762,11 +1762,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1776,10 +1776,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
-// CHECK-NEXT:         | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
+// CHECK-NEXT:         | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         | | | | | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -1822,11 +1822,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1872,13 +1872,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1887,7 +1887,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | `-FieldDecl {{.*}} <line:39:27> col:27 implicit 'int'
 // CHECK-NEXT:         | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT:         | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:36:9, col:59> openmp_structured_block
+// CHECK-NEXT:         |   |-OMPTeamsDistributeParallelForSimdDirective {{.*}} <line:36:1, col:59> openmp_structured_block
 // CHECK-NEXT:         |   | |-OMPCollapseClause {{.*}} <col:48, col:58>
 // CHECK-NEXT:         |   | | `-ConstantExpr {{.*}} <col:57> 'int'
 // CHECK-NEXT:         |   | |   `-IntegerLiteral {{.*}} <col:57> 'int' 2
@@ -1932,11 +1932,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1946,10 +1946,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   | | |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   |   | | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   |   | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
-// CHECK-NEXT:         |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
+// CHECK-NEXT:         |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   |   | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         |   |   | | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -1992,11 +1992,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -2006,8 +2006,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -2052,11 +2052,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         |   | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -2066,10 +2066,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
-// CHECK-NEXT:         |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
+// CHECK-NEXT:         |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         |   | | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -2112,11 +2112,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c
index be7ed22b5af2b..62d660b22a414 100644
--- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c
+++ b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c
@@ -44,14 +44,14 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:3:1, line:8:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:8:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:6:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:9, col:42>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:1, col:42>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:42>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:42>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:9, col:42> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:1, col:42> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-CapturedStmt {{.*}} <line:6:3, line:7:5>
@@ -69,18 +69,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -97,16 +97,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -125,18 +125,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -153,11 +153,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -179,18 +179,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <<invalid sloc>> 'int' 1
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:5:9, col:42> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:5:1, col:42> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-CapturedStmt {{.*}} <line:6:3, line:7:5>
@@ -208,18 +208,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -236,16 +236,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -264,18 +264,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -292,11 +292,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |     `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -321,15 +321,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:16:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:9, col:42>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:1, col:42>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:42>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:42>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:9, col:42> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:1, col:42> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-CapturedStmt {{.*}} <line:13:3, line:15:7>
@@ -359,21 +359,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   | | |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -403,19 +403,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -447,21 +447,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -491,11 +491,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -520,20 +520,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:12:9, col:42> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:12:1, col:42> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-CapturedStmt {{.*}} <line:13:3, line:15:7>
@@ -563,21 +563,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   | | |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -607,19 +607,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -651,21 +651,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -695,11 +695,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -727,15 +727,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:24:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:9, col:54>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:1, col:54>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:54>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:54>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:9, col:54> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:1, col:54> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:43, col:53>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:52> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:52> 'int' 1
@@ -768,21 +768,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   | | |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -812,19 +812,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -856,21 +856,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -900,11 +900,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -929,20 +929,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:20:9, col:54> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:20:1, col:54> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:43, col:53>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:52> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:52> 'int' 1
@@ -975,21 +975,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   | | |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -1019,19 +1019,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -1063,21 +1063,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -1107,11 +1107,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -1139,15 +1139,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:32:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:9, col:54>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:1, col:54>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:54>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:54>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:9, col:54> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:1, col:54> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:43, col:53>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:52> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:52> 'int' 2
@@ -1180,21 +1180,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   | | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   | | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
-// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | |   | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | |   | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -1224,19 +1224,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -1268,21 +1268,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | | | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | | | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
-// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
+// CHECK-NEXT: |       | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -1312,11 +1312,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -1359,20 +1359,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:28:9, col:54> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:28:1, col:54> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:43, col:53>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:52> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:52> 'int' 2
@@ -1405,21 +1405,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   | | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   | | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
-// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
+// CHECK-NEXT: |       |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   |   | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   |   | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -1449,19 +1449,19 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |     `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -1493,21 +1493,21 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   | | |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   | | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   | | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
-// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
+// CHECK-NEXT: |       |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -1537,11 +1537,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT: |       |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -1588,16 +1588,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:41:1>
-// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:9, col:19>
+// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:1, col:19>
 // CHECK-NEXT:       |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:9, col:54>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:1, col:54>
 // CHECK-NEXT:         |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:9, col:54>
+// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:1, col:54>
 // CHECK-NEXT:         | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:9, col:54> openmp_structured_block
+// CHECK-NEXT:         | | | |-OMPTeamsDistributeParallelForDirective {{.*}} <col:1, col:54> openmp_structured_block
 // CHECK-NEXT:         | | | | |-OMPCollapseClause {{.*}} <col:43, col:53>
 // CHECK-NEXT:         | | | | | `-ConstantExpr {{.*}} <col:52> 'int'
 // CHECK-NEXT:         | | | | |   `-IntegerLiteral {{.*}} <col:52> 'int' 2
@@ -1642,11 +1642,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1656,10 +1656,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   | | |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | |   | | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | |   | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
-// CHECK-NEXT:         | | | |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
+// CHECK-NEXT:         | | | |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | |   | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         | | | |   | | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -1702,11 +1702,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1716,8 +1716,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -1762,11 +1762,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1776,10 +1776,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | | |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | | | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
-// CHECK-NEXT:         | | | | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
+// CHECK-NEXT:         | | | | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         | | | | | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -1822,11 +1822,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         | | | |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1872,13 +1872,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1887,7 +1887,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | `-FieldDecl {{.*}} <line:39:27> col:27 implicit 'int'
 // CHECK-NEXT:         | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT:         | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:36:9, col:54> openmp_structured_block
+// CHECK-NEXT:         |   |-OMPTeamsDistributeParallelForDirective {{.*}} <line:36:1, col:54> openmp_structured_block
 // CHECK-NEXT:         |   | |-OMPCollapseClause {{.*}} <col:43, col:53>
 // CHECK-NEXT:         |   | | `-ConstantExpr {{.*}} <col:52> 'int'
 // CHECK-NEXT:         |   | |   `-IntegerLiteral {{.*}} <col:52> 'int' 2
@@ -1932,11 +1932,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1946,10 +1946,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   | | |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   |   | | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   |   | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
-// CHECK-NEXT:         |   |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
+// CHECK-NEXT:         |   |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   |   | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         |   |   | | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -1992,11 +1992,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -2006,8 +2006,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -2052,11 +2052,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         |   | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -2066,10 +2066,10 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | | |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   | | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
-// CHECK-NEXT:         |   | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
+// CHECK-NEXT:         |   | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         |   | | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -2112,11 +2112,11 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.lb. 'const unsigned long'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit used .previous.ub. 'const unsigned long'
+// CHECK-NEXT:         |   |   |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c b/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c
index 1b45d0f322537..0d7072177fb5d 100644
--- a/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c
+++ b/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c
@@ -44,14 +44,14 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-simd.c:3:1, line:8:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:8:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:6:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:9, col:34>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:1, col:34>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:34>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:34>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:9, col:34> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:1, col:34> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-ForStmt {{.*}} <line:6:3, line:7:5>
@@ -67,14 +67,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -91,9 +91,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -115,18 +115,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <<invalid sloc>> 'int' 1
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:5:9, col:34> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:5:1, col:34> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-ForStmt {{.*}} <line:6:3, line:7:5>
@@ -142,14 +142,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:6:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -166,9 +166,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -193,15 +193,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:16:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:9, col:34>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:1, col:34>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:34>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:34>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:9, col:34> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:1, col:34> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-ForStmt {{.*}} <line:13:3, line:15:7>
@@ -229,17 +229,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -269,9 +269,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -296,20 +296,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:12:9, col:34> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:12:1, col:34> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-ForStmt {{.*}} <line:13:3, line:15:7>
@@ -337,17 +337,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:13:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:13:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -377,9 +377,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -407,15 +407,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:24:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:9, col:46>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:1, col:46>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:46>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:46>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:9, col:46> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:1, col:46> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:35, col:45>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:44> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:44> 'int' 1
@@ -446,17 +446,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -486,9 +486,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -513,20 +513,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:20:9, col:46> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:20:1, col:46> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:35, col:45>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:44> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:44> 'int' 1
@@ -557,17 +557,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:21:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:21:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -597,9 +597,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -627,15 +627,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:32:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:9, col:46>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:1, col:46>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:46>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:46>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:9, col:46> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:1, col:46> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:35, col:45>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:44> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:44> 'int' 2
@@ -666,17 +666,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -706,9 +706,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -751,20 +751,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:28:9, col:46> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:28:1, col:46> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:35, col:45>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:44> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:44> 'int' 2
@@ -795,17 +795,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:29:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:30:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:29:23> col:23 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:30:25> col:25 implicit 'int &'
@@ -835,9 +835,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -884,16 +884,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:41:1>
-// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:9, col:19>
+// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:1, col:19>
 // CHECK-NEXT:       |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:9, col:46>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:1, col:46>
 // CHECK-NEXT:         |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:9, col:46>
+// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:1, col:46>
 // CHECK-NEXT:         | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:9, col:46> openmp_structured_block
+// CHECK-NEXT:         | | | |-OMPTeamsDistributeSimdDirective {{.*}} <col:1, col:46> openmp_structured_block
 // CHECK-NEXT:         | | | | |-OMPCollapseClause {{.*}} <col:35, col:45>
 // CHECK-NEXT:         | | | | | `-ConstantExpr {{.*}} <col:44> 'int'
 // CHECK-NEXT:         | | | | |   `-IntegerLiteral {{.*}} <col:44> 'int' 2
@@ -936,9 +936,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -948,8 +948,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -992,9 +992,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1040,13 +1040,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1055,7 +1055,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | `-FieldDecl {{.*}} <line:39:27> col:27 implicit 'int'
 // CHECK-NEXT:         | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT:         | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:36:9, col:46> openmp_structured_block
+// CHECK-NEXT:         |   |-OMPTeamsDistributeSimdDirective {{.*}} <line:36:1, col:46> openmp_structured_block
 // CHECK-NEXT:         |   | |-OMPCollapseClause {{.*}} <col:35, col:45>
 // CHECK-NEXT:         |   | | `-ConstantExpr {{.*}} <col:44> 'int'
 // CHECK-NEXT:         |   | |   `-IntegerLiteral {{.*}} <col:44> 'int' 2
@@ -1098,9 +1098,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1110,8 +1110,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:37:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:38:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:37:23> col:23 implicit 'int &'
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:38:25> col:25 implicit 'int &'
@@ -1154,9 +1154,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:9) *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict'
 // CHECK-NEXT:         |   | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute.c b/clang/test/AST/ast-dump-openmp-teams-distribute.c
index 593e844d03bd8..b574a633ace80 100644
--- a/clang/test/AST/ast-dump-openmp-teams-distribute.c
+++ b/clang/test/AST/ast-dump-openmp-teams-distribute.c
@@ -44,14 +44,14 @@ void test_five(int x, int y, int z) {
 // CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute.c:3:1, line:8:1> line:3:6 test_one 'void (int)'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:8:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:4:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:6:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:9, col:29>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:1, col:29>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:29>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:29>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:9, col:29> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:1, col:29> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-ForStmt {{.*}} <line:6:3, line:7:5>
@@ -67,14 +67,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:4:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:4:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -91,9 +91,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict'
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -115,18 +115,18 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |     `-IntegerLiteral {{.*}} <<invalid sloc>> 'int' 1
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:4:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:4:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:5:9, col:29> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:5:1, col:29> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:6:3, line:7:5>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-ForStmt {{.*}} <line:6:3, line:7:5>
@@ -142,14 +142,14 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:4:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:4:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:6:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
@@ -166,9 +166,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
 // CHECK-NEXT: |       |   | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | | `-NullStmt {{.*}} <line:7:5> openmp_structured_block
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict'
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:6:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |-OMPCapturedExprDecl {{.*}} <col:23> col:23 implicit used .capture_expr. 'int'
@@ -193,15 +193,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:16:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:11:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:9, col:29>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:12:1, col:29>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:29>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:29>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:9, col:29> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:1, col:29> openmp_structured_block
 // CHECK-NEXT: |       | | | | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       | | | |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       | | | |   | |-ForStmt {{.*}} <line:13:3, line:15:7>
@@ -229,17 +229,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:11:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:11:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -269,9 +269,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict'
 // CHECK-NEXT: |       | | | | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -296,20 +296,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:11:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:11:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:12:9, col:29> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:12:1, col:29> openmp_structured_block
 // CHECK-NEXT: |       |   | `-CapturedStmt {{.*}} <line:13:3, line:15:7>
 // CHECK-NEXT: |       |   |   |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT: |       |   |   | |-ForStmt {{.*}} <line:13:3, line:15:7>
@@ -337,17 +337,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   |   | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:13:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:14:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:11:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:11:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:11:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:12:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:13:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:14:25> col:25 implicit 'int &'
@@ -377,9 +377,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   `-NullStmt {{.*}} <line:15:7>
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:12:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict'
 // CHECK-NEXT: |       |   | |-VarDecl {{.*}} <line:13:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:14:10, col:18> col:14 used i 'int' cinit
@@ -407,15 +407,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:24:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:19:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:9, col:41>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:20:1, col:41>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:41>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:41>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:9, col:41> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:1, col:41> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:30, col:40>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:39> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:39> 'int' 1
@@ -446,17 +446,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:19:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:19:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -486,9 +486,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict'
 // CHECK-NEXT: |       | | | | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -513,20 +513,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:19:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:19:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:20:9, col:41> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:20:1, col:41> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:30, col:40>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:39> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:39> 'int' 1
@@ -557,17 +557,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   |   | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:21:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:22:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:19:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:19:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:19:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:20:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:21:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:22:25> col:25 implicit 'int &'
@@ -597,9 +597,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   `-NullStmt {{.*}} <line:23:7>
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:20:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict'
 // CHECK-NEXT: |       |   | |-VarDecl {{.*}} <line:21:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:22:10, col:18> col:14 used i 'int' cinit
@@ -627,15 +627,15 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
 // CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:32:1>
-// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:9, col:19>
+// CHECK-NEXT: |   `-OMPTargetDirective {{.*}} <line:27:1, col:19>
 // CHECK-NEXT: |     |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT: |     | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |     | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:9, col:41>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:28:1, col:41>
 // CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:9, col:41>
+// CHECK-NEXT: |       | |-CapturedStmt {{.*}} <col:1, col:41>
 // CHECK-NEXT: |       | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:9, col:41> openmp_structured_block
+// CHECK-NEXT: |       | | | |-OMPTeamsDistributeDirective {{.*}} <col:1, col:41> openmp_structured_block
 // CHECK-NEXT: |       | | | | |-OMPCollapseClause {{.*}} <col:30, col:40>
 // CHECK-NEXT: |       | | | | | `-ConstantExpr {{.*}} <col:39> 'int'
 // CHECK-NEXT: |       | | | | |   `-IntegerLiteral {{.*}} <col:39> 'int' 2
@@ -666,17 +666,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | |   | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | |   | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | |   | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       | | | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | | | |   |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | | |   `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:27:9) *const restrict'
-// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | | | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:27:1) *const restrict'
+// CHECK-NEXT: |       | | | |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       | | | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -706,9 +706,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | | | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       | | | | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       | | | | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:9) *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict'
 // CHECK-NEXT: |       | | | | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | | | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -751,20 +751,20 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       | | |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       | | `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT: |       | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:27:9) *const restrict'
-// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:27:1) *const restrict'
+// CHECK-NEXT: |       | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT: |       | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       | | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int'
 // CHECK-NEXT: |       | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int'
 // CHECK-NEXT: |       | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT: |       | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:28:9, col:41> openmp_structured_block
+// CHECK-NEXT: |       |   |-OMPTeamsDistributeDirective {{.*}} <line:28:1, col:41> openmp_structured_block
 // CHECK-NEXT: |       |   | |-OMPCollapseClause {{.*}} <col:30, col:40>
 // CHECK-NEXT: |       |   | | `-ConstantExpr {{.*}} <col:39> 'int'
 // CHECK-NEXT: |       |   | |   `-IntegerLiteral {{.*}} <col:39> 'int' 2
@@ -795,17 +795,17 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   |   | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   |   | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   |   | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
 // CHECK-NEXT: |       |   |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       |   |   |-DeclRefExpr {{.*}} <line:29:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT: |       |   |   `-DeclRefExpr {{.*}} <line:30:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:27:9) *const restrict'
-// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:9> col:9 implicit struct definition
+// CHECK-NEXT: |       |   |-ImplicitParamDecl {{.*}} <line:27:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:27:1) *const restrict'
+// CHECK-NEXT: |       |   |-RecordDecl {{.*}} <line:28:1> col:1 implicit struct definition
 // CHECK-NEXT: |       |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT: |       |   | |-FieldDecl {{.*}} <line:29:3> col:3 implicit 'int &'
 // CHECK-NEXT: |       |   | `-FieldDecl {{.*}} <line:30:5> col:5 implicit 'int &'
@@ -835,9 +835,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT: |       |   | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
 // CHECK-NEXT: |       |   | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT: |       |   | |   `-NullStmt {{.*}} <line:31:7> openmp_structured_block
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:9) *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <line:28:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: |       |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict'
 // CHECK-NEXT: |       |   | |-VarDecl {{.*}} <line:29:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT: |       |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       |   | `-VarDecl {{.*}} <line:30:10, col:18> col:14 used i 'int' cinit
@@ -884,16 +884,16 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
 // CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:41:1>
-// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:9, col:19>
+// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:35:1, col:19>
 // CHECK-NEXT:       |-OMPFirstprivateClause {{.*}} <<invalid sloc>> <implicit>
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:       | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:       | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:9, col:41>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:36:1, col:41>
 // CHECK-NEXT:         |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:9, col:41>
+// CHECK-NEXT:         | |-CapturedStmt {{.*}} <col:1, col:41>
 // CHECK-NEXT:         | | |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         | | | |-OMPTeamsDistributeDirective {{.*}} <col:9, col:41> openmp_structured_block
+// CHECK-NEXT:         | | | |-OMPTeamsDistributeDirective {{.*}} <col:1, col:41> openmp_structured_block
 // CHECK-NEXT:         | | | | |-OMPCollapseClause {{.*}} <col:30, col:40>
 // CHECK-NEXT:         | | | | | `-ConstantExpr {{.*}} <col:39> 'int'
 // CHECK-NEXT:         | | | | |   `-IntegerLiteral {{.*}} <col:39> 'int' 2
@@ -936,9 +936,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | |   | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | |   | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | |   | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | |   | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -948,8 +948,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         | | | |   |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | | |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:35:9) *const restrict'
-// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         | | | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:35:1) *const restrict'
+// CHECK-NEXT:         | | | |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         | | | | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -992,9 +992,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | | | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         | | | | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         | | | | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:9) *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         | | | | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict'
 // CHECK-NEXT:         | | | | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         | | | | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | | | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1040,13 +1040,13 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         | | `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
 // CHECK-NEXT:         | |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:35:9) *const restrict'
-// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:35:1) *const restrict'
+// CHECK-NEXT:         | |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:         | | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         | | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int'
 // CHECK-NEXT:         | | | `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
@@ -1055,7 +1055,7 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         | | `-FieldDecl {{.*}} <line:39:27> col:27 implicit 'int'
 // CHECK-NEXT:         | |   `-OMPCaptureKindAttr {{.*}} <<invalid sloc>> Implicit 9
 // CHECK-NEXT:         | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:         |   |-OMPTeamsDistributeDirective {{.*}} <line:36:9, col:41> openmp_structured_block
+// CHECK-NEXT:         |   |-OMPTeamsDistributeDirective {{.*}} <line:36:1, col:41> openmp_structured_block
 // CHECK-NEXT:         |   | |-OMPCollapseClause {{.*}} <col:30, col:40>
 // CHECK-NEXT:         |   | | `-ConstantExpr {{.*}} <col:39> 'int'
 // CHECK-NEXT:         |   | |   `-IntegerLiteral {{.*}} <col:39> 'int' 2
@@ -1098,9 +1098,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   |   | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   |   | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:9) *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict'
 // CHECK-NEXT:         |   |   | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   |   | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
@@ -1110,8 +1110,8 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:37:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
 // CHECK-NEXT:         |   |   |-DeclRefExpr {{.*}} <line:38:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
 // CHECK-NEXT:         |   |   `-DeclRefExpr {{.*}} <line:39:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
-// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:35:9) *const restrict'
-// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:9> col:9 implicit struct definition
+// CHECK-NEXT:         |   |-ImplicitParamDecl {{.*}} <line:35:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:35:1) *const restrict'
+// CHECK-NEXT:         |   |-RecordDecl {{.*}} <line:36:1> col:1 implicit struct definition
 // CHECK-NEXT:         |   | |-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:37:3> col:3 implicit 'int &'
 // CHECK-NEXT:         |   | |-FieldDecl {{.*}} <line:38:5> col:5 implicit 'int &'
@@ -1154,9 +1154,9 @@ void test_five(int x, int y, int z) {
 // CHECK-NEXT:         |   | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
 // CHECK-NEXT:         |   | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
 // CHECK-NEXT:         |   | |     `-NullStmt {{.*}} <line:40:9>
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:9) *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <line:36:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:         |   | |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict'
 // CHECK-NEXT:         |   | |-VarDecl {{.*}} <line:37:8, col:16> col:12 used i 'int' cinit
 // CHECK-NEXT:         |   | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         |   | |-VarDecl {{.*}} <line:38:10, col:18> col:14 used i 'int' cinit
diff --git a/clang/test/AST/ast-dump-openmp-teams.c b/clang/test/AST/ast-dump-openmp-teams.c
index 038af5fdc4720..0661afc810d83 100644
--- a/clang/test/AST/ast-dump-openmp-teams.c
+++ b/clang/test/AST/ast-dump-openmp-teams.c
@@ -9,48 +9,48 @@ void test() {
 // CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams.c:3:1, line:7:1> line:3:6 test 'void ()'
 // CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:7:1>
-// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:4:9, col:19>
-// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:9, col:18>
+// CHECK-NEXT:     `-OMPTargetDirective {{.*}} <line:4:1, col:19>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:1, col:18>
 // CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:           |-CapturedStmt {{.*}} <col:9, col:18>
+// CHECK-NEXT:           |-CapturedStmt {{.*}} <col:1, col:18>
 // CHECK-NEXT:           | `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:           |   |-OMPTeamsDirective {{.*}} <col:9, col:18> openmp_structured_block
+// CHECK-NEXT:           |   |-OMPTeamsDirective {{.*}} <col:1, col:18> openmp_structured_block
 // CHECK-NEXT:           |   | `-CapturedStmt {{.*}} <line:6:3>
 // CHECK-NEXT:           |   |   `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |   |     |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |   |     |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |   |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           |   |     `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:9) *const restrict'
-// CHECK-NEXT:           |   |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:4:9) *const restrict'
-// CHECK-NEXT:           |   |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT:           |   |     |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |   |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           |   |     `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict'
+// CHECK-NEXT:           |   |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:4:1) *const restrict'
+// CHECK-NEXT:           |   |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT:           |   | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           |   `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:           |     |-NullStmt {{.*}} <line:6:3> openmp_structured_block
-// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           |     `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:9) *const restrict'
+// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           |     `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict'
 // CHECK-NEXT:           |-AlwaysInlineAttr {{.*}} <<invalid sloc>> Implicit __forceinline
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit .global_tid. 'const int'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .part_id. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .privates. 'void *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .task_t. 'void *const'
-// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:4:9) *const restrict'
-// CHECK-NEXT:           |-RecordDecl {{.*}} <col:9> col:9 implicit struct definition
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .part_id. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .privates. 'void *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .task_t. 'void *const'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:4:1) *const restrict'
+// CHECK-NEXT:           |-RecordDecl {{.*}} <col:1> col:1 implicit struct definition
 // CHECK-NEXT:           | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:           `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:             |-OMPTeamsDirective {{.*}} <line:5:9, col:18> openmp_structured_block
+// CHECK-NEXT:             |-OMPTeamsDirective {{.*}} <line:5:1, col:18> openmp_structured_block
 // CHECK-NEXT:             | `-CapturedStmt {{.*}} <line:6:3>
 // CHECK-NEXT:             |   `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:             |     |-NullStmt {{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:             |     |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:             |     |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:             |     `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:9) *const restrict'
-// CHECK-NEXT:             |-ImplicitParamDecl {{.*}} <line:4:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:4:9) *const restrict'
-// CHECK-NEXT:             |-RecordDecl {{.*}} <line:5:9> col:9 implicit struct definition
+// CHECK-NEXT:             |     |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:             |     |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:             |     `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict'
+// CHECK-NEXT:             |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:4:1) *const restrict'
+// CHECK-NEXT:             |-RecordDecl {{.*}} <line:5:1> col:1 implicit struct definition
 // CHECK-NEXT:             | `-CapturedRecordAttr {{.*}} <<invalid sloc>> Implicit
 // CHECK-NEXT:             `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
 // CHECK-NEXT:               |-NullStmt {{.*}} <line:6:3> openmp_structured_block
-// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <line:5:9> col:9 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <col:9> col:9 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:               `-ImplicitParamDecl {{.*}} <col:9> col:9 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:9) *const restrict'
+// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <line:5:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:               |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:               `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict'
diff --git a/clang/test/AST/dump.cpp b/clang/test/AST/dump.cpp
index e257cfa99ede1..641abc5ea6469 100644
--- a/clang/test/AST/dump.cpp
+++ b/clang/test/AST/dump.cpp
@@ -5,7 +5,7 @@
 int ga, gb;
 #pragma omp threadprivate(ga, gb)
 
-// CHECK:      |-OMPThreadPrivateDecl {{.+}} <col:9> col:9
+// CHECK:      |-OMPThreadPrivateDecl {{.+}} <col:1> col:1
 // CHECK-NEXT: | |-DeclRefExpr {{.+}} <col:27> 'int' lvalue Var {{.+}} 'ga' 'int'
 // CHECK-NEXT: | `-DeclRefExpr {{.+}} <col:31> 'int' lvalue Var {{.+}} 'gb' 'int'
 
@@ -50,7 +50,7 @@ struct S {
   }
 };
 
-// CHECK:      |     `-OMPParallelForDirective {{.+}} {{<line:.+:9, col:80>|<col:9, col:80>}}
+// CHECK:      |     `-OMPParallelForDirective {{.+}} {{<line:.+:1, col:80>|<col:1, col:80>}}
 // CHECK-NEXT: |       |-OMPDefaultClause {{.+}} <col:26, col:38>
 // CHECK-NEXT: |       |-OMPPrivateClause {{.+}} <col:40, col:49>
 // CHECK-NEXT: |       | `-DeclRefExpr {{.+}} <col:48> 'int' lvalue OMPCapturedExpr {{.+}} 'a' 'int &'
@@ -71,8 +71,8 @@ struct S {
 void foo();
 
 // CHECK:        |-FunctionDecl {{.+}} <line:[[@LINE-2]]:1, col:10> col:6 foo 'void ()'
-// CHECK-NEXT:   |-OMPDeclareSimdDeclAttr {{.+}} <line:[[@LINE-4]]:9, col:34> Implicit BS_Inbranch
-// CHECK:        `-OMPDeclareSimdDeclAttr {{.+}} <line:[[@LINE-6]]:9, col:25> Implicit BS_Undefined
+// CHECK-NEXT:   |-OMPDeclareSimdDeclAttr {{.+}} <line:[[@LINE-4]]:1, col:34> Implicit BS_Inbranch
+// CHECK:        `-OMPDeclareSimdDeclAttr {{.+}} <line:[[@LINE-6]]:1, col:25> Implicit BS_Undefined
 
 #pragma omp declare target
 int bar() {
diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp
index ae117a0aad7dc..87a5197fd2e74 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -15,8 +15,8 @@
 // CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
 // CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
 // CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
-// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+15]];9;;\00"
-// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+7]];9;;\00"
+// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+15]];1;;\00"
+// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+7]];1;;\00"
 
 template <class T>
 void foo(T argc) {}
diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp
index 405b9f16f7d75..f2496b522ac9d 100644
--- a/clang/test/OpenMP/threadprivate_codegen.cpp
+++ b/clang/test/OpenMP/threadprivate_codegen.cpp
@@ -154,10 +154,10 @@ struct S5 {
 // CHECK-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23
 // CHECK-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01
 // CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer
-// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;201;9;;\00"
-// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;256;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;201;1;;\00"
+// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;256;1;;\00"
 // CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;343;19;;\00"
-// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;380;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;380;1;;\00"
 // CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;397;9;;\00"
 // CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;419;10;;\00"
 // CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;442;10;;\00"
@@ -173,7 +173,7 @@ struct S5 {
 // CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;801;10;;\00"
 // CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;824;27;;\00"
 // CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;847;10;;\00"
-// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;315;9;;\00"
+// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;315;1;;\00"
 // CHECK-TLS-DAG:  [[GS1:@.+]] = internal thread_local global [[S1]] zeroinitializer
 // CHECK-TLS-DAG:  [[GS2:@.+]] = internal global [[S2]] zeroinitializer
 // CHECK-TLS-DAG:  [[ARR_X:@.+]] = thread_local global [2 x [3 x [[S1]]]] zeroinitializer
diff --git a/clang/test/PCH/stmt-openmp_structured_block-bit.cpp b/clang/test/PCH/stmt-openmp_structured_block-bit.cpp
index c94624e8cffcd..1aa093e4bc24a 100644
--- a/clang/test/PCH/stmt-openmp_structured_block-bit.cpp
+++ b/clang/test/PCH/stmt-openmp_structured_block-bit.cpp
@@ -15,10 +15,10 @@ void test() {
 // CHECK: TranslationUnitDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
 // CHECK: `-FunctionDecl 0x{{.*}} <{{.*}}stmt-openmp_structured_block-bit.cpp:8:1, line:11:1> line:8:6 {{(test|imported test)}} 'void ()'
 // CHECK-NEXT:   `-CompoundStmt 0x{{.*}} <col:13, line:11:1>
-// CHECK-NEXT:     `-OMPParallelDirective 0x{{.*}} <line:9:9, col:21>
+// CHECK-NEXT:     `-OMPParallelDirective 0x{{.*}} <line:9:1, col:21>
 // CHECK-NEXT:       `-CapturedStmt 0x{{.*}} <line:10:3>
 // CHECK-NEXT:         `-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc> {{(nothrow|imported <undeserialized declarations> nothrow)}}
 // CHECK-NEXT:           |-NullStmt 0x{{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <line:9:9> col:9 {{(implicit|imported implicit)}} .global_tid. 'const int *const __restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <col:9> col:9 {{(implicit|imported implicit)}} .bound_tid. 'const int *const __restrict'
-// CHECK-NEXT:           `-ImplicitParamDecl 0x{{.*}} <col:9> col:9 {{(implicit|imported implicit)}} __context '(anonymous struct at {{.*}}stmt-openmp_structured_block-bit.cpp:9:9) *const __restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <line:9:1> col:1 {{(implicit|imported implicit)}} .global_tid. 'const int *const __restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <col:1> col:1 {{(implicit|imported implicit)}} .bound_tid. 'const int *const __restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl 0x{{.*}} <col:1> col:1 {{(implicit|imported implicit)}} __context '(anonymous struct at {{.*}}stmt-openmp_structured_block-bit.cpp:9:1) *const __restrict'

From 5fc1dfa784dcfeda6ddd4436543e0db119afdf99 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 28 May 2019 19:37:09 +0000
Subject: [PATCH 0398/1176] [AMDGPU] Correct the handling of inlineasm output
 registers.

Summary:
- There's a regression due to the cross-block RC assignment. Use the
  proper way to derive the output register RC in inline asm.

Reviewers: rampitec, alex-t

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, dstuttard, tpr, t-tye, eraman, hiraditya, llvm-commits, yaxunl

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62537

llvm-svn: 361868
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  3 +--
 llvm/test/CodeGen/AMDGPU/inline-asm.ll    | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8f93c63046caf..c860d3ae06c48 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10244,8 +10244,7 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
           unsigned AssignedReg;
           const TargetRegisterClass *RC;
           std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
-              SIRI, TC.ConstraintCode,
-              getSimpleValueType(MF.getDataLayout(), CS.getType()));
+              SIRI, TC.ConstraintCode, TC.ConstraintVT);
           if (RC) {
             MachineRegisterInfo &MRI = MF.getRegInfo();
             if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
index 6a97626c7b0e6..a964dedb27135 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
@@ -277,3 +277,23 @@ entry:
   tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
   ret void
 }
+
+; Check aggregate types are handled properly.
+; CHECK-LABEL: mad_u64
+; CHECK: v_mad_u64_u32
+define void @mad_u64(i32 %x) {
+entry:
+  br i1 undef, label %exit, label %false
+
+false:
+  %s0 = tail call { i64, i64 } asm sideeffect "v_mad_u64_u32 $0, $1, $2, $3, $4", "=v,=s,v,v,v"(i32 -766435501, i32 %x, i64 0)
+  br label %exit
+
+exit:
+  %s1 = phi { i64, i64} [ undef, %entry ], [ %s0, %false]
+  %v0 = extractvalue { i64, i64 } %s1, 0
+  %v1 = extractvalue { i64, i64 } %s1, 1
+  tail call void asm sideeffect "; use $0", "v"(i64 %v0)
+  tail call void asm sideeffect "; use $0", "v"(i64 %v1)
+  ret void
+}

From 0dac476072df69a4136f2f82506c27232db4743c Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Tue, 28 May 2019 20:01:25 +0000
Subject: [PATCH 0399/1176] Change ELF tools to allow multiple sections per
 file.

This is how multi-partition combined output files are going to look. If we
see multiple sections, the tools will just read the first one.

Differential Revision: https://reviews.llvm.org/D62349

llvm-svn: 361869
---
 llvm/include/llvm/Object/ELFObjectFile.h | 10 ++--
 llvm/test/Object/multiple-sections.yaml  | 62 ++++++++++++++++++++++++
 llvm/tools/llvm-readobj/ELFDumper.cpp    | 43 ++++++++--------
 3 files changed, 85 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/Object/multiple-sections.yaml

diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index d5e9d3638dc3c..855742445d2fb 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -951,15 +951,13 @@ ELFObjectFile<ELFT>::create(MemoryBufferRef Object) {
   for (const Elf_Shdr &Sec : *SectionsOrErr) {
     switch (Sec.sh_type) {
     case ELF::SHT_DYNSYM: {
-      if (DotDynSymSec)
-        return createError("More than one dynamic symbol table!");
-      DotDynSymSec = &Sec;
+      if (!DotDynSymSec)
+        DotDynSymSec = &Sec;
       break;
     }
     case ELF::SHT_SYMTAB: {
-      if (DotSymtabSec)
-        return createError("More than one static symbol table!");
-      DotSymtabSec = &Sec;
+      if (!DotSymtabSec)
+        DotSymtabSec = &Sec;
       break;
     }
     case ELF::SHT_SYMTAB_SHNDX: {
diff --git a/llvm/test/Object/multiple-sections.yaml b/llvm/test/Object/multiple-sections.yaml
new file mode 100644
index 0000000000000..e416b760fc5a9
--- /dev/null
+++ b/llvm/test/Object/multiple-sections.yaml
@@ -0,0 +1,62 @@
+# RUN: yaml2obj %s -o %t.o
+# RUN: llvm-readobj -a --elf-cg-profile --addrsig %t.o | FileCheck %s
+
+# Test that multiple sections with the same type does not trigger an error.
+
+# CHECK: ElfHeader {
+# CHECK: SHT_GNU_verdef {
+# CHECK: SHT_GNU_verneed {
+# CHECK: CGProfile [
+# CHECK: Addrsig [
+
+--- !ELF
+FileHeader:      
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_X86_64
+Sections:        
+  - Name:            .symtab2
+    Type:            SHT_SYMTAB
+    Link:            .strtab
+    Content:         ''
+    EntSize:         24
+  - Name:            .versym
+    Type:            SHT_GNU_versym
+    Entries:         [ ]
+  - Name:            .versym2
+    Type:            SHT_GNU_versym
+    Entries:         [ ]
+  - Name:            .verdef
+    Type:            SHT_GNU_verdef
+    Info:            0x0000000000000000
+    Entries:
+  - Name:            .verdef2
+    Type:            SHT_GNU_verdef
+    Info:            0x0000000000000000
+    Entries:
+  - Name:            .verneed
+    Type:            SHT_GNU_verneed
+    Info:            0x0000000000000000
+    Dependencies:
+  - Name:            .verneed2
+    Type:            SHT_GNU_verneed
+    Info:            0x0000000000000000
+    Dependencies:
+  - Name:            .llvm.call-graph-profile
+    Type:            SHT_LLVM_CALL_GRAPH_PROFILE
+    Content:         ''
+    EntSize:         16
+  - Name:            .llvm.call-graph-profile2
+    Type:            SHT_LLVM_CALL_GRAPH_PROFILE
+    Content:         ''
+    EntSize:         16
+  - Name:            .llvm_addrsig
+    Type:            SHT_LLVM_ADDRSIG
+    Content:         ''
+  - Name:            .llvm_addrsig2
+    Type:            SHT_LLVM_ADDRSIG
+    Content:         ''
+Symbols:         
+  - Name:            f
+...
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 150e98df8bb0e..fcadf73110e7e 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1414,45 +1414,40 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
     switch (Sec.sh_type) {
     case ELF::SHT_SYMTAB:
-      if (DotSymtabSec != nullptr)
-        reportError("Multiple SHT_SYMTAB");
-      DotSymtabSec = &Sec;
+      if (!DotSymtabSec)
+        DotSymtabSec = &Sec;
       break;
     case ELF::SHT_DYNSYM:
-      if (DynSymRegion.Size)
-        reportError("Multiple SHT_DYNSYM");
-      DynSymRegion = createDRIFrom(&Sec);
-      // This is only used (if Elf_Shdr present)for naming section in GNU style
-      DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
-      DynamicStringTable = unwrapOrError(Obj->getStringTableForSymtab(Sec));
+      if (!DynSymRegion.Size) {
+        DynSymRegion = createDRIFrom(&Sec);
+        // This is only used (if Elf_Shdr present)for naming section in GNU
+        // style
+        DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
+        DynamicStringTable = unwrapOrError(Obj->getStringTableForSymtab(Sec));
+      }
       break;
     case ELF::SHT_SYMTAB_SHNDX:
       ShndxTable = unwrapOrError(Obj->getSHNDXTable(Sec));
       break;
     case ELF::SHT_GNU_versym:
-      if (SymbolVersionSection != nullptr)
-        reportError("Multiple SHT_GNU_versym");
-      SymbolVersionSection = &Sec;
+      if (!SymbolVersionSection)
+        SymbolVersionSection = &Sec;
       break;
     case ELF::SHT_GNU_verdef:
-      if (SymbolVersionDefSection != nullptr)
-        reportError("Multiple SHT_GNU_verdef");
-      SymbolVersionDefSection = &Sec;
+      if (!SymbolVersionDefSection)
+        SymbolVersionDefSection = &Sec;
       break;
     case ELF::SHT_GNU_verneed:
-      if (SymbolVersionNeedSection != nullptr)
-        reportError("Multiple SHT_GNU_verneed");
-      SymbolVersionNeedSection = &Sec;
+      if (!SymbolVersionNeedSection)
+        SymbolVersionNeedSection = &Sec;
       break;
     case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
-      if (DotCGProfileSec != nullptr)
-        reportError("Multiple .llvm.call-graph-profile");
-      DotCGProfileSec = &Sec;
+      if (!DotCGProfileSec)
+        DotCGProfileSec = &Sec;
       break;
     case ELF::SHT_LLVM_ADDRSIG:
-      if (DotAddrsigSec != nullptr)
-        reportError("Multiple .llvm_addrsig");
-      DotAddrsigSec = &Sec;
+      if (!DotAddrsigSec)
+        DotAddrsigSec = &Sec;
       break;
     }
   }

From 4e68e878f19026999455f293a95a23d19164f53e Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Tue, 28 May 2019 20:22:16 +0000
Subject: [PATCH 0400/1176] Fix GDB pretty printer for Optional after r354246

llvm-svn: 361870
---
 llvm/utils/gdb-scripts/prettyprinters.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/utils/gdb-scripts/prettyprinters.py b/llvm/utils/gdb-scripts/prettyprinters.py
index 7ddc33acb20c8..55f2b369319a0 100644
--- a/llvm/utils/gdb-scripts/prettyprinters.py
+++ b/llvm/utils/gdb-scripts/prettyprinters.py
@@ -129,8 +129,7 @@ def __next__(self):
     self.val = None
     if not val['Storage']['hasVal']:
       raise StopIteration
-    return ('value', val['Storage']['storage']['buffer'].address.cast(
-        val.type.template_argument(0).pointer()).dereference())
+    return ('value', val['Storage']['value'])
 
   def to_string(self):
     return 'llvm::Optional{}'.format('' if self.val['Storage']['hasVal'] else ' is not initialized')

From 2feb7e56e2872d0ea55f9cf8fd1a46f2a08b81ea Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 20:39:39 +0000
Subject: [PATCH 0401/1176] [DAGCombiner][X86][AArch64][AMDGPU] (x + C) - y  ->
  (x - y) + C  fold. Try 2

Summary:
The main motivation is shown by all these `neg` instructions that are now created.
In particular, the `@reg32_lshr_by_negated_unfolded_sub_b` test.

AArch64 test changes all look good (`neg` created), or neutral.

X86 changes look neutral (vectors), or good (`neg` / `xor eax, eax` created).

I'm not sure about `X86/ragreedy-hoist-spill.ll`, it looks like the spill
is now hoisted into preheader (which should still be good?),
2 4-byte reloads become 1 8-byte reload, and are elsewhere,
but i'm not sure how that affects that loop.

I'm unable to interpret AMDGPU change, looks neutral-ish?

This is hopefully a step towards solving [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]].

https://rise4fun.com/Alive/pkdq (we are missing more patterns, i'll submit them later)

This is a recommit, originally committed in rL361852, but reverted
to investigate test-suite compile-time hangs.

Reviewers: craig.topper, RKSimon, spatel, arsenm

Reviewed By: RKSimon

Subscribers: bjope, qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62223

llvm-svn: 361871
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 ++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  6 +-
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 16 ++--
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 21 ++---
 llvm/test/CodeGen/X86/combine-add.ll          |  4 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 83 ++++++++++---------
 llvm/test/CodeGen/X86/shift-amount-mod.ll     |  9 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 20 ++---
 llvm/test/CodeGen/X86/zext-sext.ll            | 21 ++---
 9 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d53ee3134d550..06c2daa90bf10 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2923,6 +2923,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+  }
+
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index 6daef644761b5..d349eb09f7353 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -486,8 +486,7 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
+; CHECK-NEXT:    neg w8, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -500,8 +499,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
+; CHECK-NEXT:    neg x8, x1
 ; CHECK-NEXT:    sub x8, x8, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index c571dac94b81e..8886954623f7c 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 20c84c5b63277..71c8f6926c1f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -15,10 +15,11 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_add_i32 s2, s2, -1
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VARIANT0-NEXT:    s_waitcnt expcnt(0)
+; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -36,12 +37,12 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_add_i32 s2, s2, -1
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
+; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -59,8 +60,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    s_add_i32 s0, s0, -1
-; VARIANT2-NEXT:    v_sub_u32_e32 v3, s0, v0
+; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
+; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -82,8 +83,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    s_add_i32 s0, s0, -1
-; VARIANT3-NEXT:    v_sub_u32_e32 v3, s0, v0
+; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
+; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 6f5f1370e6b4e..1d20fcf33d742 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
 ; SSE-LABEL: combine_vec_add_sub_sub:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> %a, %b
   %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c9a577dbaa92b..fd3d83ed2cbec 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT:    movq %rdx, %r14
-; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:    movq %rdi, %rbp
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
@@ -78,10 +78,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $32, %esi
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    imulq $1040, %r14, %rax ## imm = 0x410
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT:    imulq $1040, %rbx, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@{{.*}}(%rip), %rcx
 ; CHECK-NEXT:    leaq 8(%rcx,%rax), %rbx
-; CHECK-NEXT:    movl $1, %r14d
+; CHECK-NEXT:    movl $1, %r15d
 ; CHECK-NEXT:    movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -91,48 +92,47 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    xorl %r12d, %r12d
-; CHECK-NEXT:    testb %r12b, %r12b
+; CHECK-NEXT:    xorl %r14d, %r14d
+; CHECK-NEXT:    testb %r14b, %r14b
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%r12), %eax
+; CHECK-NEXT:    leal 1(%r14), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $-1, %ecx
+; CHECK-NEXT:    movl $-1, %r13d
 ; CHECK-NEXT:    movslq (%rsi,%rax,4), %rax
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    movl $1, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r13
+; CHECK-NEXT:    ## implicit-def: $r12
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r13
+; CHECK-NEXT:    ## implicit-def: $r12
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    js LBB0_55
 ; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    movq %rax, %r13
+; CHECK-NEXT:    movq %rax, %r12
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
@@ -157,16 +157,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    je LBB0_34
 ; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r13), %rax
+; CHECK-NEXT:    leaq 1(%r12), %rax
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_29
 ; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %r13
+; CHECK-NEXT:    incq %r12
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; CHECK-NEXT:    addl $-324, %eax ## imm = 0xFEBC
+; CHECK-NEXT:    leal -324(%r13), %eax
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
@@ -176,11 +175,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_38
 ; CHECK-NEXT:  LBB0_35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl $11, %r13d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.36: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl $24, %r13d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.37: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -195,8 +194,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movb $0, (%r13)
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movb $0, (%r12)
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
@@ -208,22 +207,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_55
 ; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %ecx ## imm = 0x10C
+; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    movl $2, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_40: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %ecx
+; CHECK-NEXT:    movl $20, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r12), %eax
+; CHECK-NEXT:    leal -268(%r14), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.56: ## %while.body200
@@ -233,12 +232,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_20: ## %sw.bb256
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r14d
-; CHECK-NEXT:    testl %r14d, %r14d
-; CHECK-NEXT:    movl %ecx, %r12d
+; CHECK-NEXT:    decl %r15d
+; CHECK-NEXT:    testl %r15d, %r15d
+; CHECK-NEXT:    movl %r13d, %r14d
 ; CHECK-NEXT:    jg LBB0_13
 ; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -255,27 +254,28 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_11:
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:  LBB0_22: ## %while.end1465
-; CHECK-NEXT:    incl %ecx
-; CHECK-NEXT:    cmpl $16, %ecx
+; CHECK-NEXT:    incl %r13d
+; CHECK-NEXT:    cmpl $16, %r13d
 ; CHECK-NEXT:    ja LBB0_50
 ; CHECK-NEXT:  ## %bb.23: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %ecx, %eax
+; CHECK-NEXT:    btl %r13d, %eax
 ; CHECK-NEXT:    jae LBB0_50
 ; CHECK-NEXT:  ## %bb.24:
-; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
 ; CHECK-NEXT:  LBB0_48: ## %if.then1477
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
-; CHECK-NEXT:    addq $8189, %r15 ## imm = 0x1FFD
-; CHECK-NEXT:    subq %rbx, %r15
-; CHECK-NEXT:    addq _syHistory@{{.*}}(%rip), %r15
+; CHECK-NEXT:    subq %rbp, %rbx
+; CHECK-NEXT:    movq _syHistory@{{.*}}(%rip), %rax
+; CHECK-NEXT:    leaq 8189(%rbx,%rax), %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_49: ## %for.body1723
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    decq %r15
+; CHECK-NEXT:    decq %rax
 ; CHECK-NEXT:    jmp LBB0_49
 ; CHECK-NEXT:  LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT:    movl $512, %eax ## imm = 0x200
@@ -302,7 +302,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_55: ## %if.then.i
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT:    movq %r15, %rbx
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT:    movq %rbx, %rbp
 ; CHECK-NEXT:    jmp LBB0_48
 ; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
 ; CHECK-NEXT:    xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 6c268d8a27f42..e8af5f66d36c9 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
-; X64-NEXT:    subl %esi, %ecx
+; X64-NEXT:    negl %ecx
 ; X64-NEXT:    subl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
@@ -1139,9 +1139,10 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 2ffbfcb56b2f7..37a3dcbd0e4a3 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal 32(%rdi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal -32(%rdi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    subl %edx, %esi
 ; X64-NEXT:    leal 32(%rsi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    psubd %xmm2, %xmm0
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psubd %xmm2, %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    psubd %xmm2, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    psubd %xmm2, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 7034378a880b5..84096e3b6805d 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -15,30 +15,27 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
 ; CHECK-NEXT:    subq %rax, %rsi
 ; CHECK-NEXT:    movq (%rdx), %rax
 ; CHECK-NEXT:    movswl 8(%rdi), %edx
-; CHECK-NEXT:    movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
 ; CHECK-NEXT:    movswl (%rax,%rsi,2), %eax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    imull %edx, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    addl $2138875574, %eax # imm = 0x7F7CA6B6
 ; CHECK-NEXT:    cmpl $-8608074, %eax # imm = 0xFF7CA6B6
-; CHECK-NEXT:    movslq %eax, %r8
+; CHECK-NEXT:    movslq %eax, %rdi
 ; CHECK-NEXT:    setl %dl
 ; CHECK-NEXT:    cmpl $2138875573, %eax # imm = 0x7F7CA6B5
-; CHECK-NEXT:    movq %r8, %r9
+; CHECK-NEXT:    movq %rdi, %r8
 ; CHECK-NEXT:    leal -1(%rdx,%rdx), %edx
 ; CHECK-NEXT:    cmovlel %edx, %esi
-; CHECK-NEXT:    subq %rax, %r9
-; CHECK-NEXT:    addq %r8, %rdi
+; CHECK-NEXT:    subq %rax, %r8
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, %esi
-; CHECK-NEXT:    cmovneq %rax, %r9
-; CHECK-NEXT:    testl %r8d, %r8d
-; CHECK-NEXT:    cmovnsq %rax, %r9
-; CHECK-NEXT:    movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
-; CHECK-NEXT:    subq %r9, %rdi
-; CHECK-NEXT:    addq (%rcx), %rdi
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    cmovneq %rax, %r8
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovnsq %rax, %r8
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    subq %r8, %rdi
+; CHECK-NEXT:    leaq -2138875574(%rax,%rdi), %rax
 ; CHECK-NEXT:    movq %rax, (%rcx)
 ; CHECK-NEXT:    retq
 entry:

From 96c9986199f18909742076837604b1b8f0e88639 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 20:39:55 +0000
Subject: [PATCH 0402/1176] [DAGCombiner][X86][AArch64][SPARC][SystemZ] y - (x
 + C)  ->  (y - x) - C  fold. Try 2

Summary:
Direct sibling of D62223 patch.
While i don't have a direct motivational pattern for this,
it would seem to make sense to handle both patterns (or none),
for symmetry?

The aarch64 changes look neutral;
sparc and systemz look like improvement (one less instruction each);
x86 changes - 32bit case improves, 64bit case shows that LEA no longer
gets constructed, which may be because that whole test is `-mattr=+slow-lea,+slow-3ops-lea`

https://rise4fun.com/Alive/ffh

This is a recommit, originally committed in rL361853, but reverted
to investigate test-suite compile-time hangs.

Reviewers: RKSimon, craig.topper, spatel, t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, jyknight, javed.absar, kristof.beyls, fedor.sergeev, jrtc27, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62252

llvm-svn: 361872
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  4 +--
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 12 ++++----
 .../CodeGen/SPARC/2013-05-17-CallFrame.ll     |  5 ++--
 llvm/test/CodeGen/SystemZ/alloca-03.ll        | 11 ++++----
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 28 +++++++++----------
 6 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 06c2daa90bf10..51f9c34e7ee82 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2929,6 +2929,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
   }
+  // y - (x + C)  ->  (y - x) - C
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N1.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index d349eb09f7353..c91700436bb96 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -513,7 +513,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    add w8, w2, w1
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 0, %a
@@ -525,7 +525,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    add x8, x2, x1
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 8886954623f7c..167ca6a10ec13 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w2, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w8, w2, w8
+; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI19_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
diff --git a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
index 1a97e4e317e57..274e99b114c32 100644
--- a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
+++ b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
@@ -15,10 +15,9 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
 ; V8-NEXT:    .cfi_register 15, 31
 ; V8-NEXT:    add %i0, 7, %i0
 ; V8-NEXT:    and %i0, -8, %i0
-; V8-NEXT:    add %i0, 8, %i0
 ; V8-NEXT:    sub %sp, %i0, %i0
-; V8-NEXT:    add %i0, 96, %o0
-; V8-NEXT:    mov %i0, %sp
+; V8-NEXT:    add %i0, -8, %sp
+; V8-NEXT:    add %i0, 88, %o0
 ; V8-NEXT:    add %sp, -16, %sp
 ; V8-NEXT:    st %o0, [%sp+104]
 ; V8-NEXT:    st %o0, [%sp+100]
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index 343071211b751..cac569ff41fa3 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -75,13 +75,12 @@ define void @f3(i64 %len) {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    sllg %r2, %r2, 3
-; CHECK-NEXT:    la %r0, 120(%r2)
+; CHECK-NEXT:    sllg %r0, %r2, 3
 ; CHECK-NEXT:    sgr %r1, %r0
-; CHECK-NEXT:    la %r2, 280(%r1)
-; CHECK-NEXT:    nill %r2, 65408
-; CHECK-NEXT:    lgr %r15, %r1
-; CHECK-NEXT:    mvghi 0(%r2), 10
+; CHECK-NEXT:    lay %r15, -120(%r1)
+; CHECK-NEXT:    la %r1, 160(%r1)
+; CHECK-NEXT:    nill %r1, 65408
+; CHECK-NEXT:    mvghi 0(%r1), 10
 ; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
 ; CHECK-NEXT:    br %r14
   %x = alloca i64, i64 %len, align 128
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 37a3dcbd0e4a3..59a42ad9e3926 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl $32, %ecx
-; X32-NEXT:    subl %ecx, %eax
+; X32-NEXT:    addl %ecx, %eax
+; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    addl $32, %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal -32(%rdx,%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    psubd %xmm0, %xmm2
-; X32-NEXT:    movdqa %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd %xmm2, %xmm1
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm0
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd %xmm2, %xmm1
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS

From d485c6bc9f9d3d9efa5a6071c11bc8a5ada87e06 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 20:40:03 +0000
Subject: [PATCH 0403/1176] [DAGCombine][X86][AArch64][AMDGPU] (x - y) + -1  ->
  add (xor y, -1), x  fold. Try 2

Summary:
This prevents regressions in next patch,
and somewhat recovers from the regression to AMDGPU test in D62223.

It is indeed not great that we leave vector decrement,
don't transform it into vector add all-ones..

https://rise4fun.com/Alive/ZRl

This is a recommit, originally committed in rL361855, but reverted
to investigate test-suite compile-time hangs.

Reviewers: RKSimon, craig.topper, spatel, arsenm

Reviewed By: RKSimon, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62263

llvm-svn: 361873
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++
 llvm/test/CodeGen/AArch64/xor.ll              | 18 +++---
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 19 +++---
 llvm/test/CodeGen/X86/xor.ll                  | 62 ++++++++++---------
 4 files changed, 62 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 51f9c34e7ee82..4e141695677b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2303,6 +2303,13 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     }
   }
 
+  // (x - y) + -1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isAllOnesOrAllOnesSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
@@ -2923,6 +2930,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // (x - y) - 1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
       isConstantOrConstantVector(N0.getOperand(1))) {
diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll
index 1dca55a971308..ca6c0dfabba48 100644
--- a/llvm/test/CodeGen/AArch64/xor.ll
+++ b/llvm/test/CodeGen/AArch64/xor.ll
@@ -18,8 +18,8 @@ define i32 @PR39657(i8* %p, i64 %x) {
 define i32 @add_of_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -29,8 +29,8 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -40,9 +40,8 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -52,9 +51,8 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 71c8f6926c1f9..2dd7e20c00ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -9,17 +9,16 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0:       ; %bb.0: ; %entry
 ; VARIANT0-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT0-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT0-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT0-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT0-NEXT:    s_mov_b32 s6, 0
 ; VARIANT0-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_waitcnt expcnt(0)
-; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -31,18 +30,18 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1:       ; %bb.0: ; %entry
 ; VARIANT1-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT1-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT1-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT1-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT1-NEXT:    s_mov_b32 s6, 0
 ; VARIANT1-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
-; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -60,8 +59,7 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT2-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -83,8 +81,7 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT3-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 5ef5999be95f4..654382f7b73e9 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -532,22 +532,24 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -558,22 +560,24 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not_decrement:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -583,24 +587,23 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -610,24 +613,23 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not_decrement:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>

From dfc34f0211b78a288cddfdc59798132c8087592d Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 28 May 2019 20:40:10 +0000
Subject: [PATCH 0404/1176] [DAGCombine] (x - C) - y  ->  (x - y) - C  fold.
 Try 2

Summary:
Again only vectors affected. Frustrating. Let me take a look into that..

https://rise4fun.com/Alive/AAq

This is a recommit, originally committed in rL361856, but reverted
to investigate test-suite compile-time hangs.

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, JDevlieghere, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62294

llvm-svn: 361874
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 7 +++++++
 llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll | 2 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll     | 4 ++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4e141695677b3..48c918051bb1d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2949,6 +2949,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
   }
+  // (x - C) - y  ->  (x - y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 167ca6a10ec13..344016ea5027b 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -314,8 +314,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 59a42ad9e3926..7da5c7db2d689 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -460,15 +460,15 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    psubd %xmm2, %xmm0
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psubd %xmm2, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>

From 6d7bf5e8df5455fa32cc437f7043bbb0a0607d49 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 28 May 2019 20:47:44 +0000
Subject: [PATCH 0405/1176] [CodeGen] Add lrint/llrint builtins

This patch add the ISD::LRINT and ISD::LLRINT along with new
intrinsics.  The changes are straightforward as for other
floating-point rounding functions, with just some adjustments
required to handle the return value being an interger.

The idea is to optimize lrint/llrint generation for AArch64
in a subsequent patch.  Current semantic is just route it to libm
symbol.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D62017

llvm-svn: 361875
---
 llvm/docs/LangRef.rst                         | 74 +++++++++++++++++
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |  2 +-
 llvm/include/llvm/IR/Intrinsics.td            |  2 +
 llvm/include/llvm/IR/RuntimeLibcalls.def      | 10 +++
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 14 ++++
 .../SelectionDAG/LegalizeFloatTypes.cpp       | 56 +++++++++++++
 .../SelectionDAG/LegalizeIntegerTypes.cpp     | 27 ++++++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  5 ++
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  6 +-
 .../SelectionDAG/SelectionDAGDumper.cpp       |  2 +
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  2 +
 llvm/lib/IR/Verifier.cpp                      |  4 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  2 +
 llvm/test/CodeGen/AArch64/llrint-conv.ll      | 56 +++++++++++++
 llvm/test/CodeGen/AArch64/lrint-conv.ll       | 56 +++++++++++++
 llvm/test/CodeGen/ARM/llrint-conv.ll          | 25 ++++++
 llvm/test/CodeGen/ARM/lrint-conv.ll           | 25 ++++++
 llvm/test/CodeGen/Mips/llrint-conv.ll         | 56 +++++++++++++
 llvm/test/CodeGen/Mips/lrint-conv.ll          | 56 +++++++++++++
 llvm/test/CodeGen/PowerPC/llrint-conv.ll      | 56 +++++++++++++
 llvm/test/CodeGen/PowerPC/lrint-conv.ll       | 56 +++++++++++++
 llvm/test/CodeGen/X86/llrint-conv-i32.ll      | 60 ++++++++++++++
 llvm/test/CodeGen/X86/llrint-conv.ll          | 83 +++++++++++++++++++
 llvm/test/CodeGen/X86/lrint-conv-i32.ll       | 32 +++++++
 llvm/test/CodeGen/X86/lrint-conv.ll           | 83 +++++++++++++++++++
 25 files changed, 847 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/llrint-conv.ll
 create mode 100644 llvm/test/CodeGen/AArch64/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/ARM/llrint-conv.ll
 create mode 100644 llvm/test/CodeGen/ARM/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/Mips/llrint-conv.ll
 create mode 100644 llvm/test/CodeGen/Mips/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/llrint-conv.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/X86/llrint-conv-i32.ll
 create mode 100644 llvm/test/CodeGen/X86/llrint-conv.ll
 create mode 100644 llvm/test/CodeGen/X86/lrint-conv-i32.ll
 create mode 100644 llvm/test/CodeGen/X86/lrint-conv.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6311f6f616369..43f27da1afcd2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12449,6 +12449,80 @@ Semantics:
 This function returns the same values as the libm ``llround``
 functions would, but without setting errno.
 
+'``llvm.lrint.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.lrint`` on any
+floating-point type. Not all targets support all types however.
+
+::
+
+      declare i32 @llvm.lrint.i32.f32(float %Val)
+      declare i32 @llvm.lrint.i32.f64(double %Val)
+      declare i32 @llvm.lrint.i32.f80(float %Val)
+      declare i32 @llvm.lrint.i32.f128(double %Val)
+      declare i32 @llvm.lrint.i32.ppcf128(double %Val)
+
+      declare i64 @llvm.lrint.i64.f32(float %Val)
+      declare i64 @llvm.lrint.i64.f64(double %Val)
+      declare i64 @llvm.lrint.i64.f80(float %Val)
+      declare i64 @llvm.lrint.i64.f128(double %Val)
+      declare i64 @llvm.lrint.i64.ppcf128(double %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.lrint.*``' intrinsics returns the operand rounded to the
+nearest integer.
+
+Arguments:
+""""""""""
+
+The argument is a floating-point number and return is an integer type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``lrint``
+functions would, but without setting errno.
+
+'``llvm.llrint.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.llrint`` on any
+floating-point type. Not all targets support all types however.
+
+::
+
+      declare i64 @llvm.llrint.i64.f32(float %Val)
+      declare i64 @llvm.llrint.i64.f64(double %Val)
+      declare i64 @llvm.llrint.i64.f80(float %Val)
+      declare i64 @llvm.llrint.i64.f128(double %Val)
+      declare i64 @llvm.llrint.i64.ppcf128(double %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.llrint.*``' intrinsics returns the operand rounded to the
+nearest integer.
+
+Arguments:
+""""""""""
+
+The argument is a floating-point number and return is an integer type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``llrint``
+functions would, but without setting errno.
+
 Bit Manipulation Intrinsics
 ---------------------------
 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 9b765299b10c5..acf27dcc5fab5 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -610,7 +610,7 @@ namespace ISD {
     FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
     FLOG, FLOG2, FLOG10, FEXP, FEXP2,
     FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
-    LROUND, LLROUND,
+    LROUND, LLROUND, LRINT, LLRINT,
 
     /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
     /// values.
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b329d5c3eb88f..06620feee3c60 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -541,6 +541,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
 
   def int_lround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
   def int_llround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+  def int_lrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+  def int_llrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
 }
 
 def int_minnum : Intrinsic<[llvm_anyfloat_ty],
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index c617ef9a8d3b2..f6c74d497b188 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -264,6 +264,16 @@ HANDLE_LIBCALL(LLROUND_F64, "llround")
 HANDLE_LIBCALL(LLROUND_F80, "llroundl")
 HANDLE_LIBCALL(LLROUND_F128, "llroundl")
 HANDLE_LIBCALL(LLROUND_PPCF128, "llroundl")
+HANDLE_LIBCALL(LRINT_F32, "lrintf")
+HANDLE_LIBCALL(LRINT_F64, "lrint")
+HANDLE_LIBCALL(LRINT_F80, "lrintl")
+HANDLE_LIBCALL(LRINT_F128, "lrintl")
+HANDLE_LIBCALL(LRINT_PPCF128, "lrintl")
+HANDLE_LIBCALL(LLRINT_F32, "llrintf")
+HANDLE_LIBCALL(LLRINT_F64, "llrint")
+HANDLE_LIBCALL(LLRINT_F80, "llrintl")
+HANDLE_LIBCALL(LLRINT_F128, "llrintl")
+HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl")
 
 // Conversion
 HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 19baf178f121d..4f7d14ab67e42 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1003,6 +1003,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   case ISD::EXTRACT_VECTOR_ELT:
   case ISD::LROUND:
   case ISD::LLROUND:
+  case ISD::LRINT:
+  case ISD::LLRINT:
     Action = TLI.getOperationAction(Node->getOpcode(),
                                     Node->getOperand(0).getValueType());
     break;
@@ -2919,6 +2921,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                                          RTLIB::LLROUND_F128,
                                          RTLIB::LLROUND_PPCF128));
     break;
+  case ISD::LRINT:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+                                         RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+                                         RTLIB::LRINT_F128,
+                                         RTLIB::LRINT_PPCF128));
+    break;
+  case ISD::LLRINT:
+    Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+                                         RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+                                         RTLIB::LLRINT_F128,
+                                         RTLIB::LLRINT_PPCF128));
+    break;
   case ISD::VAARG:
     Results.push_back(DAG.expandVAArg(Node));
     Results.push_back(Results[0].getValue(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 1b21f7df4c8b5..b4849b2881e6d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -774,6 +774,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
   case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_XINT(N); break;
   case ISD::LROUND:      Res = SoftenFloatOp_LROUND(N); break;
   case ISD::LLROUND:     Res = SoftenFloatOp_LLROUND(N); break;
+  case ISD::LRINT:       Res = SoftenFloatOp_LRINT(N); break;
+  case ISD::LLRINT:      Res = SoftenFloatOp_LLRINT(N); break;
   case ISD::SELECT:      Res = SoftenFloatOp_SELECT(N); break;
   case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
   case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
@@ -1068,6 +1070,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
                          NVT, Op, false, SDLoc(N)).first;
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LRINT_F32,
+                                           RTLIB::LRINT_F64,
+                                           RTLIB::LRINT_F80,
+                                           RTLIB::LRINT_F128,
+                                           RTLIB::LRINT_PPCF128),
+                         NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLRINT_F32,
+                                           RTLIB::LLRINT_F64,
+                                           RTLIB::LLRINT_F80,
+                                           RTLIB::LLRINT_F128,
+                                           RTLIB::LLRINT_PPCF128),
+                         NVT, Op, false, SDLoc(N)).first;
+}
+
 //===----------------------------------------------------------------------===//
 //  Float Result Expansion
 //===----------------------------------------------------------------------===//
@@ -1602,6 +1632,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
   case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
   case ISD::LROUND:     Res = ExpandFloatOp_LROUND(N); break;
   case ISD::LLROUND:    Res = ExpandFloatOp_LLROUND(N); break;
+  case ISD::LRINT:      Res = ExpandFloatOp_LRINT(N); break;
+  case ISD::LLRINT:     Res = ExpandFloatOp_LLRINT(N); break;
   case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
   case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
   case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
@@ -1796,6 +1828,30 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
                          RVT, N->getOperand(0), false, SDLoc(N)).first;
 }
 
+SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LRINT_F32,
+                                           RTLIB::LRINT_F64,
+                                           RTLIB::LRINT_F80,
+                                           RTLIB::LRINT_F128,
+                                           RTLIB::LRINT_PPCF128),
+                         RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+                                           RTLIB::LLRINT_F32,
+                                           RTLIB::LLRINT_F64,
+                                           RTLIB::LLRINT_F80,
+                                           RTLIB::LLRINT_F128,
+                                           RTLIB::LLRINT_PPCF128),
+                         RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
 //===----------------------------------------------------------------------===//
 //  Float Operand Promotion
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 357654fb1af85..56bc237258fa2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1624,6 +1624,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::FP_TO_SINT:  ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
   case ISD::FP_TO_UINT:  ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
   case ISD::LLROUND:     ExpandIntRes_LLROUND(N, Lo, Hi); break;
+  case ISD::LLRINT:      ExpandIntRes_LLRINT(N, Lo, Hi); break;
   case ISD::LOAD:        ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
   case ISD::MUL:         ExpandIntRes_MUL(N, Lo, Hi); break;
   case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -2517,6 +2518,32 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
                Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+  if (VT == MVT::f32)
+    LC = RTLIB::LLRINT_F32;
+  else if (VT == MVT::f64)
+    LC = RTLIB::LLRINT_F64;
+  else if (VT == MVT::f80)
+    LC = RTLIB::LLRINT_F80;
+  else if (VT == MVT::f128)
+    LC = RTLIB::LLRINT_F128;
+  else if (VT == MVT::ppcf128)
+    LC = RTLIB::LLRINT_PPCF128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+
+  SDValue Op = N->getOperand(0);
+  if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+    Op = GetPromotedFloat(Op);
+
+  SDLoc dl(N);
+  EVT RetVT = N->getValueType(0);
+  SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+               Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   if (ISD::isNormalLoad(N)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 590b5c7e5a028..a0e7c8a89c187 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -419,6 +419,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandIntRes_FP_TO_SINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_FP_TO_UINT        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_LLROUND           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_LLRINT            (SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void ExpandIntRes_Logical           (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_ADDSUB            (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -556,6 +557,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
   SDValue SoftenFloatOp_LROUND(SDNode *N);
   SDValue SoftenFloatOp_LLROUND(SDNode *N);
+  SDValue SoftenFloatOp_LRINT(SDNode *N);
+  SDValue SoftenFloatOp_LLRINT(SDNode *N);
   SDValue SoftenFloatOp_SELECT(SDNode *N);
   SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
   SDValue SoftenFloatOp_SETCC(SDNode *N);
@@ -617,6 +620,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
   SDValue ExpandFloatOp_LROUND(SDNode *N);
   SDValue ExpandFloatOp_LLROUND(SDNode *N);
+  SDValue ExpandFloatOp_LRINT(SDNode *N);
+  SDValue ExpandFloatOp_LLRINT(SDNode *N);
   SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
   SDValue ExpandFloatOp_SETCC(SDNode *N);
   SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a5274877ecee4..fe857f73b2548 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6029,12 +6029,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     return;
   }
   case Intrinsic::lround:
-  case Intrinsic::llround: {
+  case Intrinsic::llround:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint: {
     unsigned Opcode;
     switch (Intrinsic) {
     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
     case Intrinsic::lround:  Opcode = ISD::LROUND;  break;
     case Intrinsic::llround: Opcode = ISD::LLROUND; break;
+    case Intrinsic::lrint:   Opcode = ISD::LRINT;   break;
+    case Intrinsic::llrint:  Opcode = ISD::LLRINT;  break;
     }
 
     EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 2841633657830..da3049881d310 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -332,6 +332,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FP_TO_FP16:                 return "fp_to_fp16";
   case ISD::LROUND:                     return "lround";
   case ISD::LLROUND:                    return "llround";
+  case ISD::LRINT:                      return "lrint";
+  case ISD::LLRINT:                     return "llrint";
 
     // Control flow instructions
   case ISD::BR:                         return "br";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 32f97f7e2aacb..888d420a441db 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -713,6 +713,8 @@ void TargetLoweringBase::initActions() {
     setOperationAction(ISD::FROUND,     VT, Expand);
     setOperationAction(ISD::LROUND,     VT, Expand);
     setOperationAction(ISD::LLROUND,    VT, Expand);
+    setOperationAction(ISD::LRINT,      VT, Expand);
+    setOperationAction(ISD::LLRINT,     VT, Expand);
   }
 
   // Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index fc8d210e67ad9..878a0081e1977 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4622,7 +4622,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     break;
   }
   case Intrinsic::lround:
-  case Intrinsic::llround: {
+  case Intrinsic::llround:
+  case Intrinsic::lrint:
+  case Intrinsic::llrint: {
     Type *ValTy = Call.getArgOperand(0)->getType();
     Type *ResultTy = Call.getType();
     Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7b4ce08b57860..e1c0c8a6bd501 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -674,6 +674,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FMA, MVT::f80, Expand);
     setOperationAction(ISD::LROUND, MVT::f80, Expand);
     setOperationAction(ISD::LLROUND, MVT::f80, Expand);
+    setOperationAction(ISD::LRINT, MVT::f80, Expand);
+    setOperationAction(ISD::LLRINT, MVT::f80, Expand);
   }
 
   // Always use a library call for pow.
diff --git a/llvm/test/CodeGen/AArch64/llrint-conv.ll b/llvm/test/CodeGen/AArch64/llrint-conv.ll
new file mode 100644
index 0000000000000..365f6b5456de5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llrint-conv.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      llrintf
+define i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       b       llrintf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      llrint
+define i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       b       llrint
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      llrintl
+define i32 @testmswl(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       b       llrintl
+define i64 @testmsll(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llrint.f32(float) nounwind readnone
+declare i64 @llvm.llrint.f64(double) nounwind readnone
+declare i64 @llvm.llrint.f128(fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/lrint-conv.ll b/llvm/test/CodeGen/AArch64/lrint-conv.ll
new file mode 100644
index 0000000000000..a652de9cb3e2c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/lrint-conv.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      lrintf
+define i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       b       lrintf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      lrint
+define i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       b       lrint
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      lrintl
+define dso_local i32 @testmswl(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       b       lrintl
+define dso_local i64 @testmsll(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.lrint.i64.f128(fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
new file mode 100644
index 0000000000000..017955bb43afb
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+
+; SOFTFP-LABEL: testmsxs_builtin:
+; SOFTFP:       bl      llrintf
+; HARDFP-LABEL: testmsxs_builtin:
+; HARDFP:       bl      llrintf
+define i64 @testmsxs_builtin(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  ret i64 %0
+}
+
+; SOFTFP-LABEL: testmsxd_builtin:
+; SOFTFP:       bl      llrint
+; HARDFP-LABEL: testmsxd_builtin:
+; HARDFP:       bl      llrint
+define i64 @testmsxd_builtin(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llrint.f32(float) nounwind readnone
+declare i64 @llvm.llrint.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
new file mode 100644
index 0000000000000..192da565c12fd
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
+; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+
+; SOFTFP-LABEL: testmsws_builtin:
+; SOFTFP:       bl      lrintf
+; HARDFP-LABEL: testmsws_builtin:
+; HARDFP:       bl      lrintf
+define i32 @testmsws_builtin(float %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f32(float %x)
+  ret i32 %0
+}
+
+; SOFTFP-LABEL: testmswd_builtin:
+; SOFTFP:       bl      lrint
+; HARDFP-LABEL: testmswd_builtin:
+; HARDFP:       bl      lrint
+define i32 @testmswd_builtin(double %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f64(double %x)
+  ret i32 %0
+}
+
+declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
+declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
new file mode 100644
index 0000000000000..dcb4e5657e80b
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+
+define signext i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       jal     llrintf
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       jal     llrintf
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       jal     llrint
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       jal     llrint
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswl(fp128 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       jal     llrintl
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsll(fp128 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       jal     llrintl
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llrint.f32(float) nounwind readnone
+declare i64 @llvm.llrint.f64(double) nounwind readnone
+declare i64 @llvm.llrint.f128(fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
new file mode 100644
index 0000000000000..bd3f7b3babe10
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+
+define signext i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       jal     lrintf
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       jal     lrintf
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       jal     lrint
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       jal     lrint
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  ret i64 %0
+}
+
+define signext i32 @testmswl(fp128 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       jal     lrintl
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define signext i64 @testmsll(fp128 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       jal     lrintl
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.lrint.i64.f128(fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
new file mode 100644
index 0000000000000..daadf85b4085a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      llrintf
+define signext i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       bl      llrintf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      llrint
+define signext i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       bl      llrint
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      llrintl
+define signext i32 @testmswl(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       bl      llrintl
+define i64 @testmsll(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llrint.f32(float) nounwind readnone
+declare i64 @llvm.llrint.f64(double) nounwind readnone
+declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/lrint-conv.ll b/llvm/test/CodeGen/PowerPC/lrint-conv.ll
new file mode 100644
index 0000000000000..adfc994497323
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lrint-conv.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK:       bl      lrintf
+define signext i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       bl      lrintf
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       bl      lrint
+define signext i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       bl      lrint
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  ret i64 %0
+}
+
+; CHECK-LABEL: testmswl:
+; CHECK:       bl      lrintl
+define signext i32 @testmswl(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.ppcf128(ppc_fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsll:
+; CHECK:       bl      lrintl
+define i64 @testmsll(ppc_fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.ppcf128(ppc_fp128 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.lrint.i64.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/llrint-conv-i32.ll b/llvm/test/CodeGen/X86/llrint-conv-i32.ll
new file mode 100644
index 0000000000000..de05af14fa169
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llrint-conv-i32.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown             | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
+
+define i64 @testmsxs_builtin(float %x) {
+; CHECK-LABEL: testmsxs_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstps (%esp)
+; CHECK-NEXT:    calll llrintf
+; CHECK-NEXT:    popl %ecx
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+;
+; SSE2-LABEL: testmsxs_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    pushl %eax
+; SSE2-NEXT:    .cfi_def_cfa_offset 8
+; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movss %xmm0, (%esp)
+; SSE2-NEXT:    calll llrintf
+; SSE2-NEXT:    popl %ecx
+; SSE2-NEXT:    .cfi_def_cfa_offset 4
+; SSE2-NEXT:    retl
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  ret i64 %0
+}
+
+define i64 @testmsxd_builtin(double %x) {
+; CHECK-LABEL: testmsxd_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subl $8, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT:    fstpl (%esp)
+; CHECK-NEXT:    calll llrint
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    retl
+;
+; SSE2-LABEL: testmsxd_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    subl $8, %esp
+; SSE2-NEXT:    .cfi_def_cfa_offset 12
+; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT:    movsd %xmm0, (%esp)
+; SSE2-NEXT:    calll llrint
+; SSE2-NEXT:    addl $8, %esp
+; SSE2-NEXT:    .cfi_def_cfa_offset 4
+; SSE2-NEXT:    retl
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llrint.f32(float) nounwind readnone
+declare i64 @llvm.llrint.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
new file mode 100644
index 0000000000000..bcdea81b023ac
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+define i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq llrintf
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llrintf # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  ret i64 %0
+}
+
+define i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq llrint
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llrint # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  ret i64 %0
+}
+
+define dso_local i32 @testmswl(x86_fp80 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpt (%rsp)
+; CHECK-NEXT:    callq llrintl
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define dso_local i64 @testmsll(x86_fp80 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp llrintl # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.llrint.f32(float) nounwind readnone
+declare i64 @llvm.llrint.f64(double) nounwind readnone
+declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
new file mode 100644
index 0000000000000..7bc8c36741f59
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown             | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
+
+define i32 @testmsws_builtin(float %x) {
+; CHECK-LABEL: testmsws_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lrintf # TAILCALL
+;
+; SSE2-LABEL: testmsws_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    jmp lrintf # TAILCALL
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f32(float %x)
+  ret i32 %0
+}
+
+define i32 @testmswd_builtin(double %x) {
+; CHECK-LABEL: testmswd_builtin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lrint # TAILCALL
+;
+; SSE2-LABEL: testmswd_builtin:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    jmp lrint # TAILCALL
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f64(double %x)
+  ret i32 %0
+}
+
+declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
+declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv.ll b/llvm/test/CodeGen/X86/lrint-conv.ll
new file mode 100644
index 0000000000000..a34c31e889d53
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lrint-conv.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
+
+define i32 @testmsws(float %x) {
+; CHECK-LABEL: testmsws:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq lrintf
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxs(float %x) {
+; CHECK-LABEL: testmsxs:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lrintf # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  ret i64 %0
+}
+
+define i32 @testmswd(double %x) {
+; CHECK-LABEL: testmswd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq lrint
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define i64 @testmsxd(double %x) {
+; CHECK-LABEL: testmsxd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lrint # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  ret i64 %0
+}
+
+define dso_local i32 @testmswl(x86_fp80 %x) {
+; CHECK-LABEL: testmswl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpt (%rsp)
+; CHECK-NEXT:    callq lrintl
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+define dso_local i64 @testmsll(x86_fp80 %x) {
+; CHECK-LABEL: testmsll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lrintl # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x)
+  ret i64 %0
+}
+
+declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.lrint.i64.f80(x86_fp80) nounwind readnone

From ccc1fa5e1d2d2c9be66aac07c3d70332d1d81adf Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Tue, 28 May 2019 20:57:56 +0000
Subject: [PATCH 0406/1176] Revert r361842 as it breaks LLDB ::
 tools/lldb-mi/exec/exec-finish.test

llvm-svn: 361876
---
 lld/COFF/DebugTypes.cpp                       | 213 ++----------------
 lld/COFF/DebugTypes.h                         |  29 +--
 lld/COFF/Driver.cpp                           |   4 -
 lld/COFF/Driver.h                             |   4 +-
 lld/COFF/PDB.cpp                              | 120 +++++++++-
 .../pdb-type-server-valid-signature.yaml      |   2 +-
 .../pdb-type-server-invalid-signature.yaml    |   4 +-
 7 files changed, 141 insertions(+), 235 deletions(-)

diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
index 4e215194b70c3..34f32ea085ca3 100644
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -7,15 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "DebugTypes.h"
-#include "Driver.h"
 #include "InputFiles.h"
-#include "lld/Common/ErrorHandler.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
-#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
-#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
-#include "llvm/Support/Path.h"
 
 using namespace lld;
 using namespace lld::coff;
@@ -23,44 +16,14 @@ using namespace llvm;
 using namespace llvm::codeview;
 
 namespace {
-// The TypeServerSource class represents a PDB type server, a file referenced by
-// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
-// files, therefore there must be only once instance per OBJ lot. The file path
-// is discovered from the dependent OBJ's debug type stream. The
-// TypeServerSource object is then queued and loaded by the COFF Driver. The
-// debug type stream for such PDB files will be merged first in the final PDB,
-// before any dependent OBJ.
 class TypeServerSource : public TpiSource {
 public:
-  explicit TypeServerSource(MemoryBufferRef M, llvm::pdb::NativeSession *S)
-      : TpiSource(PDB, nullptr), Session(S), MB(M) {}
-
-  // Queue a PDB type server for loading in the COFF Driver
-  static void enqueue(const ObjFile *DependentFile,
-                      const TypeServer2Record &TS);
-
-  // Create an instance
-  static Expected<TypeServerSource *> getInstance(MemoryBufferRef M);
-
-  // Fetch the PDB instance loaded for a corresponding dependent OBJ.
-  static Expected<TypeServerSource *>
-  findFromFile(const ObjFile *DependentFile);
-
-  static std::map<std::string, std::pair<std::string, TypeServerSource *>>
-      Instances;
-
-  // The interface to the PDB (if it was opened successfully)
-  std::unique_ptr<llvm::pdb::NativeSession> Session;
-
-private:
-  MemoryBufferRef MB;
+  TypeServerSource(ObjFile *F) : TpiSource(PDB, F) {}
 };
 
-// This class represents the debug type stream of an OBJ file that depends on a
-// PDB type server (see TypeServerSource).
 class UseTypeServerSource : public TpiSource {
 public:
-  UseTypeServerSource(const ObjFile *F, const TypeServer2Record *TS)
+  UseTypeServerSource(ObjFile *F, TypeServer2Record *TS)
       : TpiSource(UsingPDB, F), TypeServerDependency(*TS) {}
 
   // Information about the PDB type server dependency, that needs to be loaded
@@ -68,20 +31,14 @@ class UseTypeServerSource : public TpiSource {
   TypeServer2Record TypeServerDependency;
 };
 
-// This class represents the debug type stream of a Microsoft precompiled
-// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
-// PDB, before any other OBJs that depend on this. Note that only MSVC generate
-// such files, clang does not.
 class PrecompSource : public TpiSource {
 public:
-  PrecompSource(const ObjFile *F) : TpiSource(PCH, F) {}
+  PrecompSource(ObjFile *F) : TpiSource(PCH, F) {}
 };
 
-// This class represents the debug type stream of an OBJ file that depends on a
-// Microsoft precompiled headers OBJ (see PrecompSource).
 class UsePrecompSource : public TpiSource {
 public:
-  UsePrecompSource(const ObjFile *F, const PrecompRecord *Precomp)
+  UsePrecompSource(ObjFile *F, PrecompRecord *Precomp)
       : TpiSource(UsingPCH, F), PrecompDependency(*Precomp) {}
 
   // Information about the Precomp OBJ dependency, that needs to be loaded in
@@ -92,176 +49,40 @@ class UsePrecompSource : public TpiSource {
 
 static std::vector<std::unique_ptr<TpiSource>> GC;
 
-TpiSource::TpiSource(TpiKind K, const ObjFile *F) : Kind(K), File(F) {
+TpiSource::TpiSource(TpiKind K, ObjFile *F) : Kind(K), File(F) {
   GC.push_back(std::unique_ptr<TpiSource>(this));
 }
 
-TpiSource *lld::coff::makeTpiSource(const ObjFile *F) {
+TpiSource *coff::makeTpiSource(ObjFile *F) {
   return new TpiSource(TpiSource::Regular, F);
 }
 
-TpiSource *lld::coff::makeUseTypeServerSource(const ObjFile *F,
-                                              const TypeServer2Record *TS) {
-  TypeServerSource::enqueue(F, *TS);
-  return new UseTypeServerSource(F, TS);
+TpiSource *coff::makeTypeServerSource(ObjFile *F) {
+  return new TypeServerSource(F);
 }
 
-TpiSource *lld::coff::makePrecompSource(const ObjFile *F) {
-  return new PrecompSource(F);
+TpiSource *coff::makeUseTypeServerSource(ObjFile *F, TypeServer2Record *TS) {
+  return new UseTypeServerSource(F, TS);
 }
 
-TpiSource *lld::coff::makeUsePrecompSource(const ObjFile *F,
-                                           const PrecompRecord *Precomp) {
+TpiSource *coff::makePrecompSource(ObjFile *F) { return new PrecompSource(F); }
+
+TpiSource *coff::makeUsePrecompSource(ObjFile *F, PrecompRecord *Precomp) {
   return new UsePrecompSource(F, Precomp);
 }
 
 namespace lld {
 namespace coff {
 template <>
-const PrecompRecord &retrieveDependencyInfo(const TpiSource *Source) {
+const PrecompRecord &retrieveDependencyInfo(TpiSource *Source) {
   assert(Source->Kind == TpiSource::UsingPCH);
-  return ((const UsePrecompSource *)Source)->PrecompDependency;
+  return ((UsePrecompSource *)Source)->PrecompDependency;
 }
 
 template <>
-const TypeServer2Record &retrieveDependencyInfo(const TpiSource *Source) {
+const TypeServer2Record &retrieveDependencyInfo(TpiSource *Source) {
   assert(Source->Kind == TpiSource::UsingPDB);
-  return ((const UseTypeServerSource *)Source)->TypeServerDependency;
+  return ((UseTypeServerSource *)Source)->TypeServerDependency;
 }
 } // namespace coff
 } // namespace lld
-
-std::map<std::string, std::pair<std::string, TypeServerSource *>>
-    TypeServerSource::Instances;
-
-// Make a PDB path assuming the PDB is in the same folder as the OBJ
-static std::string getPdbBaseName(const ObjFile *File, StringRef TSPath) {
-  StringRef LocalPath =
-      !File->ParentName.empty() ? File->ParentName : File->getName();
-  std::string Path = sys::path::parent_path(LocalPath);
-
-  // Currently, type server PDBs are only created by MSVC cl, which only runs
-  // on Windows, so we can assume type server paths are Windows style.
-  return Path + sys::path::filename(TSPath, sys::path::Style::windows).str();
-}
-
-// The casing of the PDB path stamped in the OBJ can differ from the actual path
-// on disk. With this, we ensure to always use lowercase as a key for the
-// PDBInputFile::Instances map, at least on Windows.
-static std::string normalizePdbPath(StringRef path) {
-#if defined(_WIN32)
-  return path.lower();
-#else // LINUX
-  return path;
-#endif
-}
-
-// If existing, return the actual PDB path on disk.
-static Optional<std::string> findPdbPath(StringRef PDBPath,
-                                         const ObjFile *DependentFile) {
-  // Ensure the file exists before anything else. In some cases, if the path
-  // points to a removable device, Driver::enqueuePath() would fail with an
-  // error (EAGAIN, "resource unavailable try again") which we want to skip
-  // silently.
-  if (llvm::sys::fs::exists(PDBPath))
-    return normalizePdbPath(PDBPath);
-  std::string Ret = getPdbBaseName(DependentFile, PDBPath);
-  if (llvm::sys::fs::exists(Ret))
-    return normalizePdbPath(Ret);
-  return None;
-}
-
-// Fetch the PDB instance that was already loaded by the COFF Driver.
-Expected<TypeServerSource *>
-TypeServerSource::findFromFile(const ObjFile *DependentFile) {
-  const TypeServer2Record &TS =
-      retrieveDependencyInfo<TypeServer2Record>(DependentFile->DebugTypesObj);
-
-  Optional<std::string> P = findPdbPath(TS.Name, DependentFile);
-  if (!P)
-    return createFileError(TS.Name, errorCodeToError(std::error_code(
-                                        ENOENT, std::generic_category())));
-
-  auto It = TypeServerSource::Instances.find(*P);
-  // The PDB file exists on disk, at this point we expect it to have been
-  // inserted in the map by TypeServerSource::loadPDB()
-  assert(It != TypeServerSource::Instances.end());
-
-  std::pair<std::string, TypeServerSource *> &PDB = It->second;
-
-  if (!PDB.second)
-    return createFileError(
-        *P, createStringError(inconvertibleErrorCode(), PDB.first.c_str()));
-
-  pdb::PDBFile &PDBFile = (PDB.second)->Session->getPDBFile();
-  pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
-
-  // Just because a file with a matching name was found doesn't mean it can be
-  // used. The GUID and Age must match between the PDB header and the OBJ
-  // TypeServer2 record. The 'Age' is used by MSVC incremental compilation.
-  if (Info.getGuid() != TS.getGuid() || Info.getAge() != TS.getAge())
-    return createFileError(
-        TS.Name,
-        make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
-
-  return PDB.second;
-}
-
-// FIXME: Temporary interface until PDBLinker::maybeMergeTypeServerPDB() is
-// moved here.
-Expected<llvm::pdb::NativeSession *>
-lld::coff::findTypeServerSource(const ObjFile *F) {
-  Expected<TypeServerSource *> TS = TypeServerSource::findFromFile(F);
-  if (!TS)
-    return TS.takeError();
-  return TS.get()->Session.get();
-}
-
-// Queue a PDB type server for loading in the COFF Driver
-void TypeServerSource::enqueue(const ObjFile *DependentFile,
-                               const TypeServer2Record &TS) {
-  // Start by finding where the PDB is located (either the record path or next
-  // to the OBJ file)
-  Optional<std::string> P = findPdbPath(TS.Name, DependentFile);
-  if (!P)
-    return;
-  auto It = TypeServerSource::Instances.emplace(
-      *P, std::pair<std::string, TypeServerSource *>{});
-  if (!It.second)
-    return; // another OBJ already scheduled this PDB for load
-
-  Driver->enqueuePath(*P, false);
-}
-
-// Create an instance of TypeServerSource or an error string if the PDB couldn't
-// be loaded. The error message will be displayed later, when the referring OBJ
-// will be merged in. NOTE - a PDB load failure is not a link error: some
-// debug info will simply be missing from the final PDB - that is the default
-// accepted behavior.
-void lld::coff::loadTypeServerSource(llvm::MemoryBufferRef M) {
-  std::string Path = normalizePdbPath(M.getBufferIdentifier());
-
-  Expected<TypeServerSource *> TS = TypeServerSource::getInstance(M);
-  if (!TS)
-    TypeServerSource::Instances[Path] = {toString(TS.takeError()), nullptr};
-  else
-    TypeServerSource::Instances[Path] = {{}, *TS};
-}
-
-Expected<TypeServerSource *> TypeServerSource::getInstance(MemoryBufferRef M) {
-  std::unique_ptr<llvm::pdb::IPDBSession> ISession;
-  Error Err = pdb::NativeSession::createFromPdb(
-      MemoryBuffer::getMemBuffer(M, false), ISession);
-  if (Err)
-    return std::move(Err);
-
-  std::unique_ptr<llvm::pdb::NativeSession> Session(
-      static_cast<pdb::NativeSession *>(ISession.release()));
-
-  pdb::PDBFile &PDBFile = Session->getPDBFile();
-  Expected<pdb::InfoStream &> Info = PDBFile.getPDBInfoStream();
-  // All PDB Files should have an Info stream.
-  if (!Info)
-    return Info.takeError();
-  return new TypeServerSource(M, Session.release());
-}
diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h
index cb03aba5b0d2d..0505a354257af 100644
--- a/lld/COFF/DebugTypes.h
+++ b/lld/COFF/DebugTypes.h
@@ -10,16 +10,12 @@
 #define LLD_COFF_DEBUGTYPES_H
 
 #include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 namespace codeview {
 class PrecompRecord;
 class TypeServer2Record;
 } // namespace codeview
-namespace pdb {
-class NativeSession;
-}
 } // namespace llvm
 
 namespace lld {
@@ -31,28 +27,23 @@ class TpiSource {
 public:
   enum TpiKind { Regular, PCH, UsingPCH, PDB, UsingPDB };
 
-  TpiSource(TpiKind K, const ObjFile *F);
+  TpiSource(TpiKind K, ObjFile *F);
   virtual ~TpiSource() {}
 
   const TpiKind Kind;
-  const ObjFile *File;
+  ObjFile *File;
 };
 
-TpiSource *makeTpiSource(const ObjFile *F);
-TpiSource *makeUseTypeServerSource(const ObjFile *F,
-                                   const llvm::codeview::TypeServer2Record *TS);
-TpiSource *makePrecompSource(const ObjFile *F);
-TpiSource *makeUsePrecompSource(const ObjFile *F,
-                                const llvm::codeview::PrecompRecord *Precomp);
-
-void loadTypeServerSource(llvm::MemoryBufferRef M);
+TpiSource *makeTpiSource(ObjFile *F);
+TpiSource *makeTypeServerSource(ObjFile *F);
+TpiSource *makeUseTypeServerSource(ObjFile *F,
+                                   llvm::codeview::TypeServer2Record *TS);
+TpiSource *makePrecompSource(ObjFile *F);
+TpiSource *makeUsePrecompSource(ObjFile *F,
+                                llvm::codeview::PrecompRecord *Precomp);
 
 // Temporary interface to get the dependency
-template <typename T> const T &retrieveDependencyInfo(const TpiSource *Source);
-
-// Temporary interface until we move PDBLinker::maybeMergeTypeServerPDB here
-llvm::Expected<llvm::pdb::NativeSession *>
-findTypeServerSource(const ObjFile *F);
+template <typename T> const T &retrieveDependencyInfo(TpiSource *Source);
 
 } // namespace coff
 } // namespace lld
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 7f7fde12980d6..df374f518d94d 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "DebugTypes.h"
 #include "Driver.h"
 #include "Config.h"
 #include "ICF.h"
@@ -182,9 +181,6 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB,
   case file_magic::coff_import_library:
     Symtab->addFile(make<ObjFile>(MBRef));
     break;
-  case file_magic::pdb:
-    loadTypeServerSource(MBRef);
-    break;
   case file_magic::coff_cl_gl_object:
     error(Filename + ": is not a native COFF file. Recompile without /GL");
     break;
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index f0c2ee6a7728d..f9448bd2d3b16 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -77,8 +77,6 @@ class LinkerDriver {
 
   MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB);
 
-  void enqueuePath(StringRef Path, bool WholeArchive);
-
 private:
   std::unique_ptr<llvm::TarWriter> Tar; // for /linkrepro
 
@@ -122,6 +120,8 @@ class LinkerDriver {
   void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
                         StringRef ParentName, uint64_t OffsetInArchive);
 
+  void enqueuePath(StringRef Path, bool WholeArchive);
+
   void enqueueTask(std::function<void()> Task);
   bool run();
 
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 39244b1736ddc..242235154d05f 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -175,6 +175,11 @@ class PDBLinker {
 
   llvm::SmallString<128> NativePath;
 
+  /// A list of other PDBs which are loaded during the linking process and which
+  /// we need to keep around since the linking operation may reference pointers
+  /// inside of these PDBs.
+  llvm::SmallVector<std::unique_ptr<pdb::NativeSession>, 2> LoadedPDBs;
+
   std::vector<pdb::SecMapEntry> SectionMap;
 
   /// Type index mappings of type server PDBs that we've loaded so far.
@@ -184,6 +189,10 @@ class PDBLinker {
   /// far.
   std::map<uint32_t, CVIndexMap> PrecompTypeIndexMappings;
 
+  /// List of TypeServer PDBs which cannot be loaded.
+  /// Cached to prevent repeated load attempts.
+  std::map<codeview::GUID, std::string> MissingTypeServerPDBs;
+
   // For statistics
   uint64_t GlobalSymbols = 0;
   uint64_t ModuleSymbols = 0;
@@ -407,26 +416,115 @@ PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) {
   return *ObjectIndexMap;
 }
 
+static Expected<std::unique_ptr<pdb::NativeSession>>
+tryToLoadPDB(const codeview::GUID &GuidFromObj, StringRef TSPath) {
+  // Ensure the file exists before anything else. We want to return ENOENT,
+  // "file not found", even if the path points to a removable device (in which
+  // case the return message would be EAGAIN, "resource unavailable try again")
+  if (!llvm::sys::fs::exists(TSPath))
+    return errorCodeToError(std::error_code(ENOENT, std::generic_category()));
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(
+      TSPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
+  if (!MBOrErr)
+    return errorCodeToError(MBOrErr.getError());
+
+  std::unique_ptr<pdb::IPDBSession> ThisSession;
+  if (auto EC = pdb::NativeSession::createFromPdb(
+          MemoryBuffer::getMemBuffer(Driver->takeBuffer(std::move(*MBOrErr)),
+                                     /*RequiresNullTerminator=*/false),
+          ThisSession))
+    return std::move(EC);
+
+  std::unique_ptr<pdb::NativeSession> NS(
+      static_cast<pdb::NativeSession *>(ThisSession.release()));
+  pdb::PDBFile &File = NS->getPDBFile();
+  auto ExpectedInfo = File.getPDBInfoStream();
+  // All PDB Files should have an Info stream.
+  if (!ExpectedInfo)
+    return ExpectedInfo.takeError();
+
+  // Just because a file with a matching name was found and it was an actual
+  // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
+  // must match the GUID specified in the TypeServer2 record.
+  if (ExpectedInfo->getGuid() != GuidFromObj)
+    return make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date);
+
+  return std::move(NS);
+}
+
 Expected<const CVIndexMap &> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File) {
-  Expected<llvm::pdb::NativeSession *> PDBSession = findTypeServerSource(File);
-  if (!PDBSession)
-    return PDBSession.takeError();
+  const TypeServer2Record &TS =
+      retrieveDependencyInfo<TypeServer2Record>(File->DebugTypesObj);
 
-  pdb::PDBFile &PDBFile = PDBSession.get()->getPDBFile();
-  pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
+  const codeview::GUID &TSId = TS.getGuid();
+  StringRef TSPath = TS.getName();
 
-  auto It = TypeServerIndexMappings.emplace(Info.getGuid(), CVIndexMap());
-  CVIndexMap &IndexMap = It.first->second;
-  if (!It.second)
-    return IndexMap; // already merged
+  // First, check if the PDB has previously failed to load.
+  auto PrevErr = MissingTypeServerPDBs.find(TSId);
+  if (PrevErr != MissingTypeServerPDBs.end())
+    return createFileError(
+        TSPath,
+        make_error<StringError>(PrevErr->second, inconvertibleErrorCode()));
+
+  // Second, check if we already loaded a PDB with this GUID. Return the type
+  // index mapping if we have it.
+  auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()});
+  CVIndexMap &IndexMap = Insertion.first->second;
+  if (!Insertion.second)
+    return IndexMap;
 
   // Mark this map as a type server map.
   IndexMap.IsTypeServerMap = true;
 
-  Expected<pdb::TpiStream &> ExpectedTpi = PDBFile.getPDBTpiStream();
+  // Check for a PDB at:
+  // 1. The given file path
+  // 2. Next to the object file or archive file
+  auto ExpectedSession = handleExpected(
+      tryToLoadPDB(TSId, TSPath),
+      [&]() {
+        StringRef LocalPath =
+            !File->ParentName.empty() ? File->ParentName : File->getName();
+        SmallString<128> Path = sys::path::parent_path(LocalPath);
+        // Currently, type server PDBs are only created by cl, which only runs
+        // on Windows, so we can assume type server paths are Windows style.
+        sys::path::append(
+            Path, sys::path::filename(TSPath, sys::path::Style::windows));
+        return tryToLoadPDB(TSId, Path);
+      },
+      [&](std::unique_ptr<ECError> EC) -> Error {
+        auto SysErr = EC->convertToErrorCode();
+        // Only re-try loading if the previous error was "No such file or
+        // directory"
+        if (SysErr.category() == std::generic_category() &&
+            SysErr.value() == ENOENT)
+          return Error::success();
+        return Error(std::move(EC));
+      });
+
+  if (auto E = ExpectedSession.takeError()) {
+    TypeServerIndexMappings.erase(TSId);
+
+    // Flatten the error to a string, for later display, if the error occurs
+    // again on the same PDB.
+    std::string ErrMsg;
+    raw_string_ostream S(ErrMsg);
+    S << E;
+    MissingTypeServerPDBs.emplace(TSId, S.str());
+
+    return createFileError(TSPath, std::move(E));
+  }
+
+  pdb::NativeSession *Session = ExpectedSession->get();
+
+  // Keep a strong reference to this PDB, so that it's safe to hold pointers
+  // into the file.
+  LoadedPDBs.push_back(std::move(*ExpectedSession));
+
+  auto ExpectedTpi = Session->getPDBFile().getPDBTpiStream();
   if (auto E = ExpectedTpi.takeError())
     fatal("Type server does not have TPI stream: " + toString(std::move(E)));
-  Expected<pdb::TpiStream &> ExpectedIpi = PDBFile.getPDBIpiStream();
+  auto ExpectedIpi = Session->getPDBFile().getPDBIpiStream();
   if (auto E = ExpectedIpi.takeError())
     fatal("Type server does not have TPI stream: " + toString(std::move(E)));
 
diff --git a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
index 23656d1807cff..dd95a3df8893c 100644
--- a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
+++ b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
@@ -69,7 +69,7 @@ sections:
       - Kind:            LF_TYPESERVER2
         TypeServer2:
           Guid:            '{8DABD2A0-28FF-CB43-9BAF-175B77B76414}'
-          Age:             1
+          Age:             18
           Name:            'pdb-diff-cl.pdb'
   - Name:            '.text$mn'
     Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
diff --git a/lld/test/COFF/pdb-type-server-invalid-signature.yaml b/lld/test/COFF/pdb-type-server-invalid-signature.yaml
index efac72df5bd61..247e00096235c 100644
--- a/lld/test/COFF/pdb-type-server-invalid-signature.yaml
+++ b/lld/test/COFF/pdb-type-server-invalid-signature.yaml
@@ -4,7 +4,7 @@
 # RUN: lld-link %t.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s
 # RUN: cd %S
 
-# CHECK: warning: Cannot use debug info for '{{.*}}.obj'
+# CHECK: warning: Cannot use debug info for {{.*}}.obj
 # CHECK-NEXT: The signature does not match; the file(s) might be out of date
 
 # Also test a valid match
@@ -14,7 +14,7 @@
 # RUN: lld-link %t2.obj -out:%t2.exe -debug -pdb:%t2.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=VALID-SIGNATURE -allow-empty
 # RUN: cd %S
 
-# VALID-SIGNATURE-NOT: warning: Cannot use debug info for '{{.*}}.obj'
+# VALID-SIGNATURE-NOT: warning: Cannot use debug info for {{.*}}.obj
 # VALID-SIGNATURE-NOT: The signature does not match; the file(s) might be out of date
 
 --- !COFF

From 34d8daae539dfdbb3133d18b61caddd57e29adfe Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 28 May 2019 21:04:29 +0000
Subject: [PATCH 0407/1176] [AArch64] Handle ISD::LRINT and ISD::LLRINT

This patch optimizes ISD::LRINT and ISD::LLRINT to frintx plus
fcvtzs. It currently only handles the scalar version.

Reviewed By: SjoerdMeijer, mstorsjo

Differential Revision: https://reviews.llvm.org/D62018

llvm-svn: 361877
---
 .../include/llvm/Target/TargetSelectionDAG.td |  2 +
 .../Target/AArch64/AArch64ISelLowering.cpp    |  2 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 13 +++++
 llvm/test/CodeGen/AArch64/llrint-conv.ll      | 16 +++++--
 llvm/test/CodeGen/AArch64/lrint-conv-win.ll   | 48 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/lrint-conv.ll       | 16 +++++--
 6 files changed, 89 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/lrint-conv-win.ll

diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 28a2eb0727a69..646f945872b0c 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -453,6 +453,8 @@ def fround     : SDNode<"ISD::FROUND"     , SDTFPUnaryOp>;
 
 def lround     : SDNode<"ISD::LROUND"     , SDTFPToIntOp>;
 def llround    : SDNode<"ISD::LLROUND"    , SDTFPToIntOp>;
+def lrint      : SDNode<"ISD::LRINT"      , SDTFPToIntOp>;
+def llrint     : SDNode<"ISD::LLRINT"     , SDTFPToIntOp>;
 
 def fpround    : SDNode<"ISD::FP_ROUND"   , SDTFPRoundOp>;
 def fpextend   : SDNode<"ISD::FP_EXTEND"  , SDTFPExtendOp>;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ac656959bcbaa..d8e52929ffb3c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -459,6 +459,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FMAXIMUM, Ty, Legal);
     setOperationAction(ISD::LROUND, Ty, Legal);
     setOperationAction(ISD::LLROUND, Ty, Legal);
+    setOperationAction(ISD::LRINT, Ty, Legal);
+    setOperationAction(ISD::LLRINT, Ty, Legal);
   }
 
   if (Subtarget->hasFullFP16()) {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8b702901d51a9..d9734eb3a1291 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3160,6 +3160,19 @@ let Predicates = [HasFRInt3264] in {
   defm FRINT64X : FRIntNNT<0b11, "frint64x">;
 } // HasFRInt3264
 
+def : Pat<(i32 (lrint f32:$Rn)),
+          (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i32 (lrint f64:$Rn)),
+          (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+def : Pat<(i64 (lrint f32:$Rn)),
+          (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i64 (lrint f64:$Rn)),
+          (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+def : Pat<(i64 (llrint f32:$Rn)),
+          (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i64 (llrint f64:$Rn)),
+          (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+
 //===----------------------------------------------------------------------===//
 // Floating point two operand instructions.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/llrint-conv.ll b/llvm/test/CodeGen/AArch64/llrint-conv.ll
index 365f6b5456de5..fa11b007eeb3d 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv.ll
@@ -1,7 +1,9 @@
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
 
 ; CHECK-LABEL: testmsws:
-; CHECK:       bl      llrintf
+; CHECK:       frintx  [[REG:s[0-9]]], s0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK:       ret
 define i32 @testmsws(float %x) {
 entry:
   %0 = tail call i64 @llvm.llrint.f32(float %x)
@@ -10,7 +12,9 @@ entry:
 }
 
 ; CHECK-LABEL: testmsxs:
-; CHECK:       b       llrintf
+; CHECK:       frintx  [[REG:s[0-9]]], s0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK-NEXT:  ret
 define i64 @testmsxs(float %x) {
 entry:
   %0 = tail call i64 @llvm.llrint.f32(float %x)
@@ -18,7 +22,9 @@ entry:
 }
 
 ; CHECK-LABEL: testmswd:
-; CHECK:       bl      llrint
+; CHECK:       frintx  [[REG:d[0-9]]], d0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK:       ret
 define i32 @testmswd(double %x) {
 entry:
   %0 = tail call i64 @llvm.llrint.f64(double %x)
@@ -27,7 +33,9 @@ entry:
 }
 
 ; CHECK-LABEL: testmsxd:
-; CHECK:       b       llrint
+; CHECK:       frintx  [[REG:d[0-9]]], d0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK-nEXT:  ret
 define i64 @testmsxd(double %x) {
 entry:
   %0 = tail call i64 @llvm.llrint.f64(double %x)
diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-win.ll b/llvm/test/CodeGen/AArch64/lrint-conv-win.ll
new file mode 100644
index 0000000000000..490f009c3fbab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/lrint-conv-win.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=aarch64-windows -mattr=+neon | FileCheck %s
+
+; CHECK-LABEL: testmsxs:
+; CHECK:       frintx  [[SREG:s[0-9]+]], s0
+; CHECK-NEXT:  fcvtzs  [[WREG:w[0-9]+]], [[SREG]]
+; CHECK-NEXT:  sxtw    x0, [[WREG]]
+; CHECK-NEXT:  ret
+define i64 @testmsxs(float %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f32(float %x)
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmsws:
+; CHECK:       frintx  [[SREG:s[0-9]+]], s0
+; CHECK-NEXT:  fcvtzs  [[WREG:w[0-9]+]], [[SREG]]
+; CHECK-NEXT:  ret
+define i32 @testmsws(float %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f32(float %x)
+  ret i32 %0
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK:       frintx  [[DREG:d[0-9]+]], d0
+; CHECK-NEXT:  fcvtzs  [[WREG:w[0-9]+]], [[DREG]]
+; CHECK-NEXT:  sxtw    x0, [[WREG]]
+; CHECK-NEXT:  ret
+define i64 @testmsxd(double %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f64(double %x)
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK:       frintx  [[DREG:d[0-9]+]], d0
+; CHECK-NEXT:  fcvtzs  [[WREG:w[0-9]+]], [[DREG]]
+; CHECK-NEXT:  ret
+define i32 @testmswd(double %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f64(double %x)
+  ret i32 %0
+}
+
+declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
+declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/lrint-conv.ll b/llvm/test/CodeGen/AArch64/lrint-conv.ll
index a652de9cb3e2c..14d078b96ff1c 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv.ll
@@ -1,7 +1,9 @@
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
 
 ; CHECK-LABEL: testmsws:
-; CHECK:       bl      lrintf
+; CHECK:       frintx  [[REG:s[0-9]]], s0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK:       ret
 define i32 @testmsws(float %x) {
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
@@ -10,7 +12,9 @@ entry:
 }
 
 ; CHECK-LABEL: testmsxs:
-; CHECK:       b       lrintf
+; CHECK:       frintx  [[REG:s[0-9]]], s0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK-NEXT:  ret
 define i64 @testmsxs(float %x) {
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
@@ -18,7 +22,9 @@ entry:
 }
 
 ; CHECK-LABEL: testmswd:
-; CHECK:       bl      lrint
+; CHECK:       frintx  [[REG:d[0-9]]], d0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK:       ret
 define i32 @testmswd(double %x) {
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
@@ -27,7 +33,9 @@ entry:
 }
 
 ; CHECK-LABEL: testmsxd:
-; CHECK:       b       lrint
+; CHECK:       frintx  [[REG:d[0-9]]], d0
+; CHECK-NEXT:  fcvtzs  x0, [[REG]]
+; CHECK-NEXT:  ret
 define i64 @testmsxd(double %x) {
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f64(double %x)

From 14689910737b8e63a0ef7caf407d13aa68bbd6f8 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 28 May 2019 21:16:04 +0000
Subject: [PATCH 0408/1176] [clang] Handle lrint/llrint builtins

As for other floating-point rounding builtins that can be optimized
when build with -fno-math-errno, this patch adds support for lrint
and llrint.  It currently only optimize for AArch64 backend.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D62019

llvm-svn: 361878
---
 clang/lib/CodeGen/CGBuiltin.cpp    | 16 ++++++++++++++++
 clang/test/CodeGen/builtins.c      |  9 +++++++++
 clang/test/CodeGen/math-builtins.c | 12 ++++++------
 clang/test/CodeGen/math-libcalls.c | 12 ++++++------
 4 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d59e0fc960b3e..bc798cab11e95 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1741,6 +1741,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     case Builtin::BI__builtin_llroundl:
       return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround));
 
+    case Builtin::BIlrint:
+    case Builtin::BIlrintf:
+    case Builtin::BIlrintl:
+    case Builtin::BI__builtin_lrint:
+    case Builtin::BI__builtin_lrintf:
+    case Builtin::BI__builtin_lrintl:
+      return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint));
+
+    case Builtin::BIllrint:
+    case Builtin::BIllrintf:
+    case Builtin::BIllrintl:
+    case Builtin::BI__builtin_llrint:
+    case Builtin::BI__builtin_llrintf:
+    case Builtin::BI__builtin_llrintl:
+      return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint));
+
     default:
       break;
     }
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index 1e6d35df73a04..5b482543d2d0e 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -390,6 +390,15 @@ void test_float_builtin_ops(float F, double D, long double LD) {
 
   resli = __builtin_lroundl (LD);
   // CHECK: call i64 @llvm.lround.i64.f80
+
+  resli = __builtin_lrintf (F);
+  // CHECK: call i64 @llvm.lrint.i64.f32
+
+  resli = __builtin_lrint (D);
+  // CHECK: call i64 @llvm.lrint.i64.f64
+
+  resli = __builtin_lrintl (LD);
+  // CHECK: call i64 @llvm.lrint.i64.f80
 }
 
 // __builtin_longjmp isn't supported on all platforms, so only test it on X86.
diff --git a/clang/test/CodeGen/math-builtins.c b/clang/test/CodeGen/math-builtins.c
index cb31288496a73..13e9c13096f27 100644
--- a/clang/test/CodeGen/math-builtins.c
+++ b/clang/test/CodeGen/math-builtins.c
@@ -353,9 +353,9 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 
   __builtin_llrint(f);     __builtin_llrintf(f);    __builtin_llrintl(f);
 
-// NO__ERRNO: declare i64 @llrint(double) [[READNONE]]
-// NO__ERRNO: declare i64 @llrintf(float) [[READNONE]]
-// NO__ERRNO: declare i64 @llrintl(x86_fp80) [[READNONE]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
 // HAS_ERRNO: declare i64 @llrint(double) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @llrintf(float) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @llrintl(x86_fp80) [[NOT_READNONE]]
@@ -416,9 +416,9 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 
   __builtin_lrint(f);      __builtin_lrintf(f);     __builtin_lrintl(f);
 
-// NO__ERRNO: declare i64 @lrint(double) [[READNONE]]
-// NO__ERRNO: declare i64 @lrintf(float) [[READNONE]]
-// NO__ERRNO: declare i64 @lrintl(x86_fp80) [[READNONE]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
 // HAS_ERRNO: declare i64 @lrint(double) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @lrintf(float) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @lrintl(x86_fp80) [[NOT_READNONE]]
diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c
index 405597e1f0319..97a87beb12eca 100644
--- a/clang/test/CodeGen/math-libcalls.c
+++ b/clang/test/CodeGen/math-libcalls.c
@@ -308,9 +308,9 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 
   llrint(f);     llrintf(f);    llrintl(f);
 
-// NO__ERRNO: declare i64 @llrint(double) [[READNONE]]
-// NO__ERRNO: declare i64 @llrintf(float) [[READNONE]]
-// NO__ERRNO: declare i64 @llrintl(x86_fp80) [[READNONE]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.llrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
 // HAS_ERRNO: declare i64 @llrint(double) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @llrintf(float) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @llrintl(x86_fp80) [[NOT_READNONE]]
@@ -371,9 +371,9 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char *c) {
 
   lrint(f);      lrintf(f);     lrintl(f);
 
-// NO__ERRNO: declare i64 @lrint(double) [[READNONE]]
-// NO__ERRNO: declare i64 @lrintf(float) [[READNONE]]
-// NO__ERRNO: declare i64 @lrintl(x86_fp80) [[READNONE]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f64(double) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f32(float) [[READNONE_INTRINSIC]]
+// NO__ERRNO: declare i64 @llvm.lrint.i64.f80(x86_fp80) [[READNONE_INTRINSIC]]
 // HAS_ERRNO: declare i64 @lrint(double) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @lrintf(float) [[NOT_READNONE]]
 // HAS_ERRNO: declare i64 @lrintl(x86_fp80) [[NOT_READNONE]]

From fe23ed2c681413e7baf517c79aee9be130579873 Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Date: Tue, 28 May 2019 21:18:34 +0000
Subject: [PATCH 0409/1176] AMDGPU: Temporary drop s_mul_hi_i/u32 patterns

It introduces performance regressions in several applications.

This has already been submitted downstream.

llvm-svn: 361879
---
 llvm/lib/Target/AMDGPU/SOPInstructions.td | 8 ++------
 llvm/test/CodeGen/AMDGPU/mul.ll           | 5 -----
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 401a560a68330..342293851c35b 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -558,12 +558,8 @@ let SubtargetPredicate = isGFX9Plus in {
     def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32">;
   } // End Defs = [SCC]
 
-  let isCommutable = 1 in {
-    def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32",
-      [(set i32:$sdst, (UniformBinFrag<mulhu> SSrc_b32:$src0, SSrc_b32:$src1))]>;
-    def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32",
-      [(set i32:$sdst, (UniformBinFrag<mulhs> SSrc_b32:$src0, SSrc_b32:$src1))]>;
-  }
+  def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">;
+  def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">;
 } // End SubtargetPredicate = isGFX9Plus
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll
index d9d51597891cc..f8b4ac906c0b5 100644
--- a/llvm/test/CodeGen/AMDGPU/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.ll
@@ -141,11 +141,6 @@ define amdgpu_kernel void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
 ; crash with a 'failed to select' error.
 
 ; FUNC-LABEL: {{^}}s_mul_i64:
-; GFX9_10-DAG: s_mul_i32
-; GFX9_10-DAG: s_mul_hi_u32
-; GFX9_10-DAG: s_mul_i32
-; GFX9_10-DAG: s_mul_i32
-; GFX9_10: s_endpgm
 define amdgpu_kernel void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %mul = mul i64 %a, %b
   store i64 %mul, i64 addrspace(1)* %out, align 8

From 02afe4e077c480f839c891ea12c274fe2f63ca14 Mon Sep 17 00:00:00 2001
From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Tue, 28 May 2019 21:18:59 +0000
Subject: [PATCH 0410/1176] [CUDA][HIP] Emit dependent libs for host only

Recently D60274 was introduced to allow lld to handle dependent libs. However current
usage of dependent libs (e.g. pragma comment(lib, *) in windows header files) are intended
for host only. Emitting the metadata in device IR causes link error in device path.

Until there is a way to different it dependent libs for device or host, metadata for dependent
libs should be emitted for host only. This patch enforces that.

Differential Revision: https://reviews.llvm.org/D62483

llvm-svn: 361880
---
 clang/lib/CodeGen/CodeGenModule.cpp      | 7 ++++++-
 clang/test/CodeGenCUDA/dependent-libs.cu | 6 ++++++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CodeGenCUDA/dependent-libs.cu

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 6daea41928137..8c9e240a680fc 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -457,7 +457,12 @@ void CodeGenModule::Release() {
   // that ELF linkers tend to handle libraries in a more complicated fashion
   // than on other platforms. This forces us to defer handling the dependent
   // libs to the linker.
-  if (!ELFDependentLibraries.empty()) {
+  //
+  // CUDA/HIP device and host libraries are different. Currently there is no
+  // way to differentiate dependent libraries for host or device. Existing
+  // usage of #pragma comment(lib, *) is intended for host libraries on
+  // Windows. Therefore emit llvm.dependent-libraries only for host.
+  if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) {
     auto *NMD = getModule().getOrInsertNamedMetadata("llvm.dependent-libraries");
     for (auto *MD : ELFDependentLibraries)
       NMD->addOperand(MD);
diff --git a/clang/test/CodeGenCUDA/dependent-libs.cu b/clang/test/CodeGenCUDA/dependent-libs.cu
new file mode 100644
index 0000000000000..6f59e667d3b12
--- /dev/null
+++ b/clang/test/CodeGenCUDA/dependent-libs.cu
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck --check-prefix=DEV %s
+// RUN: %clang_cc1 -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s
+
+// DEV-NOT: llvm.dependent-libraries
+// HOST: llvm.dependent-libraries
+#pragma comment(lib, "libabc")

From 5b32f60ec31ce136edac6f693538aeb6039f4ad0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 28 May 2019 21:28:24 +0000
Subject: [PATCH 0411/1176] Revert "[CorrelatedValuePropagation] Fix prof
 branch_weights metadata handling for SwitchInst"

This reverts commit 53f2f3286572cb879b3861d7c15480e4d830dd3b.

As reported on D62126, this causes assertion failures if the switch
has incorrect branch_weights metadata, which may happen as a result
of other transforms not handling it correctly yet.

llvm-svn: 361881
---
 .../Scalar/CorrelatedValuePropagation.cpp     | 117 +++++++++--------
 .../CorrelatedValuePropagation/profmd.ll      | 119 ------------------
 2 files changed, 56 insertions(+), 180 deletions(-)
 delete mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 4cb4d21754a10..4e4715be61aed 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -306,11 +306,11 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
 /// that cannot fire no matter what the incoming edge can safely be removed. If
 /// a case fires on every incoming edge then the entire switch can be removed
 /// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
+static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
                           DominatorTree *DT) {
   DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
-  Value *Cond = I->getCondition();
-  BasicBlock *BB = I->getParent();
+  Value *Cond = SI->getCondition();
+  BasicBlock *BB = SI->getParent();
 
   // If the condition was defined in same block as the switch then LazyValueInfo
   // currently won't say anything useful about it, though in theory it could.
@@ -327,72 +327,67 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
   for (auto *Succ : successors(BB))
     SuccessorsCount[Succ]++;
 
-  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
-    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
-    SwitchInstProfUpdateWrapper SI(*I);
-
-    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
-      ConstantInt *Case = CI->getCaseValue();
-
-      // Check to see if the switch condition is equal to/not equal to the case
-      // value on every incoming edge, equal/not equal being the same each time.
-      LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
-      for (pred_iterator PI = PB; PI != PE; ++PI) {
-        // Is the switch condition equal to the case value?
-        LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
-                                                                Cond, Case, *PI,
-                                                                BB, SI);
-        // Give up on this case if nothing is known.
-        if (Value == LazyValueInfo::Unknown) {
-          State = LazyValueInfo::Unknown;
-          break;
-        }
-
-        // If this was the first edge to be visited, record that all other edges
-        // need to give the same result.
-        if (PI == PB) {
-          State = Value;
-          continue;
-        }
-
-        // If this case is known to fire for some edges and known not to fire for
-        // others then there is nothing we can do - give up.
-        if (Value != State) {
-          State = LazyValueInfo::Unknown;
-          break;
-        }
+  for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+    ConstantInt *Case = CI->getCaseValue();
+
+    // Check to see if the switch condition is equal to/not equal to the case
+    // value on every incoming edge, equal/not equal being the same each time.
+    LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+    for (pred_iterator PI = PB; PI != PE; ++PI) {
+      // Is the switch condition equal to the case value?
+      LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+                                                              Cond, Case, *PI,
+                                                              BB, SI);
+      // Give up on this case if nothing is known.
+      if (Value == LazyValueInfo::Unknown) {
+        State = LazyValueInfo::Unknown;
+        break;
       }
 
-      if (State == LazyValueInfo::False) {
-        // This case never fires - remove it.
-        BasicBlock *Succ = CI->getCaseSuccessor();
-        Succ->removePredecessor(BB);
-        CI = SI.removeCase(CI);
-        CE = SI->case_end();
-
-        // The condition can be modified by removePredecessor's PHI simplification
-        // logic.
-        Cond = SI->getCondition();
-
-        ++NumDeadCases;
-        Changed = true;
-        if (--SuccessorsCount[Succ] == 0)
-          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
+      // If this was the first edge to be visited, record that all other edges
+      // need to give the same result.
+      if (PI == PB) {
+        State = Value;
         continue;
       }
-      if (State == LazyValueInfo::True) {
-        // This case always fires.  Arrange for the switch to be turned into an
-        // unconditional branch by replacing the switch condition with the case
-        // value.
-        SI->setCondition(Case);
-        NumDeadCases += SI->getNumCases();
-        Changed = true;
+
+      // If this case is known to fire for some edges and known not to fire for
+      // others then there is nothing we can do - give up.
+      if (Value != State) {
+        State = LazyValueInfo::Unknown;
         break;
       }
+    }
 
-      // Increment the case iterator since we didn't delete it.
-      ++CI;
+    if (State == LazyValueInfo::False) {
+      // This case never fires - remove it.
+      BasicBlock *Succ = CI->getCaseSuccessor();
+      Succ->removePredecessor(BB);
+      CI = SI->removeCase(CI);
+      CE = SI->case_end();
+
+      // The condition can be modified by removePredecessor's PHI simplification
+      // logic.
+      Cond = SI->getCondition();
+
+      ++NumDeadCases;
+      Changed = true;
+      if (--SuccessorsCount[Succ] == 0)
+        DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
+      continue;
     }
+    if (State == LazyValueInfo::True) {
+      // This case always fires.  Arrange for the switch to be turned into an
+      // unconditional branch by replacing the switch condition with the case
+      // value.
+      SI->setCondition(Case);
+      NumDeadCases += SI->getNumCases();
+      Changed = true;
+      break;
+    }
+
+    // Increment the case iterator since we didn't delete it.
+    ++CI;
   }
 
   if (Changed)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll b/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll
deleted file mode 100644
index 493b4c2273e21..0000000000000
--- a/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll
+++ /dev/null
@@ -1,119 +0,0 @@
-; RUN: opt < %s -correlated-propagation -S | FileCheck %s
-
-; Removed several cases from switch.
-define i32 @switch1(i32 %s) {
-; CHECK-LABEL: @switch1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
-;
-entry:
-  %cmp = icmp slt i32 %s, 0
-  br i1 %cmp, label %negative, label %out
-
-negative:
-; CHECK:       negative:
-; CHECK-NEXT:    switch i32 [[S]], label [[OUT]] [
-; CHECK-NEXT:    i32 -2, label [[NEXT:%.*]]
-; CHECK-NEXT:    i32 -1, label [[NEXT]]
-  switch i32 %s, label %out [
-  i32 0, label %out
-  i32 1, label %out
-  i32 -1, label %next
-  i32 -2, label %next
-  i32 2, label %out
-  i32 3, label %out
-; CHECK-NEXT: !prof ![[MD0:[0-9]+]]
-  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6}
-
-out:
-  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
-  ret i32 %p
-
-next:
-  %q = phi i32 [ 0, %negative ], [ 0, %negative ]
-  ret i32 %q
-}
-
-; Removed all cases from switch.
-define i32 @switch2(i32 %s) {
-; CHECK-LABEL: @switch2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[S:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[OUT:%.*]]
-;
-entry:
-  %cmp = icmp sgt i32 %s, 0
-  br i1 %cmp, label %positive, label %out
-
-positive:
-  switch i32 %s, label %out [
-  i32 0, label %out
-  i32 -1, label %next
-  i32 -2, label %next
-  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3}
-
-out:
-  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
-  ret i32 %p
-
-next:
-  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
-  ret i32 %q
-}
-
-; Change switch into conditional branch.
-define i32 @switch3(i32 %s) {
-; CHECK-LABEL: @switch3(
-;
-entry:
-  %cmp = icmp sgt i32 %s, 0
-  br i1 %cmp, label %positive, label %out
-
-positive:
-; CHECK:      positive:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %s, 1
-; CHECK-NEXT:    br i1 [[CMP]], label [[NEXT:%.*]], label [[OUT:%.*]], !prof ![[MD1:[0-9]+]]
-  switch i32 %s, label %out [
-  i32 1, label %next
-  i32 -1, label %next
-  i32 -2, label %next
-  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3}
-
-out:
-  %p = phi i32 [ -1, %entry ], [ 1, %positive ]
-  ret i32 %p
-
-next:
-  %q = phi i32 [ 0, %positive ], [ 0, %positive ], [ 0, %positive ]
-  ret i32 %q
-}
-
-; Removed all cases from switch.
-define i32 @switch4(i32 %s) {
-; CHECK-LABEL: @switch4(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
-;
-entry:
-  %cmp = icmp slt i32 %s, 0
-  br i1 %cmp, label %negative, label %out
-
-negative:
-; CHECK:       negative:
-; CHECK-NEXT:    br label %out
-  switch i32 %s, label %out [
-  i32 0, label %out
-  i32 1, label %out
-  i32 2, label %out
-  i32 3, label %out
-  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3, i32 4}
-
-out:
-  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
-  ret i32 %p
-}
-
-; CHECK: ![[MD0]] = !{!"branch_weights", i32 99, i32 4, i32 3}
-; CHECK: ![[MD1]] = !{!"branch_weights", i32 1, i32 99}

From e88173abc01b454f91cec54b50e58e67a5aed263 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur@google.com>
Date: Tue, 28 May 2019 21:45:56 +0000
Subject: [PATCH 0412/1176] [PGO] Handle cases of failing to split critical
 edges

Fix PR41279 where critical edges to EHPad are not split.
The fix is to not instrument those critical edges. We used to be able to know
the size of counters right after MST is computed. With this, we have to
pre-collect the instrument BBs to know the size, and then instrument them.

Differential Revision: https://reviews.llvm.org/D62439

llvm-svn: 361882
---
 .../Instrumentation/PGOInstrumentation.cpp    | 100 ++++++++++--------
 llvm/test/Transforms/PGOProfile/PR41279.ll    |  67 ++++++++++++
 2 files changed, 123 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/PR41279.ll

diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index d5267a86e2da6..6996e7a0502f8 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -573,6 +573,10 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
   // The Minimum Spanning Tree of function CFG.
   CFGMST<Edge, BBInfo> MST;
 
+  // Collect all the BBs that will be instrumented, and store them in
+  // InstrumentBBs.
+  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
+
   // Give an edge, find the BB that will be instrumented.
   // Return nullptr if there is no BB to be instrumented.
   BasicBlock *getInstrBB(Edge *E);
@@ -629,16 +633,6 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
     if (CreateGlobalVar)
       FuncNameVar = createPGOFuncNameVar(F, FuncName);
   }
-
-  // Return the number of profile counters needed for the function.
-  unsigned getNumCounters() {
-    unsigned NumCounters = 0;
-    for (auto &E : this->MST.AllEdges) {
-      if (!E->InMST && !E->Removed)
-        NumCounters++;
-    }
-    return NumCounters + SIVisitor.getNumOfSelectInsts();
-  }
 };
 
 } // end anonymous namespace
@@ -753,6 +747,24 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
   }
 }
 
+// Collect all the BBs that will be instruments and return them in
+// InstrumentBBs.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
+    std::vector<BasicBlock *> &InstrumentBBs) {
+  // Use a worklist as we will update the vector during the iteration.
+  std::vector<Edge *> EdgeList;
+  EdgeList.reserve(MST.AllEdges.size());
+  for (auto &E : MST.AllEdges)
+    EdgeList.push_back(E.get());
+
+  for (auto &E : EdgeList) {
+    BasicBlock *InstrBB = getInstrBB(E);
+    if (InstrBB)
+      InstrumentBBs.push_back(InstrBB);
+  }
+}
+
 // Given a CFG E to be instrumented, find which BB to place the instrumented
 // code. The function will split the critical edge if necessary.
 template <class Edge, class BBInfo>
@@ -783,9 +795,18 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
                     << " --> " << getBBInfo(DestBB).Index << "\n");
   unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
   BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
-  assert(InstrBB && "Critical edge is not split");
-
+  if (!InstrBB) {
+    LLVM_DEBUG(
+        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
+    return nullptr;
+  }
+  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
+  MST.addEdge(SrcBB, InstrBB, 0);
+  // Second one: Add new edge of InstrBB->DestBB.
+  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
+  NewEdge1.InMST = true;
   E->Removed = true;
+
   return InstrBB;
 }
 
@@ -801,15 +822,14 @@ static void instrumentOneFunc(
 
   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
                                                    BFI, IsCS);
-  unsigned NumCounters = FuncInfo.getNumCounters();
+  std::vector<BasicBlock *> InstrumentBBs;
+  FuncInfo.getInstrumentBBs(InstrumentBBs);
+  unsigned NumCounters =
+      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
 
   uint32_t I = 0;
   Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
-  for (auto &E : FuncInfo.MST.AllEdges) {
-    BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
-    if (!InstrBB)
-      continue;
-
+  for (auto *InstrBB : InstrumentBBs) {
     IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
     assert(Builder.GetInsertPoint() != InstrBB->end() &&
            "Cannot get the Instrumentation point");
@@ -1039,39 +1059,31 @@ class PGOUseFunc {
 // edges and the BB. Return false on error.
 bool PGOUseFunc::setInstrumentedCounts(
     const std::vector<uint64_t> &CountFromProfile) {
+
+  std::vector<BasicBlock *> InstrumentBBs;
+  FuncInfo.getInstrumentBBs(InstrumentBBs);
+  unsigned NumCounters =
+      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
   // The number of counters here should match the number of counters
   // in profile. Return if they mismatch.
-  if (FuncInfo.getNumCounters() != CountFromProfile.size()) {
+  if (NumCounters != CountFromProfile.size()) {
     return false;
   }
-  // Use a worklist as we will update the vector during the iteration.
-  std::vector<PGOUseEdge *> WorkList;
-  for (auto &E : FuncInfo.MST.AllEdges)
-    WorkList.push_back(E.get());
-
   uint32_t I = 0;
-  for (auto &E : WorkList) {
-    BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
-    if (!InstrBB)
-      continue;
+  for (BasicBlock *InstrBB : InstrumentBBs) {
     uint64_t CountValue = CountFromProfile[I++];
-    if (!E->Removed) {
-      getBBInfo(InstrBB).setBBInfoCount(CountValue);
-      E->setEdgeCount(CountValue);
-      continue;
+    UseBBInfo &Info = getBBInfo(InstrBB);
+    Info.setBBInfoCount(CountValue);
+    // If only one in-edge, the edge profile count should be the same as BB
+    // profile count.
+    if (Info.InEdges.size() == 1) {
+      Info.InEdges[0]->setEdgeCount(CountValue);
+    }
+    // If only one out-edge, the edge profile count should be the same as BB
+    // profile count.
+    if (Info.OutEdges.size() == 1) {
+      Info.OutEdges[0]->setEdgeCount(CountValue);
     }
-
-    // Need to add two new edges.
-    BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
-    BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
-    // Add new edge of SrcBB->InstrBB.
-    PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
-    NewEdge.setEdgeCount(CountValue);
-    // Add new edge of InstrBB->DestBB.
-    PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
-    NewEdge1.setEdgeCount(CountValue);
-    NewEdge1.InMST = true;
-    getBBInfo(InstrBB).setBBInfoCount(CountValue);
   }
   ProfileCountSize = CountFromProfile.size();
   CountPosition = I;
diff --git a/llvm/test/Transforms/PGOProfile/PR41279.ll b/llvm/test/Transforms/PGOProfile/PR41279.ll
new file mode 100644
index 0000000000000..373563ba75fbe
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/PR41279.ll
@@ -0,0 +1,67 @@
+; Test that instrumentaiton works fine for the case of failing the split critical edges.
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+
+declare void @f3({ i8*, i64 }*, { i8*, i64 }*, i64)
+declare { i8*, i64 } @f0({ i8*, i64 }*)
+declare i64 @f1()
+declare void @invok2({ i8*, i64 }*, i8* noalias readonly align 1, i64)
+declare void @invok1({ i8*, i64 }*, { i8*, i64 }*, i64)
+declare i32 @__CxxFrameHandler3(...)
+
+define internal void @foo({ i8*, i64 }*, { i8*, i64 }*) personality i32 (...)* @__CxxFrameHandler3 {
+  %3 = alloca i8, align 1
+  store i8 0, i8* %3, align 1
+  %4 = call i64 @f1()
+  %5 = icmp ult i64 %4, 32
+  br i1 %5, label %7, label %13
+
+6:
+  cleanupret from %17 unwind to caller
+; GEN: 6:
+; GEN:  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 2)
+
+7:
+  store i8 1, i8* %3, align 1
+  %8 = call { i8*, i64 } @f0({ i8*, i64 }* %0)
+  %9 = extractvalue { i8*, i64 } %8, 0
+  %10 = extractvalue { i8*, i64 } %8, 1
+  invoke void @invok1({ i8*, i64 }* %1, { i8*, i64 }* %0, i64 1)
+          to label %11 unwind label %16
+; GEN: 7:
+; GEN-NOT: call void @llvm.instrprof.increment
+
+11:
+  store i8 0, i8* %3, align 1
+  invoke void @invok2({ i8*, i64 }* %1, i8* noalias readonly align 1 %9, i64 %10)
+          to label %12 unwind label %16
+; GEN: 11:
+; GEN-NOT: call void @llvm.instrprof.increment
+
+12:
+  store i8 0, i8* %3, align 1
+  br label %14
+; GEN: 12:
+; GEN:  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 1)
+
+13:
+  call void @f3({ i8*, i64 }* %0, { i8*, i64 }* %1, i64 1)
+  br label %14
+; GEN: 13:
+; GEN:  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 0)
+
+14:
+  ret void
+
+15:
+  store i8 0, i8* %3, align 1
+  br label %6
+; GEN: 15:
+; GEN:  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__foo, i32 0, i32 0), i64 60927483247, i32 4, i32 3)
+
+16:
+  %17 = cleanuppad within none []
+  %18 = load i8, i8* %3, align 1
+  %19 = trunc i8 %18 to i1
+  br i1 %19, label %15, label %6
+}

From 81748bae47fdaf280877dfa8d7d309b88b848de9 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Tue, 28 May 2019 21:52:34 +0000
Subject: [PATCH 0413/1176] [clangd] Add SourceManager accessor to ParsedAST.
 NFC

llvm-svn: 361883
---
 clang-tools-extra/clangd/ClangdUnit.h         |  7 +++++++
 clang-tools-extra/clangd/XRefs.cpp            | 21 ++++++++-----------
 clang-tools-extra/clangd/refactor/Rename.cpp  |  7 +++----
 clang-tools-extra/clangd/refactor/Tweak.cpp   |  2 +-
 .../refactor/tweaks/RawStringLiteral.cpp      |  5 ++---
 .../clangd/refactor/tweaks/SwapIfBranches.cpp |  2 +-
 .../clangd/unittests/ClangdUnitTests.cpp      |  2 +-
 .../clangd/unittests/SelectionTests.cpp       |  2 +-
 .../clangd/unittests/SymbolCollectorTests.cpp |  5 ++---
 9 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdUnit.h b/clang-tools-extra/clangd/ClangdUnit.h
index 042bb0d6a036f..16246eb27843d 100644
--- a/clang-tools-extra/clangd/ClangdUnit.h
+++ b/clang-tools-extra/clangd/ClangdUnit.h
@@ -95,6 +95,13 @@ class ParsedAST {
   std::shared_ptr<Preprocessor> getPreprocessorPtr();
   const Preprocessor &getPreprocessor() const;
 
+  SourceManager &getSourceManager() {
+    return getASTContext().getSourceManager();
+  }
+  const SourceManager &getSourceManager() const {
+    return getASTContext().getSourceManager();
+  }
+
   /// This function returns top-level decls present in the main file of the AST.
   /// The result does not include the decls that come from the preamble.
   /// (These should be const, but RecursiveASTVisitor requires Decl*).
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index e5e137df21f1b..4b9b95362c769 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -286,7 +286,7 @@ llvm::Optional<Location> makeLocation(ASTContext &AST, SourceLocation TokLoc,
 
 std::vector<LocatedSymbol> locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
-  const auto &SM = AST.getASTContext().getSourceManager();
+  const auto &SM = AST.getSourceManager();
   auto MainFilePath =
       getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);
   if (!MainFilePath) {
@@ -461,7 +461,7 @@ findRefs(const std::vector<const Decl *> &Decls, ParsedAST &AST) {
 
 std::vector<DocumentHighlight> findDocumentHighlights(ParsedAST &AST,
                                                       Position Pos) {
-  const SourceManager &SM = AST.getASTContext().getSourceManager();
+  const SourceManager &SM = AST.getSourceManager();
   auto Symbols = getSymbolAtPosition(
       AST, getBeginningOfIdentifier(AST, Pos, SM.getMainFileID()));
   auto References = findRefs(Symbols.Decls, AST);
@@ -719,7 +719,7 @@ static HoverInfo getHoverContents(QualType T, const Decl *D,
 /// Generate a \p Hover object given the macro \p MacroDecl.
 static HoverInfo getHoverContents(MacroDecl Decl, ParsedAST &AST) {
   HoverInfo HI;
-  SourceManager &SM = AST.getASTContext().getSourceManager();
+  SourceManager &SM = AST.getSourceManager();
   HI.Name = Decl.Name;
   HI.Kind = indexSymbolKindToSymbolKind(
       index::getSymbolInfoForMacro(*Decl.Info).Kind);
@@ -864,9 +864,8 @@ bool hasDeducedType(ParsedAST &AST, SourceLocation SourceLocationBeg) {
 llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos,
                                    format::FormatStyle Style) {
   llvm::Optional<HoverInfo> HI;
-  const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-  SourceLocation SourceLocationBeg =
-      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
+  SourceLocation SourceLocationBeg = getBeginningOfIdentifier(
+      AST, Pos, AST.getSourceManager().getMainFileID());
   // Identified symbols at a specific position.
   auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg);
 
@@ -900,7 +899,7 @@ std::vector<Location> findReferences(ParsedAST &AST, Position Pos,
   if (!Limit)
     Limit = std::numeric_limits<uint32_t>::max();
   std::vector<Location> Results;
-  const SourceManager &SM = AST.getASTContext().getSourceManager();
+  const SourceManager &SM = AST.getSourceManager();
   auto MainFilePath =
       getCanonicalPath(SM.getFileEntryForID(SM.getMainFileID()), SM);
   if (!MainFilePath) {
@@ -949,7 +948,7 @@ std::vector<Location> findReferences(ParsedAST &AST, Position Pos,
 }
 
 std::vector<SymbolDetails> getSymbolInfo(ParsedAST &AST, Position Pos) {
-  const SourceManager &SM = AST.getASTContext().getSourceManager();
+  const SourceManager &SM = AST.getSourceManager();
 
   auto Loc = getBeginningOfIdentifier(AST, Pos, SM.getMainFileID());
   auto Symbols = getSymbolAtPosition(AST, Loc);
@@ -1084,10 +1083,8 @@ getTypeAncestors(const CXXRecordDecl &CXXRD, ASTContext &ASTCtx,
 }
 
 const CXXRecordDecl *findRecordTypeAt(ParsedAST &AST, Position Pos) {
-  ASTContext &ASTCtx = AST.getASTContext();
-  const SourceManager &SourceMgr = ASTCtx.getSourceManager();
-  SourceLocation SourceLocationBeg =
-      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
+  SourceLocation SourceLocationBeg = getBeginningOfIdentifier(
+      AST, Pos, AST.getSourceManager().getMainFileID());
   IdentifiedSymbol Symbols = getSymbolAtPosition(AST, SourceLocationBeg);
   if (Symbols.Decls.empty())
     return nullptr;
diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index 98d6fd488bb39..46fd19d29f281 100644
--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -45,10 +45,9 @@ renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos,
                  llvm::StringRef NewName) {
   RefactoringResultCollector ResultCollector;
   ASTContext &ASTCtx = AST.getASTContext();
-  const SourceManager &SourceMgr = ASTCtx.getSourceManager();
-  SourceLocation SourceLocationBeg =
-      clangd::getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
-  tooling::RefactoringRuleContext Context(ASTCtx.getSourceManager());
+  SourceLocation SourceLocationBeg = clangd::getBeginningOfIdentifier(
+      AST, Pos, AST.getSourceManager().getMainFileID());
+  tooling::RefactoringRuleContext Context(AST.getSourceManager());
   Context.setASTContext(ASTCtx);
   auto Rename = clang::tooling::RenameOccurrences::initiate(
       Context, SourceRange(SourceLocationBeg), NewName);
diff --git a/clang-tools-extra/clangd/refactor/Tweak.cpp b/clang-tools-extra/clangd/refactor/Tweak.cpp
index 34634e64b6f97..6a19751e375de 100644
--- a/clang-tools-extra/clangd/refactor/Tweak.cpp
+++ b/clang-tools-extra/clangd/refactor/Tweak.cpp
@@ -41,7 +41,7 @@ void validateRegistry() {
 Tweak::Selection::Selection(ParsedAST &AST, unsigned RangeBegin,
                             unsigned RangeEnd)
     : AST(AST), ASTSelection(AST.getASTContext(), RangeBegin, RangeEnd) {
-  auto &SM = AST.getASTContext().getSourceManager();
+  auto &SM = AST.getSourceManager();
   Code = SM.getBufferData(SM.getMainFileID());
   Cursor = SM.getComposedLoc(SM.getMainFileID(), RangeBegin);
 }
diff --git a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp
index e3eaba501922e..7feadd1eb7854 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/RawStringLiteral.cpp
@@ -82,15 +82,14 @@ bool RawStringLiteral::prepare(const Selection &Inputs) {
     return false;
   Str = dyn_cast_or_null<StringLiteral>(N->ASTNode.get<Stmt>());
   return Str &&
-         isNormalString(*Str, Inputs.Cursor,
-                        Inputs.AST.getASTContext().getSourceManager()) &&
+         isNormalString(*Str, Inputs.Cursor, Inputs.AST.getSourceManager()) &&
          needsRaw(Str->getBytes()) && canBeRaw(Str->getBytes());
 }
 
 Expected<tooling::Replacements>
 RawStringLiteral::apply(const Selection &Inputs) {
   return tooling::Replacements(
-      tooling::Replacement(Inputs.AST.getASTContext().getSourceManager(), Str,
+      tooling::Replacement(Inputs.AST.getSourceManager(), Str,
                            ("R\"(" + Str->getBytes() + ")\"").str(),
                            Inputs.AST.getASTContext().getLangOpts()));
 }
diff --git a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp
index 9b0b72d94ca52..12838d2a06a4b 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp
@@ -62,7 +62,7 @@ bool SwapIfBranches::prepare(const Selection &Inputs) {
 
 Expected<tooling::Replacements> SwapIfBranches::apply(const Selection &Inputs) {
   auto &Ctx = Inputs.AST.getASTContext();
-  auto &SrcMgr = Ctx.getSourceManager();
+  auto &SrcMgr = Inputs.AST.getSourceManager();
 
   auto ThenRng = toHalfOpenFileRange(SrcMgr, Ctx.getLangOpts(),
                                      If->getThen()->getSourceRange());
diff --git a/clang-tools-extra/clangd/unittests/ClangdUnitTests.cpp b/clang-tools-extra/clangd/unittests/ClangdUnitTests.cpp
index 2c239ce76acd6..100e92c3c65da 100644
--- a/clang-tools-extra/clangd/unittests/ClangdUnitTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ClangdUnitTests.cpp
@@ -49,7 +49,7 @@ Bar* bar;
     std::string WithPreamble = Preamble + Text;
     Annotations TestCase(WithPreamble);
     auto AST = TestTU::withCode(TestCase.code()).build();
-    const auto &SourceMgr = AST.getASTContext().getSourceManager();
+    const auto &SourceMgr = AST.getSourceManager();
     SourceLocation Actual = getBeginningOfIdentifier(
         AST, TestCase.points().back(), SourceMgr.getMainFileID());
     Position ActualPos = offsetToPosition(
diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
index ac9facca83901..deae9f40b33e5 100644
--- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -37,7 +37,7 @@ SelectionTree makeSelectionTree(const StringRef MarkedCode, ParsedAST &AST) {
 Range nodeRange(const SelectionTree::Node *N, ParsedAST &AST) {
   if (!N)
     return Range{};
-  SourceManager &SM = AST.getASTContext().getSourceManager();
+  SourceManager &SM = AST.getSourceManager();
   StringRef Buffer = SM.getBufferData(SM.getMainFileID());
   SourceRange SR = N->ASTNode.getSourceRange();
   SR.setBegin(SM.getFileLoc(SR.getBegin()));
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index d372b1d672280..e422f5ca82b53 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -123,11 +123,10 @@ class ShouldCollectSymbolTest : public ::testing::Test {
     assert(AST.hasValue());
     const NamedDecl &ND =
         Qualified ? findDecl(*AST, Name) : findUnqualifiedDecl(*AST, Name);
-    ASTContext& Ctx = AST->getASTContext();
-    const SourceManager& SM = Ctx.getSourceManager();
+    const SourceManager& SM = AST->getSourceManager();
     bool MainFile = SM.isWrittenInMainFile(SM.getExpansionLoc(ND.getBeginLoc()));
     return SymbolCollector::shouldCollectSymbol(
-        ND, Ctx, SymbolCollector::Options(), MainFile);
+        ND, AST->getASTContext(), SymbolCollector::Options(), MainFile);
   }
 
 protected:

From 551465859113a609e72b8de907f8977ff28c234a Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Tue, 28 May 2019 22:09:12 +0000
Subject: [PATCH 0414/1176] [WebAssembly] Support for atomic fences

Summary:
This adds support for translation of LLVM IR fence instruction. We
convert a singlethread fence to a pseudo compiler barrier which becomes
0 instructions in final binary, and a thread fence to an idempotent
atomicrmw instruction to a memory address.

Reviewers: dschuff, jfb, sunfish, tlively

Subscribers: sbc100, jgravelle-google, llvm-commits

Differential Revision: https://reviews.llvm.org/D50277

llvm-svn: 361884
---
 .../WebAssembly/WebAssemblyAsmPrinter.cpp     |  4 +
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   | 97 ++++++++++++++++++-
 .../WebAssembly/WebAssemblyInstrAtomics.td    | 10 ++
 llvm/test/CodeGen/WebAssembly/atomic-fence.ll | 47 +++++++++
 4 files changed, 154 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/WebAssembly/atomic-fence.ll

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 565438d7e0e9b..7f9d41da39783 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -369,6 +369,10 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       OutStreamer->AddBlankLine();
     }
     break;
+  case WebAssembly::COMPILER_FENCE:
+    // This is a compiler barrier that prevents instruction reordering during
+    // backend compilation, and should not be emitted.
+    break;
   case WebAssembly::EXTRACT_EXCEPTION_I32:
   case WebAssembly::EXTRACT_EXCEPTION_I32_S:
     // These are pseudo instructions that simulates popping values from stack.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 852e2e72f97f9..bd699d92f76c7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -77,14 +77,103 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
     return;
   }
 
-  // Few custom selection stuff. If we need WebAssembly-specific selection,
-  // uncomment this block add corresponding case statements.
-  /*
+  // Few custom selection stuff.
+  SDLoc DL(Node);
+  MachineFunction &MF = CurDAG->getMachineFunction();
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_FENCE: {
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+      break;
+
+    uint64_t SyncScopeID =
+        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    switch (SyncScopeID) {
+    case SyncScope::SingleThread: {
+      // We lower a single-thread fence to a pseudo compiler barrier instruction
+      // preventing instruction reordering. This will not be emitted in final
+      // binary.
+      MachineSDNode *Fence =
+          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                 DL,                 // debug loc
+                                 MVT::Other,         // outchain type
+                                 Node->getOperand(0) // inchain
+          );
+      ReplaceNode(Node, Fence);
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+
+    case SyncScope::System: {
+      // For non-emscripten systems, we have not decided on what we should
+      // traslate fences to yet.
+      if (!Subtarget->getTargetTriple().isOSEmscripten())
+        report_fatal_error(
+            "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
+
+      // Wasm does not have a fence instruction, but because all atomic
+      // instructions in wasm are sequentially consistent, we translate a
+      // fence to an idempotent atomic RMW instruction to a linear memory
+      // address. All atomic instructions in wasm are sequentially consistent,
+      // but this is to ensure a fence also prevents reordering of non-atomic
+      // instructions in the VM. Even though LLVM IR's fence instruction does
+      // not say anything about its relationship with non-atomic instructions,
+      // we think this is more user-friendly.
+      //
+      // While any address can work, here we use a value stored in
+      // __stack_pointer wasm global because there's high chance that area is
+      // in cache.
+      //
+      // So the selected instructions will be in the form of:
+      //   %addr = get_global $__stack_pointer
+      //   %0 = i32.const 0
+      //   i32.atomic.rmw.or %addr, %0
+      SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
+          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
+      MachineSDNode *GetGlobal =
+          CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
+                                 DL,                          // debug loc
+                                 MVT::i32,                    // result type
+                                 StackPtrSym // __stack_pointer symbol
+          );
+
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      auto *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo::getUnknownStack(MF),
+          // FIXME Volatile isn't really correct, but currently all LLVM
+          // atomic instructions are treated as volatiles in the backend, so
+          // we should be consistent.
+          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+              MachineMemOperand::MOStore,
+          4, 4, AAMDNodes(), nullptr, SyncScope::System,
+          AtomicOrdering::SequentiallyConsistent);
+      MachineSDNode *Const0 =
+          CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
+      MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
+          WebAssembly::ATOMIC_RMW_OR_I32, // opcode
+          DL,                             // debug loc
+          MVT::i32,                       // result type
+          MVT::Other,                     // outchain type
+          {
+              Zero,                  // alignment
+              Zero,                  // offset
+              SDValue(GetGlobal, 0), // __stack_pointer
+              SDValue(Const0, 0),    // OR with 0 to make it idempotent
+              Node->getOperand(0)    // inchain
+          });
+
+      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
+      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    default:
+      llvm_unreachable("Unknown scope!");
+    }
+  }
+
   default:
     break;
   }
-  */
 
   // Select the default instruction.
   SelectCode(Node);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 86ed5eff95583..e85aa57efc424 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -887,3 +887,13 @@ defm : TerRMWTruncExtPattern<
   ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
   ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
   ATOMIC_RMW32_U_CMPXCHG_I64>;
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+} // Defs = [ARGUMENTS]
diff --git a/llvm/test/CodeGen/WebAssembly/atomic-fence.ll b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
new file mode 100644
index 0000000000000..22084e48e27c6
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
+; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
+; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
+; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
+
+; A multithread fence turns into 'global.get $__stack_pointer' followed by an
+; idempotent atomicrmw instruction.
+; CHECK-LABEL: multithread_fence:
+; CHECK:      global.get  $push[[SP:[0-9]+]]=, __stack_pointer
+; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
+; CHECK-NEXT: i32.atomic.rmw.or  $drop=, 0($pop[[SP]]), $pop[[ZERO]]
+; NOATOMIC-NOT: i32.atomic.rmw.or
+define void @multithread_fence() {
+  fence seq_cst
+  ret void
+}
+
+; Fences with weaker memory orderings than seq_cst should be treated the same
+; because atomic memory access in wasm are sequentially consistent.
+; CHECK-LABEL: multithread_weak_fence:
+; CHECK:  global.get  $push{{.+}}=, __stack_pointer
+; CHECK:  i32.atomic.rmw.or
+; CHECK:  i32.atomic.rmw.or
+; CHECK:  i32.atomic.rmw.or
+define void @multithread_weak_fence() {
+  fence acquire
+  fence release
+  fence acq_rel
+  ret void
+}
+
+; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
+; that acts as a compiler barrier. The barrier should not be emitted to .s file.
+; CHECK-LABEL: singlethread_fence:
+; CHECK-NOT:  compiler_fence
+define void @singlethread_fence() {
+  fence syncscope("singlethread") seq_cst
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") release
+  fence syncscope("singlethread") acq_rel
+  ret void
+}

From 7e48b406ef5ef2208c75874f7751a786e748706f Mon Sep 17 00:00:00 2001
From: "Michael J. Spencer" <bigcheesegs@gmail.com>
Date: Tue, 28 May 2019 22:21:47 +0000
Subject: [PATCH 0415/1176] [Driver] Fix -working-directory issues

Currently the `-working-directory` option does not actually impact the working
directory for all of the clang driver, it only impacts how files are looked up
to make sure they exist.  This means that that clang passes the wrong paths
to -fdebug-compilation-dir and -coverage-notes-file.

This patch fixes that by changing all the places in the driver where we convert
to absolute paths to use the VFS, and then calling setCurrentWorkingDirectory on
the VFS.  This also changes the default VFS for `Driver` to use a virtualized
working directory, instead of changing the process's working directory.

Differential Revision: https://reviews.llvm.org/D62271

llvm-svn: 361885
---
 .../clang/Basic/DiagnosticDriverKinds.td      |  2 ++
 clang/lib/Driver/Driver.cpp                   | 24 ++++++++-----------
 clang/lib/Driver/ToolChains/Clang.cpp         | 21 +++++++---------
 clang/test/Driver/working-directory.c         | 10 +++++++-
 4 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 2ece1f0f57890..ffecccba16a15 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -91,6 +91,8 @@ def err_no_external_assembler : Error<
   "there is no external assembler that can be used on this platform">;
 def err_drv_unable_to_remove_file : Error<
   "unable to remove file: %0">;
+def err_drv_unable_to_set_working_directory : Error <
+  "unable to set working directory: %0">;
 def err_drv_command_failure : Error<
   "unable to execute command: %0">;
 def err_drv_invalid_darwin_version : Error<
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 0f7bbb6eb0fbc..4c153bf3348bd 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -133,7 +133,7 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple,
 
   // Provide a sane fallback if no VFS is specified.
   if (!this->VFS)
-    this->VFS = llvm::vfs::getRealFileSystem();
+    this->VFS = llvm::vfs::createPhysicalFileSystem().release();
 
   Name = llvm::sys::path::filename(ClangExecutable);
   Dir = llvm::sys::path::parent_path(ClangExecutable);
@@ -1005,6 +1005,11 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
     }
   }
 
+  // Check for working directory option before accessing any files
+  if (Arg *WD = Args.getLastArg(options::OPT_working_directory))
+    if (std::error_code EC = VFS->setCurrentWorkingDirectory(WD->getValue()))
+      Diag(diag::err_drv_unable_to_set_working_directory) << WD->getValue();
+
   // FIXME: This stuff needs to go into the Compilation, not the driver.
   bool CCCPrintPhases;
 
@@ -1984,20 +1989,11 @@ bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
   if (Value == "-")
     return true;
 
-  SmallString<64> Path(Value);
-  if (Arg *WorkDir = Args.getLastArg(options::OPT_working_directory)) {
-    if (!llvm::sys::path::is_absolute(Path)) {
-      SmallString<64> Directory(WorkDir->getValue());
-      llvm::sys::path::append(Directory, Value);
-      Path.assign(Directory);
-    }
-  }
-
-  if (getVFS().exists(Path))
+  if (getVFS().exists(Value))
     return true;
 
   if (IsCLMode()) {
-    if (!llvm::sys::path::is_absolute(Twine(Path)) &&
+    if (!llvm::sys::path::is_absolute(Twine(Value)) &&
         llvm::sys::Process::FindInEnvPath("LIB", Value))
       return true;
 
@@ -2023,12 +2019,12 @@ bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value,
     if (getOpts().findNearest(Value, Nearest, IncludedFlagsBitmask,
                               ExcludedFlagsBitmask) <= 1) {
       Diag(clang::diag::err_drv_no_such_file_with_suggestion)
-          << Path << Nearest;
+          << Value << Nearest;
       return false;
     }
   }
 
-  Diag(clang::diag::err_drv_no_such_file) << Path;
+  Diag(clang::diag::err_drv_no_such_file) << Value;
   return false;
 }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index d76e175959835..3009bc8d292b1 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -616,11 +616,11 @@ static bool shouldUseLeafFramePointer(const ArgList &Args,
 }
 
 /// Add a CC1 option to specify the debug compilation directory.
-static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs) {
-  SmallString<128> cwd;
-  if (!llvm::sys::fs::current_path(cwd)) {
+static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs,
+                               const llvm::vfs::FileSystem &VFS) {
+  if (llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory()) {
     CmdArgs.push_back("-fdebug-compilation-dir");
-    CmdArgs.push_back(Args.MakeArgString(cwd));
+    CmdArgs.push_back(Args.MakeArgString(*CWD));
   }
 }
 
@@ -885,13 +885,8 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
       else
         OutputFilename = llvm::sys::path::filename(Output.getBaseInput());
       SmallString<128> CoverageFilename = OutputFilename;
-      if (llvm::sys::path::is_relative(CoverageFilename)) {
-        SmallString<128> Pwd;
-        if (!llvm::sys::fs::current_path(Pwd)) {
-          llvm::sys::path::append(Pwd, CoverageFilename);
-          CoverageFilename.swap(Pwd);
-        }
-      }
+      if (llvm::sys::path::is_relative(CoverageFilename))
+        (void)D.getVFS().makeAbsolute(CoverageFilename);
       llvm::sys::path::replace_extension(CoverageFilename, "gcno");
       CmdArgs.push_back(Args.MakeArgString(CoverageFilename));
 
@@ -4354,7 +4349,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-fno-autolink");
 
   // Add in -fdebug-compilation-dir if necessary.
-  addDebugCompDirArg(Args, CmdArgs);
+  addDebugCompDirArg(Args, CmdArgs, D.getVFS());
 
   addDebugPrefixMapArg(D, Args, CmdArgs);
 
@@ -6065,7 +6060,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
     DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo
                                : codegenoptions::NoDebugInfo);
     // Add the -fdebug-compilation-dir flag if needed.
-    addDebugCompDirArg(Args, CmdArgs);
+    addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS());
 
     addDebugPrefixMapArg(getToolChain().getDriver(), Args, CmdArgs);
 
diff --git a/clang/test/Driver/working-directory.c b/clang/test/Driver/working-directory.c
index 15ba8f00bd126..fbd4ed4f9e10c 100644
--- a/clang/test/Driver/working-directory.c
+++ b/clang/test/Driver/working-directory.c
@@ -1,3 +1,11 @@
 // RUN: %clang -### -working-directory /no/such/dir/ input 2>&1 | FileCheck %s
+// RUN: %clang -### -working-directory %p/Inputs no_such_file.cpp -c 2>&1 | FileCheck %s --check-prefix=CHECK_NO_FILE
+// RUN: %clang -### -working-directory %p/Inputs pchfile.cpp -c 2>&1 | FileCheck %s --check-prefix=CHECK_WORKS
 
-//CHECK: no such file or directory: '/no/such/dir/input'
+// CHECK: unable to set working directory: /no/such/dir/
+
+// CHECK_NO_FILE: no such file or directory: 'no_such_file.cpp'
+
+// CHECK_WORKS: "-coverage-notes-file" "{{[^"]+}}test{{/|\\\\}}Driver{{/|\\\\}}Inputs{{/|\\\\}}pchfile.gcno"
+// CHECK_WORKS: "-working-directory" "{{[^"]+}}test{{/|\\\\}}Driver{{/|\\\\}}Inputs"
+// CHECK_WORKS: "-fdebug-compilation-dir" "{{[^"]+}}test{{/|\\\\}}Driver{{/|\\\\}}Inputs"

From d02da8f42c2eb60dcf97120e2baeced43c1ec6e6 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 28 May 2019 22:33:30 +0000
Subject: [PATCH 0416/1176] [SymbolFileDWARF] Remove unused member (NFC)

Removes the unused debug line instance.

llvm-svn: 361886
---
 lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 2 +-
 lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h   | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 8836de4ac5ee1..a015445198ce6 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -358,7 +358,7 @@ SymbolFileDWARF::SymbolFileDWARF(ObjectFile *objfile,
       m_debug_map_module_wp(), m_debug_map_symfile(nullptr),
       m_context(objfile->GetModule()->GetSectionList(), dwo_section_list),
       m_data_debug_loc(), m_data_debug_ranges(), m_data_debug_rnglists(),
-      m_abbr(), m_info(), m_line(), m_fetched_external_modules(false),
+      m_abbr(), m_info(), m_fetched_external_modules(false),
       m_supports_DW_AT_APPLE_objc_complete_type(eLazyBoolCalculate), m_ranges(),
       m_unique_ast_type_map() {}
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index 28cfe1a9c695f..ea11164700f00 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -453,11 +453,9 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
   DWARFDataSegment m_data_debug_rnglists;
 
   // The unique pointer items below are generated on demand if and when someone
-  // accesses
-  // them through a non const version of this class.
+  // accesses them through a non const version of this class.
   std::unique_ptr<DWARFDebugAbbrev> m_abbr;
   std::unique_ptr<DWARFDebugInfo> m_info;
-  std::unique_ptr<DWARFDebugLine> m_line;
   std::unique_ptr<GlobalVariableMap> m_global_aranges_up;
 
   typedef std::unordered_map<lldb::offset_t, lldb_private::DebugMacrosSP>

From 1efbe67414fd66e74050fb02100f04265e0a51ae Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Tue, 28 May 2019 22:42:32 +0000
Subject: [PATCH 0417/1176] Fix an incorrect 'Throws' in the regex code. Add a
 test for the new behavior. Reviewed as https://reviews.llvm.org/D61828.
 Thanks to Mark for the catch and the fix.

llvm-svn: 361887
---
 libcxx/include/regex                          |  2 +-
 .../re.regex.construct/bad_range.pass.cpp     | 38 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp

diff --git a/libcxx/include/regex b/libcxx/include/regex
index 1f397cd41f7d3..a0e3ba61e71c4 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -2306,7 +2306,7 @@ public:
             else
             {
                 if (__b.size() != 1 || __e.size() != 1)
-                    __throw_regex_error<regex_constants::error_collate>();
+                    __throw_regex_error<regex_constants::error_range>();
                 if (__icase_)
                 {
                     __b[0] = __traits_.translate_nocase(__b[0]);
diff --git a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp
new file mode 100644
index 0000000000000..198cea87d142b
--- /dev/null
+++ b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: libcpp-no-exceptions
+// <regex>
+
+// template <class charT, class traits = regex_traits<charT>> class basic_regex;
+
+// template <class ST, class SA>
+//    basic_regex(const basic_string<charT, ST, SA>& s);
+
+#include <regex>
+#include <cassert>
+#include "test_macros.h"
+
+static bool error_range_thrown(const char *pat)
+{
+    bool result = false;
+    try {
+        std::regex re(pat);
+    } catch (const std::regex_error &ex) {
+        result = (ex.code() == std::regex_constants::error_range);
+    }
+    return result;
+}
+
+int main(int, char**)
+{
+    assert(error_range_thrown(R"([\w-a])"));
+    assert(error_range_thrown(R"([a-\w])"));
+
+  return 0;
+}

From b73ea75b384df86d6db36aff02e7f5e5f744c48d Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Tue, 28 May 2019 22:52:49 +0000
Subject: [PATCH 0418/1176] [AArch64][GlobalISel] Select FCMPSri/FCMPDri when
 comparing against 0.0

Add support for selecting FCMPSri and FCMPDri when comparing against 0.0, and
factor out opcode selection for G_FCMP into its own function.

Add a test to show that we don't do this with other immediates.

Differential Revision: https://reviews.llvm.org/D62539

llvm-svn: 361888
---
 .../AArch64/AArch64InstructionSelector.cpp    | 40 ++++++++-----
 .../AArch64/GlobalISel/select-fcmp.mir        | 56 +++++++++++++++++++
 2 files changed, 83 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir

diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index bb878ef2f5c30..5fc272707f50e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -741,6 +741,20 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
   return GenericOpc;
 }
 
+/// Helper function to select the opcode for a G_FCMP.
+static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
+  // If this is a compare against +0.0, then we don't have to explicitly
+  // materialize a constant.
+  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
+  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
+  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+  if (OpSize != 32 && OpSize != 64)
+    return 0;
+  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+                              {AArch64::FCMPSri, AArch64::FCMPDri}};
+  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
+}
+
 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   switch (P) {
   default:
@@ -1845,15 +1859,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return false;
     }
 
-    unsigned CmpOpc = 0;
-    LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
-    if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::FCMPSrr;
-    } else if (CmpTy == LLT::scalar(64)) {
-      CmpOpc = AArch64::FCMPDrr;
-    } else {
+    unsigned CmpOpc = selectFCMPOpc(I, MRI);
+    if (!CmpOpc)
       return false;
-    }
 
     // FIXME: regbank
 
@@ -1861,9 +1869,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     changeFCMPPredToAArch64CC(
         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
 
-    MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
-                               .addUse(I.getOperand(2).getReg())
-                               .addUse(I.getOperand(3).getReg());
+    // Partially build the compare. Decide if we need to add a use for the
+    // third operand based off whether or not we're comparing against 0.0.
+    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+                     .addUse(I.getOperand(2).getReg());
+
+    // If we don't have an immediate compare, then we need to add a use of the
+    // register which wasn't used for the immediate.
+    // Note that the immediate will always be the last operand.
+    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
 
     const unsigned DefReg = I.getOperand(0).getReg();
     unsigned Def1Reg = DefReg;
@@ -1893,8 +1908,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
     }
-
-    constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
 
     I.eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
new file mode 100644
index 0000000000000..c3e9ea5bc5803
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# Verify that we get FCMPSri when we compare against 0.0 and that we get
+# FCMPSrr otherwise.
+
+...
+---
+name:            zero
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: zero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            notzero
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: notzero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[FMOVSi:%[0-9]+]]:fpr32 = FMOVSi 112
+    ; CHECK: FCMPSrr [[COPY]], [[FMOVSi]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 1.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0

From 75d38f1e4894b3835258810847c59df78c42d549 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 28 May 2019 23:03:33 +0000
Subject: [PATCH 0419/1176] Add debuginfo-tests that use cdb on Windows

This is an initial prototype of how we can run debugger integration
tests on Windows. cdb and windbg share a command language and debugger
engine. Visual Studio has its own, but we should at least be able to use
cdb as the basis for optimized debug info integration tests.

There's a lot of work to do here still. For example:
- Make fewer assumptions about the SDK location
- Don't assume x64 (important, I need x86 testing)
- More environment isolation, have lit setup vcvars instead of passing
  LIB and INCLUDE down.
- Write a .py file to replace the grep+sed RUN line

But, this seemed like a good enough concept to commit as is, since it's
useful to me already.

Reviewers: aprantl, zturner

Differential Revision: https://reviews.llvm.org/D54187

llvm-svn: 361889
---
 debuginfo-tests/CMakeLists.txt              |  3 ++
 debuginfo-tests/lit.cfg.py                  | 35 ++++++++++++++++++---
 debuginfo-tests/lit.site.cfg.py.in          |  1 +
 debuginfo-tests/win_cdb/README.txt          |  6 ++++
 debuginfo-tests/win_cdb/hello.c             | 14 +++++++++
 debuginfo-tests/win_cdb/lit.local.cfg.py    |  2 ++
 debuginfo-tests/win_cdb/realigned-frame.cpp | 34 ++++++++++++++++++++
 7 files changed, 90 insertions(+), 5 deletions(-)
 create mode 100644 debuginfo-tests/win_cdb/README.txt
 create mode 100644 debuginfo-tests/win_cdb/hello.c
 create mode 100644 debuginfo-tests/win_cdb/lit.local.cfg.py
 create mode 100644 debuginfo-tests/win_cdb/realigned-frame.cpp

diff --git a/debuginfo-tests/CMakeLists.txt b/debuginfo-tests/CMakeLists.txt
index fbab61c527da0..db35d5d5960f4 100644
--- a/debuginfo-tests/CMakeLists.txt
+++ b/debuginfo-tests/CMakeLists.txt
@@ -13,6 +13,9 @@ set(DEBUGINFO_TEST_DEPS
   not
   )
 
+# Indicate if this is an MSVC environment.
+pythonize_bool(MSVC)
+
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
diff --git a/debuginfo-tests/lit.cfg.py b/debuginfo-tests/lit.cfg.py
index a806c125555bd..c47f453f74cf6 100644
--- a/debuginfo-tests/lit.cfg.py
+++ b/debuginfo-tests/lit.cfg.py
@@ -38,6 +38,36 @@
 # test_exec_root: The root path where tests should be run.
 config.test_exec_root = config.debuginfo_tests_obj_root
 
+tools = [
+    ToolSubst('%test_debuginfo', command=os.path.join(
+        config.debuginfo_tests_src_root, 'test_debuginfo.pl')),
+]
+
+def get_required_attr(config, attr_name):
+  attr_value = getattr(config, attr_name, None)
+  if attr_value == None:
+    lit_config.fatal(
+      "No attribute %r in test configuration! You may need to run "
+      "tests from your build directory or add this attribute "
+      "to lit.site.cfg " % attr_name)
+  return attr_value
+
+# If this is an MSVC environment, the tests at the root of the tree are
+# unsupported. The local win_cdb test suite, however, is supported.
+is_msvc = get_required_attr(config, "is_msvc")
+if is_msvc:
+    # FIXME: We should add some llvm lit utility code to find the Windows SDK
+    # and set up the environment appopriately.
+    win_sdk = 'C:/Program Files (x86)/Windows Kits/10/'
+    arch = 'x64'
+    config.unsupported = True
+    llvm_config.with_system_environment(['LIB', 'LIBPATH', 'INCLUDE'])
+    # Clear _NT_SYMBOL_PATH to prevent cdb from attempting to load symbols from
+    # the network.
+    llvm_config.with_environment('_NT_SYMBOL_PATH', '')
+    tools.append(ToolSubst('%cdb', '"%s"' % os.path.join(win_sdk, 'Debuggers',
+                                                         arch, 'cdb.exe')))
+
 llvm_config.use_default_substitutions()
 
 # clang_src_dir is not used by these tests, but is required by
@@ -53,11 +83,6 @@
 
 tool_dirs = [config.llvm_tools_dir]
 
-tools = [
-    ToolSubst('%test_debuginfo', command=os.path.join(
-        config.debuginfo_tests_src_root, 'test_debuginfo.pl')),
-]
-
 llvm_config.add_tool_substitutions(tools, tool_dirs)
 
 lit.util.usePlatformSdkOnDarwin(config, lit_config)
diff --git a/debuginfo-tests/lit.site.cfg.py.in b/debuginfo-tests/lit.site.cfg.py.in
index 70169ddb1d7e3..491f4546c1d6c 100644
--- a/debuginfo-tests/lit.site.cfg.py.in
+++ b/debuginfo-tests/lit.site.cfg.py.in
@@ -17,6 +17,7 @@ config.has_lld = lit.util.pythonize_bool("@DEBUGINFO_TESTS_HAS_LLD@")
 config.host_triple = "@LLVM_HOST_TRIPLE@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.host_arch = "@HOST_ARCH@"
+config.is_msvc = @MSVC_PYBOOL@
 
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 
diff --git a/debuginfo-tests/win_cdb/README.txt b/debuginfo-tests/win_cdb/README.txt
new file mode 100644
index 0000000000000..ebb90dfdc1dca
--- /dev/null
+++ b/debuginfo-tests/win_cdb/README.txt
@@ -0,0 +1,6 @@
+These are debug info integration tests similar to the ones in the parent
+directory, except that these are designed to test compatibility between clang,
+lld, and cdb, the command line debugger that ships as part of the Microsoft
+Windows SDK. The debugger command language that cdb uses is very different from
+gdb and LLDB, so it's useful to be able to write some tests directly in the cdb
+command language.
diff --git a/debuginfo-tests/win_cdb/hello.c b/debuginfo-tests/win_cdb/hello.c
new file mode 100644
index 0000000000000..00a15e8dbede6
--- /dev/null
+++ b/debuginfo-tests/win_cdb/hello.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cl %s -o %t.exe -fuse-ld=lld -Z7
+// RUN: grep DE[B]UGGER: %s | sed -e 's/.*DE[B]UGGER: //' > %t.script
+// RUN: %cdb -cf %t.script %t.exe | FileCheck %s --check-prefixes=DEBUGGER,CHECK
+
+#include <stdio.h>
+int main() {
+  printf("hello world\n");
+  int x = 42;
+  __debugbreak();
+  // DEBUGGER: g
+  // DEBUGGER: dv
+  // CHECK: x = 0n42
+}
+// DEBUGGER: q
diff --git a/debuginfo-tests/win_cdb/lit.local.cfg.py b/debuginfo-tests/win_cdb/lit.local.cfg.py
new file mode 100644
index 0000000000000..e4d61b1fbb635
--- /dev/null
+++ b/debuginfo-tests/win_cdb/lit.local.cfg.py
@@ -0,0 +1,2 @@
+# The win_cdb tests are supported when cmake was run in an MSVC environment.
+config.unsupported = not config.is_msvc
diff --git a/debuginfo-tests/win_cdb/realigned-frame.cpp b/debuginfo-tests/win_cdb/realigned-frame.cpp
new file mode 100644
index 0000000000000..a964b43e0cbd4
--- /dev/null
+++ b/debuginfo-tests/win_cdb/realigned-frame.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cl %s -o %t.exe -fuse-ld=lld -Z7
+// RUN: grep DE[B]UGGER: %s | sed -e 's/.*DE[B]UGGER: //' > %t.script
+// RUN: %cdb -cf %t.script %t.exe | FileCheck %s --check-prefixes=DEBUGGER,CHECK
+
+// From https://llvm.org/pr38857, where we had issues with stack realignment.
+
+struct Foo {
+  int x = 42;
+  int __declspec(noinline) foo();
+  void __declspec(noinline) bar(int *a, int *b, double *c);
+};
+int Foo::foo() {
+  int a = 1;
+  int b = 2;
+  double __declspec(align(32)) force_alignment = 0.42;
+  bar(&a, &b, &force_alignment);
+  // DEBUGGER: g
+  // DEBUGGER: .frame 1
+  // DEBUGGER: dv
+  // CHECK: a = 0n1
+  // CHECK: b = 0n2
+  // CHECK: force_alignment = 0.41999{{.*}}
+  // DEBUGGER: q
+  x += (int)force_alignment;
+  return x;
+}
+void Foo::bar(int *a, int *b, double *c) {
+  __debugbreak();
+  *c += *a + *b;
+}
+int main() {
+  Foo o;
+  o.foo();
+}

From 94ef686f575c5cd66b0b30e34aaa5dc7b05ad361 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 28 May 2019 23:09:42 +0000
Subject: [PATCH 0420/1176] Move code to mark a variable as odr-used adjacement
 to all the related code.

No functional change intended.

llvm-svn: 361890
---
 clang/include/clang/Sema/Sema.h         |  2 ++
 clang/include/clang/Sema/SemaInternal.h | 30 ---------------------
 clang/lib/Sema/SemaExpr.cpp             | 36 +++++++++++++++++++++++++
 clang/lib/Sema/SemaExprCXX.cpp          |  8 +++---
 4 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a1a7f5f6ea942..db6435461eec8 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4165,6 +4165,8 @@ class Sema {
   void MarkVariableReferenced(SourceLocation Loc, VarDecl *Var);
   void MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base = nullptr);
   void MarkMemberReferenced(MemberExpr *E);
+  void MarkCaptureUsedInEnclosingContext(VarDecl *Capture, SourceLocation Loc,
+                                         unsigned CapturingScopeIndex);
 
   void UpdateMarkingForLValueToRValue(Expr *E);
   void CleanupVarDeclMarking();
diff --git a/clang/include/clang/Sema/SemaInternal.h b/clang/include/clang/Sema/SemaInternal.h
index 07e633cab8ee7..dfeca60349e93 100644
--- a/clang/include/clang/Sema/SemaInternal.h
+++ b/clang/include/clang/Sema/SemaInternal.h
@@ -59,36 +59,6 @@ inline bool DeclAttrsMatchCUDAMode(const LangOptions &LangOpts, Decl *D) {
   return isDeviceSideDecl == LangOpts.CUDAIsDevice;
 }
 
-// Directly mark a variable odr-used. Given a choice, prefer to use
-// MarkVariableReferenced since it does additional checks and then
-// calls MarkVarDeclODRUsed.
-// If the variable must be captured:
-//  - if FunctionScopeIndexToStopAt is null, capture it in the CurContext
-//  - else capture it in the DeclContext that maps to the
-//    *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack.
-inline void MarkVarDeclODRUsed(VarDecl *Var,
-    SourceLocation Loc, Sema &SemaRef,
-    const unsigned *const FunctionScopeIndexToStopAt) {
-  // Keep track of used but undefined variables.
-  // FIXME: We shouldn't suppress this warning for static data members.
-  if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly &&
-      (!Var->isExternallyVisible() || Var->isInline() ||
-       SemaRef.isExternalWithNoLinkageType(Var)) &&
-      !(Var->isStaticDataMember() && Var->hasInit())) {
-    SourceLocation &old = SemaRef.UndefinedButUsed[Var->getCanonicalDecl()];
-    if (old.isInvalid())
-      old = Loc;
-  }
-  QualType CaptureType, DeclRefType;
-  SemaRef.tryCaptureVariable(Var, Loc, Sema::TryCapture_Implicit,
-    /*EllipsisLoc*/ SourceLocation(),
-    /*BuildAndDiagnose*/ true,
-    CaptureType, DeclRefType,
-    FunctionScopeIndexToStopAt);
-
-  Var->markUsed(SemaRef.Context);
-}
-
 /// Return a DLL attribute from the declaration.
 inline InheritableAttr *getDLLAttr(Decl *D) {
   assert(!(D->hasAttr<DLLImportAttr>() && D->hasAttr<DLLExportAttr>()) &&
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3a12c2dd84ffb..cc3dea9ead034 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -15013,6 +15013,42 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   }
 }
 
+/// Directly mark a variable odr-used. Given a choice, prefer to use
+/// MarkVariableReferenced since it does additional checks and then
+/// calls MarkVarDeclODRUsed.
+/// If the variable must be captured:
+///  - if FunctionScopeIndexToStopAt is null, capture it in the CurContext
+///  - else capture it in the DeclContext that maps to the
+///    *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack.
+static void
+MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
+                   const unsigned *const FunctionScopeIndexToStopAt) {
+  // Keep track of used but undefined variables.
+  // FIXME: We shouldn't suppress this warning for static data members.
+  if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly &&
+      (!Var->isExternallyVisible() || Var->isInline() ||
+       SemaRef.isExternalWithNoLinkageType(Var)) &&
+      !(Var->isStaticDataMember() && Var->hasInit())) {
+    SourceLocation &old = SemaRef.UndefinedButUsed[Var->getCanonicalDecl()];
+    if (old.isInvalid())
+      old = Loc;
+  }
+  QualType CaptureType, DeclRefType;
+  SemaRef.tryCaptureVariable(Var, Loc, Sema::TryCapture_Implicit,
+    /*EllipsisLoc*/ SourceLocation(),
+    /*BuildAndDiagnose*/ true,
+    CaptureType, DeclRefType,
+    FunctionScopeIndexToStopAt);
+
+  Var->markUsed(SemaRef.Context);
+}
+
+void Sema::MarkCaptureUsedInEnclosingContext(VarDecl *Capture,
+                                             SourceLocation Loc,
+                                             unsigned CapturingScopeIndex) {
+  MarkVarDeclODRUsed(Capture, Loc, *this, &CapturingScopeIndex);
+}
+
 static void
 diagnoseUncapturableValueReference(Sema &S, SourceLocation loc,
                                    ValueDecl *var, DeclContext *DC) {
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index e3286e8943f2a..ef27fc2d71913 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7495,11 +7495,9 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
     // capture the variable in that lambda (and all its enclosing lambdas).
     if (const Optional<unsigned> Index =
             getStackIndexOfNearestEnclosingCaptureCapableLambda(
-                S.FunctionScopes, Var, S)) {
-      const unsigned FunctionScopeIndexOfCapturableLambda = Index.getValue();
-      MarkVarDeclODRUsed(Var, VarExpr->getExprLoc(), S,
-                         &FunctionScopeIndexOfCapturableLambda);
-    }
+                S.FunctionScopes, Var, S))
+      S.MarkCaptureUsedInEnclosingContext(Var, VarExpr->getExprLoc(),
+                                          Index.getValue());
     const bool IsVarNeverAConstantExpression =
         VariableCanNeverBeAConstantExpression(Var, S.Context);
     if (!IsFullExprInstantiationDependent || IsVarNeverAConstantExpression) {

From 8cb63232d9c7c18a27e36edcd16ae42b2eccefee Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 28 May 2019 23:09:44 +0000
Subject: [PATCH 0421/1176] If capturing a variable fails, add a capture anyway
 (and mark it invalid) so that we can avoid repeated diagnostics for the same
 capture.

llvm-svn: 361891
---
 clang/include/clang/Sema/ScopeInfo.h          |  31 +++--
 clang/lib/Sema/SemaDecl.cpp                   |   2 +-
 clang/lib/Sema/SemaExpr.cpp                   | 108 ++++++++++--------
 clang/lib/Sema/SemaLambda.cpp                 |   5 +-
 clang/lib/Sema/SemaStmt.cpp                   |   3 +
 .../expr/expr.prim/expr.prim.lambda/blocks.mm |   7 ++
 clang/test/Sema/captured-statements.c         |   9 +-
 clang/test/SemaCXX/lambda-expressions.cpp     |   4 +-
 .../capturing-flexible-array-in-block.mm      |   5 +-
 9 files changed, 107 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 2aa1caf699c22..9fd34d147d8e9 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -539,23 +539,28 @@ class Capture {
   /// the lambda.
   bool NonODRUsed = false;
 
+  /// Whether the capture is invalid (a capture was required but the entity is
+  /// non-capturable).
+  bool Invalid = false;
+
 public:
   Capture(VarDecl *Var, bool Block, bool ByRef, bool IsNested,
-          SourceLocation Loc, SourceLocation EllipsisLoc,
-          QualType CaptureType, Expr *Cpy)
+          SourceLocation Loc, SourceLocation EllipsisLoc, QualType CaptureType,
+          Expr *Cpy, bool Invalid)
       : VarAndNestedAndThis(Var, IsNested ? IsNestedCapture : 0),
         InitExprAndCaptureKind(
-            Cpy, !Var ? Cap_VLA : Block ? Cap_Block : ByRef ? Cap_ByRef
-                                                            : Cap_ByCopy),
-        Loc(Loc), EllipsisLoc(EllipsisLoc), CaptureType(CaptureType) {}
+            Cpy, !Var ? Cap_VLA
+                      : Block ? Cap_Block : ByRef ? Cap_ByRef : Cap_ByCopy),
+        Loc(Loc), EllipsisLoc(EllipsisLoc), CaptureType(CaptureType),
+        Invalid(Invalid) {}
 
   enum IsThisCapture { ThisCapture };
   Capture(IsThisCapture, bool IsNested, SourceLocation Loc,
-          QualType CaptureType, Expr *Cpy, const bool ByCopy)
+          QualType CaptureType, Expr *Cpy, const bool ByCopy, bool Invalid)
       : VarAndNestedAndThis(
             nullptr, (IsThisCaptured | (IsNested ? IsNestedCapture : 0))),
-        InitExprAndCaptureKind(Cpy, ByCopy ? Cap_ByCopy : Cap_ByRef),
-        Loc(Loc), CaptureType(CaptureType) {}
+        InitExprAndCaptureKind(Cpy, ByCopy ? Cap_ByCopy : Cap_ByRef), Loc(Loc),
+        CaptureType(CaptureType), Invalid(Invalid) {}
 
   bool isThisCapture() const {
     return VarAndNestedAndThis.getInt() & IsThisCaptured;
@@ -585,6 +590,8 @@ class Capture {
     return VarAndNestedAndThis.getInt() & IsNestedCapture;
   }
 
+  bool isInvalid() const { return Invalid; }
+
   bool isODRUsed() const { return ODRUsed; }
   bool isNonODRUsed() const { return NonODRUsed; }
   void markUsed(bool IsODRUse) { (IsODRUse ? ODRUsed : NonODRUsed) = true; }
@@ -650,9 +657,9 @@ class CapturingScopeInfo : public FunctionScopeInfo {
 
   void addCapture(VarDecl *Var, bool isBlock, bool isByref, bool isNested,
                   SourceLocation Loc, SourceLocation EllipsisLoc,
-                  QualType CaptureType, Expr *Cpy) {
+                  QualType CaptureType, Expr *Cpy, bool Invalid) {
     Captures.push_back(Capture(Var, isBlock, isByref, isNested, Loc,
-                               EllipsisLoc, CaptureType, Cpy));
+                               EllipsisLoc, CaptureType, Cpy, Invalid));
     CaptureMap[Var] = Captures.size();
   }
 
@@ -660,7 +667,7 @@ class CapturingScopeInfo : public FunctionScopeInfo {
     Captures.push_back(Capture(/*Var*/ nullptr, /*isBlock*/ false,
                                /*isByref*/ false, /*isNested*/ false, Loc,
                                /*EllipsisLoc*/ SourceLocation(), CaptureType,
-                               /*Cpy*/ nullptr));
+                               /*Cpy*/ nullptr, /*Invalid*/ false));
   }
 
   // Note, we do not need to add the type of 'this' since that is always
@@ -1016,7 +1023,7 @@ CapturingScopeInfo::addThisCapture(bool isNested, SourceLocation Loc,
                                    Expr *Cpy,
                                    const bool ByCopy) {
   Captures.push_back(Capture(Capture::ThisCapture, isNested, Loc, QualType(),
-                             Cpy, ByCopy));
+                             Cpy, ByCopy, /*Invalid*/ false));
   CXXThisCaptureIndex = Captures.size();
 }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index a035f200fce61..188e801b4c436 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12939,7 +12939,7 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
           /*RefersToEnclosingVariableOrCapture*/true, C.getLocation(),
           /*EllipsisLoc*/C.isPackExpansion()
                          ? C.getEllipsisLoc() : SourceLocation(),
-          CaptureType, /*Expr*/ nullptr);
+          CaptureType, /*Expr*/ nullptr, /*Invalid*/false);
 
     } else if (C.capturesThis()) {
       LSI->addThisCapture(/*Nested*/ false, C.getLocation(),
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index cc3dea9ead034..95be7af8b6856 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -13853,10 +13853,9 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
   QualType BlockTy;
 
   // Set the captured variables on the block.
-  // FIXME: Share capture structure between BlockDecl and CapturingScopeInfo!
   SmallVector<BlockDecl::Capture, 4> Captures;
   for (Capture &Cap : BSI->Captures) {
-    if (Cap.isThisCapture())
+    if (Cap.isInvalid() || Cap.isThisCapture())
       continue;
     BlockDecl::Capture NewCap(Cap.getVariable(), Cap.isBlockCapture(),
                               Cap.isNested(), Cap.getInitExpr());
@@ -15212,31 +15211,36 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
                                  QualType &CaptureType,
                                  QualType &DeclRefType,
                                  const bool Nested,
-                                 Sema &S) {
+                                 Sema &S, bool Invalid) {
   Expr *CopyExpr = nullptr;
   bool ByRef = false;
 
   // Blocks are not allowed to capture arrays, excepting OpenCL.
   // OpenCL v2.0 s1.12.5 (revision 40): arrays are captured by reference
   // (decayed to pointers).
-  if (!S.getLangOpts().OpenCL && CaptureType->isArrayType()) {
+  if (!Invalid && !S.getLangOpts().OpenCL && CaptureType->isArrayType()) {
     if (BuildAndDiagnose) {
       S.Diag(Loc, diag::err_ref_array_type);
       S.Diag(Var->getLocation(), diag::note_previous_decl)
       << Var->getDeclName();
+      Invalid = true;
+    } else {
+      return false;
     }
-    return false;
   }
 
   // Forbid the block-capture of autoreleasing variables.
-  if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) {
+  if (!Invalid &&
+      CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) {
     if (BuildAndDiagnose) {
       S.Diag(Loc, diag::err_arc_autoreleasing_capture)
         << /*block*/ 0;
       S.Diag(Var->getLocation(), diag::note_previous_decl)
         << Var->getDeclName();
+      Invalid = true;
+    } else {
+      return false;
     }
-    return false;
   }
 
   // Warn about implicitly autoreleasing indirect parameters captured by blocks.
@@ -15259,7 +15263,7 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
 
     QualType PointeeTy = PT->getPointeeType();
 
-    if (PointeeTy->getAs<ObjCObjectPointerType>() &&
+    if (!Invalid && PointeeTy->getAs<ObjCObjectPointerType>() &&
         PointeeTy.getObjCLifetime() == Qualifiers::OCL_Autoreleasing &&
         !IsObjCOwnershipAttributedType(PointeeTy)) {
       if (BuildAndDiagnose) {
@@ -15323,11 +15327,10 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
 
   // Actually capture the variable.
   if (BuildAndDiagnose)
-    BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc,
-                    SourceLocation(), CaptureType, CopyExpr);
-
-  return true;
+    BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(),
+                    CaptureType, CopyExpr, Invalid);
 
+  return !Invalid;
 }
 
 
@@ -15339,7 +15342,7 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
                                     QualType &CaptureType,
                                     QualType &DeclRefType,
                                     const bool RefersToCapturedVariable,
-                                    Sema &S) {
+                                    Sema &S, bool Invalid) {
   // By default, capture variables by reference.
   bool ByRef = true;
   // Using an LValue reference type is consistent with Lambdas (see below).
@@ -15384,11 +15387,11 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
 
   // Actually capture the variable.
   if (BuildAndDiagnose)
-    RSI->addCapture(Var, /*isBlock*/false, ByRef, RefersToCapturedVariable, Loc,
-                    SourceLocation(), CaptureType, CopyExpr);
+    RSI->addCapture(Var, /*isBlock*/ false, ByRef, RefersToCapturedVariable,
+                    Loc, SourceLocation(), CaptureType, CopyExpr,
+                    Invalid);
 
-
-  return true;
+  return !Invalid;
 }
 
 /// Create a field within the lambda class for the variable
@@ -15435,8 +15438,7 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
                             const Sema::TryCaptureKind Kind,
                             SourceLocation EllipsisLoc,
                             const bool IsTopScope,
-                            Sema &S) {
-
+                            Sema &S, bool Invalid) {
   // Determine whether we are capturing by reference or by value.
   bool ByRef = false;
   if (IsTopScope && Kind != Sema::TryCapture_Implicit) {
@@ -15477,31 +15479,33 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
     }
 
     // Forbid the lambda copy-capture of autoreleasing variables.
-    if (CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) {
+    if (!Invalid &&
+        CaptureType.getObjCLifetime() == Qualifiers::OCL_Autoreleasing) {
       if (BuildAndDiagnose) {
         S.Diag(Loc, diag::err_arc_autoreleasing_capture) << /*lambda*/ 1;
         S.Diag(Var->getLocation(), diag::note_previous_decl)
           << Var->getDeclName();
+        Invalid = true;
+      } else {
+        return false;
       }
-      return false;
     }
 
     // Make sure that by-copy captures are of a complete and non-abstract type.
-    if (BuildAndDiagnose) {
+    if (!Invalid && BuildAndDiagnose) {
       if (!CaptureType->isDependentType() &&
           S.RequireCompleteType(Loc, CaptureType,
                                 diag::err_capture_of_incomplete_type,
                                 Var->getDeclName()))
-        return false;
-
-      if (S.RequireNonAbstractType(Loc, CaptureType,
-                                   diag::err_capture_of_abstract_type))
-        return false;
+        Invalid = true;
+      else if (S.RequireNonAbstractType(Loc, CaptureType,
+                                        diag::err_capture_of_abstract_type))
+        Invalid = true;
     }
   }
 
   // Capture this variable in the lambda.
-  if (BuildAndDiagnose)
+  if (BuildAndDiagnose && !Invalid)
     addAsFieldToClosureType(S, LSI, CaptureType, DeclRefType, Loc,
                             RefersToCapturedVariable);
 
@@ -15522,9 +15526,10 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
   // Add the capture.
   if (BuildAndDiagnose)
     LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable,
-                    Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr);
+                    Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr,
+                    Invalid);
 
-  return true;
+  return !Invalid;
 }
 
 bool Sema::tryCaptureVariable(
@@ -15622,11 +15627,6 @@ bool Sema::tryCaptureVariable(
       }
       return true;
     }
-    // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture
-    // certain types of variables (unnamed, variably modified types etc.)
-    // so check for eligibility.
-    if (!isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this))
-       return true;
 
     // Try to capture variable-length arrays types.
     if (Var->getType()->isVariablyModifiedType()) {
@@ -15697,33 +15697,45 @@ bool Sema::tryCaptureVariable(
   // requirements, and adding captures if requested.
   // If the variable had already been captured previously, we start capturing
   // at the lambda nested within that one.
+  bool Invalid = false;
   for (unsigned I = ++FunctionScopesIndex, N = MaxFunctionScopesIndex + 1; I != N;
        ++I) {
     CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[I]);
 
+    // Certain capturing entities (lambdas, blocks etc.) are not allowed to capture
+    // certain types of variables (unnamed, variably modified types etc.)
+    // so check for eligibility.
+    if (!Invalid)
+      Invalid =
+          !isVariableCapturable(CSI, Var, ExprLoc, BuildAndDiagnose, *this);
+
+    // After encountering an error, if we're actually supposed to capture, keep
+    // capturing in nested contexts to suppress any follow-on diagnostics.
+    if (Invalid && !BuildAndDiagnose)
+      return true;
+
     if (BlockScopeInfo *BSI = dyn_cast<BlockScopeInfo>(CSI)) {
-      if (!captureInBlock(BSI, Var, ExprLoc,
-                          BuildAndDiagnose, CaptureType,
-                          DeclRefType, Nested, *this))
-        return true;
+      Invalid = !captureInBlock(BSI, Var, ExprLoc, BuildAndDiagnose, CaptureType,
+                               DeclRefType, Nested, *this, Invalid);
       Nested = true;
     } else if (CapturedRegionScopeInfo *RSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
-      if (!captureInCapturedRegion(RSI, Var, ExprLoc,
-                                   BuildAndDiagnose, CaptureType,
-                                   DeclRefType, Nested, *this))
-        return true;
+      Invalid = !captureInCapturedRegion(RSI, Var, ExprLoc, BuildAndDiagnose,
+                                         CaptureType, DeclRefType, Nested,
+                                         *this, Invalid);
       Nested = true;
     } else {
       LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(CSI);
-      if (!captureInLambda(LSI, Var, ExprLoc,
-                           BuildAndDiagnose, CaptureType,
+      Invalid =
+          !captureInLambda(LSI, Var, ExprLoc, BuildAndDiagnose, CaptureType,
                            DeclRefType, Nested, Kind, EllipsisLoc,
-                            /*IsTopScope*/I == N - 1, *this))
-        return true;
+                           /*IsTopScope*/ I == N - 1, *this, Invalid);
       Nested = true;
     }
+
+    if (Invalid && !BuildAndDiagnose)
+      return true;
   }
-  return false;
+  return Invalid;
 }
 
 bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc,
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index b2055dd650e2f..f6c9dee2a0819 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -854,7 +854,7 @@ FieldDecl *Sema::buildInitCaptureField(LambdaScopeInfo *LSI, VarDecl *Var) {
 
   LSI->addCapture(Var, /*isBlock*/false, Var->getType()->isReferenceType(),
                   /*isNested*/false, Var->getLocation(), SourceLocation(),
-                  Var->getType(), Var->getInit());
+                  Var->getType(), Var->getInit(), /*Invalid*/false);
   return Field;
 }
 
@@ -1586,6 +1586,9 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
     for (unsigned I = 0, N = LSI->Captures.size(); I != N; ++I, ++CurField) {
       const Capture &From = LSI->Captures[I];
 
+      if (From.isInvalid())
+        return ExprError();
+
       assert(!From.isBlockCapture() && "Cannot capture __block variables");
       bool IsImplicit = I >= LSI->NumExplicitCaptures;
 
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index c7fb5654516a6..51a72c618b7de 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4228,6 +4228,9 @@ buildCapturedStmtCaptureList(SmallVectorImpl<CapturedStmt::Capture> &Captures,
                              SmallVectorImpl<Expr *> &CaptureInits,
                              ArrayRef<sema::Capture> Candidates) {
   for (const sema::Capture &Cap : Candidates) {
+    if (Cap.isInvalid())
+      continue;
+
     if (Cap.isThisCapture()) {
       Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
                                                CapturedStmt::VCK_This));
diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm
index 96e8fcd8d3717..cb56f6816ad03 100644
--- a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm
+++ b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/blocks.mm
@@ -50,6 +50,13 @@ void nesting() {
     [=] () mutable {
       ^ {
         int i = array[2]; // expected-error{{cannot refer to declaration with an array type inside block}}
+        i += array[3];
+      }();
+    }();
+
+    [=] () mutable {
+      ^ {
+        int i = 0;
         i += array[3]; // expected-error{{cannot refer to declaration with an array type inside block}}
       }();
     }();
diff --git a/clang/test/Sema/captured-statements.c b/clang/test/Sema/captured-statements.c
index 86e9273944bf3..ac04915097ebb 100644
--- a/clang/test/Sema/captured-statements.c
+++ b/clang/test/Sema/captured-statements.c
@@ -65,11 +65,18 @@ void test_nest_block() {
   int b;
   #pragma clang __debug captured
   {
-    __block int c;
     int d;
     ^{
       a = b; // expected-error{{__block variable 'a' cannot be captured in a captured statement}}
+      a = b; // (duplicate diagnostic suppressed)
       b = d; // OK - Consistent with block inside a lambda
+    }();
+  }
+  #pragma clang __debug captured
+  {
+    __block int c;
+    int d;
+    ^{
       c = a; // expected-error{{__block variable 'a' cannot be captured in a captured statement}}
       c = d; // OK
       d = b; // expected-error{{variable is not assignable (missing __block type specifier)}}
diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp
index 1833400be3d14..8b0b83078b0a1 100644
--- a/clang/test/SemaCXX/lambda-expressions.cpp
+++ b/clang/test/SemaCXX/lambda-expressions.cpp
@@ -65,9 +65,9 @@ namespace ImplicitCapture {
     d = 3;
     [=]() { return c; }; // expected-error {{unnamed variable cannot be implicitly captured in a lambda expression}}
 
-    __block int e; // expected-note 3 {{declared}}
+    __block int e; // expected-note 2{{declared}}
     [&]() { return e; }; // expected-error {{__block variable 'e' cannot be captured in a lambda expression}}
-    [&e]() { return e; }; // expected-error 2 {{__block variable 'e' cannot be captured in a lambda expression}}
+    [&e]() { return e; }; // expected-error {{__block variable 'e' cannot be captured in a lambda expression}}
 
     int f[10]; // expected-note {{declared}}
     [&]() { return f[2]; };
diff --git a/clang/test/SemaObjCXX/capturing-flexible-array-in-block.mm b/clang/test/SemaObjCXX/capturing-flexible-array-in-block.mm
index d7d888564c1e3..cf88d4684c58b 100644
--- a/clang/test/SemaObjCXX/capturing-flexible-array-in-block.mm
+++ b/clang/test/SemaObjCXX/capturing-flexible-array-in-block.mm
@@ -2,7 +2,8 @@
 // rdar://12655829
 
 void f() {
-  struct { int x; int y[]; } a; // expected-note 2 {{'a' declared here}}
+  struct { int x; int y[]; } a; // expected-note 3 {{'a' declared here}}
   ^{return a.x;}(); // expected-error {{cannot refer to declaration of structure variable with flexible array member inside block}}
-  [] {return a.x;}(); // expected-error {{variable 'a' with flexible array member cannot be captured in a lambda expression}}
+  [=] {return a.x;}(); // expected-error {{variable 'a' with flexible array member cannot be captured in a lambda expression}}
+  [] {return a.x;}(); // expected-error {{variable 'a' cannot be implicitly captured in a lambda with no capture-default}} expected-note {{here}}
 }

From e925be1339052b2f363d1b708370a45868fada04 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 28 May 2019 23:09:45 +0000
Subject: [PATCH 0422/1176] Simplify clang::Capture. No functionality change
 intended.

We don't need to pack flags into the bottom bits of pointers here; we
have plenty of trailing bits in this type.

llvm-svn: 361892
---
 clang/include/clang/Sema/ScopeInfo.h | 102 ++++++++++++---------------
 1 file changed, 46 insertions(+), 56 deletions(-)

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 9fd34d147d8e9..9b4d9638d236b 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -506,20 +506,14 @@ class Capture {
   enum CaptureKind {
     Cap_ByCopy, Cap_ByRef, Cap_Block, Cap_VLA
   };
-  enum {
-    IsNestedCapture = 0x1,
-    IsThisCaptured = 0x2
-  };
 
-  /// The variable being captured (if we are not capturing 'this') and whether
-  /// this is a nested capture, and whether we are capturing 'this'
-  llvm::PointerIntPair<VarDecl*, 2> VarAndNestedAndThis;
+  /// If !CapturesThis, the captured variable.
+  VarDecl *CapturedVar = nullptr;
 
-  /// Expression to initialize a field of the given type, and the kind of
-  /// capture (if this is a capture and not an init-capture). The expression
-  /// is only required if we are capturing ByVal and the variable's type has
-  /// a non-trivial copy constructor.
-  llvm::PointerIntPair<void *, 2, CaptureKind> InitExprAndCaptureKind;
+  /// Expression to initialize a field of the given type. This is only required
+  /// if we are capturing ByVal and the variable's type has a non-trivial copy
+  /// constructor.
+  Expr *InitExpr = nullptr;
 
   /// The source location at which the first capture occurred.
   SourceLocation Loc;
@@ -527,78 +521,77 @@ class Capture {
   /// The location of the ellipsis that expands a parameter pack.
   SourceLocation EllipsisLoc;
 
-  /// The type as it was captured, which is in effect the type of the
-  /// non-static data member that would hold the capture.
+  /// The type as it was captured, which is the type of the non-static data
+  /// member that would hold the capture.
   QualType CaptureType;
 
+  /// The CaptureKind of this capture.
+  unsigned Kind : 2;
+
+  /// Whether this is a nested capture (a capture of an enclosing capturing
+  /// scope's capture).
+  unsigned Nested : 1;
+
+  /// Whether this is a capture of '*this'.
+  unsigned CapturesThis : 1;
+
   /// Whether an explicit capture has been odr-used in the body of the
   /// lambda.
-  bool ODRUsed = false;
+  unsigned ODRUsed : 1;
 
   /// Whether an explicit capture has been non-odr-used in the body of
   /// the lambda.
-  bool NonODRUsed = false;
+  unsigned NonODRUsed : 1;
 
   /// Whether the capture is invalid (a capture was required but the entity is
   /// non-capturable).
-  bool Invalid = false;
+  unsigned Invalid : 1;
 
 public:
   Capture(VarDecl *Var, bool Block, bool ByRef, bool IsNested,
           SourceLocation Loc, SourceLocation EllipsisLoc, QualType CaptureType,
           Expr *Cpy, bool Invalid)
-      : VarAndNestedAndThis(Var, IsNested ? IsNestedCapture : 0),
-        InitExprAndCaptureKind(
-            Cpy, !Var ? Cap_VLA
-                      : Block ? Cap_Block : ByRef ? Cap_ByRef : Cap_ByCopy),
-        Loc(Loc), EllipsisLoc(EllipsisLoc), CaptureType(CaptureType),
-        Invalid(Invalid) {}
+      : CapturedVar(Var), InitExpr(Cpy), Loc(Loc), EllipsisLoc(EllipsisLoc),
+        CaptureType(CaptureType),
+        Kind(!Var ? Cap_VLA
+                  : Block ? Cap_Block : ByRef ? Cap_ByRef : Cap_ByCopy),
+        Nested(IsNested), CapturesThis(false), ODRUsed(false),
+        NonODRUsed(false), Invalid(Invalid) {}
 
   enum IsThisCapture { ThisCapture };
   Capture(IsThisCapture, bool IsNested, SourceLocation Loc,
           QualType CaptureType, Expr *Cpy, const bool ByCopy, bool Invalid)
-      : VarAndNestedAndThis(
-            nullptr, (IsThisCaptured | (IsNested ? IsNestedCapture : 0))),
-        InitExprAndCaptureKind(Cpy, ByCopy ? Cap_ByCopy : Cap_ByRef), Loc(Loc),
-        CaptureType(CaptureType), Invalid(Invalid) {}
-
-  bool isThisCapture() const {
-    return VarAndNestedAndThis.getInt() & IsThisCaptured;
-  }
+      : InitExpr(Cpy), Loc(Loc), CaptureType(CaptureType),
+        Kind(ByCopy ? Cap_ByCopy : Cap_ByRef), Nested(IsNested),
+        CapturesThis(true), ODRUsed(false), NonODRUsed(false),
+        Invalid(Invalid) {}
 
+  bool isThisCapture() const { return CapturesThis; }
   bool isVariableCapture() const {
     return !isThisCapture() && !isVLATypeCapture();
   }
 
-  bool isCopyCapture() const {
-    return InitExprAndCaptureKind.getInt() == Cap_ByCopy;
-  }
-
-  bool isReferenceCapture() const {
-    return InitExprAndCaptureKind.getInt() == Cap_ByRef;
-  }
+  bool isCopyCapture() const { return Kind == Cap_ByCopy; }
+  bool isReferenceCapture() const { return Kind == Cap_ByRef; }
+  bool isBlockCapture() const { return Kind == Cap_Block; }
+  bool isVLATypeCapture() const { return Kind == Cap_VLA; }
 
-  bool isBlockCapture() const {
-    return InitExprAndCaptureKind.getInt() == Cap_Block;
-  }
-
-  bool isVLATypeCapture() const {
-    return InitExprAndCaptureKind.getInt() == Cap_VLA;
-  }
-
-  bool isNested() const {
-    return VarAndNestedAndThis.getInt() & IsNestedCapture;
-  }
+  bool isNested() const { return Nested; }
 
   bool isInvalid() const { return Invalid; }
 
   bool isODRUsed() const { return ODRUsed; }
   bool isNonODRUsed() const { return NonODRUsed; }
-  void markUsed(bool IsODRUse) { (IsODRUse ? ODRUsed : NonODRUsed) = true; }
+  void markUsed(bool IsODRUse) {
+    if (IsODRUse)
+      ODRUsed = true;
+    else
+      NonODRUsed = true;
+  }
 
   VarDecl *getVariable() const {
     assert(isVariableCapture());
-    return VarAndNestedAndThis.getPointer();
+    return CapturedVar;
   }
 
   /// Retrieve the location at which this variable was captured.
@@ -611,14 +604,11 @@ class Capture {
   /// Retrieve the capture type for this capture, which is effectively
   /// the type of the non-static data member in the lambda/block structure
   /// that would store this capture.
-  QualType getCaptureType() const {
-    assert(!isThisCapture());
-    return CaptureType;
-  }
+  QualType getCaptureType() const { return CaptureType; }
 
   Expr *getInitExpr() const {
     assert(!isVLATypeCapture() && "no init expression for type capture");
-    return static_cast<Expr *>(InitExprAndCaptureKind.getPointer());
+    return InitExpr;
   }
 };
 

From 30116531b8a9acc0b25648f9e27c11265d149d60 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 28 May 2019 23:09:46 +0000
Subject: [PATCH 0423/1176] Defer creating fields for captures until we finish
 building the capturing expression or statement.

No functionality change yet. The intent is that we will also delay
building the initialization expression until the enclosing context, so
that:
a) we build the initialization expression in the right context, and
b) we can elide captures that are not odr-used, as suggested by P0588R1.

This also consolidates some duplicated code building capture fields into
a single place.

llvm-svn: 361893
---
 clang/include/clang/Sema/ScopeInfo.h | 42 +++++++++------
 clang/include/clang/Sema/Sema.h      |  7 ++-
 clang/lib/Sema/ScopeInfo.cpp         | 14 ++---
 clang/lib/Sema/SemaDecl.cpp          |  8 +--
 clang/lib/Sema/SemaExpr.cpp          | 81 ++--------------------------
 clang/lib/Sema/SemaExprCXX.cpp       | 63 +++++++++-------------
 clang/lib/Sema/SemaLambda.cpp        | 70 ++++++++++++++++++------
 clang/lib/Sema/SemaStmt.cpp          | 15 ++++--
 clang/lib/Sema/TreeTransform.h       |  2 +-
 clang/test/AST/ast-dump-expr.cpp     | 14 ++---
 10 files changed, 142 insertions(+), 174 deletions(-)

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 9b4d9638d236b..375d93111adb4 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -507,8 +507,13 @@ class Capture {
     Cap_ByCopy, Cap_ByRef, Cap_Block, Cap_VLA
   };
 
-  /// If !CapturesThis, the captured variable.
-  VarDecl *CapturedVar = nullptr;
+  union {
+    /// If Kind == Cap_VLA, the captured type.
+    const VariableArrayType *CapturedVLA;
+
+    /// Otherwise, the captured variable (if any).
+    VarDecl *CapturedVar;
+  };
 
   /// Expression to initialize a field of the given type. This is only required
   /// if we are capturing ByVal and the variable's type has a non-trivial copy
@@ -553,8 +558,7 @@ class Capture {
           Expr *Cpy, bool Invalid)
       : CapturedVar(Var), InitExpr(Cpy), Loc(Loc), EllipsisLoc(EllipsisLoc),
         CaptureType(CaptureType),
-        Kind(!Var ? Cap_VLA
-                  : Block ? Cap_Block : ByRef ? Cap_ByRef : Cap_ByCopy),
+        Kind(Block ? Cap_Block : ByRef ? Cap_ByRef : Cap_ByCopy),
         Nested(IsNested), CapturesThis(false), ODRUsed(false),
         NonODRUsed(false), Invalid(Invalid) {}
 
@@ -566,6 +570,13 @@ class Capture {
         CapturesThis(true), ODRUsed(false), NonODRUsed(false),
         Invalid(Invalid) {}
 
+  enum IsVLACapture { VLACapture };
+  Capture(IsVLACapture, const VariableArrayType *VLA, bool IsNested,
+          SourceLocation Loc, QualType CaptureType)
+      : CapturedVLA(VLA), Loc(Loc), CaptureType(CaptureType), Kind(Cap_VLA),
+        Nested(IsNested), CapturesThis(false), ODRUsed(false),
+        NonODRUsed(false), Invalid(false) {}
+
   bool isThisCapture() const { return CapturesThis; }
   bool isVariableCapture() const {
     return !isThisCapture() && !isVLATypeCapture();
@@ -594,6 +605,11 @@ class Capture {
     return CapturedVar;
   }
 
+  const VariableArrayType *getCapturedVLAType() const {
+    assert(isVLATypeCapture());
+    return CapturedVLA;
+  }
+
   /// Retrieve the location at which this variable was captured.
   SourceLocation getLocation() const { return Loc; }
 
@@ -653,17 +669,13 @@ class CapturingScopeInfo : public FunctionScopeInfo {
     CaptureMap[Var] = Captures.size();
   }
 
-  void addVLATypeCapture(SourceLocation Loc, QualType CaptureType) {
-    Captures.push_back(Capture(/*Var*/ nullptr, /*isBlock*/ false,
-                               /*isByref*/ false, /*isNested*/ false, Loc,
-                               /*EllipsisLoc*/ SourceLocation(), CaptureType,
-                               /*Cpy*/ nullptr, /*Invalid*/ false));
+  void addVLATypeCapture(SourceLocation Loc, const VariableArrayType *VLAType,
+                         QualType CaptureType) {
+    Captures.push_back(Capture(Capture::VLACapture, VLAType,
+                               /*FIXME: IsNested*/ false, Loc, CaptureType));
   }
 
-  // Note, we do not need to add the type of 'this' since that is always
-  // retrievable from Sema::getCurrentThisType - and is also encoded within the
-  // type of the corresponding FieldDecl.
-  void addThisCapture(bool isNested, SourceLocation Loc,
+  void addThisCapture(bool isNested, SourceLocation Loc, QualType CaptureType,
                       Expr *Cpy, bool ByCopy);
 
   /// Determine whether the C++ 'this' is captured.
@@ -1010,9 +1022,9 @@ void FunctionScopeInfo::recordUseOfWeak(const ExprT *E, bool IsRead) {
 
 inline void
 CapturingScopeInfo::addThisCapture(bool isNested, SourceLocation Loc,
-                                   Expr *Cpy,
+                                   QualType CaptureType, Expr *Cpy,
                                    const bool ByCopy) {
-  Captures.push_back(Capture(Capture::ThisCapture, isNested, Loc, QualType(),
+  Captures.push_back(Capture(Capture::ThisCapture, isNested, Loc, CaptureType,
                              Cpy, ByCopy, /*Invalid*/ false));
   CXXThisCaptureIndex = Captures.size();
 }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index db6435461eec8..d7486ec1c2617 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5739,8 +5739,8 @@ class Sema {
                                           IdentifierInfo *Id,
                                           unsigned InitStyle, Expr *Init);
 
-  /// Build the implicit field for an init-capture.
-  FieldDecl *buildInitCaptureField(sema::LambdaScopeInfo *LSI, VarDecl *Var);
+  /// Add an init-capture to a lambda scope.
+  void addInitCapture(sema::LambdaScopeInfo *LSI, VarDecl *Var);
 
   /// Note that we have finished the explicit captures for the
   /// given lambda.
@@ -5786,6 +5786,9 @@ class Sema {
   bool DiagnoseUnusedLambdaCapture(SourceRange CaptureRange,
                                    const sema::Capture &From);
 
+  /// Build a FieldDecl suitable to hold the given capture.
+  FieldDecl *BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture);
+
   /// Complete a lambda-expression having processed and attached the
   /// lambda body.
   ExprResult BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
diff --git a/clang/lib/Sema/ScopeInfo.cpp b/clang/lib/Sema/ScopeInfo.cpp
index 1003d2639c0e0..dd309a2811850 100644
--- a/clang/lib/Sema/ScopeInfo.cpp
+++ b/clang/lib/Sema/ScopeInfo.cpp
@@ -113,17 +113,9 @@ FunctionScopeInfo::WeakObjectProfileTy::getBaseInfo(const Expr *E) {
 }
 
 bool CapturingScopeInfo::isVLATypeCaptured(const VariableArrayType *VAT) const {
-  RecordDecl *RD = nullptr;
-  if (auto *LSI = dyn_cast<LambdaScopeInfo>(this))
-    RD = LSI->Lambda;
-  else if (auto CRSI = dyn_cast<CapturedRegionScopeInfo>(this))
-    RD = CRSI->TheRecordDecl;
-
-  if (RD)
-    for (auto *FD : RD->fields()) {
-      if (FD->hasCapturedVLAType() && FD->getCapturedVLAType() == VAT)
-        return true;
-    }
+  for (auto &Cap : Captures)
+    if (Cap.isVLATypeCapture() && Cap.getCapturedVLAType() == VAT)
+      return true;
   return false;
 }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 188e801b4c436..fbc410f014d97 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12942,11 +12942,11 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
           CaptureType, /*Expr*/ nullptr, /*Invalid*/false);
 
     } else if (C.capturesThis()) {
-      LSI->addThisCapture(/*Nested*/ false, C.getLocation(),
-                              /*Expr*/ nullptr,
-                              C.getCaptureKind() == LCK_StarThis);
+      LSI->addThisCapture(/*Nested*/ false, C.getLocation(), I->getType(),
+                          /*Expr*/ nullptr, C.getCaptureKind() == LCK_StarThis);
     } else {
-      LSI->addVLATypeCapture(C.getLocation(), I->getType());
+      LSI->addVLATypeCapture(C.getLocation(), I->getCapturedVLAType(),
+                             I->getType());
     }
     ++I;
   }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 95be7af8b6856..8eccb4b0c5d30 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4050,32 +4050,11 @@ static void captureVariablyModifiedType(ASTContext &Context, QualType T,
 
       // Unknown size indication requires no size computation.
       // Otherwise, evaluate and record it.
-      if (auto Size = VAT->getSizeExpr()) {
-        if (!CSI->isVLATypeCaptured(VAT)) {
-          RecordDecl *CapRecord = nullptr;
-          if (auto LSI = dyn_cast<LambdaScopeInfo>(CSI)) {
-            CapRecord = LSI->Lambda;
-          } else if (auto CRSI = dyn_cast<CapturedRegionScopeInfo>(CSI)) {
-            CapRecord = CRSI->TheRecordDecl;
-          }
-          if (CapRecord) {
-            auto ExprLoc = Size->getExprLoc();
-            auto SizeType = Context.getSizeType();
-            // Build the non-static data member.
-            auto Field =
-                FieldDecl::Create(Context, CapRecord, ExprLoc, ExprLoc,
-                                  /*Id*/ nullptr, SizeType, /*TInfo*/ nullptr,
-                                  /*BW*/ nullptr, /*Mutable*/ false,
-                                  /*InitStyle*/ ICIS_NoInit);
-            Field->setImplicit(true);
-            Field->setAccess(AS_private);
-            Field->setCapturedVLAType(VAT);
-            CapRecord->addDecl(Field);
-
-            CSI->addVLATypeCapture(ExprLoc, SizeType);
-          }
-        }
-      }
+      auto Size = VAT->getSizeExpr();
+      if (Size && !CSI->isVLATypeCaptured(VAT) &&
+          (isa<CapturedRegionScopeInfo>(CSI) || isa<LambdaScopeInfo>(CSI)))
+        CSI->addVLATypeCapture(Size->getExprLoc(), VAT, Context.getSizeType());
+
       T = VAT->getElementType();
       break;
     }
@@ -15367,18 +15346,6 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
     // The current implementation assumes that all variables are captured
     // by references. Since there is no capture by copy, no expression
     // evaluation will be needed.
-    RecordDecl *RD = RSI->TheRecordDecl;
-
-    FieldDecl *Field
-      = FieldDecl::Create(S.Context, RD, Loc, Loc, nullptr, CaptureType,
-                          S.Context.getTrivialTypeSourceInfo(CaptureType, Loc),
-                          nullptr, false, ICIS_NoInit);
-    Field->setImplicit(true);
-    Field->setAccess(AS_private);
-    RD->addDecl(Field);
-    if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
-      S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel);
-
     CopyExpr = new (S.Context) DeclRefExpr(
         S.Context, Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc);
     Var->setReferenced(true);
@@ -15394,39 +15361,6 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
   return !Invalid;
 }
 
-/// Create a field within the lambda class for the variable
-/// being captured.
-static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI,
-                                    QualType FieldType, QualType DeclRefType,
-                                    SourceLocation Loc,
-                                    bool RefersToCapturedVariable) {
-  CXXRecordDecl *Lambda = LSI->Lambda;
-
-  // Build the non-static data member.
-  FieldDecl *Field
-    = FieldDecl::Create(S.Context, Lambda, Loc, Loc, nullptr, FieldType,
-                        S.Context.getTrivialTypeSourceInfo(FieldType, Loc),
-                        nullptr, false, ICIS_NoInit);
-  // If the variable being captured has an invalid type, mark the lambda class
-  // as invalid as well.
-  if (!FieldType->isDependentType()) {
-    if (S.RequireCompleteType(Loc, FieldType, diag::err_field_incomplete)) {
-      Lambda->setInvalidDecl();
-      Field->setInvalidDecl();
-    } else {
-      NamedDecl *Def;
-      FieldType->isIncompleteType(&Def);
-      if (Def && Def->isInvalidDecl()) {
-        Lambda->setInvalidDecl();
-        Field->setInvalidDecl();
-      }
-    }
-  }
-  Field->setImplicit(true);
-  Field->setAccess(AS_private);
-  Lambda->addDecl(Field);
-}
-
 /// Capture the given variable in the lambda.
 static bool captureInLambda(LambdaScopeInfo *LSI,
                             VarDecl *Var,
@@ -15504,11 +15438,6 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
     }
   }
 
-  // Capture this variable in the lambda.
-  if (BuildAndDiagnose && !Invalid)
-    addAsFieldToClosureType(S, LSI, CaptureType, DeclRefType, Loc,
-                            RefersToCapturedVariable);
-
   // Compute the type of a reference to this captured variable.
   if (ByRef)
     DeclRefType = CaptureType.getNonReferenceType();
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index ef27fc2d71913..00b158debc5a2 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1135,39 +1135,15 @@ Sema::CXXThisScopeRAII::~CXXThisScopeRAII() {
 }
 
 static Expr *captureThis(Sema &S, ASTContext &Context, RecordDecl *RD,
-                         QualType ThisTy, SourceLocation Loc,
-                         const bool ByCopy) {
-
-  QualType AdjustedThisTy = ThisTy;
-  // The type of the corresponding data member (not a 'this' pointer if 'by
-  // copy').
-  QualType CaptureThisFieldTy = ThisTy;
-  if (ByCopy) {
-    // If we are capturing the object referred to by '*this' by copy, ignore any
-    // cv qualifiers inherited from the type of the member function for the type
-    // of the closure-type's corresponding data member and any use of 'this'.
-    CaptureThisFieldTy = ThisTy->getPointeeType();
-    CaptureThisFieldTy.removeLocalCVRQualifiers(Qualifiers::CVRMask);
-    AdjustedThisTy = Context.getPointerType(CaptureThisFieldTy);
-  }
-
-  FieldDecl *Field = FieldDecl::Create(
-      Context, RD, Loc, Loc, nullptr, CaptureThisFieldTy,
-      Context.getTrivialTypeSourceInfo(CaptureThisFieldTy, Loc), nullptr, false,
-      ICIS_NoInit);
-
-  Field->setImplicit(true);
-  Field->setAccess(AS_private);
-  RD->addDecl(Field);
-  Expr *This =
-      new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/ true);
+                         QualType ThisTy, QualType CaptureType,
+                         SourceLocation Loc, const bool ByCopy) {
+  Expr *This = new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/ true);
   if (ByCopy) {
-    Expr *StarThis =  S.CreateBuiltinUnaryOp(Loc,
-                                      UO_Deref,
-                                      This).get();
-    InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture(
-      nullptr, CaptureThisFieldTy, Loc);
-    InitializationKind InitKind = InitializationKind::CreateDirect(Loc, Loc, Loc);
+    Expr *StarThis = S.CreateBuiltinUnaryOp(Loc, UO_Deref, This).get();
+    InitializedEntity Entity =
+        InitializedEntity::InitializeLambdaCapture(nullptr, CaptureType, Loc);
+    InitializationKind InitKind =
+        InitializationKind::CreateDirect(Loc, Loc, Loc);
     InitializationSequence Init(S, Entity, InitKind, StarThis);
     ExprResult ER = Init.Perform(S, Entity, InitKind, StarThis);
     if (ER.isInvalid()) return nullptr;
@@ -1273,21 +1249,32 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit,
     CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[idx]);
     Expr *ThisExpr = nullptr;
 
+    // The type of the corresponding data member (not a 'this' pointer if 'by
+    // copy').
+    QualType CaptureType = ThisTy;
+    if (ByCopy) {
+      // If we are capturing the object referred to by '*this' by copy, ignore
+      // any cv qualifiers inherited from the type of the member function for
+      // the type of the closure-type's corresponding data member and any use
+      // of 'this'.
+      CaptureType = ThisTy->getPointeeType();
+      CaptureType.removeLocalCVRQualifiers(Qualifiers::CVRMask);
+    }
+
     if (LambdaScopeInfo *LSI = dyn_cast<LambdaScopeInfo>(CSI)) {
       // For lambda expressions, build a field and an initializing expression,
       // and capture the *enclosing object* by copy only if this is the first
       // iteration.
-      ThisExpr = captureThis(*this, Context, LSI->Lambda, ThisTy, Loc,
-                             ByCopy && idx == MaxFunctionScopesIndex);
+      ThisExpr = captureThis(*this, Context, LSI->Lambda, ThisTy, CaptureType,
+                             Loc, ByCopy && idx == MaxFunctionScopesIndex);
 
     } else if (CapturedRegionScopeInfo *RSI
         = dyn_cast<CapturedRegionScopeInfo>(FunctionScopes[idx]))
-      ThisExpr =
-          captureThis(*this, Context, RSI->TheRecordDecl, ThisTy, Loc,
-                      false/*ByCopy*/);
+      ThisExpr = captureThis(*this, Context, RSI->TheRecordDecl, ThisTy,
+                             CaptureType, Loc, false /*ByCopy*/);
 
     bool isNested = NumCapturingClosures > 1;
-    CSI->addThisCapture(isNested, Loc, ThisExpr, ByCopy);
+    CSI->addThisCapture(isNested, Loc, CaptureType, ThisExpr, ByCopy);
   }
   return false;
 }
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index f6c9dee2a0819..6d487cc832513 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -843,19 +843,10 @@ VarDecl *Sema::createLambdaInitCaptureVarDecl(SourceLocation Loc,
   return NewVD;
 }
 
-FieldDecl *Sema::buildInitCaptureField(LambdaScopeInfo *LSI, VarDecl *Var) {
-  FieldDecl *Field = FieldDecl::Create(
-      Context, LSI->Lambda, Var->getLocation(), Var->getLocation(),
-      nullptr, Var->getType(), Var->getTypeSourceInfo(), nullptr, false,
-      ICIS_NoInit);
-  Field->setImplicit(true);
-  Field->setAccess(AS_private);
-  LSI->Lambda->addDecl(Field);
-
+void Sema::addInitCapture(LambdaScopeInfo *LSI, VarDecl *Var) {
   LSI->addCapture(Var, /*isBlock*/false, Var->getType()->isReferenceType(),
                   /*isNested*/false, Var->getLocation(), SourceLocation(),
                   Var->getType(), Var->getInit(), /*Invalid*/false);
-  return Field;
 }
 
 void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
@@ -1182,7 +1173,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
     }
 
     if (C->Init.isUsable()) {
-      buildInitCaptureField(LSI, Var);
+      addInitCapture(LSI, Var);
     } else {
       TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef :
                                                    TryCapture_ExplicitByVal;
@@ -1539,6 +1530,54 @@ bool Sema::DiagnoseUnusedLambdaCapture(SourceRange CaptureRange,
   return true;
 }
 
+/// Create a field within the lambda class or captured statement record for the
+/// given capture.
+FieldDecl *Sema::BuildCaptureField(RecordDecl *RD,
+                                   const sema::Capture &Capture) {
+  SourceLocation Loc = Capture.getLocation();
+  QualType FieldType = Capture.getCaptureType();
+
+  TypeSourceInfo *TSI = nullptr;
+  if (Capture.isVariableCapture()) {
+    auto *Var = Capture.getVariable();
+    if (Var->isInitCapture())
+      TSI = Capture.getVariable()->getTypeSourceInfo();
+  }
+
+  // FIXME: Should we really be doing this? A null TypeSourceInfo seems more
+  // appropriate, at least for an implicit capture.
+  if (!TSI)
+    TSI = Context.getTrivialTypeSourceInfo(FieldType, Loc);
+
+  // Build the non-static data member.
+  FieldDecl *Field =
+      FieldDecl::Create(Context, RD, Loc, Loc, nullptr, FieldType, TSI, nullptr,
+                        false, ICIS_NoInit);
+  // If the variable being captured has an invalid type, mark the class as
+  // invalid as well.
+  if (!FieldType->isDependentType()) {
+    if (RequireCompleteType(Loc, FieldType, diag::err_field_incomplete)) {
+      RD->setInvalidDecl();
+      Field->setInvalidDecl();
+    } else {
+      NamedDecl *Def;
+      FieldType->isIncompleteType(&Def);
+      if (Def && Def->isInvalidDecl()) {
+        RD->setInvalidDecl();
+        Field->setInvalidDecl();
+      }
+    }
+  }
+  Field->setImplicit(true);
+  Field->setAccess(AS_private);
+  RD->addDecl(Field);
+
+  if (Capture.isVLATypeCapture())
+    Field->setCapturedVLAType(Capture.getCapturedVLAType());
+
+  return Field;
+}
+
 ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
                                  LambdaScopeInfo *LSI) {
   // Collect information from the lambda scope.
@@ -1576,14 +1615,12 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
 
     PopExpressionEvaluationContext();
 
-    // Translate captures.
-    auto CurField = Class->field_begin();
     // True if the current capture has a used capture or default before it.
     bool CurHasPreviousCapture = CaptureDefault != LCD_None;
     SourceLocation PrevCaptureLoc = CurHasPreviousCapture ?
         CaptureDefaultLoc : IntroducerRange.getBegin();
 
-    for (unsigned I = 0, N = LSI->Captures.size(); I != N; ++I, ++CurField) {
+    for (unsigned I = 0, N = LSI->Captures.size(); I != N; ++I) {
       const Capture &From = LSI->Captures[I];
 
       if (From.isInvalid())
@@ -1626,6 +1663,9 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
         PrevCaptureLoc = CaptureRange.getEnd();
       }
 
+      // Add a FieldDecl for the capture.
+      FieldDecl *Field = BuildCaptureField(Class, From);
+
       // Handle 'this' capture.
       if (From.isThisCapture()) {
         // Capturing 'this' implicitly with a default of '[=]' is deprecated,
@@ -1659,7 +1699,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
       Expr *Init = From.getInitExpr();
       if (!Init) {
         auto InitResult = performLambdaVarCaptureInitialization(
-            *this, From, *CurField, CaptureDefaultLoc, IsImplicit);
+            *this, From, Field, CaptureDefaultLoc, IsImplicit);
         if (InitResult.isInvalid())
           return ExprError();
         Init = InitResult.get();
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 51a72c618b7de..357e257abe096 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4224,13 +4224,16 @@ Sema::CreateCapturedStmtRecordDecl(CapturedDecl *&CD, SourceLocation Loc,
 }
 
 static void
-buildCapturedStmtCaptureList(SmallVectorImpl<CapturedStmt::Capture> &Captures,
-                             SmallVectorImpl<Expr *> &CaptureInits,
-                             ArrayRef<sema::Capture> Candidates) {
-  for (const sema::Capture &Cap : Candidates) {
+buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
+                             SmallVectorImpl<CapturedStmt::Capture> &Captures,
+                             SmallVectorImpl<Expr *> &CaptureInits) {
+  for (const sema::Capture &Cap : RSI->Captures) {
     if (Cap.isInvalid())
       continue;
 
+    // Create a field for this capture.
+    FieldDecl *Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap);
+
     if (Cap.isThisCapture()) {
       Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
                                                CapturedStmt::VCK_This));
@@ -4243,6 +4246,8 @@ buildCapturedStmtCaptureList(SmallVectorImpl<CapturedStmt::Capture> &Captures,
       continue;
     }
 
+    if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
+      S.setOpenMPCaptureKind(Field, Cap.getVariable(), RSI->OpenMPLevel);
     Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
                                              Cap.isReferenceCapture()
                                                  ? CapturedStmt::VCK_ByRef
@@ -4360,7 +4365,7 @@ StmtResult Sema::ActOnCapturedRegionEnd(Stmt *S) {
 
   SmallVector<CapturedStmt::Capture, 4> Captures;
   SmallVector<Expr *, 4> CaptureInits;
-  buildCapturedStmtCaptureList(Captures, CaptureInits, RSI->Captures);
+  buildCapturedStmtCaptureList(*this, RSI, Captures, CaptureInits);
 
   CapturedDecl *CD = RSI->TheCapturedDecl;
   RecordDecl *RD = RSI->TheRecordDecl;
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index b5114eeef3011..6620885f23961 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -11382,7 +11382,7 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
           break;
         }
         NewVDs.push_back(NewVD);
-        getSema().buildInitCaptureField(LSI, NewVD);
+        getSema().addInitCapture(LSI, NewVD);
       }
 
       if (Invalid)
diff --git a/clang/test/AST/ast-dump-expr.cpp b/clang/test/AST/ast-dump-expr.cpp
index 96d3df34cb47f..693dd573079f3 100644
--- a/clang/test/AST/ast-dump-expr.cpp
+++ b/clang/test/AST/ast-dump-expr.cpp
@@ -252,9 +252,9 @@ void PrimaryExpressions(Ts... a) {
       // CHECK-NEXT: CopyAssignment
       // CHECK-NEXT: MoveAssignment
       // CHECK-NEXT: Destructor
-      // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V *'
       // CHECK-NEXT: CXXMethodDecl
       // CHECK-NEXT: CompoundStmt
+      // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V *'
       // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' implicit this
 
       [*this]{};
@@ -267,9 +267,9 @@ void PrimaryExpressions(Ts... a) {
       // CHECK-NEXT: CopyAssignment
       // CHECK-NEXT: MoveAssignment
       // CHECK-NEXT: Destructor
-      // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V'
       // CHECK-NEXT: CXXMethodDecl
       // CHECK-NEXT: CompoundStmt
+      // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V'
       // CHECK-NEXT: ParenListExpr 0x{{[^ ]*}} <col:8> 'NULL TYPE'
       // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} <col:8> '<dependent type>' prefix '*' cannot overflow
       // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' implicit this
@@ -322,9 +322,9 @@ void PrimaryExpressions(Ts... a) {
   // CHECK-NEXT: CopyAssignment
   // CHECK-NEXT: MoveAssignment
   // CHECK-NEXT: Destructor
-  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:4> col:4 implicit 'Ts...'
   // CHECK-NEXT: CXXMethodDecl 0x{{[^ ]*}} <col:8, col:10> col:3 operator() 'auto () const -> auto' inline
   // CHECK-NEXT: CompoundStmt
+  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:4> col:4 implicit 'Ts...'
   // CHECK-NEXT: ParenListExpr 0x{{[^ ]*}} <col:4> 'NULL TYPE'
   // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:4> 'Ts...' lvalue ParmVar 0x{{[^ ]*}} 'a' 'Ts...'
   // CHECK-NEXT: CompoundStmt 0x{{[^ ]*}} <col:9, col:10>
@@ -403,8 +403,6 @@ void PrimaryExpressions(Ts... a) {
   // CHECK-NEXT: CopyAssignment
   // CHECK-NEXT: MoveAssignment
   // CHECK-NEXT: Destructor
-  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:4> col:4 implicit 'int'
-  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'int &'
   // CHECK-NEXT: CXXMethodDecl 0x{{[^ ]*}} <col:9, col:26> col:3 operator() 'auto () const -> auto' inline
   // CHECK-NEXT: CompoundStmt
   // CHECK-NEXT: ReturnStmt 0x{{[^ ]*}} <col:12, col:23>
@@ -413,6 +411,8 @@ void PrimaryExpressions(Ts... a) {
   // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:19> 'const int' lvalue Var 0x{{[^ ]*}} 'b' 'int'
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:23> 'int' lvalue Var 0x{{[^ ]*}} 'c' 'int'
+  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:4> col:4 implicit 'int'
+  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'int &'
   // CHECK-NEXT: ImplicitCastExpr
   // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:4> 'int' lvalue Var 0x{{[^ ]*}} 'b' 'int'
   // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:8> 'int' lvalue Var 0x{{[^ ]*}} 'c' 'int'
@@ -434,10 +434,10 @@ void PrimaryExpressions(Ts... a) {
   // CHECK-NEXT: CopyAssignment
   // CHECK-NEXT: MoveAssignment
   // CHECK-NEXT: Destructor
-  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:4> col:4 implicit 'Ts...'
-  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:10> col:10 implicit 'int':'int'
   // CHECK-NEXT: CXXMethodDecl 0x{{[^ ]*}} <col:16, col:18> col:3 operator() 'auto () const -> auto' inline
   // CHECK-NEXT: CompoundStmt
+  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:4> col:4 implicit 'Ts...'
+  // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:10> col:10 implicit 'int':'int'
   // CHECK-NEXT: ParenListExpr 0x{{[^ ]*}} <col:4> 'NULL TYPE'
   // CHECK-NEXT: DeclRefExpr 0x{{[^ ]*}} <col:4> 'Ts...' lvalue ParmVar 0x{{[^ ]*}} 'a' 'Ts...'
   // CHECK-NEXT: IntegerLiteral 0x{{[^ ]*}} <col:14> 'int' 12

From 41d5fdfa9140b4228f4ccb8946ccdf87cf877667 Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Tue, 28 May 2019 23:13:55 +0000
Subject: [PATCH 0424/1176] Fix test that used raw string literals. Doesn't
 work in C++03

llvm-svn: 361894
---
 .../std/re/re.regex/re.regex.construct/bad_range.pass.cpp     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp
index 198cea87d142b..590c10f94dff7 100644
--- a/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp
+++ b/libcxx/test/std/re/re.regex/re.regex.construct/bad_range.pass.cpp
@@ -31,8 +31,8 @@ static bool error_range_thrown(const char *pat)
 
 int main(int, char**)
 {
-    assert(error_range_thrown(R"([\w-a])"));
-    assert(error_range_thrown(R"([a-\w])"));
+    assert(error_range_thrown("([\\w-a])"));
+    assert(error_range_thrown("([a-\\w])"));
 
   return 0;
 }

From d103bc31d74e54438e78332a17c5ddeb334927f7 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 28 May 2019 23:20:52 +0000
Subject: [PATCH 0425/1176] Fix r361893 to also update a recently-added test.

llvm-svn: 361895
---
 clang/test/AST/ast-dump-expr-json.cpp | 326 +++++++++++++-------------
 1 file changed, 163 insertions(+), 163 deletions(-)

diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index fa6d6ed381575..90a3bb0734221 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -3854,31 +3854,6 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                  "inner": [
 // CHECK-NEXT:                   {
 // CHECK-NEXT:                    "id": "0x{{.*}}",
-// CHECK-NEXT:                    "kind": "FieldDecl",
-// CHECK-NEXT:                    "loc": {
-// CHECK-NEXT:                     "col": 8,
-// CHECK-NEXT:                     "file": "{{.*}}",
-// CHECK-NEXT:                     "line": 98
-// CHECK-NEXT:                    },
-// CHECK-NEXT:                    "range": {
-// CHECK-NEXT:                     "begin": {
-// CHECK-NEXT:                      "col": 8,
-// CHECK-NEXT:                      "file": "{{.*}}",
-// CHECK-NEXT:                      "line": 98
-// CHECK-NEXT:                     },
-// CHECK-NEXT:                     "end": {
-// CHECK-NEXT:                      "col": 8,
-// CHECK-NEXT:                      "file": "{{.*}}",
-// CHECK-NEXT:                      "line": 98
-// CHECK-NEXT:                     }
-// CHECK-NEXT:                    },
-// CHECK-NEXT:                    "isImplicit": true,
-// CHECK-NEXT:                    "type": {
-// CHECK-NEXT:                     "qualType": "V *"
-// CHECK-NEXT:                    }
-// CHECK-NEXT:                   },
-// CHECK-NEXT:                   {
-// CHECK-NEXT:                    "id": "0x{{.*}}",
 // CHECK-NEXT:                    "kind": "CXXMethodDecl",
 // CHECK-NEXT:                    "loc": {
 // CHECK-NEXT:                     "col": 7,
@@ -3920,6 +3895,31 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                      }
 // CHECK-NEXT:                     }
 // CHECK-NEXT:                    ]
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "FieldDecl",
+// CHECK-NEXT:                    "loc": {
+// CHECK-NEXT:                     "col": 8,
+// CHECK-NEXT:                     "file": "{{.*}}",
+// CHECK-NEXT:                     "line": 98
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 98
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 98
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "isImplicit": true,
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "V *"
+// CHECK-NEXT:                    }
 // CHECK-NEXT:                   }
 // CHECK-NEXT:                  ]
 // CHECK-NEXT:                 },
@@ -4043,31 +4043,6 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                  "inner": [
 // CHECK-NEXT:                   {
 // CHECK-NEXT:                    "id": "0x{{.*}}",
-// CHECK-NEXT:                    "kind": "FieldDecl",
-// CHECK-NEXT:                    "loc": {
-// CHECK-NEXT:                     "col": 8,
-// CHECK-NEXT:                     "file": "{{.*}}",
-// CHECK-NEXT:                     "line": 99
-// CHECK-NEXT:                    },
-// CHECK-NEXT:                    "range": {
-// CHECK-NEXT:                     "begin": {
-// CHECK-NEXT:                      "col": 8,
-// CHECK-NEXT:                      "file": "{{.*}}",
-// CHECK-NEXT:                      "line": 99
-// CHECK-NEXT:                     },
-// CHECK-NEXT:                     "end": {
-// CHECK-NEXT:                      "col": 8,
-// CHECK-NEXT:                      "file": "{{.*}}",
-// CHECK-NEXT:                      "line": 99
-// CHECK-NEXT:                     }
-// CHECK-NEXT:                    },
-// CHECK-NEXT:                    "isImplicit": true,
-// CHECK-NEXT:                    "type": {
-// CHECK-NEXT:                     "qualType": "V"
-// CHECK-NEXT:                    }
-// CHECK-NEXT:                   },
-// CHECK-NEXT:                   {
-// CHECK-NEXT:                    "id": "0x{{.*}}",
 // CHECK-NEXT:                    "kind": "CXXMethodDecl",
 // CHECK-NEXT:                    "loc": {
 // CHECK-NEXT:                     "col": 7,
@@ -4109,6 +4084,31 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                      }
 // CHECK-NEXT:                     }
 // CHECK-NEXT:                    ]
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   {
+// CHECK-NEXT:                    "id": "0x{{.*}}",
+// CHECK-NEXT:                    "kind": "FieldDecl",
+// CHECK-NEXT:                    "loc": {
+// CHECK-NEXT:                     "col": 8,
+// CHECK-NEXT:                     "file": "{{.*}}",
+// CHECK-NEXT:                     "line": 99
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                      "col": 8,
+// CHECK-NEXT:                      "file": "{{.*}}",
+// CHECK-NEXT:                      "line": 99
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                    },
+// CHECK-NEXT:                    "isImplicit": true,
+// CHECK-NEXT:                    "type": {
+// CHECK-NEXT:                     "qualType": "V"
+// CHECK-NEXT:                    }
 // CHECK-NEXT:                   }
 // CHECK-NEXT:                  ]
 // CHECK-NEXT:                 },
@@ -4811,31 +4811,6 @@ void TestNonADLCall3() {
 // CHECK-NEXT:          "inner": [
 // CHECK-NEXT:           {
 // CHECK-NEXT:            "id": "0x{{.*}}",
-// CHECK-NEXT:            "kind": "FieldDecl",
-// CHECK-NEXT:            "loc": {
-// CHECK-NEXT:             "col": 4,
-// CHECK-NEXT:             "file": "{{.*}}",
-// CHECK-NEXT:             "line": 107
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "range": {
-// CHECK-NEXT:             "begin": {
-// CHECK-NEXT:              "col": 4,
-// CHECK-NEXT:              "file": "{{.*}}",
-// CHECK-NEXT:              "line": 107
-// CHECK-NEXT:             },
-// CHECK-NEXT:             "end": {
-// CHECK-NEXT:              "col": 4,
-// CHECK-NEXT:              "file": "{{.*}}",
-// CHECK-NEXT:              "line": 107
-// CHECK-NEXT:             }
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "isImplicit": true,
-// CHECK-NEXT:            "type": {
-// CHECK-NEXT:             "qualType": "Ts..."
-// CHECK-NEXT:            }
-// CHECK-NEXT:           },
-// CHECK-NEXT:           {
-// CHECK-NEXT:            "id": "0x{{.*}}",
 // CHECK-NEXT:            "kind": "CXXMethodDecl",
 // CHECK-NEXT:            "loc": {
 // CHECK-NEXT:             "col": 3,
@@ -4877,6 +4852,31 @@ void TestNonADLCall3() {
 // CHECK-NEXT:              }
 // CHECK-NEXT:             }
 // CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 4,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 107
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 107
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "Ts..."
+// CHECK-NEXT:            }
 // CHECK-NEXT:           }
 // CHECK-NEXT:          ]
 // CHECK-NEXT:         },
@@ -5799,56 +5799,6 @@ void TestNonADLCall3() {
 // CHECK-NEXT:          "inner": [
 // CHECK-NEXT:           {
 // CHECK-NEXT:            "id": "0x{{.*}}",
-// CHECK-NEXT:            "kind": "FieldDecl",
-// CHECK-NEXT:            "loc": {
-// CHECK-NEXT:             "col": 4,
-// CHECK-NEXT:             "file": "{{.*}}",
-// CHECK-NEXT:             "line": 112
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "range": {
-// CHECK-NEXT:             "begin": {
-// CHECK-NEXT:              "col": 4,
-// CHECK-NEXT:              "file": "{{.*}}",
-// CHECK-NEXT:              "line": 112
-// CHECK-NEXT:             },
-// CHECK-NEXT:             "end": {
-// CHECK-NEXT:              "col": 4,
-// CHECK-NEXT:              "file": "{{.*}}",
-// CHECK-NEXT:              "line": 112
-// CHECK-NEXT:             }
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "isImplicit": true,
-// CHECK-NEXT:            "type": {
-// CHECK-NEXT:             "qualType": "int"
-// CHECK-NEXT:            }
-// CHECK-NEXT:           },
-// CHECK-NEXT:           {
-// CHECK-NEXT:            "id": "0x{{.*}}",
-// CHECK-NEXT:            "kind": "FieldDecl",
-// CHECK-NEXT:            "loc": {
-// CHECK-NEXT:             "col": 8,
-// CHECK-NEXT:             "file": "{{.*}}",
-// CHECK-NEXT:             "line": 112
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "range": {
-// CHECK-NEXT:             "begin": {
-// CHECK-NEXT:              "col": 8,
-// CHECK-NEXT:              "file": "{{.*}}",
-// CHECK-NEXT:              "line": 112
-// CHECK-NEXT:             },
-// CHECK-NEXT:             "end": {
-// CHECK-NEXT:              "col": 8,
-// CHECK-NEXT:              "file": "{{.*}}",
-// CHECK-NEXT:              "line": 112
-// CHECK-NEXT:             }
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "isImplicit": true,
-// CHECK-NEXT:            "type": {
-// CHECK-NEXT:             "qualType": "int &"
-// CHECK-NEXT:            }
-// CHECK-NEXT:           },
-// CHECK-NEXT:           {
-// CHECK-NEXT:            "id": "0x{{.*}}",
 // CHECK-NEXT:            "kind": "CXXMethodDecl",
 // CHECK-NEXT:            "loc": {
 // CHECK-NEXT:             "col": 3,
@@ -6035,6 +5985,56 @@ void TestNonADLCall3() {
 // CHECK-NEXT:              ]
 // CHECK-NEXT:             }
 // CHECK-NEXT:            ]
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 4,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 112
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            }
+// CHECK-NEXT:           },
+// CHECK-NEXT:           {
+// CHECK-NEXT:            "id": "0x{{.*}}",
+// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "loc": {
+// CHECK-NEXT:             "col": 8,
+// CHECK-NEXT:             "file": "{{.*}}",
+// CHECK-NEXT:             "line": 112
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "range": {
+// CHECK-NEXT:             "begin": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             },
+// CHECK-NEXT:             "end": {
+// CHECK-NEXT:              "col": 8,
+// CHECK-NEXT:              "file": "{{.*}}",
+// CHECK-NEXT:              "line": 112
+// CHECK-NEXT:             }
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "type": {
+// CHECK-NEXT:             "qualType": "int &"
+// CHECK-NEXT:            }
 // CHECK-NEXT:           }
 // CHECK-NEXT:          ]
 // CHECK-NEXT:         },
@@ -6359,98 +6359,98 @@ void TestNonADLCall3() {
 // CHECK-NEXT:          "inner": [
 // CHECK-NEXT:           {
 // CHECK-NEXT:            "id": "0x{{.*}}",
-// CHECK-NEXT:            "kind": "FieldDecl",
+// CHECK-NEXT:            "kind": "CXXMethodDecl",
 // CHECK-NEXT:            "loc": {
-// CHECK-NEXT:             "col": 4,
+// CHECK-NEXT:             "col": 3,
 // CHECK-NEXT:             "file": "{{.*}}",
 // CHECK-NEXT:             "line": 113
 // CHECK-NEXT:            },
 // CHECK-NEXT:            "range": {
 // CHECK-NEXT:             "begin": {
-// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "col": 16,
 // CHECK-NEXT:              "file": "{{.*}}",
 // CHECK-NEXT:              "line": 113
 // CHECK-NEXT:             },
 // CHECK-NEXT:             "end": {
-// CHECK-NEXT:              "col": 4,
+// CHECK-NEXT:              "col": 18,
 // CHECK-NEXT:              "file": "{{.*}}",
 // CHECK-NEXT:              "line": 113
 // CHECK-NEXT:             }
 // CHECK-NEXT:            },
-// CHECK-NEXT:            "isImplicit": true,
+// CHECK-NEXT:            "name": "operator()",
 // CHECK-NEXT:            "type": {
-// CHECK-NEXT:             "qualType": "Ts..."
-// CHECK-NEXT:            }
+// CHECK-NEXT:             "qualType": "auto () const -> auto"
+// CHECK-NEXT:            },
+// CHECK-NEXT:            "inline": true,
+// CHECK-NEXT:            "inner": [
+// CHECK-NEXT:             {
+// CHECK-NEXT:              "id": "0x{{.*}}",
+// CHECK-NEXT:              "kind": "CompoundStmt",
+// CHECK-NEXT:              "range": {
+// CHECK-NEXT:               "begin": {
+// CHECK-NEXT:                "col": 17,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 113
+// CHECK-NEXT:               },
+// CHECK-NEXT:               "end": {
+// CHECK-NEXT:                "col": 18,
+// CHECK-NEXT:                "file": "{{.*}}",
+// CHECK-NEXT:                "line": 113
+// CHECK-NEXT:               }
+// CHECK-NEXT:              }
+// CHECK-NEXT:             }
+// CHECK-NEXT:            ]
 // CHECK-NEXT:           },
 // CHECK-NEXT:           {
 // CHECK-NEXT:            "id": "0x{{.*}}",
 // CHECK-NEXT:            "kind": "FieldDecl",
 // CHECK-NEXT:            "loc": {
-// CHECK-NEXT:             "col": 10,
+// CHECK-NEXT:             "col": 4,
 // CHECK-NEXT:             "file": "{{.*}}",
 // CHECK-NEXT:             "line": 113
 // CHECK-NEXT:            },
 // CHECK-NEXT:            "range": {
 // CHECK-NEXT:             "begin": {
-// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "col": 4,
 // CHECK-NEXT:              "file": "{{.*}}",
 // CHECK-NEXT:              "line": 113
 // CHECK-NEXT:             },
 // CHECK-NEXT:             "end": {
-// CHECK-NEXT:              "col": 10,
+// CHECK-NEXT:              "col": 4,
 // CHECK-NEXT:              "file": "{{.*}}",
 // CHECK-NEXT:              "line": 113
 // CHECK-NEXT:             }
 // CHECK-NEXT:            },
 // CHECK-NEXT:            "isImplicit": true,
 // CHECK-NEXT:            "type": {
-// CHECK-NEXT:             "desugaredQualType": "int",
-// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:             "qualType": "Ts..."
 // CHECK-NEXT:            }
 // CHECK-NEXT:           },
 // CHECK-NEXT:           {
 // CHECK-NEXT:            "id": "0x{{.*}}",
-// CHECK-NEXT:            "kind": "CXXMethodDecl",
+// CHECK-NEXT:            "kind": "FieldDecl",
 // CHECK-NEXT:            "loc": {
-// CHECK-NEXT:             "col": 3,
+// CHECK-NEXT:             "col": 10,
 // CHECK-NEXT:             "file": "{{.*}}",
 // CHECK-NEXT:             "line": 113
 // CHECK-NEXT:            },
 // CHECK-NEXT:            "range": {
 // CHECK-NEXT:             "begin": {
-// CHECK-NEXT:              "col": 16,
+// CHECK-NEXT:              "col": 10,
 // CHECK-NEXT:              "file": "{{.*}}",
 // CHECK-NEXT:              "line": 113
 // CHECK-NEXT:             },
 // CHECK-NEXT:             "end": {
-// CHECK-NEXT:              "col": 18,
+// CHECK-NEXT:              "col": 10,
 // CHECK-NEXT:              "file": "{{.*}}",
 // CHECK-NEXT:              "line": 113
 // CHECK-NEXT:             }
 // CHECK-NEXT:            },
-// CHECK-NEXT:            "name": "operator()",
+// CHECK-NEXT:            "isImplicit": true,
 // CHECK-NEXT:            "type": {
-// CHECK-NEXT:             "qualType": "auto () const -> auto"
-// CHECK-NEXT:            },
-// CHECK-NEXT:            "inline": true,
-// CHECK-NEXT:            "inner": [
-// CHECK-NEXT:             {
-// CHECK-NEXT:              "id": "0x{{.*}}",
-// CHECK-NEXT:              "kind": "CompoundStmt",
-// CHECK-NEXT:              "range": {
-// CHECK-NEXT:               "begin": {
-// CHECK-NEXT:                "col": 17,
-// CHECK-NEXT:                "file": "{{.*}}",
-// CHECK-NEXT:                "line": 113
-// CHECK-NEXT:               },
-// CHECK-NEXT:               "end": {
-// CHECK-NEXT:                "col": 18,
-// CHECK-NEXT:                "file": "{{.*}}",
-// CHECK-NEXT:                "line": 113
-// CHECK-NEXT:               }
-// CHECK-NEXT:              }
-// CHECK-NEXT:             }
-// CHECK-NEXT:            ]
+// CHECK-NEXT:             "desugaredQualType": "int",
+// CHECK-NEXT:             "qualType": "int"
+// CHECK-NEXT:            }
 // CHECK-NEXT:           }
 // CHECK-NEXT:          ]
 // CHECK-NEXT:         },

From 88aed8da61f1756717503dd2953ccef01a7a3c4a Mon Sep 17 00:00:00 2001
From: Alexander Shaposhnikov <shal1t712@gmail.com>
Date: Tue, 28 May 2019 23:22:12 +0000
Subject: [PATCH 0426/1176] [tools] Introduce llvm-lipo

This diff starts the implementation of llvm-lipo
which is supposed to be a drop-in replacement for the well-known tool lipo.

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D61927

llvm-svn: 361896
---
 llvm/test/CMakeLists.txt                      |   1 +
 llvm/test/tools/llvm-lipo/help-message.test   |  13 ++
 .../llvm-lipo/verify-arch-macho-binary.test   |  31 +++
 .../verify-arch-universal-binary.test         |  44 +++++
 llvm/tools/llvm-lipo/CMakeLists.txt           |  16 ++
 llvm/tools/llvm-lipo/LLVMBuild.txt            |  20 ++
 llvm/tools/llvm-lipo/LipoOpts.td              |  10 +
 llvm/tools/llvm-lipo/llvm-lipo.cpp            | 185 ++++++++++++++++++
 8 files changed, 320 insertions(+)
 create mode 100644 llvm/test/tools/llvm-lipo/help-message.test
 create mode 100644 llvm/test/tools/llvm-lipo/verify-arch-macho-binary.test
 create mode 100644 llvm/test/tools/llvm-lipo/verify-arch-universal-binary.test
 create mode 100644 llvm/tools/llvm-lipo/CMakeLists.txt
 create mode 100644 llvm/tools/llvm-lipo/LLVMBuild.txt
 create mode 100644 llvm/tools/llvm-lipo/LipoOpts.td
 create mode 100644 llvm/tools/llvm-lipo/llvm-lipo.cpp

diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index f596602c8268d..03d154e39d02b 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -72,6 +72,7 @@ set(LLVM_TEST_DEPENDS
           llvm-jitlink
           llvm-lib
           llvm-link
+          llvm-lipo
           llvm-lto2
           llvm-mc
           llvm-mca
diff --git a/llvm/test/tools/llvm-lipo/help-message.test b/llvm/test/tools/llvm-lipo/help-message.test
new file mode 100644
index 0000000000000..8a9d48b6cc2a5
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/help-message.test
@@ -0,0 +1,13 @@
+# RUN: llvm-lipo -h | FileCheck --check-prefix=LIPO-USAGE %s
+# RUN: llvm-lipo --help | FileCheck --check-prefix=LIPO-USAGE %s
+
+# RUN: llvm-lipo -version | FileCheck --check-prefix=LIPO-VERSION %s
+# RUN: llvm-lipo --version | FileCheck --check-prefix=LIPO-VERSION %s
+
+# RUN: not llvm-lipo 2>&1 | FileCheck --check-prefix=LIPO-USAGE %s
+# RUN: not llvm-lipo -abcabc 2>&1 | FileCheck --check-prefix=LIPO-UNKNOWN-ARG %s
+# RUN: not llvm-lipo --abcabc 2>&1 | FileCheck --check-prefix=LIPO-UNKNOWN-ARG %s
+
+# LIPO-USAGE:    USAGE: llvm-lipo
+# LIPO-UNKNOWN-ARG:    unknown argument '{{-+}}abcabc'
+# LIPO-VERSION: {{ version }}
diff --git a/llvm/test/tools/llvm-lipo/verify-arch-macho-binary.test b/llvm/test/tools/llvm-lipo/verify-arch-macho-binary.test
new file mode 100644
index 0000000000000..1c0a7b8c93a0a
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/verify-arch-macho-binary.test
@@ -0,0 +1,31 @@
+# RUN: yaml2obj %s > %t
+
+# RUN: llvm-lipo %t -verify_arch i386
+# RUN: llvm-lipo %t --verify_arch i386
+
+# RUN: not llvm-lipo %t -verify_arch aarch64
+# RUN: not llvm-lipo %t -verify_arch aarch64 i386 
+
+# INVALID_ARCH: Invalid architecture: aarch101
+# RUN: not llvm-lipo %t -verify_arch aarch101 2>&1 | FileCheck --check-prefix=INVALID_ARCH %s
+
+# INVALID_OBJ: The file was not recognized as a valid object file
+# RUN: touch %t.empty
+# RUN: not llvm-lipo %t.empty -verify_arch aarch101 2>&1 | FileCheck --check-prefix=INVALID_OBJ %s
+
+# NO_INPUT_OBJ: at least one input file should be specified
+# RUN: not llvm-lipo -verify_arch i386 2>&1 | FileCheck --check-prefix=NO_INPUT_OBJ %s
+
+# MULTIPLE_INPUT_OBJ: verify_arch expects a single input file
+# RUN: not llvm-lipo %t %t -verify_arch i386 2>&1 | FileCheck --check-prefix=MULTIPLE_INPUT_OBJ %s
+
+--- !mach-o
+FileHeader:      
+  magic:           0xFEEDFACE
+  cputype:         0x00000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           0
+  sizeofcmds:      0
+  flags:           0x00002000
+...
diff --git a/llvm/test/tools/llvm-lipo/verify-arch-universal-binary.test b/llvm/test/tools/llvm-lipo/verify-arch-universal-binary.test
new file mode 100644
index 0000000000000..128813b6f76fa
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/verify-arch-universal-binary.test
@@ -0,0 +1,44 @@
+# RUN: yaml2obj %s > %t
+
+# RUN: llvm-lipo %t -verify_arch i386
+# RUN: llvm-lipo %t -verify_arch i386 x86_64
+
+# RUN: not llvm-lipo %t -verify_arch aarch64
+# RUN: not llvm-lipo %t -verify_arch aarch64 i386 
+
+--- !fat-mach-o
+FatHeader:       
+  magic:           0xCAFEBABE
+  nfat_arch:       2
+FatArchs:        
+  - cputype:         0x00000007
+    cpusubtype:      0x00000003
+    offset:          0x0000000000001000
+    size:            28
+    align:           12
+  - cputype:         0x01000007
+    cpusubtype:      0x00000003
+    offset:          0x0000000000002000
+    size:            32
+    align:           12
+Slices:          
+  - !mach-o
+    FileHeader:      
+      magic:           0xFEEDFACE
+      cputype:         0x00000007
+      cpusubtype:      0x00000003
+      filetype:        0x00000001
+      ncmds:           0
+      sizeofcmds:      0
+      flags:           0x00002000
+  - !mach-o
+    FileHeader:      
+      magic:           0xFEEDFACF
+      cputype:         0x01000007
+      cpusubtype:      0x00000003
+      filetype:        0x00000001
+      ncmds:           0
+      sizeofcmds:      0
+      flags:           0x00002000
+      reserved:        0x00000000
+...
diff --git a/llvm/tools/llvm-lipo/CMakeLists.txt b/llvm/tools/llvm-lipo/CMakeLists.txt
new file mode 100644
index 0000000000000..b51f792223cbc
--- /dev/null
+++ b/llvm/tools/llvm-lipo/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  Object
+  Option
+  Support
+)
+
+set(LLVM_TARGET_DEFINITIONS LipoOpts.td)
+tablegen(LLVM LipoOpts.inc -gen-opt-parser-defs)
+add_public_tablegen_target(LipoOptsTableGen)
+
+add_llvm_tool(llvm-lipo
+  llvm-lipo.cpp
+  DEPENDS
+  LipoOptsTableGen
+)
diff --git a/llvm/tools/llvm-lipo/LLVMBuild.txt b/llvm/tools/llvm-lipo/LLVMBuild.txt
new file mode 100644
index 0000000000000..fa807add933ab
--- /dev/null
+++ b/llvm/tools/llvm-lipo/LLVMBuild.txt
@@ -0,0 +1,20 @@
+;===- ./tools/llvm-lipo/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+[component_0]
+type = Tool
+name = llvm-lipo
+parent = Tools
+required_libraries = Object Option Support
diff --git a/llvm/tools/llvm-lipo/LipoOpts.td b/llvm/tools/llvm-lipo/LipoOpts.td
new file mode 100644
index 0000000000000..0ff667aafff6e
--- /dev/null
+++ b/llvm/tools/llvm-lipo/LipoOpts.td
@@ -0,0 +1,10 @@
+include "llvm/Option/OptParser.td"
+
+def help : Flag<["-", "--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def version : Flag<["-", "--"], "version">,
+              HelpText<"Print the version and exit.">;
+
+def verify_arch : Option<["-", "--"], "verify_arch", KIND_REMAINING_ARGS>,
+                  HelpText<"Verify that the specified arch_types are present in the input file">;
diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp
new file mode 100644
index 0000000000000..a38d3ae7d0bf1
--- /dev/null
+++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp
@@ -0,0 +1,185 @@
+//===-- llvm-lipo.cpp - a tool for manipulating universal binaries --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A utility for creating / splitting / inspecting universal binaries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/WithColor.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+static const StringRef ToolName = "llvm-lipo";
+
+LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Message) {
+  WithColor::error(errs(), ToolName) << Message << "\n";
+  errs().flush();
+  exit(EXIT_FAILURE);
+}
+
+LLVM_ATTRIBUTE_NORETURN static void reportError(StringRef File, Error E) {
+  assert(E);
+  std::string Buf;
+  raw_string_ostream OS(Buf);
+  logAllUnhandledErrors(std::move(E), OS);
+  OS.flush();
+  WithColor::error(errs(), ToolName) << "'" << File << "': " << Buf;
+  exit(EXIT_FAILURE);
+}
+
+namespace {
+enum LipoID {
+  LIPO_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELPTEXT, METAVAR, VALUES)                                      \
+  LIPO_##ID,
+#include "LipoOpts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const LIPO_##NAME[] = VALUE;
+#include "LipoOpts.inc"
+#undef PREFIX
+
+static const opt::OptTable::Info LipoInfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELPTEXT, METAVAR, VALUES)                                      \
+  {LIPO_##PREFIX, NAME,      HELPTEXT,                                         \
+   METAVAR,       LIPO_##ID, opt::Option::KIND##Class,                         \
+   PARAM,         FLAGS,     LIPO_##GROUP,                                     \
+   LIPO_##ALIAS,  ALIASARGS, VALUES},
+#include "LipoOpts.inc"
+#undef OPTION
+};
+
+class LipoOptTable : public opt::OptTable {
+public:
+  LipoOptTable() : OptTable(LipoInfoTable) {}
+};
+
+struct Config {
+  SmallVector<std::string, 1> InputFiles;
+  SmallVector<std::string, 1> VerifyArchList;
+};
+
+} // end namespace
+
+static Config parseLipoOptions(ArrayRef<const char *> ArgsArr) {
+  Config C;
+  LipoOptTable T;
+  unsigned MissingArgumentIndex, MissingArgumentCount;
+  llvm::opt::InputArgList InputArgs =
+      T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
+
+  if (InputArgs.size() == 0) {
+    // PrintHelp does not accept Twine.
+    T.PrintHelp(errs(), "llvm-lipo input[s] option[s]", "llvm-lipo");
+    exit(EXIT_FAILURE);
+  }
+
+  if (InputArgs.hasArg(LIPO_help)) {
+    // PrintHelp does not accept Twine.
+    T.PrintHelp(outs(), "llvm-lipo input[s] option[s]", "llvm-lipo");
+    exit(EXIT_SUCCESS);
+  }
+
+  if (InputArgs.hasArg(LIPO_version)) {
+    outs() << ToolName + "\n";
+    cl::PrintVersionMessage();
+    exit(EXIT_SUCCESS);
+  }
+
+  for (auto Arg : InputArgs.filtered(LIPO_UNKNOWN))
+    reportError("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+
+  for (auto Arg : InputArgs.filtered(LIPO_INPUT))
+    C.InputFiles.push_back(Arg->getValue());
+  if (C.InputFiles.empty())
+    reportError("at least one input file should be specified");
+
+  if (InputArgs.hasArg(LIPO_verify_arch)) {
+    for (auto A : InputArgs.getAllArgValues(LIPO_verify_arch))
+      C.VerifyArchList.push_back(A);
+    if (C.VerifyArchList.empty())
+      reportError(
+          "verify_arch requires at least one architecture to be specified");
+    if (C.InputFiles.size() > 1)
+      reportError("verify_arch expects a single input file");
+  }
+  return C;
+}
+
+static SmallVector<OwningBinary<Binary>, 1>
+readInputBinaries(ArrayRef<std::string> InputFiles) {
+  SmallVector<OwningBinary<Binary>, 1> InputBinaries;
+  for (StringRef InputFile : InputFiles) {
+    Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
+        createBinary(InputFile);
+    if (!BinaryOrErr)
+      reportError(InputFile, BinaryOrErr.takeError());
+    if (!isa<MachOObjectFile>(BinaryOrErr->getBinary()) &&
+        !isa<MachOUniversalBinary>(BinaryOrErr->getBinary()))
+      reportError("File " + InputFile + " has unsupported binary format");
+    InputBinaries.push_back(std::move(*BinaryOrErr));
+  }
+  return InputBinaries;
+}
+
+LLVM_ATTRIBUTE_NORETURN
+static void verifyArch(ArrayRef<OwningBinary<Binary>> InputBinaries,
+                       ArrayRef<std::string> VerifyArchList) {
+  assert(!InputBinaries.empty() &&
+         "The list of input binaries should be non-empty");
+  assert(!VerifyArchList.empty() &&
+         "The list of architectures should be non-empty");
+  assert(InputBinaries.size() == 1 && "Incorrect number of input binaries");
+
+  for (StringRef Arch : VerifyArchList)
+    if (Triple(Arch).getArch() == Triple::ArchType::UnknownArch)
+      reportError("Invalid architecture: " + Arch);
+
+  if (auto UO =
+          dyn_cast<MachOUniversalBinary>(InputBinaries.front().getBinary())) {
+    for (StringRef Arch : VerifyArchList) {
+      Expected<std::unique_ptr<MachOObjectFile>> Obj =
+          UO->getObjectForArch(Arch);
+      if (!Obj)
+        exit(EXIT_FAILURE);
+    }
+  } else if (auto O =
+                 dyn_cast<MachOObjectFile>(InputBinaries.front().getBinary())) {
+    const Triple::ArchType ObjectArch = O->getArch();
+    for (StringRef Arch : VerifyArchList)
+      if (ObjectArch != Triple(Arch).getArch())
+        exit(EXIT_FAILURE);
+  } else {
+    llvm_unreachable("Unexpected binary format");
+  }
+  exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv) {
+  InitLLVM X(argc, argv);
+  Config C = parseLipoOptions(makeArrayRef(argv + 1, argc));
+  SmallVector<OwningBinary<Binary>, 1> InputBinaries =
+      readInputBinaries(C.InputFiles);
+  if (!C.VerifyArchList.empty())
+    verifyArch(InputBinaries, C.VerifyArchList);
+  return EXIT_SUCCESS;
+}

From 902f649217efcf51ac852216ac9be54fccc3a9dc Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 28 May 2019 23:26:22 +0000
Subject: [PATCH 0427/1176] [X86] Fix the Sema checks for getmant builtins to
 only allow 4 and 8 for rounding immediates.

These don't support embedded rounding so we shouldn't be setting HasRC. That way we only
allow current direction and suppress all exceptions.

llvm-svn: 361897
---
 clang/lib/Sema/SemaChecking.cpp | 11 ++++-------
 clang/test/Sema/builtins-x86.c  |  8 ++++++++
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f2aa931eeddfa..f3d8f30567d3e 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3378,6 +3378,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_cvtss2sd_round_mask:
   case X86::BI__builtin_ia32_getexpsd128_round_mask:
   case X86::BI__builtin_ia32_getexpss128_round_mask:
+  case X86::BI__builtin_ia32_getmantpd512_mask:
+  case X86::BI__builtin_ia32_getmantps512_mask:
   case X86::BI__builtin_ia32_maxsd_round_mask:
   case X86::BI__builtin_ia32_maxss_round_mask:
   case X86::BI__builtin_ia32_minsd_round_mask:
@@ -3400,6 +3402,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_fixupimmsd_maskz:
   case X86::BI__builtin_ia32_fixupimmss_mask:
   case X86::BI__builtin_ia32_fixupimmss_maskz:
+  case X86::BI__builtin_ia32_getmantsd_round_mask:
+  case X86::BI__builtin_ia32_getmantss_round_mask:
   case X86::BI__builtin_ia32_rangepd512_mask:
   case X86::BI__builtin_ia32_rangeps512_mask:
   case X86::BI__builtin_ia32_rangesd128_round_mask:
@@ -3470,8 +3474,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_scalefps512_mask:
   case X86::BI__builtin_ia32_scalefsd_round_mask:
   case X86::BI__builtin_ia32_scalefss_round_mask:
-  case X86::BI__builtin_ia32_getmantpd512_mask:
-  case X86::BI__builtin_ia32_getmantps512_mask:
   case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
   case X86::BI__builtin_ia32_sqrtsd_round_mask:
   case X86::BI__builtin_ia32_sqrtss_round_mask:
@@ -3500,11 +3502,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
     ArgNum = 4;
     HasRC = true;
     break;
-  case X86::BI__builtin_ia32_getmantsd_round_mask:
-  case X86::BI__builtin_ia32_getmantss_round_mask:
-    ArgNum = 5;
-    HasRC = true;
-    break;
   }
 
   llvm::APSInt Result;
diff --git a/clang/test/Sema/builtins-x86.c b/clang/test/Sema/builtins-x86.c
index 9872a64f18740..6a2a47d7792ce 100644
--- a/clang/test/Sema/builtins-x86.c
+++ b/clang/test/Sema/builtins-x86.c
@@ -81,6 +81,14 @@ __mmask16 test__builtin_ia32_cmpps512_mask_rounding(__m512 __a, __m512 __b, __mm
   return __builtin_ia32_cmpps512_mask(__a, __b, 0, __u, 0); // expected-error {{invalid rounding argument}}
 }
 
+__m512 test__builtin_ia32_getmantps512_mask(__m512 a, __m512 b) {
+  return __builtin_ia32_getmantps512_mask(a, 0, b, (__mmask16)-1, 10); // expected-error {{invalid rounding argument}}
+}
+
+__m128 test__builtin_ia32_getmantss_round_mask(__m128 a, __m128 b, __m128 c) {
+  return __builtin_ia32_getmantss_round_mask(a, b, 0, c, (__mmask8)-1, 10); // expected-error {{invalid rounding argument}}
+}
+
 __m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i mask) {
   return __builtin_ia32_gatherd_d(a, b, c, mask, 5); // expected-error {{scale argument must be 1, 2, 4, or 8}}
 }

From 3da8e5f92073519133086b0c2c972ba028819539 Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Tue, 28 May 2019 23:26:32 +0000
Subject: [PATCH 0428/1176] Fix IPv6 support on lldb-server platform

Summary:
This is a general fix for the ConnectionFileDescriptor class but my main motivation was to make lldb-server working with IPv6.
The connect URI can use square brackets ([]) to wrap the interface part of the URI (e.g.: <scheme>://[<interface>]:<port>). For IPv6 addresses this is a must since its ip can include colons and it will overlap with the port colon otherwise. The URIParser class parses the square brackets correctly but the ConnectionFileDescriptor doesn't generate them for IPv6 addresses making it impossible to connect to the gdb server when using this protocol.

How to reproduce the issue:
```
$ lldb-server p --server --listen [::1]:8080
...
$ lldb
(lldb) platform select remote-macosx
(lldb) platform connect connect://[::1]:8080
(lldb) platform process -p <pid>
error: unable to launch a GDB server on 'computer'
```

The server was actually launched we were just not able to connect to it. With this fix lldb will correctly connect. I fixed this by wrapping the ip portion with [].

Reviewers: labath

Reviewed By: labath

Subscribers: xiaobai, mgorny, jfb, lldb-commits, labath

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D61833

llvm-svn: 361898
---
 .../posix/ConnectionFileDescriptorPosix.cpp   |   2 +-
 lldb/unittests/Host/CMakeLists.txt            |   2 +
 .../Host/ConnectionFileDescriptorTest.cpp     |  50 +++++++++
 lldb/unittests/Host/SocketTest.cpp            |  94 ++--------------
 lldb/unittests/Host/SocketTestUtilities.cpp   | 104 ++++++++++++++++++
 lldb/unittests/Host/SocketTestUtilities.h     |  47 ++++++++
 6 files changed, 212 insertions(+), 87 deletions(-)
 create mode 100644 lldb/unittests/Host/ConnectionFileDescriptorTest.cpp
 create mode 100644 lldb/unittests/Host/SocketTestUtilities.cpp
 create mode 100644 lldb/unittests/Host/SocketTestUtilities.h

diff --git a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
index 167569dca69e9..237d11acb418f 100644
--- a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
+++ b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
@@ -764,7 +764,7 @@ void ConnectionFileDescriptor::InitializeSocket(Socket *socket) {
   m_write_sp.reset(socket);
   m_read_sp = m_write_sp;
   StreamString strm;
-  strm.Printf("connect://%s:%u", tcp_socket->GetRemoteIPAddress().c_str(),
+  strm.Printf("connect://[%s]:%u", tcp_socket->GetRemoteIPAddress().c_str(),
               tcp_socket->GetRemotePortNumber());
   m_uri = strm.GetString();
 }
diff --git a/lldb/unittests/Host/CMakeLists.txt b/lldb/unittests/Host/CMakeLists.txt
index 8c79bcfdc8fef..cf7c7cacfe2bb 100644
--- a/lldb/unittests/Host/CMakeLists.txt
+++ b/lldb/unittests/Host/CMakeLists.txt
@@ -1,4 +1,5 @@
 set (FILES
+  ConnectionFileDescriptorTest.cpp
   FileActionTest.cpp
   FileSystemTest.cpp
   HostInfoTest.cpp
@@ -8,6 +9,7 @@ set (FILES
   ProcessLaunchInfoTest.cpp
   SocketAddressTest.cpp
   SocketTest.cpp
+  SocketTestUtilities.cpp
   TaskPoolTest.cpp
 )
 
diff --git a/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp b/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp
new file mode 100644
index 0000000000000..229e0eebf64a4
--- /dev/null
+++ b/lldb/unittests/Host/ConnectionFileDescriptorTest.cpp
@@ -0,0 +1,50 @@
+//===-- ConnectionFileDescriptorTest.cpp ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SocketTestUtilities.h"
+#include "gtest/gtest.h"
+
+#include "lldb/Host/posix/ConnectionFileDescriptorPosix.h"
+#include "lldb/Utility/UriParser.h"
+
+using namespace lldb_private;
+
+class ConnectionFileDescriptorTest : public testing::Test {
+public:
+  void SetUp() override {
+    ASSERT_THAT_ERROR(Socket::Initialize(), llvm::Succeeded());
+  }
+
+  void TearDown() override { Socket::Terminate(); }
+
+  void TestGetURI(std::string ip) {
+    std::unique_ptr<TCPSocket> socket_a_up;
+    std::unique_ptr<TCPSocket> socket_b_up;
+    if (!IsAddressFamilySupported(ip)) {
+      GTEST_LOG_(WARNING) << "Skipping test due to missing IPv"
+                          << (IsIPv4(ip) ? "4" : "6") << " support.";
+      return;
+    }
+    CreateTCPConnectedSockets(ip, &socket_a_up, &socket_b_up);
+    auto socket = socket_a_up.release();
+    ConnectionFileDescriptor connection_file_descriptor(socket);
+
+    llvm::StringRef scheme;
+    llvm::StringRef hostname;
+    int port;
+    llvm::StringRef path;
+    std::string uri(connection_file_descriptor.GetURI());
+    EXPECT_TRUE(UriParser::Parse(uri, scheme, hostname, port, path));
+    EXPECT_EQ(ip, hostname);
+    EXPECT_EQ(socket->GetRemotePortNumber(), port);
+  }
+};
+
+TEST_F(ConnectionFileDescriptorTest, TCPGetURIv4) { TestGetURI("127.0.0.1"); }
+
+TEST_F(ConnectionFileDescriptorTest, TCPGetURIv6) { TestGetURI("::1"); }
\ No newline at end of file
diff --git a/lldb/unittests/Host/SocketTest.cpp b/lldb/unittests/Host/SocketTest.cpp
index 1192fa65f875e..26a8bd765c6ef 100644
--- a/lldb/unittests/Host/SocketTest.cpp
+++ b/lldb/unittests/Host/SocketTest.cpp
@@ -6,24 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <cstdio>
-#include <functional>
-#include <thread>
-
+#include "SocketTestUtilities.h"
 #include "gtest/gtest.h"
 
-#include "lldb/Host/Config.h"
-#include "lldb/Host/Socket.h"
-#include "lldb/Host/common/TCPSocket.h"
-#include "lldb/Host/common/UDPSocket.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Testing/Support/Error.h"
-
-#ifndef LLDB_DISABLE_POSIX
-#include "lldb/Host/posix/DomainSocket.h"
-#endif
-
 using namespace lldb_private;
 
 class SocketTest : public testing::Test {
@@ -33,55 +18,6 @@ class SocketTest : public testing::Test {
   }
 
   void TearDown() override { Socket::Terminate(); }
-
-protected:
-  static void AcceptThread(Socket *listen_socket,
-                           bool child_processes_inherit, Socket **accept_socket,
-                           Status *error) {
-    *error = listen_socket->Accept(*accept_socket);
-  }
-
-  template <typename SocketType>
-  void CreateConnectedSockets(
-      llvm::StringRef listen_remote_address,
-      const std::function<std::string(const SocketType &)> &get_connect_addr,
-      std::unique_ptr<SocketType> *a_up, std::unique_ptr<SocketType> *b_up) {
-    bool child_processes_inherit = false;
-    Status error;
-    std::unique_ptr<SocketType> listen_socket_up(
-        new SocketType(true, child_processes_inherit));
-    EXPECT_FALSE(error.Fail());
-    error = listen_socket_up->Listen(listen_remote_address, 5);
-    EXPECT_FALSE(error.Fail());
-    EXPECT_TRUE(listen_socket_up->IsValid());
-
-    Status accept_error;
-    Socket *accept_socket;
-    std::thread accept_thread(AcceptThread, listen_socket_up.get(),
-                              child_processes_inherit, &accept_socket,
-                              &accept_error);
-
-    std::string connect_remote_address = get_connect_addr(*listen_socket_up);
-    std::unique_ptr<SocketType> connect_socket_up(
-        new SocketType(true, child_processes_inherit));
-    EXPECT_FALSE(error.Fail());
-    error = connect_socket_up->Connect(connect_remote_address);
-    EXPECT_FALSE(error.Fail());
-    EXPECT_TRUE(connect_socket_up->IsValid());
-
-    a_up->swap(connect_socket_up);
-    EXPECT_TRUE(error.Success());
-    EXPECT_NE(nullptr, a_up->get());
-    EXPECT_TRUE((*a_up)->IsValid());
-
-    accept_thread.join();
-    b_up->reset(static_cast<SocketType *>(accept_socket));
-    EXPECT_TRUE(accept_error.Success());
-    EXPECT_NE(nullptr, b_up->get());
-    EXPECT_TRUE((*b_up)->IsValid());
-
-    listen_socket_up.reset();
-  }
 };
 
 TEST_F(SocketTest, DecodeHostAndPort) {
@@ -159,38 +95,24 @@ TEST_F(SocketTest, DomainListenConnectAccept) {
 
   std::unique_ptr<DomainSocket> socket_a_up;
   std::unique_ptr<DomainSocket> socket_b_up;
-  CreateConnectedSockets<DomainSocket>(
-      Path, [=](const DomainSocket &) { return Path.str().str(); },
-      &socket_a_up, &socket_b_up);
+  CreateDomainConnectedSockets(Path, &socket_a_up, &socket_b_up);
 }
 #endif
 
 TEST_F(SocketTest, TCPListen0ConnectAccept) {
   std::unique_ptr<TCPSocket> socket_a_up;
   std::unique_ptr<TCPSocket> socket_b_up;
-  CreateConnectedSockets<TCPSocket>(
-      "127.0.0.1:0",
-      [=](const TCPSocket &s) {
-        char connect_remote_address[64];
-        snprintf(connect_remote_address, sizeof(connect_remote_address),
-                 "127.0.0.1:%u", s.GetLocalPortNumber());
-        return std::string(connect_remote_address);
-      },
-      &socket_a_up, &socket_b_up);
+  CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up);
 }
 
 TEST_F(SocketTest, TCPGetAddress) {
   std::unique_ptr<TCPSocket> socket_a_up;
   std::unique_ptr<TCPSocket> socket_b_up;
-  CreateConnectedSockets<TCPSocket>(
-      "127.0.0.1:0",
-      [=](const TCPSocket &s) {
-        char connect_remote_address[64];
-        snprintf(connect_remote_address, sizeof(connect_remote_address),
-                 "127.0.0.1:%u", s.GetLocalPortNumber());
-        return std::string(connect_remote_address);
-      },
-      &socket_a_up, &socket_b_up);
+  if (!IsAddressFamilySupported("127.0.0.1")) {
+    GTEST_LOG_(WARNING) << "Skipping test due to missing IPv4 support.";
+    return;
+  }
+  CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up);
 
   EXPECT_EQ(socket_a_up->GetLocalPortNumber(),
             socket_b_up->GetRemotePortNumber());
diff --git a/lldb/unittests/Host/SocketTestUtilities.cpp b/lldb/unittests/Host/SocketTestUtilities.cpp
new file mode 100644
index 0000000000000..660aba05d5fcf
--- /dev/null
+++ b/lldb/unittests/Host/SocketTestUtilities.cpp
@@ -0,0 +1,104 @@
+//===----------------- SocketTestUtilities.cpp ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SocketTestUtilities.h"
+#include "lldb/Utility/StreamString.h"
+
+#ifdef _WIN32
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#else
+#include <arpa/inet.h>
+#endif
+
+using namespace lldb_private;
+
+static void AcceptThread(Socket *listen_socket, bool child_processes_inherit,
+                         Socket **accept_socket, Status *error) {
+  *error = listen_socket->Accept(*accept_socket);
+}
+
+template <typename SocketType>
+void lldb_private::CreateConnectedSockets(
+    llvm::StringRef listen_remote_address,
+    const std::function<std::string(const SocketType &)> &get_connect_addr,
+    std::unique_ptr<SocketType> *a_up, std::unique_ptr<SocketType> *b_up) {
+  bool child_processes_inherit = false;
+  Status error;
+  std::unique_ptr<SocketType> listen_socket_up(
+      new SocketType(true, child_processes_inherit));
+  EXPECT_FALSE(error.Fail());
+  error = listen_socket_up->Listen(listen_remote_address, 5);
+  EXPECT_FALSE(error.Fail());
+  EXPECT_TRUE(listen_socket_up->IsValid());
+
+  Status accept_error;
+  Socket *accept_socket;
+  std::thread accept_thread(AcceptThread, listen_socket_up.get(),
+                            child_processes_inherit, &accept_socket,
+                            &accept_error);
+
+  std::string connect_remote_address = get_connect_addr(*listen_socket_up);
+  std::unique_ptr<SocketType> connect_socket_up(
+      new SocketType(true, child_processes_inherit));
+  EXPECT_FALSE(error.Fail());
+  error = connect_socket_up->Connect(connect_remote_address);
+  EXPECT_FALSE(error.Fail());
+  EXPECT_TRUE(connect_socket_up->IsValid());
+
+  a_up->swap(connect_socket_up);
+  EXPECT_TRUE(error.Success());
+  EXPECT_NE(nullptr, a_up->get());
+  EXPECT_TRUE((*a_up)->IsValid());
+
+  accept_thread.join();
+  b_up->reset(static_cast<SocketType *>(accept_socket));
+  EXPECT_TRUE(accept_error.Success());
+  EXPECT_NE(nullptr, b_up->get());
+  EXPECT_TRUE((*b_up)->IsValid());
+
+  listen_socket_up.reset();
+}
+
+bool lldb_private::CreateTCPConnectedSockets(
+    std::string listen_remote_ip, std::unique_ptr<TCPSocket> *socket_a_up,
+    std::unique_ptr<TCPSocket> *socket_b_up) {
+  StreamString strm;
+  strm.Printf("[%s]:0", listen_remote_ip.c_str());
+  CreateConnectedSockets<TCPSocket>(
+      strm.GetString(),
+      [=](const TCPSocket &s) {
+        char connect_remote_address[64];
+        snprintf(connect_remote_address, sizeof(connect_remote_address),
+                 "[%s]:%u", listen_remote_ip.c_str(), s.GetLocalPortNumber());
+        return std::string(connect_remote_address);
+      },
+      socket_a_up, socket_b_up);
+  return true;
+}
+
+#ifndef LLDB_DISABLE_POSIX
+void lldb_private::CreateDomainConnectedSockets(
+    llvm::StringRef path, std::unique_ptr<DomainSocket> *socket_a_up,
+    std::unique_ptr<DomainSocket> *socket_b_up) {
+  return CreateConnectedSockets<DomainSocket>(
+      path, [=](const DomainSocket &) { return path.str(); }, socket_a_up,
+      socket_b_up);
+}
+#endif
+
+bool lldb_private::IsAddressFamilySupported(std::string ip) {
+  auto addresses = lldb_private::SocketAddress::GetAddressInfo(
+      ip.c_str(), NULL, AF_UNSPEC, SOCK_STREAM, IPPROTO_TCP);
+  return addresses.size() > 0;
+}
+
+bool lldb_private::IsIPv4(std::string ip) {
+  struct sockaddr_in sock_addr;
+  return inet_pton(AF_INET, ip.c_str(), &(sock_addr.sin_addr)) != 0;
+}
diff --git a/lldb/unittests/Host/SocketTestUtilities.h b/lldb/unittests/Host/SocketTestUtilities.h
new file mode 100644
index 0000000000000..4e51be924fe96
--- /dev/null
+++ b/lldb/unittests/Host/SocketTestUtilities.h
@@ -0,0 +1,47 @@
+//===--------------------- SocketTestUtilities.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_UNITTESTS_HOST_SOCKETTESTUTILITIES_H
+#define LLDB_UNITTESTS_HOST_SOCKETTESTUTILITIES_H
+
+#include <cstdio>
+#include <functional>
+#include <thread>
+
+#include "lldb/Host/Config.h"
+#include "lldb/Host/Socket.h"
+#include "lldb/Host/common/TCPSocket.h"
+#include "lldb/Host/common/UDPSocket.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Testing/Support/Error.h"
+
+#ifndef LLDB_DISABLE_POSIX
+#include "lldb/Host/posix/DomainSocket.h"
+#endif
+
+namespace lldb_private {
+template <typename SocketType>
+void CreateConnectedSockets(
+    llvm::StringRef listen_remote_address,
+    const std::function<std::string(const SocketType &)> &get_connect_addr,
+    std::unique_ptr<SocketType> *a_up, std::unique_ptr<SocketType> *b_up);
+bool CreateTCPConnectedSockets(std::string listen_remote_ip,
+                               std::unique_ptr<TCPSocket> *a_up,
+                               std::unique_ptr<TCPSocket> *b_up);
+#ifndef LLDB_DISABLE_POSIX
+void CreateDomainConnectedSockets(llvm::StringRef path,
+                                  std::unique_ptr<DomainSocket> *a_up,
+                                  std::unique_ptr<DomainSocket> *b_up);
+#endif
+
+bool IsAddressFamilySupported(std::string ip);
+bool IsIPv4(std::string ip);
+} // namespace lldb_private
+
+#endif
\ No newline at end of file

From eb5ee3004f79c97e973025bb524e278a47c595c7 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Tue, 28 May 2019 23:35:44 +0000
Subject: [PATCH 0429/1176] [ORC] Track JIT symbol states more explicitly.

Prior to this patch, JITDylibs inferred symbol states (whether a symbol was
newly added, materializing, resolved, or ready to run) via a combination of (1)
bits in the JITSymbolFlags member, and (2) the state of some internal JITDylib
data structures. This patch explicitly tracks symbol states by adding a new
SymbolState member to the symbol table entries, and removing the 'Lazy' and
'Materializing' bits from JITSymbolFlags. This is a first step towards adding
additional states representing initialization phases (e.g. eh-frame registration,
registration with the language runtime, and static initialization).

llvm-svn: 361899
---
 llvm/include/llvm/ExecutionEngine/JITSymbol.h |  21 +-
 llvm/include/llvm/ExecutionEngine/Orc/Core.h  |  55 +++-
 llvm/lib/ExecutionEngine/Orc/Core.cpp         | 235 ++++++++----------
 .../Orc/RTDyldObjectLinkingLayer.cpp          |   2 +-
 4 files changed, 160 insertions(+), 153 deletions(-)

diff --git a/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/llvm/include/llvm/ExecutionEngine/JITSymbol.h
index fc028884d0120..b14154c5b5e8c 100644
--- a/llvm/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/llvm/include/llvm/ExecutionEngine/JITSymbol.h
@@ -65,15 +65,9 @@ class JITSymbolFlags {
     Absolute = 1U << 3,
     Exported = 1U << 4,
     Callable = 1U << 5,
-    Lazy = 1U << 6,
-    Materializing = 1U << 7,
-    LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Materializing)
+    LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Callable)
   };
 
-  static JITSymbolFlags stripTransientFlags(JITSymbolFlags Orig) {
-    return static_cast<FlagNames>(Orig.Flags & ~Lazy & ~Materializing);
-  }
-
   /// Default-construct a JITSymbolFlags instance.
   JITSymbolFlags() = default;
 
@@ -110,19 +104,6 @@ class JITSymbolFlags {
     return (Flags & HasError) == HasError;
   }
 
-  /// Returns true if this is a lazy symbol.
-  ///        This flag is used internally by the JIT APIs to track
-  ///        materialization states.
-  bool isLazy() const { return Flags & Lazy; }
-
-  /// Returns true if this symbol is in the process of being
-  ///        materialized.
-  bool isMaterializing() const { return Flags & Materializing; }
-
-  /// Returns true if this symbol is fully materialized.
-  ///        (i.e. neither lazy, nor materializing).
-  bool isMaterialized() const { return !(Flags & (Lazy | Materializing)); }
-
   /// Returns true if the Weak flag is set.
   bool isWeak() const {
     return (Flags & Weak) == Weak;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index a966a16390b3d..6a913e85fcbea 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -641,6 +641,59 @@ class JITDylib {
     LLVM_MARK_AS_BITMASK_ENUM(NotifyFullyReady)
   };
 
+  enum class SymbolState : uint8_t {
+    Invalid,       // No symbol should be in this state.
+    NeverSearched, // Added to the symbol table, never queried.
+    Materializing, // Queried, materialization begun.
+    Resolved,      // Assigned address, still materializing.
+    Ready = 0x3f   // Ready and safe for clients to access.
+  };
+
+  class SymbolTableEntry {
+  public:
+    SymbolTableEntry() = default;
+    SymbolTableEntry(JITSymbolFlags Flags)
+        : Flags(Flags), State(SymbolState::NeverSearched),
+          MaterializerAttached(false), PendingRemoval(false) {}
+
+    JITTargetAddress getAddress() const { return Addr; }
+    JITSymbolFlags getFlags() const { return Flags; }
+    SymbolState getState() const { return State; }
+
+    bool isInMaterializationPhase() const {
+      return State == SymbolState::Materializing ||
+             State == SymbolState::Resolved;
+    }
+
+    bool hasMaterializerAttached() const { return MaterializerAttached; }
+    bool isPendingRemoval() const { return PendingRemoval; }
+
+    void setAddress(JITTargetAddress Addr) { this->Addr = Addr; }
+    void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; }
+    void setState(SymbolState State) { this->State = State; }
+
+    void setMaterializerAttached(bool MaterializerAttached) {
+      this->MaterializerAttached = MaterializerAttached;
+    }
+
+    void setPendingRemoval(bool PendingRemoval) {
+      this->PendingRemoval = PendingRemoval;
+    }
+
+    JITEvaluatedSymbol getSymbol() const {
+      return JITEvaluatedSymbol(Addr, Flags);
+    }
+
+  private:
+    JITTargetAddress Addr = 0;
+    JITSymbolFlags Flags;
+    SymbolState State : 6;
+    bool MaterializerAttached : 1;
+    bool PendingRemoval : 1;
+  };
+
+  using SymbolTable = DenseMap<SymbolStringPtr, SymbolTableEntry>;
+
   JITDylib(ExecutionSession &ES, std::string Name);
 
   Error defineImpl(MaterializationUnit &MU);
@@ -685,7 +738,7 @@ class JITDylib {
 
   ExecutionSession &ES;
   std::string JITDylibName;
-  SymbolMap Symbols;
+  SymbolTable Symbols;
   UnmaterializedInfosMap UnmaterializedInfos;
   MaterializingInfosMap MaterializingInfos;
   GeneratorFunction DefGenerator;
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 947355f17de93..c2b7e4a24b99e 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -371,11 +371,6 @@ MaterializationResponsibility::MaterializationResponsibility(
     JITDylib &JD, SymbolFlagsMap SymbolFlags, VModuleKey K)
     : JD(JD), SymbolFlags(std::move(SymbolFlags)), K(std::move(K)) {
   assert(!this->SymbolFlags.empty() && "Materializing nothing?");
-
-#ifndef NDEBUG
-  for (auto &KV : this->SymbolFlags)
-    KV.second |= JITSymbolFlags::Materializing;
-#endif
 }
 
 MaterializationResponsibility::~MaterializationResponsibility() {
@@ -395,8 +390,6 @@ void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
     auto I = SymbolFlags.find(KV.first);
     assert(I != SymbolFlags.end() &&
            "Resolving symbol outside this responsibility set");
-    assert(I->second.isMaterializing() && "Duplicate resolution");
-    I->second &= ~JITSymbolFlags::Materializing;
     if (I->second.isWeak())
       assert(I->second == (KV.second.getFlags() | JITSymbolFlags::Weak) &&
              "Resolving symbol with incorrect flags");
@@ -415,12 +408,6 @@ void MaterializationResponsibility::emit() {
     dbgs() << "In " << JD.getName() << " emitting " << SymbolFlags << "\n";
   });
 
-#ifndef NDEBUG
-  for (auto &KV : SymbolFlags)
-    assert(!KV.second.isMaterializing() &&
-           "Failed to resolve symbol before emission");
-#endif // NDEBUG
-
   JD.emit(SymbolFlags);
   SymbolFlags.clear();
 }
@@ -431,13 +418,8 @@ Error MaterializationResponsibility::defineMaterializing(
   // It's ok if we hit a duplicate here: In that case the new version will be
   // discarded, and the JITDylib::defineMaterializing method will return a
   // duplicate symbol error.
-  for (auto &KV : NewSymbolFlags) {
-    auto I = SymbolFlags.insert(KV).first;
-    (void)I;
-#ifndef NDEBUG
-    I->second |= JITSymbolFlags::Materializing;
-#endif
-  }
+  for (auto &KV : NewSymbolFlags)
+    SymbolFlags.insert(KV);
 
   return JD.defineMaterializing(NewSymbolFlags);
 }
@@ -744,21 +726,19 @@ ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) {
 
 Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
   return ES.runSessionLocked([&]() -> Error {
-    std::vector<SymbolMap::iterator> AddedSyms;
+    std::vector<SymbolTable::iterator> AddedSyms;
 
     for (auto &KV : SymbolFlags) {
-      SymbolMap::iterator EntryItr;
+      SymbolTable::iterator EntryItr;
       bool Added;
 
-      auto NewFlags = KV.second;
-      NewFlags |= JITSymbolFlags::Materializing;
-
-      std::tie(EntryItr, Added) = Symbols.insert(
-          std::make_pair(KV.first, JITEvaluatedSymbol(0, NewFlags)));
+      std::tie(EntryItr, Added) =
+          Symbols.insert(std::make_pair(KV.first, SymbolTableEntry(KV.second)));
 
-      if (Added)
+      if (Added) {
         AddedSyms.push_back(EntryItr);
-      else {
+        EntryItr->second.setState(SymbolState::Materializing);
+      } else {
         // Remove any symbols already added.
         for (auto &SI : AddedSyms)
           Symbols.erase(SI);
@@ -782,9 +762,10 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
         for (auto &KV : MU->getSymbols()) {
           auto SymI = Symbols.find(KV.first);
           assert(SymI != Symbols.end() && "Replacing unknown symbol");
-          assert(!SymI->second.getFlags().isLazy() &&
-                 SymI->second.getFlags().isMaterializing() &&
-                 "Can not replace symbol that is not materializing");
+          assert(SymI->second.isInMaterializationPhase() &&
+                 "Can not call replace on a symbol that is not materializing");
+          assert(!SymI->second.hasMaterializerAttached() &&
+                 "Symbol should not have materializer attached already");
           assert(UnmaterializedInfos.count(KV.first) == 0 &&
                  "Symbol being replaced should have no UnmaterializedInfo");
         }
@@ -803,16 +784,15 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
         // Otherwise, make MU responsible for all the symbols.
         auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
         for (auto &KV : UMI->MU->getSymbols()) {
-          assert(!KV.second.isLazy() &&
-                 "Lazy flag should be managed internally.");
-          assert(!KV.second.isMaterializing() &&
-                 "Materializing flags should be managed internally.");
-
           auto SymI = Symbols.find(KV.first);
-          JITSymbolFlags ReplaceFlags = KV.second;
-          ReplaceFlags |= JITSymbolFlags::Lazy;
-          SymI->second = JITEvaluatedSymbol(SymI->second.getAddress(),
-                                            std::move(ReplaceFlags));
+          assert(SymI->second.getState() == SymbolState::Materializing &&
+                 "Can not replace a symbol that is not materializing");
+          assert(!SymI->second.hasMaterializerAttached() &&
+                 "Can not replace a symbol that has a materializer attached");
+          assert(UnmaterializedInfos.count(KV.first) == 0 &&
+                 "Unexpected materializer entry in map");
+          SymI->second.setAddress(SymI->second.getAddress());
+          SymI->second.setMaterializerAttached(true);
           UnmaterializedInfos[KV.first] = UMI;
         }
 
@@ -830,9 +810,9 @@ JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
 
     for (auto &KV : SymbolFlags) {
       assert(Symbols.count(KV.first) && "JITDylib does not cover this symbol?");
-      assert(Symbols.find(KV.first)->second.getFlags().isMaterializing() &&
-             "getRequestedSymbols can only be called for materializing "
-             "symbols");
+      assert(Symbols.find(KV.first)->second.isInMaterializationPhase() &&
+             "getRequestedSymbols can only be called for symbols that have "
+             "started materializing");
       auto I = MaterializingInfos.find(KV.first);
       if (I == MaterializingInfos.end())
         continue;
@@ -848,9 +828,8 @@ JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
 void JITDylib::addDependencies(const SymbolStringPtr &Name,
                                const SymbolDependenceMap &Dependencies) {
   assert(Symbols.count(Name) && "Name not in symbol table");
-  assert((Symbols[Name].getFlags().isLazy() ||
-          Symbols[Name].getFlags().isMaterializing()) &&
-         "Symbol is not lazy or materializing");
+  assert(Symbols[Name].isInMaterializationPhase() &&
+         "Can not add dependencies for a symbol that is not materializing");
 
   auto &MI = MaterializingInfos[Name];
   assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol");
@@ -865,9 +844,8 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
       // Assert that this symbol exists and has not been emitted already.
       auto SymI = OtherJITDylib.Symbols.find(OtherSymbol);
       assert(SymI != OtherJITDylib.Symbols.end() &&
-             (SymI->second.getFlags().isLazy() ||
-              SymI->second.getFlags().isMaterializing()) &&
-             "Dependency on emitted symbol");
+             (SymI->second.getState() != SymbolState::Ready &&
+              "Dependency on emitted symbol"));
 #endif
 
       auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol];
@@ -892,27 +870,25 @@ void JITDylib::resolve(const SymbolMap &Resolved) {
       auto &Name = KV.first;
       auto Sym = KV.second;
 
-      assert(!Sym.getFlags().isLazy() && !Sym.getFlags().isMaterializing() &&
-             "Materializing flags should be managed internally");
-
       auto I = Symbols.find(Name);
 
       assert(I != Symbols.end() && "Symbol not found");
-      assert(!I->second.getFlags().isLazy() &&
-             I->second.getFlags().isMaterializing() &&
+      assert(!I->second.hasMaterializerAttached() &&
+             "Resolving symbol with materializer attached?");
+      assert(I->second.getState() == SymbolState::Materializing &&
              "Symbol should be materializing");
       assert(I->second.getAddress() == 0 && "Symbol has already been resolved");
 
       assert((Sym.getFlags() & ~JITSymbolFlags::Weak) ==
-                 (JITSymbolFlags::stripTransientFlags(I->second.getFlags()) &
-                  ~JITSymbolFlags::Weak) &&
+                 (I->second.getFlags() & ~JITSymbolFlags::Weak) &&
              "Resolved flags should match the declared flags");
 
       // Once resolved, symbols can never be weak.
       JITSymbolFlags ResolvedFlags = Sym.getFlags();
       ResolvedFlags &= ~JITSymbolFlags::Weak;
-      ResolvedFlags |= JITSymbolFlags::Materializing;
-      I->second = JITEvaluatedSymbol(Sym.getAddress(), ResolvedFlags);
+      I->second.setAddress(Sym.getAddress());
+      I->second.setFlags(ResolvedFlags);
+      I->second.setState(SymbolState::Resolved);
 
       auto &MI = MaterializingInfos[Name];
       for (auto &Q : MI.PendingQueries) {
@@ -986,9 +962,7 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
             // and update its materializing state.
             assert(DependantJD.Symbols.count(DependantName) &&
                    "Dependant has no entry in the Symbols table");
-            auto &DependantSym = DependantJD.Symbols[DependantName];
-            DependantSym.setFlags(DependantSym.getFlags() &
-                                  ~JITSymbolFlags::Materializing);
+            DependantJD.Symbols[DependantName].setState(SymbolState::Ready);
             DependantJD.MaterializingInfos.erase(DependantMII);
           }
         }
@@ -1005,8 +979,7 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
         }
         assert(Symbols.count(Name) &&
                "Symbol has no entry in the Symbols table");
-        auto &Sym = Symbols[Name];
-        Sym.setFlags(Sym.getFlags() & ~JITSymbolFlags::Materializing);
+        Symbols[Name].setState(SymbolState::Ready);
         MaterializingInfos.erase(MII);
       }
     }
@@ -1124,7 +1097,7 @@ void JITDylib::removeFromSearchOrder(JITDylib &JD) {
 Error JITDylib::remove(const SymbolNameSet &Names) {
   return ES.runSessionLocked([&]() -> Error {
     using SymbolMaterializerItrPair =
-        std::pair<SymbolMap::iterator, UnmaterializedInfosMap::iterator>;
+        std::pair<SymbolTable::iterator, UnmaterializedInfosMap::iterator>;
     std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
     SymbolNameSet Missing;
     SymbolNameSet Materializing;
@@ -1139,13 +1112,14 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
       }
 
       // Note symbol materializing.
-      if (I->second.getFlags().isMaterializing()) {
+      if (I->second.isInMaterializationPhase()) {
         Materializing.insert(Name);
         continue;
       }
 
-      auto UMII = I->second.getFlags().isLazy() ? UnmaterializedInfos.find(Name)
-                                                : UnmaterializedInfos.end();
+      auto UMII = I->second.hasMaterializerAttached()
+                      ? UnmaterializedInfos.find(Name)
+                      : UnmaterializedInfos.end();
       SymbolsToRemove.push_back(std::make_pair(I, UMII));
     }
 
@@ -1205,14 +1179,11 @@ Expected<SymbolNameSet> JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
 
   for (auto &Name : Names) {
     auto I = Symbols.find(Name);
-
-    if (I == Symbols.end()) {
+    if (I != Symbols.end()) {
+      assert(!Flags.count(Name) && "Symbol already present in Flags map");
+      Flags[Name] = I->second.getFlags();
+    } else
       Unresolved.insert(Name);
-      continue;
-    }
-
-    assert(!Flags.count(Name) && "Symbol already present in Flags map");
-    Flags[Name] = JITSymbolFlags::stripTransientFlags(I->second.getFlags());
   }
 
   return Unresolved;
@@ -1247,6 +1218,7 @@ void JITDylib::lodgeQueryImpl(
 
   std::vector<SymbolStringPtr> ToRemove;
   for (auto Name : Unresolved) {
+
     // Search for the name in Symbols. Skip it if not found.
     auto SymI = Symbols.find(Name);
     if (SymI == Symbols.end())
@@ -1260,16 +1232,17 @@ void JITDylib::lodgeQueryImpl(
     // set.
     ToRemove.push_back(Name);
 
-    // If the symbol has an address then resolve it.
-    if (SymI->second.getAddress() != 0)
-      Q->resolve(Name, SymI->second);
-
-    // If the symbol is lazy, get the MaterialiaztionUnit for it.
-    if (SymI->second.getFlags().isLazy()) {
+    if (SymI->second.getState() >= SymbolState::Resolved) {
+      assert(!SymI->second.hasMaterializerAttached() &&
+             "Resolved symbols should not have materializers attached");
+      Q->resolve(Name, SymI->second.getSymbol());
+      if (SymI->second.getState() == SymbolState::Ready) {
+        Q->notifySymbolReady();
+        continue;
+      }
+    } else if (SymI->second.hasMaterializerAttached()) {
       assert(SymI->second.getAddress() == 0 &&
-             "Lazy symbol should not have a resolved address");
-      assert(!SymI->second.getFlags().isMaterializing() &&
-             "Materializing and lazy should not both be set");
+             "Symbol not resolved but already has address?");
       auto UMII = UnmaterializedInfos.find(Name);
       assert(UMII != UnmaterializedInfos.end() &&
              "Lazy symbol should have UnmaterializedInfo");
@@ -1280,24 +1253,17 @@ void JITDylib::lodgeQueryImpl(
       // materializing state.
       for (auto &KV : MU->getSymbols()) {
         auto SymK = Symbols.find(KV.first);
-        auto Flags = SymK->second.getFlags();
-        Flags &= ~JITSymbolFlags::Lazy;
-        Flags |= JITSymbolFlags::Materializing;
-        SymK->second.setFlags(Flags);
+        SymK->second.setMaterializerAttached(false);
+        SymK->second.setState(SymbolState::Materializing);
         UnmaterializedInfos.erase(KV.first);
       }
 
       // Add MU to the list of MaterializationUnits to be materialized.
       MUs.push_back(std::move(MU));
-    } else if (!SymI->second.getFlags().isMaterializing()) {
-      // The symbol is neither lazy nor materializing, so it must be
-      // ready. Notify the query and continue.
-      Q->notifySymbolReady();
-      continue;
     }
 
     // Add the query to the PendingQueries list.
-    assert(SymI->second.getFlags().isMaterializing() &&
+    assert(SymI->second.isInMaterializationPhase() &&
            "By this line the symbol should be materializing");
     auto &MI = MaterializingInfos[Name];
     MI.PendingQueries.push_back(Q);
@@ -1387,17 +1353,15 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
 
     // If the symbol has an address then resolve it.
     if (SymI->second.getAddress() != 0) {
-      Q->resolve(Name, SymI->second);
+      Q->resolve(Name, SymI->second.getSymbol());
       if (Q->isFullyResolved())
         ActionFlags |= NotifyFullyResolved;
     }
 
     // If the symbol is lazy, get the MaterialiaztionUnit for it.
-    if (SymI->second.getFlags().isLazy()) {
+    if (SymI->second.hasMaterializerAttached()) {
       assert(SymI->second.getAddress() == 0 &&
              "Lazy symbol should not have a resolved address");
-      assert(!SymI->second.getFlags().isMaterializing() &&
-             "Materializing and lazy should not both be set");
       auto UMII = UnmaterializedInfos.find(Name);
       assert(UMII != UnmaterializedInfos.end() &&
              "Lazy symbol should have UnmaterializedInfo");
@@ -1408,18 +1372,15 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
       // materializing state.
       for (auto &KV : MU->getSymbols()) {
         auto SymK = Symbols.find(KV.first);
-        auto Flags = SymK->second.getFlags();
-        Flags &= ~JITSymbolFlags::Lazy;
-        Flags |= JITSymbolFlags::Materializing;
-        SymK->second.setFlags(Flags);
+        assert(SymK != Symbols.end() && "Missing symbol table entry");
+        SymK->second.setState(SymbolState::Materializing);
+        SymK->second.setMaterializerAttached(false);
         UnmaterializedInfos.erase(KV.first);
       }
 
       // Add MU to the list of MaterializationUnits to be materialized.
       MUs.push_back(std::move(MU));
-    } else if (!SymI->second.getFlags().isMaterializing()) {
-      // The symbol is neither lazy nor materializing, so it must be ready.
-      // Notify the query and continue.
+    } else if (SymI->second.getState() == SymbolState::Ready) {
       Q->notifySymbolReady();
       if (Q->isFullyReady())
         ActionFlags |= NotifyFullyReady;
@@ -1427,7 +1388,7 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
     }
 
     // Add the query to the PendingQueries list.
-    assert(SymI->second.getFlags().isMaterializing() &&
+    assert(SymI->second.isInMaterializationPhase() &&
            "By this line the symbol should be materializing");
     auto &MI = MaterializingInfos[Name];
     MI.PendingQueries.push_back(Q);
@@ -1455,21 +1416,36 @@ void JITDylib::dump(raw_ostream &OS) {
     for (auto &KV : Symbols) {
       OS << "    \"" << *KV.first << "\": ";
       if (auto Addr = KV.second.getAddress())
-        OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags();
+        OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags()
+           << " ";
       else
-        OS << "<not resolved>";
-      if (KV.second.getFlags().isLazy() ||
-          KV.second.getFlags().isMaterializing()) {
-        OS << " (";
-        if (KV.second.getFlags().isLazy()) {
-          auto I = UnmaterializedInfos.find(KV.first);
-          assert(I != UnmaterializedInfos.end() &&
-                 "Lazy symbol should have UnmaterializedInfo");
-          OS << " Lazy (MU=" << I->second->MU.get() << ")";
-        }
-        if (KV.second.getFlags().isMaterializing())
-          OS << " Materializing";
-        OS << ", " << KV.second.getFlags() << " )\n";
+        OS << "<not resolved> ";
+
+      switch (KV.second.getState()) {
+      case SymbolState::Invalid:
+        OS << "Invalid";
+        break;
+      case SymbolState::NeverSearched:
+        OS << "Never-Searched";
+        break;
+      case SymbolState::Materializing:
+        OS << "Materializing";
+        break;
+      case SymbolState::Resolved:
+        OS << "Resolved";
+        break;
+      case SymbolState::Ready:
+        OS << "Ready";
+        break;
+        // default: llvm_unreachable("Invalid state"); break;
+      }
+
+      if (KV.second.hasMaterializerAttached()) {
+        OS << " (Materializer ";
+        auto I = UnmaterializedInfos.find(KV.first);
+        assert(I != UnmaterializedInfos.end() &&
+               "Lazy symbol should have UnmaterializedInfo");
+        OS << I->second->MU.get() << ")\n";
       } else
         OS << "\n";
     }
@@ -1505,21 +1481,17 @@ Error JITDylib::defineImpl(MaterializationUnit &MU) {
   std::vector<SymbolStringPtr> MUDefsOverridden;
 
   for (const auto &KV : MU.getSymbols()) {
-    assert(!KV.second.isLazy() && "Lazy flag should be managed internally.");
-    assert(!KV.second.isMaterializing() &&
-           "Materializing flags should be managed internally.");
-
     auto I = Symbols.find(KV.first);
 
     if (I != Symbols.end()) {
       if (KV.second.isStrong()) {
         if (I->second.getFlags().isStrong() ||
-            I->second.getFlags().isMaterializing())
+            I->second.getState() > SymbolState::NeverSearched)
           Duplicates.insert(KV.first);
         else {
-          assert(I->second.getFlags().isLazy() &&
-                 !I->second.getFlags().isMaterializing() &&
-                 "Overridden existing def should be in the Lazy state");
+          assert(I->second.getState() == SymbolState::NeverSearched &&
+                 "Overridden existing def should be in the never-searched "
+                 "state");
           ExistingDefsOverridden.push_back(KV.first);
         }
       } else
@@ -1546,9 +1518,10 @@ Error JITDylib::defineImpl(MaterializationUnit &MU) {
 
   // Finally, add the defs from this MU.
   for (auto &KV : MU.getSymbols()) {
-    auto NewFlags = KV.second;
-    NewFlags |= JITSymbolFlags::Lazy;
-    Symbols[KV.first] = JITEvaluatedSymbol(0, NewFlags);
+    auto &SymEntry = Symbols[KV.first];
+    SymEntry.setFlags(KV.second);
+    SymEntry.setState(SymbolState::NeverSearched);
+    SymEntry.setMaterializerAttached(true);
   }
 
   return Error::success();
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 97f69ebafa2d4..373a0680d9003 100644
--- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -175,7 +175,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
       auto I = R.getSymbols().find(InternedName);
 
       if (OverrideObjectFlags && I != R.getSymbols().end())
-        Flags = JITSymbolFlags::stripTransientFlags(I->second);
+        Flags = I->second;
       else if (AutoClaimObjectSymbols && I == R.getSymbols().end())
         ExtraSymbolsToClaim[InternedName] = Flags;
     }

From a6f57ad2c9dc1f34f7935ccbddc5fe13ffdc2adc Mon Sep 17 00:00:00 2001
From: Quentin Colombet <quentin.colombet@gmail.com>
Date: Tue, 28 May 2019 23:43:12 +0000
Subject: [PATCH 0430/1176] [RegUsageInfoCollector] Don't mark as saved
 registers that don't have subregister lanes

To determine the list of clobbered registers, the RegUsageInfoCollector pass
uses the list of callee saved registers provided by the target and then augments
it with the list of registers which have all their subregisters saved. It then
basically does the difference between all the registers and the saved registers
to come up with what is clobbered (plus it checks that the register is defined
within that functions).

The patch fixes a bug where when register does not have any subregister lane,
hence when checking if any of its subregister are not saved, we would find none
and think the register is saved as well.

That's obviously wrong.

The code was actually kind of checking for something like that with the
CoveredBySubRegs bit. What this bit says is that a register is completely
covered by its subregisters.
We required that this bit was set, to check that a register was saved by its
subregister lanes, since without this bit, we potentially would miss to check
some part of the register.

However, this bit is used de facto on registers that don't have any
subregisters (e.g., on ARM) and the code was not prepared for that.

This patch fixes this by checking that a register has subregisters before
declaring it saved when none of its lanes are modified.

llvm-svn: 361901
---
 llvm/lib/CodeGen/RegUsageInfoCollector.cpp |  7 +++++--
 llvm/test/CodeGen/ARM/ipra-reg-usage.ll    | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/ipra-reg-usage.ll

diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 01c8fcbc64ce4..3031195807d77 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -179,12 +179,15 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
 
       // Add PReg to SavedRegs if all subregs are saved.
       bool AllSubRegsSaved = true;
-      for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
+      bool HasAtLeastOneSubreg = false;
+      for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR) {
+        HasAtLeastOneSubreg = true;
         if (!SavedRegs.test(*SR)) {
           AllSubRegsSaved = false;
           break;
         }
-      if (AllSubRegsSaved)
+      }
+      if (AllSubRegsSaved && HasAtLeastOneSubreg)
         SavedRegs.set(PReg);
     }
   }
diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
new file mode 100644
index 0000000000000..b01e576e7332d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=cortex-a8 -enable-ipra -print-regusage -o /dev/null 2>&1 < %s | FileCheck %s
+; ipra used to wrongly assumed that registers without subregisters were saved.
+; In that example, r0 wouldn't be in the list of clobbered registers.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+declare void @bar1()
+define void @foo()#0 {
+; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpexc $fpinst $fpscr $fpscr_nzcv $fpsid $itstate $pc $sp $spsr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r12_sp $r0_r1 $r2_r3 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
+  call void @bar1()
+  call void @bar2()
+  ret void
+}
+declare void @bar2()
+attributes #0 = {nounwind}

From 26212da5553a20a4a1dd4126514cb5697230fa53 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm@meinersbur.de>
Date: Tue, 28 May 2019 23:47:55 +0000
Subject: [PATCH 0431/1176] [ScopBuilder] Move buildInvariantEquivalenceClasses
 function from ScopInfo. NFC.

Refactor Scop and ScopBuilder class. Move
buildInvariantEquivalenceClasses function from Scop class to ScopBuilder
class.

Patch by: Dominik Adamski <adamski.dominik@gmail.com>

Differential Revision: https://reviews.llvm.org/D62351

llvm-svn: 361902
---
 polly/include/polly/ScopBuilder.h  | 14 ++++++++++++++
 polly/include/polly/ScopInfo.h     | 26 ++++++++++++--------------
 polly/lib/Analysis/ScopBuilder.cpp | 22 +++++++++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 22 +---------------------
 4 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index 54d48bec0703a..cd17beacd6322 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -120,6 +120,20 @@ class ScopBuilder {
   void buildScop(Region &R, AssumptionCache &AC,
                  OptimizationRemarkEmitter &ORE);
 
+  /// Create equivalence classes for required invariant accesses.
+  ///
+  /// These classes will consolidate multiple required invariant loads from the
+  /// same address in order to keep the number of dimensions in the SCoP
+  /// description small. For each such class equivalence class only one
+  /// representing element, hence one required invariant load, will be chosen
+  /// and modeled as parameter. The method
+  /// Scop::getRepresentingInvariantLoadSCEV() will replace each element from an
+  /// equivalence class with the representing element that is modeled. As a
+  /// consequence Scop::getIdForParam() will only return an id for the
+  /// representing element of each equivalence class, thus for each required
+  /// invariant location.
+  void buildInvariantEquivalenceClasses();
+
   /// Try to build a multi-dimensional fixed sized MemoryAccess from the
   /// Load/Store instruction.
   ///
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 5f48f64dea0aa..bf03899a1bb0c 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2031,20 +2031,6 @@ class Scop {
   /// Check if the base ptr of @p MA is in the SCoP but not hoistable.
   bool hasNonHoistableBasePtrInScop(MemoryAccess *MA, isl::union_map Writes);
 
-  /// Create equivalence classes for required invariant accesses.
-  ///
-  /// These classes will consolidate multiple required invariant loads from the
-  /// same address in order to keep the number of dimensions in the SCoP
-  /// description small. For each such class equivalence class only one
-  /// representing element, hence one required invariant load, will be chosen
-  /// and modeled as parameter. The method
-  /// Scop::getRepresentingInvariantLoadSCEV() will replace each element from an
-  /// equivalence class with the representing element that is modeled. As a
-  /// consequence Scop::getIdForParam() will only return an id for the
-  /// representing element of each equivalence class, thus for each required
-  /// invariant location.
-  void buildInvariantEquivalenceClasses();
-
   /// Return the context under which the access cannot be hoisted.
   ///
   /// @param Access The access to check.
@@ -2386,6 +2372,18 @@ class Scop {
   /// Add metadata for @p Access.
   void addAccessData(MemoryAccess *Access);
 
+  /// Add new invariant access equivalence class
+  void
+  addInvariantEquivClass(const InvariantEquivClassTy &InvariantEquivClass) {
+    InvariantEquivClasses.emplace_back(InvariantEquivClass);
+  }
+
+  /// Add mapping from invariant loads to the representing invariant load of
+  ///        their equivalence class.
+  void addInvariantLoadMapping(const Value *LoadInst, Value *ClassRep) {
+    InvEquivClassVMap[LoadInst] = ClassRep;
+  }
+
   /// Remove the metadata stored for @p Access.
   void removeAccessData(MemoryAccess *Access);
 
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index 3c44429a54858..b1de18fa16378 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -105,6 +105,26 @@ static cl::opt<GranularityChoice> StmtGranularity(
                           "Store-level granularity")),
     cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
 
+void ScopBuilder::buildInvariantEquivalenceClasses() {
+  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
+
+  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
+  for (LoadInst *LInst : RIL) {
+    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
+
+    Type *Ty = LInst->getType();
+    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
+    if (ClassRep) {
+      scop->addInvariantLoadMapping(LInst, ClassRep);
+      continue;
+    }
+
+    ClassRep = LInst;
+    scop->addInvariantEquivClass(
+        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
+  }
+}
+
 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                    Region *NonAffineSubRegion,
                                    bool IsExitBlock) {
@@ -1492,7 +1512,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
   }
 
-  scop->buildInvariantEquivalenceClasses();
+  buildInvariantEquivalenceClasses();
 
   /// A map from basic blocks to their invalid domains.
   DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index f7e7898ee8b17..65e088e323efc 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -2102,26 +2102,6 @@ void Scop::addUserContext() {
   Context = Context.intersect(UserContext);
 }
 
-void Scop::buildInvariantEquivalenceClasses() {
-  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
-
-  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
-  for (LoadInst *LInst : RIL) {
-    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
-
-    Type *Ty = LInst->getType();
-    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
-    if (ClassRep) {
-      InvEquivClassVMap[LInst] = ClassRep;
-      continue;
-    }
-
-    ClassRep = LInst;
-    InvariantEquivClasses.emplace_back(
-        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
-  }
-}
-
 void Scop::buildContext() {
   isl::space Space = isl::space::params_alloc(getIslCtx(), 0);
   Context = isl::set::universe(Space);
@@ -3699,7 +3679,7 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
 
     // If we did not consolidate MA, thus did not find an equivalence class
     // for it, we create a new one.
-    InvariantEquivClasses.emplace_back(
+    addInvariantEquivClass(
         InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
   }
 }

From 92d706eaca6cc79501066eae4392b600008e52c1 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Wed, 29 May 2019 00:01:05 +0000
Subject: [PATCH 0432/1176] [Driver] Search the toolchain dir with
 -print-file-name

This is useful when looking for directories or files relative to the
toolchain root, e.g. include/c++/v1. This change also adds a test
to make sure this functionality doesn't regress in the future.

Differential Revision: https://reviews.llvm.org/D62558

llvm-svn: 361903
---
 clang/lib/Driver/Driver.cpp         |  5 +++++
 clang/test/Driver/print-file-name.c | 19 +++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 clang/test/Driver/print-file-name.c

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 4c153bf3348bd..a57c66403a28a 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4437,6 +4437,11 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
   if (llvm::sys::fs::exists(Twine(P)))
     return P.str();
 
+  SmallString<128> D(Dir);
+  llvm::sys::path::append(D, "..", Name);
+  if (llvm::sys::fs::exists(Twine(D)))
+    return D.str();
+
   if (auto P = SearchPaths(TC.getLibraryPaths()))
     return *P;
 
diff --git a/clang/test/Driver/print-file-name.c b/clang/test/Driver/print-file-name.c
new file mode 100644
index 0000000000000..9447c04a5ea94
--- /dev/null
+++ b/clang/test/Driver/print-file-name.c
@@ -0,0 +1,19 @@
+// Test that -print-file-name finds the correct file.
+
+// RUN: %clang -print-file-name=share/asan_blacklist.txt 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --target=x86_64-linux-gnu \
+// RUN:   | FileCheck --check-prefix=CHECK-RESOURCE-DIR %s
+// CHECK-RESOURCE-DIR: resource_dir{{/|\\\\}}share{{/|\\\\}}asan_blacklist.txt
+
+// RUN: %clang -print-file-name=libclang_rt.builtins.a 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
+// RUN:     --target=x86_64-linux-gnu \
+// RUN:   | FileCheck --check-prefix=CHECK-COMPILER-RT %s
+// CHECK-COMPILER-RT: resource_dir_with_per_target_subdir{{/|\\\\}}lib{{/|\\\\}}x86_64-linux-gnu{{/|\\\\}}libclang_rt.builtins.a
+
+// RUN: %clang -print-file-name=include/c++/v1 2>&1 \
+// RUN:     -ccc-install-dir %S/Inputs/basic_linux_libcxx_tree/usr/bin \
+// RUN:     --target=x86_64-linux-gnu \
+// RUN:   | FileCheck --check-prefix=CHECK-INSTALL-DIR %s
+// CHECK-INSTALL-DIR: basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}bin{{/|\\\\}}..{{/|\\\\}}include{{/|\\\\}}c++{{/|\\\\}}v1

From 26d711be6e8e6d27779afe641881c02e59cd2947 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Wed, 29 May 2019 01:06:00 +0000
Subject: [PATCH 0433/1176] [WebAssembly] Add signatures for RINT builtins

Reviewers: azakai, dschuff

Subscribers: sbc100, jgravelle-google, hiraditya, aheejin, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62564

llvm-svn: 361904
---
 .../WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp     | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 5a86b27cac02a..7b9ae90326f0f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -225,6 +225,12 @@ struct RuntimeLibcallSignatureTable {
     Table[RTLIB::LLROUND_F32] = i64_func_f32;
     Table[RTLIB::LLROUND_F64] = i64_func_f64;
     Table[RTLIB::LLROUND_F128] = i64_func_i64_i64;
+    Table[RTLIB::LRINT_F32] = iPTR_func_f32;
+    Table[RTLIB::LRINT_F64] = iPTR_func_f64;
+    Table[RTLIB::LRINT_F128] = iPTR_func_i64_i64;
+    Table[RTLIB::LLRINT_F32] = i64_func_f32;
+    Table[RTLIB::LLRINT_F64] = i64_func_f64;
+    Table[RTLIB::LLRINT_F128] = i64_func_i64_i64;
     Table[RTLIB::FLOOR_F32] = f32_func_f32;
     Table[RTLIB::FLOOR_F64] = f64_func_f64;
     Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64;

From dc805a49064b6fa2fbe638fa7f5943a00263e1b6 Mon Sep 17 00:00:00 2001
From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Wed, 29 May 2019 01:34:44 +0000
Subject: [PATCH 0434/1176] Fix failure of lit test dependent-libs.cu

llvm-svn: 361905
---
 clang/test/CodeGenCUDA/dependent-libs.cu | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenCUDA/dependent-libs.cu b/clang/test/CodeGenCUDA/dependent-libs.cu
index 6f59e667d3b12..f8bf90715848e 100644
--- a/clang/test/CodeGenCUDA/dependent-libs.cu
+++ b/clang/test/CodeGenCUDA/dependent-libs.cu
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck --check-prefix=DEV %s
-// RUN: %clang_cc1 -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck --check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s
 
 // DEV-NOT: llvm.dependent-libraries
 // HOST: llvm.dependent-libraries

From 860736cc3cfc60a15012d754be41aa389898a1e6 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 29 May 2019 01:35:10 +0000
Subject: [PATCH 0435/1176] [AArch64] auto-generate complete test checks; NFC

llvm-svn: 361906
---
 llvm/test/CodeGen/AArch64/strqu.ll | 38 +++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/strqu.ll b/llvm/test/CodeGen/AArch64/strqu.ll
index 94b9ff3c3bae8..f20a30e3a79c1 100644
--- a/llvm/test/CodeGen/AArch64/strqu.ll
+++ b/llvm/test/CodeGen/AArch64/strqu.ll
@@ -1,28 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu    | FileCheck --check-prefixes=CHECK,NOSPLIT %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu | FileCheck --check-prefixes=CHECK,NOSPLIT %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-linux-gnu    -mcpu=exynos-m1 | FileCheck --check-prefixes=CHECK,NOSPLIT %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mcpu=exynos-m1 | FileCheck --check-prefixes=CHECK,SPLIT %s
 
-; CHECK-LABEL: test_split_f:
-; NOSPLIT: str q{{[0-9]+}}, [x{{[0-9]+}}]
-; SPLIT: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+}}]
-; SPLIT: st1 { v{{[0-9]+}}.2s }, [x{{[0-9]+}}]
 define void @test_split_f(<4 x float> %val, <4 x float>* %addr) {
+; NOSPLIT-LABEL: test_split_f:
+; NOSPLIT:       // %bb.0:
+; NOSPLIT-NEXT:    str q0, [x0]
+; NOSPLIT-NEXT:    ret
+;
+; SPLIT-LABEL: test_split_f:
+; SPLIT:       // %bb.0:
+; SPLIT-NEXT:    rev64 v0.4s, v0.4s
+; SPLIT-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; SPLIT-NEXT:    st1 { v0.2s }, [x0]
+; SPLIT-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; SPLIT-NEXT:    add x8, x0, #8 // =8
+; SPLIT-NEXT:    st1 { v0.2s }, [x8]
+; SPLIT-NEXT:    ret
   store <4 x float> %val, <4 x float>* %addr, align 8
   ret void
 }
 
-; CHECK-LABEL: test_split_d:
-; NOSPLIT: str q{{[0-9]+}}, [x{{[0-9]+}}]
-; SPLIT: st1 { v{{[0-9]+}}.2d }, [x{{[0-9]+}}]
 define void @test_split_d(<2 x double> %val, <2 x double>* %addr) {
+; NOSPLIT-LABEL: test_split_d:
+; NOSPLIT:       // %bb.0:
+; NOSPLIT-NEXT:    str q0, [x0]
+; NOSPLIT-NEXT:    ret
+;
+; SPLIT-LABEL: test_split_d:
+; SPLIT:       // %bb.0:
+; SPLIT-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; SPLIT-NEXT:    st1 { v0.2d }, [x0]
+; SPLIT-NEXT:    ret
   store <2 x double> %val, <2 x double>* %addr, align 8
   ret void
 }
 
-; CHECK-LABEL: test_split_128:
-; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}]
 define void @test_split_128(fp128 %val, fp128* %addr) {
+; CHECK-LABEL: test_split_128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ret
   store fp128 %val, fp128* %addr, align 8
   ret void
 }

From 12e3726fadb0b2a4d8aeed0a2817b5159f9d029d Mon Sep 17 00:00:00 2001
From: Stephane Moore <mog@google.com>
Date: Wed, 29 May 2019 01:36:23 +0000
Subject: [PATCH 0436/1176] Revise the google-objc-global-variable-declaration
 check to match the style guide.

Summary:
Revise the google-objc-global-variable-declaration check to match the style guide.

This commit updates the check as follows:
(1) Do not emit fixes for extern global constants.
(2) Allow the second character of prefixes for constants to be numeric (the new guideline is that global constants should generally be named with a prefix that begins with a capital letter followed by one or more capital letters or numbers).

https://google.github.io/styleguide/objcguide.html#prefixes

Contributed by yaqiji.

Reviewers: Wizard, benhamilton, stephanemoore

Reviewed By: benhamilton, stephanemoore

Subscribers: mgorny, cfe-commits, yaqiji

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62045

llvm-svn: 361907
---
 .../google/GlobalVariableDeclarationCheck.cpp | 22 ++++++++++++-------
 .../google-objc-global-variable-declaration.m | 20 ++++++++++++++---
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
index ce833906dd5c5..30ab04c08c008 100644
--- a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
@@ -23,29 +23,35 @@ namespace objc {
 
 namespace {
 
-AST_MATCHER(VarDecl, isLocalVariable) {
-  return Node.isLocalVarDecl();
-}
+AST_MATCHER(VarDecl, isLocalVariable) { return Node.isLocalVarDecl(); }
 
 FixItHint generateFixItHint(const VarDecl *Decl, bool IsConst) {
+  if (IsConst && (Decl->getStorageClass() != SC_Static)) {
+    // No fix available if it is not a static constant, since it is difficult
+    // to determine the proper fix in this case.
+    return FixItHint();
+  }
+
   char FC = Decl->getName()[0];
   if (!llvm::isAlpha(FC) || Decl->getName().size() == 1) {
     // No fix available if first character is not alphabetical character, or it
-    // is a single-character variable, since it is difficult to determine the 
+    // is a single-character variable, since it is difficult to determine the
     // proper fix in this case. Users should create a proper variable name by
     // their own.
     return FixItHint();
   }
   char SC = Decl->getName()[1];
   if ((FC == 'k' || FC == 'g') && !llvm::isAlpha(SC)) {
-    // No fix available if the prefix is correct but the second character is not
-    // alphabetical, since it is difficult to determine the proper fix in this
-    // case.
+    // No fix available if the prefix is correct but the second character is
+    // not alphabetical, since it is difficult to determine the proper fix in
+    // this case.
     return FixItHint();
   }
+
   auto NewName = (IsConst ? "k" : "g") +
                  llvm::StringRef(std::string(1, FC)).upper() +
                  Decl->getName().substr(1).str();
+
   return FixItHint::CreateReplacement(
       CharSourceRange::getTokenRange(SourceRange(Decl->getLocation())),
       llvm::StringRef(NewName));
@@ -71,7 +77,7 @@ void GlobalVariableDeclarationCheck::registerMatchers(MatchFinder *Finder) {
       this);
   Finder->addMatcher(varDecl(hasGlobalStorage(), hasType(isConstQualified()),
                              unless(isLocalVariable()),
-                             unless(matchesName("::(k[A-Z]|[A-Z]{2,})")))
+                             unless(matchesName("::(k[A-Z])|([A-Z][A-Z0-9])")))
                          .bind("global_const"),
                      this);
 }
diff --git a/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m b/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
index 346ddeca7db93..32af3533f3f1d 100644
--- a/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
+++ b/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
@@ -1,10 +1,14 @@
 // RUN: %check_clang_tidy %s google-objc-global-variable-declaration %t
 
 @class NSString;
+
 static NSString* const myConstString = @"hello";
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'myConstString' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const kMyConstString = @"hello";
 
+extern NSString* const GlobalConstant = @"hey";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'GlobalConstant' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
+
 static NSString* MyString = @"hi";
 // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: non-const global variable 'MyString' must have a name which starts with 'g[A-Z]' [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* gMyString = @"hi";
@@ -25,13 +29,23 @@
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable '_notAlpha' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const _notAlpha = @"NotBeginWithAlpha";
 
+static NSString* const notCap = @"NotBeginWithCap";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'notCap' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
+// CHECK-FIXES: static NSString* const kNotCap = @"NotBeginWithCap";
+
 static NSString* const k_Alpha = @"SecondNotAlpha";
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'k_Alpha' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const k_Alpha = @"SecondNotAlpha";
 
+static NSString* const SecondNotCap = @"SecondNotCapOrNumber";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'SecondNotCap' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
+// CHECK-FIXES: static NSString* const kSecondNotCap = @"SecondNotCapOrNumber";
+
 static NSString* const kGood = @"hello";
 static NSString* const XYGood = @"hello";
+static NSString* const X1Good = @"hello";
 static NSString* gMyIntGood = 0;
+extern NSString* Y2Good;
 
 extern NSString* const GTLServiceErrorDomain;
 
@@ -42,8 +56,8 @@
 
 @implementation Foo
 - (void)f {
-    int x = 0;
-    static int bar;
-    static const int baz = 42;
+  int x = 0;
+  static int bar;
+  static const int baz = 42;
 }
 @end

From 19f703e0d77c657c89322ed337e22d9283cd20d5 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 29 May 2019 01:37:44 +0000
Subject: [PATCH 0437/1176] [AArch64] auto-generate complete test checks; NFC

llvm-svn: 361908
---
 .../CodeGen/AArch64/sdag-store-merging-bug.ll | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sdag-store-merging-bug.ll b/llvm/test/CodeGen/AArch64/sdag-store-merging-bug.ll
index b12dc0933fc08..d67988de57527 100644
--- a/llvm/test/CodeGen/AArch64/sdag-store-merging-bug.ll
+++ b/llvm/test/CodeGen/AArch64/sdag-store-merging-bug.ll
@@ -1,20 +1,22 @@
-; RUN: llc -o - %s -mtriple aarch64-- -mattr +slow-misaligned-128store -stop-after=instruction-select | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s -mtriple aarch64-- -mattr +slow-misaligned-128store | FileCheck %s
 ; Checks for a bug where selection dag store merging would construct wrong
 ; indices when extracting values from vectors, resulting in an invalid
 ; lane duplication in this case.
 ; The only way I could trigger stores with mismatching types getting merged was
 ; via the aarch64 slow-misaligned-128store code splitting stores earlier.
 
-; CHECK-LABEL: name: func
-; CHECK: LDRQui
-; CHECK-NOT: INSERT_SUBREG
-; CHECK-NOT: DUP
-; CHECK-NEXT: STRQui
+; aarch64 feature slow-misaligned-128store splits the following store.
+; store merging immediately merges it back together (but used to get the
+; merging wrong), this is the only way I was able to reproduce the bug...
+
 define void @func(<2 x double>* %sptr, <2 x double>* %dptr) {
+; CHECK-LABEL: func:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    str q0, [x1]
+; CHECK-NEXT:    ret
   %load = load <2 x double>, <2 x double>* %sptr, align 8
-  ; aarch64 feature slow-misaligned-128store splits the following store.
-  ; store merging immediately merges it back together (but used to get the
-  ; merging wrong), this is the only way I was able to reproduce the bug...
   store <2 x double> %load, <2 x double>* %dptr, align 4
   ret void
 }

From 529118fc87c0ca274a4355c3f90ddd7b07215b2d Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Wed, 29 May 2019 01:51:56 +0000
Subject: [PATCH 0438/1176] [builtins] Move the compare2f definition outside of
 the macro

This should hopefully address the error we're seeing in older versions
of Clang.

Differential Revision: https://reviews.llvm.org/D62554

llvm-svn: 361909
---
 compiler-rt/lib/builtins/arm/comparesf2.S | 30 ++++++++++++++---------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/compiler-rt/lib/builtins/arm/comparesf2.S b/compiler-rt/lib/builtins/arm/comparesf2.S
index 442c880e31ef0..a87cadf1b98b4 100644
--- a/compiler-rt/lib/builtins/arm/comparesf2.S
+++ b/compiler-rt/lib/builtins/arm/comparesf2.S
@@ -38,12 +38,11 @@
 
 #include "../assembly.h"
 
-    .macro COMPARESF2_FUNCTION name:req handle_nan:req
-@ int \name(float a, float b)
-
-    .p2align 2
-DEFINE_COMPILERRT_FUNCTION(\name)
+    .syntax unified
+    .text
+    DEFINE_CODE_STATE
 
+    .macro COMPARESF2_FUNCTION_BODY handle_nan:req
 #if defined(COMPILER_RT_ARMHF_TARGET)
     vmov r0, s0
     vmov r1, s1
@@ -168,12 +167,12 @@ LOCAL_LABEL(CHECK_NAN\@):
     \handle_nan
     JMP(lr)
 #endif
-END_COMPILERRT_FUNCTION(\name)
     .endm
 
-    .syntax unified
-    .text
-    DEFINE_CODE_STATE
+@ int __eqsf2(float a, float b)
+
+    .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2)
 
     .macro __eqsf2_handle_nan
 #if defined(USE_THUMB_1)
@@ -183,7 +182,9 @@ END_COMPILERRT_FUNCTION(\name)
 #endif
     .endm
 
-COMPARESF2_FUNCTION __eqsf2, __eqsf2_handle_nan
+COMPARESF2_FUNCTION_BODY __eqsf2_handle_nan
+
+END_COMPILERRT_FUNCTION(__eqsf2)
 
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2)
@@ -194,6 +195,11 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2)
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __lesf2)
 #endif
 
+@ int __gtsf2(float a, float b)
+
+    .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2)
+
     .macro __gtsf2_handle_nan
 #if defined(USE_THUMB_1)
     movs    r0,         #1
@@ -203,7 +209,9 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __lesf2)
 #endif
     .endm
 
-COMPARESF2_FUNCTION __gtsf2, __gtsf2_handle_nan
+COMPARESF2_FUNCTION_BODY __gtsf2_handle_nan
+
+END_COMPILERRT_FUNCTION(__gtsf2)
 
 DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2)
 

From 656afe370df3ac374db7624e0b450f15a2212ab2 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 29 May 2019 02:02:59 +0000
Subject: [PATCH 0439/1176] [X86] Fix x86-64 call *foo@tlsdesc(%rax) and
 support R_386_TLSGOTDESC R_386_TLS_DESC_CALL

D18885 emitted 5 bytes for call *foo@tlsdesc(%rax). It should use the
2-byte form instead and let R_X86_64_TLSDESC_CALL apply to the beginning
of the call instruction.

The 2-byte form was deliberately chosen to make ->LE and ->IE relaxation work:

    0:   48 8d 05 00 00 00 00    lea    0x0(%rip),%rax        # 7 <.text+0x7>
                         3: R_X86_64_GOTPC32_TLSDESC     a-0x4
    7:   ff 10                   callq  *(%rax)
                         7: R_X86_64_TLSDESC_CALL        a

=>

    0:   48 c7 c0 fc ff ff ff    mov    $0xfffffffffffffffc,%rax
    7:   66 90                   xchg   %ax,%ax

Also change the symbol type to STT_TLS when VK_TLSCALL or VK_TLSDESC is
seen.

Reviewed By: compnerd

Differential Revision: https://reviews.llvm.org/D62512

llvm-svn: 361910
---
 llvm/lib/MC/MCELFStreamer.cpp                 |  2 ++
 .../X86/MCTargetDesc/X86ELFObjectWriter.cpp   |  4 +++
 .../X86/MCTargetDesc/X86MCCodeEmitter.cpp     | 20 +++++++++++---
 llvm/test/MC/ELF/relocation-tls.s             | 26 -------------------
 llvm/test/MC/X86/tlsdesc-32.s                 | 19 ++++++++++++++
 llvm/test/MC/X86/tlsdesc-64.s                 | 19 ++++++++++++++
 6 files changed, 61 insertions(+), 29 deletions(-)
 delete mode 100644 llvm/test/MC/ELF/relocation-tls.s
 create mode 100644 llvm/test/MC/X86/tlsdesc-32.s
 create mode 100644 llvm/test/MC/X86/tlsdesc-64.s

diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 6fe16abd5a3a6..245dd063004f8 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -400,6 +400,8 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
     case MCSymbolRefExpr::VK_INDNTPOFF:
     case MCSymbolRefExpr::VK_NTPOFF:
     case MCSymbolRefExpr::VK_GOTNTPOFF:
+    case MCSymbolRefExpr::VK_TLSCALL:
+    case MCSymbolRefExpr::VK_TLSDESC:
     case MCSymbolRefExpr::VK_TLSGD:
     case MCSymbolRefExpr::VK_TLSLD:
     case MCSymbolRefExpr::VK_TLSLDM:
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index e6939839d281a..232a065932384 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -271,6 +271,10 @@ static unsigned getRelocType32(MCContext &Ctx,
     assert(Type == RT32_32);
     assert(!IsPCRel);
     return ELF::R_386_GOTOFF;
+  case MCSymbolRefExpr::VK_TLSCALL:
+    return ELF::R_386_TLS_DESC_CALL;
+  case MCSymbolRefExpr::VK_TLSDESC:
+    return ELF::R_386_TLS_GOTDESC;
   case MCSymbolRefExpr::VK_TPOFF:
     assert(Type == RT32_32);
     assert(!IsPCRel);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8eb48fcb0bb74..31d26d08a63fb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -524,9 +524,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
     // indirect register encoding, this handles addresses like [EAX].  The
     // encoding for [EBP] with no displacement means [disp32] so we handle it
     // by emitting a displacement of 0 below.
-    if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
-      EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
-      return;
+    if (BaseRegNo != N86::EBP) {
+      if (Disp.isImm() && Disp.getImm() == 0) {
+        EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+        return;
+      }
+
+      // If the displacement is @tlscall, treat it as a zero.
+      if (Disp.isExpr()) {
+        auto *Sym = dyn_cast<MCSymbolRefExpr>(Disp.getExpr());
+        if (Sym && Sym->getKind() == MCSymbolRefExpr::VK_TLSCALL) {
+          // This is exclusively used by call *a@tlscall(base). The relocation
+          // (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
+          Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
+          EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+          return;
+        }
+      }
     }
 
     // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
diff --git a/llvm/test/MC/ELF/relocation-tls.s b/llvm/test/MC/ELF/relocation-tls.s
deleted file mode 100644
index 00b4046ad3420..0000000000000
--- a/llvm/test/MC/ELF/relocation-tls.s
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -S --sr | FileCheck  %s
-
-// Test that we produce the correct relocation.
-
-        leaq    _ZL3ccc@TLSDESC(%rip), %rax
-        call    *_ZL3ccc@TLSCALL(%rax)
-        addq    %fs:0, %rax
-
-// CHECK: Section {
-// CHECK:   Index:
-// CHECK:   Name: .rela.text
-// CHECK-NEXT:   Type: SHT_RELA
-// CHECK-NEXT:   Flags [
-// CHECK-NEXT:   ]
-// CHECK-NEXT:   Address: 0x0
-// CHECK-NEXT:   Offset:
-// CHECK-NEXT:   Size:
-// CHECK-NEXT:   Link:
-// CHECK-NEXT:   Info:
-// CHECK-NEXT:   AddressAlignment: 8
-// CHECK-NEXT:   EntrySize: 24
-// CHECK-NEXT:   Relocations [
-// CHECK-NEXT:     0x3 R_X86_64_GOTPC32_TLSDESC _ZL3ccc 0xFFFFFFFFFFFFFFFC
-// CHECK-NEXT:     0x9 R_X86_64_TLSDESC_CALL _ZL3ccc 0x0
-// CHECK-NEXT:   ]
-// CHECK-NEXT: }
diff --git a/llvm/test/MC/X86/tlsdesc-32.s b/llvm/test/MC/X86/tlsdesc-32.s
new file mode 100644
index 0000000000000..866bda65b94ff
--- /dev/null
+++ b/llvm/test/MC/X86/tlsdesc-32.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple i386-pc-linux-musl %s | FileCheck --check-prefix=PRINT %s
+
+# RUN: llvm-mc -filetype=obj -triple i386-pc-linux-musl %s -o %t
+# RUN: llvm-readelf -s %t | FileCheck --check-prefix=SYM %s
+# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck %s
+
+# PRINT:      leal a@tlsdesc(%ebx), %eax
+# PRINT-NEXT: calll *a@tlscall(%eax)
+
+# SYM: TLS GLOBAL DEFAULT UND a
+
+# CHECK:      0: leal (%ebx), %eax
+# CHECK-NEXT:   00000002: R_386_TLS_GOTDESC a
+# CHECK-NEXT: 6: calll *(%eax)
+# CHECK-NEXT:   00000006: R_386_TLS_DESC_CALL a
+
+leal a@tlsdesc(%ebx), %eax
+call *a@tlscall(%eax)
+addl %gs:0, %eax
diff --git a/llvm/test/MC/X86/tlsdesc-64.s b/llvm/test/MC/X86/tlsdesc-64.s
new file mode 100644
index 0000000000000..6da468aa5aa17
--- /dev/null
+++ b/llvm/test/MC/X86/tlsdesc-64.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple x86_64-pc-linux-musl %s | FileCheck --check-prefix=PRINT %s
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-musl %s -o %t
+# RUN: llvm-readelf -s %t | FileCheck --check-prefix=SYM %s
+# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --match-full-lines %s
+
+# PRINT:      leaq a@tlsdesc(%rip), %rax
+# PRINT-NEXT: callq *a@tlscall(%rax)
+
+# SYM: TLS GLOBAL DEFAULT UND a
+
+# CHECK:      0: leaq (%rip), %rax
+# CHECK-NEXT:   0000000000000003: R_X86_64_GOTPC32_TLSDESC a-4
+# CHECK-NEXT: 7: callq *(%rax)
+# CHECK-NEXT:   0000000000000007: R_X86_64_TLSDESC_CALL a
+
+leaq a@tlsdesc(%rip), %rax
+call *a@tlscall(%rax)
+addq %fs:0, %rax

From 719322411ce62662bcd3bdc5ee9deb6c2c84ffb7 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 29 May 2019 02:03:56 +0000
Subject: [PATCH 0440/1176] [ELF] Implement General Dynamic style TLSDESC for
 x86-64

This handles two initial relocation types R_X86_64_GOTPC32_TLSDESC and
R_X86_64_TLSDESC_CALL, as well as the GD->LE and GD->IE relaxations.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62513

llvm-svn: 361911
---
 lld/ELF/Arch/X86_64.cpp                  | 115 ++++++++++++++++-------
 lld/ELF/InputSection.cpp                 |   2 +
 lld/ELF/Relocations.cpp                  |  11 ++-
 lld/ELF/Relocations.h                    |   1 +
 lld/test/ELF/invalid/x86-64-tlsdesc-gd.s |  15 +++
 lld/test/ELF/x86-64-tlsdesc-gd.s         |  69 ++++++++++++++
 6 files changed, 172 insertions(+), 41 deletions(-)
 create mode 100644 lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
 create mode 100644 lld/test/ELF/x86-64-tlsdesc-gd.s

diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 7a839ebe1e30f..fdaf63b11bc3e 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -55,6 +55,7 @@ X86_64::X86_64() {
   PltRel = R_X86_64_JUMP_SLOT;
   RelativeRel = R_X86_64_RELATIVE;
   IRelativeRel = R_X86_64_IRELATIVE;
+  TlsDescRel = R_X86_64_TLSDESC;
   TlsGotRel = R_X86_64_TPOFF64;
   TlsModuleIndexRel = R_X86_64_DTPMOD64;
   TlsOffsetRel = R_X86_64_DTPOFF64;
@@ -88,6 +89,8 @@ RelExpr X86_64::getRelExpr(RelType Type, const Symbol &S,
     return R_DTPREL;
   case R_X86_64_TPOFF32:
     return R_TLS;
+  case R_X86_64_TLSDESC_CALL:
+    return R_TLSDESC_CALL;
   case R_X86_64_TLSLD:
     return R_TLSLD_PC;
   case R_X86_64_TLSGD:
@@ -105,6 +108,8 @@ RelExpr X86_64::getRelExpr(RelType Type, const Symbol &S,
   case R_X86_64_GOT32:
   case R_X86_64_GOT64:
     return R_GOTPLT;
+  case R_X86_64_GOTPC32_TLSDESC:
+    return R_TLSDESC_PC;
   case R_X86_64_GOTPCREL:
   case R_X86_64_GOTPCRELX:
   case R_X86_64_REX_GOTPCRELX:
@@ -173,45 +178,82 @@ RelType X86_64::getDynRel(RelType Type) const {
 }
 
 void X86_64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
-  // Convert
-  //   .byte 0x66
-  //   leaq x@tlsgd(%rip), %rdi
-  //   .word 0x6666
-  //   rex64
-  //   call __tls_get_addr@plt
-  // to
-  //   mov %fs:0x0,%rax
-  //   lea x@tpoff,%rax
-  const uint8_t Inst[] = {
-      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
-      0x48, 0x8d, 0x80, 0, 0, 0, 0,                         // lea x@tpoff,%rax
-  };
-  memcpy(Loc - 4, Inst, sizeof(Inst));
-
-  // The original code used a pc relative relocation and so we have to
-  // compensate for the -4 in had in the addend.
-  write32le(Loc + 8, Val + 4);
+  if (Type == R_X86_64_TLSGD) {
+    // Convert
+    //   .byte 0x66
+    //   leaq x@tlsgd(%rip), %rdi
+    //   .word 0x6666
+    //   rex64
+    //   call __tls_get_addr@plt
+    // to the following two instructions.
+    const uint8_t Inst[] = {
+        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
+        0x00, 0x00,                            // mov %fs:0x0,%rax
+        0x48, 0x8d, 0x80, 0,    0,    0,    0, // lea x@tpoff,%rax
+    };
+    memcpy(Loc - 4, Inst, sizeof(Inst));
+
+    // The original code used a pc relative relocation and so we have to
+    // compensate for the -4 in had in the addend.
+    write32le(Loc + 8, Val + 4);
+  } else {
+    // Convert
+    //   lea x@tlsgd(%rip), %rax
+    //   call *(%rax)
+    // to the following two instructions.
+    assert(Type == R_X86_64_GOTPC32_TLSDESC);
+    if (memcmp(Loc - 3, "\x48\x8d\x05", 3)) {
+      error(getErrorLocation(Loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
+                                        "in callq *x@tlsdesc(%rip), %rax");
+      return;
+    }
+    // movq $x@tpoff(%rip),%rax
+    Loc[-2] = 0xc7;
+    Loc[-1] = 0xc0;
+    write32le(Loc, Val + 4);
+    // xchg ax,ax
+    Loc[4] = 0x66;
+    Loc[5] = 0x90;
+  }
 }
 
 void X86_64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
-  // Convert
-  //   .byte 0x66
-  //   leaq x@tlsgd(%rip), %rdi
-  //   .word 0x6666
-  //   rex64
-  //   call __tls_get_addr@plt
-  // to
-  //   mov %fs:0x0,%rax
-  //   addq x@tpoff,%rax
-  const uint8_t Inst[] = {
-      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
-      0x48, 0x03, 0x05, 0, 0, 0, 0,                         // addq x@tpoff,%rax
-  };
-  memcpy(Loc - 4, Inst, sizeof(Inst));
-
-  // Both code sequences are PC relatives, but since we are moving the constant
-  // forward by 8 bytes we have to subtract the value by 8.
-  write32le(Loc + 8, Val - 8);
+  if (Type == R_X86_64_TLSGD) {
+    // Convert
+    //   .byte 0x66
+    //   leaq x@tlsgd(%rip), %rdi
+    //   .word 0x6666
+    //   rex64
+    //   call __tls_get_addr@plt
+    // to the following two instructions.
+    const uint8_t Inst[] = {
+        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
+        0x00, 0x00,                            // mov %fs:0x0,%rax
+        0x48, 0x03, 0x05, 0,    0,    0,    0, // addq x@gottpoff(%rip),%rax
+    };
+    memcpy(Loc - 4, Inst, sizeof(Inst));
+
+    // Both code sequences are PC relatives, but since we are moving the
+    // constant forward by 8 bytes we have to subtract the value by 8.
+    write32le(Loc + 8, Val - 8);
+  } else {
+    // Convert
+    //   lea x@tlsgd(%rip), %rax
+    //   call *(%rax)
+    // to the following two instructions.
+    assert(Type == R_X86_64_GOTPC32_TLSDESC);
+    if (memcmp(Loc - 3, "\x48\x8d\x05", 3)) {
+      error(getErrorLocation(Loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
+                                        "in callq *x@tlsdesc(%rip), %rax");
+      return;
+    }
+    // movq x@gottpoff(%rip),%rax
+    Loc[-2] = 0x8b;
+    write32le(Loc, Val);
+    // xchg ax,ax
+    Loc[4] = 0x66;
+    Loc[5] = 0x90;
+  }
 }
 
 // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
@@ -331,6 +373,7 @@ void X86_64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
   case R_X86_64_TPOFF32:
   case R_X86_64_GOT32:
   case R_X86_64_GOTPC32:
+  case R_X86_64_GOTPC32_TLSDESC:
   case R_X86_64_GOTPCREL:
   case R_X86_64_GOTPCRELX:
   case R_X86_64_REX_GOTPCRELX:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 74878931afb40..9075568f8c1d7 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -755,6 +755,8 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
     return Sym.getSize() + A;
   case R_TLSDESC:
     return In.Got->getGlobalDynAddr(Sym) + A;
+  case R_TLSDESC_PC:
+    return In.Got->getGlobalDynAddr(Sym) + A - P;
   case R_AARCH64_TLSDESC_PAGE:
     return getAArch64Page(In.Got->getGlobalDynAddr(Sym) + A) -
            getAArch64Page(P);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index a8ed792164bc8..aab5385dad03d 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -217,7 +217,8 @@ handleTlsRelocation(RelType Type, Symbol &Sym, InputSectionBase &C,
   if (Config->EMachine == EM_MIPS)
     return handleMipsTlsRelocation(Type, Sym, C, Offset, Addend, Expr);
 
-  if (oneof<R_TLSDESC, R_AARCH64_TLSDESC_PAGE, R_TLSDESC_CALL>(Expr) &&
+  if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC>(
+          Expr) &&
       Config->Shared) {
     if (In.Got->addDynTlsEntry(Sym)) {
       uint64_t Off = In.Got->getGlobalDynOffset(Sym);
@@ -273,8 +274,8 @@ handleTlsRelocation(RelType Type, Symbol &Sym, InputSectionBase &C,
     return 1;
   }
 
-  if (oneof<R_TLSDESC, R_AARCH64_TLSDESC_PAGE, R_TLSDESC_CALL, R_TLSGD_GOT,
-            R_TLSGD_GOTPLT, R_TLSGD_PC>(Expr)) {
+  if (oneof<R_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
+            R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC>(Expr)) {
     if (Config->Shared) {
       if (In.Got->addDynTlsEntry(Sym)) {
         uint64_t Off = In.Got->getGlobalDynOffset(Sym);
@@ -403,8 +404,8 @@ static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
             R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
             R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT,
-            R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT,
-            R_TLSIE_HINT>(E))
+            R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_TLSDESC_PC,
+            R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E))
     return true;
 
   // These never do, except if the entire file is position dependent or if
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index c856595942d97..923aa4661a394 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -61,6 +61,7 @@ enum RelExpr {
   R_TLS,
   R_TLSDESC,
   R_TLSDESC_CALL,
+  R_TLSDESC_PC,
   R_TLSGD_GOT,
   R_TLSGD_GOTPLT,
   R_TLSGD_PC,
diff --git a/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s b/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
new file mode 100644
index 0000000000000..bd75ff2bc8f3e
--- /dev/null
+++ b/lld/test/ELF/invalid/x86-64-tlsdesc-gd.s
@@ -0,0 +1,15 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo '.tbss; .globl a; a:' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
+# RUN: ld.lld -shared %t1.o -o %t1.so
+
+## GD to LE relaxation.
+# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck -DINPUT=%t.o %s
+## GD to IE relaxation.
+# RUN: not ld.lld %t.o %t1.so -o /dev/null 2>&1 | FileCheck -DINPUT=%t.o %s
+
+# CHECK: error: [[INPUT]]:(.text+0x0): R_X86_64_GOTPC32_TLSDESC must be used in callq *x@tlsdesc(%rip), %rax
+
+leaq a@tlsdesc(%rip), %rdx
+call *a@tlscall(%rdx)
+movl %fs:(%rax), %eax
diff --git a/lld/test/ELF/x86-64-tlsdesc-gd.s b/lld/test/ELF/x86-64-tlsdesc-gd.s
new file mode 100644
index 0000000000000..1c72fbe40d04b
--- /dev/null
+++ b/lld/test/ELF/x86-64-tlsdesc-gd.s
@@ -0,0 +1,69 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo '.tbss; .globl b; b:' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o
+# RUN: ld.lld -shared -soname=t1.so %t1.o -o %t1.so
+
+# RUN: ld.lld -shared %t.o %t1.o -o %t.so
+# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=GD-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=GD %s
+
+# RUN: ld.lld %t.o %t1.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+
+# RUN: ld.lld %t.o %t1.so -o %t
+# RUN: llvm-readobj -r %t | FileCheck --check-prefix=IE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s
+
+# GD-REL:      .rela.dyn {
+# GD-REL-NEXT:   0x20A0 R_X86_64_TLSDESC a 0x0
+# GD-REL-NEXT:   0x20B0 R_X86_64_TLSDESC b 0x0
+# GD-REL-NEXT: }
+
+# 0x20a0-0x1007 = 4249
+# GD:            leaq 4249(%rip), %rax
+# GD-NEXT: 1007: callq *(%rax)
+# GD-NEXT:       movl %fs:(%rax), %eax
+
+# 0x20b0-0x1013 = 4253
+# GD-NEXT:       leaq 4253(%rip), %rax
+# GD-NEXT: 1013: callq *(%rax)
+# GD-NEXT:       movl %fs:(%rax), %eax
+
+# NOREL: no relocations
+
+## offset(a) = -4
+# LE:      movq $-4, %rax
+# LE-NEXT: nop
+# LE-NEXT: movl %fs:(%rax), %eax
+## offset(b) = 0
+# LE:      movq $0, %rax
+# LE-NEXT: nop
+# LE-NEXT: movl %fs:(%rax), %eax
+
+# IE-REL:      .rela.dyn {
+# IE-REL-NEXT:   0x2020C0 R_X86_64_TPOFF64 b 0x0
+# IE-REL-NEXT: }
+
+## a is relaxed to use LE.
+# IE:              movq $-4, %rax
+# IE-NEXT:         nop
+# IE-NEXT:         movl %fs:(%rax), %eax
+## 0x2020C0 - 0x201013 = 4269
+# IE-NEXT:         movq 4269(%rip), %rax
+# IE-NEXT: 201013: nop
+# IE-NEXT:         movl %fs:(%rax), %eax
+
+leaq a@tlsdesc(%rip), %rax
+call *a@tlscall(%rax)
+movl %fs:(%rax), %eax
+
+leaq b@tlsdesc(%rip), %rax
+call *b@tlscall(%rax)
+movl %fs:(%rax), %eax
+
+.section .tbss
+.globl a
+.zero 8
+a:
+.zero 4

From 818c652643411667c054fd9a929c4c07941832b5 Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Wed, 29 May 2019 02:20:37 +0000
Subject: [PATCH 0441/1176] [X86] Use 'llvm_unreachable' instead of nullptr in
 unreachable code to avoid static check fail

RegClassOrBank is an object of RegClassOrRegBank, which is defined as
using llvm::RegClassOrRegBank = typedef PointerUnion<const
TargetRegisterClass *, const RegisterBank *>
so control flow can not get here. Use ""llvm_unreachable" here to avoid
"null pointer" confusion.

Patch by Shengchen Kan (skan)

Differential Revision: https://reviews.llvm.org/D62006

Signed-off-by: pengfei <pengfei.wang@intel.com>
llvm-svn: 361912
---
 llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 4 +++-
 llvm/lib/Target/X86/X86InstructionSelector.cpp   | 6 +++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 55f10a2d0655c..c2fa813c7d756 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -91,7 +91,9 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
     return RB;
   if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
     return &getRegBankFromRegClass(*RC);
-  return nullptr;
+
+  llvm_unreachable("RegClassOrBank is either a const RegisterBank* or "
+                   "a const TargetRegisterClass*");
 }
 
 const TargetRegisterClass &
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 61de562f8a5fa..e52ee03f34ae9 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -1610,8 +1610,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
          "Arguments and return value types must match");
 
-  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
-  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
+  const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
+  if (RegRB.getID() != X86::GPRRegBankID)
     return false;
 
   const static unsigned NumTypes = 4; // i8, i16, i32, i64
@@ -1709,7 +1709,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   const DivRemEntry &TypeEntry = *OpEntryIt;
   const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
 
-  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
+  const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
   if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
       !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
       !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {

From 2405bd6898151e0a7ffede78b0d0c7c85c0b66d3 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Wed, 29 May 2019 02:21:37 +0000
Subject: [PATCH 0442/1176] Rework std::type_info definition to support systems
 without fully merged type info names.

Previously std::type_info always expected type info string to be unique.
But this isn't always the case. Like when -Bsymbolic is passed to the
linker or due to llvm.org/PR37398.

This patch adds the LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT CMake
option which, when specified, overrides the default configuration for
the library.

The current defaults still assume unique names even though this isn't
strictly correct for ELF binaries. We should consider changing the
default in a follow up commit.

llvm-svn: 361913
---
 libcxx/CMakeLists.txt           |  16 ++-
 libcxx/docs/BuildingLibcxx.rst  |  15 +++
 libcxx/include/__config         |  14 +-
 libcxx/include/__config_site.in |   1 +
 libcxx/include/typeinfo         | 224 +++++++++++++++++++++-----------
 5 files changed, 187 insertions(+), 83 deletions(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 885c7d5fdb0cc..1096898d055ec 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -123,6 +123,18 @@ set(LIBCXX_ABI_NAMESPACE "" CACHE STRING "The inline ABI namespace used by libc+
 option(LIBCXX_ABI_UNSTABLE "Unstable ABI of libc++." OFF)
 option(LIBCXX_ABI_FORCE_ITANIUM "Ignore auto-detection and force use of the Itanium ABI.")
 option(LIBCXX_ABI_FORCE_MICROSOFT "Ignore auto-detection and force use of the Microsoft ABI.")
+
+
+set(LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT  "" CACHE STRING
+  "Whether typeinfo names are expected to be unique. Defining this option overrides the default configuration in the library.")
+set(MERGED_TYPEINFO_VALUES ";ON;OFF")
+set_property(CACHE LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT PROPERTY STRINGS ${MERGED_TYPEINFO_DEFAULTS})
+list(FIND MERGED_TYPEINFO_VALUES "${LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT}" IS_VALID_DEFAULT)
+if (${IS_VALID_DEFAULT} EQUAL -1)
+  message(FATAL_ERROR "Value '${LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT}' is not a valid value for
+          LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT")
+endif()
+
 option(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT "Enable per TU ABI insulation by default. To be used by vendors." OFF)
 set(LIBCXX_ABI_DEFINES "" CACHE STRING "A semicolon separated list of ABI macros to define in the site config header.")
 option(LIBCXX_USE_COMPILER_RT "Use compiler-rt instead of libgcc" OFF)
@@ -701,13 +713,15 @@ config_define_if(LIBCXX_ABI_UNSTABLE _LIBCPP_ABI_UNSTABLE)
 config_define_if(LIBCXX_ABI_FORCE_ITANIUM _LIBCPP_ABI_FORCE_ITANIUM)
 config_define_if(LIBCXX_ABI_FORCE_MICROSOFT _LIBCPP_ABI_FORCE_MICROSOFT)
 config_define_if(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT _LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT)
-
 config_define_if_not(LIBCXX_ENABLE_GLOBAL_FILESYSTEM_NAMESPACE _LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE)
 config_define_if_not(LIBCXX_ENABLE_STDIN _LIBCPP_HAS_NO_STDIN)
 config_define_if_not(LIBCXX_ENABLE_STDOUT _LIBCPP_HAS_NO_STDOUT)
 config_define_if_not(LIBCXX_ENABLE_THREADS _LIBCPP_HAS_NO_THREADS)
 config_define_if_not(LIBCXX_ENABLE_MONOTONIC_CLOCK _LIBCPP_HAS_NO_MONOTONIC_CLOCK)
 config_define_if_not(LIBCXX_ENABLE_THREAD_UNSAFE_C_FUNCTIONS _LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS)
+if (NOT LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT STREQUAL "")
+  config_define("${LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT}" _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT)
+endif()
 
 config_define_if(LIBCXX_HAS_PTHREAD_API _LIBCPP_HAS_THREAD_API_PTHREAD)
 config_define_if(LIBCXX_HAS_EXTERNAL_THREAD_API _LIBCPP_HAS_THREAD_API_EXTERNAL)
diff --git a/libcxx/docs/BuildingLibcxx.rst b/libcxx/docs/BuildingLibcxx.rst
index 29a3a2ce48deb..c334269a6d55a 100644
--- a/libcxx/docs/BuildingLibcxx.rst
+++ b/libcxx/docs/BuildingLibcxx.rst
@@ -369,6 +369,21 @@ The following options allow building libc++ for a different ABI version.
   A semicolon-separated list of ABI macros to persist in the site config header.
   See ``include/__config`` for the list of ABI macros.
 
+
+.. option:: LIBCXX_HAS_MERGED_TYPEINFO_NAMES_DEFAULT
+
+  **Default**: ``None``. When defined this option overrides the libraries default configuration
+  for whether merged type info names are present.
+
+
+  Build ``std::type_info`` with the assumption that type info names for a type have been fully
+  merged are unique across the entire program. This may not be the case for libraries built with
+  ``-Bsymbolic`` or due to compiler or linker bugs (Ex. llvm.org/PR37398).
+
+  When the value is ``ON`` typeinfo comparisons compare only the pointer value, otherwise ``strcmp``
+  is used as a fallback.
+
+
 .. _LLVM-specific variables:
 
 LLVM-specific options
diff --git a/libcxx/include/__config b/libcxx/include/__config
index cdbfef37fc1c7..b63102c20d161 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -775,6 +775,16 @@ typedef __char32_t char32_t;
 #  endif
 #endif
 
+#ifndef _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT
+# ifdef _LIBCPP_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos.
+# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 0
+#else
+// TODO: This isn't strictly correct on ELF platforms due to llvm.org/PR37398
+// And we should consider defaulting to OFF.
+# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 1
+#endif
+#endif
+
 #ifndef _LIBCPP_HIDE_FROM_ABI
 #  if _LIBCPP_HIDE_FROM_ABI_PER_TU
 #    define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_INTERNAL_LINKAGE
@@ -936,10 +946,6 @@ template <unsigned> struct __static_assert_check {};
 #define _LIBCPP_EXTERN_TEMPLATE2(...) extern template __VA_ARGS__;
 #endif
 
-#if defined(__APPLE__) && defined(__LP64__) && !defined(__x86_64__)
-#define _LIBCPP_NONUNIQUE_RTTI_BIT (1ULL << 63)
-#endif
-
 #if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || \
     defined(__sun__) || defined(__NetBSD__) || defined(__CloudABI__)
 #define _LIBCPP_LOCALE__L_EXTENSIONS 1
diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in
index 0818d6e1daf24..ffbd372edf000 100644
--- a/libcxx/include/__config_site.in
+++ b/libcxx/include/__config_site.in
@@ -27,6 +27,7 @@
 #cmakedefine _LIBCPP_HAS_THREAD_LIBRARY_EXTERNAL
 #cmakedefine _LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS
 #cmakedefine _LIBCPP_NO_VCRUNTIME
+#cmakedefine01 _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT
 #cmakedefine _LIBCPP_ABI_NAMESPACE @_LIBCPP_ABI_NAMESPACE@
 
 @_LIBCPP_ABI_DEFINES@
diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo
index a52c984961bfc..5d2f03effc547 100644
--- a/libcxx/include/typeinfo
+++ b/libcxx/include/typeinfo
@@ -72,13 +72,10 @@ public:
 #include <vcruntime_typeinfo.h>
 #else
 
-#if defined(_LIBCPP_NONUNIQUE_RTTI_BIT) && !defined(_LIBCPP_ABI_MICROSOFT)
-#   define _LIBCPP_HAS_NONUNIQUE_TYPEINFO
-#endif
-
 namespace std  // purposefully not using versioning namespace
 {
 
+
 #if defined(_LIBCPP_ABI_MICROSOFT)
 
 class _LIBCPP_EXCEPTION_ABI type_info
@@ -116,8 +113,32 @@ public:
     { return !operator==(__arg); }
 };
 
-#elif defined(_LIBCPP_HAS_NONUNIQUE_TYPEINFO)
+#else // !defined(_LIBCPP_ABI_MICROSOFT)
 
+// ========================================================================== //
+//                           Implementations
+// ========================================================================== //
+// ------------------------------------------------------------------------- //
+//                               Unique
+// ------------------------------------------------------------------------- //
+// This implementation of type_info assumes a unique copy of the RTTI for a
+// given type inside a program. This is a valid assumption when abiding to
+// Itanium ABI (http://itanium-cxx-abi.github.io/cxx-abi/abi.html#vtable-components).
+// Under this assumption, we can always compare the addresses of the type names
+// to implement equality-comparison of type_infos instead of having to perform
+// a deep string comparison.
+// -------------------------------------------------------------------------- //
+//                             NonUnique
+// -------------------------------------------------------------------------- //
+// This implementation of type_info does not assume there is always a unique
+// copy of the RTTI for a given type inside a program. For various reasons
+// the linker may have failed to merge every copy of a types RTTI
+// (For example: -Bsymbolic or llvm.org/PR37398). Under this assumption, two
+// type_infos are equal if their addresses are equal or if a deep string
+// comparison is equal.
+// -------------------------------------------------------------------------- //
+//                          NonUniqueARMRTTIBit
+// -------------------------------------------------------------------------- //
 // This implementation of type_info does not assume always a unique copy of
 // the RTTI for a given type inside a program. It packs the pointer to the
 // type name into a uintptr_t and reserves the high bit of that pointer (which
@@ -129,91 +150,131 @@ public:
 // faster. If at least one of the type_infos can't guarantee uniqueness, we
 // have no choice but to fall back to a deep string comparison.
 //
+// This implementation is specific to ARM64 on Apple platforms.
+//
 // Note that the compiler is the one setting (or unsetting) the high bit of
 // the pointer when it constructs the type_info, depending on whether it can
 // guarantee uniqueness for that specific type_info.
-class _LIBCPP_EXCEPTION_ABI type_info
-{
-    type_info& operator=(const type_info&);
-    type_info(const type_info&);
-
-    _LIBCPP_INLINE_VISIBILITY
-    int __compare_nonunique_names(const type_info &__arg) const _NOEXCEPT
-    { return __builtin_strcmp(name(), __arg.name()); }
-
-protected:
-    uintptr_t __type_name;
-
-    _LIBCPP_INLINE_VISIBILITY
-    explicit type_info(const char* __n)
-      : __type_name(reinterpret_cast<uintptr_t>(__n)) {}
 
-public:
-    _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE
-    virtual ~type_info();
-
-    _LIBCPP_INLINE_VISIBILITY
-    const char* name() const _NOEXCEPT
-    {
-      return reinterpret_cast<const char*>(__type_name &
-                                           ~_LIBCPP_NONUNIQUE_RTTI_BIT);
+struct __type_info_implementations {
+  struct __string_impl_base {
+    typedef const char* __type_name_t;
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_CONSTEXPR static const char* __type_name_to_string(__type_name_t __v) _NOEXCEPT {
+      return __v;
     }
-
-    _LIBCPP_INLINE_VISIBILITY
-    bool before(const type_info& __arg) const _NOEXCEPT
-    {
-      if (!((__type_name & __arg.__type_name) & _LIBCPP_NONUNIQUE_RTTI_BIT))
-        return __type_name < __arg.__type_name;
-      return __compare_nonunique_names(__arg) < 0;
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    _LIBCPP_CONSTEXPR static __type_name_t __string_to_type_name(const char* __v) _NOEXCEPT {
+      return __v;
     }
+  };
 
-    _LIBCPP_INLINE_VISIBILITY
-    size_t hash_code() const _NOEXCEPT
-    {
-      if (!(__type_name & _LIBCPP_NONUNIQUE_RTTI_BIT))
-        return __type_name;
+  struct __unique_impl : __string_impl_base {
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static size_t __hash(__type_name_t __v) _NOEXCEPT {
+      return reinterpret_cast<size_t>(__v);
+    }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      return __lhs == __rhs;
+    }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      return __lhs < __rhs;
+    }
+  };
 
-      const char* __ptr = name();
+  struct __non_unique_impl : __string_impl_base {
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static size_t __hash(__type_name_t __ptr) _NOEXCEPT {
       size_t __hash = 5381;
       while (unsigned char __c = static_cast<unsigned char>(*__ptr++))
         __hash = (__hash * 33) ^ __c;
       return __hash;
     }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      return __lhs == __rhs || __builtin_strcmp(__lhs, __rhs) == 0;
+    }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      return __builtin_strcmp(__lhs, __rhs) < 0;
+    }
+  };
 
-    _LIBCPP_INLINE_VISIBILITY
-    bool operator==(const type_info& __arg) const _NOEXCEPT
-    {
-      if (__type_name == __arg.__type_name)
-        return true;
+  struct __non_unique_arm_rtti_bit_impl {
+    typedef uintptr_t __type_name_t;
+
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static const char* __type_name_to_string(__type_name_t __v) _NOEXCEPT {
+      return reinterpret_cast<const char*>(__v &
+          ~__non_unique_rtti_bit::value);
+    }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static __type_name_t __string_to_type_name(const char* __v) _NOEXCEPT {
+      return reinterpret_cast<__type_name_t>(__v);
+    }
 
-      if (!((__type_name & __arg.__type_name) & _LIBCPP_NONUNIQUE_RTTI_BIT))
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static size_t __hash(__type_name_t __v) _NOEXCEPT {
+      if (__is_type_name_unique(__v))
+        return reinterpret_cast<size_t>(__v);
+      return __non_unique_impl::__hash(__type_name_to_string(__v));
+    }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static bool __eq(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      if (__lhs == __rhs)
+        return true;
+      if (__is_type_name_unique(__lhs, __rhs))
         return false;
-      return __compare_nonunique_names(__arg) == 0;
+      return __builtin_strcmp(__type_name_to_string(__lhs), __type_name_to_string(__rhs)) == 0;
     }
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_ALWAYS_INLINE
+    static bool __lt(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      if (__is_type_name_unique(__lhs, __rhs))
+        return __lhs < __rhs;
+      return __builtin_strcmp(__type_name_to_string(__lhs), __type_name_to_string(__rhs)) < 0;
+    }
+
+   private:
+    typedef std::integral_constant<__type_name_t, (1ULL << 63)> __non_unique_rtti_bit;
 
     _LIBCPP_INLINE_VISIBILITY
-    bool operator!=(const type_info& __arg) const _NOEXCEPT
-    { return !operator==(__arg); }
+    static bool __is_type_name_unique(__type_name_t __lhs) _NOEXCEPT {
+      return !(__lhs & __non_unique_rtti_bit::value);
+    }
+    _LIBCPP_INLINE_VISIBILITY
+    static bool __is_type_name_unique(__type_name_t __lhs, __type_name_t __rhs) _NOEXCEPT {
+      return !((__lhs & __rhs) & __non_unique_rtti_bit::value);
+    }
+  };
+
+  typedef
+#if defined(__APPLE__) && defined(__LP64__) && !defined(__x86_64__)
+    __non_unique_arm_rtti_bit_impl
+#elif _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT == 0
+    __non_unique_impl
+#elif _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT == 1
+    __unique_impl
+#else
+#   error invalid configuration for _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT
+#endif
+     __impl;
 };
 
-#else // !_LIBCPP_ABI_MICROSOFT && !_LIBCPP_HAS_NONUNIQUE_TYPEINFO
-
-// This implementation of type_info assumes a unique copy of the RTTI for a
-// given type inside a program. This is a valid assumption when abiding to
-// Itanium ABI (http://itanium-cxx-abi.github.io/cxx-abi/abi.html#vtable-components).
-// Under this assumption, we can always compare the addresses of the type names
-// to implement equality-comparison of type_infos instead of having to perform
-// a deep string comparison.
 class _LIBCPP_EXCEPTION_ABI type_info
 {
-    type_info& operator=(const type_info&);
-    type_info(const type_info&);
+  type_info& operator=(const type_info&);
+  type_info(const type_info&);
+
+ protected:
+    typedef __type_info_implementations::__impl __impl;
 
-protected:
-    const char *__type_name;
+    __impl::__type_name_t __type_name;
 
     _LIBCPP_INLINE_VISIBILITY
-    explicit type_info(const char* __n) : __type_name(__n) {}
+    explicit type_info(const char* __n)
+      : __type_name(__impl::__string_to_type_name(__n)) {}
 
 public:
     _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE
@@ -221,43 +282,50 @@ public:
 
     _LIBCPP_INLINE_VISIBILITY
     const char* name() const _NOEXCEPT
-    { return __type_name; }
+    {
+      return __impl::__type_name_to_string(__type_name);
+    }
 
     _LIBCPP_INLINE_VISIBILITY
     bool before(const type_info& __arg) const _NOEXCEPT
-    { return __type_name < __arg.__type_name; }
+    {
+      return __impl::__lt(__type_name, __arg.__type_name);
+    }
 
     _LIBCPP_INLINE_VISIBILITY
     size_t hash_code() const _NOEXCEPT
-    { return reinterpret_cast<size_t>(__type_name); }
+    {
+      return __impl::__hash(__type_name);
+    }
 
     _LIBCPP_INLINE_VISIBILITY
     bool operator==(const type_info& __arg) const _NOEXCEPT
-    { return __type_name == __arg.__type_name; }
+    {
+      return __impl::__eq(__type_name, __arg.__type_name);
+    }
 
     _LIBCPP_INLINE_VISIBILITY
     bool operator!=(const type_info& __arg) const _NOEXCEPT
     { return !operator==(__arg); }
 };
-
-#endif
+#endif // defined(_LIBCPP_ABI_MICROSOFT)
 
 class _LIBCPP_EXCEPTION_ABI bad_cast
     : public exception
 {
-public:
-    bad_cast() _NOEXCEPT;
-    virtual ~bad_cast() _NOEXCEPT;
-    virtual const char* what() const _NOEXCEPT;
+ public:
+  bad_cast() _NOEXCEPT;
+  virtual ~bad_cast() _NOEXCEPT;
+  virtual const char* what() const _NOEXCEPT;
 };
 
 class _LIBCPP_EXCEPTION_ABI bad_typeid
     : public exception
 {
-public:
-    bad_typeid() _NOEXCEPT;
-    virtual ~bad_typeid() _NOEXCEPT;
-    virtual const char* what() const _NOEXCEPT;
+ public:
+  bad_typeid() _NOEXCEPT;
+  virtual ~bad_typeid() _NOEXCEPT;
+  virtual const char* what() const _NOEXCEPT;
 };
 
 }  // std

From 9ac757bf09a35d87eee7e0d12695e4f87bb8eaac Mon Sep 17 00:00:00 2001
From: Stephane Moore <mog@google.com>
Date: Wed, 29 May 2019 02:23:32 +0000
Subject: [PATCH 0443/1176] =?UTF-8?q?Rollback=20"Revise=20the=20google-obj?=
 =?UTF-8?q?c-global-variable-declaration=20check=20to=20match=20the=20styl?=
 =?UTF-8?q?e=20guide."=20=F0=9F=92=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The change introduced new test failures.

Phabricator URL of original commit: https://reviews.llvm.org/rG12e3726fadb0b2a4d8aeed0a2817b5159f9d029d

llvm-svn: 361914
---
 .../google/GlobalVariableDeclarationCheck.cpp | 22 +++++++------------
 .../google-objc-global-variable-declaration.m | 20 +++--------------
 2 files changed, 11 insertions(+), 31 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
index 30ab04c08c008..ce833906dd5c5 100644
--- a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
@@ -23,35 +23,29 @@ namespace objc {
 
 namespace {
 
-AST_MATCHER(VarDecl, isLocalVariable) { return Node.isLocalVarDecl(); }
+AST_MATCHER(VarDecl, isLocalVariable) {
+  return Node.isLocalVarDecl();
+}
 
 FixItHint generateFixItHint(const VarDecl *Decl, bool IsConst) {
-  if (IsConst && (Decl->getStorageClass() != SC_Static)) {
-    // No fix available if it is not a static constant, since it is difficult
-    // to determine the proper fix in this case.
-    return FixItHint();
-  }
-
   char FC = Decl->getName()[0];
   if (!llvm::isAlpha(FC) || Decl->getName().size() == 1) {
     // No fix available if first character is not alphabetical character, or it
-    // is a single-character variable, since it is difficult to determine the
+    // is a single-character variable, since it is difficult to determine the 
     // proper fix in this case. Users should create a proper variable name by
     // their own.
     return FixItHint();
   }
   char SC = Decl->getName()[1];
   if ((FC == 'k' || FC == 'g') && !llvm::isAlpha(SC)) {
-    // No fix available if the prefix is correct but the second character is
-    // not alphabetical, since it is difficult to determine the proper fix in
-    // this case.
+    // No fix available if the prefix is correct but the second character is not
+    // alphabetical, since it is difficult to determine the proper fix in this
+    // case.
     return FixItHint();
   }
-
   auto NewName = (IsConst ? "k" : "g") +
                  llvm::StringRef(std::string(1, FC)).upper() +
                  Decl->getName().substr(1).str();
-
   return FixItHint::CreateReplacement(
       CharSourceRange::getTokenRange(SourceRange(Decl->getLocation())),
       llvm::StringRef(NewName));
@@ -77,7 +71,7 @@ void GlobalVariableDeclarationCheck::registerMatchers(MatchFinder *Finder) {
       this);
   Finder->addMatcher(varDecl(hasGlobalStorage(), hasType(isConstQualified()),
                              unless(isLocalVariable()),
-                             unless(matchesName("::(k[A-Z])|([A-Z][A-Z0-9])")))
+                             unless(matchesName("::(k[A-Z]|[A-Z]{2,})")))
                          .bind("global_const"),
                      this);
 }
diff --git a/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m b/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
index 32af3533f3f1d..346ddeca7db93 100644
--- a/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
+++ b/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
@@ -1,14 +1,10 @@
 // RUN: %check_clang_tidy %s google-objc-global-variable-declaration %t
 
 @class NSString;
-
 static NSString* const myConstString = @"hello";
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'myConstString' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const kMyConstString = @"hello";
 
-extern NSString* const GlobalConstant = @"hey";
-// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'GlobalConstant' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
-
 static NSString* MyString = @"hi";
 // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: non-const global variable 'MyString' must have a name which starts with 'g[A-Z]' [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* gMyString = @"hi";
@@ -29,23 +25,13 @@
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable '_notAlpha' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const _notAlpha = @"NotBeginWithAlpha";
 
-static NSString* const notCap = @"NotBeginWithCap";
-// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'notCap' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
-// CHECK-FIXES: static NSString* const kNotCap = @"NotBeginWithCap";
-
 static NSString* const k_Alpha = @"SecondNotAlpha";
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'k_Alpha' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const k_Alpha = @"SecondNotAlpha";
 
-static NSString* const SecondNotCap = @"SecondNotCapOrNumber";
-// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'SecondNotCap' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
-// CHECK-FIXES: static NSString* const kSecondNotCap = @"SecondNotCapOrNumber";
-
 static NSString* const kGood = @"hello";
 static NSString* const XYGood = @"hello";
-static NSString* const X1Good = @"hello";
 static NSString* gMyIntGood = 0;
-extern NSString* Y2Good;
 
 extern NSString* const GTLServiceErrorDomain;
 
@@ -56,8 +42,8 @@
 
 @implementation Foo
 - (void)f {
-  int x = 0;
-  static int bar;
-  static const int baz = 42;
+    int x = 0;
+    static int bar;
+    static const int baz = 42;
 }
 @end

From 99e040b3c903a060f0e4f01b817e7c09af5429e9 Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Wed, 29 May 2019 02:26:29 +0000
Subject: [PATCH 0444/1176] build: only search for the needed python type

Windows has different types of runtime libraries which are ABI
incompatible with one another.  This requires that the debug build of
lldb link against the debug build of python.  Adjust the python search
to search for only the required type of python.  This permits building a
release build of lldb against just the release build of python.

llvm-svn: 361915
---
 lldb/cmake/modules/LLDBConfig.cmake | 90 ++++++++---------------------
 1 file changed, 23 insertions(+), 67 deletions(-)

diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index 23182fd154f57..d2f418fe059be 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -153,83 +153,39 @@ function(find_python_libs_windows)
   endif()
 
   file(TO_CMAKE_PATH "${PYTHON_HOME}" PYTHON_HOME)
-  file(TO_CMAKE_PATH "${PYTHON_HOME}/python_d.exe" PYTHON_DEBUG_EXE)
-  file(TO_CMAKE_PATH "${PYTHON_HOME}/libs/${PYTHONLIBS_BASE_NAME}_d.lib" PYTHON_DEBUG_LIB)
-  file(TO_CMAKE_PATH "${PYTHON_HOME}/${PYTHONLIBS_BASE_NAME}_d.dll" PYTHON_DEBUG_DLL)
-
-  file(TO_CMAKE_PATH "${PYTHON_HOME}/python.exe" PYTHON_RELEASE_EXE)
-  file(TO_CMAKE_PATH "${PYTHON_HOME}/libs/${PYTHONLIBS_BASE_NAME}.lib" PYTHON_RELEASE_LIB)
-  file(TO_CMAKE_PATH "${PYTHON_HOME}/${PYTHONLIBS_BASE_NAME}.dll" PYTHON_RELEASE_DLL)
-
-  if (NOT EXISTS ${PYTHON_DEBUG_EXE})
-    message("Unable to find ${PYTHON_DEBUG_EXE}")
-    unset(PYTHON_DEBUG_EXE)
-  endif()
-
-  if (NOT EXISTS ${PYTHON_RELEASE_EXE})
-    message("Unable to find ${PYTHON_RELEASE_EXE}")
-    unset(PYTHON_RELEASE_EXE)
-  endif()
-
-  if (NOT EXISTS ${PYTHON_DEBUG_LIB})
-    message("Unable to find ${PYTHON_DEBUG_LIB}")
-    unset(PYTHON_DEBUG_LIB)
-  endif()
-
-  if (NOT EXISTS ${PYTHON_RELEASE_LIB})
-    message("Unable to find ${PYTHON_RELEASE_LIB}")
-    unset(PYTHON_RELEASE_LIB)
-  endif()
-
-  if (NOT EXISTS ${PYTHON_DEBUG_DLL})
-    message("Unable to find ${PYTHON_DEBUG_DLL}")
-    unset(PYTHON_DEBUG_DLL)
+  # TODO(compnerd) when CMake Policy `CMP0091` is set to NEW, we should use
+  # if(CMAKE_MSVC_RUNTIME_LIBRARY MATCHES MultiThreadedDebug)
+  if(CMAKE_BUILD_TYPE STREQUAL Debug)
+    file(TO_CMAKE_PATH "${PYTHON_HOME}/python_d.exe" PYTHON_EXE)
+    file(TO_CMAKE_PATH "${PYTHON_HOME}/libs/${PYTHONLIBS_BASE_NAME}_d.lib" PYTHON_LIB)
+    file(TO_CMAKE_PATH "${PYTHON_HOME}/${PYTHONLIBS_BASE_NAME}_d.dll" PYTHON_DLL)
+  else()
+    file(TO_CMAKE_PATH "${PYTHON_HOME}/python.exe" PYTHON_EXE)
+    file(TO_CMAKE_PATH "${PYTHON_HOME}/libs/${PYTHONLIBS_BASE_NAME}.lib" PYTHON_LIB)
+    file(TO_CMAKE_PATH "${PYTHON_HOME}/${PYTHONLIBS_BASE_NAME}.dll" PYTHON_DLL)
   endif()
 
-  if (NOT EXISTS ${PYTHON_RELEASE_DLL})
-    message("Unable to find ${PYTHON_RELEASE_DLL}")
-    unset(PYTHON_RELEASE_DLL)
-  endif()
+  foreach(component PYTHON_EXE;PYTHON_LIB;PYTHON_DLL)
+    if(NOT EXISTS ${${component}})
+      message("unable to find ${component}")
+      unset(${component})
+    endif()
+  endforeach()
 
-  if (NOT (PYTHON_DEBUG_EXE AND PYTHON_RELEASE_EXE AND PYTHON_DEBUG_LIB AND PYTHON_RELEASE_LIB AND PYTHON_DEBUG_DLL AND PYTHON_RELEASE_DLL))
-    message("Python installation is corrupt. Python support will be disabled for this build.")
+  if (NOT PYTHON_EXE OR NOT PYTHON_LIB OR NOT PYTHON_DLL)
+    message("Unable to find all Python components.  Python support will be disabled for this build.")
     set(LLDB_DISABLE_PYTHON 1 PARENT_SCOPE)
     return()
   endif()
 
-  # Generator expressions are evaluated in the context of each build configuration generated
-  # by CMake. Here we use the $<CONFIG:Debug>:VALUE logical generator expression to ensure
-  # that the debug Python library, DLL, and executable are used in the Debug build configuration.
-  #
-  # Generator expressions can be difficult to grok at first so here's a breakdown of the one
-  # used for PYTHON_LIBRARY:
-  #
-  # 1. $<CONFIG:Debug> evaluates to 1 when the Debug configuration is being generated,
-  #    or 0 in all other cases.
-  # 2. $<$<CONFIG:Debug>:${PYTHON_DEBUG_LIB}> expands to ${PYTHON_DEBUG_LIB} when the Debug
-  #    configuration is being generated, or nothing (literally) in all other cases.
-  # 3. $<$<NOT:$<CONFIG:Debug>>:${PYTHON_RELEASE_LIB}> expands to ${PYTHON_RELEASE_LIB} when
-  #    any configuration other than Debug is being generated, or nothing in all other cases.
-  # 4. The conditionals in 2 & 3 are mutually exclusive.
-  # 5. A logical expression with a conditional that evaluates to 0 yields no value at all.
-  #
-  # Due to 4 & 5 it's possible to concatenate 2 & 3 to obtain a single value specific to each
-  # build configuration. In this example the value will be ${PYTHON_DEBUG_LIB} when generating the
-  # Debug configuration, or ${PYTHON_RELEASE_LIB} when generating any other configuration.
-  # Note that it's imperative that there is no whitespace between the two expressions, otherwise
-  # CMake will insert a semicolon between the two.
-  set (PYTHON_EXECUTABLE $<$<CONFIG:Debug>:${PYTHON_DEBUG_EXE}>$<$<NOT:$<CONFIG:Debug>>:${PYTHON_RELEASE_EXE}>)
-  set (PYTHON_LIBRARY $<$<CONFIG:Debug>:${PYTHON_DEBUG_LIB}>$<$<NOT:$<CONFIG:Debug>>:${PYTHON_RELEASE_LIB}>)
-  set (PYTHON_DLL $<$<CONFIG:Debug>:${PYTHON_DEBUG_DLL}>$<$<NOT:$<CONFIG:Debug>>:${PYTHON_RELEASE_DLL}>)
-
-  set (PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} PARENT_SCOPE)
-  set (PYTHON_LIBRARY ${PYTHON_LIBRARY} PARENT_SCOPE)
+  set (PYTHON_EXECUTABLE ${PYTHON_EXE} PARENT_SCOPE)
+  set (PYTHON_LIBRARY ${PYTHON_LIB} PARENT_SCOPE)
   set (PYTHON_DLL ${PYTHON_DLL} PARENT_SCOPE)
   set (PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR} PARENT_SCOPE)
 
-  message("-- LLDB Found PythonExecutable: ${PYTHON_RELEASE_EXE} and ${PYTHON_DEBUG_EXE}")
-  message("-- LLDB Found PythonLibs: ${PYTHON_RELEASE_LIB} and ${PYTHON_DEBUG_LIB}")
-  message("-- LLDB Found PythonDLL: ${PYTHON_RELEASE_DLL} and ${PYTHON_DEBUG_DLL}")
+  message("-- LLDB Found PythonExecutable: ${PYTHON_EXE}}")
+  message("-- LLDB Found PythonLibs: ${PYTHON_LIB}")
+  message("-- LLDB Found PythonDLL: ${PYTHON_DLL}")
   message("-- LLDB Found PythonIncludeDirs: ${PYTHON_INCLUDE_DIR}")
 endfunction(find_python_libs_windows)
 

From 360ead76480adf7bd7dcef22944e2acc2cc72720 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Wed, 29 May 2019 02:33:11 +0000
Subject: [PATCH 0445/1176] Update private_typeinfo's `is_equal` implementation
 after r361913

The libc++ typeinfo implementation is being improved to better
handle non-merged type names.

This patch takes advantage of that more correct behavior by delegating
to std::type_infos default operator== instead of doing pointer equality
ourselves.

However, libc++ still expects unique RTTI by default, and so we
should still fall back to strcmp when explicitly requested.

llvm-svn: 361916
---
 libcxxabi/src/private_typeinfo.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/libcxxabi/src/private_typeinfo.cpp b/libcxxabi/src/private_typeinfo.cpp
index 2d83dc03536ed..c0a239476240c 100644
--- a/libcxxabi/src/private_typeinfo.cpp
+++ b/libcxxabi/src/private_typeinfo.cpp
@@ -58,14 +58,12 @@ static inline
 bool
 is_equal(const std::type_info* x, const std::type_info* y, bool use_strcmp)
 {
-#ifndef _WIN32
+    // Use std::type_info's default comparison unless we've explicitly asked
+    // for strcmp.
     if (!use_strcmp)
-        return x == y;
-    return strcmp(x->name(), y->name()) == 0;
-#else
-    (void) use_strcmp;
-    return (x == y) || (strcmp(x->name(), y->name()) == 0);
-#endif
+        return *x == *y;
+    // Still allow pointer equality to short circut.
+    return x == y || strcmp(x->name(), y->name()) == 0;
 }
 
 namespace __cxxabiv1

From 82705e7d52b46ea6067cd75af6c16c695de99b1d Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Wed, 29 May 2019 02:38:19 +0000
Subject: [PATCH 0446/1176] Fix build breakage on 32-bit machines

llvm-svn: 361917
---
 libcxx/include/typeinfo | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo
index 5d2f03effc547..27601769a83be 100644
--- a/libcxx/include/typeinfo
+++ b/libcxx/include/typeinfo
@@ -237,7 +237,10 @@ struct __type_info_implementations {
     }
 
    private:
-    typedef std::integral_constant<__type_name_t, (1ULL << 63)> __non_unique_rtti_bit;
+    // The unique bit is the top bit. It is expected that __type_name_t is 64 bits when
+    // this implementation is actually used.
+    typedef std::integral_constant<__type_name_t,
+      (1ULL << ((__CHAR_BIT__ * sizeof(__type_name_t)) - 1))> __non_unique_rtti_bit;
 
     _LIBCPP_INLINE_VISIBILITY
     static bool __is_type_name_unique(__type_name_t __lhs) _NOEXCEPT {

From 72e3f9662b702fb407012d59ef0c23be2a3c7c7c Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Wed, 29 May 2019 02:49:59 +0000
Subject: [PATCH 0447/1176] Revert "[X86] Use 'llvm_unreachable' instead of
 nullptr in unreachable code to"

This reverts commit c1b3716614bc0a107e6f41a7d3d503baefad8a5b.

llvm-svn: 361918
---
 llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 4 +---
 llvm/lib/Target/X86/X86InstructionSelector.cpp   | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index c2fa813c7d756..55f10a2d0655c 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -91,9 +91,7 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
     return RB;
   if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
     return &getRegBankFromRegClass(*RC);
-
-  llvm_unreachable("RegClassOrBank is either a const RegisterBank* or "
-                   "a const TargetRegisterClass*");
+  return nullptr;
 }
 
 const TargetRegisterClass &
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index e52ee03f34ae9..61de562f8a5fa 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -1610,8 +1610,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
          "Arguments and return value types must match");
 
-  const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
-  if (RegRB.getID() != X86::GPRRegBankID)
+  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
+  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
     return false;
 
   const static unsigned NumTypes = 4; // i8, i16, i32, i64
@@ -1709,7 +1709,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
   const DivRemEntry &TypeEntry = *OpEntryIt;
   const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
 
-  const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
+  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
   if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
       !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
       !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {

From f6cb3bcb4ccbfa288d500c7ba54396611aaf07df Mon Sep 17 00:00:00 2001
From: Jinsong Ji <jji@us.ibm.com>
Date: Wed, 29 May 2019 03:02:59 +0000
Subject: [PATCH 0448/1176] Support resource tracking with InstrSchedModel

The current design use DFA to do resource tracking in SMS,
and DFA only support InstrItins, and also has scaling limitation.

This patch extend SMS to allow Subtarget to use ProcResource in
InstrSchedModel instead.

Differential Revision: https://reviews.llvm.org/D62163

llvm-svn: 361919
---
 llvm/include/llvm/CodeGen/MachinePipeliner.h  |  55 +++-
 .../llvm/CodeGen/TargetSubtargetInfo.h        |   4 +
 llvm/lib/CodeGen/MachinePipeliner.cpp         | 241 +++++++++++++++---
 3 files changed, 265 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index a30e4b91edcfc..d40becbb227f1 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -463,6 +463,56 @@ class NodeSet {
 #endif
 };
 
+// 16 was selected based on the number of ProcResource kinds for all
+// existing Subtargets, so that SmallVector don't need to resize too often.
+static const int DefaultProcResSize = 16;
+
+class ResourceManager {
+private:
+  const MCSubtargetInfo *STI;
+  const MCSchedModel &SM;
+  const bool UseDFA;
+  std::unique_ptr<DFAPacketizer> DFAResources;
+  /// Each processor resource is associated with a so-called processor resource
+  /// mask. This vector allows to correlate processor resource IDs with
+  /// processor resource masks. There is exactly one element per each processor
+  /// resource declared by the scheduling model.
+  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
+
+  llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount;
+
+public:
+  ResourceManager(const TargetSubtargetInfo *ST)
+      : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()),
+        ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
+        ProcResourceCount(SM.getNumProcResourceKinds(), 0) {
+    if (UseDFA)
+      DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+    initProcResourceVectors(SM, ProcResourceMasks);
+  }
+
+  void initProcResourceVectors(const MCSchedModel &SM,
+                               SmallVectorImpl<uint64_t> &Masks);
+  /// Check if the resources occupied by a MCInstrDesc are available in
+  /// the current state.
+  bool canReserveResources(const MCInstrDesc *MID) const;
+
+  /// Reserve the resources occupied by a MCInstrDesc and change the current
+  /// state to reflect that change.
+  void reserveResources(const MCInstrDesc *MID);
+
+  /// Check if the resources occupied by a machine instruction are available
+  /// in the current state.
+  bool canReserveResources(const MachineInstr &MI) const;
+
+  /// Reserve the resources occupied by a machine instruction and change the
+  /// current state to reflect that change.
+  void reserveResources(const MachineInstr &MI);
+
+  /// Reset the state
+  void clearResources();
+};
+
 /// This class represents the scheduled code.  The main data structure is a
 /// map from scheduled cycle to instructions.  During scheduling, the
 /// data structure explicitly represents all stages/iterations.   When
@@ -501,12 +551,11 @@ class SMSchedule {
   /// Virtual register information.
   MachineRegisterInfo &MRI;
 
-  std::unique_ptr<DFAPacketizer> Resources;
+  ResourceManager ProcItinResources;
 
 public:
   SMSchedule(MachineFunction *mf)
-      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
-        Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
+      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {}
 
   void reset() {
     ScheduledInstrs.clear();
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 9d5026648526a..4c6f1163469b4 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -246,6 +246,10 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
   }
 
+  /// Default to DFA for resource management, return false when target will use
+  /// ProcResource in InstrSchedModel instead.
+  virtual bool useDFAforSMS() const { return true; }
+
   // For use with PostRAScheduling: get the minimum optimization level needed
   // to enable post-RA scheduling.
   virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 9c1a5a733753f..051cd07cdae65 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -179,6 +179,13 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
       !EnableSWPOptSize.getPosition())
     return false;
 
+  // Cannot pipeline loops without instruction itineraries if we are using
+  // DFA for the pipeliner.
+  if (mf.getSubtarget().useDFAforSMS() &&
+      (!mf.getSubtarget().getInstrItineraryData() ||
+       mf.getSubtarget().getInstrItineraryData()->isEmpty()))
+    return false;
+
   MF = &mf;
   MLI = &getAnalysis<MachineLoopInfo>();
   MDT = &getAnalysis<MachineDominatorTree>();
@@ -810,27 +817,55 @@ namespace {
 // the number of functional unit choices.
 struct FuncUnitSorter {
   const InstrItineraryData *InstrItins;
+  const MCSubtargetInfo *STI;
   DenseMap<unsigned, unsigned> Resources;
 
-  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+  FuncUnitSorter(const TargetSubtargetInfo &TSI)
+      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
 
   // Compute the number of functional unit alternatives needed
   // at each stage, and take the minimum value. We prioritize the
   // instructions by the least number of choices first.
   unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
-    unsigned schedClass = Inst->getDesc().getSchedClass();
+    unsigned SchedClass = Inst->getDesc().getSchedClass();
     unsigned min = UINT_MAX;
-    for (const InstrStage *IS = InstrItins->beginStage(schedClass),
-                          *IE = InstrItins->endStage(schedClass);
-         IS != IE; ++IS) {
-      unsigned funcUnits = IS->getUnits();
-      unsigned numAlternatives = countPopulation(funcUnits);
-      if (numAlternatives < min) {
-        min = numAlternatives;
-        F = funcUnits;
+    if (InstrItins && !InstrItins->isEmpty()) {
+      for (const InstrStage &IS :
+           make_range(InstrItins->beginStage(SchedClass),
+                      InstrItins->endStage(SchedClass))) {
+        unsigned funcUnits = IS.getUnits();
+        unsigned numAlternatives = countPopulation(funcUnits);
+        if (numAlternatives < min) {
+          min = numAlternatives;
+          F = funcUnits;
+        }
       }
+      return min;
+    }
+    if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+      const MCSchedClassDesc *SCDesc =
+          STI->getSchedModel().getSchedClassDesc(SchedClass);
+      if (!SCDesc->isValid())
+        // No valid Schedule Class Desc for schedClass, should be
+        // Pseudo/PostRAPseudo
+        return min;
+
+      for (const MCWriteProcResEntry &PRE :
+           make_range(STI->getWriteProcResBegin(SCDesc),
+                      STI->getWriteProcResEnd(SCDesc))) {
+        if (!PRE.Cycles)
+          continue;
+        const MCProcResourceDesc *ProcResource =
+            STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
+        unsigned NumUnits = ProcResource->NumUnits;
+        if (NumUnits < min) {
+          min = NumUnits;
+          F = PRE.ProcResourceIdx;
+        }
+      }
+      return min;
     }
-    return min;
+    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
   }
 
   // Compute the critical resources needed by the instruction. This
@@ -840,13 +875,34 @@ struct FuncUnitSorter {
   // the same, highly used, functional unit have high priority.
   void calcCriticalResources(MachineInstr &MI) {
     unsigned SchedClass = MI.getDesc().getSchedClass();
-    for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
-                          *IE = InstrItins->endStage(SchedClass);
-         IS != IE; ++IS) {
-      unsigned FuncUnits = IS->getUnits();
-      if (countPopulation(FuncUnits) == 1)
-        Resources[FuncUnits]++;
+    if (InstrItins && !InstrItins->isEmpty()) {
+      for (const InstrStage &IS :
+           make_range(InstrItins->beginStage(SchedClass),
+                      InstrItins->endStage(SchedClass))) {
+        unsigned FuncUnits = IS.getUnits();
+        if (countPopulation(FuncUnits) == 1)
+          Resources[FuncUnits]++;
+      }
+      return;
+    }
+    if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+      const MCSchedClassDesc *SCDesc =
+          STI->getSchedModel().getSchedClassDesc(SchedClass);
+      if (!SCDesc->isValid())
+        // No valid Schedule Class Desc for schedClass, should be
+        // Pseudo/PostRAPseudo
+        return;
+
+      for (const MCWriteProcResEntry &PRE :
+           make_range(STI->getWriteProcResBegin(SCDesc),
+                      STI->getWriteProcResEnd(SCDesc))) {
+        if (!PRE.Cycles)
+          continue;
+        Resources[PRE.ProcResourceIdx]++;
+      }
+      return;
     }
+    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
   }
 
   /// Return true if IS1 has less priority than IS2.
@@ -869,14 +925,14 @@ struct FuncUnitSorter {
 /// to add it to each existing DFA, until a legal space is found. If the
 /// instruction cannot be reserved in an existing DFA, we create a new one.
 unsigned SwingSchedulerDAG::calculateResMII() {
-  SmallVector<DFAPacketizer *, 8> Resources;
+
+  SmallVector<ResourceManager*, 8> Resources;
   MachineBasicBlock *MBB = Loop.getHeader();
-  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
+  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
 
   // Sort the instructions by the number of available choices for scheduling,
   // least to most. Use the number of critical resources as the tie breaker.
-  FuncUnitSorter FUS =
-      FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
+  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
   for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
                                    E = MBB->getFirstTerminator();
        I != E; ++I)
@@ -898,8 +954,8 @@ unsigned SwingSchedulerDAG::calculateResMII() {
     // DFA is needed for each cycle.
     unsigned NumCycles = getSUnit(MI)->Latency;
     unsigned ReservedCycles = 0;
-    SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
-    SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
+    SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
+    SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
     for (unsigned C = 0; C < NumCycles; ++C)
       while (RI != RE) {
         if ((*RI++)->canReserveResources(*MI)) {
@@ -914,8 +970,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {
     }
     // Add new DFAs, if needed, to reserve resources.
     for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
-      DFAPacketizer *NewResource =
-          TII->CreateTargetScheduleState(MF.getSubtarget());
+      ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
       assert(NewResource->canReserveResources(*MI) && "Reserve error.");
       NewResource->reserveResources(*MI);
       Resources.push_back(NewResource);
@@ -923,8 +978,8 @@ unsigned SwingSchedulerDAG::calculateResMII() {
   }
   int Resmii = Resources.size();
   // Delete the memory for each of the DFAs that were created earlier.
-  for (DFAPacketizer *RI : Resources) {
-    DFAPacketizer *D = RI;
+  for (ResourceManager *RI : Resources) {
+    ResourceManager *D = RI;
     delete D;
   }
   Resources.clear();
@@ -3197,8 +3252,9 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
   for (int curCycle = StartCycle; curCycle != termCycle;
        forward ? ++curCycle : --curCycle) {
 
-    // Add the already scheduled instructions at the specified cycle to the DFA.
-    Resources->clearResources();
+    // Add the already scheduled instructions at the specified cycle to the
+    // DFA.
+    ProcItinResources.clearResources();
     for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
          checkCycle <= LastCycle; checkCycle += II) {
       std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
@@ -3208,13 +3264,13 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
            I != E; ++I) {
         if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
           continue;
-        assert(Resources->canReserveResources(*(*I)->getInstr()) &&
+        assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
                "These instructions have already been scheduled.");
-        Resources->reserveResources(*(*I)->getInstr());
+        ProcItinResources.reserveResources(*(*I)->getInstr());
       }
     }
     if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
-        Resources->canReserveResources(*SU->getInstr())) {
+        ProcItinResources.canReserveResources(*SU->getInstr())) {
       LLVM_DEBUG({
         dbgs() << "\tinsert at cycle " << curCycle << " ";
         SU->getInstr()->dump();
@@ -3812,5 +3868,126 @@ LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
 
 #endif
 
+void ResourceManager::initProcResourceVectors(
+    const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
+  unsigned ProcResourceID = 0;
+
+  // We currently limit the resource kinds to 64 and below so that we can use
+  // uint64_t for Masks
+  assert(SM.getNumProcResourceKinds() < 64 &&
+         "Too many kinds of resources, unsupported");
+  // Create a unique bitmask for every processor resource unit.
+  // Skip resource at index 0, since it always references 'InvalidUnit'.
+  Masks.resize(SM.getNumProcResourceKinds());
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    ProcResourceID++;
+  }
+  // Create a unique bitmask for every processor resource group.
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (!Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    for (unsigned U = 0; U < Desc.NumUnits; ++U)
+      Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
+    ProcResourceID++;
+  }
+  LLVM_DEBUG({
+    dbgs() << "ProcResourceDesc:\n";
+    for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+      const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
+      dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
+                       ProcResource->Name, I, Masks[I], ProcResource->NumUnits);
+    }
+    dbgs() << " -----------------\n";
+  });
+}
+
+bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
+
+  LLVM_DEBUG({ dbgs() << "canReserveResources:\n"; });
+  if (UseDFA)
+    return DFAResources->canReserveResources(MID);
+
+  unsigned InsnClass = MID->getSchedClass();
+  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+  if (!SCDesc->isValid()) {
+    LLVM_DEBUG({
+      dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+      dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+    });
+    return true;
+  }
+
+  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
+  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
+  for (; I != E; ++I) {
+    if (!I->Cycles)
+      continue;
+    const MCProcResourceDesc *ProcResource =
+        SM.getProcResource(I->ProcResourceIdx);
+    unsigned NumUnits = ProcResource->NumUnits;
+    LLVM_DEBUG({
+      dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+                       ProcResource->Name, I->ProcResourceIdx,
+                       ProcResourceCount[I->ProcResourceIdx], NumUnits,
+                       I->Cycles);
+    });
+    if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
+      return false;
+  }
+  LLVM_DEBUG(dbgs() << "return true\n\n";);
+  return true;
+}
+
+void ResourceManager::reserveResources(const MCInstrDesc *MID) {
+  LLVM_DEBUG({ dbgs() << "reserveResources:\n"; });
+  if (UseDFA)
+    return DFAResources->reserveResources(MID);
+
+  unsigned InsnClass = MID->getSchedClass();
+  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+  if (!SCDesc->isValid()) {
+    LLVM_DEBUG({
+      dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+      dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+    });
+    return;
+  }
+  for (const MCWriteProcResEntry &PRE :
+       make_range(STI->getWriteProcResBegin(SCDesc),
+                  STI->getWriteProcResEnd(SCDesc))) {
+    if (!PRE.Cycles)
+      continue;
+    const MCProcResourceDesc *ProcResource =
+        SM.getProcResource(PRE.ProcResourceIdx);
+    unsigned NumUnits = ProcResource->NumUnits;
+    ++ProcResourceCount[PRE.ProcResourceIdx];
+    LLVM_DEBUG({
+      dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+                       ProcResource->Name, PRE.ProcResourceIdx,
+                       ProcResourceCount[PRE.ProcResourceIdx], NumUnits,
+                       PRE.Cycles);
+    });
+  }
+  LLVM_DEBUG({ dbgs() << "reserveResources: done!\n\n"; });
+}
+
+bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
+  return canReserveResources(&MI.getDesc());
+}
+
+void ResourceManager::reserveResources(const MachineInstr &MI) {
+  return reserveResources(&MI.getDesc());
+}
 
+void ResourceManager::clearResources() {
+  if (UseDFA)
+    return DFAResources->clearResources();
+  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
+}
 

From 586831b2b098fe572b34f411ddaff3b21b053a4b Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Wed, 29 May 2019 03:15:36 +0000
Subject: [PATCH 0449/1176] Make __has_builtin work with __builtin_LINE and
 friends.

The source location builtins are implemented as keywords, but
__has_builtin should still report true for them.

This patch also fixes a test failure on systemz where the alignment
of string literals is 2 not 1.

llvm-svn: 361920
---
 clang/lib/Lex/PPMacroExpansion.cpp         | 4 ++++
 clang/test/CodeGenCXX/builtin_FUNCTION.cpp | 8 ++++----
 clang/test/Preprocessor/feature_tests.c    | 9 +++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 4576f1a47e15d..72f8f48839d05 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1620,6 +1620,10 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
                       .Case("__is_target_vendor", true)
                       .Case("__is_target_os", true)
                       .Case("__is_target_environment", true)
+                      .Case("__builtin_LINE", true)
+                      .Case("__builtin_FILE", true)
+                      .Case("__builtin_FUNCTION", true)
+                      .Case("__builtin_COLUMN", true)
                       .Default(false);
         }
       });
diff --git a/clang/test/CodeGenCXX/builtin_FUNCTION.cpp b/clang/test/CodeGenCXX/builtin_FUNCTION.cpp
index b3156ea45ae62..02e616351a939 100644
--- a/clang/test/CodeGenCXX/builtin_FUNCTION.cpp
+++ b/clang/test/CodeGenCXX/builtin_FUNCTION.cpp
@@ -6,7 +6,7 @@ namespace test_func {
 constexpr const char *test_default_arg(const char *f = __builtin_FUNCTION()) {
   return f;
 }
-// CHECK: @[[EMPTY_STR:.+]] = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+// CHECK: @[[EMPTY_STR:.+]] = private unnamed_addr constant [1 x i8] zeroinitializer
 
 // CHECK: @_ZN9test_func6globalE = {{(dso_local )?}}global i8* getelementptr inbounds ([1 x i8], [1 x i8]* @[[EMPTY_STR]], i32 0, i32 0)
 const char *global = test_default_arg();
@@ -16,9 +16,9 @@ const char *global_two = __builtin_FUNCTION();
 
 const char * const global_three = test_default_arg();
 
-// CHECK: @[[STR_ONE:.+]] = private unnamed_addr constant [14 x i8] c"test_func_one\00", align 1
-// CHECK: @[[STR_TWO:.+]] = private unnamed_addr constant [14 x i8] c"test_func_two\00", align 1
-// CHECK: @[[STR_THREE:.+]] = private unnamed_addr constant [20 x i8] c"do_default_arg_test\00", align 1
+// CHECK: @[[STR_ONE:.+]] = private unnamed_addr constant [14 x i8] c"test_func_one\00"
+// CHECK: @[[STR_TWO:.+]] = private unnamed_addr constant [14 x i8] c"test_func_two\00"
+// CHECK: @[[STR_THREE:.+]] = private unnamed_addr constant [20 x i8] c"do_default_arg_test\00"
 
 // CHECK: define {{(dso_local )?}}i8* @_ZN9test_func13test_func_oneEv()
 // CHECK: ret i8* getelementptr inbounds ([14 x i8], [14 x i8]* @[[STR_ONE]], i32 0, i32 0)
diff --git a/clang/test/Preprocessor/feature_tests.c b/clang/test/Preprocessor/feature_tests.c
index c2fbd11c97cd2..2035a729f2d0e 100644
--- a/clang/test/Preprocessor/feature_tests.c
+++ b/clang/test/Preprocessor/feature_tests.c
@@ -20,6 +20,15 @@
 #error Clang should have these
 #endif
 
+// These are technically implemented as keywords, but __has_builtin should
+// still return true.
+#if !__has_builtin(__builtin_LINE) || \
+    !__has_builtin(__builtin_FILE) || \
+    !__has_builtin(__builtin_FUNCTION) || \
+    !__has_builtin(__builtin_COLUMN)
+#error Clang should have these
+#endif
+
 #if __has_builtin(__builtin_insanity)
 #error Clang should not have this
 #endif

From 10c548cdfa1ebe15c0312d373191b09fbe7b6a3c Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 29 May 2019 03:28:51 +0000
Subject: [PATCH 0450/1176] IR: Give the TypeAllocator a more generic name and
 start using it for section names as well. NFCI.

This prepares us to start using it for partition names.

llvm-svn: 361922
---
 llvm/lib/IR/Globals.cpp       |  5 ++---
 llvm/lib/IR/LLVMContextImpl.h | 10 +++-------
 llvm/lib/IR/Type.cpp          | 16 ++++++++--------
 3 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index b2f5640026f9f..b3fdcc6a5fc9c 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -192,9 +192,8 @@ void GlobalObject::setSection(StringRef S) {
 
   // Get or create a stable section name string and put it in the table in the
   // context.
-  if (!S.empty()) {
-    S = getContext().pImpl->SectionStrings.insert(S).first->first();
-  }
+  if (!S.empty())
+    S = getContext().pImpl->Saver.save(S);
   getContext().pImpl->GlobalObjectSections[this] = S;
 
   // Update the HasSectionHashEntryBit. Setting the section to the empty string
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index aaa765be9fa3c..e977f05110991 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -30,7 +30,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -41,6 +40,7 @@
 #include "llvm/IR/TrackingMDRef.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <algorithm>
 #include <cassert>
@@ -1321,9 +1321,8 @@ class LLVMContextImpl {
   Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
   IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
 
-  /// TypeAllocator - All dynamically allocated types are allocated from this.
-  /// They live forever until the context is torn down.
-  BumpPtrAllocator TypeAllocator;
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver Saver{Alloc};
 
   DenseMap<unsigned, IntegerType*> IntegerTypes;
 
@@ -1357,9 +1356,6 @@ class LLVMContextImpl {
   /// Collection of per-GlobalObject sections used in this context.
   DenseMap<const GlobalObject *, StringRef> GlobalObjectSections;
 
-  /// Stable collection of section strings.
-  StringSet<> SectionStrings;
-
   /// DiscriminatorTable - This table maps file:line locations to an
   /// integer representing the next DWARF path discriminator to assign to
   /// instructions in different blocks at the same location.
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index c839648a03ed3..4016bb10ba371 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -255,7 +255,7 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
   IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
 
   if (!Entry)
-    Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+    Entry = new (C.pImpl->Alloc) IntegerType(C, NumBits);
 
   return Entry;
 }
@@ -307,7 +307,7 @@ FunctionType *FunctionType::get(Type *ReturnType,
   if (Insertion.second) {
     // The function type was not found. Allocate one and update FunctionTypes
     // in-place.
-    FT = (FunctionType *)pImpl->TypeAllocator.Allocate(
+    FT = (FunctionType *)pImpl->Alloc.Allocate(
         sizeof(FunctionType) + sizeof(Type *) * (Params.size() + 1),
         alignof(FunctionType));
     new (FT) FunctionType(ReturnType, Params, isVarArg);
@@ -353,7 +353,7 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
   if (Insertion.second) {
     // The struct type was not found. Allocate one and update AnonStructTypes
     // in-place.
-    ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+    ST = new (Context.pImpl->Alloc) StructType(Context);
     ST->setSubclassData(SCDB_IsLiteral);  // Literal struct.
     ST->setBody(ETypes, isPacked);
     *Insertion.first = ST;
@@ -379,7 +379,7 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
     return;
   }
 
-  ContainedTys = Elements.copy(getContext().pImpl->TypeAllocator).data();
+  ContainedTys = Elements.copy(getContext().pImpl->Alloc).data();
 }
 
 void StructType::setName(StringRef Name) {
@@ -434,7 +434,7 @@ void StructType::setName(StringRef Name) {
 // StructType Helper functions.
 
 StructType *StructType::create(LLVMContext &Context, StringRef Name) {
-  StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+  StructType *ST = new (Context.pImpl->Alloc) StructType(Context);
   if (!Name.empty())
     ST->setName(Name);
   return ST;
@@ -585,7 +585,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
     pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
 
   if (!Entry)
-    Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+    Entry = new (pImpl->Alloc) ArrayType(ElementType, NumElements);
   return Entry;
 }
 
@@ -613,7 +613,7 @@ VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
     ->VectorTypes[std::make_pair(ElementType, NumElements)];
 
   if (!Entry)
-    Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+    Entry = new (pImpl->Alloc) VectorType(ElementType, NumElements);
   return Entry;
 }
 
@@ -637,7 +637,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
      : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
 
   if (!Entry)
-    Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+    Entry = new (CImpl->Alloc) PointerType(EltTy, AddressSpace);
   return Entry;
 }
 

From 31fda09b2db405bbaa225bb6068c5f787506b9db Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 29 May 2019 03:29:01 +0000
Subject: [PATCH 0451/1176] Add IR support, ELF section and user documentation
 for partitioning feature.

The partitioning feature was proposed here:
http://lists.llvm.org/pipermail/llvm-dev/2019-February/130583.html

This is mostly just documentation. The feature itself will be contributed
in subsequent patches.

Differential Revision: https://reviews.llvm.org/D60242

llvm-svn: 361923
---
 lld/docs/Partitions.rst                    | 117 +++++++++++++++++++++
 lld/docs/index.rst                         |   1 +
 lld/docs/partitions.dot                    |  22 ++++
 lld/docs/partitions.svg                    | 110 +++++++++++++++++++
 llvm/docs/Extensions.rst                   |  16 +++
 llvm/include/llvm/BinaryFormat/ELF.h       |   1 +
 llvm/include/llvm/IR/GlobalValue.h         |  18 +++-
 llvm/lib/AsmParser/LLLexer.cpp             |   1 +
 llvm/lib/AsmParser/LLParser.cpp            |  29 ++++-
 llvm/lib/AsmParser/LLToken.h               |   1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp  |  20 +++-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp  |  11 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp |  18 ++++
 llvm/lib/IR/AsmWriter.cpp                  |  17 +++
 llvm/lib/IR/Globals.cpp                    |  23 ++++
 llvm/lib/IR/LLVMContextImpl.h              |   3 +
 llvm/lib/MC/MCParser/ELFAsmParser.cpp      |   2 +
 llvm/lib/MC/MCSectionELF.cpp               |   2 +
 llvm/lib/Object/ELF.cpp                    |   1 +
 llvm/test/Bitcode/compatibility.ll         |  18 ++++
 llvm/test/CodeGen/X86/partition.ll         |  33 ++++++
 llvm/test/MC/ELF/section.s                 |  12 +++
 llvm/test/Object/X86/irsymtab.ll           |   2 +-
 23 files changed, 470 insertions(+), 8 deletions(-)
 create mode 100644 lld/docs/Partitions.rst
 create mode 100644 lld/docs/partitions.dot
 create mode 100644 lld/docs/partitions.svg
 create mode 100644 llvm/test/CodeGen/X86/partition.ll

diff --git a/lld/docs/Partitions.rst b/lld/docs/Partitions.rst
new file mode 100644
index 0000000000000..96b6e26da20c2
--- /dev/null
+++ b/lld/docs/Partitions.rst
@@ -0,0 +1,117 @@
+Partitions
+==========
+
+.. warning::
+
+  This feature has not yet fully landed in LLD. This document describes how
+  the feature is intended to work once it lands. Furthermore, the feature
+  is currently experimental, and its interface is subject to change.
+
+LLD's partitioning feature allows a program (which may be an executable
+or a shared library) to be split into multiple pieces, or partitions. A
+partitioned program consists of a main partition together with a number of
+loadable partitions. The loadable partitions depend on the main partition
+in a similar way to a regular ELF shared object dependency, but unlike a
+shared object, the main partition and the loadable partitions share a virtual
+address space at link time, and each loadable partition is assigned a fixed
+offset from the main partition. This allows the loadable partitions to refer
+to code and data in the main partition directly without the binary size and
+performance overhead of PLTs, GOTs or symbol table entries.
+
+Usage
+-----
+
+A program that uses the partitioning feature must decide which symbols are
+going to be used as the "entry points" for each partition. An entry point
+could, for example, be the equivalent of the partition's ``main`` function, or
+there could be a group of functions that expose the functionality implemented
+by the partition. The intent is that in order to use a loadable partition,
+the program will use ``dlopen``/``dlsym`` or similar functions to dynamically
+load the partition at its assigned address, look up an entry point by name
+and call it. Note, however, that the standard ``dlopen`` function does not
+allow specifying a load address. On Android, the ``android_dlopen_ext``
+function may be used together with the ``ANDROID_DLEXT_RESERVED_ADDRESS``
+flag to load a shared object at a specific address.
+
+Once the entry points have been decided, the translation unit(s)
+containing the entry points should be compiled using the Clang compiler flag
+``-fsymbol-partition=<soname>``, where ``<soname>`` is the intended soname
+of the partition. The resulting object files are passed to the linker in
+the usual way.
+
+The linker will then use these entry points to automatically split the program
+into partitions according to which sections of the program are reachable from
+which entry points, similarly to how ``--gc-sections`` removes unused parts of
+a program. Any sections that are only reachable from a loadable partition's
+entry point are assigned to that partition, while all other sections are
+assigned to the main partition, including sections only reachable from
+loadable partitions.
+
+The following diagram illustrates how sections are assigned to partitions. Each
+section is colored according to its assigned partition.
+
+.. image:: partitions.svg
+
+The result of linking a program that uses partitions is essentially an
+ELF file with all of the partitions concatenated together. This file is
+referred to as a combined output file. To extract a partition from the
+combined output file, the ``llvm-objcopy`` tool should be used together
+with the flag ``--extract-main-partition`` to extract the main partition, or
+``-extract-partition=<soname>`` to extract one of the loadable partitions.
+An example command sequence is shown below:
+
+.. code-block:: shell
+
+  # Compile the main program.
+  clang -ffunction-sections -fdata-sections -c main.c
+
+  # Compile a feature to be placed in a loadable partition.
+  # Note that this is likely to be a separate build step to the main partition.
+  clang -ffunction-sections -fdata-sections -fsymbol-partition=libfeature.so -c feature.c
+
+  # Link the combined output file.
+  clang main.o feature.o -fuse-ld=lld -shared -o libcombined.so -Wl,-soname,libmain.so -Wl,--gc-sections
+
+  # Extract the partitions.
+  llvm-objcopy libcombined.so libmain.so --extract-main-partition
+  llvm-objcopy libcombined.so libfeature.so --extract-partition=libfeature.so
+
+In order to allow a program to discover the names of its loadable partitions
+and the locations of their reserved regions, the linker creates a partition
+index, which is an array of structs with the following definition:
+
+.. code-block:: c
+
+  struct partition_index_entry {
+    int32_t name_offset;
+    int32_t addr_offset;
+    uint32_t size;
+  };
+
+The ``name_offset`` field is a relative pointer to a null-terminated string
+containing the soname of the partition, the ``addr_offset`` field is a
+relative pointer to its load address and the ``size`` field contains the
+size of the region reserved for the partition. To derive an absolute pointer
+from the relative pointer fields in this data structure, the address of the
+field should be added to the value stored in the field.
+
+The program may discover the location of the partition index using the
+linker-defined symbols ``__part_index_begin`` and ``__part_index_end``.
+
+Restrictions
+------------
+
+This feature is currently only supported in the ELF linker.
+
+The partitioning feature may not currently be used together with the
+``SECTIONS`` or ``PHDRS`` linker script features, nor may it be used with the
+``--section-start``, ``-Ttext``, ``-Tdata`` or ``-Tbss`` flags. All of these
+features assume a single set of output sections and/or program headers, which
+makes their semantics ambiguous in the presence of more than one partition.
+
+The partitioning feature may not currently be used on the MIPS architecture
+because it is unclear whether the MIPS multi-GOT ABI is compatible with
+partitions.
+
+The current implementation only supports creating up to 254 partitions due
+to implementation limitations. This limit may be relaxed in the future.
diff --git a/lld/docs/index.rst b/lld/docs/index.rst
index 8b3f70e1d457f..9056d1c2de15a 100644
--- a/lld/docs/index.rst
+++ b/lld/docs/index.rst
@@ -174,4 +174,5 @@ document soon.
    WebAssembly
    windows_support
    missingkeyfunction
+   Partitions
    ReleaseNotes
diff --git a/lld/docs/partitions.dot b/lld/docs/partitions.dot
new file mode 100644
index 0000000000000..81f12a2f4283b
--- /dev/null
+++ b/lld/docs/partitions.dot
@@ -0,0 +1,22 @@
+digraph G {
+  part_main [label="Main partition",shape=plaintext];
+  part1 [label="Loadable partition 1",shape=plaintext];
+  part2 [label="Loadable partition 2",shape=plaintext];
+  main [style=filled,fillcolor=lightblue];
+  f1 [style=filled,fillcolor=lightsalmon];
+  f2 [style=filled,fillcolor=palegreen];
+  f3 [style=filled,fillcolor=lightblue];
+  f4 [style=filled,fillcolor=lightsalmon];
+  f5 [style=filled,fillcolor=lightblue];
+  f6 [style=filled,fillcolor=palegreen];
+  part_main -> main;
+  main -> f3;
+  part1 -> f1;
+  f1 -> f3;
+  f1 -> f4;
+  f1 -> f5;
+  part2 -> f2;
+  f2 -> f3;
+  f2 -> f5;
+  f2 -> f6;
+}
diff --git a/lld/docs/partitions.svg b/lld/docs/partitions.svg
new file mode 100644
index 0000000000000..39cd96933446d
--- /dev/null
+++ b/lld/docs/partitions.svg
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Generated by graphviz version 2.38.0 (20140413.2041)
+ -->
+<!-- Title: G Pages: 1 -->
+<svg width="393pt" height="188pt"
+ viewBox="0.00 0.00 393.00 188.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 184)">
+<title>G</title>
+<polygon fill="white" stroke="none" points="-4,4 -4,-184 389,-184 389,4 -4,4"/>
+<!-- part_main -->
+<g id="node1" class="node"><title>part_main</title>
+<text text-anchor="middle" x="47.5" y="-158.3" font-family="Times,serif" font-size="14.00">Main partition</text>
+</g>
+<!-- main -->
+<g id="node4" class="node"><title>main</title>
+<ellipse fill="lightblue" stroke="black" cx="75.5" cy="-90" rx="28.6953" ry="18"/>
+<text text-anchor="middle" x="75.5" y="-86.3" font-family="Times,serif" font-size="14.00">main</text>
+</g>
+<!-- part_main&#45;&gt;main -->
+<g id="edge1" class="edge"><title>part_main&#45;&gt;main</title>
+<path fill="none" stroke="black" d="M54.4214,-143.697C57.6218,-135.696 61.492,-126.02 65.0381,-117.155"/>
+<polygon fill="black" stroke="black" points="68.3868,-118.207 68.8511,-107.622 61.8874,-115.607 68.3868,-118.207"/>
+</g>
+<!-- part1 -->
+<g id="node2" class="node"><title>part1</title>
+<text text-anchor="middle" x="176.5" y="-158.3" font-family="Times,serif" font-size="14.00">Loadable partition 1</text>
+</g>
+<!-- f1 -->
+<g id="node5" class="node"><title>f1</title>
+<ellipse fill="lightsalmon" stroke="black" cx="176.5" cy="-90" rx="27" ry="18"/>
+<text text-anchor="middle" x="176.5" y="-86.3" font-family="Times,serif" font-size="14.00">f1</text>
+</g>
+<!-- part1&#45;&gt;f1 -->
+<g id="edge3" class="edge"><title>part1&#45;&gt;f1</title>
+<path fill="none" stroke="black" d="M176.5,-143.697C176.5,-135.983 176.5,-126.712 176.5,-118.112"/>
+<polygon fill="black" stroke="black" points="180,-118.104 176.5,-108.104 173,-118.104 180,-118.104"/>
+</g>
+<!-- part2 -->
+<g id="node3" class="node"><title>part2</title>
+<text text-anchor="middle" x="321.5" y="-158.3" font-family="Times,serif" font-size="14.00">Loadable partition 2</text>
+</g>
+<!-- f2 -->
+<g id="node6" class="node"><title>f2</title>
+<ellipse fill="palegreen" stroke="black" cx="284.5" cy="-90" rx="27" ry="18"/>
+<text text-anchor="middle" x="284.5" y="-86.3" font-family="Times,serif" font-size="14.00">f2</text>
+</g>
+<!-- part2&#45;&gt;f2 -->
+<g id="edge7" class="edge"><title>part2&#45;&gt;f2</title>
+<path fill="none" stroke="black" d="M312.354,-143.697C307.97,-135.403 302.636,-125.311 297.813,-116.187"/>
+<polygon fill="black" stroke="black" points="300.801,-114.35 293.034,-107.145 294.612,-117.621 300.801,-114.35"/>
+</g>
+<!-- f3 -->
+<g id="node7" class="node"><title>f3</title>
+<ellipse fill="lightblue" stroke="black" cx="104.5" cy="-18" rx="27" ry="18"/>
+<text text-anchor="middle" x="104.5" y="-14.3" font-family="Times,serif" font-size="14.00">f3</text>
+</g>
+<!-- main&#45;&gt;f3 -->
+<g id="edge2" class="edge"><title>main&#45;&gt;f3</title>
+<path fill="none" stroke="black" d="M82.3726,-72.411C85.7675,-64.2164 89.9422,-54.1395 93.7473,-44.9548"/>
+<polygon fill="black" stroke="black" points="97.0828,-46.0481 97.6767,-35.4699 90.6158,-43.3689 97.0828,-46.0481"/>
+</g>
+<!-- f1&#45;&gt;f3 -->
+<g id="edge4" class="edge"><title>f1&#45;&gt;f3</title>
+<path fill="none" stroke="black" d="M161.93,-74.8345C151.75,-64.9376 137.976,-51.5462 126.469,-40.3591"/>
+<polygon fill="black" stroke="black" points="128.905,-37.8461 119.296,-33.3847 124.026,-42.865 128.905,-37.8461"/>
+</g>
+<!-- f4 -->
+<g id="node8" class="node"><title>f4</title>
+<ellipse fill="lightsalmon" stroke="black" cx="176.5" cy="-18" rx="27" ry="18"/>
+<text text-anchor="middle" x="176.5" y="-14.3" font-family="Times,serif" font-size="14.00">f4</text>
+</g>
+<!-- f1&#45;&gt;f4 -->
+<g id="edge5" class="edge"><title>f1&#45;&gt;f4</title>
+<path fill="none" stroke="black" d="M176.5,-71.6966C176.5,-63.9827 176.5,-54.7125 176.5,-46.1124"/>
+<polygon fill="black" stroke="black" points="180,-46.1043 176.5,-36.1043 173,-46.1044 180,-46.1043"/>
+</g>
+<!-- f5 -->
+<g id="node9" class="node"><title>f5</title>
+<ellipse fill="lightblue" stroke="black" cx="248.5" cy="-18" rx="27" ry="18"/>
+<text text-anchor="middle" x="248.5" y="-14.3" font-family="Times,serif" font-size="14.00">f5</text>
+</g>
+<!-- f1&#45;&gt;f5 -->
+<g id="edge6" class="edge"><title>f1&#45;&gt;f5</title>
+<path fill="none" stroke="black" d="M191.07,-74.8345C201.25,-64.9376 215.024,-51.5462 226.531,-40.3591"/>
+<polygon fill="black" stroke="black" points="228.974,-42.865 233.704,-33.3847 224.095,-37.8461 228.974,-42.865"/>
+</g>
+<!-- f2&#45;&gt;f3 -->
+<g id="edge8" class="edge"><title>f2&#45;&gt;f3</title>
+<path fill="none" stroke="black" d="M260.806,-81.0022C232.063,-71.1346 182.266,-53.5073 140.5,-36 138.683,-35.2385 136.825,-34.4358 134.957,-33.6106"/>
+<polygon fill="black" stroke="black" points="136.231,-30.3452 125.68,-29.3829 133.328,-36.7149 136.231,-30.3452"/>
+</g>
+<!-- f2&#45;&gt;f5 -->
+<g id="edge9" class="edge"><title>f2&#45;&gt;f5</title>
+<path fill="none" stroke="black" d="M276.15,-72.7646C271.788,-64.2831 266.353,-53.7144 261.459,-44.1974"/>
+<polygon fill="black" stroke="black" points="264.49,-42.4395 256.804,-35.1473 258.265,-45.6409 264.49,-42.4395"/>
+</g>
+<!-- f6 -->
+<g id="node10" class="node"><title>f6</title>
+<ellipse fill="palegreen" stroke="black" cx="320.5" cy="-18" rx="27" ry="18"/>
+<text text-anchor="middle" x="320.5" y="-14.3" font-family="Times,serif" font-size="14.00">f6</text>
+</g>
+<!-- f2&#45;&gt;f6 -->
+<g id="edge10" class="edge"><title>f2&#45;&gt;f6</title>
+<path fill="none" stroke="black" d="M292.85,-72.7646C297.212,-64.2831 302.647,-53.7144 307.541,-44.1974"/>
+<polygon fill="black" stroke="black" points="310.735,-45.6409 312.196,-35.1473 304.51,-42.4395 310.735,-45.6409"/>
+</g>
+</g>
+</svg>
diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst
index 8543ac6118506..e6f7fdd50447b 100644
--- a/llvm/docs/Extensions.rst
+++ b/llvm/docs/Extensions.rst
@@ -379,6 +379,22 @@ this directive, all symbols are considered address-significant.
 
 This marks ``sym`` as address-significant.
 
+``SHT_LLVM_SYMPART`` Section (symbol partition specification)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This section is used to mark symbols with the `partition`_ that they
+belong to. An ``.llvm_sympart`` section consists of a null-terminated string
+specifying the name of the partition followed by a relocation referring to
+the symbol that belongs to the partition. It may be constructed as follows:
+
+.. code-block:: gas
+
+  .section ".llvm_sympart","",@llvm_sympart
+  .asciz "libpartition.so"
+  .word symbol_in_partition
+
+.. _partition: https://lld.llvm.org/Partitions.html
+
 CodeView-Dependent
 ------------------
 
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6ec924d08d106..8258bb3711bfd 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -842,6 +842,7 @@ enum : unsigned {
   SHT_LLVM_ADDRSIG = 0x6fff4c03,        // List of address-significant symbols
                                         // for safe ICF.
   SHT_LLVM_DEPENDENT_LIBRARIES = 0x6fff4c04, // LLVM Dependent Library Specifiers.
+  SHT_LLVM_SYMPART = 0x6fff4c05,        // Symbol partition specification.
   // Android's experimental support for SHT_RELR sections.
   // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
   SHT_ANDROID_RELR = 0x6fffff00,        // Relocation entries; only offsets.
diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index d78ce622bc5dd..2209881dbda62 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -79,15 +79,15 @@ class GlobalValue : public Constant {
         ValueType(Ty), Visibility(DefaultVisibility),
         UnnamedAddrVal(unsigned(UnnamedAddr::None)),
         DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
-        HasLLVMReservedName(false), IsDSOLocal(false), IntID((Intrinsic::ID)0U),
-        Parent(nullptr) {
+        HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false),
+        IntID((Intrinsic::ID)0U), Parent(nullptr) {
     setLinkage(Linkage);
     setName(Name);
   }
 
   Type *ValueType;
 
-  static const unsigned GlobalValueSubClassDataBits = 17;
+  static const unsigned GlobalValueSubClassDataBits = 16;
 
   // All bitfields use unsigned as the underlying type so that MSVC will pack
   // them.
@@ -108,9 +108,13 @@ class GlobalValue : public Constant {
   /// definition cannot be runtime preempted.
   unsigned IsDSOLocal : 1;
 
+  /// True if this symbol has a partition name assigned (see
+  /// https://lld.llvm.org/Partitions.html).
+  unsigned HasPartition : 1;
+
 private:
   // Give subclasses access to what otherwise would be wasted padding.
-  // (17 + 4 + 2 + 2 + 2 + 3 + 1 + 1) == 32.
+  // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32.
   unsigned SubClassData : GlobalValueSubClassDataBits;
 
   friend class Constant;
@@ -280,6 +284,12 @@ class GlobalValue : public Constant {
     return IsDSOLocal;
   }
 
+  bool hasPartition() const {
+    return HasPartition;
+  }
+  StringRef getPartition() const;
+  void setPartition(StringRef Part);
+
   static LinkageTypes getLinkOnceLinkage(bool ODR) {
     return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
   }
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index c0b9cd12d0c0f..dc8ff7f131505 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -570,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(align);
   KEYWORD(addrspace);
   KEYWORD(section);
+  KEYWORD(partition);
   KEYWORD(alias);
   KEYWORD(ifunc);
   KEYWORD(module);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 6af084edbd8c9..28a8480e7d3dc 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -856,11 +856,14 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) {
 ///   ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
 ///                     OptionalVisibility OptionalDLLStorageClass
 ///                     OptionalThreadLocal OptionalUnnamedAddr
-//                      'alias|ifunc' IndirectSymbol
+///                     'alias|ifunc' IndirectSymbol IndirectSymbolAttr*
 ///
 /// IndirectSymbol
 ///   ::= TypeAndValue
 ///
+/// IndirectSymbolAttr
+///   ::= ',' 'partition' StringConstant
+///
 /// Everything through OptionalUnnamedAddr has already been parsed.
 ///
 bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
@@ -960,6 +963,21 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
   GA->setUnnamedAddr(UnnamedAddr);
   maybeSetDSOLocal(DSOLocal, *GA);
 
+  // At this point we've parsed everything except for the IndirectSymbolAttrs.
+  // Now parse them if there are any.
+  while (Lex.getKind() == lltok::comma) {
+    Lex.Lex();
+
+    if (Lex.getKind() == lltok::kw_partition) {
+      Lex.Lex();
+      GA->setPartition(Lex.getStrVal());
+      if (ParseToken(lltok::StringConstant, "expected partition string"))
+        return true;
+    } else {
+      return TokError("unknown alias or ifunc property!");
+    }
+  }
+
   if (Name.empty())
     NumberedVals.push_back(GA.get());
 
@@ -1095,6 +1113,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
       GV->setSection(Lex.getStrVal());
       if (ParseToken(lltok::StringConstant, "expected global section string"))
         return true;
+    } else if (Lex.getKind() == lltok::kw_partition) {
+      Lex.Lex();
+      GV->setPartition(Lex.getStrVal());
+      if (ParseToken(lltok::StringConstant, "expected partition string"))
+        return true;
     } else if (Lex.getKind() == lltok::kw_align) {
       unsigned Alignment;
       if (ParseOptionalAlignment(Alignment)) return true;
@@ -5287,6 +5310,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   std::vector<unsigned> FwdRefAttrGrps;
   LocTy BuiltinLoc;
   std::string Section;
+  std::string Partition;
   unsigned Alignment;
   std::string GC;
   GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
@@ -5303,6 +5327,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
                                  BuiltinLoc) ||
       (EatIfPresent(lltok::kw_section) &&
        ParseStringConstant(Section)) ||
+      (EatIfPresent(lltok::kw_partition) &&
+       ParseStringConstant(Partition)) ||
       parseOptionalComdat(FunctionName, C) ||
       ParseOptionalAlignment(Alignment) ||
       (EatIfPresent(lltok::kw_gc) &&
@@ -5404,6 +5430,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   Fn->setUnnamedAddr(UnnamedAddr);
   Fn->setAlignment(Alignment);
   Fn->setSection(Section);
+  Fn->setPartition(Partition);
   Fn->setComdat(C);
   Fn->setPersonalityFn(PersonalityFn);
   if (!GC.empty()) Fn->setGC(GC);
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index 33ea28bb10e83..a1e7093217877 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -113,6 +113,7 @@ enum Kind {
   kw_align,
   kw_addrspace,
   kw_section,
+  kw_partition,
   kw_alias,
   kw_ifunc,
   kw_module,
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 412f99d5e6266..b23115ba31d4f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2892,7 +2892,8 @@ static void inferDSOLocal(GlobalValue *GV) {
 Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   // v1: [pointer type, isconst, initid, linkage, alignment, section,
   // visibility, threadlocal, unnamed_addr, externally_initialized,
-  // dllstorageclass, comdat, attributes, preemption specifier] (name in VST)
+  // dllstorageclass, comdat, attributes, preemption specifier,
+  // partition strtab offset, partition strtab size] (name in VST)
   // v2: [strtab_offset, strtab_size, v1]
   StringRef Name;
   std::tie(Name, Record) = readNameFromStrtab(Record);
@@ -2983,6 +2984,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   }
   inferDSOLocal(NewGV);
 
+  // Check whether we have enough values to read a partition name.
+  if (Record.size() > 15)
+    NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15]));
+
   return Error::success();
 }
 
@@ -3072,6 +3077,12 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   }
   inferDSOLocal(Func);
 
+  // Record[16] is the address space number.
+
+  // Check whether we have enough values to read a partition name.
+  if (Record.size() > 18)
+    Func->setPartition(StringRef(Strtab.data() + Record[17], Record[18]));
+
   ValueList.push_back(Func);
 
   // If this is a function with a body, remember the prototype we are
@@ -3149,6 +3160,13 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
     NewGA->setDSOLocal(getDecodedDSOLocal(Record[OpNum++]));
   inferDSOLocal(NewGA);
 
+  // Check whether we have enough values to read a partition name.
+  if (OpNum + 1 < Record.size()) {
+    NewGA->setPartition(
+        StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1]));
+    OpNum += 2;
+  }
+
   ValueList.push_back(NewGA);
   IndirectSymbolInits.push_back(std::make_pair(NewGA, Val));
   return Error::success();
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 7d9b0583d1e1f..00d6fe8e27c46 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1262,7 +1262,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
         GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
         GV.hasComdat() ||
         GV.hasAttributes() ||
-        GV.isDSOLocal()) {
+        GV.isDSOLocal() ||
+        GV.hasPartition()) {
       Vals.push_back(getEncodedVisibility(GV));
       Vals.push_back(getEncodedThreadLocalMode(GV));
       Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1274,6 +1275,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
       Vals.push_back(VE.getAttributeListID(AL));
 
       Vals.push_back(GV.isDSOLocal());
+      Vals.push_back(addToStrtab(GV.getPartition()));
+      Vals.push_back(GV.getPartition().size());
     } else {
       AbbrevToUse = SimpleGVarAbbrev;
     }
@@ -1311,6 +1314,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
 
     Vals.push_back(F.isDSOLocal());
     Vals.push_back(F.getAddressSpace());
+    Vals.push_back(addToStrtab(F.getPartition()));
+    Vals.push_back(F.getPartition().size());
 
     unsigned AbbrevToUse = 0;
     Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -1333,6 +1338,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
     Vals.push_back(getEncodedThreadLocalMode(A));
     Vals.push_back(getEncodedUnnamedAddr(A));
     Vals.push_back(A.isDSOLocal());
+    Vals.push_back(addToStrtab(A.getPartition()));
+    Vals.push_back(A.getPartition().size());
 
     unsigned AbbrevToUse = 0;
     Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
@@ -1351,6 +1358,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
     Vals.push_back(getEncodedLinkage(I));
     Vals.push_back(getEncodedVisibility(I));
     Vals.push_back(I.isDSOLocal());
+    Vals.push_back(addToStrtab(I.getPartition()));
+    Vals.push_back(I.getPartition().size());
     Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
     Vals.clear();
   }
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bf7776b1dc000..3317952d05d0a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1632,6 +1632,24 @@ bool AsmPrinter::doFinalization(Module &M) {
         OutStreamer->EmitAddrsigSym(getSymbol(&GV));
   }
 
+  // Emit symbol partition specifications (ELF only).
+  if (TM.getTargetTriple().isOSBinFormatELF()) {
+    unsigned UniqueID = 0;
+    for (const GlobalValue &GV : M.global_values()) {
+      if (!GV.hasPartition() || GV.isDeclarationForLinker() ||
+          GV.getVisibility() != GlobalValue::DefaultVisibility)
+        continue;
+
+      OutStreamer->SwitchSection(OutContext.getELFSection(
+          ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID));
+      OutStreamer->EmitBytes(GV.getPartition());
+      OutStreamer->EmitZeros(1);
+      OutStreamer->EmitValue(
+          MCSymbolRefExpr::create(getSymbol(&GV), OutContext),
+          MAI->getCodePointerSize());
+    }
+  }
+
   // Allow the target to emit any magic that it wants at the end of the file,
   // after everything else has gone out.
   EmitEndOfAsmFile(M);
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index b5db8bdeb2272..ca7afd0d81aa0 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -3247,6 +3247,12 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
     printEscapedString(GV->getSection(), Out);
     Out << '"';
   }
+  if (GV->hasPartition()) {
+    Out << ", partition \"";
+    printEscapedString(GV->getPartition(), Out);
+    Out << '"';
+  }
+
   maybePrintComdat(Out, *GV);
   if (GV->getAlignment())
     Out << ", align " << GV->getAlignment();
@@ -3298,6 +3304,12 @@ void AssemblyWriter::printIndirectSymbol(const GlobalIndirectSymbol *GIS) {
     writeOperand(IS, !isa<ConstantExpr>(IS));
   }
 
+  if (GIS->hasPartition()) {
+    Out << ", partition \"";
+    printEscapedString(GIS->getPartition(), Out);
+    Out << '"';
+  }
+
   printInfoComment(*GIS);
   Out << '\n';
 }
@@ -3438,6 +3450,11 @@ void AssemblyWriter::printFunction(const Function *F) {
     printEscapedString(F->getSection(), Out);
     Out << '"';
   }
+  if (F->hasPartition()) {
+    Out << " partition \"";
+    printEscapedString(F->getPartition(), Out);
+    Out << '"';
+  }
   maybePrintComdat(Out, *F);
   if (F->getAlignment())
     Out << " align " << F->getAlignment();
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index b3fdcc6a5fc9c..e2bfc0420bc53 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -67,6 +67,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
   setUnnamedAddr(Src->getUnnamedAddr());
   setDLLStorageClass(Src->getDLLStorageClass());
   setDSOLocal(Src->isDSOLocal());
+  setPartition(Src->getPartition());
 }
 
 void GlobalValue::removeFromParent() {
@@ -180,6 +181,28 @@ const Comdat *GlobalValue::getComdat() const {
   return cast<GlobalObject>(this)->getComdat();
 }
 
+StringRef GlobalValue::getPartition() const {
+  if (!hasPartition())
+    return "";
+  return getContext().pImpl->GlobalValuePartitions[this];
+}
+
+void GlobalValue::setPartition(StringRef S) {
+  // Do nothing if we're clearing the partition and it is already empty.
+  if (!hasPartition() && S.empty())
+    return;
+
+  // Get or create a stable partition name string and put it in the table in the
+  // context.
+  if (!S.empty())
+    S = getContext().pImpl->Saver.save(S);
+  getContext().pImpl->GlobalValuePartitions[this] = S;
+
+  // Update the HasPartition field. Setting the partition to the empty string
+  // means this global no longer has a partition.
+  HasPartition = !S.empty();
+}
+
 StringRef GlobalObject::getSectionImpl() const {
   assert(hasSection());
   return getContext().pImpl->GlobalObjectSections[this];
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index e977f05110991..4560617624ea5 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1356,6 +1356,9 @@ class LLVMContextImpl {
   /// Collection of per-GlobalObject sections used in this context.
   DenseMap<const GlobalObject *, StringRef> GlobalObjectSections;
 
+  /// Collection of per-GlobalValue partitions used in this context.
+  DenseMap<const GlobalValue *, StringRef> GlobalValuePartitions;
+
   /// DiscriminatorTable - This table maps file:line locations to an
   /// integer representing the next DWARF path discriminator to assign to
   /// instructions in different blocks at the same location.
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 48ced8d3dfa9c..a55bdd5364cb7 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -617,6 +617,8 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
       Type = ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
     else if (TypeName == "llvm_dependent_libraries")
       Type = ELF::SHT_LLVM_DEPENDENT_LIBRARIES;
+    else if (TypeName == "llvm_sympart")
+      Type = ELF::SHT_LLVM_SYMPART;
     else if (TypeName.getAsInteger(0, Type))
       return TokError("unknown section type");
   }
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index 569b6ba099747..efe504b2024c7 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -154,6 +154,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
     OS << "llvm_call_graph_profile";
   else if (Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
     OS << "llvm_dependent_libraries";
+  else if (Type == ELF::SHT_LLVM_SYMPART)
+    OS << "llvm_sympart";
   else
     report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
                        " for section " + getSectionName());
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 951f4ae8f7eea..a9c90e01551ea 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -254,6 +254,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CALL_GRAPH_PROFILE);
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG);
     STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES);
+    STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
     STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index a1474df6d941d..6c3a6887346e0 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -160,6 +160,10 @@ $comdat.samesize = comdat samesize
 @g.section = global i32 0, section "_DATA"
 ; CHECK: @g.section = global i32 0, section "_DATA"
 
+; Global Variables -- partition
+@g.partition = global i32 0, partition "part"
+; CHECK: @g.partition = global i32 0, partition "part"
+
 ; Global Variables -- comdat
 @comdat.any = global i32 0, comdat
 ; CHECK: @comdat.any = global i32 0, comdat
@@ -251,6 +255,10 @@ declare void @g.f1()
 @a.local_unnamed_addr = local_unnamed_addr alias i32, i32* @g.local_unnamed_addr
 ; CHECK: @a.local_unnamed_addr = local_unnamed_addr alias i32, i32* @g.local_unnamed_addr
 
+; Aliases -- partition
+; CHECK: @alias.partition = alias i32, i32* @g.partition, partition "part"
+@alias.partition = alias i32, i32* @g.partition, partition "part"
+
 ;; IFunc
 ; Format @<Name> = [Linkage] [Visibility] ifunc <IFuncTy>,
 ;                  <ResolverTy>* @<Resolver>
@@ -271,6 +279,10 @@ declare void @g.f1()
 @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver
 ; CHECK: @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver
 
+; IFunc -- partition
+; CHECK: @ifunc.partition = ifunc void (), i8* ()* @ifunc_resolver, partition "part"
+@ifunc.partition = ifunc void (), i8* ()* @ifunc_resolver, partition "part"
+
 define i8* @ifunc_resolver() {
 entry:
   ret i8* null
@@ -620,6 +632,12 @@ declare void @f.strictfp() #35
 declare void @f.section() section "80"
 ; CHECK: declare void @f.section() section "80"
 
+; Functions -- partition
+define void @f.partition() partition "part" {
+; CHECK: define void @f.partition() partition "part"
+  ret void
+}
+
 ; Functions -- comdat
 define void @f.comdat_any() comdat($comdat.any) {
 ; CHECK: define void @f.comdat_any() comdat($comdat.any)
diff --git a/llvm/test/CodeGen/X86/partition.ll b/llvm/test/CodeGen/X86/partition.ll
new file mode 100644
index 0000000000000..cc8d44e399ef3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/partition.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s
+
+; CHECK: .section .llvm_sympart,"",@llvm_sympart,unique,1
+; CHECK-NEXT: .ascii "part1"
+; CHECK-NEXT: .zero 1
+; CHECK-NEXT: .quad f1
+; CHECK-NEXT: .section .llvm_sympart,"",@llvm_sympart,unique,2
+; CHECK-NEXT: .ascii "part4"
+; CHECK-NEXT: .zero 1
+; CHECK-NEXT: .quad g1
+; CHECK-NEXT: .section .llvm_sympart,"",@llvm_sympart,unique,3
+; CHECK-NEXT: .ascii "part5"
+; CHECK-NEXT: .zero 1
+; CHECK-NEXT: .quad a1
+; CHECK-NEXT: .section .llvm_sympart,"",@llvm_sympart,unique,4
+; CHECK-NEXT: .ascii "part6"
+; CHECK-NEXT: .zero 1
+; CHECK-NEXT: .quad i1
+
+define void @f1() partition "part1" {
+  unreachable
+}
+
+define hidden void @f2() partition "part2" {
+  unreachable
+}
+
+declare void @f3() partition "part3"
+
+@g1 = global i32 0, partition "part4"
+
+@a1 = alias i32, i32* @g1, partition "part5"
+@i1 = ifunc void(), void()* @f1, partition "part6"
diff --git a/llvm/test/MC/ELF/section.s b/llvm/test/MC/ELF/section.s
index 7c9bb7915d1af..f4ed666bfc71d 100644
--- a/llvm/test/MC/ELF/section.s
+++ b/llvm/test/MC/ELF/section.s
@@ -306,3 +306,15 @@ bar:
 // CHECK-NEXT:       SHF_STRINGS
 // CHECK-NEXT:   ]
 // CHECK: }
+
+// Test SHT_LLVM_SYMPART
+
+.section .llvm_sympart,"",@llvm_sympart
+// ASM: .section .llvm_sympart,"",@llvm_sympart
+
+// CHECK: Section {
+// CHECK:   Name: .llvm_sympart
+// CHECK-NEXT:   Type: SHT_LLVM_SYMPART
+// CHECK-NEXT:   Flags [
+// CHECK-NEXT:   ]
+// CHECK: }
diff --git a/llvm/test/Object/X86/irsymtab.ll b/llvm/test/Object/X86/irsymtab.ll
index 1b9915a2b745e..2e7b189572f69 100644
--- a/llvm/test/Object/X86/irsymtab.ll
+++ b/llvm/test/Object/X86/irsymtab.ll
@@ -9,7 +9,7 @@
 
 ; BCA:      <SYMTAB_BLOCK
 ; Version stored at offset 0.
-; BCA-NEXT:   <BLOB abbrevid=4/> blob data = '\x02\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00L\x00\x00\x00\x01\x00\x00\x00X\x00\x00\x00\x00\x00\x00\x00X\x00\x00\x00\x02\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00'
+; BCA-NEXT:   <BLOB abbrevid=4/> blob data = '\x02\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00L\x00\x00\x00\x01\x00\x00\x00X\x00\x00\x00\x00\x00\x00\x00X\x00\x00\x00\x02\x00\x00\x00\x88\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x88\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00'
 ; BCA-NEXT: </SYMTAB_BLOCK>
 ; BCA-NEXT: <STRTAB_BLOCK
 ; BCA-NEXT:   <BLOB abbrevid=4/> blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll'

From e8698ead9dd12252dab26a3412788e14303d634d Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Wed, 29 May 2019 03:43:01 +0000
Subject: [PATCH 0452/1176] Inline value into debug statement to avoid unused
 variable warning.

llvm-svn: 361924
---
 llvm/lib/CodeGen/MachinePipeliner.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 051cd07cdae65..61441d909388d 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -3965,13 +3965,12 @@ void ResourceManager::reserveResources(const MCInstrDesc *MID) {
       continue;
     const MCProcResourceDesc *ProcResource =
         SM.getProcResource(PRE.ProcResourceIdx);
-    unsigned NumUnits = ProcResource->NumUnits;
     ++ProcResourceCount[PRE.ProcResourceIdx];
     LLVM_DEBUG({
       dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
                        ProcResource->Name, PRE.ProcResourceIdx,
-                       ProcResourceCount[PRE.ProcResourceIdx], NumUnits,
-                       PRE.Cycles);
+                       ProcResourceCount[PRE.ProcResourceIdx],
+                       ProcResource->NumUnits, PRE.Cycles);
     });
   }
   LLVM_DEBUG({ dbgs() << "reserveResources: done!\n\n"; });

From ba2816be824adfa7171d14d472ad645ab9edc5b3 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 29 May 2019 03:55:20 +0000
Subject: [PATCH 0453/1176] ELF: Add basic partition data structures and
 behaviours.

This change causes us to read partition specifications from partition
specification sections and split output sections into partitions according
to their reachability from partition entry points.

This is only the first step towards a full implementation of partitions. Later
changes will add additional synthetic sections to each partition so that
they can be loaded independently.

Differential Revision: https://reviews.llvm.org/D60353

llvm-svn: 361925
---
 lld/ELF/Driver.cpp                    | 71 +++++++++++++++++++++++---
 lld/ELF/ICF.cpp                       |  4 +-
 lld/ELF/InputSection.cpp              | 15 +++++-
 lld/ELF/InputSection.h                | 17 ++++---
 lld/ELF/LinkerScript.cpp              | 26 ++++++----
 lld/ELF/MapFile.cpp                   |  4 +-
 lld/ELF/MarkLive.cpp                  | 73 ++++++++++++++++++++++-----
 lld/ELF/OutputSections.cpp            |  8 +--
 lld/ELF/SymbolTable.cpp               |  1 +
 lld/ELF/Symbols.cpp                   |  2 +-
 lld/ELF/Symbols.h                     |  4 ++
 lld/ELF/SyntheticSections.cpp         | 20 ++++----
 lld/ELF/SyntheticSections.h           | 11 +++-
 lld/ELF/Writer.cpp                    | 21 ++++----
 lld/test/ELF/partition-errors.s       | 23 +++++++++
 lld/test/ELF/partition-icf.s          | 50 ++++++++++++++++++
 lld/test/ELF/partition-move-to-main.s | 44 ++++++++++++++++
 lld/test/ELF/partitions.s             | 63 +++++++++++++++++++++++
 18 files changed, 387 insertions(+), 70 deletions(-)
 create mode 100644 lld/test/ELF/partition-errors.s
 create mode 100644 lld/test/ELF/partition-icf.s
 create mode 100644 lld/test/ELF/partition-move-to-main.s
 create mode 100644 lld/test/ELF/partitions.s

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index ba92d2cfe7346..a234b8fe3c7d6 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -98,6 +98,8 @@ bool elf::link(ArrayRef<const char *> Args, bool CanExitEarly,
   Tar = nullptr;
   memset(&In, 0, sizeof(In));
 
+  Partitions = {Partition()};
+
   SharedFile::VernauxNum = 0;
 
   Config->ProgName = Args[0];
@@ -1344,7 +1346,7 @@ static void replaceCommonSymbols() {
 
     auto *Bss = make<BssSection>("COMMON", S->Size, S->Alignment);
     Bss->File = S->File;
-    Bss->Live = !Config->GcSections;
+    Bss->markDead();
     InputSections.push_back(Bss);
     S->replace(Defined{S->File, S->getName(), S->Binding, S->StOther, S->Type,
                        /*Value=*/0, S->Size, Bss});
@@ -1432,6 +1434,55 @@ static void findKeepUniqueSections(opt::InputArgList &Args) {
   }
 }
 
+// This function reads a symbol partition specification section. These sections
+// are used to control which partition a symbol is allocated to. See
+// https://lld.llvm.org/Partitions.html for more details on partitions.
+template <typename ELFT>
+static void readSymbolPartitionSection(InputSectionBase *S) {
+  // Read the relocation that refers to the partition's entry point symbol.
+  Symbol *Sym;
+  if (S->AreRelocsRela)
+    Sym = &S->getFile<ELFT>()->getRelocTargetSym(S->template relas<ELFT>()[0]);
+  else
+    Sym = &S->getFile<ELFT>()->getRelocTargetSym(S->template rels<ELFT>()[0]);
+  if (!isa<Defined>(Sym) || !Sym->includeInDynsym())
+    return;
+
+  StringRef PartName = reinterpret_cast<const char *>(S->data().data());
+  for (Partition &Part : Partitions) {
+    if (Part.Name == PartName) {
+      Sym->Partition = Part.getNumber();
+      return;
+    }
+  }
+
+  // Forbid partitions from being used on incompatible targets, and forbid them
+  // from being used together with various linker features that assume a single
+  // set of output sections.
+  if (Script->HasSectionsCommand)
+    error(toString(S->File) +
+          ": partitions cannot be used with the SECTIONS command");
+  if (Script->hasPhdrsCommands())
+    error(toString(S->File) +
+          ": partitions cannot be used with the PHDRS command");
+  if (!Config->SectionStartMap.empty())
+    error(toString(S->File) + ": partitions cannot be used with "
+                              "--section-start, -Ttext, -Tdata or -Tbss");
+  if (Config->EMachine == EM_MIPS)
+    error(toString(S->File) + ": partitions cannot be used on this target");
+
+  // Impose a limit of no more than 254 partitions. This limit comes from the
+  // sizes of the Partition fields in InputSectionBase and Symbol, as well as
+  // the amount of space devoted to the partition number in RankFlags.
+  if (Partitions.size() == 254)
+    fatal("may not have more than 254 partitions");
+
+  Partitions.emplace_back();
+  Partition &NewPart = Partitions.back();
+  NewPart.Name = PartName;
+  Sym->Partition = NewPart.getNumber();
+}
+
 template <class ELFT> static Symbol *addUndefined(StringRef Name) {
   return Symtab->addSymbol(
       Undefined{nullptr, Name, STB_GLOBAL, STV_DEFAULT, 0});
@@ -1700,13 +1751,17 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
     for (InputSectionBase *S : F->getSections())
       InputSections.push_back(cast<InputSection>(S));
 
-  // We do not want to emit debug sections if --strip-all
-  // or -strip-debug are given.
-  if (Config->Strip != StripPolicy::None) {
-    llvm::erase_if(InputSections, [](InputSectionBase *S) {
-      return S->Name.startswith(".debug") || S->Name.startswith(".zdebug");
-    });
-  }
+  llvm::erase_if(InputSections, [](InputSectionBase *S) {
+    if (S->Type == SHT_LLVM_SYMPART) {
+      readSymbolPartitionSection<ELFT>(S);
+      return true;
+    }
+
+    // We do not want to emit debug sections if --strip-all
+    // or -strip-debug are given.
+    return Config->Strip != StripPolicy::None &&
+           (S->Name.startswith(".debug") || S->Name.startswith(".zdebug"));
+  });
 
   Config->EFlags = Target->calcEFlags();
   // MaxPageSize (sometimes called abi page size) is the maximum page size that
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 546b1214ec795..08f7ad3cda424 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -157,7 +157,7 @@ template <class ELFT> class ICF {
 
 // Returns true if section S is subject of ICF.
 static bool isEligible(InputSection *S) {
-  if (!S->Live || S->KeepUnique || !(S->Flags & SHF_ALLOC))
+  if (!S->isLive() || S->KeepUnique || !(S->Flags & SHF_ALLOC))
     return false;
 
   // Don't merge writable sections. .data.rel.ro sections are marked as writable
@@ -496,7 +496,7 @@ template <class ELFT> void ICF<ELFT>::run() {
       // we want to remove duplicate implicit dependencies such as link order
       // and relocation sections.
       for (InputSection *IS : Sections[I]->DependentSections)
-        IS->Live = false;
+        IS->markDead();
     }
   });
 }
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 9075568f8c1d7..2ff6b4800b787 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -443,7 +443,7 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
         continue;
       }
       SectionBase *Section = D->Section->Repl;
-      if (!Section->Live) {
+      if (!Section->isLive()) {
         P->setSymbolAndType(0, 0, false);
         continue;
       }
@@ -1095,8 +1095,19 @@ template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
 
 void InputSection::replace(InputSection *Other) {
   Alignment = std::max(Alignment, Other->Alignment);
+
+  // When a section is replaced with another section that was allocated to
+  // another partition, the replacement section (and its associated sections)
+  // need to be placed in the main partition so that both partitions will be
+  // able to access it.
+  if (Partition != Other->Partition) {
+    Partition = 1;
+    for (InputSection *IS : DependentSections)
+      IS->Partition = 1;
+  }
+
   Other->Repl = Repl;
-  Other->Live = false;
+  Other->markDead();
 }
 
 template <class ELFT>
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 17ebb7ee15b37..ec81a2df7325e 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -54,10 +54,6 @@ class SectionBase {
   // The next three bit fields are only used by InputSectionBase, but we
   // put them here so the struct packs better.
 
-  // The garbage collector sets sections' Live bits.
-  // If GC is disabled, all sections are considered live by default.
-  unsigned Live : 1;
-
   // True if this section has already been placed to a linker script
   // output section. This is needed because, in a linker script, you
   // can refer to the same section more than once. For example, in
@@ -76,6 +72,11 @@ class SectionBase {
   // Set for sections that should not be folded by ICF.
   unsigned KeepUnique : 1;
 
+  // The 1-indexed partition that this section is assigned to by the garbage
+  // collector, or 0 if this section is dead. Normally there is only one
+  // partition, so this will either be 0 or 1.
+  uint8_t Partition;
+
   // These corresponds to the fields in Elf_Shdr.
   uint32_t Alignment;
   uint64_t Flags;
@@ -95,12 +96,16 @@ class SectionBase {
 
   uint64_t getVA(uint64_t Offset = 0) const;
 
+  bool isLive() const { return Partition != 0; }
+  void markLive() { Partition = 1; }
+  void markDead() { Partition = 0; }
+
 protected:
   SectionBase(Kind SectionKind, StringRef Name, uint64_t Flags,
               uint64_t Entsize, uint64_t Alignment, uint32_t Type,
               uint32_t Info, uint32_t Link)
-      : Name(Name), Repl(this), SectionKind(SectionKind), Live(false),
-        Assigned(false), Bss(false), KeepUnique(false), Alignment(Alignment),
+      : Name(Name), Repl(this), SectionKind(SectionKind), Assigned(false),
+        Bss(false), KeepUnique(false), Partition(0), Alignment(Alignment),
         Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), Info(Info) {}
 };
 
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 99f0853c911a2..a6354c841c264 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -380,7 +380,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *Cmd) {
     size_t SizeBefore = Ret.size();
 
     for (InputSectionBase *Sec : InputSections) {
-      if (!Sec->Live || Sec->Assigned)
+      if (!Sec->isLive() || Sec->Assigned)
         continue;
 
       // For -emit-relocs we have to ignore entries like
@@ -425,7 +425,7 @@ void LinkerScript::discard(ArrayRef<InputSection *> V) {
       In.HashTab = nullptr;
 
     S->Assigned = false;
-    S->Live = false;
+    S->markDead();
     discard(S->DependentSections);
   }
 }
@@ -544,8 +544,9 @@ static OutputSection *createSection(InputSectionBase *IS,
   return Sec;
 }
 
-static OutputSection *addInputSec(StringMap<OutputSection *> &Map,
-                                  InputSectionBase *IS, StringRef OutsecName) {
+static OutputSection *
+addInputSec(StringMap<TinyPtrVector<OutputSection *>> &Map,
+            InputSectionBase *IS, StringRef OutsecName) {
   // Sections with SHT_GROUP or SHF_GROUP attributes reach here only when the -r
   // option is given. A section with SHT_GROUP defines a "section group", and
   // its members have SHF_GROUP attribute. Usually these flags have already been
@@ -624,23 +625,26 @@ static OutputSection *addInputSec(StringMap<OutputSection *> &Map,
   //
   // Given the above issues, we instead merge sections by name and error on
   // incompatible types and flags.
-  OutputSection *&Sec = Map[OutsecName];
-  if (Sec) {
+  TinyPtrVector<OutputSection *> &V = Map[OutsecName];
+  for (OutputSection *Sec : V) {
+    if (Sec->Partition != IS->Partition)
+      continue;
     Sec->addSection(cast<InputSection>(IS));
     return nullptr;
   }
 
-  Sec = createSection(IS, OutsecName);
+  OutputSection *Sec = createSection(IS, OutsecName);
+  V.push_back(Sec);
   return Sec;
 }
 
 // Add sections that didn't match any sections command.
 void LinkerScript::addOrphanSections() {
-  StringMap<OutputSection *> Map;
+  StringMap<TinyPtrVector<OutputSection *>> Map;
   std::vector<OutputSection *> V;
 
   auto Add = [&](InputSectionBase *S) {
-    if (!S->Live || S->Parent)
+    if (!S->isLive() || S->Parent)
       return;
 
     StringRef Name = getOutputSectionName(S);
@@ -886,7 +890,7 @@ void LinkerScript::adjustSectionsBeforeSorting() {
     // A live output section means that some input section was added to it. It
     // might have been removed (if it was empty synthetic section), but we at
     // least know the flags.
-    if (Sec->Live)
+    if (Sec->isLive())
       Flags = Sec->Flags;
 
     // We do not want to keep any special flags for output section
@@ -897,7 +901,7 @@ void LinkerScript::adjustSectionsBeforeSorting() {
                             SHF_WRITE | SHF_EXECINSTR);
 
     if (IsEmpty && isDiscardable(*Sec)) {
-      Sec->Live = false;
+      Sec->markDead();
       Cmd = nullptr;
     }
   }
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 2f1921ec9837d..f5d5d5a834ea7 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -57,7 +57,7 @@ static std::vector<Defined *> getSymbols() {
   for (InputFile *File : ObjectFiles)
     for (Symbol *B : File->getSymbols())
       if (auto *DR = dyn_cast<Defined>(B))
-        if (!DR->isSection() && DR->Section && DR->Section->Live &&
+        if (!DR->isSection() && DR->Section && DR->Section->isLive() &&
             (DR->File == File || DR->NeedsPltAddr || DR->Section->Bss))
           V.push_back(DR);
   return V;
@@ -239,7 +239,7 @@ void elf::writeCrossReferenceTable() {
       if (isa<SharedSymbol>(Sym))
         Map[Sym].insert(File);
       if (auto *D = dyn_cast<Defined>(Sym))
-        if (!D->isLocal() && (!D->Section || D->Section->Live))
+        if (!D->isLocal() && (!D->Section || D->Section->isLive()))
           Map[D].insert(File);
     }
   }
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 431915c27ba4e..ef85075e4a327 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -25,6 +25,7 @@
 #include "OutputSections.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
+#include "SyntheticSections.h"
 #include "Target.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
@@ -44,11 +45,15 @@ using namespace lld::elf;
 namespace {
 template <class ELFT> class MarkLive {
 public:
+  MarkLive(unsigned Partition) : Partition(Partition) {}
+
   void run();
+  void moveToMain();
 
 private:
   void enqueue(InputSectionBase *Sec, uint64_t Offset);
   void markSymbol(Symbol *Sym);
+  void mark();
 
   template <class RelTy>
   void resolveReloc(InputSectionBase &Sec, RelTy &Rel, bool IsLSDA);
@@ -56,6 +61,9 @@ template <class ELFT> class MarkLive {
   template <class RelTy>
   void scanEhFrameSection(EhInputSection &EH, ArrayRef<RelTy> Rels);
 
+  // The index of the partition that we are currently processing.
+  unsigned Partition;
+
   // A list of sections to visit.
   SmallVector<InputSection *, 256> Queue;
 
@@ -183,9 +191,12 @@ void MarkLive<ELFT>::enqueue(InputSectionBase *Sec, uint64_t Offset) {
   if (auto *MS = dyn_cast<MergeInputSection>(Sec))
     MS->getSectionPiece(Offset)->Live = true;
 
-  if (Sec->Live)
+  // Set Sec->Partition to the meet (i.e. the "minimum") of Partition and
+  // Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition
+  // doesn't change, we don't need to do anything.
+  if (Sec->Partition == 1 || Sec->Partition == Partition)
     return;
-  Sec->Live = true;
+  Sec->Partition = Sec->Partition ? 1 : Partition;
 
   // Add input section to the queue.
   if (InputSection *S = dyn_cast<InputSection>(Sec))
@@ -203,6 +214,20 @@ template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *Sym) {
 // sections to set their "Live" bits.
 template <class ELFT> void MarkLive<ELFT>::run() {
   // Add GC root symbols.
+
+  // Preserve externally-visible symbols if the symbols defined by this
+  // file can interrupt other ELF file's symbols at runtime.
+  Symtab->forEachSymbol([&](Symbol *Sym) {
+    if (Sym->includeInDynsym() && Sym->Partition == Partition)
+      markSymbol(Sym);
+  });
+
+  // If this isn't the main partition, that's all that we need to preserve.
+  if (Partition != 1) {
+    mark();
+    return;
+  }
+
   markSymbol(Symtab->find(Config->Entry));
   markSymbol(Symtab->find(Config->Init));
   markSymbol(Symtab->find(Config->Fini));
@@ -211,13 +236,6 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   for (StringRef S : Script->ReferencedSymbols)
     markSymbol(Symtab->find(S));
 
-  // Preserve externally-visible symbols if the symbols defined by this
-  // file can interrupt other ELF file's symbols at runtime.
-  Symtab->forEachSymbol([&](Symbol *Sym) {
-    if (Sym->includeInDynsym())
-      markSymbol(Sym);
-  });
-
   // Preserve special sections and those which are specified in linker
   // script KEEP command.
   for (InputSectionBase *Sec : InputSections) {
@@ -226,7 +244,7 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     // all of them. We also want to preserve personality routines and LSDA
     // referenced by .eh_frame sections, so we scan them for that here.
     if (auto *EH = dyn_cast<EhInputSection>(Sec)) {
-      EH->Live = true;
+      EH->markLive();
       if (!EH->NumRelocations)
         continue;
 
@@ -247,6 +265,10 @@ template <class ELFT> void MarkLive<ELFT>::run() {
     }
   }
 
+  mark();
+}
+
+template <class ELFT> void MarkLive<ELFT>::mark() {
   // Mark all reachable sections.
   while (!Queue.empty()) {
     InputSectionBase &Sec = *Queue.pop_back_val();
@@ -264,6 +286,22 @@ template <class ELFT> void MarkLive<ELFT>::run() {
   }
 }
 
+// Move the sections for some symbols to the main partition, specifically ifuncs
+// (because they can result in an IRELATIVE being added to the main partition's
+// GOT, which means that the ifunc must be available when the main partition is
+// loaded) and TLS symbols (because we only know how to correctly process TLS
+// relocations for the main partition).
+template <class ELFT> void MarkLive<ELFT>::moveToMain() {
+  for (InputFile *File : ObjectFiles)
+    for (Symbol *S : File->getSymbols())
+      if (auto *D = dyn_cast<Defined>(S))
+        if ((D->Type == STT_GNU_IFUNC || D->Type == STT_TLS) && D->Section &&
+            D->Section->isLive())
+          markSymbol(S);
+
+  mark();
+}
+
 // Before calling this function, Live bits are off for all
 // input sections. This function make some or all of them on
 // so that they are emitted to the output file.
@@ -271,7 +309,7 @@ template <class ELFT> void elf::markLive() {
   // If -gc-sections is not given, no sections are removed.
   if (!Config->GcSections) {
     for (InputSectionBase *Sec : InputSections)
-      Sec->Live = true;
+      Sec->markLive();
 
     // If a DSO defines a symbol referenced in a regular object, it is needed.
     Symtab->forEachSymbol([](Symbol *Sym) {
@@ -309,16 +347,23 @@ template <class ELFT> void elf::markLive() {
     bool IsRel = (Sec->Type == SHT_REL || Sec->Type == SHT_RELA);
 
     if (!IsAlloc && !IsLinkOrder && !IsRel)
-      Sec->Live = true;
+      Sec->markLive();
   }
 
   // Follow the graph to mark all live sections.
-  MarkLive<ELFT>().run();
+  for (unsigned CurPart = 1; CurPart <= Partitions.size(); ++CurPart)
+    MarkLive<ELFT>(CurPart).run();
+
+  // If we have multiple partitions, some sections need to live in the main
+  // partition even if they were allocated to a loadable partition. Move them
+  // there now.
+  if (Partitions.size() != 1)
+    MarkLive<ELFT>(1).moveToMain();
 
   // Report garbage-collected sections.
   if (Config->PrintGcSections)
     for (InputSectionBase *Sec : InputSections)
-      if (!Sec->Live)
+      if (!Sec->isLive())
         message("removing unused section " + toString(Sec));
 }
 
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 6919ec465a0de..becca8356232c 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -84,10 +84,10 @@ static bool canMergeToProgbits(unsigned Type) {
 }
 
 void OutputSection::addSection(InputSection *IS) {
-  if (!Live) {
+  if (!isLive()) {
     // If IS is the first section to be added to this section,
-    // initialize Type, Entsize and flags from IS.
-    Live = true;
+    // initialize Partition, Type, Entsize and flags from IS.
+    Partition = IS->Partition;
     Type = IS->Type;
     Entsize = IS->Entsize;
     Flags = IS->Flags;
@@ -158,7 +158,7 @@ bool OutputSection::classof(const BaseCommand *C) {
 }
 
 void OutputSection::sort(llvm::function_ref<int(InputSectionBase *S)> Order) {
-  assert(Live);
+  assert(isLive());
   for (BaseCommand *B : SectionCommands)
     if (auto *ISD = dyn_cast<InputSectionDescription>(B))
       sortByOrder(ISD->Sections, Order);
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index c4d52b1168e02..cf83e907a73c4 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -79,6 +79,7 @@ Symbol *SymbolTable::insert(StringRef Name) {
   Sym->ExportDynamic = false;
   Sym->CanInline = true;
   Sym->ScriptDefined = false;
+  Sym->Partition = 1;
   return Sym;
 }
 
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 45c545d532962..e8c6377fb596d 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -333,7 +333,7 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *Sym) {
     Warn(": unable to order absolute symbol: ");
   else if (D && isa<OutputSection>(D->Section))
     Warn(": unable to order synthetic symbol: ");
-  else if (D && !D->Section->Repl->Live)
+  else if (D && !D->Section->Repl->isLive())
     Warn(": unable to order discarded symbol: ");
 }
 
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 64fa30db25205..03de7009ce56b 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -255,6 +255,9 @@ class Symbol {
   // True if this symbol is defined by a linker script.
   unsigned ScriptDefined : 1;
 
+  // The partition whose dynamic symbol table contains this symbol's definition.
+  uint8_t Partition = 1;
+
   bool isSection() const { return Type == llvm::ELF::STT_SECTION; }
   bool isTls() const { return Type == llvm::ELF::STT_TLS; }
   bool isFunc() const { return Type == llvm::ELF::STT_FUNC; }
@@ -516,6 +519,7 @@ void Symbol::replace(const Symbol &New) {
   Traced = Old.Traced;
   IsPreemptible = Old.IsPreemptible;
   ScriptDefined = Old.ScriptDefined;
+  Partition = Old.Partition;
 
   // Symbol length is computed lazily. If we already know a symbol length,
   // propagate it.
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 7e4acccdd52c9..672e0b2030666 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -107,7 +107,7 @@ MipsAbiFlagsSection<ELFT> *MipsAbiFlagsSection<ELFT>::create() {
   for (InputSectionBase *Sec : InputSections) {
     if (Sec->Type != SHT_MIPS_ABIFLAGS)
       continue;
-    Sec->Live = false;
+    Sec->markDead();
     Create = true;
 
     std::string Filename = toString(Sec->File);
@@ -180,7 +180,7 @@ MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() {
 
   Elf_Mips_RegInfo Reginfo = {};
   for (InputSectionBase *Sec : Sections) {
-    Sec->Live = false;
+    Sec->markDead();
 
     std::string Filename = toString(Sec->File);
     ArrayRef<uint8_t> D = Sec->data();
@@ -237,7 +237,7 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() {
 
   Elf_Mips_RegInfo Reginfo = {};
   for (InputSectionBase *Sec : Sections) {
-    Sec->Live = false;
+    Sec->markDead();
 
     if (Sec->data().size() != sizeof(Elf_Mips_RegInfo)) {
       error(toString(Sec->File) + ": invalid size of .reginfo section");
@@ -259,7 +259,7 @@ InputSection *elf::createInterpSection() {
 
   auto *Sec = make<InputSection>(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, Contents,
                                  ".interp");
-  Sec->Live = true;
+  Sec->markLive();
   return Sec;
 }
 
@@ -358,7 +358,7 @@ bool EhFrameSection::isFdeLive(EhSectionPiece &Fde, ArrayRef<RelTy> Rels) {
   // FDEs for garbage-collected or merged-by-ICF sections are dead.
   if (auto *D = dyn_cast<Defined>(&B))
     if (SectionBase *Sec = D->Section)
-      return Sec->Live;
+      return Sec->isLive();
   return false;
 }
 
@@ -1290,7 +1290,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
   // as RelaIplt have. And we still want to emit proper dynamic tags for that
   // case, so here we always use RelaPlt as marker for the begining of
   // .rel[a].plt section.
-  if (In.RelaPlt->getParent()->Live) {
+  if (In.RelaPlt->getParent()->isLive()) {
     addInSec(DT_JMPREL, In.RelaPlt);
     Entries.push_back({DT_PLTRELSZ, addPltRelSz});
     switch (Config->EMachine) {
@@ -2370,7 +2370,7 @@ readAddressAreas(DWARFContext &Dwarf, InputSection *Sec) {
       if (R.SectionIndex == -1ULL)
         continue;
       InputSectionBase *S = Sections[R.SectionIndex];
-      if (!S || S == &InputSection::Discarded || !S->Live)
+      if (!S || S == &InputSection::Discarded || !S->isLive())
         continue;
       // Range list with zero size has no effect.
       if (R.LowPC == R.HighPC)
@@ -2503,7 +2503,7 @@ template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
   // a .gdb_index. So we can remove them from the output.
   for (InputSectionBase *S : InputSections)
     if (S->Name == ".debug_gnu_pubnames" || S->Name == ".debug_gnu_pubtypes")
-      S->Live = false;
+      S->markDead();
 
   std::vector<GdbChunk> Chunks(Sections.size());
   std::vector<std::vector<NameAttrEntry>> NameAttrs(Sections.size());
@@ -2945,7 +2945,7 @@ void elf::mergeSections() {
 
     // We do not want to handle sections that are not alive, so just remove
     // them instead of trying to merge.
-    if (!MS->Live) {
+    if (!MS->isLive()) {
       S = nullptr;
       continue;
     }
@@ -3257,6 +3257,8 @@ bool PPC64LongBranchTargetSection::isNeeded() const {
 
 InStruct elf::In;
 
+std::vector<Partition> elf::Partitions;
+
 template GdbIndexSection *GdbIndexSection::create<ELF32LE>();
 template GdbIndexSection *GdbIndexSection::create<ELF32BE>();
 template GdbIndexSection *GdbIndexSection::create<ELF64LE>();
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 58a01ea6c54cb..ad37cde763040 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -31,6 +31,7 @@
 namespace lld {
 namespace elf {
 class Defined;
+struct Partition;
 
 class SyntheticSection : public InputSection {
 public:
@@ -38,7 +39,7 @@ class SyntheticSection : public InputSection {
                    StringRef Name)
       : InputSection(nullptr, Flags, Type, Alignment, {}, Name,
                      InputSectionBase::Synthetic) {
-    this->Live = true;
+    markLive();
   }
 
   virtual ~SyntheticSection() = default;
@@ -1062,6 +1063,14 @@ Defined *addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value,
 
 void addVerneed(Symbol *SS);
 
+extern std::vector<Partition> Partitions;
+
+// Linker generated per-partition sections.
+struct Partition {
+  StringRef Name;
+  unsigned getNumber() const { return this - &Partitions[0] + 1; }
+};
+
 // Linker generated sections which can be used as inputs.
 struct InStruct {
   InputSection *ARMAttributes;
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index dd2cae8e8618a..00d676a3c89c3 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -157,7 +157,7 @@ template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() {
 
 template <class ELFT> static void combineEhSections() {
   for (InputSectionBase *&S : InputSections) {
-    if (!S->Live)
+    if (!S->isLive())
       continue;
 
     if (auto *ES = dyn_cast<EhInputSection>(S)) {
@@ -607,7 +607,7 @@ static bool includeInSymtab(const Symbol &B) {
     Sec = Sec->Repl;
 
     // Exclude symbols pointing to garbage-collected sections.
-    if (isa<InputSectionBase>(Sec) && !Sec->Live)
+    if (isa<InputSectionBase>(Sec) && !Sec->isLive())
       return false;
 
     if (auto *S = dyn_cast<MergeInputSection>(Sec))
@@ -761,8 +761,9 @@ static bool isRelroSection(const OutputSection *Sec) {
 // * It is easy to check if a give branch was taken.
 // * It is easy two see how similar two ranks are (see getRankProximity).
 enum RankFlags {
-  RF_NOT_ADDR_SET = 1 << 17,
-  RF_NOT_ALLOC = 1 << 16,
+  RF_NOT_ADDR_SET = 1 << 25,
+  RF_NOT_ALLOC = 1 << 24,
+  RF_PARTITION = 1 << 16, // Partition number (8 bits)
   RF_NOT_INTERP = 1 << 15,
   RF_NOT_NOTE = 1 << 14,
   RF_WRITE = 1 << 13,
@@ -782,7 +783,7 @@ enum RankFlags {
 };
 
 static unsigned getSectionRank(const OutputSection *Sec) {
-  unsigned Rank = 0;
+  unsigned Rank = Sec->Partition * RF_PARTITION;
 
   // We want to put section specified by -T option first, so we
   // can start assigning VA starting from them later.
@@ -953,11 +954,11 @@ void Writer<ELFT>::forEachRelSec(
   // Note that relocations for non-alloc sections are directly
   // processed by InputSection::relocateNonAlloc.
   for (InputSectionBase *IS : InputSections)
-    if (IS->Live && isa<InputSection>(IS) && (IS->Flags & SHF_ALLOC))
+    if (IS->isLive() && isa<InputSection>(IS) && (IS->Flags & SHF_ALLOC))
       Fn(*IS);
   for (EhInputSection *ES : In.EhFrame->Sections)
     Fn(*ES);
-  if (In.ARMExidx && In.ARMExidx->Live)
+  if (In.ARMExidx && In.ARMExidx->isLive())
     for (InputSection *Ex : In.ARMExidx->ExidxSections)
       Fn(*Ex);
 }
@@ -1054,7 +1055,7 @@ static int getRankProximityAux(OutputSection *A, OutputSection *B) {
 
 static int getRankProximity(OutputSection *A, BaseCommand *B) {
   auto *Sec = dyn_cast<OutputSection>(B);
-  return (Sec && Sec->Live) ? getRankProximityAux(A, Sec) : -1;
+  return (Sec && Sec->isLive()) ? getRankProximityAux(A, Sec) : -1;
 }
 
 // When placing orphan sections, we want to place them after symbol assignments
@@ -1096,7 +1097,7 @@ findOrphanPos(std::vector<BaseCommand *>::iterator B,
   int Proximity = getRankProximity(Sec, *I);
   for (; I != E; ++I) {
     auto *CurSec = dyn_cast<OutputSection>(*I);
-    if (!CurSec || !CurSec->Live)
+    if (!CurSec || !CurSec->isLive())
       continue;
     if (getRankProximity(Sec, CurSec) != Proximity ||
         Sec->SortRank < CurSec->SortRank)
@@ -1105,7 +1106,7 @@ findOrphanPos(std::vector<BaseCommand *>::iterator B,
 
   auto IsLiveOutputSec = [](BaseCommand *Cmd) {
     auto *OS = dyn_cast<OutputSection>(Cmd);
-    return OS && OS->Live;
+    return OS && OS->isLive();
   };
   auto J = std::find_if(llvm::make_reverse_iterator(I),
                         llvm::make_reverse_iterator(B), IsLiveOutputSec);
diff --git a/lld/test/ELF/partition-errors.s b/lld/test/ELF/partition-errors.s
new file mode 100644
index 0000000000000..6150fe20cdadb
--- /dev/null
+++ b/lld/test/ELF/partition-errors.s
@@ -0,0 +1,23 @@
+// REQUIRES: x86, mips
+// RUN: llvm-mc -triple=x86_64-unknown-linux -filetype=obj -o %t.o %s
+// RUN: echo "SECTIONS {}" > %t.script
+// RUN: not ld.lld --export-dynamic %t.o %t.script 2>&1 | FileCheck %s
+// RUN: echo "PHDRS { text PT_LOAD; }" > %t2.script
+// RUN: not ld.lld --export-dynamic %t.o %t2.script 2>&1 | FileCheck %s
+// RUN: not ld.lld --export-dynamic %t.o --section-start .text=0 2>&1 | FileCheck %s
+// RUN: not ld.lld --export-dynamic %t.o -Ttext=0 2>&1 | FileCheck %s
+// RUN: not ld.lld --export-dynamic %t.o -Tdata=0 2>&1 | FileCheck %s
+// RUN: not ld.lld --export-dynamic %t.o -Tbss=0 2>&1 | FileCheck %s
+
+// RUN: llvm-mc -triple=mipsel-unknown-linux -filetype=obj -o %t2.o %s
+// RUN: not ld.lld --export-dynamic %t2.o 2>&1 | FileCheck %s
+
+// CHECK: error: {{.*}}.o: partitions cannot be used
+
+.section .llvm_sympart.f1,"",@llvm_sympart
+.asciz "part1"
+.quad f1
+
+.text
+.globl f1
+f1:
diff --git a/lld/test/ELF/partition-icf.s b/lld/test/ELF/partition-icf.s
new file mode 100644
index 0000000000000..58be0c1ad00fa
--- /dev/null
+++ b/lld/test/ELF/partition-icf.s
@@ -0,0 +1,50 @@
+// REQUIRES: x86
+// RUN: llvm-mc %s -o %t.o -filetype=obj --triple=x86_64-unknown-linux
+// RUN: ld.lld %t.o -o %t --export-dynamic --gc-sections --icf=all
+// RUN: llvm-readelf -S -s %t | FileCheck %s
+
+// CHECK: [[MAIN:[0-9]+]]] .text
+// CHECK: [[P1:[0-9]+]]] .text
+// CHECK: [[P2:[0-9]+]]] .text
+
+// CHECK: Symbol table '.symtab'
+// CHECK:   [[P1]] f1
+// CHECK:   [[P2]] f2
+// CHECK: [[MAIN]] g1
+// CHECK: [[MAIN]] g2
+
+.section .llvm_sympart.f1,"",@llvm_sympart
+.asciz "part1"
+.quad f1
+
+.section .llvm_sympart.f2,"",@llvm_sympart
+.asciz "part2"
+.quad f2
+
+.section .llvm_sympart.g1,"",@llvm_sympart
+.asciz "part1"
+.quad g1
+
+.section .llvm_sympart.g2,"",@llvm_sympart
+.asciz "part2"
+.quad g2
+
+.section .text.f1,"ax",@progbits
+.globl f1
+f1:
+.byte 1
+
+.section .text.f2,"ax",@progbits
+.globl f2
+f2:
+.byte 2
+
+.section .text.g1,"ax",@progbits
+.globl g1
+g1:
+.byte 3
+
+.section .text.g2,"ax",@progbits
+.globl g2
+g2:
+.byte 3
diff --git a/lld/test/ELF/partition-move-to-main.s b/lld/test/ELF/partition-move-to-main.s
new file mode 100644
index 0000000000000..c9fe14c135f65
--- /dev/null
+++ b/lld/test/ELF/partition-move-to-main.s
@@ -0,0 +1,44 @@
+// REQUIRES: x86
+// RUN: llvm-mc %s -o %t.o -filetype=obj --triple=x86_64-unknown-linux
+// RUN: ld.lld %t.o -o %t --export-dynamic --gc-sections
+// RUN: llvm-readelf -S -s %t | FileCheck %s
+
+// Ordinarily, the TLS and IFUNC sections would be split into partitions.
+// Make sure that that didn't happen by checking that there is only one
+// of each section.
+
+// CHECK: .ifunc
+// CHECK: .tdata
+
+// CHECK-NOT: .ifunc
+// CHECK-NOT: .tdata
+
+.section .llvm_sympart.f1,"",@llvm_sympart
+.asciz "part1"
+.quad f1
+
+.section .text._start,"ax",@progbits
+.globl _start
+_start:
+call tls1
+call ifunc1
+
+.section .text.f1,"ax",@progbits
+.globl f1
+f1:
+call tls2
+call ifunc2
+
+.section .ifunc,"ax",@progbits,unique,1
+.type ifunc1 STT_GNU_IFUNC
+ifunc1:
+
+.section .ifunc,"ax",@progbits,unique,2
+.type ifunc2 STT_GNU_IFUNC
+ifunc2:
+
+.section .tdata,"awT",@progbits,unique,1
+tls1:
+
+.section .tdata,"awT",@progbits,unique,2
+tls2:
diff --git a/lld/test/ELF/partitions.s b/lld/test/ELF/partitions.s
new file mode 100644
index 0000000000000..0fb8dcb9849f5
--- /dev/null
+++ b/lld/test/ELF/partitions.s
@@ -0,0 +1,63 @@
+// REQUIRES: x86
+// RUN: llvm-mc %s -o %t.o -filetype=obj --triple=x86_64-unknown-linux
+// RUN: ld.lld %t.o -o %t --export-dynamic --gc-sections
+// RUN: llvm-readelf -S -s %t | FileCheck %s
+
+// This is basically lld/docs/partitions.dot in object file form.
+// Test that the sections are correctly allocated to partitions.
+
+// CHECK: [[MAIN:[0-9]+]]] .text
+// CHECK: [[P1:[0-9]+]]] .text
+// CHECK: [[P2:[0-9]+]]] .text
+
+// CHECK: Symbol table '.symtab'
+// CHECK: [[MAIN]] f3
+// CHECK:   [[P1]] f4
+// CHECK: [[MAIN]] f5
+// CHECK:   [[P2]] f6
+// CHECK: [[MAIN]] _start
+// CHECK:   [[P1]] f1
+// CHECK:   [[P2]] f2
+
+.section .llvm_sympart.f1,"",@llvm_sympart
+.asciz "part1"
+.quad f1
+
+.section .llvm_sympart.f2,"",@llvm_sympart
+.asciz "part2"
+.quad f2
+
+.section .text._start,"ax",@progbits
+.globl _start
+_start:
+call f3
+
+.section .text.f1,"ax",@progbits
+.globl f1
+f1:
+call f3
+call f4
+call f5
+
+.section .text.f2,"ax",@progbits
+.globl f2
+f2:
+call f3
+call f5
+call f6
+
+.section .text.f3,"ax",@progbits
+f3:
+ret
+
+.section .text.f4,"ax",@progbits
+f4:
+ret
+
+.section .text.f5,"ax",@progbits
+f5:
+ret
+
+.section .text.f6,"ax",@progbits
+f6:
+ret

From 87575f6501dc4c1ae27e3bd6a469fa0391964a2b Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 29 May 2019 04:06:01 +0000
Subject: [PATCH 0454/1176] ELF: Don't reuse a thunk in a different loadable
 partition.

There's no guarantee that the other partition will be loaded, so it
can't be reused.

Differential Revision: https://reviews.llvm.org/D62365

llvm-svn: 361926
---
 lld/ELF/Relocations.cpp              | 19 +++++++++---
 lld/ELF/Relocations.h                |  3 +-
 lld/test/ELF/partition-thunk-reuse.s | 45 ++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 5 deletions(-)
 create mode 100644 lld/test/ELF/partition-thunk-reuse.s

diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index aab5385dad03d..b02e1cc2cae13 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1607,12 +1607,22 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS,
                                             InputSectionDescription *ISD,
                                             uint64_t Off) {
   auto *TS = make<ThunkSection>(OS, Off);
+  TS->Partition = OS->Partition;
   ISD->ThunkSections.push_back({TS, Pass});
   return TS;
 }
 
-std::pair<Thunk *, bool> ThunkCreator::getThunk(Symbol &Sym, RelType Type,
-                                                uint64_t Src) {
+static bool isThunkSectionCompatible(InputSection *Source,
+                                     SectionBase *Target) {
+  // We can't reuse thunks in different loadable partitions because they might
+  // not be loaded. But partition 1 (the main partition) will always be loaded.
+  if (Source->Partition != Target->Partition)
+    return Target->Partition == 1;
+  return true;
+}
+
+std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *IS, Symbol &Sym,
+                                                RelType Type, uint64_t Src) {
   std::vector<Thunk *> *ThunkVec = nullptr;
 
   // We use (section, offset) pair to find the thunk position if possible so
@@ -1625,7 +1635,8 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(Symbol &Sym, RelType Type,
 
   // Check existing Thunks for Sym to see if they can be reused
   for (Thunk *T : *ThunkVec)
-    if (T->isCompatibleWith(Type) &&
+    if (isThunkSectionCompatible(IS, T->getThunkTargetSym()->Section) &&
+        T->isCompatibleWith(Type) &&
         Target->inBranchRange(Type, Src, T->getThunkTargetSym()->getVA()))
       return std::make_pair(T, false);
 
@@ -1709,7 +1720,7 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> OutputSections) {
 
             Thunk *T;
             bool IsNew;
-            std::tie(T, IsNew) = getThunk(*Rel.Sym, Rel.Type, Src);
+            std::tie(T, IsNew) = getThunk(IS, *Rel.Sym, Rel.Type, Src);
 
             if (IsNew) {
               // Find or create a ThunkSection for the new Thunk
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 923aa4661a394..00156f5c3731b 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -136,7 +136,8 @@ class ThunkCreator {
 
   void createInitialThunkSections(ArrayRef<OutputSection *> OutputSections);
 
-  std::pair<Thunk *, bool> getThunk(Symbol &Sym, RelType Type, uint64_t Src);
+  std::pair<Thunk *, bool> getThunk(InputSection *IS, Symbol &Sym, RelType Type,
+                                    uint64_t Src);
 
   ThunkSection *addThunkSection(OutputSection *OS, InputSectionDescription *,
                                 uint64_t Off);
diff --git a/lld/test/ELF/partition-thunk-reuse.s b/lld/test/ELF/partition-thunk-reuse.s
new file mode 100644
index 0000000000000..6425bdf345b03
--- /dev/null
+++ b/lld/test/ELF/partition-thunk-reuse.s
@@ -0,0 +1,45 @@
+// REQUIRES: arm
+// RUN: llvm-mc %s -o %t.o -filetype=obj --triple=armv7-unknown-linux -arm-add-build-attributes
+// RUN: ld.lld %t.o -o %t --export-dynamic --gc-sections
+// RUN: llvm-nm %t | FileCheck %s
+
+// CHECK: __Thumbv7ABSLongThunk__start
+// CHECK: __Thumbv7ABSLongThunk__start
+
+// CHECK: __Thumbv7ABSLongThunk_foo
+// CHECK-NOT: __Thumbv7ABSLongThunk_foo
+
+.thumb
+
+.section .llvm_sympart.g1,"",%llvm_sympart
+.asciz "part1"
+.4byte f1
+
+.section .llvm_sympart.g2,"",%llvm_sympart
+.asciz "part2"
+.4byte f2
+
+.section .text._start,"ax",%progbits
+.globl _start
+_start:
+bx lr
+foo:
+b f0
+.zero 17*1048576
+
+.section .text.f0,"ax",%progbits
+.globl f0
+f0:
+b foo
+
+.section .text.f1,"aw",%progbits
+.globl f1
+f1:
+b _start
+b foo
+
+.section .text.f2,"ax",%progbits
+.globl f2
+f2:
+b _start
+b foo

From c77aff7e170ae6f50a403f459a9dcba211b0d4c8 Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Wed, 29 May 2019 04:09:32 +0000
Subject: [PATCH 0455/1176] Inline a variable into debug section to fix unused
 variable warning.

llvm-svn: 361927
---
 llvm/lib/CodeGen/MachinePipeliner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 61441d909388d..af159f1c45514 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -3963,10 +3963,10 @@ void ResourceManager::reserveResources(const MCInstrDesc *MID) {
                   STI->getWriteProcResEnd(SCDesc))) {
     if (!PRE.Cycles)
       continue;
-    const MCProcResourceDesc *ProcResource =
-        SM.getProcResource(PRE.ProcResourceIdx);
     ++ProcResourceCount[PRE.ProcResourceIdx];
     LLVM_DEBUG({
+      const MCProcResourceDesc *ProcResource =
+          SM.getProcResource(PRE.ProcResourceIdx);
       dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
                        ProcResource->Name, PRE.ProcResourceIdx,
                        ProcResourceCount[PRE.ProcResourceIdx],

From b3bcbb5b6608043645219e6b9f0af76f5c8d0890 Mon Sep 17 00:00:00 2001
From: Zi Xuan Wu <wuzish@cn.ibm.com>
Date: Wed, 29 May 2019 05:17:03 +0000
Subject: [PATCH 0456/1176] [PowerPC] [Clang] Port SSE intrinsics to PowerPC

Port xmmintrin.h which include Intel SSE intrinsics implementation to PowerPC platform (using Altivec).

The new headers containing those implemenations are located into a directory named ppc_wrappers
which has higher priority when the platform is PowerPC on Linux. They are mainly developed by Steven Munroe,
with contributions from Paul Clarke, Bill Schmidt, Jinsong Ji and Zixuan Wu.

Patched by: Qiu Chaofan <qiucf@cn.ibm.com>
Reviewed By: Jinsong Ji

Differential Revision: https://reviews.llvm.org/D62121

llvm-svn: 361928
---
 clang/lib/Headers/CMakeLists.txt           |    2 +
 clang/lib/Headers/ppc_wrappers/mm_malloc.h |   48 +
 clang/lib/Headers/ppc_wrappers/xmmintrin.h | 1838 +++++++++++++++++
 clang/test/CodeGen/ppc-mm-malloc.c         |   71 +
 clang/test/CodeGen/ppc-mmintrin.c          |    1 +
 clang/test/CodeGen/ppc-xmmintrin.c         | 2090 ++++++++++++++++++++
 clang/test/Headers/ppc-intrinsics.c        |   13 -
 clang/test/Headers/ppc-mmx-intrinsics.c    |   11 +
 clang/test/Headers/ppc-sse-intrinsics.c    |   20 +
 9 files changed, 4081 insertions(+), 13 deletions(-)
 create mode 100644 clang/lib/Headers/ppc_wrappers/mm_malloc.h
 create mode 100644 clang/lib/Headers/ppc_wrappers/xmmintrin.h
 create mode 100644 clang/test/CodeGen/ppc-mm-malloc.c
 create mode 100644 clang/test/CodeGen/ppc-xmmintrin.c
 delete mode 100644 clang/test/Headers/ppc-intrinsics.c
 create mode 100644 clang/test/Headers/ppc-mmx-intrinsics.c
 create mode 100644 clang/test/Headers/ppc-sse-intrinsics.c

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 392ca2ae391c9..f7a3e5410ced5 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -126,6 +126,8 @@ set(cuda_wrapper_files
 
 set(ppc_wrapper_files
   ppc_wrappers/mmintrin.h
+  ppc_wrappers/xmmintrin.h
+  ppc_wrappers/mm_malloc.h
 )
 
 set(openmp_wrapper_files
diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
new file mode 100644
index 0000000000000..36589194b3e2f
--- /dev/null
+++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
@@ -0,0 +1,48 @@
+/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+   may not be visible.  */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment)
+{
+  /* PowerPC64 ELF V2 ABI requires quadword alignment.  */
+  size_t vec_align = sizeof (__vector float);
+  /* Linux GLIBC malloc alignment is at least 2 X ptr size.  */
+  size_t malloc_align = (sizeof (void *) + sizeof (void *));
+  void *ptr;
+
+  if (alignment == malloc_align && alignment == vec_align)
+    return malloc (size);
+  if (alignment < vec_align)
+    alignment = vec_align;
+  if (posix_memalign (&ptr, alignment, size) == 0)
+    return ptr;
+  else
+    return NULL;
+}
+
+static __inline void
+_mm_free (void * ptr)
+{
+  free (ptr);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
new file mode 100644
index 0000000000000..1b322b66519a6
--- /dev/null
+++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
@@ -0,0 +1,1838 @@
+/*===---- xmmintrin.h - Implementation of SSE intrinsics on PowerPC --------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+   User Guide and Reference, version 9.0.  */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header file is to help porting code using Intel intrinsics
+   explicitly from x86_64 to powerpc64/powerpc64le.
+
+   Since X86 SSE intrinsics mainly handles __m128 type, PowerPC
+   VMX/VSX ISA is a good match for vector float SIMD operations.
+   However scalar float operations in vector (XMM) registers require
+   the POWER8 VSX ISA (2.07) level. There are differences for data
+   format and placement of float scalars in the vector register, which
+   require extra steps to match SSE scalar float semantics on POWER.
+
+   It should be noted that there's much difference between X86_64's
+   MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use
+   portable <fenv.h> instead of access MXSCR directly.
+
+   Most SSE scalar float intrinsic operations can be performed more
+   efficiently as C language float scalar operations or optimized to
+   use vector SIMD operations. We recommend this for new applications. */
+#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
+#endif
+
+#ifndef _XMMINTRIN_H_INCLUDED
+#define _XMMINTRIN_H_INCLUDED
+
+/* Define four value permute mask */
+#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
+
+#include <altivec.h>
+
+/* Avoid collisions between altivec.h and strict adherence to C++ and
+   C11 standards.  This should eventually be done inside altivec.h itself,
+   but only after testing a full distro build.  */
+#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \
+				 (defined(__STDC_VERSION__) &&	\
+				  __STDC_VERSION__ >= 201112L))
+#undef vector
+#undef pixel
+#undef bool
+#endif
+
+/* We need type definitions from the MMX header file.  */
+#include <mmintrin.h>
+
+/* Get _mm_malloc () and _mm_free ().  */
+#if __STDC_HOSTED__
+#include <mm_malloc.h>
+#endif
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components.  */
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
+				       __aligned__ (1)));
+
+/* Internal data types for implementing the intrinsics.  */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Create an undefined vector.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+  __m128 __Y = __Y;
+  return __Y;
+}
+
+/* Create a vector of zeros.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ps (void)
+{
+  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
+/* Load four SPFP values from P.  The address must be 16-byte aligned.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps (float const *__P)
+{
+  return ((__m128)vec_ld(0, (__v4sf*)__P));
+}
+
+/* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ps (float const *__P)
+{
+  return (vec_vsx_ld(0, __P));
+}
+
+/* Load four SPFP values in reverse order.  The address must be aligned.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_ps (float const *__P)
+{
+  __v4sf   __tmp;
+  __m128 result;
+  static const __vector unsigned char permute_vector =
+    { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
+	0x17, 0x10, 0x11, 0x12, 0x13 };
+
+  __tmp = vec_ld (0, (__v4sf *) __P);
+  result = (__m128) vec_perm (__tmp, __tmp, permute_vector);
+  return result;
+}
+
+/* Create a vector with all four elements equal to F.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ps (float __F)
+{
+  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps1 (float __F)
+{
+  return _mm_set1_ps (__F);
+}
+
+/* Create the vector [Z Y X W].  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
+}
+
+/* Create the vector [W X Y Z].  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
+}
+
+/* Store four SPFP values.  The address must be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps (float *__P, __m128 __A)
+{
+  vec_st((__v4sf)__A, 0, (__v4sf*)__P);
+}
+
+/* Store four SPFP values.  The address need not be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ps (float *__P, __m128 __A)
+{
+  *(__m128_u *)__P = __A;
+}
+
+/* Store four SPFP values in reverse order.  The address must be aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_ps (float *__P, __m128 __A)
+{
+  __v4sf   __tmp;
+  static const __vector unsigned char permute_vector =
+    { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
+	0x17, 0x10, 0x11, 0x12, 0x13 };
+
+  __tmp = (__m128) vec_perm (__A, __A, permute_vector);
+
+  _mm_store_ps (__P, __tmp);
+}
+
+/* Store the lower SPFP value across four words.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_ps (float *__P, __m128 __A)
+{
+  __v4sf __va = vec_splat((__v4sf)__A, 0);
+  _mm_store_ps (__P, __va);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps1 (float *__P, __m128 __A)
+{
+  _mm_store1_ps (__P, __A);
+}
+
+/* Create a vector with element 0 as F and the rest zero.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ss (float __F)
+{
+  return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
+}
+
+/* Sets the low SPFP value of A from the low value of B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+
+  return (vec_sel ((__v4sf)__A, (__v4sf)__B, mask));
+}
+
+/* Create a vector with element 0 as *P and the rest zero.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ss (float const *__P)
+{
+  return _mm_set_ss (*__P);
+}
+
+/* Stores the lower SPFP value.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ss (float *__P, __m128 __A)
+{
+  *__P = ((__v4sf)__A)[0];
+}
+
+/* Perform the respective operation on the lower SPFP (single-precision
+   floating-point) values of A and B; the upper three SPFP values are
+   passed through from A.  */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a + b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] + __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a - b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] - __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a * b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] * __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a / b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] / __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ss (__m128 __A)
+{
+  __m128 a, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper double values) we splat the lower double
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  c = vec_sqrt (a);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+}
+
+/* Perform the respective operation on the four SPFP values in A and B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A + (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A - (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A * (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A / (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ps (__m128 __A)
+{
+  return (vec_sqrt ((__v4sf)__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ps (__m128 __A)
+{
+  return (vec_re ((__v4sf)__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ps (__m128 __A)
+{
+  return (vec_rsqrte (__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ss (__m128 __A)
+{
+  __m128 a, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper double values) we splat the lower double
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  c = _mm_rcp_ps (a);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ss (__m128 __A)
+{
+  __m128 a, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper double values) we splat the lower double
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  c = vec_rsqrte (a);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+  __v4sf a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower float)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper float values) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf)__A, 0);
+  b = vec_splat ((__v4sf)__B, 0);
+  c = vec_min (a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+  __v4sf a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower float)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper float values) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = vec_max (a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+  __vector __bool int m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, m);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+  __vector __bool int m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, m);
+}
+
+/* Perform logical bit-wise operations on 128-bit values.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_and ((__v4sf)__A, (__v4sf)__B));
+//  return __builtin_ia32_andps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_andc ((__v4sf)__B, (__v4sf)__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_or ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_xor ((__v4sf)__A, (__v4sf)__B));
+}
+
+/* Perform a comparison on the four SPFP values of A and B.  For each
+   element, if the comparison is true, place a mask of all ones in the
+   result, otherwise a mask of zeros.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpeq ((__v4sf)__A,(__v4sf) __B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ps (__m128  __A, __m128  __B)
+{
+  __v4sf temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B);
+  return ((__m128)vec_nor (temp, temp));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ps (__m128  __A, __m128  __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
+  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
+  return ((__m128 ) vec_and (c, d));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
+  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
+  return ((__m128 ) vec_or (c, d));
+}
+
+/* Perform a comparison on the lower SPFP values of A and B.  If the
+   comparison is true, place a mask of all ones in the result, otherwise a
+   mask of zeros.  The upper three SPFP values are passed through from A.  */
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ss (__m128  __A, __m128  __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpeq(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmplt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmple(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpgt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpge(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpeq(a, b);
+  c = vec_nor (c, c);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpge(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpgt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmple(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we do the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmplt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
+  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
+  c = vec_and (c, d);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
+  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
+  c = vec_or (c, d);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
+}
+
+/* Compare the lower SPFP values of A and B and return 1 if true
+   and 0 if false.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] == __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] < __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] <= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] > __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] >= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] != __B[0]);
+}
+
+/* FIXME
+ * The __mm_ucomi??_ss implementations below are exactly the same as
+ * __mm_comi??_ss because GCC for PowerPC only generates unordered
+ * compares (scalar and vector).
+ * Technically __mm_comieq_ss et al should be using the ordered
+ * compare and signal for QNaNs.
+ * The __mm_ucomieq_sd et all should be OK, as is.
+ */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] == __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] < __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] <= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] > __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] >= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] != __B[0]);
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_f32 (__m128 __A)
+{
+  return ((__v4sf)__A)[0];
+}
+
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+   rounding mode.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si32 (__m128 __A)
+{
+  __m64 res = 0;
+#ifdef _ARCH_PWR8
+  double dtmp;
+  __asm__(
+#ifdef __LITTLE_ENDIAN__
+      "xxsldwi %x0,%x0,%x0,3;\n"
+#endif
+      "xscvspdp %x2,%x0;\n"
+      "fctiw  %2,%2;\n"
+      "mfvsrd  %1,%x2;\n"
+      : "+wa" (__A),
+        "=r" (res),
+        "=f" (dtmp)
+      : );
+#else
+  res = __builtin_rint(__A[0]);
+#endif
+  return (res);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ss2si (__m128 __A)
+{
+  return _mm_cvtss_si32 (__A);
+}
+
+/* Convert the lower SPFP value to a 32-bit integer according to the
+   current rounding mode.  */
+
+/* Intel intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64 (__m128 __A)
+{
+  __m64 res = 0;
+#ifdef _ARCH_PWR8
+  double dtmp;
+  __asm__(
+#ifdef __LITTLE_ENDIAN__
+      "xxsldwi %x0,%x0,%x0,3;\n"
+#endif
+      "xscvspdp %x2,%x0;\n"
+      "fctid  %2,%2;\n"
+      "mfvsrd  %1,%x2;\n"
+      : "+wa" (__A),
+        "=r" (res),
+        "=f" (dtmp)
+      : );
+#else
+  res = __builtin_llrint(__A[0]);
+#endif
+  return (res);
+}
+
+/* Microsoft intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64x (__m128 __A)
+{
+  return _mm_cvtss_si64 ((__v4sf) __A);
+}
+
+/* Constants for use with _mm_prefetch.  */
+enum _mm_hint
+{
+  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
+  _MM_HINT_ET0 = 7,
+  _MM_HINT_ET1 = 6,
+  _MM_HINT_T0 = 3,
+  _MM_HINT_T1 = 2,
+  _MM_HINT_T2 = 1,
+  _MM_HINT_NTA = 0
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+   processor.  The selector I specifies the type of prefetch operation.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+  /* Current PowerPC will ignores the hint parameters.  */
+  __builtin_prefetch (__P);
+}
+
+/* Convert the two lower SPFP values to 32-bit integers according to the
+   current rounding mode.  Return the integers in packed form.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi32 (__m128 __A)
+{
+  /* Splat two lower SPFP values to both halves.  */
+  __v4sf temp, rounded;
+  __vector unsigned long long result;
+
+  /* Splat two lower SPFP values to both halves.  */
+  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
+  rounded = vec_rint(temp);
+  result = (__vector unsigned long long) vec_cts (rounded, 0);
+
+  return (__m64) ((__vector long long) result)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ps2pi (__m128 __A)
+{
+  return _mm_cvtps_pi32 (__A);
+}
+
+/* Truncate the lower SPFP value to a 32-bit integer.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si32 (__m128 __A)
+{
+  /* Extract the lower float element.  */
+  float temp = __A[0];
+  /* truncate to 32-bit integer and return.  */
+  return temp;
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ss2si (__m128 __A)
+{
+  return _mm_cvttss_si32 (__A);
+}
+
+/* Intel intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64 (__m128 __A)
+{
+  /* Extract the lower float element.  */
+  float temp = __A[0];
+  /* truncate to 32-bit integer and return.  */
+  return temp;
+}
+
+/* Microsoft intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64x (__m128 __A)
+{
+  /* Extract the lower float element.  */
+  float temp = __A[0];
+  /* truncate to 32-bit integer and return.  */
+  return temp;
+}
+
+/* Truncate the two lower SPFP values to 32-bit integers.  Return the
+   integers in packed form.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_pi32 (__m128 __A)
+{
+  __v4sf temp;
+  __vector unsigned long long result;
+
+  /* Splat two lower SPFP values to both halves.  */
+  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
+  result = (__vector unsigned long long) vec_cts (temp, 0);
+
+  return (__m64) ((__vector long long) result)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ps2pi (__m128 __A)
+{
+  return _mm_cvttps_pi32 (__A);
+}
+
+/* Convert B to a SPFP value and insert it as element zero in A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+  float temp = __B;
+  __A[0] = temp;
+
+  return __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+  return _mm_cvtsi32_ss (__A, __B);
+}
+
+/* Convert B to a SPFP value and insert it as element zero in A.  */
+/* Intel intrinsic.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+  float temp = __B;
+  __A[0] = temp;
+
+  return __A;
+}
+
+/* Microsoft intrinsic.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+  return _mm_cvtsi64_ss (__A, __B);
+}
+
+/* Convert the two 32-bit values in B to SPFP form and insert them
+   as the two lower elements in A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_ps (__m128        __A, __m64        __B)
+{
+  __vector signed int vm1;
+  __vector float vf1;
+
+  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
+  vf1 = (__vector float) vec_ctf (vm1, 0);
+
+  return ((__m128) (__vector unsigned long long)
+    { ((__vector unsigned long long)vf1) [0],
+	((__vector unsigned long long)__A) [1]});
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+  return _mm_cvtpi32_ps (__A, __B);
+}
+
+/* Convert the four signed 16-bit values in A to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi16_ps (__m64 __A)
+{
+  __vector signed short vs8;
+  __vector signed int vi4;
+  __vector float vf1;
+
+  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
+  vi4 = vec_vupklsh (vs8);
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the four unsigned 16-bit values in A to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu16_ps (__m64 __A)
+{
+  const __vector unsigned short zero =
+    { 0, 0, 0, 0, 0, 0, 0, 0 };
+  __vector unsigned short vs8;
+  __vector unsigned int vi4;
+  __vector float vf1;
+
+  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
+  vi4 = (__vector unsigned int) vec_mergel
+#ifdef __LITTLE_ENDIAN__
+                                           (vs8, zero);
+#else
+                                           (zero, vs8);
+#endif
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the low four signed 8-bit values in A to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi8_ps (__m64 __A)
+{
+  __vector signed char vc16;
+  __vector signed short vs8;
+  __vector signed int vi4;
+  __vector float vf1;
+
+  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
+  vs8 = vec_vupkhsb (vc16);
+  vi4 = vec_vupkhsh (vs8);
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the low four unsigned 8-bit values in A to SPFP form.  */
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
+_mm_cvtpu8_ps (__m64  __A)
+{
+  const __vector unsigned char zero =
+    { 0, 0, 0, 0, 0, 0, 0, 0 };
+  __vector unsigned char vc16;
+  __vector unsigned short vs8;
+  __vector unsigned int vi4;
+  __vector float vf1;
+
+  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
+#ifdef __LITTLE_ENDIAN__
+  vs8 = (__vector unsigned short) vec_mergel (vc16, zero);
+  vi4 = (__vector unsigned int) vec_mergeh (vs8,
+					    (__vector unsigned short) zero);
+#else
+  vs8 = (__vector unsigned short) vec_mergel (zero, vc16);
+  vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero,
+                                            vs8);
+#endif
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the four signed 32-bit values in A and B to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32x2_ps (__m64 __A, __m64 __B)
+{
+  __vector signed int vi4;
+  __vector float vf4;
+
+  vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B };
+  vf4 = (__vector float) vec_ctf (vi4, 0);
+  return (__m128) vf4;
+}
+
+/* Convert the four SPFP values in A to four signed 16-bit integers.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi16 (__m128 __A)
+{
+  __v4sf rounded;
+  __vector signed int temp;
+  __vector unsigned long long result;
+
+  rounded = vec_rint(__A);
+  temp = vec_cts (rounded, 0);
+  result = (__vector unsigned long long) vec_pack (temp, temp);
+
+  return (__m64) ((__vector long long) result)[0];
+}
+
+/* Convert the four SPFP values in A to four signed 8-bit integers.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi8 (__m128 __A)
+{
+  __v4sf rounded;
+  __vector signed int tmp_i;
+  static const __vector signed int zero = {0, 0, 0, 0};
+  __vector signed short tmp_s;
+  __vector signed char res_v;
+
+  rounded = vec_rint(__A);
+  tmp_i = vec_cts (rounded, 0);
+  tmp_s = vec_pack (tmp_i, zero);
+  res_v = vec_pack (tmp_s, tmp_s);
+  return (__m64) ((__vector long long) res_v)[0];
+}
+
+/* Selects four specific SPFP values from A and B based on MASK.  */
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
+_mm_shuffle_ps (__m128  __A, __m128  __B, int const __mask)
+{
+  unsigned long element_selector_10 = __mask & 0x03;
+  unsigned long element_selector_32 = (__mask >> 2) & 0x03;
+  unsigned long element_selector_54 = (__mask >> 4) & 0x03;
+  unsigned long element_selector_76 = (__mask >> 6) & 0x03;
+  static const unsigned int permute_selectors[4] =
+    {
+#ifdef __LITTLE_ENDIAN__
+      0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+#else
+      0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
+#endif
+    };
+  __vector unsigned int t;
+
+  t[0] = permute_selectors[element_selector_10];
+  t[1] = permute_selectors[element_selector_32];
+  t[2] = permute_selectors[element_selector_54] + 0x10101010;
+  t[3] = permute_selectors[element_selector_76] + 0x10101010;
+  return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t);
+}
+
+/* Selects and interleaves the upper two SPFP values from A and B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_vmrglw ((__v4sf) __A, (__v4sf)__B);
+}
+
+/* Selects and interleaves the lower two SPFP values from A and B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_vmrghw ((__v4sf) __A, (__v4sf)__B);
+}
+
+/* Sets the upper two SPFP values with 64-bits of data loaded from P;
+   the lower two values are passed through from A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
+  __a [1] = __p [1];
+
+  return (__m128)__a;
+}
+
+/* Stores the upper two SPFP values of A into P.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
+
+  *__P = __a[1];
+}
+
+/* Moves the upper two values of B into the lower two values of A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_mergel ((__vector unsigned long long)__B,
+			      (__vector unsigned long long)__A);
+}
+
+/* Moves the lower two values of B into the upper two values of A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_mergeh ((__vector unsigned long long)__A,
+			      (__vector unsigned long long)__B);
+}
+
+/* Sets the lower two SPFP values with 64-bits of data loaded from P;
+   the upper two values are passed through from A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
+  __a [0] = __p [0];
+
+  return (__m128)__a;
+}
+
+/* Stores the lower two SPFP values of A into P.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
+
+  *__P = __a[0];
+}
+
+#ifdef _ARCH_PWR8
+/* Intrinsic functions that require PowerISA 2.07 minimum.  */
+
+/* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_ps (__m128  __A)
+{
+  __vector unsigned long long result;
+  static const __vector unsigned int perm_mask =
+    {
+#ifdef __LITTLE_ENDIAN__
+	0x00204060, 0x80808080, 0x80808080, 0x80808080
+#else
+      0x80808080, 0x80808080, 0x80808080, 0x00204060
+#endif
+    };
+
+  result = ((__vector unsigned long long)
+	    vec_vbpermq ((__vector unsigned char) __A,
+			 (__vector unsigned char) perm_mask));
+
+#ifdef __LITTLE_ENDIAN__
+  return result[1];
+#else
+  return result[0];
+#endif
+}
+#endif /* _ARCH_PWR8 */
+
+/* Create a vector with all four elements equal to *P.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_ps (float const *__P)
+{
+  return _mm_set1_ps (*__P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps1 (float const *__P)
+{
+  return _mm_load1_ps (__P);
+}
+
+/* Extracts one of the four words of A.  The selector N must be immediate.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+  unsigned int shiftr = __N & 3;
+#ifdef __BIG_ENDIAN__
+  shiftr = 3 - shiftr;
+#endif
+
+  return ((__A >> (shiftr * 16)) & 0xffff);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pextrw (__m64 const __A, int const __N)
+{
+  return _mm_extract_pi16 (__A, __N);
+}
+
+/* Inserts word D into one of four words of A.  The selector N must be
+   immediate.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+  const int shiftl = (__N & 3) * 16;
+  const __m64 shiftD = (const __m64) __D << shiftl;
+  const __m64 mask = 0xffffUL << shiftl;
+  __m64 result = (__A & (~mask)) | (shiftD & mask);
+
+  return (result);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{
+  return _mm_insert_pi16 (__A, __D, __N);
+}
+
+/* Compute the element-wise maximum of signed 16-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector signed short a, b, r;
+  __vector __bool short c;
+
+  a = (__vector signed short)vec_splats (__A);
+  b = (__vector signed short)vec_splats (__B);
+  c = (__vector __bool short)vec_cmpgt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+  res.as_short[0] =
+      (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
+  res.as_short[1] =
+      (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
+  res.as_short[2] =
+      (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
+  res.as_short[3] =
+      (m1.as_short[3] > m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+  return _mm_max_pi16 (__A, __B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector unsigned char a, b, r;
+  __vector __bool char c;
+
+  a = (__vector unsigned char)vec_splats (__A);
+  b = (__vector unsigned char)vec_splats (__B);
+  c = (__vector __bool char)vec_cmpgt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+  long i;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+
+  for (i = 0; i < 8; i++)
+  res.as_char[i] =
+      ((unsigned char) m1.as_char[i] > (unsigned char) m2.as_char[i]) ?
+	  m1.as_char[i] : m2.as_char[i];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+  return _mm_max_pu8 (__A, __B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector signed short a, b, r;
+  __vector __bool short c;
+
+  a = (__vector signed short)vec_splats (__A);
+  b = (__vector signed short)vec_splats (__B);
+  c = (__vector __bool short)vec_cmplt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+  res.as_short[0] =
+      (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
+  res.as_short[1] =
+      (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
+  res.as_short[2] =
+      (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
+  res.as_short[3] =
+      (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminsw (__m64 __A, __m64 __B)
+{
+  return _mm_min_pi16 (__A, __B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector unsigned char a, b, r;
+  __vector __bool char c;
+
+  a = (__vector unsigned char)vec_splats (__A);
+  b = (__vector unsigned char)vec_splats (__B);
+  c = (__vector __bool char)vec_cmplt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+  long i;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+
+  for (i = 0; i < 8; i++)
+  res.as_char[i] =
+      ((unsigned char) m1.as_char[i] < (unsigned char) m2.as_char[i]) ?
+	  m1.as_char[i] : m2.as_char[i];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminub (__m64 __A, __m64 __B)
+{
+  return _mm_min_pu8 (__A, __B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+  unsigned long long p =
+#ifdef __LITTLE_ENDIAN__
+                         0x0008101820283038UL; // permute control for sign bits
+#else
+                         0x3830282018100800UL; // permute control for sign bits
+#endif
+  return __builtin_bpermd (p, __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmovmskb (__m64 __A)
+{
+  return _mm_movemask_pi8 (__A);
+}
+
+/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
+   in B and produce the high 16 bits of the 32-bit results.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+  __vector unsigned short a, b;
+  __vector unsigned short c;
+  __vector unsigned int w0, w1;
+  __vector unsigned char xform1 = {
+#ifdef __LITTLE_ENDIAN__
+      0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
+      0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
+#else
+      0x00, 0x01, 0x10, 0x11,  0x04, 0x05, 0x14, 0x15,
+      0x00, 0x01, 0x10, 0x11,  0x04, 0x05, 0x14, 0x15
+#endif
+    };
+
+  a = (__vector unsigned short)vec_splats (__A);
+  b = (__vector unsigned short)vec_splats (__B);
+
+  w0 = vec_vmuleuh (a, b);
+  w1 = vec_vmulouh (a, b);
+  c = (__vector unsigned short)vec_perm (w0, w1, xform1);
+
+  return (__m64) ((__vector long long) c)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+  return _mm_mulhi_pu16 (__A, __B);
+}
+
+/* Return a combination of the four 16-bit values in A.  The selector
+   must be an immediate.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+  unsigned long element_selector_10 = __N & 0x03;
+  unsigned long element_selector_32 = (__N >> 2) & 0x03;
+  unsigned long element_selector_54 = (__N >> 4) & 0x03;
+  unsigned long element_selector_76 = (__N >> 6) & 0x03;
+  static const unsigned short permute_selectors[4] =
+    {
+#ifdef __LITTLE_ENDIAN__
+	      0x0908, 0x0B0A, 0x0D0C, 0x0F0E
+#else
+	      0x0607, 0x0405, 0x0203, 0x0001
+#endif
+    };
+  __m64_union t;
+  __vector unsigned long long a, p, r;
+
+#ifdef __LITTLE_ENDIAN__
+  t.as_short[0] = permute_selectors[element_selector_10];
+  t.as_short[1] = permute_selectors[element_selector_32];
+  t.as_short[2] = permute_selectors[element_selector_54];
+  t.as_short[3] = permute_selectors[element_selector_76];
+#else
+  t.as_short[3] = permute_selectors[element_selector_10];
+  t.as_short[2] = permute_selectors[element_selector_32];
+  t.as_short[1] = permute_selectors[element_selector_54];
+  t.as_short[0] = permute_selectors[element_selector_76];
+#endif
+  p = vec_splats (t.as_m64);
+  a = vec_splats (__A);
+  r = vec_perm (a, a, (__vector unsigned char)p);
+  return (__m64) ((__vector long long) r)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pshufw (__m64 __A, int const __N)
+{
+  return _mm_shuffle_pi16 (__A, __N);
+}
+
+/* Conditionally store byte elements of A into P.  The high bit of each
+   byte in the selector N determines whether the corresponding byte from
+   A is stored.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
+  __m64 hibit = 0x8080808080808080UL;
+  __m64 mask, tmp;
+  __m64 *p = (__m64*)__P;
+
+  tmp = *p;
+  mask = _mm_cmpeq_pi8 ((__N & hibit), hibit);
+  tmp = (tmp & (~mask)) | (__A & mask);
+  *p = tmp;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+  _mm_maskmove_si64 (__A, __N, __P);
+}
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+  __vector unsigned char a, b, c;
+
+  a = (__vector unsigned char)vec_splats (__A);
+  b = (__vector unsigned char)vec_splats (__B);
+  c = vec_avg (a, b);
+  return (__m64) ((__vector long long) c)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgb (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu8 (__A, __B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+  __vector unsigned short a, b, c;
+
+  a = (__vector unsigned short)vec_splats (__A);
+  b = (__vector unsigned short)vec_splats (__B);
+  c = vec_avg (a, b);
+  return (__m64) ((__vector long long) c)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgw (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu16 (__A, __B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+   values in A and B.  Return the value in the lower 16-bit word; the
+   upper words are cleared.  */
+extern __inline    __m64    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_pu8 (__m64  __A, __m64  __B)
+{
+  __vector unsigned char a, b;
+  __vector unsigned char vmin, vmax, vabsdiff;
+  __vector signed int vsum;
+  const __vector unsigned int zero =
+    { 0, 0, 0, 0 };
+  __m64_union result = {0};
+
+  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
+  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
+  vmin = vec_min (a, b);
+  vmax = vec_max (a, b);
+  vabsdiff = vec_sub (vmax, vmin);
+  /* Sum four groups of bytes into integers.  */
+  vsum = (__vector signed int) vec_sum4s (vabsdiff, zero);
+  /* Sum across four integers with integer result.  */
+  vsum = vec_sums (vsum, (__vector signed int) zero);
+  /* The sum is in the right most 32-bits of the vector result.
+     Transfer to a GPR and truncate to 16 bits.  */
+  result.as_short[0] = vsum[3];
+  return result.as_m64;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psadbw (__m64 __A, __m64 __B)
+{
+  return _mm_sad_pu8 (__A, __B);
+}
+
+/* Stores the data in A to the address P without polluting the caches.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+  /* Use the data cache block touch for store transient.  */
+  __asm__ (
+    "	dcbtstt	0,%0"
+    :
+    : "b" (__P)
+    : "memory"
+  );
+  *__P = __A;
+}
+
+/* Likewise.  The address must be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ps (float *__P, __m128 __A)
+{
+  /* Use the data cache block touch for store transient.  */
+  __asm__ (
+    "	dcbtstt	0,%0"
+    :
+    : "b" (__P)
+    : "memory"
+  );
+  _mm_store_ps (__P, __A);
+}
+
+/* Guarantees that every preceding store is globally visible before
+   any subsequent store.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sfence (void)
+{
+  /* Generate a light weight sync.  */
+  __atomic_thread_fence (__ATOMIC_RELEASE);
+}
+
+/* The execution of the next instruction is delayed by an implementation
+   specific amount of time.  The instruction does not modify the
+   architectural state.  This is after the pop_options pragma because
+   it does not require SSE support in the processor--the encoding is a
+   nop on processors that do not support it.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+  /* There is no exact match with this construct, but the following is
+     close to the desired effect.  */
+#if _ARCH_PWR8
+  /* On power8 and later processors we can depend on Program Priority
+     (PRI) and associated "very low" PPI setting.  Since we don't know
+     what PPI this thread is running at we: 1) save the current PRI
+     from the PPR SPR into a local GRP, 2) set the PRI to "very low*
+     via the special or 31,31,31 encoding. 3) issue an "isync" to
+     insure the PRI change takes effect before we execute any more
+     instructions.
+     Now we can execute a lwsync (release barrier) while we execute
+     this thread at "very low" PRI.  Finally we restore the original
+     PRI and continue execution.  */
+  unsigned long __PPR;
+
+  __asm__ volatile (
+    "	mfppr	%0;"
+    "   or 31,31,31;"
+    "   isync;"
+    "   lwsync;"
+    "   isync;"
+    "   mtppr	%0;"
+    : "=r" (__PPR)
+    :
+    : "memory"
+  );
+#else
+  /* For older processor where we may not even have Program Priority
+     controls we can only depend on Heavy Weight Sync.  */
+  __atomic_thread_fence (__ATOMIC_SEQ_CST);
+#endif
+}
+
+/* Transpose the 4x4 matrix composed of row[0-3].  */
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
+do {									\
+  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);	\
+  __v4sf __t0 = vec_vmrghw (__r0, __r1);			\
+  __v4sf __t1 = vec_vmrghw (__r2, __r3);			\
+  __v4sf __t2 = vec_vmrglw (__r0, __r1);			\
+  __v4sf __t3 = vec_vmrglw (__r2, __r3);			\
+  (row0) = (__v4sf)vec_mergeh ((__vector long long)__t0, 	\
+			       (__vector long long)__t1);	\
+  (row1) = (__v4sf)vec_mergel ((__vector long long)__t0,	\
+			       (__vector long long)__t1);	\
+  (row2) = (__v4sf)vec_mergeh ((__vector long long)__t2,	\
+			       (__vector long long)__t3);	\
+  (row3) = (__v4sf)vec_mergel ((__vector long long)__t2,	\
+			       (__vector long long)__t3);	\
+} while (0)
+
+/* For backward source compatibility.  */
+//# include <emmintrin.h>
+
+#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/clang/test/CodeGen/ppc-mm-malloc.c b/clang/test/CodeGen/ppc-mm-malloc.c
new file mode 100644
index 0000000000000..e0a20f81ee7d7
--- /dev/null
+++ b/clang/test/CodeGen/ppc-mm-malloc.c
@@ -0,0 +1,71 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// The stdlib.h included in mm_malloc.h references native system header
+// like: bits/libc-header-start.h or features.h, cross-compile it may
+// require installing target headers in build env, otherwise expecting
+// failures. So this test will focus on native build only.
+
+// RUN: %clang -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
+
+#include <mm_malloc.h>
+
+void __attribute__((noinline))
+test_mm_malloc() {
+  char *buf = _mm_malloc(100, 16);
+  _mm_free(buf);
+}
+
+// CHECK-LABEL: @test_mm_malloc
+
+// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
+// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
+// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
+// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
+// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG11]]:
+// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
+// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG16]]:
+// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
+// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
+// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG12]]:
+// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
+// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG23]]:
+// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
+// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
+// CHECK: [[REG24]]:
+// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
+// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
+// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
+// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG31]]:
+// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
+// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG32]]:
+// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG19]]:
+// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
+// CHECK-NEXT: ret i8* [[REG34]]
+
+// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
+// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
+// CHECK-NEXT: call void @free(i8* [[REG37]])
+// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/ppc-mmintrin.c b/clang/test/CodeGen/ppc-mmintrin.c
index 212a387ec35b8..65a44570c0264 100644
--- a/clang/test/CodeGen/ppc-mmintrin.c
+++ b/clang/test/CodeGen/ppc-mmintrin.c
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c
new file mode 100644
index 0000000000000..d2d0334a09446
--- /dev/null
+++ b/clang/test/CodeGen/ppc-xmmintrin.c
@@ -0,0 +1,2090 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+#include <xmmintrin.h>
+
+__m128 res, m1, m2;
+__m64 res64, ms[2];
+float fs[4];
+int i, i2;
+long long i64;
+
+// CHECK-LE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
+// CHECK-BE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2
+
+// CHECK-LE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
+// CHECK-BE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
+
+void __attribute__((noinline))
+test_add() {
+  res = _mm_add_ps(m1, m2);
+  res = _mm_add_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_add
+
+// CHECK: define available_externally <4 x float> @_mm_add_ps(<4 x float> [[REG1:[0-9a-zA-Z_%.]+]], <4 x float> [[REG2:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG1]], <4 x float>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG2]], <4 x float>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG3]], align 16
+// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG4]], align 16
+// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG5]], [[REG6]]
+// CHECK-NEXT: ret <4 x float> [[REG7]]
+
+// CHECK: define available_externally <4 x float> @_mm_add_ss(<4 x float> [[REG8:[0-9a-zA-Z_%.]+]], <4 x float> [[REG9:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG8]], <4 x float>* [[REG10:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG9]], <4 x float>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
+// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG12]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG11]], align 16
+// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG15]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG16]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
+// CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
+// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG18]], [[REG19]]
+// CHECK-NEXT: store <4 x float> [[REG20]], <4 x float>* [[REG21:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
+// CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG21]], align 16
+// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG22]], <4 x float> [[REG23]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG24]]
+
+void __attribute__((noinline))
+test_avg() {
+  res64 = _mm_avg_pu16(ms[0], ms[1]);
+  res64 = _mm_avg_pu8(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_avg
+
+// CHECK: define available_externally i64 @_mm_avg_pu16
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG25]])
+// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG28]])
+// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG29]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG30]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG31]], <8 x i16> [[REG32]])
+// CHECK-NEXT: store <8 x i16> [[REG33]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG34]] to <2 x i64>
+// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG35]], i32 0
+// CHECK-NEXT: ret i64 [[REG36]]
+
+// CHECK: define available_externally i64 @_mm_avg_pu8
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG37]])
+// CHECK-NEXT: [[REG39:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG38]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG39]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG40]])
+// CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG41]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG42]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG43]], <16 x i8> [[REG44]])
+// CHECK-NEXT: store <16 x i8> [[REG45]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG46]] to <2 x i64>
+// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG47]], i32 0
+// CHECK-NEXT: ret i64 [[REG48]]
+
+void __attribute__((noinline))
+test_alt_name_avg() {
+  res64 = _m_pavgw(ms[0], ms[1]);
+  res64 = _m_pavgb(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_alt_name_avg
+
+// CHECK: define available_externally i64 @_m_pavgw
+// CHECK: [[REG49:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu16
+// CHECK-NEXT: ret i64 [[REG49]]
+
+// CHECK: define available_externally i64 @_m_pavgb
+// CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu8
+// CHECK-NEXT: ret i64 [[REG50]]
+
+void __attribute__((noinline))
+test_cmp() {
+  res = _mm_cmpeq_ps(m1, m2);
+  res = _mm_cmpeq_ss(m1, m2);
+  res = _mm_cmpge_ps(m1, m2);
+  res = _mm_cmpge_ss(m1, m2);
+  res = _mm_cmpgt_ps(m1, m2);
+  res = _mm_cmpgt_ss(m1, m2);
+  res = _mm_cmple_ps(m1, m2);
+  res = _mm_cmple_ss(m1, m2);
+  res = _mm_cmplt_ps(m1, m2);
+  res = _mm_cmplt_ss(m1, m2);
+  res = _mm_cmpneq_ps(m1, m2);
+  res = _mm_cmpneq_ss(m1, m2);
+  res = _mm_cmpnge_ps(m1, m2);
+  res = _mm_cmpnge_ss(m1, m2);
+  res = _mm_cmpngt_ps(m1, m2);
+  res = _mm_cmpngt_ss(m1, m2);
+  res = _mm_cmpnle_ps(m1, m2);
+  res = _mm_cmpnle_ss(m1, m2);
+  res = _mm_cmpnlt_ps(m1, m2);
+  res = _mm_cmpnlt_ss(m1, m2);
+  res = _mm_cmpord_ps(m1, m2);
+  res = _mm_cmpord_ss(m1, m2);
+  res = _mm_cmpunord_ps(m1, m2);
+  res = _mm_cmpunord_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_cmp
+
+// CHECK: define available_externally <4 x float> @_mm_cmpeq_ps
+// CHECK: [[REG51:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG51]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG52]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpeq_ss
+// CHECK: [[REG53:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG53]], <4 x float>* [[REG54:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG55]], <4 x float>* [[REG56:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG54]], align 16
+// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG56]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG57]], <4 x float> [[REG58]])
+// CHECK: [[REG59:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG59]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpge_ps
+// CHECK: [[REG60:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG60]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG61]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpge_ss
+// CHECK: [[REG62:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG62]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG64:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG64]], <4 x float>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
+// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG65]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG66]], <4 x float> [[REG67]])
+// CHECK: [[REG68:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG68]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpgt_ps
+// CHECK: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG69]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG70]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpgt_ss
+// CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG71]], <4 x float>* [[REG72:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG73]], <4 x float>* [[REG74:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG72]], align 16
+// CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG74]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG75]], <4 x float> [[REG76]])
+// CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG77]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmple_ps
+// CHECK: [[REG78:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG78]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG79]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmple_ss
+// CHECK: [[REG80:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG80]], <4 x float>* [[REG81:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG82:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG82]], <4 x float>* [[REG83:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG81]], align 16
+// CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG83]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG84]], <4 x float> [[REG85]])
+// CHECK: [[REG86:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG86]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmplt_ps
+// CHECK: [[REG87:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG87]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG88]]
+
+// CHECK: @_mm_cmplt_ss
+// CHECK: [[REG89:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG89]], <4 x float>* [[REG90:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG90]], align 16
+// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG93]], <4 x float> [[REG94]])
+// CHECK: [[REG95:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG95]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpneq_ps
+// CHECK: [[REG96:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG96]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[REG97]], <4 x float>* [[REG98:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
+// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
+// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_nor(float vector[4], float vector[4])(<4 x float> [[REG99]], <4 x float> [[REG100]])
+// CHECK-NEXT: ret <4 x float> [[REG101]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpneq_ss
+// CHECK: [[REG102:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG102]], <4 x float>* [[REG103:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG104:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG104]], <4 x float>* [[REG105:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG103]], align 16
+// CHECK-NEXT: [[REG107:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG105]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG106]], <4 x float> [[REG107]])
+// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
+// CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG108]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnge_ps
+// CHECK: [[REG109:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG109]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG110]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnge_ss
+// CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG111]], <4 x float>* [[REG112:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG113:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG113]], <4 x float>* [[REG114:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG112]], align 16
+// CHECK-NEXT: [[REG116:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG114]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG115]], <4 x float> [[REG116]])
+// CHECK: [[REG117:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG117]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpngt_ps
+// CHECK: [[REG118:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG118]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG119]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpngt_ss
+// CHECK: [[REG120:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG120]], <4 x float>* [[REG121:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG122:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG122]], <4 x float>* [[REG123:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG124:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG121]], align 16
+// CHECK-NEXT: [[REG125:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG123]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG124]], <4 x float> [[REG125]])
+// CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG126]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnle_ps
+// CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG128:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG127]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG128]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnle_ss
+// CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG129]], <4 x float>* [[REG130:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG131]], <4 x float>* [[REG132:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG130]], align 16
+// CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG132]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG133]], <4 x float> [[REG134]])
+// CHECK: [[REG135:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG135]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ps
+// CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG137:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG136]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG137]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ss
+// CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG138]], <4 x float>* [[REG139:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG140]], <4 x float>* [[REG141:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG142:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG139]], align 16
+// CHECK-NEXT: [[REG143:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG141]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG142]], <4 x float> [[REG143]])
+// CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG144]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpord_ps
+// CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG146:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG145]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG146]], <4 x i32>* [[REG147:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG149:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG148]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG149]], <4 x i32>* [[REG150:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG147]], align 16
+// CHECK-NEXT: [[REG152:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG151]])
+// CHECK-NEXT: store <4 x i32> [[REG152]], <4 x i32>* [[REG153:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG150]], align 16
+// CHECK-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG154]])
+// CHECK-NEXT: store <4 x i32> [[REG155]], <4 x i32>* [[REG156:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG153]], align 16
+// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG156]], align 16
+// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> {{[0-9a-zA-Z_%.]+}}, <4 x i32> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG160:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG159]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG160]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpord_ss
+// CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
+// CHECK-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG161]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG162]], <4 x i32>* [[REG163:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG164:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
+// CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG164]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG165]], <4 x i32>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG163]], align 16
+// CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG167]])
+// CHECK-NEXT: store <4 x i32> [[REG168]], <4 x i32>* [[REG161:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG166]], align 16
+// CHECK-NEXT: [[REG170:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG169]])
+// CHECK-NEXT: store <4 x i32> [[REG170]], <4 x i32>* [[REG171:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
+// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG171]], align 16
+// CHECK-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG172]], <4 x i32> [[REG173]])
+// CHECK-NEXT: store <4 x i32> [[REG174]], <4 x i32>* [[REG161]], align 16
+// CHECK: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
+// CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG175]] to <4 x float>
+// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> [[REG176]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG177]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpunord_ps
+// CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG178]])
+// CHECK-NEXT: [[REG180:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG179]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG180]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG181]])
+// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG182]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG183]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG184:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG184]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG185]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG186]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG187]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG188:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG189:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG188]], <4 x i32> [[REG189]])
+// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG190]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG191]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpunord_ss
+// CHECK: [[REG192:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG193:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG192]])
+// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG193]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG194]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG195]])
+// CHECK-NEXT: [[REG197:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG196]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG197]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG198]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG199]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG200:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG200]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG201]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG204:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG202]], <4 x i32> [[REG203]])
+// CHECK-NEXT: store <4 x i32> [[REG204]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG206]] to <4 x float>
+// CHECK-NEXT: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG205]], <4 x float> [[REG207]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG208]]
+
+void __attribute__((noinline))
+test_comi() {
+  i = _mm_comieq_ss(m1, m2);
+  i = _mm_comige_ss(m1, m2);
+  i = _mm_comigt_ss(m1, m2);
+  i = _mm_comile_ss(m1, m2);
+  i = _mm_comilt_ss(m1, m2);
+  i = _mm_comineq_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_comi
+
+// CHECK: define available_externally signext i32 @_mm_comieq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG210:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG209]], i32 0
+// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG212:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG211]], i32 0
+// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG210]], [[REG212]]
+// CHECK-NEXT: [[REG214:[0-9a-zA-Z_%.]+]] = zext i1 [[REG213]] to i32
+// CHECK-NEXT: ret i32 [[REG214]]
+
+// CHECK: define available_externally signext i32 @_mm_comige_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG216:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG215]], i32 0
+// CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG218:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG217]], i32 0
+// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG216]], [[REG218]]
+// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = zext i1 [[REG219]] to i32
+// CHECK-NEXT: ret i32 [[REG220]]
+
+// CHECK: define available_externally signext i32 @_mm_comigt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG222:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG221]], i32 0
+// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG224:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG223]], i32 0
+// CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG222]], [[REG224]]
+// CHECK-NEXT: [[REG226:[0-9a-zA-Z_%.]+]] = zext i1 [[REG225]] to i32
+// CHECK-NEXT: ret i32 [[REG226]]
+
+// CHECK: define available_externally signext i32 @_mm_comile_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG227:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG228:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG227]], i32 0
+// CHECK-NEXT: [[REG229:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG230:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG229]], i32 0
+// CHECK-NEXT: [[REG231:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG228]], [[REG230]]
+// CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = zext i1 [[REG231]] to i32
+// CHECK-NEXT: ret i32 [[REG232]]
+
+// CHECK: define available_externally signext i32 @_mm_comilt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG233]], i32 0
+// CHECK-NEXT: [[REG235:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG235]], i32 0
+// CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG234]], [[REG236]]
+// CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = zext i1 [[REG237]] to i32
+// CHECK-NEXT: ret i32 [[REG238]]
+
+// CHECK: define available_externally signext i32 @_mm_comineq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG239:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG239]], i32 0
+// CHECK-NEXT: [[REG241:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG242:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG241]], i32 0
+// CHECK-NEXT: [[REG243:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG240]], [[REG242]]
+// CHECK-NEXT: [[REG244:[0-9a-zA-Z_%.]+]] = zext i1 [[REG243]] to i32
+// CHECK-NEXT: ret i32 [[REG244]]
+
+void __attribute__((noinline))
+test_convert() {
+  res = _mm_cvt_pi2ps(m1, ms[1]);
+  res64 = _mm_cvt_ps2pi(m1);
+  res = _mm_cvt_si2ss(m1, i);
+  i = _mm_cvt_ss2si(m1);
+  res = _mm_cvtpi16_ps(ms[0]);
+  res = _mm_cvtpi32_ps(m1, ms[1]);
+  res = _mm_cvtpi32x2_ps(ms[0], ms[1]);
+  res = _mm_cvtpi8_ps(ms[0]);
+  res64 = _mm_cvtps_pi16(m1);
+  res64 = _mm_cvtps_pi32(m1);
+  res64 = _mm_cvtps_pi8(m1);
+  res = _mm_cvtpu16_ps(ms[0]);
+  res = _mm_cvtpu8_ps(ms[0]);
+  res = _mm_cvtsi32_ss(m1, i);
+  res = _mm_cvtsi64_ss(m1, i64);
+  fs[0] = _mm_cvtss_f32(m1);
+  i = _mm_cvtss_si32(m1);
+  i64 = _mm_cvtss_si64(m1);
+  res64 = _mm_cvtt_ps2pi(m1);
+  i = _mm_cvtt_ss2si(m1);
+  res64 = _mm_cvttps_pi32(m1);
+  i = _mm_cvttss_si32(m1);
+  i64 = _mm_cvttss_si64(m1);
+}
+
+// CHECK-LABEL: @test_convert
+
+// CHECK: define available_externally <4 x float> @_mm_cvt_pi2ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG246:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtpi32_ps(<4 x float> [[REG245]], i64 [[REG246]])
+// CHECK-NEXT: ret <4 x float> [[REG247]]
+
+// CHECK: define available_externally i64 @_mm_cvt_ps2pi
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtps_pi32(<4 x float> [[REG248]])
+// CHECK-NEXT: ret i64 [[REG249]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvt_si2ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtsi32_ss(<4 x float> [[REG250]], i32 signext [[REG251]])
+// CHECK-NEXT: ret <4 x float> [[REG252]]
+
+// CHECK: define available_externally signext i32 @_mm_cvt_ss2si
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG253:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG254:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtss_si32(<4 x float> [[REG253]])
+// CHECK-NEXT: ret i32 [[REG254]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi16_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG255:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG255]], i32 0
+// CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG256]], i64 [[REG257]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG258]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG259]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG260]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG261:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupklsh(short vector[8])(<8 x i16> [[REG261]])
+// CHECK-NEXT: store <4 x i32> [[REG262]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG263]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG264]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG265:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG265]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi32_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG266]], i32 0
+// CHECK-NEXT: [[REG268:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG267]], i64 [[REG268]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG269]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG270:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG270]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG271]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG273:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG272]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG273]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG274:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG274]] to <2 x i64>
+// CHECK-NEXT: [[REG276:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG275]], i32 0
+// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG276]], i32 0
+// CHECK-NEXT: [[REG278:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG278]] to <2 x i64>
+// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG279]], i32 1
+// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG277]], i64 [[REG280]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG281]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG283:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG282]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG283]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi32x2_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG284:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG284]], i32 0
+// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG287:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG285]], i64 [[REG286]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG287]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG288:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG288]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG289]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG290]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG291]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG292]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi8_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG293]], i32 0
+// CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG294]], i64 [[REG295]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG296]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG298:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG297]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG298]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG300:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_vupkhsb(signed char vector[16])(<16 x i8> [[REG299]])
+// CHECK-NEXT: store <8 x i16> [[REG300]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG301:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupkhsh(short vector[8])(<8 x i16> [[REG301]])
+// CHECK-NEXT: store <4 x i32> [[REG302]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG303]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG304]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG305]]
+
+// CHECK: define available_externally i64 @_mm_cvtps_pi16
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG306:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG307:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG306]])
+// CHECK-NEXT: store <4 x float> [[REG307]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG308]], i32 0)
+// CHECK-NEXT: store <4 x i32> [[REG309]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG310]], <4 x i32> [[REG311]])
+// CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG312]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG313]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG314]], i32 0
+// CHECK-NEXT: ret i64 [[REG315]]
+
+// CHECK: define available_externally i64 @_mm_cvtps_pi32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG316:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG316]] to <2 x i64>
+// CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG317]], i32 zeroext 0)
+// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG318]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[REG319]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG320]])
+// CHECK-NEXT: store <4 x float> [[REG321]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG322:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG322]], i32 0)
+// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG323]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG324]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG325:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG326:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG325]], i32 0
+// CHECK-NEXT: ret i64 [[REG326]]
+
+// CHECK: define available_externally i64 @_mm_cvtps_pi8
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG328:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG327]])
+// CHECK-NEXT: store <4 x float> [[REG328]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG330:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG329]], i32 0)
+// CHECK-NEXT: store <4 x i32> [[REG330]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG332:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG331]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: store <8 x i16> [[REG332]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG335:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(short vector[8], short vector[8])(<8 x i16> [[REG333]], <8 x i16> [[REG334]])
+// CHECK-NEXT: store <16 x i8> [[REG335]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG337:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG336]] to <2 x i64>
+// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG337]], i32 0
+// CHECK-NEXT: ret i64 [[REG338]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpu16_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG339]], i32 0
+// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG340]], i64 [[REG341]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG342]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG343]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG344]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG345]], <8 x i16> zeroinitializer)
+// CHECK-BE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG345]])
+// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG346]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG347]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG348:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG349:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG348]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG349]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG350]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpu8_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG351]], i32 0
+// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG354:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG352]], i64 [[REG353]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG354]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG355]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG356]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG357]], <16 x i8> zeroinitializer)
+// CHECK-BE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> zeroinitializer, <16 x i8> [[REG357]])
+// CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG358]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG359]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG360]], <8 x i16> zeroinitializer)
+// CHECK-BE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG360]])
+// CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG361]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG362]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG364:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG363]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG364]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG365]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtsi32_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = sitofp i32 [[REG366]] to float
+// CHECK-NEXT: store float [[REG367]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG369:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG369]], float [[REG368]], i32 0
+// CHECK-NEXT: store <4 x float> [[REG370]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG371:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG371]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtsi64_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = sitofp i64 [[REG372]] to float
+// CHECK-NEXT: store float [[REG373]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG374:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG375]], float [[REG374]], i32 0
+// CHECK-NEXT: store <4 x float> [[REG376]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG377]]
+
+// CHECK: define available_externally float @_mm_cvtss_f32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG378]], i32 0
+// CHECK-NEXT: ret float [[REG379]]
+
+// CHECK: define available_externally signext i32 @_mm_cvtss_si32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
+// CHECK-BE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
+// CHECK-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 0
+// CHECK-NEXT: [[REG383:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 1
+// CHECK-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 2
+// CHECK-NEXT: store <4 x float> [[REG382]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 [[REG383]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store double [[REG384]], double* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG385:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG386:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG385]] to i32
+// CHECK-NEXT: ret i32 [[REG386]]
+
+// CHECK: define available_externally i64 @_mm_cvtss_si64
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
+// CHECK-BE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
+// CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 0
+// CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 1
+// CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 2
+// CHECK-NEXT: store <4 x float> [[REG389]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 [[REG390]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store double [[REG391]], double* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: ret i64 [[REG392]]
+
+// CHECK: define available_externally i64 @_mm_cvtt_ps2pi
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK: [[REG393:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvttps_pi32(<4 x float> [[REG393]])
+// CHECK-NEXT: ret i64 [[REG394]]
+
+// CHECK: define available_externally signext i32 @_mm_cvtt_ss2si
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG396:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvttss_si32(<4 x float> [[REG395]])
+// CHECK-NEXT: ret i32 [[REG396]]
+
+// CHECK: define available_externally i64 @_mm_cvttps_pi32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG397]] to <2 x i64>
+// CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG398]], i32 zeroext 0)
+// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[REG400]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG401:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG401]], i32 0)
+// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG402]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG403]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG404:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG404]], i32 0
+// CHECK-NEXT: ret i64 [[REG405]]
+
+// CHECK: define available_externally signext i32 @_mm_cvttss_si32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG407:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG406]], i32 0
+// CHECK-NEXT: store float [[REG407]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG409:[0-9a-zA-Z_%.]+]] = fptosi float [[REG408]] to i32
+// CHECK-NEXT: ret i32 [[REG409]]
+
+// CHECK: define available_externally i64 @_mm_cvttss_si64
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG410:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG410]], i32 0
+// CHECK-NEXT: store float [[REG411]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = fptosi float [[REG412]] to i64
+// CHECK-NEXT: ret i64 [[REG413]]
+
+void __attribute__((noinline))
+test_div() {
+  res = _mm_div_ps(m1, m2);
+  res = _mm_div_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_div
+
+// CHECK: define available_externally <4 x float> @_mm_div_ps(<4 x float> [[REG414:[0-9a-zA-Z_%.]+]], <4 x float> [[REG415:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG414]], <4 x float>* [[REG416:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG415]], <4 x float>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG418:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG416]], align 16
+// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG417]], align 16
+// CHECK-NEXT: [[REG420:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG418]], [[REG419]]
+// CHECK-NEXT: ret <4 x float> [[REG420]]
+
+// CHECK: define available_externally <4 x float> @_mm_div_ss(<4 x float> [[REG421:[0-9a-zA-Z_%.]+]], <4 x float> [[REG422:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG421]], <4 x float>* [[REG423:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG422]], <4 x float>* [[REG424:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
+// CHECK-NEXT: [[REG426:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG425]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG426]], <4 x float>* [[REG427:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG424]], align 16
+// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG428]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG429]], <4 x float>* [[REG430:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG431:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG427]], align 16
+// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG430]], align 16
+// CHECK-NEXT: [[REG433:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG431]], [[REG432]]
+// CHECK-NEXT: store <4 x float> [[REG433]], <4 x float>* [[REG434:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
+// CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG434]], align 16
+// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG435]], <4 x float> [[REG436]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG437]]
+
+void __attribute__((noinline))
+test_extract() {
+  i = _mm_extract_pi16(ms[0], i2);
+  i = _m_pextrw(ms[0], i2);
+}
+
+// CHECK-LABEL: @test_extract
+
+// CHECK: define available_externally signext i32 @_mm_extract_pi16
+// CHECK: [[REG438:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG439:[0-9a-zA-Z_%.]+]] = and i32 [[REG438]], 3
+// CHECK-NEXT: store i32 [[REG439]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-BE: sub i32 3, {{[0-9a-zA-Z_%.]+}}
+// CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = mul i32 [[REG441]], 16
+// CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = zext i32 [[REG442]] to i64
+// CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = lshr i64 [[REG440]], [[REG443]]
+// CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = and i64 [[REG444]], 65535
+// CHECK-NEXT: [[REG446:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG445]] to i32
+// CHECK-NEXT: ret i32 [[REG446]]
+
+// CHECK: define available_externally signext i32 @_m_pextrw
+// CHECK: [[REG447:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_extract_pi16
+// CHECK-NEXT: ret i32 [[REG447]]
+
+void __attribute__((noinline))
+test_insert() {
+  res64 = _mm_insert_pi16(ms[0], i, i2);
+  res64 = _m_pinsrw(ms[0], i, i2);
+}
+
+// CHECK-LABEL: @test_insert
+
+// CHECK: define available_externally i64 @_mm_insert_pi16
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = and i32 [[REG448]], 3
+// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG449]], 16
+// CHECK-NEXT: store i32 [[REG450]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = sext i32 [[REG451]] to i64
+// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = zext i32 [[REG453]] to i64
+// CHECK-NEXT: [[REG455:[0-9a-zA-Z_%.]+]] = shl i64 [[REG452]], [[REG454]]
+// CHECK-NEXT: store i64 [[REG455]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = zext i32 [[REG456]] to i64
+// CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = shl i64 65535, [[REG457]]
+// CHECK-NEXT: store i64 [[REG458]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG461:[0-9a-zA-Z_%.]+]] = xor i64 [[REG460]], -1
+// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = and i64 [[REG459]], [[REG461]]
+// CHECK-NEXT: [[REG463:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG464:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = and i64 [[REG463]], [[REG464]]
+// CHECK-NEXT: [[REG466:[0-9a-zA-Z_%.]+]] = or i64 [[REG462]], [[REG465]]
+// CHECK-NEXT: store i64 [[REG466]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: ret i64 [[REG467]]
+
+// CHECK: define available_externally i64 @_m_pinsrw
+// CHECK: [[REG468:[0-9a-zA-Z_%.]+]] = call i64 @_mm_insert_pi16
+// CHECK-NEXT: ret i64 [[REG468]]
+
+void __attribute__((noinline))
+test_load() {
+  res = _mm_load_ps(fs);
+  res = _mm_load_ps1(fs);
+  res = _mm_load_ss(fs);
+  res = _mm_load1_ps(fs);
+  res = _mm_loadh_pi(m1, &ms[0]);
+  res = _mm_loadl_pi(m1, &ms[0]);
+  res = _mm_loadr_ps(fs);
+  res = _mm_loadu_ps(fs);
+}
+
+// CHECK-LABEL: @test_load
+
+// CHECK: define available_externally <4 x float> @_mm_load_ps
+// CHECK: [[REG469:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(int, float vector[4] const*)
+// CHECK-NEXT: ret <4 x float> [[REG469]]
+
+// CHECK: define available_externally <4 x float> @_mm_load_ps1
+// CHECK: [[REG470:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_load1_ps
+// CHECK-NEXT: ret <4 x float> [[REG470]]
+
+// CHECK: define available_externally <4 x float> @_mm_load_ss
+// CHECK: [[REG471:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set_ss
+// CHECK-NEXT: ret <4 x float> [[REG471]]
+
+// CHECK: define available_externally <4 x float> @_mm_load1_ps
+// CHECK: [[REG472:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps
+// CHECK-NEXT: ret <4 x float> [[REG472]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadh_pi
+// CHECK: [[REG473:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: store <2 x i64> [[REG473]], <2 x i64>* [[REG474:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG474]], align 16
+// CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG475]], i32 1
+// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG479:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG477]], i64 [[REG476]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG479]], <2 x i64>* [[REG478]], align 16
+// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478]], align 16
+// CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG480]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG481]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadl_pi
+// CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: store <2 x i64> [[REG482]], <2 x i64>* [[REG483:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG483]], align 16
+// CHECK-NEXT: [[REG485:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG484]], i32 0
+// CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG488:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG486]], i64 [[REG485]], i32 0
+// CHECK-NEXT: store <2 x i64> [[REG488]], <2 x i64>* [[REG487]], align 16
+// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487]], align 16
+// CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG489]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG490]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadr_ps
+// CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(int, float vector[4] const*)
+// CHECK-NEXT: store <4 x float> [[REG491]], <4 x float>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
+// CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
+// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG493]], <4 x float> [[REG494]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
+// CHECK-NEXT: store <4 x float> [[REG495]], <4 x float>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG496]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG497]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadu_ps
+// CHECK: [[REG498:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vsx_ld(int, float const*)
+// CHECK-NEXT: ret <4 x float> [[REG498]]
+
+void __attribute__((noinline))
+test_logic() {
+  res = _mm_or_ps(m1, m2);
+  res = _mm_and_ps(m1, m2);
+  res = _mm_andnot_ps(m1, m2);
+  res = _mm_xor_ps(m1, m2);
+}
+
+// CHECK-LABEL: @test_logic
+
+// CHECK: define available_externally <4 x float> @_mm_or_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_or(float vector[4], float vector[4])(<4 x float> [[REG499]], <4 x float> [[REG500]])
+// CHECK-NEXT: ret <4 x float> [[REG501]]
+
+// CHECK: define available_externally <4 x float> @_mm_and_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG504:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_and(float vector[4], float vector[4])(<4 x float> [[REG502]], <4 x float> [[REG503]])
+// CHECK-NEXT: ret <4 x float> [[REG504]]
+
+// CHECK: define available_externally <4 x float> @_mm_andnot_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_andc(float vector[4], float vector[4])(<4 x float> [[REG505]], <4 x float> [[REG506]])
+// CHECK-NEXT: ret <4 x float> [[REG507]]
+
+// CHECK: define available_externally <4 x float> @_mm_xor_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG509:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG510:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> [[REG508]], <4 x float> [[REG509]])
+// CHECK-NEXT: ret <4 x float> [[REG510]]
+
+void __attribute__((noinline))
+test_max() {
+  res = _mm_max_ps(m1, m2);
+  res = _mm_max_ss(m1, m2);
+  res64 = _mm_max_pi16(ms[0], ms[1]);
+  res64 = _mm_max_pu8(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_max
+
+// CHECK: define available_externally <4 x float> @_mm_max_ps(<4 x float> [[REG511:[0-9a-zA-Z_%.]+]], <4 x float> [[REG512:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG511]], <4 x float>* [[REG513:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG512]], <4 x float>* [[REG514:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
+// CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
+// CHECK-NEXT: [[REG517:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG515]], <4 x float> [[REG516]])
+// CHECK-NEXT: store <4 x i32> [[REG517]], <4 x i32>* [[REG518:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG519:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
+// CHECK-NEXT: [[REG520:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
+// CHECK-NEXT: [[REG521:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG518]], align 16
+// CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG519]], <4 x float> [[REG520]], <4 x i32> [[REG521]])
+// CHECK-NEXT: ret <4 x float> [[REG522]]
+
+// CHECK: define available_externally <4 x float> @_mm_max_ss
+// CHECK: [[REG523:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG523]], <4 x float>* [[REG524:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG525:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG525]], <4 x float>* [[REG526:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG524]], align 16
+// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG526]], align 16
+// CHECK-NEXT: call <4 x float> @vec_max(float vector[4], float vector[4])(<4 x float> [[REG527]], <4 x float> [[REG528]])
+// CHECK: [[REG529:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG529]]
+
+// CHECK: define available_externally i64 @_mm_max_pi16
+// CHECK: [[REG530:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG530]])
+// CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG531]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG532]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG533]])
+// CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG534]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG535]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG537:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG538:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG536]], <8 x i16> [[REG537]])
+// CHECK-NEXT: store <8 x i16> [[REG538]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG539:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG540:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG539]], <8 x i16> [[REG540]], <8 x i16> [[REG541]])
+// CHECK-NEXT: store <8 x i16> [[REG542]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG543]] to <2 x i64>
+// CHECK-NEXT: [[REG545:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG544]], i32 0
+// CHECK-NEXT: ret i64 [[REG545]]
+
+// CHECK: define available_externally i64 @_mm_max_pu8
+// CHECK: [[REG546:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG546]])
+// CHECK-NEXT: [[REG548:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG547]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG548]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG549:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG549]])
+// CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG550]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG551]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG554:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG552]], <16 x i8> [[REG553]])
+// CHECK-NEXT: store <16 x i8> [[REG554]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG555:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG556:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG557:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG558:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG555]], <16 x i8> [[REG556]], <16 x i8> [[REG557]])
+// CHECK-NEXT: store <16 x i8> [[REG558]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG559:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG559]] to <2 x i64>
+// CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG560]], i32 0
+// CHECK-NEXT: ret i64 [[REG561]]
+
+void __attribute__((noinline))
+test_alt_name_max() {
+  res64 = _m_pmaxsw(ms[0], ms[1]);
+  res64 = _m_pmaxub(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_alt_name_max
+
+// CHECK: define available_externally i64 @_m_pmaxsw
+// CHECK: [[REG562:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pi16
+// CHECK-NEXT: ret i64 [[REG562]]
+
+// CHECK: define available_externally i64 @_m_pmaxub
+// CHECK: [[REG563:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pu8
+// CHECK-NEXT: ret i64 [[REG563]]
+
+void __attribute__((noinline))
+test_min() {
+  res = _mm_min_ps(m1, m2);
+  res = _mm_min_ss(m1, m2);
+  res64 = _mm_min_pi16(ms[0], ms[1]);
+  res64 = _mm_min_pu8(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_min
+
+// CHECK: define available_externally <4 x float> @_mm_min_ps(<4 x float> [[REG517:[0-9a-zA-Z_%.]+]], <4 x float> [[REG518:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG517]], <4 x float>* [[REG564:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG518]], <4 x float>* [[REG565:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG566:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
+// CHECK-NEXT: [[REG567:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
+// CHECK-NEXT: [[REG568:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG566]], <4 x float> [[REG567]])
+// CHECK-NEXT: store <4 x i32> [[REG568]], <4 x i32>* [[REG569:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG570:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
+// CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
+// CHECK-NEXT: [[REG572:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG569]], align 16
+// CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG570]], <4 x float> [[REG571]], <4 x i32> [[REG572]])
+// CHECK-NEXT: ret <4 x float> [[REG573]]
+
+// CHECK: define available_externally <4 x float> @_mm_min_ss
+// CHECK: [[REG574:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG574]], <4 x float>* [[REG575:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG576:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG576]], <4 x float>* [[REG577:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG578:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG575]], align 16
+// CHECK-NEXT: [[REG579:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG577]], align 16
+// CHECK-NEXT: call <4 x float> @vec_min(float vector[4], float vector[4])(<4 x float> [[REG578]], <4 x float> [[REG579]])
+// CHECK: [[REG580:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG580]]
+
+// CHECK: define available_externally i64 @_mm_min_pi16
+// CHECK: [[REG581:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG581]])
+// CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG582]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG583]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG584:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG585:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG584]])
+// CHECK-NEXT: [[REG586:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG585]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG586]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG587:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG588:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG589:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> [[REG587]], <8 x i16> [[REG588]])
+// CHECK-NEXT: store <8 x i16> [[REG589]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG593:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG590]], <8 x i16> [[REG591]], <8 x i16> [[REG592]])
+// CHECK-NEXT: store <8 x i16> [[REG593]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG594:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG595:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG594]] to <2 x i64>
+// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG595]], i32 0
+// CHECK-NEXT: ret i64 [[REG596]]
+
+// CHECK: define available_externally i64 @_mm_min_pu8
+// CHECK: [[REG597:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG598:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG597]])
+// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG598]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG599]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG601:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG600]])
+// CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG601]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG602]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG603]], <16 x i8> [[REG604]])
+// CHECK-NEXT: store <16 x i8> [[REG605]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG608:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG606]], <16 x i8> [[REG607]], <16 x i8> [[REG608]])
+// CHECK-NEXT: store <16 x i8> [[REG609]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG611:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG610]] to <2 x i64>
+// CHECK-NEXT: [[REG612:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG611]], i32 0
+// CHECK-NEXT: ret i64 [[REG612]]
+
+void __attribute__((noinline))
+test_alt_name_min() {
+  res64 = _m_pminsw(ms[0], ms[1]);
+  res64 = _m_pminub(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_alt_name_min
+
+// CHECK: define available_externally i64 @_m_pminsw
+// CHECK: [[REG613:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pi16
+// CHECK-NEXT: ret i64 [[REG613]]
+
+// CHECK: define available_externally i64 @_m_pminub
+// CHECK: [[REG614:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pu8
+// CHECK-NEXT: ret i64 [[REG614]]
+
+void __attribute__((noinline))
+test_move() {
+  _mm_maskmove_si64(ms[0], ms[1], (char *)&res64);
+  res = _mm_move_ss(m1, m2);
+  res = _mm_movehl_ps(m1, m2);
+  res = _mm_movelh_ps(m1, m2);
+  i = _mm_movemask_pi8(ms[0]);
+  i = _mm_movemask_ps(m1);
+}
+
+// CHECK-LABEL: @test_move
+
+// CHECK: define available_externally void @_mm_maskmove_si64
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 -9187201950435737472, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG615:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = bitcast i8* [[REG615]] to i64*
+// CHECK-NEXT: store i64* [[REG616]], i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG617:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG618:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
+// CHECK-NEXT: store i64 [[REG618]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG619:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG620:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG621:[0-9a-zA-Z_%.]+]] = and i64 [[REG619]], [[REG620]]
+// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG623:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8(i64 [[REG621]], i64 [[REG622]])
+// CHECK-NEXT: store i64 [[REG623]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = xor i64 [[REG625]], -1
+// CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = and i64 [[REG624]], [[REG626]]
+// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = and i64 [[REG628]], [[REG629]]
+// CHECK-NEXT: [[REG631:[0-9a-zA-Z_%.]+]] = or i64 [[REG627]], [[REG630]]
+// CHECK-NEXT: store i64 [[REG631]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG633:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG632]], i64* [[REG633]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally <4 x float> @_mm_move_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG634:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG635:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG634]], <4 x float> [[REG635]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG636]]
+
+// CHECK: define available_externally <4 x float> @_mm_movehl_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG638:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG637]] to <2 x i64>
+// CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG639]] to <2 x i64>
+// CHECK-NEXT: [[REG641:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG638]], <2 x i64> [[REG640]])
+// CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG641]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG642]]
+
+// CHECK: define available_externally <4 x float> @_mm_movelh_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG643]] to <2 x i64>
+// CHECK-NEXT: [[REG645:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG646:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG645]] to <2 x i64>
+// CHECK-NEXT: [[REG647:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG644]], <2 x i64> [[REG646]])
+// CHECK-NEXT: [[REG648:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG647]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG648]]
+
+// CHECK: define available_externally signext i32 @_mm_movemask_pi8
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-LE-NEXT: store i64 2269495618449464, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-BE-NEXT: store i64 4048780183313844224, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG650:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG651:[0-9a-zA-Z_%.]+]] = call i64 @llvm.ppc.bpermd(i64 [[REG649]], i64 [[REG650]])
+// CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG651]] to i32
+// CHECK-NEXT: ret i32 [[REG652]]
+
+// CHECK: define available_externally signext i32 @_mm_movemask_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG654:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG653]] to <16 x i8>
+// CHECK-LE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
+// CHECK-BE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
+// CHECK-NEXT: store <2 x i64> [[REG655]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG656:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 1
+// CHECK-BE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 0
+// CHECK-NEXT: [[REG658:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG657]] to i32
+// CHECK-NEXT: ret i32 [[REG658]]
+
+void __attribute__((noinline))
+test_alt_name_move() {
+  i = _m_pmovmskb(ms[0]);
+  _m_maskmovq(ms[0], ms[1], (char *)&res64);
+}
+
+// CHECK-LABEL: @test_alt_name_move
+
+// CHECK: define available_externally signext i32 @_m_pmovmskb
+// CHECK: [[REG659:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_movemask_pi8
+// CHECK-NEXT: ret i32 [[REG659]]
+
+// CHECK: define available_externally void @_m_maskmovq
+// CHECK: call void @_mm_maskmove_si64
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_mul() {
+  res = _mm_mul_ps(m1, m2);
+  res = _mm_mul_ss(m1, m2);
+  res64 = _mm_mulhi_pu16(ms[0], ms[1]);
+  res64 = _m_pmulhuw(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_mul
+
+// CHECK: define available_externally <4 x float> @_mm_mul_ps(<4 x float> [[REG660:[0-9a-zA-Z_%.]+]], <4 x float> [[REG661:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG660]], <4 x float>* [[REG662:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG661]], <4 x float>* [[REG663:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG664:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG662]], align 16
+// CHECK-NEXT: [[REG665:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG663]], align 16
+// CHECK-NEXT: [[REG666:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG664]], [[REG665]]
+// CHECK-NEXT: ret <4 x float> [[REG666]]
+
+// CHECK: define available_externally <4 x float> @_mm_mul_ss(<4 x float> [[REG667:[0-9a-zA-Z_%.]+]], <4 x float> [[REG668:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG667]], <4 x float>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG668]], <4 x float>* [[REG670:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG671:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
+// CHECK-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG671]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG672]], <4 x float>* [[REG673:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG670]], align 16
+// CHECK-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG674]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG675]], <4 x float>* [[REG676:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG677:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG673]], align 16
+// CHECK-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG676]], align 16
+// CHECK-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG677]], [[REG678]]
+// CHECK-NEXT: store <4 x float> [[REG679]], <4 x float>* [[REG680:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
+// CHECK-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG680]], align 16
+// CHECK-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG681]], <4 x float> [[REG682]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG683]]
+
+// CHECK: define available_externally i64 @_mm_mulhi_pu16(i64 [[REG684:[0-9a-zA-Z_%.]+]], i64 [[REG685:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG684]], i64* [[REG686:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG685]], i64* [[REG687:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG688:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG686]], align 8
+// CHECK-NEXT: [[REG689:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG688]])
+// CHECK-NEXT: [[REG690:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG689]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG690]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG691:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG687]], align 8
+// CHECK-NEXT: [[REG692:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG691]])
+// CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG692]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG693]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG696:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> [[REG694]], <8 x i16> [[REG695]])
+// CHECK-NEXT: store <4 x i32> [[REG696]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG698:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG699:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> [[REG697]], <8 x i16> [[REG698]])
+// CHECK-NEXT: store <4 x i32> [[REG699]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG702:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> [[REG700]], <4 x i32> [[REG701]], <16 x i8> [[REG702]])
+// CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG703]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG704]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG706:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG705]] to <2 x i64>
+// CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG706]], i32 0
+// CHECK-NEXT: ret i64 [[REG707]]
+
+// CHECK: define available_externally i64 @_m_pmulhuw(i64 [[REG708:[0-9a-zA-Z_%.]+]], i64 [[REG709:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG708]], i64* [[REG710:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG709]], i64* [[REG711:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG712:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG710]], align 8
+// CHECK-NEXT: [[REG713:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG711]], align 8
+// CHECK-NEXT: [[REG714:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pu16(i64 [[REG712]], i64 [[REG713]])
+// CHECK-NEXT: ret i64 [[REG714]]
+
+void __attribute__((noinline))
+test_prefetch() {
+  _mm_prefetch(ms, i);
+}
+
+// CHECK-LABEL: @test_prefetch
+
+// CHECK: define available_externally void @_mm_prefetch
+// CHECK: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: call void @llvm.prefetch(i8* [[REG715]], i32 0, i32 3, i32 1)
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_rcp() {
+  res = _mm_rcp_ps(m1);
+  res = _mm_rcp_ss(m1);
+}
+
+// CHECK-LABEL: @test_rcp
+
+// CHECK: define available_externally <4 x float> @_mm_rcp_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG716:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG717:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_re(float vector[4])(<4 x float> [[REG716]])
+// CHECK-NEXT: ret <4 x float> [[REG717]]
+
+// CHECK: define available_externally <4 x float> @_mm_rcp_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG719:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG718]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG719]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG720:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_rcp_ps(<4 x float> [[REG720]])
+// CHECK-NEXT: store <4 x float> [[REG721]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG722]], <4 x float> [[REG723]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG724]]
+
+void __attribute__((noinline))
+test_rsqrt() {
+  res = _mm_rsqrt_ps(m1);
+  res = _mm_rsqrt_ss(m1);
+}
+
+// CHECK-LABEL: @test_rsqrt
+
+// CHECK: define available_externally <4 x float> @_mm_rsqrt_ps
+// CHECK: [[REG725:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: ret <4 x float> [[REG725]]
+
+// CHECK: define available_externally <4 x float> @_mm_rsqrt_ss
+// CHECK: [[REG726:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG726]], <4 x float>* [[REG727:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG727]], align 16
+// CHECK-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> [[REG728]])
+// CHECK-NEXT: store <4 x float> [[REG729]], <4 x float>* [[REG730:[0-9a-zA_Z_%.]+]], align 16
+// CHECK-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG732:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG730]], align 16
+// CHECK-NEXT: [[REG733:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG731]], <4 x float> [[REG732]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG733]]
+
+void __attribute__((noinline))
+test_sad() {
+  res64 = _mm_sad_pu8(ms[0], ms[1]);
+  res64 = _m_psadbw(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_sad
+
+// CHECK: define available_externally i64 @_mm_sad_pu8(i64 [[REG734:[0-9a-zA-Z_%.]+]], i64 [[REG735:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG734]], i64* [[REG736:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG735]], i64* [[REG737:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[REG738]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG736]], align 8
+// CHECK-NEXT: [[REG740:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG739]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG740]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG741:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG741]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG742]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG737]], align 8
+// CHECK-NEXT: [[REG744:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG743]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG744]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG745:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG746:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG745]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG746]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG749:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG747]], <16 x i8> [[REG748]])
+// CHECK-NEXT: store <16 x i8> [[REG749]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG750:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG750]], <16 x i8> [[REG751]])
+// CHECK-NEXT: store <16 x i8> [[REG752]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG753:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG754:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG755:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG753]], <16 x i8> [[REG754]])
+// CHECK-NEXT: store <16 x i8> [[REG755]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG756:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> [[REG756]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: store <4 x i32> [[REG757]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG758:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG759:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sums(<4 x i32> [[REG758]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: store <4 x i32> [[REG759]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG761:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG760]], i32 3
+// CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG761]] to i16
+// CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG763]], i64 0, i64 0
+// CHECK-NEXT: store i16 [[REG762]], i16* [[REG764]], align 8
+// CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
+// CHECK-NEXT: [[REG766:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG765]], align 8
+// CHECK-NEXT: ret i64 [[REG766]]
+
+// CHECK: define available_externally i64 @_m_psadbw
+// CHECK: [[REG767:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sad_pu8
+// CHECK-NEXT: ret i64 [[REG767]]
+
+void __attribute__((noinline))
+test_set() {
+  res = _mm_set_ps(fs[0], fs[1], fs[2], fs[3]);
+  res = _mm_set_ps1(fs[0]);
+  res = _mm_set_ss(fs[0]);
+  res = _mm_set1_ps(fs[0]);
+  res = _mm_setr_ps(fs[0], fs[1], fs[2], fs[3]);
+}
+
+// CHECK-LABEL: @test_set
+
+// CHECK: define available_externally <4 x float> @_mm_set_ps(float [[REG768:[0-9a-zA-Z_%.]+]], float [[REG769:[0-9a-zA-Z_%.]+]], float [[REG770:[0-9a-zA-Z_%.]+]], float [[REG771:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG768]], float* [[REG772:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG769]], float* [[REG773:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG770]], float* [[REG774:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG771]], float* [[REG775:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = load float, float* [[REG775]], align 4
+// CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG776]], i32 0
+// CHECK-NEXT: [[REG778:[0-9a-zA-Z_%.]+]] = load float, float* [[REG774]], align 4
+// CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG777]], float [[REG778]], i32 1
+// CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load float, float* [[REG773]], align 4
+// CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG779]], float [[REG780]], i32 2
+// CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = load float, float* [[REG772]], align 4
+// CHECK-NEXT: [[REG783:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG781]], float [[REG782]], i32 3
+// CHECK-NEXT: store <4 x float> [[REG783]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG784:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG784]]
+
+// CHECK: define available_externally <4 x float> @_mm_set_ps1(float [[REG785:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG785]], float* [[REG786:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG787:[0-9a-zA-Z_%.]+]] = load float, float* [[REG786]], align 4
+// CHECK-NEXT: [[REG788:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps(float [[REG787]])
+// CHECK-NEXT: ret <4 x float> [[REG788]]
+
+// CHECK: define available_externally <4 x float> @_mm_set_ss(float [[REG789:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG789:[0-9a-zA-Z_%.]+]], float* [[REG790:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG791:[0-9a-zA-Z_%.]+]] = load float, float* [[REG790]], align 4
+// CHECK-NEXT: [[REG792:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG791]], i32 0
+// CHECK-NEXT: [[REG793:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG792]], float 0.000000e+00, i32 1
+// CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG793]], float 0.000000e+00, i32 2
+// CHECK-NEXT: [[REG795:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG794]], float 0.000000e+00, i32 3
+// CHECK-NEXT: store <4 x float> [[REG795]], <4 x float>* [[REG796:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG797:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG796]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG797]]
+
+// CHECK: define available_externally <4 x float> @_mm_set1_ps(float [[REG798:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG798]], float* [[REG799:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG800]], i32 0
+// CHECK-NEXT: [[REG802:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG803:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG801]], float [[REG802]], i32 1
+// CHECK-NEXT: [[REG804:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG805:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG803]], float [[REG804]], i32 2
+// CHECK-NEXT: [[REG806:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG807:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG805]], float [[REG806]], i32 3
+// CHECK-NEXT: store <4 x float> [[REG807]], <4 x float>* [[REG808:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG809:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG808]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG809]]
+
+// CHECK: define available_externally <4 x float> @_mm_setr_ps(float [[REG810:[0-9a-zA-Z_%.]+]], float [[REG811:[0-9a-zA-Z_%.]+]], float [[REG812:[0-9a-zA-Z_%.]+]], float [[REG813:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG810]], float* [[REG814:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG811]], float* [[REG815:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG812]], float* [[REG816:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG813]], float* [[REG817:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG818:[0-9a-zA-Z_%.]+]] = load float, float* [[REG814]], align 4
+// CHECK-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG818]], i32 0
+// CHECK-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = load float, float* [[REG815]], align 4
+// CHECK-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG819]], float [[REG820]], i32 1
+// CHECK-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load float, float* [[REG816]], align 4
+// CHECK-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG821]], float [[REG822]], i32 2
+// CHECK-NEXT: [[REG824:[0-9a-zA-Z_%.]+]] = load float, float* [[REG817]], align 4
+// CHECK-NEXT: [[REG825:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG823]], float [[REG824]], i32 3
+// CHECK-NEXT: store <4 x float> [[REG825]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG826:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG826]]
+
+void __attribute__((noinline))
+test_setzero() {
+  res = _mm_setzero_ps();
+}
+
+// CHECK-LABEL: @test_setzero
+
+// CHECK: define available_externally <4 x float> @_mm_setzero_ps
+// CHECK: store <4 x float> zeroinitializer, <4 x float>* [[REG827:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG827]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG828]]
+
+void __attribute__((noinline))
+test_sfence() {
+  _mm_sfence();
+}
+
+// CHECK-LABEL: @test_sfence
+
+// CHECK: define available_externally void @_mm_sfence
+// CHECK: fence release
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_shuffle() {
+  res64 = _mm_shuffle_pi16(ms[0], i);
+  res = _mm_shuffle_ps(m1, m2, i);
+  res64 = _m_pshufw(ms[0], i);
+}
+
+// CHECK-LABEL: @test_shuffle
+
+// CHECK: define available_externally i64 @_mm_shuffle_pi16(i64 [[REG829:[0-9a-zA-Z_%.]+]], i32 signext [[REG830:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG829]], i64* [[REG831:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i32 [[REG830]], i32* [[REG832:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = and i32 [[REG833]], 3
+// CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = sext i32 [[REG834]] to i64
+// CHECK-NEXT: store i64 [[REG835]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG836:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG836]], 2
+// CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = and i32 [[REG837]], 3
+// CHECK-NEXT: [[REG839:[0-9a-zA-Z_%.]+]] = sext i32 [[REG838]] to i64
+// CHECK-NEXT: store i64 [[REG839]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG840:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG840]], 4
+// CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = and i32 [[REG841]], 3
+// CHECK-NEXT: [[REG843:[0-9a-zA-Z_%.]+]] = sext i32 [[REG842]] to i64
+// CHECK-NEXT: store i64 [[REG843]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG845:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG844]], 6
+// CHECK-NEXT: [[REG846:[0-9a-zA-Z_%.]+]] = and i32 [[REG845]], 3
+// CHECK-NEXT: [[REG847:[0-9a-zA-Z_%.]+]] = sext i32 [[REG846]] to i64
+// CHECK-NEXT: store i64 [[REG847]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG848:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG849:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG848]]
+// CHECK-NEXT: [[REG850:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG849]], align 2
+// CHECK-NEXT: [[REG851:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 0
+// CHECK-BE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 3
+// CHECK-NEXT: store i16 [[REG850]], i16* [[REG852]]
+// CHECK-NEXT: [[REG853:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG854:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG853]]
+// CHECK-NEXT: [[REG855:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG854]], align 2
+// CHECK-NEXT: [[REG856:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 1
+// CHECK-BE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 2
+// CHECK-NEXT: store i16 [[REG855]], i16* [[REG857]]
+// CHECK-NEXT: [[REG858:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG859:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG858]]
+// CHECK-NEXT: [[REG860:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG859]], align 2
+// CHECK-NEXT: [[REG861:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 2
+// CHECK-BE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 1
+// CHECK-NEXT: store i16 [[REG860]], i16* [[REG862]]
+// CHECK-NEXT: [[REG863:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG864:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG863]]
+// CHECK-NEXT: [[REG865:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG864]], align 2
+// CHECK-NEXT: [[REG866:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 3
+// CHECK-BE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 0
+// CHECK-NEXT: store i16 [[REG865]], i16* [[REG867]]
+// CHECK-NEXT: [[REG868:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
+// CHECK-NEXT: [[REG869:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG868]], align 8
+// CHECK-NEXT: [[REG870:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG869]])
+// CHECK-NEXT: store <2 x i64> [[REG870]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG871:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG831]], align 8
+// CHECK-NEXT: [[REG872:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG871]])
+// CHECK-NEXT: store <2 x i64> [[REG872]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG873:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG874:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG875:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG876:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG875]] to <16 x i8>
+// CHECK-NEXT: [[REG877:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])(<2 x i64> [[REG873]], <2 x i64> [[REG874]], <16 x i8> [[REG876]])
+// CHECK-NEXT: store <2 x i64> [[REG877]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG878:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG879:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG878]], i32 0
+// CHECK-NEXT: ret i64 [[REG879]]
+
+// CHECK: define available_externally <4 x float> @_mm_shuffle_ps(<4 x float> [[REG880:[0-9a-zA-Z_%.]+]], <4 x float> [[REG881:[0-9a-zA-Z_%.]+]], i32 signext [[REG882:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG880]], <4 x float>* [[REG883:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG881]], <4 x float>* [[REG884:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store i32 [[REG882]], i32* [[REG885:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG886:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG887:[0-9a-zA-Z_%.]+]] = and i32 [[REG886]], 3
+// CHECK-NEXT: [[REG888:[0-9a-zA-Z_%.]+]] = sext i32 [[REG887]] to i64
+// CHECK-NEXT: store i64 [[REG888]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG889:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG890:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG889]], 2
+// CHECK-NEXT: [[REG891:[0-9a-zA-Z_%.]+]] = and i32 [[REG890]], 3
+// CHECK-NEXT: [[REG892:[0-9a-zA-Z_%.]+]] = sext i32 [[REG891]] to i64
+// CHECK-NEXT: store i64 [[REG892]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG893:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG894:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG893]], 4
+// CHECK-NEXT: [[REG895:[0-9a-zA-Z_%.]+]] = and i32 [[REG894]], 3
+// CHECK-NEXT: [[REG896:[0-9a-zA-Z_%.]+]] = sext i32 [[REG895]] to i64
+// CHECK-NEXT: store i64 [[REG896]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG897:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG898:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG897]], 6
+// CHECK-NEXT: [[REG899:[0-9a-zA-Z_%.]+]] = and i32 [[REG898]], 3
+// CHECK-NEXT: [[REG900:[0-9a-zA-Z_%.]+]] = sext i32 [[REG899]] to i64
+// CHECK-NEXT: store i64 [[REG900]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG901:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG902:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG901]]
+// CHECK-NEXT: [[REG903:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG902]], align 4
+// CHECK-NEXT: [[REG904:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG905:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG904]], i32 [[REG903]], i32 0
+// CHECK-NEXT: store <4 x i32> [[REG905]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG906:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG907:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG906]]
+// CHECK-NEXT: [[REG908:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG907]], align 4
+// CHECK-NEXT: [[REG909:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG910:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG909]], i32 [[REG908]], i32 1
+// CHECK-NEXT: store <4 x i32> [[REG910]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG911:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG912:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG911]]
+// CHECK-NEXT: [[REG913:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG912]], align 4
+// CHECK-NEXT: [[REG914:[0-9a-zA-Z_%.]+]] = add i32 [[REG913]], 269488144
+// CHECK-NEXT: [[REG915:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG916:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG915]], i32 [[REG914]], i32 2
+// CHECK-NEXT: store <4 x i32> [[REG916]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG917:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG918:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG917]]
+// CHECK-NEXT: [[REG919:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG918]], align 4
+// CHECK-NEXT: [[REG920:[0-9a-zA-Z_%.]+]] = add i32 [[REG919]], 269488144
+// CHECK-NEXT: [[REG921:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG922:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG921]], i32 [[REG920]], i32 3
+// CHECK-NEXT: store <4 x i32> [[REG922]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG923:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG924:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG925:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG926:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG925]] to <16 x i8>
+// CHECK-NEXT: [[REG927:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG923]], <4 x float> [[REG924]], <16 x i8> [[REG926]])
+// CHECK-NEXT: ret <4 x float> [[REG927]]
+
+// CHECK: define available_externally i64 @_m_pshufw
+// CHECK: [[REG928:[0-9a-zA-Z_%.]+]] = call i64 @_mm_shuffle_pi16
+// CHECK-NEXT: ret i64 [[REG928]]
+
+void __attribute__((noinline))
+test_sqrt() {
+  res = _mm_sqrt_ps(m1);
+  res = _mm_sqrt_ss(m1);
+}
+
+// CHECK-LABEL: @test_sqrt
+
+// CHECK: define available_externally <4 x float> @_mm_sqrt_ps
+// CHECK: [[REG929:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: ret <4 x float> [[REG929]]
+
+// CHECK: define available_externally <4 x float> @_mm_sqrt_ss
+// CHECK: [[REG930:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG931:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG930]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG931]], <4 x float>* [[REG932:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG933:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG932]], align 16
+// CHECK-NEXT: [[REG934:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> [[REG933]])
+// CHECK-NEXT: store <4 x float> [[REG934]], <4 x float>* [[REG935:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG936:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG937:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG935]], align 16
+// CHECK-NEXT: [[REG938:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG936]], <4 x float> [[REG937]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG938]]
+
+void __attribute__((noinline))
+test_store() {
+  _mm_store_ps(fs, m1);
+  _mm_store_ps1(fs, m1);
+  _mm_store_ss(fs, m1);
+  _mm_store1_ps(fs, m1);
+  _mm_storeh_pi(ms, m1);
+  _mm_storel_pi(ms, m1);
+  _mm_storer_ps(fs, m1);
+}
+
+// CHECK-LABEL: @test_store
+
+// CHECK: define available_externally void @_mm_store_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG939:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG940:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG941:[0-9a-zA-Z_%.]+]] = bitcast float* [[REG940]] to <4 x float>*
+// CHECK-NEXT: call void @vec_st(float vector[4], int, float vector[4]*)(<4 x float> [[REG939]], i32 signext 0, <4 x float>* [[REG941]])
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_store_ps1
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG942:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG943:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store1_ps(float* [[REG942]], <4 x float> [[REG943]])
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_store_ss
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG944:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG945:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG944]], i32 0
+// CHECK-NEXT: [[REG946:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store float [[REG945]], float* [[REG946]], align 4
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_store1_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG947:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG948:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG947]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG948]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG949:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG950:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store_ps(float* [[REG949]], <4 x float> [[REG950]])
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_storeh_pi
+// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG951:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG952:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG951]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG952]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG953:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG954:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG953]], i32 1
+// CHECK-NEXT: [[REG955:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG954]], i64* [[REG955]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_storel_pi
+// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG956:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG957:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG956]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG957]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG958:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG959:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG958]], i32 0
+// CHECK-NEXT: [[REG960:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG959]], i64* [[REG960]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_storer_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG961:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG962:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG963:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG961]], <4 x float> [[REG962]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
+// CHECK-NEXT: store <4 x float> [[REG963]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG964:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG965:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store_ps(float* [[REG964]], <4 x float> [[REG965]])
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_stream() {
+  _mm_stream_pi(&res64, ms[0]);
+  _mm_stream_ps(&fs[0], m1);
+}
+
+// CHECK-LABEL: @test_stream
+
+/// CHECK: define available_externally void @_mm_stream_pi
+// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG966:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* [[REG966]])
+// CHECK-NEXT: [[REG967:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG968:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG967]], i64* [[REG968]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_stream_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG969:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(float* [[REG969]])
+// CHECK-NEXT: [[REG970:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG971:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store_ps(float* [[REG970]], <4 x float> [[REG971]])
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_sub() {
+  res = _mm_sub_ps(m1, m2);
+  res = _mm_sub_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_sub
+
+// CHECK: define available_externally <4 x float> @_mm_sub_ps(<4 x float> [[REG972:[0-9a-zA-Z_%.]+]], <4 x float> [[REG973:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG972]], <4 x float>* [[REG974:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG973]], <4 x float>* [[REG975:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG976:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG974]], align 16
+// CHECK-NEXT: [[REG977:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG975]], align 16
+// CHECK-NEXT: [[REG978:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG976]], [[REG977]]
+// CHECK-NEXT: ret <4 x float> [[REG978]]
+
+// CHECK: define available_externally <4 x float> @_mm_sub_ss(<4 x float> [[REG979:[0-9a-zA-Z_%.]+]], <4 x float> [[REG980:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG979]], <4 x float>* [[REG981:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG980]], <4 x float>* [[REG982:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG983:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
+// CHECK-NEXT: [[REG984:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG983]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG984]], <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG982]], align 16
+// CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG986]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG987]], <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG990:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG991:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG989]], [[REG990]]
+// CHECK-NEXT: store <4 x float> [[REG991]], <4 x float>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG993:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
+// CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG992]], align 16
+// CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG993]], <4 x float> [[REG994]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG995]]
+
+void __attribute__((noinline))
+test_transpose() {
+  __m128 m3, m4;
+  _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
+}
+
+// CHECK-LABEL: @test_transpose
+
+// CHECK: br label %[[REG996:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG996]]:
+// CHECK: [[REG997:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK: [[REG998:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK: [[REG999:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+// CHECK: [[REG1000:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+// CHECK: [[REG1001:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+// CHECK: [[REG1002:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
+// CHECK: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+// CHECK: [[REG1004:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
+// CHECK: ret void
+
+void __attribute__((noinline))
+test_ucomi() {
+  i = _mm_ucomieq_ss(m1, m2);
+  i = _mm_ucomige_ss(m1, m2);
+  i = _mm_ucomigt_ss(m1, m2);
+  i = _mm_ucomile_ss(m1, m2);
+  i = _mm_ucomilt_ss(m1, m2);
+  i = _mm_ucomineq_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_ucomi
+
+// CHECK: define available_externally signext i32 @_mm_ucomieq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1005:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1006:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1005]], i32 0
+// CHECK-NEXT: [[REG1007:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1007]], i32 0
+// CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG1006]], [[REG1008]]
+// CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1009]] to i32
+// CHECK-NEXT: ret i32 [[REG1010]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomige_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1011:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1012:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1011]], i32 0
+// CHECK-NEXT: [[REG1013:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1014:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1013]], i32 0
+// CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG1012]], [[REG1014]]
+// CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1015]] to i32
+// CHECK-NEXT: ret i32 [[REG1016]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomigt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1018:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1017]], i32 0
+// CHECK-NEXT: [[REG1019:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1020:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1019]], i32 0
+// CHECK-NEXT: [[REG1021:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG1018]], [[REG1020]]
+// CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1021]] to i32
+// CHECK-NEXT: ret i32 [[REG1022]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomile_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1023]], i32 0
+// CHECK-NEXT: [[REG1025:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1026:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1025]], i32 0
+// CHECK-NEXT: [[REG1027:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG1024]], [[REG1026]]
+// CHECK-NEXT: [[REG1028:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1027]] to i32
+// CHECK-NEXT: ret i32 [[REG1028]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomilt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1029]], i32 0
+// CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1032:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1031]], i32 0
+// CHECK-NEXT: [[REG1033:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG1030]], [[REG1032]]
+// CHECK-NEXT: [[REG1034:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1033]] to i32
+// CHECK-NEXT: ret i32 [[REG1034]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomineq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1035:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1035]], i32 0
+// CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1037]], i32 0
+// CHECK-NEXT: [[REG1039:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG1036]], [[REG1038]]
+// CHECK-NEXT: [[REG1040:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1039]] to i32
+// CHECK-NEXT: ret i32 [[REG1040]]
+
+void __attribute__((noinline))
+test_undefined() {
+  res = _mm_undefined_ps();
+}
+
+// CHECK-LABEL: @test_undefined
+
+// CHECK: define available_externally <4 x float> @_mm_undefined_ps
+// CHECK: [[REG1041:[0-9a-zA-Z_%.]+]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[REG1042:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
+// CHECK-NEXT: store <4 x float> [[REG1042]], <4 x float>* [[REG1041]], align 16
+// CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG1043]]
+
+void __attribute__((noinline))
+test_unpack() {
+  res = _mm_unpackhi_ps(m1, m2);
+  res = _mm_unpacklo_ps(m1, m2);
+}
+
+// CHECK-LABEL: @test_unpack
+
+// CHECK: define available_externally <4 x float> @_mm_unpackhi_ps(<4 x float> [[REG1044:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1045:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG1044]], <4 x float>* [[REG1046:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG1045]], <4 x float>* [[REG1047:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG1048:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1046]], align 16
+// CHECK-NEXT: [[REG1049:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1047]], align 16
+// CHECK-NEXT: [[REG1050:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])(<4 x float> [[REG1048]], <4 x float> [[REG1049]])
+// CHECK-NEXT: ret <4 x float> [[REG1050]]
+
+// CHECK: define available_externally <4 x float> @_mm_unpacklo_ps(<4 x float> [[REG1051:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1052:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG1051]], <4 x float>* [[REG1053:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG1052]], <4 x float>* [[REG1054:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG1055:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1053]], align 16
+// CHECK-NEXT: [[REG1056:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1054]], align 16
+// CHECK-NEXT: [[REG1057:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])(<4 x float> [[REG1055]], <4 x float> [[REG1056]])
+// CHECK-NEXT: ret <4 x float> [[REG1057]]
diff --git a/clang/test/Headers/ppc-intrinsics.c b/clang/test/Headers/ppc-intrinsics.c
deleted file mode 100644
index 622ce90c76258..0000000000000
--- a/clang/test/Headers/ppc-intrinsics.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// REQUIRES: powerpc-registered-target
-
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
-// expected-no-diagnostics
-
-// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
-
-#include <mmintrin.h>
-// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
-
-// CHECK: target triple = "powerpc64-
-// CHECK: !llvm.module.flags =
diff --git a/clang/test/Headers/ppc-mmx-intrinsics.c b/clang/test/Headers/ppc-mmx-intrinsics.c
new file mode 100644
index 0000000000000..fe989c2dfbaa5
--- /dev/null
+++ b/clang/test/Headers/ppc-mmx-intrinsics.c
@@ -0,0 +1,11 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-gnu-linux %s -Xclang -verify
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-gnu-linux %s -Xclang -verify -x c++
+// expected-no-diagnostics
+
+// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr7 %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
+
+#include <mmintrin.h>
+// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
diff --git a/clang/test/Headers/ppc-sse-intrinsics.c b/clang/test/Headers/ppc-sse-intrinsics.c
new file mode 100644
index 0000000000000..f4aa7d9009bb9
--- /dev/null
+++ b/clang/test/Headers/ppc-sse-intrinsics.c
@@ -0,0 +1,20 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// Since mm_malloc.h references system native stdlib.h, doing cross-compile
+// testing may cause unexpected problems. This would affect xmmintrin.h and
+// other following intrinsics headers. If there's need to test them using
+// cross-compile, please add -ffreestanding to compiler options, like
+// test/CodeGen/ppc-xmmintrin.c.
+
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify -x c++
+// expected-no-diagnostics
+
+// RUN: not %clang -S -emit-llvm %s -mcpu=pwr7 -o /dev/null 2>&1 | FileCheck %s -check-prefix=SSE-ERROR
+
+// Altivec must be enabled.
+#include <xmmintrin.h>
+
+#include <mm_malloc.h>
+// SSE-ERROR: xmmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."

From ed6fa44f236c4cbdb4a63e0953e4801e5f10a55b Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 29 May 2019 06:18:34 +0000
Subject: [PATCH 0457/1176] [llvm-readobj] -u: don't crash when dumping
 SHT_ARM_EXIDX if .symtab doesn't exist

Reviewed By: kongyi

Differential Revision: https://reviews.llvm.org/D62567

llvm-svn: 361929
---
 llvm/test/tools/llvm-readobj/ARM/unwind.s | 163 +++++++++++-----------
 llvm/tools/llvm-readobj/ARMEHABIPrinter.h |  10 +-
 2 files changed, 90 insertions(+), 83 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/ARM/unwind.s b/llvm/test/tools/llvm-readobj/ARM/unwind.s
index afabeb7cd2e76..ef17118283b8f 100644
--- a/llvm/test/tools/llvm-readobj/ARM/unwind.s
+++ b/llvm/test/tools/llvm-readobj/ARM/unwind.s
@@ -1,5 +1,8 @@
-@ RUN: llvm-mc -triple armv7-linux-eabi -filetype obj -o - %s \
-@ RUN:    | llvm-readobj -u | FileCheck %s
+@ RUN: llvm-mc -triple armv7-linux-eabi -filetype obj %s -o %t
+@ RUN: llvm-readobj -u %t | FileCheck --check-prefixes=CHECK,SYM %s
+
+@@ If .symtab doesn't exist, we can still dump some information.
+@ RUN: llvm-objcopy --allow-broken-links --strip-all %t - | llvm-readobj -u - | FileCheck %s
 
 	.syntax unified
 
@@ -151,7 +154,7 @@ spare:
 @ CHECK:     Entries [
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x0
-@ CHECK:         FunctionName: __personality
+@ SYM:           FunctionName: __personality
 @ CHECK:         Model: Compact (Inline)
 @ CHECK:         PersonalityIndex: 0
 @ CHECK:         Opcodes [
@@ -167,7 +170,7 @@ spare:
 @ CHECK:     Entries [
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x0
-@ CHECK:         FunctionName: personality0
+@ SYM:           FunctionName: personality0
 @ CHECK:         Model: Compact (Inline)
 @ CHECK:         PersonalityIndex: 0
 @ CHECK:         Opcodes [
@@ -183,18 +186,18 @@ spare:
 @ CHECK:     Entries [
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x0
-@ CHECK:         FunctionName: personality1
-@ CHECK:         ExceptionHandlingTable: .ARM.extab.personality1
-@ CHECK:         TableEntryOffset: 0x0
-@ CHECK:         Model: Compact
-@ CHECK:         PersonalityIndex: 1
-@ CHECK:         Opcodes [
-@ CHECK:           0xB1 0x0F ; pop {r0, r1, r2, r3}
-@ CHECK:           0xA7      ; pop {r4, r5, r6, r7, r8, r9, r10, fp}
-@ CHECK:           0x3F      ; vsp = vsp + 256
-@ CHECK:           0xB0      ; finish
-@ CHECK:           0xB0      ; finish
-@ CHECK:         ]
+@ SYM:           FunctionName: personality1
+@ SYM:           ExceptionHandlingTable: .ARM.extab.personality1
+@ SYM:           TableEntryOffset: 0x0
+@ SYM:           Model: Compact
+@ SYM:           PersonalityIndex: 1
+@ SYM:           Opcodes [
+@ SYM:             0xB1 0x0F ; pop {r0, r1, r2, r3}
+@ SYM:             0xA7      ; pop {r4, r5, r6, r7, r8, r9, r10, fp}
+@ SYM:             0x3F      ; vsp = vsp + 256
+@ SYM:             0xB0      ; finish
+@ SYM:             0xB0      ; finish
+@ SYM:           ]
 @ CHECK:       }
 @ CHECK:     ]
 @ CHECK:   }
@@ -203,11 +206,11 @@ spare:
 @ CHECK:     Entries [
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x0
-@ CHECK:         FunctionName: custom_personality
-@ CHECK:         ExceptionHandlingTable: .ARM.extab.custom_personality
-@ CHECK:         TableEntryOffset: 0x0
-@ CHECK:         Model: Generic
-@ CHECK:         PersonalityRoutineAddress: 0x0
+@ SYM:           FunctionName: custom_personality
+@ SYM:           ExceptionHandlingTable: .ARM.extab.custom_personality
+@ SYM:           TableEntryOffset: 0x0
+@ SYM:           Model: Generic
+@ SYM:           PersonalityRoutineAddress: 0x0
 @ CHECK:       }
 @ CHECK:     ]
 @ CHECK:   }
@@ -216,7 +219,7 @@ spare:
 @ CHECK:     Entries [
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x0
-@ CHECK:         FunctionName: opcodes
+@ SYM:           FunctionName: opcodes
 @ CHECK:         Model: Compact (Inline)
 @ CHECK:         PersonalityIndex: 0
 @ CHECK:         Opcodes [
@@ -231,7 +234,7 @@ spare:
 @ CHECK:     Entries [
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x0
-@ CHECK:         FunctionName: function0
+@ SYM:           FunctionName: function0
 @ CHECK:         Model: Compact (Inline)
 @ CHECK:         PersonalityIndex: 0
 @ CHECK:         Opcodes [
@@ -242,14 +245,14 @@ spare:
 @ CHECK:       }
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x4
-@ CHECK:         FunctionName: function1
-@ CHECK:         ExceptionHandlingTable: .ARM.extab.multiple
-@ CHECK:         Model: Generic
-@ CHECK:         PersonalityRoutineAddress: 0x0
+@ SYM:           FunctionName: function1
+@ SYM:           ExceptionHandlingTable: .ARM.extab.multiple
+@ SYM:           Model: Generic
+@ SYM:           PersonalityRoutineAddress: 0x0
 @ CHECK:       }
 @ CHECK:       Entry {
 @ CHECK:         FunctionAddress: 0x8
-@ CHECK:         FunctionName: function2
+@ SYM:           FunctionName: function2
 @ CHECK:         Model: Compact (Inline)
 @ CHECK:         PersonalityIndex: 0
 @ CHECK:         Opcodes [
@@ -263,63 +266,63 @@ spare:
 @ CHECK:   UnwindIndexTable {
 @ CHECK:     SectionName: .ARM.exidx.raw
 @ CHECK:     Entries [
-@ CHECK:       Opcodes [
-@ CHECK:         0xD7      ; pop {d8, d9, d10, d11, d12, d13, d14, d15}
-@ CHECK:         0xC9 0x02 ; pop {d0, d1, d2}
-@ CHECK:         0xC8 0x02 ; pop {d16, d17, d18}
-@ CHECK:         0xC7 0x03 ; pop {wCGR0, wCGR1}
-@ CHECK:         0xC6 0x02 ; pop {wR0, wR1, wR2}
-@ CHECK:         0xC2      ; pop {wR10, wR11, wR12}
-@ CHECK:         0xBA      ; pop {d8, d9, d10}
-@ CHECK:         0xB3 0x12 ; pop {d1, d2, d3}
-@ CHECK:         0xB2 0x80 0x04 ; vsp = vsp + 2564
-@ CHECK:         0xB1 0x01 ; pop {r0}
-@ CHECK:         0xB0      ; finish
-@ CHECK:         0xA9      ; pop {r4, r5, lr}
-@ CHECK:         0xA1      ; pop {r4, r5}
-@ CHECK:         0x91      ; vsp = r1
-@ CHECK:         0x84 0xC0 ; pop {r10, fp, lr}
-@ CHECK:         0x80 0xC0 ; pop {r10, fp}
-@ CHECK:         0x80 0x01 ; pop {r4}
-@ CHECK:         0x81 0x00 ; pop {ip}
-@ CHECK:         0x80 0x00 ; refuse to unwind
-@ CHECK:         0x42      ; vsp = vsp - 12
-@ CHECK:         0x02      ; vsp = vsp + 12
-@ CHECK:       ]
+@ SYM:         Opcodes [
+@ SYM:           0xD7      ; pop {d8, d9, d10, d11, d12, d13, d14, d15}
+@ SYM:           0xC9 0x02 ; pop {d0, d1, d2}
+@ SYM:           0xC8 0x02 ; pop {d16, d17, d18}
+@ SYM:           0xC7 0x03 ; pop {wCGR0, wCGR1}
+@ SYM:           0xC6 0x02 ; pop {wR0, wR1, wR2}
+@ SYM:           0xC2      ; pop {wR10, wR11, wR12}
+@ SYM:           0xBA      ; pop {d8, d9, d10}
+@ SYM:           0xB3 0x12 ; pop {d1, d2, d3}
+@ SYM:           0xB2 0x80 0x04 ; vsp = vsp + 2564
+@ SYM:           0xB1 0x01 ; pop {r0}
+@ SYM:           0xB0      ; finish
+@ SYM:           0xA9      ; pop {r4, r5, lr}
+@ SYM:           0xA1      ; pop {r4, r5}
+@ SYM:           0x91      ; vsp = r1
+@ SYM:           0x84 0xC0 ; pop {r10, fp, lr}
+@ SYM:           0x80 0xC0 ; pop {r10, fp}
+@ SYM:           0x80 0x01 ; pop {r4}
+@ SYM:           0x81 0x00 ; pop {ip}
+@ SYM:           0x80 0x00 ; refuse to unwind
+@ SYM:           0x42      ; vsp = vsp - 12
+@ SYM:           0x02      ; vsp = vsp + 12
+@ SYM:         ]
 @ CHECK:     ]
 @ CHECK:   }
 @ CHECK:   UnwindIndexTable {
 @ CHECK:     SectionName: .ARM.exidx.spare
 @ CHECK:     Entries [
-@ CHECK:       Opcodes [
-@ CHECK:         0xD8      ; spare
-@ CHECK:         0xD0      ; pop {d8}
-@ CHECK:         0xCA      ; spare
-@ CHECK:         0xC9 0x00 ; pop {d0}
-@ CHECK:         0xC8 0x00 ; pop {d16}
-@ CHECK:         0xC7 0x10 ; spare
-@ CHECK:         0xC7 0x01 ; pop {wCGR0}
-@ CHECK:         0xC7 0x00 ; spare
-@ CHECK:         0xC6 0x00 ; pop {wR0}
-@ CHECK:         0xC0      ; pop {wR10}
-@ CHECK:         0xB8      ; pop {d8}
-@ CHECK:         0xB4      ; spare
-@ CHECK:         0xB3 0x00 ; pop {d0}
-@ CHECK:         0xB2 0x00 ; vsp = vsp + 516
-@ CHECK:         0xB1 0x10 ; spare
-@ CHECK:         0xB1 0x01 ; pop {r0}
-@ CHECK:         0xB1 0x00 ; spare
-@ CHECK:         0xB0      ; finish
-@ CHECK:         0xA8      ; pop {r4, lr}
-@ CHECK:         0xA0      ; pop {r4}
-@ CHECK:         0x9F      ; reserved (WiMMX MOVrr)
-@ CHECK:         0x9D      ; reserved (ARM MOVrr)
-@ CHECK:         0x91      ; vsp = r1
-@ CHECK:         0x88 0x00 ; pop {pc}
-@ CHECK:         0x80 0x00 ; refuse to unwind
-@ CHECK:         0x40      ; vsp = vsp - 4
-@ CHECK:         0x00      ; vsp = vsp + 4
-@ CHECK:       ]
+@ SYM:         Opcodes [
+@ SYM:           0xD8      ; spare
+@ SYM:           0xD0      ; pop {d8}
+@ SYM:           0xCA      ; spare
+@ SYM:           0xC9 0x00 ; pop {d0}
+@ SYM:           0xC8 0x00 ; pop {d16}
+@ SYM:           0xC7 0x10 ; spare
+@ SYM:           0xC7 0x01 ; pop {wCGR0}
+@ SYM:           0xC7 0x00 ; spare
+@ SYM:           0xC6 0x00 ; pop {wR0}
+@ SYM:           0xC0      ; pop {wR10}
+@ SYM:           0xB8      ; pop {d8}
+@ SYM:           0xB4      ; spare
+@ SYM:           0xB3 0x00 ; pop {d0}
+@ SYM:           0xB2 0x00 ; vsp = vsp + 516
+@ SYM:           0xB1 0x10 ; spare
+@ SYM:           0xB1 0x01 ; pop {r0}
+@ SYM:           0xB1 0x00 ; spare
+@ SYM:           0xB0      ; finish
+@ SYM:           0xA8      ; pop {r4, lr}
+@ SYM:           0xA0      ; pop {r4}
+@ SYM:           0x9F      ; reserved (WiMMX MOVrr)
+@ SYM:           0x9D      ; reserved (ARM MOVrr)
+@ SYM:           0x91      ; vsp = r1
+@ SYM:           0x88 0x00 ; pop {pc}
+@ SYM:           0x80 0x00 ; refuse to unwind
+@ SYM:           0x40      ; vsp = vsp - 4
+@ SYM:           0x00      ; vsp = vsp + 4
+@ SYM:         ]
 @ CHECK:     ]
 @ CHECK:   }
 @ CHECK: }
diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
index f847041a6ae6f..11f9d6166a59a 100644
--- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -365,6 +365,8 @@ template <typename ET>
 ErrorOr<StringRef>
 PrinterContext<ET>::FunctionAtAddress(unsigned Section,
                                       uint64_t Address) const {
+  if (!Symtab)
+    return readobj_error::unknown_symbol;
   auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
   if (!StrTableOrErr)
     error(StrTableOrErr.takeError());
@@ -550,13 +552,15 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
       const Elf_Shdr *EHT =
         FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4);
 
-      if (auto Name = ELF->getSectionName(EHT))
-        SW.printString("ExceptionHandlingTable", *Name);
+      if (EHT)
+        if (auto Name = ELF->getSectionName(EHT))
+          SW.printString("ExceptionHandlingTable", *Name);
 
       uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr);
       SW.printHex("TableEntryOffset", TableEntryOffset);
 
-      PrintExceptionTable(IT, EHT, TableEntryOffset);
+      if (EHT)
+        PrintExceptionTable(IT, EHT, TableEntryOffset);
     }
   }
 }

From 48061cd999a810b6b2fc631554729df44d9699da Mon Sep 17 00:00:00 2001
From: Zi Xuan Wu <wuzish@cn.ibm.com>
Date: Wed, 29 May 2019 07:09:54 +0000
Subject: [PATCH 0458/1176] revert rC361928: [PowerPC] [Clang] Port SSE
 intrinsics to PowerPC

Because test fails in other targets rather than PowerPC

llvm-svn: 361930
---
 clang/lib/Headers/CMakeLists.txt           |    2 -
 clang/lib/Headers/ppc_wrappers/mm_malloc.h |   48 -
 clang/lib/Headers/ppc_wrappers/xmmintrin.h | 1838 -----------------
 clang/test/CodeGen/ppc-mm-malloc.c         |   71 -
 clang/test/CodeGen/ppc-mmintrin.c          |    1 -
 clang/test/CodeGen/ppc-xmmintrin.c         | 2090 --------------------
 clang/test/Headers/ppc-intrinsics.c        |   13 +
 clang/test/Headers/ppc-mmx-intrinsics.c    |   11 -
 clang/test/Headers/ppc-sse-intrinsics.c    |   20 -
 9 files changed, 13 insertions(+), 4081 deletions(-)
 delete mode 100644 clang/lib/Headers/ppc_wrappers/mm_malloc.h
 delete mode 100644 clang/lib/Headers/ppc_wrappers/xmmintrin.h
 delete mode 100644 clang/test/CodeGen/ppc-mm-malloc.c
 delete mode 100644 clang/test/CodeGen/ppc-xmmintrin.c
 create mode 100644 clang/test/Headers/ppc-intrinsics.c
 delete mode 100644 clang/test/Headers/ppc-mmx-intrinsics.c
 delete mode 100644 clang/test/Headers/ppc-sse-intrinsics.c

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f7a3e5410ced5..392ca2ae391c9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -126,8 +126,6 @@ set(cuda_wrapper_files
 
 set(ppc_wrapper_files
   ppc_wrappers/mmintrin.h
-  ppc_wrappers/xmmintrin.h
-  ppc_wrappers/mm_malloc.h
 )
 
 set(openmp_wrapper_files
diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
deleted file mode 100644
index 36589194b3e2f..0000000000000
--- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-#ifndef _MM_MALLOC_H_INCLUDED
-#define _MM_MALLOC_H_INCLUDED
-
-#include <stdlib.h>
-
-/* We can't depend on <stdlib.h> since the prototype of posix_memalign
-   may not be visible.  */
-#ifndef __cplusplus
-extern int posix_memalign (void **, size_t, size_t);
-#else
-extern "C" int posix_memalign (void **, size_t, size_t) throw ();
-#endif
-
-static __inline void *
-_mm_malloc (size_t size, size_t alignment)
-{
-  /* PowerPC64 ELF V2 ABI requires quadword alignment.  */
-  size_t vec_align = sizeof (__vector float);
-  /* Linux GLIBC malloc alignment is at least 2 X ptr size.  */
-  size_t malloc_align = (sizeof (void *) + sizeof (void *));
-  void *ptr;
-
-  if (alignment == malloc_align && alignment == vec_align)
-    return malloc (size);
-  if (alignment < vec_align)
-    alignment = vec_align;
-  if (posix_memalign (&ptr, alignment, size) == 0)
-    return ptr;
-  else
-    return NULL;
-}
-
-static __inline void
-_mm_free (void * ptr)
-{
-  free (ptr);
-}
-
-#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
deleted file mode 100644
index 1b322b66519a6..0000000000000
--- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h
+++ /dev/null
@@ -1,1838 +0,0 @@
-/*===---- xmmintrin.h - Implementation of SSE intrinsics on PowerPC --------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-
-/* Implemented from the specification included in the Intel C++ Compiler
-   User Guide and Reference, version 9.0.  */
-
-#ifndef NO_WARN_X86_INTRINSICS
-/* This header file is to help porting code using Intel intrinsics
-   explicitly from x86_64 to powerpc64/powerpc64le.
-
-   Since X86 SSE intrinsics mainly handles __m128 type, PowerPC
-   VMX/VSX ISA is a good match for vector float SIMD operations.
-   However scalar float operations in vector (XMM) registers require
-   the POWER8 VSX ISA (2.07) level. There are differences for data
-   format and placement of float scalars in the vector register, which
-   require extra steps to match SSE scalar float semantics on POWER.
-
-   It should be noted that there's much difference between X86_64's
-   MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use
-   portable <fenv.h> instead of access MXSCR directly.
-
-   Most SSE scalar float intrinsic operations can be performed more
-   efficiently as C language float scalar operations or optimized to
-   use vector SIMD operations. We recommend this for new applications. */
-#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
-#endif
-
-#ifndef _XMMINTRIN_H_INCLUDED
-#define _XMMINTRIN_H_INCLUDED
-
-/* Define four value permute mask */
-#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
-
-#include <altivec.h>
-
-/* Avoid collisions between altivec.h and strict adherence to C++ and
-   C11 standards.  This should eventually be done inside altivec.h itself,
-   but only after testing a full distro build.  */
-#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \
-				 (defined(__STDC_VERSION__) &&	\
-				  __STDC_VERSION__ >= 201112L))
-#undef vector
-#undef pixel
-#undef bool
-#endif
-
-/* We need type definitions from the MMX header file.  */
-#include <mmintrin.h>
-
-/* Get _mm_malloc () and _mm_free ().  */
-#if __STDC_HOSTED__
-#include <mm_malloc.h>
-#endif
-
-/* The Intel API is flexible enough that we must allow aliasing with other
-   vector types, and their scalar components.  */
-typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
-
-/* Unaligned version of the same type.  */
-typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
-				       __aligned__ (1)));
-
-/* Internal data types for implementing the intrinsics.  */
-typedef float __v4sf __attribute__ ((__vector_size__ (16)));
-
-/* Create an undefined vector.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_undefined_ps (void)
-{
-  __m128 __Y = __Y;
-  return __Y;
-}
-
-/* Create a vector of zeros.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_ps (void)
-{
-  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
-}
-
-/* Load four SPFP values from P.  The address must be 16-byte aligned.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_ps (float const *__P)
-{
-  return ((__m128)vec_ld(0, (__v4sf*)__P));
-}
-
-/* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadu_ps (float const *__P)
-{
-  return (vec_vsx_ld(0, __P));
-}
-
-/* Load four SPFP values in reverse order.  The address must be aligned.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadr_ps (float const *__P)
-{
-  __v4sf   __tmp;
-  __m128 result;
-  static const __vector unsigned char permute_vector =
-    { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
-	0x17, 0x10, 0x11, 0x12, 0x13 };
-
-  __tmp = vec_ld (0, (__v4sf *) __P);
-  result = (__m128) vec_perm (__tmp, __tmp, permute_vector);
-  return result;
-}
-
-/* Create a vector with all four elements equal to F.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set1_ps (float __F)
-{
-  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_ps1 (float __F)
-{
-  return _mm_set1_ps (__F);
-}
-
-/* Create the vector [Z Y X W].  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
-{
-  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
-}
-
-/* Create the vector [W X Y Z].  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setr_ps (float __Z, float __Y, float __X, float __W)
-{
-  return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
-}
-
-/* Store four SPFP values.  The address must be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_ps (float *__P, __m128 __A)
-{
-  vec_st((__v4sf)__A, 0, (__v4sf*)__P);
-}
-
-/* Store four SPFP values.  The address need not be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeu_ps (float *__P, __m128 __A)
-{
-  *(__m128_u *)__P = __A;
-}
-
-/* Store four SPFP values in reverse order.  The address must be aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storer_ps (float *__P, __m128 __A)
-{
-  __v4sf   __tmp;
-  static const __vector unsigned char permute_vector =
-    { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
-	0x17, 0x10, 0x11, 0x12, 0x13 };
-
-  __tmp = (__m128) vec_perm (__A, __A, permute_vector);
-
-  _mm_store_ps (__P, __tmp);
-}
-
-/* Store the lower SPFP value across four words.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store1_ps (float *__P, __m128 __A)
-{
-  __v4sf __va = vec_splat((__v4sf)__A, 0);
-  _mm_store_ps (__P, __va);
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_ps1 (float *__P, __m128 __A)
-{
-  _mm_store1_ps (__P, __A);
-}
-
-/* Create a vector with element 0 as F and the rest zero.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_set_ss (float __F)
-{
-  return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
-}
-
-/* Sets the low SPFP value of A from the low value of B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_move_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-
-  return (vec_sel ((__v4sf)__A, (__v4sf)__B, mask));
-}
-
-/* Create a vector with element 0 as *P and the rest zero.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_ss (float const *__P)
-{
-  return _mm_set_ss (*__P);
-}
-
-/* Stores the lower SPFP value.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_store_ss (float *__P, __m128 __A)
-{
-  *__P = ((__v4sf)__A)[0];
-}
-
-/* Perform the respective operation on the lower SPFP (single-precision
-   floating-point) values of A and B; the upper three SPFP values are
-   passed through from A.  */
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_ss (__m128 __A, __m128 __B)
-{
-#ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-     results. So to insure we don't generate spurious exceptions
-     (from the upper double values) we splat the lower double
-     before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a + b;
-  /* Then we merge the lower float result with the original upper
-     float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-#else
-  __A[0] = __A[0] + __B[0];
-  return (__A);
-#endif
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_ss (__m128 __A, __m128 __B)
-{
-#ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-     results. So to insure we don't generate spurious exceptions
-     (from the upper double values) we splat the lower double
-     before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a - b;
-  /* Then we merge the lower float result with the original upper
-     float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-#else
-  __A[0] = __A[0] - __B[0];
-  return (__A);
-#endif
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_ss (__m128 __A, __m128 __B)
-{
-#ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-     results. So to insure we don't generate spurious exceptions
-     (from the upper double values) we splat the lower double
-     before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a * b;
-  /* Then we merge the lower float result with the original upper
-     float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-#else
-  __A[0] = __A[0] * __B[0];
-  return (__A);
-#endif
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_ss (__m128 __A, __m128 __B)
-{
-#ifdef _ARCH_PWR7
-  __m128 a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-     results. So to insure we don't generate spurious exceptions
-     (from the upper double values) we splat the lower double
-     before we to the operation.  */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = a / b;
-  /* Then we merge the lower float result with the original upper
-     float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-#else
-  __A[0] = __A[0] / __B[0];
-  return (__A);
-#endif
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_ss (__m128 __A)
-{
-  __m128 a, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper double values) we splat the lower double
-   * before we to the operation. */
-  a = vec_splat (__A, 0);
-  c = vec_sqrt (a);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-}
-
-/* Perform the respective operation on the four SPFP values in A and B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_add_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A + (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sub_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A - (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mul_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A * (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_div_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) ((__v4sf)__A / (__v4sf)__B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sqrt_ps (__m128 __A)
-{
-  return (vec_sqrt ((__v4sf)__A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp_ps (__m128 __A)
-{
-  return (vec_re ((__v4sf)__A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt_ps (__m128 __A)
-{
-  return (vec_rsqrte (__A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rcp_ss (__m128 __A)
-{
-  __m128 a, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper double values) we splat the lower double
-   * before we to the operation. */
-  a = vec_splat (__A, 0);
-  c = _mm_rcp_ps (a);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_rsqrt_ss (__m128 __A)
-{
-  __m128 a, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower double)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper double values) we splat the lower double
-   * before we to the operation. */
-  a = vec_splat (__A, 0);
-  c = vec_rsqrte (a);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return (vec_sel (__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_ss (__m128 __A, __m128 __B)
-{
-  __v4sf a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower float)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper float values) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf)__A, 0);
-  b = vec_splat ((__v4sf)__B, 0);
-  c = vec_min (a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return (vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_ss (__m128 __A, __m128 __B)
-{
-  __v4sf a, b, c;
-  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
-  /* PowerISA VSX does not allow partial (for just lower float)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper float values) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat (__A, 0);
-  b = vec_splat (__B, 0);
-  c = vec_max (a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return (vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_ps (__m128 __A, __m128 __B)
-{
-  __vector __bool int m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
-  return vec_sel (__B, __A, m);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_ps (__m128 __A, __m128 __B)
-{
-  __vector __bool int m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
-  return vec_sel (__B, __A, m);
-}
-
-/* Perform logical bit-wise operations on 128-bit values.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_and_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_and ((__v4sf)__A, (__v4sf)__B));
-//  return __builtin_ia32_andps (__A, __B);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_andnot_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_andc ((__v4sf)__B, (__v4sf)__A));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_or_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_or ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_xor_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_xor ((__v4sf)__A, (__v4sf)__B));
-}
-
-/* Perform a comparison on the four SPFP values of A and B.  For each
-   element, if the comparison is true, place a mask of all ones in the
-   result, otherwise a mask of zeros.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmpeq ((__v4sf)__A,(__v4sf) __B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_ps (__m128  __A, __m128  __B)
-{
-  __v4sf temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B);
-  return ((__m128)vec_nor (temp, temp));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnlt_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnle_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpngt_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnge_ps (__m128 __A, __m128 __B)
-{
-  return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B));
-}
-
-extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpord_ps (__m128  __A, __m128  __B)
-{
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
-    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
-
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
-  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
-  return ((__m128 ) vec_and (c, d));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpunord_ps (__m128 __A, __m128 __B)
-{
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
-    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
-
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
-  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
-  return ((__m128 ) vec_or (c, d));
-}
-
-/* Perform a comparison on the lower SPFP values of A and B.  If the
-   comparison is true, place a mask of all ones in the result, otherwise a
-   mask of zeros.  The upper three SPFP values are passed through from A.  */
-extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpeq_ss (__m128  __A, __m128  __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpeq(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmplt_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmplt(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmple_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmple(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpgt_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpgt(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpge_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpge(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpneq_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpeq(a, b);
-  c = vec_nor (c, c);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnlt_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpge(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnle_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmpgt(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpngt_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we to the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmple(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpnge_ss (__m128 __A, __m128 __B)
-{
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-  __v4sf a, b, c;
-  /* PowerISA VMX does not allow partial (for just element 0)
-   * results. So to insure we don't generate spurious exceptions
-   * (from the upper elements) we splat the lower float
-   * before we do the operation. */
-  a = vec_splat ((__v4sf) __A, 0);
-  b = vec_splat ((__v4sf) __B, 0);
-  c = (__v4sf) vec_cmplt(a, b);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpord_ss (__m128 __A, __m128 __B)
-{
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
-    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
-  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
-  c = vec_and (c, d);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmpunord_ss (__m128 __A, __m128 __B)
-{
-  __vector unsigned int a, b;
-  __vector unsigned int c, d;
-  static const __vector unsigned int float_exp_mask =
-    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
-  static const __vector unsigned int mask =
-    { 0xffffffff, 0, 0, 0 };
-
-  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
-  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
-  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
-  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
-  c = vec_or (c, d);
-  /* Then we merge the lower float result with the original upper
-   * float elements from __A.  */
-  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
-}
-
-/* Compare the lower SPFP values of A and B and return 1 if true
-   and 0 if false.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comieq_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] == __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comilt_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] < __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comile_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] <= __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comigt_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] > __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comige_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] >= __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_comineq_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] != __B[0]);
-}
-
-/* FIXME
- * The __mm_ucomi??_ss implementations below are exactly the same as
- * __mm_comi??_ss because GCC for PowerPC only generates unordered
- * compares (scalar and vector).
- * Technically __mm_comieq_ss et al should be using the ordered
- * compare and signal for QNaNs.
- * The __mm_ucomieq_sd et all should be OK, as is.
- */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomieq_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] == __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomilt_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] < __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomile_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] <= __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomigt_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] > __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomige_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] >= __B[0]);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ucomineq_ss (__m128 __A, __m128 __B)
-{
-  return (__A[0] != __B[0]);
-}
-
-extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_f32 (__m128 __A)
-{
-  return ((__v4sf)__A)[0];
-}
-
-/* Convert the lower SPFP value to a 32-bit integer according to the current
-   rounding mode.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_si32 (__m128 __A)
-{
-  __m64 res = 0;
-#ifdef _ARCH_PWR8
-  double dtmp;
-  __asm__(
-#ifdef __LITTLE_ENDIAN__
-      "xxsldwi %x0,%x0,%x0,3;\n"
-#endif
-      "xscvspdp %x2,%x0;\n"
-      "fctiw  %2,%2;\n"
-      "mfvsrd  %1,%x2;\n"
-      : "+wa" (__A),
-        "=r" (res),
-        "=f" (dtmp)
-      : );
-#else
-  res = __builtin_rint(__A[0]);
-#endif
-  return (res);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_ss2si (__m128 __A)
-{
-  return _mm_cvtss_si32 (__A);
-}
-
-/* Convert the lower SPFP value to a 32-bit integer according to the
-   current rounding mode.  */
-
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_si64 (__m128 __A)
-{
-  __m64 res = 0;
-#ifdef _ARCH_PWR8
-  double dtmp;
-  __asm__(
-#ifdef __LITTLE_ENDIAN__
-      "xxsldwi %x0,%x0,%x0,3;\n"
-#endif
-      "xscvspdp %x2,%x0;\n"
-      "fctid  %2,%2;\n"
-      "mfvsrd  %1,%x2;\n"
-      : "+wa" (__A),
-        "=r" (res),
-        "=f" (dtmp)
-      : );
-#else
-  res = __builtin_llrint(__A[0]);
-#endif
-  return (res);
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtss_si64x (__m128 __A)
-{
-  return _mm_cvtss_si64 ((__v4sf) __A);
-}
-
-/* Constants for use with _mm_prefetch.  */
-enum _mm_hint
-{
-  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
-  _MM_HINT_ET0 = 7,
-  _MM_HINT_ET1 = 6,
-  _MM_HINT_T0 = 3,
-  _MM_HINT_T1 = 2,
-  _MM_HINT_T2 = 1,
-  _MM_HINT_NTA = 0
-};
-
-/* Loads one cache line from address P to a location "closer" to the
-   processor.  The selector I specifies the type of prefetch operation.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_prefetch (const void *__P, enum _mm_hint __I)
-{
-  /* Current PowerPC will ignores the hint parameters.  */
-  __builtin_prefetch (__P);
-}
-
-/* Convert the two lower SPFP values to 32-bit integers according to the
-   current rounding mode.  Return the integers in packed form.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pi32 (__m128 __A)
-{
-  /* Splat two lower SPFP values to both halves.  */
-  __v4sf temp, rounded;
-  __vector unsigned long long result;
-
-  /* Splat two lower SPFP values to both halves.  */
-  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  rounded = vec_rint(temp);
-  result = (__vector unsigned long long) vec_cts (rounded, 0);
-
-  return (__m64) ((__vector long long) result)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_ps2pi (__m128 __A)
-{
-  return _mm_cvtps_pi32 (__A);
-}
-
-/* Truncate the lower SPFP value to a 32-bit integer.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_si32 (__m128 __A)
-{
-  /* Extract the lower float element.  */
-  float temp = __A[0];
-  /* truncate to 32-bit integer and return.  */
-  return temp;
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_ss2si (__m128 __A)
-{
-  return _mm_cvttss_si32 (__A);
-}
-
-/* Intel intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_si64 (__m128 __A)
-{
-  /* Extract the lower float element.  */
-  float temp = __A[0];
-  /* truncate to 32-bit integer and return.  */
-  return temp;
-}
-
-/* Microsoft intrinsic.  */
-extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttss_si64x (__m128 __A)
-{
-  /* Extract the lower float element.  */
-  float temp = __A[0];
-  /* truncate to 32-bit integer and return.  */
-  return temp;
-}
-
-/* Truncate the two lower SPFP values to 32-bit integers.  Return the
-   integers in packed form.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvttps_pi32 (__m128 __A)
-{
-  __v4sf temp;
-  __vector unsigned long long result;
-
-  /* Splat two lower SPFP values to both halves.  */
-  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
-  result = (__vector unsigned long long) vec_cts (temp, 0);
-
-  return (__m64) ((__vector long long) result)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtt_ps2pi (__m128 __A)
-{
-  return _mm_cvttps_pi32 (__A);
-}
-
-/* Convert B to a SPFP value and insert it as element zero in A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi32_ss (__m128 __A, int __B)
-{
-  float temp = __B;
-  __A[0] = temp;
-
-  return __A;
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_si2ss (__m128 __A, int __B)
-{
-  return _mm_cvtsi32_ss (__A, __B);
-}
-
-/* Convert B to a SPFP value and insert it as element zero in A.  */
-/* Intel intrinsic.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64_ss (__m128 __A, long long __B)
-{
-  float temp = __B;
-  __A[0] = temp;
-
-  return __A;
-}
-
-/* Microsoft intrinsic.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsi64x_ss (__m128 __A, long long __B)
-{
-  return _mm_cvtsi64_ss (__A, __B);
-}
-
-/* Convert the two 32-bit values in B to SPFP form and insert them
-   as the two lower elements in A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi32_ps (__m128        __A, __m64        __B)
-{
-  __vector signed int vm1;
-  __vector float vf1;
-
-  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
-  vf1 = (__vector float) vec_ctf (vm1, 0);
-
-  return ((__m128) (__vector unsigned long long)
-    { ((__vector unsigned long long)vf1) [0],
-	((__vector unsigned long long)__A) [1]});
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvt_pi2ps (__m128 __A, __m64 __B)
-{
-  return _mm_cvtpi32_ps (__A, __B);
-}
-
-/* Convert the four signed 16-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi16_ps (__m64 __A)
-{
-  __vector signed short vs8;
-  __vector signed int vi4;
-  __vector float vf1;
-
-  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
-  vi4 = vec_vupklsh (vs8);
-  vf1 = (__vector float) vec_ctf (vi4, 0);
-
-  return (__m128) vf1;
-}
-
-/* Convert the four unsigned 16-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpu16_ps (__m64 __A)
-{
-  const __vector unsigned short zero =
-    { 0, 0, 0, 0, 0, 0, 0, 0 };
-  __vector unsigned short vs8;
-  __vector unsigned int vi4;
-  __vector float vf1;
-
-  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
-  vi4 = (__vector unsigned int) vec_mergel
-#ifdef __LITTLE_ENDIAN__
-                                           (vs8, zero);
-#else
-                                           (zero, vs8);
-#endif
-  vf1 = (__vector float) vec_ctf (vi4, 0);
-
-  return (__m128) vf1;
-}
-
-/* Convert the low four signed 8-bit values in A to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi8_ps (__m64 __A)
-{
-  __vector signed char vc16;
-  __vector signed short vs8;
-  __vector signed int vi4;
-  __vector float vf1;
-
-  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
-  vs8 = vec_vupkhsb (vc16);
-  vi4 = vec_vupkhsh (vs8);
-  vf1 = (__vector float) vec_ctf (vi4, 0);
-
-  return (__m128) vf1;
-}
-
-/* Convert the low four unsigned 8-bit values in A to SPFP form.  */
-extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
-_mm_cvtpu8_ps (__m64  __A)
-{
-  const __vector unsigned char zero =
-    { 0, 0, 0, 0, 0, 0, 0, 0 };
-  __vector unsigned char vc16;
-  __vector unsigned short vs8;
-  __vector unsigned int vi4;
-  __vector float vf1;
-
-  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
-#ifdef __LITTLE_ENDIAN__
-  vs8 = (__vector unsigned short) vec_mergel (vc16, zero);
-  vi4 = (__vector unsigned int) vec_mergeh (vs8,
-					    (__vector unsigned short) zero);
-#else
-  vs8 = (__vector unsigned short) vec_mergel (zero, vc16);
-  vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero,
-                                            vs8);
-#endif
-  vf1 = (__vector float) vec_ctf (vi4, 0);
-
-  return (__m128) vf1;
-}
-
-/* Convert the four signed 32-bit values in A and B to SPFP form.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtpi32x2_ps (__m64 __A, __m64 __B)
-{
-  __vector signed int vi4;
-  __vector float vf4;
-
-  vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B };
-  vf4 = (__vector float) vec_ctf (vi4, 0);
-  return (__m128) vf4;
-}
-
-/* Convert the four SPFP values in A to four signed 16-bit integers.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pi16 (__m128 __A)
-{
-  __v4sf rounded;
-  __vector signed int temp;
-  __vector unsigned long long result;
-
-  rounded = vec_rint(__A);
-  temp = vec_cts (rounded, 0);
-  result = (__vector unsigned long long) vec_pack (temp, temp);
-
-  return (__m64) ((__vector long long) result)[0];
-}
-
-/* Convert the four SPFP values in A to four signed 8-bit integers.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtps_pi8 (__m128 __A)
-{
-  __v4sf rounded;
-  __vector signed int tmp_i;
-  static const __vector signed int zero = {0, 0, 0, 0};
-  __vector signed short tmp_s;
-  __vector signed char res_v;
-
-  rounded = vec_rint(__A);
-  tmp_i = vec_cts (rounded, 0);
-  tmp_s = vec_pack (tmp_i, zero);
-  res_v = vec_pack (tmp_s, tmp_s);
-  return (__m64) ((__vector long long) res_v)[0];
-}
-
-/* Selects four specific SPFP values from A and B based on MASK.  */
-extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
-_mm_shuffle_ps (__m128  __A, __m128  __B, int const __mask)
-{
-  unsigned long element_selector_10 = __mask & 0x03;
-  unsigned long element_selector_32 = (__mask >> 2) & 0x03;
-  unsigned long element_selector_54 = (__mask >> 4) & 0x03;
-  unsigned long element_selector_76 = (__mask >> 6) & 0x03;
-  static const unsigned int permute_selectors[4] =
-    {
-#ifdef __LITTLE_ENDIAN__
-      0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
-#else
-      0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
-#endif
-    };
-  __vector unsigned int t;
-
-  t[0] = permute_selectors[element_selector_10];
-  t[1] = permute_selectors[element_selector_32];
-  t[2] = permute_selectors[element_selector_54] + 0x10101010;
-  t[3] = permute_selectors[element_selector_76] + 0x10101010;
-  return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t);
-}
-
-/* Selects and interleaves the upper two SPFP values from A and B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpackhi_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) vec_vmrglw ((__v4sf) __A, (__v4sf)__B);
-}
-
-/* Selects and interleaves the lower two SPFP values from A and B.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_unpacklo_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) vec_vmrghw ((__v4sf) __A, (__v4sf)__B);
-}
-
-/* Sets the upper two SPFP values with 64-bits of data loaded from P;
-   the lower two values are passed through from A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadh_pi (__m128 __A, __m64 const *__P)
-{
-  __vector unsigned long long __a = (__vector unsigned long long)__A;
-  __vector unsigned long long __p = vec_splats(*__P);
-  __a [1] = __p [1];
-
-  return (__m128)__a;
-}
-
-/* Stores the upper two SPFP values of A into P.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storeh_pi (__m64 *__P, __m128 __A)
-{
-  __vector unsigned long long __a = (__vector unsigned long long) __A;
-
-  *__P = __a[1];
-}
-
-/* Moves the upper two values of B into the lower two values of A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movehl_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) vec_mergel ((__vector unsigned long long)__B,
-			      (__vector unsigned long long)__A);
-}
-
-/* Moves the lower two values of B into the upper two values of A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movelh_ps (__m128 __A, __m128 __B)
-{
-  return (__m128) vec_mergeh ((__vector unsigned long long)__A,
-			      (__vector unsigned long long)__B);
-}
-
-/* Sets the lower two SPFP values with 64-bits of data loaded from P;
-   the upper two values are passed through from A.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadl_pi (__m128 __A, __m64 const *__P)
-{
-  __vector unsigned long long __a = (__vector unsigned long long)__A;
-  __vector unsigned long long __p = vec_splats(*__P);
-  __a [0] = __p [0];
-
-  return (__m128)__a;
-}
-
-/* Stores the lower two SPFP values of A into P.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_storel_pi (__m64 *__P, __m128 __A)
-{
-  __vector unsigned long long __a = (__vector unsigned long long) __A;
-
-  *__P = __a[0];
-}
-
-#ifdef _ARCH_PWR8
-/* Intrinsic functions that require PowerISA 2.07 minimum.  */
-
-/* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_ps (__m128  __A)
-{
-  __vector unsigned long long result;
-  static const __vector unsigned int perm_mask =
-    {
-#ifdef __LITTLE_ENDIAN__
-	0x00204060, 0x80808080, 0x80808080, 0x80808080
-#else
-      0x80808080, 0x80808080, 0x80808080, 0x00204060
-#endif
-    };
-
-  result = ((__vector unsigned long long)
-	    vec_vbpermq ((__vector unsigned char) __A,
-			 (__vector unsigned char) perm_mask));
-
-#ifdef __LITTLE_ENDIAN__
-  return result[1];
-#else
-  return result[0];
-#endif
-}
-#endif /* _ARCH_PWR8 */
-
-/* Create a vector with all four elements equal to *P.  */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load1_ps (float const *__P)
-{
-  return _mm_set1_ps (*__P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_load_ps1 (float const *__P)
-{
-  return _mm_load1_ps (__P);
-}
-
-/* Extracts one of the four words of A.  The selector N must be immediate.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_extract_pi16 (__m64 const __A, int const __N)
-{
-  unsigned int shiftr = __N & 3;
-#ifdef __BIG_ENDIAN__
-  shiftr = 3 - shiftr;
-#endif
-
-  return ((__A >> (shiftr * 16)) & 0xffff);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pextrw (__m64 const __A, int const __N)
-{
-  return _mm_extract_pi16 (__A, __N);
-}
-
-/* Inserts word D into one of four words of A.  The selector N must be
-   immediate.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
-{
-  const int shiftl = (__N & 3) * 16;
-  const __m64 shiftD = (const __m64) __D << shiftl;
-  const __m64 mask = 0xffffUL << shiftl;
-  __m64 result = (__A & (~mask)) | (shiftD & mask);
-
-  return (result);
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pinsrw (__m64 const __A, int const __D, int const __N)
-{
-  return _mm_insert_pi16 (__A, __D, __N);
-}
-
-/* Compute the element-wise maximum of signed 16-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
-_mm_max_pi16 (__m64 __A, __m64 __B)
-{
-#if _ARCH_PWR8
-  __vector signed short a, b, r;
-  __vector __bool short c;
-
-  a = (__vector signed short)vec_splats (__A);
-  b = (__vector signed short)vec_splats (__B);
-  c = (__vector __bool short)vec_cmpgt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
-#else
-  __m64_union m1, m2, res;
-
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
-
-  res.as_short[0] =
-      (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
-  res.as_short[1] =
-      (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
-  res.as_short[2] =
-      (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
-  res.as_short[3] =
-      (m1.as_short[3] > m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
-
-  return (__m64) res.as_m64;
-#endif
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmaxsw (__m64 __A, __m64 __B)
-{
-  return _mm_max_pi16 (__A, __B);
-}
-
-/* Compute the element-wise maximum of unsigned 8-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_max_pu8 (__m64 __A, __m64 __B)
-{
-#if _ARCH_PWR8
-  __vector unsigned char a, b, r;
-  __vector __bool char c;
-
-  a = (__vector unsigned char)vec_splats (__A);
-  b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector __bool char)vec_cmpgt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
-#else
-  __m64_union m1, m2, res;
-  long i;
-
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
-
-
-  for (i = 0; i < 8; i++)
-  res.as_char[i] =
-      ((unsigned char) m1.as_char[i] > (unsigned char) m2.as_char[i]) ?
-	  m1.as_char[i] : m2.as_char[i];
-
-  return (__m64) res.as_m64;
-#endif
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmaxub (__m64 __A, __m64 __B)
-{
-  return _mm_max_pu8 (__A, __B);
-}
-
-/* Compute the element-wise minimum of signed 16-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_pi16 (__m64 __A, __m64 __B)
-{
-#if _ARCH_PWR8
-  __vector signed short a, b, r;
-  __vector __bool short c;
-
-  a = (__vector signed short)vec_splats (__A);
-  b = (__vector signed short)vec_splats (__B);
-  c = (__vector __bool short)vec_cmplt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
-#else
-  __m64_union m1, m2, res;
-
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
-
-  res.as_short[0] =
-      (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
-  res.as_short[1] =
-      (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
-  res.as_short[2] =
-      (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
-  res.as_short[3] =
-      (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
-
-  return (__m64) res.as_m64;
-#endif
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pminsw (__m64 __A, __m64 __B)
-{
-  return _mm_min_pi16 (__A, __B);
-}
-
-/* Compute the element-wise minimum of unsigned 8-bit values.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_min_pu8 (__m64 __A, __m64 __B)
-{
-#if _ARCH_PWR8
-  __vector unsigned char a, b, r;
-  __vector __bool char c;
-
-  a = (__vector unsigned char)vec_splats (__A);
-  b = (__vector unsigned char)vec_splats (__B);
-  c = (__vector __bool char)vec_cmplt (a, b);
-  r = vec_sel (b, a, c);
-  return (__m64) ((__vector long long) r)[0];
-#else
-  __m64_union m1, m2, res;
-  long i;
-
-  m1.as_m64 = __A;
-  m2.as_m64 = __B;
-
-
-  for (i = 0; i < 8; i++)
-  res.as_char[i] =
-      ((unsigned char) m1.as_char[i] < (unsigned char) m2.as_char[i]) ?
-	  m1.as_char[i] : m2.as_char[i];
-
-  return (__m64) res.as_m64;
-#endif
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pminub (__m64 __A, __m64 __B)
-{
-  return _mm_min_pu8 (__A, __B);
-}
-
-/* Create an 8-bit mask of the signs of 8-bit values.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_movemask_pi8 (__m64 __A)
-{
-  unsigned long long p =
-#ifdef __LITTLE_ENDIAN__
-                         0x0008101820283038UL; // permute control for sign bits
-#else
-                         0x3830282018100800UL; // permute control for sign bits
-#endif
-  return __builtin_bpermd (p, __A);
-}
-
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmovmskb (__m64 __A)
-{
-  return _mm_movemask_pi8 (__A);
-}
-
-/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
-   in B and produce the high 16 bits of the 32-bit results.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-{
-  __vector unsigned short a, b;
-  __vector unsigned short c;
-  __vector unsigned int w0, w1;
-  __vector unsigned char xform1 = {
-#ifdef __LITTLE_ENDIAN__
-      0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
-      0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
-#else
-      0x00, 0x01, 0x10, 0x11,  0x04, 0x05, 0x14, 0x15,
-      0x00, 0x01, 0x10, 0x11,  0x04, 0x05, 0x14, 0x15
-#endif
-    };
-
-  a = (__vector unsigned short)vec_splats (__A);
-  b = (__vector unsigned short)vec_splats (__B);
-
-  w0 = vec_vmuleuh (a, b);
-  w1 = vec_vmulouh (a, b);
-  c = (__vector unsigned short)vec_perm (w0, w1, xform1);
-
-  return (__m64) ((__vector long long) c)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pmulhuw (__m64 __A, __m64 __B)
-{
-  return _mm_mulhi_pu16 (__A, __B);
-}
-
-/* Return a combination of the four 16-bit values in A.  The selector
-   must be an immediate.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_shuffle_pi16 (__m64 __A, int const __N)
-{
-  unsigned long element_selector_10 = __N & 0x03;
-  unsigned long element_selector_32 = (__N >> 2) & 0x03;
-  unsigned long element_selector_54 = (__N >> 4) & 0x03;
-  unsigned long element_selector_76 = (__N >> 6) & 0x03;
-  static const unsigned short permute_selectors[4] =
-    {
-#ifdef __LITTLE_ENDIAN__
-	      0x0908, 0x0B0A, 0x0D0C, 0x0F0E
-#else
-	      0x0607, 0x0405, 0x0203, 0x0001
-#endif
-    };
-  __m64_union t;
-  __vector unsigned long long a, p, r;
-
-#ifdef __LITTLE_ENDIAN__
-  t.as_short[0] = permute_selectors[element_selector_10];
-  t.as_short[1] = permute_selectors[element_selector_32];
-  t.as_short[2] = permute_selectors[element_selector_54];
-  t.as_short[3] = permute_selectors[element_selector_76];
-#else
-  t.as_short[3] = permute_selectors[element_selector_10];
-  t.as_short[2] = permute_selectors[element_selector_32];
-  t.as_short[1] = permute_selectors[element_selector_54];
-  t.as_short[0] = permute_selectors[element_selector_76];
-#endif
-  p = vec_splats (t.as_m64);
-  a = vec_splats (__A);
-  r = vec_perm (a, a, (__vector unsigned char)p);
-  return (__m64) ((__vector long long) r)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pshufw (__m64 __A, int const __N)
-{
-  return _mm_shuffle_pi16 (__A, __N);
-}
-
-/* Conditionally store byte elements of A into P.  The high bit of each
-   byte in the selector N determines whether the corresponding byte from
-   A is stored.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
-{
-  __m64 hibit = 0x8080808080808080UL;
-  __m64 mask, tmp;
-  __m64 *p = (__m64*)__P;
-
-  tmp = *p;
-  mask = _mm_cmpeq_pi8 ((__N & hibit), hibit);
-  tmp = (tmp & (~mask)) | (__A & mask);
-  *p = tmp;
-}
-
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_maskmovq (__m64 __A, __m64 __N, char *__P)
-{
-  _mm_maskmove_si64 (__A, __N, __P);
-}
-
-/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_avg_pu8 (__m64 __A, __m64 __B)
-{
-  __vector unsigned char a, b, c;
-
-  a = (__vector unsigned char)vec_splats (__A);
-  b = (__vector unsigned char)vec_splats (__B);
-  c = vec_avg (a, b);
-  return (__m64) ((__vector long long) c)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pavgb (__m64 __A, __m64 __B)
-{
-  return _mm_avg_pu8 (__A, __B);
-}
-
-/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_avg_pu16 (__m64 __A, __m64 __B)
-{
-  __vector unsigned short a, b, c;
-
-  a = (__vector unsigned short)vec_splats (__A);
-  b = (__vector unsigned short)vec_splats (__B);
-  c = vec_avg (a, b);
-  return (__m64) ((__vector long long) c)[0];
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_pavgw (__m64 __A, __m64 __B)
-{
-  return _mm_avg_pu16 (__A, __B);
-}
-
-/* Compute the sum of the absolute differences of the unsigned 8-bit
-   values in A and B.  Return the value in the lower 16-bit word; the
-   upper words are cleared.  */
-extern __inline    __m64    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sad_pu8 (__m64  __A, __m64  __B)
-{
-  __vector unsigned char a, b;
-  __vector unsigned char vmin, vmax, vabsdiff;
-  __vector signed int vsum;
-  const __vector unsigned int zero =
-    { 0, 0, 0, 0 };
-  __m64_union result = {0};
-
-  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
-  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
-  vmin = vec_min (a, b);
-  vmax = vec_max (a, b);
-  vabsdiff = vec_sub (vmax, vmin);
-  /* Sum four groups of bytes into integers.  */
-  vsum = (__vector signed int) vec_sum4s (vabsdiff, zero);
-  /* Sum across four integers with integer result.  */
-  vsum = vec_sums (vsum, (__vector signed int) zero);
-  /* The sum is in the right most 32-bits of the vector result.
-     Transfer to a GPR and truncate to 16 bits.  */
-  result.as_short[0] = vsum[3];
-  return result.as_m64;
-}
-
-extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_m_psadbw (__m64 __A, __m64 __B)
-{
-  return _mm_sad_pu8 (__A, __B);
-}
-
-/* Stores the data in A to the address P without polluting the caches.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_pi (__m64 *__P, __m64 __A)
-{
-  /* Use the data cache block touch for store transient.  */
-  __asm__ (
-    "	dcbtstt	0,%0"
-    :
-    : "b" (__P)
-    : "memory"
-  );
-  *__P = __A;
-}
-
-/* Likewise.  The address must be 16-byte aligned.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_stream_ps (float *__P, __m128 __A)
-{
-  /* Use the data cache block touch for store transient.  */
-  __asm__ (
-    "	dcbtstt	0,%0"
-    :
-    : "b" (__P)
-    : "memory"
-  );
-  _mm_store_ps (__P, __A);
-}
-
-/* Guarantees that every preceding store is globally visible before
-   any subsequent store.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_sfence (void)
-{
-  /* Generate a light weight sync.  */
-  __atomic_thread_fence (__ATOMIC_RELEASE);
-}
-
-/* The execution of the next instruction is delayed by an implementation
-   specific amount of time.  The instruction does not modify the
-   architectural state.  This is after the pop_options pragma because
-   it does not require SSE support in the processor--the encoding is a
-   nop on processors that do not support it.  */
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_pause (void)
-{
-  /* There is no exact match with this construct, but the following is
-     close to the desired effect.  */
-#if _ARCH_PWR8
-  /* On power8 and later processors we can depend on Program Priority
-     (PRI) and associated "very low" PPI setting.  Since we don't know
-     what PPI this thread is running at we: 1) save the current PRI
-     from the PPR SPR into a local GRP, 2) set the PRI to "very low*
-     via the special or 31,31,31 encoding. 3) issue an "isync" to
-     insure the PRI change takes effect before we execute any more
-     instructions.
-     Now we can execute a lwsync (release barrier) while we execute
-     this thread at "very low" PRI.  Finally we restore the original
-     PRI and continue execution.  */
-  unsigned long __PPR;
-
-  __asm__ volatile (
-    "	mfppr	%0;"
-    "   or 31,31,31;"
-    "   isync;"
-    "   lwsync;"
-    "   isync;"
-    "   mtppr	%0;"
-    : "=r" (__PPR)
-    :
-    : "memory"
-  );
-#else
-  /* For older processor where we may not even have Program Priority
-     controls we can only depend on Heavy Weight Sync.  */
-  __atomic_thread_fence (__ATOMIC_SEQ_CST);
-#endif
-}
-
-/* Transpose the 4x4 matrix composed of row[0-3].  */
-#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
-do {									\
-  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);	\
-  __v4sf __t0 = vec_vmrghw (__r0, __r1);			\
-  __v4sf __t1 = vec_vmrghw (__r2, __r3);			\
-  __v4sf __t2 = vec_vmrglw (__r0, __r1);			\
-  __v4sf __t3 = vec_vmrglw (__r2, __r3);			\
-  (row0) = (__v4sf)vec_mergeh ((__vector long long)__t0, 	\
-			       (__vector long long)__t1);	\
-  (row1) = (__v4sf)vec_mergel ((__vector long long)__t0,	\
-			       (__vector long long)__t1);	\
-  (row2) = (__v4sf)vec_mergeh ((__vector long long)__t2,	\
-			       (__vector long long)__t3);	\
-  (row3) = (__v4sf)vec_mergel ((__vector long long)__t2,	\
-			       (__vector long long)__t3);	\
-} while (0)
-
-/* For backward source compatibility.  */
-//# include <emmintrin.h>
-
-#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/clang/test/CodeGen/ppc-mm-malloc.c b/clang/test/CodeGen/ppc-mm-malloc.c
deleted file mode 100644
index e0a20f81ee7d7..0000000000000
--- a/clang/test/CodeGen/ppc-mm-malloc.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// REQUIRES: powerpc-registered-target
-
-// The stdlib.h included in mm_malloc.h references native system header
-// like: bits/libc-header-start.h or features.h, cross-compile it may
-// require installing target headers in build env, otherwise expecting
-// failures. So this test will focus on native build only.
-
-// RUN: %clang -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
-
-#include <mm_malloc.h>
-
-void __attribute__((noinline))
-test_mm_malloc() {
-  char *buf = _mm_malloc(100, 16);
-  _mm_free(buf);
-}
-
-// CHECK-LABEL: @test_mm_malloc
-
-// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
-// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
-// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
-// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
-// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG11]]:
-// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
-// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG16]]:
-// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
-// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
-// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG12]]:
-// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
-// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG23]]:
-// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
-// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
-// CHECK: [[REG24]]:
-// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
-// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
-// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
-// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG31]]:
-// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
-// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
-// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG32]]:
-// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
-// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG19]]:
-// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
-// CHECK-NEXT: ret i8* [[REG34]]
-
-// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
-// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
-// CHECK-NEXT: call void @free(i8* [[REG37]])
-// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/ppc-mmintrin.c b/clang/test/CodeGen/ppc-mmintrin.c
index 65a44570c0264..212a387ec35b8 100644
--- a/clang/test/CodeGen/ppc-mmintrin.c
+++ b/clang/test/CodeGen/ppc-mmintrin.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
 // RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c
deleted file mode 100644
index d2d0334a09446..0000000000000
--- a/clang/test/CodeGen/ppc-xmmintrin.c
+++ /dev/null
@@ -1,2090 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// REQUIRES: powerpc-registered-target
-
-// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
-// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-BE
-// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
-// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-LE
-
-#include <xmmintrin.h>
-
-__m128 res, m1, m2;
-__m64 res64, ms[2];
-float fs[4];
-int i, i2;
-long long i64;
-
-// CHECK-LE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
-// CHECK-BE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2
-
-// CHECK-LE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
-// CHECK-BE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
-
-void __attribute__((noinline))
-test_add() {
-  res = _mm_add_ps(m1, m2);
-  res = _mm_add_ss(m1, m2);
-}
-
-// CHECK-LABEL: @test_add
-
-// CHECK: define available_externally <4 x float> @_mm_add_ps(<4 x float> [[REG1:[0-9a-zA-Z_%.]+]], <4 x float> [[REG2:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG1]], <4 x float>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG2]], <4 x float>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG3]], align 16
-// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG4]], align 16
-// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG5]], [[REG6]]
-// CHECK-NEXT: ret <4 x float> [[REG7]]
-
-// CHECK: define available_externally <4 x float> @_mm_add_ss(<4 x float> [[REG8:[0-9a-zA-Z_%.]+]], <4 x float> [[REG9:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG8]], <4 x float>* [[REG10:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG9]], <4 x float>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
-// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG12]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG11]], align 16
-// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG15]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG16]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
-// CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
-// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG18]], [[REG19]]
-// CHECK-NEXT: store <4 x float> [[REG20]], <4 x float>* [[REG21:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
-// CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG21]], align 16
-// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG22]], <4 x float> [[REG23]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG24]]
-
-void __attribute__((noinline))
-test_avg() {
-  res64 = _mm_avg_pu16(ms[0], ms[1]);
-  res64 = _mm_avg_pu8(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_avg
-
-// CHECK: define available_externally i64 @_mm_avg_pu16
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG25]])
-// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG28]])
-// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG29]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG30]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG31]], <8 x i16> [[REG32]])
-// CHECK-NEXT: store <8 x i16> [[REG33]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG34]] to <2 x i64>
-// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG35]], i32 0
-// CHECK-NEXT: ret i64 [[REG36]]
-
-// CHECK: define available_externally i64 @_mm_avg_pu8
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG37]])
-// CHECK-NEXT: [[REG39:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG38]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG39]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG40]])
-// CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG41]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG42]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG43]], <16 x i8> [[REG44]])
-// CHECK-NEXT: store <16 x i8> [[REG45]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG46]] to <2 x i64>
-// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG47]], i32 0
-// CHECK-NEXT: ret i64 [[REG48]]
-
-void __attribute__((noinline))
-test_alt_name_avg() {
-  res64 = _m_pavgw(ms[0], ms[1]);
-  res64 = _m_pavgb(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_alt_name_avg
-
-// CHECK: define available_externally i64 @_m_pavgw
-// CHECK: [[REG49:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu16
-// CHECK-NEXT: ret i64 [[REG49]]
-
-// CHECK: define available_externally i64 @_m_pavgb
-// CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu8
-// CHECK-NEXT: ret i64 [[REG50]]
-
-void __attribute__((noinline))
-test_cmp() {
-  res = _mm_cmpeq_ps(m1, m2);
-  res = _mm_cmpeq_ss(m1, m2);
-  res = _mm_cmpge_ps(m1, m2);
-  res = _mm_cmpge_ss(m1, m2);
-  res = _mm_cmpgt_ps(m1, m2);
-  res = _mm_cmpgt_ss(m1, m2);
-  res = _mm_cmple_ps(m1, m2);
-  res = _mm_cmple_ss(m1, m2);
-  res = _mm_cmplt_ps(m1, m2);
-  res = _mm_cmplt_ss(m1, m2);
-  res = _mm_cmpneq_ps(m1, m2);
-  res = _mm_cmpneq_ss(m1, m2);
-  res = _mm_cmpnge_ps(m1, m2);
-  res = _mm_cmpnge_ss(m1, m2);
-  res = _mm_cmpngt_ps(m1, m2);
-  res = _mm_cmpngt_ss(m1, m2);
-  res = _mm_cmpnle_ps(m1, m2);
-  res = _mm_cmpnle_ss(m1, m2);
-  res = _mm_cmpnlt_ps(m1, m2);
-  res = _mm_cmpnlt_ss(m1, m2);
-  res = _mm_cmpord_ps(m1, m2);
-  res = _mm_cmpord_ss(m1, m2);
-  res = _mm_cmpunord_ps(m1, m2);
-  res = _mm_cmpunord_ss(m1, m2);
-}
-
-// CHECK-LABEL: @test_cmp
-
-// CHECK: define available_externally <4 x float> @_mm_cmpeq_ps
-// CHECK: [[REG51:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG51]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG52]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpeq_ss
-// CHECK: [[REG53:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG53]], <4 x float>* [[REG54:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG55]], <4 x float>* [[REG56:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG54]], align 16
-// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG56]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG57]], <4 x float> [[REG58]])
-// CHECK: [[REG59:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG59]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpge_ps
-// CHECK: [[REG60:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG60]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG61]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpge_ss
-// CHECK: [[REG62:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG62]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG64:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG64]], <4 x float>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
-// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG65]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG66]], <4 x float> [[REG67]])
-// CHECK: [[REG68:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG68]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpgt_ps
-// CHECK: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG69]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG70]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpgt_ss
-// CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG71]], <4 x float>* [[REG72:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG73]], <4 x float>* [[REG74:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG72]], align 16
-// CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG74]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG75]], <4 x float> [[REG76]])
-// CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG77]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmple_ps
-// CHECK: [[REG78:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG78]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG79]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmple_ss
-// CHECK: [[REG80:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG80]], <4 x float>* [[REG81:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG82:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG82]], <4 x float>* [[REG83:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG81]], align 16
-// CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG83]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG84]], <4 x float> [[REG85]])
-// CHECK: [[REG86:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG86]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmplt_ps
-// CHECK: [[REG87:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG87]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG88]]
-
-// CHECK: @_mm_cmplt_ss
-// CHECK: [[REG89:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG89]], <4 x float>* [[REG90:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG90]], align 16
-// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG93]], <4 x float> [[REG94]])
-// CHECK: [[REG95:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG95]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpneq_ps
-// CHECK: [[REG96:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG96]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG97]], <4 x float>* [[REG98:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
-// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
-// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_nor(float vector[4], float vector[4])(<4 x float> [[REG99]], <4 x float> [[REG100]])
-// CHECK-NEXT: ret <4 x float> [[REG101]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpneq_ss
-// CHECK: [[REG102:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG102]], <4 x float>* [[REG103:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG104:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG104]], <4 x float>* [[REG105:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG103]], align 16
-// CHECK-NEXT: [[REG107:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG105]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG106]], <4 x float> [[REG107]])
-// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
-// CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG108]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnge_ps
-// CHECK: [[REG109:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG109]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG110]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnge_ss
-// CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG111]], <4 x float>* [[REG112:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG113:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG113]], <4 x float>* [[REG114:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG112]], align 16
-// CHECK-NEXT: [[REG116:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG114]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG115]], <4 x float> [[REG116]])
-// CHECK: [[REG117:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG117]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpngt_ps
-// CHECK: [[REG118:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG118]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG119]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpngt_ss
-// CHECK: [[REG120:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG120]], <4 x float>* [[REG121:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG122:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG122]], <4 x float>* [[REG123:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG124:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG121]], align 16
-// CHECK-NEXT: [[REG125:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG123]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG124]], <4 x float> [[REG125]])
-// CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG126]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnle_ps
-// CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG128:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG127]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG128]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnle_ss
-// CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG129]], <4 x float>* [[REG130:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG131]], <4 x float>* [[REG132:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG130]], align 16
-// CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG132]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG133]], <4 x float> [[REG134]])
-// CHECK: [[REG135:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG135]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ps
-// CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
-// CHECK-NEXT: [[REG137:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG136]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG137]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ss
-// CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG138]], <4 x float>* [[REG139:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG140]], <4 x float>* [[REG141:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG142:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG139]], align 16
-// CHECK-NEXT: [[REG143:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG141]], align 16
-// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG142]], <4 x float> [[REG143]])
-// CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG144]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpord_ps
-// CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG146:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG145]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG146]], <4 x i32>* [[REG147:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG149:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG148]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG149]], <4 x i32>* [[REG150:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG147]], align 16
-// CHECK-NEXT: [[REG152:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG151]])
-// CHECK-NEXT: store <4 x i32> [[REG152]], <4 x i32>* [[REG153:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG150]], align 16
-// CHECK-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG154]])
-// CHECK-NEXT: store <4 x i32> [[REG155]], <4 x i32>* [[REG156:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG153]], align 16
-// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG156]], align 16
-// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> {{[0-9a-zA-Z_%.]+}}, <4 x i32> {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: [[REG160:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG159]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG160]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpord_ss
-// CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
-// CHECK-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG161]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG162]], <4 x i32>* [[REG163:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG164:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
-// CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG164]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG165]], <4 x i32>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG163]], align 16
-// CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG167]])
-// CHECK-NEXT: store <4 x i32> [[REG168]], <4 x i32>* [[REG161:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG166]], align 16
-// CHECK-NEXT: [[REG170:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG169]])
-// CHECK-NEXT: store <4 x i32> [[REG170]], <4 x i32>* [[REG171:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
-// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG171]], align 16
-// CHECK-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG172]], <4 x i32> [[REG173]])
-// CHECK-NEXT: store <4 x i32> [[REG174]], <4 x i32>* [[REG161]], align 16
-// CHECK: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
-// CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG175]] to <4 x float>
-// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> [[REG176]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG177]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpunord_ps
-// CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG178]])
-// CHECK-NEXT: [[REG180:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG179]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG180]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG181]])
-// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG182]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG183]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG184:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG184]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG185]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG186]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG187]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG188:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG189:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG188]], <4 x i32> [[REG189]])
-// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG190]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG191]]
-
-// CHECK: define available_externally <4 x float> @_mm_cmpunord_ss
-// CHECK: [[REG192:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG193:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG192]])
-// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG193]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG194]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG195]])
-// CHECK-NEXT: [[REG197:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG196]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG197]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG198]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG199]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG200:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG200]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK-NEXT: store <4 x i32> [[REG201]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG204:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG202]], <4 x i32> [[REG203]])
-// CHECK-NEXT: store <4 x i32> [[REG204]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG206]] to <4 x float>
-// CHECK-NEXT: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG205]], <4 x float> [[REG207]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG208]]
-
-void __attribute__((noinline))
-test_comi() {
-  i = _mm_comieq_ss(m1, m2);
-  i = _mm_comige_ss(m1, m2);
-  i = _mm_comigt_ss(m1, m2);
-  i = _mm_comile_ss(m1, m2);
-  i = _mm_comilt_ss(m1, m2);
-  i = _mm_comineq_ss(m1, m2);
-}
-
-// CHECK-LABEL: @test_comi
-
-// CHECK: define available_externally signext i32 @_mm_comieq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG210:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG209]], i32 0
-// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG212:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG211]], i32 0
-// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG210]], [[REG212]]
-// CHECK-NEXT: [[REG214:[0-9a-zA-Z_%.]+]] = zext i1 [[REG213]] to i32
-// CHECK-NEXT: ret i32 [[REG214]]
-
-// CHECK: define available_externally signext i32 @_mm_comige_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG216:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG215]], i32 0
-// CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG218:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG217]], i32 0
-// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG216]], [[REG218]]
-// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = zext i1 [[REG219]] to i32
-// CHECK-NEXT: ret i32 [[REG220]]
-
-// CHECK: define available_externally signext i32 @_mm_comigt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG222:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG221]], i32 0
-// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG224:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG223]], i32 0
-// CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG222]], [[REG224]]
-// CHECK-NEXT: [[REG226:[0-9a-zA-Z_%.]+]] = zext i1 [[REG225]] to i32
-// CHECK-NEXT: ret i32 [[REG226]]
-
-// CHECK: define available_externally signext i32 @_mm_comile_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG227:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG228:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG227]], i32 0
-// CHECK-NEXT: [[REG229:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG230:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG229]], i32 0
-// CHECK-NEXT: [[REG231:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG228]], [[REG230]]
-// CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = zext i1 [[REG231]] to i32
-// CHECK-NEXT: ret i32 [[REG232]]
-
-// CHECK: define available_externally signext i32 @_mm_comilt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG233]], i32 0
-// CHECK-NEXT: [[REG235:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG235]], i32 0
-// CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG234]], [[REG236]]
-// CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = zext i1 [[REG237]] to i32
-// CHECK-NEXT: ret i32 [[REG238]]
-
-// CHECK: define available_externally signext i32 @_mm_comineq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG239:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG239]], i32 0
-// CHECK-NEXT: [[REG241:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG242:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG241]], i32 0
-// CHECK-NEXT: [[REG243:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG240]], [[REG242]]
-// CHECK-NEXT: [[REG244:[0-9a-zA-Z_%.]+]] = zext i1 [[REG243]] to i32
-// CHECK-NEXT: ret i32 [[REG244]]
-
-void __attribute__((noinline))
-test_convert() {
-  res = _mm_cvt_pi2ps(m1, ms[1]);
-  res64 = _mm_cvt_ps2pi(m1);
-  res = _mm_cvt_si2ss(m1, i);
-  i = _mm_cvt_ss2si(m1);
-  res = _mm_cvtpi16_ps(ms[0]);
-  res = _mm_cvtpi32_ps(m1, ms[1]);
-  res = _mm_cvtpi32x2_ps(ms[0], ms[1]);
-  res = _mm_cvtpi8_ps(ms[0]);
-  res64 = _mm_cvtps_pi16(m1);
-  res64 = _mm_cvtps_pi32(m1);
-  res64 = _mm_cvtps_pi8(m1);
-  res = _mm_cvtpu16_ps(ms[0]);
-  res = _mm_cvtpu8_ps(ms[0]);
-  res = _mm_cvtsi32_ss(m1, i);
-  res = _mm_cvtsi64_ss(m1, i64);
-  fs[0] = _mm_cvtss_f32(m1);
-  i = _mm_cvtss_si32(m1);
-  i64 = _mm_cvtss_si64(m1);
-  res64 = _mm_cvtt_ps2pi(m1);
-  i = _mm_cvtt_ss2si(m1);
-  res64 = _mm_cvttps_pi32(m1);
-  i = _mm_cvttss_si32(m1);
-  i64 = _mm_cvttss_si64(m1);
-}
-
-// CHECK-LABEL: @test_convert
-
-// CHECK: define available_externally <4 x float> @_mm_cvt_pi2ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG246:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtpi32_ps(<4 x float> [[REG245]], i64 [[REG246]])
-// CHECK-NEXT: ret <4 x float> [[REG247]]
-
-// CHECK: define available_externally i64 @_mm_cvt_ps2pi
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtps_pi32(<4 x float> [[REG248]])
-// CHECK-NEXT: ret i64 [[REG249]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvt_si2ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtsi32_ss(<4 x float> [[REG250]], i32 signext [[REG251]])
-// CHECK-NEXT: ret <4 x float> [[REG252]]
-
-// CHECK: define available_externally signext i32 @_mm_cvt_ss2si
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG253:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG254:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtss_si32(<4 x float> [[REG253]])
-// CHECK-NEXT: ret i32 [[REG254]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi16_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG255:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG255]], i32 0
-// CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG256]], i64 [[REG257]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG258]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG259]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG260]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG261:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupklsh(short vector[8])(<8 x i16> [[REG261]])
-// CHECK-NEXT: store <4 x i32> [[REG262]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG263]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG264]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG265:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG265]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi32_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG266]], i32 0
-// CHECK-NEXT: [[REG268:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG267]], i64 [[REG268]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG269]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG270:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG270]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG271]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG273:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG272]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG273]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG274:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG274]] to <2 x i64>
-// CHECK-NEXT: [[REG276:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG275]], i32 0
-// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG276]], i32 0
-// CHECK-NEXT: [[REG278:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG278]] to <2 x i64>
-// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG279]], i32 1
-// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG277]], i64 [[REG280]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG281]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG283:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG282]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG283]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi32x2_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG284:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG284]], i32 0
-// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG287:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG285]], i64 [[REG286]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG287]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG288:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG288]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG289]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG290]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG291]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG292]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpi8_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG293]], i32 0
-// CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG294]], i64 [[REG295]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG296]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG298:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG297]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG298]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG300:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_vupkhsb(signed char vector[16])(<16 x i8> [[REG299]])
-// CHECK-NEXT: store <8 x i16> [[REG300]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG301:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupkhsh(short vector[8])(<8 x i16> [[REG301]])
-// CHECK-NEXT: store <4 x i32> [[REG302]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG303]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG304]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG305]]
-
-// CHECK: define available_externally i64 @_mm_cvtps_pi16
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG306:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG307:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG306]])
-// CHECK-NEXT: store <4 x float> [[REG307]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG308]], i32 0)
-// CHECK-NEXT: store <4 x i32> [[REG309]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG310]], <4 x i32> [[REG311]])
-// CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG312]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG313]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG314]], i32 0
-// CHECK-NEXT: ret i64 [[REG315]]
-
-// CHECK: define available_externally i64 @_mm_cvtps_pi32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG316:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG316]] to <2 x i64>
-// CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG317]], i32 zeroext 0)
-// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG318]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG319]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG320]])
-// CHECK-NEXT: store <4 x float> [[REG321]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG322:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG322]], i32 0)
-// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG323]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG324]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG325:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG326:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG325]], i32 0
-// CHECK-NEXT: ret i64 [[REG326]]
-
-// CHECK: define available_externally i64 @_mm_cvtps_pi8
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG328:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG327]])
-// CHECK-NEXT: store <4 x float> [[REG328]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG330:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG329]], i32 0)
-// CHECK-NEXT: store <4 x i32> [[REG330]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG332:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG331]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: store <8 x i16> [[REG332]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG335:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(short vector[8], short vector[8])(<8 x i16> [[REG333]], <8 x i16> [[REG334]])
-// CHECK-NEXT: store <16 x i8> [[REG335]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG337:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG336]] to <2 x i64>
-// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG337]], i32 0
-// CHECK-NEXT: ret i64 [[REG338]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpu16_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG339]], i32 0
-// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG340]], i64 [[REG341]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG342]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG343]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG344]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG345]], <8 x i16> zeroinitializer)
-// CHECK-BE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG345]])
-// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG346]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG347]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG348:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG349:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG348]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG349]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG350]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtpu8_ps
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG351]], i32 0
-// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG354:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG352]], i64 [[REG353]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG354]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG355]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG356]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG357]], <16 x i8> zeroinitializer)
-// CHECK-BE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> zeroinitializer, <16 x i8> [[REG357]])
-// CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG358]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG359]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG360]], <8 x i16> zeroinitializer)
-// CHECK-BE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG360]])
-// CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG361]] to <4 x i32>
-// CHECK-NEXT: store <4 x i32> [[REG362]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG364:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG363]], i32 0)
-// CHECK-NEXT: store <4 x float> [[REG364]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG365]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtsi32_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = sitofp i32 [[REG366]] to float
-// CHECK-NEXT: store float [[REG367]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG369:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG369]], float [[REG368]], i32 0
-// CHECK-NEXT: store <4 x float> [[REG370]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG371:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG371]]
-
-// CHECK: define available_externally <4 x float> @_mm_cvtsi64_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = sitofp i64 [[REG372]] to float
-// CHECK-NEXT: store float [[REG373]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG374:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG375]], float [[REG374]], i32 0
-// CHECK-NEXT: store <4 x float> [[REG376]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG377]]
-
-// CHECK: define available_externally float @_mm_cvtss_f32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG378]], i32 0
-// CHECK-NEXT: ret float [[REG379]]
-
-// CHECK: define available_externally signext i32 @_mm_cvtss_si32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
-// CHECK-BE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
-// CHECK-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 0
-// CHECK-NEXT: [[REG383:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 1
-// CHECK-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 2
-// CHECK-NEXT: store <4 x float> [[REG382]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 [[REG383]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store double [[REG384]], double* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG385:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG386:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG385]] to i32
-// CHECK-NEXT: ret i32 [[REG386]]
-
-// CHECK: define available_externally i64 @_mm_cvtss_si64
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
-// CHECK-BE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
-// CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 0
-// CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 1
-// CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 2
-// CHECK-NEXT: store <4 x float> [[REG389]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store i64 [[REG390]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store double [[REG391]], double* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: ret i64 [[REG392]]
-
-// CHECK: define available_externally i64 @_mm_cvtt_ps2pi
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK: [[REG393:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvttps_pi32(<4 x float> [[REG393]])
-// CHECK-NEXT: ret i64 [[REG394]]
-
-// CHECK: define available_externally signext i32 @_mm_cvtt_ss2si
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG396:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvttss_si32(<4 x float> [[REG395]])
-// CHECK-NEXT: ret i32 [[REG396]]
-
-// CHECK: define available_externally i64 @_mm_cvttps_pi32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG397]] to <2 x i64>
-// CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG398]], i32 zeroext 0)
-// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <4 x float>
-// CHECK-NEXT: store <4 x float> [[REG400]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG401:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG401]], i32 0)
-// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG402]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG403]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG404:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG404]], i32 0
-// CHECK-NEXT: ret i64 [[REG405]]
-
-// CHECK: define available_externally signext i32 @_mm_cvttss_si32
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG407:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG406]], i32 0
-// CHECK-NEXT: store float [[REG407]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG409:[0-9a-zA-Z_%.]+]] = fptosi float [[REG408]] to i32
-// CHECK-NEXT: ret i32 [[REG409]]
-
-// CHECK: define available_externally i64 @_mm_cvttss_si64
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG410:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG410]], i32 0
-// CHECK-NEXT: store float [[REG411]], float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = fptosi float [[REG412]] to i64
-// CHECK-NEXT: ret i64 [[REG413]]
-
-void __attribute__((noinline))
-test_div() {
-  res = _mm_div_ps(m1, m2);
-  res = _mm_div_ss(m1, m2);
-}
-
-// CHECK-LABEL: @test_div
-
-// CHECK: define available_externally <4 x float> @_mm_div_ps(<4 x float> [[REG414:[0-9a-zA-Z_%.]+]], <4 x float> [[REG415:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG414]], <4 x float>* [[REG416:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG415]], <4 x float>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG418:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG416]], align 16
-// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG417]], align 16
-// CHECK-NEXT: [[REG420:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG418]], [[REG419]]
-// CHECK-NEXT: ret <4 x float> [[REG420]]
-
-// CHECK: define available_externally <4 x float> @_mm_div_ss(<4 x float> [[REG421:[0-9a-zA-Z_%.]+]], <4 x float> [[REG422:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG421]], <4 x float>* [[REG423:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG422]], <4 x float>* [[REG424:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
-// CHECK-NEXT: [[REG426:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG425]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG426]], <4 x float>* [[REG427:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG424]], align 16
-// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG428]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG429]], <4 x float>* [[REG430:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG431:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG427]], align 16
-// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG430]], align 16
-// CHECK-NEXT: [[REG433:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG431]], [[REG432]]
-// CHECK-NEXT: store <4 x float> [[REG433]], <4 x float>* [[REG434:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
-// CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG434]], align 16
-// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG435]], <4 x float> [[REG436]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG437]]
-
-void __attribute__((noinline))
-test_extract() {
-  i = _mm_extract_pi16(ms[0], i2);
-  i = _m_pextrw(ms[0], i2);
-}
-
-// CHECK-LABEL: @test_extract
-
-// CHECK: define available_externally signext i32 @_mm_extract_pi16
-// CHECK: [[REG438:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG439:[0-9a-zA-Z_%.]+]] = and i32 [[REG438]], 3
-// CHECK-NEXT: store i32 [[REG439]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-BE: sub i32 3, {{[0-9a-zA-Z_%.]+}}
-// CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = mul i32 [[REG441]], 16
-// CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = zext i32 [[REG442]] to i64
-// CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = lshr i64 [[REG440]], [[REG443]]
-// CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = and i64 [[REG444]], 65535
-// CHECK-NEXT: [[REG446:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG445]] to i32
-// CHECK-NEXT: ret i32 [[REG446]]
-
-// CHECK: define available_externally signext i32 @_m_pextrw
-// CHECK: [[REG447:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_extract_pi16
-// CHECK-NEXT: ret i32 [[REG447]]
-
-void __attribute__((noinline))
-test_insert() {
-  res64 = _mm_insert_pi16(ms[0], i, i2);
-  res64 = _m_pinsrw(ms[0], i, i2);
-}
-
-// CHECK-LABEL: @test_insert
-
-// CHECK: define available_externally i64 @_mm_insert_pi16
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = and i32 [[REG448]], 3
-// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG449]], 16
-// CHECK-NEXT: store i32 [[REG450]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = sext i32 [[REG451]] to i64
-// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = zext i32 [[REG453]] to i64
-// CHECK-NEXT: [[REG455:[0-9a-zA-Z_%.]+]] = shl i64 [[REG452]], [[REG454]]
-// CHECK-NEXT: store i64 [[REG455]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = zext i32 [[REG456]] to i64
-// CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = shl i64 65535, [[REG457]]
-// CHECK-NEXT: store i64 [[REG458]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG461:[0-9a-zA-Z_%.]+]] = xor i64 [[REG460]], -1
-// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = and i64 [[REG459]], [[REG461]]
-// CHECK-NEXT: [[REG463:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG464:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = and i64 [[REG463]], [[REG464]]
-// CHECK-NEXT: [[REG466:[0-9a-zA-Z_%.]+]] = or i64 [[REG462]], [[REG465]]
-// CHECK-NEXT: store i64 [[REG466]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: ret i64 [[REG467]]
-
-// CHECK: define available_externally i64 @_m_pinsrw
-// CHECK: [[REG468:[0-9a-zA-Z_%.]+]] = call i64 @_mm_insert_pi16
-// CHECK-NEXT: ret i64 [[REG468]]
-
-void __attribute__((noinline))
-test_load() {
-  res = _mm_load_ps(fs);
-  res = _mm_load_ps1(fs);
-  res = _mm_load_ss(fs);
-  res = _mm_load1_ps(fs);
-  res = _mm_loadh_pi(m1, &ms[0]);
-  res = _mm_loadl_pi(m1, &ms[0]);
-  res = _mm_loadr_ps(fs);
-  res = _mm_loadu_ps(fs);
-}
-
-// CHECK-LABEL: @test_load
-
-// CHECK: define available_externally <4 x float> @_mm_load_ps
-// CHECK: [[REG469:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(int, float vector[4] const*)
-// CHECK-NEXT: ret <4 x float> [[REG469]]
-
-// CHECK: define available_externally <4 x float> @_mm_load_ps1
-// CHECK: [[REG470:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_load1_ps
-// CHECK-NEXT: ret <4 x float> [[REG470]]
-
-// CHECK: define available_externally <4 x float> @_mm_load_ss
-// CHECK: [[REG471:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set_ss
-// CHECK-NEXT: ret <4 x float> [[REG471]]
-
-// CHECK: define available_externally <4 x float> @_mm_load1_ps
-// CHECK: [[REG472:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps
-// CHECK-NEXT: ret <4 x float> [[REG472]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadh_pi
-// CHECK: [[REG473:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: store <2 x i64> [[REG473]], <2 x i64>* [[REG474:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG474]], align 16
-// CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG475]], i32 1
-// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG479:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG477]], i64 [[REG476]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG479]], <2 x i64>* [[REG478]], align 16
-// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478]], align 16
-// CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG480]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG481]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadl_pi
-// CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
-// CHECK-NEXT: store <2 x i64> [[REG482]], <2 x i64>* [[REG483:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG483]], align 16
-// CHECK-NEXT: [[REG485:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG484]], i32 0
-// CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG488:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG486]], i64 [[REG485]], i32 0
-// CHECK-NEXT: store <2 x i64> [[REG488]], <2 x i64>* [[REG487]], align 16
-// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487]], align 16
-// CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG489]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG490]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadr_ps
-// CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(int, float vector[4] const*)
-// CHECK-NEXT: store <4 x float> [[REG491]], <4 x float>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
-// CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
-// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG493]], <4 x float> [[REG494]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
-// CHECK-NEXT: store <4 x float> [[REG495]], <4 x float>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG496]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG497]]
-
-// CHECK: define available_externally <4 x float> @_mm_loadu_ps
-// CHECK: [[REG498:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vsx_ld(int, float const*)
-// CHECK-NEXT: ret <4 x float> [[REG498]]
-
-void __attribute__((noinline))
-test_logic() {
-  res = _mm_or_ps(m1, m2);
-  res = _mm_and_ps(m1, m2);
-  res = _mm_andnot_ps(m1, m2);
-  res = _mm_xor_ps(m1, m2);
-}
-
-// CHECK-LABEL: @test_logic
-
-// CHECK: define available_externally <4 x float> @_mm_or_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_or(float vector[4], float vector[4])(<4 x float> [[REG499]], <4 x float> [[REG500]])
-// CHECK-NEXT: ret <4 x float> [[REG501]]
-
-// CHECK: define available_externally <4 x float> @_mm_and_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG504:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_and(float vector[4], float vector[4])(<4 x float> [[REG502]], <4 x float> [[REG503]])
-// CHECK-NEXT: ret <4 x float> [[REG504]]
-
-// CHECK: define available_externally <4 x float> @_mm_andnot_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_andc(float vector[4], float vector[4])(<4 x float> [[REG505]], <4 x float> [[REG506]])
-// CHECK-NEXT: ret <4 x float> [[REG507]]
-
-// CHECK: define available_externally <4 x float> @_mm_xor_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG509:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG510:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> [[REG508]], <4 x float> [[REG509]])
-// CHECK-NEXT: ret <4 x float> [[REG510]]
-
-void __attribute__((noinline))
-test_max() {
-  res = _mm_max_ps(m1, m2);
-  res = _mm_max_ss(m1, m2);
-  res64 = _mm_max_pi16(ms[0], ms[1]);
-  res64 = _mm_max_pu8(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_max
-
-// CHECK: define available_externally <4 x float> @_mm_max_ps(<4 x float> [[REG511:[0-9a-zA-Z_%.]+]], <4 x float> [[REG512:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG511]], <4 x float>* [[REG513:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG512]], <4 x float>* [[REG514:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
-// CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
-// CHECK-NEXT: [[REG517:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG515]], <4 x float> [[REG516]])
-// CHECK-NEXT: store <4 x i32> [[REG517]], <4 x i32>* [[REG518:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG519:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
-// CHECK-NEXT: [[REG520:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
-// CHECK-NEXT: [[REG521:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG518]], align 16
-// CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG519]], <4 x float> [[REG520]], <4 x i32> [[REG521]])
-// CHECK-NEXT: ret <4 x float> [[REG522]]
-
-// CHECK: define available_externally <4 x float> @_mm_max_ss
-// CHECK: [[REG523:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG523]], <4 x float>* [[REG524:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG525:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG525]], <4 x float>* [[REG526:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG524]], align 16
-// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG526]], align 16
-// CHECK-NEXT: call <4 x float> @vec_max(float vector[4], float vector[4])(<4 x float> [[REG527]], <4 x float> [[REG528]])
-// CHECK: [[REG529:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG529]]
-
-// CHECK: define available_externally i64 @_mm_max_pi16
-// CHECK: [[REG530:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG530]])
-// CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG531]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG532]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG533]])
-// CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG534]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG535]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG537:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG538:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG536]], <8 x i16> [[REG537]])
-// CHECK-NEXT: store <8 x i16> [[REG538]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG539:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG540:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG539]], <8 x i16> [[REG540]], <8 x i16> [[REG541]])
-// CHECK-NEXT: store <8 x i16> [[REG542]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG543]] to <2 x i64>
-// CHECK-NEXT: [[REG545:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG544]], i32 0
-// CHECK-NEXT: ret i64 [[REG545]]
-
-// CHECK: define available_externally i64 @_mm_max_pu8
-// CHECK: [[REG546:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG546]])
-// CHECK-NEXT: [[REG548:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG547]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG548]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG549:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG549]])
-// CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG550]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG551]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG554:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG552]], <16 x i8> [[REG553]])
-// CHECK-NEXT: store <16 x i8> [[REG554]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG555:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG556:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG557:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG558:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG555]], <16 x i8> [[REG556]], <16 x i8> [[REG557]])
-// CHECK-NEXT: store <16 x i8> [[REG558]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG559:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG559]] to <2 x i64>
-// CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG560]], i32 0
-// CHECK-NEXT: ret i64 [[REG561]]
-
-void __attribute__((noinline))
-test_alt_name_max() {
-  res64 = _m_pmaxsw(ms[0], ms[1]);
-  res64 = _m_pmaxub(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_alt_name_max
-
-// CHECK: define available_externally i64 @_m_pmaxsw
-// CHECK: [[REG562:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pi16
-// CHECK-NEXT: ret i64 [[REG562]]
-
-// CHECK: define available_externally i64 @_m_pmaxub
-// CHECK: [[REG563:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pu8
-// CHECK-NEXT: ret i64 [[REG563]]
-
-void __attribute__((noinline))
-test_min() {
-  res = _mm_min_ps(m1, m2);
-  res = _mm_min_ss(m1, m2);
-  res64 = _mm_min_pi16(ms[0], ms[1]);
-  res64 = _mm_min_pu8(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_min
-
-// CHECK: define available_externally <4 x float> @_mm_min_ps(<4 x float> [[REG517:[0-9a-zA-Z_%.]+]], <4 x float> [[REG518:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG517]], <4 x float>* [[REG564:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG518]], <4 x float>* [[REG565:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG566:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
-// CHECK-NEXT: [[REG567:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
-// CHECK-NEXT: [[REG568:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG566]], <4 x float> [[REG567]])
-// CHECK-NEXT: store <4 x i32> [[REG568]], <4 x i32>* [[REG569:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG570:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
-// CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
-// CHECK-NEXT: [[REG572:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG569]], align 16
-// CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG570]], <4 x float> [[REG571]], <4 x i32> [[REG572]])
-// CHECK-NEXT: ret <4 x float> [[REG573]]
-
-// CHECK: define available_externally <4 x float> @_mm_min_ss
-// CHECK: [[REG574:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG574]], <4 x float>* [[REG575:[0-9a-zA-Z_%.]+]], align 16
-// CHECK: [[REG576:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
-// CHECK-NEXT: store <4 x float> [[REG576]], <4 x float>* [[REG577:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG578:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG575]], align 16
-// CHECK-NEXT: [[REG579:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG577]], align 16
-// CHECK-NEXT: call <4 x float> @vec_min(float vector[4], float vector[4])(<4 x float> [[REG578]], <4 x float> [[REG579]])
-// CHECK: [[REG580:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG580]]
-
-// CHECK: define available_externally i64 @_mm_min_pi16
-// CHECK: [[REG581:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG581]])
-// CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG582]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG583]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG584:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG585:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG584]])
-// CHECK-NEXT: [[REG586:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG585]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG586]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG587:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG588:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG589:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> [[REG587]], <8 x i16> [[REG588]])
-// CHECK-NEXT: store <8 x i16> [[REG589]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG593:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG590]], <8 x i16> [[REG591]], <8 x i16> [[REG592]])
-// CHECK-NEXT: store <8 x i16> [[REG593]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG594:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG595:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG594]] to <2 x i64>
-// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG595]], i32 0
-// CHECK-NEXT: ret i64 [[REG596]]
-
-// CHECK: define available_externally i64 @_mm_min_pu8
-// CHECK: [[REG597:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG598:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG597]])
-// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG598]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG599]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG601:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG600]])
-// CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG601]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG602]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG603]], <16 x i8> [[REG604]])
-// CHECK-NEXT: store <16 x i8> [[REG605]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG608:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG606]], <16 x i8> [[REG607]], <16 x i8> [[REG608]])
-// CHECK-NEXT: store <16 x i8> [[REG609]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG611:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG610]] to <2 x i64>
-// CHECK-NEXT: [[REG612:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG611]], i32 0
-// CHECK-NEXT: ret i64 [[REG612]]
-
-void __attribute__((noinline))
-test_alt_name_min() {
-  res64 = _m_pminsw(ms[0], ms[1]);
-  res64 = _m_pminub(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_alt_name_min
-
-// CHECK: define available_externally i64 @_m_pminsw
-// CHECK: [[REG613:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pi16
-// CHECK-NEXT: ret i64 [[REG613]]
-
-// CHECK: define available_externally i64 @_m_pminub
-// CHECK: [[REG614:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pu8
-// CHECK-NEXT: ret i64 [[REG614]]
-
-void __attribute__((noinline))
-test_move() {
-  _mm_maskmove_si64(ms[0], ms[1], (char *)&res64);
-  res = _mm_move_ss(m1, m2);
-  res = _mm_movehl_ps(m1, m2);
-  res = _mm_movelh_ps(m1, m2);
-  i = _mm_movemask_pi8(ms[0]);
-  i = _mm_movemask_ps(m1);
-}
-
-// CHECK-LABEL: @test_move
-
-// CHECK: define available_externally void @_mm_maskmove_si64
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 -9187201950435737472, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG615:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = bitcast i8* [[REG615]] to i64*
-// CHECK-NEXT: store i64* [[REG616]], i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG617:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG618:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
-// CHECK-NEXT: store i64 [[REG618]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG619:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG620:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG621:[0-9a-zA-Z_%.]+]] = and i64 [[REG619]], [[REG620]]
-// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG623:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8(i64 [[REG621]], i64 [[REG622]])
-// CHECK-NEXT: store i64 [[REG623]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = xor i64 [[REG625]], -1
-// CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = and i64 [[REG624]], [[REG626]]
-// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = and i64 [[REG628]], [[REG629]]
-// CHECK-NEXT: [[REG631:[0-9a-zA-Z_%.]+]] = or i64 [[REG627]], [[REG630]]
-// CHECK-NEXT: store i64 [[REG631]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG633:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG632]], i64* [[REG633]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally <4 x float> @_mm_move_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG634:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG635:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG634]], <4 x float> [[REG635]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG636]]
-
-// CHECK: define available_externally <4 x float> @_mm_movehl_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG638:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG637]] to <2 x i64>
-// CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG639]] to <2 x i64>
-// CHECK-NEXT: [[REG641:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG638]], <2 x i64> [[REG640]])
-// CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG641]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG642]]
-
-// CHECK: define available_externally <4 x float> @_mm_movelh_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG643]] to <2 x i64>
-// CHECK-NEXT: [[REG645:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG646:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG645]] to <2 x i64>
-// CHECK-NEXT: [[REG647:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG644]], <2 x i64> [[REG646]])
-// CHECK-NEXT: [[REG648:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG647]] to <4 x float>
-// CHECK-NEXT: ret <4 x float> [[REG648]]
-
-// CHECK: define available_externally signext i32 @_mm_movemask_pi8
-// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-LE-NEXT: store i64 2269495618449464, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-BE-NEXT: store i64 4048780183313844224, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG650:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG651:[0-9a-zA-Z_%.]+]] = call i64 @llvm.ppc.bpermd(i64 [[REG649]], i64 [[REG650]])
-// CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG651]] to i32
-// CHECK-NEXT: ret i32 [[REG652]]
-
-// CHECK: define available_externally signext i32 @_mm_movemask_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG654:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG653]] to <16 x i8>
-// CHECK-LE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
-// CHECK-BE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
-// CHECK-NEXT: store <2 x i64> [[REG655]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG656:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-LE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 1
-// CHECK-BE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 0
-// CHECK-NEXT: [[REG658:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG657]] to i32
-// CHECK-NEXT: ret i32 [[REG658]]
-
-void __attribute__((noinline))
-test_alt_name_move() {
-  i = _m_pmovmskb(ms[0]);
-  _m_maskmovq(ms[0], ms[1], (char *)&res64);
-}
-
-// CHECK-LABEL: @test_alt_name_move
-
-// CHECK: define available_externally signext i32 @_m_pmovmskb
-// CHECK: [[REG659:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_movemask_pi8
-// CHECK-NEXT: ret i32 [[REG659]]
-
-// CHECK: define available_externally void @_m_maskmovq
-// CHECK: call void @_mm_maskmove_si64
-// CHECK-NEXT: ret void
-
-void __attribute__((noinline))
-test_mul() {
-  res = _mm_mul_ps(m1, m2);
-  res = _mm_mul_ss(m1, m2);
-  res64 = _mm_mulhi_pu16(ms[0], ms[1]);
-  res64 = _m_pmulhuw(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_mul
-
-// CHECK: define available_externally <4 x float> @_mm_mul_ps(<4 x float> [[REG660:[0-9a-zA-Z_%.]+]], <4 x float> [[REG661:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG660]], <4 x float>* [[REG662:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG661]], <4 x float>* [[REG663:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG664:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG662]], align 16
-// CHECK-NEXT: [[REG665:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG663]], align 16
-// CHECK-NEXT: [[REG666:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG664]], [[REG665]]
-// CHECK-NEXT: ret <4 x float> [[REG666]]
-
-// CHECK: define available_externally <4 x float> @_mm_mul_ss(<4 x float> [[REG667:[0-9a-zA-Z_%.]+]], <4 x float> [[REG668:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG667]], <4 x float>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG668]], <4 x float>* [[REG670:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG671:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
-// CHECK-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG671]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG672]], <4 x float>* [[REG673:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG670]], align 16
-// CHECK-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG674]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG675]], <4 x float>* [[REG676:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG677:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG673]], align 16
-// CHECK-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG676]], align 16
-// CHECK-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG677]], [[REG678]]
-// CHECK-NEXT: store <4 x float> [[REG679]], <4 x float>* [[REG680:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
-// CHECK-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG680]], align 16
-// CHECK-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG681]], <4 x float> [[REG682]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG683]]
-
-// CHECK: define available_externally i64 @_mm_mulhi_pu16(i64 [[REG684:[0-9a-zA-Z_%.]+]], i64 [[REG685:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG684]], i64* [[REG686:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG685]], i64* [[REG687:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG688:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG686]], align 8
-// CHECK-NEXT: [[REG689:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG688]])
-// CHECK-NEXT: [[REG690:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG689]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG690]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG691:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG687]], align 8
-// CHECK-NEXT: [[REG692:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG691]])
-// CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG692]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG693]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG696:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> [[REG694]], <8 x i16> [[REG695]])
-// CHECK-NEXT: store <4 x i32> [[REG696]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG698:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG699:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> [[REG697]], <8 x i16> [[REG698]])
-// CHECK-NEXT: store <4 x i32> [[REG699]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG702:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> [[REG700]], <4 x i32> [[REG701]], <16 x i8> [[REG702]])
-// CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG703]] to <8 x i16>
-// CHECK-NEXT: store <8 x i16> [[REG704]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG706:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG705]] to <2 x i64>
-// CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG706]], i32 0
-// CHECK-NEXT: ret i64 [[REG707]]
-
-// CHECK: define available_externally i64 @_m_pmulhuw(i64 [[REG708:[0-9a-zA-Z_%.]+]], i64 [[REG709:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG708]], i64* [[REG710:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG709]], i64* [[REG711:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: [[REG712:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG710]], align 8
-// CHECK-NEXT: [[REG713:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG711]], align 8
-// CHECK-NEXT: [[REG714:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pu16(i64 [[REG712]], i64 [[REG713]])
-// CHECK-NEXT: ret i64 [[REG714]]
-
-void __attribute__((noinline))
-test_prefetch() {
-  _mm_prefetch(ms, i);
-}
-
-// CHECK-LABEL: @test_prefetch
-
-// CHECK: define available_externally void @_mm_prefetch
-// CHECK: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
-// CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: call void @llvm.prefetch(i8* [[REG715]], i32 0, i32 3, i32 1)
-// CHECK-NEXT: ret void
-
-void __attribute__((noinline))
-test_rcp() {
-  res = _mm_rcp_ps(m1);
-  res = _mm_rcp_ss(m1);
-}
-
-// CHECK-LABEL: @test_rcp
-
-// CHECK: define available_externally <4 x float> @_mm_rcp_ps
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG716:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG717:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_re(float vector[4])(<4 x float> [[REG716]])
-// CHECK-NEXT: ret <4 x float> [[REG717]]
-
-// CHECK: define available_externally <4 x float> @_mm_rcp_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG719:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG718]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG719]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG720:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_rcp_ps(<4 x float> [[REG720]])
-// CHECK-NEXT: store <4 x float> [[REG721]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG722]], <4 x float> [[REG723]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG724]]
-
-void __attribute__((noinline))
-test_rsqrt() {
-  res = _mm_rsqrt_ps(m1);
-  res = _mm_rsqrt_ss(m1);
-}
-
-// CHECK-LABEL: @test_rsqrt
-
-// CHECK: define available_externally <4 x float> @_mm_rsqrt_ps
-// CHECK: [[REG725:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: ret <4 x float> [[REG725]]
-
-// CHECK: define available_externally <4 x float> @_mm_rsqrt_ss
-// CHECK: [[REG726:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG726]], <4 x float>* [[REG727:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG727]], align 16
-// CHECK-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> [[REG728]])
-// CHECK-NEXT: store <4 x float> [[REG729]], <4 x float>* [[REG730:[0-9a-zA_Z_%.]+]], align 16
-// CHECK-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG732:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG730]], align 16
-// CHECK-NEXT: [[REG733:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG731]], <4 x float> [[REG732]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG733]]
-
-void __attribute__((noinline))
-test_sad() {
-  res64 = _mm_sad_pu8(ms[0], ms[1]);
-  res64 = _m_psadbw(ms[0], ms[1]);
-}
-
-// CHECK-LABEL: @test_sad
-
-// CHECK: define available_externally i64 @_mm_sad_pu8(i64 [[REG734:[0-9a-zA-Z_%.]+]], i64 [[REG735:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG734]], i64* [[REG736:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 [[REG735]], i64* [[REG737:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i8*
-// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[REG738]], i8 0, i64 8, i1 false)
-// CHECK-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG736]], align 8
-// CHECK-NEXT: [[REG740:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG739]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG740]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG741:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG741]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG742]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG737]], align 8
-// CHECK-NEXT: [[REG744:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG743]], i32 1
-// CHECK-NEXT: store <2 x i64> [[REG744]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG745:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG746:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG745]] to <16 x i8>
-// CHECK-NEXT: store <16 x i8> [[REG746]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG749:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG747]], <16 x i8> [[REG748]])
-// CHECK-NEXT: store <16 x i8> [[REG749]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG750:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG750]], <16 x i8> [[REG751]])
-// CHECK-NEXT: store <16 x i8> [[REG752]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG753:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG754:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG755:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG753]], <16 x i8> [[REG754]])
-// CHECK-NEXT: store <16 x i8> [[REG755]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG756:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> [[REG756]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG757]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG758:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG759:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sums(<4 x i32> [[REG758]], <4 x i32> zeroinitializer)
-// CHECK-NEXT: store <4 x i32> [[REG759]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG761:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG760]], i32 3
-// CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG761]] to i16
-// CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG763]], i64 0, i64 0
-// CHECK-NEXT: store i16 [[REG762]], i16* [[REG764]], align 8
-// CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
-// CHECK-NEXT: [[REG766:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG765]], align 8
-// CHECK-NEXT: ret i64 [[REG766]]
-
-// CHECK: define available_externally i64 @_m_psadbw
-// CHECK: [[REG767:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sad_pu8
-// CHECK-NEXT: ret i64 [[REG767]]
-
-void __attribute__((noinline))
-test_set() {
-  res = _mm_set_ps(fs[0], fs[1], fs[2], fs[3]);
-  res = _mm_set_ps1(fs[0]);
-  res = _mm_set_ss(fs[0]);
-  res = _mm_set1_ps(fs[0]);
-  res = _mm_setr_ps(fs[0], fs[1], fs[2], fs[3]);
-}
-
-// CHECK-LABEL: @test_set
-
-// CHECK: define available_externally <4 x float> @_mm_set_ps(float [[REG768:[0-9a-zA-Z_%.]+]], float [[REG769:[0-9a-zA-Z_%.]+]], float [[REG770:[0-9a-zA-Z_%.]+]], float [[REG771:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG768]], float* [[REG772:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG769]], float* [[REG773:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG770]], float* [[REG774:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG771]], float* [[REG775:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = load float, float* [[REG775]], align 4
-// CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG776]], i32 0
-// CHECK-NEXT: [[REG778:[0-9a-zA-Z_%.]+]] = load float, float* [[REG774]], align 4
-// CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG777]], float [[REG778]], i32 1
-// CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load float, float* [[REG773]], align 4
-// CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG779]], float [[REG780]], i32 2
-// CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = load float, float* [[REG772]], align 4
-// CHECK-NEXT: [[REG783:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG781]], float [[REG782]], i32 3
-// CHECK-NEXT: store <4 x float> [[REG783]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG784:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG784]]
-
-// CHECK: define available_externally <4 x float> @_mm_set_ps1(float [[REG785:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG785]], float* [[REG786:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG787:[0-9a-zA-Z_%.]+]] = load float, float* [[REG786]], align 4
-// CHECK-NEXT: [[REG788:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps(float [[REG787]])
-// CHECK-NEXT: ret <4 x float> [[REG788]]
-
-// CHECK: define available_externally <4 x float> @_mm_set_ss(float [[REG789:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG789:[0-9a-zA-Z_%.]+]], float* [[REG790:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG791:[0-9a-zA-Z_%.]+]] = load float, float* [[REG790]], align 4
-// CHECK-NEXT: [[REG792:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG791]], i32 0
-// CHECK-NEXT: [[REG793:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG792]], float 0.000000e+00, i32 1
-// CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG793]], float 0.000000e+00, i32 2
-// CHECK-NEXT: [[REG795:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG794]], float 0.000000e+00, i32 3
-// CHECK-NEXT: store <4 x float> [[REG795]], <4 x float>* [[REG796:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG797:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG796]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG797]]
-
-// CHECK: define available_externally <4 x float> @_mm_set1_ps(float [[REG798:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG798]], float* [[REG799:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG800]], i32 0
-// CHECK-NEXT: [[REG802:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG803:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG801]], float [[REG802]], i32 1
-// CHECK-NEXT: [[REG804:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG805:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG803]], float [[REG804]], i32 2
-// CHECK-NEXT: [[REG806:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
-// CHECK-NEXT: [[REG807:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG805]], float [[REG806]], i32 3
-// CHECK-NEXT: store <4 x float> [[REG807]], <4 x float>* [[REG808:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG809:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG808]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG809]]
-
-// CHECK: define available_externally <4 x float> @_mm_setr_ps(float [[REG810:[0-9a-zA-Z_%.]+]], float [[REG811:[0-9a-zA-Z_%.]+]], float [[REG812:[0-9a-zA-Z_%.]+]], float [[REG813:[0-9a-zA-Z_%.]+]])
-// CHECK: store float [[REG810]], float* [[REG814:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG811]], float* [[REG815:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG812]], float* [[REG816:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: store float [[REG813]], float* [[REG817:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG818:[0-9a-zA-Z_%.]+]] = load float, float* [[REG814]], align 4
-// CHECK-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG818]], i32 0
-// CHECK-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = load float, float* [[REG815]], align 4
-// CHECK-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG819]], float [[REG820]], i32 1
-// CHECK-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load float, float* [[REG816]], align 4
-// CHECK-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG821]], float [[REG822]], i32 2
-// CHECK-NEXT: [[REG824:[0-9a-zA-Z_%.]+]] = load float, float* [[REG817]], align 4
-// CHECK-NEXT: [[REG825:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG823]], float [[REG824]], i32 3
-// CHECK-NEXT: store <4 x float> [[REG825]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG826:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: ret <4 x float> [[REG826]]
-
-void __attribute__((noinline))
-test_setzero() {
-  res = _mm_setzero_ps();
-}
-
-// CHECK-LABEL: @test_setzero
-
-// CHECK: define available_externally <4 x float> @_mm_setzero_ps
-// CHECK: store <4 x float> zeroinitializer, <4 x float>* [[REG827:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG827]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG828]]
-
-void __attribute__((noinline))
-test_sfence() {
-  _mm_sfence();
-}
-
-// CHECK-LABEL: @test_sfence
-
-// CHECK: define available_externally void @_mm_sfence
-// CHECK: fence release
-// CHECK-NEXT: ret void
-
-void __attribute__((noinline))
-test_shuffle() {
-  res64 = _mm_shuffle_pi16(ms[0], i);
-  res = _mm_shuffle_ps(m1, m2, i);
-  res64 = _m_pshufw(ms[0], i);
-}
-
-// CHECK-LABEL: @test_shuffle
-
-// CHECK: define available_externally i64 @_mm_shuffle_pi16(i64 [[REG829:[0-9a-zA-Z_%.]+]], i32 signext [[REG830:[0-9a-zA-Z_%.]+]])
-// CHECK: store i64 [[REG829]], i64* [[REG831:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i32 [[REG830]], i32* [[REG832:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = and i32 [[REG833]], 3
-// CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = sext i32 [[REG834]] to i64
-// CHECK-NEXT: store i64 [[REG835]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG836:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG836]], 2
-// CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = and i32 [[REG837]], 3
-// CHECK-NEXT: [[REG839:[0-9a-zA-Z_%.]+]] = sext i32 [[REG838]] to i64
-// CHECK-NEXT: store i64 [[REG839]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG840:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG840]], 4
-// CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = and i32 [[REG841]], 3
-// CHECK-NEXT: [[REG843:[0-9a-zA-Z_%.]+]] = sext i32 [[REG842]] to i64
-// CHECK-NEXT: store i64 [[REG843]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
-// CHECK-NEXT: [[REG845:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG844]], 6
-// CHECK-NEXT: [[REG846:[0-9a-zA-Z_%.]+]] = and i32 [[REG845]], 3
-// CHECK-NEXT: [[REG847:[0-9a-zA-Z_%.]+]] = sext i32 [[REG846]] to i64
-// CHECK-NEXT: store i64 [[REG847]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG848:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG849:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG848]]
-// CHECK-NEXT: [[REG850:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG849]], align 2
-// CHECK-NEXT: [[REG851:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 0
-// CHECK-BE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 3
-// CHECK-NEXT: store i16 [[REG850]], i16* [[REG852]]
-// CHECK-NEXT: [[REG853:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG854:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG853]]
-// CHECK-NEXT: [[REG855:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG854]], align 2
-// CHECK-NEXT: [[REG856:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 1
-// CHECK-BE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 2
-// CHECK-NEXT: store i16 [[REG855]], i16* [[REG857]]
-// CHECK-NEXT: [[REG858:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG859:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG858]]
-// CHECK-NEXT: [[REG860:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG859]], align 2
-// CHECK-NEXT: [[REG861:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 2
-// CHECK-BE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 1
-// CHECK-NEXT: store i16 [[REG860]], i16* [[REG862]]
-// CHECK-NEXT: [[REG863:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG864:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG863]]
-// CHECK-NEXT: [[REG865:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG864]], align 2
-// CHECK-NEXT: [[REG866:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
-// CHECK-LE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 3
-// CHECK-BE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 0
-// CHECK-NEXT: store i16 [[REG865]], i16* [[REG867]]
-// CHECK-NEXT: [[REG868:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
-// CHECK-NEXT: [[REG869:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG868]], align 8
-// CHECK-NEXT: [[REG870:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG869]])
-// CHECK-NEXT: store <2 x i64> [[REG870]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG871:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG831]], align 8
-// CHECK-NEXT: [[REG872:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG871]])
-// CHECK-NEXT: store <2 x i64> [[REG872]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG873:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG874:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG875:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG876:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG875]] to <16 x i8>
-// CHECK-NEXT: [[REG877:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])(<2 x i64> [[REG873]], <2 x i64> [[REG874]], <16 x i8> [[REG876]])
-// CHECK-NEXT: store <2 x i64> [[REG877]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG878:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG879:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG878]], i32 0
-// CHECK-NEXT: ret i64 [[REG879]]
-
-// CHECK: define available_externally <4 x float> @_mm_shuffle_ps(<4 x float> [[REG880:[0-9a-zA-Z_%.]+]], <4 x float> [[REG881:[0-9a-zA-Z_%.]+]], i32 signext [[REG882:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG880]], <4 x float>* [[REG883:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG881]], <4 x float>* [[REG884:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store i32 [[REG882]], i32* [[REG885:[0-9a-zA-Z_%.]+]], align 4
-// CHECK-NEXT: [[REG886:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG887:[0-9a-zA-Z_%.]+]] = and i32 [[REG886]], 3
-// CHECK-NEXT: [[REG888:[0-9a-zA-Z_%.]+]] = sext i32 [[REG887]] to i64
-// CHECK-NEXT: store i64 [[REG888]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG889:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG890:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG889]], 2
-// CHECK-NEXT: [[REG891:[0-9a-zA-Z_%.]+]] = and i32 [[REG890]], 3
-// CHECK-NEXT: [[REG892:[0-9a-zA-Z_%.]+]] = sext i32 [[REG891]] to i64
-// CHECK-NEXT: store i64 [[REG892]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG893:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG894:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG893]], 4
-// CHECK-NEXT: [[REG895:[0-9a-zA-Z_%.]+]] = and i32 [[REG894]], 3
-// CHECK-NEXT: [[REG896:[0-9a-zA-Z_%.]+]] = sext i32 [[REG895]] to i64
-// CHECK-NEXT: store i64 [[REG896]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG897:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
-// CHECK-NEXT: [[REG898:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG897]], 6
-// CHECK-NEXT: [[REG899:[0-9a-zA-Z_%.]+]] = and i32 [[REG898]], 3
-// CHECK-NEXT: [[REG900:[0-9a-zA-Z_%.]+]] = sext i32 [[REG899]] to i64
-// CHECK-NEXT: store i64 [[REG900]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG901:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG902:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG901]]
-// CHECK-NEXT: [[REG903:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG902]], align 4
-// CHECK-NEXT: [[REG904:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG905:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG904]], i32 [[REG903]], i32 0
-// CHECK-NEXT: store <4 x i32> [[REG905]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG906:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG907:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG906]]
-// CHECK-NEXT: [[REG908:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG907]], align 4
-// CHECK-NEXT: [[REG909:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG910:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG909]], i32 [[REG908]], i32 1
-// CHECK-NEXT: store <4 x i32> [[REG910]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG911:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG912:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG911]]
-// CHECK-NEXT: [[REG913:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG912]], align 4
-// CHECK-NEXT: [[REG914:[0-9a-zA-Z_%.]+]] = add i32 [[REG913]], 269488144
-// CHECK-NEXT: [[REG915:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG916:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG915]], i32 [[REG914]], i32 2
-// CHECK-NEXT: store <4 x i32> [[REG916]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG917:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG918:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG917]]
-// CHECK-NEXT: [[REG919:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG918]], align 4
-// CHECK-NEXT: [[REG920:[0-9a-zA-Z_%.]+]] = add i32 [[REG919]], 269488144
-// CHECK-NEXT: [[REG921:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG922:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG921]], i32 [[REG920]], i32 3
-// CHECK-NEXT: store <4 x i32> [[REG922]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG923:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG924:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG925:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG926:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG925]] to <16 x i8>
-// CHECK-NEXT: [[REG927:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG923]], <4 x float> [[REG924]], <16 x i8> [[REG926]])
-// CHECK-NEXT: ret <4 x float> [[REG927]]
-
-// CHECK: define available_externally i64 @_m_pshufw
-// CHECK: [[REG928:[0-9a-zA-Z_%.]+]] = call i64 @_mm_shuffle_pi16
-// CHECK-NEXT: ret i64 [[REG928]]
-
-void __attribute__((noinline))
-test_sqrt() {
-  res = _mm_sqrt_ps(m1);
-  res = _mm_sqrt_ss(m1);
-}
-
-// CHECK-LABEL: @test_sqrt
-
-// CHECK: define available_externally <4 x float> @_mm_sqrt_ps
-// CHECK: [[REG929:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
-// CHECK-NEXT: ret <4 x float> [[REG929]]
-
-// CHECK: define available_externally <4 x float> @_mm_sqrt_ss
-// CHECK: [[REG930:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG931:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG930]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG931]], <4 x float>* [[REG932:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG933:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG932]], align 16
-// CHECK-NEXT: [[REG934:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> [[REG933]])
-// CHECK-NEXT: store <4 x float> [[REG934]], <4 x float>* [[REG935:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG936:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG937:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG935]], align 16
-// CHECK-NEXT: [[REG938:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG936]], <4 x float> [[REG937]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG938]]
-
-void __attribute__((noinline))
-test_store() {
-  _mm_store_ps(fs, m1);
-  _mm_store_ps1(fs, m1);
-  _mm_store_ss(fs, m1);
-  _mm_store1_ps(fs, m1);
-  _mm_storeh_pi(ms, m1);
-  _mm_storel_pi(ms, m1);
-  _mm_storer_ps(fs, m1);
-}
-
-// CHECK-LABEL: @test_store
-
-// CHECK: define available_externally void @_mm_store_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG939:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG940:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG941:[0-9a-zA-Z_%.]+]] = bitcast float* [[REG940]] to <4 x float>*
-// CHECK-NEXT: call void @vec_st(float vector[4], int, float vector[4]*)(<4 x float> [[REG939]], i32 signext 0, <4 x float>* [[REG941]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_ps1
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG942:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG943:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store1_ps(float* [[REG942]], <4 x float> [[REG943]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store_ss
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG944:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG945:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG944]], i32 0
-// CHECK-NEXT: [[REG946:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store float [[REG945]], float* [[REG946]], align 4
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_store1_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG947:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG948:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG947]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG948]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG949:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG950:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store_ps(float* [[REG949]], <4 x float> [[REG950]])
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storeh_pi
-// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG951:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG952:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG951]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG952]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG953:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG954:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG953]], i32 1
-// CHECK-NEXT: [[REG955:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG954]], i64* [[REG955]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storel_pi
-// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG956:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG957:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG956]] to <2 x i64>
-// CHECK-NEXT: store <2 x i64> [[REG957]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG958:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG959:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG958]], i32 0
-// CHECK-NEXT: [[REG960:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG959]], i64* [[REG960]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_storer_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG961:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG962:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG963:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG961]], <4 x float> [[REG962]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
-// CHECK-NEXT: store <4 x float> [[REG963]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG964:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG965:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store_ps(float* [[REG964]], <4 x float> [[REG965]])
-// CHECK-NEXT: ret void
-
-void __attribute__((noinline))
-test_stream() {
-  _mm_stream_pi(&res64, ms[0]);
-  _mm_stream_ps(&fs[0], m1);
-}
-
-// CHECK-LABEL: @test_stream
-
-/// CHECK: define available_externally void @_mm_stream_pi
-// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG966:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* [[REG966]])
-// CHECK-NEXT: [[REG967:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG968:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store i64 [[REG967]], i64* [[REG968]], align 8
-// CHECK-NEXT: ret void
-
-// CHECK: define available_externally void @_mm_stream_ps
-// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG969:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(float* [[REG969]])
-// CHECK-NEXT: [[REG970:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
-// CHECK-NEXT: [[REG971:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: call void @_mm_store_ps(float* [[REG970]], <4 x float> [[REG971]])
-// CHECK-NEXT: ret void
-
-void __attribute__((noinline))
-test_sub() {
-  res = _mm_sub_ps(m1, m2);
-  res = _mm_sub_ss(m1, m2);
-}
-
-// CHECK-LABEL: @test_sub
-
-// CHECK: define available_externally <4 x float> @_mm_sub_ps(<4 x float> [[REG972:[0-9a-zA-Z_%.]+]], <4 x float> [[REG973:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG972]], <4 x float>* [[REG974:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG973]], <4 x float>* [[REG975:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG976:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG974]], align 16
-// CHECK-NEXT: [[REG977:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG975]], align 16
-// CHECK-NEXT: [[REG978:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG976]], [[REG977]]
-// CHECK-NEXT: ret <4 x float> [[REG978]]
-
-// CHECK: define available_externally <4 x float> @_mm_sub_ss(<4 x float> [[REG979:[0-9a-zA-Z_%.]+]], <4 x float> [[REG980:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG979]], <4 x float>* [[REG981:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG980]], <4 x float>* [[REG982:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG983:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
-// CHECK-NEXT: [[REG984:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG983]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG984]], <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG982]], align 16
-// CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG986]], i32 zeroext 0)
-// CHECK-NEXT: store <4 x float> [[REG987]], <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG990:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG991:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG989]], [[REG990]]
-// CHECK-NEXT: store <4 x float> [[REG991]], <4 x float>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG993:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
-// CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG992]], align 16
-// CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG993]], <4 x float> [[REG994]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
-// CHECK-NEXT: ret <4 x float> [[REG995]]
-
-void __attribute__((noinline))
-test_transpose() {
-  __m128 m3, m4;
-  _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
-}
-
-// CHECK-LABEL: @test_transpose
-
-// CHECK: br label %[[REG996:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG996]]:
-// CHECK: [[REG997:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
-// CHECK: [[REG998:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
-// CHECK: [[REG999:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
-// CHECK: [[REG1000:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
-// CHECK: [[REG1001:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
-// CHECK: [[REG1002:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
-// CHECK: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
-// CHECK: [[REG1004:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
-// CHECK: ret void
-
-void __attribute__((noinline))
-test_ucomi() {
-  i = _mm_ucomieq_ss(m1, m2);
-  i = _mm_ucomige_ss(m1, m2);
-  i = _mm_ucomigt_ss(m1, m2);
-  i = _mm_ucomile_ss(m1, m2);
-  i = _mm_ucomilt_ss(m1, m2);
-  i = _mm_ucomineq_ss(m1, m2);
-}
-
-// CHECK-LABEL: @test_ucomi
-
-// CHECK: define available_externally signext i32 @_mm_ucomieq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1005:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1006:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1005]], i32 0
-// CHECK-NEXT: [[REG1007:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1007]], i32 0
-// CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG1006]], [[REG1008]]
-// CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1009]] to i32
-// CHECK-NEXT: ret i32 [[REG1010]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomige_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1011:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1012:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1011]], i32 0
-// CHECK-NEXT: [[REG1013:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1014:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1013]], i32 0
-// CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG1012]], [[REG1014]]
-// CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1015]] to i32
-// CHECK-NEXT: ret i32 [[REG1016]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomigt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1018:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1017]], i32 0
-// CHECK-NEXT: [[REG1019:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1020:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1019]], i32 0
-// CHECK-NEXT: [[REG1021:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG1018]], [[REG1020]]
-// CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1021]] to i32
-// CHECK-NEXT: ret i32 [[REG1022]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomile_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1023]], i32 0
-// CHECK-NEXT: [[REG1025:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1026:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1025]], i32 0
-// CHECK-NEXT: [[REG1027:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG1024]], [[REG1026]]
-// CHECK-NEXT: [[REG1028:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1027]] to i32
-// CHECK-NEXT: ret i32 [[REG1028]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomilt_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1029]], i32 0
-// CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1032:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1031]], i32 0
-// CHECK-NEXT: [[REG1033:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG1030]], [[REG1032]]
-// CHECK-NEXT: [[REG1034:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1033]] to i32
-// CHECK-NEXT: ret i32 [[REG1034]]
-
-// CHECK: define available_externally signext i32 @_mm_ucomineq_ss
-// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1035:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1035]], i32 0
-// CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
-// CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1037]], i32 0
-// CHECK-NEXT: [[REG1039:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG1036]], [[REG1038]]
-// CHECK-NEXT: [[REG1040:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1039]] to i32
-// CHECK-NEXT: ret i32 [[REG1040]]
-
-void __attribute__((noinline))
-test_undefined() {
-  res = _mm_undefined_ps();
-}
-
-// CHECK-LABEL: @test_undefined
-
-// CHECK: define available_externally <4 x float> @_mm_undefined_ps
-// CHECK: [[REG1041:[0-9a-zA-Z_%.]+]] = alloca <4 x float>, align 16
-// CHECK-NEXT: [[REG1042:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
-// CHECK-NEXT: store <4 x float> [[REG1042]], <4 x float>* [[REG1041]], align 16
-// CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
-// CHECK-NEXT: ret <4 x float> [[REG1043]]
-
-void __attribute__((noinline))
-test_unpack() {
-  res = _mm_unpackhi_ps(m1, m2);
-  res = _mm_unpacklo_ps(m1, m2);
-}
-
-// CHECK-LABEL: @test_unpack
-
-// CHECK: define available_externally <4 x float> @_mm_unpackhi_ps(<4 x float> [[REG1044:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1045:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG1044]], <4 x float>* [[REG1046:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG1045]], <4 x float>* [[REG1047:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1048:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1046]], align 16
-// CHECK-NEXT: [[REG1049:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1047]], align 16
-// CHECK-NEXT: [[REG1050:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])(<4 x float> [[REG1048]], <4 x float> [[REG1049]])
-// CHECK-NEXT: ret <4 x float> [[REG1050]]
-
-// CHECK: define available_externally <4 x float> @_mm_unpacklo_ps(<4 x float> [[REG1051:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1052:[0-9a-zA-Z_%.]+]])
-// CHECK: store <4 x float> [[REG1051]], <4 x float>* [[REG1053:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: store <4 x float> [[REG1052]], <4 x float>* [[REG1054:[0-9a-zA-Z_%.]+]], align 16
-// CHECK-NEXT: [[REG1055:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1053]], align 16
-// CHECK-NEXT: [[REG1056:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1054]], align 16
-// CHECK-NEXT: [[REG1057:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])(<4 x float> [[REG1055]], <4 x float> [[REG1056]])
-// CHECK-NEXT: ret <4 x float> [[REG1057]]
diff --git a/clang/test/Headers/ppc-intrinsics.c b/clang/test/Headers/ppc-intrinsics.c
new file mode 100644
index 0000000000000..622ce90c76258
--- /dev/null
+++ b/clang/test/Headers/ppc-intrinsics.c
@@ -0,0 +1,13 @@
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
+// expected-no-diagnostics
+
+// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
+
+#include <mmintrin.h>
+// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
+
+// CHECK: target triple = "powerpc64-
+// CHECK: !llvm.module.flags =
diff --git a/clang/test/Headers/ppc-mmx-intrinsics.c b/clang/test/Headers/ppc-mmx-intrinsics.c
deleted file mode 100644
index fe989c2dfbaa5..0000000000000
--- a/clang/test/Headers/ppc-mmx-intrinsics.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// REQUIRES: powerpc-registered-target
-
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-gnu-linux %s -Xclang -verify
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-gnu-linux %s -Xclang -verify -x c++
-// expected-no-diagnostics
-
-// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr7 %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
-
-#include <mmintrin.h>
-// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
diff --git a/clang/test/Headers/ppc-sse-intrinsics.c b/clang/test/Headers/ppc-sse-intrinsics.c
deleted file mode 100644
index f4aa7d9009bb9..0000000000000
--- a/clang/test/Headers/ppc-sse-intrinsics.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// REQUIRES: powerpc-registered-target
-
-// Since mm_malloc.h references system native stdlib.h, doing cross-compile
-// testing may cause unexpected problems. This would affect xmmintrin.h and
-// other following intrinsics headers. If there's need to test them using
-// cross-compile, please add -ffreestanding to compiler options, like
-// test/CodeGen/ppc-xmmintrin.c.
-
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify -x c++
-// expected-no-diagnostics
-
-// RUN: not %clang -S -emit-llvm %s -mcpu=pwr7 -o /dev/null 2>&1 | FileCheck %s -check-prefix=SSE-ERROR
-
-// Altivec must be enabled.
-#include <xmmintrin.h>
-
-#include <mm_malloc.h>
-// SSE-ERROR: xmmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."

From e04b002cf89014592b3603bb9af9f74a01626771 Mon Sep 17 00:00:00 2001
From: Michal Gorny <mgorny@gentoo.org>
Date: Wed, 29 May 2019 07:20:30 +0000
Subject: [PATCH 0459/1176] [libunwind] [test] Fix inferring source paths

Fix two issues that caused libcxx source path not to be inferred
correctly when not specified explicitly:

1. get_lit_conf() uses default value only if the lit variable is set
   to None.  Due to the mehod of substituting lit.site.cfg, they were
   "" rather than None when unset, effectively causing the default never
   to apply.  Instead, use 'or' construct to use the default whenever
   get_lit_conf() returns a false value.

2. If os.path.join() is given a component starting with '/', it takes
   it to be an absolute path and ignores everything preceding it.
   Remove the slash to correctly append subdirectory.

With these two fixes, libunwind tests start working on NetBSD buildbot
again.

Differential Revision: https://reviews.llvm.org/D62005

llvm-svn: 361931
---
 libunwind/test/libunwind/test/config.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py
index 803462589740c..05e3f3cc21f31 100644
--- a/libunwind/test/libunwind/test/config.py
+++ b/libunwind/test/libunwind/test/config.py
@@ -21,12 +21,10 @@ def __init__(self, lit_config, config):
         self.libcxx_src_root = None
 
     def configure_src_root(self):
-        self.libunwind_src_root = self.get_lit_conf(
-            'libunwind_src_root',
-            os.path.dirname(self.config.test_source_root))
-        self.libcxx_src_root = self.get_lit_conf(
-            'libcxx_src_root',
-            os.path.join(self.libunwind_src_root, '/../libcxx'))
+        self.libunwind_src_root = (self.get_lit_conf('libunwind_src_root')
+            or os.path.dirname(self.config.test_source_root))
+        self.libcxx_src_root = (self.get_lit_conf('libcxx_src_root')
+            or os.path.join(self.libunwind_src_root, '..', 'libcxx'))
 
     def configure_obj_root(self):
         self.libunwind_obj_root = self.get_lit_conf('libunwind_obj_root')

From 65dde1e0db647d7e3557be510a29077fae0bc13b Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Wed, 29 May 2019 08:28:47 +0000
Subject: [PATCH 0460/1176] [llvm-readobj/llvm-readelf] - Simplify the
 elf-versioninfo.test test case.

This removes 2 precompiled objects from the test case and replaces
them with a single YAML. That allowed to simplify and clean up the test,
remove excessive checks.

Differential revision: https://reviews.llvm.org/D62529

llvm-svn: 361932
---
 .../llvm-readobj/Inputs/verdef.elf-x86-64     | Bin 2256 -> 0 bytes
 .../llvm-readobj/Inputs/verneed.elf-x86-64    | Bin 13520 -> 0 bytes
 .../tools/llvm-readobj/elf-versioninfo.test   | 298 ++++++++++--------
 3 files changed, 173 insertions(+), 125 deletions(-)
 delete mode 100755 llvm/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/verneed.elf-x86-64

diff --git a/llvm/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 b/llvm/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64
deleted file mode 100755
index 4b907694e800a622f28357c2cbe4a4129316febf..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2256
zcmcIl%Wl(95FIBah4LzeMHe8o5(^e!DToCNgo2<&l}bRMA|W9x2Zy>+6QkHbvVmVf
ztoj?U%90P@16XFq7WSYp$DZT3ZlbCZOy$g-$GLOI_l)~tZF9ZiI48uUD!0X<p}Jw~
zN5>}4hTN1njhvCwQd9SwJ`D?(^~n6x$IV8K!j|Tps9_~rQ_Esz1BaUQ50@;@^e8ih
znmBBC6dccK+Z|BL>RFUu<Gu6yLziawFQ@)mer}?!%*el?{v|yz`(!?PIHz@KQ=gsT
zm)5Qy%G0&SPc|Ml>s(wHuiXvwC-4(rwxdWquNB8$ocO&&Ua4Kx{qPP@Z(bkVaZ$(-
zyjR>K@<o~B=*p3b{y}Gd{ZBQypwWwI2J(<8n&ZglAMX|zLoEnmRgHeJ7<}lD=Yh@K
z6+6gDI8|^j5OQl|@rBH-%*}-TEE~^$j>i!qn`Zwu`?=YF%+C6>P5z_8`7o5w_nTXK
zN4vpF-L2OTsfl)-%DiwcMwXBMr||~#o#w|~P2>0K41dPsgM7U@BOmuLjo;$kd)B=3
zVB_wDsyTTN<E*$zyPt@wOcpm7bmOEa?soT$`?lSSccQMi-6(0N9=y(Lxf{8Ij<}&8
zhx-s-kZ$C6c3z4bhq__nw{%54=%jW!McU9m5WE*~*=Q%^H}zMmfjI6OGXWv*Pw5&Q
zC7%4JCjWtf*mz`y{x<Z%>=4iSpvHFzLLBRJo-oT}ofYCaPt^7q&+*KoUROM3jW*{m
z*C?)nIQnE=_#ikJ9)2JA{iH@tQOfu&4FbzK=Nu6~GBD4_{@GVw<ET$O|3B=}VR))?
zeSB-NC)1CLUsWvjC=2Uju8UHx|5}6Wt(Z5o1AR9qc!oUoPyU7K<Jm1WRUfU;t)f%c
VcvnSvp~f`)H^aX#ChNM4{|l6Nt*ih5

diff --git a/llvm/test/tools/llvm-readobj/Inputs/verneed.elf-x86-64 b/llvm/test/tools/llvm-readobj/Inputs/verneed.elf-x86-64
deleted file mode 100644
index 3a9c8d8df8da38631340f7dd361a3dd9c0efb7c8..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 13520
zcmeHO%}*0i5T7n6enK^V5KUw+B~3Kz0&2WyYE*)W8t|qUEK(>5w5GcfImywZAO8e@
z21kw_IT{la@MMf<>-^rE*=`%~!bRgu(l_7p-u`xb*m-Z?+<!P36bhsi&~0*B(c3EG
zm>$yzq6*!l0UihG1RZ8`KmRJ?uJ~-$eg0c1t^A0@!Hm2PNucC~ecq#-ie5+5A&8=r
ztW){44<qFohdiv6LJUly%qKjSddf8~;Ltzik(5!c*Z1UWI`6J8#AHk|PsV%lRGr~G
zH=oZs@QAt!RleBSee4(au@eQ<$9Js29M=^}T!Af@SdRGz`Y?d8kA``~ydRw&&}o>N
z-aEaYu6Whe(&qrT9C-JV!)9TOR%@+Btu{XvCCw<N)d^Y{r`2&<%sk`b;b(O?_K0FS
zFZrrr&YqvEoU_ZL*SieU4FN;I5HJJ`0YktLFa!(%L%<ODrxDn}3*XKt2KR>FzW!r`
zs9pNW<IqTjCFpfr<L>7+Q|TLYyi)Cca{3qOc$M1SWF3J<hJYbp2p9r}fFWQA7y^cX
zAz%m?0)~Jg@OL4gd!xDst2*{{LFYZA?ZA6<&vZ)c*<PvapPmuB+va6i_p-O-jKOj7
zV}BUr_VSr;Nd05ZN_17Yu4`TV*tgEp_jbDpy)6D;qQ|0hOTzhe-T7RT4FN;I5HJJ`
z0YktLFa!(%L%<L)1Pp=y27yrD_`9!owCL?Ogii|Zj_PQqAO=VIV0d>uXhr^7K+orE
z2F}UvbuJzKfxYa5``9U(Uuz_5D-<m@UPXLDJXvZsx|G=#MfGY@_mL@Ut=e)mdbylZ
zl-6GJ=tM44U0Hg;sm;_|xN2IRqbR8-Y0FEV(`=GfTPg98bUuO3$IoZ)J$-!V(Sy5s
zaR=bQ3m9m>Li;9L0_$_BjlKy?`n`enQ}_-LdLEjueUY*lG+xSO^Jx4z&PNYLb?u|X
zVwh$>2=l|F{TbLmCH|=oy7q0>CBF<Klg_XC$}h7o{~dt#g*GH#`$Dcru8!&xETEQz
zXML+}F-%FI&aZeVez({UpZ2Y?=T;udA<0*!>sB4lKt8`H|C^%ZS2R#~@3W5eVeYK2
z7RpLPNe~qngLM5`XNGME$?{`Kh!NfSPQz=ReAS<^pxCJmJeK@-Vm!$lm34l_Z;ox*
h+!716Dfwy622oa*S(jPbEB~51okVmYi}XTo{%@DYknI2f

diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
index 28653b7608b76..c4e0bb62e1729 100644
--- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test
+++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
@@ -1,130 +1,178 @@
-// Test that llvm-readobj dumps version info tags correctly.
+## Test that llvm-readobj dumps version info tags correctly.
 
-RUN: llvm-readobj --dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s --check-prefix=LLVM-VERDEF
-RUN: llvm-readelf --dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s --check-prefix=GNU-VERDEF
+# RUN: yaml2obj %s --docnum=1 -o %t1
+# RUN: llvm-readobj -V %t1 | FileCheck %s --check-prefix=LLVM
+# RUN: llvm-readelf -V %t1 | FileCheck %s --check-prefix=GNU
 
-LLVM-VERDEF: 0x000000006FFFFFF0 VERSYM               0x24C
-LLVM-VERDEF: 0x000000006FFFFFFC VERDEF               0x25C
-LLVM-VERDEF: 0x000000006FFFFFFD VERDEFNUM            3
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  OSABI:   ELFOSABI_FREEBSD
+  Type:    ET_DYN
+  Machine: EM_X86_64
+Sections:
+  - Name:         .gnu.version
+    Type:         SHT_GNU_versym
+    Flags:        [ SHF_ALLOC ]
+    Link:         .dynsym
+    AddressAlign: 0x0000000000000002
+    EntSize:      0x0000000000000002
+    Entries:      [ 0, 2, 3, 4, 5, 6]
+  - Name:         .gnu.version_d
+    Type:         SHT_GNU_verdef
+    Flags:        [ SHF_ALLOC ]
+    Link:         .dynstr
+    AddressAlign: 0x0000000000000004
+    Info:         0x0000000000000003
+    Entries:
+      - Version:    1
+        Flags:      0
+        VersionNdx: 2
+        Hash:       175630257
+        Names:           
+          - VERSION1
+      - Version:    1
+        Flags:      0
+        VersionNdx: 3
+        Hash:       175630258
+        Names:
+          - VERSION2
+          - VERSION1
+  - Name:         .gnu.version_r
+    Type:         SHT_GNU_verneed
+    Flags:        [ SHF_ALLOC ]
+    Link:         .dynstr
+    AddressAlign: 0x0000000000000004
+    Info:         0x0000000000000002
+    Dependencies:
+      - Version: 1
+        File:    verneed1.so.0
+        Entries:
+          - Name:  v1
+            Hash:  1938
+            Flags: 0
+            Other: 4
+          - Name:  v2
+            Hash:  1939
+            Flags: 0
+            Other: 5
+      - Version: 1
+        File:    verneed2.so.0
+        Entries:
+          - Name:  v3
+            Hash:  1937
+            Flags: 0
+            Other: 6
+DynamicSymbols:  
+  - Name:            sym1
+    Binding:         STB_GLOBAL
+  - Name:            sym2
+    Binding:         STB_GLOBAL
+  - Name:            sym3
+    Binding:         STB_GLOBAL
+  - Name:            sym4
+    Binding:         STB_GLOBAL
+  - Name:            sym5
+    Binding:         STB_GLOBAL
+...
 
-LLVM-VERDEF: Version symbols {
-LLVM-VERDEF-NEXT:   Section Name: .gnu.version (20)
-LLVM-VERDEF-NEXT:   Address: 0x24C
-LLVM-VERDEF-NEXT:   Offset: 0x24C
-LLVM-VERDEF-NEXT:   Link: 1
-LLVM-VERDEF-NEXT:   Symbols [
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 0
-LLVM-VERDEF-NEXT:       Name: {{$}}
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 1
-LLVM-VERDEF-NEXT:       Name: _end{{$}}
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 1
-LLVM-VERDEF-NEXT:       Name: _edata{{$}}
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 3
-LLVM-VERDEF-NEXT:       Name: goo@@VERSION2
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 1
-LLVM-VERDEF-NEXT:       Name: __bss_start{{$}}
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 2
-LLVM-VERDEF-NEXT:       Name: foo@@VERSION1
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 2
-LLVM-VERDEF-NEXT:       Name: VERSION1@@VERSION1
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:     Symbol {
-LLVM-VERDEF-NEXT:       Version: 3
-LLVM-VERDEF-NEXT:       Name: VERSION2@@VERSION2
-LLVM-VERDEF-NEXT:     }
-LLVM-VERDEF-NEXT:   ]
-LLVM-VERDEF-NEXT: }
+# LLVM:      Version symbols {
+# LLVM-NEXT:   Section Name: .gnu.version
+# LLVM-NEXT:   Address: 0x0
+# LLVM-NEXT:   Offset: 0x280
+# LLVM-NEXT:   Link: 7
+# LLVM-NEXT:   Symbols [
+# LLVM-NEXT:     Symbol {
+# LLVM-NEXT:       Version: 0
+# LLVM-NEXT:       Name:
+# LLVM-NEXT:     }
+# LLVM-NEXT:     Symbol {
+# LLVM-NEXT:       Version: 2
+# LLVM-NEXT:       Name: sym1@@VERSION1
+# LLVM-NEXT:     }
+# LLVM-NEXT:     Symbol {
+# LLVM-NEXT:       Version: 3
+# LLVM-NEXT:       Name: sym2@@VERSION2
+# LLVM-NEXT:     }
+# LLVM-NEXT:     Symbol {
+# LLVM-NEXT:       Version: 4
+# LLVM-NEXT:       Name: sym3@v1
+# LLVM-NEXT:     }
+# LLVM-NEXT:     Symbol {
+# LLVM-NEXT:       Version: 5
+# LLVM-NEXT:       Name: sym4@v2
+# LLVM-NEXT:     }
+# LLVM-NEXT:     Symbol {
+# LLVM-NEXT:       Version: 6
+# LLVM-NEXT:       Name: sym5@v3
+# LLVM-NEXT:     }
+# LLVM-NEXT:   ]
+# LLVM-NEXT: }
+# LLVM-NEXT: SHT_GNU_verdef {
+# LLVM-NEXT:   Definition {
+# LLVM-NEXT:     Version: 1
+# LLVM-NEXT:     Flags: 0x0
+# LLVM-NEXT:     Index: 2
+# LLVM-NEXT:     Hash: 175630257
+# LLVM-NEXT:     Name: VERSION1
+# LLVM-NEXT:   }
+# LLVM-NEXT:   Definition {
+# LLVM-NEXT:     Version: 1
+# LLVM-NEXT:     Flags: 0x0
+# LLVM-NEXT:     Index: 3
+# LLVM-NEXT:     Hash: 175630258
+# LLVM-NEXT:     Name: VERSION2
+# LLVM-NEXT:     Predecessor: VERSION1
+# LLVM-NEXT:   }
+# LLVM-NEXT:   Definition {
+# LLVM-NEXT:     Version: 1
+# LLVM-NEXT:     Flags: 0x0
+# LLVM-NEXT:     Index: 3
+# LLVM-NEXT:     Hash: 175630258
+# LLVM-NEXT:     Name: VERSION2
+# LLVM-NEXT:     Predecessor: VERSION1
+# LLVM-NEXT:   }
+# LLVM-NEXT: }
+# LLVM-NEXT: SHT_GNU_verneed {
+# LLVM-NEXT:   Dependency {
+# LLVM-NEXT:     Version: 1
+# LLVM-NEXT:     Count: 2
+# LLVM-NEXT:     FileName: verneed1.so.0
+# LLVM-NEXT:     Entries [
+# LLVM-NEXT:       Entry {
+# LLVM-NEXT:         Hash: 1938
+# LLVM-NEXT:         Flags: 0x0
+# LLVM-NEXT:         Index: 4
+# LLVM-NEXT:         Name: v1
+# LLVM-NEXT:       }
+# LLVM-NEXT:       Entry {
+# LLVM-NEXT:         Hash: 1939
+# LLVM-NEXT:         Flags: 0x0
+# LLVM-NEXT:         Index: 5
+# LLVM-NEXT:         Name: v2
+# LLVM-NEXT:       }
+# LLVM-NEXT:     ]
+# LLVM-NEXT:   }
+# LLVM-NEXT:   Dependency {
+# LLVM-NEXT:     Version: 1
+# LLVM-NEXT:     Count: 1
+# LLVM-NEXT:     FileName: verneed2.so.0
+# LLVM-NEXT:     Entries [
+# LLVM-NEXT:       Entry {
+# LLVM-NEXT:         Hash: 1937
+# LLVM-NEXT:         Flags: 0x0
+# LLVM-NEXT:         Index: 6
+# LLVM-NEXT:         Name: v3
+# LLVM-NEXT:       }
+# LLVM-NEXT:     ]
+# LLVM-NEXT:   }
+# LLVM-NEXT: }
 
-LLVM-VERDEF:      SHT_GNU_verdef {
-LLVM-VERDEF-NEXT:   Definition {
-LLVM-VERDEF-NEXT:     Version: 1
-LLVM-VERDEF-NEXT:     Flags: Base (0x1)
-LLVM-VERDEF-NEXT:     Index: 1
-LLVM-VERDEF-NEXT:     Hash: 430712
-LLVM-VERDEF-NEXT:     Name: blah
-LLVM-VERDEF-NEXT:   }
-LLVM-VERDEF-NEXT:   Definition {
-LLVM-VERDEF-NEXT:     Version: 1
-LLVM-VERDEF-NEXT:     Flags: 0x0
-LLVM-VERDEF-NEXT:     Index: 2
-LLVM-VERDEF-NEXT:     Hash: 175630257
-LLVM-VERDEF-NEXT:     Name: VERSION1
-LLVM-VERDEF-NEXT:   }
-LLVM-VERDEF-NEXT:   Definition {
-LLVM-VERDEF-NEXT:     Version: 1
-LLVM-VERDEF-NEXT:     Flags: 0x0
-LLVM-VERDEF-NEXT:     Index: 3
-LLVM-VERDEF-NEXT:     Hash: 175630258
-LLVM-VERDEF-NEXT:     Name: VERSION2
-LLVM-VERDEF-NEXT:     Predecessor: VERSION1
-LLVM-VERDEF-NEXT:   }
-LLVM-VERDEF-NEXT: }
-
-GNU-VERDEF: 0x000000006ffffff0 (VERSYM)             0x24c
-GNU-VERDEF: 0x000000006ffffffc (VERDEF)             0x25c
-GNU-VERDEF: 0x000000006ffffffd (VERDEFNUM)          3
-
-GNU-VERDEF:      Version symbols section '.gnu.version' contains 8 entries:
-GNU-VERDEF-NEXT:  Addr: 000000000000024c  Offset: 0x00024c  Link: 1 (.dynsym)
-GNU-VERDEF-NEXT:   000:   0 (*local*)       1 (*global*)      1 (*global*)      3 (VERSION2)
-GNU-VERDEF-NEXT:   004:   1 (*global*)      2 (VERSION1)      2 (VERSION1)      3 (VERSION2)
-
-GNU-VERDEF: Dumper for .gnu.version_d is not implemented
-
-RUN: llvm-readobj -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=LLVM-VERNEED
-RUN: llvm-readelf -V %p/Inputs/verneed.elf-x86-64 | FileCheck %s --check-prefix=GNU-VERNEED
-
-LLVM-VERNEED:       SHT_GNU_verneed {
-LLVM-VERNEED-NEXT:   Dependency {
-LLVM-VERNEED-NEXT:     Version: 1
-LLVM-VERNEED-NEXT:     Count: 2
-LLVM-VERNEED-NEXT:     FileName: verneed1.so.0
-LLVM-VERNEED-NEXT:     Entries [
-LLVM-VERNEED-NEXT:       Entry {
-LLVM-VERNEED-NEXT:         Hash: 1938
-LLVM-VERNEED-NEXT:         Flags: 0x0
-LLVM-VERNEED-NEXT:         Index: 3
-LLVM-VERNEED-NEXT:         Name: v2
-LLVM-VERNEED-NEXT:       }
-LLVM-VERNEED-NEXT:       Entry {
-LLVM-VERNEED-NEXT:         Hash: 1939
-LLVM-VERNEED-NEXT:         Flags: 0x0
-LLVM-VERNEED-NEXT:         Index: 2
-LLVM-VERNEED-NEXT:         Name: v3
-LLVM-VERNEED-NEXT:       }
-LLVM-VERNEED-NEXT:     ]
-LLVM-VERNEED-NEXT:   }
-LLVM-VERNEED-NEXT:   Dependency {
-LLVM-VERNEED-NEXT:     Version: 1
-LLVM-VERNEED-NEXT:     Count: 1
-LLVM-VERNEED-NEXT:     FileName: verneed2.so.0
-LLVM-VERNEED-NEXT:     Entries [
-LLVM-VERNEED-NEXT:       Entry {
-LLVM-VERNEED-NEXT:         Hash: 1937
-LLVM-VERNEED-NEXT:         Flags: 0x0
-LLVM-VERNEED-NEXT:         Index: 4
-LLVM-VERNEED-NEXT:         Name: v1
-LLVM-VERNEED-NEXT:       }
-LLVM-VERNEED-NEXT:     ]
-LLVM-VERNEED-NEXT:   }
-LLVM-VERNEED-NEXT: }
-
-GNU-VERNEED:      Version symbols section '.gnu.version' contains 4 entries:
-GNU-VERNEED-NEXT:  Addr: 0000000000010228  Offset: 0x000228  Link: 1 (.dynsym)
-GNU-VERNEED-NEXT:   000:   0 (*local*)       2 (v3)            3 (v2)            4 (v1)
-
-GNU-VERNEED: Dumper for .gnu.version_r is not implemented
+# GNU:      Version symbols section '.gnu.version' contains 6 entries:
+# GNU-NEXT:  Addr: 0000000000000000  Offset: 0x000280  Link: 7 (.dynsym)
+# GNU-NEXT:   000:   0 (*local*) 2 (VERSION1) 3 (VERSION2) 4 (v1)
+# GNU-NEXT:   004:   5 (v2)      6 (v3)
 
+# GNU-VERDEF: Dumper for .gnu.version_d is not implemented
+# GNU-VERNEED: Dumper for .gnu.version_r is not implemented

From 4f58ad4e720df4c265271907758b3daffbf764d2 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Wed, 29 May 2019 08:40:33 +0000
Subject: [PATCH 0461/1176] [AArch64][SVE2] Asm: support SVE2 Floating Point
 Pairwise Group

Summary:
Patch adds support for the following instructions:

SVE2 floating-point pairwise operations:
    * FADDP, FMAXNMP, FMINNMP, FMAXP, FMINP

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62383

llvm-svn: 361933
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  7 +++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 33 ++++++++++++
 llvm/test/MC/AArch64/SVE2/faddp-diagnostics.s | 50 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/faddp.s             | 53 +++++++++++++++++++
 .../MC/AArch64/SVE2/fmaxnmp-diagnostics.s     | 50 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmaxnmp.s           | 53 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmaxp-diagnostics.s | 50 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmaxp.s             | 53 +++++++++++++++++++
 .../MC/AArch64/SVE2/fminnmp-diagnostics.s     | 50 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fminnmp.s           | 53 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fminp-diagnostics.s | 50 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fminp.s             | 53 +++++++++++++++++++
 12 files changed, 555 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/faddp-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/faddp.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmaxnmp-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmaxnmp.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmaxp-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmaxp.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fminnmp-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fminnmp.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fminp-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fminp.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4095c6d95822d..2d8b9a9879f99 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1294,6 +1294,13 @@ let Predicates = [HasSVE2] in {
   defm FCVTNT_ZPmZ  : sve2_fp_convert_down_narrow<"fcvtnt">;
   defm FCVTLT_ZPmZ  : sve2_fp_convert_up_long<"fcvtlt">;
 
+  // SVE2 floating-point pairwise operations
+  defm FADDP_ZPmZZ   : sve2_fp_pairwise_pred<0b000, "faddp">;
+  defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">;
+  defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">;
+  defm FMAXP_ZPmZZ   : sve2_fp_pairwise_pred<0b110, "fmaxp">;
+  defm FMINP_ZPmZZ   : sve2_fp_pairwise_pred<0b111, "fminp">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 699c21d867b72..967e0a4179460 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1472,6 +1472,39 @@ multiclass sve2_fp_convert_down_odd_rounding<string asm> {
   def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Pairwise Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
+                            ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+  asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zm;
+  bits<5> Zdn;
+  let Inst{31-24} = 0b01100100;
+  let Inst{23-22} = sz;
+  let Inst{21-19} = 0b010;
+  let Inst{18-16} = opc;
+  let Inst{15-13} = 0b100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+  let DestructiveInstType = Destructive;
+  let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
+  def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
+  def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
+  def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Stack Allocation Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/faddp-diagnostics.s b/llvm/test/MC/AArch64/SVE2/faddp-diagnostics.s
new file mode 100644
index 0000000000000..99d4f5eefe435
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/faddp-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+faddp z0.h, p0/m, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: faddp z0.h, p0/m, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+faddp z0.b, p0/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: faddp z0.b, p0/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+faddp z0.h, p0/m, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: faddp z0.h, p0/m, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+faddp z0.h, p0/m, z0.h, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: faddp z0.h, p0/m, z0.h, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+faddp z0.h, p0/z, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: faddp z0.h, p0/z, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+faddp z0.h, p8/m, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: faddp z0.h, p8/m, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/faddp.s b/llvm/test/MC/AArch64/SVE2/faddp.s
new file mode 100644
index 0000000000000..8c05eff4cc14b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/faddp.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+faddp z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: faddp z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x50,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 50 64 <unknown>
+
+faddp z29.s, p3/m, z29.s, z30.s
+// CHECK-INST: faddp z29.s, p3/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x8f,0x90,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 8f 90 64 <unknown>
+
+faddp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: faddp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd0,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d0 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+faddp z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: faddp z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd0,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d0 64 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+faddp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: faddp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd0,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d0 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fmaxnmp-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmaxnmp-diagnostics.s
new file mode 100644
index 0000000000000..2d51333416ae5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmaxnmp-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+fmaxnmp z0.h, p0/m, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: fmaxnmp z0.h, p0/m, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fmaxnmp z0.b, p0/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmaxnmp z0.b, p0/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+fmaxnmp z0.h, p0/m, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmaxnmp z0.h, p0/m, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmaxnmp z0.h, p0/m, z0.h, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmaxnmp z0.h, p0/m, z0.h, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fmaxnmp z0.h, p0/z, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmaxnmp z0.h, p0/z, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fmaxnmp z0.h, p8/m, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fmaxnmp z0.h, p8/m, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fmaxnmp.s b/llvm/test/MC/AArch64/SVE2/fmaxnmp.s
new file mode 100644
index 0000000000000..6a35174a498a1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmaxnmp.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+fmaxnmp z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: fmaxnmp z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x54,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 54 64 <unknown>
+
+fmaxnmp z29.s, p3/m, z29.s, z30.s
+// CHECK-INST: fmaxnmp z29.s, p3/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x8f,0x94,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 8f 94 64 <unknown>
+
+fmaxnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fmaxnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd4,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d4 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+fmaxnmp z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: fmaxnmp z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd4,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d4 64 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fmaxnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fmaxnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd4,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d4 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fmaxp-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmaxp-diagnostics.s
new file mode 100644
index 0000000000000..6165e74878b8d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmaxp-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+fmaxp z0.h, p0/m, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: fmaxp z0.h, p0/m, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fmaxp z0.b, p0/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmaxp z0.b, p0/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+fmaxp z0.h, p0/m, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmaxp z0.h, p0/m, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmaxp z0.h, p0/m, z0.h, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmaxp z0.h, p0/m, z0.h, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fmaxp z0.h, p0/z, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmaxp z0.h, p0/z, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fmaxp z0.h, p8/m, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fmaxp z0.h, p8/m, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fmaxp.s b/llvm/test/MC/AArch64/SVE2/fmaxp.s
new file mode 100644
index 0000000000000..9d2eb4c2f38f4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmaxp.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+fmaxp z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: fmaxp z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x56,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 56 64 <unknown>
+
+fmaxp z29.s, p3/m, z29.s, z30.s
+// CHECK-INST: fmaxp z29.s, p3/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x8f,0x96,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 8f 96 64 <unknown>
+
+fmaxp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fmaxp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd6,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d6 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+fmaxp z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: fmaxp z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd6,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d6 64 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fmaxp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fmaxp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd6,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d6 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fminnmp-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fminnmp-diagnostics.s
new file mode 100644
index 0000000000000..2338816d06d07
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fminnmp-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+fminnmp z0.h, p0/m, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: fminnmp z0.h, p0/m, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fminnmp z0.b, p0/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fminnmp z0.b, p0/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+fminnmp z0.h, p0/m, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fminnmp z0.h, p0/m, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fminnmp z0.h, p0/m, z0.h, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fminnmp z0.h, p0/m, z0.h, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fminnmp z0.h, p0/z, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fminnmp z0.h, p0/z, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fminnmp z0.h, p8/m, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fminnmp z0.h, p8/m, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fminnmp.s b/llvm/test/MC/AArch64/SVE2/fminnmp.s
new file mode 100644
index 0000000000000..b40510a064532
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fminnmp.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+fminnmp z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: fminnmp z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x55,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 55 64 <unknown>
+
+fminnmp z29.s, p3/m, z29.s, z30.s
+// CHECK-INST: fminnmp z29.s, p3/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x8f,0x95,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 8f 95 64 <unknown>
+
+fminnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fminnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd5,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d5 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+fminnmp z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: fminnmp z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd5,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d5 64 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fminnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fminnmp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd5,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d5 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fminp-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fminp-diagnostics.s
new file mode 100644
index 0000000000000..cc55a2652fbbc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fminp-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+fminp z0.h, p0/m, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: fminp z0.h, p0/m, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fminp z0.b, p0/m, z0.b, z1.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fminp z0.b, p0/m, z0.b, z1.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+fminp z0.h, p0/m, z0.s, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fminp z0.h, p0/m, z0.s, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fminp z0.h, p0/m, z0.h, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fminp z0.h, p0/m, z0.h, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fminp z0.h, p0/z, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fminp z0.h, p0/z, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fminp z0.h, p8/m, z0.h, z1.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fminp z0.h, p8/m, z0.h, z1.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fminp.s b/llvm/test/MC/AArch64/SVE2/fminp.s
new file mode 100644
index 0000000000000..f33ad1b0e5822
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fminp.s
@@ -0,0 +1,53 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+fminp z0.h, p0/m, z0.h, z1.h
+// CHECK-INST: fminp z0.h, p0/m, z0.h, z1.h
+// CHECK-ENCODING: [0x20,0x80,0x57,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 57 64 <unknown>
+
+fminp z29.s, p3/m, z29.s, z30.s
+// CHECK-INST: fminp z29.s, p3/m, z29.s, z30.s
+// CHECK-ENCODING: [0xdd,0x8f,0x97,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 8f 97 64 <unknown>
+
+fminp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fminp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d7 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31.d, p0/z, z6.d
+// CHECK-INST: movprfx z31.d, p0/z, z6.d
+// CHECK-ENCODING: [0xdf,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df 20 d0 04 <unknown>
+
+fminp z31.d, p0/m, z31.d, z30.d
+// CHECK-INST: fminp z31.d, p0/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x83,0xd7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 83 d7 64 <unknown>
+
+movprfx z31, z6
+// CHECK-INST: movprfx z31, z6
+// CHECK-ENCODING: [0xdf,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: df bc 20 04 <unknown>
+
+fminp z31.d, p7/m, z31.d, z30.d
+// CHECK-INST: fminp z31.d, p7/m, z31.d, z30.d
+// CHECK-ENCODING: [0xdf,0x9f,0xd7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 9f d7 64 <unknown>

From d61cb749f4ac2c90244906d756e80a5c4a7ffa89 Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Wed, 29 May 2019 08:42:35 +0000
Subject: [PATCH 0462/1176] [X86] Fix i386 struct and union parameter alignment

According to i386 System V ABI 2.1: Structures and unions assume the
alignment of their most strictly aligned component. But current
implementation always takes them as 4-byte aligned which will result
in incorrect code, e.g:

 1 #include <immintrin.h>
 2 typedef union {
 3         int d[4];
 4         __m128 m;
 5 } M128;
 6 extern void foo(int, ...);
 7 void test(void)
 8 {
 9   M128 a;
10   foo(1, a);
11   foo(1, a.m);
12 }

The first call (line 10) takes the second arg as 4-byte aligned while
the second call (line 11) takes the second arg as 16-byte aligned.
There is oxymoron for the alignment of the 2 calls because they should
be the same.

This patch fixes the bug by following i386 System V ABI and apply it to
Linux only since other System V OS (e.g Darwin, PS4 and FreeBSD) don't
want to spend any effort dealing with the ramifications of ABI breaks
at present.

Patch by Wei Xiao (wxiao3)

Differential Revision: https://reviews.llvm.org/D60748

llvm-svn: 361934
---
 clang/lib/CodeGen/TargetInfo.cpp            | 13 +++++++++--
 clang/test/CodeGen/x86_32-align-linux.c     | 25 +++++++++++++++++++++
 clang/test/CodeGen/x86_32-arguments-linux.c | 24 ++++++++++----------
 3 files changed, 48 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/CodeGen/x86_32-align-linux.c

diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 24b7b9f97f9a7..4b96aa13d00e6 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1010,6 +1010,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1076,6 +1077,7 @@ class X86_32ABIInfo : public SwiftABIInfo {
       IsWin32StructABI(Win32StructABI),
       IsSoftFloatABI(SoftFloatABI),
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+      IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
       DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1492,8 +1494,15 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
   if (Align <= MinABIStackAlignInBytes)
     return 0; // Use default alignment.
 
-  // On non-Darwin, the stack type alignment is always 4.
-  if (!IsDarwinVectorABI) {
+  if (IsLinuxABI) {
+    // i386 System V ABI 2.1: Structures and unions assume the alignment of their
+    // most strictly aligned component.
+    //
+    // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
+    // want to spend any effort dealing with the ramifications of ABI breaks.
+    return Align;
+  } else if (!IsDarwinVectorABI) {
+    // On non-Darwin and non-Linux, the stack type alignment is always 4.
     // Set explicit alignment, since we may need to realign the top.
     return MinABIStackAlignInBytes;
   }
diff --git a/clang/test/CodeGen/x86_32-align-linux.c b/clang/test/CodeGen/x86_32-align-linux.c
new file mode 100644
index 0000000000000..5fce3f5f295c7
--- /dev/null
+++ b/clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+typedef union {
+        int d[4];
+        __m128 m;
+} M128;
+
+extern void foo(int, ...);
+
+M128 a;
+
+// CHECK-LABEL: define void @test
+// CHECK: entry:
+// CHECK: call void (i32, ...) @foo(i32 1, %union.M128* byval align 16
+// CHECK: call void (i32, ...) @foo(i32 1, <4 x float>
+
+void test(void)
+{
+  foo(1, a);
+  foo(1, a.m);
+}
+
diff --git a/clang/test/CodeGen/x86_32-arguments-linux.c b/clang/test/CodeGen/x86_32-arguments-linux.c
index 02eac51216af7..3718980ba16b9 100644
--- a/clang/test/CodeGen/x86_32-arguments-linux.c
+++ b/clang/test/CodeGen/x86_32-arguments-linux.c
@@ -3,21 +3,21 @@
 
 // CHECK-LABEL: define void @f56(
 // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4,
-// CHECK: <1 x double> %a4, %struct.s56_2* byval align 4,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval align 4,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval align 4)
+// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 8 %a3,
+// CHECK: <1 x double> %a4, %struct.s56_2* byval align 8 %a5,
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval align 16 %a9,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 32 %a11,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval align 32 %a13)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}},
-// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 4 %{{[^ ]*}})
+// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 8 %{{[^ ]*}},
+// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 8 %{{[^ ]*}},
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 16 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 16 %{{[^ ]*}},
+// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 32 %{{[^ ]*}},
+// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 32 %{{[^ ]*}})
 // CHECK: }
 //
 // <rdar://problem/7964854> [i386] clang misaligns long double in structures

From 75dfbdc2da1218cc19f18576a78109ef47ee5ff0 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Wed, 29 May 2019 08:53:06 +0000
Subject: [PATCH 0463/1176] [AArch64][SVE2] Asm: support Floating Point
 Widening Multiply-Add

Summary:
Patch adds support for the indexed and unpredicated vectors forms of the
FMLALB, FMLALT, FMLSLB and FMLSLT instructions.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62386

llvm-svn: 361935
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 ++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 56 +++++++++++++++
 .../test/MC/AArch64/SVE/movprfx-diagnostics.s | 10 +++
 .../test/MC/AArch64/SVE2/fmlalb-diagnostics.s | 71 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmlalb.s            | 54 ++++++++++++++
 .../test/MC/AArch64/SVE2/fmlalt-diagnostics.s | 71 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmlalt.s            | 54 ++++++++++++++
 .../test/MC/AArch64/SVE2/fmlslb-diagnostics.s | 71 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmlslb.s            | 54 ++++++++++++++
 .../test/MC/AArch64/SVE2/fmlslt-diagnostics.s | 71 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/fmlslt.s            | 54 ++++++++++++++
 11 files changed, 578 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlalb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlalb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlalt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlalt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlslb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlslb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlslt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fmlslt.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2d8b9a9879f99..8798e039494c2 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1301,6 +1301,18 @@ let Predicates = [HasSVE2] in {
   defm FMAXP_ZPmZZ   : sve2_fp_pairwise_pred<0b110, "fmaxp">;
   defm FMINP_ZPmZZ   : sve2_fp_pairwise_pred<0b111, "fminp">;
 
+  // SVE2 floating-point multiply-add long (indexed)
+  def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">;
+  def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">;
+  def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">;
+  def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">;
+
+  // SVE2 floating-point multiply-add long
+  def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">;
+  def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">;
+  def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
+  def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 967e0a4179460..c9347e58f55d5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1505,6 +1505,62 @@ multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
   def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
 }
 
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Widening Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm,
+                        VectorIndexH:$iop),
+  asm, "\t$Zda, $Zn, $Zm$iop",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<3> Zm;
+  bits<3> iop;
+  let Inst{31-21} = 0b01100100101;
+  let Inst{20-19} = iop{2-1};
+  let Inst{18-16} = Zm;
+  let Inst{15-14} = 0b01;
+  let Inst{13}    = opc{1};
+  let Inst{12}    = 0b0;
+  let Inst{11}    = iop{0};
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Widening Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_mla_long<bits<2> opc, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
+  asm, "\t$Zda, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-21} = 0b01100100101;
+  let Inst{20-16} = Zm;
+  let Inst{15-14} = 0b10;
+  let Inst{13}    = opc{1};
+  let Inst{12-11} = 0b00;
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Stack Allocation Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE/movprfx-diagnostics.s b/llvm/test/MC/AArch64/SVE/movprfx-diagnostics.s
index 843e0daebcbdc..3eb833312f215 100644
--- a/llvm/test/MC/AArch64/SVE/movprfx-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/movprfx-diagnostics.s
@@ -110,6 +110,16 @@ mla z0.d, p0/m, z0.d, z2.d
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 
+// ------------------------------------------------------------------------- //
+// Destination used in other operand (ternary + indexed)
+
+movprfx z0, z1
+sdot z0.s, z1.b, z0.b[3]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx and destination also used as non-destructive source
+// CHECK-NEXT: sdot z0.s, z1.b, z0.b[3]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
 // ------------------------------------------------------------------------- //
 // Different general predicate (unary)
 
diff --git a/llvm/test/MC/AArch64/SVE2/fmlalb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmlalb-diagnostics.s
new file mode 100644
index 0000000000000..1cc9818d6e2f5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlalb-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// z register out of range for index
+
+fmlalb z0.s, z1.h, z8.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlalb z0.s, z1.h, z8.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Index out of bounds
+
+fmlalb z0.s, z1.h, z7.h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlalb z0.s, z1.h, z7.h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalb z0.s, z1.h, z7.h[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlalb z0.s, z1.h, z7.h[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmlalb z0.s, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalb z0.s, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalb z0.s, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalb z0.s, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalb z0.s, z1.b, z2.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalb z0.s, z1.b, z2.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalb z0.s, z1.s, z2.s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalb z0.s, z1.s, z2.s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalb z0.s, z1.d, z2.d[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalb z0.s, z1.d, z2.d[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z29.s, p0/z, z7.s
+fmlalb  z29.s, z30.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlalb  z29.s, z30.h, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+fmlalb  z0.s, z1.h, z7.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlalb  z0.s, z1.h, z7.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fmlalb.s b/llvm/test/MC/AArch64/SVE2/fmlalb.s
new file mode 100644
index 0000000000000..87e3c83670109
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlalb.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fmlalb z29.s, z30.h, z31.h
+// CHECK-INST: fmlalb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x83,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 83 bf 64 <unknown>
+
+fmlalb z0.s, z1.h, z7.h[0]
+// CHECK-INST: fmlalb	z0.s, z1.h, z7.h[0]
+// CHECK-ENCODING: [0x20,0x40,0xa7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 40 a7 64 <unknown>
+
+fmlalb z30.s, z31.h, z7.h[7]
+// CHECK-INST: fmlalb z30.s, z31.h, z7.h[7]
+// CHECK-ENCODING: [0xfe,0x4b,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe 4b bf 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z29, z28
+// CHECK-INST: movprfx	z29, z28
+// CHECK-ENCODING: [0x9d,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 9d bf 20 04 <unknown>
+
+fmlalb z29.s, z30.h, z31.h
+// CHECK-INST: fmlalb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x83,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 83 bf 64 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+fmlalb z21.s, z1.h, z7.h[7]
+// CHECK-INST: fmlalb	z21.s, z1.h, z7.h[7]
+// CHECK-ENCODING: [0x35,0x48,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 48 bf 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fmlalt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmlalt-diagnostics.s
new file mode 100644
index 0000000000000..30b81fe551197
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlalt-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// z register out of range for index
+
+fmlalt z0.s, z1.h, z8.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlalt z0.s, z1.h, z8.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Index out of bounds
+
+fmlalt z0.s, z1.h, z7.h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlalt z0.s, z1.h, z7.h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalt z0.s, z1.h, z7.h[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlalt z0.s, z1.h, z7.h[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmlalt z0.s, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalt z0.s, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalt z0.s, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalt z0.s, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalt z0.s, z1.b, z2.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalt z0.s, z1.b, z2.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalt z0.s, z1.s, z2.s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalt z0.s, z1.s, z2.s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlalt z0.s, z1.d, z2.d[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlalt z0.s, z1.d, z2.d[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z29.s, p0/z, z7.s
+fmlalt  z29.s, z30.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlalt  z29.s, z30.h, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+fmlalt  z0.s, z1.h, z7.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlalt  z0.s, z1.h, z7.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fmlalt.s b/llvm/test/MC/AArch64/SVE2/fmlalt.s
new file mode 100644
index 0000000000000..2082bfff5bcd5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlalt.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fmlalt z29.s, z30.h, z31.h
+// CHECK-INST: fmlalt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x87,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 87 bf 64 <unknown>
+
+fmlalt z0.s, z1.h, z7.h[0]
+// CHECK-INST: fmlalt	z0.s, z1.h, z7.h[0]
+// CHECK-ENCODING: [0x20,0x44,0xa7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 44 a7 64 <unknown>
+
+fmlalt z30.s, z31.h, z7.h[7]
+// CHECK-INST: fmlalt z30.s, z31.h, z7.h[7]
+// CHECK-ENCODING: [0xfe,0x4f,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe 4f bf 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z29, z28
+// CHECK-INST: movprfx	z29, z28
+// CHECK-ENCODING: [0x9d,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 9d bf 20 04 <unknown>
+
+fmlalt z29.s, z30.h, z31.h
+// CHECK-INST: fmlalt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0x87,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 87 bf 64 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+fmlalt z21.s, z1.h, z7.h[7]
+// CHECK-INST: fmlalt	z21.s, z1.h, z7.h[7]
+// CHECK-ENCODING: [0x35,0x4c,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 4c bf 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fmlslb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmlslb-diagnostics.s
new file mode 100644
index 0000000000000..99466e119d15f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlslb-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// z register out of range for index
+
+fmlslb z0.s, z1.h, z8.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlslb z0.s, z1.h, z8.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Index out of bounds
+
+fmlslb z0.s, z1.h, z7.h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlslb z0.s, z1.h, z7.h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslb z0.s, z1.h, z7.h[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlslb z0.s, z1.h, z7.h[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmlslb z0.s, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslb z0.s, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslb z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslb z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslb z0.s, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslb z0.s, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslb z0.s, z1.b, z2.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslb z0.s, z1.b, z2.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslb z0.s, z1.s, z2.s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslb z0.s, z1.s, z2.s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslb z0.s, z1.d, z2.d[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslb z0.s, z1.d, z2.d[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z29.s, p0/z, z7.s
+fmlslb  z29.s, z30.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlslb  z29.s, z30.h, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+fmlslb  z0.s, z1.h, z7.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlslb  z0.s, z1.h, z7.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fmlslb.s b/llvm/test/MC/AArch64/SVE2/fmlslb.s
new file mode 100644
index 0000000000000..1b3ffbf36affe
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlslb.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fmlslb z29.s, z30.h, z31.h
+// CHECK-INST: fmlslb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0xa3,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd a3 bf 64 <unknown>
+
+fmlslb z0.s, z1.h, z7.h[0]
+// CHECK-INST: fmlslb	z0.s, z1.h, z7.h[0]
+// CHECK-ENCODING: [0x20,0x60,0xa7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 60 a7 64 <unknown>
+
+fmlslb z30.s, z31.h, z7.h[7]
+// CHECK-INST: fmlslb z30.s, z31.h, z7.h[7]
+// CHECK-ENCODING: [0xfe,0x6b,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe 6b bf 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z29, z28
+// CHECK-INST: movprfx	z29, z28
+// CHECK-ENCODING: [0x9d,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 9d bf 20 04 <unknown>
+
+fmlslb z29.s, z30.h, z31.h
+// CHECK-INST: fmlslb z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0xa3,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd a3 bf 64 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+fmlslb z21.s, z1.h, z7.h[7]
+// CHECK-INST: fmlslb	z21.s, z1.h, z7.h[7]
+// CHECK-ENCODING: [0x35,0x68,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 68 bf 64 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/fmlslt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmlslt-diagnostics.s
new file mode 100644
index 0000000000000..c56cf1eab9f46
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlslt-diagnostics.s
@@ -0,0 +1,71 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// z register out of range for index
+
+fmlslt z0.s, z1.h, z8.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmlslt z0.s, z1.h, z8.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Index out of bounds
+
+fmlslt z0.s, z1.h, z7.h[-1]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlslt z0.s, z1.h, z7.h[-1]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslt z0.s, z1.h, z7.h[8]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7].
+// CHECK-NEXT: fmlslt z0.s, z1.h, z7.h[8]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmlslt z0.s, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslt z0.s, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslt z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslt z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslt z0.s, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslt z0.s, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslt z0.s, z1.b, z2.b[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslt z0.s, z1.b, z2.b[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslt z0.s, z1.s, z2.s[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslt z0.s, z1.s, z2.s[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmlslt z0.s, z1.d, z2.d[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmlslt z0.s, z1.d, z2.d[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z29.s, p0/z, z7.s
+fmlslt  z29.s, z30.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlslt  z29.s, z30.h, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+fmlslt  z0.s, z1.h, z7.h[0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: fmlslt  z0.s, z1.h, z7.h[0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fmlslt.s b/llvm/test/MC/AArch64/SVE2/fmlslt.s
new file mode 100644
index 0000000000000..4a92adc77eb6c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmlslt.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fmlslt z29.s, z30.h, z31.h
+// CHECK-INST: fmlslt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0xa7,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd a7 bf 64 <unknown>
+
+fmlslt z0.s, z1.h, z7.h[0]
+// CHECK-INST: fmlslt	z0.s, z1.h, z7.h[0]
+// CHECK-ENCODING: [0x20,0x64,0xa7,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 64 a7 64 <unknown>
+
+fmlslt z30.s, z31.h, z7.h[7]
+// CHECK-INST: fmlslt z30.s, z31.h, z7.h[7]
+// CHECK-ENCODING: [0xfe,0x6f,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe 6f bf 64 <unknown>
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z29, z28
+// CHECK-INST: movprfx	z29, z28
+// CHECK-ENCODING: [0x9d,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 9d bf 20 04 <unknown>
+
+fmlslt z29.s, z30.h, z31.h
+// CHECK-INST: fmlslt z29.s, z30.h, z31.h
+// CHECK-ENCODING: [0xdd,0xa7,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd a7 bf 64 <unknown>
+
+movprfx z21, z28
+// CHECK-INST: movprfx	z21, z28
+// CHECK-ENCODING: [0x95,0xbf,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: 95 bf 20 04 <unknown>
+
+fmlslt z21.s, z1.h, z7.h[7]
+// CHECK-INST: fmlslt	z21.s, z1.h, z7.h[7]
+// CHECK-ENCODING: [0x35,0x6c,0xbf,0x64]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 35 6c bf 64 <unknown>

From 6c04ef3d48ab771b63d478aa9c338ad63c6f7836 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Wed, 29 May 2019 09:03:27 +0000
Subject: [PATCH 0464/1176] [AArch64][SVE2] Asm: support SVE Bitwise Logical -
 Unpredicated Group

Summary:
Patch adds support for the following instructions:
    * EOR3, BSL, BCAX, BSL1N, BSL2N, NBSL, XAR

Aliases for types .B/.H/.S for EOR3 and BCAX have been added, the
preferred disassembly is .D.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62387

llvm-svn: 361936
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 11 +++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 70 ++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/bcax-diagnostics.s  | 39 ++++++++++
 llvm/test/MC/AArch64/SVE2/bcax.s              | 52 ++++++++++++++
 llvm/test/MC/AArch64/SVE2/bsl-diagnostics.s   | 39 ++++++++++
 llvm/test/MC/AArch64/SVE2/bsl.s               | 30 ++++++++
 llvm/test/MC/AArch64/SVE2/bsl1n-diagnostics.s | 39 ++++++++++
 llvm/test/MC/AArch64/SVE2/bsl1n.s             | 30 ++++++++
 llvm/test/MC/AArch64/SVE2/bsl2n-diagnostics.s | 39 ++++++++++
 llvm/test/MC/AArch64/SVE2/bsl2n.s             | 30 ++++++++
 llvm/test/MC/AArch64/SVE2/eor3-diagnostics.s  | 39 ++++++++++
 llvm/test/MC/AArch64/SVE2/eor3.s              | 52 ++++++++++++++
 llvm/test/MC/AArch64/SVE2/nbsl-diagnostics.s  | 39 ++++++++++
 llvm/test/MC/AArch64/SVE2/nbsl.s              | 30 ++++++++
 llvm/test/MC/AArch64/SVE2/xar-diagnostics.s   | 60 ++++++++++++++++
 llvm/test/MC/AArch64/SVE2/xar.s               | 72 +++++++++++++++++++
 16 files changed, 671 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/bcax-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bcax.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bsl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bsl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bsl1n-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bsl1n.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bsl2n-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/bsl2n.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/eor3-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/eor3.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/nbsl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/nbsl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/xar-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/xar.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 8798e039494c2..cffbf43f0868e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1313,6 +1313,17 @@ let Predicates = [HasSVE2] in {
   def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
   def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
 
+  // SVE2 bitwise ternary operations
+  defm EOR3_ZZZZ_D  : sve2_int_bitwise_ternary_op<0b000, "eor3">;
+  defm BCAX_ZZZZ_D  : sve2_int_bitwise_ternary_op<0b010, "bcax">;
+  def BSL_ZZZZ_D    : sve2_int_bitwise_ternary_op_d<0b001, "bsl">;
+  def BSL1N_ZZZZ_D  : sve2_int_bitwise_ternary_op_d<0b011, "bsl1n">;
+  def BSL2N_ZZZZ_D  : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
+  def NBSL_ZZZZ_D   : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
+
+  // sve_int_rotate_imm
+  defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c9347e58f55d5..2ab53c5acac3a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2864,6 +2864,76 @@ multiclass sve_int_bin_cons_log<bits<2> opc, string asm> {
                   (!cast<Instruction>(NAME) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 1>;
 }
 
+class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
+: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, ZPR64:$Zm, ZPR64:$Zk),
+  asm, "\t$Zdn, $_Zdn, $Zm, $Zk",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zk;
+  bits<5> Zm;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = opc{2-1};
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-11} = 0b00111;
+  let Inst{10}    = opc{0};
+  let Inst{9-5}   = Zk;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {
+  def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
+
+  def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
+                  (!cast<Instruction>(NAME) ZPR8:$Zdn,  ZPR8:$Zm,  ZPR8:$Zk),  1>;
+  def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
+                  (!cast<Instruction>(NAME) ZPR16:$Zdn, ZPR16:$Zm, ZPR16:$Zk), 1>;
+  def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
+                  (!cast<Instruction>(NAME) ZPR32:$Zdn, ZPR32:$Zm, ZPR32:$Zk), 1>;
+}
+
+class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
+                                ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, immtype:$imm),
+  asm, "\t$Zdn, $_Zdn, $Zm, $imm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zdn;
+  bits<5> Zm;
+  bits<6> imm;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = tsz8_64{3-2};
+  let Inst{21}    = 0b1;
+  let Inst{20-19} = tsz8_64{1-0};
+  let Inst{18-16} = imm{2-0}; // imm3
+  let Inst{15-10} = 0b001101;
+  let Inst{9-5}   = Zm;
+  let Inst{4-0}   = Zdn;
+
+  let Constraints = "$Zdn = $_Zdn";
+  let DestructiveInstType = Destructive;
+  let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_rotate_right_imm<string asm> {
+  def _B : sve2_int_rotate_right_imm<{0,0,0,1}, asm, ZPR8, vecshiftR8>;
+  def _H : sve2_int_rotate_right_imm<{0,0,1,?}, asm, ZPR16, vecshiftR16> {
+    let Inst{19} = imm{3};
+  }
+  def _S : sve2_int_rotate_right_imm<{0,1,?,?}, asm, ZPR32, vecshiftR32> {
+    let Inst{20-19} = imm{4-3};
+  }
+  def _D : sve2_int_rotate_right_imm<{1,?,?,?}, asm, ZPR64, vecshiftR64> {
+    let Inst{22}    = imm{5};
+    let Inst{20-19} = imm{4-3};
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Wide Immediate - Predicated Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/bcax-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bcax-diagnostics.s
new file mode 100644
index 0000000000000..e283076ef9a25
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bcax-diagnostics.s
@@ -0,0 +1,39 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bcax z0.b, z0.b, z1.s, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bcax z0.b, z0.b, z1.s, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bcax z0.h, z0.h, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bcax z0.h, z0.h, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bcax z0.d, z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bcax z0.d, z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+bcax z0.d, z1.d, z2.d, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: bcax z0.d, z1.d, z2.d, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+bcax z0.d, z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bcax z0.d, z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bcax.s b/llvm/test/MC/AArch64/SVE2/bcax.s
new file mode 100644
index 0000000000000..9263712bc0a54
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bcax.s
@@ -0,0 +1,52 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bcax z29.d, z29.d, z30.d, z31.d
+// CHECK-INST: bcax z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x7e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 7e 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test aliases.
+
+bcax z29.b, z29.b, z30.b, z31.b
+// CHECK-INST: bcax z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x7e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 7e 04 <unknown>
+
+bcax z29.h, z29.h, z30.h, z31.h
+// CHECK-INST: bcax z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x7e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 7e 04 <unknown>
+
+bcax z29.s, z29.s, z30.s, z31.s
+// CHECK-INST: bcax z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x7e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 7e 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+bcax z31.d, z31.d, z30.d, z29.d
+// CHECK-INST: bcax z31.d, z31.d, z30.d, z29.d
+// CHECK-ENCODING: [0xbf,0x3b,0x7e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bf 3b 7e 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/bsl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bsl-diagnostics.s
new file mode 100644
index 0000000000000..e2de021be7afd
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bsl-diagnostics.s
@@ -0,0 +1,39 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bsl z0.b, z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl z0.b, z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bsl z0.h, z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl z0.h, z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bsl z0.s, z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl z0.s, z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+bsl z0.d, z1.d, z2.d, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: bsl z0.d, z1.d, z2.d, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+bsl z0.d, z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bsl.s b/llvm/test/MC/AArch64/SVE2/bsl.s
new file mode 100644
index 0000000000000..acf3a65fc5543
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bsl.s
@@ -0,0 +1,30 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bsl z0.d, z0.d, z1.d, z2.d
+// CHECK-INST: bsl z0.d, z0.d, z1.d, z2.d
+// CHECK-ENCODING: [0x40,0x3c,0x21,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 40 3c 21 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+bsl z31.d, z31.d, z30.d, z29.d
+// CHECK-INST: bsl z31.d, z31.d, z30.d, z29.d
+// CHECK-ENCODING: [0xbf,0x3f,0x3e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bf 3f 3e 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/bsl1n-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bsl1n-diagnostics.s
new file mode 100644
index 0000000000000..2b8c38e871d68
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bsl1n-diagnostics.s
@@ -0,0 +1,39 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bsl1n z0.b, z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl1n z0.b, z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bsl1n z0.h, z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl1n z0.h, z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bsl1n z0.s, z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl1n z0.s, z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+bsl1n z0.d, z1.d, z2.d, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: bsl1n z0.d, z1.d, z2.d, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+bsl1n z0.d, z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bsl1n.s b/llvm/test/MC/AArch64/SVE2/bsl1n.s
new file mode 100644
index 0000000000000..fda643103e702
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bsl1n.s
@@ -0,0 +1,30 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bsl1n z0.d, z0.d, z1.d, z2.d
+// CHECK-INST: bsl1n z0.d, z0.d, z1.d, z2.d
+// CHECK-ENCODING: [0x40,0x3c,0x61,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 40 3c 61 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+bsl1n z31.d, z31.d, z30.d, z29.d
+// CHECK-INST: bsl1n z31.d, z31.d, z30.d, z29.d
+// CHECK-ENCODING: [0xbf,0x3f,0x7e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bf 3f 7e 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/bsl2n-diagnostics.s b/llvm/test/MC/AArch64/SVE2/bsl2n-diagnostics.s
new file mode 100644
index 0000000000000..3c2b9f79e1a26
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bsl2n-diagnostics.s
@@ -0,0 +1,39 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+bsl2n z0.b, z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl2n z0.b, z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bsl2n z0.h, z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl2n z0.h, z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bsl2n z0.s, z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bsl2n z0.s, z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+bsl2n z0.d, z1.d, z2.d, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: bsl2n z0.d, z1.d, z2.d, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+bsl2n z0.d, z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/bsl2n.s b/llvm/test/MC/AArch64/SVE2/bsl2n.s
new file mode 100644
index 0000000000000..f4482ef134848
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/bsl2n.s
@@ -0,0 +1,30 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+bsl2n z0.d, z0.d, z1.d, z2.d
+// CHECK-INST: bsl2n z0.d, z0.d, z1.d, z2.d
+// CHECK-ENCODING: [0x40,0x3c,0xa1,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 40 3c a1 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+bsl2n z31.d, z31.d, z30.d, z29.d
+// CHECK-INST: bsl2n z31.d, z31.d, z30.d, z29.d
+// CHECK-ENCODING: [0xbf,0x3f,0xbe,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bf 3f be 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/eor3-diagnostics.s b/llvm/test/MC/AArch64/SVE2/eor3-diagnostics.s
new file mode 100644
index 0000000000000..611db530b5dd3
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/eor3-diagnostics.s
@@ -0,0 +1,39 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+eor3 z0.b, z0.b, z1.s, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: eor3 z0.b, z0.b, z1.s, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+eor3 z0.h, z0.h, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: eor3 z0.h, z0.h, z1.h, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+eor3 z0.d, z0.d, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: eor3 z0.d, z0.d, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+eor3 z0.d, z1.d, z2.d, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: eor3 z0.d, z1.d, z2.d, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+eor3 z0.d, z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: eor3 z0.d, z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/eor3.s b/llvm/test/MC/AArch64/SVE2/eor3.s
new file mode 100644
index 0000000000000..cf385122744ca
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/eor3.s
@@ -0,0 +1,52 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+eor3 z29.d, z29.d, z30.d, z31.d
+// CHECK-INST: eor3 z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x3e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 3e 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test aliases.
+
+eor3 z29.b, z29.b, z30.b, z31.b
+// CHECK-INST: eor3 z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x3e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 3e 04 <unknown>
+
+eor3 z29.h, z29.h, z30.h, z31.h
+// CHECK-INST: eor3 z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x3e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 3e 04 <unknown>
+
+eor3 z29.s, z29.s, z30.s, z31.s
+// CHECK-INST: eor3 z29.d, z29.d, z30.d, z31.d
+// CHECK-ENCODING: [0xfd,0x3b,0x3e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fd 3b 3e 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+eor3 z31.d, z31.d, z30.d, z29.d
+// CHECK-INST: eor3 z31.d, z31.d, z30.d, z29.d
+// CHECK-ENCODING: [0xbf,0x3b,0x3e,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bf 3b 3e 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/nbsl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/nbsl-diagnostics.s
new file mode 100644
index 0000000000000..0793b7f09c514
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/nbsl-diagnostics.s
@@ -0,0 +1,39 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+nbsl z0.b, z0.b, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: nbsl z0.b, z0.b, z1.b, z2.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+nbsl z0.h, z0.h, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: nbsl z0.h, z0.h, z1.h, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+nbsl z0.s, z0.s, z1.s, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: nbsl z0.s, z0.s, z1.s, z2.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Source and Destination Registers must match
+
+nbsl z0.d, z1.d, z2.d, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register
+// CHECK-NEXT: nbsl z0.d, z1.d, z2.d, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+nbsl z0.d, z0.d, z1.d, z2.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/nbsl.s b/llvm/test/MC/AArch64/SVE2/nbsl.s
new file mode 100644
index 0000000000000..5df8ad0c398fa
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/nbsl.s
@@ -0,0 +1,30 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+nbsl z0.d, z0.d, z1.d, z2.d
+// CHECK-INST: nbsl z0.d, z0.d, z1.d, z2.d
+// CHECK-ENCODING: [0x40,0x3c,0xe1,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 40 3c e1 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+nbsl z31.d, z31.d, z30.d, z29.d
+// CHECK-INST: nbsl z31.d, z31.d, z30.d, z29.d
+// CHECK-ENCODING: [0xbf,0x3f,0xfe,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bf 3f fe 04 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/xar-diagnostics.s b/llvm/test/MC/AArch64/SVE2/xar-diagnostics.s
new file mode 100644
index 0000000000000..d675f0a1920c6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/xar-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+xar z30.b, z30.b, z10.b, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: xar z30.b, z30.b, z10.b, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z18.b, z18.b, z27.b, #9
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 8]
+// CHECK-NEXT: xar z18.b, z18.b, z27.b, #9
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z26.h, z26.h, z4.h, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: xar z26.h, z26.h, z4.h, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z25.h, z25.h, z10.h, #17
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]
+// CHECK-NEXT: xar z25.h, z25.h, z10.h, #17
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z17.s, z17.s, z0.s, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: xar z17.s, z17.s, z0.s, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z0.s, z0.s, z15.s, #33
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]
+// CHECK-NEXT: xar z0.s, z0.s, z15.s, #33
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z4.d, z4.d, z13.d, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: xar z4.d, z4.d, z13.d, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+xar z26.d, z26.d, z26.d, #65
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]
+// CHECK-NEXT: xar z26.d, z26.d, z26.d, #65
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Element sizes must match
+
+xar z0.b, z0.b , z0.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: xar z0.b, z0.b , z0.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+xar z0.d, z0.d, z1.d, #1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx
+// CHECK-NEXT: xar z0.d, z0.d, z1.d, #1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/xar.s b/llvm/test/MC/AArch64/SVE2/xar.s
new file mode 100644
index 0000000000000..7389df33754ea
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/xar.s
@@ -0,0 +1,72 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+xar     z0.b, z0.b, z1.b, #1
+// CHECK-INST: xar	z0.b, z0.b, z1.b, #1
+// CHECK-ENCODING: [0x20,0x34,0x2f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 34 2f 04 <unknown>
+
+xar     z31.b, z31.b, z30.b, #8
+// CHECK-INST: xar	z31.b, z31.b, z30.b, #8
+// CHECK-ENCODING: [0xdf,0x37,0x28,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 37 28 04 <unknown>
+
+xar     z0.h, z0.h, z1.h, #1
+// CHECK-INST: xar	z0.h, z0.h, z1.h, #1
+// CHECK-ENCODING: [0x20,0x34,0x3f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 34 3f 04 <unknown>
+
+xar     z31.h, z31.h, z30.h, #16
+// CHECK-INST: xar	z31.h, z31.h, z30.h, #16
+// CHECK-ENCODING: [0xdf,0x37,0x30,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 37 30 04 <unknown>
+
+xar     z0.s, z0.s, z1.s, #1
+// CHECK-INST: xar	z0.s, z0.s, z1.s, #1
+// CHECK-ENCODING: [0x20,0x34,0x7f,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 34 7f 04 <unknown>
+
+xar     z31.s, z31.s, z30.s, #32
+// CHECK-INST: xar	z31.s, z31.s, z30.s, #32
+// CHECK-ENCODING: [0xdf,0x37,0x60,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 37 60 04 <unknown>
+
+xar     z0.d, z0.d, z1.d, #1
+// CHECK-INST: xar	z0.d, z0.d, z1.d, #1
+// CHECK-ENCODING: [0x20,0x34,0xff,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 34 ff 04 <unknown>
+
+xar     z31.d, z31.d, z30.d, #64
+// CHECK-INST: xar	z31.d, z31.d, z30.d, #64
+// CHECK-ENCODING: [0xdf,0x37,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 37 a0 04 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z31, z7
+// CHECK-INST: movprfx z31, z7
+// CHECK-ENCODING: [0xff,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: ff bc 20 04 <unknown>
+
+xar     z31.d, z31.d, z30.d, #64
+// CHECK-INST: xar     z31.d, z31.d, z30.d, #64
+// CHECK-ENCODING: [0xdf,0x37,0xa0,0x04]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 37 a0 04 <unknown>

From a0a20f38ec5b8a6e56e94ce8d238d7ced71de065 Mon Sep 17 00:00:00 2001
From: Douglas Yung <douglas.yung@sony.com>
Date: Wed, 29 May 2019 09:20:01 +0000
Subject: [PATCH 0465/1176] Fix test added in r361903 to work on Windows.

llvm-svn: 361937
---
 clang/test/Driver/print-file-name.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/Driver/print-file-name.c b/clang/test/Driver/print-file-name.c
index 9447c04a5ea94..afdaeeca48389 100644
--- a/clang/test/Driver/print-file-name.c
+++ b/clang/test/Driver/print-file-name.c
@@ -4,16 +4,16 @@
 // RUN:     -resource-dir=%S/Inputs/resource_dir \
 // RUN:     --target=x86_64-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-RESOURCE-DIR %s
-// CHECK-RESOURCE-DIR: resource_dir{{/|\\\\}}share{{/|\\\\}}asan_blacklist.txt
+// CHECK-RESOURCE-DIR: resource_dir{{/|\\}}share{{/|\\}}asan_blacklist.txt
 
 // RUN: %clang -print-file-name=libclang_rt.builtins.a 2>&1 \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     --target=x86_64-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-COMPILER-RT %s
-// CHECK-COMPILER-RT: resource_dir_with_per_target_subdir{{/|\\\\}}lib{{/|\\\\}}x86_64-linux-gnu{{/|\\\\}}libclang_rt.builtins.a
+// CHECK-COMPILER-RT: resource_dir_with_per_target_subdir{{/|\\}}lib{{/|\\}}x86_64-linux-gnu{{/|\\}}libclang_rt.builtins.a
 
 // RUN: %clang -print-file-name=include/c++/v1 2>&1 \
 // RUN:     -ccc-install-dir %S/Inputs/basic_linux_libcxx_tree/usr/bin \
 // RUN:     --target=x86_64-linux-gnu \
 // RUN:   | FileCheck --check-prefix=CHECK-INSTALL-DIR %s
-// CHECK-INSTALL-DIR: basic_linux_libcxx_tree{{/|\\\\}}usr{{/|\\\\}}bin{{/|\\\\}}..{{/|\\\\}}include{{/|\\\\}}c++{{/|\\\\}}v1
+// CHECK-INSTALL-DIR: basic_linux_libcxx_tree{{/|\\}}usr{{/|\\}}bin{{/|\\}}..{{/|\\}}include{{/|\\}}c++{{/|\\}}v1

From 78cfe1e6feecebeeb7928cb1ffa1e2a64f5ed976 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 29 May 2019 09:22:36 +0000
Subject: [PATCH 0466/1176] DWARF: Fix address range support in mixed 4+5
 scenario

Summary:
debug_ranges got renamed to debug_rnglists in DWARF 5. Prior to this
patch lldb was just picking the first section it could find in the file,
and using that for all address ranges lookups. This is not correct in
case the file contains a mixture of compile units with various standard
versions (not a completely unlikely scenario).

In this patch I make lldb support reading from both sections
simulaneously, and decide the correct section to use based on the
version number of the compile unit. SymbolFileDWARF::DebugRanges is
split into GetDebugRanges and GetDebugRngLists (the first one is renamed
mainly so we can catch all incorrect usages).

I tried to structure the code similarly to how llvm handles this logic
(hence DWARFUnit::FindRnglistFromOffset/Index), but the implementations
are still relatively far from each other.

Reviewers: JDevlieghere, aprantl, clayborg

Subscribers: lldb-commits

Differential Revision: https://reviews.llvm.org/D62302

llvm-svn: 361938
---
 .../DWARF/debug_ranges-missing-section.s      |  78 +++++++++++++
 .../DWARF/debug_ranges_and_rnglists.test      |   1 -
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  | 104 ++++++++----------
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |  29 +++++
 .../Plugins/SymbolFile/DWARF/DWARFUnit.h      |   9 ++
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      |  49 ++++-----
 .../SymbolFile/DWARF/SymbolFileDWARF.h        |   6 +-
 7 files changed, 182 insertions(+), 94 deletions(-)
 create mode 100644 lldb/lit/SymbolFile/DWARF/debug_ranges-missing-section.s

diff --git a/lldb/lit/SymbolFile/DWARF/debug_ranges-missing-section.s b/lldb/lit/SymbolFile/DWARF/debug_ranges-missing-section.s
new file mode 100644
index 0000000000000..4f6feceda6da8
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/debug_ranges-missing-section.s
@@ -0,0 +1,78 @@
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t
+# RUN: %lldb %t -o "image lookup -v -s lookup_ranges" -o exit 2>&1 | FileCheck %s
+
+# CHECK: DIE has DW_AT_ranges(0x47) attribute, but range extraction failed (No debug_ranges section),
+# CHECK:  Function: id = {0x7fffffff0000001c}, name = "ranges", range = [0x0000000000000000-0x0000000000000004)
+# CHECK:    Blocks: id = {0x7fffffff0000001c}, range = [0x00000000-0x00000004)
+
+        .text
+        .p2align 12
+        .globl  ranges
+        .type   ranges,@function
+ranges:                                    # @ranges
+        nop
+lookup_ranges:
+        nop
+        nop
+        nop
+.Lranges_end:
+        .size   ranges, .Lranges_end-ranges
+                                        # -- End function
+        .section        .debug_str,"MS",@progbits,1
+.Lproducer:
+        .asciz  "Hand-written DWARF"
+.Lranges:
+        .asciz  "ranges"
+
+        .section        .debug_abbrev,"",@progbits
+        .byte   1                       # Abbreviation Code
+        .byte   17                      # DW_TAG_compile_unit
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   37                      # DW_AT_producer
+        .byte   14                      # DW_FORM_strp
+        .byte   17                      # DW_AT_low_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   18                      # DW_AT_high_pc
+        .byte   6                       # DW_FORM_data4
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   2                       # Abbreviation Code
+        .byte   46                      # DW_TAG_subprogram
+        .byte   1                       # DW_CHILDREN_yes
+        .byte   17                      # DW_AT_low_pc
+        .byte   1                       # DW_FORM_addr
+        .byte   18                      # DW_AT_high_pc
+        .byte   6                       # DW_FORM_data4
+        .byte   3                       # DW_AT_name
+        .byte   14                      # DW_FORM_strp
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   5                       # Abbreviation Code
+        .byte   11                      # DW_TAG_lexical_block
+        .byte   0                       # DW_CHILDREN_no
+        .byte   85                      # DW_AT_ranges
+        .byte   23                      # DW_FORM_sec_offset
+        .byte   0                       # EOM(1)
+        .byte   0                       # EOM(2)
+        .byte   0                       # EOM(3)
+
+        .section        .debug_info,"",@progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  4                       # DWARF version number
+        .long   .debug_abbrev           # Offset Into Abbrev. Section
+        .byte   8                       # Address Size (in bytes)
+        .byte   1                       # Abbrev [1] 0xb:0x7b DW_TAG_compile_unit
+        .long   .Lproducer              # DW_AT_producer
+        .quad   ranges                  # DW_AT_low_pc
+        .long   .Lranges_end-ranges     # DW_AT_high_pc
+        .byte   2                       # Abbrev [2] 0x2a:0x4d DW_TAG_subprogram
+        .quad   ranges                  # DW_AT_low_pc
+        .long   .Lranges_end-ranges     # DW_AT_high_pc
+        .long   .Lranges                # DW_AT_name
+        .byte   5                       # Abbrev [5] 0x61:0x15 DW_TAG_lexical_block
+        .long   0x47                    # DW_AT_ranges
+        .byte   0                       # End Of Children Mark
+        .byte   0                       # End Of Children Mark
+.Ldebug_info_end0:
diff --git a/lldb/lit/SymbolFile/DWARF/debug_ranges_and_rnglists.test b/lldb/lit/SymbolFile/DWARF/debug_ranges_and_rnglists.test
index 6967476689bbe..49bb4e7a18993 100644
--- a/lldb/lit/SymbolFile/DWARF/debug_ranges_and_rnglists.test
+++ b/lldb/lit/SymbolFile/DWARF/debug_ranges_and_rnglists.test
@@ -1,5 +1,4 @@
 # REQUIRES: lld
-# XFAIL: *
 
 # RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %S/debug_ranges.s > %t-ranges.o
 # RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %S/debug_rnglists.s > %t-rnglists.o
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index b0412f92c1c29..f408c5b32b120 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -360,11 +360,22 @@ bool DWARFDebugInfoEntry::Extract(const DWARFUnit *cu,
   return false;
 }
 
-static dw_offset_t GetRangesOffset(const DWARFDebugRangesBase *debug_ranges,
-                                   DWARFFormValue &form_value) {
-  if (form_value.Form() == DW_FORM_rnglistx)
-    return debug_ranges->GetOffset(form_value.Unsigned());
-  return form_value.Unsigned();
+static DWARFRangeList GetRangesOrReportError(const DWARFUnit &unit,
+                                             const DWARFDebugInfoEntry &die,
+                                             const DWARFFormValue &value) {
+  llvm::Expected<DWARFRangeList> expected_ranges =
+      (value.Form() == DW_FORM_rnglistx)
+          ? unit.FindRnglistFromIndex(value.Unsigned())
+          : unit.FindRnglistFromOffset(value.Unsigned());
+  if (expected_ranges)
+    return std::move(*expected_ranges);
+  unit.GetSymbolFileDWARF()->GetObjectFile()->GetModule()->ReportError(
+      "{0x%8.8x}: DIE has DW_AT_ranges(0x%" PRIx64 ") attribute, but "
+      "range extraction failed (%s), please file a bug "
+      "and attach the file at the start of this error message",
+      die.GetOffset(), value.Unsigned(),
+      toString(expected_ranges.takeError()).c_str());
+  return DWARFRangeList();
 }
 
 // GetDIENamesAndRanges
@@ -437,17 +448,9 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
           }
           break;
 
-        case DW_AT_ranges: {
-          const DWARFDebugRangesBase *debug_ranges = dwarf2Data->DebugRanges();
-          if (debug_ranges)
-            debug_ranges->FindRanges(cu, GetRangesOffset(debug_ranges, form_value), ranges);
-          else
-            cu->GetSymbolFileDWARF()->GetObjectFile()->GetModule()->ReportError(
-                "{0x%8.8x}: DIE has DW_AT_ranges(0x%" PRIx64
-                ") attribute yet DWARF has no .debug_ranges, please file a bug "
-                "and attach the file at the start of this error message",
-                m_offset, form_value.Unsigned());
-        } break;
+        case DW_AT_ranges:
+          ranges = GetRangesOrReportError(*cu, *this, form_value);
+          break;
 
         case DW_AT_name:
           if (name == nullptr)
@@ -703,14 +706,6 @@ void DWARFDebugInfoEntry::DumpAttribute(
     s.PutCString(" )");
   } break;
 
-  case DW_AT_ranges: {
-    lldb::offset_t ranges_offset =
-        GetRangesOffset(dwarf2Data->DebugRanges(), form_value);
-    dw_addr_t base_addr = cu ? cu->GetBaseAddress() : 0;
-    DWARFDebugRanges::Dump(s, dwarf2Data->get_debug_ranges_data(),
-                           &ranges_offset, base_addr);
-  } break;
-
   default:
     break;
   }
@@ -962,13 +957,9 @@ size_t DWARFDebugInfoEntry::GetAttributeAddressRanges(
     bool check_specification_or_abstract_origin) const {
   ranges.Clear();
 
-  SymbolFileDWARF *dwarf2Data = cu->GetSymbolFileDWARF();
-
   DWARFFormValue form_value;
   if (GetAttributeValue(cu, DW_AT_ranges, form_value)) {
-    if (DWARFDebugRangesBase *debug_ranges = dwarf2Data->DebugRanges())
-      debug_ranges->FindRanges(cu, GetRangesOffset(debug_ranges, form_value),
-                               ranges);
+    ranges = GetRangesOrReportError(*cu, *this, form_value);
   } else if (check_hi_lo_pc) {
     dw_addr_t lo_pc = LLDB_INVALID_ADDRESS;
     dw_addr_t hi_pc = LLDB_INVALID_ADDRESS;
@@ -1413,45 +1404,38 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
               ((function_die != nullptr) || (block_die != nullptr));
         }
       } else {
-        DWARFFormValue form_value;
-        if (GetAttributeValue(cu, DW_AT_ranges, form_value)) {
-          DWARFRangeList ranges;
-          SymbolFileDWARF *dwarf2Data = cu->GetSymbolFileDWARF();
-          DWARFDebugRangesBase *debug_ranges = dwarf2Data->DebugRanges();
-          debug_ranges->FindRanges(
-              cu, GetRangesOffset(debug_ranges, form_value), ranges);
-
-          if (ranges.FindEntryThatContains(address)) {
-            found_address = true;
-            //  puts("***MATCH***");
-            switch (m_tag) {
-            case DW_TAG_compile_unit: // File
-            case DW_TAG_partial_unit: // File
+        DWARFRangeList ranges;
+        if (GetAttributeAddressRanges(cu, ranges, /*check_hi_lo_pc*/ false) &&
+            ranges.FindEntryThatContains(address)) {
+          found_address = true;
+          //  puts("***MATCH***");
+          switch (m_tag) {
+          case DW_TAG_compile_unit: // File
+          case DW_TAG_partial_unit: // File
               check_children =
                   ((function_die != nullptr) || (block_die != nullptr));
               break;
 
-            case DW_TAG_subprogram: // Function
-              if (function_die)
-                *function_die = this;
-              check_children = (block_die != nullptr);
-              break;
-
-            case DW_TAG_inlined_subroutine: // Inlined Function
-            case DW_TAG_lexical_block:      // Block { } in code
-              if (block_die) {
-                *block_die = this;
-                check_children = true;
-              }
-              break;
+          case DW_TAG_subprogram: // Function
+            if (function_die)
+              *function_die = this;
+            check_children = (block_die != nullptr);
+            break;
 
-            default:
+          case DW_TAG_inlined_subroutine: // Inlined Function
+          case DW_TAG_lexical_block:      // Block { } in code
+            if (block_die) {
+              *block_die = this;
               check_children = true;
-              break;
             }
-          } else {
-            check_children = false;
+            break;
+
+          default:
+            check_children = true;
+            break;
           }
+        } else {
+          check_children = false;
         }
       }
     }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index cd8492356c38a..ca5aebd00c873 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -789,3 +789,32 @@ uint32_t DWARFUnit::GetHeaderByteSize() const {
   }
   llvm_unreachable("invalid UnitType.");
 }
+
+llvm::Expected<DWARFRangeList>
+DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) const {
+  const DWARFDebugRangesBase *debug_ranges;
+  llvm::StringRef section;
+  if (GetVersion() <= 4) {
+    debug_ranges = m_dwarf->GetDebugRanges();
+    section = "debug_ranges";
+  } else {
+    debug_ranges = m_dwarf->GetDebugRngLists();
+    section = "debug_rnglists";
+  }
+  if (!debug_ranges)
+    return llvm::make_error<llvm::object::GenericBinaryError>("No " + section +
+                                                              " section");
+
+  DWARFRangeList ranges;
+  debug_ranges->FindRanges(this, offset, ranges);
+  return ranges;
+}
+
+llvm::Expected<DWARFRangeList>
+DWARFUnit::FindRnglistFromIndex(uint32_t index) const {
+  const DWARFDebugRangesBase *debug_rnglists = m_dwarf->GetDebugRngLists();
+  if (!debug_rnglists)
+    return llvm::make_error<llvm::object::GenericBinaryError>(
+        "No debug_rnglists section");
+  return FindRnglistFromOffset(debug_rnglists->GetOffset(index));
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index da516aecd8ed1..73f4d2ea3e0d3 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -211,6 +211,15 @@ class DWARFUnit : public lldb_private::UserID {
 
   uint8_t GetUnitType() const { return m_header.GetUnitType(); }
 
+  /// Return a list of address ranges resulting from a (possibly encoded)
+  /// range list starting at a given offset in the appropriate ranges section.
+  llvm::Expected<DWARFRangeList> FindRnglistFromOffset(dw_offset_t offset) const;
+
+  /// Return a list of address ranges retrieved from an encoded range
+  /// list whose offset is found via a table lookup given an index (DWARF v5
+  /// and later).
+  llvm::Expected<DWARFRangeList> FindRnglistFromIndex(uint32_t index) const;
+
 protected:
   DWARFUnit(SymbolFileDWARF *dwarf, lldb::user_id_t uid,
             const DWARFUnitHeader &header,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index a015445198ce6..1b0a572dd57e6 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -359,7 +359,7 @@ SymbolFileDWARF::SymbolFileDWARF(ObjectFile *objfile,
       m_context(objfile->GetModule()->GetSectionList(), dwo_section_list),
       m_data_debug_loc(), m_data_debug_ranges(), m_data_debug_rnglists(),
       m_abbr(), m_info(), m_fetched_external_modules(false),
-      m_supports_DW_AT_APPLE_objc_complete_type(eLazyBoolCalculate), m_ranges(),
+      m_supports_DW_AT_APPLE_objc_complete_type(eLazyBoolCalculate),
       m_unique_ast_type_map() {}
 
 SymbolFileDWARF::~SymbolFileDWARF() {}
@@ -619,16 +619,14 @@ SymbolFileDWARF::GetDWARFCompileUnit(lldb_private::CompileUnit *comp_unit) {
   return nullptr;
 }
 
-DWARFDebugRangesBase *SymbolFileDWARF::DebugRanges() {
-  if (m_ranges == nullptr) {
+DWARFDebugRangesBase *SymbolFileDWARF::GetDebugRanges() {
+  if (!m_ranges) {
     static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
     Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION,
                        static_cast<void *>(this));
 
     if (get_debug_ranges_data().GetByteSize() > 0)
       m_ranges.reset(new DWARFDebugRanges());
-    else if (get_debug_rnglists_data().GetByteSize() > 0)
-      m_ranges.reset(new DWARFDebugRngLists());
 
     if (m_ranges)
       m_ranges->Extract(this);
@@ -636,8 +634,19 @@ DWARFDebugRangesBase *SymbolFileDWARF::DebugRanges() {
   return m_ranges.get();
 }
 
-const DWARFDebugRangesBase *SymbolFileDWARF::DebugRanges() const {
-  return m_ranges.get();
+DWARFDebugRangesBase *SymbolFileDWARF::GetDebugRngLists() {
+  if (!m_rnglists) {
+    static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
+    Timer scoped_timer(func_cat, "%s this = %p", LLVM_PRETTY_FUNCTION,
+                       static_cast<void *>(this));
+
+    if (get_debug_rnglists_data().GetByteSize() > 0)
+      m_rnglists.reset(new DWARFDebugRngLists());
+
+    if (m_rnglists)
+      m_rnglists->Extract(this);
+  }
+  return m_rnglists.get();
 }
 
 lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFUnit *dwarf_cu) {
@@ -3222,29 +3231,9 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
           case DW_AT_specification:
             spec_die = form_value.Reference();
             break;
-          case DW_AT_start_scope: {
-            if (form_value.Form() == DW_FORM_sec_offset) {
-              DWARFRangeList dwarf_scope_ranges;
-              const DWARFDebugRangesBase *debug_ranges = DebugRanges();
-              debug_ranges->FindRanges(die.GetCU(),
-                                       form_value.Unsigned(),
-                                       dwarf_scope_ranges);
-            } else {
-              // TODO: Handle the case when DW_AT_start_scope have form
-              // constant. The
-              // dwarf spec is a bit ambiguous about what is the expected
-              // behavior in case the enclosing block have a non coninious
-              // address range and the DW_AT_start_scope entry have a form
-              // constant.
-              GetObjectFile()->GetModule()->ReportWarning(
-                  "0x%8.8" PRIx64
-                  ": DW_AT_start_scope has unsupported form type (0x%x)\n",
-                  die.GetID(), form_value.Form());
-            }
-
-            scope_ranges.Sort();
-            scope_ranges.CombineConsecutiveRanges();
-          } break;
+          case DW_AT_start_scope:
+            // TODO: Implement this.
+            break;
           case DW_AT_artificial:
             is_artificial = form_value.Boolean();
             break;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
index ea11164700f00..1b13b06d70c1c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h
@@ -225,9 +225,8 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
 
   const DWARFDebugInfo *DebugInfo() const;
 
-  DWARFDebugRangesBase *DebugRanges();
-
-  const DWARFDebugRangesBase *DebugRanges() const;
+  DWARFDebugRangesBase *GetDebugRanges();
+  DWARFDebugRangesBase *GetDebugRngLists();
 
   const lldb_private::DWARFDataExtractor &DebugLocData();
 
@@ -471,6 +470,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile,
   typedef std::unordered_map<std::string, DIERefSetSP> NameToOffsetMap;
   NameToOffsetMap m_function_scope_qualified_name_map;
   std::unique_ptr<DWARFDebugRangesBase> m_ranges;
+  std::unique_ptr<DWARFDebugRangesBase> m_rnglists;
   UniqueDWARFASTTypeMap m_unique_ast_type_map;
   DIEToTypePtr m_die_to_type;
   DIEToVariableSP m_die_to_variable_sp;

From d2042d3dd770ec593f0bf27a6fabe976d3fab804 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 29 May 2019 09:32:59 +0000
Subject: [PATCH 0467/1176] DWARFASTParserClang: Unify compilation unit
 language handling

Summary:
The function was not being consistent in how it retrieved the language
of the current compile unit. Sometimes it did so from the lldb CU
object, and sometimes from the DWARF die. This patch unifies the
handling on the latter. The reason for choosing the DWARF method is
because I'd eventually like to stop creating lldb CUs for dwarf type
units (and so this code needs to would need to work without them).

Reviewers: clayborg, JDevlieghere, aprantl

Subscribers: jdoerfert, lldb-commits

Differential Revision: https://reviews.llvm.org/D62481

llvm-svn: 361939
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp        | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index b1e826582a833..a260684285d40 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -294,6 +294,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
 
   dw_attr_t attr;
   TypeSP type_sp;
+  LanguageType cu_language = die.GetLanguage();
   switch (tag) {
   case DW_TAG_typedef:
   case DW_TAG_base_type:
@@ -468,11 +469,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         }
       }
 
-      bool translation_unit_is_objc =
-          (sc.comp_unit->GetLanguage() == eLanguageTypeObjC ||
-           sc.comp_unit->GetLanguage() == eLanguageTypeObjC_plus_plus);
-
-      if (translation_unit_is_objc) {
+      if (cu_language == eLanguageTypeObjC ||
+          cu_language == eLanguageTypeObjC_plus_plus) {
         if (type_name_cstr != nullptr) {
           static ConstString g_objc_type_name_id("id");
           static ConstString g_objc_type_name_Class("Class");
@@ -629,8 +627,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     Declaration unique_decl(decl);
 
     if (type_name_const_str) {
-      LanguageType die_language = die.GetLanguage();
-      if (Language::LanguageIsCPlusPlus(die_language)) {
+      if (Language::LanguageIsCPlusPlus(cu_language)) {
         // For C++, we rely solely upon the one definition rule that says
         // only one thing can exist at a given decl context. We ignore the
         // file and line that things are declared on.
@@ -668,7 +665,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     }
 
     if (byte_size && *byte_size == 0 && type_name_cstr && !die.HasChildren() &&
-        sc.comp_unit->GetLanguage() == eLanguageTypeObjC) {
+        cu_language == eLanguageTypeObjC) {
       // Work around an issue with clang at the moment where forward
       // declarations for objective C classes are emitted as:
       //  DW_TAG_structure_type [2]

From f9169d0896dff637f1476e042a0669ef8c981167 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Wed, 29 May 2019 10:01:00 +0000
Subject: [PATCH 0468/1176] [clangd] Represent Hover result using
 FormattedString

Reviewers: sammccall, kadircet

Reviewed By: kadircet

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61601

llvm-svn: 361940
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp  |  32 ++-
 clang-tools-extra/clangd/ClangdLSPServer.h    |   5 +-
 clang-tools-extra/clangd/ClangdServer.cpp     |   4 +-
 clang-tools-extra/clangd/ClangdServer.h       |   1 +
 clang-tools-extra/clangd/FormattedString.cpp  |  23 ++
 clang-tools-extra/clangd/FormattedString.h    |   1 +
 clang-tools-extra/clangd/Protocol.cpp         |  32 +++
 clang-tools-extra/clangd/Protocol.h           |  17 +-
 clang-tools-extra/clangd/XRefs.cpp            |  25 +-
 clang-tools-extra/clangd/XRefs.h              |   5 +-
 .../clangd/unittests/XRefsTests.cpp           | 269 ++++++++++++++----
 11 files changed, 319 insertions(+), 95 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index c599e6c4d54fe..53326296a6810 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -8,6 +8,7 @@
 
 #include "ClangdLSPServer.h"
 #include "Diagnostics.h"
+#include "FormattedString.h"
 #include "Protocol.h"
 #include "SourceCode.h"
 #include "Trace.h"
@@ -358,6 +359,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
   SupportsHierarchicalDocumentSymbol =
       Params.capabilities.HierarchicalDocumentSymbol;
   SupportFileStatus = Params.initializationOptions.FileStatus;
+  HoverContentFormat = Params.capabilities.HoverContentFormat;
   llvm::json::Object Result{
       {{"capabilities",
         llvm::json::Object{
@@ -843,17 +845,27 @@ void ClangdLSPServer::onHover(const TextDocumentPositionParams &Params,
                               Callback<llvm::Optional<Hover>> Reply) {
   Server->findHover(Params.textDocument.uri.file(), Params.position,
                     Bind(
-                        [](decltype(Reply) Reply,
-                           llvm::Expected<llvm::Optional<HoverInfo>> HIorErr) {
-                          if (!HIorErr)
-                            return Reply(HIorErr.takeError());
-                          const auto &HI = HIorErr.get();
-                          if (!HI)
+                        [this](decltype(Reply) Reply,
+                               llvm::Expected<llvm::Optional<HoverInfo>> H) {
+                          if (!H)
+                            return Reply(H.takeError());
+                          if (!*H)
                             return Reply(llvm::None);
-                          Hover H;
-                          H.range = HI->SymRange;
-                          H.contents = HI->render();
-                          return Reply(H);
+
+                          Hover R;
+                          R.contents.kind = HoverContentFormat;
+                          R.range = (*H)->SymRange;
+                          switch (HoverContentFormat) {
+                          case MarkupKind::PlainText:
+                            R.contents.value =
+                                (*H)->present().renderAsPlainText();
+                            return Reply(std::move(R));
+                          case MarkupKind::Markdown:
+                            R.contents.value =
+                                (*H)->present().renderAsMarkdown();
+                            return Reply(std::move(R));
+                          };
+                          llvm_unreachable("unhandled MarkupKind");
                         },
                         std::move(Reply)));
 }
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h
index f0b10a2f89667..238e9affa134a 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.h
+++ b/clang-tools-extra/clangd/ClangdLSPServer.h
@@ -154,7 +154,10 @@ class ClangdLSPServer : private DiagnosticsConsumer {
   bool SupportsHierarchicalDocumentSymbol = false;
   /// Whether the client supports showing file status.
   bool SupportFileStatus = false;
-  // Store of the current versions of the open documents.
+  /// Which kind of markup should we use in textDocument/hover responses.
+  MarkupKind HoverContentFormat = MarkupKind::PlainText;
+
+  /// Store of the current versions of the open documents.
   DraftStore DraftMgr;
 
   // The CDB is created by the "initialize" LSP method.
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 9989f610d8b46..ca1500be0e36e 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -10,11 +10,13 @@
 #include "ClangdUnit.h"
 #include "CodeComplete.h"
 #include "FindSymbols.h"
+#include "FormattedString.h"
 #include "Headers.h"
 #include "Protocol.h"
 #include "SourceCode.h"
 #include "TUScheduler.h"
 #include "Trace.h"
+#include "XRefs.h"
 #include "index/CanonicalIncludes.h"
 #include "index/FileIndex.h"
 #include "index/Merge.h"
@@ -462,7 +464,7 @@ void ClangdServer::findDocumentHighlights(
 
 void ClangdServer::findHover(PathRef File, Position Pos,
                              Callback<llvm::Optional<HoverInfo>> CB) {
-  auto Action = [Pos](Callback<llvm::Optional<HoverInfo>> CB, Path File,
+  auto Action = [Pos](decltype(CB) CB, Path File,
                       llvm::Expected<InputsAndAST> InpAST) {
     if (!InpAST)
       return CB(InpAST.takeError());
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index 62c0c9a09ff3e..c0389f098193d 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -14,6 +14,7 @@
 #include "ClangdUnit.h"
 #include "CodeComplete.h"
 #include "FSProvider.h"
+#include "FormattedString.h"
 #include "Function.h"
 #include "GlobalCompilationDatabase.h"
 #include "Protocol.h"
diff --git a/clang-tools-extra/clangd/FormattedString.cpp b/clang-tools-extra/clangd/FormattedString.cpp
index 3ae1a3c6fa8a7..3be179bbdc2ce 100644
--- a/clang-tools-extra/clangd/FormattedString.cpp
+++ b/clang-tools-extra/clangd/FormattedString.cpp
@@ -9,6 +9,7 @@
 #include "clang/Basic/CharInfo.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include <cstddef>
 #include <string>
 
@@ -169,5 +170,27 @@ std::string FormattedString::renderAsPlainText() const {
     R.pop_back();
   return R;
 }
+
+std::string FormattedString::renderForTests() const {
+  std::string R;
+  for (const auto &C : Chunks) {
+    switch (C.Kind) {
+    case ChunkKind::PlainText:
+      R += "text[" + C.Contents + "]";
+      break;
+    case ChunkKind::InlineCodeBlock:
+      R += "code[" + C.Contents + "]";
+      break;
+    case ChunkKind::CodeBlock:
+      if (!R.empty())
+        R += "\n";
+      R += llvm::formatv("codeblock({0}) [\n{1}\n]\n", C.Language, C.Contents);
+      break;
+    }
+  }
+  while (!R.empty() && isWhitespace(R.back()))
+    R.pop_back();
+  return R;
+}
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/FormattedString.h b/clang-tools-extra/clangd/FormattedString.h
index f20c19af85680..10313a4408cf6 100644
--- a/clang-tools-extra/clangd/FormattedString.h
+++ b/clang-tools-extra/clangd/FormattedString.h
@@ -35,6 +35,7 @@ class FormattedString {
 
   std::string renderAsMarkdown() const;
   std::string renderAsPlainText() const;
+  std::string renderForTests() const;
 
 private:
   enum class ChunkKind {
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 04c26637b1dcf..51316fefd1f7f 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -303,6 +303,17 @@ bool fromJSON(const llvm::json::Value &Params, ClientCapabilities &R) {
               DocumentSymbol->getBoolean("hierarchicalDocumentSymbolSupport"))
         R.HierarchicalDocumentSymbol = *HierarchicalSupport;
     }
+    if (auto *Hover = TextDocument->getObject("hover")) {
+      if (auto *ContentFormat = Hover->getArray("contentFormat")) {
+        for (const auto &Format : *ContentFormat) {
+          MarkupKind K = MarkupKind::PlainText;
+          if (fromJSON(Format, K)) {
+            R.HoverContentFormat = K;
+            break;
+          }
+        }
+      }
+    }
   }
   if (auto *Workspace = O->getObject("workspace")) {
     if (auto *Symbol = Workspace->getObject("symbol")) {
@@ -684,6 +695,27 @@ static llvm::StringRef toTextKind(MarkupKind Kind) {
   llvm_unreachable("Invalid MarkupKind");
 }
 
+bool fromJSON(const llvm::json::Value &V, MarkupKind &K) {
+  auto Str = V.getAsString();
+  if (!Str) {
+    elog("Failed to parse markup kind: expected a string");
+    return false;
+  }
+  if (*Str == "plaintext")
+    K = MarkupKind::PlainText;
+  else if (*Str == "markdown")
+    K = MarkupKind::Markdown;
+  else {
+    elog("Unknown markup kind: {0}", *Str);
+    return false;
+  }
+  return true;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MarkupKind K) {
+  return OS << toTextKind(K);
+}
+
 llvm::json::Value toJSON(const MarkupContent &MC) {
   if (MC.value.empty())
     return nullptr;
diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h
index 957e2f3da444c..21bac8abfb531 100644
--- a/clang-tools-extra/clangd/Protocol.h
+++ b/clang-tools-extra/clangd/Protocol.h
@@ -353,6 +353,15 @@ llvm::json::Value toJSON(const OffsetEncoding &);
 bool fromJSON(const llvm::json::Value &, OffsetEncoding &);
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, OffsetEncoding);
 
+// Describes the content type that a client supports in various result literals
+// like `Hover`, `ParameterInfo` or `CompletionItem`.
+enum class MarkupKind {
+  PlainText,
+  Markdown,
+};
+bool fromJSON(const llvm::json::Value &, MarkupKind &);
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MarkupKind);
+
 // This struct doesn't mirror LSP!
 // The protocol defines deeply nested structures for client capabilities.
 // Instead of mapping them all, this just parses out the bits we care about.
@@ -391,6 +400,9 @@ struct ClientCapabilities {
 
   /// Supported encodings for LSP character offsets. (clangd extension).
   llvm::Optional<std::vector<OffsetEncoding>> offsetEncoding;
+
+  /// The content format that should be used for Hover requests.
+  MarkupKind HoverContentFormat = MarkupKind::PlainText;
 };
 bool fromJSON(const llvm::json::Value &, ClientCapabilities &);
 
@@ -861,11 +873,6 @@ struct CompletionParams : TextDocumentPositionParams {
 };
 bool fromJSON(const llvm::json::Value &, CompletionParams &);
 
-enum class MarkupKind {
-  PlainText,
-  Markdown,
-};
-
 struct MarkupContent {
   MarkupKind kind = MarkupKind::PlainText;
   std::string value;
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 4b9b95362c769..818dbc53a4d5f 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -9,6 +9,7 @@
 #include "AST.h"
 #include "CodeCompletionStrings.h"
 #include "FindSymbols.h"
+#include "FormattedString.h"
 #include "Logger.h"
 #include "Protocol.h"
 #include "SourceCode.h"
@@ -1155,32 +1156,26 @@ getTypeHierarchy(ParsedAST &AST, Position Pos, int ResolveLevels,
   return Result;
 }
 
-MarkupContent HoverInfo::render() const {
-  MarkupContent Content;
-  Content.kind = MarkupKind::PlainText;
-  std::vector<std::string> Output;
-
+FormattedString HoverInfo::present() const {
+  FormattedString Output;
   if (NamespaceScope) {
-    llvm::raw_string_ostream Out(Content.value);
-    Out << "Declared in ";
+    Output.appendText("Declared in");
     // Drop trailing "::".
     if (!LocalScope.empty())
-      Out << *NamespaceScope << llvm::StringRef(LocalScope).drop_back(2);
+      Output.appendInlineCode(llvm::StringRef(LocalScope).drop_back(2));
     else if (NamespaceScope->empty())
-      Out << "global namespace";
+      Output.appendInlineCode("global namespace");
     else
-      Out << llvm::StringRef(*NamespaceScope).drop_back(2);
-    Out << "\n\n";
+      Output.appendInlineCode(llvm::StringRef(*NamespaceScope).drop_back(2));
   }
 
   if (!Definition.empty()) {
-    Output.push_back(Definition);
+    Output.appendCodeBlock(Definition);
   } else {
     // Builtin types
-    Output.push_back(Name);
+    Output.appendCodeBlock(Name);
   }
-  Content.value += llvm::join(Output, " ");
-  return Content;
+  return Output;
 }
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h
index 3695989e7e894..180e3f755c3d9 100644
--- a/clang-tools-extra/clangd/XRefs.h
+++ b/clang-tools-extra/clangd/XRefs.h
@@ -14,6 +14,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_XREFS_H
 
 #include "ClangdUnit.h"
+#include "FormattedString.h"
 #include "Protocol.h"
 #include "index/Index.h"
 #include "index/SymbolLocation.h"
@@ -103,8 +104,8 @@ struct HoverInfo {
   /// Set for all templates(function, class, variable).
   llvm::Optional<std::vector<Param>> TemplateParameters;
 
-  /// Lower to LSP struct.
-  MarkupContent render() const;
+  /// Produce a user-readable information.
+  FormattedString present() const;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const HoverInfo::Param &);
 inline bool operator==(const HoverInfo::Param &LHS,
diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index 677f3f426c552..cf2d726637157 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -893,7 +893,10 @@ TEST(Hover, All) {
               int test1 = bonjour;
             }
           )cpp",
-          "Declared in main\n\nint bonjour",
+          "text[Declared in]code[main]\n"
+          "codeblock(cpp) [\n"
+          "int bonjour\n"
+          "]",
       },
       {
           R"cpp(// Local variable in method
@@ -904,7 +907,10 @@ TEST(Hover, All) {
               }
             };
           )cpp",
-          "Declared in s::method\n\nint bonjour",
+          "text[Declared in]code[s::method]\n"
+          "codeblock(cpp) [\n"
+          "int bonjour\n"
+          "]",
       },
       {
           R"cpp(// Struct
@@ -915,7 +921,10 @@ TEST(Hover, All) {
               ns1::My^Class* Params;
             }
           )cpp",
-          "Declared in ns1\n\nstruct MyClass {}",
+          "text[Declared in]code[ns1]\n"
+          "codeblock(cpp) [\n"
+          "struct MyClass {}\n"
+          "]",
       },
       {
           R"cpp(// Class
@@ -926,7 +935,10 @@ TEST(Hover, All) {
               ns1::My^Class* Params;
             }
           )cpp",
-          "Declared in ns1\n\nclass MyClass {}",
+          "text[Declared in]code[ns1]\n"
+          "codeblock(cpp) [\n"
+          "class MyClass {}\n"
+          "]",
       },
       {
           R"cpp(// Union
@@ -937,7 +949,10 @@ TEST(Hover, All) {
               ns1::My^Union Params;
             }
           )cpp",
-          "Declared in ns1\n\nunion MyUnion {}",
+          "text[Declared in]code[ns1]\n"
+          "codeblock(cpp) [\n"
+          "union MyUnion {}\n"
+          "]",
       },
       {
           R"cpp(// Function definition via pointer
@@ -946,7 +961,10 @@ TEST(Hover, All) {
               auto *X = &^foo;
             }
           )cpp",
-          "Declared in global namespace\n\nint foo(int)",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "int foo(int)\n"
+          "]",
       },
       {
           R"cpp(// Function declaration via call
@@ -955,7 +973,10 @@ TEST(Hover, All) {
               return ^foo(42);
             }
           )cpp",
-          "Declared in global namespace\n\nint foo(int)",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "int foo(int)\n"
+          "]",
       },
       {
           R"cpp(// Field
@@ -965,7 +986,10 @@ TEST(Hover, All) {
               bar.^x;
             }
           )cpp",
-          "Declared in Foo\n\nint x",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "int x\n"
+          "]",
       },
       {
           R"cpp(// Field with initialization
@@ -975,7 +999,10 @@ TEST(Hover, All) {
               bar.^x;
             }
           )cpp",
-          "Declared in Foo\n\nint x = 5",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "int x = 5\n"
+          "]",
       },
       {
           R"cpp(// Static field
@@ -984,7 +1011,10 @@ TEST(Hover, All) {
               Foo::^x;
             }
           )cpp",
-          "Declared in Foo\n\nstatic int x",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "static int x\n"
+          "]",
       },
       {
           R"cpp(// Field, member initializer
@@ -993,7 +1023,10 @@ TEST(Hover, All) {
               Foo() : ^x(0) {}
             };
           )cpp",
-          "Declared in Foo\n\nint x",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "int x\n"
+          "]",
       },
       {
           R"cpp(// Field, GNU old-style field designator
@@ -1002,7 +1035,10 @@ TEST(Hover, All) {
               Foo bar = { ^x : 1 };
             }
           )cpp",
-          "Declared in Foo\n\nint x",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "int x\n"
+          "]",
       },
       {
           R"cpp(// Field, field designator
@@ -1011,7 +1047,10 @@ TEST(Hover, All) {
               Foo bar = { .^x = 2 };
             }
           )cpp",
-          "Declared in Foo\n\nint x",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "int x\n"
+          "]",
       },
       {
           R"cpp(// Method call
@@ -1021,7 +1060,10 @@ TEST(Hover, All) {
               bar.^x();
             }
           )cpp",
-          "Declared in Foo\n\nint x()",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "int x()\n"
+          "]",
       },
       {
           R"cpp(// Static method call
@@ -1030,7 +1072,10 @@ TEST(Hover, All) {
               Foo::^x();
             }
           )cpp",
-          "Declared in Foo\n\nstatic int x()",
+          "text[Declared in]code[Foo]\n"
+          "codeblock(cpp) [\n"
+          "static int x()\n"
+          "]",
       },
       {
           R"cpp(// Typedef
@@ -1039,7 +1084,10 @@ TEST(Hover, All) {
               ^Foo bar;
             }
           )cpp",
-          "Declared in global namespace\n\ntypedef int Foo",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "typedef int Foo\n"
+          "]",
       },
       {
           R"cpp(// Namespace
@@ -1048,7 +1096,10 @@ TEST(Hover, All) {
             } // namespace ns
             int main() { ^ns::Foo::bar(); }
           )cpp",
-          "Declared in global namespace\n\nnamespace ns {}",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "namespace ns {}\n"
+          "]",
       },
       {
           R"cpp(// Anonymous namespace
@@ -1059,7 +1110,10 @@ TEST(Hover, All) {
             } // namespace ns
             int main() { ns::f^oo++; }
           )cpp",
-          "Declared in ns::(anonymous)\n\nint foo",
+          "text[Declared in]code[ns::(anonymous)]\n"
+          "codeblock(cpp) [\n"
+          "int foo\n"
+          "]",
       },
       {
           R"cpp(// Macro
@@ -1069,14 +1123,18 @@ TEST(Hover, All) {
             #define MACRO 2
             #undef macro
           )cpp",
-          "#define MACRO 1",
+          "codeblock(cpp) [\n"
+          "#define MACRO 1\n"
+          "]",
       },
       {
           R"cpp(// Macro
             #define MACRO 0
             #define MACRO2 ^MACRO
           )cpp",
-          "#define MACRO 0",
+          "codeblock(cpp) [\n"
+          "#define MACRO 0\n"
+          "]",
       },
       {
           R"cpp(// Macro
@@ -1085,8 +1143,10 @@ TEST(Hover, All) {
             }
             int main() ^MACRO
           )cpp",
-          "#define MACRO                                                       "
-          "           \\\n  { return 0; }",
+          R"cpp(codeblock(cpp) [
+#define MACRO                                                                  \
+  { return 0; }
+])cpp",
       },
       {
           R"cpp(// Forward class declaration
@@ -1094,7 +1154,10 @@ TEST(Hover, All) {
             class Foo {};
             F^oo* foo();
           )cpp",
-          "Declared in global namespace\n\nclass Foo {}",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "class Foo {}\n"
+          "]",
       },
       {
           R"cpp(// Function declaration
@@ -1102,7 +1165,10 @@ TEST(Hover, All) {
             void g() { f^oo(); }
             void foo() {}
           )cpp",
-          "Declared in global namespace\n\nvoid foo()",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "void foo()\n"
+          "]",
       },
       {
           R"cpp(// Enum declaration
@@ -1113,7 +1179,10 @@ TEST(Hover, All) {
               Hel^lo hello = ONE;
             }
           )cpp",
-          "Declared in global namespace\n\nenum Hello {}",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "enum Hello {}\n"
+          "]",
       },
       {
           R"cpp(// Enumerator
@@ -1124,7 +1193,10 @@ TEST(Hover, All) {
               Hello hello = O^NE;
             }
           )cpp",
-          "Declared in Hello\n\nONE",
+          "text[Declared in]code[Hello]\n"
+          "codeblock(cpp) [\n"
+          "ONE\n"
+          "]",
       },
       {
           R"cpp(// Enumerator in anonymous enum
@@ -1135,7 +1207,10 @@ TEST(Hover, All) {
               int hello = O^NE;
             }
           )cpp",
-          "Declared in global namespace\n\nONE",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "ONE\n"
+          "]",
       },
       {
           R"cpp(// Global variable
@@ -1144,7 +1219,10 @@ TEST(Hover, All) {
               he^y++;
             }
           )cpp",
-          "Declared in global namespace\n\nstatic int hey = 10",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "static int hey = 10\n"
+          "]",
       },
       {
           R"cpp(// Global variable in namespace
@@ -1155,7 +1233,10 @@ TEST(Hover, All) {
               ns1::he^y++;
             }
           )cpp",
-          "Declared in ns1\n\nstatic int hey = 10",
+          "text[Declared in]code[ns1]\n"
+          "codeblock(cpp) [\n"
+          "static int hey = 10\n"
+          "]",
       },
       {
           R"cpp(// Field in anonymous struct
@@ -1166,7 +1247,10 @@ TEST(Hover, All) {
               s.he^llo++;
             }
           )cpp",
-          "Declared in (anonymous struct)\n\nint hello",
+          "text[Declared in]code[(anonymous struct)]\n"
+          "codeblock(cpp) [\n"
+          "int hello\n"
+          "]",
       },
       {
           R"cpp(// Templated function
@@ -1176,7 +1260,10 @@ TEST(Hover, All) {
             }
             void g() { auto x = f^oo<int>(); }
           )cpp",
-          "Declared in global namespace\n\ntemplate <typename T> T foo()",
+          "text[Declared in]code[global namespace]\n"
+          "codeblock(cpp) [\n"
+          "template <typename T> T foo()\n"
+          "]",
       },
       {
           R"cpp(// Anonymous union
@@ -1187,7 +1274,10 @@ TEST(Hover, All) {
             };
             void g() { struct outer o; o.v.d^ef++; }
           )cpp",
-          "Declared in outer::(anonymous union)\n\nint def",
+          "text[Declared in]code[outer::(anonymous union)]\n"
+          "codeblock(cpp) [\n"
+          "int def\n"
+          "]",
       },
       {
           R"cpp(// Nothing
@@ -1203,7 +1293,9 @@ TEST(Hover, All) {
               ^auto i = 1;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with const auto
@@ -1211,7 +1303,9 @@ TEST(Hover, All) {
               const ^auto i = 1;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with const auto&
@@ -1219,7 +1313,9 @@ TEST(Hover, All) {
               const ^auto& i = 1;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with auto&
@@ -1227,7 +1323,9 @@ TEST(Hover, All) {
               ^auto& i = 1;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with auto*
@@ -1236,7 +1334,9 @@ TEST(Hover, All) {
               ^auto* i = &a;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Auto with initializer list.
@@ -1249,7 +1349,9 @@ TEST(Hover, All) {
               ^auto i = {1,2};
             }
           )cpp",
-          "class std::initializer_list<int>",
+          "codeblock(cpp) [\n"
+          "class std::initializer_list<int>\n"
+          "]",
       },
       {
           R"cpp(// User defined conversion to auto
@@ -1257,7 +1359,9 @@ TEST(Hover, All) {
               operator ^auto() const { return 10; }
             };
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with decltype(auto)
@@ -1265,7 +1369,9 @@ TEST(Hover, All) {
               ^decltype(auto) i = 1;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with const decltype(auto)
@@ -1274,7 +1380,9 @@ TEST(Hover, All) {
               ^decltype(auto) i = j;
             }
           )cpp",
-          "const int",
+          "codeblock(cpp) [\n"
+          "const int\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with const& decltype(auto)
@@ -1284,7 +1392,9 @@ TEST(Hover, All) {
               ^decltype(auto) i = j;
             }
           )cpp",
-          "const int &",
+          "codeblock(cpp) [\n"
+          "const int &\n"
+          "]",
       },
       {
           R"cpp(// Simple initialization with & decltype(auto)
@@ -1294,7 +1404,9 @@ TEST(Hover, All) {
               ^decltype(auto) i = j;
             }
           )cpp",
-          "int &",
+          "codeblock(cpp) [\n"
+          "int &\n"
+          "]",
       },
       {
           R"cpp(// decltype with initializer list: nothing
@@ -1315,7 +1427,9 @@ TEST(Hover, All) {
               return 0;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// auto function return with trailing type
@@ -1324,7 +1438,9 @@ TEST(Hover, All) {
               return Bar();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// trailing return type
@@ -1333,7 +1449,9 @@ TEST(Hover, All) {
               return Bar();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// auto in function return
@@ -1342,7 +1460,9 @@ TEST(Hover, All) {
               return Bar();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// auto& in function return
@@ -1351,7 +1471,9 @@ TEST(Hover, All) {
               return Bar();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// auto* in function return
@@ -1361,7 +1483,9 @@ TEST(Hover, All) {
               return bar;
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// const auto& in function return
@@ -1370,7 +1494,9 @@ TEST(Hover, All) {
               return Bar();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// decltype(auto) in function return
@@ -1379,7 +1505,9 @@ TEST(Hover, All) {
               return Bar();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// decltype(auto) reference in function return
@@ -1389,7 +1517,9 @@ TEST(Hover, All) {
               return (a);
             }
           )cpp",
-          "int &",
+          "codeblock(cpp) [\n"
+          "int &\n"
+          "]",
       },
       {
           R"cpp(// decltype lvalue reference
@@ -1398,7 +1528,9 @@ TEST(Hover, All) {
               ^decltype(I) J = I;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// decltype lvalue reference
@@ -1408,7 +1540,9 @@ TEST(Hover, All) {
               ^decltype(K) J = I;
             }
           )cpp",
-          "int &",
+          "codeblock(cpp) [\n"
+          "int &\n"
+          "]",
       },
       {
           R"cpp(// decltype lvalue reference parenthesis
@@ -1417,7 +1551,9 @@ TEST(Hover, All) {
               ^decltype((I)) J = I;
             }
           )cpp",
-          "int &",
+          "codeblock(cpp) [\n"
+          "int &\n"
+          "]",
       },
       {
           R"cpp(// decltype rvalue reference
@@ -1426,7 +1562,9 @@ TEST(Hover, All) {
               ^decltype(static_cast<int&&>(I)) J = static_cast<int&&>(I);
             }
           )cpp",
-          "int &&",
+          "codeblock(cpp) [\n"
+          "int &&\n"
+          "]",
       },
       {
           R"cpp(// decltype rvalue reference function call
@@ -1436,7 +1574,9 @@ TEST(Hover, All) {
               ^decltype(bar()) J = bar();
             }
           )cpp",
-          "int &&",
+          "codeblock(cpp) [\n"
+          "int &&\n"
+          "]",
       },
       {
           R"cpp(// decltype of function with trailing return type.
@@ -1448,7 +1588,9 @@ TEST(Hover, All) {
               ^decltype(test()) i = test();
             }
           )cpp",
-          "struct Bar",
+          "codeblock(cpp) [\n"
+          "struct Bar\n"
+          "]",
       },
       {
           R"cpp(// decltype of var with decltype.
@@ -1458,7 +1600,9 @@ TEST(Hover, All) {
               ^decltype(J) K = J;
             }
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
       {
           R"cpp(// structured binding. Not supported yet
@@ -1486,7 +1630,9 @@ TEST(Hover, All) {
             int bar();
             ^auto (*foo)() = bar;
           )cpp",
-          "int",
+          "codeblock(cpp) [\n"
+          "int\n"
+          "]",
       },
   };
 
@@ -1497,7 +1643,8 @@ TEST(Hover, All) {
     auto AST = TU.build();
     if (auto H = getHover(AST, T.point(), format::getLLVMStyle())) {
       EXPECT_NE("", Test.ExpectedHover) << Test.Input;
-      EXPECT_EQ(H->render().value, Test.ExpectedHover.str()) << Test.Input;
+      EXPECT_EQ(H->present().renderForTests(), Test.ExpectedHover.str())
+          << Test.Input;
     } else
       EXPECT_EQ("", Test.ExpectedHover.str()) << Test.Input;
   }

From bf559a7f3fca4630b3e3511da18ab78d65a5e7ff Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Wed, 29 May 2019 10:11:14 +0000
Subject: [PATCH 0469/1176] [Index] Correctly set symbol kind of
 IndirectFieldDecl

Summary: The kind has been 'unknown' before, now it is 'field'.

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62573

llvm-svn: 361941
---
 .../clangd/unittests/CodeCompleteTests.cpp         | 14 ++++++++++++++
 clang/lib/Index/IndexSymbol.cpp                    |  1 +
 clang/test/Index/index-anonymous-union-fields.cpp  | 10 ++++++++++
 3 files changed, 25 insertions(+)
 create mode 100644 clang/test/Index/index-anonymous-union-fields.cpp

diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index a7aa16569bfe6..b9ca702ee0fae 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -449,6 +449,20 @@ TEST(CompletionTest, Kinds) {
   Results = completions("nam^");
   EXPECT_THAT(Results.Completions,
               Has("namespace", CompletionItemKind::Snippet));
+
+  // Members of anonymous unions are of kind 'field'.
+  Results = completions(
+      R"cpp(
+        struct X{
+            union {
+              void *a;
+            };
+        };
+        auto u = X().^
+      )cpp");
+  EXPECT_THAT(
+      Results.Completions,
+      UnorderedElementsAre(AllOf(Named("a"), Kind(CompletionItemKind::Field))));
 }
 
 TEST(CompletionTest, NoDuplicates) {
diff --git a/clang/lib/Index/IndexSymbol.cpp b/clang/lib/Index/IndexSymbol.cpp
index a8f11b34486d0..db397b9856136 100644
--- a/clang/lib/Index/IndexSymbol.cpp
+++ b/clang/lib/Index/IndexSymbol.cpp
@@ -168,6 +168,7 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
       Info.Kind = SymbolKind::Function;
       break;
     case Decl::Field:
+    case Decl::IndirectField:
       Info.Kind = SymbolKind::Field;
       if (const CXXRecordDecl *
             CXXRec = dyn_cast<CXXRecordDecl>(D->getDeclContext())) {
diff --git a/clang/test/Index/index-anonymous-union-fields.cpp b/clang/test/Index/index-anonymous-union-fields.cpp
new file mode 100644
index 0000000000000..30f254d110956
--- /dev/null
+++ b/clang/test/Index/index-anonymous-union-fields.cpp
@@ -0,0 +1,10 @@
+struct X {
+  union {
+    void *a;
+  };
+};
+
+// RUN: c-index-test -index-file %s > %t
+// RUN: FileCheck %s -input-file=%t
+
+// CHECK: [indexDeclaration]: kind: field | name: a | {{.*}} | loc: 3:11

From db15e68e4d53eccd28c62584027e93393f1d32a5 Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Wed, 29 May 2019 10:13:41 +0000
Subject: [PATCH 0470/1176] [HWASAN] Remove unused code

Differential revision: https://reviews.llvm.org/D62489

llvm-svn: 361942
---
 .../lib/hwasan/hwasan_interceptors.cpp        | 23 -------------------
 compiler-rt/lib/hwasan/hwasan_thread.h        |  5 ----
 2 files changed, 28 deletions(-)

diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
index 17bf14d810cee..47fed0fc9abb8 100644
--- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
@@ -44,24 +44,6 @@ using __sanitizer::atomic_load;
 using __sanitizer::atomic_store;
 using __sanitizer::atomic_uintptr_t;
 
-bool IsInInterceptorScope() {
-  Thread *t = GetCurrentThread();
-  return t && t->InInterceptorScope();
-}
-
-struct InterceptorScope {
-  InterceptorScope() {
-    Thread *t = GetCurrentThread();
-    if (t)
-      t->EnterInterceptorScope();
-  }
-  ~InterceptorScope() {
-    Thread *t = GetCurrentThread();
-    if (t)
-      t->LeaveInterceptorScope();
-  }
-};
-
 static uptr allocated_for_dlsym;
 static const uptr kDlsymAllocPoolSize = 1024;
 static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
@@ -254,11 +236,6 @@ INTERCEPTOR(int, fork, void) {
   return pid;
 }
 
-
-struct HwasanInterceptorContext {
-  bool in_interceptor_scope;
-};
-
 namespace __hwasan {
 
 int OnExit() {
diff --git a/compiler-rt/lib/hwasan/hwasan_thread.h b/compiler-rt/lib/hwasan/hwasan_thread.h
index 6fa592bfac6ba..42c1e9e124b07 100644
--- a/compiler-rt/lib/hwasan/hwasan_thread.h
+++ b/compiler-rt/lib/hwasan/hwasan_thread.h
@@ -46,10 +46,6 @@ class Thread {
   void EnterSymbolizer() { in_symbolizer_++; }
   void LeaveSymbolizer() { in_symbolizer_--; }
 
-  bool InInterceptorScope() { return in_interceptor_scope_; }
-  void EnterInterceptorScope() { in_interceptor_scope_++; }
-  void LeaveInterceptorScope() { in_interceptor_scope_--; }
-
   AllocatorCache *allocator_cache() { return &allocator_cache_; }
   HeapAllocationsRingBuffer *heap_allocations() { return heap_allocations_; }
   StackAllocationsRingBuffer *stack_allocations() { return stack_allocations_; }
@@ -82,7 +78,6 @@ class Thread {
 
   unsigned in_signal_handler_;
   unsigned in_symbolizer_;
-  unsigned in_interceptor_scope_;
 
   u32 random_state_;
   u32 random_buffer_;

From 8ac7b2d07bd6042afe0e8618ca8682d7663f4be8 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Wed, 29 May 2019 10:31:46 +0000
Subject: [PATCH 0471/1176] [llvm-readelf] - Allow dumping of the .dynamic
 section even if there is no PT_DYNAMIC header.

It is now possible after D61937 was landed and was discussed
in it's review comments. It is not consistent with GNU, which
does not output .dynamic section content in this case for
no visible reason.

Differential revision: https://reviews.llvm.org/D62179

llvm-svn: 361943
---
 llvm/test/Object/corrupt.test                 |   4 +-
 llvm/test/Object/invalid.test                 |   3 -
 .../llvm-readobj/elf-dynamic-malformed.test   |  18 +-
 .../elf-dynamic-no-pt-dynamic.test            |  14 +-
 .../yaml2obj/dynamic-section-raw-content.yaml |   6 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 161 +++++++++---------
 6 files changed, 112 insertions(+), 94 deletions(-)

diff --git a/llvm/test/Object/corrupt.test b/llvm/test/Object/corrupt.test
index c98757885aa12..49f973d43b191 100644
--- a/llvm/test/Object/corrupt.test
+++ b/llvm/test/Object/corrupt.test
@@ -38,11 +38,11 @@ RUN:   FileCheck --check-prefix=PHENTSIZE %s
 
 PHENTSIZE: invalid e_phentsize
 
-RUN: not llvm-readobj --dynamic-table \
+RUN: llvm-readobj --dynamic-table \
 RUN:   %p/Inputs/corrupt-invalid-virtual-addr.elf.x86-64 2>&1 | \
 RUN:   FileCheck --check-prefix=VIRTADDR %s
 
-VIRTADDR: Virtual address is not in any segment
+VIRTADDR: warning: Unable to parse DT_STRTAB: Virtual address is not in any segment
 
 
 RUN: not llvm-readobj --dyn-relocations \
diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test
index 6f94f9d218d44..921a5d6a7bc01 100644
--- a/llvm/test/Object/invalid.test
+++ b/llvm/test/Object/invalid.test
@@ -40,9 +40,6 @@ SECTION-NEXT:    EntrySize: 32
 RUN: not llvm-readobj --symbols %p/Inputs/invalid-sh_entsize.elf 2>&1 | FileCheck --check-prefix=INVALID-SYM-SIZE %s
 INVALID-SYM-SIZE: invalid sh_entsize
 
-RUN: not llvm-readobj --dyn-symbols %p/Inputs/invalid-sh_entsize.elf 2>&1 | FileCheck --check-prefix=INVALID-DYNSYM-SIZE %s
-INVALID-DYNSYM-SIZE: Invalid entity size
-
 RUN: not llvm-readobj --symbols %p/Inputs/invalid-section-index.elf 2>&1 | FileCheck --check-prefix=INVALID-SECTION-INDEX %s
 INVALID-SECTION-INDEX: invalid section index
 
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
index aaee340e1cca6..abc0f89310c3a 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
@@ -114,7 +114,8 @@ ProgramHeaders:
 # RUN: not llvm-readobj --dynamic-table %t.bad-strtab 2>&1 | FileCheck %s --check-prefix BAD-STRTAB
 # RUN: not llvm-readelf --dynamic-table %t.bad-strtab 2>&1 | FileCheck %s --check-prefix BAD-STRTAB
 
-# BAD-STRTAB: LLVM ERROR: Virtual address is not in any segment
+# BAD-STRTAB: warning: Unable to parse DT_STRTAB: Virtual address is not in any segment
+# BAD-STRTAB: error: Invalid dynamic string table reference
 
 --- !ELF
 FileHeader:
@@ -147,10 +148,19 @@ ProgramHeaders:
 
 # Test handling of other d_ptr tags pointing outside the file's address space.
 # RUN: yaml2obj %s --docnum=5 -o %t.bad-rela
-# RUN: not llvm-readobj --dynamic-table %t.bad-rela 2>&1 | FileCheck %s --check-prefix BAD-RELA
-# RUN: not llvm-readelf --dynamic-table %t.bad-rela 2>&1 | FileCheck %s --check-prefix BAD-RELA
+# RUN: llvm-readobj --dynamic-table %t.bad-rela 2>&1 | FileCheck %s --check-prefixes=CHECK,BAD-RELA
+# RUN: llvm-readelf --dynamic-table %t.bad-rela 2>&1 | FileCheck %s --check-prefixes=CHECK,BAD-RELA-GNU
 
-# BAD-RELA: LLVM ERROR: Virtual address is not in any segment
+# CHECK: warning: Unable to parse DT_RELA: Virtual address is not in any segment
+# BAD-RELA:      DynamicSection [ (2 entries)
+# BAD-RELA-NEXT:   Tag                Type Name/Value
+# BAD-RELA-NEXT:   0x0000000000000007 RELA 0x1000000
+# BAD-RELA-NEXT:   0x0000000000000000 NULL 0x0
+# BAD-RELA-NEXT: ]
+# BAD-RELA-GNU:      Dynamic section at offset 0x1f0 contains 2 entries:
+# BAD-RELA-GNU-NEXT: Tag                Type   Name/Value
+# BAD-RELA-GNU-NEXT: 0x0000000000000007 (RELA) 0x1000000
+# BAD-RELA-GNU-NEXT: 0x0000000000000000 (NULL) 0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test b/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
index 3ef293714a897..89e660337b5f5 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-no-pt-dynamic.test
@@ -1,16 +1,22 @@
-# Show that no dumping occurs if there is no PT_DYNAMIC header.
+## Show that dumping occurs even if there is no PT_DYNAMIC header.
+## This is inconsistent with the GNU behavior, but seems to be more reasonable.
 # RUN: yaml2obj %s -o %t.no-phdr
 # RUN: llvm-readobj --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=LLVM
-# RUN: llvm-readelf --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=GNU --allow-empty
+# RUN: llvm-readelf --dynamic-table %t.no-phdr | FileCheck %s --check-prefix=GNU
 
 # LLVM:      File: {{.*}}.no-phdr
 # LLVM-NEXT: Format: ELF64-x86-64
 # LLVM-NEXT: Arch: x86_64
 # LLVM-NEXT: AddressSize: 64bit
 # LLVM-NEXT: LoadName:{{ *}}
-# LLVM-NOT:  {{.}}
+# LLVM-NEXT: DynamicSection [ (1 entries)
+# LLVM-NEXT:   Tag                Type Name/Value
+# LLVM-NEXT:   0x0000000000000000 NULL 0x0
+# LLVM-NEXT: ]
 
-# GNU-NOT: {{.}}
+# GNU:      Dynamic section at offset 0x1b8 contains 1 entries:
+# GNU-NEXT: Tag                Type   Name/Value
+# GNU-NEXT: 0x0000000000000000 (NULL) 0x0
 
 --- !ELF
 FileHeader:
diff --git a/llvm/test/tools/yaml2obj/dynamic-section-raw-content.yaml b/llvm/test/tools/yaml2obj/dynamic-section-raw-content.yaml
index 37af06755a910..81008bcd85258 100644
--- a/llvm/test/tools/yaml2obj/dynamic-section-raw-content.yaml
+++ b/llvm/test/tools/yaml2obj/dynamic-section-raw-content.yaml
@@ -10,10 +10,10 @@
 # RAW-NEXT: ]
 # RAW-NEXT: Address:
 # RAW-NEXT: Offset:
-# RAW-NEXT: Size: 5
+# RAW-NEXT: Size: 16
 
 # RAW:      Hex dump of section '.dynamic':
-# RAW-NEXT: 0x00000000 01234567 89 {{.*}}
+# RAW-NEXT: 0x00000000 01234567 89012345 67890000 00000000 {{.*}}
 
 # RUN: not yaml2obj --docnum=2 %s -o %t2 2>&1 | FileCheck %s --check-prefix=ERR
 # ERR: Cannot specify both raw content and explicit entries for dynamic section '.dynamic'.
@@ -27,7 +27,7 @@ FileHeader:
 Sections:
   - Name: .dynamic
     Type: SHT_DYNAMIC
-    Content: "0123456789"
+    Content: "01234567890123456789000000000000"
 
 --- !ELF
 FileHeader:
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index fcadf73110e7e..047b463582c9b 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1346,6 +1346,7 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
 
 template <typename ELFT>
 void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+  // Try to locate the PT_DYNAMIC header.
   const Elf_Phdr *DynamicPhdr = nullptr;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
     if (Phdr.p_type != ELF::PT_DYNAMIC)
@@ -1354,11 +1355,6 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
     break;
   }
 
-  // We do not want to dump dynamic section if we have no PT_DYNAMIC header.
-  // This matches GNU's behavior.
-  if (!DynamicPhdr)
-    return;
-
   // Try to locate the .dynamic section in the sections header table.
   const Elf_Shdr *DynamicSec = nullptr;
   for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
@@ -1373,9 +1369,16 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
   // Ignore sh_entsize and use the expected value for entry size explicitly.
   // This allows us to dump the dynamic sections with a broken sh_entsize
   // field.
-  if (DynamicSec)
+  if (DynamicSec) {
     DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
                              DynamicSec->sh_size, sizeof(Elf_Dyn)});
+    parseDynamicTable();
+  }
+
+  // If we have a PT_DYNAMIC header, we will either check the found dynamic
+  // section or take the dynamic table data directly from the header.
+  if (!DynamicPhdr)
+    return;
 
   if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
       ObjF->getMemoryBufferRef().getBufferSize())
@@ -1389,7 +1392,6 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
   }
 
   StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec));
-
   if (DynamicSec->sh_addr + DynamicSec->sh_size >
           DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
       DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
@@ -1401,8 +1403,6 @@ void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
     reportWarning("The SHT_DYNAMIC section '" + Name +
                   "' is not at the start of "
                   "PT_DYNAMIC segment");
-
-  parseDynamicTable();
 }
 
 template <typename ELFT>
@@ -1460,11 +1460,71 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
     ELFDumperStyle.reset(new LLVMStyle<ELFT>(Writer, this));
 }
 
+static const char *getTypeString(unsigned Arch, uint64_t Type) {
+#define DYNAMIC_TAG(n, v)
+  switch (Arch) {
+  case EM_HEXAGON:
+    switch (Type) {
+#define HEXAGON_DYNAMIC_TAG(name, value)                                       \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef HEXAGON_DYNAMIC_TAG
+    }
+    break;
+
+  case EM_MIPS:
+    switch (Type) {
+#define MIPS_DYNAMIC_TAG(name, value)                                          \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef MIPS_DYNAMIC_TAG
+    }
+    break;
+
+  case EM_PPC64:
+    switch (Type) {
+#define PPC64_DYNAMIC_TAG(name, value)                                         \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC64_DYNAMIC_TAG
+    }
+    break;
+  }
+#undef DYNAMIC_TAG
+  switch (Type) {
+// Now handle all dynamic tags except the architecture specific ones
+#define MIPS_DYNAMIC_TAG(name, value)
+#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC64_DYNAMIC_TAG(name, value)
+// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
+#define DYNAMIC_TAG_MARKER(name, value)
+#define DYNAMIC_TAG(name, value)                                               \
+  case DT_##name:                                                              \
+    return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef DYNAMIC_TAG
+#undef MIPS_DYNAMIC_TAG
+#undef HEXAGON_DYNAMIC_TAG
+#undef PPC64_DYNAMIC_TAG
+#undef DYNAMIC_TAG_MARKER
+  default:
+    return "unknown";
+  }
+}
+
 template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
-  auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
+  auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
     auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
-    if (!MappedAddrOrError)
-      report_fatal_error(MappedAddrOrError.takeError());
+    if (!MappedAddrOrError) {
+      reportWarning("Unable to parse DT_" +
+                    Twine(getTypeString(
+                        ObjF->getELFFile()->getHeader()->e_machine, Tag)) +
+                    ": " + llvm::toString(MappedAddrOrError.takeError()));
+      return nullptr;
+    }
     return MappedAddrOrError.get();
   };
 
@@ -1474,26 +1534,26 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
   for (const Elf_Dyn &Dyn : dynamic_table()) {
     switch (Dyn.d_tag) {
     case ELF::DT_HASH:
-      HashTable =
-          reinterpret_cast<const Elf_Hash *>(toMappedAddr(Dyn.getPtr()));
+      HashTable = reinterpret_cast<const Elf_Hash *>(
+          toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
       break;
     case ELF::DT_GNU_HASH:
-      GnuHashTable =
-          reinterpret_cast<const Elf_GnuHash *>(toMappedAddr(Dyn.getPtr()));
+      GnuHashTable = reinterpret_cast<const Elf_GnuHash *>(
+          toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
       break;
     case ELF::DT_STRTAB:
-      StringTableBegin =
-          reinterpret_cast<const char *>(toMappedAddr(Dyn.getPtr()));
+      StringTableBegin = reinterpret_cast<const char *>(
+          toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
       break;
     case ELF::DT_STRSZ:
       StringTableSize = Dyn.getVal();
       break;
     case ELF::DT_SYMTAB:
-      DynSymRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynSymRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       DynSymRegion.EntSize = sizeof(Elf_Sym);
       break;
     case ELF::DT_RELA:
-      DynRelaRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_RELASZ:
       DynRelaRegion.Size = Dyn.getVal();
@@ -1505,7 +1565,7 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
       SONameOffset = Dyn.getVal();
       break;
     case ELF::DT_REL:
-      DynRelRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_RELSZ:
       DynRelRegion.Size = Dyn.getVal();
@@ -1515,7 +1575,7 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
       break;
     case ELF::DT_RELR:
     case ELF::DT_ANDROID_RELR:
-      DynRelrRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynRelrRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_RELRSZ:
     case ELF::DT_ANDROID_RELRSZ:
@@ -1535,7 +1595,7 @@ template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
                     Twine((uint64_t)Dyn.getVal()));
       break;
     case ELF::DT_JMPREL:
-      DynPLTRelRegion.Addr = toMappedAddr(Dyn.getPtr());
+      DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
       break;
     case ELF::DT_PLTRELSZ:
       DynPLTRelRegion.Size = Dyn.getVal();
@@ -1627,61 +1687,6 @@ template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
   ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
 }
 
-static const char *getTypeString(unsigned Arch, uint64_t Type) {
-#define DYNAMIC_TAG(n, v)
-  switch (Arch) {
-  case EM_HEXAGON:
-    switch (Type) {
-#define HEXAGON_DYNAMIC_TAG(name, value)                                       \
-    case DT_##name:                                                            \
-      return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef HEXAGON_DYNAMIC_TAG
-    }
-    break;
-
-  case EM_MIPS:
-    switch (Type) {
-#define MIPS_DYNAMIC_TAG(name, value)                                          \
-    case DT_##name:                                                            \
-      return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef MIPS_DYNAMIC_TAG
-    }
-    break;
-
-  case EM_PPC64:
-    switch(Type) {
-#define PPC64_DYNAMIC_TAG(name, value)                                         \
-    case DT_##name:                                                            \
-      return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef PPC64_DYNAMIC_TAG
-    }
-    break;
-  }
-#undef DYNAMIC_TAG
-  switch (Type) {
-// Now handle all dynamic tags except the architecture specific ones
-#define MIPS_DYNAMIC_TAG(name, value)
-#define HEXAGON_DYNAMIC_TAG(name, value)
-#define PPC64_DYNAMIC_TAG(name, value)
-// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
-#define DYNAMIC_TAG_MARKER(name, value)
-#define DYNAMIC_TAG(name, value)                                               \
-  case DT_##name:                                                              \
-    return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef DYNAMIC_TAG
-#undef MIPS_DYNAMIC_TAG
-#undef HEXAGON_DYNAMIC_TAG
-#undef PPC64_DYNAMIC_TAG
-#undef DYNAMIC_TAG_MARKER
-  default:
-    return "unknown";
-  }
-}
-
 #define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum)                                 \
   { #enum, prefix##_##enum }
 

From 7964f6fe5fb416fc09ae1c477fd3c356b226f2ab Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Wed, 29 May 2019 10:39:01 +0000
Subject: [PATCH 0472/1176] [ADT] add iterator_range::empty()

llvm-svn: 361944
---
 llvm/include/llvm/ADT/iterator_range.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h
index 774c7c4e3366e..aa8830943cabc 100644
--- a/llvm/include/llvm/ADT/iterator_range.h
+++ b/llvm/include/llvm/ADT/iterator_range.h
@@ -44,6 +44,7 @@ class iterator_range {
 
   IteratorT begin() const { return begin_iterator; }
   IteratorT end() const { return end_iterator; }
+  bool empty() const { return begin_iterator == end_iterator; }
 };
 
 /// Convenience function for iterating over sub-ranges.

From 5b363c14d7bd83523caa66013d4200459b71abc4 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Wed, 29 May 2019 11:01:07 +0000
Subject: [PATCH 0473/1176] [llvm-readobj] - Repair the test case.

I forgot to change the test tag in r361932.
Now it is fixed.

llvm-svn: 361945
---
 llvm/test/tools/llvm-readobj/elf-versioninfo.test | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
index c4e0bb62e1729..42c158693d8f3 100644
--- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test
+++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
@@ -174,5 +174,5 @@ DynamicSymbols:
 # GNU-NEXT:   000:   0 (*local*) 2 (VERSION1) 3 (VERSION2) 4 (v1)
 # GNU-NEXT:   004:   5 (v2)      6 (v3)
 
-# GNU-VERDEF: Dumper for .gnu.version_d is not implemented
-# GNU-VERNEED: Dumper for .gnu.version_r is not implemented
+# GNU: Dumper for .gnu.version_d is not implemented
+# GNU: Dumper for .gnu.version_r is not implemented

From 5dc90367464853bb2830c9e4e4875e1588ba7096 Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Wed, 29 May 2019 11:26:06 +0000
Subject: [PATCH 0474/1176] [CMake] LLDB.framework tools handling

Summary:
Modify the way LLDB.framework tools are collected. This allows for better fine-tuning of the install behavior downstream. Each target calls `lldb_add_to_framework()` individually. When entering the function, the target exists and we can tweak its very own post-build and install steps. This was not possible with the old `LLDB_FRAMEWORK_TOOLS` approach.

No function change otherwise.
This is a reduced follow-up from the proposal in D61952.

Reviewers: xiaobai, compnerd, JDevlieghere

Reviewed By: JDevlieghere

Subscribers: clayborg, friss, ki.stfu, mgorny, lldb-commits, labath, #lldb

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62472

llvm-svn: 361946
---
 lldb/CMakeLists.txt                          |  4 +++
 lldb/cmake/modules/AddLLDB.cmake             | 29 ++++++++++++++++++++
 lldb/cmake/modules/LLDBConfig.cmake          |  2 --
 lldb/cmake/modules/LLDBFramework.cmake       | 15 +---------
 lldb/tools/argdumper/CMakeLists.txt          |  4 +++
 lldb/tools/darwin-debug/CMakeLists.txt       |  4 +++
 lldb/tools/debugserver/source/CMakeLists.txt |  4 +++
 lldb/tools/driver/CMakeLists.txt             | 12 +++++++-
 lldb/tools/lldb-mi/CMakeLists.txt            | 12 +++++++-
 lldb/tools/lldb-server/CMakeLists.txt        |  4 +++
 lldb/tools/lldb-vscode/CMakeLists.txt        | 12 +++++++-
 11 files changed, 83 insertions(+), 19 deletions(-)

diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index afcf6bb5fb2bd..20eb493f7e24c 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -151,6 +151,10 @@ if(LLDB_INCLUDE_TESTS)
     list(APPEND LLDB_TEST_DEPS dsymutil)
   endif()
 
+  if(TARGET lldb-framework)
+    list(APPEND LLDB_TEST_DEPS lldb-framework)
+  endif()
+
   add_custom_target(lldb-test-deps)
   add_dependencies(lldb-test-deps ${LLDB_TEST_DEPS})
   set_target_properties(lldb-test-deps PROPERTIES FOLDER "lldb misc")
diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake
index e35fc9e9be584..a40d8e9cae9d7 100644
--- a/lldb/cmake/modules/AddLLDB.cmake
+++ b/lldb/cmake/modules/AddLLDB.cmake
@@ -208,3 +208,32 @@ function(lldb_setup_framework_rpaths_in_tool name)
 
   add_dependencies(${name} lldb-framework)
 endfunction()
+
+# Unified handling for executable LLDB.framework resources. Given the name of an
+# executable target, this function adds a post-build step to copy it to the
+# framework bundle in the build-tree.
+function(lldb_add_to_framework name)
+  set(subdir "LLDB.framework/Versions/${LLDB_FRAMEWORK_VERSION}/Resources")
+
+  # Destination for the copy in the build-tree. While the framework target may
+  # not exist yet, it will exist when the generator expression gets expanded.
+  set(copy_dest "$<TARGET_FILE_DIR:liblldb>/../../../${subdir}")
+
+  # Copy into the framework's Resources directory for testing.
+  add_custom_command(TARGET ${name} POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${name}> ${copy_dest}
+    COMMENT "Copy ${name} to ${copy_dest}"
+  )
+endfunction()
+
+# CMake's set_target_properties() doesn't allow to pass lists for RPATH
+# properties directly (error: "called with incorrect number of arguments").
+# Instead of defining two list variables each time, use this helper function.
+function(lldb_setup_rpaths name)
+  cmake_parse_arguments(LIST "" "" "BUILD_RPATH;INSTALL_RPATH" ${ARGN})
+  set_target_properties(${name} PROPERTIES
+    BUILD_WITH_INSTALL_RPATH OFF
+    BUILD_RPATH "${LIST_BUILD_RPATH}"
+    INSTALL_RPATH "${LIST_INSTALL_RPATH}"
+  )
+endfunction()
\ No newline at end of file
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index d2f418fe059be..018ea6c618baa 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -64,8 +64,6 @@ if(LLDB_BUILD_FRAMEWORK)
   set(LLDB_FRAMEWORK_VERSION A CACHE STRING "LLDB.framework version (default is A)")
   set(LLDB_FRAMEWORK_BUILD_DIR bin CACHE STRING "Output directory for LLDB.framework")
   set(LLDB_FRAMEWORK_INSTALL_DIR Library/Frameworks CACHE STRING "Install directory for LLDB.framework")
-  set(LLDB_FRAMEWORK_TOOLS darwin-debug;debugserver;lldb-argdumper;lldb-server CACHE STRING
-      "List of tools to include in LLDB.framework/Resources")
 
   # Set designated directory for all dSYMs. Essentially, this emits the
   # framework's dSYM outside of the framework directory.
diff --git a/lldb/cmake/modules/LLDBFramework.cmake b/lldb/cmake/modules/LLDBFramework.cmake
index 147d3de527fa5..ee5fef36a5877 100644
--- a/lldb/cmake/modules/LLDBFramework.cmake
+++ b/lldb/cmake/modules/LLDBFramework.cmake
@@ -36,22 +36,9 @@ else()
 endif()
 
 # Target to capture extra steps for a fully functional framework bundle.
-add_custom_target(lldb-framework)
+add_custom_target(lldb-framework ALL)
 add_dependencies(lldb-framework liblldb)
 
-# Dependencies are defined once tools are added (see AddLLDB.cmake)
-if(LLDB_FRAMEWORK_TOOLS)
-  message(STATUS "LLDB.framework: adding tools ${LLDB_FRAMEWORK_TOOLS}")
-  foreach(tool ${LLDB_FRAMEWORK_TOOLS})
-    add_custom_command(TARGET lldb-framework POST_BUILD
-      COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:${tool}> $<TARGET_FILE_DIR:liblldb>/Resources
-      COMMENT "LLDB.framework: copy additional tool ${tool}"
-    )
-  endforeach()
-else()
-  message(WARNING "LLDB.framework: no additional tools configured (set via LLDB_FRAMEWORK_TOOLS)")
-endif()
-
 # Apart from this one, CMake creates all required symlinks in the framework bundle.
 add_custom_command(TARGET lldb-framework POST_BUILD
   COMMAND ${CMAKE_COMMAND} -E create_symlink
diff --git a/lldb/tools/argdumper/CMakeLists.txt b/lldb/tools/argdumper/CMakeLists.txt
index 71c73cc195c2a..af9374b7ea859 100644
--- a/lldb/tools/argdumper/CMakeLists.txt
+++ b/lldb/tools/argdumper/CMakeLists.txt
@@ -4,3 +4,7 @@ add_lldb_tool(lldb-argdumper
   LINK_LIBS
     lldbUtility
   )
+
+if(LLDB_BUILD_FRAMEWORK)
+  lldb_add_to_framework(lldb-argdumper)
+endif()
diff --git a/lldb/tools/darwin-debug/CMakeLists.txt b/lldb/tools/darwin-debug/CMakeLists.txt
index 5be6e4ee45983..6b9eac31fe51b 100644
--- a/lldb/tools/darwin-debug/CMakeLists.txt
+++ b/lldb/tools/darwin-debug/CMakeLists.txt
@@ -1,3 +1,7 @@
 add_lldb_tool(darwin-debug
   darwin-debug.cpp
   )
+
+if(LLDB_BUILD_FRAMEWORK)
+  lldb_add_to_framework(darwin-debug)
+endif()
diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt
index 2b8e737d536b9..e0d834799f896 100644
--- a/lldb/tools/debugserver/source/CMakeLists.txt
+++ b/lldb/tools/debugserver/source/CMakeLists.txt
@@ -265,6 +265,10 @@ if(build_and_sign_debugserver)
       ${entitlements}
     )
 
+  if(LLDB_BUILD_FRAMEWORK)
+    lldb_add_to_framework(debugserver)
+  endif()
+
   if(IOS)
     set_property(TARGET lldbDebugserverCommon APPEND PROPERTY COMPILE_DEFINITIONS
       WITH_LOCKDOWN
diff --git a/lldb/tools/driver/CMakeLists.txt b/lldb/tools/driver/CMakeLists.txt
index 1f8c469e08c86..06dc88afdaa05 100644
--- a/lldb/tools/driver/CMakeLists.txt
+++ b/lldb/tools/driver/CMakeLists.txt
@@ -31,5 +31,15 @@ add_dependencies(lldb
 set_target_properties(LLDBOptionsTableGen PROPERTIES FOLDER "lldb misc")
 
 if(LLDB_BUILD_FRAMEWORK)
-  lldb_setup_framework_rpaths_in_tool(lldb)
+  # In the build-tree, we know the exact path to the framework directory.
+  # The installed framework can be in different locations.
+  get_target_property(framework_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY)
+  lldb_setup_rpaths(lldb
+    BUILD_RPATH
+      "${framework_build_dir}"
+    INSTALL_RPATH
+      "@loader_path/../../../SharedFrameworks"
+      "@loader_path/../../System/Library/PrivateFrameworks"
+      "@loader_path/../../Library/PrivateFrameworks"
+  )
 endif()
diff --git a/lldb/tools/lldb-mi/CMakeLists.txt b/lldb/tools/lldb-mi/CMakeLists.txt
index 20c031a12004d..db1634ed5c342 100644
--- a/lldb/tools/lldb-mi/CMakeLists.txt
+++ b/lldb/tools/lldb-mi/CMakeLists.txt
@@ -95,5 +95,15 @@ add_lldb_tool(lldb-mi
   )
 
 if(LLDB_BUILD_FRAMEWORK)
-  lldb_setup_framework_rpaths_in_tool(lldb-mi)
+  # In the build-tree, we know the exact path to the framework directory.
+  # The installed framework can be in different locations.
+  get_target_property(framework_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY)
+  lldb_setup_rpaths(lldb-mi
+    BUILD_RPATH
+      "${framework_build_dir}"
+    INSTALL_RPATH
+      "@loader_path/../../../SharedFrameworks"
+      "@loader_path/../../System/Library/PrivateFrameworks"
+      "@loader_path/../../Library/PrivateFrameworks"
+  )
 endif()
diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt
index f1c826b448604..d9900b8a03f57 100644
--- a/lldb/tools/lldb-server/CMakeLists.txt
+++ b/lldb/tools/lldb-server/CMakeLists.txt
@@ -77,3 +77,7 @@ add_lldb_tool(lldb-server
 )
 
 target_link_libraries(lldb-server PRIVATE ${LLDB_SYSTEM_LIBS})
+
+if(LLDB_BUILD_FRAMEWORK)
+  lldb_add_to_framework(lldb-server)
+endif()
diff --git a/lldb/tools/lldb-vscode/CMakeLists.txt b/lldb/tools/lldb-vscode/CMakeLists.txt
index 357a4a10b12f0..a39a27d7d2e6b 100644
--- a/lldb/tools/lldb-vscode/CMakeLists.txt
+++ b/lldb/tools/lldb-vscode/CMakeLists.txt
@@ -31,5 +31,15 @@ add_lldb_tool(lldb-vscode
   )
 
 if(LLDB_BUILD_FRAMEWORK)
-  lldb_setup_framework_rpaths_in_tool(lldb-vscode)
+  # In the build-tree, we know the exact path to the framework directory.
+  # The installed framework can be in different locations.
+  get_target_property(framework_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY)
+  lldb_setup_rpaths(lldb-vscode
+    BUILD_RPATH
+      "${framework_build_dir}"
+    INSTALL_RPATH
+      "@loader_path/../../../SharedFrameworks"
+      "@loader_path/../../System/Library/PrivateFrameworks"
+      "@loader_path/../../Library/PrivateFrameworks"
+  )
 endif()

From 72e05d0aa4ff192c75e5e0137aec377ae5cd2a9b Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Wed, 29 May 2019 11:28:11 +0000
Subject: [PATCH 0475/1176] [CMake] Remove lldb-server from LLDB.framework

Summary: The LLDB test suite doesn't need lldb-server in the framework bundle anymore.

Reviewers: JDevlieghere, jasonmolenda, xiaobai

Reviewed By: JDevlieghere

Subscribers: mgorny, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62474

llvm-svn: 361947
---
 lldb/tools/lldb-server/CMakeLists.txt | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt
index d9900b8a03f57..f1c826b448604 100644
--- a/lldb/tools/lldb-server/CMakeLists.txt
+++ b/lldb/tools/lldb-server/CMakeLists.txt
@@ -77,7 +77,3 @@ add_lldb_tool(lldb-server
 )
 
 target_link_libraries(lldb-server PRIVATE ${LLDB_SYSTEM_LIBS})
-
-if(LLDB_BUILD_FRAMEWORK)
-  lldb_add_to_framework(lldb-server)
-endif()

From 377c1cfe9495810f8416c626103708340e2a0038 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 29 May 2019 11:28:35 +0000
Subject: [PATCH 0476/1176] Revert "D11003: Tolerate DWARF compile unit without
 filename."

Summary:
This code is modifying a support file list after it has been created.
This makes it hard to share the file list between type units and
compile units in DWARF. It's not a total showstopper, but supporting
this while also sharing the lists would make things more complicated.

Given that this was added to support a project which never fully
materialised, and that even back then there were some concerns about the
correctness of this approach (according to D11003#200772 the compile
unit name is not guaranteed to be the first one in the support file
list), I think we should just delete this workaround.

Reviewers: clayborg, tberghammer, dsrbecky

Subscribers: aprantl, lldb-commits

Differential Revision: https://reviews.llvm.org/D62517

llvm-svn: 361948
---
 .../Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp    | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 1b0a572dd57e6..0ae204e22727f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -689,19 +689,6 @@ lldb::CompUnitSP SymbolFileDWARF::ParseCompileUnit(DWARFUnit *dwarf_cu) {
                 module_sp, dwarf_cu, cu_file_spec, dwarf_cu->GetID(),
                 cu_language, is_optimized ? eLazyBoolYes : eLazyBoolNo);
 
-            // If we just created a compile unit with an invalid file spec,
-            // try and get the first entry in the supports files from the
-            // line table as that should be the compile unit.
-            if (!cu_file_spec) {
-              cu_file_spec = cu_sp->GetSupportFiles().GetFileSpecAtIndex(1);
-              if (cu_file_spec) {
-                (FileSpec &)(*cu_sp) = cu_file_spec;
-                // Also fix the invalid file spec which was copied from the
-                // compile unit.
-                cu_sp->GetSupportFiles().Replace(0, cu_file_spec);
-              }
-            }
-
             dwarf_cu->SetUserData(cu_sp.get());
 
             m_obj_file->GetModule()->GetSymbolVendor()->SetCompileUnitAtIndex(

From a6fb183c98943e90a9e35db78dae25bc79b8c66a Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Wed, 29 May 2019 11:37:16 +0000
Subject: [PATCH 0477/1176] [llvm-objcopy] Implement IHEX writer

Differential revision: https://reviews.llvm.org/D60270

llvm-svn: 361949
---
 llvm/include/llvm/Support/Error.h             |  27 +-
 .../ELF/Inputs/ihex-elf-pt-null.yaml          |  20 ++
 .../ELF/Inputs/ihex-elf-sections.yaml         |  60 ++++
 .../ELF/Inputs/ihex-elf-sections2.yaml        |  39 +++
 .../ELF/Inputs/ihex-elf-segments.yaml         |  60 ++++
 .../tools/llvm-objcopy/ELF/ihex-writer.test   |  81 ++++++
 llvm/tools/llvm-objcopy/CopyConfig.cpp        |   3 +-
 llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp    |  45 +--
 llvm/tools/llvm-objcopy/ELF/Object.cpp        | 264 +++++++++++++++++-
 llvm/tools/llvm-objcopy/ELF/Object.h          | 140 ++++++++++
 10 files changed, 715 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-pt-null.yaml
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections.yaml
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections2.yaml
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-segments.yaml
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test

diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 23a48ed03e446..70b5a5a0ceb2d 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -1177,11 +1177,14 @@ Error createStringError(std::error_code EC, char const *Msg);
 class FileError final : public ErrorInfo<FileError> {
 
   friend Error createFileError(const Twine &, Error);
+  friend Error createFileError(const Twine &, size_t, Error);
 
 public:
   void log(raw_ostream &OS) const override {
     assert(Err && !FileName.empty() && "Trying to log after takeError().");
     OS << "'" << FileName << "': ";
+    if (Line.hasValue())
+      OS << "line " << Line.getValue() << ": ";
     Err->log(OS);
   }
 
@@ -1193,26 +1196,36 @@ class FileError final : public ErrorInfo<FileError> {
   static char ID;
 
 private:
-  FileError(const Twine &F, std::unique_ptr<ErrorInfoBase> E) {
+  FileError(const Twine &F, Optional<size_t> LineNum,
+            std::unique_ptr<ErrorInfoBase> E) {
     assert(E && "Cannot create FileError from Error success value.");
     assert(!F.isTriviallyEmpty() &&
            "The file name provided to FileError must not be empty.");
     FileName = F.str();
     Err = std::move(E);
+    Line = std::move(LineNum);
   }
 
-  static Error build(const Twine &F, Error E) {
-    return Error(std::unique_ptr<FileError>(new FileError(F, E.takePayload())));
+  static Error build(const Twine &F, Optional<size_t> Line, Error E) {
+    return Error(
+        std::unique_ptr<FileError>(new FileError(F, Line, E.takePayload())));
   }
 
   std::string FileName;
+  Optional<size_t> Line;
   std::unique_ptr<ErrorInfoBase> Err;
 };
 
 /// Concatenate a source file path and/or name with an Error. The resulting
 /// Error is unchecked.
 inline Error createFileError(const Twine &F, Error E) {
-  return FileError::build(F, std::move(E));
+  return FileError::build(F, Optional<size_t>(), std::move(E));
+}
+
+/// Concatenate a source file path and/or name with line number and an Error.
+/// The resulting Error is unchecked.
+inline Error createFileError(const Twine &F, size_t Line, Error E) {
+  return FileError::build(F, Optional<size_t>(Line), std::move(E));
 }
 
 /// Concatenate a source file path and/or name with a std::error_code 
@@ -1221,6 +1234,12 @@ inline Error createFileError(const Twine &F, std::error_code EC) {
   return createFileError(F, errorCodeToError(EC));
 }
 
+/// Concatenate a source file path and/or name with line number and
+/// std::error_code to form an Error object.
+inline Error createFileError(const Twine &F, size_t Line, std::error_code EC) {
+  return createFileError(F, Line, errorCodeToError(EC));
+}
+
 Error createFileError(const Twine &F, ErrorSuccess) = delete;
 
 /// Helper for check-and-exit error handling.
diff --git a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-pt-null.yaml b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-pt-null.yaml
new file mode 100644
index 0000000000000..3f142aeb28406
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-pt-null.yaml
@@ -0,0 +1,20 @@
+!ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0
+    AddressAlign:    0x8
+    Content:         "0001020304"
+ProgramHeaders:
+  - Type: PT_NULL
+    Flags: [ PF_X, PF_R ]
+    VAddr: 0xF00000000
+    PAddr: 0x100000
+    Sections:
+      - Section: .text
diff --git a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections.yaml b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections.yaml
new file mode 100644
index 0000000000000..9a71b3175efaa
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections.yaml
@@ -0,0 +1,60 @@
+!ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+Sections:
+  - Name:            .text
+# This section contents exceeds default IHex line length of 16 bytes
+# so we expect two lines created for it.
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0
+    AddressAlign:    0x8
+    Content:         "000102030405060708090A0B0C0D0E0F1011121314"
+  - Name:            .data
+# This section overlap 16-bit segment boundary, so we expect
+# additional 'SegmentAddr' record of type '02'
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Content:         "3031323334353637383940"
+    Address:         0xFFF8
+    AddressAlign:    0x8
+  - Name:            .data2
+# Previous section '.data' should have forced creation of
+# 'SegmentAddr'(02) record with segment address of 0x10000,
+# so this section should have address of 0x100.
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Content:         "40414243"
+    Address:         0x10100
+    AddressAlign:    0x8
+  - Name:            .data3
+# The last section not only overlaps segment boundary, but
+# also has linear address which doesn't fit 20 bits. The 
+# following records should be craeted:
+# 'SegmentAddr'(02) record with address 0x0
+# 'ExtendedAddr'(04) record with address 0x100000
+# 'Data'(00) record with 8 bytes of section data
+# 'SegmentAddr'(02) record with address 0x10000
+# 'Data'(00) record with remaining 3 bytes of data.
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Content:         "5051525354555657585960"
+    Address:         0x10FFF8
+    AddressAlign:    0x8
+  - Name:            .bss
+# NOBITS sections are not written to IHex
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x10100
+    Size:            0x1000
+    AddressAlign:    0x8
+  - Name:            .dummy
+# Non-allocatable sections are not written to IHex
+    Type:            SHT_PROGBITS
+    Flags:           [ ]
+    Address:         0x20FFF8
+    Size:            65536
+    AddressAlign:    0x8
diff --git a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections2.yaml b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections2.yaml
new file mode 100644
index 0000000000000..224340cbcc92e
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-sections2.yaml
@@ -0,0 +1,39 @@
+!ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+Sections:
+  - Name:            .text
+# Zero length sections are not exported to IHex
+# 'SegmentAddr' and 'ExtendedAddr' records aren't
+# created either.
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x7FFFFFFF
+    AddressAlign:    0x8
+    Size:            0
+  - Name:            .text1
+# Section address is sign-extended 32-bit address
+# Data fits 32-bit range
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0xFFFFFFFF80001000
+    AddressAlign:    0x8
+    Content:         "0001020304"
+  - Name:            .text2
+# Part of section data is in 32-bit address range
+# and part isn't.  
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0xFFFFFFF8
+    AddressAlign:    0x8
+    Content:         "000102030405060708"
+  - Name:            .text3
+  # Entire secion is outside of 32-bit range
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]    
+    Address:         0xFFFFFFFF0
+    AddressAlign:    0x8
+    Content:         "0001020304"
diff --git a/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-segments.yaml b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-segments.yaml
new file mode 100644
index 0000000000000..c7d8a7affb82d
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/Inputs/ihex-elf-segments.yaml
@@ -0,0 +1,60 @@
+# Here we use yaml from ihex-elf-sections.yaml, but add single load
+# segment containing all exported sections. In such case we should 
+# use physical address of a section intead of virtual address. Physical
+# addresses start from 0x100000, so we create two additional 'ExtenededAddr'
+# (03) record in the beginning of IHex file with that physical address
+!ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+  Entry:           0x100000
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x0
+    AddressAlign:    0x8
+    Content:         "000102030405060708090A0B0C0D0E0F1011121314"
+  - Name:            .data1
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Content:         "3031323334353637383940"
+    Address:         0xFFF8
+    AddressAlign:    0x8
+  - Name:            .data2
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Content:         "40414243"
+    Address:         0x10100
+    AddressAlign:    0x8
+  - Name:            .data3
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Content:         "5051525354555657585960"
+    Address:         0x10FFF8
+    AddressAlign:    0x8
+  - Name:            .bss
+    Type:            SHT_NOBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x10100
+    Size:            0x1000
+    AddressAlign:    0x8
+  - Name:            .dummy
+    Type:            SHT_PROGBITS
+    Flags:           [ ]
+    Address:         0x20FFF8
+    Size:            65536
+    AddressAlign:    0x8
+ProgramHeaders:
+  - Type: PT_LOAD
+    Flags: [ PF_X, PF_R ]
+    VAddr: 0xF00000000
+    PAddr: 0x100000
+    Sections:
+      - Section: .text
+      - Section: .data1
+      - Section: .data2
+      - Section: .data3
+      - Section: .bss
diff --git a/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test b/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
new file mode 100644
index 0000000000000..2b74743385bd5
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
@@ -0,0 +1,81 @@
+# RUN: yaml2obj %p/Inputs/ihex-elf-sections.yaml -o %t
+# RUN: llvm-objcopy -O ihex %t - | FileCheck %s
+
+# Check ihex output, when we have segments in ELF file
+# In such case only sections in PT_LOAD segments will
+# be exported and their physical addresses will be used
+# RUN: yaml2obj %p/Inputs/ihex-elf-segments.yaml -o %t-segs
+# RUN: llvm-objcopy -O ihex %t-segs - | FileCheck %s --check-prefix=SEGMENTS
+
+# Check that non-load segments are ignored:
+# RUN: yaml2obj %p/Inputs/ihex-elf-pt-null.yaml -o %t2-segs
+# RUN: llvm-objcopy -O ihex %t2-segs - | FileCheck %s --check-prefix=PT_NULL
+
+# Check that sign-extended 32-bit section addresses are processed
+# correctly
+# RUN: yaml2obj %p/Inputs/ihex-elf-sections2.yaml -o %t-sec2
+# RUN: llvm-objcopy -O ihex --only-section=.text1 %t-sec2 - | FileCheck %s --check-prefix=SIGN_EXTENDED
+
+# Check that section address range overlapping 32 bit range
+# triggers an error
+# RUN: not llvm-objcopy -O ihex --only-section=.text2 %t-sec2 %t-sec2-2.hex 2>&1 | FileCheck %s --check-prefix=BAD-ADDR
+# RUN: not llvm-objcopy -O ihex --only-section=.text3 %t-sec2 %t-sec2-3.hex 2>&1 | FileCheck %s --check-prefix=BAD-ADDR2
+
+# Check that zero length section is not written
+# RUN: llvm-objcopy -O ihex --only-section=.text %t-sec2 - | FileCheck %s --check-prefix=ZERO_SIZE_SEC
+
+# Check 80x86 start address record. It is created for start
+# addresses less than 0x100000
+# RUN: llvm-objcopy -O ihex --set-start=0xFFFF %t - | FileCheck %s --check-prefix=START1
+
+# Check i386 start address record (05). It is created for
+# start addresses which doesn't fit 20 bits
+# RUN: llvm-objcopy -O ihex --set-start=0x100000 %t - | FileCheck %s --check-prefix=START2
+
+# We allow sign extended 32 bit start addresses as well.
+# RUN: llvm-objcopy -O ihex --set-start=0xFFFFFFFF80001000 %t - | FileCheck %s --check-prefix=START3
+
+# Start address which exceeds 32 bit range triggers an error
+# RUN: not llvm-objcopy -O ihex --set-start=0xF00000000 %t %t6.hex 2>&1 | FileCheck %s --check-prefix=BAD-START
+
+# CHECK:      :10000000000102030405060708090A0B0C0D0E0F78
+# CHECK-NEXT: :05001000101112131491
+# CHECK-NEXT: :08FFF800303132333435363765
+# CHECK-NEXT: :020000021000EC
+# CHECK-NEXT: :030000003839404C
+# CHECK-NEXT: :0401000040414243F5
+# CHECK-NEXT: :020000020000FC
+# CHECK-NEXT: :020000040010EA
+# CHECK-NEXT: :08FFF800505152535455565765
+# CHECK-NEXT: :020000040011E9
+# CHECK-NEXT: :03000000585960EC
+# CHECK-NEXT: :00000001FF
+
+# SEGMENTS:       :020000040010EA
+# SEGMENTS-NEXT:  :10000000000102030405060708090A0B0C0D0E0F78
+# SEGMENTS-NEXT:  :05001000101112131491
+# SEGMENTS-NEXT:  :0B001800303132333435363738394090
+# SEGMENTS-NEXT:  :0400280040414243CE
+# SEGMENTS-NEXT:  :0B003000505152535455565758596018
+# SEGMENTS-NEXT:  :0400000500100000E7
+# SEGMENTS-NEXT:  :00000001FF
+
+# 'ExtendedAddr' (04) record shouldn't be created
+# PT_NULL-NOT: :02000004
+
+# SIGN_EXTENDED:      :0200000480007A
+# SIGN_EXTENDED-NEXT: :051000000001020304E1
+# SIGN_EXTENDED-NEXT: :00000001FF
+
+# BAD-ADDR: error: {{.*}}: Section '.text2' address range [0xfffffff8, 0x100000000] is not 32 bit
+# BAD-ADDR2: error: {{.*}}: Section '.text3' address range [0xffffffff0, 0xffffffff4] is not 32 bit
+
+# There shouldn't be 'ExtendedAddr' nor 'Data' records
+# ZERO_SIZE_SEC-NOT:  :02000004
+# ZERO_SIZE_SEC-NOT:  :00FFFF00
+# ZERO_SIZE_SEC:      :00000001FF
+
+# START1: :040000030000FFFFFB
+# START2: :0400000500100000E7
+# START3: :040000058000100067
+# BAD-START: error: {{.*}}: Entry point address 0xf00000000 overflows 32 bits
diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp
index 0b0023f52d8ab..b138544fcd7c7 100644
--- a/llvm/tools/llvm-objcopy/CopyConfig.cpp
+++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -458,7 +458,8 @@ Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
       return MI.takeError();
     Config.BinaryArch = *MI;
   }
-  if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary") {
+  if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary" &&
+      Config.OutputFormat != "ihex") {
     Expected<MachineInfo> MI = getOutputFormatMachineInfo(Config.OutputFormat);
     if (!MI)
       return MI.takeError();
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index be25bd5ee4391..efb8f0582ec34 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -130,12 +130,9 @@ static ElfType getOutputElfType(const MachineInfo &MI) {
     return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
 }
 
-static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
-                                            Object &Obj, Buffer &Buf,
-                                            ElfType OutputElfType) {
-  if (Config.OutputFormat == "binary") {
-    return llvm::make_unique<BinaryWriter>(Obj, Buf);
-  }
+static std::unique_ptr<Writer> createELFWriter(const CopyConfig &Config,
+                                               Object &Obj, Buffer &Buf,
+                                               ElfType OutputElfType) {
   // Depending on the initial ELFT and OutputFormat we need a different Writer.
   switch (OutputElfType) {
   case ELFT_ELF32LE:
@@ -154,6 +151,17 @@ static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
   llvm_unreachable("Invalid output format");
 }
 
+static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
+                                            Object &Obj, Buffer &Buf,
+                                            ElfType OutputElfType) {
+  using Functor = std::function<std::unique_ptr<Writer>()>;
+  return StringSwitch<Functor>(Config.OutputFormat)
+      .Case("binary", [&] { return llvm::make_unique<BinaryWriter>(Obj, Buf); })
+      .Case("ihex", [&] { return llvm::make_unique<IHexWriter>(Obj, Buf); })
+      .Default(
+          [&] { return createELFWriter(Config, Obj, Buf, OutputElfType); })();
+}
+
 template <class ELFT>
 static Expected<ArrayRef<uint8_t>>
 findBuildID(const CopyConfig &Config, const object::ELFFile<ELFT> &In) {
@@ -714,6 +722,15 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj,
   return Error::success();
 }
 
+static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out,
+                         ElfType OutputElfType) {
+  std::unique_ptr<Writer> Writer =
+      createWriter(Config, Obj, Out, OutputElfType);
+  if (Error E = Writer->finalize())
+    return E;
+  return Writer->write();
+}
+
 Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
                                 Buffer &Out) {
   BinaryReader Reader(Config.BinaryArch, &In);
@@ -721,15 +738,11 @@ Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
 
   // Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
   // (-B<arch>).
-  const ElfType OutputElfType = getOutputElfType(
-      Config.OutputArch ? Config.OutputArch.getValue() : Config.BinaryArch);
+  const ElfType OutputElfType =
+      getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
   if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
     return E;
-  std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, OutputElfType);
-  if (Error E = Writer->finalize())
-    return E;
-  return Writer->write();
+  return writeOutput(Config, *Obj, Out, OutputElfType);
 }
 
 Error executeObjcopyOnBinary(const CopyConfig &Config,
@@ -764,12 +777,8 @@ Error executeObjcopyOnBinary(const CopyConfig &Config,
   if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
     return createFileError(Config.InputFilename, std::move(E));
 
-  std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, OutputElfType);
-  if (Error E = Writer->finalize())
+  if (Error E = writeOutput(Config, *Obj, Out, OutputElfType))
     return createFileError(Config.InputFilename, std::move(E));
-  if (Error E = Writer->write())
-    return E;
   if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput)
     if (Error E =
             linkToBuildIdDir(Config, Config.OutputFilename,
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 85e7ffa6d8ecb..7a9a1bd37e5e3 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -17,7 +17,7 @@
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/Compression.h"
-#include "llvm/Support/Errc.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/Path.h"
@@ -147,6 +147,156 @@ void SectionWriter::visit(const Section &Sec) {
     llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
 }
 
+static bool addressOverflows32bit(uint64_t Addr) {
+  // Sign extended 32 bit addresses (e.g 0xFFFFFFFF80000000) are ok
+  return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
+}
+
+template <class T> static T checkedGetHex(StringRef S) {
+  T Value;
+  bool Fail = S.getAsInteger(16, Value);
+  assert(!Fail);
+  (void)Fail;
+  return Value;
+}
+
+// Fills exactly Len bytes of buffer with hexadecimal characters
+// representing value 'X'
+template <class T, class Iterator>
+static Iterator utohexstr(T X, Iterator It, size_t Len) {
+  // Fill range with '0'
+  std::fill(It, It + Len, '0');
+
+  for (long I = Len - 1; I >= 0; --I) {
+    unsigned char Mod = static_cast<unsigned char>(X) & 15;
+    *(It + I) = hexdigit(Mod, false);
+    X >>= 4;
+  }
+  assert(X == 0);
+  return It + Len;
+}
+
+uint8_t IHexRecord::getChecksum(StringRef S) {
+  assert((S.size() & 1) == 0);
+  uint8_t Checksum = 0;
+  while (!S.empty()) {
+    Checksum += checkedGetHex<uint8_t>(S.take_front(2));
+    S = S.drop_front(2);
+  }
+  return -Checksum;
+}
+
+IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr,
+                                 ArrayRef<uint8_t> Data) {
+  IHexLineData Line(getLineLength(Data.size()));
+  assert(Line.size());
+  auto Iter = Line.begin();
+  *Iter++ = ':';
+  Iter = utohexstr(Data.size(), Iter, 2);
+  Iter = utohexstr(Addr, Iter, 4);
+  Iter = utohexstr(Type, Iter, 2);
+  for (uint8_t X : Data)
+    Iter = utohexstr(X, Iter, 2);
+  StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter));
+  Iter = utohexstr(getChecksum(S), Iter, 2);
+  *Iter++ = '\r';
+  *Iter++ = '\n';
+  assert(Iter == Line.end());
+  return Line;
+}
+
+static uint64_t sectionPhysicalAddr(const SectionBase *Sec) {
+  Segment *Seg = Sec->ParentSegment;
+  if (Seg && Seg->Type != ELF::PT_LOAD)
+    Seg = nullptr;
+  return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset
+             : Sec->Addr;
+}
+
+void IHexSectionWriterBase::writeSection(const SectionBase *Sec,
+                                         ArrayRef<uint8_t> Data) {
+  assert(Data.size() == Sec->Size);
+  const uint32_t ChunkSize = 16;
+  uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU;
+  while (!Data.empty()) {
+    uint64_t DataSize = std::min<uint64_t>(Data.size(), ChunkSize);
+    if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) {
+      if (Addr > 0xFFFFFU) {
+        // Write extended address record, zeroing segment address
+        // if needed.
+        if (SegmentAddr != 0)
+          SegmentAddr = writeSegmentAddr(0U);
+        BaseAddr = writeBaseAddr(Addr);
+      } else {
+        // We can still remain 16-bit
+        SegmentAddr = writeSegmentAddr(Addr);
+      }
+    }
+    uint64_t SegOffset = Addr - BaseAddr - SegmentAddr;
+    assert(SegOffset <= 0xFFFFU);
+    DataSize = std::min(DataSize, 0x10000U - SegOffset);
+    writeData(0, SegOffset, Data.take_front(DataSize));
+    Addr += DataSize;
+    Data = Data.drop_front(DataSize);
+  }
+}
+
+uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFU);
+  uint8_t Data[] = {static_cast<uint8_t>((Addr & 0xF0000U) >> 12), 0};
+  writeData(2, 0, Data);
+  return Addr & 0xF0000U;
+}
+
+uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFFFFU);
+  uint64_t Base = Addr & 0xFFFF0000U;
+  uint8_t Data[] = {static_cast<uint8_t>(Base >> 24),
+                    static_cast<uint8_t>((Base >> 16) & 0xFF)};
+  writeData(4, 0, Data);
+  return Base;
+}
+
+void IHexSectionWriterBase::writeData(uint8_t Type, uint16_t Addr,
+                                      ArrayRef<uint8_t> Data) {
+  Offset += IHexRecord::getLineLength(Data.size());
+}
+
+void IHexSectionWriterBase::visit(const Section &Sec) {
+  writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
+  writeSection(&Sec, Sec.Data);
+}
+
+void IHexSectionWriterBase::visit(const StringTableSection &Sec) {
+  // Check that sizer has already done its work
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  // We are free to pass an invalid pointer to writeSection as long
+  // as we don't actually write any data. The real writer class has
+  // to override this method .
+  writeSection(&Sec, {nullptr, Sec.Size});
+}
+
+void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
+  writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
+                                  ArrayRef<uint8_t> Data) {
+  IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data);
+  memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size());
+  Offset += HexData.size();
+}
+
+void IHexSectionWriter::visit(const StringTableSection &Sec) {
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  std::vector<uint8_t> Data(Sec.Size);
+  Sec.StrTabBuilder.write(Data.data());
+  writeSection(&Sec, Data);
+}
+
 void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
 
 void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
@@ -217,6 +367,15 @@ void OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
   Visitor.visit(*this);
 }
 
+void OwnedDataSection::appendHexData(StringRef HexData) {
+  assert((HexData.size() & 1) == 0);
+  while (!HexData.empty()) {
+    Data.push_back(checkedGetHex<uint8_t>(HexData.take_front(2)));
+    HexData = HexData.drop_front(2);
+  }
+  Size = Data.size();
+}
+
 void BinarySectionWriter::visit(const CompressedSection &Sec) {
   error("cannot write compressed section '" + Sec.Name + "' ");
 }
@@ -1807,6 +1966,109 @@ Error BinaryWriter::finalize() {
   return Error::success();
 }
 
+bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs,
+                                            const SectionBase *Rhs) const {
+  return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) <
+         (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU);
+}
+
+uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) {
+  IHexLineData HexData;
+  uint8_t Data[4] = {};
+  // We don't write entry point record if entry is zero.
+  if (Obj.Entry == 0)
+    return 0;
+
+  if (Obj.Entry <= 0xFFFFFU) {
+    Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF;
+    support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry),
+                           support::big);
+    HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data);
+  } else {
+    support::endian::write(Data, static_cast<uint32_t>(Obj.Entry),
+                           support::big);
+    HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data);
+  }
+  memcpy(Buf, HexData.data(), HexData.size());
+  return HexData.size();
+}
+
+uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) {
+  IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {});
+  memcpy(Buf, HexData.data(), HexData.size());
+  return HexData.size();
+}
+
+Error IHexWriter::write() {
+  IHexSectionWriter Writer(Buf);
+  // Write sections.
+  for (const SectionBase *Sec : Sections)
+    Sec->accept(Writer);
+
+  uint64_t Offset = Writer.getBufferOffset();
+  // Write entry point address.
+  Offset += writeEntryPointRecord(Buf.getBufferStart() + Offset);
+  // Write EOF.
+  Offset += writeEndOfFileRecord(Buf.getBufferStart() + Offset);
+  assert(Offset == TotalSize);
+  return Buf.commit();
+}
+
+Error IHexWriter::checkSection(const SectionBase &Sec) {
+  uint64_t Addr = sectionPhysicalAddr(&Sec);
+  if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1))
+    return createStringError(
+        errc::invalid_argument,
+        "Section '%s' address range [%p, %p] is not 32 bit", Sec.Name.c_str(),
+        Addr, Addr + Sec.Size - 1);
+  return Error::success();
+}
+
+Error IHexWriter::finalize() {
+  bool UseSegments = false;
+  auto ShouldWrite = [](const SectionBase &Sec) {
+    return (Sec.Flags & ELF::SHF_ALLOC) && (Sec.Type != ELF::SHT_NOBITS);
+  };
+  auto IsInPtLoad = [](const SectionBase &Sec) {
+    return Sec.ParentSegment && Sec.ParentSegment->Type == ELF::PT_LOAD;
+  };
+
+  // We can't write 64-bit addresses.
+  if (addressOverflows32bit(Obj.Entry))
+    return createStringError(errc::invalid_argument,
+                             "Entry point address %p overflows 32 bits.",
+                             Obj.Entry);
+
+  // If any section we're to write has segment then we
+  // switch to using physical addresses. Otherwise we
+  // use section virtual address.
+  for (auto &Section : Obj.sections())
+    if (ShouldWrite(Section) && IsInPtLoad(Section)) {
+      UseSegments = true;
+      break;
+    }
+
+  for (auto &Section : Obj.sections())
+    if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) {
+      if (Error E = checkSection(Section))
+        return E;
+      Sections.insert(&Section);
+    }
+
+  IHexSectionWriterBase LengthCalc(Buf);
+  for (const SectionBase *Sec : Sections)
+    Sec->accept(LengthCalc);
+
+  // We need space to write section records + StartAddress record
+  // (if start adress is not zero) + EndOfFile record.
+  TotalSize = LengthCalc.getBufferOffset() +
+              (Obj.Entry ? IHexRecord::getLineLength(4) : 0) +
+              IHexRecord::getLineLength(0);
+  if (Error E = Buf.allocate(TotalSize))
+    return E;
+  return Error::success();
+}
+
 template class ELFBuilder<ELF64LE>;
 template class ELFBuilder<ELF64BE>;
 template class ELFBuilder<ELF32LE>;
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h
index 9298518bcce9b..fabbb7faac1f0 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -17,6 +17,7 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include <cstddef>
 #include <cstdint>
@@ -168,6 +169,8 @@ template <class ELFT> class ELFSectionSizer : public MutableSectionVisitor {
 
 #define MAKE_SEC_WRITER_FRIEND                                                 \
   friend class SectionWriter;                                                  \
+  friend class IHexSectionWriterBase;                                          \
+  friend class IHexSectionWriter;                                              \
   template <class ELFT> friend class ELFSectionWriter;                         \
   template <class ELFT> friend class ELFSectionSizer;
 
@@ -186,6 +189,114 @@ class BinarySectionWriter : public SectionWriter {
   explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
 };
 
+using IHexLineData = SmallVector<char, 64>;
+
+struct IHexRecord {
+  // Memory address of the record.
+  uint16_t Addr;
+  // Record type (see below).
+  uint16_t Type;
+  // Record data in hexadecimal form.
+  StringRef HexData;
+
+  // Helper method to get file length of the record
+  // including newline character
+  static size_t getLength(size_t DataSize) {
+    // :LLAAAATT[DD...DD]CC'
+    return DataSize * 2 + 11;
+  }
+
+  // Gets length of line in a file (getLength + CRLF).
+  static size_t getLineLength(size_t DataSize) {
+    return getLength(DataSize) + 2;
+  }
+
+  // Given type, address and data returns line which can
+  // be written to output file.
+  static IHexLineData getLine(uint8_t Type, uint16_t Addr,
+                              ArrayRef<uint8_t> Data);
+
+  // Calculates checksum of stringified record representation
+  // S must NOT contain leading ':' and trailing whitespace
+  // characters
+  static uint8_t getChecksum(StringRef S);
+
+  enum Type {
+    // Contains data and a 16-bit starting address for the data.
+    // The byte count specifies number of data bytes in the record.
+    Data = 0,
+    // Must occur exactly once per file in the last line of the file.
+    // The data field is empty (thus byte count is 00) and the address
+    // field is typically 0000.
+    EndOfFile = 1,
+    // The data field contains a 16-bit segment base address (thus byte
+    // count is always 02) compatible with 80x86 real mode addressing.
+    // The address field (typically 0000) is ignored. The segment address
+    // from the most recent 02 record is multiplied by 16 and added to each
+    // subsequent data record address to form the physical starting address
+    // for the data. This allows addressing up to one megabyte of address
+    // space.
+    SegmentAddr = 2,
+    // or 80x86 processors, specifies the initial content of the CS:IP
+    // registers. The address field is 0000, the byte count is always 04,
+    // the first two data bytes are the CS value, the latter two are the
+    // IP value.
+    StartAddr80x86 = 3,
+    // Allows for 32 bit addressing (up to 4GiB). The record's address field
+    // is ignored (typically 0000) and its byte count is always 02. The two
+    // data bytes (big endian) specify the upper 16 bits of the 32 bit
+    // absolute address for all subsequent type 00 records
+    ExtendedAddr = 4,
+    // The address field is 0000 (not used) and the byte count is always 04.
+    // The four data bytes represent a 32-bit address value. In the case of
+    // 80386 and higher CPUs, this address is loaded into the EIP register.
+    StartAddr = 5,
+    // We have no other valid types
+    InvalidType = 6
+  };
+};
+
+// Base class for IHexSectionWriter. This class implements writing algorithm,
+// but doesn't actually write records. It is used for output buffer size
+// calculation in IHexWriter::finalize.
+class IHexSectionWriterBase : public BinarySectionWriter {
+  // 20-bit segment address
+  uint32_t SegmentAddr = 0;
+  // Extended linear address
+  uint32_t BaseAddr = 0;
+
+  // Write segment address corresponding to 'Addr'
+  uint64_t writeSegmentAddr(uint64_t Addr);
+  // Write extended linear (base) address corresponding to 'Addr'
+  uint64_t writeBaseAddr(uint64_t Addr);
+
+protected:
+  // Offset in the output buffer
+  uint64_t Offset = 0;
+
+  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
+  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
+
+public:
+  explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}
+
+  uint64_t getBufferOffset() const { return Offset; }
+  void visit(const Section &Sec) final;
+  void visit(const OwnedDataSection &Sec) final;
+  void visit(const StringTableSection &Sec) override;
+  void visit(const DynamicRelocationSection &Sec) final;
+  using BinarySectionWriter::visit;
+};
+
+// Real IHEX section writer
+class IHexSectionWriter : public IHexSectionWriterBase {
+public:
+  IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
+
+  void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
+  void visit(const StringTableSection &Sec) override;
+};
+
 class Writer {
 protected:
   Object &Obj;
@@ -245,6 +356,25 @@ class BinaryWriter : public Writer {
   BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
 };
 
+class IHexWriter : public Writer {
+  struct SectionCompare {
+    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
+  };
+
+  std::set<const SectionBase *, SectionCompare> Sections;
+  size_t TotalSize;
+
+  Error checkSection(const SectionBase &Sec);
+  uint64_t writeEntryPointRecord(uint8_t *Buf);
+  uint64_t writeEndOfFileRecord(uint8_t *Buf);
+
+public:
+  ~IHexWriter() {}
+  Error finalize() override;
+  Error write() override;
+  IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
+};
+
 class SectionBase {
 public:
   std::string Name;
@@ -361,6 +491,16 @@ class OwnedDataSection : public SectionBase {
     OriginalOffset = std::numeric_limits<uint64_t>::max();
   }
 
+  OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
+                   uint64_t SecOff) {
+    Name = SecName.str();
+    Type = ELF::SHT_PROGBITS;
+    Addr = SecAddr;
+    Flags = SecFlags;
+    OriginalOffset = SecOff;
+  }
+
+  void appendHexData(StringRef HexData);
   void accept(SectionVisitor &Sec) const override;
   void accept(MutableSectionVisitor &Visitor) override;
 };

From 280ac1fd1dc35f1f7bce4d2b768fbcdb91f71097 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Wed, 29 May 2019 11:38:27 +0000
Subject: [PATCH 0478/1176] [MCA] Refactor class LSUnit. NFCI

This should be the last bit of refactoring in preparation for a patch that would
finally fix PR37494.

This patch introduces the concept of memory dependency groups (class
MemoryGroup) and "Load/Store Unit token" (LSUToken) to track the status of a
memory operation.

A MemoryGroup is a node of a memory dependency graph. It is used internally to
classify memory operations based on the memory operations they depend on.  Let I
and J be two memory operations, we say that I and J equivalent (for the purpose
of mapping instructions to memory dependency groups) if the set of memory
operations they depend depend on is identical.

MemoryGroups are identified by so-called LSUToken (a unique group identifier
assigned by the LSUnit to every group). When an instruction I is dispatched to
the LSUnit, the LSUnit maps I to a group, and then returns a LSUToken.
LSUTokens are used by class Scheduler to track memory dependencies.

This patch simplifies the LSUnit interface and moves most of the implementation
details to its base class (LSUnitBase). There is no user visible change to the
output.

llvm-svn: 361950
---
 llvm/include/llvm/MCA/HardwareUnits/LSUnit.h  | 284 ++++++++++++++----
 .../llvm/MCA/HardwareUnits/Scheduler.h        |   6 +-
 llvm/include/llvm/MCA/Instruction.h           |  22 +-
 llvm/lib/MCA/HardwareUnits/LSUnit.cpp         | 241 +++++++--------
 llvm/lib/MCA/HardwareUnits/Scheduler.cpp      |  87 ++----
 5 files changed, 385 insertions(+), 255 deletions(-)

diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
index e2ba9cbbf8bee..e55b700884295 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -15,7 +15,8 @@
 #ifndef LLVM_MCA_LSUNIT_H
 #define LLVM_MCA_LSUNIT_H
 
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCSchedule.h"
 #include "llvm/MCA/HardwareUnits/HardwareUnit.h"
 #include "llvm/MCA/Instruction.h"
@@ -25,6 +26,143 @@ namespace mca {
 
 class Scheduler;
 
+/// A node of a memory dependency graph. A MemoryGroup describes a set of
+/// instructions with same memory dependencies.
+///
+/// By construction, instructions of a MemoryGroup don't depend on each other.
+/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
+/// A Memory group identifier is then stored as a "token" in field
+/// Instruction::LSUTokenID of each dispatched instructions. That token is used
+/// internally by the LSUnit to track memory dependencies.
+class MemoryGroup {
+  unsigned NumPredecessors;
+  unsigned NumExecutingPredecessors;
+  unsigned NumExecutedPredecessors;
+
+  unsigned NumInstructions;
+  unsigned NumExecuting;
+  unsigned NumExecuted;
+  SmallVector<MemoryGroup *, 4> Succ;
+
+  CriticalDependency CriticalPredecessor;
+  InstRef CriticalMemoryInstruction;
+
+  MemoryGroup(const MemoryGroup &) = delete;
+  MemoryGroup &operator=(const MemoryGroup &) = delete;
+
+public:
+  MemoryGroup()
+      : NumPredecessors(0), NumExecutingPredecessors(0),
+        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
+        NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
+  MemoryGroup(MemoryGroup &&) = default;
+
+  ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
+  unsigned getNumSuccessors() const { return Succ.size(); }
+  unsigned getNumPredecessors() const { return NumPredecessors; }
+  unsigned getNumExecutingPredecessors() const {
+    return NumExecutingPredecessors;
+  }
+  unsigned getNumExecutedPredecessors() const {
+    return NumExecutedPredecessors;
+  }
+  unsigned getNumInstructions() const { return NumInstructions; }
+  unsigned getNumExecuting() const { return NumExecuting; }
+  unsigned getNumExecuted() const { return NumExecuted; }
+
+  const InstRef &getCriticalMemoryInstruction() const { 
+    return CriticalMemoryInstruction;
+  }
+  const CriticalDependency &getCriticalPredecessor() const {
+    return CriticalPredecessor;
+  }
+
+  void addSuccessor(MemoryGroup *Group) {
+    Group->NumPredecessors++;
+    assert(!isExecuted() && "Should have been removed!");
+    if (isExecuting())
+      Group->onGroupIssued(CriticalMemoryInstruction);
+    Succ.emplace_back(Group);
+  }
+
+  bool isWaiting() const {
+    return NumPredecessors >
+           (NumExecutingPredecessors + NumExecutedPredecessors);
+  }
+  bool isPending() const {
+    return NumExecutingPredecessors &&
+           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
+            NumPredecessors);
+  }
+  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
+  bool isExecuting() const {
+    return NumExecuting == NumInstructions - NumExecuted;
+  }
+  bool isExecuted() const { return NumInstructions == NumExecuted; }
+
+  void onGroupIssued(const InstRef &IR) {
+    assert(!isReady() && "Unexpected group-start event!");
+    NumExecutingPredecessors++;
+
+    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
+    if (CriticalPredecessor.Cycles < Cycles) {
+      CriticalPredecessor.IID = IR.getSourceIndex();
+      CriticalPredecessor.Cycles = Cycles;
+    }
+  }
+
+  void onGroupExecuted() {
+    assert(!isReady() && "Inconsistent state found!");
+    NumExecutingPredecessors--;
+    NumExecutedPredecessors++;
+  }
+
+  void onInstructionIssued(const InstRef &IR) {
+    assert(!isExecuting() && "Invalid internal state!");
+    ++NumExecuting;
+
+    // update the CriticalMemDep.
+    const Instruction &IS = *IR.getInstruction();
+    if ((bool)CriticalMemoryInstruction) {
+      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
+      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
+        CriticalMemoryInstruction = IR;
+    } else {
+      CriticalMemoryInstruction = IR;
+    }
+
+    if (!isExecuting())
+      return;
+
+    // Notify successors that this group started execution.
+    for (MemoryGroup *MG : Succ)
+      MG->onGroupIssued(CriticalMemoryInstruction);
+  }
+
+  void onInstructionExecuted() {
+    assert(isReady() && !isExecuted() && "Invalid internal state!");
+    --NumExecuting;
+    ++NumExecuted;
+
+    if (!isExecuted())
+      return;
+
+    // Notify successors that this group has finished execution.
+    for (MemoryGroup *MG : Succ)
+      MG->onGroupExecuted();
+  }
+
+  void addInstruction() {
+    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
+    ++NumInstructions;
+  }
+
+  void cycleEvent() {
+    if (CriticalPredecessor.Cycles)
+      CriticalPredecessor.Cycles--;
+  }
+};
+
 /// Abstract base interface for LS (load/store) units in llvm-mca.
 class LSUnitBase : public HardwareUnit {
   /// Load queue size.
@@ -43,6 +181,9 @@ class LSUnitBase : public HardwareUnit {
   /// llvm/Target/TargetSchedule.td).
   unsigned SQSize;
 
+  unsigned UsedLQEntries;
+  unsigned UsedSQEntries;
+
   /// True if loads don't alias with stores.
   ///
   /// By default, the LS unit assumes that loads and stores don't alias with
@@ -50,6 +191,10 @@ class LSUnitBase : public HardwareUnit {
   /// alias with stores.
   const bool NoAlias;
 
+  /// Used to map group identifiers to MemoryGroups.
+  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
+  unsigned NextGroupID;
+
 public:
   LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
              unsigned StoreQueueSize, bool AssumeNoAlias);
@@ -62,6 +207,11 @@ class LSUnitBase : public HardwareUnit {
   /// Returns the total number of entries in the store queue.
   unsigned getStoreQueueSize() const { return SQSize; }
 
+  unsigned getUsedLQEntries() const { return UsedLQEntries; }
+  unsigned getUsedSQEntries() const { return UsedSQEntries; }
+  unsigned assignLQSlot() { return UsedLQEntries++; }
+  unsigned assignSQSlot() { return UsedSQEntries++; }
+
   bool assumeNoAlias() const { return NoAlias; }
 
   enum Status {
@@ -81,25 +231,71 @@ class LSUnitBase : public HardwareUnit {
   ///
   /// This method assumes that a previous call to `isAvailable(IR)` succeeded
   /// with a LSUnitBase::Status value of LSU_AVAILABLE.
-  virtual void dispatch(const InstRef &IR) = 0;
+  /// Returns the GroupID associated with this instruction. That value will be
+  /// used to set the LSUTokenID field in class Instruction.
+  virtual unsigned dispatch(const InstRef &IR) = 0;
+
+  bool isSQEmpty() const { return !UsedSQEntries; }
+  bool isLQEmpty() const { return !UsedLQEntries; }
+  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
+  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
+
+  bool isValidGroupID(unsigned Index) const {
+    return Index && (Groups.find(Index) != Groups.end());
+  }
 
   /// Check if a peviously dispatched instruction IR is now ready for execution.
-  ///
-  /// Instruction IR is assumed to be a memory operation. If IR is still waiting
-  /// on another memory instruction M, then M is returned to the caller. If IR
-  /// depends on more than one memory operations, then this method returns one
-  /// of them.
-  ///
-  /// Derived classes can implement memory consistency rules for simulated
-  /// processor within this member function.
-  virtual const InstRef &isReady(const InstRef &IR) const = 0;
+  bool isReady(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    assert(isValidGroupID(GroupID) &&
+           "Invalid group associated with this instruction!");
+    const MemoryGroup &Group = *Groups.find(GroupID)->second;
+    return Group.isReady();
+  }
+
+  /// Check if a previously dispatched instruction IR only depends on
+  /// instructions that are currently executing.
+  bool isPending(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    assert(isValidGroupID(GroupID) &&
+           "Invalid group associated with this instruction!");
+    const MemoryGroup &Group = *Groups.find(GroupID)->second;
+    return Group.isPending();
+  }
+
+  const MemoryGroup &getGroup(unsigned Index) const {
+    assert(isValidGroupID(Index) && "Group doesn't exist!");
+    return *Groups.find(Index)->second;
+  }
+
+  MemoryGroup &getGroup(unsigned Index) {
+    assert(isValidGroupID(Index) && "Group doesn't exist!");
+    return *Groups.find(Index)->second;
+  }
+
+  unsigned createMemoryGroup() {
+    Groups.insert(std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+    return NextGroupID++;
+  }
+
+  // Instruction executed event handlers.
+  virtual void onInstructionExecuted(const InstRef &IR);
+
+  virtual void onInstructionIssued(const InstRef &IR) {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    Groups[GroupID]->onInstructionIssued(IR);
+  }
+
+  virtual void cycleEvent();
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
 };
 
-/// A Load/Store Unit implementing a load and store queues.
+/// Default Load/Store Unit (LS Unit) for simulated processors.
 ///
-/// This class implements a load queue and a store queue to emulate the
-/// out-of-order execution of memory operations.
-/// Each load (or store) consumes an entry in the load (or store) queue.
+/// Each load (or store) consumes one entry in the load (or store) queue.
 ///
 /// Rules are:
 /// 1) A younger load is allowed to pass an older load only if there are no
@@ -159,14 +355,6 @@ class LSUnitBase : public HardwareUnit {
 /// the load/store queue(s). That also means, all the older loads/stores have
 /// already been executed.
 class LSUnit : public LSUnitBase {
-  // When a `MayLoad` instruction is dispatched to the schedulers for execution,
-  // the LSUnit reserves an entry in the `LoadQueue` for it.
-  //
-  // LoadQueue keeps track of all the loads that are in-flight. A load
-  // instruction is eventually removed from the LoadQueue when it reaches
-  // completion stage. That means, a load leaves the queue whe it is 'executed',
-  // and its value can be forwarded on the data path to outside units.
-  //
   // This class doesn't know about the latency of a load instruction. So, it
   // conservatively/pessimistically assumes that the latency of a load opcode
   // matches the instruction latency.
@@ -197,30 +385,17 @@ class LSUnit : public LSUnitBase {
   // alternative approaches that let instructions specify the number of
   // load/store queue entries which they consume at dispatch stage (See
   // PR39830).
-  SmallSet<InstRef, 16> LoadQueue;
-  SmallSet<InstRef, 16> StoreQueue;
-
-  void assignLQSlot(const InstRef &IR);
-  void assignSQSlot(const InstRef &IR);
-
+  //
   // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a store barrier. It forces older store to be
   // executed before newer stores are issued.
-  SmallSet<InstRef, 8> StoreBarriers;
-
+  //
   // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
   // conservatively treated as a load barrier. It forces older loads to execute
   // before newer loads are issued.
-  SmallSet<InstRef, 8> LoadBarriers;
-
-  bool isSQEmpty() const { return StoreQueue.empty(); }
-  bool isLQEmpty() const { return LoadQueue.empty(); }
-  bool isSQFull() const {
-    return getStoreQueueSize() != 0 && StoreQueue.size() == getStoreQueueSize();
-  }
-  bool isLQFull() const {
-    return getLoadQueueSize() != 0 && LoadQueue.size() == getLoadQueueSize();
-  }
+  unsigned CurrentLoadGroupID;
+  unsigned CurrentLoadBarrierGroupID;
+  unsigned CurrentStoreGroupID;
 
 public:
   LSUnit(const MCSchedModel &SM)
@@ -228,11 +403,8 @@ class LSUnit : public LSUnitBase {
   LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
       : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
   LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
-      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias) {}
-
-#ifndef NDEBUG
-  void dump() const;
-#endif
+      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
+        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
 
   /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
   /// accomodate instruction IR.
@@ -242,9 +414,6 @@ class LSUnit : public LSUnitBase {
   ///
   /// This method assumes that a previous call to `isAvailable(IR)` succeeded
   /// returning LSU_AVAILABLE.
-  void dispatch(const InstRef &IR) override;
-
-  /// Check if a peviously dispatched instruction IR is now ready for execution.
   ///
   /// Rules are:
   /// By default, rules are:
@@ -254,19 +423,12 @@ class LSUnit : public LSUnitBase {
   /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
   /// 5. A load has to wait until an older load barrier is fully executed.
   /// 6. A store has to wait until an older store barrier is fully executed.
-  const InstRef &isReady(const InstRef &IR) const override;
+  unsigned dispatch(const InstRef &IR) override;
 
-  /// Instruction executed event handler.
-  ///
-  /// Load and store instructions are tracked by their corresponding queues from
-  /// dispatch until "instruction executed" event.
-  /// When a load instruction Ld reaches the 'Executed' stage, its value
-  /// is propagated to all the dependent users, and the LS unit stops tracking
-  /// Ld.
-  /// FIXME: For simplicity, we optimistically assume a similar behavior for
-  /// store instructions. In practice, store operations don't tend to leave the
-  /// store queue until they reach the 'Retired' stage (See PR39830).
-  void onInstructionExecuted(const InstRef &IR);
+  // FIXME: For simplicity, we optimistically assume a similar behavior for
+  // store instructions. In practice, store operations don't tend to leave the
+  // store queue until they reach the 'Retired' stage (See PR39830).
+  void onInstructionExecuted(const InstRef &IR) override;
 };
 
 } // namespace mca
diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
index 41d062be568d1..27beb842dfd25 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -191,7 +191,11 @@ class Scheduler : public HardwareUnit {
   /// Returns true if instruction IR is ready to be issued to the underlying
   /// pipelines. Note that this operation cannot fail; it assumes that a
   /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
-  bool dispatch(const InstRef &IR);
+  ///
+  /// If IR is a memory operation, then the Scheduler queries the LS unit to
+  /// obtain a LS token. An LS token is used internally to track memory
+  /// dependencies.
+  bool dispatch(InstRef &IR);
 
   /// Issue an instruction and populates a vector of used pipeline resources,
   /// and a vector of instructions that transitioned to the ready state as a
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 74be06e3c1c08..d4d3f22797f7e 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -458,6 +458,11 @@ class Instruction : public InstructionBase {
   // Retire Unit token ID for this instruction.
   unsigned RCUTokenID;
 
+  // LS token ID for this instruction.
+  // This field is set to the invalid null token if this is not a memory
+  // operation.
+  unsigned LSUTokenID;
+
   // Critical register dependency.
   CriticalDependency CriticalRegDep;
 
@@ -469,19 +474,18 @@ class Instruction : public InstructionBase {
   // cycle because of unavailable pipeline resources.
   uint64_t CriticalResourceMask;
 
-  // Used internally by the logic that computes the critical memory dependency.
-  const Instruction *CurrentMemDep;
-
   // True if this instruction has been optimized at register renaming stage.
   bool IsEliminated;
 
 public:
   Instruction(const InstrDesc &D)
       : InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
-        RCUTokenID(0), CriticalRegDep(), CriticalMemDep(),
-        CriticalResourceMask(0), CurrentMemDep(nullptr), IsEliminated(false) {}
+        RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(),
+        CriticalResourceMask(0), IsEliminated(false) {}
 
   unsigned getRCUTokenID() const { return RCUTokenID; }
+  unsigned getLSUTokenID() const { return LSUTokenID; }
+  void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
   int getCyclesLeft() const { return CyclesLeft; }
 
   // Transition to the dispatch stage, and assign a RCUToken to this
@@ -523,13 +527,9 @@ class Instruction : public InstructionBase {
   const CriticalDependency &getCriticalRegDep() const { return CriticalRegDep; }
   const CriticalDependency &getCriticalMemDep() const { return CriticalMemDep; }
   const CriticalDependency &computeCriticalRegDep();
-
-  void setCriticalMemDep(unsigned IID, unsigned Cycles) {
-    CriticalMemDep.IID = IID;
-    CriticalMemDep.Cycles = Cycles;
+  void setCriticalMemDep(const CriticalDependency &MemDep) {
+    CriticalMemDep = MemDep;
   }
-  const Instruction *getCurrentMemDep() const { return CurrentMemDep; }
-  void setCurrentMemDep(const Instruction *CMD) { CurrentMemDep = CMD; }
 
   uint64_t getCriticalResourceMask() const { return CriticalResourceMask; }
   void setCriticalResourceMask(uint64_t ResourceMask) {
diff --git a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
index c3866d6bba7a0..ac1a6a36547bc 100644
--- a/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -23,7 +23,8 @@ namespace mca {
 
 LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
                        bool AssumeNoAlias)
-    : LQSize(LQ), SQSize(SQ), NoAlias(AssumeNoAlias) {
+    : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0),
+      NoAlias(AssumeNoAlias), NextGroupID(1) {
   if (SM.hasExtraProcessorInfo()) {
     const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
     if (!LQSize && EPI.LoadQueueID) {
@@ -40,47 +41,113 @@ LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
 
 LSUnitBase::~LSUnitBase() {}
 
+void LSUnitBase::cycleEvent() {
+  for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups)
+    G.second->cycleEvent();
+}
+
 #ifndef NDEBUG
-void LSUnit::dump() const {
+void LSUnitBase::dump() const {
   dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n';
   dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n';
-  dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
-  dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
+  dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n';
+  dbgs() << "[LSUnit] NextSQSlotIdx = " << getUsedSQEntries() << '\n';
+  dbgs() << "\n";
+  for (const auto &GroupIt : Groups) {
+    const MemoryGroup &Group = *GroupIt.second;
+    dbgs() << "[LSUnit] Group (" << GroupIt.first << "): "
+           << "[ #Preds = " << Group.getNumPredecessors()
+           << ", #GIssued = " << Group.getNumExecutingPredecessors()
+           << ", #GExecuted = " << Group.getNumExecutedPredecessors()
+           << ", #Inst = " << Group.getNumInstructions()
+           << ", #IIssued = " << Group.getNumExecuting()
+           << ", #IExecuted = " << Group.getNumExecuted() << '\n';
+  }
 }
 #endif
 
-void LSUnit::assignLQSlot(const InstRef &IR) {
-  assert(!isLQFull() && "Load Queue is full!");
-
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << IR.getSourceIndex()
-                    << ",slot=" << LoadQueue.size() << ">\n");
-  LoadQueue.insert(IR);
-}
-
-void LSUnit::assignSQSlot(const InstRef &IR) {
-  assert(!isSQFull() && "Store Queue is full!");
-
-  LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << IR.getSourceIndex()
-                    << ",slot=" << StoreQueue.size() << ">\n");
-  StoreQueue.insert(IR);
-}
-
-void LSUnit::dispatch(const InstRef &IR) {
+unsigned LSUnit::dispatch(const InstRef &IR) {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
   unsigned IsMemBarrier = Desc.HasSideEffects;
   assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
 
-  if (Desc.MayLoad) {
-    if (IsMemBarrier)
-      LoadBarriers.insert(IR);
-    assignLQSlot(IR);
-  }
+  if (Desc.MayLoad)
+    assignLQSlot();
+  if (Desc.MayStore)
+    assignSQSlot();
 
   if (Desc.MayStore) {
+    // Always create a new group for store operations.
+
+    // A store may not pass a previous store or store barrier.
+    unsigned NewGID = createMemoryGroup();
+    MemoryGroup &NewGroup = getGroup(NewGID);
+    NewGroup.addInstruction();
+
+    // A store may not pass a previous load or load barrier.
+    unsigned ImmediateLoadDominator =
+        std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
+    if (ImmediateLoadDominator) {
+      MemoryGroup &IDom = getGroup(ImmediateLoadDominator);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator
+                        << ") --> (" << NewGID << ")\n");
+      IDom.addSuccessor(&NewGroup);
+    }
+    if (CurrentStoreGroupID) {
+      MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+                        << ") --> (" << NewGID << ")\n");
+      StoreGroup.addSuccessor(&NewGroup);
+    }
+
+    CurrentStoreGroupID = NewGID;
+    if (Desc.MayLoad) {
+      CurrentLoadGroupID = NewGID;
+      if (IsMemBarrier)
+        CurrentLoadBarrierGroupID = NewGID;
+    }
+
+    return NewGID;
+  }
+
+  assert(Desc.MayLoad && "Expected a load!");
+
+  // Always create a new memory group if this is the first load of the sequence.
+
+  // A load may not pass a previous store unless flag 'NoAlias' is set.
+  // A load may pass a previous load.
+  // A younger load cannot pass a older load barrier.
+  // A load barrier cannot pass a older load.
+  bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier ||
+                               CurrentLoadGroupID <= CurrentStoreGroupID ||
+                               CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
+  if (ShouldCreateANewGroup) {
+    unsigned NewGID = createMemoryGroup();
+    MemoryGroup &NewGroup = getGroup(NewGID);
+    NewGroup.addInstruction();
+
+    if (!assumeNoAlias() && CurrentStoreGroupID) {
+      MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+                        << ") --> (" << NewGID << ")\n");
+      StGroup.addSuccessor(&NewGroup);
+    }
+    if (CurrentLoadBarrierGroupID) {
+      MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
+      LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
+                        << ") --> (" << NewGID << ")\n");
+      LdGroup.addSuccessor(&NewGroup);
+    }
+
+    CurrentLoadGroupID = NewGID;
     if (IsMemBarrier)
-      StoreBarriers.insert(IR);
-    assignSQSlot(IR);
+      CurrentLoadBarrierGroupID = NewGID;
+    return NewGID;
   }
+
+  MemoryGroup &Group = getGroup(CurrentLoadGroupID);
+  Group.addInstruction();
+  return CurrentLoadGroupID;
 }
 
 LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
@@ -92,106 +159,46 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
   return LSUnit::LSU_AVAILABLE;
 }
 
-const InstRef &LSUnit::isReady(const InstRef &IR) const {
+void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
   const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  const unsigned Index = IR.getSourceIndex();
   bool IsALoad = Desc.MayLoad;
   bool IsAStore = Desc.MayStore;
-  assert((IsALoad || IsAStore) && "Not a memory operation!");
-
-  if (IsALoad && !LoadBarriers.empty()) {
-    const InstRef &LoadBarrier = *LoadBarriers.begin();
-    // A younger load cannot pass a older load barrier.
-    if (Index > LoadBarrier.getSourceIndex())
-      return LoadBarrier;
-    // A load barrier cannot pass a older load.
-    if (Index == LoadBarrier.getSourceIndex()) {
-      const InstRef &Load = *LoadQueue.begin();
-      if (Index != Load.getSourceIndex())
-        return Load;
-    }
-  }
+  assert((IsALoad || IsAStore) && "Expected a memory operation!");
 
-  if (IsAStore && !StoreBarriers.empty()) {
-    const InstRef &StoreBarrier = *StoreBarriers.begin();
-    // A younger store cannot pass a older store barrier.
-    if (Index > StoreBarrier.getSourceIndex())
-      return StoreBarrier;
-    // A store barrier cannot pass a older store.
-    if (Index == StoreBarrier.getSourceIndex()) {
-      const InstRef &Store = *StoreQueue.begin();
-      if (Index != Store.getSourceIndex())
-        return Store;
-    }
-  }
-
-  // A load may not pass a previous store unless flag 'NoAlias' is set.
-  // A load may pass a previous load.
-  if (assumeNoAlias() && IsALoad)
-    return IR;
-
-  if (StoreQueue.size()) {
-    // A load may not pass a previous store.
-    // A store may not pass a previous store.
-    const InstRef &Store = *StoreQueue.begin();
-    if (Index > Store.getSourceIndex())
-      return Store;
+  unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+  auto It = Groups.find(GroupID);
+  It->second->onInstructionExecuted();
+  if (It->second->isExecuted()) {
+    Groups.erase(It);
   }
 
-  // Okay, we are older than the oldest store in the queue.
-  if (isLQEmpty())
-    return IR;
-
-  // Check if there are no older loads.
-  const InstRef &Load = *LoadQueue.begin();
-  if (Index <= Load.getSourceIndex())
-    return IR;
-
-  // A load may pass a previous load.
-  if (IsALoad)
-    return IR;
-
-  // A store may not pass a previous load.
-  return Load;
-}
-
-void LSUnit::onInstructionExecuted(const InstRef &IR) {
-  const InstrDesc &Desc = IR.getInstruction()->getDesc();
-  const unsigned Index = IR.getSourceIndex();
-  bool IsALoad = Desc.MayLoad;
-  bool IsAStore = Desc.MayStore;
-
   if (IsALoad) {
-    if (LoadQueue.erase(IR)) {
-      LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
-                        << " has been removed from the load queue.\n");
-    }
-    if (!LoadBarriers.empty()) {
-      const InstRef &LoadBarrier = *LoadBarriers.begin();
-      if (Index == LoadBarrier.getSourceIndex()) {
-        LLVM_DEBUG(
-            dbgs() << "[LSUnit]: Instruction idx=" << Index
-                   << " has been removed from the set of load barriers.\n");
-        LoadBarriers.erase(IR);
-      }
-    }
+    UsedLQEntries--;
+    LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+                      << " has been removed from the load queue.\n");
   }
 
   if (IsAStore) {
-    if (StoreQueue.erase(IR)) {
-      LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
-                        << " has been removed from the store queue.\n");
-    }
+    UsedSQEntries--;
+    LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+                      << " has been removed from the store queue.\n");
+  }
+}
 
-    if (!StoreBarriers.empty()) {
-      const InstRef &StoreBarrier = *StoreBarriers.begin();
-      if (Index == StoreBarrier.getSourceIndex()) {
-        LLVM_DEBUG(
-            dbgs() << "[LSUnit]: Instruction idx=" << Index
-                   << " has been removed from the set of store barriers.\n");
-        StoreBarriers.erase(IR);
-      }
-    }
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+  const Instruction &IS = *IR.getInstruction();
+  if (!IS.isMemOp())
+    return;
+
+  LSUnitBase::onInstructionExecuted(IR);
+  unsigned GroupID = IS.getLSUTokenID();
+  if (!isValidGroupID(GroupID)) {
+    if (GroupID == CurrentLoadGroupID)
+      CurrentLoadGroupID = 0;
+    if (GroupID == CurrentStoreGroupID)
+      CurrentStoreGroupID = 0;
+    if (GroupID == CurrentLoadBarrierGroupID)
+      CurrentLoadBarrierGroupID = 0;
   }
 }
 
diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index 6b3448fbe82cd..3afc0ac89ef02 100644
--- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -84,6 +84,12 @@ void Scheduler::issueInstructionImpl(
 
   IS->computeCriticalRegDep();
 
+  if (IS->isMemOp()) {
+    LSU.onInstructionIssued(IR);
+    const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID());
+    IS->setCriticalMemDep(Group.getCriticalPredecessor());
+  }
+
   if (IS->isExecuting())
     IssuedSet.emplace_back(IR);
   else if (IS->isExecuted())
@@ -115,59 +121,6 @@ void Scheduler::issueInstruction(
     promoteToReadySet(ReadyInstructions);
 }
 
-static bool initializeCriticalMemDepInfo(InstRef &IR, const LSUnit &LSU) {
-  Instruction &IS = *IR.getInstruction();
-  assert(IS.isMemOp() && "Not a memory operation!");
-
-  // Check if this instruction depends on another memory operation.
-  InstRef DependentMemOp = LSU.isReady(IR);
-  const Instruction *MemOp = DependentMemOp.getInstruction();
-  IS.setCurrentMemDep(MemOp);
-
-  // Initialize the CriticalMemDep structure.
-  unsigned Cycles = 0;
-  if (MemOp->isExecuting())
-    Cycles = static_cast<unsigned>(MemOp->getCyclesLeft());
-  IS.setCriticalMemDep(DependentMemOp.getSourceIndex(), Cycles);
-  return IR.getSourceIndex() == DependentMemOp.getSourceIndex();
-}
-
-static bool updateMemoryDependencyInfo(InstRef &IR, const LSUnit &LSU) {
-  Instruction &IS = *IR.getInstruction();
-  assert(IS.isMemOp() && "Not a memory operation!");
-
-  const Instruction *MemOp = IS.getCurrentMemDep();
-  if (!MemOp && initializeCriticalMemDepInfo(IR, LSU))
-    return true;
-
-  MemOp = IS.getCurrentMemDep();
-  if (MemOp == IR.getInstruction())
-    return true;
-
-  const CriticalDependency &CMD = IS.getCriticalMemDep();
-  if (MemOp->isExecuting() && !CMD.Cycles) {
-    // Update the critical memory dependency info.
-    IS.setCriticalMemDep(CMD.IID, MemOp->getCyclesLeft());
-    return false;
-  }
-
-  if (!MemOp->isExecuted() && !MemOp->isRetired())
-    return false;
-
-  // Check if there are still unsolved memory dependencies.
-  InstRef DependentMemOp = LSU.isReady(IR);
-  MemOp = DependentMemOp.getInstruction();
-  IS.setCurrentMemDep(MemOp);
-  if (DependentMemOp == IR)
-    return true;
-
-  unsigned Cycles = 0;
-  if (MemOp->isExecuting())
-    Cycles = static_cast<unsigned>(MemOp->getCyclesLeft());
-  IS.setCriticalMemDep(DependentMemOp.getSourceIndex(), Cycles);
-  return false;
-}
-
 bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
   // Scan the set of waiting instructions and promote them to the
   // ready set if operands are all ready.
@@ -179,7 +132,7 @@ bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
 
     // Check if there are unsolved memory dependencies.
     Instruction &IS = *IR.getInstruction();
-    if (IS.isMemOp() && !updateMemoryDependencyInfo(IR, LSU)) {
+    if (IS.isMemOp() && !LSU.isReady(IR)) {
       ++I;
       continue;
     }
@@ -298,14 +251,16 @@ void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
     if (Resources->checkAvailability(IS.getDesc()))
       continue;
 
-    const CriticalDependency &CMD = IS.getCriticalMemDep();
-    if (IS.isMemOp() && IS.getCurrentMemDep() != &IS && !CMD.Cycles)
-      continue;
+    if (IS.isMemOp()) {
+      const MemoryGroup &Group = LSU.getGroup(IS.getLSUTokenID());
+      if (Group.isWaiting())
+        continue;
+      if (Group.isPending())
+        MemDeps.emplace_back(IR);
+    }
 
     if (IS.isPending())
       RegDeps.emplace_back(IR);
-    if (CMD.Cycles)
-      MemDeps.emplace_back(IR);
   }
 }
 
@@ -313,6 +268,8 @@ void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
                            SmallVectorImpl<InstRef> &Executed,
                            SmallVectorImpl<InstRef> &Pending,
                            SmallVectorImpl<InstRef> &Ready) {
+  LSU.cycleEvent();
+
   // Release consumed resources.
   Resources->cycleEvent(Freed);
 
@@ -343,14 +300,14 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
   return Desc.MustIssueImmediately;
 }
 
-bool Scheduler::dispatch(const InstRef &IR) {
-  const Instruction &IS = *IR.getInstruction();
+bool Scheduler::dispatch(InstRef &IR) {
+  Instruction &IS = *IR.getInstruction();
   const InstrDesc &Desc = IS.getDesc();
   Resources->reserveBuffers(Desc.Buffers);
 
   // If necessary, reserve queue entries in the load-store unit (LSU).
   if (IS.isMemOp())
-    LSU.dispatch(IR);
+    IS.setLSUTokenID(LSU.dispatch(IR));
 
   if (IS.isPending()) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
@@ -360,9 +317,9 @@ bool Scheduler::dispatch(const InstRef &IR) {
     return false;
   }
 
-  // Memory operations that are not in a ready state are initially assigned to
-  // the WaitSet.
-  if (!IS.isReady() || (IS.isMemOp() && LSU.isReady(IR) != IR)) {
+  // Memory operations that still have unsolved memory dependencies are
+  // initially dispatched to the WaitSet.
+  if (!IS.isReady() || (IS.isMemOp() && !LSU.isReady(IR))) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
     WaitSet.push_back(IR);
     return false;

From 78c5fa97eb534c790b5ea57510b0f74eb408a53e Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Wed, 29 May 2019 12:03:41 +0000
Subject: [PATCH 0479/1176] [clangd] Another improvement for std include
 mapping.

Summary:
Improve the way of checking a symbol name is in the first cell. The previous way
is not very robost for cases where a cell lists multiple symbols (e.g. int8_t).

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62575

llvm-svn: 361951
---
 clang-tools-extra/clangd/StdSymbolMap.inc     | 25 ++++++++++++
 .../clangd/include-mapping/gen_std.py         |  7 ++--
 .../clangd/include-mapping/test.py            | 38 ++++++++++++++++++-
 3 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clangd/StdSymbolMap.inc b/clang-tools-extra/clangd/StdSymbolMap.inc
index 85fd474eed4b5..3ec94860ae792 100644
--- a/clang-tools-extra/clangd/StdSymbolMap.inc
+++ b/clang-tools-extra/clangd/StdSymbolMap.inc
@@ -454,11 +454,24 @@ SYMBOL(includes, std::, <algorithm>)
 SYMBOL(inclusive_scan, std::, <numeric>)
 SYMBOL(independent_bits_engine, std::, <random>)
 SYMBOL(indirect_array, std::, <valarray>)
+SYMBOL(initializer_list, std::, <initializer_list>)
 SYMBOL(inner_product, std::, <numeric>)
 SYMBOL(inplace_merge, std::, <algorithm>)
 SYMBOL(input_iterator_tag, std::, <iterator>)
 SYMBOL(insert_iterator, std::, <iterator>)
 SYMBOL(inserter, std::, <iterator>)
+SYMBOL(int16_t, std::, <cstdint>)
+SYMBOL(int32_t, std::, <cstdint>)
+SYMBOL(int64_t, std::, <cstdint>)
+SYMBOL(int8_t, std::, <cstdint>)
+SYMBOL(int_fast16_t, std::, <cstdint>)
+SYMBOL(int_fast32_t, std::, <cstdint>)
+SYMBOL(int_fast64_t, std::, <cstdint>)
+SYMBOL(int_fast8_t, std::, <cstdint>)
+SYMBOL(int_least16_t, std::, <cstdint>)
+SYMBOL(int_least32_t, std::, <cstdint>)
+SYMBOL(int_least64_t, std::, <cstdint>)
+SYMBOL(int_least8_t, std::, <cstdint>)
 SYMBOL(integer_sequence, std::, <utility>)
 SYMBOL(integral_constant, std::, <type_traits>)
 SYMBOL(internal, std::, <ios>)
@@ -1150,6 +1163,18 @@ SYMBOL(u16string_view, std::, <string_view>)
 SYMBOL(u32streampos, std::, <ios>)
 SYMBOL(u32string, std::, <string>)
 SYMBOL(u32string_view, std::, <string_view>)
+SYMBOL(uint16_t, std::, <cstdint>)
+SYMBOL(uint32_t, std::, <cstdint>)
+SYMBOL(uint64_t, std::, <cstdint>)
+SYMBOL(uint8_t, std::, <cstdint>)
+SYMBOL(uint_fast16_t, std::, <cstdint>)
+SYMBOL(uint_fast32_t, std::, <cstdint>)
+SYMBOL(uint_fast64_t, std::, <cstdint>)
+SYMBOL(uint_fast8_t, std::, <cstdint>)
+SYMBOL(uint_least16_t, std::, <cstdint>)
+SYMBOL(uint_least32_t, std::, <cstdint>)
+SYMBOL(uint_least64_t, std::, <cstdint>)
+SYMBOL(uint_least8_t, std::, <cstdint>)
 SYMBOL(uintmax_t, std::, <cstdint>)
 SYMBOL(uintptr_t, std::, <cstdint>)
 SYMBOL(uncaught_exceptions, std::, <exception>)
diff --git a/clang-tools-extra/clangd/include-mapping/gen_std.py b/clang-tools-extra/clangd/include-mapping/gen_std.py
index bfa6d0d49224b..c5824a0c42efa 100755
--- a/clang-tools-extra/clangd/include-mapping/gen_std.py
+++ b/clang-tools-extra/clangd/include-mapping/gen_std.py
@@ -84,10 +84,9 @@ def ParseSymbolPage(symbol_page_html, symbol_name):
     for row in table.select('tr'):
       if HasClass(row, 't-dcl', 't-dsc'):
         was_decl = True
-        # Declaration is in the first cell.
-        text = row.find('td').text
-        # Decl may not be for the symbol name we're looking for.
-        if not re.search("\\b%s\\b" % symbol_name, text):
+        # Symbols are in the first cell.
+        found_symbols = row.find('td').stripped_strings
+        if not symbol_name in found_symbols:
           continue
         headers.update(current_headers)
       elif HasClass(row, 't-dsc-header'):
diff --git a/clang-tools-extra/clangd/include-mapping/test.py b/clang-tools-extra/clangd/include-mapping/test.py
index 107257698d35b..3f17b53189c11 100755
--- a/clang-tools-extra/clangd/include-mapping/test.py
+++ b/clang-tools-extra/clangd/include-mapping/test.py
@@ -85,7 +85,11 @@ def testParseSymbolPage_MulHeaders(self):
     <td></td>
   </tr>
   <tr class="t-dcl">
-    <td>void foo()</td>
+    <td>
+      <span>void</span>
+      foo
+      <span>()</span>
+    </td>
     <td>this is matched</td>
   </tr>
 </tbody></table>
@@ -108,7 +112,11 @@ def testParseSymbolPage_MulHeadersInSameDiv(self):
 <td></td>
 </tr>
 <tr class="t-dcl">
-  <td>void foo()</td>
+  <td>
+    <span>void</span>
+    foo
+    <span>()</span>
+  </td>
   <td>this is matched</td>
 </tr>
 </tbody></table>
@@ -116,6 +124,32 @@ def testParseSymbolPage_MulHeadersInSameDiv(self):
     self.assertEqual(ParseSymbolPage(html, "foo"),
                      set(['<algorithm>', '<utility>']))
 
+  def testParseSymbolPage_MulSymbolsInSameTd(self):
+    # defined in header <cstdint>
+    #   int8_t
+    #   int16_t
+    html = """
+<table class="t-dcl-begin"><tbody>
+<tr class="t-dsc-header">
+<td><div>
+     Defined in header <code><a href="cstdint.html" title="cstdint">&lt;cstdint&gt;</a></code><br>
+</div></td>
+<td></td>
+</tr>
+<tr class="t-dcl">
+  <td>
+    <span>int8_t</span>
+    <span>int16_t</span>
+  </td>
+  <td>this is matched</td>
+</tr>
+</tbody></table>
+"""
+    self.assertEqual(ParseSymbolPage(html, "int8_t"),
+                     set(['<cstdint>']))
+    self.assertEqual(ParseSymbolPage(html, "int16_t"),
+                     set(['<cstdint>']))
+
 
 if __name__ == '__main__':
   unittest.main()

From 4c5a0d1683e1bc5d0e12de8806b37ffbdc8c5904 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Wed, 29 May 2019 12:08:11 +0000
Subject: [PATCH 0480/1176] [clangd] Remove the whitelist std symbols in
 CanonicalIncludes.

Summary: These symbols have been included via StdSymbolMap.inc.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62527

llvm-svn: 361952
---
 .../clangd/index/CanonicalIncludes.cpp          | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index 25bbffb13dcd9..4b045944f25c0 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -87,31 +87,14 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
 }
 
 void addSystemHeadersMapping(CanonicalIncludes *Includes) {
-  static const std::vector<std::pair<const char *, const char *>> SymbolMap = {
-      // Map symbols in <iosfwd> to their preferred includes.
-      {"std::basic_filebuf", "<fstream>"},
-      {"std::filebuf", "<fstream>"},
-      {"std::wfilebuf", "<fstream>"},
-      {"std::basic_istream", "<istream>"},
-      {"std::istream", "<istream>"},
-      {"std::wistream", "<istream>"},
-      {"std::basic_ostream", "<ostream>"},
-      {"std::ostream", "<ostream>"},
-      {"std::wostream", "<ostream>"},
-      {"std::uint_least16_t", "<cstdint>"}, // <type_traits> redeclares these
-      {"std::uint_least32_t", "<cstdint>"},
 #define SYMBOL(Name, NameSpace, Header) { #NameSpace#Name, #Header },
       #include "StdSymbolMap.inc"
 #undef SYMBOL
   };
-  for (const auto &Pair : SymbolMap)
-    Includes->addSymbolMapping(Pair.first, Pair.second);
 
   // FIXME: remove the std header mapping once we support ambiguous symbols, now
   // it serves as a fallback to disambiguate:
   //   - symbols with mulitiple headers (e.g. std::move)
-  //   - symbols with a primary template in one header and a specialization in
-  //     another (std::abs)
   static const std::vector<std::pair<const char *, const char *>>
       SystemHeaderMap = {
           {"include/__stddef_max_align_t.h", "<cstddef>"},

From f4fc01f8dd3a5dfd2060d1ad0df6b90e8351ddf7 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter@arm.com>
Date: Wed, 29 May 2019 12:22:54 +0000
Subject: [PATCH 0481/1176] [SVE][IR] Scalable Vector IR Type

* Adds a 'scalable' flag to VectorType
* Adds an 'ElementCount' class to VectorType to pass (possibly scalable) vector lengths, with overloaded operators.
* Modifies existing helper functions to use ElementCount
* Adds support for serializing/deserializing to/from both textual and bitcode IR formats
* Extends the verifier to reject global variables of scalable types
* Updates documentation

See the latest version of the RFC here: http://lists.llvm.org/pipermail/llvm-dev/2018-July/124396.html

Reviewers: rengolin, lattner, echristo, chandlerc, hfinkel, rkruppe, samparker, SjoerdMeijer, greened, sebpop

Reviewed By: hfinkel, sebpop

Differential Revision: https://reviews.llvm.org/D32530

llvm-svn: 361953
---
 llvm/docs/LangRef.rst                      |  54 +++++--
 llvm/include/llvm/ADT/DenseMapInfo.h       |  16 ++
 llvm/include/llvm/IR/DerivedTypes.h        |  68 +++++++--
 llvm/include/llvm/IR/Type.h                |   1 +
 llvm/include/llvm/Support/ScalableSize.h   |  43 ++++++
 llvm/lib/AsmParser/LLLexer.cpp             |   1 +
 llvm/lib/AsmParser/LLParser.cpp            |  13 +-
 llvm/lib/AsmParser/LLToken.h               |   1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp  |   6 +-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp  |   5 +-
 llvm/lib/IR/AsmWriter.cpp                  |   5 +-
 llvm/lib/IR/LLVMContextImpl.h              |   2 +-
 llvm/lib/IR/Type.cpp                       |  13 +-
 llvm/lib/IR/Verifier.cpp                   |  66 +++++++++
 llvm/test/Bitcode/compatibility.ll         |   4 +
 llvm/test/Verifier/scalable-aggregates.ll  |  31 ++++
 llvm/test/Verifier/scalable-global-vars.ll |  24 +++
 llvm/unittests/IR/CMakeLists.txt           |   1 +
 llvm/unittests/IR/VectorTypesTest.cpp      | 164 +++++++++++++++++++++
 19 files changed, 479 insertions(+), 39 deletions(-)
 create mode 100644 llvm/include/llvm/Support/ScalableSize.h
 create mode 100644 llvm/test/Verifier/scalable-aggregates.ll
 create mode 100644 llvm/test/Verifier/scalable-global-vars.ll
 create mode 100644 llvm/unittests/IR/VectorTypesTest.cpp

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 43f27da1afcd2..840272df33fd6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -675,6 +675,9 @@ an optional list of attached :ref:`metadata <metadata>`.
 Variables and aliases can have a
 :ref:`Thread Local Storage Model <tls_model>`.
 
+:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
+structs or arrays because their size is unknown at compile time.
+
 Syntax::
 
       @<GlobalVarName> = [Linkage] [PreemptionSpecifier] [Visibility]
@@ -2730,30 +2733,40 @@ Vector Type
 A vector type is a simple derived type that represents a vector of
 elements. Vector types are used when multiple primitive data are
 operated in parallel using a single instruction (SIMD). A vector type
-requires a size (number of elements) and an underlying primitive data
-type. Vector types are considered :ref:`first class <t_firstclass>`.
+requires a size (number of elements), an underlying primitive data type,
+and a scalable property to represent vectors where the exact hardware
+vector length is unknown at compile time. Vector types are considered
+:ref:`first class <t_firstclass>`.
 
 :Syntax:
 
 ::
 
-      < <# elements> x <elementtype> >
+      < <# elements> x <elementtype> >          ; Fixed-length vector
+      < vscale x <# elements> x <elementtype> > ; Scalable vector
 
 The number of elements is a constant integer value larger than 0;
 elementtype may be any integer, floating-point or pointer type. Vectors
-of size zero are not allowed.
+of size zero are not allowed. For scalable vectors, the total number of
+elements is a constant multiple (called vscale) of the specified number
+of elements; vscale is a positive integer that is unknown at compile time
+and the same hardware-dependent constant for all scalable vectors at run
+time. The size of a specific scalable vector type is thus constant within
+IR, even if the exact size in bytes cannot be determined until run time.
 
 :Examples:
 
-+-------------------+--------------------------------------------------+
-| ``<4 x i32>``     | Vector of 4 32-bit integer values.               |
-+-------------------+--------------------------------------------------+
-| ``<8 x float>``   | Vector of 8 32-bit floating-point values.        |
-+-------------------+--------------------------------------------------+
-| ``<2 x i64>``     | Vector of 2 64-bit integer values.               |
-+-------------------+--------------------------------------------------+
-| ``<4 x i64*>``    | Vector of 4 pointers to 64-bit integer values.   |
-+-------------------+--------------------------------------------------+
++------------------------+----------------------------------------------------+
+| ``<4 x i32>``          | Vector of 4 32-bit integer values.                 |
++------------------------+----------------------------------------------------+
+| ``<8 x float>``        | Vector of 8 32-bit floating-point values.          |
++------------------------+----------------------------------------------------+
+| ``<2 x i64>``          | Vector of 2 64-bit integer values.                 |
++------------------------+----------------------------------------------------+
+| ``<4 x i64*>``         | Vector of 4 pointers to 64-bit integer values.     |
++------------------------+----------------------------------------------------+
+| ``<vscale x 4 x i32>`` | Vector with a multiple of 4 32-bit integer values. |
++------------------------+----------------------------------------------------+
 
 .. _t_label:
 
@@ -8135,6 +8148,7 @@ Syntax:
 ::
 
       <result> = extractelement <n x <ty>> <val>, <ty2> <idx>  ; yields <ty>
+      <result> = extractelement <vscale x n x <ty>> <val>, <ty2> <idx> ; yields <ty>
 
 Overview:
 """""""""
@@ -8155,7 +8169,9 @@ Semantics:
 
 The result is a scalar of the same type as the element type of ``val``.
 Its value is the value at position ``idx`` of ``val``. If ``idx``
-exceeds the length of ``val``, the result is a
+exceeds the length of ``val`` for a fixed-length vector, the result is a
+:ref:`poison value <poisonvalues>`. For a scalable vector, if the value
+of ``idx`` exceeds the runtime length of the vector, the result is a
 :ref:`poison value <poisonvalues>`.
 
 Example:
@@ -8176,6 +8192,7 @@ Syntax:
 ::
 
       <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>    ; yields <n x <ty>>
+      <result> = insertelement <vscale x n x <ty>> <val>, <ty> <elt>, <ty2> <idx> ; yields <vscale x n x <ty>>
 
 Overview:
 """""""""
@@ -8197,7 +8214,9 @@ Semantics:
 
 The result is a vector of the same type as ``val``. Its element values
 are those of ``val`` except at position ``idx``, where it gets the value
-``elt``. If ``idx`` exceeds the length of ``val``, the result
+``elt``. If ``idx`` exceeds the length of ``val`` for a fixed-length vector,
+the result is a :ref:`poison value <poisonvalues>`. For a scalable vector,
+if the value of ``idx`` exceeds the runtime length of the vector, the result
 is a :ref:`poison value <poisonvalues>`.
 
 Example:
@@ -8218,6 +8237,7 @@ Syntax:
 ::
 
       <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>    ; yields <m x <ty>>
+      <result> = shufflevector <vscale x n x <ty>> <v1>, <vscale x n x <ty>> v2, <vscale x m x i32> <mask>  ; yields <vscale x m x <ty>>
 
 Overview:
 """""""""
@@ -8249,6 +8269,10 @@ undef. If any element of the mask operand is undef, that element of the
 result is undef. If the shuffle mask selects an undef element from one
 of the input vectors, the resulting element is undef.
 
+For scalable vectors, the only valid mask values at present are
+``zeroinitializer`` and ``undef``, since we cannot write all indices as
+literals for a vector with a length unknown at compile time.
+
 Example:
 """"""""
 
diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index 18d6dffa9a52e..5ef6f3ad1b046 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/ScalableSize.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -268,6 +269,21 @@ template <> struct DenseMapInfo<hash_code> {
   static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
 };
 
+template <> struct DenseMapInfo<ElementCount> {
+  static inline ElementCount getEmptyKey() { return {~0U, true}; }
+  static inline ElementCount getTombstoneKey() { return {~0U - 1, false}; }
+  static unsigned getHashValue(const ElementCount& EltCnt) {
+    if (EltCnt.Scalable)
+      return (EltCnt.Min * 37U) - 1U;
+
+    return EltCnt.Min * 37U;
+  }
+
+  static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) {
+    return LHS == RHS;
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_ADT_DENSEMAPINFO_H
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 5bf37294bb2ec..42699a0dcd3bf 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -23,6 +23,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ScalableSize.h"
 #include <cassert>
 #include <cstdint>
 
@@ -387,6 +388,8 @@ class SequentialType : public CompositeType {
   SequentialType(const SequentialType &) = delete;
   SequentialType &operator=(const SequentialType &) = delete;
 
+  /// For scalable vectors, this will return the minimum number of elements
+  /// in the vector.
   uint64_t getNumElements() const { return NumElements; }
   Type *getElementType() const { return ContainedType; }
 
@@ -422,14 +425,37 @@ uint64_t Type::getArrayNumElements() const {
 
 /// Class to represent vector types.
 class VectorType : public SequentialType {
-  VectorType(Type *ElType, unsigned NumEl);
+  /// A fully specified VectorType is of the form <vscale x n x Ty>. 'n' is the
+  /// minimum number of elements of type Ty contained within the vector, and
+  /// 'scalable' indicates that the total element count is an integer multiple
+  /// of 'n', where the multiple is either guaranteed to be one, or is
+  /// statically unknown at compile time.
+  ///
+  /// If the multiple is known to be 1, then the extra term is discarded in
+  /// textual IR:
+  ///
+  /// <4 x i32>          - a vector containing 4 i32s
+  /// <vscale x 4 x i32> - a vector containing an unknown integer multiple
+  ///                      of 4 i32s
+
+  VectorType(Type *ElType, unsigned NumEl, bool Scalable = false);
+  VectorType(Type *ElType, ElementCount EC);
+
+  // If true, the total number of elements is an unknown multiple of the
+  // minimum 'NumElements' from SequentialType. Otherwise the total number
+  // of elements is exactly equal to 'NumElements'.
+  bool Scalable;
 
 public:
   VectorType(const VectorType &) = delete;
   VectorType &operator=(const VectorType &) = delete;
 
   /// This static method is the primary way to construct an VectorType.
-  static VectorType *get(Type *ElementType, unsigned NumElements);
+  static VectorType *get(Type *ElementType, ElementCount EC);
+  static VectorType *get(Type *ElementType, unsigned NumElements,
+                         bool Scalable = false) {
+    return VectorType::get(ElementType, {NumElements, Scalable});
+  }
 
   /// This static method gets a VectorType with the same number of elements as
   /// the input type, and the element type is an integer type of the same width
@@ -438,7 +464,7 @@ class VectorType : public SequentialType {
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
     assert(EltBits && "Element size must be of a non-zero size");
     Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
-    return VectorType::get(EltTy, VTy->getNumElements());
+    return VectorType::get(EltTy, VTy->getElementCount());
   }
 
   /// This static method is like getInteger except that the element types are
@@ -446,7 +472,7 @@ class VectorType : public SequentialType {
   static VectorType *getExtendedElementVectorType(VectorType *VTy) {
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
     Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
-    return VectorType::get(EltTy, VTy->getNumElements());
+    return VectorType::get(EltTy, VTy->getElementCount());
   }
 
   /// This static method is like getInteger except that the element types are
@@ -456,29 +482,45 @@ class VectorType : public SequentialType {
     assert((EltBits & 1) == 0 &&
            "Cannot truncate vector element with odd bit-width");
     Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
-    return VectorType::get(EltTy, VTy->getNumElements());
+    return VectorType::get(EltTy, VTy->getElementCount());
   }
 
   /// This static method returns a VectorType with half as many elements as the
   /// input type and the same element type.
   static VectorType *getHalfElementsVectorType(VectorType *VTy) {
-    unsigned NumElts = VTy->getNumElements();
-    assert ((NumElts & 1) == 0 &&
+    auto EltCnt = VTy->getElementCount();
+    assert ((EltCnt.Min & 1) == 0 &&
             "Cannot halve vector with odd number of elements.");
-    return VectorType::get(VTy->getElementType(), NumElts/2);
+    return VectorType::get(VTy->getElementType(), EltCnt/2);
   }
 
   /// This static method returns a VectorType with twice as many elements as the
   /// input type and the same element type.
   static VectorType *getDoubleElementsVectorType(VectorType *VTy) {
-    unsigned NumElts = VTy->getNumElements();
-    return VectorType::get(VTy->getElementType(), NumElts*2);
+    auto EltCnt = VTy->getElementCount();
+    assert((VTy->getNumElements() * 2ull) <= UINT_MAX &&
+           "Too many elements in vector");
+    return VectorType::get(VTy->getElementType(), EltCnt*2);
   }
 
   /// Return true if the specified type is valid as a element type.
   static bool isValidElementType(Type *ElemTy);
 
-  /// Return the number of bits in the Vector type.
+  /// Return an ElementCount instance to represent the (possibly scalable)
+  /// number of elements in the vector.
+  ElementCount getElementCount() const {
+    uint64_t MinimumEltCnt = getNumElements();
+    assert(MinimumEltCnt <= UINT_MAX && "Too many elements in vector");
+    return { (unsigned)MinimumEltCnt, Scalable };
+  }
+
+  /// Returns whether or not this is a scalable vector (meaning the total
+  /// element count is a multiple of the minimum).
+  bool isScalable() const {
+    return Scalable;
+  }
+
+  /// Return the minimum number of bits in the Vector type.
   /// Returns zero when the vector is a vector of pointers.
   unsigned getBitWidth() const {
     return getNumElements() * getElementType()->getPrimitiveSizeInBits();
@@ -494,6 +536,10 @@ unsigned Type::getVectorNumElements() const {
   return cast<VectorType>(this)->getNumElements();
 }
 
+bool Type::getVectorIsScalable() const {
+  return cast<VectorType>(this)->isScalable();
+}
+
 /// Class to represent pointers.
 class PointerType : public Type {
   explicit PointerType(Type *ElType, unsigned AddrSpace);
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 5f9f44e815237..f2aa49030aaae 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -366,6 +366,7 @@ class Type {
     return ContainedTys[0];
   }
 
+  inline bool getVectorIsScalable() const;
   inline unsigned getVectorNumElements() const;
   Type *getVectorElementType() const {
     assert(getTypeID() == VectorTyID);
diff --git a/llvm/include/llvm/Support/ScalableSize.h b/llvm/include/llvm/Support/ScalableSize.h
new file mode 100644
index 0000000000000..96bf043773a0b
--- /dev/null
+++ b/llvm/include/llvm/Support/ScalableSize.h
@@ -0,0 +1,43 @@
+//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a struct that can be used to query the size of IR types
+// which may be scalable vectors. It provides convenience operators so that
+// it can be used in much the same way as a single scalar value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SCALABLESIZE_H
+#define LLVM_SUPPORT_SCALABLESIZE_H
+
+namespace llvm {
+
+class ElementCount {
+public:
+  unsigned Min;  // Minimum number of vector elements.
+  bool Scalable; // If true, NumElements is a multiple of 'Min' determined
+                 // at runtime rather than compile time.
+
+  ElementCount(unsigned Min, bool Scalable)
+  : Min(Min), Scalable(Scalable) {}
+
+  ElementCount operator*(unsigned RHS) {
+    return { Min * RHS, Scalable };
+  }
+  ElementCount operator/(unsigned RHS) {
+    return { Min / RHS, Scalable };
+  }
+
+  bool operator==(const ElementCount& RHS) const {
+    return Min == RHS.Min && Scalable == RHS.Scalable;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_SCALABLESIZE_H
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index dc8ff7f131505..a9099711be4cc 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -707,6 +707,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
   KEYWORD(umin);
 
+  KEYWORD(vscale);
   KEYWORD(x);
   KEYWORD(blockaddress);
 
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 28a8480e7d3dc..95646675cb279 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2721,7 +2721,18 @@ bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
 ///   Type
 ///     ::= '[' APSINTVAL 'x' Types ']'
 ///     ::= '<' APSINTVAL 'x' Types '>'
+///     ::= '<' 'vscale' 'x' APSINTVAL 'x' Types '>'
 bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+  bool Scalable = false;
+
+  if (isVector && Lex.getKind() == lltok::kw_vscale) {
+    Lex.Lex(); // consume the 'vscale'
+    if (ParseToken(lltok::kw_x, "expected 'x' after vscale"))
+      return true;
+
+    Scalable = true;
+  }
+
   if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
       Lex.getAPSIntVal().getBitWidth() > 64)
     return TokError("expected number in address space");
@@ -2748,7 +2759,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
       return Error(SizeLoc, "size too large for vector");
     if (!VectorType::isValidElementType(EltTy))
       return Error(TypeLoc, "invalid vector element type");
-    Result = VectorType::get(EltTy, unsigned(Size));
+    Result = VectorType::get(EltTy, unsigned(Size), Scalable);
   } else {
     if (!ArrayType::isValidElementType(EltTy))
       return Error(TypeLoc, "invalid array element type");
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index a1e7093217877..6256c14b9d693 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -37,6 +37,7 @@ enum Kind {
   bar,     // |
   colon,   // :
 
+  kw_vscale,
   kw_x,
   kw_true,
   kw_false,
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index b23115ba31d4f..134ce0367031e 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1758,7 +1758,8 @@ Error BitcodeReader::parseTypeTableBody() {
         return error("Invalid type");
       ResultTy = ArrayType::get(ResultTy, Record[0]);
       break;
-    case bitc::TYPE_CODE_VECTOR:    // VECTOR: [numelts, eltty]
+    case bitc::TYPE_CODE_VECTOR:    // VECTOR: [numelts, eltty] or
+                                    //         [numelts, eltty, scalable]
       if (Record.size() < 2)
         return error("Invalid record");
       if (Record[0] == 0)
@@ -1766,7 +1767,8 @@ Error BitcodeReader::parseTypeTableBody() {
       ResultTy = getTypeByID(Record[1]);
       if (!ResultTy || !StructType::isValidElementType(ResultTy))
         return error("Invalid type");
-      ResultTy = VectorType::get(ResultTy, Record[0]);
+      bool Scalable = Record.size() > 2 ? Record[2] : false;
+      ResultTy = VectorType::get(ResultTy, Record[0], Scalable);
       break;
     }
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 00d6fe8e27c46..8e1e06226bb46 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -931,10 +931,13 @@ void ModuleBitcodeWriter::writeTypeTable() {
     }
     case Type::VectorTyID: {
       VectorType *VT = cast<VectorType>(T);
-      // VECTOR [numelts, eltty]
+      // VECTOR [numelts, eltty] or
+      //        [numelts, eltty, scalable]
       Code = bitc::TYPE_CODE_VECTOR;
       TypeVals.push_back(VT->getNumElements());
       TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+      if (VT->isScalable())
+        TypeVals.push_back(VT->isScalable());
       break;
     }
     }
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ca7afd0d81aa0..402a1bd9df58f 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -620,7 +620,10 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
   }
   case Type::VectorTyID: {
     VectorType *PTy = cast<VectorType>(Ty);
-    OS << "<" << PTy->getNumElements() << " x ";
+    OS << "<";
+    if (PTy->isScalable())
+      OS << "vscale x ";
+    OS << PTy->getNumElements() << " x ";
     print(PTy->getElementType(), OS);
     OS << '>';
     return;
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 4560617624ea5..78cf707e0e748 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1334,7 +1334,7 @@ class LLVMContextImpl {
   unsigned NamedStructTypesUniqueID = 0;
 
   DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
-  DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+  DenseMap<std::pair<Type *, ElementCount>, VectorType*> VectorTypes;
   DenseMap<Type*, PointerType*> PointerTypes;  // Pointers in AddrSpace = 0
   DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
 
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 4016bb10ba371..3d53134ca6dfa 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -599,21 +599,20 @@ bool ArrayType::isValidElementType(Type *ElemTy) {
 //                          VectorType Implementation
 //===----------------------------------------------------------------------===//
 
-VectorType::VectorType(Type *ElType, unsigned NumEl)
-  : SequentialType(VectorTyID, ElType, NumEl) {}
+VectorType::VectorType(Type *ElType, ElementCount EC)
+  : SequentialType(VectorTyID, ElType, EC.Min), Scalable(EC.Scalable) {}
 
-VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
-  assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+VectorType *VectorType::get(Type *ElementType, ElementCount EC) {
+  assert(EC.Min > 0 && "#Elements of a VectorType must be greater than 0");
   assert(isValidElementType(ElementType) && "Element type of a VectorType must "
                                             "be an integer, floating point, or "
                                             "pointer type.");
 
   LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
   VectorType *&Entry = ElementType->getContext().pImpl
-    ->VectorTypes[std::make_pair(ElementType, NumElements)];
-
+                                 ->VectorTypes[std::make_pair(ElementType, EC)];
   if (!Entry)
-    Entry = new (pImpl->Alloc) VectorType(ElementType, NumElements);
+    Entry = new (pImpl->Alloc) VectorType(ElementType, EC);
   return Entry;
 }
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 878a0081e1977..d5c3287e7134c 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -43,6 +43,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "LLVMContextImpl.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
@@ -307,6 +308,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   TBAAVerifier TBAAVerifyHelper;
 
   void checkAtomicMemAccessSize(Type *Ty, const Instruction *I);
+  static bool containsScalableVectorValue(const Type *Ty);
 
 public:
   explicit Verifier(raw_ostream *OS, bool ShouldTreatBrokenDebugInfoAsError,
@@ -318,6 +320,33 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
 
   bool hasBrokenDebugInfo() const { return BrokenDebugInfo; }
 
+  bool verifyTypes(const Module &M) {
+    LLVMContext &Ctx = M.getContext();
+    for (auto &Entry : Ctx.pImpl->ArrayTypes) {
+      ArrayType *ATy = Entry.second;
+      if (containsScalableVectorValue(ATy)) {
+        CheckFailed("Arrays cannot contain scalable vectors", ATy, &M);
+        Broken = true;
+      }
+    }
+
+    for (StructType* STy : Ctx.pImpl->AnonStructTypes)
+      if (containsScalableVectorValue(STy)) {
+        CheckFailed("Structs cannot contain scalable vectors", STy, &M);
+        Broken = true;
+      }
+
+    for (auto &Entry : Ctx.pImpl->NamedStructTypes) {
+      StructType *STy = Entry.second;
+      if (containsScalableVectorValue(STy)) {
+        CheckFailed("Structs cannot contain scalable vectors", STy, &M);
+        Broken = true;
+      }
+    }
+
+    return !Broken;
+  }
+
   bool verify(const Function &F) {
     assert(F.getParent() == &M &&
            "An instance of this class only works with a specific module!");
@@ -387,6 +416,8 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
 
     verifyCompileUnits();
 
+    verifyTypes(M);
+
     verifyDeoptimizeCallingConvs();
     DISubprogramAttachments.clear();
     return !Broken;
@@ -613,6 +644,35 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
   });
 }
 
+// Check for a scalable vector type, making sure to look through arrays and
+// structs. Pointers to scalable vectors don't count, since we know what the
+// size of a pointer is.
+static bool containsScalableVectorValueRecursive(const Type *Ty,
+                                        SmallVectorImpl<const Type*> &Visited) {
+  if (is_contained(Visited, Ty))
+    return false;
+
+  Visited.push_back(Ty);
+
+  if (auto *VTy = dyn_cast<VectorType>(Ty))
+    return VTy->isScalable();
+
+  if (auto *ATy = dyn_cast<ArrayType>(Ty))
+    return containsScalableVectorValueRecursive(ATy->getElementType(), Visited);
+
+  if (auto *STy = dyn_cast<StructType>(Ty))
+    for (Type *EltTy : STy->elements())
+      if (containsScalableVectorValueRecursive(EltTy, Visited))
+        return true;
+
+  return false;
+}
+
+bool Verifier::containsScalableVectorValue(const Type *Ty) {
+  SmallVector<const Type*, 16> VisitedList = {};
+  return containsScalableVectorValueRecursive(Ty, VisitedList);
+}
+
 void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
   if (GV.hasInitializer()) {
     Assert(GV.getInitializer()->getType() == GV.getValueType(),
@@ -691,6 +751,12 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
                       "DIGlobalVariableExpression");
   }
 
+  // Scalable vectors cannot be global variables, since we don't know
+  // the runtime size. Need to look inside structs/arrays to find the
+  // underlying element type as well.
+  if (containsScalableVectorValue(GV.getValueType()))
+    CheckFailed("Globals cannot contain scalable vectors", &GV);
+
   if (!GV.hasInitializer()) {
     visitGlobalValue(GV);
     return;
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 6c3a6887346e0..06b81fa14a819 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -917,6 +917,10 @@ define void @typesystem() {
   ; CHECK: %t7 = alloca x86_mmx
   %t8 = alloca %opaquety*
   ; CHECK: %t8 = alloca %opaquety*
+  %t9 = alloca <4 x i32>
+  ; CHECK: %t9 = alloca <4 x i32>
+  %t10 = alloca <vscale x 4 x i32>
+  ; CHECK: %t10 = alloca <vscale x 4 x i32>
 
   ret void
 }
diff --git a/llvm/test/Verifier/scalable-aggregates.ll b/llvm/test/Verifier/scalable-aggregates.ll
new file mode 100644
index 0000000000000..8d7416bea23d1
--- /dev/null
+++ b/llvm/test/Verifier/scalable-aggregates.ll
@@ -0,0 +1,31 @@
+; RUN: not opt -S -verify < %s 2>&1 | FileCheck %s
+
+;; Arrays and Structs cannot contain scalable vectors, since we don't
+;; know the size at compile time and the container types need to have
+;; a known size.
+
+; CHECK-DAG: Arrays cannot contain scalable vectors
+; CHECK-DAG:  [2 x { i32, <vscale x 1 x i32> }]; ModuleID = '<stdin>'
+; CHECK-DAG: Arrays cannot contain scalable vectors
+; CHECK-DAG:  [4 x <vscale x 256 x i1>]; ModuleID = '<stdin>'
+; CHECK-DAG: Arrays cannot contain scalable vectors
+; CHECK-DAG:  [2 x <vscale x 4 x i32>]; ModuleID = '<stdin>'
+; CHECK-DAG: Structs cannot contain scalable vectors
+; CHECK-DAG:  { i64, [4 x <vscale x 256 x i1>] }; ModuleID = '<stdin>'
+; CHECK-DAG: Structs cannot contain scalable vectors
+; CHECK-DAG:  { i32, <vscale x 1 x i32> }; ModuleID = '<stdin>'
+; CHECK-DAG: Structs cannot contain scalable vectors
+; CHECK-DAG: { <vscale x 16 x i8>, <vscale x 2 x double> }; ModuleID = '<stdin>'
+; CHECK-DAG: Structs cannot contain scalable vectors
+; CHECK-DAG:  %sty = type { i64, <vscale x 32 x i16> }; ModuleID = '<stdin>'
+
+%sty = type { i64, <vscale x 32 x i16> }
+
+define void @scalable_aggregates() {
+  %array = alloca [2 x <vscale x 4 x i32>]
+  %struct = alloca { <vscale x 16 x i8>, <vscale x 2 x double> }
+  %named_struct = alloca %sty
+  %s_in_a = alloca [2 x { i32, <vscale x 1 x i32> } ]
+  %a_in_s = alloca { i64, [4 x <vscale x 256 x i1> ] }
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll
new file mode 100644
index 0000000000000..e7834be7799c5
--- /dev/null
+++ b/llvm/test/Verifier/scalable-global-vars.ll
@@ -0,0 +1,24 @@
+; RUN: not opt -S -verify < %s 2>&1 | FileCheck %s
+
+;; Global variables cannot be scalable vectors, since we don't
+;; know the size at compile time.
+
+; CHECK: Globals cannot contain scalable vectors
+; CHECK-NEXT: <vscale x 4 x i32>* @ScalableVecGlobal
+@ScalableVecGlobal = global <vscale x 4 x i32> zeroinitializer
+
+; CHECK: Globals cannot contain scalable vectors
+; CHECK-NEXT: [64 x <vscale x 2 x double>]* @ScalableVecGlobalArray
+@ScalableVecGlobalArray = global [64 x <vscale x 2 x double>] zeroinitializer
+
+; CHECK: Globals cannot contain scalable vectors
+; CHECK-NEXT: { <vscale x 16 x i64>, <vscale x 16 x i1> }* @ScalableVecGlobalStruct
+@ScalableVecGlobalStruct = global { <vscale x 16 x i64>, <vscale x 16 x i1> } zeroinitializer
+
+; CHECK: Globals cannot contain scalable vectors
+; CHECK-NEXT: { [4 x i32], [2 x { <vscale x 4 x i64>, <vscale x 32 x i8> }] }* @ScalableVecMixed
+@ScalableVecMixed = global { [4 x i32], [2 x { <vscale x 4 x i64>,  <vscale x 32 x i8> }]} zeroinitializer
+
+;; Global _pointers_ to scalable vectors are fine
+; CHECK-NOT: Globals cannot contain scalable vectors
+@ScalableVecPtr = global <vscale x 8 x i16>* zeroinitializer
diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt
index 5b35dc726838e..d27c6d969f177 100644
--- a/llvm/unittests/IR/CMakeLists.txt
+++ b/llvm/unittests/IR/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_unittest(IRTests
   ValueHandleTest.cpp
   ValueMapTest.cpp
   ValueTest.cpp
+  VectorTypesTest.cpp
   VerifierTest.cpp
   WaymarkTest.cpp
   )
diff --git a/llvm/unittests/IR/VectorTypesTest.cpp b/llvm/unittests/IR/VectorTypesTest.cpp
new file mode 100644
index 0000000000000..f3caf6d69761f
--- /dev/null
+++ b/llvm/unittests/IR/VectorTypesTest.cpp
@@ -0,0 +1,164 @@
+//===--- llvm/unittest/IR/VectorTypesTest.cpp - vector types unit tests ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ScalableSize.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+
+namespace {
+TEST(VectorTypesTest, FixedLength) {
+  LLVMContext Ctx;
+
+  Type *Int16Ty = Type::getInt16Ty(Ctx);
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *Int64Ty = Type::getInt64Ty(Ctx);
+  Type *Float64Ty = Type::getDoubleTy(Ctx);
+
+  VectorType *V8Int32Ty = VectorType::get(Int32Ty, 8);
+  ASSERT_FALSE(V8Int32Ty->isScalable());
+  EXPECT_EQ(V8Int32Ty->getNumElements(), 8U);
+  EXPECT_EQ(V8Int32Ty->getElementType()->getScalarSizeInBits(), 32U);
+
+  VectorType *V8Int16Ty = VectorType::get(Int16Ty, {8, false});
+  ASSERT_FALSE(V8Int16Ty->isScalable());
+  EXPECT_EQ(V8Int16Ty->getNumElements(), 8U);
+  EXPECT_EQ(V8Int16Ty->getElementType()->getScalarSizeInBits(), 16U);
+
+  ElementCount EltCnt(4, false);
+  VectorType *V4Int64Ty = VectorType::get(Int64Ty, EltCnt);
+  ASSERT_FALSE(V4Int64Ty->isScalable());
+  EXPECT_EQ(V4Int64Ty->getNumElements(), 4U);
+  EXPECT_EQ(V4Int64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *V2Int64Ty = VectorType::get(Int64Ty, EltCnt/2);
+  ASSERT_FALSE(V2Int64Ty->isScalable());
+  EXPECT_EQ(V2Int64Ty->getNumElements(), 2U);
+  EXPECT_EQ(V2Int64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *V8Int64Ty = VectorType::get(Int64Ty, EltCnt*2);
+  ASSERT_FALSE(V8Int64Ty->isScalable());
+  EXPECT_EQ(V8Int64Ty->getNumElements(), 8U);
+  EXPECT_EQ(V8Int64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *V4Float64Ty = VectorType::get(Float64Ty, EltCnt);
+  ASSERT_FALSE(V4Float64Ty->isScalable());
+  EXPECT_EQ(V4Float64Ty->getNumElements(), 4U);
+  EXPECT_EQ(V4Float64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ExtTy = VectorType::getExtendedElementVectorType(V8Int16Ty);
+  EXPECT_EQ(ExtTy, V8Int32Ty);
+  ASSERT_FALSE(ExtTy->isScalable());
+  EXPECT_EQ(ExtTy->getNumElements(), 8U);
+  EXPECT_EQ(ExtTy->getElementType()->getScalarSizeInBits(), 32U);
+
+  VectorType *TruncTy = VectorType::getTruncatedElementVectorType(V8Int32Ty);
+  EXPECT_EQ(TruncTy, V8Int16Ty);
+  ASSERT_FALSE(TruncTy->isScalable());
+  EXPECT_EQ(TruncTy->getNumElements(), 8U);
+  EXPECT_EQ(TruncTy->getElementType()->getScalarSizeInBits(), 16U);
+
+  VectorType *HalvedTy = VectorType::getHalfElementsVectorType(V4Int64Ty);
+  EXPECT_EQ(HalvedTy, V2Int64Ty);
+  ASSERT_FALSE(HalvedTy->isScalable());
+  EXPECT_EQ(HalvedTy->getNumElements(), 2U);
+  EXPECT_EQ(HalvedTy->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *DoubledTy = VectorType::getDoubleElementsVectorType(V4Int64Ty);
+  EXPECT_EQ(DoubledTy, V8Int64Ty);
+  ASSERT_FALSE(DoubledTy->isScalable());
+  EXPECT_EQ(DoubledTy->getNumElements(), 8U);
+  EXPECT_EQ(DoubledTy->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ConvTy = VectorType::getInteger(V4Float64Ty);
+  EXPECT_EQ(ConvTy, V4Int64Ty);
+  ASSERT_FALSE(ConvTy->isScalable());
+  EXPECT_EQ(ConvTy->getNumElements(), 4U);
+  EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U);
+
+  EltCnt = V8Int64Ty->getElementCount();
+  EXPECT_EQ(EltCnt.Min, 8U);
+  ASSERT_FALSE(EltCnt.Scalable);
+}
+
+TEST(VectorTypesTest, Scalable) {
+  LLVMContext Ctx;
+
+  Type *Int16Ty = Type::getInt16Ty(Ctx);
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *Int64Ty = Type::getInt64Ty(Ctx);
+  Type *Float64Ty = Type::getDoubleTy(Ctx);
+
+  VectorType *ScV8Int32Ty = VectorType::get(Int32Ty, 8, true);
+  ASSERT_TRUE(ScV8Int32Ty->isScalable());
+  EXPECT_EQ(ScV8Int32Ty->getNumElements(), 8U);
+  EXPECT_EQ(ScV8Int32Ty->getElementType()->getScalarSizeInBits(), 32U);
+
+  VectorType *ScV8Int16Ty = VectorType::get(Int16Ty, {8, true});
+  ASSERT_TRUE(ScV8Int16Ty->isScalable());
+  EXPECT_EQ(ScV8Int16Ty->getNumElements(), 8U);
+  EXPECT_EQ(ScV8Int16Ty->getElementType()->getScalarSizeInBits(), 16U);
+
+  ElementCount EltCnt(4, true);
+  VectorType *ScV4Int64Ty = VectorType::get(Int64Ty, EltCnt);
+  ASSERT_TRUE(ScV4Int64Ty->isScalable());
+  EXPECT_EQ(ScV4Int64Ty->getNumElements(), 4U);
+  EXPECT_EQ(ScV4Int64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ScV2Int64Ty = VectorType::get(Int64Ty, EltCnt/2);
+  ASSERT_TRUE(ScV2Int64Ty->isScalable());
+  EXPECT_EQ(ScV2Int64Ty->getNumElements(), 2U);
+  EXPECT_EQ(ScV2Int64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ScV8Int64Ty = VectorType::get(Int64Ty, EltCnt*2);
+  ASSERT_TRUE(ScV8Int64Ty->isScalable());
+  EXPECT_EQ(ScV8Int64Ty->getNumElements(), 8U);
+  EXPECT_EQ(ScV8Int64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ScV4Float64Ty = VectorType::get(Float64Ty, EltCnt);
+  ASSERT_TRUE(ScV4Float64Ty->isScalable());
+  EXPECT_EQ(ScV4Float64Ty->getNumElements(), 4U);
+  EXPECT_EQ(ScV4Float64Ty->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ExtTy = VectorType::getExtendedElementVectorType(ScV8Int16Ty);
+  EXPECT_EQ(ExtTy, ScV8Int32Ty);
+  ASSERT_TRUE(ExtTy->isScalable());
+  EXPECT_EQ(ExtTy->getNumElements(), 8U);
+  EXPECT_EQ(ExtTy->getElementType()->getScalarSizeInBits(), 32U);
+
+  VectorType *TruncTy = VectorType::getTruncatedElementVectorType(ScV8Int32Ty);
+  EXPECT_EQ(TruncTy, ScV8Int16Ty);
+  ASSERT_TRUE(TruncTy->isScalable());
+  EXPECT_EQ(TruncTy->getNumElements(), 8U);
+  EXPECT_EQ(TruncTy->getElementType()->getScalarSizeInBits(), 16U);
+
+  VectorType *HalvedTy = VectorType::getHalfElementsVectorType(ScV4Int64Ty);
+  EXPECT_EQ(HalvedTy, ScV2Int64Ty);
+  ASSERT_TRUE(HalvedTy->isScalable());
+  EXPECT_EQ(HalvedTy->getNumElements(), 2U);
+  EXPECT_EQ(HalvedTy->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *DoubledTy = VectorType::getDoubleElementsVectorType(ScV4Int64Ty);
+  EXPECT_EQ(DoubledTy, ScV8Int64Ty);
+  ASSERT_TRUE(DoubledTy->isScalable());
+  EXPECT_EQ(DoubledTy->getNumElements(), 8U);
+  EXPECT_EQ(DoubledTy->getElementType()->getScalarSizeInBits(), 64U);
+
+  VectorType *ConvTy = VectorType::getInteger(ScV4Float64Ty);
+  EXPECT_EQ(ConvTy, ScV4Int64Ty);
+  ASSERT_TRUE(ConvTy->isScalable());
+  EXPECT_EQ(ConvTy->getNumElements(), 4U);
+  EXPECT_EQ(ConvTy->getElementType()->getScalarSizeInBits(), 64U);
+
+  EltCnt = ScV8Int64Ty->getElementCount();
+  EXPECT_EQ(EltCnt.Min, 8U);
+  ASSERT_TRUE(EltCnt.Scalable);
+}
+
+} // end anonymous namespace

From 33da02762fe129f8167924892000295991b6daf3 Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Wed, 29 May 2019 12:26:23 +0000
Subject: [PATCH 0482/1176] Attempt to fix buildbot after r361949

llvm-svn: 361954
---
 llvm/tools/llvm-objcopy/ELF/Object.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 7a9a1bd37e5e3..a43fab28e8821 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -276,7 +276,7 @@ void IHexSectionWriterBase::visit(const StringTableSection &Sec) {
   // We are free to pass an invalid pointer to writeSection as long
   // as we don't actually write any data. The real writer class has
   // to override this method .
-  writeSection(&Sec, {nullptr, Sec.Size});
+  writeSection(&Sec, {nullptr, static_cast<size_t>(Sec.Size)});
 }
 
 void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {

From e1312c54d3dc2aa8820a505de460939a2042050d Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm@google.com>
Date: Wed, 29 May 2019 12:40:36 +0000
Subject: [PATCH 0483/1176] [LibTooling] Add `before` and `after` selectors for
 selecting point-ranges relative to nodes.

Summary:
The `before` and `after` selectors allow users to specify a zero-length range --
a point -- at the relevant location in an AST-node's source.  Point ranges can
be useful, for example, to insert a change using an API that takes a range to be
modified (e.g. `tooling::change()`).

Reviewers: ilya-biryukov

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62419

llvm-svn: 361955
---
 .../clang/Tooling/Refactoring/RangeSelector.h |  9 +++
 .../lib/Tooling/Refactoring/RangeSelector.cpp | 22 +++++++
 clang/unittests/Tooling/RangeSelectorTest.cpp | 57 ++++++++++++++++++-
 3 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Tooling/Refactoring/RangeSelector.h b/clang/include/clang/Tooling/Refactoring/RangeSelector.h
index 2d878b90aa5e8..b117e4d82ad46 100644
--- a/clang/include/clang/Tooling/Refactoring/RangeSelector.h
+++ b/clang/include/clang/Tooling/Refactoring/RangeSelector.h
@@ -37,6 +37,15 @@ RangeSelector range(RangeSelector Begin, RangeSelector End);
 /// Convenience version of \c range where end-points are bound nodes.
 RangeSelector range(std::string BeginID, std::string EndID);
 
+/// Selects the (empty) range [B,B) when \p Selector selects the range [B,E).
+RangeSelector before(RangeSelector Selector);
+
+/// Selects the the point immediately following \p Selector. That is, the
+/// (empty) range [E,E), when \p Selector selects either
+/// * the CharRange [B,E) or
+/// * the TokenRange [B,E'] where the token at E' spans the range [E,E').
+RangeSelector after(RangeSelector Selector);
+
 /// Selects a node, including trailing semicolon (for non-expression
 /// statements). \p ID is the node's binding in the match result.
 RangeSelector node(std::string ID);
diff --git a/clang/lib/Tooling/Refactoring/RangeSelector.cpp b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
index d5f82d4262bec..768c02e2277b3 100644
--- a/clang/lib/Tooling/Refactoring/RangeSelector.cpp
+++ b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
@@ -104,6 +104,28 @@ static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
   return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
 }
 
+RangeSelector tooling::before(RangeSelector Selector) {
+  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> SelectedRange = Selector(Result);
+    if (!SelectedRange)
+      return SelectedRange.takeError();
+    return CharSourceRange::getCharRange(SelectedRange->getBegin());
+  };
+}
+
+RangeSelector tooling::after(RangeSelector Selector) {
+  return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> SelectedRange = Selector(Result);
+    if (!SelectedRange)
+      return SelectedRange.takeError();
+    if (SelectedRange->isCharRange())
+      return CharSourceRange::getCharRange(SelectedRange->getEnd());
+    return CharSourceRange::getCharRange(Lexer::getLocForEndOfToken(
+        SelectedRange->getEnd(), 0, Result.Context->getSourceManager(),
+        Result.Context->getLangOpts()));
+  };
+}
+
 RangeSelector tooling::node(std::string ID) {
   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp
index ae323fc512bc6..38c15be00cd06 100644
--- a/clang/unittests/Tooling/RangeSelectorTest.cpp
+++ b/clang/unittests/Tooling/RangeSelectorTest.cpp
@@ -21,13 +21,15 @@ using namespace tooling;
 using namespace ast_matchers;
 
 namespace {
-using ::testing::AllOf;
-using ::testing::HasSubstr;
-using MatchResult = MatchFinder::MatchResult;
 using ::llvm::Expected;
 using ::llvm::Failed;
 using ::llvm::HasValue;
 using ::llvm::StringError;
+using ::testing::AllOf;
+using ::testing::HasSubstr;
+using ::testing::Property;
+
+using MatchResult = MatchFinder::MatchResult;
 
 struct TestMatch {
   // The AST unit from which `result` is built. We bundle it because it backs
@@ -117,6 +119,55 @@ TEST(RangeSelectorTest, UnboundNode) {
                        Failed<StringError>(withUnboundNodeMessage()));
 }
 
+MATCHER_P(EqualsCharSourceRange, Range, "") {
+  return Range.getAsRange() == arg.getAsRange() &&
+         Range.isTokenRange() == arg.isTokenRange();
+}
+
+// FIXME: here and elsewhere: use llvm::Annotations library to explicitly mark
+// points and ranges of interest, enabling more readable tests.
+TEST(RangeSelectorTest, BeforeOp) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  StringRef Call = "call";
+  TestMatch Match = matchCode(Code, callExpr().bind(Call));
+  const auto* E = Match.Result.Nodes.getNodeAs<Expr>(Call);
+  assert(E != nullptr);
+  auto ExprBegin = E->getSourceRange().getBegin();
+  EXPECT_THAT_EXPECTED(
+      before(node(Call))(Match.Result),
+      HasValue(EqualsCharSourceRange(
+          CharSourceRange::getCharRange(ExprBegin, ExprBegin))));
+}
+
+TEST(RangeSelectorTest, AfterOp) {
+  StringRef Code = R"cc(
+    int f(int x, int y, int z) { return 3; }
+    int g() { return f(/* comment */ 3, 7 /* comment */, 9); }
+  )cc";
+  StringRef Call = "call";
+  TestMatch Match = matchCode(Code, callExpr().bind(Call));
+  const auto* E = Match.Result.Nodes.getNodeAs<Expr>(Call);
+  assert(E != nullptr);
+  const SourceRange Range = E->getSourceRange();
+  // The end token, a right paren, is one character wide, so advance by one,
+  // bringing us to the semicolon.
+  const SourceLocation SemiLoc = Range.getEnd().getLocWithOffset(1);
+  const auto ExpectedAfter = CharSourceRange::getCharRange(SemiLoc, SemiLoc);
+
+  // Test with a char range.
+  auto CharRange = CharSourceRange::getCharRange(Range.getBegin(), SemiLoc);
+  EXPECT_THAT_EXPECTED(after(charRange(CharRange))(Match.Result),
+                       HasValue(EqualsCharSourceRange(ExpectedAfter)));
+
+  // Test with a token range.
+  auto TokenRange = CharSourceRange::getTokenRange(Range);
+  EXPECT_THAT_EXPECTED(after(charRange(TokenRange))(Match.Result),
+                       HasValue(EqualsCharSourceRange(ExpectedAfter)));
+}
+
 TEST(RangeSelectorTest, RangeOp) {
   StringRef Code = R"cc(
     int f(int x, int y, int z) { return 3; }

From 9ffd8b5a6f845c00cdcbbed0dc8c749c1061f524 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 29 May 2019 13:14:35 +0000
Subject: [PATCH 0484/1176] AMDGPU/GlobalISel: Remove unnecesssary REQUIREs

This has been a mandatory part of the build for a while.

llvm-svn: 361956
---
 llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll  | 6 +++---
 .../CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir     | 2 --
 .../CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir     | 2 --
 .../CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir    | 2 --
 .../CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll | 1 -
 .../CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir  | 2 --
 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir       | 2 --
 llvm/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll       | 2 --
 llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll                 | 2 --
 9 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll
index 8f83feac51d86..05cbd6f9942bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - 2>&1 | FileCheck %s
-; REQUIRES: global-isel
-; This file checks that the translation from llvm IR to generic MachineInstr
-; is correct.
+
+; This file checks that the translation from llvm IR to generic
+; MachineInstr is correct.
 
 ; Tests for add.
 ; CHECK: name: addi32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index 217e576324b06..c19c2794e6a49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -1,8 +1,6 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
 
-# REQUIRES: global-isel
-
 --- |
   define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
index ba50b5996a49a..53c2afa81a4c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -2,8 +2,6 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
 
-# REQUIRES: global-isel
-
 --- |
   define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index ba38b5bff9a33..eb8e39cd08df0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -1,8 +1,6 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
 
-# REQUIRES: global-isel
-
 --- |
   define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0,
                                               i64 addrspace(1)* %global1,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
index 5756ff52b8386..72f27e68b4118 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; REQUIRES: global-isel
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA-VI %s
 
 define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir
index 55e617e107d6a..32839ffafc44b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir
@@ -2,8 +2,6 @@
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
-# REQUIRES: global-isel
-
 --- |
   define void @exp_s() {
     call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 1.0, float 1.0, float 1.0, float 1.0, i1 0, i1 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index 70c5419bea330..b7499599ee7bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -1,7 +1,5 @@
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s
 
-# REQUIRES: global-isel
-
 --- |
   define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) { ret void }
   define amdgpu_kernel void @load_global_uniform(i32 addrspace(1)* %ptr1) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
index a1bf987e65521..23ab633667775 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
@@ -1,7 +1,5 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s
 
-; REQUIRES: global-isel
-
 ; GCN-LABEL: vs_epilog
 ; GCN: s_endpgm
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
index ff33a6e18ee97..da386d2949f45 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
@@ -3,8 +3,6 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
 
-; REQUIRES: global-isel
-
 ; SMRD load with an immediate offset.
 ; GCN-LABEL: {{^}}smrd0:
 ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01

From 36e72544413aa9a3b40cb58835933449a0c6d2bd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 29 May 2019 13:14:39 +0000
Subject: [PATCH 0485/1176] SpeculateAroundPHIs: Respect convergent

llvm-svn: 361957
---
 .../Transforms/Scalar/SpeculateAroundPHIs.cpp |  8 ++
 .../SpeculateAroundPHIs/convergent.ll         | 98 +++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll

diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index 51e97cf1c782c..c13fb3e045167 100644
--- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -67,6 +67,14 @@ isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
       return false;
     }
 
+    if (auto CS = ImmutableCallSite(UI)) {
+      if (CS.isConvergent() || CS.cannotDuplicate()) {
+        LLVM_DEBUG(dbgs() << "  Unsafe: convergent "
+                   "callsite cannot de duplicated: " << *UI << '\n');
+        return false;
+      }
+    }
+
     // FIXME: This check is much too conservative. We're not going to move these
     // instructions onto new dynamic paths through the program unless there is
     // a call instruction between the use and the PHI node. And memory isn't
diff --git a/llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll b/llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll
new file mode 100644
index 0000000000000..6c035b28447e7
--- /dev/null
+++ b/llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=spec-phis < %s | FileCheck %s
+; Make sure convergent and noduplicate calls aren't duplicated.
+
+declare i32 @llvm.convergent(i32) #0
+declare i32 @llvm.noduplicate(i32) #1
+declare i32 @llvm.regular(i32) #2
+
+define i32 @test_convergent(i1 %flag, i32 %arg) #0 {
+; CHECK-LABEL: @test_convergent(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       a:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       b:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = call i32 @llvm.convergent(i32 [[P]])
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+entry:
+  br i1 %flag, label %a, label %b
+
+a:
+  br label %exit
+
+b:
+  br label %exit
+
+exit:
+  %p = phi i32 [ 7, %a ], [ 11, %b ]
+  %sum = call i32 @llvm.convergent(i32 %p)
+  ret i32 %sum
+}
+
+define i32 @test_noduplicate(i1 %flag, i32 %arg) #1 {
+; CHECK-LABEL: @test_noduplicate(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       a:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       b:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = call i32 @llvm.noduplicate(i32 [[P]])
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+entry:
+  br i1 %flag, label %a, label %b
+
+a:
+  br label %exit
+
+b:
+  br label %exit
+
+exit:
+  %p = phi i32 [ 7, %a ], [ 11, %b ]
+  %sum = call i32 @llvm.noduplicate(i32 %p)
+  ret i32 %sum
+}
+
+; Otherwise identical function which should be transformed.
+define i32 @test_reference(i1 %flag, i32 %arg) #2 {
+; CHECK-LABEL: @test_reference(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       a:
+; CHECK-NEXT:    [[SUM_0:%.*]] = call i32 @llvm.regular(i32 7)
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       b:
+; CHECK-NEXT:    [[SUM_1:%.*]] = call i32 @llvm.regular(i32 11)
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ]
+; CHECK-NEXT:    ret i32 [[SUM_PHI]]
+;
+entry:
+  br i1 %flag, label %a, label %b
+
+a:
+  br label %exit
+
+b:
+  br label %exit
+
+exit:
+  %p = phi i32 [ 7, %a ], [ 11, %b ]
+  %sum = call i32 @llvm.regular(i32 %p)
+  ret i32 %sum
+}
+
+
+attributes #0 = { nounwind readnone convergent speculatable }
+attributes #1 = { nounwind readnone noduplicate speculatable }
+attributes #2 = { nounwind readnone speculatable }

From 24c56296258b3f282eca7981369b137cc1ed4bf1 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Wed, 29 May 2019 13:41:57 +0000
Subject: [PATCH 0486/1176] [ARM] Split predicates out into their own .td file

The new ARMPredicates.td is included from ARM.td, early enough that
the predicate definitions are already in scope when ARMSchedule.td is
included. This will make it possible to refer to them in
UnsupportedFeatures fields of scheduling models.

NFC: the chunk of Tablegen being moved here is copied and pasted
verbatim.

Patch by: Simon Tatham

Differential Revision: https://reviews.llvm.org/D60693

llvm-svn: 361958
---
 llvm/lib/Target/ARM/ARM.td           |   1 +
 llvm/lib/Target/ARM/ARMInstrInfo.td  | 184 --------------------------
 llvm/lib/Target/ARM/ARMPredicates.td | 188 +++++++++++++++++++++++++++
 3 files changed, 189 insertions(+), 184 deletions(-)
 create mode 100644 llvm/lib/Target/ARM/ARMPredicates.td

diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 20a61d343b383..9af350c055117 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -777,6 +777,7 @@ def ARMv7s   : Architecture<"armv7s",      "ARMv7a",   [ARMv7a]>;
 // ARM schedules.
 //===----------------------------------------------------------------------===//
 //
+include "ARMPredicates.td"
 include "ARMSchedule.td"
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index d0821b94477a6..fd268609c2026 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -213,190 +213,6 @@ def ARMsmlalbt      : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
 def ARMsmlaltb      : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
 def ARMsmlaltt      : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
 
-//===----------------------------------------------------------------------===//
-// ARM Instruction Predicate Definitions.
-//
-def HasV4T           : Predicate<"Subtarget->hasV4TOps()">,
-                                 AssemblerPredicate<"HasV4TOps", "armv4t">;
-def NoV4T            : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T           : Predicate<"Subtarget->hasV5TOps()">,
-                                 AssemblerPredicate<"HasV5TOps", "armv5t">;
-def NoV5T            : Predicate<"!Subtarget->hasV5TOps()">;
-def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">,
-                                 AssemblerPredicate<"HasV5TEOps", "armv5te">;
-def HasV6            : Predicate<"Subtarget->hasV6Ops()">,
-                                 AssemblerPredicate<"HasV6Ops", "armv6">;
-def NoV6             : Predicate<"!Subtarget->hasV6Ops()">;
-def HasV6M           : Predicate<"Subtarget->hasV6MOps()">,
-                                 AssemblerPredicate<"HasV6MOps",
-                                                    "armv6m or armv6t2">;
-def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
-                                 AssemblerPredicate<"HasV8MBaselineOps",
-                                                    "armv8m.base">;
-def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
-                                 AssemblerPredicate<"HasV8MMainlineOps",
-                                                    "armv8m.main">;
-def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
-                                 AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
-def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV6K           : Predicate<"Subtarget->hasV6KOps()">,
-                                 AssemblerPredicate<"HasV6KOps", "armv6k">;
-def NoV6K            : Predicate<"!Subtarget->hasV6KOps()">;
-def HasV7            : Predicate<"Subtarget->hasV7Ops()">,
-                                 AssemblerPredicate<"HasV7Ops", "armv7">;
-def HasV8            : Predicate<"Subtarget->hasV8Ops()">,
-                                 AssemblerPredicate<"HasV8Ops", "armv8">;
-def PreV8            : Predicate<"!Subtarget->hasV8Ops()">,
-                                 AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
-def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
-                                 AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
-def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
-                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
-def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
-                                 AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
-def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
-                                 AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
-def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
-                                 AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
-def NoVFP            : Predicate<"!Subtarget->hasVFP2Base()">;
-def HasVFP2          : Predicate<"Subtarget->hasVFP2Base()">,
-                                 AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
-def HasVFP3          : Predicate<"Subtarget->hasVFP3Base()">,
-                                 AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
-def HasVFP4          : Predicate<"Subtarget->hasVFP4Base()">,
-                                 AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
-def HasDPVFP         : Predicate<"Subtarget->hasFP64()">,
-                                 AssemblerPredicate<"FeatureFP64",
-                                                    "double precision VFP">;
-def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8Base()">,
-                                 AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
-def HasNEON          : Predicate<"Subtarget->hasNEON()">,
-                                 AssemblerPredicate<"FeatureNEON", "NEON">;
-def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
-                                 AssemblerPredicate<"FeatureSHA2", "sha2">;
-def HasAES           : Predicate<"Subtarget->hasAES()">,
-                                 AssemblerPredicate<"FeatureAES", "aes">;
-def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
-                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
-def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
-                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
-def HasCRC           : Predicate<"Subtarget->hasCRC()">,
-                                 AssemblerPredicate<"FeatureCRC", "crc">;
-def HasRAS           : Predicate<"Subtarget->hasRAS()">,
-                                 AssemblerPredicate<"FeatureRAS", "ras">;
-def HasFP16          : Predicate<"Subtarget->hasFP16()">,
-                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
-def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
-                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
-                                 AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
-def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
-                                 AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
-def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
-                                 AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
-def HasDSP           : Predicate<"Subtarget->hasDSP()">,
-                                 AssemblerPredicate<"FeatureDSP", "dsp">;
-def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
-                                 AssemblerPredicate<"FeatureDB",
-                                                    "data-barriers">;
-def HasDFB           : Predicate<"Subtarget->hasFullDataBarrier()">,
-                                 AssemblerPredicate<"FeatureDFB",
-                                                    "full-data-barrier">;
-def HasV7Clrex  : Predicate<"Subtarget->hasV7Clrex()">,
-                            AssemblerPredicate<"FeatureV7Clrex",
-                                               "v7 clrex">;
-def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
-                                  AssemblerPredicate<"FeatureAcquireRelease",
-                                                     "acquire/release">;
-def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
-                                 AssemblerPredicate<"FeatureMP",
-                                                    "mp-extensions">;
-def HasVirtualization: Predicate<"false">,
-                                 AssemblerPredicate<"FeatureVirtualization",
-                                                   "virtualization-extensions">;
-def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
-                                 AssemblerPredicate<"FeatureTrustZone",
-                                                    "TrustZone">;
-def Has8MSecExt      : Predicate<"Subtarget->has8MSecExt()">,
-                                 AssemblerPredicate<"Feature8MSecExt",
-                                                    "ARMv8-M Security Extensions">;
-def HasZCZ           : Predicate<"Subtarget->hasZeroCycleZeroing()">;
-def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb          : Predicate<"Subtarget->isThumb()">,
-                                 AssemblerPredicate<"ModeThumb", "thumb">;
-def IsThumb1Only     : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2         : Predicate<"Subtarget->isThumb2()">,
-                                 AssemblerPredicate<"ModeThumb,FeatureThumb2",
-                                                    "thumb2">;
-def IsMClass         : Predicate<"Subtarget->isMClass()">,
-                                 AssemblerPredicate<"FeatureMClass", "armv*m">;
-def IsNotMClass      : Predicate<"!Subtarget->isMClass()">,
-                                 AssemblerPredicate<"!FeatureMClass",
-                                                    "!armv*m">;
-def IsARM            : Predicate<"!Subtarget->isThumb()">,
-                                 AssemblerPredicate<"!ModeThumb", "arm-mode">;
-def IsMachO          : Predicate<"Subtarget->isTargetMachO()">;
-def IsNotMachO       : Predicate<"!Subtarget->isTargetMachO()">;
-def IsNaCl           : Predicate<"Subtarget->isTargetNaCl()">;
-def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
-def IsNotWindows     : Predicate<"!Subtarget->isTargetWindows()">;
-def IsReadTPHard     : Predicate<"Subtarget->isReadTPHard()">;
-def IsReadTPSoft     : Predicate<"!Subtarget->isReadTPHard()">;
-def UseNaClTrap      : Predicate<"Subtarget->useNaClTrap()">,
-                                 AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
-def DontUseNaClTrap  : Predicate<"!Subtarget->useNaClTrap()">;
-
-def UseNegativeImmediates :
-  Predicate<"false">,
-            AssemblerPredicate<"!FeatureNoNegativeImmediates",
-                               "NegativeImmediates">;
-
-// FIXME: Eventually this will be just "hasV6T2Ops".
-let RecomputePerFunction = 1 in {
-  def UseMovt          : Predicate<"Subtarget->useMovt()">;
-  def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
-  def UseMovtInPic     : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
-  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
-
-  def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
-                           "  TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                           "Subtarget->hasMinSize())">;
-}
-def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
-
-// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed, and when
-// they are not slower than a mul + add sequence.
-// Do not use them for Darwin platforms.
-def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
-                                 " FPOpFusion::Fast && "
-                                 " Subtarget->hasVFP4Base()) && "
-                                 "!Subtarget->isTargetDarwin() &&"
-                                 "Subtarget->useFPVMLx()">;
-
-def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
-def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
-
-def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
-def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
-
-def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
-                          "!Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
-                              "Subtarget->useNEONForSinglePrecisionFP()">;
-
-let RecomputePerFunction = 1 in {
-  def IsLE             : Predicate<"MF->getDataLayout().isLittleEndian()">;
-  def IsBE             : Predicate<"MF->getDataLayout().isBigEndian()">;
-}
-
-def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
-
-// Armv8.5-A extensions
-def HasSB            : Predicate<"Subtarget->hasSB()">,
-                       AssemblerPredicate<"FeatureSB", "sb">;
-
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
 
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
new file mode 100644
index 0000000000000..37c3098796128
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -0,0 +1,188 @@
+//===-- ARMPredicates.td - ARM Instruction Predicates ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def HasV4T           : Predicate<"Subtarget->hasV4TOps()">,
+                                 AssemblerPredicate<"HasV4TOps", "armv4t">;
+def NoV4T            : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T           : Predicate<"Subtarget->hasV5TOps()">,
+                                 AssemblerPredicate<"HasV5TOps", "armv5t">;
+def NoV5T            : Predicate<"!Subtarget->hasV5TOps()">;
+def HasV5TE          : Predicate<"Subtarget->hasV5TEOps()">,
+                                 AssemblerPredicate<"HasV5TEOps", "armv5te">;
+def HasV6            : Predicate<"Subtarget->hasV6Ops()">,
+                                 AssemblerPredicate<"HasV6Ops", "armv6">;
+def NoV6             : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M           : Predicate<"Subtarget->hasV6MOps()">,
+                                 AssemblerPredicate<"HasV6MOps",
+                                                    "armv6m or armv6t2">;
+def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
+                                 AssemblerPredicate<"HasV8MBaselineOps",
+                                                    "armv8m.base">;
+def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
+                                 AssemblerPredicate<"HasV8MMainlineOps",
+                                                    "armv8m.main">;
+def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
+                                 AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
+def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K           : Predicate<"Subtarget->hasV6KOps()">,
+                                 AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K            : Predicate<"!Subtarget->hasV6KOps()">;
+def HasV7            : Predicate<"Subtarget->hasV7Ops()">,
+                                 AssemblerPredicate<"HasV7Ops", "armv7">;
+def HasV8            : Predicate<"Subtarget->hasV8Ops()">,
+                                 AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8            : Predicate<"!Subtarget->hasV8Ops()">,
+                                 AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
+                                 AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
+                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
+                                 AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
+def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
+                                 AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
+                                 AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+def NoVFP            : Predicate<"!Subtarget->hasVFP2Base()">;
+def HasVFP2          : Predicate<"Subtarget->hasVFP2Base()">,
+                                 AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
+def HasVFP3          : Predicate<"Subtarget->hasVFP3Base()">,
+                                 AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
+def HasVFP4          : Predicate<"Subtarget->hasVFP4Base()">,
+                                 AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
+def HasDPVFP         : Predicate<"Subtarget->hasFP64()">,
+                                 AssemblerPredicate<"FeatureFP64",
+                                                    "double precision VFP">;
+def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8Base()">,
+                                 AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
+def HasNEON          : Predicate<"Subtarget->hasNEON()">,
+                                 AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
+                                 AssemblerPredicate<"FeatureSHA2", "sha2">;
+def HasAES           : Predicate<"Subtarget->hasAES()">,
+                                 AssemblerPredicate<"FeatureAES", "aes">;
+def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
+                                 AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
+                                 AssemblerPredicate<"FeatureDotProd", "dotprod">;
+def HasCRC           : Predicate<"Subtarget->hasCRC()">,
+                                 AssemblerPredicate<"FeatureCRC", "crc">;
+def HasRAS           : Predicate<"Subtarget->hasRAS()">,
+                                 AssemblerPredicate<"FeatureRAS", "ras">;
+def HasFP16          : Predicate<"Subtarget->hasFP16()">,
+                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
+                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
+                                 AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+                                 AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
+def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
+                                 AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
+def HasDSP           : Predicate<"Subtarget->hasDSP()">,
+                                 AssemblerPredicate<"FeatureDSP", "dsp">;
+def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
+                                 AssemblerPredicate<"FeatureDB",
+                                                    "data-barriers">;
+def HasDFB           : Predicate<"Subtarget->hasFullDataBarrier()">,
+                                 AssemblerPredicate<"FeatureDFB",
+                                                    "full-data-barrier">;
+def HasV7Clrex  : Predicate<"Subtarget->hasV7Clrex()">,
+                            AssemblerPredicate<"FeatureV7Clrex",
+                                               "v7 clrex">;
+def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
+                                  AssemblerPredicate<"FeatureAcquireRelease",
+                                                     "acquire/release">;
+def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
+                                 AssemblerPredicate<"FeatureMP",
+                                                    "mp-extensions">;
+def HasVirtualization: Predicate<"false">,
+                                 AssemblerPredicate<"FeatureVirtualization",
+                                                   "virtualization-extensions">;
+def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
+                                 AssemblerPredicate<"FeatureTrustZone",
+                                                    "TrustZone">;
+def Has8MSecExt      : Predicate<"Subtarget->has8MSecExt()">,
+                                 AssemblerPredicate<"Feature8MSecExt",
+                                                    "ARMv8-M Security Extensions">;
+def HasZCZ           : Predicate<"Subtarget->hasZeroCycleZeroing()">;
+def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb          : Predicate<"Subtarget->isThumb()">,
+                                 AssemblerPredicate<"ModeThumb", "thumb">;
+def IsThumb1Only     : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2         : Predicate<"Subtarget->isThumb2()">,
+                                 AssemblerPredicate<"ModeThumb,FeatureThumb2",
+                                                    "thumb2">;
+def IsMClass         : Predicate<"Subtarget->isMClass()">,
+                                 AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass      : Predicate<"!Subtarget->isMClass()">,
+                                 AssemblerPredicate<"!FeatureMClass",
+                                                    "!armv*m">;
+def IsARM            : Predicate<"!Subtarget->isThumb()">,
+                                 AssemblerPredicate<"!ModeThumb", "arm-mode">;
+def IsMachO          : Predicate<"Subtarget->isTargetMachO()">;
+def IsNotMachO       : Predicate<"!Subtarget->isTargetMachO()">;
+def IsNaCl           : Predicate<"Subtarget->isTargetNaCl()">;
+def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
+def IsNotWindows     : Predicate<"!Subtarget->isTargetWindows()">;
+def IsReadTPHard     : Predicate<"Subtarget->isReadTPHard()">;
+def IsReadTPSoft     : Predicate<"!Subtarget->isReadTPHard()">;
+def UseNaClTrap      : Predicate<"Subtarget->useNaClTrap()">,
+                                 AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap  : Predicate<"!Subtarget->useNaClTrap()">;
+
+def UseNegativeImmediates :
+  Predicate<"false">,
+            AssemblerPredicate<"!FeatureNoNegativeImmediates",
+                               "NegativeImmediates">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+let RecomputePerFunction = 1 in {
+  def UseMovt          : Predicate<"Subtarget->useMovt()">;
+  def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
+  def UseMovtInPic     : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
+
+  def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
+                           "  TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
+                           "Subtarget->hasMinSize())">;
+}
+def UseMulOps        : Predicate<"Subtarget->useMulOps()">;
+
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
+// Do not use them for Darwin platforms.
+def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
+                                 " FPOpFusion::Fast && "
+                                 " Subtarget->hasVFP4Base()) && "
+                                 "!Subtarget->isTargetDarwin() &&"
+                                 "Subtarget->useFPVMLx()">;
+
+def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
+
+def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
+
+def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
+                          "!Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
+                              "Subtarget->useNEONForSinglePrecisionFP()">;
+
+let RecomputePerFunction = 1 in {
+  def IsLE             : Predicate<"MF->getDataLayout().isLittleEndian()">;
+  def IsBE             : Predicate<"MF->getDataLayout().isBigEndian()">;
+}
+
+def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+
+// Armv8.5-A extensions
+def HasSB            : Predicate<"Subtarget->hasSB()">,
+                       AssemblerPredicate<"FeatureSB", "sb">;

From e3b1f5d22ca92cbac4cfec685e2eec8e207e6986 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 29 May 2019 13:48:19 +0000
Subject: [PATCH 0487/1176] clang-cl: Fix mangling of catchable types with
 names longer than 4kiB

The mangling used to contain the MD5 name of both the RTTI type
descriptor and the name of the copy ctor in MSVC2013, but it changed
to just the former in 2015. It looks like it changed back to the old
mangling in VS2017 version 15.7 and onwards, including VS2019 (version
16.0). VS2017 version 15.0 still has the VS2015 mangling. Versions
between 15.0 and 15.7 are't on godbolt. I found 15.4 (_MSC_VER 1911)
locally and that uses the 15.0 mangling still, but I didn't find 15.5 or
15.6, so I'm not sure where exactly it changed back.

Differential Revision: https://reviews.llvm.org/D62490

llvm-svn: 361959
---
 clang/include/clang/Basic/LangOptions.h |  3 ++-
 clang/lib/AST/MicrosoftMangle.cpp       | 16 +++++++++++-----
 clang/test/CodeGenCXX/mangle-ms-md5.cpp | 15 +++++++++++++++
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 3197584f59ea3..8099eed28c5e0 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -109,7 +109,8 @@ class LangOptions : public LangOptionsBase {
     MSVC2013 = 1800,
     MSVC2015 = 1900,
     MSVC2017 = 1910,
-    MSVC2017_5 = 1912
+    MSVC2017_5 = 1912,
+    MSVC2017_7 = 1914,
   };
 
   /// Clang versions with different platform ABI conformance.
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index db1812103006b..a021e7162e442 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -3151,12 +3151,18 @@ void MicrosoftMangleContextImpl::mangleCXXCatchableType(
   }
   Mangler.getStream() << RTTIMangling;
 
-  // VS2015 CTP6 omits the copy-constructor in the mangled name.  This name is,
-  // in fact, superfluous but I'm not sure the change was made consciously.
+  // VS2015 and VS2017.1 omit the copy-constructor in the mangled name but
+  // both older and newer versions include it.
+  // FIXME: It is known that the Ctor is present in 2013, and in 2017.7
+  // (_MSC_VER 1914) and newer, and that it's omitted in 2015 and 2017.4
+  // (_MSC_VER 1911), but it's unknown when exactly it reappeared (1914?
+  // Or 1912, 1913 aleady?).
+  bool OmitCopyCtor = getASTContext().getLangOpts().isCompatibleWithMSVC(
+                          LangOptions::MSVC2015) &&
+                      !getASTContext().getLangOpts().isCompatibleWithMSVC(
+                          LangOptions::MSVC2017_7);
   llvm::SmallString<64> CopyCtorMangling;
-  if (!getASTContext().getLangOpts().isCompatibleWithMSVC(
-          LangOptions::MSVC2015) &&
-      CD) {
+  if (!OmitCopyCtor && CD) {
     llvm::raw_svector_ostream Stream(CopyCtorMangling);
     msvc_hashing_ostream MHO(Stream);
     mangleCXXCtor(CD, CT, MHO);
diff --git a/clang/test/CodeGenCXX/mangle-ms-md5.cpp b/clang/test/CodeGenCXX/mangle-ms-md5.cpp
index 740fd61576ccb..1492984d3fede 100644
--- a/clang/test/CodeGenCXX/mangle-ms-md5.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-md5.cpp
@@ -9,3 +9,18 @@ struct yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy::yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy() {}
 // CHECK-DAG: @"??@a6a285da2eea70dba6b578022be61d81@??_R4@" = linkonce_odr constant %rtti.CompleteObjectLocator
 // CHECK-DAG: @"??@a6a285da2eea70dba6b578022be61d81@" = unnamed_addr alias
+
+// RUN: %clang_cc1 -DTHROW -fcxx-exceptions -fms-compatibility-version=18.0 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck --check-prefix=HAVECTOR %s
+// RUN: %clang_cc1 -DTHROW -fcxx-exceptions -fms-compatibility-version=19.0 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck --check-prefix=OMITCTOR %s
+// RUN: %clang_cc1 -DTHROW -fcxx-exceptions -fms-compatibility-version=19.10 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck --check-prefix=OMITCTOR %s
+// RUN: %clang_cc1 -DTHROW -fcxx-exceptions -fms-compatibility-version=19.11 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck --check-prefix=OMITCTOR %s
+// FIXME: Not known where between 19.11 and 19.14 this changed.
+// RUN: %clang_cc1 -DTHROW -fcxx-exceptions -fms-compatibility-version=19.14 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck --check-prefix=HAVECTOR %s
+// RUN: %clang_cc1 -DTHROW -fcxx-exceptions -fms-compatibility-version=19.20 -emit-llvm -o - -triple i686-pc-win32 %s | FileCheck --check-prefix=HAVECTOR %s
+#ifdef THROW
+void g() {
+  throw yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy();
+// OMITCTOR: "_CT??@c14087f0ec22b387aea7c59083f4f546@4"
+// HAVECTOR: "_CT??@c14087f0ec22b387aea7c59083f4f546@??@4ef4f8979c81f9d2224b32bf327e6bdf@4"
+}
+#endif

From 228b130a4b87143c48f93d2d6ccba90df3e9dfb7 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Wed, 29 May 2019 14:11:53 +0000
Subject: [PATCH 0488/1176] [clangd] Fix buildbot error.

llvm-svn: 361960
---
 clang-tools-extra/clangd/index/CanonicalIncludes.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index 4b045944f25c0..017547ce05755 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -87,11 +87,15 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
 }
 
 void addSystemHeadersMapping(CanonicalIncludes *Includes) {
+  static const std::vector<std::pair<const char *, const char *>> SymbolMap = {
 #define SYMBOL(Name, NameSpace, Header) { #NameSpace#Name, #Header },
       #include "StdSymbolMap.inc"
 #undef SYMBOL
   };
 
+  for (const auto &Pair : SymbolMap)
+    Includes->addSymbolMapping(Pair.first, Pair.second);
+
   // FIXME: remove the std header mapping once we support ambiguous symbols, now
   // it serves as a fallback to disambiguate:
   //   - symbols with mulitiple headers (e.g. std::move)

From 268dfaf153625cb14618157319a1a88a6352602c Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 29 May 2019 14:15:35 +0000
Subject: [PATCH 0489/1176] gn build: Merge r361953

llvm-svn: 361961
---
 llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn
index b13b8c8cffc29..d3342af392da8 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn
@@ -40,6 +40,7 @@ unittest("IRTests") {
     "ValueHandleTest.cpp",
     "ValueMapTest.cpp",
     "ValueTest.cpp",
+    "VectorTypesTest.cpp",
     "VerifierTest.cpp",
     "WaymarkTest.cpp",
   ]

From 898aaf1cb8c90d7fda844dd39bc4a9c2c0dbd3ff Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 29 May 2019 14:36:11 +0000
Subject: [PATCH 0490/1176] Clean up DWARFDebugInfoEntry

llvm-svn: 361962
---
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  | 55 -------------------
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.h    | 51 -----------------
 2 files changed, 106 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index f408c5b32b120..3bfb287466489 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -1074,13 +1074,6 @@ void DWARFDebugInfoEntry::BuildFunctionAddressRangeTable(
   }
 }
 
-std::vector<DWARFDIE>
-DWARFDebugInfoEntry::GetDeclContextDIEs(DWARFUnit *cu) const {
-
-  DWARFDIE die(cu, const_cast<DWARFDebugInfoEntry *>(this));
-  return die.GetDeclContextDIEs();
-}
-
 void DWARFDebugInfoEntry::GetDWARFDeclContext(
     DWARFUnit *cu, DWARFDeclContext &dwarf_decl_ctx) const {
   const dw_tag_t tag = Tag();
@@ -1096,14 +1089,6 @@ void DWARFDebugInfoEntry::GetDWARFDeclContext(
   }
 }
 
-bool DWARFDebugInfoEntry::MatchesDWARFDeclContext(
-    DWARFUnit *cu, const DWARFDeclContext &dwarf_decl_ctx) const {
-
-  DWARFDeclContext this_dwarf_decl_ctx;
-  GetDWARFDeclContext(cu, this_dwarf_decl_ctx);
-  return this_dwarf_decl_ctx == dwarf_decl_ctx;
-}
-
 DWARFDIE
 DWARFDebugInfoEntry::GetParentDeclContextDIE(DWARFUnit *cu) const {
   DWARFAttributes attributes;
@@ -1214,7 +1199,6 @@ DWARFDebugInfoEntry::GetQualifiedName(DWARFUnit *cu,
   return storage.c_str();
 }
 
-// LookupAddress
 bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
                                         const DWARFUnit *cu,
                                         DWARFDebugInfoEntry **function_die,
@@ -1232,13 +1216,9 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
       check_children = true;
       break;
     case DW_TAG_entry_point:
-      break;
     case DW_TAG_enumeration_type:
-      break;
     case DW_TAG_formal_parameter:
-      break;
     case DW_TAG_imported_declaration:
-      break;
     case DW_TAG_label:
       break;
     case DW_TAG_lexical_block:
@@ -1246,9 +1226,7 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
       match_addr_range = true;
       break;
     case DW_TAG_member:
-      break;
     case DW_TAG_pointer_type:
-      break;
     case DW_TAG_reference_type:
       break;
     case DW_TAG_compile_unit:
@@ -1260,20 +1238,15 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
       check_children = true;
       break;
     case DW_TAG_subroutine_type:
-      break;
     case DW_TAG_typedef:
-      break;
     case DW_TAG_union_type:
-      break;
     case DW_TAG_unspecified_parameters:
-      break;
     case DW_TAG_variant:
       break;
     case DW_TAG_common_block:
       check_children = true;
       break;
     case DW_TAG_common_inclusion:
-      break;
     case DW_TAG_inheritance:
       break;
     case DW_TAG_inlined_subroutine:
@@ -1284,76 +1257,53 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
       match_addr_range = true;
       break;
     case DW_TAG_ptr_to_member_type:
-      break;
     case DW_TAG_set_type:
-      break;
     case DW_TAG_subrange_type:
-      break;
     case DW_TAG_with_stmt:
-      break;
     case DW_TAG_access_declaration:
-      break;
     case DW_TAG_base_type:
       break;
     case DW_TAG_catch_block:
       match_addr_range = true;
       break;
     case DW_TAG_const_type:
-      break;
     case DW_TAG_constant:
-      break;
     case DW_TAG_enumerator:
-      break;
     case DW_TAG_file_type:
-      break;
     case DW_TAG_friend:
-      break;
     case DW_TAG_namelist:
-      break;
     case DW_TAG_namelist_item:
-      break;
     case DW_TAG_packed_type:
       break;
     case DW_TAG_subprogram:
       match_addr_range = true;
       break;
     case DW_TAG_template_type_parameter:
-      break;
     case DW_TAG_template_value_parameter:
-      break;
     case DW_TAG_GNU_template_parameter_pack:
-      break;
     case DW_TAG_thrown_type:
       break;
     case DW_TAG_try_block:
       match_addr_range = true;
       break;
     case DW_TAG_variant_part:
-      break;
     case DW_TAG_variable:
-      break;
     case DW_TAG_volatile_type:
-      break;
     case DW_TAG_dwarf_procedure:
-      break;
     case DW_TAG_restrict_type:
-      break;
     case DW_TAG_interface_type:
       break;
     case DW_TAG_namespace:
       check_children = true;
       break;
     case DW_TAG_imported_module:
-      break;
     case DW_TAG_unspecified_type:
       break;
     case DW_TAG_partial_unit:
       match_addr_range = true;
       break;
     case DW_TAG_imported_unit:
-      break;
     case DW_TAG_shared_type:
-      break;
     default:
       break;
     }
@@ -1485,11 +1435,6 @@ DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(
   return nullptr;
 }
 
-bool DWARFDebugInfoEntry::OffsetLessThan(const DWARFDebugInfoEntry &a,
-                                         const DWARFDebugInfoEntry &b) {
-  return a.GetOffset() < b.GetOffset();
-}
-
 bool DWARFDebugInfoEntry::operator==(const DWARFDebugInfoEntry &rhs) const {
   return m_offset == rhs.m_offset && m_parent_idx == rhs.m_parent_idx &&
          m_sibling_idx == rhs.m_sibling_idx &&
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
index 7b7459ad5d14c..f5da1bf7e5bf4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
@@ -19,26 +19,6 @@
 #include <set>
 #include <vector>
 
-typedef std::map<const DWARFDebugInfoEntry *, dw_addr_t> DIEToAddressMap;
-typedef DIEToAddressMap::iterator DIEToAddressMapIter;
-typedef DIEToAddressMap::const_iterator DIEToAddressMapConstIter;
-
-typedef std::map<dw_addr_t, const DWARFDebugInfoEntry *> AddressToDIEMap;
-typedef AddressToDIEMap::iterator AddressToDIEMapIter;
-typedef AddressToDIEMap::const_iterator AddressToDIEMapConstIter;
-
-typedef std::map<dw_offset_t, dw_offset_t> DIEToDIEMap;
-typedef DIEToDIEMap::iterator DIEToDIEMapIter;
-typedef DIEToDIEMap::const_iterator DIEToDIEMapConstIter;
-
-typedef std::map<uint32_t, const DWARFDebugInfoEntry *> UInt32ToDIEMap;
-typedef UInt32ToDIEMap::iterator UInt32ToDIEMapIter;
-typedef UInt32ToDIEMap::const_iterator UInt32ToDIEMapConstIter;
-
-typedef std::multimap<uint32_t, const DWARFDebugInfoEntry *> UInt32ToDIEMMap;
-typedef UInt32ToDIEMMap::iterator UInt32ToDIEMMapIter;
-typedef UInt32ToDIEMMap::const_iterator UInt32ToDIEMMapConstIter;
-
 class DWARFDeclContext;
 
 #define DIE_SIBLING_IDX_BITSIZE 31
@@ -49,10 +29,6 @@ class DWARFDebugInfoEntry {
   typedef collection::iterator iterator;
   typedef collection::const_iterator const_iterator;
 
-  typedef std::vector<dw_offset_t> offset_collection;
-  typedef offset_collection::iterator offset_collection_iterator;
-  typedef offset_collection::const_iterator offset_collection_const_iterator;
-
   DWARFDebugInfoEntry()
       : m_offset(DW_INVALID_OFFSET), m_parent_idx(0), m_sibling_idx(0),
         m_has_children(false), m_abbr_idx(0), m_tag(0) {}
@@ -129,9 +105,6 @@ class DWARFDebugInfoEntry {
   const char *GetQualifiedName(DWARFUnit *cu, const DWARFAttributes &attributes,
                                std::string &storage) const;
 
-  static bool OffsetLessThan(const DWARFDebugInfoEntry &a,
-                             const DWARFDebugInfoEntry &b);
-
   void Dump(const DWARFUnit *cu, lldb_private::Stream &s,
             uint32_t recurse_depth) const;
 
@@ -187,38 +160,14 @@ class DWARFDebugInfoEntry {
     return HasChildren() ? this + 1 : nullptr;
   }
 
-  std::vector<DWARFDIE> GetDeclContextDIEs(DWARFUnit *cu) const;
-
   void GetDWARFDeclContext(DWARFUnit *cu,
                            DWARFDeclContext &dwarf_decl_ctx) const;
 
-  bool MatchesDWARFDeclContext(DWARFUnit *cu,
-                               const DWARFDeclContext &dwarf_decl_ctx) const;
-
   DWARFDIE GetParentDeclContextDIE(DWARFUnit *cu) const;
   DWARFDIE GetParentDeclContextDIE(DWARFUnit *cu,
                                    const DWARFAttributes &attributes) const;
 
-  void SetParent(DWARFDebugInfoEntry *parent) {
-    if (parent) {
-      // We know we are kept in a vector of contiguous entries, so we know
-      // our parent will be some index behind "this".
-      m_parent_idx = this - parent;
-    } else
-      m_parent_idx = 0;
-  }
-  void SetSibling(DWARFDebugInfoEntry *sibling) {
-    if (sibling) {
-      // We know we are kept in a vector of contiguous entries, so we know
-      // our sibling will be some index after "this".
-      m_sibling_idx = sibling - this;
-      sibling->SetParent(GetParent());
-    } else
-      m_sibling_idx = 0;
-  }
-
   void SetSiblingIndex(uint32_t idx) { m_sibling_idx = idx; }
-
   void SetParentIndex(uint32_t idx) { m_parent_idx = idx; }
 
 protected:

From 64345f280b64ed8d51c4af88fd0cdf851186d072 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 29 May 2019 14:39:37 +0000
Subject: [PATCH 0491/1176] Revert rL361944 from llvm/trunk : [ADT] add
 iterator_range::empty() ........ Breaks windows buildbots

llvm-svn: 361963
---
 llvm/include/llvm/ADT/iterator_range.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h
index aa8830943cabc..774c7c4e3366e 100644
--- a/llvm/include/llvm/ADT/iterator_range.h
+++ b/llvm/include/llvm/ADT/iterator_range.h
@@ -44,7 +44,6 @@ class iterator_range {
 
   IteratorT begin() const { return begin_iterator; }
   IteratorT end() const { return end_iterator; }
-  bool empty() const { return begin_iterator == end_iterator; }
 };
 
 /// Convenience function for iterating over sub-ranges.

From af7bf2f6873375b18941d500eb4de7a3028b2250 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 29 May 2019 14:58:50 +0000
Subject: [PATCH 0492/1176] [mips] Use range-based for loops. NFC

llvm-svn: 361964
---
 llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 40d04e7658d55..5576ce643c80f 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -159,8 +159,6 @@ getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
 
-  using RegIter = TargetRegisterClass::const_iterator;
-
   for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
     Reserved.set(ReservedGPR32[I]);
 
@@ -182,14 +180,12 @@ getReservedRegs(const MachineFunction &MF) const {
 
   if (Subtarget.isFP64bit()) {
     // Reserve all registers in AFGR64.
-    for (RegIter Reg = Mips::AFGR64RegClass.begin(),
-         EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
-      Reserved.set(*Reg);
+    for (MCPhysReg Reg : Mips::AFGR64RegClass)
+      Reserved.set(Reg);
   } else {
     // Reserve all registers in FGR64.
-    for (RegIter Reg = Mips::FGR64RegClass.begin(),
-         EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg)
-      Reserved.set(*Reg);
+    for (MCPhysReg Reg : Mips::FGR64RegClass)
+      Reserved.set(Reg);
   }
   // Reserve FP if this function should have a dedicated frame pointer register.
   if (Subtarget.getFrameLowering()->hasFP(MF)) {

From 188162118f17fca71dfeb118ce022deefa3cbf58 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 29 May 2019 14:58:56 +0000
Subject: [PATCH 0493/1176] [mips] Iterate over MSACtrlRegClass to reserve all
 MSA control registers. NFC

llvm-svn: 361965
---
 llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 5576ce643c80f..4ad6fc5ad8ef1 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -217,14 +217,8 @@ getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(Mips::DSPOutFlag);
 
   // Reserve MSA control registers.
-  Reserved.set(Mips::MSAIR);
-  Reserved.set(Mips::MSACSR);
-  Reserved.set(Mips::MSAAccess);
-  Reserved.set(Mips::MSASave);
-  Reserved.set(Mips::MSAModify);
-  Reserved.set(Mips::MSARequest);
-  Reserved.set(Mips::MSAMap);
-  Reserved.set(Mips::MSAUnmap);
+  for (MCPhysReg Reg : Mips::MSACtrlRegClass)
+    Reserved.set(Reg);
 
   // Reserve RA if in mips16 mode.
   if (Subtarget.inMips16Mode()) {

From 909c8c2b0d93b9c4ae4eec11216c34f8f35c9fcc Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 29 May 2019 14:59:07 +0000
Subject: [PATCH 0494/1176] [mips] Use reg-exp in tests to tolerate register
 indexes changing. NFC

llvm-svn: 361966
---
 llvm/test/CodeGen/Mips/llvm-ir/fptosi.ll      | 296 +++----
 llvm/test/CodeGen/Mips/llvm-ir/load.ll        | 784 +++++++++---------
 llvm/test/CodeGen/Mips/llvm-ir/store.ll       | 376 ++++-----
 .../Mips/micromips-pseudo-mtlohi-expand.ll    |  48 +-
 4 files changed, 752 insertions(+), 752 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/llvm-ir/fptosi.ll b/llvm/test/CodeGen/Mips/llvm-ir/fptosi.ll
index 03a0de7466452..6b4fd6030561a 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/fptosi.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/fptosi.ll
@@ -39,123 +39,123 @@ define i32 @test1(float %t) {
 ; M32-LABEL: test1:
 ; M32:       # %bb.0: # %entry
 ; M32-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S
-; M32-NEXT:    # <MCOperand Reg:147>
-; M32-NEXT:    # <MCOperand Reg:159>>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32-NEXT:    # <MCOperand Reg:19>>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32-NEXT:    # <MCOperand Reg:321>
-; M32-NEXT:    # <MCOperand Reg:147>>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M32R2-FP64-LABEL: test1:
 ; M32R2-FP64:       # %bb.0: # %entry
 ; M32R2-FP64-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S
-; M32R2-FP64-NEXT:    # <MCOperand Reg:147>
-; M32R2-FP64-NEXT:    # <MCOperand Reg:159>>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R2-FP64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32R2-FP64-NEXT:    # <MCOperand Reg:19>>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R2-FP64-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32R2-FP64-NEXT:    # <MCOperand Reg:321>
-; M32R2-FP64-NEXT:    # <MCOperand Reg:147>>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M32R2-SF-LABEL: test1:
 ; M32R2-SF:       # %bb.0: # %entry
 ; M32R2-SF-NEXT:    addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:-24>>
 ; M32R2-SF-NEXT:    .cfi_def_cfa_offset 24
 ; M32R2-SF-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; M32R2-SF-NEXT:    # <MCInst #{{[0-9]+}} SW
-; M32R2-SF-NEXT:    # <MCOperand Reg:19>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:20>>
 ; M32R2-SF-NEXT:    .cfi_offset 31, -4
 ; M32R2-SF-NEXT:    jal __fixsfsi # <MCInst #{{[0-9]+}} JAL
 ; M32R2-SF-NEXT:    # <MCOperand Expr:(__fixsfsi)>>
 ; M32R2-SF-NEXT:    nop # <MCInst #{{[0-9]+}} SLL
-; M32R2-SF-NEXT:    # <MCOperand Reg:21>
-; M32R2-SF-NEXT:    # <MCOperand Reg:21>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:0>>
 ; M32R2-SF-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; M32R2-SF-NEXT:    # <MCInst #{{[0-9]+}} LW
-; M32R2-SF-NEXT:    # <MCOperand Reg:19>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:20>>
 ; M32R2-SF-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32R2-SF-NEXT:    # <MCOperand Reg:19>>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R2-SF-NEXT:    addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:24>>
 ;
 ; M32R3R5-LABEL: test1:
 ; M32R3R5:       # %bb.0: # %entry
 ; M32R3R5-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S
-; M32R3R5-NEXT:    # <MCOperand Reg:147>
-; M32R3R5-NEXT:    # <MCOperand Reg:159>>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R3R5-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32R3R5-NEXT:    # <MCOperand Reg:19>>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R3R5-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32R3R5-NEXT:    # <MCOperand Reg:321>
-; M32R3R5-NEXT:    # <MCOperand Reg:147>>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M32R6-LABEL: test1:
 ; M32R6:       # %bb.0: # %entry
 ; M32R6-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S
-; M32R6-NEXT:    # <MCOperand Reg:147>
-; M32R6-NEXT:    # <MCOperand Reg:159>>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; M32R6-NEXT:    # <MCOperand Reg:21>
-; M32R6-NEXT:    # <MCOperand Reg:19>>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R6-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32R6-NEXT:    # <MCOperand Reg:321>
-; M32R6-NEXT:    # <MCOperand Reg:147>>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M64-LABEL: test1:
 ; M64:       # %bb.0: # %entry
 ; M64-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S
-; M64-NEXT:    # <MCOperand Reg:147>
-; M64-NEXT:    # <MCOperand Reg:159>>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M64-NEXT:    # <MCOperand Reg:301>>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M64-NEXT:    # <MCOperand Reg:321>
-; M64-NEXT:    # <MCOperand Reg:147>>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M64R6-LABEL: test1:
 ; M64R6:       # %bb.0: # %entry
 ; M64R6-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S
-; M64R6-NEXT:    # <MCOperand Reg:147>
-; M64R6-NEXT:    # <MCOperand Reg:159>>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; M64R6-NEXT:    # <MCOperand Reg:355>
-; M64R6-NEXT:    # <MCOperand Reg:301>>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64R6-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M64R6-NEXT:    # <MCOperand Reg:321>
-; M64R6-NEXT:    # <MCOperand Reg:147>>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-FP32-LABEL: test1:
 ; MMR2-FP32:       # %bb.0: # %entry
 ; MMR2-FP32-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S_MM
-; MMR2-FP32-NEXT:    # <MCOperand Reg:147>
-; MMR2-FP32-NEXT:    # <MCOperand Reg:159>>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR2-FP32-NEXT:    # <MCOperand Reg:19>>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP32-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM
-; MMR2-FP32-NEXT:    # <MCOperand Reg:321>
-; MMR2-FP32-NEXT:    # <MCOperand Reg:147>>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-FP64-LABEL: test1:
 ; MMR2-FP64:       # %bb.0: # %entry
 ; MMR2-FP64-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S_MM
-; MMR2-FP64-NEXT:    # <MCOperand Reg:147>
-; MMR2-FP64-NEXT:    # <MCOperand Reg:159>>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR2-FP64-NEXT:    # <MCOperand Reg:19>>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP64-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM
-; MMR2-FP64-NEXT:    # <MCOperand Reg:321>
-; MMR2-FP64-NEXT:    # <MCOperand Reg:147>>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-SF-LABEL: test1:
 ; MMR2-SF:       # %bb.0: # %entry
@@ -164,63 +164,63 @@ define i32 @test1(float %t) {
 ; MMR2-SF-NEXT:    .cfi_def_cfa_offset 24
 ; MMR2-SF-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MMR2-SF-NEXT:    # <MCInst #{{[0-9]+}} SWSP_MM
-; MMR2-SF-NEXT:    # <MCOperand Reg:19>
-; MMR2-SF-NEXT:    # <MCOperand Reg:20>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR2-SF-NEXT:    .cfi_offset 31, -4
 ; MMR2-SF-NEXT:    jal __fixsfsi # <MCInst #{{[0-9]+}} JAL_MM
 ; MMR2-SF-NEXT:    # <MCOperand Expr:(__fixsfsi)>>
 ; MMR2-SF-NEXT:    nop # <MCInst #{{[0-9]+}} SLL
-; MMR2-SF-NEXT:    # <MCOperand Reg:21>
-; MMR2-SF-NEXT:    # <MCOperand Reg:21>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-SF-NEXT:    # <MCOperand Imm:0>>
 ; MMR2-SF-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MMR2-SF-NEXT:    # <MCInst #{{[0-9]+}} LWSP_MM
-; MMR2-SF-NEXT:    # <MCOperand Reg:19>
-; MMR2-SF-NEXT:    # <MCOperand Reg:20>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR2-SF-NEXT:    addiusp 24 # <MCInst #{{[0-9]+}} ADDIUSP_MM
 ; MMR2-SF-NEXT:    # <MCOperand Imm:24>>
 ; MMR2-SF-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR2-SF-NEXT:    # <MCOperand Reg:19>>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR6-LABEL: test1:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    trunc.w.s $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_S_MMR6
-; MMR6-NEXT:    # <MCOperand Reg:147>
-; MMR6-NEXT:    # <MCOperand Reg:159>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR6-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:147>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR6-SF-LABEL: test1:
 ; MMR6-SF:       # %bb.0: # %entry
 ; MMR6-SF-NEXT:    addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:-24>>
 ; MMR6-SF-NEXT:    .cfi_def_cfa_offset 24
 ; MMR6-SF-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MMR6-SF-NEXT:    # <MCInst #{{[0-9]+}} SW
-; MMR6-SF-NEXT:    # <MCOperand Reg:19>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR6-SF-NEXT:    .cfi_offset 31, -4
 ; MMR6-SF-NEXT:    jalr __fixsfsi # <MCInst #{{[0-9]+}} JALRC16_MMR6
 ; MMR6-SF-NEXT:    # <MCOperand Expr:(__fixsfsi)>>
 ; MMR6-SF-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MMR6-SF-NEXT:    # <MCInst #{{[0-9]+}} LW
-; MMR6-SF-NEXT:    # <MCOperand Reg:19>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR6-SF-NEXT:    addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:24>>
 ; MMR6-SF-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-SF-NEXT:    # <MCOperand Reg:19>>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 entry:
   %conv = fptosi float %t to i32
   ret i32 %conv
@@ -230,123 +230,123 @@ define i32 @test2(double %t) {
 ; M32-LABEL: test2:
 ; M32:       # %bb.0: # %entry
 ; M32-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D32
-; M32-NEXT:    # <MCOperand Reg:147>
-; M32-NEXT:    # <MCOperand Reg:133>>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32-NEXT:    # <MCOperand Reg:19>>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32-NEXT:    # <MCOperand Reg:321>
-; M32-NEXT:    # <MCOperand Reg:147>>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M32R2-FP64-LABEL: test2:
 ; M32R2-FP64:       # %bb.0: # %entry
 ; M32R2-FP64-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64
-; M32R2-FP64-NEXT:    # <MCOperand Reg:147>
-; M32R2-FP64-NEXT:    # <MCOperand Reg:373>>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R2-FP64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32R2-FP64-NEXT:    # <MCOperand Reg:19>>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R2-FP64-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32R2-FP64-NEXT:    # <MCOperand Reg:321>
-; M32R2-FP64-NEXT:    # <MCOperand Reg:147>>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M32R2-SF-LABEL: test2:
 ; M32R2-SF:       # %bb.0: # %entry
 ; M32R2-SF-NEXT:    addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:-24>>
 ; M32R2-SF-NEXT:    .cfi_def_cfa_offset 24
 ; M32R2-SF-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; M32R2-SF-NEXT:    # <MCInst #{{[0-9]+}} SW
-; M32R2-SF-NEXT:    # <MCOperand Reg:19>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:20>>
 ; M32R2-SF-NEXT:    .cfi_offset 31, -4
 ; M32R2-SF-NEXT:    jal __fixdfsi # <MCInst #{{[0-9]+}} JAL
 ; M32R2-SF-NEXT:    # <MCOperand Expr:(__fixdfsi)>>
 ; M32R2-SF-NEXT:    nop # <MCInst #{{[0-9]+}} SLL
-; M32R2-SF-NEXT:    # <MCOperand Reg:21>
-; M32R2-SF-NEXT:    # <MCOperand Reg:21>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:0>>
 ; M32R2-SF-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; M32R2-SF-NEXT:    # <MCInst #{{[0-9]+}} LW
-; M32R2-SF-NEXT:    # <MCOperand Reg:19>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:20>>
 ; M32R2-SF-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32R2-SF-NEXT:    # <MCOperand Reg:19>>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R2-SF-NEXT:    addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
-; M32R2-SF-NEXT:    # <MCOperand Reg:20>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; M32R2-SF-NEXT:    # <MCOperand Imm:24>>
 ;
 ; M32R3R5-LABEL: test2:
 ; M32R3R5:       # %bb.0: # %entry
 ; M32R3R5-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D32
-; M32R3R5-NEXT:    # <MCOperand Reg:147>
-; M32R3R5-NEXT:    # <MCOperand Reg:133>>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R3R5-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M32R3R5-NEXT:    # <MCOperand Reg:19>>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R3R5-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32R3R5-NEXT:    # <MCOperand Reg:321>
-; M32R3R5-NEXT:    # <MCOperand Reg:147>>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R3R5-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M32R6-LABEL: test2:
 ; M32R6:       # %bb.0: # %entry
 ; M32R6-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64
-; M32R6-NEXT:    # <MCOperand Reg:147>
-; M32R6-NEXT:    # <MCOperand Reg:373>>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; M32R6-NEXT:    # <MCOperand Reg:21>
-; M32R6-NEXT:    # <MCOperand Reg:19>>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M32R6-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M32R6-NEXT:    # <MCOperand Reg:321>
-; M32R6-NEXT:    # <MCOperand Reg:147>>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M64-LABEL: test2:
 ; M64:       # %bb.0: # %entry
 ; M64-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64
-; M64-NEXT:    # <MCOperand Reg:147>
-; M64-NEXT:    # <MCOperand Reg:373>>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; M64-NEXT:    # <MCOperand Reg:301>>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M64-NEXT:    # <MCOperand Reg:321>
-; M64-NEXT:    # <MCOperand Reg:147>>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; M64R6-LABEL: test2:
 ; M64R6:       # %bb.0: # %entry
 ; M64R6-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D64
-; M64R6-NEXT:    # <MCOperand Reg:147>
-; M64R6-NEXT:    # <MCOperand Reg:373>>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; M64R6-NEXT:    # <MCOperand Reg:355>
-; M64R6-NEXT:    # <MCOperand Reg:301>>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; M64R6-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1
-; M64R6-NEXT:    # <MCOperand Reg:321>
-; M64R6-NEXT:    # <MCOperand Reg:147>>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; M64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-FP32-LABEL: test2:
 ; MMR2-FP32:       # %bb.0: # %entry
 ; MMR2-FP32-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_MM
-; MMR2-FP32-NEXT:    # <MCOperand Reg:147>
-; MMR2-FP32-NEXT:    # <MCOperand Reg:133>>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR2-FP32-NEXT:    # <MCOperand Reg:19>>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP32-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM
-; MMR2-FP32-NEXT:    # <MCOperand Reg:321>
-; MMR2-FP32-NEXT:    # <MCOperand Reg:147>>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-FP64-LABEL: test2:
 ; MMR2-FP64:       # %bb.0: # %entry
 ; MMR2-FP64-NEXT:    cvt.w.d $f0, $f12 # <MCInst #{{[0-9]+}} CVT_W_D64_MM
-; MMR2-FP64-NEXT:    # <MCOperand Reg:147>
-; MMR2-FP64-NEXT:    # <MCOperand Reg:373>>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR2-FP64-NEXT:    # <MCOperand Reg:19>>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-FP64-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM
-; MMR2-FP64-NEXT:    # <MCOperand Reg:321>
-; MMR2-FP64-NEXT:    # <MCOperand Reg:147>>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-FP64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-SF-LABEL: test2:
 ; MMR2-SF:       # %bb.0: # %entry
@@ -355,63 +355,63 @@ define i32 @test2(double %t) {
 ; MMR2-SF-NEXT:    .cfi_def_cfa_offset 24
 ; MMR2-SF-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MMR2-SF-NEXT:    # <MCInst #{{[0-9]+}} SWSP_MM
-; MMR2-SF-NEXT:    # <MCOperand Reg:19>
-; MMR2-SF-NEXT:    # <MCOperand Reg:20>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR2-SF-NEXT:    .cfi_offset 31, -4
 ; MMR2-SF-NEXT:    jal __fixdfsi # <MCInst #{{[0-9]+}} JAL_MM
 ; MMR2-SF-NEXT:    # <MCOperand Expr:(__fixdfsi)>>
 ; MMR2-SF-NEXT:    nop # <MCInst #{{[0-9]+}} SLL
-; MMR2-SF-NEXT:    # <MCOperand Reg:21>
-; MMR2-SF-NEXT:    # <MCOperand Reg:21>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-SF-NEXT:    # <MCOperand Imm:0>>
 ; MMR2-SF-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MMR2-SF-NEXT:    # <MCInst #{{[0-9]+}} LWSP_MM
-; MMR2-SF-NEXT:    # <MCOperand Reg:19>
-; MMR2-SF-NEXT:    # <MCOperand Reg:20>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR2-SF-NEXT:    addiusp 24 # <MCInst #{{[0-9]+}} ADDIUSP_MM
 ; MMR2-SF-NEXT:    # <MCOperand Imm:24>>
 ; MMR2-SF-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR2-SF-NEXT:    # <MCOperand Reg:19>>
+; MMR2-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR6-LABEL: test2:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    trunc.w.d $f0, $f12 # <MCInst #{{[0-9]+}} TRUNC_W_D_MMR6
-; MMR6-NEXT:    # <MCOperand Reg:147>
-; MMR6-NEXT:    # <MCOperand Reg:373>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR6-NEXT:    mfc1 $2, $f0 # <MCInst #{{[0-9]+}} MFC1_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:147>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR6-SF-LABEL: test2:
 ; MMR6-SF:       # %bb.0: # %entry
 ; MMR6-SF-NEXT:    addiu $sp, $sp, -24 # <MCInst #{{[0-9]+}} ADDiu
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:-24>>
 ; MMR6-SF-NEXT:    .cfi_def_cfa_offset 24
 ; MMR6-SF-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MMR6-SF-NEXT:    # <MCInst #{{[0-9]+}} SW
-; MMR6-SF-NEXT:    # <MCOperand Reg:19>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR6-SF-NEXT:    .cfi_offset 31, -4
 ; MMR6-SF-NEXT:    jalr __fixdfsi # <MCInst #{{[0-9]+}} JALRC16_MMR6
 ; MMR6-SF-NEXT:    # <MCOperand Expr:(__fixdfsi)>>
 ; MMR6-SF-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MMR6-SF-NEXT:    # <MCInst #{{[0-9]+}} LW
-; MMR6-SF-NEXT:    # <MCOperand Reg:19>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:20>>
 ; MMR6-SF-NEXT:    addiu $sp, $sp, 24 # <MCInst #{{[0-9]+}} ADDiu
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
-; MMR6-SF-NEXT:    # <MCOperand Reg:20>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-SF-NEXT:    # <MCOperand Imm:24>>
 ; MMR6-SF-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-SF-NEXT:    # <MCOperand Reg:19>>
+; MMR6-SF-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 entry:
   %conv = fptosi double %t to i32
   ret i32 %conv
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/load.ll b/llvm/test/CodeGen/Mips/llvm-ir/load.ll
index 5ce32750a5039..050dba1377c79 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/load.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/load.ll
@@ -18,135 +18,135 @@ define i8 @f1() {
 ; MIPS32-LABEL: f1:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MMR3-LABEL: f1:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS32R6-LABEL: f1:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MMR6-LABEL: f1:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MMR6-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f1:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu
-; MIPS3-NEXT:    # <MCOperand Reg:321>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS64-LABEL: f1:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu
-; MIPS64-NEXT:    # <MCOperand Reg:321>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS64R6-LABEL: f1:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lbu $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LBu
-; MIPS64R6-NEXT:    # <MCOperand Reg:321>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 entry:
   %0 = load i8, i8 * @a
@@ -157,135 +157,135 @@ define i32 @f2() {
 ; MIPS32-LABEL: f2:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MMR3-LABEL: f2:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS32R6-LABEL: f2:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MMR6-LABEL: f2:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MMR6-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f2:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB
-; MIPS3-NEXT:    # <MCOperand Reg:321>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS64-LABEL: f2:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB
-; MIPS64-NEXT:    # <MCOperand Reg:321>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS64R6-LABEL: f2:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lb $2, %lo(a)($1) # <MCInst #{{[0-9]+}} LB
-; MIPS64R6-NEXT:    # <MCOperand Reg:321>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 entry:
   %0 = load i8, i8 * @a
@@ -297,135 +297,135 @@ define i16 @f3() {
 ; MIPS32-LABEL: f3:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MMR3-LABEL: f3:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS32R6-LABEL: f3:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MMR6-LABEL: f3:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MMR6-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f3:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu
-; MIPS3-NEXT:    # <MCOperand Reg:321>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS64-LABEL: f3:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu
-; MIPS64-NEXT:    # <MCOperand Reg:321>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS64R6-LABEL: f3:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lhu $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LHu
-; MIPS64R6-NEXT:    # <MCOperand Reg:321>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 entry:
   %0 = load i16, i16 * @b
@@ -436,135 +436,135 @@ define i32 @f4() {
 ; MIPS32-LABEL: f4:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MMR3-LABEL: f4:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS32R6-LABEL: f4:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MMR6-LABEL: f4:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MMR6-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f4:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH
-; MIPS3-NEXT:    # <MCOperand Reg:321>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS64-LABEL: f4:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH
-; MIPS64-NEXT:    # <MCOperand Reg:321>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS64R6-LABEL: f4:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lh $2, %lo(b)($1) # <MCInst #{{[0-9]+}} LH
-; MIPS64R6-NEXT:    # <MCOperand Reg:321>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 entry:
   %0 = load i16, i16 * @b
@@ -576,135 +576,135 @@ define i32 @f5() {
 ; MIPS32-LABEL: f5:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MMR3-LABEL: f5:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS32R6-LABEL: f5:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MMR6-LABEL: f5:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR6-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f5:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS3-NEXT:    # <MCOperand Reg:321>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64-LABEL: f5:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS64-NEXT:    # <MCOperand Reg:321>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64R6-LABEL: f5:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS64R6-NEXT:    # <MCOperand Reg:321>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 entry:
   %0 = load i32, i32 * @c
@@ -715,149 +715,149 @@ define i64 @f6() {
 ; MIPS32-LABEL: f6:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS32-NEXT:    # <MCOperand Reg:322>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    addiu $2, $zero, 0 # <MCInst #{{[0-9]+}} ADDiu
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:21>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Imm:0>>
 ;
 ; MMR3-LABEL: f6:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR3-NEXT:    li16 $2, 0 # <MCInst #{{[0-9]+}} LI16_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Imm:0>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW_MM
-; MMR3-NEXT:    # <MCOperand Reg:322>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS32R6-LABEL: f6:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32R6-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS32R6-NEXT:    # <MCOperand Reg:322>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    addiu $2, $zero, 0 # <MCInst #{{[0-9]+}} ADDiu
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Imm:0>>
 ;
 ; MMR6-LABEL: f6:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR6-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW_MM
-; MMR6-NEXT:    # <MCOperand Reg:322>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MMR6-NEXT:    li16 $2, 0 # <MCInst #{{[0-9]+}} LI16_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Imm:0>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f6:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lwu $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LWu
-; MIPS3-NEXT:    # <MCOperand Reg:416>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64-LABEL: f6:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lwu $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LWu
-; MIPS64-NEXT:    # <MCOperand Reg:416>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64R6-LABEL: f6:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lwu $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LWu
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 entry:
   %0 = load i32, i32 * @c
@@ -869,151 +869,151 @@ define i64 @f7() {
 ; MIPS32-LABEL: f7:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS32-NEXT:    # <MCOperand Reg:322>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    sra $2, $3, 31 # <MCInst #{{[0-9]+}} SRA
-; MIPS32-NEXT:    # <MCOperand Reg:321>
-; MIPS32-NEXT:    # <MCOperand Reg:322>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Imm:31>>
 ;
 ; MMR3-LABEL: f7:
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR3-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW_MM
-; MMR3-NEXT:    # <MCOperand Reg:322>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    sra $2, $3, 31 # <MCInst #{{[0-9]+}} SRA_MM
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:322>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Imm:31>>
 ;
 ; MIPS32R6-LABEL: f7:
 ; MIPS32R6:       # %bb.0: # %entry
 ; MIPS32R6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32R6-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW
-; MIPS32R6-NEXT:    # <MCOperand Reg:322>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    sra $2, $3, 31 # <MCInst #{{[0-9]+}} SRA
-; MIPS32R6-NEXT:    # <MCOperand Reg:321>
-; MIPS32R6-NEXT:    # <MCOperand Reg:322>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Imm:31>>
 ;
 ; MMR6-LABEL: f7:
 ; MMR6:       # %bb.0: # %entry
 ; MMR6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR6-NEXT:    lw $3, %lo(c)($1) # <MCInst #{{[0-9]+}} LW_MM
-; MMR6-NEXT:    # <MCOperand Reg:322>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MMR6-NEXT:    sra $2, $3, 31 # <MCInst #{{[0-9]+}} SRA_MM
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:322>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Imm:31>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS3-LABEL: f7:
 ; MIPS3:       # %bb.0: # %entry
 ; MIPS3-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS3-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS3-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS3-NEXT:    # <MCOperand Reg:30>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Imm:16>>
 ; MIPS3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS3-NEXT:    # <MCOperand Reg:301>>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS3-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW64
-; MIPS3-NEXT:    # <MCOperand Reg:416>
-; MIPS3-NEXT:    # <MCOperand Reg:30>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64-LABEL: f7:
 ; MIPS64:       # %bb.0: # %entry
 ; MIPS64-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64-NEXT:    # <MCOperand Reg:30>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS64-NEXT:    # <MCOperand Reg:301>>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW64
-; MIPS64-NEXT:    # <MCOperand Reg:416>
-; MIPS64-NEXT:    # <MCOperand Reg:30>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64R6-LABEL: f7:
 ; MIPS64R6:       # %bb.0: # %entry
 ; MIPS64R6-NEXT:    lui $1, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    lw $2, %lo(c)($1) # <MCInst #{{[0-9]+}} LW64
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 entry:
   %0 = load i32, i32 * @c
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/store.ll b/llvm/test/CodeGen/Mips/llvm-ir/store.ll
index 2cb287ef376a7..975eb8b90f042 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/store.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/store.ll
@@ -17,107 +17,107 @@ define void @f1(i8 %a) {
 ; MIPS32-LABEL: f1:
 ; MIPS32:       # %bb.0:
 ; MIPS32-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    sb $4, %lo(a)($1) # <MCInst #{{[0-9]+}} SB
-; MIPS32-NEXT:    # <MCOperand Reg:22>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MMR3-LABEL: f1:
 ; MMR3:       # %bb.0:
 ; MMR3-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    sb $4, %lo(a)($1) # <MCInst #{{[0-9]+}} SB_MM
-; MMR3-NEXT:    # <MCOperand Reg:22>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS32R6-LABEL: f1:
 ; MIPS32R6:       # %bb.0:
 ; MIPS32R6-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    sb $4, %lo(a)($1) # <MCInst #{{[0-9]+}} SB
-; MIPS32R6-NEXT:    # <MCOperand Reg:22>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MMR6-LABEL: f1:
 ; MMR6:       # %bb.0:
 ; MMR6-NEXT:    lui $1, %hi(a) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MMR6-NEXT:    sb $4, %lo(a)($1) # <MCInst #{{[0-9]+}} SB_MM
-; MMR6-NEXT:    # <MCOperand Reg:22>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS4-LABEL: f1:
 ; MIPS4:       # %bb.0:
 ; MIPS4-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS4-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS4-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS4-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS4-NEXT:    # <MCOperand Reg:301>>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS4-NEXT:    sb $4, %lo(a)($1) # <MCInst #{{[0-9]+}} SB64
-; MIPS4-NEXT:    # <MCOperand Reg:356>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%lo(a))>>
 ;
 ; MIPS64R6-LABEL: f1:
 ; MIPS64R6:       # %bb.0:
 ; MIPS64R6-NEXT:    lui $1, %highest(a) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(a))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(a))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(a) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(a))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    sb $4, %lo(a)($1) # <MCInst #{{[0-9]+}} SB64
-; MIPS64R6-NEXT:    # <MCOperand Reg:356>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(a))>>
   store i8 %a, i8 * @a
   ret void
@@ -127,107 +127,107 @@ define void @f2(i16 %a) {
 ; MIPS32-LABEL: f2:
 ; MIPS32:       # %bb.0:
 ; MIPS32-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    sh $4, %lo(b)($1) # <MCInst #{{[0-9]+}} SH
-; MIPS32-NEXT:    # <MCOperand Reg:22>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MMR3-LABEL: f2:
 ; MMR3:       # %bb.0:
 ; MMR3-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    sh $4, %lo(b)($1) # <MCInst #{{[0-9]+}} SH_MM
-; MMR3-NEXT:    # <MCOperand Reg:22>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS32R6-LABEL: f2:
 ; MIPS32R6:       # %bb.0:
 ; MIPS32R6-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    sh $4, %lo(b)($1) # <MCInst #{{[0-9]+}} SH
-; MIPS32R6-NEXT:    # <MCOperand Reg:22>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MMR6-LABEL: f2:
 ; MMR6:       # %bb.0:
 ; MMR6-NEXT:    lui $1, %hi(b) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MMR6-NEXT:    sh $4, %lo(b)($1) # <MCInst #{{[0-9]+}} SH_MM
-; MMR6-NEXT:    # <MCOperand Reg:22>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS4-LABEL: f2:
 ; MIPS4:       # %bb.0:
 ; MIPS4-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS4-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS4-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS4-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS4-NEXT:    # <MCOperand Reg:301>>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS4-NEXT:    sh $4, %lo(b)($1) # <MCInst #{{[0-9]+}} SH64
-; MIPS4-NEXT:    # <MCOperand Reg:356>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%lo(b))>>
 ;
 ; MIPS64R6-LABEL: f2:
 ; MIPS64R6:       # %bb.0:
 ; MIPS64R6-NEXT:    lui $1, %highest(b) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(b))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(b))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(b) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(b))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    sh $4, %lo(b)($1) # <MCInst #{{[0-9]+}} SH64
-; MIPS64R6-NEXT:    # <MCOperand Reg:356>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(b))>>
   store i16 %a, i16 * @b
   ret void
@@ -237,115 +237,115 @@ define void @f3(i32 %a) {
 ; MIPS32-LABEL: f3:
 ; MIPS32:       # %bb.0:
 ; MIPS32-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    sw $4, %lo(c)($1) # <MCInst #{{[0-9]+}} SW
-; MIPS32-NEXT:    # <MCOperand Reg:22>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MMR3-LABEL: f3:
 ; MMR3:       # %bb.0:
 ; MMR3-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR3-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR3-NEXT:    sw $4, %lo(c)($1) # <MCInst #{{[0-9]+}} SW_MM
-; MMR3-NEXT:    # <MCOperand Reg:22>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS32R6-LABEL: f3:
 ; MIPS32R6:       # %bb.0:
 ; MIPS32R6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    sw $4, %lo(c)($1) # <MCInst #{{[0-9]+}} SW
-; MIPS32R6-NEXT:    # <MCOperand Reg:22>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MMR6-LABEL: f3:
 ; MMR6:       # %bb.0:
 ; MMR6-NEXT:    lui $1, %hi(c) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MMR6-NEXT:    sw $4, %lo(c)($1) # <MCInst #{{[0-9]+}} SW_MM
-; MMR6-NEXT:    # <MCOperand Reg:22>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS4-LABEL: f3:
 ; MIPS4:       # %bb.0:
 ; MIPS4-NEXT:    sll $1, $4, 0 # <MCInst #{{[0-9]+}} SLL
-; MIPS4-NEXT:    # <MCOperand Reg:1>
-; MIPS4-NEXT:    # <MCOperand Reg:22>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:0>>
 ; MIPS4-NEXT:    lui $2, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS4-NEXT:    # <MCOperand Reg:416>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS4-NEXT:    daddiu $2, $2, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:416>
-; MIPS4-NEXT:    # <MCOperand Reg:416>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS4-NEXT:    dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:416>
-; MIPS4-NEXT:    # <MCOperand Reg:416>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    daddiu $2, $2, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:416>
-; MIPS4-NEXT:    # <MCOperand Reg:416>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS4-NEXT:    dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:416>
-; MIPS4-NEXT:    # <MCOperand Reg:416>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS4-NEXT:    # <MCOperand Reg:301>>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS4-NEXT:    sw $1, %lo(c)($2) # <MCInst #{{[0-9]+}} SW
-; MIPS4-NEXT:    # <MCOperand Reg:1>
-; MIPS4-NEXT:    # <MCOperand Reg:416>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%lo(c))>>
 ;
 ; MIPS64R6-LABEL: f3:
 ; MIPS64R6:       # %bb.0:
 ; MIPS64R6-NEXT:    sll $1, $4, 0 # <MCInst #{{[0-9]+}} SLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:1>
-; MIPS64R6-NEXT:    # <MCOperand Reg:22>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:0>>
 ; MIPS64R6-NEXT:    lui $2, %highest(c) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(c))>>
 ; MIPS64R6-NEXT:    daddiu $2, $2, %higher(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(c))>>
 ; MIPS64R6-NEXT:    dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $2, $2, %hi(c) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(c))>>
 ; MIPS64R6-NEXT:    dsll $2, $2, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    sw $1, %lo(c)($2) # <MCInst #{{[0-9]+}} SW
-; MIPS64R6-NEXT:    # <MCOperand Reg:1>
-; MIPS64R6-NEXT:    # <MCOperand Reg:416>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(c))>>
   store i32 %a, i32 * @c
   ret void
@@ -355,139 +355,139 @@ define void @f4(i64 %a) {
 ; MIPS32-LABEL: f4:
 ; MIPS32:       # %bb.0:
 ; MIPS32-NEXT:    lui $1, %hi(d) # <MCInst #{{[0-9]+}} LUi
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%hi(d))>>
 ; MIPS32-NEXT:    sw $4, %lo(d)($1) # <MCInst #{{[0-9]+}} SW
-; MIPS32-NEXT:    # <MCOperand Reg:22>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MIPS32-NEXT:    addiu $1, $1, %lo(d) # <MCInst #{{[0-9]+}} ADDiu
-; MIPS32-NEXT:    # <MCOperand Reg:1>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MIPS32-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS32-NEXT:    # <MCOperand Reg:19>>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32-NEXT:    sw $5, 4($1) # <MCInst #{{[0-9]+}} SW
-; MIPS32-NEXT:    # <MCOperand Reg:23>
-; MIPS32-NEXT:    # <MCOperand Reg:1>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32-NEXT:    # <MCOperand Imm:4>>
 ;
 ; MMR3-LABEL: f4:
 ; MMR3:       # %bb.0:
 ; MMR3-NEXT:    lui $1, %hi(d) # <MCInst #{{[0-9]+}} LUi
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%hi(d))>>
 ; MMR3-NEXT:    sw $4, %lo(d)($1) # <MCInst #{{[0-9]+}} SW_MM
-; MMR3-NEXT:    # <MCOperand Reg:22>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MMR3-NEXT:    addiu $2, $1, %lo(d) # <MCInst #{{[0-9]+}} ADDiu
-; MMR3-NEXT:    # <MCOperand Reg:321>
-; MMR3-NEXT:    # <MCOperand Reg:1>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MMR3-NEXT:    sw16 $5, 4($2) # <MCInst #{{[0-9]+}} SW16_MM
-; MMR3-NEXT:    # <MCOperand Reg:23>
-; MMR3-NEXT:    # <MCOperand Reg:321>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR3-NEXT:    # <MCOperand Imm:4>>
 ; MMR3-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR3-NEXT:    # <MCOperand Reg:19>>
+; MMR3-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS32R6-LABEL: f4:
 ; MIPS32R6:       # %bb.0:
 ; MIPS32R6-NEXT:    lui $1, %hi(d) # <MCInst #{{[0-9]+}} LUi
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%hi(d))>>
 ; MIPS32R6-NEXT:    sw $4, %lo(d)($1) # <MCInst #{{[0-9]+}} SW
-; MIPS32R6-NEXT:    # <MCOperand Reg:22>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo(d) # <MCInst #{{[0-9]+}} ADDiu
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MIPS32R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR
-; MIPS32R6-NEXT:    # <MCOperand Reg:21>
-; MIPS32R6-NEXT:    # <MCOperand Reg:19>>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS32R6-NEXT:    sw $5, 4($1) # <MCInst #{{[0-9]+}} SW
-; MIPS32R6-NEXT:    # <MCOperand Reg:23>
-; MIPS32R6-NEXT:    # <MCOperand Reg:1>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS32R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS32R6-NEXT:    # <MCOperand Imm:4>>
 ;
 ; MMR6-LABEL: f4:
 ; MMR6:       # %bb.0:
 ; MMR6-NEXT:    lui $1, %hi(d) # <MCInst #{{[0-9]+}} LUi
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%hi(d))>>
 ; MMR6-NEXT:    sw $4, %lo(d)($1) # <MCInst #{{[0-9]+}} SW_MM
-; MMR6-NEXT:    # <MCOperand Reg:22>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MMR6-NEXT:    addiu $2, $1, %lo(d) # <MCInst #{{[0-9]+}} ADDiu
-; MMR6-NEXT:    # <MCOperand Reg:321>
-; MMR6-NEXT:    # <MCOperand Reg:1>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ; MMR6-NEXT:    sw16 $5, 4($2) # <MCInst #{{[0-9]+}} SW16_MM
-; MMR6-NEXT:    # <MCOperand Reg:23>
-; MMR6-NEXT:    # <MCOperand Reg:321>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR6-NEXT:    # <MCOperand Imm:4>>
 ; MMR6-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR6-NEXT:    # <MCOperand Reg:19>>
+; MMR6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MIPS4-LABEL: f4:
 ; MIPS4:       # %bb.0:
 ; MIPS4-NEXT:    lui $1, %highest(d) # <MCInst #{{[0-9]+}} LUi64
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%highest(d))>>
 ; MIPS4-NEXT:    daddiu $1, $1, %higher(d) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%higher(d))>>
 ; MIPS4-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    daddiu $1, $1, %hi(d) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%hi(d))>>
 ; MIPS4-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS4-NEXT:    # <MCOperand Reg:30>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Imm:16>>
 ; MIPS4-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR
-; MIPS4-NEXT:    # <MCOperand Reg:301>>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS4-NEXT:    sd $4, %lo(d)($1) # <MCInst #{{[0-9]+}} SD
-; MIPS4-NEXT:    # <MCOperand Reg:356>
-; MIPS4-NEXT:    # <MCOperand Reg:30>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS4-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS4-NEXT:    # <MCOperand Expr:(%lo(d))>>
 ;
 ; MIPS64R6-LABEL: f4:
 ; MIPS64R6:       # %bb.0:
 ; MIPS64R6-NEXT:    lui $1, %highest(d) # <MCInst #{{[0-9]+}} LUi64
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%highest(d))>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(d) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%higher(d))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(d) # <MCInst #{{[0-9]+}} DADDiu
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%hi(d))>>
 ; MIPS64R6-NEXT:    dsll $1, $1, 16 # <MCInst #{{[0-9]+}} DSLL
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Imm:16>>
 ; MIPS64R6-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JALR64
-; MIPS64R6-NEXT:    # <MCOperand Reg:355>
-; MIPS64R6-NEXT:    # <MCOperand Reg:301>>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MIPS64R6-NEXT:    sd $4, %lo(d)($1) # <MCInst #{{[0-9]+}} SD
-; MIPS64R6-NEXT:    # <MCOperand Reg:356>
-; MIPS64R6-NEXT:    # <MCOperand Reg:30>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MIPS64R6-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MIPS64R6-NEXT:    # <MCOperand Expr:(%lo(d))>>
   store i64 %a, i64 * @d
   ret void
diff --git a/llvm/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll b/llvm/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll
index 3f86bd24f34ff..faf37e8a020e3 100644
--- a/llvm/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll
+++ b/llvm/test/CodeGen/Mips/micromips-pseudo-mtlohi-expand.ll
@@ -8,52 +8,52 @@ define i64 @test(i32 signext %a, i32 signext %b) {
 ; MMR2-LABEL: test:
 ; MMR2:       # %bb.0: # %entry
 ; MMR2-NEXT:    li16 $2, 0 # <MCInst #{{[0-9]+}} LI16_MM
-; MMR2-NEXT:    # <MCOperand Reg:321>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-NEXT:    # <MCOperand Imm:0>>
 ; MMR2-NEXT:    li16 $3, 1 # <MCInst #{{[0-9]+}} LI16_MM
-; MMR2-NEXT:    # <MCOperand Reg:322>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-NEXT:    # <MCOperand Imm:1>>
 ; MMR2-NEXT:    mtlo $3 # <MCInst #{{[0-9]+}} MTLO_MM
-; MMR2-NEXT:    # <MCOperand Reg:322>>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-NEXT:    mthi $2 # <MCInst #{{[0-9]+}} MTHI_MM
-; MMR2-NEXT:    # <MCOperand Reg:321>>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-NEXT:    madd $4, $5 # <MCInst #{{[0-9]+}} MADD
-; MMR2-NEXT:    # <MCOperand Reg:22>
-; MMR2-NEXT:    # <MCOperand Reg:23>>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-NEXT:    mflo16 $2 # <MCInst #{{[0-9]+}} MFLO16_MM
-; MMR2-NEXT:    # <MCOperand Reg:321>>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-NEXT:    mfhi16 $3 # <MCInst #{{[0-9]+}} MFHI16_MM
-; MMR2-NEXT:    # <MCOperand Reg:322>>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-NEXT:    jrc $ra # <MCInst #{{[0-9]+}} JRC16_MM
-; MMR2-NEXT:    # <MCOperand Reg:19>>
+; MMR2-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ;
 ; MMR2-DSP-LABEL: test:
 ; MMR2-DSP:       # %bb.0: # %entry
 ; MMR2-DSP-NEXT:    li16 $2, 0 # <MCInst #{{[0-9]+}} LI16_MM
-; MMR2-DSP-NEXT:    # <MCOperand Reg:321>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-DSP-NEXT:    # <MCOperand Imm:0>>
 ; MMR2-DSP-NEXT:    li16 $3, 1 # <MCInst #{{[0-9]+}} LI16_MM
-; MMR2-DSP-NEXT:    # <MCOperand Reg:322>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
 ; MMR2-DSP-NEXT:    # <MCOperand Imm:1>>
 ; MMR2-DSP-NEXT:    mtlo $3, $ac0 # <MCInst #{{[0-9]+}} MTLO_DSP
-; MMR2-DSP-NEXT:    # <MCOperand Reg:291>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:322>>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-DSP-NEXT:    mthi $2, $ac0 # <MCInst #{{[0-9]+}} MTHI_DSP
-; MMR2-DSP-NEXT:    # <MCOperand Reg:253>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:321>>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-DSP-NEXT:    madd $ac0, $4, $5 # <MCInst #{{[0-9]+}} MADD_DSP
-; MMR2-DSP-NEXT:    # <MCOperand Reg:26>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:22>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:23>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:26>>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-DSP-NEXT:    mflo $2, $ac0 # <MCInst #{{[0-9]+}} MFLO_DSP
-; MMR2-DSP-NEXT:    # <MCOperand Reg:321>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:26>>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-DSP-NEXT:    jr $ra # <MCInst #{{[0-9]+}} JR_MM
-; MMR2-DSP-NEXT:    # <MCOperand Reg:19>>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 ; MMR2-DSP-NEXT:    mfhi $3, $ac0 # <MCInst #{{[0-9]+}} MFHI_DSP
-; MMR2-DSP-NEXT:    # <MCOperand Reg:322>
-; MMR2-DSP-NEXT:    # <MCOperand Reg:26>>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>
+; MMR2-DSP-NEXT:    # <MCOperand Reg:{{[0-9]+}}>>
 entry:
   %conv = sext i32 %a to i64
   %conv1 = sext i32 %b to i64

From c7f0b33fa5b0929288265891f874b646e23b4685 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 29 May 2019 14:59:32 +0000
Subject: [PATCH 0495/1176] [mips] Check argument for __builtin_msa_ctcmsa /
 __builtin_msa_cfcmsa

The `__builtin_msa_ctcmsa` and `__builtin_msa_cfcmsa` builtins are mapped
to the `ctcmsa` and `cfcmsa` instructions respectively. While MSA
control registers have indexes in 0..7 range, the instructions accept
register index in 0..31 range [1].

[1] MIPS Architecture for Programmers Volume IV-j:
    The MIPS64 SIMD Architecture Module
https://www.mips.com/?do-download=the-mips64-simd-architecture-module

llvm-svn: 361967
---
 clang/lib/Sema/SemaChecking.cpp              | 2 ++
 clang/test/CodeGen/builtins-mips-msa-error.c | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f3d8f30567d3e..2159a20c2c1fb 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3034,6 +3034,8 @@ bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   // These intrinsics take an unsigned 5 bit immediate.
   // The first block of intrinsics actually have an unsigned 5 bit field,
   // not a df/n field.
+  case Mips::BI__builtin_msa_cfcmsa:
+  case Mips::BI__builtin_msa_ctcmsa: i = 0; l = 0; u = 31; break;
   case Mips::BI__builtin_msa_clei_u_b:
   case Mips::BI__builtin_msa_clei_u_h:
   case Mips::BI__builtin_msa_clei_u_w:
diff --git a/clang/test/CodeGen/builtins-mips-msa-error.c b/clang/test/CodeGen/builtins-mips-msa-error.c
index 0454a19b1cd2b..11ddb08d9f4e9 100644
--- a/clang/test/CodeGen/builtins-mips-msa-error.c
+++ b/clang/test/CodeGen/builtins-mips-msa-error.c
@@ -77,6 +77,8 @@ void test(void) {
   v4i32_r = __msa_ceqi_w(v4i32_a, 16);               // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
   v2i64_r = __msa_ceqi_d(v2i64_a, 16);               // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
 
+  int_r = __msa_cfcmsa(32);                          // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+
   v16i8_r = __msa_clei_s_b(v16i8_a, 16);             // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
   v8i16_r = __msa_clei_s_h(v8i16_a, 16);             // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
   v4i32_r = __msa_clei_s_w(v4i32_a, 16);             // expected-error {{argument value 16 is outside the valid range [-16, 15]}}
@@ -107,6 +109,8 @@ void test(void) {
   int_r = __msa_copy_u_w(v4u32_a, 4);                // expected-error {{argument value 4 is outside the valid range [0, 3]}}
   ll_r  = __msa_copy_u_d(v2i64_a, 2);                // expected-error {{argument value 2 is outside the valid range [0, 1]}}
 
+  __builtin_msa_ctcmsa(32, 777);                     // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+
   v16i8_r = __msa_insve_b(v16i8_r, 16, v16i8_a);     // expected-error {{argument value 16 is outside the valid range [0, 15]}}
   v8i16_r = __msa_insve_h(v8i16_r, 8, v8i16_a);      // expected-error {{argument value 8 is outside the valid range [0, 7]}}
   v4i32_r = __msa_insve_w(v4i32_r, 4, v4i32_a);      // expected-error {{argument value 4 is outside the valid range [0, 3]}}

From 4f09d9fcfa2839d0a02d7d5c8402380d91d6caca Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Wed, 29 May 2019 15:02:16 +0000
Subject: [PATCH 0496/1176] Qualify use of llvm::empty that's ambiguous with
 std::empty

llvm-svn: 361968
---
 llvm/lib/CodeGen/MachineModuleInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 8e03b59607dbf..aadcd7319799b 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -209,7 +209,7 @@ bool MachineModuleInfo::doInitialization(Module &M) {
   HasSplitStack = HasNosplitStack = false;
   AddrLabelSymbols = nullptr;
   TheModule = &M;
-  DbgInfoAvailable = !empty(M.debug_compile_units());
+  DbgInfoAvailable = !llvm::empty(M.debug_compile_units());
   return false;
 }
 

From c450874cb847b2c27ecc8776d39db2640627c4d0 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Wed, 29 May 2019 15:10:19 +0000
Subject: [PATCH 0497/1176] [clangd] Map typedefs to the same LSP completion
 kind as VSCode

For consistency and, more importantly, to get a nicer icon for those in VSCode.

llvm-svn: 361969
---
 clang-tools-extra/clangd/CodeComplete.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 4328a647f4bbe..2186bdee44865 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -92,8 +92,10 @@ CompletionItemKind toCompletionItemKind(index::SymbolKind Kind) {
   case SK::Extension:
   case SK::Union:
     return CompletionItemKind::Class;
-  // FIXME(ioeric): figure out whether reference is the right type for aliases.
   case SK::TypeAlias:
+    // We use the same kind as the VSCode C++ extension.
+    // FIXME: pick a better option when we have one.
+    return CompletionItemKind::Interface;
   case SK::Using:
     return CompletionItemKind::Reference;
   case SK::Function:
@@ -481,7 +483,8 @@ struct CodeCompletionBuilder {
         return EmptyArgs ? "()" : "($0)";
       return *Snippet; // Not an arg snippet?
     }
-    if (Completion.Kind == CompletionItemKind::Reference ||
+    // 'CompletionItemKind::Interface' matches template type aliases.
+    if (Completion.Kind == CompletionItemKind::Interface ||
         Completion.Kind == CompletionItemKind::Class) {
       if (Snippet->front() != '<')
         return *Snippet; // Not an arg snippet?

From 6b03a1b42352e38fafff67c6d8d685144b8f9a4c Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Wed, 29 May 2019 15:17:55 +0000
Subject: [PATCH 0498/1176] Add additional constraints on midpoint(pointer,
 pointer). Fixes PR#42037.

llvm-svn: 361970
---
 libcxx/include/numeric                            |  7 +++++--
 .../numeric.ops.midpoint/midpoint.fail.cpp        | 15 ++++++++++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/numeric b/libcxx/include/numeric
index 4b08611290d56..62cc29cbd6a79 100644
--- a/libcxx/include/numeric
+++ b/libcxx/include/numeric
@@ -527,7 +527,7 @@ lcm(_Tp __m, _Up __n)
 #if _LIBCPP_STD_VER > 17
 template <class _Tp>
 _LIBCPP_INLINE_VISIBILITY constexpr
-enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp>, _Tp>
+enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp>
 midpoint(_Tp __a, _Tp __b) noexcept
 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 {
@@ -548,7 +548,10 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 
 template <class _TPtr>
 _LIBCPP_INLINE_VISIBILITY constexpr
-enable_if_t<is_pointer_v<_TPtr>, _TPtr>
+enable_if_t<is_pointer_v<_TPtr> 
+             && is_object_v<remove_pointer_t<_TPtr>> 
+             && ! is_void_v<remove_pointer_t<_TPtr>> 
+             && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr>
 midpoint(_TPtr __a, _TPtr __b) noexcept
 {
     return __a + _VSTD::midpoint(ptrdiff_t(0), __b - __a);
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.fail.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.fail.cpp
index 2352ce788be45..c7576108af568 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.fail.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.fail.cpp
@@ -18,13 +18,22 @@
 
 #include "test_macros.h"
 
+int func1 () { return 1; }
+int func2 () { return 2; }
+
+struct Incomplete;
+Incomplete *ip = nullptr;
+void       *vp = nullptr;
+
 int main(int, char**)
 {
-    (void) std::midpoint(false, true); // expected-error {{no matching function for call to 'midpoint'}}
+    (void) std::midpoint(false, true);  // expected-error {{no matching function for call to 'midpoint'}}
 
 //  A couple of odd pointer types that should fail
-    (void) std::midpoint(nullptr, nullptr);     // expected-error {{no matching function for call to 'midpoint'}}
-    (void) std::midpoint((void *)0, (void *)0); // expected-error@numeric:* {{arithmetic on pointers to void}}
+    (void) std::midpoint(nullptr, nullptr);  // expected-error {{no matching function for call to 'midpoint'}}
+    (void) std::midpoint(func1, func2);      // expected-error {{no matching function for call to 'midpoint'}}
+    (void) std::midpoint(ip, ip);            // expected-error {{no matching function for call to 'midpoint'}}
+    (void) std::midpoint(vp, vp);            // expected-error {{no matching function for call to 'midpoint'}}
     
     return 0;
 }

From 4ebbc4d73aac1f55bc17a3766218315a3144ee3a Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Wed, 29 May 2019 15:21:28 +0000
Subject: [PATCH 0499/1176] [NFC][InstCombine] Add unary FNeg tests to fsub.ll
 known-never-nan.ll

llvm-svn: 361971
---
 llvm/test/Transforms/InstCombine/fsub.ll      | 127 ++++++++++++++++++
 .../Transforms/InstCombine/known-never-nan.ll |  11 ++
 2 files changed, 138 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll
index 189bc4ca850f5..33f994da29b7f 100644
--- a/llvm/test/Transforms/InstCombine/fsub.ll
+++ b/llvm/test/Transforms/InstCombine/fsub.ll
@@ -27,6 +27,18 @@ define float @neg_sub_nsz(float %x, float %y) {
   ret float %t2
 }
 
+; FIXME: This combine isn't working.
+define float @unary_neg_sub_nsz(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_sub_nsz(
+; CHECK-NEXT:    [[T1:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fneg nsz float [[T1]]
+; CHECK-NEXT:    ret float [[T2]]
+;
+  %t1 = fsub float %x, %y
+  %t2 = fneg nsz float %t1
+  ret float %t2
+}
+
 ; If the subtract has another use, we don't do the transform (even though it
 ; doesn't increase the IR instruction count) because we assume that fneg is
 ; easier to analyze and generally cheaper than generic fsub.
@@ -47,6 +59,19 @@ define float @neg_sub_nsz_extra_use(float %x, float %y) {
   ret float %t2
 }
 
+define float @unary_neg_sub_nsz_extra_use(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_sub_nsz_extra_use(
+; CHECK-NEXT:    [[T1:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fneg nsz float [[T1]]
+; CHECK-NEXT:    call void @use(float [[T1]])
+; CHECK-NEXT:    ret float [[T2]]
+;
+  %t1 = fsub float %x, %y
+  %t2 = fneg nsz float %t1
+  call void @use(float %t1)
+  ret float %t2
+}
+
 ; With nsz: Z - (X - Y) --> Z + (Y - X)
 
 define float @sub_sub_nsz(float %x, float %y, float %z) {
@@ -140,6 +165,16 @@ define float @neg_op1(float %x, float %y) {
   ret float %r
 }
 
+define float @unary_neg_op1(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_op1(
+; CHECK-NEXT:    [[R:%.*]] = fadd float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[R]]
+;
+  %negy = fneg float %y
+  %r = fsub float %x, %negy
+  ret float %r
+}
+
 define <2 x float> @neg_op1_vec(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @neg_op1_vec(
 ; CHECK-NEXT:    [[R:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -150,6 +185,16 @@ define <2 x float> @neg_op1_vec(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %r
 }
 
+define <2 x float> @unary_neg_op1_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_neg_op1_vec(
+; CHECK-NEXT:    [[R:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %negy = fneg <2 x float> %y
+  %r = fsub <2 x float> %x, %negy
+  ret <2 x float> %r
+}
+
 define <2 x float> @neg_op1_vec_undef(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @neg_op1_vec_undef(
 ; CHECK-NEXT:    [[R:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -174,6 +219,18 @@ define double @neg_ext_op1(float %a, double %b) {
   ret double %t3
 }
 
+define double @unary_neg_ext_op1(float %a, double %b) {
+; CHECK-LABEL: @unary_neg_ext_op1(
+; CHECK-NEXT:    [[TMP1:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT:    [[T3:%.*]] = fadd double [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret double [[T3]]
+;
+  %t1 = fneg float %a
+  %t2 = fpext float %t1 to double
+  %t3 = fsub double %b, %t2
+  ret double %t3
+}
+
 ; Verify that vectors work too.
 
 define <2 x float> @neg_trunc_op1(<2 x double> %a, <2 x float> %b) {
@@ -188,6 +245,18 @@ define <2 x float> @neg_trunc_op1(<2 x double> %a, <2 x float> %b) {
   ret <2 x float> %t3
 }
 
+define <2 x float> @unary_neg_trunc_op1(<2 x double> %a, <2 x float> %b) {
+; CHECK-LABEL: @unary_neg_trunc_op1(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc <2 x double> [[A:%.*]] to <2 x float>
+; CHECK-NEXT:    [[T3:%.*]] = fadd <2 x float> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[T3]]
+;
+  %t1 = fneg <2 x double> %a
+  %t2 = fptrunc <2 x double> %t1 to <2 x float>
+  %t3 = fsub <2 x float> %b, %t2
+  ret <2 x float> %t3
+}
+
 ; No FMF needed, but they should propagate to the fadd.
 
 define double @neg_ext_op1_fast(float %a, double %b) {
@@ -202,6 +271,18 @@ define double @neg_ext_op1_fast(float %a, double %b) {
   ret double %t3
 }
 
+define double @unary_neg_ext_op1_fast(float %a, double %b) {
+; CHECK-LABEL: @unary_neg_ext_op1_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT:    [[T3:%.*]] = fadd fast double [[TMP1]], [[B:%.*]]
+; CHECK-NEXT:    ret double [[T3]]
+;
+  %t1 = fneg float %a
+  %t2 = fpext float %t1 to double
+  %t3 = fsub fast double %b, %t2
+  ret double %t3
+}
+
 ; Extra use should prevent the transform.
 
 define float @neg_ext_op1_extra_use(half %a, float %b) {
@@ -219,6 +300,21 @@ define float @neg_ext_op1_extra_use(half %a, float %b) {
   ret float %t3
 }
 
+define float @unary_neg_ext_op1_extra_use(half %a, float %b) {
+; CHECK-LABEL: @unary_neg_ext_op1_extra_use(
+; CHECK-NEXT:    [[T1:%.*]] = fneg half [[A:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fpext half [[T1]] to float
+; CHECK-NEXT:    [[T3:%.*]] = fsub float [[B:%.*]], [[T2]]
+; CHECK-NEXT:    call void @use(float [[T2]])
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fneg half %a
+  %t2 = fpext half %t1 to float
+  %t3 = fsub float %b, %t2
+  call void @use(float %t2)
+  ret float %t3
+}
+
 ; One-use fptrunc is always hoisted above fneg, so the corresponding
 ; multi-use bug for fptrunc isn't visible with a fold starting from
 ; the last fsub.
@@ -238,6 +334,22 @@ define float @neg_trunc_op1_extra_use(double %a, float %b) {
   ret float %t3
 }
 
+; FIXME: This combine isn't working.
+define float @unary_neg_trunc_op1_extra_use(double %a, float %b) {
+; CHECK-LABEL: @unary_neg_trunc_op1_extra_use(
+; CHECK-NEXT:    [[T1:%.*]] = fneg double [[A:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fptrunc double [[T1]] to float
+; CHECK-NEXT:    [[T3:%.*]] = fsub float [[B:%.*]], [[T2]]
+; CHECK-NEXT:    call void @use(float [[T2]])
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fneg double %a
+  %t2 = fptrunc double %t1 to float
+  %t3 = fsub float %b, %t2
+  call void @use(float %t2)
+  ret float %t3
+}
+
 ; Extra uses should prevent the transform.
 
 define float @neg_trunc_op1_extra_uses(double %a, float %b) {
@@ -255,6 +367,21 @@ define float @neg_trunc_op1_extra_uses(double %a, float %b) {
   ret float %t3
 }
 
+define float @unary_neg_trunc_op1_extra_uses(double %a, float %b) {
+; CHECK-LABEL: @unary_neg_trunc_op1_extra_uses(
+; CHECK-NEXT:    [[T1:%.*]] = fneg double [[A:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fptrunc double [[T1]] to float
+; CHECK-NEXT:    [[T3:%.*]] = fsub float [[B:%.*]], [[T2]]
+; CHECK-NEXT:    call void @use2(float [[T2]], double [[T1]])
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fneg double %a
+  %t2 = fptrunc double %t1 to float
+  %t3 = fsub float %b, %t2
+  call void @use2(float %t2, double %t1)
+  ret float %t3
+}
+
 ; Don't negate a constant expression to form fadd and induce infinite looping:
 ; https://bugs.llvm.org/show_bug.cgi?id=37605
 
diff --git a/llvm/test/Transforms/InstCombine/known-never-nan.ll b/llvm/test/Transforms/InstCombine/known-never-nan.ll
index 23a0780fe4390..c5fdc8e800cc6 100644
--- a/llvm/test/Transforms/InstCombine/known-never-nan.ll
+++ b/llvm/test/Transforms/InstCombine/known-never-nan.ll
@@ -130,6 +130,17 @@ define i1 @nnan_fneg() {
   ret i1 %tmp
 }
 
+define i1 @nnan_unary_fneg() {
+; CHECK-LABEL: @nnan_unary_fneg(
+; CHECK-NEXT:    [[NNAN:%.*]] = call nnan double @func()
+; CHECK-NEXT:    ret i1 true
+;
+  %nnan = call nnan double @func()
+  %op = fneg double %nnan
+  %tmp = fcmp ord double %op, %op
+  ret i1 %tmp
+}
+
 define i1 @fpext_maybe_nan(float %arg0) {
 ; CHECK-LABEL: @fpext_maybe_nan(
 ; CHECK-NEXT:    [[TMP:%.*]] = fcmp ord float [[ARG0:%.*]], 0.000000e+00

From 124cba0b8153a9e8f0f32bdd523f877055d9fe72 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 15:25:19 +0000
Subject: [PATCH 0500/1176] [analyzer] print() JSONify: Store implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61912

llvm-svn: 361972
---
 clang/include/clang/Basic/JsonSupport.h       | 27 ++++++
 .../Core/PathSensitive/ProgramState.h         | 10 ++-
 .../StaticAnalyzer/Core/PathSensitive/Store.h |  3 +-
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  | 20 ++---
 clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 82 ++++++++++++-------
 clang/test/Analysis/expr-inspection.c         | 28 +++++--
 6 files changed, 117 insertions(+), 53 deletions(-)
 create mode 100644 clang/include/clang/Basic/JsonSupport.h

diff --git a/clang/include/clang/Basic/JsonSupport.h b/clang/include/clang/Basic/JsonSupport.h
new file mode 100644
index 0000000000000..43fb48a3c7759
--- /dev/null
+++ b/clang/include/clang/Basic/JsonSupport.h
@@ -0,0 +1,27 @@
+//===- JsonSupport.h - JSON Output Utilities --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_JSONSUPPORT_H
+#define LLVM_CLANG_BASIC_JSONSUPPORT_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/Support/raw_ostream.h"
+
+
+namespace clang {
+
+inline raw_ostream &Indent(raw_ostream &Out, const unsigned int Space,
+                           bool IsDot) {
+  for (unsigned int I = 0; I < Space * 2; ++I)
+    Out << (IsDot ? "&nbsp;" : " ");
+  return Out;
+}
+
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_JSONSUPPORT_H
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index 2c6465d5fb13f..4092f40747858 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -424,10 +424,12 @@ class ProgramState : public llvm::FoldingSetNode {
   }
 
   // Pretty-printing.
-  void print(raw_ostream &Out, const char *nl = "\n", const char *sep = "",
-             const LocationContext *CurrentLC = nullptr) const;
-  void printDOT(raw_ostream &Out,
-                const LocationContext *CurrentLC = nullptr) const;
+  void printJson(raw_ostream &Out, const LocationContext *LCtx = nullptr,
+                 const char *NL = "\n", const char *Sep = "",
+                 unsigned int Space = 0, bool IsDot = false) const;
+
+  void printDOT(raw_ostream &Out, const LocationContext *LCtx = nullptr,
+                unsigned int Space = 0) const;
 
   void dump() const;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
index 177368332967f..cbff299539449 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
@@ -253,7 +253,8 @@ class StoreManager {
   virtual bool scanReachableSymbols(Store S, const MemRegion *R,
                                     ScanReachableSymbols &Visitor) = 0;
 
-  virtual void print(Store store, raw_ostream &Out, const char* nl) = 0;
+  virtual void printJson(raw_ostream &Out, Store S, const char *NL,
+                         unsigned int Space, bool IsDot) const = 0;
 
   class BindingsHandler {
   public:
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 04ed507055819..f5c7af3b1c287 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -440,16 +440,16 @@ void ProgramState::setStore(const StoreRef &newStore) {
 //  State pretty-printing.
 //===----------------------------------------------------------------------===//
 
-void ProgramState::print(raw_ostream &Out,
-                         const char *NL, const char *Sep,
-                         const LocationContext *LC) const {
+void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
+                             const char *NL, const char *Sep,
+                             unsigned int Space, bool IsDot) const {
   // Print the store.
   ProgramStateManager &Mgr = getStateManager();
   const ASTContext &Context = getStateManager().getContext();
-  Mgr.getStoreManager().print(getStore(), Out, NL);
+  Mgr.getStoreManager().printJson(Out, getStore(), NL, Space, IsDot);
 
   // Print out the environment.
-  Env.print(Out, NL, Sep, Context, LC);
+  Env.print(Out, NL, Sep, Context, LCtx);
 
   // Print out the constraints.
   Mgr.getConstraintManager().print(this, Out, NL, Sep);
@@ -458,16 +458,16 @@ void ProgramState::print(raw_ostream &Out,
   printDynamicTypeInfo(this, Out, NL, Sep);
 
   // Print checker-specific data.
-  Mgr.getOwningEngine().printState(Out, this, NL, Sep, LC);
+  Mgr.getOwningEngine().printState(Out, this, NL, Sep, LCtx);
 }
 
-void ProgramState::printDOT(raw_ostream &Out,
-                            const LocationContext *LC) const {
-  print(Out, "\\l", "\\|", LC);
+void ProgramState::printDOT(raw_ostream &Out, const LocationContext *LCtx,
+                            unsigned int Space) const {
+  printJson(Out, LCtx, "\\l", "\\|", Space, /*IsDot=*/true);
 }
 
 LLVM_DUMP_METHOD void ProgramState::dump() const {
-  print(llvm::errs());
+  printJson(llvm::errs());
 }
 
 AnalysisManager& ProgramState::getAnalysisManager() const {
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 603be35bdba05..53d0cf54d703f 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -19,6 +19,7 @@
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Analysis/Analyses/LiveVariables.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
@@ -120,21 +121,21 @@ BindingKey BindingKey::Make(const MemRegion *R, Kind k) {
 }
 
 namespace llvm {
-  static inline
-  raw_ostream &operator<<(raw_ostream &os, BindingKey K) {
-    os << '(' << K.getRegion();
-    if (!K.hasSymbolicOffset())
-      os << ',' << K.getOffset();
-    os << ',' << (K.isDirect() ? "direct" : "default")
-       << ')';
-    return os;
-  }
+static inline raw_ostream &operator<<(raw_ostream &Out, BindingKey K) {
+  Out << "\"kind\": \"" << (K.isDirect() ? "Direct" : "Default")
+      << "\", \"offset\": ";
+
+  if (!K.hasSymbolicOffset())
+    Out << K.getOffset();
+  else
+    Out << "null";
 
-} // end llvm namespace
+  return Out;
+}
+
+} // namespace llvm
 
-#ifndef NDEBUG
 LLVM_DUMP_METHOD void BindingKey::dump() const { llvm::errs() << *this; }
-#endif
 
 //===----------------------------------------------------------------------===//
 // Actual Store type.
@@ -206,18 +207,31 @@ class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *,
     return asImmutableMap().getRootWithoutRetain();
   }
 
-  void dump(raw_ostream &OS, const char *nl) const {
-   for (iterator I = begin(), E = end(); I != E; ++I) {
-     const ClusterBindings &Cluster = I.getData();
-     for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end();
-          CI != CE; ++CI) {
-       OS << ' ' << CI.getKey() << " : " << CI.getData() << nl;
-     }
-     OS << nl;
-   }
+  void printJson(raw_ostream &Out, const char *NL = "\n",
+                 unsigned int Space = 0, bool IsDot = false) const {
+    for (iterator I = begin(); I != end(); ++I) {
+      Indent(Out, Space, IsDot)
+          << "{ \"cluster\": \"" << I.getKey() << "\", \"items\": [" << NL;
+
+      ++Space;
+      const ClusterBindings &CB = I.getData();
+      for (ClusterBindings::iterator CI = CB.begin(); CI != CB.end(); ++CI) {
+        Indent(Out, Space, IsDot) << "{ " << CI.getKey() << ", \"value\": \""
+                                  << CI.getData() << "\" }";
+        if (std::next(CI) != CB.end())
+          Out << ',';
+        Out << NL;
+      }
+
+      --Space;
+      Indent(Out, Space, IsDot) << "]}";
+      if (std::next(I) != end())
+        Out << ',';
+      Out << NL;
+    }
   }
 
-  LLVM_DUMP_METHOD void dump() const { dump(llvm::errs(), "\n"); }
+  LLVM_DUMP_METHOD void dump() const { printJson(llvm::errs()); }
 };
 } // end anonymous namespace
 
@@ -594,7 +608,8 @@ class RegionStoreManager : public StoreManager {
                              RBFactory.getTreeFactory());
   }
 
-  void print(Store store, raw_ostream &Out, const char* nl) override;
+  void printJson(raw_ostream &Out, Store S, const char *NL = "\n",
+                 unsigned int Space = 0, bool IsDot = false) const override;
 
   void iterBindings(Store store, BindingsHandler& f) override {
     RegionBindingsRef B = getRegionBindings(store);
@@ -2611,11 +2626,18 @@ StoreRef RegionStoreManager::removeDeadBindings(Store store,
 // Utility methods.
 //===----------------------------------------------------------------------===//
 
-void RegionStoreManager::print(Store store, raw_ostream &OS,
-                               const char* nl) {
-  RegionBindingsRef B = getRegionBindings(store);
-  OS << "Store (direct and default bindings), "
-     << B.asStore()
-     << " :" << nl;
-  B.dump(OS, nl);
+void RegionStoreManager::printJson(raw_ostream &Out, Store S, const char *NL,
+                                   unsigned int Space, bool IsDot) const {
+  RegionBindingsRef Bindings = getRegionBindings(S);
+
+  Indent(Out, Space, IsDot) << "\"store\": ";
+
+  if (Bindings.isEmpty()) {
+    Out << "null," << NL;
+    return;
+  }
+
+  Out << '[' << NL;
+  Bindings.printJson(Out, NL, ++Space, IsDot);
+  Indent(Out, --Space, IsDot) << "]," << NL;
 }
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index 8419542889329..33ff460d79bae 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -1,4 +1,6 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -verify %s 2>&1 | FileCheck %s
+// RUN: %clang_analyze_cc1 \
+// RUN:  -analyzer-checker=debug.ExprInspection \
+// RUN:  -verify %s 2>&1 | FileCheck %s
 
 // Self-tests for the debug.ExprInspection checker.
 
@@ -10,16 +12,26 @@ void foo(int x) {
   clang_analyzer_dump(x); // expected-warning{{reg_$0<int x>}}
   clang_analyzer_dump(x + (-1)); // expected-warning{{(reg_$0<int x>) + -1}}
   int y = 1;
-  clang_analyzer_printState();
-  for (; y < 3; ++y)
+  for (; y < 3; ++y) {
     clang_analyzer_numTimesReached(); // expected-warning{{2}}
+
+    if (y == 2) {
+      int z = x > 13;
+      if (!z)
+        clang_analyzer_printState();
+    }
+  }
 }
 
-// CHECK: Store (direct and default bindings)
-// CHECK-NEXT: (y,0,direct) : 1 S32b
+// CHECK:      "store": [
+// CHECK-NEXT:   { "cluster": "y", "items": [
+// CHECK-NEXT:     { "kind": "Direct", "offset": 0, "value": "2 S32b" }
+// CHECK-NEXT:   ]}
+// CHECK-NEXT: ]
 
-// CHECK: Expressions by stack frame:
+// CHECK:      Expressions by stack frame:
 // CHECK-NEXT: #0 Calling foo
-// CHECK-NEXT: clang_analyzer_printState : &code{clang_analyzer_printState}
+// CHECK-NEXT: (LC1, S847) clang_analyzer_printState : &code{clang_analyzer_printState}
 
-// CHECK: {{(Ranges are empty.)|(Constraints:[[:space:]]*$)}}
+// CHECK:      Ranges of symbol values:
+// CHECK-NEXT:  reg_$0<int x> : { [-2147483648, 13] }

From 28f384a7c71b42d33da7c43ab29853d50603295b Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Wed, 29 May 2019 15:29:35 +0000
Subject: [PATCH 0501/1176] [NFC][InstCombine] Add unary FNeg tests to
 fpcast.ll and fpextend.ll

llvm-svn: 361973
---
 llvm/test/Transforms/InstCombine/fpcast.ll   | 36 ++++++++++++++++++++
 llvm/test/Transforms/InstCombine/fpextend.ll | 16 +++++++++
 2 files changed, 52 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index bfc1de4ff6ddd..f0f5848ad0033 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -40,6 +40,18 @@ define half @fneg_fptrunc(float %a) {
   ret half %c
 }
 
+; FIXME: This combine isn't working.
+define half @unary_fneg_fptrunc(float %a) {
+; CHECK-LABEL: @unary_fneg_fptrunc(
+; CHECK-NEXT:    [[B:%.*]] = fneg float [[A:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = fptrunc float [[B]] to half
+; CHECK-NEXT:    ret half [[C]]
+;
+  %b = fneg float %a
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
 define <2 x half> @fneg_fptrunc_vec_undef(<2 x float> %a) {
 ; CHECK-LABEL: @fneg_fptrunc_vec_undef(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half>
@@ -51,6 +63,18 @@ define <2 x half> @fneg_fptrunc_vec_undef(<2 x float> %a) {
   ret <2 x half> %c
 }
 
+; FIXME: This combine isn't working.
+define <2 x half> @unary_fneg_fptrunc_vec(<2 x float> %a) {
+; CHECK-LABEL: @unary_fneg_fptrunc_vec(
+; CHECK-NEXT:    [[B:%.*]] = fneg <2 x float> [[A:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = fptrunc <2 x float> [[B]] to <2 x half>
+; CHECK-NEXT:    ret <2 x half> [[C]]
+;
+  %b = fneg <2 x float> %a
+  %c = fptrunc <2 x float> %b to <2 x half>
+  ret <2 x half> %c
+}
+
 define half @test4-fast(float %a) {
 ; CHECK-LABEL: @test4-fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
@@ -62,6 +86,18 @@ define half @test4-fast(float %a) {
   ret half %c
 }
 
+; FIXME: This combine isn't working.
+define half @test4_unary_fneg-fast(float %a) {
+; CHECK-LABEL: @test4_unary_fneg-fast(
+; CHECK-NEXT:    [[B:%.*]] = fneg fast float [[A:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = fptrunc float [[B]] to half
+; CHECK-NEXT:    ret half [[C]]
+;
+  %b = fneg fast float %a
+  %c = fptrunc float %b to half
+  ret half %c
+}
+
 define half @test5(float %a, float %b, float %c) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[D:%.*]] = fcmp ogt float [[A:%.*]], [[B:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/fpextend.ll b/llvm/test/Transforms/InstCombine/fpextend.ll
index 88401504f570f..f1e7076f3af6f 100644
--- a/llvm/test/Transforms/InstCombine/fpextend.ll
+++ b/llvm/test/Transforms/InstCombine/fpextend.ll
@@ -55,6 +55,22 @@ entry:
   ret float %tmp34
 }
 
+; FIXME: This combine isn't working.
+define float @test4_unary_fneg(float %x) nounwind  {
+; CHECK-LABEL: @test4_unary_fneg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = fpext float [[X:%.*]] to double
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg double [[TMP1]]
+; CHECK-NEXT:    [[TMP34:%.*]] = fptrunc double [[TMP2]] to float
+; CHECK-NEXT:    ret float [[TMP34]]
+;
+entry:
+  %tmp1 = fpext float %x to double
+  %tmp2 = fneg double %tmp1
+  %tmp34 = fptrunc double %tmp2 to float
+  ret float %tmp34
+}
+
 ; Test with vector splat constant
 define <2 x float> @test5(<2 x float> %x) nounwind  {
 ; CHECK-LABEL: @test5(

From 2fa31880752a0b0af08ec144e5392b7ef5939f6b Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Wed, 29 May 2019 15:32:17 +0000
Subject: [PATCH 0502/1176] [CodeComplete] Add semicolon when completing
 patterns for 'static_assert' and 'typedef

This is a trivial follow-up to r360042, which added semicolons to other
pattern completions, so sending without review.

llvm-svn: 361974
---
 clang/lib/Sema/SemaCodeComplete.cpp              |  2 ++
 clang/test/CodeCompletion/keywords.cpp           |  2 +-
 .../test/CodeCompletion/ordinary-name-cxx11.cpp  | 16 ++++++++--------
 clang/test/CodeCompletion/ordinary-name.cpp      |  8 ++++----
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 4f474032f6066..1a30573f3566d 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1713,6 +1713,7 @@ static void AddTypedefResult(ResultBuilder &Results) {
   Builder.AddPlaceholderChunk("type");
   Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
   Builder.AddPlaceholderChunk("name");
+  Builder.AddChunk(CodeCompletionString::CK_SemiColon);
   Results.AddResult(CodeCompletionResult(Builder.TakeString()));
 }
 
@@ -1826,6 +1827,7 @@ static void AddStaticAssertResult(CodeCompletionBuilder &Builder,
   Builder.AddChunk(CodeCompletionString::CK_Comma);
   Builder.AddPlaceholderChunk("message");
   Builder.AddChunk(CodeCompletionString::CK_RightParen);
+  Builder.AddChunk(CodeCompletionString::CK_SemiColon);
   Results.AddResult(CodeCompletionResult(Builder.TakeString()));
 }
 
diff --git a/clang/test/CodeCompletion/keywords.cpp b/clang/test/CodeCompletion/keywords.cpp
index 6e5824c2f1833..977407052beaa 100644
--- a/clang/test/CodeCompletion/keywords.cpp
+++ b/clang/test/CodeCompletion/keywords.cpp
@@ -34,7 +34,7 @@ struct Struct {
 // RUN: %clang_cc1 -std=c++11 -code-completion-at=%s:11:1 %s | FileCheck --check-prefix=CHECK-TOP-LEVEL %s
 // CHECK-TOP-LEVEL: alignas(<#expression#>)
 // CHECK-TOP-LEVEL: constexpr
-// CHECK-TOP-LEVEL: static_assert(<#expression#>, <#message#>)
+// CHECK-TOP-LEVEL: static_assert(<#expression#>, <#message#>);
 // CHECK-TOP-LEVEL: thread_local
 // CHECK-TOP-LEVEL-NOT: final
 // CHECK-TOP-LEVEL-NOT: noexcept
diff --git a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
index f78265b5b1ba9..f955c421f1618 100644
--- a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
+++ b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
@@ -43,7 +43,7 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#size_t#]sizeof(<#expression-or-type#>)
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#size_t#]sizeof...(<#parameter-pack#>)
   // CHECK-CC1-NEXT: COMPLETION: static
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>)
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>);
   // CHECK-CC1-NEXT: COMPLETION: Pattern : static_cast<<#type#>>(<#expression#>)
   // CHECK-CC1-NEXT: COMPLETION: struct
   // CHECK-CC1-NEXT: COMPLETION: Pattern : switch(<#condition#>){
@@ -55,7 +55,7 @@ void foo() {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: TYPEDEF : TYPEDEF
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#std::type_info#]typeid(<#expression-or-type#>)
   // CHECK-CC1-NEXT: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : typeof <#expression#>
@@ -99,14 +99,14 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: short
   // CHECK-CC2-NEXT: COMPLETION: signed
   // CHECK-CC2-NEXT: COMPLETION: static
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>)
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>);
   // CHECK-CC2-NEXT: COMPLETION: struct
   // CHECK-CC2-NEXT: COMPLETION: t : t
   // CHECK-CC2-NEXT: COMPLETION: Pattern : template <#declaration#>
   // CHECK-CC2-NEXT: COMPLETION: Pattern : template<<#parameters#>>
   // CHECK-CC2-NEXT: COMPLETION: thread_local
   // CHECK-CC2-NEXT: COMPLETION: TYPEDEF : TYPEDEF
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typeof <#expression#>
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typeof(<#type#>)
@@ -145,11 +145,11 @@ void foo() {
   // CHECK-CC3-NEXT: COMPLETION: short
   // CHECK-CC3-NEXT: COMPLETION: signed
   // CHECK-CC3-NEXT: COMPLETION: static
-  // CHECK-CC3-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>)
+  // CHECK-CC3-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>);
   // CHECK-CC3-NEXT: COMPLETION: struct
   // CHECK-CC3-NEXT: COMPLETION: Pattern : template<<#parameters#>>
   // CHECK-CC3-NEXT: COMPLETION: thread_local
-  // CHECK-CC3-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-CC3-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-CC3-NEXT: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-CC3-NEXT: COMPLETION: Pattern : typeof <#expression#>
   // CHECK-CC3-NEXT: COMPLETION: Pattern : typeof(<#type#>)
@@ -248,7 +248,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#size_t#]sizeof(<#expression-or-type#>)
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#size_t#]sizeof...(<#parameter-pack#>)
   // CHECK-NO-RTTI-NEXT: COMPLETION: static
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>)
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>);
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : static_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI-NEXT: COMPLETION: struct
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : switch(<#condition#>){
@@ -257,7 +257,7 @@ void foo() {
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#bool#]true
   // CHECK-NO-RTTI-NOT: try
   // CHECK-NO-RTTI: COMPLETION: TYPEDEF : TYPEDEF
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-NO-RTTI-NOT: typeid
   // CHECK-NO-RTTI: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : typeof <#expression#>
diff --git a/clang/test/CodeCompletion/ordinary-name.cpp b/clang/test/CodeCompletion/ordinary-name.cpp
index 1ddd6e1bcccb0..5c700461a664b 100644
--- a/clang/test/CodeCompletion/ordinary-name.cpp
+++ b/clang/test/CodeCompletion/ordinary-name.cpp
@@ -52,7 +52,7 @@ void foo() {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: TYPEDEF : TYPEDEF
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#std::type_info#]typeid(<#expression-or-type#>)
   // CHECK-CC1-NEXT: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : typeof <#expression#>
@@ -96,7 +96,7 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: Pattern : template <#declaration#>
   // CHECK-CC2-NEXT: COMPLETION: Pattern : template<<#parameters#>>
   // CHECK-CC2-NEXT: COMPLETION: TYPEDEF : TYPEDEF
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typeof <#expression#>
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typeof(<#type#>)
@@ -133,7 +133,7 @@ void foo() {
   // CHECK-CC3-NEXT: COMPLETION: static
   // CHECK-CC3-NEXT: COMPLETION: struct
   // CHECK-CC3-NEXT: COMPLETION: Pattern : template<<#parameters#>>
-  // CHECK-CC3-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-CC3-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-CC3-NEXT: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-CC3-NEXT: COMPLETION: Pattern : typeof <#expression#>
   // CHECK-CC3-NEXT: COMPLETION: Pattern : typeof(<#type#>)
@@ -225,7 +225,7 @@ void foo() {
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#bool#]true
   // CHECK-NO-RTTI-NOT: try
   // CHECK-NO-RTTI: COMPLETION: TYPEDEF : TYPEDEF
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : typedef <#type#> <#name#>;
   // CHECK-NO-RTTI-NOT: typeid
   // CHECK-NO-RTTI: COMPLETION: Pattern : typename <#qualifier#>::<#name#>
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : typeof <#expression#>

From d506b0a4843f04ea5e7d36dbb595e31025b60b9a Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 29 May 2019 15:36:42 +0000
Subject: [PATCH 0503/1176] [WebAssembly] Fix signatures of undefined function
 in LTO object which are not called directly.

We recently added special handling for function that are not called
directly but failed to add testing for the LTO case.

See https://reviews.llvm.org/D62153

Differential Revision: https://reviews.llvm.org/D62561

llvm-svn: 361975
---
 lld/test/wasm/lto/undef.ll | 20 ++++++++++++++++++--
 lld/wasm/SymbolTable.cpp   | 18 ++++++++++--------
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/lld/test/wasm/lto/undef.ll b/lld/test/wasm/lto/undef.ll
index 729007b50c059..65e8e4642d5e0 100644
--- a/lld/test/wasm/lto/undef.ll
+++ b/lld/test/wasm/lto/undef.ll
@@ -5,10 +5,22 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-declare void @bar()
+declare i32 @bar()
+
+; Symbols such as foo which are only called indirectly are handled slightly
+; differently with resepect to signature checking.
+declare i32 @foo()
+
+@ptr = global i8* bitcast (i32 ()* @foo to i8*), align 8
+; Ensure access to ptr is not inlined below, even under LTO
+@llvm.used = appending global [1 x i8**] [i8** @ptr], section "llvm.metadata"
 
 define void @_start() {
-  call void @bar()
+  call i32 @bar()
+
+  %addr = load i32 ()*, i32 ()** bitcast (i8** @ptr to i32 ()**), align 8
+  call i32 %addr()
+
   ret void
 }
 
@@ -18,3 +30,7 @@ define void @_start() {
 ; CHECK-NEXT:        Field:           bar
 ; CHECK-NEXT:        Kind:            FUNCTION
 ; CHECK-NEXT:        SigIndex:        0
+; CHECK-NEXT:      - Module:          env
+; CHECK-NEXT:        Field:           foo
+; CHECK-NEXT:        Kind:            FUNCTION
+; CHECK-NEXT:        SigIndex:        0
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index ae424749ffc63..dec59cb98c71d 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -136,14 +136,13 @@ static void reportTypeError(const Symbol *Existing, const InputFile *File,
 // mismatch.
 static bool signatureMatches(FunctionSymbol *Existing,
                              const WasmSignature *NewSig) {
-  if (!NewSig)
-    return true;
-
   const WasmSignature *OldSig = Existing->Signature;
-  if (!OldSig) {
-    Existing->Signature = NewSig;
+
+  // If either function is missing a signature (this happend for bitcode
+  // symbols) then assume they match.  Any mismatch will be reported later
+  // when the LTO objects are added.
+  if (!NewSig || !OldSig)
     return true;
-  }
 
   return *NewSig == *OldSig;
 }
@@ -390,8 +389,9 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
                                           uint32_t Flags, InputFile *File,
                                           const WasmSignature *Sig,
                                           bool IsCalledDirectly) {
-  LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name <<
-             " [" << (Sig ? toString(*Sig) : "none") << "]\n");
+  LLVM_DEBUG(dbgs() << "addUndefinedFunction: " << Name << " ["
+                    << (Sig ? toString(*Sig) : "none")
+                    << "] IsCalledDirectly:" << IsCalledDirectly << "\n");
 
   Symbol *S;
   bool WasInserted;
@@ -414,6 +414,8 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef Name, StringRef ImportName,
       reportTypeError(S, File, WASM_SYMBOL_TYPE_FUNCTION);
       return S;
     }
+    if (!ExistingFunction->Signature && Sig)
+      ExistingFunction->Signature = Sig;
     if (IsCalledDirectly && !signatureMatches(ExistingFunction, Sig))
       if (getFunctionVariant(S, Sig, File, &S))
         Replace();

From 9ce37466043ee2f6ea8ed074f947c528ab0be37d Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 15:36:58 +0000
Subject: [PATCH 0504/1176] [analyzer] print() JSONify: Environment
 implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62081

llvm-svn: 361976
---
 .../clang/Analysis/AnalysisDeclContext.h      | 12 ++-
 .../Core/PathSensitive/Environment.h          |  6 +-
 .../Core/PathSensitive/ExprEngine.h           |  6 +-
 .../Core/PathSensitive/SubEngine.h            |  4 +-
 clang/lib/Analysis/AnalysisDeclContext.cpp    | 95 +++++++++++++++----
 clang/lib/StaticAnalyzer/Core/Environment.cpp | 87 +++++++++++++----
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  | 10 +-
 .../Core/PrettyStackTraceLocationContext.h    |  6 +-
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  |  4 +-
 clang/test/Analysis/expr-inspection.c         | 11 ++-
 10 files changed, 177 insertions(+), 64 deletions(-)

diff --git a/clang/include/clang/Analysis/AnalysisDeclContext.h b/clang/include/clang/Analysis/AnalysisDeclContext.h
index 86f331d26a9bb..1961d571e9e12 100644
--- a/clang/include/clang/Analysis/AnalysisDeclContext.h
+++ b/clang/include/clang/Analysis/AnalysisDeclContext.h
@@ -274,11 +274,17 @@ class LocationContext : public llvm::FoldingSetNode {
   virtual void Profile(llvm::FoldingSetNodeID &ID) = 0;
 
   void dumpStack(
-      raw_ostream &OS, StringRef Indent = {}, const char *NL = "\n",
-      const char *Sep = "",
+      raw_ostream &Out, const char *NL = "\n",
       std::function<void(const LocationContext *)> printMoreInfoPerContext =
           [](const LocationContext *) {}) const;
-  void dumpStack() const;
+
+  void printJson(
+      raw_ostream &Out, const char *NL = "\n", unsigned int Space = 0,
+      bool IsDot = false,
+      std::function<void(const LocationContext *)> printMoreInfoPerContext =
+          [](const LocationContext *) {}) const;
+
+  void dump() const;
 
 public:
   static void ProfileCommon(llvm::FoldingSetNodeID &ID,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h
index 6fc589b838299..498e36e1431fa 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Environment.h
@@ -91,9 +91,9 @@ class Environment {
     return ExprBindings == RHS.ExprBindings;
   }
 
-  void print(raw_ostream &Out, const char *NL, const char *Sep,
-             const ASTContext &Context,
-             const LocationContext *WithLC = nullptr) const;
+  void printJson(raw_ostream &Out, const ASTContext &Ctx,
+                 const LocationContext *LCtx = nullptr, const char *NL = "\n",
+                 unsigned int Space = 0, bool IsDot = false) const;
 };
 
 class EnvironmentManager {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index 8bc599a96a596..22866b35a676c 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -377,9 +377,9 @@ class ExprEngine : public SubEngine {
                        const CallEvent *Call) override;
 
   /// printState - Called by ProgramStateManager to print checker-specific data.
-  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
-                  const char *Sep,
-                  const LocationContext *LCtx = nullptr) override;
+  void printState(raw_ostream &Out, ProgramStateRef State,
+                  const LocationContext *LCtx, const char *NL,
+                  unsigned int Space, bool IsDot) const override;
 
   ProgramStateManager &getStateManager() override { return StateMgr; }
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
index 9296e17ca0e7b..28b2f30c06621 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
@@ -160,8 +160,8 @@ class SubEngine {
 
   /// printState - Called by ProgramStateManager to print checker-specific data.
   virtual void printState(raw_ostream &Out, ProgramStateRef State,
-                          const char *NL, const char *Sep,
-                          const LocationContext *LCtx = nullptr) = 0;
+                          const LocationContext *LCtx, const char *NL,
+                          unsigned int Space, bool IsDot) const = 0;
 
   /// Called by CoreEngine when the analysis worklist is either empty or the
   //  maximum number of analysis steps have been reached.
diff --git a/clang/lib/Analysis/AnalysisDeclContext.cpp b/clang/lib/Analysis/AnalysisDeclContext.cpp
index 750d9bb1202ae..ba1f8375124ea 100644
--- a/clang/lib/Analysis/AnalysisDeclContext.cpp
+++ b/clang/lib/Analysis/AnalysisDeclContext.cpp
@@ -30,6 +30,7 @@
 #include "clang/Analysis/CFG.h"
 #include "clang/Analysis/CFGStmtMap.h"
 #include "clang/Analysis/Support/BumpVector.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
@@ -463,17 +464,17 @@ bool LocationContext::isParentOf(const LocationContext *LC) const {
   return false;
 }
 
-static void printLocation(raw_ostream &OS, const SourceManager &SM,
-                          SourceLocation SLoc) {
-  if (SLoc.isFileID() && SM.isInMainFile(SLoc))
-    OS << "line " << SM.getExpansionLineNumber(SLoc);
+static void printLocation(raw_ostream &Out, const SourceManager &SM,
+                          SourceLocation Loc) {
+  if (Loc.isFileID() && SM.isInMainFile(Loc))
+    Out << SM.getExpansionLineNumber(Loc);
   else
-    SLoc.print(OS, SM);
+    Loc.print(Out, SM);
 }
 
-void LocationContext::dumpStack(
-    raw_ostream &OS, StringRef Indent, const char *NL, const char *Sep,
-    std::function<void(const LocationContext *)> printMoreInfoPerContext) const {
+void LocationContext::dumpStack(raw_ostream &Out, const char *NL,
+                                std::function<void(const LocationContext *)>
+                                    printMoreInfoPerContext) const {
   ASTContext &Ctx = getAnalysisDeclContext()->getASTContext();
   PrintingPolicy PP(Ctx.getLangOpts());
   PP.TerseOutput = 1;
@@ -485,38 +486,92 @@ void LocationContext::dumpStack(
   for (const LocationContext *LCtx = this; LCtx; LCtx = LCtx->getParent()) {
     switch (LCtx->getKind()) {
     case StackFrame:
-      OS << Indent << '#' << Frame << ' ';
+      Out << "\t#" << Frame << ' ';
       ++Frame;
       if (const auto *D = dyn_cast<NamedDecl>(LCtx->getDecl()))
-        OS << "Calling " << D->getQualifiedNameAsString();
+        Out << "Calling " << D->getQualifiedNameAsString();
       else
-        OS << "Calling anonymous code";
+        Out << "Calling anonymous code";
       if (const Stmt *S = cast<StackFrameContext>(LCtx)->getCallSite()) {
-        OS << " at ";
-        printLocation(OS, SM, S->getBeginLoc());
+        Out << " at line ";
+        printLocation(Out, SM, S->getBeginLoc());
       }
       break;
     case Scope:
-      OS << "Entering scope";
+      Out << "Entering scope";
       break;
     case Block:
-      OS << "Invoking block";
+      Out << "Invoking block";
       if (const Decl *D = cast<BlockInvocationContext>(LCtx)->getDecl()) {
-        OS << " defined at ";
-        printLocation(OS, SM, D->getBeginLoc());
+        Out << " defined at line ";
+        printLocation(Out, SM, D->getBeginLoc());
       }
       break;
     }
-    OS << NL;
+    Out << NL;
 
     printMoreInfoPerContext(LCtx);
   }
 }
 
-LLVM_DUMP_METHOD void LocationContext::dumpStack() const {
-  dumpStack(llvm::errs());
+void LocationContext::printJson(raw_ostream &Out, const char *NL,
+                                unsigned int Space, bool IsDot,
+                                std::function<void(const LocationContext *)>
+                                    printMoreInfoPerContext) const {
+  ASTContext &Ctx = getAnalysisDeclContext()->getASTContext();
+  PrintingPolicy PP(Ctx.getLangOpts());
+  PP.TerseOutput = 1;
+
+  const SourceManager &SM =
+      getAnalysisDeclContext()->getASTContext().getSourceManager();
+
+  unsigned Frame = 0;
+  for (const LocationContext *LCtx = this; LCtx; LCtx = LCtx->getParent()) {
+    Indent(Out, Space, IsDot) << "{ \"location_context\": \"";
+    switch (LCtx->getKind()) {
+    case StackFrame:
+      Out << '#' << Frame << " Call\", \"calling\": \"";
+      ++Frame;
+      if (const auto *D = dyn_cast<NamedDecl>(LCtx->getDecl()))
+        Out << D->getQualifiedNameAsString();
+      else
+        Out << "anonymous code";
+
+      Out << "\", \"call_line\": ";
+      if (const Stmt *S = cast<StackFrameContext>(LCtx)->getCallSite()) {
+        Out << '\"';
+        printLocation(Out, SM, S->getBeginLoc());
+	Out << '\"';
+      } else {
+        Out << "null";
+      }
+
+      Out << ", \"items\": ";
+      break;
+    case Scope:
+      Out << "Entering scope\" ";
+      break;
+    case Block:
+      Out << "Invoking block\" ";
+      if (const Decl *D = cast<BlockInvocationContext>(LCtx)->getDecl()) {
+        Out << ", \"decl_line\": ";
+        printLocation(Out, SM, D->getBeginLoc());
+        Out << ' ';
+      }
+      break;
+    }
+
+    printMoreInfoPerContext(LCtx);
+
+    Out << '}';
+    if (LCtx->getParent())
+      Out << ',';
+    Out << NL;
+  }
 }
 
+LLVM_DUMP_METHOD void LocationContext::dump() const { printJson(llvm::errs()); }
+
 //===----------------------------------------------------------------------===//
 // Lazily generated map to query the external variables referenced by a Block.
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp
index 9d888ece177c6..6344bc5a4d6f2 100644
--- a/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -18,6 +18,7 @@
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
@@ -199,43 +200,93 @@ EnvironmentManager::removeDeadBindings(Environment Env,
   return NewEnv;
 }
 
-void Environment::print(raw_ostream &Out, const char *NL,
-                        const char *Sep,
-                        const ASTContext &Context,
-                        const LocationContext *WithLC) const {
-  if (ExprBindings.isEmpty())
+void Environment::printJson(raw_ostream &Out, const ASTContext &Ctx,
+                            const LocationContext *LCtx, const char *NL,
+                            unsigned int Space, bool IsDot) const {
+  Indent(Out, Space, IsDot) << "\"environment\": ";
+  ++Space;
+
+  if (ExprBindings.isEmpty()) {
+    Out << "null," << NL;
     return;
+  }
 
-  if (!WithLC) {
+  if (!LCtx) {
     // Find the freshest location context.
     llvm::SmallPtrSet<const LocationContext *, 16> FoundContexts;
-    for (auto I : *this) {
+    for (const auto &I : *this) {
       const LocationContext *LC = I.first.getLocationContext();
       if (FoundContexts.count(LC) == 0) {
         // This context is fresher than all other contexts so far.
-        WithLC = LC;
+        LCtx = LC;
         for (const LocationContext *LCI = LC; LCI; LCI = LCI->getParent())
           FoundContexts.insert(LCI);
       }
     }
   }
 
-  assert(WithLC);
+  assert(LCtx);
+
+  Out << '[' << NL; // Start of Environment.
+  PrintingPolicy PP = Ctx.getPrintingPolicy();
+
+  LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
+    // LCtx items begin
+    bool HasItem = false;
+    unsigned int InnerSpace = Space + 1;
 
-  PrintingPolicy PP = Context.getPrintingPolicy();
+    llvm::SmallString<256> TempBuf;
+    llvm::raw_svector_ostream TempOut(TempBuf);
 
-  Out << NL << "Expressions by stack frame:" << NL;
-  WithLC->dumpStack(Out, "", NL, Sep, [&](const LocationContext *LC) {
-    for (auto I : ExprBindings) {
-      if (I.first.getLocationContext() != LC)
+    // Store the last ExprBinding which we will print.
+    BindingsTy::iterator LastI = ExprBindings.end();
+    for (BindingsTy::iterator I = ExprBindings.begin(); I != ExprBindings.end();
+         ++I) {
+      if (I->first.getLocationContext() != LC)
         continue;
 
-      const Stmt *S = I.first.getStmt();
+      if (!HasItem) {
+        HasItem = true;
+        Out << '[' << NL;
+      }
+
+      const Stmt *S = I->first.getStmt();
       assert(S != nullptr && "Expected non-null Stmt");
 
-      Out << "(LC" << LC->getID() << ", S" << S->getID(Context) << ") ";
-      S->printPretty(Out, /*Helper=*/nullptr, PP);
-      Out << " : " << I.second << NL;
+      LastI = I;
+    }
+
+    for (BindingsTy::iterator I = ExprBindings.begin(); I != ExprBindings.end();
+         ++I) {
+      if (I->first.getLocationContext() != LC)
+        continue;
+
+      const Stmt *S = I->first.getStmt();
+      Indent(Out, InnerSpace, IsDot)
+          << "{ \"lctx_id\": " << LC->getID()
+          << ", \"stmt_id\": " << S->getID(Ctx) << ", \"pretty\": ";
+
+      // See whether the current statement is pretty-printable.
+      S->printPretty(TempOut, /*Helper=*/nullptr, PP);
+      if (!TempBuf.empty()) {
+        Out << '\"' << TempBuf.str().trim() << '\"';
+        TempBuf.clear();
+      } else {
+        Out << "null";
+      }
+
+      Out << ", \"value\": \"" << I->second << "\" }";
+
+      if (I != LastI)
+        Out << ',';
+      Out << NL;
     }
+
+    if (HasItem)
+      Indent(Out, --InnerSpace, IsDot) << ']';
+    else
+      Out << "null ";
   });
+
+  Indent(Out, --Space, IsDot) << "]," << NL; // End of Environment.
 }
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 975af4743927c..a57546b89095b 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -558,19 +558,19 @@ static void printObjectsUnderConstructionForContext(raw_ostream &Out,
 }
 
 void ExprEngine::printState(raw_ostream &Out, ProgramStateRef State,
-                            const char *NL, const char *Sep,
-                            const LocationContext *LCtx) {
+                            const LocationContext *LCtx, const char *NL,
+                            unsigned int Space, bool IsDot) const {
   if (LCtx) {
     if (!State->get<ObjectsUnderConstruction>().isEmpty()) {
-      Out << Sep << "Objects under construction:" << NL;
+      Out << "Objects under construction:" << NL;
 
-      LCtx->dumpStack(Out, "", NL, Sep, [&](const LocationContext *LC) {
+      LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
         printObjectsUnderConstructionForContext(Out, State, NL, LC);
       });
     }
   }
 
-  getCheckerManager().runCheckersForPrintState(Out, State, NL, Sep);
+  getCheckerManager().runCheckersForPrintState(Out, State, NL, "");
 }
 
 void ExprEngine::processEndWorklist() {
diff --git a/clang/lib/StaticAnalyzer/Core/PrettyStackTraceLocationContext.h b/clang/lib/StaticAnalyzer/Core/PrettyStackTraceLocationContext.h
index c79273dca8017..c71ee3bd42861 100644
--- a/clang/lib/StaticAnalyzer/Core/PrettyStackTraceLocationContext.h
+++ b/clang/lib/StaticAnalyzer/Core/PrettyStackTraceLocationContext.h
@@ -32,9 +32,9 @@ class PrettyStackTraceLocationContext : public llvm::PrettyStackTraceEntry {
     assert(LCtx);
   }
 
-  void print(raw_ostream &OS) const override {
-    OS << "While analyzing stack: \n";
-    LCtx->dumpStack(OS, "\t");
+  void print(raw_ostream &Out) const override {
+    Out << "While analyzing stack: \n";
+    LCtx->dumpStack(Out);
   }
 };
 
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index f5c7af3b1c287..911b96db2c993 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -449,7 +449,7 @@ void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
   Mgr.getStoreManager().printJson(Out, getStore(), NL, Space, IsDot);
 
   // Print out the environment.
-  Env.print(Out, NL, Sep, Context, LCtx);
+  Env.printJson(Out, Context, LCtx, NL, Space, IsDot);
 
   // Print out the constraints.
   Mgr.getConstraintManager().print(this, Out, NL, Sep);
@@ -458,7 +458,7 @@ void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
   printDynamicTypeInfo(this, Out, NL, Sep);
 
   // Print checker-specific data.
-  Mgr.getOwningEngine().printState(Out, this, NL, Sep, LCtx);
+  Mgr.getOwningEngine().printState(Out, this, LCtx, NL, Space, IsDot);
 }
 
 void ProgramState::printDOT(raw_ostream &Out, const LocationContext *LCtx,
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index 33ff460d79bae..fe3191eec9e9e 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -27,11 +27,12 @@ void foo(int x) {
 // CHECK-NEXT:   { "cluster": "y", "items": [
 // CHECK-NEXT:     { "kind": "Direct", "offset": 0, "value": "2 S32b" }
 // CHECK-NEXT:   ]}
-// CHECK-NEXT: ]
-
-// CHECK:      Expressions by stack frame:
-// CHECK-NEXT: #0 Calling foo
-// CHECK-NEXT: (LC1, S847) clang_analyzer_printState : &code{clang_analyzer_printState}
+// CHECK-NEXT: ],
+// CHECK-NEXT: "environment": [
+// CHECK-NEXT:   { "location_context": "#0 Call", "calling": "foo", "call_line": null, "items": [
+// CHECK-NEXT:     { "lctx_id": 1, "stmt_id": 847, "pretty": "clang_analyzer_printState", "value": "&code{clang_analyzer_printState}" }
+// CHECK-NEXT:   ]}
+// CHECK-NEXT: ],
 
 // CHECK:      Ranges of symbol values:
 // CHECK-NEXT:  reg_$0<int x> : { [-2147483648, 13] }

From 56e970d45d021f69e57c351ff1c85cec7c7a03b6 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 29 May 2019 15:41:08 +0000
Subject: [PATCH 0505/1176] [WebAssembly] Move direct call tracking from member
 to local. NFC.

This data structure is only needed temporarily while symbols are being
created.

This is a followup on rL361678.

Differential Revision: https://reviews.llvm.org/D62548

llvm-svn: 361977
---
 lld/wasm/InputFiles.cpp | 13 ++++++++++---
 lld/wasm/InputFiles.h   |  7 -------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index a9d6abff4abf0..e1c4fa7b747fc 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -272,7 +272,14 @@ void ObjFile::parse(bool IgnoreComdats) {
   }
 
   uint32_t SectionIndex = 0;
-  SymbolIsCalledDirectly.resize(WasmObj->getNumberOfSymbols(), false);
+
+  // Bool for each symbol, true if called directly.  This allows us to implement
+  // a weaker form of signature checking where undefined functions that are not
+  // called directly (i.e. only address taken) don't have to match the defined
+  // function's signature.  We cannot do this for directly called functions
+  // because those signatures are checked at validation times.
+  // See https://bugs.llvm.org/show_bug.cgi?id=40412
+  std::vector<bool> IsCalledDirectly(WasmObj->getNumberOfSymbols(), false);
   for (const SectionRef &Sec : WasmObj->sections()) {
     const WasmSection &Section = WasmObj->getWasmSection(Sec);
     // Wasm objects can have at most one code and one data section.
@@ -292,7 +299,7 @@ void ObjFile::parse(bool IgnoreComdats) {
     // directly
     for (const WasmRelocation &Reloc : Section.Relocations)
       if (Reloc.Type == R_WASM_FUNCTION_INDEX_LEB)
-        SymbolIsCalledDirectly[Reloc.Index] = true;
+        IsCalledDirectly[Reloc.Index] = true;
   }
 
   TypeMap.resize(getWasmObj()->types().size());
@@ -342,7 +349,7 @@ void ObjFile::parse(bool IgnoreComdats) {
       }
     }
     size_t Idx = Symbols.size();
-    Symbols.push_back(createUndefined(WasmSym, SymbolIsCalledDirectly[Idx]));
+    Symbols.push_back(createUndefined(WasmSym, IsCalledDirectly[Idx]));
   }
 }
 
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index 64ac208daa6f7..f5b4532fa927c 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -69,13 +69,6 @@ class InputFile {
 
   // List of all symbols referenced or defined by this file.
   std::vector<Symbol *> Symbols;
-  // Bool for each symbol, true if called directly.  This allows us to implement
-  // a weaker form of signature checking where undefined functions that are not
-  // called directly (i.e. only address taken) don't have to match the defined
-  // function's signature.  We cannot do this for directly called functions
-  // because those signatures are checked at validation times.
-  // See https://bugs.llvm.org/show_bug.cgi?id=40412
-  std::vector<bool> SymbolIsCalledDirectly;
 
 private:
   const Kind FileKind;

From 5df5eb8816361d8be84a5f99cda4344dbcb01f87 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 15:43:26 +0000
Subject: [PATCH 0506/1176] [analyzer] print() JSONify: Constraints
 implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62082

llvm-svn: 361978
---
 .../Core/PathSensitive/ConstraintManager.h    |  7 ++--
 .../Core/PathSensitive/SMTConstraintManager.h | 32 +++++++++++----
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  |  2 +-
 .../Core/RangeConstraintManager.cpp           | 41 ++++++++++++-------
 clang/test/Analysis/expr-inspection.c         |  5 ++-
 5 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
index 5b69299f78319..1baf8c57de86f 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
@@ -162,10 +162,9 @@ class ConstraintManager {
   virtual ProgramStateRef removeDeadBindings(ProgramStateRef state,
                                                  SymbolReaper& SymReaper) = 0;
 
-  virtual void print(ProgramStateRef state,
-                     raw_ostream &Out,
-                     const char* nl,
-                     const char *sep) = 0;
+  virtual void printJson(raw_ostream &Out, ProgramStateRef State,
+                         const char *NL, unsigned int Space,
+                         bool IsDot) const = 0;
 
   virtual void EndPath(ProgramStateRef state) {}
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h
index fe097b92b3ae9..1712501b13bd8 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SMTCONSTRAINTMANAGER_H
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SMTCONSTRAINTMANAGER_H
 
+#include "clang/Basic/JsonSupport.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h"
 
@@ -208,17 +209,32 @@ class SMTConstraintManager : public clang::ento::SimpleConstraintManager {
     return State->set<ConstraintSMT>(CZ);
   }
 
-  void print(ProgramStateRef St, raw_ostream &OS, const char *nl,
-             const char *sep) override {
+  void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
+                 unsigned int Space = 0, bool IsDot = false) const override {
+    ConstraintSMTType Constraints = State->get<ConstraintSMT>();
 
-    auto CZ = St->get<ConstraintSMT>();
+    Indent(Out, Space, IsDot) << "\"constraints\": ";
+    if (Constraints.isEmpty()) {
+      Out << "null," << NL;
+      return;
+    }
 
-    OS << nl << sep << "Constraints:";
-    for (auto I = CZ.begin(), E = CZ.end(); I != E; ++I) {
-      OS << nl << ' ' << I->first << " : ";
-      I->second->print(OS);
+    ++Space;
+    Out << '[' << NL;
+    for (ConstraintSMTType::iterator I = Constraints.begin();
+         I != Constraints.end(); ++I) {
+      Indent(Out, Space, IsDot)
+          << "{ \"symbol\": \"" << I->first << "\", \"range\": \"";
+      I->second->print(Out);
+      Out << "\" }";
+
+      if (std::next(I) != Constraints.end())
+        Out << ',';
+      Out << NL;
     }
-    OS << nl;
+
+    --Space;
+    Indent(Out, Space, IsDot) << "],";
   }
 
   bool haveEqualConstraints(ProgramStateRef S1,
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 911b96db2c993..090801f1abbf9 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -452,7 +452,7 @@ void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
   Env.printJson(Out, Context, LCtx, NL, Space, IsDot);
 
   // Print out the constraints.
-  Mgr.getConstraintManager().print(this, Out, NL, Sep);
+  Mgr.getConstraintManager().printJson(Out, this, NL, Space, IsDot);
 
   // Print out the tracked dynamic types.
   printDynamicTypeInfo(this, Out, NL, Sep);
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 5c3eb0d66a039..64724227395d5 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Basic/JsonSupport.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
@@ -261,8 +262,8 @@ class RangeConstraintManager : public RangedConstraintManager {
   ProgramStateRef removeDeadBindings(ProgramStateRef State,
                                      SymbolReaper &SymReaper) override;
 
-  void print(ProgramStateRef State, raw_ostream &Out, const char *nl,
-             const char *sep) override;
+  void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
+                 unsigned int Space = 0, bool IsDot = false) const override;
 
   //===------------------------------------------------------------------===//
   // Implementation for interface from RangedConstraintManager.
@@ -754,25 +755,35 @@ ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
   return New.isEmpty() ? nullptr : State->set<ConstraintRange>(Sym, New);
 }
 
-//===------------------------------------------------------------------------===
+//===----------------------------------------------------------------------===//
 // Pretty-printing.
-//===------------------------------------------------------------------------===/
-
-void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out,
-                                   const char *nl, const char *sep) {
+//===----------------------------------------------------------------------===//
 
-  ConstraintRangeTy Ranges = St->get<ConstraintRange>();
+void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
+                                       const char *NL, unsigned int Space,
+                                       bool IsDot) const {
+  ConstraintRangeTy Constraints = State->get<ConstraintRange>();
 
-  if (Ranges.isEmpty()) {
-    Out << nl << sep << "Ranges are empty." << nl;
+  Indent(Out, Space, IsDot) << "\"constraints\": ";
+  if (Constraints.isEmpty()) {
+    Out << "null," << NL;
     return;
   }
 
-  Out << nl << sep << "Ranges of symbol values:";
-  for (ConstraintRangeTy::iterator I = Ranges.begin(), E = Ranges.end(); I != E;
-       ++I) {
-    Out << nl << ' ' << I.getKey() << " : ";
+  ++Space;
+  Out << '[' << NL;
+  for (ConstraintRangeTy::iterator I = Constraints.begin();
+       I != Constraints.end(); ++I) {
+    Indent(Out, Space, IsDot)
+        << "{ \"symbol\": \"" << I.getKey() << "\", \"range\": \"";
     I.getData().print(Out);
+    Out << "\" }";
+
+    if (std::next(I) != Constraints.end())
+      Out << ',';
+    Out << NL;
   }
-  Out << nl;
+
+  --Space;
+  Indent(Out, Space, IsDot) << "]," << NL;
 }
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index fe3191eec9e9e..461252d537268 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -33,6 +33,7 @@ void foo(int x) {
 // CHECK-NEXT:     { "lctx_id": 1, "stmt_id": 847, "pretty": "clang_analyzer_printState", "value": "&code{clang_analyzer_printState}" }
 // CHECK-NEXT:   ]}
 // CHECK-NEXT: ],
+// CHECK-NEXT: "constraints": [
+// CHECK-NEXT:   { "symbol": "reg_$0<int x>", "range": "{ [-2147483648, 13] }" }
+// CHECK-NEXT: ],
 
-// CHECK:      Ranges of symbol values:
-// CHECK-NEXT:  reg_$0<int x> : { [-2147483648, 13] }

From 32981637ce6c025ca0695f768a110c6c98c03e94 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 15:53:12 +0000
Subject: [PATCH 0507/1176] [analyzer] print() JSONify: Type information
 implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62083

llvm-svn: 361979
---
 .../Core/PathSensitive/DynamicTypeMap.h       | 10 ++---
 .../Checkers/DynamicTypePropagation.cpp       |  4 +-
 .../StaticAnalyzer/Core/DynamicTypeMap.cpp    | 44 ++++++++++++-------
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  |  2 +-
 clang/test/Analysis/expr-inspection.c         |  2 +-
 5 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h
index 6608f26b3b380..a84b248720618 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h
@@ -29,12 +29,11 @@ class MemRegion;
 /// symbol to its most likely type.
 struct DynamicTypeMap {};
 
-using DynamicTypeMapImpl =
-    llvm::ImmutableMap<const MemRegion *, DynamicTypeInfo>;
+using DynamicTypeMapTy = llvm::ImmutableMap<const MemRegion *, DynamicTypeInfo>;
 
 template <>
 struct ProgramStateTrait<DynamicTypeMap>
-    : public ProgramStatePartialTrait<DynamicTypeMapImpl> {
+    : public ProgramStatePartialTrait<DynamicTypeMapTy> {
   static void *GDMIndex();
 };
 
@@ -54,8 +53,9 @@ inline ProgramStateRef setDynamicTypeInfo(ProgramStateRef State,
                             DynamicTypeInfo(NewTy, CanBeSubClassed));
 }
 
-void printDynamicTypeInfo(ProgramStateRef State, raw_ostream &Out,
-                          const char *NL, const char *Sep);
+void printDynamicTypeInfoJson(raw_ostream &Out, ProgramStateRef State,
+                              const char *NL = "\n", unsigned int Space = 0,
+                              bool IsDot = false);
 
 } // namespace ento
 } // namespace clang
diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 1862ffc79d487..916a20e559f86 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -114,8 +114,8 @@ class DynamicTypePropagation:
 void DynamicTypePropagation::checkDeadSymbols(SymbolReaper &SR,
                                               CheckerContext &C) const {
   ProgramStateRef State = C.getState();
-  DynamicTypeMapImpl TypeMap = State->get<DynamicTypeMap>();
-  for (DynamicTypeMapImpl::iterator I = TypeMap.begin(), E = TypeMap.end();
+  DynamicTypeMapTy TypeMap = State->get<DynamicTypeMap>();
+  for (DynamicTypeMapTy::iterator I = TypeMap.begin(), E = TypeMap.end();
        I != E; ++I) {
     if (!SR.isLiveRegion(I->first)) {
       State = State->remove<DynamicTypeMap>(I->first);
diff --git a/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp b/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
index c5ee8ce4c22e1..22c4cc4a624a7 100644
--- a/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
+++ b/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
@@ -53,27 +54,38 @@ ProgramStateRef setDynamicTypeInfo(ProgramStateRef State, const MemRegion *Reg,
   return NewState;
 }
 
-void printDynamicTypeInfo(ProgramStateRef State, raw_ostream &Out,
-                          const char *NL, const char *Sep) {
-  bool First = true;
-  for (const auto &I : State->get<DynamicTypeMap>()) {
-    if (First) {
-      Out << NL << "Dynamic types of regions:" << NL;
-      First = false;
-    }
-    const MemRegion *MR = I.first;
-    const DynamicTypeInfo &DTI = I.second;
-    Out << MR << " : ";
+void printDynamicTypeInfoJson(raw_ostream &Out, ProgramStateRef State,
+                              const char *NL, unsigned int Space, bool IsDot) {
+  Indent(Out, Space, IsDot) << "\"dynamic_types\": ";
+
+  const DynamicTypeMapTy &DTM = State->get<DynamicTypeMap>();
+  if (DTM.isEmpty()) {
+    Out << "null," << NL;
+    return;
+  }
+
+  ++Space;
+  Out << '[' << NL;
+  for (DynamicTypeMapTy::iterator I = DTM.begin(); I != DTM.end(); ++I) {
+    const MemRegion *MR = I->first;
+    const DynamicTypeInfo &DTI = I->second;
+    Out << "{ \"region\": \"" << MR << "\", \"dyn_type\": ";
     if (DTI.isValid()) {
-      Out << DTI.getType()->getPointeeType().getAsString();
-      if (DTI.canBeASubClass()) {
-        Out << " (or its subclass)";
-      }
+      Out << '\"' << DTI.getType()->getPointeeType().getAsString()
+          << "\" \"sub_classable\": "
+          << (DTI.canBeASubClass() ? "true" : "false");
     } else {
-      Out << "Invalid type info";
+      Out << "null"; // Invalid type info
     }
+    Out << "\" }";
+
+    if (std::next(I) != DTM.end())
+      Out << ',';
     Out << NL;
   }
+
+  --Space;
+  Indent(Out, Space, IsDot) << "]," << NL;
 }
 
 void *ProgramStateTrait<DynamicTypeMap>::GDMIndex() {
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 090801f1abbf9..b661dd1de08e7 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -455,7 +455,7 @@ void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
   Mgr.getConstraintManager().printJson(Out, this, NL, Space, IsDot);
 
   // Print out the tracked dynamic types.
-  printDynamicTypeInfo(this, Out, NL, Sep);
+  printDynamicTypeInfoJson(Out, this, NL, Space, IsDot);
 
   // Print checker-specific data.
   Mgr.getOwningEngine().printState(Out, this, LCtx, NL, Space, IsDot);
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index 461252d537268..abfef06b9970d 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -36,4 +36,4 @@ void foo(int x) {
 // CHECK-NEXT: "constraints": [
 // CHECK-NEXT:   { "symbol": "reg_$0<int x>", "range": "{ [-2147483648, 13] }" }
 // CHECK-NEXT: ],
-
+// CHECK-NEXT: "dynamic_types": null,

From 35e54eb31ef2280e1ac3c122d619d10c51379bc3 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 15:58:26 +0000
Subject: [PATCH 0508/1176] [analyzer] print() JSONify: Constructing objects
 implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62085

llvm-svn: 361980
---
 .../Core/PathSensitive/ExprEngine.h           |   8 +-
 .../Core/PathSensitive/SubEngine.h            |   8 +-
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  | 127 +++++++++++++-----
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  |   2 +-
 clang/test/Analysis/dump_egraph.cpp           |   8 +-
 clang/test/Analysis/expr-inspection.c         |   1 +
 6 files changed, 110 insertions(+), 44 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
index 22866b35a676c..6daaa129fc7cb 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h
@@ -376,10 +376,10 @@ class ExprEngine : public SubEngine {
                        const LocationContext *LCtx,
                        const CallEvent *Call) override;
 
-  /// printState - Called by ProgramStateManager to print checker-specific data.
-  void printState(raw_ostream &Out, ProgramStateRef State,
-                  const LocationContext *LCtx, const char *NL,
-                  unsigned int Space, bool IsDot) const override;
+  /// printJson - Called by ProgramStateManager to print checker-specific data.
+  void printJson(raw_ostream &Out, ProgramStateRef State,
+                 const LocationContext *LCtx, const char *NL,
+                 unsigned int Space, bool IsDot) const override;
 
   ProgramStateManager &getStateManager() override { return StateMgr; }
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
index 28b2f30c06621..7789b431c0a69 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h
@@ -158,10 +158,10 @@ class SubEngine {
                            const CallEvent *Call,
                            RegionAndSymbolInvalidationTraits &HTraits) = 0;
 
-  /// printState - Called by ProgramStateManager to print checker-specific data.
-  virtual void printState(raw_ostream &Out, ProgramStateRef State,
-                          const LocationContext *LCtx, const char *NL,
-                          unsigned int Space, bool IsDot) const = 0;
+  /// printJson - Called by ProgramStateManager to print checker-specific data.
+  virtual void printJson(raw_ostream &Out, ProgramStateRef State,
+                         const LocationContext *LCtx, const char *NL,
+                         unsigned int Space, bool IsDot) const = 0;
 
   /// Called by CoreEngine when the analysis worklist is either empty or the
   //  maximum number of analysis steps have been reached.
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index a57546b89095b..07fc6f7643a83 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -33,6 +33,7 @@
 #include "clang/Analysis/ConstructionContext.h"
 #include "clang/Analysis/ProgramPoint.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/PrettyStackTrace.h"
@@ -141,21 +142,47 @@ class ConstructedObjectKey {
     return getLocationContext()->getDecl()->getASTContext();
   }
 
-  void print(llvm::raw_ostream &OS, PrinterHelper *Helper, PrintingPolicy &PP) {
-    OS << "(LC" << getLocationContext()->getID() << ',';
-    if (const Stmt *S = getItem().getStmtOrNull())
-      OS << 'S' << S->getID(getASTContext());
+  void printJson(llvm::raw_ostream &Out, PrinterHelper *Helper,
+                 PrintingPolicy &PP) const {
+    const Stmt *S = getItem().getStmtOrNull();
+    const CXXCtorInitializer *I = nullptr;
+    if (!S)
+      I = getItem().getCXXCtorInitializer();
+
+    // IDs
+    Out << "\"lctx_id\": " << getLocationContext()->getID() << ", ";
+
+    if (S)
+      Out << "\"stmt_id\": " << S->getID(getASTContext());
     else
-      OS << 'I' << getItem().getCXXCtorInitializer()->getID(getASTContext());
-    OS << ',' << getItem().getKindAsString();
+      Out << "\"init_id\": " << I->getID(getASTContext());
+
+    // Kind
+    Out << ", \"kind\": \"" << getItem().getKindAsString()
+        << "\", \"argument_index\": ";
+
     if (getItem().getKind() == ConstructionContextItem::ArgumentKind)
-      OS << " #" << getItem().getIndex();
-    OS << ") ";
-    if (const Stmt *S = getItem().getStmtOrNull()) {
-      S->printPretty(OS, Helper, PP);
+      Out << getItem().getIndex() << '\"';
+    else
+      Out << "null";
+
+    // Pretty-print
+    Out << ", \"pretty\": \"";
+
+    if (S) {
+      llvm::SmallString<256> TempBuf;
+      llvm::raw_svector_ostream TempOut(TempBuf);
+
+      // See whether the current statement is pretty-printable.
+      S->printPretty(TempOut, Helper, PP);
+      if (!TempBuf.empty()) {
+        Out << TempBuf.str().trim() << '\"';
+        TempBuf.clear();
+      } else {
+        Out << "null";
+      }
     } else {
-      const CXXCtorInitializer *I = getItem().getCXXCtorInitializer();
-      OS << I->getAnyMember()->getNameAsString();
+      Out << I->getAnyMember()->getNameAsString() << '\"';
     }
   }
 
@@ -541,33 +568,69 @@ ExprEngine::processRegionChanges(ProgramStateRef state,
                                                          LCtx, Call);
 }
 
-static void printObjectsUnderConstructionForContext(raw_ostream &Out,
-                                                    ProgramStateRef State,
-                                                    const char *NL,
-                                                    const LocationContext *LC) {
+static void
+printObjectsUnderConstructionJson(raw_ostream &Out, ProgramStateRef State,
+                                  const char *NL, const LocationContext *LCtx,
+                                  unsigned int Space = 0, bool IsDot = false) {
   PrintingPolicy PP =
-      LC->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
-  for (auto I : State->get<ObjectsUnderConstruction>()) {
-    ConstructedObjectKey Key = I.first;
+      LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
+
+  ++Space;
+  bool HasItem = false;
+
+  // Store the last key.
+  const ConstructedObjectKey *LastKey = nullptr;
+  for (const auto &I : State->get<ObjectsUnderConstruction>()) {
+    const ConstructedObjectKey &Key = I.first;
+    if (Key.getLocationContext() != LCtx)
+      continue;
+
+    if (!HasItem) {
+      Out << "[" << NL;
+      HasItem = true;
+    }
+
+    LastKey = &Key;
+  }
+
+  for (const auto &I : State->get<ObjectsUnderConstruction>()) {
+    const ConstructedObjectKey &Key = I.first;
     SVal Value = I.second;
-    if (Key.getLocationContext() != LC)
+    if (Key.getLocationContext() != LCtx)
       continue;
-    Key.print(Out, nullptr, PP);
-    Out << " : " << Value << NL;
+
+    Indent(Out, Space, IsDot) << "{ ";
+    Key.printJson(Out, nullptr, PP);
+    Out << ", \"value\": \"" << Value << "\" }";
+
+    if (&Key != LastKey)
+      Out << ',';
+    Out << NL;
+  }
+
+  if (HasItem)
+    Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
+  else {
+    Out << "null ";
   }
 }
 
-void ExprEngine::printState(raw_ostream &Out, ProgramStateRef State,
-                            const LocationContext *LCtx, const char *NL,
-                            unsigned int Space, bool IsDot) const {
-  if (LCtx) {
-    if (!State->get<ObjectsUnderConstruction>().isEmpty()) {
-      Out << "Objects under construction:" << NL;
+void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State,
+                           const LocationContext *LCtx, const char *NL,
+                           unsigned int Space, bool IsDot) const {
+  Indent(Out, Space, IsDot) << "\"constructing_objects\": ";
 
-      LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
-        printObjectsUnderConstructionForContext(Out, State, NL, LC);
-      });
-    }
+  if (LCtx && !State->get<ObjectsUnderConstruction>().isEmpty()) {
+    ++Space;
+    Out << '[' << NL;
+    LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
+      printObjectsUnderConstructionJson(Out, State, NL, LC, Space, IsDot);
+    });
+
+    --Space;
+    Indent(Out, Space, IsDot) << "]," << NL; // End of "constructing_objects".
+  } else {
+    Out << "null," << NL;
   }
 
   getCheckerManager().runCheckersForPrintState(Out, State, NL, "");
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index b661dd1de08e7..2f5c33ee6c222 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -458,7 +458,7 @@ void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
   printDynamicTypeInfoJson(Out, this, NL, Space, IsDot);
 
   // Print checker-specific data.
-  Mgr.getOwningEngine().printState(Out, this, LCtx, NL, Space, IsDot);
+  Mgr.getOwningEngine().printJson(Out, this, LCtx, NL, Space, IsDot);
 }
 
 void ProgramState::printDOT(raw_ostream &Out, const LocationContext *LCtx,
diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index 10e33a7523f54..9b05e4a6ba5dc 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -16,7 +16,9 @@ void foo() {
   T t;
 }
 
-// CHECK: (LC1,S{{[0-9]*}},construct into local variable) T t;\n : &t
-// CHECK: (LC2,I{{[0-9]*}},construct into member variable) s : &t-\>s
-// CHECK: conj_$5\{int, LC3, no stmt, #1\}
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": 1155, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\"
+
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": 1092, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
+
+// CHECK: \"store\": [\l&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
 
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index abfef06b9970d..cf9c9f82d0a05 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -37,3 +37,4 @@ void foo(int x) {
 // CHECK-NEXT:   { "symbol": "reg_$0<int x>", "range": "{ [-2147483648, 13] }" }
 // CHECK-NEXT: ],
 // CHECK-NEXT: "dynamic_types": null,
+// CHECK-NEXT: "constructing_objects": null,

From a2a1ec27d0e799725cdd41f8456deb9a49433e2d Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Wed, 29 May 2019 16:01:36 +0000
Subject: [PATCH 0509/1176] [NFC][libcxx] Remove trailing whitespace

It's incredibly annoying when trying to create diffs

llvm-svn: 361981
---
 libcxx/include/chrono           | 16 ++++++++--------
 libcxx/include/complex          |  4 ++--
 libcxx/include/forward_list     |  6 +++---
 libcxx/include/future           | 10 +++++-----
 libcxx/include/initializer_list |  2 +-
 libcxx/include/iomanip          | 10 +++++-----
 libcxx/include/ios              | 30 +++++++++++++++---------------
 libcxx/include/iterator         | 30 +++++++++++++++---------------
 libcxx/include/locale           |  6 +++---
 libcxx/include/numeric          |  8 ++++----
 libcxx/include/queue            | 20 ++++++++++----------
 libcxx/include/random           | 16 ++++++++--------
 libcxx/include/regex            | 20 ++++++++++----------
 libcxx/include/stack            | 10 +++++-----
 libcxx/include/string           | 16 ++++++++--------
 libcxx/include/string_view      | 16 ++++++++--------
 libcxx/include/unordered_map    | 16 ++++++++--------
 libcxx/include/unordered_set    | 12 ++++++------
 18 files changed, 124 insertions(+), 124 deletions(-)

diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 0ee8c32d73433..1b907571aa14a 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -1934,7 +1934,7 @@ bool operator!=(const weekday_last& __lhs, const weekday_last& __rhs) noexcept
 inline constexpr
 weekday_indexed weekday::operator[](unsigned __index) const noexcept { return weekday_indexed{*this, __index}; }
 
-inline constexpr 
+inline constexpr
 weekday_last    weekday::operator[](last_spec) const noexcept { return weekday_last{*this}; }
 
 
@@ -2245,7 +2245,7 @@ public:
      year_month_day() = default;
      inline constexpr year_month_day(
             const chrono::year& __yval, const chrono::month& __mval, const chrono::day& __dval) noexcept
-            : __y{__yval}, __m{__mval}, __d{__dval} {}  
+            : __y{__yval}, __m{__mval}, __d{__dval} {}
             constexpr year_month_day(const year_month_day_last& __ymdl) noexcept;
      inline constexpr year_month_day(const sys_days& __sysd) noexcept
             : year_month_day(__from_days(__sysd.time_since_epoch())) {}
@@ -2500,7 +2500,7 @@ inline constexpr year_month_day_last& year_month_day_last::operator+=(const year
 inline constexpr year_month_day_last& year_month_day_last::operator-=(const years& __dy)  noexcept { *this = *this - __dy; return *this; }
 
 inline constexpr year_month_day::year_month_day(const year_month_day_last& __ymdl) noexcept
-    : __y{__ymdl.year()}, __m{__ymdl.month()}, __d{__ymdl.day()} {}  
+    : __y{__ymdl.year()}, __m{__ymdl.month()}, __d{__ymdl.day()} {}
 
 inline constexpr bool year_month_day::ok() const noexcept
 {
@@ -2551,7 +2551,7 @@ year_month_weekday year_month_weekday::__from_days(days __d) noexcept
     const sys_days      __sysd{__d};
     const chrono::weekday __wd = chrono::weekday(__sysd);
     const year_month_day __ymd = year_month_day(__sysd);
-    return year_month_weekday{__ymd.year(), __ymd.month(), 
+    return year_month_weekday{__ymd.year(), __ymd.month(),
                               __wd[(static_cast<unsigned>(__ymd.day())-1)/7+1]};
 }
 
@@ -2643,9 +2643,9 @@ public:
     inline constexpr operator                 sys_days() const noexcept { return   sys_days{__to_days()}; }
     inline explicit constexpr operator      local_days() const noexcept { return local_days{__to_days()}; }
     inline constexpr bool ok() const noexcept { return __y.ok() && __m.ok() && __wdl.ok(); }
-    
+
     constexpr days __to_days() const noexcept;
-    
+
 };
 
 inline constexpr
@@ -2683,7 +2683,7 @@ year_month_weekday_last operator/(const month_weekday_last& __lhs, const year& _
 
 inline constexpr
 year_month_weekday_last operator/(const month_weekday_last& __lhs, int __rhs) noexcept
-{ return year(__rhs) / __lhs; }  
+{ return year(__rhs) / __lhs; }
 
 
 inline constexpr
@@ -2795,7 +2795,7 @@ inline namespace literals
     {
         return chrono::day(static_cast<unsigned>(__d));
     }
- 
+
     constexpr chrono::year operator ""y(unsigned long long __y) noexcept
     {
         return chrono::year(static_cast<int>(__y));
diff --git a/libcxx/include/complex b/libcxx/include/complex
index ff702b4ffc7ce..c168406befbdd 100644
--- a/libcxx/include/complex
+++ b/libcxx/include/complex
@@ -1449,10 +1449,10 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const complex<_Tp>& __x)
     return __os << __s.str();
 }
 
-#if _LIBCPP_STD_VER > 11 
+#if _LIBCPP_STD_VER > 11
 // Literal suffix for complex number literals [complex.literals]
 inline namespace literals
-{ 
+{
   inline namespace complex_literals
   {
     constexpr complex<long double> operator""il(long double __im)
diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list
index b7ac0ddf2ce3b..54021d0a03e8d 100644
--- a/libcxx/include/forward_list
+++ b/libcxx/include/forward_list
@@ -530,7 +530,7 @@ public:
 #if _LIBCPP_STD_VER >= 14
         _NOEXCEPT;
 #else
-        _NOEXCEPT_(!__node_traits::propagate_on_container_move_assignment::value || 
+        _NOEXCEPT_(!__node_traits::propagate_on_container_move_assignment::value ||
                     __is_nothrow_swappable<__node_allocator>::value);
 #endif
 protected:
@@ -595,11 +595,11 @@ __forward_list_base<_Tp, _Alloc>::swap(__forward_list_base& __x)
 #if _LIBCPP_STD_VER >= 14
         _NOEXCEPT
 #else
-        _NOEXCEPT_(!__node_traits::propagate_on_container_move_assignment::value || 
+        _NOEXCEPT_(!__node_traits::propagate_on_container_move_assignment::value ||
                     __is_nothrow_swappable<__node_allocator>::value)
 #endif
 {
-    __swap_allocator(__alloc(), __x.__alloc(), 
+    __swap_allocator(__alloc(), __x.__alloc(),
             integral_constant<bool, __node_traits::propagate_on_container_swap::value>());
     using _VSTD::swap;
     swap(__before_begin()->__next_, __x.__before_begin()->__next_);
diff --git a/libcxx/include/future b/libcxx/include/future
index 50bdd2da27812..24396e72ac457 100644
--- a/libcxx/include/future
+++ b/libcxx/include/future
@@ -2015,7 +2015,7 @@ public:
               class = typename enable_if
               <
                   !is_same<
-                      typename __uncvref<_Fp>::type, 
+                      typename __uncvref<_Fp>::type,
                       packaged_task
                       >::value
                   >::type
@@ -2026,7 +2026,7 @@ public:
               class = typename enable_if
               <
                   !is_same<
-                      typename __uncvref<_Fp>::type, 
+                      typename __uncvref<_Fp>::type,
                       packaged_task
                       >::value
                   >::type
@@ -2144,7 +2144,7 @@ public:
               class = typename enable_if
               <
                   !is_same<
-                      typename __uncvref<_Fp>::type, 
+                      typename __uncvref<_Fp>::type,
                       packaged_task
                       >::value
                   >::type
@@ -2155,11 +2155,11 @@ public:
               class = typename enable_if
               <
                   !is_same<
-                      typename __uncvref<_Fp>::type, 
+                      typename __uncvref<_Fp>::type,
                       packaged_task
                       >::value
                   >::type
-              >    
+              >
         _LIBCPP_INLINE_VISIBILITY
         packaged_task(allocator_arg_t, const _Allocator& __a, _Fp&& __f)
              : __f_(allocator_arg, __a, _VSTD::forward<_Fp>(__f)),
diff --git a/libcxx/include/initializer_list b/libcxx/include/initializer_list
index 6c4493b706050..893736f57ead6 100644
--- a/libcxx/include/initializer_list
+++ b/libcxx/include/initializer_list
@@ -82,7 +82,7 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     _LIBCPP_CONSTEXPR_AFTER_CXX11
     size_t    size()  const _NOEXCEPT {return __size_;}
-    
+
     _LIBCPP_INLINE_VISIBILITY
     _LIBCPP_CONSTEXPR_AFTER_CXX11
     const _Ep* begin() const _NOEXCEPT {return __begin_;}
diff --git a/libcxx/include/iomanip b/libcxx/include/iomanip
index 82b7603a348a1..3f78f4d02b3c8 100644
--- a/libcxx/include/iomanip
+++ b/libcxx/include/iomanip
@@ -515,7 +515,7 @@ put_time(const tm* __tm, const _CharT* __fmt)
 
 template <class _CharT, class _Traits, class _ForwardIterator>
 std::basic_ostream<_CharT, _Traits> &
-__quoted_output ( basic_ostream<_CharT, _Traits> &__os, 
+__quoted_output ( basic_ostream<_CharT, _Traits> &__os,
         _ForwardIterator __first, _ForwardIterator __last, _CharT __delim, _CharT __escape )
 {
     _VSTD::basic_string<_CharT, _Traits> __str;
@@ -570,7 +570,7 @@ __quoted_input ( basic_istream<_CharT, _Traits> &__is, _String & __string, _Char
 
 template <class _CharT, class _Traits, class _Iter>
 basic_ostream<_CharT, _Traits>& operator<<(
-         basic_ostream<_CharT, _Traits>& __os, 
+         basic_ostream<_CharT, _Traits>& __os,
          const __quoted_output_proxy<_CharT, _Iter, _Traits> & __proxy)
 {
     return __quoted_output (__os, __proxy.__first, __proxy.__last, __proxy.__delim, __proxy.__escape);
@@ -590,7 +590,7 @@ struct __quoted_proxy
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_INLINE_VISIBILITY
 basic_ostream<_CharT, _Traits>& operator<<(
-        basic_ostream<_CharT, _Traits>& __os, 
+        basic_ostream<_CharT, _Traits>& __os,
         const __quoted_proxy<_CharT, _Traits, _Allocator> & __proxy)
 {
     return __quoted_output (__os, __proxy.__string.cbegin (), __proxy.__string.cend (), __proxy.__delim, __proxy.__escape);
@@ -600,7 +600,7 @@ basic_ostream<_CharT, _Traits>& operator<<(
 template <class _CharT, class _Traits, class _Allocator>
 _LIBCPP_INLINE_VISIBILITY
 basic_istream<_CharT, _Traits>& operator>>(
-        basic_istream<_CharT, _Traits>& __is, 
+        basic_istream<_CharT, _Traits>& __is,
         const __quoted_proxy<_CharT, _Traits, _Allocator> & __proxy)
 {
     return __quoted_input ( __is, __proxy.__string, __proxy.__delim, __proxy.__escape );
@@ -660,7 +660,7 @@ __quoted_output_proxy<_CharT, const _CharT *, _Traits>
 quoted (basic_string_view <_CharT, _Traits> __sv,
              _CharT __delim = _CharT('"'), _CharT __escape=_CharT('\\'))
 {
-    return __quoted_output_proxy<_CharT, const _CharT *, _Traits> 
+    return __quoted_output_proxy<_CharT, const _CharT *, _Traits>
          ( __sv.data(), __sv.data() + __sv.size(), __delim, __escape );
 }
 #endif
diff --git a/libcxx/include/ios b/libcxx/include/ios
index ce4e1769f2fbb..88efefb46ff2b 100644
--- a/libcxx/include/ios
+++ b/libcxx/include/ios
@@ -202,8 +202,8 @@ enum class io_errc
 };
 
 concept_map ErrorCodeEnum<io_errc> { };
-error_code make_error_code(io_errc e) noexcept; 
-error_condition make_error_condition(io_errc e) noexcept; 
+error_code make_error_code(io_errc e) noexcept;
+error_condition make_error_condition(io_errc e) noexcept;
 storage-class-specifier const error_category& iostream_category() noexcept;
 
 }  // std
@@ -644,47 +644,47 @@ public:
     virtual ~basic_ios();
 
     // 27.5.4.2 Members:
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     basic_ostream<char_type, traits_type>* tie() const;
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     basic_ostream<char_type, traits_type>* tie(basic_ostream<char_type, traits_type>* __tiestr);
 
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     basic_streambuf<char_type, traits_type>* rdbuf() const;
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     basic_streambuf<char_type, traits_type>* rdbuf(basic_streambuf<char_type, traits_type>* __sb);
 
     basic_ios& copyfmt(const basic_ios& __rhs);
 
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     char_type fill() const;
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     char_type fill(char_type __ch);
 
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     locale imbue(const locale& __loc);
 
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     char narrow(char_type __c, char __dfault) const;
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     char_type widen(char __c) const;
 
 protected:
     _LIBCPP_INLINE_VISIBILITY
     basic_ios() {// purposefully does no initialization
                 }
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     void init(basic_streambuf<char_type, traits_type>* __sb);
 
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     void move(basic_ios& __rhs);
 #ifndef _LIBCPP_CXX03_LANG
     _LIBCPP_INLINE_VISIBILITY
     void move(basic_ios&& __rhs) {move(__rhs);}
 #endif
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     void swap(basic_ios& __rhs) _NOEXCEPT;
-    _LIBCPP_INLINE_VISIBILITY 
+    _LIBCPP_INLINE_VISIBILITY
     void set_rdbuf(basic_streambuf<char_type, traits_type>* __sb);
 private:
     basic_ostream<char_type, traits_type>* __tie_;
diff --git a/libcxx/include/iterator b/libcxx/include/iterator
index 03cfd8305994f..e73b4b713ed0a 100644
--- a/libcxx/include/iterator
+++ b/libcxx/include/iterator
@@ -69,7 +69,7 @@ typename iterator_traits<InputIterator>::difference_type n = 1);
 
 template <class BidirectionalIterator>  // constexpr in C++17
   constexpr BidirectionalIterator prev(BidirectionalIterator x,
-    typename iterator_traits<BidirectionalIterator>::difference_type n = 1);    
+    typename iterator_traits<BidirectionalIterator>::difference_type n = 1);
 
 template <class Iterator>
 class reverse_iterator
@@ -136,7 +136,7 @@ operator-(const reverse_iterator<Iterator1>& x, const reverse_iterator<Iterator2
 
 template <class Iterator>
 constexpr reverse_iterator<Iterator>
-operator+(typename reverse_iterator<Iterator>::difference_type n, 
+operator+(typename reverse_iterator<Iterator>::difference_type n,
           const reverse_iterator<Iterator>& x);   // constexpr in C++17
 
 template <class Iterator>
@@ -216,7 +216,7 @@ public:
     typedef typename iterator_traits<Iterator>::value_type        value_type;
     typedef typename iterator_traits<Iterator>::iterator_category iterator_category;
     typedef value_type&&                                          reference;
- 
+
     constexpr move_iterator();  // all the constexprs are in C++17
     constexpr explicit move_iterator(Iterator i);
     template <class U>
@@ -230,10 +230,10 @@ public:
     constexpr move_iterator operator++(int);
     constexpr move_iterator& operator--();
     constexpr move_iterator operator--(int);
-    constexpr move_iterator operator+(difference_type n) const; 
-    constexpr move_iterator& operator+=(difference_type n); 
-    constexpr move_iterator operator-(difference_type n) const; 
-    constexpr move_iterator& operator-=(difference_type n); 
+    constexpr move_iterator operator+(difference_type n) const;
+    constexpr move_iterator& operator+=(difference_type n);
+    constexpr move_iterator operator-(difference_type n) const;
+    constexpr move_iterator& operator-=(difference_type n);
     constexpr unspecified operator[](difference_type n) const;
 private:
     Iterator current; // exposition only
@@ -270,7 +270,7 @@ operator-(const move_iterator<Iterator1>& x,
 
 template <class Iterator>
 constexpr move_iterator<Iterator> operator+(   // constexpr in C++17
-            typename move_iterator<Iterator>::difference_type n, 
+            typename move_iterator<Iterator>::difference_type n,
             const move_iterator<Iterator>& x);
 
 template <class Iterator>   // constexpr in C++17
@@ -534,8 +534,8 @@ struct __is_random_access_iterator : public __has_iterator_category_convertible_
 
 template <class _Tp>
 struct __is_exactly_input_iterator
-    : public integral_constant<bool, 
-         __has_iterator_category_convertible_to<_Tp, input_iterator_tag>::value && 
+    : public integral_constant<bool,
+         __has_iterator_category_convertible_to<_Tp, input_iterator_tag>::value &&
         !__has_iterator_category_convertible_to<_Tp, forward_iterator_tag>::value> {};
 
 template<class _Category, class _Tp, class _Distance = ptrdiff_t,
@@ -620,7 +620,7 @@ template <class _InputIter>
 inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 typename enable_if
 <
-    __is_input_iterator<_InputIter>::value, 
+    __is_input_iterator<_InputIter>::value,
     _InputIter
 >::type
 next(_InputIter __x,
@@ -637,7 +637,7 @@ template <class _InputIter>
 inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 typename enable_if
 <
-    __is_input_iterator<_InputIter>::value, 
+    __is_input_iterator<_InputIter>::value,
     _InputIter
 >::type
 prev(_InputIter __x,
@@ -1129,7 +1129,7 @@ public:
       _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
       move_iterator(const move_iterator<_Up>& __u) : __i(__u.base()) {}
     _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 _Iter base() const {return __i;}
-    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 
+    _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
     reference operator*() const { return static_cast<reference>(*__i); }
     _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
     pointer  operator->() const { return __i;}
@@ -1649,7 +1649,7 @@ operator+(typename __wrap_iter<_Iter>::difference_type __n,
 template <class _Iter>
 struct __libcpp_is_trivial_iterator
     : public _LIBCPP_BOOL_CONSTANT(is_pointer<_Iter>::value) {};
-    
+
 template <class _Iter>
 struct __libcpp_is_trivial_iterator<move_iterator<_Iter> >
     : public _LIBCPP_BOOL_CONSTANT(__libcpp_is_trivial_iterator<_Iter>::value) {};
@@ -1904,7 +1904,7 @@ template <class _Cont> constexpr
 _LIBCPP_INLINE_VISIBILITY
 auto data(const _Cont& __c)
 _NOEXCEPT_(noexcept(__c.data()))
--> decltype        (__c.data()) 
+-> decltype        (__c.data())
 { return            __c.data(); }
 
 template <class _Tp, size_t _Sz>
diff --git a/libcxx/include/locale b/libcxx/include/locale
index d570331bbb036..3fe44300227a7 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -126,7 +126,7 @@ public:
     wbuffer_convert(const wbuffer_convert&) = delete;               // C++14
     wbuffer_convert & operator=(const wbuffer_convert &) = delete;  // C++14
     ~wbuffer_convert();                                             // C++14
-    
+
     streambuf* rdbuf() const;
     streambuf* rdbuf(streambuf* bytebuf);
 
@@ -3923,7 +3923,7 @@ private:
     wbuffer_convert(const wbuffer_convert&);
     wbuffer_convert& operator=(const wbuffer_convert&);
 public:
-    _LIBCPP_EXPLICIT_AFTER_CXX11 wbuffer_convert(streambuf* __bytebuf = 0, 
+    _LIBCPP_EXPLICIT_AFTER_CXX11 wbuffer_convert(streambuf* __bytebuf = 0,
             _Codecvt* __pcvt = new _Codecvt, state_type __state = state_type());
     ~wbuffer_convert();
 
@@ -4038,7 +4038,7 @@ wbuffer_convert<_Codecvt, _Elem, _Tr>::underflow()
                                        this->egptr(), __inext);
                 if (__r == codecvt_base::noconv)
                 {
-                    this->setg((char_type*)__extbuf_, (char_type*)__extbuf_, 
+                    this->setg((char_type*)__extbuf_, (char_type*)__extbuf_,
                                (char_type*) const_cast<char *>(__extbufend_));
                     __c = *this->gptr();
                 }
diff --git a/libcxx/include/numeric b/libcxx/include/numeric
index 62cc29cbd6a79..ba2fe2696a953 100644
--- a/libcxx/include/numeric
+++ b/libcxx/include/numeric
@@ -548,9 +548,9 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 
 template <class _TPtr>
 _LIBCPP_INLINE_VISIBILITY constexpr
-enable_if_t<is_pointer_v<_TPtr> 
-             && is_object_v<remove_pointer_t<_TPtr>> 
-             && ! is_void_v<remove_pointer_t<_TPtr>> 
+enable_if_t<is_pointer_v<_TPtr>
+             && is_object_v<remove_pointer_t<_TPtr>>
+             && ! is_void_v<remove_pointer_t<_TPtr>>
              && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr>
 midpoint(_TPtr __a, _TPtr __b) noexcept
 {
@@ -568,7 +568,7 @@ _LIBCPP_INLINE_VISIBILITY constexpr
 enable_if_t<is_floating_point_v<_Fp>, _Fp>
 midpoint(_Fp __a, _Fp __b) noexcept
 {
-    return isnormal(__a) && isnormal(__b) 
+    return isnormal(__a) && isnormal(__b)
        && ((__sign(__a) != __sign(__b)) || ((numeric_limits<_Fp>::max() - abs(__a)) < abs(__b)))
              ?  __a / 2 + __b / 2
              : (__a + __b) / 2;
diff --git a/libcxx/include/queue b/libcxx/include/queue
index 55be800178559..97ec6f633c515 100644
--- a/libcxx/include/queue
+++ b/libcxx/include/queue
@@ -70,8 +70,8 @@ public:
 
 template<class Container>
   queue(Container) -> queue<typename Container::value_type, Container>; // C++17
-  
-template<class Container, class Allocator> 
+
+template<class Container, class Allocator>
   queue(Container, Allocator) -> queue<typename Container::value_type, Container>; // C++17
 
 template <class T, class Container>
@@ -165,13 +165,13 @@ public:
 template <class Compare, class Container>
 priority_queue(Compare, Container)
     -> priority_queue<typename Container::value_type, Container, Compare>; // C++17
-  
-template<class InputIterator, 
+
+template<class InputIterator,
          class Compare = less<typename iterator_traits<InputIterator>::value_type>,
          class Container = vector<typename iterator_traits<InputIterator>::value_type>>
 priority_queue(InputIterator, InputIterator, Compare = Compare(), Container = Container())
     -> priority_queue<typename iterator_traits<InputIterator>::value_type, Container, Compare>; // C++17
-  
+
 template<class Compare, class Container, class Allocator>
 priority_queue(Compare, Container, Allocator)
     -> priority_queue<typename Container::value_type, Container, Compare>; // C++17
@@ -346,7 +346,7 @@ template<class _Container,
 >
 queue(_Container)
     -> queue<typename _Container::value_type, _Container>;
-  
+
 template<class _Container,
          class _Alloc,
          class = typename enable_if<!__is_allocator<_Container>::value, nullptr_t>::type,
@@ -558,8 +558,8 @@ template <class _Compare,
 >
 priority_queue(_Compare, _Container)
     -> priority_queue<typename _Container::value_type, _Container, _Compare>;
-  
-template<class _InputIterator, 
+
+template<class _InputIterator,
          class _Compare   = less<typename iterator_traits<_InputIterator>::value_type>,
          class _Container = vector<typename iterator_traits<_InputIterator>::value_type>,
          class = typename enable_if< __is_input_iterator<_InputIterator>::value, nullptr_t>::type,
@@ -568,8 +568,8 @@ template<class _InputIterator,
 >
 priority_queue(_InputIterator, _InputIterator, _Compare = _Compare(), _Container = _Container())
     -> priority_queue<typename iterator_traits<_InputIterator>::value_type, _Container, _Compare>;
-  
-template<class _Compare, 
+
+template<class _Compare,
          class _Container,
          class _Alloc,
          class = typename enable_if<!__is_allocator<_Compare>::value, nullptr_t>::type,
diff --git a/libcxx/include/random b/libcxx/include/random
index a73239519ac1f..9fefee0817025 100644
--- a/libcxx/include/random
+++ b/libcxx/include/random
@@ -2226,19 +2226,19 @@ template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::state_size;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::shift_size;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::mask_bits;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
@@ -2250,7 +2250,7 @@ template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_u;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
@@ -2262,7 +2262,7 @@ template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_s;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
@@ -2274,7 +2274,7 @@ template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_t;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
@@ -2286,7 +2286,7 @@ template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
           _UIntType __a, size_t __u, _UIntType __d, size_t __s,
           _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-    _LIBCPP_CONSTEXPR const size_t 
+    _LIBCPP_CONSTEXPR const size_t
     mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_l;
 
 template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
@@ -6426,7 +6426,7 @@ public:
             param_type(size_t __nw, result_type __xmin, result_type __xmax,
                        _UnaryOperation __fw);
         param_type & operator=(const param_type& __rhs);
-        
+
         _LIBCPP_INLINE_VISIBILITY
         vector<result_type> intervals() const {return __b_;}
         _LIBCPP_INLINE_VISIBILITY
diff --git a/libcxx/include/regex b/libcxx/include/regex
index a0e3ba61e71c4..0db3c91ce8bb6 100644
--- a/libcxx/include/regex
+++ b/libcxx/include/regex
@@ -677,7 +677,7 @@ public:
                    regex_constants::match_flag_type m = regex_constants::match_default);
     regex_iterator(_BidirectionalIterator __a, _BidirectionalIterator __b,
                    const regex_type&& __re,
-                   regex_constants::match_flag_type __m 
+                   regex_constants::match_flag_type __m
                                      = regex_constants::match_default) = delete; // C++14
     regex_iterator(const regex_iterator&);
     regex_iterator& operator=(const regex_iterator&);
@@ -3410,7 +3410,7 @@ basic_regex<_CharT, _Traits>::__parse_BACKREF(_ForwardIterator __first,
         if (__temp != __last)
         {
             if (*__first == '\\')
-            { 
+            {
                 int __val = __traits_.value(*__temp, 10);
                 if (__val >= 1 && __val <= 9)
                 {
@@ -4107,7 +4107,7 @@ basic_regex<_CharT, _Traits>::__parse_DUP_COUNT(_ForwardIterator __first,
         if ( __val != -1 )
         {
             __c = __val;
-            for (++__first; 
+            for (++__first;
                  __first != __last && ( __val = __traits_.value(*__first, 10)) != -1;
                  ++__first)
             {
@@ -4487,7 +4487,7 @@ basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
         case 'c':
             if ((__t = _VSTD::next(__first)) != __last)
             {
-                if (('A' <= *__t && *__t <= 'Z') || 
+                if (('A' <= *__t && *__t <= 'Z') ||
                     ('a' <= *__t && *__t <= 'z'))
                 {
                     if (__str)
@@ -4496,7 +4496,7 @@ basic_regex<_CharT, _Traits>::__parse_character_escape(_ForwardIterator __first,
                         __push_char(_CharT(*__t % 32));
                     __first = ++__t;
                 }
-                else 
+                else
                     __throw_regex_error<regex_constants::error_escape>();
             }
             else
@@ -5918,7 +5918,7 @@ basic_regex<_CharT, _Traits>::__search(
 {
     __m.__init(1 + mark_count(), __first, __last,
                                     __flags & regex_constants::__no_update_pos);
-    if (__match_at_start(__first, __last, __m, __flags, 
+    if (__match_at_start(__first, __last, __m, __flags,
                                     !(__flags & regex_constants::__no_update_pos)))
     {
         __m.__prefix_.second = __m[0].first;
@@ -6064,7 +6064,7 @@ bool
 regex_search(const basic_string<_Cp, _ST, _SA>&& __s,
              match_results<typename basic_string<_Cp, _ST, _SA>::const_iterator, _Ap>&,
              const basic_regex<_Cp, _Tp>& __e,
-             regex_constants::match_flag_type __flags = regex_constants::match_default) = delete; 
+             regex_constants::match_flag_type __flags = regex_constants::match_default) = delete;
 #endif
 
 // regex_match
@@ -6128,7 +6128,7 @@ bool
 regex_match(const basic_string<_CharT, _ST, _SA>&& __s,
             match_results<typename basic_string<_CharT, _ST, _SA>::const_iterator, _Allocator>& __m,
             const basic_regex<_CharT, _Traits>& __e,
-            regex_constants::match_flag_type __flags = regex_constants::match_default) = delete; 
+            regex_constants::match_flag_type __flags = regex_constants::match_default) = delete;
 #endif
 
 template <class _CharT, class _Traits>
@@ -6181,7 +6181,7 @@ public:
 #if _LIBCPP_STD_VER > 11
     regex_iterator(_BidirectionalIterator __a, _BidirectionalIterator __b,
                    const regex_type&& __re,
-                   regex_constants::match_flag_type __m 
+                   regex_constants::match_flag_type __m
                                      = regex_constants::match_default) = delete;
 #endif
 
@@ -6377,7 +6377,7 @@ private:
             __result_ = &__position_->prefix();
         else
             __result_ = &(*__position_)[__subs_[__n_]];
-        }       
+        }
 };
 
 template <class _BidirectionalIterator, class _CharT, class _Traits>
diff --git a/libcxx/include/stack b/libcxx/include/stack
index b50ca5cdcb140..2a2b350386e58 100644
--- a/libcxx/include/stack
+++ b/libcxx/include/stack
@@ -62,8 +62,8 @@ public:
 
 template<class Container>
   stack(Container) -> stack<typename Container::value_type, Container>;  // C++17
-  
-template<class Container, class Allocator> 
+
+template<class Container, class Allocator>
   stack(Container, Allocator) -> stack<typename Container::value_type, Container>; // C++17
 
 template <class T, class Container>
@@ -118,7 +118,7 @@ public:
     typedef typename container_type::const_reference const_reference;
     typedef typename container_type::size_type       size_type;
     static_assert((is_same<_Tp, value_type>::value), "" );
-    
+
 protected:
     container_type c;
 
@@ -240,12 +240,12 @@ template<class _Container,
 >
 stack(_Container)
     -> stack<typename _Container::value_type, _Container>;
-  
+
 template<class _Container,
          class _Alloc,
          class = typename enable_if<!__is_allocator<_Container>::value, nullptr_t>::type,
          class = typename enable_if< __is_allocator<_Alloc>::value, nullptr_t>::type
-         > 
+         >
 stack(_Container, _Alloc)
     -> stack<typename _Container::value_type, _Container>;
 #endif
diff --git a/libcxx/include/string b/libcxx/include/string
index 8d4e13cf52737..1e5b09800c61d 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -627,13 +627,13 @@ struct __libcpp_string_gets_noexcept_iterator_impl : public false_type {};
 #else
 template <class _Iter, bool = __is_forward_iterator<_Iter>::value>
 struct __libcpp_string_gets_noexcept_iterator_impl : public _LIBCPP_BOOL_CONSTANT((
-    noexcept(++(declval<_Iter&>())) && 
-    is_nothrow_assignable<_Iter&, _Iter>::value && 
-    noexcept(declval<_Iter>() == declval<_Iter>()) && 
+    noexcept(++(declval<_Iter&>())) &&
+    is_nothrow_assignable<_Iter&, _Iter>::value &&
+    noexcept(declval<_Iter>() == declval<_Iter>()) &&
     noexcept(*declval<_Iter>())
 )) {};
 
-template <class _Iter> 
+template <class _Iter>
 struct __libcpp_string_gets_noexcept_iterator_impl<_Iter, false> : public false_type {};
 #endif
 
@@ -2525,7 +2525,7 @@ basic_string<_CharT, _Traits, _Allocator>::__append_forward_unsafe(
             const basic_string __temp (__first, __last, __alloc());
             append(__temp.data(), __temp.size());
         }
-        else 
+        else
         {
             if (__cap - __sz < __n)
                 __grow_by(__cap, __sz + __n - __cap, __sz, __sz, 0);
@@ -3824,7 +3824,7 @@ basic_string<_CharT, _Traits, _Allocator>::__invariants() const
 
 template<class _CharT, class _Traits, class _Allocator>
 inline
-void 
+void
 basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() _NOEXCEPT
 {
     clear();
@@ -3834,7 +3834,7 @@ basic_string<_CharT, _Traits, _Allocator>::__clear_and_shrink() _NOEXCEPT
         __set_long_cap(0);
         __set_short_size(0);
     }
-} 
+}
 
 // operator==
 
@@ -4327,7 +4327,7 @@ basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator*
 _LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string<char>)
 _LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string<wchar_t>)
 
-#if _LIBCPP_STD_VER > 11 
+#if _LIBCPP_STD_VER > 11
 // Literal suffixes for basic_string [basic.string.literals]
 inline namespace literals
 {
diff --git a/libcxx/include/string_view b/libcxx/include/string_view
index 9a6eb0c237377..d29bcc3e8c1a8 100644
--- a/libcxx/include/string_view
+++ b/libcxx/include/string_view
@@ -364,7 +364,7 @@ public:
     }
 
     _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-    int compare(                       size_type __pos1, size_type __n1, 
+    int compare(                       size_type __pos1, size_type __n1,
                 basic_string_view __sv, size_type __pos2, size_type __n2) const
     {
         return substr(__pos1, __n1).compare(__sv.substr(__pos2, __n2));
@@ -628,7 +628,7 @@ bool operator==(basic_string_view<_CharT, _Traits> __lhs,
 
 template<class _CharT, class _Traits>
 _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-bool operator==(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, 
+bool operator==(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
                 basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
 {
     if ( __lhs.size() != __rhs.size()) return false;
@@ -658,7 +658,7 @@ bool operator!=(basic_string_view<_CharT, _Traits> __lhs,
 
 template<class _CharT, class _Traits>
 _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-bool operator!=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, 
+bool operator!=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
                 basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
 {
     if ( __lhs.size() != __rhs.size())
@@ -685,7 +685,7 @@ bool operator<(basic_string_view<_CharT, _Traits> __lhs,
 
 template<class _CharT, class _Traits>
 _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-bool operator<(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, 
+bool operator<(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
                 basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
 {
     return __lhs.compare(__rhs) < 0;
@@ -710,7 +710,7 @@ bool operator>(basic_string_view<_CharT, _Traits> __lhs,
 
 template<class _CharT, class _Traits>
 _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-bool operator>(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, 
+bool operator>(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
                 basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
 {
     return __lhs.compare(__rhs) > 0;
@@ -735,7 +735,7 @@ bool operator<=(basic_string_view<_CharT, _Traits> __lhs,
 
 template<class _CharT, class _Traits>
 _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-bool operator<=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, 
+bool operator<=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
                 basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
 {
     return __lhs.compare(__rhs) <= 0;
@@ -761,7 +761,7 @@ bool operator>=(basic_string_view<_CharT, _Traits> __lhs,
 
 template<class _CharT, class _Traits>
 _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
-bool operator>=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, 
+bool operator>=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
                 basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
 {
     return __lhs.compare(__rhs) >= 0;
@@ -787,7 +787,7 @@ struct _LIBCPP_TEMPLATE_VIS hash<basic_string_view<_CharT, _Traits> >
 };
 
 
-#if _LIBCPP_STD_VER > 11 
+#if _LIBCPP_STD_VER > 11
 inline namespace literals
 {
   inline namespace string_view_literals
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index 42057c5d3fd16..4dfe69868e543 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -79,12 +79,12 @@ public:
       unordered_map(InputIterator f, InputIterator l, size_type n, const allocator_type& a)
       : unordered_map(f, l, n, hasher(), key_equal(), a) {}  // C++14
     template <class InputIterator>
-      unordered_map(InputIterator f, InputIterator l, size_type n, const hasher& hf, 
+      unordered_map(InputIterator f, InputIterator l, size_type n, const hasher& hf,
         const allocator_type& a)
       : unordered_map(f, l, n, hf, key_equal(), a) {}  // C++14
     unordered_map(initializer_list<value_type> il, size_type n, const allocator_type& a)
       : unordered_map(il, n, hasher(), key_equal(), a) {}  // C++14
-    unordered_map(initializer_list<value_type> il, size_type n, const hasher& hf, 
+    unordered_map(initializer_list<value_type> il, size_type n, const hasher& hf,
       const allocator_type& a)
       : unordered_map(il, n, hf, key_equal(), a) {}  // C++14
     ~unordered_map();
@@ -277,12 +277,12 @@ public:
       unordered_multimap(InputIterator f, InputIterator l, size_type n, const allocator_type& a)
       : unordered_multimap(f, l, n, hasher(), key_equal(), a) {}  // C++14
     template <class InputIterator>
-      unordered_multimap(InputIterator f, InputIterator l, size_type n, const hasher& hf, 
+      unordered_multimap(InputIterator f, InputIterator l, size_type n, const hasher& hf,
         const allocator_type& a)
       : unordered_multimap(f, l, n, hf, key_equal(), a) {}  // C++14
     unordered_multimap(initializer_list<value_type> il, size_type n, const allocator_type& a)
       : unordered_multimap(il, n, hasher(), key_equal(), a) {}  // C++14
-    unordered_multimap(initializer_list<value_type> il, size_type n, const hasher& hf, 
+    unordered_multimap(initializer_list<value_type> il, size_type n, const hasher& hf,
       const allocator_type& a)
       : unordered_multimap(il, n, hf, key_equal(), a) {}  // C++14
     ~unordered_multimap();
@@ -951,14 +951,14 @@ public:
       : unordered_map(__first, __last, __n, hasher(), key_equal(), __a) {}
     template <class _InputIterator>
     _LIBCPP_INLINE_VISIBILITY
-      unordered_map(_InputIterator __first, _InputIterator __last, size_type __n, const hasher& __hf, 
+      unordered_map(_InputIterator __first, _InputIterator __last, size_type __n, const hasher& __hf,
         const allocator_type& __a)
       : unordered_map(__first, __last, __n, __hf, key_equal(), __a) {}
     _LIBCPP_INLINE_VISIBILITY
     unordered_map(initializer_list<value_type> __il, size_type __n, const allocator_type& __a)
       : unordered_map(__il, __n, hasher(), key_equal(), __a) {}
     _LIBCPP_INLINE_VISIBILITY
-    unordered_map(initializer_list<value_type> __il, size_type __n, const hasher& __hf, 
+    unordered_map(initializer_list<value_type> __il, size_type __n, const hasher& __hf,
       const allocator_type& __a)
       : unordered_map(__il, __n, __hf, key_equal(), __a) {}
 #endif
@@ -1778,14 +1778,14 @@ public:
       : unordered_multimap(__first, __last, __n, hasher(), key_equal(), __a) {}
     template <class _InputIterator>
     _LIBCPP_INLINE_VISIBILITY
-      unordered_multimap(_InputIterator __first, _InputIterator __last, size_type __n, const hasher& __hf, 
+      unordered_multimap(_InputIterator __first, _InputIterator __last, size_type __n, const hasher& __hf,
         const allocator_type& __a)
       : unordered_multimap(__first, __last, __n, __hf, key_equal(), __a) {}
     _LIBCPP_INLINE_VISIBILITY
     unordered_multimap(initializer_list<value_type> __il, size_type __n, const allocator_type& __a)
       : unordered_multimap(__il, __n, hasher(), key_equal(), __a) {}
     _LIBCPP_INLINE_VISIBILITY
-    unordered_multimap(initializer_list<value_type> __il, size_type __n, const hasher& __hf, 
+    unordered_multimap(initializer_list<value_type> __il, size_type __n, const hasher& __hf,
       const allocator_type& __a)
       : unordered_multimap(__il, __n, __hf, key_equal(), __a) {}
 #endif
diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set
index b32e4cae2cdcf..3661e36a3712a 100644
--- a/libcxx/include/unordered_set
+++ b/libcxx/include/unordered_set
@@ -75,7 +75,7 @@ public:
     template <class InputIterator>
       unordered_set(InputIterator f, InputIterator l, size_type n, const allocator_type& a); // C++14
     template <class InputIterator>
-      unordered_set(InputIterator f, InputIterator l, size_type n, 
+      unordered_set(InputIterator f, InputIterator l, size_type n,
                     const hasher& hf,  const allocator_type& a); // C++14
     unordered_set(initializer_list<value_type> il, size_type n, const allocator_type& a); // C++14
     unordered_set(initializer_list<value_type> il, size_type n,
@@ -242,7 +242,7 @@ public:
       unordered_multiset(InputIterator f, InputIterator l, size_type n,
                          const hasher& hf, const allocator_type& a); // C++14
     unordered_multiset(initializer_list<value_type> il, size_type n, const allocator_type& a); // C++14
-    unordered_multiset(initializer_list<value_type> il, size_type n, 
+    unordered_multiset(initializer_list<value_type> il, size_type n,
                        const hasher& hf,  const allocator_type& a); // C++14
     ~unordered_multiset();
     unordered_multiset& operator=(const unordered_multiset&);
@@ -450,11 +450,11 @@ public:
 #if _LIBCPP_STD_VER > 11
     template <class _InputIterator>
     inline _LIBCPP_INLINE_VISIBILITY
-        unordered_set(_InputIterator __first, _InputIterator __last, 
+        unordered_set(_InputIterator __first, _InputIterator __last,
                     size_type __n, const allocator_type& __a)
             : unordered_set(__first, __last, __n, hasher(), key_equal(), __a) {}
     template <class _InputIterator>
-        unordered_set(_InputIterator __first, _InputIterator __last, 
+        unordered_set(_InputIterator __first, _InputIterator __last,
                       size_type __n, const hasher& __hf, const allocator_type& __a)
             : unordered_set(__first, __last, __n, __hf, key_equal(), __a) {}
 #endif
@@ -480,7 +480,7 @@ public:
                                                       const allocator_type& __a)
         : unordered_set(__il, __n, hasher(), key_equal(), __a) {}
     inline _LIBCPP_INLINE_VISIBILITY
-    unordered_set(initializer_list<value_type> __il, size_type __n, 
+    unordered_set(initializer_list<value_type> __il, size_type __n,
                                   const hasher& __hf, const allocator_type& __a)
         : unordered_set(__il, __n, __hf, key_equal(), __a) {}
 #endif
@@ -1052,7 +1052,7 @@ public:
 #if _LIBCPP_STD_VER > 11
     template <class _InputIterator>
     inline _LIBCPP_INLINE_VISIBILITY
-    unordered_multiset(_InputIterator __first, _InputIterator __last, 
+    unordered_multiset(_InputIterator __first, _InputIterator __last,
                        size_type __n, const allocator_type& __a)
         : unordered_multiset(__first, __last, __n, hasher(), key_equal(), __a) {}
     template <class _InputIterator>

From b7ca72a1138f668c73641f1a5a34819bec3a21de Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 16:02:33 +0000
Subject: [PATCH 0510/1176] [analyzer] print() JSONify: Checker messages
 implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62086

llvm-svn: 361982
---
 .../StaticAnalyzer/Core/CheckerManager.h      | 23 +++---
 .../StaticAnalyzer/Core/CheckerManager.cpp    | 73 +++++++++++++++++--
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |  3 +-
 clang/test/Analysis/expr-inspection.c         |  2 +
 clang/test/Analysis/use-after-move.cpp        | 22 ++++++
 5 files changed, 108 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 612286ba8b1ff..532b908df32dd 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -406,16 +406,21 @@ class CheckerManager {
   ///
   /// Unlike most other callbacks, any checker can simply implement the virtual
   /// method CheckerBase::printState if it has custom data to print.
-  /// \param Out The output stream
+  ///
+  /// \param Out   The output stream
   /// \param State The state being printed
-  /// \param NL The preferred representation of a newline.
-  /// \param Sep The preferred separator between different kinds of data.
-  void runCheckersForPrintState(raw_ostream &Out, ProgramStateRef State,
-                                const char *NL, const char *Sep);
-
-//===----------------------------------------------------------------------===//
-// Internal registration functions for AST traversing.
-//===----------------------------------------------------------------------===//
+  /// \param NL    The preferred representation of a newline.
+  /// \param Sep   The preferred separator between different messages.
+  /// \param Space The preferred space between the left side and the message.
+  /// \param IsDot Whether the message will be printed in 'dot' format.
+  void runCheckersForPrintStateJson(raw_ostream &Out, ProgramStateRef State,
+                                    const char *NL = "\n",
+                                    unsigned int Space = 0,
+                                    bool IsDot = false) const;
+
+  //===----------------------------------------------------------------------===//
+  // Internal registration functions for AST traversing.
+  //===----------------------------------------------------------------------===//
 
   // Functions used by the registration mechanism, checkers should not touch
   // these directly.
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
index 53d872021af5b..cda9fe9bf5c8f 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp
@@ -14,6 +14,7 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Analysis/ProgramPoint.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
@@ -698,11 +699,73 @@ void CheckerManager::runCheckersOnEndOfTranslationUnit(
     EndOfTranslationUnitChecker(TU, mgr, BR);
 }
 
-void CheckerManager::runCheckersForPrintState(raw_ostream &Out,
-                                              ProgramStateRef State,
-                                              const char *NL, const char *Sep) {
-  for (const auto &CheckerTag : CheckerTags)
-    CheckerTag.second->printState(Out, State, NL, Sep);
+void CheckerManager::runCheckersForPrintStateJson(raw_ostream &Out,
+                                                  ProgramStateRef State,
+                                                  const char *NL,
+                                                  unsigned int Space,
+                                                  bool IsDot) const {
+  Indent(Out, Space, IsDot) << "\"checker_messages\": ";
+
+  // Create a temporary stream to see whether we have any message.
+  SmallString<1024> TempBuf;
+  llvm::raw_svector_ostream TempOut(TempBuf);
+  unsigned int InnerSpace = Space + 2;
+
+  // Create the new-line in JSON with enough space.
+  SmallString<128> NewLine;
+  llvm::raw_svector_ostream NLOut(NewLine);
+  NLOut << "\", " << NL;                     // Inject the ending and a new line
+  Indent(NLOut, InnerSpace, IsDot) << "\"";  // then begin the next message.
+
+  ++Space;
+  bool HasMessage = false;
+
+  // Store the last CheckerTag.
+  const void *LastCT = nullptr;
+  for (const auto &CT : CheckerTags) {
+    // See whether the current checker has a message.
+    CT.second->printState(TempOut, State, /*NL=*/NewLine.c_str(), /*Sep=*/"");
+
+    if (TempBuf.empty())
+      continue;
+
+    if (!HasMessage) {
+      Out << '[' << NL;
+      HasMessage = true;
+    }
+
+    LastCT = &CT;
+    TempBuf.clear();
+  }
+
+  for (const auto &CT : CheckerTags) {
+    // See whether the current checker has a message.
+    CT.second->printState(TempOut, State, /*NL=*/NewLine.c_str(), /*Sep=*/"");
+
+    if (TempBuf.empty())
+      continue;
+
+    Indent(Out, Space, IsDot)
+        << "{ \"checker\": \"" << CT.second->getCheckName().getName()
+        << "\", \"messages\": [" << NL;
+    Indent(Out, InnerSpace, IsDot)
+        << '\"' << TempBuf.str().trim() << '\"' << NL;
+    Indent(Out, Space, IsDot) << "]}";
+
+    if (&CT != LastCT)
+      Out << ',';
+    Out << NL;
+
+    TempBuf.clear();
+  }
+
+  // It is the last element of the 'program_state' so do not add a comma.
+  if (HasMessage)
+    Indent(Out, --Space, IsDot) << "]";
+  else
+    Out << "null";
+
+  Out << NL;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 07fc6f7643a83..6fa7cf2e23272 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -633,7 +633,8 @@ void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State,
     Out << "null," << NL;
   }
 
-  getCheckerManager().runCheckersForPrintState(Out, State, NL, "");
+  getCheckerManager().runCheckersForPrintStateJson(Out, State, NL, Space,
+                                                   IsDot);
 }
 
 void ExprEngine::processEndWorklist() {
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index cf9c9f82d0a05..2837c30d37e6d 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -38,3 +38,5 @@ void foo(int x) {
 // CHECK-NEXT: ],
 // CHECK-NEXT: "dynamic_types": null,
 // CHECK-NEXT: "constructing_objects": null,
+// CHECK-NEXT: "checker_messages": null
+
diff --git a/clang/test/Analysis/use-after-move.cpp b/clang/test/Analysis/use-after-move.cpp
index 5e4179b1f13f9..ac4222bc776e3 100644
--- a/clang/test/Analysis/use-after-move.cpp
+++ b/clang/test/Analysis/use-after-move.cpp
@@ -44,9 +44,18 @@
 // CHECK-MOVE-INVALID-VALUE-SAME: "KnownsOnly", "KnownsAndLocals" or "All"
 // CHECK-MOVE-INVALID-VALUE-SAME: string value
 
+// Tests checker-messages printing.
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.Move %s\
+// RUN:  -std=c++11 -analyzer-output=text -analyzer-config eagerly-assume=false\
+// RUN:  -analyzer-config exploration_strategy=dfs -DDFS\
+// RUN:  -analyzer-config cplusplus.Move:WarnOn=All -DAGGRESSIVE_DFS\
+// RUN:  -analyzer-checker core,cplusplus.SmartPtr,debug.ExprInspection\
+// RUN:  -verify=expected,peaceful,aggressive %s 2>&1 | FileCheck %s
+
 #include "Inputs/system-header-simulator-cxx.h"
 
 void clang_analyzer_warnIfReached();
+void clang_analyzer_printState();
 
 class B {
 public:
@@ -145,6 +154,19 @@ void simpleMoveCtorTest() {
   {
     A a;
     A b = std::move(a); // peaceful-note {{Object 'a' is moved}}
+
+#ifdef AGGRESSIVE_DFS
+    clang_analyzer_printState();
+
+// CHECK:      "checker_messages": [
+// CHECK-NEXT:   { "checker": "cplusplus.Move", "messages": [
+// CHECK-NEXT:     "Moved-from objects :",
+// CHECK:          "a: moved",
+// CHECK:          ""
+// CHECK-NEXT:   ]}
+// CHECK-NEXT: ]
+#endif
+
     a.foo(); // peaceful-warning {{Method called on moved-from object 'a'}}
              // peaceful-note@-1 {{Method called on moved-from object 'a'}}
   }

From df0a42127cee5747727b368ddd8d85ed8d273c5a Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 16:22:21 +0000
Subject: [PATCH 0511/1176] [analyzer] print() JSONify: Program state
 implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy,
             dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62087

llvm-svn: 361983
---
 .../Core/PathSensitive/ProgramState.h         |  4 +--
 .../lib/StaticAnalyzer/Core/ProgramState.cpp  | 23 ++++++++-----
 clang/test/Analysis/dump_egraph.cpp           |  6 ++--
 clang/test/Analysis/expr-inspection.c         | 34 ++++++++++---------
 4 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index 4092f40747858..cddae9d02e402 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -425,8 +425,8 @@ class ProgramState : public llvm::FoldingSetNode {
 
   // Pretty-printing.
   void printJson(raw_ostream &Out, const LocationContext *LCtx = nullptr,
-                 const char *NL = "\n", const char *Sep = "",
-                 unsigned int Space = 0, bool IsDot = false) const;
+                 const char *NL = "\n", unsigned int Space = 0,
+                 bool IsDot = false) const;
 
   void printDOT(raw_ostream &Out, const LocationContext *LCtx = nullptr,
                 unsigned int Space = 0) const;
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 2f5c33ee6c222..a1ca0b1b84bfa 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -10,13 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/Analysis/CFG.h"
+#include "clang/Basic/JsonSupport.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeMap.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace clang;
@@ -441,15 +442,18 @@ void ProgramState::setStore(const StoreRef &newStore) {
 //===----------------------------------------------------------------------===//
 
 void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
-                             const char *NL, const char *Sep,
-                             unsigned int Space, bool IsDot) const {
-  // Print the store.
+                             const char *NL, unsigned int Space,
+                             bool IsDot) const {
+  Indent(Out, Space, IsDot) << "\"program_state\": {" << NL;
+  ++Space;
+
   ProgramStateManager &Mgr = getStateManager();
-  const ASTContext &Context = getStateManager().getContext();
+
+  // Print the store.
   Mgr.getStoreManager().printJson(Out, getStore(), NL, Space, IsDot);
 
   // Print out the environment.
-  Env.printJson(Out, Context, LCtx, NL, Space, IsDot);
+  Env.printJson(Out, Mgr.getContext(), LCtx, NL, Space, IsDot);
 
   // Print out the constraints.
   Mgr.getConstraintManager().printJson(Out, this, NL, Space, IsDot);
@@ -459,11 +463,14 @@ void ProgramState::printJson(raw_ostream &Out, const LocationContext *LCtx,
 
   // Print checker-specific data.
   Mgr.getOwningEngine().printJson(Out, this, LCtx, NL, Space, IsDot);
+
+  --Space;
+  Indent(Out, Space, IsDot) << '}';
 }
 
 void ProgramState::printDOT(raw_ostream &Out, const LocationContext *LCtx,
                             unsigned int Space) const {
-  printJson(Out, LCtx, "\\l", "\\|", Space, /*IsDot=*/true);
+  printJson(Out, LCtx, /*NL=*/"\\l", Space, /*IsDot=*/true);
 }
 
 LLVM_DUMP_METHOD void ProgramState::dump() const {
diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index 9b05e4a6ba5dc..e3e55554e44ed 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -16,9 +16,9 @@ void foo() {
   T t;
 }
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": 1155, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\"
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": 1155, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": 1092, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": 1092, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
 
-// CHECK: \"store\": [\l&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
+// CHECK: \"store\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
 
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index 2837c30d37e6d..aec4c1ebaeb22 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -23,20 +23,22 @@ void foo(int x) {
   }
 }
 
-// CHECK:      "store": [
-// CHECK-NEXT:   { "cluster": "y", "items": [
-// CHECK-NEXT:     { "kind": "Direct", "offset": 0, "value": "2 S32b" }
-// CHECK-NEXT:   ]}
-// CHECK-NEXT: ],
-// CHECK-NEXT: "environment": [
-// CHECK-NEXT:   { "location_context": "#0 Call", "calling": "foo", "call_line": null, "items": [
-// CHECK-NEXT:     { "lctx_id": 1, "stmt_id": 847, "pretty": "clang_analyzer_printState", "value": "&code{clang_analyzer_printState}" }
-// CHECK-NEXT:   ]}
-// CHECK-NEXT: ],
-// CHECK-NEXT: "constraints": [
-// CHECK-NEXT:   { "symbol": "reg_$0<int x>", "range": "{ [-2147483648, 13] }" }
-// CHECK-NEXT: ],
-// CHECK-NEXT: "dynamic_types": null,
-// CHECK-NEXT: "constructing_objects": null,
-// CHECK-NEXT: "checker_messages": null
+// CHECK:      "program_state": {
+// CHECK-NEXT:   "store": [
+// CHECK-NEXT:     { "cluster": "y", "items": [
+// CHECK-NEXT:       { "kind": "Direct", "offset": 0, "value": "2 S32b" }
+// CHECK-NEXT:     ]}
+// CHECK-NEXT:   ],
+// CHECK-NEXT:   "environment": [
+// CHECK-NEXT:     { "location_context": "#0 Call", "calling": "foo", "call_line": null, "items": [
+// CHECK-NEXT:       { "lctx_id": 1, "stmt_id": 847, "pretty": "clang_analyzer_printState", "value": "&code{clang_analyzer_printState}" }
+// CHECK-NEXT:     ]}
+// CHECK-NEXT:   ],
+// CHECK-NEXT:   "constraints": [
+// CHECK-NEXT:     { "symbol": "reg_$0<int x>", "range": "{ [-2147483648, 13] }" }
+// CHECK-NEXT:   ],
+// CHECK-NEXT:   "dynamic_types": null,
+// CHECK-NEXT:   "constructing_objects": null,
+// CHECK-NEXT:   "checker_messages": null
+// CHECK-NEXT: }
 

From 0a729261e22e11e010a6e7244f2c69281a508d9b Mon Sep 17 00:00:00 2001
From: Francois Ferrand <thetypz@gmail.com>
Date: Wed, 29 May 2019 16:22:43 +0000
Subject: [PATCH 0512/1176] [NFC] clang-format: Use LLVM style in
 NamespaceEndCommentsFixerTest

As pointed out in https://reviews.llvm.org/D37813#inline-555026, the
code which is formatted does not match LLVM formatting style.

Technically this is not a problem since these tests bypass most of the
formatter, but it can be misleading.

llvm-svn: 361984
---
 .../Format/NamespaceEndCommentsFixerTest.cpp  | 297 +++++++++---------
 1 file changed, 149 insertions(+), 148 deletions(-)

diff --git a/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp b/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp
index 5091b1d9de0be..d4c16c82e133a 100644
--- a/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp
+++ b/clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp
@@ -45,124 +45,124 @@ class NamespaceEndCommentsFixerTest : public ::testing::Test {
 
 TEST_F(NamespaceEndCommentsFixerTest, AddsEndComment) {
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}"));
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace\n",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}\n"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}"));
   EXPECT_EQ("inline namespace A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace A",
             fixNamespaceEndComments("inline namespace A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}"));
   EXPECT_EQ("namespace ::A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace ::A",
             fixNamespaceEndComments("namespace ::A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}"));
   EXPECT_EQ("namespace ::A::B {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace ::A::B",
             fixNamespaceEndComments("namespace ::A::B {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}"));
   EXPECT_EQ("namespace /**/::/**/A/**/::/**/B/**/ {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace ::A::B",
             fixNamespaceEndComments("namespace /**/::/**/A/**/::/**/B/**/ {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}"));
   EXPECT_EQ("namespace A {\n"
             "namespace B {\n"
-            "  int i;\n"
+            "int i;\n"
             "}\n"
             "}// namespace A",
             fixNamespaceEndComments("namespace A {\n"
                                     "namespace B {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}\n"
                                     "}"));
   EXPECT_EQ("namespace A {\n"
             "namespace B {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace B\n"
             "}// namespace A",
             fixNamespaceEndComments("namespace A {\n"
                                     "namespace B {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}\n"
                                     "}"));
   EXPECT_EQ("namespace A {\n"
-            "  int a;\n"
-            "  int b;\n"
+            "int a;\n"
+            "int b;\n"
             "}// namespace A\n"
             "namespace B {\n"
-            "  int b;\n"
-            "  int a;\n"
+            "int b;\n"
+            "int a;\n"
             "}// namespace B",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int a;\n"
-                                    "  int b;\n"
+                                    "int a;\n"
+                                    "int b;\n"
                                     "}\n"
                                     "namespace B {\n"
-                                    "  int b;\n"
-                                    "  int a;\n"
+                                    "int b;\n"
+                                    "int a;\n"
                                     "}"));
   EXPECT_EQ("namespace A {\n"
-            "  int a1;\n"
-            "  int a2;\n"
+            "int a1;\n"
+            "int a2;\n"
             "}// namespace A\n"
             "namespace A {\n"
-            "  int a2;\n"
-            "  int a1;\n"
+            "int a2;\n"
+            "int a1;\n"
             "}// namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int a1;\n"
-                                    "  int a2;\n"
+                                    "int a1;\n"
+                                    "int a2;\n"
                                     "}\n"
                                     "namespace A {\n"
-                                    "  int a2;\n"
-                                    "  int a1;\n"
+                                    "int a2;\n"
+                                    "int a1;\n"
                                     "}"));
   EXPECT_EQ("namespace A {\n"
-            "  int a;\n"
-            "  int b;\n"
+            "int a;\n"
+            "int b;\n"
             "}// namespace A\n"
             "// comment about b\n"
             "int b;",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int a;\n"
-                                    "  int b;\n"
+                                    "int a;\n"
+                                    "int b;\n"
                                     "}\n"
                                     "// comment about b\n"
                                     "int b;"));
@@ -221,84 +221,84 @@ TEST_F(NamespaceEndCommentsFixerTest, AddsEndComment) {
 
   // Adds an end comment after a semicolon.
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "};// namespace",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "};"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "};// namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "};"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "};// namespace A\n"
             "// unrelated",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "};\n"
                                     "// unrelated"));
 }
 
 TEST_F(NamespaceEndCommentsFixerTest, AddsNewlineIfNeeded) {
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace A\n"
             " int k;",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "} int k;"));
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace\n"
             " int k;",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "} int k;"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace A\n"
             " namespace B {\n"
-            "  int j;\n"
-            "  int k;\n"
+            "int j;\n"
+            "int k;\n"
             "}// namespace B",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "} namespace B {\n"
-                                    "  int j;\n"
-                                    "  int k;\n"
+                                    "int j;\n"
+                                    "int k;\n"
                                     "}"));
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "};// namespace\n"
             "int k;",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "};int k;"));
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "};// namespace\n"
             ";",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "};;"));
 }
 
@@ -313,18 +313,18 @@ TEST_F(NamespaceEndCommentsFixerTest, DoesNotAddEndCommentForShortNamespace) {
 
 TEST_F(NamespaceEndCommentsFixerTest, DoesNotAddCommentAfterUnaffectedRBrace) {
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "}",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}",
                                     // The range (16, 3) spans the 'int' above.
                                     /*Ranges=*/{1, tooling::Range(16, 3)}));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "};",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "};",
                                     // The range (16, 3) spans the 'int' above.
                                     /*Ranges=*/{1, tooling::Range(16, 3)}));
@@ -333,89 +333,90 @@ TEST_F(NamespaceEndCommentsFixerTest, DoesNotAddCommentAfterUnaffectedRBrace) {
 TEST_F(NamespaceEndCommentsFixerTest, DoesNotAddCommentAfterRBraceInPPDirective) {
   EXPECT_EQ("#define SAD \\\n"
             "namespace A { \\\n"
-            "  int i; \\\n"
+            "int i; \\\n"
             "}",
             fixNamespaceEndComments("#define SAD \\\n"
                                     "namespace A { \\\n"
-                                    "  int i; \\\n"
+                                    "int i; \\\n"
                                     "}"));
 }
 
 TEST_F(NamespaceEndCommentsFixerTest, KeepsValidEndComment) {
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // end anonymous namespace",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} // end anonymous namespace"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} /* end of namespace A */",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} /* end of namespace A */"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "}   //   namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}   //   namespace A"));
   EXPECT_EQ("namespace A::B {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // end namespace A::B",
             fixNamespaceEndComments("namespace A::B {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} // end namespace A::B"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "}; // end namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}; // end namespace A"));
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
+            "int i;\n"
             "}; /* unnamed namespace */",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}; /* unnamed namespace */"));
 }
 
 TEST_F(NamespaceEndCommentsFixerTest, UpdatesInvalidEndLineComment) {
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} // namespace A"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} // namespace"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} //"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
-            "} // namespace A",
+            "int i;\n"
+            "}; // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
-                                    "} //"));
+                                    "int i;\n"
+                                    "}; //"));
+
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} // banamespace A"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "}; // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}; // banamespace A"));
   // Updates invalid line comments even for short namespaces.
   EXPECT_EQ("namespace A {} // namespace A",
@@ -447,40 +448,40 @@ TEST_F(NamespaceEndCommentsFixerTest, UpdatesInvalidEndLineComment) {
 
 TEST_F(NamespaceEndCommentsFixerTest, UpdatesInvalidEndBlockComment) {
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} /* namespace A */"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "}  // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}  /* end namespace */"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} /**/"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} /* end unnamed namespace */"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} /* banamespace A */"));
   EXPECT_EQ("namespace A {\n"
-            "  int i;\n"
+            "int i;\n"
             "}; // namespace A",
             fixNamespaceEndComments("namespace A {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}; /* banamespace A */"));
   EXPECT_EQ("namespace A {} // namespace A",
             fixNamespaceEndComments("namespace A {} /**/"));
@@ -495,7 +496,7 @@ TEST_F(NamespaceEndCommentsFixerTest,
             "#elseif\n"
             "namespace B {\n"
             "#endif\n"
-            "  int i;\n"
+            "int i;\n"
             "}\n"
             "}\n",
             fixNamespaceEndComments("#ifdef 1\n"
@@ -503,7 +504,7 @@ TEST_F(NamespaceEndCommentsFixerTest,
                                     "#elseif\n"
                                     "namespace B {\n"
                                     "#endif\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}\n"
                                     "}\n"));
 }
@@ -643,7 +644,7 @@ TEST_F(NamespaceEndCommentsFixerTest,
             "#elseif\n"
             "namespace B {\n"
             "#endif\n"
-            "  int i;\n"
+            "int i;\n"
             "}\n"
             "}\n",
             fixNamespaceEndComments("#ifdef 1\n"
@@ -651,26 +652,26 @@ TEST_F(NamespaceEndCommentsFixerTest,
                                     "#elseif\n"
                                     "namespace B {\n"
                                     "#endif\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "}\n"
                                     "}\n"));
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
-            "  int j;\n"
+            "int i;\n"
+            "int j;\n"
             "}// namespace\n"
             "#if A\n"
-            "  int i;\n"
+            "int i;\n"
             "#else\n"
-            "  int j;\n"
+            "int j;\n"
             "#endif",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
-                                    "  int j;\n"
+                                    "int i;\n"
+                                    "int j;\n"
                                     "}\n"
                                     "#if A\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "#else\n"
-                                    "  int j;\n"
+                                    "int j;\n"
                                     "#endif"));
   EXPECT_EQ("#if A\n"
             "namespace A {\n"
@@ -757,11 +758,11 @@ TEST_F(NamespaceEndCommentsFixerTest,
 TEST_F(NamespaceEndCommentsFixerTest,
        DoesNotAddEndCommentForUnbalancedRBracesAfterNamespaceEnd) {
   EXPECT_EQ("namespace {\n"
-            "  int i;\n"
+            "int i;\n"
             "} // namespace\n"
             "}",
             fixNamespaceEndComments("namespace {\n"
-                                    "  int i;\n"
+                                    "int i;\n"
                                     "} // namespace\n"
                                     "}"));
 }

From 308b7139b1d8e2374014c37f170b30197271e958 Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Wed, 29 May 2019 16:29:31 +0000
Subject: [PATCH 0513/1176] Partial revert of revert of r361827: Add
 constrained intrinsic tests for powerpc64le.

The powerpc64-"nonle" tests are removed. They fail because of a bug that
Drew is currently working on that affects multiple targets.

Submitted by:	Drew Wock <drew.wock@sas.com>
Reviewed by:	Hal Finkel, Kevin P. Neal
Approved by:	Hal Finkel
Differential Revision:	http://reviews.llvm.org/D62388

llvm-svn: 361985
---
 .../vector-constrained-fp-intrinsics.ll       | 7528 +++++++++++++++++
 1 file changed, 7528 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
new file mode 100644
index 0000000000000..1a467f9d482c6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -0,0 +1,7528 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu < %s | FileCheck --check-prefix=PC64LE %s
+; RUN: llc -O3 -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s | FileCheck --check-prefix=PC64LE9 %s
+
+define <1 x float> @constrained_vector_fdiv_v1f32() {
+; PC64LE-LABEL: constrained_vector_fdiv_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI0_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI0_1@toc@l(4)
+; PC64LE-NEXT:    xsdivsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI0_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI0_1@toc@l(3)
+; PC64LE9-NEXT:    xsdivsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %div
+}
+
+define <2 x double> @constrained_vector_fdiv_v2f64() {
+; PC64LE-LABEL: constrained_vector_fdiv_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI1_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI1_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvdivdp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI1_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI1_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvdivdp 34, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+define <3 x float> @constrained_vector_fdiv_v3f32() {
+; PC64LE-LABEL: constrained_vector_fdiv_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI2_3@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI2_2@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI2_3@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI2_2@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; PC64LE-NEXT:    xsdivsp 1, 1, 0
+; PC64LE-NEXT:    lfs 3, .LCPI2_1@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
+; PC64LE-NEXT:    xsdivsp 2, 2, 0
+; PC64LE-NEXT:    addi 3, 3, .LCPI2_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xsdivsp 0, 3, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI2_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI2_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_2@toc@ha
+; PC64LE9-NEXT:    xsdivsp 1, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI2_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_3@toc@ha
+; PC64LE9-NEXT:    lfs 3, .LCPI2_3@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI2_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI2_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    xsdivsp 2, 2, 0
+; PC64LE9-NEXT:    xsdivsp 0, 3, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+entry:
+  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %div
+}
+
+define <3 x double> @constrained_vector_fdiv_v3f64() {
+; PC64LE-LABEL: constrained_vector_fdiv_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI3_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI3_2@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI3_3@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI3_1@toc@ha
+; PC64LE-NEXT:    lfs 3, .LCPI3_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvdivdp 2, 1, 0
+; PC64LE-NEXT:    lfs 0, .LCPI3_1@toc@l(4)
+; PC64LE-NEXT:    xsdivdp 3, 0, 3
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI3_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI3_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI3_2@toc@l
+; PC64LE9-NEXT:    xsdivdp 3, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI3_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI3_3@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvdivdp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %div
+}
+
+define <4 x double> @constrained_vector_fdiv_v4f64() {
+; PC64LE-LABEL: constrained_vector_fdiv_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI4_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI4_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 4, .LCPI4_1@toc@l
+; PC64LE-NEXT:    addi 4, 5, .LCPI4_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 3
+; PC64LE-NEXT:    lxvd2x 2, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvdivdp 34, 1, 0
+; PC64LE-NEXT:    xvdivdp 35, 2, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fdiv_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI4_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI4_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI4_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI4_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI4_2@toc@l
+; PC64LE9-NEXT:    xvdivdp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvdivdp 35, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %div
+}
+
+define <1 x float> @constrained_vector_frem_v1f32() {
+; PC64LE-LABEL: constrained_vector_frem_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI5_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI5_1@toc@l(4)
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI5_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI5_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI5_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %rem
+}
+
+define <2 x double> @constrained_vector_frem_v2f64() {
+; PC64LE-LABEL: constrained_vector_frem_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI6_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI6_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI6_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI6_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI6_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI6_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI6_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %rem
+}
+
+define <3 x float> @constrained_vector_frem_v3f32() {
+; PC64LE-LABEL: constrained_vector_frem_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI7_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI7_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
+; PC64LE-NEXT:    bl fmodf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI7_4@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI7_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI7_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI7_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI7_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_3@toc@ha
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI7_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmodf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI7_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI7_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %rem
+}
+
+define <3 x double> @constrained_vector_frem_v3f64() {
+; PC64LE-LABEL: constrained_vector_frem_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI8_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI8_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI8_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI8_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI8_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI8_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI8_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %rem
+}
+
+define <4 x double> @constrained_vector_frem_v4f64() {
+; PC64LE-LABEL: constrained_vector_frem_v4f64:
+; PC64LE:       # %bb.0:
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI9_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI9_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
+; PC64LE-NEXT:    bl fmod
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_frem_v4f64:
+; PC64LE9:       # %bb.0:
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI9_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI9_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI9_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI9_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI9_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI9_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmod
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %rem
+}
+
+define <1 x float> @constrained_vector_fmul_v1f32() {
+; PC64LE-LABEL: constrained_vector_fmul_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI10_1@toc@l(4)
+; PC64LE-NEXT:    xsmulsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI10_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI10_1@toc@l(3)
+; PC64LE9-NEXT:    xsmulsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 2.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %mul
+}
+
+define <2 x double> @constrained_vector_fmul_v2f64() {
+; PC64LE-LABEL: constrained_vector_fmul_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI11_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvmuldp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI11_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmuldp 34, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %mul
+}
+
+define <3 x float> @constrained_vector_fmul_v3f32() {
+; PC64LE-LABEL: constrained_vector_fmul_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI12_3@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI12_2@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI12_1@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI12_3@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI12_2@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; PC64LE-NEXT:    xsmulsp 1, 0, 1
+; PC64LE-NEXT:    lfs 3, .LCPI12_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI12_4@toc@ha
+; PC64LE-NEXT:    xsmulsp 2, 0, 2
+; PC64LE-NEXT:    addi 3, 3, .LCPI12_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xsmulsp 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI12_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_2@toc@ha
+; PC64LE9-NEXT:    xsmulsp 0, 1, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI12_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_3@toc@ha
+; PC64LE9-NEXT:    lfs 3, .LCPI12_3@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI12_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI12_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    xsmulsp 2, 1, 2
+; PC64LE9-NEXT:    xsmulsp 1, 1, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE9-NEXT:    xscvdpspn 1, 2
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+entry:
+  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
+           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
+                        float 0x7FF0000000000000>,
+           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %mul
+}
+
+define <3 x double> @constrained_vector_fmul_v3f64() {
+; PC64LE-LABEL: constrained_vector_fmul_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI13_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI13_2@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI13_3@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI13_1@toc@ha
+; PC64LE-NEXT:    lfd 3, .LCPI13_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvmuldp 2, 1, 0
+; PC64LE-NEXT:    lfs 0, .LCPI13_1@toc@l(4)
+; PC64LE-NEXT:    xsmuldp 3, 3, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI13_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI13_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI13_2@toc@l
+; PC64LE9-NEXT:    xsmuldp 3, 0, 1
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI13_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI13_3@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvmuldp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %mul
+}
+
+define <4 x double> @constrained_vector_fmul_v4f64() {
+; PC64LE-LABEL: constrained_vector_fmul_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI14_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI14_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI14_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 5, .LCPI14_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvmuldp 34, 1, 0
+; PC64LE-NEXT:    xvmuldp 35, 1, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fmul_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI14_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI14_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI14_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI14_2@toc@l
+; PC64LE9-NEXT:    xvmuldp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvmuldp 35, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 2.000000e+00, double 3.000000e+00,
+                         double 4.000000e+00, double 5.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %mul
+}
+
+define <1 x float> @constrained_vector_fadd_v1f32() {
+; PC64LE-LABEL: constrained_vector_fadd_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI15_1@toc@l(4)
+; PC64LE-NEXT:    xsaddsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI15_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI15_1@toc@l(3)
+; PC64LE9-NEXT:    xsaddsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %add
+}
+
+define <2 x double> @constrained_vector_fadd_v2f64() {
+; PC64LE-LABEL: constrained_vector_fadd_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI16_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI16_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI16_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvadddp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI16_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI16_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI16_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvadddp 34, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %add
+}
+
+define <3 x float> @constrained_vector_fadd_v3f32() {
+; PC64LE-LABEL: constrained_vector_fadd_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI17_2@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI17_1@toc@ha
+; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI17_2@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI17_1@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI17_3@toc@l
+; PC64LE-NEXT:    xsaddsp 1, 0, 1
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xsaddsp 2, 0, 2
+; PC64LE-NEXT:    xsaddsp 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI17_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI17_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_2@toc@ha
+; PC64LE9-NEXT:    xsaddsp 2, 0, 2
+; PC64LE9-NEXT:    lfs 3, .LCPI17_2@toc@l(3)
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    xsaddsp 1, 0, 1
+; PC64LE9-NEXT:    xsaddsp 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    addis 3, 2, .LCPI17_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI17_3@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+entry:
+  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %add
+}
+
+define <3 x double> @constrained_vector_fadd_v3f64() {
+; PC64LE-LABEL: constrained_vector_fadd_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI18_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI18_1@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI18_2@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
+; PC64LE-NEXT:    lfd 3, .LCPI18_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvadddp 2, 1, 0
+; PC64LE-NEXT:    xxlxor 0, 0, 0
+; PC64LE-NEXT:    xsadddp 3, 3, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI18_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI18_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI18_1@toc@ha
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI18_1@toc@l
+; PC64LE9-NEXT:    xsadddp 3, 0, 1
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI18_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI18_2@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvadddp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> <double 2.0, double 1.0, double 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %add
+}
+
+define <4 x double> @constrained_vector_fadd_v4f64() {
+; PC64LE-LABEL: constrained_vector_fadd_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI19_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI19_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI19_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI19_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 5, .LCPI19_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvadddp 34, 1, 0
+; PC64LE-NEXT:    xvadddp 35, 1, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fadd_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI19_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI19_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI19_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI19_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI19_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI19_2@toc@l
+; PC64LE9-NEXT:    xvadddp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvadddp 35, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %add
+}
+
+define <1 x float> @constrained_vector_fsub_v1f32() {
+; PC64LE-LABEL: constrained_vector_fsub_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI20_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI20_1@toc@l(4)
+; PC64LE-NEXT:    xssubsp 0, 1, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI20_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI20_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI20_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI20_1@toc@l(3)
+; PC64LE9-NEXT:    xssubsp 0, 1, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %sub
+}
+
+define <2 x double> @constrained_vector_fsub_v2f64() {
+; PC64LE-LABEL: constrained_vector_fsub_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI21_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI21_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI21_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvsubdp 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI21_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI21_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI21_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI21_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvsubdp 34, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sub
+}
+
+define <3 x float> @constrained_vector_fsub_v3f32() {
+; PC64LE-LABEL: constrained_vector_fsub_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI22_2@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI22_1@toc@ha
+; PC64LE-NEXT:    xxlxor 3, 3, 3
+; PC64LE-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI22_2@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI22_1@toc@l(5)
+; PC64LE-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI22_3@toc@l
+; PC64LE-NEXT:    xssubsp 1, 0, 1
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xssubsp 2, 0, 2
+; PC64LE-NEXT:    xssubsp 0, 0, 3
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xscvdpspn 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 1, 1, 1
+; PC64LE-NEXT:    xxsldwi 35, 2, 2, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI22_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI22_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_2@toc@ha
+; PC64LE9-NEXT:    xssubsp 2, 0, 2
+; PC64LE9-NEXT:    lfs 3, .LCPI22_2@toc@l(3)
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    xssubsp 1, 0, 1
+; PC64LE9-NEXT:    xssubsp 0, 0, 3
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    addis 3, 2, .LCPI22_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI22_3@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    blr
+entry:
+  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %sub
+}
+
+define <3 x double> @constrained_vector_fsub_v3f64() {
+; PC64LE-LABEL: constrained_vector_fsub_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI23_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI23_1@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI23_2@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
+; PC64LE-NEXT:    lfd 3, .LCPI23_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvsubdp 2, 1, 0
+; PC64LE-NEXT:    xxlxor 0, 0, 0
+; PC64LE-NEXT:    xssubdp 3, 3, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI23_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI23_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI23_1@toc@ha
+; PC64LE9-NEXT:    xxlxor 1, 1, 1
+; PC64LE9-NEXT:    addi 3, 3, .LCPI23_1@toc@l
+; PC64LE9-NEXT:    xssubdp 3, 0, 1
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI23_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI23_2@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    xvsubdp 2, 1, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
+           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF>,
+           <3 x double> <double 2.0, double 1.0, double 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %sub
+}
+
+define <4 x double> @constrained_vector_fsub_v4f64() {
+; PC64LE-LABEL: constrained_vector_fsub_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI24_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI24_2@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI24_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI24_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 5, .LCPI24_2@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    lxvd2x 2, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xxswapd 2, 2
+; PC64LE-NEXT:    xvsubdp 34, 1, 0
+; PC64LE-NEXT:    xvsubdp 35, 1, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fsub_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI24_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI24_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI24_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI24_1@toc@l
+; PC64LE9-NEXT:    lxvx 1, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI24_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI24_2@toc@l
+; PC64LE9-NEXT:    xvsubdp 34, 1, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsubdp 35, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
+           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %sub
+}
+
+define <1 x float> @constrained_vector_sqrt_v1f32() {
+; PC64LE-LABEL: constrained_vector_sqrt_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI25_0@toc@l(3)
+; PC64LE-NEXT:    xssqrtsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI25_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI25_0@toc@l(3)
+; PC64LE9-NEXT:    xssqrtsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
+                              <1 x float> <float 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %sqrt
+}
+
+define <2 x double> @constrained_vector_sqrt_v2f64() {
+; PC64LE-LABEL: constrained_vector_sqrt_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI26_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvsqrtdp 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI26_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI26_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 34, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %sqrt
+}
+
+define <3 x float> @constrained_vector_sqrt_v3f32() {
+; PC64LE-LABEL: constrained_vector_sqrt_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI27_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI27_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI27_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
+; PC64LE-NEXT:    xssqrtsp 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI27_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI27_3@toc@ha
+; PC64LE-NEXT:    xssqrtsp 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI27_3@toc@l
+; PC64LE-NEXT:    xssqrtsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI27_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI27_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_2@toc@ha
+; PC64LE9-NEXT:    xssqrtsp 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI27_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI27_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI27_3@toc@l
+; PC64LE9-NEXT:    xssqrtsp 1, 1
+; PC64LE9-NEXT:    xssqrtsp 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+entry:
+  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sqrt
+}
+
+define <3 x double> @constrained_vector_sqrt_v3f64() {
+; PC64LE-LABEL: constrained_vector_sqrt_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI28_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI28_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xssqrtdp 3, 1
+; PC64LE-NEXT:    xvsqrtdp 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI28_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI28_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI28_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI28_1@toc@l
+; PC64LE9-NEXT:    xssqrtdp 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %sqrt
+}
+
+define <4 x double> @constrained_vector_sqrt_v4f64() {
+; PC64LE-LABEL: constrained_vector_sqrt_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI29_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI29_0@toc@l
+; PC64LE-NEXT:    addi 4, 4, .LCPI29_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    lxvd2x 1, 0, 4
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvsqrtdp 34, 0
+; PC64LE-NEXT:    xvsqrtdp 35, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sqrt_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI29_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI29_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI29_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI29_1@toc@l
+; PC64LE9-NEXT:    xvsqrtdp 34, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvsqrtdp 35, 0
+; PC64LE9-NEXT:    blr
+;
+ entry:
+  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %sqrt
+}
+
+define <1 x float> @constrained_vector_pow_v1f32() {
+; PC64LE-LABEL: constrained_vector_pow_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI30_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI30_1@toc@l(4)
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI30_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI30_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI30_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI30_1@toc@l(3)
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
+                             <1 x float> <float 42.0>,
+                             <1 x float> <float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %pow
+}
+
+define <2 x double> @constrained_vector_pow_v2f64() {
+; PC64LE-LABEL: constrained_vector_pow_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI31_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI31_1@toc@l(4)
+; PC64LE-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI31_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI31_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI31_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI31_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
+                             <2 x double> <double 42.1, double 42.2>,
+                             <2 x double> <double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %pow
+}
+
+define <3 x float> @constrained_vector_pow_v3f32() {
+; PC64LE-LABEL: constrained_vector_pow_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI32_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI32_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
+; PC64LE-NEXT:    bl powf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI32_4@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI32_4@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI32_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI32_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI32_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_3@toc@ha
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI32_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl powf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI32_4@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI32_4@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
+                             <3 x float> <float 42.0, float 43.0, float 44.0>,
+                             <3 x float> <float 3.0, float 3.0, float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <3 x float> %pow
+}
+
+define <3 x double> @constrained_vector_pow_v3f64() {
+; PC64LE-LABEL: constrained_vector_pow_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI33_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI33_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI33_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI33_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI33_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI33_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI33_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %pow
+}
+
+define <4 x double> @constrained_vector_pow_v4f64() {
+; PC64LE-LABEL: constrained_vector_pow_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -96(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 96
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    .cfi_offset v31, -32
+; PC64LE-NEXT:    addis 4, 2, .LCPI34_1@toc@ha
+; PC64LE-NEXT:    stfd 31, 88(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI34_1@toc@l(4)
+; PC64LE-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
+; PC64LE-NEXT:    bl pow
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    lfd 31, 88(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 96
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_pow_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -80(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    .cfi_offset v31, -32
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI34_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_1@toc@ha
+; PC64LE9-NEXT:    stfd 31, 72(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    lfs 31, .LCPI34_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI34_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI34_3@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI34_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI34_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl pow
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    lfd 31, 72(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 80
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
+                             <4 x double> <double 42.1, double 42.2,
+                                           double 42.3, double 42.4>,
+                             <4 x double> <double 3.0, double 3.0,
+                                           double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %pow
+}
+
+define <1 x float> @constrained_vector_powi_v1f32() {
+; PC64LE-LABEL: constrained_vector_powi_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI35_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI35_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
+                              <1 x float> <float 42.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %powi
+}
+
+define <2 x double> @constrained_vector_powi_v2f64() {
+; PC64LE-LABEL: constrained_vector_powi_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI36_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI36_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI36_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI36_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
+                              <2 x double> <double 42.1, double 42.2>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %powi
+}
+
+define <3 x float> @constrained_vector_powi_v3f32() {
+;
+;
+; PC64LE-LABEL: constrained_vector_powi_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
+; PC64LE-NEXT:    bl __powisf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI37_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI37_3@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI37_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI37_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI37_2@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powisf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI37_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI37_3@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %powi
+}
+
+define <3 x double> @constrained_vector_powi_v3f64() {
+; PC64LE-LABEL: constrained_vector_powi_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI38_0@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI38_1@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI38_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI38_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI38_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI38_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI38_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI38_2@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          i32 3,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %powi
+}
+
+define <4 x double> @constrained_vector_powi_v4f64() {
+; PC64LE-LABEL: constrained_vector_powi_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    li 4, 3
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
+; PC64LE-NEXT:    bl __powidf2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_powi_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI39_0@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI39_1@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI39_2@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI39_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI39_3@toc@l(3)
+; PC64LE9-NEXT:    li 4, 3
+; PC64LE9-NEXT:    bl __powidf2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %powi
+}
+
+define <1 x float> @constrained_vector_sin_v1f32() {
+; PC64LE-LABEL: constrained_vector_sin_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI40_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI40_0@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %sin
+}
+
+define <2 x double> @constrained_vector_sin_v2f64() {
+; PC64LE-LABEL: constrained_vector_sin_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI41_0@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI41_1@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI41_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI41_0@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI41_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI41_1@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %sin
+}
+
+define <3 x float> @constrained_vector_sin_v3f32() {
+; PC64LE-LABEL: constrained_vector_sin_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
+; PC64LE-NEXT:    bl sinf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI42_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI42_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI42_0@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI42_1@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI42_2@toc@l(3)
+; PC64LE9-NEXT:    bl sinf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI42_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI42_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sin
+}
+
+define <3 x double> @constrained_vector_sin_v3f64() {
+; PC64LE-LABEL: constrained_vector_sin_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI43_0@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI43_1@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI43_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI43_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI43_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI43_1@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI43_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI43_2@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %sin
+}
+
+define <4 x double> @constrained_vector_sin_v4f64() {
+; PC64LE-LABEL: constrained_vector_sin_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI44_0@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI44_3@toc@l(3)
+; PC64LE-NEXT:    bl sin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_sin_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI44_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI44_1@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI44_2@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI44_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI44_3@toc@l(3)
+; PC64LE9-NEXT:    bl sin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %sin
+}
+
+define <1 x float> @constrained_vector_cos_v1f32() {
+; PC64LE-LABEL: constrained_vector_cos_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI45_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI45_0@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %cos
+}
+
+define <2 x double> @constrained_vector_cos_v2f64() {
+; PC64LE-LABEL: constrained_vector_cos_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI46_0@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI46_1@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI46_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI46_0@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI46_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI46_1@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %cos
+}
+
+define <3 x float> @constrained_vector_cos_v3f32() {
+; PC64LE-LABEL: constrained_vector_cos_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
+; PC64LE-NEXT:    bl cosf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI47_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI47_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI47_0@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI47_1@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI47_2@toc@l(3)
+; PC64LE9-NEXT:    bl cosf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI47_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI47_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %cos
+}
+
+define <3 x double> @constrained_vector_cos_v3f64() {
+; PC64LE-LABEL: constrained_vector_cos_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI48_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI48_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI48_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI48_1@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI48_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI48_2@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %cos
+}
+
+define <4 x double> @constrained_vector_cos_v4f64() {
+; PC64LE-LABEL: constrained_vector_cos_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI49_0@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI49_3@toc@l(3)
+; PC64LE-NEXT:    bl cos
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_cos_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI49_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI49_1@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI49_2@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI49_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI49_3@toc@l(3)
+; PC64LE9-NEXT:    bl cos
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %cos
+}
+
+define <1 x float> @constrained_vector_exp_v1f32() {
+; PC64LE-LABEL: constrained_vector_exp_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI50_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI50_0@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp
+}
+
+define <2 x double> @constrained_vector_exp_v2f64() {
+; PC64LE-LABEL: constrained_vector_exp_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI51_0@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI51_1@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI51_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI51_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI51_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI51_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %exp
+}
+
+define <3 x float> @constrained_vector_exp_v3f32() {
+; PC64LE-LABEL: constrained_vector_exp_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
+; PC64LE-NEXT:    bl expf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI52_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI52_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI52_0@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI52_1@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI52_2@toc@l(3)
+; PC64LE9-NEXT:    bl expf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI52_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI52_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp
+}
+
+define <3 x double> @constrained_vector_exp_v3f64() {
+; PC64LE-LABEL: constrained_vector_exp_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI53_0@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI53_1@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI53_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI53_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI53_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI53_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI53_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI53_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %exp
+}
+
+define <4 x double> @constrained_vector_exp_v4f64() {
+; PC64LE-LABEL: constrained_vector_exp_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI54_0@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI54_3@toc@l(3)
+; PC64LE-NEXT:    bl exp
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI54_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI54_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI54_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI54_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI54_3@toc@l(3)
+; PC64LE9-NEXT:    bl exp
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %exp
+}
+
+define <1 x float> @constrained_vector_exp2_v1f32() {
+; PC64LE-LABEL: constrained_vector_exp2_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI55_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI55_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp2
+}
+
+define <2 x double> @constrained_vector_exp2_v2f64() {
+; PC64LE-LABEL: constrained_vector_exp2_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI56_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI56_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI56_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI56_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
+                              <2 x double> <double 42.1, double 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %exp2
+}
+
+define <3 x float> @constrained_vector_exp2_v3f32() {
+; PC64LE-LABEL: constrained_vector_exp2_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
+; PC64LE-NEXT:    bl exp2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI57_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI57_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI57_0@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI57_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI57_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI57_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI57_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp2
+}
+
+define <3 x double> @constrained_vector_exp2_v3f64() {
+; PC64LE-LABEL: constrained_vector_exp2_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI58_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI58_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI58_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI58_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI58_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI58_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI58_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI58_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %exp2
+}
+
+define <4 x double> @constrained_vector_exp2_v4f64() {
+; PC64LE-LABEL: constrained_vector_exp2_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
+; PC64LE-NEXT:    bl exp2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_exp2_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI59_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI59_1@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI59_2@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI59_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI59_3@toc@l(3)
+; PC64LE9-NEXT:    bl exp2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %exp2
+}
+
+define <1 x float> @constrained_vector_log_v1f32() {
+; PC64LE-LABEL: constrained_vector_log_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI60_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI60_0@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log
+}
+
+define <2 x double> @constrained_vector_log_v2f64() {
+; PC64LE-LABEL: constrained_vector_log_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI61_0@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI61_1@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI61_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI61_0@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI61_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI61_1@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %log
+}
+
+define <3 x float> @constrained_vector_log_v3f32() {
+; PC64LE-LABEL: constrained_vector_log_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
+; PC64LE-NEXT:    bl logf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI62_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI62_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI62_0@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI62_1@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI62_2@toc@l(3)
+; PC64LE9-NEXT:    bl logf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI62_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI62_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log
+}
+
+define <3 x double> @constrained_vector_log_v3f64() {
+; PC64LE-LABEL: constrained_vector_log_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI63_0@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI63_1@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI63_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI63_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI63_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI63_1@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI63_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI63_2@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %log
+}
+
+define <4 x double> @constrained_vector_log_v4f64() {
+; PC64LE-LABEL: constrained_vector_log_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI64_0@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI64_3@toc@l(3)
+; PC64LE-NEXT:    bl log
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI64_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI64_1@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI64_2@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI64_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI64_3@toc@l(3)
+; PC64LE9-NEXT:    bl log
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %log
+}
+
+define <1 x float> @constrained_vector_log10_v1f32() {
+; PC64LE-LABEL: constrained_vector_log10_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI65_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI65_0@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log10
+}
+
+define <2 x double> @constrained_vector_log10_v2f64() {
+; PC64LE-LABEL: constrained_vector_log10_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI66_0@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI66_1@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI66_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI66_0@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI66_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI66_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
+                               <2 x double> <double 42.0, double 42.1>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <2 x double> %log10
+}
+
+define <3 x float> @constrained_vector_log10_v3f32() {
+; PC64LE-LABEL: constrained_vector_log10_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
+; PC64LE-NEXT:    bl log10f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI67_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI67_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI67_0@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI67_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI67_2@toc@l(3)
+; PC64LE9-NEXT:    bl log10f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI67_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI67_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log10
+}
+
+define <3 x double> @constrained_vector_log10_v3f64() {
+; PC64LE-LABEL: constrained_vector_log10_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI68_0@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI68_1@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI68_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI68_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI68_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI68_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI68_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI68_2@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %log10
+}
+
+define <4 x double> @constrained_vector_log10_v4f64() {
+; PC64LE-LABEL: constrained_vector_log10_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI69_0@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI69_3@toc@l(3)
+; PC64LE-NEXT:    bl log10
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log10_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI69_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI69_1@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI69_2@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI69_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI69_3@toc@l(3)
+; PC64LE9-NEXT:    bl log10
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
+                               <4 x double> <double 42.0, double 42.1,
+                                             double 42.2, double 42.3>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <4 x double> %log10
+}
+
+define <1 x float> @constrained_vector_log2_v1f32() {
+; PC64LE-LABEL: constrained_vector_log2_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI70_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI70_0@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log2
+}
+
+define <2 x double> @constrained_vector_log2_v2f64() {
+; PC64LE-LABEL: constrained_vector_log2_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI71_0@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI71_1@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI71_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI71_0@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI71_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI71_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %log2
+}
+
+define <3 x float> @constrained_vector_log2_v3f32() {
+; PC64LE-LABEL: constrained_vector_log2_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
+; PC64LE-NEXT:    bl log2f
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI72_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI72_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI72_0@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI72_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI72_2@toc@l(3)
+; PC64LE9-NEXT:    bl log2f
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI72_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI72_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log2
+}
+
+define <3 x double> @constrained_vector_log2_v3f64() {
+; PC64LE-LABEL: constrained_vector_log2_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI73_0@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI73_1@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI73_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI73_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI73_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI73_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI73_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI73_2@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %log2
+}
+
+define <4 x double> @constrained_vector_log2_v4f64() {
+; PC64LE-LABEL: constrained_vector_log2_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI74_0@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI74_3@toc@l(3)
+; PC64LE-NEXT:    bl log2
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 3, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_log2_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI74_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI74_1@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI74_2@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI74_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI74_3@toc@l(3)
+; PC64LE9-NEXT:    bl log2
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 3, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %log2
+}
+
+define <1 x float> @constrained_vector_rint_v1f32() {
+; PC64LE-LABEL: constrained_vector_rint_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI75_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI75_0@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %rint
+}
+
+define <2 x double> @constrained_vector_rint_v2f64() {
+; PC64LE-LABEL: constrained_vector_rint_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI76_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI76_0@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI76_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI76_1@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> <double 42.1, double 42.0>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32() {
+; PC64LE-LABEL: constrained_vector_rint_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
+; PC64LE-NEXT:    bl rintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI77_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI77_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI77_0@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI77_1@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI77_2@toc@l(3)
+; PC64LE9-NEXT:    bl rintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI77_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI77_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+;
+ entry:
+  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %rint
+}
+
+define <3 x double> @constrained_vector_rint_v3f64() {
+; PC64LE-LABEL: constrained_vector_rint_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI78_0@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI78_1@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI78_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI78_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI78_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI78_1@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI78_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI78_2@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %rint
+}
+
+define <4 x double> @constrained_vector_rint_v4f64() {
+; PC64LE-LABEL: constrained_vector_rint_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
+; PC64LE-NEXT:    bl rint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_rint_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI79_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI79_1@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI79_2@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI79_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI79_3@toc@l(3)
+; PC64LE9-NEXT:    bl rint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+                        <4 x double> <double 42.1, double 42.2,
+                                      double 42.3, double 42.4>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
+                               <1 x float> <float 42.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %nearby
+}
+
+define <2 x double> @constrained_vector_nearbyint_v2f64() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI81_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpic 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI81_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 34, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                                <2 x double> <double 42.1, double 42.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI82_3@toc@l
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 31
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
+; PC64LE9-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI82_3@toc@l
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %nearby
+}
+
+define <3 x double> @constrained_vector_nearby_v3f64() {
+; PC64LE-LABEL: constrained_vector_nearby_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI83_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpic 2, 0
+; PC64LE-NEXT:    xxswapd 0, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    fmr 1, 0
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI83_1@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 2, 0
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %nearby
+}
+
+define <4 x double> @constrained_vector_nearbyint_v4f64() {
+; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI84_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI84_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addi 3, 4, .LCPI84_1@toc@l
+; PC64LE-NEXT:    lxvd2x 1, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xxswapd 1, 1
+; PC64LE-NEXT:    xvrdpic 34, 0
+; PC64LE-NEXT:    xvrdpic 35, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI84_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI84_1@toc@l
+; PC64LE9-NEXT:    xvrdpic 34, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpic 35, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+                                <4 x double> <double 42.1, double 42.2,
+                                              double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %nearby
+}
+
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; PC64LE-LABEL: constrained_vector_maxnum_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI85_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI85_1@toc@l(4)
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI85_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI85_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI85_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI85_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; PC64LE-LABEL: constrained_vector_maxnum_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI86_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI86_1@toc@l(4)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI86_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI86_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI86_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI86_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI86_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI86_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI86_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; PC64LE-LABEL: constrained_vector_maxnum_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI87_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI87_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI87_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI87_3@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI87_3@toc@l(4)
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
+; PC64LE-NEXT:    fmr 1, 31
+; PC64LE-NEXT:    lfs 2, .LCPI87_4@toc@l(3)
+; PC64LE-NEXT:    bl fmaxf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI87_5@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI87_5@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI87_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI87_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI87_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI87_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_4@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI87_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    fmr 1, 31
+; PC64LE9-NEXT:    bl fmaxf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI87_5@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI87_5@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %max
+}
+
+define <3 x double> @constrained_vector_max_v3f64() {
+; PC64LE-LABEL: constrained_vector_max_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI88_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI88_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_max_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI88_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI88_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI88_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI88_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI88_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+                          <3 x double> <double 43.0, double 44.0, double 45.0>,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %max
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; PC64LE-LABEL: constrained_vector_maxnum_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI89_7@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI89_7@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
+; PC64LE-NEXT:    bl fmax
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_maxnum_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI89_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI89_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI89_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_6@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI89_6@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI89_7@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI89_7@toc@l(3)
+; PC64LE9-NEXT:    bl fmax
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; PC64LE-LABEL: constrained_vector_minnum_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI90_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI90_1@toc@l(4)
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 32
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI90_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI90_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI90_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI90_1@toc@l(3)
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+;
+ entry:
+  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; PC64LE-LABEL: constrained_vector_minnum_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI91_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI91_1@toc@l(4)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI91_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI91_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 48
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI91_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI91_1@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI91_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI91_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI91_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; PC64LE-LABEL: constrained_vector_minnum_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset f29, -24
+; PC64LE-NEXT:    .cfi_offset f30, -16
+; PC64LE-NEXT:    .cfi_offset f31, -8
+; PC64LE-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
+; PC64LE-NEXT:    addis 4, 2, .LCPI92_1@toc@ha
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64LE-NEXT:    lfs 31, .LCPI92_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI92_0@toc@l(3)
+; PC64LE-NEXT:    fmr 2, 31
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI92_3@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI92_3@toc@l(4)
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    fmr 29, 1
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
+; PC64LE-NEXT:    fmr 1, 31
+; PC64LE-NEXT:    lfs 2, .LCPI92_4@toc@l(3)
+; PC64LE-NEXT:    bl fminf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 29
+; PC64LE-NEXT:    addis 3, 2, .LCPI92_5@toc@ha
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI92_5@toc@l
+; PC64LE-NEXT:    lvx 4, 0, 3
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 30
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 2, 3
+; PC64LE-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 3, 2, 4
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset f29, -24
+; PC64LE9-NEXT:    .cfi_offset f30, -16
+; PC64LE9-NEXT:    .cfi_offset f31, -8
+; PC64LE9-NEXT:    stfd 29, -24(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI92_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_1@toc@ha
+; PC64LE9-NEXT:    lfs 31, .LCPI92_1@toc@l(3)
+; PC64LE9-NEXT:    fmr 2, 31
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_2@toc@ha
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI92_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI92_3@toc@l(3)
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_4@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI92_4@toc@l(3)
+; PC64LE9-NEXT:    fmr 29, 1
+; PC64LE9-NEXT:    fmr 1, 31
+; PC64LE9-NEXT:    bl fminf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 29
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    addis 3, 2, .LCPI92_5@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI92_5@toc@l
+; PC64LE9-NEXT:    lxvx 36, 0, 3
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 3, 2, 4
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 29, -24(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    blr
+entry:
+  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %min
+}
+
+define <3 x double> @constrained_vector_min_v3f64() {
+; PC64LE-LABEL: constrained_vector_min_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI93_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI93_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    xxlor 1, 63, 63
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_min_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI93_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI93_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI93_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI93_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI93_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xscpsgndp 1, 63, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+ %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+                          <3 x double> <double 43.0, double 44.0, double 45.0>,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %min
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; PC64LE-LABEL: constrained_vector_minnum_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    .cfi_def_cfa_offset 80
+; PC64LE-NEXT:    .cfi_offset lr, 16
+; PC64LE-NEXT:    .cfi_offset v31, -16
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_1@toc@ha
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_1@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_3@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_3@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_5@toc@ha
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_5@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    addis 4, 2, .LCPI94_7@toc@ha
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64LE-NEXT:    lfs 2, .LCPI94_7@toc@l(4)
+; PC64LE-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64LE-NEXT:    bl fmin
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_minnum_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
+; PC64LE9-NEXT:    .cfi_def_cfa_offset 64
+; PC64LE9-NEXT:    .cfi_offset lr, 16
+; PC64LE9-NEXT:    .cfi_offset v31, -16
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI94_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_1@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI94_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_3@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_3@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_4@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI94_4@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_5@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_5@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_6@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI94_6@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI94_7@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI94_7@toc@l(3)
+; PC64LE9-NEXT:    bl fmin
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
+; PC64LE9-NEXT:    blr
+entry:
+  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %min
+}
+
+define <1 x float> @constrained_vector_fptrunc_v1f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v1f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
+; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v1f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI95_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI95_0@toc@l(3)
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
+                                <1 x double><double 42.1>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %result
+}
+
+define <2 x float> @constrained_vector_fptrunc_v2f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI96_1@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
+; PC64LE-NEXT:    lfd 1, .LCPI96_1@toc@l(4)
+; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    frsp 1, 1
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI96_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI96_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI96_1@toc@ha
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    lfd 0, .LCPI96_1@toc@l(3)
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x float> %result
+}
+
+define <3 x float> @constrained_vector_fptrunc_v3f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI97_1@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
+; PC64LE-NEXT:    lfd 1, .LCPI97_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
+; PC64LE-NEXT:    frsp 0, 0
+; PC64LE-NEXT:    lfd 2, .LCPI97_3@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
+; PC64LE-NEXT:    frsp 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI97_2@toc@l
+; PC64LE-NEXT:    frsp 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI97_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_1@toc@ha
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    lfd 0, .LCPI97_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_2@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI97_2@toc@l
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    addis 3, 2, .LCPI97_3@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI97_3@toc@l(3)
+; PC64LE9-NEXT:    frsp 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
+                                <3 x double><double 42.1, double 42.2,
+                                             double 42.3>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <3 x float> %result
+}
+
+define <4 x float> @constrained_vector_fptrunc_v4f64() {
+; PC64LE-LABEL: constrained_vector_fptrunc_v4f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI98_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI98_2@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI98_3@toc@ha
+; PC64LE-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
+; PC64LE-NEXT:    lfd 1, .LCPI98_1@toc@l(4)
+; PC64LE-NEXT:    lfd 2, .LCPI98_2@toc@l(5)
+; PC64LE-NEXT:    lfd 3, .LCPI98_3@toc@l(6)
+; PC64LE-NEXT:    xxmrghd 0, 1, 0
+; PC64LE-NEXT:    xxmrghd 1, 3, 2
+; PC64LE-NEXT:    xvcvdpsp 34, 0
+; PC64LE-NEXT:    xvcvdpsp 35, 1
+; PC64LE-NEXT:    vmrgew 2, 3, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fptrunc_v4f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_0@toc@ha
+; PC64LE9-NEXT:    lfd 0, .LCPI98_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_1@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI98_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_2@toc@ha
+; PC64LE9-NEXT:    xxmrghd 0, 1, 0
+; PC64LE9-NEXT:    xvcvdpsp 34, 0
+; PC64LE9-NEXT:    lfd 0, .LCPI98_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI98_3@toc@ha
+; PC64LE9-NEXT:    lfd 1, .LCPI98_3@toc@l(3)
+; PC64LE9-NEXT:    xxmrghd 0, 1, 0
+; PC64LE9-NEXT:    xvcvdpsp 35, 0
+; PC64LE9-NEXT:    vmrgew 2, 3, 2
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x float> %result
+}
+
+define <1 x double> @constrained_vector_fpext_v1f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI99_0@toc@l(3)
+; PC64LE-NEXT:    xxspltd 34, 0, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI99_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI99_0@toc@l(3)
+; PC64LE9-NEXT:    xxspltd 34, 0, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
+                                <1 x float><float 42.0>,
+                                metadata !"fpexcept.strict")
+  ret <1 x double> %result
+}
+
+define <2 x double> @constrained_vector_fpext_v2f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v2f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI100_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI100_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI100_1@toc@l(4)
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v2f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI100_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI100_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI100_1@toc@l(3)
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %result
+}
+
+define <3 x double> @constrained_vector_fpext_v3f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI101_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI101_2@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
+; PC64LE-NEXT:    lfs 2, .LCPI101_1@toc@l(4)
+; PC64LE-NEXT:    lfs 3, .LCPI101_2@toc@l(5)
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI101_0@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI101_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI101_1@toc@ha
+; PC64LE9-NEXT:    lfs 2, .LCPI101_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI101_2@toc@ha
+; PC64LE9-NEXT:    lfs 3, .LCPI101_2@toc@l(3)
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
+                                <3 x float><float 42.0, float 43.0,
+                                            float 44.0>,
+                                metadata !"fpexcept.strict")
+  ret <3 x double> %result
+}
+
+define <4 x double> @constrained_vector_fpext_v4f32() {
+; PC64LE-LABEL: constrained_vector_fpext_v4f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI102_1@toc@ha
+; PC64LE-NEXT:    addis 5, 2, .LCPI102_2@toc@ha
+; PC64LE-NEXT:    addis 6, 2, .LCPI102_3@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI102_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI102_1@toc@l(4)
+; PC64LE-NEXT:    lfs 2, .LCPI102_2@toc@l(5)
+; PC64LE-NEXT:    lfs 3, .LCPI102_3@toc@l(6)
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    xxmrghd 35, 3, 2
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_fpext_v4f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI102_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI102_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_2@toc@ha
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    lfs 0, .LCPI102_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI102_3@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI102_3@toc@l(3)
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %result
+}
+
+define <1 x float> @constrained_vector_ceil_v1f32() {
+; PC64LE-LABEL: constrained_vector_ceil_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI103_0@toc@l(3)
+; PC64LE-NEXT:    frip 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI103_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI103_0@toc@l(3)
+; PC64LE9-NEXT:    frip 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %ceil
+}
+
+define <2 x double> @constrained_vector_ceil_v2f64() {
+; PC64LE-LABEL: constrained_vector_ceil_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI104_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpip 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI104_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI104_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpip 34, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %ceil
+}
+
+define <3 x float> @constrained_vector_ceil_v3f32() {
+; PC64LE-LABEL: constrained_vector_ceil_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI105_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI105_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI105_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64LE-NEXT:    frip 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI105_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI105_3@toc@ha
+; PC64LE-NEXT:    frip 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI105_3@toc@l
+; PC64LE-NEXT:    frip 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI105_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI105_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_2@toc@ha
+; PC64LE9-NEXT:    frip 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI105_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI105_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI105_3@toc@l
+; PC64LE9-NEXT:    frip 1, 1
+; PC64LE9-NEXT:    frip 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+entry:
+  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %ceil
+}
+
+define <3 x double> @constrained_vector_ceil_v3f64() {
+; PC64LE-LABEL: constrained_vector_ceil_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI106_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI106_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpip 3, 1
+; PC64LE-NEXT:    xvrdpip 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI106_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI106_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI106_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI106_1@toc@l
+; PC64LE9-NEXT:    xsrdpip 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpip 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %ceil
+}
+
+define <1 x float> @constrained_vector_floor_v1f32() {
+; PC64LE-LABEL: constrained_vector_floor_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI107_0@toc@l(3)
+; PC64LE-NEXT:    frim 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI107_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI107_0@toc@l(3)
+; PC64LE9-NEXT:    frim 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %floor
+}
+
+
+define <2 x double> @constrained_vector_floor_v2f64() {
+; PC64LE-LABEL: constrained_vector_floor_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI108_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpim 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI108_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI108_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpim 34, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %floor
+}
+
+define <3 x float> @constrained_vector_floor_v3f32() {
+; PC64LE-LABEL: constrained_vector_floor_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI109_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI109_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI109_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
+; PC64LE-NEXT:    frim 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI109_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI109_3@toc@ha
+; PC64LE-NEXT:    frim 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI109_3@toc@l
+; PC64LE-NEXT:    frim 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI109_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI109_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_2@toc@ha
+; PC64LE9-NEXT:    frim 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI109_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI109_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI109_3@toc@l
+; PC64LE9-NEXT:    frim 1, 1
+; PC64LE9-NEXT:    frim 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+entry:
+  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %floor
+}
+
+define <3 x double> @constrained_vector_floor_v3f64() {
+; PC64LE-LABEL: constrained_vector_floor_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI110_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI110_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpim 3, 1
+; PC64LE-NEXT:    xvrdpim 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_floor_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI110_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI110_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI110_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI110_1@toc@l
+; PC64LE9-NEXT:    xsrdpim 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpim 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %floor
+}
+
+define <1 x float> @constrained_vector_round_v1f32() {
+; PC64LE-LABEL: constrained_vector_round_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI111_0@toc@l(3)
+; PC64LE-NEXT:    frin 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI111_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI111_0@toc@l(3)
+; PC64LE9-NEXT:    frin 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %round
+}
+
+define <2 x double> @constrained_vector_round_v2f64() {
+; PC64LE-LABEL: constrained_vector_round_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI112_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpi 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI112_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI112_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpi 34, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %round
+}
+
+define <3 x float> @constrained_vector_round_v3f32() {
+; PC64LE-LABEL: constrained_vector_round_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI113_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI113_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI113_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64LE-NEXT:    frin 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI113_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI113_3@toc@ha
+; PC64LE-NEXT:    frin 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI113_3@toc@l
+; PC64LE-NEXT:    frin 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI113_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI113_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_2@toc@ha
+; PC64LE9-NEXT:    frin 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI113_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI113_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI113_3@toc@l
+; PC64LE9-NEXT:    frin 1, 1
+; PC64LE9-NEXT:    frin 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+entry:
+  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %round
+}
+
+
+define <3 x double> @constrained_vector_round_v3f64() {
+; PC64LE-LABEL: constrained_vector_round_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI114_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI114_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpi 3, 1
+; PC64LE-NEXT:    xvrdpi 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_round_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI114_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI114_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI114_1@toc@l
+; PC64LE9-NEXT:    xsrdpi 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpi 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %round
+}
+
+define <1 x float> @constrained_vector_trunc_v1f32() {
+; PC64LE-LABEL: constrained_vector_trunc_v1f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI115_0@toc@l(3)
+; PC64LE-NEXT:    friz 0, 0
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI115_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI115_0@toc@l(3)
+; PC64LE9-NEXT:    friz 0, 0
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    blr
+entry:
+  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %trunc
+}
+
+define <2 x double> @constrained_vector_trunc_v2f64() {
+; PC64LE-LABEL: constrained_vector_trunc_v2f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI116_0@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xvrdpiz 34, 0
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v2f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI116_0@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI116_0@toc@l
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpiz 34, 0
+; PC64LE9-NEXT:    blr
+entry:
+  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %trunc
+}
+
+define <3 x float> @constrained_vector_trunc_v3f32() {
+; PC64LE-LABEL: constrained_vector_trunc_v3f32:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
+; PC64LE-NEXT:    addis 4, 2, .LCPI117_1@toc@ha
+; PC64LE-NEXT:    lfs 0, .LCPI117_2@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI117_1@toc@l(4)
+; PC64LE-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
+; PC64LE-NEXT:    friz 0, 0
+; PC64LE-NEXT:    lfs 2, .LCPI117_0@toc@l(3)
+; PC64LE-NEXT:    addis 3, 2, .LCPI117_3@toc@ha
+; PC64LE-NEXT:    friz 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI117_3@toc@l
+; PC64LE-NEXT:    friz 2, 2
+; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    lvx 3, 0, 3
+; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI117_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_1@toc@ha
+; PC64LE9-NEXT:    lfs 1, .LCPI117_1@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_2@toc@ha
+; PC64LE9-NEXT:    friz 0, 0
+; PC64LE9-NEXT:    lfs 2, .LCPI117_2@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI117_3@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI117_3@toc@l
+; PC64LE9-NEXT:    friz 1, 1
+; PC64LE9-NEXT:    friz 2, 2
+; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    xscvdpspn 1, 1
+; PC64LE9-NEXT:    xscvdpspn 2, 2
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
+; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
+; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
+; PC64LE9-NEXT:    vmrglw 2, 3, 2
+; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    blr
+entry:
+  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %trunc
+}
+
+define <3 x double> @constrained_vector_trunc_v3f64() {
+; PC64LE-LABEL: constrained_vector_trunc_v3f64:
+; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
+; PC64LE-NEXT:    addi 3, 3, .LCPI118_1@toc@l
+; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI118_0@toc@l(3)
+; PC64LE-NEXT:    xxswapd 0, 0
+; PC64LE-NEXT:    xsrdpiz 3, 1
+; PC64LE-NEXT:    xvrdpiz 2, 0
+; PC64LE-NEXT:    xxswapd 1, 2
+; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    blr
+;
+; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
+; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    addis 3, 2, .LCPI118_0@toc@ha
+; PC64LE9-NEXT:    lfs 0, .LCPI118_0@toc@l(3)
+; PC64LE9-NEXT:    addis 3, 2, .LCPI118_1@toc@ha
+; PC64LE9-NEXT:    addi 3, 3, .LCPI118_1@toc@l
+; PC64LE9-NEXT:    xsrdpiz 3, 0
+; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    xvrdpiz 2, 0
+; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    blr
+entry:
+  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %trunc
+}
+
+
+; Single width declarations
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+
+; Scalar width declarations
+declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
+declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+
+; Illegal width declarations
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
+declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+
+; Double width declarations
+declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)

From c5227a1f53103d6c0cbcb5a59da84356208df0dd Mon Sep 17 00:00:00 2001
From: Francois Ferrand <thetypz@gmail.com>
Date: Wed, 29 May 2019 16:30:47 +0000
Subject: [PATCH 0514/1176] [clang-format] Allow configuring list of
 function-like macros that resolve to a type

Summary:
Adds a `TypenameMacros` configuration option that causes certain identifiers to be handled in a way similar to `typeof()`.

This is enough to:
- Prevent misinterpreting declarations of pointers to such types as expressions (`STACK_OF(int) * foo` -> `STACK_OF(int) *foo`),
- Avoid surprising line breaks in variable/struct field declarations (`STACK_OF(int)\nfoo;` -> `STACK_OF(int) foo;`, see https://bugs.llvm.org/show_bug.cgi?id=30353).

Reviewers: Typz, krasimir, djasper

Reviewed By: Typz

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D57184

llvm-svn: 361986
---
 clang/docs/ClangFormatStyleOptions.rst | 18 ++++++++++++++++
 clang/include/clang/Format/Format.h    | 19 ++++++++++++++++-
 clang/lib/Format/Format.cpp            |  1 +
 clang/lib/Format/FormatToken.h         |  1 +
 clang/lib/Format/FormatTokenLexer.cpp  |  2 ++
 clang/lib/Format/TokenAnnotator.cpp    | 18 +++++++++-------
 clang/unittests/Format/FormatTest.cpp  | 29 ++++++++++++++++++++++++++
 7 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 3611bdd7b00d0..342ecc7914ad6 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -1367,6 +1367,24 @@ the configuration (without a prefix: ``Auto``).
 
   For example: BOOST_FOREACH.
 
+**TypenameMacros** (``std::vector<std::string>``)
+  A vector of macros that should be interpreted as type declarations
+  instead of as function calls.
+
+  These are expected to be macros of the form:
+
+  .. code-block: c++
+
+    STACK_OF(...)
+
+  In the .clang-format configuration file, this can be configured like:
+
+  .. code-block: yaml
+
+    TypenameMacros: ['STACK_OF', 'LIST']
+
+  For example: OpenSSL STACK_OF, BSD LIST_ENTRY.
+
 **IncludeBlocks** (``IncludeBlocksStyle``)
   Dependent on the value, multiple ``#include`` blocks can be sorted
   as one and divided based on category.
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index fcbe0a7e9322a..af7c351d79bb9 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -1160,6 +1160,22 @@ struct FormatStyle {
   /// For example: BOOST_FOREACH.
   std::vector<std::string> ForEachMacros;
 
+  /// \brief A vector of macros that should be interpreted as type declarations
+  /// instead of as function calls.
+  ///
+  /// These are expected to be macros of the form:
+  /// \code
+  ///   STACK_OF(...)
+  /// \endcode
+  ///
+  /// In the .clang-format configuration file, this can be configured like:
+  /// \code{.yaml}
+  ///   TypenameMacros: ['STACK_OF', 'LIST']
+  /// \endcode
+  ///
+  /// For example: OpenSSL STACK_OF, BSD LIST_ENTRY.
+  std::vector<std::string> TypenameMacros;
+
   /// A vector of macros that should be interpreted as complete
   /// statements.
   ///
@@ -1952,7 +1968,8 @@ struct FormatStyle {
            SpacesInParentheses == R.SpacesInParentheses &&
            SpacesInSquareBrackets == R.SpacesInSquareBrackets &&
            Standard == R.Standard && TabWidth == R.TabWidth &&
-           StatementMacros == R.StatementMacros && UseTab == R.UseTab;
+           StatementMacros == R.StatementMacros && UseTab == R.UseTab &&
+           TypenameMacros == R.TypenameMacros;
   }
 
   llvm::Optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 2772db0a1103b..d775ca14a7c63 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -505,6 +505,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("Standard", Style.Standard);
     IO.mapOptional("StatementMacros", Style.StatementMacros);
     IO.mapOptional("TabWidth", Style.TabWidth);
+    IO.mapOptional("TypenameMacros", Style.TypenameMacros);
     IO.mapOptional("UseTab", Style.UseTab);
   }
 };
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index f54ffe9d54cab..e8b737f00bd2d 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -96,6 +96,7 @@ namespace format {
   TYPE(TrailingAnnotation)                                                     \
   TYPE(TrailingReturnArrow)                                                    \
   TYPE(TrailingUnaryOperator)                                                  \
+  TYPE(TypenameMacro)                                                          \
   TYPE(UnaryOperator)                                                          \
   TYPE(CSharpStringLiteral)                                                    \
   TYPE(CSharpNullCoalescing)                                                   \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index c12daa7f20e9e..4438756579abf 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -39,6 +39,8 @@ FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
     Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro});
   for (const std::string &StatementMacro : Style.StatementMacros)
     Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro});
+  for (const std::string &TypenameMacro : Style.TypenameMacros)
+    Macros.insert({&IdentTable.get(TypenameMacro), TT_TypenameMacro});
 }
 
 ArrayRef<FormatToken *> FormatTokenLexer::lex() {
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 4e255700d4dc0..1dca764eaeb10 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1194,11 +1194,12 @@ class AnnotatingParser {
 
     // Reset token type in case we have already looked at it and then
     // recovered from an error (e.g. failure to find the matching >).
-    if (!CurrentToken->isOneOf(
-            TT_LambdaLSquare, TT_LambdaLBrace, TT_ForEachMacro,
-            TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace,
-            TT_JsFatArrow, TT_LambdaArrow, TT_OverloadedOperator,
-            TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral))
+    if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_LambdaLBrace,
+                               TT_ForEachMacro, TT_TypenameMacro,
+                               TT_FunctionLBrace, TT_ImplicitStringLiteral,
+                               TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
+                               TT_OverloadedOperator, TT_RegexLiteral,
+                               TT_TemplateString, TT_ObjCStringLiteral))
       CurrentToken->Type = TT_Unknown;
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;
@@ -1416,6 +1417,7 @@ class AnnotatingParser {
           if (AfterParen->Tok.isNot(tok::caret)) {
             if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
               if (BeforeParen->is(tok::identifier) &&
+                  !BeforeParen->is(TT_TypenameMacro) &&
                   BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
                   (!BeforeParen->Previous ||
                    BeforeParen->Previous->ClosesTemplateDeclaration))
@@ -1667,7 +1669,8 @@ class AnnotatingParser {
       FormatToken *TokenBeforeMatchingParen =
           PrevToken->MatchingParen->getPreviousNonComment();
       if (TokenBeforeMatchingParen &&
-          TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
+          TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype,
+                                            TT_TypenameMacro))
         return TT_PointerOrReference;
     }
 
@@ -2527,7 +2530,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
       FormatToken *TokenBeforeMatchingParen =
           Left.MatchingParen->getPreviousNonComment();
       if (!TokenBeforeMatchingParen ||
-          !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
+          !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype,
+                                             TT_TypenameMacro))
         return true;
     }
     return (Left.Tok.isLiteral() ||
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 31f40b167081e..87405bccef1b5 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -13545,6 +13545,35 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) {
       guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })"));
 }
 
+TEST_F(FormatTest, TypenameMacros) {
+  std::vector<std::string> TypenameMacros = {"STACK_OF", "LIST", "TAILQ_ENTRY"};
+
+  // Test case reported in https://bugs.llvm.org/show_bug.cgi?id=30353
+  FormatStyle Google = getGoogleStyleWithColumns(0);
+  Google.TypenameMacros = TypenameMacros;
+  verifyFormat("struct foo {\n"
+               "  int bar;\n"
+               "  TAILQ_ENTRY(a) bleh;\n"
+               "};", Google);
+
+  FormatStyle Macros = getLLVMStyle();
+  Macros.TypenameMacros = TypenameMacros;
+
+  verifyFormat("STACK_OF(int) a;", Macros);
+  verifyFormat("STACK_OF(int) *a;", Macros);
+  verifyFormat("STACK_OF(int const *) *a;", Macros);
+  verifyFormat("STACK_OF(int *const) *a;", Macros);
+  verifyFormat("STACK_OF(int, string) a;", Macros);
+  verifyFormat("STACK_OF(LIST(int)) a;", Macros);
+  verifyFormat("STACK_OF(LIST(int)) a, b;", Macros);
+  verifyFormat("for (LIST(int) *a = NULL; a;) {\n}", Macros);
+  verifyFormat("STACK_OF(int) f(LIST(int) *arg);", Macros);
+
+  Macros.PointerAlignment = FormatStyle::PAS_Left;
+  verifyFormat("STACK_OF(int)* a;", Macros);
+  verifyFormat("STACK_OF(int*)* a;", Macros);
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang

From 78337420cd1b23b01eb0c96d9d27a5c6a44d8d71 Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Wed, 29 May 2019 16:31:32 +0000
Subject: [PATCH 0515/1176] Add more information to the log timer dump

Summary:
The `log timer dump` is showing the time of the function itself minus any function that is called from this one that also happens to be timed. However, this is really not obvious and it also makes it hard to understand the time spent in total and also which children are actually taking the time.
To get a better reading of the timer dump I added the total, children (which I named child) and also the hit count. I used these timers to figure out a performance issue and only after adding this things were more clear to me.

It looks like this:
```
(lldb) log timer dump
35.447713617 sec (total: 35.449s; child: 0.001s; count: 1374) for void SymbolFileDWARF::Index()
29.717921481 sec (total: 29.718s; child: 0.000s; count: 8230500) for const lldb_private::ConstString &lldb_private::Mangled::GetDemangledName(lldb::LanguageType) const
21.049508865 sec (total: 24.683s; child: 3.633s; count: 1399) for void lldb_private::Symtab::InitNameIndexes()
...
```

Reviewers: clayborg, teemperor, labath, espindola, xiaobai

Reviewed By: labath, xiaobai

Subscribers: emaste, mgorny, arichardson, eraman, MaskRay, jdoerfert, labath, davide, teemperor, aprantl, erik.pilkington, jfb, abidh, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D61235

llvm-svn: 361987
---
 lldb/include/lldb/Utility/Timer.h    |  2 ++
 lldb/source/Utility/Timer.cpp        | 44 ++++++++++++++++++++--------
 lldb/unittests/Utility/TimerTest.cpp | 39 +++++++++++++++++++++++-
 3 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/lldb/include/lldb/Utility/Timer.h b/lldb/include/lldb/Utility/Timer.h
index 0d2e8d8c3e533..ad9421a75b15f 100644
--- a/lldb/include/lldb/Utility/Timer.h
+++ b/lldb/include/lldb/Utility/Timer.h
@@ -30,6 +30,8 @@ class Timer {
     friend class Timer;
     const char *m_name;
     std::atomic<uint64_t> m_nanos;
+    std::atomic<uint64_t> m_nanos_total;
+    std::atomic<uint64_t> m_count;
     std::atomic<Category *> m_next;
 
     DISALLOW_COPY_AND_ASSIGN(Category);
diff --git a/lldb/source/Utility/Timer.cpp b/lldb/source/Utility/Timer.cpp
index 6831924497648..7492045bc3262 100644
--- a/lldb/source/Utility/Timer.cpp
+++ b/lldb/source/Utility/Timer.cpp
@@ -41,6 +41,8 @@ static TimerStack &GetTimerStackForCurrentThread() {
 
 Timer::Category::Category(const char *cat) : m_name(cat) {
   m_nanos.store(0, std::memory_order_release);
+  m_nanos_total.store(0, std::memory_order_release);
+  m_count.store(0, std::memory_order_release);
   Category *expected = g_categories;
   do {
     m_next = expected;
@@ -93,6 +95,8 @@ Timer::~Timer() {
 
   // Keep total results for each category so we can dump results.
   m_category.m_nanos += std::chrono::nanoseconds(timer_dur).count();
+  m_category.m_nanos_total += std::chrono::nanoseconds(total_dur).count();
+  m_category.m_count++;
 }
 
 void Timer::SetDisplayDepth(uint32_t depth) { g_display_depth = depth; }
@@ -100,25 +104,38 @@ void Timer::SetDisplayDepth(uint32_t depth) { g_display_depth = depth; }
 /* binary function predicate:
  * - returns whether a person is less than another person
  */
-
-typedef std::pair<const char *, uint64_t> TimerEntry;
-
-static bool CategoryMapIteratorSortCriterion(const TimerEntry &lhs,
-                                             const TimerEntry &rhs) {
-  return lhs.second > rhs.second;
+namespace {
+struct Stats {
+  const char *name;
+  uint64_t nanos;
+  uint64_t nanos_total;
+  uint64_t count;
+};
+} // namespace
+
+static bool CategoryMapIteratorSortCriterion(const Stats &lhs,
+                                             const Stats &rhs) {
+  return lhs.nanos > rhs.nanos;
 }
 
 void Timer::ResetCategoryTimes() {
-  for (Category *i = g_categories; i; i = i->m_next)
+  for (Category *i = g_categories; i; i = i->m_next) {
     i->m_nanos.store(0, std::memory_order_release);
+    i->m_nanos_total.store(0, std::memory_order_release);
+    i->m_count.store(0, std::memory_order_release);
+  }
 }
 
 void Timer::DumpCategoryTimes(Stream *s) {
-  std::vector<TimerEntry> sorted;
+  std::vector<Stats> sorted;
   for (Category *i = g_categories; i; i = i->m_next) {
     uint64_t nanos = i->m_nanos.load(std::memory_order_acquire);
-    if (nanos)
-      sorted.push_back(std::make_pair(i->m_name, nanos));
+    if (nanos) {
+      uint64_t nanos_total = i->m_nanos_total.load(std::memory_order_acquire);
+      uint64_t count = i->m_count.load(std::memory_order_acquire);
+      Stats stats{i->m_name, nanos, nanos_total, count};
+      sorted.push_back(stats);
+    }
   }
   if (sorted.empty())
     return; // Later code will break without any elements.
@@ -126,6 +143,9 @@ void Timer::DumpCategoryTimes(Stream *s) {
   // Sort by time
   llvm::sort(sorted.begin(), sorted.end(), CategoryMapIteratorSortCriterion);
 
-  for (const auto &timer : sorted)
-    s->Printf("%.9f sec for %s\n", timer.second / 1000000000., timer.first);
+  for (const auto &stats : sorted)
+    s->Printf("%.9f sec (total: %.3fs; child: %.3fs; count: %llu) for %s\n",
+              stats.nanos / 1000000000., stats.nanos_total / 1000000000.,
+              (stats.nanos_total - stats.nanos) / 1000000000., stats.count,
+              stats.name);
 }
diff --git a/lldb/unittests/Utility/TimerTest.cpp b/lldb/unittests/Utility/TimerTest.cpp
index 28d8d050b3e0a..53d0ff12d891a 100644
--- a/lldb/unittests/Utility/TimerTest.cpp
+++ b/lldb/unittests/Utility/TimerTest.cpp
@@ -61,7 +61,9 @@ TEST(TimerTest, CategoryTimes2) {
   StreamString ss;
   Timer::DumpCategoryTimes(&ss);
   double seconds1, seconds2;
-  ASSERT_EQ(2, sscanf(ss.GetData(), "%lf sec for CAT1%*[\n ]%lf sec for CAT2",
+  ASSERT_EQ(2, sscanf(ss.GetData(),
+                      "%lf sec (total: %*lfs; child: %*lfs; count: %*d) for "
+                      "CAT1%*[\n ]%lf sec for CAT2",
                       &seconds1, &seconds2))
       << "String: " << ss.GetData();
   EXPECT_LT(0.01, seconds1);
@@ -69,3 +71,38 @@ TEST(TimerTest, CategoryTimes2) {
   EXPECT_LT(0.001, seconds2);
   EXPECT_GT(0.1, seconds2);
 }
+
+TEST(TimerTest, CategoryTimesStats) {
+  Timer::ResetCategoryTimes();
+  {
+    static Timer::Category tcat1("CAT1");
+    Timer t1(tcat1, ".");
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    static Timer::Category tcat2("CAT2");
+    {
+      Timer t2(tcat2, ".");
+      std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+    {
+      Timer t3(tcat2, ".");
+      std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+  }
+  // Example output:
+  // 0.105202764 sec (total: 0.132s; child: 0.027s; count: 1) for CAT1
+  // 0.026772798 sec (total: 0.027s; child: 0.000s; count: 2) for CAT2
+  StreamString ss;
+  Timer::DumpCategoryTimes(&ss);
+  double seconds1, total1, child1, seconds2;
+  int count1, count2;
+  ASSERT_EQ(
+      6, sscanf(ss.GetData(),
+                "%lf sec (total: %lfs; child: %lfs; count: %d) for CAT1%*[\n ]"
+                "%lf sec (total: %*lfs; child: %*lfs; count: %d) for CAT2",
+                &seconds1, &total1, &child1, &count1, &seconds2, &count2))
+      << "String: " << ss.GetData();
+  EXPECT_NEAR(total1 - child1, seconds1, 0.002);
+  EXPECT_EQ(1, count1);
+  EXPECT_NEAR(child1, seconds2, 0.002);
+  EXPECT_EQ(2, count2);
+}

From 98a797c224a6abb2218963f551185ffba057aa4b Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Wed, 29 May 2019 16:50:14 +0000
Subject: [PATCH 0516/1176] [NFC][InstCombine] Add a unary FNeg test to
 fsub.ll.

llvm-svn: 361988
---
 llvm/test/Transforms/InstCombine/fsub.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll
index 33f994da29b7f..f4d971c7ff174 100644
--- a/llvm/test/Transforms/InstCombine/fsub.ll
+++ b/llvm/test/Transforms/InstCombine/fsub.ll
@@ -14,6 +14,17 @@ define float @test1(float %x, float %y) {
   ret float %t2
 }
 
+define float @test1_unary(float %x, float %y) {
+; CHECK-LABEL: @test1_unary(
+; CHECK-NEXT:    [[T1:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = fneg float [[T1]]
+; CHECK-NEXT:    ret float [[T2]]
+;
+  %t1 = fsub float %x, %y
+  %t2 = fneg float %t1
+  ret float %t2
+}
+
 ; Can't do anything with the test above because -0.0 - 0.0 = -0.0, but if we have nsz:
 ; -(X - Y) --> Y - X
 

From 5b2088d1fac1f464cd51d4b660b29c5db47a54c4 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson@google.com>
Date: Wed, 29 May 2019 16:50:46 +0000
Subject: [PATCH 0517/1176] [ThinLTO] Use original alias visibility when
 importing

Summary:
When we import an alias, we do so by making a clone of the aliasee. Just
as this clone uses the original alias name and linkage, it should also
use the same visibility (not the aliasee's visibility). Otherwise,
linker behavior is affected (e.g. if the aliasee was hidden, but the
alias is not, the resulting imported clone should not be hidden,
otherwise the linker will make the final symbol hidden which is
incorrect).

Reviewers: wmi

Subscribers: mehdi_amini, inglorion, eraman, steven_wu, dexonsmith, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62535

llvm-svn: 361989
---
 llvm/lib/Transforms/IPO/FunctionImport.cpp   | 5 +++--
 llvm/test/ThinLTO/X86/Inputs/alias_import.ll | 2 +-
 llvm/test/ThinLTO/X86/alias_import.ll        | 8 ++++----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 71a76a6a67ca7..9207f5fe0ef3f 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -1053,9 +1053,10 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
 
   ValueToValueMapTy VMap;
   Function *NewFn = CloneFunction(Fn, VMap);
-  // Clone should use the original alias's linkage and name, and we ensure
-  // all uses of alias instead use the new clone (casted if necessary).
+  // Clone should use the original alias's linkage, visibility and name, and we
+  // ensure all uses of alias instead use the new clone (casted if necessary).
   NewFn->setLinkage(GA->getLinkage());
+  NewFn->setVisibility(GA->getVisibility());
   GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType()));
   NewFn->takeName(GA);
   return NewFn;
diff --git a/llvm/test/ThinLTO/X86/Inputs/alias_import.ll b/llvm/test/ThinLTO/X86/Inputs/alias_import.ll
index 7425f23fd3f51..740ab4baa3582 100644
--- a/llvm/test/ThinLTO/X86/Inputs/alias_import.ll
+++ b/llvm/test/ThinLTO/X86/Inputs/alias_import.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 @globalfuncLinkonceAlias = linkonce alias void (...), bitcast (void ()* @globalfunc to void (...)*)
 @globalfuncWeakODRAlias = weak_odr alias void (...), bitcast (void ()* @globalfunc to void (...)*)
 @globalfuncLinkonceODRAlias = linkonce_odr alias void (...), bitcast (void ()* @globalfunc to void (...)*)
-define void @globalfunc() {
+define hidden void @globalfunc() {
 entry:
   ret void
 }
diff --git a/llvm/test/ThinLTO/X86/alias_import.ll b/llvm/test/ThinLTO/X86/alias_import.ll
index af131e145bb58..6c6716a958de5 100644
--- a/llvm/test/ThinLTO/X86/alias_import.ll
+++ b/llvm/test/ThinLTO/X86/alias_import.ll
@@ -38,7 +38,7 @@
 ; PROMOTE-DAG: @linkonceODRfuncLinkonceAlias = weak alias void (...), bitcast (void ()* @linkonceODRfunc to void (...)*)
 ; PROMOTE-DAG: @linkonceODRfuncLinkonceODRAlias = weak_odr alias void (...), bitcast (void ()* @linkonceODRfunc to void (...)*)
 
-; PROMOTE-DAG: define void @globalfunc()
+; PROMOTE-DAG: define hidden void @globalfunc()
 ; PROMOTE-DAG: define internal void @internalfunc()
 ; PROMOTE-DAG: define weak_odr void @linkonceODRfunc()
 ; PROMOTE-DAG: define weak_odr void @weakODRfunc()
@@ -52,11 +52,11 @@
 ; IMPORT-DAG: define available_externally void @linkonceODRfuncAlias
 ; IMPORT-DAG: define available_externally void @linkonceODRfuncWeakODRAlias
 ; IMPORT-DAG: define available_externally void @linkonceODRfuncLinkonceODRAlias
-; IMPORT-DAG: define available_externally void @globalfuncAlias()
+; IMPORT-DAG: define available_externally dso_local void @globalfuncAlias()
 ; IMPORT-DAG: declare void @globalfuncWeakAlias()
 ; IMPORT-DAG: declare void @globalfuncLinkonceAlias()
-; IMPORT-DAG: define available_externally void @globalfuncWeakODRAlias()
-; IMPORT-DAG: define available_externally void @globalfuncLinkonceODRAlias()
+; IMPORT-DAG: define available_externally dso_local void @globalfuncWeakODRAlias()
+; IMPORT-DAG: define available_externally dso_local void @globalfuncLinkonceODRAlias()
 ; IMPORT-DAG: define available_externally dso_local void @internalfuncAlias()
 ; IMPORT-DAG: declare void @internalfuncWeakAlias()
 ; IMPORT-DAG: declare void @internalfuncLinkonceAlias()

From f80c4241b306ce4cf2d718b590e516866d787c67 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 29 May 2019 16:59:48 +0000
Subject: [PATCH 0518/1176] CallSiteSplitting: Respect convergent and
 noduplicate

llvm-svn: 361990
---
 .../Transforms/Scalar/CallSiteSplitting.cpp   |  3 +
 .../CallSiteSplitting/convergent.ll           | 89 ++++++++++++++++++
 .../CallSiteSplitting/noduplicate.ll          | 91 +++++++++++++++++++
 3 files changed, 183 insertions(+)
 create mode 100644 llvm/test/Transforms/CallSiteSplitting/convergent.ll
 create mode 100644 llvm/test/Transforms/CallSiteSplitting/noduplicate.ll

diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 6b749238d2b48..3519b000a33ff 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -183,6 +183,9 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
 }
 
 static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
+  if (CS.isConvergent() || CS.cannotDuplicate())
+    return false;
+
   // FIXME: As of now we handle only CallInst. InvokeInst could be handled
   // without too much effort.
   Instruction *Instr = CS.getInstruction();
diff --git a/llvm/test/Transforms/CallSiteSplitting/convergent.ll b/llvm/test/Transforms/CallSiteSplitting/convergent.ll
new file mode 100644
index 0000000000000..4dcff02743407
--- /dev/null
+++ b/llvm/test/Transforms/CallSiteSplitting/convergent.ll
@@ -0,0 +1,89 @@
+; RUN: opt -S -callsite-splitting -callsite-splitting-duplication-threshold=100000000 < %s | FileCheck -enable-var-scope %s
+
+; Convergent calls should not be duplicated in this case
+; CHECK-LABEL: define void @convergent_caller(
+; CHECK: call void @convergent_callee(
+; CHECK-NOT: call void @convergent_callee(
+define void @convergent_caller(i1 %c, i8* %a_elt, i8* %b_elt) #0 {
+entry:
+  br label %Top
+
+Top:
+  %tobool1 = icmp eq i8* %a_elt, null
+  br i1 %tobool1, label %CallSiteBB, label %NextCond
+
+NextCond:
+  %cmp = icmp ne i8* %b_elt, null
+  br i1 %cmp, label %CallSiteBB, label %End
+
+CallSiteBB:
+  %p = phi i1 [ false, %Top ], [ %c, %NextCond ]
+  call void @convergent_callee(i8* %a_elt, i1 %p)
+  br label %End
+
+End:
+  ret void
+}
+
+; CHECK-LABEL: define void @convergent_callee(
+; CHECK: call void @convergent_external(
+; CHECK-NOT: call void @convergent_external(
+define void @convergent_callee(i8* %a_elt, i1 %c) #0 {
+entry:
+  %tobool = icmp ne i8* %a_elt, null
+  br i1 %tobool, label %then, label %endif
+
+then:
+  br label %endif
+
+endif:
+  call void @convergent_external(i8* %a_elt) #0
+  ret void
+}
+
+; Make sure an otherwise identical function is transformed
+; CHECK-LABEL: define void @reference_caller(
+; CHECK: call void @nonconvergent_callee(
+; CHECK: call void @nonconvergent_callee(
+define void @reference_caller(i1 %c, i8* %a_elt, i8* %b_elt) #1 {
+entry:
+  br label %Top
+
+Top:
+  %tobool1 = icmp eq i8* %a_elt, null
+  br i1 %tobool1, label %CallSiteBB, label %NextCond
+
+NextCond:
+  %cmp = icmp ne i8* %b_elt, null
+  br i1 %cmp, label %CallSiteBB, label %End
+
+CallSiteBB:
+  %p = phi i1 [ false, %Top ], [ %c, %NextCond ]
+  call void @nonconvergent_callee(i8* %a_elt, i1 %p)
+  br label %End
+
+End:
+  ret void
+}
+
+; CHECK-LABEL: define void @nonconvergent_callee(
+; CHECK: call void @nonconvergent_external(
+; CHECK-NOT: call void @nonconvergent_external(
+define void @nonconvergent_callee(i8* %a_elt, i1 %c) #1 {
+entry:
+  %tobool = icmp ne i8* %a_elt, null
+  br i1 %tobool, label %then, label %endif
+
+then:
+  br label %endif
+
+endif:
+  call void @nonconvergent_external(i8* %a_elt)
+  ret void
+}
+
+declare void @convergent_external(i8*) #0
+declare void @nonconvergent_external(i8*) #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { nounwind }
diff --git a/llvm/test/Transforms/CallSiteSplitting/noduplicate.ll b/llvm/test/Transforms/CallSiteSplitting/noduplicate.ll
new file mode 100644
index 0000000000000..cdc4fdc77dae9
--- /dev/null
+++ b/llvm/test/Transforms/CallSiteSplitting/noduplicate.ll
@@ -0,0 +1,91 @@
+; RUN: opt -S -callsite-splitting -callsite-splitting-duplication-threshold=100000000 < %s | FileCheck -enable-var-scope %s
+; RUN: opt -S -callsite-splitting -callsite-splitting-duplication-threshold=100000000 < %s | FileCheck -enable-var-scope %s
+
+; Noduplicate calls should not be duplicated
+; CHECK-LABEL: define void @noduplicate_caller(
+; CHECK: call void @noduplicate_callee(
+; CHECK-NOT: call void @noduplicate_callee(
+define void @noduplicate_caller(i1 %c, i8* %a_elt, i8* %b_elt) #0 {
+entry:
+  br label %Top
+
+Top:
+  %tobool1 = icmp eq i8* %a_elt, null
+  br i1 %tobool1, label %CallSiteBB, label %NextCond
+
+NextCond:
+  %cmp = icmp ne i8* %b_elt, null
+  br i1 %cmp, label %CallSiteBB, label %End
+
+CallSiteBB:
+  %p = phi i1 [ false, %Top ], [ %c, %NextCond ]
+  call void @noduplicate_callee(i8* %a_elt, i1 %p)
+  br label %End
+
+End:
+  ret void
+}
+
+; CHECK-LABEL: define void @noduplicate_callee(
+; CHECK: call void @noduplicate_external(
+; CHECK-NOT: call void @noduplicate_external(
+define void @noduplicate_callee(i8* %a_elt, i1 %c) #0 {
+entry:
+  %tobool = icmp ne i8* %a_elt, null
+  br i1 %tobool, label %then, label %endif
+
+then:
+  br label %endif
+
+endif:
+  call void @noduplicate_external(i8* %a_elt) #0
+  ret void
+}
+
+; Make sure an otherwise identical function is transformed
+; CHECK-LABEL: define void @reference_caller(
+; CHECK: call void @nonnoduplicate_callee(
+; CHECK: call void @nonnoduplicate_callee(
+define void @reference_caller(i1 %c, i8* %a_elt, i8* %b_elt) #1 {
+entry:
+  br label %Top
+
+Top:
+  %tobool1 = icmp eq i8* %a_elt, null
+  br i1 %tobool1, label %CallSiteBB, label %NextCond
+
+NextCond:
+  %cmp = icmp ne i8* %b_elt, null
+  br i1 %cmp, label %CallSiteBB, label %End
+
+CallSiteBB:
+  %p = phi i1 [ false, %Top ], [ %c, %NextCond ]
+  call void @nonnoduplicate_callee(i8* %a_elt, i1 %p)
+  br label %End
+
+End:
+  ret void
+}
+
+; CHECK-LABEL: define void @nonnoduplicate_callee(
+; CHECK: call void @nonnoduplicate_external(
+; CHECK-NOT: call void @nonnoduplicate_external(
+define void @nonnoduplicate_callee(i8* %a_elt, i1 %c) #1 {
+entry:
+  %tobool = icmp ne i8* %a_elt, null
+  br i1 %tobool, label %then, label %endif
+
+then:
+  br label %endif
+
+endif:
+  call void @nonnoduplicate_external(i8* %a_elt)
+  ret void
+}
+
+declare void @noduplicate_external(i8*) #0
+declare void @nonnoduplicate_external(i8*) #1
+
+attributes #0 = { noduplicate nounwind }
+attributes #1 = { nounwind }
+

From e3a76fa1e2cd9aa96f5081a15d58e2eb84d5a890 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 29 May 2019 17:02:27 +0000
Subject: [PATCH 0519/1176] [X86] Fix machineverifier error on
 avx512f-256-set0.mir

Previously the pass ran the entire llc pipeline which caused the IR to be recodegened.

This commit restricts it to just running the postrapseudos pass and checking the results of that instead of the final assembly.

llvm-svn: 361991
---
 llvm/test/CodeGen/X86/avx512f-256-set0.mir | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx512f-256-set0.mir b/llvm/test/CodeGen/X86/avx512f-256-set0.mir
index 6ba37b0360c72..45fbafae11d6d 100644
--- a/llvm/test/CodeGen/X86/avx512f-256-set0.mir
+++ b/llvm/test/CodeGen/X86/avx512f-256-set0.mir
@@ -1,8 +1,9 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=x86_64-- -mattr=+avx512f -o - %s -run-pass=postrapseudos -verify-machineinstrs | FileCheck %s
+
 # Test that we emit VPXORD with ZMM registers instead of YMM
 # registers when we do not have VLX.
-#
-# RUN: llc -mtriple=x86_64-- -mattr=+avx512f -o - %s | FileCheck %s
-# CHECK: vpxord %zmm16, %zmm16, %zmm16
+
 --- |
   ; ModuleID = 'test.ll'
   source_filename = "test.ll"
@@ -59,6 +60,10 @@ constants:       []
 machineFunctionInfo: {}
 body:             |
   bb.0.bb0:
+    ; CHECK-LABEL: name: main
+    ; CHECK: $zmm16 = VPXORDZrr undef $zmm16, undef $zmm16
+    ; CHECK: VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store 32 into %ir.lsr.iv1, align 64)
+    ; CHECK: RET 0
     renamable $ymm16 = AVX512_256_SET0
     VMOVAPSZmr $rip, 1, $noreg, @tst_, $noreg, killed renamable $zmm16 :: (store 32 into %ir.lsr.iv1, align 64)
     RET 0

From d2143c370805e9c54322309b44a023eb5642b678 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Wed, 29 May 2019 17:04:48 +0000
Subject: [PATCH 0520/1176] Add curley brackets to case statement that declares
 a variable.

llvm-svn: 361992
---
 clang/lib/AST/ItaniumMangle.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 6207c6228dfd4..4fe3bfe65615a 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3952,7 +3952,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       Diags.Report(DiagID);
       return;
     }
-    case UETT_OpenMPRequiredSimdAlign:
+    case UETT_OpenMPRequiredSimdAlign: {
       DiagnosticsEngine &Diags = Context.getDiags();
       unsigned DiagID = Diags.getCustomDiagID(
           DiagnosticsEngine::Error,
@@ -3960,6 +3960,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
       Diags.Report(DiagID);
       return;
     }
+    }
     if (SAE->isArgumentType()) {
       Out << 't';
       mangleType(SAE->getArgumentType());

From c98b288b030cde85ae8d4e188677546efcbe0d29 Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Wed, 29 May 2019 17:14:48 +0000
Subject: [PATCH 0521/1176] Yet another attempt to fix buildbot after r361949

Looks like %p format specifier of createStringError behaves
differently on different platforms

llvm-svn: 361993
---
 llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test b/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
index 2b74743385bd5..9275dddff1a52 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
@@ -67,8 +67,8 @@
 # SIGN_EXTENDED-NEXT: :051000000001020304E1
 # SIGN_EXTENDED-NEXT: :00000001FF
 
-# BAD-ADDR: error: {{.*}}: Section '.text2' address range [0xfffffff8, 0x100000000] is not 32 bit
-# BAD-ADDR2: error: {{.*}}: Section '.text3' address range [0xffffffff0, 0xffffffff4] is not 32 bit
+# BAD-ADDR: error: {{.*}}: Section '.text2' address range [{{.*}}, {{.*}}] is not 32 bit
+# BAD-ADDR2: error: {{.*}}: Section '.text3' address range [{{.*}}, {{.*}}] is not 32 bit
 
 # There shouldn't be 'ExtendedAddr' nor 'Data' records
 # ZERO_SIZE_SEC-NOT:  :02000004
@@ -78,4 +78,4 @@
 # START1: :040000030000FFFFFB
 # START2: :0400000500100000E7
 # START3: :040000058000100067
-# BAD-START: error: {{.*}}: Entry point address 0xf00000000 overflows 32 bits
+# BAD-START: error: {{.*}}: Entry point address {{.*}} overflows 32 bits

From 4b7a713accdce4e46a45f34bd8705e43b495ddec Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Wed, 29 May 2019 17:23:27 +0000
Subject: [PATCH 0522/1176] [CUDA][HIP] Skip setting `externally_initialized`
 for static device variables.

Summary:
- By declaring device variables as `static`, we assume they won't be
  addressable from the host side. Thus, no `externally_initialized` is
  required.

Reviewers: yaxunl

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62603

llvm-svn: 361994
---
 clang/lib/CodeGen/CodeGenModule.cpp       |  3 ++-
 clang/test/CodeGenCUDA/device-var-init.cu | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8c9e240a680fc..8cfb4e60e0de7 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3869,7 +3869,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
   // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
   if (GV && LangOpts.CUDA) {
     if (LangOpts.CUDAIsDevice) {
-      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())
+      if (Linkage != llvm::GlobalValue::InternalLinkage &&
+          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
         GV->setExternallyInitialized(true);
     } else {
       // Host-side shadows of external declarations of device-side
diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu
index af42e698cfe9a..fd236bb842df0 100644
--- a/clang/test/CodeGenCUDA/device-var-init.cu
+++ b/clang/test/CodeGenCUDA/device-var-init.cu
@@ -33,6 +33,16 @@ __device__ int d_v_i = 1;
 // DEVICE: @d_v_i = addrspace(1) externally_initialized global i32 1,
 // HOST:   @d_v_i = internal global i32 undef,
 
+// For `static` device variables, assume they won't be addressed from the host
+// side.
+static __device__ int d_s_v_i = 1;
+// DEVICE: @_ZL7d_s_v_i = internal addrspace(1) global i32 1,
+
+// Dummy function to keep static variables referenced.
+__device__ int foo() {
+  return d_s_v_i;
+}
+
 // trivial constructor -- allowed
 __device__ T d_t;
 // DEVICE: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer

From 5a0e13c4d6bcdf9038dbcd63f76c2f1575c2487a Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Wed, 29 May 2019 17:25:03 +0000
Subject: [PATCH 0523/1176] Fixed source header [NFC]

llvm-svn: 361995
---
 lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index 29a0b30fcd1bd..cca5a612e713a 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -1,4 +1,4 @@
-//===-- PythonDataObjectsTests.cpp ------------------------------*- C++ -*-===//
+//===-- SymbolFileDWARFTests.cpp --------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From f6faa382f3e0eae41e8e82848e4eafe08ea1ccb5 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Wed, 29 May 2019 17:49:30 +0000
Subject: [PATCH 0524/1176] [Index] Compute correct symbol kind for variable
 templates

Summary:
The index library itself seems to never pass variable templates as
input, however clangd does.

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62579

llvm-svn: 361996
---
 .../clangd/unittests/CodeCompleteTests.cpp    | 25 +++++++++++++++++++
 clang/lib/Index/IndexSymbol.cpp               | 24 ++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index b9ca702ee0fae..202757aff412b 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -463,6 +463,31 @@ TEST(CompletionTest, Kinds) {
   EXPECT_THAT(
       Results.Completions,
       UnorderedElementsAre(AllOf(Named("a"), Kind(CompletionItemKind::Field))));
+
+  // Completion kinds for templates should not be unknown.
+  Results = completions(
+      R"cpp(
+        template <class T> struct complete_class {};
+        template <class T> void complete_function();
+        template <class T> using complete_type_alias = int;
+        template <class T> int complete_variable = 10;
+
+        struct X {
+          template <class T> static int complete_static_member = 10;
+
+          static auto x = complete_^
+        }
+      )cpp");
+  EXPECT_THAT(
+      Results.Completions,
+      UnorderedElementsAre(
+          AllOf(Named("complete_class"), Kind(CompletionItemKind::Class)),
+          AllOf(Named("complete_function"), Kind(CompletionItemKind::Function)),
+          AllOf(Named("complete_type_alias"),
+                Kind(CompletionItemKind::Interface)),
+          AllOf(Named("complete_variable"), Kind(CompletionItemKind::Variable)),
+          AllOf(Named("complete_static_member"),
+                Kind(CompletionItemKind::Property))));
 }
 
 TEST(CompletionTest, NoDuplicates) {
diff --git a/clang/lib/Index/IndexSymbol.cpp b/clang/lib/Index/IndexSymbol.cpp
index db397b9856136..064f3ae32f9ec 100644
--- a/clang/lib/Index/IndexSymbol.cpp
+++ b/clang/lib/Index/IndexSymbol.cpp
@@ -96,6 +96,13 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
     Info.Properties |= (SymbolPropertySet)SymbolProperty::ProtocolInterface;
   }
 
+  if (auto *VT = dyn_cast<VarTemplateDecl>(D)) {
+    Info.Properties |= (SymbolPropertySet)SymbolProperty::Generic;
+    Info.Lang = SymbolLanguage::CXX;
+    // All other fields are filled from the templated decl.
+    D = VT->getTemplatedDecl();
+  }
+
   if (const TagDecl *TD = dyn_cast<TagDecl>(D)) {
     switch (TD->getTagKind()) {
     case TTK_Struct:
@@ -333,6 +340,23 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
           Info.Lang = SymbolLanguage::CXX;
       }
       break;
+    case Decl::ClassTemplatePartialSpecialization:
+    case Decl::ClassScopeFunctionSpecialization:
+    case Decl::ClassTemplateSpecialization:
+    case Decl::CXXRecord:
+    case Decl::Enum:
+    case Decl::Record:
+      llvm_unreachable("records handled before");
+      break;
+    case Decl::VarTemplateSpecialization:
+    case Decl::VarTemplatePartialSpecialization:
+    case Decl::ImplicitParam:
+    case Decl::ParmVar:
+    case Decl::Var:
+    case Decl::VarTemplate:
+      llvm_unreachable("variables handled before");
+      break;
+    // Other decls get the 'unknown' kind.
     default:
       break;
     }

From 13e491cca51ed187f52ee2e86e2b09ae19017bab Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 18:05:53 +0000
Subject: [PATCH 0525/1176] [analyzer] print() JSONify: getNodeLabel
 implementation

Summary: This patch also rewrites the ProgramPoint printing.

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: cfe-commits, szepet, rnkovacs, a.sidorin, mikhail.ramalho,
             donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62346

llvm-svn: 361997
---
 clang/include/clang/Analysis/ProgramPoint.h  |   2 +-
 clang/lib/Analysis/ProgramPoint.cpp          | 202 ++++++++++---------
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp |  56 +++--
 clang/test/Analysis/dump_egraph.c            |  10 +-
 clang/test/Analysis/dump_egraph.cpp          |   6 +-
 5 files changed, 157 insertions(+), 119 deletions(-)

diff --git a/clang/include/clang/Analysis/ProgramPoint.h b/clang/include/clang/Analysis/ProgramPoint.h
index ffc2a82d9e08e..546224bfd58da 100644
--- a/clang/include/clang/Analysis/ProgramPoint.h
+++ b/clang/include/clang/Analysis/ProgramPoint.h
@@ -213,7 +213,7 @@ class ProgramPoint {
     ID.AddPointer(getTag());
   }
 
-  void print(StringRef CR, llvm::raw_ostream &Out) const;
+  void printJson(llvm::raw_ostream &Out, const char *NL = "\n") const;
 
   LLVM_DUMP_METHOD void dump() const;
 
diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp
index 697d2e57cedb3..482c2770a3109 100644
--- a/clang/lib/Analysis/ProgramPoint.cpp
+++ b/clang/lib/Analysis/ProgramPoint.cpp
@@ -43,151 +43,152 @@ ProgramPoint ProgramPoint::getProgramPoint(const Stmt *S, ProgramPoint::Kind K,
 }
 
 LLVM_DUMP_METHOD void ProgramPoint::dump() const {
-  return print(/*CR=*/"\n", llvm::errs());
+  return printJson(llvm::errs());
 }
 
-static void printLocation(raw_ostream &Out, SourceLocation SLoc,
-                          const SourceManager &SM,
-                          StringRef CR,
-                          StringRef Postfix) {
-  if (SLoc.isFileID()) {
-    Out << CR << "line=" << SM.getExpansionLineNumber(SLoc)
-        << " col=" << SM.getExpansionColumnNumber(SLoc) << Postfix;
+static void printLocation(raw_ostream &Out, SourceLocation Loc,
+                          const SourceManager &SM) {
+  Out << "\"location\": ";
+  if (!Loc.isFileID()) {
+    Out << "null";
+    return;
   }
+
+  Out << "{ \"line\": " << SM.getExpansionLineNumber(Loc)
+      << ", \"column\": " << SM.getExpansionColumnNumber(Loc) << " }";
 }
 
-void ProgramPoint::print(StringRef CR, llvm::raw_ostream &Out) const {
+void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
   const ASTContext &Context =
       getLocationContext()->getAnalysisDeclContext()->getASTContext();
   const SourceManager &SM = Context.getSourceManager();
+
+  Out << "\"kind\": \"";
   switch (getKind()) {
   case ProgramPoint::BlockEntranceKind:
-    Out << "Block Entrance: B"
+    Out << "BlockEntrance\""
+        << ", \"block_id\": "
         << castAs<BlockEntrance>().getBlock()->getBlockID();
     break;
 
   case ProgramPoint::FunctionExitKind: {
     auto FEP = getAs<FunctionExitPoint>();
-    Out << "Function Exit: B" << FEP->getBlock()->getBlockID();
+    Out << "FunctionExit\""
+        << ", \"block_id\": " << FEP->getBlock()->getBlockID()
+        << ", \"stmt_id\": ";
+
     if (const ReturnStmt *RS = FEP->getStmt()) {
-      Out << CR << " Return: S" << RS->getID(Context) << CR;
-      RS->printPretty(Out, /*helper=*/nullptr, Context.getPrintingPolicy(),
-                      /*Indentation=*/2, /*NewlineSymbol=*/CR);
+      Out << RS->getID(Context) << ", \"stmt\": \"";
+      RS->printPretty(Out, /*Helper=*/nullptr, Context.getPrintingPolicy());
+      Out << '\"';
+    } else {
+      Out << "null, \"stmt\": null";
     }
     break;
   }
   case ProgramPoint::BlockExitKind:
-    assert(false);
+    llvm_unreachable("BlockExitKind");
     break;
-
   case ProgramPoint::CallEnterKind:
-    Out << "CallEnter";
+    Out << "CallEnter\"";
     break;
-
   case ProgramPoint::CallExitBeginKind:
-    Out << "CallExitBegin";
+    Out << "CallExitBegin\"";
     break;
-
   case ProgramPoint::CallExitEndKind:
-    Out << "CallExitEnd";
+    Out << "CallExitEnd\"";
     break;
-
   case ProgramPoint::PostStmtPurgeDeadSymbolsKind:
-    Out << "PostStmtPurgeDeadSymbols";
+    Out << "PostStmtPurgeDeadSymbols\"";
     break;
-
   case ProgramPoint::PreStmtPurgeDeadSymbolsKind:
-    Out << "PreStmtPurgeDeadSymbols";
+    Out << "PreStmtPurgeDeadSymbols\"";
     break;
-
   case ProgramPoint::EpsilonKind:
-    Out << "Epsilon Point";
+    Out << "EpsilonPoint\"";
     break;
 
-  case ProgramPoint::LoopExitKind: {
-    LoopExit LE = castAs<LoopExit>();
-    Out << "LoopExit: " << LE.getLoopStmt()->getStmtClassName();
+  case ProgramPoint::LoopExitKind:
+    Out << "LoopExit\", \"stmt\": \""
+        << castAs<LoopExit>().getLoopStmt()->getStmtClassName() << '\"';
     break;
-  }
 
   case ProgramPoint::PreImplicitCallKind: {
     ImplicitCallPoint PC = castAs<ImplicitCallPoint>();
-    Out << "PreCall: ";
+    Out << "PreCall\", \"stmt\": \"";
     PC.getDecl()->print(Out, Context.getLangOpts());
-    printLocation(Out, PC.getLocation(), SM, CR, /*Postfix=*/CR);
+    Out << "\", ";
+    printLocation(Out, PC.getLocation(), SM);
     break;
   }
 
   case ProgramPoint::PostImplicitCallKind: {
     ImplicitCallPoint PC = castAs<ImplicitCallPoint>();
-    Out << "PostCall: ";
+    Out << "PostCall\", \"stmt\": \"";
     PC.getDecl()->print(Out, Context.getLangOpts());
-    printLocation(Out, PC.getLocation(), SM, CR, /*Postfix=*/CR);
+    Out << "\", ";
+    printLocation(Out, PC.getLocation(), SM);
     break;
   }
 
   case ProgramPoint::PostInitializerKind: {
-    Out << "PostInitializer: ";
+    Out << "PostInitializer\", ";
     const CXXCtorInitializer *Init = castAs<PostInitializer>().getInitializer();
-    if (const FieldDecl *FD = Init->getAnyMember())
-      Out << *FD;
-    else {
+    if (const FieldDecl *FD = Init->getAnyMember()) {
+      Out << "\"field_decl\": \"" << *FD << '\"';
+    } else {
+      Out << "\"type\": \"";
       QualType Ty = Init->getTypeSourceInfo()->getType();
       Ty = Ty.getLocalUnqualifiedType();
       Ty.print(Out, Context.getLangOpts());
+      Out << '\"';
     }
     break;
   }
 
   case ProgramPoint::BlockEdgeKind: {
     const BlockEdge &E = castAs<BlockEdge>();
-    Out << "Edge: (B" << E.getSrc()->getBlockID() << ", B"
-        << E.getDst()->getBlockID() << ')';
-
-    if (const Stmt *T = E.getSrc()->getTerminatorStmt()) {
-      SourceLocation SLoc = T->getBeginLoc();
-
-      Out << "\\|Terminator: ";
-      E.getSrc()->printTerminator(Out, Context.getLangOpts());
-      printLocation(Out, SLoc, SM, CR, /*Postfix=*/"");
-
-      if (isa<SwitchStmt>(T)) {
-        const Stmt *Label = E.getDst()->getLabel();
-
-        if (Label) {
-          if (const auto *C = dyn_cast<CaseStmt>(Label)) {
-            Out << CR << "case ";
-            if (C->getLHS())
-              C->getLHS()->printPretty(
-                  Out, nullptr, Context.getPrintingPolicy(),
-                  /*Indentation=*/0, /*NewlineSymbol=*/CR);
-
-            if (const Stmt *RHS = C->getRHS()) {
-              Out << " .. ";
-              RHS->printPretty(Out, nullptr, Context.getPrintingPolicy(),
-                               /*Indetation=*/0, /*NewlineSymbol=*/CR);
-            }
-
-            Out << ":";
-          } else {
-            assert(isa<DefaultStmt>(Label));
-            Out << CR << "default:";
-          }
-        } else
-          Out << CR << "(implicit) default:";
-      } else if (isa<IndirectGotoStmt>(T)) {
-        // FIXME
+    const Stmt *T = E.getSrc()->getTerminatorStmt();
+    Out << "Edge\", \"src_id\": " << E.getSrc()->getBlockID()
+        << ", \"dst_id\": " << E.getDst()->getBlockID()
+        << ", \"terminator\": " << (!T ? "null, \"term_kind\": null" : "\"");
+    if (!T)
+      break;
+
+    E.getSrc()->printTerminator(Out, Context.getLangOpts());
+    Out << "\", ";
+    printLocation(Out, T->getBeginLoc(), SM);
+    Out << ", \"term_kind\": \"";
+
+    if (isa<SwitchStmt>(T)) {
+      Out << "SwitchStmt\", \"case\": ";
+      if (const Stmt *Label = E.getDst()->getLabel()) {
+        if (const auto *C = dyn_cast<CaseStmt>(Label)) {
+          Out << "{ \"lhs\": ";
+          if (const Stmt *LHS = C->getLHS())
+            LHS->printPretty(Out, nullptr, Context.getPrintingPolicy());
+          else
+            Out << "null";
+          Out << ", \"rhs\": ";
+          if (const Stmt *RHS = C->getRHS())
+            RHS->printPretty(Out, nullptr, Context.getPrintingPolicy());
+          else
+            Out << "null";
+          Out << " }";
+        } else {
+          assert(isa<DefaultStmt>(Label));
+          Out << "\"default\"";
+        }
       } else {
-        Out << CR << "Condition: ";
-        if (*E.getSrc()->succ_begin() == E.getDst())
-          Out << "true";
-        else
-          Out << "false";
+        Out << "\"implicit default\"";
       }
-
-      Out << CR;
+    } else if (isa<IndirectGotoStmt>(T)) {
+      // FIXME: More info.
+      Out << "IndirectGotoStmt\"";
+    } else {
+      Out << "Condition\", \"value\": "
+          << (*E.getSrc()->succ_begin() == E.getDst() ? "true" : "false");
     }
-
     break;
   }
 
@@ -195,22 +196,37 @@ void ProgramPoint::print(StringRef CR, llvm::raw_ostream &Out) const {
     const Stmt *S = castAs<StmtPoint>().getStmt();
     assert(S != nullptr && "Expecting non-null Stmt");
 
-    Out << S->getStmtClassName() << " S" << S->getID(Context) << " <"
-        << (const void *)S << "> ";
-    S->printPretty(Out, /*helper=*/nullptr, Context.getPrintingPolicy(),
-                   /*Indentation=*/2, /*NewlineSymbol=*/CR);
-    printLocation(Out, S->getBeginLoc(), SM, CR, /*Postfix=*/"");
+    llvm::SmallString<256> TempBuf;
+    llvm::raw_svector_ostream TempOut(TempBuf);
+
+    Out << "Statement\", \"stmt_kind\": \"" << S->getStmtClassName()
+        << "\", \"stmt_id\": " << S->getID(Context)
+        << ", \"pointer\": \"" << (const void *)S << "\", \"pretty\": ";
+
+    // See whether the current statement is pretty-printable.
+    S->printPretty(TempOut, /*Helper=*/nullptr, Context.getPrintingPolicy());
+    if (!TempBuf.empty()) {
+      Out << '\"' << TempBuf.str().trim() << "\", ";
+      TempBuf.clear();
+    } else {
+      Out << "null, ";
+    }
+
+    printLocation(Out, S->getBeginLoc(), SM);
 
+    Out << ", \"stmt_point_kind\": ";
     if (getAs<PreStmt>())
-      Out << CR << "PreStmt" << CR;
+      Out << "\"PreStmt\"";
     else if (getAs<PostLoad>())
-      Out << CR << "PostLoad" << CR;
+      Out << "\"PostLoad\"";
     else if (getAs<PostStore>())
-      Out << CR << "PostStore" << CR;
+      Out << "\"PostStore\"";
     else if (getAs<PostLValue>())
-      Out << CR << "PostLValue" << CR;
+      Out << "\"PostLValue\"";
     else if (getAs<PostAllocatorCall>())
-      Out << CR << "PostAllocatorCall" << CR;
+      Out << "\"PostAllocatorCall\"";
+    else
+      Out << "null";
 
     break;
   }
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 6fa7cf2e23272..e87b6535c1e95 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -162,12 +162,12 @@ class ConstructedObjectKey {
         << "\", \"argument_index\": ";
 
     if (getItem().getKind() == ConstructionContextItem::ArgumentKind)
-      Out << getItem().getIndex() << '\"';
+      Out << getItem().getIndex();
     else
       Out << "null";
 
     // Pretty-print
-    Out << ", \"pretty\": \"";
+    Out << ", \"pretty\": ";
 
     if (S) {
       llvm::SmallString<256> TempBuf;
@@ -176,13 +176,13 @@ class ConstructedObjectKey {
       // See whether the current statement is pretty-printable.
       S->printPretty(TempOut, Helper, PP);
       if (!TempBuf.empty()) {
-        Out << TempBuf.str().trim() << '\"';
+        Out << '\"' << TempBuf.str().trim() << '\"';
         TempBuf.clear();
       } else {
         Out << "null";
       }
     } else {
-      Out << I->getAnyMember()->getNameAsString() << '\"';
+      Out << '\"' << I->getAnyMember()->getNameAsString() << '\"';
     }
   }
 
@@ -3079,37 +3079,55 @@ struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
   }
 
   static std::string getNodeLabel(const ExplodedNode *N, ExplodedGraph *G){
-    std::string sbuf;
-    llvm::raw_string_ostream Out(sbuf);
+    std::string Buf;
+    llvm::raw_string_ostream Out(Buf);
 
+    const bool IsDot = true;
+    const unsigned int Space = 1;
     ProgramStateRef State = N->getState();
 
+    Out << "{ \"node_id\": \"" << (const void *)N
+        << "\", \"state_id\": " << State->getID()
+        << ", \"has_report\": " << (nodeHasBugReport(N) ? "true" : "false")
+        << ",\\l";
+
+    Indent(Out, Space, IsDot) << "\"program_points\": [\\l";
+
     // Dump program point for all the previously skipped nodes.
     traverseHiddenNodes(
         N,
         [&](const ExplodedNode *OtherNode) {
-          OtherNode->getLocation().print(/*CR=*/"\\l", Out);
+          Indent(Out, Space + 1, IsDot) << "{ ";
+          OtherNode->getLocation().printJson(Out, /*NL=*/"\\l");
+          Out << ", \"tag\": ";
           if (const ProgramPointTag *Tag = OtherNode->getLocation().getTag())
-            Out << "\\lTag:" << Tag->getTagDescription();
-          if (N->isSink())
-            Out << "\\lNode is sink\\l";
-          if (nodeHasBugReport(N))
-            Out << "\\lBug report attached\\l";
+            Out << '\"' << Tag->getTagDescription() << "\" }";
+          else
+            Out << "null }";
         },
-        [&](const ExplodedNode *) { Out << "\\l--------\\l"; },
+	// Adds a comma and a new-line between each program point.
+        [&](const ExplodedNode *) { Out << ",\\l"; },
         [&](const ExplodedNode *) { return false; });
 
-    Out << "\\l\\|";
-
-    Out << "StateID: ST" << State->getID() << ", NodeID: N" << N->getID(G)
-        << " <" << (const void *)N << ">\\|";
+    Out << "\\l"; // Adds a new-line to the last program point.
+    Indent(Out, Space, IsDot) << "],\\l";
 
     bool SameAsAllPredecessors =
         std::all_of(N->pred_begin(), N->pred_end(), [&](const ExplodedNode *P) {
           return P->getState() == State;
         });
-    if (!SameAsAllPredecessors)
-      State->printDOT(Out, N->getLocationContext());
+
+    if (!SameAsAllPredecessors) {
+      State->printDOT(Out, N->getLocationContext(), Space);
+    } else {
+      Indent(Out, Space, IsDot) << "\"program_state\": null";
+    }
+
+    Out << "\\l}";
+    if (!N->succ_empty())
+      Out << ',';
+    Out << "\\l";
+
     return Out.str();
   }
 };
diff --git a/clang/test/Analysis/dump_egraph.c b/clang/test/Analysis/dump_egraph.c
index 6e8793b202be3..f1ac03b10cc94 100644
--- a/clang/test/Analysis/dump_egraph.c
+++ b/clang/test/Analysis/dump_egraph.c
@@ -11,6 +11,10 @@ int foo() {
 }
 
 // CHECK: digraph "Exploded Graph" {
-// CHECK: Edge: (B2, B1)
-// CHECK: Block Entrance: B1
-// CHECK: Bug report attached
+
+// CHECK: \"program_points\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Edge\", \"src_id\": 2, \"dst_id\": 1, \"terminator\": null, \"term_kind\": null, \"tag\": null \}\l&nbsp;&nbsp;],\l&nbsp;&nbsp;\"program_state\": null
+
+// CHECK: \"program_points\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"BlockEntrance\", \"block_id\": 1
+
+// CHECK: \"has_report\": true
+
diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index e3e55554e44ed..5896f8666d68f 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -16,9 +16,9 @@ void foo() {
   T t;
 }
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": 1155, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": 1155, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": 1092, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": 1092, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
 
-// CHECK: \"store\": [\l&nbsp;&nbsp;&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
+// CHECK: \"store\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
 

From 83c28abdb252670ccf557e02042195c012b45431 Mon Sep 17 00:00:00 2001
From: Stella Stamenova <stilis@microsoft.com>
Date: Wed, 29 May 2019 18:07:39 +0000
Subject: [PATCH 0526/1176] lit: modernize the lit configuration for the lit
 tests

Summary: This also normalizes the config feature that represents the windows platform to "system-windows" as opposed to having both "windows" and "system-windows"

Reviewers: asmith, probinson

Subscribers: delcypher, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61798

llvm-svn: 361998
---
 llvm/utils/lit/tests/lit.cfg           | 43 ++++++++------------------
 llvm/utils/lit/tests/lit.site.cfg.in   |  3 ++
 llvm/utils/lit/tests/shtest-timeout.py |  4 +--
 3 files changed, 17 insertions(+), 33 deletions(-)

diff --git a/llvm/utils/lit/tests/lit.cfg b/llvm/utils/lit/tests/lit.cfg
index 2af93d6314ade..4648b1bfc9c30 100644
--- a/llvm/utils/lit/tests/lit.cfg
+++ b/llvm/utils/lit/tests/lit.cfg
@@ -4,6 +4,7 @@ import os
 import sys
 
 import lit.formats
+from lit.llvm import llvm_config
 
 # Configuration file for the 'lit' test runner.
 
@@ -25,42 +26,35 @@ config.test_exec_root = config.test_source_root
 
 config.target_triple = '(unused)'
 
-src_root = os.path.join(config.test_source_root, '..')
 llvm_src_root = getattr(config, 'llvm_src_root', None)
-if llvm_src_root != None:
-  # ``src_root`` may be in LLVM's binary build directory which does not contain
+if llvm_src_root:
+  # ``test_source_root`` may be in LLVM's binary build directory which does not contain
   # ``lit.py``, so use `llvm_src_root` instead.
   lit_path = os.path.join(llvm_src_root, 'utils', 'lit')
 else:
-  lit_path = src_root
+  lit_path = os.path.join(config.test_source_root, '..')
 
-pythonpath_list = [lit_path] # Required because some tests import the lit module
+# Required because some tests import the lit module
+llvm_config.with_environment('PYTHONPATH', lit_path, append_path=True)
 
-# Ensure the user's PYTHONPATH is included.
-if 'PYTHONPATH' in os.environ:
-    pythonpath_list.append(os.environ['PYTHONPATH'])
-if 'PYTHONPATH' in config.environment:
-    pythonpath_list.append(config.environment['PYTHONPATH'])
-config.environment['PYTHONPATH'] = os.pathsep.join(pythonpath_list)
+# Add llvm and lit tools directories if this config is being loaded indirectly.
+for attribute in ('llvm_tools_dir', 'lit_tools_dir'):
+    directory = getattr(config, attribute, None)
+    if directory:
+        llvm_config.with_environment('PATH', directory, append_path=True)
 
-config.substitutions.append(('%{src_root}', src_root))
 config.substitutions.append(('%{inputs}', os.path.join(
-            src_root, 'tests', 'Inputs')))
+    config.test_source_root, 'Inputs')))
 config.substitutions.append(('%{lit}', "%%{python} %s" % (
-            os.path.join(lit_path, 'lit.py'),)))
+    os.path.join(lit_path, 'lit.py'),)))
 config.substitutions.append(('%{python}', '"%s"' % (sys.executable)))
 
-
 # Enable coverage.py reporting, assuming the coverage module has been installed
 # and sitecustomize.py in the virtualenv has been modified appropriately.
 if lit_config.params.get('check-coverage', None):
     config.environment['COVERAGE_PROCESS_START'] = os.path.join(
         os.path.dirname(__file__), ".coveragerc")
 
-# Add a feature to detect the Python version.
-config.available_features.add("python%d.%d" % (sys.version_info[0],
-                                                  sys.version_info[1]))
-
 # Add a feature to detect if psutil is available
 try:
     import psutil
@@ -69,14 +63,3 @@ try:
 except ImportError:
     lit_config.warning('Could not import psutil. Some tests will be skipped and'
                        ' the --timeout command line argument will not work.')
-
-if sys.platform.startswith('win') or sys.platform.startswith('cygwin'):
-    config.available_features.add('windows')
-
-# Add llvm and lit tools directories if this config is being loaded indirectly.
-path = config.environment['PATH']
-for attribute in ('llvm_tools_dir', 'lit_tools_dir'):
-    directory = getattr(config, attribute, None)
-    if directory:
-        path = os.path.pathsep.join((directory, path))
-config.environment['PATH'] = path
diff --git a/llvm/utils/lit/tests/lit.site.cfg.in b/llvm/utils/lit/tests/lit.site.cfg.in
index 693364c7c9738..43f4e4002af67 100644
--- a/llvm/utils/lit/tests/lit.site.cfg.in
+++ b/llvm/utils/lit/tests/lit.site.cfg.in
@@ -16,5 +16,8 @@ except KeyError:
     key, = e.args
     lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
 
+import lit.llvm
+lit.llvm.initialize(lit_config, config)
+
 # Let the main config do the real work.
 lit_config.load_config(config, "@LLVM_BINARY_DIR@/utils/lit/tests/lit.cfg")
diff --git a/llvm/utils/lit/tests/shtest-timeout.py b/llvm/utils/lit/tests/shtest-timeout.py
index aa7d7e1c7f66b..1208e7379d0ee 100644
--- a/llvm/utils/lit/tests/shtest-timeout.py
+++ b/llvm/utils/lit/tests/shtest-timeout.py
@@ -1,9 +1,7 @@
 # REQUIRES: python-psutil
 
 # llvm.org/PR33944
-# This should be system-windows as windows does not cover all cases, but
-# apparently neither does system-windows
-# UNSUPPORTED: windows
+# UNSUPPORTED: system-windows
 
 # FIXME: This test is fragile because it relies on time which can
 # be affected by system performance. In particular we are currently

From 03e1a82f52d219225c22f14ac73966bb97d4fd0d Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Wed, 29 May 2019 18:08:22 +0000
Subject: [PATCH 0527/1176] [Target] Introduce Process::GetLanguageRuntimes

Summary:
Currently there's not really a good way to iterate over the language runtimes a
process has. This is sometimes desirable (as seen in my change to Thread).
Additionally, there's not really a good reason to iterate over every available
language, but rather only over languages for which we have a plugin loaded.

Reviewers: JDevlieghere, davide, jingham

Subscribers: lldb-commits

Differential Revision: https://reviews.llvm.org/D62562

llvm-svn: 361999
---
 lldb/include/lldb/Target/Language.h |  2 ++
 lldb/include/lldb/Target/Process.h  |  3 +++
 lldb/source/Target/Language.cpp     |  9 +++++++++
 lldb/source/Target/Process.cpp      | 22 ++++++++++++++++++++++
 lldb/source/Target/Thread.cpp       | 16 ++++++----------
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h
index b45a51f767463..6ea6029bdace0 100644
--- a/lldb/include/lldb/Target/Language.h
+++ b/lldb/include/lldb/Target/Language.h
@@ -264,6 +264,8 @@ class Language : public PluginInterface {
   // etc.
   static lldb::LanguageType GetPrimaryLanguage(lldb::LanguageType language);
 
+  static std::set<lldb::LanguageType> GetSupportedLanguages();
+
   static void GetLanguagesSupportingTypeSystems(
       std::set<lldb::LanguageType> &languages,
       std::set<lldb::LanguageType> &languages_for_expressions);
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index 90118c87b2f73..2657302340eec 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -2178,6 +2178,9 @@ class Process : public std::enable_shared_from_this<Process>,
 
   OperatingSystem *GetOperatingSystem() { return m_os_up.get(); }
 
+  std::vector<LanguageRuntime *>
+  GetLanguageRuntimes(bool retry_if_null = true);
+
   LanguageRuntime *GetLanguageRuntime(lldb::LanguageType language,
                                       bool retry_if_null = true);
 
diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp
index a1a388d396e90..3c3ef2841d444 100644
--- a/lldb/source/Target/Language.cpp
+++ b/lldb/source/Target/Language.cpp
@@ -348,6 +348,15 @@ LanguageType Language::GetPrimaryLanguage(LanguageType language) {
   }
 }
 
+std::set<lldb::LanguageType> Language::GetSupportedLanguages() {
+  std::set<lldb::LanguageType> supported_languages;
+  ForEach([&](Language *lang) {
+    supported_languages.emplace(lang->GetLanguageType());
+    return true;
+  });
+  return supported_languages;
+}
+
 void Language::GetLanguagesSupportingTypeSystems(
     std::set<lldb::LanguageType> &languages,
     std::set<lldb::LanguageType> &languages_for_expressions) {
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 4162571fbd608..c88ef0dcf8184 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -44,6 +44,7 @@
 #include "lldb/Target/InstrumentationRuntime.h"
 #include "lldb/Target/JITLoader.h"
 #include "lldb/Target/JITLoaderList.h"
+#include "lldb/Target/Language.h"
 #include "lldb/Target/LanguageRuntime.h"
 #include "lldb/Target/MemoryHistory.h"
 #include "lldb/Target/MemoryRegionInfo.h"
@@ -1547,6 +1548,27 @@ const lldb::ABISP &Process::GetABI() {
   return m_abi_sp;
 }
 
+std::vector<LanguageRuntime *>
+Process::GetLanguageRuntimes(bool retry_if_null) {
+  std::vector<LanguageRuntime *> language_runtimes;
+
+  if (m_finalizing)
+    return language_runtimes;
+
+  std::lock_guard<std::recursive_mutex> guard(m_language_runtimes_mutex);
+  // Before we pass off a copy of the language runtimes, we must make sure that
+  // our collection is properly populated. It's possible that some of the
+  // language runtimes were not loaded yet, either because nobody requested it
+  // yet or the proper condition for loading wasn't yet met (e.g. libc++.so
+  // hadn't been loaded).
+  for (const lldb::LanguageType lang_type : Language::GetSupportedLanguages()) {
+    if (LanguageRuntime *runtime = GetLanguageRuntime(lang_type, retry_if_null))
+      language_runtimes.emplace_back(runtime);
+  }
+
+  return language_runtimes;
+}
+
 LanguageRuntime *Process::GetLanguageRuntime(lldb::LanguageType language,
                                              bool retry_if_null) {
   if (m_finalizing)
diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp
index 39086529c114e..f248e6b525ef0 100644
--- a/lldb/source/Target/Thread.cpp
+++ b/lldb/source/Target/Thread.cpp
@@ -2211,11 +2211,9 @@ ValueObjectSP Thread::GetCurrentException() {
 
   // NOTE: Even though this behavior is generalized, only ObjC is actually
   // supported at the moment.
-  for (unsigned lang = eLanguageTypeUnknown; lang < eNumLanguageTypes; lang++) {
-    if (auto runtime = GetProcess()->GetLanguageRuntime(
-            static_cast<lldb::LanguageType>(lang)))
-      if (auto e = runtime->GetExceptionObjectForThread(shared_from_this()))
-        return e;
+  for (LanguageRuntime *runtime : GetProcess()->GetLanguageRuntimes()) {
+    if (auto e = runtime->GetExceptionObjectForThread(shared_from_this()))
+      return e;
   }
 
   return ValueObjectSP();
@@ -2228,11 +2226,9 @@ ThreadSP Thread::GetCurrentExceptionBacktrace() {
 
   // NOTE: Even though this behavior is generalized, only ObjC is actually
   // supported at the moment.
-  for (unsigned lang = eLanguageTypeUnknown; lang < eNumLanguageTypes; lang++) {
-    if (auto runtime = GetProcess()->GetLanguageRuntime(
-            static_cast<lldb::LanguageType>(lang)))
-      if (auto bt = runtime->GetBacktraceThreadFromException(exception))
-        return bt;
+  for (LanguageRuntime *runtime : GetProcess()->GetLanguageRuntimes()) {
+    if (auto bt = runtime->GetBacktraceThreadFromException(exception))
+      return bt;
   }
 
   return ThreadSP();

From 9ee26c8d5f049f0f5fc99944a75e4900d4ae3110 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 18:17:18 +0000
Subject: [PATCH 0528/1176] [analyzer][AST] print() JSONify: Stmt
 implementation

Summary:
This patch also adds a function called `JsonFormat()` which:
- Flattens the string so removes the new-lines.
- Escapes double quotes.

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: cfe-commits, szepet, rnkovacs, a.sidorin, mikhail.ramalho,
             donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62494

llvm-svn: 362000
---
 clang/include/clang/AST/Stmt.h                |  4 ++
 clang/include/clang/Basic/JsonSupport.h       | 36 ++++++++++++++
 clang/lib/AST/StmtPrinter.cpp                 | 20 ++++++--
 clang/lib/Analysis/ProgramPoint.cpp           | 49 +++++++++----------
 clang/lib/StaticAnalyzer/Core/Environment.cpp | 12 +----
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  | 12 +----
 6 files changed, 79 insertions(+), 54 deletions(-)

diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 8834a60cd6c99..77b2173fcb878 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -1100,6 +1100,10 @@ class alignas(void *) Stmt {
                    StringRef NewlineSymbol = "\n",
                    const ASTContext *Context = nullptr) const;
 
+  /// Pretty-prints in JSON format.
+  void printJson(raw_ostream &Out, PrinterHelper *Helper,
+                 const PrintingPolicy &Policy, bool AddQuotes) const;
+
   /// viewAST - Visualize an AST rooted at this Stmt* using GraphViz.  Only
   ///   works on systems with GraphViz (Mac OS X) or dot+gv installed.
   void viewAST() const;
diff --git a/clang/include/clang/Basic/JsonSupport.h b/clang/include/clang/Basic/JsonSupport.h
index 43fb48a3c7759..f235daa1689e7 100644
--- a/clang/include/clang/Basic/JsonSupport.h
+++ b/clang/include/clang/Basic/JsonSupport.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_BASIC_JSONSUPPORT_H
 
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 
 
@@ -22,6 +23,41 @@ inline raw_ostream &Indent(raw_ostream &Out, const unsigned int Space,
   return Out;
 }
 
+inline std::string JsonFormat(StringRef RawSR, bool AddQuotes) {
+  if (RawSR.empty())
+    return "null";
+
+  // Trim special characters.
+  std::string Str = RawSR.trim().str();
+  size_t Pos = 0;
+
+  // Escape double quotes.
+  while (true) {
+    Pos = Str.find('\"', Pos);
+    if (Pos == std::string::npos)
+      break;
+
+    // Prevent bad conversions.
+    size_t TempPos = (Pos != 0) ? Pos - 1 : 0;
+
+    // See whether the current double quote is escaped.
+    if (TempPos != Str.find("\\\"", TempPos)) {
+      Str.insert(Pos, "\\");
+      ++Pos; // As we insert the escape-character move plus one.
+    }
+
+    ++Pos;
+  }
+
+  // Remove new-lines.
+  Str.erase(std::remove(Str.begin(), Str.end(), '\n'), Str.end());
+
+  if (!AddQuotes)
+    return Str;
+
+  return '\"' + Str + '\"';
+}
+
 } // namespace clang
 
 #endif // LLVM_CLANG_BASIC_JSONSUPPORT_H
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index b06edb4b6db15..7fe0be5217dbc 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -36,6 +36,7 @@
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/ExpressionTraits.h"
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/Lambda.h"
 #include "clang/Basic/OpenMPKinds.h"
@@ -2395,12 +2396,21 @@ void Stmt::dumpPretty(const ASTContext &Context) const {
   printPretty(llvm::errs(), nullptr, PrintingPolicy(Context.getLangOpts()));
 }
 
-void Stmt::printPretty(raw_ostream &OS, PrinterHelper *Helper,
+void Stmt::printPretty(raw_ostream &Out, PrinterHelper *Helper,
                        const PrintingPolicy &Policy, unsigned Indentation,
-                       StringRef NL,
-                       const ASTContext *Context) const {
-  StmtPrinter P(OS, Helper, Policy, Indentation, NL, Context);
-  P.Visit(const_cast<Stmt*>(this));
+                       StringRef NL, const ASTContext *Context) const {
+  StmtPrinter P(Out, Helper, Policy, Indentation, NL, Context);
+  P.Visit(const_cast<Stmt *>(this));
+}
+
+void Stmt::printJson(raw_ostream &Out, PrinterHelper *Helper,
+                     const PrintingPolicy &Policy, bool AddQuotes) const {
+  std::string Buf;
+  llvm::raw_string_ostream TempOut(Buf);
+
+  printPretty(TempOut, Helper, Policy);
+
+  Out << JsonFormat(TempOut.str(), AddQuotes);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp
index 482c2770a3109..fb29300f520f1 100644
--- a/clang/lib/Analysis/ProgramPoint.cpp
+++ b/clang/lib/Analysis/ProgramPoint.cpp
@@ -46,8 +46,8 @@ LLVM_DUMP_METHOD void ProgramPoint::dump() const {
   return printJson(llvm::errs());
 }
 
-static void printLocation(raw_ostream &Out, SourceLocation Loc,
-                          const SourceManager &SM) {
+static void printLocJson(raw_ostream &Out, SourceLocation Loc,
+                         const SourceManager &SM) {
   Out << "\"location\": ";
   if (!Loc.isFileID()) {
     Out << "null";
@@ -62,6 +62,8 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
   const ASTContext &Context =
       getLocationContext()->getAnalysisDeclContext()->getASTContext();
   const SourceManager &SM = Context.getSourceManager();
+  const PrintingPolicy &PP = Context.getPrintingPolicy();
+  const bool AddQuotes = true;
 
   Out << "\"kind\": \"";
   switch (getKind()) {
@@ -78,9 +80,8 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
         << ", \"stmt_id\": ";
 
     if (const ReturnStmt *RS = FEP->getStmt()) {
-      Out << RS->getID(Context) << ", \"stmt\": \"";
-      RS->printPretty(Out, /*Helper=*/nullptr, Context.getPrintingPolicy());
-      Out << '\"';
+      Out << RS->getID(Context) << ", \"stmt\": ";
+      RS->printJson(Out, nullptr, PP, AddQuotes);
     } else {
       Out << "null, \"stmt\": null";
     }
@@ -118,7 +119,7 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
     Out << "PreCall\", \"stmt\": \"";
     PC.getDecl()->print(Out, Context.getLangOpts());
     Out << "\", ";
-    printLocation(Out, PC.getLocation(), SM);
+    printLocJson(Out, PC.getLocation(), SM);
     break;
   }
 
@@ -127,7 +128,7 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
     Out << "PostCall\", \"stmt\": \"";
     PC.getDecl()->print(Out, Context.getLangOpts());
     Out << "\", ";
-    printLocation(Out, PC.getLocation(), SM);
+    printLocJson(Out, PC.getLocation(), SM);
     break;
   }
 
@@ -157,23 +158,26 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
 
     E.getSrc()->printTerminator(Out, Context.getLangOpts());
     Out << "\", ";
-    printLocation(Out, T->getBeginLoc(), SM);
-    Out << ", \"term_kind\": \"";
+    printLocJson(Out, T->getBeginLoc(), SM);
 
+    Out << ", \"term_kind\": \"";
     if (isa<SwitchStmt>(T)) {
       Out << "SwitchStmt\", \"case\": ";
       if (const Stmt *Label = E.getDst()->getLabel()) {
         if (const auto *C = dyn_cast<CaseStmt>(Label)) {
           Out << "{ \"lhs\": ";
-          if (const Stmt *LHS = C->getLHS())
-            LHS->printPretty(Out, nullptr, Context.getPrintingPolicy());
-          else
+          if (const Stmt *LHS = C->getLHS()) {
+            LHS->printJson(Out, nullptr, PP, AddQuotes);
+          } else {
             Out << "null";
+	  }
+
           Out << ", \"rhs\": ";
-          if (const Stmt *RHS = C->getRHS())
-            RHS->printPretty(Out, nullptr, Context.getPrintingPolicy());
-          else
+          if (const Stmt *RHS = C->getRHS()) {
+            RHS->printJson(Out, nullptr, PP, AddQuotes);
+          } else {
             Out << "null";
+          }
           Out << " }";
         } else {
           assert(isa<DefaultStmt>(Label));
@@ -196,23 +200,14 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
     const Stmt *S = castAs<StmtPoint>().getStmt();
     assert(S != nullptr && "Expecting non-null Stmt");
 
-    llvm::SmallString<256> TempBuf;
-    llvm::raw_svector_ostream TempOut(TempBuf);
-
     Out << "Statement\", \"stmt_kind\": \"" << S->getStmtClassName()
         << "\", \"stmt_id\": " << S->getID(Context)
         << ", \"pointer\": \"" << (const void *)S << "\", \"pretty\": ";
 
-    // See whether the current statement is pretty-printable.
-    S->printPretty(TempOut, /*Helper=*/nullptr, Context.getPrintingPolicy());
-    if (!TempBuf.empty()) {
-      Out << '\"' << TempBuf.str().trim() << "\", ";
-      TempBuf.clear();
-    } else {
-      Out << "null, ";
-    }
+    S->printJson(Out, nullptr, PP, AddQuotes);
 
-    printLocation(Out, S->getBeginLoc(), SM);
+    Out << ", ";
+    printLocJson(Out, S->getBeginLoc(), SM);
 
     Out << ", \"stmt_point_kind\": ";
     if (getAs<PreStmt>())
diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp
index 6344bc5a4d6f2..df2402ba017c6 100644
--- a/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -235,9 +235,6 @@ void Environment::printJson(raw_ostream &Out, const ASTContext &Ctx,
     bool HasItem = false;
     unsigned int InnerSpace = Space + 1;
 
-    llvm::SmallString<256> TempBuf;
-    llvm::raw_svector_ostream TempOut(TempBuf);
-
     // Store the last ExprBinding which we will print.
     BindingsTy::iterator LastI = ExprBindings.end();
     for (BindingsTy::iterator I = ExprBindings.begin(); I != ExprBindings.end();
@@ -266,14 +263,7 @@ void Environment::printJson(raw_ostream &Out, const ASTContext &Ctx,
           << "{ \"lctx_id\": " << LC->getID()
           << ", \"stmt_id\": " << S->getID(Ctx) << ", \"pretty\": ";
 
-      // See whether the current statement is pretty-printable.
-      S->printPretty(TempOut, /*Helper=*/nullptr, PP);
-      if (!TempBuf.empty()) {
-        Out << '\"' << TempBuf.str().trim() << '\"';
-        TempBuf.clear();
-      } else {
-        Out << "null";
-      }
+      S->printJson(Out, nullptr, PP, /*AddQuotes=*/true);
 
       Out << ", \"value\": \"" << I->second << "\" }";
 
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index e87b6535c1e95..9a9b12dc60027 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -170,17 +170,7 @@ class ConstructedObjectKey {
     Out << ", \"pretty\": ";
 
     if (S) {
-      llvm::SmallString<256> TempBuf;
-      llvm::raw_svector_ostream TempOut(TempBuf);
-
-      // See whether the current statement is pretty-printable.
-      S->printPretty(TempOut, Helper, PP);
-      if (!TempBuf.empty()) {
-        Out << '\"' << TempBuf.str().trim() << '\"';
-        TempBuf.clear();
-      } else {
-        Out << "null";
-      }
+      S->printJson(Out, Helper, PP, /*AddQuotes=*/true);
     } else {
       Out << '\"' << I->getAnyMember()->getNameAsString() << '\"';
     }

From d5443f8c21b11cfa8214b192a53f24cda14230a3 Mon Sep 17 00:00:00 2001
From: Aakanksha Patil <aakanksha555@gmail.com>
Date: Wed, 29 May 2019 18:20:11 +0000
Subject: [PATCH 0529/1176] AMDGPU: Return address lowering

The patch computes the return address for the current function.

Differential revision: https://reviews.llvm.org/D59666

llvm-svn: 362001
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 26 +++++++++
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |  2 +-
 llvm/test/CodeGen/AMDGPU/returnaddress.ll | 65 +++++++++++++++++++++++
 3 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/returnaddress.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c860d3ae06c48..ac90399b980e2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3695,6 +3695,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
   case ISD::LOAD: {
     SDValue Result = LowerLOAD(Op, DAG);
     assert((!Result.getNode() ||
@@ -4153,6 +4154,31 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
   return Chain;
 }
 
+SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  // Checking the depth
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
+    return DAG.getConstant(0, DL, VT);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  // Check for kernel and shader functions
+  if (Info->isEntryFunction())
+    return DAG.getConstant(0, DL, VT);
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // There is a call to @llvm.returnaddress in this function
+  MFI.setReturnAddressIsTaken(true);
+
+  const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+  // Get the return address reg and mark it as an implicit live-in
+  unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT, Op.getNode()->isDivergent()));
+
+  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+}
+
 SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
                                             SDValue Op,
                                             const SDLoc &DL,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 094a0b054e235..a63d75184b32d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -89,7 +89,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-
+  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
                               SelectionDAG &DAG, ArrayRef<SDValue> Ops,
                               bool IsIntrinsic = false) const;
diff --git a/llvm/test/CodeGen/AMDGPU/returnaddress.ll b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
new file mode 100644
index 0000000000000..1db6e3e0a8575
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; Test with zero frame
+; GCN-LABEL: {{^}}func1
+; GCN: v_mov_b32_e32 v0, s30
+; GCN: v_mov_b32_e32 v1, s31
+; GCN: s_setpc_b64 s[30:31]
+define i8* @func1() nounwind {
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+}
+
+; Test with non-zero frame
+; GCN-LABEL: {{^}}func2
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, 0
+; GCN: s_setpc_b64 s[30:31]
+define i8* @func2() nounwind {
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 1)
+  ret i8* %0
+}
+
+; Test with amdgpu_kernel
+; GCN-LABEL: {{^}}func3
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, v0
+define amdgpu_kernel void @func3(i8** %out) nounwind {
+entry:
+  %tmp = tail call i8* @llvm.returnaddress(i32 0)
+  store i8* %tmp, i8** %out, align 4
+  ret void
+}
+
+; Test with use outside the entry-block
+; GCN-LABEL: {{^}}func4
+; GCN: v_mov_b32_e32 v0, 0
+; GCN: v_mov_b32_e32 v1, v0
+define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
+entry:
+  %cmp = icmp ne i32 %val, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  %tmp = tail call i8* @llvm.returnaddress(i32 1)
+  store i8* %tmp, i8** %out, align 4
+  ret void
+
+exit:
+  ret void
+}
+
+; Test ending in unreachable
+; GCN-LABEL: {{^}}func5
+; GCN: v_mov_b32_e32 v0, 0
+define void @func5() nounwind {
+entry:
+  %tmp = tail call i8* @llvm.returnaddress(i32 2)
+  store volatile i32 0, i32 addrspace(3)* undef, align 4
+  unreachable
+}
+
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone

From 02be650617a5c8812e5c4696bcbbab284fe83acd Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 18:21:14 +0000
Subject: [PATCH 0530/1176] [analyzer] print() JSONify: Decl revision

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: cfe-commits, szepet, rnkovacs, a.sidorin, mikhail.ramalho,
             donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62495

llvm-svn: 362002
---
 clang/lib/Analysis/ProgramPoint.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp
index fb29300f520f1..32ae439c8ca5d 100644
--- a/clang/lib/Analysis/ProgramPoint.cpp
+++ b/clang/lib/Analysis/ProgramPoint.cpp
@@ -116,18 +116,16 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
 
   case ProgramPoint::PreImplicitCallKind: {
     ImplicitCallPoint PC = castAs<ImplicitCallPoint>();
-    Out << "PreCall\", \"stmt\": \"";
-    PC.getDecl()->print(Out, Context.getLangOpts());
-    Out << "\", ";
+    Out << "PreCall\", \"decl\": \""
+        << PC.getDecl()->getAsFunction()->getQualifiedNameAsString() << "\", ";
     printLocJson(Out, PC.getLocation(), SM);
     break;
   }
 
   case ProgramPoint::PostImplicitCallKind: {
     ImplicitCallPoint PC = castAs<ImplicitCallPoint>();
-    Out << "PostCall\", \"stmt\": \"";
-    PC.getDecl()->print(Out, Context.getLangOpts());
-    Out << "\", ";
+    Out << "PostCall\", \"decl\": \""
+        << PC.getDecl()->getAsFunction()->getQualifiedNameAsString() << "\", ";
     printLocJson(Out, PC.getLocation(), SM);
     break;
   }

From dea605e0907804fd63365ac0273e8a39781fe25d Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 18:29:31 +0000
Subject: [PATCH 0531/1176] [analyzer] print() JSONify: CFG implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: cfe-commits, szepet, rnkovacs, a.sidorin, mikhail.ramalho,
             donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62496

llvm-svn: 362003
---
 clang/include/clang/Analysis/CFG.h  |  4 ++++
 clang/lib/Analysis/CFG.cpp          | 14 +++++++++++++-
 clang/lib/Analysis/ProgramPoint.cpp | 13 ++++++++-----
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index 734c49881e3e5..d8b3d6ff71ede 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -882,7 +882,11 @@ class CFGBlock {
   void dump(const CFG *cfg, const LangOptions &LO, bool ShowColors = false) const;
   void print(raw_ostream &OS, const CFG* cfg, const LangOptions &LO,
              bool ShowColors) const;
+
   void printTerminator(raw_ostream &OS, const LangOptions &LO) const;
+  void printTerminatorJson(raw_ostream &Out, const LangOptions &LO,
+                           bool AddQuotes) const;
+  
   void printAsOperand(raw_ostream &OS, bool /*PrintType*/) {
     OS << "BB#" << getBlockID();
   }
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 5d50cfb474e19..1d83359341528 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -27,10 +27,11 @@
 #include "clang/AST/StmtObjC.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/AST/Type.h"
-#include "clang/Analysis/Support/BumpVector.h"
 #include "clang/Analysis/ConstructionContext.h"
+#include "clang/Analysis/Support/BumpVector.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/ExceptionSpecificationType.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
@@ -5561,6 +5562,17 @@ void CFGBlock::printTerminator(raw_ostream &OS,
   TPrinter.print(getTerminator());
 }
 
+/// printTerminatorJson - Pretty-prints the terminator in JSON format.
+void CFGBlock::printTerminatorJson(raw_ostream &Out, const LangOptions &LO,
+                                   bool AddQuotes) const {
+  std::string Buf;
+  llvm::raw_string_ostream TempOut(Buf);
+
+  printTerminator(TempOut, LO);
+
+  Out << JsonFormat(TempOut.str(), AddQuotes);
+}
+
 Stmt *CFGBlock::getTerminatorCondition(bool StripParens) {
   Stmt *Terminator = getTerminatorStmt();
   if (!Terminator)
diff --git a/clang/lib/Analysis/ProgramPoint.cpp b/clang/lib/Analysis/ProgramPoint.cpp
index 32ae439c8ca5d..7e05706e48714 100644
--- a/clang/lib/Analysis/ProgramPoint.cpp
+++ b/clang/lib/Analysis/ProgramPoint.cpp
@@ -149,13 +149,16 @@ void ProgramPoint::printJson(llvm::raw_ostream &Out, const char *NL) const {
     const BlockEdge &E = castAs<BlockEdge>();
     const Stmt *T = E.getSrc()->getTerminatorStmt();
     Out << "Edge\", \"src_id\": " << E.getSrc()->getBlockID()
-        << ", \"dst_id\": " << E.getDst()->getBlockID()
-        << ", \"terminator\": " << (!T ? "null, \"term_kind\": null" : "\"");
-    if (!T)
+        << ", \"dst_id\": " << E.getDst()->getBlockID() << ", \"terminator\": ";
+
+    if (!T) {
+      Out << "null, \"term_kind\": null";
       break;
+    }
 
-    E.getSrc()->printTerminator(Out, Context.getLangOpts());
-    Out << "\", ";
+    E.getSrc()->printTerminatorJson(Out, Context.getLangOpts(),
+                                    /*AddQuotes=*/true);
+    Out << ", ";
     printLocJson(Out, T->getBeginLoc(), SM);
 
     Out << ", \"term_kind\": \"";

From 5458cd4027f534a9c02ade8fadb66701c47366be Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Wed, 29 May 2019 18:31:50 +0000
Subject: [PATCH 0532/1176] [WebAssembly] Support VPtr sanitizer for Emscripten

Summary:
After https://github.com/emscripten-core/emscripten/pull/8651, Emscripten
supports the full UBSan runtime. This includes the VPtr sanitizer.

This diff allows clang to generate code that uses the VPtr sanitizer for
Emscripten.

Patch by Guanzhong Chen

Reviewers: tlively, aheejin

Reviewed By: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, sunfish, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62559

llvm-svn: 362004
---
 clang/lib/Driver/ToolChains/WebAssembly.cpp  |  8 ++++++++
 clang/lib/Driver/ToolChains/WebAssembly.h    |  1 +
 clang/test/CodeGenCXX/wasm-sanitize-vptr.cpp | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+)
 create mode 100644 clang/test/CodeGenCXX/wasm-sanitize-vptr.cpp

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp
index af6e856b9f9a2..7fffbbe6bf594 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.cpp
+++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp
@@ -208,6 +208,14 @@ void WebAssembly::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
   }
 }
 
+SanitizerMask WebAssembly::getSupportedSanitizers() const {
+  SanitizerMask Res = ToolChain::getSupportedSanitizers();
+  if (getTriple().isOSEmscripten()) {
+    Res |= SanitizerKind::Vptr;
+  }
+  return Res;
+}
+
 Tool *WebAssembly::buildLinker() const {
   return new tools::wasm::Linker(*this);
 }
diff --git a/clang/lib/Driver/ToolChains/WebAssembly.h b/clang/lib/Driver/ToolChains/WebAssembly.h
index 8e4e545c98511..67d5fce845765 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.h
+++ b/clang/lib/Driver/ToolChains/WebAssembly.h
@@ -66,6 +66,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain {
       llvm::opt::ArgStringList &CC1Args) const override;
   void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const override;
+  SanitizerMask getSupportedSanitizers() const override;
 
   const char *getDefaultLinker() const override { return "wasm-ld"; }
 
diff --git a/clang/test/CodeGenCXX/wasm-sanitize-vptr.cpp b/clang/test/CodeGenCXX/wasm-sanitize-vptr.cpp
new file mode 100644
index 0000000000000..2a9055aea1ef8
--- /dev/null
+++ b/clang/test/CodeGenCXX/wasm-sanitize-vptr.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -std=c++11 -fsanitize=vptr -emit-llvm %s -o - -triple wasm32-unknown-emscripten | FileCheck %s
+
+struct S {
+  virtual ~S() {}
+  int a;
+};
+
+struct T : S {
+  int b;
+};
+
+// CHECK-LABEL: @_Z15bad_static_castv
+void bad_static_cast() {
+  S s;
+  // CHECK: br i1 %[[NONNULL:.*]], label %[[CONT:.*]], label %[[MISS:.*]], !prof
+  // CHECK: [[MISS]]:
+  // CHECK: call void @__ubsan_handle_dynamic_type_cache_miss_abort
+  // CHECK: [[CONT]]:
+  T &r = static_cast<T &>(s);
+}

From 5feead5752e060327207c7f56666165fdc4edb94 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Wed, 29 May 2019 18:36:54 +0000
Subject: [PATCH 0533/1176] Fix an unused-variable error.

llvm-svn: 362005
---
 clang/lib/StaticAnalyzer/Core/Environment.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp
index df2402ba017c6..3ebb66c6af10c 100644
--- a/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -248,6 +248,7 @@ void Environment::printJson(raw_ostream &Out, const ASTContext &Ctx,
       }
 
       const Stmt *S = I->first.getStmt();
+      (void)S;
       assert(S != nullptr && "Expected non-null Stmt");
 
       LastI = I;

From 5382803b048329627c8b1588aeffe177edb8825f Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Wed, 29 May 2019 18:37:13 +0000
Subject: [PATCH 0534/1176] [InstCombine] Optimize always overflowing signed
 saturating add/sub

Based on the overflow direction information added in D62463, we can
now fold always overflowing signed saturating add/sub to signed min/max.

Differential Revision: https://reviews.llvm.org/D62544

llvm-svn: 362006
---
 .../InstCombine/InstCombineCalls.cpp          | 20 +++++++++++--------
 .../InstCombine/saturating-add-sub.ll         | 20 ++++---------------
 2 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9c6297e4c1721..39aae2f2e140b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -13,6 +13,7 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
@@ -2053,6 +2054,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::usub_sat:
   case Intrinsic::ssub_sat: {
     SaturatingInst *SI = cast<SaturatingInst>(II);
+    Type *Ty = SI->getType();
     Value *Arg0 = SI->getLHS();
     Value *Arg1 = SI->getRHS();
 
@@ -2067,14 +2069,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
         else
           return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
-      case OverflowResult::AlwaysOverflowsLow:
-        if (SI->isSigned()) break; // TODO: Support signed.
-        return replaceInstUsesWith(*SI,
-                                   ConstantInt::getNullValue(II->getType()));
-      case OverflowResult::AlwaysOverflowsHigh:
-        if (SI->isSigned()) break; // TODO: Support signed.
-        return replaceInstUsesWith(*SI,
-                                   ConstantInt::getAllOnesValue(II->getType()));
+      case OverflowResult::AlwaysOverflowsLow: {
+        unsigned BitWidth = Ty->getScalarSizeInBits();
+        APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
+        return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
+      }
+      case OverflowResult::AlwaysOverflowsHigh: {
+        unsigned BitWidth = Ty->getScalarSizeInBits();
+        APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
+        return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
+      }
     }
 
     // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 26374a501e744..fa0cc56447cee 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -341,10 +341,7 @@ define <2 x i8> @test_vector_sadd_neg_neg(<2 x i8> %a) {
 
 define i8 @test_scalar_sadd_always_overflows_low(i8 %a) {
 ; CHECK-LABEL: @test_scalar_sadd_always_overflows_low(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], -120
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i8 [[A]], i8 -120
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[MIN]], i8 -10)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 -128
 ;
   %cmp = icmp slt i8 %a, -120
   %min = select i1 %cmp, i8 %a, i8 -120
@@ -354,10 +351,7 @@ define i8 @test_scalar_sadd_always_overflows_low(i8 %a) {
 
 define i8 @test_scalar_sadd_always_overflows_high(i8 %a) {
 ; CHECK-LABEL: @test_scalar_sadd_always_overflows_high(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A:%.*]], 120
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i8 [[A]], i8 120
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[MAX]], i8 10)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 127
 ;
   %cmp = icmp sgt i8 %a, 120
   %max = select i1 %cmp, i8 %a, i8 120
@@ -829,10 +823,7 @@ define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) {
 
 define i8 @test_scalar_ssub_always_overflows_low(i8 %a) {
 ; CHECK-LABEL: @test_scalar_ssub_always_overflows_low(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A:%.*]], 120
-; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i8 [[A]], i8 120
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 -10, i8 [[MAX]])
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 -128
 ;
   %cmp = icmp sgt i8 %a, 120
   %max = select i1 %cmp, i8 %a, i8 120
@@ -842,10 +833,7 @@ define i8 @test_scalar_ssub_always_overflows_low(i8 %a) {
 
 define i8 @test_scalar_ssub_always_overflows_high(i8 %a) {
 ; CHECK-LABEL: @test_scalar_ssub_always_overflows_high(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A:%.*]], -120
-; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i8 [[A]], i8 -120
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 10, i8 [[MIN]])
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 127
 ;
   %cmp = icmp slt i8 %a, -120
   %min = select i1 %cmp, i8 %a, i8 -120

From 96c500aab4f6ce8c10b26e1b2a45db21cfcd07b8 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Wed, 29 May 2019 18:37:49 +0000
Subject: [PATCH 0535/1176] [CMake] [Runtimes] Set *_STANDALONE_BUILD

Summary:
The runtimes use `*_STANDALONE_BUILD=OFF` to signify that clang is an in-tree target. This is not the case with the runtime builds, so we really need this set to `ON`.

In order to resolve the issues phosek was having with checks, we should use checks that don't link. We can use compiler-rt's `try_compile_only` as a basis for that.

This patch is *required* to be able to run the runtime libraries check-* targets.

Reviewers: smeenai, phosek, compnerd

Reviewed By: phosek

Subscribers: mgorny, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62410

llvm-svn: 362007
---
 llvm/runtimes/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index acf4f3e3e57ae..51ad2371f44f6 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -128,6 +128,12 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
     string(REPLACE "-" "_" canon_name ${projName})
     string(TOUPPER ${canon_name} canon_name)
 
+    # The subdirectories need to treat this as standalone builds. D57992 tried
+    # to get rid of this, but the runtimes treat *_STANDALONE_BUILD=OFF as if
+    # llvm & clang are configured in the same CMake, and setup dependencies
+    # against their targets.
+    set(${canon_name}_STANDALONE_BUILD ON)
+
     if(LLVM_RUNTIMES_LIBDIR_SUBDIR)
       set(${canon_name}_LIBDIR_SUBDIR "${LLVM_RUNTIMES_LIBDIR_SUBDIR}" CACHE STRING "" FORCE)
     endif()

From 8a88d6aaaa7e2a56a3df5dffb795d305361ee9b9 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 18:38:52 +0000
Subject: [PATCH 0536/1176] [analyzer] print() JSONify: SVal implementation

Summary: -

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Reviewed By: NoQ

Subscribers: cfe-commits, szepet, rnkovacs, a.sidorin, mikhail.ramalho,
             Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62497

llvm-svn: 362008
---
 .../clang/StaticAnalyzer/Core/PathSensitive/SVals.h    |  3 +++
 clang/lib/StaticAnalyzer/Core/Environment.cpp          |  6 ++++--
 clang/lib/StaticAnalyzer/Core/SVals.cpp                | 10 ++++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
index 8861f1504814d..1abe297820886 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
@@ -190,6 +190,9 @@ class SVal {
 
   const MemRegion *getAsRegion() const;
 
+  /// printJson - Pretty-prints in JSON format.
+  void printJson(raw_ostream &Out, bool AddQuotes) const;
+
   void dumpToStream(raw_ostream &OS) const;
   void dump() const;
 
diff --git a/clang/lib/StaticAnalyzer/Core/Environment.cpp b/clang/lib/StaticAnalyzer/Core/Environment.cpp
index 3ebb66c6af10c..94cc4d6dbb2fd 100644
--- a/clang/lib/StaticAnalyzer/Core/Environment.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Environment.cpp
@@ -263,10 +263,12 @@ void Environment::printJson(raw_ostream &Out, const ASTContext &Ctx,
       Indent(Out, InnerSpace, IsDot)
           << "{ \"lctx_id\": " << LC->getID()
           << ", \"stmt_id\": " << S->getID(Ctx) << ", \"pretty\": ";
-
       S->printJson(Out, nullptr, PP, /*AddQuotes=*/true);
 
-      Out << ", \"value\": \"" << I->second << "\" }";
+      Out << ", \"value\": ";
+      I->second.printJson(Out, /*AddQuotes=*/true);
+
+      Out << " }";
 
       if (I != LastI)
         Out << ',';
diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp
index b3c83e7792da3..9b5de6c3eb92b 100644
--- a/clang/lib/StaticAnalyzer/Core/SVals.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp
@@ -16,6 +16,7 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
+#include "clang/Basic/JsonSupport.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
@@ -283,6 +284,15 @@ SVal loc::ConcreteInt::evalBinOp(BasicValueFactory& BasicVals,
 
 LLVM_DUMP_METHOD void SVal::dump() const { dumpToStream(llvm::errs()); }
 
+void SVal::printJson(raw_ostream &Out, bool AddQuotes) const {
+  std::string Buf;
+  llvm::raw_string_ostream TempOut(Buf);
+
+  dumpToStream(TempOut);
+
+  Out << JsonFormat(TempOut.str(), AddQuotes);
+}
+
 void SVal::dumpToStream(raw_ostream &os) const {
   switch (getBaseKind()) {
     case UnknownValKind:

From e8b29c00dd0724c5dc06e981b2dc4fb391784178 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Wed, 29 May 2019 18:49:31 +0000
Subject: [PATCH 0537/1176] [analyzer] SATestBuild.py: Use driver for analyzing
 single-file tests.

Don't bother coming up with a -cc1 run-line ourselves.

This, in particular, gets rid of a macOS-specific code path.

llvm-svn: 362009
---
 clang/utils/analyzer/SATestBuild.py | 15 +++++----------
 clang/utils/analyzer/SATestUtils.py | 14 +-------------
 2 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index 1c96cd8838182..691ded80a1ef1 100755
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -343,18 +343,13 @@ def runAnalyzePreprocessed(Args, Dir, SBOutputDir, Mode):
                 BuildScript))
         raise Exception()
 
-    CmdPrefix = Clang + " -cc1 "
+    CmdPrefix = Clang + " --analyze "
 
-    # For now, we assume the preprocessed files should be analyzed
-    # with the OS X SDK.
-    SDKPath = SATestUtils.getSDKPath("macosx")
-    if SDKPath is not None:
-        CmdPrefix += "-isysroot " + SDKPath + " "
-
-    CmdPrefix += "-analyze -analyzer-output=plist -w "
-    CmdPrefix += "-analyzer-checker=" + Checkers
+    CmdPrefix += "--analyzer-output plist "
+    CmdPrefix += " -Xclang -analyzer-checker=" + Checkers
     CmdPrefix += " -fcxx-exceptions -fblocks "
-    CmdPrefix += " -analyzer-config %s " % generateAnalyzerConfig(Args)
+    CmdPrefix += " -Xclang -analyzer-config -Xclang %s "\
+        % generateAnalyzerConfig(Args)
 
     if (Mode == 2):
         CmdPrefix += "-std=c++11 "
diff --git a/clang/utils/analyzer/SATestUtils.py b/clang/utils/analyzer/SATestUtils.py
index 2320652619ed3..0ed4a4b02f793 100644
--- a/clang/utils/analyzer/SATestUtils.py
+++ b/clang/utils/analyzer/SATestUtils.py
@@ -1,5 +1,5 @@
 import os
-from subprocess import check_output, check_call
+from subprocess import check_call
 import sys
 
 
@@ -47,18 +47,6 @@ def isValidSingleInputFile(FileName):
     return Ext in (".i", ".ii", ".c", ".cpp", ".m", "")
 
 
-def getSDKPath(SDKName):
-    """
-    Get the path to the SDK for the given SDK name. Returns None if
-    the path cannot be determined.
-    """
-    if which("xcrun") is None:
-        return None
-
-    Cmd = "xcrun --sdk " + SDKName + " --show-sdk-path"
-    return check_output(Cmd, shell=True).rstrip()
-
-
 def runScript(ScriptPath, PBuildLogFile, Cwd, Stdout=sys.stdout,
               Stderr=sys.stderr):
     """

From 4dd6a82e2646da2dd6df444a9c3a837a56724469 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 29 May 2019 18:54:28 +0000
Subject: [PATCH 0538/1176] mac: Make ubsan test config look more like asan
 test config

In particular, don't call get_target_flags_for_arch() since that
will cause an error in some situations:

If DARWIN_iossim_ARCHS=i386;x86_64, DARWIN_osx_ARCHS=x86_64, and
DARWIN_iossym_SYSROOT isn't set (due to the simulator sysroot not being
available), then config-ix.cmake won't add i386 to COMPILER_RT_SUPPORTED_ARCH
but ubsan's test/CMakeLists.txt would call get_target_flags_for_arch()
with i386, which would then run into the error in
get_target_flags_for_arch().

Having these conditions isn't ideal. The background here is that we
configure our mac-hosted trunk bots all the same (so they all have the
same DARWIN_*_archs, and we don't easily know if a mac host bot is
targeting mac or ios at the place where we call cmake), but only the
ios-targeting bots have ios sysroots available.

This will hopefully unbreak that use case without impacting anything
else -- and it makes ubsan and asan test setup more alike.

llvm-svn: 362010
---
 compiler-rt/test/ubsan/CMakeLists.txt | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/compiler-rt/test/ubsan/CMakeLists.txt b/compiler-rt/test/ubsan/CMakeLists.txt
index 351bf3b829da3..60ef84d7530fb 100644
--- a/compiler-rt/test/ubsan/CMakeLists.txt
+++ b/compiler-rt/test/ubsan/CMakeLists.txt
@@ -113,10 +113,7 @@ if(APPLE)
   endif()
   foreach(platform ${UBSAN_APPLE_PLATFORMS})
     foreach(arch ${DARWIN_${platform}_ARCHS})
-      get_target_flags_for_arch(${arch} UBSAN_TEST_TARGET_ARCH_FLAGS_AS_LIST)
-      string(REPLACE ";" " " UBSAN_TEST_TARGET_ARCH_FLAGS "${UBSAN_TEST_TARGET_ARCH_FLAGS_AS_LIST}")
-      set(UBSAN_TEST_TARGET_CFLAGS
-        "${UBSAN_TEST_TARGET_ARCH_FLAGS} -isysroot ${DARWIN_${platform}_SYSROOT}")
+      set(UBSAN_TEST_TARGET_CFLAGS "-arch ${arch} -isysroot ${DARWIN_${platform}_SYSROOT}")
       if (";${UBSAN_SUPPORTED_ARCH};" MATCHES ";${arch};")
         add_ubsan_device_testsuite("Standalone" ubsan ${platform} ${arch})
       endif()

From ee37e28fd1c670ecabea64a15b9cc8698ca62b86 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 18:58:41 +0000
Subject: [PATCH 0539/1176] [analyzer] print() JSONify chain: Generic stmt_id

Summary: Some environment create less statements so make them generic.
llvm-svn: 362011
---
 clang/test/Analysis/dump_egraph.cpp   | 4 ++--
 clang/test/Analysis/expr-inspection.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index 5896f8666d68f..fd966cb78fd60 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -16,9 +16,9 @@ void foo() {
   T t;
 }
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": 1155, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": {{[0-9]+}}, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": 1092, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
+// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
 
 // CHECK: \"store\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
 
diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c
index aec4c1ebaeb22..a1fd952b26d59 100644
--- a/clang/test/Analysis/expr-inspection.c
+++ b/clang/test/Analysis/expr-inspection.c
@@ -31,7 +31,7 @@ void foo(int x) {
 // CHECK-NEXT:   ],
 // CHECK-NEXT:   "environment": [
 // CHECK-NEXT:     { "location_context": "#0 Call", "calling": "foo", "call_line": null, "items": [
-// CHECK-NEXT:       { "lctx_id": 1, "stmt_id": 847, "pretty": "clang_analyzer_printState", "value": "&code{clang_analyzer_printState}" }
+// CHECK-NEXT:       { "lctx_id": 1, "stmt_id": {{[0-9]+}}, "pretty": "clang_analyzer_printState", "value": "&code{clang_analyzer_printState}" }
 // CHECK-NEXT:     ]}
 // CHECK-NEXT:   ],
 // CHECK-NEXT:   "constraints": [

From 6e07f16fae605c42014aa4f1f2babf3e7767c95c Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 29 May 2019 19:12:48 +0000
Subject: [PATCH 0540/1176] IR: add optional type to 'byval' function
 parameters

When we switch to opaque pointer types we will need some way to describe
how many bytes a 'byval' parameter should occupy on the stack. This adds
a (for now) optional extra type parameter.

If present, the type must match the pointee type of the argument.

Note to front-end maintainers: if this causes test failures, it's probably
because the "byval" attribute is printed after attributes without any parameter
after this change.

llvm-svn: 362012
---
 llvm/docs/LangRef.rst                         |   5 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |   1 +
 llvm/include/llvm/IR/Argument.h               |   3 +
 llvm/include/llvm/IR/Attributes.h             |  20 ++++
 llvm/include/llvm/IR/CallSite.h               |   5 +
 llvm/include/llvm/IR/Function.h               |   5 +
 llvm/include/llvm/IR/InstrTypes.h             |   5 +
 llvm/lib/AsmParser/LLParser.cpp               |  24 +++-
 llvm/lib/AsmParser/LLParser.h                 |   1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  50 +++++++-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  15 ++-
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp   |   6 +-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |   5 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    |   8 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  18 ++-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   1 +
 llvm/lib/IR/AttributeImpl.h                   |  32 +++++-
 llvm/lib/IR/Attributes.cpp                    | 108 +++++++++++++++++-
 llvm/lib/IR/Function.cpp                      |   5 +
 llvm/lib/IR/Verifier.cpp                      |   5 +
 llvm/test/Assembler/byval-type-attr.ll        |  31 +++++
 llvm/test/Assembler/invalid-byval-type1.ll    |   4 +
 llvm/test/Assembler/invalid-byval-type2.ll    |   4 +
 llvm/test/Assembler/invalid-byval-type3.ll    |   4 +
 llvm/test/Bitcode/Inputs/byval-upgrade.bc     | Bin 0 -> 1092 bytes
 llvm/test/Bitcode/attributes-3.3.ll           |   2 +-
 llvm/test/Bitcode/attributes.ll               |   2 +-
 llvm/test/Bitcode/byval-upgrade.test          |   7 ++
 llvm/test/Bitcode/compatibility-3.6.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.7.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.8.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.9.ll        |   2 +-
 llvm/test/Bitcode/compatibility-4.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility-5.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility-6.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility.ll            |  11 +-
 llvm/test/Bitcode/highLevelStructure.3.2.ll   |   4 +-
 llvm/test/CodeGen/AArch64/byval-type.ll       |  37 ++++++
 .../test/Transforms/Inline/byval-tail-call.ll |   4 +-
 llvm/unittests/IR/AttributesTest.cpp          |  20 ++++
 40 files changed, 425 insertions(+), 41 deletions(-)
 create mode 100644 llvm/test/Assembler/byval-type-attr.ll
 create mode 100644 llvm/test/Assembler/invalid-byval-type1.ll
 create mode 100644 llvm/test/Assembler/invalid-byval-type2.ll
 create mode 100644 llvm/test/Assembler/invalid-byval-type3.ll
 create mode 100644 llvm/test/Bitcode/Inputs/byval-upgrade.bc
 create mode 100644 llvm/test/Bitcode/byval-upgrade.test
 create mode 100644 llvm/test/CodeGen/AArch64/byval-type.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 840272df33fd6..2105ce9d669f7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1017,7 +1017,7 @@ Currently, only the following parameter attributes are defined:
     opposed to memory, though some targets use it to distinguish between
     two different kinds of registers). Use of this attribute is
     target-specific.
-``byval``
+``byval`` or ``byval(<ty>)``
     This indicates that the pointer parameter should really be passed by
     value to the function. The attribute implies that a hidden copy of
     the pointee is made between the caller and the callee, so the callee
@@ -1029,6 +1029,9 @@ Currently, only the following parameter attributes are defined:
     ``byval`` parameters). This is not a valid attribute for return
     values.
 
+    The byval attribute also supports an optional type argument, which must be
+    the same as the pointee type of the argument.
+
     The byval attribute also supports specifying an alignment with the
     align attribute. It indicates the alignment of the stack slot to
     form and the known alignment of the pointer specified to the call
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cabca9cb22101..f06e01acae30c 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -188,6 +188,7 @@ class TargetLoweringBase {
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     uint16_t Alignment = 0;
+    Type *ByValType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index 7997c863b8300..952fbcdffb142 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -78,6 +78,9 @@ class Argument final : public Value {
   /// If this is a byval or inalloca argument, return its alignment.
   unsigned getParamAlignment() const;
 
+  /// If this is a byval argument, return its type.
+  Type *getParamByValType() const;
+
   /// Return true if this argument has the nest attribute.
   bool hasNestAttr() const;
 
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 8d7f4018e846c..06cc09e1cfc79 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -90,6 +90,7 @@ class Attribute {
   static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
   static Attribute get(LLVMContext &Context, StringRef Kind,
                        StringRef Val = StringRef());
+  static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
 
   /// Return a uniquified Attribute object that has the specific
   /// alignment set.
@@ -102,6 +103,7 @@ class Attribute {
   static Attribute getWithAllocSizeArgs(LLVMContext &Context,
                                         unsigned ElemSizeArg,
                                         const Optional<unsigned> &NumElemsArg);
+  static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
 
   //===--------------------------------------------------------------------===//
   // Attribute Accessors
@@ -117,6 +119,9 @@ class Attribute {
   /// attribute.
   bool isStringAttribute() const;
 
+  /// Return true if the attribute is a type attribute.
+  bool isTypeAttribute() const;
+
   /// Return true if the attribute is present.
   bool hasAttribute(AttrKind Val) const;
 
@@ -139,6 +144,10 @@ class Attribute {
   /// attribute to be a string attribute.
   StringRef getValueAsString() const;
 
+  /// Return the attribute's value as a Type. This requires the attribute to be
+  /// a type attribute.
+  Type *getValueAsType() const;
+
   /// Returns the alignment field of an attribute as a byte alignment
   /// value.
   unsigned getAlignment() const;
@@ -279,6 +288,7 @@ class AttributeSet {
   unsigned getStackAlignment() const;
   uint64_t getDereferenceableBytes() const;
   uint64_t getDereferenceableOrNullBytes() const;
+  Type *getByValType() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp = false) const;
 
@@ -598,6 +608,9 @@ class AttributeList {
   /// Return the alignment for the specified function parameter.
   unsigned getParamAlignment(unsigned ArgNo) const;
 
+  /// Return the byval type for the specified function parameter.
+  Type *getParamByValType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
@@ -697,6 +710,7 @@ class AttrBuilder {
   uint64_t DerefBytes = 0;
   uint64_t DerefOrNullBytes = 0;
   uint64_t AllocSizeArgs = 0;
+  Type *ByValType = nullptr;
 
 public:
   AttrBuilder() = default;
@@ -772,6 +786,9 @@ class AttrBuilder {
   /// dereferenceable_or_null attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
 
+  /// Retrieve the byval type.
+  Type *getByValType() const { return ByValType; }
+
   /// Retrieve the allocsize args, if the allocsize attribute exists.  If it
   /// doesn't exist, pair(0, 0) is returned.
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
@@ -796,6 +813,9 @@ class AttrBuilder {
   AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg);
 
+  /// This turns a byval type into the form used internally in Attribute.
+  AttrBuilder &addByValAttr(Type *Ty);
+
   /// Add an allocsize attribute, using the representation returned by
   /// Attribute.getIntValue().
   AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h
index 183e387a422e5..b47a96c5d5faa 100644
--- a/llvm/include/llvm/IR/CallSite.h
+++ b/llvm/include/llvm/IR/CallSite.h
@@ -415,6 +415,11 @@ class CallSiteBase {
     CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
+  }
+
   /// Extract the number of dereferenceable bytes for a call or parameter
   /// (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index 613d21bc64d24..896c2189eb824 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -431,6 +431,11 @@ class Function : public GlobalObject, public ilist_node<Function> {
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    return AttributeSets.getParamByValType(ArgNo);
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 7ffa7a6f60e8f..6ce76811c0e66 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1560,6 +1560,11 @@ class CallBase : public Instruction {
     return Attrs.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    return Attrs.getParamByValType(ArgNo);
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 95646675cb279..0a9a09e644d7a 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1601,7 +1601,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
       B.addAlignmentAttr(Alignment);
       continue;
     }
-    case lltok::kw_byval:           B.addAttribute(Attribute::ByVal); break;
+    case lltok::kw_byval: {
+      Type *Ty;
+      if (ParseByValWithOptionalType(Ty))
+        return true;
+      B.addByValAttr(Ty);
+      continue;
+    }
     case lltok::kw_dereferenceable: {
       uint64_t Bytes;
       if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -2454,6 +2460,22 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
   return false;
 }
 
+/// ParseByValWithOptionalType
+///   ::= byval
+///   ::= byval(<ty>)
+bool LLParser::ParseByValWithOptionalType(Type *&Result) {
+  Result = nullptr;
+  if (!EatIfPresent(lltok::kw_byval))
+    return true;
+  if (!EatIfPresent(lltok::lparen))
+    return false;
+  if (ParseType(Result))
+    return true;
+  if (!EatIfPresent(lltok::rparen))
+    return Error(Lex.getLoc(), "expected ')'");
+  return false;
+}
+
 /// ParseOptionalOperandBundles
 ///    ::= /*empty*/
 ///    ::= '[' OperandBundle [, OperandBundle ]* ']'
diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h
index ad169afb93584..ec4a61b5498d8 100644
--- a/llvm/lib/AsmParser/LLParser.h
+++ b/llvm/lib/AsmParser/LLParser.h
@@ -339,6 +339,7 @@ namespace llvm {
     bool ParseFnAttributeValuePairs(AttrBuilder &B,
                                     std::vector<unsigned> &FwdRefAttrGrps,
                                     bool inAttrGrp, LocTy &BuiltinLoc);
+    bool ParseByValWithOptionalType(Type *&Result);
 
     // Module Summary Index Parsing.
     bool SkipModuleSummaryEntry();
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 134ce0367031e..9f562ba82db93 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -638,6 +638,10 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
     return getFnValueByID(ValNo, Ty);
   }
 
+  /// Upgrades old-style typeless byval attributes by adding the corresponding
+  /// argument's pointee type.
+  void propagateByValTypes(CallBase *CB);
+
   /// Converts alignment exponent (i.e. power of two (or zero)) to the
   /// corresponding alignment to use. If alignment is too large, returns
   /// a corresponding error code.
@@ -1492,6 +1496,12 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           if (Error Err = parseAttrKind(Record[++i], &Kind))
             return Err;
 
+          // Upgrade old-style byval attribute to one with a type, even if it's
+          // nullptr. We will have to insert the real type when we associate
+          // this AttributeList with a function.
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(nullptr);
+
           B.addAttribute(Kind);
         } else if (Record[i] == 1) { // Integer attribute
           Attribute::AttrKind Kind;
@@ -1507,9 +1517,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
             B.addDereferenceableOrNullAttr(Record[++i]);
           else if (Kind == Attribute::AllocSize)
             B.addAllocSizeAttrFromRawRepr(Record[++i]);
-        } else {                     // String attribute
-          assert((Record[i] == 3 || Record[i] == 4) &&
-                 "Invalid attribute group entry");
+        } else if (Record[i] == 3 || Record[i] == 4) { // String attribute
           bool HasValue = (Record[i++] == 4);
           SmallString<64> KindStr;
           SmallString<64> ValStr;
@@ -1527,6 +1535,15 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           }
 
           B.addAttribute(KindStr.str(), ValStr.str());
+        } else {
+          assert((Record[i] == 5 || Record[i] == 6) &&
+                 "Invalid attribute group entry");
+          bool HasType = Record[i] == 6;
+          Attribute::AttrKind Kind;
+          if (Error Err = parseAttrKind(Record[++i], &Kind))
+            return Err;
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
         }
       }
 
@@ -3028,6 +3045,17 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   Func->setLinkage(getDecodedLinkage(RawLinkage));
   Func->setAttributes(getAttributes(Record[4]));
 
+  // Upgrade any old-style byval without a type by propagating the argument's
+  // pointee type. There should be no opaque pointers where the byval type is
+  // implicit.
+  for (auto &Arg : Func->args()) {
+    if (Arg.hasByValAttr() && !Arg.getParamByValType()) {
+      Arg.removeAttr(Attribute::ByVal);
+      Arg.addAttr(Attribute::getWithByValType(
+          Context, Arg.getType()->getPointerElementType()));
+    }
+  }
+
   unsigned Alignment;
   if (Error Err = parseAlignmentValue(Record[5], Alignment))
     return Err;
@@ -3441,6 +3469,19 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
   return Error::success();
 }
 
+void BitcodeReader::propagateByValTypes(CallBase *CB) {
+  for (unsigned i = 0; i < CB->getNumArgOperands(); ++i) {
+    if (CB->paramHasAttr(i, Attribute::ByVal) &&
+        !CB->getAttribute(i, Attribute::ByVal).getValueAsType()) {
+      CB->removeParamAttr(i, Attribute::ByVal);
+      CB->addParamAttr(
+          i, Attribute::getWithByValType(
+                 Context,
+                 CB->getArgOperand(i)->getType()->getPointerElementType()));
+    }
+  }
+}
+
 /// Lazily parse the specified function body block.
 Error BitcodeReader::parseFunctionBody(Function *F) {
   if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
@@ -4256,6 +4297,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       cast<InvokeInst>(I)->setCallingConv(
           static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
       cast<InvokeInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I));
+
       break;
     }
     case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
@@ -4731,6 +4774,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         TCK = CallInst::TCK_NoTail;
       cast<CallInst>(I)->setTailCallKind(TCK);
       cast<CallInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I));
       if (FMF.any()) {
         if (!isa<FPMathOperator>(I))
           return error("Fast-math-flags specified for call without "
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8e1e06226bb46..d243815667fdf 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -747,7 +747,7 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
         Record.push_back(1);
         Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
         Record.push_back(Attr.getValueAsInt());
-      } else {
+      } else if (Attr.isStringAttribute()) {
         StringRef Kind = Attr.getKindAsString();
         StringRef Val = Attr.getValueAsString();
 
@@ -758,6 +758,13 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
           Record.append(Val.begin(), Val.end());
           Record.push_back(0);
         }
+      } else {
+        assert(Attr.isTypeAttribute());
+        Type *Ty = Attr.getValueAsType();
+        Record.push_back(Ty ? 6 : 5);
+        Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+        if (Ty)
+          Record.push_back(VE.getTypeID(Attr.getValueAsType()));
       }
     }
 
@@ -4126,15 +4133,15 @@ void ModuleBitcodeWriter::write() {
   // Emit blockinfo, which defines the standard abbreviations etc.
   writeBlockInfo();
 
+  // Emit information describing all of the types in the module.
+  writeTypeTable();
+
   // Emit information about attribute groups.
   writeAttributeGroupTable();
 
   // Emit information about parameter attributes.
   writeAttributeTable();
 
-  // Emit information describing all of the types in the module.
-  writeTypeTable();
-
   writeComdats();
 
   // Emit top-level description of module, including target triple, inline asm,
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 72d7000fad9dc..143570fb20a8c 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -949,9 +949,11 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
   incorporateFunctionMetadata(F);
 
   // Adding function arguments to the value table.
-  for (const auto &I : F.args())
+  for (const auto &I : F.args()) {
     EnumerateValue(&I);
-
+    if (I.hasAttribute(Attribute::ByVal) && I.getParamByValType())
+      EnumerateType(I.getParamByValType());
+  }
   FirstFuncConstantID = Values.size();
 
   // Add all function-level constants to the value table.
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index f144b18aa6358..93727406a087a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -87,7 +87,10 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
 
   if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
-    Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+    auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
+    Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8fb1a7b5bb9c2..d887ed73c441e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1204,9 +1204,11 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
     if (Arg.IsByVal || Arg.IsInAlloca) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
-      unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
-      // For ByVal, alignment should come from FE. BE will guess if this info is
-      // not there, but there are cases it cannot get right.
+      unsigned FrameSize =
+          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+      // For ByVal, alignment should come from FE. BE will guess if this info
+      // is not there, but there are cases it cannot get right.
       unsigned FrameAlign = Arg.Alignment;
       if (!FrameAlign)
         FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fe857f73b2548..da06ac7a414ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9076,8 +9076,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       if (Args[i].IsByVal || Args[i].IsInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should come from FE.  BE will guess if this
+
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Args[i].ByValType ? Args[i].ByValType : ElementTy);
+        Flags.setByValSize(FrameSize);
+
         // info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
         if (Args[i].Alignment)
@@ -9574,9 +9577,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       if (Flags.isByVal() || Flags.isInAlloca()) {
         PointerType *Ty = cast<PointerType>(Arg.getType());
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should be passed from FE.  BE will guess if
-        // this info is not there but there are cases it cannot get right.
+
+        // For ByVal, size and alignment should be passed from FE.  BE will
+        // guess if this info is not there but there are cases it cannot get
+        // right.
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Arg.getParamByValType() ? Arg.getParamByValType() : ElementTy);
+        Flags.setByValSize(FrameSize);
+
         unsigned FrameAlign;
         if (Arg.getParamAlignment())
           FrameAlign = Arg.getParamAlignment();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d636e613363e4..4ad578d80fab6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -112,6 +112,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlignment(ArgIdx);
+  ByValType = Call->getParamByValType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 8ebcb04a565d1..f6898476382dd 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -29,6 +29,7 @@
 namespace llvm {
 
 class LLVMContext;
+class Type;
 
 //===----------------------------------------------------------------------===//
 /// \class
@@ -41,7 +42,8 @@ class AttributeImpl : public FoldingSetNode {
   enum AttrEntryKind {
     EnumAttrEntry,
     IntAttrEntry,
-    StringAttrEntry
+    StringAttrEntry,
+    TypeAttrEntry,
   };
 
   AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -56,6 +58,7 @@ class AttributeImpl : public FoldingSetNode {
   bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
   bool isIntAttribute() const { return KindID == IntAttrEntry; }
   bool isStringAttribute() const { return KindID == StringAttrEntry; }
+  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
 
   bool hasAttribute(Attribute::AttrKind A) const;
   bool hasAttribute(StringRef Kind) const;
@@ -66,16 +69,20 @@ class AttributeImpl : public FoldingSetNode {
   StringRef getKindAsString() const;
   StringRef getValueAsString() const;
 
+  Type *getValueAsType() const;
+
   /// Used when sorting the attributes.
   bool operator<(const AttributeImpl &AI) const;
 
   void Profile(FoldingSetNodeID &ID) const {
     if (isEnumAttribute())
-      Profile(ID, getKindAsEnum(), 0);
+      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
     else if (isIntAttribute())
       Profile(ID, getKindAsEnum(), getValueAsInt());
-    else
+    else if (isStringAttribute())
       Profile(ID, getKindAsString(), getValueAsString());
+    else
+      Profile(ID, getKindAsEnum(), getValueAsType());
   }
 
   static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
@@ -88,6 +95,12 @@ class AttributeImpl : public FoldingSetNode {
     ID.AddString(Kind);
     if (!Values.empty()) ID.AddString(Values);
   }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      Type *Ty) {
+    ID.AddInteger(Kind);
+    ID.AddPointer(Ty);
+  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -145,6 +158,18 @@ class StringAttributeImpl : public AttributeImpl {
   StringRef getStringValue() const { return Val; }
 };
 
+class TypeAttributeImpl : public EnumAttributeImpl {
+  virtual void anchor();
+
+  Type *Ty;
+
+public:
+  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+  Type *getTypeValue() const { return Ty; }
+};
+
 //===----------------------------------------------------------------------===//
 /// \class
 /// This class represents a group of attributes that apply to one
@@ -189,6 +214,7 @@ class AttributeSetNode final
   uint64_t getDereferenceableOrNullBytes() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp) const;
+  Type *getByValType() const;
 
   using iterator = const Attribute *;
 
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 0be09a05e82ab..839ef46b4f41d 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -121,6 +121,27 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
   return Attribute(PA);
 }
 
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+                         Type *Ty) {
+  LLVMContextImpl *pImpl = Context.pImpl;
+  FoldingSetNodeID ID;
+  ID.AddInteger(Kind);
+  ID.AddPointer(Ty);
+
+  void *InsertPoint;
+  AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+  if (!PA) {
+    // If we didn't find any existing attributes of the same shape then create a
+    // new one and insert it.
+    PA = new TypeAttributeImpl(Kind, Ty);
+    pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+  }
+
+  // Return the Attribute that we found or created.
+  return Attribute(PA);
+}
+
 Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x40000000 && "Alignment too large.");
@@ -146,6 +167,10 @@ Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
   return get(Context, DereferenceableOrNull, Bytes);
 }
 
+Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
+  return get(Context, ByVal, Ty);
+}
+
 Attribute
 Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg) {
@@ -170,9 +195,13 @@ bool Attribute::isStringAttribute() const {
   return pImpl && pImpl->isStringAttribute();
 }
 
+bool Attribute::isTypeAttribute() const {
+  return pImpl && pImpl->isTypeAttribute();
+}
+
 Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
-  assert((isEnumAttribute() || isIntAttribute()) &&
+  assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
   return pImpl->getKindAsEnum();
 }
@@ -198,6 +227,14 @@ StringRef Attribute::getValueAsString() const {
   return pImpl->getValueAsString();
 }
 
+Type *Attribute::getValueAsType() const {
+  if (!pImpl) return {};
+  assert(isTypeAttribute() &&
+         "Invalid attribute type to get the value as a type!");
+  return pImpl->getValueAsType();
+}
+
+
 bool Attribute::hasAttribute(AttrKind Kind) const {
   return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
 }
@@ -252,8 +289,6 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "argmemonly";
   if (hasAttribute(Attribute::Builtin))
     return "builtin";
-  if (hasAttribute(Attribute::ByVal))
-    return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
   if (hasAttribute(Attribute::SwiftError))
@@ -353,6 +388,19 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
   if (hasAttribute(Attribute::ImmArg))
     return "immarg";
 
+  if (hasAttribute(Attribute::ByVal)) {
+    std::string Result;
+    Result += "byval";
+    if (Type *Ty = getValueAsType()) {
+      raw_string_ostream OS(Result);
+      Result += '(';
+      Ty->print(OS, false, true);
+      OS.flush();
+      Result += ')';
+    }
+    return Result;
+  }
+
   // FIXME: These should be output like this:
   //
   //   align=4
@@ -451,6 +499,8 @@ void IntAttributeImpl::anchor() {}
 
 void StringAttributeImpl::anchor() {}
 
+void TypeAttributeImpl::anchor() {}
+
 bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
   if (isStringAttribute()) return false;
   return getKindAsEnum() == A;
@@ -462,7 +512,7 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {
 }
 
 Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
-  assert(isEnumAttribute() || isIntAttribute());
+  assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute());
   return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
 }
 
@@ -481,6 +531,11 @@ StringRef AttributeImpl::getValueAsString() const {
   return static_cast<const StringAttributeImpl *>(this)->getStringValue();
 }
 
+Type *AttributeImpl::getValueAsType() const {
+  assert(isTypeAttribute());
+  return static_cast<const TypeAttributeImpl *>(this)->getTypeValue();
+}
+
 bool AttributeImpl::operator<(const AttributeImpl &AI) const {
   // This sorts the attributes with Attribute::AttrKinds coming first (sorted
   // relative to their enum value) and then strings.
@@ -488,10 +543,23 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
     if (AI.isIntAttribute()) return true;
     if (AI.isStringAttribute()) return true;
+    if (AI.isTypeAttribute()) return true;
+  }
+
+  if (isTypeAttribute()) {
+    if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) {
+      assert(getKindAsEnum() != AI.getKindAsEnum() &&
+             "Comparison of types would be unstable");
+      return getKindAsEnum() < AI.getKindAsEnum();
+    }
+    if (AI.isIntAttribute()) return true;
+    if (AI.isStringAttribute()) return true;
   }
 
   if (isIntAttribute()) {
     if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) return false;
     if (AI.isIntAttribute()) {
       if (getKindAsEnum() == AI.getKindAsEnum())
         return getValueAsInt() < AI.getValueAsInt();
@@ -500,7 +568,9 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isStringAttribute()) return true;
   }
 
+  assert(isStringAttribute());
   if (AI.isEnumAttribute()) return false;
+  if (AI.isTypeAttribute()) return false;
   if (AI.isIntAttribute()) return false;
   if (getKindAsString() == AI.getKindAsString())
     return getValueAsString() < AI.getValueAsString();
@@ -608,6 +678,10 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
   return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
 }
 
+Type *AttributeSet::getByValType() const {
+  return SetNode ? SetNode->getByValType() : nullptr;
+}
+
 std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
   return SetNode ? SetNode->getAllocSizeArgs()
                  : std::pair<unsigned, Optional<unsigned>>(0, 0);
@@ -691,6 +765,9 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
 
     Attribute Attr;
     switch (Kind) {
+    case Attribute::ByVal:
+      Attr = Attribute::getWithByValType(C, B.getByValType());
+      break;
     case Attribute::Alignment:
       Attr = Attribute::getWithAlignment(C, B.getAlignment());
       break;
@@ -760,6 +837,13 @@ unsigned AttributeSetNode::getStackAlignment() const {
   return 0;
 }
 
+Type *AttributeSetNode::getByValType() const {
+  for (const auto I : *this)
+    if (I.hasAttribute(Attribute::ByVal))
+      return I.getValueAsType();
+  return 0;
+}
+
 uint64_t AttributeSetNode::getDereferenceableBytes() const {
   for (const auto I : *this)
     if (I.hasAttribute(Attribute::Dereferenceable))
@@ -1258,6 +1342,11 @@ unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
   return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
+Type *AttributeList::getParamByValType(unsigned Index) const {
+  return getAttributes(Index+FirstArgIndex).getByValType();
+}
+
+
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
@@ -1336,6 +1425,7 @@ void AttrBuilder::clear() {
   TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
+  ByValType = nullptr;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1360,6 +1450,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
     Alignment = Attr.getAlignment();
   else if (Kind == Attribute::StackAlignment)
     StackAlignment = Attr.getStackAlignment();
+  else if (Kind == Attribute::ByVal)
+    ByValType = Attr.getValueAsType();
   else if (Kind == Attribute::Dereferenceable)
     DerefBytes = Attr.getDereferenceableBytes();
   else if (Kind == Attribute::DereferenceableOrNull)
@@ -1382,6 +1474,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
     Alignment = 0;
   else if (Val == Attribute::StackAlignment)
     StackAlignment = 0;
+  else if (Val == Attribute::ByVal)
+    ByValType = nullptr;
   else if (Val == Attribute::Dereferenceable)
     DerefBytes = 0;
   else if (Val == Attribute::DereferenceableOrNull)
@@ -1464,6 +1558,12 @@ AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
   return *this;
 }
 
+AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
+  Attrs[Attribute::ByVal] = true;
+  ByValType = Ty;
+  return *this;
+}
+
 AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   // FIXME: What if both have alignments, but they don't match?!
   if (!Alignment)
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index b00deb677b31d..a4a78ca4deb96 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -113,6 +113,11 @@ unsigned Argument::getParamAlignment() const {
   return getParent()->getParamAlignment(getArgNo());
 }
 
+Type *Argument::getParamByValType() const {
+  assert(getType()->isPointerTy() && "Only pointers have byval types");
+  return getParent()->getParamByValType(getArgNo());
+}
+
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index d5c3287e7134c..963bf82c98982 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1695,6 +1695,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
          "'noinline and alwaysinline' are incompatible!",
          V);
 
+  if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
+    Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
+           "Attribute 'byval' type does not match parameter!");
+  }
+
   AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
   Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
          "Wrong types for attribute: " +
diff --git a/llvm/test/Assembler/byval-type-attr.ll b/llvm/test/Assembler/byval-type-attr.ll
new file mode 100644
index 0000000000000..eb17a30db3b2e
--- /dev/null
+++ b/llvm/test/Assembler/byval-type-attr.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: define void @foo(i32* byval(i32) align 4)
+define void @foo(i32* byval(i32) align 4) {
+  ret void
+}
+
+; CHECK: define void @bar({ i32*, i8 }* byval({ i32*, i8 }) align 4)
+define void @bar({i32*, i8}* byval({i32*, i8}) align 4) {
+  ret void
+}
+
+define void @caller({ i32*, i8 }* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+; CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+  call void @bar({i32*, i8}* byval %ptr)
+  invoke void @bar({i32*, i8}* byval %ptr) to label %success unwind label %fail
+
+success:
+  ret void
+
+fail:
+  landingpad { i8*, i32 } cleanup
+  ret void
+}
+
+; CHECK: declare void @baz([8 x i8]* byval([8 x i8]))
+%named_type = type [8 x i8]
+declare void @baz(%named_type* byval(%named_type))
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Assembler/invalid-byval-type1.ll b/llvm/test/Assembler/invalid-byval-type1.ll
new file mode 100644
index 0000000000000..236790e114289
--- /dev/null
+++ b/llvm/test/Assembler/invalid-byval-type1.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attribute 'byval' type does not match parameter!
+declare void @foo(i32* byval(i8))
diff --git a/llvm/test/Assembler/invalid-byval-type2.ll b/llvm/test/Assembler/invalid-byval-type2.ll
new file mode 100644
index 0000000000000..3ca0d5e7c4c2b
--- /dev/null
+++ b/llvm/test/Assembler/invalid-byval-type2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: void type only allowed for function results
+declare void @foo(i32* byval(void))
diff --git a/llvm/test/Assembler/invalid-byval-type3.ll b/llvm/test/Assembler/invalid-byval-type3.ll
new file mode 100644
index 0000000000000..4626dd71c5b56
--- /dev/null
+++ b/llvm/test/Assembler/invalid-byval-type3.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attributes 'byval' and 'inalloca' do not support unsized types!
+declare void @foo(void()* byval(void()))
diff --git a/llvm/test/Bitcode/Inputs/byval-upgrade.bc b/llvm/test/Bitcode/Inputs/byval-upgrade.bc
new file mode 100644
index 0000000000000000000000000000000000000000..e0da41f78e773b0d9154313098c2adbc5d3e5a71
GIT binary patch
literal 1092
zcmX|APfXip6#o(jY#?(CjkZv1%LdkE)kNK}b;+DaB&J=^IPhnh(A0^9wG6hDIUx#x
zPL;uFopw=`QVyBaLy@@bmQHBufdj#`%a~Vn)iiNns1XdIilJ!hq)qB!FJamzeedV@
z^!)z(-unX5>h@c9gfN8Ah*fA;sG1_Eh_@p24`73tM`pm6;o`GxxBNv*Pc7y0%$riy
zS8Ir7*7D_uVn1sR%r^v<D3{$$INqyw4WxteI^Wjaq-7}oy!k?rxM%02b7}5-lWQR5
zIO#l@J^e-g{4MT55#shc5V8Qa+~m+TjyppI5M)QF2ke#H$L2x-4)8h%3eeRWtm7_R
zwcKu9N`cr(EoSXay*Dxqsij`3s|`(BLS5mT==7YU`v$82>bNEF<{n}G5Hk&;bA2X+
zj`P;>USsFf57|1Qnh0Vdq#%XA?dQ9y%8`&xToM#S{BLPLL)d@C5h|KZwP19yIcsxc
zZ#GkR;@yW9Y2e$}mCcItJ*fK%U?-u_>m_>BZt|ricS_F$6nr{~Pv)Z5J!JcxBA+W{
zr!~N?6_`hh^r%o%byHgm%vY3r8O&!SGjSQeOyJW&O_uQ)8A~KJnbVq$M)Dv_mIT{`
zVB0B^<vy~LT4%O-dXs0?!tAJUU0hMc$C76m^RB4g6eb#Fdm@Bi9>d=r(_$bMP0ng^
z2v0ci#NRa#c1qEP2`%c-VmWO(H~Yq6nLK<-{<TUT2-rw*k=|XTjjG%V(;Ga!KFe%`
zYk$c~o}%Q*Rpou%y&$>Mn7E*yf}}6z#Gf%SuUo$x*W@8BCadu=JQ>pB4lRCHYko9n
zJ4CiW`pDfZxzlAUapVDX!8w_&0=wpCpYZh7EW7oT-YqfDOzfzm-<_A<xuc3kU;BTJ
zgtVz7p3L?4KF``J46vWF8)15%XSNH>6%$)2m}ko2+V`)jZbKLIk`D-ZvF6>C#5AaS
z=0QDls!c&X1U}=$W4YP$I}~}aO8(YI{wcH-p7`0_(kq)KW*Z6!Hj8=Hx2(3LbazJb
z7`pd<!Bf<sN3RiFjuJRj6$Ju^aP&3IzX9goP#<#wulUT6oRfS&PhRpo*5~%<hl?-m
z`zvGL<C&v+(ND10m7T&3?~*Fs0TMCaUDXSOW_4Iu6y^}J0UopHX+N)U4oA1}BVZFz
z+ql0{C|i_kSjDdf?gxT*FmBSi>&$%5Yh8Bq4cICt${oFP7fH3I(6fqr^?U@6fP}9m
zY~izt;p1>5axpeM{7Eu7EMJUGem)V7jK`;k6T(P1=IIbRgvrE2{F2-eiB$jp7n@*I
AZvX%Q

literal 0
HcmV?d00001

diff --git a/llvm/test/Bitcode/attributes-3.3.ll b/llvm/test/Bitcode/attributes-3.3.ll
index b564425c37381..f1b37d1991228 100644
--- a/llvm/test/Bitcode/attributes-3.3.ll
+++ b/llvm/test/Bitcode/attributes-3.3.ll
@@ -48,7 +48,7 @@ define void @f7(i8* noalias)
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval(i8))
 {
         ret void;
 }
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index de3cf8dd4d73a..6f149c0d3bf7b 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -45,7 +45,7 @@ define void @f7(i8* noalias)
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval(i8))
 {
         ret void;
 }
diff --git a/llvm/test/Bitcode/byval-upgrade.test b/llvm/test/Bitcode/byval-upgrade.test
new file mode 100644
index 0000000000000..1012bf728830f
--- /dev/null
+++ b/llvm/test/Bitcode/byval-upgrade.test
@@ -0,0 +1,7 @@
+RUN: llvm-dis %p/Inputs/byval-upgrade.bc -o - | FileCheck %s
+
+Make sure we upgrade old-stile IntAttribute byval records to a fully typed
+version correctly.
+
+CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll
index e9313dfba870e..322c95b02d1f8 100644
--- a/llvm/test/Bitcode/compatibility-3.6.ll
+++ b/llvm/test/Bitcode/compatibility-3.6.ll
@@ -404,7 +404,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll
index 82fc99055357a..e8260741373dd 100644
--- a/llvm/test/Bitcode/compatibility-3.7.ll
+++ b/llvm/test/Bitcode/compatibility-3.7.ll
@@ -410,7 +410,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll
index 2e70a380d10ed..c8d8261915595 100644
--- a/llvm/test/Bitcode/compatibility-3.8.ll
+++ b/llvm/test/Bitcode/compatibility-3.8.ll
@@ -435,7 +435,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll
index 7c84daa7d3c44..f031e54a9f211 100644
--- a/llvm/test/Bitcode/compatibility-3.9.ll
+++ b/llvm/test/Bitcode/compatibility-3.9.ll
@@ -504,7 +504,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll
index 9e34d48c95f76..8020df45da6a5 100644
--- a/llvm/test/Bitcode/compatibility-4.0.ll
+++ b/llvm/test/Bitcode/compatibility-4.0.ll
@@ -504,7 +504,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll
index a4b3fca82b7bf..3b32d668af557 100644
--- a/llvm/test/Bitcode/compatibility-5.0.ll
+++ b/llvm/test/Bitcode/compatibility-5.0.ll
@@ -508,7 +508,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll
index 097e19ecb7ff8..3b08c4a72311d 100644
--- a/llvm/test/Bitcode/compatibility-6.0.ll
+++ b/llvm/test/Bitcode/compatibility-6.0.ll
@@ -515,7 +515,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 06b81fa14a819..991bde69b61d7 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -529,7 +529,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
@@ -1735,6 +1735,15 @@ define i8** @constexpr() {
 declare void @llvm.test.immarg.intrinsic(i32 immarg)
 ; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
 
+; byval attribute with type
+%named_type = type [8 x i8]
+declare void @byval_type(i32* byval(i32) align 2)
+declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
+declare void @byval_named_type(%named_type* byval(%named_type))
+; CHECK: declare void @byval_type(i32* byval(i32) align 2)
+; CHECK: declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
+; CHECK: declare void @byval_named_type([8 x i8]* byval([8 x i8]))
+
 ; CHECK: attributes #0 = { alignstack=4 }
 ; CHECK: attributes #1 = { alignstack=8 }
 ; CHECK: attributes #2 = { alwaysinline }
diff --git a/llvm/test/Bitcode/highLevelStructure.3.2.ll b/llvm/test/Bitcode/highLevelStructure.3.2.ll
index 749b157cffc32..91d6ee4ac2574 100644
--- a/llvm/test/Bitcode/highLevelStructure.3.2.ll
+++ b/llvm/test/Bitcode/highLevelStructure.3.2.ll
@@ -41,7 +41,7 @@ declare void @ParamAttr3(i8* sret)
 declare void @ParamAttr4(i8 signext)
 ; CHECK: declare void @ParamAttr5(i8* inreg)
 declare void @ParamAttr5(i8* inreg)
-; CHECK: declare void @ParamAttr6(i8* byval)
+; CHECK: declare void @ParamAttr6(i8* byval(i8))
 declare void @ParamAttr6(i8* byval)
 ; CHECK: declare void @ParamAttr7(i8* noalias)
 declare void @ParamAttr7(i8* noalias)
@@ -51,7 +51,7 @@ declare void @ParamAttr8(i8* nocapture)
 declare void @ParamAttr9(i8* nest noalias nocapture)
 ; CHECK: declare void @ParamAttr10{{[(i8* sret noalias nocapture) | (i8* noalias nocapture sret)]}}
 declare void @ParamAttr10(i8* sret noalias nocapture)
-;CHECK: declare void @ParamAttr11{{[(i8* byval noalias nocapture) | (i8* noalias nocapture byval)]}}
+;CHECK: declare void @ParamAttr11{{[(i8* byval(i8) noalias nocapture) | (i8* noalias nocapture byval(i8))]}}
 declare void @ParamAttr11(i8* byval noalias nocapture)
 ;CHECK: declare void @ParamAttr12{{[(i8* inreg noalias nocapture) | (i8* noalias nocapture inreg)]}}
 declare void @ParamAttr12(i8* inreg noalias nocapture)
diff --git a/llvm/test/CodeGen/AArch64/byval-type.ll b/llvm/test/CodeGen/AArch64/byval-type.ll
new file mode 100644
index 0000000000000..0c2e2dc471dd9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/byval-type.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+
+define i8 @byval_match(i8* byval(i8) align 1, i8* byval %ptr) {
+; CHECK-LABEL: byval_match:
+; CHECK: ldrb w0, [sp, #8]
+  %res = load i8, i8* %ptr
+  ret i8 %res
+}
+
+define void @caller_match(i8* %p0, i8* %p1) {
+; CHECK-LABEL: caller_match:
+; CHECK: ldrb [[P1:w[0-9]+]], [x1]
+; CHECK: strb [[P1]], [sp, #8]
+; CHECK: ldrb [[P0:w[0-9]+]], [x0]
+; CHECK: strb [[P0]], [sp]
+; CHECK: bl byval_match
+  call i8 @byval_match(i8* byval(i8) align 1 %p0, i8* byval %p1)
+  ret void
+}
+
+define i8 @byval_large([3 x i64]* byval([3 x i64]) align 8, i8* byval %ptr) {
+; CHECK-LABEL: byval_large:
+; CHECK: ldrb w0, [sp, #24]
+  %res = load i8, i8* %ptr
+  ret i8 %res
+}
+
+define void @caller_large([3 x i64]* %p0, i8* %p1) {
+; CHECK-LABEL: caller_large:
+; CHECK: ldr [[P0HI:x[0-9]+]], [x0, #16]
+; CHECK: ldr [[P0LO:q[0-9]+]], [x0]
+; CHECK: str [[P0HI]], [sp, #16]
+; CHECK: str [[P0LO]], [sp]
+; CHECK: bl byval_large
+  call i8 @byval_large([3 x i64]* byval([3 x i64]) align 8 %p0, i8* byval %p1)
+  ret void
+}
diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll
index 8aafe7943f4ee..be495f1bcd379 100644
--- a/llvm/test/Transforms/Inline/byval-tail-call.ll
+++ b/llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -56,7 +56,7 @@ define void @foobar(i32* %x) {
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
+; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
 ; CHECK: ret void
   tail call void @bar2(i32* byval %x)
   ret void
@@ -67,7 +67,7 @@ define void @barfoo() {
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
+; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
 ; CHECK: ret void
   %x = alloca i32
   tail call void @bar2(i32* byval %x)
diff --git a/llvm/unittests/IR/AttributesTest.cpp b/llvm/unittests/IR/AttributesTest.cpp
index e0be2343a1445..06da35aca5735 100644
--- a/llvm/unittests/IR/AttributesTest.cpp
+++ b/llvm/unittests/IR/AttributesTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 
@@ -40,6 +41,10 @@ TEST(Attributes, Ordering) {
   EXPECT_TRUE(Align4 < Deref5);
   EXPECT_TRUE(Align5 < Deref4);
 
+  Attribute ByVal = Attribute::get(C, Attribute::ByVal, Type::getInt32Ty(C));
+  EXPECT_FALSE(ByVal < Attribute::get(C, Attribute::ZExt));
+  EXPECT_TRUE(ByVal < Align4);
+
   AttributeList ASs[] = {AttributeList::get(C, 2, Attribute::ZExt),
                          AttributeList::get(C, 1, Attribute::SExt)};
 
@@ -166,4 +171,19 @@ TEST(Attributes, OverflowGet) {
   EXPECT_EQ(2U, AL.getNumAttrSets());
 }
 
+TEST(Attributes, StringRepresentation) {
+  LLVMContext C;
+  StructType *Ty = StructType::create(Type::getInt32Ty(C), "mystruct");
+
+  // Insufficiently careful printing can result in byval(%mystruct = { i32 })
+  Attribute A = Attribute::getWithByValType(C, Ty);
+  EXPECT_EQ(A.getAsString(), "byval(%mystruct)");
+
+  A = Attribute::getWithByValType(C, nullptr);
+  EXPECT_EQ(A.getAsString(), "byval");
+
+  A = Attribute::getWithByValType(C, Type::getInt32Ty(C));
+  EXPECT_EQ(A.getAsString(), "byval(i32)");
+}
+
 } // end anonymous namespace

From 45e8cc6639e9d7dbc900ed35fdfe46c02c4eb298 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 29 May 2019 19:13:29 +0000
Subject: [PATCH 0541/1176] LLVM IR: update Clang tests for byval being a typed
 attribute.

Since byval is now a typed attribute it gets sorted slightly differently by
LLVM when the order of attributes is being canonicalized. This updates the few
Clang tests that depend on the old order.

llvm-svn: 362013
---
 clang/test/CodeGen/aapcs-align.cpp               |  4 ++--
 .../test/CodeGenCXX/builtin-source-location.cpp  |  4 ++--
 clang/test/CodeGenCXX/wasm-args-returns.cpp      |  4 ++--
 .../CodeGenCXX/x86_64-arguments-nacl-x32.cpp     |  2 +-
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 ++++++++--------
 .../kernels-have-spir-cc-by-default.cl           |  6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index 40fba7823524e..bcc4604d7422d 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -95,8 +95,8 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define void @g4
-// CHECK: call void @f4(i32 1, %struct.SF16* byval nonnull align 8
-// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* byval nonnull align 8
+// CHECK: call void @f4(i32 1, %struct.SF16* nonnull byval align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* nonnull byval align 8
 // CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
 // CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
 
diff --git a/clang/test/CodeGenCXX/builtin-source-location.cpp b/clang/test/CodeGenCXX/builtin-source-location.cpp
index 6a8387093dfec..f8bfd7d940b91 100644
--- a/clang/test/CodeGenCXX/builtin-source-location.cpp
+++ b/clang/test/CodeGenCXX/builtin-source-location.cpp
@@ -104,7 +104,7 @@ struct TestInit {
 //
 // CHECK-CTOR-GLOBAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP_ONE:[^,]*]],
 // CHECK-CTOR-GLOBAL-SAME: i32 3400, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{[^%]*}}%[[TMP_ONE]])
+// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{.*}}%[[TMP_ONE]])
 #line 3400 "GlobalInitVal.cpp"
 TestInit GlobalInitVal;
 
@@ -119,7 +119,7 @@ extern "C" void test_init_function() {
 //
 // CHECK-CTOR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
 // CHECK-CTOR-LOCAL-SAME: i32 3500, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{[^%]*}}%[[TMP]])
+// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{.*}}%[[TMP]])
 #line 3500 "LocalInitVal.cpp"
   TestInit init_local;
   sink(init_local);
diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp
index a7c4e1e282a12..506540e86fd9c 100644
--- a/clang/test/CodeGenCXX/wasm-args-returns.cpp
+++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp
@@ -30,12 +30,12 @@ struct two_fields {
   double d, e;
 };
 test(two_fields);
-// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* byval nocapture readonly align 8 %{{.*}})
+// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval align 8 %{{.*}})
 //
 // CHECK: define void @_Z15test_two_fieldsv()
 // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8
 // CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret %[[tmp]])
-// CHECK: call void @_Z3use10two_fields(%struct.two_fields* byval nonnull align 8 %[[tmp]])
+// CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval align 8 %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval align 8)
diff --git a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
index 3392b32bd2b6f..89d6dae5d30fc 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
@@ -20,7 +20,7 @@ void f_struct_with_mdp(struct_with_mdp a) { (void)a; }
 struct struct_with_mdp_too_much {
   char *a; char *b; char *c; char *d; test_struct_mdp e;
 };
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval {{.*}} %a)
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval
 void f_struct_with_mdp_too_much(struct_with_mdp_too_much a) {
   (void)a;
 }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index aec00e76014ec..8b03fb00e3fde 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@ void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t ar
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@ flexible_array func_flexible_array_ret()
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -487,7 +487,7 @@ void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@ void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@ void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@ void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval  align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 5bb52e9beb514..4392ef90677c5 100644
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@ typedef struct test_struct {
 kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* byval nocapture
+// CHECK: struct.int_single* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = input.a;
 }
@@ -35,7 +35,7 @@ kernel void test_single(int_single input, global int* output) {
 kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* byval nocapture
+// CHECK: struct.int_pair* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
@@ -44,7 +44,7 @@ kernel void test_pair(int_pair input, global int* output) {
 kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* byval nocapture
+// CHECK: struct.test_struct* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = input.elementA;
  output[1] = input.elementB;

From 1d7ca677697f5b6bb40011e7fc962bb1a158e96a Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 19:21:59 +0000
Subject: [PATCH 0542/1176] [analyzer] [NFC] PathDiagnostic: Create
 PathDiagnosticPopUpPiece

Summary:
This new piece is similar to our macro expansion printing in HTML reports:
On mouse-hover event it pops up on variables. Similar to note pieces it
supports `plist` diagnostics as well.

It is optional, on by default: `add-pop-up-notes=true`.

Extra: In HTML reports `background-color: LemonChiffon` was too light,
changed to `PaleGoldenRod`.

Reviewers: NoQ, alexfh

Reviewed By: NoQ

Subscribers: cfe-commits, gerazo, gsd, george.karpenkov, alexfh, xazax.hun,
             baloghadamsoftware, szepet, a.sidorin, mikhail.ramalho,
             Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D60670

llvm-svn: 362014
---
 .../StaticAnalyzer/Core/AnalyzerOptions.def   |   4 +
 .../Core/BugReporter/PathDiagnostic.h         |  22 +++-
 clang/lib/Rewrite/HTMLRewrite.cpp             |  49 ++++---
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp |  21 ++-
 .../StaticAnalyzer/Core/HTMLDiagnostics.cpp   | 124 ++++++++++++++++--
 .../StaticAnalyzer/Core/PathDiagnostic.cpp    |  21 ++-
 .../StaticAnalyzer/Core/PlistDiagnostics.cpp  |  34 +++++
 .../StaticAnalyzer/Core/SarifDiagnostics.cpp  |  11 +-
 clang/test/Analysis/analyzer-config.c         |   3 +-
 9 files changed, 243 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index cc8b70bcb2c30..40d5d47bbcea3 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -204,6 +204,10 @@ ANALYZER_OPTION(bool, ShouldPrunePaths, "prune-paths",
                 "be pruned out of the final output.",
                 true)
 
+ANALYZER_OPTION(bool, ShouldAddPopUpNotes, "add-pop-up-notes",
+                "Whether pop-up notes should be added to the final output.",
+                true)
+
 ANALYZER_OPTION(
     bool, ShouldConditionalizeStaticInitializers,
     "cfg-conditional-static-initializers",
diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
index 547a8ca643469..5230742a4aa43 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
@@ -367,7 +367,7 @@ class PathDiagnosticLocationPair {
 
 class PathDiagnosticPiece: public llvm::FoldingSetNode {
 public:
-  enum Kind { ControlFlow, Event, Macro, Call, Note };
+  enum Kind { ControlFlow, Event, Macro, Call, Note, PopUp };
   enum DisplayHint { Above, Below };
 
 private:
@@ -482,7 +482,7 @@ class PathDiagnosticSpotPiece : public PathDiagnosticPiece {
 
   static bool classof(const PathDiagnosticPiece *P) {
     return P->getKind() == Event || P->getKind() == Macro ||
-           P->getKind() == Note;
+           P->getKind() == Note || P->getKind() == PopUp;
   }
 };
 
@@ -746,7 +746,7 @@ class PathDiagnosticMacroPiece : public PathDiagnosticSpotPiece {
 class PathDiagnosticNotePiece: public PathDiagnosticSpotPiece {
 public:
   PathDiagnosticNotePiece(const PathDiagnosticLocation &Pos, StringRef S,
-                               bool AddPosRange = true)
+                          bool AddPosRange = true)
       : PathDiagnosticSpotPiece(Pos, S, Note, AddPosRange) {}
   ~PathDiagnosticNotePiece() override;
 
@@ -759,6 +759,22 @@ class PathDiagnosticNotePiece: public PathDiagnosticSpotPiece {
   void Profile(llvm::FoldingSetNodeID &ID) const override;
 };
 
+class PathDiagnosticPopUpPiece: public PathDiagnosticSpotPiece {
+public:
+  PathDiagnosticPopUpPiece(const PathDiagnosticLocation &Pos, StringRef S,
+                           bool AddPosRange = true)
+      : PathDiagnosticSpotPiece(Pos, S, PopUp, AddPosRange) {}
+  ~PathDiagnosticPopUpPiece() override;
+
+  static bool classof(const PathDiagnosticPiece *P) {
+    return P->getKind() == PopUp;
+  }
+
+  void dump() const override;
+
+  void Profile(llvm::FoldingSetNodeID &ID) const override;
+};
+
 /// File IDs mapped to sets of line numbers.
 using FilesToLineNumsMap = std::map<FileID, std::set<unsigned>>;
 
diff --git a/clang/lib/Rewrite/HTMLRewrite.cpp b/clang/lib/Rewrite/HTMLRewrite.cpp
index 688dd7f787578..e304fbbed7294 100644
--- a/clang/lib/Rewrite/HTMLRewrite.cpp
+++ b/clang/lib/Rewrite/HTMLRewrite.cpp
@@ -306,14 +306,16 @@ h1 { font-size:14pt }
 .keyword { color: blue }
 .string_literal { color: red }
 .directive { color: darkmagenta }
-/* Macro expansions. */
-.expansion { display: none; }
-.macro:hover .expansion {
+
+/* Macros and variables could have pop-up notes hidden by default.
+  - Macro pop-up:    expansion of the macro
+  - Variable pop-up: value (table) of the variable */
+.macro_popup, .variable_popup { display: none; }
+
+/* Pop-up appears on mouse-hover event. */
+.macro:hover .macro_popup, .variable:hover .variable_popup {
   display: block;
-  border: 2px solid #FF0000;
   padding: 2px;
-  background-color:#FFF0F0;
-  font-weight: normal;
   -webkit-border-radius:5px;
   -webkit-box-shadow:1px 1px 7px #000;
   border-radius:5px;
@@ -324,6 +326,27 @@ h1 { font-size:14pt }
   z-index: 1
 }
 
+.macro_popup {
+  border: 2px solid red;
+  background-color:#FFF0F0;
+  font-weight: normal;
+}
+
+.variable_popup {
+  border: 2px solid blue;
+  background-color:#F0F0FF;
+  font-weight: bold;
+  font-family: Helvetica, sans-serif;
+  font-size: 9pt;
+}
+
+/* Pop-up notes needs a relative position as a base where they pops up. */
+.macro, .variable {
+  background-color: PaleGoldenRod;
+  position: relative;
+}
+.macro { color: DarkMagenta; }
+
 #tooltiphint {
   position: fixed;
   width: 50em;
@@ -336,12 +359,6 @@ h1 { font-size:14pt }
   background-color: #c0c0c0;
   z-index: 2;
 }
-.macro {
-  color: darkmagenta;
-  background-color:LemonChiffon;
-  /* Macros are position: relative to provide base for expansions. */
-  position: relative;
-}
 
 .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }
 .num { text-align:right; font-size:8pt }
@@ -369,6 +386,7 @@ h1 { font-size:14pt }
 .PathIndex { border-radius:8px }
 .PathIndexEvent { background-color:#bfba87 }
 .PathIndexControl { background-color:#8c8c8c }
+.PathIndexPopUp { background-color: #879abc; }
 .PathNav a { text-decoration:none; font-size: larger }
 .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
 .CodeRemovalHint { background-color:#de1010 }
@@ -636,10 +654,9 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
       TmpPP.Lex(Tok);
     }
 
-
-    // Insert the expansion as the end tag, so that multi-line macros all get
-    // highlighted.
-    Expansion = "<span class='expansion'>" + Expansion + "</span></span>";
+    // Insert the 'macro_popup' as the end tag, so that multi-line macros all
+    // get highlighted.
+    Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>";
 
     HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "<span class='macro'>",
                    Expansion.c_str(), LLoc.isTokenRange());
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 738ad9a062fc3..6627633f39332 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -154,8 +154,6 @@ static void removeRedundantMsgs(PathPieces &path) {
       case PathDiagnosticPiece::Macro:
         removeRedundantMsgs(cast<PathDiagnosticMacroPiece>(*piece).subPieces);
         break;
-      case PathDiagnosticPiece::ControlFlow:
-        break;
       case PathDiagnosticPiece::Event: {
         if (i == N-1)
           break;
@@ -175,7 +173,9 @@ static void removeRedundantMsgs(PathPieces &path) {
         }
         break;
       }
+      case PathDiagnosticPiece::ControlFlow:
       case PathDiagnosticPiece::Note:
+      case PathDiagnosticPiece::PopUp:
         break;
     }
     path.push_back(std::move(piece));
@@ -230,9 +230,8 @@ static bool removeUnneededCalls(PathPieces &pieces, BugReport *R,
         break;
       }
       case PathDiagnosticPiece::ControlFlow:
-        break;
-
       case PathDiagnosticPiece::Note:
+      case PathDiagnosticPiece::PopUp:
         break;
     }
 
@@ -242,6 +241,16 @@ static bool removeUnneededCalls(PathPieces &pieces, BugReport *R,
   return containsSomethingInteresting;
 }
 
+/// Same logic as above to remove extra pieces.
+static void removePopUpNotes(PathPieces &Path) {
+  for (unsigned int i = 0; i < Path.size(); ++i) {
+    auto Piece = std::move(Path.front());
+    Path.pop_front();
+    if (!isa<PathDiagnosticPopUpPiece>(*Piece))
+      Path.push_back(std::move(Piece));
+  }
+}
+
 /// Returns true if the given decl has been implicitly given a body, either by
 /// the analyzer or by the compiler proper.
 static bool hasImplicitBody(const Decl *D) {
@@ -1981,6 +1990,10 @@ static std::unique_ptr<PathDiagnostic> generatePathDiagnosticForConsumer(
       (void)stillHasNotes;
     }
 
+    // Remove pop-up notes if needed.
+    if (!Opts.ShouldAddPopUpNotes)
+      removePopUpNotes(PD->getMutablePieces());
+
     // Redirect all call pieces to have valid locations.
     adjustCallLocations(PD->getMutablePieces());
     removePiecesWithInvalidLocations(PD->getMutablePieces());
diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index 79aaae8cbb3a5..8ede3f15e60f7 100644
--- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -90,8 +90,9 @@ class HTMLDiagnostics : public PathDiagnosticConsumer {
                              const PathDiagnosticMacroPiece& P,
                              unsigned num);
 
-  void HandlePiece(Rewriter& R, FileID BugFileID,
-                   const PathDiagnosticPiece& P, unsigned num, unsigned max);
+  void HandlePiece(Rewriter &R, FileID BugFileID, const PathDiagnosticPiece &P,
+                   const std::vector<SourceRange> &PopUpRanges, unsigned num,
+                   unsigned max);
 
   void HighlightRange(Rewriter& R, FileID BugFileID, SourceRange Range,
                       const char *HighlightStart = "<span class=\"mrange\">",
@@ -605,6 +606,53 @@ window.addEventListener("keydown", function (event) {
 )<<<";
 }
 
+static void
+HandlePopUpPieceStartTag(Rewriter &R,
+                         const std::vector<SourceRange> &PopUpRanges) {
+  for (const auto &Range : PopUpRanges) {
+    html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "",
+                         "<table class='variable_popup'><tbody>",
+                         /*IsTokenRange=*/true);
+  }
+}
+
+static void HandlePopUpPieceEndTag(Rewriter &R,
+                                   const PathDiagnosticPopUpPiece &Piece,
+                                   std::vector<SourceRange> &PopUpRanges,
+                                   unsigned int LastReportedPieceIndex,
+                                   unsigned int PopUpPieceIndex) {
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+
+  SourceRange Range(Piece.getLocation().asRange());
+
+  // Write out the path indices with a right arrow and the message as a row.
+  Out << "<tr><td valign='top'><div class='PathIndex PathIndexPopUp'>"
+      << LastReportedPieceIndex;
+
+  // Also annotate the state transition with extra indices.
+  Out << '.' << PopUpPieceIndex;
+
+  Out << "</div></td><td>" << Piece.getString() << "</td></tr>";
+
+  // If no report made at this range mark the variable and add the end tags.
+  if (std::find(PopUpRanges.begin(), PopUpRanges.end(), Range) ==
+      PopUpRanges.end()) {
+    // Store that we create a report at this range.
+    PopUpRanges.push_back(Range);
+
+    Out << "</tbody></table></span>";
+    html::HighlightRange(R, Range.getBegin(), Range.getEnd(),
+                         "<span class='variable'>", Buf.c_str(),
+                         /*IsTokenRange=*/true);
+
+  // Otherwise inject just the new row at the end of the range.
+  } else {
+    html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "", Buf.c_str(),
+                         /*IsTokenRange=*/true);
+  }
+}
+
 void HTMLDiagnostics::RewriteFile(Rewriter &R,
                                   const PathPieces& path, FileID FID) {
   // Process the path.
@@ -615,39 +663,80 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R,
                     [](const std::shared_ptr<PathDiagnosticPiece> &p) {
                       return isa<PathDiagnosticNotePiece>(*p);
                     });
+  unsigned PopUpPieceCount =
+      std::count_if(path.begin(), path.end(),
+                    [](const std::shared_ptr<PathDiagnosticPiece> &p) {
+                      return isa<PathDiagnosticPopUpPiece>(*p);
+                    });
 
-  unsigned TotalRegularPieces = TotalPieces - TotalNotePieces;
+  unsigned TotalRegularPieces = TotalPieces - TotalNotePieces - PopUpPieceCount;
   unsigned NumRegularPieces = TotalRegularPieces;
   unsigned NumNotePieces = TotalNotePieces;
+  // Stores the count of the regular piece indices.
+  std::map<int, int> IndexMap;
 
+  // Stores the different ranges where we have reported something.
+  std::vector<SourceRange> PopUpRanges;
   for (auto I = path.rbegin(), E = path.rend(); I != E; ++I) {
-    if (isa<PathDiagnosticNotePiece>(I->get())) {
+    const auto &Piece = *I->get();
+
+    if (isa<PathDiagnosticPopUpPiece>(Piece)) {
+      ++IndexMap[NumRegularPieces];
+    } else if (isa<PathDiagnosticNotePiece>(Piece)) {
       // This adds diagnostic bubbles, but not navigation.
       // Navigation through note pieces would be added later,
       // as a separate pass through the piece list.
-      HandlePiece(R, FID, **I, NumNotePieces, TotalNotePieces);
+      HandlePiece(R, FID, Piece, PopUpRanges, NumNotePieces, TotalNotePieces);
       --NumNotePieces;
     } else {
-      HandlePiece(R, FID, **I, NumRegularPieces, TotalRegularPieces);
+      HandlePiece(R, FID, Piece, PopUpRanges, NumRegularPieces,
+                  TotalRegularPieces);
       --NumRegularPieces;
     }
   }
 
-  // Add line numbers, header, footer, etc.
+  // Secondary indexing if we are having multiple pop-ups between two notes.
+  // (e.g. [(13) 'a' is 'true'];  [(13.1) 'b' is 'false'];  [(13.2) 'c' is...)
+  NumRegularPieces = TotalRegularPieces;
+  for (auto I = path.rbegin(), E = path.rend(); I != E; ++I) {
+    const auto &Piece = *I->get();
+
+    if (const auto *PopUpP = dyn_cast<PathDiagnosticPopUpPiece>(&Piece)) {
+      int PopUpPieceIndex = IndexMap[NumRegularPieces];
+
+      // Pop-up pieces needs the index of the last reported piece and its count
+      // how many times we report to handle multiple reports on the same range.
+      // This marks the variable, adds the </table> end tag and the message
+      // (list element) as a row. The <table> start tag will be added after the
+      // rows has been written out. Note: It stores every different range.
+      HandlePopUpPieceEndTag(R, *PopUpP, PopUpRanges, NumRegularPieces,
+                             PopUpPieceIndex);
+
+      if (PopUpPieceIndex > 0)
+        --IndexMap[NumRegularPieces];
+
+    } else if (!isa<PathDiagnosticNotePiece>(Piece)) {
+      --NumRegularPieces;
+    }
+  }
 
+  // Add the <table> start tag of pop-up pieces based on the stored ranges.
+  HandlePopUpPieceStartTag(R, PopUpRanges);
+
+  // Add line numbers, header, footer, etc.
   html::EscapeText(R, FID);
   html::AddLineNumbers(R, FID);
 
   // If we have a preprocessor, relex the file and syntax highlight.
   // We might not have a preprocessor if we come from a deserialized AST file,
   // for example.
-
   html::SyntaxHighlight(R, FID, PP);
   html::HighlightMacros(R, FID, PP);
 }
 
-void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID,
-                                  const PathDiagnosticPiece& P,
+void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID,
+                                  const PathDiagnosticPiece &P,
+                                  const std::vector<SourceRange> &PopUpRanges,
                                   unsigned num, unsigned max) {
   // For now, just draw a box above the line in question, and emit the
   // warning.
@@ -689,9 +778,7 @@ void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID,
   bool IsNote = false;
   bool SuppressIndex = (max == 1);
   switch (P.getKind()) {
-  case PathDiagnosticPiece::Call:
-      llvm_unreachable("Calls and extra notes should already be handled");
-  case PathDiagnosticPiece::Event:  Kind = "Event"; break;
+  case PathDiagnosticPiece::Event: Kind = "Event"; break;
   case PathDiagnosticPiece::ControlFlow: Kind = "Control"; break;
     // Setting Kind to "Control" is intentional.
   case PathDiagnosticPiece::Macro: Kind = "Control"; break;
@@ -700,6 +787,9 @@ void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID,
     IsNote = true;
     SuppressIndex = true;
     break;
+  case PathDiagnosticPiece::Call:
+  case PathDiagnosticPiece::PopUp:
+    llvm_unreachable("Calls and extra notes should already be handled");
   }
 
   std::string sbuf;
@@ -859,8 +949,14 @@ void HTMLDiagnostics::HandlePiece(Rewriter& R, FileID BugFileID,
 
   // Now highlight the ranges.
   ArrayRef<SourceRange> Ranges = P.getRanges();
-  for (const auto &Range : Ranges)
+  for (const auto &Range : Ranges) {
+    // If we have already highlighted the range as a pop-up there is no work.
+    if (std::find(PopUpRanges.begin(), PopUpRanges.end(), Range) !=
+        PopUpRanges.end())
+      continue;
+
     HighlightRange(R, LPosInfo.first, Range);
+  }
 }
 
 static void EmitAlphaCounter(raw_ostream &os, unsigned n) {
diff --git a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
index 5889a979661ce..1f642064827d5 100644
--- a/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PathDiagnostic.cpp
@@ -90,6 +90,8 @@ PathDiagnosticMacroPiece::~PathDiagnosticMacroPiece() = default;
 
 PathDiagnosticNotePiece::~PathDiagnosticNotePiece() = default;
 
+PathDiagnosticPopUpPiece::~PathDiagnosticPopUpPiece() = default;
+
 void PathPieces::flattenTo(PathPieces &Primary, PathPieces &Current,
                            bool ShouldFlattenMacros) const {
   for (auto &Piece : *this) {
@@ -119,6 +121,7 @@ void PathPieces::flattenTo(PathPieces &Primary, PathPieces &Current,
     case PathDiagnosticPiece::Event:
     case PathDiagnosticPiece::ControlFlow:
     case PathDiagnosticPiece::Note:
+    case PathDiagnosticPiece::PopUp:
       Current.push_back(Piece);
       break;
     }
@@ -369,15 +372,16 @@ static Optional<bool> comparePiece(const PathDiagnosticPiece &X,
     case PathDiagnosticPiece::ControlFlow:
       return compareControlFlow(cast<PathDiagnosticControlFlowPiece>(X),
                                 cast<PathDiagnosticControlFlowPiece>(Y));
-    case PathDiagnosticPiece::Event:
-    case PathDiagnosticPiece::Note:
-      return None;
     case PathDiagnosticPiece::Macro:
       return compareMacro(cast<PathDiagnosticMacroPiece>(X),
                           cast<PathDiagnosticMacroPiece>(Y));
     case PathDiagnosticPiece::Call:
       return compareCall(cast<PathDiagnosticCallPiece>(X),
                          cast<PathDiagnosticCallPiece>(Y));
+    case PathDiagnosticPiece::Event:
+    case PathDiagnosticPiece::Note:
+    case PathDiagnosticPiece::PopUp:
+      return None;
   }
   llvm_unreachable("all cases handled");
 }
@@ -1287,6 +1291,10 @@ void PathDiagnosticNotePiece::Profile(llvm::FoldingSetNodeID &ID) const {
   PathDiagnosticSpotPiece::Profile(ID);
 }
 
+void PathDiagnosticPopUpPiece::Profile(llvm::FoldingSetNodeID &ID) const {
+  PathDiagnosticSpotPiece::Profile(ID);
+}
+
 void PathDiagnostic::Profile(llvm::FoldingSetNodeID &ID) const {
   ID.Add(getLocation());
   ID.AddString(BugType);
@@ -1412,6 +1420,13 @@ LLVM_DUMP_METHOD void PathDiagnosticNotePiece::dump() const {
   getLocation().dump();
 }
 
+LLVM_DUMP_METHOD void PathDiagnosticPopUpPiece::dump() const {
+  llvm::errs() << "POP-UP\n--------------\n";
+  llvm::errs() << getString() << "\n";
+  llvm::errs() << " ---- at ----\n";
+  getLocation().dump();
+}
+
 LLVM_DUMP_METHOD void PathDiagnosticLocation::dump() const {
   if (!isValid()) {
     llvm::errs() << "<INVALID>\n";
diff --git a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
index c03bab0fe1623..231db1fa16f68 100644
--- a/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/PlistDiagnostics.cpp
@@ -120,6 +120,9 @@ class PlistPrinter {
       case PathDiagnosticPiece::Note:
         ReportNote(o, cast<PathDiagnosticNotePiece>(P), indent);
         break;
+      case PathDiagnosticPiece::PopUp:
+        ReportPopUp(o, cast<PathDiagnosticPopUpPiece>(P), indent);
+        break;
     }
   }
 
@@ -138,6 +141,9 @@ class PlistPrinter {
                             unsigned indent, unsigned depth);
   void ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
                   unsigned indent);
+
+  void ReportPopUp(raw_ostream &o, const PathDiagnosticPopUpPiece &P,
+                   unsigned indent);
 };
 
 } // end of anonymous namespace
@@ -397,6 +403,34 @@ void PlistPrinter::ReportNote(raw_ostream &o, const PathDiagnosticNotePiece& P,
   Indent(o, indent); o << "</dict>\n";
 }
 
+void PlistPrinter::ReportPopUp(raw_ostream &o,
+                               const PathDiagnosticPopUpPiece &P,
+                               unsigned indent) {
+  const SourceManager &SM = PP.getSourceManager();
+
+  Indent(o, indent) << "<dict>\n";
+  ++indent;
+
+  Indent(o, indent) << "<key>kind</key><string>pop-up</string>\n";
+
+  // Output the location.
+  FullSourceLoc L = P.getLocation().asLocation();
+
+  Indent(o, indent) << "<key>location</key>\n";
+  EmitLocation(o, SM, L, FM, indent);
+
+  // Output the ranges (if any).
+  ArrayRef<SourceRange> Ranges = P.getRanges();
+  EmitRanges(o, Ranges, indent);
+
+  // Output the text.
+  EmitMessage(o, P.getString(), indent);
+
+  // Finish up.
+  --indent;
+  Indent(o, indent) << "</dict>\n";
+}
+
 //===----------------------------------------------------------------------===//
 // Static function definitions.
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
index a8f529b7d353f..4233f25edb349 100644
--- a/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
@@ -191,15 +191,16 @@ static json::Object createLocation(json::Object &&PhysicalLocation,
 
 static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
   switch (Piece.getKind()) {
-  case PathDiagnosticPiece::Kind::Call:
-  case PathDiagnosticPiece::Kind::Macro:
-  case PathDiagnosticPiece::Kind::Note:
+  case PathDiagnosticPiece::Call:
+  case PathDiagnosticPiece::Macro:
+  case PathDiagnosticPiece::Note:
+  case PathDiagnosticPiece::PopUp:
     // FIXME: What should be reported here?
     break;
-  case PathDiagnosticPiece::Kind::Event:
+  case PathDiagnosticPiece::Event:
     return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
                                                      : Importance::Essential;
-  case PathDiagnosticPiece::Kind::ControlFlow:
+  case PathDiagnosticPiece::ControlFlow:
     return Importance::Unimportant;
   }
   return Importance::Unimportant;
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 38bf5cc60cebe..a9c2eaa353b34 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -2,6 +2,7 @@
 // RUN: FileCheck --input-file=%t %s --match-full-lines
 
 // CHECK: [config]
+// CHECK-NEXT: add-pop-up-notes = true
 // CHECK-NEXT: aggressive-binary-operation-simplification = false
 // CHECK-NEXT: alpha.clone.CloneChecker:IgnoredFilesPattern = ""
 // CHECK-NEXT: alpha.clone.CloneChecker:MinimumCloneComplexity = 50
@@ -87,4 +88,4 @@
 // CHECK-NEXT: unroll-loops = false
 // CHECK-NEXT: widen-loops = false
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 84
+// CHECK-NEXT: num-entries = 85

From 107f8d98730c9f38f28b462f0e11901274f93cdd Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Wed, 29 May 2019 19:24:19 +0000
Subject: [PATCH 0543/1176] [DAGCombiner] Replace gathers with a zero mask with
 the passthru value

These can be created by the legalizer when splitting a larger gather.

See https://llvm.org/PR42055 for a motivating example.

Differential Revision: https://reviews.llvm.org/D62613

llvm-svn: 362015
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++++---
 llvm/test/CodeGen/X86/avx2-masked-gather.ll   | 21 +++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 48c918051bb1d..1518efd125672 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8084,13 +8084,17 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
   SDValue Mask = MGT->getMask();
   SDLoc DL(N);
 
+  // Zap gathers with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MGATHER result requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
diff --git a/llvm/test/CodeGen/X86/avx2-masked-gather.ll b/llvm/test/CodeGen/X86/avx2-masked-gather.ll
index 3a831a88aa679..c119e12828867 100644
--- a/llvm/test/CodeGen/X86/avx2-masked-gather.ll
+++ b/llvm/test/CodeGen/X86/avx2-masked-gather.ll
@@ -769,3 +769,24 @@ entry:
   ret <2 x double> %res
 }
 
+
+define <2 x double> @masked_gather_zeromask(<2 x double*>* %ptr, <2 x double> %dummy, <2 x double> %passthru) {
+; X86-LABEL: masked_gather_zeromask:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    vmovaps %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: masked_gather_zeromask:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; NOGATHER-LABEL: masked_gather_zeromask:
+; NOGATHER:       # %bb.0: # %entry
+; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
+; NOGATHER-NEXT:    retq
+entry:
+  %ld  = load <2 x double*>, <2 x double*>* %ptr
+  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x double*> %ld, i32 0, <2 x i1> zeroinitializer, <2 x double> %passthru)
+  ret <2 x double> %res
+}

From 4955eb7ceb98bc2be9641a99e87556c5918abf02 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 29 May 2019 20:00:36 +0000
Subject: [PATCH 0544/1176] gn build: Make it possible to build with coverage
 information

Differential Revision: https://reviews.llvm.org/D62508

llvm-svn: 362018
---
 llvm/utils/gn/build/BUILD.gn | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn
index 1af1c220a6f94..a20cdb1f5afdb 100644
--- a/llvm/utils/gn/build/BUILD.gn
+++ b/llvm/utils/gn/build/BUILD.gn
@@ -3,6 +3,19 @@ import("//llvm/utils/gn/build/mac_sdk.gni")
 import("//llvm/utils/gn/build/toolchain/compiler.gni")
 import("//llvm/utils/gn/build/toolchain/target_flags.gni")
 
+declare_args() {
+  # Whether to build everything with coverage information.
+  # After building with this, run tests and then run
+  #    llvm/utils/prepare-code-coverage-artifact.py  \
+  #        .../llvm-profdata .../llvm-cov out/gn/profiles/ report/ \
+  #        out/gn/bin/llvm-undname ...`
+  # to generate a HTML report for the binaries passed in the last line.
+  llvm_build_instrumented_coverage = false
+}
+
+assert(!llvm_build_instrumented_coverage || is_clang,
+       "llvm_build_instrumented_coverage requires clang as host compiler")
+
 config("compiler_defaults") {
   defines = []
 
@@ -136,6 +149,21 @@ config("compiler_defaults") {
   if (use_lld && host_os != "win") {
     ldflags += [ "-fuse-ld=lld" ]
   }
+
+  if (llvm_build_instrumented_coverage) {
+    cflags += [
+      "-fcoverage-mapping",
+
+      # Using an absolute path here is lame, but it's used at test execution
+      # time to generate the profiles, and lit doesn't specify a fixed folder
+      # for test execution -- so this is the only way to get all profiles into
+      # a single folder like llvm/utils/prepare-code-coverage-artifact.py
+      # expects.
+      "-fprofile-instr-generate=" +
+          rebase_path("$root_build_dir/profiles/%4m.profraw"),
+    ]
+    ldflags += [ "-fprofile-instr-generate" ]
+  }
 }
 
 config("no_exceptions") {

From 68908c9017d7c07f2a83b5d3428d5d15523a656c Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Wed, 29 May 2019 20:03:00 +0000
Subject: [PATCH 0545/1176] UpdateTestChecks: Lanai triple support

Summary:
The assembly structure most resembles the SPARC pattern:
```
        .globl  f6                      ! -- Begin function f6
        .p2align        2
        .type   f6,@function
f6:                                     ! @f6
        .cfi_startproc
! %bb.0:
        st      %fp, [--%sp]
<...>
        ld      -8[%fp], %fp
.Lfunc_end0:
        .size   f6, .Lfunc_end0-f6
        .cfi_endproc
                                        ! -- End function
```
Test being affected by upcoming patch, so regenerate it.

Reviewers: RKSimon, jpienaar

Reviewed By: RKSimon

Subscribers: jyknight, fedor.sergeev, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62545

llvm-svn: 362019
---
 llvm/test/CodeGen/Lanai/constant_multiply.ll | 171 ++++++++++++++-----
 llvm/utils/UpdateTestChecks/asm.py           |  18 ++
 2 files changed, 148 insertions(+), 41 deletions(-)

diff --git a/llvm/test/CodeGen/Lanai/constant_multiply.ll b/llvm/test/CodeGen/Lanai/constant_multiply.ll
index 80054dbc0f9da..f176a7143d866 100644
--- a/llvm/test/CodeGen/Lanai/constant_multiply.ll
+++ b/llvm/test/CodeGen/Lanai/constant_multiply.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s | FileCheck %s
 
 ; Test custom lowering for 32-bit integer multiplication.
@@ -5,103 +6,191 @@
 target datalayout = "E-m:e-p:32:32-i64:64-a:0:32-n32-S64"
 target triple = "lanai"
 
-; CHECK-LABEL: f6:
-; CHECK: sh %r6, 0x1, %r{{[0-9]+}}
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: sub %r{{[0-9]+}}, %r{{[0-9]+}}, %rv
 define i32 @f6(i32 inreg %a) #0 {
+; CHECK-LABEL: f6:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x1, %r3
+; CHECK-NEXT:    sh %r6, 0x3, %r9
+; CHECK-NEXT:    sub %r9, %r3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, 6
   ret i32 %1
 }
 
-; CHECK-LABEL: f7:
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: sub %r{{[0-9]+}}, %r6, %rv
 define i32 @f7(i32 inreg %a) #0 {
+; CHECK-LABEL: f7:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    sub %r3, %r6, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, 7
   ret i32 %1
 }
 
-; CHECK-LABEL: f8:
-; CHECK: sh %r6, 0x3, %rv
 define i32 @f8(i32 inreg %a) #0 {
+; CHECK-LABEL: f8:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = shl nsw i32 %a, 3
   ret i32 %1
 }
 
-; CHECK-LABEL: f9:
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: add %r{{[0-9]+}}, %r6, %rv
 define i32 @f9(i32 inreg %a) #0 {
+; CHECK-LABEL: f9:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    add %r3, %r6, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, 9
   ret i32 %1
 }
 
-; CHECK-LABEL: f10:
-; CHECK: sh %r6, 0x1, %r{{[0-9]+}}
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: add %r{{[0-9]+}}, %r{{[0-9]+}}, %rv
 define i32 @f10(i32 inreg %a) #0 {
+; CHECK-LABEL: f10:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x1, %r3
+; CHECK-NEXT:    sh %r6, 0x3, %r9
+; CHECK-NEXT:    add %r9, %r3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, 10
   ret i32 %1
 }
 
-; CHECK-LABEL: f1280:
-; CHECK: sh %r6, 0x8, %r{{[0-9]+}}
-; CHECK: sh %r6, 0xa, %r{{[0-9]+}}
-; CHECK: add %r{{[0-9]+}}, %r{{[0-9]+}}, %rv
 define i32 @f1280(i32 inreg %a) #0 {
+; CHECK-LABEL: f1280:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x8, %r3
+; CHECK-NEXT:    sh %r6, 0xa, %r9
+; CHECK-NEXT:    add %r9, %r3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, 1280
   ret i32 %1
 }
 
-; CHECK-LABEL: fm6:
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: sh %r6, 0x1, %r{{[0-9]+}}
-; CHECK: sub %r{{[0-9]+}}, %r{{[0-9]+}}, %rv
 define i32 @fm6(i32 inreg %a) #0 {
+; CHECK-LABEL: fm6:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    sh %r6, 0x1, %r9
+; CHECK-NEXT:    sub %r9, %r3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, -6
   ret i32 %1
 }
 
-; CHECK-LABEL: fm7:
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: sub %r6, %r{{[0-9]+}}, %rv
 define i32 @fm7(i32 inreg %a) #0 {
+; CHECK-LABEL: fm7:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    sub %r6, %r3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, -7
   ret i32 %1
 }
 
-; CHECK-LABEL: fm8:
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: sub %r{{[0-9]+}}, %r{{[0-9]+}}, %rv
 define i32 @fm8(i32 inreg %a) #0 {
+; CHECK-LABEL: fm8:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    sub %r0, %r3, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, -8
   ret i32 %1
 }
 
-; CHECK-LABEL: fm9:
-; CHECK: sub	%r0, %r6, %r{{[0-9]+}}
-; CHECK: sh	%r6, 0x3, %r9
-; CHECK: sub	%r{{[0-9]+}}, %r9, %rv
 define i32 @fm9(i32 inreg %a) #0 {
+; CHECK-LABEL: fm9:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sub %r0, %r6, %r3
+; CHECK-NEXT:    sh %r6, 0x3, %r9
+; CHECK-NEXT:    sub %r3, %r9, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, -9
   ret i32 %1
 }
 
-; CHECK-LABEL: fm10:
-; CHECK: sh %r6, 0x1, %r{{[0-9]+}}
-; CHECK: sub %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: sh %r6, 0x3, %r{{[0-9]+}}
-; CHECK: sub %r{{[0-9]+}}, %r{{[0-9]+}}, %rv
 define i32 @fm10(i32 inreg %a) #0 {
+; CHECK-LABEL: fm10:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    sh %r6, 0x1, %r3
+; CHECK-NEXT:    sub %r0, %r3, %r3
+; CHECK-NEXT:    sh %r6, 0x3, %r9
+; CHECK-NEXT:    sub %r3, %r9, %rv
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul nsw i32 %a, -10
   ret i32 %1
 }
 
-; CHECK-LABEL: h1:
-; CHECK: __mulsi3
 define i32 @h1(i32 inreg %a) #0 {
+; CHECK-LABEL: h1:
+; CHECK:       ! %bb.0:
+; CHECK-NEXT:    st %fp, [--%sp]
+; CHECK-NEXT:    add %sp, 0x8, %fp
+; CHECK-NEXT:    sub %sp, 0x8, %sp
+; CHECK-NEXT:    mov 0xaaaa0000, %r3
+; CHECK-NEXT:    add %pc, 0x10, %rca
+; CHECK-NEXT:    st %rca, [--%sp]
+; CHECK-NEXT:    bt __mulsi3
+; CHECK-NEXT:    or %r3, 0xaaab, %r7
+; CHECK-NEXT:    ld -4[%fp], %pc ! return
+; CHECK-NEXT:    add %fp, 0x0, %sp
+; CHECK-NEXT:    ld -8[%fp], %fp
   %1 = mul i32 %a, -1431655765
   ret i32 %1
 }
diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 07ba2644ef4bd..247c301bff4ea 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -69,6 +69,13 @@ class string:
     r'.Lfunc_end[0-9]+:\n',
     flags=(re.M | re.S))
 
+ASM_FUNCTION_LANAI_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@(?P=func)\n'
+    r'(?:[ \t]+.cfi_startproc\n)?'  # drop optional cfi noise
+    r'(?P<body>.*?)\s*'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
 ASM_FUNCTION_SPARC_RE = re.compile(
     r'^_?(?P<func>[^:]+):[ \t]*!+[ \t]*@(?P=func)\n'
     r'(?P<body>.*?)\s*'
@@ -186,6 +193,16 @@ def scrub_asm_riscv(asm, args):
   asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
   return asm
 
+def scrub_asm_lanai(asm, args):
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation.
+  asm = string.expandtabs(asm, 2)
+  # Strip trailing whitespace.
+  asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  return asm
+
 def scrub_asm_sparc(asm, args):
   # Scrub runs of whitespace out of the assembly, but leave the leading
   # whitespace in place.
@@ -266,6 +283,7 @@ def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, pre
       'powerpc64le': (scrub_asm_powerpc, ASM_FUNCTION_PPC_RE),
       'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
       'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
+      'lanai': (scrub_asm_lanai, ASM_FUNCTION_LANAI_RE),
       'sparc': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE),
       'sparcv9': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE),
       's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE),

From 4b0184b2d38e0c38da2c665257d516b1eb2b6175 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 20:06:09 +0000
Subject: [PATCH 0546/1176] [analyzer] ConditionBRVisitor: Enhance to write out
 more information

Summary:
Add extra messages to the bug report to inform the user why the analyzer
`Taking true/false branch`.

Reviewers: NoQ, george.karpenkov

Reviewed By: NoQ

Subscribers: gerazo, gsd, dkrupp, whisperity, baloghadamsoftware, xazax.hun,
             eraman, szepet, a.sidorin, mikhail.ramalho, Szelethus,
             donat.nagy, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D53076

llvm-svn: 362020
---
 .../Core/BugReporter/BugReporterVisitors.h    |  17 +-
 .../Core/BugReporterVisitors.cpp              | 163 ++--
 .../NewDelete-path-notes.cpp.plist            |  62 ++
 .../expected-plists/cxx-for-range.cpp.plist   | 186 ++++
 .../Inputs/expected-plists/edges-new.mm.plist | 918 +++++++++++++++++-
 .../expected-plists/inline-plist.c.plist      | 346 ++++---
 .../objc-radar17039661.m.plist                |  62 ++
 .../plist-macros-with-expansion.cpp.plist     |  56 ++
 .../expected-plists/plist-macros.cpp.plist    | 115 ++-
 .../expected-plists/plist-output.m.plist      | 180 ++++
 .../retain-release.m.objc.plist               | 124 +++
 .../retain-release.m.objcpp.plist             | 124 +++
 clang/test/Analysis/NewDelete-path-notes.cpp  |   4 +-
 .../diagnostics/no-store-func-path-notes.c    |  18 +-
 .../diagnostics/no-store-func-path-notes.cpp  |   6 +-
 .../diagnostics/no-store-func-path-notes.m    |   9 +-
 clang/test/Analysis/inline-plist.c            |   3 +-
 .../test/Analysis/osobject-retain-release.cpp |   3 +-
 clang/test/Analysis/use-after-move.cpp        |  13 +-
 clang/test/Analysis/virtualcall.cpp           |   6 +-
 20 files changed, 2158 insertions(+), 257 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
index 1a09714a65b25..6efe6fbfd0e6d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -189,22 +189,23 @@ class ConditionBRVisitor final : public BugReporterVisitor {
                   BugReporterContext &BRC);
 
   std::shared_ptr<PathDiagnosticPiece>
-  VisitTrueTest(const Expr *Cond, bool tookTrue, BugReporterContext &BRC,
-                BugReport &R, const ExplodedNode *N);
+  VisitTrueTest(const Expr *Cond, BugReporterContext &BRC, BugReport &R,
+                const ExplodedNode *N, bool TookTrue);
 
   std::shared_ptr<PathDiagnosticPiece>
-  VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR, const bool tookTrue,
-                BugReporterContext &BRC, BugReport &R, const ExplodedNode *N);
+  VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR,
+                BugReporterContext &BRC, BugReport &R, const ExplodedNode *N,
+                bool TookTrue, bool IsAssuming);
 
   std::shared_ptr<PathDiagnosticPiece>
   VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
-                const bool tookTrue, BugReporterContext &BRC, BugReport &R,
-                const ExplodedNode *N);
+                BugReporterContext &BRC, BugReport &R, const ExplodedNode *N,
+                bool TookTrue, bool IsAssuming);
 
   std::shared_ptr<PathDiagnosticPiece>
   VisitConditionVariable(StringRef LhsString, const Expr *CondVarExpr,
-                         const bool tookTrue, BugReporterContext &BRC,
-                         BugReport &R, const ExplodedNode *N);
+                         BugReporterContext &BRC, BugReport &R,
+                         const ExplodedNode *N, bool TookTrue);
 
   bool patternMatch(const Expr *Ex,
                     const Expr *ParentEx,
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index d11ecd95802a3..d6eb31d6c4e56 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -179,6 +179,23 @@ static bool hasVisibleUpdate(const ExplodedNode *LeftNode, SVal LeftVal,
     RLCV->getStore() == RightNode->getState()->getStore();
 }
 
+static Optional<const llvm::APSInt *>
+getConcreteIntegerValue(const Expr *CondVarExpr, const ExplodedNode *N) {
+  ProgramStateRef State = N->getState();
+  const LocationContext *LCtx = N->getLocationContext();
+
+  // The declaration of the value may rely on a pointer so take its l-value.
+  if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(CondVarExpr)) {
+    if (const auto *VD = dyn_cast_or_null<VarDecl>(DRE->getDecl())) {
+      SVal DeclSVal = State->getSVal(State->getLValue(VD, LCtx));
+      if (auto DeclCI = DeclSVal.getAs<nonloc::ConcreteInt>())
+        return &DeclCI->getValue();
+    }
+  }
+
+  return {};
+}
+
 //===----------------------------------------------------------------------===//
 // Definitions for bug reporter visitors.
 //===----------------------------------------------------------------------===//
@@ -1846,30 +1863,36 @@ ConditionBRVisitor::VisitNode(const ExplodedNode *N,
 std::shared_ptr<PathDiagnosticPiece>
 ConditionBRVisitor::VisitNodeImpl(const ExplodedNode *N,
                                   BugReporterContext &BRC, BugReport &BR) {
-  ProgramPoint progPoint = N->getLocation();
+  ProgramPoint ProgPoint = N->getLocation();
+  const std::pair<const ProgramPointTag *, const ProgramPointTag *> &Tags =
+      ExprEngine::geteagerlyAssumeBinOpBifurcationTags();
 
   // If an assumption was made on a branch, it should be caught
   // here by looking at the state transition.
-  if (Optional<BlockEdge> BE = progPoint.getAs<BlockEdge>()) {
-    const CFGBlock *srcBlk = BE->getSrc();
-    if (const Stmt *term = srcBlk->getTerminatorStmt())
-      return VisitTerminator(term, N, srcBlk, BE->getDst(), BR, BRC);
+  if (Optional<BlockEdge> BE = ProgPoint.getAs<BlockEdge>()) {
+    const CFGBlock *SrcBlock = BE->getSrc();
+    if (const Stmt *Term = SrcBlock->getTerminatorStmt()) {
+      // If the tag of the previous node is 'Eagerly Assume...' the current
+      // 'BlockEdge' has the same constraint information. We do not want to
+      // report the value as it is just an assumption on the predecessor node
+      // which will be caught in the next VisitNode() iteration as a 'PostStmt'.
+      const ProgramPointTag *PreviousNodeTag =
+          N->getFirstPred()->getLocation().getTag();
+      if (PreviousNodeTag == Tags.first || PreviousNodeTag == Tags.second)
+        return nullptr;
+
+      return VisitTerminator(Term, N, SrcBlock, BE->getDst(), BR, BRC);
+    }
     return nullptr;
   }
 
-  if (Optional<PostStmt> PS = progPoint.getAs<PostStmt>()) {
-    const std::pair<const ProgramPointTag *, const ProgramPointTag *> &tags =
-        ExprEngine::geteagerlyAssumeBinOpBifurcationTags();
-
-    const ProgramPointTag *tag = PS->getTag();
-    if (tag == tags.first)
-      return VisitTrueTest(cast<Expr>(PS->getStmt()), true,
-                           BRC, BR, N);
-    if (tag == tags.second)
-      return VisitTrueTest(cast<Expr>(PS->getStmt()), false,
-                           BRC, BR, N);
+  if (Optional<PostStmt> PS = ProgPoint.getAs<PostStmt>()) {
+    const ProgramPointTag *CurrentNodeTag = PS->getTag();
+    if (CurrentNodeTag != Tags.first && CurrentNodeTag != Tags.second)
+      return nullptr;
 
-    return nullptr;
+    bool TookTrue = CurrentNodeTag == Tags.first;
+    return VisitTrueTest(cast<Expr>(PS->getStmt()), BRC, BR, N, TookTrue);
   }
 
   return nullptr;
@@ -1928,30 +1951,30 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTerminator(
 
   assert(Cond);
   assert(srcBlk->succ_size() == 2);
-  const bool tookTrue = *(srcBlk->succ_begin()) == dstBlk;
-  return VisitTrueTest(Cond, tookTrue, BRC, R, N);
+  const bool TookTrue = *(srcBlk->succ_begin()) == dstBlk;
+  return VisitTrueTest(Cond, BRC, R, N, TookTrue);
 }
 
 std::shared_ptr<PathDiagnosticPiece>
-ConditionBRVisitor::VisitTrueTest(const Expr *Cond, bool tookTrue,
-                                  BugReporterContext &BRC, BugReport &R,
-                                  const ExplodedNode *N) {
+ConditionBRVisitor::VisitTrueTest(const Expr *Cond, BugReporterContext &BRC,
+                                  BugReport &R, const ExplodedNode *N,
+                                  bool TookTrue) {
   ProgramStateRef CurrentState = N->getState();
-  ProgramStateRef PreviousState = N->getFirstPred()->getState();
+  ProgramStateRef PrevState = N->getFirstPred()->getState();
   const LocationContext *LCtx = N->getLocationContext();
 
   // If the constraint information is changed between the current and the
   // previous program state we assuming the newly seen constraint information.
   // If we cannot evaluate the condition (and the constraints are the same)
   // the analyzer has no information about the value and just assuming it.
-  if (BRC.getStateManager().haveEqualConstraints(CurrentState, PreviousState) &&
-      CurrentState->getSVal(Cond, LCtx).isValid())
-    return nullptr;
+  bool IsAssuming =
+      !BRC.getStateManager().haveEqualConstraints(CurrentState, PrevState) ||
+      CurrentState->getSVal(Cond, LCtx).isUnknownOrUndef();
 
   // These will be modified in code below, but we need to preserve the original
   //  values in case we want to throw the generic message.
   const Expr *CondTmp = Cond;
-  bool tookTrueTmp = tookTrue;
+  bool TookTrueTmp = TookTrue;
 
   while (true) {
     CondTmp = CondTmp->IgnoreParenCasts();
@@ -1960,18 +1983,18 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, bool tookTrue,
         break;
       case Stmt::BinaryOperatorClass:
         if (auto P = VisitTrueTest(Cond, cast<BinaryOperator>(CondTmp),
-                                   tookTrueTmp, BRC, R, N))
+                                   BRC, R, N, TookTrueTmp, IsAssuming))
           return P;
         break;
       case Stmt::DeclRefExprClass:
         if (auto P = VisitTrueTest(Cond, cast<DeclRefExpr>(CondTmp),
-                                   tookTrueTmp, BRC, R, N))
+                                   BRC, R, N, TookTrueTmp, IsAssuming))
           return P;
         break;
       case Stmt::UnaryOperatorClass: {
         const auto *UO = cast<UnaryOperator>(CondTmp);
         if (UO->getOpcode() == UO_LNot) {
-          tookTrueTmp = !tookTrueTmp;
+          TookTrueTmp = !TookTrueTmp;
           CondTmp = UO->getSubExpr();
           continue;
         }
@@ -1983,12 +2006,17 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, bool tookTrue,
 
   // Condition too complex to explain? Just say something so that the user
   // knew we've made some path decision at this point.
+  // If it is too complex and we know the evaluation of the condition do not
+  // repeat the note from 'BugReporter.cpp'
+  if (!IsAssuming)
+    return nullptr;
+
   PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx);
   if (!Loc.isValid() || !Loc.asLocation().isValid())
     return nullptr;
 
   return std::make_shared<PathDiagnosticEventPiece>(
-      Loc, tookTrue ? GenericTrueMessage : GenericFalseMessage);
+      Loc, TookTrue ? GenericTrueMessage : GenericFalseMessage);
 }
 
 bool ConditionBRVisitor::patternMatch(const Expr *Ex,
@@ -2066,10 +2094,9 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex,
   return false;
 }
 
-std::shared_ptr<PathDiagnosticPiece>
-ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
-                                  const bool tookTrue, BugReporterContext &BRC,
-                                  BugReport &R, const ExplodedNode *N) {
+std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
+    const Expr *Cond, const BinaryOperator *BExpr, BugReporterContext &BRC,
+    BugReport &R, const ExplodedNode *N, bool TookTrue, bool IsAssuming) {
   bool shouldInvert = false;
   Optional<bool> shouldPrune;
 
@@ -2089,8 +2116,8 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
   if (BinaryOperator::isAssignmentOp(Op)) {
     // For assignment operators, all that we care about is that the LHS
     // evaluates to "true" or "false".
-    return VisitConditionVariable(LhsString, BExpr->getLHS(), tookTrue,
-                                  BRC, R, N);
+    return VisitConditionVariable(LhsString, BExpr->getLHS(), BRC, R, N,
+                                  TookTrue);
   }
 
   // For non-assignment operations, we require that we can understand
@@ -2102,7 +2129,8 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
   // Should we invert the strings if the LHS is not a variable name?
   SmallString<256> buf;
   llvm::raw_svector_ostream Out(buf);
-  Out << "Assuming " << (shouldInvert ? RhsString : LhsString) << " is ";
+  Out << (IsAssuming ? "Assuming " : "")
+      << (shouldInvert ? RhsString : LhsString) << " is ";
 
   // Do we need to invert the opcode?
   if (shouldInvert)
@@ -2114,7 +2142,7 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
       case BO_GE: Op = BO_LE; break;
     }
 
-  if (!tookTrue)
+  if (!TookTrue)
     switch (Op) {
       case BO_EQ: Op = BO_NE; break;
       case BO_NE: Op = BO_EQ; break;
@@ -2141,6 +2169,11 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
   Out << (shouldInvert ? LhsString : RhsString);
   const LocationContext *LCtx = N->getLocationContext();
   PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx);
+
+  // If we know the value create a pop-up note.
+  if (!IsAssuming)
+    return std::make_shared<PathDiagnosticPopUpPiece>(Loc, Out.str());
+
   auto event = std::make_shared<PathDiagnosticEventPiece>(Loc, Out.str());
   if (shouldPrune.hasValue())
     event->setPrunable(shouldPrune.getValue());
@@ -2148,8 +2181,8 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const BinaryOperator *BExpr,
 }
 
 std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitConditionVariable(
-    StringRef LhsString, const Expr *CondVarExpr, const bool tookTrue,
-    BugReporterContext &BRC, BugReport &report, const ExplodedNode *N) {
+    StringRef LhsString, const Expr *CondVarExpr, BugReporterContext &BRC,
+    BugReport &report, const ExplodedNode *N, bool TookTrue) {
   // FIXME: If there's already a constraint tracker for this variable,
   // we shouldn't emit anything here (c.f. the double note in
   // test/Analysis/inlining/path-notes.c)
@@ -2160,13 +2193,13 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitConditionVariable(
   QualType Ty = CondVarExpr->getType();
 
   if (Ty->isPointerType())
-    Out << (tookTrue ? "not null" : "null");
+    Out << (TookTrue ? "not null" : "null");
   else if (Ty->isObjCObjectPointerType())
-    Out << (tookTrue ? "not nil" : "nil");
+    Out << (TookTrue ? "not nil" : "nil");
   else if (Ty->isBooleanType())
-    Out << (tookTrue ? "true" : "false");
+    Out << (TookTrue ? "true" : "false");
   else if (Ty->isIntegralOrEnumerationType())
-    Out << (tookTrue ? "non-zero" : "zero");
+    Out << (TookTrue ? "non-zero" : "zero");
   else
     return nullptr;
 
@@ -2187,34 +2220,44 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitConditionVariable(
   return event;
 }
 
-std::shared_ptr<PathDiagnosticPiece>
-ConditionBRVisitor::VisitTrueTest(const Expr *Cond, const DeclRefExpr *DR,
-                                  const bool tookTrue, BugReporterContext &BRC,
-                                  BugReport &report, const ExplodedNode *N) {
-  const auto *VD = dyn_cast<VarDecl>(DR->getDecl());
+std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
+    const Expr *Cond, const DeclRefExpr *DRE, BugReporterContext &BRC,
+    BugReport &report, const ExplodedNode *N, bool TookTrue, bool IsAssuming) {
+  const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
   if (!VD)
     return nullptr;
 
   SmallString<256> Buf;
   llvm::raw_svector_ostream Out(Buf);
 
-  Out << "Assuming '" << VD->getDeclName() << "' is ";
+  Out << (IsAssuming ? "Assuming '" : "'") << VD->getDeclName() << "' is ";
 
-  QualType VDTy = VD->getType();
+  QualType Ty = VD->getType();
 
-  if (VDTy->isPointerType())
-    Out << (tookTrue ? "non-null" : "null");
-  else if (VDTy->isObjCObjectPointerType())
-    Out << (tookTrue ? "non-nil" : "nil");
-  else if (VDTy->isScalarType())
-    Out << (tookTrue ? "not equal to 0" : "0");
-  else
+  if (Ty->isPointerType())
+    Out << (TookTrue ? "non-null" : "null");
+  else if (Ty->isObjCObjectPointerType())
+    Out << (TookTrue ? "non-nil" : "nil");
+  else if (Ty->isScalarType()) {
+    Optional<const llvm::APSInt *> IntValue;
+    if (!IsAssuming)
+      IntValue = getConcreteIntegerValue(DRE, N);
+
+    if (IsAssuming || !IntValue.hasValue())
+      Out << (TookTrue ? "not equal to 0" : "0");
+    else
+      Out << *IntValue.getValue();
+  } else
     return nullptr;
 
   const LocationContext *LCtx = N->getLocationContext();
   PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx);
-  auto event = std::make_shared<PathDiagnosticEventPiece>(Loc, Out.str());
 
+  // If we know the value create a pop-up note.
+  if (!IsAssuming)
+    return std::make_shared<PathDiagnosticPopUpPiece>(Loc, Out.str());
+
+  auto event = std::make_shared<PathDiagnosticEventPiece>(Loc, Out.str());
   const ProgramState *state = N->getState().get();
   if (const MemRegion *R = state->getLValue(VD, LCtx).getAsRegion()) {
     if (report.isInteresting(R))
diff --git a/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist
index d74d9fc7c677a..21204b0d8ae90 100644
--- a/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist
@@ -90,6 +90,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>9</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>9</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>9</integer>
+      <key>col</key><integer>7</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>9</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>9</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;p&apos; is non-null</string>
+     <key>message</key>
+     <string>&apos;p&apos; is non-null</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>9</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>9</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist
index 9afb8055793b8..5be7d0adb9694 100644
--- a/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist
@@ -158,6 +158,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>11</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>11</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>11</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;y&apos; is not equal to 2</string>
+     <key>message</key>
+     <string>&apos;y&apos; is not equal to 2</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -420,6 +482,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>11</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>11</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>11</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;y&apos; is equal to 2</string>
+     <key>message</key>
+     <string>&apos;y&apos; is equal to 2</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>11</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -665,6 +789,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>32</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>32</integer>
+           <key>col</key><integer>10</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>32</integer>
+      <key>col</key><integer>7</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>32</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>32</integer>
+         <key>col</key><integer>10</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;fail&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;fail&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>32</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>32</integer>
+           <key>col</key><integer>10</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist b/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist
index 7592d2a5043d1..4eca510c3f056 100644
--- a/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist
@@ -2709,6 +2709,34 @@
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>146</integer>
+      <key>col</key><integer>8</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>146</integer>
+         <key>col</key><integer>8</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>146</integer>
+         <key>col</key><integer>13</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is not equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is not equal to 1</string>
+    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -2887,6 +2915,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>146</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>146</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>146</integer>
+      <key>col</key><integer>8</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>146</integer>
+         <key>col</key><integer>8</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>146</integer>
+         <key>col</key><integer>13</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>146</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>146</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -3805,6 +3895,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>178</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>178</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>178</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is equal to 0</string>
+     <key>message</key>
+     <string>&apos;i&apos; is equal to 0</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -3999,6 +4151,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>178</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>178</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>178</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is not equal to 0</string>
+     <key>message</key>
+     <string>&apos;i&apos; is not equal to 0</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>178</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -4033,6 +4247,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>181</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>181</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>181</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>181</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>181</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>181</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>181</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -7807,16 +8083,44 @@
       </array>
     </dict>
     <dict>
-     <key>kind</key><string>control</string>
-     <key>edges</key>
-      <array>
-       <dict>
-        <key>start</key>
-         <array>
-          <dict>
-           <key>line</key><integer>267</integer>
-           <key>col</key><integer>18</integer>
-           <key>file</key><integer>0</integer>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>267</integer>
+      <key>col</key><integer>18</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>267</integer>
+         <key>col</key><integer>18</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>267</integer>
+         <key>col</key><integer>22</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;coin&apos; is 0</string>
+     <key>message</key>
+     <string>&apos;coin&apos; is 0</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>267</integer>
+           <key>col</key><integer>18</integer>
+           <key>file</key><integer>0</integer>
           </dict>
           <dict>
            <key>line</key><integer>267</integer>
@@ -8658,6 +8962,34 @@
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>285</integer>
+      <key>col</key><integer>12</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>285</integer>
+         <key>col</key><integer>12</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>285</integer>
+         <key>col</key><integer>12</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;z&apos; is 0</string>
+     <key>message</key>
+     <string>&apos;z&apos; is 0</string>
+    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -8997,6 +9329,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>294</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>294</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>294</integer>
+      <key>col</key><integer>7</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>294</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>294</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;y&apos; is null</string>
+     <key>message</key>
+     <string>&apos;y&apos; is null</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>294</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>294</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -11585,6 +11979,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -11784,6 +12240,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 0</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 0</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -12049,6 +12567,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -12514,18 +13094,80 @@
          <array>
           <dict>
            <key>line</key><integer>457</integer>
-           <key>col</key><integer>5</integer>
+           <key>col</key><integer>5</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>5</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
            <key>line</key><integer>457</integer>
-           <key>col</key><integer>6</integer>
+           <key>col</key><integer>9</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -12535,12 +13177,12 @@
          <array>
           <dict>
            <key>line</key><integer>457</integer>
-           <key>col</key><integer>5</integer>
+           <key>col</key><integer>9</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
            <key>line</key><integer>457</integer>
-           <key>col</key><integer>6</integer>
+           <key>col</key><integer>9</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
@@ -13106,6 +13748,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -14587,6 +15291,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -16195,6 +16961,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -18028,6 +18856,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>457</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>457</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;first&apos; is 1</string>
+     <key>message</key>
+     <string>&apos;first&apos; is 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>457</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/inline-plist.c.plist b/clang/test/Analysis/Inputs/expected-plists/inline-plist.c.plist
index b6248158f15a9..1d64cd796ba56 100644
--- a/clang/test/Analysis/Inputs/expected-plists/inline-plist.c.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/inline-plist.c.plist
@@ -518,12 +518,74 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>47</integer>
+           <key>line</key><integer>45</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>45</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>45</integer>
+      <key>col</key><integer>7</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>45</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>45</integer>
+         <key>col</key><integer>12</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;p&apos; is equal to null</string>
+     <key>message</key>
+     <string>&apos;p&apos; is equal to null</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>45</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>45</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>48</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>47</integer>
+           <key>line</key><integer>48</integer>
            <key>col</key><integer>16</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -535,7 +597,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>47</integer>
+      <key>line</key><integer>48</integer>
       <key>col</key><integer>18</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -543,12 +605,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>47</integer>
+         <key>line</key><integer>48</integer>
          <key>col</key><integer>18</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>47</integer>
+         <key>line</key><integer>48</integer>
          <key>col</key><integer>18</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -564,7 +626,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>47</integer>
+      <key>line</key><integer>48</integer>
       <key>col</key><integer>5</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -572,12 +634,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>47</integer>
+         <key>line</key><integer>48</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>47</integer>
+         <key>line</key><integer>48</integer>
          <key>col</key><integer>19</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -725,7 +787,7 @@
     <integer>38</integer>
     <integer>39</integer>
     <integer>45</integer>
-    <integer>47</integer>
+    <integer>48</integer>
    </array>
   </dict>
   </dict>
@@ -736,7 +798,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>58</integer>
+      <key>line</key><integer>59</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -744,12 +806,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>58</integer>
+         <key>line</key><integer>59</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>58</integer>
+         <key>line</key><integer>59</integer>
          <key>col</key><integer>8</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -769,12 +831,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>58</integer>
+           <key>line</key><integer>59</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>58</integer>
+           <key>line</key><integer>59</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -782,12 +844,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>59</integer>
+           <key>line</key><integer>60</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>59</integer>
+           <key>line</key><integer>60</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -799,7 +861,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>59</integer>
+      <key>line</key><integer>60</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -807,12 +869,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>59</integer>
+         <key>line</key><integer>60</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>61</integer>
+         <key>line</key><integer>62</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -828,7 +890,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>59</integer>
+      <key>line</key><integer>60</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -846,12 +908,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>59</integer>
+           <key>line</key><integer>60</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>59</integer>
+           <key>line</key><integer>60</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -859,12 +921,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>60</integer>
+           <key>line</key><integer>61</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>60</integer>
+           <key>line</key><integer>61</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -880,12 +942,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>60</integer>
+           <key>line</key><integer>61</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>60</integer>
+           <key>line</key><integer>61</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -893,12 +955,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>60</integer>
+           <key>line</key><integer>61</integer>
            <key>col</key><integer>8</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>60</integer>
+           <key>line</key><integer>61</integer>
            <key>col</key><integer>8</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -910,7 +972,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>60</integer>
+      <key>line</key><integer>61</integer>
       <key>col</key><integer>8</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -918,12 +980,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>60</integer>
+         <key>line</key><integer>61</integer>
          <key>col</key><integer>6</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>60</integer>
+         <key>line</key><integer>61</integer>
          <key>col</key><integer>6</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -944,7 +1006,7 @@
    <key>issue_hash_content_of_line_in_context</key><string>a2e7504f29818834127c44ba841f4da8</string>
   <key>location</key>
   <dict>
-   <key>line</key><integer>60</integer>
+   <key>line</key><integer>61</integer>
    <key>col</key><integer>8</integer>
    <key>file</key><integer>0</integer>
   </dict>
@@ -952,10 +1014,10 @@
   <dict>
    <key>0</key>
    <array>
-    <integer>57</integer>
     <integer>58</integer>
     <integer>59</integer>
     <integer>60</integer>
+    <integer>61</integer>
    </array>
   </dict>
   </dict>
@@ -970,12 +1032,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -983,12 +1045,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>12</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>12</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1000,7 +1062,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>66</integer>
+      <key>line</key><integer>67</integer>
       <key>col</key><integer>12</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1008,12 +1070,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>66</integer>
+         <key>line</key><integer>67</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>69</integer>
+         <key>line</key><integer>70</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1029,7 +1091,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>66</integer>
+      <key>line</key><integer>67</integer>
       <key>col</key><integer>12</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1047,12 +1109,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>12</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>12</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1060,12 +1122,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>67</integer>
+           <key>line</key><integer>68</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>67</integer>
+           <key>line</key><integer>68</integer>
            <key>col</key><integer>7</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1077,7 +1139,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>67</integer>
+      <key>line</key><integer>68</integer>
       <key>col</key><integer>5</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1085,12 +1147,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>67</integer>
+         <key>line</key><integer>68</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>67</integer>
+         <key>line</key><integer>68</integer>
          <key>col</key><integer>10</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1110,12 +1172,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>67</integer>
+           <key>line</key><integer>68</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>67</integer>
+           <key>line</key><integer>68</integer>
            <key>col</key><integer>7</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1123,12 +1185,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>68</integer>
+           <key>line</key><integer>69</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>68</integer>
+           <key>line</key><integer>69</integer>
            <key>col</key><integer>10</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1140,7 +1202,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>68</integer>
+      <key>line</key><integer>69</integer>
       <key>col</key><integer>5</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1148,12 +1210,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>68</integer>
+         <key>line</key><integer>69</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>68</integer>
+         <key>line</key><integer>69</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1169,7 +1231,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>66</integer>
+      <key>line</key><integer>67</integer>
       <key>col</key><integer>12</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1177,12 +1239,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>66</integer>
+         <key>line</key><integer>67</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>69</integer>
+         <key>line</key><integer>70</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1202,12 +1264,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>12</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>12</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1215,12 +1277,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1232,7 +1294,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>66</integer>
+      <key>line</key><integer>67</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1240,12 +1302,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>66</integer>
+         <key>line</key><integer>67</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>66</integer>
+         <key>line</key><integer>67</integer>
          <key>col</key><integer>8</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1265,12 +1327,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>66</integer>
+           <key>line</key><integer>67</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1278,12 +1340,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>70</integer>
+           <key>line</key><integer>71</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>70</integer>
+           <key>line</key><integer>71</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1299,12 +1361,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>70</integer>
+           <key>line</key><integer>71</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>70</integer>
+           <key>line</key><integer>71</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1312,12 +1374,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>70</integer>
+           <key>line</key><integer>71</integer>
            <key>col</key><integer>6</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>70</integer>
+           <key>line</key><integer>71</integer>
            <key>col</key><integer>6</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1329,7 +1391,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>70</integer>
+      <key>line</key><integer>71</integer>
       <key>col</key><integer>6</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1337,12 +1399,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>70</integer>
+         <key>line</key><integer>71</integer>
          <key>col</key><integer>4</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>70</integer>
+         <key>line</key><integer>71</integer>
          <key>col</key><integer>4</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1366,7 +1428,7 @@
   <key>issue_hash_function_offset</key><string>5</string>
   <key>location</key>
   <dict>
-   <key>line</key><integer>70</integer>
+   <key>line</key><integer>71</integer>
    <key>col</key><integer>6</integer>
    <key>file</key><integer>0</integer>
   </dict>
@@ -1374,11 +1436,11 @@
   <dict>
    <key>0</key>
    <array>
-    <integer>65</integer>
     <integer>66</integer>
     <integer>67</integer>
     <integer>68</integer>
-    <integer>70</integer>
+    <integer>69</integer>
+    <integer>71</integer>
    </array>
   </dict>
   </dict>
@@ -1393,12 +1455,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>74</integer>
+           <key>line</key><integer>75</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>74</integer>
+           <key>line</key><integer>75</integer>
            <key>col</key><integer>9</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1406,12 +1468,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>75</integer>
+           <key>line</key><integer>76</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>75</integer>
+           <key>line</key><integer>76</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1423,7 +1485,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>75</integer>
+      <key>line</key><integer>76</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1431,12 +1493,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>75</integer>
+         <key>line</key><integer>76</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>77</integer>
+         <key>line</key><integer>78</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1452,7 +1514,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>75</integer>
+      <key>line</key><integer>76</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1470,12 +1532,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>75</integer>
+           <key>line</key><integer>76</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>75</integer>
+           <key>line</key><integer>76</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1483,12 +1545,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>76</integer>
+           <key>line</key><integer>77</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>76</integer>
+           <key>line</key><integer>77</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1500,7 +1562,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>76</integer>
+      <key>line</key><integer>77</integer>
       <key>col</key><integer>5</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1508,12 +1570,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>76</integer>
+         <key>line</key><integer>77</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>76</integer>
+         <key>line</key><integer>77</integer>
          <key>col</key><integer>9</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1529,7 +1591,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>75</integer>
+      <key>line</key><integer>76</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1537,12 +1599,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>75</integer>
+         <key>line</key><integer>76</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>77</integer>
+         <key>line</key><integer>78</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1562,12 +1624,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>75</integer>
+           <key>line</key><integer>76</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>75</integer>
+           <key>line</key><integer>76</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1575,12 +1637,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>78</integer>
+           <key>line</key><integer>79</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>78</integer>
+           <key>line</key><integer>79</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1596,12 +1658,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>78</integer>
+           <key>line</key><integer>79</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>78</integer>
+           <key>line</key><integer>79</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1609,12 +1671,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>78</integer>
+           <key>line</key><integer>79</integer>
            <key>col</key><integer>6</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>78</integer>
+           <key>line</key><integer>79</integer>
            <key>col</key><integer>6</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1626,7 +1688,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>78</integer>
+      <key>line</key><integer>79</integer>
       <key>col</key><integer>6</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1634,12 +1696,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>78</integer>
+         <key>line</key><integer>79</integer>
          <key>col</key><integer>4</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>78</integer>
+         <key>line</key><integer>79</integer>
          <key>col</key><integer>4</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1663,7 +1725,7 @@
   <key>issue_hash_function_offset</key><string>5</string>
   <key>location</key>
   <dict>
-   <key>line</key><integer>78</integer>
+   <key>line</key><integer>79</integer>
    <key>col</key><integer>6</integer>
    <key>file</key><integer>0</integer>
   </dict>
@@ -1671,11 +1733,11 @@
   <dict>
    <key>0</key>
    <array>
-    <integer>73</integer>
     <integer>74</integer>
     <integer>75</integer>
     <integer>76</integer>
-    <integer>78</integer>
+    <integer>77</integer>
+    <integer>79</integer>
    </array>
   </dict>
   </dict>
@@ -1690,12 +1752,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>82</integer>
+           <key>line</key><integer>83</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>82</integer>
+           <key>line</key><integer>83</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1703,12 +1765,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>83</integer>
+           <key>line</key><integer>84</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>83</integer>
+           <key>line</key><integer>84</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1720,7 +1782,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>83</integer>
+      <key>line</key><integer>84</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1728,12 +1790,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>83</integer>
+         <key>line</key><integer>84</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>85</integer>
+         <key>line</key><integer>86</integer>
          <key>col</key><integer>7</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1749,7 +1811,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>83</integer>
+      <key>line</key><integer>84</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1767,12 +1829,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>83</integer>
+           <key>line</key><integer>84</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>83</integer>
+           <key>line</key><integer>84</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1780,12 +1842,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>84</integer>
+           <key>line</key><integer>85</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>84</integer>
+           <key>line</key><integer>85</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1797,7 +1859,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>84</integer>
+      <key>line</key><integer>85</integer>
       <key>col</key><integer>5</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1805,12 +1867,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>84</integer>
+         <key>line</key><integer>85</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>84</integer>
+         <key>line</key><integer>85</integer>
          <key>col</key><integer>10</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1826,7 +1888,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>83</integer>
+      <key>line</key><integer>84</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1834,12 +1896,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>83</integer>
+         <key>line</key><integer>84</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>85</integer>
+         <key>line</key><integer>86</integer>
          <key>col</key><integer>7</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1859,12 +1921,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>83</integer>
+           <key>line</key><integer>84</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>83</integer>
+           <key>line</key><integer>84</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1872,12 +1934,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>86</integer>
+           <key>line</key><integer>87</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>86</integer>
+           <key>line</key><integer>87</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1893,12 +1955,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>86</integer>
+           <key>line</key><integer>87</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>86</integer>
+           <key>line</key><integer>87</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1906,12 +1968,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>86</integer>
+           <key>line</key><integer>87</integer>
            <key>col</key><integer>6</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>86</integer>
+           <key>line</key><integer>87</integer>
            <key>col</key><integer>6</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -1923,7 +1985,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>86</integer>
+      <key>line</key><integer>87</integer>
       <key>col</key><integer>6</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1931,12 +1993,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>86</integer>
+         <key>line</key><integer>87</integer>
          <key>col</key><integer>4</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>86</integer>
+         <key>line</key><integer>87</integer>
          <key>col</key><integer>4</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1960,7 +2022,7 @@
   <key>issue_hash_function_offset</key><string>5</string>
   <key>location</key>
   <dict>
-   <key>line</key><integer>86</integer>
+   <key>line</key><integer>87</integer>
    <key>col</key><integer>6</integer>
    <key>file</key><integer>0</integer>
   </dict>
@@ -1968,12 +2030,12 @@
   <dict>
    <key>0</key>
    <array>
-    <integer>81</integer>
     <integer>82</integer>
     <integer>83</integer>
     <integer>84</integer>
     <integer>85</integer>
     <integer>86</integer>
+    <integer>87</integer>
    </array>
   </dict>
   </dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist b/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist
index a5735a97c496b..926f827426499 100644
--- a/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/objc-radar17039661.m.plist
@@ -802,6 +802,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>38</integer>
+           <key>col</key><integer>11</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>38</integer>
+           <key>col</key><integer>20</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>38</integer>
+      <key>col</key><integer>11</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>38</integer>
+         <key>col</key><integer>11</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>38</integer>
+         <key>col</key><integer>37</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;traitValue&apos; is equal to &apos;newTraitValue&apos;</string>
+     <key>message</key>
+     <string>&apos;traitValue&apos; is equal to &apos;newTraitValue&apos;</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>38</integer>
+           <key>col</key><integer>11</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>38</integer>
+           <key>col</key><integer>20</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist
index 3a1ad5b778b0c..259ce77aba725 100644
--- a/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/plist-macros-with-expansion.cpp.plist
@@ -5206,6 +5206,62 @@
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>418</integer>
+      <key>col</key><integer>3</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>418</integer>
+         <key>col</key><integer>3</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>418</integer>
+         <key>col</key><integer>27</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;A&apos; is &gt;= 0</string>
+     <key>message</key>
+     <string>&apos;A&apos; is &gt;= 0</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>418</integer>
+      <key>col</key><integer>3</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>418</integer>
+         <key>col</key><integer>3</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>418</integer>
+         <key>col</key><integer>27</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;B&apos; is &gt;= 0</string>
+     <key>message</key>
+     <string>&apos;B&apos; is &gt;= 0</string>
+    </dict>
     <dict>
      <key>kind</key><string>event</string>
      <key>location</key>
diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-macros.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/plist-macros.cpp.plist
index 996ab0a7333ec..2cc1604ca88e3 100644
--- a/clang/test/Analysis/Inputs/expected-plists/plist-macros.cpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/plist-macros.cpp.plist
@@ -1364,6 +1364,34 @@
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>69</integer>
+      <key>col</key><integer>3</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>69</integer>
+         <key>col</key><integer>3</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>69</integer>
+         <key>col</key><integer>16</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;p&apos; is null</string>
+     <key>message</key>
+     <string>&apos;p&apos; is null</string>
+    </dict>
     <dict>
      <key>kind</key><string>event</string>
      <key>location</key>
@@ -1525,7 +1553,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>83</integer>
+      <key>line</key><integer>87</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1533,12 +1561,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>83</integer>
+         <key>line</key><integer>87</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>83</integer>
+         <key>line</key><integer>87</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1546,15 +1574,15 @@
      </array>
      <key>depth</key><integer>0</integer>
      <key>extended_message</key>
-     <string>Passing null pointer value via 1st parameter &apos;a&apos;</string>
+     <string>Passing value via 1st parameter &apos;a&apos;</string>
      <key>message</key>
-     <string>Passing null pointer value via 1st parameter &apos;a&apos;</string>
+     <string>Passing value via 1st parameter &apos;a&apos;</string>
     </dict>
     <dict>
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>83</integer>
+      <key>line</key><integer>87</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -1562,12 +1590,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>83</integer>
+         <key>line</key><integer>87</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>83</integer>
+         <key>line</key><integer>87</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -1589,9 +1617,9 @@
      </dict>
      <key>depth</key><integer>1</integer>
      <key>extended_message</key>
-     <string>Entered call from &apos;test1&apos;</string>
+     <string>Entered call from &apos;test2&apos;</string>
      <key>message</key>
-     <string>Entered call from &apos;test1&apos;</string>
+     <string>Entered call from &apos;test2&apos;</string>
     </dict>
     <dict>
      <key>kind</key><string>control</string>
@@ -1645,6 +1673,69 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>77</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>77</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>event</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>77</integer>
+      <key>col</key><integer>7</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>77</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>77</integer>
+         <key>col</key><integer>7</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>depth</key><integer>1</integer>
+     <key>extended_message</key>
+     <string>Assuming &apos;a&apos; is null</string>
+     <key>message</key>
+     <string>Assuming &apos;a&apos; is null</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>77</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>77</integer>
+           <key>col</key><integer>7</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -1747,8 +1838,8 @@
     <integer>76</integer>
     <integer>77</integer>
     <integer>79</integer>
-    <integer>82</integer>
-    <integer>83</integer>
+    <integer>86</integer>
+    <integer>87</integer>
    </array>
   </dict>
   </dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist b/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist
index fb07a574b0e7c..5b1de9121f35c 100644
--- a/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/plist-output.m.plist
@@ -2495,6 +2495,34 @@
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>96</integer>
+      <key>col</key><integer>8</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>96</integer>
+         <key>col</key><integer>8</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>96</integer>
+         <key>col</key><integer>13</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is not equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is not equal to 1</string>
+    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -2673,6 +2701,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>96</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>96</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>96</integer>
+      <key>col</key><integer>8</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>96</integer>
+         <key>col</key><integer>8</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>96</integer>
+         <key>col</key><integer>13</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>96</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>96</integer>
+           <key>col</key><integer>8</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -3446,6 +3536,34 @@
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>127</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>127</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>127</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is not equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is not equal to 1</string>
+    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -3624,6 +3742,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>127</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>127</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>127</integer>
+      <key>col</key><integer>9</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>127</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>127</integer>
+         <key>col</key><integer>14</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+     <key>message</key>
+     <string>&apos;i&apos; is equal to 1</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>127</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>127</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist
index b9389f3ce7136..3b3bce10844ec 100644
--- a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objc.plist
@@ -9568,6 +9568,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>745</integer>
+      <key>col</key><integer>6</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>6</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;name&apos; is non-nil</string>
+     <key>message</key>
+     <string>&apos;name&apos; is non-nil</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -10095,6 +10157,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>745</integer>
+      <key>col</key><integer>6</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>6</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;name&apos; is non-nil</string>
+     <key>message</key>
+     <string>&apos;name&apos; is non-nil</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist
index 688fc27953af8..539b33e7c63e7 100644
--- a/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/retain-release.m.objcpp.plist
@@ -9568,6 +9568,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>745</integer>
+      <key>col</key><integer>6</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>6</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;name&apos; is non-nil</string>
+     <key>message</key>
+     <string>&apos;name&apos; is non-nil</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
@@ -10095,6 +10157,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>745</integer>
+      <key>col</key><integer>6</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>6</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>745</integer>
+         <key>col</key><integer>9</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>&apos;name&apos; is non-nil</string>
+     <key>message</key>
+     <string>&apos;name&apos; is non-nil</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>6</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>745</integer>
+           <key>col</key><integer>9</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/NewDelete-path-notes.cpp b/clang/test/Analysis/NewDelete-path-notes.cpp
index 2195b9d42a4b9..70fd973facfcb 100644
--- a/clang/test/Analysis/NewDelete-path-notes.cpp
+++ b/clang/test/Analysis/NewDelete-path-notes.cpp
@@ -6,8 +6,8 @@
 void test() {
   int *p = new int;
   // expected-note@-1 {{Memory is allocated}}
-  if (p)
-    // expected-note@-1 {{Taking true branch}}
+  if (p) // expected-note {{'p' is non-null}}
+         // expected-note@-1 {{Taking true branch}}
     delete p;
     // expected-note@-1 {{Memory is released}}
 
diff --git a/clang/test/Analysis/diagnostics/no-store-func-path-notes.c b/clang/test/Analysis/diagnostics/no-store-func-path-notes.c
index c0208214cc3ca..fd0a90e85e291 100644
--- a/clang/test/Analysis/diagnostics/no-store-func-path-notes.c
+++ b/clang/test/Analysis/diagnostics/no-store-func-path-notes.c
@@ -5,7 +5,8 @@ typedef __typeof(sizeof(int)) size_t;
 void *memset(void *__s, int __c, size_t __n);
 
 int initializer1(int *p, int x) {
-  if (x) { // expected-note{{Taking false branch}}
+  if (x) { // expected-note{{'x' is 0}}
+           // expected-note@-1{{Taking false branch}}
     *p = 1;
     return 0;
   } else {
@@ -30,7 +31,8 @@ int param_initialized_properly() {
 static int global;
 
 int initializer2(int **p, int x) {
-  if (x) { // expected-note{{Taking false branch}}
+  if (x) { // expected-note{{'x' is 0}}
+           // expected-note@-1{{Taking false branch}}
     *p = &global;
     return 0;
   } else {
@@ -47,7 +49,8 @@ int param_not_written_into_by_func() {
 }
 
 void initializer3(int *p, int param) {
-  if (param) // expected-note{{Taking false branch}}
+  if (param) // expected-note{{'param' is 0}}
+             // expected-note@-1{{Taking false branch}}
     *p = 0;
 } // expected-note{{Returning without writing to '*p'}}
 
@@ -60,12 +63,14 @@ int param_written_into_by_void_func() {
 }
 
 void initializer4(int *p, int param) {
-  if (param) // expected-note{{Taking false branch}}
+  if (param) // expected-note{{'param' is 0}}
+             // expected-note@-1{{Taking false branch}}
     *p = 0;
 } // expected-note{{Returning without writing to '*p'}}
 
 void initializer5(int *p, int param) {
-  if (!param) // expected-note{{Taking false branch}}
+  if (!param) // expected-note{{'param' is 1}}
+              // expected-note@-1{{Taking false branch}}
     *p = 0;
 } // expected-note{{Returning without writing to '*p'}}
 
@@ -95,7 +100,8 @@ typedef struct {
 } S;
 
 int initializer7(S *s, int param) {
-  if (param) { // expected-note{{Taking false branch}}
+  if (param) { // expected-note{{'param' is 0}}
+               // expected-note@-1{{Taking false branch}}
     s->x = 0;
     return 0;
   }
diff --git a/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp b/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp
index 587c08fae13d5..39b7dd9e49265 100644
--- a/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp
+++ b/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp
@@ -1,7 +1,8 @@
 // RUN: %clang_analyze_cc1 -x c++ -std=c++14 -analyzer-checker=core -analyzer-output=text -verify %s
 
 int initializer1(int &p, int x) {
-  if (x) { // expected-note{{Taking false branch}}
+  if (x) { // expected-note{{'x' is 0}}
+           // expected-note@-1{{Taking false branch}}
     p = 1;
     return 0;
   } else {
@@ -19,7 +20,8 @@ int param_not_initialized_by_func() {
 
 struct S {
   int initialize(int *p, int param) {
-    if (param) { //expected-note{{Taking false branch}}
+    if (param) { // expected-note{{'param' is 0}}
+                 // expected-note@-1{{Taking false branch}}
       *p = 1;
       return 1;
     }
diff --git a/clang/test/Analysis/diagnostics/no-store-func-path-notes.m b/clang/test/Analysis/diagnostics/no-store-func-path-notes.m
index c0aa5144e763a..0136389efe617 100644
--- a/clang/test/Analysis/diagnostics/no-store-func-path-notes.m
+++ b/clang/test/Analysis/diagnostics/no-store-func-path-notes.m
@@ -10,7 +10,8 @@ - (int)initVar:(int *)var param:(int)param;
 
 @implementation I
 - (int)initVar:(int *)var param:(int)param {
-  if (param) { // expected-note{{Taking false branch}}
+  if (param) { // expected-note{{'param' is 0}}
+               // expected-note@-1{{Taking false branch}}
     *var = 1;
     return 0;
   }
@@ -22,14 +23,16 @@ int foo(I *i) {
   int x;                            //expected-note{{'x' declared without an initial value}}
   int out = [i initVar:&x param:0]; //expected-note{{Calling 'initVar:param:'}}
                                     //expected-note@-1{{Returning from 'initVar:param:'}}
-  if (out)                          // expected-note{{Taking true branch}}
+  if (out)                          //expected-note{{'out' is 1}}
+                                    //expected-note@-1{{Taking true branch}}
     return x;                       //expected-warning{{Undefined or garbage value returned to caller}}
                                     //expected-note@-1{{Undefined or garbage value returned to caller}}
   return 0;
 }
 
 int initializer1(int *p, int x) {
-  if (x) { // expected-note{{Taking false branch}}
+  if (x) { // expected-note{{'x' is 0}}
+           // expected-note@-1{{Taking false branch}}
     *p = 1;
     return 0;
   } else {
diff --git a/clang/test/Analysis/inline-plist.c b/clang/test/Analysis/inline-plist.c
index b3fb554da7044..dbcca5c2f7999 100644
--- a/clang/test/Analysis/inline-plist.c
+++ b/clang/test/Analysis/inline-plist.c
@@ -43,7 +43,8 @@ void bar(int *p) {
   }
 
   if (p == 0) {
-    // expected-note@-1 {{Taking true branch}}
+    // expected-note@-1 {{'p' is equal to null}}
+    // expected-note@-2 {{Taking true branch}}
     triggers_bug(p);
     // expected-note@-1 {{Passing null pointer value via 1st parameter 'p'}}
     // expected-note@-2 {{Calling 'triggers_bug'}}
diff --git a/clang/test/Analysis/osobject-retain-release.cpp b/clang/test/Analysis/osobject-retain-release.cpp
index 98b3e95abfd71..9d7cd854422eb 100644
--- a/clang/test/Analysis/osobject-retain-release.cpp
+++ b/clang/test/Analysis/osobject-retain-release.cpp
@@ -498,7 +498,8 @@ unsigned int check_dynamic_cast_no_null_on_orig(OSObject *obj) {
 void check_dynamic_cast_null_branch(OSObject *obj) {
   OSArray *arr1 = OSArray::withCapacity(10); // expected-note{{Call to method 'OSArray::withCapacity' returns an OSObject}}
   OSArray *arr = OSDynamicCast(OSArray, obj); // expected-note{{Assuming dynamic cast returns null due to type mismatch}}
-  if (!arr) // expected-note{{Taking true branch}}
+  if (!arr) // expected-note{{'arr' is null}}
+            // expected-note@-1{{Taking true branch}}
     return; // expected-warning{{Potential leak of an object stored into 'arr1'}}
             // expected-note@-1{{Object leaked}}
   arr1->release();
diff --git a/clang/test/Analysis/use-after-move.cpp b/clang/test/Analysis/use-after-move.cpp
index ac4222bc776e3..e58301df8b512 100644
--- a/clang/test/Analysis/use-after-move.cpp
+++ b/clang/test/Analysis/use-after-move.cpp
@@ -242,10 +242,12 @@ void reinitializationTest(int i) {
   }
   {
     A a;
-    if (i == 1) { // peaceful-note 2 {{Taking false branch}}
+    if (i == 1) { // peaceful-note 2 {{'i' is not equal to 1}}
+                  // peaceful-note@-1 2 {{Taking false branch}}
       std::move(a);
     }
-    if (i == 2) { // peaceful-note 2 {{Taking false branch}}
+    if (i == 2) { // peaceful-note 2 {{'i' is not equal to 2}}
+                  // peaceful-note@-1 2 {{Taking false branch}}
       a = A();
       a.foo();
     }
@@ -276,7 +278,8 @@ void reinitializationTest(int i) {
                   // peaceful-note@-1 {{Taking false branch}}
       a = A();
     }
-    if (i > 5) { // peaceful-note {{Taking true branch}}
+    if (i > 5) { // peaceful-note {{'i' is > 5}}
+                 // peaceful-note@-1 {{Taking true branch}}
       a.foo(); // peaceful-warning {{Method called on moved-from object 'a'}}
                // peaceful-note@-1 {{Method called on moved-from object 'a'}}
     }
@@ -559,7 +562,9 @@ void differentBranchesTest(int i) {
   // Same thing, but with a ternary operator.
   {
     A a, b;
-    i > 0 ? (void)(b = std::move(a)) : a.bar(); // no-warning // peaceful-note {{'?' condition is true}}
+    i > 0 ? (void)(b = std::move(a)) : a.bar(); // no-warning
+    // peaceful-note@-1 {{'i' is > 0}}
+    // peaceful-note@-2 {{'?' condition is true}}
   }
   // A variation on the theme above.
   {
diff --git a/clang/test/Analysis/virtualcall.cpp b/clang/test/Analysis/virtualcall.cpp
index 1929abf6f0aa6..5847110c093eb 100644
--- a/clang/test/Analysis/virtualcall.cpp
+++ b/clang/test/Analysis/virtualcall.cpp
@@ -164,8 +164,10 @@ class X {
   X(int i) {
     if (i > 0) {
 #if !PUREONLY
-	// expected-note-re@-2 {{{{^}}Taking true branch}}
-	// expected-note-re@-3 {{{{^}}Taking false branch}}
+	// expected-note-re@-2 {{{{^}}'i' is > 0}}
+	// expected-note-re@-3 {{{{^}}Taking true branch}}
+	// expected-note-re@-4 {{{{^}}'i' is <= 0}}
+	// expected-note-re@-5 {{{{^}}Taking false branch}}
 #endif
       X x(i - 1);
 #if !PUREONLY

From d80024c6875972f472e12dff231da4dabedba454 Mon Sep 17 00:00:00 2001
From: Pete Couperus <petecoup@synopsys.com>
Date: Wed, 29 May 2019 20:07:35 +0000
Subject: [PATCH 0547/1176] [ARC] Cleanup ARCAsmPrinter.

Summary:
Remove unused getTargetStreamer.
Remove unused headers.

Reviewers: dantrushin

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62549

llvm-svn: 362021
---
 llvm/lib/Target/ARC/ARCAsmPrinter.cpp | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
index d72a5003678ff..5c3e2c9e773cc 100644
--- a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -12,29 +12,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARC.h"
-#include "ARCInstrInfo.h"
 #include "ARCMCInstLower.h"
 #include "ARCSubtarget.h"
 #include "ARCTargetMachine.h"
-#include "ARCTargetStreamer.h"
 #include "MCTargetDesc/ARCInstPrinter.h"
 #include "TargetInfo/ARCTargetInfo.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include <algorithm>
 
 using namespace llvm;
 
@@ -44,7 +33,6 @@ namespace {
 
 class ARCAsmPrinter : public AsmPrinter {
   ARCMCInstLower MCInstLowering;
-  ARCTargetStreamer &getTargetStreamer();
 
 public:
   explicit ARCAsmPrinter(TargetMachine &TM,
@@ -58,10 +46,6 @@ class ARCAsmPrinter : public AsmPrinter {
 
 } // end anonymous namespace
 
-ARCTargetStreamer &ARCAsmPrinter::getTargetStreamer() {
-  return static_cast<ARCTargetStreamer &>(*OutStreamer->getTargetStreamer());
-}
-
 void ARCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   SmallString<128> Str;
   raw_svector_ostream O(Str);

From 95dec50a35dadafaee5742af47428b7f572cff8e Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Wed, 29 May 2019 20:11:53 +0000
Subject: [PATCH 0548/1176] [LoopIdiomRecognize][NFC] Use DEBUG_TYPE, add
 LLVM_DEBUG() to runOnNoncountableLoop()

Split off from D61144

llvm-svn: 362022
---
 llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 151fe092eb1b4..9f98d03e9c152 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -312,9 +312,10 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
   SmallVector<BasicBlock *, 8> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
 
-  LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
                     << CurLoop->getHeader()->getParent()->getName()
-                    << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+                    << "] Countable Loop %" << CurLoop->getHeader()->getName()
+                    << "\n");
 
   bool MadeChange = false;
 
@@ -1107,6 +1108,11 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
 }
 
 bool LoopIdiomRecognize::runOnNoncountableLoop() {
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
+                    << CurLoop->getHeader()->getParent()->getName()
+                    << "] Noncountable Loop %"
+                    << CurLoop->getHeader()->getName() << "\n");
+
   return recognizePopcount() || recognizeAndInsertFFS();
 }
 

From 8193ea60f6a01319c1bae26ec186c2676f2ba5f8 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 20:13:29 +0000
Subject: [PATCH 0549/1176] [analyzer] ConditionBRVisitor: Test
 'add-pop-up-notes=false'

Summary: -

Reviewers: NoQ, alexfh

Reviewed By: alexfh

Subscribers: cfe-commits, xazax.hun, baloghadamsoftware, szepet, a.sidorin,
             mikhail.ramalho, Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61060

llvm-svn: 362023
---
 .../NewDelete-path-notes.cpp.plist            | 224 ++++++++----------
 clang/test/Analysis/NewDelete-path-notes.cpp  |  22 +-
 2 files changed, 114 insertions(+), 132 deletions(-)

diff --git a/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist
index 21204b0d8ae90..fb7a03d928ae6 100644
--- a/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/NewDelete-path-notes.cpp.plist
@@ -13,30 +13,30 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>7</integer>
-      <key>col</key><integer>12</integer>
+      <key>line</key><integer>24</integer>
+      <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
      <key>ranges</key>
      <array>
        <array>
         <dict>
-         <key>line</key><integer>7</integer>
-         <key>col</key><integer>12</integer>
+         <key>line</key><integer>24</integer>
+         <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>7</integer>
-         <key>col</key><integer>18</integer>
+         <key>line</key><integer>24</integer>
+         <key>col</key><integer>10</integer>
          <key>file</key><integer>0</integer>
         </dict>
        </array>
      </array>
      <key>depth</key><integer>0</integer>
      <key>extended_message</key>
-     <string>Memory is allocated</string>
+     <string>Attempt to free released memory</string>
      <key>message</key>
-     <string>Memory is allocated</string>
+     <string>Attempt to free released memory</string>
     </dict>
     <dict>
      <key>kind</key><string>control</string>
@@ -46,32 +46,61 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>7</integer>
-           <key>col</key><integer>3</integer>
+           <key>line</key><integer>17</integer>
+           <key>col</key><integer>1</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>7</integer>
-           <key>col</key><integer>5</integer>
+           <key>line</key><integer>17</integer>
+           <key>col</key><integer>4</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>9</integer>
+           <key>line</key><integer>18</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>9</integer>
-           <key>col</key><integer>4</integer>
+           <key>line</key><integer>18</integer>
+           <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
        </dict>
       </array>
     </dict>
+    <dict>
+     <key>kind</key><string>event</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>18</integer>
+      <key>col</key><integer>12</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>18</integer>
+         <key>col</key><integer>12</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>18</integer>
+         <key>col</key><integer>18</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>depth</key><integer>0</integer>
+     <key>extended_message</key>
+     <string>Memory is allocated</string>
+     <key>message</key>
+     <string>Memory is allocated</string>
+    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -80,60 +109,32 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>9</integer>
+           <key>line</key><integer>18</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>9</integer>
-           <key>col</key><integer>4</integer>
+           <key>line</key><integer>18</integer>
+           <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>9</integer>
-           <key>col</key><integer>7</integer>
+           <key>line</key><integer>20</integer>
+           <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>9</integer>
-           <key>col</key><integer>7</integer>
+           <key>line</key><integer>20</integer>
+           <key>col</key><integer>4</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
        </dict>
       </array>
     </dict>
-    <dict>
-     <key>kind</key><string>pop-up</string>
-     <key>location</key>
-     <dict>
-      <key>line</key><integer>9</integer>
-      <key>col</key><integer>7</integer>
-      <key>file</key><integer>0</integer>
-     </dict>
-     <key>ranges</key>
-     <array>
-       <array>
-        <dict>
-         <key>line</key><integer>9</integer>
-         <key>col</key><integer>7</integer>
-         <key>file</key><integer>0</integer>
-        </dict>
-        <dict>
-         <key>line</key><integer>9</integer>
-         <key>col</key><integer>7</integer>
-         <key>file</key><integer>0</integer>
-        </dict>
-       </array>
-     </array>
-     <key>extended_message</key>
-     <string>&apos;p&apos; is non-null</string>
-     <key>message</key>
-     <string>&apos;p&apos; is non-null</string>
-    </dict>
     <dict>
      <key>kind</key><string>control</string>
      <key>edges</key>
@@ -142,25 +143,25 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>9</integer>
-           <key>col</key><integer>7</integer>
+           <key>line</key><integer>20</integer>
+           <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>9</integer>
-           <key>col</key><integer>7</integer>
+           <key>line</key><integer>20</integer>
+           <key>col</key><integer>4</integer>
            <key>file</key><integer>0</integer>
           </dict>
          </array>
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>11</integer>
+           <key>line</key><integer>21</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>11</integer>
+           <key>line</key><integer>21</integer>
            <key>col</key><integer>10</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -172,7 +173,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>11</integer>
+      <key>line</key><integer>21</integer>
       <key>col</key><integer>5</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -180,12 +181,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>11</integer>
+         <key>line</key><integer>21</integer>
          <key>col</key><integer>5</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>11</integer>
+         <key>line</key><integer>21</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -205,12 +206,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>11</integer>
+           <key>line</key><integer>21</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>11</integer>
+           <key>line</key><integer>21</integer>
            <key>col</key><integer>10</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -218,12 +219,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>14</integer>
+           <key>line</key><integer>24</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>14</integer>
+           <key>line</key><integer>24</integer>
            <key>col</key><integer>8</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -231,35 +232,6 @@
        </dict>
       </array>
     </dict>
-    <dict>
-     <key>kind</key><string>event</string>
-     <key>location</key>
-     <dict>
-      <key>line</key><integer>14</integer>
-      <key>col</key><integer>3</integer>
-      <key>file</key><integer>0</integer>
-     </dict>
-     <key>ranges</key>
-     <array>
-       <array>
-        <dict>
-         <key>line</key><integer>14</integer>
-         <key>col</key><integer>3</integer>
-         <key>file</key><integer>0</integer>
-        </dict>
-        <dict>
-         <key>line</key><integer>14</integer>
-         <key>col</key><integer>10</integer>
-         <key>file</key><integer>0</integer>
-        </dict>
-       </array>
-     </array>
-     <key>depth</key><integer>0</integer>
-     <key>extended_message</key>
-     <string>Attempt to free released memory</string>
-     <key>message</key>
-     <string>Attempt to free released memory</string>
-    </dict>
    </array>
    <key>description</key><string>Attempt to free released memory</string>
    <key>category</key><string>Memory error</string>
@@ -269,10 +241,10 @@
    <key>issue_hash_content_of_line_in_context</key><string>bd8e324d09c70b9e2be6f824a4942e5a</string>
   <key>issue_context_kind</key><string>function</string>
   <key>issue_context</key><string>test</string>
-  <key>issue_hash_function_offset</key><string>8</string>
+  <key>issue_hash_function_offset</key><string>7</string>
   <key>location</key>
   <dict>
-   <key>line</key><integer>14</integer>
+   <key>line</key><integer>24</integer>
    <key>col</key><integer>3</integer>
    <key>file</key><integer>0</integer>
   </dict>
@@ -280,11 +252,11 @@
   <dict>
    <key>0</key>
    <array>
-    <integer>6</integer>
-    <integer>7</integer>
-    <integer>9</integer>
-    <integer>11</integer>
-    <integer>14</integer>
+    <integer>17</integer>
+    <integer>18</integer>
+    <integer>20</integer>
+    <integer>21</integer>
+    <integer>24</integer>
    </array>
   </dict>
   </dict>
@@ -295,7 +267,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>25</integer>
+      <key>line</key><integer>35</integer>
       <key>col</key><integer>2</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -303,12 +275,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>25</integer>
+         <key>line</key><integer>35</integer>
          <key>col</key><integer>2</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>25</integer>
+         <key>line</key><integer>35</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -324,7 +296,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>19</integer>
+      <key>line</key><integer>29</integer>
       <key>col</key><integer>2</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -342,12 +314,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>19</integer>
+           <key>line</key><integer>29</integer>
            <key>col</key><integer>2</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>19</integer>
+           <key>line</key><integer>29</integer>
            <key>col</key><integer>5</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -355,12 +327,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>20</integer>
+           <key>line</key><integer>30</integer>
            <key>col</key><integer>3</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>20</integer>
+           <key>line</key><integer>30</integer>
            <key>col</key><integer>8</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -372,7 +344,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>20</integer>
+      <key>line</key><integer>30</integer>
       <key>col</key><integer>3</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -380,12 +352,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>20</integer>
+         <key>line</key><integer>30</integer>
          <key>col</key><integer>3</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>20</integer>
+         <key>line</key><integer>30</integer>
          <key>col</key><integer>13</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -401,7 +373,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>25</integer>
+      <key>line</key><integer>35</integer>
       <key>col</key><integer>2</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -409,12 +381,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>25</integer>
+         <key>line</key><integer>35</integer>
          <key>col</key><integer>2</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>25</integer>
+         <key>line</key><integer>35</integer>
          <key>col</key><integer>12</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -434,12 +406,12 @@
         <key>start</key>
          <array>
           <dict>
-           <key>line</key><integer>25</integer>
+           <key>line</key><integer>35</integer>
            <key>col</key><integer>2</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>25</integer>
+           <key>line</key><integer>35</integer>
            <key>col</key><integer>4</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -447,12 +419,12 @@
         <key>end</key>
          <array>
           <dict>
-           <key>line</key><integer>27</integer>
+           <key>line</key><integer>37</integer>
            <key>col</key><integer>2</integer>
            <key>file</key><integer>0</integer>
           </dict>
           <dict>
-           <key>line</key><integer>27</integer>
+           <key>line</key><integer>37</integer>
            <key>col</key><integer>7</integer>
            <key>file</key><integer>0</integer>
           </dict>
@@ -464,7 +436,7 @@
      <key>kind</key><string>event</string>
      <key>location</key>
      <dict>
-      <key>line</key><integer>27</integer>
+      <key>line</key><integer>37</integer>
       <key>col</key><integer>2</integer>
       <key>file</key><integer>0</integer>
      </dict>
@@ -472,12 +444,12 @@
      <array>
        <array>
         <dict>
-         <key>line</key><integer>27</integer>
+         <key>line</key><integer>37</integer>
          <key>col</key><integer>2</integer>
          <key>file</key><integer>0</integer>
         </dict>
         <dict>
-         <key>line</key><integer>27</integer>
+         <key>line</key><integer>37</integer>
          <key>col</key><integer>11</integer>
          <key>file</key><integer>0</integer>
         </dict>
@@ -501,7 +473,7 @@
   <key>issue_hash_function_offset</key><string>3</string>
   <key>location</key>
   <dict>
-   <key>line</key><integer>27</integer>
+   <key>line</key><integer>37</integer>
    <key>col</key><integer>2</integer>
    <key>file</key><integer>0</integer>
   </dict>
@@ -509,11 +481,11 @@
   <dict>
    <key>0</key>
    <array>
-    <integer>19</integer>
-    <integer>20</integer>
-    <integer>24</integer>
-    <integer>25</integer>
-    <integer>27</integer>
+    <integer>29</integer>
+    <integer>30</integer>
+    <integer>34</integer>
+    <integer>35</integer>
+    <integer>37</integer>
    </array>
   </dict>
   </dict>
diff --git a/clang/test/Analysis/NewDelete-path-notes.cpp b/clang/test/Analysis/NewDelete-path-notes.cpp
index 70fd973facfcb..d556947507efc 100644
--- a/clang/test/Analysis/NewDelete-path-notes.cpp
+++ b/clang/test/Analysis/NewDelete-path-notes.cpp
@@ -1,13 +1,23 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,unix.Malloc -analyzer-output=text -verify %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,unix.Malloc -analyzer-output=text -analyzer-config c++-allocator-inlining=true -verify %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus.NewDelete,unix.Malloc -analyzer-output=plist %s -o %t.plist
-// RUN: cat %t.plist | %diff_plist %S/Inputs/expected-plists/NewDelete-path-notes.cpp.plist -
+// RUN: %clang_analyze_cc1 \
+// RUN:   -analyzer-checker=cplusplus.NewDelete,unix.Malloc \
+// RUN:   -analyzer-config add-pop-up-notes=false \
+// RUN:   -analyzer-output=text -verify %s
+// RUN: %clang_analyze_cc1 \
+// RUN:   -analyzer-checker=cplusplus.NewDelete,unix.Malloc \
+// RUN:   -analyzer-config c++-allocator-inlining=true \
+// RUN:   -analyzer-config add-pop-up-notes=false \
+// RUN:   -analyzer-output=text -verify %s
+// RUN: %clang_analyze_cc1 \
+// RUN:   -analyzer-checker=cplusplus.NewDelete,unix.Malloc \
+// RUN:   -analyzer-config add-pop-up-notes=false \
+// RUN:   -analyzer-output=plist %s -o %t.plist
+// RUN: cat %t.plist | %diff_plist \
+// RUN:   %S/Inputs/expected-plists/NewDelete-path-notes.cpp.plist -
 
 void test() {
   int *p = new int;
   // expected-note@-1 {{Memory is allocated}}
-  if (p) // expected-note {{'p' is non-null}}
-         // expected-note@-1 {{Taking true branch}}
+  if (p) // expected-note {{Taking true branch}}
     delete p;
     // expected-note@-1 {{Memory is released}}
 

From 9942a996d90228e489b2c93a5660b1ffbd1ffb34 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 20:18:07 +0000
Subject: [PATCH 0550/1176] [analyzer] ConditionBRVisitor: Remove duplicated
 code

Summary: -

Reviewers: NoQ, george.karpenkov

Reviewed By: NoQ

Subscribers: cfe-commits, xazax.hun, baloghadamsoftware, szepet, a.sidorin,
             mikhail.ramalho, Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58199

llvm-svn: 362025
---
 .../Core/BugReporter/BugReporterVisitors.h    | 12 ++++
 .../Core/BugReporterVisitors.cpp              | 59 ++++++++++---------
 2 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
index 6efe6fbfd0e6d..36372dacd3978 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -207,6 +207,18 @@ class ConditionBRVisitor final : public BugReporterVisitor {
                          BugReporterContext &BRC, BugReport &R,
                          const ExplodedNode *N, bool TookTrue);
 
+  /// Tries to print the value of the given expression.
+  ///
+  /// \param CondVarExpr The expression to print its value.
+  /// \param Out The stream to print.
+  /// \param N The node where we encountered the condition.
+  /// \param TookTrue Whether we took the \c true branch of the condition.
+  ///
+  /// \return Whether the print was successful. (The printing is successful if
+  ///         we model the value and we could obtain it.)
+  bool printValue(const Expr *CondVarExpr, raw_ostream &Out,
+                  const ExplodedNode *N, bool TookTrue, bool IsAssuming);
+
   bool patternMatch(const Expr *Ex,
                     const Expr *ParentEx,
                     raw_ostream &Out,
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index d6eb31d6c4e56..fb5b37508234a 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -2190,17 +2190,7 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitConditionVariable(
   llvm::raw_svector_ostream Out(buf);
   Out << "Assuming " << LhsString << " is ";
 
-  QualType Ty = CondVarExpr->getType();
-
-  if (Ty->isPointerType())
-    Out << (TookTrue ? "not null" : "null");
-  else if (Ty->isObjCObjectPointerType())
-    Out << (TookTrue ? "not nil" : "nil");
-  else if (Ty->isBooleanType())
-    Out << (TookTrue ? "true" : "false");
-  else if (Ty->isIntegralOrEnumerationType())
-    Out << (TookTrue ? "non-zero" : "zero");
-  else
+  if (!printValue(CondVarExpr, Out, N, TookTrue, /*IsAssuming=*/true))
     return nullptr;
 
   const LocationContext *LCtx = N->getLocationContext();
@@ -2232,22 +2222,7 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
 
   Out << (IsAssuming ? "Assuming '" : "'") << VD->getDeclName() << "' is ";
 
-  QualType Ty = VD->getType();
-
-  if (Ty->isPointerType())
-    Out << (TookTrue ? "non-null" : "null");
-  else if (Ty->isObjCObjectPointerType())
-    Out << (TookTrue ? "non-nil" : "nil");
-  else if (Ty->isScalarType()) {
-    Optional<const llvm::APSInt *> IntValue;
-    if (!IsAssuming)
-      IntValue = getConcreteIntegerValue(DRE, N);
-
-    if (IsAssuming || !IntValue.hasValue())
-      Out << (TookTrue ? "not equal to 0" : "0");
-    else
-      Out << *IntValue.getValue();
-  } else
+  if (!printValue(DRE, Out, N, TookTrue, IsAssuming))
     return nullptr;
 
   const LocationContext *LCtx = N->getLocationContext();
@@ -2271,6 +2246,36 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
   return std::move(event);
 }
 
+bool ConditionBRVisitor::printValue(const Expr *CondVarExpr, raw_ostream &Out,
+                                    const ExplodedNode *N, bool TookTrue,
+                                    bool IsAssuming) {
+  QualType Ty = CondVarExpr->getType();
+
+  if (Ty->isPointerType()) {
+    Out << (TookTrue ? "non-null" : "null");
+    return true;
+  }
+
+  if (Ty->isObjCObjectPointerType()) {
+    Out << (TookTrue ? "non-nil" : "nil");
+    return true;
+  }
+
+  if (!Ty->isIntegralOrEnumerationType())
+    return false;
+
+  Optional<const llvm::APSInt *> IntValue;
+  if (!IsAssuming)
+    IntValue = getConcreteIntegerValue(CondVarExpr, N);
+
+  if (IsAssuming || !IntValue.hasValue())
+    Out << (TookTrue ? "not equal to 0" : "0");
+  else
+    Out << *IntValue.getValue();
+
+  return true;
+}
+
 const char *const ConditionBRVisitor::GenericTrueMessage =
     "Assuming the condition is true";
 const char *const ConditionBRVisitor::GenericFalseMessage =

From d1f0ec3f64304840eb94e54a6da9d2231c4c3345 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 20:29:02 +0000
Subject: [PATCH 0551/1176] [analyzer] ConditionBRVisitor: MemberExpr support

Summary: -

Reviewers: NoQ, george.karpenkov

Reviewed By: NoQ

Subscribers: cfe-commits, xazax.hun, baloghadamsoftware, szepet, a.sidorin,
             mikhail.ramalho, Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58206

llvm-svn: 362026
---
 .../Core/BugReporter/BugReporterVisitors.h    |  8 ++-
 .../Core/BugReporterVisitors.cpp              | 69 +++++++++++++++++--
 .../Inputs/expected-plists/edges-new.mm.plist | 62 +++++++++++++++++
 .../deref-track-symbolic-region.c.plist       |  8 +--
 .../diagnostics/deref-track-symbolic-region.c |  8 +--
 clang/test/Analysis/diagnostics/dtors.cpp     |  9 +--
 .../expected-plists/path-notes.cpp.plist      |  4 +-
 clang/test/Analysis/inlining/path-notes.cpp   |  2 +-
 clang/test/Analysis/null-deref-path-notes.cpp |  2 +-
 .../test/Analysis/osobject-retain-release.cpp |  8 ++-
 clang/test/Analysis/uninit-vals.m             | 17 +++--
 11 files changed, 164 insertions(+), 33 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
index 36372dacd3978..ef5d327d39da5 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -202,6 +202,11 @@ class ConditionBRVisitor final : public BugReporterVisitor {
                 BugReporterContext &BRC, BugReport &R, const ExplodedNode *N,
                 bool TookTrue, bool IsAssuming);
 
+  std::shared_ptr<PathDiagnosticPiece>
+  VisitTrueTest(const Expr *Cond, const MemberExpr *ME, BugReporterContext &BRC,
+                BugReport &R, const ExplodedNode *N, bool TookTrue,
+                bool IsAssuming);
+
   std::shared_ptr<PathDiagnosticPiece>
   VisitConditionVariable(StringRef LhsString, const Expr *CondVarExpr,
                          BugReporterContext &BRC, BugReport &R,
@@ -225,7 +230,8 @@ class ConditionBRVisitor final : public BugReporterVisitor {
                     BugReporterContext &BRC,
                     BugReport &R,
                     const ExplodedNode *N,
-                    Optional<bool> &prunable);
+                    Optional<bool> &prunable,
+                    bool IsSameFieldName);
 
   static bool isPieceMessageGeneric(const PathDiagnosticPiece *Piece);
 };
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index fb5b37508234a..c45b519582104 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -1991,6 +1991,11 @@ ConditionBRVisitor::VisitTrueTest(const Expr *Cond, BugReporterContext &BRC,
                                    BRC, R, N, TookTrueTmp, IsAssuming))
           return P;
         break;
+      case Stmt::MemberExprClass:
+        if (auto P = VisitTrueTest(Cond, cast<MemberExpr>(CondTmp),
+                                   BRC, R, N, TookTrueTmp, IsAssuming))
+          return P;
+        break;
       case Stmt::UnaryOperatorClass: {
         const auto *UO = cast<UnaryOperator>(CondTmp);
         if (UO->getOpcode() == UO_LNot) {
@@ -2025,7 +2030,8 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex,
                                       BugReporterContext &BRC,
                                       BugReport &report,
                                       const ExplodedNode *N,
-                                      Optional<bool> &prunable) {
+                                      Optional<bool> &prunable,
+                                      bool IsSameFieldName) {
   const Expr *OriginalExpr = Ex;
   Ex = Ex->IgnoreParenCasts();
 
@@ -2091,6 +2097,17 @@ bool ConditionBRVisitor::patternMatch(const Expr *Ex,
     return false;
   }
 
+  if (const auto *ME = dyn_cast<MemberExpr>(Ex)) {
+    if (!IsSameFieldName)
+      Out << "field '" << ME->getMemberDecl()->getName() << '\'';
+    else
+      Out << '\''
+          << Lexer::getSourceText(
+                 CharSourceRange::getTokenRange(Ex->getSourceRange()),
+                 BRC.getSourceManager(), BRC.getASTContext().getLangOpts(), 0)
+          << '\'';
+  }
+
   return false;
 }
 
@@ -2100,13 +2117,23 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
   bool shouldInvert = false;
   Optional<bool> shouldPrune;
 
+  // Check if the field name of the MemberExprs is ambiguous. Example:
+  // " 'a.d' is equal to 'h.d' " in 'test/Analysis/null-deref-path-notes.cpp'.
+  bool IsSameFieldName = false;
+  if (const auto *LhsME =
+          dyn_cast<MemberExpr>(BExpr->getLHS()->IgnoreParenCasts()))
+    if (const auto *RhsME =
+            dyn_cast<MemberExpr>(BExpr->getRHS()->IgnoreParenCasts()))
+      IsSameFieldName = LhsME->getMemberDecl()->getName() ==
+                        RhsME->getMemberDecl()->getName();
+
   SmallString<128> LhsString, RhsString;
   {
     llvm::raw_svector_ostream OutLHS(LhsString), OutRHS(RhsString);
-    const bool isVarLHS = patternMatch(BExpr->getLHS(), BExpr, OutLHS,
-                                       BRC, R, N, shouldPrune);
-    const bool isVarRHS = patternMatch(BExpr->getRHS(), BExpr, OutRHS,
-                                       BRC, R, N, shouldPrune);
+    const bool isVarLHS = patternMatch(BExpr->getLHS(), BExpr, OutLHS, BRC, R,
+                                       N, shouldPrune, IsSameFieldName);
+    const bool isVarRHS = patternMatch(BExpr->getRHS(), BExpr, OutRHS, BRC, R,
+                                       N, shouldPrune, IsSameFieldName);
 
     shouldInvert = !isVarLHS && isVarRHS;
   }
@@ -2170,11 +2197,15 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
   const LocationContext *LCtx = N->getLocationContext();
   PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx);
 
+  // Convert 'field ...' to 'Field ...' if it is a MemberExpr.
+  std::string Message = Out.str();
+  Message[0] = toupper(Message[0]);
+
   // If we know the value create a pop-up note.
   if (!IsAssuming)
-    return std::make_shared<PathDiagnosticPopUpPiece>(Loc, Out.str());
+    return std::make_shared<PathDiagnosticPopUpPiece>(Loc, Message);
 
-  auto event = std::make_shared<PathDiagnosticEventPiece>(Loc, Out.str());
+  auto event = std::make_shared<PathDiagnosticEventPiece>(Loc, Message);
   if (shouldPrune.hasValue())
     event->setPrunable(shouldPrune.getValue());
   return event;
@@ -2246,6 +2277,30 @@ std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
   return std::move(event);
 }
 
+std::shared_ptr<PathDiagnosticPiece> ConditionBRVisitor::VisitTrueTest(
+    const Expr *Cond, const MemberExpr *ME, BugReporterContext &BRC,
+    BugReport &report, const ExplodedNode *N, bool TookTrue, bool IsAssuming) {
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+
+  Out << (IsAssuming ? "Assuming field '" : "Field '")
+      << ME->getMemberDecl()->getName() << "' is ";
+
+  if (!printValue(ME, Out, N, TookTrue, IsAssuming))
+    return nullptr;
+
+  const LocationContext *LCtx = N->getLocationContext();
+  PathDiagnosticLocation Loc(Cond, BRC.getSourceManager(), LCtx);
+  if (!Loc.isValid() || !Loc.asLocation().isValid())
+    return nullptr;
+
+  // If we know the value create a pop-up note.
+  if (!IsAssuming)
+    return std::make_shared<PathDiagnosticPopUpPiece>(Loc, Out.str());
+
+  return std::make_shared<PathDiagnosticEventPiece>(Loc, Out.str());
+}
+
 bool ConditionBRVisitor::printValue(const Expr *CondVarExpr, raw_ostream &Out,
                                     const ExplodedNode *N, bool TookTrue,
                                     bool IsAssuming) {
diff --git a/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist b/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist
index 4eca510c3f056..b4c79018c6665 100644
--- a/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/edges-new.mm.plist
@@ -22227,6 +22227,68 @@
            <key>file</key><integer>0</integer>
           </dict>
          </array>
+        <key>end</key>
+         <array>
+          <dict>
+           <key>line</key><integer>587</integer>
+           <key>col</key><integer>11</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>587</integer>
+           <key>col</key><integer>11</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
+       </dict>
+      </array>
+    </dict>
+    <dict>
+     <key>kind</key><string>pop-up</string>
+     <key>location</key>
+     <dict>
+      <key>line</key><integer>587</integer>
+      <key>col</key><integer>11</integer>
+      <key>file</key><integer>0</integer>
+     </dict>
+     <key>ranges</key>
+     <array>
+       <array>
+        <dict>
+         <key>line</key><integer>587</integer>
+         <key>col</key><integer>11</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+        <dict>
+         <key>line</key><integer>587</integer>
+         <key>col</key><integer>16</integer>
+         <key>file</key><integer>0</integer>
+        </dict>
+       </array>
+     </array>
+     <key>extended_message</key>
+     <string>Field &apos;b&apos; is equal to 2</string>
+     <key>message</key>
+     <string>Field &apos;b&apos; is equal to 2</string>
+    </dict>
+    <dict>
+     <key>kind</key><string>control</string>
+     <key>edges</key>
+      <array>
+       <dict>
+        <key>start</key>
+         <array>
+          <dict>
+           <key>line</key><integer>587</integer>
+           <key>col</key><integer>11</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+          <dict>
+           <key>line</key><integer>587</integer>
+           <key>col</key><integer>11</integer>
+           <key>file</key><integer>0</integer>
+          </dict>
+         </array>
         <key>end</key>
          <array>
           <dict>
diff --git a/clang/test/Analysis/diagnostics/Inputs/expected-plists/deref-track-symbolic-region.c.plist b/clang/test/Analysis/diagnostics/Inputs/expected-plists/deref-track-symbolic-region.c.plist
index 35b14fba4c89a..47c3b8df09c9f 100644
--- a/clang/test/Analysis/diagnostics/Inputs/expected-plists/deref-track-symbolic-region.c.plist
+++ b/clang/test/Analysis/diagnostics/Inputs/expected-plists/deref-track-symbolic-region.c.plist
@@ -165,9 +165,9 @@
      </array>
      <key>depth</key><integer>0</integer>
      <key>extended_message</key>
-     <string>Assuming pointer value is null</string>
+     <string>Assuming field &apos;x&apos; is null</string>
      <key>message</key>
-     <string>Assuming pointer value is null</string>
+     <string>Assuming field &apos;x&apos; is null</string>
     </dict>
     <dict>
      <key>kind</key><string>control</string>
@@ -454,9 +454,9 @@
      </array>
      <key>depth</key><integer>0</integer>
      <key>extended_message</key>
-     <string>Assuming pointer value is null</string>
+     <string>Assuming field &apos;x&apos; is null</string>
      <key>message</key>
-     <string>Assuming pointer value is null</string>
+     <string>Assuming field &apos;x&apos; is null</string>
     </dict>
     <dict>
      <key>kind</key><string>control</string>
diff --git a/clang/test/Analysis/diagnostics/deref-track-symbolic-region.c b/clang/test/Analysis/diagnostics/deref-track-symbolic-region.c
index 63d0971b85d60..1a1190f934d06 100644
--- a/clang/test/Analysis/diagnostics/deref-track-symbolic-region.c
+++ b/clang/test/Analysis/diagnostics/deref-track-symbolic-region.c
@@ -15,8 +15,8 @@ void test(struct S syz, int *pp) {
 
   struct S *ps = &syz;
   if (ps->x)
-    //expected-note@-1{{Taking false branch}}
-    //expected-note@-2{{Assuming pointer value is null}}
+    //expected-note@-1{{Assuming field 'x' is null}}
+    //expected-note@-2{{Taking false branch}}
 
     m++;
 
@@ -30,8 +30,8 @@ void testTrackConstraintBRVisitorIsTrackingTurnedOn(struct S syz, int *pp) {
 
   struct S *ps = &syz;
   if (ps->x)
-    //expected-note@-1{{Taking false branch}}
-    //expected-note@-2{{Assuming pointer value is null}}
+    //expected-note@-1{{Assuming field 'x' is null}}
+    //expected-note@-2{{Taking false branch}}
 
     m++;
   int *p = syz.x; //expected-note {{'p' initialized to a null pointer value}}
diff --git a/clang/test/Analysis/diagnostics/dtors.cpp b/clang/test/Analysis/diagnostics/dtors.cpp
index b3fe7ec803a9d..18bedc61f98e8 100644
--- a/clang/test/Analysis/diagnostics/dtors.cpp
+++ b/clang/test/Analysis/diagnostics/dtors.cpp
@@ -16,10 +16,11 @@ struct smart_ptr {
   S *s;
   smart_ptr(S *);
   S *get() {
-    return (x || 0) ? nullptr : s; // expected-note{{Left side of '||' is false}}
-                                   // expected-note@-1{{'?' condition is false}}
-                                   // expected-warning@-2{{Use of memory after it is freed}}
-                                   // expected-note@-3{{Use of memory after it is freed}}
+    return (x || 0) ? nullptr : s; // expected-note{{Field 'x' is 0}}
+                                   // expected-note@-1{{Left side of '||' is false}}
+                                   // expected-note@-2{{'?' condition is false}}
+                                   // expected-warning@-3{{Use of memory after it is freed}}
+                                   // expected-note@-4{{Use of memory after it is freed}}
   }
 };
 
diff --git a/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.cpp.plist b/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.cpp.plist
index f517d4dddbc60..c9fd8c848bdca 100644
--- a/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.cpp.plist
+++ b/clang/test/Analysis/inlining/Inputs/expected-plists/path-notes.cpp.plist
@@ -4271,9 +4271,9 @@
      </array>
      <key>depth</key><integer>0</integer>
      <key>extended_message</key>
-     <string>Assuming pointer value is null</string>
+     <string>Assuming field &apos;arr&apos; is null</string>
      <key>message</key>
-     <string>Assuming pointer value is null</string>
+     <string>Assuming field &apos;arr&apos; is null</string>
     </dict>
     <dict>
      <key>kind</key><string>control</string>
diff --git a/clang/test/Analysis/inlining/path-notes.cpp b/clang/test/Analysis/inlining/path-notes.cpp
index 43859237438f2..ef56cc76f82cf 100644
--- a/clang/test/Analysis/inlining/path-notes.cpp
+++ b/clang/test/Analysis/inlining/path-notes.cpp
@@ -231,7 +231,7 @@ struct Owner {
 };
 
 void Owner::testGetDerefExprOnMemberExprWithADot() {
-	if (arr)  // expected-note {{Assuming pointer value is null}}
+	if (arr)  // expected-note {{Assuming field 'arr' is null}}
             // expected-note@-1 {{Taking false branch}}
 	  ;
 	arr[1].x = 1; //expected-warning {{Dereference of null pointer}}
diff --git a/clang/test/Analysis/null-deref-path-notes.cpp b/clang/test/Analysis/null-deref-path-notes.cpp
index dd54b24e78471..c7b0619e297b3 100644
--- a/clang/test/Analysis/null-deref-path-notes.cpp
+++ b/clang/test/Analysis/null-deref-path-notes.cpp
@@ -19,7 +19,7 @@ void c::f(B &g, int &i) {
                 // expected-note@-1{{Array access (via field 'd') results in a null pointer dereference}}
   B h, a; // expected-note{{Value assigned to 'h.d'}}
   a.d == __null; // expected-note{{Assuming the condition is true}}
-  a.d != h.d; // expected-note{{Assuming pointer value is null}}
+  a.d != h.d; // expected-note{{Assuming 'a.d' is equal to 'h.d'}}
   f(h, b); // expected-note{{Calling 'c::f'}}
 }
 }
diff --git a/clang/test/Analysis/osobject-retain-release.cpp b/clang/test/Analysis/osobject-retain-release.cpp
index 9d7cd854422eb..10ef144bf36e9 100644
--- a/clang/test/Analysis/osobject-retain-release.cpp
+++ b/clang/test/Analysis/osobject-retain-release.cpp
@@ -601,16 +601,18 @@ void test_smart_ptr_uaf() {
   {
     OSObjectPtr p(obj); // expected-note{{Calling constructor for 'smart_ptr<OSObject>'}}
    // expected-note@-1{{Returning from constructor for 'smart_ptr<OSObject>'}}
+    // expected-note@os_smart_ptr.h:13{{Field 'pointer' is non-null}}
     // expected-note@os_smart_ptr.h:13{{Taking true branch}}
     // expected-note@os_smart_ptr.h:14{{Calling 'smart_ptr::_retain'}}
     // expected-note@os_smart_ptr.h:71{{Reference count incremented. The object now has a +2 retain count}}
     // expected-note@os_smart_ptr.h:14{{Returning from 'smart_ptr::_retain'}}
   } // expected-note{{Calling '~smart_ptr'}}
+  // expected-note@os_smart_ptr.h:35{{Field 'pointer' is non-null}}
   // expected-note@os_smart_ptr.h:35{{Taking true branch}}
   // expected-note@os_smart_ptr.h:36{{Calling 'smart_ptr::_release'}}
   // expected-note@os_smart_ptr.h:76{{Reference count decremented. The object now has a +1 retain count}}
   // expected-note@os_smart_ptr.h:36{{Returning from 'smart_ptr::_release'}}
- // expected-note@-5{{Returning from '~smart_ptr'}}
+ // expected-note@-6{{Returning from '~smart_ptr'}}
   obj->release(); // expected-note{{Object released}}
   obj->release(); // expected-warning{{Reference-counted object is used after it is released}}
 // expected-note@-1{{Reference-counted object is used after it is released}}
@@ -621,16 +623,18 @@ void test_smart_ptr_leak() {
   {
     OSObjectPtr p(obj); // expected-note{{Calling constructor for 'smart_ptr<OSObject>'}}
    // expected-note@-1{{Returning from constructor for 'smart_ptr<OSObject>'}}
+    // expected-note@os_smart_ptr.h:13{{Field 'pointer' is non-null}}
     // expected-note@os_smart_ptr.h:13{{Taking true branch}}
     // expected-note@os_smart_ptr.h:14{{Calling 'smart_ptr::_retain'}}
     // expected-note@os_smart_ptr.h:71{{Reference count incremented. The object now has a +2 retain count}}
     // expected-note@os_smart_ptr.h:14{{Returning from 'smart_ptr::_retain'}}
   } // expected-note{{Calling '~smart_ptr'}}
+  // expected-note@os_smart_ptr.h:35{{Field 'pointer' is non-null}}
   // expected-note@os_smart_ptr.h:35{{Taking true branch}}
   // expected-note@os_smart_ptr.h:36{{Calling 'smart_ptr::_release'}}
   // expected-note@os_smart_ptr.h:76{{Reference count decremented. The object now has a +1 retain count}}
   // expected-note@os_smart_ptr.h:36{{Returning from 'smart_ptr::_release'}}
- // expected-note@-5{{Returning from '~smart_ptr'}}
+ // expected-note@-6{{Returning from '~smart_ptr'}}
 } // expected-warning{{Potential leak of an object stored into 'obj'}}
 // expected-note@-1{{Object leaked: object allocated and stored into 'obj' is not referenced later in this execution path and has a retain count of +1}}
 
diff --git a/clang/test/Analysis/uninit-vals.m b/clang/test/Analysis/uninit-vals.m
index 5b959c7bfe19e..b59f72b10770b 100644
--- a/clang/test/Analysis/uninit-vals.m
+++ b/clang/test/Analysis/uninit-vals.m
@@ -164,7 +164,7 @@ void PR14765_test() {
                                            // expected-note@-1{{TRUE}}
 
   testObj->origin = makePoint(0.0, 0.0);
-  if (testObj->size > 0) { ; } // expected-note{{Assuming the condition is false}}
+  if (testObj->size > 0) { ; } // expected-note{{Assuming field 'size' is <= 0}}
                                // expected-note@-1{{Taking false branch}}
 
   // FIXME: Assigning to 'testObj->origin' kills the default binding for the
@@ -219,13 +219,13 @@ void PR14765_test_int() {
                                                // expected-note@-1{{TRUE}}
 
   testObj->origin = makeIntPoint(1, 2);
-  if (testObj->size > 0) { ; } // expected-note{{Assuming the condition is false}}
+  if (testObj->size > 0) { ; } // expected-note{{Assuming field 'size' is <= 0}}
                                // expected-note@-1{{Taking false branch}}
-                               // expected-note@-2{{Assuming the condition is false}}
+                               // expected-note@-2{{Assuming field 'size' is <= 0}}
                                // expected-note@-3{{Taking false branch}}
-                               // expected-note@-4{{Assuming the condition is false}}
+                               // expected-note@-4{{Assuming field 'size' is <= 0}}
                                // expected-note@-5{{Taking false branch}}
-                               // expected-note@-6{{Assuming the condition is false}}
+                               // expected-note@-6{{Assuming field 'size' is <= 0}}
                                // expected-note@-7{{Taking false branch}}
 
   // FIXME: Assigning to 'testObj->origin' kills the default binding for the
@@ -321,9 +321,12 @@ void testSmallStructInLargerStruct() {
                                                // expected-note@-1{{TRUE}}
 
   testObj->origin = makeIntPoint2D(1, 2);
-  if (testObj->size > 0) { ; } // expected-note{{Taking false branch}}
+  if (testObj->size > 0) { ; } // expected-note{{Field 'size' is <= 0}}
                                // expected-note@-1{{Taking false branch}}
-                               // expected-note@-2{{Taking false branch}}
+                               // expected-note@-2{{Field 'size' is <= 0}}
+                               // expected-note@-3{{Taking false branch}}
+                               // expected-note@-4{{Field 'size' is <= 0}}
+                               // expected-note@-5{{Taking false branch}}
 
   clang_analyzer_eval(testObj->size == 0); // expected-warning{{TRUE}}
                                            // expected-note@-1{{TRUE}}

From 2e896b8b39bcaacce56943315cbca0a1daae2819 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 20:34:29 +0000
Subject: [PATCH 0552/1176] [analyzer] ConditionBRVisitor: Boolean support

Summary: -

Reviewers: NoQ, george.karpenkov

Reviewed By: NoQ, george.karpenkov

Subscribers: cfe-commits, xazax.hun, baloghadamsoftware, szepet, a.sidorin,
             mikhail.ramalho, Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58207

llvm-svn: 362027
---
 .../StaticAnalyzer/Core/BugReporterVisitors.cpp   | 15 +++++++++++----
 .../expected-plists/cxx-for-range.cpp.plist       |  4 ++--
 .../diagnostics/no-store-func-path-notes.cpp      |  2 +-
 clang/test/Analysis/inner-pointer.cpp             | 12 ++++++------
 clang/test/Analysis/use-after-move.cpp            |  2 +-
 5 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index c45b519582104..db7559b10b094 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -2323,10 +2323,17 @@ bool ConditionBRVisitor::printValue(const Expr *CondVarExpr, raw_ostream &Out,
   if (!IsAssuming)
     IntValue = getConcreteIntegerValue(CondVarExpr, N);
 
-  if (IsAssuming || !IntValue.hasValue())
-    Out << (TookTrue ? "not equal to 0" : "0");
-  else
-    Out << *IntValue.getValue();
+  if (IsAssuming || !IntValue.hasValue()) {
+    if (Ty->isBooleanType())
+      Out << (TookTrue ? "true" : "false");
+    else
+      Out << (TookTrue ? "not equal to 0" : "0");
+  } else {
+    if (Ty->isBooleanType())
+      Out << (IntValue.getValue()->getBoolValue() ? "true" : "false");
+    else
+      Out << *IntValue.getValue();
+  }
 
   return true;
 }
diff --git a/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist b/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist
index 5be7d0adb9694..f6bfc87db57af 100644
--- a/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist
+++ b/clang/test/Analysis/Inputs/expected-plists/cxx-for-range.cpp.plist
@@ -829,9 +829,9 @@
        </array>
      </array>
      <key>extended_message</key>
-     <string>&apos;fail&apos; is 1</string>
+     <string>&apos;fail&apos; is true</string>
      <key>message</key>
-     <string>&apos;fail&apos; is 1</string>
+     <string>&apos;fail&apos; is true</string>
     </dict>
     <dict>
      <key>kind</key><string>control</string>
diff --git a/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp b/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp
index 39b7dd9e49265..c2a3d64801887 100644
--- a/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp
+++ b/clang/test/Analysis/diagnostics/no-store-func-path-notes.cpp
@@ -102,7 +102,7 @@ struct C {
 
   C(int pX, int pY, bool Flag) {
     x = pX;
-    if (Flag) // expected-note{{Assuming 'Flag' is not equal to 0}}
+    if (Flag) // expected-note{{Assuming 'Flag' is true}}
               // expected-note@-1{{Taking true branch}}
       return; // expected-note{{Returning without writing to 'this->y'}}
     y = pY;
diff --git a/clang/test/Analysis/inner-pointer.cpp b/clang/test/Analysis/inner-pointer.cpp
index 81b750d7e5dd3..f4646c20fc294 100644
--- a/clang/test/Analysis/inner-pointer.cpp
+++ b/clang/test/Analysis/inner-pointer.cpp
@@ -38,9 +38,9 @@ void deref_after_scope_char(bool cond) {
   std::string s;
   const char *c2 = s.c_str();
   if (cond) {
-    // expected-note@-1 {{Assuming 'cond' is not equal to 0}}
+    // expected-note@-1 {{Assuming 'cond' is true}}
     // expected-note@-2 {{Taking true branch}}
-    // expected-note@-3 {{Assuming 'cond' is 0}}
+    // expected-note@-3 {{Assuming 'cond' is false}}
     // expected-note@-4 {{Taking false branch}}
     consume(c); // expected-warning {{Inner pointer of container used after re/deallocation}}
     // expected-note@-1 {{Inner pointer of container used after re/deallocation}}
@@ -73,9 +73,9 @@ void deref_after_scope_wchar_t(bool cond) {
   std::wstring s;
   const wchar_t *c2 = s.c_str();
   if (cond) {
-    // expected-note@-1 {{Assuming 'cond' is not equal to 0}}
+    // expected-note@-1 {{Assuming 'cond' is true}}
     // expected-note@-2 {{Taking true branch}}
-    // expected-note@-3 {{Assuming 'cond' is 0}}
+    // expected-note@-3 {{Assuming 'cond' is false}}
     // expected-note@-4 {{Taking false branch}}
     consume(c); // expected-warning {{Inner pointer of container used after re/deallocation}}
     // expected-note@-1 {{Inner pointer of container used after re/deallocation}}
@@ -122,9 +122,9 @@ void multiple_symbols(bool cond) {
   std::string s2;
   const char *c2 = s2.c_str();
   if (cond) {
-    // expected-note@-1 {{Assuming 'cond' is not equal to 0}}
+    // expected-note@-1 {{Assuming 'cond' is true}}
     // expected-note@-2 {{Taking true branch}}
-    // expected-note@-3 {{Assuming 'cond' is 0}}
+    // expected-note@-3 {{Assuming 'cond' is false}}
     // expected-note@-4 {{Taking false branch}}
     consume(c1); // expected-warning {{Inner pointer of container used after re/deallocation}}
     // expected-note@-1 {{Inner pointer of container used after re/deallocation}}
diff --git a/clang/test/Analysis/use-after-move.cpp b/clang/test/Analysis/use-after-move.cpp
index e58301df8b512..c25f4393cdf92 100644
--- a/clang/test/Analysis/use-after-move.cpp
+++ b/clang/test/Analysis/use-after-move.cpp
@@ -395,7 +395,7 @@ void uniqueTest(bool cond) {
   A b;
   b = std::move(a); // peaceful-note {{Object 'a' is moved}}
 
-  if (cond) { // peaceful-note {{Assuming 'cond' is not equal to 0}}
+  if (cond) { // peaceful-note {{Assuming 'cond' is true}}
               // peaceful-note@-1 {{Taking true branch}}
     a.foo(); // peaceful-warning {{Method called on moved-from object 'a'}}
              // peaceful-note@-1 {{Method called on moved-from object 'a'}}

From 4b281755ae4951ca83c287680b47d77433f3ee0a Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 29 May 2019 20:45:32 +0000
Subject: [PATCH 0553/1176] Revert "LLVM IR: update Clang tests for byval being
 a typed attribute."

The underlying LLVM change couldn't cope with llvm-link and broke LTO builds.

llvm-svn: 362028
---
 clang/test/CodeGen/aapcs-align.cpp               |  4 ++--
 .../test/CodeGenCXX/builtin-source-location.cpp  |  4 ++--
 clang/test/CodeGenCXX/wasm-args-returns.cpp      |  4 ++--
 .../CodeGenCXX/x86_64-arguments-nacl-x32.cpp     |  2 +-
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 ++++++++--------
 .../kernels-have-spir-cc-by-default.cl           |  6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index bcc4604d7422d..40fba7823524e 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -95,8 +95,8 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define void @g4
-// CHECK: call void @f4(i32 1, %struct.SF16* nonnull byval align 8
-// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* nonnull byval align 8
+// CHECK: call void @f4(i32 1, %struct.SF16* byval nonnull align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* byval nonnull align 8
 // CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
 // CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
 
diff --git a/clang/test/CodeGenCXX/builtin-source-location.cpp b/clang/test/CodeGenCXX/builtin-source-location.cpp
index f8bfd7d940b91..6a8387093dfec 100644
--- a/clang/test/CodeGenCXX/builtin-source-location.cpp
+++ b/clang/test/CodeGenCXX/builtin-source-location.cpp
@@ -104,7 +104,7 @@ struct TestInit {
 //
 // CHECK-CTOR-GLOBAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP_ONE:[^,]*]],
 // CHECK-CTOR-GLOBAL-SAME: i32 3400, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{.*}}%[[TMP_ONE]])
+// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{[^%]*}}%[[TMP_ONE]])
 #line 3400 "GlobalInitVal.cpp"
 TestInit GlobalInitVal;
 
@@ -119,7 +119,7 @@ extern "C" void test_init_function() {
 //
 // CHECK-CTOR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
 // CHECK-CTOR-LOCAL-SAME: i32 3500, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{.*}}%[[TMP]])
+// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{[^%]*}}%[[TMP]])
 #line 3500 "LocalInitVal.cpp"
   TestInit init_local;
   sink(init_local);
diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp
index 506540e86fd9c..a7c4e1e282a12 100644
--- a/clang/test/CodeGenCXX/wasm-args-returns.cpp
+++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp
@@ -30,12 +30,12 @@ struct two_fields {
   double d, e;
 };
 test(two_fields);
-// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval align 8 %{{.*}})
+// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* byval nocapture readonly align 8 %{{.*}})
 //
 // CHECK: define void @_Z15test_two_fieldsv()
 // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8
 // CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret %[[tmp]])
-// CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval align 8 %[[tmp]])
+// CHECK: call void @_Z3use10two_fields(%struct.two_fields* byval nonnull align 8 %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval align 8)
diff --git a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
index 89d6dae5d30fc..3392b32bd2b6f 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
@@ -20,7 +20,7 @@ void f_struct_with_mdp(struct_with_mdp a) { (void)a; }
 struct struct_with_mdp_too_much {
   char *a; char *b; char *c; char *d; test_struct_mdp e;
 };
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval {{.*}} %a)
 void f_struct_with_mdp_too_much(struct_with_mdp_too_much a) {
   (void)a;
 }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 8b03fb00e3fde..aec00e76014ec 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@ void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t ar
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@ flexible_array func_flexible_array_ret()
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -487,7 +487,7 @@ void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@ void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@ void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@ void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval  align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 4392ef90677c5..5bb52e9beb514 100644
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@ typedef struct test_struct {
 kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* nocapture {{.*}} byval
+// CHECK: struct.int_single* byval nocapture
 // CHECK: i32* nocapture %output
  output[0] = input.a;
 }
@@ -35,7 +35,7 @@ kernel void test_single(int_single input, global int* output) {
 kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* nocapture {{.*}} byval
+// CHECK: struct.int_pair* byval nocapture
 // CHECK: i32* nocapture %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
@@ -44,7 +44,7 @@ kernel void test_pair(int_pair input, global int* output) {
 kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* nocapture {{.*}} byval
+// CHECK: struct.test_struct* byval nocapture
 // CHECK: i32* nocapture %output
  output[0] = input.elementA;
  output[1] = input.elementB;

From 71ee3d02372af7361eda0b59163cf92653ac2bbb Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 29 May 2019 20:46:38 +0000
Subject: [PATCH 0554/1176] Revert "IR: add optional type to 'byval' function
 parameters"

The IRLinker doesn't delve into the new byval attribute when mapping types, and
this breaks LTO.

llvm-svn: 362029
---
 llvm/docs/LangRef.rst                         |   5 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |   1 -
 llvm/include/llvm/IR/Argument.h               |   3 -
 llvm/include/llvm/IR/Attributes.h             |  20 ----
 llvm/include/llvm/IR/CallSite.h               |   5 -
 llvm/include/llvm/IR/Function.h               |   5 -
 llvm/include/llvm/IR/InstrTypes.h             |   5 -
 llvm/lib/AsmParser/LLParser.cpp               |  24 +---
 llvm/lib/AsmParser/LLParser.h                 |   1 -
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  50 +-------
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  15 +--
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp   |   6 +-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |   5 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    |   8 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  18 +--
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   1 -
 llvm/lib/IR/AttributeImpl.h                   |  32 +-----
 llvm/lib/IR/Attributes.cpp                    | 108 +-----------------
 llvm/lib/IR/Function.cpp                      |   5 -
 llvm/lib/IR/Verifier.cpp                      |   5 -
 llvm/test/Assembler/byval-type-attr.ll        |  31 -----
 llvm/test/Assembler/invalid-byval-type1.ll    |   4 -
 llvm/test/Assembler/invalid-byval-type2.ll    |   4 -
 llvm/test/Assembler/invalid-byval-type3.ll    |   4 -
 llvm/test/Bitcode/Inputs/byval-upgrade.bc     | Bin 1092 -> 0 bytes
 llvm/test/Bitcode/attributes-3.3.ll           |   2 +-
 llvm/test/Bitcode/attributes.ll               |   2 +-
 llvm/test/Bitcode/byval-upgrade.test          |   7 --
 llvm/test/Bitcode/compatibility-3.6.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.7.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.8.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.9.ll        |   2 +-
 llvm/test/Bitcode/compatibility-4.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility-5.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility-6.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility.ll            |  11 +-
 llvm/test/Bitcode/highLevelStructure.3.2.ll   |   4 +-
 llvm/test/CodeGen/AArch64/byval-type.ll       |  37 ------
 .../test/Transforms/Inline/byval-tail-call.ll |   4 +-
 llvm/unittests/IR/AttributesTest.cpp          |  20 ----
 40 files changed, 41 insertions(+), 425 deletions(-)
 delete mode 100644 llvm/test/Assembler/byval-type-attr.ll
 delete mode 100644 llvm/test/Assembler/invalid-byval-type1.ll
 delete mode 100644 llvm/test/Assembler/invalid-byval-type2.ll
 delete mode 100644 llvm/test/Assembler/invalid-byval-type3.ll
 delete mode 100644 llvm/test/Bitcode/Inputs/byval-upgrade.bc
 delete mode 100644 llvm/test/Bitcode/byval-upgrade.test
 delete mode 100644 llvm/test/CodeGen/AArch64/byval-type.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2105ce9d669f7..840272df33fd6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1017,7 +1017,7 @@ Currently, only the following parameter attributes are defined:
     opposed to memory, though some targets use it to distinguish between
     two different kinds of registers). Use of this attribute is
     target-specific.
-``byval`` or ``byval(<ty>)``
+``byval``
     This indicates that the pointer parameter should really be passed by
     value to the function. The attribute implies that a hidden copy of
     the pointee is made between the caller and the callee, so the callee
@@ -1029,9 +1029,6 @@ Currently, only the following parameter attributes are defined:
     ``byval`` parameters). This is not a valid attribute for return
     values.
 
-    The byval attribute also supports an optional type argument, which must be
-    the same as the pointee type of the argument.
-
     The byval attribute also supports specifying an alignment with the
     align attribute. It indicates the alignment of the stack slot to
     form and the known alignment of the pointer specified to the call
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f06e01acae30c..cabca9cb22101 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -188,7 +188,6 @@ class TargetLoweringBase {
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     uint16_t Alignment = 0;
-    Type *ByValType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index 952fbcdffb142..7997c863b8300 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -78,9 +78,6 @@ class Argument final : public Value {
   /// If this is a byval or inalloca argument, return its alignment.
   unsigned getParamAlignment() const;
 
-  /// If this is a byval argument, return its type.
-  Type *getParamByValType() const;
-
   /// Return true if this argument has the nest attribute.
   bool hasNestAttr() const;
 
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 06cc09e1cfc79..8d7f4018e846c 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -90,7 +90,6 @@ class Attribute {
   static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
   static Attribute get(LLVMContext &Context, StringRef Kind,
                        StringRef Val = StringRef());
-  static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
 
   /// Return a uniquified Attribute object that has the specific
   /// alignment set.
@@ -103,7 +102,6 @@ class Attribute {
   static Attribute getWithAllocSizeArgs(LLVMContext &Context,
                                         unsigned ElemSizeArg,
                                         const Optional<unsigned> &NumElemsArg);
-  static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
 
   //===--------------------------------------------------------------------===//
   // Attribute Accessors
@@ -119,9 +117,6 @@ class Attribute {
   /// attribute.
   bool isStringAttribute() const;
 
-  /// Return true if the attribute is a type attribute.
-  bool isTypeAttribute() const;
-
   /// Return true if the attribute is present.
   bool hasAttribute(AttrKind Val) const;
 
@@ -144,10 +139,6 @@ class Attribute {
   /// attribute to be a string attribute.
   StringRef getValueAsString() const;
 
-  /// Return the attribute's value as a Type. This requires the attribute to be
-  /// a type attribute.
-  Type *getValueAsType() const;
-
   /// Returns the alignment field of an attribute as a byte alignment
   /// value.
   unsigned getAlignment() const;
@@ -288,7 +279,6 @@ class AttributeSet {
   unsigned getStackAlignment() const;
   uint64_t getDereferenceableBytes() const;
   uint64_t getDereferenceableOrNullBytes() const;
-  Type *getByValType() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp = false) const;
 
@@ -608,9 +598,6 @@ class AttributeList {
   /// Return the alignment for the specified function parameter.
   unsigned getParamAlignment(unsigned ArgNo) const;
 
-  /// Return the byval type for the specified function parameter.
-  Type *getParamByValType(unsigned ArgNo) const;
-
   /// Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
@@ -710,7 +697,6 @@ class AttrBuilder {
   uint64_t DerefBytes = 0;
   uint64_t DerefOrNullBytes = 0;
   uint64_t AllocSizeArgs = 0;
-  Type *ByValType = nullptr;
 
 public:
   AttrBuilder() = default;
@@ -786,9 +772,6 @@ class AttrBuilder {
   /// dereferenceable_or_null attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
 
-  /// Retrieve the byval type.
-  Type *getByValType() const { return ByValType; }
-
   /// Retrieve the allocsize args, if the allocsize attribute exists.  If it
   /// doesn't exist, pair(0, 0) is returned.
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
@@ -813,9 +796,6 @@ class AttrBuilder {
   AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg);
 
-  /// This turns a byval type into the form used internally in Attribute.
-  AttrBuilder &addByValAttr(Type *Ty);
-
   /// Add an allocsize attribute, using the representation returned by
   /// Attribute.getIntValue().
   AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h
index b47a96c5d5faa..183e387a422e5 100644
--- a/llvm/include/llvm/IR/CallSite.h
+++ b/llvm/include/llvm/IR/CallSite.h
@@ -415,11 +415,6 @@ class CallSiteBase {
     CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
   }
 
-  /// Extract the byval type for a call or parameter (nullptr=unknown).
-  Type *getParamByValType(unsigned ArgNo) const {
-    CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
-  }
-
   /// Extract the number of dereferenceable bytes for a call or parameter
   /// (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index 896c2189eb824..613d21bc64d24 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -431,11 +431,6 @@ class Function : public GlobalObject, public ilist_node<Function> {
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
-  /// Extract the byval type for a parameter (nullptr=unknown).
-  Type *getParamByValType(unsigned ArgNo) const {
-    return AttributeSets.getParamByValType(ArgNo);
-  }
-
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 6ce76811c0e66..7ffa7a6f60e8f 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1560,11 +1560,6 @@ class CallBase : public Instruction {
     return Attrs.getParamAlignment(ArgNo);
   }
 
-  /// Extract the byval type for a call or parameter (nullptr=unknown).
-  Type *getParamByValType(unsigned ArgNo) const {
-    return Attrs.getParamByValType(ArgNo);
-  }
-
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 0a9a09e644d7a..95646675cb279 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1601,13 +1601,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
       B.addAlignmentAttr(Alignment);
       continue;
     }
-    case lltok::kw_byval: {
-      Type *Ty;
-      if (ParseByValWithOptionalType(Ty))
-        return true;
-      B.addByValAttr(Ty);
-      continue;
-    }
+    case lltok::kw_byval:           B.addAttribute(Attribute::ByVal); break;
     case lltok::kw_dereferenceable: {
       uint64_t Bytes;
       if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -2460,22 +2454,6 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
   return false;
 }
 
-/// ParseByValWithOptionalType
-///   ::= byval
-///   ::= byval(<ty>)
-bool LLParser::ParseByValWithOptionalType(Type *&Result) {
-  Result = nullptr;
-  if (!EatIfPresent(lltok::kw_byval))
-    return true;
-  if (!EatIfPresent(lltok::lparen))
-    return false;
-  if (ParseType(Result))
-    return true;
-  if (!EatIfPresent(lltok::rparen))
-    return Error(Lex.getLoc(), "expected ')'");
-  return false;
-}
-
 /// ParseOptionalOperandBundles
 ///    ::= /*empty*/
 ///    ::= '[' OperandBundle [, OperandBundle ]* ']'
diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h
index ec4a61b5498d8..ad169afb93584 100644
--- a/llvm/lib/AsmParser/LLParser.h
+++ b/llvm/lib/AsmParser/LLParser.h
@@ -339,7 +339,6 @@ namespace llvm {
     bool ParseFnAttributeValuePairs(AttrBuilder &B,
                                     std::vector<unsigned> &FwdRefAttrGrps,
                                     bool inAttrGrp, LocTy &BuiltinLoc);
-    bool ParseByValWithOptionalType(Type *&Result);
 
     // Module Summary Index Parsing.
     bool SkipModuleSummaryEntry();
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 9f562ba82db93..134ce0367031e 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -638,10 +638,6 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
     return getFnValueByID(ValNo, Ty);
   }
 
-  /// Upgrades old-style typeless byval attributes by adding the corresponding
-  /// argument's pointee type.
-  void propagateByValTypes(CallBase *CB);
-
   /// Converts alignment exponent (i.e. power of two (or zero)) to the
   /// corresponding alignment to use. If alignment is too large, returns
   /// a corresponding error code.
@@ -1496,12 +1492,6 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           if (Error Err = parseAttrKind(Record[++i], &Kind))
             return Err;
 
-          // Upgrade old-style byval attribute to one with a type, even if it's
-          // nullptr. We will have to insert the real type when we associate
-          // this AttributeList with a function.
-          if (Kind == Attribute::ByVal)
-            B.addByValAttr(nullptr);
-
           B.addAttribute(Kind);
         } else if (Record[i] == 1) { // Integer attribute
           Attribute::AttrKind Kind;
@@ -1517,7 +1507,9 @@ Error BitcodeReader::parseAttributeGroupBlock() {
             B.addDereferenceableOrNullAttr(Record[++i]);
           else if (Kind == Attribute::AllocSize)
             B.addAllocSizeAttrFromRawRepr(Record[++i]);
-        } else if (Record[i] == 3 || Record[i] == 4) { // String attribute
+        } else {                     // String attribute
+          assert((Record[i] == 3 || Record[i] == 4) &&
+                 "Invalid attribute group entry");
           bool HasValue = (Record[i++] == 4);
           SmallString<64> KindStr;
           SmallString<64> ValStr;
@@ -1535,15 +1527,6 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           }
 
           B.addAttribute(KindStr.str(), ValStr.str());
-        } else {
-          assert((Record[i] == 5 || Record[i] == 6) &&
-                 "Invalid attribute group entry");
-          bool HasType = Record[i] == 6;
-          Attribute::AttrKind Kind;
-          if (Error Err = parseAttrKind(Record[++i], &Kind))
-            return Err;
-          if (Kind == Attribute::ByVal)
-            B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
         }
       }
 
@@ -3045,17 +3028,6 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   Func->setLinkage(getDecodedLinkage(RawLinkage));
   Func->setAttributes(getAttributes(Record[4]));
 
-  // Upgrade any old-style byval without a type by propagating the argument's
-  // pointee type. There should be no opaque pointers where the byval type is
-  // implicit.
-  for (auto &Arg : Func->args()) {
-    if (Arg.hasByValAttr() && !Arg.getParamByValType()) {
-      Arg.removeAttr(Attribute::ByVal);
-      Arg.addAttr(Attribute::getWithByValType(
-          Context, Arg.getType()->getPointerElementType()));
-    }
-  }
-
   unsigned Alignment;
   if (Error Err = parseAlignmentValue(Record[5], Alignment))
     return Err;
@@ -3469,19 +3441,6 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
   return Error::success();
 }
 
-void BitcodeReader::propagateByValTypes(CallBase *CB) {
-  for (unsigned i = 0; i < CB->getNumArgOperands(); ++i) {
-    if (CB->paramHasAttr(i, Attribute::ByVal) &&
-        !CB->getAttribute(i, Attribute::ByVal).getValueAsType()) {
-      CB->removeParamAttr(i, Attribute::ByVal);
-      CB->addParamAttr(
-          i, Attribute::getWithByValType(
-                 Context,
-                 CB->getArgOperand(i)->getType()->getPointerElementType()));
-    }
-  }
-}
-
 /// Lazily parse the specified function body block.
 Error BitcodeReader::parseFunctionBody(Function *F) {
   if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
@@ -4297,8 +4256,6 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       cast<InvokeInst>(I)->setCallingConv(
           static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
       cast<InvokeInst>(I)->setAttributes(PAL);
-      propagateByValTypes(cast<CallBase>(I));
-
       break;
     }
     case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
@@ -4774,7 +4731,6 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         TCK = CallInst::TCK_NoTail;
       cast<CallInst>(I)->setTailCallKind(TCK);
       cast<CallInst>(I)->setAttributes(PAL);
-      propagateByValTypes(cast<CallBase>(I));
       if (FMF.any()) {
         if (!isa<FPMathOperator>(I))
           return error("Fast-math-flags specified for call without "
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index d243815667fdf..8e1e06226bb46 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -747,7 +747,7 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
         Record.push_back(1);
         Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
         Record.push_back(Attr.getValueAsInt());
-      } else if (Attr.isStringAttribute()) {
+      } else {
         StringRef Kind = Attr.getKindAsString();
         StringRef Val = Attr.getValueAsString();
 
@@ -758,13 +758,6 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
           Record.append(Val.begin(), Val.end());
           Record.push_back(0);
         }
-      } else {
-        assert(Attr.isTypeAttribute());
-        Type *Ty = Attr.getValueAsType();
-        Record.push_back(Ty ? 6 : 5);
-        Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
-        if (Ty)
-          Record.push_back(VE.getTypeID(Attr.getValueAsType()));
       }
     }
 
@@ -4133,15 +4126,15 @@ void ModuleBitcodeWriter::write() {
   // Emit blockinfo, which defines the standard abbreviations etc.
   writeBlockInfo();
 
-  // Emit information describing all of the types in the module.
-  writeTypeTable();
-
   // Emit information about attribute groups.
   writeAttributeGroupTable();
 
   // Emit information about parameter attributes.
   writeAttributeTable();
 
+  // Emit information describing all of the types in the module.
+  writeTypeTable();
+
   writeComdats();
 
   // Emit top-level description of module, including target triple, inline asm,
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 143570fb20a8c..72d7000fad9dc 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -949,11 +949,9 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
   incorporateFunctionMetadata(F);
 
   // Adding function arguments to the value table.
-  for (const auto &I : F.args()) {
+  for (const auto &I : F.args())
     EnumerateValue(&I);
-    if (I.hasAttribute(Attribute::ByVal) && I.getParamByValType())
-      EnumerateType(I.getParamByValType());
-  }
+
   FirstFuncConstantID = Values.size();
 
   // Add all function-level constants to the value table.
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 93727406a087a..f144b18aa6358 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -87,10 +87,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
 
   if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
-
-    auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
-    Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
-
+    Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index d887ed73c441e..8fb1a7b5bb9c2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1204,11 +1204,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
     if (Arg.IsByVal || Arg.IsInAlloca) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
-      unsigned FrameSize =
-          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
-
-      // For ByVal, alignment should come from FE. BE will guess if this info
-      // is not there, but there are cases it cannot get right.
+      unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
+      // For ByVal, alignment should come from FE. BE will guess if this info is
+      // not there, but there are cases it cannot get right.
       unsigned FrameAlign = Arg.Alignment;
       if (!FrameAlign)
         FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index da06ac7a414ff..fe857f73b2548 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9076,11 +9076,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       if (Args[i].IsByVal || Args[i].IsInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
-
-        unsigned FrameSize = DL.getTypeAllocSize(
-            Args[i].ByValType ? Args[i].ByValType : ElementTy);
-        Flags.setByValSize(FrameSize);
-
+        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+        // For ByVal, alignment should come from FE.  BE will guess if this
         // info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
         if (Args[i].Alignment)
@@ -9577,14 +9574,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       if (Flags.isByVal() || Flags.isInAlloca()) {
         PointerType *Ty = cast<PointerType>(Arg.getType());
         Type *ElementTy = Ty->getElementType();
-
-        // For ByVal, size and alignment should be passed from FE.  BE will
-        // guess if this info is not there but there are cases it cannot get
-        // right.
-        unsigned FrameSize = DL.getTypeAllocSize(
-            Arg.getParamByValType() ? Arg.getParamByValType() : ElementTy);
-        Flags.setByValSize(FrameSize);
-
+        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+        // For ByVal, alignment should be passed from FE.  BE will guess if
+        // this info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
         if (Arg.getParamAlignment())
           FrameAlign = Arg.getParamAlignment();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4ad578d80fab6..d636e613363e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -112,7 +112,6 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlignment(ArgIdx);
-  ByValType = Call->getParamByValType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index f6898476382dd..8ebcb04a565d1 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -29,7 +29,6 @@
 namespace llvm {
 
 class LLVMContext;
-class Type;
 
 //===----------------------------------------------------------------------===//
 /// \class
@@ -42,8 +41,7 @@ class AttributeImpl : public FoldingSetNode {
   enum AttrEntryKind {
     EnumAttrEntry,
     IntAttrEntry,
-    StringAttrEntry,
-    TypeAttrEntry,
+    StringAttrEntry
   };
 
   AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -58,7 +56,6 @@ class AttributeImpl : public FoldingSetNode {
   bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
   bool isIntAttribute() const { return KindID == IntAttrEntry; }
   bool isStringAttribute() const { return KindID == StringAttrEntry; }
-  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
 
   bool hasAttribute(Attribute::AttrKind A) const;
   bool hasAttribute(StringRef Kind) const;
@@ -69,20 +66,16 @@ class AttributeImpl : public FoldingSetNode {
   StringRef getKindAsString() const;
   StringRef getValueAsString() const;
 
-  Type *getValueAsType() const;
-
   /// Used when sorting the attributes.
   bool operator<(const AttributeImpl &AI) const;
 
   void Profile(FoldingSetNodeID &ID) const {
     if (isEnumAttribute())
-      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
+      Profile(ID, getKindAsEnum(), 0);
     else if (isIntAttribute())
       Profile(ID, getKindAsEnum(), getValueAsInt());
-    else if (isStringAttribute())
-      Profile(ID, getKindAsString(), getValueAsString());
     else
-      Profile(ID, getKindAsEnum(), getValueAsType());
+      Profile(ID, getKindAsString(), getValueAsString());
   }
 
   static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
@@ -95,12 +88,6 @@ class AttributeImpl : public FoldingSetNode {
     ID.AddString(Kind);
     if (!Values.empty()) ID.AddString(Values);
   }
-
-  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
-                      Type *Ty) {
-    ID.AddInteger(Kind);
-    ID.AddPointer(Ty);
-  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -158,18 +145,6 @@ class StringAttributeImpl : public AttributeImpl {
   StringRef getStringValue() const { return Val; }
 };
 
-class TypeAttributeImpl : public EnumAttributeImpl {
-  virtual void anchor();
-
-  Type *Ty;
-
-public:
-  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
-      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
-
-  Type *getTypeValue() const { return Ty; }
-};
-
 //===----------------------------------------------------------------------===//
 /// \class
 /// This class represents a group of attributes that apply to one
@@ -214,7 +189,6 @@ class AttributeSetNode final
   uint64_t getDereferenceableOrNullBytes() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp) const;
-  Type *getByValType() const;
 
   using iterator = const Attribute *;
 
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 839ef46b4f41d..0be09a05e82ab 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -121,27 +121,6 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
   return Attribute(PA);
 }
 
-Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
-                         Type *Ty) {
-  LLVMContextImpl *pImpl = Context.pImpl;
-  FoldingSetNodeID ID;
-  ID.AddInteger(Kind);
-  ID.AddPointer(Ty);
-
-  void *InsertPoint;
-  AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
-
-  if (!PA) {
-    // If we didn't find any existing attributes of the same shape then create a
-    // new one and insert it.
-    PA = new TypeAttributeImpl(Kind, Ty);
-    pImpl->AttrsSet.InsertNode(PA, InsertPoint);
-  }
-
-  // Return the Attribute that we found or created.
-  return Attribute(PA);
-}
-
 Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x40000000 && "Alignment too large.");
@@ -167,10 +146,6 @@ Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
   return get(Context, DereferenceableOrNull, Bytes);
 }
 
-Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
-  return get(Context, ByVal, Ty);
-}
-
 Attribute
 Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg) {
@@ -195,13 +170,9 @@ bool Attribute::isStringAttribute() const {
   return pImpl && pImpl->isStringAttribute();
 }
 
-bool Attribute::isTypeAttribute() const {
-  return pImpl && pImpl->isTypeAttribute();
-}
-
 Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
-  assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute()) &&
+  assert((isEnumAttribute() || isIntAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
   return pImpl->getKindAsEnum();
 }
@@ -227,14 +198,6 @@ StringRef Attribute::getValueAsString() const {
   return pImpl->getValueAsString();
 }
 
-Type *Attribute::getValueAsType() const {
-  if (!pImpl) return {};
-  assert(isTypeAttribute() &&
-         "Invalid attribute type to get the value as a type!");
-  return pImpl->getValueAsType();
-}
-
-
 bool Attribute::hasAttribute(AttrKind Kind) const {
   return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
 }
@@ -289,6 +252,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "argmemonly";
   if (hasAttribute(Attribute::Builtin))
     return "builtin";
+  if (hasAttribute(Attribute::ByVal))
+    return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
   if (hasAttribute(Attribute::SwiftError))
@@ -388,19 +353,6 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
   if (hasAttribute(Attribute::ImmArg))
     return "immarg";
 
-  if (hasAttribute(Attribute::ByVal)) {
-    std::string Result;
-    Result += "byval";
-    if (Type *Ty = getValueAsType()) {
-      raw_string_ostream OS(Result);
-      Result += '(';
-      Ty->print(OS, false, true);
-      OS.flush();
-      Result += ')';
-    }
-    return Result;
-  }
-
   // FIXME: These should be output like this:
   //
   //   align=4
@@ -499,8 +451,6 @@ void IntAttributeImpl::anchor() {}
 
 void StringAttributeImpl::anchor() {}
 
-void TypeAttributeImpl::anchor() {}
-
 bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
   if (isStringAttribute()) return false;
   return getKindAsEnum() == A;
@@ -512,7 +462,7 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {
 }
 
 Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
-  assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute());
+  assert(isEnumAttribute() || isIntAttribute());
   return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
 }
 
@@ -531,11 +481,6 @@ StringRef AttributeImpl::getValueAsString() const {
   return static_cast<const StringAttributeImpl *>(this)->getStringValue();
 }
 
-Type *AttributeImpl::getValueAsType() const {
-  assert(isTypeAttribute());
-  return static_cast<const TypeAttributeImpl *>(this)->getTypeValue();
-}
-
 bool AttributeImpl::operator<(const AttributeImpl &AI) const {
   // This sorts the attributes with Attribute::AttrKinds coming first (sorted
   // relative to their enum value) and then strings.
@@ -543,23 +488,10 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
     if (AI.isIntAttribute()) return true;
     if (AI.isStringAttribute()) return true;
-    if (AI.isTypeAttribute()) return true;
-  }
-
-  if (isTypeAttribute()) {
-    if (AI.isEnumAttribute()) return false;
-    if (AI.isTypeAttribute()) {
-      assert(getKindAsEnum() != AI.getKindAsEnum() &&
-             "Comparison of types would be unstable");
-      return getKindAsEnum() < AI.getKindAsEnum();
-    }
-    if (AI.isIntAttribute()) return true;
-    if (AI.isStringAttribute()) return true;
   }
 
   if (isIntAttribute()) {
     if (AI.isEnumAttribute()) return false;
-    if (AI.isTypeAttribute()) return false;
     if (AI.isIntAttribute()) {
       if (getKindAsEnum() == AI.getKindAsEnum())
         return getValueAsInt() < AI.getValueAsInt();
@@ -568,9 +500,7 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isStringAttribute()) return true;
   }
 
-  assert(isStringAttribute());
   if (AI.isEnumAttribute()) return false;
-  if (AI.isTypeAttribute()) return false;
   if (AI.isIntAttribute()) return false;
   if (getKindAsString() == AI.getKindAsString())
     return getValueAsString() < AI.getValueAsString();
@@ -678,10 +608,6 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
   return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
 }
 
-Type *AttributeSet::getByValType() const {
-  return SetNode ? SetNode->getByValType() : nullptr;
-}
-
 std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
   return SetNode ? SetNode->getAllocSizeArgs()
                  : std::pair<unsigned, Optional<unsigned>>(0, 0);
@@ -765,9 +691,6 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
 
     Attribute Attr;
     switch (Kind) {
-    case Attribute::ByVal:
-      Attr = Attribute::getWithByValType(C, B.getByValType());
-      break;
     case Attribute::Alignment:
       Attr = Attribute::getWithAlignment(C, B.getAlignment());
       break;
@@ -837,13 +760,6 @@ unsigned AttributeSetNode::getStackAlignment() const {
   return 0;
 }
 
-Type *AttributeSetNode::getByValType() const {
-  for (const auto I : *this)
-    if (I.hasAttribute(Attribute::ByVal))
-      return I.getValueAsType();
-  return 0;
-}
-
 uint64_t AttributeSetNode::getDereferenceableBytes() const {
   for (const auto I : *this)
     if (I.hasAttribute(Attribute::Dereferenceable))
@@ -1342,11 +1258,6 @@ unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
   return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
-Type *AttributeList::getParamByValType(unsigned Index) const {
-  return getAttributes(Index+FirstArgIndex).getByValType();
-}
-
-
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
@@ -1425,7 +1336,6 @@ void AttrBuilder::clear() {
   TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
-  ByValType = nullptr;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1450,8 +1360,6 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
     Alignment = Attr.getAlignment();
   else if (Kind == Attribute::StackAlignment)
     StackAlignment = Attr.getStackAlignment();
-  else if (Kind == Attribute::ByVal)
-    ByValType = Attr.getValueAsType();
   else if (Kind == Attribute::Dereferenceable)
     DerefBytes = Attr.getDereferenceableBytes();
   else if (Kind == Attribute::DereferenceableOrNull)
@@ -1474,8 +1382,6 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
     Alignment = 0;
   else if (Val == Attribute::StackAlignment)
     StackAlignment = 0;
-  else if (Val == Attribute::ByVal)
-    ByValType = nullptr;
   else if (Val == Attribute::Dereferenceable)
     DerefBytes = 0;
   else if (Val == Attribute::DereferenceableOrNull)
@@ -1558,12 +1464,6 @@ AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
   return *this;
 }
 
-AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
-  Attrs[Attribute::ByVal] = true;
-  ByValType = Ty;
-  return *this;
-}
-
 AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   // FIXME: What if both have alignments, but they don't match?!
   if (!Alignment)
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index a4a78ca4deb96..b00deb677b31d 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -113,11 +113,6 @@ unsigned Argument::getParamAlignment() const {
   return getParent()->getParamAlignment(getArgNo());
 }
 
-Type *Argument::getParamByValType() const {
-  assert(getType()->isPointerTy() && "Only pointers have byval types");
-  return getParent()->getParamByValType(getArgNo());
-}
-
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 963bf82c98982..d5c3287e7134c 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1695,11 +1695,6 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
          "'noinline and alwaysinline' are incompatible!",
          V);
 
-  if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
-    Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
-           "Attribute 'byval' type does not match parameter!");
-  }
-
   AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
   Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
          "Wrong types for attribute: " +
diff --git a/llvm/test/Assembler/byval-type-attr.ll b/llvm/test/Assembler/byval-type-attr.ll
deleted file mode 100644
index eb17a30db3b2e..0000000000000
--- a/llvm/test/Assembler/byval-type-attr.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
-
-; CHECK: define void @foo(i32* byval(i32) align 4)
-define void @foo(i32* byval(i32) align 4) {
-  ret void
-}
-
-; CHECK: define void @bar({ i32*, i8 }* byval({ i32*, i8 }) align 4)
-define void @bar({i32*, i8}* byval({i32*, i8}) align 4) {
-  ret void
-}
-
-define void @caller({ i32*, i8 }* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
-; CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
-  call void @bar({i32*, i8}* byval %ptr)
-  invoke void @bar({i32*, i8}* byval %ptr) to label %success unwind label %fail
-
-success:
-  ret void
-
-fail:
-  landingpad { i8*, i32 } cleanup
-  ret void
-}
-
-; CHECK: declare void @baz([8 x i8]* byval([8 x i8]))
-%named_type = type [8 x i8]
-declare void @baz(%named_type* byval(%named_type))
-
-declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Assembler/invalid-byval-type1.ll b/llvm/test/Assembler/invalid-byval-type1.ll
deleted file mode 100644
index 236790e114289..0000000000000
--- a/llvm/test/Assembler/invalid-byval-type1.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-; CHECK: Attribute 'byval' type does not match parameter!
-declare void @foo(i32* byval(i8))
diff --git a/llvm/test/Assembler/invalid-byval-type2.ll b/llvm/test/Assembler/invalid-byval-type2.ll
deleted file mode 100644
index 3ca0d5e7c4c2b..0000000000000
--- a/llvm/test/Assembler/invalid-byval-type2.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-; CHECK: error: void type only allowed for function results
-declare void @foo(i32* byval(void))
diff --git a/llvm/test/Assembler/invalid-byval-type3.ll b/llvm/test/Assembler/invalid-byval-type3.ll
deleted file mode 100644
index 4626dd71c5b56..0000000000000
--- a/llvm/test/Assembler/invalid-byval-type3.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
-
-; CHECK: Attributes 'byval' and 'inalloca' do not support unsized types!
-declare void @foo(void()* byval(void()))
diff --git a/llvm/test/Bitcode/Inputs/byval-upgrade.bc b/llvm/test/Bitcode/Inputs/byval-upgrade.bc
deleted file mode 100644
index e0da41f78e773b0d9154313098c2adbc5d3e5a71..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1092
zcmX|APfXip6#o(jY#?(CjkZv1%LdkE)kNK}b;+DaB&J=^IPhnh(A0^9wG6hDIUx#x
zPL;uFopw=`QVyBaLy@@bmQHBufdj#`%a~Vn)iiNns1XdIilJ!hq)qB!FJamzeedV@
z^!)z(-unX5>h@c9gfN8Ah*fA;sG1_Eh_@p24`73tM`pm6;o`GxxBNv*Pc7y0%$riy
zS8Ir7*7D_uVn1sR%r^v<D3{$$INqyw4WxteI^Wjaq-7}oy!k?rxM%02b7}5-lWQR5
zIO#l@J^e-g{4MT55#shc5V8Qa+~m+TjyppI5M)QF2ke#H$L2x-4)8h%3eeRWtm7_R
zwcKu9N`cr(EoSXay*Dxqsij`3s|`(BLS5mT==7YU`v$82>bNEF<{n}G5Hk&;bA2X+
zj`P;>USsFf57|1Qnh0Vdq#%XA?dQ9y%8`&xToM#S{BLPLL)d@C5h|KZwP19yIcsxc
zZ#GkR;@yW9Y2e$}mCcItJ*fK%U?-u_>m_>BZt|ricS_F$6nr{~Pv)Z5J!JcxBA+W{
zr!~N?6_`hh^r%o%byHgm%vY3r8O&!SGjSQeOyJW&O_uQ)8A~KJnbVq$M)Dv_mIT{`
zVB0B^<vy~LT4%O-dXs0?!tAJUU0hMc$C76m^RB4g6eb#Fdm@Bi9>d=r(_$bMP0ng^
z2v0ci#NRa#c1qEP2`%c-VmWO(H~Yq6nLK<-{<TUT2-rw*k=|XTjjG%V(;Ga!KFe%`
zYk$c~o}%Q*Rpou%y&$>Mn7E*yf}}6z#Gf%SuUo$x*W@8BCadu=JQ>pB4lRCHYko9n
zJ4CiW`pDfZxzlAUapVDX!8w_&0=wpCpYZh7EW7oT-YqfDOzfzm-<_A<xuc3kU;BTJ
zgtVz7p3L?4KF``J46vWF8)15%XSNH>6%$)2m}ko2+V`)jZbKLIk`D-ZvF6>C#5AaS
z=0QDls!c&X1U}=$W4YP$I}~}aO8(YI{wcH-p7`0_(kq)KW*Z6!Hj8=Hx2(3LbazJb
z7`pd<!Bf<sN3RiFjuJRj6$Ju^aP&3IzX9goP#<#wulUT6oRfS&PhRpo*5~%<hl?-m
z`zvGL<C&v+(ND10m7T&3?~*Fs0TMCaUDXSOW_4Iu6y^}J0UopHX+N)U4oA1}BVZFz
z+ql0{C|i_kSjDdf?gxT*FmBSi>&$%5Yh8Bq4cICt${oFP7fH3I(6fqr^?U@6fP}9m
zY~izt;p1>5axpeM{7Eu7EMJUGem)V7jK`;k6T(P1=IIbRgvrE2{F2-eiB$jp7n@*I
AZvX%Q

diff --git a/llvm/test/Bitcode/attributes-3.3.ll b/llvm/test/Bitcode/attributes-3.3.ll
index f1b37d1991228..b564425c37381 100644
--- a/llvm/test/Bitcode/attributes-3.3.ll
+++ b/llvm/test/Bitcode/attributes-3.3.ll
@@ -48,7 +48,7 @@ define void @f7(i8* noalias)
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval(i8))
+; CHECK: define void @f8(i8* byval)
 {
         ret void;
 }
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 6f149c0d3bf7b..de3cf8dd4d73a 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -45,7 +45,7 @@ define void @f7(i8* noalias)
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval(i8))
+; CHECK: define void @f8(i8* byval)
 {
         ret void;
 }
diff --git a/llvm/test/Bitcode/byval-upgrade.test b/llvm/test/Bitcode/byval-upgrade.test
deleted file mode 100644
index 1012bf728830f..0000000000000
--- a/llvm/test/Bitcode/byval-upgrade.test
+++ /dev/null
@@ -1,7 +0,0 @@
-RUN: llvm-dis %p/Inputs/byval-upgrade.bc -o - | FileCheck %s
-
-Make sure we upgrade old-stile IntAttribute byval records to a fully typed
-version correctly.
-
-CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
-CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll
index 322c95b02d1f8..e9313dfba870e 100644
--- a/llvm/test/Bitcode/compatibility-3.6.ll
+++ b/llvm/test/Bitcode/compatibility-3.6.ll
@@ -404,7 +404,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll
index e8260741373dd..82fc99055357a 100644
--- a/llvm/test/Bitcode/compatibility-3.7.ll
+++ b/llvm/test/Bitcode/compatibility-3.7.ll
@@ -410,7 +410,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll
index c8d8261915595..2e70a380d10ed 100644
--- a/llvm/test/Bitcode/compatibility-3.8.ll
+++ b/llvm/test/Bitcode/compatibility-3.8.ll
@@ -435,7 +435,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll
index f031e54a9f211..7c84daa7d3c44 100644
--- a/llvm/test/Bitcode/compatibility-3.9.ll
+++ b/llvm/test/Bitcode/compatibility-3.9.ll
@@ -504,7 +504,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll
index 8020df45da6a5..9e34d48c95f76 100644
--- a/llvm/test/Bitcode/compatibility-4.0.ll
+++ b/llvm/test/Bitcode/compatibility-4.0.ll
@@ -504,7 +504,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll
index 3b32d668af557..a4b3fca82b7bf 100644
--- a/llvm/test/Bitcode/compatibility-5.0.ll
+++ b/llvm/test/Bitcode/compatibility-5.0.ll
@@ -508,7 +508,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll
index 3b08c4a72311d..097e19ecb7ff8 100644
--- a/llvm/test/Bitcode/compatibility-6.0.ll
+++ b/llvm/test/Bitcode/compatibility-6.0.ll
@@ -515,7 +515,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 991bde69b61d7..06b81fa14a819 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -529,7 +529,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
@@ -1735,15 +1735,6 @@ define i8** @constexpr() {
 declare void @llvm.test.immarg.intrinsic(i32 immarg)
 ; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
 
-; byval attribute with type
-%named_type = type [8 x i8]
-declare void @byval_type(i32* byval(i32) align 2)
-declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
-declare void @byval_named_type(%named_type* byval(%named_type))
-; CHECK: declare void @byval_type(i32* byval(i32) align 2)
-; CHECK: declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
-; CHECK: declare void @byval_named_type([8 x i8]* byval([8 x i8]))
-
 ; CHECK: attributes #0 = { alignstack=4 }
 ; CHECK: attributes #1 = { alignstack=8 }
 ; CHECK: attributes #2 = { alwaysinline }
diff --git a/llvm/test/Bitcode/highLevelStructure.3.2.ll b/llvm/test/Bitcode/highLevelStructure.3.2.ll
index 91d6ee4ac2574..749b157cffc32 100644
--- a/llvm/test/Bitcode/highLevelStructure.3.2.ll
+++ b/llvm/test/Bitcode/highLevelStructure.3.2.ll
@@ -41,7 +41,7 @@ declare void @ParamAttr3(i8* sret)
 declare void @ParamAttr4(i8 signext)
 ; CHECK: declare void @ParamAttr5(i8* inreg)
 declare void @ParamAttr5(i8* inreg)
-; CHECK: declare void @ParamAttr6(i8* byval(i8))
+; CHECK: declare void @ParamAttr6(i8* byval)
 declare void @ParamAttr6(i8* byval)
 ; CHECK: declare void @ParamAttr7(i8* noalias)
 declare void @ParamAttr7(i8* noalias)
@@ -51,7 +51,7 @@ declare void @ParamAttr8(i8* nocapture)
 declare void @ParamAttr9(i8* nest noalias nocapture)
 ; CHECK: declare void @ParamAttr10{{[(i8* sret noalias nocapture) | (i8* noalias nocapture sret)]}}
 declare void @ParamAttr10(i8* sret noalias nocapture)
-;CHECK: declare void @ParamAttr11{{[(i8* byval(i8) noalias nocapture) | (i8* noalias nocapture byval(i8))]}}
+;CHECK: declare void @ParamAttr11{{[(i8* byval noalias nocapture) | (i8* noalias nocapture byval)]}}
 declare void @ParamAttr11(i8* byval noalias nocapture)
 ;CHECK: declare void @ParamAttr12{{[(i8* inreg noalias nocapture) | (i8* noalias nocapture inreg)]}}
 declare void @ParamAttr12(i8* inreg noalias nocapture)
diff --git a/llvm/test/CodeGen/AArch64/byval-type.ll b/llvm/test/CodeGen/AArch64/byval-type.ll
deleted file mode 100644
index 0c2e2dc471dd9..0000000000000
--- a/llvm/test/CodeGen/AArch64/byval-type.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
-
-define i8 @byval_match(i8* byval(i8) align 1, i8* byval %ptr) {
-; CHECK-LABEL: byval_match:
-; CHECK: ldrb w0, [sp, #8]
-  %res = load i8, i8* %ptr
-  ret i8 %res
-}
-
-define void @caller_match(i8* %p0, i8* %p1) {
-; CHECK-LABEL: caller_match:
-; CHECK: ldrb [[P1:w[0-9]+]], [x1]
-; CHECK: strb [[P1]], [sp, #8]
-; CHECK: ldrb [[P0:w[0-9]+]], [x0]
-; CHECK: strb [[P0]], [sp]
-; CHECK: bl byval_match
-  call i8 @byval_match(i8* byval(i8) align 1 %p0, i8* byval %p1)
-  ret void
-}
-
-define i8 @byval_large([3 x i64]* byval([3 x i64]) align 8, i8* byval %ptr) {
-; CHECK-LABEL: byval_large:
-; CHECK: ldrb w0, [sp, #24]
-  %res = load i8, i8* %ptr
-  ret i8 %res
-}
-
-define void @caller_large([3 x i64]* %p0, i8* %p1) {
-; CHECK-LABEL: caller_large:
-; CHECK: ldr [[P0HI:x[0-9]+]], [x0, #16]
-; CHECK: ldr [[P0LO:q[0-9]+]], [x0]
-; CHECK: str [[P0HI]], [sp, #16]
-; CHECK: str [[P0LO]], [sp]
-; CHECK: bl byval_large
-  call i8 @byval_large([3 x i64]* byval([3 x i64]) align 8 %p0, i8* byval %p1)
-  ret void
-}
diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll
index be495f1bcd379..8aafe7943f4ee 100644
--- a/llvm/test/Transforms/Inline/byval-tail-call.ll
+++ b/llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -56,7 +56,7 @@ define void @foobar(i32* %x) {
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
+; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
 ; CHECK: ret void
   tail call void @bar2(i32* byval %x)
   ret void
@@ -67,7 +67,7 @@ define void @barfoo() {
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
+; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
 ; CHECK: ret void
   %x = alloca i32
   tail call void @bar2(i32* byval %x)
diff --git a/llvm/unittests/IR/AttributesTest.cpp b/llvm/unittests/IR/AttributesTest.cpp
index 06da35aca5735..e0be2343a1445 100644
--- a/llvm/unittests/IR/AttributesTest.cpp
+++ b/llvm/unittests/IR/AttributesTest.cpp
@@ -8,7 +8,6 @@
 
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 
@@ -41,10 +40,6 @@ TEST(Attributes, Ordering) {
   EXPECT_TRUE(Align4 < Deref5);
   EXPECT_TRUE(Align5 < Deref4);
 
-  Attribute ByVal = Attribute::get(C, Attribute::ByVal, Type::getInt32Ty(C));
-  EXPECT_FALSE(ByVal < Attribute::get(C, Attribute::ZExt));
-  EXPECT_TRUE(ByVal < Align4);
-
   AttributeList ASs[] = {AttributeList::get(C, 2, Attribute::ZExt),
                          AttributeList::get(C, 1, Attribute::SExt)};
 
@@ -171,19 +166,4 @@ TEST(Attributes, OverflowGet) {
   EXPECT_EQ(2U, AL.getNumAttrSets());
 }
 
-TEST(Attributes, StringRepresentation) {
-  LLVMContext C;
-  StructType *Ty = StructType::create(Type::getInt32Ty(C), "mystruct");
-
-  // Insufficiently careful printing can result in byval(%mystruct = { i32 })
-  Attribute A = Attribute::getWithByValType(C, Ty);
-  EXPECT_EQ(A.getAsString(), "byval(%mystruct)");
-
-  A = Attribute::getWithByValType(C, nullptr);
-  EXPECT_EQ(A.getAsString(), "byval");
-
-  A = Attribute::getWithByValType(C, Type::getInt32Ty(C));
-  EXPECT_EQ(A.getAsString(), "byval(i32)");
-}
-
 } // end anonymous namespace

From 74c5fac3bba77d796132c313b77c808b2165b75d Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 29 May 2019 20:47:27 +0000
Subject: [PATCH 0555/1176] [analyzer] Remove EndPath function as it is dead
 code

Summary: -

Reviewers: george.karpenkov

Reviewed By: george.karpenkov

Subscribers: baloghadamsoftware, cfe-commits, xazax.hun, szepet, a.sidorin,
             mikhail.ramalho, Szelethus, donat.nagy, dkrupp

Tags: #clang

Differential Revision: https://reviews.llvm.org/D53720

llvm-svn: 362030
---
 .../StaticAnalyzer/Core/PathSensitive/ConstraintManager.h     | 2 --
 .../clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h    | 4 ----
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp                  | 1 -
 3 files changed, 7 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
index 1baf8c57de86f..f85c373791585 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h
@@ -166,8 +166,6 @@ class ConstraintManager {
                          const char *NL, unsigned int Space,
                          bool IsDot) const = 0;
 
-  virtual void EndPath(ProgramStateRef state) {}
-
   /// Convenience method to query the state to see if a symbol is null or
   /// not null, or if neither assumption can be made.
   ConditionTruthVal isNull(ProgramStateRef State, SymbolRef Sym) {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index cddae9d02e402..d38058f9af56d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -634,10 +634,6 @@ class ProgramStateManager {
 
     return ProgramStateTrait<T>::MakeContext(p);
   }
-
-  void EndPath(ProgramStateRef St) {
-    ConstraintMgr->EndPath(St);
-  }
 };
 
 
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 9a9b12dc60027..ad3c5d206a258 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2316,7 +2316,6 @@ void ExprEngine::processEndOfFunction(NodeBuilderContext& BC,
                                        Pred->getStackFrame()->getParent()));
 
   PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
-  StateMgr.EndPath(Pred->getState());
 
   ExplodedNodeSet Dst;
   if (Pred->getLocationContext()->inTopFrame()) {

From 79b3ea701c8be993e11ea211ad35be2d178f5a08 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 29 May 2019 20:47:59 +0000
Subject: [PATCH 0556/1176] LoopVersioningLICM: Respect convergent and
 noduplicate

llvm-svn: 362031
---
 .../Transforms/Scalar/LoopVersioningLICM.cpp  | 10 +-
 .../LoopVersioningLICM/convergent.ll          | 97 +++++++++++++++++++
 2 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVersioningLICM/convergent.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 87898df5ec15d..896dd8bcb9229 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -356,14 +356,22 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
 /// 1) Check all load store in loop body are non atomic & non volatile.
 /// 2) Check function call safety, by ensuring its not accessing memory.
 /// 3) Loop body shouldn't have any may throw instruction.
+/// 4) Loop body shouldn't have any convergent or noduplicate instructions.
 bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
   assert(I != nullptr && "Null instruction found!");
   // Check function call safety
-  if (auto *Call = dyn_cast<CallBase>(I))
+  if (auto *Call = dyn_cast<CallBase>(I)) {
+    if (Call->isConvergent() || Call->cannotDuplicate()) {
+      LLVM_DEBUG(dbgs() << "    Convergent call site found.\n");
+      return false;
+    }
+
     if (!AA->doesNotAccessMemory(Call)) {
       LLVM_DEBUG(dbgs() << "    Unsafe call site found.\n");
       return false;
     }
+  }
+
   // Avoid loops with possiblity of throw
   if (I->mayThrow()) {
     LLVM_DEBUG(dbgs() << "    May throw instruction found in loop body\n");
diff --git a/llvm/test/Transforms/LoopVersioningLICM/convergent.ll b/llvm/test/Transforms/LoopVersioningLICM/convergent.ll
new file mode 100644
index 0000000000000..55209be4f9c0a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioningLICM/convergent.ll
@@ -0,0 +1,97 @@
+; RUN: opt -S -loop-versioning-licm -licm-versioning-invariant-threshold=0 %s | FileCheck %s
+
+; Make sure the convergent attribute is respected, and no condition is
+; introduced
+
+; CHECK-LABEL: @test_convergent(
+; CHECK: call void @llvm.convergent()
+; CHECK-NOT: call void @llvm.convergent()
+define i32 @test_convergent(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #1 {
+entry:
+  %cmp14 = icmp eq i32 %itr, 0
+  br i1 %cmp14, label %for.end13, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc11
+  %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %entry ]
+  %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %entry ]
+  %cmp212 = icmp ult i32 %j.016, %itr
+  br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.015, %itr
+  %idxprom6 = zext i32 %i.015 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.113 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+  store i32 %add, i32* %arrayidx, align 4
+  %load.arrayidx7 = load i32, i32* %arrayidx7, align 4
+  call void @llvm.convergent()
+  %add8 = add nsw i32 %load.arrayidx7, %add
+  store i32 %add8, i32* %arrayidx7, align 4
+  %inc = add nuw i32 %j.113, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc11
+
+for.inc11:                                        ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %inc12 = add nuw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc12, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end13
+
+for.end13:                                        ; preds = %for.inc11, %entry
+  ret i32 0
+}
+
+; CHECK-LABEL: @test_noduplicate(
+; CHECK: call void @llvm.noduplicate()
+; CHECK-NOT: call void @llvm.noduplicate()
+define i32 @test_noduplicate(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #2 {
+entry:
+  %cmp14 = icmp eq i32 %itr, 0
+  br i1 %cmp14, label %for.end13, label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.inc11
+  %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %entry ]
+  %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %entry ]
+  %cmp212 = icmp ult i32 %j.016, %itr
+  br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  %add = add i32 %i.015, %itr
+  %idxprom6 = zext i32 %i.015 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+  %idxprom = zext i32 %j.113 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+  store i32 %add, i32* %arrayidx, align 4
+  %load.arrayidx7 = load i32, i32* %arrayidx7, align 4
+  call void @llvm.noduplicate()
+  %add8 = add nsw i32 %load.arrayidx7, %add
+  store i32 %add8, i32* %arrayidx7, align 4
+  %inc = add nuw i32 %j.113, 1
+  %cmp2 = icmp ult i32 %inc, %itr
+  br i1 %cmp2, label %for.body3, label %for.inc11
+
+for.inc11:                                        ; preds = %for.body3, %for.cond1.preheader
+  %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.body3 ]
+  %inc12 = add nuw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc12, %itr
+  br i1 %cmp, label %for.cond1.preheader, label %for.end13
+
+for.end13:                                        ; preds = %for.inc11, %entry
+  ret i32 0
+}
+
+declare void @llvm.convergent() #1
+declare void @llvm.noduplicate() #2
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { norecurse nounwind readnone convergent }
+attributes #2 = { norecurse nounwind readnone noduplicate }

From 7d3e97fbe6dc98619dce3d33f2a83d391b8e30a8 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Wed, 29 May 2019 21:07:53 +0000
Subject: [PATCH 0557/1176] [Target] Sink some asserts into
 Process::GetLanguageRuntime

llvm-svn: 362032
---
 lldb/source/Target/Process.cpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index c88ef0dcf8184..66204da2c59b0 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1574,17 +1574,28 @@ LanguageRuntime *Process::GetLanguageRuntime(lldb::LanguageType language,
   if (m_finalizing)
     return nullptr;
 
+  LanguageRuntime *runtime = nullptr;
+
   std::lock_guard<std::recursive_mutex> guard(m_language_runtimes_mutex);
   LanguageRuntimeCollection::iterator pos;
   pos = m_language_runtimes.find(language);
-  if (pos == m_language_runtimes.end() || (retry_if_null && !(*pos).second)) {
+  if (pos == m_language_runtimes.end() || (retry_if_null && !pos->second)) {
     lldb::LanguageRuntimeSP runtime_sp(
         LanguageRuntime::FindPlugin(this, language));
 
     m_language_runtimes[language] = runtime_sp;
-    return runtime_sp.get();
+    runtime = runtime_sp.get();
   } else
-    return (*pos).second.get();
+    runtime = pos->second.get();
+
+  if (runtime)
+    // It's possible that a language runtime can support multiple LanguageTypes,
+    // for example, CPPLanguageRuntime will support eLanguageTypeC_plus_plus,
+    // eLanguageTypeC_plus_plus_03, etc. Because of this, we should get the
+    // primary language type and make sure that our runtime supports it.
+    assert(runtime->GetLanguageType() == Language::GetPrimaryLanguage(language));
+
+  return runtime;
 }
 
 CPPLanguageRuntime *Process::GetCPPLanguageRuntime(bool retry_if_null) {
@@ -1594,7 +1605,6 @@ CPPLanguageRuntime *Process::GetCPPLanguageRuntime(bool retry_if_null) {
   if (!runtime)
     return nullptr;
 
-  assert(runtime->GetLanguageType() == eLanguageTypeC_plus_plus);
   return static_cast<CPPLanguageRuntime *>(runtime);
 }
 
@@ -1605,7 +1615,6 @@ ObjCLanguageRuntime *Process::GetObjCLanguageRuntime(bool retry_if_null) {
   if (!runtime)
     return nullptr;
 
-  assert(runtime->GetLanguageType() == eLanguageTypeObjC);
   return static_cast<ObjCLanguageRuntime *>(runtime);
 }
 

From 46631dffc4b665bc2b8f473ab54c1b40cf87df76 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Wed, 29 May 2019 21:22:54 +0000
Subject: [PATCH 0558/1176] Fix Xcode project lldb unit test target so it
 compiles.

llvm-svn: 362033
---
 lldb/lldb.xcodeproj/project.pbxproj | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lldb/lldb.xcodeproj/project.pbxproj b/lldb/lldb.xcodeproj/project.pbxproj
index e6374e7e5735f..7c1e91eed47de 100644
--- a/lldb/lldb.xcodeproj/project.pbxproj
+++ b/lldb/lldb.xcodeproj/project.pbxproj
@@ -900,6 +900,7 @@
 		26D7E45D13D5E30A007FD12B /* SocketAddress.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26D7E45C13D5E30A007FD12B /* SocketAddress.cpp */; };
 		23CB15451D66DA9300EDDDE1 /* SocketAddressTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2321F9391BDD332400BA9A93 /* SocketAddressTest.cpp */; };
 		23CB153B1D66DA9300EDDDE1 /* SocketTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2321F93A1BDD332400BA9A93 /* SocketTest.cpp */; };
+		26FCE61C229F00F000D125BD /* SocketTestUtilities.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26FCE61B229F00F000D125BD /* SocketTestUtilities.cpp */; };
 		26603879211CA90F00329572 /* SourceBreakpoint.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26603870211CA90D00329572 /* SourceBreakpoint.cpp */; };
 		2689004C13353E0400698AC0 /* SourceManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7E8F10F1B85900F91463 /* SourceManager.cpp */; };
 		268900F313353E6F00698AC0 /* StackFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7F3810F1B90C00F91463 /* StackFrame.cpp */; };
@@ -2964,6 +2965,7 @@
 		26D7E45B13D5E2F9007FD12B /* SocketAddress.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = SocketAddress.h; path = include/lldb/Host/SocketAddress.h; sourceTree = "<group>"; };
 		2321F9391BDD332400BA9A93 /* SocketAddressTest.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = SocketAddressTest.cpp; sourceTree = "<group>"; };
 		2321F93A1BDD332400BA9A93 /* SocketTest.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = SocketTest.cpp; sourceTree = "<group>"; };
+		26FCE61B229F00F000D125BD /* SocketTestUtilities.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SocketTestUtilities.cpp; sourceTree = "<group>"; };
 		26603870211CA90D00329572 /* SourceBreakpoint.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SourceBreakpoint.cpp; path = "tools/lldb-vscode/SourceBreakpoint.cpp"; sourceTree = "<group>"; };
 		2660386D211CA90C00329572 /* SourceBreakpoint.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SourceBreakpoint.h; path = "tools/lldb-vscode/SourceBreakpoint.h"; sourceTree = "<group>"; };
 		26BC7E8F10F1B85900F91463 /* SourceManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SourceManager.cpp; path = source/Core/SourceManager.cpp; sourceTree = "<group>"; };
@@ -3612,6 +3614,7 @@
 				2321F9381BDD332400BA9A93 /* CMakeLists.txt */,
 				2321F9391BDD332400BA9A93 /* SocketAddressTest.cpp */,
 				2321F93A1BDD332400BA9A93 /* SocketTest.cpp */,
+				26FCE61B229F00F000D125BD /* SocketTestUtilities.cpp */,
 				2321F9451BDD346100BA9A93 /* TaskPoolTest.cpp */,
 			);
 			path = Host;
@@ -7836,6 +7839,7 @@
 				23CB15411D66DA9300EDDDE1 /* StringExtractorTest.cpp in Sources */,
 				9A2057031F3A605200F6C293 /* VASprintfTest.cpp in Sources */,
 				4C639ED121FA684900A7B957 /* OptionsWithRawTest.cpp in Sources */,
+				26FCE61C229F00F000D125BD /* SocketTestUtilities.cpp in Sources */,
 				4C639ED321FA684900A7B957 /* EventTest.cpp in Sources */,
 				9A18903C1F47D5E600394BCA /* TestUtilities.cpp in Sources */,
 				4C639ECD21FA684900A7B957 /* StreamTest.cpp in Sources */,

From f3c89b1a6be08ad90b30306554e6724467debf16 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Wed, 29 May 2019 21:23:30 +0000
Subject: [PATCH 0559/1176] [ObjC] Fix encoding of ObjC pointer types that are
 pointers to typedefs

clang was encoding pointers to typedefs as if they were pointers to
structs because that is apparently what gcc is doing.

For example:

```
@class Class1;

typedef NSArray<Class1 *> MyArray;

void foo1(void) {
  const char *s0 = @encode(MyArray *); // "^{NSArray=#}"
  const char *s1 = @encode(NSArray<Class1 *> *); // "@"
}
```

This commit removes the code that was there to make clang compatible
with gcc and make clang emit the correct encoding for ObjC pointers,
which is "@".

rdar://problem/50563529

Differential Revision: https://reviews.llvm.org/D61974

llvm-svn: 362034
---
 clang/include/clang/AST/ASTContext.h   |  4 +--
 clang/lib/AST/ASTContext.cpp           | 39 ++------------------------
 clang/test/CodeGenObjC/encode-test-6.m |  4 +--
 clang/test/CodeGenObjC/encode-test.m   |  2 +-
 clang/test/CodeGenObjCXX/encode.mm     |  2 +-
 5 files changed, 8 insertions(+), 43 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 6856cb6585f82..93d93841b8391 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2897,7 +2897,6 @@ class ASTContext : public RefCountedBase<ASTContext> {
   V(IsStructField, 4)                                                          \
   V(EncodeBlockParameters, 5)                                                  \
   V(EncodeClassNames, 6)                                                       \
-  V(EncodePointerToObjCTypedef, 7)
 
 #define V(N,I) ObjCEncOptions& set##N() { Bits |= 1 << I; return *this; }
 OPT_LIST(V)
@@ -2916,8 +2915,7 @@ OPT_LIST(V)
     LLVM_NODISCARD ObjCEncOptions forComponentType() const {
       ObjCEncOptions Mask = ObjCEncOptions()
                                 .setIsOutermostType()
-                                .setIsStructField()
-                                .setEncodePointerToObjCTypedef();
+                                .setIsStructField();
       return Bits & ~Mask.Bits;
     }
   };
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index c3a09723ef189..87ecb5a57b5bc 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -6927,13 +6927,10 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
           getObjCEncodingForTypeImpl(Field->getType(), S,
                                      ObjCEncOptions().setExpandStructures(),
                                      Field);
-        else {
-          ObjCEncOptions NewOptions = ObjCEncOptions().setExpandStructures();
-          if (Options.EncodePointerToObjCTypedef())
-            NewOptions.setEncodePointerToObjCTypedef();
-          getObjCEncodingForTypeImpl(Field->getType(), S, NewOptions, FD,
+        else
+          getObjCEncodingForTypeImpl(Field->getType(), S,
+                                     ObjCEncOptions().setExpandStructures(), FD,
                                      NotEncodedT);
-        }
       }
     }
     S += '}';
@@ -6976,36 +6973,6 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S,
       return;
     }
 
-    QualType PointeeTy = OPT->getPointeeType();
-    if (!Options.EncodingProperty() &&
-        isa<TypedefType>(PointeeTy.getTypePtr()) &&
-        !Options.EncodePointerToObjCTypedef()) {
-      // Another historical/compatibility reason.
-      // We encode the underlying type which comes out as
-      // {...};
-      S += '^';
-      if (FD && OPT->getInterfaceDecl()) {
-        // Prevent recursive encoding of fields in some rare cases.
-        ObjCInterfaceDecl *OI = OPT->getInterfaceDecl();
-        SmallVector<const ObjCIvarDecl*, 32> Ivars;
-        DeepCollectObjCIvars(OI, true, Ivars);
-        for (unsigned i = 0, e = Ivars.size(); i != e; ++i) {
-          if (Ivars[i] == FD) {
-            S += '{';
-            S += OI->getObjCRuntimeNameAsString();
-            S += '}';
-            return;
-          }
-        }
-      }
-      ObjCEncOptions NewOptions =
-          ObjCEncOptions().setEncodePointerToObjCTypedef();
-      if (Options.ExpandPointedToStructures())
-        NewOptions.setExpandStructures();
-      getObjCEncodingForTypeImpl(PointeeTy, S, NewOptions, /*Field=*/nullptr);
-      return;
-    }
-
     S += '@';
     if (OPT->getInterfaceDecl() &&
         (FD || Options.EncodingProperty() || Options.EncodeClassNames())) {
diff --git a/clang/test/CodeGenObjC/encode-test-6.m b/clang/test/CodeGenObjC/encode-test-6.m
index 583ba5ab4de6f..99f53a5059edd 100644
--- a/clang/test/CodeGenObjC/encode-test-6.m
+++ b/clang/test/CodeGenObjC/encode-test-6.m
@@ -34,7 +34,7 @@ @implementation BABugExample
 @synthesize property = _property;
 @end
 
-// CHECK: private unnamed_addr constant [24 x i8] c"^{BABugExample=@}16
+// CHECK: private unnamed_addr constant [8 x i8] c"@16
 
 // rdar://14408244
 @class SCNCamera;
@@ -52,7 +52,7 @@ @implementation SCNCamera
     C3DCameraStorage _storage;
 }
 @end
-// CHECK: private unnamed_addr constant [39 x i8] c"{?=\22presentationInstance\22^{SCNCamera}}\00"
+// CHECK: private unnamed_addr constant [39 x i8] c"{?=\22presentationInstance\22@\22SCNCamera\22}\00"
 
 // rdar://16655340
 int i;
diff --git a/clang/test/CodeGenObjC/encode-test.m b/clang/test/CodeGenObjC/encode-test.m
index 113dbef95f39c..bf5340c5c5294 100644
--- a/clang/test/CodeGenObjC/encode-test.m
+++ b/clang/test/CodeGenObjC/encode-test.m
@@ -107,7 +107,7 @@ int main()
 // CHECK: @g4 = constant [6 x i8] c"{S=i}\00"
 const char g4[] = @encode(const struct S);
 
-// CHECK: @g5 = constant [12 x i8] c"^{Object=#}\00"
+// CHECK: @g5 = constant [2 x i8] c"@\00"
 const char g5[] = @encode(MyObj * const);
 
 ////
diff --git a/clang/test/CodeGenObjCXX/encode.mm b/clang/test/CodeGenObjCXX/encode.mm
index e4c7618f50c26..7bc64dafb476b 100644
--- a/clang/test/CodeGenObjCXX/encode.mm
+++ b/clang/test/CodeGenObjCXX/encode.mm
@@ -242,6 +242,6 @@ @implementation N
 @end
 
 const char *expand_struct() {
-  // CHECK: @{{.*}} = private unnamed_addr constant [16 x i8] c"{N={S<N>=^{N}}}\00"
+  // CHECK: @{{.*}} = private unnamed_addr constant [13 x i8] c"{N={S<N>=@}}\00"
   return @encode(N);
 }

From c8f2efe065c2c6b5bc3f071b6892006f2dc556b1 Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Wed, 29 May 2019 21:25:15 +0000
Subject: [PATCH 0560/1176] Use correct format specifier to silence -Wformat
 warning.

llvm-svn: 362035
---
 lldb/source/Utility/Timer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Utility/Timer.cpp b/lldb/source/Utility/Timer.cpp
index 7492045bc3262..6b46d8ba73642 100644
--- a/lldb/source/Utility/Timer.cpp
+++ b/lldb/source/Utility/Timer.cpp
@@ -144,7 +144,8 @@ void Timer::DumpCategoryTimes(Stream *s) {
   llvm::sort(sorted.begin(), sorted.end(), CategoryMapIteratorSortCriterion);
 
   for (const auto &stats : sorted)
-    s->Printf("%.9f sec (total: %.3fs; child: %.3fs; count: %llu) for %s\n",
+    s->Printf("%.9f sec (total: %.3fs; child: %.3fs; count: %" PRIu64
+              ") for %s\n",
               stats.nanos / 1000000000., stats.nanos_total / 1000000000.,
               (stats.nanos_total - stats.nanos) / 1000000000., stats.count,
               stats.name);

From 86bad3f924b33d7985f34ed6b6aefdd86f204f5c Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 29 May 2019 21:26:25 +0000
Subject: [PATCH 0561/1176] [llvm-pdbutil] Dump inline call site line table
 annotations

This ports and improves on some existing llvm-readobj -codeview dumping
functionality that llvm-pdbutil lacked.

Helpful for comparing inline line tables between MSVC and clang.

llvm-svn: 362037
---
 .../llvm/DebugInfo/CodeView/SymbolRecord.h    | 44 +++++-----
 llvm/test/MC/COFF/cv-inline-linetable.s       | 39 ++++++++-
 .../llvm-pdbutil/MinimalSymbolDumper.cpp      | 82 +++++++++++++++++--
 3 files changed, 135 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index b98ada221a4a7..5e9a7432b9b6f 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/CodeView/CVRecord.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -155,15 +156,19 @@ class CallerSym : public SymbolRecord {
   uint32_t RecordOffset;
 };
 
-struct BinaryAnnotationIterator {
-  struct AnnotationData {
-    BinaryAnnotationsOpCode OpCode;
-    StringRef Name;
-    uint32_t U1;
-    uint32_t U2;
-    int32_t S1;
-  };
+struct DecodedAnnotation {
+  StringRef Name;
+  ArrayRef<uint8_t> Bytes;
+  BinaryAnnotationsOpCode OpCode;
+  uint32_t U1 = 0;
+  uint32_t U2 = 0;
+  int32_t S1 = 0;
+};
 
+struct BinaryAnnotationIterator
+    : public iterator_facade_base<BinaryAnnotationIterator,
+                                  std::forward_iterator_tag,
+                                  DecodedAnnotation> {
   BinaryAnnotationIterator() = default;
   BinaryAnnotationIterator(ArrayRef<uint8_t> Annotations) : Data(Annotations) {}
   BinaryAnnotationIterator(const BinaryAnnotationIterator &Other)
@@ -173,10 +178,6 @@ struct BinaryAnnotationIterator {
     return Data == Other.Data;
   }
 
-  bool operator!=(const BinaryAnnotationIterator &Other) const {
-    return !(*this == Other);
-  }
-
   BinaryAnnotationIterator &operator=(const BinaryAnnotationIterator Other) {
     Data = Other.Data;
     return *this;
@@ -193,13 +194,7 @@ struct BinaryAnnotationIterator {
     return *this;
   }
 
-  BinaryAnnotationIterator operator++(int) {
-    BinaryAnnotationIterator Orig(*this);
-    ++(*this);
-    return Orig;
-  }
-
-  const AnnotationData &operator*() {
+  const DecodedAnnotation &operator*() {
     ParseCurrentAnnotation();
     return Current.getValue();
   }
@@ -241,17 +236,17 @@ struct BinaryAnnotationIterator {
              (ThirdByte << 8) | FourthByte;
 
     return -1;
-  };
+  }
 
   static int32_t DecodeSignedOperand(uint32_t Operand) {
     if (Operand & 1)
       return -(Operand >> 1);
     return Operand >> 1;
-  };
+  }
 
   static int32_t DecodeSignedOperand(ArrayRef<uint8_t> &Annotations) {
     return DecodeSignedOperand(GetCompressedAnnotation(Annotations));
-  };
+  }
 
   bool ParseCurrentAnnotation() {
     if (Current.hasValue())
@@ -259,7 +254,7 @@ struct BinaryAnnotationIterator {
 
     Next = Data;
     uint32_t Op = GetCompressedAnnotation(Next);
-    AnnotationData Result;
+    DecodedAnnotation Result;
     Result.OpCode = static_cast<BinaryAnnotationsOpCode>(Op);
     switch (Result.OpCode) {
     case BinaryAnnotationsOpCode::Invalid:
@@ -324,11 +319,12 @@ struct BinaryAnnotationIterator {
       break;
     }
     }
+    Result.Bytes = Data.take_front(Data.size() - Next.size());
     Current = Result;
     return true;
   }
 
-  Optional<AnnotationData> Current;
+  Optional<DecodedAnnotation> Current;
   ArrayRef<uint8_t> Data;
   ArrayRef<uint8_t> Next;
 };
diff --git a/llvm/test/MC/COFF/cv-inline-linetable.s b/llvm/test/MC/COFF/cv-inline-linetable.s
index 460c2e9ba2a8c..f226d6fe21f44 100644
--- a/llvm/test/MC/COFF/cv-inline-linetable.s
+++ b/llvm/test/MC/COFF/cv-inline-linetable.s
@@ -1,4 +1,7 @@
-# RUN: llvm-mc -triple=i686-pc-win32 -filetype=obj < %s | llvm-readobj --codeview | FileCheck %s
+# RUN: llvm-mc -triple=i686-pc-win32 -filetype=obj %s -o %t.o
+# RUN: llvm-readobj --codeview %t.o | FileCheck %s
+# RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=ASM
+# RUN: llvm-pdbutil dump -symbols %t.o | FileCheck %s --check-prefix=PDB
 	.text
 	.def	 @feat.00;
 	.scl	3;
@@ -43,6 +46,40 @@ Lfunc_begin0:
 	retl
 Lfunc_end0:
 
+# Check the disassembly so we have accurate instruction offsets in hex.
+# ASM-LABEL: ?baz@@YAXXZ:
+# ASM-NEXT:       0: {{.*}} pushl   %eax
+# ASM-NEXT:       1: {{.*}} addl    $6, 0
+# ASM-NEXT:       8: {{.*}} addl    $4, 0
+# ASM-NEXT:       f: {{.*}} movl    $1, (%esp)
+# ASM-NEXT:      16: {{.*}} leal    (%esp), %eax
+# ASM-NEXT:      19: {{.*}} addl    %eax, 0
+# ASM-NEXT:      1f: {{.*}} addl    $2, 0
+# ASM-NEXT:      26: {{.*}} addl    $3, 0
+# ASM-NEXT:      2d: {{.*}} addl    $5, 0
+# ASM-NEXT:      34: {{.*}} addl    $7, 0
+# ASM-NEXT:      3b: {{.*}} popl    %eax
+# ASM-NEXT:      3c: {{.*}} retl
+
+# PDB: S_GPROC32_ID {{.*}} `baz`
+# PDB: S_INLINESITE
+# PDB-NEXT: inlinee = 0x1003 (bar), parent = 0, end = 0
+# PDB-NEXT:   0B08      code 0x8 (+0x8) line 0 (-0)
+# PDB-NEXT:   0B27      code 0xF (+0x7) line 1 (+1)
+# PDB-NEXT:   0602      line 2 (+1)
+# PDB-NEXT:   031E      code 0x2D (+0x1E)
+# PDB-NEXT:   0407      code end 0x34 (+0x7)
+# PDB: S_INLINESITE
+# PDB-NEXT: inlinee = 0x1004 (foo), parent = 0, end = 0
+# PDB-NEXT:    0B0F      code 0xF (+0xF) line 0 (-0)
+# PDB-NEXT:    0B2A      code 0x19 (+0xA) line 1 (+1)
+# PDB-NEXT:    0B26      code 0x1F (+0x6) line 2 (+1)
+# PDB-NEXT:    0B27      code 0x26 (+0x7) line 3 (+1)
+# PDB-NEXT:    0407      code end 0x2D (+0x7)
+# PEB: S_INLINESITE_END
+# PEB: S_INLINESITE_END
+# PEB: S_PROC_ID_END
+
 	.section	.debug$T,"dr"
 	.long	4
 	.short	6
diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 50d70c070d2c3..d3c3f3da9c069 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -650,13 +650,85 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
 
 Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &IS) {
   AutoIndent Indent(P, 7);
-  auto Bytes = makeArrayRef(IS.AnnotationData);
-  StringRef Annotations(reinterpret_cast<const char *>(Bytes.begin()),
-                        Bytes.size());
-
   P.formatLine("inlinee = {0}, parent = {1}, end = {2}", idIndex(IS.Inlinee),
                IS.Parent, IS.End);
-  P.formatLine("annotations = {0}", toHex(Annotations));
+
+  // Break down the annotation byte code and calculate code and line offsets.
+  // FIXME: It would be helpful if we could look up the initial file and inlinee
+  // lines offset using the inlinee index above.
+  uint32_t CodeOffset = 0;
+  int32_t LineOffset = 0;
+  for (auto &Annot : IS.annotations()) {
+    P.formatLine("  {0}", fmt_align(toHex(Annot.Bytes), AlignStyle::Left, 9));
+
+    auto formatCodeOffset = [&](uint32_t Delta) {
+      CodeOffset += Delta;
+      P.format(" code 0x{0} (+0x{1})", utohexstr(CodeOffset), utohexstr(Delta));
+    };
+    auto formatCodeLength = [&](uint32_t Length) {
+      // Notably, changing the code length does not affect the code offset.
+      P.format(" code end 0x{0} (+0x{1})", utohexstr(CodeOffset + Length),
+               utohexstr(Length));
+    };
+    auto formatLineOffset = [&](int32_t Delta) {
+      LineOffset += Delta;
+      char Sign = Delta > 0 ? '+' : '-';
+      P.format(" line {0} ({1}{2})", LineOffset, Sign, std::abs(Delta));
+    };
+
+    // Use the opcode to interpret the integer values.
+    switch (Annot.OpCode) {
+    case BinaryAnnotationsOpCode::Invalid:
+      break;
+    case BinaryAnnotationsOpCode::CodeOffset:
+    case BinaryAnnotationsOpCode::ChangeCodeOffset:
+      formatCodeOffset(Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeLineOffset:
+      formatLineOffset(Annot.S1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeLength:
+      formatCodeLength(Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
+      formatCodeOffset(Annot.U1);
+      formatLineOffset(Annot.S1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
+      formatCodeOffset(Annot.U2);
+      formatCodeLength(Annot.U1);
+      break;
+
+    case BinaryAnnotationsOpCode::ChangeFile: {
+      uint32_t FileOffset = Annot.U1;
+      StringRef Filename = "<unknown>";
+      if (SymGroup) {
+        if (Expected<StringRef> MaybeFile =
+                SymGroup->getNameFromStringTable(FileOffset))
+          Filename = *MaybeFile;
+        else
+          return MaybeFile.takeError();
+      }
+      P.format(" setfile {0} 0x{1}", utohexstr(FileOffset));
+      break;
+    }
+
+    // The rest of these are hard to convince MSVC to emit, so they are not as
+    // well understood.
+    case BinaryAnnotationsOpCode::ChangeCodeOffsetBase:
+      formatCodeOffset(Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeLineEndDelta:
+    case BinaryAnnotationsOpCode::ChangeRangeKind:
+    case BinaryAnnotationsOpCode::ChangeColumnStart:
+    case BinaryAnnotationsOpCode::ChangeColumnEnd:
+      P.format(" {0} {1}", Annot.Name, Annot.U1);
+      break;
+    case BinaryAnnotationsOpCode::ChangeColumnEndDelta:
+      P.format(" {0} {1}", Annot.Name, Annot.S1);
+      break;
+    }
+  }
   return Error::success();
 }
 

From 325003be02bff4291afb7c635cb0e75d7b3d1ba2 Mon Sep 17 00:00:00 2001
From: Amy Huang <akhuang@google.com>
Date: Wed, 29 May 2019 21:45:34 +0000
Subject: [PATCH 0562/1176] CodeView - add static data members to global
 variable debug info.

Summary:
Add static data members to IR debug info's list of global variables
so that they are emitted as S_CONSTANT records.

Related to https://bugs.llvm.org/show_bug.cgi?id=41615.

Reviewers: rnk

Subscribers: aprantl, cfe-commits, llvm-commits, thakis

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62167

llvm-svn: 362038
---
 clang/lib/CodeGen/CGDebugInfo.cpp             | 18 ++++-
 .../CodeGenCXX/debug-info-static-member.cpp   | 13 +++-
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp |  7 +-
 llvm/test/DebugInfo/COFF/global-constants.ll  | 73 ++++++++++++-------
 4 files changed, 79 insertions(+), 32 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 89c053f4b2905..b79169f0a060c 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -4361,9 +4361,13 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
     return;
   }
 
-  // Do not emit separate definitions for function local const/statics.
+  llvm::DIScope *DContext = nullptr;
+
+  // Do not emit separate definitions for function local consts.
   if (isa<FunctionDecl>(VD->getDeclContext()))
     return;
+
+  // Emit definition for static members in CodeView.
   VD = cast<ValueDecl>(VD->getCanonicalDecl());
   auto *VarD = cast<VarDecl>(VD);
   if (VarD->isStaticDataMember()) {
@@ -4375,10 +4379,16 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
     // through its scope.
     RetainedTypes.push_back(
         CGM.getContext().getRecordType(RD).getAsOpaquePtr());
-    return;
-  }
 
-  llvm::DIScope *DContext = getDeclContextDescriptor(VD);
+    if (!CGM.getCodeGenOpts().EmitCodeView)
+      return;
+
+    // Use the global scope for static members.
+    DContext = getContextDescriptor(
+        cast<Decl>(CGM.getContext().getTranslationUnitDecl()), TheCU);
+  } else {
+    DContext = getDeclContextDescriptor(VD);
+  }
 
   auto &GV = DeclCache[VD];
   if (GV)
diff --git a/clang/test/CodeGenCXX/debug-info-static-member.cpp b/clang/test/CodeGenCXX/debug-info-static-member.cpp
index 702d1f87e752c..8ad86843f4e9e 100644
--- a/clang/test/CodeGenCXX/debug-info-static-member.cpp
+++ b/clang/test/CodeGenCXX/debug-info-static-member.cpp
@@ -1,6 +1,7 @@
 // RUN: %clangxx -target x86_64-unknown-unknown -g %s -emit-llvm -S -o - | FileCheck %s
 // RUN: %clangxx -target x86_64-unknown-unknown -g -std=c++98 %s -emit-llvm -S -o - | FileCheck %s
 // RUN: %clangxx -target x86_64-unknown-unknown -g -std=c++11 %s -emit-llvm -S -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -gcodeview -debug-info-kind=limited %s -emit-llvm -o - | FileCheck --check-prefix MSVC %s
 // PR14471
 
 // CHECK: @_ZN1C1aE = dso_local global i32 4, align 4, !dbg [[A:![0-9]+]]
@@ -35,6 +36,7 @@ class C
 // CHECK: [[A]] = !DIGlobalVariableExpression(var: [[AV:.*]], expr: !DIExpression())
 // CHECK: [[AV]] = distinct !DIGlobalVariable(name: "a",
 // CHECK-SAME:                                declaration: ![[DECL_A:[0-9]+]])
+// MSVC: distinct !DIGlobalVariable(name: "a"
 //
 // CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, name: "X"{{.*}}, identifier: "_ZTS1X")
 // CHECK: !DICompositeType(tag: DW_TAG_structure_type, name: "anon_static_decl_struct"
@@ -48,6 +50,7 @@ int C::a = 4;
 // CHECK: [[B]] = !DIGlobalVariableExpression(var: [[BV:.*]], expr: !DIExpression())
 // CHECK: [[BV]] = distinct !DIGlobalVariable(name: "b",
 // CHECK-SAME:                                declaration: ![[DECL_B:[0-9]+]])
+// MSVC: distinct !DIGlobalVariable(name: "b"
 // CHECK: ![[DECL_B]] = !DIDerivedType(tag: DW_TAG_member, name: "b"
 // CHECK-NOT:                                 size:
 // CHECK-NOT:                                 align:
@@ -95,6 +98,7 @@ int C::a = 4;
 int C::b = 2;
 // CHECK: [[C]] = !DIGlobalVariableExpression(var: [[CV:.*]], expr: !DIExpression())
 // CHECK: [[CV]] = distinct !DIGlobalVariable(name: "c", {{.*}} declaration: ![[DECL_C]])
+// MSVC: distinct !DIGlobalVariable(name: "c"
 int C::c = 1;
 
 int main()
@@ -114,11 +118,18 @@ struct anon_static_decl_struct {
 };
 }
 
-
 int ref() {
   return anon_static_decl_struct::anon_static_decl_var;
 }
 
+// In MSVC, static data members should be emitted as global variables when used.
+// MSVC: !DIGlobalVariableExpression(var: [[ANON_STATIC_DECL:![0-9]+]],
+// MSVC-SAME: !DIExpression(DW_OP_constu, 117, DW_OP_stack_value)
+// MSVC: [[ANON_STATIC_DECL]] = distinct !DIGlobalVariable(name: "anon_static_decl_var"
+// MSVC: !DIGlobalVariableExpression(var: [[STATIC_DECL_TEMPL:![0-9]+]]
+// MSVC-SAME: !DIExpression(DW_OP_constu, 7, DW_OP_stack_value)
+// MSVC: [[STATIC_DECL_TEMPL]] = distinct !DIGlobalVariable(name: "static_decl_templ_var"
+
 template<typename T>
 struct static_decl_templ {
   static const int static_decl_templ_var = 7;
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 417388e867847..b87757583a46c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -3069,7 +3069,12 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
     OS.EmitBinaryData(SRef);
 
     OS.AddComment("Name");
-    emitNullTerminatedSymbolName(OS, DIGV->getDisplayName());
+    // Get fully qualified name if global is a static data member.
+    std::string Name = DIGV->getDisplayName();
+    if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
+            DIGV->getRawStaticDataMemberDeclaration()))
+      Name = getFullyQualifiedName(MemberDecl->getScope(), Name);
+    emitNullTerminatedSymbolName(OS, Name);
     endSymbolRecord(SConstantEnd);
   }
 }
diff --git a/llvm/test/DebugInfo/COFF/global-constants.ll b/llvm/test/DebugInfo/COFF/global-constants.ll
index b906886d0fd4a..502870da37605 100644
--- a/llvm/test/DebugInfo/COFF/global-constants.ll
+++ b/llvm/test/DebugInfo/COFF/global-constants.ll
@@ -3,18 +3,25 @@
 
 ; C++ source to regenerate:
 ; const int Test1 = 1;
+; struct Foo { static const int Test2 = 2; };
 ; int main() {
-;   return Test1;
+;   return Test1 + Foo::Test2;
 ; }
 ; $ clang t.cpp -S -emit-llvm -g -gcodeview -o t.ll
 
-; ASM-LABEL:  .long 241              # Symbol subsection for globals
+; ASM-LABEL:  .long 241             # Symbol subsection for globals
 
-; ASM:        .short {{.*-.*}}       # Record length
-; ASM:        .short 4359            # Record kind: S_CONSTANT
-; ASM-NEXT:   .long 4099             # Type
-; ASM-NEXT:   .byte 0x01, 0x00       # Value
-; ASM-NEXT:   .asciz "Test1"         # Name
+; ASM:        .short {{.*-.*}}      # Record length
+; ASM:        .short 4359           # Record kind: S_CONSTANT
+; ASM-NEXT:   .long 4099            # Type
+; ASM-NEXT:   .byte 0x01, 0x00      # Value
+; ASM-NEXT:   .asciz "Test1"        # Name
+
+; ASM:        .short {{.*-.*}}      # Record length
+; ASM:        .short 4359           # Record kind: S_CONSTANT
+; ASM:        .long 4099            # Type
+; ASM:        .byte 0x02, 0x00      # Value
+; ASM:        .asciz "Foo::Test2"   # Name
 
 ; OBJ:        CodeViewDebugInfo [
 ; OBJ:          Section: .debug$S
@@ -27,6 +34,12 @@
 ; OBJ-NEXT:         Value: 1
 ; OBJ-NEXT:         Name: Test1
 ; OBJ-NEXT:       }
+; OBJ:            ConstantSym {
+; OBJ-NEXT:         Kind: S_CONSTANT (0x1107)
+; OBJ-NEXT:         Type: const int (0x1003)
+; OBJ-NEXT:         Value: 2
+; OBJ-NEXT:         Name: Foo::Test2
+; OBJ-NEXT:       }
 
 ; ModuleID = 't.cpp'
 source_filename = "t.cpp"
@@ -34,31 +47,39 @@ target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc"
 
 ; Function Attrs: noinline norecurse nounwind optnone
-define dso_local i32 @main() #0 !dbg !13 {
+define dso_local i32 @main() #0 !dbg !19 {
 entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval, align 4
-  ret i32 1, !dbg !16
+  ret i32 3, !dbg !22
 }
 
+attributes #0 = { noinline norecurse nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !10, !11}
-!llvm.ident = !{!12}
+!llvm.module.flags = !{!15, !16, !17}
+!llvm.ident = !{!18}
 
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 4a1902b6739e3087a03c0ac7ab85b640764e9335)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, nameTableKind: None)
-!1 = !DIFile(filename: "<stdin>", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "0d5ef00bdd80bdb409a3deac9938f20d")
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 2b66a49044196d8b90d95d7d3b5246ccbe3abc05)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, globals: !10, nameTableKind: None)
+!1 = !DIFile(filename: "<stdin>", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "77cff5e1c7b260440ed03b23c18809c3")
 !2 = !{}
 !3 = !{!4}
-!4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression(DW_OP_constu, 1, DW_OP_stack_value))
-!5 = distinct !DIGlobalVariable(name: "Test1", scope: !0, file: !6, line: 1, type: !7, isLocal: true, isDefinition: true)
-!6 = !DIFile(filename: "t.cpp", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "0d5ef00bdd80bdb409a3deac9938f20d")
-!7 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!9 = !{i32 2, !"CodeView", i32 1}
-!10 = !{i32 2, !"Debug Info Version", i32 3}
-!11 = !{i32 1, !"wchar_size", i32 2}
-!12 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 4a1902b6739e3087a03c0ac7ab85b640764e9335)"}
-!13 = distinct !DISubprogram(name: "main", scope: !6, file: !6, line: 3, type: !14, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
-!14 = !DISubroutineType(types: !15)
-!15 = !{!8}
-!16 = !DILocation(line: 4, scope: !13)
+!4 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !5, line: 3, size: 8, flags: DIFlagTypePassByValue, elements: !6, identifier: ".?AUFoo@@")
+!5 = !DIFile(filename: "t.cpp", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "77cff5e1c7b260440ed03b23c18809c3")
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "Test2", scope: !4, file: !5, line: 4, baseType: !8, flags: DIFlagStaticMember, extraData: i32 2)
+!8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !{!11, !13}
+!11 = !DIGlobalVariableExpression(var: !12, expr: !DIExpression(DW_OP_constu, 1, DW_OP_stack_value))
+!12 = distinct !DIGlobalVariable(name: "Test1", scope: null, file: !5, line: 1, type: !8, isLocal: true, isDefinition: true)
+!13 = !DIGlobalVariableExpression(var: !14, expr: !DIExpression(DW_OP_constu, 2, DW_OP_stack_value))
+!14 = distinct !DIGlobalVariable(name: "Test2", scope: !0, file: !5, line: 4, type: !8, isLocal: true, isDefinition: true, declaration: !7)
+!15 = !{i32 2, !"CodeView", i32 1}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{i32 1, !"wchar_size", i32 2}
+!18 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 2b66a49044196d8b90d95d7d3b5246ccbe3abc05)"}
+!19 = distinct !DISubprogram(name: "main", scope: !5, file: !5, line: 7, type: !20, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!20 = !DISubroutineType(types: !21)
+!21 = !{!9}
+!22 = !DILocation(line: 8, scope: !19)

From b9b64468caca25922d84d4520d4cc9b709a05af2 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Wed, 29 May 2019 21:45:43 +0000
Subject: [PATCH 0563/1176] Move the pybool logic from CMake to Python for
 simplicity

This is how similar booleans are handled, and seems to work for me
locally.

llvm-svn: 362039
---
 debuginfo-tests/CMakeLists.txt     | 3 ---
 debuginfo-tests/lit.site.cfg.py.in | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/debuginfo-tests/CMakeLists.txt b/debuginfo-tests/CMakeLists.txt
index db35d5d5960f4..fbab61c527da0 100644
--- a/debuginfo-tests/CMakeLists.txt
+++ b/debuginfo-tests/CMakeLists.txt
@@ -13,9 +13,6 @@ set(DEBUGINFO_TEST_DEPS
   not
   )
 
-# Indicate if this is an MSVC environment.
-pythonize_bool(MSVC)
-
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
   ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
diff --git a/debuginfo-tests/lit.site.cfg.py.in b/debuginfo-tests/lit.site.cfg.py.in
index 491f4546c1d6c..e981938507793 100644
--- a/debuginfo-tests/lit.site.cfg.py.in
+++ b/debuginfo-tests/lit.site.cfg.py.in
@@ -17,7 +17,7 @@ config.has_lld = lit.util.pythonize_bool("@DEBUGINFO_TESTS_HAS_LLD@")
 config.host_triple = "@LLVM_HOST_TRIPLE@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.host_arch = "@HOST_ARCH@"
-config.is_msvc = @MSVC_PYBOOL@
+config.is_msvc = lit.util.pythonize_bool("@MSVC@")
 
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 

From 4bc710166fbda5d5ef72084cd0c0fab371774072 Mon Sep 17 00:00:00 2001
From: Seiya Nuta <nuta@seiya.me>
Date: Wed, 29 May 2019 22:21:12 +0000
Subject: [PATCH 0564/1176] [llvm-objcopy][MachO] Print an error message on use
 of unsupported options

Summary:
It is better to print an error message instead of silently ignoring unsupported options.

As mentioned in https://reviews.llvm.org/D57045, this is not the best solution and we should print which flag is not supported at some time.

Reviewers: alexshap, rupprecht, jhenderson, jakehehrlich

Reviewed By: alexshap, rupprecht, jakehehrlich

Subscribers: jakehehrlich, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62578

llvm-svn: 362040
---
 .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 33aec0497b860..80fb18599ba3e 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -10,17 +10,53 @@
 #include "../CopyConfig.h"
 #include "MachOReader.h"
 #include "MachOWriter.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
 namespace objcopy {
 namespace macho {
 
+using namespace object;
+
+static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+  if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
+      Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
+      !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
+      !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
+      !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
+      !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() ||
+      !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
+      !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
+      !Config.SectionsToRename.empty() || !Config.SymbolsToRename.empty() ||
+      !Config.UnneededSymbolsToRemove.empty() ||
+      !Config.SetSectionFlags.empty() || !Config.ToRemove.empty() ||
+      Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+      Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
+      Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
+      Config.StripDebug || Config.StripNonAlloc || Config.StripSections ||
+      Config.StripUnneeded || Config.DiscardMode != DiscardType::None ||
+      !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
+    return createStringError(llvm::errc::invalid_argument,
+                             "option not supported by llvm-objcopy for MachO");
+  }
+
+  return Error::success();
+}
+
 Error executeObjcopyOnBinary(const CopyConfig &Config,
                              object::MachOObjectFile &In, Buffer &Out) {
   MachOReader Reader(In);
   std::unique_ptr<Object> O = Reader.create();
-  assert(O && "Unable to deserialize MachO object");
+  if (!O)
+    return createFileError(
+        Config.InputFilename,
+        createStringError(object_error::parse_failed,
+                          "unable to deserialize MachO object"));
+
+  if (Error E = handleArgs(Config, *O))
+    return createFileError(Config.InputFilename, std::move(E));
+
   MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
   return Writer.write();
 }

From 43ae5c59744bcfcd7a9092ef1a10c5618193d6fc Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Wed, 29 May 2019 23:25:44 +0000
Subject: [PATCH 0565/1176] [Commands] Remove commented out code

llvm-svn: 362042
---
 lldb/source/Commands/CommandObjectType.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp
index d9fa210117402..8619818cd12aa 100644
--- a/lldb/source/Commands/CommandObjectType.cpp
+++ b/lldb/source/Commands/CommandObjectType.cpp
@@ -2841,10 +2841,6 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
                                  exe_ctx))
         return false;
 
-    // TargetSP
-    // target_sp(GetCommandInterpreter().GetDebugger().GetSelectedTarget());
-    // const bool fill_all_in = true;
-    // ExecutionContext exe_ctx(target_sp.get(), fill_all_in);
     ExecutionContextScope *best_scope = exe_ctx.GetBestExecutionContextScope();
 
     bool any_found = false;

From 192dd7df2f3104274db57e3b853390faa7e1aa25 Mon Sep 17 00:00:00 2001
From: Davide Italiano <davide@freebsd.org>
Date: Thu, 30 May 2019 00:35:43 +0000
Subject: [PATCH 0566/1176] [crashlog] Add a missing call to decode.

<rdar://problem/51139357>

llvm-svn: 362044
---
 lldb/examples/python/crashlog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
index ab8f52feceea5..2519dc11e5cc8 100755
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -257,7 +257,7 @@ def __init__(
 
         def find_matching_slice(self):
             dwarfdump_cmd_output = subprocess.check_output(
-                'dwarfdump --uuid "%s"' % self.path, shell=True)
+                'dwarfdump --uuid "%s"' % self.path, shell=True).decode("utf-8")
             self_uuid = self.get_uuid()
             for line in dwarfdump_cmd_output.splitlines():
                 match = self.dwarfdump_uuid_regex.search(line)

From 954ec09aed4f2be04bb5f4e10dbb4ea8bd19ef9a Mon Sep 17 00:00:00 2001
From: Jennifer Yu <jennifer.yu@intel.com>
Date: Thu, 30 May 2019 01:05:46 +0000
Subject: [PATCH 0567/1176] clang support gnu asm goto. Syntax:   asm
 [volatile] goto ( AssemblerTemplate                       :                  
     : InputOperands                       : Clobbers                       :
 GotoLabels)

https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html

New llvm IR is "callbr" for inline asm goto instead "call" for inline asm
For:
asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
IR:
callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %0, i8* blockaddress(@foo, %label_true), i8* blockaddress(@foo, %loop)) #1
          to label %asm.fallthrough [label %label_true, label %loop], !srcloc !3

asm.fallthrough:

Compiler need to generate:
1> a dummy constarint 'X' for each label.
2> an unique fallthrough label for each asm goto stmt " asm.fallthrough%number".


Diagnostic
1>	duplicate asm operand name are used in output, input and label.
2>	goto out of scope.

llvm-svn: 362045
---
 clang/include/clang/AST/Stmt.h                |  51 ++++++-
 .../clang/Basic/DiagnosticParseKinds.td       |   4 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |  10 +-
 clang/include/clang/Sema/Sema.h               |   1 +
 clang/lib/AST/ASTImporter.cpp                 |  10 +-
 clang/lib/AST/Stmt.cpp                        |  29 +++-
 clang/lib/AST/StmtPrinter.cpp                 |  20 ++-
 clang/lib/AST/StmtProfile.cpp                 |   3 +
 clang/lib/Analysis/CFG.cpp                    |  74 +++++++---
 clang/lib/CodeGen/CGStmt.cpp                  | 126 ++++++++++++------
 clang/lib/Parse/ParseStmtAsm.cpp              |  68 ++++++++--
 clang/lib/Sema/JumpDiagnostics.cpp            | 114 +++++++++-------
 clang/lib/Sema/SemaStmtAsm.cpp                |  48 ++++++-
 clang/lib/Sema/TreeTransform.h                |  16 ++-
 clang/lib/Serialization/ASTReaderStmt.cpp     |   7 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |   4 +
 clang/test/Analysis/asm-goto.cpp              |  52 ++++++++
 clang/test/CodeGen/asm-goto.c                 |  18 +++
 clang/test/CodeGen/asm.c                      |  12 ++
 clang/test/CodeGen/inline-asm-mixed-style.c   |  10 +-
 clang/test/Coverage/c-language-features.inc   |   4 +-
 clang/test/PCH/asm.h                          |   6 +-
 clang/test/Parser/asm.c                       |  50 +++++++
 clang/test/Parser/asm.cpp                     |  51 +++++++
 clang/test/Sema/asm-goto.cpp                  |  45 +++++++
 clang/test/Sema/asm.c                         |  21 +++
 clang/test/Sema/inline-asm-validate-tmpl.cpp  |  10 ++
 clang/test/Sema/scope-check.c                 |  16 +++
 28 files changed, 732 insertions(+), 148 deletions(-)
 create mode 100644 clang/test/Analysis/asm-goto.cpp
 create mode 100644 clang/test/CodeGen/asm-goto.c
 create mode 100644 clang/test/Sema/asm-goto.cpp

diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 77b2173fcb878..fe5d802688466 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -46,6 +46,7 @@ class Attr;
 class CapturedDecl;
 class Decl;
 class Expr;
+class AddrLabelExpr;
 class LabelDecl;
 class ODRHash;
 class PrinterHelper;
@@ -2816,13 +2817,15 @@ class GCCAsmStmt : public AsmStmt {
   StringLiteral **Constraints = nullptr;
   StringLiteral **Clobbers = nullptr;
   IdentifierInfo **Names = nullptr;
+  unsigned NumLabels = 0;
 
 public:
   GCCAsmStmt(const ASTContext &C, SourceLocation asmloc, bool issimple,
              bool isvolatile, unsigned numoutputs, unsigned numinputs,
              IdentifierInfo **names, StringLiteral **constraints, Expr **exprs,
              StringLiteral *asmstr, unsigned numclobbers,
-             StringLiteral **clobbers, SourceLocation rparenloc);
+             StringLiteral **clobbers, unsigned numlabels,
+             SourceLocation rparenloc);
 
   /// Build an empty inline-assembly statement.
   explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty) {}
@@ -2947,6 +2950,51 @@ class GCCAsmStmt : public AsmStmt {
     return const_cast<GCCAsmStmt*>(this)->getInputExpr(i);
   }
 
+  //===--- Labels ---===//
+
+  bool isAsmGoto() const {
+    return NumLabels > 0;
+  }
+
+  unsigned getNumLabels() const {
+    return NumLabels;
+  }
+
+  IdentifierInfo *getLabelIdentifier(unsigned i) const {
+    return Names[i + NumInputs];
+  }
+
+  AddrLabelExpr *getLabelExpr(unsigned i) const;
+  StringRef getLabelName(unsigned i) const;
+  using labels_iterator = CastIterator<AddrLabelExpr>;
+  using const_labels_iterator = ConstCastIterator<AddrLabelExpr>;
+  using labels_range = llvm::iterator_range<labels_iterator>;
+  using labels_const_range = llvm::iterator_range<const_labels_iterator>;
+
+  labels_iterator begin_labels() {
+    return &Exprs[0] + NumInputs;
+  }
+
+  labels_iterator end_labels() {
+    return &Exprs[0] + NumInputs + NumLabels;
+  }
+
+  labels_range labels() {
+    return labels_range(begin_labels(), end_labels());
+  }
+
+  const_labels_iterator begin_labels() const {
+    return &Exprs[0] + NumInputs;
+  }
+
+  const_labels_iterator end_labels() const {
+    return &Exprs[0] + NumInputs + NumLabels;
+  }
+
+  labels_const_range labels() const {
+    return labels_const_range(begin_labels(), end_labels());
+  }
+
 private:
   void setOutputsAndInputsAndClobbers(const ASTContext &C,
                                       IdentifierInfo **Names,
@@ -2954,6 +3002,7 @@ class GCCAsmStmt : public AsmStmt {
                                       Stmt **Exprs,
                                       unsigned NumOutputs,
                                       unsigned NumInputs,
+                                      unsigned NumLabels,
                                       StringLiteral **Clobbers,
                                       unsigned NumClobbers);
 
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index fb281a5be86a5..15a5ecf177514 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -27,8 +27,8 @@ def err_msasm_unable_to_create_target : Error<
   "MS-style inline assembly is not available: %0">;
 def err_gnu_inline_asm_disabled : Error<
   "GNU-style inline assembly is disabled">;
-def err_asm_goto_not_supported_yet : Error<
-  "'asm goto' constructs are not supported yet">;
+def err_asm_goto_cannot_have_output : Error<
+  "'asm goto' cannot have output constraints">;
 }
 
 let CategoryName = "Parse Issue" in {
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7e03174b3d004..e750b062f80b2 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5064,12 +5064,12 @@ def warn_cxx98_compat_switch_into_protected_scope : Warning<
 def err_indirect_goto_without_addrlabel : Error<
   "indirect goto in function with no address-of-label expressions">;
 def err_indirect_goto_in_protected_scope : Error<
-  "cannot jump from this indirect goto statement to one of its possible targets">;
+  "cannot jump from this %select{indirect|asm}0 goto statement to one of its possible targets">;
 def warn_cxx98_compat_indirect_goto_in_protected_scope : Warning<
-  "jump from this indirect goto statement to one of its possible targets "
+  "jump from this %select{indirect|asm}0 goto statement to one of its possible targets "
   "is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
 def note_indirect_goto_target : Note<
-  "possible target of indirect goto statement">;
+  "possible target of %select{indirect|asm}0 goto statement">;
 def note_protected_by_variable_init : Note<
   "jump bypasses variable initialization">;
 def note_protected_by_variable_nontriv_destructor : Note<
@@ -7497,6 +7497,10 @@ let CategoryName = "Inline Assembly Issue" in {
     "use constraint modifier \"%0\"">;
   def note_asm_input_duplicate_first : Note<
     "constraint '%0' is already present here">;
+ def error_duplicate_asm_operand_name : Error<
+    "duplicate use of asm operand name \"%0\"">;
+ def note_duplicate_asm_operand_name : Note<
+    "asm operand name \"%0\" first referenced here">;
 }
 
   def error_inoutput_conflict_with_clobber : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d7486ec1c2617..d25d7decf3b38 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3971,6 +3971,7 @@ class Sema {
                              unsigned NumInputs, IdentifierInfo **Names,
                              MultiExprArg Constraints, MultiExprArg Exprs,
                              Expr *AsmString, MultiExprArg Clobbers,
+                             unsigned NumLabels,
                              SourceLocation RParenLoc);
 
   void FillInlineAsmIdentifierInfo(Expr *Res,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 2e4c304b3de20..1f1ec1d687c2e 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -5592,12 +5592,17 @@ ExpectedStmt ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
       return InputOrErr.takeError();
   }
 
-  SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs());
+  SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs() +
+                               S->getNumLabels());
   if (Error Err = ImportContainerChecked(S->outputs(), Exprs))
     return std::move(Err);
 
+  if (Error Err =
+          ImportArrayChecked(S->inputs(), Exprs.begin() + S->getNumOutputs()))
+    return std::move(Err);
+
   if (Error Err = ImportArrayChecked(
-      S->inputs(), Exprs.begin() + S->getNumOutputs()))
+          S->labels(), Exprs.begin() + S->getNumOutputs() + S->getNumInputs()))
     return std::move(Err);
 
   ExpectedSLoc AsmLocOrErr = import(S->getAsmLoc());
@@ -5623,6 +5628,7 @@ ExpectedStmt ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
       *AsmStrOrErr,
       S->getNumClobbers(),
       Clobbers.data(),
+      S->getNumLabels(),
       *RParenLocOrErr);
 }
 
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index 4796ee87f72c6..e9a2c58f1a17c 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -444,6 +444,14 @@ void GCCAsmStmt::setInputExpr(unsigned i, Expr *E) {
   Exprs[i + NumOutputs] = E;
 }
 
+AddrLabelExpr *GCCAsmStmt::getLabelExpr(unsigned i) const {
+  return cast<AddrLabelExpr>(Exprs[i + NumInputs]);
+}
+
+StringRef GCCAsmStmt::getLabelName(unsigned i) const {
+  return getLabelExpr(i)->getLabel()->getName();
+}
+
 /// getInputConstraint - Return the specified input constraint.  Unlike output
 /// constraints, these can be empty.
 StringRef GCCAsmStmt::getInputConstraint(unsigned i) const {
@@ -456,13 +464,16 @@ void GCCAsmStmt::setOutputsAndInputsAndClobbers(const ASTContext &C,
                                                 Stmt **Exprs,
                                                 unsigned NumOutputs,
                                                 unsigned NumInputs,
+                                                unsigned NumLabels,
                                                 StringLiteral **Clobbers,
                                                 unsigned NumClobbers) {
   this->NumOutputs = NumOutputs;
   this->NumInputs = NumInputs;
   this->NumClobbers = NumClobbers;
+  this->NumLabels = NumLabels;
+  assert(!(NumOutputs && NumLabels) && "asm goto cannot have outputs");
 
-  unsigned NumExprs = NumOutputs + NumInputs;
+  unsigned NumExprs = NumOutputs + NumInputs + NumLabels;
 
   C.Deallocate(this->Names);
   this->Names = new (C) IdentifierInfo*[NumExprs];
@@ -497,6 +508,10 @@ int GCCAsmStmt::getNamedOperand(StringRef SymbolicName) const {
     if (getInputName(i) == SymbolicName)
       return getNumOutputs() + NumPlusOperands + i;
 
+  for (unsigned i = 0, e = getNumLabels(); i != e; ++i)
+    if (getLabelName(i) == SymbolicName)
+      return i + getNumInputs();
+
   // Not found.
   return -1;
 }
@@ -614,8 +629,8 @@ unsigned GCCAsmStmt::AnalyzeAsmString(SmallVectorImpl<AsmStringPiece>&Pieces,
       while (CurPtr != StrEnd && isDigit(*CurPtr))
         N = N*10 + ((*CurPtr++)-'0');
 
-      unsigned NumOperands =
-        getNumOutputs() + getNumPlusOperands() + getNumInputs();
+      unsigned NumOperands = getNumOutputs() + getNumPlusOperands() +
+                             getNumInputs() + getNumLabels();
       if (N >= NumOperands) {
         DiagOffs = CurPtr-StrStart-1;
         return diag::err_asm_invalid_operand_number;
@@ -728,10 +743,12 @@ GCCAsmStmt::GCCAsmStmt(const ASTContext &C, SourceLocation asmloc,
                        unsigned numinputs, IdentifierInfo **names,
                        StringLiteral **constraints, Expr **exprs,
                        StringLiteral *asmstr, unsigned numclobbers,
-                       StringLiteral **clobbers, SourceLocation rparenloc)
+                       StringLiteral **clobbers, unsigned numlabels,
+                       SourceLocation rparenloc)
     : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-              numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
-  unsigned NumExprs = NumOutputs + NumInputs;
+              numinputs, numclobbers),
+              RParenLoc(rparenloc), AsmStr(asmstr), NumLabels(numlabels) {
+  unsigned NumExprs = NumOutputs + NumInputs + NumLabels;
 
   Names = new (C) IdentifierInfo*[NumExprs];
   std::copy(names, names + NumExprs, Names);
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 7fe0be5217dbc..563095f89b9b3 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -414,12 +414,15 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   if (Node->isVolatile())
     OS << "volatile ";
 
+  if (Node->isAsmGoto())
+    OS << "goto ";
+
   OS << "(";
   VisitStringLiteral(Node->getAsmString());
 
   // Outputs
   if (Node->getNumOutputs() != 0 || Node->getNumInputs() != 0 ||
-      Node->getNumClobbers() != 0)
+      Node->getNumClobbers() != 0 || Node->getNumLabels() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumOutputs(); i != e; ++i) {
@@ -439,7 +442,8 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   }
 
   // Inputs
-  if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0)
+  if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0 ||
+      Node->getNumLabels() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumInputs(); i != e; ++i) {
@@ -459,7 +463,7 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   }
 
   // Clobbers
-  if (Node->getNumClobbers() != 0)
+  if (Node->getNumClobbers() != 0 || Node->getNumLabels())
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumClobbers(); i != e; ++i) {
@@ -469,6 +473,16 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
     VisitStringLiteral(Node->getClobberStringLiteral(i));
   }
 
+  // Labels
+  if (Node->getNumLabels() != 0)
+    OS << " : ";
+
+  for (unsigned i = 0, e = Node->getNumLabels(); i != e; ++i) {
+    if (i != 0)
+      OS << ", ";
+    OS << Node->getLabelName(i);
+  }
+
   OS << ");";
   if (Policy.IncludeNewlines) OS << NL;
 }
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 93bdcac8b5496..c5da5bfda9cb5 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -321,6 +321,9 @@ void StmtProfiler::VisitGCCAsmStmt(const GCCAsmStmt *S) {
   ID.AddInteger(S->getNumClobbers());
   for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
     VisitStringLiteral(S->getClobberStringLiteral(I));
+  ID.AddInteger(S->getNumLabels());
+  for (auto *L : S->labels())
+    VisitDecl(L->getLabel());
 }
 
 void StmtProfiler::VisitMSAsmStmt(const MSAsmStmt *S) {
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 1d83359341528..b53bfcca37cd4 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -549,6 +549,7 @@ class CFGBuilder {
   CFGBlock *VisitExprWithCleanups(ExprWithCleanups *E, AddStmtChoice asc);
   CFGBlock *VisitForStmt(ForStmt *F);
   CFGBlock *VisitGotoStmt(GotoStmt *G);
+  CFGBlock *VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc);
   CFGBlock *VisitIfStmt(IfStmt *I);
   CFGBlock *VisitImplicitCastExpr(ImplicitCastExpr *E, AddStmtChoice asc);
   CFGBlock *VisitConstantExpr(ConstantExpr *E, AddStmtChoice asc);
@@ -1478,22 +1479,38 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
                                    E = BackpatchBlocks.end(); I != E; ++I ) {
 
     CFGBlock *B = I->block;
-    const GotoStmt *G = cast<GotoStmt>(B->getTerminator());
-    LabelMapTy::iterator LI = LabelMap.find(G->getLabel());
-
-    // If there is no target for the goto, then we are looking at an
-    // incomplete AST.  Handle this by not registering a successor.
-    if (LI == LabelMap.end()) continue;
-
-    JumpTarget JT = LI->second;
-    prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
-                                              JT.scopePosition);
-    prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
-                                           JT.scopePosition);
-    const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
-        B, I->scopePosition, JT.scopePosition);
-    appendScopeBegin(JT.block, VD, G);
-    addSuccessor(B, JT.block);
+    if (auto *G = dyn_cast<GotoStmt>(B->getTerminator())) {
+      LabelMapTy::iterator LI = LabelMap.find(G->getLabel());
+      // If there is no target for the goto, then we are looking at an
+      // incomplete AST.  Handle this by not registering a successor.
+      if (LI == LabelMap.end())
+        continue;
+      JumpTarget JT = LI->second;
+      prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
+                                                JT.scopePosition);
+      prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
+                                             JT.scopePosition);
+      const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
+          B, I->scopePosition, JT.scopePosition);
+      appendScopeBegin(JT.block, VD, G);
+      addSuccessor(B, JT.block);
+    };
+    if (auto *G = dyn_cast<GCCAsmStmt>(B->getTerminator())) {
+      CFGBlock *Successor  = (I+1)->block;
+      for (auto *L : G->labels()) {
+        LabelMapTy::iterator LI = LabelMap.find(L->getLabel());
+        // If there is no target for the goto, then we are looking at an
+        // incomplete AST.  Handle this by not registering a successor.
+        if (LI == LabelMap.end())
+          continue;
+        JumpTarget JT = LI->second;
+        // Successor has been added, so skip it.
+        if (JT.block == Successor)
+          continue;
+        addSuccessor(B, JT.block);
+      }
+      I++;
+    }
   }
 
   // Add successors to the Indirect Goto Dispatch block (if we have one).
@@ -2142,6 +2159,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc) {
     case Stmt::GotoStmtClass:
       return VisitGotoStmt(cast<GotoStmt>(S));
 
+    case Stmt::GCCAsmStmtClass:
+      return VisitGCCAsmStmt(cast<GCCAsmStmt>(S), asc);
+
     case Stmt::IfStmtClass:
       return VisitIfStmt(cast<IfStmt>(S));
 
@@ -3146,6 +3166,28 @@ CFGBlock *CFGBuilder::VisitGotoStmt(GotoStmt *G) {
   return Block;
 }
 
+CFGBlock *CFGBuilder::VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc) {
+  // Goto is a control-flow statement.  Thus we stop processing the current
+  // block and create a new one.
+
+  if (!G->isAsmGoto())
+    return VisitStmt(G, asc);
+
+  if (Block) {
+    Succ = Block;
+    if (badCFG)
+      return nullptr;
+  }
+  Block = createBlock();
+  Block->setTerminator(G);
+  // We will backpatch this block later for all the labels.
+  BackpatchBlocks.push_back(JumpSource(Block, ScopePos));
+  // Save "Succ" in BackpatchBlocks. In the backpatch processing, "Succ" is
+  // used to avoid adding "Succ" again.
+  BackpatchBlocks.push_back(JumpSource(Succ, ScopePos));
+  return Block;
+}
+
 CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) {
   CFGBlock *LoopSuccessor = nullptr;
 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index c617b198d76d5..5c24db709250e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1896,6 +1896,55 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
   return llvm::MDNode::get(CGF.getLLVMContext(), Locs);
 }
 
+static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
+                              bool ReadOnly, bool ReadNone, const AsmStmt &S,
+                              const std::vector<llvm::Type *> &ResultRegTypes,
+                              CodeGenFunction &CGF,
+                              std::vector<llvm::Value *> &RegResults) {
+  Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                      llvm::Attribute::NoUnwind);
+  // Attach readnone and readonly attributes.
+  if (!HasSideEffect) {
+    if (ReadNone)
+      Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                          llvm::Attribute::ReadNone);
+    else if (ReadOnly)
+      Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                          llvm::Attribute::ReadOnly);
+  }
+
+  // Slap the source location of the inline asm into a !srcloc metadata on the
+  // call.
+  if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S))
+    Result.setMetadata("srcloc",
+                       getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF));
+  else {
+    // At least put the line number on MS inline asm blobs.
+    llvm::Constant *Loc = llvm::ConstantInt::get(CGF.Int32Ty,
+                                        S.getAsmLoc().getRawEncoding());
+    Result.setMetadata("srcloc",
+                       llvm::MDNode::get(CGF.getLLVMContext(),
+                                         llvm::ConstantAsMetadata::get(Loc)));
+  }
+
+  if (CGF.getLangOpts().assumeFunctionsAreConvergent())
+    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as bar.sync, and so can't have certain optimizations applied around
+    // them).
+    Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                        llvm::Attribute::Convergent);
+  // Extract all of the register value results from the asm.
+  if (ResultRegTypes.size() == 1) {
+    RegResults.push_back(&Result);
+  } else {
+    for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
+      llvm::Value *Tmp = CGF.Builder.CreateExtractValue(&Result, i, "asmresult");
+      RegResults.push_back(Tmp);
+    }
+  }
+}
+
 void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   // Assemble the final asm string.
   std::string AsmString = S.generateAsmString(getContext());
@@ -2138,6 +2187,29 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   }
   Constraints += InOutConstraints;
 
+  // Labels
+  SmallVector<llvm::BasicBlock *, 16> Transfer;
+  llvm::BasicBlock *Fallthrough = nullptr;
+  bool IsGCCAsmGoto = false;
+  if (const auto *GS =  dyn_cast<GCCAsmStmt>(&S)) {
+    IsGCCAsmGoto = GS->isAsmGoto();
+    if (IsGCCAsmGoto) {
+      for (auto *E : GS->labels()) {
+        JumpDest Dest = getJumpDestForLabel(E->getLabel());
+        Transfer.push_back(Dest.getBlock());
+        llvm::BlockAddress *BA =
+            llvm::BlockAddress::get(CurFn, Dest.getBlock());
+        Args.push_back(BA);
+        ArgTypes.push_back(BA->getType());
+        if (!Constraints.empty())
+          Constraints += ',';
+        Constraints += 'X';
+      }
+      StringRef Name = "asm.fallthrough";
+      Fallthrough = createBasicBlock(Name);
+    }
+  }
+
   // Clobbers
   for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
     StringRef Clobber = S.getClobber(i);
@@ -2180,52 +2252,18 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   llvm::InlineAsm *IA =
     llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
                          /* IsAlignStack */ false, AsmDialect);
-  llvm::CallInst *Result =
-      Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
-  Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                       llvm::Attribute::NoUnwind);
-
-  // Attach readnone and readonly attributes.
-  if (!HasSideEffect) {
-    if (ReadNone)
-      Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                           llvm::Attribute::ReadNone);
-    else if (ReadOnly)
-      Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                           llvm::Attribute::ReadOnly);
-  }
-
-  // Slap the source location of the inline asm into a !srcloc metadata on the
-  // call.
-  if (const GCCAsmStmt *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S)) {
-    Result->setMetadata("srcloc", getAsmSrcLocInfo(gccAsmStmt->getAsmString(),
-                                                   *this));
-  } else {
-    // At least put the line number on MS inline asm blobs.
-    auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding());
-    Result->setMetadata("srcloc",
-                        llvm::MDNode::get(getLLVMContext(),
-                                          llvm::ConstantAsMetadata::get(Loc)));
-  }
-
-  if (getLangOpts().assumeFunctionsAreConvergent()) {
-    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
-    // convergent (meaning, they may call an intrinsically convergent op, such
-    // as bar.sync, and so can't have certain optimizations applied around
-    // them).
-    Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                         llvm::Attribute::Convergent);
-  }
-
-  // Extract all of the register value results from the asm.
   std::vector<llvm::Value*> RegResults;
-  if (ResultRegTypes.size() == 1) {
-    RegResults.push_back(Result);
+  if (IsGCCAsmGoto) {
+    llvm::CallBrInst *Result =
+        Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
+    UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
+                      ReadNone, S, ResultRegTypes, *this, RegResults);
+    EmitBlock(Fallthrough);
   } else {
-    for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
-      llvm::Value *Tmp = Builder.CreateExtractValue(Result, i, "asmresult");
-      RegResults.push_back(Tmp);
-    }
+    llvm::CallInst *Result =
+        Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
+    UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
+                      ReadNone, S, ResultRegTypes, *this, RegResults);
   }
 
   assert(RegResults.size() == ResultRegTypes.size());
diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index c63808a472b5a..75f3ac396e1a4 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -710,12 +710,12 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
 
   // Remember if this was a volatile asm.
   bool isVolatile = DS.getTypeQualifiers() & DeclSpec::TQ_volatile;
+  // Remember if this was a goto asm.
+  bool isGotoAsm = false;
 
-  // TODO: support "asm goto" constructs (PR#9295).
   if (Tok.is(tok::kw_goto)) {
-    Diag(Tok, diag::err_asm_goto_not_supported_yet);
-    SkipUntil(tok::r_paren, StopAtSemi);
-    return StmtError();
+    isGotoAsm = true;
+    ConsumeToken();
   }
 
   if (Tok.isNot(tok::l_paren)) {
@@ -753,7 +753,8 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
     return Actions.ActOnGCCAsmStmt(AsmLoc, /*isSimple*/ true, isVolatile,
                                    /*NumOutputs*/ 0, /*NumInputs*/ 0, nullptr,
                                    Constraints, Exprs, AsmString.get(),
-                                   Clobbers, T.getCloseLocation());
+                                   Clobbers, /*NumLabels*/ 0,
+                                   T.getCloseLocation());
   }
 
   // Parse Outputs, if present.
@@ -763,6 +764,12 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
     AteExtraColon = Tok.is(tok::coloncolon);
     ConsumeToken();
 
+    if (!AteExtraColon && isGotoAsm && Tok.isNot(tok::colon)) {
+      Diag(Tok, diag::err_asm_goto_cannot_have_output);
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return StmtError();
+    }
+
     if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs))
       return StmtError();
   }
@@ -789,12 +796,15 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
   unsigned NumInputs = Names.size() - NumOutputs;
 
   // Parse the clobbers, if present.
-  if (AteExtraColon || Tok.is(tok::colon)) {
-    if (!AteExtraColon)
+  if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) {
+    if (AteExtraColon)
+      AteExtraColon = false;
+    else {
+      AteExtraColon = Tok.is(tok::coloncolon);
       ConsumeToken();
-
+    }
     // Parse the asm-string list for clobbers if present.
-    if (Tok.isNot(tok::r_paren)) {
+    if (!AteExtraColon && isTokenStringLiteral()) {
       while (1) {
         ExprResult Clobber(ParseAsmStringLiteral());
 
@@ -808,11 +818,49 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
       }
     }
   }
+  if (!isGotoAsm && (Tok.isNot(tok::r_paren) || AteExtraColon)) {
+    Diag(Tok, diag::err_expected) << tok::r_paren;
+    SkipUntil(tok::r_paren, StopAtSemi);
+    return StmtError();
+  }
+
+  // Parse the goto label, if present.
+  unsigned NumLabels = 0;
+  if (AteExtraColon || Tok.is(tok::colon)) {
+    if (!AteExtraColon)
+      ConsumeToken();
 
+    while (true) {
+      if (Tok.isNot(tok::identifier)) {
+        Diag(Tok, diag::err_expected) << tok::identifier;
+        SkipUntil(tok::r_paren, StopAtSemi);
+        return StmtError();
+      }
+      LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(),
+                                                  Tok.getLocation());
+      Names.push_back(Tok.getIdentifierInfo());
+      if (!LD) {
+        SkipUntil(tok::r_paren, StopAtSemi);
+        return StmtError();
+      }
+      ExprResult Res =
+          Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD);
+      Exprs.push_back(Res.get());
+      NumLabels++;
+      ConsumeToken();
+      if (!TryConsumeToken(tok::comma))
+        break;
+    }
+  } else if (isGotoAsm) {
+    Diag(Tok, diag::err_expected) << tok::colon;
+    SkipUntil(tok::r_paren, StopAtSemi);
+    return StmtError();
+  }
   T.consumeClose();
   return Actions.ActOnGCCAsmStmt(
       AsmLoc, false, isVolatile, NumOutputs, NumInputs, Names.data(),
-      Constraints, Exprs, AsmString.get(), Clobbers, T.getCloseLocation());
+      Constraints, Exprs, AsmString.get(), Clobbers, NumLabels,
+      T.getCloseLocation());
 }
 
 /// ParseAsmOperands - Parse the asm-operands production as used by
diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp
index 2234d6ba9b11f..c8743df90e340 100644
--- a/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/clang/lib/Sema/JumpDiagnostics.cpp
@@ -65,8 +65,10 @@ class JumpScopeChecker {
   llvm::DenseMap<Stmt*, unsigned> LabelAndGotoScopes;
   SmallVector<Stmt*, 16> Jumps;
 
-  SmallVector<IndirectGotoStmt*, 4> IndirectJumps;
+  SmallVector<Stmt*, 4> IndirectJumps;
+  SmallVector<Stmt*, 4> AsmJumps;
   SmallVector<LabelDecl*, 4> IndirectJumpTargets;
+  SmallVector<LabelDecl*, 4> AsmJumpTargets;
 public:
   JumpScopeChecker(Stmt *Body, Sema &S);
 private:
@@ -76,10 +78,10 @@ class JumpScopeChecker {
   void BuildScopeInformation(Stmt *S, unsigned &origParentScope);
 
   void VerifyJumps();
-  void VerifyIndirectJumps();
+  void VerifyIndirectOrAsmJumps(bool IsAsmGoto);
   void NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes);
-  void DiagnoseIndirectJump(IndirectGotoStmt *IG, unsigned IGScope,
-                            LabelDecl *Target, unsigned TargetScope);
+  void DiagnoseIndirectOrAsmJump(Stmt *IG, unsigned IGScope, LabelDecl *Target,
+                                 unsigned TargetScope);
   void CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc,
                  unsigned JumpDiag, unsigned JumpDiagWarning,
                  unsigned JumpDiagCXX98Compat);
@@ -103,7 +105,8 @@ JumpScopeChecker::JumpScopeChecker(Stmt *Body, Sema &s)
 
   // Check that all jumps we saw are kosher.
   VerifyJumps();
-  VerifyIndirectJumps();
+  VerifyIndirectOrAsmJumps(false);
+  VerifyIndirectOrAsmJumps(true);
 }
 
 /// GetDeepestCommonScope - Finds the innermost scope enclosing the
@@ -316,7 +319,7 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     }
 
     LabelAndGotoScopes[S] = ParentScope;
-    IndirectJumps.push_back(cast<IndirectGotoStmt>(S));
+    IndirectJumps.push_back(S);
     break;
 
   case Stmt::SwitchStmtClass:
@@ -339,6 +342,18 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     Jumps.push_back(S);
     break;
 
+  case Stmt::GCCAsmStmtClass:
+    if (auto *GS = dyn_cast<GCCAsmStmt>(S))
+      if (GS->isAsmGoto()) {
+        // Remember both what scope a goto is in as well as the fact that we
+        // have it.  This makes the second scan not have to walk the AST again.
+        LabelAndGotoScopes[S] = ParentScope;
+        AsmJumps.push_back(GS);
+        for (auto *E : GS->labels())
+          AsmJumpTargets.push_back(E->getLabel());
+      }
+    break;
+
   case Stmt::IfStmtClass: {
     IfStmt *IS = cast<IfStmt>(S);
     if (!(IS->isConstexpr() || IS->isObjCAvailabilityCheck()))
@@ -629,14 +644,13 @@ void JumpScopeChecker::VerifyJumps() {
   }
 }
 
-/// VerifyIndirectJumps - Verify whether any possible indirect jump
-/// might cross a protection boundary.  Unlike direct jumps, indirect
-/// jumps count cleanups as protection boundaries:  since there's no
-/// way to know where the jump is going, we can't implicitly run the
-/// right cleanups the way we can with direct jumps.
-///
-/// Thus, an indirect jump is "trivial" if it bypasses no
-/// initializations and no teardowns.  More formally, an indirect jump
+/// VerifyIndirectOrAsmJumps - Verify whether any possible indirect goto or
+/// asm goto jump might cross a protection boundary.  Unlike direct jumps,
+/// indirect or asm goto jumps count cleanups as protection boundaries:
+/// since there's no way to know where the jump is going, we can't implicitly
+/// run the right cleanups the way we can with direct jumps.
+/// Thus, an indirect/asm jump is "trivial" if it bypasses no
+/// initializations and no teardowns.  More formally, an indirect/asm jump
 /// from A to B is trivial if the path out from A to DCA(A,B) is
 /// trivial and the path in from DCA(A,B) to B is trivial, where
 /// DCA(A,B) is the deepest common ancestor of A and B.
@@ -648,36 +662,41 @@ void JumpScopeChecker::VerifyJumps() {
 /// Under these definitions, this function checks that the indirect
 /// jump between A and B is trivial for every indirect goto statement A
 /// and every label B whose address was taken in the function.
-void JumpScopeChecker::VerifyIndirectJumps() {
-  if (IndirectJumps.empty()) return;
-
+void JumpScopeChecker::VerifyIndirectOrAsmJumps(bool IsAsmGoto) {
+  SmallVector<Stmt*, 4> GotoJumps = IsAsmGoto ? AsmJumps : IndirectJumps;
+  if (GotoJumps.empty())
+    return;
+  SmallVector<LabelDecl *, 4> JumpTargets =
+      IsAsmGoto ? AsmJumpTargets : IndirectJumpTargets;
   // If there aren't any address-of-label expressions in this function,
   // complain about the first indirect goto.
-  if (IndirectJumpTargets.empty()) {
-    S.Diag(IndirectJumps[0]->getGotoLoc(),
+  if (JumpTargets.empty()) {
+    assert(!IsAsmGoto &&"only indirect goto can get here");
+    S.Diag(GotoJumps[0]->getBeginLoc(),
            diag::err_indirect_goto_without_addrlabel);
     return;
   }
-
   // Collect a single representative of every scope containing an
-  // indirect goto.  For most code bases, this substantially cuts
+  // indirect or asm goto.  For most code bases, this substantially cuts
   // down on the number of jump sites we'll have to consider later.
-  typedef std::pair<unsigned, IndirectGotoStmt*> JumpScope;
+  typedef std::pair<unsigned, Stmt*> JumpScope;
   SmallVector<JumpScope, 32> JumpScopes;
   {
-    llvm::DenseMap<unsigned, IndirectGotoStmt*> JumpScopesMap;
-    for (SmallVectorImpl<IndirectGotoStmt*>::iterator
-           I = IndirectJumps.begin(), E = IndirectJumps.end(); I != E; ++I) {
-      IndirectGotoStmt *IG = *I;
+    llvm::DenseMap<unsigned, Stmt*> JumpScopesMap;
+    for (SmallVectorImpl<Stmt *>::iterator I = GotoJumps.begin(),
+                                           E = GotoJumps.end();
+         I != E; ++I) {
+      Stmt *IG = *I;
       if (CHECK_PERMISSIVE(!LabelAndGotoScopes.count(IG)))
         continue;
       unsigned IGScope = LabelAndGotoScopes[IG];
-      IndirectGotoStmt *&Entry = JumpScopesMap[IGScope];
+      Stmt *&Entry = JumpScopesMap[IGScope];
       if (!Entry) Entry = IG;
     }
     JumpScopes.reserve(JumpScopesMap.size());
-    for (llvm::DenseMap<unsigned, IndirectGotoStmt*>::iterator
-           I = JumpScopesMap.begin(), E = JumpScopesMap.end(); I != E; ++I)
+    for (llvm::DenseMap<unsigned, Stmt *>::iterator I = JumpScopesMap.begin(),
+                                                    E = JumpScopesMap.end();
+         I != E; ++I)
       JumpScopes.push_back(*I);
   }
 
@@ -685,8 +704,8 @@ void JumpScopeChecker::VerifyIndirectJumps() {
   // label whose address was taken somewhere in the function.
   // For most code bases, there will be only one such scope.
   llvm::DenseMap<unsigned, LabelDecl*> TargetScopes;
-  for (SmallVectorImpl<LabelDecl*>::iterator
-         I = IndirectJumpTargets.begin(), E = IndirectJumpTargets.end();
+  for (SmallVectorImpl<LabelDecl *>::iterator I = JumpTargets.begin(),
+                                              E = JumpTargets.end();
        I != E; ++I) {
     LabelDecl *TheLabel = *I;
     if (CHECK_PERMISSIVE(!LabelAndGotoScopes.count(TheLabel->getStmt())))
@@ -763,7 +782,7 @@ void JumpScopeChecker::VerifyIndirectJumps() {
       // Only diagnose if we didn't find something.
       if (IsReachable) continue;
 
-      DiagnoseIndirectJump(I->second, I->first, TargetLabel, TargetScope);
+      DiagnoseIndirectOrAsmJump(I->second, I->first, TargetLabel, TargetScope);
     }
   }
 }
@@ -784,12 +803,15 @@ static bool IsCXX98CompatWarning(Sema &S, unsigned InDiagNote) {
 }
 
 /// Produce primary diagnostic for an indirect jump statement.
-static void DiagnoseIndirectJumpStmt(Sema &S, IndirectGotoStmt *Jump,
-                                     LabelDecl *Target, bool &Diagnosed) {
+static void DiagnoseIndirectOrAsmJumpStmt(Sema &S, Stmt *Jump,
+                                          LabelDecl *Target, bool &Diagnosed) {
   if (Diagnosed)
     return;
-  S.Diag(Jump->getGotoLoc(), diag::err_indirect_goto_in_protected_scope);
-  S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target);
+  bool IsAsmGoto = isa<GCCAsmStmt>(Jump);
+  S.Diag(Jump->getBeginLoc(), diag::err_indirect_goto_in_protected_scope)
+      << IsAsmGoto;
+  S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target)
+      << IsAsmGoto;
   Diagnosed = true;
 }
 
@@ -803,10 +825,9 @@ void JumpScopeChecker::NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes) {
 }
 
 /// Diagnose an indirect jump which is known to cross scopes.
-void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
-                                            unsigned JumpScope,
-                                            LabelDecl *Target,
-                                            unsigned TargetScope) {
+void JumpScopeChecker::DiagnoseIndirectOrAsmJump(Stmt *Jump, unsigned JumpScope,
+                                                 LabelDecl *Target,
+                                                 unsigned TargetScope) {
   if (CHECK_PERMISSIVE(JumpScope == TargetScope))
     return;
 
@@ -816,7 +837,7 @@ void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
   // Walk out the scope chain until we reach the common ancestor.
   for (unsigned I = JumpScope; I != Common; I = Scopes[I].ParentScope)
     if (Scopes[I].OutDiag) {
-      DiagnoseIndirectJumpStmt(S, Jump, Target, Diagnosed);
+      DiagnoseIndirectOrAsmJumpStmt(S, Jump, Target, Diagnosed);
       S.Diag(Scopes[I].Loc, Scopes[I].OutDiag);
     }
 
@@ -827,15 +848,18 @@ void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
     if (IsCXX98CompatWarning(S, Scopes[I].InDiag))
       ToScopesCXX98Compat.push_back(I);
     else if (Scopes[I].InDiag) {
-      DiagnoseIndirectJumpStmt(S, Jump, Target, Diagnosed);
+      DiagnoseIndirectOrAsmJumpStmt(S, Jump, Target, Diagnosed);
       S.Diag(Scopes[I].Loc, Scopes[I].InDiag);
     }
 
   // Diagnose this jump if it would be ill-formed in C++98.
   if (!Diagnosed && !ToScopesCXX98Compat.empty()) {
-    S.Diag(Jump->getGotoLoc(),
-           diag::warn_cxx98_compat_indirect_goto_in_protected_scope);
-    S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target);
+    bool IsAsmGoto = isa<GCCAsmStmt>(Jump);
+    S.Diag(Jump->getBeginLoc(),
+           diag::warn_cxx98_compat_indirect_goto_in_protected_scope)
+        << IsAsmGoto;
+    S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target)
+        << IsAsmGoto;
     NoteJumpIntoScopes(ToScopesCXX98Compat);
   }
 }
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index 8c6012573c64f..ec8958c3c5f90 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -209,11 +209,12 @@ static StringRef extractRegisterName(const Expr *Expression,
 static SourceLocation
 getClobberConflictLocation(MultiExprArg Exprs, StringLiteral **Constraints,
                            StringLiteral **Clobbers, int NumClobbers,
+                           unsigned NumLabels,
                            const TargetInfo &Target, ASTContext &Cont) {
   llvm::StringSet<> InOutVars;
   // Collect all the input and output registers from the extended asm
   // statement in order to check for conflicts with the clobber list
-  for (unsigned int i = 0; i < Exprs.size(); ++i) {
+  for (unsigned int i = 0; i < Exprs.size() - NumLabels; ++i) {
     StringRef Constraint = Constraints[i]->getString();
     StringRef InOutReg = Target.getConstraintRegister(
         Constraint, extractRegisterName(Exprs[i], Target));
@@ -241,6 +242,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
                                  unsigned NumInputs, IdentifierInfo **Names,
                                  MultiExprArg constraints, MultiExprArg Exprs,
                                  Expr *asmString, MultiExprArg clobbers,
+                                 unsigned NumLabels,
                                  SourceLocation RParenLoc) {
   unsigned NumClobbers = clobbers.size();
   StringLiteral **Constraints =
@@ -269,7 +271,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
 
     ExprResult ER = CheckPlaceholderExpr(Exprs[i]);
@@ -330,7 +332,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
   }
 
@@ -352,7 +354,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
 
     ExprResult ER = CheckPlaceholderExpr(Exprs[i]);
@@ -451,14 +453,15 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
   }
 
   GCCAsmStmt *NS =
     new (Context) GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                              NumInputs, Names, Constraints, Exprs.data(),
-                             AsmString, NumClobbers, Clobbers, RParenLoc);
+                             AsmString, NumClobbers, Clobbers, NumLabels,
+                             RParenLoc);
   // Validate the asm string, ensuring it makes sense given the operands we
   // have.
   SmallVector<GCCAsmStmt::AsmStringPiece, 8> Pieces;
@@ -476,8 +479,10 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
 
     // Look for the correct constraint index.
     unsigned ConstraintIdx = Piece.getOperandNo();
+    // Labels are the last in the Exprs list.
+    if (NS->isAsmGoto() && ConstraintIdx >= NS->getNumInputs())
+      continue;
     unsigned NumOperands = NS->getNumOutputs() + NS->getNumInputs();
-
     // Look for the (ConstraintIdx - NumOperands + 1)th constraint with
     // modifier '+'.
     if (ConstraintIdx >= NumOperands) {
@@ -660,10 +665,39 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
   // Check for conflicts between clobber list and input or output lists
   SourceLocation ConstraintLoc =
       getClobberConflictLocation(Exprs, Constraints, Clobbers, NumClobbers,
+                                 NumLabels,
                                  Context.getTargetInfo(), Context);
   if (ConstraintLoc.isValid())
     targetDiag(ConstraintLoc, diag::error_inoutput_conflict_with_clobber);
 
+  // Check for duplicate asm operand name between input, output and label lists.
+  typedef std::pair<StringRef , Expr *> NamedOperand;
+  SmallVector<NamedOperand, 4> NamedOperandList;
+  for (unsigned i = 0, e = NumOutputs + NumInputs + NumLabels; i != e; ++i)
+    if (Names[i])
+      NamedOperandList.emplace_back(
+          std::make_pair(Names[i]->getName(), Exprs[i]));
+  // Sort NamedOperandList.
+  std::stable_sort(NamedOperandList.begin(), NamedOperandList.end(),
+              [](const NamedOperand &LHS, const NamedOperand &RHS) {
+                return LHS.first < RHS.first;
+              });
+  // Find adjacent duplicate operand.
+  SmallVector<NamedOperand, 4>::iterator Found =
+      std::adjacent_find(begin(NamedOperandList), end(NamedOperandList),
+                         [](const NamedOperand &LHS, const NamedOperand &RHS) {
+                           return LHS.first == RHS.first;
+                         });
+  if (Found != NamedOperandList.end()) {
+    Diag((Found + 1)->second->getBeginLoc(),
+         diag::error_duplicate_asm_operand_name)
+        << (Found + 1)->first;
+    Diag(Found->second->getBeginLoc(), diag::note_duplicate_asm_operand_name)
+        << Found->first;
+    return StmtError();
+  }
+  if (NS->isAsmGoto())
+    setFunctionHasBranchIntoScope();
   return NS;
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 6620885f23961..609a3cfcfca1d 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1373,10 +1373,11 @@ class TreeTransform {
                                unsigned NumInputs, IdentifierInfo **Names,
                                MultiExprArg Constraints, MultiExprArg Exprs,
                                Expr *AsmString, MultiExprArg Clobbers,
+                               unsigned NumLabels,
                                SourceLocation RParenLoc) {
     return getSema().ActOnGCCAsmStmt(AsmLoc, IsSimple, IsVolatile, NumOutputs,
                                      NumInputs, Names, Constraints, Exprs,
-                                     AsmString, Clobbers, RParenLoc);
+                                     AsmString, Clobbers, NumLabels, RParenLoc);
   }
 
   /// Build a new MS style inline asm statement.
@@ -7051,6 +7052,16 @@ TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
     Exprs.push_back(Result.get());
   }
 
+  // Go through the Labels.
+  for (unsigned I = 0, E = S->getNumLabels(); I != E; ++I) {
+    Names.push_back(S->getLabelIdentifier(I));
+
+    ExprResult Result = getDerived().TransformExpr(S->getLabelExpr(I));
+    if (Result.isInvalid())
+      return StmtError();
+    ExprsChanged |= Result.get() != S->getLabelExpr(I);
+    Exprs.push_back(Result.get());
+  }
   if (!getDerived().AlwaysRebuild() && !ExprsChanged)
     return S;
 
@@ -7064,7 +7075,8 @@ TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
                                         S->isVolatile(), S->getNumOutputs(),
                                         S->getNumInputs(), Names.data(),
                                         Constraints, Exprs, AsmString.get(),
-                                        Clobbers, S->getRParenLoc());
+                                        Clobbers, S->getNumLabels(),
+                                        S->getRParenLoc());
 }
 
 template<typename Derived>
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 4d879b46e1a4a..52aa3d961d200 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -370,12 +370,14 @@ void ASTStmtReader::VisitAsmStmt(AsmStmt *S) {
 
 void ASTStmtReader::VisitGCCAsmStmt(GCCAsmStmt *S) {
   VisitAsmStmt(S);
+  S->NumLabels = Record.readInt();
   S->setRParenLoc(ReadSourceLocation());
   S->setAsmString(cast_or_null<StringLiteral>(Record.readSubStmt()));
 
   unsigned NumOutputs = S->getNumOutputs();
   unsigned NumInputs = S->getNumInputs();
   unsigned NumClobbers = S->getNumClobbers();
+  unsigned NumLabels = S->getNumLabels();
 
   // Outputs and inputs
   SmallVector<IdentifierInfo *, 16> Names;
@@ -392,9 +394,14 @@ void ASTStmtReader::VisitGCCAsmStmt(GCCAsmStmt *S) {
   for (unsigned I = 0; I != NumClobbers; ++I)
     Clobbers.push_back(cast_or_null<StringLiteral>(Record.readSubStmt()));
 
+  // Labels
+  for (unsigned I = 0, N = NumLabels; I != N; ++I)
+    Exprs.push_back(Record.readSubStmt());
+
   S->setOutputsAndInputsAndClobbers(Record.getContext(),
                                     Names.data(), Constraints.data(),
                                     Exprs.data(), NumOutputs, NumInputs,
+                                    NumLabels,
                                     Clobbers.data(), NumClobbers);
 }
 
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index b0a35cf2f5655..776aab6bf51d2 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -283,6 +283,7 @@ void ASTStmtWriter::VisitAsmStmt(AsmStmt *S) {
 
 void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
   VisitAsmStmt(S);
+  Record.push_back(S->getNumLabels());
   Record.AddSourceLocation(S->getRParenLoc());
   Record.AddStmt(S->getAsmString());
 
@@ -304,6 +305,9 @@ void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
   for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
     Record.AddStmt(S->getClobberStringLiteral(I));
 
+  // Labels
+  for (auto *E : S->labels()) Record.AddStmt(E);
+
   Code = serialization::STMT_GCCASM;
 }
 
diff --git a/clang/test/Analysis/asm-goto.cpp b/clang/test/Analysis/asm-goto.cpp
new file mode 100644
index 0000000000000..3d4babc57bd2a
--- /dev/null
+++ b/clang/test/Analysis/asm-goto.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 | FileCheck %s
+
+int foo(int cond)
+{
+label_true:
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  return 0;
+loop:
+  return 0;
+}
+
+// CHECK-LABEL: loop
+// CHECK-NEXT: 0
+// CHECK-NEXT: return
+// CHECK-NEXT: Preds (1): B3
+// CHECK-NEXT: Succs (1): B0
+
+// CHECK-LABEL: label_true
+// CHECK-NEXT: asm goto
+// CHECK-NEXT: Preds (2): B3 B4
+// CHECK-NEXT: Succs (3): B2 B3 B1
+
+
+int bar(int cond)
+{
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::L1, L2);
+  return 0;
+L1:
+L2:
+  return 0;
+}
+
+// CHECK: [B4]
+// CHECK-NEXT: asm goto
+// CHECK-NEXT: Preds (1): B5
+// CHECK-NEXT: Succs (3): B3 B2 B1
+
+int zoo(int n)
+{
+A5:
+A1:
+  asm goto("testl %0, %0; jne %l1;" :: "r"(n)::A1, A2, A3, A4, A5);
+A2:
+A3:
+A4:
+  return 0;
+}
+
+// CHECK-LABEL: A1
+// CHECK-NEXT: asm goto
+// CHECK-NEXT: Preds (2): B5 B4
+// CHECK-NEXT: Succs (5): B3 B4 B2 B1 B5
diff --git a/clang/test/CodeGen/asm-goto.c b/clang/test/CodeGen/asm-goto.c
new file mode 100644
index 0000000000000..2c4a1a0c4df1c
--- /dev/null
+++ b/clang/test/CodeGen/asm-goto.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -O0 -emit-llvm  %s -o - | FileCheck %s
+
+int foo(int cond)
+{
+  // CHECK: callbr void asm sideeffect
+  // CHECK: to label %asm.fallthrough [label %label_true, label %loop], !srcloc !2
+  // CHECK: asm.fallthrough:
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  // CHECK: callbr void asm sideeffect
+  // CHECK: to label %asm.fallthrough1 [label %label_true, label %loop], !srcloc !3
+  // CHECK: asm.fallthrough1:
+  return 0;
+loop:
+  return 0;
+label_true:
+  return 1;
+}
diff --git a/clang/test/CodeGen/asm.c b/clang/test/CodeGen/asm.c
index 038d346e9993a..7de79639bfd72 100644
--- a/clang/test/CodeGen/asm.c
+++ b/clang/test/CodeGen/asm.c
@@ -262,3 +262,15 @@ void t31(int len) {
   // CHECK: @t31
   // CHECK: call void asm sideeffect "", "=*%rm,=*rm,0,1,~{dirflag},~{fpsr},~{flags}"
 }
+
+// CHECK: @t32
+int t32(int cond)
+{
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  // CHECK: callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %0, i8* blockaddress(@t32, %label_true), i8* blockaddress(@t32, %loop)) #1
+  return 0;
+loop:
+  return 0;
+label_true:
+  return 1;
+}
diff --git a/clang/test/CodeGen/inline-asm-mixed-style.c b/clang/test/CodeGen/inline-asm-mixed-style.c
index 6b830d9fa7a92..a9e111cd5ddcf 100644
--- a/clang/test/CodeGen/inline-asm-mixed-style.c
+++ b/clang/test/CodeGen/inline-asm-mixed-style.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -fsyntax-only -verify %s -DCHECK_ASM_GOTO
 // RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -O0 -emit-llvm -S %s -o - | FileCheck %s
 // REQUIRES: x86-registered-target
 
@@ -20,10 +19,11 @@ void f() {
   // CHECK: movl    %ebx, %eax
   // CHECK: movl    %ecx, %edx
 
-#ifdef CHECK_ASM_GOTO
-  __asm volatile goto ("movl %ecx, %edx"); // expected-error {{'asm goto' constructs are not supported yet}}
+  __asm volatile goto ("movl %ecx, %edx");
+  // CHECK: movl    %ecx, %edx
 
   __asm mov eax, ebx
-  __asm goto ("movl %ecx, %edx"); // expected-error {{'asm goto' constructs are not supported yet}}
-#endif
+  __asm goto ("movl %ecx, %edx");
+  // CHECK: movl    %ebx, %eax
+  // CHECK: movl    %ecx, %edx
 }
diff --git a/clang/test/Coverage/c-language-features.inc b/clang/test/Coverage/c-language-features.inc
index 356687907d905..ea3b96f6005a3 100644
--- a/clang/test/Coverage/c-language-features.inc
+++ b/clang/test/Coverage/c-language-features.inc
@@ -71,7 +71,9 @@ theif:
   }
 
   asm ("nop");
-
+  int cond;
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true);
+label_true:
   return;
 }
 
diff --git a/clang/test/PCH/asm.h b/clang/test/PCH/asm.h
index a568058d58f66..5a7268eff6e6a 100644
--- a/clang/test/PCH/asm.h
+++ b/clang/test/PCH/asm.h
@@ -1,10 +1,14 @@
 // Header for the PCH test asm.c
 
 void f() {
-  int i;
+  int i,cond;
 
   asm ("foo\n" : : "a" (i + 2));
   asm ("foo\n" : [symbolic_name] "=a" (i) : "[symbolic_name]" (i));
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+label_true:
+loop:
+  return;
 }
 
 void clobbers() {
diff --git a/clang/test/Parser/asm.c b/clang/test/Parser/asm.c
index 637f9d7ed42f8..489b545ebeda6 100644
--- a/clang/test/Parser/asm.c
+++ b/clang/test/Parser/asm.c
@@ -21,6 +21,56 @@ void f2() {
 }
 
 
+int a, b, c, d, e, f, g, h, i, j, k, l;
+
+void
+fgoto1 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d),
+               [e] "r" (e), [f] "r" (f), [g] "r" (g), [h] "r" (h),
+               [i] "r" (i), [j] "r" (j), [k] "r" (k), [l] "r" (l)
+            ::lab1,lab2);
+lab1: return;
+lab2: return;
+}
+
+void
+fgoto2 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r,m" (a), [b] "r,m" (b), [c] "r,m" (c), [d] "r,m" (d),
+               [e] "r,m" (e), [f] "r,m" (f), [g] "r,m" (g), [h] "r,m" (h),
+               [i] "r,m" (i), [j] "r,m" (j), [k] "r,m" (k), [l] "r,m" (l)
+            :: lab);
+  lab: return;
+}
+
+int zoo ()
+{
+  int x,cond,*e;
+  // expected-error@+1 {{expected ')'}}
+  asm ("mov %[e], %[e]" : : [e] "rm" (*e)::a)
+  // expected-error@+1 {{'asm goto' cannot have output constraints}}
+  asm goto ("decl %0; jnz %l[a]" :"=r"(x): "m"(x) : "memory" : a);
+  // expected-error@+1 {{expected identifie}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" : );
+  // expected-error@+1 {{expected ':'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" );
+  // expected-error@+1 {{use of undeclared label 'x'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :x);
+  // expected-error@+1 {{use of undeclared label 'b'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :b);
+  // expected-error@+1 {{invalid operand number in inline asm string}}
+  asm goto ("testl %0, %0; jne %l3;" :: "r"(cond)::label_true, loop);
+  // expected-error@+1 {{unknown symbolic operand name in inline assembly string}}
+  asm goto ("decl %0; jnz %l[b]" :: "m"(x) : "memory" : a);
+a:
+label_true:
+loop:
+  return 0;
+}
+
 // rdar://5952468
 __asm ; // expected-error {{expected '(' after 'asm'}}
 
diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp
index 9f64dfea476ed..9c4d62a255242 100644
--- a/clang/test/Parser/asm.cpp
+++ b/clang/test/Parser/asm.cpp
@@ -7,3 +7,54 @@ int foo4 asm (u"bar4"); // expected-error {{cannot use unicode string literal in
 int foo5 asm (U"bar5"); // expected-error {{cannot use unicode string literal in 'asm'}}
 int foo6 asm ("bar6"_x); // expected-error {{string literal with user-defined suffix cannot be used here}}
 int foo6 asm ("" L"bar7"); // expected-error {{cannot use wide string literal in 'asm'}}
+
+int zoo ()
+{
+  int x,cond,*e;
+  // expected-error@+1 {{expected ')'}}
+  asm ("mov %[e], %[e]" : : [e] "rm" (*e)::a)
+  // expected-error@+1  {{'asm goto' cannot have output constraints}}
+  asm goto ("decl %0; jnz %l[a]" :"=r"(x): "m"(x) : "memory" : a);
+  // expected-error@+1 {{expected identifie}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" : );
+  // expected-error@+1  {{expected ':'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" );
+  // expected-error@+1 {{use of undeclared label 'x'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :x);
+  // expected-error@+1 {{use of undeclared label 'b'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :b);
+  // expected-error@+1 {{invalid operand number in inline asm string}}
+  asm goto ("testl %0, %0; jne %l3;" :: "r"(cond)::label_true, loop);
+  // expected-error@+1 {{unknown symbolic operand name in inline assembly string}}
+  asm goto ("decl %0; jnz %l[b]" :: "m"(x) : "memory" : a);
+label_true:
+loop:
+a:
+  return 0;
+}
+
+
+int a, b, c, d, e, f, g, h, i, j, k, l;
+
+void
+fgoto1 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d),
+               [e] "r" (e), [f] "r" (f), [g] "r" (g), [h] "r" (h),
+               [i] "r" (i), [j] "r" (j), [k] "r" (k), [l] "r" (l)
+            ::lab1,lab2);
+lab1: return;
+lab2: return;
+}
+
+void
+fgoto2 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r,m" (a), [b] "r,m" (b), [c] "r,m" (c), [d] "r,m" (d),
+               [e] "r,m" (e), [f] "r,m" (f), [g] "r,m" (g), [h] "r,m" (h),
+               [i] "r,m" (i), [j] "r,m" (j), [k] "r,m" (k), [l] "r,m" (l)
+            :: lab);
+  lab: return;
+}
diff --git a/clang/test/Sema/asm-goto.cpp b/clang/test/Sema/asm-goto.cpp
new file mode 100644
index 0000000000000..f61a8096b83ec
--- /dev/null
+++ b/clang/test/Sema/asm-goto.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 %s -triple i386-pc-linux-gnu -verify -fsyntax-only
+
+struct NonTrivial {
+  ~NonTrivial();
+  int f(int);
+private:
+  int k;
+};
+void JumpDiagnostics(int n) {
+// expected-error@+1 {{cannot jump from this goto statement to its label}}
+  goto DirectJump;
+// expected-note@+1 {{jump bypasses variable with a non-trivial destructor}}
+  NonTrivial tnp1;
+
+DirectJump:
+// expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
+  asm goto("jmp %l0;" ::::Later);
+// expected-note@+1 {{jump bypasses variable with a non-trivial destructor}}
+  NonTrivial tnp2;
+// expected-note@+1 {{possible target of asm goto statement}}
+Later:
+  return;
+}
+
+struct S { ~S(); };
+void foo(int a) {
+  if (a) {
+FOO:
+// expected-note@+2 {{jump exits scope of variable with non-trivial destructor}}
+// expected-note@+1 {{jump exits scope of variable with non-trivial destructor}}
+    S s;
+    void *p = &&BAR;
+// expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
+  asm goto("jmp %l0;" ::::BAR);
+// expected-error@+1 {{cannot jump from this indirect goto statement to one of its possible targets}}
+    goto *p;
+    p = &&FOO;
+    goto *p;
+    return;
+  }
+// expected-note@+2 {{possible target of asm goto statement}}
+// expected-note@+1 {{possible target of indirect goto statement}}
+BAR:
+  return;
+}
diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c
index 67da197426cd5..29a55c610de49 100644
--- a/clang/test/Sema/asm.c
+++ b/clang/test/Sema/asm.c
@@ -295,3 +295,24 @@ int test17(int t0)
   return r0 + r1;
 }
 
+void test18()
+{
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm goto ("" : : : : lab, lab, lab2, lab);
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm goto ("xorw %[lab], %[lab]; je %l[lab]" : : [lab] "i" (0) : : lab);
+lab:;
+lab2:;
+  int x,x1;
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm ("" : [lab] "=r" (x),[lab] "+r" (x) : [lab1] "r" (x));
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm ("" : [lab] "=r" (x1) : [lab] "r" (x));
+  // expected-error@+1 {{invalid operand number in inline asm string}}
+  asm ("jne %l0":::);
+  asm goto ("jne %l0"::::lab);
+}
diff --git a/clang/test/Sema/inline-asm-validate-tmpl.cpp b/clang/test/Sema/inline-asm-validate-tmpl.cpp
index cf7eac3d83d43..9e234caa9c8df 100644
--- a/clang/test/Sema/inline-asm-validate-tmpl.cpp
+++ b/clang/test/Sema/inline-asm-validate-tmpl.cpp
@@ -23,3 +23,13 @@ template <int N> void	testc(int value)
 	asm("rol %1, %0" :"=r"(value): "I"(N + 1));
 }
 int	foo() { testc<2>(10); }
+
+// these should compile without error
+template <int N> bool testd()
+{
+  __asm goto ("" : : : : lab);
+  return true;
+lab:
+  return false;
+}
+bool foox() { return testd<0> (); }
diff --git a/clang/test/Sema/scope-check.c b/clang/test/Sema/scope-check.c
index fa37d10d070b9..0622450e2e7af 100644
--- a/clang/test/Sema/scope-check.c
+++ b/clang/test/Sema/scope-check.c
@@ -232,3 +232,19 @@ void test15(int n, void *pc) {
 
 // rdar://9024687
 int test16(int [sizeof &&z]); // expected-error {{use of address-of-label extension outside of a function body}}
+
+//Asm goto:
+int test16(int n)
+{
+  // expected-error@+2 {{cannot jump from this asm goto statement to one of its possible targets}}
+  // expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(n)::label_true, loop);
+  // expected-note@+2 {{jump bypasses initialization of variable length array}}
+  // expected-note@+1 {{possible target of asm goto statement}}
+  return ({int a[n];label_true: 2;});
+  // expected-note@+1 {{jump bypasses initialization of variable length array}}
+  int b[n];
+// expected-note@+1 {{possible target of asm goto statement}}
+loop:
+  return 0;
+}

From 158875ce0c95e883e91f4c562054a3ad57625b1a Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Thu, 30 May 2019 01:08:38 +0000
Subject: [PATCH 0568/1176] [BitstreamWriter][NFC] Remove obsolete comment.

The Abbv parameter was just a raw pointer when the comment was written.

llvm-svn: 362046
---
 llvm/include/llvm/Bitcode/BitstreamWriter.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Bitcode/BitstreamWriter.h b/llvm/include/llvm/Bitcode/BitstreamWriter.h
index 3de3998db524a..cd5b614d10539 100644
--- a/llvm/include/llvm/Bitcode/BitstreamWriter.h
+++ b/llvm/include/llvm/Bitcode/BitstreamWriter.h
@@ -486,10 +486,8 @@ class BitstreamWriter {
   }
 public:
 
-  /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
-  /// method takes ownership of the specified abbrev.
+  /// Emits the abbreviation \p Abbv to the stream.
   unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) {
-    // Emit the abbreviation as a record.
     EncodeAbbrev(*Abbv);
     CurAbbrevs.push_back(std::move(Abbv));
     return static_cast<unsigned>(CurAbbrevs.size())-1 +

From 7e041d6dac7489735915ff1879992dc25cb365bc Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 01:24:31 +0000
Subject: [PATCH 0569/1176] [CMake] Set LLVM_PATH in the runtimes build

This avoids using llvm-config for inferring various paths within the
runtimes build. We also set LLVM_INCLUDE_DIR variable that's used by
these builds and move assignment of LLVM_BINARY_DIR and LLVM_LIBRARY_DIR
to the same location for consistency.

Differential Revision: https://reviews.llvm.org/D62637

llvm-svn: 362047
---
 llvm/runtimes/CMakeLists.txt | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 51ad2371f44f6..b9531daa4abf3 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -90,9 +90,15 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
   set(LLVM_RUNTIME_OUTPUT_INTDIR ${LLVM_TOOLS_BINARY_DIR})
 
   # This variable makes sure that e.g. llvm-lit is found.
+  set(LLVM_BINARY_DIR ${LLVM_BUILD_BINARY_DIR})
+  set(LLVM_LIBRARY_DIR ${LLVM_BUILD_LIBRARY_DIR})
+  set(LLVM_INCLUDE_DIR ${LLVM_BUILD_MAIN_INCLUDE_DIR})
   set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR})
   set(LLVM_CMAKE_PATH ${LLVM_MAIN_SRC_DIR}/cmake/modules)
 
+  # This variable is used by individual runtimes to locate LLVM files.
+  set(LLVM_PATH ${LLVM_BUILD_MAIN_SRC_DIR})
+
   if(APPLE)
     set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "")
   endif()
@@ -375,8 +381,6 @@ else() # if this is included from LLVM's CMake
                              # Builtins were built separately above
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}
-                                        -DLLVM_BINARY_DIR=${LLVM_BINARY_DIR}
-                                        -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR}
                                         -DLLVM_DEFAULT_TARGET_TRIPLE=${TARGET_TRIPLE}
                                         -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON
                                         -DCMAKE_C_COMPILER_TARGET=${TARGET_TRIPLE}
@@ -466,8 +470,6 @@ else() # if this is included from LLVM's CMake
                              # Builtins were built separately above
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}
-                                        -DLLVM_BINARY_DIR=${LLVM_BINARY_DIR}
-                                        -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR}
                                         -DLLVM_DEFAULT_TARGET_TRIPLE=${target}
                                         -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON
                                         -DCMAKE_C_COMPILER_TARGET=${target}

From 996e62eef750942e174c4b80892b28e198e3a8d0 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 01:34:41 +0000
Subject: [PATCH 0570/1176] [runtimes] Support ELF dependent libraries feature

As of r360984, LLD supports dependent libraries feature for ELF.
libunwind, libc++abi and libc++ have library dependencies: libdl librt
and libpthread, which means that when libunwind and libc++ are being
statically linked (using -static-libstdc++ flag), user has to manually
specify -ldl -lpthread which is onerous.

This change includes the lib pragma to specify the library dependencies
directly in the source that uses those libraries. This doesn't make any
difference when using linkers that don't support dependent libraries.
However, when using LLD that has dependent libraries feature, users no
longer have to manually specifying library dependencies when using
static linking, linker will pick the library automatically.

Differential Revision: https://reviews.llvm.org/D62090

llvm-svn: 362048
---
 libcxx/src/algorithm.cpp                    | 5 +++++
 libcxx/src/chrono.cpp                       | 4 ++++
 libcxx/src/condition_variable.cpp           | 4 ++++
 libcxx/src/debug.cpp                        | 5 +++++
 libcxx/src/experimental/memory_resource.cpp | 3 +++
 libcxx/src/filesystem/operations.cpp        | 4 ++++
 libcxx/src/memory.cpp                       | 3 +++
 libcxx/src/mutex.cpp                        | 6 ++++++
 libcxx/src/shared_mutex.cpp                 | 3 +++
 libcxx/src/thread.cpp                       | 4 ++++
 libcxxabi/src/cxa_guard_impl.h              | 5 +++++
 libcxxabi/src/cxa_thread_atexit.cpp         | 6 ++++++
 libcxxabi/src/fallback_malloc.cpp           | 5 +++++
 libunwind/src/AddressSpace.hpp              | 3 +++
 libunwind/src/RWMutex.hpp                   | 3 +++
 15 files changed, 63 insertions(+)

diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp
index 28e452f52df12..5ce2a23b464fa 100644
--- a/libcxx/src/algorithm.cpp
+++ b/libcxx/src/algorithm.cpp
@@ -8,7 +8,12 @@
 
 #include "algorithm"
 #include "random"
+#ifndef _LIBCPP_HAS_NO_THREADS
 #include "mutex"
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+#endif
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp
index c1eb67b61d727..a2f88c94c0779 100644
--- a/libcxx/src/chrono.cpp
+++ b/libcxx/src/chrono.cpp
@@ -37,6 +37,10 @@
 #endif
 #endif
 
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "rt")
+#endif
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace chrono
diff --git a/libcxx/src/condition_variable.cpp b/libcxx/src/condition_variable.cpp
index 4022ff2e9ad0f..69264c680d93e 100644
--- a/libcxx/src/condition_variable.cpp
+++ b/libcxx/src/condition_variable.cpp
@@ -15,6 +15,10 @@
 #include "system_error"
 #include "__undef_macros"
 
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 condition_variable::~condition_variable()
diff --git a/libcxx/src/debug.cpp b/libcxx/src/debug.cpp
index 7fdf90c37d976..950241310112b 100644
--- a/libcxx/src/debug.cpp
+++ b/libcxx/src/debug.cpp
@@ -13,7 +13,12 @@
 #include "string"
 #include "cstdio"
 #include "__hash_table"
+#ifndef _LIBCPP_HAS_NO_THREADS
 #include "mutex"
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+#endif
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
diff --git a/libcxx/src/experimental/memory_resource.cpp b/libcxx/src/experimental/memory_resource.cpp
index 22bc12c100506..84c95080496fd 100644
--- a/libcxx/src/experimental/memory_resource.cpp
+++ b/libcxx/src/experimental/memory_resource.cpp
@@ -12,6 +12,9 @@
 #include "atomic"
 #elif !defined(_LIBCPP_HAS_NO_THREADS)
 #include "mutex"
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
 #endif
 
 _LIBCPP_BEGIN_NAMESPACE_LFTS_PMR
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index 5ba979ca905ac..319d9f65d7381 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -44,6 +44,10 @@
 #include <sys/time.h> // for gettimeofday and timeval
 #endif                // !defined(CLOCK_REALTIME)
 
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "rt")
+#endif
+
 #if defined(_LIBCPP_COMPILER_GCC)
 #if _GNUC_VER < 500
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
diff --git a/libcxx/src/memory.cpp b/libcxx/src/memory.cpp
index 8b05c3f1649c9..6df7226b35756 100644
--- a/libcxx/src/memory.cpp
+++ b/libcxx/src/memory.cpp
@@ -10,6 +10,9 @@
 #ifndef _LIBCPP_HAS_NO_THREADS
 #include "mutex"
 #include "thread"
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
 #endif
 #include "include/atomic_support.h"
 
diff --git a/libcxx/src/mutex.cpp b/libcxx/src/mutex.cpp
index cecb89b3399fd..d100f2df23389 100644
--- a/libcxx/src/mutex.cpp
+++ b/libcxx/src/mutex.cpp
@@ -12,6 +12,12 @@
 #include "include/atomic_support.h"
 #include "__undef_macros"
 
+#ifndef _LIBCPP_HAS_NO_THREADS
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+#endif
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 #ifndef _LIBCPP_HAS_NO_THREADS
 
diff --git a/libcxx/src/shared_mutex.cpp b/libcxx/src/shared_mutex.cpp
index e918e1bdf604c..3f1aecfdfe192 100644
--- a/libcxx/src/shared_mutex.cpp
+++ b/libcxx/src/shared_mutex.cpp
@@ -10,6 +10,9 @@
 #ifndef _LIBCPP_HAS_NO_THREADS
 
 #include "shared_mutex"
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
diff --git a/libcxx/src/thread.cpp b/libcxx/src/thread.cpp
index 29b06fdf660a9..92690f6679829 100644
--- a/libcxx/src/thread.cpp
+++ b/libcxx/src/thread.cpp
@@ -35,6 +35,10 @@
 #include <windows.h>
 #endif
 
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 thread::~thread()
diff --git a/libcxxabi/src/cxa_guard_impl.h b/libcxxabi/src/cxa_guard_impl.h
index 412099e6f4e71..bd6b15fce6a83 100644
--- a/libcxxabi/src/cxa_guard_impl.h
+++ b/libcxxabi/src/cxa_guard_impl.h
@@ -49,6 +49,11 @@
 
 #include <stdlib.h>
 #include <__threading_support>
+#ifndef _LIBCXXABI_HAS_NO_THREADS
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+#endif
 
 // To make testing possible, this header is included from both cxa_guard.cpp
 // and a number of tests.
diff --git a/libcxxabi/src/cxa_thread_atexit.cpp b/libcxxabi/src/cxa_thread_atexit.cpp
index da1df8617088a..38787f18fe30f 100644
--- a/libcxxabi/src/cxa_thread_atexit.cpp
+++ b/libcxxabi/src/cxa_thread_atexit.cpp
@@ -9,6 +9,12 @@
 #include "abort_message.h"
 #include "cxxabi.h"
 #include <__threading_support>
+#ifndef _LIBCXXABI_HAS_NO_THREADS
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+#endif
+
 #include <cstdlib>
 
 namespace __cxxabiv1 {
diff --git a/libcxxabi/src/fallback_malloc.cpp b/libcxxabi/src/fallback_malloc.cpp
index 8ec1eeefd849e..bae0fa4ac2323 100644
--- a/libcxxabi/src/fallback_malloc.cpp
+++ b/libcxxabi/src/fallback_malloc.cpp
@@ -12,6 +12,11 @@
 #include "fallback_malloc.h"
 
 #include <__threading_support>
+#ifndef _LIBCXXABI_HAS_NO_THREADS
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
+#endif
 
 #include <cstdlib> // for malloc, calloc, free
 #include <cstring> // for memset
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index fb370ad1e79b4..6643953030975 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -27,6 +27,9 @@
 
 #if _LIBUNWIND_USE_DLADDR
 #include <dlfcn.h>
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "dl")
+#endif
 #endif
 
 #ifdef __APPLE__
diff --git a/libunwind/src/RWMutex.hpp b/libunwind/src/RWMutex.hpp
index 7a08bb2af32eb..4f234a77edf14 100644
--- a/libunwind/src/RWMutex.hpp
+++ b/libunwind/src/RWMutex.hpp
@@ -17,6 +17,9 @@
 #include <windows.h>
 #elif !defined(_LIBUNWIND_HAS_NO_THREADS)
 #include <pthread.h>
+#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#pragma comment(lib, "pthread")
+#endif
 #endif
 
 namespace libunwind {

From a05fda68bc5e6b42948a3e3ddf0df1e069c38c19 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 01:51:16 +0000
Subject: [PATCH 0571/1176] DWARFDebugInfoEntry: delete unused Extract() and
 rename FastExtract() to Extract()

The function Extract() is almost a duplicate of FastExtract() but is not used.
Delete it and rename FastExtract() to Extract().

Reviewed By: JDevlieghere

Differential Revision: https://reviews.llvm.org/D62593

llvm-svn: 362049
---
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  | 171 +-----------------
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.h    |   7 +-
 .../Plugins/SymbolFile/DWARF/DWARFUnit.cpp    |   4 +-
 3 files changed, 10 insertions(+), 172 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index 3bfb287466489..76aaf5212391d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -31,9 +31,12 @@ using namespace lldb_private;
 using namespace std;
 extern int g_verbose;
 
-bool DWARFDebugInfoEntry::FastExtract(
-    const DWARFDataExtractor &debug_info_data, const DWARFUnit *cu,
-    lldb::offset_t *offset_ptr) {
+// Extract a debug info entry for a given compile unit from the .debug_info and
+// .debug_abbrev data within the SymbolFileDWARF class starting at the given
+// offset
+bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &debug_info_data,
+                                  const DWARFUnit *cu,
+                                  lldb::offset_t *offset_ptr) {
   m_offset = *offset_ptr;
   m_parent_idx = 0;
   m_sibling_idx = 0;
@@ -198,168 +201,6 @@ bool DWARFDebugInfoEntry::FastExtract(
   return false;
 }
 
-// Extract
-//
-// Extract a debug info entry for a given compile unit from the .debug_info and
-// .debug_abbrev data within the SymbolFileDWARF class starting at the given
-// offset
-bool DWARFDebugInfoEntry::Extract(const DWARFUnit *cu,
-                                  lldb::offset_t *offset_ptr) {
-  const DWARFDataExtractor &debug_info_data = cu->GetData();
-  //    const DWARFDataExtractor& debug_str_data =
-  //    dwarf2Data->get_debug_str_data();
-  const uint32_t cu_end_offset = cu->GetNextUnitOffset();
-  lldb::offset_t offset = *offset_ptr;
-  //  if (offset >= cu_end_offset)
-  //      Log::Status("DIE at offset 0x%8.8x is beyond the end of the current
-  //      compile unit (0x%8.8x)", m_offset, cu_end_offset);
-  if ((offset < cu_end_offset) && debug_info_data.ValidOffset(offset)) {
-    m_offset = offset;
-
-    const uint64_t abbr_idx = debug_info_data.GetULEB128(&offset);
-    lldbassert(abbr_idx <= UINT16_MAX);
-    m_abbr_idx = abbr_idx;
-    if (abbr_idx) {
-      const DWARFAbbreviationDeclaration *abbrevDecl =
-          cu->GetAbbreviations()->GetAbbreviationDeclaration(abbr_idx);
-
-      if (abbrevDecl) {
-        m_tag = abbrevDecl->Tag();
-        m_has_children = abbrevDecl->HasChildren();
-
-        bool isCompileUnitTag = (m_tag == DW_TAG_compile_unit ||
-                                 m_tag == DW_TAG_partial_unit);
-        if (cu && isCompileUnitTag)
-          const_cast<DWARFUnit *>(cu)->SetBaseAddress(0);
-
-        // Skip all data in the .debug_info for the attributes
-        const uint32_t numAttributes = abbrevDecl->NumAttributes();
-        for (uint32_t i = 0; i < numAttributes; ++i) {
-          DWARFFormValue form_value(cu);
-          dw_attr_t attr;
-          abbrevDecl->GetAttrAndFormValueByIndex(i, attr, form_value);
-          dw_form_t form = form_value.Form();
-
-          if (isCompileUnitTag &&
-              ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) {
-            if (form_value.ExtractValue(debug_info_data, &offset)) {
-              if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc)
-                const_cast<DWARFUnit *>(cu)->SetBaseAddress(
-                    form_value.Address());
-            }
-          } else {
-            bool form_is_indirect = false;
-            do {
-              form_is_indirect = false;
-              uint32_t form_size = 0;
-              switch (form) {
-              // Blocks if inlined data that have a length field and the data
-              // bytes inlined in the .debug_info
-              case DW_FORM_exprloc:
-              case DW_FORM_block:
-                form_size = debug_info_data.GetULEB128(&offset);
-                break;
-              case DW_FORM_block1:
-                form_size = debug_info_data.GetU8(&offset);
-                break;
-              case DW_FORM_block2:
-                form_size = debug_info_data.GetU16(&offset);
-                break;
-              case DW_FORM_block4:
-                form_size = debug_info_data.GetU32(&offset);
-                break;
-
-              // Inlined NULL terminated C-strings
-              case DW_FORM_string:
-                debug_info_data.GetCStr(&offset);
-                break;
-
-              // Compile unit address sized values
-              case DW_FORM_addr:
-                form_size = cu->GetAddressByteSize();
-                break;
-              case DW_FORM_ref_addr:
-                if (cu->GetVersion() <= 2)
-                  form_size = cu->GetAddressByteSize();
-                else
-                  form_size = 4;
-                break;
-
-              // 0 sized form
-              case DW_FORM_flag_present:
-              case DW_FORM_implicit_const:
-                form_size = 0;
-                break;
-
-              // 1 byte values
-              case DW_FORM_data1:
-              case DW_FORM_flag:
-              case DW_FORM_ref1:
-                form_size = 1;
-                break;
-
-              // 2 byte values
-              case DW_FORM_data2:
-              case DW_FORM_ref2:
-                form_size = 2;
-                break;
-
-              // 4 byte values
-              case DW_FORM_data4:
-              case DW_FORM_ref4:
-                form_size = 4;
-                break;
-
-              // 8 byte values
-              case DW_FORM_data8:
-              case DW_FORM_ref8:
-              case DW_FORM_ref_sig8:
-                form_size = 8;
-                break;
-
-              // signed or unsigned LEB 128 values
-              case DW_FORM_addrx:
-              case DW_FORM_sdata:
-              case DW_FORM_udata:
-              case DW_FORM_ref_udata:
-              case DW_FORM_GNU_addr_index:
-              case DW_FORM_GNU_str_index:
-                debug_info_data.Skip_LEB128(&offset);
-                break;
-
-              case DW_FORM_indirect:
-                form = debug_info_data.GetULEB128(&offset);
-                form_is_indirect = true;
-                break;
-
-              case DW_FORM_strp:
-              case DW_FORM_sec_offset:
-                debug_info_data.GetU32(&offset);
-                break;
-
-              default:
-                *offset_ptr = offset;
-                return false;
-              }
-
-              offset += form_size;
-            } while (form_is_indirect);
-          }
-        }
-        *offset_ptr = offset;
-        return true;
-      }
-    } else {
-      m_tag = 0;
-      m_has_children = false;
-      *offset_ptr = offset;
-      return true; // NULL debug tag entry
-    }
-  }
-
-  return false;
-}
-
 static DWARFRangeList GetRangesOrReportError(const DWARFUnit &unit,
                                              const DWARFDebugInfoEntry &die,
                                              const DWARFFormValue &value) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
index f5da1bf7e5bf4..1d2eb3122b587 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
@@ -43,11 +43,8 @@ class DWARFDebugInfoEntry {
   void BuildFunctionAddressRangeTable(const DWARFUnit *cu,
                                       DWARFDebugAranges *debug_aranges) const;
 
-  bool FastExtract(const lldb_private::DWARFDataExtractor &debug_info_data,
-                   const DWARFUnit *cu,
-                   lldb::offset_t *offset_ptr);
-
-  bool Extract(const DWARFUnit *cu, lldb::offset_t *offset_ptr);
+  bool Extract(const lldb_private::DWARFDataExtractor &debug_info_data,
+               const DWARFUnit *cu, lldb::offset_t *offset_ptr);
 
   bool LookupAddress(const dw_addr_t address, const DWARFUnit *cu,
                      DWARFDebugInfoEntry **function_die,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index ca5aebd00c873..287ff60b600d2 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -61,7 +61,7 @@ void DWARFUnit::ExtractUnitDIEIfNeeded() {
   // parse
   const DWARFDataExtractor &data = GetData();
   if (offset < GetNextUnitOffset() &&
-      m_first_die.FastExtract(data, this, &offset)) {
+      m_first_die.Extract(data, this, &offset)) {
     AddUnitDIE(m_first_die);
     return;
   }
@@ -165,7 +165,7 @@ void DWARFUnit::ExtractDIEsRWLocked() {
   die_index_stack.reserve(32);
   die_index_stack.push_back(0);
   bool prev_die_had_children = false;
-  while (offset < next_cu_offset && die.FastExtract(data, this, &offset)) {
+  while (offset < next_cu_offset && die.Extract(data, this, &offset)) {
     const bool null_die = die.IsNULL();
     if (depth == 0) {
       assert(m_die_array.empty() && "Compile unit DIE already added");

From 0666f9c4e44e0841e5f2f0913659505789604f1b Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 01:55:43 +0000
Subject: [PATCH 0572/1176] [Driver] -static-pie: add -z text

This matches gcc -static-pie. The intention is to prevent dynamic
relocations in read-only segments.

In ld.bfd and gold, -z notext is the default. If text relocations are needed:

* -z notext: allow and emit DF_TEXTREL.
  DF_TEXTREL is not emitted if there is no text relocation.
* -z text: error

In lld, -z text is the default (this change is a no-op).

* -z text: error on text relocations
* -z notext: allow text relocations, and emit DF_TEXTREL no matter whether
  text relocations exist.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D62606

llvm-svn: 362050
---
 clang/lib/Driver/ToolChains/Gnu.cpp | 2 ++
 clang/test/Driver/linux-ld.c        | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 30a61d5ad1207..c8520968e45ee 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -382,6 +382,8 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-static");
     CmdArgs.push_back("-pie");
     CmdArgs.push_back("--no-dynamic-linker");
+    CmdArgs.push_back("-z");
+    CmdArgs.push_back("text");
   }
 
   if (ToolChain.isNoExecStackDefault()) {
diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c
index e602c6c323737..70fc178c56bf4 100644
--- a/clang/test/Driver/linux-ld.c
+++ b/clang/test/Driver/linux-ld.c
@@ -190,6 +190,8 @@
 // CHECK-CLANG-LD-STATIC-PIE: "-static"
 // CHECK-CLANG-LD-STATIC-PIE: "-pie"
 // CHECK-CLANG-LD-STATIC-PIE: "--no-dynamic-linker"
+// CHECK-CLANG-LD-STATIC-PIE: "-z"
+// CHECK-CLANG-LD-STATIC-PIE: "text"
 // CHECK-CLANG-LD-STATIC-PIE: "-m" "elf_x86_64"
 // CHECK-CLANG-LD-STATIC-PIE: "{{.*}}rcrt1.o"
 // CHECK-CLANG-LD-STATIC-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
@@ -203,6 +205,8 @@
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "-static"
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "-pie"
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "--no-dynamic-linker"
+// CHECK-CLANG-LD-STATIC-PIE-PIE: "-z"
+// CHECK-CLANG-LD-STATIC-PIE-PIE: "text"
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "-m" "elf_x86_64"
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "{{.*}}rcrt1.o"
 // CHECK-CLANG-LD-STATIC-PIE-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"
@@ -216,6 +220,8 @@
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "-static"
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "-pie"
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "--no-dynamic-linker"
+// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-z"
+// CHECK-CLANG-LD-STATIC-PIE-STATIC: "text"
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "-m" "elf_x86_64"
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "{{.*}}rcrt1.o"
 // CHECK-CLANG-LD-STATIC-PIE-STATIC: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group"

From d9e9701c4b3f422296dbac729dbd9048eecbf7ee Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 02:13:15 +0000
Subject: [PATCH 0573/1176] [PowerPC] Set the default PLT mode on musl to
 Secure PLT

The musl libc only supports Secure PLT.

Patch by A. Wilcox!

Reviewed By: jhibbits

Differential Revision: https://reviews.llvm.org/D59185

llvm-svn: 362051
---
 clang/lib/Driver/ToolChains/Arch/PPC.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
index f0a3271564cf6..30f1a0d9022c2 100644
--- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
@@ -115,7 +115,7 @@ ppc::ReadGOTPtrMode ppc::getPPCReadGOTPtrMode(const Driver &D, const llvm::Tripl
                                               const ArgList &Args) {
   if (Args.getLastArg(options::OPT_msecure_plt))
     return ppc::ReadGOTPtrMode::SecurePlt;
-  if (Triple.isOSNetBSD() || Triple.isOSOpenBSD())
+  if (Triple.isOSNetBSD() || Triple.isOSOpenBSD() || Triple.isMusl())
     return ppc::ReadGOTPtrMode::SecurePlt;
   else
     return ppc::ReadGOTPtrMode::Bss;

From 2632ebb551db67801d41ee8e3ab931f639180a94 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 02:30:04 +0000
Subject: [PATCH 0574/1176] [Driver] Render target options (e.g.
 -fuse-init-array) for -fembed-bitcode

Modern ELF platforms use -fuse-init-array to emit .init_array instead of
.ctors .  ld.bfd and gold --ctors-in-init-array merge .init_array and
.ctors into .init_array but lld doesn't do that.

If crtbegin*.o crtend*.o don't provide .ctors/.dtors, such .ctors in
user object files can lead to crash (see PR42002. The first and the last
elements in .ctors/.dtors are ignored - they are traditionally provided
by crtbegin*.o crtend*.o).

Call addClangTargetOptions() to ensure -fuse-init-array is rendered on
modern ELF platforms. On Hexagon, this renders -target-feature
+reserved-r19 for -ffixed-r19.

Reviewed By: compnerd

Differential Revision: https://reviews.llvm.org/D62509

llvm-svn: 362052
---
 clang/lib/Driver/ToolChains/Clang.cpp | 3 +++
 clang/test/Driver/fembed-bitcode.c    | 8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3009bc8d292b1..650f73d36f2fa 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3666,6 +3666,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     // Disable all llvm IR level optimizations.
     CmdArgs.push_back("-disable-llvm-passes");
 
+    // Render target options such as -fuse-init-array on modern ELF platforms.
+    TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
+
     // reject options that shouldn't be supported in bitcode
     // also reject kernel/kext
     static const constexpr unsigned kBitcodeOptionBlacklist[] = {
diff --git a/clang/test/Driver/fembed-bitcode.c b/clang/test/Driver/fembed-bitcode.c
index b0ec63f13ff38..c1f020b38ad2b 100644
--- a/clang/test/Driver/fembed-bitcode.c
+++ b/clang/test/Driver/fembed-bitcode.c
@@ -26,3 +26,11 @@
 // CHECK-AARCH64: "darwinpcs"
 // CHECK-AARCH64-NOT: "-fdebug-compilation-dir"
 
+// RUN: %clang -target x86_64-pc-freebsd12 -fembed-bitcode=all -c %s -### 2>&1 \
+// RUN:     | FileCheck --check-prefix=CHECK-INITARRAY %s
+// CHECK-INITARRAY: "-fuse-init-array"
+
+// RUN: %clang -target hexagon-unknown-elf -ffixed-r19 -fembed-bitcode=all -c %s -### 2>&1 \
+// RUN:     | FileCheck --check-prefix=CHECK-HEXAGON %s
+// CHECK-HEXAGON: "-target-feature"
+// CHECK-HEXAGON: "+reserved-r19"

From 1f67d9427931f61585d2392c3b958cd2b459af54 Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Thu, 30 May 2019 03:59:16 +0000
Subject: [PATCH 0575/1176] [X86] Add ENQCMD instructions

For more details about these instructions, please refer to the latest
ISE document:
https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference.

Patch by Tianqing Wang (tianqing)

Differential Revision: https://reviews.llvm.org/D62281

llvm-svn: 362053
---
 llvm/include/llvm/IR/IntrinsicsX86.td      | 10 +++
 llvm/lib/Support/Host.cpp                  |  1 +
 llvm/lib/Target/X86/X86.td                 |  2 +
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp    |  2 +
 llvm/lib/Target/X86/X86ISelLowering.cpp    | 23 +++++++
 llvm/lib/Target/X86/X86ISelLowering.h      |  3 +
 llvm/lib/Target/X86/X86InstrInfo.td        | 40 +++++++++++
 llvm/lib/Target/X86/X86Subtarget.h         |  4 ++
 llvm/test/CodeGen/X86/enqcmd-intrinsics.ll | 57 +++++++++++++++
 llvm/test/MC/Disassembler/X86/x86-16.txt   | 30 ++++++++
 llvm/test/MC/Disassembler/X86/x86-32.txt   | 42 ++++++++++++
 llvm/test/MC/Disassembler/X86/x86-64.txt   | 60 ++++++++++++++++
 llvm/test/MC/X86/x86-16.s                  | 40 +++++++++++
 llvm/test/MC/X86/x86-32-coverage.s         | 56 +++++++++++++++
 llvm/test/MC/X86/x86-64.s                  | 80 ++++++++++++++++++++++
 15 files changed, 450 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/enqcmd-intrinsics.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 2635e3d8648b9..ebf8f2cdd9054 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4872,3 +4872,13 @@ let TargetPrefix = "x86" in {
               Intrinsic<[llvm_v16f32_ty],
               [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
 }
+
+//===----------------------------------------------------------------------===//
+// ENQCMD - Enqueue Stores Instructions
+
+let TargetPrefix = "x86" in {
+  def int_x86_enqcmd : GCCBuiltin<"__builtin_ia32_enqcmd">,
+              Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
+  def int_x86_enqcmds : GCCBuiltin<"__builtin_ia32_enqcmds">,
+              Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
+}
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 542ebe9e53708..1aa2b82ce60a2 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1380,6 +1380,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
   Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
   Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
+  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
 
   // There are two CPUID leafs which information associated with the pconfig
   // instruction:
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 8f6d201bbb405..4148a4458251b 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -263,6 +263,8 @@ def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                     "Support RDPID instructions">;
 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                                       "Wait and pause enhancements">;
+def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
+                                     "Has ENQCMD instructions">;
 // On some processors, instructions that implicitly take two memory operands are
 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1bc7af73a4864..6fffb56c980a3 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2071,6 +2071,8 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
       Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
       Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+      Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
+      Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
       Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
       Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
     unsigned AddrSpace =
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e1c0c8a6bd501..d756be24d67fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23290,6 +23290,27 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
       return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                          Operation.getValue(1));
     }
+    case Intrinsic::x86_enqcmd:
+    case Intrinsic::x86_enqcmds: {
+      SDLoc dl(Op);
+      SDValue Chain = Op.getOperand(0);
+      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+      unsigned Opcode;
+      switch (IntNo) {
+      default: llvm_unreachable("Impossible intrinsic!");
+      case Intrinsic::x86_enqcmd:
+        Opcode = X86ISD::ENQCMD;
+        break;
+      case Intrinsic::x86_enqcmds:
+        Opcode = X86ISD::ENQCMDS;
+        break;
+      }
+      SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
+                                      Op.getOperand(3));
+      SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
+      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
+                         Operation.getValue(1));
+    }
     }
     return SDValue();
   }
@@ -28270,6 +28291,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::NT_BRIND:           return "X86ISD::NT_BRIND";
   case X86ISD::UMWAIT:             return "X86ISD::UMWAIT";
   case X86ISD::TPAUSE:             return "X86ISD::TPAUSE";
+  case X86ISD::ENQCMD:             return "X86ISD:ENQCMD";
+  case X86ISD::ENQCMDS:            return "X86ISD:ENQCMDS";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index be3d29019db7c..a3ebe1001e398 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -589,6 +589,9 @@ namespace llvm {
       // User level wait
       UMWAIT, TPAUSE,
 
+      // Enqueue Stores Instructions
+      ENQCMD, ENQCMDS,
+
       // Compare and swap.
       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LCMPXCHG8_DAG,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 56bc050007896..98af217ebcf85 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -127,6 +127,9 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
 
 def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
 
+def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+                                         SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
+
 def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
                             [SDNPHasChain,SDNPSideEffect]>;
 def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
@@ -307,6 +310,11 @@ def X86tpause : SDNode<"X86ISD::TPAUSE",
                                             SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
                        [SDNPHasChain, SDNPSideEffect]>;
 
+def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
+                       [SDNPHasChain, SDNPSideEffect]>;
+def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
+                       [SDNPHasChain, SDNPSideEffect]>;
+
 //===----------------------------------------------------------------------===//
 // X86 Operand Definitions.
 //
@@ -900,6 +908,7 @@ def HasINVPCID   : Predicate<"Subtarget->hasINVPCID()">;
 def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
 def HasPCONFIG   : Predicate<"Subtarget->hasPCONFIG()">;
+def HasENQCMD    : Predicate<"Subtarget->hasENQCMD()">;
 def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
 def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
@@ -2755,6 +2764,37 @@ def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
                    T8PD, AdSize64, Requires<[HasMOVDIR64B, In64BitMode]>;
 } // SchedRW
 
+//===----------------------------------------------------------------------===//
+// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
+//
+let SchedRW = [WriteStore], Defs = [EFLAGS] in {
+  def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+                 "enqcmd\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
+                 T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+  def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+                 "enqcmd\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
+                 T8XD, AdSize32, Requires<[HasENQCMD]>;
+  def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+                 "enqcmd\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
+                 T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+
+  def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+                 "enqcmds\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
+                 T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+  def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+                 "enqcmds\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
+                 T8XS, AdSize32, Requires<[HasENQCMD]>;
+  def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+                 "enqcmds\t{$src, $dst|$dst, $src}",
+                 [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
+                 T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+}
+
 //===----------------------------------------------------------------------===//
 // CLZERO Instruction
 //
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 43d4ab713181d..6fefe23182f9c 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -356,6 +356,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// Processor has AVX-512 bfloat16 floating-point extensions
   bool HasBF16 = false;
 
+  /// Processor supports ENQCMD instructions
+  bool HasENQCMD = false;
+
   /// Processor has AVX-512 Bit Algorithms instructions
   bool HasBITALG = false;
 
@@ -688,6 +691,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool hasSGX() const { return HasSGX; }
   bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
   bool hasINVPCID() const { return HasINVPCID; }
+  bool hasENQCMD() const { return HasENQCMD; }
   bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
   bool useRetpolineIndirectBranches() const {
     return UseRetpolineIndirectBranches;
diff --git a/llvm/test/CodeGen/X86/enqcmd-intrinsics.ll b/llvm/test/CodeGen/X86/enqcmd-intrinsics.ll
new file mode 100644
index 0000000000000..e7fa68fee33a8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/enqcmd-intrinsics.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+enqcmd | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+enqcmd | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=+enqcmd | FileCheck %s --check-prefix=X32
+
+define i8 @test_enqcmd(i8* %dst, i8* %src) {
+entry:
+; X64-LABEL: test_enqcmd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    enqcmd (%rsi), %rdi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+
+; X86-LABEL: test_enqcmd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9+]}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9+]}}(%esp), %ecx
+; X86-NEXT:    enqcmd (%eax), %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+
+; X32-LABEL: test_enqcmd:
+; X32:       # %bb.0: # %entry
+; X32:         enqcmd (%esi), %edi
+; X32-NEXT:    sete %al
+; X32-NEXT:    retq
+  %0 = call i8 @llvm.x86.enqcmd(i8* %dst, i8* %src)
+  ret i8 %0
+}
+
+define i8 @test_enqcmds(i8* %dst, i8* %src) {
+entry:
+; X64-LABEL: test_enqcmds:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    enqcmds (%rsi), %rdi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+
+; X86-LABEL: test_enqcmds:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9+]}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9+]}}(%esp), %ecx
+; X86-NEXT:    enqcmds (%eax), %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+
+; X32-LABEL: test_enqcmds:
+; X32:       # %bb.0: # %entry
+; X32:         enqcmds (%esi), %edi
+; X32-NEXT:    sete %al
+; X32-NEXT:    retq
+  %0 = call i8 @llvm.x86.enqcmds(i8* %dst, i8* %src)
+  ret i8 %0
+}
+
+declare i8 @llvm.x86.enqcmd(i8*, i8*)
+declare i8 @llvm.x86.enqcmds(i8*, i8*)
diff --git a/llvm/test/MC/Disassembler/X86/x86-16.txt b/llvm/test/MC/Disassembler/X86/x86-16.txt
index 286aa88489cb4..5820b5e273a81 100644
--- a/llvm/test/MC/Disassembler/X86/x86-16.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-16.txt
@@ -806,3 +806,33 @@
 
 #CHECK: movdir64b (%si), %ax
 0x66 0x0f 0x38 0xf8 0x04
+
+# CHECK: enqcmd (%bx), %di
+0xf2,0x0f,0x38,0xf8,0x3f
+
+# CHECK: enqcmd 8128(%si), %ax
+0xf2,0x0f,0x38,0xf8,0x84,0xc0,0x1f
+
+# CHECK: enqcmd -8192(%di), %bx
+0xf2,0x0f,0x38,0xf8,0x9d,0x00,0xe0
+
+# CHECK: enqcmd 7408, %cx
+0xf2,0x0f,0x38,0xf8,0x0e,0xf0,0x1c
+
+# CHECK: enqcmds (%bx), %di
+0xf3,0x0f,0x38,0xf8,0x3f
+
+# CHECK: enqcmds 8128(%si), %ax
+0xf3,0x0f,0x38,0xf8,0x84,0xc0,0x1f
+
+# CHECK: enqcmds -8192(%di), %bx
+0xf3,0x0f,0x38,0xf8,0x9d,0x00,0xe0
+
+# CHECK: enqcmds 7408, %cx
+0xf3,0x0f,0x38,0xf8,0x0e,0xf0,0x1c
+
+# CHECK: enqcmd (%edi), %edi
+0x67,0xf2,0x0f,0x38,0xf8,0x3f
+
+# CHECK: enqcmds (%edi), %edi
+0x67,0xf3,0x0f,0x38,0xf8,0x3f
diff --git a/llvm/test/MC/Disassembler/X86/x86-32.txt b/llvm/test/MC/Disassembler/X86/x86-32.txt
index 068bfbecbd516..86157d0e8f616 100644
--- a/llvm/test/MC/Disassembler/X86/x86-32.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-32.txt
@@ -901,3 +901,45 @@
 
 # CHECK: addb $0, 305419896(,%eiz)
 0x80 0x04 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: enqcmd 268435456(%ebp,%eax,8), %esi
+0xf2,0x0f,0x38,0xf8,0xb4,0xc5,0x00,0x00,0x00,0x10
+
+# CHECK: enqcmd (%ecx), %edi
+0xf2,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmd 8128(%ecx), %eax
+0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00
+
+# CHECK: enqcmd -8192(%edx), %ebx
+0xf2,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff
+
+# CHECK: enqcmd 485498096, %eax
+0xf2,0x0f,0x38,0xf8,0x05,0xf0,0x1c,0xf0,0x1c
+
+# CHECK: enqcmds 268435456(%ebp,%eax,8), %esi
+0xf3,0x0f,0x38,0xf8,0xb4,0xc5,0x00,0x00,0x00,0x10
+
+# CHECK: enqcmds (%ecx), %edi
+0xf3,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmds 8128(%ecx), %eax
+0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00
+
+# CHECK: enqcmds -8192(%edx), %ebx
+0xf3,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff
+
+# CHECK: enqcmds 485498096, %eax
+0xf3,0x0f,0x38,0xf8,0x05,0xf0,0x1c,0xf0,0x1c
+
+# CHECK: enqcmd (%bx,%di), %di
+0x67,0xf2,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmd 8128(%bx,%di), %ax
+0x67,0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f
+
+# CHECK: enqcmds (%bx,%di), %di
+0x67,0xf3,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmds 8128(%bx,%di), %ax
+0x67,0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f
diff --git a/llvm/test/MC/Disassembler/X86/x86-64.txt b/llvm/test/MC/Disassembler/X86/x86-64.txt
index 0e7df7562d415..9b4064c3f73ab 100644
--- a/llvm/test/MC/Disassembler/X86/x86-64.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -631,3 +631,63 @@
 0x63 0x08
 # CHECK: movslq (%rax), %cx
 0x66 0x63 0x08
+
+# CHECK: enqcmd 485498096, %eax
+0x67,0xf2,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c
+
+# CHECK: enqcmd  268435456(%ebp,%r14d,8), %esi
+0x67,0xf2,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: enqcmd  (%r9d), %edi
+0x67,0xf2,0x41,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmd  8128(%ecx), %eax
+0x67,0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00
+
+# CHECK: enqcmd  -8192(%edx), %ebx
+0x67,0xf2,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff
+
+# CHECK: enqcmds 268435456(%ebp,%r14d,8), %esi
+0x67,0xf3,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: enqcmds (%r9d), %edi
+0x67,0xf3,0x41,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmds 8128(%ecx), %eax
+0x67,0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00
+
+# CHECK: enqcmds -8192(%edx), %ebx
+0x67,0xf3,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff
+
+# CHECK: enqcmds 485498096, %eax
+0x67,0xf3,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c
+
+# CHECK: enqcmd  268435456(%rbp,%r14,8), %rsi
+0xf2,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: enqcmd  (%r9), %rdi
+0xf2,0x41,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmd  8128(%rcx), %rax
+0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00
+
+# CHECK: enqcmd  -8192(%rdx), %rbx
+0xf2,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff
+
+# CHECK: enqcmd 485498096, %rax
+0xf2,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c
+
+# CHECK: enqcmds 268435456(%rbp,%r14,8), %rsi
+0xf3,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: enqcmds (%r9), %rdi
+0xf3,0x41,0x0f,0x38,0xf8,0x39
+
+# CHECK: enqcmds 8128(%rcx), %rax
+0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00
+
+# CHECK: enqcmds -8192(%rdx), %rbx
+0xf3,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff
+
+# CHECK: enqcmds 485498096, %rax
+0xf3,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c
diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s
index 88ee77a6d4143..b849d0aacd2b5 100644
--- a/llvm/test/MC/X86/x86-16.s
+++ b/llvm/test/MC/X86/x86-16.s
@@ -989,3 +989,43 @@ movdir64b (%esi), %eax
 // CHECK: movdir64b (%si), %ax
 // CHECK: encoding: [0x66,0x0f,0x38,0xf8,0x04]
 movdir64b (%si), %ax
+
+// CHECK: enqcmd (%bx), %di
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x3f]
+enqcmd  (%bx), %di
+
+// CHECK: enqcmd 8128(%si), %ax
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x84,0xc0,0x1f]
+enqcmd  8128(%si), %ax
+
+// CHECK: enqcmd -8192(%di), %bx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x9d,0x00,0xe0]
+enqcmd  -8192(%di), %bx
+
+// CHECK: enqcmd 7408, %cx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x0e,0xf0,0x1c]
+enqcmd  7408, %cx
+
+// CHECK: enqcmds (%bx), %di
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x3f]
+enqcmds (%bx), %di
+
+// CHECK: enqcmds 8128(%si), %ax
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x84,0xc0,0x1f]
+enqcmds 8128(%si), %ax
+
+// CHECK: enqcmds -8192(%di), %bx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x9d,0x00,0xe0]
+enqcmds -8192(%di), %bx
+
+// CHECK: enqcmds 7408, %cx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x0e,0xf0,0x1c]
+enqcmds  7408, %cx
+
+// CHECK: enqcmd (%edi), %edi
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x3f]
+enqcmd  (%edi), %edi
+
+// CHECK: enqcmds (%edi), %edi
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x3f]
+enqcmds (%edi), %edi
diff --git a/llvm/test/MC/X86/x86-32-coverage.s b/llvm/test/MC/X86/x86-32-coverage.s
index ca6de8fe7cfdd..dbff21471a213 100644
--- a/llvm/test/MC/X86/x86-32-coverage.s
+++ b/llvm/test/MC/X86/x86-32-coverage.s
@@ -10816,3 +10816,59 @@ aam $10
 // CHECK: aad # encoding: [0xd5,0x0a]
 // INTEL: aad{{$}}
 aad $10
+
+// CHECK: enqcmd 268435456(%ebp,%eax,8), %esi
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0xb4,0xc5,0x00,0x00,0x00,0x10]
+enqcmd  0x10000000(%ebp, %eax, 8), %esi
+
+// CHECK: enqcmd (%ecx), %edi
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x39]
+enqcmd  (%ecx), %edi
+
+// CHECK: enqcmd 8128(%ecx), %eax
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00]
+enqcmd  8128(%ecx), %eax
+
+// CHECK: enqcmd -8192(%edx), %ebx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff]
+enqcmd  -8192(%edx), %ebx
+
+// CHECK: enqcmd 485498096, %eax
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x05,0xf0,0x1c,0xf0,0x1c]
+enqcmd 485498096, %eax
+
+// CHECK: enqcmds 268435456(%ebp,%eax,8), %esi
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0xb4,0xc5,0x00,0x00,0x00,0x10]
+enqcmds 0x10000000(%ebp, %eax, 8), %esi
+
+// CHECK: enqcmds (%ecx), %edi
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x39]
+enqcmds (%ecx), %edi
+
+// CHECK: enqcmds 8128(%ecx), %eax
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00]
+enqcmds 8128(%ecx), %eax
+
+// CHECK: enqcmds -8192(%edx), %ebx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff]
+enqcmds -8192(%edx), %ebx
+
+// CHECK: enqcmds 485498096, %eax
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x05,0xf0,0x1c,0xf0,0x1c]
+enqcmds 485498096, %eax
+
+// CHECK: enqcmd (%bx,%di), %di
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x39]
+enqcmd  (%bx,%di), %di
+
+// CHECK: enqcmd 8128(%bx,%di), %ax
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f]
+enqcmd  8128(%bx,%di), %ax
+
+// CHECK: enqcmds (%bx,%di), %di
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x39]
+enqcmds (%bx,%di), %di
+
+// CHECK: enqcmds 8128(%bx,%di), %ax
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f]
+enqcmds 8128(%bx,%di), %ax
diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s
index 00f99667881d4..b2251ecb4473c 100644
--- a/llvm/test/MC/X86/x86-64.s
+++ b/llvm/test/MC/X86/x86-64.s
@@ -1797,3 +1797,83 @@ rep
 // CHECK: lock
 // This line has to be the last one in the file
 lock
+
+// CHECK: enqcmd 268435456(%ebp,%r14d,8), %esi
+// CHECK: encoding: [0x67,0xf2,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+enqcmd  0x10000000(%ebp, %r14d, 8), %esi
+
+// CHECK: enqcmd (%r9d), %edi
+// CHECK: encoding: [0x67,0xf2,0x41,0x0f,0x38,0xf8,0x39]
+enqcmd  (%r9d), %edi
+
+// CHECK: enqcmd 8128(%ecx), %eax
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00]
+enqcmd  8128(%ecx), %eax
+
+// CHECK: enqcmd -8192(%edx), %ebx
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff]
+enqcmd  -8192(%edx), %ebx
+
+// CHECK: enqcmd 485498096, %eax
+// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c]
+enqcmd 485498096, %eax
+
+// CHECK: enqcmds 268435456(%ebp,%r14d,8), %esi
+// CHECK: encoding: [0x67,0xf3,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+enqcmds 0x10000000(%ebp, %r14d, 8), %esi
+
+// CHECK: enqcmds (%r9d), %edi
+// CHECK: encoding: [0x67,0xf3,0x41,0x0f,0x38,0xf8,0x39]
+enqcmds (%r9d), %edi
+
+// CHECK: enqcmds 8128(%ecx), %eax
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00]
+enqcmds 8128(%ecx), %eax
+
+// CHECK: enqcmds -8192(%edx), %ebx
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff]
+enqcmds -8192(%edx), %ebx
+
+// CHECK: enqcmds 485498096, %eax
+// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c]
+enqcmds 485498096, %eax
+
+// CHECK: enqcmd 268435456(%rbp,%r14,8), %rsi
+// CHECK: encoding: [0xf2,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+enqcmd  0x10000000(%rbp, %r14, 8), %rsi
+
+// CHECK: enqcmd (%r9), %rdi
+// CHECK: encoding: [0xf2,0x41,0x0f,0x38,0xf8,0x39]
+enqcmd  (%r9), %rdi
+
+// CHECK: enqcmd 8128(%rcx), %rax
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00]
+enqcmd  8128(%rcx), %rax
+
+// CHECK: enqcmd -8192(%rdx), %rbx
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff]
+enqcmd  -8192(%rdx), %rbx
+
+// CHECK: enqcmd 485498096, %rax
+// CHECK: encoding: [0xf2,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c]
+enqcmd 485498096, %rax
+
+// CHECK: enqcmds 268435456(%rbp,%r14,8), %rsi
+// CHECK: encoding: [0xf3,0x42,0x0f,0x38,0xf8,0xb4,0xf5,0x00,0x00,0x00,0x10]
+enqcmds 0x10000000(%rbp, %r14, 8), %rsi
+
+// CHECK: enqcmds (%r9), %rdi
+// CHECK: encoding: [0xf3,0x41,0x0f,0x38,0xf8,0x39]
+enqcmds (%r9), %rdi
+
+// CHECK: enqcmds 8128(%rcx), %rax
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x81,0xc0,0x1f,0x00,0x00]
+enqcmds 8128(%rcx), %rax
+
+// CHECK: enqcmds -8192(%rdx), %rbx
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x9a,0x00,0xe0,0xff,0xff]
+enqcmds -8192(%rdx), %rbx
+
+// CHECK: enqcmds 485498096, %rax
+// CHECK: encoding: [0xf3,0x0f,0x38,0xf8,0x04,0x25,0xf0,0x1c,0xf0,0x1c]
+enqcmds 485498096, %rax

From 2c91c3b7af7cd4da64f1babde3798d65522a21e4 Mon Sep 17 00:00:00 2001
From: John McCall <rjmccall@apple.com>
Date: Thu, 30 May 2019 04:09:01 +0000
Subject: [PATCH 0576/1176] Add the `objc_class_stub` attribute.

Swift requires certain classes to be not just initialized lazily on first
use, but actually allocated lazily using information that is only available
at runtime.  This is incompatible with ObjC class initialization, or at least
not efficiently compatible, because there is no meaningful class symbol
that can be put in a class-ref variable at load time.  This leaves ObjC
code unable to access such classes, which is undesirable.

objc_class_stub says that class references should be resolved by calling
a new ObjC runtime function with a pointer to a new "class stub" structure.
Non-ObjC compilers (like Swift) can simply emit this structure when ObjC
interop is required for a class that cannot be statically allocated,
then apply this attribute to the `@interface` in the generated ObjC header
for the class.

This attribute can be thought of as a generalization of the existing
`objc_runtime_visible` attribute which permits more efficient class
resolution as well as supporting the additon of categories to the class.
Subclassing these classes from ObjC is currently not allowed.

Patch by Slava Pestov!

llvm-svn: 362054
---
 clang/include/clang/Basic/Attr.td             |  18 ++-
 clang/include/clang/Basic/AttrDocs.td         |  19 ++++
 .../clang/Basic/DiagnosticSemaKinds.td        |   6 +
 clang/include/clang/Basic/ObjCRuntime.h       |  16 +++
 clang/lib/CodeGen/CGObjCMac.cpp               | 103 +++++++++++++++---
 clang/lib/Sema/SemaDeclAttr.cpp               |   3 +
 clang/lib/Sema/SemaDeclObjC.cpp               |   7 ++
 clang/test/CodeGenObjC/class-stubs.m          |  84 ++++++++++++++
 ...a-attribute-supported-attributes-list.test |   1 +
 .../SemaObjC/class-stub-attr-unsupported.m    |  10 ++
 clang/test/SemaObjC/class-stub-attr.m         |  27 +++++
 clang/utils/TableGen/ClangAttrEmitter.cpp     |  65 +++++------
 12 files changed, 312 insertions(+), 47 deletions(-)
 create mode 100644 clang/test/CodeGenObjC/class-stubs.m
 create mode 100644 clang/test/SemaObjC/class-stub-attr-unsupported.m
 create mode 100644 clang/test/SemaObjC/class-stub-attr.m

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 130899a2613a9..ad179009ea6a2 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -284,20 +284,25 @@ class SubjectList<list<AttrSubject> subjects, SubjectDiag diag = WarnDiag,
   string CustomDiag = customDiag;
 }
 
-class LangOpt<string name, bit negated = 0> {
+class LangOpt<string name, code customCode = [{}]> {
   string Name = name;
-  bit Negated = negated;
+
+  // A custom predicate, written as an expression evaluated in a context with
+  // "LangOpts" bound.
+  code CustomCode = customCode;
 }
 def MicrosoftExt : LangOpt<"MicrosoftExt">;
 def Borland : LangOpt<"Borland">;
 def CUDA : LangOpt<"CUDA">;
-def COnly : LangOpt<"CPlusPlus", 1>;
+def COnly : LangOpt<"COnly", "!LangOpts.CPlusPlus">;
 def CPlusPlus : LangOpt<"CPlusPlus">;
 def OpenCL : LangOpt<"OpenCL">;
 def RenderScript : LangOpt<"RenderScript">;
 def ObjC : LangOpt<"ObjC">;
 def BlocksSupported : LangOpt<"Blocks">;
 def ObjCAutoRefCount : LangOpt<"ObjCAutoRefCount">;
+def ObjCNonFragileRuntime : LangOpt<"ObjCNonFragileRuntime",
+                                    "LangOpts.ObjCRuntime.allowsClassStubs()">;
 
 // Language option for CMSE extensions
 def Cmse : LangOpt<"Cmse">;
@@ -1806,6 +1811,13 @@ def ObjCRuntimeVisible : Attr {
   let Documentation = [ObjCRuntimeVisibleDocs];
 }
 
+def ObjCClassStub : Attr {
+  let Spellings = [Clang<"objc_class_stub">];
+  let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;
+  let Documentation = [ObjCClassStubDocs];
+  let LangOpts = [ObjCNonFragileRuntime];
+}
+
 def ObjCBoxable : Attr {
   let Spellings = [Clang<"objc_boxable">];
   let Subjects = SubjectList<[Record], ErrorDiag>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 4d93e56a3caf5..f7dd2e72ac839 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -1085,6 +1085,25 @@ them.
     }];
 }
 
+def ObjCClassStubDocs : Documentation {
+    let Category = DocCatType;
+    let Content = [{
+This attribute specifies that the Objective-C class to which it applies is
+instantiated at runtime.
+
+Unlike ``__attribute__((objc_runtime_visible))``, a class having this attribute
+still has a "class stub" that is visible to the linker. This allows categories
+to be defined. Static message sends with the class as a receiver use a special
+access pattern to ensure the class is lazily instantiated from the class stub.
+
+Classes annotated with this attribute cannot be subclassed and cannot have
+implementations defined for them. This attribute is intended for use in
+Swift-generated headers for classes defined in Swift.
+
+Adding or removing this attribute to a class is an ABI-breaking change.
+    }];
+}
+
 def ObjCBoxableDocs : Documentation {
     let Category = DocCatDecl;
     let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e750b062f80b2..c5a7b93cec28a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -904,6 +904,12 @@ def err_objc_root_class_subclass : Error<
 def err_restricted_superclass_mismatch : Error<
   "cannot subclass a class that was declared with the "
   "'objc_subclassing_restricted' attribute">;
+def err_class_stub_subclassing_mismatch : Error<
+  "'objc_class_stub' attribute cannot be specified on a class that does not "
+  "have the 'objc_subclassing_restricted' attribute">;
+def err_implementation_of_class_stub : Error<
+  "cannot declare implementation of a class declared with the "
+  "'objc_class_stub' attribute">;
 def warn_objc_root_class_missing : Warning<
   "class %0 defined without specifying a base class">,
   InGroup<ObjCRootClass>;
diff --git a/clang/include/clang/Basic/ObjCRuntime.h b/clang/include/clang/Basic/ObjCRuntime.h
index fc87f20d562d4..2caebd58832aa 100644
--- a/clang/include/clang/Basic/ObjCRuntime.h
+++ b/clang/include/clang/Basic/ObjCRuntime.h
@@ -429,6 +429,22 @@ class ObjCRuntime {
     }
   }
 
+  /// Returns true if this Objective-C runtime supports Objective-C class
+  /// stubs.
+  bool allowsClassStubs() const {
+    switch (getKind()) {
+    case FragileMacOSX:
+    case GCC:
+    case GNUstep:
+    case ObjFW:
+      return false;
+    case MacOSX:
+    case iOS:
+    case WatchOS:
+      return true;
+    }
+  }
+
   /// Try to parse an Objective-C runtime specification from the given
   /// string.
   ///
diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index 79b00f2141160..4d4e54d214df6 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -721,6 +721,33 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper {
                                      "objc_begin_catch");
   }
 
+  /// Class objc_loadClassref (void *)
+  ///
+  /// Loads from a classref. For Objective-C stub classes, this invokes the
+  /// initialization callback stored inside the stub. For all other classes
+  /// this simply dereferences the pointer.
+  llvm::FunctionCallee getLoadClassrefFn() const {
+    // Add the non-lazy-bind attribute, since objc_loadClassref is likely to
+    // be called a lot.
+    //
+    // Also it is safe to make it readnone, since we never load or store the
+    // classref except by calling this function.
+    llvm::Type *params[] = { Int8PtrPtrTy };
+    llvm::FunctionCallee F = CGM.CreateRuntimeFunction(
+        llvm::FunctionType::get(ClassnfABIPtrTy, params, false),
+        "objc_loadClassref",
+        llvm::AttributeList::get(CGM.getLLVMContext(),
+                                 llvm::AttributeList::FunctionIndex,
+                                 {llvm::Attribute::NonLazyBind,
+                                  llvm::Attribute::ReadNone,
+                                  llvm::Attribute::NoUnwind}));
+    if (!CGM.getTriple().isOSBinFormatCOFF())
+      cast<llvm::Function>(F.getCallee())->setLinkage(
+        llvm::Function::ExternalWeakLinkage);
+
+    return F;
+  }
+
   llvm::StructType *EHTypeTy;
   llvm::Type *EHTypePtrTy;
 
@@ -877,6 +904,9 @@ class CGObjCCommonMac : public CodeGen::CGObjCRuntime {
   /// DefinedCategories - List of defined categories.
   SmallVector<llvm::GlobalValue*, 16> DefinedCategories;
 
+  /// DefinedStubCategories - List of defined categories on class stubs.
+  SmallVector<llvm::GlobalValue*, 16> DefinedStubCategories;
+
   /// DefinedNonLazyCategories - List of defined "non-lazy" categories.
   SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyCategories;
 
@@ -1464,6 +1494,12 @@ class CGObjCNonFragileABIMac : public CGObjCCommonMac {
                                  bool isMetaclass,
                                  ForDefinition_t isForDefinition);
 
+  llvm::Constant *GetClassGlobalForClassRef(const ObjCInterfaceDecl *ID);
+
+  llvm::Value *EmitLoadOfClassRef(CodeGenFunction &CGF,
+                                  const ObjCInterfaceDecl *ID,
+                                  llvm::GlobalVariable *Entry);
+
   /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy,
   /// for the given class reference.
   llvm::Value *EmitClassRef(CodeGenFunction &CGF,
@@ -1933,7 +1969,7 @@ llvm::Constant *CGObjCNonFragileABIMac::getNSConstantStringClassRef() {
   std::string str =
     StringClass.empty() ? "OBJC_CLASS_$_NSConstantString"
                         : "OBJC_CLASS_$_" + StringClass;
-  auto GV = GetClassGlobal(str, NotForDefinition);
+  llvm::Constant *GV = GetClassGlobal(str, NotForDefinition);
 
   // Make sure the result is of the correct type.
   auto V = llvm::ConstantExpr::getBitCast(GV, CGM.IntTy->getPointerTo());
@@ -6069,6 +6105,9 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() {
   AddModuleClassList(DefinedCategories, "OBJC_LABEL_CATEGORY_$",
                      GetSectionName("__objc_catlist",
                                     "regular,no_dead_strip"));
+  AddModuleClassList(DefinedStubCategories, "OBJC_LABEL_STUB_CATEGORY_$",
+                     GetSectionName("__objc_catlist2",
+                                    "regular,no_dead_strip"));
   AddModuleClassList(DefinedNonLazyCategories, "OBJC_LABEL_NONLAZY_CATEGORY_$",
                      GetSectionName("__objc_nlcatlist",
                                     "regular,no_dead_strip"));
@@ -6560,7 +6599,10 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
   llvm::GlobalVariable *GCATV =
       finishAndCreateGlobal(values, ExtCatName.str(), CGM);
   CGM.addCompilerUsedGlobal(GCATV);
-  DefinedCategories.push_back(GCATV);
+  if (Interface->hasAttr<ObjCClassStubAttr>())
+    DefinedStubCategories.push_back(GCATV);
+  else
+    DefinedCategories.push_back(GCATV);
 
   // Determine if this category is also "non-lazy".
   if (ImplementationIsNonLazy(OCD))
@@ -7236,33 +7278,68 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name,
   return GV;
 }
 
+llvm::Constant *
+CGObjCNonFragileABIMac::GetClassGlobalForClassRef(const ObjCInterfaceDecl *ID) {
+  llvm::Constant *ClassGV = GetClassGlobal(ID, /*metaclass*/ false,
+                                           NotForDefinition);
+
+  if (!ID->hasAttr<ObjCClassStubAttr>())
+    return ClassGV;
+
+  ClassGV = llvm::ConstantExpr::getPointerCast(ClassGV, ObjCTypes.Int8PtrTy);
+
+  // Stub classes are pointer-aligned. Classrefs pointing at stub classes
+  // must set the least significant bit set to 1.
+  auto *Idx = llvm::ConstantInt::get(CGM.Int32Ty, 1);
+  return llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, ClassGV, Idx);
+}
+
+llvm::Value *
+CGObjCNonFragileABIMac::EmitLoadOfClassRef(CodeGenFunction &CGF,
+                                           const ObjCInterfaceDecl *ID,
+                                           llvm::GlobalVariable *Entry) {
+  if (ID && ID->hasAttr<ObjCClassStubAttr>()) {
+    // Classrefs pointing at Objective-C stub classes must be loaded by calling
+    // a special runtime function.
+    return CGF.EmitRuntimeCall(
+      ObjCTypes.getLoadClassrefFn(), Entry, "load_classref_result");
+  }
+
+  CharUnits Align = CGF.getPointerAlign();
+  return CGF.Builder.CreateAlignedLoad(Entry, Align);
+}
+
 llvm::Value *
 CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF,
                                            IdentifierInfo *II,
                                            const ObjCInterfaceDecl *ID) {
-  CharUnits Align = CGF.getPointerAlign();
   llvm::GlobalVariable *&Entry = ClassReferences[II];
 
   if (!Entry) {
     llvm::Constant *ClassGV;
     if (ID) {
-      ClassGV = GetClassGlobal(ID, /*metaclass*/ false, NotForDefinition);
+      ClassGV = GetClassGlobalForClassRef(ID);
     } else {
       ClassGV = GetClassGlobal((getClassSymbolPrefix() + II->getName()).str(),
                                NotForDefinition);
+      assert(ClassGV->getType() == ObjCTypes.ClassnfABIPtrTy &&
+             "classref was emitted with the wrong type?");
     }
 
     std::string SectionName =
         GetSectionName("__objc_classrefs", "regular,no_dead_strip");
     Entry = new llvm::GlobalVariable(
-        CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false,
+        CGM.getModule(), ClassGV->getType(), false,
         getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV,
         "OBJC_CLASSLIST_REFERENCES_$_");
-    Entry->setAlignment(Align.getQuantity());
-    Entry->setSection(SectionName);
+    Entry->setAlignment(CGF.getPointerAlign().getQuantity());
+    if (!ID || !ID->hasAttr<ObjCClassStubAttr>())
+      Entry->setSection(SectionName);
+
     CGM.addCompilerUsedGlobal(Entry);
   }
-  return CGF.Builder.CreateAlignedLoad(Entry, Align);
+
+  return EmitLoadOfClassRef(CGF, ID, Entry);
 }
 
 llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF,
@@ -7284,22 +7361,22 @@ llvm::Value *CGObjCNonFragileABIMac::EmitNSAutoreleasePoolClassRef(
 llvm::Value *
 CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF,
                                           const ObjCInterfaceDecl *ID) {
-  CharUnits Align = CGF.getPointerAlign();
   llvm::GlobalVariable *&Entry = SuperClassReferences[ID->getIdentifier()];
 
   if (!Entry) {
-    auto ClassGV = GetClassGlobal(ID, /*metaclass*/ false, NotForDefinition);
+    llvm::Constant *ClassGV = GetClassGlobalForClassRef(ID);
     std::string SectionName =
         GetSectionName("__objc_superrefs", "regular,no_dead_strip");
     Entry = new llvm::GlobalVariable(
-        CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false,
+        CGM.getModule(), ClassGV->getType(), false,
         getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV,
         "OBJC_CLASSLIST_SUP_REFS_$_");
-    Entry->setAlignment(Align.getQuantity());
+    Entry->setAlignment(CGF.getPointerAlign().getQuantity());
     Entry->setSection(SectionName);
     CGM.addCompilerUsedGlobal(Entry);
   }
-  return CGF.Builder.CreateAlignedLoad(Entry, Align);
+
+  return EmitLoadOfClassRef(CGF, ID, Entry);
 }
 
 /// EmitMetaClassRef - Return a Value * of the address of _class_t
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 03b38bf2158dd..84f00dbaa2eae 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6993,6 +6993,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
   case ParsedAttr::AT_ObjCSubclassingRestricted:
     handleSimpleAttribute<ObjCSubclassingRestrictedAttr>(S, D, AL);
     break;
+  case ParsedAttr::AT_ObjCClassStub:
+    handleSimpleAttribute<ObjCClassStubAttr>(S, D, AL);
+    break;
   case ParsedAttr::AT_ObjCExplicitProtocolImpl:
     handleObjCSuppresProtocolAttr(S, D, AL);
     break;
diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp
index 5ff1f9e3408cd..21d9b8c32266d 100644
--- a/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/clang/lib/Sema/SemaDeclObjC.cpp
@@ -4061,6 +4061,9 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, ArrayRef<Decl *> allMethods,
         }
       }
 
+      if (IDecl->hasAttr<ObjCClassStubAttr>())
+        Diag(IC->getLocation(), diag::err_implementation_of_class_stub);
+
       if (LangOpts.ObjCRuntime.isNonFragile()) {
         while (IDecl->getSuperClass()) {
           DiagnoseDuplicateIvars(IDecl, IDecl->getSuperClass());
@@ -4089,6 +4092,10 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, ArrayRef<Decl *> allMethods,
         Diag(Super->getLocation(), diag::note_class_declared);
       }
     }
+
+    if (IntfDecl->hasAttr<ObjCClassStubAttr>() &&
+        !IntfDecl->hasAttr<ObjCSubclassingRestrictedAttr>())
+      Diag(IntfDecl->getLocation(), diag::err_class_stub_subclassing_mismatch);
   }
   DiagnoseVariableSizedIvars(*this, OCD);
   if (isInterfaceDeclKind) {
diff --git a/clang/test/CodeGenObjC/class-stubs.m b/clang/test/CodeGenObjC/class-stubs.m
new file mode 100644
index 0000000000000..fadb4433b8054
--- /dev/null
+++ b/clang/test/CodeGenObjC/class-stubs.m
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -Wno-objc-root-class -emit-llvm -o - %s | FileCheck %s
+
+// -- classref for the message send in main()
+//
+// The class is declared with objc_class_stub, so LSB of the class pointer
+// must be set to 1.
+//
+// CHECK-LABEL: @"OBJC_CLASSLIST_REFERENCES_$_" = internal global i8* getelementptr (i8, i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_Base" to i8*), i32 1), align 8
+
+// -- classref for the super message send in anotherClassMethod()
+//
+// Metaclasses do not use the "stub" mechanism and are referenced statically.
+//
+// CHECK-LABEL: @"OBJC_CLASSLIST_SUP_REFS_$_" = internal global %struct._class_t* @"OBJC_METACLASS_$_Derived", section "__DATA,__objc_superrefs,regular,no_dead_strip", align 8
+
+// -- classref for the super message send in anotherInstanceMethod()
+//
+// The class is declared with objc_class_stub, so LSB of the class pointer
+// must be set to 1.
+//
+// CHECK-LABEL: @"OBJC_CLASSLIST_SUP_REFS_$_.1" = internal global i8* getelementptr (i8, i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_Derived" to i8*), i32 1), section "__DATA,__objc_superrefs,regular,no_dead_strip", align 8
+
+// -- category list for class stubs goes in __objc_catlist2.
+//
+// CHECK-LABEL: @"OBJC_LABEL_STUB_CATEGORY_$" = internal global [1 x i8*] [i8* bitcast (%struct._category_t* @"_OBJC_$_CATEGORY_Derived_$_MyCategory" to i8*)], section "__DATA,__objc_catlist2,regular,no_dead_strip", align 8
+
+__attribute__((objc_class_stub))
+__attribute__((objc_subclassing_restricted))
+@interface Base
++ (void) classMethod;
+- (void) instanceMethod;
+@end
+
+__attribute__((objc_class_stub))
+__attribute__((objc_subclassing_restricted))
+@interface Derived : Base
+@end
+
+int main() {
+  [Base classMethod];
+}
+// CHECK-LABEL: define i32 @main()
+// CHECK-NEXT: entry:
+// CHECK-NEXT:   [[CLASS:%.*]] = call %struct._class_t* @objc_loadClassref(i8** @"OBJC_CLASSLIST_REFERENCES_$_")
+// CHECK-NEXT:   [[SELECTOR:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-NEXT:   [[RECEIVER:%.*]] = bitcast %struct._class_t* [[CLASS]] to i8*
+// CHECK-NEXT:   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* [[RECEIVER]], i8* [[SELECTOR]])
+// CHECK-NEXT:   ret i32 0
+
+// CHECK-LABEL: declare extern_weak %struct._class_t* @objc_loadClassref(i8**)
+// CHECK-SAME: [[ATTRLIST:#.*]]
+
+@implementation Derived (MyCategory)
+
++ (void) anotherClassMethod {
+  [super classMethod];
+}
+// CHECK-LABEL: define internal void @"\01+[Derived(MyCategory) anotherClassMethod]"(i8* %self, i8* %_cmd) #0 {
+// CHECK-NEXT: entry:
+// CHECK:        [[SUPER:%.*]] = alloca %struct._objc_super, align 8
+// CHECK:        [[METACLASS_REF:%.*]] = load %struct._class_t*, %struct._class_t** @"OBJC_CLASSLIST_SUP_REFS_$_", align 8
+// CHECK:        [[CAST_METACLASS_REF:%.*]] = bitcast %struct._class_t* [[METACLASS_REF]] to i8*
+// CHECK:        [[DEST:%.*]] = getelementptr inbounds %struct._objc_super, %struct._objc_super* [[SUPER]], i32 0, i32 1
+// CHECK:        store i8* [[CAST_METACLASS_REF]], i8** [[DEST]], align 8
+// CHECK:        call void bitcast (i8* (%struct._objc_super*, i8*, ...)* @objc_msgSendSuper2 to void (%struct._objc_super*, i8*)*)(%struct._objc_super* [[SUPER]], i8* {{%.*}})
+// CHECK:        ret void
+
+- (void) anotherInstanceMethod {
+  [super instanceMethod];
+}
+// CHECK-LABEL: define internal void @"\01-[Derived(MyCategory) anotherInstanceMethod]"(%0* %self, i8* %_cmd) #0 {
+// CHECK-NEXT: entry:
+// CHECK:        [[SUPER:%.*]] = alloca %struct._objc_super, align 8
+// CHECK:        [[CLASS_REF:%.*]] = call %struct._class_t* @objc_loadClassref(i8** @"OBJC_CLASSLIST_SUP_REFS_$_.1")
+// CHECK:        [[CAST_CLASS_REF:%.*]] = bitcast %struct._class_t* [[CLASS_REF]] to i8*
+// CHECK:        [[DEST:%.*]] = getelementptr inbounds %struct._objc_super, %struct._objc_super* [[SUPER]], i32 0, i32 1
+// CHECK:        store i8* [[CAST_CLASS_REF]], i8** [[DEST]], align 8
+// CHECK:        call void bitcast (i8* (%struct._objc_super*, i8*, ...)* @objc_msgSendSuper2 to void (%struct._objc_super*, i8*)*)(%struct._objc_super* [[SUPER]], i8* {{%.*}})
+// CHECK:        ret void
+
+@end
+
+// -- calls to objc_loadClassRef() are readnone
+// CHECK: attributes [[ATTRLIST]] = { nounwind nonlazybind readnone }
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index f138deac57e4f..f85c89ae015dc 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -97,6 +97,7 @@
 // CHECK-NEXT: ObjCBridge (SubjectMatchRule_record, SubjectMatchRule_type_alias)
 // CHECK-NEXT: ObjCBridgeMutable (SubjectMatchRule_record)
 // CHECK-NEXT: ObjCBridgeRelated (SubjectMatchRule_record)
+// CHECK-NEXT: ObjCClassStub (SubjectMatchRule_objc_interface)
 // CHECK-NEXT: ObjCDesignatedInitializer (SubjectMatchRule_objc_method)
 // CHECK-NEXT: ObjCException (SubjectMatchRule_objc_interface)
 // CHECK-NEXT: ObjCExplicitProtocolImpl (SubjectMatchRule_objc_protocol)
diff --git a/clang/test/SemaObjC/class-stub-attr-unsupported.m b/clang/test/SemaObjC/class-stub-attr-unsupported.m
new file mode 100644
index 0000000000000..cc5243fac6eab
--- /dev/null
+++ b/clang/test/SemaObjC/class-stub-attr-unsupported.m
@@ -0,0 +1,10 @@
+// RUN: %clang -target i386-apple-darwin -fsyntax-only -Xclang -verify %s
+// RUN: %clang -target i386-apple-darwin -x objective-c++ -fsyntax-only -Xclang -verify %s
+
+@interface NSObject
+@end
+
+__attribute__((objc_class_stub)) // expected-warning {{'objc_class_stub' attribute ignored}}
+__attribute__((objc_subclassing_restricted))
+@interface StubClass : NSObject
+@end
diff --git a/clang/test/SemaObjC/class-stub-attr.m b/clang/test/SemaObjC/class-stub-attr.m
new file mode 100644
index 0000000000000..46c07d8b5f789
--- /dev/null
+++ b/clang/test/SemaObjC/class-stub-attr.m
@@ -0,0 +1,27 @@
+// RUN: %clang -target x86_64-apple-darwin -fsyntax-only -Xclang -verify %s
+// RUN: %clang -target x86_64-apple-darwin -x objective-c++ -fsyntax-only -Xclang -verify %s
+
+@interface NSObject
+@end
+
+__attribute__((objc_class_stub))
+@interface MissingSubclassingRestrictedAttribute : NSObject // expected-error {{'objc_class_stub' attribute cannot be specified on a class that does not have the 'objc_subclassing_restricted' attribute}}
+@end
+
+__attribute__((objc_class_stub))
+__attribute__((objc_subclassing_restricted))
+@interface ValidClassStubAttribute : NSObject
+@end
+
+@implementation ValidClassStubAttribute // expected-error {{cannot declare implementation of a class declared with the 'objc_class_stub' attribute}}
+@end
+
+@implementation ValidClassStubAttribute (MyCategory)
+@end
+
+__attribute__((objc_class_stub(123))) // expected-error {{'objc_class_stub' attribute takes no arguments}}
+@interface InvalidClassStubAttribute : NSObject
+@end
+
+__attribute__((objc_class_stub)) // expected-error {{'objc_class_stub' attribute only applies to Objective-C interfaces}}
+int cannotHaveObjCClassStubAttribute() {}
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 077bfe48ab385..a32d80557e683 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -1922,6 +1922,30 @@ bool PragmaClangAttributeSupport::isAttributedSupported(
   return true;
 }
 
+static std::string GenerateTestExpression(ArrayRef<Record *> LangOpts) {
+  std::string Test;
+
+  for (auto *E : LangOpts) {
+    if (!Test.empty())
+      Test += " || ";
+
+    const StringRef Code = E->getValueAsString("CustomCode");
+    if (!Code.empty()) {
+      Test += "(";
+      Test += Code;
+      Test += ")";
+    } else {
+      Test += "LangOpts.";
+      Test += E->getValueAsString("Name");
+    }
+  }
+
+  if (Test.empty())
+    return "true";
+
+  return Test;
+}
+
 std::string
 PragmaClangAttributeSupport::generateStrictConformsTo(const Record &Attr,
                                                       raw_ostream &OS) {
@@ -1948,19 +1972,8 @@ PragmaClangAttributeSupport::generateStrictConformsTo(const Record &Attr,
       // rules if the specific language options are specified.
       std::vector<Record *> LangOpts = Rule.getLangOpts();
       OS << "  MatchRules.push_back(std::make_pair(" << Rule.getEnumValue()
-         << ", /*IsSupported=*/";
-      if (!LangOpts.empty()) {
-        for (auto I = LangOpts.begin(), E = LangOpts.end(); I != E; ++I) {
-          const StringRef Part = (*I)->getValueAsString("Name");
-          if ((*I)->getValueAsBit("Negated"))
-            OS << "!";
-          OS << "LangOpts." << Part;
-          if (I + 1 != E)
-            OS << " || ";
-        }
-      } else
-        OS << "true";
-      OS << "));\n";
+         << ", /*IsSupported=*/" << GenerateTestExpression(LangOpts)
+         << "));\n";
     }
   }
   OS << "}\n\n";
@@ -3431,23 +3444,12 @@ static std::string GenerateLangOptRequirements(const Record &R,
   if (LangOpts.empty())
     return "defaultDiagnoseLangOpts";
 
-  // Generate the test condition, as well as a unique function name for the
-  // diagnostic test. The list of options should usually be short (one or two
-  // options), and the uniqueness isn't strictly necessary (it is just for
-  // codegen efficiency).
-  std::string FnName = "check", Test;
-  for (auto I = LangOpts.begin(), E = LangOpts.end(); I != E; ++I) {
-    const StringRef Part = (*I)->getValueAsString("Name");
-    if ((*I)->getValueAsBit("Negated")) {
-      FnName += "Not";
-      Test += "!";
-    }
-    Test += "S.LangOpts.";
-    Test +=  Part;
-    if (I + 1 != E)
-      Test += " || ";
-    FnName += Part;
-  }
+  // Generate a unique function name for the diagnostic test. The list of
+  // options should usually be short (one or two options), and the
+  // uniqueness isn't strictly necessary (it is just for codegen efficiency).
+  std::string FnName = "check";
+  for (auto I = LangOpts.begin(), E = LangOpts.end(); I != E; ++I)
+    FnName += (*I)->getValueAsString("Name");
   FnName += "LangOpts";
 
   // If this code has already been generated, simply return the previous
@@ -3458,7 +3460,8 @@ static std::string GenerateLangOptRequirements(const Record &R,
     return *I;
 
   OS << "static bool " << FnName << "(Sema &S, const ParsedAttr &Attr) {\n";
-  OS << "  if (" << Test << ")\n";
+  OS << "  auto &LangOpts = S.LangOpts;\n";
+  OS << "  if (" << GenerateTestExpression(LangOpts) << ")\n";
   OS << "    return true;\n\n";
   OS << "  S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) ";
   OS << "<< Attr.getName();\n";

From 789b7f0828b08f5c4bf9ff1ff7ef733c73ecdc0a Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 04:40:21 +0000
Subject: [PATCH 0577/1176] [runtimes] Check if pragma comment(lib, ...) is
 supported first

This fixes the issue introduced by r362048 where we always use
pragma comment(lib, ...) for dependent libraries when the compiler
is Clang, but older Clang versions don't support this pragma so
we need to check first if it's supported before using it.

llvm-svn: 362055
---
 libcxx/CMakeLists.txt                       |  4 ++++
 libcxx/cmake/config-ix.cmake                | 10 +++++++++-
 libcxx/src/algorithm.cpp                    |  2 +-
 libcxx/src/chrono.cpp                       |  2 +-
 libcxx/src/debug.cpp                        |  2 +-
 libcxx/src/experimental/memory_resource.cpp |  2 +-
 libcxx/src/filesystem/operations.cpp        |  2 +-
 libcxx/src/mutex.cpp                        |  2 +-
 libcxx/src/shared_mutex.cpp                 |  2 +-
 libcxx/src/thread.cpp                       |  2 +-
 libcxxabi/CMakeLists.txt                    |  4 ++++
 libcxxabi/cmake/config-ix.cmake             |  9 +++++++++
 libcxxabi/src/cxa_exception_storage.cpp     |  4 ++++
 libcxxabi/src/cxa_guard_impl.h              |  2 +-
 libcxxabi/src/cxa_thread_atexit.cpp         |  2 +-
 libcxxabi/src/fallback_malloc.cpp           |  2 +-
 libunwind/CMakeLists.txt                    |  4 ++++
 libunwind/cmake/config-ix.cmake             | 11 +++++++++--
 libunwind/src/AddressSpace.hpp              |  2 +-
 libunwind/src/RWMutex.hpp                   |  2 +-
 20 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 1096898d055ec..50ccbf6c42612 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -552,6 +552,10 @@ if (NOT LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS)
   add_definitions(-D_LIBCPP_DISABLE_NEW_DELETE_DEFINITIONS)
 endif()
 
+if (LIBCXX_HAS_COMMENT_LIB_PRAGMA)
+  add_definitions(-D_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
+endif()
+
 # Warning flags ===============================================================
 add_definitions(-D_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 add_compile_flags_if_supported(
diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index 23ae71025e546..b2d275bbb25a3 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -1,6 +1,7 @@
 include(CheckLibraryExists)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
+include(CheckCSourceCompiles)
 
 if(WIN32 AND NOT MINGW)
   # NOTE(compnerd) this is technically a lie, there is msvcrt, but for now, lets
@@ -59,6 +60,14 @@ if (LIBCXX_SUPPORTS_NODEFAULTLIBS_FLAG)
   endif ()
 endif ()
 
+# Check compiler pragmas
+if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  check_c_source_compiles("
+#pragma comment(lib, \"c\")
+int main() { return 0; }
+" LIBCXX_HAS_COMMENT_LIB_PRAGMA)
+endif()
+
 if(NOT WIN32 OR MINGW)
   include(CheckLibcxxAtomic)
 endif()
@@ -72,7 +81,6 @@ check_cxx_compiler_flag(/EHs-                   LIBCXX_HAS_NO_EHS_FLAG)
 check_cxx_compiler_flag(/EHa-                   LIBCXX_HAS_NO_EHA_FLAG)
 check_cxx_compiler_flag(/GR-                    LIBCXX_HAS_NO_GR_FLAG)
 
-
 # Check libraries
 if(WIN32 AND NOT MINGW)
   # TODO(compnerd) do we want to support an emulation layer that allows for the
diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp
index 5ce2a23b464fa..a110ae8b1a6ee 100644
--- a/libcxx/src/algorithm.cpp
+++ b/libcxx/src/algorithm.cpp
@@ -10,7 +10,7 @@
 #include "random"
 #ifndef _LIBCPP_HAS_NO_THREADS
 #include "mutex"
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp
index a2f88c94c0779..8f533f1059ed8 100644
--- a/libcxx/src/chrono.cpp
+++ b/libcxx/src/chrono.cpp
@@ -37,7 +37,7 @@
 #endif
 #endif
 
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "rt")
 #endif
 
diff --git a/libcxx/src/debug.cpp b/libcxx/src/debug.cpp
index 950241310112b..c4cc281d586b8 100644
--- a/libcxx/src/debug.cpp
+++ b/libcxx/src/debug.cpp
@@ -15,7 +15,7 @@
 #include "__hash_table"
 #ifndef _LIBCPP_HAS_NO_THREADS
 #include "mutex"
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libcxx/src/experimental/memory_resource.cpp b/libcxx/src/experimental/memory_resource.cpp
index 84c95080496fd..9aa077942b0d2 100644
--- a/libcxx/src/experimental/memory_resource.cpp
+++ b/libcxx/src/experimental/memory_resource.cpp
@@ -12,7 +12,7 @@
 #include "atomic"
 #elif !defined(_LIBCPP_HAS_NO_THREADS)
 #include "mutex"
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index 319d9f65d7381..69350ddfe9dae 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -44,7 +44,7 @@
 #include <sys/time.h> // for gettimeofday and timeval
 #endif                // !defined(CLOCK_REALTIME)
 
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "rt")
 #endif
 
diff --git a/libcxx/src/mutex.cpp b/libcxx/src/mutex.cpp
index d100f2df23389..33a8197dadf84 100644
--- a/libcxx/src/mutex.cpp
+++ b/libcxx/src/mutex.cpp
@@ -13,7 +13,7 @@
 #include "__undef_macros"
 
 #ifndef _LIBCPP_HAS_NO_THREADS
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libcxx/src/shared_mutex.cpp b/libcxx/src/shared_mutex.cpp
index 3f1aecfdfe192..eb3f5f3506f5e 100644
--- a/libcxx/src/shared_mutex.cpp
+++ b/libcxx/src/shared_mutex.cpp
@@ -10,7 +10,7 @@
 #ifndef _LIBCPP_HAS_NO_THREADS
 
 #include "shared_mutex"
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 
diff --git a/libcxx/src/thread.cpp b/libcxx/src/thread.cpp
index 92690f6679829..39bb9e9bac63e 100644
--- a/libcxx/src/thread.cpp
+++ b/libcxx/src/thread.cpp
@@ -35,7 +35,7 @@
 #include <windows.h>
 #endif
 
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 
diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 75c83cc79cf5a..e1977278fc8be 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -441,6 +441,10 @@ if (LIBCXXABI_BAREMETAL)
     add_definitions(-DLIBCXXABI_BAREMETAL)
 endif()
 
+if (LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
+  add_definitions(-D_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
+endif()
+
 string(REPLACE ";" " " LIBCXXABI_CXX_FLAGS "${LIBCXXABI_CXX_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LIBCXXABI_CXX_FLAGS}")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${LIBCXXABI_C_FLAGS}")
diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake
index 379b554774975..30d2ae237d1c2 100644
--- a/libcxxabi/cmake/config-ix.cmake
+++ b/libcxxabi/cmake/config-ix.cmake
@@ -1,6 +1,7 @@
 include(CheckLibraryExists)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
+include(CheckCSourceCompiles)
 
 check_library_exists(c fopen "" LIBCXXABI_HAS_C_LIB)
 if (NOT LIBCXXABI_USE_COMPILER_RT)
@@ -48,6 +49,14 @@ if (LIBCXXABI_HAS_NODEFAULTLIBS_FLAG)
   endif ()
 endif ()
 
+# Check compiler pragmas
+if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  check_c_source_compiles("
+#pragma comment(lib, \"c\")
+int main() { return 0; }
+" LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
+endif()
+
 # Check compiler flags
 check_c_compiler_flag(-funwind-tables         LIBCXXABI_HAS_FUNWIND_TABLES)
 check_cxx_compiler_flag(-fno-exceptions       LIBCXXABI_HAS_NO_EXCEPTIONS_FLAG)
diff --git a/libcxxabi/src/cxa_exception_storage.cpp b/libcxxabi/src/cxa_exception_storage.cpp
index 93506ac5c589c..81ba5f0207ad1 100644
--- a/libcxxabi/src/cxa_exception_storage.cpp
+++ b/libcxxabi/src/cxa_exception_storage.cpp
@@ -46,6 +46,10 @@ extern "C" {
 #include "abort_message.h"
 #include "fallback_malloc.h"
 
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
+#pragma comment(lib, "pthread")
+#endif
+
 //  In general, we treat all threading errors as fatal.
 //  We cannot call std::terminate() because that will in turn
 //  call __cxa_get_globals() and cause infinite recursion.
diff --git a/libcxxabi/src/cxa_guard_impl.h b/libcxxabi/src/cxa_guard_impl.h
index bd6b15fce6a83..935ba80d85c7d 100644
--- a/libcxxabi/src/cxa_guard_impl.h
+++ b/libcxxabi/src/cxa_guard_impl.h
@@ -50,7 +50,7 @@
 #include <stdlib.h>
 #include <__threading_support>
 #ifndef _LIBCXXABI_HAS_NO_THREADS
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libcxxabi/src/cxa_thread_atexit.cpp b/libcxxabi/src/cxa_thread_atexit.cpp
index 38787f18fe30f..3b60c2919f445 100644
--- a/libcxxabi/src/cxa_thread_atexit.cpp
+++ b/libcxxabi/src/cxa_thread_atexit.cpp
@@ -10,7 +10,7 @@
 #include "cxxabi.h"
 #include <__threading_support>
 #ifndef _LIBCXXABI_HAS_NO_THREADS
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libcxxabi/src/fallback_malloc.cpp b/libcxxabi/src/fallback_malloc.cpp
index bae0fa4ac2323..73ea28ed82449 100644
--- a/libcxxabi/src/fallback_malloc.cpp
+++ b/libcxxabi/src/fallback_malloc.cpp
@@ -13,7 +13,7 @@
 
 #include <__threading_support>
 #ifndef _LIBCXXABI_HAS_NO_THREADS
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif
diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 65cd5b2c6720d..16bfb9a82020c 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -362,6 +362,10 @@ if (WIN32 AND LIBUNWIND_ENABLE_STATIC AND NOT LIBUNWIND_ENABLE_SHARED)
   add_definitions(-D_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS)
 endif()
 
+if (LIBUNWIND_HAS_COMMENT_LIB_PRAGMA)
+  add_definitions(-D_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA)
+endif()
+
 #===============================================================================
 # Setup Source Code
 #===============================================================================
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
index 670c31f2d092d..2c27ecf5207b8 100644
--- a/libunwind/cmake/config-ix.cmake
+++ b/libunwind/cmake/config-ix.cmake
@@ -1,7 +1,7 @@
-
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckLibraryExists)
+include(CheckCSourceCompiles)
 
 check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB)
 
@@ -55,6 +55,14 @@ if (LIBUNWIND_HAS_NODEFAULTLIBS_FLAG)
   endif ()
 endif ()
 
+# Check compiler pragmas
+if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  check_c_source_compiles("
+#pragma comment(lib, \"c\")
+int main() { return 0; }
+" LIBUNWIND_HAS_COMMENT_LIB_PRAGMA)
+endif()
+
 # Check compiler flags
 check_c_compiler_flag(-funwind-tables         LIBUNWIND_HAS_FUNWIND_TABLES)
 check_cxx_compiler_flag(-fno-exceptions       LIBUNWIND_HAS_NO_EXCEPTIONS_FLAG)
@@ -96,4 +104,3 @@ endif()
 
 check_library_exists(dl dladdr "" LIBUNWIND_HAS_DL_LIB)
 check_library_exists(pthread pthread_once "" LIBUNWIND_HAS_PTHREAD_LIB)
-
diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp
index 6643953030975..fb07c807db9e9 100644
--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@@ -27,7 +27,7 @@
 
 #if _LIBUNWIND_USE_DLADDR
 #include <dlfcn.h>
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "dl")
 #endif
 #endif
diff --git a/libunwind/src/RWMutex.hpp b/libunwind/src/RWMutex.hpp
index 4f234a77edf14..a37ac77144f38 100644
--- a/libunwind/src/RWMutex.hpp
+++ b/libunwind/src/RWMutex.hpp
@@ -17,7 +17,7 @@
 #include <windows.h>
 #elif !defined(_LIBUNWIND_HAS_NO_THREADS)
 #include <pthread.h>
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif

From e4cfa89915b7e16a095272f6603a3df83b982d38 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 30 May 2019 05:03:12 +0000
Subject: [PATCH 0578/1176] [LV] Inform about exactly reason of loop illegality

Currently, only the following information is provided by LoopVectorizer
in the case when the CF of the loop is not legal for vectorization:

 LV: Can't vectorize the instructions or CFG
    LV: Not vectorizing: Cannot prove legality.

But this information is not enough for the root cause analysis; what is
exactly wrong with the loop should also be printed:

 LV: Not vectorizing: The exiting block is not the loop latch.

Patch by Pavel Samolysov.

Reviewers: mkuper, hsaito, rengolin, fhahn

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D62311

llvm-svn: 362056
---
 .../Vectorize/LoopVectorizationLegality.cpp   | 12 ++-
 .../LoopVectorize/legal_preheader_check.ll    | 27 ------
 .../LoopVectorize/loop-legality-checks.ll     | 82 +++++++++++++++++++
 3 files changed, 92 insertions(+), 29 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopVectorize/legal_preheader_check.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7361a3a3cdc46..09b5a51add719 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -599,9 +599,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         // Check that this PHI type is allowed.
         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
             !PhiTy->isPointerTy()) {
+          LLVM_DEBUG(dbgs()
+                 << "LV: Not vectorizing: Found a non-int non-pointer PHI.\n");
           ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
                     << "loop control flow is not understood by vectorizer");
-          LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
           return false;
         }
 
@@ -967,7 +968,8 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
   // We must have a loop in canonical form. Loops with indirectbr in them cannot
   // be canonicalized.
   if (!Lp->getLoopPreheader()) {
-    LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
+    LLVM_DEBUG(dbgs()
+             << "LV: Not vectorizing: Loop doesn't have a legal pre-header.\n");
     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
               << "loop control flow is not understood by vectorizer");
     if (DoExtraAnalysis)
@@ -978,6 +980,8 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
 
   // We must have a single backedge.
   if (Lp->getNumBackEdges() != 1) {
+    LLVM_DEBUG(dbgs()
+            << "LV: Not vectorizing: The loop must have a single backedge.\n");
     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
               << "loop control flow is not understood by vectorizer");
     if (DoExtraAnalysis)
@@ -988,6 +992,8 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
 
   // We must have a single exiting block.
   if (!Lp->getExitingBlock()) {
+    LLVM_DEBUG(dbgs()
+            << "LV: Not vectorizing: The loop must have an exiting block.\n");
     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
               << "loop control flow is not understood by vectorizer");
     if (DoExtraAnalysis)
@@ -1000,6 +1006,8 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
   // checked at the end of each iteration. With that we can assume that all
   // instructions in the loop are executed the same number of times.
   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
+    LLVM_DEBUG(dbgs()
+          << "LV: Not vectorizing: The exiting block is not the loop latch.\n");
     ORE->emit(createMissedAnalysis("CFGNotUnderstood")
               << "loop control flow is not understood by vectorizer");
     if (DoExtraAnalysis)
diff --git a/llvm/test/Transforms/LoopVectorize/legal_preheader_check.ll b/llvm/test/Transforms/LoopVectorize/legal_preheader_check.ll
deleted file mode 100644
index 32aa796394d69..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/legal_preheader_check.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -loop-vectorize -debug -S -o /dev/null 2>&1 | FileCheck %s
-; REQUIRES: asserts
-
-; D40973
-; Make sure LV legal bails out when the loop doesn't have a legal pre-header.
-
-; CHECK: LV: Loop doesn't have a legal pre-header.
-
-define void @inc(i32 %n, i8* %P) {
-  %1 = icmp sgt i32 %n, 0
-  br i1 %1, label %BB1, label %BB2
-
-BB1:
-  indirectbr i8* %P, [label %.lr.ph]
-
-BB2:
-  br label %.lr.ph
-
-.lr.ph:
-  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %BB1 ], [ 0, %BB2 ]
-  %indvars.iv.next = add i32 %indvars.iv, 1
-  %exitcond = icmp eq i32 %indvars.iv.next, %n
-  br i1 %exitcond, label %._crit_edge, label %.lr.ph
-
-._crit_edge:
-  ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll b/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll
new file mode 100644
index 0000000000000..4e39181e53dd1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -S -disable-output 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Make sure LV legal bails out when the exiting block != loop latch.
+; CHECK-LABEL: "latch_is_not_exiting"
+; CHECK: LV: Not vectorizing: The exiting block is not the loop latch.
+define i32 @latch_is_not_exiting() {
+entry:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, 16
+  br i1 %cmp, label %for.body, label %for.second
+
+for.second:
+  %cmps = icmp sgt i32 %inc, 16
+  br i1 %cmps, label %for.body, label %for.end
+
+for.end:
+  ret i32 0
+}
+
+; Make sure LV legal bails out when there is no exiting block
+; CHECK-LABEL: "no_exiting_block"
+; CHECK: LV: Not vectorizing: The loop must have an exiting block.
+define i32 @no_exiting_block() {
+entry:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, 16
+  br i1 %cmp, label %for.body, label %for.second
+
+for.second:
+  br label %for.body
+}
+
+; Make sure LV legal bails out when there is a non-int, non-ptr phi
+; CHECK-LABEL: "invalid_phi_types"
+; CHECK: LV: Not vectorizing: Found a non-int non-pointer PHI.
+define i32 @invalid_phi_types() {
+entry:
+  br label %for.body
+
+for.body:
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %vec.sum.02 = phi <2 x i32> [ zeroinitializer, %entry ], [ <i32 8, i32 8>, %for.body ]
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, 16
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 0
+}
+
+; D40973
+; Make sure LV legal bails out when the loop doesn't have a legal pre-header.
+; CHECK-LABEL: "inc"
+; CHECK: LV: Not vectorizing: Loop doesn't have a legal pre-header.
+define void @inc(i32 %n, i8* %P) {
+  %1 = icmp sgt i32 %n, 0
+  br i1 %1, label %BB1, label %BB2
+
+BB1:
+  indirectbr i8* %P, [label %.lr.ph]
+
+BB2:
+  br label %.lr.ph
+
+.lr.ph:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %BB1 ], [ 0, %BB2 ]
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:
+  ret void
+}

From f1ddf431b5dd47a6c03ed1420af69facace8cb1d Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 05:38:06 +0000
Subject: [PATCH 0579/1176] [runtimes] Use -Wunknown-pragmas for the pragma
 check

This is a follow up to r362055, we need -Wunknown-pragmas otherwise
the check is going to succeed it the pragma isn't supported.

llvm-svn: 362057
---
 libcxx/cmake/config-ix.cmake    | 4 ++++
 libcxxabi/cmake/config-ix.cmake | 4 ++++
 libunwind/cmake/config-ix.cmake | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index b2d275bbb25a3..cedc767928798 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -1,3 +1,4 @@
+include(CMakePushCheckState)
 include(CheckLibraryExists)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
@@ -62,10 +63,13 @@ endif ()
 
 # Check compiler pragmas
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  cmake_push_check_state()
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas")
   check_c_source_compiles("
 #pragma comment(lib, \"c\")
 int main() { return 0; }
 " LIBCXX_HAS_COMMENT_LIB_PRAGMA)
+  cmake_pop_check_state()
 endif()
 
 if(NOT WIN32 OR MINGW)
diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake
index 30d2ae237d1c2..04d58fcc98d92 100644
--- a/libcxxabi/cmake/config-ix.cmake
+++ b/libcxxabi/cmake/config-ix.cmake
@@ -1,3 +1,4 @@
+include(CMakePushCheckState)
 include(CheckLibraryExists)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
@@ -51,10 +52,13 @@ endif ()
 
 # Check compiler pragmas
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  cmake_push_check_state()
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas")
   check_c_source_compiles("
 #pragma comment(lib, \"c\")
 int main() { return 0; }
 " LIBCXXABI_HAS_COMMENT_LIB_PRAGMA)
+  cmake_pop_check_state()
 endif()
 
 # Check compiler flags
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
index 2c27ecf5207b8..07a95ce1a46a8 100644
--- a/libunwind/cmake/config-ix.cmake
+++ b/libunwind/cmake/config-ix.cmake
@@ -1,3 +1,4 @@
+include(CMakePushCheckState)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckLibraryExists)
@@ -57,10 +58,13 @@ endif ()
 
 # Check compiler pragmas
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  cmake_push_check_state()
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas")
   check_c_source_compiles("
 #pragma comment(lib, \"c\")
 int main() { return 0; }
 " LIBUNWIND_HAS_COMMENT_LIB_PRAGMA)
+  cmake_pop_check_state()
 endif()
 
 # Check compiler flags

From 0528726a69c2f9245645c9efcec2294d2d99b908 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 06:08:56 +0000
Subject: [PATCH 0580/1176] [libcxx][libcxxabi] Remove the unused CMake checks

These seemed to have been used in the past but were since removed
by the add_compile_flags_if_supported functions that combine these
these checks and adding the flag, but the original checks were never
removed.

Differential Revision: https://reviews.llvm.org/D62566

llvm-svn: 362058
---
 libcxx/cmake/config-ix.cmake    |  9 ---------
 libcxxabi/cmake/config-ix.cmake | 33 +--------------------------------
 2 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index cedc767928798..a7a684c9d5f04 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -76,15 +76,6 @@ if(NOT WIN32 OR MINGW)
   include(CheckLibcxxAtomic)
 endif()
 
-# Check compiler flags
-
-check_cxx_compiler_flag(/WX                     LIBCXX_HAS_WX_FLAG)
-check_cxx_compiler_flag(/WX-                    LIBCXX_HAS_NO_WX_FLAG)
-check_cxx_compiler_flag(/EHsc                   LIBCXX_HAS_EHSC_FLAG)
-check_cxx_compiler_flag(/EHs-                   LIBCXX_HAS_NO_EHS_FLAG)
-check_cxx_compiler_flag(/EHa-                   LIBCXX_HAS_NO_EHA_FLAG)
-check_cxx_compiler_flag(/GR-                    LIBCXX_HAS_NO_GR_FLAG)
-
 # Check libraries
 if(WIN32 AND NOT MINGW)
   # TODO(compnerd) do we want to support an emulation layer that allows for the
diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake
index 04d58fcc98d92..9195dfc74e19b 100644
--- a/libcxxabi/cmake/config-ix.cmake
+++ b/libcxxabi/cmake/config-ix.cmake
@@ -62,38 +62,7 @@ int main() { return 0; }
 endif()
 
 # Check compiler flags
-check_c_compiler_flag(-funwind-tables         LIBCXXABI_HAS_FUNWIND_TABLES)
-check_cxx_compiler_flag(-fno-exceptions       LIBCXXABI_HAS_NO_EXCEPTIONS_FLAG)
-check_cxx_compiler_flag(-fno-rtti             LIBCXXABI_HAS_NO_RTTI_FLAG)
-check_cxx_compiler_flag(-fstrict-aliasing     LIBCXXABI_HAS_FSTRICT_ALIASING_FLAG)
-check_cxx_compiler_flag(-nostdinc++           LIBCXXABI_HAS_NOSTDINCXX_FLAG)
-check_cxx_compiler_flag(-Wall                 LIBCXXABI_HAS_WALL_FLAG)
-check_cxx_compiler_flag(-W                    LIBCXXABI_HAS_W_FLAG)
-check_cxx_compiler_flag(-Wunused-function     LIBCXXABI_HAS_WUNUSED_FUNCTION_FLAG)
-check_cxx_compiler_flag(-Wunused-variable     LIBCXXABI_HAS_WUNUSED_VARIABLE_FLAG)
-check_cxx_compiler_flag(-Wunused-parameter    LIBCXXABI_HAS_WUNUSED_PARAMETER_FLAG)
-check_cxx_compiler_flag(-Wstrict-aliasing     LIBCXXABI_HAS_WSTRICT_ALIASING_FLAG)
-check_cxx_compiler_flag(-Wstrict-overflow     LIBCXXABI_HAS_WSTRICT_OVERFLOW_FLAG)
-check_cxx_compiler_flag(-Wwrite-strings       LIBCXXABI_HAS_WWRITE_STRINGS_FLAG)
-check_cxx_compiler_flag(-Wchar-subscripts     LIBCXXABI_HAS_WCHAR_SUBSCRIPTS_FLAG)
-check_cxx_compiler_flag(-Wmismatched-tags     LIBCXXABI_HAS_WMISMATCHED_TAGS_FLAG)
-check_cxx_compiler_flag(-Wmissing-braces      LIBCXXABI_HAS_WMISSING_BRACES_FLAG)
-check_cxx_compiler_flag(-Wshorten-64-to-32    LIBCXXABI_HAS_WSHORTEN_64_TO_32_FLAG)
-check_cxx_compiler_flag(-Wsign-conversion     LIBCXXABI_HAS_WSIGN_CONVERSION_FLAG)
-check_cxx_compiler_flag(-Wsign-compare        LIBCXXABI_HAS_WSIGN_COMPARE_FLAG)
-check_cxx_compiler_flag(-Wshadow              LIBCXXABI_HAS_WSHADOW_FLAG)
-check_cxx_compiler_flag(-Wconversion          LIBCXXABI_HAS_WCONVERSION_FLAG)
-check_cxx_compiler_flag(-Wnewline-eof         LIBCXXABI_HAS_WNEWLINE_EOF_FLAG)
-check_cxx_compiler_flag(-Wundef               LIBCXXABI_HAS_WUNDEF_FLAG)
-check_cxx_compiler_flag(-pedantic             LIBCXXABI_HAS_PEDANTIC_FLAG)
-check_cxx_compiler_flag(-Werror               LIBCXXABI_HAS_WERROR_FLAG)
-check_cxx_compiler_flag(-Wno-error            LIBCXXABI_HAS_WNO_ERROR_FLAG)
-check_cxx_compiler_flag(/WX                   LIBCXXABI_HAS_WX_FLAG)
-check_cxx_compiler_flag(/WX-                  LIBCXXABI_HAS_NO_WX_FLAG)
-check_cxx_compiler_flag(/EHsc                 LIBCXXABI_HAS_EHSC_FLAG)
-check_cxx_compiler_flag(/EHs-                 LIBCXXABI_HAS_NO_EHS_FLAG)
-check_cxx_compiler_flag(/EHa-                 LIBCXXABI_HAS_NO_EHA_FLAG)
-check_cxx_compiler_flag(/GR-                  LIBCXXABI_HAS_NO_GR_FLAG)
+check_cxx_compiler_flag(-nostdinc++ LIBCXXABI_HAS_NOSTDINCXX_FLAG)
 
 # Check libraries
 check_library_exists(dl dladdr "" LIBCXXABI_HAS_DL_LIB)

From 54d3c3d43651756bf37d5a1c73c85d889b739c1a Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 06:48:13 +0000
Subject: [PATCH 0581/1176] Mark CodeGen/asm-goto.c as x86 specific after
 r362045

llvm-svn: 362059
---
 clang/test/CodeGen/asm-goto.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/asm-goto.c b/clang/test/CodeGen/asm-goto.c
index 2c4a1a0c4df1c..99e97f2a41e12 100644
--- a/clang/test/CodeGen/asm-goto.c
+++ b/clang/test/CodeGen/asm-goto.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -O0 -emit-llvm  %s -o - | FileCheck %s
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64 -O0 -emit-llvm %s -o - | FileCheck %s
 
 int foo(int cond)
 {

From a807495fd1907a6ec424e797621c00e4cbeb522e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 30 May 2019 06:48:13 +0000
Subject: [PATCH 0582/1176] [LoopVectorize] Precommit tests for D62510. NFC

llvm-svn: 362060
---
 .../Transforms/LoopVectorize/X86/fneg-cost.ll | 26 ++++++++++++++
 llvm/test/Transforms/LoopVectorize/fneg.ll    | 35 +++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/fneg.ll

diff --git a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
new file mode 100644
index 0000000000000..6589871674291
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: Found an estimated cost of 2 for VF 1 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 6 for VF 2 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 14 for VF 4 For instruction:   %neg = fneg float %{{.*}}
+define void @fneg_cost(float* %a, i64 %n) {
+entry:
+  br label %for.body
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %neg = fneg float %0
+  store float %neg, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/fneg.ll b/llvm/test/Transforms/LoopVectorize/fneg.ll
new file mode 100644
index 0000000000000..8e5e2aae9fdd3
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/fneg.ll
@@ -0,0 +1,35 @@
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+define void @foo(float* %a, i64 %n) {
+; CHECK:       vector.body:
+; CHECK:         [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = fneg float [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT:    [[TMP9:%.*]] = fneg float [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT:    [[TMP11:%.*]] = fneg float [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
+; CHECK:         store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %sub = fneg float %0
+  store float %sub, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cmp, label %for.exit, label %for.body
+
+for.exit:
+  ret void
+}

From b19977d5afbff801b3228d89b14147397a935ffe Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 06:57:27 +0000
Subject: [PATCH 0583/1176] [runtimes] Use _LIBCPP_HAS_COMMENT_LIB_PRAGMA in
 all relevant files

These two sources were omitted in r362055.

llvm-svn: 362061
---
 libcxx/src/condition_variable.cpp | 2 +-
 libcxx/src/memory.cpp             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/src/condition_variable.cpp b/libcxx/src/condition_variable.cpp
index 69264c680d93e..8d769f42f6fbd 100644
--- a/libcxx/src/condition_variable.cpp
+++ b/libcxx/src/condition_variable.cpp
@@ -15,7 +15,7 @@
 #include "system_error"
 #include "__undef_macros"
 
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 
diff --git a/libcxx/src/memory.cpp b/libcxx/src/memory.cpp
index 6df7226b35756..c8c00383cf92d 100644
--- a/libcxx/src/memory.cpp
+++ b/libcxx/src/memory.cpp
@@ -10,7 +10,7 @@
 #ifndef _LIBCPP_HAS_NO_THREADS
 #include "mutex"
 #include "thread"
-#if defined(__unix__) &&  defined(__ELF__) && defined(__clang__)
+#if defined(__unix__) &&  defined(__ELF__) && defined(_LIBCPP_HAS_COMMENT_LIB_PRAGMA)
 #pragma comment(lib, "pthread")
 #endif
 #endif

From 851f57effaaa8b13d345142f43dbed35e3047f23 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 30 May 2019 07:21:08 +0000
Subject: [PATCH 0584/1176] [AST] asm goto labels don't have constraints, don't
 try to copy them.

Found by asan.

llvm-svn: 362062
---
 clang/lib/AST/Stmt.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index e9a2c58f1a17c..0a4d403106bd4 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -483,9 +483,10 @@ void GCCAsmStmt::setOutputsAndInputsAndClobbers(const ASTContext &C,
   this->Exprs = new (C) Stmt*[NumExprs];
   std::copy(Exprs, Exprs + NumExprs, this->Exprs);
 
+  unsigned NumConstraints = NumOutputs + NumInputs;
   C.Deallocate(this->Constraints);
-  this->Constraints = new (C) StringLiteral*[NumExprs];
-  std::copy(Constraints, Constraints + NumExprs, this->Constraints);
+  this->Constraints = new (C) StringLiteral*[NumConstraints];
+  std::copy(Constraints, Constraints + NumConstraints, this->Constraints);
 
   C.Deallocate(this->Clobbers);
   this->Clobbers = new (C) StringLiteral*[NumClobbers];
@@ -756,8 +757,9 @@ GCCAsmStmt::GCCAsmStmt(const ASTContext &C, SourceLocation asmloc,
   Exprs = new (C) Stmt*[NumExprs];
   std::copy(exprs, exprs + NumExprs, Exprs);
 
-  Constraints = new (C) StringLiteral*[NumExprs];
-  std::copy(constraints, constraints + NumExprs, Constraints);
+  unsigned NumConstraints = NumOutputs + NumInputs;
+  Constraints = new (C) StringLiteral*[NumConstraints];
+  std::copy(constraints, constraints + NumConstraints, Constraints);
 
   Clobbers = new (C) StringLiteral*[NumClobbers];
   std::copy(clobbers, clobbers + NumClobbers, Clobbers);

From f04b3635c40e76e525f47807f64fdbe0a9fdf4ba Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 30 May 2019 07:25:22 +0000
Subject: [PATCH 0585/1176] [lldb-server] Support 'g' packets

Differential Revision: https://reviews.llvm.org/D62221
Patch by Guilherme Andrade <guiandrade@google.com>.

llvm-svn: 362063
---
 .../tools/lldb-server/TestGdbRemoteGPacket.py |  41 -----
 .../tools/lldb-server/TestLldbGdbServer.py    |   4 +-
 .../tools/lldb-server/gdbremote_testcase.py   |   5 +
 .../lldb-server/register-reading/Makefile     |   5 +
 .../register-reading/TestGdbRemoteGPacket.py  | 153 ++++++++++++++++++
 .../lldb-server/register-reading/main.cpp     |  54 +++++++
 .../GDBRemoteCommunicationServerLLGS.cpp      |  58 +++++++
 .../GDBRemoteCommunicationServerLLGS.h        |   2 +
 .../Utility/StringExtractorGDBRemote.cpp      |   4 +-
 9 files changed, 279 insertions(+), 47 deletions(-)
 delete mode 100644 lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteGPacket.py
 create mode 100644 lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/Makefile
 create mode 100644 lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/TestGdbRemoteGPacket.py
 create mode 100644 lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/main.cpp

diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteGPacket.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteGPacket.py
deleted file mode 100644
index cfadbc8f7d0f0..0000000000000
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestGdbRemoteGPacket.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import print_function
-
-
-import gdbremote_testcase
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
-from lldbsuite.test import lldbutil
-
-
-class TestGdbRemoteGPacket(gdbremote_testcase.GdbRemoteTestCaseBase):
-
-    mydir = TestBase.compute_mydir(__file__)
-
-    def run_test_g_packet(self):
-        self.build()
-        self.prep_debug_monitor_and_inferior()
-        self.test_sequence.add_log_lines(
-            ["read packet: $g#67",
-             {"direction": "send", "regex": r"^\$(.+)#[0-9a-fA-F]{2}$",
-              "capture": {1: "register_bank"}}],
-            True)
-        self.connect_to_debug_monitor()
-        context = self.expect_gdbremote_sequence()
-        register_bank = context.get("register_bank")
-        self.assertTrue(register_bank[0] != 'E')
-
-        self.test_sequence.add_log_lines(
-            ["read packet: $G" + register_bank + "#00",
-             {"direction": "send", "regex": r"^\$(.+)#[0-9a-fA-F]{2}$",
-              "capture": {1: "G_reply"}}],
-            True)
-        context = self.expect_gdbremote_sequence()
-        self.assertTrue(context.get("G_reply")[0] != 'E')
-
-
-    @skipIfOutOfTreeDebugserver
-    @debugserver_test
-    @skipIfDarwinEmbedded
-    def test_g_packet_debugserver(self):
-        self.init_debugserver_test()
-        self.run_test_g_packet()
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py
index da2333920163c..a3bad6873f134 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py
@@ -558,9 +558,7 @@ def p_returns_correct_data_size_for_each_qRegisterInfo(self):
         self.assertIsNotNone(reg_infos)
         self.assertTrue(len(reg_infos) > 0)
 
-        inferior_exe_path = self.getBuildArtifact("a.out")
-        Target = self.dbg.CreateTarget(inferior_exe_path)
-        byte_order = Target.GetByteOrder()
+        byte_order = self.get_target_byte_order()
 
         # Read value for each register.
         reg_index = 0
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
index 6b807a0347c24..e7c63bf21e821 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/gdbremote_testcase.py
@@ -378,6 +378,11 @@ def get_debug_monitor_command_line_args(self, attach_pid=None):
             commandline_args += ["--named-pipe", self.named_pipe_path]
         return commandline_args
 
+    def get_target_byte_order(self):
+        inferior_exe_path = self.getBuildArtifact("a.out")
+        target = self.dbg.CreateTarget(inferior_exe_path)
+        return target.GetByteOrder()
+
     def launch_debug_monitor(self, attach_pid=None, logfile=None):
         # Create the command line.
         commandline_args = self.get_debug_monitor_command_line_args(
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/Makefile b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/Makefile
new file mode 100644
index 0000000000000..314f1cb2f077b
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/Makefile
@@ -0,0 +1,5 @@
+LEVEL = ../../../make
+
+CXX_SOURCES := main.cpp
+
+include $(LEVEL)/Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/TestGdbRemoteGPacket.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/TestGdbRemoteGPacket.py
new file mode 100644
index 0000000000000..e13daeb6d9a43
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/TestGdbRemoteGPacket.py
@@ -0,0 +1,153 @@
+from __future__ import print_function
+
+
+import gdbremote_testcase
+import textwrap
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+def _extract_register_value(reg_info, reg_bank, byte_order, bytes_per_entry=8):
+    reg_offset = int(reg_info["offset"])*2
+    reg_byte_size = int(2 * int(reg_info["bitsize"]) / 8)
+    # Create slice with the contents of the register.
+    reg_slice = reg_bank[reg_offset:reg_offset+reg_byte_size]
+
+    reg_value = []
+    # Wrap slice according to bytes_per_entry.
+    for entry in textwrap.wrap(reg_slice, 2 * bytes_per_entry):
+        # Invert the bytes order if target uses little-endian.
+        if byte_order == lldb.eByteOrderLittle:
+            entry = "".join(reversed([entry[i:i+2] for i in range(0,
+                                          len(entry),2)]))
+        reg_value.append("0x" + entry)
+
+    return reg_value
+
+
+class TestGdbRemoteGPacket(gdbremote_testcase.GdbRemoteTestCaseBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    def run_test_g_packet(self):
+        self.build()
+        self.prep_debug_monitor_and_inferior()
+        self.test_sequence.add_log_lines(
+            ["read packet: $g#67",
+             {"direction": "send", "regex": r"^\$(.+)#[0-9a-fA-F]{2}$",
+              "capture": {1: "register_bank"}}],
+            True)
+        self.connect_to_debug_monitor()
+        context = self.expect_gdbremote_sequence()
+        register_bank = context.get("register_bank")
+        self.assertTrue(register_bank[0] != 'E')
+
+        self.test_sequence.add_log_lines(
+            ["read packet: $G" + register_bank + "#00",
+             {"direction": "send", "regex": r"^\$(.+)#[0-9a-fA-F]{2}$",
+              "capture": {1: "G_reply"}}],
+            True)
+        context = self.expect_gdbremote_sequence()
+        self.assertTrue(context.get("G_reply")[0] != 'E')
+
+    @skipIfOutOfTreeDebugserver
+    @debugserver_test
+    @skipIfDarwinEmbedded
+    def test_g_packet_debugserver(self):
+        self.init_debugserver_test()
+        self.run_test_g_packet()
+
+    @skipIf(archs=no_match(["x86_64"]))
+    def g_returns_correct_data(self, with_suffix):
+        procs = self.prep_debug_monitor_and_inferior()
+
+        self.add_register_info_collection_packets()
+        if with_suffix:
+            self.add_thread_suffix_request_packets()
+        self.add_threadinfo_collection_packets()
+        context = self.expect_gdbremote_sequence()
+        self.assertIsNotNone(context)
+
+        # Gather register info.
+        reg_infos = self.parse_register_info_packets(context)
+        self.assertIsNotNone(reg_infos)
+        self.add_lldb_register_index(reg_infos)
+        # Index register info entries by name.
+        reg_infos = {info['name']: info for info in reg_infos}
+
+        # Gather thread info.
+        if with_suffix:
+            threads = self.parse_threadinfo_packets(context)
+            self.assertIsNotNone(threads)
+            thread_id = threads[0]
+            self.assertIsNotNone(thread_id)
+        else:
+            thread_id = None
+
+        # Send vCont packet to resume the inferior.
+        self.test_sequence.add_log_lines(["read packet: $vCont;c#a8",
+                                          {"direction": "send",
+                                           "regex": r"^\$T([0-9a-fA-F]{2}).*#[0-9a-fA-F]{2}$",
+                                           "capture": {1: "hex_exit_code"}},
+                                          ],
+                                         True)
+
+        # Send g packet to retrieve the register bank
+        if thread_id:
+            g_request = "read packet: $g;thread:{:x}#00".format(thread_id)
+        else:
+            g_request = "read packet: $g#00"
+        self.test_sequence.add_log_lines(
+            [g_request,
+             {"direction": "send", "regex": r"^\$(.+)#[0-9a-fA-F]{2}$",
+              "capture": {1: "register_bank"}}],
+            True)
+        context = self.expect_gdbremote_sequence()
+        self.assertIsNotNone(context)
+        reg_bank = context.get("register_bank")
+        self.assertTrue(reg_bank[0] != 'E')
+
+        byte_order = self.get_target_byte_order()
+        get_reg_value = lambda reg_name : _extract_register_value(
+            reg_infos[reg_name], reg_bank, byte_order)
+
+        self.assertEqual(['0x0102030405060708'], get_reg_value('r8'))
+        self.assertEqual(['0x1112131415161718'], get_reg_value('r9'))
+        self.assertEqual(['0x2122232425262728'], get_reg_value('r10'))
+        self.assertEqual(['0x3132333435363738'], get_reg_value('r11'))
+        self.assertEqual(['0x4142434445464748'], get_reg_value('r12'))
+        self.assertEqual(['0x5152535455565758'], get_reg_value('r13'))
+        self.assertEqual(['0x6162636465666768'], get_reg_value('r14'))
+        self.assertEqual(['0x7172737475767778'], get_reg_value('r15'))
+
+        self.assertEqual(
+            ['0x020406080a0c0e01', '0x030507090b0d0f00'], get_reg_value('xmm8'))
+        self.assertEqual(
+            ['0x121416181a1c1e11', '0x131517191b1d1f10'], get_reg_value('xmm9'))
+        self.assertEqual(
+            ['0x222426282a2c2e21', '0x232527292b2d2f20'], get_reg_value('xmm10'))
+        self.assertEqual(
+            ['0x323436383a3c3e31', '0x333537393b3d3f30'], get_reg_value('xmm11'))
+        self.assertEqual(
+            ['0x424446484a4c4e41', '0x434547494b4d4f40'], get_reg_value('xmm12'))
+        self.assertEqual(
+            ['0x525456585a5c5e51', '0x535557595b5d5f50'], get_reg_value('xmm13'))
+        self.assertEqual(
+            ['0x626466686a6c6e61', '0x636567696b6d6f60'], get_reg_value('xmm14'))
+        self.assertEqual(
+            ['0x727476787a7c7e71', '0x737577797b7d7f70'], get_reg_value('xmm15'))
+
+    @llgs_test
+    def test_g_returns_correct_data_with_suffix_llgs(self):
+        self.init_llgs_test()
+        self.build()
+        self.set_inferior_startup_launch()
+        self.g_returns_correct_data(True)
+
+    @llgs_test
+    def test_g_returns_correct_data_no_suffix_llgs(self):
+        self.init_llgs_test()
+        self.build()
+        self.set_inferior_startup_launch()
+        self.g_returns_correct_data(False)
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/main.cpp b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/main.cpp
new file mode 100644
index 0000000000000..32eda6d3c5594
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/register-reading/main.cpp
@@ -0,0 +1,54 @@
+#include <cstdint>
+
+struct alignas(16) xmm_t {
+  uint64_t a, b;
+};
+
+int main() {
+  uint64_t r8 = 0x0102030405060708;
+  uint64_t r9 = 0x1112131415161718;
+  uint64_t r10 = 0x2122232425262728;
+  uint64_t r11 = 0x3132333435363738;
+  uint64_t r12 = 0x4142434445464748;
+  uint64_t r13 = 0x5152535455565758;
+  uint64_t r14 = 0x6162636465666768;
+  uint64_t r15 = 0x7172737475767778;
+
+  xmm_t xmm8 = {0x020406080A0C0E01, 0x030507090B0D0F00};
+  xmm_t xmm9 = {0x121416181A1C1E11, 0x131517191B1D1F10};
+  xmm_t xmm10 = {0x222426282A2C2E21, 0x232527292B2D2F20};
+  xmm_t xmm11 = {0x323436383A3C3E31, 0x333537393B3D3F30};
+  xmm_t xmm12 = {0x424446484A4C4E41, 0x434547494B4D4F40};
+  xmm_t xmm13 = {0x525456585A5C5E51, 0x535557595B5D5F50};
+  xmm_t xmm14 = {0x626466686A6C6E61, 0x636567696B6D6F60};
+  xmm_t xmm15 = {0x727476787A7C7E71, 0x737577797B7D7F70};
+
+  asm volatile("movq    %0, %%r8\n\t"
+               "movq    %1, %%r9\n\t"
+               "movq    %2, %%r10\n\t"
+               "movq    %3, %%r11\n\t"
+               "movq    %4, %%r12\n\t"
+               "movq    %5, %%r13\n\t"
+               "movq    %6, %%r14\n\t"
+               "movq    %7, %%r15\n\t"
+               "\n\t"
+               "movaps  %8, %%xmm8\n\t"
+               "movaps  %9, %%xmm9\n\t"
+               "movaps  %10, %%xmm10\n\t"
+               "movaps  %11, %%xmm11\n\t"
+               "movaps  %12, %%xmm12\n\t"
+               "movaps  %13, %%xmm13\n\t"
+               "movaps  %14, %%xmm14\n\t"
+               "movaps  %15, %%xmm15\n\t"
+               "\n\t"
+               "int3"
+               :
+               : "g"(r8), "g"(r9), "g"(r10), "g"(r11), "g"(r12), "g"(r13),
+                 "g"(r14), "g"(r15), "m"(xmm8), "m"(xmm9), "m"(xmm10),
+                 "m"(xmm11), "m"(xmm12), "m"(xmm13), "m"(xmm14), "m"(xmm15)
+               : "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+                 "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13",
+                 "%xmm14", "%xmm15");
+
+  return 0;
+}
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
index 045f4b43b40c7..2778b1243344c 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
@@ -187,6 +187,9 @@ void GDBRemoteCommunicationServerLLGS::RegisterPacketHandlers() {
       StringExtractorGDBRemote::eServerPacketType_jTraceConfigRead,
       &GDBRemoteCommunicationServerLLGS::Handle_jTraceConfigRead);
 
+  RegisterMemberFunctionHandler(StringExtractorGDBRemote::eServerPacketType_g,
+                                &GDBRemoteCommunicationServerLLGS::Handle_g);
+
   RegisterPacketHandler(StringExtractorGDBRemote::eServerPacketType_k,
                         [this](StringExtractorGDBRemote packet, Status &error,
                                bool &interrupt, bool &quit) {
@@ -1891,6 +1894,61 @@ GDBRemoteCommunicationServerLLGS::Handle_qsThreadInfo(
   return SendPacketNoLock("l");
 }
 
+GDBRemoteCommunication::PacketResult
+GDBRemoteCommunicationServerLLGS::Handle_g(StringExtractorGDBRemote &packet) {
+  Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD));
+
+  // Move past packet name.
+  packet.SetFilePos(strlen("g"));
+
+  // Get the thread to use.
+  NativeThreadProtocol *thread = GetThreadFromSuffix(packet);
+  if (!thread) {
+    LLDB_LOG(log, "failed, no thread available");
+    return SendErrorResponse(0x15);
+  }
+
+  // Get the thread's register context.
+  NativeRegisterContext &reg_ctx = thread->GetRegisterContext();
+
+  std::vector<uint8_t> regs_buffer;
+  for (uint32_t reg_num = 0; reg_num < reg_ctx.GetUserRegisterCount();
+       ++reg_num) {
+    const RegisterInfo *reg_info = reg_ctx.GetRegisterInfoAtIndex(reg_num);
+
+    if (reg_info == nullptr) {
+      LLDB_LOG(log, "failed to get register info for register index {0}",
+               reg_num);
+      return SendErrorResponse(0x15);
+    }
+
+    if (reg_info->value_regs != nullptr)
+      continue; // skip registers that are contained in other registers
+
+    RegisterValue reg_value;
+    Status error = reg_ctx.ReadRegister(reg_info, reg_value);
+    if (error.Fail()) {
+      LLDB_LOG(log, "failed to read register at index {0}", reg_num);
+      return SendErrorResponse(0x15);
+    }
+
+    if (reg_info->byte_offset + reg_info->byte_size >= regs_buffer.size())
+      // Resize the buffer to guarantee it can store the register offsetted
+      // data.
+      regs_buffer.resize(reg_info->byte_offset + reg_info->byte_size);
+
+    // Copy the register offsetted data to the buffer.
+    memcpy(regs_buffer.data() + reg_info->byte_offset, reg_value.GetBytes(),
+           reg_info->byte_size);
+  }
+
+  // Write the response.
+  StreamGDBRemote response;
+  response.PutBytesAsRawHex8(regs_buffer.data(), regs_buffer.size());
+
+  return SendPacketNoLock(response.GetString());
+}
+
 GDBRemoteCommunication::PacketResult
 GDBRemoteCommunicationServerLLGS::Handle_p(StringExtractorGDBRemote &packet) {
   Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD));
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
index 1609174b1d353..1f626c2ad7656 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
@@ -178,6 +178,8 @@ class GDBRemoteCommunicationServerLLGS
 
   PacketResult Handle_QPassSignals(StringExtractorGDBRemote &packet);
 
+  PacketResult Handle_g(StringExtractorGDBRemote &packet);
+
   void SetCurrentThreadID(lldb::tid_t tid);
 
   lldb::tid_t GetCurrentThreadID() const;
diff --git a/lldb/source/Utility/StringExtractorGDBRemote.cpp b/lldb/source/Utility/StringExtractorGDBRemote.cpp
index 683b62cb25916..905c36988eee1 100644
--- a/lldb/source/Utility/StringExtractorGDBRemote.cpp
+++ b/lldb/source/Utility/StringExtractorGDBRemote.cpp
@@ -377,9 +377,7 @@ StringExtractorGDBRemote::GetServerPacketType() const {
     break;
 
   case 'g':
-    if (packet_size == 1)
-      return eServerPacketType_g;
-    break;
+    return eServerPacketType_g;
 
   case 'G':
     return eServerPacketType_G;

From de234847e9dbc26d8b3377871fde3943c6fb309f Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 07:30:37 +0000
Subject: [PATCH 0586/1176] [TableGen] New default operand "undef_tied_input"

This is a new special identifier which you can use as a default in
OperandWithDefaultOps. The idea is that you use it for an input
operand of an instruction that's tied to an output operand, and its
semantics are that (in the default case) the input operand's value is
not used at all.

The detailed effect is that when instruction selection emits the
instruction in the form of a pre-regalloc MachineInstr, it creates an
IMPLICIT_DEF node to use as that input.

If you're creating an MCInst with explicit register names, then the
right handling would be to set the input operand to the same register
as the output one (honouring the tie) and to add the 'undef' flag
indicating that that register is deemed to acquire a new don't-care
definition just before we read it. But I haven't done that in this
commit, because there was no need to - no Tablegen backend seems to
autogenerate default fields in an MCInst.

Patch by: Simon Tatham

Differential Revision: https://reviews.llvm.org/D60696

llvm-svn: 362064
---
 llvm/utils/TableGen/CodeGenDAGPatterns.cpp |  2 +-
 llvm/utils/TableGen/DAGISelMatcherGen.cpp  | 11 ++++++++
 llvm/utils/TableGen/GlobalISelEmitter.cpp  | 29 +++++++++++++++++++---
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 4b491bf3c9243..54f3e006608ad 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2132,7 +2132,7 @@ static TypeSetByHwMode getImplicitType(Record *R, unsigned ResNo,
 
   if (R->getName() == "node" || R->getName() == "srcvalue" ||
       R->getName() == "zero_reg" || R->getName() == "immAllOnesV" ||
-      R->getName() == "immAllZerosV") {
+      R->getName() == "immAllZerosV" || R->getName() == "undef_tied_input") {
     // Placeholder.
     return TypeSetByHwMode(); // Unknown.
   }
diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index b2285befad3af..3d3ae9c21ebef 100644
--- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -691,6 +691,17 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
       return;
     }
 
+    if (Def->getName() == "undef_tied_input") {
+      std::array<MVT::SimpleValueType, 1> ResultVTs = { N->getSimpleType(0) };
+      std::array<unsigned, 0> InstOps;
+      auto IDOperandNo = NextRecordedOperandNo++;
+      AddMatcher(new EmitNodeMatcher("TargetOpcode::IMPLICIT_DEF",
+                                     ResultVTs, InstOps, false, false, false,
+                                     false, -1, IDOperandNo));
+      ResultOps.push_back(IDOperandNo);
+      return;
+    }
+
     // Handle a reference to a register class. This is used
     // in COPY_TO_SUBREG instructions.
     if (Def->isSubClassOf("RegisterOperand"))
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 839529601b988..ad13b33f8b75c 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3037,7 +3037,8 @@ class GlobalISelEmitter {
   importExplicitUseRenderer(action_iterator InsertPt, RuleMatcher &Rule,
                             BuildMIAction &DstMIBuilder,
                             TreePatternNode *DstChild);
-  Error importDefaultOperandRenderers(BuildMIAction &DstMIBuilder,
+  Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
+                                      BuildMIAction &DstMIBuilder,
                                       DagInit *DefaultOps) const;
   Error
   importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
@@ -3777,7 +3778,8 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
     // end up with too many rendered operands.
     if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) {
       DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
-      if (auto Error = importDefaultOperandRenderers(DstMIBuilder, DefaultOps))
+      if (auto Error = importDefaultOperandRenderers(
+            InsertPt, M, DstMIBuilder, DefaultOps))
         return std::move(Error);
       ++NumDefaultOps;
       continue;
@@ -3802,19 +3804,38 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
 }
 
 Error GlobalISelEmitter::importDefaultOperandRenderers(
-    BuildMIAction &DstMIBuilder, DagInit *DefaultOps) const {
+    action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+    DagInit *DefaultOps) const {
   for (const auto *DefaultOp : DefaultOps->getArgs()) {
+    Optional<LLTCodeGen> OpTyOrNone = None;
+
     // Look through ValueType operators.
     if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) {
       if (const DefInit *DefaultDagOperator =
               dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
         if (DefaultDagOperator->getDef()->isSubClassOf("ValueType"))
+          OpTyOrNone = MVTToLLT(getValueType(
+                                  DefaultDagOperator->getDef()));
           DefaultOp = DefaultDagOp->getArg(0);
       }
     }
 
     if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) {
-      DstMIBuilder.addRenderer<AddRegisterRenderer>(DefaultDefOp->getDef());
+      auto Def = DefaultDefOp->getDef();
+      if (Def->getName() == "undef_tied_input") {
+        unsigned TempRegID = M.allocateTempRegID();
+        M.insertAction<MakeTempRegisterAction>(
+          InsertPt, OpTyOrNone.getValue(), TempRegID);
+        InsertPt = M.insertAction<BuildMIAction>(
+          InsertPt, M.allocateOutputInsnID(),
+          &Target.getInstruction(RK.getDef("IMPLICIT_DEF")));
+        BuildMIAction &IDMIBuilder = *static_cast<BuildMIAction *>(
+          InsertPt->get());
+        IDMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
+        DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
+      } else {
+        DstMIBuilder.addRenderer<AddRegisterRenderer>(Def);
+      }
       continue;
     }
 

From 433a19168379cc87c7ea4ee8dbdca5b76f2e4b30 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Thu, 30 May 2019 07:34:39 +0000
Subject: [PATCH 0587/1176] [CMake] Use find_package(LLVM) instead of
 LLVMConfig

This addresses an issues introduced in r362047.

Differential Revision: https://reviews.llvm.org/D62640

llvm-svn: 362065
---
 libunwind/CMakeLists.txt     |  2 +-
 llvm/runtimes/CMakeLists.txt | 11 ++---------
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 16bfb9a82020c..b51922a48fe28 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -15,7 +15,7 @@ set(CMAKE_MODULE_PATH
   ${CMAKE_MODULE_PATH}
   )
 
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD)
   project(libunwind)
 
   # Rely on llvm-config.
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index b9531daa4abf3..e91003b5b19f6 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -59,12 +59,13 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
   cmake_minimum_required(VERSION 3.4.3)
   project(Runtimes C CXX ASM)
 
+  find_package(LLVM PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH)
+
   # Add the root project's CMake modules, and the LLVM build's modules to the
   # CMake module path.
   list(INSERT CMAKE_MODULE_PATH 0
     "${CMAKE_CURRENT_SOURCE_DIR}/../cmake"
     "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules"
-    "${LLVM_LIBRARY_DIR}/cmake/llvm"
   )
 
   # Some of the runtimes will conditionally use the compiler-rt sanitizers
@@ -79,20 +80,12 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
     endif()
   endif()
 
-  # LLVMConfig.cmake contains a bunch of CMake variables from the LLVM build.
-  # This file is installed as part of LLVM distributions, so this can be used
-  # either from a build directory or an installed LLVM.
-  include(LLVMConfig)
-
   # Setting these variables will allow the sub-build to put their outputs into
   # the library and bin directories of the top-level build.
   set(LLVM_LIBRARY_OUTPUT_INTDIR ${LLVM_LIBRARY_DIR})
   set(LLVM_RUNTIME_OUTPUT_INTDIR ${LLVM_TOOLS_BINARY_DIR})
 
   # This variable makes sure that e.g. llvm-lit is found.
-  set(LLVM_BINARY_DIR ${LLVM_BUILD_BINARY_DIR})
-  set(LLVM_LIBRARY_DIR ${LLVM_BUILD_LIBRARY_DIR})
-  set(LLVM_INCLUDE_DIR ${LLVM_BUILD_MAIN_INCLUDE_DIR})
   set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR})
   set(LLVM_CMAKE_PATH ${LLVM_MAIN_SRC_DIR}/cmake/modules)
 

From bb4839d4157b3dc2552687f2e6dcb20245d2988e Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 07:38:09 +0000
Subject: [PATCH 0588/1176] [TableGen] AsmMatcher: allow repeated input
 operands

If an assembly instruction has to mention an input operand name twice,
for example the MVE VMOV instruction that accesses two lanes of the
same vector by writing 'vmov r1, r2, q0[3], q0[1]', then the obvious
way to write its AsmString is to include the same operand (here $Qd)
twice. But this causes the AsmMatcher generator to omit that
instruction completely from the match table, on the basis that the
generator isn't clever enough to deal with the duplication.

But you need to have _some_ way of dealing with an instruction like
this - and in this case, where the mnemonic is shared with many other
instructions that the AsmMatcher does handle, it would be very painful
to take it out of the AsmMatcher system completely.

A nicer way is to add a custom AsmMatchConverter routine, and let that
deal with the problem if the autogenerated converter can't. But that
doesn't work, because TableGen leaves the instruction out of its table
_even_ if you provide a custom converter.

Solution: this change, which makes TableGen relax the restriction on
duplicated operands in the case where there's a custom converter.

Patch by: Simon Tatham

Differential Revision: https://reviews.llvm.org/D60695

llvm-svn: 362066
---
 llvm/utils/TableGen/AsmMatcherEmitter.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 1229b65b24be2..146d10835b8db 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1072,8 +1072,9 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
   // handle, the target should be refactored to use operands instead of
   // modifiers.
   //
-  // Also, check for instructions which reference the operand multiple times;
-  // this implies a constraint we would not honor.
+  // Also, check for instructions which reference the operand multiple times,
+  // if they don't define a custom AsmMatcher: this implies a constraint that
+  // the built-in matching code would not honor.
   std::set<std::string> OperandNames;
   for (const AsmOperand &Op : AsmOperands) {
     StringRef Tok = Op.Token;
@@ -1083,7 +1084,8 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
                       "' not supported by asm matcher.  Mark isCodeGenOnly!");
     // Verify that any operand is only mentioned once.
     // We reject aliases and ignore instructions for now.
-    if (!IsAlias && Tok[0] == '$' && !OperandNames.insert(Tok).second) {
+    if (!IsAlias && TheDef->getValueAsString("AsmMatchConverter").empty() &&
+        Tok[0] == '$' && !OperandNames.insert(Tok).second) {
       LLVM_DEBUG({
         errs() << "warning: '" << TheDef->getName() << "': "
                << "ignoring instruction with tied operand '"

From 9b8a0d7110e97db67f48368ee623d06cbb6075a5 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 08:03:02 +0000
Subject: [PATCH 0589/1176] asm goto: fix out-of-bounds read of Constraints
 after rC362045

When parsing goto labels, Names and Exprs are expanded but Constraints
is not, this may cause a out-of-bounds read later in:

// GCCAsmStmt::GCCAsmStmt
// `constraints` has only `NumExprs - NumLabels` elements
  Constraints = new (C) StringLiteral*[NumExprs];
  std::copy(constraints, constraints + NumExprs, Constraints);

llvm-svn: 362067
---
 clang/lib/Parse/ParseStmtAsm.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index 75f3ac396e1a4..e1c48da5f2ba3 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -846,6 +846,7 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
       ExprResult Res =
           Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD);
       Exprs.push_back(Res.get());
+      Constraints.emplace_back();
       NumLabels++;
       ConsumeToken();
       if (!TryConsumeToken(tok::comma))

From 5857bf5d1e5a5ffe5ae51a38514ee55495c0cc69 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 08:07:06 +0000
Subject: [PATCH 0590/1176] [ARM] Add an MVE execution domain

MVE architecturally specifies a 'beat' system in which a vector
instruction executed now will complete its actual operation over the
next four cycles, so it can overlap with the execution of the previous
and next MVE instruction.

This makes it generally an advantage to avoid moving values back and
forth between MVE registers and anywhere else, if there's any sensible
way to do the same processing in whatever register type the values
already occupied.

That's just what the 'execution domain' system is supposed to achieve.
So here we add a new execution domain which will contain all the MVE
vector instructions when they are added.

Patch by: Simon Tatham

Differential Revision: https://reviews.llvm.org/D60703

llvm-svn: 362068
---
 llvm/lib/Target/ARM/ARMInstrFormats.td         | 9 +++++----
 llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 5 +++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 00a72026c277d..b09aa8a97dfc3 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -120,14 +120,15 @@ def IndexModePost : IndexMode<2>;
 def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
-class Domain<bits<3> val> {
-  bits<3> Value = val;
+class Domain<bits<4> val> {
+  bits<4> Value = val;
 }
 def GenericDomain : Domain<0>;
 def VFPDomain     : Domain<1>; // Instructions in VFP domain only
 def NeonDomain    : Domain<2>; // Instructions in Neon domain only
 def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
 def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+def MVEDomain : Domain<8>; // Instructions in MVE and ARMv8.1m
 
 //===----------------------------------------------------------------------===//
 // ARM special operands.
@@ -324,8 +325,8 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
   let TSFlags{12-7} = Form;
   let TSFlags{13}    = isUnaryDataProc;
   let TSFlags{14}    = canXformTo16Bit;
-  let TSFlags{17-15} = D.Value;
-  let TSFlags{18}    = thumbArithFlagSetting;
+  let TSFlags{18-15} = D.Value;
+  let TSFlags{19}    = thumbArithFlagSetting;
 
   let Constraints = cstr;
   let Itinerary = itin;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 21a7f2dfb33d1..a654e645ba0ff 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -385,16 +385,17 @@ namespace ARMII {
     // instruction. Used by the parser to determine whether to require the 'S'
     // suffix on the mnemonic (when not in an IT block) or preclude it (when
     // in an IT block).
-    ThumbArithFlagSetting = 1 << 18,
+    ThumbArithFlagSetting = 1 << 19,
 
     //===------------------------------------------------------------------===//
     // Code domain.
     DomainShift   = 15,
-    DomainMask    = 7 << DomainShift,
+    DomainMask    = 15 << DomainShift,
     DomainGeneral = 0 << DomainShift,
     DomainVFP     = 1 << DomainShift,
     DomainNEON    = 2 << DomainShift,
     DomainNEONA8  = 4 << DomainShift,
+    DomainMVE     = 8 << DomainShift,
 
     //===------------------------------------------------------------------===//
     // Field shifts - such shifts are used to set field while generating

From 833dba01d9f2393f89ed4bd69e1b3b5c084285c3 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 30 May 2019 08:21:25 +0000
Subject: [PATCH 0591/1176] Make CompileUnit::GetSupportFiles return a const
 list

There's no reason for anyone to modify a list from outside of a symbol
file (as that would break a lot of invariants that symbol files depend
on).

Make the function return a const FileSpecList and fix up a couple of
places that were needlessly binding non-const references to the result
of this function.

llvm-svn: 362069
---
 lldb/include/lldb/Symbol/CompileUnit.h |  2 +-
 lldb/source/API/SBCompileUnit.cpp      | 14 ++++++--------
 lldb/source/Symbol/CompileUnit.cpp     |  4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h
index 9e14c99edb77d..c3ba2e2176e7a 100644
--- a/lldb/include/lldb/Symbol/CompileUnit.h
+++ b/lldb/include/lldb/Symbol/CompileUnit.h
@@ -232,7 +232,7 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
   ///
   /// \return
   ///     A support file list object.
-  FileSpecList &GetSupportFiles();
+  const FileSpecList &GetSupportFiles();
 
   /// Get the compile unit's imported module list.
   ///
diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp
index 48b501043e1cd..c9ca70645d958 100644
--- a/lldb/source/API/SBCompileUnit.cpp
+++ b/lldb/source/API/SBCompileUnit.cpp
@@ -118,10 +118,9 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line,
 uint32_t SBCompileUnit::GetNumSupportFiles() const {
   LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBCompileUnit, GetNumSupportFiles);
 
-  if (m_opaque_ptr) {
-    FileSpecList &support_files = m_opaque_ptr->GetSupportFiles();
-    return support_files.GetSize();
-  }
+  if (m_opaque_ptr)
+    return m_opaque_ptr->GetSupportFiles().GetSize();
+
   return 0;
 }
 
@@ -155,9 +154,8 @@ SBFileSpec SBCompileUnit::GetSupportFileAtIndex(uint32_t idx) const {
 
   SBFileSpec sb_file_spec;
   if (m_opaque_ptr) {
-    FileSpecList &support_files = m_opaque_ptr->GetSupportFiles();
-    FileSpec file_spec = support_files.GetFileSpecAtIndex(idx);
-    sb_file_spec.SetFileSpec(file_spec);
+    FileSpec spec = m_opaque_ptr->GetSupportFiles().GetFileSpecAtIndex(idx);
+    sb_file_spec.SetFileSpec(spec);
   }
 
 
@@ -172,7 +170,7 @@ uint32_t SBCompileUnit::FindSupportFileIndex(uint32_t start_idx,
                      sb_file, full);
 
   if (m_opaque_ptr) {
-    FileSpecList &support_files = m_opaque_ptr->GetSupportFiles();
+    const FileSpecList &support_files = m_opaque_ptr->GetSupportFiles();
     return support_files.FindFileIndex(start_idx, sb_file.ref(), full);
   }
   return 0;
diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp
index ba9fd2776f334..5fb9b6b9f7292 100644
--- a/lldb/source/Symbol/CompileUnit.cpp
+++ b/lldb/source/Symbol/CompileUnit.cpp
@@ -248,7 +248,7 @@ uint32_t CompileUnit::FindLineEntry(uint32_t start_idx, uint32_t line,
     // All the line table entries actually point to the version of the Compile
     // Unit that is in the support files (the one at 0 was artificially added.)
     // So prefer the one further on in the support files if it exists...
-    FileSpecList &support_files = GetSupportFiles();
+    const FileSpecList &support_files = GetSupportFiles();
     const bool full = true;
     file_idx = support_files.FindFileIndex(
         1, support_files.GetFileSpecAtIndex(0), full);
@@ -397,7 +397,7 @@ const std::vector<SourceModule> &CompileUnit::GetImportedModules() {
   return m_imported_modules;
 }
 
-FileSpecList &CompileUnit::GetSupportFiles() {
+const FileSpecList &CompileUnit::GetSupportFiles() {
   if (m_support_files.GetSize() == 0) {
     if (m_flags.IsClear(flagsParsedSupportFiles)) {
       m_flags.Set(flagsParsedSupportFiles);

From 028413f5ae011f29e6a951e961087f529b984cbc Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Thu, 30 May 2019 08:25:17 +0000
Subject: [PATCH 0592/1176] [AArch64][SVE2] Asm: add ext (immediate offset,
 constructive) instruction

Summary:
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62518

llvm-svn: 362070
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  3 +
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 15 ++++
 llvm/test/MC/AArch64/SVE2/ext-diagnostics.s   | 84 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ext.s               | 20 +++++
 4 files changed, 122 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/ext-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ext.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index cffbf43f0868e..75d8909eb11ea 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1324,6 +1324,9 @@ let Predicates = [HasSVE2] in {
   // sve_int_rotate_imm
   defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
 
+  // SVE2 extract vector (immediate offset, constructive)
+  def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 2ab53c5acac3a..740201f769d9a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -874,6 +874,21 @@ class sve_int_perm_extract_i<string asm>
   let ElementSize = ElementSizeNone;
 }
 
+class sve2_int_perm_extract_i_cons<string asm>
+: I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, imm0_255:$imm8),
+  asm, "\t$Zd, $Zn, $imm8",
+  "", []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zn;
+  bits<8> imm8;
+  let Inst{31-21} = 0b00000101011;
+  let Inst{20-16} = imm8{7-3};
+  let Inst{15-13} = 0b000;
+  let Inst{12-10} = imm8{2-0};
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Vector Select Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s
new file mode 100644
index 0000000000000..be1ac2ebc6d11
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s
@@ -0,0 +1,84 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element widths.
+
+ext z0.h, { z1.h, z2.h }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ext z0.h, { z1.h, z2.h }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.s, { z1.s, z2.s }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ext z0.s, { z1.s, z2.s }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.d, { z1.d, z2.d }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ext z0.d, { z1.d, z2.d }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Invalid immediate range.
+
+ext z0.b, { z1.b, z2.b }, #-1
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255].
+// CHECK-NEXT: ext z0.b, { z1.b, z2.b }, #-1
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.b, { z1.b, z2.b }, #256
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 255].
+// CHECK-NEXT: ext z0.b, { z1.b, z2.b }, #256
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ext z0.b, { }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ext z0.b, { }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.b, { z1.b }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ext z0.b, { z1.b }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.b, { z1.b, z2.b, z3.b }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ext z0.b, { z1.b, z2.b, z3.b }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.b, { z1.b, z2.h }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: ext z0.b, { z1.b, z2.h }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.b, { z1.b, z31.b }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: ext z0.b, { z1.b, z31.b }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ext z0.b, { v0.4b, v1.4b }, #0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ext z0.b, { v0.4b, v1.4b }, #0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+ext z31.b, { z30.b, z31.b }, #255
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ext z31.b, { z30.b, z31.b }, #255
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.b, p0/z, z6.b
+ext z31.b, { z30.b, z31.b }, #255
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ext z31.b, { z30.b, z31.b }, #255
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ext.s b/llvm/test/MC/AArch64/SVE2/ext.s
new file mode 100644
index 0000000000000..17e8249127626
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ext.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ext z0.b, { z1.b, z2.b }, #0
+// CHECK-INST: ext z0.b, { z1.b, z2.b }, #0
+// CHECK-ENCODING: [0x20,0x00,0x60,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 00 60 05 <unknown>
+
+ext z31.b, { z30.b, z31.b }, #255
+// CHECK-INST: ext z31.b, { z30.b, z31.b }, #255
+// CHECK-ENCODING: [0xdf,0x1f,0x7f,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 1f 7f 05 <unknown>

From 455c529f77ac7c7b46322069f747a558a8fd04c1 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Thu, 30 May 2019 08:35:12 +0000
Subject: [PATCH 0593/1176] [AArch64][SVE2] Asm: support FCVTX/FLOGB
 instructions

Summary:

Patch completes SVE2 support for:

    SVE Floating Point Unary Operations - Predicated Group

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62526

llvm-svn: 362071
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  6 +++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  6 +++
 llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s | 43 +++++++++++++++
 llvm/test/MC/AArch64/SVE2/fcvtx.s             | 50 +++++++++++++++++
 llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s | 28 ++++++++++
 llvm/test/MC/AArch64/SVE2/flogb.s             | 54 +++++++++++++++++++
 6 files changed, 187 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/fcvtx.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/flogb.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 75d8909eb11ea..7eacf5fca62e7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1327,6 +1327,12 @@ let Predicates = [HasSVE2] in {
   // SVE2 extract vector (immediate offset, constructive)
   def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
 
+  // SVE floating-point convert precision
+  def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
+
+  // SVE floating-point convert to integer
+  defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 740201f769d9a..8cf4db7733a87 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1676,6 +1676,12 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
   def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
 }
 
+multiclass sve2_fp_flogb<string asm> {
+  def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
+  def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
+  def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Unary Operations - Unpredicated Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s
new file mode 100644
index 0000000000000..4c6ee7ad4c2dc
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s
@@ -0,0 +1,43 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fcvtx    z0.b, p0/m, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx    z0.b, p0/m, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx    z0.h, p0/m, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx    z0.h, p0/m, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx    z0.s, p0/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx    z0.s, p0/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx    z0.d, p0/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx    z0.d, p0/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+fcvtx   z0.s, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fcvtx   z0.s, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// error: restricted predicate has range [0, 7].
+
+fcvtx    z0.s, p8/m, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: fcvtx    z0.s, p8/m, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtx.s b/llvm/test/MC/AArch64/SVE2/fcvtx.s
new file mode 100644
index 0000000000000..55d60cfeb10de
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fcvtx.s
@@ -0,0 +1,50 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+
+fcvtx    z0.s, p0/m, z0.d
+// CHECK-INST: fcvtx    z0.s, p0/m, z0.d
+// CHECK-ENCODING: [0x00,0xa0,0x0a,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 00 a0 0a 65 <unknown>
+
+fcvtx    z30.s, p7/m, z31.d
+// CHECK-INST: fcvtx    z30.s, p7/m, z31.d
+// CHECK-ENCODING: [0xfe,0xbf,0x0a,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: fe bf 0a 65 <unknown>
+
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z5.d, p0/z, z7.d
+// CHECK-INST: movprfx	z5.d, p0/z, z7.d
+// CHECK-ENCODING: [0xe5,0x20,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 20 d0 04 <unknown>
+
+fcvtx    z5.s, p0/m, z0.d
+// CHECK-INST: fcvtx	z5.s, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0x0a,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 05 a0 0a 65 <unknown>
+
+movprfx z5, z7
+// CHECK-INST: movprfx	z5, z7
+// CHECK-ENCODING: [0xe5,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: e5 bc 20 04 <unknown>
+
+fcvtx    z5.s, p0/m, z0.d
+// CHECK-INST: fcvtx	z5.s, p0/m, z0.d
+// CHECK-ENCODING: [0x05,0xa0,0x0a,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 05 a0 0a 65 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s
new file mode 100644
index 0000000000000..76608787c37a4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s
@@ -0,0 +1,28 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+flogb   z0.b, p0/m, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: flogb   z0.b, p0/m, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+flogb   z0.s, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: flogb   z0.s, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+flogb   z0.s, p8/m, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7]
+// CHECK-NEXT: flogb   z0.s, p8/m, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/flogb.s b/llvm/test/MC/AArch64/SVE2/flogb.s
new file mode 100644
index 0000000000000..4ef28f0859a39
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/flogb.s
@@ -0,0 +1,54 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+flogb    z31.h, p7/m, z31.h
+// CHECK-INST: flogb	z31.h, p7/m, z31.h
+// CHECK-ENCODING: [0xff,0xbf,0x1a,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1a 65 <unknown>
+
+flogb    z31.s, p7/m, z31.s
+// CHECK-INST: flogb	z31.s, p7/m, z31.s
+// CHECK-ENCODING: [0xff,0xbf,0x1c,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1c 65 <unknown>
+
+flogb    z31.d, p7/m, z31.d
+// CHECK-INST: flogb	z31.d, p7/m, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0x1e,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1e 65 <unknown>
+
+
+// --------------------------------------------------------------------------//
+// Test compatibility with MOVPRFX instruction.
+
+movprfx z4.d, p7/z, z6.d
+// CHECK-INST: movprfx	z4.d, p7/z, z6.d
+// CHECK-ENCODING: [0xc4,0x3c,0xd0,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 3c d0 04 <unknown>
+
+flogb    z4.d, p7/m, z31.d
+// CHECK-INST: flogb	z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0x1e,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e4 bf 1e 65 <unknown>
+
+movprfx z4, z6
+// CHECK-INST: movprfx	z4, z6
+// CHECK-ENCODING: [0xc4,0xbc,0x20,0x04]
+// CHECK-ERROR: instruction requires: sve
+// CHECK-UNKNOWN: c4 bc 20 04 <unknown>
+
+flogb    z4.d, p7/m, z31.d
+// CHECK-INST: flogb	z4.d, p7/m, z31.d
+// CHECK-ENCODING: [0xe4,0xbf,0x1e,0x65]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: e4 bf 1e 65 <unknown>

From ebe23041f087cb9decc7db12b256cf4fab48cb72 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Thu, 30 May 2019 08:44:27 +0000
Subject: [PATCH 0594/1176] [AArch64][SVE2] Asm: support SVE2 load instructions

Summary:
Patch adds support for the following instructions:
    * LDNT1SB, LDNT1B, LDNT1SH, LDNT1H, LDNT1SW, LDNT1W, LDNT1D

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62528

llvm-svn: 362072
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 15 +++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 40 ++++++++
 llvm/test/MC/AArch64/SVE/ldnt1b-diagnostics.s |  4 +-
 .../test/MC/AArch64/SVE2/ldnt1b-diagnostics.s | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1b.s            | 80 ++++++++++++++++
 .../test/MC/AArch64/SVE2/ldnt1d-diagnostics.s | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1d.s            | 44 +++++++++
 .../test/MC/AArch64/SVE2/ldnt1h-diagnostics.s | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1h.s            | 80 ++++++++++++++++
 .../MC/AArch64/SVE2/ldnt1sb-diagnostics.s     | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1sb.s           | 80 ++++++++++++++++
 .../MC/AArch64/SVE2/ldnt1sh-diagnostics.s     | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1sh.s           | 80 ++++++++++++++++
 .../MC/AArch64/SVE2/ldnt1sw-diagnostics.s     | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1sw.s           | 44 +++++++++
 .../test/MC/AArch64/SVE2/ldnt1w-diagnostics.s | 91 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/ldnt1w.s            | 80 ++++++++++++++++
 17 files changed, 1182 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1b-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1b.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1d-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1d.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1h-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1h.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1sb-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1sb.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1sh-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1sh.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1sw-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1sw.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1w-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/ldnt1w.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7eacf5fca62e7..82aab630fa746 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1333,6 +1333,21 @@ let Predicates = [HasSVE2] in {
   // SVE floating-point convert to integer
   defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
 
+  // Non-temporal contiguous loads (vector + register)
+  defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
+  defm LDNT1B_ZZR_S  : sve2_mem_cldnt_vs<0b00001, "ldnt1b",  Z_s, ZPR32>;
+  defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
+  defm LDNT1H_ZZR_S  : sve2_mem_cldnt_vs<0b00101, "ldnt1h",  Z_s, ZPR32>;
+  defm LDNT1W_ZZR_S  : sve2_mem_cldnt_vs<0b01001, "ldnt1w",  Z_s, ZPR32>;
+
+  defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
+  defm LDNT1B_ZZR_D  : sve2_mem_cldnt_vs<0b10010, "ldnt1b",  Z_d, ZPR64>;
+  defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
+  defm LDNT1H_ZZR_D  : sve2_mem_cldnt_vs<0b10110, "ldnt1h",  Z_d, ZPR64>;
+  defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
+  defm LDNT1W_ZZR_D  : sve2_mem_cldnt_vs<0b11010, "ldnt1w",  Z_d, ZPR64>;
+  defm LDNT1D_ZZR_D  : sve2_mem_cldnt_vs<0b11110, "ldnt1d",  Z_d, ZPR64>;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8cf4db7733a87..e9d86e4c22ef8 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4974,6 +4974,46 @@ multiclass sve_mem_p_fill<string asm> {
                   (!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
 }
 
+class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
+                             RegisterOperand VecList>
+: I<(outs VecList:$Zt), iops,
+  asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Rm;
+  bits<5> Zn;
+  bits<5> Zt;
+  let Inst{31}    = 0b1;
+  let Inst{30}    = opc{4};
+  let Inst{29-25} = 0b00010;
+  let Inst{24-23} = opc{3-2};
+  let Inst{22-21} = 0b00;
+  let Inst{20-16} = Rm;
+  let Inst{15}    = 0b1;
+  let Inst{14-13} = opc{1-0};
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zt;
+
+  let mayLoad = 1;
+}
+
+multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
+                             RegisterOperand listty, ZPRRegOp zprty> {
+  def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+                                     asm, listty>;
+
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Memory - 64-bit Gather Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE/ldnt1b-diagnostics.s b/llvm/test/MC/AArch64/SVE/ldnt1b-diagnostics.s
index b02f633f5ca14..0379bb27e1c48 100644
--- a/llvm/test/MC/AArch64/SVE/ldnt1b-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/ldnt1b-diagnostics.s
@@ -23,12 +23,12 @@ ldnt1b z0.h, p0/z, [x0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 ldnt1b z0.s, p0/z, [x0]
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: ldnt1b z0.s, p0/z, [x0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 ldnt1b z0.d, p0/z, [x0]
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: ldnt1b z0.d, p0/z, [x0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1b-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1b-diagnostics.s
new file mode 100644
index 0000000000000..6f50516a6e8bf
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1b-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1b { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1b { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1b { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1b { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1b { z0.s }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1b { z0.s }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1b { z0.d }, p0/z, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1b { z0.d }, p0/z, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1b { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1b { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1b { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1b { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1b { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1b { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1b { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1b { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1b { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1b { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1b { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1b { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1b  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1b  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1b  { z0.s }, p0/z, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1b  { z0.s }, p0/z, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1b.s b/llvm/test/MC/AArch64/SVE2/ldnt1b.s
new file mode 100644
index 0000000000000..71f1ae6a513e8
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1b.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1b z0.s, p0/z, [z1.s]
+// CHECK-INST: ldnt1b { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0xa0,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 1f 84 <unknown>
+
+ldnt1b z31.s, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1b { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0xbf,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1f 84 <unknown>
+
+ldnt1b z31.s, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1b { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0xbf,0x00,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 00 84 <unknown>
+
+ldnt1b z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1b { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 1f c4 <unknown>
+
+ldnt1b z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1b { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 1f c4 <unknown>
+
+ldnt1b z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1b { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x00,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 00 c4 <unknown>
+
+ldnt1b { z0.s }, p0/z, [z1.s]
+// CHECK-INST: ldnt1b { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0xa0,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 1f 84 <unknown>
+
+ldnt1b { z31.s }, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1b { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0xbf,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1f 84 <unknown>
+
+ldnt1b { z31.s }, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1b { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0xbf,0x00,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 00 84 <unknown>
+
+ldnt1b { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1b { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 1f c4 <unknown>
+
+ldnt1b { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1b { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 1f c4 <unknown>
+
+ldnt1b { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1b { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x00,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 00 c4 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1d-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1d-diagnostics.s
new file mode 100644
index 0000000000000..15381a64f86cf
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1d-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1d { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1d { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1d { z0.s }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { z0.s }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1d { z0.d }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1d { z0.d }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1d { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1d { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1d { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1d { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1d { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1d { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1d { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1d { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1d { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1d  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1d  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1d  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1d  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1d.s b/llvm/test/MC/AArch64/SVE2/ldnt1d.s
new file mode 100644
index 0000000000000..c83c0c2425d1b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1d.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1d z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1d { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x9f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 9f c5 <unknown>
+
+ldnt1d z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1d { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x9f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 9f c5 <unknown>
+
+ldnt1d z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1d { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x80,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 80 c5 <unknown>
+
+ldnt1d { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1d { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x9f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 9f c5 <unknown>
+
+ldnt1d { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1d { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x9f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 9f c5 <unknown>
+
+ldnt1d { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1d { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x80,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 80 c5 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1h-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1h-diagnostics.s
new file mode 100644
index 0000000000000..f73d6a344e6fa
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1h-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1h { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1h { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1h { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1h { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1h { z0.s }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1h { z0.s }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1h { z0.d }, p0/z, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1h { z0.d }, p0/z, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1h { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1h { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1h { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1h { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1h { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1h { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1h { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1h { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1h { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1h { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1h { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1h { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1h  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1h  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1h  { z0.s }, p0/z, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1h  { z0.s }, p0/z, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1h.s b/llvm/test/MC/AArch64/SVE2/ldnt1h.s
new file mode 100644
index 0000000000000..419bb43dad78b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1h.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1h z0.s, p0/z, [z1.s]
+// CHECK-INST: ldnt1h { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0xa0,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 9f 84 <unknown>
+
+ldnt1h z31.s, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1h { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0xbf,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 9f 84 <unknown>
+
+ldnt1h z31.s, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1h { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0xbf,0x80,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 80 84 <unknown>
+
+ldnt1h z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1h { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 9f c4 <unknown>
+
+ldnt1h z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1h { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 9f c4 <unknown>
+
+ldnt1h z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1h { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x80,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 80 c4 <unknown>
+
+ldnt1h { z0.s }, p0/z, [z1.s]
+// CHECK-INST: ldnt1h { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0xa0,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 9f 84 <unknown>
+
+ldnt1h { z31.s }, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1h { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0xbf,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 9f 84 <unknown>
+
+ldnt1h { z31.s }, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1h { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0xbf,0x80,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 80 84 <unknown>
+
+ldnt1h { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1h { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 9f c4 <unknown>
+
+ldnt1h { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1h { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 9f c4 <unknown>
+
+ldnt1h { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1h { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x80,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 80 c4 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1sb-diagnostics.s
new file mode 100644
index 0000000000000..a679e40798650
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1sb-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1sb { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sb { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sb { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sb { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1sb { z0.s }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1sb { z0.s }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sb { z0.d }, p0/z, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1sb { z0.d }, p0/z, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1sb { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sb { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1sb { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sb { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1sb { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1sb { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1sb { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1sb { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sb { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sb { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sb { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sb { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1sb  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1sb  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1sb  { z0.s }, p0/z, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1sb  { z0.s }, p0/z, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sb.s b/llvm/test/MC/AArch64/SVE2/ldnt1sb.s
new file mode 100644
index 0000000000000..cb7ae1e6d4461
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1sb.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1sb z0.s, p0/z, [z1.s]
+// CHECK-INST: ldnt1sb { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0x80,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f 84 <unknown>
+
+ldnt1sb z31.s, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1sb { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0x9f,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 1f 84 <unknown>
+
+ldnt1sb z31.s, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x00,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 00 84 <unknown>
+
+ldnt1sb z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1sb { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0x80,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f c4 <unknown>
+
+ldnt1sb z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1sb { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0x9f,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 1f c4 <unknown>
+
+ldnt1sb z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x00,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 00 c4 <unknown>
+
+ldnt1sb { z0.s }, p0/z, [z1.s]
+// CHECK-INST: ldnt1sb { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0x80,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f 84 <unknown>
+
+ldnt1sb { z31.s }, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1sb { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0x9f,0x1f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 1f 84 <unknown>
+
+ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1sb { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x00,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 00 84 <unknown>
+
+ldnt1sb { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1sb { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0x80,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f c4 <unknown>
+
+ldnt1sb { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1sb { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0x9f,0x1f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 1f c4 <unknown>
+
+ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1sb { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x00,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 00 c4 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sh-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1sh-diagnostics.s
new file mode 100644
index 0000000000000..07b1f69236f87
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1sh-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1sh { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sh { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sh { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sh { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1sh { z0.s }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1sh { z0.s }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sh { z0.d }, p0/z, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1sh { z0.d }, p0/z, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1sh { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sh { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1sh { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sh { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1sh { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1sh { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1sh { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1sh { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sh { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sh { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sh { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sh { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1sh  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1sh  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1sh  { z0.s }, p0/z, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1sh  { z0.s }, p0/z, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sh.s b/llvm/test/MC/AArch64/SVE2/ldnt1sh.s
new file mode 100644
index 0000000000000..7d8f4d228d5ae
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1sh.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1sh z0.s, p0/z, [z1.s]
+// CHECK-INST: ldnt1sh { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0x80,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 9f 84 <unknown>
+
+ldnt1sh z31.s, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1sh { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0x9f,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 9f 84 <unknown>
+
+ldnt1sh z31.s, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x80,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 80 84 <unknown>
+
+ldnt1sh z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1sh { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0x80,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 9f c4 <unknown>
+
+ldnt1sh z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1sh { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0x9f,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 9f c4 <unknown>
+
+ldnt1sh z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x80,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 80 c4 <unknown>
+
+ldnt1sh { z0.s }, p0/z, [z1.s]
+// CHECK-INST: ldnt1sh { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0x80,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 9f 84 <unknown>
+
+ldnt1sh { z31.s }, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1sh { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0x9f,0x9f,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 9f 84 <unknown>
+
+ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1sh { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x80,0x84]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 80 84 <unknown>
+
+ldnt1sh { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1sh { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0x80,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 9f c4 <unknown>
+
+ldnt1sh { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1sh { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0x9f,0x9f,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 9f c4 <unknown>
+
+ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1sh { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x80,0xc4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 80 c4 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sw-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1sw-diagnostics.s
new file mode 100644
index 0000000000000..a86bc5a8d9f6a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1sw-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1sw { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sw { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sw { z0.s }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { z0.s }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1sw { z0.d }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1sw { z0.d }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1sw { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1sw { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1sw { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1sw { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1sw { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1sw { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sw { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1sw { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1sw { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1sw  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1sw  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1sw  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1sw  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1sw.s b/llvm/test/MC/AArch64/SVE2/ldnt1sw.s
new file mode 100644
index 0000000000000..52b474739e55f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1sw.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1sw z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1sw { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0x80,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f c5 <unknown>
+
+ldnt1sw z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1sw { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0x9f,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 1f c5 <unknown>
+
+ldnt1sw z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x00,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 00 c5 <unknown>
+
+ldnt1sw { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1sw { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0x80,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 80 1f c5 <unknown>
+
+ldnt1sw { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1sw { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0x9f,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 1f c5 <unknown>
+
+ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1sw { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x9f,0x00,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 9f 00 c5 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1w-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ldnt1w-diagnostics.s
new file mode 100644
index 0000000000000..b393f40520db1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1w-diagnostics.s
@@ -0,0 +1,91 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+ldnt1w { z0.b }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1w { z0.b }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1w { z0.h }, p0/z, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1w { z0.h }, p0/z, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+ldnt1w { z0.s }, p0/z, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1w { z0.s }, p0/z, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1w { z0.d }, p0/z, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ldnt1w { z0.d }, p0/z, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+ldnt1w { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1w { z0.d }, p0/z, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+ldnt1w { z0.d }, p0/m, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1w { z0.d }, p0/m, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+ldnt1w { z27.d }, p8/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: ldnt1w { z27.d }, p8/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+ldnt1w { }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: ldnt1w { }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1w { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1w { z0.d, z1.d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ldnt1w { v0.2d }, p0/z, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: ldnt1w { v0.2d }, p0/z, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ldnt1w  { z0.d }, p0/z, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1w  { z0.d }, p0/z, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ldnt1w  { z0.s }, p0/z, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ldnt1w  { z0.s }, p0/z, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/ldnt1w.s b/llvm/test/MC/AArch64/SVE2/ldnt1w.s
new file mode 100644
index 0000000000000..89191303d3e73
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/ldnt1w.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+ldnt1w z0.s, p0/z, [z1.s]
+// CHECK-INST: ldnt1w { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0xa0,0x1f,0x85]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 1f 85 <unknown>
+
+ldnt1w z31.s, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1w { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0xbf,0x1f,0x85]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1f 85 <unknown>
+
+ldnt1w z31.s, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1w { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0xbf,0x00,0x85]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 00 85 <unknown>
+
+ldnt1w z0.d, p0/z, [z1.d]
+// CHECK-INST: ldnt1w { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 1f c5 <unknown>
+
+ldnt1w z31.d, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1w { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 1f c5 <unknown>
+
+ldnt1w z31.d, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1w { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x00,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 00 c5 <unknown>
+
+ldnt1w { z0.s }, p0/z, [z1.s]
+// CHECK-INST: ldnt1w { z0.s }, p0/z, [z1.s]
+// CHECK-ENCODING: [0x20,0xa0,0x1f,0x85]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 a0 1f 85 <unknown>
+
+ldnt1w { z31.s }, p7/z, [z31.s, xzr]
+// CHECK-INST: ldnt1w { z31.s }, p7/z, [z31.s]
+// CHECK-ENCODING: [0xff,0xbf,0x1f,0x85]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 1f 85 <unknown>
+
+ldnt1w { z31.s }, p7/z, [z31.s, x0]
+// CHECK-INST: ldnt1w { z31.s }, p7/z, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0xbf,0x00,0x85]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff bf 00 85 <unknown>
+
+ldnt1w { z0.d }, p0/z, [z1.d]
+// CHECK-INST: ldnt1w { z0.d }, p0/z, [z1.d]
+// CHECK-ENCODING: [0x20,0xc0,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 c0 1f c5 <unknown>
+
+ldnt1w { z31.d }, p7/z, [z31.d, xzr]
+// CHECK-INST: ldnt1w { z31.d }, p7/z, [z31.d]
+// CHECK-ENCODING: [0xff,0xdf,0x1f,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 1f c5 <unknown>
+
+ldnt1w { z31.d }, p7/z, [z31.d, x0]
+// CHECK-INST: ldnt1w { z31.d }, p7/z, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0xdf,0x00,0xc5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff df 00 c5 <unknown>

From 7fad4289319ada61b44a8ffa3017fff960d2b72d Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Thu, 30 May 2019 08:51:39 +0000
Subject: [PATCH 0595/1176] [AArch64][SVE2] Asm: support SVE2 vector splice
 (constructive)

Summary:
The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: SjoerdMeijer

Differential Revision: https://reviews.llvm.org/D62530

llvm-svn: 362073
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  3 +
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 24 ++++++
 .../test/MC/AArch64/SVE2/splice-diagnostics.s | 83 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/splice.s            | 32 +++++++
 4 files changed, 142 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/splice-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/splice.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 82aab630fa746..e89dc90541561 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1348,6 +1348,9 @@ let Predicates = [HasSVE2] in {
   defm LDNT1W_ZZR_D  : sve2_mem_cldnt_vs<0b11010, "ldnt1w",  Z_d, ZPR64>;
   defm LDNT1D_ZZR_D  : sve2_mem_cldnt_vs<0b11110, "ldnt1d",  Z_d, ZPR64>;
 
+  // SVE2 vector splice (constructive)
+  defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+
   // Predicated shifts
   defm SQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
   defm UQSHL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e9d86e4c22ef8..f22995489690f 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4275,6 +4275,30 @@ multiclass sve_int_perm_splice<string asm> {
   def _D : sve_int_perm_splice<0b11, asm, ZPR64>;
 }
 
+class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
+                               ZPRRegOp zprty, RegisterOperand VecList>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, VecList:$Zn),
+  asm, "\t$Zd, $Pg, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zn;
+  bits<5> Zd;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = sz8_64;
+  let Inst{21-13} = 0b101101100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_perm_splice_cons<string asm> {
+  def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8,  ZZ_b>;
+  def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>;
+  def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>;
+  def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
+}
+
 class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
                        ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
diff --git a/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s b/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s
new file mode 100644
index 0000000000000..caab6e1d03ead
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s
@@ -0,0 +1,83 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element widths.
+
+splice z0.b, p0, { z1.h, z2.h }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: splice z0.b, p0, { z1.h, z2.h }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+splice z0.b, p0, { }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: splice z0.b, p0, { }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+splice z0.b, p0, { z1.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: splice z0.b, p0, { z1.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+splice z0.b, p0, { z1.b, z2.b, z3.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b, z3.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+splice z0.b, p0, { z1.b, z2.h }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: splice z0.b, p0, { z1.b, z2.h }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+splice z0.b, p0, { z1.b, z31.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: splice z0.b, p0, { z1.b, z31.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+splice z0.b, p0, { v0.4b, v1.4b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: splice z0.b, p0, { v0.4b, v1.4b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+splice z0.b, p0/z, { z1.b, z2.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: splice z0.b, p0/z, { z1.b, z2.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+splice z0.b, p0/m, { z1.b, z2.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: splice z0.b, p0/m, { z1.b, z2.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+splice z0.b, p8, { z1.b, z2.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7]
+// CHECK-NEXT: splice z0.b, p8, { z1.b, z2.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+splice z31.b, p0, { z30.b, z31.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: splice z31.b, p0, { z30.b, z31.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.b, p0/z, z6.b
+splice z31.b, p0, { z30.b, z31.b }
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: splice z31.b, p0, { z30.b, z31.b }
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/splice.s b/llvm/test/MC/AArch64/SVE2/splice.s
new file mode 100644
index 0000000000000..4bc03e3bd38f5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/splice.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+splice  z29.b, p7, { z30.b, z31.b }
+// CHECK-INST: splice  z29.b, p7, { z30.b, z31.b }
+// CHECK-ENCODING: [0xdd,0x9f,0x2d,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 2d 05 <unknown>
+
+splice  z29.h, p7, { z30.h, z31.h }
+// CHECK-INST: splice  z29.h, p7, { z30.h, z31.h }
+// CHECK-ENCODING: [0xdd,0x9f,0x6d,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f 6d 05 <unknown>
+
+splice  z29.s, p7, { z30.s, z31.s }
+// CHECK-INST: splice  z29.s, p7, { z30.s, z31.s }
+// CHECK-ENCODING: [0xdd,0x9f,0xad,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f ad 05 <unknown>
+
+splice  z29.d, p7, { z30.d, z31.d }
+// CHECK-INST: splice  z29.d, p7, { z30.d, z31.d }
+// CHECK-ENCODING: [0xdd,0x9f,0xed,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: dd 9f ed 05 <unknown>

From fa147c97d6ba1a59fe885a193a40a274158f38da Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Thu, 30 May 2019 09:09:01 +0000
Subject: [PATCH 0596/1176] [llvm-objcopy] Remove %p format specifiers

On 32-bit machines %p expects 32 bit values, however
addresses in llvm-objcopy are always 64 bits.

llvm-svn: 362074
---
 llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test | 6 +++---
 llvm/tools/llvm-objcopy/ELF/Object.cpp            | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test b/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
index 9275dddff1a52..f9c9155e4a46b 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/ihex-writer.test
@@ -67,8 +67,8 @@
 # SIGN_EXTENDED-NEXT: :051000000001020304E1
 # SIGN_EXTENDED-NEXT: :00000001FF
 
-# BAD-ADDR: error: {{.*}}: Section '.text2' address range [{{.*}}, {{.*}}] is not 32 bit
-# BAD-ADDR2: error: {{.*}}: Section '.text3' address range [{{.*}}, {{.*}}] is not 32 bit
+# BAD-ADDR: error: {{.*}}: Section '.text2' address range [0x{{.*}}, 0x{{.*}}] is not 32 bit
+# BAD-ADDR2: error: {{.*}}: Section '.text3' address range [0x{{.*}}, 0x{{.*}}] is not 32 bit
 
 # There shouldn't be 'ExtendedAddr' nor 'Data' records
 # ZERO_SIZE_SEC-NOT:  :02000004
@@ -78,4 +78,4 @@
 # START1: :040000030000FFFFFB
 # START2: :0400000500100000E7
 # START3: :040000058000100067
-# BAD-START: error: {{.*}}: Entry point address {{.*}} overflows 32 bits
+# BAD-START: error: {{.*}}: Entry point address 0x{{.*}} overflows 32 bits
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index a43fab28e8821..e70a3b1fde138 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -2019,7 +2019,7 @@ Error IHexWriter::checkSection(const SectionBase &Sec) {
   if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1))
     return createStringError(
         errc::invalid_argument,
-        "Section '%s' address range [%p, %p] is not 32 bit", Sec.Name.c_str(),
+        "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit", Sec.Name.c_str(),
         Addr, Addr + Sec.Size - 1);
   return Error::success();
 }
@@ -2036,7 +2036,7 @@ Error IHexWriter::finalize() {
   // We can't write 64-bit addresses.
   if (addressOverflows32bit(Obj.Entry))
     return createStringError(errc::invalid_argument,
-                             "Entry point address %p overflows 32 bits.",
+                             "Entry point address 0x%llx overflows 32 bits.",
                              Obj.Entry);
 
   // If any section we're to write has segment then we

From 7e4d62a8dd41f8e87a62887d89e34055dbb1a3e4 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 30 May 2019 09:39:36 +0000
Subject: [PATCH 0597/1176] DWARFASTParserClang: Move attribute parsing into a
 single function

Summary:
The ParseTypeFromDWARF function consists of a huge switch on the kind of
type being parsed. Each case in this switch starts with parsing the
attributes of the current DIE. A lot of these attributes are specific to
one kind of a type, but a lot of them are common too, leading to code
duplication.

This patch reduces the duplication (and the size of ParseTypeFromDWARF)
by moving the attribute parsing to a separate function. It creates a
struct (ParsedTypeAttributes), which contains a parsed form of all
attributes which are useful for parsing any kind of a type. The parsing
code for a specific type kind can then access the fields which are
relevant for that specific case.

Reviewers: JDevlieghere, clayborg, aprantl

Subscribers: jdoerfert, lldb-commits

Differential Revision: https://reviews.llvm.org/D62477

llvm-svn: 362075
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 1061 +++++++----------
 1 file changed, 427 insertions(+), 634 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index a260684285d40..decabb435dc7b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -228,13 +228,169 @@ static void CompleteExternalTagDeclType(ClangASTImporter &ast_importer,
   }
 }
 
+namespace {
+/// Parsed form of all attributes that are relevant for type reconstruction.
+/// Some attributes are relevant for all kinds of types (declaration), while
+/// others are only meaningful to a specific type (is_virtual)
+struct ParsedTypeAttributes {
+  ParsedTypeAttributes(const DWARFDIE &die, CompileUnit &comp_unit);
+
+  AccessType accessibility = eAccessNone;
+  bool is_artificial = false;
+  bool is_complete_objc_class = false;
+  bool is_explicit = false;
+  bool is_forward_declaration = false;
+  bool is_inline = false;
+  bool is_scoped_enum = false;
+  bool is_vector = false;
+  bool is_virtual = false;
+  clang::StorageClass storage = clang::SC_None;
+  const char *mangled_name = nullptr;
+  ConstString name;
+  Declaration decl;
+  DWARFDIE object_pointer;
+  DWARFFormValue abstract_origin;
+  DWARFFormValue containing_type;
+  DWARFFormValue signature;
+  DWARFFormValue specification;
+  DWARFFormValue type;
+  LanguageType class_language = eLanguageTypeUnknown;
+  llvm::Optional<uint64_t> byte_size;
+  size_t calling_convention = llvm::dwarf::DW_CC_normal;
+  uint32_t bit_stride = 0;
+  uint32_t byte_stride = 0;
+  uint32_t encoding = 0;
+};
+} // namespace
+
+ParsedTypeAttributes::ParsedTypeAttributes(const DWARFDIE &die,
+                                           CompileUnit &comp_unit) {
+  DWARFAttributes attributes;
+  size_t num_attributes = die.GetAttributes(attributes);
+  for (size_t i = 0; i < num_attributes; ++i) {
+    dw_attr_t attr = attributes.AttributeAtIndex(i);
+    DWARFFormValue form_value;
+    if (!attributes.ExtractFormValueAtIndex(i, form_value))
+      continue;
+    switch (attr) {
+    case DW_AT_abstract_origin:
+      abstract_origin = form_value;
+      break;
+
+    case DW_AT_accessibility:
+      accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
+      break;
+
+    case DW_AT_artificial:
+      is_artificial = form_value.Boolean();
+      break;
+
+    case DW_AT_bit_stride:
+      bit_stride = form_value.Unsigned();
+      break;
+
+    case DW_AT_byte_size:
+      byte_size = form_value.Unsigned();
+      break;
+
+    case DW_AT_byte_stride:
+      byte_stride = form_value.Unsigned();
+      break;
+
+    case DW_AT_calling_convention:
+      calling_convention = form_value.Unsigned();
+      break;
+
+    case DW_AT_containing_type:
+      containing_type = form_value;
+      break;
+
+    case DW_AT_decl_file:
+      decl.SetFile(comp_unit.GetSupportFiles().GetFileSpecAtIndex(
+          form_value.Unsigned()));
+      break;
+    case DW_AT_decl_line:
+      decl.SetLine(form_value.Unsigned());
+      break;
+    case DW_AT_decl_column:
+      decl.SetColumn(form_value.Unsigned());
+      break;
+
+    case DW_AT_declaration:
+      is_forward_declaration = form_value.Boolean();
+      break;
+
+    case DW_AT_encoding:
+      encoding = form_value.Unsigned();
+      break;
+
+    case DW_AT_enum_class:
+      is_scoped_enum = form_value.Boolean();
+      break;
+
+    case DW_AT_explicit:
+      is_explicit = form_value.Boolean();
+      break;
+
+    case DW_AT_external:
+      if (form_value.Unsigned())
+        storage = clang::SC_Extern;
+      break;
+
+    case DW_AT_inline:
+      is_inline = form_value.Boolean();
+      break;
+
+    case DW_AT_linkage_name:
+    case DW_AT_MIPS_linkage_name:
+      mangled_name = form_value.AsCString();
+      break;
+
+    case DW_AT_name:
+      name.SetCString(form_value.AsCString());
+      break;
+
+    case DW_AT_object_pointer:
+      object_pointer = form_value.Reference();
+      break;
+
+    case DW_AT_signature:
+      signature = form_value;
+      break;
+
+    case DW_AT_specification:
+      specification = form_value;
+      break;
+
+    case DW_AT_type:
+      type = form_value;
+      break;
+
+    case DW_AT_virtuality:
+      is_virtual = form_value.Boolean();
+      break;
+
+    case DW_AT_APPLE_objc_complete_type:
+      is_complete_objc_class = form_value.Signed();
+      break;
+
+    case DW_AT_APPLE_runtime_class:
+      class_language = (LanguageType)form_value.Signed();
+      break;
+
+    case DW_AT_GNU_vector:
+      is_vector = form_value.Boolean();
+      break;
+    }
+  }
+}
+
 TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                                                const DWARFDIE &die, Log *log,
                                                bool *type_is_new_ptr) {
   if (type_is_new_ptr)
     *type_is_new_ptr = false;
 
-  AccessType accessibility = eAccessNone;
   if (!die)
     return nullptr;
   SymbolFileDWARF *dwarf = die.GetDWARF();
@@ -260,8 +416,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   // Set a bit that lets us know that we are currently parsing this
   dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED;
 
-  if (DWARFDIE signature_die =
-          die.GetAttributeValueAsReferenceDIE(DW_AT_signature)) {
+  ParsedTypeAttributes attrs(die, *sc.comp_unit);
+
+  if (DWARFDIE signature_die = attrs.signature.Reference()) {
     if (TypeSP type_sp =
             ParseTypeFromDWARF(sc, signature_die, log, type_is_new_ptr)) {
       dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
@@ -279,20 +436,11 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
 
   const dw_tag_t tag = die.Tag();
 
-  bool is_forward_declaration = false;
-  DWARFAttributes attributes;
-  const char *type_name_cstr = NULL;
-  const char *mangled_name_cstr = NULL;
-  ConstString type_name_const_str;
   Type::ResolveState resolve_state = Type::eResolveStateUnresolved;
-  llvm::Optional<uint64_t> byte_size;
-  Declaration decl;
 
   Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID;
   CompilerType clang_type;
-  DWARFFormValue form_value;
 
-  dw_attr_t attr;
   TypeSP type_sp;
   LanguageType cu_language = die.GetLanguage();
   switch (tag) {
@@ -305,49 +453,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   case DW_TAG_restrict_type:
   case DW_TAG_volatile_type:
   case DW_TAG_unspecified_type: {
-    const size_t num_attributes = die.GetAttributes(attributes);
-    uint32_t encoding = 0;
-    DWARFFormValue encoding_uid;
-
-    if (num_attributes > 0) {
-      uint32_t i;
-      for (i = 0; i < num_attributes; ++i) {
-        attr = attributes.AttributeAtIndex(i);
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_decl_file:
-            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                form_value.Unsigned()));
-            break;
-          case DW_AT_decl_line:
-            decl.SetLine(form_value.Unsigned());
-            break;
-          case DW_AT_decl_column:
-            decl.SetColumn(form_value.Unsigned());
-            break;
-          case DW_AT_name:
-            type_name_cstr = form_value.AsCString();
-            if (type_name_cstr)
-              type_name_const_str.SetCString(type_name_cstr);
-            break;
-          case DW_AT_byte_size:
-            byte_size = form_value.Unsigned();
-            break;
-          case DW_AT_encoding:
-            encoding = form_value.Unsigned();
-            break;
-          case DW_AT_type:
-            encoding_uid = form_value;
-            break;
-          default:
-          case DW_AT_sibling:
-            break;
-          }
-        }
-      }
-    }
-
-    if (tag == DW_TAG_typedef && encoding_uid.IsValid()) {
+    if (tag == DW_TAG_typedef && attrs.type.IsValid()) {
       // Try to parse a typedef from the DWO file first as modules can
       // contain typedef'ed structures that have no names like:
       //
@@ -367,7 +473,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       // will have a hard time tracking down an unnammed structure type in
       // the module DWO file, so we make sure we don't get into this
       // situation by always resolving typedefs from the DWO file.
-      const DWARFDIE encoding_die = encoding_uid.Reference();
+      const DWARFDIE encoding_die = attrs.type.Reference();
 
       // First make sure that the die that this is typedef'ed to _is_ just
       // a declaration (DW_AT_declaration == 1), not a full definition
@@ -391,8 +497,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       break;
 
     case DW_TAG_unspecified_type:
-      if (strcmp(type_name_cstr, "nullptr_t") == 0 ||
-          strcmp(type_name_cstr, "decltype(nullptr)") == 0) {
+      if (attrs.name == "nullptr_t" || attrs.name == "decltype(nullptr)") {
         resolve_state = Type::eResolveStateFull;
         clang_type = m_ast.GetBasicType(eBasicTypeNullPtr);
         break;
@@ -404,7 +509,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     case DW_TAG_base_type:
       resolve_state = Type::eResolveStateFull;
       clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
-          type_name_cstr, encoding, byte_size.getValueOr(0) * 8);
+          attrs.name.GetCString(), attrs.encoding,
+          attrs.byte_size.getValueOr(0) * 8);
       break;
 
     case DW_TAG_pointer_type:
@@ -458,7 +564,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                   clang_type = m_ast.CreateBlockPointerType(
                       lldb_function_type_sp->GetForwardCompilerType());
                   encoding_data_type = Type::eEncodingIsUID;
-                  encoding_uid.Clear();
+                  attrs.type.Clear();
                   resolve_state = Type::eResolveStateFull;
                 }
               }
@@ -471,12 +577,12 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
 
       if (cu_language == eLanguageTypeObjC ||
           cu_language == eLanguageTypeObjC_plus_plus) {
-        if (type_name_cstr != nullptr) {
+        if (attrs.name) {
           static ConstString g_objc_type_name_id("id");
           static ConstString g_objc_type_name_Class("Class");
           static ConstString g_objc_type_name_selector("SEL");
 
-          if (type_name_const_str == g_objc_type_name_id) {
+          if (attrs.name == g_objc_type_name_id) {
             if (log)
               dwarf->GetObjectFile()->GetModule()->LogMessage(
                   log,
@@ -485,10 +591,10 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                   die.GetOffset(), die.GetTagAsCString(), die.GetName());
             clang_type = m_ast.GetBasicType(eBasicTypeObjCID);
             encoding_data_type = Type::eEncodingIsUID;
-            encoding_uid.Clear();
+            attrs.type.Clear();
             resolve_state = Type::eResolveStateFull;
 
-          } else if (type_name_const_str == g_objc_type_name_Class) {
+          } else if (attrs.name == g_objc_type_name_Class) {
             if (log)
               dwarf->GetObjectFile()->GetModule()->LogMessage(
                   log,
@@ -497,9 +603,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                   die.GetOffset(), die.GetTagAsCString(), die.GetName());
             clang_type = m_ast.GetBasicType(eBasicTypeObjCClass);
             encoding_data_type = Type::eEncodingIsUID;
-            encoding_uid.Clear();
+            attrs.type.Clear();
             resolve_state = Type::eResolveStateFull;
-          } else if (type_name_const_str == g_objc_type_name_selector) {
+          } else if (attrs.name == g_objc_type_name_selector) {
             if (log)
               dwarf->GetObjectFile()->GetModule()->LogMessage(
                   log,
@@ -508,15 +614,15 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                   die.GetOffset(), die.GetTagAsCString(), die.GetName());
             clang_type = m_ast.GetBasicType(eBasicTypeObjCSel);
             encoding_data_type = Type::eEncodingIsUID;
-            encoding_uid.Clear();
+            attrs.type.Clear();
             resolve_state = Type::eResolveStateFull;
           }
         } else if (encoding_data_type == Type::eEncodingIsPointerUID &&
-                   encoding_uid.IsValid()) {
+                   attrs.type.IsValid()) {
           // Clang sometimes erroneously emits id as objc_object*.  In that
           // case we fix up the type to "id".
 
-          const DWARFDIE encoding_die = encoding_uid.Reference();
+          const DWARFDIE encoding_die = attrs.type.Reference();
 
           if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) {
             if (const char *struct_name = encoding_die.GetName()) {
@@ -530,7 +636,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                       die.GetOffset(), die.GetTagAsCString(), die.GetName());
                 clang_type = m_ast.GetBasicType(eBasicTypeObjCID);
                 encoding_data_type = Type::eEncodingIsUID;
-                encoding_uid.Clear();
+                attrs.type.Clear();
                 resolve_state = Type::eResolveStateFull;
               }
             }
@@ -540,8 +646,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     }
 
     type_sp = std::make_shared<Type>(
-        die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-        dwarf->GetUID(DIERef(encoding_uid)), encoding_data_type, &decl,
+        die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr,
+        dwarf->GetUID(DIERef(attrs.type)), encoding_data_type, &attrs.decl,
         clang_type, resolve_state);
 
     dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
@@ -550,72 +656,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   case DW_TAG_structure_type:
   case DW_TAG_union_type:
   case DW_TAG_class_type: {
-    LanguageType class_language = eLanguageTypeUnknown;
-    bool is_complete_objc_class = false;
-    size_t calling_convention = llvm::dwarf::CallingConvention::DW_CC_normal;
-
-    const size_t num_attributes = die.GetAttributes(attributes);
-    if (num_attributes > 0) {
-      uint32_t i;
-      for (i = 0; i < num_attributes; ++i) {
-        attr = attributes.AttributeAtIndex(i);
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_decl_file:
-            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                form_value.Unsigned()));
-            break;
-
-          case DW_AT_decl_line:
-            decl.SetLine(form_value.Unsigned());
-            break;
-
-          case DW_AT_decl_column:
-            decl.SetColumn(form_value.Unsigned());
-            break;
-
-          case DW_AT_name:
-            type_name_cstr = form_value.AsCString();
-            type_name_const_str.SetCString(type_name_cstr);
-            break;
-
-          case DW_AT_byte_size:
-            byte_size = form_value.Unsigned();
-            break;
-
-          case DW_AT_accessibility:
-            accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
-            break;
-
-          case DW_AT_declaration:
-            is_forward_declaration = form_value.Boolean();
-            break;
-
-          case DW_AT_APPLE_runtime_class:
-            class_language = (LanguageType)form_value.Signed();
-            break;
-
-          case DW_AT_APPLE_objc_complete_type:
-            is_complete_objc_class = form_value.Signed();
-            break;
-          case DW_AT_calling_convention:
-            calling_convention = form_value.Unsigned();
-            break;
-
-          case DW_AT_allocated:
-          case DW_AT_associated:
-          case DW_AT_data_location:
-          case DW_AT_description:
-          case DW_AT_start_scope:
-          case DW_AT_visibility:
-          default:
-          case DW_AT_sibling:
-            break;
-          }
-        }
-      }
-    }
-
     // UniqueDWARFASTType is large, so don't create a local variables on
     // the stack, put it on the heap. This function is often called
     // recursively and clang isn't good and sharing the stack space for
@@ -623,10 +663,10 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     std::unique_ptr<UniqueDWARFASTType> unique_ast_entry_up(
         new UniqueDWARFASTType());
 
-    ConstString unique_typename(type_name_const_str);
-    Declaration unique_decl(decl);
+    ConstString unique_typename(attrs.name);
+    Declaration unique_decl(attrs.decl);
 
-    if (type_name_const_str) {
+    if (attrs.name) {
       if (Language::LanguageIsCPlusPlus(cu_language)) {
         // For C++, we rely solely upon the one definition rule that says
         // only one thing can exist at a given decl context. We ignore the
@@ -638,7 +678,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       }
 
       if (dwarf->GetUniqueDWARFASTTypeMap().Find(
-              unique_typename, die, unique_decl, byte_size ? *byte_size : -1,
+              unique_typename, die, unique_decl, attrs.byte_size.getValueOr(-1),
               *unique_ast_entry_up)) {
         type_sp = unique_ast_entry_up->m_type_sp;
         if (type_sp) {
@@ -664,8 +704,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       default_accessibility = eAccessPrivate;
     }
 
-    if (byte_size && *byte_size == 0 && type_name_cstr && !die.HasChildren() &&
-        cu_language == eLanguageTypeObjC) {
+    if (attrs.byte_size && *attrs.byte_size == 0 && attrs.name &&
+        !die.HasChildren() && cu_language == eLanguageTypeObjC) {
       // Work around an issue with clang at the moment where forward
       // declarations for objective C classes are emitted as:
       //  DW_TAG_structure_type [2]
@@ -676,20 +716,20 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       //
       // Note that there is no DW_AT_declaration and there are no children,
       // and the byte size is zero.
-      is_forward_declaration = true;
+      attrs.is_forward_declaration = true;
     }
 
-    if (class_language == eLanguageTypeObjC ||
-        class_language == eLanguageTypeObjC_plus_plus) {
-      if (!is_complete_objc_class &&
+    if (attrs.class_language == eLanguageTypeObjC ||
+        attrs.class_language == eLanguageTypeObjC_plus_plus) {
+      if (!attrs.is_complete_objc_class &&
           die.Supports_DW_AT_APPLE_objc_complete_type()) {
         // We have a valid eSymbolTypeObjCClass class symbol whose name
         // matches the current objective C class that we are trying to find
         // and this DIE isn't the complete definition (we checked
         // is_complete_objc_class above and know it is false), so the real
         // definition is in here somewhere
-        type_sp = dwarf->FindCompleteObjCDefinitionTypeForDIE(
-            die, type_name_const_str, true);
+        type_sp =
+            dwarf->FindCompleteObjCDefinitionTypeForDIE(die, attrs.name, true);
 
         if (!type_sp) {
           SymbolFileDWARFDebugMap *debug_map_symfile =
@@ -698,7 +738,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
             // We weren't able to find a full declaration in this DWARF,
             // see if we have a declaration anywhere else...
             type_sp = debug_map_symfile->FindCompleteObjCDefinitionTypeForDIE(
-                die, type_name_const_str, true);
+                die, attrs.name, true);
           }
         }
 
@@ -709,7 +749,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                 "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is an "
                 "incomplete objc type, complete type is 0x%8.8" PRIx64,
                 static_cast<void *>(this), die.GetOffset(),
-                DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
+                DW_TAG_value_to_name(tag), attrs.name.GetCString(),
+                type_sp->GetID());
           }
 
           // We found a real definition for this type elsewhere so lets use
@@ -721,7 +762,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       }
     }
 
-    if (is_forward_declaration) {
+    if (attrs.is_forward_declaration) {
       // We have a forward declaration to a type and we need to try and
       // find a full declaration. We look in the current type index just in
       // case we have a forward declaration followed by an actual
@@ -733,7 +774,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
             "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
             "forward declaration, trying to find complete type",
             static_cast<void *>(this), die.GetOffset(),
-            DW_TAG_value_to_name(tag), type_name_cstr);
+            DW_TAG_value_to_name(tag), attrs.name.GetCString());
       }
 
       // See if the type comes from a DWO module and if so, track down that
@@ -767,7 +808,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
               "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
               "forward declaration, complete type is 0x%8.8" PRIx64,
               static_cast<void *>(this), die.GetOffset(),
-              DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
+              DW_TAG_value_to_name(tag), attrs.name.GetCString(),
+              type_sp->GetID());
         }
 
         // We found a real definition for this type elsewhere so lets use
@@ -794,27 +836,27 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       // not an issue in the non-gmodules case because the debug info will
       // always contain a full definition of parent types in that case.
       CompleteExternalTagDeclType(GetClangASTImporter(), decl_ctx, die,
-                                  type_name_cstr);
+                                  attrs.name.GetCString());
 
-      if (accessibility == eAccessNone && decl_ctx) {
+      if (attrs.accessibility == eAccessNone && decl_ctx) {
         // Check the decl context that contains this class/struct/union. If
         // it is a class we must give it an accessibility.
         const clang::Decl::Kind containing_decl_kind = decl_ctx->getDeclKind();
         if (DeclKindIsCXXClass(containing_decl_kind))
-          accessibility = default_accessibility;
+          attrs.accessibility = default_accessibility;
       }
 
       ClangASTMetadata metadata;
       metadata.SetUserID(die.GetID());
       metadata.SetIsDynamicCXXType(dwarf->ClassOrStructIsVirtual(die));
 
-      if (type_name_cstr && strchr(type_name_cstr, '<')) {
+      if (attrs.name.GetStringRef().contains('<')) {
         ClangASTContext::TemplateParameterInfos template_param_infos;
         if (ParseTemplateParameterInfos(die, template_param_infos)) {
           clang::ClassTemplateDecl *class_template_decl =
-              m_ast.ParseClassTemplateDecl(decl_ctx, accessibility,
-                                           type_name_cstr, tag_decl_kind,
-                                           template_param_infos);
+              m_ast.ParseClassTemplateDecl(decl_ctx, attrs.accessibility,
+                                           attrs.name.GetCString(),
+                                           tag_decl_kind, template_param_infos);
           if (!class_template_decl) {
             if (log) {
               dwarf->GetObjectFile()->GetModule()->LogMessage(
@@ -822,7 +864,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                   "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" "
                   "clang::ClassTemplateDecl failed to return a decl.",
                   static_cast<void *>(this), die.GetOffset(),
-                  DW_TAG_value_to_name(tag), type_name_cstr);
+                  DW_TAG_value_to_name(tag), attrs.name.GetCString());
             }
             return TypeSP();
           }
@@ -842,9 +884,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
 
       if (!clang_type_was_created) {
         clang_type_was_created = true;
-        clang_type =
-            m_ast.CreateRecordType(decl_ctx, accessibility, type_name_cstr,
-                                   tag_decl_kind, class_language, &metadata);
+        clang_type = m_ast.CreateRecordType(
+            decl_ctx, attrs.accessibility, attrs.name.GetCString(),
+            tag_decl_kind, attrs.class_language, &metadata);
       }
     }
 
@@ -852,12 +894,12 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     // parameters in any class methods need it for the clang types for
     // function prototypes.
     LinkDeclContextToDIE(m_ast.GetDeclContextForType(clang_type), die);
-    type_sp = std::make_shared<Type>(die.GetID(), dwarf, type_name_const_str,
-                                     byte_size, nullptr, LLDB_INVALID_UID,
-                                     Type::eEncodingIsUID, &decl, clang_type,
+    type_sp = std::make_shared<Type>(die.GetID(), dwarf, attrs.name,
+                                     attrs.byte_size, nullptr, LLDB_INVALID_UID,
+                                     Type::eEncodingIsUID, &attrs.decl, clang_type,
                                      Type::eResolveStateForward);
 
-    type_sp->SetIsCompleteObjCClass(is_complete_objc_class);
+    type_sp->SetIsCompleteObjCClass(attrs.is_complete_objc_class);
 
     // Add our type to the unique type map so we don't end up creating many
     // copies of the same type over and over in the ASTContext for our
@@ -865,11 +907,11 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     unique_ast_entry_up->m_type_sp = type_sp;
     unique_ast_entry_up->m_die = die;
     unique_ast_entry_up->m_declaration = unique_decl;
-    unique_ast_entry_up->m_byte_size = byte_size.getValueOr(0);
+    unique_ast_entry_up->m_byte_size = attrs.byte_size.getValueOr(0);
     dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename,
                                              *unique_ast_entry_up);
 
-    if (is_forward_declaration && die.HasChildren()) {
+    if (attrs.is_forward_declaration && die.HasChildren()) {
       // Check to see if the DIE actually has a definition, some version of
       // GCC will
       // emit DIEs with DW_AT_declaration set to true, but yet still have
@@ -887,7 +929,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         case DW_TAG_typedef:
         case DW_TAG_union_type:
           child_die.Clear();
-          is_forward_declaration = false;
+          attrs.is_forward_declaration = false;
           break;
         default:
           child_die = child_die.GetSibling();
@@ -896,7 +938,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       }
     }
 
-    if (!is_forward_declaration) {
+    if (!attrs.is_forward_declaration) {
       // Always start the definition for a class type so that if the class
       // has child classes or types that require the class to be created
       // for use as their decl contexts the class will be ready to accept
@@ -910,7 +952,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
               "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
               "definition.\nPlease file a bug and attach the file at the "
               "start of this error message",
-              die.GetOffset(), type_name_cstr);
+              die.GetOffset(), attrs.name.GetCString());
         }
 
         if (tag == DW_TAG_structure_type) // this only applies in C
@@ -932,8 +974,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         // class is needed as the declaration context for a contained class
         // or type without the need to complete that type..
 
-        if (class_language != eLanguageTypeObjC &&
-            class_language != eLanguageTypeObjC_plus_plus)
+        if (attrs.class_language != eLanguageTypeObjC &&
+            attrs.class_language != eLanguageTypeObjC_plus_plus)
           ClangASTContext::StartTagDeclarationDefinition(clang_type);
 
         // Leave this as a forward declaration until we need to know the
@@ -961,7 +1003,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     // do this for pass by value - which implies the Trivial ABI. There
     // isn't a way to assert that something that would normally be pass by
     // value is pass by reference, so we ignore that attribute if set.
-    if (calling_convention == llvm::dwarf::DW_CC_pass_by_value) {
+    if (attrs.calling_convention == llvm::dwarf::DW_CC_pass_by_value) {
       clang::CXXRecordDecl *record_decl =
           m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
       if (record_decl) {
@@ -969,7 +1011,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
       }
     }
 
-    if (calling_convention == llvm::dwarf::DW_CC_pass_by_reference) {
+    if (attrs.calling_convention == llvm::dwarf::DW_CC_pass_by_reference) {
       clang::CXXRecordDecl *record_decl =
           m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType());
       if (record_decl)
@@ -980,281 +1022,119 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
   } break;
 
   case DW_TAG_enumeration_type: {
-    bool is_scoped = false;
-    DWARFFormValue encoding_form;
-
-    const size_t num_attributes = die.GetAttributes(attributes);
-    if (num_attributes > 0) {
-      uint32_t i;
-
-      for (i = 0; i < num_attributes; ++i) {
-        attr = attributes.AttributeAtIndex(i);
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_decl_file:
-            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                form_value.Unsigned()));
-            break;
-          case DW_AT_decl_line:
-            decl.SetLine(form_value.Unsigned());
-            break;
-          case DW_AT_decl_column:
-            decl.SetColumn(form_value.Unsigned());
-            break;
-          case DW_AT_name:
-            type_name_cstr = form_value.AsCString();
-            type_name_const_str.SetCString(type_name_cstr);
-            break;
-          case DW_AT_type:
-            encoding_form = form_value;
-            break;
-          case DW_AT_byte_size:
-            byte_size = form_value.Unsigned();
-            break;
-          case DW_AT_accessibility:
-            break; // accessibility =
-                   // DW_ACCESS_to_AccessType(form_value.Unsigned()); break;
-          case DW_AT_declaration:
-            is_forward_declaration = form_value.Boolean();
-            break;
-          case DW_AT_enum_class:
-            is_scoped = form_value.Boolean();
-            break;
-          case DW_AT_allocated:
-          case DW_AT_associated:
-          case DW_AT_bit_stride:
-          case DW_AT_byte_stride:
-          case DW_AT_data_location:
-          case DW_AT_description:
-          case DW_AT_start_scope:
-          case DW_AT_visibility:
-          case DW_AT_specification:
-          case DW_AT_abstract_origin:
-          case DW_AT_sibling:
-            break;
-          }
-        }
-      }
+    if (attrs.is_forward_declaration) {
+      type_sp = ParseTypeFromDWO(die, log);
+      if (type_sp)
+        return type_sp;
 
-      if (is_forward_declaration) {
-        type_sp = ParseTypeFromDWO(die, log);
-        if (type_sp)
-          return type_sp;
+      DWARFDeclContext die_decl_ctx;
+      die.GetDWARFDeclContext(die_decl_ctx);
 
-        DWARFDeclContext die_decl_ctx;
-        die.GetDWARFDeclContext(die_decl_ctx);
+      type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
 
-        type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx);
+      if (!type_sp) {
+        SymbolFileDWARFDebugMap *debug_map_symfile =
+            dwarf->GetDebugMapSymfile();
+        if (debug_map_symfile) {
+          // We weren't able to find a full declaration in this DWARF,
+          // see if we have a declaration anywhere else...
+          type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(
+              die_decl_ctx);
+        }
+      }
 
-        if (!type_sp) {
-          SymbolFileDWARFDebugMap *debug_map_symfile =
-              dwarf->GetDebugMapSymfile();
-          if (debug_map_symfile) {
-            // We weren't able to find a full declaration in this DWARF,
-            // see if we have a declaration anywhere else...
-            type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext(
-                die_decl_ctx);
-          }
+      if (type_sp) {
+        if (log) {
+          dwarf->GetObjectFile()->GetModule()->LogMessage(
+              log,
+              "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
+              "forward declaration, complete type is 0x%8.8" PRIx64,
+              static_cast<void *>(this), die.GetOffset(),
+              DW_TAG_value_to_name(tag), attrs.name.GetCString(),
+              type_sp->GetID());
         }
 
-        if (type_sp) {
-          if (log) {
-            dwarf->GetObjectFile()->GetModule()->LogMessage(
-                log,
-                "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a "
-                "forward declaration, complete type is 0x%8.8" PRIx64,
-                static_cast<void *>(this), die.GetOffset(),
-                DW_TAG_value_to_name(tag), type_name_cstr, type_sp->GetID());
-          }
+        // We found a real definition for this type elsewhere so lets use
+        // it and cache the fact that we found a complete type for this
+        // die
+        dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
+        clang::DeclContext *defn_decl_ctx =
+            GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID()));
+        if (defn_decl_ctx)
+          LinkDeclContextToDIE(defn_decl_ctx, die);
+        return type_sp;
+      }
+    }
+    DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+                 DW_TAG_value_to_name(tag), type_name_cstr);
 
-          // We found a real definition for this type elsewhere so lets use
-          // it and cache the fact that we found a complete type for this
-          // die
-          dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get();
-          clang::DeclContext *defn_decl_ctx =
-              GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID()));
-          if (defn_decl_ctx)
-            LinkDeclContextToDIE(defn_decl_ctx, die);
-          return type_sp;
-        }
+    CompilerType enumerator_clang_type;
+    clang_type.SetCompilerType(
+        &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
+    if (!clang_type) {
+      if (attrs.type.IsValid()) {
+        Type *enumerator_type = dwarf->ResolveTypeUID(DIERef(attrs.type));
+        if (enumerator_type)
+          enumerator_clang_type = enumerator_type->GetFullCompilerType();
       }
-      DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
-                   DW_TAG_value_to_name(tag), type_name_cstr);
-
-      CompilerType enumerator_clang_type;
-      clang_type.SetCompilerType(
-          &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE()));
-      if (!clang_type) {
-        if (encoding_form.IsValid()) {
-          Type *enumerator_type = dwarf->ResolveTypeUID(DIERef(encoding_form));
-          if (enumerator_type)
-            enumerator_clang_type = enumerator_type->GetFullCompilerType();
-        }
 
-        if (!enumerator_clang_type) {
-          if (byte_size) {
-            enumerator_clang_type =
-                m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
-                    NULL, DW_ATE_signed, *byte_size * 8);
-          } else {
-            enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt);
-          }
+      if (!enumerator_clang_type) {
+        if (attrs.byte_size) {
+          enumerator_clang_type =
+              m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize(
+                  NULL, DW_ATE_signed, *attrs.byte_size * 8);
+        } else {
+          enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt);
         }
-
-        clang_type = m_ast.CreateEnumerationType(
-            type_name_cstr, GetClangDeclContextContainingDIE(die, nullptr),
-            decl, enumerator_clang_type, is_scoped);
-      } else {
-        enumerator_clang_type =
-            m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType());
       }
 
-      LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type),
-                           die);
+      clang_type = m_ast.CreateEnumerationType(
+          attrs.name.GetCString(),
+          GetClangDeclContextContainingDIE(die, nullptr), attrs.decl,
+          enumerator_clang_type, attrs.is_scoped_enum);
+    } else {
+      enumerator_clang_type =
+          m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType());
+    }
 
-      type_sp = std::make_shared<Type>(
-          die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-          dwarf->GetUID(DIERef(encoding_form)), Type::eEncodingIsUID, &decl,
-          clang_type, Type::eResolveStateForward);
-
-      if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
-        if (die.HasChildren()) {
-          SymbolContext cu_sc(die.GetLLDBCompileUnit());
-          bool is_signed = false;
-          enumerator_clang_type.IsIntegerType(is_signed);
-          ParseChildEnumerators(cu_sc, clang_type, is_signed,
-                                type_sp->GetByteSize().getValueOr(0), die);
-        }
-        ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
-      } else {
-        dwarf->GetObjectFile()->GetModule()->ReportError(
-            "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
-            "definition.\nPlease file a bug and attach the file at the "
-            "start of this error message",
-            die.GetOffset(), type_name_cstr);
+    LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type),
+                         die);
+
+    type_sp = std::make_shared<Type>(
+        die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr,
+        dwarf->GetUID(DIERef(attrs.type)), Type::eEncodingIsUID, &attrs.decl,
+        clang_type, Type::eResolveStateForward);
+
+    if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) {
+      if (die.HasChildren()) {
+        SymbolContext cu_sc(die.GetLLDBCompileUnit());
+        bool is_signed = false;
+        enumerator_clang_type.IsIntegerType(is_signed);
+        ParseChildEnumerators(cu_sc, clang_type, is_signed,
+                              type_sp->GetByteSize().getValueOr(0), die);
       }
+      ClangASTContext::CompleteTagDeclarationDefinition(clang_type);
+    } else {
+      dwarf->GetObjectFile()->GetModule()->ReportError(
+          "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its "
+          "definition.\nPlease file a bug and attach the file at the "
+          "start of this error message",
+          die.GetOffset(), attrs.name.GetCString());
     }
   } break;
 
   case DW_TAG_inlined_subroutine:
   case DW_TAG_subprogram:
   case DW_TAG_subroutine_type: {
-    DWARFFormValue type_die_form;
     bool is_variadic = false;
-    bool is_inline = false;
     bool is_static = false;
-    bool is_virtual = false;
-    bool is_explicit = false;
-    bool is_artificial = false;
     bool has_template_params = false;
-    DWARFFormValue specification_die_form;
-    DWARFFormValue abstract_origin_die_form;
-    DWARFDIE object_pointer_die;
 
     unsigned type_quals = 0;
-    clang::StorageClass storage =
-        clang::SC_None; //, Extern, Static, PrivateExtern
-
-    const size_t num_attributes = die.GetAttributes(attributes);
-    if (num_attributes > 0) {
-      uint32_t i;
-      for (i = 0; i < num_attributes; ++i) {
-        attr = attributes.AttributeAtIndex(i);
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_decl_file:
-            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                form_value.Unsigned()));
-            break;
-          case DW_AT_decl_line:
-            decl.SetLine(form_value.Unsigned());
-            break;
-          case DW_AT_decl_column:
-            decl.SetColumn(form_value.Unsigned());
-            break;
-          case DW_AT_name:
-            type_name_cstr = form_value.AsCString();
-            type_name_const_str.SetCString(type_name_cstr);
-            break;
-
-          case DW_AT_linkage_name:
-          case DW_AT_MIPS_linkage_name:
-            mangled_name_cstr = form_value.AsCString();
-            break;
-          case DW_AT_type:
-            type_die_form = form_value;
-            break;
-          case DW_AT_accessibility:
-            accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
-            break;
-          case DW_AT_declaration:
-            break; // is_forward_declaration = form_value.Boolean(); break;
-          case DW_AT_inline:
-            is_inline = form_value.Boolean();
-            break;
-          case DW_AT_virtuality:
-            is_virtual = form_value.Boolean();
-            break;
-          case DW_AT_explicit:
-            is_explicit = form_value.Boolean();
-            break;
-          case DW_AT_artificial:
-            is_artificial = form_value.Boolean();
-            break;
-
-          case DW_AT_external:
-            if (form_value.Unsigned()) {
-              if (storage == clang::SC_None)
-                storage = clang::SC_Extern;
-              else
-                storage = clang::SC_PrivateExtern;
-            }
-            break;
-
-          case DW_AT_specification:
-            specification_die_form = form_value;
-            break;
-
-          case DW_AT_abstract_origin:
-            abstract_origin_die_form = form_value;
-            break;
-
-          case DW_AT_object_pointer:
-            object_pointer_die = form_value.Reference();
-            break;
-
-          case DW_AT_allocated:
-          case DW_AT_associated:
-          case DW_AT_address_class:
-          case DW_AT_calling_convention:
-          case DW_AT_data_location:
-          case DW_AT_elemental:
-          case DW_AT_entry_pc:
-          case DW_AT_frame_base:
-          case DW_AT_high_pc:
-          case DW_AT_low_pc:
-          case DW_AT_prototyped:
-          case DW_AT_pure:
-          case DW_AT_ranges:
-          case DW_AT_recursive:
-          case DW_AT_return_addr:
-          case DW_AT_segment:
-          case DW_AT_start_scope:
-          case DW_AT_static_link:
-          case DW_AT_trampoline:
-          case DW_AT_visibility:
-          case DW_AT_vtable_elem_location:
-          case DW_AT_description:
-          case DW_AT_sibling:
-            break;
-          }
-        }
-      }
-    }
 
     std::string object_pointer_name;
-    if (object_pointer_die) {
-      const char *object_pointer_name_cstr = object_pointer_die.GetName();
+    if (attrs.object_pointer) {
+      const char *object_pointer_name_cstr = attrs.object_pointer.GetName();
       if (object_pointer_name_cstr)
         object_pointer_name = object_pointer_name_cstr;
     }
@@ -1265,8 +1145,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
     CompilerType return_clang_type;
     Type *func_type = NULL;
 
-    if (type_die_form.IsValid())
-      func_type = dwarf->ResolveTypeUID(DIERef(type_die_form));
+    if (attrs.type.IsValid())
+      func_type = dwarf->ResolveTypeUID(DIERef(attrs.type));
 
     if (func_type)
       return_clang_type = func_type->GetForwardCompilerType();
@@ -1322,10 +1202,10 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         return_clang_type, function_param_types.data(),
         function_param_types.size(), is_variadic, type_quals);
 
-    if (type_name_cstr) {
+    if (attrs.name) {
       bool type_handled = false;
       if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) {
-        ObjCLanguage::MethodName objc_method(type_name_cstr, true);
+        ObjCLanguage::MethodName objc_method(attrs.name.GetStringRef(), true);
         if (objc_method.IsValid(true)) {
           CompilerType class_opaque_type;
           ConstString class_name(objc_method.GetClassName());
@@ -1346,13 +1226,13 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
           if (class_opaque_type) {
             // If accessibility isn't set to anything valid, assume public
             // for now...
-            if (accessibility == eAccessNone)
-              accessibility = eAccessPublic;
+            if (attrs.accessibility == eAccessNone)
+              attrs.accessibility = eAccessPublic;
 
             clang::ObjCMethodDecl *objc_method_decl =
                 m_ast.AddMethodToObjCObjectType(
-                    class_opaque_type, type_name_cstr, clang_type,
-                    accessibility, is_artificial, is_variadic);
+                    class_opaque_type, attrs.name.GetCString(), clang_type,
+                    attrs.accessibility, attrs.is_artificial, is_variadic);
             type_handled = objc_method_decl != NULL;
             if (type_handled) {
               LinkDeclContextToDIE(
@@ -1401,7 +1281,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
               }
             }
 
-            if (specification_die_form.IsValid()) {
+            if (attrs.specification.IsValid()) {
               // We have a specification which we are going to base our
               // function prototype off of, so we need this type to be
               // completed so that the m_die_to_decl_ctx for the method in
@@ -1411,7 +1291,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
               // have been made with the specification and not with this
               // die.
               DWARFDIE spec_die =
-                  dwarf->DebugInfo()->GetDIE(DIERef(specification_die_form));
+                  dwarf->DebugInfo()->GetDIE(DIERef(attrs.specification));
               clang::DeclContext *spec_clang_decl_ctx =
                   GetClangDeclContextForDIE(spec_die);
               if (spec_clang_decl_ctx) {
@@ -1420,11 +1300,10 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                 dwarf->GetObjectFile()->GetModule()->ReportWarning(
                     "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x"
                     ") has no decl\n",
-                    die.GetID(),
-                    specification_die_form.Reference().GetOffset());
+                    die.GetID(), attrs.specification.Reference().GetOffset());
               }
               type_handled = true;
-            } else if (abstract_origin_die_form.IsValid()) {
+            } else if (attrs.abstract_origin.IsValid()) {
               // We have a specification which we are going to base our
               // function prototype off of, so we need this type to be
               // completed so that the m_die_to_decl_ctx for the method in
@@ -1432,7 +1311,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
               class_type->GetForwardCompilerType();
 
               DWARFDIE abs_die =
-                  dwarf->DebugInfo()->GetDIE(DIERef(abstract_origin_die_form));
+                  dwarf->DebugInfo()->GetDIE(DIERef(attrs.abstract_origin));
               clang::DeclContext *abs_clang_decl_ctx =
                   GetClangDeclContextForDIE(abs_die);
               if (abs_clang_decl_ctx) {
@@ -1441,8 +1320,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                 dwarf->GetObjectFile()->GetModule()->ReportWarning(
                     "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x"
                     ") has no decl\n",
-                    die.GetID(),
-                    abstract_origin_die_form.Reference().GetOffset());
+                    die.GetID(), attrs.abstract_origin.Reference().GetOffset());
               }
               type_handled = true;
             } else {
@@ -1471,7 +1349,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                              method_iter++) {
                           clang::CXXMethodDecl *method_decl = *method_iter;
                           if (method_decl->getNameInfo().getAsString() ==
-                              std::string(type_name_cstr)) {
+                              attrs.name.GetStringRef()) {
                             if (method_decl->getType() ==
                                 ClangUtil::GetQualType(clang_type)) {
                               add_method = false;
@@ -1492,8 +1370,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                       llvm::PrettyStackTraceFormat stack_trace(
                           "SymbolFileDWARF::ParseType() is adding a method "
                           "%s to class %s in DIE 0x%8.8" PRIx64 " from %s",
-                          type_name_cstr, class_type->GetName().GetCString(),
-                          die.GetID(),
+                          attrs.name.GetCString(),
+                          class_type->GetName().GetCString(), die.GetID(),
                           dwarf->GetObjectFile()
                               ->GetFileSpec()
                               .GetPath()
@@ -1503,15 +1381,16 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
                       // Neither GCC 4.2 nor clang++ currently set a valid
                       // accessibility in the DWARF for C++ methods...
                       // Default to public for now...
-                      if (accessibility == eAccessNone)
-                        accessibility = eAccessPublic;
+                      if (attrs.accessibility == eAccessNone)
+                        attrs.accessibility = eAccessPublic;
 
                       clang::CXXMethodDecl *cxx_method_decl =
                           m_ast.AddMethodToCXXRecordType(
                               class_opaque_type.GetOpaqueQualType(),
-                              type_name_cstr, mangled_name_cstr, clang_type,
-                              accessibility, is_virtual, is_static, is_inline,
-                              is_explicit, is_attr_used, is_artificial);
+                              attrs.name.GetCString(), attrs.mangled_name,
+                              clang_type, attrs.accessibility, attrs.is_virtual,
+                              is_static, attrs.is_inline, attrs.is_explicit,
+                              is_attr_used, attrs.is_artificial);
 
                       type_handled = cxx_method_decl != NULL;
 
@@ -1582,8 +1461,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         clang::FunctionDecl *function_decl = nullptr;
         clang::FunctionDecl *template_function_decl = nullptr;
 
-        if (abstract_origin_die_form.IsValid()) {
-          DWARFDIE abs_die = abstract_origin_die_form.Reference();
+        if (attrs.abstract_origin.IsValid()) {
+          DWARFDIE abs_die = attrs.abstract_origin.Reference();
 
           SymbolContext sc;
 
@@ -1602,7 +1481,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
           function_decl = m_ast.CreateFunctionDeclaration(
               ignore_containing_context ? m_ast.GetTranslationUnitDecl()
                                         : containing_decl_ctx,
-              type_name_cstr, clang_type, storage, is_inline);
+              attrs.name.GetCString(), clang_type, attrs.storage,
+              attrs.is_inline);
 
           if (has_template_params) {
             ClangASTContext::TemplateParameterInfos template_param_infos;
@@ -1610,11 +1490,12 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
             template_function_decl = m_ast.CreateFunctionDeclaration(
                 ignore_containing_context ? m_ast.GetTranslationUnitDecl()
                                           : containing_decl_ctx,
-                type_name_cstr, clang_type, storage, is_inline);
+                attrs.name.GetCString(), clang_type, attrs.storage,
+                attrs.is_inline);
             clang::FunctionTemplateDecl *func_template_decl =
                 m_ast.CreateFunctionTemplateDecl(
-                    containing_decl_ctx, template_function_decl, type_name_cstr,
-                    template_param_infos);
+                    containing_decl_ctx, template_function_decl,
+                    attrs.name.GetCString(), template_param_infos);
             m_ast.CreateFunctionTemplateSpecializationInfo(
                 function_decl, func_template_decl, template_param_infos);
           }
@@ -1650,203 +1531,115 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc,
         }
       }
     }
-    type_sp = std::make_shared<Type>(die.GetID(), dwarf, type_name_const_str,
-                                     llvm::None, nullptr, LLDB_INVALID_UID,
-                                     Type::eEncodingIsUID, &decl, clang_type,
-                                     Type::eResolveStateFull);
+    type_sp = std::make_shared<Type>(
+        die.GetID(), dwarf, attrs.name, llvm::None, nullptr, LLDB_INVALID_UID,
+        Type::eEncodingIsUID, &attrs.decl, clang_type, Type::eResolveStateFull);
     assert(type_sp.get());
   } break;
 
   case DW_TAG_array_type: {
-    DWARFFormValue type_die_form;
-    uint32_t byte_stride = 0;
-    uint32_t bit_stride = 0;
-    bool is_vector = false;
-    const size_t num_attributes = die.GetAttributes(attributes);
+    DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
+                 DW_TAG_value_to_name(tag), type_name_cstr);
 
-    if (num_attributes > 0) {
-      uint32_t i;
-      for (i = 0; i < num_attributes; ++i) {
-        attr = attributes.AttributeAtIndex(i);
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_decl_file:
-            decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                form_value.Unsigned()));
-            break;
-          case DW_AT_decl_line:
-            decl.SetLine(form_value.Unsigned());
-            break;
-          case DW_AT_decl_column:
-            decl.SetColumn(form_value.Unsigned());
-            break;
-          case DW_AT_name:
-            type_name_cstr = form_value.AsCString();
-            type_name_const_str.SetCString(type_name_cstr);
-            break;
+    DIERef type_die_ref(attrs.type);
+    Type *element_type = dwarf->ResolveTypeUID(type_die_ref);
 
-          case DW_AT_type:
-            type_die_form = form_value;
-            break;
-          case DW_AT_byte_size:
-            break; // byte_size = form_value.Unsigned(); break;
-          case DW_AT_byte_stride:
-            byte_stride = form_value.Unsigned();
-            break;
-          case DW_AT_bit_stride:
-            bit_stride = form_value.Unsigned();
-            break;
-          case DW_AT_GNU_vector:
-            is_vector = form_value.Boolean();
-            break;
-          case DW_AT_accessibility:
-            break; // accessibility =
-                   // DW_ACCESS_to_AccessType(form_value.Unsigned()); break;
-          case DW_AT_declaration:
-            break; // is_forward_declaration = form_value.Boolean(); break;
-          case DW_AT_allocated:
-          case DW_AT_associated:
-          case DW_AT_data_location:
-          case DW_AT_description:
-          case DW_AT_ordering:
-          case DW_AT_start_scope:
-          case DW_AT_visibility:
-          case DW_AT_specification:
-          case DW_AT_abstract_origin:
-          case DW_AT_sibling:
-            break;
-          }
-        }
+    if (element_type) {
+      auto array_info = ParseChildArrayInfo(die);
+      if (array_info) {
+        attrs.byte_stride = array_info->byte_stride;
+        attrs.bit_stride = array_info->bit_stride;
       }
-
-      DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(),
-                   DW_TAG_value_to_name(tag), type_name_cstr);
-
-      DIERef type_die_ref(type_die_form);
-      Type *element_type = dwarf->ResolveTypeUID(type_die_ref);
-
-      if (element_type) {
-        auto array_info = ParseChildArrayInfo(die);
-        if (array_info) {
-          byte_stride = array_info->byte_stride;
-          bit_stride = array_info->bit_stride;
+      if (attrs.byte_stride == 0 && attrs.bit_stride == 0)
+        attrs.byte_stride = element_type->GetByteSize().getValueOr(0);
+      CompilerType array_element_type = element_type->GetForwardCompilerType();
+
+      if (ClangASTContext::IsCXXClassType(array_element_type) &&
+          !array_element_type.GetCompleteType()) {
+        ModuleSP module_sp = die.GetModule();
+        if (module_sp) {
+          if (die.GetCU()->GetProducer() == eProducerClang)
+            module_sp->ReportError(
+                "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
+                "class/union/struct element type DIE 0x%8.8x that is a "
+                "forward declaration, not a complete definition.\nTry "
+                "compiling the source file with -fstandalone-debug or "
+                "disable -gmodules",
+                die.GetOffset(), type_die_ref.die_offset);
+          else
+            module_sp->ReportError(
+                "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
+                "class/union/struct element type DIE 0x%8.8x that is a "
+                "forward declaration, not a complete definition.\nPlease "
+                "file a bug against the compiler and include the "
+                "preprocessed output for %s",
+                die.GetOffset(), type_die_ref.die_offset,
+                die.GetLLDBCompileUnit()
+                    ? die.GetLLDBCompileUnit()->GetPath().c_str()
+                    : "the source file");
         }
-        if (byte_stride == 0 && bit_stride == 0)
-          byte_stride = element_type->GetByteSize().getValueOr(0);
-        CompilerType array_element_type =
-            element_type->GetForwardCompilerType();
-
-        if (ClangASTContext::IsCXXClassType(array_element_type) &&
-            !array_element_type.GetCompleteType()) {
-          ModuleSP module_sp = die.GetModule();
-          if (module_sp) {
-            if (die.GetCU()->GetProducer() == eProducerClang)
-              module_sp->ReportError(
-                  "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
-                  "class/union/struct element type DIE 0x%8.8x that is a "
-                  "forward declaration, not a complete definition.\nTry "
-                  "compiling the source file with -fstandalone-debug or "
-                  "disable -gmodules",
-                  die.GetOffset(), type_die_ref.die_offset);
-            else
-              module_sp->ReportError(
-                  "DWARF DW_TAG_array_type DIE at 0x%8.8x has a "
-                  "class/union/struct element type DIE 0x%8.8x that is a "
-                  "forward declaration, not a complete definition.\nPlease "
-                  "file a bug against the compiler and include the "
-                  "preprocessed output for %s",
-                  die.GetOffset(), type_die_ref.die_offset,
-                  die.GetLLDBCompileUnit()
-                      ? die.GetLLDBCompileUnit()->GetPath().c_str()
-                      : "the source file");
-          }
 
-          // We have no choice other than to pretend that the element class
-          // type is complete. If we don't do this, clang will crash when
-          // trying to layout the class. Since we provide layout
-          // assistance, all ivars in this class and other classes will be
-          // fine, this is the best we can do short of crashing.
-          if (ClangASTContext::StartTagDeclarationDefinition(
-                  array_element_type)) {
-            ClangASTContext::CompleteTagDeclarationDefinition(
-                array_element_type);
-          } else {
-            module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to "
-                                   "start its definition.\nPlease file a "
-                                   "bug and attach the file at the start "
-                                   "of this error message",
-                                   type_die_ref.die_offset);
-          }
+        // We have no choice other than to pretend that the element class
+        // type is complete. If we don't do this, clang will crash when
+        // trying to layout the class. Since we provide layout
+        // assistance, all ivars in this class and other classes will be
+        // fine, this is the best we can do short of crashing.
+        if (ClangASTContext::StartTagDeclarationDefinition(
+                array_element_type)) {
+          ClangASTContext::CompleteTagDeclarationDefinition(array_element_type);
+        } else {
+          module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to "
+                                 "start its definition.\nPlease file a "
+                                 "bug and attach the file at the start "
+                                 "of this error message",
+                                 type_die_ref.die_offset);
         }
+      }
 
-        uint64_t array_element_bit_stride = byte_stride * 8 + bit_stride;
-        if (array_info && array_info->element_orders.size() > 0) {
-          uint64_t num_elements = 0;
-          auto end = array_info->element_orders.rend();
-          for (auto pos = array_info->element_orders.rbegin(); pos != end;
-               ++pos) {
-            num_elements = *pos;
-            clang_type = m_ast.CreateArrayType(array_element_type, num_elements,
-                                               is_vector);
-            array_element_type = clang_type;
-            array_element_bit_stride =
-                num_elements ? array_element_bit_stride * num_elements
-                             : array_element_bit_stride;
-          }
-        } else {
-          clang_type = m_ast.CreateArrayType(array_element_type, 0, is_vector);
+      uint64_t array_element_bit_stride =
+          attrs.byte_stride * 8 + attrs.bit_stride;
+      if (array_info && array_info->element_orders.size() > 0) {
+        uint64_t num_elements = 0;
+        auto end = array_info->element_orders.rend();
+        for (auto pos = array_info->element_orders.rbegin(); pos != end;
+             ++pos) {
+          num_elements = *pos;
+          clang_type = m_ast.CreateArrayType(array_element_type, num_elements,
+                                             attrs.is_vector);
+          array_element_type = clang_type;
+          array_element_bit_stride =
+              num_elements ? array_element_bit_stride * num_elements
+                           : array_element_bit_stride;
         }
-        ConstString empty_name;
-        type_sp = std::make_shared<Type>(
-            die.GetID(), dwarf, empty_name, array_element_bit_stride / 8,
-            nullptr, dwarf->GetUID(DIERef(type_die_form)), Type::eEncodingIsUID,
-            &decl, clang_type, Type::eResolveStateFull);
-        type_sp->SetEncodingType(element_type);
-        m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID());
+      } else {
+        clang_type = m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector);
       }
+      ConstString empty_name;
+      type_sp = std::make_shared<Type>(
+          die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr,
+          dwarf->GetUID(type_die_ref), Type::eEncodingIsUID, &attrs.decl,
+          clang_type, Type::eResolveStateFull);
+      type_sp->SetEncodingType(element_type);
+      m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID());
     }
   } break;
 
   case DW_TAG_ptr_to_member_type: {
-    DWARFFormValue type_die_form;
-    DWARFFormValue containing_type_die_form;
+    Type *pointee_type = dwarf->ResolveTypeUID(DIERef(attrs.type));
+    Type *class_type = dwarf->ResolveTypeUID(DIERef(attrs.containing_type));
 
-    const size_t num_attributes = die.GetAttributes(attributes);
+    CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType();
+    CompilerType class_clang_type = class_type->GetLayoutCompilerType();
 
-    if (num_attributes > 0) {
-      uint32_t i;
-      for (i = 0; i < num_attributes; ++i) {
-        attr = attributes.AttributeAtIndex(i);
-        if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-          switch (attr) {
-          case DW_AT_type:
-            type_die_form = form_value;
-            break;
-          case DW_AT_containing_type:
-            containing_type_die_form = form_value;
-            break;
-          }
-        }
-      }
-
-      Type *pointee_type = dwarf->ResolveTypeUID(DIERef(type_die_form));
-      Type *class_type =
-          dwarf->ResolveTypeUID(DIERef(containing_type_die_form));
+    clang_type = ClangASTContext::CreateMemberPointerType(class_clang_type,
+                                                          pointee_clang_type);
 
-      CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType();
-      CompilerType class_clang_type = class_type->GetLayoutCompilerType();
-
-      clang_type = ClangASTContext::CreateMemberPointerType(class_clang_type,
-                                                            pointee_clang_type);
-
-      if (llvm::Optional<uint64_t> clang_type_size =
-              clang_type.GetByteSize(nullptr)) {
-        byte_size = *clang_type_size;
-        type_sp = std::make_shared<Type>(
-            die.GetID(), dwarf, type_name_const_str, byte_size, nullptr,
-            LLDB_INVALID_UID, Type::eEncodingIsUID, nullptr, clang_type,
-            Type::eResolveStateForward);
-      }
+    if (llvm::Optional<uint64_t> clang_type_size =
+            clang_type.GetByteSize(nullptr)) {
+      type_sp = std::make_shared<Type>(
+          die.GetID(), dwarf, attrs.name, *clang_type_size, nullptr,
+          LLDB_INVALID_UID, Type::eEncodingIsUID, nullptr, clang_type,
+          Type::eResolveStateForward);
     }
 
     break;

From df1fbbb4ccecabf4419066453ce4cc56204f367e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 30 May 2019 09:39:45 +0000
Subject: [PATCH 0598/1176] Fix MSVC "not all control paths return a value"
 warning.

llvm-svn: 362076
---
 clang/include/clang/Basic/ObjCRuntime.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/include/clang/Basic/ObjCRuntime.h b/clang/include/clang/Basic/ObjCRuntime.h
index 2caebd58832aa..5329b38c20722 100644
--- a/clang/include/clang/Basic/ObjCRuntime.h
+++ b/clang/include/clang/Basic/ObjCRuntime.h
@@ -443,6 +443,7 @@ class ObjCRuntime {
     case WatchOS:
       return true;
     }
+    llvm_unreachable("bad kind");
   }
 
   /// Try to parse an Objective-C runtime specification from the given

From 3475a46ec23d170d086e98a470f3a3f5492845bd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 30 May 2019 09:40:35 +0000
Subject: [PATCH 0599/1176] Fix sphinx warning about missing footnote.

llvm-svn: 362077
---
 llvm/docs/ORCv2DesignAndImplementation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/ORCv2DesignAndImplementation.rst b/llvm/docs/ORCv2DesignAndImplementation.rst
index b69cd163bcefc..a99d2aa48bf38 100644
--- a/llvm/docs/ORCv2DesignAndImplementation.rst
+++ b/llvm/docs/ORCv2DesignAndImplementation.rst
@@ -39,7 +39,7 @@ Features
 
 ORC provides the following features:
 
-- *JIT-linking* links relocatable object files (COFF, ELF, MachO)[1]_ into a
+- *JIT-linking* links relocatable object files (COFF, ELF, MachO) [1]_ into a
   target process an runtime. The target process may be the same process that
   contains the JIT session object and jit-linker, or may be another process
   (even one running on a different machine or architecture) that communicates

From 0526c0cd8e73317ddef71b81b78631b16f4e9cd0 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 10:00:20 +0000
Subject: [PATCH 0600/1176] [ELF] Implement Local Dynamic style TLSDESC for
 x86-64

For the Local Dynamic case of TLSDESC, _TLS_MODULE_BASE_ is defined as a
special TLS symbol that makes:

1) Without relaxation: it produces a dynamic TLSDESC relocation that
computes 0. Adding @dtpoff to access a TLS symbol.
2) With LD->LE relaxation: _TLS_MODULE_BASE_@tpoff = 0 (lowest address in
the TLS block). Adding @tpoff to access a TLS symbol.

For 1), this saves dynamic relocations and GOT slots as otherwise
(General Dynamic) we would create an R_X86_64_TLSDESC and reserve two
GOT slots for each symbol.

Add ElfSym::TlsModuleBase and change the signature of getTlsTpOffset()
to special case _TLS_MODULE_BASE_.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62577

llvm-svn: 362078
---
 lld/ELF/InputSection.cpp         | 16 +++++++-----
 lld/ELF/Symbols.cpp              |  1 +
 lld/ELF/Symbols.h                |  3 +++
 lld/ELF/Writer.cpp               | 21 +++++++++++++++
 lld/test/ELF/x86-64-tlsdesc-ld.s | 45 ++++++++++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 6 deletions(-)
 create mode 100644 lld/test/ELF/x86-64-tlsdesc-ld.s

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 2ff6b4800b787..45bafd321ded8 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -584,24 +584,28 @@ static Relocation *getRISCVPCRelHi20(const Symbol *Sym, uint64_t Addend) {
 
 // A TLS symbol's virtual address is relative to the TLS segment. Add a
 // target-specific adjustment to produce a thread-pointer-relative offset.
-static int64_t getTlsTpOffset() {
+static int64_t getTlsTpOffset(const Symbol &S) {
+  // On targets that support TLSDESC, _TLS_MODULE_BASE_@tpoff = 0.
+  if (&S == ElfSym::TlsModuleBase)
+    return 0;
+
   switch (Config->EMachine) {
   case EM_ARM:
   case EM_AARCH64:
     // Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
     // followed by a variable amount of alignment padding, followed by the TLS
     // segment.
-    return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
+    return S.getVA(0) + alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
   case EM_386:
   case EM_X86_64:
     // Variant 2. The TLS segment is located just before the thread pointer.
-    return -alignTo(Out::TlsPhdr->p_memsz, Out::TlsPhdr->p_align);
+    return S.getVA(0) - alignTo(Out::TlsPhdr->p_memsz, Out::TlsPhdr->p_align);
   case EM_PPC64:
     // The thread pointer points to a fixed offset from the start of the
     // executable's TLS segment. An offset of 0x7000 allows a signed 16-bit
     // offset to reach 0x1000 of TCB/thread-library data and 0xf000 of the
     // program's TLS segment.
-    return -0x7000;
+    return S.getVA(0) - 0x7000;
   default:
     llvm_unreachable("unhandled Config->EMachine");
   }
@@ -745,12 +749,12 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
     // loaders.
     if (Sym.isUndefined())
       return A;
-    return Sym.getVA(A) + getTlsTpOffset();
+    return getTlsTpOffset(Sym) + A;
   case R_RELAX_TLS_GD_TO_LE_NEG:
   case R_NEG_TLS:
     if (Sym.isUndefined())
       return A;
-    return -Sym.getVA(0) - getTlsTpOffset() + A;
+    return -getTlsTpOffset(Sym) + A;
   case R_SIZE:
     return Sym.getSize() + A;
   case R_TLSDESC:
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index e8c6377fb596d..e82d1ac1fdf64 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -39,6 +39,7 @@ Defined *ElfSym::MipsGpDisp;
 Defined *ElfSym::MipsLocalGp;
 Defined *ElfSym::RelaIpltStart;
 Defined *ElfSym::RelaIpltEnd;
+Defined *ElfSym::TlsModuleBase;
 
 static uint64_t getSymVA(const Symbol &Sym, int64_t &Addend) {
   switch (Sym.kind()) {
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 03de7009ce56b..685f25494b536 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -436,6 +436,9 @@ struct ElfSym {
   // __rel{,a}_iplt_{start,end} symbols.
   static Defined *RelaIpltStart;
   static Defined *RelaIpltEnd;
+
+  // _TLS_MODULE_BASE_ on targets that support TLSDESC.
+  static Defined *TlsModuleBase;
 };
 
 // A buffer class that is large enough to hold any Symbol-derived
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 00d676a3c89c3..975fddf618bd1 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1606,6 +1606,27 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
     if (!dyn_cast_or_null<Defined>(Symtab->find("__global_pointer$")))
       addOptionalRegular("__global_pointer$", findSection(".sdata"), 0x800);
 
+  if (Config->EMachine == EM_X86_64) {
+    // On targets that support TLSDESC, _TLS_MODULE_BASE_ is defined in such a
+    // way that:
+    //
+    // 1) Without relaxation: it produces a dynamic TLSDESC relocation that
+    // computes 0.
+    // 2) With LD->LE relaxation: _TLS_MODULE_BASE_@tpoff = 0 (lowest address in
+    // the TLS block).
+    //
+    // 2) is special cased in @tpoff computation. To satisfy 1), we define it as
+    // an absolute symbol of zero. This is different from GNU linkers which
+    // define _TLS_MODULE_BASE_ relative to the first TLS section.
+    Symbol *S = Symtab->find("_TLS_MODULE_BASE_");
+    if (S && S->isUndefined()) {
+      S->resolve(Defined{/*File=*/nullptr, S->getName(), STB_GLOBAL, STV_HIDDEN,
+                         STT_TLS, /*Value=*/0, 0,
+                         /*Section=*/nullptr});
+      ElfSym::TlsModuleBase = cast<Defined>(S);
+    }
+  }
+
   // This responsible for splitting up .eh_frame section into
   // pieces. The relocation scan uses those pieces, so this has to be
   // earlier.
diff --git a/lld/test/ELF/x86-64-tlsdesc-ld.s b/lld/test/ELF/x86-64-tlsdesc-ld.s
new file mode 100644
index 0000000000000..c478f0baa7ff7
--- /dev/null
+++ b/lld/test/ELF/x86-64-tlsdesc-ld.s
@@ -0,0 +1,45 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=LD-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=LD %s
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+
+## Check _TLS_MODULE_BASE_ used by LD produces a dynamic relocation with a value of 0.
+# LD-REL:      .rela.dyn {
+# LD-REL-NEXT:   0x20A0 R_X86_64_TLSDESC - 0x0
+# LD-REL-NEXT: }
+
+## 0x20a0-0x1007 = 4249
+## dtpoff(a) = 8, dtpoff(b) = 12
+# LD:            leaq 4249(%rip), %rax
+# LD-NEXT: 1007: callq *(%rax)
+# LD-NEXT:       movl %fs:8(%rax), %edx
+# LD-NEXT:       addl %fs:12(%rax), %edx
+
+## When producing an executable, the LD code sequence can be relaxed to LE.
+## It is the same as GD->LE.
+## tpoff(_TLS_MODULE_BASE_) = 0, tpoff(a) = -8, tpoff(b) = -4
+
+# NOREL: no relocations
+
+# LE:      movq $0, %rax
+# LE-NEXT: nop
+# LE-NEXT: movl %fs:-8(%rax), %edx
+# LE-NEXT: addl %fs:-4(%rax), %edx
+
+leaq _TLS_MODULE_BASE_@tlsdesc(%rip), %rax
+call *_TLS_MODULE_BASE_@tlscall(%rax)
+movl %fs:a@dtpoff(%rax), %edx
+addl %fs:b@dtpoff(%rax), %edx
+
+.section .tbss
+.zero 8
+a:
+.zero 4
+b:
+.zero 4

From 3a34bccd206483c04e24e5ae91a3848b749dd72b Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 10:05:52 +0000
Subject: [PATCH 0601/1176] Revert "asm goto: fix out-of-bounds read of
 Constraints after rC362045"

It was fixed by rC362062.

llvm-svn: 362079
---
 clang/lib/Parse/ParseStmtAsm.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index e1c48da5f2ba3..75f3ac396e1a4 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -846,7 +846,6 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
       ExprResult Res =
           Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD);
       Exprs.push_back(Res.get());
-      Constraints.emplace_back();
       NumLabels++;
       ConsumeToken();
       if (!TryConsumeToken(tok::comma))

From e3406c42a42153781935e6dcf9d2561aa316dff5 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Thu, 30 May 2019 10:14:41 +0000
Subject: [PATCH 0602/1176] [llvm-readobj/llvm-readelf] - Implement GNU style
 dumper of the SHT_GNU_verneed section.

It was not implemented yet, we had only LLVM style dumper implemented.
Section description is here: https://refspecs.linuxfoundation.org/LSB_2.0.1/LSB-Core/LSB-Core/symverrqmts.html

Differential revision: https://reviews.llvm.org/D62516

llvm-svn: 362080
---
 .../tools/llvm-readobj/elf-verneed-flags.yaml | 107 ++++++++++++++++++
 .../tools/llvm-readobj/elf-versioninfo.test   |  13 ++-
 llvm/tools/llvm-readobj/ELFDumper.cpp         |  88 ++++++++++++--
 3 files changed, 193 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/tools/llvm-readobj/elf-verneed-flags.yaml

diff --git a/llvm/test/tools/llvm-readobj/elf-verneed-flags.yaml b/llvm/test/tools/llvm-readobj/elf-verneed-flags.yaml
new file mode 100644
index 0000000000000..e121e38ec75e4
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/elf-verneed-flags.yaml
@@ -0,0 +1,107 @@
+## Check how llvm-readobj/llvm-readelf tools dump the flags of SHT_GNU_verneed
+## section entries.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-readobj -V %t | FileCheck %s --check-prefix=LLVM-VERDEF
+# RUN: llvm-readelf -V %t | FileCheck %s --check-prefix=GNU-VERDEF
+
+# LLVM-VERDEF:      SHT_GNU_verneed {
+# LLVM-VERDEF-NEXT:   Dependency {
+# LLVM-VERDEF-NEXT:     Version: 1
+# LLVM-VERDEF-NEXT:     Count: 6
+# LLVM-VERDEF-NEXT:     FileName: dso.so.0
+# LLVM-VERDEF-NEXT:     Entries [
+# LLVM-VERDEF-NEXT:       Entry {
+# LLVM-VERDEF-NEXT:         Hash: 0
+# LLVM-VERDEF-NEXT:         Flags: Base (0x1)
+# LLVM-VERDEF-NEXT:         Index: 0
+# LLVM-VERDEF-NEXT:         Name: base
+# LLVM-VERDEF-NEXT:       }
+# LLVM-VERDEF-NEXT:       Entry {
+# LLVM-VERDEF-NEXT:         Hash: 0
+# LLVM-VERDEF-NEXT:         Flags: Weak (0x2)
+# LLVM-VERDEF-NEXT:         Index: 0
+# LLVM-VERDEF-NEXT:         Name: weak
+# LLVM-VERDEF-NEXT:       }
+# LLVM-VERDEF-NEXT:       Entry {
+# LLVM-VERDEF-NEXT:         Hash: 0
+# LLVM-VERDEF-NEXT:         Flags: Info (0x4)
+# LLVM-VERDEF-NEXT:         Index: 0
+# LLVM-VERDEF-NEXT:         Name: info
+# LLVM-VERDEF-NEXT:       }
+# LLVM-VERDEF-NEXT:       Entry {
+# LLVM-VERDEF-NEXT:         Hash: 0
+# LLVM-VERDEF-NEXT:         Flags: 0x7
+# LLVM-VERDEF-NEXT:         Index: 0
+# LLVM-VERDEF-NEXT:         Name: all
+# LLVM-VERDEF-NEXT:       }
+# LLVM-VERDEF-NEXT:       Entry {
+# LLVM-VERDEF-NEXT:         Hash: 0
+# LLVM-VERDEF-NEXT:         Flags: 0x8
+# LLVM-VERDEF-NEXT:         Index: 0
+# LLVM-VERDEF-NEXT:         Name: unknown
+# LLVM-VERDEF-NEXT:       }
+# LLVM-VERDEF-NEXT:       Entry {
+# LLVM-VERDEF-NEXT:         Hash: 0
+# LLVM-VERDEF-NEXT:         Flags: 0xF
+# LLVM-VERDEF-NEXT:         Index: 0
+# LLVM-VERDEF-NEXT:         Name: all_and_unknown
+# LLVM-VERDEF-NEXT:       }
+# LLVM-VERDEF-NEXT:     ]
+# LLVM-VERDEF-NEXT:   }
+# LLVM-VERDEF-NEXT: }
+
+# GNU-VERDEF:      Version needs section '.gnu.version_r' contains 1 entries:
+# GNU-VERDEF-NEXT:  Addr: 0000000000000000  Offset: 0x000200  Link: 6 (.dynstr)
+# GNU-VERDEF-NEXT:   0x0000: Version: 1  File: dso.so.0  Cnt: 6
+# GNU-VERDEF-NEXT:   0x0010:   Name: base  Flags: BASE  Version: 0
+# GNU-VERDEF-NEXT:   0x0020:   Name: weak  Flags: WEAK  Version: 0
+# GNU-VERDEF-NEXT:   0x0030:   Name: info  Flags: INFO  Version: 0
+# GNU-VERDEF-NEXT:   0x0040:   Name: all  Flags: BASE | WEAK | INFO  Version: 0
+# GNU-VERDEF-NEXT:   0x0050:   Name: unknown  Flags: <unknown>  Version: 0
+# GNU-VERDEF-NEXT:   0x0060:   Name: all_and_unknown  Flags: BASE | WEAK | INFO | <unknown>  Version: 0
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+  Entry:           0x0000000000201000
+Sections:
+  - Name:            .gnu.version_r
+    Type:            SHT_GNU_verneed
+    Link:            .dynstr
+    Info:            0x0000000000000001
+    Dependencies:
+      - Version:         1
+        File:            dso.so.0
+        Entries:
+          - Name:            base
+            Hash:            0
+            Flags:           0x1
+            Other:           0
+          - Name:            weak
+            Hash:            0
+            Flags:           0x2
+            Other:           0
+          - Name:            info
+            Hash:            0
+            Flags:           0x4
+            Other:           0
+          - Name:            all
+            Hash:            0
+            Flags:           0x7
+            Other:           0
+          - Name:            unknown
+            Hash:            0
+            Flags:           0x8
+            Other:           0
+          - Name:            all_and_unknown
+            Hash:            0
+            Flags:           0xf
+            Other:           0
+## Needed to trigger .dynstr creation, which is required by .gnu.version_r
+DynamicSymbols:
+  - Name:    f1
+    Binding: STB_GLOBAL
diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
index 42c158693d8f3..cca5bb1b40989 100644
--- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test
+++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
@@ -173,6 +173,13 @@ DynamicSymbols:
 # GNU-NEXT:  Addr: 0000000000000000  Offset: 0x000280  Link: 7 (.dynsym)
 # GNU-NEXT:   000:   0 (*local*) 2 (VERSION1) 3 (VERSION2) 4 (v1)
 # GNU-NEXT:   004:   5 (v2)      6 (v3)
-
-# GNU: Dumper for .gnu.version_d is not implemented
-# GNU: Dumper for .gnu.version_r is not implemented
+# GNU-EMPTY:
+# GNU-NEXT: Dumper for .gnu.version_d is not implemented
+# GNU-EMPTY:
+# GNU-NEXT: Version needs section '.gnu.version_r' contains 2 entries:
+# GNU-NEXT:  Addr: 0000000000000000  Offset: 0x0002cc  Link: 8 (.dynstr)
+# GNU-NEXT:   0x0000: Version: 1  File: verneed1.so.0  Cnt: 2
+# GNU-NEXT:   0x0010:   Name: v1  Flags: none  Version: 4
+# GNU-NEXT:   0x0020:   Name: v2  Flags: none  Version: 5
+# GNU-NEXT:   0x0030: Version: 1  File: verneed2.so.0  Cnt: 1
+# GNU-NEXT:   0x0040:   Name: v3  Flags: none  Version: 6
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 047b463582c9b..fdef4b79ebf36 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3424,22 +3424,30 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
 }
 
 template <class ELFT>
-void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
-                                               const Elf_Shdr *Sec) {
-  if (!Sec)
-    return;
-
+static void printGNUVersionSectionProlog(formatted_raw_ostream &OS,
+                                         const Twine &Name, unsigned EntriesNum,
+                                         const ELFFile<ELFT> *Obj,
+                                         const typename ELFT::Shdr *Sec) {
   StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
-  uint64_t Entries = Sec->sh_size / sizeof(Elf_Versym);
-
-  OS << "Version symbols section '" << SecName << "' "
-     << "contains " << Entries << " entries:\n";
+  OS << Name << " section '" << SecName << "' "
+     << "contains " << EntriesNum << " entries:\n";
 
-  const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+  const typename ELFT::Shdr *SymTab =
+      unwrapOrError(Obj->getSection(Sec->sh_link));
   StringRef SymTabName = unwrapOrError(Obj->getSectionName(SymTab));
   OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16)
      << "  Offset: " << format_hex(Sec->sh_offset, 8)
      << "  Link: " << Sec->sh_link << " (" << SymTabName << ")\n";
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+                                               const Elf_Shdr *Sec) {
+  if (!Sec)
+    return;
+
+  unsigned Entries = Sec->sh_size / sizeof(Elf_Versym);
+  printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec);
 
   const uint8_t *VersymBuf =
       reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
@@ -3491,6 +3499,28 @@ void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
 
   StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
   OS << "Dumper for " << SecName << " is not implemented\n";
+  OS << '\n';
+}
+
+static std::string verNeedFlagToString(unsigned Flags) {
+  if (Flags == 0)
+    return "none";
+
+  std::string Ret;
+  auto AddFlag = [&Ret, &Flags](unsigned Flag, StringRef Name) {
+    if (!(Flags & Flag))
+      return;
+    if (!Ret.empty())
+      Ret += " | ";
+    Ret += Name;
+    Flags &= ~Flag;
+  };
+
+  AddFlag(VER_FLG_BASE, "BASE");
+  AddFlag(VER_FLG_WEAK, "WEAK");
+  AddFlag(VER_FLG_INFO, "INFO");
+  AddFlag(~0, "<unknown>");
+  return Ret;
 }
 
 template <class ELFT>
@@ -3499,8 +3529,42 @@ void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
   if (!Sec)
     return;
 
-  StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
-  OS << "Dumper for " << SecName << " is not implemented\n";
+  unsigned VerneedNum = Sec->sh_info;
+  printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec);
+
+  ArrayRef<uint8_t> SecData = unwrapOrError(Obj->getSectionContents(Sec));
+
+  const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+  StringRef StringTable = {
+      reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
+      StrTabSec->sh_size};
+
+  const uint8_t *VerneedBuf = SecData.data();
+  for (unsigned I = 0; I < VerneedNum; ++I) {
+    const Elf_Verneed *Verneed =
+        reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+
+    OS << format("  0x%04x: Version: %u  File: %s  Cnt: %u\n",
+                 reinterpret_cast<const uint8_t *>(Verneed) - SecData.begin(),
+                 (unsigned)Verneed->vn_version,
+                 StringTable.drop_front(Verneed->vn_file).data(),
+                 (unsigned)Verneed->vn_cnt);
+
+    const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+    for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+      const Elf_Vernaux *Vernaux =
+          reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+
+      OS << format("  0x%04x:   Name: %s  Flags: %s  Version: %u\n",
+                   reinterpret_cast<const uint8_t *>(Vernaux) - SecData.begin(),
+                   StringTable.drop_front(Vernaux->vna_name).data(),
+                   verNeedFlagToString(Vernaux->vna_flags).c_str(),
+                   (unsigned)Vernaux->vna_other);
+      VernauxBuf += Vernaux->vna_next;
+    }
+    VerneedBuf += Verneed->vn_next;
+  }
+  OS << '\n';
 }
 
 // Hash histogram shows  statistics of how efficient the hash was for the

From 32aac1727a2ab3031ad0878330059f94cca7b5dc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 30 May 2019 10:25:20 +0000
Subject: [PATCH 0603/1176] [X86][SSE] Improve bool vector extload (PR26091)

We already have good codegen for (vXiY *ext(vXi1 bitcast(iX))) cases, this patch uses it for loads of vXi1 types as well - changing the load into a iX integer load, and bitcasting so that combineToExtendBoolVectorInReg can then use it.

Differential Revision: https://reviews.llvm.org/D62449

llvm-svn: 362081
---
 llvm/lib/Target/X86/X86ISelLowering.cpp    |   15 +
 llvm/test/CodeGen/X86/vector-sext-widen.ll | 2959 ++------------------
 llvm/test/CodeGen/X86/vector-sext.ll       | 2959 ++------------------
 3 files changed, 485 insertions(+), 5448 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d756be24d67fa..4a6af21f35ad0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38923,6 +38923,21 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
     return DCI.CombineTo(N, NewVec, TF, true);
   }
 
+  // Bool vector load - attempt to cast to an integer, as we have good
+  // (vXiY *ext(vXi1 bitcast(iX))) handling.
+  if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() &&
+      RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) {
+    unsigned NumElts = RegVT.getVectorNumElements();
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+    if (TLI.isTypeLegal(IntVT)) {
+      SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
+                                    Ld->getPointerInfo(), Alignment,
+                                    Ld->getMemOperand()->getFlags());
+      SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
+      return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/vector-sext-widen.ll b/llvm/test/CodeGen/X86/vector-sext-widen.ll
index c4a2286b8eac6..8fbab65a21560 100644
--- a/llvm/test/CodeGen/X86/vector-sext-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-sext-widen.ll
@@ -2026,193 +2026,32 @@ define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
 }
 
 define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
-; SSE2-LABEL: load_sext_8i1_to_8i16:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movsbq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shrq $7, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $57, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $58, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $59, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $60, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $61, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $62, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm3
-; SSE2-NEXT:    shlq $63, %rax
-; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_8i1_to_8i16:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movsbq (%rdi), %rax
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shrq $7, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $57, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $58, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $59, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $60, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $61, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $62, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm3
-; SSSE3-NEXT:    shlq $63, %rax
-; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_8i1_to_8i16:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movsbq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrw $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $2, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $4, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $5, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $6, %ecx, %xmm0
-; SSE41-NEXT:    shrl $7, %eax
-; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: load_sext_8i1_to_8i16:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_8i1_to_8i16:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movsbq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm0
-; AVX1-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $60, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $59, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $58, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $57, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $7, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_8i1_to_8i16:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movsbq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm0
-; AVX2-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $59, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $58, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $57, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $7, %eax
-; AVX2-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_8i1_to_8i16:
@@ -2232,84 +2071,16 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: load_sext_8i1_to_8i16:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movsbl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $7, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $25, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $26, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $27, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm0
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movsbl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm0
-; X32-SSE41-NEXT:    pinsrw $1, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $2, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $28, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $27, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $4, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $26, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $5, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $25, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $6, %ecx, %xmm0
-; X32-SSE41-NEXT:    shrl $7, %eax
-; X32-SSE41-NEXT:    pinsrw $7, %eax, %xmm0
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT:    pand %xmm1, %xmm0
+; X32-SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; X32-SSE-NEXT:    retl
 entry:
  %X = load <8 x i1>, <8 x i1>* %ptr
  %Y = sext <8 x i1> %X to <8 x i16>
@@ -2466,322 +2237,81 @@ entry:
 }
 
 define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
-; SSE2-LABEL: load_sext_8i1_to_8i32:
+; SSE-LABEL: load_sext_8i1_to_8i32:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: load_sext_8i1_to_8i32:
+; AVX1:       # %bb.0: # %entry
+; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_sext_8i1_to_8i32:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    vpbroadcastd (%rdi), %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_sext_8i1_to_8i32:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    kmovw (%rdi), %k1
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT:    retq
+;
+; X32-SSE-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
+; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE-NEXT:    pand %xmm2, %xmm0
+; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
+; X32-SSE-NEXT:    pand %xmm2, %xmm1
+; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm1
+; X32-SSE-NEXT:    retl
+entry:
+ %X = load <8 x i1>, <8 x i1>* %ptr
+ %Y = sext <8 x i1> %X to <8 x i32>
+ ret <8 x i32> %Y
+}
+
+define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_8i8_to_8i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzbl (%rdi), %eax
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $7, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $6, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $5, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $4, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $3, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $2, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    shrl %eax
-; SSE2-NEXT:    andl $1, %eax
-; SSE2-NEXT:    movd %eax, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    pslld $31, %xmm0
-; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    psrad $31, %xmm1
+; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    psrad $24, %xmm0
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    psrad $24, %xmm1
 ; SSE2-NEXT:    retq
 ;
-; SSSE3-LABEL: load_sext_8i1_to_8i32:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzbl (%rdi), %eax
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $7, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $6, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $5, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $4, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $3, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $2, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    shrl %eax
-; SSSE3-NEXT:    andl $1, %eax
-; SSSE3-NEXT:    movd %eax, %xmm3
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT:    pslld $31, %xmm0
-; SSSE3-NEXT:    psrad $31, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    pslld $31, %xmm1
-; SSSE3-NEXT:    psrad $31, %xmm1
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_8i1_to_8i32:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzbl (%rdi), %eax
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    movl %eax, %edx
-; SSE41-NEXT:    andl $1, %edx
-; SSE41-NEXT:    movd %edx, %xmm1
-; SSE41-NEXT:    pinsrw $1, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $2, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $2, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $3, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $3, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $4, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $4, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $5, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $5, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $6, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $6, %ecx, %xmm1
-; SSE41-NEXT:    shrl $7, %eax
-; SSE41-NEXT:    pinsrw $7, %eax, %xmm1
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SSE41-NEXT:    pslld $31, %xmm0
-; SSE41-NEXT:    psrad $31, %xmm0
-; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE41-NEXT:    pslld $31, %xmm1
-; SSE41-NEXT:    psrad $31, %xmm1
-; SSE41-NEXT:    retq
-;
-; AVX1-LABEL: load_sext_8i1_to_8i32:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movsbq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $58, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $59, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm0
-; AVX1-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $57, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $7, %rcx
-; AVX1-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm1
-; AVX1-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $60, %rax
-; AVX1-NEXT:    sarq $63, %rax
-; AVX1-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_sext_8i1_to_8i32:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movsbq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $58, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $59, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm0
-; AVX2-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $57, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $7, %rcx
-; AVX2-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm1
-; AVX2-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $60, %rax
-; AVX2-NEXT:    sarq $63, %rax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_sext_8i1_to_8i32:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    kmovw (%rdi), %k1
-; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512-NEXT:    retq
-;
-; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movzbl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $7, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $6, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $5, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $4, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $3, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $2, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    shrl %eax
-; X32-SSE2-NEXT:    andl $1, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm3
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT:    pslld $31, %xmm0
-; X32-SSE2-NEXT:    psrad $31, %xmm0
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    pslld $31, %xmm1
-; X32-SSE2-NEXT:    psrad $31, %xmm1
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movzbl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    andl $1, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm1
-; X32-SSE41-NEXT:    pinsrw $1, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $2, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $2, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $3, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $3, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $4, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $4, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $5, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $5, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $6, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $6, %ecx, %xmm1
-; X32-SSE41-NEXT:    shrl $7, %eax
-; X32-SSE41-NEXT:    pinsrw $7, %eax, %xmm1
-; X32-SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X32-SSE41-NEXT:    pslld $31, %xmm0
-; X32-SSE41-NEXT:    psrad $31, %xmm0
-; X32-SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE41-NEXT:    pslld $31, %xmm1
-; X32-SSE41-NEXT:    psrad $31, %xmm1
-; X32-SSE41-NEXT:    retl
-entry:
- %X = load <8 x i1>, <8 x i1>* %ptr
- %Y = sext <8 x i1> %X to <8 x i32>
- ret <8 x i32> %Y
-}
-
-define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) {
-; SSE2-LABEL: load_sext_8i8_to_8i32:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT:    psrad $24, %xmm0
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrad $24, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_8i8_to_8i32:
+; SSSE3-LABEL: load_sext_8i8_to_8i32:
 ; SSSE3:       # %bb.0: # %entry
 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2840,388 +2370,50 @@ entry:
 define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
 ; SSE2-LABEL: load_sext_16i1_to_16i8:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushq %rbp
-; SSE2-NEXT:    pushq %r15
-; SSE2-NEXT:    pushq %r14
-; SSE2-NEXT:    pushq %r13
-; SSE2-NEXT:    pushq %r12
-; SSE2-NEXT:    pushq %rbx
-; SSE2-NEXT:    movswq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %r8
-; SSE2-NEXT:    movq %rax, %r9
-; SSE2-NEXT:    movq %rax, %r10
-; SSE2-NEXT:    movq %rax, %r11
-; SSE2-NEXT:    movq %rax, %r14
-; SSE2-NEXT:    movq %rax, %r15
-; SSE2-NEXT:    movq %rax, %r12
-; SSE2-NEXT:    movq %rax, %r13
-; SSE2-NEXT:    movq %rax, %rbx
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    movq %rax, %rsi
-; SSE2-NEXT:    movq %rax, %rdi
-; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    shrq $15, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm0
-; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    movsbq %al, %rax
-; SSE2-NEXT:    shlq $49, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm1
-; SSE2-NEXT:    shlq $50, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm2
-; SSE2-NEXT:    shlq $51, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm3
-; SSE2-NEXT:    shlq $52, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    shlq $53, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    shlq $54, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE2-NEXT:    shlq $55, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    shlq $60, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT:    shlq $61, %rbx
-; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    shlq $62, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm5
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT:    shlq $63, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSE2-NEXT:    shlq $58, %rsi
-; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT:    shlq $59, %rdi
-; SSE2-NEXT:    sarq $63, %rdi
-; SSE2-NEXT:    movd %edi, %xmm4
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT:    shlq $57, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm2
-; SSE2-NEXT:    shrq $7, %rax
-; SSE2-NEXT:    movd %eax, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT:    popq %rbx
-; SSE2-NEXT:    popq %r12
-; SSE2-NEXT:    popq %r13
-; SSE2-NEXT:    popq %r14
-; SSE2-NEXT:    popq %r15
-; SSE2-NEXT:    popq %rbp
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: load_sext_16i1_to_16i8:
 ; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    pushq %rbp
-; SSSE3-NEXT:    pushq %r15
-; SSSE3-NEXT:    pushq %r14
-; SSSE3-NEXT:    pushq %r13
-; SSSE3-NEXT:    pushq %r12
-; SSSE3-NEXT:    pushq %rbx
-; SSSE3-NEXT:    movswq (%rdi), %rax
-; SSSE3-NEXT:    movq %rax, %r8
-; SSSE3-NEXT:    movq %rax, %r9
-; SSSE3-NEXT:    movq %rax, %r10
-; SSSE3-NEXT:    movq %rax, %r11
-; SSSE3-NEXT:    movq %rax, %r14
-; SSSE3-NEXT:    movq %rax, %r15
-; SSSE3-NEXT:    movq %rax, %r12
-; SSSE3-NEXT:    movq %rax, %r13
-; SSSE3-NEXT:    movq %rax, %rbx
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    movq %rax, %rsi
-; SSSE3-NEXT:    movq %rax, %rdi
-; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    shrq $15, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm0
-; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    movsbq %al, %rax
-; SSSE3-NEXT:    shlq $49, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm1
-; SSSE3-NEXT:    shlq $50, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm2
-; SSSE3-NEXT:    shlq $51, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm3
-; SSSE3-NEXT:    shlq $52, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    shlq $53, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    shlq $54, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSSE3-NEXT:    shlq $55, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT:    shlq $60, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT:    shlq $61, %rbx
-; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    shlq $62, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm5
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSSE3-NEXT:    shlq $63, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSSE3-NEXT:    shlq $58, %rsi
-; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT:    shlq $59, %rdi
-; SSSE3-NEXT:    sarq $63, %rdi
-; SSSE3-NEXT:    movd %edi, %xmm4
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT:    shlq $57, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm2
-; SSSE3-NEXT:    shrq $7, %rax
-; SSSE3-NEXT:    movd %eax, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT:    popq %rbx
-; SSSE3-NEXT:    popq %r12
-; SSSE3-NEXT:    popq %r13
-; SSSE3-NEXT:    popq %r14
-; SSSE3-NEXT:    popq %r15
-; SSSE3-NEXT:    popq %rbp
+; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: load_sext_16i1_to_16i8:
 ; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movswq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm0
-; SSE41-NEXT:    movsbq %al, %rcx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $55, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $54, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $53, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $52, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $51, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $50, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $49, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm0
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
+; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_16i1_to_16i8:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movswq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm0
-; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $60, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $59, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $58, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $57, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movsbq %al, %rcx
-; AVX1-NEXT:    shrl $7, %ecx
-; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $55, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $54, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $53, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $52, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $51, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $50, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $49, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $15, %eax
-; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [-1.7939930131212661E-307,-1.7939930131212661E-307]
+; AVX1-NEXT:    # xmm1 = mem[0,0]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_16i1_to_16i8:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movswq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm0
-; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $59, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $58, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $57, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movsbq %al, %rcx
-; AVX2-NEXT:    shrl $7, %ecx
-; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $55, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $54, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $53, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $52, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $51, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $50, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $49, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $15, %eax
-; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_16i1_to_16i8:
@@ -3242,160 +2434,24 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
 ;
 ; X32-SSE2-LABEL: load_sext_16i1_to_16i8:
 ; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    pushl %ebp
-; X32-SSE2-NEXT:    pushl %ebx
-; X32-SSE2-NEXT:    pushl %edi
-; X32-SSE2-NEXT:    pushl %esi
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movswl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shrl $15, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm2
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shll $17, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    shll $18, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm1
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    shll $19, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $20, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm4
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    shll $21, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm6
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    shll $22, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm7
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shll $23, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm5
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $28, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm2
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movsbl %al, %eax
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
-; X32-SSE2-NEXT:    shll $30, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm4
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
-; X32-SSE2-NEXT:    shll $31, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm0
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $26, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-SSE2-NEXT:    shll $27, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm3
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $25, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm1
-; X32-SSE2-NEXT:    shrl $7, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; X32-SSE2-NEXT:    popl %esi
-; X32-SSE2-NEXT:    popl %edi
-; X32-SSE2-NEXT:    popl %ebx
-; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X32-SSE2-NEXT:    pand %xmm1, %xmm0
+; X32-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movswl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm0
-; X32-SSE41-NEXT:    pinsrb $1, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $2, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $28, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $27, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $4, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $26, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $5, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $25, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $6, %ecx, %xmm0
-; X32-SSE41-NEXT:    movsbl %al, %ecx
-; X32-SSE41-NEXT:    shrl $7, %ecx
-; X32-SSE41-NEXT:    pinsrb $7, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $23, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $22, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $9, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $21, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $10, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $20, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $19, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $12, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $18, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $13, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $17, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $14, %ecx, %xmm0
-; X32-SSE41-NEXT:    shrl $15, %eax
-; X32-SSE41-NEXT:    pinsrb $15, %eax, %xmm0
+; X32-SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; X32-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X32-SSE41-NEXT:    pand %xmm1, %xmm0
+; X32-SSE41-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <16 x i1>, <16 x i1>* %ptr
@@ -3404,442 +2460,43 @@ entry:
 }
 
 define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
-; SSE2-LABEL: load_sext_16i1_to_16i16:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzwl (%rdi), %eax
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $15, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $14, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $13, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $12, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $11, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $10, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $9, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $7, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $6, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $5, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $4, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $3, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $2, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    shrl %eax
-; SSE2-NEXT:    andl $1, %eax
-; SSE2-NEXT:    movd %eax, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    psraw $15, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    psllw $15, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_16i1_to_16i16:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzwl (%rdi), %eax
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $15, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $14, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $13, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $12, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $11, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $10, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $9, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $8, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $7, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $6, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $5, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $4, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $3, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $2, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    shrl %eax
-; SSSE3-NEXT:    andl $1, %eax
-; SSSE3-NEXT:    movd %eax, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT:    psllw $15, %xmm0
-; SSSE3-NEXT:    psraw $15, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    psllw $15, %xmm1
-; SSSE3-NEXT:    psraw $15, %xmm1
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_16i1_to_16i16:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzwl (%rdi), %eax
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    movl %eax, %edx
-; SSE41-NEXT:    andl $1, %edx
-; SSE41-NEXT:    movd %edx, %xmm1
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $2, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $3, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $4, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $5, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $6, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $8, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $9, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $10, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $11, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $12, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $13, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $14, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    psraw $15, %xmm0
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE41-NEXT:    psllw $15, %xmm1
-; SSE41-NEXT:    psraw $15, %xmm1
-; SSE41-NEXT:    retq
+; SSE-LABEL: load_sext_16i1_to_16i16:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_16i1_to_16i16:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    pushq %rbp
-; AVX1-NEXT:    .cfi_def_cfa_offset 16
-; AVX1-NEXT:    pushq %r15
-; AVX1-NEXT:    .cfi_def_cfa_offset 24
-; AVX1-NEXT:    pushq %r14
-; AVX1-NEXT:    .cfi_def_cfa_offset 32
-; AVX1-NEXT:    pushq %r13
-; AVX1-NEXT:    .cfi_def_cfa_offset 40
-; AVX1-NEXT:    pushq %r12
-; AVX1-NEXT:    .cfi_def_cfa_offset 48
-; AVX1-NEXT:    pushq %rbx
-; AVX1-NEXT:    .cfi_def_cfa_offset 56
-; AVX1-NEXT:    .cfi_offset %rbx, -56
-; AVX1-NEXT:    .cfi_offset %r12, -48
-; AVX1-NEXT:    .cfi_offset %r13, -40
-; AVX1-NEXT:    .cfi_offset %r14, -32
-; AVX1-NEXT:    .cfi_offset %r15, -24
-; AVX1-NEXT:    .cfi_offset %rbp, -16
-; AVX1-NEXT:    movswq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $55, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    movq %rax, %r11
-; AVX1-NEXT:    movq %rax, %r14
-; AVX1-NEXT:    movq %rax, %r15
-; AVX1-NEXT:    movl %eax, %r9d
-; AVX1-NEXT:    movq %rax, %r12
-; AVX1-NEXT:    movq %rax, %r13
-; AVX1-NEXT:    movq %rax, %rbx
-; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    movsbq %al, %rbp
-; AVX1-NEXT:    shlq $54, %rax
-; AVX1-NEXT:    sarq $63, %rax
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $53, %r8
-; AVX1-NEXT:    sarq $63, %r8
-; AVX1-NEXT:    vpinsrw $2, %r8d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $52, %r10
-; AVX1-NEXT:    sarq $63, %r10
-; AVX1-NEXT:    vpinsrw $3, %r10d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $51, %r11
-; AVX1-NEXT:    sarq $63, %r11
-; AVX1-NEXT:    vpinsrw $4, %r11d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $50, %r14
-; AVX1-NEXT:    sarq $63, %r14
-; AVX1-NEXT:    vpinsrw $5, %r14d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $49, %r15
-; AVX1-NEXT:    sarq $63, %r15
-; AVX1-NEXT:    vpinsrw $6, %r15d, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $15, %r9d
-; AVX1-NEXT:    vpinsrw $7, %r9d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $63, %r13
-; AVX1-NEXT:    sarq $63, %r13
-; AVX1-NEXT:    vmovd %r13d, %xmm1
-; AVX1-NEXT:    shlq $62, %r12
-; AVX1-NEXT:    sarq $63, %r12
-; AVX1-NEXT:    vpinsrw $1, %r12d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $61, %rbx
-; AVX1-NEXT:    sarq $63, %rbx
-; AVX1-NEXT:    vpinsrw $2, %ebx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $60, %rdi
-; AVX1-NEXT:    sarq $63, %rdi
-; AVX1-NEXT:    vpinsrw $3, %edi, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $59, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $4, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $58, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vpinsrw $5, %edx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $57, %rsi
-; AVX1-NEXT:    sarq $63, %rsi
-; AVX1-NEXT:    vpinsrw $6, %esi, %xmm1, %xmm1
-; AVX1-NEXT:    shrl $7, %ebp
-; AVX1-NEXT:    vpinsrw $7, %ebp, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    popq %rbx
-; AVX1-NEXT:    .cfi_def_cfa_offset 48
-; AVX1-NEXT:    popq %r12
-; AVX1-NEXT:    .cfi_def_cfa_offset 40
-; AVX1-NEXT:    popq %r13
-; AVX1-NEXT:    .cfi_def_cfa_offset 32
-; AVX1-NEXT:    popq %r14
-; AVX1-NEXT:    .cfi_def_cfa_offset 24
-; AVX1-NEXT:    popq %r15
-; AVX1-NEXT:    .cfi_def_cfa_offset 16
-; AVX1-NEXT:    popq %rbp
-; AVX1-NEXT:    .cfi_def_cfa_offset 8
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_16i1_to_16i16:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    pushq %rbp
-; AVX2-NEXT:    .cfi_def_cfa_offset 16
-; AVX2-NEXT:    pushq %r15
-; AVX2-NEXT:    .cfi_def_cfa_offset 24
-; AVX2-NEXT:    pushq %r14
-; AVX2-NEXT:    .cfi_def_cfa_offset 32
-; AVX2-NEXT:    pushq %r13
-; AVX2-NEXT:    .cfi_def_cfa_offset 40
-; AVX2-NEXT:    pushq %r12
-; AVX2-NEXT:    .cfi_def_cfa_offset 48
-; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    .cfi_def_cfa_offset 56
-; AVX2-NEXT:    .cfi_offset %rbx, -56
-; AVX2-NEXT:    .cfi_offset %r12, -48
-; AVX2-NEXT:    .cfi_offset %r13, -40
-; AVX2-NEXT:    .cfi_offset %r14, -32
-; AVX2-NEXT:    .cfi_offset %r15, -24
-; AVX2-NEXT:    .cfi_offset %rbp, -16
-; AVX2-NEXT:    movswq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $55, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    movq %rax, %r11
-; AVX2-NEXT:    movq %rax, %r14
-; AVX2-NEXT:    movq %rax, %r15
-; AVX2-NEXT:    movl %eax, %r9d
-; AVX2-NEXT:    movq %rax, %r12
-; AVX2-NEXT:    movq %rax, %r13
-; AVX2-NEXT:    movq %rax, %rbx
-; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    movsbq %al, %rbp
-; AVX2-NEXT:    shlq $54, %rax
-; AVX2-NEXT:    sarq $63, %rax
-; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $53, %r8
-; AVX2-NEXT:    sarq $63, %r8
-; AVX2-NEXT:    vpinsrw $2, %r8d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $52, %r10
-; AVX2-NEXT:    sarq $63, %r10
-; AVX2-NEXT:    vpinsrw $3, %r10d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $51, %r11
-; AVX2-NEXT:    sarq $63, %r11
-; AVX2-NEXT:    vpinsrw $4, %r11d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $50, %r14
-; AVX2-NEXT:    sarq $63, %r14
-; AVX2-NEXT:    vpinsrw $5, %r14d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $49, %r15
-; AVX2-NEXT:    sarq $63, %r15
-; AVX2-NEXT:    vpinsrw $6, %r15d, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $15, %r9d
-; AVX2-NEXT:    vpinsrw $7, %r9d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $63, %r13
-; AVX2-NEXT:    sarq $63, %r13
-; AVX2-NEXT:    vmovd %r13d, %xmm1
-; AVX2-NEXT:    shlq $62, %r12
-; AVX2-NEXT:    sarq $63, %r12
-; AVX2-NEXT:    vpinsrw $1, %r12d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $61, %rbx
-; AVX2-NEXT:    sarq $63, %rbx
-; AVX2-NEXT:    vpinsrw $2, %ebx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $60, %rdi
-; AVX2-NEXT:    sarq $63, %rdi
-; AVX2-NEXT:    vpinsrw $3, %edi, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $59, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $4, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $58, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vpinsrw $5, %edx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $57, %rsi
-; AVX2-NEXT:    sarq $63, %rsi
-; AVX2-NEXT:    vpinsrw $6, %esi, %xmm1, %xmm1
-; AVX2-NEXT:    shrl $7, %ebp
-; AVX2-NEXT:    vpinsrw $7, %ebp, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    popq %rbx
-; AVX2-NEXT:    .cfi_def_cfa_offset 48
-; AVX2-NEXT:    popq %r12
-; AVX2-NEXT:    .cfi_def_cfa_offset 40
-; AVX2-NEXT:    popq %r13
-; AVX2-NEXT:    .cfi_def_cfa_offset 32
-; AVX2-NEXT:    popq %r14
-; AVX2-NEXT:    .cfi_def_cfa_offset 24
-; AVX2-NEXT:    popq %r15
-; AVX2-NEXT:    .cfi_def_cfa_offset 16
-; AVX2-NEXT:    popq %rbp
-; AVX2-NEXT:    .cfi_def_cfa_offset 8
+; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_16i1_to_16i16:
@@ -3856,165 +2513,20 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: load_sext_16i1_to_16i16:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movzwl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $15, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $14, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $13, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $12, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $11, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $10, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $9, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $8, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $7, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $6, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $5, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $4, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $3, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $2, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    shrl %eax
-; X32-SSE2-NEXT:    andl $1, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm4
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE2-NEXT:    psllw $15, %xmm0
-; X32-SSE2-NEXT:    psraw $15, %xmm0
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    psllw $15, %xmm1
-; X32-SSE2-NEXT:    psraw $15, %xmm1
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movzwl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    andl $1, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm1
-; X32-SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $2, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $3, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $4, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $5, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $6, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $7, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $8, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $9, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $10, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $11, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $12, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $13, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $14, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; X32-SSE41-NEXT:    shrl $15, %eax
-; X32-SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; X32-SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; X32-SSE41-NEXT:    psllw $15, %xmm0
-; X32-SSE41-NEXT:    psraw $15, %xmm0
-; X32-SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; X32-SSE41-NEXT:    psllw $15, %xmm1
-; X32-SSE41-NEXT:    psraw $15, %xmm1
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE-NEXT:    pand %xmm2, %xmm0
+; X32-SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
+; X32-SSE-NEXT:    pand %xmm2, %xmm1
+; X32-SSE-NEXT:    pcmpeqw %xmm2, %xmm1
+; X32-SSE-NEXT:    retl
 entry:
  %X = load <16 x i1>, <16 x i1>* %ptr
  %Y = sext <16 x i1> %X to <16 x i16>
@@ -4022,762 +2534,49 @@ entry:
 }
 
 define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
-; SSE2-LABEL: load_sext_32i1_to_32i8:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushq %rbp
-; SSE2-NEXT:    pushq %r15
-; SSE2-NEXT:    pushq %r14
-; SSE2-NEXT:    pushq %r13
-; SSE2-NEXT:    pushq %r12
-; SSE2-NEXT:    pushq %rbx
-; SSE2-NEXT:    movswq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %r10
-; SSE2-NEXT:    movq %rax, %r8
-; SSE2-NEXT:    movq %rax, %r9
-; SSE2-NEXT:    movq %rax, %r11
-; SSE2-NEXT:    movq %rax, %r14
-; SSE2-NEXT:    movq %rax, %r15
-; SSE2-NEXT:    movq %rax, %r12
-; SSE2-NEXT:    movq %rax, %r13
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    movq %rax, %rsi
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    movq %rax, %rbx
-; SSE2-NEXT:    shrq $15, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm0
-; SSE2-NEXT:    movq %rax, %rbx
-; SSE2-NEXT:    shlq $49, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm15
-; SSE2-NEXT:    movq %rax, %r10
-; SSE2-NEXT:    movsbq %al, %rax
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSE2-NEXT:    shlq $50, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm8
-; SSE2-NEXT:    shlq $51, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm3
-; SSE2-NEXT:    shlq $52, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm9
-; SSE2-NEXT:    shlq $53, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm6
-; SSE2-NEXT:    shlq $54, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm10
-; SSE2-NEXT:    shlq $55, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm2
-; SSE2-NEXT:    shlq $60, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm11
-; SSE2-NEXT:    shlq $61, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm5
-; SSE2-NEXT:    shlq $62, %rsi
-; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm12
-; SSE2-NEXT:    shlq $63, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    shlq $58, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm13
-; SSE2-NEXT:    shlq $59, %rbx
-; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm7
-; SSE2-NEXT:    shlq $57, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm4
-; SSE2-NEXT:    shrq $7, %rax
-; SSE2-NEXT:    movd %eax, %xmm14
-; SSE2-NEXT:    movswq 2(%rdi), %rsi
-; SSE2-NEXT:    movq %rsi, %r8
-; SSE2-NEXT:    movq %rsi, %r9
-; SSE2-NEXT:    movq %rsi, %r10
-; SSE2-NEXT:    movq %rsi, %r11
-; SSE2-NEXT:    movq %rsi, %r14
-; SSE2-NEXT:    movq %rsi, %r15
-; SSE2-NEXT:    movq %rsi, %r12
-; SSE2-NEXT:    movq %rsi, %r13
-; SSE2-NEXT:    movq %rsi, %rbx
-; SSE2-NEXT:    movq %rsi, %rax
-; SSE2-NEXT:    movq %rsi, %rcx
-; SSE2-NEXT:    movq %rsi, %rdx
-; SSE2-NEXT:    movq %rsi, %rdi
-; SSE2-NEXT:    movq %rsi, %rbp
-; SSE2-NEXT:    shrq $15, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm1
-; SSE2-NEXT:    movq %rsi, %rbp
-; SSE2-NEXT:    movsbq %sil, %rsi
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
-; SSE2-NEXT:    shlq $49, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
-; SSE2-NEXT:    shlq $50, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm4
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
-; SSE2-NEXT:    shlq $51, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm5
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT:    shlq $52, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT:    shlq $53, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSE2-NEXT:    shlq $54, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm4
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSE2-NEXT:    shlq $55, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT:    shlq $60, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT:    shlq $61, %rbx
-; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm4
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE2-NEXT:    shlq $62, %rax
-; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm6
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
-; SSE2-NEXT:    shlq $63, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT:    shlq $58, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE2-NEXT:    shlq $59, %rdi
-; SSE2-NEXT:    sarq $63, %rdi
-; SSE2-NEXT:    movd %edi, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT:    shlq $57, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm2
-; SSE2-NEXT:    shrq $7, %rsi
-; SSE2-NEXT:    movd %esi, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSE2-NEXT:    popq %rbx
-; SSE2-NEXT:    popq %r12
-; SSE2-NEXT:    popq %r13
-; SSE2-NEXT:    popq %r14
-; SSE2-NEXT:    popq %r15
-; SSE2-NEXT:    popq %rbp
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_32i1_to_32i8:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    pushq %rbp
-; SSSE3-NEXT:    pushq %r15
-; SSSE3-NEXT:    pushq %r14
-; SSSE3-NEXT:    pushq %r13
-; SSSE3-NEXT:    pushq %r12
-; SSSE3-NEXT:    pushq %rbx
-; SSSE3-NEXT:    movswq (%rdi), %rax
-; SSSE3-NEXT:    movq %rax, %r10
-; SSSE3-NEXT:    movq %rax, %r8
-; SSSE3-NEXT:    movq %rax, %r9
-; SSSE3-NEXT:    movq %rax, %r11
-; SSSE3-NEXT:    movq %rax, %r14
-; SSSE3-NEXT:    movq %rax, %r15
-; SSSE3-NEXT:    movq %rax, %r12
-; SSSE3-NEXT:    movq %rax, %r13
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    movq %rax, %rsi
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    movq %rax, %rbx
-; SSSE3-NEXT:    shrq $15, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rbx
-; SSSE3-NEXT:    shlq $49, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm15
-; SSSE3-NEXT:    movq %rax, %r10
-; SSSE3-NEXT:    movsbq %al, %rax
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSSE3-NEXT:    shlq $50, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm8
-; SSSE3-NEXT:    shlq $51, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm3
-; SSSE3-NEXT:    shlq $52, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm9
-; SSSE3-NEXT:    shlq $53, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm6
-; SSSE3-NEXT:    shlq $54, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm10
-; SSSE3-NEXT:    shlq $55, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm2
-; SSSE3-NEXT:    shlq $60, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm11
-; SSSE3-NEXT:    shlq $61, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm5
-; SSSE3-NEXT:    shlq $62, %rsi
-; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm12
-; SSSE3-NEXT:    shlq $63, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    shlq $58, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm13
-; SSSE3-NEXT:    shlq $59, %rbx
-; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm7
-; SSSE3-NEXT:    shlq $57, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm4
-; SSSE3-NEXT:    shrq $7, %rax
-; SSSE3-NEXT:    movd %eax, %xmm14
-; SSSE3-NEXT:    movswq 2(%rdi), %rsi
-; SSSE3-NEXT:    movq %rsi, %r8
-; SSSE3-NEXT:    movq %rsi, %r9
-; SSSE3-NEXT:    movq %rsi, %r10
-; SSSE3-NEXT:    movq %rsi, %r11
-; SSSE3-NEXT:    movq %rsi, %r14
-; SSSE3-NEXT:    movq %rsi, %r15
-; SSSE3-NEXT:    movq %rsi, %r12
-; SSSE3-NEXT:    movq %rsi, %r13
-; SSSE3-NEXT:    movq %rsi, %rbx
-; SSSE3-NEXT:    movq %rsi, %rax
-; SSSE3-NEXT:    movq %rsi, %rcx
-; SSSE3-NEXT:    movq %rsi, %rdx
-; SSSE3-NEXT:    movq %rsi, %rdi
-; SSSE3-NEXT:    movq %rsi, %rbp
-; SSSE3-NEXT:    shrq $15, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm1
-; SSSE3-NEXT:    movq %rsi, %rbp
-; SSSE3-NEXT:    movsbq %sil, %rsi
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
-; SSSE3-NEXT:    shlq $49, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm3
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
-; SSSE3-NEXT:    shlq $50, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm4
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
-; SSSE3-NEXT:    shlq $51, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm5
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSSE3-NEXT:    shlq $52, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSSE3-NEXT:    shlq $53, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSSE3-NEXT:    shlq $54, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm4
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSSE3-NEXT:    shlq $55, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT:    shlq $60, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT:    shlq $61, %rbx
-; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm4
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSSE3-NEXT:    shlq $62, %rax
-; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm6
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
-; SSSE3-NEXT:    shlq $63, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT:    shlq $58, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSSE3-NEXT:    shlq $59, %rdi
-; SSSE3-NEXT:    sarq $63, %rdi
-; SSSE3-NEXT:    movd %edi, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT:    shlq $57, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm2
-; SSSE3-NEXT:    shrq $7, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm5
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSSE3-NEXT:    popq %rbx
-; SSSE3-NEXT:    popq %r12
-; SSSE3-NEXT:    popq %r13
-; SSSE3-NEXT:    popq %r14
-; SSSE3-NEXT:    popq %r15
-; SSSE3-NEXT:    popq %rbp
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_32i1_to_32i8:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movswq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm0
-; SSE41-NEXT:    movsbq %al, %rcx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $55, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $54, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $53, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $52, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $51, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $50, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $49, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm0
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
-; SSE41-NEXT:    movswq 2(%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm1
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; SSE41-NEXT:    movsbq %al, %rcx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $55, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $54, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $53, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $52, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $51, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $50, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $49, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; SSE41-NEXT:    retq
+; SSE-LABEL: load_sext_32i1_to_32i8:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_32i1_to_32i8:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    pushq %rbp
-; AVX1-NEXT:    pushq %r15
-; AVX1-NEXT:    pushq %r14
-; AVX1-NEXT:    pushq %r13
-; AVX1-NEXT:    pushq %r12
-; AVX1-NEXT:    pushq %rbx
-; AVX1-NEXT:    movslq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $47, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    movq %rax, %r13
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    movq %rax, %r11
-; AVX1-NEXT:    movq %rax, %r9
-; AVX1-NEXT:    movq %rax, %rbx
-; AVX1-NEXT:    movq %rax, %r14
-; AVX1-NEXT:    movq %rax, %r15
-; AVX1-NEXT:    movq %rax, %r12
-; AVX1-NEXT:    movq %rax, %rbp
-; AVX1-NEXT:    shlq $46, %rbp
-; AVX1-NEXT:    sarq $63, %rbp
-; AVX1-NEXT:    vpinsrb $1, %ebp, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rbp
-; AVX1-NEXT:    shlq $45, %r8
-; AVX1-NEXT:    sarq $63, %r8
-; AVX1-NEXT:    vpinsrb $2, %r8d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    shlq $44, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $43, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $42, %rdi
-; AVX1-NEXT:    sarq $63, %rdi
-; AVX1-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    shlq $41, %r13
-; AVX1-NEXT:    sarq $63, %r13
-; AVX1-NEXT:    vpinsrb $6, %r13d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r13
-; AVX1-NEXT:    shlq $40, %rsi
-; AVX1-NEXT:    sarq $63, %rsi
-; AVX1-NEXT:    vpinsrb $7, %esi, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    shlq $39, %r10
-; AVX1-NEXT:    sarq $63, %r10
-; AVX1-NEXT:    vpinsrb $8, %r10d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    shlq $38, %r11
-; AVX1-NEXT:    sarq $63, %r11
-; AVX1-NEXT:    vpinsrb $9, %r11d, %xmm0, %xmm0
-; AVX1-NEXT:    movsbq %al, %r11
-; AVX1-NEXT:    shlq $37, %r9
-; AVX1-NEXT:    sarq $63, %r9
-; AVX1-NEXT:    vpinsrb $10, %r9d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r9
-; AVX1-NEXT:    shlq $36, %rbx
-; AVX1-NEXT:    sarq $63, %rbx
-; AVX1-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rbx
-; AVX1-NEXT:    shlq $35, %r14
-; AVX1-NEXT:    sarq $63, %r14
-; AVX1-NEXT:    vpinsrb $12, %r14d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r14
-; AVX1-NEXT:    shlq $34, %r15
-; AVX1-NEXT:    sarq $63, %r15
-; AVX1-NEXT:    vpinsrb $13, %r15d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r15
-; AVX1-NEXT:    shlq $33, %r12
-; AVX1-NEXT:    sarq $63, %r12
-; AVX1-NEXT:    vpinsrb $14, %r12d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r12
-; AVX1-NEXT:    shrq $31, %rbp
-; AVX1-NEXT:    vpinsrb $15, %ebp, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rbp
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm1
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    movswq %ax, %rax
-; AVX1-NEXT:    shlq $62, %r8
-; AVX1-NEXT:    sarq $63, %r8
-; AVX1-NEXT:    vpinsrb $1, %r8d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $60, %rdi
-; AVX1-NEXT:    sarq $63, %rdi
-; AVX1-NEXT:    vpinsrb $3, %edi, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $59, %r13
-; AVX1-NEXT:    sarq $63, %r13
-; AVX1-NEXT:    vpinsrb $4, %r13d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $58, %rsi
-; AVX1-NEXT:    sarq $63, %rsi
-; AVX1-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $57, %r10
-; AVX1-NEXT:    sarq $63, %r10
-; AVX1-NEXT:    vpinsrb $6, %r10d, %xmm1, %xmm1
-; AVX1-NEXT:    shrl $7, %r11d
-; AVX1-NEXT:    vpinsrb $7, %r11d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $55, %r9
-; AVX1-NEXT:    sarq $63, %r9
-; AVX1-NEXT:    vpinsrb $8, %r9d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $54, %rbx
-; AVX1-NEXT:    sarq $63, %rbx
-; AVX1-NEXT:    vpinsrb $9, %ebx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $53, %r14
-; AVX1-NEXT:    sarq $63, %r14
-; AVX1-NEXT:    vpinsrb $10, %r14d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $52, %r15
-; AVX1-NEXT:    sarq $63, %r15
-; AVX1-NEXT:    vpinsrb $11, %r15d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $51, %r12
-; AVX1-NEXT:    sarq $63, %r12
-; AVX1-NEXT:    vpinsrb $12, %r12d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $50, %rbp
-; AVX1-NEXT:    sarq $63, %rbp
-; AVX1-NEXT:    vpinsrb $13, %ebp, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $49, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vpinsrb $14, %edx, %xmm1, %xmm1
-; AVX1-NEXT:    shrl $15, %eax
-; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    popq %rbx
-; AVX1-NEXT:    popq %r12
-; AVX1-NEXT:    popq %r13
-; AVX1-NEXT:    popq %r14
-; AVX1-NEXT:    popq %r15
-; AVX1-NEXT:    popq %rbp
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_32i1_to_32i8:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    pushq %rbp
-; AVX2-NEXT:    pushq %r15
-; AVX2-NEXT:    pushq %r14
-; AVX2-NEXT:    pushq %r13
-; AVX2-NEXT:    pushq %r12
-; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    movslq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $47, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    movq %rax, %r13
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    movq %rax, %r11
-; AVX2-NEXT:    movq %rax, %r9
-; AVX2-NEXT:    movq %rax, %rbx
-; AVX2-NEXT:    movq %rax, %r14
-; AVX2-NEXT:    movq %rax, %r15
-; AVX2-NEXT:    movq %rax, %r12
-; AVX2-NEXT:    movq %rax, %rbp
-; AVX2-NEXT:    shlq $46, %rbp
-; AVX2-NEXT:    sarq $63, %rbp
-; AVX2-NEXT:    vpinsrb $1, %ebp, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rbp
-; AVX2-NEXT:    shlq $45, %r8
-; AVX2-NEXT:    sarq $63, %r8
-; AVX2-NEXT:    vpinsrb $2, %r8d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    shlq $44, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $43, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $42, %rdi
-; AVX2-NEXT:    sarq $63, %rdi
-; AVX2-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    shlq $41, %r13
-; AVX2-NEXT:    sarq $63, %r13
-; AVX2-NEXT:    vpinsrb $6, %r13d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r13
-; AVX2-NEXT:    shlq $40, %rsi
-; AVX2-NEXT:    sarq $63, %rsi
-; AVX2-NEXT:    vpinsrb $7, %esi, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    shlq $39, %r10
-; AVX2-NEXT:    sarq $63, %r10
-; AVX2-NEXT:    vpinsrb $8, %r10d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    shlq $38, %r11
-; AVX2-NEXT:    sarq $63, %r11
-; AVX2-NEXT:    vpinsrb $9, %r11d, %xmm0, %xmm0
-; AVX2-NEXT:    movsbq %al, %r11
-; AVX2-NEXT:    shlq $37, %r9
-; AVX2-NEXT:    sarq $63, %r9
-; AVX2-NEXT:    vpinsrb $10, %r9d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r9
-; AVX2-NEXT:    shlq $36, %rbx
-; AVX2-NEXT:    sarq $63, %rbx
-; AVX2-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rbx
-; AVX2-NEXT:    shlq $35, %r14
-; AVX2-NEXT:    sarq $63, %r14
-; AVX2-NEXT:    vpinsrb $12, %r14d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r14
-; AVX2-NEXT:    shlq $34, %r15
-; AVX2-NEXT:    sarq $63, %r15
-; AVX2-NEXT:    vpinsrb $13, %r15d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r15
-; AVX2-NEXT:    shlq $33, %r12
-; AVX2-NEXT:    sarq $63, %r12
-; AVX2-NEXT:    vpinsrb $14, %r12d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r12
-; AVX2-NEXT:    shrq $31, %rbp
-; AVX2-NEXT:    vpinsrb $15, %ebp, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rbp
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm1
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    movswq %ax, %rax
-; AVX2-NEXT:    shlq $62, %r8
-; AVX2-NEXT:    sarq $63, %r8
-; AVX2-NEXT:    vpinsrb $1, %r8d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $60, %rdi
-; AVX2-NEXT:    sarq $63, %rdi
-; AVX2-NEXT:    vpinsrb $3, %edi, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $59, %r13
-; AVX2-NEXT:    sarq $63, %r13
-; AVX2-NEXT:    vpinsrb $4, %r13d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $58, %rsi
-; AVX2-NEXT:    sarq $63, %rsi
-; AVX2-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $57, %r10
-; AVX2-NEXT:    sarq $63, %r10
-; AVX2-NEXT:    vpinsrb $6, %r10d, %xmm1, %xmm1
-; AVX2-NEXT:    shrl $7, %r11d
-; AVX2-NEXT:    vpinsrb $7, %r11d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $55, %r9
-; AVX2-NEXT:    sarq $63, %r9
-; AVX2-NEXT:    vpinsrb $8, %r9d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $54, %rbx
-; AVX2-NEXT:    sarq $63, %rbx
-; AVX2-NEXT:    vpinsrb $9, %ebx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $53, %r14
-; AVX2-NEXT:    sarq $63, %r14
-; AVX2-NEXT:    vpinsrb $10, %r14d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $52, %r15
-; AVX2-NEXT:    sarq $63, %r15
-; AVX2-NEXT:    vpinsrb $11, %r15d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $51, %r12
-; AVX2-NEXT:    sarq $63, %r12
-; AVX2-NEXT:    vpinsrb $12, %r12d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $50, %rbp
-; AVX2-NEXT:    sarq $63, %rbp
-; AVX2-NEXT:    vpinsrb $13, %ebp, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $49, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vpinsrb $14, %edx, %xmm1, %xmm1
-; AVX2-NEXT:    shrl $15, %eax
-; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    popq %rbx
-; AVX2-NEXT:    popq %r12
-; AVX2-NEXT:    popq %r13
-; AVX2-NEXT:    popq %r14
-; AVX2-NEXT:    popq %r15
-; AVX2-NEXT:    popq %rbp
+; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_32i1_to_32i8:
@@ -4798,309 +2597,21 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: load_sext_32i1_to_32i8:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    pushl %ebp
-; X32-SSE2-NEXT:    pushl %ebx
-; X32-SSE2-NEXT:    pushl %edi
-; X32-SSE2-NEXT:    pushl %esi
-; X32-SSE2-NEXT:    subl $28, %esp
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movswl (%eax), %edx
-; X32-SSE2-NEXT:    movl %edx, %ebp
-; X32-SSE2-NEXT:    movl %edx, %esi
-; X32-SSE2-NEXT:    movl %edx, %edi
-; X32-SSE2-NEXT:    movl %edx, %ebx
-; X32-SSE2-NEXT:    movl %edx, %ecx
-; X32-SSE2-NEXT:    shrl $15, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %edx, %ecx
-; X32-SSE2-NEXT:    shll $17, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm4
-; X32-SSE2-NEXT:    movl %edx, %ebp
-; X32-SSE2-NEXT:    shll $18, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm1
-; X32-SSE2-NEXT:    movl %edx, %esi
-; X32-SSE2-NEXT:    shll $19, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm2
-; X32-SSE2-NEXT:    movl %edx, %edi
-; X32-SSE2-NEXT:    shll $20, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm5
-; X32-SSE2-NEXT:    movl %edx, %ebx
-; X32-SSE2-NEXT:    shll $21, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm6
-; X32-SSE2-NEXT:    movl %edx, %ecx
-; X32-SSE2-NEXT:    shll $22, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm7
-; X32-SSE2-NEXT:    movl %edx, %ebp
-; X32-SSE2-NEXT:    shll $23, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm3
-; X32-SSE2-NEXT:    movl %edx, %esi
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
-; X32-SSE2-NEXT:    shll $28, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm0
-; X32-SSE2-NEXT:    movl %edx, %edi
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; X32-SSE2-NEXT:    shll $29, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm1
-; X32-SSE2-NEXT:    movl %edx, %ebx
-; X32-SSE2-NEXT:    movsbl %dl, %edx
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    shll $31, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm0
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $26, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm7
-; X32-SSE2-NEXT:    shll $27, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm2
-; X32-SSE2-NEXT:    shll $25, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm6
-; X32-SSE2-NEXT:    shrl $7, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm5
-; X32-SSE2-NEXT:    movswl 2(%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shrl $15, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm4
-; X32-SSE2-NEXT:    movdqu %xmm4, (%esp) # 16-byte Spill
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X32-SSE2-NEXT:    shll $17, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm4
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3],xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $18, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm7
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; X32-SSE2-NEXT:    shll $19, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm5
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
-; X32-SSE2-NEXT:    shll $20, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm6
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT:    shll $21, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm1
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
-; X32-SSE2-NEXT:    shll $22, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm3
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    movdqu (%esp), %xmm2 # 16-byte Reload
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $23, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm2
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $28, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm7
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movsbl %al, %eax
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X32-SSE2-NEXT:    shll $30, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm4
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
-; X32-SSE2-NEXT:    shll $31, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm1
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $26, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm5
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; X32-SSE2-NEXT:    shll $27, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm3
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; X32-SSE2-NEXT:    shll $25, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm4
-; X32-SSE2-NEXT:    shrl $7, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm5
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X32-SSE2-NEXT:    addl $28, %esp
-; X32-SSE2-NEXT:    popl %esi
-; X32-SSE2-NEXT:    popl %edi
-; X32-SSE2-NEXT:    popl %ebx
-; X32-SSE2-NEXT:    popl %ebp
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    pushl %esi
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movswl (%eax), %ecx
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $30, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movl %ecx, %esi
-; X32-SSE41-NEXT:    shll $31, %esi
-; X32-SSE41-NEXT:    sarl $31, %esi
-; X32-SSE41-NEXT:    movd %esi, %xmm0
-; X32-SSE41-NEXT:    pinsrb $1, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $29, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $2, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $28, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $3, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $27, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $4, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $26, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $5, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $25, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $6, %edx, %xmm0
-; X32-SSE41-NEXT:    movsbl %cl, %edx
-; X32-SSE41-NEXT:    shrl $7, %edx
-; X32-SSE41-NEXT:    pinsrb $7, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $23, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $8, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $22, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $9, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $21, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $10, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $20, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $11, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $19, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $12, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $18, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $13, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $17, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $14, %edx, %xmm0
-; X32-SSE41-NEXT:    shrl $15, %ecx
-; X32-SSE41-NEXT:    pinsrb $15, %ecx, %xmm0
-; X32-SSE41-NEXT:    movswl 2(%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm1
-; X32-SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $28, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $27, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $26, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $25, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; X32-SSE41-NEXT:    movsbl %al, %ecx
-; X32-SSE41-NEXT:    shrl $7, %ecx
-; X32-SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $23, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $22, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $21, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $20, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $19, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $18, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $17, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; X32-SSE41-NEXT:    shrl $15, %eax
-; X32-SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; X32-SSE41-NEXT:    popl %esi
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT:    pand %xmm2, %xmm0
+; X32-SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X32-SSE-NEXT:    pand %xmm2, %xmm1
+; X32-SSE-NEXT:    pcmpeqb %xmm2, %xmm1
+; X32-SSE-NEXT:    retl
 entry:
  %X = load <32 x i1>, <32 x i1>* %ptr
  %Y = sext <32 x i1> %X to <32 x i8>
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 50efdc10af6e1..2d51df117fafe 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -2026,193 +2026,32 @@ define <2 x i64> @load_sext_4i8_to_4i64_extract(<4 x i8> *%ptr) {
 }
 
 define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
-; SSE2-LABEL: load_sext_8i1_to_8i16:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movsbq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shrq $7, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $57, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $58, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $59, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $60, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $61, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    shlq $62, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm3
-; SSE2-NEXT:    shlq $63, %rax
-; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_8i1_to_8i16:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movsbq (%rdi), %rax
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shrq $7, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $57, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $58, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $59, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $60, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $61, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    shlq $62, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm3
-; SSSE3-NEXT:    shlq $63, %rax
-; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_8i1_to_8i16:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movsbq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrw $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $2, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $4, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $5, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrw $6, %ecx, %xmm0
-; SSE41-NEXT:    shrl $7, %eax
-; SSE41-NEXT:    pinsrw $7, %eax, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: load_sext_8i1_to_8i16:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_8i1_to_8i16:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movsbq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm0
-; AVX1-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $60, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $59, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $58, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $57, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $7, %eax
-; AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_8i1_to_8i16:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movsbq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm0
-; AVX2-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $59, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $58, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $57, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $7, %eax
-; AVX2-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_8i1_to_8i16:
@@ -2232,84 +2071,16 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: load_sext_8i1_to_8i16:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movsbl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $7, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $25, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $26, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $27, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $28, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    shll $31, %eax
-; X32-SSE2-NEXT:    sarl $31, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm0
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movsbl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm0
-; X32-SSE41-NEXT:    pinsrw $1, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $2, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $28, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $27, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $4, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $26, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $5, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $25, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrw $6, %ecx, %xmm0
-; X32-SSE41-NEXT:    shrl $7, %eax
-; X32-SSE41-NEXT:    pinsrw $7, %eax, %xmm0
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT:    pand %xmm1, %xmm0
+; X32-SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; X32-SSE-NEXT:    retl
 entry:
  %X = load <8 x i1>, <8 x i1>* %ptr
  %Y = sext <8 x i1> %X to <8 x i16>
@@ -2466,322 +2237,81 @@ entry:
 }
 
 define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
-; SSE2-LABEL: load_sext_8i1_to_8i32:
+; SSE-LABEL: load_sext_8i1_to_8i32:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: load_sext_8i1_to_8i32:
+; AVX1:       # %bb.0: # %entry
+; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_sext_8i1_to_8i32:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    vpbroadcastd (%rdi), %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: load_sext_8i1_to_8i32:
+; AVX512:       # %bb.0: # %entry
+; AVX512-NEXT:    kmovw (%rdi), %k1
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT:    retq
+;
+; X32-SSE-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
+; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE-NEXT:    pand %xmm2, %xmm0
+; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
+; X32-SSE-NEXT:    pand %xmm2, %xmm1
+; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm1
+; X32-SSE-NEXT:    retl
+entry:
+ %X = load <8 x i1>, <8 x i1>* %ptr
+ %Y = sext <8 x i1> %X to <8 x i32>
+ ret <8 x i32> %Y
+}
+
+define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_8i8_to_8i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzbl (%rdi), %eax
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $7, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $6, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $5, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $4, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $3, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $2, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    shrl %eax
-; SSE2-NEXT:    andl $1, %eax
-; SSE2-NEXT:    movd %eax, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    pslld $31, %xmm0
-; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    psrad $31, %xmm1
+; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    psrad $24, %xmm0
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    psrad $24, %xmm1
 ; SSE2-NEXT:    retq
 ;
-; SSSE3-LABEL: load_sext_8i1_to_8i32:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzbl (%rdi), %eax
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $7, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $6, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $5, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $4, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $3, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $2, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    shrl %eax
-; SSSE3-NEXT:    andl $1, %eax
-; SSSE3-NEXT:    movd %eax, %xmm3
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT:    pslld $31, %xmm0
-; SSSE3-NEXT:    psrad $31, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    pslld $31, %xmm1
-; SSSE3-NEXT:    psrad $31, %xmm1
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_8i1_to_8i32:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzbl (%rdi), %eax
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    movl %eax, %edx
-; SSE41-NEXT:    andl $1, %edx
-; SSE41-NEXT:    movd %edx, %xmm1
-; SSE41-NEXT:    pinsrw $1, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $2, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $2, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $3, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $3, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $4, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $4, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $5, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $5, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $6, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrw $6, %ecx, %xmm1
-; SSE41-NEXT:    shrl $7, %eax
-; SSE41-NEXT:    pinsrw $7, %eax, %xmm1
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SSE41-NEXT:    pslld $31, %xmm0
-; SSE41-NEXT:    psrad $31, %xmm0
-; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE41-NEXT:    pslld $31, %xmm1
-; SSE41-NEXT:    psrad $31, %xmm1
-; SSE41-NEXT:    retq
-;
-; AVX1-LABEL: load_sext_8i1_to_8i32:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movsbq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $58, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $59, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm0
-; AVX1-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $57, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shrq $7, %rcx
-; AVX1-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm1
-; AVX1-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $60, %rax
-; AVX1-NEXT:    sarq $63, %rax
-; AVX1-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_sext_8i1_to_8i32:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movsbq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $58, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $59, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm0
-; AVX2-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $57, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shrq $7, %rcx
-; AVX2-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm1
-; AVX2-NEXT:    vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $60, %rax
-; AVX2-NEXT:    sarq $63, %rax
-; AVX2-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_sext_8i1_to_8i32:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    kmovw (%rdi), %k1
-; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
-; AVX512-NEXT:    retq
-;
-; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movzbl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $7, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $6, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $5, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $4, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $3, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $2, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    shrl %eax
-; X32-SSE2-NEXT:    andl $1, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm3
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT:    pslld $31, %xmm0
-; X32-SSE2-NEXT:    psrad $31, %xmm0
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT:    pslld $31, %xmm1
-; X32-SSE2-NEXT:    psrad $31, %xmm1
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movzbl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    andl $1, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm1
-; X32-SSE41-NEXT:    pinsrw $1, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $2, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $2, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $3, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $3, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $4, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $4, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $5, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $5, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $6, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrw $6, %ecx, %xmm1
-; X32-SSE41-NEXT:    shrl $7, %eax
-; X32-SSE41-NEXT:    pinsrw $7, %eax, %xmm1
-; X32-SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X32-SSE41-NEXT:    pslld $31, %xmm0
-; X32-SSE41-NEXT:    psrad $31, %xmm0
-; X32-SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE41-NEXT:    pslld $31, %xmm1
-; X32-SSE41-NEXT:    psrad $31, %xmm1
-; X32-SSE41-NEXT:    retl
-entry:
- %X = load <8 x i1>, <8 x i1>* %ptr
- %Y = sext <8 x i1> %X to <8 x i32>
- ret <8 x i32> %Y
-}
-
-define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) {
-; SSE2-LABEL: load_sext_8i8_to_8i32:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT:    psrad $24, %xmm0
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psrad $24, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_8i8_to_8i32:
+; SSSE3-LABEL: load_sext_8i8_to_8i32:
 ; SSSE3:       # %bb.0: # %entry
 ; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -2840,388 +2370,50 @@ entry:
 define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
 ; SSE2-LABEL: load_sext_16i1_to_16i8:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushq %rbp
-; SSE2-NEXT:    pushq %r15
-; SSE2-NEXT:    pushq %r14
-; SSE2-NEXT:    pushq %r13
-; SSE2-NEXT:    pushq %r12
-; SSE2-NEXT:    pushq %rbx
-; SSE2-NEXT:    movswq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %r8
-; SSE2-NEXT:    movq %rax, %r9
-; SSE2-NEXT:    movq %rax, %r10
-; SSE2-NEXT:    movq %rax, %r11
-; SSE2-NEXT:    movq %rax, %r14
-; SSE2-NEXT:    movq %rax, %r15
-; SSE2-NEXT:    movq %rax, %r12
-; SSE2-NEXT:    movq %rax, %r13
-; SSE2-NEXT:    movq %rax, %rbx
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    movq %rax, %rsi
-; SSE2-NEXT:    movq %rax, %rdi
-; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    shrq $15, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm0
-; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    movsbq %al, %rax
-; SSE2-NEXT:    shlq $49, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm1
-; SSE2-NEXT:    shlq $50, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm2
-; SSE2-NEXT:    shlq $51, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm3
-; SSE2-NEXT:    shlq $52, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    shlq $53, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT:    shlq $54, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE2-NEXT:    shlq $55, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    shlq $60, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT:    shlq $61, %rbx
-; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT:    shlq $62, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm5
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT:    shlq $63, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSE2-NEXT:    shlq $58, %rsi
-; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT:    shlq $59, %rdi
-; SSE2-NEXT:    sarq $63, %rdi
-; SSE2-NEXT:    movd %edi, %xmm4
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT:    shlq $57, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm2
-; SSE2-NEXT:    shrq $7, %rax
-; SSE2-NEXT:    movd %eax, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT:    popq %rbx
-; SSE2-NEXT:    popq %r12
-; SSE2-NEXT:    popq %r13
-; SSE2-NEXT:    popq %r14
-; SSE2-NEXT:    popq %r15
-; SSE2-NEXT:    popq %rbp
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: load_sext_16i1_to_16i8:
 ; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    pushq %rbp
-; SSSE3-NEXT:    pushq %r15
-; SSSE3-NEXT:    pushq %r14
-; SSSE3-NEXT:    pushq %r13
-; SSSE3-NEXT:    pushq %r12
-; SSSE3-NEXT:    pushq %rbx
-; SSSE3-NEXT:    movswq (%rdi), %rax
-; SSSE3-NEXT:    movq %rax, %r8
-; SSSE3-NEXT:    movq %rax, %r9
-; SSSE3-NEXT:    movq %rax, %r10
-; SSSE3-NEXT:    movq %rax, %r11
-; SSSE3-NEXT:    movq %rax, %r14
-; SSSE3-NEXT:    movq %rax, %r15
-; SSSE3-NEXT:    movq %rax, %r12
-; SSSE3-NEXT:    movq %rax, %r13
-; SSSE3-NEXT:    movq %rax, %rbx
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    movq %rax, %rsi
-; SSSE3-NEXT:    movq %rax, %rdi
-; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    shrq $15, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm0
-; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    movsbq %al, %rax
-; SSSE3-NEXT:    shlq $49, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm1
-; SSSE3-NEXT:    shlq $50, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm2
-; SSSE3-NEXT:    shlq $51, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm3
-; SSSE3-NEXT:    shlq $52, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    shlq $53, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT:    shlq $54, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSSE3-NEXT:    shlq $55, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT:    shlq $60, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT:    shlq $61, %rbx
-; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm2
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT:    shlq $62, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm5
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSSE3-NEXT:    shlq $63, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSSE3-NEXT:    shlq $58, %rsi
-; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT:    shlq $59, %rdi
-; SSSE3-NEXT:    sarq $63, %rdi
-; SSSE3-NEXT:    movd %edi, %xmm4
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT:    shlq $57, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm2
-; SSSE3-NEXT:    shrq $7, %rax
-; SSSE3-NEXT:    movd %eax, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT:    popq %rbx
-; SSSE3-NEXT:    popq %r12
-; SSSE3-NEXT:    popq %r13
-; SSSE3-NEXT:    popq %r14
-; SSSE3-NEXT:    popq %r15
-; SSSE3-NEXT:    popq %rbp
+; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: load_sext_16i1_to_16i8:
 ; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movswq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm0
-; SSE41-NEXT:    movsbq %al, %rcx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $55, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $54, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $53, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $52, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $51, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $50, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $49, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm0
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
+; SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqb %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_16i1_to_16i8:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    movswq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $62, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm0
-; AVX1-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $60, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $59, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $58, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $57, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movsbq %al, %rcx
-; AVX1-NEXT:    shrl $7, %ecx
-; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $55, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $54, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $53, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $52, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $51, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $50, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $49, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $15, %eax
-; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [-1.7939930131212661E-307,-1.7939930131212661E-307]
+; AVX1-NEXT:    # xmm1 = mem[0,0]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_16i1_to_16i8:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    movswq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $62, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm0
-; AVX2-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $60, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $59, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $58, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $57, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movsbq %al, %rcx
-; AVX2-NEXT:    shrl $7, %ecx
-; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $55, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $54, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $53, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $52, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $51, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $50, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $49, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $15, %eax
-; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_16i1_to_16i8:
@@ -3242,160 +2434,24 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
 ;
 ; X32-SSE2-LABEL: load_sext_16i1_to_16i8:
 ; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    pushl %ebp
-; X32-SSE2-NEXT:    pushl %ebx
-; X32-SSE2-NEXT:    pushl %edi
-; X32-SSE2-NEXT:    pushl %esi
 ; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movswl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shrl $15, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm2
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shll $17, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    shll $18, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm1
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    shll $19, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shll $20, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm4
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    shll $21, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm6
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    shll $22, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm7
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shll $23, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm5
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $28, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm2
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movsbl %al, %eax
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
-; X32-SSE2-NEXT:    shll $30, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm4
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
-; X32-SSE2-NEXT:    shll $31, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm0
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $26, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-SSE2-NEXT:    shll $27, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm3
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $25, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm1
-; X32-SSE2-NEXT:    shrl $7, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; X32-SSE2-NEXT:    popl %esi
-; X32-SSE2-NEXT:    popl %edi
-; X32-SSE2-NEXT:    popl %ebx
-; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X32-SSE2-NEXT:    pand %xmm1, %xmm0
+; X32-SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X32-SSE2-NEXT:    retl
 ;
 ; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movswl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm0
-; X32-SSE41-NEXT:    pinsrb $1, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $2, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $28, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $27, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $4, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $26, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $5, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $25, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $6, %ecx, %xmm0
-; X32-SSE41-NEXT:    movsbl %al, %ecx
-; X32-SSE41-NEXT:    shrl $7, %ecx
-; X32-SSE41-NEXT:    pinsrb $7, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $23, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $22, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $9, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $21, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $10, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $20, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $19, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $12, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $18, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $13, %ecx, %xmm0
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $17, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $14, %ecx, %xmm0
-; X32-SSE41-NEXT:    shrl $15, %eax
-; X32-SSE41-NEXT:    pinsrb $15, %eax, %xmm0
+; X32-SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; X32-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X32-SSE41-NEXT:    pand %xmm1, %xmm0
+; X32-SSE41-NEXT:    pcmpeqb %xmm1, %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
  %X = load <16 x i1>, <16 x i1>* %ptr
@@ -3404,442 +2460,43 @@ entry:
 }
 
 define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
-; SSE2-LABEL: load_sext_16i1_to_16i16:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    movzwl (%rdi), %eax
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $15, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $14, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $13, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $12, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $11, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $10, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $9, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $8, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $7, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $6, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $5, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $4, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $3, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    shrl $2, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    movl %eax, %ecx
-; SSE2-NEXT:    andl $1, %ecx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    shrl %eax
-; SSE2-NEXT:    andl $1, %eax
-; SSE2-NEXT:    movd %eax, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    psraw $15, %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT:    psllw $15, %xmm1
-; SSE2-NEXT:    psraw $15, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_16i1_to_16i16:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    movzwl (%rdi), %eax
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $15, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $14, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $13, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $12, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $11, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $10, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $9, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $8, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $7, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $6, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $5, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $4, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $3, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    shrl $2, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT:    movl %eax, %ecx
-; SSSE3-NEXT:    andl $1, %ecx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    shrl %eax
-; SSSE3-NEXT:    andl $1, %eax
-; SSSE3-NEXT:    movd %eax, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT:    psllw $15, %xmm0
-; SSSE3-NEXT:    psraw $15, %xmm0
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT:    psllw $15, %xmm1
-; SSSE3-NEXT:    psraw $15, %xmm1
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_16i1_to_16i16:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movzwl (%rdi), %eax
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    movl %eax, %edx
-; SSE41-NEXT:    andl $1, %edx
-; SSE41-NEXT:    movd %edx, %xmm1
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $2, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $3, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $4, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $5, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $6, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $8, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $9, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $10, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $11, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $12, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $13, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $14, %ecx
-; SSE41-NEXT:    andl $1, %ecx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    psraw $15, %xmm0
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; SSE41-NEXT:    psllw $15, %xmm1
-; SSE41-NEXT:    psraw $15, %xmm1
-; SSE41-NEXT:    retq
+; SSE-LABEL: load_sext_16i1_to_16i16:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_16i1_to_16i16:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    pushq %rbp
-; AVX1-NEXT:    .cfi_def_cfa_offset 16
-; AVX1-NEXT:    pushq %r15
-; AVX1-NEXT:    .cfi_def_cfa_offset 24
-; AVX1-NEXT:    pushq %r14
-; AVX1-NEXT:    .cfi_def_cfa_offset 32
-; AVX1-NEXT:    pushq %r13
-; AVX1-NEXT:    .cfi_def_cfa_offset 40
-; AVX1-NEXT:    pushq %r12
-; AVX1-NEXT:    .cfi_def_cfa_offset 48
-; AVX1-NEXT:    pushq %rbx
-; AVX1-NEXT:    .cfi_def_cfa_offset 56
-; AVX1-NEXT:    .cfi_offset %rbx, -56
-; AVX1-NEXT:    .cfi_offset %r12, -48
-; AVX1-NEXT:    .cfi_offset %r13, -40
-; AVX1-NEXT:    .cfi_offset %r14, -32
-; AVX1-NEXT:    .cfi_offset %r15, -24
-; AVX1-NEXT:    .cfi_offset %rbp, -16
-; AVX1-NEXT:    movswq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $55, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    movq %rax, %r11
-; AVX1-NEXT:    movq %rax, %r14
-; AVX1-NEXT:    movq %rax, %r15
-; AVX1-NEXT:    movl %eax, %r9d
-; AVX1-NEXT:    movq %rax, %r12
-; AVX1-NEXT:    movq %rax, %r13
-; AVX1-NEXT:    movq %rax, %rbx
-; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    movsbq %al, %rbp
-; AVX1-NEXT:    shlq $54, %rax
-; AVX1-NEXT:    sarq $63, %rax
-; AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $53, %r8
-; AVX1-NEXT:    sarq $63, %r8
-; AVX1-NEXT:    vpinsrw $2, %r8d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $52, %r10
-; AVX1-NEXT:    sarq $63, %r10
-; AVX1-NEXT:    vpinsrw $3, %r10d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $51, %r11
-; AVX1-NEXT:    sarq $63, %r11
-; AVX1-NEXT:    vpinsrw $4, %r11d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $50, %r14
-; AVX1-NEXT:    sarq $63, %r14
-; AVX1-NEXT:    vpinsrw $5, %r14d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $49, %r15
-; AVX1-NEXT:    sarq $63, %r15
-; AVX1-NEXT:    vpinsrw $6, %r15d, %xmm0, %xmm0
-; AVX1-NEXT:    shrl $15, %r9d
-; AVX1-NEXT:    vpinsrw $7, %r9d, %xmm0, %xmm0
-; AVX1-NEXT:    shlq $63, %r13
-; AVX1-NEXT:    sarq $63, %r13
-; AVX1-NEXT:    vmovd %r13d, %xmm1
-; AVX1-NEXT:    shlq $62, %r12
-; AVX1-NEXT:    sarq $63, %r12
-; AVX1-NEXT:    vpinsrw $1, %r12d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $61, %rbx
-; AVX1-NEXT:    sarq $63, %rbx
-; AVX1-NEXT:    vpinsrw $2, %ebx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $60, %rdi
-; AVX1-NEXT:    sarq $63, %rdi
-; AVX1-NEXT:    vpinsrw $3, %edi, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $59, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrw $4, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $58, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vpinsrw $5, %edx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $57, %rsi
-; AVX1-NEXT:    sarq $63, %rsi
-; AVX1-NEXT:    vpinsrw $6, %esi, %xmm1, %xmm1
-; AVX1-NEXT:    shrl $7, %ebp
-; AVX1-NEXT:    vpinsrw $7, %ebp, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    popq %rbx
-; AVX1-NEXT:    .cfi_def_cfa_offset 48
-; AVX1-NEXT:    popq %r12
-; AVX1-NEXT:    .cfi_def_cfa_offset 40
-; AVX1-NEXT:    popq %r13
-; AVX1-NEXT:    .cfi_def_cfa_offset 32
-; AVX1-NEXT:    popq %r14
-; AVX1-NEXT:    .cfi_def_cfa_offset 24
-; AVX1-NEXT:    popq %r15
-; AVX1-NEXT:    .cfi_def_cfa_offset 16
-; AVX1-NEXT:    popq %rbp
-; AVX1-NEXT:    .cfi_def_cfa_offset 8
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_16i1_to_16i16:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    pushq %rbp
-; AVX2-NEXT:    .cfi_def_cfa_offset 16
-; AVX2-NEXT:    pushq %r15
-; AVX2-NEXT:    .cfi_def_cfa_offset 24
-; AVX2-NEXT:    pushq %r14
-; AVX2-NEXT:    .cfi_def_cfa_offset 32
-; AVX2-NEXT:    pushq %r13
-; AVX2-NEXT:    .cfi_def_cfa_offset 40
-; AVX2-NEXT:    pushq %r12
-; AVX2-NEXT:    .cfi_def_cfa_offset 48
-; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    .cfi_def_cfa_offset 56
-; AVX2-NEXT:    .cfi_offset %rbx, -56
-; AVX2-NEXT:    .cfi_offset %r12, -48
-; AVX2-NEXT:    .cfi_offset %r13, -40
-; AVX2-NEXT:    .cfi_offset %r14, -32
-; AVX2-NEXT:    .cfi_offset %r15, -24
-; AVX2-NEXT:    .cfi_offset %rbp, -16
-; AVX2-NEXT:    movswq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $55, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    movq %rax, %r11
-; AVX2-NEXT:    movq %rax, %r14
-; AVX2-NEXT:    movq %rax, %r15
-; AVX2-NEXT:    movl %eax, %r9d
-; AVX2-NEXT:    movq %rax, %r12
-; AVX2-NEXT:    movq %rax, %r13
-; AVX2-NEXT:    movq %rax, %rbx
-; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    movsbq %al, %rbp
-; AVX2-NEXT:    shlq $54, %rax
-; AVX2-NEXT:    sarq $63, %rax
-; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $53, %r8
-; AVX2-NEXT:    sarq $63, %r8
-; AVX2-NEXT:    vpinsrw $2, %r8d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $52, %r10
-; AVX2-NEXT:    sarq $63, %r10
-; AVX2-NEXT:    vpinsrw $3, %r10d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $51, %r11
-; AVX2-NEXT:    sarq $63, %r11
-; AVX2-NEXT:    vpinsrw $4, %r11d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $50, %r14
-; AVX2-NEXT:    sarq $63, %r14
-; AVX2-NEXT:    vpinsrw $5, %r14d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $49, %r15
-; AVX2-NEXT:    sarq $63, %r15
-; AVX2-NEXT:    vpinsrw $6, %r15d, %xmm0, %xmm0
-; AVX2-NEXT:    shrl $15, %r9d
-; AVX2-NEXT:    vpinsrw $7, %r9d, %xmm0, %xmm0
-; AVX2-NEXT:    shlq $63, %r13
-; AVX2-NEXT:    sarq $63, %r13
-; AVX2-NEXT:    vmovd %r13d, %xmm1
-; AVX2-NEXT:    shlq $62, %r12
-; AVX2-NEXT:    sarq $63, %r12
-; AVX2-NEXT:    vpinsrw $1, %r12d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $61, %rbx
-; AVX2-NEXT:    sarq $63, %rbx
-; AVX2-NEXT:    vpinsrw $2, %ebx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $60, %rdi
-; AVX2-NEXT:    sarq $63, %rdi
-; AVX2-NEXT:    vpinsrw $3, %edi, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $59, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrw $4, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $58, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vpinsrw $5, %edx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $57, %rsi
-; AVX2-NEXT:    sarq $63, %rsi
-; AVX2-NEXT:    vpinsrw $6, %esi, %xmm1, %xmm1
-; AVX2-NEXT:    shrl $7, %ebp
-; AVX2-NEXT:    vpinsrw $7, %ebp, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    popq %rbx
-; AVX2-NEXT:    .cfi_def_cfa_offset 48
-; AVX2-NEXT:    popq %r12
-; AVX2-NEXT:    .cfi_def_cfa_offset 40
-; AVX2-NEXT:    popq %r13
-; AVX2-NEXT:    .cfi_def_cfa_offset 32
-; AVX2-NEXT:    popq %r14
-; AVX2-NEXT:    .cfi_def_cfa_offset 24
-; AVX2-NEXT:    popq %r15
-; AVX2-NEXT:    .cfi_def_cfa_offset 16
-; AVX2-NEXT:    popq %rbp
-; AVX2-NEXT:    .cfi_def_cfa_offset 8
+; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_16i1_to_16i16:
@@ -3856,165 +2513,20 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: load_sext_16i1_to_16i16:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movzwl (%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $15, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $14, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $13, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $12, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $11, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $10, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $9, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $8, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm1
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $7, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $6, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $5, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $4, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $3, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    shrl $2, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    andl $1, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    shrl %eax
-; X32-SSE2-NEXT:    andl $1, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm4
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE2-NEXT:    psllw $15, %xmm0
-; X32-SSE2-NEXT:    psraw $15, %xmm0
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    psllw $15, %xmm1
-; X32-SSE2-NEXT:    psraw $15, %xmm1
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movzwl (%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    andl $1, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm1
-; X32-SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $2, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $3, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $4, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $5, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $6, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $7, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $8, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $9, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $10, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $11, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $12, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $13, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shrl $14, %ecx
-; X32-SSE41-NEXT:    andl $1, %ecx
-; X32-SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; X32-SSE41-NEXT:    shrl $15, %eax
-; X32-SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; X32-SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; X32-SSE41-NEXT:    psllw $15, %xmm0
-; X32-SSE41-NEXT:    psraw $15, %xmm0
-; X32-SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; X32-SSE41-NEXT:    psllw $15, %xmm1
-; X32-SSE41-NEXT:    psraw $15, %xmm1
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE-NEXT:    pand %xmm2, %xmm0
+; X32-SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
+; X32-SSE-NEXT:    pand %xmm2, %xmm1
+; X32-SSE-NEXT:    pcmpeqw %xmm2, %xmm1
+; X32-SSE-NEXT:    retl
 entry:
  %X = load <16 x i1>, <16 x i1>* %ptr
  %Y = sext <16 x i1> %X to <16 x i16>
@@ -4022,762 +2534,49 @@ entry:
 }
 
 define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
-; SSE2-LABEL: load_sext_32i1_to_32i8:
-; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushq %rbp
-; SSE2-NEXT:    pushq %r15
-; SSE2-NEXT:    pushq %r14
-; SSE2-NEXT:    pushq %r13
-; SSE2-NEXT:    pushq %r12
-; SSE2-NEXT:    pushq %rbx
-; SSE2-NEXT:    movswq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %r10
-; SSE2-NEXT:    movq %rax, %r8
-; SSE2-NEXT:    movq %rax, %r9
-; SSE2-NEXT:    movq %rax, %r11
-; SSE2-NEXT:    movq %rax, %r14
-; SSE2-NEXT:    movq %rax, %r15
-; SSE2-NEXT:    movq %rax, %r12
-; SSE2-NEXT:    movq %rax, %r13
-; SSE2-NEXT:    movq %rax, %rdx
-; SSE2-NEXT:    movq %rax, %rsi
-; SSE2-NEXT:    movq %rax, %rcx
-; SSE2-NEXT:    movq %rax, %rbp
-; SSE2-NEXT:    movq %rax, %rbx
-; SSE2-NEXT:    shrq $15, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm0
-; SSE2-NEXT:    movq %rax, %rbx
-; SSE2-NEXT:    shlq $49, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm15
-; SSE2-NEXT:    movq %rax, %r10
-; SSE2-NEXT:    movsbq %al, %rax
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSE2-NEXT:    shlq $50, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm8
-; SSE2-NEXT:    shlq $51, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm3
-; SSE2-NEXT:    shlq $52, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm9
-; SSE2-NEXT:    shlq $53, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm6
-; SSE2-NEXT:    shlq $54, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm10
-; SSE2-NEXT:    shlq $55, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm2
-; SSE2-NEXT:    shlq $60, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm11
-; SSE2-NEXT:    shlq $61, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm5
-; SSE2-NEXT:    shlq $62, %rsi
-; SSE2-NEXT:    sarq $63, %rsi
-; SSE2-NEXT:    movd %esi, %xmm12
-; SSE2-NEXT:    shlq $63, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm0
-; SSE2-NEXT:    shlq $58, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm13
-; SSE2-NEXT:    shlq $59, %rbx
-; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm7
-; SSE2-NEXT:    shlq $57, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm4
-; SSE2-NEXT:    shrq $7, %rax
-; SSE2-NEXT:    movd %eax, %xmm14
-; SSE2-NEXT:    movswq 2(%rdi), %rsi
-; SSE2-NEXT:    movq %rsi, %r8
-; SSE2-NEXT:    movq %rsi, %r9
-; SSE2-NEXT:    movq %rsi, %r10
-; SSE2-NEXT:    movq %rsi, %r11
-; SSE2-NEXT:    movq %rsi, %r14
-; SSE2-NEXT:    movq %rsi, %r15
-; SSE2-NEXT:    movq %rsi, %r12
-; SSE2-NEXT:    movq %rsi, %r13
-; SSE2-NEXT:    movq %rsi, %rbx
-; SSE2-NEXT:    movq %rsi, %rax
-; SSE2-NEXT:    movq %rsi, %rcx
-; SSE2-NEXT:    movq %rsi, %rdx
-; SSE2-NEXT:    movq %rsi, %rdi
-; SSE2-NEXT:    movq %rsi, %rbp
-; SSE2-NEXT:    shrq $15, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm1
-; SSE2-NEXT:    movq %rsi, %rbp
-; SSE2-NEXT:    movsbq %sil, %rsi
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
-; SSE2-NEXT:    shlq $49, %r8
-; SSE2-NEXT:    sarq $63, %r8
-; SSE2-NEXT:    movd %r8d, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
-; SSE2-NEXT:    shlq $50, %r9
-; SSE2-NEXT:    sarq $63, %r9
-; SSE2-NEXT:    movd %r9d, %xmm4
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
-; SSE2-NEXT:    shlq $51, %r10
-; SSE2-NEXT:    sarq $63, %r10
-; SSE2-NEXT:    movd %r10d, %xmm5
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT:    shlq $52, %r11
-; SSE2-NEXT:    sarq $63, %r11
-; SSE2-NEXT:    movd %r11d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT:    shlq $53, %r14
-; SSE2-NEXT:    sarq $63, %r14
-; SSE2-NEXT:    movd %r14d, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSE2-NEXT:    shlq $54, %r15
-; SSE2-NEXT:    sarq $63, %r15
-; SSE2-NEXT:    movd %r15d, %xmm4
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSE2-NEXT:    shlq $55, %r12
-; SSE2-NEXT:    sarq $63, %r12
-; SSE2-NEXT:    movd %r12d, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT:    shlq $60, %r13
-; SSE2-NEXT:    sarq $63, %r13
-; SSE2-NEXT:    movd %r13d, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT:    shlq $61, %rbx
-; SSE2-NEXT:    sarq $63, %rbx
-; SSE2-NEXT:    movd %ebx, %xmm4
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE2-NEXT:    shlq $62, %rax
-; SSE2-NEXT:    sarq $63, %rax
-; SSE2-NEXT:    movd %eax, %xmm6
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
-; SSE2-NEXT:    shlq $63, %rcx
-; SSE2-NEXT:    sarq $63, %rcx
-; SSE2-NEXT:    movd %ecx, %xmm1
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT:    shlq $58, %rdx
-; SSE2-NEXT:    sarq $63, %rdx
-; SSE2-NEXT:    movd %edx, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE2-NEXT:    shlq $59, %rdi
-; SSE2-NEXT:    sarq $63, %rdi
-; SSE2-NEXT:    movd %edi, %xmm4
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT:    shlq $57, %rbp
-; SSE2-NEXT:    sarq $63, %rbp
-; SSE2-NEXT:    movd %ebp, %xmm2
-; SSE2-NEXT:    shrq $7, %rsi
-; SSE2-NEXT:    movd %esi, %xmm5
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSE2-NEXT:    popq %rbx
-; SSE2-NEXT:    popq %r12
-; SSE2-NEXT:    popq %r13
-; SSE2-NEXT:    popq %r14
-; SSE2-NEXT:    popq %r15
-; SSE2-NEXT:    popq %rbp
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: load_sext_32i1_to_32i8:
-; SSSE3:       # %bb.0: # %entry
-; SSSE3-NEXT:    pushq %rbp
-; SSSE3-NEXT:    pushq %r15
-; SSSE3-NEXT:    pushq %r14
-; SSSE3-NEXT:    pushq %r13
-; SSSE3-NEXT:    pushq %r12
-; SSSE3-NEXT:    pushq %rbx
-; SSSE3-NEXT:    movswq (%rdi), %rax
-; SSSE3-NEXT:    movq %rax, %r10
-; SSSE3-NEXT:    movq %rax, %r8
-; SSSE3-NEXT:    movq %rax, %r9
-; SSSE3-NEXT:    movq %rax, %r11
-; SSSE3-NEXT:    movq %rax, %r14
-; SSSE3-NEXT:    movq %rax, %r15
-; SSSE3-NEXT:    movq %rax, %r12
-; SSSE3-NEXT:    movq %rax, %r13
-; SSSE3-NEXT:    movq %rax, %rdx
-; SSSE3-NEXT:    movq %rax, %rsi
-; SSSE3-NEXT:    movq %rax, %rcx
-; SSSE3-NEXT:    movq %rax, %rbp
-; SSSE3-NEXT:    movq %rax, %rbx
-; SSSE3-NEXT:    shrq $15, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm0
-; SSSE3-NEXT:    movq %rax, %rbx
-; SSSE3-NEXT:    shlq $49, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm15
-; SSSE3-NEXT:    movq %rax, %r10
-; SSSE3-NEXT:    movsbq %al, %rax
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSSE3-NEXT:    shlq $50, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm8
-; SSSE3-NEXT:    shlq $51, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm3
-; SSSE3-NEXT:    shlq $52, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm9
-; SSSE3-NEXT:    shlq $53, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm6
-; SSSE3-NEXT:    shlq $54, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm10
-; SSSE3-NEXT:    shlq $55, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm2
-; SSSE3-NEXT:    shlq $60, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm11
-; SSSE3-NEXT:    shlq $61, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm5
-; SSSE3-NEXT:    shlq $62, %rsi
-; SSSE3-NEXT:    sarq $63, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm12
-; SSSE3-NEXT:    shlq $63, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm0
-; SSSE3-NEXT:    shlq $58, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm13
-; SSSE3-NEXT:    shlq $59, %rbx
-; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm7
-; SSSE3-NEXT:    shlq $57, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm4
-; SSSE3-NEXT:    shrq $7, %rax
-; SSSE3-NEXT:    movd %eax, %xmm14
-; SSSE3-NEXT:    movswq 2(%rdi), %rsi
-; SSSE3-NEXT:    movq %rsi, %r8
-; SSSE3-NEXT:    movq %rsi, %r9
-; SSSE3-NEXT:    movq %rsi, %r10
-; SSSE3-NEXT:    movq %rsi, %r11
-; SSSE3-NEXT:    movq %rsi, %r14
-; SSSE3-NEXT:    movq %rsi, %r15
-; SSSE3-NEXT:    movq %rsi, %r12
-; SSSE3-NEXT:    movq %rsi, %r13
-; SSSE3-NEXT:    movq %rsi, %rbx
-; SSSE3-NEXT:    movq %rsi, %rax
-; SSSE3-NEXT:    movq %rsi, %rcx
-; SSSE3-NEXT:    movq %rsi, %rdx
-; SSSE3-NEXT:    movq %rsi, %rdi
-; SSSE3-NEXT:    movq %rsi, %rbp
-; SSSE3-NEXT:    shrq $15, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm1
-; SSSE3-NEXT:    movq %rsi, %rbp
-; SSSE3-NEXT:    movsbq %sil, %rsi
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
-; SSSE3-NEXT:    shlq $49, %r8
-; SSSE3-NEXT:    sarq $63, %r8
-; SSSE3-NEXT:    movd %r8d, %xmm3
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
-; SSSE3-NEXT:    shlq $50, %r9
-; SSSE3-NEXT:    sarq $63, %r9
-; SSSE3-NEXT:    movd %r9d, %xmm4
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
-; SSSE3-NEXT:    shlq $51, %r10
-; SSSE3-NEXT:    sarq $63, %r10
-; SSSE3-NEXT:    movd %r10d, %xmm5
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSSE3-NEXT:    shlq $52, %r11
-; SSSE3-NEXT:    sarq $63, %r11
-; SSSE3-NEXT:    movd %r11d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSSE3-NEXT:    shlq $53, %r14
-; SSSE3-NEXT:    sarq $63, %r14
-; SSSE3-NEXT:    movd %r14d, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSSE3-NEXT:    shlq $54, %r15
-; SSSE3-NEXT:    sarq $63, %r15
-; SSSE3-NEXT:    movd %r15d, %xmm4
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSSE3-NEXT:    shlq $55, %r12
-; SSSE3-NEXT:    sarq $63, %r12
-; SSSE3-NEXT:    movd %r12d, %xmm3
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT:    shlq $60, %r13
-; SSSE3-NEXT:    sarq $63, %r13
-; SSSE3-NEXT:    movd %r13d, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT:    shlq $61, %rbx
-; SSSE3-NEXT:    sarq $63, %rbx
-; SSSE3-NEXT:    movd %ebx, %xmm4
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSSE3-NEXT:    shlq $62, %rax
-; SSSE3-NEXT:    sarq $63, %rax
-; SSSE3-NEXT:    movd %eax, %xmm6
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
-; SSSE3-NEXT:    shlq $63, %rcx
-; SSSE3-NEXT:    sarq $63, %rcx
-; SSSE3-NEXT:    movd %ecx, %xmm1
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT:    shlq $58, %rdx
-; SSSE3-NEXT:    sarq $63, %rdx
-; SSSE3-NEXT:    movd %edx, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSSE3-NEXT:    shlq $59, %rdi
-; SSSE3-NEXT:    sarq $63, %rdi
-; SSSE3-NEXT:    movd %edi, %xmm4
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT:    shlq $57, %rbp
-; SSSE3-NEXT:    sarq $63, %rbp
-; SSSE3-NEXT:    movd %ebp, %xmm2
-; SSSE3-NEXT:    shrq $7, %rsi
-; SSSE3-NEXT:    movd %esi, %xmm5
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSSE3-NEXT:    popq %rbx
-; SSSE3-NEXT:    popq %r12
-; SSSE3-NEXT:    popq %r13
-; SSSE3-NEXT:    popq %r14
-; SSSE3-NEXT:    popq %r15
-; SSSE3-NEXT:    popq %rbp
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: load_sext_32i1_to_32i8:
-; SSE41:       # %bb.0: # %entry
-; SSE41-NEXT:    movswq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm0
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm0
-; SSE41-NEXT:    movsbq %al, %rcx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $55, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $54, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $53, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $52, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $51, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $50, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm0
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $49, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm0
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm0
-; SSE41-NEXT:    movswq 2(%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $62, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shlq $63, %rdx
-; SSE41-NEXT:    sarq $63, %rdx
-; SSE41-NEXT:    movd %edx, %xmm1
-; SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $61, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $60, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $59, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $58, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $57, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; SSE41-NEXT:    movsbq %al, %rcx
-; SSE41-NEXT:    shrl $7, %ecx
-; SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $55, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $54, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $53, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $52, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $51, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $50, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shlq $49, %rcx
-; SSE41-NEXT:    sarq $63, %rcx
-; SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; SSE41-NEXT:    shrl $15, %eax
-; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; SSE41-NEXT:    retq
+; SSE-LABEL: load_sext_32i1_to_32i8:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
+; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: load_sext_32i1_to_32i8:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    pushq %rbp
-; AVX1-NEXT:    pushq %r15
-; AVX1-NEXT:    pushq %r14
-; AVX1-NEXT:    pushq %r13
-; AVX1-NEXT:    pushq %r12
-; AVX1-NEXT:    pushq %rbx
-; AVX1-NEXT:    movslq (%rdi), %rax
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $47, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    movq %rax, %r13
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    movq %rax, %r11
-; AVX1-NEXT:    movq %rax, %r9
-; AVX1-NEXT:    movq %rax, %rbx
-; AVX1-NEXT:    movq %rax, %r14
-; AVX1-NEXT:    movq %rax, %r15
-; AVX1-NEXT:    movq %rax, %r12
-; AVX1-NEXT:    movq %rax, %rbp
-; AVX1-NEXT:    shlq $46, %rbp
-; AVX1-NEXT:    sarq $63, %rbp
-; AVX1-NEXT:    vpinsrb $1, %ebp, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rbp
-; AVX1-NEXT:    shlq $45, %r8
-; AVX1-NEXT:    sarq $63, %r8
-; AVX1-NEXT:    vpinsrb $2, %r8d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r8
-; AVX1-NEXT:    shlq $44, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    shlq $43, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rcx
-; AVX1-NEXT:    shlq $42, %rdi
-; AVX1-NEXT:    sarq $63, %rdi
-; AVX1-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rdi
-; AVX1-NEXT:    shlq $41, %r13
-; AVX1-NEXT:    sarq $63, %r13
-; AVX1-NEXT:    vpinsrb $6, %r13d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r13
-; AVX1-NEXT:    shlq $40, %rsi
-; AVX1-NEXT:    sarq $63, %rsi
-; AVX1-NEXT:    vpinsrb $7, %esi, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rsi
-; AVX1-NEXT:    shlq $39, %r10
-; AVX1-NEXT:    sarq $63, %r10
-; AVX1-NEXT:    vpinsrb $8, %r10d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r10
-; AVX1-NEXT:    shlq $38, %r11
-; AVX1-NEXT:    sarq $63, %r11
-; AVX1-NEXT:    vpinsrb $9, %r11d, %xmm0, %xmm0
-; AVX1-NEXT:    movsbq %al, %r11
-; AVX1-NEXT:    shlq $37, %r9
-; AVX1-NEXT:    sarq $63, %r9
-; AVX1-NEXT:    vpinsrb $10, %r9d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r9
-; AVX1-NEXT:    shlq $36, %rbx
-; AVX1-NEXT:    sarq $63, %rbx
-; AVX1-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rbx
-; AVX1-NEXT:    shlq $35, %r14
-; AVX1-NEXT:    sarq $63, %r14
-; AVX1-NEXT:    vpinsrb $12, %r14d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r14
-; AVX1-NEXT:    shlq $34, %r15
-; AVX1-NEXT:    sarq $63, %r15
-; AVX1-NEXT:    vpinsrb $13, %r15d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r15
-; AVX1-NEXT:    shlq $33, %r12
-; AVX1-NEXT:    sarq $63, %r12
-; AVX1-NEXT:    vpinsrb $14, %r12d, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %r12
-; AVX1-NEXT:    shrq $31, %rbp
-; AVX1-NEXT:    vpinsrb $15, %ebp, %xmm0, %xmm0
-; AVX1-NEXT:    movq %rax, %rbp
-; AVX1-NEXT:    shlq $63, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vmovd %edx, %xmm1
-; AVX1-NEXT:    movq %rax, %rdx
-; AVX1-NEXT:    movswq %ax, %rax
-; AVX1-NEXT:    shlq $62, %r8
-; AVX1-NEXT:    sarq $63, %r8
-; AVX1-NEXT:    vpinsrb $1, %r8d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $61, %rcx
-; AVX1-NEXT:    sarq $63, %rcx
-; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $60, %rdi
-; AVX1-NEXT:    sarq $63, %rdi
-; AVX1-NEXT:    vpinsrb $3, %edi, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $59, %r13
-; AVX1-NEXT:    sarq $63, %r13
-; AVX1-NEXT:    vpinsrb $4, %r13d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $58, %rsi
-; AVX1-NEXT:    sarq $63, %rsi
-; AVX1-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $57, %r10
-; AVX1-NEXT:    sarq $63, %r10
-; AVX1-NEXT:    vpinsrb $6, %r10d, %xmm1, %xmm1
-; AVX1-NEXT:    shrl $7, %r11d
-; AVX1-NEXT:    vpinsrb $7, %r11d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $55, %r9
-; AVX1-NEXT:    sarq $63, %r9
-; AVX1-NEXT:    vpinsrb $8, %r9d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $54, %rbx
-; AVX1-NEXT:    sarq $63, %rbx
-; AVX1-NEXT:    vpinsrb $9, %ebx, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $53, %r14
-; AVX1-NEXT:    sarq $63, %r14
-; AVX1-NEXT:    vpinsrb $10, %r14d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $52, %r15
-; AVX1-NEXT:    sarq $63, %r15
-; AVX1-NEXT:    vpinsrb $11, %r15d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $51, %r12
-; AVX1-NEXT:    sarq $63, %r12
-; AVX1-NEXT:    vpinsrb $12, %r12d, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $50, %rbp
-; AVX1-NEXT:    sarq $63, %rbp
-; AVX1-NEXT:    vpinsrb $13, %ebp, %xmm1, %xmm1
-; AVX1-NEXT:    shlq $49, %rdx
-; AVX1-NEXT:    sarq $63, %rdx
-; AVX1-NEXT:    vpinsrb $14, %edx, %xmm1, %xmm1
-; AVX1-NEXT:    shrl $15, %eax
-; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    popq %rbx
-; AVX1-NEXT:    popq %r12
-; AVX1-NEXT:    popq %r13
-; AVX1-NEXT:    popq %r14
-; AVX1-NEXT:    popq %r15
-; AVX1-NEXT:    popq %rbp
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: load_sext_32i1_to_32i8:
 ; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    pushq %rbp
-; AVX2-NEXT:    pushq %r15
-; AVX2-NEXT:    pushq %r14
-; AVX2-NEXT:    pushq %r13
-; AVX2-NEXT:    pushq %r12
-; AVX2-NEXT:    pushq %rbx
-; AVX2-NEXT:    movslq (%rdi), %rax
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $47, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    movq %rax, %r13
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    movq %rax, %r11
-; AVX2-NEXT:    movq %rax, %r9
-; AVX2-NEXT:    movq %rax, %rbx
-; AVX2-NEXT:    movq %rax, %r14
-; AVX2-NEXT:    movq %rax, %r15
-; AVX2-NEXT:    movq %rax, %r12
-; AVX2-NEXT:    movq %rax, %rbp
-; AVX2-NEXT:    shlq $46, %rbp
-; AVX2-NEXT:    sarq $63, %rbp
-; AVX2-NEXT:    vpinsrb $1, %ebp, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rbp
-; AVX2-NEXT:    shlq $45, %r8
-; AVX2-NEXT:    sarq $63, %r8
-; AVX2-NEXT:    vpinsrb $2, %r8d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r8
-; AVX2-NEXT:    shlq $44, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vpinsrb $3, %edx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    shlq $43, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rcx
-; AVX2-NEXT:    shlq $42, %rdi
-; AVX2-NEXT:    sarq $63, %rdi
-; AVX2-NEXT:    vpinsrb $5, %edi, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rdi
-; AVX2-NEXT:    shlq $41, %r13
-; AVX2-NEXT:    sarq $63, %r13
-; AVX2-NEXT:    vpinsrb $6, %r13d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r13
-; AVX2-NEXT:    shlq $40, %rsi
-; AVX2-NEXT:    sarq $63, %rsi
-; AVX2-NEXT:    vpinsrb $7, %esi, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rsi
-; AVX2-NEXT:    shlq $39, %r10
-; AVX2-NEXT:    sarq $63, %r10
-; AVX2-NEXT:    vpinsrb $8, %r10d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r10
-; AVX2-NEXT:    shlq $38, %r11
-; AVX2-NEXT:    sarq $63, %r11
-; AVX2-NEXT:    vpinsrb $9, %r11d, %xmm0, %xmm0
-; AVX2-NEXT:    movsbq %al, %r11
-; AVX2-NEXT:    shlq $37, %r9
-; AVX2-NEXT:    sarq $63, %r9
-; AVX2-NEXT:    vpinsrb $10, %r9d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r9
-; AVX2-NEXT:    shlq $36, %rbx
-; AVX2-NEXT:    sarq $63, %rbx
-; AVX2-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rbx
-; AVX2-NEXT:    shlq $35, %r14
-; AVX2-NEXT:    sarq $63, %r14
-; AVX2-NEXT:    vpinsrb $12, %r14d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r14
-; AVX2-NEXT:    shlq $34, %r15
-; AVX2-NEXT:    sarq $63, %r15
-; AVX2-NEXT:    vpinsrb $13, %r15d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r15
-; AVX2-NEXT:    shlq $33, %r12
-; AVX2-NEXT:    sarq $63, %r12
-; AVX2-NEXT:    vpinsrb $14, %r12d, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %r12
-; AVX2-NEXT:    shrq $31, %rbp
-; AVX2-NEXT:    vpinsrb $15, %ebp, %xmm0, %xmm0
-; AVX2-NEXT:    movq %rax, %rbp
-; AVX2-NEXT:    shlq $63, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vmovd %edx, %xmm1
-; AVX2-NEXT:    movq %rax, %rdx
-; AVX2-NEXT:    movswq %ax, %rax
-; AVX2-NEXT:    shlq $62, %r8
-; AVX2-NEXT:    sarq $63, %r8
-; AVX2-NEXT:    vpinsrb $1, %r8d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $61, %rcx
-; AVX2-NEXT:    sarq $63, %rcx
-; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $60, %rdi
-; AVX2-NEXT:    sarq $63, %rdi
-; AVX2-NEXT:    vpinsrb $3, %edi, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $59, %r13
-; AVX2-NEXT:    sarq $63, %r13
-; AVX2-NEXT:    vpinsrb $4, %r13d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $58, %rsi
-; AVX2-NEXT:    sarq $63, %rsi
-; AVX2-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $57, %r10
-; AVX2-NEXT:    sarq $63, %r10
-; AVX2-NEXT:    vpinsrb $6, %r10d, %xmm1, %xmm1
-; AVX2-NEXT:    shrl $7, %r11d
-; AVX2-NEXT:    vpinsrb $7, %r11d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $55, %r9
-; AVX2-NEXT:    sarq $63, %r9
-; AVX2-NEXT:    vpinsrb $8, %r9d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $54, %rbx
-; AVX2-NEXT:    sarq $63, %rbx
-; AVX2-NEXT:    vpinsrb $9, %ebx, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $53, %r14
-; AVX2-NEXT:    sarq $63, %r14
-; AVX2-NEXT:    vpinsrb $10, %r14d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $52, %r15
-; AVX2-NEXT:    sarq $63, %r15
-; AVX2-NEXT:    vpinsrb $11, %r15d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $51, %r12
-; AVX2-NEXT:    sarq $63, %r12
-; AVX2-NEXT:    vpinsrb $12, %r12d, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $50, %rbp
-; AVX2-NEXT:    sarq $63, %rbp
-; AVX2-NEXT:    vpinsrb $13, %ebp, %xmm1, %xmm1
-; AVX2-NEXT:    shlq $49, %rdx
-; AVX2-NEXT:    sarq $63, %rdx
-; AVX2-NEXT:    vpinsrb $14, %edx, %xmm1, %xmm1
-; AVX2-NEXT:    shrl $15, %eax
-; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX2-NEXT:    popq %rbx
-; AVX2-NEXT:    popq %r12
-; AVX2-NEXT:    popq %r13
-; AVX2-NEXT:    popq %r14
-; AVX2-NEXT:    popq %r15
-; AVX2-NEXT:    popq %rbp
+; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: load_sext_32i1_to_32i8:
@@ -4798,309 +2597,21 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
-; X32-SSE2-LABEL: load_sext_32i1_to_32i8:
-; X32-SSE2:       # %bb.0: # %entry
-; X32-SSE2-NEXT:    pushl %ebp
-; X32-SSE2-NEXT:    pushl %ebx
-; X32-SSE2-NEXT:    pushl %edi
-; X32-SSE2-NEXT:    pushl %esi
-; X32-SSE2-NEXT:    subl $28, %esp
-; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT:    movswl (%eax), %edx
-; X32-SSE2-NEXT:    movl %edx, %ebp
-; X32-SSE2-NEXT:    movl %edx, %esi
-; X32-SSE2-NEXT:    movl %edx, %edi
-; X32-SSE2-NEXT:    movl %edx, %ebx
-; X32-SSE2-NEXT:    movl %edx, %ecx
-; X32-SSE2-NEXT:    shrl $15, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm0
-; X32-SSE2-NEXT:    movl %edx, %ecx
-; X32-SSE2-NEXT:    shll $17, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm4
-; X32-SSE2-NEXT:    movl %edx, %ebp
-; X32-SSE2-NEXT:    shll $18, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm1
-; X32-SSE2-NEXT:    movl %edx, %esi
-; X32-SSE2-NEXT:    shll $19, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm2
-; X32-SSE2-NEXT:    movl %edx, %edi
-; X32-SSE2-NEXT:    shll $20, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm5
-; X32-SSE2-NEXT:    movl %edx, %ebx
-; X32-SSE2-NEXT:    shll $21, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm6
-; X32-SSE2-NEXT:    movl %edx, %ecx
-; X32-SSE2-NEXT:    shll $22, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm7
-; X32-SSE2-NEXT:    movl %edx, %ebp
-; X32-SSE2-NEXT:    shll $23, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm3
-; X32-SSE2-NEXT:    movl %edx, %esi
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
-; X32-SSE2-NEXT:    shll $28, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm0
-; X32-SSE2-NEXT:    movl %edx, %edi
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; X32-SSE2-NEXT:    shll $29, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm1
-; X32-SSE2-NEXT:    movl %edx, %ebx
-; X32-SSE2-NEXT:    movsbl %dl, %edx
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-SSE2-NEXT:    shll $30, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm2
-; X32-SSE2-NEXT:    shll $31, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm0
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $26, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm7
-; X32-SSE2-NEXT:    shll $27, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm2
-; X32-SSE2-NEXT:    shll $25, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm6
-; X32-SSE2-NEXT:    shrl $7, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm5
-; X32-SSE2-NEXT:    movswl 2(%eax), %eax
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    shrl $15, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm4
-; X32-SSE2-NEXT:    movdqu %xmm4, (%esp) # 16-byte Spill
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X32-SSE2-NEXT:    shll $17, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm4
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3],xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $18, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm7
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; X32-SSE2-NEXT:    shll $19, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm5
-; X32-SSE2-NEXT:    movl %eax, %ecx
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
-; X32-SSE2-NEXT:    shll $20, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm6
-; X32-SSE2-NEXT:    movl %eax, %esi
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT:    shll $21, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm1
-; X32-SSE2-NEXT:    movl %eax, %edi
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
-; X32-SSE2-NEXT:    shll $22, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm3
-; X32-SSE2-NEXT:    movl %eax, %ebx
-; X32-SSE2-NEXT:    movdqu (%esp), %xmm2 # 16-byte Reload
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; X32-SSE2-NEXT:    shll $23, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm2
-; X32-SSE2-NEXT:    movl %eax, %edx
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $28, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm7
-; X32-SSE2-NEXT:    movl %eax, %ebp
-; X32-SSE2-NEXT:    movsbl %al, %eax
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; X32-SSE2-NEXT:    shll $29, %ecx
-; X32-SSE2-NEXT:    sarl $31, %ecx
-; X32-SSE2-NEXT:    movd %ecx, %xmm3
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X32-SSE2-NEXT:    shll $30, %esi
-; X32-SSE2-NEXT:    sarl $31, %esi
-; X32-SSE2-NEXT:    movd %esi, %xmm4
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
-; X32-SSE2-NEXT:    shll $31, %edi
-; X32-SSE2-NEXT:    sarl $31, %edi
-; X32-SSE2-NEXT:    movd %edi, %xmm1
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
-; X32-SSE2-NEXT:    shll $26, %ebx
-; X32-SSE2-NEXT:    sarl $31, %ebx
-; X32-SSE2-NEXT:    movd %ebx, %xmm5
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; X32-SSE2-NEXT:    shll $27, %edx
-; X32-SSE2-NEXT:    sarl $31, %edx
-; X32-SSE2-NEXT:    movd %edx, %xmm3
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; X32-SSE2-NEXT:    shll $25, %ebp
-; X32-SSE2-NEXT:    sarl $31, %ebp
-; X32-SSE2-NEXT:    movd %ebp, %xmm4
-; X32-SSE2-NEXT:    shrl $7, %eax
-; X32-SSE2-NEXT:    movd %eax, %xmm5
-; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
-; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X32-SSE2-NEXT:    addl $28, %esp
-; X32-SSE2-NEXT:    popl %esi
-; X32-SSE2-NEXT:    popl %edi
-; X32-SSE2-NEXT:    popl %ebx
-; X32-SSE2-NEXT:    popl %ebp
-; X32-SSE2-NEXT:    retl
-;
-; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
-; X32-SSE41:       # %bb.0: # %entry
-; X32-SSE41-NEXT:    pushl %esi
-; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT:    movswl (%eax), %ecx
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $30, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movl %ecx, %esi
-; X32-SSE41-NEXT:    shll $31, %esi
-; X32-SSE41-NEXT:    sarl $31, %esi
-; X32-SSE41-NEXT:    movd %esi, %xmm0
-; X32-SSE41-NEXT:    pinsrb $1, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $29, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $2, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $28, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $3, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $27, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $4, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $26, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $5, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $25, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $6, %edx, %xmm0
-; X32-SSE41-NEXT:    movsbl %cl, %edx
-; X32-SSE41-NEXT:    shrl $7, %edx
-; X32-SSE41-NEXT:    pinsrb $7, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $23, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $8, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $22, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $9, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $21, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $10, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $20, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $11, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $19, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $12, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $18, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $13, %edx, %xmm0
-; X32-SSE41-NEXT:    movl %ecx, %edx
-; X32-SSE41-NEXT:    shll $17, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    pinsrb $14, %edx, %xmm0
-; X32-SSE41-NEXT:    shrl $15, %ecx
-; X32-SSE41-NEXT:    pinsrb $15, %ecx, %xmm0
-; X32-SSE41-NEXT:    movswl 2(%eax), %eax
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $30, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    movl %eax, %edx
-; X32-SSE41-NEXT:    shll $31, %edx
-; X32-SSE41-NEXT:    sarl $31, %edx
-; X32-SSE41-NEXT:    movd %edx, %xmm1
-; X32-SSE41-NEXT:    pinsrb $1, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $29, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $2, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $28, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $3, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $27, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $4, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $26, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $5, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $25, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $6, %ecx, %xmm1
-; X32-SSE41-NEXT:    movsbl %al, %ecx
-; X32-SSE41-NEXT:    shrl $7, %ecx
-; X32-SSE41-NEXT:    pinsrb $7, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $23, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $8, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $22, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $9, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $21, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $10, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $20, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $11, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $19, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $12, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $18, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $13, %ecx, %xmm1
-; X32-SSE41-NEXT:    movl %eax, %ecx
-; X32-SSE41-NEXT:    shll $17, %ecx
-; X32-SSE41-NEXT:    sarl $31, %ecx
-; X32-SSE41-NEXT:    pinsrb $14, %ecx, %xmm1
-; X32-SSE41-NEXT:    shrl $15, %eax
-; X32-SSE41-NEXT:    pinsrb $15, %eax, %xmm1
-; X32-SSE41-NEXT:    popl %esi
-; X32-SSE41-NEXT:    retl
+; X32-SSE-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE:       # %bb.0: # %entry
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT:    pand %xmm2, %xmm0
+; X32-SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
+; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X32-SSE-NEXT:    pand %xmm2, %xmm1
+; X32-SSE-NEXT:    pcmpeqb %xmm2, %xmm1
+; X32-SSE-NEXT:    retl
 entry:
  %X = load <32 x i1>, <32 x i1>* %ptr
  %Y = sext <32 x i1> %X to <32 x i8>

From c372f41c18aadbad09e72800304b301d4c475571 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Thu, 30 May 2019 10:36:52 +0000
Subject: [PATCH 0604/1176] [llvm-readobj/llvm-readelf] - Implement GNU style
 dumper of the SHT_GNU_verdef section.

It was not implemented yet, we had only LLVM style dumper implemented.
Section description is here: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html

Differential revision: https://reviews.llvm.org/D62520

llvm-svn: 362082
---
 .../tools/llvm-readobj/elf-versioninfo.test   | 12 +++-
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 57 ++++++++++++++-----
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
index cca5bb1b40989..ba48e91b21c9b 100644
--- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test
+++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test
@@ -30,7 +30,7 @@ Sections:
         Flags:      0
         VersionNdx: 2
         Hash:       175630257
-        Names:           
+        Names:
           - VERSION1
       - Version:    1
         Flags:      0
@@ -64,7 +64,7 @@ Sections:
             Hash:  1937
             Flags: 0
             Other: 6
-DynamicSymbols:  
+DynamicSymbols:
   - Name:            sym1
     Binding:         STB_GLOBAL
   - Name:            sym2
@@ -174,7 +174,13 @@ DynamicSymbols:
 # GNU-NEXT:   000:   0 (*local*) 2 (VERSION1) 3 (VERSION2) 4 (v1)
 # GNU-NEXT:   004:   5 (v2)      6 (v3)
 # GNU-EMPTY:
-# GNU-NEXT: Dumper for .gnu.version_d is not implemented
+# GNU-NEXT: Version definition section '.gnu.version_d' contains 3 entries:
+# GNU-NEXT:  Addr: 0000000000000000  Offset: 0x00028c  Link: 8 (.dynstr)
+# GNU-NEXT:   0x0000: Rev: 1  Flags: none  Index: 2  Cnt: 1  Name: VERSION1
+# GNU-NEXT:   0x001c: Rev: 1  Flags: none  Index: 3  Cnt: 2  Name: VERSION2
+# GNU-NEXT:   0x0038: Parent 1: VERSION1
+# GNU-NEXT:   0x001c: Rev: 1  Flags: none  Index: 3  Cnt: 2  Name: VERSION2
+# GNU-NEXT:   0x0038: Parent 1: VERSION1
 # GNU-EMPTY:
 # GNU-NEXT: Version needs section '.gnu.version_r' contains 2 entries:
 # GNU-NEXT:  Addr: 0000000000000000  Offset: 0x0002cc  Link: 8 (.dynstr)
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index fdef4b79ebf36..1aea4935da6ff 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3491,18 +3491,7 @@ void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
   OS << '\n';
 }
 
-template <class ELFT>
-void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
-                                                   const Elf_Shdr *Sec) {
-  if (!Sec)
-    return;
-
-  StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
-  OS << "Dumper for " << SecName << " is not implemented\n";
-  OS << '\n';
-}
-
-static std::string verNeedFlagToString(unsigned Flags) {
+static std::string versionFlagToString(unsigned Flags) {
   if (Flags == 0)
     return "none";
 
@@ -3523,6 +3512,48 @@ static std::string verNeedFlagToString(unsigned Flags) {
   return Ret;
 }
 
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+                                                   const Elf_Shdr *Sec) {
+  if (!Sec)
+    return;
+
+  unsigned VerDefsNum = Sec->sh_info;
+  printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec);
+
+  const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+  StringRef StringTable(
+      reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
+      StrTabSec->sh_size);
+
+  const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data();
+  const uint8_t *Begin = VerdefBuf;
+
+  while (VerDefsNum--) {
+    const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+    OS << format("  0x%04x: Rev: %u  Flags: %s  Index: %u  Cnt: %u",
+                 VerdefBuf - Begin, (unsigned)Verdef->vd_version,
+                 versionFlagToString(Verdef->vd_flags).c_str(),
+                 (unsigned)Verdef->vd_ndx, (unsigned)Verdef->vd_cnt);
+
+    const uint8_t *VerdauxBuf = VerdefBuf + Verdef->vd_aux;
+    const Elf_Verdaux *Verdaux =
+        reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+    OS << format("  Name: %s\n",
+                 StringTable.drop_front(Verdaux->vda_name).data());
+
+    for (unsigned I = 1; I < Verdef->vd_cnt; ++I) {
+      VerdauxBuf += Verdaux->vda_next;
+      Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+      OS << format("  0x%04x: Parent %u: %s\n", VerdauxBuf - Begin, I,
+                   StringTable.drop_front(Verdaux->vda_name).data());
+    }
+
+    VerdefBuf += Verdef->vd_next;
+  }
+  OS << '\n';
+}
+
 template <class ELFT>
 void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
                                                    const Elf_Shdr *Sec) {
@@ -3558,7 +3589,7 @@ void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
       OS << format("  0x%04x:   Name: %s  Flags: %s  Version: %u\n",
                    reinterpret_cast<const uint8_t *>(Vernaux) - SecData.begin(),
                    StringTable.drop_front(Vernaux->vna_name).data(),
-                   verNeedFlagToString(Vernaux->vna_flags).c_str(),
+                   versionFlagToString(Vernaux->vna_flags).c_str(),
                    (unsigned)Vernaux->vna_other);
       VernauxBuf += Vernaux->vna_next;
     }

From 9e7be9b7459b1030dbbddf24f80c3bebc316ad9b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 30 May 2019 10:41:04 +0000
Subject: [PATCH 0605/1176] [CostModel][X86] Add bool vector and/or/xor cost
 tests

llvm-svn: 362083
---
 llvm/test/Analysis/CostModel/X86/arith.ll | 192 ++++++++++++++++++++++
 1 file changed, 192 insertions(+)

diff --git a/llvm/test/Analysis/CostModel/X86/arith.ll b/llvm/test/Analysis/CostModel/X86/arith.ll
index b998732acbcb1..f52cbf88b77bc 100644
--- a/llvm/test/Analysis/CostModel/X86/arith.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith.ll
@@ -422,6 +422,13 @@ define i32 @or(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'or'
@@ -441,6 +448,13 @@ define i32 @or(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'or'
@@ -460,6 +474,13 @@ define i32 @or(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'or'
@@ -479,6 +500,13 @@ define i32 @or(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'or'
@@ -498,6 +526,13 @@ define i32 @or(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'or'
@@ -517,6 +552,13 @@ define i32 @or(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'or'
@@ -536,6 +578,13 @@ define i32 @or(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = or <64 x i8> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = or <32 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'or'
@@ -555,6 +604,13 @@ define i32 @or(i32 %arg) {
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = or <16 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = or <32 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = or <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = or i1 undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = or <2 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = or <4 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = or <8 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = or <16 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = or <32 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = or <64 x i1> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = or i64 undef, undef
@@ -577,6 +633,14 @@ define i32 @or(i32 %arg) {
   %V32I8 = or <32 x i8> undef, undef
   %V64I8 = or <64 x i8> undef, undef
 
+  %I1 = or i1 undef, undef
+  %V2I1 = or <2 x i1> undef, undef
+  %V4I1 = or <4 x i1> undef, undef
+  %V8I1 = or <8 x i1> undef, undef
+  %V16I1 = or <16 x i1> undef, undef
+  %V32I1 = or <32 x i1> undef, undef
+  %V64I1 = or <64 x i1> undef, undef
+
   ret i32 undef
 }
 
@@ -598,6 +662,13 @@ define i32 @xor(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'xor'
@@ -617,6 +688,13 @@ define i32 @xor(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'xor'
@@ -636,6 +714,13 @@ define i32 @xor(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'xor'
@@ -655,6 +740,13 @@ define i32 @xor(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'xor'
@@ -674,6 +766,13 @@ define i32 @xor(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'xor'
@@ -693,6 +792,13 @@ define i32 @xor(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'xor'
@@ -712,6 +818,13 @@ define i32 @xor(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'xor'
@@ -731,6 +844,13 @@ define i32 @xor(i32 %arg) {
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = xor <16 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = xor <32 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = xor <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = xor i1 undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = xor <2 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = xor <4 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = xor <8 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = xor <16 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = xor <32 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = xor <64 x i1> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = xor i64 undef, undef
@@ -753,6 +873,14 @@ define i32 @xor(i32 %arg) {
   %V32I8 = xor <32 x i8> undef, undef
   %V64I8 = xor <64 x i8> undef, undef
 
+  %I1 = xor i1 undef, undef
+  %V2I1 = xor <2 x i1> undef, undef
+  %V4I1 = xor <4 x i1> undef, undef
+  %V8I1 = xor <8 x i1> undef, undef
+  %V16I1 = xor <16 x i1> undef, undef
+  %V32I1 = xor <32 x i1> undef, undef
+  %V64I1 = xor <64 x i1> undef, undef
+
   ret i32 undef
 }
 
@@ -774,6 +902,13 @@ define i32 @and(i32 %arg) {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'and'
@@ -793,6 +928,13 @@ define i32 @and(i32 %arg) {
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'and'
@@ -812,6 +954,13 @@ define i32 @and(i32 %arg) {
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'and'
@@ -831,6 +980,13 @@ define i32 @and(i32 %arg) {
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512DQ-LABEL: 'and'
@@ -850,6 +1006,13 @@ define i32 @and(i32 %arg) {
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'and'
@@ -869,6 +1032,13 @@ define i32 @and(i32 %arg) {
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'and'
@@ -888,6 +1058,13 @@ define i32 @and(i32 %arg) {
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = and <64 x i8> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I1 = and <32 x i1> undef, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; BTVER2-LABEL: 'and'
@@ -907,6 +1084,13 @@ define i32 @and(i32 %arg) {
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = and <16 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = and <32 x i8> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = and <64 x i8> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I1 = and i1 undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I1 = and <2 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I1 = and <4 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I1 = and <8 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I1 = and <16 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I1 = and <32 x i1> undef, undef
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I1 = and <64 x i1> undef, undef
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = and i64 undef, undef
@@ -929,6 +1113,14 @@ define i32 @and(i32 %arg) {
   %V32I8 = and <32 x i8> undef, undef
   %V64I8 = and <64 x i8> undef, undef
 
+  %I1 = and i1 undef, undef
+  %V2I1 = and <2 x i1> undef, undef
+  %V4I1 = and <4 x i1> undef, undef
+  %V8I1 = and <8 x i1> undef, undef
+  %V16I1 = and <16 x i1> undef, undef
+  %V32I1 = and <32 x i1> undef, undef
+  %V64I1 = and <64 x i1> undef, undef
+
   ret i32 undef
 }
 

From 28e1ff2c3fbc98cc35d34c0ad6678628795c142e Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Thu, 30 May 2019 10:42:47 +0000
Subject: [PATCH 0606/1176] [llvm-readobj] - An attemp to fix BB.

BB failed:
http://lab.llvm.org:8011/builders/clang-armv7-linux-build-cache/builds/15062/steps/build%20stage%201/logs/stdio

Error was:
/home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/tools/llvm-readobj/ELFDumper.cpp:3540:7:
error: non-constant-expression cannot be narrowed from type 'llvm::support::detail::packed_endian_specific_integral<unsigned long long,
llvm::support::endianness::little, 1>::value_type' (aka 'unsigned long long') to 'size_t' (aka 'unsigned int') in initializer list [-Wc++11-narrowing]
      StrTabSec->sh_size};

llvm-svn: 362084
---
 llvm/tools/llvm-readobj/ELFDumper.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 1aea4935da6ff..1012cf1085ff1 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3524,7 +3524,7 @@ void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
   const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
   StringRef StringTable(
       reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
-      StrTabSec->sh_size);
+      (size_t)StrTabSec->sh_size);
 
   const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data();
   const uint8_t *Begin = VerdefBuf;
@@ -3568,7 +3568,7 @@ void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
   const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
   StringRef StringTable = {
       reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
-      StrTabSec->sh_size};
+      (size_t)StrTabSec->sh_size};
 
   const uint8_t *VerneedBuf = SecData.data();
   for (unsigned I = 0; I < VerneedNum; ++I) {

From 9d1f8a9dd2af0bf400e2b757fa6254aa64ee0ee4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 30 May 2019 10:44:36 +0000
Subject: [PATCH 0607/1176] Fix Wdocumentation warning. NFCI.

llvm-svn: 362085
---
 clang/include/clang/StaticAnalyzer/Core/CheckerManager.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 532b908df32dd..98c69039efd1f 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -410,7 +410,6 @@ class CheckerManager {
   /// \param Out   The output stream
   /// \param State The state being printed
   /// \param NL    The preferred representation of a newline.
-  /// \param Sep   The preferred separator between different messages.
   /// \param Space The preferred space between the left side and the message.
   /// \param IsDot Whether the message will be printed in 'dot' format.
   void runCheckersForPrintStateJson(raw_ostream &Out, ProgramStateRef State,

From c767011329135194ce5f04d2db5e98366ec14586 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 30 May 2019 11:24:16 +0000
Subject: [PATCH 0608/1176] DWARFASTParserClang: Delete dead code

This removes places where DW_AT_decl_file/line/column was being parsed,
but not used.

llvm-svn: 362086
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 33 -------------------
 1 file changed, 33 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index decabb435dc7b..ceddac4dbf287 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2481,7 +2481,6 @@ bool DWARFASTParserClang::ParseChildMembers(
       DWARFAttributes attributes;
       const size_t num_attributes = die.GetAttributes(attributes);
       if (num_attributes > 0) {
-        Declaration decl;
         const char *name = nullptr;
         const char *prop_name = nullptr;
         const char *prop_getter_name = nullptr;
@@ -2505,16 +2504,6 @@ bool DWARFASTParserClang::ParseChildMembers(
           DWARFFormValue form_value;
           if (attributes.ExtractFormValueAtIndex(i, form_value)) {
             switch (attr) {
-            case DW_AT_decl_file:
-              decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                  form_value.Unsigned()));
-              break;
-            case DW_AT_decl_line:
-              decl.SetLine(form_value.Unsigned());
-              break;
-            case DW_AT_decl_column:
-              decl.SetColumn(form_value.Unsigned());
-              break;
             case DW_AT_name:
               name = form_value.AsCString();
               break;
@@ -2960,7 +2949,6 @@ bool DWARFASTParserClang::ParseChildMembers(
       DWARFAttributes attributes;
       const size_t num_attributes = die.GetAttributes(attributes);
       if (num_attributes > 0) {
-        Declaration decl;
         DWARFFormValue encoding_form;
         AccessType accessibility = default_accessibility;
         bool is_virtual = false;
@@ -2972,16 +2960,6 @@ bool DWARFASTParserClang::ParseChildMembers(
           DWARFFormValue form_value;
           if (attributes.ExtractFormValueAtIndex(i, form_value)) {
             switch (attr) {
-            case DW_AT_decl_file:
-              decl.SetFile(sc.comp_unit->GetSupportFiles().GetFileSpecAtIndex(
-                  form_value.Unsigned()));
-              break;
-            case DW_AT_decl_line:
-              decl.SetLine(form_value.Unsigned());
-              break;
-            case DW_AT_decl_column:
-              decl.SetColumn(form_value.Unsigned());
-              break;
             case DW_AT_type:
               encoding_form = form_value;
               break;
@@ -3105,7 +3083,6 @@ size_t DWARFASTParserClang::ParseChildParameters(
       const size_t num_attributes = die.GetAttributes(attributes);
       if (num_attributes > 0) {
         const char *name = nullptr;
-        Declaration decl;
         DWARFFormValue param_type_die_form;
         bool is_artificial = false;
         // one of None, Auto, Register, Extern, Static, PrivateExtern
@@ -3117,16 +3094,6 @@ size_t DWARFASTParserClang::ParseChildParameters(
           DWARFFormValue form_value;
           if (attributes.ExtractFormValueAtIndex(i, form_value)) {
             switch (attr) {
-            case DW_AT_decl_file:
-              decl.SetFile(comp_unit.GetSupportFiles().GetFileSpecAtIndex(
-                  form_value.Unsigned()));
-              break;
-            case DW_AT_decl_line:
-              decl.SetLine(form_value.Unsigned());
-              break;
-            case DW_AT_decl_column:
-              decl.SetColumn(form_value.Unsigned());
-              break;
             case DW_AT_name:
               name = form_value.AsCString();
               break;

From ce127bb60ea1d570e50a6730b468ba537fd08751 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 30 May 2019 12:35:19 +0000
Subject: [PATCH 0609/1176] [OpenCL] Support logical vector operators in C++
 mode

Support logical operators on vectors in C++ for OpenCL mode, to
preserve backwards compatibility with OpenCL C.

Differential Revision: https://reviews.llvm.org/D62588

llvm-svn: 362087
---
 clang/lib/Sema/SemaExpr.cpp             | 5 +++--
 clang/test/CodeGenOpenCL/logical-ops.cl | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8eccb4b0c5d30..762ab673fa87c 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -10902,7 +10902,7 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
   if (vType.isNull())
     return InvalidOperands(Loc, LHS, RHS);
   if (getLangOpts().OpenCL && getLangOpts().OpenCLVersion < 120 &&
-      vType->hasFloatingRepresentation())
+      !getLangOpts().OpenCLCPlusPlus && vType->hasFloatingRepresentation())
     return InvalidOperands(Loc, LHS, RHS);
   // FIXME: The check for C++ here is for GCC compatibility. GCC rejects the
   //        usage of the logical operators && and || with vectors in C. This
@@ -13165,7 +13165,8 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc,
       }
     } else if (resultType->isExtVectorType()) {
       if (Context.getLangOpts().OpenCL &&
-          Context.getLangOpts().OpenCLVersion < 120) {
+          Context.getLangOpts().OpenCLVersion < 120 &&
+          !Context.getLangOpts().OpenCLCPlusPlus) {
         // OpenCL v1.1 6.3.h: The logical operator not (!) does not
         // operate on vector float types.
         QualType T = resultType->getAs<ExtVectorType>()->getElementType();
diff --git a/clang/test/CodeGenOpenCL/logical-ops.cl b/clang/test/CodeGenOpenCL/logical-ops.cl
index ac1c1b5454b77..77334d3b37b72 100644
--- a/clang/test/CodeGenOpenCL/logical-ops.cl
+++ b/clang/test/CodeGenOpenCL/logical-ops.cl
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=c++ -O1 -triple x86_64-unknown-linux-gnu | FileCheck %s
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 

From 7eb95d672dbf302a422ae4dbb24dd7cc583b65df Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 12:37:05 +0000
Subject: [PATCH 0610/1176] [ARM] Introduce separate features for FP registers

The MVE extension in Arm v8.1-M permits the use of some move, load and
store isntructions which access the FP registers, even if there's no
actual FP support in the processor (in particular, if you have the
integer-only version of MVE).

Therefore, we need separate subtarget features to condition those
instructions on, which are implied by both FP and MVE but are not part
of either.

Patch mostly by Simon Tatham.

Differential Revision: https://reviews.llvm.org/D60694

llvm-svn: 362088
---
 llvm/lib/Target/ARM/ARM.td             |  25 ++++-
 llvm/lib/Target/ARM/ARMInstrNEON.td    |   2 +-
 llvm/lib/Target/ARM/ARMInstrVFP.td     |  45 ++++++---
 llvm/lib/Target/ARM/ARMPredicates.td   |   9 ++
 llvm/lib/Target/ARM/ARMSubtarget.h     |   6 ++
 llvm/test/MC/ARM/fullfp16-neg.s        |  20 ++--
 llvm/test/MC/ARM/mve-fp-registers.s    | 133 +++++++++++++++++++++++++
 llvm/test/MC/ARM/single-precision-fp.s |   4 +-
 llvm/test/MC/ARM/vmrs_vmsr.s           |  12 +--
 9 files changed, 220 insertions(+), 36 deletions(-)
 create mode 100644 llvm/test/MC/ARM/mve-fp-registers.s

diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 9af350c055117..62cd79c9347a3 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -32,9 +32,26 @@ def ModeSoftFloat         : SubtargetFeature<"soft-float","UseSoftFloat",
 //
 
 // Floating Point, HW Division and Neon Support
+
+// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only
+// version).
+def FeatureFPRegs         : SubtargetFeature<"fpregs", "HasFPRegs", "true",
+                                             "Enable FP registers">;
+
+// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16
+// extension) and MVE (even in the integer-only version).
+def FeatureFPRegs16       : SubtargetFeature<"fpregs16", "HasFPRegs16", "true",
+                                             "Enable 16-bit FP registers",
+                                             [FeatureFPRegs]>;
+
+def FeatureFPRegs64       : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
+                                             "Enable 64-bit FP registers",
+                                             [FeatureFPRegs]>;
+
 def FeatureFP64           : SubtargetFeature<"fp64", "HasFP64", "true",
                                              "Floating point unit supports "
-                                             "double precision">;
+                                             "double precision",
+                                             [FeatureFPRegs64]>;
 
 def FeatureD32            : SubtargetFeature<"d32", "HasD32", "true",
                                              "Extend FP to 32 double registers">;
@@ -63,7 +80,9 @@ multiclass VFPver<string name, string query, string description,
         !cast<SubtargetFeature>(NAME # "_SP")]>;
 }
 
-defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions">;
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions",
+                         [], [FeatureFPRegs]>;
+
 defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
                          [FeatureVFP2]>;
 
@@ -84,7 +103,7 @@ defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
 def FeatureFullFP16       : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
                                              "Enable full half-precision "
                                              "floating point",
-                                             [FeatureFPARMv8_D16_SP]>;
+                                             [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
 
 def FeatureFP16FML        : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
                                              "Enable full half-precision "
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 0c4e765d8ad2c..3aa44313a2783 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6179,7 +6179,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                           IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                           [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                            imm:$lane))]>,
-                Requires<[HasVFP2, HasFastVGETLNi32]> {
+                Requires<[HasFPRegs, HasFastVGETLNi32]> {
   let Inst{21} = lane{0};
 }
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index d1b32f531c05f..88405a83b4e28 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -141,11 +141,13 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
-                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>,
+            Requires<[HasFPRegs]>;
 
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
-                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
+                 [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   let D = VFPNeonDomain;
@@ -155,17 +157,19 @@ let isUnpredicable = 1 in
 def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
                  [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
 
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
-                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
+            Requires<[HasFPRegs]>;
 
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
-                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
+                 [(alignedstore32 SPR:$Sd, addrmode5:$addr)]>,
+            Requires<[HasFPRegs]> {
   // Some single precision VFP instructions may be executed on both NEON and VFP
   // pipelines.
   let D = VFPNeonDomain;
@@ -175,7 +179,7 @@ let isUnpredicable = 1 in
 def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
                  [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
-            Requires<[HasFullFP16]>;
+            Requires<[HasFPRegs16]>;
 
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
@@ -183,6 +187,7 @@ def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
 
 multiclass vfp_ldst_mult<string asm, bit L_bit,
                          InstrItinClass itin, InstrItinClass itin_upd> {
+  let Predicates = [HasFPRegs] in {
   // Double Precision
   def DIA :
     AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -250,6 +255,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
     // VFP pipelines.
     let D = VFPNeonDomain;
   }
+  }
 }
 
 let hasSideEffects = 0 in {
@@ -318,6 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
 // However, there is no UAL syntax for them, so we keep them around for
 // (dis)assembly only.
 multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+  let Predicates = [HasFPRegs] in {
   // Unknown precision
   def XIA :
     AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -340,6 +347,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> {
     let Inst{21}    = 1;            // Writeback
     let Inst{20}    = L_bit;
   }
+  }
 }
 
 defm FLDM : vfp_ldstx_mult<"fldm", 1>;
@@ -1031,11 +1039,13 @@ let hasSideEffects = 0 in {
 let isMoveReg = 1 in {
 def VMOVD  : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs DPR:$Dd), (ins DPR:$Dm),
-                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+                  IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+             Requires<[HasFPRegs64]>;
 
 def VMOVS  : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
                   (outs SPR:$Sd), (ins SPR:$Sm),
-                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+                  IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+             Requires<[HasFPRegs]>;
 } // isMoveReg
 
 let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
@@ -1060,6 +1070,7 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
                       (outs GPR:$Rt), (ins SPR:$Sn),
                       IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
                       [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+             Requires<[HasFPRegs]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<4> Rt;
@@ -1083,7 +1094,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
                       (outs SPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
                       [(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
-             Requires<[HasVFP2, UseVMOVSR]>,
+             Requires<[HasFPRegs, UseVMOVSR]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Sn;
@@ -1109,6 +1120,7 @@ def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
                         (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
                         IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
                  [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Dm;
@@ -1137,6 +1149,7 @@ def VMOVRRS  : AVConv3I<0b11000101, 0b1010,
                       (outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
                  IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
                  [/* For disassembly only; pattern left blank */]>,
+               Requires<[HasFPRegs]>,
                Sched<[WriteFPMOV]> {
   bits<5> src1;
   bits<4> Rt;
@@ -1164,6 +1177,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
                       (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
                       IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
                       [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Dm;
@@ -1208,6 +1222,7 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                      (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
                 IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
                 [/* For disassembly only; pattern left blank */]>,
+              Requires<[HasFPRegs]>,
               Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> dst1;
@@ -1234,7 +1249,7 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
                       (outs GPR:$Rt), (ins HPR:$Sn),
                       IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
                       [(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
-             Requires<[HasFullFP16]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<4> Rt;
@@ -1256,7 +1271,7 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
                       (outs HPR:$Sn), (ins GPR:$Rt),
                       IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
                       [(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
-             Requires<[HasFullFP16]>,
+             Requires<[HasFPRegs16]>,
              Sched<[WriteFPMOV]> {
   // Instruction operands.
   bits<5> Sn;
@@ -2286,13 +2301,14 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
 // to APSR.
-let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
+    Rt = 0b1111 /* apsr_nzcv */ in
 def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
                         "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  // Application level FPSCR -> GPR
- let hasSideEffects = 1, Uses = [FPSCR] in
+ let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
  def VMRS :  MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
                         "vmrs", "\t$Rt, fpscr",
                         [(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
@@ -2341,6 +2357,7 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
  let Defs = [FPSCR] in {
+   let Predicates = [HasFPRegs] in
    // Application level GPR -> FPSCR
    def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
                        "vmsr", "\tfpscr, $src",
@@ -2474,7 +2491,7 @@ def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
 def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
 
 
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : InstAlias<"fmstat${p}", (FMSTAT pred:$p), 0>, Requires<[HasFPRegs]>;
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
                     (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
 def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
index 37c3098796128..ab4ed3936aba3 100644
--- a/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -26,6 +26,15 @@ def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
 def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
                                  AssemblerPredicate<"HasV8MMainlineOps",
                                                     "armv8m.main">;
+def HasFPRegs        : Predicate<"Subtarget->hasFPRegs()">,
+                                 AssemblerPredicate<"FeatureFPRegs",
+                                                    "fp registers">;
+def HasFPRegs16      : Predicate<"Subtarget->hasFPRegs16()">,
+                                 AssemblerPredicate<"FeatureFPRegs16",
+                                                    "16-bit fp registers">;
+def HasFPRegs64      : Predicate<"Subtarget->hasFPRegs64()">,
+                                 AssemblerPredicate<"FeatureFPRegs64",
+                                                    "64-bit fp registers">;
 def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
                                  AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index abedc6f6d81d3..03bea3572ce28 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -165,6 +165,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool HasVFPv4 = false;
   bool HasFPARMv8 = false;
   bool HasNEON = false;
+  bool HasFPRegs = false;
+  bool HasFPRegs16 = false;
+  bool HasFPRegs64 = false;
 
   /// Versions of the VFP flags restricted to single precision, or to
   /// 16 d-registers, or both.
@@ -566,6 +569,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool hasV8_5aOps() const { return HasV8_5aOps; }
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+  bool hasFPRegs() const { return HasFPRegs; }
+  bool hasFPRegs16() const { return HasFPRegs16; }
+  bool hasFPRegs64() const { return HasFPRegs64; }
 
   /// @{
   /// These functions are obsolete, please consider adding subtarget features
diff --git a/llvm/test/MC/ARM/fullfp16-neg.s b/llvm/test/MC/ARM/fullfp16-neg.s
index e7fba2abaae58..7069cbc61768f 100644
--- a/llvm/test/MC/ARM/fullfp16-neg.s
+++ b/llvm/test/MC/ARM/fullfp16-neg.s
@@ -165,25 +165,25 @@
   vldr.16 s2, [pc, #510]
   vldr.16 s3, [pc, #-510]
   vldr.16 s4, [r4, #-18]
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
 
 
   vstr.16 s1, [pc, #6]
   vstr.16 s2, [pc, #510]
   vstr.16 s3, [pc, #-510]
   vstr.16 s4, [r4, #-18]
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
 
   vmov.f16 s0, #1.0
 @ CHECK: instruction requires: full half-float
 
   vmov.f16 s1, r2
   vmov.f16 r3, s4
-@ CHECK: instruction requires: full half-float
-@ CHECK: instruction requires: full half-float
+@ CHECK: instruction requires: 16-bit fp registers
+@ CHECK: instruction requires: 16-bit fp registers
diff --git a/llvm/test/MC/ARM/mve-fp-registers.s b/llvm/test/MC/ARM/mve-fp-registers.s
new file mode 100644
index 0000000000000..aff7649a8d843
--- /dev/null
+++ b/llvm/test/MC/ARM/mve-fp-registers.s
@@ -0,0 +1,133 @@
+// Some simple operations on S, D and Q registers (loads, stores and moves) are
+// also avaliable in MVE, even in the integer-only version. Some of these
+// instructions (operating on D or Q registers, or FP16 values) are only
+// available for certain targets.
+
+// Note that it's not always obvious which instructions are available, for
+// example several instructions operating on D registers are available for
+// single-precision only FPUs.
+
+// All of these instructions are rejected if no VFP or MVE features are
+// present.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding 2>%t < %s
+// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP32 --check-prefix=NOFP64
+
+// VFP and NEON implementations by default have FP32 and FP64, but not FP16.
+// The VFPv3 FP16 extension just added conversion instructions, which we don't
+// care about here.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp2 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+neon 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+
+// The v8.2A FP16 extension added loads, stores and moves for FP16.
+// RUN: llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+fp-armv8,+fullfp16 < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP16 --check-prefix=FP32 --check-prefix=FP64
+
+// M-profile FPUs (e.g. Cortex-M4/M7/M33) do not have FP16 instructions, and
+// the FP64 instructions are optional. They are also limited to 16 D registers,
+// but we don't test that here.
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4d16sp 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32
+// RUN: FileCheck %s < %t --check-prefix=NOFP16 --check-prefix=NOFP64
+// RUN: not llvm-mc -triple=thumbv8.1m.main -show-encoding -mattr=+vfp4,-d32 2>%t < %s | \
+// RUN:     FileCheck %s --check-prefix=CHECK --check-prefix=FP32 --check-prefix=FP64
+// RUN: FileCheck %s < %t --check-prefix=NOFP16
+
+vldmia  r0, {d0}
+# FP32: vldmia  r0, {d0}               @ encoding: [0x90,0xec,0x02,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstmia  r0, {d0}
+# FP32: vstmia  r0, {d0}                @ encoding: [0x80,0xec,0x02,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldmia  r0, {s0}
+# FP32: vldmia  r0, {s0}                @ encoding: [0x90,0xec,0x01,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstmia  r0, {s0}
+# FP32: vstmia  r0, {s0}                @ encoding: [0x80,0xec,0x01,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+fldmdbx r0!, {d0}
+# FP32: fldmdbx r0!, {d0}               @ encoding: [0x30,0xed,0x03,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+fstmiax r0, {d0}
+# FP32: fstmiax r0, {d0}                @ encoding: [0x80,0xec,0x03,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldr.16 s0, [r0]
+# FP16: vldr.16 s0, [r0]                @ encoding: [0x90,0xed,0x00,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vldr s0, [r0]
+# FP32: vldr    s0, [r0]                @ encoding: [0x90,0xed,0x00,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vldr d0, [r0]
+# FP32: vldr    d0, [r0]                @ encoding: [0x90,0xed,0x00,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstr.16 s0, [r0]
+# FP16: vstr.16 s0, [r0]                @ encoding: [0x80,0xed,0x00,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vstr s0, [r0]
+# FP32: vstr    s0, [r0]                @ encoding: [0x80,0xed,0x00,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vstr d0, [r0]
+# FP32: vstr    d0, [r0]                @ encoding: [0x80,0xed,0x00,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f16 r0, s0
+# FP16: vmov.f16        r0, s0          @ encoding: [0x10,0xee,0x10,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vmov.f16 s0, r0
+# FP16: vmov.f16        s0, r0          @ encoding: [0x00,0xee,0x10,0x09]
+# NOFP16: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 16-bit fp registers
+
+vmov s0, r0
+# FP32: vmov    s0, r0                  @ encoding: [0x00,0xee,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, s0
+# FP32: vmov    r0, s0                  @ encoding: [0x10,0xee,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, r1, d0
+# FP32: vmov    r0, r1, d0              @ encoding: [0x51,0xec,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov d0, r0, r1
+# FP32: vmov    d0, r0, r1              @ encoding: [0x41,0xec,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov r0, r1, s0, s1
+# FP32: vmov    r0, r1, s0, s1          @ encoding: [0x51,0xec,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov s0, s1, r0, r1
+# FP32: vmov    s0, s1, r0, r1          @ encoding: [0x41,0xec,0x10,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f32 s0, s1
+# FP32: vmov.f32        s0, s1          @ encoding: [0xb0,0xee,0x60,0x0a]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
+
+vmov.f64 d0, d1
+# FP64: vmov.f64        d0, d1          @ encoding: [0xb0,0xee,0x41,0x0b]
+# NOFP64: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: 64-bit fp registers
+
+vmov.32 r0, d1[0]
+# FP32: vmov.32 r0, d1[0]               @ encoding: [0x11,0xee,0x10,0x0b]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: error: instruction requires: fp registers
+
+vmrs apsr_nzcv, fpscr
+# FP32: vmrs    APSR_nzcv, fpscr        @ encoding: [0xf1,0xee,0x10,0xfa]
+# NOFP32: :[[@LINE-2]]:{{[0-9]+}}: {{note|error}}: instruction requires: fp registers
diff --git a/llvm/test/MC/ARM/single-precision-fp.s b/llvm/test/MC/ARM/single-precision-fp.s
index 9de4b10183737..1b541f8bb074e 100644
--- a/llvm/test/MC/ARM/single-precision-fp.s
+++ b/llvm/test/MC/ARM/single-precision-fp.s
@@ -1,4 +1,4 @@
-@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-neon 2> %t > %t2
+@ RUN: not llvm-mc < %s -triple thumbv8-unknown-unknown -show-encoding -mattr=-fp64,-fpregs64,-neon 2> %t > %t2
 @ RUN:     FileCheck %s < %t --check-prefix=CHECK-ERRORS
 @ RUN:     FileCheck %s < %t2
 
@@ -72,7 +72,7 @@
         @ FIXME: overlapping aliases and a probable TableGen indeterminacy mean
         @ that the actual reason can vary by platform.
         vmov.f64 d11, d10
-@ CHECK-ERRORS: instruction requires: NEON
+@ CHECK-ERRORS: instruction requires: 64-bit fp registers
 @ CHECK-ERRORS-NEXT: vmov.f64 d11, d10
 
         vcvt.f64.s32 d9, s8
diff --git a/llvm/test/MC/ARM/vmrs_vmsr.s b/llvm/test/MC/ARM/vmrs_vmsr.s
index edca917c72a32..9193ae1b12ed2 100644
--- a/llvm/test/MC/ARM/vmrs_vmsr.s
+++ b/llvm/test/MC/ARM/vmrs_vmsr.s
@@ -103,10 +103,10 @@
 // ERROR-V8M: invalid operand for instruction
 // ERROR-V8M: invalid operand for instruction
 
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
@@ -172,9 +172,9 @@
 // ERROR-V8M: operand must be a register in range [r0, r14]
 
 // ERROR-NOVFP: invalid instruction
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: instruction requires: VFP2
 // ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
-// ERROR-NOVFP: instruction requires: VFP2
+// ERROR-NOVFP: instruction requires: fp registers
 // ERROR-NOVFP: invalid instruction
 // ERROR-NOVFP: invalid instruction

From 31e6d8feea1ac18d568cbd5299c54bdae9da5f44 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Thu, 30 May 2019 12:39:05 +0000
Subject: [PATCH 0611/1176] [llvm-readobj] - Rewrite reloc-types.test to use
 YAML. NFCI.

This change rewrites and splits reloc-types.test
to use yaml2obj instead of precompiled binaries.
That allowed to remove 7 precompiled objects from the inputs.

I took the existent objects, used obj2yaml on them, simplified the result and
used yaml2obj in the test case with the result.

Notes:
* I converted, but did not remove relocs.obj.elf-i386, relocs.obj.elf-x86_64 or relocs.obj.elf-mips objects
because found they are used in other tests.
* I was unable to convert relocs.obj.elf-ppc64, because obj2yaml hangs on this file for me.
* I was unable to convert relocs.obj.macho-arm, relocs.obj.macho-i386 and relocs.obj.macho-x86_64
because the output produced by obj2yaml does not seem to be correct.
* Because of the above I did not remove the script for creating all
of those objects: test\tools\llvm-readobj\Inputs\relocs.py

Differential revision: https://reviews.llvm.org/D62594

llvm-svn: 362089
---
 .../llvm-readobj/Inputs/relocs.obj.coff-i386  | Bin 305 -> 0 bytes
 .../Inputs/relocs.obj.coff-x86_64             | Bin 424 -> 0 bytes
 .../Inputs/relocs.obj.elf-aarch64             | Bin 4232 -> 0 bytes
 .../Inputs/relocs.obj.elf-aarch64-ilp32       |   0
 .../llvm-readobj/Inputs/relocs.obj.elf-arm    | Bin 2112 -> 0 bytes
 .../llvm-readobj/Inputs/relocs.obj.elf-lanai  | Bin 429 -> 0 bytes
 .../Inputs/relocs.obj.elf-mips64el            | Bin 2216 -> 0 bytes
 .../llvm-readobj/reloc-types-coff-i386.test   |  63 ++
 .../llvm-readobj/reloc-types-coff-x64.test    |  81 ++
 .../llvm-readobj/reloc-types-elf-aarch64.test | 393 ++++++++++
 .../llvm-readobj/reloc-types-elf-arm.test     | 420 +++++++++++
 .../llvm-readobj/reloc-types-elf-i386.test    | 148 ++++
 .../llvm-readobj/reloc-types-elf-lanai.test   |  47 ++
 .../llvm-readobj/reloc-types-elf-mips.test    | 181 +++++
 .../llvm-readobj/reloc-types-elf-mips64.test  | 283 +++++++
 .../llvm-readobj/reloc-types-elf-ppc64.test   |  38 +
 .../llvm-readobj/reloc-types-elf-x64.test     | 145 ++++
 .../llvm-readobj/reloc-types-macho-arm.test   |  16 +
 .../llvm-readobj/reloc-types-macho-i386.test  |  12 +
 .../llvm-readobj/reloc-types-macho-x64.test   |  16 +
 llvm/test/tools/llvm-readobj/reloc-types.test | 699 ------------------
 21 files changed, 1843 insertions(+), 699 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64-ilp32
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-lanai
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-coff-i386.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-coff-x64.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-ppc64.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-macho-arm.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-macho-i386.test
 create mode 100644 llvm/test/tools/llvm-readobj/reloc-types-macho-x64.test
 delete mode 100644 llvm/test/tools/llvm-readobj/reloc-types.test

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386 b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
deleted file mode 100644
index 9c24ac8d9d3c6c45c180884edb8c3edcbc2e656f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 305
zcmeZaWMlw=??B89W-&16m84dbK-nP57l>_uxD&(y0&XByU@%DNZs`VbsY8J*X95|=
zzy)P7G6+IhObilG78`>+l*P`V24!(F=t5as3?@((H-j~l#lzqPW$`k2LRlgxUIYaI
aGb6~8Adi6nBuLWo^C4U)$;7|{WB~v>aU4|u

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64 b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
deleted file mode 100644
index 356437e66cb8d4c3eea29b1929fb8625c526e74a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 424
zcmc)Hs}90Y5CqV<58maKAQ1>8AqXTAi9{kQ3K&971p?U+lKvJS#!fqiFJP1Ho~^jK
zon|iw*4;*^1+0tnqmVh?);sGD7ptlD0-N+&l@<4Y&uDk>ya*D>VJ3OZrGTXrv65QY
zO6~d!c}fR+z3bv2^>CE>I7tI!Qi7W_1asplBRr)sN@?<wEOQTz*)mU7^CvEO-mH3h
GMAH{=elWEF

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64 b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
deleted file mode 100644
index 658b0ea6227ab9337ba6332c679714ef4d219319..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4232
zcmeI!D@<HL6vpwpyTC5*_oKYZv!w-=H-(laE83<4fj}TCmIe|8B!pc72}uzM1OkCT
zAP@)y0zpy)0)apvNP|FBAP@)yqN1Xr_psj$xBslFZBIgG=67fA46`$5_sjdy55Yjd
z@gm@Sbsk%uI!-+JSdTqaV@|}$bZaZ${{Q_Wdk|fio}G*)rx%k>^m8KNL=!WK<U(@t
zvs?4ZrMZXp)MRq<@x=UnEz2Eh=g~@k#MHRMVa@P&a_jb(_Rz(St{ZE)snVM#l4?3t
zD%;)C-VsvUD>{7l!80p4SDtJ3&c7XBoMx;`jyHdgHGF~XaGKp`>6RMTLN_!v?3T7A
z{QqlQ<JxV=y~gKQ!xx<2_vnvms$crvp|lyu){$-{=G^tvc`5p1Mm1U%quowoX)et(
z9f^l0-Wx47`rX@~AlyD@ixAvfLk7Go)!oU2d*?0-UaS6Lc!N9wZ;@xi+vGX$4tXxT
zTb>8+m*>NW<OT2%c_BO|FM_|57sEfwOW>pOQuvs>3_dO|hfl~W;Gg7`@VLARJ|(Y)
z&&X@wv+`Q_yu1#cl-I)-<xk<u@&@>dyb-=CZ-TGMo8jy77Wjs|6}~BNhi}O{;M?*}
z_>MdZ-<5a4_vGF1eR&W3K;8>Kl=s1p<o)nt`2hSxJ_tXRzk;91hv4V(VfcmIz6AVv
z>{1?pU&({;@A44*TAl&Fk!Qkh<yr7Mc^G~#kHCM(v*ADGIq+ZdT)6jDlLxnaZIKVR
zJZn(^xBO^P2)DduQ3SVqWKj&aJYrD-x4d9c3b*@gQ3khrXi*MtlUKm)zFAbl?Veav
z!R>roJb~M}w5W!U$ZOy+c`f{%ybk_RUJoCYKZTFU8{p&eM)-uh3I0jm43EoO;8XHe
z_>BA+d{+J(J}+;BC*?2Ti}IK7WqCV%MQ*<*{rO>4-U(lmN8#)8F8GGL8@?&;fp5us
z;oI^)_>R0EzAGPq@5u+@`|?-t1NjjAP(BPllD~!@%iq9H<RkD?`CIsz+<x-<Kd*Cn
g0Dd74!Y}0^_?0{Z{#~94zm{jgZ{%V4tvmw%4gNX^S^xk5

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64-ilp32 b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64-ilp32
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
deleted file mode 100644
index 206c933d478f7bfa4289c78702bf2338a008758b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2112
zcmeIyF>{nf7y#h!$OR4v7%*VKh!G;57|x(U14f7%AWGDrQ6mJ%g+wAG8gn0zXoSE}
zP*6}%P*9|xprD{gK|w)5K|w)5K|w)5!8jBY6ch~b?0KJtia)^4>^$%8zW46l-p(D_
z)V?{DN<~RaMcL^8Q53~9iYw)fMY2&QDvzE#efrPK{_+lzHxT7U`Ui7y|7aZLdJBap
zR~RV7BXPc0lsGn29QWnpeDc&@Xt|8=7H5_+!yByp_dOP8(xv&3GrJTAYAu8wJ{Q(P
zUPfP48kaG&)@ilEIH@;R9P(e3S2-<5Sgh9&f>H~6hqhdwzSWe%J?3WV;X8(XL#?HH
zvZpYrvM?tErIyr7#);3*KR+rf-awKW>QCPxjMM+!Z^#Pgi0bmWhNv($6ynevGMq8R
za4$1h!7_&YG^<$7YR+U0XR($U)^RqooWpv~Ww@^jhTnD`n>e4%e1$EnWGff2jSJb%
zSJ}ZTc5)HBxR~91jXkVpAD3`|OF78bIm{Zye1oH0#&N#M3D)vB-{J`_=Si;MDc12c
z-{u*v<XOJMNoIMD@A5oX@dDrDMb>kQt9gkzUgrC}!UkUD8eZcEyv`4KgN?k&wY<fT
zc$**d4x4zF>v)grd7q!~0h{@dpYjnm@G&<sJf9Y(`580(oE7|nm271dH?f+VS;H-?
zWgF}GCA0jB_1ww^wzH96vx(oZncuR79c<+`w(&c*^Luu%lb!s5UHp;V{E0p6Vjq9z
z0Jn3HJ2=d4#@xwK?&3IibAmlQ&OA@BmnXT0r`X5S?B^N&!n54VNe=KFf8}}Z;|1>L
zMGkU`L%hV_c$vTR3Ws@>fAAVdc%22_V9c95z*{`X+dRZO9OYe(@gD!=eg4G<9Opy+
Y%||@U$2`LD$2Gw;k21q!tl%V{0gW_%$p8QV

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-lanai b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-lanai
deleted file mode 100644
index 70e25d1fca319223b9c6d923fd39633d909f8dcc..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 429
zcmb<-^>JflVq|~=MusmyHj)6t6R?~KnABikWdJJ&X~Bl!<{%hwvjpH`a3L0;RWJ?*
zkj(^D!UJS8L)k!Wj6ii@AOd8wLfH~PHXE2-!Jt=^nv<wkl3G#1pjTW{RFaqkq$_hl
zG)aIDs2XN3NaGP8mIE=M0LiW3FoM|6zyJyz8JHp<4RbHZEEpRkT>!)(P%)5SL2^L9
aGKc{&%<n)ZLjVvf05Qlu5C*9e-~s@iln}-M

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el
deleted file mode 100644
index a9779645c3125dbbb389123cb0e49abedcb314b4..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 2216
zcmdtisgD#v6bIm%`xJBE_kH$U2n1pTkP#~o2n2%J>?SLAlg&<x*@Vm>5C{YUfj}S-
z2m}IwK&(KlK%htQr%<n^Ugww2C~&-F_tpEls;j!Xrne7{9y%Qkm(bx5e&OH(@&a(K
zd@g%>WXpN)zxp$-PS2NydAc@*alBdyV|BJNugvAKY;nH6Q1!{uT*4OpQ`0L)(-u?4
znCs#fqHMcuHI20!b-C4V_P4&s^Z51njLv<T=^oZW&>kT?5cavkvJc(!Z^uUujHPLL
z{s|Qj_GQ)OH78u>=0Y!Mt-&WVt=<0od1>~w(~ujDbkZ^G^W!yp+ja04Me`k+k=(w!
z`*#zu-<ayv^#!thB-biC9M-b=`IS4J*4EK`q)_g1SqtR3mAl<4QNINJlcB)lv9^iN
zm2$7wI@X1K_kDk#k6wq0iq5LP-_Oev4fH@Dpi9v~4+ewm1N2ZR#6Ct3hxswZ9rQ?q
zA5+{#k4EWus*oQ$6>6QaSWK5<t{ckZ@i_Z^^hAQUODxcn$t3qbLQkbq?2pjX=`{Oe
z^h}1gT|7b0X0zP?DS9rKV}FL8&(qIA#fi@9xe5h-ec}wgSS;#NoTHaYCH717a=FZI
zqE{*v_BDF7T4jHZUaQsEU!d3Pb@rF&jYfn06?(JTWPgp`YPHzkptsv?_P6Mr4j)VL
z4!zs$a{u?}y<U&~1A4ztpJNptbyoX77z}hNKA{hXL-x<;hGDROK_87q>|fEx<1za;
n^vT5f{C-EDPN&@e27NZ0vHw8dY&Pt-=-cg<{U`cvw`2bUn(9zL

diff --git a/llvm/test/tools/llvm-readobj/reloc-types-coff-i386.test b/llvm/test/tools/llvm-readobj/reloc-types-coff-i386.test
new file mode 100644
index 0000000000000..466530c53ed08
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-coff-i386.test
@@ -0,0 +1,63 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for coff-i386 target.
+
+# RUN: yaml2obj %s -o %t-coff-i386.o
+# RUN: llvm-readobj -r --expand-relocs %t-coff-i386.o | FileCheck %s
+
+# CHECK: Type: IMAGE_REL_I386_ABSOLUTE (0)
+# CHECK: Type: IMAGE_REL_I386_DIR16 (1)
+# CHECK: Type: IMAGE_REL_I386_REL16 (2)
+# CHECK: Type: IMAGE_REL_I386_DIR32 (6)
+# CHECK: Type: IMAGE_REL_I386_DIR32NB (7)
+# CHECK: Type: IMAGE_REL_I386_SEG12 (9)
+# CHECK: Type: IMAGE_REL_I386_SECTION (10)
+# CHECK: Type: IMAGE_REL_I386_SECREL (11)
+# CHECK: Type: IMAGE_REL_I386_TOKEN (12)
+# CHECK: Type: IMAGE_REL_I386_SECREL7 (13)
+# CHECK: Type: IMAGE_REL_I386_REL32 (20)
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_I386
+  Characteristics: [  ]
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       4
+    SectionData:     00
+    Relocations:
+      - VirtualAddress: 3
+        Type:           IMAGE_REL_I386_ABSOLUTE
+      - VirtualAddress: 10
+        Type:           IMAGE_REL_I386_DIR16
+      - VirtualAddress: 17
+        Type:           IMAGE_REL_I386_REL16
+      - VirtualAddress: 24
+        Type:           IMAGE_REL_I386_DIR32
+      - VirtualAddress: 31
+        Type:           IMAGE_REL_I386_DIR32NB
+      - VirtualAddress: 38
+        Type:           IMAGE_REL_I386_SEG12
+      - VirtualAddress: 45
+        Type:           IMAGE_REL_I386_SECTION
+      - VirtualAddress: 52
+        Type:           IMAGE_REL_I386_SECREL
+      - VirtualAddress: 59
+        Type:           IMAGE_REL_I386_TOKEN
+      - VirtualAddress: 66
+        Type:           IMAGE_REL_I386_SECREL7
+      - VirtualAddress: 73
+        Type:           IMAGE_REL_I386_REL32
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          77
+      NumberOfRelocations: 11
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          1
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-coff-x64.test b/llvm/test/tools/llvm-readobj/reloc-types-coff-x64.test
new file mode 100644
index 0000000000000..b78cc037073b1
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-coff-x64.test
@@ -0,0 +1,81 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for coff-x64 target.
+
+# RUN: yaml2obj %s -o %t-coff-x86_64.o
+# RUN: llvm-readobj -r --expand-relocs %t-coff-x86_64.o | FileCheck %s
+
+# CHECK: Type: IMAGE_REL_AMD64_ABSOLUTE (0)
+# CHECK: Type: IMAGE_REL_AMD64_ADDR64 (1)
+# CHECK: Type: IMAGE_REL_AMD64_ADDR32 (2)
+# CHECK: Type: IMAGE_REL_AMD64_ADDR32NB (3)
+# CHECK: Type: IMAGE_REL_AMD64_REL32 (4)
+# CHECK: Type: IMAGE_REL_AMD64_REL32_1 (5)
+# CHECK: Type: IMAGE_REL_AMD64_REL32_2 (6)
+# CHECK: Type: IMAGE_REL_AMD64_REL32_3 (7)
+# CHECK: Type: IMAGE_REL_AMD64_REL32_4 (8)
+# CHECK: Type: IMAGE_REL_AMD64_REL32_5 (9)
+# CHECK: Type: IMAGE_REL_AMD64_SECTION (10)
+# CHECK: Type: IMAGE_REL_AMD64_SECREL (11)
+# CHECK: Type: IMAGE_REL_AMD64_SECREL7 (12)
+# CHECK: Type: IMAGE_REL_AMD64_TOKEN (13)
+# CHECK: Type: IMAGE_REL_AMD64_SREL32 (14)
+# CHECK: Type: IMAGE_REL_AMD64_PAIR (15)
+# CHECK: Type: IMAGE_REL_AMD64_SSPAN32 (16)
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       4
+    SectionData:     00
+    Relocations:
+      - VirtualAddress: 4
+        Type:           IMAGE_REL_AMD64_ABSOLUTE
+      - VirtualAddress: 12
+        Type:           IMAGE_REL_AMD64_ADDR64
+      - VirtualAddress: 20
+        Type:           IMAGE_REL_AMD64_ADDR32
+      - VirtualAddress: 28
+        Type:           IMAGE_REL_AMD64_ADDR32NB
+      - VirtualAddress: 36
+        Type:           IMAGE_REL_AMD64_REL32
+      - VirtualAddress: 44
+        Type:           IMAGE_REL_AMD64_REL32_1
+      - VirtualAddress: 52
+        Type:           IMAGE_REL_AMD64_REL32_2
+      - VirtualAddress: 60
+        Type:           IMAGE_REL_AMD64_REL32_3
+      - VirtualAddress: 68
+        Type:           IMAGE_REL_AMD64_REL32_4
+      - VirtualAddress: 76
+        Type:           IMAGE_REL_AMD64_REL32_5
+      - VirtualAddress: 84
+        Type:           IMAGE_REL_AMD64_SECTION
+      - VirtualAddress: 92
+        Type:           IMAGE_REL_AMD64_SECREL
+      - VirtualAddress: 100
+        Type:           IMAGE_REL_AMD64_SECREL7
+      - VirtualAddress: 108
+        Type:           IMAGE_REL_AMD64_TOKEN
+      - VirtualAddress: 116
+        Type:           IMAGE_REL_AMD64_SREL32
+      - VirtualAddress: 124
+        Type:           IMAGE_REL_AMD64_PAIR
+      - VirtualAddress: 132
+        Type:           IMAGE_REL_AMD64_SSPAN32
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          136
+      NumberOfRelocations: 17
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          1
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test
new file mode 100644
index 0000000000000..ba9cc7da65c72
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-aarch64.test
@@ -0,0 +1,393 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for aarch64 target.
+
+# RUN: yaml2obj %s -o %t-aarch64.o
+# RUN: llvm-readobj -r --expand-relocs %t-aarch64.o | FileCheck %s
+
+# CHECK: Type: R_AARCH64_NONE (0)
+# CHECK: Type: R_AARCH64_ABS64 (257)
+# CHECK: Type: R_AARCH64_ABS32 (258)
+# CHECK: Type: R_AARCH64_ABS16 (259)
+# CHECK: Type: R_AARCH64_PREL64 (260)
+# CHECK: Type: R_AARCH64_PREL32 (261)
+# CHECK: Type: R_AARCH64_PREL16 (262)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G0 (263)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G0_NC (264)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G1 (265)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G1_NC (266)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G2 (267)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G2_NC (268)
+# CHECK: Type: R_AARCH64_MOVW_UABS_G3 (269)
+# CHECK: Type: R_AARCH64_MOVW_SABS_G0 (270)
+# CHECK: Type: R_AARCH64_MOVW_SABS_G1 (271)
+# CHECK: Type: R_AARCH64_MOVW_SABS_G2 (272)
+# CHECK: Type: R_AARCH64_LD_PREL_LO19 (273)
+# CHECK: Type: R_AARCH64_ADR_PREL_LO21 (274)
+# CHECK: Type: R_AARCH64_ADR_PREL_PG_HI21 (275)
+# CHECK: Type: R_AARCH64_ADR_PREL_PG_HI21_NC (276)
+# CHECK: Type: R_AARCH64_ADD_ABS_LO12_NC (277)
+# CHECK: Type: R_AARCH64_LDST8_ABS_LO12_NC (278)
+# CHECK: Type: R_AARCH64_TSTBR14 (279)
+# CHECK: Type: R_AARCH64_CONDBR19 (280)
+# CHECK: Type: R_AARCH64_JUMP26 (282)
+# CHECK: Type: R_AARCH64_CALL26 (283)
+# CHECK: Type: R_AARCH64_LDST16_ABS_LO12_NC (284)
+# CHECK: Type: R_AARCH64_LDST32_ABS_LO12_NC (285)
+# CHECK: Type: R_AARCH64_LDST64_ABS_LO12_NC (286)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G0 (287)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G0_NC (288)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G1 (289)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G1_NC (290)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G2 (291)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G2_NC (292)
+# CHECK: Type: R_AARCH64_MOVW_PREL_G3 (293)
+# CHECK: Type: R_AARCH64_LDST128_ABS_LO12_NC (299)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G0 (300)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G0_NC (301)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G1 (302)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G1_NC (303)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G2 (304)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G2_NC (305)
+# CHECK: Type: R_AARCH64_MOVW_GOTOFF_G3 (306)
+# CHECK: Type: R_AARCH64_GOTREL64 (307)
+# CHECK: Type: R_AARCH64_GOTREL32 (308)
+# CHECK: Type: R_AARCH64_GOT_LD_PREL19 (309)
+# CHECK: Type: R_AARCH64_LD64_GOTOFF_LO15 (310)
+# CHECK: Type: R_AARCH64_ADR_GOT_PAGE (311)
+# CHECK: Type: R_AARCH64_LD64_GOT_LO12_NC (312)
+# CHECK: Type: R_AARCH64_LD64_GOTPAGE_LO15 (313)
+# CHECK: Type: R_AARCH64_TLSGD_ADR_PREL21 (512)
+# CHECK: Type: R_AARCH64_TLSGD_ADR_PAGE21 (513)
+# CHECK: Type: R_AARCH64_TLSGD_ADD_LO12_NC (514)
+# CHECK: Type: R_AARCH64_TLSGD_MOVW_G1 (515)
+# CHECK: Type: R_AARCH64_TLSGD_MOVW_G0_NC (516)
+# CHECK: Type: R_AARCH64_TLSLD_ADR_PREL21 (517)
+# CHECK: Type: R_AARCH64_TLSLD_ADR_PAGE21 (518)
+# CHECK: Type: R_AARCH64_TLSLD_ADD_LO12_NC (519)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_G1 (520)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_G0_NC (521)
+# CHECK: Type: R_AARCH64_TLSLD_LD_PREL19 (522)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G2 (523)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1 (524)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC (525)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0 (526)
+# CHECK: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC (527)
+# CHECK: Type: R_AARCH64_TLSLD_ADD_DTPREL_HI12 (528)
+# CHECK: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12 (529)
+# CHECK: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC (530)
+# CHECK: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 (531)
+# CHECK: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC (532)
+# CHECK: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 (533)
+# CHECK: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC (534)
+# CHECK: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 (535)
+# CHECK: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC (536)
+# CHECK: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 (537)
+# CHECK: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC (538)
+# CHECK: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 (539)
+# CHECK: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC (540)
+# CHECK: Type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 (541)
+# CHECK: Type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC (542)
+# CHECK: Type: R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 (543)
+# CHECK: Type: R_AARCH64_TLSLE_MOVW_TPREL_G2 (544)
+# CHECK: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1 (545)
+# CHECK: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC (546)
+# CHECK: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0 (547)
+# CHECK: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC (548)
+# CHECK: Type: R_AARCH64_TLSLE_ADD_TPREL_HI12 (549)
+# CHECK: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12 (550)
+# CHECK: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC (551)
+# CHECK: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12 (552)
+# CHECK: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC (553)
+# CHECK: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12 (554)
+# CHECK: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC (555)
+# CHECK: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12 (556)
+# CHECK: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC (557)
+# CHECK: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12 (558)
+# CHECK: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC (559)
+# CHECK: Type: R_AARCH64_TLSDESC_LD_PREL19 (560)
+# CHECK: Type: R_AARCH64_TLSDESC_ADR_PREL21 (561)
+# CHECK: Type: R_AARCH64_TLSDESC_ADR_PAGE21 (562)
+# CHECK: Type: R_AARCH64_TLSDESC_LD64_LO12 (563)
+# CHECK: Type: R_AARCH64_TLSDESC_ADD_LO12 (564)
+# CHECK: Type: R_AARCH64_TLSDESC_OFF_G1 (565)
+# CHECK: Type: R_AARCH64_TLSDESC_OFF_G0_NC (566)
+# CHECK: Type: R_AARCH64_TLSDESC_LDR (567)
+# CHECK: Type: R_AARCH64_TLSDESC_ADD (568)
+# CHECK: Type: R_AARCH64_TLSDESC_CALL (569)
+# CHECK: Type: R_AARCH64_TLSLE_LDST128_TPREL_LO12 (570)
+# CHECK: Type: R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC (571)
+# CHECK: Type: R_AARCH64_TLSLD_LDST128_DTPREL_LO12 (572)
+# CHECK: Type: R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC (573)
+# CHECK: Type: R_AARCH64_COPY (1024)
+# CHECK: Type: R_AARCH64_GLOB_DAT (1025)
+# CHECK: Type: R_AARCH64_JUMP_SLOT (1026)
+# CHECK: Type: R_AARCH64_RELATIVE (1027)
+# CHECK: Type: R_AARCH64_TLS_DTPREL64 (1028)
+# CHECK: Type: R_AARCH64_TLS_DTPMOD64 (1029)
+# CHECK: Type: R_AARCH64_TLS_TPREL64 (1030)
+# CHECK: Type: R_AARCH64_TLSDESC (1031)
+# CHECK: Type: R_AARCH64_IRELATIVE (1032)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_AARCH64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Content: 00
+  - Name:         .rela.text
+    Type:         SHT_RELA
+    Link:         .symtab
+    AddressAlign: 0x0000000000000008
+    EntSize:      0x0000000000000018
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000000
+        Type:   R_AARCH64_NONE
+      - Offset: 0x0000000000000004
+        Type:   R_AARCH64_ABS64
+      - Offset: 0x0000000000000008
+        Type:   R_AARCH64_ABS32
+      - Offset: 0x000000000000000C
+        Type:   R_AARCH64_ABS16
+      - Offset: 0x0000000000000010
+        Type:   R_AARCH64_PREL64
+      - Offset: 0x0000000000000014
+        Type:   R_AARCH64_PREL32
+      - Offset: 0x0000000000000018
+        Type:   R_AARCH64_PREL16
+      - Offset: 0x000000000000001C
+        Type:   R_AARCH64_MOVW_UABS_G0
+      - Offset: 0x0000000000000020
+        Type:   R_AARCH64_MOVW_UABS_G0_NC
+      - Offset: 0x0000000000000024
+        Type:   R_AARCH64_MOVW_UABS_G1
+      - Offset: 0x0000000000000028
+        Type:   R_AARCH64_MOVW_UABS_G1_NC
+      - Offset: 0x000000000000002C
+        Type:   R_AARCH64_MOVW_UABS_G2
+      - Offset: 0x0000000000000030
+        Type:   R_AARCH64_MOVW_UABS_G2_NC
+      - Offset: 0x0000000000000034
+        Type:   R_AARCH64_MOVW_UABS_G3
+      - Offset: 0x0000000000000038
+        Type:   R_AARCH64_MOVW_SABS_G0
+      - Offset: 0x000000000000003C
+        Type:   R_AARCH64_MOVW_SABS_G1
+      - Offset: 0x0000000000000040
+        Type:   R_AARCH64_MOVW_SABS_G2
+      - Offset: 0x0000000000000044
+        Type:   R_AARCH64_LD_PREL_LO19
+      - Offset: 0x0000000000000048
+        Type:   R_AARCH64_ADR_PREL_LO21
+      - Offset: 0x000000000000004C
+        Type:   R_AARCH64_ADR_PREL_PG_HI21
+      - Offset: 0x0000000000000050
+        Type:   R_AARCH64_ADR_PREL_PG_HI21_NC
+      - Offset: 0x0000000000000054
+        Type:   R_AARCH64_ADD_ABS_LO12_NC
+      - Offset: 0x0000000000000058
+        Type:   R_AARCH64_LDST8_ABS_LO12_NC
+      - Offset: 0x000000000000005C
+        Type:   R_AARCH64_TSTBR14
+      - Offset: 0x0000000000000060
+        Type:   R_AARCH64_CONDBR19
+      - Offset: 0x0000000000000064
+        Type:   R_AARCH64_JUMP26
+      - Offset: 0x0000000000000068
+        Type:   R_AARCH64_CALL26
+      - Offset: 0x000000000000006C
+        Type:   R_AARCH64_LDST16_ABS_LO12_NC
+      - Offset: 0x0000000000000070
+        Type:   R_AARCH64_LDST32_ABS_LO12_NC
+      - Offset: 0x0000000000000074
+        Type:   R_AARCH64_LDST64_ABS_LO12_NC
+      - Offset: 0x0000000000000078
+        Type:   R_AARCH64_MOVW_PREL_G0
+      - Offset: 0x000000000000007C
+        Type:   R_AARCH64_MOVW_PREL_G0_NC
+      - Offset: 0x0000000000000080
+        Type:   R_AARCH64_MOVW_PREL_G1
+      - Offset: 0x0000000000000084
+        Type:   R_AARCH64_MOVW_PREL_G1_NC
+      - Offset: 0x0000000000000088
+        Type:   R_AARCH64_MOVW_PREL_G2
+      - Offset: 0x000000000000008C
+        Type:   R_AARCH64_MOVW_PREL_G2_NC
+      - Offset: 0x0000000000000090
+        Type:   R_AARCH64_MOVW_PREL_G3
+      - Offset: 0x0000000000000094
+        Type:   R_AARCH64_LDST128_ABS_LO12_NC
+      - Offset: 0x0000000000000098
+        Type:   R_AARCH64_MOVW_GOTOFF_G0
+      - Offset: 0x000000000000009C
+        Type:   R_AARCH64_MOVW_GOTOFF_G0_NC
+      - Offset: 0x00000000000000A0
+        Type:   R_AARCH64_MOVW_GOTOFF_G1
+      - Offset: 0x00000000000000A4
+        Type:   R_AARCH64_MOVW_GOTOFF_G1_NC
+      - Offset: 0x00000000000000A8
+        Type:   R_AARCH64_MOVW_GOTOFF_G2
+      - Offset: 0x00000000000000AC
+        Type:   R_AARCH64_MOVW_GOTOFF_G2_NC
+      - Offset: 0x00000000000000B0
+        Type:   R_AARCH64_MOVW_GOTOFF_G3
+      - Offset: 0x00000000000000B4
+        Type:   R_AARCH64_GOTREL64
+      - Offset: 0x00000000000000B8
+        Type:   R_AARCH64_GOTREL32
+      - Offset: 0x00000000000000BC
+        Type:   R_AARCH64_GOT_LD_PREL19
+      - Offset: 0x00000000000000C0
+        Type:   R_AARCH64_LD64_GOTOFF_LO15
+      - Offset: 0x00000000000000C4
+        Type:   R_AARCH64_ADR_GOT_PAGE
+      - Offset: 0x00000000000000C8
+        Type:   R_AARCH64_LD64_GOT_LO12_NC
+      - Offset: 0x00000000000000CC
+        Type:   R_AARCH64_LD64_GOTPAGE_LO15
+      - Offset: 0x00000000000000D0
+        Type:   R_AARCH64_TLSGD_ADR_PREL21
+      - Offset: 0x00000000000000D4
+        Type:   R_AARCH64_TLSGD_ADR_PAGE21
+      - Offset: 0x00000000000000D8
+        Type:   R_AARCH64_TLSGD_ADD_LO12_NC
+      - Offset: 0x00000000000000DC
+        Type:   R_AARCH64_TLSGD_MOVW_G1
+      - Offset: 0x00000000000000E0
+        Type:   R_AARCH64_TLSGD_MOVW_G0_NC
+      - Offset: 0x00000000000000E4
+        Type:   R_AARCH64_TLSLD_ADR_PREL21
+      - Offset: 0x00000000000000E8
+        Type:   R_AARCH64_TLSLD_ADR_PAGE21
+      - Offset: 0x00000000000000EC
+        Type:   R_AARCH64_TLSLD_ADD_LO12_NC
+      - Offset: 0x00000000000000F0
+        Type:   R_AARCH64_TLSLD_MOVW_G1
+      - Offset: 0x00000000000000F4
+        Type:   R_AARCH64_TLSLD_MOVW_G0_NC
+      - Offset: 0x00000000000000F8
+        Type:   R_AARCH64_TLSLD_LD_PREL19
+      - Offset: 0x00000000000000FC
+        Type:   R_AARCH64_TLSLD_MOVW_DTPREL_G2
+      - Offset: 0x0000000000000100
+        Type:   R_AARCH64_TLSLD_MOVW_DTPREL_G1
+      - Offset: 0x0000000000000104
+        Type:   R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
+      - Offset: 0x0000000000000108
+        Type:   R_AARCH64_TLSLD_MOVW_DTPREL_G0
+      - Offset: 0x000000000000010C
+        Type:   R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
+      - Offset: 0x0000000000000110
+        Type:   R_AARCH64_TLSLD_ADD_DTPREL_HI12
+      - Offset: 0x0000000000000114
+        Type:   R_AARCH64_TLSLD_ADD_DTPREL_LO12
+      - Offset: 0x0000000000000118
+        Type:   R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+      - Offset: 0x000000000000011C
+        Type:   R_AARCH64_TLSLD_LDST8_DTPREL_LO12
+      - Offset: 0x0000000000000120
+        Type:   R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC
+      - Offset: 0x0000000000000124
+        Type:   R_AARCH64_TLSLD_LDST16_DTPREL_LO12
+      - Offset: 0x0000000000000128
+        Type:   R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC
+      - Offset: 0x000000000000012C
+        Type:   R_AARCH64_TLSLD_LDST32_DTPREL_LO12
+      - Offset: 0x0000000000000130
+        Type:   R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC
+      - Offset: 0x0000000000000134
+        Type:   R_AARCH64_TLSLD_LDST64_DTPREL_LO12
+      - Offset: 0x0000000000000138
+        Type:   R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC
+      - Offset: 0x000000000000013C
+        Type:   R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
+      - Offset: 0x0000000000000140
+        Type:   R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
+      - Offset: 0x0000000000000144
+        Type:   R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21
+      - Offset: 0x0000000000000148
+        Type:   R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
+      - Offset: 0x000000000000014C
+        Type:   R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
+      - Offset: 0x0000000000000150
+        Type:   R_AARCH64_TLSLE_MOVW_TPREL_G2
+      - Offset: 0x0000000000000154
+        Type:   R_AARCH64_TLSLE_MOVW_TPREL_G1
+      - Offset: 0x0000000000000158
+        Type:   R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
+      - Offset: 0x000000000000015C
+        Type:   R_AARCH64_TLSLE_MOVW_TPREL_G0
+      - Offset: 0x0000000000000160
+        Type:   R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+      - Offset: 0x0000000000000164
+        Type:   R_AARCH64_TLSLE_ADD_TPREL_HI12
+      - Offset: 0x0000000000000168
+        Type:   R_AARCH64_TLSLE_ADD_TPREL_LO12
+      - Offset: 0x000000000000016C
+        Type:   R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
+      - Offset: 0x0000000000000170
+        Type:   R_AARCH64_TLSLE_LDST8_TPREL_LO12
+      - Offset: 0x0000000000000174
+        Type:   R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC
+      - Offset: 0x0000000000000178
+        Type:   R_AARCH64_TLSLE_LDST16_TPREL_LO12
+      - Offset: 0x000000000000017C
+        Type:   R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC
+      - Offset: 0x0000000000000180
+        Type:   R_AARCH64_TLSLE_LDST32_TPREL_LO12
+      - Offset: 0x0000000000000184
+        Type:   R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC
+      - Offset: 0x0000000000000188
+        Type:   R_AARCH64_TLSLE_LDST64_TPREL_LO12
+      - Offset: 0x000000000000018C
+        Type:   R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC
+      - Offset: 0x0000000000000190
+        Type:   R_AARCH64_TLSDESC_LD_PREL19
+      - Offset: 0x0000000000000194
+        Type:   R_AARCH64_TLSDESC_ADR_PREL21
+      - Offset: 0x0000000000000198
+        Type:   R_AARCH64_TLSDESC_ADR_PAGE21
+      - Offset: 0x000000000000019C
+        Type:   R_AARCH64_TLSDESC_LD64_LO12
+      - Offset: 0x00000000000001A0
+        Type:   R_AARCH64_TLSDESC_ADD_LO12
+      - Offset: 0x00000000000001A4
+        Type:   R_AARCH64_TLSDESC_OFF_G1
+      - Offset: 0x00000000000001A8
+        Type:   R_AARCH64_TLSDESC_OFF_G0_NC
+      - Offset: 0x00000000000001AC
+        Type:   R_AARCH64_TLSDESC_LDR
+      - Offset: 0x00000000000001B0
+        Type:   R_AARCH64_TLSDESC_ADD
+      - Offset: 0x00000000000001B4
+        Type:   R_AARCH64_TLSDESC_CALL
+      - Offset: 0x00000000000001B8
+        Type:   R_AARCH64_TLSLE_LDST128_TPREL_LO12
+      - Offset: 0x00000000000001BC
+        Type:   R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC
+      - Offset: 0x00000000000001C0
+        Type:   R_AARCH64_TLSLD_LDST128_DTPREL_LO12
+      - Offset: 0x00000000000001C4
+        Type:   R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC
+      - Offset: 0x00000000000001C8
+        Type:   R_AARCH64_COPY
+      - Offset: 0x00000000000001CC
+        Type:   R_AARCH64_GLOB_DAT
+      - Offset: 0x00000000000001D0
+        Type:   R_AARCH64_JUMP_SLOT
+      - Offset: 0x00000000000001D4
+        Type:   R_AARCH64_RELATIVE
+      - Offset: 0x00000000000001D8
+        Type:   R_AARCH64_TLS_DTPREL64
+      - Offset: 0x00000000000001DC
+        Type:   R_AARCH64_TLS_DTPMOD64
+      - Offset: 0x00000000000001E0
+        Type:   R_AARCH64_TLS_TPREL64
+      - Offset: 0x00000000000001E4
+        Type:   R_AARCH64_TLSDESC
+      - Offset: 0x00000000000001E8
+        Type:   R_AARCH64_IRELATIVE
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test
new file mode 100644
index 0000000000000..6e29637d35792
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-arm.test
@@ -0,0 +1,420 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for arm target.
+
+# RUN: yaml2obj %s -o %t-arm.o
+# RUN: llvm-readobj -r --expand-relocs %t-arm.o | FileCheck %s
+
+# CHECK: Type: R_ARM_NONE (0)
+# CHECK: Type: R_ARM_PC24 (1)
+# CHECK: Type: R_ARM_ABS32 (2)
+# CHECK: Type: R_ARM_REL32 (3)
+# CHECK: Type: R_ARM_LDR_PC_G0 (4)
+# CHECK: Type: R_ARM_ABS16 (5)
+# CHECK: Type: R_ARM_ABS12 (6)
+# CHECK: Type: R_ARM_THM_ABS5 (7)
+# CHECK: Type: R_ARM_ABS8 (8)
+# CHECK: Type: R_ARM_SBREL32 (9)
+# CHECK: Type: R_ARM_THM_CALL (10)
+# CHECK: Type: R_ARM_THM_PC8 (11)
+# CHECK: Type: R_ARM_BREL_ADJ (12)
+# CHECK: Type: R_ARM_TLS_DESC (13)
+# CHECK: Type: R_ARM_THM_SWI8 (14)
+# CHECK: Type: R_ARM_XPC25 (15)
+# CHECK: Type: R_ARM_THM_XPC22 (16)
+# CHECK: Type: R_ARM_TLS_DTPMOD32 (17)
+# CHECK: Type: R_ARM_TLS_DTPOFF32 (18)
+# CHECK: Type: R_ARM_TLS_TPOFF32 (19)
+# CHECK: Type: R_ARM_COPY (20)
+# CHECK: Type: R_ARM_GLOB_DAT (21)
+# CHECK: Type: R_ARM_JUMP_SLOT (22)
+# CHECK: Type: R_ARM_RELATIVE (23)
+# CHECK: Type: R_ARM_GOTOFF32 (24)
+# CHECK: Type: R_ARM_BASE_PREL (25)
+# CHECK: Type: R_ARM_GOT_BREL (26)
+# CHECK: Type: R_ARM_CALL (28)
+# CHECK: Type: R_ARM_JUMP24 (29)
+# CHECK: Type: R_ARM_THM_JUMP24 (30)
+# CHECK: Type: R_ARM_BASE_ABS (31)
+# CHECK: Type: R_ARM_ALU_PCREL_7_0 (32)
+# CHECK: Type: R_ARM_ALU_PCREL_15_8 (33)
+# CHECK: Type: R_ARM_ALU_PCREL_23_15 (34)
+# CHECK: Type: R_ARM_LDR_SBREL_11_0_NC (35)
+# CHECK: Type: R_ARM_ALU_SBREL_19_12_NC (36)
+# CHECK: Type: R_ARM_ALU_SBREL_27_20_CK (37)
+# CHECK: Type: R_ARM_TARGET1 (38)
+# CHECK: Type: R_ARM_SBREL31 (39)
+# CHECK: Type: R_ARM_V4BX (40)
+# CHECK: Type: R_ARM_TARGET2 (41)
+# CHECK: Type: R_ARM_PREL31 (42)
+# CHECK: Type: R_ARM_MOVW_ABS_NC (43)
+# CHECK: Type: R_ARM_MOVT_ABS (44)
+# CHECK: Type: R_ARM_MOVW_PREL_NC (45)
+# CHECK: Type: R_ARM_MOVT_PREL (46)
+# CHECK: Type: R_ARM_THM_MOVW_ABS_NC (47)
+# CHECK: Type: R_ARM_THM_MOVT_ABS (48)
+# CHECK: Type: R_ARM_THM_MOVW_PREL_NC (49)
+# CHECK: Type: R_ARM_THM_MOVT_PREL (50)
+# CHECK: Type: R_ARM_THM_JUMP19 (51)
+# CHECK: Type: R_ARM_THM_JUMP6 (52)
+# CHECK: Type: R_ARM_THM_ALU_PREL_11_0 (53)
+# CHECK: Type: R_ARM_THM_PC12 (54)
+# CHECK: Type: R_ARM_ABS32_NOI (55)
+# CHECK: Type: R_ARM_REL32_NOI (56)
+# CHECK: Type: R_ARM_ALU_PC_G0_NC (57)
+# CHECK: Type: R_ARM_ALU_PC_G0 (58)
+# CHECK: Type: R_ARM_ALU_PC_G1_NC (59)
+# CHECK: Type: R_ARM_ALU_PC_G1 (60)
+# CHECK: Type: R_ARM_ALU_PC_G2 (61)
+# CHECK: Type: R_ARM_LDR_PC_G1 (62)
+# CHECK: Type: R_ARM_LDR_PC_G2 (63)
+# CHECK: Type: R_ARM_LDRS_PC_G0 (64)
+# CHECK: Type: R_ARM_LDRS_PC_G1 (65)
+# CHECK: Type: R_ARM_LDRS_PC_G2 (66)
+# CHECK: Type: R_ARM_LDC_PC_G0 (67)
+# CHECK: Type: R_ARM_LDC_PC_G1 (68)
+# CHECK: Type: R_ARM_LDC_PC_G2 (69)
+# CHECK: Type: R_ARM_ALU_SB_G0_NC (70)
+# CHECK: Type: R_ARM_ALU_SB_G0 (71)
+# CHECK: Type: R_ARM_ALU_SB_G1_NC (72)
+# CHECK: Type: R_ARM_ALU_SB_G1 (73)
+# CHECK: Type: R_ARM_ALU_SB_G2 (74)
+# CHECK: Type: R_ARM_LDR_SB_G0 (75)
+# CHECK: Type: R_ARM_LDR_SB_G1 (76)
+# CHECK: Type: R_ARM_LDR_SB_G2 (77)
+# CHECK: Type: R_ARM_LDRS_SB_G0 (78)
+# CHECK: Type: R_ARM_LDRS_SB_G1 (79)
+# CHECK: Type: R_ARM_LDRS_SB_G2 (80)
+# CHECK: Type: R_ARM_LDC_SB_G0 (81)
+# CHECK: Type: R_ARM_LDC_SB_G1 (82)
+# CHECK: Type: R_ARM_LDC_SB_G2 (83)
+# CHECK: Type: R_ARM_MOVW_BREL_NC (84)
+# CHECK: Type: R_ARM_MOVT_BREL (85)
+# CHECK: Type: R_ARM_MOVW_BREL (86)
+# CHECK: Type: R_ARM_THM_MOVW_BREL_NC (87)
+# CHECK: Type: R_ARM_THM_MOVT_BREL (88)
+# CHECK: Type: R_ARM_THM_MOVW_BREL (89)
+# CHECK: Type: R_ARM_TLS_GOTDESC (90)
+# CHECK: Type: R_ARM_TLS_CALL (91)
+# CHECK: Type: R_ARM_TLS_DESCSEQ (92)
+# CHECK: Type: R_ARM_THM_TLS_CALL (93)
+# CHECK: Type: R_ARM_PLT32_ABS (94)
+# CHECK: Type: R_ARM_GOT_ABS (95)
+# CHECK: Type: R_ARM_GOT_PREL (96)
+# CHECK: Type: R_ARM_GOT_BREL12 (97)
+# CHECK: Type: R_ARM_GOTOFF12 (98)
+# CHECK: Type: R_ARM_GOTRELAX (99)
+# CHECK: Type: R_ARM_GNU_VTENTRY (100)
+# CHECK: Type: R_ARM_GNU_VTINHERIT (101)
+# CHECK: Type: R_ARM_THM_JUMP11 (102)
+# CHECK: Type: R_ARM_THM_JUMP8 (103)
+# CHECK: Type: R_ARM_TLS_GD32 (104)
+# CHECK: Type: R_ARM_TLS_LDM32 (105)
+# CHECK: Type: R_ARM_TLS_LDO32 (106)
+# CHECK: Type: R_ARM_TLS_IE32 (107)
+# CHECK: Type: R_ARM_TLS_LE32 (108)
+# CHECK: Type: R_ARM_TLS_LDO12 (109)
+# CHECK: Type: R_ARM_TLS_LE12 (110)
+# CHECK: Type: R_ARM_TLS_IE12GP (111)
+# CHECK: Type: R_ARM_PRIVATE_0 (112)
+# CHECK: Type: R_ARM_PRIVATE_1 (113)
+# CHECK: Type: R_ARM_PRIVATE_2 (114)
+# CHECK: Type: R_ARM_PRIVATE_3 (115)
+# CHECK: Type: R_ARM_PRIVATE_4 (116)
+# CHECK: Type: R_ARM_PRIVATE_5 (117)
+# CHECK: Type: R_ARM_PRIVATE_6 (118)
+# CHECK: Type: R_ARM_PRIVATE_7 (119)
+# CHECK: Type: R_ARM_PRIVATE_8 (120)
+# CHECK: Type: R_ARM_PRIVATE_9 (121)
+# CHECK: Type: R_ARM_PRIVATE_10 (122)
+# CHECK: Type: R_ARM_PRIVATE_11 (123)
+# CHECK: Type: R_ARM_PRIVATE_12 (124)
+# CHECK: Type: R_ARM_PRIVATE_13 (125)
+# CHECK: Type: R_ARM_PRIVATE_14 (126)
+# CHECK: Type: R_ARM_PRIVATE_15 (127)
+# CHECK: Type: R_ARM_ME_TOO (128)
+# CHECK: Type: R_ARM_THM_TLS_DESCSEQ16 (129)
+# CHECK: Type: R_ARM_THM_TLS_DESCSEQ32 (130)
+# CHECK: Type: R_ARM_IRELATIVE (160)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS32
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_ARM
+  Flags:   [ EF_ARM_EABI_VER5 ]
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Content: 00
+  - Name:         .rel.text
+    Type:         SHT_REL
+    Link:         .symtab
+    AddressAlign: 0x0000000000000004
+    EntSize:      0x0000000000000008
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000000
+        Type:   R_ARM_NONE
+      - Offset: 0x0000000000000004
+        Type:   R_ARM_PC24
+      - Offset: 0x0000000000000008
+        Type:   R_ARM_ABS32
+      - Offset: 0x000000000000000C
+        Type:   R_ARM_REL32
+      - Offset: 0x0000000000000010
+        Type:   R_ARM_LDR_PC_G0
+      - Offset: 0x0000000000000014
+        Type:   R_ARM_ABS16
+      - Offset: 0x0000000000000018
+        Type:   R_ARM_ABS12
+      - Offset: 0x000000000000001C
+        Type:   R_ARM_THM_ABS5
+      - Offset: 0x0000000000000020
+        Type:   R_ARM_ABS8
+      - Offset: 0x0000000000000024
+        Type:   R_ARM_SBREL32
+      - Offset: 0x0000000000000028
+        Type:   R_ARM_THM_CALL
+      - Offset: 0x000000000000002C
+        Type:   R_ARM_THM_PC8
+      - Offset: 0x0000000000000030
+        Type:   R_ARM_BREL_ADJ
+      - Offset: 0x0000000000000034
+        Type:   R_ARM_TLS_DESC
+      - Offset: 0x0000000000000038
+        Type:   R_ARM_THM_SWI8
+      - Offset: 0x000000000000003C
+        Type:   R_ARM_XPC25
+      - Offset: 0x0000000000000040
+        Type:   R_ARM_THM_XPC22
+      - Offset: 0x0000000000000044
+        Type:   R_ARM_TLS_DTPMOD32
+      - Offset: 0x0000000000000048
+        Type:   R_ARM_TLS_DTPOFF32
+      - Offset: 0x000000000000004C
+        Type:   R_ARM_TLS_TPOFF32
+      - Offset: 0x0000000000000050
+        Type:   R_ARM_COPY
+      - Offset: 0x0000000000000054
+        Type:   R_ARM_GLOB_DAT
+      - Offset: 0x0000000000000058
+        Type:   R_ARM_JUMP_SLOT
+      - Offset: 0x000000000000005C
+        Type:   R_ARM_RELATIVE
+      - Offset: 0x0000000000000060
+        Type:   R_ARM_GOTOFF32
+      - Offset: 0x0000000000000064
+        Type:   R_ARM_BASE_PREL
+      - Offset: 0x0000000000000068
+        Type:   R_ARM_GOT_BREL
+      - Offset: 0x000000000000006C
+        Type:   R_ARM_PLT32
+      - Offset: 0x0000000000000070
+        Type:   R_ARM_CALL
+      - Offset: 0x0000000000000074
+        Type:   R_ARM_JUMP24
+      - Offset: 0x0000000000000078
+        Type:   R_ARM_THM_JUMP24
+      - Offset: 0x000000000000007C
+        Type:   R_ARM_BASE_ABS
+      - Offset: 0x0000000000000080
+        Type:   R_ARM_ALU_PCREL_7_0
+      - Offset: 0x0000000000000084
+        Type:   R_ARM_ALU_PCREL_15_8
+      - Offset: 0x0000000000000088
+        Type:   R_ARM_ALU_PCREL_23_15
+      - Offset: 0x000000000000008C
+        Type:   R_ARM_LDR_SBREL_11_0_NC
+      - Offset: 0x0000000000000090
+        Type:   R_ARM_ALU_SBREL_19_12_NC
+      - Offset: 0x0000000000000094
+        Type:   R_ARM_ALU_SBREL_27_20_CK
+      - Offset: 0x0000000000000098
+        Type:   R_ARM_TARGET1
+      - Offset: 0x000000000000009C
+        Type:   R_ARM_SBREL31
+      - Offset: 0x00000000000000A0
+        Type:   R_ARM_V4BX
+      - Offset: 0x00000000000000A4
+        Type:   R_ARM_TARGET2
+      - Offset: 0x00000000000000A8
+        Type:   R_ARM_PREL31
+      - Offset: 0x00000000000000AC
+        Type:   R_ARM_MOVW_ABS_NC
+      - Offset: 0x00000000000000B0
+        Type:   R_ARM_MOVT_ABS
+      - Offset: 0x00000000000000B4
+        Type:   R_ARM_MOVW_PREL_NC
+      - Offset: 0x00000000000000B8
+        Type:   R_ARM_MOVT_PREL
+      - Offset: 0x00000000000000BC
+        Type:   R_ARM_THM_MOVW_ABS_NC
+      - Offset: 0x00000000000000C0
+        Type:   R_ARM_THM_MOVT_ABS
+      - Offset: 0x00000000000000C4
+        Type:   R_ARM_THM_MOVW_PREL_NC
+      - Offset: 0x00000000000000C8
+        Type:   R_ARM_THM_MOVT_PREL
+      - Offset: 0x00000000000000CC
+        Type:   R_ARM_THM_JUMP19
+      - Offset: 0x00000000000000D0
+        Type:   R_ARM_THM_JUMP6
+      - Offset: 0x00000000000000D4
+        Type:   R_ARM_THM_ALU_PREL_11_0
+      - Offset: 0x00000000000000D8
+        Type:   R_ARM_THM_PC12
+      - Offset: 0x00000000000000DC
+        Type:   R_ARM_ABS32_NOI
+      - Offset: 0x00000000000000E0
+        Type:   R_ARM_REL32_NOI
+      - Offset: 0x00000000000000E4
+        Type:   R_ARM_ALU_PC_G0_NC
+      - Offset: 0x00000000000000E8
+        Type:   R_ARM_ALU_PC_G0
+      - Offset: 0x00000000000000EC
+        Type:   R_ARM_ALU_PC_G1_NC
+      - Offset: 0x00000000000000F0
+        Type:   R_ARM_ALU_PC_G1
+      - Offset: 0x00000000000000F4
+        Type:   R_ARM_ALU_PC_G2
+      - Offset: 0x00000000000000F8
+        Type:   R_ARM_LDR_PC_G1
+      - Offset: 0x00000000000000FC
+        Type:   R_ARM_LDR_PC_G2
+      - Offset: 0x0000000000000100
+        Type:   R_ARM_LDRS_PC_G0
+      - Offset: 0x0000000000000104
+        Type:   R_ARM_LDRS_PC_G1
+      - Offset: 0x0000000000000108
+        Type:   R_ARM_LDRS_PC_G2
+      - Offset: 0x000000000000010C
+        Type:   R_ARM_LDC_PC_G0
+      - Offset: 0x0000000000000110
+        Type:   R_ARM_LDC_PC_G1
+      - Offset: 0x0000000000000114
+        Type:   R_ARM_LDC_PC_G2
+      - Offset: 0x0000000000000118
+        Type:   R_ARM_ALU_SB_G0_NC
+      - Offset: 0x000000000000011C
+        Type:   R_ARM_ALU_SB_G0
+      - Offset: 0x0000000000000120
+        Type:   R_ARM_ALU_SB_G1_NC
+      - Offset: 0x0000000000000124
+        Type:   R_ARM_ALU_SB_G1
+      - Offset: 0x0000000000000128
+        Type:   R_ARM_ALU_SB_G2
+      - Offset: 0x000000000000012C
+        Type:   R_ARM_LDR_SB_G0
+      - Offset: 0x0000000000000130
+        Type:   R_ARM_LDR_SB_G1
+      - Offset: 0x0000000000000134
+        Type:   R_ARM_LDR_SB_G2
+      - Offset: 0x0000000000000138
+        Type:   R_ARM_LDRS_SB_G0
+      - Offset: 0x000000000000013C
+        Type:   R_ARM_LDRS_SB_G1
+      - Offset: 0x0000000000000140
+        Type:   R_ARM_LDRS_SB_G2
+      - Offset: 0x0000000000000144
+        Type:   R_ARM_LDC_SB_G0
+      - Offset: 0x0000000000000148
+        Type:   R_ARM_LDC_SB_G1
+      - Offset: 0x000000000000014C
+        Type:   R_ARM_LDC_SB_G2
+      - Offset: 0x0000000000000150
+        Type:   R_ARM_MOVW_BREL_NC
+      - Offset: 0x0000000000000154
+        Type:   R_ARM_MOVT_BREL
+      - Offset: 0x0000000000000158
+        Type:   R_ARM_MOVW_BREL
+      - Offset: 0x000000000000015C
+        Type:   R_ARM_THM_MOVW_BREL_NC
+      - Offset: 0x0000000000000160
+        Type:   R_ARM_THM_MOVT_BREL
+      - Offset: 0x0000000000000164
+        Type:   R_ARM_THM_MOVW_BREL
+      - Offset: 0x0000000000000168
+        Type:   R_ARM_TLS_GOTDESC
+      - Offset: 0x000000000000016C
+        Type:   R_ARM_TLS_CALL
+      - Offset: 0x0000000000000170
+        Type:   R_ARM_TLS_DESCSEQ
+      - Offset: 0x0000000000000174
+        Type:   R_ARM_THM_TLS_CALL
+      - Offset: 0x0000000000000178
+        Type:   R_ARM_PLT32_ABS
+      - Offset: 0x000000000000017C
+        Type:   R_ARM_GOT_ABS
+      - Offset: 0x0000000000000180
+        Type:   R_ARM_GOT_PREL
+      - Offset: 0x0000000000000184
+        Type:   R_ARM_GOT_BREL12
+      - Offset: 0x0000000000000188
+        Type:   R_ARM_GOTOFF12
+      - Offset: 0x000000000000018C
+        Type:   R_ARM_GOTRELAX
+      - Offset: 0x0000000000000190
+        Type:   R_ARM_GNU_VTENTRY
+      - Offset: 0x0000000000000194
+        Type:   R_ARM_GNU_VTINHERIT
+      - Offset: 0x0000000000000198
+        Type:   R_ARM_THM_JUMP11
+      - Offset: 0x000000000000019C
+        Type:   R_ARM_THM_JUMP8
+      - Offset: 0x00000000000001A0
+        Type:   R_ARM_TLS_GD32
+      - Offset: 0x00000000000001A4
+        Type:   R_ARM_TLS_LDM32
+      - Offset: 0x00000000000001A8
+        Type:   R_ARM_TLS_LDO32
+      - Offset: 0x00000000000001AC
+        Type:   R_ARM_TLS_IE32
+      - Offset: 0x00000000000001B0
+        Type:   R_ARM_TLS_LE32
+      - Offset: 0x00000000000001B4
+        Type:   R_ARM_TLS_LDO12
+      - Offset: 0x00000000000001B8
+        Type:   R_ARM_TLS_LE12
+      - Offset: 0x00000000000001BC
+        Type:   R_ARM_TLS_IE12GP
+      - Offset: 0x00000000000001C0
+        Type:   R_ARM_PRIVATE_0
+      - Offset: 0x00000000000001C4
+        Type:   R_ARM_PRIVATE_1
+      - Offset: 0x00000000000001C8
+        Type:   R_ARM_PRIVATE_2
+      - Offset: 0x00000000000001CC
+        Type:   R_ARM_PRIVATE_3
+      - Offset: 0x00000000000001D0
+        Type:   R_ARM_PRIVATE_4
+      - Offset: 0x00000000000001D4
+        Type:   R_ARM_PRIVATE_5
+      - Offset: 0x00000000000001D8
+        Type:   R_ARM_PRIVATE_6
+      - Offset: 0x00000000000001DC
+        Type:   R_ARM_PRIVATE_7
+      - Offset: 0x00000000000001E0
+        Type:   R_ARM_PRIVATE_8
+      - Offset: 0x00000000000001E4
+        Type:   R_ARM_PRIVATE_9
+      - Offset: 0x00000000000001E8
+        Type:   R_ARM_PRIVATE_10
+      - Offset: 0x00000000000001EC
+        Type:   R_ARM_PRIVATE_11
+      - Offset: 0x00000000000001F0
+        Type:   R_ARM_PRIVATE_12
+      - Offset: 0x00000000000001F4
+        Type:   R_ARM_PRIVATE_13
+      - Offset: 0x00000000000001F8
+        Type:   R_ARM_PRIVATE_14
+      - Offset: 0x00000000000001FC
+        Type:   R_ARM_PRIVATE_15
+      - Offset: 0x0000000000000200
+        Type:   R_ARM_ME_TOO
+      - Offset: 0x0000000000000204
+        Type:   R_ARM_THM_TLS_DESCSEQ16
+      - Offset: 0x0000000000000208
+        Type:   R_ARM_THM_TLS_DESCSEQ32
+      - Offset: 0x000000000000020C
+        Type:   R_ARM_IRELATIVE
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test
new file mode 100644
index 0000000000000..b968757614751
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test
@@ -0,0 +1,148 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for i386 target.
+
+# RUN: yaml2obj %s -o %t-i386.o
+# RUN: llvm-readobj -r --expand-relocs %t-i386.o | FileCheck %s
+
+# CHECK: Type: R_386_NONE (0)
+# CHECK: Type: R_386_32 (1)
+# CHECK: Type: R_386_PC32 (2)
+# CHECK: Type: R_386_GOT32 (3)
+# CHECK: Type: R_386_PLT32 (4)
+# CHECK: Type: R_386_COPY (5)
+# CHECK: Type: R_386_GLOB_DAT (6)
+# CHECK: Type: R_386_JUMP_SLOT (7)
+# CHECK: Type: R_386_RELATIVE (8)
+# CHECK: Type: R_386_GOTOFF (9)
+# CHECK: Type: R_386_GOTPC (10)
+# CHECK: Type: R_386_32PLT (11)
+# CHECK: Type: R_386_TLS_TPOFF (14)
+# CHECK: Type: R_386_TLS_IE (15)
+# CHECK: Type: R_386_TLS_GOTIE (16)
+# CHECK: Type: R_386_TLS_LE (17)
+# CHECK: Type: R_386_TLS_GD (18)
+# CHECK: Type: R_386_TLS_LDM (19)
+# CHECK: Type: R_386_16 (20)
+# CHECK: Type: R_386_PC16 (21)
+# CHECK: Type: R_386_8 (22)
+# CHECK: Type: R_386_PC8 (23)
+# CHECK: Type: R_386_TLS_GD_32 (24)
+# CHECK: Type: R_386_TLS_GD_PUSH (25)
+# CHECK: Type: R_386_TLS_GD_CALL (26)
+# CHECK: Type: R_386_TLS_GD_POP (27)
+# CHECK: Type: R_386_TLS_LDM_32 (28)
+# CHECK: Type: R_386_TLS_LDM_PUSH (29)
+# CHECK: Type: R_386_TLS_LDM_CALL (30)
+# CHECK: Type: R_386_TLS_LDM_POP (31)
+# CHECK: Type: R_386_TLS_LDO_32 (32)
+# CHECK: Type: R_386_TLS_IE_32 (33)
+# CHECK: Type: R_386_TLS_LE_32 (34)
+# CHECK: Type: R_386_TLS_DTPMOD32 (35)
+# CHECK: Type: R_386_TLS_DTPOFF32 (36)
+# CHECK: Type: R_386_TLS_TPOFF32 (37)
+# CHECK: Type: R_386_TLS_GOTDESC (39)
+# CHECK: Type: R_386_TLS_DESC_CALL (40)
+# CHECK: Type: R_386_TLS_DESC (41)
+# CHECK: Type: R_386_IRELATIVE (42)
+# CHECK: Type: R_386_GOT32X (43)
+
+--- !ELF
+FileHeader:
+  Class:    ELFCLASS32
+  Data:     ELFDATA2LSB
+  OSABI:    ELFOSABI_GNU
+  Type:     ET_REL
+  Machine:  EM_386
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Content: 00
+  - Name:         .rel.text
+    Type:         SHT_REL
+    Link:         .symtab
+    AddressAlign: 0x0000000000000004
+    EntSize:      0x0000000000000008
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000002
+        Type:   R_386_NONE
+      - Offset: 0x0000000000000008
+        Type:   R_386_32
+      - Offset: 0x000000000000000E
+        Type:   R_386_PC32
+      - Offset: 0x0000000000000014
+        Type:   R_386_GOT32
+      - Offset: 0x000000000000001A
+        Type:   R_386_PLT32
+      - Offset: 0x0000000000000020
+        Type:   R_386_COPY
+      - Offset: 0x0000000000000026
+        Type:   R_386_GLOB_DAT
+      - Offset: 0x000000000000002C
+        Type:   R_386_JUMP_SLOT
+      - Offset: 0x0000000000000032
+        Type:   R_386_RELATIVE
+      - Offset: 0x0000000000000038
+        Type:   R_386_GOTOFF
+      - Offset: 0x000000000000003E
+        Type:   R_386_GOTPC
+      - Offset: 0x0000000000000044
+        Type:   R_386_32PLT
+      - Offset: 0x000000000000004A
+        Type:   R_386_TLS_TPOFF
+      - Offset: 0x0000000000000050
+        Type:   R_386_TLS_IE
+      - Offset: 0x0000000000000056
+        Type:   R_386_TLS_GOTIE
+      - Offset: 0x000000000000005C
+        Type:   R_386_TLS_LE
+      - Offset: 0x0000000000000062
+        Type:   R_386_TLS_GD
+      - Offset: 0x0000000000000068
+        Type:   R_386_TLS_LDM
+      - Offset: 0x000000000000006E
+        Type:   R_386_16
+      - Offset: 0x0000000000000074
+        Type:   R_386_PC16
+      - Offset: 0x000000000000007A
+        Type:   R_386_8
+      - Offset: 0x0000000000000080
+        Type:   R_386_PC8
+      - Offset: 0x0000000000000086
+        Type:   R_386_TLS_GD_32
+      - Offset: 0x000000000000008C
+        Type:   R_386_TLS_GD_PUSH
+      - Offset: 0x0000000000000092
+        Type:   R_386_TLS_GD_CALL
+      - Offset: 0x0000000000000098
+        Type:   R_386_TLS_GD_POP
+      - Offset: 0x000000000000009E
+        Type:   R_386_TLS_LDM_32
+      - Offset: 0x00000000000000A4
+        Type:   R_386_TLS_LDM_PUSH
+      - Offset: 0x00000000000000AA
+        Type:   R_386_TLS_LDM_CALL
+      - Offset: 0x00000000000000B0
+        Type:   R_386_TLS_LDM_POP
+      - Offset: 0x00000000000000B6
+        Type:   R_386_TLS_LDO_32
+      - Offset: 0x00000000000000BC
+        Type:   R_386_TLS_IE_32
+      - Offset: 0x00000000000000C2
+        Type:   R_386_TLS_LE_32
+      - Offset: 0x00000000000000C8
+        Type:   R_386_TLS_DTPMOD32
+      - Offset: 0x00000000000000CE
+        Type:   R_386_TLS_DTPOFF32
+      - Offset: 0x00000000000000D4
+        Type:   R_386_TLS_TPOFF32
+      - Offset: 0x00000000000000DA
+        Type:   R_386_TLS_GOTDESC
+      - Offset: 0x00000000000000E0
+        Type:   R_386_TLS_DESC_CALL
+      - Offset: 0x00000000000000E6
+        Type:   R_386_TLS_DESC
+      - Offset: 0x00000000000000EC
+        Type:   R_386_IRELATIVE
+      - Offset: 0x00000000000000F2
+        Type:   R_386_GOT32X
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test
new file mode 100644
index 0000000000000..270e2c397d3ac
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-lanai.test
@@ -0,0 +1,47 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for lanai target.
+
+# RUN: yaml2obj %s -o %t-lanai.o
+# RUN: llvm-readobj -r --expand-relocs %t-lanai.o | FileCheck %s
+
+# CHECK: Type: R_LANAI_NONE (0)
+# CHECK: Type: R_LANAI_21 (1)
+# CHECK: Type: R_LANAI_21_F (2)
+# CHECK: Type: R_LANAI_25 (3)
+# CHECK: Type: R_LANAI_32 (4)
+# CHECK: Type: R_LANAI_HI16 (5)
+# CHECK: Type: R_LANAI_LO16 (6)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS32
+  Data:    ELFDATA2MSB
+  Type:    ET_REL
+  Machine: EM_LANAI
+Sections:
+  - Name:         .text
+    Type:         SHT_PROGBITS
+    Flags:        [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign: 0x0000000000000004
+    Content:      00
+  - Name:         .rela.text
+    Type:         SHT_RELA
+    Link:         .symtab
+    AddressAlign: 0x0000000000000004
+    EntSize:      0x000000000000000C
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000000
+        Type:   R_LANAI_NONE
+      - Offset: 0x0000000000000004
+        Type:   R_LANAI_21
+      - Offset: 0x0000000000000008
+        Type:   R_LANAI_21_F
+      - Offset: 0x000000000000000C
+        Type:   R_LANAI_25
+      - Offset: 0x0000000000000010
+        Type:   R_LANAI_32
+      - Offset: 0x0000000000000014
+        Type:   R_LANAI_HI16
+      - Offset: 0x0000000000000018
+        Type:   R_LANAI_LO16
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test
new file mode 100644
index 0000000000000..b948a3d5a6d0f
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips.test
@@ -0,0 +1,181 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for mips target.
+
+# RUN: yaml2obj %s -o %t-mips.o
+# RUN: llvm-readobj -r --expand-relocs %t-mips.o | FileCheck %s
+
+# CHECK: Type: R_MIPS_NONE (0)
+# CHECK: Type: R_MIPS_16 (1)
+# CHECK: Type: R_MIPS_32 (2)
+# CHECK: Type: R_MIPS_REL32 (3)
+# CHECK: Type: R_MIPS_26 (4)
+# CHECK: Type: R_MIPS_HI16 (5)
+# CHECK: Type: R_MIPS_LO16 (6)
+# CHECK: Type: R_MIPS_GPREL16 (7)
+# CHECK: Type: R_MIPS_LITERAL (8)
+# CHECK: Type: R_MIPS_GOT16 (9)
+# CHECK: Type: R_MIPS_PC16 (10)
+# CHECK: Type: R_MIPS_CALL16 (11)
+# CHECK: Type: R_MIPS_GPREL32 (12)
+# CHECK: Type: R_MIPS_SHIFT5 (16)
+# CHECK: Type: R_MIPS_SHIFT6 (17)
+# CHECK: Type: R_MIPS_64 (18)
+# CHECK: Type: R_MIPS_GOT_DISP (19)
+# CHECK: Type: R_MIPS_GOT_PAGE (20)
+# CHECK: Type: R_MIPS_GOT_OFST (21)
+# CHECK: Type: R_MIPS_GOT_HI16 (22)
+# CHECK: Type: R_MIPS_GOT_LO16 (23)
+# CHECK: Type: R_MIPS_SUB (24)
+# CHECK: Type: R_MIPS_INSERT_A (25)
+# CHECK: Type: R_MIPS_INSERT_B (26)
+# CHECK: Type: R_MIPS_DELETE (27)
+# CHECK: Type: R_MIPS_HIGHER (28)
+# CHECK: Type: R_MIPS_HIGHEST (29)
+# CHECK: Type: R_MIPS_CALL_HI16 (30)
+# CHECK: Type: R_MIPS_CALL_LO16 (31)
+# CHECK: Type: R_MIPS_SCN_DISP (32)
+# CHECK: Type: R_MIPS_REL16 (33)
+# CHECK: Type: R_MIPS_ADD_IMMEDIATE (34)
+# CHECK: Type: R_MIPS_PJUMP (35)
+# CHECK: Type: R_MIPS_RELGOT (36)
+# CHECK: Type: R_MIPS_JALR (37)
+# CHECK: Type: R_MIPS_TLS_DTPMOD32 (38)
+# CHECK: Type: R_MIPS_TLS_DTPREL32 (39)
+# CHECK: Type: R_MIPS_TLS_DTPMOD64 (40)
+# CHECK: Type: R_MIPS_TLS_DTPREL64 (41)
+# CHECK: Type: R_MIPS_TLS_GD (42)
+# CHECK: Type: R_MIPS_TLS_LDM (43)
+# CHECK: Type: R_MIPS_TLS_DTPREL_HI16 (44)
+# CHECK: Type: R_MIPS_TLS_DTPREL_LO16 (45)
+# CHECK: Type: R_MIPS_TLS_GOTTPREL (46)
+# CHECK: Type: R_MIPS_TLS_TPREL32 (47)
+# CHECK: Type: R_MIPS_TLS_TPREL64 (48)
+# CHECK: Type: R_MIPS_TLS_TPREL_HI16 (49)
+# CHECK: Type: R_MIPS_TLS_TPREL_LO16 (50)
+# CHECK: Type: R_MIPS_GLOB_DAT (51)
+# CHECK: Type: R_MIPS_COPY (126)
+# CHECK: Type: R_MIPS_JUMP_SLOT (127)
+# CHECK: Type: R_MIPS_NUM (218)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS32
+  Data:    ELFDATA2MSB
+  OSABI:   ELFOSABI_GNU
+  Type:    ET_REL
+  Machine: EM_MIPS
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Content: 00
+  - Name:         .rel.text
+    Type:         SHT_REL
+    Link:         .symtab
+    AddressAlign: 0x0000000000000004
+    EntSize:      0x0000000000000008
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000000
+        Type:   R_MIPS_NONE
+      - Offset: 0x0000000000000004
+        Type:   R_MIPS_16
+      - Offset: 0x0000000000000008
+        Type:   R_MIPS_32
+      - Offset: 0x000000000000000C
+        Type:   R_MIPS_REL32
+      - Offset: 0x0000000000000010
+        Type:   R_MIPS_26
+      - Offset: 0x0000000000000014
+        Type:   R_MIPS_HI16
+      - Offset: 0x0000000000000018
+        Type:   R_MIPS_LO16
+      - Offset: 0x000000000000001C
+        Type:   R_MIPS_GPREL16
+      - Offset: 0x0000000000000020
+        Type:   R_MIPS_LITERAL
+      - Offset: 0x0000000000000024
+        Type:   R_MIPS_GOT16
+      - Offset: 0x0000000000000028
+        Type:   R_MIPS_PC16
+      - Offset: 0x000000000000002C
+        Type:   R_MIPS_CALL16
+      - Offset: 0x0000000000000030
+        Type:   R_MIPS_GPREL32
+      - Offset: 0x0000000000000034
+        Type:   R_MIPS_SHIFT5
+      - Offset: 0x0000000000000038
+        Type:   R_MIPS_SHIFT6
+      - Offset: 0x000000000000003C
+        Type:   R_MIPS_64
+      - Offset: 0x0000000000000040
+        Type:   R_MIPS_GOT_DISP
+      - Offset: 0x0000000000000044
+        Type:   R_MIPS_GOT_PAGE
+      - Offset: 0x0000000000000048
+        Type:   R_MIPS_GOT_OFST
+      - Offset: 0x000000000000004C
+        Type:   R_MIPS_GOT_HI16
+      - Offset: 0x0000000000000050
+        Type:   R_MIPS_GOT_LO16
+      - Offset: 0x0000000000000054
+        Type:   R_MIPS_SUB
+      - Offset: 0x0000000000000058
+        Type:   R_MIPS_INSERT_A
+      - Offset: 0x000000000000005C
+        Type:   R_MIPS_INSERT_B
+      - Offset: 0x0000000000000060
+        Type:   R_MIPS_DELETE
+      - Offset: 0x0000000000000064
+        Type:   R_MIPS_HIGHER
+      - Offset: 0x0000000000000068
+        Type:   R_MIPS_HIGHEST
+      - Offset: 0x000000000000006C
+        Type:   R_MIPS_CALL_HI16
+      - Offset: 0x0000000000000070
+        Type:   R_MIPS_CALL_LO16
+      - Offset: 0x0000000000000074
+        Type:   R_MIPS_SCN_DISP
+      - Offset: 0x0000000000000078
+        Type:   R_MIPS_REL16
+      - Offset: 0x000000000000007C
+        Type:   R_MIPS_ADD_IMMEDIATE
+      - Offset: 0x0000000000000080
+        Type:   R_MIPS_PJUMP
+      - Offset: 0x0000000000000084
+        Type:   R_MIPS_RELGOT
+      - Offset: 0x0000000000000088
+        Type:   R_MIPS_JALR
+      - Offset: 0x000000000000008C
+        Type:   R_MIPS_TLS_DTPMOD32
+      - Offset: 0x0000000000000090
+        Type:   R_MIPS_TLS_DTPREL32
+      - Offset: 0x0000000000000094
+        Type:   R_MIPS_TLS_DTPMOD64
+      - Offset: 0x0000000000000098
+        Type:   R_MIPS_TLS_DTPREL64
+      - Offset: 0x000000000000009C
+        Type:   R_MIPS_TLS_GD
+      - Offset: 0x00000000000000A0
+        Type:   R_MIPS_TLS_LDM
+      - Offset: 0x00000000000000A4
+        Type:   R_MIPS_TLS_DTPREL_HI16
+      - Offset: 0x00000000000000A8
+        Type:   R_MIPS_TLS_DTPREL_LO16
+      - Offset: 0x00000000000000AC
+        Type:   R_MIPS_TLS_GOTTPREL
+      - Offset: 0x00000000000000B0
+        Type:   R_MIPS_TLS_TPREL32
+      - Offset: 0x00000000000000B4
+        Type:   R_MIPS_TLS_TPREL64
+      - Offset: 0x00000000000000B8
+        Type:   R_MIPS_TLS_TPREL_HI16
+      - Offset: 0x00000000000000BC
+        Type:   R_MIPS_TLS_TPREL_LO16
+      - Offset: 0x00000000000000C0
+        Type:   R_MIPS_GLOB_DAT
+      - Offset: 0x00000000000000C4
+        Type:   R_MIPS_COPY
+      - Offset: 0x00000000000000C8
+        Type:   R_MIPS_JUMP_SLOT
+      - Offset: 0x00000000000000CC
+        Type:   R_MIPS_NUM
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test
new file mode 100644
index 0000000000000..f04064217bcb6
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-mips64.test
@@ -0,0 +1,283 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for mips64 target.
+
+# RUN: yaml2obj %s -o %t-mips64.o
+# RUN: llvm-readobj -r --expand-relocs %t-mips64.o | FileCheck %s
+
+# CHECK: Type: R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE (0)
+# CHECK: Type: R_MIPS_16/R_MIPS_16/R_MIPS_16 (65793)
+# CHECK: Type: R_MIPS_32/R_MIPS_32/R_MIPS_32 (131586)
+# CHECK: Type: R_MIPS_REL32/R_MIPS_REL32/R_MIPS_REL32 (197379)
+# CHECK: Type: R_MIPS_26/R_MIPS_26/R_MIPS_26 (263172)
+# CHECK: Type: R_MIPS_HI16/R_MIPS_HI16/R_MIPS_HI16 (328965)
+# CHECK: Type: R_MIPS_LO16/R_MIPS_LO16/R_MIPS_LO16 (394758)
+# CHECK: Type: R_MIPS_GPREL16/R_MIPS_GPREL16/R_MIPS_GPREL16 (460551)
+# CHECK: Type: R_MIPS_LITERAL/R_MIPS_LITERAL/R_MIPS_LITERAL (526344)
+# CHECK: Type: R_MIPS_GOT16/R_MIPS_GOT16/R_MIPS_GOT16 (592137)
+# CHECK: Type: R_MIPS_PC16/R_MIPS_PC16/R_MIPS_PC16 (657930)
+# CHECK: Type: R_MIPS_CALL16/R_MIPS_CALL16/R_MIPS_CALL16 (723723)
+# CHECK: Type: R_MIPS_GPREL32/R_MIPS_GPREL32/R_MIPS_GPREL32 (789516)
+# CHECK: Type: R_MIPS_SHIFT5/R_MIPS_SHIFT5/R_MIPS_SHIFT5 (1052688)
+# CHECK: Type: R_MIPS_SHIFT6/R_MIPS_SHIFT6/R_MIPS_SHIFT6 (1118481)
+# CHECK: Type: R_MIPS_64/R_MIPS_64/R_MIPS_64 (1184274)
+# CHECK: Type: R_MIPS_GOT_DISP/R_MIPS_GOT_DISP/R_MIPS_GOT_DISP (1250067)
+# CHECK: Type: R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE (1315860)
+# CHECK: Type: R_MIPS_GOT_OFST/R_MIPS_GOT_OFST/R_MIPS_GOT_OFST (1381653)
+# CHECK: Type: R_MIPS_GOT_HI16/R_MIPS_GOT_HI16/R_MIPS_GOT_HI16 (1447446)
+# CHECK: Type: R_MIPS_GOT_LO16/R_MIPS_GOT_LO16/R_MIPS_GOT_LO16 (1513239)
+# CHECK: Type: R_MIPS_SUB/R_MIPS_SUB/R_MIPS_SUB (1579032)
+# CHECK: Type: R_MIPS_INSERT_A/R_MIPS_INSERT_A/R_MIPS_INSERT_A (1644825)
+# CHECK: Type: R_MIPS_INSERT_B/R_MIPS_INSERT_B/R_MIPS_INSERT_B (1710618)
+# CHECK: Type: R_MIPS_DELETE/R_MIPS_DELETE/R_MIPS_DELETE (1776411)
+# CHECK: Type: R_MIPS_HIGHER/R_MIPS_HIGHER/R_MIPS_HIGHER (1842204)
+# CHECK: Type: R_MIPS_HIGHEST/R_MIPS_HIGHEST/R_MIPS_HIGHEST (1907997)
+# CHECK: Type: R_MIPS_CALL_HI16/R_MIPS_CALL_HI16/R_MIPS_CALL_HI16 (1973790)
+# CHECK: Type: R_MIPS_CALL_LO16/R_MIPS_CALL_LO16/R_MIPS_CALL_LO16 (2039583)
+# CHECK: Type: R_MIPS_SCN_DISP/R_MIPS_SCN_DISP/R_MIPS_SCN_DISP (2105376)
+# CHECK: Type: R_MIPS_REL16/R_MIPS_REL16/R_MIPS_REL16 (2171169)
+# CHECK: Type: R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE (2236962)
+# CHECK: Type: R_MIPS_PJUMP/R_MIPS_PJUMP/R_MIPS_PJUMP (2302755)
+# CHECK: Type: R_MIPS_RELGOT/R_MIPS_RELGOT/R_MIPS_RELGOT (2368548)
+# CHECK: Type: R_MIPS_JALR/R_MIPS_JALR/R_MIPS_JALR (2434341)
+# CHECK: Type: R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32 (2500134)
+# CHECK: Type: R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32 (2565927)
+# CHECK: Type: R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64 (2631720)
+# CHECK: Type: R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64 (2697513)
+# CHECK: Type: R_MIPS_TLS_GD/R_MIPS_TLS_GD/R_MIPS_TLS_GD (2763306)
+# CHECK: Type: R_MIPS_TLS_LDM/R_MIPS_TLS_LDM/R_MIPS_TLS_LDM (2829099)
+# CHECK: Type: R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16 (2894892)
+# CHECK: Type: R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16 (2960685)
+# CHECK: Type: R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL (3026478)
+# CHECK: Type: R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32 (3092271)
+# CHECK: Type: R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64 (3158064)
+# CHECK: Type: R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16 (3223857)
+# CHECK: Type: R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16 (3289650)
+# CHECK: Type: R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT (3355443)
+# CHECK: Type: R_MIPS_COPY/R_MIPS_COPY/R_MIPS_COPY (8289918)
+# CHECK: Type: R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT (8355711)
+# CHECK: Type: R_MIPS_NUM/R_MIPS_NUM/R_MIPS_NUM (14342874)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  OSABI:   ELFOSABI_GNU
+  Type:    ET_REL
+  Machine: EM_MIPS
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Content: 00
+  - Name:         .rela.text
+    Type:         SHT_RELA
+    Link:         .symtab
+    AddressAlign: 0x0000000000000008
+    EntSize:      0x0000000000000018
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000000
+        Type:   R_MIPS_NONE
+      - Offset: 0x0000000000000004
+        Type:   R_MIPS_16
+        Type2:  R_MIPS_16
+        Type3:  R_MIPS_16
+      - Offset: 0x0000000000000008
+        Type:   R_MIPS_32
+        Type2:  R_MIPS_32
+        Type3:  R_MIPS_32
+      - Offset: 0x000000000000000C
+        Type:   R_MIPS_REL32
+        Type2:  R_MIPS_REL32
+        Type3:  R_MIPS_REL32
+      - Offset: 0x0000000000000010
+        Type:   R_MIPS_26
+        Type2:  R_MIPS_26
+        Type3:  R_MIPS_26
+      - Offset: 0x0000000000000014
+        Type:   R_MIPS_HI16
+        Type2:  R_MIPS_HI16
+        Type3:  R_MIPS_HI16
+      - Offset: 0x0000000000000018
+        Type:   R_MIPS_LO16
+        Type2:  R_MIPS_LO16
+        Type3:  R_MIPS_LO16
+      - Offset: 0x000000000000001C
+        Type:   R_MIPS_GPREL16
+        Type2:  R_MIPS_GPREL16
+        Type3:  R_MIPS_GPREL16
+      - Offset: 0x0000000000000020
+        Type:   R_MIPS_LITERAL
+        Type2:  R_MIPS_LITERAL
+        Type3:  R_MIPS_LITERAL
+      - Offset: 0x0000000000000024
+        Type:   R_MIPS_GOT16
+        Type2:  R_MIPS_GOT16
+        Type3:  R_MIPS_GOT16
+      - Offset: 0x0000000000000028
+        Type:   R_MIPS_PC16
+        Type2:  R_MIPS_PC16
+        Type3:  R_MIPS_PC16
+      - Offset: 0x000000000000002C
+        Type:   R_MIPS_CALL16
+        Type2:  R_MIPS_CALL16
+        Type3:  R_MIPS_CALL16
+      - Offset: 0x0000000000000030
+        Type:   R_MIPS_GPREL32
+        Type2:  R_MIPS_GPREL32
+        Type3:  R_MIPS_GPREL32
+      - Offset: 0x0000000000000034
+        Type:   R_MIPS_SHIFT5
+        Type2:  R_MIPS_SHIFT5
+        Type3:  R_MIPS_SHIFT5
+      - Offset: 0x0000000000000038
+        Type:   R_MIPS_SHIFT6
+        Type2:  R_MIPS_SHIFT6
+        Type3:  R_MIPS_SHIFT6
+      - Offset: 0x000000000000003C
+        Type:   R_MIPS_64
+        Type2:  R_MIPS_64
+        Type3:  R_MIPS_64
+      - Offset: 0x0000000000000040
+        Type:   R_MIPS_GOT_DISP
+        Type2:  R_MIPS_GOT_DISP
+        Type3:  R_MIPS_GOT_DISP
+      - Offset: 0x0000000000000044
+        Type:   R_MIPS_GOT_PAGE
+        Type2:  R_MIPS_GOT_PAGE
+        Type3:  R_MIPS_GOT_PAGE
+      - Offset: 0x0000000000000048
+        Type:   R_MIPS_GOT_OFST
+        Type2:  R_MIPS_GOT_OFST
+        Type3:  R_MIPS_GOT_OFST
+      - Offset: 0x000000000000004C
+        Type:   R_MIPS_GOT_HI16
+        Type2:  R_MIPS_GOT_HI16
+        Type3:  R_MIPS_GOT_HI16
+      - Offset: 0x0000000000000050
+        Type:   R_MIPS_GOT_LO16
+        Type2:  R_MIPS_GOT_LO16
+        Type3:  R_MIPS_GOT_LO16
+      - Offset: 0x0000000000000054
+        Type:   R_MIPS_SUB
+        Type2:  R_MIPS_SUB
+        Type3:  R_MIPS_SUB
+      - Offset: 0x0000000000000058
+        Type:   R_MIPS_INSERT_A
+        Type2:  R_MIPS_INSERT_A
+        Type3:  R_MIPS_INSERT_A
+      - Offset: 0x000000000000005C
+        Type:   R_MIPS_INSERT_B
+        Type2:  R_MIPS_INSERT_B
+        Type3:  R_MIPS_INSERT_B
+      - Offset: 0x0000000000000060
+        Type:   R_MIPS_DELETE
+        Type2:  R_MIPS_DELETE
+        Type3:  R_MIPS_DELETE
+      - Offset: 0x0000000000000064
+        Type:   R_MIPS_HIGHER
+        Type2:  R_MIPS_HIGHER
+        Type3:  R_MIPS_HIGHER
+      - Offset: 0x0000000000000068
+        Type:   R_MIPS_HIGHEST
+        Type2:  R_MIPS_HIGHEST
+        Type3:  R_MIPS_HIGHEST
+      - Offset: 0x000000000000006C
+        Type:   R_MIPS_CALL_HI16
+        Type2:  R_MIPS_CALL_HI16
+        Type3:  R_MIPS_CALL_HI16
+      - Offset: 0x0000000000000070
+        Type:   R_MIPS_CALL_LO16
+        Type2:  R_MIPS_CALL_LO16
+        Type3:  R_MIPS_CALL_LO16
+      - Offset: 0x0000000000000074
+        Type:   R_MIPS_SCN_DISP
+        Type2:  R_MIPS_SCN_DISP
+        Type3:  R_MIPS_SCN_DISP
+      - Offset: 0x0000000000000078
+        Type:   R_MIPS_REL16
+        Type2:  R_MIPS_REL16
+        Type3:  R_MIPS_REL16
+      - Offset: 0x000000000000007C
+        Type:   R_MIPS_ADD_IMMEDIATE
+        Type2:  R_MIPS_ADD_IMMEDIATE
+        Type3:  R_MIPS_ADD_IMMEDIATE
+      - Offset: 0x0000000000000080
+        Type:   R_MIPS_PJUMP
+        Type2:  R_MIPS_PJUMP
+        Type3:  R_MIPS_PJUMP
+      - Offset: 0x0000000000000084
+        Type:   R_MIPS_RELGOT
+        Type2:  R_MIPS_RELGOT
+        Type3:  R_MIPS_RELGOT
+      - Offset: 0x0000000000000088
+        Type:   R_MIPS_JALR
+        Type2:  R_MIPS_JALR
+        Type3:  R_MIPS_JALR
+      - Offset: 0x000000000000008C
+        Type:   R_MIPS_TLS_DTPMOD32
+        Type2:  R_MIPS_TLS_DTPMOD32
+        Type3:  R_MIPS_TLS_DTPMOD32
+      - Offset: 0x0000000000000090
+        Type:   R_MIPS_TLS_DTPREL32
+        Type2:  R_MIPS_TLS_DTPREL32
+        Type3:  R_MIPS_TLS_DTPREL32
+      - Offset: 0x0000000000000094
+        Type:   R_MIPS_TLS_DTPMOD64
+        Type2:  R_MIPS_TLS_DTPMOD64
+        Type3:  R_MIPS_TLS_DTPMOD64
+      - Offset: 0x0000000000000098
+        Type:   R_MIPS_TLS_DTPREL64
+        Type2:  R_MIPS_TLS_DTPREL64
+        Type3:  R_MIPS_TLS_DTPREL64
+      - Offset: 0x000000000000009C
+        Type:   R_MIPS_TLS_GD
+        Type2:  R_MIPS_TLS_GD
+        Type3:  R_MIPS_TLS_GD
+      - Offset: 0x00000000000000A0
+        Type:   R_MIPS_TLS_LDM
+        Type2:  R_MIPS_TLS_LDM
+        Type3:  R_MIPS_TLS_LDM
+      - Offset: 0x00000000000000A4
+        Type:   R_MIPS_TLS_DTPREL_HI16
+        Type2:  R_MIPS_TLS_DTPREL_HI16
+        Type3:  R_MIPS_TLS_DTPREL_HI16
+      - Offset: 0x00000000000000A8
+        Type:   R_MIPS_TLS_DTPREL_LO16
+        Type2:  R_MIPS_TLS_DTPREL_LO16
+        Type3:  R_MIPS_TLS_DTPREL_LO16
+      - Offset: 0x00000000000000AC
+        Type:   R_MIPS_TLS_GOTTPREL
+        Type2:  R_MIPS_TLS_GOTTPREL
+        Type3:  R_MIPS_TLS_GOTTPREL
+      - Offset: 0x00000000000000B0
+        Type:   R_MIPS_TLS_TPREL32
+        Type2:  R_MIPS_TLS_TPREL32
+        Type3:  R_MIPS_TLS_TPREL32
+      - Offset: 0x00000000000000B4
+        Type:   R_MIPS_TLS_TPREL64
+        Type2:  R_MIPS_TLS_TPREL64
+        Type3:  R_MIPS_TLS_TPREL64
+      - Offset: 0x00000000000000B8
+        Type:   R_MIPS_TLS_TPREL_HI16
+        Type2:  R_MIPS_TLS_TPREL_HI16
+        Type3:  R_MIPS_TLS_TPREL_HI16
+      - Offset: 0x00000000000000BC
+        Type:   R_MIPS_TLS_TPREL_LO16
+        Type2:  R_MIPS_TLS_TPREL_LO16
+        Type3:  R_MIPS_TLS_TPREL_LO16
+      - Offset: 0x00000000000000C0
+        Type:   R_MIPS_GLOB_DAT
+        Type2:  R_MIPS_GLOB_DAT
+        Type3:  R_MIPS_GLOB_DAT
+      - Offset: 0x00000000000000C4
+        Type:   R_MIPS_COPY
+        Type2:  R_MIPS_COPY
+        Type3:  R_MIPS_COPY
+      - Offset: 0x00000000000000C8
+        Type:   R_MIPS_JUMP_SLOT
+        Type2:  R_MIPS_JUMP_SLOT
+        Type3:  R_MIPS_JUMP_SLOT
+      - Offset: 0x00000000000000CC
+        Type:   R_MIPS_NUM
+        Type2:  R_MIPS_NUM
+        Type3:  R_MIPS_NUM
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-ppc64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-ppc64.test
new file mode 100644
index 0000000000000..f1102c3e218c7
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-ppc64.test
@@ -0,0 +1,38 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for ppc64 target.
+
+## TODO: Use yaml2obj. obj2yaml now hangs on dumping relocs.obj.elf-ppc64
+# RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-ppc64 | FileCheck %s
+
+# CHECK: Type: R_PPC64_NONE (0)
+# CHECK: Type: R_PPC64_ADDR32 (1)
+# CHECK: Type: R_PPC64_ADDR16_LO (4)
+# CHECK: Type: R_PPC64_ADDR16_HI (5)
+# CHECK: Type: R_PPC64_ADDR14 (7)
+# CHECK: Type: R_PPC64_REL24 (10)
+# CHECK: Type: R_PPC64_REL32 (26)
+# CHECK: Type: R_PPC64_ADDR64 (38)
+# CHECK: Type: R_PPC64_ADDR16_HIGHER (39)
+# CHECK: Type: R_PPC64_ADDR16_HIGHEST (41)
+# CHECK: Type: R_PPC64_REL64 (44)
+# CHECK: Type: R_PPC64_TOC16 (47)
+# CHECK: Type: R_PPC64_TOC16_LO (48)
+# CHECK: Type: R_PPC64_TOC16_HA (50)
+# CHECK: Type: R_PPC64_TOC (51)
+# CHECK: Type: R_PPC64_ADDR16_DS (56)
+# CHECK: Type: R_PPC64_ADDR16_LO_DS (57)
+# CHECK: Type: R_PPC64_TOC16_DS (63)
+# CHECK: Type: R_PPC64_TOC16_LO_DS (64)
+# CHECK: Type: R_PPC64_TLS (67)
+# CHECK: Type: R_PPC64_TPREL16_LO (70)
+# CHECK: Type: R_PPC64_TPREL16_HA (72)
+# CHECK: Type: R_PPC64_DTPREL16_LO (75)
+# CHECK: Type: R_PPC64_DTPREL16_HA (77)
+# CHECK: Type: R_PPC64_GOT_TLSGD16_LO (80)
+# CHECK: Type: R_PPC64_GOT_TLSGD16_HA (82)
+# CHECK: Type: R_PPC64_GOT_TLSLD16_LO (84)
+# CHECK: Type: R_PPC64_GOT_TLSLD16_HA (86)
+# CHECK: Type: R_PPC64_GOT_TPREL16_LO_DS (88)
+# CHECK: Type: R_PPC64_GOT_TPREL16_HA (90)
+# CHECK: Type: R_PPC64_TLSGD (107)
+# CHECK: Type: R_PPC64_TLSLD (108)
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test
new file mode 100644
index 0000000000000..7b3fa1cf593d0
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test
@@ -0,0 +1,145 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for x86_64 target.
+
+# RUN: yaml2obj %s -o %t-x86_64.o
+# RUN: llvm-readobj -r --expand-relocs %t-x86_64.o | FileCheck %s
+
+# CHECK: Type: R_X86_64_NONE (0)
+# CHECK: Type: R_X86_64_64 (1)
+# CHECK: Type: R_X86_64_PC32 (2)
+# CHECK: Type: R_X86_64_GOT32 (3)
+# CHECK: Type: R_X86_64_PLT32 (4)
+# CHECK: Type: R_X86_64_COPY (5)
+# CHECK: Type: R_X86_64_GLOB_DAT (6)
+# CHECK: Type: R_X86_64_JUMP_SLOT (7)
+# CHECK: Type: R_X86_64_RELATIVE (8)
+# CHECK: Type: R_X86_64_GOTPCREL (9)
+# CHECK: Type: R_X86_64_32 (10)
+# CHECK: Type: R_X86_64_32S (11)
+# CHECK: Type: R_X86_64_16 (12)
+# CHECK: Type: R_X86_64_PC16 (13)
+# CHECK: Type: R_X86_64_8 (14)
+# CHECK: Type: R_X86_64_PC8 (15)
+# CHECK: Type: R_X86_64_DTPMOD64 (16)
+# CHECK: Type: R_X86_64_DTPOFF64 (17)
+# CHECK: Type: R_X86_64_TPOFF64 (18)
+# CHECK: Type: R_X86_64_TLSGD (19)
+# CHECK: Type: R_X86_64_TLSLD (20)
+# CHECK: Type: R_X86_64_DTPOFF32 (21)
+# CHECK: Type: R_X86_64_GOTTPOFF (22)
+# CHECK: Type: R_X86_64_TPOFF32 (23)
+# CHECK: Type: R_X86_64_PC64 (24)
+# CHECK: Type: R_X86_64_GOTOFF64 (25)
+# CHECK: Type: R_X86_64_GOTPC32 (26)
+# CHECK: Type: R_X86_64_GOT64 (27)
+# CHECK: Type: R_X86_64_GOTPCREL64 (28)
+# CHECK: Type: R_X86_64_GOTPC64 (29)
+# CHECK: Type: R_X86_64_GOTPLT64 (30)
+# CHECK: Type: R_X86_64_PLTOFF64 (31)
+# CHECK: Type: R_X86_64_SIZE32 (32)
+# CHECK: Type: R_X86_64_SIZE64 (33)
+# CHECK: Type: R_X86_64_GOTPC32_TLSDESC (34)
+# CHECK: Type: R_X86_64_TLSDESC_CALL (35)
+# CHECK: Type: R_X86_64_TLSDESC (36)
+# CHECK: Type: R_X86_64_IRELATIVE (37)
+# CHECK: Type: R_X86_64_GOTPCRELX (41)
+# CHECK: Type: R_X86_64_REX_GOTPCRELX (42)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  OSABI:   ELFOSABI_GNU
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Content: 00
+  - Name:         .rela.text
+    Type:         SHT_RELA
+    Link:         .symtab
+    AddressAlign: 0x0000000000000008
+    EntSize:      0x0000000000000018
+    Info:         .text
+    Relocations:
+      - Offset: 0x0000000000000003
+        Type:   R_X86_64_NONE
+      - Offset: 0x000000000000000A
+        Type:   R_X86_64_64
+      - Offset: 0x0000000000000011
+        Type:   R_X86_64_PC32
+      - Offset: 0x0000000000000018
+        Type:   R_X86_64_GOT32
+      - Offset: 0x000000000000001F
+        Type:   R_X86_64_PLT32
+      - Offset: 0x0000000000000026
+        Type:   R_X86_64_COPY
+      - Offset: 0x000000000000002D
+        Type:   R_X86_64_GLOB_DAT
+      - Offset: 0x0000000000000034
+        Type:   R_X86_64_JUMP_SLOT
+      - Offset: 0x000000000000003B
+        Type:   R_X86_64_RELATIVE
+      - Offset: 0x0000000000000042
+        Type:   R_X86_64_GOTPCREL
+      - Offset: 0x0000000000000049
+        Type:   R_X86_64_32
+      - Offset: 0x0000000000000050
+        Type:   R_X86_64_32S
+      - Offset: 0x0000000000000057
+        Type:   R_X86_64_16
+      - Offset: 0x000000000000005E
+        Type:   R_X86_64_PC16
+      - Offset: 0x0000000000000065
+        Type:   R_X86_64_8
+      - Offset: 0x000000000000006C
+        Type:   R_X86_64_PC8
+      - Offset: 0x0000000000000073
+        Type:   R_X86_64_DTPMOD64
+      - Offset: 0x000000000000007A
+        Type:   R_X86_64_DTPOFF64
+      - Offset: 0x0000000000000081
+        Type:   R_X86_64_TPOFF64
+      - Offset: 0x0000000000000088
+        Type:   R_X86_64_TLSGD
+      - Offset: 0x000000000000008F
+        Type:   R_X86_64_TLSLD
+      - Offset: 0x0000000000000096
+        Type:   R_X86_64_DTPOFF32
+      - Offset: 0x000000000000009D
+        Type:   R_X86_64_GOTTPOFF
+      - Offset: 0x00000000000000A4
+        Type:   R_X86_64_TPOFF32
+      - Offset: 0x00000000000000AB
+        Type:   R_X86_64_PC64
+      - Offset: 0x00000000000000B2
+        Type:   R_X86_64_GOTOFF64
+      - Offset: 0x00000000000000B9
+        Type:   R_X86_64_GOTPC32
+      - Offset: 0x00000000000000C0
+        Type:   R_X86_64_GOT64
+      - Offset: 0x00000000000000C7
+        Type:   R_X86_64_GOTPCREL64
+      - Offset: 0x00000000000000CE
+        Type:   R_X86_64_GOTPC64
+      - Offset: 0x00000000000000D5
+        Type:   R_X86_64_GOTPLT64
+      - Offset: 0x00000000000000DC
+        Type:   R_X86_64_PLTOFF64
+      - Offset: 0x00000000000000E3
+        Type:   R_X86_64_SIZE32
+      - Offset: 0x00000000000000EA
+        Type:   R_X86_64_SIZE64
+      - Offset: 0x00000000000000F1
+        Type:   R_X86_64_GOTPC32_TLSDESC
+      - Offset: 0x00000000000000F8
+        Type:   R_X86_64_TLSDESC_CALL
+      - Offset: 0x00000000000000FF
+        Type:   R_X86_64_TLSDESC
+      - Offset: 0x0000000000000106
+        Type:   R_X86_64_IRELATIVE
+      - Offset: 0x000000000000010A
+        Type:   R_X86_64_GOTPCRELX
+      - Offset: 0x000000000000010E
+        Type:   R_X86_64_REX_GOTPCRELX
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-macho-arm.test b/llvm/test/tools/llvm-readobj/reloc-types-macho-arm.test
new file mode 100644
index 0000000000000..3444ab153282b
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-macho-arm.test
@@ -0,0 +1,16 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for macho-arm target.
+
+## TODO: use yaml2obj.
+# RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.macho-arm | FileCheck %s
+
+# CHECK: Type: ARM_RELOC_VANILLA (0)
+# CHECK: Type: ARM_RELOC_PAIR (1)
+# CHECK: Type: ARM_RELOC_SECTDIFF (2)
+# CHECK: Type: ARM_RELOC_LOCAL_SECTDIFF (3)
+# CHECK: Type: ARM_RELOC_PB_LA_PTR (4)
+# CHECK: Type: ARM_RELOC_BR24 (5)
+# CHECK: Type: ARM_THUMB_RELOC_BR22 (6)
+# CHECK: Type: ARM_THUMB_32BIT_BRANCH (7)
+# CHECK: Type: ARM_RELOC_HALF (8)
+# CHECK: Type: ARM_RELOC_HALF_SECTDIFF (9)
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-macho-i386.test b/llvm/test/tools/llvm-readobj/reloc-types-macho-i386.test
new file mode 100644
index 0000000000000..5b14933840ed8
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-macho-i386.test
@@ -0,0 +1,12 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for macho-i386 target.
+
+## TODO: use yaml2obj.
+# RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.macho-i386 | FileCheck %s
+
+# CHECK: Type: GENERIC_RELOC_VANILLA (0)
+# CHECK: Type: GENERIC_RELOC_PAIR (1)
+# CHECK: Type: GENERIC_RELOC_SECTDIFF (2)
+# CHECK: Type: GENERIC_RELOC_PB_LA_PTR (3)
+# CHECK: Type: GENERIC_RELOC_LOCAL_SECTDIFF (4)
+# CHECK: Type: GENERIC_RELOC_TLV (5)
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-macho-x64.test b/llvm/test/tools/llvm-readobj/reloc-types-macho-x64.test
new file mode 100644
index 0000000000000..49e0a8b3ef60d
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/reloc-types-macho-x64.test
@@ -0,0 +1,16 @@
+## Test that llvm-readobj/llvm-readelf shows proper relocation type
+## names and values for macho-x64 target.
+
+## TODO: use yaml2obj.
+# RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.macho-x86_64 | FileCheck %s
+
+# CHECK: Type: X86_64_RELOC_UNSIGNED (0)
+# CHECK: Type: X86_64_RELOC_SIGNED (1)
+# CHECK: Type: X86_64_RELOC_BRANCH (2)
+# CHECK: Type: X86_64_RELOC_GOT_LOAD (3)
+# CHECK: Type: X86_64_RELOC_GOT (4)
+# CHECK: Type: X86_64_RELOC_SUBTRACTOR (5)
+# CHECK: Type: X86_64_RELOC_SIGNED_1 (6)
+# CHECK: Type: X86_64_RELOC_SIGNED_2 (7)
+# CHECK: Type: X86_64_RELOC_SIGNED_4 (8)
+# CHECK: Type: X86_64_RELOC_TLV (9)
diff --git a/llvm/test/tools/llvm-readobj/reloc-types.test b/llvm/test/tools/llvm-readobj/reloc-types.test
deleted file mode 100644
index 1d5210f36d491..0000000000000
--- a/llvm/test/tools/llvm-readobj/reloc-types.test
+++ /dev/null
@@ -1,699 +0,0 @@
-// Test that libObject and subsequently llvm-readobj shows proper relocation type
-// names and values.
-
-// Todo: ELF-PPC, ELF-HEXAGON
-
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-i386     | FileCheck %s -check-prefix ELF-32
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-x86_64   | FileCheck %s -check-prefix ELF-64
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-aarch64  | FileCheck %s -check-prefix ELF-AARCH64
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-arm      | FileCheck %s -check-prefix ELF-ARM
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-lanai    | FileCheck %s -check-prefix ELF-LANAI
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-mips     | FileCheck %s -check-prefix ELF-MIPS
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-mips64el | FileCheck %s -check-prefix ELF-MIPS64EL
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.elf-ppc64    | FileCheck %s -check-prefix ELF-PPC64
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.coff-i386    | FileCheck %s -check-prefix COFF-32
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.coff-x86_64  | FileCheck %s -check-prefix COFF-64
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.macho-arm    | FileCheck %s -check-prefix MACHO-ARM
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.macho-i386   | FileCheck %s -check-prefix MACHO-32
-RUN: llvm-readobj -r --expand-relocs %p/Inputs/relocs.obj.macho-x86_64 | FileCheck %s -check-prefix MACHO-64
-
-
-ELF-32: Type: R_386_NONE (0)
-ELF-32: Type: R_386_32 (1)
-ELF-32: Type: R_386_PC32 (2)
-ELF-32: Type: R_386_GOT32 (3)
-ELF-32: Type: R_386_PLT32 (4)
-ELF-32: Type: R_386_COPY (5)
-ELF-32: Type: R_386_GLOB_DAT (6)
-ELF-32: Type: R_386_JUMP_SLOT (7)
-ELF-32: Type: R_386_RELATIVE (8)
-ELF-32: Type: R_386_GOTOFF (9)
-ELF-32: Type: R_386_GOTPC (10)
-ELF-32: Type: R_386_32PLT (11)
-ELF-32: Type: R_386_TLS_TPOFF (14)
-ELF-32: Type: R_386_TLS_IE (15)
-ELF-32: Type: R_386_TLS_GOTIE (16)
-ELF-32: Type: R_386_TLS_LE (17)
-ELF-32: Type: R_386_TLS_GD (18)
-ELF-32: Type: R_386_TLS_LDM (19)
-ELF-32: Type: R_386_16 (20)
-ELF-32: Type: R_386_PC16 (21)
-ELF-32: Type: R_386_8 (22)
-ELF-32: Type: R_386_PC8 (23)
-ELF-32: Type: R_386_TLS_GD_32 (24)
-ELF-32: Type: R_386_TLS_GD_PUSH (25)
-ELF-32: Type: R_386_TLS_GD_CALL (26)
-ELF-32: Type: R_386_TLS_GD_POP (27)
-ELF-32: Type: R_386_TLS_LDM_32 (28)
-ELF-32: Type: R_386_TLS_LDM_PUSH (29)
-ELF-32: Type: R_386_TLS_LDM_CALL (30)
-ELF-32: Type: R_386_TLS_LDM_POP (31)
-ELF-32: Type: R_386_TLS_LDO_32 (32)
-ELF-32: Type: R_386_TLS_IE_32 (33)
-ELF-32: Type: R_386_TLS_LE_32 (34)
-ELF-32: Type: R_386_TLS_DTPMOD32 (35)
-ELF-32: Type: R_386_TLS_DTPOFF32 (36)
-ELF-32: Type: R_386_TLS_TPOFF32 (37)
-ELF-32: Type: R_386_TLS_GOTDESC (39)
-ELF-32: Type: R_386_TLS_DESC_CALL (40)
-ELF-32: Type: R_386_TLS_DESC (41)
-ELF-32: Type: R_386_IRELATIVE (42)
-_LF-32: Type: R_386_NUM (43)
-
-ELF-64: Type: R_X86_64_NONE (0)
-ELF-64: Type: R_X86_64_64 (1)
-ELF-64: Type: R_X86_64_PC32 (2)
-ELF-64: Type: R_X86_64_GOT32 (3)
-ELF-64: Type: R_X86_64_PLT32 (4)
-ELF-64: Type: R_X86_64_COPY (5)
-ELF-64: Type: R_X86_64_GLOB_DAT (6)
-ELF-64: Type: R_X86_64_JUMP_SLOT (7)
-ELF-64: Type: R_X86_64_RELATIVE (8)
-ELF-64: Type: R_X86_64_GOTPCREL (9)
-ELF-64: Type: R_X86_64_32 (10)
-ELF-64: Type: R_X86_64_32S (11)
-ELF-64: Type: R_X86_64_16 (12)
-ELF-64: Type: R_X86_64_PC16 (13)
-ELF-64: Type: R_X86_64_8 (14)
-ELF-64: Type: R_X86_64_PC8 (15)
-ELF-64: Type: R_X86_64_DTPMOD64 (16)
-ELF-64: Type: R_X86_64_DTPOFF64 (17)
-ELF-64: Type: R_X86_64_TPOFF64 (18)
-ELF-64: Type: R_X86_64_TLSGD (19)
-ELF-64: Type: R_X86_64_TLSLD (20)
-ELF-64: Type: R_X86_64_DTPOFF32 (21)
-ELF-64: Type: R_X86_64_GOTTPOFF (22)
-ELF-64: Type: R_X86_64_TPOFF32 (23)
-ELF-64: Type: R_X86_64_PC64 (24)
-ELF-64: Type: R_X86_64_GOTOFF64 (25)
-ELF-64: Type: R_X86_64_GOTPC32 (26)
-ELF-64: Type: R_X86_64_GOT64 (27)
-ELF-64: Type: R_X86_64_GOTPCREL64 (28)
-ELF-64: Type: R_X86_64_GOTPC64 (29)
-ELF-64: Type: R_X86_64_GOTPLT64 (30)
-ELF-64: Type: R_X86_64_PLTOFF64 (31)
-ELF-64: Type: R_X86_64_SIZE32 (32)
-ELF-64: Type: R_X86_64_SIZE64 (33)
-ELF-64: Type: R_X86_64_GOTPC32_TLSDESC (34)
-ELF-64: Type: R_X86_64_TLSDESC_CALL (35)
-ELF-64: Type: R_X86_64_TLSDESC (36)
-ELF-64: Type: R_X86_64_IRELATIVE (37)
-
-ELF-PPC: Type: R_PPC_NONE (0)
-ELF-PPC: Type: R_PPC_ADDR32 (1)
-ELF-PPC: Type: R_PPC_ADDR24 (2)
-ELF-PPC: Type: R_PPC_ADDR16 (3)
-ELF-PPC: Type: R_PPC_ADDR16_LO (4)
-ELF-PPC: Type: R_PPC_ADDR16_HI (5)
-ELF-PPC: Type: R_PPC_ADDR16_HA (6)
-ELF-PPC: Type: R_PPC_ADDR14 (7)
-ELF-PPC: Type: R_PPC_ADDR14_BRTAKEN (8)
-ELF-PPC: Type: R_PPC_ADDR14_BRNTAKEN (9)
-ELF-PPC: Type: R_PPC_REL24 (10)
-ELF-PPC: Type: R_PPC_REL14 (11)
-ELF-PPC: Type: R_PPC_REL14_BRTAKEN (12)
-ELF-PPC: Type: R_PPC_REL14_BRNTAKEN (13)
-ELF-PPC: Type: R_PPC_REL32 (26)
-ELF-PPC: Type: R_PPC_TPREL16_LO (70)
-ELF-PPC: Type: R_PPC_TPREL16_HA (72)
-
-ELF-PPC64: Type: R_PPC64_NONE (0)
-ELF-PPC64: Type: R_PPC64_ADDR32 (1)
-ELF-PPC64: Type: R_PPC64_ADDR16_LO (4)
-ELF-PPC64: Type: R_PPC64_ADDR16_HI (5)
-ELF-PPC64: Type: R_PPC64_ADDR14 (7)
-ELF-PPC64: Type: R_PPC64_REL24 (10)
-ELF-PPC64: Type: R_PPC64_REL32 (26)
-ELF-PPC64: Type: R_PPC64_ADDR64 (38)
-ELF-PPC64: Type: R_PPC64_ADDR16_HIGHER (39)
-ELF-PPC64: Type: R_PPC64_ADDR16_HIGHEST (41)
-ELF-PPC64: Type: R_PPC64_REL64 (44)
-ELF-PPC64: Type: R_PPC64_TOC16 (47)
-ELF-PPC64: Type: R_PPC64_TOC16_LO (48)
-ELF-PPC64: Type: R_PPC64_TOC16_HA (50)
-ELF-PPC64: Type: R_PPC64_TOC (51)
-ELF-PPC64: Type: R_PPC64_ADDR16_DS (56)
-ELF-PPC64: Type: R_PPC64_ADDR16_LO_DS (57)
-ELF-PPC64: Type: R_PPC64_TOC16_DS (63)
-ELF-PPC64: Type: R_PPC64_TOC16_LO_DS (64)
-ELF-PPC64: Type: R_PPC64_TLS (67)
-ELF-PPC64: Type: R_PPC64_TPREL16_LO (70)
-ELF-PPC64: Type: R_PPC64_TPREL16_HA (72)
-ELF-PPC64: Type: R_PPC64_DTPREL16_LO (75)
-ELF-PPC64: Type: R_PPC64_DTPREL16_HA (77)
-ELF-PPC64: Type: R_PPC64_GOT_TLSGD16_LO (80)
-ELF-PPC64: Type: R_PPC64_GOT_TLSGD16_HA (82)
-ELF-PPC64: Type: R_PPC64_GOT_TLSLD16_LO (84)
-ELF-PPC64: Type: R_PPC64_GOT_TLSLD16_HA (86)
-ELF-PPC64: Type: R_PPC64_GOT_TPREL16_LO_DS (88)
-ELF-PPC64: Type: R_PPC64_GOT_TPREL16_HA (90)
-ELF-PPC64: Type: R_PPC64_TLSGD (107)
-ELF-PPC64: Type: R_PPC64_TLSLD (108)
-
-ELF-AARCH64: Type: R_AARCH64_NONE (0)
-ELF-AARCH64: Type: R_AARCH64_ABS64 (257)
-ELF-AARCH64: Type: R_AARCH64_ABS32 (258)
-ELF-AARCH64: Type: R_AARCH64_ABS16 (259)
-ELF-AARCH64: Type: R_AARCH64_PREL64 (260)
-ELF-AARCH64: Type: R_AARCH64_PREL32 (261)
-ELF-AARCH64: Type: R_AARCH64_PREL16 (262)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G0 (263)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G0_NC (264)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G1 (265)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G1_NC (266)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G2 (267)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G2_NC (268)
-ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G3 (269)
-ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G0 (270)
-ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G1 (271)
-ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G2 (272)
-ELF-AARCH64: Type: R_AARCH64_LD_PREL_LO19 (273)
-ELF-AARCH64: Type: R_AARCH64_ADR_PREL_LO21 (274)
-ELF-AARCH64: Type: R_AARCH64_ADR_PREL_PG_HI21 (275)
-ELF-AARCH64: Type: R_AARCH64_ADR_PREL_PG_HI21_NC (276)
-ELF-AARCH64: Type: R_AARCH64_ADD_ABS_LO12_NC (277)
-ELF-AARCH64: Type: R_AARCH64_LDST8_ABS_LO12_NC (278)
-ELF-AARCH64: Type: R_AARCH64_TSTBR14 (279)
-ELF-AARCH64: Type: R_AARCH64_CONDBR19 (280)
-ELF-AARCH64: Type: R_AARCH64_JUMP26 (282)
-ELF-AARCH64: Type: R_AARCH64_CALL26 (283)
-ELF-AARCH64: Type: R_AARCH64_LDST16_ABS_LO12_NC (284)
-ELF-AARCH64: Type: R_AARCH64_LDST32_ABS_LO12_NC (285)
-ELF-AARCH64: Type: R_AARCH64_LDST64_ABS_LO12_NC (286)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G0 (287)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G0_NC (288)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G1 (289)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G1_NC (290)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G2 (291)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G2_NC (292)
-ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G3 (293)
-ELF-AARCH64: Type: R_AARCH64_LDST128_ABS_LO12_NC (299)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G0 (300)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G0_NC (301)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G1 (302)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G1_NC (303)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G2 (304)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G2_NC (305)
-ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G3 (306)
-ELF-AARCH64: Type: R_AARCH64_GOTREL64 (307)
-ELF-AARCH64: Type: R_AARCH64_GOTREL32 (308)
-ELF-AARCH64: Type: R_AARCH64_GOT_LD_PREL19 (309)
-ELF-AARCH64: Type: R_AARCH64_LD64_GOTOFF_LO15 (310)
-ELF-AARCH64: Type: R_AARCH64_ADR_GOT_PAGE (311)
-ELF-AARCH64: Type: R_AARCH64_LD64_GOT_LO12_NC (312)
-ELF-AARCH64: Type: R_AARCH64_LD64_GOTPAGE_LO15 (313)
-ELF-AARCH64: Type: R_AARCH64_TLSGD_ADR_PREL21 (512)
-ELF-AARCH64: Type: R_AARCH64_TLSGD_ADR_PAGE21 (513)
-ELF-AARCH64: Type: R_AARCH64_TLSGD_ADD_LO12_NC (514)
-ELF-AARCH64: Type: R_AARCH64_TLSGD_MOVW_G1 (515)
-ELF-AARCH64: Type: R_AARCH64_TLSGD_MOVW_G0_NC (516)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_ADR_PREL21 (517)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_ADR_PAGE21 (518)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_LO12_NC (519)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_G1 (520)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_G0_NC (521)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LD_PREL19 (522)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G2 (523)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1 (524)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC (525)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0 (526)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC (527)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_HI12 (528)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12 (529)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC (530)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 (531)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC (532)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 (533)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC (534)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 (535)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC (536)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 (537)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC (538)
-ELF-AARCH64: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 (539)
-ELF-AARCH64: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC (540)
-ELF-AARCH64: Type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 (541)
-ELF-AARCH64: Type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC (542)
-ELF-AARCH64: Type: R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 (543)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G2 (544)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1 (545)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC (546)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0 (547)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC (548)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_HI12 (549)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12 (550)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC (551)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12 (552)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC (553)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12 (554)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC (555)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12 (556)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC (557)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12 (558)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC (559)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD_PREL19 (560)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PREL21 (561)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PAGE21 (562)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD64_LO12 (563)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD_LO12 (564)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_OFF_G1 (565)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_OFF_G0_NC (566)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_LDR (567)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD (568)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_CALL (569)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST128_TPREL_LO12 (570)
-ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC (571)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST128_DTPREL_LO12 (572)
-ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC (573)
-ELF-AARCH64: Type: R_AARCH64_COPY (1024)
-ELF-AARCH64: Type: R_AARCH64_GLOB_DAT (1025)
-ELF-AARCH64: Type: R_AARCH64_JUMP_SLOT (1026)
-ELF-AARCH64: Type: R_AARCH64_RELATIVE (1027)
-ELF-AARCH64: Type: R_AARCH64_TLS_DTPREL64 (1028)
-ELF-AARCH64: Type: R_AARCH64_TLS_DTPMOD64 (1029)
-ELF-AARCH64: Type: R_AARCH64_TLS_TPREL64 (1030)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC (1031)
-ELF-AARCH64: Type: R_AARCH64_IRELATIVE (1032)
-
-ELF-ARM: Type: R_ARM_NONE (0)
-ELF-ARM: Type: R_ARM_PC24 (1)
-ELF-ARM: Type: R_ARM_ABS32 (2)
-ELF-ARM: Type: R_ARM_REL32 (3)
-ELF-ARM: Type: R_ARM_LDR_PC_G0 (4)
-ELF-ARM: Type: R_ARM_ABS16 (5)
-ELF-ARM: Type: R_ARM_ABS12 (6)
-ELF-ARM: Type: R_ARM_THM_ABS5 (7)
-ELF-ARM: Type: R_ARM_ABS8 (8)
-ELF-ARM: Type: R_ARM_SBREL32 (9)
-ELF-ARM: Type: R_ARM_THM_CALL (10)
-ELF-ARM: Type: R_ARM_THM_PC8 (11)
-ELF-ARM: Type: R_ARM_BREL_ADJ (12)
-ELF-ARM: Type: R_ARM_TLS_DESC (13)
-ELF-ARM: Type: R_ARM_THM_SWI8 (14)
-ELF-ARM: Type: R_ARM_XPC25 (15)
-ELF-ARM: Type: R_ARM_THM_XPC22 (16)
-ELF-ARM: Type: R_ARM_TLS_DTPMOD32 (17)
-ELF-ARM: Type: R_ARM_TLS_DTPOFF32 (18)
-ELF-ARM: Type: R_ARM_TLS_TPOFF32 (19)
-ELF-ARM: Type: R_ARM_COPY (20)
-ELF-ARM: Type: R_ARM_GLOB_DAT (21)
-ELF-ARM: Type: R_ARM_JUMP_SLOT (22)
-ELF-ARM: Type: R_ARM_RELATIVE (23)
-ELF-ARM: Type: R_ARM_GOTOFF32 (24)
-ELF-ARM: Type: R_ARM_BASE_PREL (25)
-ELF-ARM: Type: R_ARM_GOT_BREL (26)
-ELF-ARM: Type: R_ARM_CALL (28)
-ELF-ARM: Type: R_ARM_JUMP24 (29)
-ELF-ARM: Type: R_ARM_THM_JUMP24 (30)
-ELF-ARM: Type: R_ARM_BASE_ABS (31)
-ELF-ARM: Type: R_ARM_ALU_PCREL_7_0 (32)
-ELF-ARM: Type: R_ARM_ALU_PCREL_15_8 (33)
-ELF-ARM: Type: R_ARM_ALU_PCREL_23_15 (34)
-ELF-ARM: Type: R_ARM_LDR_SBREL_11_0_NC (35)
-ELF-ARM: Type: R_ARM_ALU_SBREL_19_12_NC (36)
-ELF-ARM: Type: R_ARM_ALU_SBREL_27_20_CK (37)
-ELF-ARM: Type: R_ARM_TARGET1 (38)
-ELF-ARM: Type: R_ARM_SBREL31 (39)
-ELF-ARM: Type: R_ARM_V4BX (40)
-ELF-ARM: Type: R_ARM_TARGET2 (41)
-ELF-ARM: Type: R_ARM_PREL31 (42)
-ELF-ARM: Type: R_ARM_MOVW_ABS_NC (43)
-ELF-ARM: Type: R_ARM_MOVT_ABS (44)
-ELF-ARM: Type: R_ARM_MOVW_PREL_NC (45)
-ELF-ARM: Type: R_ARM_MOVT_PREL (46)
-ELF-ARM: Type: R_ARM_THM_MOVW_ABS_NC (47)
-ELF-ARM: Type: R_ARM_THM_MOVT_ABS (48)
-ELF-ARM: Type: R_ARM_THM_MOVW_PREL_NC (49)
-ELF-ARM: Type: R_ARM_THM_MOVT_PREL (50)
-ELF-ARM: Type: R_ARM_THM_JUMP19 (51)
-ELF-ARM: Type: R_ARM_THM_JUMP6 (52)
-ELF-ARM: Type: R_ARM_THM_ALU_PREL_11_0 (53)
-ELF-ARM: Type: R_ARM_THM_PC12 (54)
-ELF-ARM: Type: R_ARM_ABS32_NOI (55)
-ELF-ARM: Type: R_ARM_REL32_NOI (56)
-ELF-ARM: Type: R_ARM_ALU_PC_G0_NC (57)
-ELF-ARM: Type: R_ARM_ALU_PC_G0 (58)
-ELF-ARM: Type: R_ARM_ALU_PC_G1_NC (59)
-ELF-ARM: Type: R_ARM_ALU_PC_G1 (60)
-ELF-ARM: Type: R_ARM_ALU_PC_G2 (61)
-ELF-ARM: Type: R_ARM_LDR_PC_G1 (62)
-ELF-ARM: Type: R_ARM_LDR_PC_G2 (63)
-ELF-ARM: Type: R_ARM_LDRS_PC_G0 (64)
-ELF-ARM: Type: R_ARM_LDRS_PC_G1 (65)
-ELF-ARM: Type: R_ARM_LDRS_PC_G2 (66)
-ELF-ARM: Type: R_ARM_LDC_PC_G0 (67)
-ELF-ARM: Type: R_ARM_LDC_PC_G1 (68)
-ELF-ARM: Type: R_ARM_LDC_PC_G2 (69)
-ELF-ARM: Type: R_ARM_ALU_SB_G0_NC (70)
-ELF-ARM: Type: R_ARM_ALU_SB_G0 (71)
-ELF-ARM: Type: R_ARM_ALU_SB_G1_NC (72)
-ELF-ARM: Type: R_ARM_ALU_SB_G1 (73)
-ELF-ARM: Type: R_ARM_ALU_SB_G2 (74)
-ELF-ARM: Type: R_ARM_LDR_SB_G0 (75)
-ELF-ARM: Type: R_ARM_LDR_SB_G1 (76)
-ELF-ARM: Type: R_ARM_LDR_SB_G2 (77)
-ELF-ARM: Type: R_ARM_LDRS_SB_G0 (78)
-ELF-ARM: Type: R_ARM_LDRS_SB_G1 (79)
-ELF-ARM: Type: R_ARM_LDRS_SB_G2 (80)
-ELF-ARM: Type: R_ARM_LDC_SB_G0 (81)
-ELF-ARM: Type: R_ARM_LDC_SB_G1 (82)
-ELF-ARM: Type: R_ARM_LDC_SB_G2 (83)
-ELF-ARM: Type: R_ARM_MOVW_BREL_NC (84)
-ELF-ARM: Type: R_ARM_MOVT_BREL (85)
-ELF-ARM: Type: R_ARM_MOVW_BREL (86)
-ELF-ARM: Type: R_ARM_THM_MOVW_BREL_NC (87)
-ELF-ARM: Type: R_ARM_THM_MOVT_BREL (88)
-ELF-ARM: Type: R_ARM_THM_MOVW_BREL (89)
-ELF-ARM: Type: R_ARM_TLS_GOTDESC (90)
-ELF-ARM: Type: R_ARM_TLS_CALL (91)
-ELF-ARM: Type: R_ARM_TLS_DESCSEQ (92)
-ELF-ARM: Type: R_ARM_THM_TLS_CALL (93)
-ELF-ARM: Type: R_ARM_PLT32_ABS (94)
-ELF-ARM: Type: R_ARM_GOT_ABS (95)
-ELF-ARM: Type: R_ARM_GOT_PREL (96)
-ELF-ARM: Type: R_ARM_GOT_BREL12 (97)
-ELF-ARM: Type: R_ARM_GOTOFF12 (98)
-ELF-ARM: Type: R_ARM_GOTRELAX (99)
-ELF-ARM: Type: R_ARM_GNU_VTENTRY (100)
-ELF-ARM: Type: R_ARM_GNU_VTINHERIT (101)
-ELF-ARM: Type: R_ARM_THM_JUMP11 (102)
-ELF-ARM: Type: R_ARM_THM_JUMP8 (103)
-ELF-ARM: Type: R_ARM_TLS_GD32 (104)
-ELF-ARM: Type: R_ARM_TLS_LDM32 (105)
-ELF-ARM: Type: R_ARM_TLS_LDO32 (106)
-ELF-ARM: Type: R_ARM_TLS_IE32 (107)
-ELF-ARM: Type: R_ARM_TLS_LE32 (108)
-ELF-ARM: Type: R_ARM_TLS_LDO12 (109)
-ELF-ARM: Type: R_ARM_TLS_LE12 (110)
-ELF-ARM: Type: R_ARM_TLS_IE12GP (111)
-ELF-ARM: Type: R_ARM_PRIVATE_0 (112)
-ELF-ARM: Type: R_ARM_PRIVATE_1 (113)
-ELF-ARM: Type: R_ARM_PRIVATE_2 (114)
-ELF-ARM: Type: R_ARM_PRIVATE_3 (115)
-ELF-ARM: Type: R_ARM_PRIVATE_4 (116)
-ELF-ARM: Type: R_ARM_PRIVATE_5 (117)
-ELF-ARM: Type: R_ARM_PRIVATE_6 (118)
-ELF-ARM: Type: R_ARM_PRIVATE_7 (119)
-ELF-ARM: Type: R_ARM_PRIVATE_8 (120)
-ELF-ARM: Type: R_ARM_PRIVATE_9 (121)
-ELF-ARM: Type: R_ARM_PRIVATE_10 (122)
-ELF-ARM: Type: R_ARM_PRIVATE_11 (123)
-ELF-ARM: Type: R_ARM_PRIVATE_12 (124)
-ELF-ARM: Type: R_ARM_PRIVATE_13 (125)
-ELF-ARM: Type: R_ARM_PRIVATE_14 (126)
-ELF-ARM: Type: R_ARM_PRIVATE_15 (127)
-ELF-ARM: Type: R_ARM_ME_TOO (128)
-ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ16 (129)
-ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ32 (130)
-ELF-ARM: Type: R_ARM_IRELATIVE (160)
-
-ELF-LANAI: Type: R_LANAI_NONE (0)
-ELF-LANAI: Type: R_LANAI_21 (1)
-ELF-LANAI: Type: R_LANAI_21_F (2)
-ELF-LANAI: Type: R_LANAI_25 (3)
-ELF-LANAI: Type: R_LANAI_32 (4)
-ELF-LANAI: Type: R_LANAI_HI16 (5)
-ELF-LANAI: Type: R_LANAI_LO16 (6)
-
-ELF-MIPS: Type: R_MIPS_NONE (0)
-ELF-MIPS: Type: R_MIPS_16 (1)
-ELF-MIPS: Type: R_MIPS_32 (2)
-ELF-MIPS: Type: R_MIPS_REL32 (3)
-ELF-MIPS: Type: R_MIPS_26 (4)
-ELF-MIPS: Type: R_MIPS_HI16 (5)
-ELF-MIPS: Type: R_MIPS_LO16 (6)
-ELF-MIPS: Type: R_MIPS_GPREL16 (7)
-ELF-MIPS: Type: R_MIPS_LITERAL (8)
-ELF-MIPS: Type: R_MIPS_GOT16 (9)
-ELF-MIPS: Type: R_MIPS_PC16 (10)
-ELF-MIPS: Type: R_MIPS_CALL16 (11)
-ELF-MIPS: Type: R_MIPS_GPREL32 (12)
-ELF-MIPS: Type: R_MIPS_SHIFT5 (16)
-ELF-MIPS: Type: R_MIPS_SHIFT6 (17)
-ELF-MIPS: Type: R_MIPS_64 (18)
-ELF-MIPS: Type: R_MIPS_GOT_DISP (19)
-ELF-MIPS: Type: R_MIPS_GOT_PAGE (20)
-ELF-MIPS: Type: R_MIPS_GOT_OFST (21)
-ELF-MIPS: Type: R_MIPS_GOT_HI16 (22)
-ELF-MIPS: Type: R_MIPS_GOT_LO16 (23)
-ELF-MIPS: Type: R_MIPS_SUB (24)
-ELF-MIPS: Type: R_MIPS_INSERT_A (25)
-ELF-MIPS: Type: R_MIPS_INSERT_B (26)
-ELF-MIPS: Type: R_MIPS_DELETE (27)
-ELF-MIPS: Type: R_MIPS_HIGHER (28)
-ELF-MIPS: Type: R_MIPS_HIGHEST (29)
-ELF-MIPS: Type: R_MIPS_CALL_HI16 (30)
-ELF-MIPS: Type: R_MIPS_CALL_LO16 (31)
-ELF-MIPS: Type: R_MIPS_SCN_DISP (32)
-ELF-MIPS: Type: R_MIPS_REL16 (33)
-ELF-MIPS: Type: R_MIPS_ADD_IMMEDIATE (34)
-ELF-MIPS: Type: R_MIPS_PJUMP (35)
-ELF-MIPS: Type: R_MIPS_RELGOT (36)
-ELF-MIPS: Type: R_MIPS_JALR (37)
-ELF-MIPS: Type: R_MIPS_TLS_DTPMOD32 (38)
-ELF-MIPS: Type: R_MIPS_TLS_DTPREL32 (39)
-ELF-MIPS: Type: R_MIPS_TLS_DTPMOD64 (40)
-ELF-MIPS: Type: R_MIPS_TLS_DTPREL64 (41)
-ELF-MIPS: Type: R_MIPS_TLS_GD (42)
-ELF-MIPS: Type: R_MIPS_TLS_LDM (43)
-ELF-MIPS: Type: R_MIPS_TLS_DTPREL_HI16 (44)
-ELF-MIPS: Type: R_MIPS_TLS_DTPREL_LO16 (45)
-ELF-MIPS: Type: R_MIPS_TLS_GOTTPREL (46)
-ELF-MIPS: Type: R_MIPS_TLS_TPREL32 (47)
-ELF-MIPS: Type: R_MIPS_TLS_TPREL64 (48)
-ELF-MIPS: Type: R_MIPS_TLS_TPREL_HI16 (49)
-ELF-MIPS: Type: R_MIPS_TLS_TPREL_LO16 (50)
-ELF-MIPS: Type: R_MIPS_GLOB_DAT (51)
-ELF-MIPS: Type: R_MIPS_COPY (126)
-ELF-MIPS: Type: R_MIPS_JUMP_SLOT (127)
-ELF-MIPS: Type: R_MIPS_NUM (218)
-ELF-MIPS64EL: Type: R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE (0)
-ELF-MIPS64EL: Type: R_MIPS_16/R_MIPS_16/R_MIPS_16 (65793)
-ELF-MIPS64EL: Type: R_MIPS_32/R_MIPS_32/R_MIPS_32 (131586)
-ELF-MIPS64EL: Type: R_MIPS_REL32/R_MIPS_REL32/R_MIPS_REL32 (197379)
-ELF-MIPS64EL: Type: R_MIPS_26/R_MIPS_26/R_MIPS_26 (263172)
-ELF-MIPS64EL: Type: R_MIPS_HI16/R_MIPS_HI16/R_MIPS_HI16 (328965)
-ELF-MIPS64EL: Type: R_MIPS_LO16/R_MIPS_LO16/R_MIPS_LO16 (394758)
-ELF-MIPS64EL: Type: R_MIPS_GPREL16/R_MIPS_GPREL16/R_MIPS_GPREL16 (460551)
-ELF-MIPS64EL: Type: R_MIPS_LITERAL/R_MIPS_LITERAL/R_MIPS_LITERAL (526344)
-ELF-MIPS64EL: Type: R_MIPS_GOT16/R_MIPS_GOT16/R_MIPS_GOT16 (592137)
-ELF-MIPS64EL: Type: R_MIPS_PC16/R_MIPS_PC16/R_MIPS_PC16 (657930)
-ELF-MIPS64EL: Type: R_MIPS_CALL16/R_MIPS_CALL16/R_MIPS_CALL16 (723723)
-ELF-MIPS64EL: Type: R_MIPS_GPREL32/R_MIPS_GPREL32/R_MIPS_GPREL32 (789516)
-ELF-MIPS64EL: Type: R_MIPS_SHIFT5/R_MIPS_SHIFT5/R_MIPS_SHIFT5 (1052688)
-ELF-MIPS64EL: Type: R_MIPS_SHIFT6/R_MIPS_SHIFT6/R_MIPS_SHIFT6 (1118481)
-ELF-MIPS64EL: Type: R_MIPS_64/R_MIPS_64/R_MIPS_64 (1184274)
-ELF-MIPS64EL: Type: R_MIPS_GOT_DISP/R_MIPS_GOT_DISP/R_MIPS_GOT_DISP (1250067)
-ELF-MIPS64EL: Type: R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE (1315860)
-ELF-MIPS64EL: Type: R_MIPS_GOT_OFST/R_MIPS_GOT_OFST/R_MIPS_GOT_OFST (1381653)
-ELF-MIPS64EL: Type: R_MIPS_GOT_HI16/R_MIPS_GOT_HI16/R_MIPS_GOT_HI16 (1447446)
-ELF-MIPS64EL: Type: R_MIPS_GOT_LO16/R_MIPS_GOT_LO16/R_MIPS_GOT_LO16 (1513239)
-ELF-MIPS64EL: Type: R_MIPS_SUB/R_MIPS_SUB/R_MIPS_SUB (1579032)
-ELF-MIPS64EL: Type: R_MIPS_INSERT_A/R_MIPS_INSERT_A/R_MIPS_INSERT_A (1644825)
-ELF-MIPS64EL: Type: R_MIPS_INSERT_B/R_MIPS_INSERT_B/R_MIPS_INSERT_B (1710618)
-ELF-MIPS64EL: Type: R_MIPS_DELETE/R_MIPS_DELETE/R_MIPS_DELETE (1776411)
-ELF-MIPS64EL: Type: R_MIPS_HIGHER/R_MIPS_HIGHER/R_MIPS_HIGHER (1842204)
-ELF-MIPS64EL: Type: R_MIPS_HIGHEST/R_MIPS_HIGHEST/R_MIPS_HIGHEST (1907997)
-ELF-MIPS64EL: Type: R_MIPS_CALL_HI16/R_MIPS_CALL_HI16/R_MIPS_CALL_HI16 (1973790)
-ELF-MIPS64EL: Type: R_MIPS_CALL_LO16/R_MIPS_CALL_LO16/R_MIPS_CALL_LO16 (2039583)
-ELF-MIPS64EL: Type: R_MIPS_SCN_DISP/R_MIPS_SCN_DISP/R_MIPS_SCN_DISP (2105376)
-ELF-MIPS64EL: Type: R_MIPS_REL16/R_MIPS_REL16/R_MIPS_REL16 (2171169)
-ELF-MIPS64EL: Type: R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE (2236962)
-ELF-MIPS64EL: Type: R_MIPS_PJUMP/R_MIPS_PJUMP/R_MIPS_PJUMP (2302755)
-ELF-MIPS64EL: Type: R_MIPS_RELGOT/R_MIPS_RELGOT/R_MIPS_RELGOT (2368548)
-ELF-MIPS64EL: Type: R_MIPS_JALR/R_MIPS_JALR/R_MIPS_JALR (2434341)
-ELF-MIPS64EL: Type: R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32 (2500134)
-ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32 (2565927)
-ELF-MIPS64EL: Type: R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64 (2631720)
-ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64 (2697513)
-ELF-MIPS64EL: Type: R_MIPS_TLS_GD/R_MIPS_TLS_GD/R_MIPS_TLS_GD (2763306)
-ELF-MIPS64EL: Type: R_MIPS_TLS_LDM/R_MIPS_TLS_LDM/R_MIPS_TLS_LDM (2829099)
-ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16 (2894892)
-ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16 (2960685)
-ELF-MIPS64EL: Type: R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL (3026478)
-ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32 (3092271)
-ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64 (3158064)
-ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16 (3223857)
-ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16 (3289650)
-ELF-MIPS64EL: Type: R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT (3355443)
-ELF-MIPS64EL: Type: R_MIPS_COPY/R_MIPS_COPY/R_MIPS_COPY (8289918)
-ELF-MIPS64EL: Type: R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT (8355711)
-ELF-MIPS64EL: Type: R_MIPS_NUM/R_MIPS_NUM/R_MIPS_NUM (14342874)
-
-ELF-HEXAGON: Type: R_HEX_NONE (0)
-ELF-HEXAGON: Type: R_HEX_B22_PCREL (1)
-ELF-HEXAGON: Type: R_HEX_B15_PCREL (2)
-ELF-HEXAGON: Type: R_HEX_B7_PCREL (3)
-ELF-HEXAGON: Type: R_HEX_LO16 (4)
-ELF-HEXAGON: Type: R_HEX_HI16 (5)
-ELF-HEXAGON: Type: R_HEX_32 (6)
-ELF-HEXAGON: Type: R_HEX_16 (7)
-ELF-HEXAGON: Type: R_HEX_8 (8)
-ELF-HEXAGON: Type: R_HEX_GPREL16_0 (9)
-ELF-HEXAGON: Type: R_HEX_GPREL16_1 (10)
-ELF-HEXAGON: Type: R_HEX_GPREL16_2 (11)
-ELF-HEXAGON: Type: R_HEX_GPREL16_3 (12)
-ELF-HEXAGON: Type: R_HEX_HL16 (13)
-ELF-HEXAGON: Type: R_HEX_B13_PCREL (14)
-ELF-HEXAGON: Type: R_HEX_B9_PCREL (15)
-ELF-HEXAGON: Type: R_HEX_B32_PCREL_X (16)
-ELF-HEXAGON: Type: R_HEX_32_6_X (17)
-ELF-HEXAGON: Type: R_HEX_B22_PCREL_X (18)
-ELF-HEXAGON: Type: R_HEX_B15_PCREL_X (19)
-ELF-HEXAGON: Type: R_HEX_B13_PCREL_X (20)
-ELF-HEXAGON: Type: R_HEX_B9_PCREL_X (21)
-ELF-HEXAGON: Type: R_HEX_B7_PCREL_X (22)
-ELF-HEXAGON: Type: R_HEX_16_X (23)
-ELF-HEXAGON: Type: R_HEX_12_X (24)
-ELF-HEXAGON: Type: R_HEX_11_X (25)
-ELF-HEXAGON: Type: R_HEX_10_X (26)
-ELF-HEXAGON: Type: R_HEX_9_X (27)
-ELF-HEXAGON: Type: R_HEX_8_X (28)
-ELF-HEXAGON: Type: R_HEX_7_X (29)
-ELF-HEXAGON: Type: R_HEX_6_X (30)
-ELF-HEXAGON: Type: R_HEX_32_PCREL (31)
-ELF-HEXAGON: Type: R_HEX_COPY (32)
-ELF-HEXAGON: Type: R_HEX_GLOB_DAT (33)
-ELF-HEXAGON: Type: R_HEX_JMP_SLOT (34)
-ELF-HEXAGON: Type: R_HEX_RELATIVE (35)
-ELF-HEXAGON: Type: R_HEX_PLT_B22_PCREL (36)
-ELF-HEXAGON: Type: R_HEX_GOTREL_LO16 (37)
-ELF-HEXAGON: Type: R_HEX_GOTREL_HI16 (38)
-ELF-HEXAGON: Type: R_HEX_GOTREL_32 (39)
-ELF-HEXAGON: Type: R_HEX_GOT_LO16 (40)
-ELF-HEXAGON: Type: R_HEX_GOT_HI16 (41)
-ELF-HEXAGON: Type: R_HEX_GOT_32 (42)
-ELF-HEXAGON: Type: R_HEX_GOT_16 (43)
-ELF-HEXAGON: Type: R_HEX_DTPMOD_32 (44)
-ELF-HEXAGON: Type: R_HEX_DTPREL_LO16 (45)
-ELF-HEXAGON: Type: R_HEX_DTPREL_HI16 (46)
-ELF-HEXAGON: Type: R_HEX_DTPREL_32 (47)
-ELF-HEXAGON: Type: R_HEX_DTPREL_16 (48)
-ELF-HEXAGON: Type: R_HEX_GD_PLT_B22_PCREL (49)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_LO16 (50)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_HI16 (51)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_32 (52)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_16 (53)
-ELF-HEXAGON: Type: R_HEX_IE_LO16 (54)
-ELF-HEXAGON: Type: R_HEX_IE_HI16 (55)
-ELF-HEXAGON: Type: R_HEX_IE_32 (56)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_LO16 (57)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_HI16 (58)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_32 (59)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_16 (60)
-ELF-HEXAGON: Type: R_HEX_TPREL_LO16 (61)
-ELF-HEXAGON: Type: R_HEX_TPREL_HI16 (62)
-ELF-HEXAGON: Type: R_HEX_TPREL_32 (63)
-ELF-HEXAGON: Type: R_HEX_TPREL_16 (64)
-ELF-HEXAGON: Type: R_HEX_6_PCREL_X (65)
-ELF-HEXAGON: Type: R_HEX_GOTREL_32_6_X (66)
-ELF-HEXAGON: Type: R_HEX_GOTREL_16_X (67)
-ELF-HEXAGON: Type: R_HEX_GOTREL_11_X (68)
-ELF-HEXAGON: Type: R_HEX_GOT_32_6_X (69)
-ELF-HEXAGON: Type: R_HEX_GOT_16_X (70)
-ELF-HEXAGON: Type: R_HEX_GOT_11_X (71)
-ELF-HEXAGON: Type: R_HEX_DTPREL_32_6_X (72)
-ELF-HEXAGON: Type: R_HEX_DTPREL_16_X (73)
-ELF-HEXAGON: Type: R_HEX_DTPREL_11_X (74)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_32_6_X (75)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_16_X (76)
-ELF-HEXAGON: Type: R_HEX_GD_GOT_11_X (77)
-ELF-HEXAGON: Type: R_HEX_IE_32_6_X (78)
-ELF-HEXAGON: Type: R_HEX_IE_16_X (79)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_32_6_X (80)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_16_X (81)
-ELF-HEXAGON: Type: R_HEX_IE_GOT_11_X (82)
-ELF-HEXAGON: Type: R_HEX_TPREL_32_6_X (83)
-ELF-HEXAGON: Type: R_HEX_TPREL_16_X (84)
-ELF-HEXAGON: Type: R_HEX_TPREL_11_X (85)
-
-COFF-32: Type: IMAGE_REL_I386_ABSOLUTE (0)
-COFF-32: Type: IMAGE_REL_I386_DIR16 (1)
-COFF-32: Type: IMAGE_REL_I386_REL16 (2)
-COFF-32: Type: IMAGE_REL_I386_DIR32 (6)
-COFF-32: Type: IMAGE_REL_I386_DIR32NB (7)
-COFF-32: Type: IMAGE_REL_I386_SEG12 (9)
-COFF-32: Type: IMAGE_REL_I386_SECTION (10)
-COFF-32: Type: IMAGE_REL_I386_SECREL (11)
-COFF-32: Type: IMAGE_REL_I386_TOKEN (12)
-COFF-32: Type: IMAGE_REL_I386_SECREL7 (13)
-COFF-32: Type: IMAGE_REL_I386_REL32 (20)
-
-COFF-64: Type: IMAGE_REL_AMD64_ABSOLUTE (0)
-COFF-64: Type: IMAGE_REL_AMD64_ADDR64 (1)
-COFF-64: Type: IMAGE_REL_AMD64_ADDR32 (2)
-COFF-64: Type: IMAGE_REL_AMD64_ADDR32NB (3)
-COFF-64: Type: IMAGE_REL_AMD64_REL32 (4)
-COFF-64: Type: IMAGE_REL_AMD64_REL32_1 (5)
-COFF-64: Type: IMAGE_REL_AMD64_REL32_2 (6)
-COFF-64: Type: IMAGE_REL_AMD64_REL32_3 (7)
-COFF-64: Type: IMAGE_REL_AMD64_REL32_4 (8)
-COFF-64: Type: IMAGE_REL_AMD64_REL32_5 (9)
-COFF-64: Type: IMAGE_REL_AMD64_SECTION (10)
-COFF-64: Type: IMAGE_REL_AMD64_SECREL (11)
-COFF-64: Type: IMAGE_REL_AMD64_SECREL7 (12)
-COFF-64: Type: IMAGE_REL_AMD64_TOKEN (13)
-COFF-64: Type: IMAGE_REL_AMD64_SREL32 (14)
-COFF-64: Type: IMAGE_REL_AMD64_PAIR (15)
-COFF-64: Type: IMAGE_REL_AMD64_SSPAN32 (16)
-
-COFF-ARM: Type: IMAGE_REL_ARM_ABSOLUTE (0x0000)
-COFF-ARM: Type: IMAGE_REL_ARM_ADDR32 (0x0001)
-COFF-ARM: Type: IMAGE_REL_ARM_ADDR32NB (0x0002)
-COFF-ARM: Type: IMAGE_REL_ARM_BRANCH24 (0x0003)
-COFF-ARM: Type: IMAGE_REL_ARM_BRANCH11 (0x0004)
-COFF-ARM: Type: IMAGE_REL_ARM_TOKEN (0x0005)
-COFF-ARM: Type: IMAGE_REL_ARM_BLX24 (0x0008)
-COFF-ARM: Type: IMAGE_REL_ARM_BLX11 (0x0009)
-COFF-ARM: Type: IMAGE_REL_ARM_SECTION (0x000E)
-COFF-ARM: Type: IMAGE_REL_ARM_SECREL (0x000F)
-COFF-ARM: Type: IMAGE_REL_ARM_MOV32A (0x0010)
-COFF-ARM: Type: IMAGE_REL_ARM_MOV32T (0x0011)
-COFF-ARM: Type: IMAGE_REL_ARM_BRANCH20T (0x0012)
-COFF-ARM: Type: IMAGE_REL_ARM_BRANCH24T (0x0014)
-COFF-ARM: Type: IMAGE_REL_ARM_BLX23T (0x0015)
-
-MACHO-32: Type: GENERIC_RELOC_VANILLA (0)
-MACHO-32: Type: GENERIC_RELOC_PAIR (1)
-MACHO-32: Type: GENERIC_RELOC_SECTDIFF (2)
-MACHO-32: Type: GENERIC_RELOC_PB_LA_PTR (3)
-MACHO-32: Type: GENERIC_RELOC_LOCAL_SECTDIFF (4)
-MACHO-32: Type: GENERIC_RELOC_TLV (5)
-
-MACHO-64: Type: X86_64_RELOC_UNSIGNED (0)
-MACHO-64: Type: X86_64_RELOC_SIGNED (1)
-MACHO-64: Type: X86_64_RELOC_BRANCH (2)
-MACHO-64: Type: X86_64_RELOC_GOT_LOAD (3)
-MACHO-64: Type: X86_64_RELOC_GOT (4)
-MACHO-64: Type: X86_64_RELOC_SUBTRACTOR (5)
-MACHO-64: Type: X86_64_RELOC_SIGNED_1 (6)
-MACHO-64: Type: X86_64_RELOC_SIGNED_2 (7)
-MACHO-64: Type: X86_64_RELOC_SIGNED_4 (8)
-MACHO-64: Type: X86_64_RELOC_TLV (9)
-
-MACHO-ARM: Type: ARM_RELOC_VANILLA (0)
-MACHO-ARM: Type: ARM_RELOC_PAIR (1)
-MACHO-ARM: Type: ARM_RELOC_SECTDIFF (2)
-MACHO-ARM: Type: ARM_RELOC_LOCAL_SECTDIFF (3)
-MACHO-ARM: Type: ARM_RELOC_PB_LA_PTR (4)
-MACHO-ARM: Type: ARM_RELOC_BR24 (5)
-MACHO-ARM: Type: ARM_THUMB_RELOC_BR22 (6)
-MACHO-ARM: Type: ARM_THUMB_32BIT_BRANCH (7)
-MACHO-ARM: Type: ARM_RELOC_HALF (8)
-MACHO-ARM: Type: ARM_RELOC_HALF_SECTDIFF (9)
-
-MACHO-PPC: PPC_RELOC_VANILLA (0)
-MACHO-PPC: PPC_RELOC_PAIR (1)
-MACHO-PPC: PPC_RELOC_BR14 (2)
-MACHO-PPC: PPC_RELOC_BR24 (3)
-MACHO-PPC: PPC_RELOC_HI16 (4)
-MACHO-PPC: PPC_RELOC_LO16 (5)
-MACHO-PPC: PPC_RELOC_HA16 (6)
-MACHO-PPC: PPC_RELOC_LO14 (7)
-MACHO-PPC: PPC_RELOC_SECTDIFF (8)
-MACHO-PPC: PPC_RELOC_PB_LA_PTR (9)
-MACHO-PPC: PPC_RELOC_HI16_SECTDIFF (10)
-MACHO-PPC: PPC_RELOC_LO16_SECTDIFF (11)
-MACHO-PPC: PPC_RELOC_HA16_SECTDIFF (12)
-MACHO-PPC: PPC_RELOC_JBSR (13)
-MACHO-PPC: PPC_RELOC_LO14_SECTDIFF (14)
-MACHO-PPC: PPC_RELOC_LOCAL_SECTDIFF (15)

From 930dee2c0b8a1128bd0ba23995d666899ef76c89 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 12:57:04 +0000
Subject: [PATCH 0612/1176] [ARM] add target arch definitions for 8.1-M and MVE

This adds:
- LLVM subtarget features to make all the new instructions conditional on,
- CPU and FPU names for use on clang's command line, with default FPUs set
  so that "armv8.1-m.main+fp" and "armv8.1-m.main+fp.dp" will select the right
  FPU features,
- architecture extension names "mve" and "mve.fp",
- ABI build attribute support for v8.1-M (a new value for Tag_CPU_arch) and MVE
  (a new actual tag).

Patch mostly by Simon Tatham.

Differential Revision: https://reviews.llvm.org/D60698

llvm-svn: 362090
---
 llvm/include/llvm/ADT/Triple.h                |  1 +
 .../include/llvm/Support/ARMAttributeParser.h |  2 ++
 .../include/llvm/Support/ARMBuildAttributes.h |  6 +++++
 llvm/include/llvm/Support/ARMTargetParser.def |  6 +++++
 llvm/include/llvm/Support/ARMTargetParser.h   |  4 +++-
 llvm/lib/Object/ELFObjectFile.cpp             | 18 ++++++++++++++
 llvm/lib/Support/ARMAttributeParser.cpp       | 17 ++++++++++++-
 llvm/lib/Support/ARMBuildAttrs.cpp            |  1 +
 llvm/lib/Support/ARMTargetParser.cpp          |  7 ++++++
 llvm/lib/Support/Triple.cpp                   |  2 ++
 llvm/lib/Target/ARM/ARM.td                    | 24 +++++++++++++++++++
 llvm/lib/Target/ARM/ARMPredicates.td          | 12 ++++++++++
 llvm/lib/Target/ARM/ARMSubtarget.h            |  9 ++++++-
 .../ARM/MCTargetDesc/ARMTargetStreamer.cpp    |  9 ++++++-
 llvm/test/CodeGen/ARM/build-attributes.ll     |  9 +++++++
 llvm/unittests/Support/ARMAttributeParser.cpp | 20 ++++++++++++++++
 llvm/unittests/Support/TargetParserTest.cpp   | 15 ++++++++----
 17 files changed, 154 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 5c4eaae4fb1b3..3da2659c6cd6d 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -109,6 +109,7 @@ class Triple {
     ARMSubArch_v8r,
     ARMSubArch_v8m_baseline,
     ARMSubArch_v8m_mainline,
+    ARMSubArch_v8_1m_mainline,
     ARMSubArch_v7,
     ARMSubArch_v7em,
     ARMSubArch_v7m,
diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h
index c7aeaf99cbbad..f6c39abb4f211 100644
--- a/llvm/include/llvm/Support/ARMAttributeParser.h
+++ b/llvm/include/llvm/Support/ARMAttributeParser.h
@@ -53,6 +53,8 @@ class ARMAttributeParser {
                  uint32_t &Offset);
   void Advanced_SIMD_arch(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
                           uint32_t &Offset);
+  void MVE_arch(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
+                uint32_t &Offset);
   void PCS_config(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
                   uint32_t &Offset);
   void ABI_PCS_R9_use(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h
index 1299c0b7d8693..90481eaa1677c 100644
--- a/llvm/include/llvm/Support/ARMBuildAttributes.h
+++ b/llvm/include/llvm/Support/ARMBuildAttributes.h
@@ -67,6 +67,7 @@ enum AttrType {
   MPextension_use           = 42, // recoded from 70 (ABI r2.08)
   DIV_use                   = 44,
   DSP_extension             = 46,
+  MVE_arch                  = 48,
   also_compatible_with      = 65,
   conformance               = 67,
   Virtualization_use        = 68,
@@ -110,6 +111,7 @@ enum CPUArch {
   v8_R     = 15,  // e.g. Cortex R52
   v8_M_Base= 16,  // v8_M_Base AArch32
   v8_M_Main= 17,  // v8_M_Main AArch32
+  v8_1_M_Main=21, // v8_1_M_Main AArch32
 };
 
 enum CPUArchProfile {               // (=7), uleb128
@@ -151,6 +153,10 @@ enum {
   AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted
   AllowNeonARMv8_1a = 4,// ARM v8.1-A SIMD was permitted (RDMA)
 
+  // Tag_MVE_arch, (=48), uleb128
+  AllowMVEInteger = 1, // integer-only MVE was permitted
+  AllowMVEIntegerAndFloat = 2, // both integer and floating point MVE were permitted
+
   // Tag_ABI_PCS_R9_use, (=14), uleb128
   R9IsGPR = 0,        // R9 used as v6 (just another callee-saved register)
   R9IsSB = 1,         // R9 used as a global static base rgister
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 6c11128584d4b..7dd86aa1986e1 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -31,6 +31,8 @@ ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FPUVersion::VFPV4, NeonSupportLevel::None
 ARM_FPU("fpv5-d16", FK_FPV5_D16, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::D16)
 ARM_FPU("fpv5-sp-d16", FK_FPV5_SP_D16, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::SP_D16)
 ARM_FPU("fp-armv8", FK_FP_ARMV8, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::None)
+ARM_FPU("fp-armv8-fullfp16-d16", FK_FP_ARMV8_FULLFP16_D16, FPUVersion::VFPV5_FULLFP16, NeonSupportLevel::None, FPURestriction::D16)
+ARM_FPU("fp-armv8-fullfp16-sp-d16", FK_FP_ARMV8_FULLFP16_SP_D16, FPUVersion::VFPV5_FULLFP16, NeonSupportLevel::None, FPURestriction::SP_D16)
 ARM_FPU("neon", FK_NEON, FPUVersion::VFPV3, NeonSupportLevel::Neon, FPURestriction::None)
 ARM_FPU("neon-fp16", FK_NEON_FP16, FPUVersion::VFPV3_FP16, NeonSupportLevel::Neon, FPURestriction::None)
 ARM_FPU("neon-vfpv4", FK_NEON_VFPV4, FPUVersion::VFPV4, NeonSupportLevel::Neon, FPURestriction::None)
@@ -118,6 +120,8 @@ ARM_ARCH("armv8-m.base", ARMV8MBaseline, "8-M.Baseline", "v8m.base",
           ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIVTHUMB)
 ARM_ARCH("armv8-m.main", ARMV8MMainline, "8-M.Mainline", "v8m.main",
           ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIVTHUMB)
+ARM_ARCH("armv8.1-m.main", ARMV8_1MMainline, "8.1-M.Mainline", "v8.1m.main",
+         ARMBuildAttrs::CPUArch::v8_1_M_Main, FK_FP_ARMV8_FULLFP16_SP_D16, ARM::AEK_HWDIVTHUMB | ARM::AEK_RAS)
 // Non-standard Arch names.
 ARM_ARCH("iwmmxt", IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE,
           FK_NONE, ARM::AEK_NONE)
@@ -144,6 +148,8 @@ ARM_ARCH_EXT_NAME("aes",      ARM::AEK_AES,      "+aes",   "-aes")
 ARM_ARCH_EXT_NAME("dotprod",  ARM::AEK_DOTPROD,  "+dotprod","-dotprod")
 ARM_ARCH_EXT_NAME("dsp",      ARM::AEK_DSP,      "+dsp",   "-dsp")
 ARM_ARCH_EXT_NAME("fp",       ARM::AEK_FP,       nullptr,  nullptr)
+ARM_ARCH_EXT_NAME("mve",      ARM::AEK_SIMD,     "+mve",   "-mve")
+ARM_ARCH_EXT_NAME("mve.fp",   (ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp")
 ARM_ARCH_EXT_NAME("idiv",     (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr)
 ARM_ARCH_EXT_NAME("mp",       ARM::AEK_MP,       nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("simd",     ARM::AEK_SIMD,     nullptr,  nullptr)
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
index d594f1748e1d2..ca9e26202cf14 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -50,6 +50,7 @@ enum ArchExtKind : unsigned {
   AEK_SVE2SM4 =     1 << 21,
   AEK_SVE2SHA3 =    1 << 22,
   AEK_BITPERM =     1 << 23,
+  AEK_FP_DP   =     1 << 24,
   // Unsupported extensions.
   AEK_OS = 0x8000000,
   AEK_IWMMXT = 0x10000000,
@@ -131,7 +132,8 @@ enum class FPUVersion {
   VFPV3,
   VFPV3_FP16,
   VFPV4,
-  VFPV5
+  VFPV5,
+  VFPV5_FULLFP16,
 };
 
 // An FPU name restricts the FPU in one of three ways:
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 1c3469b5971af..c7b715793048c 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -230,6 +230,24 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
     }
   }
 
+  if (Attributes.hasAttribute(ARMBuildAttrs::MVE_arch)) {
+    switch(Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch)) {
+    default:
+      break;
+    case ARMBuildAttrs::Not_Allowed:
+      Features.AddFeature("mve", false);
+      Features.AddFeature("mve.fp", false);
+      break;
+    case ARMBuildAttrs::AllowMVEInteger:
+      Features.AddFeature("mve.fp", false);
+      Features.AddFeature("mve");
+      break;
+    case ARMBuildAttrs::AllowMVEIntegerAndFloat:
+      Features.AddFeature("mve.fp");
+      break;
+    }
+  }
+
   if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) {
     switch(Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) {
     default:
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 0b7db2f2f7205..08b939a873465 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -37,6 +37,7 @@ ARMAttributeParser::DisplayRoutines[] = {
   ATTRIBUTE_HANDLER(FP_arch),
   ATTRIBUTE_HANDLER(WMMX_arch),
   ATTRIBUTE_HANDLER(Advanced_SIMD_arch),
+  ATTRIBUTE_HANDLER(MVE_arch),
   ATTRIBUTE_HANDLER(PCS_config),
   ATTRIBUTE_HANDLER(ABI_PCS_R9_use),
   ATTRIBUTE_HANDLER(ABI_PCS_RW_data),
@@ -132,7 +133,9 @@ void ARMAttributeParser::CPU_arch(AttrType Tag, const uint8_t *Data,
   static const char *const Strings[] = {
     "Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6",
     "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
-    "ARM v7E-M", "ARM v8"
+    "ARM v7E-M", "ARM v8", nullptr,
+    "ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr,
+    "ARM v8.1-M Mainline"
   };
 
   uint64_t Value = ParseInteger(Data, Offset);
@@ -213,6 +216,18 @@ void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data,
   PrintAttribute(Tag, Value, ValueDesc);
 }
 
+void ARMAttributeParser::MVE_arch(AttrType Tag, const uint8_t *Data,
+                                  uint32_t &Offset) {
+  static const char *const Strings[] = {
+    "Not Permitted", "MVE integer", "MVE integer and float"
+  };
+
+  uint64_t Value = ParseInteger(Data, Offset);
+  StringRef ValueDesc =
+    (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+  PrintAttribute(Tag, Value, ValueDesc);
+}
+
 void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data,
                                     uint32_t &Offset) {
   static const char *const Strings[] = {
diff --git a/llvm/lib/Support/ARMBuildAttrs.cpp b/llvm/lib/Support/ARMBuildAttrs.cpp
index f5fb64bb7ba69..d0c4fb792cb8c 100644
--- a/llvm/lib/Support/ARMBuildAttrs.cpp
+++ b/llvm/lib/Support/ARMBuildAttrs.cpp
@@ -28,6 +28,7 @@ const struct {
   { ARMBuildAttrs::FP_arch, "Tag_FP_arch" },
   { ARMBuildAttrs::WMMX_arch, "Tag_WMMX_arch" },
   { ARMBuildAttrs::Advanced_SIMD_arch, "Tag_Advanced_SIMD_arch" },
+  { ARMBuildAttrs::MVE_arch, "Tag_MVE_arch" },
   { ARMBuildAttrs::PCS_config, "Tag_PCS_config" },
   { ARMBuildAttrs::ABI_PCS_R9_use, "Tag_ABI_PCS_R9_use" },
   { ARMBuildAttrs::ABI_PCS_RW_data, "Tag_ABI_PCS_RW_data" },
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index 02f0d95ff2784..a33f602e532bf 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -77,6 +77,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
   case ArchKind::ARMV8R:
   case ArchKind::ARMV8MBaseline:
   case ArchKind::ARMV8MMainline:
+  case ArchKind::ARMV8_1MMainline:
     return 8;
   case ArchKind::INVALID:
     return 0;
@@ -93,6 +94,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
   case ArchKind::ARMV7EM:
   case ArchKind::ARMV8MMainline:
   case ArchKind::ARMV8MBaseline:
+  case ArchKind::ARMV8_1MMainline:
     return ProfileKind::M;
   case ArchKind::ARMV7R:
   case ArchKind::ARMV8R:
@@ -151,6 +153,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
       .Case("v8r", "v8-r")
       .Case("v8m.base", "v8-m.base")
       .Case("v8m.main", "v8-m.main")
+      .Case("v8.1m.main", "v8.1-m.main")
       .Default(Arch);
 }
 
@@ -164,6 +167,10 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
   // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
   // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
   switch (FPUNames[FPUKind].FPUVer) {
+  case FPUVersion::VFPV5_FULLFP16:
+    Features.push_back("+fp-armv8");
+    Features.push_back("+fullfp16");
+    break;
   case FPUVersion::VFPV5:
     Features.push_back("+fp-armv8");
     break;
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index caf39a761d7e6..eacfe3b6bf0af 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -625,6 +625,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
     return Triple::ARMSubArch_v8m_baseline;
   case ARM::ArchKind::ARMV8MMainline:
     return Triple::ARMSubArch_v8m_mainline;
+  case ARM::ArchKind::ARMV8_1MMainline:
+    return Triple::ARMSubArch_v8_1m_mainline;
   default:
     return Triple::NoSubArch;
   }
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 62cd79c9347a3..5671c0bd83157 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -498,6 +498,19 @@ def HasV8_5aOps   : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
                                    "Support ARM v8.5a instructions",
                                    [HasV8_4aOps, FeatureSB]>;
 
+def HasV8_1MMainlineOps : SubtargetFeature<
+               "v8.1m.main", "HasV8_1MMainlineOps", "true",
+               "Support ARM v8-1M Mainline instructions",
+               [HasV8MMainlineOps]>;
+def HasMVEIntegerOps : SubtargetFeature<
+               "mve", "HasMVEIntegerOps", "true",
+               "Support M-Class Vector Extension with integer ops",
+               [HasV8_1MMainlineOps, FeatureDSP, FeatureFPRegs16, FeatureFPRegs64]>;
+def HasMVEFloatOps : SubtargetFeature<
+               "mve.fp", "HasMVEFloatOps", "true",
+               "Support M-Class Vector Extension with integer and floating ops",
+               [HasMVEIntegerOps, FeatureFPARMv8_D16_SP, FeatureFullFP16]>;
+
 //===----------------------------------------------------------------------===//
 // ARM Processor subtarget features.
 //
@@ -783,6 +796,17 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
                                                        FeatureAcquireRelease,
                                                        FeatureMClass]>;
 
+def ARMv81mMainline : Architecture<"armv8.1-m.main", "ARMv81mMainline",
+                                                      [HasV8_1MMainlineOps,
+                                                       FeatureNoARM,
+                                                       ModeThumb,
+                                                       FeatureDB,
+                                                       FeatureHWDivThumb,
+                                                       Feature8MSecExt,
+                                                       FeatureAcquireRelease,
+                                                       FeatureMClass,
+                                                       FeatureRAS]>;
+
 // Aliases
 def IWMMXT   : Architecture<"iwmmxt",      "ARMv5te",  [ARMv5te]>;
 def IWMMXT2  : Architecture<"iwmmxt2",     "ARMv5te",  [ARMv5te]>;
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
index ab4ed3936aba3..1df22e60be5f6 100644
--- a/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -26,6 +26,15 @@ def HasV8MBaseline   : Predicate<"Subtarget->hasV8MBaselineOps()">,
 def HasV8MMainline   : Predicate<"Subtarget->hasV8MMainlineOps()">,
                                  AssemblerPredicate<"HasV8MMainlineOps",
                                                     "armv8m.main">;
+def HasV8_1MMainline : Predicate<"Subtarget->hasV8_1MMainlineOps()">,
+                                 AssemblerPredicate<"HasV8_1MMainlineOps",
+                                                    "armv8.1m.main">;
+def HasMVEInt        : Predicate<"Subtarget->hasMVEIntegerOps()">,
+                                 AssemblerPredicate<"HasMVEIntegerOps",
+                                                    "mve">;
+def HasMVEFloat      : Predicate<"Subtarget->hasMVEFloatOps()">,
+                                 AssemblerPredicate<"HasMVEFloatOps",
+                                                    "mve.fp">;
 def HasFPRegs        : Predicate<"Subtarget->hasFPRegs()">,
                                  AssemblerPredicate<"FeatureFPRegs",
                                                     "fp registers">;
@@ -35,6 +44,9 @@ def HasFPRegs16      : Predicate<"Subtarget->hasFPRegs16()">,
 def HasFPRegs64      : Predicate<"Subtarget->hasFPRegs64()">,
                                  AssemblerPredicate<"FeatureFPRegs64",
                                                     "64-bit fp registers">;
+def HasFPRegsV8_1M   : Predicate<"Subtarget->hasFPRegs() && Subtarget->hasV8_1MMainlineOps()">,
+                                 AssemblerPredicate<"FeatureFPRegs,HasV8_1MMainlineOps",
+                                                    "armv8.1m.main with FP or MVE">;
 def HasV6T2          : Predicate<"Subtarget->hasV6T2Ops()">,
                                  AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
 def NoV6T2           : Predicate<"!Subtarget->hasV6T2Ops()">;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 03bea3572ce28..8123ef6224bb5 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -110,7 +110,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     ARMv8a,
     ARMv8mBaseline,
     ARMv8mMainline,
-    ARMv8r
+    ARMv8r,
+    ARMv81mMainline,
   };
 
 public:
@@ -157,6 +158,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool HasV8_5aOps = false;
   bool HasV8MBaselineOps = false;
   bool HasV8MMainlineOps = false;
+  bool HasV8_1MMainlineOps = false;
+  bool HasMVEIntegerOps = false;
+  bool HasMVEFloatOps = false;
 
   /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
   /// floating point ISAs are supported.
@@ -569,6 +573,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool hasV8_5aOps() const { return HasV8_5aOps; }
   bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
   bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+  bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
+  bool hasMVEIntegerOps() const { return HasMVEIntegerOps; }
+  bool hasMVEFloatOps() const { return HasMVEFloatOps; }
   bool hasFPRegs() const { return HasFPRegs; }
   bool hasFPRegs16() const { return HasFPRegs16; }
   bool hasFPRegs64() const { return HasFPRegs64; }
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 9502a5d7c393b..b863517c0cca9 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -124,7 +124,9 @@ static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
     if (STI.hasFeature(ARM::FeatureRClass))
       return ARMBuildAttrs::v8_R;
     return ARMBuildAttrs::v8_A;
-  } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+  } else if (STI.hasFeature(ARM::HasV8_1MMainlineOps))
+    return ARMBuildAttrs::v8_1_M_Main;
+  else if (STI.hasFeature(ARM::HasV8MMainlineOps))
     return ARMBuildAttrs::v8_M_Main;
   else if (STI.hasFeature(ARM::HasV7Ops)) {
     if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
@@ -262,6 +264,11 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
   if (STI.hasFeature(ARM::FeatureMP))
     emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
 
+  if (STI.hasFeature(ARM::HasMVEFloatOps))
+    emitAttribute(ARMBuildAttrs::MVE_arch, ARMBuildAttrs::AllowMVEIntegerAndFloat);
+  else if (STI.hasFeature(ARM::HasMVEIntegerOps))
+    emitAttribute(ARMBuildAttrs::MVE_arch, ARMBuildAttrs::AllowMVEInteger);
+
   // Hardware divide in ARM mode is part of base arch, starting from ARMv8.
   // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
   // It is not possible to produce DisallowDIV: if hwdiv is present in the base
diff --git a/llvm/test/CodeGen/ARM/build-attributes.ll b/llvm/test/CodeGen/ARM/build-attributes.ll
index f349530fb4893..39d173c4f9f5c 100644
--- a/llvm/test/CodeGen/ARM/build-attributes.ll
+++ b/llvm/test/CodeGen/ARM/build-attributes.ll
@@ -240,6 +240,9 @@
 ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
 ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m35p | FileCheck %s --check-prefix=NO-STRICT-ALIGN
 ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m35p -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi | FileCheck %s --check-prefix=ARMv81M-MAIN
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEINT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEFP
 
 ; CPU-SUPPORTED-NOT: is not a recognized processor for this target
 
@@ -1769,6 +1772,12 @@
 ; ARMv8R: .eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format
 ; ARMv8R: .eabi_attribute 14, 0   @ Tag_ABI_PCS_R9_use
 
+; ARMv81M-MAIN: .eabi_attribute 6, 21 @ Tag_CPU_arch
+; ARMv81M-MAIN-NOT: .eabi_attribute 48
+; ARMv81M-MAIN-MVEINT: .eabi_attribute 6, 21 @ Tag_CPU_arch
+; ARMv81M-MAIN-MVEINT: .eabi_attribute 48, 1 @ Tag_MVE_arch
+; ARMv81M-MAIN-MVEFP: .eabi_attribute 6, 21 @ Tag_CPU_arch
+; ARMv81M-MAIN-MVEFP: .eabi_attribute 48, 2 @ Tag_MVE_arch
 define i32 @f(i64 %z) {
     ret i32 0
 }
diff --git a/llvm/unittests/Support/ARMAttributeParser.cpp b/llvm/unittests/Support/ARMAttributeParser.cpp
index 994011872b962..6781acc59a72b 100644
--- a/llvm/unittests/Support/ARMAttributeParser.cpp
+++ b/llvm/unittests/Support/ARMAttributeParser.cpp
@@ -75,6 +75,16 @@ TEST(CPUArchBuildAttr, testBuildAttr) {
                                ARMBuildAttrs::v6S_M));
   EXPECT_TRUE(testBuildAttr(6, 13, ARMBuildAttrs::CPU_arch,
                                ARMBuildAttrs::v7E_M));
+  EXPECT_TRUE(testBuildAttr(6, 14, ARMBuildAttrs::CPU_arch,
+                               ARMBuildAttrs::v8_A));
+  EXPECT_TRUE(testBuildAttr(6, 15, ARMBuildAttrs::CPU_arch,
+                               ARMBuildAttrs::v8_R));
+  EXPECT_TRUE(testBuildAttr(6, 16, ARMBuildAttrs::CPU_arch,
+                               ARMBuildAttrs::v8_M_Base));
+  EXPECT_TRUE(testBuildAttr(6, 17, ARMBuildAttrs::CPU_arch,
+                               ARMBuildAttrs::v8_M_Main));
+  EXPECT_TRUE(testBuildAttr(6, 21, ARMBuildAttrs::CPU_arch,
+                               ARMBuildAttrs::v8_1_M_Main));
 }
 
 TEST(CPUArchProfileBuildAttr, testBuildAttr) {
@@ -159,6 +169,16 @@ TEST(FPHPBuildAttr, testBuildAttr) {
                             ARMBuildAttrs::AllowHPFP));
 }
 
+TEST(MVEBuildAttr, testBuildAttr) {
+  EXPECT_TRUE(testTagString(48, "Tag_MVE_arch"));
+  EXPECT_TRUE(testBuildAttr(48, 0, ARMBuildAttrs::MVE_arch,
+                            ARMBuildAttrs::Not_Allowed));
+  EXPECT_TRUE(testBuildAttr(48, 1, ARMBuildAttrs::MVE_arch,
+                            ARMBuildAttrs::AllowMVEInteger));
+  EXPECT_TRUE(testBuildAttr(48, 2, ARMBuildAttrs::MVE_arch,
+                            ARMBuildAttrs::AllowMVEIntegerAndFloat));
+}
+
 TEST(CPUAlignBuildAttr, testBuildAttr) {
   EXPECT_TRUE(testTagString(34, "Tag_CPU_unaligned_access"));
   EXPECT_TRUE(testBuildAttr(34, 0, ARMBuildAttrs::CPU_unaligned_access,
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 08ed7645ea0e4..278339acc3bd8 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -27,7 +27,8 @@ const char *ARMArch[] = {
     "armv8l",      "armv8.1-a",    "armv8.1a",    "armv8.2-a",    "armv8.2a",
     "armv8.3-a",   "armv8.3a",     "armv8.4-a",   "armv8.4a",     "armv8.5-a",
     "armv8.5a",     "armv8-r",     "armv8r",      "armv8-m.base", "armv8m.base",
-    "armv8-m.main", "armv8m.main", "iwmmxt",      "iwmmxt2",      "xscale"
+    "armv8-m.main", "armv8m.main", "iwmmxt",      "iwmmxt2",      "xscale",
+    "armv8.1-m.main",
 };
 
 bool testARMCPU(StringRef CPUName, StringRef ExpectedArch,
@@ -417,6 +418,9 @@ TEST(TargetParserTest, testARMArch) {
   EXPECT_TRUE(
       testARMArch("armv8-m.main", "generic", "v8m.main",
                           ARMBuildAttrs::CPUArch::v8_M_Main));
+  EXPECT_TRUE(
+      testARMArch("armv8.1-m.main", "generic", "v8.1m.main",
+                          ARMBuildAttrs::CPUArch::v8_1_M_Main));
   EXPECT_TRUE(
       testARMArch("iwmmxt", "iwmmxt", "",
                           ARMBuildAttrs::CPUArch::v5TE));
@@ -569,7 +573,7 @@ TEST(TargetParserTest, ARMExtensionFeatures) {
   unsigned Extensions = ARM::AEK_CRC | ARM::AEK_CRYPTO | ARM::AEK_DSP |
                         ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_MP |
                         ARM::AEK_SEC | ARM::AEK_VIRT | ARM::AEK_RAS | ARM::AEK_FP16 |
-                        ARM::AEK_FP16FML;
+                        ARM::AEK_FP16FML | ARM::AEK_FP_DP;
 
   for (unsigned i = 0; i <= Extensions; i++)
     EXPECT_TRUE(i == 0 ? !ARM::getExtensionFeatures(i, Features)
@@ -605,7 +609,9 @@ TEST(TargetParserTest, ARMArchExtFeature) {
                               {"iwmmxt2", "noiwmmxt2", nullptr, nullptr},
                               {"maverick", "maverick", nullptr, nullptr},
                               {"xscale", "noxscale", nullptr, nullptr},
-                              {"sb", "nosb", "+sb", "-sb"}};
+                              {"sb", "nosb", "+sb", "-sb"},
+                              {"mve", "nomve", "+mve", "-mve"},
+                              {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
 
   for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]), ARM::getArchExtFeature(ArchExt[i][0]));
@@ -628,7 +634,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
       "v7",   "v7a",    "v7ve",  "v7hl",   "v7l",   "v7-r",   "v7r",   "v7-m",
       "v7m",  "v7k",    "v7s",   "v7e-m",  "v7em",  "v8-a",   "v8",    "v8a",
       "v8l",  "v8.1-a", "v8.1a", "v8.2-a", "v8.2a", "v8.3-a", "v8.3a", "v8.4-a",
-      "v8.4a", "v8.5-a","v8.5a", "v8-r"
+      "v8.4a", "v8.5-a","v8.5a", "v8-r",   "v8m.base", "v8m.main", "v8.1m.main"
   };
 
   for (unsigned i = 0; i < array_lengthof(Arch); i++) {
@@ -677,6 +683,7 @@ TEST(TargetParserTest, ARMparseArchProfile) {
     case ARM::ArchKind::ARMV7EM:
     case ARM::ArchKind::ARMV8MMainline:
     case ARM::ArchKind::ARMV8MBaseline:
+    case ARM::ArchKind::ARMV8_1MMainline:
       EXPECT_EQ(ARM::ProfileKind::M, ARM::parseArchProfile(ARMArch[i]));
       break;
     case ARM::ArchKind::ARMV7R:

From fae2e46766c492e003e3be8bf32ef1acd5e1dcc0 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 13:01:53 +0000
Subject: [PATCH 0613/1176] [LoopIdiomRecognize][NFC] Sort includes

Split off from D61144

llvm-svn: 362091
---
 llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 9f98d03e9c152..055d3054222fb 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -36,6 +36,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -55,7 +56,6 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
@@ -86,8 +86,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <cassert>

From e8578953ac6c3f708aa2081c9b11e0e946020e7c Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 13:02:06 +0000
Subject: [PATCH 0614/1176] [LoopIdiom] Basic OptimizationRemarkEmitter
 handling

Summary:
I'm adding ORE to memset/memcpy formation, with tests,
but mainly this is split off from D61144.

Reviewers: reames, anemet, thegameg, craig.topper

Reviewed By: thegameg

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62631

llvm-svn: 362092
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 44 +++++++++++++--
 .../LoopIdiom/memcpy-debugify-remarks.ll      | 51 +++++++++++++++++
 .../LoopIdiom/memset-debugify-remarks.ll      | 55 +++++++++++++++++++
 .../Transforms/LoopIdiom/nontemporal_store.ll |  2 +-
 4 files changed, 147 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
 create mode 100644 llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 055d3054222fb..811cbf3552ae1 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -51,6 +51,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -119,6 +120,7 @@ class LoopIdiomRecognize {
   TargetLibraryInfo *TLI;
   const TargetTransformInfo *TTI;
   const DataLayout *DL;
+  OptimizationRemarkEmitter &ORE;
   bool ApplyCodeSizeHeuristics;
 
 public:
@@ -126,8 +128,9 @@ class LoopIdiomRecognize {
                               LoopInfo *LI, ScalarEvolution *SE,
                               TargetLibraryInfo *TLI,
                               const TargetTransformInfo *TTI,
-                              const DataLayout *DL)
-      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
+                              const DataLayout *DL,
+                              OptimizationRemarkEmitter &ORE)
+      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
 
   bool runOnLoop(Loop *L);
 
@@ -220,7 +223,12 @@ class LoopIdiomRecognizeLegacyPass : public LoopPass {
             *L->getHeader()->getParent());
     const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
 
-    LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+    // pass.  Function analyses need to be preserved across loop transformations
+    // but ORE cannot be preserved (see comment before the pass definition).
+    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
+
+    LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
     return LIR.runOnLoop(L);
   }
 
@@ -242,7 +250,19 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
                                               LPMUpdater &) {
   const auto *DL = &L.getHeader()->getModule()->getDataLayout();
 
-  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
+  const auto &FAM =
+      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+  Function *F = L.getHeader()->getParent();
+
+  auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+  // FIXME: This should probably be optional rather than required.
+  if (!ORE)
+    report_fatal_error(
+        "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
+        "at a higher level");
+
+  LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
+                         *ORE);
   if (!LIR.runOnLoop(&L))
     return PreservedAnalyses::all();
 
@@ -951,6 +971,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
                     << "\n");
   NewCall->setDebugLoc(TheStore->getDebugLoc());
 
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
+                              NewCall->getDebugLoc(), Preheader)
+           << "Transformed loop-strided store into a call to "
+           << ore::NV("NewFunction", NewCall->getCalledFunction())
+           << "() function";
+  });
+
   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
   for (auto *I : Stores)
@@ -1083,6 +1111,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
                     << "    from store ptr=" << *StoreEv << " at: " << *SI
                     << "\n");
 
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
+                              NewCall->getDebugLoc(), Preheader)
+           << "Formed a call to "
+           << ore::NV("NewFunction", NewCall->getCalledFunction())
+           << "() function";
+  });
+
   // Okay, the memcpy has been formed.  Zap the original store and anything that
   // feeds into it.
   deleteDeadInstruction(SI);
diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
new file mode 100644
index 0000000000000..40a11395026d9
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info  < %s -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
+
+; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() function
+
+define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test6_dest_align(
+; CHECK-NEXT:  bb.nph:
+; CHECK-NEXT:    [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
+; CHECK-NEXT:    [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[SIZE:%.*]], 2, !dbg !18
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]], !dbg !18
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg !20
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata !9, metadata !DIExpression()), !dbg !20
+; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]], !dbg !21
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32* [[I_0_014]], metadata !11, metadata !DIExpression()), !dbg !21
+; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]], !dbg !22
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32* [[DESTI]], metadata !12, metadata !DIExpression()), !dbg !22
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[I_0_014]], align 1, !dbg !23
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[V]], metadata !13, metadata !DIExpression()), !dbg !23
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg !24
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata !15, metadata !DIExpression()), !dbg !24
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg !25
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata !16, metadata !DIExpression()), !dbg !25
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg !26
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void, !dbg !27
+;
+bb.nph:
+  br label %for.body
+
+for.body:                                         ; preds = %bb.nph, %for.body
+  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+  %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+  %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+  %V = load i32, i32* %I.0.014, align 1
+  store i32 %V, i32* %DestI, align 4
+  %indvar.next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %Size
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll
new file mode 100644
index 0000000000000..54b39e9451704
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info  < %s -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
+
+; void my_basic_memset(char* begin, char* end, char value) {
+;   for( ; begin != end; ++begin)
+;     *begin = value;
+; }
+
+; CHECK: remark: <stdin>:4:1: Transformed loop-strided store into a call to llvm.memset.p0i8.i64() function
+
+define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) {
+; CHECK-LABEL: @_Z15my_basic_memsetPcS_c(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR1:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END:%.*]], !dbg !15
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata !9, metadata !DIExpression()), !dbg !15
+; CHECK-NEXT:    br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg !16
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 0, [[PTR1]], !dbg !17
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 [[TMP0]], !dbg !17
+; CHECK-NEXT:    [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[SCEVGEP2]], i1 false), !dbg !17
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]], !dbg !17
+; CHECK:       for.body:
+; CHECK-NEXT:    [[PTR_ADDR_04:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg !18
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i8* [[PTR_ADDR_04]], metadata !11, metadata !DIExpression()), !dbg !18
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR_ADDR_04]], i64 1, !dbg !19
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i8* [[INCDEC_PTR]], metadata !13, metadata !DIExpression()), !dbg !19
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[END]], !dbg !20
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !14, metadata !DIExpression()), !dbg !20
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg !21
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]], !dbg !22
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void, !dbg !22
+;
+entry:
+  %cmp3 = icmp eq i8* %ptr, %end
+  br i1 %cmp3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %ptr.addr.04 = phi i8* [ %incdec.ptr, %for.body ], [ %ptr, %entry ]
+  store i8 %value, i8* %ptr.addr.04, align 1
+  %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.04, i64 1
+  %cmp = icmp eq i8* %incdec.ptr, %end
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll b/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll
index a5f8c7c451c73..c43c1c669e164 100644
--- a/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll
+++ b/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -loop-idiom < %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(loop-idiom)' < %s -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 

From 8f220a5d2c85ee75c0bca4e07bff57cace6cf07a Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 13:02:11 +0000
Subject: [PATCH 0615/1176] [NFC][Codegen] Add add+sub/sub+add constant-fold
 tests for from D62257

add+sub/sub+add when second operands are constants should be folded
into a single add, just like with add+add.

llvm-svn: 362093
---
 llvm/test/CodeGen/AArch64/vec_add.ll | 126 ++++++++++++++++++++
 llvm/test/CodeGen/X86/vec_add.ll     | 165 ++++++++++++++++++++++++++-
 2 files changed, 288 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/vec_add.ll

diff --git a/llvm/test/CodeGen/AArch64/vec_add.ll b/llvm/test/CodeGen/AArch64/vec_add.ll
new file mode 100644
index 0000000000000..9609822b54f33
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vec_add.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown  | FileCheck %s
+
+declare void @use(<4 x i32> %arg)
+
+define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+	%tmp9 = add <2 x i64> %b, %a
+	ret <2 x i64> %tmp9
+}
+
+define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_add_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #10
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_sub_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvni v2.4s, #1
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    mvni v0.4s, #1
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_sub_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x9, .LCPI4_1
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI4_1]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -3, i32 undef, i32 -2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_add_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvni v1.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
+  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mvni v1.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #8
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_add_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT:    adrp x8, .LCPI7_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI7_1]
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -3, i32 undef, i32 -2>
+  %t1 = add <4 x i32> %t0, <i32 21, i32 undef, i32 8, i32 8>
+  ret <4 x i32> %t1
+}
diff --git a/llvm/test/CodeGen/X86/vec_add.ll b/llvm/test/CodeGen/X86/vec_add.ll
index 3d144e8ea3e9a..48ccf34dd0b0a 100644
--- a/llvm/test/CodeGen/X86/vec_add.ll
+++ b/llvm/test/CodeGen/X86/vec_add.ll
@@ -1,7 +1,166 @@
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
+
+declare void @use(<4 x i32> %arg)
 
 define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
-entry:
-	%tmp9 = add <2 x i64> %b, %a		; <<2 x i64>> [#uses=1]
+; X86-LABEL: test:
+; X86:       # %bb.0:
+; X86-NEXT:    paddq %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: test:
+; X64:       # %bb.0:
+; X64-NEXT:    paddq %xmm1, %xmm0
+; X64-NEXT:    retq
+	%tmp9 = add <2 x i64> %b, %a
 	ret <2 x i64> %tmp9
 }
+
+define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
+; X86-LABEL: add_const_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_add_const:
+; X64:       # %bb.0:
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
+; X86-LABEL: add_const_sub_const:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: add_const_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: add_const_sub_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -3, i32 undef, i32 -2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
+; X86-LABEL: sub_const_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
+  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: sub_const_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: sub_const_add_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -3, i32 undef, i32 -2>
+  %t1 = add <4 x i32> %t0, <i32 21, i32 undef, i32 8, i32 8>
+  ret <4 x i32> %t1
+}

From 5359bb4d3163c0a1bfaa62fb2e6e79e3d20ce921 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 30 May 2019 14:01:24 +0000
Subject: [PATCH 0616/1176] [ARM] LowerVECTOR_SHUFFLE - fix uninitialized
 variable warnings. NFCI.

llvm-svn: 362094
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7dd2fef89ee79..9231ad20aa3ab 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7007,8 +7007,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
                          DAG.getConstant(Lane, dl, MVT::i32));
     }
 
-    bool ReverseVEXT;
-    unsigned Imm;
+    bool ReverseVEXT = false;
+    unsigned Imm = 0;
     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
       if (ReverseVEXT)
         std::swap(V1, V2);
@@ -7033,8 +7033,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     // source operands and with masks corresponding to both results of one of
     // these operations, DAG memoization will ensure that a single node is
     // used for both shuffles.
-    unsigned WhichResult;
-    bool isV_UNDEF;
+    unsigned WhichResult = 0;
+    bool isV_UNDEF = false;
     if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
             ShuffleMask, VT, WhichResult, isV_UNDEF)) {
       if (isV_UNDEF)

From 064ae08e864eacd0881cb291f412ff727a5094bb Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 14:16:29 +0000
Subject: [PATCH 0617/1176] [ELF][test] Update znotext-plt-relocations.s to
 emit R_X86_64_PC32

This is a leftover from r325379.

The intention of this test was to check in a non-pic link, R_X86_64_PC32
to a STT_FUNC created a PLT. However, after the llvm-mc change in
r325569, this code path is no longer exercised. Use the r325379 trick to
keep testing R_X86_64_PC32.

llvm-svn: 362095
---
 lld/test/ELF/znotext-plt-relocations.s | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lld/test/ELF/znotext-plt-relocations.s b/lld/test/ELF/znotext-plt-relocations.s
index ad305b0c1f6c8..3bc1f417cb0e1 100644
--- a/lld/test/ELF/znotext-plt-relocations.s
+++ b/lld/test/ELF/znotext-plt-relocations.s
@@ -16,5 +16,6 @@
 
 .text
 _start:
-callq atexit
+.byte 0xe8
+.long atexit - . - 4
 .quad foo

From 24f12711ae27e80291c3bab6097cef99168eb21f Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 14:22:26 +0000
Subject: [PATCH 0618/1176] [ARM] Add CLI support for Armv8.1-M and MVE

Given the existing infrastructure in LLVM side for +fp and +fp.dp,
this is more or less trivial, needing only one tiny source change and
a couple of tests.

Patch by Simon Tatham.

Differential Revision: https://reviews.llvm.org/D60699

llvm-svn: 362096
---
 clang/lib/Basic/Targets/ARM.cpp   |  2 +
 clang/test/Driver/armv8.1m.main.c | 34 ++++++++++++++++
 clang/test/Driver/armv8.1m.main.s | 65 +++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+)
 create mode 100644 clang/test/Driver/armv8.1m.main.c
 create mode 100644 clang/test/Driver/armv8.1m.main.s

diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 18ad466afe709..b6ad82260c627 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -197,6 +197,8 @@ StringRef ARMTargetInfo::getCPUAttr() const {
     return "8M_MAIN";
   case llvm::ARM::ArchKind::ARMV8R:
     return "8R";
+  case llvm::ARM::ArchKind::ARMV8_1MMainline:
+    return "8_1M_MAIN";
   }
 }
 
diff --git a/clang/test/Driver/armv8.1m.main.c b/clang/test/Driver/armv8.1m.main.c
new file mode 100644
index 0000000000000..46f421fcbb0dc
--- /dev/null
+++ b/clang/test/Driver/armv8.1m.main.c
@@ -0,0 +1,34 @@
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+dsp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-DSP < %t %s
+// CHECK-DSP: "-target-feature" "+dsp"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+fp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FP < %t %s
+// CHECK-FP: "-target-feature" "+fp-armv8"
+// CHECK-FP-NOT: "-target-feature" "+fp64"
+// CHECK-FP-NOT: "-target-feature" "+d32"
+// CHECK-FP: "-target-feature" "+fullfp16"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+fp.dp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FPDP < %t %s
+// CHECK-FPDP: "-target-feature" "+fp-armv8"
+// CHECK-FPDP: "-target-feature" "+fullfp16"
+// CHECK-FPDP: "-target-feature" "+fp64"
+// CHECK-FPDP-NOT: "-target-feature" "+d32"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-MVE < %t %s
+// CHECK-MVE: "-target-feature" "+mve"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-MVEFP < %t %s
+// CHECK-MVEFP: "-target-feature" "+mve.fp"
+// CHECK-MVEFP-NOT: "-target-feature" "+fp64"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-MVEFP_DP < %t %s
+// CHECK-MVEFP_DP: "-target-feature" "+mve.fp"
+// CHECK-MVEFP_DP: "-target-feature" "+fp64"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1m.main+fp -S %s
+double foo (double a) { return a; }
diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s
new file mode 100644
index 0000000000000..4dbe97953093f
--- /dev/null
+++ b/clang/test/Driver/armv8.1m.main.s
@@ -0,0 +1,65 @@
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8-m.main %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V8M < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+dsp %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_DSP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_FP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp.dp %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve+fp %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_MVE_FP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
+# RUN: %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp %s
+
+.syntax unified
+.thumb
+.text
+
+csinc r0, r1, r2, eq
+# ERROR-V8M: :[[@LINE-1]]:1: error
+
+qadd     r0, r1, r2
+# ERROR-V8M: :[[@LINE-1]]:1: error
+# ERROR-V81M: :[[@LINE-2]]:1: error
+# ERROR-V81M_FP: :[[@LINE-3]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-4]]:1: error
+
+vadd.f16 s0, s1, s2
+# ERROR-V8M: :[[@LINE-1]]:1: error
+# ERROR-V81M: :[[@LINE-2]]:1: error
+# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_MVE: :[[@LINE-4]]:1: error
+
+vabs.f32 s0, s1
+# ERROR-V8M: :[[@LINE-1]]:1: error
+# ERROR-V81M: :[[@LINE-2]]:1: error
+# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_MVE: :[[@LINE-4]]:1: error
+
+vcmp.f64 d0,d1
+# ERROR-V8M: :[[@LINE-1]]:1: error
+# ERROR-V81M: :[[@LINE-2]]:1: error
+# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_FP: :[[@LINE-4]]:1: error
+# ERROR-V81M_MVE: :[[@LINE-5]]:1: error
+# ERROR-V81M_MVE_FP: :[[@LINE-6]]:1: error
+# ERROR-V81M_MVEFP: :[[@LINE-7]]:1: error
+
+asrl r0, r1, r2
+# ERROR-V8M: :[[@LINE-1]]:1: error
+# ERROR-V81M: :[[@LINE-2]]:1: error
+# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_FP: :[[@LINE-4]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error
+
+vcadd.i8 q0, q1, q2, #90
+# ERROR-V8M: :[[@LINE-1]]:1: error
+# ERROR-V81M: :[[@LINE-2]]:1: error
+# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_FP: :[[@LINE-4]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error

From eb072b5a6a2c4b27236d6f4ecbe57012783ce626 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 14:34:29 +0000
Subject: [PATCH 0619/1176] [ARM] Change the MC names for VMAXNM/VMINNM

Now the NEON ones have a prefix "NEON_", and the VFP ones have a
prefix "VFP_". This is so that the regex in ARMScheduleA57.td can be
made to match both of _those_ classes of VMAXNM without also matching
the MVE ones that are going to be introduced soon. NFCI.

Patch by Simon Tatham.

Differential Revision: https://reviews.llvm.org/D60700

llvm-svn: 362097
---
 llvm/lib/Target/ARM/ARMInstrNEON.td   | 64 +++++++++++++--------------
 llvm/lib/Target/ARM/ARMInstrVFP.td    |  4 +-
 llvm/lib/Target/ARM/ARMScheduleA57.td |  3 +-
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 3aa44313a2783..effee0fcd387b 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -5537,22 +5537,22 @@ def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
 
 // VMAXNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v2f32, v2f32, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v4f32, v4f32, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
-                            v4f16, v4f16, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
-  def VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
-                            v8f16, v8f16, fmaxnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
+                                  v2f32, v2f32, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
+                                  v4f32, v4f32, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                                  v4f16, v4f16, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                                  v8f16, v8f16, fmaxnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
 }
 
 //   VMIN     : Vector Minimum
@@ -5579,22 +5579,22 @@ def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
 
 // VMINNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v2f32, v2f32, fminnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v4f32, v4f32, fminnum, 1>,
-                            Requires<[HasV8, HasNEON]>;
-  def VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f16",
-                            v4f16, v4f16, fminnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
-  def VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
-                            N3RegFrm, NoItinerary, "vminnm", "f16",
-                            v8f16, v8f16, fminnum, 1>,
-                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f32",
+                                  v2f32, v2f32, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f32",
+                                  v4f32, v4f32, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON]>;
+  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f16",
+                                  v4f16, v4f16, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+                                  N3RegFrm, NoItinerary, "vminnm", "f16",
+                                  v8f16, v8f16, fminnum, 1>,
+                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
 }
 
 // Vector Pairwise Operations.
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 88405a83b4e28..6f042d39a4c4e 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -533,8 +533,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
   }
 }
 
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
+defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
 
 // Match reassociated forms only if not sign dependent rounding.
 def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index 6db6eda317abf..de970ae2e17de 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -1174,7 +1174,8 @@ def : InstRW<[A57Write_8cyc_1V], (instregex
 
 // ASIMD FP max/min
 def : InstRW<[A57Write_5cyc_1V], (instregex
-  "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+  "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "(NEON|VFP)_VMAXNM",
+  "(NEON|VFP)_VMINNM")>;
 
 // ASIMD FP multiply
 def A57WriteVMUL_VecFP  : SchedWriteRes<[A57UnitV]> { let Latency = 5;  }

From 17604c3486cbe7c27cadac1757cd0a9109a92792 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Thu, 30 May 2019 14:48:43 +0000
Subject: [PATCH 0620/1176] [analyzer] print() JSONify chain: Fix build-bot
 breaks

Summary:
Printing out a map structure different in different environments so that
this patch generalize the test-case to check for the 'no stmt'-case
anywhere in the Store.

llvm-svn: 362098
---
 clang/test/Analysis/dump_egraph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index fd966cb78fd60..94919b636fdfd 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -20,5 +20,5 @@ void foo() {
 
 // CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
 
-// CHECK: \"store\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
+// CHECK: \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
 

From bdaa39ea6ca4a1a0263ca340abef33376a17ad63 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 14:50:10 +0000
Subject: [PATCH 0621/1176] [ELF] De-template addUndefined() and
 addWrappedSymbols(). NFC

llvm-svn: 362099
---
 lld/ELF/Driver.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index a234b8fe3c7d6..b49ab85a9c7c5 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1483,7 +1483,7 @@ static void readSymbolPartitionSection(InputSectionBase *S) {
   Sym->Partition = NewPart.getNumber();
 }
 
-template <class ELFT> static Symbol *addUndefined(StringRef Name) {
+static Symbol *addUndefined(StringRef Name) {
   return Symtab->addSymbol(
       Undefined{nullptr, Name, STB_GLOBAL, STV_DEFAULT, 0});
 }
@@ -1530,7 +1530,6 @@ struct WrappedSymbol {
 // This function instantiates wrapper symbols. At this point, they seem
 // like they are not being used at all, so we explicitly set some flags so
 // that LTO won't eliminate them.
-template <class ELFT>
 static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &Args) {
   std::vector<WrappedSymbol> V;
   DenseSet<StringRef> Seen;
@@ -1544,8 +1543,8 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &Args) {
     if (!Sym)
       continue;
 
-    Symbol *Real = addUndefined<ELFT>(Saver.save("__real_" + Name));
-    Symbol *Wrap = addUndefined<ELFT>(Saver.save("__wrap_" + Name));
+    Symbol *Real = addUndefined(Saver.save("__real_" + Name));
+    Symbol *Wrap = addUndefined(Saver.save("__wrap_" + Name));
     V.push_back({Sym, Real, Wrap});
 
     // We want to tell LTO not to inline symbols to be overwritten
@@ -1648,7 +1647,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
   // Some symbols (such as __ehdr_start) are defined lazily only when there
   // are undefined symbols for them, so we add these to trigger that logic.
   for (StringRef Name : Script->ReferencedSymbols)
-    addUndefined<ELFT>(Name);
+    addUndefined(Name);
 
   // Handle the `--undefined <sym>` options.
   for (StringRef S : Config->Undefined)
@@ -1701,7 +1700,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
   Out::ElfHeader->Size = sizeof(typename ELFT::Ehdr);
 
   // Create wrapped symbols for -wrap option.
-  std::vector<WrappedSymbol> Wrapped = addWrappedSymbols<ELFT>(Args);
+  std::vector<WrappedSymbol> Wrapped = addWrappedSymbols(Args);
 
   // We need to create some reserved symbols such as _end. Create them.
   if (!Config->Relocatable)

From d74c2131c31b5946a7f358bd9cd1467a07761cf5 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 30 May 2019 15:04:06 +0000
Subject: [PATCH 0622/1176] Follow up of r362096

The new tests were failing, because I missed dependent patch D60697.
I have removed the failing cases for now, which I will restore once
D60697 is in.

llvm-svn: 362100
---
 clang/test/Driver/armv8.1m.main.c | 20 --------------------
 clang/test/Driver/armv8.1m.main.s | 17 ++---------------
 2 files changed, 2 insertions(+), 35 deletions(-)

diff --git a/clang/test/Driver/armv8.1m.main.c b/clang/test/Driver/armv8.1m.main.c
index 46f421fcbb0dc..675d98c8fd138 100644
--- a/clang/test/Driver/armv8.1m.main.c
+++ b/clang/test/Driver/armv8.1m.main.c
@@ -2,20 +2,6 @@
 // RUN: FileCheck --check-prefix=CHECK-DSP < %t %s
 // CHECK-DSP: "-target-feature" "+dsp"
 
-// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+fp  -### %s 2> %t
-// RUN: FileCheck --check-prefix=CHECK-FP < %t %s
-// CHECK-FP: "-target-feature" "+fp-armv8"
-// CHECK-FP-NOT: "-target-feature" "+fp64"
-// CHECK-FP-NOT: "-target-feature" "+d32"
-// CHECK-FP: "-target-feature" "+fullfp16"
-
-// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+fp.dp  -### %s 2> %t
-// RUN: FileCheck --check-prefix=CHECK-FPDP < %t %s
-// CHECK-FPDP: "-target-feature" "+fp-armv8"
-// CHECK-FPDP: "-target-feature" "+fullfp16"
-// CHECK-FPDP: "-target-feature" "+fp64"
-// CHECK-FPDP-NOT: "-target-feature" "+d32"
-
 // RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve  -### %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-MVE < %t %s
 // CHECK-MVE: "-target-feature" "+mve"
@@ -25,10 +11,4 @@
 // CHECK-MVEFP: "-target-feature" "+mve.fp"
 // CHECK-MVEFP-NOT: "-target-feature" "+fp64"
 
-// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp  -### %s 2> %t
-// RUN: FileCheck --check-prefix=CHECK-MVEFP_DP < %t %s
-// CHECK-MVEFP_DP: "-target-feature" "+mve.fp"
-// CHECK-MVEFP_DP: "-target-feature" "+fp64"
-
-// RUN: %clang -target arm-arm-none-eabi -march=armv8.1m.main+fp -S %s
 double foo (double a) { return a; }
diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s
index 4dbe97953093f..cdeb423116f9f 100644
--- a/clang/test/Driver/armv8.1m.main.s
+++ b/clang/test/Driver/armv8.1m.main.s
@@ -4,17 +4,10 @@
 # RUN:      FileCheck --check-prefix=ERROR-V81M < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+dsp %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_DSP < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp %s 2>%t
-# RUN:      FileCheck --check-prefix=ERROR-V81M_FP < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp.dp %s 2>%t
-# RUN:      FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve+fp %s 2>%t
-# RUN:      FileCheck --check-prefix=ERROR-V81M_MVE_FP < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
-# RUN: %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp %s
 
 .syntax unified
 .thumb
@@ -45,21 +38,15 @@ vcmp.f64 d0,d1
 # ERROR-V8M: :[[@LINE-1]]:1: error
 # ERROR-V81M: :[[@LINE-2]]:1: error
 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error
-# ERROR-V81M_FP: :[[@LINE-4]]:1: error
-# ERROR-V81M_MVE: :[[@LINE-5]]:1: error
-# ERROR-V81M_MVE_FP: :[[@LINE-6]]:1: error
-# ERROR-V81M_MVEFP: :[[@LINE-7]]:1: error
+# ERROR-V81M_MVE: :[[@LINE-4]]:1: error
+# ERROR-V81M_MVEFP: :[[@LINE-5]]:1: error
 
 asrl r0, r1, r2
 # ERROR-V8M: :[[@LINE-1]]:1: error
 # ERROR-V81M: :[[@LINE-2]]:1: error
 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error
-# ERROR-V81M_FP: :[[@LINE-4]]:1: error
-# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error
 
 vcadd.i8 q0, q1, q2, #90
 # ERROR-V8M: :[[@LINE-1]]:1: error
 # ERROR-V81M: :[[@LINE-2]]:1: error
 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error
-# ERROR-V81M_FP: :[[@LINE-4]]:1: error
-# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error

From 32d545f930ce44614ac8398693dacd1d6dbc41a3 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Thu, 30 May 2019 15:15:57 +0000
Subject: [PATCH 0623/1176] [analyzer] print() JSONify chain: Fix possible
 build-bot breaks

Summary:
Printing constructing_objects could be non-deterministic as it is a map.

llvm-svn: 362101
---
 clang/test/Analysis/dump_egraph.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Analysis/dump_egraph.cpp b/clang/test/Analysis/dump_egraph.cpp
index 94919b636fdfd..c62e4bfd4c106 100644
--- a/clang/test/Analysis/dump_egraph.cpp
+++ b/clang/test/Analysis/dump_egraph.cpp
@@ -16,9 +16,9 @@ void foo() {
   T t;
 }
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": {{[0-9]+}}, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
+// CHECK: \"location_context\": \"#0 Call\", \"calling\": \"foo\", \"call_line\": null, \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 1, \"stmt_id\": {{[0-9]+}}, \"kind\": \"construct into local variable\", \"argument_index\": null, \"pretty\": \"T t;\", \"value\": \"&t\"
 
-// CHECK: \"constructing_objects\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
+// CHECK: \"location_context\": \"#0 Call\", \"calling\": \"T::T\", \"call_line\": \"16\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"lctx_id\": 2, \"init_id\": {{[0-9]+}}, \"kind\": \"construct into member variable\", \"argument_index\": null, \"pretty\": \"s\", \"value\": \"&t-\>s\"
 
 // CHECK: \"cluster\": \"t\", \"items\": [\l&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\{ \"kind\": \"Default\", \"offset\": 0, \"value\": \"conj_$3\{int, LC3, no stmt, #1\}\"
 

From f61b5481fd2ce47c5891e856ab1a2a6e8c0e9538 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Thu, 30 May 2019 15:18:07 +0000
Subject: [PATCH 0624/1176] [OpenCL] Fix OpenCL/SPIR version metadata in C++
 mode.

C++ is derived from OpenCL v2.0 therefore set the versions
identically.

Differential Revision: https://reviews.llvm.org/D62657

llvm-svn: 362102
---
 clang/lib/CodeGen/CodeGenModule.cpp      | 13 +++++++++----
 clang/test/CodeGenOpenCL/spir_version.cl |  3 +++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8cfb4e60e0de7..e79eb71b79323 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -564,11 +564,13 @@ void CodeGenModule::Release() {
     if (getTriple().isSPIR()) {
       // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the
       // opencl.spir.version named metadata.
+      // C++ is backwards compatible with OpenCL v2.0.
+      auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion;
       llvm::Metadata *SPIRVerElts[] = {
           llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-              Int32Ty, LangOpts.OpenCLVersion / 100)),
+              Int32Ty, Version / 100)),
           llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-              Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 0 : 2))};
+              Int32Ty, (Version / 100 > 1) ? 0 : 2))};
       llvm::NamedMDNode *SPIRVerMD =
           TheModule.getOrInsertNamedMetadata("opencl.spir.version");
       llvm::LLVMContext &Ctx = TheModule.getContext();
@@ -623,11 +625,14 @@ void CodeGenModule::Release() {
 void CodeGenModule::EmitOpenCLMetadata() {
   // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the
   // opencl.ocl.version named metadata node.
+  // C++ is backwards compatible with OpenCL v2.0.
+  // FIXME: We might need to add CXX version at some point too?
+  auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion;
   llvm::Metadata *OCLVerElts[] = {
       llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-          Int32Ty, LangOpts.OpenCLVersion / 100)),
+          Int32Ty, Version / 100)),
       llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-          Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))};
+          Int32Ty, (Version % 100) / 10))};
   llvm::NamedMDNode *OCLVerMD =
       TheModule.getOrInsertNamedMetadata("opencl.ocl.version");
   llvm::LLVMContext &Ctx = TheModule.getContext();
diff --git a/clang/test/CodeGenOpenCL/spir_version.cl b/clang/test/CodeGenOpenCL/spir_version.cl
index ac5b8e8c7fa5e..03f3f20885de5 100644
--- a/clang/test/CodeGenOpenCL/spir_version.cl
+++ b/clang/test/CodeGenOpenCL/spir_version.cl
@@ -5,6 +5,9 @@
 // RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-SPIR-CL12
 // RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-SPIR-CL20
 
+
+// RUN: %clang_cc1 %s -triple "spir64-unknown-unknown" -emit-llvm -o - -cl-std=c++ | FileCheck %s --check-prefix=CHECK-SPIR-CL20
+
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AMDGCN-CL10
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL1.2 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL12
 // RUN: %clang_cc1 %s -triple "amdgcn--amdhsa" -emit-llvm -o - -cl-std=CL2.0 | FileCheck %s --check-prefix=CHECK-AMDGCN-CL20

From 202c3ffcbfda4d2a53d4b8e5c1c9140b321b02da Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Thu, 30 May 2019 15:21:23 +0000
Subject: [PATCH 0625/1176] Improve DWARF parsing and accessing by 1% to 2%

When LLDB first started we didn't have our mmap of the DWARF data done correctly and if the backing file would change we would get live changes as the file changed and it would cause problems. We now mmap correctly and do not run into these issues. There was legacy code in DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(...) that would always extract the abbrev index each time the function was called to verify that DWARF data hadn't changed and a warning was emitted if it did. We no longer need this and the code was removed. The other thing this function did when it parsed the abbrev index was give us the offset of the first attribute bytes by adding the LEB128 size to the offset. This required an extra parameter to DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(...) which is now removed. I added "lldb::offset_t DWARFDebugInfoEntry::GetFirstAttributeOffset() const" which calculates this when we need it and modified all sites that need the offset to call it.

Now that we aren't decoding and verifying the abbrev index, it speeds up DWARF access by 1% to 2%.

Differential Revision: https://reviews.llvm.org/D62634

llvm-svn: 362103
---
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  | 51 ++++++-------------
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.h    |  5 +-
 2 files changed, 18 insertions(+), 38 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index 76aaf5212391d..c8684d20acd40 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -12,6 +12,8 @@
 
 #include <algorithm>
 
+#include "llvm/Support/LEB128.h"
+
 #include "lldb/Core/Module.h"
 #include "lldb/Expression/DWARFExpression.h"
 #include "lldb/Symbol/ObjectFile.h"
@@ -239,15 +241,14 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
   std::vector<DIERef> die_refs;
   bool set_frame_base_loclist_addr = false;
 
-  lldb::offset_t offset;
-  const DWARFAbbreviationDeclaration *abbrevDecl =
-      GetAbbreviationDeclarationPtr(cu, offset);
+  auto abbrevDecl = GetAbbreviationDeclarationPtr(cu);
 
   SymbolFileDWARF *dwarf2Data = cu->GetSymbolFileDWARF();
   lldb::ModuleSP module = dwarf2Data->GetObjectFile()->GetModule();
 
   if (abbrevDecl) {
     const DWARFDataExtractor &debug_info_data = cu->GetData();
+    lldb::offset_t offset = GetFirstAttributeOffset();
 
     if (!debug_info_data.ValidOffset(offset))
       return false;
@@ -561,13 +562,10 @@ void DWARFDebugInfoEntry::DumpAttribute(
 size_t DWARFDebugInfoEntry::GetAttributes(
     const DWARFUnit *cu, DWARFAttributes &attributes,
     uint32_t curr_depth) const {
-  const DWARFAbbreviationDeclaration *abbrevDecl = nullptr;
-  lldb::offset_t offset = 0;
-  if (cu)
-    abbrevDecl = GetAbbreviationDeclarationPtr(cu, offset);
-
+  auto abbrevDecl = GetAbbreviationDeclarationPtr(cu);
   if (abbrevDecl) {
     const DWARFDataExtractor &debug_info_data = cu->GetData();
+    lldb::offset_t offset = GetFirstAttributeOffset();
 
     const uint32_t num_attributes = abbrevDecl->NumAttributes();
     for (uint32_t i = 0; i < num_attributes; ++i) {
@@ -631,15 +629,14 @@ dw_offset_t DWARFDebugInfoEntry::GetAttributeValue(
                              form_value, end_attr_offset_ptr,
                              check_specification_or_abstract_origin);
 
-  lldb::offset_t offset;
-  const DWARFAbbreviationDeclaration *abbrevDecl =
-      GetAbbreviationDeclarationPtr(cu, offset);
+  auto abbrevDecl = GetAbbreviationDeclarationPtr(cu);
 
   if (abbrevDecl) {
     uint32_t attr_idx = abbrevDecl->FindAttributeIndex(attr);
 
     if (attr_idx != DW_INVALID_INDEX) {
       const DWARFDataExtractor &debug_info_data = cu->GetData();
+      lldb::offset_t offset = GetFirstAttributeOffset();
 
       uint32_t idx = 0;
       while (idx < attr_idx)
@@ -1244,35 +1241,17 @@ bool DWARFDebugInfoEntry::LookupAddress(const dw_addr_t address,
   return found_address;
 }
 
+lldb::offset_t DWARFDebugInfoEntry::GetFirstAttributeOffset() const {
+  return GetOffset() + llvm::getULEB128Size(m_abbr_idx);
+}
+
 const DWARFAbbreviationDeclaration *
-DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(
-    const DWARFUnit *cu, lldb::offset_t &offset) const {
+DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(const DWARFUnit *cu) const {
   if (cu) {
-    offset = GetOffset();
-
     const DWARFAbbreviationDeclarationSet *abbrev_set = cu->GetAbbreviations();
-    if (abbrev_set) {
-      const DWARFAbbreviationDeclaration *abbrev_decl =
-          abbrev_set->GetAbbreviationDeclaration(m_abbr_idx);
-      if (abbrev_decl) {
-        // Make sure the abbreviation code still matches. If it doesn't and the
-        // DWARF data was mmap'ed, the backing file might have been modified
-        // which is bad news.
-        const uint64_t abbrev_code = cu->GetData().GetULEB128(&offset);
-
-        if (abbrev_decl->Code() == abbrev_code)
-          return abbrev_decl;
-
-        SymbolFileDWARF *dwarf2Data = cu->GetSymbolFileDWARF();
-
-        dwarf2Data->GetObjectFile()->GetModule()->ReportErrorIfModifyDetected(
-            "0x%8.8x: the DWARF debug information has been modified (abbrev "
-            "code was %u, and is now %u)",
-            GetOffset(), (uint32_t)abbrev_decl->Code(), (uint32_t)abbrev_code);
-      }
-    }
+    if (abbrev_set)
+      return abbrev_set->GetAbbreviationDeclaration(m_abbr_idx);
   }
-  offset = DW_INVALID_OFFSET;
   return nullptr;
 }
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
index 1d2eb3122b587..77a97577883d4 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
@@ -118,8 +118,9 @@ class DWARFDebugInfoEntry {
       lldb_private::DWARFExpression *frame_base = nullptr) const;
 
   const DWARFAbbreviationDeclaration *
-  GetAbbreviationDeclarationPtr(const DWARFUnit *cu,
-                                lldb::offset_t &offset) const;
+  GetAbbreviationDeclarationPtr(const DWARFUnit *cu) const;
+
+  lldb::offset_t GetFirstAttributeOffset() const;
 
   dw_tag_t Tag() const { return m_tag; }
 

From 913604a637d59a44ce81601a024034b4b0221ec6 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker@arm.com>
Date: Thu, 30 May 2019 15:26:37 +0000
Subject: [PATCH 0626/1176] [NFC][ARM][ParallelDSP] Refactor narrow sequence

Most of the code used for finding a 'narrow' sequence is not used,
so I've removed it and simplified the calls from the smlad matcher.

llvm-svn: 362104
---
 llvm/lib/Target/ARM/ARMParallelDSP.cpp | 67 ++++++++------------------
 1 file changed, 19 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index beb44fb12e957..3cff9b56851c6 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -248,45 +248,6 @@ namespace {
   };
 }
 
-// MaxBitwidth: the maximum supported bitwidth of the elements in the DSP
-// instructions, which is set to 16. So here we should collect all i8 and i16
-// narrow operations.
-// TODO: we currently only collect i16, and will support i8 later, so that's
-// why we check that types are equal to MaxBitWidth, and not <= MaxBitWidth.
-template<unsigned MaxBitWidth>
-static bool IsNarrowSequence(Value *V, ValueList &VL) {
-  ConstantInt *CInt;
-
-  if (match(V, m_ConstantInt(CInt))) {
-    // TODO: if a constant is used, it needs to fit within the bit width.
-    return false;
-  }
-
-  auto *I = dyn_cast<Instruction>(V);
-  if (!I)
-   return false;
-
-  Value *Val, *LHS, *RHS;
-  if (match(V, m_Trunc(m_Value(Val)))) {
-    if (cast<TruncInst>(I)->getDestTy()->getIntegerBitWidth() == MaxBitWidth)
-      return IsNarrowSequence<MaxBitWidth>(Val, VL);
-  } else if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) {
-    // TODO: we need to implement sadd16/sadd8 for this, which enables to
-    // also do the rewrite for smlad8.ll, but it is unsupported for now.
-    return false;
-  } else if (match(V, m_ZExtOrSExt(m_Value(Val)))) {
-    if (cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth)
-      return false;
-
-    if (match(Val, m_Load(m_Value()))) {
-      VL.push_back(Val);
-      VL.push_back(I);
-      return true;
-    }
-  }
-  return false;
-}
-
 template<typename MemInst>
 static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
                                   const DataLayout &DL, ScalarEvolution &SE) {
@@ -507,6 +468,18 @@ bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction) {
   return false;
 }
 
+template<typename InstType, unsigned BitWidth>
+bool IsExtendingLoad(Value *V) {
+  auto *I = dyn_cast<InstType>(V);
+  if (!I)
+    return false;
+
+  if (I->getSrcTy()->getIntegerBitWidth() != BitWidth)
+    return false;
+
+  return isa<LoadInst>(I->getOperand(0));
+}
+
 static void MatchParallelMACSequences(Reduction &R,
                                       OpChainList &Candidates) {
   Instruction *Acc = R.AccIntAdd;
@@ -526,15 +499,13 @@ static void MatchParallelMACSequences(Reduction &R,
         return true;
       break;
     case Instruction::Mul: {
-      Value *MulOp0 = I->getOperand(0);
-      Value *MulOp1 = I->getOperand(1);
-      if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1)) {
-        ValueList LHS;
-        ValueList RHS;
-        if (IsNarrowSequence<16>(MulOp0, LHS) &&
-            IsNarrowSequence<16>(MulOp1, RHS)) {
-          Candidates.push_back(make_unique<BinOpChain>(I, LHS, RHS));
-        }
+      Value *Op0 = I->getOperand(0);
+      Value *Op1 = I->getOperand(1);
+      if (IsExtendingLoad<SExtInst, 16>(Op0) &&
+          IsExtendingLoad<SExtInst, 16>(Op1)) {
+        ValueList LHS = { cast<SExtInst>(Op0)->getOperand(0), Op0 };
+        ValueList RHS = { cast<SExtInst>(Op1)->getOperand(0), Op1 };
+        Candidates.push_back(make_unique<BinOpChain>(I, LHS, RHS));
       }
       return false;
     }

From e6ddde57e27e03054fd0a7cd627ce7917559a690 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Thu, 30 May 2019 15:32:33 +0000
Subject: [PATCH 0627/1176] Fix a regression in DWARF access speed caused by
 svn revision 356190

The issue was caused by the error checking code that was added. It was incorrectly adding an extra abbreviation when DWARFEnumState::Complete was received since it would push an extra abbreviation onto the list with the abbreviation code of zero. This cause m_idx_offset in each DWARFAbbreviationDeclarationSet to be set to UINT32_MAX. This valid indicates we must linearly search for attributes, not access them in O(1) time. This caused every DWARFDebugInfoEntry that would try to get its DWARFAbbreviationDeclaration from the CU's DWARFAbbreviationDeclarationSet to always linearly search the abbreviation set for a given abbreviation code. Easy to see why this would cause things to be slow.

This regression was caused by: https://reviews.llvm.org/D59370. I asked to ensure there was no regression is parsing or access speed, but that must not have been done. In my test with 40 DWARF files trying to set a breakpoint by function name and in a header file, I see a 8% speed improvement with this fix.

There was no regression in correctness, just very inefficient access.

Added full unit testing for DWARFAbbreviationDeclarationSet parsing to ensure this doesn't regress.

Differential Revision: https://reviews.llvm.org/D62630

llvm-svn: 362105
---
 .../SymbolFile/DWARF/DWARFDebugAbbrev.cpp     |  14 +-
 .../SymbolFile/DWARF/DWARFDebugAbbrev.h       |   4 +
 .../SymbolFile/DWARF/SymbolFileDWARFTests.cpp | 253 ++++++++++++++++++
 3 files changed, 263 insertions(+), 8 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
index e9aa8f360bcab..26301566a8e14 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.cpp
@@ -29,21 +29,19 @@ DWARFAbbreviationDeclarationSet::extract(const DWARFDataExtractor &data,
   Clear();
   DWARFAbbreviationDeclaration abbrevDeclaration;
   dw_uleb128_t prev_abbr_code = 0;
-  DWARFEnumState state = DWARFEnumState::MoreItems;
-  while (state == DWARFEnumState::MoreItems) {
+  while (true) {
     llvm::Expected<DWARFEnumState> es =
         abbrevDeclaration.extract(data, offset_ptr);
     if (!es)
       return es.takeError();
-
-    state = *es;
+    if (*es == DWARFEnumState::Complete)
+      break;
     m_decls.push_back(abbrevDeclaration);
     if (m_idx_offset == 0)
       m_idx_offset = abbrevDeclaration.Code();
-    else {
-      if (prev_abbr_code + 1 != abbrevDeclaration.Code())
-        m_idx_offset =
-            UINT32_MAX; // Out of order indexes, we can't do O(1) lookups...
+    else if (prev_abbr_code + 1 != abbrevDeclaration.Code()) {
+      // Out of order indexes, we can't do O(1) lookups...
+      m_idx_offset = UINT32_MAX;
     }
     prev_abbr_code = abbrevDeclaration.Code();
   }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h
index 391df8de4bace..9c4729326081c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h
@@ -45,6 +45,10 @@ class DWARFAbbreviationDeclarationSet {
   const DWARFAbbreviationDeclaration *
   GetAbbreviationDeclaration(dw_uleb128_t abbrCode) const;
 
+  /// Unit test accessor functions.
+  /// @{
+  uint32_t GetIndexOffset() const { return m_idx_offset; }
+  /// @}
 private:
   dw_offset_t m_offset;
   uint32_t m_idx_offset;
diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index cca5a612e713a..f0dd24764ae2c 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -15,6 +15,9 @@
 #include "llvm/Support/Path.h"
 
 #include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h"
+#include "Plugins/SymbolFile/DWARF/DWARFAbbreviationDeclaration.h"
+#include "Plugins/SymbolFile/DWARF/DWARFDataExtractor.h"
+#include "Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h"
 #include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h"
 #include "Plugins/SymbolFile/PDB/SymbolFilePDB.h"
 #include "TestingSupport/TestUtilities.h"
@@ -28,8 +31,13 @@
 #include "lldb/Symbol/LineTable.h"
 #include "lldb/Symbol/SymbolVendor.h"
 #include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/DataEncoder.h"
 #include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/StreamString.h"
 
+
+
+using namespace lldb;
 using namespace lldb_private;
 
 class SymbolFileDWARFTests : public testing::Test {
@@ -76,3 +84,248 @@ TEST_F(SymbolFileDWARFTests, TestAbilitiesForDWARF) {
   uint32_t expected_abilities = SymbolFile::kAllAbilities;
   EXPECT_EQ(expected_abilities, symfile->CalculateAbilities());
 }
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start1) {
+  // Test that if we have a .debug_abbrev that contains ordered abbreviation
+  // codes that start at 1, that we get O(1) access.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_yes);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(2); // Abbrev code 2
+  encoder.PutULEB128(DW_TAG_subprogram);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+ 
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  EXPECT_FALSE(bool(error));
+  // Make sure we have O(1) access to each abbreviation by making sure the
+  // index offset is 1 and not UINT32_MAX
+  EXPECT_EQ(abbrev_set.GetIndexOffset(), 1);
+  
+  auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(1);
+  EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit);
+  EXPECT_TRUE(abbrev1->HasChildren());
+  EXPECT_EQ(abbrev1->NumAttributes(), 1);
+  auto abbrev2 = abbrev_set.GetAbbreviationDeclaration(2);
+  EXPECT_EQ(abbrev2->Tag(), DW_TAG_subprogram);
+  EXPECT_FALSE(abbrev2->HasChildren());
+  EXPECT_EQ(abbrev2->NumAttributes(), 1);
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevOrder1Start5) {
+  // Test that if we have a .debug_abbrev that contains ordered abbreviation
+  // codes that start at 5, that we get O(1) access.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(5); // Abbrev code 5
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_yes);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(6); // Abbrev code 6
+  encoder.PutULEB128(DW_TAG_subprogram);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  EXPECT_FALSE(bool(error));
+  // Make sure we have O(1) access to each abbreviation by making sure the
+  // index offset is 5 and not UINT32_MAX
+  EXPECT_EQ(abbrev_set.GetIndexOffset(), 5);
+  
+  auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(5);
+  EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit);
+  EXPECT_TRUE(abbrev1->HasChildren());
+  EXPECT_EQ(abbrev1->NumAttributes(), 1);
+  auto abbrev2 = abbrev_set.GetAbbreviationDeclaration(6);
+  EXPECT_EQ(abbrev2->Tag(), DW_TAG_subprogram);
+  EXPECT_FALSE(abbrev2->HasChildren());
+  EXPECT_EQ(abbrev2->NumAttributes(), 1);
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevOutOfOrder) {
+  // Test that if we have a .debug_abbrev that contains unordered abbreviation
+  // codes, that we can access the information correctly.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(2); // Abbrev code 2
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_yes);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_subprogram);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  EXPECT_FALSE(bool(error));
+  // Make sure we don't have O(1) access to each abbreviation by making sure
+  // the index offset is UINT32_MAX
+  EXPECT_EQ(abbrev_set.GetIndexOffset(), UINT32_MAX);
+  
+  auto abbrev1 = abbrev_set.GetAbbreviationDeclaration(2);
+  EXPECT_EQ(abbrev1->Tag(), DW_TAG_compile_unit);
+  EXPECT_TRUE(abbrev1->HasChildren());
+  EXPECT_EQ(abbrev1->NumAttributes(), 1);
+  auto abbrev2 = abbrev_set.GetAbbreviationDeclaration(1);
+  EXPECT_EQ(abbrev2->Tag(), DW_TAG_subprogram);
+  EXPECT_FALSE(abbrev2->HasChildren());
+  EXPECT_EQ(abbrev2->NumAttributes(), 1);
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevInvalidNULLTag) {
+  // Test that we detect when an abbreviation has a NULL tag and that we get
+  // an error when decoding.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(0); // Invalid NULL tag here!
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("abbrev decl requires non-null tag.",
+            llvm::toString(std::move(error)));
+
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevNullAttrValidForm) {
+  // Test that we detect when an abbreviation has a NULL attribute and a non
+  // NULL form and that we get an error when decoding.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(0); // Invalid NULL DW_AT
+  encoder.PutULEB128(DW_FORM_strp); // With a valid form
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("malformed abbreviation declaration attribute",
+            llvm::toString(std::move(error)));
+  
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevValidAttrNullForm) {
+  // Test that we detect when an abbreviation has a valid attribute and a
+  // NULL form and that we get an error when decoding.
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name); // Valid attribute
+  encoder.PutULEB128(0); // NULL form
+  encoder.PutULEB128(0);
+  encoder.PutULEB128(0);
+  
+  encoder.PutULEB128(0); // Abbrev code 0 (termination)
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("malformed abbreviation declaration attribute",
+            llvm::toString(std::move(error)));
+}
+
+TEST_F(SymbolFileDWARFTests, TestAbbrevMissingTerminator) {
+  // Test that we detect when an abbreviation has a valid attribute and a
+  // form, but is missing the NULL attribute and form that terminates an
+  // abbreviation
+  
+  const auto byte_order = eByteOrderLittle;
+  const uint8_t addr_size = 4;
+  StreamString encoder(Stream::eBinary, addr_size, byte_order);
+  encoder.PutULEB128(1); // Abbrev code 1
+  encoder.PutULEB128(DW_TAG_compile_unit);
+  encoder.PutHex8(DW_CHILDREN_no);
+  encoder.PutULEB128(DW_AT_name);
+  encoder.PutULEB128(DW_FORM_strp);
+  // Don't add the NULL DW_AT and NULL DW_FORM terminator
+  
+  DWARFDataExtractor data;
+  data.SetData(encoder.GetData(), encoder.GetSize(), byte_order);
+  DWARFAbbreviationDeclarationSet abbrev_set;
+  lldb::offset_t data_offset = 0;
+  llvm::Error error = abbrev_set.extract(data, &data_offset);
+  // Verify we get an error
+  EXPECT_TRUE(bool(error));
+  EXPECT_EQ("abbreviation declaration attribute list not terminated with a "
+            "null entry", llvm::toString(std::move(error)));
+}

From d0f34fd19850c79f8741301c3bfe0312ca24324c Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Thu, 30 May 2019 15:38:02 +0000
Subject: [PATCH 0628/1176] Revert "clang support gnu asm goto."

This reverts commit 954ec09aed4f2be04bb5f4e10dbb4ea8bd19ef9a.

Reverting due to test failures as requested by Jennifer Yu.

Conflicts:
	clang/test/CodeGen/asm-goto.c

llvm-svn: 362106
---
 clang/include/clang/AST/Stmt.h                |  51 +------
 .../clang/Basic/DiagnosticParseKinds.td       |   4 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |  10 +-
 clang/include/clang/Sema/Sema.h               |   1 -
 clang/lib/AST/ASTImporter.cpp                 |  10 +-
 clang/lib/AST/Stmt.cpp                        |  29 +---
 clang/lib/AST/StmtPrinter.cpp                 |  20 +--
 clang/lib/AST/StmtProfile.cpp                 |   3 -
 clang/lib/Analysis/CFG.cpp                    |  74 +++-------
 clang/lib/CodeGen/CGStmt.cpp                  | 126 ++++++------------
 clang/lib/Parse/ParseStmtAsm.cpp              |  68 ++--------
 clang/lib/Sema/JumpDiagnostics.cpp            | 114 +++++++---------
 clang/lib/Sema/SemaStmtAsm.cpp                |  48 +------
 clang/lib/Sema/TreeTransform.h                |  16 +--
 clang/lib/Serialization/ASTReaderStmt.cpp     |   7 -
 clang/lib/Serialization/ASTWriterStmt.cpp     |   4 -
 clang/test/Analysis/asm-goto.cpp              |  52 --------
 clang/test/CodeGen/asm-goto.c                 |  19 ---
 clang/test/CodeGen/asm.c                      |  12 --
 clang/test/CodeGen/inline-asm-mixed-style.c   |  10 +-
 clang/test/Coverage/c-language-features.inc   |   4 +-
 clang/test/PCH/asm.h                          |   6 +-
 clang/test/Parser/asm.c                       |  50 -------
 clang/test/Parser/asm.cpp                     |  51 -------
 clang/test/Sema/asm-goto.cpp                  |  45 -------
 clang/test/Sema/asm.c                         |  21 ---
 clang/test/Sema/inline-asm-validate-tmpl.cpp  |  10 --
 clang/test/Sema/scope-check.c                 |  16 ---
 28 files changed, 148 insertions(+), 733 deletions(-)
 delete mode 100644 clang/test/Analysis/asm-goto.cpp
 delete mode 100644 clang/test/CodeGen/asm-goto.c
 delete mode 100644 clang/test/Sema/asm-goto.cpp

diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index fe5d802688466..77b2173fcb878 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -46,7 +46,6 @@ class Attr;
 class CapturedDecl;
 class Decl;
 class Expr;
-class AddrLabelExpr;
 class LabelDecl;
 class ODRHash;
 class PrinterHelper;
@@ -2817,15 +2816,13 @@ class GCCAsmStmt : public AsmStmt {
   StringLiteral **Constraints = nullptr;
   StringLiteral **Clobbers = nullptr;
   IdentifierInfo **Names = nullptr;
-  unsigned NumLabels = 0;
 
 public:
   GCCAsmStmt(const ASTContext &C, SourceLocation asmloc, bool issimple,
              bool isvolatile, unsigned numoutputs, unsigned numinputs,
              IdentifierInfo **names, StringLiteral **constraints, Expr **exprs,
              StringLiteral *asmstr, unsigned numclobbers,
-             StringLiteral **clobbers, unsigned numlabels,
-             SourceLocation rparenloc);
+             StringLiteral **clobbers, SourceLocation rparenloc);
 
   /// Build an empty inline-assembly statement.
   explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty) {}
@@ -2950,51 +2947,6 @@ class GCCAsmStmt : public AsmStmt {
     return const_cast<GCCAsmStmt*>(this)->getInputExpr(i);
   }
 
-  //===--- Labels ---===//
-
-  bool isAsmGoto() const {
-    return NumLabels > 0;
-  }
-
-  unsigned getNumLabels() const {
-    return NumLabels;
-  }
-
-  IdentifierInfo *getLabelIdentifier(unsigned i) const {
-    return Names[i + NumInputs];
-  }
-
-  AddrLabelExpr *getLabelExpr(unsigned i) const;
-  StringRef getLabelName(unsigned i) const;
-  using labels_iterator = CastIterator<AddrLabelExpr>;
-  using const_labels_iterator = ConstCastIterator<AddrLabelExpr>;
-  using labels_range = llvm::iterator_range<labels_iterator>;
-  using labels_const_range = llvm::iterator_range<const_labels_iterator>;
-
-  labels_iterator begin_labels() {
-    return &Exprs[0] + NumInputs;
-  }
-
-  labels_iterator end_labels() {
-    return &Exprs[0] + NumInputs + NumLabels;
-  }
-
-  labels_range labels() {
-    return labels_range(begin_labels(), end_labels());
-  }
-
-  const_labels_iterator begin_labels() const {
-    return &Exprs[0] + NumInputs;
-  }
-
-  const_labels_iterator end_labels() const {
-    return &Exprs[0] + NumInputs + NumLabels;
-  }
-
-  labels_const_range labels() const {
-    return labels_const_range(begin_labels(), end_labels());
-  }
-
 private:
   void setOutputsAndInputsAndClobbers(const ASTContext &C,
                                       IdentifierInfo **Names,
@@ -3002,7 +2954,6 @@ class GCCAsmStmt : public AsmStmt {
                                       Stmt **Exprs,
                                       unsigned NumOutputs,
                                       unsigned NumInputs,
-                                      unsigned NumLabels,
                                       StringLiteral **Clobbers,
                                       unsigned NumClobbers);
 
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 15a5ecf177514..fb281a5be86a5 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -27,8 +27,8 @@ def err_msasm_unable_to_create_target : Error<
   "MS-style inline assembly is not available: %0">;
 def err_gnu_inline_asm_disabled : Error<
   "GNU-style inline assembly is disabled">;
-def err_asm_goto_cannot_have_output : Error<
-  "'asm goto' cannot have output constraints">;
+def err_asm_goto_not_supported_yet : Error<
+  "'asm goto' constructs are not supported yet">;
 }
 
 let CategoryName = "Parse Issue" in {
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c5a7b93cec28a..1beb7fda9bfd7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5070,12 +5070,12 @@ def warn_cxx98_compat_switch_into_protected_scope : Warning<
 def err_indirect_goto_without_addrlabel : Error<
   "indirect goto in function with no address-of-label expressions">;
 def err_indirect_goto_in_protected_scope : Error<
-  "cannot jump from this %select{indirect|asm}0 goto statement to one of its possible targets">;
+  "cannot jump from this indirect goto statement to one of its possible targets">;
 def warn_cxx98_compat_indirect_goto_in_protected_scope : Warning<
-  "jump from this %select{indirect|asm}0 goto statement to one of its possible targets "
+  "jump from this indirect goto statement to one of its possible targets "
   "is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
 def note_indirect_goto_target : Note<
-  "possible target of %select{indirect|asm}0 goto statement">;
+  "possible target of indirect goto statement">;
 def note_protected_by_variable_init : Note<
   "jump bypasses variable initialization">;
 def note_protected_by_variable_nontriv_destructor : Note<
@@ -7503,10 +7503,6 @@ let CategoryName = "Inline Assembly Issue" in {
     "use constraint modifier \"%0\"">;
   def note_asm_input_duplicate_first : Note<
     "constraint '%0' is already present here">;
- def error_duplicate_asm_operand_name : Error<
-    "duplicate use of asm operand name \"%0\"">;
- def note_duplicate_asm_operand_name : Note<
-    "asm operand name \"%0\" first referenced here">;
 }
 
   def error_inoutput_conflict_with_clobber : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d25d7decf3b38..d7486ec1c2617 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3971,7 +3971,6 @@ class Sema {
                              unsigned NumInputs, IdentifierInfo **Names,
                              MultiExprArg Constraints, MultiExprArg Exprs,
                              Expr *AsmString, MultiExprArg Clobbers,
-                             unsigned NumLabels,
                              SourceLocation RParenLoc);
 
   void FillInlineAsmIdentifierInfo(Expr *Res,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 1f1ec1d687c2e..2e4c304b3de20 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -5592,17 +5592,12 @@ ExpectedStmt ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
       return InputOrErr.takeError();
   }
 
-  SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs() +
-                               S->getNumLabels());
+  SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs());
   if (Error Err = ImportContainerChecked(S->outputs(), Exprs))
     return std::move(Err);
 
-  if (Error Err =
-          ImportArrayChecked(S->inputs(), Exprs.begin() + S->getNumOutputs()))
-    return std::move(Err);
-
   if (Error Err = ImportArrayChecked(
-          S->labels(), Exprs.begin() + S->getNumOutputs() + S->getNumInputs()))
+      S->inputs(), Exprs.begin() + S->getNumOutputs()))
     return std::move(Err);
 
   ExpectedSLoc AsmLocOrErr = import(S->getAsmLoc());
@@ -5628,7 +5623,6 @@ ExpectedStmt ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
       *AsmStrOrErr,
       S->getNumClobbers(),
       Clobbers.data(),
-      S->getNumLabels(),
       *RParenLocOrErr);
 }
 
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index 0a4d403106bd4..68a5a2d6ab0ed 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -444,14 +444,6 @@ void GCCAsmStmt::setInputExpr(unsigned i, Expr *E) {
   Exprs[i + NumOutputs] = E;
 }
 
-AddrLabelExpr *GCCAsmStmt::getLabelExpr(unsigned i) const {
-  return cast<AddrLabelExpr>(Exprs[i + NumInputs]);
-}
-
-StringRef GCCAsmStmt::getLabelName(unsigned i) const {
-  return getLabelExpr(i)->getLabel()->getName();
-}
-
 /// getInputConstraint - Return the specified input constraint.  Unlike output
 /// constraints, these can be empty.
 StringRef GCCAsmStmt::getInputConstraint(unsigned i) const {
@@ -464,16 +456,13 @@ void GCCAsmStmt::setOutputsAndInputsAndClobbers(const ASTContext &C,
                                                 Stmt **Exprs,
                                                 unsigned NumOutputs,
                                                 unsigned NumInputs,
-                                                unsigned NumLabels,
                                                 StringLiteral **Clobbers,
                                                 unsigned NumClobbers) {
   this->NumOutputs = NumOutputs;
   this->NumInputs = NumInputs;
   this->NumClobbers = NumClobbers;
-  this->NumLabels = NumLabels;
-  assert(!(NumOutputs && NumLabels) && "asm goto cannot have outputs");
 
-  unsigned NumExprs = NumOutputs + NumInputs + NumLabels;
+  unsigned NumExprs = NumOutputs + NumInputs;
 
   C.Deallocate(this->Names);
   this->Names = new (C) IdentifierInfo*[NumExprs];
@@ -509,10 +498,6 @@ int GCCAsmStmt::getNamedOperand(StringRef SymbolicName) const {
     if (getInputName(i) == SymbolicName)
       return getNumOutputs() + NumPlusOperands + i;
 
-  for (unsigned i = 0, e = getNumLabels(); i != e; ++i)
-    if (getLabelName(i) == SymbolicName)
-      return i + getNumInputs();
-
   // Not found.
   return -1;
 }
@@ -630,8 +615,8 @@ unsigned GCCAsmStmt::AnalyzeAsmString(SmallVectorImpl<AsmStringPiece>&Pieces,
       while (CurPtr != StrEnd && isDigit(*CurPtr))
         N = N*10 + ((*CurPtr++)-'0');
 
-      unsigned NumOperands = getNumOutputs() + getNumPlusOperands() +
-                             getNumInputs() + getNumLabels();
+      unsigned NumOperands =
+        getNumOutputs() + getNumPlusOperands() + getNumInputs();
       if (N >= NumOperands) {
         DiagOffs = CurPtr-StrStart-1;
         return diag::err_asm_invalid_operand_number;
@@ -744,12 +729,10 @@ GCCAsmStmt::GCCAsmStmt(const ASTContext &C, SourceLocation asmloc,
                        unsigned numinputs, IdentifierInfo **names,
                        StringLiteral **constraints, Expr **exprs,
                        StringLiteral *asmstr, unsigned numclobbers,
-                       StringLiteral **clobbers, unsigned numlabels,
-                       SourceLocation rparenloc)
+                       StringLiteral **clobbers, SourceLocation rparenloc)
     : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-              numinputs, numclobbers),
-              RParenLoc(rparenloc), AsmStr(asmstr), NumLabels(numlabels) {
-  unsigned NumExprs = NumOutputs + NumInputs + NumLabels;
+              numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
+  unsigned NumExprs = NumOutputs + NumInputs;
 
   Names = new (C) IdentifierInfo*[NumExprs];
   std::copy(names, names + NumExprs, Names);
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 563095f89b9b3..7fe0be5217dbc 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -414,15 +414,12 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   if (Node->isVolatile())
     OS << "volatile ";
 
-  if (Node->isAsmGoto())
-    OS << "goto ";
-
   OS << "(";
   VisitStringLiteral(Node->getAsmString());
 
   // Outputs
   if (Node->getNumOutputs() != 0 || Node->getNumInputs() != 0 ||
-      Node->getNumClobbers() != 0 || Node->getNumLabels() != 0)
+      Node->getNumClobbers() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumOutputs(); i != e; ++i) {
@@ -442,8 +439,7 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   }
 
   // Inputs
-  if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0 ||
-      Node->getNumLabels() != 0)
+  if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumInputs(); i != e; ++i) {
@@ -463,7 +459,7 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   }
 
   // Clobbers
-  if (Node->getNumClobbers() != 0 || Node->getNumLabels())
+  if (Node->getNumClobbers() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumClobbers(); i != e; ++i) {
@@ -473,16 +469,6 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
     VisitStringLiteral(Node->getClobberStringLiteral(i));
   }
 
-  // Labels
-  if (Node->getNumLabels() != 0)
-    OS << " : ";
-
-  for (unsigned i = 0, e = Node->getNumLabels(); i != e; ++i) {
-    if (i != 0)
-      OS << ", ";
-    OS << Node->getLabelName(i);
-  }
-
   OS << ");";
   if (Policy.IncludeNewlines) OS << NL;
 }
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index c5da5bfda9cb5..93bdcac8b5496 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -321,9 +321,6 @@ void StmtProfiler::VisitGCCAsmStmt(const GCCAsmStmt *S) {
   ID.AddInteger(S->getNumClobbers());
   for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
     VisitStringLiteral(S->getClobberStringLiteral(I));
-  ID.AddInteger(S->getNumLabels());
-  for (auto *L : S->labels())
-    VisitDecl(L->getLabel());
 }
 
 void StmtProfiler::VisitMSAsmStmt(const MSAsmStmt *S) {
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index b53bfcca37cd4..1d83359341528 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -549,7 +549,6 @@ class CFGBuilder {
   CFGBlock *VisitExprWithCleanups(ExprWithCleanups *E, AddStmtChoice asc);
   CFGBlock *VisitForStmt(ForStmt *F);
   CFGBlock *VisitGotoStmt(GotoStmt *G);
-  CFGBlock *VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc);
   CFGBlock *VisitIfStmt(IfStmt *I);
   CFGBlock *VisitImplicitCastExpr(ImplicitCastExpr *E, AddStmtChoice asc);
   CFGBlock *VisitConstantExpr(ConstantExpr *E, AddStmtChoice asc);
@@ -1479,38 +1478,22 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
                                    E = BackpatchBlocks.end(); I != E; ++I ) {
 
     CFGBlock *B = I->block;
-    if (auto *G = dyn_cast<GotoStmt>(B->getTerminator())) {
-      LabelMapTy::iterator LI = LabelMap.find(G->getLabel());
-      // If there is no target for the goto, then we are looking at an
-      // incomplete AST.  Handle this by not registering a successor.
-      if (LI == LabelMap.end())
-        continue;
-      JumpTarget JT = LI->second;
-      prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
-                                                JT.scopePosition);
-      prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
-                                             JT.scopePosition);
-      const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
-          B, I->scopePosition, JT.scopePosition);
-      appendScopeBegin(JT.block, VD, G);
-      addSuccessor(B, JT.block);
-    };
-    if (auto *G = dyn_cast<GCCAsmStmt>(B->getTerminator())) {
-      CFGBlock *Successor  = (I+1)->block;
-      for (auto *L : G->labels()) {
-        LabelMapTy::iterator LI = LabelMap.find(L->getLabel());
-        // If there is no target for the goto, then we are looking at an
-        // incomplete AST.  Handle this by not registering a successor.
-        if (LI == LabelMap.end())
-          continue;
-        JumpTarget JT = LI->second;
-        // Successor has been added, so skip it.
-        if (JT.block == Successor)
-          continue;
-        addSuccessor(B, JT.block);
-      }
-      I++;
-    }
+    const GotoStmt *G = cast<GotoStmt>(B->getTerminator());
+    LabelMapTy::iterator LI = LabelMap.find(G->getLabel());
+
+    // If there is no target for the goto, then we are looking at an
+    // incomplete AST.  Handle this by not registering a successor.
+    if (LI == LabelMap.end()) continue;
+
+    JumpTarget JT = LI->second;
+    prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
+                                              JT.scopePosition);
+    prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
+                                           JT.scopePosition);
+    const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
+        B, I->scopePosition, JT.scopePosition);
+    appendScopeBegin(JT.block, VD, G);
+    addSuccessor(B, JT.block);
   }
 
   // Add successors to the Indirect Goto Dispatch block (if we have one).
@@ -2159,9 +2142,6 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc) {
     case Stmt::GotoStmtClass:
       return VisitGotoStmt(cast<GotoStmt>(S));
 
-    case Stmt::GCCAsmStmtClass:
-      return VisitGCCAsmStmt(cast<GCCAsmStmt>(S), asc);
-
     case Stmt::IfStmtClass:
       return VisitIfStmt(cast<IfStmt>(S));
 
@@ -3166,28 +3146,6 @@ CFGBlock *CFGBuilder::VisitGotoStmt(GotoStmt *G) {
   return Block;
 }
 
-CFGBlock *CFGBuilder::VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc) {
-  // Goto is a control-flow statement.  Thus we stop processing the current
-  // block and create a new one.
-
-  if (!G->isAsmGoto())
-    return VisitStmt(G, asc);
-
-  if (Block) {
-    Succ = Block;
-    if (badCFG)
-      return nullptr;
-  }
-  Block = createBlock();
-  Block->setTerminator(G);
-  // We will backpatch this block later for all the labels.
-  BackpatchBlocks.push_back(JumpSource(Block, ScopePos));
-  // Save "Succ" in BackpatchBlocks. In the backpatch processing, "Succ" is
-  // used to avoid adding "Succ" again.
-  BackpatchBlocks.push_back(JumpSource(Succ, ScopePos));
-  return Block;
-}
-
 CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) {
   CFGBlock *LoopSuccessor = nullptr;
 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 5c24db709250e..c617b198d76d5 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1896,55 +1896,6 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
   return llvm::MDNode::get(CGF.getLLVMContext(), Locs);
 }
 
-static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
-                              bool ReadOnly, bool ReadNone, const AsmStmt &S,
-                              const std::vector<llvm::Type *> &ResultRegTypes,
-                              CodeGenFunction &CGF,
-                              std::vector<llvm::Value *> &RegResults) {
-  Result.addAttribute(llvm::AttributeList::FunctionIndex,
-                      llvm::Attribute::NoUnwind);
-  // Attach readnone and readonly attributes.
-  if (!HasSideEffect) {
-    if (ReadNone)
-      Result.addAttribute(llvm::AttributeList::FunctionIndex,
-                          llvm::Attribute::ReadNone);
-    else if (ReadOnly)
-      Result.addAttribute(llvm::AttributeList::FunctionIndex,
-                          llvm::Attribute::ReadOnly);
-  }
-
-  // Slap the source location of the inline asm into a !srcloc metadata on the
-  // call.
-  if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S))
-    Result.setMetadata("srcloc",
-                       getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF));
-  else {
-    // At least put the line number on MS inline asm blobs.
-    llvm::Constant *Loc = llvm::ConstantInt::get(CGF.Int32Ty,
-                                        S.getAsmLoc().getRawEncoding());
-    Result.setMetadata("srcloc",
-                       llvm::MDNode::get(CGF.getLLVMContext(),
-                                         llvm::ConstantAsMetadata::get(Loc)));
-  }
-
-  if (CGF.getLangOpts().assumeFunctionsAreConvergent())
-    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
-    // convergent (meaning, they may call an intrinsically convergent op, such
-    // as bar.sync, and so can't have certain optimizations applied around
-    // them).
-    Result.addAttribute(llvm::AttributeList::FunctionIndex,
-                        llvm::Attribute::Convergent);
-  // Extract all of the register value results from the asm.
-  if (ResultRegTypes.size() == 1) {
-    RegResults.push_back(&Result);
-  } else {
-    for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
-      llvm::Value *Tmp = CGF.Builder.CreateExtractValue(&Result, i, "asmresult");
-      RegResults.push_back(Tmp);
-    }
-  }
-}
-
 void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   // Assemble the final asm string.
   std::string AsmString = S.generateAsmString(getContext());
@@ -2187,29 +2138,6 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   }
   Constraints += InOutConstraints;
 
-  // Labels
-  SmallVector<llvm::BasicBlock *, 16> Transfer;
-  llvm::BasicBlock *Fallthrough = nullptr;
-  bool IsGCCAsmGoto = false;
-  if (const auto *GS =  dyn_cast<GCCAsmStmt>(&S)) {
-    IsGCCAsmGoto = GS->isAsmGoto();
-    if (IsGCCAsmGoto) {
-      for (auto *E : GS->labels()) {
-        JumpDest Dest = getJumpDestForLabel(E->getLabel());
-        Transfer.push_back(Dest.getBlock());
-        llvm::BlockAddress *BA =
-            llvm::BlockAddress::get(CurFn, Dest.getBlock());
-        Args.push_back(BA);
-        ArgTypes.push_back(BA->getType());
-        if (!Constraints.empty())
-          Constraints += ',';
-        Constraints += 'X';
-      }
-      StringRef Name = "asm.fallthrough";
-      Fallthrough = createBasicBlock(Name);
-    }
-  }
-
   // Clobbers
   for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
     StringRef Clobber = S.getClobber(i);
@@ -2252,18 +2180,52 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   llvm::InlineAsm *IA =
     llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
                          /* IsAlignStack */ false, AsmDialect);
+  llvm::CallInst *Result =
+      Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
+  Result->addAttribute(llvm::AttributeList::FunctionIndex,
+                       llvm::Attribute::NoUnwind);
+
+  // Attach readnone and readonly attributes.
+  if (!HasSideEffect) {
+    if (ReadNone)
+      Result->addAttribute(llvm::AttributeList::FunctionIndex,
+                           llvm::Attribute::ReadNone);
+    else if (ReadOnly)
+      Result->addAttribute(llvm::AttributeList::FunctionIndex,
+                           llvm::Attribute::ReadOnly);
+  }
+
+  // Slap the source location of the inline asm into a !srcloc metadata on the
+  // call.
+  if (const GCCAsmStmt *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S)) {
+    Result->setMetadata("srcloc", getAsmSrcLocInfo(gccAsmStmt->getAsmString(),
+                                                   *this));
+  } else {
+    // At least put the line number on MS inline asm blobs.
+    auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding());
+    Result->setMetadata("srcloc",
+                        llvm::MDNode::get(getLLVMContext(),
+                                          llvm::ConstantAsMetadata::get(Loc)));
+  }
+
+  if (getLangOpts().assumeFunctionsAreConvergent()) {
+    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as bar.sync, and so can't have certain optimizations applied around
+    // them).
+    Result->addAttribute(llvm::AttributeList::FunctionIndex,
+                         llvm::Attribute::Convergent);
+  }
+
+  // Extract all of the register value results from the asm.
   std::vector<llvm::Value*> RegResults;
-  if (IsGCCAsmGoto) {
-    llvm::CallBrInst *Result =
-        Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
-    UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
-                      ReadNone, S, ResultRegTypes, *this, RegResults);
-    EmitBlock(Fallthrough);
+  if (ResultRegTypes.size() == 1) {
+    RegResults.push_back(Result);
   } else {
-    llvm::CallInst *Result =
-        Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
-    UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
-                      ReadNone, S, ResultRegTypes, *this, RegResults);
+    for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
+      llvm::Value *Tmp = Builder.CreateExtractValue(Result, i, "asmresult");
+      RegResults.push_back(Tmp);
+    }
   }
 
   assert(RegResults.size() == ResultRegTypes.size());
diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index 75f3ac396e1a4..c63808a472b5a 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -710,12 +710,12 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
 
   // Remember if this was a volatile asm.
   bool isVolatile = DS.getTypeQualifiers() & DeclSpec::TQ_volatile;
-  // Remember if this was a goto asm.
-  bool isGotoAsm = false;
 
+  // TODO: support "asm goto" constructs (PR#9295).
   if (Tok.is(tok::kw_goto)) {
-    isGotoAsm = true;
-    ConsumeToken();
+    Diag(Tok, diag::err_asm_goto_not_supported_yet);
+    SkipUntil(tok::r_paren, StopAtSemi);
+    return StmtError();
   }
 
   if (Tok.isNot(tok::l_paren)) {
@@ -753,8 +753,7 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
     return Actions.ActOnGCCAsmStmt(AsmLoc, /*isSimple*/ true, isVolatile,
                                    /*NumOutputs*/ 0, /*NumInputs*/ 0, nullptr,
                                    Constraints, Exprs, AsmString.get(),
-                                   Clobbers, /*NumLabels*/ 0,
-                                   T.getCloseLocation());
+                                   Clobbers, T.getCloseLocation());
   }
 
   // Parse Outputs, if present.
@@ -764,12 +763,6 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
     AteExtraColon = Tok.is(tok::coloncolon);
     ConsumeToken();
 
-    if (!AteExtraColon && isGotoAsm && Tok.isNot(tok::colon)) {
-      Diag(Tok, diag::err_asm_goto_cannot_have_output);
-      SkipUntil(tok::r_paren, StopAtSemi);
-      return StmtError();
-    }
-
     if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs))
       return StmtError();
   }
@@ -796,15 +789,12 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
   unsigned NumInputs = Names.size() - NumOutputs;
 
   // Parse the clobbers, if present.
-  if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) {
-    if (AteExtraColon)
-      AteExtraColon = false;
-    else {
-      AteExtraColon = Tok.is(tok::coloncolon);
+  if (AteExtraColon || Tok.is(tok::colon)) {
+    if (!AteExtraColon)
       ConsumeToken();
-    }
+
     // Parse the asm-string list for clobbers if present.
-    if (!AteExtraColon && isTokenStringLiteral()) {
+    if (Tok.isNot(tok::r_paren)) {
       while (1) {
         ExprResult Clobber(ParseAsmStringLiteral());
 
@@ -818,49 +808,11 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
       }
     }
   }
-  if (!isGotoAsm && (Tok.isNot(tok::r_paren) || AteExtraColon)) {
-    Diag(Tok, diag::err_expected) << tok::r_paren;
-    SkipUntil(tok::r_paren, StopAtSemi);
-    return StmtError();
-  }
-
-  // Parse the goto label, if present.
-  unsigned NumLabels = 0;
-  if (AteExtraColon || Tok.is(tok::colon)) {
-    if (!AteExtraColon)
-      ConsumeToken();
 
-    while (true) {
-      if (Tok.isNot(tok::identifier)) {
-        Diag(Tok, diag::err_expected) << tok::identifier;
-        SkipUntil(tok::r_paren, StopAtSemi);
-        return StmtError();
-      }
-      LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(),
-                                                  Tok.getLocation());
-      Names.push_back(Tok.getIdentifierInfo());
-      if (!LD) {
-        SkipUntil(tok::r_paren, StopAtSemi);
-        return StmtError();
-      }
-      ExprResult Res =
-          Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD);
-      Exprs.push_back(Res.get());
-      NumLabels++;
-      ConsumeToken();
-      if (!TryConsumeToken(tok::comma))
-        break;
-    }
-  } else if (isGotoAsm) {
-    Diag(Tok, diag::err_expected) << tok::colon;
-    SkipUntil(tok::r_paren, StopAtSemi);
-    return StmtError();
-  }
   T.consumeClose();
   return Actions.ActOnGCCAsmStmt(
       AsmLoc, false, isVolatile, NumOutputs, NumInputs, Names.data(),
-      Constraints, Exprs, AsmString.get(), Clobbers, NumLabels,
-      T.getCloseLocation());
+      Constraints, Exprs, AsmString.get(), Clobbers, T.getCloseLocation());
 }
 
 /// ParseAsmOperands - Parse the asm-operands production as used by
diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp
index c8743df90e340..2234d6ba9b11f 100644
--- a/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/clang/lib/Sema/JumpDiagnostics.cpp
@@ -65,10 +65,8 @@ class JumpScopeChecker {
   llvm::DenseMap<Stmt*, unsigned> LabelAndGotoScopes;
   SmallVector<Stmt*, 16> Jumps;
 
-  SmallVector<Stmt*, 4> IndirectJumps;
-  SmallVector<Stmt*, 4> AsmJumps;
+  SmallVector<IndirectGotoStmt*, 4> IndirectJumps;
   SmallVector<LabelDecl*, 4> IndirectJumpTargets;
-  SmallVector<LabelDecl*, 4> AsmJumpTargets;
 public:
   JumpScopeChecker(Stmt *Body, Sema &S);
 private:
@@ -78,10 +76,10 @@ class JumpScopeChecker {
   void BuildScopeInformation(Stmt *S, unsigned &origParentScope);
 
   void VerifyJumps();
-  void VerifyIndirectOrAsmJumps(bool IsAsmGoto);
+  void VerifyIndirectJumps();
   void NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes);
-  void DiagnoseIndirectOrAsmJump(Stmt *IG, unsigned IGScope, LabelDecl *Target,
-                                 unsigned TargetScope);
+  void DiagnoseIndirectJump(IndirectGotoStmt *IG, unsigned IGScope,
+                            LabelDecl *Target, unsigned TargetScope);
   void CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc,
                  unsigned JumpDiag, unsigned JumpDiagWarning,
                  unsigned JumpDiagCXX98Compat);
@@ -105,8 +103,7 @@ JumpScopeChecker::JumpScopeChecker(Stmt *Body, Sema &s)
 
   // Check that all jumps we saw are kosher.
   VerifyJumps();
-  VerifyIndirectOrAsmJumps(false);
-  VerifyIndirectOrAsmJumps(true);
+  VerifyIndirectJumps();
 }
 
 /// GetDeepestCommonScope - Finds the innermost scope enclosing the
@@ -319,7 +316,7 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     }
 
     LabelAndGotoScopes[S] = ParentScope;
-    IndirectJumps.push_back(S);
+    IndirectJumps.push_back(cast<IndirectGotoStmt>(S));
     break;
 
   case Stmt::SwitchStmtClass:
@@ -342,18 +339,6 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     Jumps.push_back(S);
     break;
 
-  case Stmt::GCCAsmStmtClass:
-    if (auto *GS = dyn_cast<GCCAsmStmt>(S))
-      if (GS->isAsmGoto()) {
-        // Remember both what scope a goto is in as well as the fact that we
-        // have it.  This makes the second scan not have to walk the AST again.
-        LabelAndGotoScopes[S] = ParentScope;
-        AsmJumps.push_back(GS);
-        for (auto *E : GS->labels())
-          AsmJumpTargets.push_back(E->getLabel());
-      }
-    break;
-
   case Stmt::IfStmtClass: {
     IfStmt *IS = cast<IfStmt>(S);
     if (!(IS->isConstexpr() || IS->isObjCAvailabilityCheck()))
@@ -644,13 +629,14 @@ void JumpScopeChecker::VerifyJumps() {
   }
 }
 
-/// VerifyIndirectOrAsmJumps - Verify whether any possible indirect goto or
-/// asm goto jump might cross a protection boundary.  Unlike direct jumps,
-/// indirect or asm goto jumps count cleanups as protection boundaries:
-/// since there's no way to know where the jump is going, we can't implicitly
-/// run the right cleanups the way we can with direct jumps.
-/// Thus, an indirect/asm jump is "trivial" if it bypasses no
-/// initializations and no teardowns.  More formally, an indirect/asm jump
+/// VerifyIndirectJumps - Verify whether any possible indirect jump
+/// might cross a protection boundary.  Unlike direct jumps, indirect
+/// jumps count cleanups as protection boundaries:  since there's no
+/// way to know where the jump is going, we can't implicitly run the
+/// right cleanups the way we can with direct jumps.
+///
+/// Thus, an indirect jump is "trivial" if it bypasses no
+/// initializations and no teardowns.  More formally, an indirect jump
 /// from A to B is trivial if the path out from A to DCA(A,B) is
 /// trivial and the path in from DCA(A,B) to B is trivial, where
 /// DCA(A,B) is the deepest common ancestor of A and B.
@@ -662,41 +648,36 @@ void JumpScopeChecker::VerifyJumps() {
 /// Under these definitions, this function checks that the indirect
 /// jump between A and B is trivial for every indirect goto statement A
 /// and every label B whose address was taken in the function.
-void JumpScopeChecker::VerifyIndirectOrAsmJumps(bool IsAsmGoto) {
-  SmallVector<Stmt*, 4> GotoJumps = IsAsmGoto ? AsmJumps : IndirectJumps;
-  if (GotoJumps.empty())
-    return;
-  SmallVector<LabelDecl *, 4> JumpTargets =
-      IsAsmGoto ? AsmJumpTargets : IndirectJumpTargets;
+void JumpScopeChecker::VerifyIndirectJumps() {
+  if (IndirectJumps.empty()) return;
+
   // If there aren't any address-of-label expressions in this function,
   // complain about the first indirect goto.
-  if (JumpTargets.empty()) {
-    assert(!IsAsmGoto &&"only indirect goto can get here");
-    S.Diag(GotoJumps[0]->getBeginLoc(),
+  if (IndirectJumpTargets.empty()) {
+    S.Diag(IndirectJumps[0]->getGotoLoc(),
            diag::err_indirect_goto_without_addrlabel);
     return;
   }
+
   // Collect a single representative of every scope containing an
-  // indirect or asm goto.  For most code bases, this substantially cuts
+  // indirect goto.  For most code bases, this substantially cuts
   // down on the number of jump sites we'll have to consider later.
-  typedef std::pair<unsigned, Stmt*> JumpScope;
+  typedef std::pair<unsigned, IndirectGotoStmt*> JumpScope;
   SmallVector<JumpScope, 32> JumpScopes;
   {
-    llvm::DenseMap<unsigned, Stmt*> JumpScopesMap;
-    for (SmallVectorImpl<Stmt *>::iterator I = GotoJumps.begin(),
-                                           E = GotoJumps.end();
-         I != E; ++I) {
-      Stmt *IG = *I;
+    llvm::DenseMap<unsigned, IndirectGotoStmt*> JumpScopesMap;
+    for (SmallVectorImpl<IndirectGotoStmt*>::iterator
+           I = IndirectJumps.begin(), E = IndirectJumps.end(); I != E; ++I) {
+      IndirectGotoStmt *IG = *I;
       if (CHECK_PERMISSIVE(!LabelAndGotoScopes.count(IG)))
         continue;
       unsigned IGScope = LabelAndGotoScopes[IG];
-      Stmt *&Entry = JumpScopesMap[IGScope];
+      IndirectGotoStmt *&Entry = JumpScopesMap[IGScope];
       if (!Entry) Entry = IG;
     }
     JumpScopes.reserve(JumpScopesMap.size());
-    for (llvm::DenseMap<unsigned, Stmt *>::iterator I = JumpScopesMap.begin(),
-                                                    E = JumpScopesMap.end();
-         I != E; ++I)
+    for (llvm::DenseMap<unsigned, IndirectGotoStmt*>::iterator
+           I = JumpScopesMap.begin(), E = JumpScopesMap.end(); I != E; ++I)
       JumpScopes.push_back(*I);
   }
 
@@ -704,8 +685,8 @@ void JumpScopeChecker::VerifyIndirectOrAsmJumps(bool IsAsmGoto) {
   // label whose address was taken somewhere in the function.
   // For most code bases, there will be only one such scope.
   llvm::DenseMap<unsigned, LabelDecl*> TargetScopes;
-  for (SmallVectorImpl<LabelDecl *>::iterator I = JumpTargets.begin(),
-                                              E = JumpTargets.end();
+  for (SmallVectorImpl<LabelDecl*>::iterator
+         I = IndirectJumpTargets.begin(), E = IndirectJumpTargets.end();
        I != E; ++I) {
     LabelDecl *TheLabel = *I;
     if (CHECK_PERMISSIVE(!LabelAndGotoScopes.count(TheLabel->getStmt())))
@@ -782,7 +763,7 @@ void JumpScopeChecker::VerifyIndirectOrAsmJumps(bool IsAsmGoto) {
       // Only diagnose if we didn't find something.
       if (IsReachable) continue;
 
-      DiagnoseIndirectOrAsmJump(I->second, I->first, TargetLabel, TargetScope);
+      DiagnoseIndirectJump(I->second, I->first, TargetLabel, TargetScope);
     }
   }
 }
@@ -803,15 +784,12 @@ static bool IsCXX98CompatWarning(Sema &S, unsigned InDiagNote) {
 }
 
 /// Produce primary diagnostic for an indirect jump statement.
-static void DiagnoseIndirectOrAsmJumpStmt(Sema &S, Stmt *Jump,
-                                          LabelDecl *Target, bool &Diagnosed) {
+static void DiagnoseIndirectJumpStmt(Sema &S, IndirectGotoStmt *Jump,
+                                     LabelDecl *Target, bool &Diagnosed) {
   if (Diagnosed)
     return;
-  bool IsAsmGoto = isa<GCCAsmStmt>(Jump);
-  S.Diag(Jump->getBeginLoc(), diag::err_indirect_goto_in_protected_scope)
-      << IsAsmGoto;
-  S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target)
-      << IsAsmGoto;
+  S.Diag(Jump->getGotoLoc(), diag::err_indirect_goto_in_protected_scope);
+  S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target);
   Diagnosed = true;
 }
 
@@ -825,9 +803,10 @@ void JumpScopeChecker::NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes) {
 }
 
 /// Diagnose an indirect jump which is known to cross scopes.
-void JumpScopeChecker::DiagnoseIndirectOrAsmJump(Stmt *Jump, unsigned JumpScope,
-                                                 LabelDecl *Target,
-                                                 unsigned TargetScope) {
+void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
+                                            unsigned JumpScope,
+                                            LabelDecl *Target,
+                                            unsigned TargetScope) {
   if (CHECK_PERMISSIVE(JumpScope == TargetScope))
     return;
 
@@ -837,7 +816,7 @@ void JumpScopeChecker::DiagnoseIndirectOrAsmJump(Stmt *Jump, unsigned JumpScope,
   // Walk out the scope chain until we reach the common ancestor.
   for (unsigned I = JumpScope; I != Common; I = Scopes[I].ParentScope)
     if (Scopes[I].OutDiag) {
-      DiagnoseIndirectOrAsmJumpStmt(S, Jump, Target, Diagnosed);
+      DiagnoseIndirectJumpStmt(S, Jump, Target, Diagnosed);
       S.Diag(Scopes[I].Loc, Scopes[I].OutDiag);
     }
 
@@ -848,18 +827,15 @@ void JumpScopeChecker::DiagnoseIndirectOrAsmJump(Stmt *Jump, unsigned JumpScope,
     if (IsCXX98CompatWarning(S, Scopes[I].InDiag))
       ToScopesCXX98Compat.push_back(I);
     else if (Scopes[I].InDiag) {
-      DiagnoseIndirectOrAsmJumpStmt(S, Jump, Target, Diagnosed);
+      DiagnoseIndirectJumpStmt(S, Jump, Target, Diagnosed);
       S.Diag(Scopes[I].Loc, Scopes[I].InDiag);
     }
 
   // Diagnose this jump if it would be ill-formed in C++98.
   if (!Diagnosed && !ToScopesCXX98Compat.empty()) {
-    bool IsAsmGoto = isa<GCCAsmStmt>(Jump);
-    S.Diag(Jump->getBeginLoc(),
-           diag::warn_cxx98_compat_indirect_goto_in_protected_scope)
-        << IsAsmGoto;
-    S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target)
-        << IsAsmGoto;
+    S.Diag(Jump->getGotoLoc(),
+           diag::warn_cxx98_compat_indirect_goto_in_protected_scope);
+    S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target);
     NoteJumpIntoScopes(ToScopesCXX98Compat);
   }
 }
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index ec8958c3c5f90..8c6012573c64f 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -209,12 +209,11 @@ static StringRef extractRegisterName(const Expr *Expression,
 static SourceLocation
 getClobberConflictLocation(MultiExprArg Exprs, StringLiteral **Constraints,
                            StringLiteral **Clobbers, int NumClobbers,
-                           unsigned NumLabels,
                            const TargetInfo &Target, ASTContext &Cont) {
   llvm::StringSet<> InOutVars;
   // Collect all the input and output registers from the extended asm
   // statement in order to check for conflicts with the clobber list
-  for (unsigned int i = 0; i < Exprs.size() - NumLabels; ++i) {
+  for (unsigned int i = 0; i < Exprs.size(); ++i) {
     StringRef Constraint = Constraints[i]->getString();
     StringRef InOutReg = Target.getConstraintRegister(
         Constraint, extractRegisterName(Exprs[i], Target));
@@ -242,7 +241,6 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
                                  unsigned NumInputs, IdentifierInfo **Names,
                                  MultiExprArg constraints, MultiExprArg Exprs,
                                  Expr *asmString, MultiExprArg clobbers,
-                                 unsigned NumLabels,
                                  SourceLocation RParenLoc) {
   unsigned NumClobbers = clobbers.size();
   StringLiteral **Constraints =
@@ -271,7 +269,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, NumLabels, RParenLoc);
+                     NumClobbers, Clobbers, RParenLoc);
     }
 
     ExprResult ER = CheckPlaceholderExpr(Exprs[i]);
@@ -332,7 +330,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, NumLabels, RParenLoc);
+                     NumClobbers, Clobbers, RParenLoc);
     }
   }
 
@@ -354,7 +352,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, NumLabels, RParenLoc);
+                     NumClobbers, Clobbers, RParenLoc);
     }
 
     ExprResult ER = CheckPlaceholderExpr(Exprs[i]);
@@ -453,15 +451,14 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, NumLabels, RParenLoc);
+                     NumClobbers, Clobbers, RParenLoc);
     }
   }
 
   GCCAsmStmt *NS =
     new (Context) GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                              NumInputs, Names, Constraints, Exprs.data(),
-                             AsmString, NumClobbers, Clobbers, NumLabels,
-                             RParenLoc);
+                             AsmString, NumClobbers, Clobbers, RParenLoc);
   // Validate the asm string, ensuring it makes sense given the operands we
   // have.
   SmallVector<GCCAsmStmt::AsmStringPiece, 8> Pieces;
@@ -479,10 +476,8 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
 
     // Look for the correct constraint index.
     unsigned ConstraintIdx = Piece.getOperandNo();
-    // Labels are the last in the Exprs list.
-    if (NS->isAsmGoto() && ConstraintIdx >= NS->getNumInputs())
-      continue;
     unsigned NumOperands = NS->getNumOutputs() + NS->getNumInputs();
+
     // Look for the (ConstraintIdx - NumOperands + 1)th constraint with
     // modifier '+'.
     if (ConstraintIdx >= NumOperands) {
@@ -665,39 +660,10 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
   // Check for conflicts between clobber list and input or output lists
   SourceLocation ConstraintLoc =
       getClobberConflictLocation(Exprs, Constraints, Clobbers, NumClobbers,
-                                 NumLabels,
                                  Context.getTargetInfo(), Context);
   if (ConstraintLoc.isValid())
     targetDiag(ConstraintLoc, diag::error_inoutput_conflict_with_clobber);
 
-  // Check for duplicate asm operand name between input, output and label lists.
-  typedef std::pair<StringRef , Expr *> NamedOperand;
-  SmallVector<NamedOperand, 4> NamedOperandList;
-  for (unsigned i = 0, e = NumOutputs + NumInputs + NumLabels; i != e; ++i)
-    if (Names[i])
-      NamedOperandList.emplace_back(
-          std::make_pair(Names[i]->getName(), Exprs[i]));
-  // Sort NamedOperandList.
-  std::stable_sort(NamedOperandList.begin(), NamedOperandList.end(),
-              [](const NamedOperand &LHS, const NamedOperand &RHS) {
-                return LHS.first < RHS.first;
-              });
-  // Find adjacent duplicate operand.
-  SmallVector<NamedOperand, 4>::iterator Found =
-      std::adjacent_find(begin(NamedOperandList), end(NamedOperandList),
-                         [](const NamedOperand &LHS, const NamedOperand &RHS) {
-                           return LHS.first == RHS.first;
-                         });
-  if (Found != NamedOperandList.end()) {
-    Diag((Found + 1)->second->getBeginLoc(),
-         diag::error_duplicate_asm_operand_name)
-        << (Found + 1)->first;
-    Diag(Found->second->getBeginLoc(), diag::note_duplicate_asm_operand_name)
-        << Found->first;
-    return StmtError();
-  }
-  if (NS->isAsmGoto())
-    setFunctionHasBranchIntoScope();
   return NS;
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 609a3cfcfca1d..6620885f23961 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1373,11 +1373,10 @@ class TreeTransform {
                                unsigned NumInputs, IdentifierInfo **Names,
                                MultiExprArg Constraints, MultiExprArg Exprs,
                                Expr *AsmString, MultiExprArg Clobbers,
-                               unsigned NumLabels,
                                SourceLocation RParenLoc) {
     return getSema().ActOnGCCAsmStmt(AsmLoc, IsSimple, IsVolatile, NumOutputs,
                                      NumInputs, Names, Constraints, Exprs,
-                                     AsmString, Clobbers, NumLabels, RParenLoc);
+                                     AsmString, Clobbers, RParenLoc);
   }
 
   /// Build a new MS style inline asm statement.
@@ -7052,16 +7051,6 @@ TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
     Exprs.push_back(Result.get());
   }
 
-  // Go through the Labels.
-  for (unsigned I = 0, E = S->getNumLabels(); I != E; ++I) {
-    Names.push_back(S->getLabelIdentifier(I));
-
-    ExprResult Result = getDerived().TransformExpr(S->getLabelExpr(I));
-    if (Result.isInvalid())
-      return StmtError();
-    ExprsChanged |= Result.get() != S->getLabelExpr(I);
-    Exprs.push_back(Result.get());
-  }
   if (!getDerived().AlwaysRebuild() && !ExprsChanged)
     return S;
 
@@ -7075,8 +7064,7 @@ TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
                                         S->isVolatile(), S->getNumOutputs(),
                                         S->getNumInputs(), Names.data(),
                                         Constraints, Exprs, AsmString.get(),
-                                        Clobbers, S->getNumLabels(),
-                                        S->getRParenLoc());
+                                        Clobbers, S->getRParenLoc());
 }
 
 template<typename Derived>
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 52aa3d961d200..4d879b46e1a4a 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -370,14 +370,12 @@ void ASTStmtReader::VisitAsmStmt(AsmStmt *S) {
 
 void ASTStmtReader::VisitGCCAsmStmt(GCCAsmStmt *S) {
   VisitAsmStmt(S);
-  S->NumLabels = Record.readInt();
   S->setRParenLoc(ReadSourceLocation());
   S->setAsmString(cast_or_null<StringLiteral>(Record.readSubStmt()));
 
   unsigned NumOutputs = S->getNumOutputs();
   unsigned NumInputs = S->getNumInputs();
   unsigned NumClobbers = S->getNumClobbers();
-  unsigned NumLabels = S->getNumLabels();
 
   // Outputs and inputs
   SmallVector<IdentifierInfo *, 16> Names;
@@ -394,14 +392,9 @@ void ASTStmtReader::VisitGCCAsmStmt(GCCAsmStmt *S) {
   for (unsigned I = 0; I != NumClobbers; ++I)
     Clobbers.push_back(cast_or_null<StringLiteral>(Record.readSubStmt()));
 
-  // Labels
-  for (unsigned I = 0, N = NumLabels; I != N; ++I)
-    Exprs.push_back(Record.readSubStmt());
-
   S->setOutputsAndInputsAndClobbers(Record.getContext(),
                                     Names.data(), Constraints.data(),
                                     Exprs.data(), NumOutputs, NumInputs,
-                                    NumLabels,
                                     Clobbers.data(), NumClobbers);
 }
 
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 776aab6bf51d2..b0a35cf2f5655 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -283,7 +283,6 @@ void ASTStmtWriter::VisitAsmStmt(AsmStmt *S) {
 
 void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
   VisitAsmStmt(S);
-  Record.push_back(S->getNumLabels());
   Record.AddSourceLocation(S->getRParenLoc());
   Record.AddStmt(S->getAsmString());
 
@@ -305,9 +304,6 @@ void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
   for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
     Record.AddStmt(S->getClobberStringLiteral(I));
 
-  // Labels
-  for (auto *E : S->labels()) Record.AddStmt(E);
-
   Code = serialization::STMT_GCCASM;
 }
 
diff --git a/clang/test/Analysis/asm-goto.cpp b/clang/test/Analysis/asm-goto.cpp
deleted file mode 100644
index 3d4babc57bd2a..0000000000000
--- a/clang/test/Analysis/asm-goto.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 | FileCheck %s
-
-int foo(int cond)
-{
-label_true:
-  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
-  return 0;
-loop:
-  return 0;
-}
-
-// CHECK-LABEL: loop
-// CHECK-NEXT: 0
-// CHECK-NEXT: return
-// CHECK-NEXT: Preds (1): B3
-// CHECK-NEXT: Succs (1): B0
-
-// CHECK-LABEL: label_true
-// CHECK-NEXT: asm goto
-// CHECK-NEXT: Preds (2): B3 B4
-// CHECK-NEXT: Succs (3): B2 B3 B1
-
-
-int bar(int cond)
-{
-  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::L1, L2);
-  return 0;
-L1:
-L2:
-  return 0;
-}
-
-// CHECK: [B4]
-// CHECK-NEXT: asm goto
-// CHECK-NEXT: Preds (1): B5
-// CHECK-NEXT: Succs (3): B3 B2 B1
-
-int zoo(int n)
-{
-A5:
-A1:
-  asm goto("testl %0, %0; jne %l1;" :: "r"(n)::A1, A2, A3, A4, A5);
-A2:
-A3:
-A4:
-  return 0;
-}
-
-// CHECK-LABEL: A1
-// CHECK-NEXT: asm goto
-// CHECK-NEXT: Preds (2): B5 B4
-// CHECK-NEXT: Succs (5): B3 B4 B2 B1 B5
diff --git a/clang/test/CodeGen/asm-goto.c b/clang/test/CodeGen/asm-goto.c
deleted file mode 100644
index 99e97f2a41e12..0000000000000
--- a/clang/test/CodeGen/asm-goto.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// REQUIRES: x86-registered-target
-// RUN: %clang_cc1 -triple x86_64 -O0 -emit-llvm %s -o - | FileCheck %s
-
-int foo(int cond)
-{
-  // CHECK: callbr void asm sideeffect
-  // CHECK: to label %asm.fallthrough [label %label_true, label %loop], !srcloc !2
-  // CHECK: asm.fallthrough:
-  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
-  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
-  // CHECK: callbr void asm sideeffect
-  // CHECK: to label %asm.fallthrough1 [label %label_true, label %loop], !srcloc !3
-  // CHECK: asm.fallthrough1:
-  return 0;
-loop:
-  return 0;
-label_true:
-  return 1;
-}
diff --git a/clang/test/CodeGen/asm.c b/clang/test/CodeGen/asm.c
index 7de79639bfd72..038d346e9993a 100644
--- a/clang/test/CodeGen/asm.c
+++ b/clang/test/CodeGen/asm.c
@@ -262,15 +262,3 @@ void t31(int len) {
   // CHECK: @t31
   // CHECK: call void asm sideeffect "", "=*%rm,=*rm,0,1,~{dirflag},~{fpsr},~{flags}"
 }
-
-// CHECK: @t32
-int t32(int cond)
-{
-  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
-  // CHECK: callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %0, i8* blockaddress(@t32, %label_true), i8* blockaddress(@t32, %loop)) #1
-  return 0;
-loop:
-  return 0;
-label_true:
-  return 1;
-}
diff --git a/clang/test/CodeGen/inline-asm-mixed-style.c b/clang/test/CodeGen/inline-asm-mixed-style.c
index a9e111cd5ddcf..6b830d9fa7a92 100644
--- a/clang/test/CodeGen/inline-asm-mixed-style.c
+++ b/clang/test/CodeGen/inline-asm-mixed-style.c
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -fsyntax-only -verify %s -DCHECK_ASM_GOTO
 // RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -O0 -emit-llvm -S %s -o - | FileCheck %s
 // REQUIRES: x86-registered-target
 
@@ -19,11 +20,10 @@ void f() {
   // CHECK: movl    %ebx, %eax
   // CHECK: movl    %ecx, %edx
 
-  __asm volatile goto ("movl %ecx, %edx");
-  // CHECK: movl    %ecx, %edx
+#ifdef CHECK_ASM_GOTO
+  __asm volatile goto ("movl %ecx, %edx"); // expected-error {{'asm goto' constructs are not supported yet}}
 
   __asm mov eax, ebx
-  __asm goto ("movl %ecx, %edx");
-  // CHECK: movl    %ebx, %eax
-  // CHECK: movl    %ecx, %edx
+  __asm goto ("movl %ecx, %edx"); // expected-error {{'asm goto' constructs are not supported yet}}
+#endif
 }
diff --git a/clang/test/Coverage/c-language-features.inc b/clang/test/Coverage/c-language-features.inc
index ea3b96f6005a3..356687907d905 100644
--- a/clang/test/Coverage/c-language-features.inc
+++ b/clang/test/Coverage/c-language-features.inc
@@ -71,9 +71,7 @@ theif:
   }
 
   asm ("nop");
-  int cond;
-  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true);
-label_true:
+
   return;
 }
 
diff --git a/clang/test/PCH/asm.h b/clang/test/PCH/asm.h
index 5a7268eff6e6a..a568058d58f66 100644
--- a/clang/test/PCH/asm.h
+++ b/clang/test/PCH/asm.h
@@ -1,14 +1,10 @@
 // Header for the PCH test asm.c
 
 void f() {
-  int i,cond;
+  int i;
 
   asm ("foo\n" : : "a" (i + 2));
   asm ("foo\n" : [symbolic_name] "=a" (i) : "[symbolic_name]" (i));
-  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
-label_true:
-loop:
-  return;
 }
 
 void clobbers() {
diff --git a/clang/test/Parser/asm.c b/clang/test/Parser/asm.c
index 489b545ebeda6..637f9d7ed42f8 100644
--- a/clang/test/Parser/asm.c
+++ b/clang/test/Parser/asm.c
@@ -21,56 +21,6 @@ void f2() {
 }
 
 
-int a, b, c, d, e, f, g, h, i, j, k, l;
-
-void
-fgoto1 (void)
-{
-  __asm__ volatile goto (""
-            :: [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d),
-               [e] "r" (e), [f] "r" (f), [g] "r" (g), [h] "r" (h),
-               [i] "r" (i), [j] "r" (j), [k] "r" (k), [l] "r" (l)
-            ::lab1,lab2);
-lab1: return;
-lab2: return;
-}
-
-void
-fgoto2 (void)
-{
-  __asm__ volatile goto (""
-            :: [a] "r,m" (a), [b] "r,m" (b), [c] "r,m" (c), [d] "r,m" (d),
-               [e] "r,m" (e), [f] "r,m" (f), [g] "r,m" (g), [h] "r,m" (h),
-               [i] "r,m" (i), [j] "r,m" (j), [k] "r,m" (k), [l] "r,m" (l)
-            :: lab);
-  lab: return;
-}
-
-int zoo ()
-{
-  int x,cond,*e;
-  // expected-error@+1 {{expected ')'}}
-  asm ("mov %[e], %[e]" : : [e] "rm" (*e)::a)
-  // expected-error@+1 {{'asm goto' cannot have output constraints}}
-  asm goto ("decl %0; jnz %l[a]" :"=r"(x): "m"(x) : "memory" : a);
-  // expected-error@+1 {{expected identifie}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" : );
-  // expected-error@+1 {{expected ':'}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" );
-  // expected-error@+1 {{use of undeclared label 'x'}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" :x);
-  // expected-error@+1 {{use of undeclared label 'b'}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" :b);
-  // expected-error@+1 {{invalid operand number in inline asm string}}
-  asm goto ("testl %0, %0; jne %l3;" :: "r"(cond)::label_true, loop);
-  // expected-error@+1 {{unknown symbolic operand name in inline assembly string}}
-  asm goto ("decl %0; jnz %l[b]" :: "m"(x) : "memory" : a);
-a:
-label_true:
-loop:
-  return 0;
-}
-
 // rdar://5952468
 __asm ; // expected-error {{expected '(' after 'asm'}}
 
diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp
index 9c4d62a255242..9f64dfea476ed 100644
--- a/clang/test/Parser/asm.cpp
+++ b/clang/test/Parser/asm.cpp
@@ -7,54 +7,3 @@ int foo4 asm (u"bar4"); // expected-error {{cannot use unicode string literal in
 int foo5 asm (U"bar5"); // expected-error {{cannot use unicode string literal in 'asm'}}
 int foo6 asm ("bar6"_x); // expected-error {{string literal with user-defined suffix cannot be used here}}
 int foo6 asm ("" L"bar7"); // expected-error {{cannot use wide string literal in 'asm'}}
-
-int zoo ()
-{
-  int x,cond,*e;
-  // expected-error@+1 {{expected ')'}}
-  asm ("mov %[e], %[e]" : : [e] "rm" (*e)::a)
-  // expected-error@+1  {{'asm goto' cannot have output constraints}}
-  asm goto ("decl %0; jnz %l[a]" :"=r"(x): "m"(x) : "memory" : a);
-  // expected-error@+1 {{expected identifie}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" : );
-  // expected-error@+1  {{expected ':'}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" );
-  // expected-error@+1 {{use of undeclared label 'x'}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" :x);
-  // expected-error@+1 {{use of undeclared label 'b'}}
-  asm goto ("decl %0;" :: "m"(x) : "memory" :b);
-  // expected-error@+1 {{invalid operand number in inline asm string}}
-  asm goto ("testl %0, %0; jne %l3;" :: "r"(cond)::label_true, loop);
-  // expected-error@+1 {{unknown symbolic operand name in inline assembly string}}
-  asm goto ("decl %0; jnz %l[b]" :: "m"(x) : "memory" : a);
-label_true:
-loop:
-a:
-  return 0;
-}
-
-
-int a, b, c, d, e, f, g, h, i, j, k, l;
-
-void
-fgoto1 (void)
-{
-  __asm__ volatile goto (""
-            :: [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d),
-               [e] "r" (e), [f] "r" (f), [g] "r" (g), [h] "r" (h),
-               [i] "r" (i), [j] "r" (j), [k] "r" (k), [l] "r" (l)
-            ::lab1,lab2);
-lab1: return;
-lab2: return;
-}
-
-void
-fgoto2 (void)
-{
-  __asm__ volatile goto (""
-            :: [a] "r,m" (a), [b] "r,m" (b), [c] "r,m" (c), [d] "r,m" (d),
-               [e] "r,m" (e), [f] "r,m" (f), [g] "r,m" (g), [h] "r,m" (h),
-               [i] "r,m" (i), [j] "r,m" (j), [k] "r,m" (k), [l] "r,m" (l)
-            :: lab);
-  lab: return;
-}
diff --git a/clang/test/Sema/asm-goto.cpp b/clang/test/Sema/asm-goto.cpp
deleted file mode 100644
index f61a8096b83ec..0000000000000
--- a/clang/test/Sema/asm-goto.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// RUN: %clang_cc1 %s -triple i386-pc-linux-gnu -verify -fsyntax-only
-
-struct NonTrivial {
-  ~NonTrivial();
-  int f(int);
-private:
-  int k;
-};
-void JumpDiagnostics(int n) {
-// expected-error@+1 {{cannot jump from this goto statement to its label}}
-  goto DirectJump;
-// expected-note@+1 {{jump bypasses variable with a non-trivial destructor}}
-  NonTrivial tnp1;
-
-DirectJump:
-// expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
-  asm goto("jmp %l0;" ::::Later);
-// expected-note@+1 {{jump bypasses variable with a non-trivial destructor}}
-  NonTrivial tnp2;
-// expected-note@+1 {{possible target of asm goto statement}}
-Later:
-  return;
-}
-
-struct S { ~S(); };
-void foo(int a) {
-  if (a) {
-FOO:
-// expected-note@+2 {{jump exits scope of variable with non-trivial destructor}}
-// expected-note@+1 {{jump exits scope of variable with non-trivial destructor}}
-    S s;
-    void *p = &&BAR;
-// expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
-  asm goto("jmp %l0;" ::::BAR);
-// expected-error@+1 {{cannot jump from this indirect goto statement to one of its possible targets}}
-    goto *p;
-    p = &&FOO;
-    goto *p;
-    return;
-  }
-// expected-note@+2 {{possible target of asm goto statement}}
-// expected-note@+1 {{possible target of indirect goto statement}}
-BAR:
-  return;
-}
diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c
index 29a55c610de49..67da197426cd5 100644
--- a/clang/test/Sema/asm.c
+++ b/clang/test/Sema/asm.c
@@ -295,24 +295,3 @@ int test17(int t0)
   return r0 + r1;
 }
 
-void test18()
-{
-  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
-  // expected-note@+1 {{asm operand name "lab" first referenced here}}
-  asm goto ("" : : : : lab, lab, lab2, lab);
-  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
-  // expected-note@+1 {{asm operand name "lab" first referenced here}}
-  asm goto ("xorw %[lab], %[lab]; je %l[lab]" : : [lab] "i" (0) : : lab);
-lab:;
-lab2:;
-  int x,x1;
-  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
-  // expected-note@+1 {{asm operand name "lab" first referenced here}}
-  asm ("" : [lab] "=r" (x),[lab] "+r" (x) : [lab1] "r" (x));
-  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
-  // expected-note@+1 {{asm operand name "lab" first referenced here}}
-  asm ("" : [lab] "=r" (x1) : [lab] "r" (x));
-  // expected-error@+1 {{invalid operand number in inline asm string}}
-  asm ("jne %l0":::);
-  asm goto ("jne %l0"::::lab);
-}
diff --git a/clang/test/Sema/inline-asm-validate-tmpl.cpp b/clang/test/Sema/inline-asm-validate-tmpl.cpp
index 9e234caa9c8df..cf7eac3d83d43 100644
--- a/clang/test/Sema/inline-asm-validate-tmpl.cpp
+++ b/clang/test/Sema/inline-asm-validate-tmpl.cpp
@@ -23,13 +23,3 @@ template <int N> void	testc(int value)
 	asm("rol %1, %0" :"=r"(value): "I"(N + 1));
 }
 int	foo() { testc<2>(10); }
-
-// these should compile without error
-template <int N> bool testd()
-{
-  __asm goto ("" : : : : lab);
-  return true;
-lab:
-  return false;
-}
-bool foox() { return testd<0> (); }
diff --git a/clang/test/Sema/scope-check.c b/clang/test/Sema/scope-check.c
index 0622450e2e7af..fa37d10d070b9 100644
--- a/clang/test/Sema/scope-check.c
+++ b/clang/test/Sema/scope-check.c
@@ -232,19 +232,3 @@ void test15(int n, void *pc) {
 
 // rdar://9024687
 int test16(int [sizeof &&z]); // expected-error {{use of address-of-label extension outside of a function body}}
-
-//Asm goto:
-int test16(int n)
-{
-  // expected-error@+2 {{cannot jump from this asm goto statement to one of its possible targets}}
-  // expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
-  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(n)::label_true, loop);
-  // expected-note@+2 {{jump bypasses initialization of variable length array}}
-  // expected-note@+1 {{possible target of asm goto statement}}
-  return ({int a[n];label_true: 2;});
-  // expected-note@+1 {{jump bypasses initialization of variable length array}}
-  int b[n];
-// expected-note@+1 {{possible target of asm goto statement}}
-loop:
-  return 0;
-}

From 7316670ef0a6666357bee8ea62a0eec38cdd7f97 Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Thu, 30 May 2019 15:38:05 +0000
Subject: [PATCH 0629/1176] Remove length modifier when using assignment
 suppression in TimerTest

Summary:
This is useless and it's giving warnings in the build bots:
/home/motus/netbsd8/netbsd8/llvm/tools/lldb/unittests/Utility/TimerTest.cpp:67:43: warning: use of assignment suppression and length modifier together in gnu_scanf format [-Wformat=]

Reviewers: xiaobai

Subscribers: krytarowski, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62626

llvm-svn: 362107
---
 lldb/unittests/Utility/TimerTest.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/unittests/Utility/TimerTest.cpp b/lldb/unittests/Utility/TimerTest.cpp
index 53d0ff12d891a..9e3dc8a03917b 100644
--- a/lldb/unittests/Utility/TimerTest.cpp
+++ b/lldb/unittests/Utility/TimerTest.cpp
@@ -62,7 +62,7 @@ TEST(TimerTest, CategoryTimes2) {
   Timer::DumpCategoryTimes(&ss);
   double seconds1, seconds2;
   ASSERT_EQ(2, sscanf(ss.GetData(),
-                      "%lf sec (total: %*lfs; child: %*lfs; count: %*d) for "
+                      "%lf sec (total: %*fs; child: %*fs; count: %*d) for "
                       "CAT1%*[\n ]%lf sec for CAT2",
                       &seconds1, &seconds2))
       << "String: " << ss.GetData();
@@ -98,7 +98,7 @@ TEST(TimerTest, CategoryTimesStats) {
   ASSERT_EQ(
       6, sscanf(ss.GetData(),
                 "%lf sec (total: %lfs; child: %lfs; count: %d) for CAT1%*[\n ]"
-                "%lf sec (total: %*lfs; child: %*lfs; count: %d) for CAT2",
+                "%lf sec (total: %*fs; child: %*fs; count: %d) for CAT2",
                 &seconds1, &total1, &child1, &count1, &seconds2, &count2))
       << "String: " << ss.GetData();
   EXPECT_NEAR(total1 - child1, seconds1, 0.002);

From 0317e46a6322037cef6160dd7216dfe0499aaa4b Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 30 May 2019 15:52:11 +0000
Subject: [PATCH 0630/1176] [ELF] Delete dead SHT_NOBITS->SHT_PROGBITS code
 after r358981

After D60131/r358981, we no longer create SHT_NOBITS sections that may
contain ByteCommand (BYTE, SHORT, LONG, QUAD).

llvm-svn: 362108
---
 lld/ELF/OutputSections.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index becca8356232c..8927b69a64a1f 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -273,11 +273,6 @@ static void finalizeShtGroup(OutputSection *OS,
 }
 
 void OutputSection::finalize() {
-  if (Type == SHT_NOBITS)
-    for (BaseCommand *Base : SectionCommands)
-      if (isa<ByteCommand>(Base))
-        Type = SHT_PROGBITS;
-
   std::vector<InputSection *> V = getInputSections(this);
   InputSection *First = V.empty() ? nullptr : V[0];
 

From 019d270e43967558de00ee4b6d624a2225a2fb77 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 16:07:11 +0000
Subject: [PATCH 0631/1176] [DAGCombine] Revert of recommit of
 "binop-with-const hoisting" patches

I was looking into an endless combine loop the uncommitted follow-up patch
was causing, and it appears even these patches can exibit such an
endless loop. The root cause is that we try to hoist one binop (add/sub) with
constant operand, and if we get two such binops both of which are
eligible for this hoisting, we get stuck.

Some cases may highlight missing constant-folds.

Reverts r361871,r361872,r361873,r361874.

llvm-svn: 362109
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 --------
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll | 10 ++-
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 30 +++----
 llvm/test/CodeGen/AArch64/vec_add.ll          | 14 ++--
 llvm/test/CodeGen/AArch64/xor.ll              | 18 ++--
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 14 ++--
 .../CodeGen/SPARC/2013-05-17-CallFrame.ll     |  5 +-
 llvm/test/CodeGen/SystemZ/alloca-03.ll        | 11 +--
 llvm/test/CodeGen/X86/combine-add.ll          |  4 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 83 +++++++++----------
 llvm/test/CodeGen/X86/shift-amount-mod.ll     |  9 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 52 ++++++------
 llvm/test/CodeGen/X86/vec_add.ll              |  8 +-
 llvm/test/CodeGen/X86/xor.ll                  | 62 +++++++-------
 llvm/test/CodeGen/X86/zext-sext.ll            | 21 +++--
 15 files changed, 174 insertions(+), 201 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1518efd125672..d2045d764a075 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2303,13 +2303,6 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     }
   }
 
-  // (x - y) + -1  ->  add (xor y, -1), x
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isAllOnesOrAllOnesSplat(N1)) {
-    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
-    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
-  }
-
   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
@@ -2930,33 +2923,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
-  // (x - y) - 1  ->  add (xor y, -1), x
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
-    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
-                              DAG.getAllOnesConstant(DL, VT));
-    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
-  }
-
-  // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
-    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
-  }
-  // y - (x + C)  ->  (y - x) - C
-  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
-      isConstantOrConstantVector(N1.getOperand(1))) {
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
-    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
-  }
-  // (x - C) - y  ->  (x - y) - C
-  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
-  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
-    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
-    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
-  }
-
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index c91700436bb96..6daef644761b5 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -486,7 +486,8 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w1
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -499,7 +500,8 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg x8, x1
+; CHECK-NEXT:    mov w8, #64
+; CHECK-NEXT:    sub x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -513,7 +515,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w2, w1
+; CHECK-NEXT:    add w8, w1, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 0, %a
@@ -525,7 +527,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x2, x1
+; CHECK-NEXT:    add x8, x1, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 344016ea5027b..c571dac94b81e 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    add w0, w8, #32 // =32
+; CHECK-NEXT:    add w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w8, w2
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w2, w8
-; CHECK-NEXT:    sub w0, w8, #32 // =32
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    add w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w2, w8
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    sub w0, w8, #32 // =32
+; CHECK-NEXT:    sub w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w8, w2
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    add w0, w8, #32 // =32
+; CHECK-NEXT:    add w8, w8, #32 // =32
+; CHECK-NEXT:    sub w0, w8, w2
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI19_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -314,8 +314,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/AArch64/vec_add.ll b/llvm/test/CodeGen/AArch64/vec_add.ll
index 9609822b54f33..208ad95c85319 100644
--- a/llvm/test/CodeGen/AArch64/vec_add.ll
+++ b/llvm/test/CodeGen/AArch64/vec_add.ll
@@ -26,10 +26,10 @@ define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
 define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: add_const_sub_const:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvni v2.4s, #1
 ; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvni v1.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
@@ -62,12 +62,12 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK-LABEL: add_const_sub_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x9, .LCPI4_1
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
-; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    adrp x8, .LCPI4_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_1]
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 -2, i32 -3, i32 undef, i32 -2>
diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll
index ca6c0dfabba48..1dca55a971308 100644
--- a/llvm/test/CodeGen/AArch64/xor.ll
+++ b/llvm/test/CodeGen/AArch64/xor.ll
@@ -18,8 +18,8 @@ define i32 @PR39657(i8* %p, i64 %x) {
 define i32 @add_of_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w1
-; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    sub w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -29,8 +29,8 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w1
-; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    sub w8, w0, w1
+; CHECK-NEXT:    sub w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -40,8 +40,9 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -51,8 +52,9 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn v1.16b, v1.16b
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 2dd7e20c00ccf..20c84c5b63277 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -9,16 +9,16 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0:       ; %bb.0: ; %entry
 ; VARIANT0-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT0-NEXT:    s_load_dword s2, s[0:1], 0xb
-; VARIANT0-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT0-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT0-NEXT:    s_mov_b32 s6, 0
 ; VARIANT0-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT:    s_add_i32 s2, s2, -1
 ; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; VARIANT0-NEXT:    s_barrier
+; VARIANT0-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -30,15 +30,15 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1:       ; %bb.0: ; %entry
 ; VARIANT1-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT1-NEXT:    s_load_dword s2, s[0:1], 0xb
-; VARIANT1-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT1-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT1-NEXT:    s_mov_b32 s6, 0
 ; VARIANT1-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT1-NEXT:    s_add_i32 s2, s2, -1
 ; VARIANT1-NEXT:    s_barrier
+; VARIANT1-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT1-NEXT:    s_waitcnt expcnt(0)
@@ -59,7 +59,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    v_xad_u32 v3, v0, -1, s0
+; VARIANT2-NEXT:    s_add_i32 s0, s0, -1
+; VARIANT2-NEXT:    v_sub_u32_e32 v3, s0, v0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -81,7 +82,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    v_xad_u32 v3, v0, -1, s0
+; VARIANT3-NEXT:    s_add_i32 s0, s0, -1
+; VARIANT3-NEXT:    v_sub_u32_e32 v3, s0, v0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
index 274e99b114c32..1a97e4e317e57 100644
--- a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
+++ b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
@@ -15,9 +15,10 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
 ; V8-NEXT:    .cfi_register 15, 31
 ; V8-NEXT:    add %i0, 7, %i0
 ; V8-NEXT:    and %i0, -8, %i0
+; V8-NEXT:    add %i0, 8, %i0
 ; V8-NEXT:    sub %sp, %i0, %i0
-; V8-NEXT:    add %i0, -8, %sp
-; V8-NEXT:    add %i0, 88, %o0
+; V8-NEXT:    add %i0, 96, %o0
+; V8-NEXT:    mov %i0, %sp
 ; V8-NEXT:    add %sp, -16, %sp
 ; V8-NEXT:    st %o0, [%sp+104]
 ; V8-NEXT:    st %o0, [%sp+100]
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index cac569ff41fa3..343071211b751 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -75,12 +75,13 @@ define void @f3(i64 %len) {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    sllg %r0, %r2, 3
+; CHECK-NEXT:    sllg %r2, %r2, 3
+; CHECK-NEXT:    la %r0, 120(%r2)
 ; CHECK-NEXT:    sgr %r1, %r0
-; CHECK-NEXT:    lay %r15, -120(%r1)
-; CHECK-NEXT:    la %r1, 160(%r1)
-; CHECK-NEXT:    nill %r1, 65408
-; CHECK-NEXT:    mvghi 0(%r1), 10
+; CHECK-NEXT:    la %r2, 280(%r1)
+; CHECK-NEXT:    nill %r2, 65408
+; CHECK-NEXT:    lgr %r15, %r1
+; CHECK-NEXT:    mvghi 0(%r2), 10
 ; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
 ; CHECK-NEXT:    br %r14
   %x = alloca i64, i64 %len, align 128
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 1d20fcf33d742..6f5f1370e6b4e 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
 ; SSE-LABEL: combine_vec_add_sub_sub:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    psubd %xmm1, %xmm0
-; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub:
 ; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> %a, %b
   %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index fd3d83ed2cbec..c9a577dbaa92b 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:    movq %rdi, %rbp
+; CHECK-NEXT:    movq %rdx, %r14
+; CHECK-NEXT:    movq %rdi, %r15
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
@@ -78,11 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $32, %esi
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT:    imulq $1040, %rbx, %rax ## imm = 0x410
+; CHECK-NEXT:    imulq $1040, %r14, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@{{.*}}(%rip), %rcx
 ; CHECK-NEXT:    leaq 8(%rcx,%rax), %rbx
-; CHECK-NEXT:    movl $1, %r15d
+; CHECK-NEXT:    movl $1, %r14d
 ; CHECK-NEXT:    movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -92,47 +91,48 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    xorl %r14d, %r14d
-; CHECK-NEXT:    testb %r14b, %r14b
+; CHECK-NEXT:    xorl %r12d, %r12d
+; CHECK-NEXT:    testb %r12b, %r12b
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %r13d, %r13d
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%r14), %eax
+; CHECK-NEXT:    leal 1(%r12), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $-1, %r13d
+; CHECK-NEXT:    movl $-1, %ecx
 ; CHECK-NEXT:    movslq (%rsi,%rax,4), %rax
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %r13d
+; CHECK-NEXT:    movl $1, %ecx
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movl %r12d, %ecx
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r12
+; CHECK-NEXT:    ## implicit-def: $r13
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r12
+; CHECK-NEXT:    ## implicit-def: $r13
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    js LBB0_55
 ; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    movq %rax, %r12
+; CHECK-NEXT:    movq %rax, %r13
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
@@ -157,15 +157,16 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    je LBB0_34
 ; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r12), %rax
+; CHECK-NEXT:    leaq 1(%r13), %rax
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_29
 ; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %r12
+; CHECK-NEXT:    incq %r13
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal -324(%r13), %eax
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
+; CHECK-NEXT:    addl $-324, %eax ## imm = 0xFEBC
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
@@ -175,11 +176,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_38
 ; CHECK-NEXT:  LBB0_35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, %r13d
+; CHECK-NEXT:    cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.36: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, %r13d
+; CHECK-NEXT:    cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.37: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -194,8 +195,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movb $0, (%r12)
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movb $0, (%r13)
+; CHECK-NEXT:    movl %r12d, %ecx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
@@ -207,22 +208,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_55
 ; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
+; CHECK-NEXT:    movl $268, %ecx ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %r13d
+; CHECK-NEXT:    movl $2, %ecx
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_40: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %r13d
+; CHECK-NEXT:    movl $20, %ecx
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r14), %eax
+; CHECK-NEXT:    leal -268(%r12), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.56: ## %while.body200
@@ -232,12 +233,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_20: ## %sw.bb256
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r14d, %r13d
+; CHECK-NEXT:    movl %r12d, %ecx
 ; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r15d
-; CHECK-NEXT:    testl %r15d, %r15d
-; CHECK-NEXT:    movl %r13d, %r14d
+; CHECK-NEXT:    decl %r14d
+; CHECK-NEXT:    testl %r14d, %r14d
+; CHECK-NEXT:    movl %ecx, %r12d
 ; CHECK-NEXT:    jg LBB0_13
 ; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -254,28 +255,27 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_11:
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %r13d, %r13d
+; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:  LBB0_22: ## %while.end1465
-; CHECK-NEXT:    incl %r13d
-; CHECK-NEXT:    cmpl $16, %r13d
+; CHECK-NEXT:    incl %ecx
+; CHECK-NEXT:    cmpl $16, %ecx
 ; CHECK-NEXT:    ja LBB0_50
 ; CHECK-NEXT:  ## %bb.23: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %r13d, %eax
+; CHECK-NEXT:    btl %ecx, %eax
 ; CHECK-NEXT:    jae LBB0_50
 ; CHECK-NEXT:  ## %bb.24:
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:  LBB0_48: ## %if.then1477
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
-; CHECK-NEXT:    subq %rbp, %rbx
-; CHECK-NEXT:    movq _syHistory@{{.*}}(%rip), %rax
-; CHECK-NEXT:    leaq 8189(%rbx,%rax), %rax
+; CHECK-NEXT:    addq $8189, %r15 ## imm = 0x1FFD
+; CHECK-NEXT:    subq %rbx, %r15
+; CHECK-NEXT:    addq _syHistory@{{.*}}(%rip), %r15
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_49: ## %for.body1723
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    decq %rax
+; CHECK-NEXT:    decq %r15
 ; CHECK-NEXT:    jmp LBB0_49
 ; CHECK-NEXT:  LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT:    movl $512, %eax ## imm = 0x200
@@ -302,8 +302,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_55: ## %if.then.i
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
-; CHECK-NEXT:    movq %rbx, %rbp
+; CHECK-NEXT:    movq %r15, %rbx
 ; CHECK-NEXT:    jmp LBB0_48
 ; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
 ; CHECK-NEXT:    xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index e8af5f66d36c9..6c268d8a27f42 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    movl $32, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %ecx
+; X64-NEXT:    movl $32, %ecx
+; X64-NEXT:    subl %esi, %ecx
 ; X64-NEXT:    subl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
@@ -1139,10 +1139,9 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    movl $64, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 7da5c7db2d689..2ffbfcb56b2f7 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal 32(%rdi), %eax
+; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    addl $-32, %eax
+; X32-NEXT:    addl $32, %ecx
+; X32-NEXT:    subl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    leal -32(%rdx,%rsi), %eax
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    addl $32, %edi
+; X64-NEXT:    subl %edi, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $-32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal -32(%rdi), %eax
+; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    subl %edx, %esi
 ; X64-NEXT:    leal 32(%rsi), %eax
+; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd %xmm2, %xmm1
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    psubd %xmm1, %xmm0
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    psubd %xmm0, %xmm2
+; X32-NEXT:    movdqa %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd %xmm2, %xmm1
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
-; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    psubd %xmm1, %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd %xmm0, %xmm2
+; X64-NEXT:    movdqa %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -460,15 +460,15 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    psubd %xmm2, %xmm1
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    psubd %xmm2, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    psubd %xmm2, %xmm1
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    psubd %xmm2, %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
diff --git a/llvm/test/CodeGen/X86/vec_add.ll b/llvm/test/CodeGen/X86/vec_add.ll
index 48ccf34dd0b0a..6e34397dd548d 100644
--- a/llvm/test/CodeGen/X86/vec_add.ll
+++ b/llvm/test/CodeGen/X86/vec_add.ll
@@ -36,14 +36,14 @@ define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
 define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
 ; X86-LABEL: add_const_sub_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_sub_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
@@ -85,14 +85,14 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: add_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 -2, i32 -3, i32 undef, i32 -2>
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 654382f7b73e9..5ef5999be95f4 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -532,24 +532,22 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    decl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    notl %esi
-; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
+; X64-LIN-NEXT:    subl %esi, %edi
+; X64-LIN-NEXT:    leal -1(%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    notl %edx
-; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
+; X64-WIN-NEXT:    subl %edx, %ecx
+; X64-WIN-NEXT:    leal -1(%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -560,24 +558,22 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not_decrement:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    decl %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    notl %esi
-; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
+; X64-LIN-NEXT:    subl %esi, %edi
+; X64-LIN-NEXT:    leal -1(%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    notl %edx
-; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
+; X64-WIN-NEXT:    subl %edx, %ecx
+; X64-WIN-NEXT:    leal -1(%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -587,23 +583,24 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-NEXT:    pxor %xmm1, %xmm2
-; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
-; X64-LIN-NEXT:    pxor %xmm1, %xmm2
-; X64-LIN-NEXT:    paddd %xmm2, %xmm0
+; X64-LIN-NEXT:    psubd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-LIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
+; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    pxor (%rdx), %xmm0
-; X64-WIN-NEXT:    paddd (%rcx), %xmm0
+; X64-WIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -613,23 +610,24 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not_decrement:
 ; X32:       # %bb.0:
-; X32-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-NEXT:    pxor %xmm1, %xmm2
-; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm1, %xmm1
+; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
-; X64-LIN-NEXT:    pxor %xmm1, %xmm2
-; X64-LIN-NEXT:    paddd %xmm2, %xmm0
+; X64-LIN-NEXT:    psubd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-LIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
+; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    pxor (%rdx), %xmm0
-; X64-WIN-NEXT:    paddd (%rcx), %xmm0
+; X64-WIN-NEXT:    paddd %xmm1, %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 84096e3b6805d..7034378a880b5 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -15,27 +15,30 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
 ; CHECK-NEXT:    subq %rax, %rsi
 ; CHECK-NEXT:    movq (%rdx), %rax
 ; CHECK-NEXT:    movswl 8(%rdi), %edx
+; CHECK-NEXT:    movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
 ; CHECK-NEXT:    movswl (%rax,%rsi,2), %eax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    imull %edx, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    addl $2138875574, %eax # imm = 0x7F7CA6B6
 ; CHECK-NEXT:    cmpl $-8608074, %eax # imm = 0xFF7CA6B6
-; CHECK-NEXT:    movslq %eax, %rdi
+; CHECK-NEXT:    movslq %eax, %r8
 ; CHECK-NEXT:    setl %dl
 ; CHECK-NEXT:    cmpl $2138875573, %eax # imm = 0x7F7CA6B5
-; CHECK-NEXT:    movq %rdi, %r8
+; CHECK-NEXT:    movq %r8, %r9
 ; CHECK-NEXT:    leal -1(%rdx,%rdx), %edx
 ; CHECK-NEXT:    cmovlel %edx, %esi
-; CHECK-NEXT:    subq %rax, %r8
+; CHECK-NEXT:    subq %rax, %r9
+; CHECK-NEXT:    addq %r8, %rdi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, %esi
-; CHECK-NEXT:    cmovneq %rax, %r8
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    cmovnsq %rax, %r8
-; CHECK-NEXT:    movq (%rcx), %rax
-; CHECK-NEXT:    subq %r8, %rdi
-; CHECK-NEXT:    leaq -2138875574(%rax,%rdi), %rax
+; CHECK-NEXT:    cmovneq %rax, %r9
+; CHECK-NEXT:    testl %r8d, %r8d
+; CHECK-NEXT:    cmovnsq %rax, %r9
+; CHECK-NEXT:    movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
+; CHECK-NEXT:    subq %r9, %rdi
+; CHECK-NEXT:    addq (%rcx), %rdi
+; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    movq %rax, (%rcx)
 ; CHECK-NEXT:    retq
 entry:

From 700fdb10706186a464cc86b8c1fae0cd778a449d Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 16:07:19 +0000
Subject: [PATCH 0632/1176] [NFC][Codegen] Add better test coverage for
 potential add/sub constant folding

This adds hopefully-full test coverage for all the possible permutations:
First op is one of:
* x + c1
* x - c1
* c1 - x

Second op is one of:
* + c2
* - c2
* c2 -

And thus 3*3=9 patterns.
Some of them show missed constant-folds.

Without previous patch (the revert), these tests were causing endless
dagcombine loop. I really should have thought about this first :S

llvm-svn: 362110
---
 .../AArch64/addsub-constant-folding.ll        | 455 ++++++++++++
 llvm/test/CodeGen/AArch64/vec_add.ll          | 126 ----
 .../CodeGen/X86/addsub-constant-folding.ll    | 657 ++++++++++++++++++
 llvm/test/CodeGen/X86/vec_add.ll              | 166 -----
 4 files changed, 1112 insertions(+), 292 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
 delete mode 100644 llvm/test/CodeGen/AArch64/vec_add.ll
 create mode 100644 llvm/test/CodeGen/X86/addsub-constant-folding.ll
 delete mode 100644 llvm/test/CodeGen/X86/vec_add.ll

diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
new file mode 100644
index 0000000000000..6d0f22301105c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -0,0 +1,455 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+declare void @use(<4 x i32> %arg)
+
+; (x+c1)+c2
+
+define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_add_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #10
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #10
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_add_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (x+c1)-c2
+
+define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_sub_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_sub_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; c2-(x+c1)
+
+define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_const_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mvni v1.4s, #5
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_const_sub_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    mvni v0.4s, #5
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: add_const_const_sub_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI8_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; (x-c1)+c2
+
+define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_add_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_add_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI11_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    adrp x8, .LCPI11_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_1]
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (x-c1)-c2
+
+define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_sub_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_sub_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; c2-(x-c1)
+
+define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_const_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #10
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_const_sub_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: sub_const_const_sub_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI17_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; (c1-x)+c2
+
+define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_add_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #10
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #10
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_add_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI20_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI20_0]
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (c1-x)-c2
+
+define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_sub_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_sub_const_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI23_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT:    adrp x8, .LCPI23_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI23_1]
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; c2-(c1-x)
+
+define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_const_sub:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_const_sub_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32 // =32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add sp, sp, #32 // =32
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: const_sub_const_sub_nonsplat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI26_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI26_0]
+; CHECK-NEXT:    adrp x8, .LCPI26_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI26_1]
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ret
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
diff --git a/llvm/test/CodeGen/AArch64/vec_add.ll b/llvm/test/CodeGen/AArch64/vec_add.ll
deleted file mode 100644
index 208ad95c85319..0000000000000
--- a/llvm/test/CodeGen/AArch64/vec_add.ll
+++ /dev/null
@@ -1,126 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-unknown-unknown  | FileCheck %s
-
-declare void @use(<4 x i32> %arg)
-
-define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
-; CHECK-NEXT:    ret
-	%tmp9 = add <2 x i64> %b, %a
-	ret <2 x i64> %tmp9
-}
-
-define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_add_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #10
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_sub_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    mvni v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_sub_const_extrause:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32 // =32
-; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl use
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    mvni v0.4s, #1
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add sp, sp, #32 // =32
-; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
-  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_sub_const_nonsplat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI4_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    adrp x8, .LCPI4_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_1]
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
-  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -3, i32 undef, i32 -2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_add_const:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvni v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
-  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_add_const_extrause:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #32 // =32
-; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    mvni v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl use
-; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    movi v0.4s, #8
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add sp, sp, #32 // =32
-; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
-  call void @use(<4 x i32> %t0)
-  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_add_const_nonsplat:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT:    adrp x8, .LCPI7_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI7_1]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -3, i32 undef, i32 -2>
-  %t1 = add <4 x i32> %t0, <i32 21, i32 undef, i32 8, i32 8>
-  ret <4 x i32> %t1
-}
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
new file mode 100644
index 0000000000000..100c3666e5a40
--- /dev/null
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -0,0 +1,657 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
+
+declare void @use(<4 x i32> %arg)
+
+; (x+c1)+c2
+
+define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
+; X86-LABEL: add_const_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_add_const:
+; X64:       # %bb.0:
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: add_const_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    movdqa %xmm0, %xmm1
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X86-NEXT:    paddd %xmm1, %xmm0
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X64-NEXT:    paddd %xmm1, %xmm0
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: add_const_add_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_add_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (x+c1)-c2
+
+define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
+; X86-LABEL: add_const_sub_const:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const:
+; X64:       # %bb.0:
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: add_const_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: add_const_sub_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; c2-(x+c1)
+
+define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
+; X86-LABEL: add_const_const_sub:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_const_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
+; X86-LABEL: add_const_const_sub_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    movdqa %xmm0, %xmm1
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X86-NEXT:    paddd %xmm1, %xmm0
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
+; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
+; X86-NEXT:    psubd %xmm1, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_const_sub_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X64-NEXT:    paddd %xmm1, %xmm0
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
+; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: add_const_const_sub_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_const_sub_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; (x-c1)+c2
+
+define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
+; X86-LABEL: sub_const_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: sub_const_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: sub_const_add_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (x-c1)-c2
+
+define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
+; X86-LABEL: sub_const_sub_const:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_sub_const:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: sub_const_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: sub_const_sub_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_sub_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; c2-(x-c1)
+
+define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
+; X86-LABEL: sub_const_const_sub:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_const_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
+; X86-LABEL: sub_const_const_sub_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
+; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
+; X86-NEXT:    psubd %xmm1, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_const_sub_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
+; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: sub_const_const_sub_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_const_sub_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; (c1-x)+c2
+
+define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
+; X86-LABEL: const_sub_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_add_const:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: const_sub_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    movdqa %xmm0, %xmm1
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X86-NEXT:    psubd %xmm1, %xmm0
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
+; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
+; X86-NEXT:    psubd %xmm1, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X64-NEXT:    psubd %xmm1, %xmm0
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
+; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: const_sub_add_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_add_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (c1-x)-c2
+
+define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
+; X86-LABEL: const_sub_sub_const:
+; X86:       # %bb.0:
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_sub_const:
+; X64:       # %bb.0:
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: const_sub_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqu %xmm1, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: const_sub_sub_const_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_sub_const_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; c2-(c1-x)
+
+define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
+; X86-LABEL: const_sub_const_sub:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_const_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
+; X86-LABEL: const_sub_const_sub_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 32
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    movdqu %xmm1, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    calll use
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
+; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
+; X86-NEXT:    psubd %xmm1, %xmm0
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_const_sub_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm1, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    callq use
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
+; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: const_sub_const_sub_nonsplat:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_const_sub_nonsplat:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
diff --git a/llvm/test/CodeGen/X86/vec_add.ll b/llvm/test/CodeGen/X86/vec_add.ll
deleted file mode 100644
index 6e34397dd548d..0000000000000
--- a/llvm/test/CodeGen/X86/vec_add.ll
+++ /dev/null
@@ -1,166 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
-
-declare void @use(<4 x i32> %arg)
-
-define <2 x i64> @test(<2 x i64> %a, <2 x i64> %b) {
-; X86-LABEL: test:
-; X86:       # %bb.0:
-; X86-NEXT:    paddq %xmm1, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: test:
-; X64:       # %bb.0:
-; X64-NEXT:    paddq %xmm1, %xmm0
-; X64-NEXT:    retq
-	%tmp9 = add <2 x i64> %b, %a
-	ret <2 x i64> %tmp9
-}
-
-define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
-; X86-LABEL: add_const_add_const:
-; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: add_const_add_const:
-; X64:       # %bb.0:
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    retq
-  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
-; X86-LABEL: add_const_sub_const:
-; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: add_const_sub_const:
-; X64:       # %bb.0:
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    retq
-  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: add_const_sub_const_extrause:
-; X86:       # %bb.0:
-; X86-NEXT:    subl $28, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 32
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
-; X86-NEXT:    calll use
-; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    addl $28, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-;
-; X64-LABEL: add_const_sub_const_extrause:
-; X64:       # %bb.0:
-; X64-NEXT:    subq $24, %rsp
-; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT:    callq use
-; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    addq $24, %rsp
-; X64-NEXT:    .cfi_def_cfa_offset 8
-; X64-NEXT:    retq
-  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
-  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -2, i32 -2, i32 -2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: add_const_sub_const_nonsplat:
-; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: add_const_sub_const_nonsplat:
-; X64:       # %bb.0:
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    retq
-  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
-  %t1 = sub <4 x i32> %t0, <i32 -2, i32 -3, i32 undef, i32 -2>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
-; X86-LABEL: sub_const_add_const:
-; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: sub_const_add_const:
-; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
-  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: sub_const_add_const_extrause:
-; X86:       # %bb.0:
-; X86-NEXT:    subl $28, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 32
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
-; X86-NEXT:    calll use
-; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    addl $28, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-;
-; X64-LABEL: sub_const_add_const_extrause:
-; X64:       # %bb.0:
-; X64-NEXT:    subq $24, %rsp
-; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT:    callq use
-; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    addq $24, %rsp
-; X64-NEXT:    .cfi_def_cfa_offset 8
-; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -2, i32 -2, i32 -2>
-  call void @use(<4 x i32> %t0)
-  %t1 = add <4 x i32> %t0, <i32 8, i32 8, i32 8, i32 8>
-  ret <4 x i32> %t1
-}
-
-define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: sub_const_add_const_nonsplat:
-; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: sub_const_add_const_nonsplat:
-; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %arg, <i32 -2, i32 -3, i32 undef, i32 -2>
-  %t1 = add <4 x i32> %t0, <i32 21, i32 undef, i32 8, i32 8>
-  ret <4 x i32> %t1
-}

From 7c75ac0c60d623c8bbdc1b6d924e4ff85547b9ca Mon Sep 17 00:00:00 2001
From: Hansang Bae <hansang.bae@intel.com>
Date: Thu, 30 May 2019 16:32:20 +0000
Subject: [PATCH 0633/1176] Add checks before pointer dereferencing

This change adds checks before dereferencing a pointer returned from a
function.

Differential Revision: https://reviews.llvm.org/D62224

llvm-svn: 362111
---
 openmp/runtime/src/ompt-specific.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index 99c4b6118cbd9..63153d274efb3 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -210,7 +210,8 @@ ompt_data_t *__ompt_get_thread_data_internal() {
 void __ompt_thread_assign_wait_id(void *variable) {
   kmp_info_t *ti = ompt_get_thread();
 
-  ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)(uintptr_t)variable;
+  if (ti)
+    ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)(uintptr_t)variable;
 }
 
 int __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) {
@@ -432,6 +433,9 @@ int __ompt_get_task_memory_internal(void **addr, size_t *size, int blocknum) {
     return 0; // support only a single block
 
   kmp_info_t *thr = ompt_get_thread();
+  if (!thr)
+    return 0;
+
   kmp_taskdata_t *taskdata = thr->th.th_current_task;
   kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
 

From 2e1807678d4d49abfe03489641141d777baf9ad4 Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Thu, 30 May 2019 16:44:47 +0000
Subject: [PATCH 0634/1176] [FPEnv] Added a special UnrollVectorOp method to
 deal with the chain on StrictFP opcodes

This change creates UnrollVectorOp_StrictFP. The purpose of this is to address a failure that consistently occurs when calling StrictFP functions on vectors whose number of elements is 3 + 2n on most platforms, such as PowerPC or SystemZ. The old UnrollVectorOp method does not expect that the vector that it will unroll will have a chain, so it has an assert that prevents it from running if this is the case. This new StrictFP version of the method deals with the chain while unrolling the vector. With this new function in place during vector widending, llc can run vector-constrained-fp-intrinsics.ll for SystemZ successfully.

Submitted by:	Drew Wock <drew.wock@sas.com>
Reviewed by:	Cameron McInally, Kevin P. Neal
Approved by:	Cameron McInally
Differential Revision:	http://reviews.llvm.org/D62546

llvm-svn: 362112
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |    5 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   59 +-
 .../vector-constrained-fp-intrinsics.ll       | 6445 +++++++++++++++++
 3 files changed, 6508 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index a0e7c8a89c187..8beaf145e0f85 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -857,6 +857,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
   SDValue WidenVecOp_VECREDUCE(SDNode *N);
 
+  /// Helper function to generate a set of operations to perform
+  /// a vector operation for a wider type.
+  ///
+  SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
+
   //===--------------------------------------------------------------------===//
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index aefc2aabf64b4..072f15b1f4c3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1318,6 +1318,63 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
   ReplaceValueWith(SDValue(N, 1), Chain);
 }
 
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  unsigned NE = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDLoc dl(N);
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+  // If ResNE is 0, fully unroll the vector op.
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  //The results of each unrolled operation, including the chain.
+  EVT ChainVTs[] = {EltVT, MVT::Other};
+  SmallVector<SDValue, 8> Chains;
+
+  unsigned i;
+  for (i = 0; i != NE; ++i) {
+    Operands[0] = Chain;
+    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+      SDValue Operand = N->getOperand(j);
+      EVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        EVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+                    DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+                          DAG.getDataLayout())));
+      } else {
+        Operands[j] = Operand;
+      }
+    }
+    SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+    Scalar.getNode()->setFlags(N->getFlags());
+
+    //Add in the scalar as well as its chain value to the
+    //result vectors.
+    Scalars.push_back(Scalar);
+    Chains.push_back(Scalar.getValue(1));
+  }
+
+  for (; i < ResNE; ++i)
+    Scalars.push_back(DAG.getUNDEF(EltVT));
+
+  // Build a new factor node to connect the chain back together.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  // Create a new BUILD_VECTOR node
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+  return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
 void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
                                               SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
@@ -2968,7 +3025,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
 
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
-    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+    return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
 
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
new file mode 100644
index 0000000000000..fd5895ff9d2e8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -0,0 +1,6445 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=s390x-linux-gnu < %s | FileCheck --check-prefix=S390X %s
+; RUN: llc -O3 -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck --check-prefix=SZ13 %s
+
+define <1 x float> @constrained_vector_fdiv_v1f32() {
+; S390X-LABEL: constrained_vector_fdiv_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI0_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI0_1
+; S390X-NEXT:    deb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI0_0
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    deb %f0, 0(%r1)
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %div
+}
+
+define <2 x double> @constrained_vector_fdiv_v2f64() {
+; S390X-LABEL: constrained_vector_fdiv_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI1_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI1_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI1_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ddbr %f0, %f1
+; S390X-NEXT:    ddbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI1_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI1_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+define <3 x float> @constrained_vector_fdiv_v3f32() {
+; S390X-LABEL: constrained_vector_fdiv_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI2_0
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_1
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_2
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_3
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    debr %f0, %f1
+; S390X-NEXT:    debr %f2, %f1
+; S390X-NEXT:    debr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI2_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI2_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    debr %f1, %f0
+; SZ13-NEXT:    vgmf %v2, 2, 8
+; SZ13-NEXT:    vgmf %v3, 1, 1
+; SZ13-NEXT:    debr %f2, %f0
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    debr %f3, %f0
+; SZ13-NEXT:    vmrhf %v0, %v2, %v3
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %div
+}
+
+define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fdiv_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI3_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI3_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI3_0
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ddb %f1, 16(%r2)
+; S390X-NEXT:    ddb %f0, 8(%r2)
+; S390X-NEXT:    ddb %f2, 0(%r2)
+; S390X-NEXT:    std %f1, 16(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
+; S390X-NEXT:    std %f2, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI3_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v0, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI3_1
+; SZ13-NEXT:    ldeb %f1, 0(%r1)
+; SZ13-NEXT:    ddb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %div, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fdiv_v4f64() {
+; S390X-LABEL: constrained_vector_fdiv_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI4_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_4
+; S390X-NEXT:    ldeb %f6, 0(%r1)
+; S390X-NEXT:    ddbr %f0, %f1
+; S390X-NEXT:    ddbr %f2, %f1
+; S390X-NEXT:    ddbr %f4, %f1
+; S390X-NEXT:    ddbr %f6, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI4_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI4_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v24, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI4_2
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %div
+}
+
+define <1 x float> @constrained_vector_frem_v1f32() {
+; S390X-LABEL: constrained_vector_frem_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI5_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI5_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI5_0
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %rem
+}
+
+define <2 x double> @constrained_vector_frem_v2f64() {
+; S390X-LABEL: constrained_vector_frem_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI6_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI6_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI6_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -184
+; SZ13-NEXT:    .cfi_def_cfa_offset 344
+; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI6_0
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %rem
+}
+
+define <3 x float> @constrained_vector_frem_v3f32() {
+; S390X-LABEL: constrained_vector_frem_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI7_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI7_1
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    larl %r1, .LCPI7_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    larl %r1, .LCPI7_3
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI7_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI7_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmf %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %rem
+}
+
+define void @constrained_vector_frem_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_frem_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f2, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI8_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI8_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI8_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v2, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v2, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI8_0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %rem, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_frem_v4f64() {
+; S390X-LABEL: constrained_vector_frem_v4f64:
+; S390X:       # %bb.0:
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    larl %r1, .LCPI9_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI9_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI9_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI9_3
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI9_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ldr %f2, %f10
+; S390X-NEXT:    ldr %f4, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v4f64:
+; SZ13:       # %bb.0:
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI9_0
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI9_1
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    larl %r1, .LCPI9_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %rem
+}
+
+define <1 x float> @constrained_vector_fmul_v1f32() {
+; S390X-LABEL: constrained_vector_fmul_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI10_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI10_1
+; S390X-NEXT:    meeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 1, 1
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    meebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 2.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %mul
+}
+
+define <2 x double> @constrained_vector_fmul_v2f64() {
+; S390X-LABEL: constrained_vector_fmul_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI11_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI11_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI11_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    mdbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI11_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI11_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfmdb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %mul
+}
+
+define <3 x float> @constrained_vector_fmul_v3f32() {
+; S390X-LABEL: constrained_vector_fmul_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI12_0
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_1
+; S390X-NEXT:    ler %f0, %f4
+; S390X-NEXT:    meeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_2
+; S390X-NEXT:    ler %f2, %f4
+; S390X-NEXT:    meeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_3
+; S390X-NEXT:    meeb %f4, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 1, 8
+; SZ13-NEXT:    larl %r1, .LCPI12_0
+; SZ13-NEXT:    vgmf %v2, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    meeb %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI12_1
+; SZ13-NEXT:    meebr %f2, %f0
+; SZ13-NEXT:    meeb %f0, 0(%r1)
+; SZ13-NEXT:    vmrhf %v0, %v2, %v0
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
+           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
+                        float 0x7FF0000000000000>,
+           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %mul
+}
+
+define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fmul_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI13_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    mdb %f0, 16(%r2)
+; S390X-NEXT:    mdb %f2, 8(%r2)
+; S390X-NEXT:    mdb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI13_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI13_1
+; SZ13-NEXT:    vfmdb %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    mdb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                        double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %mul, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fmul_v4f64() {
+; S390X-LABEL: constrained_vector_fmul_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI14_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_4
+; S390X-NEXT:    ldeb %f6, 0(%r1)
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    mdbr %f2, %f1
+; S390X-NEXT:    mdbr %f4, %f1
+; S390X-NEXT:    mdbr %f6, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI14_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI14_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI14_2
+; SZ13-NEXT:    vfmdb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfmdb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 2.000000e+00, double 3.000000e+00,
+                         double 4.000000e+00, double 5.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %mul
+}
+
+define <1 x float> @constrained_vector_fadd_v1f32() {
+; S390X-LABEL: constrained_vector_fadd_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI15_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI15_1
+; S390X-NEXT:    aeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    aebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %add
+}
+
+define <2 x double> @constrained_vector_fadd_v2f64() {
+; S390X-LABEL: constrained_vector_fadd_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI16_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI16_1
+; S390X-NEXT:    ld %f2, 0(%r1)
+; S390X-NEXT:    adbr %f0, %f2
+; S390X-NEXT:    larl %r1, .LCPI16_2
+; S390X-NEXT:    adb %f2, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI16_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI16_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %add
+}
+
+define <3 x float> @constrained_vector_fadd_v3f32() {
+; S390X-LABEL: constrained_vector_fadd_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI17_0
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI17_1
+; S390X-NEXT:    ler %f2, %f1
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    aeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI17_2
+; S390X-NEXT:    aeb %f2, 0(%r1)
+; S390X-NEXT:    lzer %f4
+; S390X-NEXT:    aebr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgbm %v0, 15
+; SZ13-NEXT:    vgmf %v2, 1, 1
+; SZ13-NEXT:    vgmf %v3, 2, 8
+; SZ13-NEXT:    lzer %f1
+; SZ13-NEXT:    aebr %f1, %f0
+; SZ13-NEXT:    aebr %f2, %f0
+; SZ13-NEXT:    aebr %f3, %f0
+; SZ13-NEXT:    vmrhf %v0, %v2, %v3
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %add
+}
+
+define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fadd_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI18_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    adb %f0, 16(%r2)
+; S390X-NEXT:    adb %f2, 8(%r2)
+; S390X-NEXT:    adb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI18_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI18_1
+; SZ13-NEXT:    vfadb %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    adb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %add, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fadd_v4f64() {
+; S390X-LABEL: constrained_vector_fadd_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI19_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI19_1
+; S390X-NEXT:    ld %f6, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI19_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    adbr %f0, %f6
+; S390X-NEXT:    larl %r1, .LCPI19_2
+; S390X-NEXT:    ldr %f2, %f6
+; S390X-NEXT:    adb %f2, 0(%r1)
+; S390X-NEXT:    adbr %f4, %f6
+; S390X-NEXT:    larl %r1, .LCPI19_4
+; S390X-NEXT:    adb %f6, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI19_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI19_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI19_2
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfadb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %add
+}
+
+define <1 x float> @constrained_vector_fsub_v1f32() {
+; S390X-LABEL: constrained_vector_fsub_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI20_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI20_1
+; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    sebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %sub
+}
+
+define <2 x double> @constrained_vector_fsub_v2f64() {
+; S390X-LABEL: constrained_vector_fsub_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI21_1
+; S390X-NEXT:    ld %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI21_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f0, %f2
+; S390X-NEXT:    larl %r1, .LCPI21_2
+; S390X-NEXT:    sdb %f2, 0(%r1)
+; S390X-NEXT:    sdbr %f0, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI21_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sub
+}
+
+define <3 x float> @constrained_vector_fsub_v3f32() {
+; S390X-LABEL: constrained_vector_fsub_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI22_0
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI22_1
+; S390X-NEXT:    ler %f0, %f4
+; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI22_2
+; S390X-NEXT:    ler %f2, %f4
+; S390X-NEXT:    seb %f2, 0(%r1)
+; S390X-NEXT:    lzer %f1
+; S390X-NEXT:    sebr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgbm %v2, 15
+; SZ13-NEXT:    lzer %f1
+; SZ13-NEXT:    sebr %f2, %f1
+; SZ13-NEXT:    vgmf %v1, 1, 1
+; SZ13-NEXT:    vgbm %v3, 15
+; SZ13-NEXT:    vgbm %v0, 15
+; SZ13-NEXT:    sebr %f3, %f1
+; SZ13-NEXT:    vgmf %v1, 2, 8
+; SZ13-NEXT:    sebr %f0, %f1
+; SZ13-NEXT:    vmrhf %v0, %v3, %v0
+; SZ13-NEXT:    vrepf %v1, %v2, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %sub
+}
+
+define void @constrained_vector_fsub_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fsub_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI23_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    sdb %f0, 16(%r2)
+; S390X-NEXT:    sdb %f2, 8(%r2)
+; S390X-NEXT:    sdb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    vfsdb %v0, %v1, %v0
+; SZ13-NEXT:    sdb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
+           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %sub, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fsub_v4f64() {
+; S390X-LABEL: constrained_vector_fsub_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI24_1
+; S390X-NEXT:    ld %f6, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI24_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f0, %f6
+; S390X-NEXT:    larl %r1, .LCPI24_2
+; S390X-NEXT:    ldr %f2, %f6
+; S390X-NEXT:    sdb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI24_3
+; S390X-NEXT:    ldeb %f3, 0(%r1)
+; S390X-NEXT:    ldr %f4, %f6
+; S390X-NEXT:    larl %r1, .LCPI24_4
+; S390X-NEXT:    sdb %f6, 0(%r1)
+; S390X-NEXT:    sdbr %f0, %f1
+; S390X-NEXT:    sdbr %f4, %f3
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI24_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    larl %r1, .LCPI24_1
+; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsdb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
+           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %sub
+}
+
+define <1 x float> @constrained_vector_sqrt_v1f32() {
+; S390X-LABEL: constrained_vector_sqrt_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI25_0
+; S390X-NEXT:    sqeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI25_0
+; SZ13-NEXT:    sqeb %f0, 0(%r1)
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
+                              <1 x float> <float 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %sqrt
+}
+
+define <2 x double> @constrained_vector_sqrt_v2f64() {
+; S390X-LABEL: constrained_vector_sqrt_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI26_0
+; S390X-NEXT:    larl %r2, .LCPI26_1
+; S390X-NEXT:    ldeb %f0, 0(%r2)
+; S390X-NEXT:    sqdb %f2, 0(%r1)
+; S390X-NEXT:    sqdbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI26_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %sqrt
+}
+
+define <3 x float> @constrained_vector_sqrt_v3f32() {
+; S390X-LABEL: constrained_vector_sqrt_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI27_0
+; S390X-NEXT:    sqeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI27_1
+; S390X-NEXT:    sqeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI27_2
+; S390X-NEXT:    sqeb %f4, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI27_0
+; SZ13-NEXT:    sqeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI27_1
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    sqeb %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI27_2
+; SZ13-NEXT:    sqeb %f2, 0(%r1)
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sqrt
+}
+
+define void @constrained_vector_sqrt_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_sqrt_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    sqdb %f0, 16(%r2)
+; S390X-NEXT:    sqdb %f1, 8(%r2)
+; S390X-NEXT:    sqdb %f2, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f2, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    sqdb %f1, 16(%r2)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vfsqdb %v0, %v0
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %sqrt, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_sqrt_v4f64() {
+; S390X-LABEL: constrained_vector_sqrt_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI29_0
+; S390X-NEXT:    sqdb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_1
+; S390X-NEXT:    sqdb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_3
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_2
+; S390X-NEXT:    sqdb %f6, 0(%r1)
+; S390X-NEXT:    sqdbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI29_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v24, %v0
+; SZ13-NEXT:    larl %r1, .LCPI29_1
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v26, %v0
+; SZ13-NEXT:    br %r14
+ entry:
+  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %sqrt
+}
+
+define <1 x float> @constrained_vector_pow_v1f32() {
+; S390X-LABEL: constrained_vector_pow_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI30_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI30_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI30_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI30_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
+                             <1 x float> <float 42.0>,
+                             <1 x float> <float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %pow
+}
+
+define <2 x double> @constrained_vector_pow_v2f64() {
+; S390X-LABEL: constrained_vector_pow_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI31_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI31_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI31_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -184
+; SZ13-NEXT:    .cfi_def_cfa_offset 344
+; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI31_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI31_1
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI31_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
+                             <2 x double> <double 42.1, double 42.2>,
+                             <2 x double> <double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %pow
+}
+
+define <3 x float> @constrained_vector_pow_v3f32() {
+; S390X-LABEL: constrained_vector_pow_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI32_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI32_1
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    larl %r1, .LCPI32_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    larl %r1, .LCPI32_3
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI32_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI32_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI32_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI32_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
+                             <3 x float> <float 42.0, float 43.0, float 44.0>,
+                             <3 x float> <float 3.0, float 3.0, float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <3 x float> %pow
+}
+
+define void @constrained_vector_pow_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_pow_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI33_0
+; S390X-NEXT:    ldeb %f9, 0(%r1)
+; S390X-NEXT:    ld %f10, 8(%r2)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f10, 8(%r13)
+; S390X-NEXT:    std %f11, 0(%r13)
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -208
+; SZ13-NEXT:    .cfi_def_cfa_offset 368
+; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    .cfi_offset %f9, -176
+; SZ13-NEXT:    larl %r1, .LCPI33_0
+; SZ13-NEXT:    ldeb %f9, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %pow, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_pow_v4f64() {
+; S390X-LABEL: constrained_vector_pow_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    larl %r1, .LCPI34_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI34_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI34_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI34_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI34_4
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ldr %f2, %f10
+; S390X-NEXT:    ldr %f4, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI34_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI34_1
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI34_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI34_3
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI34_4
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
+                             <4 x double> <double 42.1, double 42.2,
+                                           double 42.3, double 42.4>,
+                             <4 x double> <double 3.0, double 3.0,
+                                           double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %pow
+}
+
+define <1 x float> @constrained_vector_powi_v1f32() {
+; S390X-LABEL: constrained_vector_powi_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI35_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI35_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
+                              <1 x float> <float 42.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %powi
+}
+
+define <2 x double> @constrained_vector_powi_v2f64() {
+; S390X-LABEL: constrained_vector_powi_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI36_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI36_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI36_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI36_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
+                              <2 x double> <double 42.1, double 42.2>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %powi
+}
+
+define <3 x float> @constrained_vector_powi_v3f32() {
+; S390X-LABEL: constrained_vector_powi_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI37_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI37_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI37_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI37_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI37_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI37_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %powi
+}
+
+define void @constrained_vector_powi_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_powi_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI38_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI38_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI38_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    std %f0, 8(%r13)
+; S390X-NEXT:    std %f9, 0(%r13)
+; S390X-NEXT:    std %f8, 16(%r13)
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI38_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI38_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI38_2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 280(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          i32 3,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %powi, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_powi_v4f64() {
+; S390X-LABEL: constrained_vector_powi_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI39_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI39_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI39_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI39_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI39_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI39_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI39_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI39_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %powi
+}
+
+define <1 x float> @constrained_vector_sin_v1f32() {
+; S390X-LABEL: constrained_vector_sin_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI40_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI40_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %sin
+}
+
+define <2 x double> @constrained_vector_sin_v2f64() {
+; S390X-LABEL: constrained_vector_sin_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI41_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI41_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI41_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI41_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %sin
+}
+
+define <3 x float> @constrained_vector_sin_v3f32() {
+; S390X-LABEL: constrained_vector_sin_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI42_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    larl %r1, .LCPI42_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    larl %r1, .LCPI42_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI42_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI42_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI42_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sin
+}
+
+define void @constrained_vector_sin_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_sin_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %sin, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_sin_v4f64() {
+; S390X-LABEL: constrained_vector_sin_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI44_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI44_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI44_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI44_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI44_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI44_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI44_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI44_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %sin
+}
+
+define <1 x float> @constrained_vector_cos_v1f32() {
+; S390X-LABEL: constrained_vector_cos_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI45_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI45_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %cos
+}
+
+define <2 x double> @constrained_vector_cos_v2f64() {
+; S390X-LABEL: constrained_vector_cos_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI46_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI46_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI46_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    larl %r1, .LCPI46_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %cos
+}
+
+define <3 x float> @constrained_vector_cos_v3f32() {
+; S390X-LABEL: constrained_vector_cos_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI47_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    larl %r1, .LCPI47_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    larl %r1, .LCPI47_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI47_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI47_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI47_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %cos
+}
+
+define void @constrained_vector_cos_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_cos_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %cos, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_cos_v4f64() {
+; S390X-LABEL: constrained_vector_cos_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI49_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI49_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI49_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI49_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI49_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    larl %r1, .LCPI49_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI49_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    larl %r1, .LCPI49_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %cos
+}
+
+define <1 x float> @constrained_vector_exp_v1f32() {
+; S390X-LABEL: constrained_vector_exp_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI50_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI50_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp
+}
+
+define <2 x double> @constrained_vector_exp_v2f64() {
+; S390X-LABEL: constrained_vector_exp_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI51_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI51_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI51_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    larl %r1, .LCPI51_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %exp
+}
+
+define <3 x float> @constrained_vector_exp_v3f32() {
+; S390X-LABEL: constrained_vector_exp_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI52_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    larl %r1, .LCPI52_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    larl %r1, .LCPI52_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI52_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI52_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI52_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp
+}
+
+define void @constrained_vector_exp_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_exp_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %exp, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_exp_v4f64() {
+; S390X-LABEL: constrained_vector_exp_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI54_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI54_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI54_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI54_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI54_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    larl %r1, .LCPI54_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI54_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    larl %r1, .LCPI54_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %exp
+}
+
+define <1 x float> @constrained_vector_exp2_v1f32() {
+; S390X-LABEL: constrained_vector_exp2_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI55_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI55_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp2
+}
+
+define <2 x double> @constrained_vector_exp2_v2f64() {
+; S390X-LABEL: constrained_vector_exp2_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI56_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI56_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI56_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI56_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
+                              <2 x double> <double 42.1, double 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %exp2
+}
+
+define <3 x float> @constrained_vector_exp2_v3f32() {
+; S390X-LABEL: constrained_vector_exp2_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI57_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI57_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI57_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI57_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI57_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI57_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp2
+}
+
+define void @constrained_vector_exp2_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_exp2_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %exp2, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_exp2_v4f64() {
+; S390X-LABEL: constrained_vector_exp2_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI59_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI59_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI59_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI59_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI59_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %exp2
+}
+
+define <1 x float> @constrained_vector_log_v1f32() {
+; S390X-LABEL: constrained_vector_log_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI60_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI60_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log
+}
+
+define <2 x double> @constrained_vector_log_v2f64() {
+; S390X-LABEL: constrained_vector_log_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI61_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI61_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI61_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI61_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %log
+}
+
+define <3 x float> @constrained_vector_log_v3f32() {
+; S390X-LABEL: constrained_vector_log_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI62_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    larl %r1, .LCPI62_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    larl %r1, .LCPI62_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI62_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI62_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI62_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log
+}
+
+define void @constrained_vector_log_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log_v4f64() {
+; S390X-LABEL: constrained_vector_log_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI64_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI64_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI64_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI64_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI64_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %log
+}
+
+define <1 x float> @constrained_vector_log10_v1f32() {
+; S390X-LABEL: constrained_vector_log10_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI65_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI65_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log10
+}
+
+define <2 x double> @constrained_vector_log10_v2f64() {
+; S390X-LABEL: constrained_vector_log10_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI66_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI66_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI66_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI66_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
+                               <2 x double> <double 42.0, double 42.1>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <2 x double> %log10
+}
+
+define <3 x float> @constrained_vector_log10_v3f32() {
+; S390X-LABEL: constrained_vector_log10_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI67_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    larl %r1, .LCPI67_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    larl %r1, .LCPI67_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI67_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI67_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI67_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log10
+}
+
+define void @constrained_vector_log10_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log10_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log10, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log10_v4f64() {
+; S390X-LABEL: constrained_vector_log10_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI69_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI69_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI69_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI69_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI69_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
+                               <4 x double> <double 42.0, double 42.1,
+                                             double 42.2, double 42.3>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <4 x double> %log10
+}
+
+define <1 x float> @constrained_vector_log2_v1f32() {
+; S390X-LABEL: constrained_vector_log2_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI70_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI70_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log2
+}
+
+define <2 x double> @constrained_vector_log2_v2f64() {
+; S390X-LABEL: constrained_vector_log2_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI71_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI71_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI71_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI71_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %log2
+}
+
+define <3 x float> @constrained_vector_log2_v3f32() {
+; S390X-LABEL: constrained_vector_log2_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI72_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI72_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI72_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI72_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI72_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI72_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log2
+}
+
+define void @constrained_vector_log2_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log2_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log2, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log2_v4f64() {
+; S390X-LABEL: constrained_vector_log2_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI74_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI74_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI74_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI74_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI74_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI74_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI74_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI74_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %log2
+}
+
+define <1 x float> @constrained_vector_rint_v1f32() {
+; S390X-LABEL: constrained_vector_rint_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI75_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    fiebr %f0, 0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI75_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %rint
+}
+
+define <2 x double> @constrained_vector_rint_v2f64() {
+; S390X-LABEL: constrained_vector_rint_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI76_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI76_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f2, 0, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI76_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> <double 42.1, double 42.0>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32() {
+; S390X-LABEL: constrained_vector_rint_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI77_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI77_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI77_2
+; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    fiebr %f0, 0, %f0
+; S390X-NEXT:    fiebr %f2, 0, %f1
+; S390X-NEXT:    fiebr %f4, 0, %f3
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI77_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI77_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI77_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    fiebr %f1, 0, %f1
+; SZ13-NEXT:    fiebr %f2, 0, %f2
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+ entry:
+  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %rint
+}
+
+define void @constrained_vector_rint_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_rint_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f2, 16(%r2)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f1, 0, %f1
+; S390X-NEXT:    fidbr %f2, 0, %f2
+; S390X-NEXT:    std %f2, 16(%r2)
+; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 0, 0
+; SZ13-NEXT:    fidbra %f0, 0, %f0, 0
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %rint, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_rint_v4f64() {
+; S390X-LABEL: constrained_vector_rint_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI79_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_2
+; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_3
+; S390X-NEXT:    ld %f5, 0(%r1)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f2, 0, %f1
+; S390X-NEXT:    fidbr %f4, 0, %f3
+; S390X-NEXT:    fidbr %f6, 0, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI79_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI79_1
+; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v26, %v0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+                        <4 x double> <double 42.1, double 42.2,
+                                      double 42.3, double 42.4>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32() {
+; S390X-LABEL: constrained_vector_nearbyint_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI80_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI80_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
+                               <1 x float> <float 42.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %nearby
+}
+
+define <2 x double> @constrained_vector_nearbyint_v2f64() {
+; S390X-LABEL: constrained_vector_nearbyint_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI81_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI81_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI81_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                                <2 x double> <double 42.1, double 42.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32() {
+; S390X-LABEL: constrained_vector_nearbyint_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI82_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    larl %r1, .LCPI82_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    larl %r1, .LCPI82_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI82_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI82_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI82_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 0, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 0, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %nearby
+}
+
+define void @constrained_vector_nearbyint_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_nearbyint_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 0
+; SZ13-NEXT:    fidbra %f0, 0, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %nearby, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_nearbyint_v4f64() {
+; S390X-LABEL: constrained_vector_nearbyint_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI84_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI84_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI84_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI84_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI84_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI84_1
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v26, %v0, 4, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+                                <4 x double> <double 42.1, double 42.2,
+                                              double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %nearby
+}
+
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; S390X-LABEL: constrained_vector_maxnum_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI85_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI85_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI85_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI85_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; S390X-LABEL: constrained_vector_maxnum_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI86_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI86_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI86_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI86_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI86_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI86_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI86_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI86_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; S390X-LABEL: constrained_vector_maxnum_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI87_0
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI87_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    larl %r1, .LCPI87_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI87_3
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    larl %r1, .LCPI87_4
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI87_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI87_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI87_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI87_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI87_4
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %max
+}
+
+define void @constrained_vector_log10_maxnum_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log10_maxnum_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI88_0
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI88_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI88_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_maxnum_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI88_0
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI88_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI88_2
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %max, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; S390X-LABEL: constrained_vector_maxnum_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI89_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI89_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI89_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_5
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI89_6
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_7
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI89_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI89_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI89_4
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_5
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI89_6
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_7
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; S390X-LABEL: constrained_vector_minnum_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI90_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI90_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI90_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI90_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+ entry:
+  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; S390X-LABEL: constrained_vector_minnum_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI91_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI91_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI91_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI91_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI91_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI91_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI91_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI91_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; S390X-LABEL: constrained_vector_minnum_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI92_0
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI92_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    larl %r1, .LCPI92_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI92_3
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    larl %r1, .LCPI92_4
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI92_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI92_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI92_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI92_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI92_4
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %min
+}
+
+define void @constrained_vector_minnum_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_minnum_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI93_0
+; S390X-NEXT:    ldeb %f9, 0(%r1)
+; S390X-NEXT:    ld %f10, 8(%r2)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f10, 8(%r13)
+; S390X-NEXT:    std %f11, 0(%r13)
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -208
+; SZ13-NEXT:    .cfi_def_cfa_offset 368
+; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    .cfi_offset %f9, -176
+; SZ13-NEXT:    larl %r1, .LCPI93_0
+; SZ13-NEXT:    ldeb %f9, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+ %b = load <3 x double>, <3 x double>* %a
+ %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %min, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; S390X-LABEL: constrained_vector_minnum_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI94_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI94_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI94_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_5
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI94_6
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_7
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI94_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI94_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI94_4
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_5
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI94_6
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_7
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %min
+}
+
+define <1 x float> @constrained_vector_fptrunc_v1f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v1f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI95_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v1f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI95_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    wledb %v24, %f0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
+                                <1 x double><double 42.1>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %result
+}
+
+define <2 x float> @constrained_vector_fptrunc_v2f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI96_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI96_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    ledbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI96_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI96_1
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x float> %result
+}
+
+define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %dest) {
+; S390X-LABEL: constrained_vector_fptrunc_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f1, 16(%r2)
+; S390X-NEXT:    ld %f2, 8(%r2)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    lgdr %r0, %f0
+; S390X-NEXT:    nilf %r0, 0
+; S390X-NEXT:    ledbr %f0, %f2
+; S390X-NEXT:    lgdr %r1, %f0
+; S390X-NEXT:    srlg %r1, %r1, 32
+; S390X-NEXT:    lr %r0, %r1
+; S390X-NEXT:    ledbr %f0, %f1
+; S390X-NEXT:    ste %f0, 8(%r3)
+; S390X-NEXT:    stg %r0, 0(%r3)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ledbra %f2, 0, %f1, 0
+; SZ13-NEXT:    vrepg %v1, %v1, 1
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    vmrhf %v1, %v2, %v1
+; SZ13-NEXT:    vmrhg %v1, %v1, %v1
+; SZ13-NEXT:    ste %f0, 8(%r3)
+; SZ13-NEXT:    vsteg %v1, 0(%r3), 0
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %src
+  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
+                                <3 x double> %b,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  store <3 x float> %result, <3 x float>* %dest
+  ret void
+}
+
+define <4 x float> @constrained_vector_fptrunc_v4f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI98_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_2
+; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_3
+; S390X-NEXT:    ld %f5, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    ledbr %f2, %f1
+; S390X-NEXT:    ledbr %f4, %f3
+; S390X-NEXT:    ledbr %f6, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI98_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI98_1
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    larl %r1, .LCPI98_2
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI98_3
+; SZ13-NEXT:    ld %f2, 0(%r1)
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    ledbra %f2, 0, %f2, 0
+; SZ13-NEXT:    vmrhf %v1, %v2, %v1
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x float> %result
+}
+
+define <1 x double> @constrained_vector_fpext_v1f32() {
+; S390X-LABEL: constrained_vector_fpext_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI99_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI99_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    wldeb %v24, %f0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
+                                <1 x float><float 42.0>,
+                                metadata !"fpexcept.strict")
+  ret <1 x double> %result
+}
+
+define <2 x double> @constrained_vector_fpext_v2f32() {
+; S390X-LABEL: constrained_vector_fpext_v2f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI100_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI100_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    ldebr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v2f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI100_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI100_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %result
+}
+
+define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %dest) {
+; S390X-LABEL: constrained_vector_fpext_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    sllg %r1, %r0, 32
+; S390X-NEXT:    ldgr %f1, %r1
+; S390X-NEXT:    nilf %r0, 0
+; S390X-NEXT:    ldgr %f2, %r0
+; S390X-NEXT:    ldebr %f2, %f2
+; S390X-NEXT:    ldebr %f1, %f1
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    std %f0, 16(%r3)
+; S390X-NEXT:    std %f1, 8(%r3)
+; S390X-NEXT:    std %f2, 0(%r3)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vrepf %v2, %v0, 1
+; SZ13-NEXT:    ldebr %f1, %f0
+; SZ13-NEXT:    ldebr %f2, %f2
+; SZ13-NEXT:    vrepf %v0, %v0, 2
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    vmrhg %v1, %v1, %v2
+; SZ13-NEXT:    std %f0, 16(%r3)
+; SZ13-NEXT:    vst %v1, 0(%r3)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x float>, <3 x float>* %src
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
+                              <3 x float> %b,
+                              metadata !"fpexcept.strict")
+  store <3 x double> %result, <3 x double>* %dest
+  ret void
+}
+
+define <4 x double> @constrained_vector_fpext_v4f32() {
+; S390X-LABEL: constrained_vector_fpext_v4f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI102_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_2
+; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_3
+; S390X-NEXT:    le %f5, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    ldebr %f2, %f1
+; S390X-NEXT:    ldebr %f4, %f3
+; S390X-NEXT:    ldebr %f6, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v4f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI102_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI102_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    larl %r1, .LCPI102_2
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI102_3
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %result
+}
+
+define <1 x float> @constrained_vector_ceil_v1f32() {
+; S390X-LABEL: constrained_vector_ceil_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI103_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %ceil
+}
+
+define <2 x double> @constrained_vector_ceil_v2f64() {
+; S390X-LABEL: constrained_vector_ceil_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI104_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    larl %r1, .LCPI104_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI104_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 6
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %ceil
+}
+
+define <3 x float> @constrained_vector_ceil_v3f32() {
+; S390X-LABEL: constrained_vector_ceil_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI105_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    larl %r1, .LCPI105_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    larl %r1, .LCPI105_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI105_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI105_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 6, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 6, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %ceil
+}
+
+define <3 x double> @constrained_vector_ceil_v3f64() {
+; S390X-LABEL: constrained_vector_ceil_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI106_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    larl %r1, .LCPI106_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    larl %r1, .LCPI106_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI106_0
+; SZ13-NEXT:    fidbra %f4, 6, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 6
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %ceil
+}
+
+define <1 x float> @constrained_vector_floor_v1f32() {
+; S390X-LABEL: constrained_vector_floor_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI107_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %floor
+}
+
+
+define <2 x double> @constrained_vector_floor_v2f64() {
+; S390X-LABEL: constrained_vector_floor_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI108_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    larl %r1, .LCPI108_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI108_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 7
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %floor
+}
+
+define <3 x float> @constrained_vector_floor_v3f32() {
+; S390X-LABEL: constrained_vector_floor_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI109_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    larl %r1, .LCPI109_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    larl %r1, .LCPI109_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI109_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI109_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 7, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 7, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %floor
+}
+
+define <3 x double> @constrained_vector_floor_v3f64() {
+; S390X-LABEL: constrained_vector_floor_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI110_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    larl %r1, .LCPI110_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    larl %r1, .LCPI110_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI110_0
+; SZ13-NEXT:    fidbra %f4, 7, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 7
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %floor
+}
+
+define <1 x float> @constrained_vector_round_v1f32() {
+; S390X-LABEL: constrained_vector_round_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI111_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %round
+}
+
+define <2 x double> @constrained_vector_round_v2f64() {
+; S390X-LABEL: constrained_vector_round_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI112_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    larl %r1, .LCPI112_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI112_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 1
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %round
+}
+
+define <3 x float> @constrained_vector_round_v3f32() {
+; S390X-LABEL: constrained_vector_round_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI113_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    larl %r1, .LCPI113_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    larl %r1, .LCPI113_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI113_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI113_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 1, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 1, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %round
+}
+
+
+define <3 x double> @constrained_vector_round_v3f64() {
+; S390X-LABEL: constrained_vector_round_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI114_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    larl %r1, .LCPI114_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    larl %r1, .LCPI114_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI114_0
+; SZ13-NEXT:    fidbra %f4, 1, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 1
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %round
+}
+
+define <1 x float> @constrained_vector_trunc_v1f32() {
+; S390X-LABEL: constrained_vector_trunc_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI115_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %trunc
+}
+
+define <2 x double> @constrained_vector_trunc_v2f64() {
+; S390X-LABEL: constrained_vector_trunc_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI116_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    larl %r1, .LCPI116_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI116_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 5
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %trunc
+}
+
+define <3 x float> @constrained_vector_trunc_v3f32() {
+; S390X-LABEL: constrained_vector_trunc_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI117_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    larl %r1, .LCPI117_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    larl %r1, .LCPI117_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI117_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI117_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 5, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 5, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %trunc
+}
+
+define <3 x double> @constrained_vector_trunc_v3f64() {
+; S390X-LABEL: constrained_vector_trunc_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI118_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    larl %r1, .LCPI118_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    larl %r1, .LCPI118_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI118_0
+; SZ13-NEXT:    fidbra %f4, 5, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 5
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %trunc
+}
+
+; Single width declarations
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+
+; Scalar width declarations
+declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
+declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+
+; Illegal width declarations
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
+declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+
+; Double width declarations
+declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)

From d45eaf9405c865cae0737a92d5ebce4e485106c3 Mon Sep 17 00:00:00 2001
From: "J. Ryan Stinnett" <jryans@gmail.com>
Date: Thu, 30 May 2019 16:46:22 +0000
Subject: [PATCH 0635/1176] [Docs] Modernize references to macOS

Summary:
This updates all places in documentation that refer to "Mac OS X", "OS X", etc.
to instead use the modern name "macOS" when no specific version number is
mentioned.

If a specific version is mentioned, this attempts to use the OS name at the time
of that version:

* Mac OS X for 10.0 - 10.7
* OS X for 10.8 - 10.11
* macOS for 10.12 - present

Reviewers: JDevlieghere

Subscribers: mgorny, christof, arphaman, cfe-commits, lldb-commits, libcxx-commits, llvm-commits

Tags: #clang, #lldb, #libc, #llvm

Differential Revision: https://reviews.llvm.org/D62654

llvm-svn: 362113
---
 clang/docs/AddressSanitizer.rst                    |  8 ++++----
 clang/docs/AutomaticReferenceCounting.rst          |  2 +-
 clang/docs/ClangCommandLineReference.rst           |  2 +-
 clang/docs/CommandGuide/clang.rst                  |  2 +-
 clang/docs/LeakSanitizer.rst                       |  2 +-
 clang/docs/Modules.rst                             |  2 +-
 clang/docs/SafeStack.rst                           |  2 +-
 clang/docs/UndefinedBehaviorSanitizer.rst          |  2 +-
 clang/docs/UsersManual.rst                         |  8 ++++----
 clang/docs/analyzer/checkers.rst                   |  2 +-
 clang/docs/analyzer/developer-docs/DebugChecks.rst |  2 +-
 libcxx/docs/BuildingLibcxx.rst                     |  2 +-
 libcxx/docs/UsingLibcxx.rst                        |  4 ++--
 libcxx/docs/index.rst                              |  2 +-
 libunwind/docs/index.rst                           |  2 +-
 lld/docs/sphinx_intro.rst                          |  4 ++--
 lldb/docs/lldb-gdb-remote.txt                      | 14 +++++++-------
 lldb/docs/resources/build.rst                      |  8 ++++++--
 lldb/docs/use/remote.rst                           |  4 ++--
 llvm/docs/CMake.rst                                |  2 +-
 llvm/docs/CommandGuide/llvm-ar.rst                 |  4 ++--
 llvm/docs/CompilerWriterInfo.rst                   |  4 ++--
 llvm/docs/DebuggingJITedCode.rst                   |  2 +-
 llvm/docs/GettingStarted.rst                       |  8 ++++----
 llvm/docs/ProgrammersManual.rst                    |  4 ++--
 llvm/docs/TestingGuide.rst                         |  2 +-
 llvm/docs/WritingAnLLVMPass.rst                    |  2 +-
 27 files changed, 53 insertions(+), 49 deletions(-)

diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst
index 67ef59b5ea2ae..05888dce6a2d1 100644
--- a/clang/docs/AddressSanitizer.rst
+++ b/clang/docs/AddressSanitizer.rst
@@ -119,7 +119,7 @@ force disabled by setting ``ASAN_OPTIONS=symbolize=0``):
         #1 0x7f7ddabcac4d in __libc_start_main ??:0
     ...
 
-Note that on OS X you may need to run ``dsymutil`` on your binary to have the
+Note that on macOS you may need to run ``dsymutil`` on your binary to have the
 file\:line info in the AddressSanitizer reports.
 
 Additional Checks
@@ -134,14 +134,14 @@ globals defined in another translation unit. To enable this check at runtime,
 you should set environment variable
 ``ASAN_OPTIONS=check_initialization_order=1``.
 
-Note that this option is not supported on OS X.
+Note that this option is not supported on macOS.
 
 Memory leak detection
 ---------------------
 
 For more information on leak detector in AddressSanitizer, see
 :doc:`LeakSanitizer`. The leak detection is turned on by default on Linux,
-and can be enabled using ``ASAN_OPTIONS=detect_leaks=1`` on OS X;
+and can be enabled using ``ASAN_OPTIONS=detect_leaks=1`` on macOS;
 however, it is not yet supported on other platforms.
 
 Issue Suppression
@@ -273,7 +273,7 @@ Supported Platforms
 AddressSanitizer is supported on:
 
 * Linux i386/x86\_64 (tested on Ubuntu 12.04)
-* OS X 10.7 - 10.11 (i386/x86\_64)
+* macOS 10.7 - 10.11 (i386/x86\_64)
 * iOS Simulator
 * Android ARM
 * NetBSD i386/x86\_64
diff --git a/clang/docs/AutomaticReferenceCounting.rst b/clang/docs/AutomaticReferenceCounting.rst
index 746c445f9019e..9e4456085b6ea 100644
--- a/clang/docs/AutomaticReferenceCounting.rst
+++ b/clang/docs/AutomaticReferenceCounting.rst
@@ -268,7 +268,7 @@ ARC's semantics and restrictions.
   * There must be reliable conventions for whether and when "ownership" is
     passed between caller and callee, for both arguments and return values.
     Objective-C methods follow such a convention very reliably, at least for
-    system libraries on Mac OS X, and functions always pass objects at +0.  The
+    system libraries on macOS, and functions always pass objects at +0.  The
     C-based APIs for Core Foundation objects, on the other hand, have much more
     varied transfer semantics.
 
diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index 5ec947a7c0bf2..ddba3b10b7899 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -2218,7 +2218,7 @@ Generate branches with extended addressability, usually via indirect jumps.
 
 .. option:: -mmacosx-version-min=<arg>, -mmacos-version-min=<arg>
 
-Set Mac OS X deployment target
+Set macOS deployment target
 
 .. option:: -mmcu=<arg>
 
diff --git a/clang/docs/CommandGuide/clang.rst b/clang/docs/CommandGuide/clang.rst
index a75b6c911571a..84e0dddb8e1fc 100644
--- a/clang/docs/CommandGuide/clang.rst
+++ b/clang/docs/CommandGuide/clang.rst
@@ -316,7 +316,7 @@ number of cross compilers, or may only support a native target.
 
 .. option:: -mmacosx-version-min=<version>
 
-  When building for Mac OS X, specify the minimum version supported by your
+  When building for macOS, specify the minimum version supported by your
   application.
 
 .. option:: -miphoneos-version-min
diff --git a/clang/docs/LeakSanitizer.rst b/clang/docs/LeakSanitizer.rst
index 3601587c42f15..53a3ee15055c8 100644
--- a/clang/docs/LeakSanitizer.rst
+++ b/clang/docs/LeakSanitizer.rst
@@ -17,7 +17,7 @@ detection phase.
 Usage
 =====
 
-LeakSanitizer is supported on x86\_64 Linux and OS X. In order to use it,
+LeakSanitizer is supported on x86\_64 Linux and macOS. In order to use it,
 simply build your program with :doc:`AddressSanitizer`:
 
 .. code-block:: console
diff --git a/clang/docs/Modules.rst b/clang/docs/Modules.rst
index 7aee4ffee2df7..34ad55a4876e6 100644
--- a/clang/docs/Modules.rst
+++ b/clang/docs/Modules.rst
@@ -360,7 +360,7 @@ The *module-id* should consist of only a single *identifier*, which provides the
 
 The ``explicit`` qualifier can only be applied to a submodule, i.e., a module that is nested within another module. The contents of explicit submodules are only made available when the submodule itself was explicitly named in an import declaration or was re-exported from an imported module.
 
-The ``framework`` qualifier specifies that this module corresponds to a Darwin-style framework. A Darwin-style framework (used primarily on Mac OS X and iOS) is contained entirely in directory ``Name.framework``, where ``Name`` is the name of the framework (and, therefore, the name of the module). That directory has the following layout:
+The ``framework`` qualifier specifies that this module corresponds to a Darwin-style framework. A Darwin-style framework (used primarily on macOS and iOS) is contained entirely in directory ``Name.framework``, where ``Name`` is the name of the framework (and, therefore, the name of the module). That directory has the following layout:
 
 .. parsed-literal::
 
diff --git a/clang/docs/SafeStack.rst b/clang/docs/SafeStack.rst
index c1e09da935579..8b5557bf59b05 100644
--- a/clang/docs/SafeStack.rst
+++ b/clang/docs/SafeStack.rst
@@ -126,7 +126,7 @@ and link command lines.
 Supported Platforms
 -------------------
 
-SafeStack was tested on Linux, NetBSD, FreeBSD and MacOSX.
+SafeStack was tested on Linux, NetBSD, FreeBSD and macOS.
 
 Low-level API
 -------------
diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 1e06a181b2318..2456f5040d451 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -295,7 +295,7 @@ UndefinedBehaviorSanitizer is supported on the following operating systems:
 * NetBSD
 * FreeBSD
 * OpenBSD
-* OS X 10.6 onwards
+* macOS
 * Windows
 
 The runtime library is relatively portable and platform independent. If the OS
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index c9195e908276c..39d9d966b24bf 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -1006,7 +1006,7 @@ on-disk cache that contains the vital information necessary to reduce
 some of the work needed to process a corresponding header file. While
 details of precompiled headers vary between compilers, precompiled
 headers have been shown to be highly effective at speeding up program
-compilation on systems with very large system headers (e.g., Mac OS X).
+compilation on systems with very large system headers (e.g., macOS).
 
 Generating a PCH File
 ^^^^^^^^^^^^^^^^^^^^^
@@ -2746,7 +2746,7 @@ X86
 ^^^
 
 The support for X86 (both 32-bit and 64-bit) is considered stable on
-Darwin (Mac OS X), Linux, FreeBSD, and Dragonfly BSD: it has been tested
+Darwin (macOS), Linux, FreeBSD, and Dragonfly BSD: it has been tested
 to correctly compile many large C, C++, Objective-C, and Objective-C++
 codebases.
 
@@ -2801,8 +2801,8 @@ backend.
 Operating System Features and Limitations
 -----------------------------------------
 
-Darwin (Mac OS X)
-^^^^^^^^^^^^^^^^^
+Darwin (macOS)
+^^^^^^^^^^^^^^
 
 Thread Sanitizer is not supported.
 
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 6a266eb1d9e90..93b8f0cbf5c43 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -778,7 +778,7 @@ Check for null pointers being passed as arguments to C string functions:
 
 osx
 ^^^
-OS X checkers.
+macOS checkers.
 
 osx.API (C)
 """""""""""
diff --git a/clang/docs/analyzer/developer-docs/DebugChecks.rst b/clang/docs/analyzer/developer-docs/DebugChecks.rst
index 56ce015d645a1..3f9bed78604f0 100644
--- a/clang/docs/analyzer/developer-docs/DebugChecks.rst
+++ b/clang/docs/analyzer/developer-docs/DebugChecks.rst
@@ -15,7 +15,7 @@ General Analysis Dumpers
 
 These checkers are used to dump the results of various infrastructural analyses
 to stderr. Some checkers also have "view" variants, which will display a graph
-using a 'dot' format viewer (such as Graphviz on OS X) instead.
+using a 'dot' format viewer (such as Graphviz on macOS) instead.
 
 - debug.DumpCallGraph, debug.ViewCallGraph: Show the call graph generated for
   the current translation unit. This is used to determine the order in which to
diff --git a/libcxx/docs/BuildingLibcxx.rst b/libcxx/docs/BuildingLibcxx.rst
index c334269a6d55a..06f89608a2c46 100644
--- a/libcxx/docs/BuildingLibcxx.rst
+++ b/libcxx/docs/BuildingLibcxx.rst
@@ -41,7 +41,7 @@ The basic steps needed to build libc++ are:
 
    .. warning::
      * Replacing your systems libc++ installation could render the system non-functional.
-     * Mac OS X will not boot without a valid copy of ``libc++.1.dylib`` in ``/usr/lib``.
+     * macOS will not boot without a valid copy of ``libc++.1.dylib`` in ``/usr/lib``.
 
 
 The instructions are for building libc++ on
diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst
index e2af5c6ecf384..ef3023ea8f9f6 100644
--- a/libcxx/docs/UsingLibcxx.rst
+++ b/libcxx/docs/UsingLibcxx.rst
@@ -15,7 +15,7 @@ If you already have libc++ installed you can use it with clang.
     $ clang++ -stdlib=libc++ test.cpp
     $ clang++ -std=c++11 -stdlib=libc++ test.cpp
 
-On OS X and FreeBSD libc++ is the default standard library
+On macOS and FreeBSD libc++ is the default standard library
 and the ``-stdlib=libc++`` is not required.
 
 .. _alternate libcxx:
@@ -34,7 +34,7 @@ can use the following options.
 The option ``-Wl,-rpath,<libcxx-install-prefix>/lib`` adds a runtime library
 search path. Meaning that the systems dynamic linker will look for libc++ in
 ``<libcxx-install-prefix>/lib`` whenever the program is run. Alternatively the
-environment variable ``LD_LIBRARY_PATH`` (``DYLD_LIBRARY_PATH`` on OS X) can
+environment variable ``LD_LIBRARY_PATH`` (``DYLD_LIBRARY_PATH`` on macOS) can
 be used to change the dynamic linkers search paths after a program is compiled.
 
 An example of using ``LD_LIBRARY_PATH``:
diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst
index 0fd35407f3401..933d0fe311544 100644
--- a/libcxx/docs/index.rst
+++ b/libcxx/docs/index.rst
@@ -93,7 +93,7 @@ and GCC.
 ============ ==================== ============ ========================
 OS           Arch                 Compilers    ABI Library
 ============ ==================== ============ ========================
-Mac OS X     i386, x86_64         Clang, GCC   libc++abi
+macOS        i386, x86_64         Clang, GCC   libc++abi
 FreeBSD 10+  i386, x86_64, ARM    Clang, GCC   libcxxrt, libc++abi
 Linux        i386, x86_64         Clang, GCC   libc++abi
 ============ ==================== ============ ========================
diff --git a/libunwind/docs/index.rst b/libunwind/docs/index.rst
index 9e53b411ed01c..a4e21bb3c336c 100644
--- a/libunwind/docs/index.rst
+++ b/libunwind/docs/index.rst
@@ -50,7 +50,7 @@ FreeBSD      i386, x86_64, ARM64      Clang        DWARF CFI
 iOS          ARM                      Clang        SjLj
 Linux        ARM                      Clang, GCC   EHABI
 Linux        i386, x86_64, ARM64      Clang, GCC   DWARF CFI
-Mac OS X     i386, x86_64             Clang, GCC   DWARF CFI
+macOS        i386, x86_64             Clang, GCC   DWARF CFI
 NetBSD       x86_64                   Clang, GCC   DWARF CFI
 Windows      i386, x86_64, ARM, ARM64 Clang        DWARF CFI
 ============ ======================== ============ ========================
diff --git a/lld/docs/sphinx_intro.rst b/lld/docs/sphinx_intro.rst
index 6bb9816b5ab47..b671cdc3df64e 100644
--- a/lld/docs/sphinx_intro.rst
+++ b/lld/docs/sphinx_intro.rst
@@ -43,8 +43,8 @@ to install it using:
     Use your distribution's standard package management tool to install it,
     i.e., ``apt-get install easy_install`` or ``yum install easy_install``.
 
-  Mac OS X
-    All modern Mac OS X systems come with ``easy_install`` as part of the base
+  macOS
+    All modern macOS systems come with ``easy_install`` as part of the base
     system.
 
   Windows
diff --git a/lldb/docs/lldb-gdb-remote.txt b/lldb/docs/lldb-gdb-remote.txt
index 6a70dc02c64b4..c327477d6914a 100644
--- a/lldb/docs/lldb-gdb-remote.txt
+++ b/lldb/docs/lldb-gdb-remote.txt
@@ -787,9 +787,9 @@ os_version: a version string that represents the current OS version (10.8.2)
 watchpoint_exceptions_received: one of "before" or "after" to specify if a watchpoint is triggered before or after the pc when it stops
 default_packet_timeout: an unsigned number that specifies the default timeout in seconds
 distribution_id: optional. For linux, specifies distribution id (e.g. ubuntu, fedora, etc.)
-osmajor: optional, specifies the major version number of the OS (e.g. for Mac OS X 10.11.2, it would be 10)
-osminor: optional, specifies the minor version number of the OS (e.g. for Mac OS X 10.11.2, it would be 11)
-ospatch: optional, specifies the patch level number of the OS (e.g. for Mac OS X 10.11.2, it would be 2)
+osmajor: optional, specifies the major version number of the OS (e.g. for macOS 10.12.2, it would be 10)
+osminor: optional, specifies the minor version number of the OS (e.g. for macOS 10.12.2, it would be 12)
+ospatch: optional, specifies the patch level number of the OS (e.g. for macOS 10.12.2, it would be 2)
 
 //----------------------------------------------------------------------
 // "qGDBServerVersion"
@@ -1160,7 +1160,7 @@ for this region.
 //  second form of this packet is used, otherwise the first form is 
 //  used. This packet is called prior to executing an expression, so
 //  the remote GDB server should do anything it needs to in order to 
-//  ensure the registers that are saved are correct. On MacOSX this
+//  ensure the registers that are saved are correct. On macOS this
 //  involves calling "thread_abort_safely(mach_port_t thread)" to 
 //  ensure we get the correct registers for a thread in case it is
 //  currently having code run on its behalf in the kernel.
@@ -1723,7 +1723,7 @@ for this region.
 //  There are three ways this packet can be used.  All three return a dictionary of
 //  binary images formatted the same way.
 //
-//  On MacOS X 10.11, iOS 9, tvOS 9, watchOS 2 and earlier, the packet is used like
+//  On OS X 10.11, iOS 9, tvOS 9, watchOS 2 and earlier, the packet is used like
 //       jGetLoadedDynamicLibrariesInfos:{"image_count":1,"image_list_address":140734800075128}
 //  where the image_list_address is an array of {void* load_addr, void* mod_date, void* pathname}
 //  in the inferior process memory (and image_count is the number of elements in this array).
@@ -1863,9 +1863,9 @@ server to expedite memory that the client is likely to use (e.g., areas around t
 stack pointer, which are needed for computing backtraces) and it reduces the packet
 count.
 
-On MacOSX with debugserver, we expedite the frame pointer backchain for a thread
+On macOS with debugserver, we expedite the frame pointer backchain for a thread
 (up to 256 entries) by reading 2 pointers worth of bytes at the frame pointer (for
-the previous FP and PC), and follow the backchain. Most backtraces on MacOSX and
+the previous FP and PC), and follow the backchain. Most backtraces on macOS and
 iOS now don't require us to read any memory!
 
 //----------------------------------------------------------------------
diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst
index 377c544986473..f71fb9e414b56 100644
--- a/lldb/docs/resources/build.rst
+++ b/lldb/docs/resources/build.rst
@@ -117,8 +117,12 @@ There are two ways to build LLDB on macOS: Using Xcode and using CMake
 
 **Preliminaries**
 
-* Xcode 4.3 or newer requires the "Command Line Tools" component (XCode->Preferences->Downloads->Components).
-* Mac OS X Lion or newer requires installing `Swig <http://swig.org/>`_.
+In addition to any dependencies required by LLVM and Clang, LLDB needs a few
+development packages that may also need to be installed depending on your
+system. The current list of dependencies are:
+
+* Xcode 4.3 or newer requires the "Command Line Tools" component (XCode->Preferences->Downloads->Components)
+* `Swig <http://swig.org/>`_
 
 **Building LLDB with Xcode**
 
diff --git a/lldb/docs/use/remote.rst b/lldb/docs/use/remote.rst
index ea18bc14a184e..0eb43fbb82fca 100644
--- a/lldb/docs/use/remote.rst
+++ b/lldb/docs/use/remote.rst
@@ -17,7 +17,7 @@ advanced debugging operations, like copying files from/to the remote system and
 can be used to execute arbitrary shell commands on the remote system.
 
 In order to reduce code complexity and improve remote debugging experience LLDB
-on Linux and OSX uses the remote debugging stub even when debugging a process
+on Linux and macOS uses the remote debugging stub even when debugging a process
 locally. This is achieved by spawning a remote stub process locally and
 communicating with it over the loopback interface. In the case of local
 debugging this whole process is transparent to the user. The platform binary is
@@ -44,7 +44,7 @@ gdb-remote stub. A single binary facilitates deployment and reduces code size,
 since the two functions share a lot of code. The lldb-server binary is also
 statically linked with the rest of LLDB (unlike lldb, which dynamically links
 to liblldb.so by default), so it does not have any dependencies on the rest of
-lldb. On Mac OSX and iOS, the remote-gdb functionality is implemented by the
+lldb. On macOS and iOS, the remote-gdb functionality is implemented by the
 debugserver binary, which you will need to deploy alongside lldb-server.
 
 The binaries mentioned above need to be present on the remote system to enable
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index 92e7b9c2a4734..3234eb0d57fd4 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -533,7 +533,7 @@ LLVM-specific variables
   `share/doc/llvm/ocaml-html`.
 
 **LLVM_CREATE_XCODE_TOOLCHAIN**:BOOL
-  OS X Only: If enabled CMake will generate a target named
+  macOS Only: If enabled CMake will generate a target named
   'install-xcode-toolchain'. This target will create a directory at
   $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can
   be used to override the default system tools.
diff --git a/llvm/docs/CommandGuide/llvm-ar.rst b/llvm/docs/CommandGuide/llvm-ar.rst
index d3ee993f738eb..d10ec919795ce 100644
--- a/llvm/docs/CommandGuide/llvm-ar.rst
+++ b/llvm/docs/CommandGuide/llvm-ar.rst
@@ -229,7 +229,7 @@ STANDARDS
 
 The **llvm-ar** utility is intended to provide a superset of the IEEE Std 1003.2
 (POSIX.2) functionality for ``ar``. **llvm-ar** can read both SVR4 and BSD4.4 (or
-Mac OS X) archives. If the ``f`` modifier is given to the ``x`` or ``r`` operations
+macOS) archives. If the ``f`` modifier is given to the ``x`` or ``r`` operations
 then **llvm-ar** will write SVR4 compatible archives. Without this modifier,
 **llvm-ar** will write BSD4.4 compatible archives that have long names
 immediately after the header and indicated using the "#1/ddd" notation for the
@@ -240,7 +240,7 @@ FILE FORMAT
 -----------
 
 
-The file format for LLVM Archive files is similar to that of BSD 4.4 or Mac OSX
+The file format for LLVM Archive files is similar to that of BSD 4.4 or macOS
 archive files. In fact, except for the symbol table, the ``ar`` commands on those
 operating systems should be able to read LLVM archive files. The details of the
 file format follow.
diff --git a/llvm/docs/CompilerWriterInfo.rst b/llvm/docs/CompilerWriterInfo.rst
index 731d24a1c4e5e..7058574925ae3 100644
--- a/llvm/docs/CompilerWriterInfo.rst
+++ b/llvm/docs/CompilerWriterInfo.rst
@@ -140,8 +140,8 @@ Linux
 * `ELF for the ARM 64-bit Architecture (AArch64) <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf>`_
 * `System z ELF ABI Supplement <http://legacy.redhat.com/pub/redhat/linux/7.1/es/os/s390x/doc/lzsabi0.pdf>`_
 
-OS X
-----
+macOS
+-----
 
 * `Mach-O Runtime Architecture <http://developer.apple.com/documentation/Darwin/RuntimeArchitecture-date.html>`_
 * `Notes on Mach-O ABI <http://www.unsanity.org/archives/000044.php>`_
diff --git a/llvm/docs/DebuggingJITedCode.rst b/llvm/docs/DebuggingJITedCode.rst
index d6101d5100340..fad4c9bfd2037 100644
--- a/llvm/docs/DebuggingJITedCode.rst
+++ b/llvm/docs/DebuggingJITedCode.rst
@@ -29,7 +29,7 @@ GDB Version
 In order to debug code JIT-ed by LLVM, you need GDB 7.0 or newer, which is
 available on most modern distributions of Linux.  The version of GDB that
 Apple ships with Xcode has been frozen at 6.3 for a while.  LLDB may be a
-better option for debugging JIT-ed code on Mac OS X.
+better option for debugging JIT-ed code on macOS.
 
 
 Debugging MCJIT-ed code
diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst
index f10bcd32151f8..5901d28aa28c7 100644
--- a/llvm/docs/GettingStarted.rst
+++ b/llvm/docs/GettingStarted.rst
@@ -128,8 +128,8 @@ FreeBSD            x86\ :sup:`1`         GCC, Clang
 FreeBSD            amd64                 GCC, Clang
 NetBSD             x86\ :sup:`1`         GCC, Clang
 NetBSD             amd64                 GCC, Clang
-MacOS X\ :sup:`2`  PowerPC               GCC
-MacOS X            x86                   GCC, Clang
+macOS\ :sup:`2`    PowerPC               GCC
+macOS              x86                   GCC, Clang
 Cygwin/Win32       x86\ :sup:`1, 3`      GCC
 Windows            x86\ :sup:`1`         Visual Studio
 Windows x64        x86-64                Visual Studio
@@ -272,7 +272,7 @@ newer version of Gold.
 Getting a Modern Host C++ Toolchain
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-This section mostly applies to Linux and older BSDs. On Mac OS X, you should
+This section mostly applies to Linux and older BSDs. On macOS, you should
 have a sufficiently modern Xcode, or you will likely need to upgrade until you
 do. Windows does not have a "system compiler", so you must install either Visual
 Studio 2015 or a recent version of mingw64. FreeBSD 10.0 and newer have a modern
@@ -711,7 +711,7 @@ define compiler flags and variables used during the CMake test operations.
 
 The result of such a build is executables that are not runnable on the build
 host but can be executed on the target. As an example the following CMake
-invocation can generate build files targeting iOS. This will work on Mac OS X
+invocation can generate build files targeting iOS. This will work on macOS
 with the latest Xcode:
 
 .. code-block:: console
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 75b6239375acb..fe5e6610dc8d8 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -1372,8 +1372,8 @@ these functions in your code in places you want to debug.
 
 Getting this to work requires a small amount of setup.  On Unix systems
 with X11, install the `graphviz <http://www.graphviz.org>`_ toolkit, and make
-sure 'dot' and 'gv' are in your path.  If you are running on Mac OS X, download
-and install the Mac OS X `Graphviz program
+sure 'dot' and 'gv' are in your path.  If you are running on macOS, download
+and install the macOS `Graphviz program
 <http://www.pixelglow.com/graphviz/>`_ and add
 ``/Applications/Graphviz.app/Contents/MacOS/`` (or wherever you install it) to
 your path. The programs need not be present when configuring, building or
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 949fecf4ccfcf..fed9a6bfcabe2 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -511,7 +511,7 @@ RUN lines:
    The suffix for the host platforms shared library files. This includes the
    period as the first character.
 
-   Example: ``.so`` (Linux), ``.dylib`` (OS X), ``.dll`` (Windows)
+   Example: ``.so`` (Linux), ``.dylib`` (macOS), ``.dll`` (Windows)
 
 ``%exeext``
    The suffix for the host platforms executable files. This includes the
diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst
index 19dc6c11c9b6d..9e857fb96570f 100644
--- a/llvm/docs/WritingAnLLVMPass.rst
+++ b/llvm/docs/WritingAnLLVMPass.rst
@@ -77,7 +77,7 @@ This build script specifies that ``Hello.cpp`` file in the current directory
 is to be compiled and linked into a shared object ``$(LEVEL)/lib/LLVMHello.so`` that
 can be dynamically loaded by the :program:`opt` tool via its :option:`-load`
 option. If your operating system uses a suffix other than ``.so`` (such as
-Windows or Mac OS X), the appropriate extension will be used.
+Windows or macOS), the appropriate extension will be used.
 
 Now that we have the build scripts set up, we just need to write the code for
 the pass itself.

From 2ae4b3318170e6595bcf652f8b78894213475e53 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 16:50:43 +0000
Subject: [PATCH 0636/1176] [NFC][Codegen] Potential add/sub constant folding:
 fixup non-splat tests

llvm-svn: 362114
---
 .../AArch64/addsub-constant-folding.ll        | 20 ++++++----
 .../CodeGen/X86/addsub-constant-folding.ll    | 38 +++++++++----------
 2 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index 6d0f22301105c..38c3dfd7546ba 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -94,10 +94,13 @@ define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    adrp x8, .LCPI5_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI5_1]
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
-  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
   ret <4 x i32> %t1
 }
 
@@ -245,10 +248,13 @@ define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    adrp x8, .LCPI14_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI14_1]
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
-  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
   ret <4 x i32> %t1
 }
 
@@ -393,11 +399,11 @@ define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI23_0]
 ; CHECK-NEXT:    adrp x8, .LCPI23_1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI23_1]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
-  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
   ret <4 x i32> %t1
 }
 
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index 100c3666e5a40..dc9ee4ca6367e 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -126,19 +126,17 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: add_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
-; X86-NEXT:    psubd %xmm0, %xmm1
-; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
-  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
   ret <4 x i32> %t1
 }
 
@@ -341,19 +339,17 @@ define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: sub_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
-; X86-NEXT:    psubd %xmm0, %xmm1
-; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
-  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
   ret <4 x i32> %t1
 }
 
@@ -569,17 +565,21 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: const_sub_sub_const_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <21,u,8,8>
+; X86-NEXT:    psubd %xmm0, %xmm1
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: const_sub_sub_const_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <21,u,8,8>
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
-  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
   ret <4 x i32> %t1
 }
 

From ee319034ab8f84f68d9f54425d782e9d080e76c4 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Thu, 30 May 2019 16:53:05 +0000
Subject: [PATCH 0637/1176] [libcxx] Add regex test cases from PR40904

llvm-svn: 362115
---
 .../re.alg.match/inverted_character_classes.pass.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
index d48d86ee64cf2..5190357209a13 100644
--- a/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
@@ -41,5 +41,15 @@ int main(int, char**) {
     assert(std::regex_match("X", std::regex("[^\\W]")));
     assert(std::regex_match("_", std::regex("[^\\W]")));
 
-  return 0;
+    // Those test cases are taken from PR40904
+    assert(std::regex_match("abZcd", std::regex("^ab[\\d\\D]cd")));
+    assert(std::regex_match("ab5cd", std::regex("^ab[\\d\\D]cd")));
+    assert(std::regex_match("abZcd", std::regex("^ab[\\D]cd")));
+    assert(std::regex_match("abZcd", std::regex("^ab\\Dcd")));
+    assert(std::regex_match("ab5cd", std::regex("^ab[\\d]cd")));
+    assert(std::regex_match("ab5cd", std::regex("^ab\\dcd")));
+    assert(!std::regex_match("abZcd", std::regex("^ab\\dcd")));
+    assert(!std::regex_match("ab5cd", std::regex("^ab\\Dcd")));
+
+    return 0;
 }

From aeae786bfe89d9f720596707eb675c6d0418aec5 Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg@gmail.com>
Date: Thu, 30 May 2019 17:03:35 +0000
Subject: [PATCH 0638/1176] Code and comment cleanups [NFC]

Changes:
- update comments to detail the info can come from .debug_info or .debug_types
- Rename "debug_info_data" to "data" now that we can get data from .debug_info or .debug_types.
- Also call DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(...) instead of manually grabbing abbreviation.

llvm-svn: 362116
---
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  | 88 +++++++++----------
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.h    |  7 +-
 2 files changed, 44 insertions(+), 51 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index c8684d20acd40..27db5c090789e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -33,16 +33,15 @@ using namespace lldb_private;
 using namespace std;
 extern int g_verbose;
 
-// Extract a debug info entry for a given compile unit from the .debug_info and
-// .debug_abbrev data within the SymbolFileDWARF class starting at the given
-// offset
-bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &debug_info_data,
+// Extract a debug info entry for a given DWARFUnit from the data
+// starting at the offset in offset_ptr
+bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &data,
                                   const DWARFUnit *cu,
                                   lldb::offset_t *offset_ptr) {
   m_offset = *offset_ptr;
   m_parent_idx = 0;
   m_sibling_idx = 0;
-  const uint64_t abbr_idx = debug_info_data.GetULEB128(offset_ptr);
+  const uint64_t abbr_idx = data.GetULEB128(offset_ptr);
   lldbassert(abbr_idx <= UINT16_MAX);
   m_abbr_idx = abbr_idx;
 
@@ -51,10 +50,7 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &debug_info_data,
 
   if (m_abbr_idx) {
     lldb::offset_t offset = *offset_ptr;
-
-    const DWARFAbbreviationDeclaration *abbrevDecl =
-        cu->GetAbbreviations()->GetAbbreviationDeclaration(m_abbr_idx);
-
+    auto *abbrevDecl = GetAbbreviationDeclarationPtr(cu);
     if (abbrevDecl == nullptr) {
       cu->GetSymbolFileDWARF()->GetObjectFile()->GetModule()->ReportError(
           "{0x%8.8x}: invalid abbreviation code %u, please file a bug and "
@@ -66,14 +62,14 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &debug_info_data,
     }
     m_tag = abbrevDecl->Tag();
     m_has_children = abbrevDecl->HasChildren();
-    // Skip all data in the .debug_info for the attributes
+    // Skip all data in the .debug_info or .debug_types for the attributes
     const uint32_t numAttributes = abbrevDecl->NumAttributes();
     uint32_t i;
     dw_form_t form;
     for (i = 0; i < numAttributes; ++i) {
       form = abbrevDecl->GetFormByIndexUnchecked(i);
-
-      llvm::Optional<uint8_t> fixed_skip_size = DWARFFormValue::GetFixedSize(form, cu);
+      llvm::Optional<uint8_t> fixed_skip_size =
+          DWARFFormValue::GetFixedSize(form, cu);
       if (fixed_skip_size)
         offset += *fixed_skip_size;
       else {
@@ -83,24 +79,24 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &debug_info_data,
           uint32_t form_size = 0;
           switch (form) {
           // Blocks if inlined data that have a length field and the data bytes
-          // inlined in the .debug_info
+          // inlined in the .debug_info/.debug_types
           case DW_FORM_exprloc:
           case DW_FORM_block:
-            form_size = debug_info_data.GetULEB128(&offset);
+            form_size = data.GetULEB128(&offset);
             break;
           case DW_FORM_block1:
-            form_size = debug_info_data.GetU8_unchecked(&offset);
+            form_size = data.GetU8_unchecked(&offset);
             break;
           case DW_FORM_block2:
-            form_size = debug_info_data.GetU16_unchecked(&offset);
+            form_size = data.GetU16_unchecked(&offset);
             break;
           case DW_FORM_block4:
-            form_size = debug_info_data.GetU32_unchecked(&offset);
+            form_size = data.GetU32_unchecked(&offset);
             break;
 
           // Inlined NULL terminated C-strings
           case DW_FORM_string:
-            debug_info_data.GetCStr(&offset);
+            data.GetCStr(&offset);
             break;
 
           // Compile unit address sized values
@@ -166,17 +162,17 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &debug_info_data,
           case DW_FORM_GNU_addr_index:
           case DW_FORM_GNU_str_index:
           case DW_FORM_strx:
-            debug_info_data.Skip_LEB128(&offset);
+            data.Skip_LEB128(&offset);
             break;
 
           case DW_FORM_indirect:
             form_is_indirect = true;
-            form = debug_info_data.GetULEB128(&offset);
+            form = data.GetULEB128(&offset);
             break;
 
           case DW_FORM_strp:
           case DW_FORM_sec_offset:
-            debug_info_data.GetU32(&offset);
+            data.GetU32(&offset);
             break;
 
           case DW_FORM_implicit_const:
@@ -247,10 +243,10 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
   lldb::ModuleSP module = dwarf2Data->GetObjectFile()->GetModule();
 
   if (abbrevDecl) {
-    const DWARFDataExtractor &debug_info_data = cu->GetData();
+    const DWARFDataExtractor &data = cu->GetData();
     lldb::offset_t offset = GetFirstAttributeOffset();
 
-    if (!debug_info_data.ValidOffset(offset))
+    if (!data.ValidOffset(offset))
       return false;
 
     const uint32_t numAttributes = abbrevDecl->NumAttributes();
@@ -261,7 +257,7 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
       dw_attr_t attr;
       abbrevDecl->GetAttrAndFormValueByIndex(i, attr, form_value);
 
-      if (form_value.ExtractValue(debug_info_data, &offset)) {
+      if (form_value.ExtractValue(data, &offset)) {
         switch (attr) {
         case DW_AT_low_pc:
           lo_pc = form_value.Address();
@@ -347,9 +343,9 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
           if (frame_base) {
             if (form_value.BlockData()) {
               uint32_t block_offset =
-                  form_value.BlockData() - debug_info_data.GetDataStart();
+                  form_value.BlockData() - data.GetDataStart();
               uint32_t block_length = form_value.Unsigned();
-              *frame_base = DWARFExpression(module, debug_info_data, cu,
+              *frame_base = DWARFExpression(module, data, cu,
                                             block_offset, block_length);
             } else {
               const DWARFDataExtractor &debug_loc_data =
@@ -416,32 +412,30 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
 // stream.
 void DWARFDebugInfoEntry::Dump(const DWARFUnit *cu, Stream &s,
                                uint32_t recurse_depth) const {
-  const DWARFDataExtractor &debug_info_data = cu->GetData();
+  const DWARFDataExtractor &data = cu->GetData();
   lldb::offset_t offset = m_offset;
 
-  if (debug_info_data.ValidOffset(offset)) {
-    dw_uleb128_t abbrCode = debug_info_data.GetULEB128(&offset);
+  if (data.ValidOffset(offset)) {
+    dw_uleb128_t abbrCode = data.GetULEB128(&offset);
 
     s.Printf("\n0x%8.8x: ", m_offset);
     s.Indent();
     if (abbrCode != m_abbr_idx) {
       s.Printf("error: DWARF has been modified\n");
     } else if (abbrCode) {
-      const DWARFAbbreviationDeclaration *abbrevDecl =
-          cu->GetAbbreviations()->GetAbbreviationDeclaration(abbrCode);
-
+      auto *abbrevDecl = GetAbbreviationDeclarationPtr(cu);
       if (abbrevDecl) {
         s.PutCString(DW_TAG_value_to_name(abbrevDecl->Tag()));
         s.Printf(" [%u] %c\n", abbrCode, abbrevDecl->HasChildren() ? '*' : ' ');
 
-        // Dump all data in the .debug_info for the attributes
+        // Dump all data in the .debug_info/.debug_types for the attributes
         const uint32_t numAttributes = abbrevDecl->NumAttributes();
         for (uint32_t i = 0; i < numAttributes; ++i) {
           DWARFFormValue form_value(cu);
           dw_attr_t attr;
           abbrevDecl->GetAttrAndFormValueByIndex(i, attr, form_value);
 
-          DumpAttribute(cu, debug_info_data, &offset, s, attr, form_value);
+          DumpAttribute(cu, data, &offset, s, attr, form_value);
         }
 
         const DWARFDebugInfoEntry *child = GetFirstChild();
@@ -470,7 +464,7 @@ void DWARFDebugInfoEntry::Dump(const DWARFUnit *cu, Stream &s,
 // display of attributes is done (disassemble location lists, show enumeration
 // values for attributes, etc).
 void DWARFDebugInfoEntry::DumpAttribute(
-    const DWARFUnit *cu, const DWARFDataExtractor &debug_info_data,
+    const DWARFUnit *cu, const DWARFDataExtractor &data,
     lldb::offset_t *offset_ptr, Stream &s, dw_attr_t attr,
     DWARFFormValue &form_value) {
   bool show_form = s.GetFlags().Test(DWARFDebugInfo::eDumpFlag_ShowForm);
@@ -482,7 +476,7 @@ void DWARFDebugInfoEntry::DumpAttribute(
     s.Printf("[%s", DW_FORM_value_to_name(form_value.Form()));
   }
 
-  if (!form_value.ExtractValue(debug_info_data, offset_ptr))
+  if (!form_value.ExtractValue(data, offset_ptr))
     return;
 
   if (show_form) {
@@ -517,7 +511,7 @@ void DWARFDebugInfoEntry::DumpAttribute(
     const uint8_t *blockData = form_value.BlockData();
     if (blockData) {
       // Location description is inlined in data in the form value
-      DWARFDataExtractor locationData(debug_info_data,
+      DWARFDataExtractor locationData(data,
                                       (*offset_ptr) - form_value.Unsigned(),
                                       form_value.Unsigned());
       DWARFExpression::PrintDWARFExpression(
@@ -564,7 +558,7 @@ size_t DWARFDebugInfoEntry::GetAttributes(
     uint32_t curr_depth) const {
   auto abbrevDecl = GetAbbreviationDeclarationPtr(cu);
   if (abbrevDecl) {
-    const DWARFDataExtractor &debug_info_data = cu->GetData();
+    const DWARFDataExtractor &data = cu->GetData();
     lldb::offset_t offset = GetFirstAttributeOffset();
 
     const uint32_t num_attributes = abbrevDecl->NumAttributes();
@@ -593,7 +587,7 @@ size_t DWARFDebugInfoEntry::GetAttributes(
       }
 
       if ((attr == DW_AT_specification) || (attr == DW_AT_abstract_origin)) {
-        if (form_value.ExtractValue(debug_info_data, &offset)) {
+        if (form_value.ExtractValue(data, &offset)) {
           DWARFDIE spec_die = form_value.Reference();
           if (spec_die)
             spec_die.GetAttributes(attributes, curr_depth + 1);
@@ -603,7 +597,7 @@ size_t DWARFDebugInfoEntry::GetAttributes(
         if (fixed_skip_size)
           offset += *fixed_skip_size;
         else
-          DWARFFormValue::SkipValue(form, debug_info_data, &offset, cu);
+          DWARFFormValue::SkipValue(form, data, &offset, cu);
       }
     }
   } else {
@@ -614,10 +608,10 @@ size_t DWARFDebugInfoEntry::GetAttributes(
 
 // GetAttributeValue
 //
-// Get the value of an attribute and return the .debug_info offset of the
-// attribute if it was properly extracted into form_value, or zero if we fail
-// since an offset of zero is invalid for an attribute (it would be a compile
-// unit header).
+// Get the value of an attribute and return the .debug_info or .debug_types
+// offset of the attribute if it was properly extracted into form_value,
+// or zero if we fail since an offset of zero is invalid for an attribute (it
+// would be a compile unit header).
 dw_offset_t DWARFDebugInfoEntry::GetAttributeValue(
     const DWARFUnit *cu, const dw_attr_t attr, DWARFFormValue &form_value,
     dw_offset_t *end_attr_offset_ptr,
@@ -635,18 +629,18 @@ dw_offset_t DWARFDebugInfoEntry::GetAttributeValue(
     uint32_t attr_idx = abbrevDecl->FindAttributeIndex(attr);
 
     if (attr_idx != DW_INVALID_INDEX) {
-      const DWARFDataExtractor &debug_info_data = cu->GetData();
+      const DWARFDataExtractor &data = cu->GetData();
       lldb::offset_t offset = GetFirstAttributeOffset();
 
       uint32_t idx = 0;
       while (idx < attr_idx)
         DWARFFormValue::SkipValue(abbrevDecl->GetFormByIndex(idx++),
-                                  debug_info_data, &offset, cu);
+                                  data, &offset, cu);
 
       const dw_offset_t attr_offset = offset;
       form_value.SetUnit(cu);
       form_value.SetForm(abbrevDecl->GetFormByIndex(idx));
-      if (form_value.ExtractValue(debug_info_data, &offset)) {
+      if (form_value.ExtractValue(data, &offset)) {
         if (end_attr_offset_ptr)
           *end_attr_offset_ptr = offset;
         return attr_offset;
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
index 77a97577883d4..1e7b5f27642d3 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h
@@ -43,7 +43,7 @@ class DWARFDebugInfoEntry {
   void BuildFunctionAddressRangeTable(const DWARFUnit *cu,
                                       DWARFDebugAranges *debug_aranges) const;
 
-  bool Extract(const lldb_private::DWARFDataExtractor &debug_info_data,
+  bool Extract(const lldb_private::DWARFDataExtractor &data,
                const DWARFUnit *cu, lldb::offset_t *offset_ptr);
 
   bool LookupAddress(const dw_addr_t address, const DWARFUnit *cu,
@@ -107,7 +107,7 @@ class DWARFDebugInfoEntry {
 
   static void
   DumpAttribute(const DWARFUnit *cu,
-                const lldb_private::DWARFDataExtractor &debug_info_data,
+                const lldb_private::DWARFDataExtractor &data,
                 lldb::offset_t *offset_ptr, lldb_private::Stream &s,
                 dw_attr_t attr, DWARFFormValue &form_value);
 
@@ -169,8 +169,7 @@ class DWARFDebugInfoEntry {
   void SetParentIndex(uint32_t idx) { m_parent_idx = idx; }
 
 protected:
-  dw_offset_t
-      m_offset; // Offset within the .debug_info of the start of this entry
+  dw_offset_t m_offset; // Offset within the .debug_info/.debug_types
   uint32_t m_parent_idx; // How many to subtract from "this" to get the parent.
                          // If zero this die has no parent
   uint32_t m_sibling_idx : 31, // How many to add to "this" to get the sibling.

From d3db7b40b0538f737c5a519d18845e9db10df025 Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Thu, 30 May 2019 17:10:21 +0000
Subject: [PATCH 0639/1176] Revert r362112, it broke the bots with the message
 "Unsupported vector argument or return type"

Differential Revision:	http://reviews.llvm.org/D62546

llvm-svn: 362117
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |    5 -
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   59 +-
 .../vector-constrained-fp-intrinsics.ll       | 6445 -----------------
 3 files changed, 1 insertion(+), 6508 deletions(-)
 delete mode 100644 llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8beaf145e0f85..a0e7c8a89c187 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -857,11 +857,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
   SDValue WidenVecOp_VECREDUCE(SDNode *N);
 
-  /// Helper function to generate a set of operations to perform
-  /// a vector operation for a wider type.
-  ///
-  SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
-
   //===--------------------------------------------------------------------===//
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 072f15b1f4c3e..95aace3dc9a63 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1318,63 +1318,6 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
   ReplaceValueWith(SDValue(N, 1), Chain);
 }
 
-SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
-  SDValue Chain = N->getOperand(0);
-  EVT VT = N->getValueType(0);
-  unsigned NE = VT.getVectorNumElements();
-  EVT EltVT = VT.getVectorElementType();
-  SDLoc dl(N);
-
-  SmallVector<SDValue, 8> Scalars;
-  SmallVector<SDValue, 4> Operands(N->getNumOperands());
-
-  // If ResNE is 0, fully unroll the vector op.
-  if (ResNE == 0)
-    ResNE = NE;
-  else if (NE > ResNE)
-    NE = ResNE;
-
-  //The results of each unrolled operation, including the chain.
-  EVT ChainVTs[] = {EltVT, MVT::Other};
-  SmallVector<SDValue, 8> Chains;
-
-  unsigned i;
-  for (i = 0; i != NE; ++i) {
-    Operands[0] = Chain;
-    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
-      SDValue Operand = N->getOperand(j);
-      EVT OperandVT = Operand.getValueType();
-      if (OperandVT.isVector()) {
-        EVT OperandEltVT = OperandVT.getVectorElementType();
-        Operands[j] =
-            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
-                    DAG.getConstant(i, dl, TLI.getVectorIdxTy(
-                          DAG.getDataLayout())));
-      } else {
-        Operands[j] = Operand;
-      }
-    }
-    SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
-    Scalar.getNode()->setFlags(N->getFlags());
-
-    //Add in the scalar as well as its chain value to the
-    //result vectors.
-    Scalars.push_back(Scalar);
-    Chains.push_back(Scalar.getValue(1));
-  }
-
-  for (; i < ResNE; ++i)
-    Scalars.push_back(DAG.getUNDEF(EltVT));
-
-  // Build a new factor node to connect the chain back together.
-  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
-  ReplaceValueWith(SDValue(N, 1), Chain);
-
-  // Create a new BUILD_VECTOR node
-  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
-  return DAG.getBuildVector(VecVT, dl, Scalars);
-}
-
 void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
                                               SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
@@ -3025,7 +2968,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
 
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
-    return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
+    return UnrollVectorOp(N, WidenVT.getVectorNumElements());
 
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
deleted file mode 100644
index fd5895ff9d2e8..0000000000000
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ /dev/null
@@ -1,6445 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 -mtriple=s390x-linux-gnu < %s | FileCheck --check-prefix=S390X %s
-; RUN: llc -O3 -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck --check-prefix=SZ13 %s
-
-define <1 x float> @constrained_vector_fdiv_v1f32() {
-; S390X-LABEL: constrained_vector_fdiv_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI0_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI0_1
-; S390X-NEXT:    deb %f0, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fdiv_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI0_0
-; SZ13-NEXT:    vgmf %v0, 2, 8
-; SZ13-NEXT:    deb %f0, 0(%r1)
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
-           <1 x float> <float 1.000000e+00>,
-           <1 x float> <float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %div
-}
-
-define <2 x double> @constrained_vector_fdiv_v2f64() {
-; S390X-LABEL: constrained_vector_fdiv_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI1_0
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI1_1
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI1_2
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ddbr %f0, %f1
-; S390X-NEXT:    ddbr %f2, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fdiv_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI1_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI1_1
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    vfddb %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
-           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
-           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %div
-}
-
-define <3 x float> @constrained_vector_fdiv_v3f32() {
-; S390X-LABEL: constrained_vector_fdiv_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI2_0
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI2_1
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI2_2
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI2_3
-; S390X-NEXT:    le %f4, 0(%r1)
-; S390X-NEXT:    debr %f0, %f1
-; S390X-NEXT:    debr %f2, %f1
-; S390X-NEXT:    debr %f4, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fdiv_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI2_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI2_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    debr %f1, %f0
-; SZ13-NEXT:    vgmf %v2, 2, 8
-; SZ13-NEXT:    vgmf %v3, 1, 1
-; SZ13-NEXT:    debr %f2, %f0
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    debr %f3, %f0
-; SZ13-NEXT:    vmrhf %v0, %v2, %v3
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
-           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
-           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %div
-}
-
-define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_fdiv_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI3_1
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI3_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI3_0
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ddb %f1, 16(%r2)
-; S390X-NEXT:    ddb %f0, 8(%r2)
-; S390X-NEXT:    ddb %f2, 0(%r2)
-; S390X-NEXT:    std %f1, 16(%r2)
-; S390X-NEXT:    std %f0, 8(%r2)
-; S390X-NEXT:    std %f2, 0(%r2)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fdiv_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI3_0
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    vfddb %v0, %v1, %v0
-; SZ13-NEXT:    larl %r1, .LCPI3_1
-; SZ13-NEXT:    ldeb %f1, 0(%r1)
-; SZ13-NEXT:    ddb %f1, 16(%r2)
-; SZ13-NEXT:    std %f1, 16(%r2)
-; SZ13-NEXT:    vst %v0, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
-           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
-           <3 x double> %b,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  store <3 x double> %div, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_fdiv_v4f64() {
-; S390X-LABEL: constrained_vector_fdiv_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI4_0
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI4_1
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI4_2
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI4_3
-; S390X-NEXT:    ldeb %f4, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI4_4
-; S390X-NEXT:    ldeb %f6, 0(%r1)
-; S390X-NEXT:    ddbr %f0, %f1
-; S390X-NEXT:    ddbr %f2, %f1
-; S390X-NEXT:    ddbr %f4, %f1
-; S390X-NEXT:    ddbr %f6, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fdiv_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI4_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI4_1
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    vfddb %v24, %v1, %v0
-; SZ13-NEXT:    larl %r1, .LCPI4_2
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    vfddb %v26, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
-           <4 x double> <double 1.000000e+00, double 2.000000e+00,
-                         double 3.000000e+00, double 4.000000e+00>,
-           <4 x double> <double 1.000000e+01, double 1.000000e+01,
-                         double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %div
-}
-
-define <1 x float> @constrained_vector_frem_v1f32() {
-; S390X-LABEL: constrained_vector_frem_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI5_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI5_1
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fmodf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_frem_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI5_0
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v0, 2, 8
-; SZ13-NEXT:    brasl %r14, fmodf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
-           <1 x float> <float 1.000000e+00>,
-           <1 x float> <float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %rem
-}
-
-define <2 x double> @constrained_vector_frem_v2f64() {
-; S390X-LABEL: constrained_vector_frem_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI6_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI6_1
-; S390X-NEXT:    ldeb %f8, 0(%r1)
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    larl %r1, .LCPI6_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_frem_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -184
-; SZ13-NEXT:    .cfi_def_cfa_offset 344
-; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI6_0
-; SZ13-NEXT:    ldeb %f8, 0(%r1)
-; SZ13-NEXT:    vgmg %v0, 1, 1
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vgmg %v0, 2, 11
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
-           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
-           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %rem
-}
-
-define <3 x float> @constrained_vector_frem_v3f32() {
-; S390X-LABEL: constrained_vector_frem_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI7_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI7_1
-; S390X-NEXT:    le %f8, 0(%r1)
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, fmodf@PLT
-; S390X-NEXT:    larl %r1, .LCPI7_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, fmodf@PLT
-; S390X-NEXT:    larl %r1, .LCPI7_3
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f10, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, fmodf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f9
-; S390X-NEXT:    ler %f2, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_frem_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI7_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI7_1
-; SZ13-NEXT:    lde %f8, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmodf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vgmf %v0, 2, 8
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmodf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vgmf %v0, 1, 1
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmodf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
-           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
-           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %rem
-}
-
-define void @constrained_vector_frem_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_frem_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f2, 0(%r2)
-; S390X-NEXT:    larl %r1, .LCPI8_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    larl %r1, .LCPI8_1
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    larl %r1, .LCPI8_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_frem_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v2, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    vgmg %v0, 2, 11
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v2, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v2, %v0, 1
-; SZ13-NEXT:    vgmg %v0, 1, 1
-; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    larl %r1, .LCPI8_0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
-           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
-           <3 x double> %b,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  store <3 x double> %rem, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_frem_v4f64() {
-; S390X-LABEL: constrained_vector_frem_v4f64:
-; S390X:       # %bb.0:
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -192
-; S390X-NEXT:    .cfi_def_cfa_offset 352
-; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    .cfi_offset %f11, -192
-; S390X-NEXT:    larl %r1, .LCPI9_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI9_1
-; S390X-NEXT:    ldeb %f8, 0(%r1)
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    larl %r1, .LCPI9_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    larl %r1, .LCPI9_3
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    larl %r1, .LCPI9_4
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f11, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, fmod@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    ldr %f2, %f10
-; S390X-NEXT:    ldr %f4, %f11
-; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_frem_v4f64:
-; SZ13:       # %bb.0:
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI9_0
-; SZ13-NEXT:    ldeb %f8, 0(%r1)
-; SZ13-NEXT:    vgmg %v0, 1, 1
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vgmg %v0, 2, 11
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI9_1
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    larl %r1, .LCPI9_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
-           <4 x double> <double 1.000000e+00, double 2.000000e+00,
-                         double 3.000000e+00, double 4.000000e+00>,
-           <4 x double> <double 1.000000e+01, double 1.000000e+01,
-                         double 1.000000e+01, double 1.000000e+01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %rem
-}
-
-define <1 x float> @constrained_vector_fmul_v1f32() {
-; S390X-LABEL: constrained_vector_fmul_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI10_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI10_1
-; S390X-NEXT:    meeb %f0, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fmul_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 1, 1
-; SZ13-NEXT:    vgmf %v1, 1, 8
-; SZ13-NEXT:    meebr %f1, %f0
-; SZ13-NEXT:    vlr %v24, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
-           <1 x float> <float 0x7FF0000000000000>,
-           <1 x float> <float 2.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %mul
-}
-
-define <2 x double> @constrained_vector_fmul_v2f64() {
-; S390X-LABEL: constrained_vector_fmul_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI11_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI11_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI11_2
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    mdbr %f0, %f1
-; S390X-NEXT:    mdbr %f2, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fmul_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI11_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI11_1
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    vfmdb %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
-           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %mul
-}
-
-define <3 x float> @constrained_vector_fmul_v3f32() {
-; S390X-LABEL: constrained_vector_fmul_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI12_0
-; S390X-NEXT:    le %f4, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI12_1
-; S390X-NEXT:    ler %f0, %f4
-; S390X-NEXT:    meeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI12_2
-; S390X-NEXT:    ler %f2, %f4
-; S390X-NEXT:    meeb %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI12_3
-; S390X-NEXT:    meeb %f4, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fmul_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 1, 8
-; SZ13-NEXT:    larl %r1, .LCPI12_0
-; SZ13-NEXT:    vgmf %v2, 2, 8
-; SZ13-NEXT:    vgmf %v1, 1, 8
-; SZ13-NEXT:    meeb %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI12_1
-; SZ13-NEXT:    meebr %f2, %f0
-; SZ13-NEXT:    meeb %f0, 0(%r1)
-; SZ13-NEXT:    vmrhf %v0, %v2, %v0
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
-           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
-                        float 0x7FF0000000000000>,
-           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %mul
-}
-
-define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_fmul_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI13_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ldr %f1, %f0
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    mdb %f0, 16(%r2)
-; S390X-NEXT:    mdb %f2, 8(%r2)
-; S390X-NEXT:    mdb %f1, 0(%r2)
-; S390X-NEXT:    std %f0, 16(%r2)
-; S390X-NEXT:    std %f2, 8(%r2)
-; S390X-NEXT:    std %f1, 0(%r2)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fmul_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI13_0
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI13_1
-; SZ13-NEXT:    vfmdb %v0, %v1, %v0
-; SZ13-NEXT:    ld %f1, 0(%r1)
-; SZ13-NEXT:    mdb %f1, 16(%r2)
-; SZ13-NEXT:    std %f1, 16(%r2)
-; SZ13-NEXT:    vst %v0, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
-           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                        double 0x7FEFFFFFFFFFFFFF>,
-           <3 x double> %b,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  store <3 x double> %mul, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_fmul_v4f64() {
-; S390X-LABEL: constrained_vector_fmul_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI14_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI14_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI14_2
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI14_3
-; S390X-NEXT:    ldeb %f4, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI14_4
-; S390X-NEXT:    ldeb %f6, 0(%r1)
-; S390X-NEXT:    mdbr %f0, %f1
-; S390X-NEXT:    mdbr %f2, %f1
-; S390X-NEXT:    mdbr %f4, %f1
-; S390X-NEXT:    mdbr %f6, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fmul_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI14_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI14_1
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI14_2
-; SZ13-NEXT:    vfmdb %v24, %v1, %v0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfmdb %v26, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
-           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <4 x double> <double 2.000000e+00, double 3.000000e+00,
-                         double 4.000000e+00, double 5.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %mul
-}
-
-define <1 x float> @constrained_vector_fadd_v1f32() {
-; S390X-LABEL: constrained_vector_fadd_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI15_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI15_1
-; S390X-NEXT:    aeb %f0, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fadd_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 8
-; SZ13-NEXT:    vgmf %v1, 1, 8
-; SZ13-NEXT:    aebr %f1, %f0
-; SZ13-NEXT:    vlr %v24, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
-           <1 x float> <float 0x7FF0000000000000>,
-           <1 x float> <float 1.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %add
-}
-
-define <2 x double> @constrained_vector_fadd_v2f64() {
-; S390X-LABEL: constrained_vector_fadd_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI16_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI16_1
-; S390X-NEXT:    ld %f2, 0(%r1)
-; S390X-NEXT:    adbr %f0, %f2
-; S390X-NEXT:    larl %r1, .LCPI16_2
-; S390X-NEXT:    adb %f2, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fadd_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI16_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI16_1
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    vfadb %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
-           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %add
-}
-
-define <3 x float> @constrained_vector_fadd_v3f32() {
-; S390X-LABEL: constrained_vector_fadd_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI17_0
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI17_1
-; S390X-NEXT:    ler %f2, %f1
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    aeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI17_2
-; S390X-NEXT:    aeb %f2, 0(%r1)
-; S390X-NEXT:    lzer %f4
-; S390X-NEXT:    aebr %f4, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fadd_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgbm %v0, 15
-; SZ13-NEXT:    vgmf %v2, 1, 1
-; SZ13-NEXT:    vgmf %v3, 2, 8
-; SZ13-NEXT:    lzer %f1
-; SZ13-NEXT:    aebr %f1, %f0
-; SZ13-NEXT:    aebr %f2, %f0
-; SZ13-NEXT:    aebr %f3, %f0
-; SZ13-NEXT:    vmrhf %v0, %v2, %v3
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
-           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
-                        float 0xFFFFFFFFE0000000>,
-           <3 x float> <float 2.0, float 1.0, float 0.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %add
-}
-
-define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_fadd_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI18_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ldr %f1, %f0
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    adb %f0, 16(%r2)
-; S390X-NEXT:    adb %f2, 8(%r2)
-; S390X-NEXT:    adb %f1, 0(%r2)
-; S390X-NEXT:    std %f0, 16(%r2)
-; S390X-NEXT:    std %f2, 8(%r2)
-; S390X-NEXT:    std %f1, 0(%r2)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fadd_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI18_0
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI18_1
-; SZ13-NEXT:    vfadb %v0, %v1, %v0
-; SZ13-NEXT:    ld %f1, 0(%r1)
-; SZ13-NEXT:    adb %f1, 16(%r2)
-; SZ13-NEXT:    std %f1, 16(%r2)
-; SZ13-NEXT:    vst %v0, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
-           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF>,
-           <3 x double> %b,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  store <3 x double> %add, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_fadd_v4f64() {
-; S390X-LABEL: constrained_vector_fadd_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI19_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI19_1
-; S390X-NEXT:    ld %f6, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI19_3
-; S390X-NEXT:    ldeb %f4, 0(%r1)
-; S390X-NEXT:    adbr %f0, %f6
-; S390X-NEXT:    larl %r1, .LCPI19_2
-; S390X-NEXT:    ldr %f2, %f6
-; S390X-NEXT:    adb %f2, 0(%r1)
-; S390X-NEXT:    adbr %f4, %f6
-; S390X-NEXT:    larl %r1, .LCPI19_4
-; S390X-NEXT:    adb %f6, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fadd_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI19_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI19_1
-; SZ13-NEXT:    vl %v1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI19_2
-; SZ13-NEXT:    vfadb %v24, %v1, %v0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfadb %v26, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
-           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
-                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
-           <4 x double> <double 1.000000e+00, double 1.000000e-01,
-                         double 2.000000e+00, double 2.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %add
-}
-
-define <1 x float> @constrained_vector_fsub_v1f32() {
-; S390X-LABEL: constrained_vector_fsub_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI20_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI20_1
-; S390X-NEXT:    seb %f0, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fsub_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 8
-; SZ13-NEXT:    vgmf %v1, 1, 8
-; SZ13-NEXT:    sebr %f1, %f0
-; SZ13-NEXT:    vlr %v24, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
-           <1 x float> <float 0x7FF0000000000000>,
-           <1 x float> <float 1.000000e+00>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <1 x float> %sub
-}
-
-define <2 x double> @constrained_vector_fsub_v2f64() {
-; S390X-LABEL: constrained_vector_fsub_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI21_1
-; S390X-NEXT:    ld %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI21_0
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f0, %f2
-; S390X-NEXT:    larl %r1, .LCPI21_2
-; S390X-NEXT:    sdb %f2, 0(%r1)
-; S390X-NEXT:    sdbr %f0, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fsub_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI21_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vgmg %v1, 12, 10
-; SZ13-NEXT:    vfsdb %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
-           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
-           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <2 x double> %sub
-}
-
-define <3 x float> @constrained_vector_fsub_v3f32() {
-; S390X-LABEL: constrained_vector_fsub_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI22_0
-; S390X-NEXT:    le %f4, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI22_1
-; S390X-NEXT:    ler %f0, %f4
-; S390X-NEXT:    seb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI22_2
-; S390X-NEXT:    ler %f2, %f4
-; S390X-NEXT:    seb %f2, 0(%r1)
-; S390X-NEXT:    lzer %f1
-; S390X-NEXT:    sebr %f4, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fsub_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgbm %v2, 15
-; SZ13-NEXT:    lzer %f1
-; SZ13-NEXT:    sebr %f2, %f1
-; SZ13-NEXT:    vgmf %v1, 1, 1
-; SZ13-NEXT:    vgbm %v3, 15
-; SZ13-NEXT:    vgbm %v0, 15
-; SZ13-NEXT:    sebr %f3, %f1
-; SZ13-NEXT:    vgmf %v1, 2, 8
-; SZ13-NEXT:    sebr %f0, %f1
-; SZ13-NEXT:    vmrhf %v0, %v3, %v0
-; SZ13-NEXT:    vrepf %v1, %v2, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    br %r14
-entry:
-  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
-           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
-                        float 0xFFFFFFFFE0000000>,
-           <3 x float> <float 2.0, float 1.0, float 0.0>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <3 x float> %sub
-}
-
-define void @constrained_vector_fsub_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_fsub_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI23_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ldr %f1, %f0
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    sdb %f0, 16(%r2)
-; S390X-NEXT:    sdb %f2, 8(%r2)
-; S390X-NEXT:    sdb %f1, 0(%r2)
-; S390X-NEXT:    std %f0, 16(%r2)
-; S390X-NEXT:    std %f2, 8(%r2)
-; S390X-NEXT:    std %f1, 0(%r2)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fsub_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    vgmg %v1, 12, 10
-; SZ13-NEXT:    vfsdb %v0, %v1, %v0
-; SZ13-NEXT:    sdb %f1, 16(%r2)
-; SZ13-NEXT:    std %f1, 16(%r2)
-; SZ13-NEXT:    vst %v0, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
-           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
-                         double 0xFFEFFFFFFFFFFFFF>,
-           <3 x double> %b,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  store <3 x double> %sub, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_fsub_v4f64() {
-; S390X-LABEL: constrained_vector_fsub_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI24_1
-; S390X-NEXT:    ld %f6, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI24_0
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f0, %f6
-; S390X-NEXT:    larl %r1, .LCPI24_2
-; S390X-NEXT:    ldr %f2, %f6
-; S390X-NEXT:    sdb %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI24_3
-; S390X-NEXT:    ldeb %f3, 0(%r1)
-; S390X-NEXT:    ldr %f4, %f6
-; S390X-NEXT:    larl %r1, .LCPI24_4
-; S390X-NEXT:    sdb %f6, 0(%r1)
-; S390X-NEXT:    sdbr %f0, %f1
-; S390X-NEXT:    sdbr %f4, %f3
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fsub_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI24_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vgmg %v1, 12, 10
-; SZ13-NEXT:    larl %r1, .LCPI24_1
-; SZ13-NEXT:    vfsdb %v24, %v1, %v0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfsdb %v26, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
-           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
-                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
-           <4 x double> <double 1.000000e+00, double 1.000000e-01,
-                         double 2.000000e+00, double 2.000000e-01>,
-           metadata !"round.dynamic",
-           metadata !"fpexcept.strict")
-  ret <4 x double> %sub
-}
-
-define <1 x float> @constrained_vector_sqrt_v1f32() {
-; S390X-LABEL: constrained_vector_sqrt_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI25_0
-; S390X-NEXT:    sqeb %f0, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sqrt_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI25_0
-; SZ13-NEXT:    sqeb %f0, 0(%r1)
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
-                              <1 x float> <float 42.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <1 x float> %sqrt
-}
-
-define <2 x double> @constrained_vector_sqrt_v2f64() {
-; S390X-LABEL: constrained_vector_sqrt_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI26_0
-; S390X-NEXT:    larl %r2, .LCPI26_1
-; S390X-NEXT:    ldeb %f0, 0(%r2)
-; S390X-NEXT:    sqdb %f2, 0(%r1)
-; S390X-NEXT:    sqdbr %f0, %f0
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sqrt_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI26_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfsqdb %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
-                              <2 x double> <double 42.0, double 42.1>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %sqrt
-}
-
-define <3 x float> @constrained_vector_sqrt_v3f32() {
-; S390X-LABEL: constrained_vector_sqrt_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI27_0
-; S390X-NEXT:    sqeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI27_1
-; S390X-NEXT:    sqeb %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI27_2
-; S390X-NEXT:    sqeb %f4, 0(%r1)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sqrt_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI27_0
-; SZ13-NEXT:    sqeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI27_1
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    sqeb %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI27_2
-; SZ13-NEXT:    sqeb %f2, 0(%r1)
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %sqrt
-}
-
-define void @constrained_vector_sqrt_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_sqrt_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    sqdb %f0, 16(%r2)
-; S390X-NEXT:    sqdb %f1, 8(%r2)
-; S390X-NEXT:    sqdb %f2, 0(%r2)
-; S390X-NEXT:    std %f0, 16(%r2)
-; S390X-NEXT:    std %f1, 8(%r2)
-; S390X-NEXT:    std %f2, 0(%r2)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sqrt_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    sqdb %f1, 16(%r2)
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    std %f1, 16(%r2)
-; SZ13-NEXT:    vfsqdb %v0, %v0
-; SZ13-NEXT:    vst %v0, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %sqrt, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_sqrt_v4f64() {
-; S390X-LABEL: constrained_vector_sqrt_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI29_0
-; S390X-NEXT:    sqdb %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI29_1
-; S390X-NEXT:    sqdb %f4, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI29_3
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI29_2
-; S390X-NEXT:    sqdb %f6, 0(%r1)
-; S390X-NEXT:    sqdbr %f0, %f0
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sqrt_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI29_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfsqdb %v24, %v0
-; SZ13-NEXT:    larl %r1, .LCPI29_1
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfsqdb %v26, %v0
-; SZ13-NEXT:    br %r14
- entry:
-  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
-                              <4 x double> <double 42.0, double 42.1,
-                                            double 42.2, double 42.3>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %sqrt
-}
-
-define <1 x float> @constrained_vector_pow_v1f32() {
-; S390X-LABEL: constrained_vector_pow_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI30_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI30_1
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, powf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_pow_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI30_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI30_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, powf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
-                             <1 x float> <float 42.0>,
-                             <1 x float> <float 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %pow
-}
-
-define <2 x double> @constrained_vector_pow_v2f64() {
-; S390X-LABEL: constrained_vector_pow_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI31_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI31_1
-; S390X-NEXT:    ldeb %f8, 0(%r1)
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    larl %r1, .LCPI31_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_pow_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -184
-; SZ13-NEXT:    .cfi_def_cfa_offset 344
-; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI31_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI31_1
-; SZ13-NEXT:    ldeb %f8, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    larl %r1, .LCPI31_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
-                             <2 x double> <double 42.1, double 42.2>,
-                             <2 x double> <double 3.0, double 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %pow
-}
-
-define <3 x float> @constrained_vector_pow_v3f32() {
-; S390X-LABEL: constrained_vector_pow_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI32_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI32_1
-; S390X-NEXT:    le %f8, 0(%r1)
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, powf@PLT
-; S390X-NEXT:    larl %r1, .LCPI32_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, powf@PLT
-; S390X-NEXT:    larl %r1, .LCPI32_3
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f10, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, powf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f9
-; S390X-NEXT:    ler %f2, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_pow_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI32_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI32_1
-; SZ13-NEXT:    lde %f8, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, powf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI32_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, powf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI32_3
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, powf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
-                             <3 x float> <float 42.0, float 43.0, float 44.0>,
-                             <3 x float> <float 3.0, float 3.0, float 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <3 x float> %pow
-}
-
-define void @constrained_vector_pow_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_pow_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -192
-; S390X-NEXT:    .cfi_def_cfa_offset 352
-; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    .cfi_offset %f11, -192
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    larl %r1, .LCPI33_0
-; S390X-NEXT:    ldeb %f9, 0(%r1)
-; S390X-NEXT:    ld %f10, 8(%r2)
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    ldr %f11, %f0
-; S390X-NEXT:    ldr %f0, %f10
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f10, 8(%r13)
-; S390X-NEXT:    std %f11, 0(%r13)
-; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_pow_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -208
-; SZ13-NEXT:    .cfi_def_cfa_offset 368
-; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    .cfi_offset %f9, -176
-; SZ13-NEXT:    larl %r1, .LCPI33_0
-; SZ13-NEXT:    ldeb %f9, 0(%r1)
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    ldr %f2, %f9
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ldr %f2, %f9
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    ldr %f2, %f9
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
-                          <3 x double> %b,
-                          <3 x double> <double 3.0, double 3.0, double 3.0>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %pow, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_pow_v4f64() {
-; S390X-LABEL: constrained_vector_pow_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -192
-; S390X-NEXT:    .cfi_def_cfa_offset 352
-; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    .cfi_offset %f11, -192
-; S390X-NEXT:    larl %r1, .LCPI34_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI34_1
-; S390X-NEXT:    ldeb %f8, 0(%r1)
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    larl %r1, .LCPI34_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    larl %r1, .LCPI34_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    larl %r1, .LCPI34_4
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f11, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    brasl %r14, pow@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    ldr %f2, %f10
-; S390X-NEXT:    ldr %f4, %f11
-; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_pow_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI34_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI34_1
-; SZ13-NEXT:    ldeb %f8, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    larl %r1, .LCPI34_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI34_3
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    larl %r1, .LCPI34_4
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
-                             <4 x double> <double 42.1, double 42.2,
-                                           double 42.3, double 42.4>,
-                             <4 x double> <double 3.0, double 3.0,
-                                           double 3.0, double 3.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %pow
-}
-
-define <1 x float> @constrained_vector_powi_v1f32() {
-; S390X-LABEL: constrained_vector_powi_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI35_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    brasl %r14, __powisf2@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_powi_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI35_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powisf2@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
-                              <1 x float> <float 42.0>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <1 x float> %powi
-}
-
-define <2 x double> @constrained_vector_powi_v2f64() {
-; S390X-LABEL: constrained_vector_powi_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI36_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI36_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_powi_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI36_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI36_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
-                              <2 x double> <double 42.1, double 42.2>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %powi
-}
-
-define <3 x float> @constrained_vector_powi_v3f32() {
-; S390X-LABEL: constrained_vector_powi_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI37_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    brasl %r14, __powisf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI37_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, __powisf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI37_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, __powisf2@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_powi_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI37_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powisf2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI37_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powisf2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI37_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powisf2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %powi
-}
-
-define void @constrained_vector_powi_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_powi_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI38_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI38_1
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI38_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    std %f0, 8(%r13)
-; S390X-NEXT:    std %f9, 0(%r13)
-; S390X-NEXT:    std %f8, 16(%r13)
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_powi_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI38_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI38_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI38_2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 280(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
-                          <3 x double> <double 42.0, double 42.1, double 42.2>,
-                          i32 3,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %powi, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_powi_v4f64() {
-; S390X-LABEL: constrained_vector_powi_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI39_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI39_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI39_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    larl %r1, .LCPI39_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    lghi %r2, 3
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, __powidf2@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_powi_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI39_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI39_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI39_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI39_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    lghi %r2, 3
-; SZ13-NEXT:    brasl %r14, __powidf2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
-                              <4 x double> <double 42.1, double 42.2,
-                                            double 42.3, double 42.4>,
-                              i32 3,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %powi
-}
-
-define <1 x float> @constrained_vector_sin_v1f32() {
-; S390X-LABEL: constrained_vector_sin_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI40_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, sinf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sin_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI40_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sinf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %sin
-}
-
-define <2 x double> @constrained_vector_sin_v2f64() {
-; S390X-LABEL: constrained_vector_sin_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI41_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    larl %r1, .LCPI41_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sin_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI41_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    larl %r1, .LCPI41_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %sin
-}
-
-define <3 x float> @constrained_vector_sin_v3f32() {
-; S390X-LABEL: constrained_vector_sin_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI42_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, sinf@PLT
-; S390X-NEXT:    larl %r1, .LCPI42_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, sinf@PLT
-; S390X-NEXT:    larl %r1, .LCPI42_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, sinf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sin_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI42_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sinf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI42_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sinf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI42_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sinf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %sin
-}
-
-define void @constrained_vector_sin_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_sin_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sin_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %sin, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_sin_v4f64() {
-; S390X-LABEL: constrained_vector_sin_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI44_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    larl %r1, .LCPI44_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    larl %r1, .LCPI44_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    larl %r1, .LCPI44_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, sin@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_sin_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI44_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    larl %r1, .LCPI44_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI44_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    larl %r1, .LCPI44_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %sin
-}
-
-define <1 x float> @constrained_vector_cos_v1f32() {
-; S390X-LABEL: constrained_vector_cos_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI45_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, cosf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_cos_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI45_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cosf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %cos
-}
-
-define <2 x double> @constrained_vector_cos_v2f64() {
-; S390X-LABEL: constrained_vector_cos_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI46_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    larl %r1, .LCPI46_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_cos_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI46_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    larl %r1, .LCPI46_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %cos
-}
-
-define <3 x float> @constrained_vector_cos_v3f32() {
-; S390X-LABEL: constrained_vector_cos_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI47_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, cosf@PLT
-; S390X-NEXT:    larl %r1, .LCPI47_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, cosf@PLT
-; S390X-NEXT:    larl %r1, .LCPI47_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, cosf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_cos_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI47_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cosf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI47_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cosf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI47_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cosf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %cos
-}
-
-define void @constrained_vector_cos_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_cos_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_cos_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %cos, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_cos_v4f64() {
-; S390X-LABEL: constrained_vector_cos_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI49_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    larl %r1, .LCPI49_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    larl %r1, .LCPI49_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    larl %r1, .LCPI49_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, cos@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_cos_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI49_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    larl %r1, .LCPI49_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI49_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    larl %r1, .LCPI49_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %cos
-}
-
-define <1 x float> @constrained_vector_exp_v1f32() {
-; S390X-LABEL: constrained_vector_exp_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI50_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, expf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI50_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, expf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %exp
-}
-
-define <2 x double> @constrained_vector_exp_v2f64() {
-; S390X-LABEL: constrained_vector_exp_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI51_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    larl %r1, .LCPI51_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI51_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    larl %r1, .LCPI51_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %exp
-}
-
-define <3 x float> @constrained_vector_exp_v3f32() {
-; S390X-LABEL: constrained_vector_exp_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI52_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, expf@PLT
-; S390X-NEXT:    larl %r1, .LCPI52_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, expf@PLT
-; S390X-NEXT:    larl %r1, .LCPI52_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, expf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI52_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, expf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI52_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, expf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI52_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, expf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %exp
-}
-
-define void @constrained_vector_exp_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_exp_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %exp, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_exp_v4f64() {
-; S390X-LABEL: constrained_vector_exp_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI54_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    larl %r1, .LCPI54_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    larl %r1, .LCPI54_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    larl %r1, .LCPI54_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI54_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    larl %r1, .LCPI54_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI54_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    larl %r1, .LCPI54_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %exp
-}
-
-define <1 x float> @constrained_vector_exp2_v1f32() {
-; S390X-LABEL: constrained_vector_exp2_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI55_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, exp2f@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp2_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI55_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2f@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %exp2
-}
-
-define <2 x double> @constrained_vector_exp2_v2f64() {
-; S390X-LABEL: constrained_vector_exp2_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI56_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    larl %r1, .LCPI56_1
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp2_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI56_0
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI56_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
-                              <2 x double> <double 42.1, double 42.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %exp2
-}
-
-define <3 x float> @constrained_vector_exp2_v3f32() {
-; S390X-LABEL: constrained_vector_exp2_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI57_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, exp2f@PLT
-; S390X-NEXT:    larl %r1, .LCPI57_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, exp2f@PLT
-; S390X-NEXT:    larl %r1, .LCPI57_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, exp2f@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp2_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI57_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2f@PLT
-; SZ13-NEXT:    larl %r1, .LCPI57_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2f@PLT
-; SZ13-NEXT:    larl %r1, .LCPI57_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2f@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %exp2
-}
-
-define void @constrained_vector_exp2_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_exp2_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp2_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %exp2, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_exp2_v4f64() {
-; S390X-LABEL: constrained_vector_exp2_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI59_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    larl %r1, .LCPI59_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    larl %r1, .LCPI59_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    larl %r1, .LCPI59_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, exp2@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_exp2_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI59_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI59_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI59_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI59_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
-                              <4 x double> <double 42.1, double 42.2,
-                                            double 42.3, double 42.4>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %exp2
-}
-
-define <1 x float> @constrained_vector_log_v1f32() {
-; S390X-LABEL: constrained_vector_log_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI60_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, logf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI60_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, logf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %log
-}
-
-define <2 x double> @constrained_vector_log_v2f64() {
-; S390X-LABEL: constrained_vector_log_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI61_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    larl %r1, .LCPI61_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI61_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    larl %r1, .LCPI61_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
-                             <2 x double> <double 42.0, double 42.1>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <2 x double> %log
-}
-
-define <3 x float> @constrained_vector_log_v3f32() {
-; S390X-LABEL: constrained_vector_log_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI62_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, logf@PLT
-; S390X-NEXT:    larl %r1, .LCPI62_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, logf@PLT
-; S390X-NEXT:    larl %r1, .LCPI62_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, logf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI62_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, logf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI62_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, logf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI62_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, logf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %log
-}
-
-define void @constrained_vector_log_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_log_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %log, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_log_v4f64() {
-; S390X-LABEL: constrained_vector_log_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI64_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    larl %r1, .LCPI64_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    larl %r1, .LCPI64_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    larl %r1, .LCPI64_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI64_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    larl %r1, .LCPI64_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI64_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    larl %r1, .LCPI64_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
-                             <4 x double> <double 42.0, double 42.1,
-                                           double 42.2, double 42.3>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <4 x double> %log
-}
-
-define <1 x float> @constrained_vector_log10_v1f32() {
-; S390X-LABEL: constrained_vector_log10_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI65_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log10f@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log10_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI65_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10f@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %log10
-}
-
-define <2 x double> @constrained_vector_log10_v2f64() {
-; S390X-LABEL: constrained_vector_log10_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI66_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    larl %r1, .LCPI66_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log10_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI66_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    larl %r1, .LCPI66_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
-                               <2 x double> <double 42.0, double 42.1>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <2 x double> %log10
-}
-
-define <3 x float> @constrained_vector_log10_v3f32() {
-; S390X-LABEL: constrained_vector_log10_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI67_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log10f@PLT
-; S390X-NEXT:    larl %r1, .LCPI67_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, log10f@PLT
-; S390X-NEXT:    larl %r1, .LCPI67_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, log10f@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log10_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI67_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10f@PLT
-; SZ13-NEXT:    larl %r1, .LCPI67_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10f@PLT
-; SZ13-NEXT:    larl %r1, .LCPI67_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10f@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %log10
-}
-
-define void @constrained_vector_log10_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_log10_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log10_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %log10, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_log10_v4f64() {
-; S390X-LABEL: constrained_vector_log10_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI69_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    larl %r1, .LCPI69_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    larl %r1, .LCPI69_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    larl %r1, .LCPI69_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log10@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log10_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI69_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    larl %r1, .LCPI69_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI69_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    larl %r1, .LCPI69_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
-                               <4 x double> <double 42.0, double 42.1,
-                                             double 42.2, double 42.3>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <4 x double> %log10
-}
-
-define <1 x float> @constrained_vector_log2_v1f32() {
-; S390X-LABEL: constrained_vector_log2_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI70_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log2f@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log2_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI70_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2f@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %log2
-}
-
-define <2 x double> @constrained_vector_log2_v2f64() {
-; S390X-LABEL: constrained_vector_log2_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI71_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    larl %r1, .LCPI71_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log2_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI71_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI71_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
-                              <2 x double> <double 42.0, double 42.1>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <2 x double> %log2
-}
-
-define <3 x float> @constrained_vector_log2_v3f32() {
-; S390X-LABEL: constrained_vector_log2_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI72_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log2f@PLT
-; S390X-NEXT:    larl %r1, .LCPI72_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, log2f@PLT
-; S390X-NEXT:    larl %r1, .LCPI72_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, log2f@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log2_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI72_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2f@PLT
-; SZ13-NEXT:    larl %r1, .LCPI72_1
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2f@PLT
-; SZ13-NEXT:    larl %r1, .LCPI72_2
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2f@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %log2
-}
-
-define void @constrained_vector_log2_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_log2_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log2_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %log2, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_log2_v4f64() {
-; S390X-LABEL: constrained_vector_log2_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI74_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    larl %r1, .LCPI74_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    larl %r1, .LCPI74_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    larl %r1, .LCPI74_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, log2@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log2_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI74_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI74_1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI74_2
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    larl %r1, .LCPI74_3
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
-                              <4 x double> <double 42.0, double 42.1,
-                                            double 42.2, double 42.3>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <4 x double> %log2
-}
-
-define <1 x float> @constrained_vector_rint_v1f32() {
-; S390X-LABEL: constrained_vector_rint_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI75_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    fiebr %f0, 0, %f0
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_rint_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI75_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    fiebr %f0, 0, %f0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
-                             <1 x float> <float 42.0>,
-                             metadata !"round.dynamic",
-                             metadata !"fpexcept.strict")
-  ret <1 x float> %rint
-}
-
-define <2 x double> @constrained_vector_rint_v2f64() {
-; S390X-LABEL: constrained_vector_rint_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI76_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI76_1
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    fidbr %f0, 0, %f0
-; S390X-NEXT:    fidbr %f2, 0, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_rint_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI76_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
-; SZ13-NEXT:    br %r14
-entry:
-  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
-                        <2 x double> <double 42.1, double 42.0>,
-                        metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
-  ret <2 x double> %rint
-}
-
-define <3 x float> @constrained_vector_rint_v3f32() {
-; S390X-LABEL: constrained_vector_rint_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI77_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI77_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI77_2
-; S390X-NEXT:    le %f3, 0(%r1)
-; S390X-NEXT:    fiebr %f0, 0, %f0
-; S390X-NEXT:    fiebr %f2, 0, %f1
-; S390X-NEXT:    fiebr %f4, 0, %f3
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_rint_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI77_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI77_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI77_2
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    fiebr %f0, 0, %f0
-; SZ13-NEXT:    fiebr %f1, 0, %f1
-; SZ13-NEXT:    fiebr %f2, 0, %f2
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
- entry:
-  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %rint
-}
-
-define void @constrained_vector_rint_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_rint_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f1, 8(%r2)
-; S390X-NEXT:    ld %f2, 16(%r2)
-; S390X-NEXT:    fidbr %f0, 0, %f0
-; S390X-NEXT:    fidbr %f1, 0, %f1
-; S390X-NEXT:    fidbr %f2, 0, %f2
-; S390X-NEXT:    std %f2, 16(%r2)
-; S390X-NEXT:    std %f1, 8(%r2)
-; S390X-NEXT:    std %f0, 0(%r2)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_rint_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vl %v1, 0(%r2)
-; SZ13-NEXT:    ld %f0, 16(%r2)
-; SZ13-NEXT:    vfidb %v1, %v1, 0, 0
-; SZ13-NEXT:    fidbra %f0, 0, %f0, 0
-; SZ13-NEXT:    std %f0, 16(%r2)
-; SZ13-NEXT:    vst %v1, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %rint, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_rint_v4f64() {
-; S390X-LABEL: constrained_vector_rint_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI79_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI79_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI79_2
-; S390X-NEXT:    ld %f3, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI79_3
-; S390X-NEXT:    ld %f5, 0(%r1)
-; S390X-NEXT:    fidbr %f0, 0, %f0
-; S390X-NEXT:    fidbr %f2, 0, %f1
-; S390X-NEXT:    fidbr %f4, 0, %f3
-; S390X-NEXT:    fidbr %f6, 0, %f5
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_rint_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI79_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI79_1
-; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v26, %v0, 0, 0
-; SZ13-NEXT:    br %r14
-entry:
-  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
-                        <4 x double> <double 42.1, double 42.2,
-                                      double 42.3, double 42.4>,
-                        metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
-  ret <4 x double> %rint
-}
-
-define <1 x float> @constrained_vector_nearbyint_v1f32() {
-; S390X-LABEL: constrained_vector_nearbyint_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI80_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, nearbyintf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_nearbyint_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI80_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
-                               <1 x float> <float 42.0>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %nearby
-}
-
-define <2 x double> @constrained_vector_nearbyint_v2f64() {
-; S390X-LABEL: constrained_vector_nearbyint_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI81_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    larl %r1, .LCPI81_1
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_nearbyint_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI81_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
-; SZ13-NEXT:    br %r14
-entry:
-  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
-                                <2 x double> <double 42.1, double 42.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %nearby
-}
-
-define <3 x float> @constrained_vector_nearbyint_v3f32() {
-; S390X-LABEL: constrained_vector_nearbyint_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI82_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, nearbyintf@PLT
-; S390X-NEXT:    larl %r1, .LCPI82_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, nearbyintf@PLT
-; S390X-NEXT:    larl %r1, .LCPI82_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, nearbyintf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_nearbyint_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI82_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI82_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI82_2
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
-; SZ13-NEXT:    fiebra %f1, 0, %f1, 4
-; SZ13-NEXT:    fiebra %f2, 0, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %nearby
-}
-
-define void @constrained_vector_nearbyint_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_nearbyint_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_nearbyint_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vl %v1, 0(%r2)
-; SZ13-NEXT:    ld %f0, 16(%r2)
-; SZ13-NEXT:    vfidb %v1, %v1, 4, 0
-; SZ13-NEXT:    fidbra %f0, 0, %f0, 4
-; SZ13-NEXT:    std %f0, 16(%r2)
-; SZ13-NEXT:    vst %v1, 0(%r2)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
-                          <3 x double> %b,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %nearby, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_nearbyint_v4f64() {
-; S390X-LABEL: constrained_vector_nearbyint_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI84_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    larl %r1, .LCPI84_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    larl %r1, .LCPI84_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    larl %r1, .LCPI84_3
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, nearbyint@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_nearbyint_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI84_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI84_1
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v26, %v0, 4, 0
-; SZ13-NEXT:    br %r14
-entry:
-  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
-                                <4 x double> <double 42.1, double 42.2,
-                                              double 42.3, double 42.4>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %nearby
-}
-
-define <1 x float> @constrained_vector_maxnum_v1f32() {
-; S390X-LABEL: constrained_vector_maxnum_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI85_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI85_1
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fmaxf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_maxnum_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI85_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI85_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmaxf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
-                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %max
-}
-
-define <2 x double> @constrained_vector_maxnum_v2f64() {
-; S390X-LABEL: constrained_vector_maxnum_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI86_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI86_1
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    larl %r1, .LCPI86_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI86_3
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_maxnum_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI86_0
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI86_1
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    larl %r1, .LCPI86_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI86_3
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
-                                <2 x double> <double 43.0, double 42.0>,
-                                <2 x double> <double 41.0, double 40.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %max
-}
-
-define <3 x float> @constrained_vector_maxnum_v3f32() {
-; S390X-LABEL: constrained_vector_maxnum_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI87_0
-; S390X-NEXT:    le %f8, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI87_1
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    brasl %r14, fmaxf@PLT
-; S390X-NEXT:    larl %r1, .LCPI87_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI87_3
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, fmaxf@PLT
-; S390X-NEXT:    larl %r1, .LCPI87_4
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f10, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, fmaxf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f9
-; S390X-NEXT:    ler %f2, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_maxnum_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI87_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI87_1
-; SZ13-NEXT:    lde %f8, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fmaxf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI87_2
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, fmaxf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI87_3
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI87_4
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmaxf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
-                              <3 x float> <float 43.0, float 44.0, float 45.0>,
-                              <3 x float> <float 41.0, float 42.0, float 43.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %max
-}
-
-define void @constrained_vector_log10_maxnum_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_log10_maxnum_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    larl %r1, .LCPI88_0
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ld %f9, 8(%r2)
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    larl %r1, .LCPI88_1
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f9
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    larl %r1, .LCPI88_2
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f9, 8(%r13)
-; S390X-NEXT:    std %f10, 0(%r13)
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_log10_maxnum_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI88_0
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    larl %r1, .LCPI88_1
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    larl %r1, .LCPI88_2
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %a
-  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
-                          <3 x double> %b,
-                          <3 x double> <double 40.0, double 41.0, double 42.0>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %max, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_maxnum_v4f64() {
-; S390X-LABEL: constrained_vector_maxnum_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI89_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI89_1
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    larl %r1, .LCPI89_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI89_3
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    larl %r1, .LCPI89_4
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI89_5
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    larl %r1, .LCPI89_6
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI89_7
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmax@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_maxnum_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI89_0
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI89_1
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    larl %r1, .LCPI89_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI89_3
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI89_4
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI89_5
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    larl %r1, .LCPI89_6
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI89_7
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
-                                <4 x double> <double 44.0, double 45.0,
-                                              double 46.0, double 47.0>,
-                                <4 x double> <double 40.0, double 41.0,
-                                              double 42.0, double 43.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %max
-}
-
-define <1 x float> @constrained_vector_minnum_v1f32() {
-; S390X-LABEL: constrained_vector_minnum_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI90_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI90_1
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fminf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_minnum_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -160
-; SZ13-NEXT:    .cfi_def_cfa_offset 320
-; SZ13-NEXT:    larl %r1, .LCPI90_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI90_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fminf@PLT
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
-; SZ13-NEXT:    br %r14
- entry:
-  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
-                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %min
-}
-
-define <2 x double> @constrained_vector_minnum_v2f64() {
-; S390X-LABEL: constrained_vector_minnum_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI91_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI91_1
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    larl %r1, .LCPI91_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI91_3
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_minnum_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -176
-; SZ13-NEXT:    .cfi_def_cfa_offset 336
-; SZ13-NEXT:    larl %r1, .LCPI91_0
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI91_1
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    larl %r1, .LCPI91_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI91_3
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
-                                <2 x double> <double 43.0, double 42.0>,
-                                <2 x double> <double 41.0, double 40.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %min
-}
-
-define <3 x float> @constrained_vector_minnum_v3f32() {
-; S390X-LABEL: constrained_vector_minnum_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI92_0
-; S390X-NEXT:    le %f8, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI92_1
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    brasl %r14, fminf@PLT
-; S390X-NEXT:    larl %r1, .LCPI92_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI92_3
-; S390X-NEXT:    le %f2, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, fminf@PLT
-; S390X-NEXT:    larl %r1, .LCPI92_4
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f10, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    ler %f2, %f8
-; S390X-NEXT:    brasl %r14, fminf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f9
-; S390X-NEXT:    ler %f2, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_minnum_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -200
-; SZ13-NEXT:    .cfi_def_cfa_offset 360
-; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    larl %r1, .LCPI92_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI92_1
-; SZ13-NEXT:    lde %f8, 0(%r1)
-; SZ13-NEXT:    ldr %f2, %f8
-; SZ13-NEXT:    brasl %r14, fminf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI92_2
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    brasl %r14, fminf@PLT
-; SZ13-NEXT:    larl %r1, .LCPI92_3
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI92_4
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fminf@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vrepf %v1, %v1, 0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
-                              <3 x float> <float 43.0, float 44.0, float 45.0>,
-                              <3 x float> <float 41.0, float 42.0, float 43.0>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %min
-}
-
-define void @constrained_vector_minnum_v3f64(<3 x double>* %a) {
-; S390X-LABEL: constrained_vector_minnum_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
-; S390X-NEXT:    .cfi_offset %r13, -56
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -192
-; S390X-NEXT:    .cfi_def_cfa_offset 352
-; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    .cfi_offset %f11, -192
-; S390X-NEXT:    lgr %r13, %r2
-; S390X-NEXT:    ld %f8, 16(%r2)
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    larl %r1, .LCPI93_0
-; S390X-NEXT:    ldeb %f9, 0(%r1)
-; S390X-NEXT:    ld %f10, 8(%r2)
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    ldr %f11, %f0
-; S390X-NEXT:    ldr %f0, %f10
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    std %f0, 16(%r13)
-; S390X-NEXT:    std %f10, 8(%r13)
-; S390X-NEXT:    std %f11, 0(%r13)
-; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_minnum_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
-; SZ13-NEXT:    .cfi_offset %r13, -56
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -208
-; SZ13-NEXT:    .cfi_def_cfa_offset 368
-; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
-; SZ13-NEXT:    .cfi_offset %f8, -168
-; SZ13-NEXT:    .cfi_offset %f9, -176
-; SZ13-NEXT:    larl %r1, .LCPI93_0
-; SZ13-NEXT:    ldeb %f9, 0(%r1)
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    ld %f8, 16(%r2)
-; SZ13-NEXT:    ldr %f2, %f9
-; SZ13-NEXT:    lgr %r13, %r2
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ldr %f2, %f9
-; SZ13-NEXT:    vrepg %v0, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v1, %v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldr %f0, %f8
-; SZ13-NEXT:    ldr %f2, %f9
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13)
-; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
-; SZ13-NEXT:    br %r14
-entry:
- %b = load <3 x double>, <3 x double>* %a
- %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
-                          <3 x double> %b,
-                          <3 x double> <double 3.0, double 3.0, double 3.0>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  store <3 x double> %min, <3 x double>* %a
-  ret void
-}
-
-define <4 x double> @constrained_vector_minnum_v4f64() {
-; S390X-LABEL: constrained_vector_minnum_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI94_0
-; S390X-NEXT:    ldeb %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI94_1
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    larl %r1, .LCPI94_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI94_3
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    larl %r1, .LCPI94_4
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI94_5
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    larl %r1, .LCPI94_6
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI94_7
-; S390X-NEXT:    ldeb %f2, 0(%r1)
-; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, fmin@PLT
-; S390X-NEXT:    ldr %f6, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ldr %f4, %f10
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_minnum_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
-; SZ13-NEXT:    .cfi_offset %r14, -48
-; SZ13-NEXT:    .cfi_offset %r15, -40
-; SZ13-NEXT:    aghi %r15, -192
-; SZ13-NEXT:    .cfi_def_cfa_offset 352
-; SZ13-NEXT:    larl %r1, .LCPI94_0
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI94_1
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    larl %r1, .LCPI94_2
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI94_3
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v0, %v0, %v1
-; SZ13-NEXT:    larl %r1, .LCPI94_4
-; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI94_5
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    larl %r1, .LCPI94_6
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
-; SZ13-NEXT:    ldeb %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI94_7
-; SZ13-NEXT:    ldeb %f2, 0(%r1)
-; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
-; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
-; SZ13-NEXT:    vmrhg %v26, %v0, %v1
-; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
-; SZ13-NEXT:    br %r14
-entry:
-  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
-                                <4 x double> <double 44.0, double 45.0,
-                                              double 46.0, double 47.0>,
-                                <4 x double> <double 40.0, double 41.0,
-                                              double 42.0, double 43.0>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %min
-}
-
-define <1 x float> @constrained_vector_fptrunc_v1f64() {
-; S390X-LABEL: constrained_vector_fptrunc_v1f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI95_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ledbr %f0, %f0
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fptrunc_v1f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI95_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    wledb %v24, %f0, 0, 0
-; SZ13-NEXT:    br %r14
-entry:
-  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
-                                <1 x double><double 42.1>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <1 x float> %result
-}
-
-define <2 x float> @constrained_vector_fptrunc_v2f64() {
-; S390X-LABEL: constrained_vector_fptrunc_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI96_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI96_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ledbr %f0, %f0
-; S390X-NEXT:    ledbr %f2, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fptrunc_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI96_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI96_1
-; SZ13-NEXT:    ld %f1, 0(%r1)
-; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
-; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    vmrhg %v24, %v0, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
-                                <2 x double><double 42.1, double 42.2>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x float> %result
-}
-
-define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %dest) {
-; S390X-LABEL: constrained_vector_fptrunc_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    ld %f0, 0(%r2)
-; S390X-NEXT:    ld %f1, 16(%r2)
-; S390X-NEXT:    ld %f2, 8(%r2)
-; S390X-NEXT:    ledbr %f0, %f0
-; S390X-NEXT:    lgdr %r0, %f0
-; S390X-NEXT:    nilf %r0, 0
-; S390X-NEXT:    ledbr %f0, %f2
-; S390X-NEXT:    lgdr %r1, %f0
-; S390X-NEXT:    srlg %r1, %r1, 32
-; S390X-NEXT:    lr %r0, %r1
-; S390X-NEXT:    ledbr %f0, %f1
-; S390X-NEXT:    ste %f0, 8(%r3)
-; S390X-NEXT:    stg %r0, 0(%r3)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vl %v1, 0(%r2)
-; SZ13-NEXT:    ledbra %f2, 0, %f1, 0
-; SZ13-NEXT:    vrepg %v1, %v1, 1
-; SZ13-NEXT:    ld %f0, 16(%r2)
-; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
-; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
-; SZ13-NEXT:    vmrhf %v1, %v2, %v1
-; SZ13-NEXT:    vmrhg %v1, %v1, %v1
-; SZ13-NEXT:    ste %f0, 8(%r3)
-; SZ13-NEXT:    vsteg %v1, 0(%r3), 0
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x double>, <3 x double>* %src
-  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
-                                <3 x double> %b,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  store <3 x float> %result, <3 x float>* %dest
-  ret void
-}
-
-define <4 x float> @constrained_vector_fptrunc_v4f64() {
-; S390X-LABEL: constrained_vector_fptrunc_v4f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI98_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI98_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI98_2
-; S390X-NEXT:    ld %f3, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI98_3
-; S390X-NEXT:    ld %f5, 0(%r1)
-; S390X-NEXT:    ledbr %f0, %f0
-; S390X-NEXT:    ledbr %f2, %f1
-; S390X-NEXT:    ledbr %f4, %f3
-; S390X-NEXT:    ledbr %f6, %f5
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fptrunc_v4f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI98_0
-; SZ13-NEXT:    ld %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI98_1
-; SZ13-NEXT:    ld %f1, 0(%r1)
-; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
-; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
-; SZ13-NEXT:    larl %r1, .LCPI98_2
-; SZ13-NEXT:    vmrhf %v0, %v1, %v0
-; SZ13-NEXT:    ld %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI98_3
-; SZ13-NEXT:    ld %f2, 0(%r1)
-; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
-; SZ13-NEXT:    ledbra %f2, 0, %f2, 0
-; SZ13-NEXT:    vmrhf %v1, %v2, %v1
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
-                                <4 x double><double 42.1, double 42.2,
-                                             double 42.3, double 42.4>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <4 x float> %result
-}
-
-define <1 x double> @constrained_vector_fpext_v1f32() {
-; S390X-LABEL: constrained_vector_fpext_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI99_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    ldebr %f0, %f0
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fpext_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI99_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    wldeb %v24, %f0
-; SZ13-NEXT:    br %r14
-entry:
-  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
-                                <1 x float><float 42.0>,
-                                metadata !"fpexcept.strict")
-  ret <1 x double> %result
-}
-
-define <2 x double> @constrained_vector_fpext_v2f32() {
-; S390X-LABEL: constrained_vector_fpext_v2f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI100_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI100_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ldebr %f0, %f0
-; S390X-NEXT:    ldebr %f2, %f1
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fpext_v2f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI100_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI100_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    ldebr %f0, %f0
-; SZ13-NEXT:    ldebr %f1, %f1
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
-                                <2 x float><float 42.0, float 43.0>,
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %result
-}
-
-define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %dest) {
-; S390X-LABEL: constrained_vector_fpext_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    lg %r0, 0(%r2)
-; S390X-NEXT:    le %f0, 8(%r2)
-; S390X-NEXT:    sllg %r1, %r0, 32
-; S390X-NEXT:    ldgr %f1, %r1
-; S390X-NEXT:    nilf %r0, 0
-; S390X-NEXT:    ldgr %f2, %r0
-; S390X-NEXT:    ldebr %f2, %f2
-; S390X-NEXT:    ldebr %f1, %f1
-; S390X-NEXT:    ldebr %f0, %f0
-; S390X-NEXT:    std %f0, 16(%r3)
-; S390X-NEXT:    std %f1, 8(%r3)
-; S390X-NEXT:    std %f2, 0(%r3)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fpext_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vl %v0, 0(%r2)
-; SZ13-NEXT:    vrepf %v2, %v0, 1
-; SZ13-NEXT:    ldebr %f1, %f0
-; SZ13-NEXT:    ldebr %f2, %f2
-; SZ13-NEXT:    vrepf %v0, %v0, 2
-; SZ13-NEXT:    ldebr %f0, %f0
-; SZ13-NEXT:    vmrhg %v1, %v1, %v2
-; SZ13-NEXT:    std %f0, 16(%r3)
-; SZ13-NEXT:    vst %v1, 0(%r3)
-; SZ13-NEXT:    br %r14
-entry:
-  %b = load <3 x float>, <3 x float>* %src
-  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
-                              <3 x float> %b,
-                              metadata !"fpexcept.strict")
-  store <3 x double> %result, <3 x double>* %dest
-  ret void
-}
-
-define <4 x double> @constrained_vector_fpext_v4f32() {
-; S390X-LABEL: constrained_vector_fpext_v4f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI102_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI102_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI102_2
-; S390X-NEXT:    le %f3, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI102_3
-; S390X-NEXT:    le %f5, 0(%r1)
-; S390X-NEXT:    ldebr %f0, %f0
-; S390X-NEXT:    ldebr %f2, %f1
-; S390X-NEXT:    ldebr %f4, %f3
-; S390X-NEXT:    ldebr %f6, %f5
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_fpext_v4f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI102_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI102_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    ldebr %f0, %f0
-; SZ13-NEXT:    ldebr %f1, %f1
-; SZ13-NEXT:    larl %r1, .LCPI102_2
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI102_3
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    ldebr %f0, %f0
-; SZ13-NEXT:    ldebr %f1, %f1
-; SZ13-NEXT:    vmrhg %v26, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
-                                <4 x float><float 42.0, float 43.0,
-                                            float 44.0, float 45.0>,
-                                metadata !"fpexcept.strict")
-  ret <4 x double> %result
-}
-
-define <1 x float> @constrained_vector_ceil_v1f32() {
-; S390X-LABEL: constrained_vector_ceil_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI103_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, ceilf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_ceil_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
-; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %ceil
-}
-
-define <2 x double> @constrained_vector_ceil_v2f64() {
-; S390X-LABEL: constrained_vector_ceil_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI104_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, ceil@PLT
-; S390X-NEXT:    larl %r1, .LCPI104_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, ceil@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_ceil_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI104_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 6
-; SZ13-NEXT:    br %r14
-entry:
-  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %ceil
-}
-
-define <3 x float> @constrained_vector_ceil_v3f32() {
-; S390X-LABEL: constrained_vector_ceil_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI105_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, ceilf@PLT
-; S390X-NEXT:    larl %r1, .LCPI105_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, ceilf@PLT
-; S390X-NEXT:    larl %r1, .LCPI105_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, ceilf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_ceil_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI105_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI105_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
-; SZ13-NEXT:    fiebra %f1, 6, %f1, 4
-; SZ13-NEXT:    fiebra %f2, 6, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %ceil
-}
-
-define <3 x double> @constrained_vector_ceil_v3f64() {
-; S390X-LABEL: constrained_vector_ceil_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI106_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, ceil@PLT
-; S390X-NEXT:    larl %r1, .LCPI106_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, ceil@PLT
-; S390X-NEXT:    larl %r1, .LCPI106_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, ceil@PLT
-; S390X-NEXT:    ldr %f4, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_ceil_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmg %v0, 2, 12
-; SZ13-NEXT:    larl %r1, .LCPI106_0
-; SZ13-NEXT:    fidbra %f4, 6, %f0, 4
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v0, %v0, 4, 6
-; SZ13-NEXT:    vrepg %v2, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
-; SZ13-NEXT:    br %r14
-entry:
-  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %ceil
-}
-
-define <1 x float> @constrained_vector_floor_v1f32() {
-; S390X-LABEL: constrained_vector_floor_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI107_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, floorf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_floor_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
-; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %floor
-}
-
-
-define <2 x double> @constrained_vector_floor_v2f64() {
-; S390X-LABEL: constrained_vector_floor_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI108_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, floor@PLT
-; S390X-NEXT:    larl %r1, .LCPI108_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, floor@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_floor_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI108_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 7
-; SZ13-NEXT:    br %r14
-entry:
-  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %floor
-}
-
-define <3 x float> @constrained_vector_floor_v3f32() {
-; S390X-LABEL: constrained_vector_floor_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI109_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, floorf@PLT
-; S390X-NEXT:    larl %r1, .LCPI109_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, floorf@PLT
-; S390X-NEXT:    larl %r1, .LCPI109_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, floorf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_floor_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI109_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI109_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
-; SZ13-NEXT:    fiebra %f1, 7, %f1, 4
-; SZ13-NEXT:    fiebra %f2, 7, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %floor
-}
-
-define <3 x double> @constrained_vector_floor_v3f64() {
-; S390X-LABEL: constrained_vector_floor_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI110_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, floor@PLT
-; S390X-NEXT:    larl %r1, .LCPI110_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, floor@PLT
-; S390X-NEXT:    larl %r1, .LCPI110_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, floor@PLT
-; S390X-NEXT:    ldr %f4, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_floor_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmg %v0, 2, 12
-; SZ13-NEXT:    larl %r1, .LCPI110_0
-; SZ13-NEXT:    fidbra %f4, 7, %f0, 4
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v0, %v0, 4, 7
-; SZ13-NEXT:    vrepg %v2, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
-; SZ13-NEXT:    br %r14
-entry:
-  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %floor
-}
-
-define <1 x float> @constrained_vector_round_v1f32() {
-; S390X-LABEL: constrained_vector_round_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI111_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, roundf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_round_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
-; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %round
-}
-
-define <2 x double> @constrained_vector_round_v2f64() {
-; S390X-LABEL: constrained_vector_round_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI112_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, round@PLT
-; S390X-NEXT:    larl %r1, .LCPI112_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, round@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_round_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI112_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 1
-; SZ13-NEXT:    br %r14
-entry:
-  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %round
-}
-
-define <3 x float> @constrained_vector_round_v3f32() {
-; S390X-LABEL: constrained_vector_round_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI113_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, roundf@PLT
-; S390X-NEXT:    larl %r1, .LCPI113_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, roundf@PLT
-; S390X-NEXT:    larl %r1, .LCPI113_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, roundf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_round_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI113_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI113_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
-; SZ13-NEXT:    fiebra %f1, 1, %f1, 4
-; SZ13-NEXT:    fiebra %f2, 1, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %round
-}
-
-
-define <3 x double> @constrained_vector_round_v3f64() {
-; S390X-LABEL: constrained_vector_round_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI114_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, round@PLT
-; S390X-NEXT:    larl %r1, .LCPI114_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, round@PLT
-; S390X-NEXT:    larl %r1, .LCPI114_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, round@PLT
-; S390X-NEXT:    ldr %f4, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_round_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmg %v0, 2, 12
-; SZ13-NEXT:    larl %r1, .LCPI114_0
-; SZ13-NEXT:    fidbra %f4, 1, %f0, 4
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v0, %v0, 4, 1
-; SZ13-NEXT:    vrepg %v2, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
-; SZ13-NEXT:    br %r14
-entry:
-  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %round
-}
-
-define <1 x float> @constrained_vector_trunc_v1f32() {
-; S390X-LABEL: constrained_vector_trunc_v1f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -160
-; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI115_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, truncf@PLT
-; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_trunc_v1f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
-; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
-; SZ13-NEXT:    vlr %v24, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
-                               <1 x float> <float 1.5>,
-                               metadata !"round.dynamic",
-                               metadata !"fpexcept.strict")
-  ret <1 x float> %trunc
-}
-
-define <2 x double> @constrained_vector_trunc_v2f64() {
-; S390X-LABEL: constrained_vector_trunc_v2f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI116_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, trunc@PLT
-; S390X-NEXT:    larl %r1, .LCPI116_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, trunc@PLT
-; S390X-NEXT:    ldr %f2, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_trunc_v2f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI116_0
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 5
-; SZ13-NEXT:    br %r14
-entry:
-  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
-                                metadata !"round.dynamic",
-                                metadata !"fpexcept.strict")
-  ret <2 x double> %trunc
-}
-
-define <3 x float> @constrained_vector_trunc_v3f32() {
-; S390X-LABEL: constrained_vector_trunc_v3f32:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI117_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, truncf@PLT
-; S390X-NEXT:    larl %r1, .LCPI117_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, truncf@PLT
-; S390X-NEXT:    larl %r1, .LCPI117_2
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
-; S390X-NEXT:    brasl %r14, truncf@PLT
-; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    ler %f0, %f8
-; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_trunc_v3f32:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI117_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI117_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
-; SZ13-NEXT:    fiebra %f1, 5, %f1, 4
-; SZ13-NEXT:    fiebra %f2, 5, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
-; SZ13-NEXT:    br %r14
-entry:
-  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
-                              metadata !"round.dynamic",
-                              metadata !"fpexcept.strict")
-  ret <3 x float> %trunc
-}
-
-define <3 x double> @constrained_vector_trunc_v3f64() {
-; S390X-LABEL: constrained_vector_trunc_v3f64:
-; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
-; S390X-NEXT:    .cfi_offset %r14, -48
-; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI118_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, trunc@PLT
-; S390X-NEXT:    larl %r1, .LCPI118_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, trunc@PLT
-; S390X-NEXT:    larl %r1, .LCPI118_2
-; S390X-NEXT:    ldeb %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
-; S390X-NEXT:    brasl %r14, trunc@PLT
-; S390X-NEXT:    ldr %f4, %f0
-; S390X-NEXT:    ldr %f0, %f8
-; S390X-NEXT:    ldr %f2, %f9
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
-; S390X-NEXT:    br %r14
-;
-; SZ13-LABEL: constrained_vector_trunc_v3f64:
-; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmg %v0, 2, 12
-; SZ13-NEXT:    larl %r1, .LCPI118_0
-; SZ13-NEXT:    fidbra %f4, 5, %f0, 4
-; SZ13-NEXT:    vl %v0, 0(%r1)
-; SZ13-NEXT:    vfidb %v0, %v0, 4, 5
-; SZ13-NEXT:    vrepg %v2, %v0, 1
-; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
-; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
-; SZ13-NEXT:    br %r14
-entry:
-  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
-                          <3 x double> <double 1.1, double 1.9, double 1.5>,
-                          metadata !"round.dynamic",
-                          metadata !"fpexcept.strict")
-  ret <3 x double> %trunc
-}
-
-; Single width declarations
-declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
-declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
-
-; Scalar width declarations
-declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
-declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
-declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
-declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
-
-; Illegal width declarations
-declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
-declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
-
-; Double width declarations
-declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
-declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
-declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)

From 51ce0b196a8babe7ac8b81da69139a2eae3cca0b Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Thu, 30 May 2019 17:21:45 +0000
Subject: [PATCH 0640/1176] Correct error in revert of r362112.

Differential Revision:	http://reviews.llvm.org/D62546

llvm-svn: 362118
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 95aace3dc9a63..aefc2aabf64b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2968,7 +2968,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
 
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
-    return UnrollVectorOp(N, WidenVT.getVectorNumElements());
+    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
 
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;

From d02f4a1043c0c6b472e6cfeb8a34f282d7cccb31 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Thu, 30 May 2019 17:31:54 +0000
Subject: [PATCH 0641/1176] Add Attribute NoThrow as an Exception Specifier
 Type

In response to https://bugs.llvm.org/show_bug.cgi?id=33235, it became
clear that the current mechanism of hacking through checks for the
exception specification of a function gets confused really quickly when
there are alternate exception specifiers.

This patch introcues EST_NoThrow, which is the equivilent of
EST_noexcept when caused by EST_noThrow. The existing implementation is
left in place to cover functions with no FunctionProtoType.

Differential Revision: https://reviews.llvm.org/D62435

llvm-svn: 362119
---
 clang/include/clang-c/Index.h                 |  9 ++-
 clang/include/clang/AST/Decl.h                |  8 +++
 clang/include/clang/AST/Type.h                |  1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +
 .../clang/Basic/ExceptionSpecificationType.h  |  4 +-
 clang/lib/AST/ASTContext.cpp                  |  5 +-
 clang/lib/AST/JSONNodeDumper.cpp              |  4 +-
 clang/lib/AST/Type.cpp                        |  1 +
 clang/lib/Sema/SemaDeclAttr.cpp               |  3 +-
 clang/lib/Sema/SemaDeclCXX.cpp                |  2 +
 clang/lib/Sema/SemaExprCXX.cpp                |  3 +
 clang/lib/Sema/SemaType.cpp                   | 58 ++++++++++++++++++-
 .../SemaCXX/nothrow-vs-exception-specs.cpp    | 55 ++++++++++++++++++
 clang/tools/libclang/CXType.cpp               |  2 +
 14 files changed, 151 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/SemaCXX/nothrow-vs-exception-specs.cpp

diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 7982d65bf234a..a5ed91dd1cb13 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -32,7 +32,7 @@
  * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable.
  */
 #define CINDEX_VERSION_MAJOR 0
-#define CINDEX_VERSION_MINOR 57
+#define CINDEX_VERSION_MINOR 58
 
 #define CINDEX_VERSION_ENCODE(major, minor) ( \
       ((major) * 10000)                       \
@@ -221,7 +221,12 @@ enum CXCursor_ExceptionSpecificationKind {
   /**
    * The exception specification has not been parsed yet.
    */
-  CXCursor_ExceptionSpecificationKind_Unparsed
+  CXCursor_ExceptionSpecificationKind_Unparsed,
+
+  /**
+   * The cursor has a __declspec(nothrow) exception specification.
+   */
+  CXCursor_ExceptionSpecificationKind_NoThrow
 };
 
 /**
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 6c5f5944f3872..f295eca44e83a 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -2330,6 +2330,14 @@ class FunctionDecl : public DeclaratorDecl,
     return T->castAs<FunctionType>()->getReturnType();
   }
 
+  /// Gets the ExceptionSpecificationType as declared.
+  ExceptionSpecificationType getExceptionSpecType() const {
+    auto *TSI = getTypeSourceInfo();
+    QualType T = TSI ? TSI->getType() : getType();
+    const auto *FPT = T->getAs<FunctionProtoType>();
+    return FPT ? FPT->getExceptionSpecType() : EST_None;
+  }
+
   /// Attempt to compute an informative source range covering the
   /// function exception specification, if any.
   SourceRange getExceptionSpecSourceRange() const;
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index bc5484d3c11bb..66c3de72f5f48 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -3855,6 +3855,7 @@ class FunctionProtoType final
     case EST_MSAny:
     case EST_BasicNoexcept:
     case EST_Unparsed:
+    case EST_NoThrow:
       return {0, 0, 0};
 
     case EST_Dynamic:
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 1beb7fda9bfd7..058d7d4e7a40a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2792,6 +2792,9 @@ def warn_dllimport_dropped_from_inline_function : Warning<
   InGroup<IgnoredAttributes>;
 def warn_attribute_ignored : Warning<"%0 attribute ignored">,
   InGroup<IgnoredAttributes>;
+def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
+  " exception specification; attribute ignored">,
+  InGroup<IgnoredAttributes>;
 def warn_attribute_ignored_on_inline :
   Warning<"%0 attribute ignored on inline function">,
   InGroup<IgnoredAttributes>;
diff --git a/clang/include/clang/Basic/ExceptionSpecificationType.h b/clang/include/clang/Basic/ExceptionSpecificationType.h
index 2f65efe71008c..5616860555c8a 100644
--- a/clang/include/clang/Basic/ExceptionSpecificationType.h
+++ b/clang/include/clang/Basic/ExceptionSpecificationType.h
@@ -22,6 +22,7 @@ enum ExceptionSpecificationType {
   EST_DynamicNone,      ///< throw()
   EST_Dynamic,          ///< throw(T1, T2)
   EST_MSAny,            ///< Microsoft throw(...) extension
+  EST_NoThrow,          ///< Microsoft __declspec(nothrow) extension
   EST_BasicNoexcept,    ///< noexcept
   EST_DependentNoexcept,///< noexcept(expression), value-dependent
   EST_NoexceptFalse,    ///< noexcept(expression), evals to 'false'
@@ -41,7 +42,8 @@ inline bool isComputedNoexcept(ExceptionSpecificationType ESpecType) {
 }
 
 inline bool isNoexceptExceptionSpec(ExceptionSpecificationType ESpecType) {
-  return ESpecType == EST_BasicNoexcept || isComputedNoexcept(ESpecType);
+  return ESpecType == EST_BasicNoexcept || ESpecType == EST_NoThrow ||
+         isComputedNoexcept(ESpecType);
 }
 
 inline bool isUnresolvedExceptionSpec(ExceptionSpecificationType ESpecType) {
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 87ecb5a57b5bc..4f1df7cdf190a 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3742,7 +3742,10 @@ QualType ASTContext::getFunctionTypeInternal(
         break;
       }
 
-      case EST_DynamicNone: case EST_BasicNoexcept: case EST_NoexceptTrue:
+      case EST_DynamicNone:
+      case EST_BasicNoexcept:
+      case EST_NoexceptTrue:
+      case EST_NoThrow:
         CanonicalEPI.ExceptionSpec.Type = EST_BasicNoexcept;
         break;
 
diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp
index 43cad2bf26ee0..991cf09614235 100644
--- a/clang/lib/AST/JSONNodeDumper.cpp
+++ b/clang/lib/AST/JSONNodeDumper.cpp
@@ -464,7 +464,9 @@ void JSONNodeDumper::VisitFunctionProtoType(const FunctionProtoType *T) {
     //JOS.attributeWithCall("exceptionSpecExpr",
     //                    [this, E]() { Visit(E.ExceptionSpec.NoexceptExpr); });
     break;
-
+  case EST_NoThrow:
+    JOS.attribute("exceptionSpec", "nothrow");
+    break;
   // FIXME: I cannot find a way to trigger these cases while dumping the AST. I
   // suspect you can only run into them when executing an AST dump from within
   // the debugger, which is not a use case we worry about for the JSON dumping
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index e45b1611d1fa4..733ca232dd037 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3077,6 +3077,7 @@ CanThrowResult FunctionProtoType::canThrow() const {
   case EST_DynamicNone:
   case EST_BasicNoexcept:
   case EST_NoexceptTrue:
+  case EST_NoThrow:
     return CT_Cannot;
 
   case EST_None:
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 84f00dbaa2eae..932cb18a93cef 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6853,7 +6853,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
     handleNoCfCheckAttr(S, D, AL);
     break;
   case ParsedAttr::AT_NoThrow:
-    handleSimpleAttribute<NoThrowAttr>(S, D, AL);
+    if (!AL.isUsedAsTypeAttr())
+      handleSimpleAttribute<NoThrowAttr>(S, D, AL);
     break;
   case ParsedAttr::AT_CUDAShared:
     handleSharedAttr(S, D, AL);
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index b3920ff01bdff..35863a326628b 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -192,6 +192,7 @@ Sema::ImplicitExceptionSpecification::CalledDecl(SourceLocation CallLoc,
   // If this function has a basic noexcept, it doesn't affect the outcome.
   case EST_BasicNoexcept:
   case EST_NoexceptTrue:
+  case EST_NoThrow:
     return;
   // If we're still at noexcept(true) and there's a throw() callee,
   // change to that specification.
@@ -15457,6 +15458,7 @@ bool Sema::checkThisInStaticMemberFunctionExceptionSpec(CXXMethodDecl *Method) {
   case EST_Uninstantiated:
   case EST_Unevaluated:
   case EST_BasicNoexcept:
+  case EST_NoThrow:
   case EST_DynamicNone:
   case EST_MSAny:
   case EST_None:
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 00b158debc5a2..6e67968929ad0 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -6045,6 +6045,8 @@ mergeExceptionSpecs(Sema &S, FunctionProtoType::ExceptionSpecInfo ESI1,
   if (EST2 == EST_NoexceptFalse) return ESI2;
 
   // If either of them is non-throwing, the result is the other.
+  if (EST1 == EST_NoThrow) return ESI2;
+  if (EST2 == EST_NoThrow) return ESI1;
   if (EST1 == EST_DynamicNone) return ESI2;
   if (EST2 == EST_DynamicNone) return ESI1;
   if (EST1 == EST_BasicNoexcept) return ESI2;
@@ -6073,6 +6075,7 @@ mergeExceptionSpecs(Sema &S, FunctionProtoType::ExceptionSpecInfo ESI1,
   case EST_DependentNoexcept:
   case EST_NoexceptFalse:
   case EST_NoexceptTrue:
+  case EST_NoThrow:
     llvm_unreachable("handled above");
 
   case EST_Dynamic: {
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 91743bb59fef9..e0d43a780e794 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -130,6 +130,7 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_Regparm:                                                 \
   case ParsedAttr::AT_AnyX86NoCallerSavedRegisters:                            \
   case ParsedAttr::AT_AnyX86NoCfCheck:                                         \
+  case ParsedAttr::AT_NoThrow:                                                 \
     CALLING_CONV_ATTRS_CASELIST
 
 // Microsoft-specific type qualifiers.
@@ -4516,7 +4517,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
       // If the function declarator has a prototype (i.e. it is not () and
       // does not have a K&R-style identifier list), then the arguments are part
       // of the type, otherwise the argument list is ().
-      const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
+      DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
       IsQualifiedFunction =
           FTI.hasMethodTypeQualifiers() || FTI.hasRefQualifier();
 
@@ -6945,6 +6946,61 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
     return true;
   }
 
+  if (attr.getKind() == ParsedAttr::AT_NoThrow) {
+    if (S.CheckAttrNoArgs(attr))
+      return true;
+
+    // Delay if this is not a function type.
+    if (!unwrapped.isFunctionType())
+      return false;
+
+    // Otherwise we can process right away.
+    auto *Proto = unwrapped.get()->getAs<FunctionProtoType>();
+
+    // In the case where this is a FunctionNoProtoType instead of a
+    // FunctionProtoType, let the existing NoThrowAttr implementation do its
+    // thing.
+    if (!Proto)
+      return false;
+
+    attr.setUsedAsTypeAttr();
+
+    // MSVC ignores nothrow if it is in conflict with an explicit exception
+    // specification.
+    if (Proto->hasExceptionSpec()) {
+      switch (Proto->getExceptionSpecType()) {
+      case EST_None:
+        llvm_unreachable("This doesn't have an exception spec!");
+        LLVM_FALLTHROUGH;
+      case EST_DynamicNone:
+      case EST_BasicNoexcept:
+      case EST_NoexceptTrue:
+      case EST_NoThrow:
+        // Exception spec doesn't conflict with nothrow, so don't warn.
+        break;
+
+      case EST_Dynamic:
+      case EST_MSAny:
+      case EST_NoexceptFalse:
+      case EST_DependentNoexcept:
+      case EST_Unevaluated:
+      case EST_Uninstantiated:
+      case EST_Unparsed:
+        S.Diag(attr.getLoc(), diag::warn_nothrow_attribute_ignored);
+        break;
+      }
+      return true;
+    }
+
+    type = unwrapped.wrap(
+        S, S.Context
+               .getFunctionTypeWithExceptionSpec(
+                   QualType{Proto, 0},
+                   FunctionProtoType::ExceptionSpecInfo{EST_NoThrow})
+               ->getAs<FunctionType>());
+    return true;
+  }
+
   // Delay if the type didn't work out to a function.
   if (!unwrapped.isFunctionType()) return false;
 
diff --git a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
new file mode 100644
index 0000000000000..f9bc90e30ed49
--- /dev/null
+++ b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 %s -fcxx-exceptions -fdeclspec -fsyntax-only -Wexceptions -verify -std=c++14
+// RUN: %clang_cc1 %s -fcxx-exceptions -fdeclspec -fsyntax-only -Wexceptions -verify -std=c++17 -DCPP17
+
+__attribute__((nothrow)) void f1();
+static_assert(noexcept(f1()), "");
+void f1() noexcept;
+// expected-error@+2 {{exception specification in declaration does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+void f1() noexcept(false);
+
+__attribute__((nothrow)) void f2();
+static_assert(noexcept(f2()), "");
+// expected-error@+2 {{exception specification in declaration does not match previous declaration}}
+// expected-note@-3 {{previous declaration is here}}
+void f2() noexcept(false);
+
+void f3() __attribute__((nothrow));
+static_assert(noexcept(f3()), "");
+void f3() noexcept;
+// expected-error@+2 {{exception specification in declaration does not match previous declaration}}
+// expected-note@-2 {{previous declaration is here}}
+void f3() noexcept(false);
+
+// Still noexcept due to throw()
+__attribute__((nothrow)) void f4() throw();
+static_assert(noexcept(f4()), "");
+
+// Still noexcept due to noexcept
+__attribute__((nothrow)) void f5() noexcept;
+static_assert(noexcept(f5()), "");
+
+// Still noexcept due to noexcept(true)
+__attribute__((nothrow)) void f6() noexcept(true);
+static_assert(noexcept(f6()), "");
+
+#ifndef CPP17
+// Doesn't override C++ implementation.
+// expected-warning@+1{{'nothrow' attribute conflicts with exception specification; attribute ignored}}
+__attribute__((nothrow)) void f7() throw(int);
+static_assert(!noexcept(f7()), "");
+#endif
+
+// Doesn't override C++ implementation.
+// expected-warning@+1{{'nothrow' attribute conflicts with exception specification; attribute ignored}}
+__attribute__((nothrow)) void f8() noexcept(false);
+static_assert(!noexcept(f8()), "");
+
+__declspec(nothrow) void foo1() noexcept;
+__declspec(nothrow) void foo2() noexcept(true);
+// expected-warning@+1{{'nothrow' attribute conflicts with exception specification; attribute ignored}}
+__declspec(nothrow) void foo3() noexcept(false);
+__declspec(nothrow) void foo4() noexcept(noexcept(foo1()));
+__declspec(nothrow) void foo5() noexcept(noexcept(foo2()));
+// expected-warning@+1{{'nothrow' attribute conflicts with exception specification; attribute ignored}}
+__declspec(nothrow) void foo6() noexcept(noexcept(foo3()));
diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp
index c7ac57238223c..acecf87d0cda8 100644
--- a/clang/tools/libclang/CXType.cpp
+++ b/clang/tools/libclang/CXType.cpp
@@ -742,6 +742,8 @@ getExternalExceptionSpecificationKind(ExceptionSpecificationType EST) {
     return CXCursor_ExceptionSpecificationKind_MSAny;
   case EST_BasicNoexcept:
     return CXCursor_ExceptionSpecificationKind_BasicNoexcept;
+  case EST_NoThrow:
+    return CXCursor_ExceptionSpecificationKind_NoThrow;
   case EST_NoexceptFalse:
   case EST_NoexceptTrue:
   case EST_DependentNoexcept:

From 2980f3c18f980b20bb8b5106978555ef147035f6 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Thu, 30 May 2019 17:54:26 +0000
Subject: [PATCH 0642/1176] [NFC] Fix SmallVector::append comments

Fix the copy-pasted comment.
Remove low-value comments.

llvm-svn: 362120
---
 llvm/include/llvm/ADT/SmallVector.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 09c3e8a2d8e07..17586904d2128 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -386,22 +386,18 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> {
                 std::input_iterator_tag>::value>::type>
   void append(in_iter in_start, in_iter in_end) {
     size_type NumInputs = std::distance(in_start, in_end);
-    // Grow allocated space if needed.
     if (NumInputs > this->capacity() - this->size())
       this->grow(this->size()+NumInputs);
 
-    // Copy the new elements over.
     this->uninitialized_copy(in_start, in_end, this->end());
     this->set_size(this->size() + NumInputs);
   }
 
-  /// Add the specified range to the end of the SmallVector.
+  /// Append \p NumInputs copies of \p Elt to the end.
   void append(size_type NumInputs, const T &Elt) {
-    // Grow allocated space if needed.
     if (NumInputs > this->capacity() - this->size())
       this->grow(this->size()+NumInputs);
 
-    // Copy the new elements over.
     std::uninitialized_fill_n(this->end(), NumInputs, Elt);
     this->set_size(this->size() + NumInputs);
   }

From 50daaa5f6b2636578ac70ed08e0db246be3b95b8 Mon Sep 17 00:00:00 2001
From: Michael Trent <mtrent@apple.com>
Date: Thu, 30 May 2019 17:56:05 +0000
Subject: [PATCH 0643/1176] Support Universal dSYM files in llvm-objdump

Summary:
Commonly programmers use llvm-objdump to disassemble Mach-O target
binaries with Mach-O dSYMS. While llvm-objdump allows programmers to
disassemble Universal binaries, it previously did not recognize
Universal dSYM files. This change updates llvm-objdump to support
passing in Universal files via the -dsym option. Now, when
disassembling a Mach-O file either as a stand alone file or as an entry
in a Universal binariy, llvm-objdump will search through a Universal
dSYM for a Mach-O matching the architecture flag of the file being
disassembled.

Reviewers: pete, lhames

Reviewed By: pete

Subscribers: rupprecht, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62642

llvm-svn: 362121
---
 .../X86/macho-disassemble-g-dsym.test         | 11 ++++
 llvm/tools/llvm-objdump/MachODump.cpp         | 63 +++++++++++++++++--
 2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
index f06567764b84b..01ccd3044c086 100644
--- a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
+++ b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
@@ -5,3 +5,14 @@
 // RUN: FileCheck --input-file %t0 %s
 
 CHECK: Disassembly of section __TEXT,__text:
+
+// RUN: dsymutil %p/../../dsymutil/Inputs/fat-test.dylib -o fat-test.dylib.dSYM
+// RUN: llvm-objdump -m -d -g -dsym fat-test.dylib.dSYM/Contents/Resources/DWARF/fat-test.dylib %p/../../dsymutil/Inputs/fat-test.dylib | FileCheck -check-prefix MACHO_DSYM %s
+// RUN: dsymutil %p/../../dsymutil/Inputs/basic.macho.x86_64 -o basic.macho.x86_64.dSYM
+// RUN: llvm-objdump -m -d -g -dsym basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 %p/../../dsymutil/Inputs/basic.macho.x86_64 | FileCheck -check-prefix MACHO_DSYM %s
+
+MACHO_DSYM: (__TEXT,__text) section
+
+// RUN: llvm-objdump -m -d -g -dsym %p/../Inputs/libbogus11.a %p/../../dsymutil/Inputs/basic.macho.x86_64 2>&1 | FileCheck -check-prefix BAD_INPUT %s
+
+BAD_INPUT: is not a Mach-O or Universal file type.
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index ea92ef9268a1b..b684daacb611e 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -7223,11 +7223,13 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
   raw_ostream &DebugOut = nulls();
 #endif
 
+  // Try to find debug info and set up the DIContext for it.
   std::unique_ptr<DIContext> diContext;
-  ObjectFile *DbgObj = MachOOF;
+  std::unique_ptr<Binary> DSYMBinary;
   std::unique_ptr<MemoryBuffer> DSYMBuf;
-  // Try to find debug info and set up the DIContext for it.
   if (UseDbg) {
+    ObjectFile *DbgObj = MachOOF;
+
     // A separate DSym file path was specified, parse it as a macho file,
     // get the sections and supply it to the section name parsing machinery.
     if (!DSYMFile.empty()) {
@@ -7238,12 +7240,61 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
         return;
       }
 
-      std::unique_ptr<MachOObjectFile> DbgObjCheck = unwrapOrError(
-          ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef()),
-          DSYMFile.getValue());
-      DbgObj = DbgObjCheck.release();
       // We need to keep the file alive, because we're replacing DbgObj with it.
       DSYMBuf = std::move(BufOrErr.get());
+
+      Expected<std::unique_ptr<Binary>> BinaryOrErr =
+      createBinary(DSYMBuf.get()->getMemBufferRef());
+      if (!BinaryOrErr) {
+        report_error(BinaryOrErr.takeError(), DSYMFile);
+        return;
+      }
+
+      // We need to keep the Binary elive with the buffer
+      DSYMBinary = std::move(BinaryOrErr.get());
+    
+      if (ObjectFile *O = dyn_cast<ObjectFile>(DSYMBinary.get())) {
+        // this is a Mach-O object file, use it
+        if (MachOObjectFile *MachDSYM = dyn_cast<MachOObjectFile>(&*O)) {
+          DbgObj = MachDSYM;
+        }
+        else {
+          WithColor::error(errs(), "llvm-objdump")
+            << DSYMFile << " is not a Mach-O file type.\n";
+          return;
+        }
+      }
+      else if (auto UB = dyn_cast<MachOUniversalBinary>(DSYMBinary.get())){
+        // this is a Universal Binary, find a Mach-O for this architecture
+        uint32_t CPUType, CPUSubType;
+        const char *ArchFlag;
+        if (MachOOF->is64Bit()) {
+          const MachO::mach_header_64 H_64 = MachOOF->getHeader64();
+          CPUType = H_64.cputype;
+          CPUSubType = H_64.cpusubtype;
+        } else {
+          const MachO::mach_header H = MachOOF->getHeader();
+          CPUType = H.cputype;
+          CPUSubType = H.cpusubtype;
+        }
+        Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr,
+                                                  &ArchFlag);
+        Expected<std::unique_ptr<MachOObjectFile>> MachDSYM =
+            UB->getObjectForArch(ArchFlag);
+        if (!MachDSYM) {
+          report_error(MachDSYM.takeError(), DSYMFile);
+          return;
+        }
+    
+        // We need to keep the Binary elive with the buffer
+        DbgObj = &*MachDSYM.get();
+        DSYMBinary = std::move(*MachDSYM);
+      }
+      else {
+        WithColor::error(errs(), "llvm-objdump")
+          << DSYMFile << " is not a Mach-O or Universal file type.\n";
+        return;
+      }
     }
 
     // Setup the DIContext

From 0f4446b2700a02612297bdb73a75a784a46d31bf Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Thu, 30 May 2019 18:06:28 +0000
Subject: [PATCH 0644/1176] [MIR-Canon] Add support for rewriting VRegs that
 are typed but don't have an RC.

There were crashes (addrspace-memoperands.mir was only one of them) in MIR that
had operands that came from before register classes were set. With these
operands, creating a replacement vreg (for MIR-Canon's renaming) needs to use
the vreg type rather than the RegisterClass which is not present.

Differential Revision: https://reviews.llvm.org/D62543

llvm-svn: 362122
---
 llvm/lib/CodeGen/MIRCanonicalizerPass.cpp             | 11 ++++++-----
 .../CodeGen/MIR/AArch64/addrspace-memoperands.mir     |  1 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index e7db863e1f9f8..e8a6e409fb512 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -500,14 +500,15 @@ class NamedVRegCursor {
     return virtualVRegNumber;
   }
 
-  unsigned createVirtualRegister(const TargetRegisterClass *RC) {
+  unsigned createVirtualRegister(unsigned VReg) {
     std::string S;
     raw_string_ostream OS(S);
     OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
     OS.flush();
     virtualVRegNumber++;
-
-    return MRI.createVirtualRegister(RC, OS.str());
+    if (auto RC = MRI.getRegClassOrNull(VReg))
+      return MRI.createVirtualRegister(RC, OS.str());
+    return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
   }
 };
 } // namespace
@@ -557,7 +558,7 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
       continue;
     }
 
-    auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg));
+    auto Rename = NVC.createVirtualRegister(Reg);
 
     if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
       LLVM_DEBUG(dbgs() << "Mapping vreg ";);
@@ -741,7 +742,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
     MachineInstr &MI = *MII++;
     Changed = true;
     unsigned vRegToRename = MI.getOperand(0).getReg();
-    auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename));
+    auto Rename = NVC.createVirtualRegister(vRegToRename);
 
     std::vector<MachineOperand *> RenameMOs;
     for (auto &MO : MRI.reg_operands(vRegToRename)) {
diff --git a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir
index c33e48b882fb6..bc5c06d7f674b 100644
--- a/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/addrspace-memoperands.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass mir-canonicalizer -o - %s
 
 --- |
 

From 5d5f6299229610d69c596a08d1d9344a1c47531c Mon Sep 17 00:00:00 2001
From: Michael Trent <mtrent@apple.com>
Date: Thu, 30 May 2019 18:17:10 +0000
Subject: [PATCH 0645/1176] Reverting change r362121 due to
 lld-x86_64-ubuntu-fast test failures

llvm-svn: 362123
---
 .../X86/macho-disassemble-g-dsym.test         | 11 ----
 llvm/tools/llvm-objdump/MachODump.cpp         | 63 ++-----------------
 2 files changed, 6 insertions(+), 68 deletions(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
index 01ccd3044c086..f06567764b84b 100644
--- a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
+++ b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
@@ -5,14 +5,3 @@
 // RUN: FileCheck --input-file %t0 %s
 
 CHECK: Disassembly of section __TEXT,__text:
-
-// RUN: dsymutil %p/../../dsymutil/Inputs/fat-test.dylib -o fat-test.dylib.dSYM
-// RUN: llvm-objdump -m -d -g -dsym fat-test.dylib.dSYM/Contents/Resources/DWARF/fat-test.dylib %p/../../dsymutil/Inputs/fat-test.dylib | FileCheck -check-prefix MACHO_DSYM %s
-// RUN: dsymutil %p/../../dsymutil/Inputs/basic.macho.x86_64 -o basic.macho.x86_64.dSYM
-// RUN: llvm-objdump -m -d -g -dsym basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 %p/../../dsymutil/Inputs/basic.macho.x86_64 | FileCheck -check-prefix MACHO_DSYM %s
-
-MACHO_DSYM: (__TEXT,__text) section
-
-// RUN: llvm-objdump -m -d -g -dsym %p/../Inputs/libbogus11.a %p/../../dsymutil/Inputs/basic.macho.x86_64 2>&1 | FileCheck -check-prefix BAD_INPUT %s
-
-BAD_INPUT: is not a Mach-O or Universal file type.
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index b684daacb611e..ea92ef9268a1b 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -7223,13 +7223,11 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
   raw_ostream &DebugOut = nulls();
 #endif
 
-  // Try to find debug info and set up the DIContext for it.
   std::unique_ptr<DIContext> diContext;
-  std::unique_ptr<Binary> DSYMBinary;
+  ObjectFile *DbgObj = MachOOF;
   std::unique_ptr<MemoryBuffer> DSYMBuf;
+  // Try to find debug info and set up the DIContext for it.
   if (UseDbg) {
-    ObjectFile *DbgObj = MachOOF;
-
     // A separate DSym file path was specified, parse it as a macho file,
     // get the sections and supply it to the section name parsing machinery.
     if (!DSYMFile.empty()) {
@@ -7240,61 +7238,12 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
         return;
       }
 
+      std::unique_ptr<MachOObjectFile> DbgObjCheck = unwrapOrError(
+          ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef()),
+          DSYMFile.getValue());
+      DbgObj = DbgObjCheck.release();
       // We need to keep the file alive, because we're replacing DbgObj with it.
       DSYMBuf = std::move(BufOrErr.get());
-
-      Expected<std::unique_ptr<Binary>> BinaryOrErr =
-      createBinary(DSYMBuf.get()->getMemBufferRef());
-      if (!BinaryOrErr) {
-        report_error(BinaryOrErr.takeError(), DSYMFile);
-        return;
-      }
-
-      // We need to keep the Binary elive with the buffer
-      DSYMBinary = std::move(BinaryOrErr.get());
-    
-      if (ObjectFile *O = dyn_cast<ObjectFile>(DSYMBinary.get())) {
-        // this is a Mach-O object file, use it
-        if (MachOObjectFile *MachDSYM = dyn_cast<MachOObjectFile>(&*O)) {
-          DbgObj = MachDSYM;
-        }
-        else {
-          WithColor::error(errs(), "llvm-objdump")
-            << DSYMFile << " is not a Mach-O file type.\n";
-          return;
-        }
-      }
-      else if (auto UB = dyn_cast<MachOUniversalBinary>(DSYMBinary.get())){
-        // this is a Universal Binary, find a Mach-O for this architecture
-        uint32_t CPUType, CPUSubType;
-        const char *ArchFlag;
-        if (MachOOF->is64Bit()) {
-          const MachO::mach_header_64 H_64 = MachOOF->getHeader64();
-          CPUType = H_64.cputype;
-          CPUSubType = H_64.cpusubtype;
-        } else {
-          const MachO::mach_header H = MachOOF->getHeader();
-          CPUType = H.cputype;
-          CPUSubType = H.cpusubtype;
-        }
-        Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr,
-                                                  &ArchFlag);
-        Expected<std::unique_ptr<MachOObjectFile>> MachDSYM =
-            UB->getObjectForArch(ArchFlag);
-        if (!MachDSYM) {
-          report_error(MachDSYM.takeError(), DSYMFile);
-          return;
-        }
-    
-        // We need to keep the Binary elive with the buffer
-        DbgObj = &*MachDSYM.get();
-        DSYMBinary = std::move(*MachDSYM);
-      }
-      else {
-        WithColor::error(errs(), "llvm-objdump")
-          << DSYMFile << " is not a Mach-O or Universal file type.\n";
-        return;
-      }
     }
 
     // Setup the DIContext

From 778e445c58c52d5b23aafe89855b93a00eac46e5 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 30 May 2019 18:19:35 +0000
Subject: [PATCH 0646/1176] [LoopVectorize] Add FNeg instruction support

Differential Revision: https://reviews.llvm.org/D62510

llvm-svn: 362124
---
 llvm/include/llvm/IR/IRBuilder.h              | 18 ++++++++++++
 .../Transforms/Vectorize/LoopVectorize.cpp    | 29 +++++++++++++------
 .../Transforms/LoopVectorize/X86/fneg-cost.ll |  6 ++--
 llvm/test/Transforms/LoopVectorize/fneg.ll    | 15 ++--------
 4 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index d052666354f79..6cda431a3b852 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1383,6 +1383,24 @@ class IRBuilder : public IRBuilderBase, public Inserter {
     return Insert(UnOp, Name);
   }
 
+  /// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
+  /// Correct number of operands must be passed accordingly.
+  Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
+                      const Twine &Name = "",
+                      MDNode *FPMathTag = nullptr) {
+    if (Instruction::isBinaryOp(Opc)) {
+      assert(Ops.size() == 2 && "Invalid number of operands!");
+      return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
+                         Ops[0], Ops[1], Name, FPMathTag);
+    }
+    if (Instruction::isUnaryOp(Opc)) {
+      assert(Ops.size() == 1 && "Invalid number of operands!");
+      return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
+                        Ops[0], Name, FPMathTag);
+    }
+    llvm_unreachable("Unexpected opcode!");
+  }
+
   //===--------------------------------------------------------------------===//
   // Instruction creation methods: Memory Instructions
   //===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ad3030cedc30b..a43a76724c894 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3969,6 +3969,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
   case Instruction::FAdd:
   case Instruction::Sub:
   case Instruction::FSub:
+  case Instruction::FNeg:
   case Instruction::Mul:
   case Instruction::FMul:
   case Instruction::FDiv:
@@ -3979,21 +3980,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    // Just widen binops.
-    auto *BinOp = cast<BinaryOperator>(&I);
-    setDebugLocFromInst(Builder, BinOp);
+    // Just widen unops and binops.
+    setDebugLocFromInst(Builder, &I);
 
     for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
-      Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
-      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+      SmallVector<Value *, 2> Ops;
+      for (Value *Op : I.operands())
+        Ops.push_back(getOrCreateVectorValue(Op, Part));
+
+      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
 
-      if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
-        VecOp->copyIRFlags(BinOp);
+      if (auto *VecOp = dyn_cast<Instruction>(V))
+        VecOp->copyIRFlags(&I);
 
       // Use this vector value for all users of the original instruction.
       VectorLoopValueMap.setVectorValue(&I, Part, V);
-      addMetadata(V, BinOp);
+      addMetadata(V, &I);
     }
 
     break;
@@ -5960,6 +5962,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                    I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
                    Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
   }
+  case Instruction::FNeg: {
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+    return N * TTI.getArithmeticInstrCost(
+                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   TargetTransformInfo::OK_AnyValue,
+                   TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
+                   I->getOperand(0));
+  }
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
     const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
@@ -6589,6 +6599,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
     case Instruction::FCmp:
     case Instruction::FDiv:
     case Instruction::FMul:
+    case Instruction::FNeg:
     case Instruction::FPExt:
     case Instruction::FPToSI:
     case Instruction::FPToUI:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
index 6589871674291..5aedf451ed20a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll
@@ -5,9 +5,9 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-; CHECK: Found an estimated cost of 2 for VF 1 For instruction:   %neg = fneg float %{{.*}}
-; CHECK: Found an estimated cost of 6 for VF 2 For instruction:   %neg = fneg float %{{.*}}
-; CHECK: Found an estimated cost of 14 for VF 4 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 4 for VF 1 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 4 for VF 2 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 4 for VF 4 For instruction:   %neg = fneg float %{{.*}}
 define void @fneg_cost(float* %a, i64 %n) {
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/fneg.ll b/llvm/test/Transforms/LoopVectorize/fneg.ll
index 8e5e2aae9fdd3..103e795b2115f 100644
--- a/llvm/test/Transforms/LoopVectorize/fneg.ll
+++ b/llvm/test/Transforms/LoopVectorize/fneg.ll
@@ -3,19 +3,8 @@
 define void @foo(float* %a, i64 %n) {
 ; CHECK:       vector.body:
 ; CHECK:         [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = fneg float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
-; CHECK-NEXT:    [[TMP9:%.*]] = fneg float [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT:    [[TMP11:%.*]] = fneg float [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
-; CHECK:         store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
+; CHECK:         store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
 ;
 entry:
   br label %for.body

From ec1b4d1f6fb0fe0c11ef4532db0c657067baf14c Mon Sep 17 00:00:00 2001
From: Hansang Bae <hansang.bae@intel.com>
Date: Thu, 30 May 2019 18:35:07 +0000
Subject: [PATCH 0647/1176] Fix OMP_TARGET_OFFLOAD parsing

Current parsing allows trailing string after the permitted value,
MANDATORY|DISABLED|DEFAULT -- e.g., "mandatorynot" is also recognized
as "MANDATORY". Such cases should be recognized as incorrect/unknown
value.

Differential Revision: https://reviews.llvm.org/D62431

llvm-svn: 362125
---
 openmp/runtime/src/kmp_settings.cpp          |  6 +-
 openmp/runtime/test/env/omp_target_offload.c | 62 ++++++++++++++++++++
 2 files changed, 65 insertions(+), 3 deletions(-)
 create mode 100644 openmp/runtime/test/env/omp_target_offload.c

diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 3f38bec00c20b..114cd9c9bac4a 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1250,11 +1250,11 @@ static void __kmp_stg_parse_target_offload(char const *name, char const *value,
   if (*next == '\0')
     return;
   scan = next;
-  if (__kmp_match_str("MANDATORY", scan, &next)) {
+  if (!__kmp_strcasecmp_with_sentinel("mandatory", scan, 0)) {
     __kmp_target_offload = tgt_mandatory;
-  } else if (__kmp_match_str("DISABLED", scan, &next)) {
+  } else if (!__kmp_strcasecmp_with_sentinel("disabled", scan, 0)) {
     __kmp_target_offload = tgt_disabled;
-  } else if (__kmp_match_str("DEFAULT", scan, &next)) {
+  } else if (!__kmp_strcasecmp_with_sentinel("default", scan, 0)) {
     __kmp_target_offload = tgt_default;
   } else {
     KMP_WARNING(SyntaxErrorUsing, name, "DEFAULT");
diff --git a/openmp/runtime/test/env/omp_target_offload.c b/openmp/runtime/test/env/omp_target_offload.c
new file mode 100644
index 0000000000000..91ce108b55b08
--- /dev/null
+++ b/openmp/runtime/test/env/omp_target_offload.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run
+#include <string.h>
+#include <stdlib.h>
+
+enum kmp_target_offload_kind {
+  tgt_disabled = 0,
+  tgt_default = 1,
+  tgt_mandatory = 2
+};
+
+extern int __kmpc_get_target_offload();
+
+const char *disabled_examples[] = {
+    // Allowed inputs
+    "disabled", "DISABLED", "Disabled", "dIsAbLeD", "DiSaBlEd"};
+
+const char *default_examples[] = {
+    // Allowed inputs
+    "default", "DEFAULT", "Default", "deFAulT", "DEfaULt",
+    // These should be changed to default (failed match)
+    "mandatry", "defaults", "disable", "enabled", "mandatorynot"};
+
+const char *mandatory_examples[] = {
+    // Allowed inputs
+    "mandatory", "MANDATORY", "Mandatory", "manDatoRy", "MANdATOry"};
+
+// Return target-offload-var ICV
+int get_target_offload_icv() {
+#pragma omp parallel
+  {}
+  return __kmpc_get_target_offload();
+}
+
+int main() {
+  int i;
+  const char *omp_target_offload = "OMP_TARGET_OFFLOAD=";
+  char buf[80];
+
+  for (i = 0; i < sizeof(disabled_examples) / sizeof(char *); ++i) {
+    strcpy(buf, omp_target_offload);
+    strcat(buf, disabled_examples[i]);
+    kmp_set_defaults(buf);
+    if (tgt_disabled != get_target_offload_icv())
+      return EXIT_FAILURE;
+  }
+  for (i = 0; i < sizeof(default_examples) / sizeof(char *); ++i) {
+    strcpy(buf, omp_target_offload);
+    strcat(buf, default_examples[i]);
+    kmp_set_defaults(buf);
+    if (tgt_default != get_target_offload_icv())
+      return EXIT_FAILURE;
+  }
+  for (i = 0; i < sizeof(mandatory_examples) / sizeof(char *); ++i) {
+    strcpy(buf, omp_target_offload);
+    strcat(buf, mandatory_examples[i]);
+    kmp_set_defaults(buf);
+    if (tgt_mandatory != get_target_offload_icv())
+      return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
+}

From 9bbdde259803f8a02d992a0d47b174a1efc8442f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 30 May 2019 18:46:13 +0000
Subject: [PATCH 0648/1176] [LV] Remove the redundant using
 LoopVectorizationPlanner:VPlanPtr

VPlan.h already contains the declaration of VPlanPtr type alias:

using VPlanPtr = std::unique_ptr<VPlan>;

The LoopVectorizationPlanner class also contains the same declaration
of VPlanPtr and therefore LoopVectorize requires a long wording when
its methods return VPlanPtr:

    LoopVectorizationPlanner::VPlanPtr
    LoopVectorizationPlanner::buildVPlanWithVPRecipes(...)

but LoopVectorize.cpp includes VPlan.h (via LoopVectorizationPlanner.h)
and can use VPlanPtr from that header.

Patch by Pavel Samolysov.

Reviewers: hsaito, rengolin, fhahn

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D62576

llvm-svn: 362126
---
 llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 4 +---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp          | 7 +++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 8b7cf300eef84..97077cce83e3a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -198,11 +198,9 @@ class LoopVectorizationPlanner {
   /// The legality analysis.
   LoopVectorizationLegality *Legal;
 
-  /// The profitablity analysis.
+  /// The profitability analysis.
   LoopVectorizationCostModel &CM;
 
-  using VPlanPtr = std::unique_ptr<VPlan>;
-
   SmallVector<VPlanPtr, 4> VPlans;
 
   /// This class is used to enable the VPlan to invoke a method of ILV. This is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a43a76724c894..79528e5927e08 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -56,6 +56,7 @@
 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
 #include "LoopVectorizationPlanner.h"
 #include "VPRecipeBuilder.h"
+#include "VPlan.h"
 #include "VPlanHCFGBuilder.h"
 #include "VPlanHCFGTransforms.h"
 #include "VPlanPredicator.h"
@@ -6829,8 +6830,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
   }
 }
 
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlanWithVPRecipes(
+VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
     SmallPtrSetImpl<Instruction *> &DeadInstructions) {
   // Hold a mapping from predicated instructions to their recipes, in order to
@@ -6954,8 +6954,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
   return Plan;
 }
 
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
   // Outer loop handling: They may require CFG and instruction level
   // transformations before even evaluating whether vectorization is profitable.
   // Since we cannot modify the incoming IR, we need to build VPlan upfront in

From 7fecdf36cc5b41dc5ad85d58c6e3b97b4fce6d00 Mon Sep 17 00:00:00 2001
From: Tim Renouf <tpr.llvm@botech.co.uk>
Date: Thu, 30 May 2019 18:46:34 +0000
Subject: [PATCH 0649/1176] [AMDGPU] Added target-specific attribute
 amdgpu-max-memory-clause

With LLPC, previous investigation has suggested that si-scheduler
interacts badly with SiFormMemoryClauses on an XNACK target in some
games.

That needs further investigation in the future. In the meantime, this
commit adds a target-specific attribute to allow us to disable
SIFormMemoryClauses by setting it to 1 on a per-function basis for LLPC
to use.

Differential Revision: https://reviews.llvm.org/D62572

Change-Id: Ia0ca12ce79093cbbe86caded723ffb13384ede92
llvm-svn: 362127
---
 .../lib/Target/AMDGPU/SIFormMemoryClauses.cpp |  4 +-
 .../CodeGen/AMDGPU/disable_form_clauses.ll    | 65 +++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll

diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 2cbed99beb2ff..f3c9ad63a80ab 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -319,6 +319,8 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
 
   MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
   MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
+  unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
+      MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
 
   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::instr_iterator Next;
@@ -339,7 +341,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
         continue;
 
       unsigned Length = 1;
-      for ( ; Next != E && Length < MaxClause; ++Next) {
+      for ( ; Next != E && Length < FuncMaxClause; ++Next) {
         if (!isValidClauseInst(*Next, IsVMEM))
           break;
 
diff --git a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
new file mode 100644
index 0000000000000..dd6f03f981704
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr -stop-after=si-form-memory-clauses < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}name:{{[ 	]*}}vector_clause
+; GCN:      BUNDLE
+; GCN-NEXT: LOAD_DWORDX2
+; GCN-NEXT: LOAD_DWORDX2
+; GCN-NEXT: {{^ *[}]}}
+define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp2 = zext i32 %tmp to i64
+  %tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp2
+  %tmp4 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp3, align 16
+  %tmp5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp2
+  %tmp6 = add nuw nsw i64 %tmp2, 1
+  %tmp7 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp6
+  %tmp8 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp7, align 16
+  %tmp9 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp6
+  %tmp10 = add nuw nsw i64 %tmp2, 2
+  %tmp11 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp10
+  %tmp12 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp11, align 16
+  %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp10
+  %tmp14 = add nuw nsw i64 %tmp2, 3
+  %tmp15 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp14
+  %tmp16 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp15, align 16
+  %tmp17 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp14
+  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp5, align 16
+  store <4 x i32> %tmp8, <4 x i32> addrspace(1)* %tmp9, align 16
+  store <4 x i32> %tmp12, <4 x i32> addrspace(1)* %tmp13, align 16
+  store <4 x i32> %tmp16, <4 x i32> addrspace(1)* %tmp17, align 16
+  ret void
+}
+
+; GCN-LABEL: {{^}}name:{{[ 	]*}}no_vector_clause
+; GCN-NOT:   BUNDLE
+define amdgpu_kernel void @no_vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) #0 {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp2 = zext i32 %tmp to i64
+  %tmp3 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp2
+  %tmp4 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp3, align 16
+  %tmp5 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp2
+  %tmp6 = add nuw nsw i64 %tmp2, 1
+  %tmp7 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp6
+  %tmp8 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp7, align 16
+  %tmp9 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp6
+  %tmp10 = add nuw nsw i64 %tmp2, 2
+  %tmp11 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp10
+  %tmp12 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp11, align 16
+  %tmp13 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp10
+  %tmp14 = add nuw nsw i64 %tmp2, 3
+  %tmp15 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg, i64 %tmp14
+  %tmp16 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp15, align 16
+  %tmp17 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %arg1, i64 %tmp14
+  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %tmp5, align 16
+  store <4 x i32> %tmp8, <4 x i32> addrspace(1)* %tmp9, align 16
+  store <4 x i32> %tmp12, <4 x i32> addrspace(1)* %tmp13, align 16
+  store <4 x i32> %tmp16, <4 x i32> addrspace(1)* %tmp17, align 16
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+attributes #0 = { "amdgpu-max-memory-clause"="1" }
+

From b7141207a483d39b99c2b4da4eb3bb591eca9e1a Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Thu, 30 May 2019 18:48:23 +0000
Subject: [PATCH 0650/1176] Reapply: IR: add optional type to 'byval' function
 parameters

When we switch to opaque pointer types we will need some way to describe
how many bytes a 'byval' parameter should occupy on the stack. This adds
a (for now) optional extra type parameter.

If present, the type must match the pointee type of the argument.

The original commit did not remap byval types when linking modules, which broke
LTO. This version fixes that.

Note to front-end maintainers: if this causes test failures, it's probably
because the "byval" attribute is printed after attributes without any parameter
after this change.

llvm-svn: 362128
---
 llvm/docs/LangRef.rst                         |   5 +-
 llvm/docs/ReleaseNotes.rst                    |   5 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |   1 +
 llvm/include/llvm/IR/Argument.h               |   3 +
 llvm/include/llvm/IR/Attributes.h             |  20 +++
 llvm/include/llvm/IR/CallSite.h               |   5 +
 llvm/include/llvm/IR/Function.h               |   5 +
 llvm/include/llvm/IR/InstrTypes.h             |   5 +
 llvm/lib/AsmParser/LLParser.cpp               |  24 +++-
 llvm/lib/AsmParser/LLParser.h                 |   1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  50 +++++++-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  15 ++-
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp   |   6 +-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |   5 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    |   8 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  18 ++-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   1 +
 llvm/lib/IR/AttributeImpl.h                   |  32 ++++-
 llvm/lib/IR/Attributes.cpp                    | 116 +++++++++++++++++-
 llvm/lib/IR/Function.cpp                      |   5 +
 llvm/lib/IR/Verifier.cpp                      |   5 +
 llvm/lib/Linker/IRMover.cpp                   |  20 +++
 llvm/lib/Transforms/Utils/ValueMapper.cpp     |  15 +++
 llvm/test/Assembler/byval-type-attr.ll        |  31 +++++
 llvm/test/Assembler/invalid-byval-type1.ll    |   4 +
 llvm/test/Assembler/invalid-byval-type2.ll    |   4 +
 llvm/test/Assembler/invalid-byval-type3.ll    |   4 +
 llvm/test/Bitcode/Inputs/byval-upgrade.bc     | Bin 0 -> 1092 bytes
 llvm/test/Bitcode/attributes-3.3.ll           |   2 +-
 llvm/test/Bitcode/attributes.ll               |   2 +-
 llvm/test/Bitcode/byval-upgrade.test          |   7 ++
 llvm/test/Bitcode/compatibility-3.6.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.7.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.8.ll        |   2 +-
 llvm/test/Bitcode/compatibility-3.9.ll        |   2 +-
 llvm/test/Bitcode/compatibility-4.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility-5.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility-6.0.ll        |   2 +-
 llvm/test/Bitcode/compatibility.ll            |  11 +-
 llvm/test/Bitcode/highLevelStructure.3.2.ll   |   4 +-
 llvm/test/CodeGen/AArch64/byval-type.ll       |  37 ++++++
 llvm/test/Linker/Inputs/byval-types-1.ll      |   8 ++
 llvm/test/Linker/byval-types.ll               |  17 +++
 .../test/Transforms/Inline/byval-tail-call.ll |   4 +-
 llvm/unittests/IR/AttributesTest.cpp          |  20 +++
 45 files changed, 497 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/Assembler/byval-type-attr.ll
 create mode 100644 llvm/test/Assembler/invalid-byval-type1.ll
 create mode 100644 llvm/test/Assembler/invalid-byval-type2.ll
 create mode 100644 llvm/test/Assembler/invalid-byval-type3.ll
 create mode 100644 llvm/test/Bitcode/Inputs/byval-upgrade.bc
 create mode 100644 llvm/test/Bitcode/byval-upgrade.test
 create mode 100644 llvm/test/CodeGen/AArch64/byval-type.ll
 create mode 100644 llvm/test/Linker/Inputs/byval-types-1.ll
 create mode 100644 llvm/test/Linker/byval-types.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 840272df33fd6..2105ce9d669f7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1017,7 +1017,7 @@ Currently, only the following parameter attributes are defined:
     opposed to memory, though some targets use it to distinguish between
     two different kinds of registers). Use of this attribute is
     target-specific.
-``byval``
+``byval`` or ``byval(<ty>)``
     This indicates that the pointer parameter should really be passed by
     value to the function. The attribute implies that a hidden copy of
     the pointee is made between the caller and the callee, so the callee
@@ -1029,6 +1029,9 @@ Currently, only the following parameter attributes are defined:
     ``byval`` parameters). This is not a valid attribute for return
     values.
 
+    The byval attribute also supports an optional type argument, which must be
+    the same as the pointee type of the argument.
+
     The byval attribute also supports specifying an alignment with the
     align attribute. It indicates the alignment of the stack slot to
     form and the known alignment of the pointer specified to the call
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 95fd43940b76f..60a840eccef05 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -67,6 +67,11 @@ Changes to the LLVM IR
   type is now mandatory. Specify `i8* null` to migrate from the obsoleted
   2-field form.
 
+* The ``byval`` attribute can now take a type parameter:
+  ``byval(<ty>)``. If present it must be identical to the argument's
+  pointee type. In the next release we intend to make this parameter
+  mandatory in preparation for opaque pointer types.
+
 Changes to the ARM Backend
 --------------------------
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cabca9cb22101..f06e01acae30c 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -188,6 +188,7 @@ class TargetLoweringBase {
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     uint16_t Alignment = 0;
+    Type *ByValType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index 7997c863b8300..952fbcdffb142 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -78,6 +78,9 @@ class Argument final : public Value {
   /// If this is a byval or inalloca argument, return its alignment.
   unsigned getParamAlignment() const;
 
+  /// If this is a byval argument, return its type.
+  Type *getParamByValType() const;
+
   /// Return true if this argument has the nest attribute.
   bool hasNestAttr() const;
 
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 8d7f4018e846c..06cc09e1cfc79 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -90,6 +90,7 @@ class Attribute {
   static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
   static Attribute get(LLVMContext &Context, StringRef Kind,
                        StringRef Val = StringRef());
+  static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
 
   /// Return a uniquified Attribute object that has the specific
   /// alignment set.
@@ -102,6 +103,7 @@ class Attribute {
   static Attribute getWithAllocSizeArgs(LLVMContext &Context,
                                         unsigned ElemSizeArg,
                                         const Optional<unsigned> &NumElemsArg);
+  static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
 
   //===--------------------------------------------------------------------===//
   // Attribute Accessors
@@ -117,6 +119,9 @@ class Attribute {
   /// attribute.
   bool isStringAttribute() const;
 
+  /// Return true if the attribute is a type attribute.
+  bool isTypeAttribute() const;
+
   /// Return true if the attribute is present.
   bool hasAttribute(AttrKind Val) const;
 
@@ -139,6 +144,10 @@ class Attribute {
   /// attribute to be a string attribute.
   StringRef getValueAsString() const;
 
+  /// Return the attribute's value as a Type. This requires the attribute to be
+  /// a type attribute.
+  Type *getValueAsType() const;
+
   /// Returns the alignment field of an attribute as a byte alignment
   /// value.
   unsigned getAlignment() const;
@@ -279,6 +288,7 @@ class AttributeSet {
   unsigned getStackAlignment() const;
   uint64_t getDereferenceableBytes() const;
   uint64_t getDereferenceableOrNullBytes() const;
+  Type *getByValType() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp = false) const;
 
@@ -598,6 +608,9 @@ class AttributeList {
   /// Return the alignment for the specified function parameter.
   unsigned getParamAlignment(unsigned ArgNo) const;
 
+  /// Return the byval type for the specified function parameter.
+  Type *getParamByValType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
 
@@ -697,6 +710,7 @@ class AttrBuilder {
   uint64_t DerefBytes = 0;
   uint64_t DerefOrNullBytes = 0;
   uint64_t AllocSizeArgs = 0;
+  Type *ByValType = nullptr;
 
 public:
   AttrBuilder() = default;
@@ -772,6 +786,9 @@ class AttrBuilder {
   /// dereferenceable_or_null attribute exists (zero is returned otherwise).
   uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
 
+  /// Retrieve the byval type.
+  Type *getByValType() const { return ByValType; }
+
   /// Retrieve the allocsize args, if the allocsize attribute exists.  If it
   /// doesn't exist, pair(0, 0) is returned.
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
@@ -796,6 +813,9 @@ class AttrBuilder {
   AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg);
 
+  /// This turns a byval type into the form used internally in Attribute.
+  AttrBuilder &addByValAttr(Type *Ty);
+
   /// Add an allocsize attribute, using the representation returned by
   /// Attribute.getIntValue().
   AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h
index 183e387a422e5..b47a96c5d5faa 100644
--- a/llvm/include/llvm/IR/CallSite.h
+++ b/llvm/include/llvm/IR/CallSite.h
@@ -415,6 +415,11 @@ class CallSiteBase {
     CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
+  }
+
   /// Extract the number of dereferenceable bytes for a call or parameter
   /// (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index 613d21bc64d24..896c2189eb824 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -431,6 +431,11 @@ class Function : public GlobalObject, public ilist_node<Function> {
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    return AttributeSets.getParamByValType(ArgNo);
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   /// @param i AttributeList index, referring to a return value or argument.
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 7ffa7a6f60e8f..6ce76811c0e66 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1560,6 +1560,11 @@ class CallBase : public Instruction {
     return Attrs.getParamAlignment(ArgNo);
   }
 
+  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  Type *getParamByValType(unsigned ArgNo) const {
+    return Attrs.getParamByValType(ArgNo);
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 95646675cb279..0a9a09e644d7a 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1601,7 +1601,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
       B.addAlignmentAttr(Alignment);
       continue;
     }
-    case lltok::kw_byval:           B.addAttribute(Attribute::ByVal); break;
+    case lltok::kw_byval: {
+      Type *Ty;
+      if (ParseByValWithOptionalType(Ty))
+        return true;
+      B.addByValAttr(Ty);
+      continue;
+    }
     case lltok::kw_dereferenceable: {
       uint64_t Bytes;
       if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -2454,6 +2460,22 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
   return false;
 }
 
+/// ParseByValWithOptionalType
+///   ::= byval
+///   ::= byval(<ty>)
+bool LLParser::ParseByValWithOptionalType(Type *&Result) {
+  Result = nullptr;
+  if (!EatIfPresent(lltok::kw_byval))
+    return true;
+  if (!EatIfPresent(lltok::lparen))
+    return false;
+  if (ParseType(Result))
+    return true;
+  if (!EatIfPresent(lltok::rparen))
+    return Error(Lex.getLoc(), "expected ')'");
+  return false;
+}
+
 /// ParseOptionalOperandBundles
 ///    ::= /*empty*/
 ///    ::= '[' OperandBundle [, OperandBundle ]* ']'
diff --git a/llvm/lib/AsmParser/LLParser.h b/llvm/lib/AsmParser/LLParser.h
index ad169afb93584..ec4a61b5498d8 100644
--- a/llvm/lib/AsmParser/LLParser.h
+++ b/llvm/lib/AsmParser/LLParser.h
@@ -339,6 +339,7 @@ namespace llvm {
     bool ParseFnAttributeValuePairs(AttrBuilder &B,
                                     std::vector<unsigned> &FwdRefAttrGrps,
                                     bool inAttrGrp, LocTy &BuiltinLoc);
+    bool ParseByValWithOptionalType(Type *&Result);
 
     // Module Summary Index Parsing.
     bool SkipModuleSummaryEntry();
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 134ce0367031e..9f562ba82db93 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -638,6 +638,10 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
     return getFnValueByID(ValNo, Ty);
   }
 
+  /// Upgrades old-style typeless byval attributes by adding the corresponding
+  /// argument's pointee type.
+  void propagateByValTypes(CallBase *CB);
+
   /// Converts alignment exponent (i.e. power of two (or zero)) to the
   /// corresponding alignment to use. If alignment is too large, returns
   /// a corresponding error code.
@@ -1492,6 +1496,12 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           if (Error Err = parseAttrKind(Record[++i], &Kind))
             return Err;
 
+          // Upgrade old-style byval attribute to one with a type, even if it's
+          // nullptr. We will have to insert the real type when we associate
+          // this AttributeList with a function.
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(nullptr);
+
           B.addAttribute(Kind);
         } else if (Record[i] == 1) { // Integer attribute
           Attribute::AttrKind Kind;
@@ -1507,9 +1517,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
             B.addDereferenceableOrNullAttr(Record[++i]);
           else if (Kind == Attribute::AllocSize)
             B.addAllocSizeAttrFromRawRepr(Record[++i]);
-        } else {                     // String attribute
-          assert((Record[i] == 3 || Record[i] == 4) &&
-                 "Invalid attribute group entry");
+        } else if (Record[i] == 3 || Record[i] == 4) { // String attribute
           bool HasValue = (Record[i++] == 4);
           SmallString<64> KindStr;
           SmallString<64> ValStr;
@@ -1527,6 +1535,15 @@ Error BitcodeReader::parseAttributeGroupBlock() {
           }
 
           B.addAttribute(KindStr.str(), ValStr.str());
+        } else {
+          assert((Record[i] == 5 || Record[i] == 6) &&
+                 "Invalid attribute group entry");
+          bool HasType = Record[i] == 6;
+          Attribute::AttrKind Kind;
+          if (Error Err = parseAttrKind(Record[++i], &Kind))
+            return Err;
+          if (Kind == Attribute::ByVal)
+            B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
         }
       }
 
@@ -3028,6 +3045,17 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   Func->setLinkage(getDecodedLinkage(RawLinkage));
   Func->setAttributes(getAttributes(Record[4]));
 
+  // Upgrade any old-style byval without a type by propagating the argument's
+  // pointee type. There should be no opaque pointers where the byval type is
+  // implicit.
+  for (auto &Arg : Func->args()) {
+    if (Arg.hasByValAttr() && !Arg.getParamByValType()) {
+      Arg.removeAttr(Attribute::ByVal);
+      Arg.addAttr(Attribute::getWithByValType(
+          Context, Arg.getType()->getPointerElementType()));
+    }
+  }
+
   unsigned Alignment;
   if (Error Err = parseAlignmentValue(Record[5], Alignment))
     return Err;
@@ -3441,6 +3469,19 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
   return Error::success();
 }
 
+void BitcodeReader::propagateByValTypes(CallBase *CB) {
+  for (unsigned i = 0; i < CB->getNumArgOperands(); ++i) {
+    if (CB->paramHasAttr(i, Attribute::ByVal) &&
+        !CB->getAttribute(i, Attribute::ByVal).getValueAsType()) {
+      CB->removeParamAttr(i, Attribute::ByVal);
+      CB->addParamAttr(
+          i, Attribute::getWithByValType(
+                 Context,
+                 CB->getArgOperand(i)->getType()->getPointerElementType()));
+    }
+  }
+}
+
 /// Lazily parse the specified function body block.
 Error BitcodeReader::parseFunctionBody(Function *F) {
   if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
@@ -4256,6 +4297,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       cast<InvokeInst>(I)->setCallingConv(
           static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
       cast<InvokeInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I));
+
       break;
     }
     case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
@@ -4731,6 +4774,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         TCK = CallInst::TCK_NoTail;
       cast<CallInst>(I)->setTailCallKind(TCK);
       cast<CallInst>(I)->setAttributes(PAL);
+      propagateByValTypes(cast<CallBase>(I));
       if (FMF.any()) {
         if (!isa<FPMathOperator>(I))
           return error("Fast-math-flags specified for call without "
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8e1e06226bb46..d243815667fdf 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -747,7 +747,7 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
         Record.push_back(1);
         Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
         Record.push_back(Attr.getValueAsInt());
-      } else {
+      } else if (Attr.isStringAttribute()) {
         StringRef Kind = Attr.getKindAsString();
         StringRef Val = Attr.getValueAsString();
 
@@ -758,6 +758,13 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
           Record.append(Val.begin(), Val.end());
           Record.push_back(0);
         }
+      } else {
+        assert(Attr.isTypeAttribute());
+        Type *Ty = Attr.getValueAsType();
+        Record.push_back(Ty ? 6 : 5);
+        Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+        if (Ty)
+          Record.push_back(VE.getTypeID(Attr.getValueAsType()));
       }
     }
 
@@ -4126,15 +4133,15 @@ void ModuleBitcodeWriter::write() {
   // Emit blockinfo, which defines the standard abbreviations etc.
   writeBlockInfo();
 
+  // Emit information describing all of the types in the module.
+  writeTypeTable();
+
   // Emit information about attribute groups.
   writeAttributeGroupTable();
 
   // Emit information about parameter attributes.
   writeAttributeTable();
 
-  // Emit information describing all of the types in the module.
-  writeTypeTable();
-
   writeComdats();
 
   // Emit top-level description of module, including target triple, inline asm,
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 72d7000fad9dc..143570fb20a8c 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -949,9 +949,11 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
   incorporateFunctionMetadata(F);
 
   // Adding function arguments to the value table.
-  for (const auto &I : F.args())
+  for (const auto &I : F.args()) {
     EnumerateValue(&I);
-
+    if (I.hasAttribute(Attribute::ByVal) && I.getParamByValType())
+      EnumerateType(I.getParamByValType());
+  }
   FirstFuncConstantID = Values.size();
 
   // Add all function-level constants to the value table.
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index f144b18aa6358..93727406a087a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -87,7 +87,10 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
 
   if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
-    Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+    auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
+    Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+
     // For ByVal, alignment should be passed from FE.  BE will guess if
     // this info is not there but there are cases it cannot get right.
     unsigned FrameAlign;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8fb1a7b5bb9c2..d887ed73c441e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1204,9 +1204,11 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
     if (Arg.IsByVal || Arg.IsInAlloca) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
-      unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
-      // For ByVal, alignment should come from FE. BE will guess if this info is
-      // not there, but there are cases it cannot get right.
+      unsigned FrameSize =
+          DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+      // For ByVal, alignment should come from FE. BE will guess if this info
+      // is not there, but there are cases it cannot get right.
       unsigned FrameAlign = Arg.Alignment;
       if (!FrameAlign)
         FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fe857f73b2548..da06ac7a414ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9076,8 +9076,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       if (Args[i].IsByVal || Args[i].IsInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should come from FE.  BE will guess if this
+
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Args[i].ByValType ? Args[i].ByValType : ElementTy);
+        Flags.setByValSize(FrameSize);
+
         // info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
         if (Args[i].Alignment)
@@ -9574,9 +9577,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       if (Flags.isByVal() || Flags.isInAlloca()) {
         PointerType *Ty = cast<PointerType>(Arg.getType());
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-        // For ByVal, alignment should be passed from FE.  BE will guess if
-        // this info is not there but there are cases it cannot get right.
+
+        // For ByVal, size and alignment should be passed from FE.  BE will
+        // guess if this info is not there but there are cases it cannot get
+        // right.
+        unsigned FrameSize = DL.getTypeAllocSize(
+            Arg.getParamByValType() ? Arg.getParamByValType() : ElementTy);
+        Flags.setByValSize(FrameSize);
+
         unsigned FrameAlign;
         if (Arg.getParamAlignment())
           FrameAlign = Arg.getParamAlignment();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d636e613363e4..4ad578d80fab6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -112,6 +112,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlignment(ArgIdx);
+  ByValType = Call->getParamByValType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 8ebcb04a565d1..f6898476382dd 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -29,6 +29,7 @@
 namespace llvm {
 
 class LLVMContext;
+class Type;
 
 //===----------------------------------------------------------------------===//
 /// \class
@@ -41,7 +42,8 @@ class AttributeImpl : public FoldingSetNode {
   enum AttrEntryKind {
     EnumAttrEntry,
     IntAttrEntry,
-    StringAttrEntry
+    StringAttrEntry,
+    TypeAttrEntry,
   };
 
   AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -56,6 +58,7 @@ class AttributeImpl : public FoldingSetNode {
   bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
   bool isIntAttribute() const { return KindID == IntAttrEntry; }
   bool isStringAttribute() const { return KindID == StringAttrEntry; }
+  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
 
   bool hasAttribute(Attribute::AttrKind A) const;
   bool hasAttribute(StringRef Kind) const;
@@ -66,16 +69,20 @@ class AttributeImpl : public FoldingSetNode {
   StringRef getKindAsString() const;
   StringRef getValueAsString() const;
 
+  Type *getValueAsType() const;
+
   /// Used when sorting the attributes.
   bool operator<(const AttributeImpl &AI) const;
 
   void Profile(FoldingSetNodeID &ID) const {
     if (isEnumAttribute())
-      Profile(ID, getKindAsEnum(), 0);
+      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
     else if (isIntAttribute())
       Profile(ID, getKindAsEnum(), getValueAsInt());
-    else
+    else if (isStringAttribute())
       Profile(ID, getKindAsString(), getValueAsString());
+    else
+      Profile(ID, getKindAsEnum(), getValueAsType());
   }
 
   static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
@@ -88,6 +95,12 @@ class AttributeImpl : public FoldingSetNode {
     ID.AddString(Kind);
     if (!Values.empty()) ID.AddString(Values);
   }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      Type *Ty) {
+    ID.AddInteger(Kind);
+    ID.AddPointer(Ty);
+  }
 };
 
 //===----------------------------------------------------------------------===//
@@ -145,6 +158,18 @@ class StringAttributeImpl : public AttributeImpl {
   StringRef getStringValue() const { return Val; }
 };
 
+class TypeAttributeImpl : public EnumAttributeImpl {
+  virtual void anchor();
+
+  Type *Ty;
+
+public:
+  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+  Type *getTypeValue() const { return Ty; }
+};
+
 //===----------------------------------------------------------------------===//
 /// \class
 /// This class represents a group of attributes that apply to one
@@ -189,6 +214,7 @@ class AttributeSetNode final
   uint64_t getDereferenceableOrNullBytes() const;
   std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
   std::string getAsString(bool InAttrGrp) const;
+  Type *getByValType() const;
 
   using iterator = const Attribute *;
 
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 0be09a05e82ab..67d35def9ffdf 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -121,6 +121,27 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
   return Attribute(PA);
 }
 
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+                         Type *Ty) {
+  LLVMContextImpl *pImpl = Context.pImpl;
+  FoldingSetNodeID ID;
+  ID.AddInteger(Kind);
+  ID.AddPointer(Ty);
+
+  void *InsertPoint;
+  AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+  if (!PA) {
+    // If we didn't find any existing attributes of the same shape then create a
+    // new one and insert it.
+    PA = new TypeAttributeImpl(Kind, Ty);
+    pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+  }
+
+  // Return the Attribute that we found or created.
+  return Attribute(PA);
+}
+
 Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x40000000 && "Alignment too large.");
@@ -146,6 +167,10 @@ Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
   return get(Context, DereferenceableOrNull, Bytes);
 }
 
+Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
+  return get(Context, ByVal, Ty);
+}
+
 Attribute
 Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
                                 const Optional<unsigned> &NumElemsArg) {
@@ -170,9 +195,13 @@ bool Attribute::isStringAttribute() const {
   return pImpl && pImpl->isStringAttribute();
 }
 
+bool Attribute::isTypeAttribute() const {
+  return pImpl && pImpl->isTypeAttribute();
+}
+
 Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
-  assert((isEnumAttribute() || isIntAttribute()) &&
+  assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
   return pImpl->getKindAsEnum();
 }
@@ -198,6 +227,14 @@ StringRef Attribute::getValueAsString() const {
   return pImpl->getValueAsString();
 }
 
+Type *Attribute::getValueAsType() const {
+  if (!pImpl) return {};
+  assert(isTypeAttribute() &&
+         "Invalid attribute type to get the value as a type!");
+  return pImpl->getValueAsType();
+}
+
+
 bool Attribute::hasAttribute(AttrKind Kind) const {
   return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
 }
@@ -252,8 +289,6 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "argmemonly";
   if (hasAttribute(Attribute::Builtin))
     return "builtin";
-  if (hasAttribute(Attribute::ByVal))
-    return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
   if (hasAttribute(Attribute::SwiftError))
@@ -353,6 +388,19 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
   if (hasAttribute(Attribute::ImmArg))
     return "immarg";
 
+  if (hasAttribute(Attribute::ByVal)) {
+    std::string Result;
+    Result += "byval";
+    if (Type *Ty = getValueAsType()) {
+      raw_string_ostream OS(Result);
+      Result += '(';
+      Ty->print(OS, false, true);
+      OS.flush();
+      Result += ')';
+    }
+    return Result;
+  }
+
   // FIXME: These should be output like this:
   //
   //   align=4
@@ -451,6 +499,8 @@ void IntAttributeImpl::anchor() {}
 
 void StringAttributeImpl::anchor() {}
 
+void TypeAttributeImpl::anchor() {}
+
 bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
   if (isStringAttribute()) return false;
   return getKindAsEnum() == A;
@@ -462,7 +512,7 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {
 }
 
 Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
-  assert(isEnumAttribute() || isIntAttribute());
+  assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute());
   return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
 }
 
@@ -481,6 +531,11 @@ StringRef AttributeImpl::getValueAsString() const {
   return static_cast<const StringAttributeImpl *>(this)->getStringValue();
 }
 
+Type *AttributeImpl::getValueAsType() const {
+  assert(isTypeAttribute());
+  return static_cast<const TypeAttributeImpl *>(this)->getTypeValue();
+}
+
 bool AttributeImpl::operator<(const AttributeImpl &AI) const {
   // This sorts the attributes with Attribute::AttrKinds coming first (sorted
   // relative to their enum value) and then strings.
@@ -488,10 +543,23 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
     if (AI.isIntAttribute()) return true;
     if (AI.isStringAttribute()) return true;
+    if (AI.isTypeAttribute()) return true;
+  }
+
+  if (isTypeAttribute()) {
+    if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) {
+      assert(getKindAsEnum() != AI.getKindAsEnum() &&
+             "Comparison of types would be unstable");
+      return getKindAsEnum() < AI.getKindAsEnum();
+    }
+    if (AI.isIntAttribute()) return true;
+    if (AI.isStringAttribute()) return true;
   }
 
   if (isIntAttribute()) {
     if (AI.isEnumAttribute()) return false;
+    if (AI.isTypeAttribute()) return false;
     if (AI.isIntAttribute()) {
       if (getKindAsEnum() == AI.getKindAsEnum())
         return getValueAsInt() < AI.getValueAsInt();
@@ -500,7 +568,9 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
     if (AI.isStringAttribute()) return true;
   }
 
+  assert(isStringAttribute());
   if (AI.isEnumAttribute()) return false;
+  if (AI.isTypeAttribute()) return false;
   if (AI.isIntAttribute()) return false;
   if (getKindAsString() == AI.getKindAsString())
     return getValueAsString() < AI.getValueAsString();
@@ -608,6 +678,10 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
   return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
 }
 
+Type *AttributeSet::getByValType() const {
+  return SetNode ? SetNode->getByValType() : nullptr;
+}
+
 std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
   return SetNode ? SetNode->getAllocSizeArgs()
                  : std::pair<unsigned, Optional<unsigned>>(0, 0);
@@ -691,6 +765,9 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
 
     Attribute Attr;
     switch (Kind) {
+    case Attribute::ByVal:
+      Attr = Attribute::getWithByValType(C, B.getByValType());
+      break;
     case Attribute::Alignment:
       Attr = Attribute::getWithAlignment(C, B.getAlignment());
       break;
@@ -760,6 +837,13 @@ unsigned AttributeSetNode::getStackAlignment() const {
   return 0;
 }
 
+Type *AttributeSetNode::getByValType() const {
+  for (const auto I : *this)
+    if (I.hasAttribute(Attribute::ByVal))
+      return I.getValueAsType();
+  return 0;
+}
+
 uint64_t AttributeSetNode::getDereferenceableBytes() const {
   for (const auto I : *this)
     if (I.hasAttribute(Attribute::Dereferenceable))
@@ -1258,6 +1342,11 @@ unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
   return getAttributes(ArgNo + FirstArgIndex).getAlignment();
 }
 
+Type *AttributeList::getParamByValType(unsigned Index) const {
+  return getAttributes(Index+FirstArgIndex).getByValType();
+}
+
+
 unsigned AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }
@@ -1336,6 +1425,7 @@ void AttrBuilder::clear() {
   TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
+  ByValType = nullptr;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1360,6 +1450,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
     Alignment = Attr.getAlignment();
   else if (Kind == Attribute::StackAlignment)
     StackAlignment = Attr.getStackAlignment();
+  else if (Kind == Attribute::ByVal)
+    ByValType = Attr.getValueAsType();
   else if (Kind == Attribute::Dereferenceable)
     DerefBytes = Attr.getDereferenceableBytes();
   else if (Kind == Attribute::DereferenceableOrNull)
@@ -1382,6 +1474,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
     Alignment = 0;
   else if (Val == Attribute::StackAlignment)
     StackAlignment = 0;
+  else if (Val == Attribute::ByVal)
+    ByValType = nullptr;
   else if (Val == Attribute::Dereferenceable)
     DerefBytes = 0;
   else if (Val == Attribute::DereferenceableOrNull)
@@ -1464,6 +1558,12 @@ AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
   return *this;
 }
 
+AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
+  Attrs[Attribute::ByVal] = true;
+  ByValType = Ty;
+  return *this;
+}
+
 AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   // FIXME: What if both have alignments, but they don't match?!
   if (!Alignment)
@@ -1481,6 +1581,9 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   if (!AllocSizeArgs)
     AllocSizeArgs = B.AllocSizeArgs;
 
+  if (!ByValType)
+    ByValType = B.ByValType;
+
   Attrs |= B.Attrs;
 
   for (auto I : B.td_attrs())
@@ -1506,6 +1609,9 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
   if (B.AllocSizeArgs)
     AllocSizeArgs = 0;
 
+  if (B.ByValType)
+    ByValType = nullptr;
+
   Attrs &= ~B.Attrs;
 
   for (auto I : B.td_attrs())
@@ -1565,7 +1671,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
       return false;
 
   return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
-         DerefBytes == B.DerefBytes;
+         DerefBytes == B.DerefBytes && ByValType == B.ByValType;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index b00deb677b31d..a4a78ca4deb96 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -113,6 +113,11 @@ unsigned Argument::getParamAlignment() const {
   return getParent()->getParamAlignment(getArgNo());
 }
 
+Type *Argument::getParamByValType() const {
+  assert(getType()->isPointerTy() && "Only pointers have byval types");
+  return getParent()->getParamByValType(getArgNo());
+}
+
 uint64_t Argument::getDereferenceableBytes() const {
   assert(getType()->isPointerTy() &&
          "Only pointers have dereferenceable bytes");
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index d5c3287e7134c..963bf82c98982 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1695,6 +1695,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
          "'noinline and alwaysinline' are incompatible!",
          V);
 
+  if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
+    Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
+           "Attribute 'byval' type does not match parameter!");
+  }
+
   AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
   Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
          "Wrong types for attribute: " +
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 7635092dd434a..37515d93ed501 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -489,6 +489,10 @@ class IRLinker {
   void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
   Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
 
+  /// Replace all types in the source AttributeList with the
+  /// corresponding destination type.
+  AttributeList mapAttributeTypes(LLVMContext &C, AttributeList Attrs);
+
   /// Functions that take care of cloning a specific global value type
   /// into the destination module.
   GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
@@ -628,6 +632,21 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
   return NewDGV;
 }
 
+AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
+  for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+    if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+      Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+      if (!Ty)
+        continue;
+
+      Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+      Attrs = Attrs.addAttribute(
+          C, i, Attribute::getWithByValType(C, TypeMap.get(Ty)));
+    }
+  }
+  return Attrs;
+}
+
 /// Link the function in the source module into the destination module if
 /// needed, setting up mapping information.
 Function *IRLinker::copyFunctionProto(const Function *SF) {
@@ -637,6 +656,7 @@ Function *IRLinker::copyFunctionProto(const Function *SF) {
       Function::Create(TypeMap.get(SF->getFunctionType()),
                        GlobalValue::ExternalLinkage, SF->getName(), &DstM);
   F->copyAttributesFrom(SF);
+  F->setAttributes(mapAttributeTypes(F->getContext(), F->getAttributes()));
   return F;
 }
 
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 1710f97d23401..fbc3407c301fc 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -913,6 +913,21 @@ void Mapper::remapInstruction(Instruction *I) {
       Tys.push_back(TypeMapper->remapType(Ty));
     CS.mutateFunctionType(FunctionType::get(
         TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+
+    LLVMContext &C = CS->getContext();
+    AttributeList Attrs = CS.getAttributes();
+    for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+      if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+        Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+        if (!Ty)
+          continue;
+
+        Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+        Attrs = Attrs.addAttribute(
+            C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
+      }
+    }
+    CS.setAttributes(Attrs);
     return;
   }
   if (auto *AI = dyn_cast<AllocaInst>(I))
diff --git a/llvm/test/Assembler/byval-type-attr.ll b/llvm/test/Assembler/byval-type-attr.ll
new file mode 100644
index 0000000000000..eb17a30db3b2e
--- /dev/null
+++ b/llvm/test/Assembler/byval-type-attr.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: define void @foo(i32* byval(i32) align 4)
+define void @foo(i32* byval(i32) align 4) {
+  ret void
+}
+
+; CHECK: define void @bar({ i32*, i8 }* byval({ i32*, i8 }) align 4)
+define void @bar({i32*, i8}* byval({i32*, i8}) align 4) {
+  ret void
+}
+
+define void @caller({ i32*, i8 }* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+; CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+  call void @bar({i32*, i8}* byval %ptr)
+  invoke void @bar({i32*, i8}* byval %ptr) to label %success unwind label %fail
+
+success:
+  ret void
+
+fail:
+  landingpad { i8*, i32 } cleanup
+  ret void
+}
+
+; CHECK: declare void @baz([8 x i8]* byval([8 x i8]))
+%named_type = type [8 x i8]
+declare void @baz(%named_type* byval(%named_type))
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Assembler/invalid-byval-type1.ll b/llvm/test/Assembler/invalid-byval-type1.ll
new file mode 100644
index 0000000000000..236790e114289
--- /dev/null
+++ b/llvm/test/Assembler/invalid-byval-type1.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attribute 'byval' type does not match parameter!
+declare void @foo(i32* byval(i8))
diff --git a/llvm/test/Assembler/invalid-byval-type2.ll b/llvm/test/Assembler/invalid-byval-type2.ll
new file mode 100644
index 0000000000000..3ca0d5e7c4c2b
--- /dev/null
+++ b/llvm/test/Assembler/invalid-byval-type2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: error: void type only allowed for function results
+declare void @foo(i32* byval(void))
diff --git a/llvm/test/Assembler/invalid-byval-type3.ll b/llvm/test/Assembler/invalid-byval-type3.ll
new file mode 100644
index 0000000000000..4626dd71c5b56
--- /dev/null
+++ b/llvm/test/Assembler/invalid-byval-type3.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: Attributes 'byval' and 'inalloca' do not support unsized types!
+declare void @foo(void()* byval(void()))
diff --git a/llvm/test/Bitcode/Inputs/byval-upgrade.bc b/llvm/test/Bitcode/Inputs/byval-upgrade.bc
new file mode 100644
index 0000000000000000000000000000000000000000..e0da41f78e773b0d9154313098c2adbc5d3e5a71
GIT binary patch
literal 1092
zcmX|APfXip6#o(jY#?(CjkZv1%LdkE)kNK}b;+DaB&J=^IPhnh(A0^9wG6hDIUx#x
zPL;uFopw=`QVyBaLy@@bmQHBufdj#`%a~Vn)iiNns1XdIilJ!hq)qB!FJamzeedV@
z^!)z(-unX5>h@c9gfN8Ah*fA;sG1_Eh_@p24`73tM`pm6;o`GxxBNv*Pc7y0%$riy
zS8Ir7*7D_uVn1sR%r^v<D3{$$INqyw4WxteI^Wjaq-7}oy!k?rxM%02b7}5-lWQR5
zIO#l@J^e-g{4MT55#shc5V8Qa+~m+TjyppI5M)QF2ke#H$L2x-4)8h%3eeRWtm7_R
zwcKu9N`cr(EoSXay*Dxqsij`3s|`(BLS5mT==7YU`v$82>bNEF<{n}G5Hk&;bA2X+
zj`P;>USsFf57|1Qnh0Vdq#%XA?dQ9y%8`&xToM#S{BLPLL)d@C5h|KZwP19yIcsxc
zZ#GkR;@yW9Y2e$}mCcItJ*fK%U?-u_>m_>BZt|ricS_F$6nr{~Pv)Z5J!JcxBA+W{
zr!~N?6_`hh^r%o%byHgm%vY3r8O&!SGjSQeOyJW&O_uQ)8A~KJnbVq$M)Dv_mIT{`
zVB0B^<vy~LT4%O-dXs0?!tAJUU0hMc$C76m^RB4g6eb#Fdm@Bi9>d=r(_$bMP0ng^
z2v0ci#NRa#c1qEP2`%c-VmWO(H~Yq6nLK<-{<TUT2-rw*k=|XTjjG%V(;Ga!KFe%`
zYk$c~o}%Q*Rpou%y&$>Mn7E*yf}}6z#Gf%SuUo$x*W@8BCadu=JQ>pB4lRCHYko9n
zJ4CiW`pDfZxzlAUapVDX!8w_&0=wpCpYZh7EW7oT-YqfDOzfzm-<_A<xuc3kU;BTJ
zgtVz7p3L?4KF``J46vWF8)15%XSNH>6%$)2m}ko2+V`)jZbKLIk`D-ZvF6>C#5AaS
z=0QDls!c&X1U}=$W4YP$I}~}aO8(YI{wcH-p7`0_(kq)KW*Z6!Hj8=Hx2(3LbazJb
z7`pd<!Bf<sN3RiFjuJRj6$Ju^aP&3IzX9goP#<#wulUT6oRfS&PhRpo*5~%<hl?-m
z`zvGL<C&v+(ND10m7T&3?~*Fs0TMCaUDXSOW_4Iu6y^}J0UopHX+N)U4oA1}BVZFz
z+ql0{C|i_kSjDdf?gxT*FmBSi>&$%5Yh8Bq4cICt${oFP7fH3I(6fqr^?U@6fP}9m
zY~izt;p1>5axpeM{7Eu7EMJUGem)V7jK`;k6T(P1=IIbRgvrE2{F2-eiB$jp7n@*I
AZvX%Q

literal 0
HcmV?d00001

diff --git a/llvm/test/Bitcode/attributes-3.3.ll b/llvm/test/Bitcode/attributes-3.3.ll
index b564425c37381..f1b37d1991228 100644
--- a/llvm/test/Bitcode/attributes-3.3.ll
+++ b/llvm/test/Bitcode/attributes-3.3.ll
@@ -48,7 +48,7 @@ define void @f7(i8* noalias)
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval(i8))
 {
         ret void;
 }
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index de3cf8dd4d73a..6f149c0d3bf7b 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -45,7 +45,7 @@ define void @f7(i8* noalias)
 }
 
 define void @f8(i8* byval)
-; CHECK: define void @f8(i8* byval)
+; CHECK: define void @f8(i8* byval(i8))
 {
         ret void;
 }
diff --git a/llvm/test/Bitcode/byval-upgrade.test b/llvm/test/Bitcode/byval-upgrade.test
new file mode 100644
index 0000000000000..1012bf728830f
--- /dev/null
+++ b/llvm/test/Bitcode/byval-upgrade.test
@@ -0,0 +1,7 @@
+RUN: llvm-dis %p/Inputs/byval-upgrade.bc -o - | FileCheck %s
+
+Make sure we upgrade old-stile IntAttribute byval records to a fully typed
+version correctly.
+
+CHECK: call void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
+CHECK: invoke void @bar({ i32*, i8 }* byval({ i32*, i8 }) %ptr)
diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll
index e9313dfba870e..322c95b02d1f8 100644
--- a/llvm/test/Bitcode/compatibility-3.6.ll
+++ b/llvm/test/Bitcode/compatibility-3.6.ll
@@ -404,7 +404,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll
index 82fc99055357a..e8260741373dd 100644
--- a/llvm/test/Bitcode/compatibility-3.7.ll
+++ b/llvm/test/Bitcode/compatibility-3.7.ll
@@ -410,7 +410,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll
index 2e70a380d10ed..c8d8261915595 100644
--- a/llvm/test/Bitcode/compatibility-3.8.ll
+++ b/llvm/test/Bitcode/compatibility-3.8.ll
@@ -435,7 +435,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll
index 7c84daa7d3c44..f031e54a9f211 100644
--- a/llvm/test/Bitcode/compatibility-3.9.ll
+++ b/llvm/test/Bitcode/compatibility-3.9.ll
@@ -504,7 +504,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll
index 9e34d48c95f76..8020df45da6a5 100644
--- a/llvm/test/Bitcode/compatibility-4.0.ll
+++ b/llvm/test/Bitcode/compatibility-4.0.ll
@@ -504,7 +504,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll
index a4b3fca82b7bf..3b32d668af557 100644
--- a/llvm/test/Bitcode/compatibility-5.0.ll
+++ b/llvm/test/Bitcode/compatibility-5.0.ll
@@ -508,7 +508,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll
index 097e19ecb7ff8..3b08c4a72311d 100644
--- a/llvm/test/Bitcode/compatibility-6.0.ll
+++ b/llvm/test/Bitcode/compatibility-6.0.ll
@@ -515,7 +515,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 06b81fa14a819..991bde69b61d7 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -529,7 +529,7 @@ declare void @f.param.signext(i8 signext)
 declare void @f.param.inreg(i8 inreg)
 ; CHECK: declare void @f.param.inreg(i8 inreg)
 declare void @f.param.byval({ i8, i8 }* byval)
-; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval({ i8, i8 }))
 declare void @f.param.inalloca(i8* inalloca)
 ; CHECK: declare void @f.param.inalloca(i8* inalloca)
 declare void @f.param.sret(i8* sret)
@@ -1735,6 +1735,15 @@ define i8** @constexpr() {
 declare void @llvm.test.immarg.intrinsic(i32 immarg)
 ; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg)
 
+; byval attribute with type
+%named_type = type [8 x i8]
+declare void @byval_type(i32* byval(i32) align 2)
+declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
+declare void @byval_named_type(%named_type* byval(%named_type))
+; CHECK: declare void @byval_type(i32* byval(i32) align 2)
+; CHECK: declare void @byval_type2({ i8, i8* }* byval({ i8, i8* }))
+; CHECK: declare void @byval_named_type([8 x i8]* byval([8 x i8]))
+
 ; CHECK: attributes #0 = { alignstack=4 }
 ; CHECK: attributes #1 = { alignstack=8 }
 ; CHECK: attributes #2 = { alwaysinline }
diff --git a/llvm/test/Bitcode/highLevelStructure.3.2.ll b/llvm/test/Bitcode/highLevelStructure.3.2.ll
index 749b157cffc32..91d6ee4ac2574 100644
--- a/llvm/test/Bitcode/highLevelStructure.3.2.ll
+++ b/llvm/test/Bitcode/highLevelStructure.3.2.ll
@@ -41,7 +41,7 @@ declare void @ParamAttr3(i8* sret)
 declare void @ParamAttr4(i8 signext)
 ; CHECK: declare void @ParamAttr5(i8* inreg)
 declare void @ParamAttr5(i8* inreg)
-; CHECK: declare void @ParamAttr6(i8* byval)
+; CHECK: declare void @ParamAttr6(i8* byval(i8))
 declare void @ParamAttr6(i8* byval)
 ; CHECK: declare void @ParamAttr7(i8* noalias)
 declare void @ParamAttr7(i8* noalias)
@@ -51,7 +51,7 @@ declare void @ParamAttr8(i8* nocapture)
 declare void @ParamAttr9(i8* nest noalias nocapture)
 ; CHECK: declare void @ParamAttr10{{[(i8* sret noalias nocapture) | (i8* noalias nocapture sret)]}}
 declare void @ParamAttr10(i8* sret noalias nocapture)
-;CHECK: declare void @ParamAttr11{{[(i8* byval noalias nocapture) | (i8* noalias nocapture byval)]}}
+;CHECK: declare void @ParamAttr11{{[(i8* byval(i8) noalias nocapture) | (i8* noalias nocapture byval(i8))]}}
 declare void @ParamAttr11(i8* byval noalias nocapture)
 ;CHECK: declare void @ParamAttr12{{[(i8* inreg noalias nocapture) | (i8* noalias nocapture inreg)]}}
 declare void @ParamAttr12(i8* inreg noalias nocapture)
diff --git a/llvm/test/CodeGen/AArch64/byval-type.ll b/llvm/test/CodeGen/AArch64/byval-type.ll
new file mode 100644
index 0000000000000..0c2e2dc471dd9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/byval-type.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+
+define i8 @byval_match(i8* byval(i8) align 1, i8* byval %ptr) {
+; CHECK-LABEL: byval_match:
+; CHECK: ldrb w0, [sp, #8]
+  %res = load i8, i8* %ptr
+  ret i8 %res
+}
+
+define void @caller_match(i8* %p0, i8* %p1) {
+; CHECK-LABEL: caller_match:
+; CHECK: ldrb [[P1:w[0-9]+]], [x1]
+; CHECK: strb [[P1]], [sp, #8]
+; CHECK: ldrb [[P0:w[0-9]+]], [x0]
+; CHECK: strb [[P0]], [sp]
+; CHECK: bl byval_match
+  call i8 @byval_match(i8* byval(i8) align 1 %p0, i8* byval %p1)
+  ret void
+}
+
+define i8 @byval_large([3 x i64]* byval([3 x i64]) align 8, i8* byval %ptr) {
+; CHECK-LABEL: byval_large:
+; CHECK: ldrb w0, [sp, #24]
+  %res = load i8, i8* %ptr
+  ret i8 %res
+}
+
+define void @caller_large([3 x i64]* %p0, i8* %p1) {
+; CHECK-LABEL: caller_large:
+; CHECK: ldr [[P0HI:x[0-9]+]], [x0, #16]
+; CHECK: ldr [[P0LO:q[0-9]+]], [x0]
+; CHECK: str [[P0HI]], [sp, #16]
+; CHECK: str [[P0LO]], [sp]
+; CHECK: bl byval_large
+  call i8 @byval_large([3 x i64]* byval([3 x i64]) align 8 %p0, i8* byval %p1)
+  ret void
+}
diff --git a/llvm/test/Linker/Inputs/byval-types-1.ll b/llvm/test/Linker/Inputs/byval-types-1.ll
new file mode 100644
index 0000000000000..3da8b91f619e3
--- /dev/null
+++ b/llvm/test/Linker/Inputs/byval-types-1.ll
@@ -0,0 +1,8 @@
+%struct = type {i32, i8}
+
+declare void @baz(%struct* byval(%struct))
+
+define void @foo(%struct* byval(%struct) %a) {
+  call void @baz(%struct* byval(%struct) %a)
+  ret void
+}
diff --git a/llvm/test/Linker/byval-types.ll b/llvm/test/Linker/byval-types.ll
new file mode 100644
index 0000000000000..d6bd4aa0c7b9b
--- /dev/null
+++ b/llvm/test/Linker/byval-types.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-link %s %p/Inputs/byval-types-1.ll -S | FileCheck %s
+
+%struct = type {i32, i8}
+
+declare void @foo(%struct* byval(%struct) %a)
+
+define void @bar() {
+  %ptr = alloca %struct
+; CHECK: call void @foo(%struct* byval(%struct) %ptr)
+  call void @foo(%struct* byval(%struct) %ptr)
+  ret void
+}
+
+; CHECK: define void @foo(%struct* byval(%struct) %a)
+; CHECK-NEXT:   call void @baz(%struct* byval(%struct) %a)
+
+; CHECK: declare void @baz(%struct* byval(%struct))
diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll
index 8aafe7943f4ee..be495f1bcd379 100644
--- a/llvm/test/Transforms/Inline/byval-tail-call.ll
+++ b/llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -56,7 +56,7 @@ define void @foobar(i32* %x) {
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
+; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
 ; CHECK: ret void
   tail call void @bar2(i32* byval %x)
   ret void
@@ -67,7 +67,7 @@ define void @barfoo() {
 ; CHECK: %[[POS:.*]] = alloca i32
 ; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
-; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]]
+; CHECK: tail call void @ext2(i32* nonnull byval %[[POS]]
 ; CHECK: ret void
   %x = alloca i32
   tail call void @bar2(i32* byval %x)
diff --git a/llvm/unittests/IR/AttributesTest.cpp b/llvm/unittests/IR/AttributesTest.cpp
index e0be2343a1445..06da35aca5735 100644
--- a/llvm/unittests/IR/AttributesTest.cpp
+++ b/llvm/unittests/IR/AttributesTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 
@@ -40,6 +41,10 @@ TEST(Attributes, Ordering) {
   EXPECT_TRUE(Align4 < Deref5);
   EXPECT_TRUE(Align5 < Deref4);
 
+  Attribute ByVal = Attribute::get(C, Attribute::ByVal, Type::getInt32Ty(C));
+  EXPECT_FALSE(ByVal < Attribute::get(C, Attribute::ZExt));
+  EXPECT_TRUE(ByVal < Align4);
+
   AttributeList ASs[] = {AttributeList::get(C, 2, Attribute::ZExt),
                          AttributeList::get(C, 1, Attribute::SExt)};
 
@@ -166,4 +171,19 @@ TEST(Attributes, OverflowGet) {
   EXPECT_EQ(2U, AL.getNumAttrSets());
 }
 
+TEST(Attributes, StringRepresentation) {
+  LLVMContext C;
+  StructType *Ty = StructType::create(Type::getInt32Ty(C), "mystruct");
+
+  // Insufficiently careful printing can result in byval(%mystruct = { i32 })
+  Attribute A = Attribute::getWithByValType(C, Ty);
+  EXPECT_EQ(A.getAsString(), "byval(%mystruct)");
+
+  A = Attribute::getWithByValType(C, nullptr);
+  EXPECT_EQ(A.getAsString(), "byval");
+
+  A = Attribute::getWithByValType(C, Type::getInt32Ty(C));
+  EXPECT_EQ(A.getAsString(), "byval(i32)");
+}
+
 } // end anonymous namespace

From fcb00d4aec7de1f51292b65d660001f70b95f7b3 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Thu, 30 May 2019 18:49:19 +0000
Subject: [PATCH 0651/1176] Reapply: LLVM IR: update Clang tests for byval
 being a typed attribute.

Since byval is now a typed attribute it gets sorted slightly differently by
LLVM when the order of attributes is being canonicalized. This updates the few
Clang tests that depend on the old order.

Clang patch is unchanged.

llvm-svn: 362129
---
 clang/test/CodeGen/aapcs-align.cpp               |  4 ++--
 .../test/CodeGenCXX/builtin-source-location.cpp  |  4 ++--
 clang/test/CodeGenCXX/wasm-args-returns.cpp      |  4 ++--
 .../CodeGenCXX/x86_64-arguments-nacl-x32.cpp     |  2 +-
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl    | 16 ++++++++--------
 .../kernels-have-spir-cc-by-default.cl           |  6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index 40fba7823524e..bcc4604d7422d 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -95,8 +95,8 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define void @g4
-// CHECK: call void @f4(i32 1, %struct.SF16* byval nonnull align 8
-// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* byval nonnull align 8
+// CHECK: call void @f4(i32 1, %struct.SF16* nonnull byval align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* nonnull byval align 8
 // CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
 // CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
 
diff --git a/clang/test/CodeGenCXX/builtin-source-location.cpp b/clang/test/CodeGenCXX/builtin-source-location.cpp
index 6a8387093dfec..f8bfd7d940b91 100644
--- a/clang/test/CodeGenCXX/builtin-source-location.cpp
+++ b/clang/test/CodeGenCXX/builtin-source-location.cpp
@@ -104,7 +104,7 @@ struct TestInit {
 //
 // CHECK-CTOR-GLOBAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP_ONE:[^,]*]],
 // CHECK-CTOR-GLOBAL-SAME: i32 3400, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{[^%]*}}%[[TMP_ONE]])
+// CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{.*}}%[[TMP_ONE]])
 #line 3400 "GlobalInitVal.cpp"
 TestInit GlobalInitVal;
 
@@ -119,7 +119,7 @@ extern "C" void test_init_function() {
 //
 // CHECK-CTOR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
 // CHECK-CTOR-LOCAL-SAME: i32 3500, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
-// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{[^%]*}}%[[TMP]])
+// CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{.*}}%[[TMP]])
 #line 3500 "LocalInitVal.cpp"
   TestInit init_local;
   sink(init_local);
diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp
index a7c4e1e282a12..506540e86fd9c 100644
--- a/clang/test/CodeGenCXX/wasm-args-returns.cpp
+++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp
@@ -30,12 +30,12 @@ struct two_fields {
   double d, e;
 };
 test(two_fields);
-// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* byval nocapture readonly align 8 %{{.*}})
+// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval align 8 %{{.*}})
 //
 // CHECK: define void @_Z15test_two_fieldsv()
 // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8
 // CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret %[[tmp]])
-// CHECK: call void @_Z3use10two_fields(%struct.two_fields* byval nonnull align 8 %[[tmp]])
+// CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval align 8 %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval align 8)
diff --git a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
index 3392b32bd2b6f..89d6dae5d30fc 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
@@ -20,7 +20,7 @@ void f_struct_with_mdp(struct_with_mdp a) { (void)a; }
 struct struct_with_mdp_too_much {
   char *a; char *b; char *c; char *d; test_struct_mdp e;
 };
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval {{.*}} %a)
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval
 void f_struct_with_mdp_too_much(struct_with_mdp_too_much a) {
   (void)a;
 }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index aec00e76014ec..8b03fb00e3fde 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@ void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t ar
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@ flexible_array func_flexible_array_ret()
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -487,7 +487,7 @@ void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@ void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@ void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@ void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval  align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 5bb52e9beb514..4392ef90677c5 100644
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@ typedef struct test_struct {
 kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* byval nocapture
+// CHECK: struct.int_single* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = input.a;
 }
@@ -35,7 +35,7 @@ kernel void test_single(int_single input, global int* output) {
 kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* byval nocapture
+// CHECK: struct.int_pair* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
@@ -44,7 +44,7 @@ kernel void test_pair(int_pair input, global int* output) {
 kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* byval nocapture
+// CHECK: struct.test_struct* nocapture {{.*}} byval
 // CHECK: i32* nocapture %output
  output[0] = input.elementA;
  output[1] = input.elementB;

From ef95679741e01235c32547e5ae7f8259f703847a Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 19:27:10 +0000
Subject: [PATCH 0652/1176] [DAGCombine] Use FoldConstantArithmetic() to
 perform ((c1-A)+c2) -> (c1+c2)-A fold

Summary: No tests change, and i'm not sure how to test this, but it's better safe than sorry.

Reviewers: spatel, RKSimon, craig.topper, t.p.northover

Reviewed By: craig.topper

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62661

llvm-svn: 362130
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d2045d764a075..49cd7596f5af4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2164,10 +2164,10 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     // fold ((c1-A)+c2) -> (c1+c2)-A
     if (N0.getOpcode() == ISD::SUB &&
         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
-      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
-      return DAG.getNode(ISD::SUB, DL, VT,
-                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
-                         N0.getOperand(1));
+      SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
+                                               N0.getOperand(0).getNode());
+      assert(Add && "Constant folding failed");
+      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
     }
 
     // add (sext i1 X), 1 -> zext (not i1 X)

From cc9a9cf2378493ac3ba1f71a754561abbe0a766b Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 19:27:19 +0000
Subject: [PATCH 0653/1176] [DAGCombine] ((A-c1)+c2) -> (A+(c2-c1)) 
 constant-fold

Summary:
This was the root cause of the endless combine loop in D62257

https://rise4fun.com/Alive/d3W

Reviewers: RKSimon, spatel, craig.topper, t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, javed.absar, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62662

llvm-svn: 362131
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  9 ++++++++
 .../AArch64/addsub-constant-folding.ll        | 22 +++++--------------
 .../CodeGen/X86/addsub-constant-folding.ll    | 12 ++--------
 3 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49cd7596f5af4..9a06077587d44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2161,6 +2161,15 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     return N0;
 
   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+    // fold ((A-c1)+c2) -> (A+(c2-c1))
+    if (N0.getOpcode() == ISD::SUB &&
+        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+      SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
+                                               N0.getOperand(1).getNode());
+      assert(Sub && "Constant folding failed");
+      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+    }
+
     // fold ((c1-A)+c2) -> (c1+c2)-A
     if (N0.getOpcode() == ISD::SUB &&
         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index 38c3dfd7546ba..a1bf779e1dbfd 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -157,9 +157,7 @@ define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
 ; CHECK-LABEL: sub_const_add_const:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    mvni v1.4s, #5
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -175,12 +173,12 @@ define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    bl use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    mvni v0.4s, #5
 ; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
@@ -195,10 +193,7 @@ define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    adrp x8, .LCPI11_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_1]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
@@ -412,9 +407,7 @@ define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
 ; CHECK-LABEL: const_sub_const_sub:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    mvni v1.4s, #5
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -450,10 +443,7 @@ define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI26_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI26_0]
-; CHECK-NEXT:    adrp x8, .LCPI26_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI26_1]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
   %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index dc9ee4ca6367e..c3cbe5ed4b43e 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -222,13 +222,11 @@ define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
 ; X86-LABEL: sub_const_add_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_add_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -241,8 +239,8 @@ define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    calll use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
@@ -254,8 +252,8 @@ define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    callq use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
@@ -271,13 +269,11 @@ define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: sub_const_add_const_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_add_const_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -588,13 +584,11 @@ define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
 ; X86-LABEL: const_sub_const_sub:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: const_sub_const_sub:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -642,13 +636,11 @@ define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
 define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: const_sub_const_sub_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: const_sub_const_sub_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg

From 9ff3159b4aede9a53d65e67ada4a7e2ba1aaa301 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 19:27:26 +0000
Subject: [PATCH 0654/1176] [DAGCombine] Use FoldConstantArithmetic() to
 perform C2-(A+C1) -> (C2-C1)-A fold

Summary:
No tests change, and i'm not sure how to test this, but it's better safe than sorry.

Reviewers: spatel, RKSimon, craig.topper, t.p.northover

Reviewed By: craig.topper

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62663

llvm-svn: 362132
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9a06077587d44..c04dbd276cfe7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2868,7 +2868,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue N11 = N1.getOperand(1);
     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
-      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+      SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+                                                N11.getNode());
+      assert(NewC && "Constant folding failed");
       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
     }
   }

From 0a3dbbcdfb5482987125fdb8267820351d13a545 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 19:27:32 +0000
Subject: [PATCH 0655/1176] [DAGCombine] (A+C1)-C2 -> A+(C1-C2) constant-fold

Summary:
Direct sibling of D62662, the root cause of the endless combine loop in D62257

https://rise4fun.com/Alive/d3W

Reviewers: RKSimon, craig.topper, spatel, t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, javed.absar, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62664

llvm-svn: 362133
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    | 10 ++++++++++
 .../CodeGen/AArch64/addsub-constant-folding.ll   | 13 ++++---------
 llvm/test/CodeGen/X86/addsub-constant-folding.ll | 16 ++++++++--------
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c04dbd276cfe7..da205841a2208 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2863,6 +2863,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
     return N0.getOperand(0);
 
+  // fold (A+C1)-C2 -> A+(C1-C2)
+  if (N0.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+    SDValue NewC = DAG.FoldConstantArithmetic(
+        ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+    assert(NewC && "Constant folding failed");
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+  }
+
   // fold C2-(A+C1) -> (C2-C1)-A
   if (N1.getOpcode() == ISD::ADD) {
     SDValue N11 = N1.getOperand(1);
diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index a1bf779e1dbfd..8afa4ded1e04e 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -56,10 +56,8 @@ define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: add_const_sub_const:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    movi v1.4s, #6
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #2
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
@@ -74,13 +72,13 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    bl use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    movi v0.4s, #2
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    movi v0.4s, #6
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -94,10 +92,7 @@ define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT:    adrp x8, .LCPI5_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI5_1]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index c3cbe5ed4b43e..0053bc2b611d3 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -78,13 +78,11 @@ define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
 ; X86-LABEL: add_const_sub_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_sub_const:
 ; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
@@ -96,11 +94,13 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqa %xmm0, %xmm1
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X86-NEXT:    paddd %xmm1, %xmm0
 ; X86-NEXT:    calll use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -109,11 +109,13 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa %xmm0, %xmm1
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    callq use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
@@ -127,13 +129,11 @@ define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: add_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>

From 691b5e2eccc5c21f020793c39c3e011348df19f7 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 19:27:42 +0000
Subject: [PATCH 0656/1176] [DAGCombine] (A-C1)-C2 -> A-(C1+C2) constant-fold

Summary: https://rise4fun.com/Alive/Mb1M

Reviewers: RKSimon, craig.topper, spatel, t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, javed.absar, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62689

llvm-svn: 362134
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp        | 10 ++++++++++
 llvm/test/CodeGen/AArch64/addsub-constant-folding.ll | 11 +++--------
 llvm/test/CodeGen/X86/addsub-constant-folding.ll     |  8 ++------
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index da205841a2208..8bbd5cd201239 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2885,6 +2885,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  // fold (A-C1)-C2 -> A-(C1+C2)
+  if (N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+    SDValue NewC = DAG.FoldConstantArithmetic(
+        ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+    assert(NewC && "Constant folding failed");
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+  }
+
   // fold ((A+(B+or-C))-B) -> A+or-C
   if (N0.getOpcode() == ISD::ADD &&
       (N0.getOperand(1).getOpcode() == ISD::SUB ||
diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index 8afa4ded1e04e..de87aa4348ab6 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -200,9 +200,7 @@ define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: sub_const_sub_const:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    movi v1.4s, #10
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -218,12 +216,12 @@ define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    bl use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    movi v0.4s, #2
+; CHECK-NEXT:    movi v0.4s, #10
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
@@ -238,10 +236,7 @@ define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT:    adrp x8, .LCPI14_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI14_1]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index 0053bc2b611d3..e24f35382fd0c 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -287,13 +287,11 @@ define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
 ; X86-LABEL: sub_const_sub_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_sub_const:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
@@ -305,8 +303,8 @@ define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    calll use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
@@ -318,8 +316,8 @@ define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    callq use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
@@ -336,13 +334,11 @@ define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: sub_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
   %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>

From 7eb8b5b5ddb4517dfb168e4700779a4f2be8960a Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 19:27:51 +0000
Subject: [PATCH 0657/1176] [DAGCombine] ((c1-A)-c2) -> ((c1-c2)-A) 
 constant-fold

Summary: https://rise4fun.com/Alive/B0A

Reviewers: t.p.northover, RKSimon, spatel, craig.topper

Reviewed By: RKSimon

Subscribers: javed.absar, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62691

llvm-svn: 362135
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++
 .../AArch64/addsub-constant-folding.ll        | 13 ++-----
 .../CodeGen/X86/addsub-constant-folding.ll    | 37 +++++++++----------
 3 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8bbd5cd201239..b6164ac4ded2a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2895,6 +2895,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
   }
 
+  // fold (c1-A)-c2 -> (c1-c2)-A
+  if (N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
+    SDValue NewC = DAG.FoldConstantArithmetic(
+        ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+    assert(NewC && "Constant folding failed");
+    return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+  }
+
   // fold ((A+(B+or-C))-B) -> A+or-C
   if (N0.getOpcode() == ISD::ADD &&
       (N0.getOperand(1).getOpcode() == ISD::SUB ||
diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index de87aa4348ab6..47a236e373be2 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -344,10 +344,8 @@ define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: const_sub_sub_const:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.4s, #8
+; CHECK-NEXT:    movi v1.4s, #6
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    movi v1.4s, #2
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
@@ -362,13 +360,13 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    movi v1.4s, #8
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    bl use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT:    movi v0.4s, #2
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    movi v0.4s, #6
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -382,10 +380,7 @@ define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI23_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT:    adrp x8, .LCPI23_1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI23_1]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
   %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index e24f35382fd0c..3c48494ae39de 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -500,17 +500,15 @@ define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
 define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
 ; X86-LABEL: const_sub_sub_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
 ; X86-NEXT:    psubd %xmm0, %xmm1
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: const_sub_sub_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -523,13 +521,14 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
-; X86-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
-; X86-NEXT:    psubd %xmm0, %xmm1
-; X86-NEXT:    movdqu %xmm1, (%esp) # 16-byte Spill
-; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    movdqa %xmm0, %xmm1
+; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X86-NEXT:    psubd %xmm1, %xmm0
 ; X86-NEXT:    calll use
-; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
+; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
+; X86-NEXT:    psubd %xmm1, %xmm0
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -538,13 +537,13 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
-; X64-NEXT:    movdqa {{.*#+}} xmm1 = [8,8,8,8]
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    movdqa %xmm1, (%rsp) # 16-byte Spill
-; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
+; X64-NEXT:    psubd %xmm1, %xmm0
 ; X64-NEXT:    callq use
-; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
+; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
@@ -557,17 +556,15 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 ; X86-LABEL: const_sub_sub_const_nonsplat:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa {{.*#+}} xmm1 = <21,u,8,8>
+; X86-NEXT:    movdqa {{.*#+}} xmm1 = <19,u,u,6>
 ; X86-NEXT:    psubd %xmm0, %xmm1
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: const_sub_sub_const_nonsplat:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa {{.*#+}} xmm1 = <21,u,8,8>
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = <19,u,u,6>
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg

From e0a4da8c0a2a0628fdae427c6eb2949b3bcbdfa0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 30 May 2019 19:33:18 +0000
Subject: [PATCH 0658/1176] AMDGPU/GlobalISel: Add wave scratch offset argument

Avoids crashing in PEI in a future change.

llvm-svn: 362136
---
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 42 +++++++++++++++++++
 ...irtranslator-amdgpu_kernel-system-sgprs.ll | 10 +++++
 2 files changed, 52 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 249498e35ef3f..ff34759227ba9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -156,6 +156,43 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
   MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
 }
 
+static unsigned findFirstFreeSGPR(CCState &CCInfo) {
+  unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+  for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
+    if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
+      return AMDGPU::SGPR0 + Reg;
+    }
+  }
+  llvm_unreachable("Cannot allocate sgpr");
+}
+
+static void allocateSystemSGPRs(CCState &CCInfo,
+                                MachineFunction &MF,
+                                SIMachineFunctionInfo &Info,
+                                CallingConv::ID CallConv,
+                                bool IsShader) {
+  if (Info.hasPrivateSegmentWaveByteOffset()) {
+    // Scratch wave offset passed in system SGPR.
+    unsigned PrivateSegmentWaveByteOffsetReg;
+
+    if (IsShader) {
+      PrivateSegmentWaveByteOffsetReg =
+        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
+
+      // This is true if the scratch wave byte offset doesn't have a fixed
+      // location.
+      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
+        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+      }
+    } else
+      PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
+
+    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+  }
+}
+
 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                               const Function &F,
                                               ArrayRef<unsigned> VRegs) const {
@@ -171,6 +208,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
   const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
+  bool IsShader = AMDGPU::isShader(F.getCallingConv());
+
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
 
@@ -242,6 +281,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
       ++i;
     }
 
+    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
     return true;
   }
 
@@ -313,6 +353,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
       MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
       MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
     }
+
+    allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
     return true;
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
new file mode 100644
index 0000000000000..00b126463f720
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -global-isel %s -o - | FileCheck -check-prefix=HSA %s
+
+; HSA-LABEL: name: default_kernel
+; HSA: liveins:
+; HSA-NEXT: - { reg: '$sgpr0_sgpr1_sgpr2_sgpr3', virtual-reg: '%0' }
+; HSA-NEXT: - { reg: '$sgpr4', virtual-reg: '%1' }
+; HSA-NEXT: frameInfo:
+define amdgpu_kernel void @default_kernel() {
+  ret void
+}

From 04a38b924e7e37ad166b3f73f229411e3a2dfa25 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Thu, 30 May 2019 19:42:25 +0000
Subject: [PATCH 0659/1176] [NFC][InstCombine] Add unary FNeg tests to fmul.ll

llvm-svn: 362137
---
 llvm/test/Transforms/InstCombine/fmul.ll | 205 +++++++++++++++++++++++
 1 file changed, 205 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 16d138539f91f..fcf0941c867c8 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -12,6 +12,16 @@ define float @neg_constant(float %x) {
   ret float %mul
 }
 
+define float @unary_neg_constant(float %x) {
+; CHECK-LABEL: @unary_neg_constant(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul ninf float [[X:%.*]], -2.000000e+01
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %sub = fneg float %x
+  %mul = fmul ninf float %sub, 2.0e+1
+  ret float %mul
+}
+
 define <2 x float> @neg_constant_vec(<2 x float> %x) {
 ; CHECK-LABEL: @neg_constant_vec(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], <float -2.000000e+00, float -3.000000e+00>
@@ -22,6 +32,16 @@ define <2 x float> @neg_constant_vec(<2 x float> %x) {
   ret <2 x float> %mul
 }
 
+define <2 x float> @unary_neg_constant_vec(<2 x float> %x) {
+; CHECK-LABEL: @unary_neg_constant_vec(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], <float -2.000000e+00, float -3.000000e+00>
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %sub = fneg <2 x float> %x
+  %mul = fmul ninf <2 x float> %sub, <float 2.0, float 3.0>
+  ret <2 x float> %mul
+}
+
 define <2 x float> @neg_constant_vec_undef(<2 x float> %x) {
 ; CHECK-LABEL: @neg_constant_vec_undef(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], <float -2.000000e+00, float -3.000000e+00>
@@ -43,6 +63,16 @@ define float @neg_nsz_constant(float %x) {
   ret float %mul
 }
 
+define float @unary_neg_nsz_constant(float %x) {
+; CHECK-LABEL: @unary_neg_nsz_constant(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul nnan float [[X:%.*]], -2.000000e+01
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %sub = fneg nsz float %x
+  %mul = fmul nnan float %sub, 2.0e+1
+  ret float %mul
+}
+
 ; (-0.0 - X) * (-0.0 - Y) => X * Y
 define float @neg_neg(float %x, float %y) {
 ; CHECK-LABEL: @neg_neg(
@@ -55,6 +85,39 @@ define float @neg_neg(float %x, float %y) {
   ret float %mul
 }
 
+define float @unary_neg_unary_neg(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_unary_neg(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %sub1 = fneg float %x
+  %sub2 = fneg float %y
+  %mul = fmul arcp float %sub1, %sub2
+  ret float %mul
+}
+
+define float @unary_neg_neg(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_neg(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %sub1 = fneg float %x
+  %sub2 = fsub float -0.0, %y
+  %mul = fmul arcp float %sub1, %sub2
+  ret float %mul
+}
+
+define float @neg_unary_neg(float %x, float %y) {
+; CHECK-LABEL: @neg_unary_neg(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %sub1 = fsub float -0.0, %x
+  %sub2 = fneg float %y
+  %mul = fmul arcp float %sub1, %sub2
+  ret float %mul
+}
+
 define <2 x float> @neg_neg_vec(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @neg_neg_vec(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -66,6 +129,39 @@ define <2 x float> @neg_neg_vec(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %mul
 }
 
+define <2 x float> @unary_neg_unary_neg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_neg_unary_neg_vec(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %sub1 = fneg <2 x float> %x
+  %sub2 = fneg <2 x float> %y
+  %mul = fmul arcp <2 x float> %sub1, %sub2
+  ret <2 x float> %mul
+}
+
+define <2 x float> @unary_neg_neg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_neg_neg_vec(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %sub1 = fneg <2 x float> %x
+  %sub2 = fsub <2 x float> <float -0.0, float -0.0>, %y
+  %mul = fmul arcp <2 x float> %sub1, %sub2
+  ret <2 x float> %mul
+}
+
+define <2 x float> @neg_unary_neg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_unary_neg_vec(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %sub1 = fsub <2 x float> <float -0.0, float -0.0>, %x
+  %sub2 = fneg <2 x float> %y
+  %mul = fmul arcp <2 x float> %sub1, %sub2
+  ret <2 x float> %mul
+}
+
 define <2 x float> @neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @neg_neg_vec_undef(
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -77,6 +173,28 @@ define <2 x float> @neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %mul
 }
 
+define <2 x float> @unary_neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_neg_neg_vec_undef(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %neg = fneg <2 x float> %x
+  %sub = fsub <2 x float> <float undef, float -0.0>, %y
+  %mul = fmul arcp <2 x float> %neg, %sub
+  ret <2 x float> %mul
+}
+
+define <2 x float> @neg_unary_neg_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @neg_unary_neg_vec_undef(
+; CHECK-NEXT:    [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %sub = fsub <2 x float> <float -0.0, float undef>, %x
+  %neg = fneg <2 x float> %y
+  %mul = fmul arcp <2 x float> %sub, %neg
+  ret <2 x float> %mul
+}
+
 ; (0.0 - X) * (0.0 - Y) => X * Y
 define float @neg_neg_nsz(float %x, float %y) {
 ; CHECK-LABEL: @neg_neg_nsz(
@@ -108,6 +226,57 @@ define float @neg_neg_multi_use(float %x, float %y) {
   ret float %mul
 }
 
+define float @unary_neg_unary_neg_multi_use(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_unary_neg_multi_use(
+; CHECK-NEXT:    [[NX:%.*]] = fneg float [[X:%.*]]
+; CHECK-NEXT:    [[NY:%.*]] = fneg float [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[X]], [[Y]]
+; CHECK-NEXT:    call void @use_f32(float [[NX]])
+; CHECK-NEXT:    call void @use_f32(float [[NY]])
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %nx = fneg float %x
+  %ny = fneg float %y
+  %mul = fmul afn float %nx, %ny
+  call void @use_f32(float %nx)
+  call void @use_f32(float %ny)
+  ret float %mul
+}
+
+define float @unary_neg_neg_multi_use(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_neg_multi_use(
+; CHECK-NEXT:    [[NX:%.*]] = fneg float [[X:%.*]]
+; CHECK-NEXT:    [[NY:%.*]] = fsub float -0.000000e+00, [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[X]], [[Y]]
+; CHECK-NEXT:    call void @use_f32(float [[NX]])
+; CHECK-NEXT:    call void @use_f32(float [[NY]])
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %nx = fneg float %x
+  %ny = fsub float -0.0, %y
+  %mul = fmul afn float %nx, %ny
+  call void @use_f32(float %nx)
+  call void @use_f32(float %ny)
+  ret float %mul
+}
+
+define float @neg_unary_neg_multi_use(float %x, float %y) {
+; CHECK-LABEL: @neg_unary_neg_multi_use(
+; CHECK-NEXT:    [[NX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    [[NY:%.*]] = fneg float [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul afn float [[X]], [[Y]]
+; CHECK-NEXT:    call void @use_f32(float [[NX]])
+; CHECK-NEXT:    call void @use_f32(float [[NY]])
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %nx = fsub float -0.0, %x
+  %ny = fneg float %y
+  %mul = fmul afn float %nx, %ny
+  call void @use_f32(float %nx)
+  call void @use_f32(float %ny)
+  ret float %mul
+}
+
 ; (-0.0 - X) * Y => -0.0 - (X * Y)
 define float @neg_sink(float %x, float %y) {
 ; CHECK-LABEL: @neg_sink(
@@ -120,6 +289,17 @@ define float @neg_sink(float %x, float %y) {
   ret float %mul
 }
 
+define float @unary_neg_sink(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_sink(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    ret float [[MUL]]
+;
+  %neg = fneg float %x
+  %mul = fmul float %neg, %y
+  ret float %mul
+}
+
 define <2 x float> @neg_sink_vec(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @neg_sink_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -131,6 +311,18 @@ define <2 x float> @neg_sink_vec(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %mul
 }
 
+; FIXME: Should generate a unary FNeg.
+define <2 x float> @unary_neg_sink_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_neg_sink_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    ret <2 x float> [[MUL]]
+;
+  %sub = fneg <2 x float> %x
+  %mul = fmul <2 x float> %sub, %y
+  ret <2 x float> %mul
+}
+
 define <2 x float> @neg_sink_vec_undef(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @neg_sink_vec_undef(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -169,6 +361,19 @@ define float @neg_sink_multi_use(float %x, float %y) {
   ret float %mul2
 }
 
+define float @unary_neg_sink_multi_use(float %x, float %y) {
+; CHECK-LABEL: @unary_neg_sink_multi_use(
+; CHECK-NEXT:    [[SUB1:%.*]] = fneg float [[X:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUB1]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL2:%.*]] = fmul float [[MUL]], [[SUB1]]
+; CHECK-NEXT:    ret float [[MUL2]]
+;
+  %sub1 = fneg float %x
+  %mul = fmul float %sub1, %y
+  %mul2 = fmul float %mul, %sub1
+  ret float %mul2
+}
+
 ; Don't crash when attempting to cast a constant FMul to an instruction.
 define void @test8(i32* %inout) {
 ; CHECK-LABEL: @test8(

From 5f0f4e3ae03afa84034c01ddaac559e0b5cb6532 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Thu, 30 May 2019 19:45:32 +0000
Subject: [PATCH 0660/1176] [GWP-ASan] Mutex implementation [2].

Summary:
See D60593 for further information.
This patch pulls out the mutex implementation and the required definitions file.

We implement our own mutex for GWP-ASan currently, because:

1. We must be compatible with the sum of the most restrictive elements of the supporting allocator's build system. Current targets for GWP-ASan include Scudo (on Linux and Fuchsia), and bionic (on Android).
2. Scudo specifies `-nostdlib++ -nonodefaultlibs`, meaning we can't use `std::mutex` or `mtx_t`.
3. We can't use `sanitizer_common`'s mutex, as the supporting allocators cannot afford the extra maintenance (Android, Fuchsia) and code size (Fuchsia) overheads that this would incur.

In future, we would like to implement a shared base mutex for GWP-ASan, Scudo and sanitizer_common. This will likely happen when both GWP-ASan and Scudo standalone are not in the development phase, at which point they will have stable requirements.

Reviewers: vlad.tsyrklevich, morehouse, jfb

Reviewed By: morehouse

Subscribers: dexonsmith, srhines, cfe-commits, kubamracek, mgorny, cryptoad, jfb, #sanitizers, llvm-commits, vitalybuka, eugenis

Tags: #sanitizers, #llvm, #clang

Differential Revision: https://reviews.llvm.org/D61923

llvm-svn: 362138
---
 clang/runtime/CMakeLists.txt                  |  2 +-
 compiler-rt/lib/gwp_asan/CMakeLists.txt       |  8 +-
 compiler-rt/lib/gwp_asan/mutex.h              | 50 +++++++++++
 .../platform_specific/mutex_posix.cpp         | 30 +++++++
 compiler-rt/lib/gwp_asan/tests/CMakeLists.txt | 49 ++++++++++
 compiler-rt/lib/gwp_asan/tests/driver.cpp     | 14 +++
 compiler-rt/lib/gwp_asan/tests/mutex_test.cpp | 89 +++++++++++++++++++
 compiler-rt/test/gwp_asan/CMakeLists.txt      | 45 ++++++++++
 compiler-rt/test/gwp_asan/dummy_test.cc       |  4 +
 compiler-rt/test/gwp_asan/lit.cfg             | 31 +++++++
 compiler-rt/test/gwp_asan/lit.site.cfg.in     | 11 +++
 .../test/gwp_asan/unit/lit.site.cfg.in        |  9 ++
 12 files changed, 340 insertions(+), 2 deletions(-)
 create mode 100644 compiler-rt/lib/gwp_asan/mutex.h
 create mode 100644 compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
 create mode 100644 compiler-rt/lib/gwp_asan/tests/driver.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/tests/mutex_test.cpp
 create mode 100644 compiler-rt/test/gwp_asan/dummy_test.cc
 create mode 100644 compiler-rt/test/gwp_asan/lit.cfg
 create mode 100644 compiler-rt/test/gwp_asan/lit.site.cfg.in
 create mode 100644 compiler-rt/test/gwp_asan/unit/lit.site.cfg.in

diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt
index e1e52b0e45229..e20cc26f60af8 100644
--- a/clang/runtime/CMakeLists.txt
+++ b/clang/runtime/CMakeLists.txt
@@ -132,7 +132,7 @@ if(LLVM_BUILD_EXTERNAL_COMPILER_RT AND EXISTS ${COMPILER_RT_SRC_ROOT}/)
     # Add top-level targets for various compiler-rt test suites.
     set(COMPILER_RT_TEST_SUITES check-fuzzer check-asan check-hwasan check-asan-dynamic check-dfsan
       check-lsan check-msan check-sanitizer check-tsan check-ubsan check-ubsan-minimal
-      check-profile check-cfi check-cfi-and-supported check-safestack)
+      check-profile check-cfi check-cfi-and-supported check-safestack check-gwp_asan)
     foreach(test_suite ${COMPILER_RT_TEST_SUITES})
       get_ext_project_build_command(run_test_suite ${test_suite})
       add_custom_target(${test_suite}
diff --git a/compiler-rt/lib/gwp_asan/CMakeLists.txt b/compiler-rt/lib/gwp_asan/CMakeLists.txt
index 1f6d973b3a6cd..6c83d86c6c899 100644
--- a/compiler-rt/lib/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/CMakeLists.txt
@@ -3,17 +3,19 @@ add_compiler_rt_component(gwp_asan)
 include_directories(..)
 
 set(GWP_ASAN_SOURCES
+  platform_specific/mutex_posix.cpp
   random.cpp
 )
 
 set(GWP_ASAN_HEADERS
+  mutex.h
   random.h
 )
 
 # Ensure that GWP-ASan meets the delegated requirements of some supporting
 # allocators. Some supporting allocators (e.g. scudo standalone) cannot use any
 # parts of the C++ standard library.
-set(GWP_ASAN_CFLAGS -fno-rtti -fno-exceptions -nostdinc++)
+set(GWP_ASAN_CFLAGS -fno-rtti -fno-exceptions -nostdinc++ -pthread)
 
 # Remove -stdlib= which is unused when passing -nostdinc++.
 string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
@@ -37,3 +39,7 @@ if (COMPILER_RT_HAS_GWP_ASAN)
       ADDITIONAL_HEADERS ${GWP_ASAN_HEADERS}
       CFLAGS ${GWP_ASAN_CFLAGS})
 endif()
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  add_subdirectory(tests)
+endif()
diff --git a/compiler-rt/lib/gwp_asan/mutex.h b/compiler-rt/lib/gwp_asan/mutex.h
new file mode 100644
index 0000000000000..c29df4cde1640
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/mutex.h
@@ -0,0 +1,50 @@
+//===-- mutex.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_MUTEX_H_
+#define GWP_ASAN_MUTEX_H_
+
+#ifdef __unix__
+#include <pthread.h>
+#else
+#error "GWP-ASan is not supported on this platform."
+#endif
+
+namespace gwp_asan {
+class Mutex {
+public:
+  constexpr Mutex() = default;
+  ~Mutex() = default;
+  Mutex(const Mutex &) = delete;
+  Mutex &operator=(const Mutex &) = delete;
+  // Lock the mutex.
+  void lock();
+  // Nonblocking trylock of the mutex. Returns true if the lock was acquired.
+  bool tryLock();
+  // Unlock the mutex.
+  void unlock();
+
+private:
+#ifdef __unix__
+  pthread_mutex_t Mu = PTHREAD_MUTEX_INITIALIZER;
+#endif // defined(__unix__)
+};
+
+class ScopedLock {
+public:
+  explicit ScopedLock(Mutex &Mx) : Mu(Mx) { Mu.lock(); }
+  ~ScopedLock() { Mu.unlock(); }
+  ScopedLock(const ScopedLock &) = delete;
+  ScopedLock &operator=(const ScopedLock &) = delete;
+
+private:
+  Mutex &Mu;
+};
+} // namespace gwp_asan
+
+#endif // GWP_ASAN_MUTEX_H_
diff --git a/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp b/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp
new file mode 100644
index 0000000000000..e15bca8825711
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp
@@ -0,0 +1,30 @@
+//===-- mutex_posix.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mutex.h"
+
+#include <assert.h>
+#include <pthread.h>
+
+namespace gwp_asan {
+void Mutex::lock() {
+  int Status = pthread_mutex_lock(&Mu);
+  assert(Status == 0);
+  // Remove warning for non-debug builds.
+  (void)Status;
+}
+
+bool Mutex::tryLock() { return pthread_mutex_trylock(&Mu) == 0; }
+
+void Mutex::unlock() {
+  int Status = pthread_mutex_unlock(&Mu);
+  assert(Status == 0);
+  // Remove warning for non-debug builds.
+  (void)Status;
+}
+} // namespace gwp_asan
diff --git a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
new file mode 100644
index 0000000000000..6a59be5bca615
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
@@ -0,0 +1,49 @@
+include(CompilerRTCompile)
+
+set(GWP_ASAN_UNITTEST_CFLAGS
+  ${COMPILER_RT_UNITTEST_CFLAGS}
+  ${COMPILER_RT_GTEST_CFLAGS}
+  -I${COMPILER_RT_SOURCE_DIR}/lib/
+  -O2)
+
+file(GLOB GWP_ASAN_HEADERS ../*.h)
+file(GLOB GWP_ASAN_UNITTESTS *.cpp)
+set(GWP_ASAN_UNIT_TEST_HEADERS
+  ${GWP_ASAN_HEADERS})
+
+add_custom_target(GwpAsanUnitTests)
+set_target_properties(GwpAsanUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
+
+set(GWP_ASAN_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS})
+list(APPEND GWP_ASAN_UNITTEST_LINK_FLAGS --driver-mode=g++)
+if(NOT WIN32)
+  list(APPEND GWP_ASAN_UNITTEST_LINK_FLAGS -lpthread)
+endif()
+
+if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST GWP_ASAN_SUPPORTED_ARCH)
+  # GWP-ASan unit tests are only run on the host machine.
+  set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH})
+
+  set(GWP_ASAN_TEST_RUNTIME RTGwpAsanTest.${arch})
+
+  set(GWP_ASAN_TEST_RUNTIME_OBJECTS
+    $<TARGET_OBJECTS:RTGwpAsan.${arch}>)
+
+  add_library(${GWP_ASAN_TEST_RUNTIME} STATIC
+    ${GWP_ASAN_TEST_RUNTIME_OBJECTS})
+
+  set_target_properties(${GWP_ASAN_TEST_RUNTIME} PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    FOLDER "Compiler-RT Runtime tests")
+
+  set(GwpAsanTestObjects)
+  generate_compiler_rt_tests(GwpAsanTestObjects
+    GwpAsanUnitTests "GwpAsan-${arch}-Test" ${arch}
+    SOURCES ${GWP_ASAN_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE}
+    RUNTIME ${GWP_ASAN_TEST_RUNTIME}
+    DEPS gtest ${GWP_ASAN_UNIT_TEST_HEADERS}
+    CFLAGS ${GWP_ASAN_UNITTEST_CFLAGS}
+    LINK_FLAGS ${GWP_ASAN_UNITTEST_LINK_FLAGS})
+  set_target_properties(GwpAsanUnitTests PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
diff --git a/compiler-rt/lib/gwp_asan/tests/driver.cpp b/compiler-rt/lib/gwp_asan/tests/driver.cpp
new file mode 100644
index 0000000000000..b402cec1126b3
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/driver.cpp
@@ -0,0 +1,14 @@
+//===-- driver.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+int main(int argc, char **argv) {
+  testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp b/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp
new file mode 100644
index 0000000000000..36f7e1d2323d3
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp
@@ -0,0 +1,89 @@
+//===-- mutex_test.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/mutex.h"
+#include "gtest/gtest.h"
+
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+using gwp_asan::Mutex;
+using gwp_asan::ScopedLock;
+
+TEST(GwpAsanMutexTest, LockUnlockTest) {
+  Mutex Mu;
+
+  ASSERT_TRUE(Mu.tryLock());
+  ASSERT_FALSE(Mu.tryLock());
+  Mu.unlock();
+
+  Mu.lock();
+  Mu.unlock();
+
+  // Ensure that the mutex actually unlocked.
+  ASSERT_TRUE(Mu.tryLock());
+  Mu.unlock();
+}
+
+TEST(GwpAsanMutexTest, ScopedLockUnlockTest) {
+  Mutex Mu;
+  { ScopedLock L(Mu); }
+  // Locking will fail here if the scoped lock failed to unlock.
+  EXPECT_TRUE(Mu.tryLock());
+  Mu.unlock();
+
+  {
+    ScopedLock L(Mu);
+    EXPECT_FALSE(Mu.tryLock()); // Check that the c'tor did lock.
+
+    // Manually unlock and check that this succeeds.
+    Mu.unlock();
+    EXPECT_TRUE(Mu.tryLock()); // Manually lock.
+  }
+  EXPECT_TRUE(Mu.tryLock()); // Assert that the scoped destructor did unlock.
+  Mu.unlock();
+}
+
+static void synchronousIncrementTask(std::atomic<bool> *StartingGun, Mutex *Mu,
+                                     unsigned *Counter,
+                                     unsigned NumIterations) {
+  while (!StartingGun) {
+    // Wait for starting gun.
+  }
+  for (unsigned i = 0; i < NumIterations; ++i) {
+    ScopedLock L(*Mu);
+    (*Counter)++;
+  }
+}
+
+static void runSynchronisedTest(unsigned NumThreads, unsigned CounterMax) {
+  std::vector<std::thread> Threads;
+
+  ASSERT_TRUE(CounterMax % NumThreads == 0);
+
+  std::atomic<bool> StartingGun{false};
+  Mutex Mu;
+  unsigned Counter = 0;
+
+  for (unsigned i = 0; i < NumThreads; ++i)
+    Threads.emplace_back(synchronousIncrementTask, &StartingGun, &Mu, &Counter,
+                         CounterMax / NumThreads);
+
+  StartingGun = true;
+  for (auto &T : Threads)
+    T.join();
+
+  EXPECT_EQ(CounterMax, Counter);
+}
+
+TEST(GwpAsanMutexTest, SynchronisedCounterTest) {
+  runSynchronisedTest(4, 100000);
+  runSynchronisedTest(1000, 1000000);
+}
diff --git a/compiler-rt/test/gwp_asan/CMakeLists.txt b/compiler-rt/test/gwp_asan/CMakeLists.txt
index e69de29bb2d1d..2782b9a0c1101 100644
--- a/compiler-rt/test/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/test/gwp_asan/CMakeLists.txt
@@ -0,0 +1,45 @@
+set(GWP_ASAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(GWP_ASAN_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+set(GWP_ASAN_TESTSUITES)
+
+set(GWP_ASAN_UNITTEST_DEPS)
+set(GWP_ASAN_TEST_DEPS
+  ${SANITIZER_COMMON_LIT_TEST_DEPS}
+  gwp_asan)
+
+if (COMPILER_RT_INCLUDE_TESTS)
+  list(APPEND GWP_ASAN_TEST_DEPS GwpAsanUnitTests)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/unit/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/unit/lit.site.cfg)
+  add_lit_testsuite(check-gwp_asan-unit "Running GWP-ASan unit tests"
+    ${CMAKE_CURRENT_BINARY_DIR}/unit
+    DEPENDS ${GWP_ASAN_TEST_DEPS})
+  set_target_properties(check-gwp_asan-unit PROPERTIES FOLDER
+    "Compiler-RT Tests")
+    list(APPEND GWP_ASAN_TEST_DEPS check-gwp_asan-unit)
+endif()
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+  )
+
+foreach(arch ${GWP_ASAN_SUPPORTED_ARCH})
+  set(GWP_ASAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" GWP_ASAN_TEST_CONFIG_SUFFIX)
+  get_test_cc_for_arch(${arch} GWP_ASAN_TEST_TARGET_CC GWP_ASAN_TEST_TARGET_CFLAGS)
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg)
+  list(APPEND GWP_ASAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+add_lit_testsuite(check-gwp_asan "Running the GWP-ASan tests"
+  ${GWP_ASAN_TESTSUITES}
+  DEPENDS ${GWP_ASAN_TEST_DEPS})
+set_target_properties(check-gwp_asan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/compiler-rt/test/gwp_asan/dummy_test.cc b/compiler-rt/test/gwp_asan/dummy_test.cc
new file mode 100644
index 0000000000000..93e522ab9f0a9
--- /dev/null
+++ b/compiler-rt/test/gwp_asan/dummy_test.cc
@@ -0,0 +1,4 @@
+// Exists to simply stop warnings about lit not discovering any tests here.
+// RUN: %clang %s
+
+int main() { return 0; }
diff --git a/compiler-rt/test/gwp_asan/lit.cfg b/compiler-rt/test/gwp_asan/lit.cfg
new file mode 100644
index 0000000000000..e8ef2925e79d0
--- /dev/null
+++ b/compiler-rt/test/gwp_asan/lit.cfg
@@ -0,0 +1,31 @@
+# -*- Python -*-
+
+import os
+
+# Setup config name.
+config.name = 'GWP-ASan' + config.name_suffix
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Test suffixes.
+config.suffixes = ['.c', '.cc', '.cpp', '.test']
+
+# C & CXX flags.
+c_flags = ([config.target_cflags])
+
+# Android doesn't want -lrt.
+if not config.android:
+  c_flags += ["-lrt"]
+
+cxx_flags = (c_flags + config.cxx_mode_flags + ["-std=c++11"])
+
+def build_invocation(compile_flags):
+  return " " + " ".join([config.clang] + compile_flags) + " "
+
+# Add substitutions.
+config.substitutions.append(("%clang ", build_invocation(c_flags)))
+
+# GWP-ASan tests are currently supported on Linux only.
+if config.host_os not in ['Linux']:
+   config.unsupported = True
diff --git a/compiler-rt/test/gwp_asan/lit.site.cfg.in b/compiler-rt/test/gwp_asan/lit.site.cfg.in
new file mode 100644
index 0000000000000..1fb1c1955f61a
--- /dev/null
+++ b/compiler-rt/test/gwp_asan/lit.site.cfg.in
@@ -0,0 +1,11 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.name_suffix = "@GWP_ASAN_TEST_CONFIG_SUFFIX@"
+config.target_arch = "@GWP_ASAN_TEST_TARGET_ARCH@"
+config.target_cflags = "@GWP_ASAN_TEST_TARGET_CFLAGS@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@GWP_ASAN_LIT_SOURCE_DIR@/lit.cfg")
diff --git a/compiler-rt/test/gwp_asan/unit/lit.site.cfg.in b/compiler-rt/test/gwp_asan/unit/lit.site.cfg.in
new file mode 100644
index 0000000000000..9378004b8dcd6
--- /dev/null
+++ b/compiler-rt/test/gwp_asan/unit/lit.site.cfg.in
@@ -0,0 +1,9 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.name = "GwpAsan-Unittest"
+# Load common config for all compiler-rt unit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/unittests/lit.common.unit.configured")
+
+config.test_exec_root = os.path.join("@COMPILER_RT_BINARY_DIR@",
+                                     "lib", "gwp_asan", "tests")
+config.test_source_root = config.test_exec_root

From 0e124b37bd7fa42ab1087668b1fa7c35a9684deb Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Thu, 30 May 2019 19:59:20 +0000
Subject: [PATCH 0661/1176] [RuntimeDyld] Apply padding and alignment bumps to
 all sections with stubs, and increase the MachO/x86-64 stub alignment to 8.

Stub alignment should be guaranteed for any section containing RuntimeDyld
stubs/GOT-entries. To do this we should pad and align all sections containing
stubs, not just code sections.

This commit also bumps the MachO/x86-64 stub alignment to 8, so that GOT entries
will be aligned.

llvm-svn: 362139
---
 llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp  | 11 +++++------
 .../RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h      |  2 +-
 .../RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s    |  5 +++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index f99868db4439b..b1d22b89af085 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -799,13 +799,12 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
     pData = data.data();
   }
 
-  // Code section alignment needs to be at least as high as stub alignment or
-  // padding calculations may by incorrect when the section is remapped to a
-  // higher alignment.
-  if (IsCode) {
+  // If there are any stubs then the section alignment needs to be at least as
+  // high as stub alignment or padding calculations may by incorrect when the
+  // section is remapped.
+  if (StubBufSize != 0) {
     Alignment = std::max(Alignment, getStubAlignment());
-    if (StubBufSize > 0)
-      PaddingSize += getStubAlignment() - 1;
+    PaddingSize += getStubAlignment() - 1;
   }
 
   // Some sections, such as debug info, don't need to be loaded for execution.
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 249f8dc0f29e5..28febbdb948c3 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -28,7 +28,7 @@ class RuntimeDyldMachOX86_64
 
   unsigned getMaxStubSize() const override { return 8; }
 
-  unsigned getStubAlignment() override { return 1; }
+  unsigned getStubAlignment() override { return 8; }
 
   Expected<relocation_iterator>
   processRelocationRef(unsigned SectionID, relocation_iterator RelI,
diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s b/llvm/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
index dc69a15072597..92052957a37da 100644
--- a/llvm/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
+++ b/llvm/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
@@ -22,8 +22,9 @@ insn2:
 	movl	x(%rip), %eax
 
 # Test PC-rel GOT relocation.
-# Verify both the contents of the GOT entry for y, and that the movq instruction
-# references the correct GOT entry address:
+# Verify the alignment of the GOT entry, the contents of the GOT entry for y,
+# and that the movq instruction references the correct GOT entry address:
+# rtdyld-check: stub_addr(test_x86-64.o/__text, y)[2:0] = 0
 # rtdyld-check: *{8}(stub_addr(test_x86-64.o/__text, y)) = y
 # rtdyld-check: decode_operand(insn3, 4) = stub_addr(test_x86-64.o/__text, y) - next_pc(insn3)
 insn3:

From 9b2aeb77b041f8d2ef2ba039ba93b6effcebe4b9 Mon Sep 17 00:00:00 2001
From: Douglas Yung <douglas.yung@sony.com>
Date: Thu, 30 May 2019 20:02:51 +0000
Subject: [PATCH 0662/1176] Mark test as requiring an ARM target.

llvm-svn: 362140
---
 clang/test/Driver/armv8.1m.main.s | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s
index cdeb423116f9f..ca6becd933f60 100644
--- a/clang/test/Driver/armv8.1m.main.s
+++ b/clang/test/Driver/armv8.1m.main.s
@@ -1,3 +1,4 @@
+# REQUIRES: arm-registered-target
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8-m.main %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V8M < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main %s 2>%t

From c58130bc8445edfa7438fe210da442b7019d0363 Mon Sep 17 00:00:00 2001
From: Michael Trent <mtrent@apple.com>
Date: Thu, 30 May 2019 20:09:09 +0000
Subject: [PATCH 0663/1176] Write new tests for r362121

Summary:
The tests for r362121 ran dsymutil against a test binary every time.
This caused problems on lld-x86_64-ubuntu-fast as dsymutil required
a lipo tool be available to process those binaries.

This change rewrites the new test cases in macho-disassemble-g-dsym
to use bespoke test binaries (exe and dwarf) simplifying the test's
runtime dependencies.

The changes to tools/llvm-objdump/MachODump.cpp are unchanged from
r362121

Reviewers: pete, lhames, JDevlieghere

Reviewed By: pete

Subscribers: smeenai, aprantl, rupprecht, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62694

llvm-svn: 362141
---
 .../llvm-objdump/X86/Inputs/hello-macho-fat   | Bin 0 -> 25072 bytes
 .../X86/Inputs/hello-macho-fat.dwarf          | Bin 0 -> 17713 bytes
 .../llvm-objdump/X86/Inputs/hello-macho-thin  | Bin 0 -> 8696 bytes
 .../X86/Inputs/hello-macho-thin.dwarf         | Bin 0 -> 8817 bytes
 .../X86/macho-disassemble-g-dsym.test         |  14 ++++
 llvm/tools/llvm-objdump/MachODump.cpp         |  63 ++++++++++++++++--
 6 files changed, 71 insertions(+), 6 deletions(-)
 create mode 100755 llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-fat
 create mode 100644 llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-fat.dwarf
 create mode 100755 llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-thin
 create mode 100644 llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-thin.dwarf

diff --git a/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-fat b/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-fat
new file mode 100755
index 0000000000000000000000000000000000000000..bcfbeeb97de2e385e13b1edfd700b1c0f0746ac7
GIT binary patch
literal 25072
zcmeI2PiP!f9LL`#tu7kd&87aANl2s>yAw@YgCJxjOSf3k)NHkg^4#4_vJ102VP}>$
z*26*($)S?72QLLr9>i1i;6=si#hyewTTxKRMUbLhf8V!nc4s$5dl6Fl{lRa3@Av-v
z=Dp9GftmN-w>LlgTq!k@QtDo@X{9n^Cbs1=hP<{?qe{*9@+3ha0TLhq5+DH*AOR8}
z0TLhq5+DH*xWfp1cjNb)^5dV9AAdC>x?eQ2SE+ZDBPpdG7A=Tsy_7qie=Yy=DdY}{
ztP}jFAkGg`QR`y<Y;mx<X^&-xq*!0NqGCm{){%EU>Q@|&w<H5Vyr*mx_<Q=*;k1*k
z)?w5thrO)fcyHNw_t+Y-A2&$bcyW0*p00ZSy4RFS!|~cS-Xk^ud~ol{j{a4`s99^Q
z*$Tt)-na3@7=fRZf!I=@b*pi%)>zTC#%f^v!TI>e#>1RL;19>>mUOR}*Eut<3%O!0
zQ6lp<Ft73aS}Ti6SXeTOR$4cF9d6Xifv?x2ruesxhv(FY#6kO5z~9`@W@x<@Bp#C;
z@zS<`aXk3L@si`#+AsZE|1v}I;3wl@aV_ch->Z`+7GKIQoQU5EXNa5lkZn6SnB+Jo
zm1@tRf@2o%R9fsq@U1vbcSUbTEb3uQD6fh?CU$N@sVidd7hMvCfC&_J@3U6SE#;uJ
zA9%lPt$ku0QQX6Ag-zG5mEDyMe?@%AqoU}?;)QX2=*rAjAG|V~yMA`ce^&Aw(Uhp#
zCwCV`+OXU6cM9*dpzX;)89(CSJx_`mn`EF2<Jf!kC+TLY-xul6@<teW_3VpT=~u1X
zZT+Me#DCN2GB{*?@S>0a36KB@kN^pg011%5Fo83L&Fh7Wzh%D8|AnH$r5BEf`B?q(
zfS6~#+06gBd9`ry%gh^Jbf&i?>*S@!H^k(YJJYaRRk^5Nf9`ZT@(bw)Pdn#=roS?Q
z&v5e{{1_4<NsI4WeE;H3&<plk7-@Vrk4ejnX)Vs+?}N!mUbdvI`Swm`GED*`KmsH{
z0wh2JBtQZrKmsH{0wi!p6BwV&oW=mg=dn3wHp6vU_J?Kj`gAkBrJ9-XdA%+h(9x>G
zdWA}9*^eL5^{V#HdzDt?$zFE})hFtuT0>1^F=A9*djfSu;>1x&$9sRs$9{Iw55!+h
zr_{TX+pnV>YY<tvU|j?0Pei8@KJunSu^$M5zbKcazk*CF=+^#Jcjpk<2aMm3kM$JE
z@3lbeq;D2rl$ysrDbkMYDDPyT4TG514x;Xv&})WnJ!*Q5$UU`O3<5uNE0xIDa%)Wv
zH-u5>9yvDm-0bYpW3DobK-r3NmtW1hMXyv3-G$=qv@{J5*GrXZaCo&8Wdqgin{=H*
z0wh2JBtQZrKmsH{0wh2JByblJh|mAaIf1tMM-~P=|KEQQ;`9FT`Tp3CYw-L(&;R3D
z;Q9afjp*9jJ^%m5gEtPm_u=!$FaKD${OR}WZ}a^BAV+#6KmsH{0wh2JBtQbYn*h)M
r@9z0w??`|INPq-LfCNZ@1W14cNPq-L;BF+atIq$A^8Ek*eg6L+o!RLr

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-fat.dwarf b/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-fat.dwarf
new file mode 100644
index 0000000000000000000000000000000000000000..57855afaa11f3d50db19bf5480c6e431e87c1364
GIT binary patch
literal 17713
zcmeHN&u<$=6rT0Q#l*!;sAw9B#8L&(An*@TN=Omd(2zu>P1S}{(Zgu%EUByY+VQSS
z3zf)`6MsMt5mG59jvNXH#DxnNZm7z=$4b3$DN^9Q**BZ@dJ~L<5JK-Ivv20jn{Q{o
znO)oa-p||L{~(0O7(zS-+JsUS;^;c)EZK6<{!!b&-Y362daw-x4IDTM1)Rl~h(vM(
z^kFE!O*}gE#n*ESx3?B=fA?$W<||}}a|ls@kyt=7P|#nDg>?(`GtiubvIs@WbG3!}
z59iM>Ln9#yD`87E5$7H_O1UzBX{EncjfeB4NfwN{V84`sb3NGcOyZq80Hi?oi#sl2
zBvyyUM!YGdAN1;eWJ=<FLU{Wkj}TusbA+e+Lp*uKaXU^IoRWAOg!i-t0FB&7GBCe}
zA9S1E8aX8KZV?`+RO!P84=H=z$4zflHocG9s@=aH-w+<Ixt1h`wxh;%omK04s<u*#
z^}u@WwXV2tb-__mdam?uwCZhFc7iUn)A4YhYr*kakbiYPGDx}Bj%||xyd2GMFTD75
zOX=Es&2Q{4@bpct=eu)Xy|;As^!%wMeNR{k-srw~g9hR?#;+4`?gA0O*9W!p?~)g&
zastN+yVmKV^nCI(LdsRA-dmGy({s?Y8(x+Cg{3R$TH|$kb<b`&eiE-r^TX#9O4U+o
z*Fy|1xX~HJyG(fczPdalp53**HOC*g-X7tBQqmg|k8Z00ygP(<n(*!niD%dAUFX{1
zdORe&*9q_8ka+MgQ6rM;F<lAwOONoTpQFkyN=m!aaUH1^7krX<n}nzL?dIRX>)4GX
z-WK6a(R{Xs#8aCkWq#X)w@i53L*ljTR~tAaIlnXT!D@hA&+p7p0%pYwFayj0Gr$Zm
z1I)lb!~lMcKVXb0H2mIiNVSE~zkl%?O6!Vrt~-rh;K1)udli0<%9h>q)EGW61Iz$3
zzzi@0%m6dM3@`)C05iZ0FaylMf1d%2tBi<e;b<7f?}aB&;d=lmhJfg^+b*g&g%FQt
z^gDs4vgYXhTq%<S=Lzr)LxC8P!f1K)Y5-p)>~y^Yt)>@-QeTRC!mz0ZjtASpC6WcG
zji^>mO>V!Kp}v!#<D;WFB9w8_#iNO*HrL^h9TCR3@qFQ6)+~%o94UPJ_T;$nB!ql3
zMS+#-VY4uoKbe1Fa_Qh(5D_(}gjskqpI5=n8pf%th6`~%>zdQ`n{CgkREp(d**d1I
zFIOsMXca5R)&F!vU+H}x{7bE%>v%zF`NB%O?fRugBTyQnrGeuIe(Cs$+1F-fUOf>~
zh-58Zs4dcJsHo6>xCoTc7Ul|E{p{9FBvf51XiZ{#ERIQz{R1TKgP1CF12mFth<y-Q
z&%$3GYT>!c8=vQAp%2MKe`FRSRbL&OQ|NIx=t(^#+GC>gPjDlQLP4FDe{%NIaXEdr
z{L|*encDqJQ!eM9hz+1B63##2{1d$yOkDwu_igwP4vn0D5<a=AAss?kx{|JA`68Tu
z0u*unN%}s~*1mie&Ogz#ux19B0cL<1U<Q~0W*}mK^G_lwxWWuD1Iz$3zzi@0%m6dM
b3@`)C05iZ0{BIfH{1eVU(fKU@O8&{8XH_PL

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-thin b/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-thin
new file mode 100755
index 0000000000000000000000000000000000000000..c7283b408cda36a68973ce38e1391de39ac1b6a2
GIT binary patch
literal 8696
zcmeHN&x;&I6n>LU;-E2^y@<c*jaga6Oh@x;L4@6~LnauKU3Y>aN;yAzcLsW<m+l^t
z)yqoou#mu9a>?J2L%@GPiDEn@$B=UpB1H5UL><4crh2ElcM-2bJ$U`<d-dvd)mKwc
z+i&mu@$28uh!iG7Hg<`~0cdHD$T#AkCGrxq3RP-x{&eNj%17tv%}OB)cSOu{4}nV6
zDxcJ{A$oi%>l1@96-ks4O2yu#cpRJ0w}=HW-`ggHc=tG*U?UBcisF7F+H&RdeQEOT
zH4zC;1{6)cq<@r8wLO2$3lWshw_)<VY!VQY_bu5rzh)GNo!+Vm$mhFm^1)~!PD>^?
zI+W`7E_Qk?)#<GS8fW+8hRMf0rx53JjCw{-?(3}H*M<4oe5wcgm)X~3e~rcOO%mlO
zrFy=K*1L_sS8H(yymP){@(SeP_(aIRexGqDwHl;;Cfo8A&HR#l#QA*bbt~o9|80Jy
zTt4D-KAf=(9-phTXU=_CSvZrN2`9%JFy*OP98G$i2XStXFu)PvT8gkQ!Y1N4IL`n_
zU>V0XvA+$x4|aA+<U82spo>t7&I8NXXN`r~QD!hcbG}T}epm;}cewp1bp1}lZLRw)
zV9F~{&g1@X?^nOSc5-R(b?4!=E3F^jgU^8$pt2uthqA4?-2GR9=bGzDWCrVJ9-i~G
z=(WMYKCWZW?FX1<VLTViXL&t}y>9tb8T0BiMx)<^Vg4_i5f5(}OunpG1}p=X0n318
zz%pPNunbrR@(f(4Za%DDep33W@+Z5hS3fugb5kBa5A*KNo0X@Vx2u<bD1ClwI6Z*t
z?A6!SVdj^I)3k#&9`ehh;c$pQA^qST=VB20ttno^^*#7iHXv!s`z`Okd=vF6a~Ect
z_vU>VS<<5cvVT5kNBS|qSbg_SJ8j!CU>UFsSOzQumI2FvWxz6E8L$jk1}p=Xf&VoF
zlLt#nT)^ZUi*vddt{SKhqj<d(76%fRCg;=|3efS2@Oy<66zcK2RJX0XOJ1`dd#H8S
z89vpmcX~3-j}a4c_jSfXlE!hgliDBstY@cj0K76$kehG({u%rE4Wf((ziY7lE%Z<d
z({~8UdLV`P93E-hqSFYcb)Yb+9Mb)Q#^e0_o&x{Y2<A>>{Q*W~j&)MD9aNOJbD%T_
z318b7buUC-7`fdz^m?&-ez_I|e&jZrv9^u=D*m}4iX->fiP@7!j~+kaiv9>xZi>77
jamB59^={-Y)c(szJ@9b1-fRbl<94T44rDYr%$@QNMwauR

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-thin.dwarf b/llvm/test/tools/llvm-objdump/X86/Inputs/hello-macho-thin.dwarf
new file mode 100644
index 0000000000000000000000000000000000000000..9bcd6e2130cfc4465aa95763886c2c3e67e82fcf
GIT binary patch
literal 8817
zcmeI2O-vI(6vt;N)oQhX#1F*8)nF8({SXBdqf|j4sHosa<S^Nm6>GNJ(%s@mOvIZ9
z4_>%<FdpRQ(SuhHp1m6G9F3ecLEoEwQ<iPPnt1Rq1G8^F-u!mvKfC?d*DpW5t!!pY
z3o*vFLXJRw3k@>0Ltzi(#qWK?pXPh6zj~fqntwU<`2@9V=oo{Mm?1SlMt|1qm36TD
zpxOp`7&7PA(nEuH2XBl)C6ELw&=#yB=PNjJJ~4P_Vr{P&59h0u!q6x)_H*u<kK9$y
zAn#fehyvY5S6x`6uMCwrf0c8`EoYocQ;_$Bc$+F79$!`?#FPCVkKZ?~qFDl`Aa9O%
zdnEuYYF|lJ^UFGJDQ8cULy-5HcwlO#HEi(Uylg+r*;71c-z$jv+WmM>Jlu1cf(+S?
z7WcJR?CU^!BJJye{oH6@@w$~6M{#aj+<BbO6f9nJOHi-P!|Plo94`~<FV05|oKF{g
zmq`^bLi5{*=U+GHma)<Nn*YGVThl+^)%)u5$koxoff0F6=mFj6zBo(k!ZrHWiJXrB
z2EA^lZ(JfTFv$Q%_qt|iQF1;}8o~LLnJG_mD`%V7v>vaY{FyEbwXNh8%Ng6qn@*6|
zPxHgqDNOykFzX(CJoj<2nm0*2d0$P|$umlZJ#9Kw>n#%xj7a4=d30M<@fL_TO1y<S
zc}6BvGH0vzW0`oTiML!Q4_-{fh~R#7ro8K=OuWtmR9MGwZWN1_$;IY^PmuS9c=Eb^
z^EbSrkqz<|iPu5%S*(*ME|wbeTOr;U@mA{O6*3R9I3zf~EAYmuLA#vam4gIUMk$~a
zPzopolmbctrNBQ@06)ezY0ZL$pF4JmI%D$tFMdMF-{<BdGh22|_&#b(!S_*~H*&TZ
zqYg>|rGQdEDWDWk3Md7X0!jg;fKosypcGIF{QnAIT&0oihoh!xKV#d`;Bx@T8iVMw
zRfvl?1!G$p<THVt;n3ElNTMME&bz_a3k70C3ZvzfM+5jMVYTZb)N-~LN<CKT@xrDO
z><_ksOC=3KsZrD-VshJ97xndl^^dO15mDML3y&c@b!Y|-;YOylXoq60;ZUr3cYEx^
zg|-%L2ZVeZ;tUJ@dqS}@(Z1;6wvpEJ5D^V^uu$w=G%A9dX^d0pSqtKP`m9-Uas^vY
zrQ*qWQa>VEkEc>esKrw!#eZi-zQj!j{1bV%WZG_GY<!|nu$)9T>k5m}QrC1`Cvmd7
zx2LP?RJSJ~iZwi*9;V$8QK4<H2uxm^_gjGn&t5-8LB(3ZN)i2IaZGURFGzkLWQt#I
sfJL!2wh1E5Vfc$e$vana?nSg0`cO>tM`a;W_3p?SLHFaJr{*bt0~<UM!2kdN

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
index f06567764b84b..c4841bbcb521f 100644
--- a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
+++ b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
@@ -5,3 +5,17 @@
 // RUN: FileCheck --input-file %t0 %s
 
 CHECK: Disassembly of section __TEXT,__text:
+
+// RUN: llvm-objdump -m -d -g -dsym %p/Inputs/hello-macho-fat.dwarf %p/Inputs/hello-macho-fat | FileCheck -check-prefix MACHO_DSYM %s
+// RUN: llvm-objdump -m -d -g -dsym %p/Inputs/hello-macho-fat.dwarf %p/Inputs/hello-macho-thin | FileCheck -check-prefix MACHO_DSYM %s
+// RUN: llvm-objdump -m -d -g -dsym %p/Inputs/hello-macho-thin.dwarf %p/Inputs/hello-macho-thin | FileCheck -check-prefix MACHO_DSYM %s
+
+MACHO_DSYM: (__TEXT,__text) section
+
+// RUN: llvm-objdump -m -d -g -dsym %p/../Inputs/libbogus11.a %p/../../dsymutil/Inputs/basic.macho.x86_64 2>&1 | FileCheck -check-prefix BAD_INPUT %s
+
+BAD_INPUT: is not a Mach-O or Universal file type.
+
+// RUN: not llvm-objdump -m -d -g -dsym %p/Inputs %p/Inputs/hello-macho-thin 2>&1 | FileCheck -check-prefix DIRECTORY %s
+
+DIRECTORY: Is a directory
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index ea92ef9268a1b..b684daacb611e 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -7223,11 +7223,13 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
   raw_ostream &DebugOut = nulls();
 #endif
 
+  // Try to find debug info and set up the DIContext for it.
   std::unique_ptr<DIContext> diContext;
-  ObjectFile *DbgObj = MachOOF;
+  std::unique_ptr<Binary> DSYMBinary;
   std::unique_ptr<MemoryBuffer> DSYMBuf;
-  // Try to find debug info and set up the DIContext for it.
   if (UseDbg) {
+    ObjectFile *DbgObj = MachOOF;
+
     // A separate DSym file path was specified, parse it as a macho file,
     // get the sections and supply it to the section name parsing machinery.
     if (!DSYMFile.empty()) {
@@ -7238,12 +7240,61 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
         return;
       }
 
-      std::unique_ptr<MachOObjectFile> DbgObjCheck = unwrapOrError(
-          ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef()),
-          DSYMFile.getValue());
-      DbgObj = DbgObjCheck.release();
       // We need to keep the file alive, because we're replacing DbgObj with it.
       DSYMBuf = std::move(BufOrErr.get());
+
+      Expected<std::unique_ptr<Binary>> BinaryOrErr =
+      createBinary(DSYMBuf.get()->getMemBufferRef());
+      if (!BinaryOrErr) {
+        report_error(BinaryOrErr.takeError(), DSYMFile);
+        return;
+      }
+
+      // We need to keep the Binary elive with the buffer
+      DSYMBinary = std::move(BinaryOrErr.get());
+    
+      if (ObjectFile *O = dyn_cast<ObjectFile>(DSYMBinary.get())) {
+        // this is a Mach-O object file, use it
+        if (MachOObjectFile *MachDSYM = dyn_cast<MachOObjectFile>(&*O)) {
+          DbgObj = MachDSYM;
+        }
+        else {
+          WithColor::error(errs(), "llvm-objdump")
+            << DSYMFile << " is not a Mach-O file type.\n";
+          return;
+        }
+      }
+      else if (auto UB = dyn_cast<MachOUniversalBinary>(DSYMBinary.get())){
+        // this is a Universal Binary, find a Mach-O for this architecture
+        uint32_t CPUType, CPUSubType;
+        const char *ArchFlag;
+        if (MachOOF->is64Bit()) {
+          const MachO::mach_header_64 H_64 = MachOOF->getHeader64();
+          CPUType = H_64.cputype;
+          CPUSubType = H_64.cpusubtype;
+        } else {
+          const MachO::mach_header H = MachOOF->getHeader();
+          CPUType = H.cputype;
+          CPUSubType = H.cpusubtype;
+        }
+        Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr,
+                                                  &ArchFlag);
+        Expected<std::unique_ptr<MachOObjectFile>> MachDSYM =
+            UB->getObjectForArch(ArchFlag);
+        if (!MachDSYM) {
+          report_error(MachDSYM.takeError(), DSYMFile);
+          return;
+        }
+    
+        // We need to keep the Binary elive with the buffer
+        DbgObj = &*MachDSYM.get();
+        DSYMBinary = std::move(*MachDSYM);
+      }
+      else {
+        WithColor::error(errs(), "llvm-objdump")
+          << DSYMFile << " is not a Mach-O or Universal file type.\n";
+        return;
+      }
     }
 
     // Setup the DIContext

From 1d9ec7a81b7651d8f02e28cfb328e2a034941a8c Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 20:36:54 +0000
Subject: [PATCH 0664/1176] [DAGCombiner][X86][AArch64][AMDGPU] (x + C) - y  ->
  (x - y) + C  fold. Try 3

Summary:
The main motivation is shown by all these `neg` instructions that are now created.
In particular, the `@reg32_lshr_by_negated_unfolded_sub_b` test.

AArch64 test changes all look good (`neg` created), or neutral.

X86 changes look neutral (vectors), or good (`neg` / `xor eax, eax` created).

I'm not sure about `X86/ragreedy-hoist-spill.ll`, it looks like the spill
is now hoisted into preheader (which should still be good?),
2 4-byte reloads become 1 8-byte reload, and are elsewhere,
but i'm not sure how that affects that loop.

I'm unable to interpret AMDGPU change, looks neutral-ish?

This is hopefully a step towards solving [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]].

https://rise4fun.com/Alive/pkdq (we are missing more patterns, i'll submit them later)

This is a recommit, originally committed in rL361852, but reverted
to investigate test-suite compile-time hangs, and then reverted in
rL362109 to fix missing constant folds that were causing
endless combine loops.

Reviewers: craig.topper, RKSimon, spatel, arsenm

Reviewed By: RKSimon

Subscribers: bjope, qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62223

llvm-svn: 362142
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 ++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  6 +-
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 16 ++--
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 21 ++---
 llvm/test/CodeGen/X86/combine-add.ll          |  4 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 83 ++++++++++---------
 llvm/test/CodeGen/X86/shift-amount-mod.ll     |  9 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 20 ++---
 llvm/test/CodeGen/X86/zext-sext.ll            | 21 ++---
 9 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b6164ac4ded2a..dc4f7f2d5908e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2964,6 +2964,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+  }
+
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index 6daef644761b5..d349eb09f7353 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -486,8 +486,7 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
+; CHECK-NEXT:    neg w8, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -500,8 +499,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
+; CHECK-NEXT:    neg x8, x1
 ; CHECK-NEXT:    sub x8, x8, x2
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index c571dac94b81e..8886954623f7c 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -96,8 +96,8 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -124,8 +124,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -152,8 +152,8 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w8, w2
+; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -282,8 +282,8 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -346,8 +346,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 20c84c5b63277..71c8f6926c1f9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -15,10 +15,11 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_add_i32 s2, s2, -1
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VARIANT0-NEXT:    s_waitcnt expcnt(0)
+; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -36,12 +37,12 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_add_i32 s2, s2, -1
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
+; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_sub_i32_e32 v3, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -59,8 +60,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    s_add_i32 s0, s0, -1
-; VARIANT2-NEXT:    v_sub_u32_e32 v3, s0, v0
+; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
+; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -82,8 +83,8 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    s_add_i32 s0, s0, -1
-; VARIANT3-NEXT:    v_sub_u32_e32 v3, s0, v0
+; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
+; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/X86/combine-add.ll b/llvm/test/CodeGen/X86/combine-add.ll
index 6f5f1370e6b4e..1d20fcf33d742 100644
--- a/llvm/test/CodeGen/X86/combine-add.ll
+++ b/llvm/test/CodeGen/X86/combine-add.ll
@@ -210,16 +210,16 @@ define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
 ; SSE-LABEL: combine_vec_add_sub_sub:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: combine_vec_add_sub_sub:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %1 = sub <4 x i32> %a, %b
   %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c9a577dbaa92b..fd3d83ed2cbec 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,8 +67,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_55
 ; CHECK-NEXT:  ## %bb.6: ## %SyTime.exit2720
-; CHECK-NEXT:    movq %rdx, %r14
-; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:    movq %rdi, %rbp
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rax
 ; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    cmpq %rax, %rcx
@@ -78,10 +78,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $32, %esi
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:  LBB0_8: ## %while.body.preheader
-; CHECK-NEXT:    imulq $1040, %r14, %rax ## imm = 0x410
+; CHECK-NEXT:    movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT:    imulq $1040, %rbx, %rax ## imm = 0x410
 ; CHECK-NEXT:    movq _syBuf@{{.*}}(%rip), %rcx
 ; CHECK-NEXT:    leaq 8(%rcx,%rax), %rbx
-; CHECK-NEXT:    movl $1, %r14d
+; CHECK-NEXT:    movl $1, %r15d
 ; CHECK-NEXT:    movq _syCTRO@{{.*}}(%rip), %rax
 ; CHECK-NEXT:    movb $1, %cl
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -91,48 +92,47 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %cl, %cl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %do.end
-; CHECK-NEXT:    xorl %r12d, %r12d
-; CHECK-NEXT:    testb %r12b, %r12b
+; CHECK-NEXT:    xorl %r14d, %r14d
+; CHECK-NEXT:    testb %r14b, %r14b
 ; CHECK-NEXT:    jne LBB0_11
 ; CHECK-NEXT:  ## %bb.12: ## %while.body200.preheader
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:    jmp LBB0_13
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_14: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    leal 1(%r12), %eax
+; CHECK-NEXT:    leal 1(%r14), %eax
 ; CHECK-NEXT:    cmpl $21, %eax
 ; CHECK-NEXT:    ja LBB0_20
 ; CHECK-NEXT:  ## %bb.15: ## %while.body200
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $-1, %ecx
+; CHECK-NEXT:    movl $-1, %r13d
 ; CHECK-NEXT:    movslq (%rsi,%rax,4), %rax
 ; CHECK-NEXT:    addq %rsi, %rax
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_18: ## %while.cond201.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    movl $1, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_44: ## %while.cond1037.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:    jmp LBB0_55
 ; CHECK-NEXT:  LBB0_26: ## %sw.bb474
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r13
+; CHECK-NEXT:    ## implicit-def: $r12
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.27: ## %do.body479.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    ## implicit-def: $r13
+; CHECK-NEXT:    ## implicit-def: $r12
 ; CHECK-NEXT:    jne LBB0_34
 ; CHECK-NEXT:  ## %bb.28: ## %land.rhs485.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -145,7 +145,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    js LBB0_55
 ; CHECK-NEXT:  ## %bb.30: ## %cond.true.i.i2780
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    movq %rax, %r13
+; CHECK-NEXT:    movq %rax, %r12
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_32
 ; CHECK-NEXT:  ## %bb.31: ## %lor.rhs500
@@ -157,16 +157,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    je LBB0_34
 ; CHECK-NEXT:  LBB0_32: ## %do.body479.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    leaq 1(%r13), %rax
+; CHECK-NEXT:    leaq 1(%r12), %rax
 ; CHECK-NEXT:    testb %dl, %dl
 ; CHECK-NEXT:    jne LBB0_29
 ; CHECK-NEXT:  ## %bb.33: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    incq %r13
+; CHECK-NEXT:    incq %r12
 ; CHECK-NEXT:  LBB0_34: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
-; CHECK-NEXT:    addl $-324, %eax ## imm = 0xFEBC
+; CHECK-NEXT:    leal -324(%r13), %eax
 ; CHECK-NEXT:    cmpl $59, %eax
 ; CHECK-NEXT:    ja LBB0_35
 ; CHECK-NEXT:  ## %bb.57: ## %if.end517
@@ -176,11 +175,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_38
 ; CHECK-NEXT:  LBB0_35: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $11, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl $11, %r13d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.36: ## %if.end517
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmpl $24, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl $24, %r13d
 ; CHECK-NEXT:    je LBB0_38
 ; CHECK-NEXT:  ## %bb.37: ## %if.then532
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
@@ -195,8 +194,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  ## %bb.39: ## %for.cond542.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    movb $0, (%r13)
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movb $0, (%r12)
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rsi
 ; CHECK-NEXT:    leaq {{.*}}(%rip), %rdi
 ; CHECK-NEXT:    jmp LBB0_21
@@ -208,22 +207,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jb LBB0_55
 ; CHECK-NEXT:  ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    movl $268, %ecx ## imm = 0x10C
+; CHECK-NEXT:    movl $268, %r13d ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    movl $2, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:  LBB0_40: ## %sw.bb566
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %ecx
+; CHECK-NEXT:    movl $20, %r13d
 ; CHECK-NEXT:    jmp LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_13: ## %while.body200
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_29 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_38 Depth 2
-; CHECK-NEXT:    leal -268(%r12), %eax
+; CHECK-NEXT:    leal -268(%r14), %eax
 ; CHECK-NEXT:    cmpl $105, %eax
 ; CHECK-NEXT:    ja LBB0_14
 ; CHECK-NEXT:  ## %bb.56: ## %while.body200
@@ -233,12 +232,12 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmpq *%rax
 ; CHECK-NEXT:  LBB0_20: ## %sw.bb256
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl %r12d, %ecx
+; CHECK-NEXT:    movl %r14d, %r13d
 ; CHECK-NEXT:  LBB0_21: ## %while.cond197.backedge
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    decl %r14d
-; CHECK-NEXT:    testl %r14d, %r14d
-; CHECK-NEXT:    movl %ecx, %r12d
+; CHECK-NEXT:    decl %r15d
+; CHECK-NEXT:    testl %r15d, %r15d
+; CHECK-NEXT:    movl %r13d, %r14d
 ; CHECK-NEXT:    jg LBB0_13
 ; CHECK-NEXT:    jmp LBB0_22
 ; CHECK-NEXT:    .p2align 4, 0x90
@@ -255,27 +254,28 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_11:
 ; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r13d, %r13d
 ; CHECK-NEXT:  LBB0_22: ## %while.end1465
-; CHECK-NEXT:    incl %ecx
-; CHECK-NEXT:    cmpl $16, %ecx
+; CHECK-NEXT:    incl %r13d
+; CHECK-NEXT:    cmpl $16, %r13d
 ; CHECK-NEXT:    ja LBB0_50
 ; CHECK-NEXT:  ## %bb.23: ## %while.end1465
 ; CHECK-NEXT:    movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT:    btl %ecx, %eax
+; CHECK-NEXT:    btl %r13d, %eax
 ; CHECK-NEXT:    jae LBB0_50
 ; CHECK-NEXT:  ## %bb.24:
-; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
 ; CHECK-NEXT:  LBB0_48: ## %if.then1477
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
-; CHECK-NEXT:    addq $8189, %r15 ## imm = 0x1FFD
-; CHECK-NEXT:    subq %rbx, %r15
-; CHECK-NEXT:    addq _syHistory@{{.*}}(%rip), %r15
+; CHECK-NEXT:    subq %rbp, %rbx
+; CHECK-NEXT:    movq _syHistory@{{.*}}(%rip), %rax
+; CHECK-NEXT:    leaq 8189(%rbx,%rax), %rax
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB0_49: ## %for.body1723
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    decq %r15
+; CHECK-NEXT:    decq %rax
 ; CHECK-NEXT:    jmp LBB0_49
 ; CHECK-NEXT:  LBB0_50: ## %for.cond1480.preheader
 ; CHECK-NEXT:    movl $512, %eax ## imm = 0x200
@@ -302,7 +302,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:  LBB0_55: ## %if.then.i
 ; CHECK-NEXT:    ud2
 ; CHECK-NEXT:  LBB0_47: ## %if.then1477.loopexit
-; CHECK-NEXT:    movq %r15, %rbx
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT:    movq %rbx, %rbp
 ; CHECK-NEXT:    jmp LBB0_48
 ; CHECK-NEXT:  LBB0_16: ## %while.cond635.preheader
 ; CHECK-NEXT:    xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 6c268d8a27f42..e8af5f66d36c9 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1111,7 +1111,7 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1120,9 +1120,9 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
-; X64-NEXT:    subl %esi, %ecx
+; X64-NEXT:    negl %ecx
 ; X64-NEXT:    subl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
@@ -1139,9 +1139,10 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
+; X32-NEXT:    xorl %ecx, %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 2ffbfcb56b2f7..37a3dcbd0e4a3 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -156,16 +156,16 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal 32(%rdi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -203,16 +203,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal -32(%rdi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 %t0, 32
@@ -250,16 +250,16 @@ define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    subl %edx, %esi
 ; X64-NEXT:    leal 32(%rsi), %eax
-; X64-NEXT:    subl %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = sub i32 32, %t0
@@ -416,15 +416,15 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    psubd %xmm2, %xmm0
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psubd %xmm2, %xmm0
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
@@ -504,16 +504,16 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    psubd %xmm2, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    psubd %xmm2, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 7034378a880b5..84096e3b6805d 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -15,30 +15,27 @@ define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
 ; CHECK-NEXT:    subq %rax, %rsi
 ; CHECK-NEXT:    movq (%rdx), %rax
 ; CHECK-NEXT:    movswl 8(%rdi), %edx
-; CHECK-NEXT:    movabsq $5089792277106559579, %rdi # imm = 0x46A2931BF1768A5B
 ; CHECK-NEXT:    movswl (%rax,%rsi,2), %eax
 ; CHECK-NEXT:    movl $1, %esi
 ; CHECK-NEXT:    imull %edx, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    addl $2138875574, %eax # imm = 0x7F7CA6B6
 ; CHECK-NEXT:    cmpl $-8608074, %eax # imm = 0xFF7CA6B6
-; CHECK-NEXT:    movslq %eax, %r8
+; CHECK-NEXT:    movslq %eax, %rdi
 ; CHECK-NEXT:    setl %dl
 ; CHECK-NEXT:    cmpl $2138875573, %eax # imm = 0x7F7CA6B5
-; CHECK-NEXT:    movq %r8, %r9
+; CHECK-NEXT:    movq %rdi, %r8
 ; CHECK-NEXT:    leal -1(%rdx,%rdx), %edx
 ; CHECK-NEXT:    cmovlel %edx, %esi
-; CHECK-NEXT:    subq %rax, %r9
-; CHECK-NEXT:    addq %r8, %rdi
+; CHECK-NEXT:    subq %rax, %r8
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $1, %esi
-; CHECK-NEXT:    cmovneq %rax, %r9
-; CHECK-NEXT:    testl %r8d, %r8d
-; CHECK-NEXT:    cmovnsq %rax, %r9
-; CHECK-NEXT:    movabsq $-5089792279245435153, %rax # imm = 0xB95D6CE38F0CCEEF
-; CHECK-NEXT:    subq %r9, %rdi
-; CHECK-NEXT:    addq (%rcx), %rdi
-; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    cmovneq %rax, %r8
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    cmovnsq %rax, %r8
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    subq %r8, %rdi
+; CHECK-NEXT:    leaq -2138875574(%rax,%rdi), %rax
 ; CHECK-NEXT:    movq %rax, (%rcx)
 ; CHECK-NEXT:    retq
 entry:

From 05ad5fd213cf5ee16c5ce5bb653fb92fe381a442 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 20:37:18 +0000
Subject: [PATCH 0665/1176] [DAGCombiner][X86][AArch64][SPARC][SystemZ] y - (x
 + C)  ->  (y - x) - C  fold. Try 3

Summary:
Direct sibling of D62223 patch.
While i don't have a direct motivational pattern for this,
it would seem to make sense to handle both patterns (or none),
for symmetry?

The aarch64 changes look neutral;
sparc and systemz look like improvement (one less instruction each);
x86 changes - 32bit case improves, 64bit case shows that LEA no longer
gets constructed, which may be because that whole test is `-mattr=+slow-lea,+slow-3ops-lea`

https://rise4fun.com/Alive/ffh

This is a recommit, originally committed in rL361852, but reverted
to investigate test-suite compile-time hangs, and then reverted in
rL362109 to fix missing constant folds that were causing
endless combine loops.

Reviewers: RKSimon, craig.topper, spatel, t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, jyknight, javed.absar, kristof.beyls, fedor.sergeev, jrtc27, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62252

llvm-svn: 362143
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  4 +--
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 12 ++++----
 .../CodeGen/SPARC/2013-05-17-CallFrame.ll     |  5 ++--
 llvm/test/CodeGen/SystemZ/alloca-03.ll        | 11 ++++----
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 28 +++++++++----------
 6 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dc4f7f2d5908e..10ae53a723d64 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2970,6 +2970,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
   }
+  // y - (x + C)  ->  (y - x) - C
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+      isConstantOrConstantVector(N1.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index d349eb09f7353..c91700436bb96 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -513,7 +513,7 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    add w8, w2, w1
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 0, %a
@@ -525,7 +525,7 @@ define i32 @reg32_lshr_by_b_sub_negated_unfolded(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_b_sub_negated_unfolded(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_b_sub_negated_unfolded:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    add x8, x2, x1
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 8886954623f7c..167ca6a10ec13 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -107,9 +107,9 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, #32 // =32
-; CHECK-NEXT:    sub w0, w2, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w8, w2, w8
+; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -295,9 +295,9 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI19_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
diff --git a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
index 1a97e4e317e57..274e99b114c32 100644
--- a/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
+++ b/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
@@ -15,10 +15,9 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
 ; V8-NEXT:    .cfi_register 15, 31
 ; V8-NEXT:    add %i0, 7, %i0
 ; V8-NEXT:    and %i0, -8, %i0
-; V8-NEXT:    add %i0, 8, %i0
 ; V8-NEXT:    sub %sp, %i0, %i0
-; V8-NEXT:    add %i0, 96, %o0
-; V8-NEXT:    mov %i0, %sp
+; V8-NEXT:    add %i0, -8, %sp
+; V8-NEXT:    add %i0, 88, %o0
 ; V8-NEXT:    add %sp, -16, %sp
 ; V8-NEXT:    st %o0, [%sp+104]
 ; V8-NEXT:    st %o0, [%sp+100]
diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll
index 343071211b751..cac569ff41fa3 100644
--- a/llvm/test/CodeGen/SystemZ/alloca-03.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll
@@ -75,13 +75,12 @@ define void @f3(i64 %len) {
 ; CHECK-NEXT:    lgr %r11, %r15
 ; CHECK-NEXT:    .cfi_def_cfa_register %r11
 ; CHECK-NEXT:    lgr %r1, %r15
-; CHECK-NEXT:    sllg %r2, %r2, 3
-; CHECK-NEXT:    la %r0, 120(%r2)
+; CHECK-NEXT:    sllg %r0, %r2, 3
 ; CHECK-NEXT:    sgr %r1, %r0
-; CHECK-NEXT:    la %r2, 280(%r1)
-; CHECK-NEXT:    nill %r2, 65408
-; CHECK-NEXT:    lgr %r15, %r1
-; CHECK-NEXT:    mvghi 0(%r2), 10
+; CHECK-NEXT:    lay %r15, -120(%r1)
+; CHECK-NEXT:    la %r1, 160(%r1)
+; CHECK-NEXT:    nill %r1, 65408
+; CHECK-NEXT:    mvghi 0(%r1), 10
 ; CHECK-NEXT:    lmg %r11, %r15, 248(%r11)
 ; CHECK-NEXT:    br %r14
   %x = alloca i64, i64 %len, align 128
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 37a3dcbd0e4a3..59a42ad9e3926 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -178,16 +178,16 @@ define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl $32, %ecx
-; X32-NEXT:    subl %ecx, %eax
+; X32-NEXT:    addl %ecx, %eax
+; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %eax
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    addl $32, %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal -32(%rdx,%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, %b
   %t1 = add i32 %t0, 32 ; constant always on RHS
@@ -434,18 +434,18 @@ define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    psubd %xmm0, %xmm2
-; X32-NEXT:    movdqa %xmm2, %xmm0
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd %xmm2, %xmm1
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
-; X64-NEXT:    psubd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm0
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd %xmm2, %xmm1
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS

From 63b4741534a9c1520aa618b1bc69b00771a7aa1b Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 20:37:29 +0000
Subject: [PATCH 0666/1176] [DAGCombine][X86][AArch64][AMDGPU] (x - y) + -1  ->
  add (xor y, -1), x  fold. Try 3

Summary:
This prevents regressions in next patch,
and somewhat recovers from the regression to AMDGPU test in D62223.

It is indeed not great that we leave vector decrement,
don't transform it into vector add all-ones..

https://rise4fun.com/Alive/ZRl

This is a recommit, originally committed in rL361852, but reverted
to investigate test-suite compile-time hangs, and then reverted in
rL362109 to fix missing constant folds that were causing
endless combine loops.

Reviewers: RKSimon, craig.topper, spatel, arsenm

Reviewed By: RKSimon, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, javed.absar, dstuttard, tpr, t-tye, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62263

llvm-svn: 362144
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++
 llvm/test/CodeGen/AArch64/xor.ll              | 18 +++---
 .../CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll   | 19 +++---
 llvm/test/CodeGen/X86/xor.ll                  | 62 ++++++++++---------
 4 files changed, 62 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 10ae53a723d64..c733c24d5d78a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2312,6 +2312,13 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
     }
   }
 
+  // (x - y) + -1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isAllOnesOrAllOnesSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
     return Combined;
 
@@ -2964,6 +2971,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
     return V;
 
+  // (x - y) - 1  ->  add (xor y, -1), x
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+  }
+
   // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
       isConstantOrConstantVector(N0.getOperand(1))) {
diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll
index 1dca55a971308..ca6c0dfabba48 100644
--- a/llvm/test/CodeGen/AArch64/xor.ll
+++ b/llvm/test/CodeGen/AArch64/xor.ll
@@ -18,8 +18,8 @@ define i32 @PR39657(i8* %p, i64 %x) {
 define i32 @add_of_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -29,8 +29,8 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; CHECK-LABEL: add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w0, w8, #1 // =1
+; CHECK-NEXT:    mvn w8, w1
+; CHECK-NEXT:    add w0, w8, w0
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -40,9 +40,8 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -52,9 +51,8 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: vec_add_of_not_decrement:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v1.16b, v1.16b
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 71c8f6926c1f9..2dd7e20c00ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -9,17 +9,16 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT0:       ; %bb.0: ; %entry
 ; VARIANT0-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT0-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT0-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT0-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT0-NEXT:    s_mov_b32 s6, 0
 ; VARIANT0-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT0-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT0-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT0-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT0-NEXT:    s_waitcnt expcnt(0)
-; VARIANT0-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
-; VARIANT0-NEXT:    s_waitcnt vmcnt(0)
+; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
+; VARIANT0-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
 ; VARIANT0-NEXT:    s_barrier
-; VARIANT0-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT0-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT0-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
 ; VARIANT0-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
@@ -31,18 +30,18 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT1:       ; %bb.0: ; %entry
 ; VARIANT1-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; VARIANT1-NEXT:    s_load_dword s2, s[0:1], 0xb
+; VARIANT1-NEXT:    v_not_b32_e32 v3, v0
 ; VARIANT1-NEXT:    s_mov_b32 s7, 0xf000
 ; VARIANT1-NEXT:    s_mov_b32 s6, 0
 ; VARIANT1-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; VARIANT1-NEXT:    v_mov_b32_e32 v2, 0
 ; VARIANT1-NEXT:    s_waitcnt lgkmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
-; VARIANT1-NEXT:    s_waitcnt expcnt(0)
-; VARIANT1-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
+; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, s2, v3
 ; VARIANT1-NEXT:    s_barrier
-; VARIANT1-NEXT:    v_add_i32_e32 v3, vcc, -1, v0
 ; VARIANT1-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT1-NEXT:    v_lshl_b64 v[3:4], v[3:4], 2
+; VARIANT1-NEXT:    s_waitcnt expcnt(0)
 ; VARIANT1-NEXT:    buffer_load_dword v0, v[3:4], s[4:7], 0 addr64
 ; VARIANT1-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT1-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
@@ -60,8 +59,7 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT2-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT2-NEXT:    s_waitcnt vmcnt(0)
 ; VARIANT2-NEXT:    s_barrier
-; VARIANT2-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT2-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT2-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT2-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT2-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT2-NEXT:    v_mov_b32_e32 v0, s3
@@ -83,8 +81,7 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
 ; VARIANT3-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; VARIANT3-NEXT:    global_store_dword v[1:2], v0, off
 ; VARIANT3-NEXT:    s_barrier
-; VARIANT3-NEXT:    v_sub_u32_e32 v0, s0, v0
-; VARIANT3-NEXT:    v_add_u32_e32 v3, -1, v0
+; VARIANT3-NEXT:    v_xad_u32 v3, v0, -1, s0
 ; VARIANT3-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
 ; VARIANT3-NEXT:    v_lshlrev_b64 v[3:4], 2, v[3:4]
 ; VARIANT3-NEXT:    v_mov_b32_e32 v0, s3
diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll
index 5ef5999be95f4..654382f7b73e9 100644
--- a/llvm/test/CodeGen/X86/xor.ll
+++ b/llvm/test/CodeGen/X86/xor.ll
@@ -532,22 +532,24 @@ define i32 @add_of_not(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = add i32 %t0, -1
@@ -558,22 +560,24 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 ; X32-LABEL: add_of_not_decrement:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    decl %eax
+; X32-NEXT:    notl %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
+; X64-LIN-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-LIN-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-LIN-NEXT:    subl %esi, %edi
-; X64-LIN-NEXT:    leal -1(%rdi), %eax
+; X64-LIN-NEXT:    notl %esi
+; X64-LIN-NEXT:    leal (%rsi,%rdi), %eax
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
+; X64-WIN-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-WIN-NEXT:    # kill: def $ecx killed $ecx def $rcx
-; X64-WIN-NEXT:    subl %edx, %ecx
-; X64-WIN-NEXT:    leal -1(%rcx), %eax
+; X64-WIN-NEXT:    notl %edx
+; X64-WIN-NEXT:    leal (%rdx,%rcx), %eax
 ; X64-WIN-NEXT:    retq
   %t0 = sub i32 %x, %y
   %r = sub i32 %t0, 1
@@ -583,24 +587,23 @@ define i32 @add_of_not_decrement(i32 %x, i32 %y) {
 define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = add <4 x i32> %t0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -610,24 +613,23 @@ define <4 x i32> @vec_add_of_not(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @vec_add_of_not_decrement(<4 x i32> %x, <4 x i32> %y) {
 ; X32-LABEL: vec_add_of_not_decrement:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    pcmpeqd %xmm1, %xmm1
-; X32-NEXT:    paddd %xmm1, %xmm0
+; X32-NEXT:    pcmpeqd %xmm2, %xmm2
+; X32-NEXT:    pxor %xmm1, %xmm2
+; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LIN-LABEL: vec_add_of_not_decrement:
 ; X64-LIN:       # %bb.0:
-; X64-LIN-NEXT:    psubd %xmm1, %xmm0
-; X64-LIN-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-LIN-NEXT:    paddd %xmm1, %xmm0
+; X64-LIN-NEXT:    pcmpeqd %xmm2, %xmm2
+; X64-LIN-NEXT:    pxor %xmm1, %xmm2
+; X64-LIN-NEXT:    paddd %xmm2, %xmm0
 ; X64-LIN-NEXT:    retq
 ;
 ; X64-WIN-LABEL: vec_add_of_not_decrement:
 ; X64-WIN:       # %bb.0:
-; X64-WIN-NEXT:    movdqa (%rcx), %xmm1
-; X64-WIN-NEXT:    psubd (%rdx), %xmm1
 ; X64-WIN-NEXT:    pcmpeqd %xmm0, %xmm0
-; X64-WIN-NEXT:    paddd %xmm1, %xmm0
+; X64-WIN-NEXT:    pxor (%rdx), %xmm0
+; X64-WIN-NEXT:    paddd (%rcx), %xmm0
 ; X64-WIN-NEXT:    retq
   %t0 = sub <4 x i32> %x, %y
   %r = sub <4 x i32> %t0, <i32 1, i32 1, i32 1, i32 1>

From 57aa36ff9164f40cf99f38428246c59d41a74ddd Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 20:37:39 +0000
Subject: [PATCH 0667/1176] [DAGCombine] (x - C) - y  ->  (x - y) - C  fold.
 Try 3

Summary:
Again only vectors affected. Frustrating. Let me take a look into that..

https://rise4fun.com/Alive/AAq

This is a recommit, originally committed in rL361852, but reverted
to investigate test-suite compile-time hangs, and then reverted in
rL362109 to fix missing constant folds that were causing
endless combine loops.

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, JDevlieghere, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62294

llvm-svn: 362145
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 7 +++++++
 llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll | 2 +-
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll     | 4 ++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c733c24d5d78a..a3f89a86fce43 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2990,6 +2990,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
   }
+  // (x - C) - y  ->  (x - y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 167ca6a10ec13..344016ea5027b 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -314,8 +314,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 59a42ad9e3926..7da5c7db2d689 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -460,15 +460,15 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    psubd %xmm2, %xmm0
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psubd %xmm2, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>

From a4e3b50e2651510d2d93a0f3128d4bcceb3ea3d3 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 20:37:49 +0000
Subject: [PATCH 0668/1176] [DAGCombiner][X86][AArch64] (x - C) + y  ->  (x +
 y) - C  fold. Try 2

Summary:
Only vector tests are being affected here,
since subtraction by scalar constant is rewritten
as addition by negated constant.

No surprising test changes.

https://rise4fun.com/Alive/pbT

This is a recommit, originally committed in rL361852, but reverted
to investigate test-suite compile-time hangs.

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62257

llvm-svn: 362146
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  8 ++++++++
 .../CodeGen/AArch64/sink-addsub-of-const.ll   |  6 +++---
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 12 +++++------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 20 +++++++++----------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 16 +++++++--------
 llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 12 +++++------
 6 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a3f89a86fce43..38494f4b24185 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2470,6 +2470,14 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
+  // Hoist one-use subtraction by constant:  (x - C) + y  ->  (x + y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+  }
+
   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
   // add (sext i1 Y), X --> sub X, (zext i1 Y)
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 344016ea5027b..0e1a426c77f29 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
 ; CHECK-NEXT:    adrp x8, .LCPI23_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI23_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 7da5c7db2d689..4544707d07a2f 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add0:
 ; X32:       # %bb.0:
+; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add0:
 ; X64:       # %bb.0:
+; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x
 define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add1:
 ; X32:       # %bb.0:
+; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add1:
 ; X64:       # %bb.0:
+; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4
 ; X32-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd %xmm2, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index 2c41ee31a101d..58c972164d8f1 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    psubb %xmm2, %xmm0
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm0, %xmm1
+; SSE2-NEXT:    psubb %xmm2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    psubb %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm0, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2NOBW-LABEL: test_div7_16i8:
@@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vzeroupper
 ; AVX2NOBW-NEXT:    retq
 ;
@@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
   %res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
@@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm3, %xmm2
-; SSE2-NEXT:    psubb %xmm3, %xmm2
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm2, %xmm1
+; SSE2-NEXT:    psubb %xmm3, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psllw $3, %xmm2
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm3, %xmm2
-; SSE41-NEXT:    psubb %xmm3, %xmm2
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm2, %xmm1
+; SSE41-NEXT:    psubb %xmm3, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    psllw $3, %xmm2
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
index d612d73448754..eda349005cda7 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
@@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm2
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
@@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_32i8:
@@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512BW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %res
@@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm4
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
@@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm3
 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
@@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
-; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
@@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512BW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
index 86c706c03a70c..961bec56e5d3c 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
@@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
@@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpand %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
@@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res
@@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm4
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX512F-NEXT:    vpand %ymm8, %ymm4, %ymm4
@@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm3
 ; AVX512F-NEXT:    vpand %ymm8, %ymm3, %ymm3
 ; AVX512F-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
@@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsubb %zmm3, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm3, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsllw $3, %zmm1, %zmm2
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1

From a481b01e958b19c2eb6564ab8930eae9058967f8 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Thu, 30 May 2019 20:45:12 +0000
Subject: [PATCH 0669/1176] [c++2a] Fix assertion failure if we would walk over
 more than one level of derived-to-base conversion path when implicitly
 starting union subobject lifetimes in constant evaluation.

llvm-svn: 362147
---
 clang/lib/AST/ExprConstant.cpp                   |  3 ++-
 clang/test/SemaCXX/constant-expression-cxx2a.cpp | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ac21b63cc79bb..df9b3067b8d43 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -5031,7 +5031,8 @@ static bool HandleUnionActiveMemberChange(EvalInfo &Info, const Expr *LHSExpr,
       if (ICE->getCastKind() != CK_DerivedToBase &&
           ICE->getCastKind() != CK_UncheckedDerivedToBase)
         break;
-      for (const CXXBaseSpecifier *Elt : ICE->path()) {
+      // Walk path backwards as we walk up from the base to the derived class.
+      for (const CXXBaseSpecifier *Elt : llvm::reverse(ICE->path())) {
         --PathLength;
         (void)Elt;
         assert(declaresSameEntity(Elt->getType()->getAsCXXRecordDecl(),
diff --git a/clang/test/SemaCXX/constant-expression-cxx2a.cpp b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
index aa534ce592e34..f29f4beb29082 100644
--- a/clang/test/SemaCXX/constant-expression-cxx2a.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx2a.cpp
@@ -521,4 +521,14 @@ namespace Union {
     u1 = u2;
     return true;
   }();
+
+  struct S1 {
+    int n;
+  };
+  struct S2 : S1 {};
+  struct S3 : S2 {};
+  void f() {
+    S3 s;
+    s.n = 0;
+  }
 }

From 5c4c44310a38ff77e15585636da57a66e737570d Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Thu, 30 May 2019 20:46:31 +0000
Subject: [PATCH 0670/1176] [pstl] Remove various warnings in the pstl headers
 and tests

- unused parameter warnings
- don't use single-letter template parameter names, like we do in libc++
- sign-comparison warnings
- unused variables in the tests
- unused local typedefs in the tests
- the use of #include_next
- field reordering in the tests
- unused lambda captures

Note that the rationale for why the static_casts to unsigned are OK is
that last - first must always be non-negative, since [first, last) is
a valid range.

llvm-svn: 362148
---
 pstl/include/pstl/internal/algorithm_impl.h   | 36 +++++++++----------
 pstl/include/pstl/internal/numeric_impl.h     |  2 +-
 .../pstl/internal/parallel_backend_serial.h   |  4 +--
 .../pstl/internal/unseq_backend_simd.h        | 16 ++++-----
 pstl/include/pstl/internal/utils.h            |  8 ++---
 pstl/test/CMakeLists.txt                      |  2 ++
 .../generate.pass.cpp                         |  6 ++--
 .../alg.nonmodifying/adjacent_find.pass.cpp   | 10 +++---
 .../alg.nonmodifying/equal.pass.cpp           |  2 +-
 .../alg.nonmodifying/find_end.pass.cpp        |  4 +--
 .../alg.nonmodifying/search_n.pass.cpp        |  2 +-
 .../alg.set.operations/set.pass.cpp           |  2 +-
 .../std/algorithms/alg.sorting/sort.pass.cpp  |  2 +-
 pstl/test/support/utils.h                     | 14 ++++----
 14 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/pstl/include/pstl/internal/algorithm_impl.h b/pstl/include/pstl/internal/algorithm_impl.h
index 5fecef4f7cd0b..770665626bed6 100644
--- a/pstl/include/pstl/internal/algorithm_impl.h
+++ b/pstl/include/pstl/internal/algorithm_impl.h
@@ -599,19 +599,19 @@ _RandomAccessIterator
 __find_subrange(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __global_last,
                 _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector) noexcept
 {
-    if (__global_last - __first < __count || __count < 1)
+    if (static_cast<_Size>(__global_last - __first) < __count || __count < 1)
     {
         return __last; // According to the standard last shall be returned when count < 1
     }
 
     auto __n = __global_last - __first;
     auto __unary_pred = __equal_value_by_pred<_Tp, _BinaryPredicate>(__value, __pred);
-    while (__first != __last && (__global_last - __first >= __count))
+    while (__first != __last && (static_cast<_Size>(__global_last - __first) >= __count))
     {
         __first = __internal::__brick_find_if(__first, __last, __unary_pred, __is_vector);
 
         // check that all of elements in [first+1, first+count) equal to value
-        if (__first != __last && (__global_last - __first >= __count) &&
+        if (__first != __last && (static_cast<_Size>(__global_last - __first) >= __count) &&
             !__internal::__brick_any_of(__first + 1, __first + __count,
                                         __not_pred<decltype(__unary_pred)>(__unary_pred), __is_vector))
         {
@@ -2244,7 +2244,7 @@ __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _Binary
 template <class _ForwardIterator, class _BinaryPredicate>
 _ForwardIterator
 __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
-                      /* IsVector = */ std::false_type, bool __or_semantic) noexcept
+                      /* IsVector = */ std::false_type, bool) noexcept
 {
     return std::adjacent_find(__first, __last, __pred);
 }
@@ -2689,7 +2689,7 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __firs
 
 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare, class _IsVector>
 bool
-__pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector,
                    /*is_parallel=*/std::false_type) noexcept
 {
@@ -2699,7 +2699,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa
 template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare, class _IsVector>
 bool
 __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
-                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector __is_vector,
+                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector,
                    /*is_parallel=*/std::true_type)
 {
     if (__first2 >= __last2)
@@ -2761,7 +2761,7 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar
                   _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector)
 {
     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
-    typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
+    typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp;
 
     struct _SetRange
     {
@@ -2776,7 +2776,7 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar
     const _DifferenceType __n1 = __last1 - __first1;
     const _DifferenceType __n2 = __last2 - __first2;
 
-    __par_backend::__buffer<_T> __buf(__size_func(__n1, __n2));
+    __par_backend::__buffer<_Tp> __buf(__size_func(__n1, __n2));
 
     return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __is_vector,
                                          __comp, __size_func, __set_op, &__buf]() {
@@ -3007,11 +3007,11 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw
     if (__n1 + __n2 <= __set_algo_cut_off)
         return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
 
-    typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
+    typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp;
     return __internal::__parallel_set_union_op(
         std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
         [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
-           _T* __result,
+           _Tp* __result,
            _Compare __comp) { return std::set_union(__first1, __last1, __first2, __last2, __result, __comp); },
         __is_vector);
 }
@@ -3056,7 +3056,7 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1
                            _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
                            _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::true_type)
 {
-    typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
+    typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp;
     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
 
     const auto __n1 = __last1 - __first1;
@@ -3086,7 +3086,7 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1
             std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp,
             [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); },
             [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
-               _ForwardIterator2 __last2, _T* __result, _Compare __comp) {
+               _ForwardIterator2 __last2, _Tp* __result, _Compare __comp) {
                 return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp);
             },
             __is_vector);
@@ -3100,7 +3100,7 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1
             std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp,
             [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); },
             [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
-               _ForwardIterator2 __last2, _T* __result, _Compare __comp) {
+               _ForwardIterator2 __last2, _Tp* __result, _Compare __comp) {
                 return std::set_intersection(__first2, __last2, __first1, __last1, __result, __comp);
             },
             __is_vector);
@@ -3151,7 +3151,7 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1,
                          _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
                          _Compare __comp, _IsVector __is_vector, /*is_parallel=*/std::true_type)
 {
-    typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
+    typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp;
     typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
 
     const auto __n1 = __last1 - __first1;
@@ -3195,9 +3195,9 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1,
     if (__n1 + __n2 > __set_algo_cut_off)
         return __internal::__parallel_set_op(
             std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
-            [](_DifferenceType __n, _DifferenceType __m) { return __n; },
+            [](_DifferenceType __n, _DifferenceType) { return __n; },
             [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
-               _ForwardIterator2 __last2, _T* __result,
+               _ForwardIterator2 __last2, _Tp* __result,
                _Compare __comp) { return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); },
             __is_vector);
 
@@ -3254,11 +3254,11 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1
     if (__n1 + __n2 <= __set_algo_cut_off)
         return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
 
-    typedef typename std::iterator_traits<_OutputIterator>::value_type _T;
+    typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp;
     return __internal::__parallel_set_union_op(
         std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
         [](_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
-           _T* __result, _Compare __comp) {
+           _Tp* __result, _Compare __comp) {
             return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
         },
         __is_vector);
diff --git a/pstl/include/pstl/internal/numeric_impl.h b/pstl/include/pstl/internal/numeric_impl.h
index 1f108aa596f8d..8302afdf9651b 100644
--- a/pstl/include/pstl/internal/numeric_impl.h
+++ b/pstl/include/pstl/internal/numeric_impl.h
@@ -284,7 +284,7 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs
                                         }) -
                          1);
             },
-            [](_Tp __res) {});
+            [](_Tp) {});
         return __result + (__last - __first);
     });
 }
diff --git a/pstl/include/pstl/internal/parallel_backend_serial.h b/pstl/include/pstl/internal/parallel_backend_serial.h
index e4b3a700d1f8d..089f9988bfa5a 100644
--- a/pstl/include/pstl/internal/parallel_backend_serial.h
+++ b/pstl/include/pstl/internal/parallel_backend_serial.h
@@ -110,10 +110,10 @@ template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _Ran
           typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
 void
 __parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
-                 _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __out,
+                 _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __outit,
                  _Compare __comp, _LeafMerge __leaf_merge)
 {
-    __leaf_merge(__first1, __last1, __first2, __last2, __out, __comp);
+    __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
 }
 
 template <class _ExecutionPolicy, typename _F1, typename _F2>
diff --git a/pstl/include/pstl/internal/unseq_backend_simd.h b/pstl/include/pstl/internal/unseq_backend_simd.h
index 464d747c41b4a..a05de39f7576f 100644
--- a/pstl/include/pstl/internal/unseq_backend_simd.h
+++ b/pstl/include/pstl/internal/unseq_backend_simd.h
@@ -189,14 +189,14 @@ __simd_first(_Index1 __first1, _DifferenceType __n, _Index2 __first2, _Pred __pr
         }
         if (__found)
         {
-            _DifferenceType __i;
+            _DifferenceType __i2;
             // This will vectorize
-            for (__i = 0; __i < __block_size; ++__i)
+            for (__i2 = 0; __i2 < __block_size; ++__i2)
             {
-                if (__lane[__i])
+                if (__lane[__i2])
                     break;
             }
-            return std::make_pair(__first1 + __i, __first2 + __i);
+            return std::make_pair(__first1 + __i2, __first2 + __i2);
         }
         __first1 += __block_size;
         __first2 += __block_size;
@@ -486,15 +486,15 @@ __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _Un
             __lane[__j] = __binary_op(__lane[__j], __f(last_iteration + __j));
         }
         // combiner
-        for (_Size __i = 0; __i < __block_size; ++__i)
+        for (_Size __j = 0; __j < __block_size; ++__j)
         {
-            __init = __binary_op(__init, __lane[__i]);
+            __init = __binary_op(__init, __lane[__j]);
         }
         // destroyer
         _PSTL_PRAGMA_SIMD
-        for (_Size __i = 0; __i < __block_size; ++__i)
+        for (_Size __j = 0; __j < __block_size; ++__j)
         {
-            __lane[__i].~_Tp();
+            __lane[__j].~_Tp();
         }
     }
     else
diff --git a/pstl/include/pstl/internal/utils.h b/pstl/include/pstl/internal/utils.h
index 1711f29267885..a1a0a7ff61d97 100644
--- a/pstl/include/pstl/internal/utils.h
+++ b/pstl/include/pstl/internal/utils.h
@@ -45,7 +45,7 @@ __invoke_if(std::true_type, _Fp __f)
 
 template <typename _Fp>
 void
-__invoke_if(std::false_type, _Fp __f)
+__invoke_if(std::false_type, _Fp)
 {
 }
 
@@ -58,20 +58,20 @@ __invoke_if_not(std::false_type, _Fp __f)
 
 template <typename _Fp>
 void
-__invoke_if_not(std::true_type, _Fp __f)
+__invoke_if_not(std::true_type, _Fp)
 {
 }
 
 template <typename _F1, typename _F2>
 typename std::result_of<_F1()>::type
-__invoke_if_else(std::true_type, _F1 __f1, _F2 __f2)
+__invoke_if_else(std::true_type, _F1 __f1, _F2)
 {
     return __f1();
 }
 
 template <typename _F1, typename _F2>
 typename std::result_of<_F2()>::type
-__invoke_if_else(std::false_type, _F1 __f1, _F2 __f2)
+__invoke_if_else(std::false_type, _F1, _F2 __f2)
 {
     return __f2();
 }
diff --git a/pstl/test/CMakeLists.txt b/pstl/test/CMakeLists.txt
index dae99e129be15..bc027c30d085f 100644
--- a/pstl/test/CMakeLists.txt
+++ b/pstl/test/CMakeLists.txt
@@ -21,6 +21,7 @@ add_custom_target(check-pstl
 add_library(test_stdlib INTERFACE)
 target_include_directories(test_stdlib INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/support/stdlib")
 target_link_libraries(test_stdlib INTERFACE pstl::ParallelSTL)
+target_compile_options(test_stdlib INTERFACE -Wno-gnu-include-next)
 
 file(GLOB_RECURSE UNIT_TESTS "*.pass.cpp")
 foreach(_file IN LISTS UNIT_TESTS)
@@ -31,6 +32,7 @@ foreach(_file IN LISTS UNIT_TESTS)
 
     add_executable(${_target} EXCLUDE_FROM_ALL "${_file}")
     target_include_directories(${_target} PRIVATE "${CMAKE_CURRENT_LIST_DIR}")
+    target_compile_options(${_target} PRIVATE -Wno-unused-local-typedef -Wno-unused-variable)
     target_link_libraries(${_target} PRIVATE test_stdlib)
     set_target_properties(${_target} PROPERTIES CXX_EXTENSIONS NO
                                                 RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")
diff --git a/pstl/test/std/algorithms/alg.modifying.operations/generate.pass.cpp b/pstl/test/std/algorithms/alg.modifying.operations/generate.pass.cpp
index 9b423df29330a..185e98c86d2a6 100644
--- a/pstl/test/std/algorithms/alg.modifying.operations/generate.pass.cpp
+++ b/pstl/test/std/algorithms/alg.modifying.operations/generate.pass.cpp
@@ -46,7 +46,8 @@ struct test_generate
         {
             Generator_count<T> g;
             generate(exec, first, last, g);
-            EXPECT_TRUE(std::count(first, last, g.default_value()) == n, "generate wrong result for generate");
+            Size count = std::count(first, last, g.default_value());
+            EXPECT_TRUE(count == n, "generate wrong result for generate");
             std::fill(first, last, T(0));
         }
 
@@ -54,7 +55,8 @@ struct test_generate
             Generator_count<T> g;
             const auto m = n / 2;
             auto last = generate_n(exec, first, m, g);
-            EXPECT_TRUE(std::count(first, last, g.default_value()) == m && last == std::next(first, m),
+            Size count = std::count(first, last, g.default_value());
+            EXPECT_TRUE(count == m && last == std::next(first, m),
                         "generate_n wrong result for generate_n");
             std::fill(first, last, T(0));
         }
diff --git a/pstl/test/std/algorithms/alg.nonmodifying/adjacent_find.pass.cpp b/pstl/test/std/algorithms/alg.nonmodifying/adjacent_find.pass.cpp
index 1bccb53025426..b2ae541649789 100644
--- a/pstl/test/std/algorithms/alg.nonmodifying/adjacent_find.pass.cpp
+++ b/pstl/test/std/algorithms/alg.nonmodifying/adjacent_find.pass.cpp
@@ -39,12 +39,12 @@ test_adjacent_find_by_type()
 {
 
     size_t counts[] = {2, 3, 500};
-    for (int32_t c = 0; c < const_size(counts); ++c)
+    for (size_t c = 0; c < const_size(counts); ++c)
     {
 
-        for (int32_t e = 0; e < (counts[c] >= 64 ? 64 : (counts[c] == 2 ? 1 : 2)); ++e)
+        for (size_t e = 0; e < (counts[c] >= 64 ? 64 : (counts[c] == 2 ? 1 : 2)); ++e)
         {
-            Sequence<T> in(counts[c], [](int32_t v) -> T { return T(v); }); //fill 0...n
+            Sequence<T> in(counts[c], [](size_t v) -> T { return T(v); }); //fill 0...n
             in[e] = in[e + 1] = -1;                                         //make an adjacent pair
 
             auto i = std::adjacent_find(in.cbegin(), in.cend(), std::equal_to<T>());
@@ -56,9 +56,9 @@ test_adjacent_find_by_type()
     }
 
     //special cases: size=0, size=1;
-    for (int32_t expect = 0; expect < 1; ++expect)
+    for (size_t expect = 0; expect < 1; ++expect)
     {
-        Sequence<T> in(expect, [](int32_t v) -> T { return T(v); }); //fill 0...n
+        Sequence<T> in(expect, [](size_t v) -> T { return T(v); }); //fill 0...n
         auto i = std::adjacent_find(in.cbegin(), in.cend(), std::equal_to<T>());
         EXPECT_TRUE(i == in.cbegin() + expect, "std::adjacent_find returned wrong result");
 
diff --git a/pstl/test/std/algorithms/alg.nonmodifying/equal.pass.cpp b/pstl/test/std/algorithms/alg.nonmodifying/equal.pass.cpp
index ea4c09ec18d35..192babb29e088 100644
--- a/pstl/test/std/algorithms/alg.nonmodifying/equal.pass.cpp
+++ b/pstl/test/std/algorithms/alg.nonmodifying/equal.pass.cpp
@@ -20,10 +20,10 @@ using namespace TestUtils;
 
 struct UserType
 {
+    size_t key;
     float32_t f;
     float64_t d;
     int32_t i;
-    size_t key;
 
     bool
     operator()(UserType a, UserType b)
diff --git a/pstl/test/std/algorithms/alg.nonmodifying/find_end.pass.cpp b/pstl/test/std/algorithms/alg.nonmodifying/find_end.pass.cpp
index dfe499c3d1ecf..466314e643d30 100644
--- a/pstl/test/std/algorithms/alg.nonmodifying/find_end.pass.cpp
+++ b/pstl/test/std/algorithms/alg.nonmodifying/find_end.pass.cpp
@@ -68,8 +68,8 @@ test(const std::size_t bits)
 
     const std::size_t max_n1 = 1000;
     const std::size_t max_n2 = (max_n1 * 10) / 8;
-    Sequence<T> in(max_n1, [max_n1, bits](std::size_t k) { return T(2 * HashBits(max_n1, bits - 1) ^ 1); });
-    Sequence<T> sub(max_n2, [max_n1, bits](std::size_t k) { return T(2 * HashBits(max_n1, bits - 1)); });
+    Sequence<T> in(max_n1, [bits](std::size_t k) { return T(2 * HashBits(max_n1, bits - 1) ^ 1); });
+    Sequence<T> sub(max_n2, [bits](std::size_t k) { return T(2 * HashBits(max_n1, bits - 1)); });
     for (std::size_t n1 = 0; n1 <= max_n1; n1 = n1 <= 16 ? n1 + 1 : size_t(3.1415 * n1))
     {
         std::size_t sub_n[] = {0, 1, 3, n1, (n1 * 10) / 8};
diff --git a/pstl/test/std/algorithms/alg.nonmodifying/search_n.pass.cpp b/pstl/test/std/algorithms/alg.nonmodifying/search_n.pass.cpp
index 28fcb36ad80b7..ac8a422698c66 100644
--- a/pstl/test/std/algorithms/alg.nonmodifying/search_n.pass.cpp
+++ b/pstl/test/std/algorithms/alg.nonmodifying/search_n.pass.cpp
@@ -67,7 +67,7 @@ test()
             }
             for (auto r : res)
             {
-                Sequence<T> in(n1, [n1](std::size_t k) { return T(0); });
+                Sequence<T> in(n1, [](std::size_t k) { return T(0); });
                 std::size_t i = r, isub = 0;
                 for (; i < n1 & isub < n2; ++i, ++isub)
                     in[i] = value;
diff --git a/pstl/test/std/algorithms/alg.sorting/alg.set.operations/set.pass.cpp b/pstl/test/std/algorithms/alg.sorting/alg.set.operations/set.pass.cpp
index dbe59d02adf84..27e83a1129b81 100644
--- a/pstl/test/std/algorithms/alg.sorting/alg.set.operations/set.pass.cpp
+++ b/pstl/test/std/algorithms/alg.sorting/alg.set.operations/set.pass.cpp
@@ -116,7 +116,7 @@ test_set(Compare compare)
         for (std::size_t m = 0; m < n_max; m = m <= 16 ? m + 1 : size_t(2.71828 * m))
         {
             //prepare the input ranges
-            Sequence<T1> in1(n, [n](std::size_t k) { return rand() % (2 * k + 1); });
+            Sequence<T1> in1(n, [](std::size_t k) { return rand() % (2 * k + 1); });
             Sequence<T2> in2(m, [m](std::size_t k) { return (m % 2) * rand() + rand() % (k + 1); });
 
             std::sort(in1.begin(), in1.end(), compare);
diff --git a/pstl/test/std/algorithms/alg.sorting/sort.pass.cpp b/pstl/test/std/algorithms/alg.sorting/sort.pass.cpp
index 1997ec56c8b6b..2cdb24fbcaaa1 100644
--- a/pstl/test/std/algorithms/alg.sorting/sort.pass.cpp
+++ b/pstl/test/std/algorithms/alg.sorting/sort.pass.cpp
@@ -83,7 +83,7 @@ class ParanoidKey
         index = k.index;
         return *this;
     }
-    ParanoidKey(int32_t index, int32_t value, OddTag) : index(index), value(value) {}
+    ParanoidKey(int32_t index, int32_t value, OddTag) : value(value), index(index) {}
     ParanoidKey(ParanoidKey&& k) : value(k.value), index(k.index)
     {
         EXPECT_TRUE(k.isConstructed(), "source for move-construction is dead");
diff --git a/pstl/test/support/utils.h b/pstl/test/support/utils.h
index 6547d931c294f..541dc8c3f7541 100644
--- a/pstl/test/support/utils.h
+++ b/pstl/test/support/utils.h
@@ -30,7 +30,7 @@ typedef float float32_t;
 
 template <class T, std::size_t N>
 constexpr size_t
-const_size(const T (&array)[N]) noexcept
+const_size(const T (&)[N]) noexcept
 {
     return N;
 }
@@ -119,7 +119,7 @@ expect_equal(Iterator1 expected_first, Iterator2 actual_first, Size n, const cha
              const char* message)
 {
     size_t error_count = 0;
-    for (size_t k = 0; k < n && error_count < 10; ++k, ++expected_first, ++actual_first)
+    for (Size k = 0; k < n && error_count < 10; ++k, ++expected_first, ++actual_first)
     {
         if (!(*expected_first == *actual_first))
         {
@@ -752,7 +752,7 @@ struct invoke_if_<std::false_type, std::false_type>
 {
     template <typename Op, typename... Rest>
     void
-    operator()(bool is_allow, Op op, Rest&&... rest)
+    operator()(bool, Op op, Rest&&... rest)
     {
         op(std::forward<Rest>(rest)...);
     }
@@ -787,14 +787,14 @@ struct non_const_wrapper_tagged : non_const_wrapper
 
     template <typename Policy, typename Iterator>
     typename std::enable_if<IsPositiveCondition != is_same_iterator_category<Iterator, IteratorTag>::value, void>::type
-    operator()(Policy&& exec, Iterator iter)
+    operator()(Policy&&, Iterator)
     {
     }
 
     template <typename Policy, typename InputIterator, typename OutputIterator>
     typename std::enable_if<IsPositiveCondition != is_same_iterator_category<OutputIterator, IteratorTag>::value,
                             void>::type
-    operator()(Policy&& exec, InputIterator input_iter, OutputIterator out_iter)
+    operator()(Policy&&, InputIterator, OutputIterator)
     {
     }
 };
@@ -999,7 +999,7 @@ struct iterator_invoker<std::forward_iterator_tag, /*isReverse=*/std::true_type>
 {
     template <typename... Rest>
     void
-    operator()(Rest&&... rest)
+    operator()(Rest&&...)
     {
     }
 };
@@ -1226,7 +1226,7 @@ test_algo_basic_double(F&& f)
 
 template <typename Policy, typename F>
 static void
-invoke_if(Policy&& p, F f)
+invoke_if(Policy&&, F f)
 {
 #if _PSTL_ICC_16_VC14_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN || _PSTL_ICC_17_VC141_TEST_SIMD_LAMBDA_DEBUG_32_BROKEN
     __pstl::__internal::invoke_if_not(__pstl::__internal::allow_unsequenced<Policy>(), f);

From 9bd9a03ad04cc2377e5443c834b1af3f42617a39 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Thu, 30 May 2019 20:48:05 +0000
Subject: [PATCH 0671/1176] Attempt to fix 'mutex.h' not found when building
 mutex_posix.

llvm-svn: 362149
---
 compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp b/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp
index e15bca8825711..8bd405e1074cb 100644
--- a/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp
+++ b/compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mutex.h"
+#include "gwp_asan/mutex.h"
 
 #include <assert.h>
 #include <pthread.h>

From 0fe645c0866bbae0d1735eeba4ff8b26af7bf9d8 Mon Sep 17 00:00:00 2001
From: Martin Storsjo <martin@martin.st>
Date: Thu, 30 May 2019 20:53:21 +0000
Subject: [PATCH 0672/1176] [InstCombine] Avoid use after free in DenseMap,
 when built with GCC

Previously, this used a statement like this:
    Map[A] = Map[B];

This is equivalent to the following:
    const auto &Src = Map[B];
    auto &Dest = Map[A];
    Dest = Src;

The second statement, "auto &Dest = Map[A];" can insert a new
element into the DenseMap, which can potentially grow and reallocate
the DenseMap's internal storage, which will invalidate the existing
reference to the source. When doing the actual assignment,
the Src reference is dereferenced, accessing memory that was
freed when the DenseMap grew.

This issue hasn't shown up when LLVM was built with Clang, because
the right hand side ended up dereferenced before evaulating the
left hand side. (If the value type is a larger data type, Clang doesn't
do this but behaves like GCC.)

With GCC, a cast to Value* isn't enough to make it dereference the
right hand side reference before invoking operator[] (while that is
enough to make Clang/LLVM do the right thing for larger types), but
storing it in an intermediate variable in a separate statement works.

This fixes PR42065.

Differential Revision: https://reviews.llvm.org/D62624

llvm-svn: 362150
---
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b3eb75ea8a8b7..a04e56916f8ef 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -703,7 +703,10 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
       continue;
 
     if (auto *CI = dyn_cast<CastInst>(Val)) {
-      NewInsts[CI] = NewInsts[CI->getOperand(0)];
+      // Don't get rid of the intermediate variable here; the store can grow
+      // the map which will invalidate the reference to the input value.
+      Value *V = NewInsts[CI->getOperand(0)];
+      NewInsts[CI] = V;
       continue;
     }
     if (auto *GEP = dyn_cast<GEPOperator>(Val)) {

From a100042b27f6fce3e314bebca13ce23270eb0705 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Thu, 30 May 2019 20:58:28 +0000
Subject: [PATCH 0673/1176] [RuntimeDyld] Update reserveAllocationSpace to
 account for stub padding.

This should fix the buildbot failures caused by r362139.

llvm-svn: 362151
---
 llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index b1d22b89af085..e0642adbd31c7 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -540,7 +540,14 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
         return errorCodeToError(EC);
 
       uint64_t StubBufSize = computeSectionStubBufSize(Obj, Section);
-      uint64_t SectionSize = DataSize + StubBufSize;
+
+      uint64_t PaddingSize = 0;
+      if (Name == ".eh_frame")
+        PaddingSize += 4;
+      if (StubBufSize != 0)
+        PaddingSize += getStubAlignment() - 1;
+
+      uint64_t SectionSize = DataSize + PaddingSize + StubBufSize;
 
       // The .eh_frame section (at least on Linux) needs an extra four bytes
       // padded

From e906f2a370e3db25d717a41c5a5b3e0d9d2aee86 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 30 May 2019 21:03:10 +0000
Subject: [PATCH 0674/1176] [CVP] Generalize willNotOverflow(); NFC

Change argument from WithOverflowInst to BinaryOpIntrinsic, so this
function can also be used for saturating math intrinsics.

llvm-svn: 362152
---
 .../Transforms/Scalar/CorrelatedValuePropagation.cpp   | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 4e4715be61aed..eab49b5f8b37e 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -398,14 +398,14 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
   return Changed;
 }
 
-// See if we can prove that the given overflow intrinsic will not overflow.
-static bool willNotOverflow(WithOverflowInst *WO, LazyValueInfo *LVI) {
+// See if we can prove that the given binary op intrinsic will not overflow.
+static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
   ConstantRange LRange = LVI->getConstantRange(
-      WO->getLHS(), WO->getParent(), WO);
+      BO->getLHS(), BO->getParent(), BO);
   ConstantRange RRange = LVI->getConstantRange(
-      WO->getRHS(), WO->getParent(), WO);
+      BO->getRHS(), BO->getParent(), BO);
   ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
-      WO->getBinaryOp(), RRange, WO->getNoWrapKind());
+      BO->getBinaryOp(), RRange, BO->getNoWrapKind());
   return NWRegion.contains(LRange);
 }
 

From 751be7d51a3639f0c167a2a4c0d135e72e6bf03b Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 30 May 2019 21:03:17 +0000
Subject: [PATCH 0675/1176] [CVP] Add tests for non-overflowing saturating
 math; NFC

llvm-svn: 362153
---
 .../CorrelatedValuePropagation/overflows.ll   | 100 +++++++++++++++++-
 1 file changed, 99 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index 9edf4789b8e4e..860ebafd0749d 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -19,6 +19,13 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
 
 declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 
+declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)
+
+declare i8 @llvm.uadd.sat.i8(i8, i8)
+declare i8 @llvm.sadd.sat.i8(i8, i8)
+declare i8 @llvm.usub.sat.i8(i8, i8)
+declare i8 @llvm.ssub.sat.i8(i8, i8)
+
 declare void @llvm.trap()
 
 
@@ -719,8 +726,99 @@ cleanup2:                                         ; preds = %while.end
 define { i8, i1 } @signed_mul_constant_folding() {
 ; CHECK-LABEL: @signed_mul_constant_folding(
 ; CHECK-NEXT:    ret { i8, i1 } { i8 2, i1 false }
+;
   %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 2)
   ret { i8, i1 } %mul
 }
 
-declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)
+define i8 @uadd_sat_no_overflow(i8 %x) {
+; CHECK-LABEL: @uadd_sat_no_overflow(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %cmp = icmp ugt i8 %x, 100
+  br i1 %cmp, label %trap, label %cont
+
+trap:
+  call void @llvm.trap()
+  unreachable
+
+cont:
+  %res = call i8 @llvm.uadd.sat.i8(i8 %x, i8 100)
+  ret i8 %res
+}
+
+define i8 @sadd_sat_no_overflow(i8 %x) {
+; CHECK-LABEL: @sadd_sat_no_overflow(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 20)
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %cmp = icmp sgt i8 %x, 100
+  br i1 %cmp, label %trap, label %cont
+
+trap:
+  call void @llvm.trap()
+  unreachable
+
+cont:
+  %res = call i8 @llvm.sadd.sat.i8(i8 %x, i8 20)
+  ret i8 %res
+}
+
+define i8 @usub_sat_no_overflow(i8 %x) {
+; CHECK-LABEL: @usub_sat_no_overflow(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[X:%.*]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %cmp = icmp ult i8 %x, 100
+  br i1 %cmp, label %trap, label %cont
+
+trap:
+  call void @llvm.trap()
+  unreachable
+
+cont:
+  %res = call i8 @llvm.usub.sat.i8(i8 %x, i8 100)
+  ret i8 %res
+}
+
+define i8 @ssub_sat_no_overflow(i8 %x) {
+; CHECK-LABEL: @ssub_sat_no_overflow(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], -100
+; CHECK-NEXT:    br i1 [[CMP]], label [[TRAP:%.*]], label [[CONT:%.*]]
+; CHECK:       trap:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       cont:
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X]], i8 20)
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %cmp = icmp slt i8 %x, -100
+  br i1 %cmp, label %trap, label %cont
+
+trap:
+  call void @llvm.trap()
+  unreachable
+
+cont:
+  %res = call i8 @llvm.ssub.sat.i8(i8 %x, i8 20)
+  ret i8 %res
+}

From 41dc5526a69d13856b6dc4811e5e7cc89f45b284 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Thu, 30 May 2019 21:03:53 +0000
Subject: [PATCH 0676/1176] [Target] Generalize Process::IsPossibleDynamicValue

llvm-svn: 362154
---
 lldb/source/Target/Process.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 66204da2c59b0..1d1fda18e3dde 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1631,12 +1631,12 @@ bool Process::IsPossibleDynamicValue(ValueObject &in_value) {
     return runtime ? runtime->CouldHaveDynamicValue(in_value) : false;
   }
 
-  LanguageRuntime *cpp_runtime = GetLanguageRuntime(eLanguageTypeC_plus_plus);
-  if (cpp_runtime && cpp_runtime->CouldHaveDynamicValue(in_value))
-    return true;
+  for (LanguageRuntime *runtime : GetLanguageRuntimes()) {
+    if (runtime->CouldHaveDynamicValue(in_value))
+      return true;
+  }
 
-  LanguageRuntime *objc_runtime = GetLanguageRuntime(eLanguageTypeObjC);
-  return objc_runtime ? objc_runtime->CouldHaveDynamicValue(in_value) : false;
+  return false;
 }
 
 void Process::SetDynamicCheckers(DynamicCheckerFunctions *dynamic_checkers) {

From e38a82405b86f6d92da3d6b97f45ecd09bb9b682 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Thu, 30 May 2019 21:09:19 +0000
Subject: [PATCH 0677/1176] [pstl] Use OpenMP pragmas with Clang, which
 supports them

Most importantly, Clang doesn't seem to support the non-OpenMP pragmas
that were being used previously.

llvm-svn: 362155
---
 pstl/include/pstl/internal/pstl_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pstl/include/pstl/internal/pstl_config.h b/pstl/include/pstl/internal/pstl_config.h
index a6867cac40cc2..72f31c31f5cc4 100644
--- a/pstl/include/pstl/internal/pstl_config.h
+++ b/pstl/include/pstl/internal/pstl_config.h
@@ -50,7 +50,7 @@
 #endif
 
 // Enable SIMD for compilers that support OpenMP 4.0
-#if (_OPENMP >= 201307) || (__INTEL_COMPILER >= 1600) || (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900)
+#if (_OPENMP >= 201307) || (__INTEL_COMPILER >= 1600) || (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || defined(__clang__)
 #    define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd)
 #    define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd)
 #    define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM))

From 46511d75b5bf3278bff5262181e07e0f977690b8 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 21:10:37 +0000
Subject: [PATCH 0678/1176] [DAGCombine] Limit 'hoist add/sub binop w/ constant
 op' to non-opaque consts

I don't have a test case for these, but there is a test case for D62266
where, even after all the constant-folding patches, we still end up
with endless combine loop. Which makes sense, since we don't constant
fold for opaque constants.

llvm-svn: 362156
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 38494f4b24185..773e0281b173b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2470,10 +2470,11 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
-  // Hoist one-use subtraction by constant:  (x - C) + y  ->  (x + y) - C
+  // Hoist one-use subtraction by non-opaque constant:
+  //   (x - C) + y  ->  (x + y) - C
   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
+      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
   }
@@ -2986,22 +2987,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
   }
 
-  // Hoist one-use addition by constant:  (x + C) - y  ->  (x - y) + C
+  // Hoist one-use addition by non-opaque constant:
+  //   (x + C) - y  ->  (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
+      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
   }
   // y - (x + C)  ->  (y - x) - C
   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
-      isConstantOrConstantVector(N1.getOperand(1))) {
+      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
   }
   // (x - C) - y  ->  (x - y) - C
   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
-      isConstantOrConstantVector(N0.getOperand(1))) {
+      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
   }

From fc3dfd3e35a637cdbef1bc21c56a31c28e4a5b4c Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Thu, 30 May 2019 21:35:32 +0000
Subject: [PATCH 0679/1176] Fix constexpr __builtin_*_overflow issue when
 unsigned->signed operand.

As reported here https://bugs.llvm.org/show_bug.cgi?id=42000, it was
possible to get the constexpr version of __builtin_*_overflow to give
the wrong answer.

This was because when extending the operands to fit the largest type (so
that the math could be done), the decision on whether to sign/zero
extend the operands was based on the result signedness, not on the
operands signedness.

In the reported case, (unsigned char)255 - (int)100 needed
to have each extended to the int in order to do the math.  However, when
extending the first operand to 'int', we incorrectly sign extended it
instead of zero extending.  Thus, the result didnt fit back into the
unsigned char.

The fix for this was simply to choose zero/sign extension based on the
sign of the operand itself.

Differential Revision: https://reviews.llvm.org/D62665

llvm-svn: 362157
---
 clang/lib/AST/ExprConstant.cpp           | 6 ++----
 clang/test/SemaCXX/builtins-overflow.cpp | 3 +++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index df9b3067b8d43..57d428282cb86 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -9454,10 +9454,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
       if (IsSigned && !AllSigned)
         ++MaxBits;
 
-      LHS = APSInt(IsSigned ? LHS.sextOrSelf(MaxBits) : LHS.zextOrSelf(MaxBits),
-                   !IsSigned);
-      RHS = APSInt(IsSigned ? RHS.sextOrSelf(MaxBits) : RHS.zextOrSelf(MaxBits),
-                   !IsSigned);
+      LHS = APSInt(LHS.extOrTrunc(MaxBits), !IsSigned);
+      RHS = APSInt(RHS.extOrTrunc(MaxBits), !IsSigned);
       Result = APSInt(MaxBits, !IsSigned);
     }
 
diff --git a/clang/test/SemaCXX/builtins-overflow.cpp b/clang/test/SemaCXX/builtins-overflow.cpp
index 65733c0c154f5..c84b7da00b543 100644
--- a/clang/test/SemaCXX/builtins-overflow.cpp
+++ b/clang/test/SemaCXX/builtins-overflow.cpp
@@ -2,6 +2,7 @@
 // expected-no-diagnostics
 
 #include <limits.h>
+#include <stdint.h>
 
 int a() {
   const int x = 3;
@@ -50,6 +51,7 @@ constexpr Result<RET> sub(LHS &&lhs, RHS &&rhs) {
 static_assert(sub<unsigned char>(static_cast<char>(0),static_cast<char>(1)) == Result<unsigned char>{true, UCHAR_MAX});
 static_assert(sub<char>(static_cast<unsigned char>(0),static_cast<unsigned char>(1)) == Result<char>{false, -1});
 static_assert(sub<unsigned short>(static_cast<short>(0),static_cast<short>(1)) == Result<unsigned short>{true, USHRT_MAX});
+static_assert(sub<uint8_t>(static_cast<uint8_t>(255),static_cast<int>(100)) == Result<uint8_t>{false, 155});
 
 static_assert(sub<int>(17,22) == Result<int>{false, -5});
 static_assert(sub<int>(INT_MAX - 22, -23) == Result<int>{true, INT_MIN});
@@ -91,3 +93,4 @@ constexpr Result<int> smul(int lhs, int rhs) {
 static_assert(smul(17,22) == Result<int>{false, 374});
 static_assert(smul(INT_MAX / 22, 23) == Result<int>{true, -2049870757});
 static_assert(smul(INT_MIN / 22, -23) == Result<int>{true, -2049870757});
+

From daaecf98c9ac41d2ad1da5bbf53303c2167501b5 Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Thu, 30 May 2019 21:37:25 +0000
Subject: [PATCH 0680/1176] [MIR-Canon] Fixing case where MachineFunction is
 empty.

In cases where the machine function is empty: bail on the RPO traversal.

Differential Revision: https://reviews.llvm.org/D62617

llvm-svn: 362158
---
 llvm/lib/CodeGen/MIRCanonicalizerPass.cpp  |  2 ++
 llvm/test/CodeGen/MIR/AArch64/empty-MF.mir | 11 +++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 llvm/test/CodeGen/MIR/AArch64/empty-MF.mir

diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index e8a6e409fb512..650240e60fef9 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -104,6 +104,8 @@ INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
                     "Rename Register Operands Canonically", false, false)
 
 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+  if (MF.empty())
+    return {};
   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
   std::vector<MachineBasicBlock *> RPOList;
   for (auto MBB : RPOT) {
diff --git a/llvm/test/CodeGen/MIR/AArch64/empty-MF.mir b/llvm/test/CodeGen/MIR/AArch64/empty-MF.mir
new file mode 100644
index 0000000000000..40b1bff06ff5a
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AArch64/empty-MF.mir
@@ -0,0 +1,11 @@
+# RUN: llc -mtriple aarch64-- -run-pass mir-canonicalizer %s -o - | FileCheck %s
+# CHECK: ...
+# CHECK-NEXT: ---
+# CHECK-NEXT: name:            foo
+# CHECK: body:             |
+# CHECK-EMPTY:
+# CHECK-NEXT: ...
+---
+name: foo
+body:             |
+...

From 31f1939848397e52617506e1a5af8035f4405a82 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 30 May 2019 21:41:21 +0000
Subject: [PATCH 0681/1176] [NFC][ARM] Add a test that potentially causes
 endless combine loop with D62266

llvm-svn: 362159
---
 .../CodeGen/ARM/sub-from-const-hoisting.ll    | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/sub-from-const-hoisting.ll

diff --git a/llvm/test/CodeGen/ARM/sub-from-const-hoisting.ll b/llvm/test/CodeGen/ARM/sub-from-const-hoisting.ll
new file mode 100644
index 0000000000000..bcdc507eae6b5
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/sub-from-const-hoisting.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; From https://reviews.llvm.org/D62266
+; This used to cause an endless loop in DAGCombine,
+; since we were hoisting two binops with constants, one constant being opaque,
+; which is why they weren't getting constant folded.
+
+@a = dso_local local_unnamed_addr global i32 0, align 4
+@b = dso_local local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: minsize nounwind optsize
+define dso_local i32 @c() local_unnamed_addr #0 {
+; CHECK-LABEL: c:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    ldr r0, .LCPI0_0
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    ldr r4, .LCPI0_1
+; CHECK-NEXT:    subs r0, r4, r0
+; CHECK-NEXT:    adds r0, r0, #1
+; CHECK-NEXT:    bl d
+; CHECK-NEXT:    ldr r0, .LCPI0_2
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    cmp r0, r4
+; CHECK-NEXT:    ble .LBB0_2
+; CHECK-NEXT:  @ %bb.1: @ %if.then
+; CHECK-NEXT:    bl e
+; CHECK-NEXT:  .LBB0_2: @ %if.end
+; CHECK-NEXT:    pop {r4, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long a
+; CHECK-NEXT:  .LCPI0_1:
+; CHECK-NEXT:    .long 1999 @ 0x7cf
+; CHECK-NEXT:  .LCPI0_2:
+; CHECK-NEXT:    .long b
+entry:
+  %0 = load i32, i32* @a, align 4
+  %sub = sub nsw i32 2000, %0
+  %call = tail call i32 bitcast (i32 (...)* @d to i32 (i32)*)(i32 %sub) #2
+  %1 = load i32, i32* @b, align 4
+  %cmp = icmp sgt i32 %1, 1999
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call1 = tail call i32 bitcast (i32 (...)* @e to i32 ()*)() #2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 undef
+}
+
+declare dso_local i32 @d(...) local_unnamed_addr #1
+declare dso_local i32 @e(...) local_unnamed_addr #1

From 6ada11f13466a758597f912156be47ffca2e1408 Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Thu, 30 May 2019 21:45:59 +0000
Subject: [PATCH 0682/1176] [Remarks][NFC] Move the serialization to
 lib/Remarks

Separate the remark serialization to YAML from the LLVM Diagnostics.

This adds a new serialization abstraction: remarks::Serializer. It's
completely independent from lib/IR and it provides an easy way to
replace YAML by providing a new remarks::Serializer.

Differential Revision: https://reviews.llvm.org/D62632

llvm-svn: 362160
---
 clang/lib/CodeGen/CodeGenAction.cpp          |   3 +-
 llvm/include/llvm/IR/DiagnosticInfo.h        |  13 +-
 llvm/include/llvm/IR/RemarkStreamer.h        |  34 ++--
 llvm/include/llvm/Remarks/RemarkSerializer.h |  68 ++++++++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp   |  26 +--
 llvm/lib/IR/DiagnosticInfo.cpp               | 137 ---------------
 llvm/lib/IR/RemarkStreamer.cpp               |  78 ++++++++-
 llvm/lib/LTO/LTO.cpp                         |   5 +-
 llvm/lib/Remarks/CMakeLists.txt              |   1 +
 llvm/lib/Remarks/YAMLRemarkSerializer.cpp    | 166 +++++++++++++++++++
 llvm/tools/llc/llc.cpp                       |   5 +-
 llvm/tools/opt/opt.cpp                       |   3 +-
 12 files changed, 355 insertions(+), 184 deletions(-)
 create mode 100644 llvm/include/llvm/Remarks/RemarkSerializer.h
 create mode 100644 llvm/lib/Remarks/YAMLRemarkSerializer.cpp

diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index 1f61dc37834ec..7671010fc98cf 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -279,7 +279,8 @@ namespace clang {
         }
 
         Ctx.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
-            CodeGenOpts.OptRecordFile, OptRecordFile->os()));
+            CodeGenOpts.OptRecordFile,
+            llvm::make_unique<remarks::YAMLSerializer>(OptRecordFile->os())));
 
         if (!CodeGenOpts.OptRecordPasses.empty())
           if (Error E = Ctx.getRemarkStreamer()->setFilter(
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index ab4c692e18b8b..373663289dbdf 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -465,12 +465,15 @@ class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithLocationBase {
   virtual bool isEnabled() const = 0;
 
   StringRef getPassName() const { return PassName; }
+  StringRef getRemarkName() const { return RemarkName; }
   std::string getMsg() const;
   Optional<uint64_t> getHotness() const { return Hotness; }
   void setHotness(Optional<uint64_t> H) { Hotness = H; }
 
   bool isVerbose() const { return IsVerbose; }
 
+  ArrayRef<Argument> getArgs() const { return Args; }
+
   static bool classof(const DiagnosticInfo *DI) {
     return (DI->getKind() >= DK_FirstRemark &&
             DI->getKind() <= DK_LastRemark) ||
@@ -500,7 +503,7 @@ class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithLocationBase {
   const char *PassName;
 
   /// Textual identifier for the remark (single-word, camel-case). Can be used
-  /// by external tools reading the YAML output file for optimization remarks to
+  /// by external tools reading the output file for optimization remarks to
   /// identify the remark.
   StringRef RemarkName;
 
@@ -518,8 +521,6 @@ class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithLocationBase {
   /// the optimization records and not in the remark printed in the compiler
   /// output.
   int FirstExtraArgIndex = -1;
-
-  friend struct yaml::MappingTraits<DiagnosticInfoOptimizationBase *>;
 };
 
 /// Allow the insertion operator to return the actual remark type rather than a
@@ -1001,12 +1002,6 @@ class DiagnosticInfoUnsupported : public DiagnosticInfoWithLocationBase {
   void print(DiagnosticPrinter &DP) const override;
 };
 
-namespace yaml {
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
-  static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag);
-};
-} // namespace yaml
-
 } // end namespace llvm
 
 #endif // LLVM_IR_DIAGNOSTICINFO_H
diff --git a/llvm/include/llvm/IR/RemarkStreamer.h b/llvm/include/llvm/IR/RemarkStreamer.h
index b4672c269da4a..621ebb3436c52 100644
--- a/llvm/include/llvm/IR/RemarkStreamer.h
+++ b/llvm/include/llvm/IR/RemarkStreamer.h
@@ -14,10 +14,9 @@
 #define LLVM_IR_REMARKSTREAMER_H
 
 #include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Remarks/RemarkSerializer.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Regex.h"
-#include "llvm/Support/YAMLTraits.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 #include <vector>
@@ -27,33 +26,36 @@ namespace llvm {
 class RemarkStreamer {
   /// The filename that the remark diagnostics are emitted to.
   const std::string Filename;
-  /// The open raw_ostream that the remark diagnostics are emitted to.
-  raw_ostream &OS;
   /// The regex used to filter remarks based on the passes that emit them.
   Optional<Regex> PassFilter;
+  /// The object used to serialize the remarks to a specific format.
+  std::unique_ptr<remarks::Serializer> Serializer;
 
-  /// The YAML streamer.
-  yaml::Output YAMLOutput;
-
-  /// The string table containing all the unique strings used in the output.
-  /// The table will be serialized in a section to be consumed after the
-  /// compilation.
-  remarks::StringTable StrTab;
+  /// Temporary buffer for converting diagnostics into remark objects. This is
+  /// used for the remark arguments that are converted from a vector of
+  /// diagnostic arguments to a vector of remark arguments.
+  SmallVector<remarks::Argument, 8> TmpArgs;
+  /// Convert diagnostics into remark objects. The result uses \p TmpArgs as a
+  /// temporary buffer for the remark arguments, and relies on all the strings
+  /// to be kept in memory until the next call to `toRemark`.
+  /// The lifetime of the members of the result is bound to the lifetime of both
+  /// the remark streamer and the LLVM diagnostics.
+  remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag);
 
 public:
-  RemarkStreamer(StringRef Filename, raw_ostream& OS);
+  RemarkStreamer(StringRef Filename,
+                 std::unique_ptr<remarks::Serializer> Serializer);
   /// Return the filename that the remark diagnostics are emitted to.
   StringRef getFilename() const { return Filename; }
   /// Return stream that the remark diagnostics are emitted to.
-  raw_ostream &getStream() { return OS; }
+  raw_ostream &getStream() { return Serializer->OS; }
+  /// Return the serializer used for this stream.
+  remarks::Serializer &getSerializer() { return *Serializer; }
   /// Set a pass filter based on a regex \p Filter.
   /// Returns an error if the regex is invalid.
   Error setFilter(StringRef Filter);
   /// Emit a diagnostic through the streamer.
   void emit(const DiagnosticInfoOptimizationBase &Diag);
-  /// The string table used during emission.
-  remarks::StringTable &getStringTable() { return StrTab; }
-  const remarks::StringTable &getStringTable() const { return StrTab; }
 };
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h
new file mode 100644
index 0000000000000..7431ace4d21e5
--- /dev/null
+++ b/llvm/include/llvm/Remarks/RemarkSerializer.h
@@ -0,0 +1,68 @@
+//===-- RemarkSerializer.h - Remark serialization interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface for serializing remarks to different formats.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_SERIALIZER_H
+#define LLVM_REMARKS_REMARK_SERIALIZER_H
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace remarks {
+
+/// This is the base class for a remark serializer.
+/// It includes support for using a string table while emitting.
+struct Serializer {
+  /// The open raw_ostream that the remark diagnostics are emitted to.
+  raw_ostream &OS;
+  /// The string table containing all the unique strings used in the output.
+  /// The table can be serialized to be consumed after the compilation.
+  Optional<StringTable> StrTab;
+
+  Serializer(raw_ostream &OS) : OS(OS), StrTab() {}
+
+  /// This is just an interface.
+  virtual ~Serializer() = default;
+  virtual void emit(const Remark &Remark) = 0;
+};
+
+/// Wether the serializer should use a string table while emitting.
+enum class UseStringTable { No, Yes };
+
+/// Serialize the remarks to YAML. One remark entry looks like this:
+/// --- !<TYPE>
+/// Pass:            <PASSNAME>
+/// Name:            <REMARKNAME>
+/// DebugLoc:        { File: <SOURCEFILENAME>, Line: <SOURCELINE>,
+///                    Column: <SOURCECOLUMN> }
+/// Function:        <FUNCTIONNAME>
+/// Args:
+///   - <KEY>: <VALUE>
+///     DebugLoc:        { File: <FILE>, Line: <LINE>, Column: <COL> }
+/// ...
+struct YAMLSerializer : public Serializer {
+  /// The YAML streamer.
+  yaml::Output YAMLOutput;
+
+  YAMLSerializer(raw_ostream &OS,
+                 UseStringTable UseStringTable = UseStringTable::No);
+
+  /// Emit a remark to the stream.
+  void emit(const Remark &Remark) override;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_SERIALIZER_H */
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3317952d05d0a..7adc59d096fee 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -100,6 +100,7 @@
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Pass.h"
 #include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkStringTable.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
@@ -1347,6 +1348,7 @@ void AsmPrinter::emitRemarksSection(Module &M) {
   RemarkStreamer *RS = M.getContext().getRemarkStreamer();
   if (!RS)
     return;
+  const remarks::Serializer &Serializer = RS->getSerializer();
 
   // Switch to the right section: .remarks/__remarks.
   MCSection *RemarksSection =
@@ -1368,23 +1370,27 @@ void AsmPrinter::emitRemarksSection(Module &M) {
   // Note: we need to use the streamer here to emit it in the section. We can't
   // just use the serialize function with a raw_ostream because of the way
   // MCStreamers work.
-  const remarks::StringTable &StrTab = RS->getStringTable();
-  std::vector<StringRef> StrTabStrings = StrTab.serialize();
-  uint64_t StrTabSize = StrTab.SerializedSize;
+  uint64_t StrTabSize =
+      Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
   // Emit the total size of the string table (the size itself excluded):
   // little-endian uint64_t.
   // The total size is located after the version number.
+  // Note: even if no string table is used, emit 0.
   std::array<char, 8> StrTabSizeBuf;
   support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
   OutStreamer->EmitBinaryData(
       StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
-  // Emit a list of null-terminated strings.
-  // Note: the order is important here: the ID used in the remarks corresponds
-  // to the position of the string in the section.
-  for (StringRef Str : StrTabStrings) {
-    OutStreamer->EmitBytes(Str);
-    // Explicitly emit a '\0'.
-    OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+
+  if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
+    std::vector<StringRef> StrTabStrings = StrTab->serialize();
+    // Emit a list of null-terminated strings.
+    // Note: the order is important here: the ID used in the remarks corresponds
+    // to the position of the string in the section.
+    for (StringRef Str : StrTabStrings) {
+      OutStreamer->EmitBytes(Str);
+      // Explicitly emit a '\0'.
+      OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+    }
   }
 
   // Emit the null-terminated absolute path to the remark file.
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 7c387e957244a..4a8e3cca34930 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -43,8 +43,6 @@
 
 using namespace llvm;
 
-cl::opt<bool> UseStringTable("remarks-yaml-string-table", cl::init(false));
-
 int llvm::getNextAvailablePluginDiagnosticKind() {
   static std::atomic<int> PluginKindID(DK_FirstPluginKind);
   return ++PluginKindID;
@@ -374,138 +372,3 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
 
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
-
-template <typename T>
-static void mapRemarkHeader(
-    yaml::IO &io, T PassName, T RemarkName, DiagnosticLocation DL,
-    T FunctionName, Optional<uint64_t> Hotness,
-    SmallVectorImpl<DiagnosticInfoOptimizationBase::Argument> &Args) {
-  io.mapRequired("Pass", PassName);
-  io.mapRequired("Name", RemarkName);
-  if (!io.outputting() || DL.isValid())
-    io.mapOptional("DebugLoc", DL);
-  io.mapRequired("Function", FunctionName);
-  io.mapOptional("Hotness", Hotness);
-  io.mapOptional("Args", Args);
-}
-
-namespace llvm {
-namespace yaml {
-
-void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
-    IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
-  assert(io.outputting() && "input not yet implemented");
-
-  if (io.mapTag("!Passed",
-                (OptDiag->getKind() == DK_OptimizationRemark ||
-                 OptDiag->getKind() == DK_MachineOptimizationRemark)))
-    ;
-  else if (io.mapTag(
-               "!Missed",
-               (OptDiag->getKind() == DK_OptimizationRemarkMissed ||
-                OptDiag->getKind() == DK_MachineOptimizationRemarkMissed)))
-    ;
-  else if (io.mapTag(
-               "!Analysis",
-               (OptDiag->getKind() == DK_OptimizationRemarkAnalysis ||
-                OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis)))
-    ;
-  else if (io.mapTag("!AnalysisFPCommute",
-                     OptDiag->getKind() ==
-                         DK_OptimizationRemarkAnalysisFPCommute))
-    ;
-  else if (io.mapTag("!AnalysisAliasing",
-                     OptDiag->getKind() ==
-                         DK_OptimizationRemarkAnalysisAliasing))
-    ;
-  else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure))
-    ;
-  else
-    llvm_unreachable("Unknown remark type");
-
-  // These are read-only for now.
-  DiagnosticLocation DL = OptDiag->getLocation();
-  StringRef FN =
-      GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
-
-  StringRef PassName(OptDiag->PassName);
-  if (UseStringTable) {
-    remarks::StringTable &StrTab =
-        reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
-    unsigned PassID = StrTab.add(PassName).first;
-    unsigned NameID = StrTab.add(OptDiag->RemarkName).first;
-    unsigned FunctionID = StrTab.add(FN).first;
-    mapRemarkHeader(io, PassID, NameID, DL, FunctionID, OptDiag->Hotness,
-                    OptDiag->Args);
-  } else {
-    mapRemarkHeader(io, PassName, OptDiag->RemarkName, DL, FN, OptDiag->Hotness,
-                    OptDiag->Args);
-  }
-}
-
-template <> struct MappingTraits<DiagnosticLocation> {
-  static void mapping(IO &io, DiagnosticLocation &DL) {
-    assert(io.outputting() && "input not yet implemented");
-
-    StringRef File = DL.getRelativePath();
-    unsigned Line = DL.getLine();
-    unsigned Col = DL.getColumn();
-
-    if (UseStringTable) {
-      remarks::StringTable &StrTab =
-          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
-      unsigned FileID = StrTab.add(File).first;
-      io.mapRequired("File", FileID);
-    } else {
-      io.mapRequired("File", File);
-    }
-
-    io.mapRequired("Line", Line);
-    io.mapRequired("Column", Col);
-  }
-
-  static const bool flow = true;
-};
-
-/// Helper struct for multiline string block literals. Use this type to preserve
-/// newlines in strings.
-struct StringBlockVal {
-  StringRef Value;
-  StringBlockVal(const std::string &Value) : Value(Value) {}
-};
-
-template <> struct BlockScalarTraits<StringBlockVal> {
-  static void output(const StringBlockVal &S, void *Ctx, raw_ostream &OS) {
-    return ScalarTraits<StringRef>::output(S.Value, Ctx, OS);
-  }
-
-  static StringRef input(StringRef Scalar, void *Ctx, StringBlockVal &S) {
-    return ScalarTraits<StringRef>::input(Scalar, Ctx, S.Value);
-  }
-};
-
-// Implement this as a mapping for now to get proper quotation for the value.
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
-  static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
-    assert(io.outputting() && "input not yet implemented");
-
-    if (UseStringTable) {
-      remarks::StringTable &StrTab =
-          reinterpret_cast<RemarkStreamer *>(io.getContext())->getStringTable();
-      auto ValueID = StrTab.add(A.Val).first;
-      io.mapRequired(A.Key.data(), ValueID);
-    } else if (StringRef(A.Val).count('\n') > 1) {
-      StringBlockVal S(A.Val);
-      io.mapRequired(A.Key.data(), S);
-    } else {
-      io.mapRequired(A.Key.data(), A.Val);
-    }
-    if (A.Loc.isValid())
-      io.mapOptional("DebugLoc", A.Loc);
-  }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp
index d2a4ed4adf49c..fe1a128f47353 100644
--- a/llvm/lib/IR/RemarkStreamer.cpp
+++ b/llvm/lib/IR/RemarkStreamer.cpp
@@ -12,12 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/RemarkStreamer.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
 
 using namespace llvm;
 
-RemarkStreamer::RemarkStreamer(StringRef Filename, raw_ostream &OS)
-    : Filename(Filename), OS(OS),
-      YAMLOutput(OS, reinterpret_cast<void *>(this)), StrTab() {
+RemarkStreamer::RemarkStreamer(StringRef Filename,
+                               std::unique_ptr<remarks::Serializer> Serializer)
+    : Filename(Filename), PassFilter(), Serializer(std::move(Serializer)) {
   assert(!Filename.empty() && "This needs to be a real filename.");
 }
 
@@ -31,12 +34,75 @@ Error RemarkStreamer::setFilter(StringRef Filter) {
   return Error::success();
 }
 
+/// DiagnosticKind -> remarks::Type
+static remarks::Type toRemarkType(enum DiagnosticKind Kind) {
+  switch (Kind) {
+  default:
+    return remarks::Type::Unknown;
+  case DK_OptimizationRemark:
+  case DK_MachineOptimizationRemark:
+    return remarks::Type::Passed;
+  case DK_OptimizationRemarkMissed:
+  case DK_MachineOptimizationRemarkMissed:
+    return remarks::Type::Missed;
+  case DK_OptimizationRemarkAnalysis:
+  case DK_MachineOptimizationRemarkAnalysis:
+    return remarks::Type::Analysis;
+  case DK_OptimizationRemarkAnalysisFPCommute:
+    return remarks::Type::AnalysisFPCommute;
+  case DK_OptimizationRemarkAnalysisAliasing:
+    return remarks::Type::AnalysisAliasing;
+  case DK_OptimizationFailure:
+    return remarks::Type::Failure;
+  }
+}
+
+/// DiagnosticLocation -> remarks::RemarkLocation.
+static Optional<remarks::RemarkLocation>
+toRemarkLocation(const DiagnosticLocation &DL) {
+  if (!DL.isValid())
+    return None;
+  StringRef File = DL.getRelativePath();
+  unsigned Line = DL.getLine();
+  unsigned Col = DL.getColumn();
+  return remarks::RemarkLocation{File, Line, Col};
+}
+
+/// LLVM Diagnostic -> Remark
+remarks::Remark
+RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) {
+  // Re-use the buffer.
+  TmpArgs.clear();
+
+  remarks::Remark R; // The result.
+  R.RemarkType = toRemarkType(static_cast<DiagnosticKind>(Diag.getKind()));
+  R.PassName = Diag.getPassName();
+  R.RemarkName = Diag.getRemarkName();
+  R.FunctionName =
+      GlobalValue::dropLLVMManglingEscape(Diag.getFunction().getName());
+  R.Loc = toRemarkLocation(Diag.getLocation());
+  R.Hotness = Diag.getHotness();
+
+  // Use TmpArgs to build the list of arguments and re-use the memory allocated
+  // from previous remark conversions.
+  for (const DiagnosticInfoOptimizationBase::Argument &Arg : Diag.getArgs()) {
+    TmpArgs.emplace_back();
+    TmpArgs.back().Key = Arg.Key;
+    TmpArgs.back().Val = Arg.Val;
+    TmpArgs.back().Loc = toRemarkLocation(Arg.Loc);
+  }
+  R.Args = TmpArgs; // This is valid until the next call to this function.
+
+  return R;
+}
+
 void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) {
   if (Optional<Regex> &Filter = PassFilter)
     if (!Filter->match(Diag.getPassName()))
       return;
 
-  DiagnosticInfoOptimizationBase *DiagPtr =
-      const_cast<DiagnosticInfoOptimizationBase *>(&Diag);
-  YAMLOutput << DiagPtr;
+  // First, convert the diagnostic to a remark.
+  remarks::Remark R = toRemark(Diag);
+  // Then, emit the remark through the serializer.
+  Serializer->emit(R);
 }
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index ce1b0a1e4f613..882b15525c185 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1356,8 +1356,9 @@ lto::setupOptimizationRemarks(LLVMContext &Context,
       llvm::make_unique<ToolOutputFile>(Filename, EC, sys::fs::F_None);
   if (EC)
     return errorCodeToError(EC);
-  Context.setRemarkStreamer(
-      llvm::make_unique<RemarkStreamer>(Filename, DiagnosticFile->os()));
+  Context.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
+      Filename,
+      llvm::make_unique<remarks::YAMLSerializer>(DiagnosticFile->os())));
 
   if (!LTORemarksPasses.empty())
     if (Error E = Context.getRemarkStreamer()->setFilter(LTORemarksPasses))
diff --git a/llvm/lib/Remarks/CMakeLists.txt b/llvm/lib/Remarks/CMakeLists.txt
index ccbca7ea4f4cd..73383597accd5 100644
--- a/llvm/lib/Remarks/CMakeLists.txt
+++ b/llvm/lib/Remarks/CMakeLists.txt
@@ -3,4 +3,5 @@ add_llvm_library(LLVMRemarks
   RemarkParser.cpp
   RemarkStringTable.cpp
   YAMLRemarkParser.cpp
+  YAMLRemarkSerializer.cpp
 )
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
new file mode 100644
index 0000000000000..47fef5dd2aee9
--- /dev/null
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -0,0 +1,166 @@
+//===- YAMLRemarkSerializer.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the YAML remark serializer using
+// LLVM's YAMLTraits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+cl::opt<bool> RemarksYAMLStringTable("remarks-yaml-string-table",
+                                     cl::init(false));
+
+// Use the same keys whether we use a string table or not (respectively, T is an
+// unsigned or a StringRef).
+template <typename T>
+static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName,
+                            Optional<RemarkLocation> RL, T FunctionName,
+                            Optional<uint64_t> Hotness,
+                            ArrayRef<Argument> Args) {
+  io.mapRequired("Pass", PassName);
+  io.mapRequired("Name", RemarkName);
+  io.mapOptional("DebugLoc", RL);
+  io.mapRequired("Function", FunctionName);
+  io.mapOptional("Hotness", Hotness);
+  io.mapOptional("Args", Args);
+}
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<remarks::Remark *> {
+  static void mapping(IO &io, remarks::Remark *&Remark) {
+    assert(io.outputting() && "input not yet implemented");
+
+    if (io.mapTag("!Passed", (Remark->RemarkType == Type::Passed)))
+      ;
+    else if (io.mapTag("!Missed", (Remark->RemarkType == Type::Missed)))
+      ;
+    else if (io.mapTag("!Analysis", (Remark->RemarkType == Type::Analysis)))
+      ;
+    else if (io.mapTag("!AnalysisFPCommute",
+                       (Remark->RemarkType == Type::AnalysisFPCommute)))
+      ;
+    else if (io.mapTag("!AnalysisAliasing",
+                       (Remark->RemarkType == Type::AnalysisAliasing)))
+      ;
+    else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure)))
+      ;
+    else
+      llvm_unreachable("Unknown remark type");
+
+    if (Optional<StringTable> &StrTab =
+            reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+      unsigned PassID = StrTab->add(Remark->PassName).first;
+      unsigned NameID = StrTab->add(Remark->RemarkName).first;
+      unsigned FunctionID = StrTab->add(Remark->FunctionName).first;
+      mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID,
+                      Remark->Hotness, Remark->Args);
+    } else {
+      mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc,
+                      Remark->FunctionName, Remark->Hotness, Remark->Args);
+    }
+  }
+};
+
+template <> struct MappingTraits<RemarkLocation> {
+  static void mapping(IO &io, RemarkLocation &RL) {
+    assert(io.outputting() && "input not yet implemented");
+
+    StringRef File = RL.SourceFilePath;
+    unsigned Line = RL.SourceLine;
+    unsigned Col = RL.SourceColumn;
+
+    if (Optional<StringTable> &StrTab =
+            reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+      unsigned FileID = StrTab->add(File).first;
+      io.mapRequired("File", FileID);
+    } else {
+      io.mapRequired("File", File);
+    }
+
+    io.mapRequired("Line", Line);
+    io.mapRequired("Column", Col);
+  }
+
+  static const bool flow = true;
+};
+
+/// Helper struct for multiline string block literals. Use this type to preserve
+/// newlines in strings.
+struct StringBlockVal {
+  StringRef Value;
+  StringBlockVal(const std::string &Value) : Value(Value) {}
+};
+
+template <> struct BlockScalarTraits<StringBlockVal> {
+  static void output(const StringBlockVal &S, void *Ctx, raw_ostream &OS) {
+    return ScalarTraits<StringRef>::output(S.Value, Ctx, OS);
+  }
+
+  static StringRef input(StringRef Scalar, void *Ctx, StringBlockVal &S) {
+    return ScalarTraits<StringRef>::input(Scalar, Ctx, S.Value);
+  }
+};
+
+/// ArrayRef is not really compatible with the YAMLTraits. Everything should be
+/// immutable in an ArrayRef, while the SequenceTraits expect a mutable version
+/// for inputting, but we're only using the outputting capabilities here.
+/// This is a hack, but still nicer than having to manually call the YAMLIO
+/// internal methods.
+/// Keep this in this file so that it doesn't get misused from YAMLTraits.h.
+template <typename T> struct SequenceTraits<ArrayRef<T>> {
+  static size_t size(IO &io, ArrayRef<T> &seq) { return seq.size(); }
+  static Argument &element(IO &io, ArrayRef<T> &seq, size_t index) {
+    assert(io.outputting() && "input not yet implemented");
+    // The assert above should make this "safer" to satisfy the YAMLTraits.
+    return const_cast<T &>(seq[index]);
+  }
+};
+
+/// Implement this as a mapping for now to get proper quotation for the value.
+template <> struct MappingTraits<Argument> {
+  static void mapping(IO &io, Argument &A) {
+    assert(io.outputting() && "input not yet implemented");
+
+    if (Optional<StringTable> &StrTab =
+            reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+      auto ValueID = StrTab->add(A.Val).first;
+      io.mapRequired(A.Key.data(), ValueID);
+    } else if (StringRef(A.Val).count('\n') > 1) {
+      StringBlockVal S(A.Val);
+      io.mapRequired(A.Key.data(), S);
+    } else {
+      io.mapRequired(A.Key.data(), A.Val);
+    }
+    io.mapOptional("DebugLoc", A.Loc);
+  }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Argument)
+
+YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable)
+    : Serializer(OS), YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+  if (UseStringTable == UseStringTable::Yes || RemarksYAMLStringTable)
+    StrTab.emplace();
+}
+
+void YAMLSerializer::emit(const Remark &Remark) {
+  // Again, YAMLTraits expect a non-const object for inputting, but we're not
+  // using that here.
+  auto R = const_cast<remarks::Remark *>(&Remark);
+  YAMLOutput << R;
+}
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index be103845e9779..66c5cd0ad43ad 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -340,8 +340,9 @@ int main(int argc, char **argv) {
       WithColor::error(errs(), argv[0]) << EC.message() << '\n';
       return 1;
     }
-    Context.setRemarkStreamer(
-        llvm::make_unique<RemarkStreamer>(RemarksFilename, YamlFile->os()));
+    Context.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
+        RemarksFilename,
+        llvm::make_unique<remarks::YAMLSerializer>(YamlFile->os())));
 
     if (!RemarksPasses.empty())
       if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses)) {
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 2119cc0183f21..7053c2deb3771 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -563,7 +563,8 @@ int main(int argc, char **argv) {
       return 1;
     }
     Context.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
-        RemarksFilename, OptRemarkFile->os()));
+        RemarksFilename,
+        llvm::make_unique<remarks::YAMLSerializer>(OptRemarkFile->os())));
 
     if (!RemarksPasses.empty())
       if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses)) {

From 86e73f51d77df26581ccb1b36447d0db9d5ac647 Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Thu, 30 May 2019 21:57:23 +0000
Subject: [PATCH 0683/1176] [WebAssembly] Improve feature validation error
 messages

Summary:
Add the names of the input files responsible for each error to the
messages.

Reviewers: sbc100, azakai

Subscribers: dschuff, jgravelle-google, aheejin, sunfish, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62704

llvm-svn: 362162
---
 lld/test/wasm/shared-memory-no-atomics.yaml |  2 +-
 lld/test/wasm/target-feature-required.yaml  |  6 +--
 lld/test/wasm/target-feature-used.yaml      |  4 +-
 lld/wasm/Writer.cpp                         | 52 +++++++++++++--------
 4 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/lld/test/wasm/shared-memory-no-atomics.yaml b/lld/test/wasm/shared-memory-no-atomics.yaml
index e4c7b76c2d957..1588f2f0029e1 100644
--- a/lld/test/wasm/shared-memory-no-atomics.yaml
+++ b/lld/test/wasm/shared-memory-no-atomics.yaml
@@ -57,4 +57,4 @@ Sections:
 # NO-SHARED-NEXT:     - Initial:         0x00000002
 # NO-SHARED-NOT:        Maximum:
 
-# SHARED: 'atomics' feature is disallowed, so --shared-memory must not be used{{$}}
+# SHARED: 'atomics' feature is disallowed by {{.*}}shared-memory-no-atomics.yaml.tmp1.o, so --shared-memory must not be used{{$}}
diff --git a/lld/test/wasm/target-feature-required.yaml b/lld/test/wasm/target-feature-required.yaml
index ce67efc9cbd98..ce9857a7e89ad 100644
--- a/lld/test/wasm/target-feature-required.yaml
+++ b/lld/test/wasm/target-feature-required.yaml
@@ -51,7 +51,7 @@ Sections:
 # SPECIFIED-NEXT:         Name:            foo
 # SPECIFIED-NEXT: ...
 
-# UNSPECIFIED: Target feature 'foo' is not allowed.{{$}}
+# UNSPECIFIED: Target feature 'foo' used by {{.*}}target-feature-required.yaml.tmp1.o is not allowed.{{$}}
 
 # UNSPECIFIED-NOCHECK:        - Type:            CUSTOM
 # UNSPECIFIED-NOCHECK-NEXT:     Name:            target_features
@@ -71,7 +71,7 @@ Sections:
 # REQUIRED-NEXT:         Name:            foo
 # REQUIRED-NEXT: ...
 
-# DISALLOWED: Target feature 'foo' is disallowed. Use --no-check-features to suppress.{{$}}
+# DISALLOWED: Target feature 'foo' used in {{.*}}target-feature-required.yaml.tmp1.o is disallowed by {{.*}}target-feature-required.yaml.tmp.disallowed.o. Use --no-check-features to suppress.{{$}}
 
 # DISALLOWED-NOCHECK:        - Type:            CUSTOM
 # DISALLOWED-NOCHECK-NEXT:     Name:            target_features
@@ -80,7 +80,7 @@ Sections:
 # DISALLOWED-NOCHECK-NEXT:         Name:            foo
 # DISALLOWED-NOCHECK-NEXT: ...
 
-# NONE: Missing required target feature 'foo'. Use --no-check-features to suppress.{{$}}
+# NONE: Missing target feature 'foo' in {{.*}}target-feature-required.yaml.tmp.none.o, required by {{.*}}target-feature-required.yaml.tmp1.o. Use --no-check-features to suppress.{{$}}
 
 # NONE-NOCHECK:        - Type:            CUSTOM
 # NONE-NOCHECK-NEXT:     Name:            target_features
diff --git a/lld/test/wasm/target-feature-used.yaml b/lld/test/wasm/target-feature-used.yaml
index 7301a13ea6f1c..c9c19c7494c0a 100644
--- a/lld/test/wasm/target-feature-used.yaml
+++ b/lld/test/wasm/target-feature-used.yaml
@@ -53,7 +53,7 @@ Sections:
 # SPECIFIED-NEXT:         Name:            foo
 # SPECIFIED-NEXT: ...
 
-# UNSPECIFIED: Target feature 'foo' is not allowed.{{$}}
+# UNSPECIFIED: Target feature 'foo' used by {{.*}}target-feature-used.yaml.tmp1.o is not allowed.{{$}}
 
 # UNSPECIFIED-NOCHECK:        - Type:            CUSTOM
 # UNSPECIFIED-NOCHECK-NEXT:     Name:            target_features
@@ -80,7 +80,7 @@ Sections:
 # REQUIRED-NEXT:         Name:            foo
 # REQUIRED-NEXT: ...
 
-# DISALLOWED: Target feature 'foo' is disallowed. Use --no-check-features to suppress.{{$}}
+# DISALLOWED: Target feature 'foo' used in {{.*}}target-feature-used.yaml.tmp1.o is disallowed by {{.*}}target-feature-used.yaml.tmp.disallowed.o. Use --no-check-features to suppress.{{$}}
 
 # DISALLOWED-NOCHECK:        - Type:            CUSTOM
 # DISALLOWED-NOCHECK-NEXT:     Name:            target_features
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 5df364fb7706d..b7f5afc2dcbef 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -348,9 +348,9 @@ void Writer::finalizeSections() {
 }
 
 void Writer::populateTargetFeatures() {
-  SmallSet<std::string, 8> Used;
-  SmallSet<std::string, 8> Required;
-  SmallSet<std::string, 8> Disallowed;
+  StringMap<std::string> Used;
+  StringMap<std::string> Required;
+  StringMap<std::string> Disallowed;
 
   // Only infer used features if user did not specify features
   bool InferFeatures = !Config->Features.hasValue();
@@ -365,17 +365,18 @@ void Writer::populateTargetFeatures() {
 
   // Find the sets of used, required, and disallowed features
   for (ObjFile *File : Symtab->ObjectFiles) {
+    StringRef FileName(File->getName());
     for (auto &Feature : File->getWasmObj()->getTargetFeatures()) {
       switch (Feature.Prefix) {
       case WASM_FEATURE_PREFIX_USED:
-        Used.insert(Feature.Name);
+        Used.insert({Feature.Name, FileName});
         break;
       case WASM_FEATURE_PREFIX_REQUIRED:
-        Used.insert(Feature.Name);
-        Required.insert(Feature.Name);
+        Used.insert({Feature.Name, FileName});
+        Required.insert({Feature.Name, FileName});
         break;
       case WASM_FEATURE_PREFIX_DISALLOWED:
-        Disallowed.insert(Feature.Name);
+        Disallowed.insert({Feature.Name, FileName});
         break;
       default:
         error("Unrecognized feature policy prefix " +
@@ -385,41 +386,52 @@ void Writer::populateTargetFeatures() {
   }
 
   if (InferFeatures)
-    Out.TargetFeaturesSec->Features.insert(Used.begin(), Used.end());
-
-  if (Out.TargetFeaturesSec->Features.count("atomics") && !Config->SharedMemory)
-    error("'atomics' feature is used, so --shared-memory must be used");
+    Out.TargetFeaturesSec->Features.insert(Used.keys().begin(),
+                                           Used.keys().end());
+
+  if (Out.TargetFeaturesSec->Features.count("atomics") &&
+      !Config->SharedMemory) {
+    if (InferFeatures)
+      error(Twine("'atomics' feature is used by ") + Used["atomics"] +
+            ", so --shared-memory must be used");
+    else
+      error("'atomics' feature is used, so --shared-memory must be used");
+  }
 
   if (!Config->CheckFeatures)
     return;
 
   if (Disallowed.count("atomics") && Config->SharedMemory)
-    error(
-        "'atomics' feature is disallowed, so --shared-memory must not be used");
+    error("'atomics' feature is disallowed by " + Disallowed["atomics"] +
+          ", so --shared-memory must not be used");
 
   // Validate that used features are allowed in output
   if (!InferFeatures) {
-    for (auto &Feature : Used) {
+    for (auto &Feature : Used.keys()) {
       if (!Out.TargetFeaturesSec->Features.count(Feature))
-        error(Twine("Target feature '") + Feature + "' is not allowed.");
+        error(Twine("Target feature '") + Feature + "' used by " +
+              Used[Feature] + " is not allowed.");
     }
   }
 
   // Validate the required and disallowed constraints for each file
   for (ObjFile *File : Symtab->ObjectFiles) {
+    StringRef FileName(File->getName());
     SmallSet<std::string, 8> ObjectFeatures;
     for (auto &Feature : File->getWasmObj()->getTargetFeatures()) {
       if (Feature.Prefix == WASM_FEATURE_PREFIX_DISALLOWED)
         continue;
       ObjectFeatures.insert(Feature.Name);
       if (Disallowed.count(Feature.Name))
-        error(Twine("Target feature '") + Feature.Name +
-              "' is disallowed. Use --no-check-features to suppress.");
+        error(Twine("Target feature '") + Feature.Name + "' used in " +
+              FileName + " is disallowed by " + Disallowed[Feature.Name] +
+              ". Use --no-check-features to suppress.");
     }
-    for (auto &Feature : Required) {
+    for (auto &Feature : Required.keys()) {
       if (!ObjectFeatures.count(Feature))
-        error(Twine("Missing required target feature '") + Feature +
-              "'. Use --no-check-features to suppress.");
+        error(Twine("Missing target feature '") + Feature + "' in " + FileName +
+              ", required by " + Required[Feature] +
+              ". Use --no-check-features to suppress.");
     }
   }
 }

From 365e59248056e8ba24848a5857117b01a066ef68 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Thu, 30 May 2019 21:58:47 +0000
Subject: [PATCH 0684/1176] Attempt to fix test failure for armv8.

Looks like armv8 can't handle a thousand threads, which GWP-ASan
requests when running a synchronised mutex test. Limiting this to 100 to
attempt to fix the build issue.

llvm-svn: 362163
---
 compiler-rt/lib/gwp_asan/tests/mutex_test.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp b/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp
index 36f7e1d2323d3..5bc53b902185f 100644
--- a/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp
+++ b/compiler-rt/lib/gwp_asan/tests/mutex_test.cpp
@@ -84,6 +84,6 @@ static void runSynchronisedTest(unsigned NumThreads, unsigned CounterMax) {
 }
 
 TEST(GwpAsanMutexTest, SynchronisedCounterTest) {
-  runSynchronisedTest(4, 100000);
-  runSynchronisedTest(1000, 1000000);
+  runSynchronisedTest(4, 1000000);
+  runSynchronisedTest(100, 1000000);
 }

From e5a7a858f56ce71d0bacf87cb8ef5e48666f34e1 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Thu, 30 May 2019 22:00:18 +0000
Subject: [PATCH 0685/1176] [Target] Generalize language-specific behavior in
 ThreadPlanStepThrough

Summary:
When creating a ThreadPlan to step through a trampoline, we ask the
ObjC language runtime and the CPP language runtime to come up with such a thread
plan if the dynamic loader fails to give us one. I don't see why this behavior
can't be language agnostic.

Differential Revision: https://reviews.llvm.org/D61921

llvm-svn: 362164
---
 lldb/include/lldb/Target/CPPLanguageRuntime.h |  2 +-
 lldb/include/lldb/Target/LanguageRuntime.h    |  3 +++
 .../include/lldb/Target/ObjCLanguageRuntime.h |  3 ---
 lldb/source/Target/ThreadPlanStepThrough.cpp  | 26 +++++++------------
 4 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/lldb/include/lldb/Target/CPPLanguageRuntime.h b/lldb/include/lldb/Target/CPPLanguageRuntime.h
index a144a4884f453..8de885372862b 100644
--- a/lldb/include/lldb/Target/CPPLanguageRuntime.h
+++ b/lldb/include/lldb/Target/CPPLanguageRuntime.h
@@ -61,7 +61,7 @@ class CPPLanguageRuntime : public LanguageRuntime {
   /// \return
   ///      A ThreadPlan Shared pointer
   lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread,
-                                                  bool stop_others);
+                                                  bool stop_others) override;
 
   bool IsRuntimeSupportValue(ValueObject &valobj) override;
 protected:
diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h
index 43fcf52eb9573..105a0040f004a 100644
--- a/lldb/include/lldb/Target/LanguageRuntime.h
+++ b/lldb/include/lldb/Target/LanguageRuntime.h
@@ -143,6 +143,9 @@ class LanguageRuntime : public PluginInterface {
     return false;
   }
 
+  virtual lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread,
+                                                          bool stop_others) = 0;
+
   /// Identify whether a value is a language implementation detaul
   /// that should be hidden from the user interface by default.
   virtual bool IsRuntimeSupportValue(ValueObject &valobj) { return false; }
diff --git a/lldb/include/lldb/Target/ObjCLanguageRuntime.h b/lldb/include/lldb/Target/ObjCLanguageRuntime.h
index e8fd897f3bbfd..c31d7255d6d7d 100644
--- a/lldb/include/lldb/Target/ObjCLanguageRuntime.h
+++ b/lldb/include/lldb/Target/ObjCLanguageRuntime.h
@@ -216,9 +216,6 @@ class ObjCLanguageRuntime : public LanguageRuntime {
 
   virtual bool HasReadObjCLibrary() = 0;
 
-  virtual lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread,
-                                                          bool stop_others) = 0;
-
   lldb::addr_t LookupInMethodCache(lldb::addr_t class_addr, lldb::addr_t sel);
 
   void AddToMethodCache(lldb::addr_t class_addr, lldb::addr_t sel,
diff --git a/lldb/source/Target/ThreadPlanStepThrough.cpp b/lldb/source/Target/ThreadPlanStepThrough.cpp
index d08d77965b731..e46eba00184e1 100644
--- a/lldb/source/Target/ThreadPlanStepThrough.cpp
+++ b/lldb/source/Target/ThreadPlanStepThrough.cpp
@@ -8,9 +8,8 @@
 
 #include "lldb/Target/ThreadPlanStepThrough.h"
 #include "lldb/Breakpoint/Breakpoint.h"
-#include "lldb/Target/CPPLanguageRuntime.h"
 #include "lldb/Target/DynamicLoader.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
+#include "lldb/Target/LanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/Target/Target.h"
@@ -85,22 +84,17 @@ void ThreadPlanStepThrough::LookForPlanToStepThroughFromCurrentPC() {
     m_sub_plan_sp =
         loader->GetStepThroughTrampolinePlan(m_thread, m_stop_others);
 
-  // If that didn't come up with anything, try the ObjC runtime plugin:
-  if (!m_sub_plan_sp.get()) {
-    ObjCLanguageRuntime *objc_runtime =
-        m_thread.GetProcess()->GetObjCLanguageRuntime();
-    if (objc_runtime)
+  // If the DynamicLoader was unable to provide us with a ThreadPlan, then we
+  // try the LanguageRuntimes.
+  if (!m_sub_plan_sp) {
+    for (LanguageRuntime *runtime :
+         m_thread.GetProcess()->GetLanguageRuntimes()) {
       m_sub_plan_sp =
-          objc_runtime->GetStepThroughTrampolinePlan(m_thread, m_stop_others);
-
-    CPPLanguageRuntime *cpp_runtime =
-        m_thread.GetProcess()->GetCPPLanguageRuntime();
+          runtime->GetStepThroughTrampolinePlan(m_thread, m_stop_others);
 
-    // If the ObjC runtime did not provide us with a step though plan then if we
-    // have it check the C++ runtime for a step though plan.
-    if (!m_sub_plan_sp.get() && cpp_runtime)
-      m_sub_plan_sp =
-          cpp_runtime->GetStepThroughTrampolinePlan(m_thread, m_stop_others);
+      if (m_sub_plan_sp)
+        break;
+    }
   }
 
   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_STEP));

From 48998d10e089bb20ff1e47fcf573b58602b285b2 Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Thu, 30 May 2019 22:01:56 +0000
Subject: [PATCH 0686/1176] [Remarks] Fix usage of enum class

Breaks the build on some compilers:

http://lab.llvm.org:8011/builders/clang-cmake-x86_64-avx2-linux/builds/9720/steps/build%20stage%201/logs/stdio

llvm-svn: 362165
---
 llvm/include/llvm/Remarks/RemarkSerializer.h | 2 +-
 llvm/lib/Remarks/YAMLRemarkSerializer.cpp    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Remarks/RemarkSerializer.h b/llvm/include/llvm/Remarks/RemarkSerializer.h
index 7431ace4d21e5..def5c2e166204 100644
--- a/llvm/include/llvm/Remarks/RemarkSerializer.h
+++ b/llvm/include/llvm/Remarks/RemarkSerializer.h
@@ -56,7 +56,7 @@ struct YAMLSerializer : public Serializer {
   yaml::Output YAMLOutput;
 
   YAMLSerializer(raw_ostream &OS,
-                 UseStringTable UseStringTable = UseStringTable::No);
+                 UseStringTable UseStringTable = remarks::UseStringTable::No);
 
   /// Emit a remark to the stream.
   void emit(const Remark &Remark) override;
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
index 47fef5dd2aee9..14de44fbe32c5 100644
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -154,7 +154,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(Argument)
 
 YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable)
     : Serializer(OS), YAMLOutput(OS, reinterpret_cast<void *>(this)) {
-  if (UseStringTable == UseStringTable::Yes || RemarksYAMLStringTable)
+  if (UseStringTable == remarks::UseStringTable::Yes || RemarksYAMLStringTable)
     StrTab.emplace();
 }
 

From dd3a9caf477a01375b9a30416aed834cf85ed3ae Mon Sep 17 00:00:00 2001
From: Amy Huang <akhuang@google.com>
Date: Thu, 30 May 2019 22:04:11 +0000
Subject: [PATCH 0687/1176] Add enums as global variables in the IR metadata.

Summary:
Keeps track of the enums that were used by saving them as DIGlobalVariables,
since CodeView emits debug info for global constants.

Reviewers: rnk

Subscribers: aprantl, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62635

llvm-svn: 362166
---
 clang/lib/CodeGen/CGDebugInfo.cpp            |  12 +-
 clang/test/CodeGenCXX/debug-info-enum.cpp    |  14 ++
 llvm/test/DebugInfo/COFF/global-constants.ll | 148 ++++++++++++-------
 3 files changed, 113 insertions(+), 61 deletions(-)

diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index b79169f0a060c..a297025547ec8 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -4240,7 +4240,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
 
 llvm::DIDerivedType *
 CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) {
-  if (!D->isStaticDataMember())
+  if (!D || !D->isStaticDataMember())
     return nullptr;
 
   auto MI = StaticDataMemberCache.find(D->getCanonicalDecl());
@@ -4353,12 +4353,14 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
   StringRef Name = VD->getName();
   llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit);
 
-  // Do not use global variables for enums.
+  // Do not use global variables for enums, unless for CodeView.
   if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) {
     const auto *ED = cast<EnumDecl>(ECD->getDeclContext());
     assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?");
     (void)ED;
-    return;
+
+    if (!CGM.getCodeGenOpts().EmitCodeView)
+      return;
   }
 
   llvm::DIScope *DContext = nullptr;
@@ -4369,8 +4371,8 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
 
   // Emit definition for static members in CodeView.
   VD = cast<ValueDecl>(VD->getCanonicalDecl());
-  auto *VarD = cast<VarDecl>(VD);
-  if (VarD->isStaticDataMember()) {
+  auto *VarD = dyn_cast<VarDecl>(VD);
+  if (VarD && VarD->isStaticDataMember()) {
     auto *RD = cast<RecordDecl>(VarD->getDeclContext());
     getDeclContextDescriptor(VarD);
     // Ensure that the type is retained even though it's otherwise unreferenced.
diff --git a/clang/test/CodeGenCXX/debug-info-enum.cpp b/clang/test/CodeGenCXX/debug-info-enum.cpp
index 447edba446dc2..088b94dd080cc 100644
--- a/clang/test/CodeGenCXX/debug-info-enum.cpp
+++ b/clang/test/CodeGenCXX/debug-info-enum.cpp
@@ -1,9 +1,15 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-windows-msvc -emit-llvm -gcodeview -debug-info-kind=limited %s -o - | FileCheck --check-prefix MSVC %s
 
 // CHECK: !DICompileUnit(
 // CHECK-SAME:           enums: [[ENUMS:![0-9]*]]
 // CHECK: [[ENUMS]] = !{[[E1:![0-9]*]], [[E2:![0-9]*]], [[E3:![0-9]*]]}
 
+// In MSVC check that used enum values are emitted as globals.
+// MSVC: !DICompileUnit(
+// MSVC-SAME:           globals: [[GLOBALS:![0-9]*]]
+// MSVC: [[GLOBALS]] = !{[[G1:![0-9]*]], [[G2:![0-9]*]]}
+
 namespace test1 {
 // CHECK: [[E1]] = !DICompositeType(tag: DW_TAG_enumeration_type, name: "e"
 // CHECK-SAME:                      scope: [[TEST1:![0-9]*]]
@@ -12,6 +18,10 @@ namespace test1 {
 // CHECK: [[TEST1]] = !DINamespace(name: "test1"
 // CHECK: [[TEST1_ENUMS]] = !{[[TEST1_E:![0-9]*]]}
 // CHECK: [[TEST1_E]] = !DIEnumerator(name: "E", value: 0, isUnsigned: true)
+
+// MSVC: [[G1]] = !DIGlobalVariableExpression(var: [[VAR1:![0-9]*]],
+// MSVC-SAME:                                 expr: !DIExpression(DW_OP_constu, 0
+// MSVC: [[VAR1]] = distinct !DIGlobalVariable(name: "E"
 enum e { E };
 void foo() {
   int v = E;
@@ -25,6 +35,10 @@ namespace test2 {
 // CHECK-SAME:                      elements: [[TEST1_ENUMS]]
 // CHECK-SAME:                      identifier: "_ZTSN5test21eE"
 // CHECK: [[TEST2]] = !DINamespace(name: "test2"
+
+// MSVC: [[G2]] = !DIGlobalVariableExpression(var: [[VAR2:![0-9]*]],
+// MSVC-SAME:                                 expr: !DIExpression(DW_OP_constu, 0
+// MSVC: [[VAR2]] = distinct !DIGlobalVariable(name: "E"
 enum e { E };
 bool func(int i) {
   return i == E;
diff --git a/llvm/test/DebugInfo/COFF/global-constants.ll b/llvm/test/DebugInfo/COFF/global-constants.ll
index 502870da37605..163a2f477ebe4 100644
--- a/llvm/test/DebugInfo/COFF/global-constants.ll
+++ b/llvm/test/DebugInfo/COFF/global-constants.ll
@@ -2,26 +2,41 @@
 ; RUN: llc < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s --check-prefix=OBJ
 
 ; C++ source to regenerate:
-; const int Test1 = 1;
-; struct Foo { static const int Test2 = 2; };
-; int main() {
-;   return Test1 + Foo::Test2;
+; const float TestConst1 = 3.14;
+; struct S {
+;   static const int TestConst2 = -10;
+; }
+; enum TestEnum : int {
+;    ENUM_A = 2147000000,
+;    ENUM_B = -2147000000,
+; };
+; void useConst(int);
+; void foo() {
+;   useConst(TestConst1);
+;   useConst(S::TestConst2);
+;   useConst(ENUM_B);
 ; }
 ; $ clang t.cpp -S -emit-llvm -g -gcodeview -o t.ll
 
-; ASM-LABEL:  .long 241             # Symbol subsection for globals
-
-; ASM:        .short {{.*-.*}}      # Record length
-; ASM:        .short 4359           # Record kind: S_CONSTANT
-; ASM-NEXT:   .long 4099            # Type
-; ASM-NEXT:   .byte 0x01, 0x00      # Value
-; ASM-NEXT:   .asciz "Test1"        # Name
-
-; ASM:        .short {{.*-.*}}      # Record length
-; ASM:        .short 4359           # Record kind: S_CONSTANT
-; ASM:        .long 4099            # Type
-; ASM:        .byte 0x02, 0x00      # Value
-; ASM:        .asciz "Foo::Test2"   # Name
+; ASM-LABEL:  .long 241                     # Symbol subsection for globals
+; ASM:        .short {{.*-.*}}              # Record length
+; ASM:        .short 4359                   # Record kind: S_CONSTANT
+; ASM-NEXT:   .long 4099                    # Type
+; ASM-NEXT:   .byte 0x04, 0x80, 0xc3, 0xf5  # Value
+; ASM-NEXT:   .byte 0x48, 0x40
+; ASM-NEXT:   .asciz "TestConst1"           # Name
+; ASM:        .short {{.*-.*}}              # Record length
+; ASM:        .short 4359                   # Record kind: S_CONSTANT
+; ASM-NEXT:   .long 4100                    # Type
+; ASM-NEXT:   .byte 0x61, 0x00              # Value
+; ASM-NEXT:   .asciz "S::TestConst2"        # Name
+; ASM:        .short {{.*-.*}}              # Record length
+; ASM:        .short 4359                   # Record kind: S_CONSTANT
+; ASM-NEXT:   .long 4102                    # Type
+; ASM-NEXT:   .byte 0x0a, 0x80, 0x40, 0x61  # Value
+; ASM-NEXT:   .byte 0x07, 0x80, 0xff, 0xff
+; ASM-NEXT:   .byte 0xff, 0xff
+; ASM-NEXT:   .asciz "ENUM_B"               # Name
 
 ; OBJ:        CodeViewDebugInfo [
 ; OBJ:          Section: .debug$S
@@ -30,56 +45,77 @@
 ; OBJ:            SubSectionType: Symbols (0xF1)
 ; OBJ:            ConstantSym {
 ; OBJ-NEXT:         Kind: S_CONSTANT (0x1107)
-; OBJ-NEXT:         Type: const int (0x1003)
-; OBJ-NEXT:         Value: 1
-; OBJ-NEXT:         Name: Test1
+; OBJ-NEXT:         Type: const float (0x1003)
+; OBJ-NEXT:         Value: 1078523331
+; OBJ-NEXT:         Name: TestConst1
 ; OBJ-NEXT:       }
-; OBJ:            ConstantSym {
+; OBJ-NEXT:       ConstantSym {
 ; OBJ-NEXT:         Kind: S_CONSTANT (0x1107)
-; OBJ-NEXT:         Type: const int (0x1003)
-; OBJ-NEXT:         Value: 2
-; OBJ-NEXT:         Name: Foo::Test2
+; OBJ-NEXT:         Type: const char (0x1004)
+; OBJ-NEXT:         Value: 97
+; OBJ-NEXT:         Name: S::TestConst2
 ; OBJ-NEXT:       }
+; OBJ-NEXT:       ConstantSym {
+; OBJ-NEXT:         Kind: S_CONSTANT (0x1107)
+; OBJ-NEXT:         Type: TestEnum (0x1006)
+; OBJ-NEXT:         Value: 18446744071562551616
+; OBJ-NEXT:         Name: ENUM_B
+; OBJ-NEXT:       }
+
 
 ; ModuleID = 't.cpp'
 source_filename = "t.cpp"
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
+target triple = "x86_64-w64-windows-gnu"
 
-; Function Attrs: noinline norecurse nounwind optnone
-define dso_local i32 @main() #0 !dbg !19 {
+; Function Attrs: noinline nounwind optnone
+define dso_local void @_Z3foov() #0 !dbg !28 {
 entry:
-  %retval = alloca i32, align 4
-  store i32 0, i32* %retval, align 4
-  ret i32 3, !dbg !22
+  call void @_Z8useConsti(i32 3), !dbg !32
+  call void @_Z8useConsti(i32 97), !dbg !33
+  call void @_Z8useConsti(i32 -2147000000), !dbg !34
+  ret void, !dbg !35
 }
 
-attributes #0 = { noinline norecurse nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+declare dso_local void @_Z8useConsti(i32) #1
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!15, !16, !17}
-!llvm.ident = !{!18}
+!llvm.module.flags = !{!24, !25, !26}
+!llvm.ident = !{!27}
 
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git 2b66a49044196d8b90d95d7d3b5246ccbe3abc05)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, globals: !10, nameTableKind: None)
-!1 = !DIFile(filename: "<stdin>", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "77cff5e1c7b260440ed03b23c18809c3")
-!2 = !{}
-!3 = !{!4}
-!4 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !5, line: 3, size: 8, flags: DIFlagTypePassByValue, elements: !6, identifier: ".?AUFoo@@")
-!5 = !DIFile(filename: "t.cpp", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "77cff5e1c7b260440ed03b23c18809c3")
-!6 = !{!7}
-!7 = !DIDerivedType(tag: DW_TAG_member, name: "Test2", scope: !4, file: !5, line: 4, baseType: !8, flags: DIFlagStaticMember, extraData: i32 2)
-!8 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
-!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!10 = !{!11, !13}
-!11 = !DIGlobalVariableExpression(var: !12, expr: !DIExpression(DW_OP_constu, 1, DW_OP_stack_value))
-!12 = distinct !DIGlobalVariable(name: "Test1", scope: null, file: !5, line: 1, type: !8, isLocal: true, isDefinition: true)
-!13 = !DIGlobalVariableExpression(var: !14, expr: !DIExpression(DW_OP_constu, 2, DW_OP_stack_value))
-!14 = distinct !DIGlobalVariable(name: "Test2", scope: !0, file: !5, line: 4, type: !8, isLocal: true, isDefinition: true, declaration: !7)
-!15 = !{i32 2, !"CodeView", i32 1}
-!16 = !{i32 2, !"Debug Info Version", i32 3}
-!17 = !{i32 1, !"wchar_size", i32 2}
-!18 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git 2b66a49044196d8b90d95d7d3b5246ccbe3abc05)"}
-!19 = distinct !DISubprogram(name: "main", scope: !5, file: !5, line: 7, type: !20, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
-!20 = !DISubroutineType(types: !21)
-!21 = !{!9}
-!22 = !DILocation(line: 8, scope: !19)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git dee1891507401f396290b5d9cb5717d6b0755337)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !9, globals: !15, nameTableKind: None)
+!1 = !DIFile(filename: "<stdin>", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "6d700c7d582557a012214ac1f1f8721b")
+!2 = !{!3}
+!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "TestEnum", file: !4, line: 5, baseType: !5, size: 32, elements: !6, identifier: "_ZTS8TestEnum")
+!4 = !DIFile(filename: "t.cpp", directory: "C:\5Csrc\5Ctest", checksumkind: CSK_MD5, checksum: "6d700c7d582557a012214ac1f1f8721b")
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!6 = !{!7, !8}
+!7 = !DIEnumerator(name: "ENUM_A", value: 2147000000)
+!8 = !DIEnumerator(name: "ENUM_B", value: -2147000000)
+!9 = !{!10}
+!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !4, line: 2, size: 8, flags: DIFlagTypePassByValue, elements: !11, identifier: "_ZTS1S")
+!11 = !{!12}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "TestConst2", scope: !10, file: !4, line: 3, baseType: !13, flags: DIFlagStaticMember, extraData: i8 97)
+!13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
+!14 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+!15 = !{!16, !20, !22}
+!16 = !DIGlobalVariableExpression(var: !17, expr: !DIExpression(DW_OP_constu, 1078523331, DW_OP_stack_value))
+!17 = distinct !DIGlobalVariable(name: "TestConst1", scope: !0, file: !4, line: 1, type: !18, isLocal: true, isDefinition: true)
+!18 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !19)
+!19 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+!20 = !DIGlobalVariableExpression(var: !21, expr: !DIExpression(DW_OP_constu, 97, DW_OP_stack_value))
+!21 = distinct !DIGlobalVariable(name: "TestConst2", scope: !0, file: !4, line: 3, type: !13, isLocal: true, isDefinition: true, declaration: !12)
+!22 = !DIGlobalVariableExpression(var: !23, expr: !DIExpression(DW_OP_constu, 18446744071562551616, DW_OP_stack_value))
+!23 = distinct !DIGlobalVariable(name: "ENUM_B", scope: !0, file: !4, line: 7, type: !3, isLocal: true, isDefinition: true)
+!24 = !{i32 2, !"CodeView", i32 1}
+!25 = !{i32 2, !"Debug Info Version", i32 3}
+!26 = !{i32 1, !"wchar_size", i32 2}
+!27 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git dee1891507401f396290b5d9cb5717d6b0755337)"}
+!28 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !4, file: !4, line: 10, type: !29, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !31)
+!29 = !DISubroutineType(types: !30)
+!30 = !{null}
+!31 = !{}
+!32 = !DILocation(line: 11, scope: !28)
+!33 = !DILocation(line: 12, scope: !28)
+!34 = !DILocation(line: 13, scope: !28)
+!35 = !DILocation(line: 14, scope: !28)

From 5e1881f9b234958555105649a5dd2af06d50cf05 Mon Sep 17 00:00:00 2001
From: Michael Trent <mtrent@apple.com>
Date: Thu, 30 May 2019 22:11:29 +0000
Subject: [PATCH 0688/1176] Update the tests in r362121 / r362141 to allow for
 Windows-specific error messages: "Is a directory" instead of "is a directory"

This should resolve the errors being reported on clang-x64-windows-msvc.

llvm-svn: 362167
---
 llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
index c4841bbcb521f..a312999bfb098 100644
--- a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
+++ b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
@@ -18,4 +18,5 @@ BAD_INPUT: is not a Mach-O or Universal file type.
 
 // RUN: not llvm-objdump -m -d -g -dsym %p/Inputs %p/Inputs/hello-macho-thin 2>&1 | FileCheck -check-prefix DIRECTORY %s
 
-DIRECTORY: Is a directory
+// Windows will emit "Is a directory", whereas others emit "is a directory"
+DIRECTORY: {{[iI]}}s a directory

From f1e300ca1adf90bab86588855c6031802248d38d Mon Sep 17 00:00:00 2001
From: Douglas Yung <douglas.yung@sony.com>
Date: Thu, 30 May 2019 22:20:31 +0000
Subject: [PATCH 0689/1176] Fix test to add missing '|' to regex.

llvm-svn: 362168
---
 llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
index a312999bfb098..c9f72fa752b28 100644
--- a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
+++ b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
@@ -19,4 +19,4 @@ BAD_INPUT: is not a Mach-O or Universal file type.
 // RUN: not llvm-objdump -m -d -g -dsym %p/Inputs %p/Inputs/hello-macho-thin 2>&1 | FileCheck -check-prefix DIRECTORY %s
 
 // Windows will emit "Is a directory", whereas others emit "is a directory"
-DIRECTORY: {{[iI]}}s a directory
+DIRECTORY: {{[i|I]}}s a directory

From 760a9ee63c9cbe703bfeae8dcbb45abfab2554b2 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Thu, 30 May 2019 22:25:48 +0000
Subject: [PATCH 0690/1176] Support codesigning bundles and forcing

Summary:
Clangd's framework is assembled by copying binaries from the lib and bin directories into a bundle shape. This results in an invalid bundle code signature because the signature only applies to the binaries not the resources.

This patch adds two new options to `llvm_codesign` to enable re-signing the library and XPC service as bundles.

The `BUNDLE_PATH` option allow specifying an explicit path to codesign, which enables signing bundles which aren't generated using CMake's `FRAMEWORK` or `BUNDLE` target properties.

The `FORCE` option allows re-signing binaries that have already been signed. This is required for how clangd exposes the clangd library and tools as both XPC and non-XPC services using the same binary.

Reviewers: jkorous, bogner

Reviewed By: bogner

Subscribers: mgorny, ilya-biryukov, dexonsmith, arphaman, kadircet, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62693

llvm-svn: 362169
---
 .../cmake/modules/CreateClangdXPCFramework.cmake   |  5 +++++
 llvm/cmake/modules/AddLLVM.cmake                   | 14 +++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake b/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake
index 2299bc8ff4f22..fad58660df0a6 100644
--- a/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake
+++ b/clang-tools-extra/clangd/xpc/cmake/modules/CreateClangdXPCFramework.cmake
@@ -70,4 +70,9 @@ macro(create_clangd_xpc_framework target name)
     ${target}
     ${CLANGD_FRAMEWORK_LOCATION}
   )
+
+  # clangd is already signed as a standalone executable, so it must be forced.
+  llvm_codesign(ClangdXPC BUNDLE_PATH "${CLANGD_FRAMEWORK_OUT_LOCATION}/XPCServices/${CLANGD_XPC_SERVICE_NAME}.xpc/" FORCE)
+  # ClangdXPC library is already signed as a standalone library, so it must be forced.
+  llvm_codesign(ClangdXPC BUNDLE_PATH "${CLANGD_FRAMEWORK_LOCATION}" FORCE)
 endmacro(create_clangd_xpc_framework)
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 4151275473bb7..874cc76e8e5fe 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1659,9 +1659,9 @@ function(llvm_externalize_debuginfo name)
   endif()
 endfunction()
 
-# Usage: llvm_codesign(name [ENTITLEMENTS file])
+# Usage: llvm_codesign(name [FORCE] [ENTITLEMENTS file] [BUNDLE_PATH path])
 function(llvm_codesign name)
-  cmake_parse_arguments(ARG "" "ENTITLEMENTS" "" ${ARGN})
+  cmake_parse_arguments(ARG "FORCE" "ENTITLEMENTS;BUNDLE_PATH" "" ${ARGN})
 
   if(NOT LLVM_CODESIGNING_IDENTITY)
     return()
@@ -1691,12 +1691,20 @@ function(llvm_codesign name)
       set(pass_entitlements --entitlements ${ARG_ENTITLEMENTS})
     endif()
 
+    if (NOT ARG_BUNDLE_PATH)
+      set(ARG_BUNDLE_PATH $<TARGET_FILE:${name}>)
+    endif()
+
+    if(ARG_FORCE)
+      set(force_flag "-f")
+    endif()
+
     add_custom_command(
       TARGET ${name} POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E
               env CODESIGN_ALLOCATE=${CMAKE_CODESIGN_ALLOCATE}
               ${CMAKE_CODESIGN} -s ${LLVM_CODESIGNING_IDENTITY}
-              ${pass_entitlements} $<TARGET_FILE:${name}>
+              ${pass_entitlements} ${force_flag} ${ARG_BUNDLE_PATH}
     )
   endif()
 endfunction()

From d6b74cc859a1d77778c5b810013873c003561841 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 30 May 2019 22:29:06 +0000
Subject: [PATCH 0691/1176] [X86] Remove code that unnecessarily sets EXTLOAD
 with src type of v2f32/v4f32/v8f32 as Legal for SSE2/AVX/AVX512 respectively.
 NFC

The LoadExt table defaults to all combinations being Legal. For
vector types, only src VTs with an i1 element type were ever changed.
So we don't need to mark them legal manually.

llvm-svn: 362170
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4a6af21f35ad0..f97907fb7e5b8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -962,9 +962,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
 
-    for (MVT VT : MVT::fp_vector_valuetypes())
-      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
-
     // We want to legalize this to an f64 load rather than an i64 load on
     // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
     // store.
@@ -1145,9 +1142,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
 
-    for (MVT VT : MVT::fp_vector_valuetypes())
-      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
-
     // In the customized shift lowering, the legal v8i32/v4i64 cases
     // in AVX2 will be recognized.
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1380,9 +1374,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
 
-    for (MVT VT : MVT::fp_vector_valuetypes())
-      setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
-
     for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
       setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8,  Legal);
       setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);

From 073f3f1609cd0dbadc9923cf919521e1b2c5b2c3 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Thu, 30 May 2019 23:21:13 +0000
Subject: [PATCH 0692/1176] Fix "fallthrough annotation in unreachable code"
 warning.

llvm-svn: 362171
---
 clang/lib/Sema/SemaType.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index e0d43a780e794..eac7c44c76ae4 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -6971,7 +6971,7 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
       switch (Proto->getExceptionSpecType()) {
       case EST_None:
         llvm_unreachable("This doesn't have an exception spec!");
-        LLVM_FALLTHROUGH;
+
       case EST_DynamicNone:
       case EST_BasicNoexcept:
       case EST_NoexceptTrue:

From 375dec5e45146a3ba38f6b91f4b051cbd7467079 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Thu, 30 May 2019 23:21:14 +0000
Subject: [PATCH 0693/1176] Refactor OpenMP stack management.

Instead of duplicating access to the directive stack throughout
SemaOpenMP.cpp, consolidate it to a few methods and call those
everywhere else. In passing, simplify adjacent code where possible.

No functionality change intended.

llvm-svn: 362172
---
 clang/lib/Sema/SemaOpenMP.cpp | 438 +++++++++++++++++-----------------
 1 file changed, 224 insertions(+), 214 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 19a7d784c5633..5a6b49961f2e2 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -173,18 +173,78 @@ class DSAStackTy {
   bool ForceCaptureByReferenceInTargetExecutable = false;
   CriticalsWithHintsTy Criticals;
 
-  using iterator = StackTy::const_reverse_iterator;
+  /// Iterators over the stack iterate in order from innermost to outermost
+  /// directive.
+  using const_iterator = StackTy::const_reverse_iterator;
+  const_iterator begin() const {
+    return Stack.empty() ? const_iterator() : Stack.back().first.rbegin();
+  }
+  const_iterator end() const {
+    return Stack.empty() ? const_iterator() : Stack.back().first.rend();
+  }
+  using iterator = StackTy::reverse_iterator;
+  iterator begin() {
+    return Stack.empty() ? iterator() : Stack.back().first.rbegin();
+  }
+  iterator end() {
+    return Stack.empty() ? iterator() : Stack.back().first.rend();
+  }
 
-  DSAVarData getDSA(iterator &Iter, ValueDecl *D) const;
-
-  /// Checks if the variable is a local for OpenMP region.
-  bool isOpenMPLocal(VarDecl *D, iterator Iter) const;
+  // Convenience operations to get at the elements of the stack.
 
   bool isStackEmpty() const {
     return Stack.empty() ||
            Stack.back().second != CurrentNonCapturingFunctionScope ||
            Stack.back().first.empty();
   }
+  size_t getStackSize() const {
+    return isStackEmpty() ? 0 : Stack.back().first.size();
+  }
+
+  SharingMapTy *getTopOfStackOrNull() {
+    if (isStackEmpty())
+      return nullptr;
+    return &Stack.back().first.back();
+  }
+  const SharingMapTy *getTopOfStackOrNull() const {
+    return const_cast<DSAStackTy&>(*this).getTopOfStackOrNull();
+  }
+  SharingMapTy &getTopOfStack() {
+    assert(!isStackEmpty() && "no current directive");
+    return *getTopOfStackOrNull();
+  }
+  const SharingMapTy &getTopOfStack() const {
+    return const_cast<DSAStackTy&>(*this).getTopOfStack();
+  }
+
+  SharingMapTy *getSecondOnStackOrNull() {
+    size_t Size = getStackSize();
+    if (Size <= 1)
+      return nullptr;
+    return &Stack.back().first[Size - 2];
+  }
+  const SharingMapTy *getSecondOnStackOrNull() const {
+    return const_cast<DSAStackTy&>(*this).getSecondOnStackOrNull();
+  }
+
+  /// Get the stack element at a certain level (previously returned by
+  /// \c getNestingLevel).
+  ///
+  /// Note that nesting levels count from outermost to innermost, and this is
+  /// the reverse of our iteration order where new inner levels are pushed at
+  /// the front of the stack.
+  SharingMapTy &getStackElemAtLevel(unsigned Level) {
+    assert(Level < getStackSize() && "no such stack element");
+    return Stack.back().first[Level];
+  }
+  const SharingMapTy &getStackElemAtLevel(unsigned Level) const {
+    return const_cast<DSAStackTy&>(*this).getStackElemAtLevel(Level);
+  }
+
+  DSAVarData getDSA(const_iterator &Iter, ValueDecl *D) const;
+
+  /// Checks if the variable is a local for OpenMP region.
+  bool isOpenMPLocal(VarDecl *D, const_iterator Iter) const;
 
   /// Vector of previously declared requires directives
   SmallVector<const OMPRequiresDecl *, 2> RequiresDecls;
@@ -249,28 +309,28 @@ class DSAStackTy {
   void loopInit() {
     assert(isOpenMPLoopDirective(getCurrentDirective()) &&
            "Expected loop-based directive.");
-    Stack.back().first.back().LoopStart = true;
+    getTopOfStack().LoopStart = true;
   }
   /// Start capturing of the variables in the loop context.
   void loopStart() {
     assert(isOpenMPLoopDirective(getCurrentDirective()) &&
            "Expected loop-based directive.");
-    Stack.back().first.back().LoopStart = false;
+    getTopOfStack().LoopStart = false;
   }
   /// true, if variables are captured, false otherwise.
   bool isLoopStarted() const {
     assert(isOpenMPLoopDirective(getCurrentDirective()) &&
            "Expected loop-based directive.");
-    return !Stack.back().first.back().LoopStart;
+    return !getTopOfStack().LoopStart;
   }
   /// Marks (or clears) declaration as possibly loop counter.
   void resetPossibleLoopCounter(const Decl *D = nullptr) {
-    Stack.back().first.back().PossiblyLoopCounter =
+    getTopOfStack().PossiblyLoopCounter =
         D ? D->getCanonicalDecl() : D;
   }
   /// Gets the possible loop counter decl.
   const Decl *getPossiblyLoopCunter() const {
-    return Stack.back().first.back().PossiblyLoopCounter;
+    return getTopOfStack().PossiblyLoopCounter;
   }
   /// Start new OpenMP region stack in new non-capturing function.
   void pushFunction() {
@@ -350,16 +410,16 @@ class DSAStackTy {
                                    Expr *&TaskgroupDescriptor) const;
   /// Return reduction reference expression for the current taskgroup.
   Expr *getTaskgroupReductionRef() const {
-    assert(Stack.back().first.back().Directive == OMPD_taskgroup &&
+    assert(getTopOfStack().Directive == OMPD_taskgroup &&
            "taskgroup reference expression requested for non taskgroup "
            "directive.");
-    return Stack.back().first.back().TaskgroupReductionRef;
+    return getTopOfStack().TaskgroupReductionRef;
   }
   /// Checks if the given \p VD declaration is actually a taskgroup reduction
   /// descriptor variable at the \p Level of OpenMP regions.
   bool isTaskgroupReductionRef(const ValueDecl *VD, unsigned Level) const {
-    return Stack.back().first[Level].TaskgroupReductionRef &&
-           cast<DeclRefExpr>(Stack.back().first[Level].TaskgroupReductionRef)
+    return getStackElemAtLevel(Level).TaskgroupReductionRef &&
+           cast<DeclRefExpr>(getStackElemAtLevel(Level).TaskgroupReductionRef)
                    ->getDecl() == VD;
   }
 
@@ -405,18 +465,18 @@ class DSAStackTy {
 
   /// Returns currently analyzed directive.
   OpenMPDirectiveKind getCurrentDirective() const {
-    return isStackEmpty() ? OMPD_unknown : Stack.back().first.back().Directive;
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top ? Top->Directive : OMPD_unknown;
   }
   /// Returns directive kind at specified level.
   OpenMPDirectiveKind getDirective(unsigned Level) const {
     assert(!isStackEmpty() && "No directive at specified level.");
-    return Stack.back().first[Level].Directive;
+    return getStackElemAtLevel(Level).Directive;
   }
   /// Returns parent directive.
   OpenMPDirectiveKind getParentDirective() const {
-    if (isStackEmpty() || Stack.back().first.size() == 1)
-      return OMPD_unknown;
-    return std::next(Stack.back().first.rbegin())->Directive;
+    const SharingMapTy *Parent = getSecondOnStackOrNull();
+    return Parent ? Parent->Directive : OMPD_unknown;
   }
 
   /// Add requires decl to internal vector
@@ -468,41 +528,38 @@ class DSAStackTy {
 
   /// Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
-    assert(!isStackEmpty());
-    Stack.back().first.back().DefaultAttr = DSA_none;
-    Stack.back().first.back().DefaultAttrLoc = Loc;
+    getTopOfStack().DefaultAttr = DSA_none;
+    getTopOfStack().DefaultAttrLoc = Loc;
   }
   /// Set default data sharing attribute to shared.
   void setDefaultDSAShared(SourceLocation Loc) {
-    assert(!isStackEmpty());
-    Stack.back().first.back().DefaultAttr = DSA_shared;
-    Stack.back().first.back().DefaultAttrLoc = Loc;
+    getTopOfStack().DefaultAttr = DSA_shared;
+    getTopOfStack().DefaultAttrLoc = Loc;
   }
   /// Set default data mapping attribute to 'tofrom:scalar'.
   void setDefaultDMAToFromScalar(SourceLocation Loc) {
-    assert(!isStackEmpty());
-    Stack.back().first.back().DefaultMapAttr = DMA_tofrom_scalar;
-    Stack.back().first.back().DefaultMapAttrLoc = Loc;
+    getTopOfStack().DefaultMapAttr = DMA_tofrom_scalar;
+    getTopOfStack().DefaultMapAttrLoc = Loc;
   }
 
   DefaultDataSharingAttributes getDefaultDSA() const {
     return isStackEmpty() ? DSA_unspecified
-                          : Stack.back().first.back().DefaultAttr;
+                          : getTopOfStack().DefaultAttr;
   }
   SourceLocation getDefaultDSALocation() const {
     return isStackEmpty() ? SourceLocation()
-                          : Stack.back().first.back().DefaultAttrLoc;
+                          : getTopOfStack().DefaultAttrLoc;
   }
   DefaultMapAttributes getDefaultDMA() const {
     return isStackEmpty() ? DMA_unspecified
-                          : Stack.back().first.back().DefaultMapAttr;
+                          : getTopOfStack().DefaultMapAttr;
   }
   DefaultMapAttributes getDefaultDMAAtLevel(unsigned Level) const {
-    return Stack.back().first[Level].DefaultMapAttr;
+    return getStackElemAtLevel(Level).DefaultMapAttr;
   }
   SourceLocation getDefaultDMALocation() const {
     return isStackEmpty() ? SourceLocation()
-                          : Stack.back().first.back().DefaultMapAttrLoc;
+                          : getTopOfStack().DefaultMapAttrLoc;
   }
 
   /// Checks if the specified variable is a threadprivate.
@@ -514,82 +571,77 @@ class DSAStackTy {
   /// Marks current region as ordered (it has an 'ordered' clause).
   void setOrderedRegion(bool IsOrdered, const Expr *Param,
                         OMPOrderedClause *Clause) {
-    assert(!isStackEmpty());
     if (IsOrdered)
-      Stack.back().first.back().OrderedRegion.emplace(Param, Clause);
+      getTopOfStack().OrderedRegion.emplace(Param, Clause);
     else
-      Stack.back().first.back().OrderedRegion.reset();
+      getTopOfStack().OrderedRegion.reset();
   }
   /// Returns true, if region is ordered (has associated 'ordered' clause),
   /// false - otherwise.
   bool isOrderedRegion() const {
-    if (isStackEmpty())
-      return false;
-    return Stack.back().first.rbegin()->OrderedRegion.hasValue();
+    if (const SharingMapTy *Top = getTopOfStackOrNull())
+      return Top->OrderedRegion.hasValue();
+    return false;
   }
   /// Returns optional parameter for the ordered region.
   std::pair<const Expr *, OMPOrderedClause *> getOrderedRegionParam() const {
-    if (isStackEmpty() ||
-        !Stack.back().first.rbegin()->OrderedRegion.hasValue())
-      return std::make_pair(nullptr, nullptr);
-    return Stack.back().first.rbegin()->OrderedRegion.getValue();
+    if (const SharingMapTy *Top = getTopOfStackOrNull())
+      if (Top->OrderedRegion.hasValue())
+        return Top->OrderedRegion.getValue();
+    return std::make_pair(nullptr, nullptr);
   }
   /// Returns true, if parent region is ordered (has associated
   /// 'ordered' clause), false - otherwise.
   bool isParentOrderedRegion() const {
-    if (isStackEmpty() || Stack.back().first.size() == 1)
-      return false;
-    return std::next(Stack.back().first.rbegin())->OrderedRegion.hasValue();
+    if (const SharingMapTy *Parent = getSecondOnStackOrNull())
+      return Parent->OrderedRegion.hasValue();
+    return false;
   }
   /// Returns optional parameter for the ordered region.
   std::pair<const Expr *, OMPOrderedClause *>
   getParentOrderedRegionParam() const {
-    if (isStackEmpty() || Stack.back().first.size() == 1 ||
-        !std::next(Stack.back().first.rbegin())->OrderedRegion.hasValue())
-      return std::make_pair(nullptr, nullptr);
-    return std::next(Stack.back().first.rbegin())->OrderedRegion.getValue();
+    if (const SharingMapTy *Parent = getSecondOnStackOrNull())
+      if (Parent->OrderedRegion.hasValue())
+        return Parent->OrderedRegion.getValue();
+    return std::make_pair(nullptr, nullptr);
   }
   /// Marks current region as nowait (it has a 'nowait' clause).
   void setNowaitRegion(bool IsNowait = true) {
-    assert(!isStackEmpty());
-    Stack.back().first.back().NowaitRegion = IsNowait;
+    getTopOfStack().NowaitRegion = IsNowait;
   }
   /// Returns true, if parent region is nowait (has associated
   /// 'nowait' clause), false - otherwise.
   bool isParentNowaitRegion() const {
-    if (isStackEmpty() || Stack.back().first.size() == 1)
-      return false;
-    return std::next(Stack.back().first.rbegin())->NowaitRegion;
+    if (const SharingMapTy *Parent = getSecondOnStackOrNull())
+      return Parent->NowaitRegion;
+    return false;
   }
   /// Marks parent region as cancel region.
   void setParentCancelRegion(bool Cancel = true) {
-    if (!isStackEmpty() && Stack.back().first.size() > 1) {
-      auto &StackElemRef = *std::next(Stack.back().first.rbegin());
-      StackElemRef.CancelRegion |= StackElemRef.CancelRegion || Cancel;
-    }
+    if (SharingMapTy *Parent = getSecondOnStackOrNull())
+      Parent->CancelRegion |= Cancel;
   }
   /// Return true if current region has inner cancel construct.
   bool isCancelRegion() const {
-    return isStackEmpty() ? false : Stack.back().first.back().CancelRegion;
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top ? Top->CancelRegion : false;
   }
 
   /// Set collapse value for the region.
   void setAssociatedLoops(unsigned Val) {
-    assert(!isStackEmpty());
-    Stack.back().first.back().AssociatedLoops = Val;
+    getTopOfStack().AssociatedLoops = Val;
   }
   /// Return collapse value for region.
   unsigned getAssociatedLoops() const {
-    return isStackEmpty() ? 0 : Stack.back().first.back().AssociatedLoops;
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top ? Top->AssociatedLoops : 0;
   }
 
   /// Marks current target region as one with closely nested teams
   /// region.
   void setParentTeamsRegionLoc(SourceLocation TeamsRegionLoc) {
-    if (!isStackEmpty() && Stack.back().first.size() > 1) {
-      std::next(Stack.back().first.rbegin())->InnerTeamsRegionLoc =
-          TeamsRegionLoc;
-    }
+    if (SharingMapTy *Parent = getSecondOnStackOrNull())
+      Parent->InnerTeamsRegionLoc = TeamsRegionLoc;
   }
   /// Returns true, if current region has closely nested teams region.
   bool hasInnerTeamsRegion() const {
@@ -597,16 +649,17 @@ class DSAStackTy {
   }
   /// Returns location of the nested teams region (if any).
   SourceLocation getInnerTeamsRegionLoc() const {
-    return isStackEmpty() ? SourceLocation()
-                          : Stack.back().first.back().InnerTeamsRegionLoc;
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top ? Top->InnerTeamsRegionLoc : SourceLocation();
   }
 
   Scope *getCurScope() const {
-    return isStackEmpty() ? nullptr : Stack.back().first.back().CurScope;
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top ? Top->CurScope : nullptr;
   }
   SourceLocation getConstructLoc() const {
-    return isStackEmpty() ? SourceLocation()
-                          : Stack.back().first.back().ConstructLoc;
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top ? Top->ConstructLoc : SourceLocation();
   }
 
   /// Do the check specified in \a Check to all component lists and return true
@@ -619,8 +672,8 @@ class DSAStackTy {
           Check) const {
     if (isStackEmpty())
       return false;
-    auto SI = Stack.back().first.rbegin();
-    auto SE = Stack.back().first.rend();
+    auto SI = begin();
+    auto SE = end();
 
     if (SI == SE)
       return false;
@@ -649,17 +702,12 @@ class DSAStackTy {
           bool(OMPClauseMappableExprCommon::MappableExprComponentListRef,
                OpenMPClauseKind)>
           Check) const {
-    if (isStackEmpty())
-      return false;
-
-    auto StartI = Stack.back().first.begin();
-    auto EndI = Stack.back().first.end();
-    if (std::distance(StartI, EndI) <= (int)Level)
+    if (getStackSize() <= Level)
       return false;
-    std::advance(StartI, Level);
 
-    auto MI = StartI->MappedExprComponents.find(VD);
-    if (MI != StartI->MappedExprComponents.end())
+    const SharingMapTy &StackElem = getStackElemAtLevel(Level);
+    auto MI = StackElem.MappedExprComponents.find(VD);
+    if (MI != StackElem.MappedExprComponents.end())
       for (OMPClauseMappableExprCommon::MappableExprComponentListRef L :
            MI->second.Components)
         if (Check(L, MI->second.Kind))
@@ -673,10 +721,7 @@ class DSAStackTy {
       const ValueDecl *VD,
       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
       OpenMPClauseKind WhereFoundClauseKind) {
-    assert(!isStackEmpty() &&
-           "Not expecting to retrieve components from a empty stack!");
-    MappedExprComponentTy &MEC =
-        Stack.back().first.back().MappedExprComponents[VD];
+    MappedExprComponentTy &MEC = getTopOfStack().MappedExprComponents[VD];
     // Create new entry and append the new components there.
     MEC.Components.resize(MEC.Components.size() + 1);
     MEC.Components.back().append(Components.begin(), Components.end());
@@ -685,19 +730,17 @@ class DSAStackTy {
 
   unsigned getNestingLevel() const {
     assert(!isStackEmpty());
-    return Stack.back().first.size() - 1;
+    return getStackSize() - 1;
   }
   void addDoacrossDependClause(OMPDependClause *C,
                                const OperatorOffsetTy &OpsOffs) {
-    assert(!isStackEmpty() && Stack.back().first.size() > 1);
-    SharingMapTy &StackElem = *std::next(Stack.back().first.rbegin());
-    assert(isOpenMPWorksharingDirective(StackElem.Directive));
-    StackElem.DoacrossDepends.try_emplace(C, OpsOffs);
+    SharingMapTy *Parent = getSecondOnStackOrNull();
+    assert(Parent && isOpenMPWorksharingDirective(Parent->Directive));
+    Parent->DoacrossDepends.try_emplace(C, OpsOffs);
   }
   llvm::iterator_range<DoacrossDependMapTy::const_iterator>
   getDoacrossDependClauses() const {
-    assert(!isStackEmpty());
-    const SharingMapTy &StackElem = Stack.back().first.back();
+    const SharingMapTy &StackElem = getTopOfStack();
     if (isOpenMPWorksharingDirective(StackElem.Directive)) {
       const DoacrossDependMapTy &Ref = StackElem.DoacrossDepends;
       return llvm::make_range(Ref.begin(), Ref.end());
@@ -708,13 +751,13 @@ class DSAStackTy {
 
   // Store types of classes which have been explicitly mapped
   void addMappedClassesQualTypes(QualType QT) {
-    SharingMapTy &StackElem = Stack.back().first.back();
+    SharingMapTy &StackElem = getTopOfStack();
     StackElem.MappedClassesQualTypes.insert(QT);
   }
 
   // Return set of mapped classes types
   bool isClassPreviouslyMapped(QualType QT) const {
-    const SharingMapTy &StackElem = Stack.back().first.back();
+    const SharingMapTy &StackElem = getTopOfStack();
     return StackElem.MappedClassesQualTypes.count(QT) != 0;
   }
 
@@ -723,16 +766,11 @@ class DSAStackTy {
     assert(*OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
                E->getDecl()) == OMPDeclareTargetDeclAttr::MT_Link &&
            "Expected declare target link global.");
-    if (isStackEmpty())
-      return;
-    auto It = Stack.back().first.rbegin();
-    while (It != Stack.back().first.rend() &&
-           !isOpenMPTargetExecutionDirective(It->Directive))
-      ++It;
-    if (It != Stack.back().first.rend()) {
-      assert(isOpenMPTargetExecutionDirective(It->Directive) &&
-             "Expected target executable directive.");
-      It->DeclareTargetLinkVarDecls.push_back(E);
+    for (auto &Elem : *this) {
+      if (isOpenMPTargetExecutionDirective(Elem.Directive)) {
+        Elem.DeclareTargetLinkVarDecls.push_back(E);
+        return;
+      }
     }
   }
 
@@ -741,7 +779,7 @@ class DSAStackTy {
   ArrayRef<DeclRefExpr *> getLinkGlobals() const {
     assert(isOpenMPTargetExecutionDirective(getCurrentDirective()) &&
            "Expected target executable directive.");
-    return Stack.back().first.back().DeclareTargetLinkVarDecls;
+    return getTopOfStack().DeclareTargetLinkVarDecls;
   }
 };
 
@@ -797,13 +835,13 @@ static ValueDecl *getCanonicalDecl(ValueDecl *D) {
       getCanonicalDecl(const_cast<const ValueDecl *>(D)));
 }
 
-DSAStackTy::DSAVarData DSAStackTy::getDSA(iterator &Iter,
+DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter,
                                           ValueDecl *D) const {
   D = getCanonicalDecl(D);
   auto *VD = dyn_cast<VarDecl>(D);
   const auto *FD = dyn_cast<FieldDecl>(D);
   DSAVarData DVar;
-  if (isStackEmpty() || Iter == Stack.back().first.rend()) {
+  if (Iter == end()) {
     // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
     // in a region but not in construct]
     //  File-scope or namespace-scope variables referenced in called routines
@@ -878,7 +916,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(iterator &Iter,
     //  bound to the current team is shared.
     if (isOpenMPTaskingDirective(DVar.DKind)) {
       DSAVarData DVarTemp;
-      iterator I = Iter, E = Stack.back().first.rend();
+      const_iterator I = Iter, E = end();
       do {
         ++I;
         // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables
@@ -910,7 +948,7 @@ const Expr *DSAStackTy::addUniqueAligned(const ValueDecl *D,
                                          const Expr *NewDE) {
   assert(!isStackEmpty() && "Data sharing attributes stack is empty");
   D = getCanonicalDecl(D);
-  SharingMapTy &StackElem = Stack.back().first.back();
+  SharingMapTy &StackElem = getTopOfStack();
   auto It = StackElem.AlignedMap.find(D);
   if (It == StackElem.AlignedMap.end()) {
     assert(NewDE && "Unexpected nullptr expr to be added into aligned map");
@@ -924,7 +962,7 @@ const Expr *DSAStackTy::addUniqueAligned(const ValueDecl *D,
 void DSAStackTy::addLoopControlVariable(const ValueDecl *D, VarDecl *Capture) {
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty");
   D = getCanonicalDecl(D);
-  SharingMapTy &StackElem = Stack.back().first.back();
+  SharingMapTy &StackElem = getTopOfStack();
   StackElem.LCVMap.try_emplace(
       D, LCDeclInfo(StackElem.LCVMap.size() + 1, Capture));
 }
@@ -933,7 +971,7 @@ const DSAStackTy::LCDeclInfo
 DSAStackTy::isLoopControlVariable(const ValueDecl *D) const {
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty");
   D = getCanonicalDecl(D);
-  const SharingMapTy &StackElem = Stack.back().first.back();
+  const SharingMapTy &StackElem = getTopOfStack();
   auto It = StackElem.LCVMap.find(D);
   if (It != StackElem.LCVMap.end())
     return It->second;
@@ -942,23 +980,21 @@ DSAStackTy::isLoopControlVariable(const ValueDecl *D) const {
 
 const DSAStackTy::LCDeclInfo
 DSAStackTy::isParentLoopControlVariable(const ValueDecl *D) const {
-  assert(!isStackEmpty() && Stack.back().first.size() > 1 &&
-         "Data-sharing attributes stack is empty");
+  const SharingMapTy *Parent = getSecondOnStackOrNull();
+  assert(Parent && "Data-sharing attributes stack is empty");
   D = getCanonicalDecl(D);
-  const SharingMapTy &StackElem = *std::next(Stack.back().first.rbegin());
-  auto It = StackElem.LCVMap.find(D);
-  if (It != StackElem.LCVMap.end())
+  auto It = Parent->LCVMap.find(D);
+  if (It != Parent->LCVMap.end())
     return It->second;
   return {0, nullptr};
 }
 
 const ValueDecl *DSAStackTy::getParentLoopControlVariable(unsigned I) const {
-  assert(!isStackEmpty() && Stack.back().first.size() > 1 &&
-         "Data-sharing attributes stack is empty");
-  const SharingMapTy &StackElem = *std::next(Stack.back().first.rbegin());
-  if (StackElem.LCVMap.size() < I)
+  const SharingMapTy *Parent = getSecondOnStackOrNull();
+  assert(Parent && "Data-sharing attributes stack is empty");
+  if (Parent->LCVMap.size() < I)
     return nullptr;
-  for (const auto &Pair : StackElem.LCVMap)
+  for (const auto &Pair : Parent->LCVMap)
     if (Pair.second.first == I)
       return Pair.first;
   return nullptr;
@@ -973,8 +1009,7 @@ void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A,
     Data.RefExpr.setPointer(E);
     Data.PrivateCopy = nullptr;
   } else {
-    assert(!isStackEmpty() && "Data-sharing attributes stack is empty");
-    DSAInfo &Data = Stack.back().first.back().SharingMap[D];
+    DSAInfo &Data = getTopOfStack().SharingMap[D];
     assert(Data.Attributes == OMPC_unknown || (A == Data.Attributes) ||
            (A == OMPC_firstprivate && Data.Attributes == OMPC_lastprivate) ||
            (A == OMPC_lastprivate && Data.Attributes == OMPC_firstprivate) ||
@@ -989,8 +1024,7 @@ void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A,
     Data.RefExpr.setPointerAndInt(E, IsLastprivate);
     Data.PrivateCopy = PrivateCopy;
     if (PrivateCopy) {
-      DSAInfo &Data =
-          Stack.back().first.back().SharingMap[PrivateCopy->getDecl()];
+      DSAInfo &Data = getTopOfStack().SharingMap[PrivateCopy->getDecl()];
       Data.Attributes = A;
       Data.RefExpr.setPointerAndInt(PrivateCopy, IsLastprivate);
       Data.PrivateCopy = nullptr;
@@ -1035,16 +1069,16 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
   D = getCanonicalDecl(D);
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty");
   assert(
-      Stack.back().first.back().SharingMap[D].Attributes == OMPC_reduction &&
+      getTopOfStack().SharingMap[D].Attributes == OMPC_reduction &&
       "Additional reduction info may be specified only for reduction items.");
-  ReductionData &ReductionData = Stack.back().first.back().ReductionMap[D];
+  ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
   assert(ReductionData.ReductionRange.isInvalid() &&
-         Stack.back().first.back().Directive == OMPD_taskgroup &&
+         getTopOfStack().Directive == OMPD_taskgroup &&
          "Additional reduction info may be specified only once for reduction "
          "items.");
   ReductionData.set(BOK, SR);
   Expr *&TaskgroupReductionRef =
-      Stack.back().first.back().TaskgroupReductionRef;
+      getTopOfStack().TaskgroupReductionRef;
   if (!TaskgroupReductionRef) {
     VarDecl *VD = buildVarDecl(SemaRef, SR.getBegin(),
                                SemaRef.Context.VoidPtrTy, ".task_red.");
@@ -1058,16 +1092,16 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
   D = getCanonicalDecl(D);
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty");
   assert(
-      Stack.back().first.back().SharingMap[D].Attributes == OMPC_reduction &&
+      getTopOfStack().SharingMap[D].Attributes == OMPC_reduction &&
       "Additional reduction info may be specified only for reduction items.");
-  ReductionData &ReductionData = Stack.back().first.back().ReductionMap[D];
+  ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
   assert(ReductionData.ReductionRange.isInvalid() &&
-         Stack.back().first.back().Directive == OMPD_taskgroup &&
+         getTopOfStack().Directive == OMPD_taskgroup &&
          "Additional reduction info may be specified only once for reduction "
          "items.");
   ReductionData.set(ReductionRef, SR);
   Expr *&TaskgroupReductionRef =
-      Stack.back().first.back().TaskgroupReductionRef;
+      getTopOfStack().TaskgroupReductionRef;
   if (!TaskgroupReductionRef) {
     VarDecl *VD = buildVarDecl(SemaRef, SR.getBegin(),
                                SemaRef.Context.VoidPtrTy, ".task_red.");
@@ -1081,11 +1115,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
     Expr *&TaskgroupDescriptor) const {
   D = getCanonicalDecl(D);
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
-  if (Stack.back().first.empty())
-      return DSAVarData();
-  for (iterator I = std::next(Stack.back().first.rbegin(), 1),
-                E = Stack.back().first.rend();
-       I != E; std::advance(I, 1)) {
+  for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
     const DSAInfo &Data = I->SharingMap.lookup(D);
     if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
       continue;
@@ -1110,11 +1140,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
     Expr *&TaskgroupDescriptor) const {
   D = getCanonicalDecl(D);
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
-  if (Stack.back().first.empty())
-      return DSAVarData();
-  for (iterator I = std::next(Stack.back().first.rbegin(), 1),
-                E = Stack.back().first.rend();
-       I != E; std::advance(I, 1)) {
+  for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
     const DSAInfo &Data = I->SharingMap.lookup(D);
     if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
       continue;
@@ -1134,21 +1160,17 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
   return DSAVarData();
 }
 
-bool DSAStackTy::isOpenMPLocal(VarDecl *D, iterator Iter) const {
+bool DSAStackTy::isOpenMPLocal(VarDecl *D, const_iterator I) const {
   D = D->getCanonicalDecl();
-  if (!isStackEmpty()) {
-    iterator I = Iter, E = Stack.back().first.rend();
-    Scope *TopScope = nullptr;
-    while (I != E && !isImplicitOrExplicitTaskingRegion(I->Directive) &&
-           !isOpenMPTargetExecutionDirective(I->Directive))
-      ++I;
-    if (I == E)
-      return false;
-    TopScope = I->CurScope ? I->CurScope->getParent() : nullptr;
-    Scope *CurScope = getCurScope();
-    while (CurScope && CurScope != TopScope && !CurScope->isDeclScope(D))
-      CurScope = CurScope->getParent();
-    return CurScope != TopScope;
+  for (const_iterator E = end(); I != E; ++I) {
+    if (isImplicitOrExplicitTaskingRegion(I->Directive) ||
+        isOpenMPTargetExecutionDirective(I->Directive)) {
+      Scope *TopScope = I->CurScope ? I->CurScope->getParent() : nullptr;
+      Scope *CurScope = getCurScope();
+      while (CurScope && CurScope != TopScope && !CurScope->isDeclScope(D))
+        CurScope = CurScope->getParent();
+      return CurScope != TopScope;
+    }
   }
   return false;
 }
@@ -1236,15 +1258,14 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D,
   if (SemaRef.getLangOpts().OpenMPCUDAMode && VD &&
       VD->isLocalVarDeclOrParm() && !isStackEmpty() &&
       !isLoopControlVariable(D).first) {
-    iterator IterTarget =
-        std::find_if(Stack.back().first.rbegin(), Stack.back().first.rend(),
-                     [](const SharingMapTy &Data) {
-                       return isOpenMPTargetExecutionDirective(Data.Directive);
-                     });
-    if (IterTarget != Stack.back().first.rend()) {
-      iterator ParentIterTarget = std::next(IterTarget, 1);
-      for (iterator Iter = Stack.back().first.rbegin();
-           Iter != ParentIterTarget; std::advance(Iter, 1)) {
+    const_iterator IterTarget =
+        std::find_if(begin(), end(), [](const SharingMapTy &Data) {
+          return isOpenMPTargetExecutionDirective(Data.Directive);
+        });
+    if (IterTarget != end()) {
+      const_iterator ParentIterTarget = IterTarget + 1;
+      for (const_iterator Iter = begin();
+           Iter != ParentIterTarget; ++Iter) {
         if (isOpenMPLocal(VD, Iter)) {
           DVar.RefExpr =
               buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(),
@@ -1253,7 +1274,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D,
           return DVar;
         }
       }
-      if (!isClauseParsingMode() || IterTarget != Stack.back().first.rbegin()) {
+      if (!isClauseParsingMode() || IterTarget != begin()) {
         auto DSAIter = IterTarget->SharingMap.find(D);
         if (DSAIter != IterTarget->SharingMap.end() &&
             isOpenMPPrivate(DSAIter->getSecond().Attributes)) {
@@ -1261,7 +1282,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D,
           DVar.CKind = OMPC_threadprivate;
           return DVar;
         }
-        iterator End = Stack.back().first.rend();
+        const_iterator End = end();
         if (!SemaRef.isOpenMPCapturedByRef(
                 D, std::distance(ParentIterTarget, End))) {
           DVar.RefExpr =
@@ -1321,10 +1342,10 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D,
 
   // Explicitly specified attributes and local variables with predetermined
   // attributes.
-  iterator I = Stack.back().first.rbegin();
-  iterator EndI = Stack.back().first.rend();
+  const_iterator I = begin();
+  const_iterator EndI = end();
   if (FromParent && I != EndI)
-    std::advance(I, 1);
+    ++I;
   auto It = I->SharingMap.find(D);
   if (It != I->SharingMap.end()) {
     const DSAInfo &Data = It->getSecond();
@@ -1341,14 +1362,14 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D,
 const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D,
                                                         bool FromParent) const {
   if (isStackEmpty()) {
-    iterator I;
+    const_iterator I;
     return getDSA(I, D);
   }
   D = getCanonicalDecl(D);
-  iterator StartI = Stack.back().first.rbegin();
-  iterator EndI = Stack.back().first.rend();
+  const_iterator StartI = begin();
+  const_iterator EndI = end();
   if (FromParent && StartI != EndI)
-    std::advance(StartI, 1);
+    ++StartI;
   return getDSA(StartI, D);
 }
 
@@ -1360,14 +1381,15 @@ DSAStackTy::hasDSA(ValueDecl *D,
   if (isStackEmpty())
     return {};
   D = getCanonicalDecl(D);
-  iterator I = Stack.back().first.rbegin();
-  iterator EndI = Stack.back().first.rend();
+  const_iterator I = begin();
+  const_iterator EndI = end();
   if (FromParent && I != EndI)
-    std::advance(I, 1);
-  for (; I != EndI; std::advance(I, 1)) {
-    if (!DPred(I->Directive) && !isImplicitOrExplicitTaskingRegion(I->Directive))
+    ++I;
+  for (; I != EndI; ++I) {
+    if (!DPred(I->Directive) &&
+        !isImplicitOrExplicitTaskingRegion(I->Directive))
       continue;
-    iterator NewI = I;
+    const_iterator NewI = I;
     DSAVarData DVar = getDSA(NewI, D);
     if (I == NewI && CPred(DVar.CKind))
       return DVar;
@@ -1382,13 +1404,13 @@ const DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA(
   if (isStackEmpty())
     return {};
   D = getCanonicalDecl(D);
-  iterator StartI = Stack.back().first.rbegin();
-  iterator EndI = Stack.back().first.rend();
+  const_iterator StartI = begin();
+  const_iterator EndI = end();
   if (FromParent && StartI != EndI)
-    std::advance(StartI, 1);
+    ++StartI;
   if (StartI == EndI || !DPred(StartI->Directive))
     return {};
-  iterator NewI = StartI;
+  const_iterator NewI = StartI;
   DSAVarData DVar = getDSA(NewI, D);
   return (NewI == StartI && CPred(DVar.CKind)) ? DVar : DSAVarData();
 }
@@ -1396,23 +1418,19 @@ const DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA(
 bool DSAStackTy::hasExplicitDSA(
     const ValueDecl *D, const llvm::function_ref<bool(OpenMPClauseKind)> CPred,
     unsigned Level, bool NotLastprivate) const {
-  if (isStackEmpty())
+  if (getStackSize() <= Level)
     return false;
   D = getCanonicalDecl(D);
-  auto StartI = Stack.back().first.begin();
-  auto EndI = Stack.back().first.end();
-  if (std::distance(StartI, EndI) <= (int)Level)
-    return false;
-  std::advance(StartI, Level);
-  auto I = StartI->SharingMap.find(D);
-  if ((I != StartI->SharingMap.end()) &&
-         I->getSecond().RefExpr.getPointer() &&
-         CPred(I->getSecond().Attributes) &&
-         (!NotLastprivate || !I->getSecond().RefExpr.getInt()))
+  const SharingMapTy &StackElem = getStackElemAtLevel(Level);
+  auto I = StackElem.SharingMap.find(D);
+  if (I != StackElem.SharingMap.end() &&
+      I->getSecond().RefExpr.getPointer() &&
+      CPred(I->getSecond().Attributes) &&
+      (!NotLastprivate || !I->getSecond().RefExpr.getInt()))
     return true;
   // Check predetermined rules for the loop control variables.
-  auto LI = StartI->LCVMap.find(D);
-  if (LI != StartI->LCVMap.end())
+  auto LI = StackElem.LCVMap.find(D);
+  if (LI != StackElem.LCVMap.end())
     return CPred(OMPC_private);
   return false;
 }
@@ -1420,14 +1438,10 @@ bool DSAStackTy::hasExplicitDSA(
 bool DSAStackTy::hasExplicitDirective(
     const llvm::function_ref<bool(OpenMPDirectiveKind)> DPred,
     unsigned Level) const {
-  if (isStackEmpty())
+  if (getStackSize() <= Level)
     return false;
-  auto StartI = Stack.back().first.begin();
-  auto EndI = Stack.back().first.end();
-  if (std::distance(StartI, EndI) <= (int)Level)
-    return false;
-  std::advance(StartI, Level);
-  return DPred(StartI->Directive);
+  const SharingMapTy &StackElem = getStackElemAtLevel(Level);
+  return DPred(StackElem.Directive);
 }
 
 bool DSAStackTy::hasDirective(
@@ -1436,13 +1450,9 @@ bool DSAStackTy::hasDirective(
         DPred,
     bool FromParent) const {
   // We look only in the enclosing region.
-  if (isStackEmpty())
-    return false;
-  auto StartI = std::next(Stack.back().first.rbegin());
-  auto EndI = Stack.back().first.rend();
-  if (FromParent && StartI != EndI)
-    StartI = std::next(StartI);
-  for (auto I = StartI, EE = EndI; I != EE; ++I) {
+  size_t Skip = FromParent ? 2 : 1;
+  for (const_iterator I = begin() + std::min(Skip, getStackSize()), E = end();
+       I != E; ++I) {
     if (DPred(I->Directive, I->DirectiveName, I->ConstructLoc))
       return true;
   }

From d556095135cf5d4b952b96e4fe7635791b67c2c8 Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Thu, 30 May 2019 23:30:35 +0000
Subject: [PATCH 0694/1176] Make ConnectionFileDescription work with all
 sockets

Summary:
My main goal here is to make lldb-server work with Android Studio.

This is currently not the case because lldb-server is started in platform mode listening on a domain socket. When Android Studio connects to it lldb-server crashes because even though it's listening on a domain socket as soon as it gets a connection it asserts that it's a TCP connection, which will obviously fails for any non-tcp connection.

To do this I came up with a new method called GetConnectURI() in Socket that returns the URI needed to connect to the connected portion of the socket.

Reviewers: labath, clayborg, xiaobai

Reviewed By: labath

Subscribers: mgorny, jfb, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62089

llvm-svn: 362173
---
 lldb/include/lldb/Host/Socket.h               |  3 +
 lldb/include/lldb/Host/common/TCPSocket.h     |  2 +
 lldb/include/lldb/Host/common/UDPSocket.h     |  2 +
 lldb/include/lldb/Host/posix/DomainSocket.h   |  3 +
 lldb/source/Host/common/TCPSocket.cpp         |  8 +++
 lldb/source/Host/common/UDPSocket.cpp         |  8 +++
 .../posix/ConnectionFileDescriptorPosix.cpp   |  8 +--
 lldb/source/Host/posix/DomainSocket.cpp       | 25 ++++++++
 .../GDBRemoteCommunicationServerPlatform.cpp  | 23 ++++---
 lldb/unittests/Host/SocketTest.cpp            | 62 +++++++++++++++++++
 10 files changed, 125 insertions(+), 19 deletions(-)

diff --git a/lldb/include/lldb/Host/Socket.h b/lldb/include/lldb/Host/Socket.h
index 0981808a7d022..6f96bd73e753e 100644
--- a/lldb/include/lldb/Host/Socket.h
+++ b/lldb/include/lldb/Host/Socket.h
@@ -102,6 +102,9 @@ class Socket : public IOObject {
                                 std::string &host_str, std::string &port_str,
                                 int32_t &port, Status *error_ptr);
 
+  // If this Socket is connected then return the URI used to connect.
+  virtual std::string GetRemoteConnectionURI() const { return ""; };
+
 protected:
   Socket(SocketProtocol protocol, bool should_close,
          bool m_child_process_inherit);
diff --git a/lldb/include/lldb/Host/common/TCPSocket.h b/lldb/include/lldb/Host/common/TCPSocket.h
index 526c39b3f361f..faf3bb693c325 100644
--- a/lldb/include/lldb/Host/common/TCPSocket.h
+++ b/lldb/include/lldb/Host/common/TCPSocket.h
@@ -46,6 +46,8 @@ class TCPSocket : public Socket {
 
   bool IsValid() const override;
 
+  std::string GetRemoteConnectionURI() const override;
+
 private:
   TCPSocket(NativeSocket socket, const TCPSocket &listen_socket);
 
diff --git a/lldb/include/lldb/Host/common/UDPSocket.h b/lldb/include/lldb/Host/common/UDPSocket.h
index 8ab6783da169e..b7b6db67d10d9 100644
--- a/lldb/include/lldb/Host/common/UDPSocket.h
+++ b/lldb/include/lldb/Host/common/UDPSocket.h
@@ -19,6 +19,8 @@ class UDPSocket : public Socket {
   static Status Connect(llvm::StringRef name, bool child_processes_inherit,
                         Socket *&socket);
 
+  std::string GetRemoteConnectionURI() const override;
+
 private:
   UDPSocket(NativeSocket socket);
 
diff --git a/lldb/include/lldb/Host/posix/DomainSocket.h b/lldb/include/lldb/Host/posix/DomainSocket.h
index 87db2dbddeac9..e407ce16bbcbb 100644
--- a/lldb/include/lldb/Host/posix/DomainSocket.h
+++ b/lldb/include/lldb/Host/posix/DomainSocket.h
@@ -20,11 +20,14 @@ class DomainSocket : public Socket {
   Status Listen(llvm::StringRef name, int backlog) override;
   Status Accept(Socket *&socket) override;
 
+  std::string GetRemoteConnectionURI() const override;
+
 protected:
   DomainSocket(SocketProtocol protocol, bool child_processes_inherit);
 
   virtual size_t GetNameOffset() const;
   virtual void DeleteSocketFile(llvm::StringRef name);
+  std::string GetSocketName() const;
 
 private:
   DomainSocket(NativeSocket socket, const DomainSocket &listen_socket);
diff --git a/lldb/source/Host/common/TCPSocket.cpp b/lldb/source/Host/common/TCPSocket.cpp
index f97ca9eb55b86..201ddd352ddae 100644
--- a/lldb/source/Host/common/TCPSocket.cpp
+++ b/lldb/source/Host/common/TCPSocket.cpp
@@ -118,6 +118,14 @@ std::string TCPSocket::GetRemoteIPAddress() const {
   return "";
 }
 
+std::string TCPSocket::GetRemoteConnectionURI() const {
+  if (m_socket != kInvalidSocketValue) {
+    return llvm::formatv("connect://[{0}]:{1}", GetRemoteIPAddress(),
+                         GetRemotePortNumber());
+  }
+  return "";
+};
+
 Status TCPSocket::CreateSocket(int domain) {
   Status error;
   if (IsValid())
diff --git a/lldb/source/Host/common/UDPSocket.cpp b/lldb/source/Host/common/UDPSocket.cpp
index e1bcd0c18da32..8dbf57d6fe4ec 100644
--- a/lldb/source/Host/common/UDPSocket.cpp
+++ b/lldb/source/Host/common/UDPSocket.cpp
@@ -134,3 +134,11 @@ Status UDPSocket::Connect(llvm::StringRef name, bool child_processes_inherit,
   error.Clear();
   return error;
 }
+
+std::string UDPSocket::GetRemoteConnectionURI() const {
+  if (m_socket != kInvalidSocketValue) {
+    return llvm::formatv("udp://[{0}]:{1}", m_sockaddr.GetIPAddress(),
+                         m_sockaddr.GetPort());
+  }
+  return "";
+}
diff --git a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
index 237d11acb418f..067e85972ecac 100644
--- a/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
+++ b/lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp
@@ -758,13 +758,7 @@ void ConnectionFileDescriptor::SetChildProcessesInherit(
 }
 
 void ConnectionFileDescriptor::InitializeSocket(Socket *socket) {
-  assert(socket->GetSocketProtocol() == Socket::ProtocolTcp);
-  TCPSocket *tcp_socket = static_cast<TCPSocket *>(socket);
-
   m_write_sp.reset(socket);
   m_read_sp = m_write_sp;
-  StreamString strm;
-  strm.Printf("connect://[%s]:%u", tcp_socket->GetRemoteIPAddress().c_str(),
-              tcp_socket->GetRemotePortNumber());
-  m_uri = strm.GetString();
+  m_uri = socket->GetRemoteConnectionURI();
 }
diff --git a/lldb/source/Host/posix/DomainSocket.cpp b/lldb/source/Host/posix/DomainSocket.cpp
index c6366e44e79d8..f4b5f428742bb 100644
--- a/lldb/source/Host/posix/DomainSocket.cpp
+++ b/lldb/source/Host/posix/DomainSocket.cpp
@@ -125,3 +125,28 @@ size_t DomainSocket::GetNameOffset() const { return 0; }
 void DomainSocket::DeleteSocketFile(llvm::StringRef name) {
   llvm::sys::fs::remove(name);
 }
+
+std::string DomainSocket::GetSocketName() const {
+  if (m_socket != kInvalidSocketValue) {
+    struct sockaddr_un saddr_un;
+    saddr_un.sun_family = AF_UNIX;
+    socklen_t sock_addr_len = sizeof(struct sockaddr_un);
+    if (::getpeername(m_socket, (struct sockaddr *)&saddr_un, &sock_addr_len) ==
+        0)
+      return std::string(saddr_un.sun_path + GetNameOffset(),
+                         sock_addr_len -
+                             offsetof(struct sockaddr_un, sun_path) -
+                             GetNameOffset());
+  }
+  return "";
+}
+
+std::string DomainSocket::GetRemoteConnectionURI() const {
+  if (m_socket != kInvalidSocketValue) {
+    return llvm::formatv("{0}://{1}",
+                         GetNameOffset() == 0 ? "unix-connect"
+                                              : "unix-abstract-connect",
+                         GetSocketName());
+  }
+  return "";
+}
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
index 7157f53fb836e..6deb75f2f0211 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
@@ -115,25 +115,24 @@ Status GDBRemoteCommunicationServerPlatform::LaunchGDBServer(
                 this, std::placeholders::_1),
       false);
 
-  llvm::StringRef platform_scheme;
-  llvm::StringRef platform_ip;
-  int platform_port;
-  llvm::StringRef platform_path;
-  std::string platform_uri = GetConnection()->GetURI();
-  bool ok = UriParser::Parse(platform_uri, platform_scheme, platform_ip,
-                             platform_port, platform_path);
-  UNUSED_IF_ASSERT_DISABLED(ok);
-  assert(ok);
-
   std::ostringstream url;
 // debugserver does not accept the URL scheme prefix.
 #if !defined(__APPLE__)
   url << m_socket_scheme << "://";
 #endif
   uint16_t *port_ptr = &port;
-  if (m_socket_protocol == Socket::ProtocolTcp)
+  if (m_socket_protocol == Socket::ProtocolTcp) {
+    llvm::StringRef platform_scheme;
+    llvm::StringRef platform_ip;
+    int platform_port;
+    llvm::StringRef platform_path;
+    std::string platform_uri = GetConnection()->GetURI();
+    bool ok = UriParser::Parse(platform_uri, platform_scheme, platform_ip,
+                               platform_port, platform_path);
+    UNUSED_IF_ASSERT_DISABLED(ok);
+    assert(ok);
     url << platform_ip.str() << ":" << port;
-  else {
+  } else {
     socket_name = GetDomainSocketPath("gdbserver").GetPath();
     url << socket_name;
     port_ptr = nullptr;
diff --git a/lldb/unittests/Host/SocketTest.cpp b/lldb/unittests/Host/SocketTest.cpp
index 26a8bd765c6ef..0d1851e5c0268 100644
--- a/lldb/unittests/Host/SocketTest.cpp
+++ b/lldb/unittests/Host/SocketTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SocketTestUtilities.h"
+#include "lldb/Utility/UriParser.h"
 #include "gtest/gtest.h"
 
 using namespace lldb_private;
@@ -147,3 +148,64 @@ TEST_F(SocketTest, TCPListen0GetPort) {
   EXPECT_TRUE(socket_up->IsValid());
   EXPECT_NE(socket_up->GetLocalPortNumber(), 0);
 }
+
+TEST_F(SocketTest, TCPGetConnectURI) {
+  std::unique_ptr<TCPSocket> socket_a_up;
+  std::unique_ptr<TCPSocket> socket_b_up;
+  if (!IsAddressFamilySupported("127.0.0.1")) {
+    GTEST_LOG_(WARNING) << "Skipping test due to missing IPv4 support.";
+    return;
+  }
+  CreateTCPConnectedSockets("127.0.0.1", &socket_a_up, &socket_b_up);
+
+  llvm::StringRef scheme;
+  llvm::StringRef hostname;
+  int port;
+  llvm::StringRef path;
+  std::string uri(socket_a_up->GetRemoteConnectionURI());
+  EXPECT_TRUE(UriParser::Parse(uri, scheme, hostname, port, path));
+  EXPECT_EQ(scheme, "connect");
+  EXPECT_EQ(port, socket_a_up->GetRemotePortNumber());
+}
+
+TEST_F(SocketTest, UDPGetConnectURI) {
+  if (!IsAddressFamilySupported("127.0.0.1")) {
+    GTEST_LOG_(WARNING) << "Skipping test due to missing IPv4 support.";
+    return;
+  }
+  Socket *socket;
+  bool child_processes_inherit = false;
+  auto error =
+      UDPSocket::Connect("127.0.0.1:0", child_processes_inherit, socket);
+
+  llvm::StringRef scheme;
+  llvm::StringRef hostname;
+  int port;
+  llvm::StringRef path;
+  std::string uri(socket->GetRemoteConnectionURI());
+  EXPECT_TRUE(UriParser::Parse(uri, scheme, hostname, port, path));
+  EXPECT_EQ(scheme, "udp");
+}
+
+#ifndef LLDB_DISABLE_POSIX
+TEST_F(SocketTest, DomainGetConnectURI) {
+  llvm::SmallString<64> domain_path;
+  std::error_code EC =
+      llvm::sys::fs::createUniqueDirectory("DomainListenConnectAccept", domain_path);
+  ASSERT_FALSE(EC);
+  llvm::sys::path::append(domain_path, "test");
+
+  std::unique_ptr<DomainSocket> socket_a_up;
+  std::unique_ptr<DomainSocket> socket_b_up;
+  CreateDomainConnectedSockets(domain_path, &socket_a_up, &socket_b_up);
+
+  llvm::StringRef scheme;
+  llvm::StringRef hostname;
+  int port;
+  llvm::StringRef path;
+  std::string uri(socket_a_up->GetRemoteConnectionURI());
+  EXPECT_TRUE(UriParser::Parse(uri, scheme, hostname, port, path));
+  EXPECT_EQ(scheme, "unix-connect");
+  EXPECT_EQ(path, domain_path);
+}
+#endif
\ No newline at end of file

From 18659f84b2c284085768bfbf73bc82185707d2f4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 30 May 2019 23:31:36 +0000
Subject: [PATCH 0695/1176] MISched: Fix -misched-regpressure=0 if subreg
 liveness enabled

Test is waiting on fixing several more crashes in the AMDGPU scheduler
implementation with this.

llvm-svn: 362174
---
 llvm/lib/CodeGen/MachineScheduler.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 0e7974133e488..92c051d5c467e 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2761,8 +2761,10 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
 
   // After subtarget overrides, apply command line options.
-  if (!EnableRegPressure)
+  if (!EnableRegPressure) {
     RegionPolicy.ShouldTrackPressure = false;
+    RegionPolicy.ShouldTrackLaneMasks = false;
+  }
 
   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
   // e.g. -misched-bottomup=false allows scheduling in both directions.

From 70dc2200a2de2f69d95777e1c33e2fbb28d998f2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 30 May 2019 23:35:24 +0000
Subject: [PATCH 0696/1176] [X86] Remove result type constraints from the
 extloadv2f32/extloadv4f32/extloadv8f32 PatFrags. NFC

The result types aren't mentioned in the pattern name so really shouldn't be in the PatFrags.

The users of these either have their own type constraint or rely on the type constranit system to realize the only legal extend would be to f64.

llvm-svn: 362175
---
 llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index c7f1021b87bd4..8a6f09f5cacf5 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -746,9 +746,9 @@ def loadv32i16   : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
 def loadv64i8    : PatFrag<(ops node:$ptr), (v64i8  (load node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
-def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
-def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
-def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
+def extloadv2f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv8f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
 
 // Like 'store', but always requires vector size alignment.
 def alignedstore : PatFrag<(ops node:$val, node:$ptr),

From b2f45ac299509c46ed5b41c263a1e834c49bed29 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Thu, 30 May 2019 23:54:43 +0000
Subject: [PATCH 0697/1176] [clangd] clang-format SymbolCollector.cpp

llvm-svn: 362176
---
 clang-tools-extra/clangd/index/SymbolCollector.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index af1938dafa8ab..507d0ea6ba64d 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -416,8 +416,7 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name,
   return true;
 }
 
-void SymbolCollector::setIncludeLocation(const Symbol &S,
-                                         SourceLocation Loc) {
+void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
   if (Opts.CollectIncludePath)
     if (shouldCollectIncludePath(S.SymInfo.Kind))
       // Use the expansion location to get the #include header since this is
@@ -681,7 +680,7 @@ static bool isErrorAboutInclude(llvm::StringRef Line) {
   if (!Line.consume_front("#"))
     return false;
   Line = Line.ltrim();
-  if (! Line.startswith("error"))
+  if (!Line.startswith("error"))
     return false;
   return Line.contains_lower("includ"); // Matches "include" or "including".
 }
@@ -689,7 +688,7 @@ static bool isErrorAboutInclude(llvm::StringRef Line) {
 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
   llvm::StringRef Line;
   // Only sniff up to 100 lines or 10KB.
-  Content = Content.take_front(100*100);
+  Content = Content.take_front(100 * 100);
   for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
     std::tie(Line, Content) = Content.split('\n');
     if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))

From 4a585a3edd1c96f19b819d44df25c376e4987a44 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Fri, 31 May 2019 00:18:42 +0000
Subject: [PATCH 0698/1176] Make CPlusPlusNameParser robust against nullptr
 StringRefs.

There is likely also an underlying bug in all code that calls
CPlusPlusNameParser with nullptrs, but this patch can also stand for
itself.

rdar://problem/49072829

llvm-svn: 362177
---
 .../Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp      | 2 ++
 lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
index a098f129a1d68..932db17b964a5 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
@@ -640,6 +640,8 @@ static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
 }
 
 void CPlusPlusNameParser::ExtractTokens() {
+  if (m_text.empty())
+    return;
   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
                      m_text.data(), m_text.data() + m_text.size());
   const auto &kw_map = GetKeywordsMap();
diff --git a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
index 3047bea6bcecb..150bef1590f4a 100644
--- a/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
+++ b/lldb/unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h"
+#include "Plugins/Language/CPlusPlus/CPlusPlusNameParser.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -192,3 +193,8 @@ TEST(CPlusPlusLanguage, FindAlternateFunctionManglings) {
   EXPECT_THAT(FindAlternate("_ZN1A1fEai"), Contains("_ZN1A1fEci"));
   EXPECT_THAT(FindAlternate("_bogus"), IsEmpty());
 }
+
+TEST(CPlusPlusLanguage, CPlusPlusNameParser) {
+  // Don't crash.
+  CPlusPlusNameParser((const char *)nullptr);
+}

From 2fdd95c1c823ebafc9286f7b730339c38636c5a8 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 31 May 2019 00:45:09 +0000
Subject: [PATCH 0699/1176] Defer capture initialization for blocks until after
 we've left the function scope.

This removes one of the last few cases where we build expressions in the
wrong function scope context. No functionality change intended.

llvm-svn: 362178
---
 .../clang/Sema/AnalysisBasedWarnings.h        |   2 +-
 clang/include/clang/Sema/ScopeInfo.h          |   2 +
 clang/include/clang/Sema/Sema.h               |  20 ++-
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |   9 +-
 clang/lib/Sema/Sema.cpp                       |  47 ++++---
 clang/lib/Sema/SemaExpr.cpp                   | 130 ++++++++++--------
 clang/test/Analysis/blocks.mm                 |   7 +-
 7 files changed, 130 insertions(+), 87 deletions(-)

diff --git a/clang/include/clang/Sema/AnalysisBasedWarnings.h b/clang/include/clang/Sema/AnalysisBasedWarnings.h
index d5df5364edb12..e13fe955eaf48 100644
--- a/clang/include/clang/Sema/AnalysisBasedWarnings.h
+++ b/clang/include/clang/Sema/AnalysisBasedWarnings.h
@@ -90,7 +90,7 @@ class AnalysisBasedWarnings {
   AnalysisBasedWarnings(Sema &s);
 
   void IssueWarnings(Policy P, FunctionScopeInfo *fscope,
-                     const Decl *D, const BlockExpr *blkExpr);
+                     const Decl *D, QualType BlockType);
 
   Policy getDefaultPolicy() { return DefaultPolicy; }
 
diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 375d93111adb4..7b6e0118f3805 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -487,6 +487,8 @@ class FunctionScopeInfo {
   /// Clear out the information in this function scope, making it
   /// suitable for reuse.
   void Clear();
+
+  bool isPlainFunction() const { return Kind == SK_Function; }
 };
 
 class Capture {
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d7486ec1c2617..7ec9f4737b212 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -595,7 +595,7 @@ class Sema {
   using MaybeODRUseExprSet = llvm::SmallPtrSet<Expr *, 2>;
   MaybeODRUseExprSet MaybeODRUseExprs;
 
-  std::unique_ptr<sema::FunctionScopeInfo> PreallocatedFunctionScope;
+  std::unique_ptr<sema::FunctionScopeInfo> CachedFunctionScope;
 
   /// Stack containing information about each of the nested
   /// function, block, and method scopes that are currently active.
@@ -1408,10 +1408,24 @@ class Sema {
   void PushCapturedRegionScope(Scope *RegionScope, CapturedDecl *CD,
                                RecordDecl *RD,
                                CapturedRegionKind K);
-  void
+
+  /// Custom deleter to allow FunctionScopeInfos to be kept alive for a short
+  /// time after they've been popped.
+  class PoppedFunctionScopeDeleter {
+    Sema *Self;
+
+  public:
+    explicit PoppedFunctionScopeDeleter(Sema *Self) : Self(Self) {}
+    void operator()(sema::FunctionScopeInfo *Scope) const;
+  };
+
+  using PoppedFunctionScopePtr =
+      std::unique_ptr<sema::FunctionScopeInfo, PoppedFunctionScopeDeleter>;
+
+  PoppedFunctionScopePtr
   PopFunctionScopeInfo(const sema::AnalysisBasedWarnings::Policy *WP = nullptr,
                        const Decl *D = nullptr,
-                       const BlockExpr *blkExpr = nullptr);
+                       QualType BlockType = QualType());
 
   sema::FunctionScopeInfo *getCurFunction() const {
     return FunctionScopes.empty() ? nullptr : FunctionScopes.back();
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index bac407b832e15..ce01909f18589 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -620,7 +620,7 @@ struct CheckFallThroughDiagnostics {
 /// of a noreturn function.  We assume that functions and blocks not marked
 /// noreturn will return.
 static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body,
-                                    const BlockExpr *blkExpr,
+                                    QualType BlockType,
                                     const CheckFallThroughDiagnostics &CD,
                                     AnalysisDeclContext &AC,
                                     sema::FunctionScopeInfo *FSI) {
@@ -641,9 +641,8 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body,
     HasNoReturn = MD->hasAttr<NoReturnAttr>();
   }
   else if (isa<BlockDecl>(D)) {
-    QualType BlockTy = blkExpr->getType();
     if (const FunctionType *FT =
-          BlockTy->getPointeeType()->getAs<FunctionType>()) {
+          BlockType->getPointeeType()->getAs<FunctionType>()) {
       if (FT->getReturnType()->isVoidType())
         ReturnsVoid = true;
       if (FT->getNoReturnAttr())
@@ -2012,7 +2011,7 @@ static void flushDiagnostics(Sema &S, const sema::FunctionScopeInfo *fscope) {
 void clang::sema::
 AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
                                      sema::FunctionScopeInfo *fscope,
-                                     const Decl *D, const BlockExpr *blkExpr) {
+                                     const Decl *D, QualType BlockType) {
 
   // We avoid doing analysis-based warnings when there are errors for
   // two reasons:
@@ -2138,7 +2137,7 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P,
                    : (fscope->isCoroutine()
                           ? CheckFallThroughDiagnostics::MakeForCoroutine(D)
                           : CheckFallThroughDiagnostics::MakeForFunction(D)));
-    CheckFallThroughForBody(S, D, Body, blkExpr, CD, AC, fscope);
+    CheckFallThroughForBody(S, D, Body, BlockType, CD, AC, fscope);
   }
 
   // Warning: check for unreachable code
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 71b2f47ee51bf..1a8948b94dc86 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -176,8 +176,6 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer,
       ExpressionEvaluationContext::PotentiallyEvaluated, 0, CleanupInfo{},
       nullptr, ExpressionEvaluationContextRecord::EK_Other);
 
-  PreallocatedFunctionScope.reset(new FunctionScopeInfo(Diags));
-
   // Initialization of data sharing attributes stack for OpenMP
   InitDataSharingAttributesStack();
 
@@ -354,8 +352,7 @@ Sema::~Sema() {
 
   // Kill all the active scopes.
   for (sema::FunctionScopeInfo *FSI : FunctionScopes)
-    if (FSI != PreallocatedFunctionScope.get())
-      delete FSI;
+    delete FSI;
 
   // Tell the SemaConsumer to forget about us; we're going out of scope.
   if (SemaConsumer *SC = dyn_cast<SemaConsumer>(&Consumer))
@@ -1596,10 +1593,10 @@ Scope *Sema::getScopeForContext(DeclContext *Ctx) {
 
 /// Enter a new function scope
 void Sema::PushFunctionScope() {
-  if (FunctionScopes.empty()) {
-    // Use PreallocatedFunctionScope to avoid allocating memory when possible.
-    PreallocatedFunctionScope->Clear();
-    FunctionScopes.push_back(PreallocatedFunctionScope.get());
+  if (FunctionScopes.empty() && CachedFunctionScope) {
+    // Use CachedFunctionScope to avoid allocating memory when possible.
+    CachedFunctionScope->Clear();
+    FunctionScopes.push_back(CachedFunctionScope.release());
   } else {
     FunctionScopes.push_back(new FunctionScopeInfo(getDiagnostics()));
   }
@@ -1680,30 +1677,42 @@ static void markEscapingByrefs(const FunctionScopeInfo &FSI, Sema &S) {
   }
 }
 
-void Sema::PopFunctionScopeInfo(const AnalysisBasedWarnings::Policy *WP,
-                                const Decl *D, const BlockExpr *blkExpr) {
+/// Pop a function (or block or lambda or captured region) scope from the stack.
+///
+/// \param WP The warning policy to use for CFG-based warnings, or null if such
+///        warnings should not be produced.
+/// \param D The declaration corresponding to this function scope, if producing
+///        CFG-based warnings.
+/// \param BlockType The type of the block expression, if D is a BlockDecl.
+Sema::PoppedFunctionScopePtr
+Sema::PopFunctionScopeInfo(const AnalysisBasedWarnings::Policy *WP,
+                           const Decl *D, QualType BlockType) {
   assert(!FunctionScopes.empty() && "mismatched push/pop!");
 
-  // This function shouldn't be called after popping the current function scope.
-  // markEscapingByrefs calls PerformMoveOrCopyInitialization, which can call
-  // PushFunctionScope, which can cause clearing out PreallocatedFunctionScope
-  // when FunctionScopes is empty.
   markEscapingByrefs(*FunctionScopes.back(), *this);
 
-  FunctionScopeInfo *Scope = FunctionScopes.pop_back_val();
+  PoppedFunctionScopePtr Scope(FunctionScopes.pop_back_val(),
+                               PoppedFunctionScopeDeleter(this));
 
   if (LangOpts.OpenMP)
-    popOpenMPFunctionRegion(Scope);
+    popOpenMPFunctionRegion(Scope.get());
 
   // Issue any analysis-based warnings.
   if (WP && D)
-    AnalysisWarnings.IssueWarnings(*WP, Scope, D, blkExpr);
+    AnalysisWarnings.IssueWarnings(*WP, Scope.get(), D, BlockType);
   else
     for (const auto &PUD : Scope->PossiblyUnreachableDiags)
       Diag(PUD.Loc, PUD.PD);
 
-  // Delete the scope unless its our preallocated scope.
-  if (Scope != PreallocatedFunctionScope.get())
+  return Scope;
+}
+
+void Sema::PoppedFunctionScopeDeleter::
+operator()(sema::FunctionScopeInfo *Scope) const {
+  // Stash the function scope for later reuse if it's for a normal function.
+  if (Scope->isPlainFunction() && !Self->CachedFunctionScope)
+    Self->CachedFunctionScope.reset(Scope);
+  else
     delete Scope;
 }
 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 762ab673fa87c..7a86e885bd736 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -13823,8 +13823,6 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
   if (BSI->HasImplicitReturnType)
     deduceClosureReturnType(*BSI);
 
-  PopDeclContext();
-
   QualType RetTy = Context.VoidTy;
   if (!BSI->ReturnType.isNull())
     RetTy = BSI->ReturnType;
@@ -13832,17 +13830,6 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
   bool NoReturn = BD->hasAttr<NoReturnAttr>();
   QualType BlockTy;
 
-  // Set the captured variables on the block.
-  SmallVector<BlockDecl::Capture, 4> Captures;
-  for (Capture &Cap : BSI->Captures) {
-    if (Cap.isInvalid() || Cap.isThisCapture())
-      continue;
-    BlockDecl::Capture NewCap(Cap.getVariable(), Cap.isBlockCapture(),
-                              Cap.isNested(), Cap.getInitExpr());
-    Captures.push_back(NewCap);
-  }
-  BD->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0);
-
   // If the user wrote a function type in some form, try to use that.
   if (!BSI->FunctionType.isNull()) {
     const FunctionType *FTy = BSI->FunctionType->getAs<FunctionType>();
@@ -13898,9 +13885,80 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
       !BD->isDependentContext())
     computeNRVO(Body, BSI);
 
-  BlockExpr *Result = new (Context) BlockExpr(BD, BlockTy);
+  PopDeclContext();
+
+  // Pop the block scope now but keep it alive to the end of this function.
   AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy();
-  PopFunctionScopeInfo(&WP, Result->getBlockDecl(), Result);
+  PoppedFunctionScopePtr ScopeRAII = PopFunctionScopeInfo(&WP, BD, BlockTy);
+
+  // Set the captured variables on the block.
+  SmallVector<BlockDecl::Capture, 4> Captures;
+  for (Capture &Cap : BSI->Captures) {
+    if (Cap.isInvalid() || Cap.isThisCapture())
+      continue;
+
+    VarDecl *Var = Cap.getVariable();
+    Expr *CopyExpr = nullptr;
+    if (getLangOpts().CPlusPlus && Cap.isCopyCapture()) {
+      if (const RecordType *Record =
+              Cap.getCaptureType()->getAs<RecordType>()) {
+        // The capture logic needs the destructor, so make sure we mark it.
+        // Usually this is unnecessary because most local variables have
+        // their destructors marked at declaration time, but parameters are
+        // an exception because it's technically only the call site that
+        // actually requires the destructor.
+        if (isa<ParmVarDecl>(Var))
+          FinalizeVarWithDestructor(Var, Record);
+
+        // Enter a separate potentially-evaluated context while building block
+        // initializers to isolate their cleanups from those of the block
+        // itself.
+        // FIXME: Is this appropriate even when the block itself occurs in an
+        // unevaluated operand?
+        EnterExpressionEvaluationContext EvalContext(
+            *this, ExpressionEvaluationContext::PotentiallyEvaluated);
+
+        SourceLocation Loc = Cap.getLocation();
+
+        ExprResult Result = BuildDeclarationNameExpr(
+            CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
+
+        // According to the blocks spec, the capture of a variable from
+        // the stack requires a const copy constructor.  This is not true
+        // of the copy/move done to move a __block variable to the heap.
+        if (!Result.isInvalid() &&
+            !Result.get()->getType().isConstQualified()) {
+          Result = ImpCastExprToType(Result.get(),
+                                     Result.get()->getType().withConst(),
+                                     CK_NoOp, VK_LValue);
+        }
+
+        if (!Result.isInvalid()) {
+          Result = PerformCopyInitialization(
+              InitializedEntity::InitializeBlock(Var->getLocation(),
+                                                 Cap.getCaptureType(), false),
+              Loc, Result.get());
+        }
+
+        // Build a full-expression copy expression if initialization
+        // succeeded and used a non-trivial constructor.  Recover from
+        // errors by pretending that the copy isn't necessary.
+        if (!Result.isInvalid() &&
+            !cast<CXXConstructExpr>(Result.get())->getConstructor()
+                ->isTrivial()) {
+          Result = MaybeCreateExprWithCleanups(Result);
+          CopyExpr = Result.get();
+        }
+      }
+    }
+
+    BlockDecl::Capture NewCap(Var, Cap.isBlockCapture(), Cap.isNested(),
+                              CopyExpr);
+    Captures.push_back(NewCap);
+  }
+  BD->setCaptures(Context, Captures, BSI->CXXThisCaptureIndex != 0);
+
+  BlockExpr *Result = new (Context) BlockExpr(BD, BlockTy);
 
   // If the block isn't obviously global, i.e. it captures anything at
   // all, then we need to do a few things in the surrounding context:
@@ -15192,7 +15250,6 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
                                  QualType &DeclRefType,
                                  const bool Nested,
                                  Sema &S, bool Invalid) {
-  Expr *CopyExpr = nullptr;
   bool ByRef = false;
 
   // Blocks are not allowed to capture arrays, excepting OpenCL.
@@ -15264,51 +15321,12 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
     // Block capture by copy introduces 'const'.
     CaptureType = CaptureType.getNonReferenceType().withConst();
     DeclRefType = CaptureType;
-
-    if (S.getLangOpts().CPlusPlus && BuildAndDiagnose) {
-      if (const RecordType *Record = DeclRefType->getAs<RecordType>()) {
-        // The capture logic needs the destructor, so make sure we mark it.
-        // Usually this is unnecessary because most local variables have
-        // their destructors marked at declaration time, but parameters are
-        // an exception because it's technically only the call site that
-        // actually requires the destructor.
-        if (isa<ParmVarDecl>(Var))
-          S.FinalizeVarWithDestructor(Var, Record);
-
-        // Enter a new evaluation context to insulate the copy
-        // full-expression.
-        EnterExpressionEvaluationContext scope(
-            S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
-
-        // According to the blocks spec, the capture of a variable from
-        // the stack requires a const copy constructor.  This is not true
-        // of the copy/move done to move a __block variable to the heap.
-        Expr *DeclRef = new (S.Context) DeclRefExpr(
-            S.Context, Var, Nested, DeclRefType.withConst(), VK_LValue, Loc);
-
-        ExprResult Result
-          = S.PerformCopyInitialization(
-              InitializedEntity::InitializeBlock(Var->getLocation(),
-                                                  CaptureType, false),
-              Loc, DeclRef);
-
-        // Build a full-expression copy expression if initialization
-        // succeeded and used a non-trivial constructor.  Recover from
-        // errors by pretending that the copy isn't necessary.
-        if (!Result.isInvalid() &&
-            !cast<CXXConstructExpr>(Result.get())->getConstructor()
-                ->isTrivial()) {
-          Result = S.MaybeCreateExprWithCleanups(Result);
-          CopyExpr = Result.get();
-        }
-      }
-    }
   }
 
   // Actually capture the variable.
   if (BuildAndDiagnose)
     BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(),
-                    CaptureType, CopyExpr, Invalid);
+                    CaptureType, nullptr, Invalid);
 
   return !Invalid;
 }
diff --git a/clang/test/Analysis/blocks.mm b/clang/test/Analysis/blocks.mm
index 8a3f1708514e9..97c531e0c5dce 100644
--- a/clang/test/Analysis/blocks.mm
+++ b/clang/test/Analysis/blocks.mm
@@ -52,9 +52,10 @@ void testBlockWithCopyExpression(StructWithCopyConstructor s) {
 
 // CHECK: [B1]
 // CHECK-NEXT:   1: s
-// CHECK-NEXT:   2: [B1.1] (CXXConstructExpr, const struct StructWithCopyConstructor)
-// CHECK-NEXT:   3: ^{ }
-// CHECK-NEXT:   4: (void)([B1.3]) (CStyleCastExpr, ToVoid, void)
+// CHECK-NEXT:   2: [B1.1] (ImplicitCastExpr, NoOp, const struct StructWithCopyConstructor)
+// CHECK-NEXT:   3: [B1.2] (CXXConstructExpr, const struct StructWithCopyConstructor)
+// CHECK-NEXT:   4: ^{ }
+// CHECK-NEXT:   5: (void)([B1.4]) (CStyleCastExpr, ToVoid, void)
 // CHECK-NEXT:   Preds (1): B2
 // CHECK-NEXT:   Succs (1): B0
 

From 0621a8f353631c72e497dba01790b39f83943679 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 31 May 2019 00:45:10 +0000
Subject: [PATCH 0700/1176] Defer capture initialization for captured regions
 until after we've left the captured region scope.

This removes a case where we would build expressions (and mark
declarations odr-used) in the wrong scope.

Remove the now-unused 'capture initializer' field on sema::Capture
(except for 'this' captures, which still need to be cleaned up).

No functionality change intended (except that we now very slightly more
precisely determine whether we need to use a capture or not when another
captured region encloses an OpenMP captured region).

llvm-svn: 362179
---
 clang/include/clang/Sema/ScopeInfo.h          | 15 ++--
 clang/lib/Sema/ScopeInfo.cpp                  | 20 +++--
 clang/lib/Sema/SemaDecl.cpp                   |  2 +-
 clang/lib/Sema/SemaExpr.cpp                   | 19 +----
 clang/lib/Sema/SemaLambda.cpp                 | 29 +++----
 clang/lib/Sema/SemaOpenMP.cpp                 | 80 ++++++++++++++++---
 clang/lib/Sema/SemaStmt.cpp                   | 46 +++++++----
 clang/test/OpenMP/for_lastprivate_codegen.cpp |  5 +-
 8 files changed, 145 insertions(+), 71 deletions(-)

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 7b6e0118f3805..215025c62d29f 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -557,8 +557,8 @@ class Capture {
 public:
   Capture(VarDecl *Var, bool Block, bool ByRef, bool IsNested,
           SourceLocation Loc, SourceLocation EllipsisLoc, QualType CaptureType,
-          Expr *Cpy, bool Invalid)
-      : CapturedVar(Var), InitExpr(Cpy), Loc(Loc), EllipsisLoc(EllipsisLoc),
+          bool Invalid)
+      : CapturedVar(Var), Loc(Loc), EllipsisLoc(EllipsisLoc),
         CaptureType(CaptureType),
         Kind(Block ? Cap_Block : ByRef ? Cap_ByRef : Cap_ByCopy),
         Nested(IsNested), CapturesThis(false), ODRUsed(false),
@@ -593,6 +593,9 @@ class Capture {
 
   bool isInvalid() const { return Invalid; }
 
+  /// Determine whether this capture is an init-capture.
+  bool isInitCapture() const;
+
   bool isODRUsed() const { return ODRUsed; }
   bool isNonODRUsed() const { return NonODRUsed; }
   void markUsed(bool IsODRUse) {
@@ -624,8 +627,8 @@ class Capture {
   /// that would store this capture.
   QualType getCaptureType() const { return CaptureType; }
 
-  Expr *getInitExpr() const {
-    assert(!isVLATypeCapture() && "no init expression for type capture");
+  Expr *getThisInitExpr() const {
+    assert(isThisCapture() && "no 'this' init expression for non-this capture");
     return InitExpr;
   }
 };
@@ -665,9 +668,9 @@ class CapturingScopeInfo : public FunctionScopeInfo {
 
   void addCapture(VarDecl *Var, bool isBlock, bool isByref, bool isNested,
                   SourceLocation Loc, SourceLocation EllipsisLoc,
-                  QualType CaptureType, Expr *Cpy, bool Invalid) {
+                  QualType CaptureType, bool Invalid) {
     Captures.push_back(Capture(Var, isBlock, isByref, isNested, Loc,
-                               EllipsisLoc, CaptureType, Cpy, Invalid));
+                               EllipsisLoc, CaptureType, Invalid));
     CaptureMap[Var] = Captures.size();
   }
 
diff --git a/clang/lib/Sema/ScopeInfo.cpp b/clang/lib/Sema/ScopeInfo.cpp
index dd309a2811850..e84e592a4827e 100644
--- a/clang/lib/Sema/ScopeInfo.cpp
+++ b/clang/lib/Sema/ScopeInfo.cpp
@@ -112,13 +112,6 @@ FunctionScopeInfo::WeakObjectProfileTy::getBaseInfo(const Expr *E) {
   return BaseInfoTy(D, IsExact);
 }
 
-bool CapturingScopeInfo::isVLATypeCaptured(const VariableArrayType *VAT) const {
-  for (auto &Cap : Captures)
-    if (Cap.isVLATypeCapture() && Cap.getCapturedVLAType() == VAT)
-      return true;
-  return false;
-}
-
 FunctionScopeInfo::WeakObjectProfileTy::WeakObjectProfileTy(
                                           const ObjCPropertyRefExpr *PropE)
     : Base(nullptr, true), Property(getBestPropertyDecl(PropE)) {
@@ -223,6 +216,19 @@ void FunctionScopeInfo::markSafeWeakUse(const Expr *E) {
   ThisUse->markSafe();
 }
 
+bool Capture::isInitCapture() const {
+  // Note that a nested capture of an init-capture is not itself an
+  // init-capture.
+  return !isNested() && isVariableCapture() && getVariable()->isInitCapture();
+}
+
+bool CapturingScopeInfo::isVLATypeCaptured(const VariableArrayType *VAT) const {
+  for (auto &Cap : Captures)
+    if (Cap.isVLATypeCapture() && Cap.getCapturedVLAType() == VAT)
+      return true;
+  return false;
+}
+
 void LambdaScopeInfo::getPotentialVariableCapture(unsigned Idx, VarDecl *&VD,
                                                   Expr *&E) const {
   assert(Idx < getNumPotentialVariableCaptures() &&
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index fbc410f014d97..759eb531c50fe 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12939,7 +12939,7 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
           /*RefersToEnclosingVariableOrCapture*/true, C.getLocation(),
           /*EllipsisLoc*/C.isPackExpansion()
                          ? C.getEllipsisLoc() : SourceLocation(),
-          CaptureType, /*Expr*/ nullptr, /*Invalid*/false);
+          CaptureType, /*Invalid*/false);
 
     } else if (C.capturesThis()) {
       LSI->addThisCapture(/*Nested*/ false, C.getLocation(), I->getType(),
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 7a86e885bd736..5746a102b7124 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -15326,7 +15326,7 @@ static bool captureInBlock(BlockScopeInfo *BSI, VarDecl *Var,
   // Actually capture the variable.
   if (BuildAndDiagnose)
     BSI->addCapture(Var, HasBlocksAttr, ByRef, Nested, Loc, SourceLocation(),
-                    CaptureType, nullptr, Invalid);
+                    CaptureType, Invalid);
 
   return !Invalid;
 }
@@ -15360,22 +15360,10 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI,
   else
     CaptureType = DeclRefType;
 
-  Expr *CopyExpr = nullptr;
-  if (BuildAndDiagnose) {
-    // The current implementation assumes that all variables are captured
-    // by references. Since there is no capture by copy, no expression
-    // evaluation will be needed.
-    CopyExpr = new (S.Context) DeclRefExpr(
-        S.Context, Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc);
-    Var->setReferenced(true);
-    Var->markUsed(S.Context);
-  }
-
   // Actually capture the variable.
   if (BuildAndDiagnose)
     RSI->addCapture(Var, /*isBlock*/ false, ByRef, RefersToCapturedVariable,
-                    Loc, SourceLocation(), CaptureType, CopyExpr,
-                    Invalid);
+                    Loc, SourceLocation(), CaptureType, Invalid);
 
   return !Invalid;
 }
@@ -15474,8 +15462,7 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
   // Add the capture.
   if (BuildAndDiagnose)
     LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable,
-                    Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr,
-                    Invalid);
+                    Loc, EllipsisLoc, CaptureType, Invalid);
 
   return !Invalid;
 }
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 6d487cc832513..a17a3da67fdec 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -844,9 +844,10 @@ VarDecl *Sema::createLambdaInitCaptureVarDecl(SourceLocation Loc,
 }
 
 void Sema::addInitCapture(LambdaScopeInfo *LSI, VarDecl *Var) {
+  assert(Var->isInitCapture() && "init capture flag should be set");
   LSI->addCapture(Var, /*isBlock*/false, Var->getType()->isReferenceType(),
                   /*isNested*/false, Var->getLocation(), SourceLocation(),
-                  Var->getType(), Var->getInit(), /*Invalid*/false);
+                  Var->getType(), /*Invalid*/false);
 }
 
 void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
@@ -1488,8 +1489,8 @@ mapImplicitCaptureStyle(CapturingScopeInfo::ImplicitCaptureStyle ICS) {
 }
 
 bool Sema::CaptureHasSideEffects(const Capture &From) {
-  if (!From.isVLATypeCapture()) {
-    Expr *Init = From.getInitExpr();
+  if (From.isInitCapture()) {
+    Expr *Init = From.getVariable()->getInit();
     if (Init && Init->HasSideEffects(Context))
       return true;
   }
@@ -1637,7 +1638,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
       if (!CurContext->isDependentContext() && !IsImplicit && !From.isODRUsed()) {
         // Initialized captures that are non-ODR used may not be eliminated.
         bool NonODRUsedInitCapture =
-            IsGenericLambda && From.isNonODRUsed() && From.getInitExpr();
+            IsGenericLambda && From.isNonODRUsed() && From.isInitCapture();
         if (!NonODRUsedInitCapture) {
           bool IsLast = (I + 1) == LSI->NumExplicitCaptures;
           SourceRange FixItRange;
@@ -1682,7 +1683,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
         Captures.push_back(
             LambdaCapture(From.getLocation(), IsImplicit,
                           From.isCopyCapture() ? LCK_StarThis : LCK_This));
-        CaptureInits.push_back(From.getInitExpr());
+        CaptureInits.push_back(From.getThisInitExpr());
         continue;
       }
       if (From.isVLATypeCapture()) {
@@ -1696,15 +1697,15 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
       LambdaCaptureKind Kind = From.isCopyCapture() ? LCK_ByCopy : LCK_ByRef;
       Captures.push_back(LambdaCapture(From.getLocation(), IsImplicit, Kind,
                                        Var, From.getEllipsisLoc()));
-      Expr *Init = From.getInitExpr();
-      if (!Init) {
-        auto InitResult = performLambdaVarCaptureInitialization(
-            *this, From, Field, CaptureDefaultLoc, IsImplicit);
-        if (InitResult.isInvalid())
-          return ExprError();
-        Init = InitResult.get();
-      }
-      CaptureInits.push_back(Init);
+
+      ExprResult Init =
+          From.isInitCapture()
+              ? Var->getInit()
+              : performLambdaVarCaptureInitialization(
+                    *this, From, Field, CaptureDefaultLoc, IsImplicit);
+      if (Init.isInvalid())
+        return ExprError();
+      CaptureInits.push_back(Init.get());
     }
 
     // C++11 [expr.prim.lambda]p6:
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 5a6b49961f2e2..7e75a98070878 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -143,6 +143,7 @@ class DSAStackTy {
     bool NowaitRegion = false;
     bool CancelRegion = false;
     bool LoopStart = false;
+    bool BodyComplete = false;
     SourceLocation InnerTeamsRegionLoc;
     /// Reference to the taskgroup task_reduction reference expression.
     Expr *TaskgroupReductionRef = nullptr;
@@ -172,19 +173,22 @@ class DSAStackTy {
   /// captured by reference.
   bool ForceCaptureByReferenceInTargetExecutable = false;
   CriticalsWithHintsTy Criticals;
+  unsigned IgnoredStackElements = 0;
 
   /// Iterators over the stack iterate in order from innermost to outermost
   /// directive.
   using const_iterator = StackTy::const_reverse_iterator;
   const_iterator begin() const {
-    return Stack.empty() ? const_iterator() : Stack.back().first.rbegin();
+    return Stack.empty() ? const_iterator()
+                         : Stack.back().first.rbegin() + IgnoredStackElements;
   }
   const_iterator end() const {
     return Stack.empty() ? const_iterator() : Stack.back().first.rend();
   }
   using iterator = StackTy::reverse_iterator;
   iterator begin() {
-    return Stack.empty() ? iterator() : Stack.back().first.rbegin();
+    return Stack.empty() ? iterator()
+                         : Stack.back().first.rbegin() + IgnoredStackElements;
   }
   iterator end() {
     return Stack.empty() ? iterator() : Stack.back().first.rend();
@@ -195,16 +199,18 @@ class DSAStackTy {
   bool isStackEmpty() const {
     return Stack.empty() ||
            Stack.back().second != CurrentNonCapturingFunctionScope ||
-           Stack.back().first.empty();
+           Stack.back().first.size() <= IgnoredStackElements;
   }
   size_t getStackSize() const {
-    return isStackEmpty() ? 0 : Stack.back().first.size();
+    return isStackEmpty() ? 0
+                          : Stack.back().first.size() - IgnoredStackElements;
   }
 
   SharingMapTy *getTopOfStackOrNull() {
-    if (isStackEmpty())
+    size_t Size = getStackSize();
+    if (Size == 0)
       return nullptr;
-    return &Stack.back().first.back();
+    return &Stack.back().first[Size - 1];
   }
   const SharingMapTy *getTopOfStackOrNull() const {
     return const_cast<DSAStackTy&>(*this).getTopOfStackOrNull();
@@ -280,6 +286,14 @@ class DSAStackTy {
   }
   void setClauseParsingMode(OpenMPClauseKind K) { ClauseKindMode = K; }
 
+  bool isBodyComplete() const {
+    const SharingMapTy *Top = getTopOfStackOrNull();
+    return Top && Top->BodyComplete;
+  }
+  void setBodyComplete() {
+    getTopOfStack().BodyComplete = true;
+  }
+
   bool isForceVarCapturing() const { return ForceCapturing; }
   void setForceVarCapturing(bool V) { ForceCapturing = V; }
 
@@ -292,6 +306,8 @@ class DSAStackTy {
 
   void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName,
             Scope *CurScope, SourceLocation Loc) {
+    assert(!IgnoredStackElements &&
+           "cannot change stack while ignoring elements");
     if (Stack.empty() ||
         Stack.back().second != CurrentNonCapturingFunctionScope)
       Stack.emplace_back(StackTy(), CurrentNonCapturingFunctionScope);
@@ -300,11 +316,39 @@ class DSAStackTy {
   }
 
   void pop() {
+    assert(!IgnoredStackElements &&
+           "cannot change stack while ignoring elements");
     assert(!Stack.back().first.empty() &&
            "Data-sharing attributes stack is empty!");
     Stack.back().first.pop_back();
   }
 
+  /// RAII object to temporarily leave the scope of a directive when we want to
+  /// logically operate in its parent.
+  class ParentDirectiveScope {
+    DSAStackTy &Self;
+    bool Active;
+  public:
+    ParentDirectiveScope(DSAStackTy &Self, bool Activate)
+        : Self(Self), Active(false) {
+      if (Activate)
+        enable();
+    }
+    ~ParentDirectiveScope() { disable(); }
+    void disable() {
+      if (Active) {
+        --Self.IgnoredStackElements;
+        Active = false;
+      }
+    }
+    void enable() {
+      if (!Active) {
+        ++Self.IgnoredStackElements;
+        Active = true;
+      }
+    }
+  };
+
   /// Marks that we're started loop parsing.
   void loopInit() {
     assert(isOpenMPLoopDirective(getCurrentDirective()) &&
@@ -334,12 +378,16 @@ class DSAStackTy {
   }
   /// Start new OpenMP region stack in new non-capturing function.
   void pushFunction() {
+    assert(!IgnoredStackElements &&
+           "cannot change stack while ignoring elements");
     const FunctionScopeInfo *CurFnScope = SemaRef.getCurFunction();
     assert(!isa<CapturingScopeInfo>(CurFnScope));
     CurrentNonCapturingFunctionScope = CurFnScope;
   }
   /// Pop region stack for non-capturing function.
   void popFunction(const FunctionScopeInfo *OldFSI) {
+    assert(!IgnoredStackElements &&
+           "cannot change stack while ignoring elements");
     if (!Stack.empty() && Stack.back().second == OldFSI) {
       assert(Stack.back().first.empty());
       Stack.pop_back();
@@ -1711,13 +1759,20 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
   assert(LangOpts.OpenMP && "OpenMP is not allowed");
   D = getCanonicalDecl(D);
 
+  // If we want to determine whether the variable should be captured from the
+  // perspective of the current capturing scope, and we've already left all the
+  // capturing scopes of the top directive on the stack, check from the
+  // perspective of its parent directive (if any) instead.
+  DSAStackTy::ParentDirectiveScope InParentDirectiveRAII(
+      *DSAStack, CheckScopeInfo && DSAStack->isBodyComplete());
+
   // If we are attempting to capture a global variable in a directive with
   // 'target' we return true so that this global is also mapped to the device.
   //
   auto *VD = dyn_cast<VarDecl>(D);
-  if (VD && !VD->hasLocalStorage()) {
-    if (isInOpenMPDeclareTargetContext() &&
-        (getCurCapturedRegion() || getCurBlock() || getCurLambda())) {
+  if (VD && !VD->hasLocalStorage() &&
+      (getCurCapturedRegion() || getCurBlock() || getCurLambda())) {
+    if (isInOpenMPDeclareTargetContext()) {
       // Try to mark variable as declare target if it is used in capturing
       // regions.
       if (!OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
@@ -1734,6 +1789,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
   }
   // Capture variables captured by reference in lambdas for target-based
   // directives.
+  // FIXME: Triggering capture from here is completely inappropriate.
   if (VD && !DSAStack->isClauseParsingMode()) {
     if (const auto *RD = VD->getType()
                              .getCanonicalType()
@@ -1742,6 +1798,7 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
       bool SavedForceCaptureByReferenceInTargetExecutable =
           DSAStack->isForceCaptureByReferenceInTargetExecutable();
       DSAStack->setForceCaptureByReferenceInTargetExecutable(/*V=*/true);
+      InParentDirectiveRAII.disable();
       if (RD->isLambda()) {
         llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
         FieldDecl *ThisCapture;
@@ -1771,6 +1828,8 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
           }
         }
       }
+      if (CheckScopeInfo && DSAStack->isBodyComplete())
+        InParentDirectiveRAII.enable();
       DSAStack->setForceCaptureByReferenceInTargetExecutable(
           SavedForceCaptureByReferenceInTargetExecutable);
     }
@@ -3392,6 +3451,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
     return StmtError();
   }
   StmtResult SR = S;
+  unsigned CompletedRegions = 0;
   for (OpenMPDirectiveKind ThisCaptureRegion : llvm::reverse(CaptureRegions)) {
     // Mark all variables in private list clauses as used in inner region.
     // Required for proper codegen of combined directives.
@@ -3413,6 +3473,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
         }
       }
     }
+    if (++CompletedRegions == CaptureRegions.size())
+      DSAStack->setBodyComplete();
     SR = ActOnCapturedRegionEnd(SR.get());
   }
   return SR;
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 357e257abe096..7a9a801b188aa 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4223,7 +4223,7 @@ Sema::CreateCapturedStmtRecordDecl(CapturedDecl *&CD, SourceLocation Loc,
   return RD;
 }
 
-static void
+static bool
 buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
                              SmallVectorImpl<CapturedStmt::Capture> &Captures,
                              SmallVectorImpl<Expr *> &CaptureInits) {
@@ -4237,7 +4237,7 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     if (Cap.isThisCapture()) {
       Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
                                                CapturedStmt::VCK_This));
-      CaptureInits.push_back(Cap.getInitExpr());
+      CaptureInits.push_back(Cap.getThisInitExpr());
       continue;
     } else if (Cap.isVLATypeCapture()) {
       Captures.push_back(
@@ -4248,13 +4248,25 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
 
     if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
       S.setOpenMPCaptureKind(Field, Cap.getVariable(), RSI->OpenMPLevel);
-    Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
+
+    VarDecl *Var = Cap.getVariable();
+    SourceLocation Loc = Cap.getLocation();
+
+    // FIXME: For a non-reference capture, we need to build an expression to
+    // perform a copy here!
+    ExprResult Init = S.BuildDeclarationNameExpr(
+        CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
+    if (Init.isInvalid())
+      return true;
+
+    Captures.push_back(CapturedStmt::Capture(Loc,
                                              Cap.isReferenceCapture()
                                                  ? CapturedStmt::VCK_ByRef
                                                  : CapturedStmt::VCK_ByCopy,
-                                             Cap.getVariable()));
-    CaptureInits.push_back(Cap.getInitExpr());
+                                             Var));
+    CaptureInits.push_back(Init.get());
   }
+  return false;
 }
 
 void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope,
@@ -4347,25 +4359,31 @@ void Sema::ActOnCapturedRegionStart(SourceLocation Loc, Scope *CurScope,
 void Sema::ActOnCapturedRegionError() {
   DiscardCleanupsInEvaluationContext();
   PopExpressionEvaluationContext();
+  PopDeclContext();
+  PoppedFunctionScopePtr ScopeRAII = PopFunctionScopeInfo();
+  CapturedRegionScopeInfo *RSI = cast<CapturedRegionScopeInfo>(ScopeRAII.get());
 
-  CapturedRegionScopeInfo *RSI = getCurCapturedRegion();
   RecordDecl *Record = RSI->TheRecordDecl;
   Record->setInvalidDecl();
 
   SmallVector<Decl*, 4> Fields(Record->fields());
   ActOnFields(/*Scope=*/nullptr, Record->getLocation(), Record, Fields,
               SourceLocation(), SourceLocation(), ParsedAttributesView());
-
-  PopDeclContext();
-  PopFunctionScopeInfo();
 }
 
 StmtResult Sema::ActOnCapturedRegionEnd(Stmt *S) {
-  CapturedRegionScopeInfo *RSI = getCurCapturedRegion();
+  // Leave the captured scope before we start creating captures in the
+  // enclosing scope.
+  DiscardCleanupsInEvaluationContext();
+  PopExpressionEvaluationContext();
+  PopDeclContext();
+  PoppedFunctionScopePtr ScopeRAII = PopFunctionScopeInfo();
+  CapturedRegionScopeInfo *RSI = cast<CapturedRegionScopeInfo>(ScopeRAII.get());
 
   SmallVector<CapturedStmt::Capture, 4> Captures;
   SmallVector<Expr *, 4> CaptureInits;
-  buildCapturedStmtCaptureList(*this, RSI, Captures, CaptureInits);
+  if (buildCapturedStmtCaptureList(*this, RSI, Captures, CaptureInits))
+    return StmtError();
 
   CapturedDecl *CD = RSI->TheCapturedDecl;
   RecordDecl *RD = RSI->TheRecordDecl;
@@ -4377,11 +4395,5 @@ StmtResult Sema::ActOnCapturedRegionEnd(Stmt *S) {
   CD->setBody(Res->getCapturedStmt());
   RD->completeDefinition();
 
-  DiscardCleanupsInEvaluationContext();
-  PopExpressionEvaluationContext();
-
-  PopDeclContext();
-  PopFunctionScopeInfo();
-
   return Res;
 }
diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp
index 57fb4ad9d586b..b7c82c2f303ee 100644
--- a/clang/test/OpenMP/for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp
@@ -193,10 +193,13 @@ int main() {
   // LAMBDA-LABEL: @main
   // LAMBDA: alloca [[SS_TY]],
   // LAMBDA: alloca [[CAP_TY:%.+]],
+  // FIXME: The outer lambda should not capture 'sivar'; that capture is not
+  // used for anything.
+  // LAMBDA: store {{.*}}@_ZZ4mainE5sivar,
   // LAMBDA: call void [[OUTER_LAMBDA:@.+]]([[CAP_TY]]*
   [&]() {
   // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
-  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* %{{.+}})
+  // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* @_ZZ4mainE5sivar)
 #pragma omp parallel
 #pragma omp for lastprivate(g, g1, sivar)
   for (int i = 0; i < 2; ++i) {

From 9d21f510ee4f63841effb4e76979e64ea76a8a99 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 31 May 2019 01:04:00 +0000
Subject: [PATCH 0701/1176] Fix -DBUILD_SHARED_LIBS=ON build after rL362160

Differential Revision: https://reviews.llvm.org/D62709

llvm-svn: 362180
---
 clang/lib/CodeGen/CMakeLists.txt | 1 +
 llvm/lib/LTO/LLVMBuild.txt       | 1 +
 llvm/tools/llc/CMakeLists.txt    | 1 +
 llvm/tools/opt/CMakeLists.txt    | 1 +
 4 files changed, 4 insertions(+)

diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
index 416bc4dc3122e..7d418d697cfa2 100644
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -17,6 +17,7 @@ set(LLVM_LINK_COMPONENTS
   Object
   Passes
   ProfileData
+  Remarks
   ScalarOpts
   Support
   Target
diff --git a/llvm/lib/LTO/LLVMBuild.txt b/llvm/lib/LTO/LLVMBuild.txt
index ed2b150ddaf99..1afbe1ff2d859 100644
--- a/llvm/lib/LTO/LLVMBuild.txt
+++ b/llvm/lib/LTO/LLVMBuild.txt
@@ -32,6 +32,7 @@ required_libraries =
  ObjCARC
  Object
  Passes
+ Remarks
  Scalar
  Support
  Target
diff --git a/llvm/tools/llc/CMakeLists.txt b/llvm/tools/llc/CMakeLists.txt
index 130f0cd3cb9e8..300efdab10d0c 100644
--- a/llvm/tools/llc/CMakeLists.txt
+++ b/llvm/tools/llc/CMakeLists.txt
@@ -10,6 +10,7 @@ set(LLVM_LINK_COMPONENTS
   IRReader
   MC
   MIRParser
+  Remarks
   ScalarOpts
   SelectionDAG
   Support
diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt
index c9e44449dc2fb..cb4ba5cfbced7 100644
--- a/llvm/tools/opt/CMakeLists.txt
+++ b/llvm/tools/opt/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
   Instrumentation
   MC
   ObjCARCOpts
+  Remarks
   ScalarOpts
   Support
   Target

From a35c50c9a4d1c15b4830c40013e73ec7933f8f6c Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Fri, 31 May 2019 01:16:43 +0000
Subject: [PATCH 0702/1176] [CMake][Fuchsia] Use libc++ ABI v2 on Darwin as
 well

Since we share headers between host and target builds, we need to use
the same version for both.

Differential Revision: https://reviews.llvm.org/D62712

llvm-svn: 362181
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index 4f024a7fb5106..cedc69fa6f6e1 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -49,6 +49,7 @@ if(APPLE)
   set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
   set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
   set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
+  set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
 endif()
 
 foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unknown-linux-gnu;x86_64-unknown-linux-gnu)

From b5a45bb77e05647f04bbd9780d70aabe9f251155 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 31 May 2019 01:17:04 +0000
Subject: [PATCH 0703/1176] Defer building 'this' captures until we have left
 the capturing region and returned to the context in which 'this' should be
 captured.

This means we now always mark 'this' referenced from the context in
which it's actually referenced, rather than potentially from some
context nested within that.

llvm-svn: 362182
---
 clang/include/clang/Sema/ScopeInfo.h      | 26 ++++++-----------
 clang/include/clang/Sema/Sema.h           |  5 ++++
 clang/lib/Sema/SemaDecl.cpp               |  2 +-
 clang/lib/Sema/SemaExprCXX.cpp            | 35 +----------------------
 clang/lib/Sema/SemaLambda.cpp             | 23 +++++++++++++--
 clang/lib/Sema/SemaStmt.cpp               |  6 ++--
 clang/test/AST/ast-dump-expr-json.cpp     |  6 ++--
 clang/test/AST/ast-dump-expr.cpp          |  4 +--
 clang/test/SemaCXX/lambda-expressions.cpp |  6 ++--
 9 files changed, 46 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 215025c62d29f..177c88d7e8475 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -517,11 +517,6 @@ class Capture {
     VarDecl *CapturedVar;
   };
 
-  /// Expression to initialize a field of the given type. This is only required
-  /// if we are capturing ByVal and the variable's type has a non-trivial copy
-  /// constructor.
-  Expr *InitExpr = nullptr;
-
   /// The source location at which the first capture occurred.
   SourceLocation Loc;
 
@@ -566,8 +561,8 @@ class Capture {
 
   enum IsThisCapture { ThisCapture };
   Capture(IsThisCapture, bool IsNested, SourceLocation Loc,
-          QualType CaptureType, Expr *Cpy, const bool ByCopy, bool Invalid)
-      : InitExpr(Cpy), Loc(Loc), CaptureType(CaptureType),
+          QualType CaptureType, const bool ByCopy, bool Invalid)
+      : Loc(Loc), CaptureType(CaptureType),
         Kind(ByCopy ? Cap_ByCopy : Cap_ByRef), Nested(IsNested),
         CapturesThis(true), ODRUsed(false), NonODRUsed(false),
         Invalid(Invalid) {}
@@ -626,11 +621,6 @@ class Capture {
   /// the type of the non-static data member in the lambda/block structure
   /// that would store this capture.
   QualType getCaptureType() const { return CaptureType; }
-
-  Expr *getThisInitExpr() const {
-    assert(isThisCapture() && "no 'this' init expression for non-this capture");
-    return InitExpr;
-  }
 };
 
 class CapturingScopeInfo : public FunctionScopeInfo {
@@ -681,7 +671,7 @@ class CapturingScopeInfo : public FunctionScopeInfo {
   }
 
   void addThisCapture(bool isNested, SourceLocation Loc, QualType CaptureType,
-                      Expr *Cpy, bool ByCopy);
+                      bool ByCopy);
 
   /// Determine whether the C++ 'this' is captured.
   bool isCXXThisCaptured() const { return CXXThisCaptureIndex != 0; }
@@ -1025,12 +1015,12 @@ void FunctionScopeInfo::recordUseOfWeak(const ExprT *E, bool IsRead) {
   Uses.push_back(WeakUseTy(E, IsRead));
 }
 
-inline void
-CapturingScopeInfo::addThisCapture(bool isNested, SourceLocation Loc,
-                                   QualType CaptureType, Expr *Cpy,
-                                   const bool ByCopy) {
+inline void CapturingScopeInfo::addThisCapture(bool isNested,
+                                               SourceLocation Loc,
+                                               QualType CaptureType,
+                                               bool ByCopy) {
   Captures.push_back(Capture(Capture::ThisCapture, isNested, Loc, CaptureType,
-                             Cpy, ByCopy, /*Invalid*/ false));
+                             ByCopy, /*Invalid*/ false));
   CXXThisCaptureIndex = Captures.size();
 }
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 7ec9f4737b212..3e128df7fba2a 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5291,6 +5291,11 @@ class Sema {
       const unsigned *const FunctionScopeIndexToStopAt = nullptr,
       bool ByCopy = false);
 
+  /// Initialize the given 'this' capture with a suitable 'this' or '*this'
+  /// expression.
+  ExprResult performThisCaptureInitialization(const sema::Capture &Capture,
+                                              bool IsImplicit);
+
   /// Determine whether the given type is the type of *this that is used
   /// outside of the body of a member function for a type that is currently
   /// being defined.
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 759eb531c50fe..6bd7b4e071101 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12943,7 +12943,7 @@ static void RebuildLambdaScopeInfo(CXXMethodDecl *CallOperator,
 
     } else if (C.capturesThis()) {
       LSI->addThisCapture(/*Nested*/ false, C.getLocation(), I->getType(),
-                          /*Expr*/ nullptr, C.getCaptureKind() == LCK_StarThis);
+                          C.getCaptureKind() == LCK_StarThis);
     } else {
       LSI->addVLATypeCapture(C.getLocation(), I->getCapturedVLAType(),
                              I->getType());
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 6e67968929ad0..ac050fa1ef55c 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1134,24 +1134,6 @@ Sema::CXXThisScopeRAII::~CXXThisScopeRAII() {
   }
 }
 
-static Expr *captureThis(Sema &S, ASTContext &Context, RecordDecl *RD,
-                         QualType ThisTy, QualType CaptureType,
-                         SourceLocation Loc, const bool ByCopy) {
-  Expr *This = new (Context) CXXThisExpr(Loc, ThisTy, /*isImplicit*/ true);
-  if (ByCopy) {
-    Expr *StarThis = S.CreateBuiltinUnaryOp(Loc, UO_Deref, This).get();
-    InitializedEntity Entity =
-        InitializedEntity::InitializeLambdaCapture(nullptr, CaptureType, Loc);
-    InitializationKind InitKind =
-        InitializationKind::CreateDirect(Loc, Loc, Loc);
-    InitializationSequence Init(S, Entity, InitKind, StarThis);
-    ExprResult ER = Init.Perform(S, Entity, InitKind, StarThis);
-    if (ER.isInvalid()) return nullptr;
-    return ER.get();
-  }
-  return This;
-}
-
 bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit,
     bool BuildAndDiagnose, const unsigned *const FunctionScopeIndexToStopAt,
     const bool ByCopy) {
@@ -1241,13 +1223,10 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit,
           dyn_cast<LambdaScopeInfo>(FunctionScopes[MaxFunctionScopesIndex])) &&
          "Only a lambda can capture the enclosing object (referred to by "
          "*this) by copy");
-  // FIXME: We need to delay this marking in PotentiallyPotentiallyEvaluated
-  // contexts.
   QualType ThisTy = getCurrentThisType();
   for (int idx = MaxFunctionScopesIndex; NumCapturingClosures;
        --idx, --NumCapturingClosures) {
     CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FunctionScopes[idx]);
-    Expr *ThisExpr = nullptr;
 
     // The type of the corresponding data member (not a 'this' pointer if 'by
     // copy').
@@ -1261,20 +1240,8 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit,
       CaptureType.removeLocalCVRQualifiers(Qualifiers::CVRMask);
     }
 
-    if (LambdaScopeInfo *LSI = dyn_cast<LambdaScopeInfo>(CSI)) {
-      // For lambda expressions, build a field and an initializing expression,
-      // and capture the *enclosing object* by copy only if this is the first
-      // iteration.
-      ThisExpr = captureThis(*this, Context, LSI->Lambda, ThisTy, CaptureType,
-                             Loc, ByCopy && idx == MaxFunctionScopesIndex);
-
-    } else if (CapturedRegionScopeInfo *RSI
-        = dyn_cast<CapturedRegionScopeInfo>(FunctionScopes[idx]))
-      ThisExpr = captureThis(*this, Context, RSI->TheRecordDecl, ThisTy,
-                             CaptureType, Loc, false /*ByCopy*/);
-
     bool isNested = NumCapturingClosures > 1;
-    CSI->addThisCapture(isNested, Loc, CaptureType, ThisExpr, ByCopy);
+    CSI->addThisCapture(isNested, Loc, CaptureType, ByCopy);
   }
   return false;
 }
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index a17a3da67fdec..d3f3b60926fa3 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1431,6 +1431,24 @@ static void addBlockPointerConversion(Sema &S,
   Class->addDecl(Conversion);
 }
 
+ExprResult Sema::performThisCaptureInitialization(const Capture &Cap,
+                                                  bool IsImplicit) {
+  QualType ThisTy = getCurrentThisType();
+  SourceLocation Loc = Cap.getLocation();
+  Expr *This = BuildCXXThisExpr(Loc, ThisTy, IsImplicit);
+  if (Cap.isReferenceCapture())
+    return This;
+
+  // Capture (by copy) of '*this'.
+  Expr *StarThis = CreateBuiltinUnaryOp(Loc, UO_Deref, This).get();
+  InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture(
+      nullptr, Cap.getCaptureType(), Loc);
+  InitializationKind InitKind =
+      InitializationKind::CreateDirect(Loc, Loc, Loc);
+  InitializationSequence Init(*this, Entity, InitKind, StarThis);
+  return Init.Perform(*this, Entity, InitKind, StarThis);
+}
+
 static ExprResult performLambdaVarCaptureInitialization(
     Sema &S, const Capture &Capture, FieldDecl *Field,
     SourceLocation ImplicitCaptureLoc, bool IsImplicitCapture) {
@@ -1680,10 +1698,11 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
                      getLocForEndOfToken(CaptureDefaultLoc), ", this");
         }
 
+        ExprResult Init = performThisCaptureInitialization(From, IsImplicit);
         Captures.push_back(
             LambdaCapture(From.getLocation(), IsImplicit,
                           From.isCopyCapture() ? LCK_StarThis : LCK_This));
-        CaptureInits.push_back(From.getThisInitExpr());
+        CaptureInits.push_back(Init.get());
         continue;
       }
       if (From.isVLATypeCapture()) {
@@ -1703,8 +1722,6 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
               ? Var->getInit()
               : performLambdaVarCaptureInitialization(
                     *this, From, Field, CaptureDefaultLoc, IsImplicit);
-      if (Init.isInvalid())
-        return ExprError();
       CaptureInits.push_back(Init.get());
     }
 
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 7a9a801b188aa..3a7acd20274ec 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4235,9 +4235,11 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     FieldDecl *Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap);
 
     if (Cap.isThisCapture()) {
+      ExprResult Init =
+          S.performThisCaptureInitialization(Cap, /*Implicit*/ true);
       Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
                                                CapturedStmt::VCK_This));
-      CaptureInits.push_back(Cap.getThisInitExpr());
+      CaptureInits.push_back(Init.get());
       continue;
     } else if (Cap.isVLATypeCapture()) {
       Captures.push_back(
@@ -4256,8 +4258,6 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     // perform a copy here!
     ExprResult Init = S.BuildDeclarationNameExpr(
         CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
-    if (Init.isInvalid())
-      return true;
 
     Captures.push_back(CapturedStmt::Capture(Loc,
                                              Cap.isReferenceCapture()
diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index 90a3bb0734221..3a8e745e50f3e 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -3941,8 +3941,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                  "type": {
 // CHECK-NEXT:                   "qualType": "V *"
 // CHECK-NEXT:                  },
-// CHECK-NEXT:                  "valueCategory": "rvalue",
-// CHECK-NEXT:                  "implicit": true
+// CHECK-NEXT:                  "valueCategory": "rvalue"
 // CHECK-NEXT:                 },
 // CHECK-NEXT:                 {
 // CHECK-NEXT:                  "id": "0x{{.*}}",
@@ -4173,8 +4172,7 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                      "type": {
 // CHECK-NEXT:                       "qualType": "V *"
 // CHECK-NEXT:                      },
-// CHECK-NEXT:                      "valueCategory": "rvalue",
-// CHECK-NEXT:                      "implicit": true
+// CHECK-NEXT:                      "valueCategory": "rvalue"
 // CHECK-NEXT:                     }
 // CHECK-NEXT:                    ]
 // CHECK-NEXT:                   }
diff --git a/clang/test/AST/ast-dump-expr.cpp b/clang/test/AST/ast-dump-expr.cpp
index 693dd573079f3..47f69a882ecba 100644
--- a/clang/test/AST/ast-dump-expr.cpp
+++ b/clang/test/AST/ast-dump-expr.cpp
@@ -255,7 +255,7 @@ void PrimaryExpressions(Ts... a) {
       // CHECK-NEXT: CXXMethodDecl
       // CHECK-NEXT: CompoundStmt
       // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V *'
-      // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' implicit this
+      // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' this
 
       [*this]{};
       // CHECK: LambdaExpr 0x{{[^ ]*}} <line:[[@LINE-1]]:7, col:15>
@@ -272,7 +272,7 @@ void PrimaryExpressions(Ts... a) {
       // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V'
       // CHECK-NEXT: ParenListExpr 0x{{[^ ]*}} <col:8> 'NULL TYPE'
       // CHECK-NEXT: UnaryOperator 0x{{[^ ]*}} <col:8> '<dependent type>' prefix '*' cannot overflow
-      // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' implicit this
+      // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' this
     }
   };
 
diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp
index 8b0b83078b0a1..311def9147f36 100644
--- a/clang/test/SemaCXX/lambda-expressions.cpp
+++ b/clang/test/SemaCXX/lambda-expressions.cpp
@@ -105,7 +105,7 @@ namespace SpecialMembers {
     a = static_cast<decltype(a)&&>(a); // expected-error {{copy assignment operator is implicitly deleted}}
   }
   struct P {
-    P(const P&) = delete; // expected-note {{deleted here}}
+    P(const P&) = delete; // expected-note 2{{deleted here}}
   };
   struct Q {
     ~Q() = delete; // expected-note {{deleted here}}
@@ -117,7 +117,9 @@ namespace SpecialMembers {
     R &operator=(R&&) = delete;
   };
   void g(P &p, Q &q, R &r) {
-    auto pp = [p]{}; // expected-error {{deleted constructor}}
+    // FIXME: The note attached to the second error here is just amazingly bad.
+    auto pp = [p]{}; // expected-error {{deleted constructor}} expected-error {{deleted copy constructor of '(lambda}}
+    // expected-note@-1 {{copy constructor of '' is implicitly deleted because field '' has a deleted copy constructor}}
     auto qq = [q]{}; // expected-error {{deleted function}} expected-note {{because}}
 
     auto a = [r]{}; // expected-note 2{{here}}

From 059b823e709a90d3b5d2d24663830e59b3bdded9 Mon Sep 17 00:00:00 2001
From: John McCall <rjmccall@apple.com>
Date: Fri, 31 May 2019 01:21:36 +0000
Subject: [PATCH 0704/1176] Fix the predefined exponent limit macros for the
 16-bit IEEE format.

The magnitude range of normalized _Float16 is 2^-14 (~6e-5) to
(2-2^-10)*2^15 (65504).  You might think, then, that the code is
correct to defne FLT16_MIN_EXP and FLT16_MAX_EXP to be -14 and 15
respectively.  However, for some reason the C specification actually
specifies a bias for these macros:

C11 5.2.4.2.2:

  - minimum negative integer such that FLT_RADIX raised to one less than
    that power is a normalized floating-point number, e_min:
      FLT_MIN_EXP
      DBL_MIN_EXP
      LDBL_MIN_EXP

  - maximum integer such that FLT_RADIX raised to one less than that
    power is a representable finite floating-point number, e_max:
      FLT_MAX_EXP
      DBL_MAX_EXP
      LDBL_MAX_EXP

FLT16_MIN_EXP and FLT16_MAX_EXP should clearly be biased the same way,
and other compilers do in fact do so, as do our OpenCL headers for `half`.

Additionally, FLT16_MIN_10_EXP is just wrong.

llvm-svn: 362183
---
 clang/lib/Frontend/InitPreprocessor.cpp | 6 +++---
 clang/test/Headers/float16.c            | 6 +++---
 clang/test/Preprocessor/init.c          | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index cc479dd010f9e..1741ba5e5203e 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -122,10 +122,10 @@ static void DefineFloatMacros(MacroBuilder &Builder, StringRef Prefix,
                    "4.94065645841246544176568792868221e-324",
                    "1.92592994438723585305597794258492732e-34");
   int MantissaDigits = PickFP(Sem, 11, 24, 53, 64, 106, 113);
-  int Min10Exp = PickFP(Sem, -13, -37, -307, -4931, -291, -4931);
+  int Min10Exp = PickFP(Sem, -4, -37, -307, -4931, -291, -4931);
   int Max10Exp = PickFP(Sem, 4, 38, 308, 4932, 308, 4932);
-  int MinExp = PickFP(Sem, -14, -125, -1021, -16381, -968, -16381);
-  int MaxExp = PickFP(Sem, 15, 128, 1024, 16384, 1024, 16384);
+  int MinExp = PickFP(Sem, -13, -125, -1021, -16381, -968, -16381);
+  int MaxExp = PickFP(Sem, 16, 128, 1024, 16384, 1024, 16384);
   Min = PickFP(Sem, "6.103515625e-5", "1.17549435e-38", "2.2250738585072014e-308",
                "3.36210314311209350626e-4932",
                "2.00416836000897277799610805135016e-292",
diff --git a/clang/test/Headers/float16.c b/clang/test/Headers/float16.c
index 90ba053b2871f..8d39bf957f4fe 100644
--- a/clang/test/Headers/float16.c
+++ b/clang/test/Headers/float16.c
@@ -13,7 +13,7 @@
 
 #ifndef FLT16_MIN_10_EXP
     #error "Macro FLT16_MIN_10_EXP is missing."
-#elif   FLT16_MIN_10_EXP > -13
+#elif   FLT16_MIN_10_EXP > -4
     #error "Macro FLT16_MIN_10_EXP is invalid."
 #endif
 
@@ -21,7 +21,7 @@ _Static_assert(FLT16_MIN_10_EXP == __FLT16_MIN_10_EXP__, "");
 
 #ifndef FLT16_MIN_EXP
     #error "Macro FLT16_MIN_EXP is missing."
-#elif   FLT16_MIN_EXP > -14
+#elif   FLT16_MIN_EXP > -13
     #error "Macro FLT16_MIN_EXP is invalid."
 #endif
 
@@ -37,7 +37,7 @@ _Static_assert(FLT16_MAX_10_EXP == __FLT16_MAX_10_EXP__, "");
 
 #ifndef FLT16_MAX_EXP
     #error "Macro FLT16_MAX_EXP is missing."
-#elif   FLT16_MAX_EXP < 15
+#elif   FLT16_MAX_EXP < 16
     #error "Macro FLT16_MAX_EXP is invalid."
 #endif
 
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 8eb101c6a416f..8df3b4bd2ccf4 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -310,10 +310,10 @@
 // AARCH64:#define __FLT16_HAS_QUIET_NAN__ 1
 // AARCH64:#define __FLT16_MANT_DIG__ 11
 // AARCH64:#define __FLT16_MAX_10_EXP__ 4
-// AARCH64:#define __FLT16_MAX_EXP__ 15
+// AARCH64:#define __FLT16_MAX_EXP__ 16
 // AARCH64:#define __FLT16_MAX__ 6.5504e+4F16
-// AARCH64:#define __FLT16_MIN_10_EXP__ (-13)
-// AARCH64:#define __FLT16_MIN_EXP__ (-14)
+// AARCH64:#define __FLT16_MIN_10_EXP__ (-4)
+// AARCH64:#define __FLT16_MIN_EXP__ (-13)
 // AARCH64:#define __FLT16_MIN__ 6.103515625e-5F16
 // AARCH64:#define __FLT_DENORM_MIN__ 1.40129846e-45F
 // AARCH64:#define __FLT_DIG__ 6

From dbd3ce92e691848a34fb7623b487d699de7674d7 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Fri, 31 May 2019 01:25:16 +0000
Subject: [PATCH 0705/1176] PR39728: When completing a class, complete the
 destructor first.

We need to know whether the destructor is trivial in order to tell
whether other parts of the class are valid (in particular, this affects
whether the type is a literal type, which affects whether defaulted
special members can be declared constexpr or are implicitly constexpr).

llvm-svn: 362184
---
 clang/lib/Sema/SemaDeclCXX.cpp                | 92 +++++++++++--------
 .../SemaCXX/constant-expression-cxx1y.cpp     | 16 ++++
 2 files changed, 69 insertions(+), 39 deletions(-)

diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 35863a326628b..0956aff21e174 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -6125,9 +6125,60 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
   if (HasTrivialABI)
     Record->setHasTrivialSpecialMemberForCall();
 
+  auto CompleteMemberFunction = [&](CXXMethodDecl *M) {
+    // Check whether the explicitly-defaulted special members are valid.
+    if (!M->isInvalidDecl() && M->isExplicitlyDefaulted())
+      CheckExplicitlyDefaultedSpecialMember(M);
+
+    // For an explicitly defaulted or deleted special member, we defer
+    // determining triviality until the class is complete. That time is now!
+    CXXSpecialMember CSM = getSpecialMember(M);
+    if (!M->isImplicit() && !M->isUserProvided()) {
+      if (CSM != CXXInvalid) {
+        M->setTrivial(SpecialMemberIsTrivial(M, CSM));
+        // Inform the class that we've finished declaring this member.
+        Record->finishedDefaultedOrDeletedMember(M);
+        M->setTrivialForCall(
+            HasTrivialABI ||
+            SpecialMemberIsTrivial(M, CSM, TAH_ConsiderTrivialABI));
+        Record->setTrivialForCallFlags(M);
+      }
+    }
+
+    // Set triviality for the purpose of calls if this is a user-provided
+    // copy/move constructor or destructor.
+    if ((CSM == CXXCopyConstructor || CSM == CXXMoveConstructor ||
+         CSM == CXXDestructor) && M->isUserProvided()) {
+      M->setTrivialForCall(HasTrivialABI);
+      Record->setTrivialForCallFlags(M);
+    }
+
+    if (!M->isInvalidDecl() && M->isExplicitlyDefaulted() &&
+        M->hasAttr<DLLExportAttr>()) {
+      if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
+          M->isTrivial() &&
+          (CSM == CXXDefaultConstructor || CSM == CXXCopyConstructor ||
+           CSM == CXXDestructor))
+        M->dropAttr<DLLExportAttr>();
+
+      if (M->hasAttr<DLLExportAttr>()) {
+        DefineImplicitSpecialMember(*this, M, M->getLocation());
+        ActOnFinishInlineFunctionDef(M);
+      }
+    }
+  };
+
   bool HasMethodWithOverrideControl = false,
        HasOverridingMethodWithoutOverrideControl = false;
   if (!Record->isDependentType()) {
+    // Check the destructor before any other member function. We need to
+    // determine whether it's trivial in order to determine whether the claas
+    // type is a literal type, which is a prerequisite for determining whether
+    // other special member functions are valid and whether they're implicitly
+    // 'constexpr'.
+    if (CXXDestructorDecl *Dtor = Record->getDestructor())
+      CompleteMemberFunction(Dtor);
+
     for (auto *M : Record->methods()) {
       // See if a method overloads virtual methods in a base
       // class without overriding any.
@@ -6137,46 +6188,9 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
         HasMethodWithOverrideControl = true;
       else if (M->size_overridden_methods() > 0)
         HasOverridingMethodWithoutOverrideControl = true;
-      // Check whether the explicitly-defaulted special members are valid.
-      if (!M->isInvalidDecl() && M->isExplicitlyDefaulted())
-        CheckExplicitlyDefaultedSpecialMember(M);
-
-      // For an explicitly defaulted or deleted special member, we defer
-      // determining triviality until the class is complete. That time is now!
-      CXXSpecialMember CSM = getSpecialMember(M);
-      if (!M->isImplicit() && !M->isUserProvided()) {
-        if (CSM != CXXInvalid) {
-          M->setTrivial(SpecialMemberIsTrivial(M, CSM));
-          // Inform the class that we've finished declaring this member.
-          Record->finishedDefaultedOrDeletedMember(M);
-          M->setTrivialForCall(
-              HasTrivialABI ||
-              SpecialMemberIsTrivial(M, CSM, TAH_ConsiderTrivialABI));
-          Record->setTrivialForCallFlags(M);
-        }
-      }
-
-      // Set triviality for the purpose of calls if this is a user-provided
-      // copy/move constructor or destructor.
-      if ((CSM == CXXCopyConstructor || CSM == CXXMoveConstructor ||
-           CSM == CXXDestructor) && M->isUserProvided()) {
-        M->setTrivialForCall(HasTrivialABI);
-        Record->setTrivialForCallFlags(M);
-      }
 
-      if (!M->isInvalidDecl() && M->isExplicitlyDefaulted() &&
-          M->hasAttr<DLLExportAttr>()) {
-        if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015) &&
-            M->isTrivial() &&
-            (CSM == CXXDefaultConstructor || CSM == CXXCopyConstructor ||
-             CSM == CXXDestructor))
-          M->dropAttr<DLLExportAttr>();
-
-        if (M->hasAttr<DLLExportAttr>()) {
-          DefineImplicitSpecialMember(*this, M, M->getLocation());
-          ActOnFinishInlineFunctionDef(M);
-        }
-      }
+      if (!isa<CXXDestructorDecl>(M))
+        CompleteMemberFunction(M);
     }
   }
 
diff --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp
index fe69d10502365..6a344c8d4a5aa 100644
--- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp
@@ -1205,3 +1205,19 @@ namespace ObjectsUnderConstruction {
   // The lifetime of 'n' begins at the initialization, not before.
   constexpr int n = ++const_cast<int&>(n); // expected-error {{constant expression}} expected-note {{modification}}
 }
+
+namespace PR39728 {
+  struct Comment0 {
+    Comment0 &operator=(const Comment0 &) = default;
+    ~Comment0() = default;
+  };
+  constexpr void f() {
+    Comment0 a;
+    a = a;
+  }
+  static_assert((f(), true), "");
+  struct Comment1 {
+    constexpr Comment1 &operator=(const Comment1 &) = default; // OK
+    ~Comment1() = default;
+  };
+}

From 2ab7af29c6cad5abac3b4da48df33d21b87fc216 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Fri, 31 May 2019 01:34:51 +0000
Subject: [PATCH 0706/1176] [CMake] Provide an option to use relative paths in
 debug info

CMake always uses absolute file paths in the generated compiler
invocation which results in absolute file paths being embedded in debug
info. This is undesirable when building a toolchain e.g. on bots as the
debug info may embed the bot source checkout path which is meaningless
anywhere else.

This change introduces the LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO which uses
-fdebug-prefix-map (where supported) options to rewrite paths embedded
into debug info with relative ones. Additionally, LLVM_SOURCE_PREFIX can
be used to override the path to source directory with a different one.

Differential Revision: https://reviews.llvm.org/D62622

llvm-svn: 362185
---
 clang/cmake/caches/Fuchsia-stage2.cmake          |  1 +
 llvm/cmake/modules/HandleLLVMOptions.cmake       | 16 ++++++++++++++++
 .../cmake/modules/LLVMExternalProjectUtils.cmake |  2 ++
 llvm/runtimes/CMakeLists.txt                     |  2 ++
 4 files changed, 21 insertions(+)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index cedc69fa6f6e1..100a9b164e5ea 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -15,6 +15,7 @@ set(LLVM_ENABLE_ZLIB ON CACHE BOOL "")
 set(LLVM_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "")
 set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "")
 set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "")
+set(LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO ON CACHE BOOL "")
 
 set(CLANG_DEFAULT_CXX_STDLIB libc++ CACHE STRING "")
 if(NOT APPLE)
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index cb9a01e1d39f7..1aa0804a3c0d2 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -977,3 +977,19 @@ if(macos_signposts_available)
     endif()
   endif()
 endif()
+
+option(LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO "Use relative paths in debug info" OFF)
+set(LLVM_SOURCE_PREFIX "" CACHE STRING "Use prefix for sources in debug info")
+
+if(LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO)
+  check_c_compiler_flag("-fdebug-prefix-map=foo=bar" SUPPORTS_FDEBUG_PREFIX_MAP)
+  if(LLVM_ENABLE_PROJECTS_USED)
+    get_filename_component(source_root "${LLVM_MAIN_SRC_DIR}/.." ABSOLUTE)
+  else()
+    set(source_root "${LLVM_MAIN_SRC_DIR}")
+  endif()
+  file(RELATIVE_PATH relative_root "${source_root}" "${CMAKE_BINARY_DIR}")
+  append_if(SUPPORTS_FDEBUG_PREFIX_MAP "-fdebug-prefix-map=${CMAKE_BINARY_DIR}=${relative_root}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+  append_if(SUPPORTS_FDEBUG_PREFIX_MAP "-fdebug-prefix-map=${source_root}/=${LLVM_SOURCE_PREFIX}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+  add_flag_if_supported("-no-canonical-prefixes" NO_CANONICAL_PREFIXES)
+endif()
diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
index 8190896737f10..9a6adab165673 100644
--- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
+++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -231,6 +231,8 @@ function(llvm_ExternalProject_Add name source_dir)
                -DLLVM_ENABLE_WERROR=${LLVM_ENABLE_WERROR}
                -DLLVM_HOST_TRIPLE=${LLVM_HOST_TRIPLE}
                -DLLVM_HAVE_LINK_VERSION_SCRIPT=${LLVM_HAVE_LINK_VERSION_SCRIPT}
+               -DLLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO=${LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO}
+               -DLLVM_SOURCE_PREFIX=${LLVM_SOURCE_PREFIX}
                -DPACKAGE_VERSION=${PACKAGE_VERSION}
                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index e91003b5b19f6..6ac92cc6703cc 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -375,6 +375,7 @@ else() # if this is included from LLVM's CMake
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}
                                         -DLLVM_DEFAULT_TARGET_TRIPLE=${TARGET_TRIPLE}
+                                        -DLLVM_ENABLE_PROJECTS_USED=${LLVM_ENABLE_PROJECTS_USED}
                                         -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON
                                         -DCMAKE_C_COMPILER_TARGET=${TARGET_TRIPLE}
                                         -DCMAKE_CXX_COMPILER_TARGET=${TARGET_TRIPLE}
@@ -464,6 +465,7 @@ else() # if this is included from LLVM's CMake
                              CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off
                                         -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS}
                                         -DLLVM_DEFAULT_TARGET_TRIPLE=${target}
+                                        -DLLVM_ENABLE_PROJECTS_USED=${LLVM_ENABLE_PROJECTS_USED}
                                         -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON
                                         -DCMAKE_C_COMPILER_TARGET=${target}
                                         -DCMAKE_CXX_COMPILER_TARGET=${target}

From 48387ec187266e2c4df9bc7016bb6e7267ea5cee Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Fri, 31 May 2019 01:50:07 +0000
Subject: [PATCH 0707/1176] Revert "[X86] Fix i386 struct and union parameter
 alignment"

This reverts commit d61cb749f4ac2c90244906d756e80a5c4a7ffa89 (SVN:
361934).

According to James suggestion, revert this change. Please ref:
https://reviews.llvm.org/D60748

llvm-svn: 362186
---
 clang/lib/CodeGen/TargetInfo.cpp            | 13 ++---------
 clang/test/CodeGen/x86_32-align-linux.c     | 25 ---------------------
 clang/test/CodeGen/x86_32-arguments-linux.c | 24 ++++++++++----------
 3 files changed, 14 insertions(+), 48 deletions(-)
 delete mode 100644 clang/test/CodeGen/x86_32-align-linux.c

diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 4b96aa13d00e6..24b7b9f97f9a7 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1010,7 +1010,6 @@ class X86_32ABIInfo : public SwiftABIInfo {
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
-  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1077,7 +1076,6 @@ class X86_32ABIInfo : public SwiftABIInfo {
       IsWin32StructABI(Win32StructABI),
       IsSoftFloatABI(SoftFloatABI),
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
-      IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
       DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1494,15 +1492,8 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
   if (Align <= MinABIStackAlignInBytes)
     return 0; // Use default alignment.
 
-  if (IsLinuxABI) {
-    // i386 System V ABI 2.1: Structures and unions assume the alignment of their
-    // most strictly aligned component.
-    //
-    // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
-    // want to spend any effort dealing with the ramifications of ABI breaks.
-    return Align;
-  } else if (!IsDarwinVectorABI) {
-    // On non-Darwin and non-Linux, the stack type alignment is always 4.
+  // On non-Darwin, the stack type alignment is always 4.
+  if (!IsDarwinVectorABI) {
     // Set explicit alignment, since we may need to realign the top.
     return MinABIStackAlignInBytes;
   }
diff --git a/clang/test/CodeGen/x86_32-align-linux.c b/clang/test/CodeGen/x86_32-align-linux.c
deleted file mode 100644
index 5fce3f5f295c7..0000000000000
--- a/clang/test/CodeGen/x86_32-align-linux.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
-// RUN: FileCheck < %t %s
-
-#include <immintrin.h>
-
-typedef union {
-        int d[4];
-        __m128 m;
-} M128;
-
-extern void foo(int, ...);
-
-M128 a;
-
-// CHECK-LABEL: define void @test
-// CHECK: entry:
-// CHECK: call void (i32, ...) @foo(i32 1, %union.M128* byval align 16
-// CHECK: call void (i32, ...) @foo(i32 1, <4 x float>
-
-void test(void)
-{
-  foo(1, a);
-  foo(1, a.m);
-}
-
diff --git a/clang/test/CodeGen/x86_32-arguments-linux.c b/clang/test/CodeGen/x86_32-arguments-linux.c
index 3718980ba16b9..02eac51216af7 100644
--- a/clang/test/CodeGen/x86_32-arguments-linux.c
+++ b/clang/test/CodeGen/x86_32-arguments-linux.c
@@ -3,21 +3,21 @@
 
 // CHECK-LABEL: define void @f56(
 // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 8 %a3,
-// CHECK: <1 x double> %a4, %struct.s56_2* byval align 8 %a5,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval align 16 %a9,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 32 %a11,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval align 32 %a13)
+// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4,
+// CHECK: <1 x double> %a4, %struct.s56_2* byval align 4,
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval align 4,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval align 4)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 8 %{{[^ ]*}},
-// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 8 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 16 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 16 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 32 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 32 %{{[^ ]*}})
+// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}},
+// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}},
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 4 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 4 %{{[^ ]*}},
+// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 4 %{{[^ ]*}},
+// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 4 %{{[^ ]*}})
 // CHECK: }
 //
 // <rdar://problem/7964854> [i386] clang misaligns long double in structures

From d2f53af605ab0f43beb73a62cee0b2e9fb24f11e Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Fri, 31 May 2019 02:23:33 +0000
Subject: [PATCH 0708/1176] Redirect test output to /dev/null

llvm-svn: 362187
---
 clang/test/Driver/armv8.1m.main.s | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s
index ca6becd933f60..546ddca4ef14f 100644
--- a/clang/test/Driver/armv8.1m.main.s
+++ b/clang/test/Driver/armv8.1m.main.s
@@ -1,13 +1,13 @@
 # REQUIRES: arm-registered-target
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8-m.main %s 2>%t
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8-m.main -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V8M < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main %s 2>%t
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+dsp %s 2>%t
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+dsp -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_DSP < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve %s 2>%t
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
-# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp %s 2>%t
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
 
 .syntax unified

From 2e67d0c842c5c937f1f1053f211e1c5755f9edad Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Fri, 31 May 2019 02:50:41 +0000
Subject: [PATCH 0709/1176] [X86] Add VP2INTERSECT instructions

Support Intel AVX512 VP2INTERSECT instructions in llvm

Patch by Xiang Zhang (xiangzhangllvm)

Differential Revision: https://reviews.llvm.org/D62366

llvm-svn: 362188
---
 llvm/include/llvm/IR/IntrinsicsX86.td         |  28 +
 .../Support/X86DisassemblerDecoderCommon.h    |   1 +
 llvm/lib/Target/X86/AsmParser/X86Operand.h    |  49 ++
 .../X86/Disassembler/X86Disassembler.cpp      |   1 +
 .../Disassembler/X86DisassemblerDecoder.cpp   |   4 +
 .../X86/Disassembler/X86DisassemblerDecoder.h |   7 +
 .../X86/MCTargetDesc/X86InstPrinterCommon.cpp |  25 +
 .../X86/MCTargetDesc/X86InstPrinterCommon.h   |   1 +
 llvm/lib/Target/X86/X86.td                    |   4 +
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  23 +
 llvm/lib/Target/X86/X86ISelLowering.h         |   3 +
 llvm/lib/Target/X86/X86InstrAVX512.td         |  57 ++
 llvm/lib/Target/X86/X86InstrFragmentsSIMD.td  |   4 +
 llvm/lib/Target/X86/X86InstrInfo.cpp          |   8 +
 llvm/lib/Target/X86/X86InstrInfo.td           |  28 +
 llvm/lib/Target/X86/X86MCInstLower.cpp        |  71 +++
 llvm/lib/Target/X86/X86RegisterInfo.td        |  12 +
 llvm/lib/Target/X86/X86Subtarget.h            |   4 +
 .../X86/avx512vlvp2intersect-intrinsics.ll    | 593 ++++++++++++++++++
 .../X86/avx512vp2intersect-intrinsics.ll      | 240 +++++++
 .../X86/inline-asm-avx512f-x-constraint.ll    |   3 +-
 .../X86/vp2intersect_multiple_pairs.ll        | 150 +++++
 .../X86/avx512-vp2intersect-32-att.txt        |  16 +
 .../X86/avx512-vp2intersect-64-att.txt        |  16 +
 .../X86/avx512_vp2intersect-32-intel.txt      |  43 ++
 .../X86/avx512_vp2intersect-64-intel.txt      |  43 ++
 .../X86/avx512vp2intersectvl-att.txt          |  86 +++
 .../X86/avx512vp2intersectvl-intel.txt        |  85 +++
 .../X86/x86-64-avx512vp2intersectvl-att.txt   |  85 +++
 .../X86/x86-64-avx512vp2intersectvl-intel.txt |  85 +++
 llvm/test/MC/X86/avx512vp2intersectvl-att.s   | 113 ++++
 llvm/test/MC/X86/avx512vp2intersectvl-intel.s | 113 ++++
 .../MC/X86/x86-32-avx512_vp2intersect-intel.s |  57 ++
 .../MC/X86/x86-32-avx512vp2intersect-att.s    | 225 +++++++
 .../MC/X86/x86-64-avx512_vp2intersect-intel.s |  57 ++
 .../MC/X86/x86-64-avx512vp2intersect-att.s    | 231 +++++++
 .../MC/X86/x86-64-avx512vp2intersectvl-att.s  | 113 ++++
 .../X86/x86-64-avx512vp2intersectvl-intel.s   | 113 ++++
 llvm/utils/TableGen/X86RecognizableInstr.cpp  |  20 +
 39 files changed, 2816 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
 create mode 100644 llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
 create mode 100644 llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
 create mode 100644 llvm/test/MC/Disassembler/X86/avx512-vp2intersect-32-att.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/avx512-vp2intersect-64-att.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/avx512_vp2intersect-32-intel.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/avx512_vp2intersect-64-intel.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-att.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-intel.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-att.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-intel.txt
 create mode 100644 llvm/test/MC/X86/avx512vp2intersectvl-att.s
 create mode 100644 llvm/test/MC/X86/avx512vp2intersectvl-intel.s
 create mode 100644 llvm/test/MC/X86/x86-32-avx512_vp2intersect-intel.s
 create mode 100644 llvm/test/MC/X86/x86-32-avx512vp2intersect-att.s
 create mode 100644 llvm/test/MC/X86/x86-64-avx512_vp2intersect-intel.s
 create mode 100644 llvm/test/MC/X86/x86-64-avx512vp2intersect-att.s
 create mode 100644 llvm/test/MC/X86/x86-64-avx512vp2intersectvl-att.s
 create mode 100644 llvm/test/MC/X86/x86-64-avx512vp2intersectvl-intel.s

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index ebf8f2cdd9054..2003aa3fd1db4 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4697,6 +4697,34 @@ let TargetPrefix = "x86" in {
                     [IntrNoMem, ImmArg<3>]>;
 }
 
+// vp2intersect
+let TargetPrefix = "x86" in {
+  def int_x86_avx512_vp2intersect_q_512 :
+          Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
+                    [llvm_v8i64_ty, llvm_v8i64_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_q_256 :
+          Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
+                    [llvm_v4i64_ty, llvm_v4i64_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_q_128 :
+          Intrinsic<[llvm_v2i1_ty, llvm_v2i1_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_d_512 :
+          Intrinsic<[llvm_v16i1_ty, llvm_v16i1_ty],
+                    [llvm_v16i32_ty, llvm_v16i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_d_256 :
+          Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
+                    [llvm_v8i32_ty, llvm_v8i32_ty],
+                    [IntrNoMem]>;
+  def int_x86_avx512_vp2intersect_d_128 :
+          Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty],
+                    [IntrNoMem]>;
+}
+
 // Misc.
 let TargetPrefix = "x86" in {
   // NOTE: These comparison intrinsics are not used by clang as long as the
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 0ee0661a9e724..c12d5382dca27 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -429,6 +429,7 @@ enum OperandEncoding {
   ENUM_ENTRY(TYPE_YMM,        "32-byte")                                       \
   ENUM_ENTRY(TYPE_ZMM,        "64-byte")                                       \
   ENUM_ENTRY(TYPE_VK,         "mask register")                                 \
+  ENUM_ENTRY(TYPE_VK_PAIR,    "mask register pair")                            \
   ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
   ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
   ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 24715f7070d2e..a771ba366318e 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -451,6 +451,31 @@ struct X86Operand final : public MCParsedAsmOperand {
       X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
   }
 
+  bool isVK1Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg());
+  }
+
+  bool isVK2Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK2RegClassID].contains(getReg());
+  }
+
+  bool isVK4Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK4RegClassID].contains(getReg());
+  }
+
+  bool isVK8Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK8RegClassID].contains(getReg());
+  }
+
+  bool isVK16Pair() const {
+    return Kind == Register &&
+      X86MCRegisterClasses[X86::VK16RegClassID].contains(getReg());
+  }
+
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
     // Add as immediates when possible.
     if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -482,6 +507,30 @@ struct X86Operand final : public MCParsedAsmOperand {
     addExpr(Inst, getImm());
   }
 
+  void addMaskPairOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    unsigned Reg = getReg();
+    switch (Reg) {
+    case X86::K0:
+    case X86::K1:
+      Reg = X86::K0_K1;
+      break;
+    case X86::K2:
+    case X86::K3:
+      Reg = X86::K2_K3;
+      break;
+    case X86::K4:
+    case X86::K5:
+      Reg = X86::K4_K5;
+      break;
+    case X86::K6:
+    case X86::K7:
+      Reg = X86::K6_K7;
+      break;
+    }
+    Inst.addOperand(MCOperand::createReg(Reg));
+  }
+
   void addMemOperands(MCInst &Inst, unsigned N) const {
     assert((N == 5) && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 9fa4aabab77b0..9a635bbe5f85c 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -694,6 +694,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_XMM:
   case TYPE_YMM:
   case TYPE_ZMM:
+  case TYPE_VK_PAIR:
   case TYPE_VK:
   case TYPE_DEBUGREG:
   case TYPE_CONTROLREG:
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index b5789b5d83727..76aadc794023c 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1468,6 +1468,10 @@ static int readModRM(struct InternalInstruction* insn) {
       if (index > 7)                                      \
         *valid = 0;                                       \
       return prefix##_K0 + index;                         \
+    case TYPE_VK_PAIR:                                    \
+      if (index > 7)                                      \
+        *valid = 0;                                       \
+      return prefix##_K0_K1 + (index / 2);                \
     case TYPE_MM64:                                       \
       return prefix##_MM0 + (index & 0x7);                \
     case TYPE_SEGMENTREG:                                 \
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 8e0749d10cffe..7c0a42c019e35 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -324,6 +324,12 @@ namespace X86Disassembler {
   ENTRY(K6)        \
   ENTRY(K7)
 
+#define REGS_MASK_PAIRS \
+  ENTRY(K0_K1)     \
+  ENTRY(K2_K3)     \
+  ENTRY(K4_K5)     \
+  ENTRY(K6_K7)
+
 #define REGS_SEGMENT \
   ENTRY(ES)          \
   ENTRY(CS)          \
@@ -393,6 +399,7 @@ namespace X86Disassembler {
   REGS_YMM            \
   REGS_ZMM            \
   REGS_MASKS          \
+  REGS_MASK_PAIRS     \
   REGS_SEGMENT        \
   REGS_DEBUG          \
   REGS_CONTROL        \
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 878ce590550b3..a215550769765 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -335,3 +335,28 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
   else if (Flags & X86::IP_HAS_REPEAT)
     O << "\trep\t";
 }
+
+void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &OS) {
+  // In assembly listings, a pair is represented by one of its members, any
+  // of the two.  Here, we pick k0, k2, k4, k6, but we could as well
+  // print K2_K3 as "k3".  It would probably make a lot more sense, if
+  // the assembly would look something like:
+  // "vp2intersect %zmm5, %zmm7, {%k2, %k3}"
+  // but this can work too.
+  switch (MI->getOperand(OpNo).getReg()) {
+  case X86::K0_K1:
+    printRegName(OS, X86::K0);
+    return;
+  case X86::K2_K3:
+    printRegName(OS, X86::K2);
+    return;
+  case X86::K4_K5:
+    printRegName(OS, X86::K4);
+    return;
+  case X86::K6_K7:
+    printRegName(OS, X86::K6);
+    return;
+  }
+  llvm_unreachable("Unknown mask pair register name");
+}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index 03c761dbcb74c..8e28f24b619a9 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -33,6 +33,7 @@ class X86InstPrinterCommon : public MCInstPrinter {
 protected:
   void printInstFlags(const MCInst *MI, raw_ostream &O);
   void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 4148a4458251b..52d90d711c0db 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -173,6 +173,10 @@ def FeatureBF16    : SubtargetFeature<"avx512bf16", "HasBF16", "true",
 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                        "Enable AVX-512 Bit Algorithms",
                         [FeatureBWI]>;
+def FeatureVP2INTERSECT  : SubtargetFeature<"avx512vp2intersect",
+                                            "HasVP2INTERSECT", "true",
+                                            "Enable AVX-512 vp2intersect",
+                                            [FeatureAVX512]>;
 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                          "Enable packed carry-less multiplication instructions",
                                [FeatureSSE2]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f97907fb7e5b8..253f4487976c5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22944,6 +22944,28 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
+
+  case Intrinsic::x86_avx512_vp2intersect_q_512:
+  case Intrinsic::x86_avx512_vp2intersect_q_256:
+  case Intrinsic::x86_avx512_vp2intersect_q_128:
+  case Intrinsic::x86_avx512_vp2intersect_d_512:
+  case Intrinsic::x86_avx512_vp2intersect_d_256:
+  case Intrinsic::x86_avx512_vp2intersect_d_128: {
+    MVT MaskVT = Op.getSimpleValueType();
+
+    SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
+    SDLoc DL(Op);
+
+    SDValue Operation =
+        DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs,
+                    Op->getOperand(1), Op->getOperand(2));
+
+    SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
+                                                 MaskVT, Operation);
+    SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
+                                                 MaskVT, Operation);
+    return DAG.getMergeValues({Result0, Result1}, DL);
+  }
   }
 }
 
@@ -28284,6 +28306,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::TPAUSE:             return "X86ISD::TPAUSE";
   case X86ISD::ENQCMD:             return "X86ISD:ENQCMD";
   case X86ISD::ENQCMDS:            return "X86ISD:ENQCMDS";
+  case X86ISD::VP2INTERSECT:       return "X86ISD::VP2INTERSECT";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a3ebe1001e398..7eed866614a04 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -592,6 +592,9 @@ namespace llvm {
       // Enqueue Stores Instructions
       ENQCMD, ENQCMDS,
 
+      // For avx512-vp2intersect
+      VP2INTERSECT,
+
       // Compare and swap.
       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LCMPXCHG8_DAG,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 97e696981b186..20380bb84481f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -26,6 +26,10 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
   // Corresponding mask register class.
   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
 
+  // Corresponding mask register pair class.
+  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
+                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
+
   // Corresponding write-mask register class.
   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
 
@@ -12556,6 +12560,59 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                      Sched<[SchedWriteFMA.ZMM.Folded]>;
 }
 
+let hasSideEffects = 0 in {
+  def MASKPAIR16STORE : PseudoI<(outs), (ins VK16PAIR:$src, anymem:$dst),
+             [(store VK16PAIR:$src, addr:$dst)]>;
+  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src),
+             [(set VK16PAIR:$dst, (load addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// VP2INTERSECT
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
+  def rr : I<0x68, MRMSrcReg,
+                  (outs _.KRPC:$dst),
+                  (ins _.RC:$src1, _.RC:$src2),
+                  !strconcat("vp2intersect", _.Suffix,
+                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set _.KRPC:$dst, (X86vp2intersect
+                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
+                  EVEX_4V, T8XD;
+
+  def rm : I<0x68, MRMSrcMem,
+                  (outs _.KRPC:$dst),
+                  (ins  _.RC:$src1, _.MemOp:$src2),
+                  !strconcat("vp2intersect", _.Suffix,
+                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set _.KRPC:$dst, (X86vp2intersect
+                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
+                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
+
+  def rmb : I<0x68, MRMSrcMem,
+                  (outs _.KRPC:$dst),
+                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
+                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
+                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
+                  [(set _.KRPC:$dst, (X86vp2intersect
+                             _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
+                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
+  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
+    defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
+
+  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
+    defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
+    defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
+  }
+}
+
+defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
+defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
+
 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo _SrcVTInfo,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 8a6f09f5cacf5..50d81fcaf83ea 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -505,6 +505,10 @@ def X86FnmsubRnd    : SDNode<"X86ISD::FNMSUB_RND",    SDTFmaRound, [SDNPCommutat
 def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound, [SDNPCommutative]>;
 def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound, [SDNPCommutative]>;
 
+def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT",
+                              SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+                                                   SDTCisVec<1>, SDTCisSameAs<1, 2>]>>;
+
 def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
                            SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
 def x86vpmadd52l     : SDNode<"X86ISD::VPMADD52L",     SDTIFma, [SDNPCommutative]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 20d3cf0d92788..124ad5dfdf4c0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2877,6 +2877,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
       assert(STI.hasBWI() && "KMOVD requires BWI");
       return load ? X86::KMOVDkm : X86::KMOVDmk;
     }
+    // All of these mask pair classes have the same spill size, the same kind
+    // of kmov instructions can be used with all of them.
+    if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
+        X86::VK16PAIRRegClass.hasSubClassEq(RC))
+      return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
     llvm_unreachable("Unknown 4-byte regclass");
   case 8:
     if (X86::GR64RegClass.hasSubClassEq(RC))
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 98af217ebcf85..85255096a7d3a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -757,6 +757,33 @@ def lea64mem : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+let RenderMethod = "addMaskPairOperands" in {
+  def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
+  def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
+  def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
+  def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
+  def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
+}
+
+def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
+  let ParserMatchClass = VK1PairAsmOperand;
+}
+
+def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
+  let ParserMatchClass = VK2PairAsmOperand;
+}
+
+def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
+  let ParserMatchClass = VK4PairAsmOperand;
+}
+
+def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
+  let ParserMatchClass = VK8PairAsmOperand;
+}
+
+def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
+  let ParserMatchClass = VK16PairAsmOperand;
+}
 
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
@@ -843,6 +870,7 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
 def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
 def PKU        : Predicate<"Subtarget->hasPKU()">;
 def HasVNNI    : Predicate<"Subtarget->hasVNNI()">;
+def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
 def HasBF16      : Predicate<"Subtarget->hasBF16()">;
 
 def HasBITALG    : Predicate<"Subtarget->hasBITALG()">;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 6ed9a533d51f4..9de2d18e0be11 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1680,6 +1680,77 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   case X86::TLS_base_addr64:
     return LowerTlsAddr(MCInstLowering, *MI);
 
+  // Loading/storing mask pairs requires two kmov operations. The second one of these
+  // needs a 2 byte displacement relative to the specified address (with 32 bit spill
+  // size). The pairs of 1bit masks up to 16 bit masks all use the same spill size,
+  // they all are stored using MASKPAIR16STORE, loaded using MASKPAIR16LOAD.
+  //
+  // The displacement value might wrap around in theory, thus the asserts in both
+  // cases.
+  case X86::MASKPAIR16LOAD: {
+    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
+    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+    const X86RegisterInfo *RI =
+      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+    unsigned Reg = MI->getOperand(0).getReg();
+    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+    // Load the first mask register
+    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
+    MIB.addReg(Reg0);
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+      MIB.addOperand(Op.getValue());
+    }
+    EmitAndCountInstruction(MIB);
+
+    // Load the second mask register of the pair
+    MIB = MCInstBuilder(X86::KMOVWkm);
+    MIB.addReg(Reg1);
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      if (i == X86::AddrDisp) {
+        MIB.addImm(Disp + 2);
+      } else {
+        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+        MIB.addOperand(Op.getValue());
+      }
+    }
+    EmitAndCountInstruction(MIB);
+    return;
+  }
+
+  case X86::MASKPAIR16STORE: {
+    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
+    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+    const X86RegisterInfo *RI =
+      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+    unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
+    unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+    unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+    // Store the first mask register
+    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
+    for (int i = 0; i < X86::AddrNumOperands; ++i)
+      MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
+    MIB.addReg(Reg0);
+    EmitAndCountInstruction(MIB);
+
+    // Store the second mask register of the pair
+    MIB = MCInstBuilder(X86::KMOVWmk);
+    for (int i = 0; i < X86::AddrNumOperands; ++i) {
+      if (i == X86::AddrDisp) {
+        MIB.addImm(Disp + 2);
+      } else {
+        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
+        MIB.addOperand(Op.getValue());
+      }
+    }
+    MIB.addReg(Reg1);
+    EmitAndCountInstruction(MIB);
+    return;
+  }
+
   case X86::MOVPC32r: {
     // This is a pseudo op for a two instruction sequence with a label, which
     // looks like:
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index c0acff9c8c3ec..0528b90c1fd53 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -28,6 +28,8 @@ let Namespace = "X86" in {
   def sub_32bit    : SubRegIndex<32>;
   def sub_xmm      : SubRegIndex<128>;
   def sub_ymm      : SubRegIndex<256>;
+  def sub_mask_0   : SubRegIndex<-1>;
+  def sub_mask_1   : SubRegIndex<-1, -1>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -594,6 +596,16 @@ def VK16    : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
 def VK32    : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
 def VK64    : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
 
+// Mask register pairs
+def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
+                             [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
+
+def VK1PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK2PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK4PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK8PAIR   : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK16PAIR  : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+
 def VK1WM   : RegisterClass<"X86", [v1i1],  16,  (sub VK1, K0)> {let Size = 16;}
 def VK2WM   : RegisterClass<"X86", [v2i1],  16,  (sub VK2, K0)> {let Size = 16;}
 def VK4WM   : RegisterClass<"X86", [v4i1],  16,  (sub VK4, K0)> {let Size = 16;}
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 6fefe23182f9c..9e5613654afa9 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -362,6 +362,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   /// Processor has AVX-512 Bit Algorithms instructions
   bool HasBITALG = false;
 
+  /// Processor has AVX-512 vp2intersect instructions
+  bool HasVP2INTERSECT = false;
+
   /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX = false;
 
@@ -679,6 +682,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool hasPKU() const { return HasPKU; }
   bool hasVNNI() const { return HasVNNI; }
   bool hasBF16() const { return HasBF16; }
+  bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
   bool hasBITALG() const { return HasBITALG; }
   bool hasMPX() const { return HasMPX; }
   bool hasSHSTK() const { return HasSHSTK; }
diff --git a/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
new file mode 100644
index 0000000000000..3dc76ec069291
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll
@@ -0,0 +1,593 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+define void @test_mm256_2intersect_epi32(<4 x i64> %a, <4 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm256_2intersect_epi32:
+; X86: # %bb.0:                                # %entry
+; X86-NEXT:    movl    4(%esp), %eax           # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vp2intersectd   %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0xc1]
+; X86-NEXT:    kmovw   %k1, %ecx               # encoding: [0xc5,0xf8,0x93,0xc9]
+; X86-NEXT:    kmovw   %k0, %edx               # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movb    %dl, (%eax)             # encoding: [0x88,0x10]
+; X86-NEXT:    movl    8(%esp), %eax           # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_2intersect_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vp2intersectd   %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0xc1]
+; X64-NEXT:    kmovw   %k1, %eax               # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    movb    %cl, (%rdi)             # encoding: [0x88,0x0f]
+; X64-NEXT:    movb    %al, (%rsi)             # encoding: [0x88,0x06]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = bitcast <4 x i64> %b to <8 x i32>
+  %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %0, <8 x i32> %1)
+  %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
+  %4 = bitcast i8* %m0 to <8 x i1>*
+  store <8 x i1> %3, <8 x i1>* %4, align 8
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
+  %6 = bitcast i8* %m1 to <8 x i1>*
+  store <8 x i1> %5, <8 x i1>* %6, align 8
+  ret void
+}
+
+define void @test_mm256_2intersect_epi64(<4 x i64> %a, <4 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm256_2intersect_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
+; X86-NEXT:    vp2intersectq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0xc1]
+; X86-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb %cl, (%eax) # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_2intersect_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vp2intersectq   %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0xc1]
+; X64-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdi)             # encoding: [0x88,0x07]
+; X64-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rsi)             # encoding: [0x88,0x06]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %a, <4 x i64> %b)
+  %1 = extractvalue { <4 x i1>, <4 x i1> } %0, 0
+  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %3 = bitcast <8 x i1> %2 to i8
+  store i8 %3, i8* %m0, align 1
+  %4 = extractvalue { <4 x i1>, <4 x i1> } %0, 1
+  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %6 = bitcast <8 x i1> %5 to i8
+  store i8 %6, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm256_2intersect_epi32_p(<4 x i64>* nocapture readonly %a, <4 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm256_2intersect_epi32_p:
+; X86:       # %bb.0:                                # %entry
+; X86-NEXT:    movl    12(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %ecx           # encoding: [0x8b,0x4c,0x24,0x08]
+; X86-NEXT:    movl    4(%esp), %edx           # encoding: [0x8b,0x54,0x24,0x04]
+; X86-NEXT:    vmovaps (%edx), %ymm0           # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x02]
+; X86-NEXT:    vp2intersectd   (%ecx), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x01]
+; X86-NEXT:    kmovw   %k1, %ecx               # encoding: [0xc5,0xf8,0x93,0xc9]
+; X86-NEXT:    kmovw   %k0, %edx               # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movb    %dl, (%eax)             # encoding: [0x88,0x10]
+; X86-NEXT:    movl    16(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x10]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_2intersect_epi32_p:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps (%rdi), %ymm0           # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
+; X64-NEXT:    vp2intersectd   (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x28,0x68,0x06]
+; X64-NEXT:    kmovw   %k1, %eax               # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    kmovw   %k0, %esi               # encoding: [0xc5,0xf8,0x93,0xf0]
+; X64-NEXT:    movb    %sil, (%rdx)            # encoding: [0x40,0x88,0x32]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = bitcast <4 x i64>* %a to <8 x i32>*
+  %1 = load <8 x i32>, <8 x i32>* %0, align 32
+  %2 = bitcast <4 x i64>* %b to <8 x i32>*
+  %3 = load <8 x i32>, <8 x i32>* %2, align 32
+  %4 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %1, <8 x i32> %3)
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %4, 0
+  %6 = bitcast i8* %m0 to <8 x i1>*
+  store <8 x i1> %5, <8 x i1>* %6, align 8
+  %7 = extractvalue { <8 x i1>, <8 x i1> } %4, 1
+  %8 = bitcast i8* %m1 to <8 x i1>*
+  store <8 x i1> %7, <8 x i1>* %8, align 8
+  ret void
+}
+
+define void @test_mm256_2intersect_epi64_p(<4 x i64>* nocapture readonly %a, <4 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm256_2intersect_epi64_p:
+; X86:         .cfi_startproc
+; X86-NEXT:    # %bb.0:                                # %entry
+; X86-NEXT:    pushl   %esi                    # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl    20(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl    16(%esp), %ecx          # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl    12(%esp), %edx          # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %esi           # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vmovaps (%esi), %ymm0           # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x06]
+; X86-NEXT:    vp2intersectq   (%edx), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x02]
+; X86-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    popl    %esi                    # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_2intersect_epi64_p:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps (%rdi), %ymm0           # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
+; X64-NEXT:    vp2intersectq   (%rsi), %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0x06]
+; X64-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdx)             # encoding: [0x88,0x02]
+; X64-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load <4 x i64>, <4 x i64>* %a, align 32
+  %1 = load <4 x i64>, <4 x i64>* %b, align 32
+  %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %0, <4 x i64> %1)
+  %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
+  %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %5 = bitcast <8 x i1> %4 to i8
+  store i8 %5, i8* %m0, align 1
+  %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
+  %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %8 = bitcast <8 x i1> %7 to i8
+  store i8 %8, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm256_2intersect_epi32_b(i32* nocapture readonly %a, i32* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm256_2intersect_epi32_b:
+; X86:         .cfi_startproc
+; X86-NEXT: # %bb.0:                                # %entry
+; X86-NEXT:    movl    12(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %ecx           # encoding: [0x8b,0x4c,0x24,0x08]
+; X86-NEXT:    movl    4(%esp), %edx           # encoding: [0x8b,0x54,0x24,0x04]
+; X86-NEXT:    vbroadcastss    (%edx), %ymm0   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x02]
+; X86-NEXT:    vp2intersectd   (%ecx){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x01]
+; X86-NEXT:    kmovw   %k1, %ecx               # encoding: [0xc5,0xf8,0x93,0xc9]
+; X86-NEXT:    kmovw   %k0, %edx               # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movb    %dl, (%eax)             # encoding: [0x88,0x10]
+; X86-NEXT:    movl    16(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x10]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_2intersect_epi32_b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vbroadcastss    (%rdi), %ymm0   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x07]
+; X64-NEXT:    vp2intersectd   (%rsi){1to8}, %ymm0, %k0 # encoding: [0x62,0xf2,0x7f,0x38,0x68,0x06]
+; X64-NEXT:    kmovw   %k1, %eax               # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    kmovw   %k0, %esi               # encoding: [0xc5,0xf8,0x93,0xf0]
+; X64-NEXT:    movb    %sil, (%rdx)            # encoding: [0x40,0x88,0x32]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load i32, i32* %a, align 4
+  %vecinit.i.i = insertelement <8 x i32> undef, i32 %0, i32 0
+  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
+  %1 = load i32, i32* %b, align 4
+  %vecinit.i.i2 = insertelement <8 x i32> undef, i32 %1, i32 0
+  %vecinit7.i.i3 = shufflevector <8 x i32> %vecinit.i.i2, <8 x i32> undef, <8 x i32> zeroinitializer
+  %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %vecinit7.i.i, <8 x i32> %vecinit7.i.i3)
+  %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
+  %4 = bitcast i8* %m0 to <8 x i1>*
+  store <8 x i1> %3, <8 x i1>* %4, align 8
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
+  %6 = bitcast i8* %m1 to <8 x i1>*
+  store <8 x i1> %5, <8 x i1>* %6, align 8
+  ret void
+}
+
+define void @test_mm256_2intersect_epi64_b(i64* nocapture readonly %a, i64* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm256_2intersect_epi64_b:
+; X86:         .cfi_startproc
+; X86-NEXT:    # %bb.0:                                # %entry
+; X86-NEXT:    pushl   %esi                    # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl    20(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl    16(%esp), %ecx          # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl    12(%esp), %edx          # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %esi           # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vbroadcastsd    (%esi), %ymm0   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x06]
+; X86-NEXT:    vbroadcastsd    (%edx), %ymm1   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x0a]
+; X86-NEXT:    vp2intersectq   %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x28,0x68,0xc1]
+; X86-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    popl    %esi                    # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_2intersect_epi64_b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vbroadcastsd    (%rdi), %ymm0   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0x07]
+; X64-NEXT:    vp2intersectq   (%rsi){1to4}, %ymm0, %k0 # encoding: [0x62,0xf2,0xff,0x38,0x68,0x06]
+; X64-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdx)             # encoding: [0x88,0x02]
+; X64-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load i64, i64* %a, align 8
+  %vecinit.i.i = insertelement <4 x i64> undef, i64 %0, i32 0
+  %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
+  %1 = load i64, i64* %b, align 8
+  %vecinit.i.i2 = insertelement <4 x i64> undef, i64 %1, i32 0
+  %vecinit3.i.i3 = shufflevector <4 x i64> %vecinit.i.i2, <4 x i64> undef, <4 x i32> zeroinitializer
+  %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %vecinit3.i.i, <4 x i64> %vecinit3.i.i3)
+  %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
+  %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %5 = bitcast <8 x i1> %4 to i8
+  store i8 %5, i8* %m0, align 1
+  %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
+  %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %8 = bitcast <8 x i1> %7 to i8
+  store i8 %8, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm_2intersect_epi32(<2 x i64> %a, <2 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm_2intersect_epi32:
+; X86:         .cfi_startproc
+; X86-NEXT:    # %bb.0:                                # %entry
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %eax           # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %ecx           # encoding: [0x8b,0x4c,0x24,0x04]
+; X86-NEXT:    vp2intersectd   %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0xc1]
+; X86-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_2intersect_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vp2intersectd   %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0xc1]
+; X64-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdi)             # encoding: [0x88,0x07]
+; X64-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rsi)             # encoding: [0x88,0x06]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = bitcast <2 x i64> %a to <4 x i32>
+  %1 = bitcast <2 x i64> %b to <4 x i32>
+  %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %0, <4 x i32> %1)
+  %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
+  %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %5 = bitcast <8 x i1> %4 to i8
+  store i8 %5, i8* %m0, align 1
+  %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
+  %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %8 = bitcast <8 x i1> %7 to i8
+  store i8 %8, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm_2intersect_epi64(<2 x i64> %a, <2 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm_2intersect_epi64:
+; X86: # %bb.0:                                # %entry
+; X86-NEXT:    movl    8(%esp), %eax           # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT:    movl    4(%esp), %ecx           # encoding: [0x8b,0x4c,0x24,0x04]
+; X86-NEXT:    vp2intersectq   %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0xc1]
+; X86-NEXT:    kshiftlw        $14, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
+; X86-NEXT:    kshiftrw        $14, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $14, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
+; X86-NEXT:    kshiftrw        $14, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_2intersect_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vp2intersectq   %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0xc1]
+; X64-NEXT:    kshiftlw        $14, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
+; X64-NEXT:    kshiftrw        $14, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdi)             # encoding: [0x88,0x07]
+; X64-NEXT:    kshiftlw        $14, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
+; X64-NEXT:    kshiftrw        $14, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rsi)             # encoding: [0x88,0x06]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %a, <2 x i64> %b)
+  %1 = extractvalue { <2 x i1>, <2 x i1> } %0, 0
+  %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %3 = bitcast <8 x i1> %2 to i8
+  store i8 %3, i8* %m0, align 1
+  %4 = extractvalue { <2 x i1>, <2 x i1> } %0, 1
+  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %6 = bitcast <8 x i1> %5 to i8
+  store i8 %6, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm_2intersect_epi32_p(<2 x i64>* nocapture readonly %a, <2 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm_2intersect_epi32_p:
+; X86: # %bb.0:                                # %entry
+; X86-NEXT:    pushl   %esi                    # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %ecx          # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %edx          # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %esi           # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vmovaps (%esi), %xmm0           # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
+; X86-NEXT:    vp2intersectd   (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x02]
+; X86-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    popl    %esi                    # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_2intersect_epi32_p:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps (%rdi), %xmm0           # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-NEXT:    vp2intersectd   (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x08,0x68,0x06]
+; X64-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdx)             # encoding: [0x88,0x02]
+; X64-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = bitcast <2 x i64>* %a to <4 x i32>*
+  %1 = load <4 x i32>, <4 x i32>* %0, align 16
+  %2 = bitcast <2 x i64>* %b to <4 x i32>*
+  %3 = load <4 x i32>, <4 x i32>* %2, align 16
+  %4 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %1, <4 x i32> %3)
+  %5 = extractvalue { <4 x i1>, <4 x i1> } %4, 0
+  %6 = shufflevector <4 x i1> %5, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %7 = bitcast <8 x i1> %6 to i8
+  store i8 %7, i8* %m0, align 1
+  %8 = extractvalue { <4 x i1>, <4 x i1> } %4, 1
+  %9 = shufflevector <4 x i1> %8, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %10 = bitcast <8 x i1> %9 to i8
+  store i8 %10, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm_2intersect_epi64_p(<2 x i64>* nocapture readonly %a, <2 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm_2intersect_epi64_p:
+; X86: # %bb.0:                                # %entry
+; X86-NEXT:    pushl   %esi                    # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %ecx          # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %edx          # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %esi           # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vmovaps (%esi), %xmm0           # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06]
+; X86-NEXT:    vp2intersectq   (%edx), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x02]
+; X86-NEXT:    kshiftlw        $14, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
+; X86-NEXT:    kshiftrw        $14, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $14, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
+; X86-NEXT:    kshiftrw        $14, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    popl    %esi                    # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_2intersect_epi64_p:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps (%rdi), %xmm0           # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-NEXT:    vp2intersectq   (%rsi), %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0x06]
+; X64-NEXT:    kshiftlw        $14, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
+; X64-NEXT:    kshiftrw        $14, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdx)             # encoding: [0x88,0x02]
+; X64-NEXT:    kshiftlw        $14, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
+; X64-NEXT:    kshiftrw        $14, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %a, align 16
+  %1 = load <2 x i64>, <2 x i64>* %b, align 16
+  %2 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %0, <2 x i64> %1)
+  %3 = extractvalue { <2 x i1>, <2 x i1> } %2, 0
+  %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %5 = bitcast <8 x i1> %4 to i8
+  store i8 %5, i8* %m0, align 1
+  %6 = extractvalue { <2 x i1>, <2 x i1> } %2, 1
+  %7 = shufflevector <2 x i1> %6, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %8 = bitcast <8 x i1> %7 to i8
+  store i8 %8, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm_2intersect_epi32_b(i32* nocapture readonly %a, i32* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm_2intersect_epi32_b:
+; X86: # %bb.0:                                # %entry
+; X86-NEXT:    pushl   %esi                    # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %ecx          # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl    {{[0-9]+}}(%esp), %edx          # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %esi           # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vbroadcastss    (%esi), %xmm0   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x06]
+; X86-NEXT:    vp2intersectd   (%edx){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x02]
+; X86-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X86-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    popl    %esi                    # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_2intersect_epi32_b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vbroadcastss    (%rdi), %xmm0   # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07]
+; X64-NEXT:    vp2intersectd   (%rsi){1to4}, %xmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x18,0x68,0x06]
+; X64-NEXT:    kshiftlw        $12, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdx)             # encoding: [0x88,0x02]
+; X64-NEXT:    kshiftlw        $12, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0c]
+; X64-NEXT:    kshiftrw        $12, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load i32, i32* %a, align 4
+  %vecinit.i.i = insertelement <4 x i32> undef, i32 %0, i32 0
+  %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %1 = load i32, i32* %b, align 4
+  %vecinit.i.i2 = insertelement <4 x i32> undef, i32 %1, i32 0
+  %vecinit3.i.i3 = shufflevector <4 x i32> %vecinit.i.i2, <4 x i32> undef, <4 x i32> zeroinitializer
+  %2 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %vecinit3.i.i, <4 x i32> %vecinit3.i.i3)
+  %3 = extractvalue { <4 x i1>, <4 x i1> } %2, 0
+  %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %5 = bitcast <8 x i1> %4 to i8
+  store i8 %5, i8* %m0, align 1
+  %6 = extractvalue { <4 x i1>, <4 x i1> } %2, 1
+  %7 = shufflevector <4 x i1> %6, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %8 = bitcast <8 x i1> %7 to i8
+  store i8 %8, i8* %m1, align 1
+  ret void
+}
+
+define void @test_mm_2intersect_epi64_b(i64* nocapture readonly %a, i64* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm_2intersect_epi64_b:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl   %esi                    # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl    20(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl    16(%esp), %ecx          # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl    12(%esp), %edx          # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %esi           # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vmovddup        (%esi), %xmm0   # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x06]
+; X86-NEXT:                                    # xmm0 = mem[0,0]
+; X86-NEXT:    vmovddup        (%edx), %xmm1   # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x0a]
+; X86-NEXT:                                    # xmm1 = mem[0,0]
+; X86-NEXT:    vp2intersectq   %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x08,0x68,0xc1]
+; X86-NEXT:    kshiftlw        $14, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
+; X86-NEXT:    kshiftrw        $14, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; X86-NEXT:    kmovw   %k2, %edx               # encoding: [0xc5,0xf8,0x93,0xd2]
+; X86-NEXT:    movb    %dl, (%ecx)             # encoding: [0x88,0x11]
+; X86-NEXT:    kshiftlw        $14, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
+; X86-NEXT:    kshiftrw        $14, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
+; X86-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    popl    %esi                    # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_2intersect_epi64_b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovddup        (%rdi), %xmm0   # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07]
+; X64-NEXT:                                    # xmm0 = mem[0,0]
+; X64-NEXT:    vp2intersectq   (%rsi){1to2}, %xmm0, %k0 # encoding: [0x62,0xf2,0xff,0x18,0x68,0x06]
+; X64-NEXT:    kshiftlw        $14, %k0, %k2   # encoding: [0xc4,0xe3,0xf9,0x32,0xd0,0x0e]
+; X64-NEXT:    kshiftrw        $14, %k2, %k2   # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; X64-NEXT:    kmovw   %k2, %eax               # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    movb    %al, (%rdx)             # encoding: [0x88,0x02]
+; X64-NEXT:    kshiftlw        $14, %k1, %k0   # encoding: [0xc4,0xe3,0xf9,0x32,0xc1,0x0e]
+; X64-NEXT:    kshiftrw        $14, %k0, %k0   # encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
+; X64-NEXT:    kmovw   %k0, %eax               # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load i64, i64* %a, align 8
+  %vecinit.i.i = insertelement <2 x i64> undef, i64 %0, i32 0
+  %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
+  %1 = load i64, i64* %b, align 8
+  %vecinit.i.i2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %vecinit1.i.i3 = shufflevector <2 x i64> %vecinit.i.i2, <2 x i64> undef, <2 x i32> zeroinitializer
+  %2 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %vecinit1.i.i, <2 x i64> %vecinit1.i.i3)
+  %3 = extractvalue { <2 x i1>, <2 x i1> } %2, 0
+  %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %5 = bitcast <8 x i1> %4 to i8
+  store i8 %5, i8* %m0, align 1
+  %6 = extractvalue { <2 x i1>, <2 x i1> } %2, 1
+  %7 = shufflevector <2 x i1> %6, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  %8 = bitcast <8 x i1> %7 to i8
+  store i8 %8, i8* %m1, align 1
+  ret void
+}
+
+declare { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32>, <8 x i32>)
+declare { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64>, <4 x i64>)
+declare { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32>, <4 x i32>)
+declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)
diff --git a/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
new file mode 100644
index 0000000000000..e2415b03c1e71
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll
@@ -0,0 +1,240 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+define void @test_mm512_2intersect_epi32(<8 x i64> %a, <8 x i64> %b, i16* nocapture %m0, i16* nocapture %m1) {
+; X86-LABEL: test_mm512_2intersect_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
+; X86-NEXT:    vp2intersectd %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0xc1]
+; X86-NEXT:    kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
+; X86-NEXT:    kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_2intersect_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vp2intersectd %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0xc1]
+; X64-NEXT:    kmovw %k0, (%rdi) # encoding: [0xc5,0xf8,0x91,0x07]
+; X64-NEXT:    kmovw %k1, (%rsi) # encoding: [0xc5,0xf8,0x91,0x0e]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = bitcast <8 x i64> %a to <16 x i32>
+  %1 = bitcast <8 x i64> %b to <16 x i32>
+  %2 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %0, <16 x i32> %1)
+  %3 = extractvalue { <16 x i1>, <16 x i1> } %2, 0
+  %4 = bitcast i16* %m0 to <16 x i1>*
+  store <16 x i1> %3, <16 x i1>* %4, align 16
+  %5 = extractvalue { <16 x i1>, <16 x i1> } %2, 1
+  %6 = bitcast i16* %m1 to <16 x i1>*
+  store <16 x i1> %5, <16 x i1>* %6, align 16
+  ret void
+}
+
+define void @test_mm512_2intersect_epi64(<8 x i64> %a, <8 x i64> %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm512_2intersect_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    vp2intersectq %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0xc1]
+; X86-NEXT:    kmovw   %k1, %ecx               # encoding: [0xc5,0xf8,0x93,0xc9]
+; X86-NEXT:    kmovw   %k0, %edx               # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movb    %dl, (%eax)             # encoding: [0x88,0x10]
+; X86-NEXT:    movl    8(%esp), %eax           # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_2intersect_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vp2intersectq   %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0xc1]
+; X64-NEXT:    kmovw   %k1, %eax               # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    kmovw   %k0, %ecx               # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    movb    %cl, (%rdi)             # encoding: [0x88,0x0f]
+; X64-NEXT:    movb    %al, (%rsi)             # encoding: [0x88,0x06]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %a, <8 x i64> %b)
+  %1 = extractvalue { <8 x i1>, <8 x i1> } %0, 0
+  %2 = bitcast i8* %m0 to <8 x i1>*
+  store <8 x i1> %1, <8 x i1>* %2, align 8
+  %3 = extractvalue { <8 x i1>, <8 x i1> } %0, 1
+  %4 = bitcast i8* %m1 to <8 x i1>*
+  store <8 x i1> %3, <8 x i1>* %4, align 8
+  ret void
+}
+
+define void @test_mm512_2intersect_epi32_p(<8 x i64>* nocapture readonly %a, <8 x i64>* nocapture readonly %b, i16* nocapture %m0, i16* nocapture %m1) {
+; X86-LABEL: test_mm512_2intersect_epi32_p:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %esi # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vmovaps (%esi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x06]
+; X86-NEXT:    vp2intersectd (%edx), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x02]
+; X86-NEXT:    kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
+; X86-NEXT:    kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
+; X86-NEXT:    popl %esi # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_2intersect_epi32_p:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
+; X64-NEXT:    vp2intersectd (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x48,0x68,0x06]
+; X64-NEXT:    kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02]
+; X64-NEXT:    kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = bitcast <8 x i64>* %a to <16 x i32>*
+  %1 = load <16 x i32>, <16 x i32>* %0, align 64
+  %2 = bitcast <8 x i64>* %b to <16 x i32>*
+  %3 = load <16 x i32>, <16 x i32>* %2, align 64
+  %4 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %1, <16 x i32> %3)
+  %5 = extractvalue { <16 x i1>, <16 x i1> } %4, 0
+  %6 = bitcast i16* %m0 to <16 x i1>*
+  store <16 x i1> %5, <16 x i1>* %6, align 16
+  %7 = extractvalue { <16 x i1>, <16 x i1> } %4, 1
+  %8 = bitcast i16* %m1 to <16 x i1>*
+  store <16 x i1> %7, <16 x i1>* %8, align 16
+  ret void
+}
+
+define void @test_mm512_2intersect_epi64_p(<8 x i64>* nocapture readonly %a, <8 x i64>* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm512_2intersect_epi64_p:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl    12(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %ecx           # encoding: [0x8b,0x4c,0x24,0x08]
+; X86-NEXT:    movl    4(%esp), %edx           # encoding: [0x8b,0x54,0x24,0x04]
+; X86-NEXT:    vmovaps (%edx), %zmm0           # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x02]
+; X86-NEXT:    vp2intersectq   (%ecx), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x01]
+; X86-NEXT:    kmovw   %k1, %ecx               # encoding: [0xc5,0xf8,0x93,0xc9]
+; X86-NEXT:    kmovw   %k0, %edx               # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movb    %dl, (%eax)             # encoding: [0x88,0x10]
+; X86-NEXT:    movl    16(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x10]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl
+
+;
+; X64-LABEL: test_mm512_2intersect_epi64_p:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vmovaps (%rdi), %zmm0           # encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
+; X64-NEXT:    vp2intersectq   (%rsi), %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0x06]
+; X64-NEXT:    kmovw   %k1, %eax               # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    kmovw   %k0, %esi               # encoding: [0xc5,0xf8,0x93,0xf0]
+; X64-NEXT:    movb    %sil, (%rdx)            # encoding: [0x40,0x88,0x32]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load <8 x i64>, <8 x i64>* %a, align 64
+  %1 = load <8 x i64>, <8 x i64>* %b, align 64
+  %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %0, <8 x i64> %1)
+  %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
+  %4 = bitcast i8* %m0 to <8 x i1>*
+  store <8 x i1> %3, <8 x i1>* %4, align 8
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
+  %6 = bitcast i8* %m1 to <8 x i1>*
+  store <8 x i1> %5, <8 x i1>* %6, align 8
+  ret void
+}
+
+define void @test_mm512_2intersect_epi32_b(i32* nocapture readonly %a, i32* nocapture readonly %b, i16* nocapture %m0, i16* nocapture %m1) {
+; X86-LABEL: test_mm512_2intersect_epi32_b:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %esi # encoding: [0x56]
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08]
+; X86-NEXT:    vbroadcastss (%esi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x06]
+; X86-NEXT:    vp2intersectd (%edx){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x02]
+; X86-NEXT:    kmovw %k0, (%ecx) # encoding: [0xc5,0xf8,0x91,0x01]
+; X86-NEXT:    kmovw %k1, (%eax) # encoding: [0xc5,0xf8,0x91,0x08]
+; X86-NEXT:    popl %esi # encoding: [0x5e]
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_2intersect_epi32_b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vbroadcastss (%rdi), %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
+; X64-NEXT:    vp2intersectd (%rsi){1to16}, %zmm0, %k0 # encoding: [0x62,0xf2,0x7f,0x58,0x68,0x06]
+; X64-NEXT:    kmovw %k0, (%rdx) # encoding: [0xc5,0xf8,0x91,0x02]
+; X64-NEXT:    kmovw %k1, (%rcx) # encoding: [0xc5,0xf8,0x91,0x09]
+; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load i32, i32* %a, align 4
+  %vecinit.i = insertelement <16 x i32> undef, i32 %0, i32 0
+  %vecinit15.i = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+  %1 = load i32, i32* %b, align 4
+  %vecinit.i2 = insertelement <16 x i32> undef, i32 %1, i32 0
+  %vecinit15.i3 = shufflevector <16 x i32> %vecinit.i2, <16 x i32> undef, <16 x i32> zeroinitializer
+  %2 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %vecinit15.i, <16 x i32> %vecinit15.i3)
+  %3 = extractvalue { <16 x i1>, <16 x i1> } %2, 0
+  %4 = bitcast i16* %m0 to <16 x i1>*
+  store <16 x i1> %3, <16 x i1>* %4, align 16
+  %5 = extractvalue { <16 x i1>, <16 x i1> } %2, 1
+  %6 = bitcast i16* %m1 to <16 x i1>*
+  store <16 x i1> %5, <16 x i1>* %6, align 16
+  ret void
+}
+
+define void @test_mm512_2intersect_epi64_b(i64* nocapture readonly %a, i64* nocapture readonly %b, i8* nocapture %m0, i8* nocapture %m1) {
+; X86-LABEL: test_mm512_2intersect_epi64_b:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl    12(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x0c]
+; X86-NEXT:    movl    8(%esp), %ecx           # encoding: [0x8b,0x4c,0x24,0x08]
+; X86-NEXT:    movl    4(%esp), %edx           # encoding: [0x8b,0x54,0x24,0x04]
+; X86-NEXT:    vbroadcastsd    (%edx), %zmm0   # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x02]
+; X86-NEXT:    vbroadcastsd    (%ecx), %zmm1   # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x09]
+; X86-NEXT:    vp2intersectq   %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x48,0x68,0xc1]
+; X86-NEXT:    kmovw   %k1, %ecx               # encoding: [0xc5,0xf8,0x93,0xc9]
+; X86-NEXT:    kmovw   %k0, %edx               # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movb    %dl, (%eax)             # encoding: [0x88,0x10]
+; X86-NEXT:    movl    16(%esp), %eax          # encoding: [0x8b,0x44,0x24,0x10]
+; X86-NEXT:    movb    %cl, (%eax)             # encoding: [0x88,0x08]
+; X86-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT:    retl                            # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_2intersect_epi64_b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vbroadcastsd    (%rdi), %zmm0   # encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
+; X64-NEXT:    vp2intersectq   (%rsi){1to8}, %zmm0, %k0 # encoding: [0x62,0xf2,0xff,0x58,0x68,0x06]
+; X64-NEXT:    kmovw   %k1, %eax               # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    kmovw   %k0, %esi               # encoding: [0xc5,0xf8,0x93,0xf0]
+; X64-NEXT:    movb    %sil, (%rdx)            # encoding: [0x40,0x88,0x32]
+; X64-NEXT:    movb    %al, (%rcx)             # encoding: [0x88,0x01]
+; X64-NEXT:    vzeroupper                      # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT:    retq                            # encoding: [0xc3]
+entry:
+  %0 = load i64, i64* %a, align 8
+  %vecinit.i = insertelement <8 x i64> undef, i64 %0, i32 0
+  %vecinit7.i = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+  %1 = load i64, i64* %b, align 8
+  %vecinit.i2 = insertelement <8 x i64> undef, i64 %1, i32 0
+  %vecinit7.i3 = shufflevector <8 x i64> %vecinit.i2, <8 x i64> undef, <8 x i32> zeroinitializer
+  %2 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %vecinit7.i, <8 x i64> %vecinit7.i3)
+  %3 = extractvalue { <8 x i1>, <8 x i1> } %2, 0
+  %4 = bitcast i8* %m0 to <8 x i1>*
+  store <8 x i1> %3, <8 x i1>* %4, align 8
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %2, 1
+  %6 = bitcast i8* %m1 to <8 x i1>*
+  store <8 x i1> %5, <8 x i1>* %6, align 8
+  ret void
+}
+
+declare { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32>, <16 x i32>)
+declare { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64>, <8 x i64>)
diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll
index 2f711a86bc278..56e0855d955e7 100644
--- a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll
@@ -2,7 +2,8 @@
 
 ; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1
 ; CHECK: %[[REG2:.*]]:vr512_0_15 = COPY %2
-; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0, 7340042, def %{{.*}}, 1179657, %{{.*}}, 7340041, %[[REG1]], 7340041, %[[REG2]], 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags
+; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags
+
 define <8 x i64> @mask_Yk_i8(i8 signext %msk, <8 x i64> %x, <8 x i64> %y) {
 entry:
   %0 = tail call <8 x i64> asm "vpaddq\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <8 x i64> %x, <8 x i64> %y)
diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
new file mode 100644
index 0000000000000..a38d95b6f9472
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect | FileCheck %s --check-prefixes=CHECK,X64
+
+; Test with more than four live mask pairs
+
+define void @test(<16 x i32> %a0, <16 x i32> %b0,
+                  <16 x i32> %a1, <16 x i32> %b1,
+                  <16 x i32> %a2, <16 x i32> %b2,
+                  <16 x i32> %a3, <16 x i32> %b3,
+                  <16 x i32> %a4, <16 x i32> %b4,
+                 i16* nocapture %m0, i16* nocapture %m1) {
+; X86-LABEL: test:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-64, %esp
+; X86-NEXT:    subl $64, %esp
+; X86-NEXT:    .cfi_offset %esi, -16
+; X86-NEXT:    .cfi_offset %edi, -12
+; X86-NEXT:    movl 456(%ebp), %esi
+; X86-NEXT:    vmovaps 328(%ebp), %zmm3
+; X86-NEXT:    vmovaps 200(%ebp), %zmm4
+; X86-NEXT:    vmovaps 72(%ebp), %zmm5
+; X86-NEXT:    vp2intersectd %zmm1, %zmm0, %k0
+; X86-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT:    vp2intersectd 8(%ebp), %zmm2, %k0
+; X86-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT:    vp2intersectd 136(%ebp), %zmm5, %k0
+; X86-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT:    vp2intersectd 264(%ebp), %zmm4, %k0
+; X86-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT:    vp2intersectd 392(%ebp), %zmm3, %k0
+; X86-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    kmovw %k1, {{[0-9]+}}(%esp)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    calll dummy
+; X86-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    kmovw %k0, %eax
+; X86-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    kmovw %k0, %ecx
+; X86-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    kmovw %k0, %edx
+; X86-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    kmovw %k0, %edi
+; X86-NEXT:    addl %edi, %eax
+; X86-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 # 4-byte Folded Reload
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k3
+; X86-NEXT:    kmovw %k2, %edi
+; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    kmovw %k1, %ecx
+;
+; X64-LABEL: test:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbp, -16
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    .cfi_def_cfa_register %rbp
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    andq $-64, %rsp
+; X64-NEXT:    subq $64, %rsp
+; X64-NEXT:    .cfi_offset %rbx, -32
+; X64-NEXT:    .cfi_offset %r14, -24
+; X64-NEXT:    movq %rdi, %r14
+; X64-NEXT:    vmovaps 16(%rbp), %zmm8
+; X64-NEXT:    vp2intersectd %zmm1, %zmm0, %k0
+; X64-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    vp2intersectd %zmm3, %zmm2, %k0
+; X64-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    vp2intersectd %zmm5, %zmm4, %k0
+; X64-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    vp2intersectd %zmm7, %zmm6, %k0
+; X64-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    vp2intersectd 80(%rbp), %zmm8, %k0
+; X64-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
+; X64-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    callq dummy
+; X64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X64-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT:    kmovw %k0, %eax
+; X64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X64-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT:    kmovw %k0, %ecx
+; X64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X64-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT:    kmovw %k0, %edx
+; X64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X64-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT:    kmovw %k0, %esi
+; X64-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 4-byte Folded Reload
+; X64-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
+; X64-NEXT:    kmovw %k0, %edi
+; X64-NEXT:    kmovw %k1, %ebx
+entry:
+  %0 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a0, <16 x i32> %b0)
+  %1 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a1, <16 x i32> %b1)
+  %2 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a2, <16 x i32> %b2)
+  %3 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a3, <16 x i32> %b3)
+  %4 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a4, <16 x i32> %b4)
+
+  %5 = extractvalue { <16 x i1>, <16 x i1> } %0, 0
+  %6 = extractvalue { <16 x i1>, <16 x i1> } %1, 0
+  %7 = extractvalue { <16 x i1>, <16 x i1> } %2, 0
+  %8 = extractvalue { <16 x i1>, <16 x i1> } %3, 0
+  %9 = extractvalue { <16 x i1>, <16 x i1> } %4, 0
+  %10 = extractvalue { <16 x i1>, <16 x i1> } %0, 1
+  %11 = extractvalue { <16 x i1>, <16 x i1> } %1, 1
+
+  call void @dummy()
+
+  %12 = bitcast <16 x i1> %5 to i16
+  %13 = bitcast <16 x i1> %6 to i16
+  %14 = bitcast <16 x i1> %7 to i16
+  %15 = bitcast <16 x i1> %8 to i16
+  %16 = bitcast <16 x i1> %9 to i16
+  %17 = bitcast <16 x i1> %10 to i16
+  %18 = bitcast <16 x i1> %11 to i16
+
+  %19 = add i16 %12, %13
+  %20 = add i16 %14, %15
+  %21 = add i16 %16, %17
+  %22 = add i16 %19, %21
+  %23 = add i16 %22, %20
+
+  store i16 %23, i16* %m0, align 16
+  ret void
+}
+
+declare { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32>, <16 x i32>)
+declare void @dummy()
diff --git a/llvm/test/MC/Disassembler/X86/avx512-vp2intersect-32-att.txt b/llvm/test/MC/Disassembler/X86/avx512-vp2intersect-32-att.txt
new file mode 100644
index 0000000000000..7e66c64debdf3
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512-vp2intersect-32-att.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# Coverage
+#CHECK: vp2intersectq  %zmm2, %zmm1, %k0
+0x62 0xf2 0xf7 0x48 0x68 0xc2
+
+# Instruction encodes k1, but we print k0 anyways.
+# Not sure if GNU binutils does the same.
+#CHECK: vp2intersectq  %zmm2, %zmm1, %k0
+0x62 0xf2 0xf7 0x48 0x68 0xca
+
+#CHECK: vp2intersectq  (%esi){1to8}, %zmm4, %k6
+0x62 0xf2 0xdf 0x58 0x68 0x36
+
+#CHECK: vp2intersectd  %xmm7, %xmm4, %k6
+0x62 0xf2 0x5f 0x08 0x68 0xff
diff --git a/llvm/test/MC/Disassembler/X86/avx512-vp2intersect-64-att.txt b/llvm/test/MC/Disassembler/X86/avx512-vp2intersect-64-att.txt
new file mode 100644
index 0000000000000..fc87c98538dc1
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512-vp2intersect-64-att.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
+
+# Coverage
+#CHECK: vp2intersectq  %zmm2, %zmm1, %k0
+0x62 0xf2 0xf7 0x48 0x68 0xc2
+
+# Instruction encodes k1, but we print k0 anyways.
+# Not sure if GNU binutils does the same.
+#CHECK: vp2intersectq  %zmm2, %zmm1, %k0
+0x62 0xf2 0xf7 0x48 0x68 0xca
+
+#CHECK: vp2intersectq  (%rsi){1to8}, %zmm9, %k6
+0x62 0xf2 0xb7 0x58 0x68 0x36
+
+#CHECK: vp2intersectd  %xmm7, %xmm9, %k6
+0x62 0xf2 0x37 0x08 0x68 0xff
diff --git a/llvm/test/MC/Disassembler/X86/avx512_vp2intersect-32-intel.txt b/llvm/test/MC/Disassembler/X86/avx512_vp2intersect-32-intel.txt
new file mode 100644
index 0000000000000..baf5eacbd5c89
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512_vp2intersect-32-intel.txt
@@ -0,0 +1,43 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+# CHECK: vp2intersectd k4, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0x68,0xe4
+
+# CHECK: vp2intersectd k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd k4, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd k4, zmm3, dword ptr [eax]{1to16}
+0x62,0xf2,0x67,0x58,0x68,0x20
+
+# CHECK: vp2intersectd k4, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vp2intersectd k4, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0x68,0x61,0x7f
+
+# CHECK: vp2intersectd k4, zmm3, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x67,0x58,0x68,0x62,0x80
+
+# CHECK: vp2intersectq k4, zmm3, zmm4
+0x62,0xf2,0xe7,0x48,0x68,0xe4
+
+# CHECK: vp2intersectq k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0xe7,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq k4, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0xe7,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq k4, zmm3, qword ptr [eax]{1to8}
+0x62,0xf2,0xe7,0x58,0x68,0x20
+
+# CHECK: vp2intersectq k4, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0xe7,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vp2intersectq k4, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0xe7,0x48,0x68,0x61,0x7f
+
+# CHECK: vp2intersectq k4, zmm3, qword ptr [edx - 1024]{1to8}
+0x62,0xf2,0xe7,0x58,0x68,0x62,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx512_vp2intersect-64-intel.txt b/llvm/test/MC/Disassembler/X86/avx512_vp2intersect-64-intel.txt
new file mode 100644
index 0000000000000..5156ad6d88e6e
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512_vp2intersect-64-intel.txt
@@ -0,0 +1,43 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+# CHECK: vp2intersectd k4, zmm23, zmm24
+0x62,0x92,0x47,0x40,0x68,0xe0
+
+# CHECK: vp2intersectd k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb2,0x47,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xd2,0x47,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd k4, zmm23, dword ptr [rip]{1to16}
+0x62,0xf2,0x47,0x50,0x68,0x25,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectd k4, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xf2,0x47,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vp2intersectd k4, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xf2,0x47,0x40,0x68,0x61,0x7f
+
+# CHECK: vp2intersectd k4, zmm23, dword ptr [rdx - 512]{1to16}
+0x62,0xf2,0x47,0x50,0x68,0x62,0x80
+
+# CHECK: vp2intersectq k4, zmm23, zmm24
+0x62,0x92,0xc7,0x40,0x68,0xe0
+
+# CHECK: vp2intersectq k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb2,0xc7,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xd2,0xc7,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq k4, zmm23, qword ptr [rip]{1to8}
+0x62,0xf2,0xc7,0x50,0x68,0x25,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectq k4, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xf2,0xc7,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vp2intersectq k4, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xf2,0xc7,0x40,0x68,0x61,0x7f
+
+# CHECK: vp2intersectq k4, zmm23, qword ptr [rdx - 1024]{1to8}
+0x62,0xf2,0xc7,0x50,0x68,0x62,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-att.txt b/llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-att.txt
new file mode 100644
index 0000000000000..9cb68e659c523
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-att.txt
@@ -0,0 +1,86 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# CHECK: vp2intersectd %ymm4, %ymm3, %k6
+0x62,0xf2,0x67,0x28,0x68,0xf4
+
+# CHECK: vp2intersectd %xmm4, %xmm3, %k6
+0x62,0xf2,0x67,0x08,0x68,0xf4
+
+# CHECK: vp2intersectd  268435456(%esp,%esi,8), %ymm3, %k6
+0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd  291(%edi,%eax,4), %ymm3, %k6
+0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd  (%eax){1to8}, %ymm3, %k6
+0x62,0xf2,0x67,0x38,0x68,0x30
+
+# CHECK: vp2intersectd  -1024(,%ebp,2), %ymm3, %k6
+0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectd  4064(%ecx), %ymm3, %k6
+0x62,0xf2,0x67,0x28,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd  -512(%edx){1to8}, %ymm3, %k6
+0x62,0xf2,0x67,0x38,0x68,0x72,0x80
+
+# CHECK: vp2intersectd  268435456(%esp,%esi,8), %xmm3, %k6
+0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd  291(%edi,%eax,4), %xmm3, %k6
+0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd  (%eax){1to4}, %xmm3, %k6
+0x62,0xf2,0x67,0x18,0x68,0x30
+
+# CHECK: vp2intersectd  -512(,%ebp,2), %xmm3, %k6
+0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectd  2032(%ecx), %xmm3, %k6
+0x62,0xf2,0x67,0x08,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd  -512(%edx){1to4}, %xmm3, %k6
+0x62,0xf2,0x67,0x18,0x68,0x72,0x80
+
+# CHECK: vp2intersectq %ymm4, %ymm3, %k6
+0x62,0xf2,0xe7,0x28,0x68,0xf4
+
+# CHECK: vp2intersectq %xmm4, %xmm3, %k6
+0x62,0xf2,0xe7,0x08,0x68,0xf4
+
+# CHECK: vp2intersectq  268435456(%esp,%esi,8), %ymm3, %k6
+0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq  291(%edi,%eax,4), %ymm3, %k6
+0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq  (%eax){1to4}, %ymm3, %k6
+0x62,0xf2,0xe7,0x38,0x68,0x30
+
+# CHECK: vp2intersectq  -1024(,%ebp,2), %ymm3, %k6
+0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectq  4064(%ecx), %ymm3, %k6
+0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq  -1024(%edx){1to4}, %ymm3, %k6
+0x62,0xf2,0xe7,0x38,0x68,0x72,0x80
+
+# CHECK: vp2intersectq  268435456(%esp,%esi,8), %xmm3, %k6
+0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq  291(%edi,%eax,4), %xmm3, %k6
+0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq  (%eax){1to2}, %xmm3, %k6
+0x62,0xf2,0xe7,0x18,0x68,0x30
+
+# CHECK: vp2intersectq  -512(,%ebp,2), %xmm3, %k6
+0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectq  2032(%ecx), %xmm3, %k6
+0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq  -1024(%edx){1to2}, %xmm3, %k6
+0x62,0xf2,0xe7,0x18,0x68,0x72,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-intel.txt b/llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-intel.txt
new file mode 100644
index 0000000000000..0c34a6ec016c3
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512vp2intersectvl-intel.txt
@@ -0,0 +1,85 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vp2intersectd k6, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0x68,0xf4
+
+# CHECK: vp2intersectd k6, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0x68,0xf4
+
+# CHECK: vp2intersectd k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd k6, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd k6, ymm3, dword ptr [eax]{1to8}
+0x62,0xf2,0x67,0x38,0x68,0x30
+
+# CHECK: vp2intersectd k6, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectd k6, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd k6, ymm3, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x67,0x38,0x68,0x72,0x80
+
+# CHECK: vp2intersectd k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd k6, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd k6, xmm3, dword ptr [eax]{1to4}
+0x62,0xf2,0x67,0x18,0x68,0x30
+
+# CHECK: vp2intersectd k6, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectd k6, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd k6, xmm3, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x67,0x18,0x68,0x72,0x80
+
+# CHECK: vp2intersectq k6, ymm3, ymm4
+0x62,0xf2,0xe7,0x28,0x68,0xf4
+
+# CHECK: vp2intersectq k6, xmm3, xmm4
+0x62,0xf2,0xe7,0x08,0x68,0xf4
+
+# CHECK: vp2intersectq k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq k6, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq k6, ymm3, qword ptr [eax]{1to4}
+0x62,0xf2,0xe7,0x38,0x68,0x30
+
+# CHECK: vp2intersectq k6, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectq k6, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq k6, ymm3, qword ptr [edx - 1024]{1to4}
+0x62,0xf2,0xe7,0x38,0x68,0x72,0x80
+
+# CHECK: vp2intersectq k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq k6, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq k6, xmm3, qword ptr [eax]{1to2}
+0x62,0xf2,0xe7,0x18,0x68,0x30
+
+# CHECK: vp2intersectq k6, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectq k6, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq k6, xmm3, qword ptr [edx - 1024]{1to2}
+0x62,0xf2,0xe7,0x18,0x68,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-att.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-att.txt
new file mode 100644
index 0000000000000..b166ccbf6ee07
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-att.txt
@@ -0,0 +1,85 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
+
+# CHECK: vp2intersectd %ymm24, %ymm23, %k6
+0x62,0x92,0x47,0x20,0x68,0xf0
+
+# CHECK: vp2intersectd %xmm24, %xmm23, %k6
+0x62,0x92,0x47,0x00,0x68,0xf0
+
+# CHECK: vp2intersectd  268435456(%rbp,%r14,8), %ymm23, %k6
+0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd  291(%r8,%rax,4), %ymm23, %k6
+0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd  (%rip){1to8}, %ymm23, %k6
+0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectd  -1024(,%rbp,2), %ymm23, %k6
+0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectd  4064(%rcx), %ymm23, %k6
+0x62,0xf2,0x47,0x20,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd  -512(%rdx){1to8}, %ymm23, %k6
+0x62,0xf2,0x47,0x30,0x68,0x72,0x80
+
+# CHECK: vp2intersectd  268435456(%rbp,%r14,8), %xmm23, %k6
+0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd  291(%r8,%rax,4), %xmm23, %k6
+0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd  (%rip){1to4}, %xmm23, %k6
+0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectd  -512(,%rbp,2), %xmm23, %k6
+0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectd  2032(%rcx), %xmm23, %k6
+0x62,0xf2,0x47,0x00,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd  -512(%rdx){1to4}, %xmm23, %k6
+0x62,0xf2,0x47,0x10,0x68,0x72,0x80
+
+# CHECK: vp2intersectq %ymm24, %ymm23, %k6
+0x62,0x92,0xc7,0x20,0x68,0xf0
+
+# CHECK: vp2intersectq %xmm24, %xmm23, %k6
+0x62,0x92,0xc7,0x00,0x68,0xf0
+
+# CHECK: vp2intersectq  268435456(%rbp,%r14,8), %ymm23, %k6
+0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq  291(%r8,%rax,4), %ymm23, %k6
+0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq  (%rip){1to4}, %ymm23, %k6
+0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectq  -1024(,%rbp,2), %ymm23, %k6
+0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectq  4064(%rcx), %ymm23, %k6
+0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq  -1024(%rdx){1to4}, %ymm23, %k6
+0x62,0xf2,0xc7,0x30,0x68,0x72,0x80
+
+# CHECK: vp2intersectq  268435456(%rbp,%r14,8), %xmm23, %k6
+0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq  291(%r8,%rax,4), %xmm23, %k6
+0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq  (%rip){1to2}, %xmm23, %k6
+0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectq  -512(,%rbp,2), %xmm23, %k6
+0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectq  2032(%rcx), %xmm23, %k6
+0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq  -1024(%rdx){1to2}, %xmm23, %k6
+0x62,0xf2,0xc7,0x10,0x68,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-intel.txt b/llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-intel.txt
new file mode 100644
index 0000000000000..b952b668454c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/x86-64-avx512vp2intersectvl-intel.txt
@@ -0,0 +1,85 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vp2intersectd k6, ymm23, ymm24
+0x62,0x92,0x47,0x20,0x68,0xf0
+
+# CHECK: vp2intersectd k6, xmm23, xmm24
+0x62,0x92,0x47,0x00,0x68,0xf0
+
+# CHECK: vp2intersectd k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd k6, ymm23, dword ptr [rip]{1to8}
+0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectd k6, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectd k6, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xf2,0x47,0x20,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd k6, ymm23, dword ptr [rdx - 512]{1to8}
+0x62,0xf2,0x47,0x30,0x68,0x72,0x80
+
+# CHECK: vp2intersectd k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectd k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectd k6, xmm23, dword ptr [rip]{1to4}
+0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectd k6, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectd k6, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xf2,0x47,0x00,0x68,0x71,0x7f
+
+# CHECK: vp2intersectd k6, xmm23, dword ptr [rdx - 512]{1to4}
+0x62,0xf2,0x47,0x10,0x68,0x72,0x80
+
+# CHECK: vp2intersectq k6, ymm23, ymm24
+0x62,0x92,0xc7,0x20,0x68,0xf0
+
+# CHECK: vp2intersectq k6, xmm23, xmm24
+0x62,0x92,0xc7,0x00,0x68,0xf0
+
+# CHECK: vp2intersectq k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq k6, ymm23, qword ptr [rip]{1to4}
+0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectq k6, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vp2intersectq k6, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq k6, ymm23, qword ptr [rdx - 1024]{1to4}
+0x62,0xf2,0xc7,0x30,0x68,0x72,0x80
+
+# CHECK: vp2intersectq k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vp2intersectq k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vp2intersectq k6, xmm23, qword ptr [rip]{1to2}
+0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vp2intersectq k6, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vp2intersectq k6, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f
+
+# CHECK: vp2intersectq k6, xmm23, qword ptr [rdx - 1024]{1to2}
+0x62,0xf2,0xc7,0x10,0x68,0x72,0x80
diff --git a/llvm/test/MC/X86/avx512vp2intersectvl-att.s b/llvm/test/MC/X86/avx512vp2intersectvl-att.s
new file mode 100644
index 0000000000000..afd589027afb6
--- /dev/null
+++ b/llvm/test/MC/X86/avx512vp2intersectvl-att.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vp2intersectd %ymm4, %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xf4]
+          vp2intersectd %ymm4, %ymm3, %k6
+
+// CHECK: vp2intersectd %xmm4, %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xf4]
+          vp2intersectd %xmm4, %xmm3, %k6
+
+// CHECK: vp2intersectd  268435456(%esp,%esi,8), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectd  268435456(%esp,%esi,8), %ymm3, %k6
+
+// CHECK: vp2intersectd  291(%edi,%eax,4), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectd  291(%edi,%eax,4), %ymm3, %k6
+
+// CHECK: vp2intersectd  (%eax){1to8}, %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x30]
+          vp2intersectd  (%eax){1to8}, %ymm3, %k6
+
+// CHECK: vp2intersectd  -1024(,%ebp,2), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectd  -1024(,%ebp,2), %ymm3, %k6
+
+// CHECK: vp2intersectd  4064(%ecx), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x71,0x7f]
+          vp2intersectd  4064(%ecx), %ymm3, %k6
+
+// CHECK: vp2intersectd  -512(%edx){1to8}, %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x72,0x80]
+          vp2intersectd  -512(%edx){1to8}, %ymm3, %k6
+
+// CHECK: vp2intersectd  268435456(%esp,%esi,8), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectd  268435456(%esp,%esi,8), %xmm3, %k6
+
+// CHECK: vp2intersectd  291(%edi,%eax,4), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectd  291(%edi,%eax,4), %xmm3, %k6
+
+// CHECK: vp2intersectd  (%eax){1to4}, %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x30]
+          vp2intersectd  (%eax){1to4}, %xmm3, %k6
+
+// CHECK: vp2intersectd  -512(,%ebp,2), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectd  -512(,%ebp,2), %xmm3, %k6
+
+// CHECK: vp2intersectd  2032(%ecx), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x71,0x7f]
+          vp2intersectd  2032(%ecx), %xmm3, %k6
+
+// CHECK: vp2intersectd  -512(%edx){1to4}, %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x72,0x80]
+          vp2intersectd  -512(%edx){1to4}, %xmm3, %k6
+
+// CHECK: vp2intersectq %ymm4, %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xf4]
+          vp2intersectq %ymm4, %ymm3, %k6
+
+// CHECK: vp2intersectq %xmm4, %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xf4]
+          vp2intersectq %xmm4, %xmm3, %k6
+
+// CHECK: vp2intersectq  268435456(%esp,%esi,8), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectq  268435456(%esp,%esi,8), %ymm3, %k6
+
+// CHECK: vp2intersectq  291(%edi,%eax,4), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectq  291(%edi,%eax,4), %ymm3, %k6
+
+// CHECK: vp2intersectq  (%eax){1to4}, %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x30]
+          vp2intersectq  (%eax){1to4}, %ymm3, %k6
+
+// CHECK: vp2intersectq  -1024(,%ebp,2), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectq  -1024(,%ebp,2), %ymm3, %k6
+
+// CHECK: vp2intersectq  4064(%ecx), %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f]
+          vp2intersectq  4064(%ecx), %ymm3, %k6
+
+// CHECK: vp2intersectq  -1024(%edx){1to4}, %ymm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x72,0x80]
+          vp2intersectq  -1024(%edx){1to4}, %ymm3, %k6
+
+// CHECK: vp2intersectq  268435456(%esp,%esi,8), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectq  268435456(%esp,%esi,8), %xmm3, %k6
+
+// CHECK: vp2intersectq  291(%edi,%eax,4), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectq  291(%edi,%eax,4), %xmm3, %k6
+
+// CHECK: vp2intersectq  (%eax){1to2}, %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x30]
+          vp2intersectq  (%eax){1to2}, %xmm3, %k6
+
+// CHECK: vp2intersectq  -512(,%ebp,2), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectq  -512(,%ebp,2), %xmm3, %k6
+
+// CHECK: vp2intersectq  2032(%ecx), %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f]
+          vp2intersectq  2032(%ecx), %xmm3, %k6
+
+// CHECK: vp2intersectq  -1024(%edx){1to2}, %xmm3, %k6
+// CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x72,0x80]
+          vp2intersectq  -1024(%edx){1to2}, %xmm3, %k6
diff --git a/llvm/test/MC/X86/avx512vp2intersectvl-intel.s b/llvm/test/MC/X86/avx512vp2intersectvl-intel.s
new file mode 100644
index 0000000000000..188d1bb461f7a
--- /dev/null
+++ b/llvm/test/MC/X86/avx512vp2intersectvl-intel.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple i386-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vp2intersectd k6, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xf4]
+          vp2intersectd k6, ymm3, ymm4
+
+// CHECK: vp2intersectd k6, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xf4]
+          vp2intersectd k6, xmm3, xmm4
+
+// CHECK: vp2intersectd k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectd k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vp2intersectd k6, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectd k6, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vp2intersectd k6, ymm3, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x30]
+          vp2intersectd k6, ymm3, dword ptr [eax]{1to8}
+
+// CHECK: vp2intersectd k6, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectd k6, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vp2intersectd k6, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0x68,0x71,0x7f]
+          vp2intersectd k6, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vp2intersectd k6, ymm3, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x67,0x38,0x68,0x72,0x80]
+          vp2intersectd k6, ymm3, dword ptr [edx - 512]{1to8}
+
+// CHECK: vp2intersectd k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectd k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vp2intersectd k6, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectd k6, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vp2intersectd k6, xmm3, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x30]
+          vp2intersectd k6, xmm3, dword ptr [eax]{1to4}
+
+// CHECK: vp2intersectd k6, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectd k6, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vp2intersectd k6, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0x68,0x71,0x7f]
+          vp2intersectd k6, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vp2intersectd k6, xmm3, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x67,0x18,0x68,0x72,0x80]
+          vp2intersectd k6, xmm3, dword ptr [edx - 512]{1to4}
+
+// CHECK: vp2intersectq k6, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xf4]
+          vp2intersectq k6, ymm3, ymm4
+
+// CHECK: vp2intersectq k6, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xf4]
+          vp2intersectq k6, xmm3, xmm4
+
+// CHECK: vp2intersectq k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectq k6, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vp2intersectq k6, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectq k6, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: vp2intersectq k6, ymm3, qword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x30]
+          vp2intersectq k6, ymm3, qword ptr [eax]{1to4}
+
+// CHECK: vp2intersectq k6, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectq k6, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: vp2intersectq k6, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x28,0x68,0x71,0x7f]
+          vp2intersectq k6, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: vp2intersectq k6, ymm3, qword ptr [edx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xf2,0xe7,0x38,0x68,0x72,0x80]
+          vp2intersectq k6, ymm3, qword ptr [edx - 1024]{1to4}
+
+// CHECK: vp2intersectq k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectq k6, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vp2intersectq k6, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0xb4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectq k6, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vp2intersectq k6, xmm3, qword ptr [eax]{1to2}
+// CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x30]
+          vp2intersectq k6, xmm3, qword ptr [eax]{1to2}
+
+// CHECK: vp2intersectq k6, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectq k6, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: vp2intersectq k6, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x08,0x68,0x71,0x7f]
+          vp2intersectq k6, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: vp2intersectq k6, xmm3, qword ptr [edx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xf2,0xe7,0x18,0x68,0x72,0x80]
+          vp2intersectq k6, xmm3, qword ptr [edx - 1024]{1to2}
diff --git a/llvm/test/MC/X86/x86-32-avx512_vp2intersect-intel.s b/llvm/test/MC/X86/x86-32-avx512_vp2intersect-intel.s
new file mode 100644
index 0000000000000..3ea3e4fd64654
--- /dev/null
+++ b/llvm/test/MC/X86/x86-32-avx512_vp2intersect-intel.s
@@ -0,0 +1,57 @@
+// RUN: llvm-mc -triple i386-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// CHECK: vp2intersectd k4, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0xe4]
+          vp2intersectd k4, zmm3, zmm4
+
+// CHECK: vp2intersectd k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectd k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vp2intersectd k4, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectd k4, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vp2intersectd k4, zmm3, dword ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x67,0x58,0x68,0x20]
+          vp2intersectd k4, zmm3, dword ptr [eax]{1to16}
+
+// CHECK: vp2intersectd k4, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
+          vp2intersectd k4, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vp2intersectd k4, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0x68,0x61,0x7f]
+          vp2intersectd k4, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vp2intersectd k4, zmm3, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x67,0x58,0x68,0x62,0x80]
+          vp2intersectd k4, zmm3, dword ptr [edx - 512]{1to16}
+
+// CHECK: vp2intersectq k4, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0xe4]
+          vp2intersectq k4, zmm3, zmm4
+
+// CHECK: vp2intersectq k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0xa4,0xf4,0x00,0x00,0x00,0x10]
+          vp2intersectq k4, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vp2intersectq k4, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0xa4,0x87,0x23,0x01,0x00,0x00]
+          vp2intersectq k4, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vp2intersectq k4, zmm3, qword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf2,0xe7,0x58,0x68,0x20]
+          vp2intersectq k4, zmm3, qword ptr [eax]{1to8}
+
+// CHECK: vp2intersectq k4, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
+          vp2intersectq k4, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vp2intersectq k4, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0xe7,0x48,0x68,0x61,0x7f]
+          vp2intersectq k4, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vp2intersectq k4, zmm3, qword ptr [edx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf2,0xe7,0x58,0x68,0x62,0x80]
+          vp2intersectq k4, zmm3, qword ptr [edx - 1024]{1to8}
+
diff --git a/llvm/test/MC/X86/x86-32-avx512vp2intersect-att.s b/llvm/test/MC/X86/x86-32-avx512vp2intersect-att.s
new file mode 100644
index 0000000000000..49aca2a32126d
--- /dev/null
+++ b/llvm/test/MC/X86/x86-32-avx512vp2intersect-att.s
@@ -0,0 +1,225 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vp2intersectq        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
+vp2intersectq  %zmm2, %zmm1, %k0
+
+// CHECK: vp2intersectq        (%edi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
+vp2intersectq  (%edi), %zmm1, %k0
+
+// CHECK: vp2intersectq        (%edi){1to8}, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
+vp2intersectq  (%edi){1to8}, %zmm1, %k0
+
+// CHECK: vp2intersectq        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
+vp2intersectq  %zmm2, %zmm1, %k1
+
+// CHECK: vp2intersectq        (%edi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
+vp2intersectq  (%edi), %zmm1, %k1
+
+// CHECK: vp2intersectq        (%edi){1to8}, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
+vp2intersectq  (%edi){1to8}, %zmm1, %k1
+
+// CHECK: vp2intersectq        %zmm7, %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0xf7]
+vp2intersectq  %zmm7, %zmm4, %k6
+
+// CHECK: vp2intersectq        (%esi), %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0x36]
+vp2intersectq  (%esi), %zmm4, %k6
+
+// CHECK: vp2intersectq        (%esi){1to8}, %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x58,0x68,0x36]
+vp2intersectq  (%esi){1to8}, %zmm4, %k6
+
+// CHECK: vp2intersectq        %zmm7, %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0xf7]
+vp2intersectq  %zmm7, %zmm4, %k7
+
+// CHECK: vp2intersectq        (%esi), %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x48,0x68,0x36]
+vp2intersectq  (%esi), %zmm4, %k7
+
+// CHECK: vp2intersectq        (%esi){1to8}, %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x58,0x68,0x36]
+vp2intersectq  (%esi){1to8}, %zmm4, %k7
+
+// CHECK: vp2intersectq        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
+vp2intersectq  %ymm2, %ymm1, %k0
+
+// CHECK: vp2intersectq        (%edi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
+vp2intersectq  (%edi), %ymm1, %k0
+
+// CHECK: vp2intersectq        (%edi){1to4}, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
+vp2intersectq  (%edi){1to4}, %ymm1, %k0
+
+// CHECK: vp2intersectq        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
+vp2intersectq  %ymm2, %ymm1, %k1
+
+// CHECK: vp2intersectq        (%edi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
+vp2intersectq  (%edi), %ymm1, %k1
+
+// CHECK: vp2intersectq        (%edi){1to4}, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
+vp2intersectq  (%edi){1to4}, %ymm1, %k1
+
+// CHECK: vp2intersectq        %ymm7, %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0xf7]
+vp2intersectq  %ymm7, %ymm4, %k6
+
+// CHECK: vp2intersectq        (%esi), %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0x36]
+vp2intersectq  (%esi), %ymm4, %k6
+
+// CHECK: vp2intersectq        (%esi){1to4}, %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x38,0x68,0x36]
+vp2intersectq  (%esi){1to4}, %ymm4, %k6
+
+// CHECK: vp2intersectq        %ymm7, %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0xf7]
+vp2intersectq  %ymm7, %ymm4, %k7
+
+// CHECK: vp2intersectq        (%esi), %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x28,0x68,0x36]
+vp2intersectq  (%esi), %ymm4, %k7
+
+// CHECK: vp2intersectq        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
+vp2intersectq  %xmm2, %xmm1, %k0
+
+// CHECK: vp2intersectq        (%edi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
+vp2intersectq  (%edi), %xmm1, %k0
+
+// CHECK: vp2intersectq        (%edi){1to2}, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x18,0x68,0x07]
+vp2intersectq  (%edi){1to2}, %xmm1, %k0
+
+// CHECK: vp2intersectq        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
+vp2intersectq  %xmm2, %xmm1, %k1
+
+// CHECK: vp2intersectq        (%edi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
+vp2intersectq  (%edi), %xmm1, %k1
+
+// CHECK: vp2intersectq        %xmm7, %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0xf7]
+vp2intersectq  %xmm7, %xmm4, %k6
+
+// CHECK: vp2intersectq        (%esi), %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0x36]
+vp2intersectq  (%esi), %xmm4, %k6
+
+// CHECK: vp2intersectq        %xmm7, %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0xf7]
+vp2intersectq  %xmm7, %xmm4, %k7
+
+// CHECK: vp2intersectq        (%esi), %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0xdf,0x08,0x68,0x36]
+vp2intersectq  (%esi), %xmm4, %k7
+
+// CHECK: vp2intersectd        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
+vp2intersectd  %zmm2, %zmm1, %k0
+
+// CHECK: vp2intersectd        (%edi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
+vp2intersectd  (%edi), %zmm1, %k0
+
+// CHECK: vp2intersectd        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
+vp2intersectd  %zmm2, %zmm1, %k1
+
+// CHECK: vp2intersectd        (%edi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
+vp2intersectd  (%edi), %zmm1, %k1
+
+// CHECK: vp2intersectd        %zmm7, %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0xf7]
+vp2intersectd  %zmm7, %zmm4, %k6
+
+// CHECK: vp2intersectd        (%esi), %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0x36]
+vp2intersectd  (%esi), %zmm4, %k6
+
+// CHECK: vp2intersectd        %zmm7, %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0xf7]
+vp2intersectd  %zmm7, %zmm4, %k7
+
+// CHECK: vp2intersectd        (%esi), %zmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x48,0x68,0x36]
+vp2intersectd  (%esi), %zmm4, %k7
+
+// CHECK: vp2intersectd        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
+vp2intersectd  %ymm2, %ymm1, %k0
+
+// CHECK: vp2intersectd        (%edi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
+vp2intersectd  (%edi), %ymm1, %k0
+
+// CHECK: vp2intersectd        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
+vp2intersectd  %ymm2, %ymm1, %k1
+
+// CHECK: vp2intersectd        (%edi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
+vp2intersectd  (%edi), %ymm1, %k1
+
+// CHECK: vp2intersectd        %ymm7, %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0xf7]
+vp2intersectd  %ymm7, %ymm4, %k6
+
+// CHECK: vp2intersectd        (%esi), %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0x36]
+vp2intersectd  (%esi), %ymm4, %k6
+
+// CHECK: vp2intersectd        %ymm7, %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0xf7]
+vp2intersectd  %ymm7, %ymm4, %k7
+
+// CHECK: vp2intersectd        (%esi), %ymm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x28,0x68,0x36]
+vp2intersectd  (%esi), %ymm4, %k7
+
+// CHECK: vp2intersectd        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
+vp2intersectd  %xmm2, %xmm1, %k0
+
+// CHECK: vp2intersectd        (%edi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
+vp2intersectd  (%edi), %xmm1, %k0
+
+// CHECK: vp2intersectd        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
+vp2intersectd  %xmm2, %xmm1, %k1
+
+// CHECK: vp2intersectd        (%edi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
+vp2intersectd  (%edi), %xmm1, %k1
+
+// CHECK: vp2intersectd        %xmm7, %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0xf7]
+vp2intersectd  %xmm7, %xmm4, %k6
+
+// CHECK: vp2intersectd        (%esi), %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0x36]
+vp2intersectd  (%esi), %xmm4, %k6
+
+// CHECK: vp2intersectd        %xmm7, %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0xf7]
+vp2intersectd  %xmm7, %xmm4, %k7
+
+// CHECK: vp2intersectd        (%esi), %xmm4, %k6
+// CHECK: encoding: [0x62,0xf2,0x5f,0x08,0x68,0x36]
+vp2intersectd  (%esi), %xmm4, %k7
diff --git a/llvm/test/MC/X86/x86-64-avx512_vp2intersect-intel.s b/llvm/test/MC/X86/x86-64-avx512_vp2intersect-intel.s
new file mode 100644
index 0000000000000..1dd9501bb2613
--- /dev/null
+++ b/llvm/test/MC/X86/x86-64-avx512_vp2intersect-intel.s
@@ -0,0 +1,57 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// CHECK: vp2intersectd k4, zmm23, zmm24
+// CHECK: encoding: [0x62,0x92,0x47,0x40,0x68,0xe0]
+          vp2intersectd k4, zmm23, zmm24
+
+// CHECK: vp2intersectd k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb2,0x47,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectd k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vp2intersectd k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd2,0x47,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectd k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vp2intersectd k4, zmm23, dword ptr [rip]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x47,0x50,0x68,0x25,0x00,0x00,0x00,0x00]
+          vp2intersectd k4, zmm23, dword ptr [rip]{1to16}
+
+// CHECK: vp2intersectd k4, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x47,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
+          vp2intersectd k4, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vp2intersectd k4, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x47,0x40,0x68,0x61,0x7f]
+          vp2intersectd k4, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vp2intersectd k4, zmm23, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x47,0x50,0x68,0x62,0x80]
+          vp2intersectd k4, zmm23, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vp2intersectq k4, zmm23, zmm24
+// CHECK: encoding: [0x62,0x92,0xc7,0x40,0x68,0xe0]
+          vp2intersectq k4, zmm23, zmm24
+
+// CHECK: vp2intersectq k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb2,0xc7,0x40,0x68,0xa4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectq k4, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vp2intersectq k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd2,0xc7,0x40,0x68,0xa4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectq k4, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vp2intersectq k4, zmm23, qword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xf2,0xc7,0x50,0x68,0x25,0x00,0x00,0x00,0x00]
+          vp2intersectq k4, zmm23, qword ptr [rip]{1to8}
+
+// CHECK: vp2intersectq k4, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xf2,0xc7,0x40,0x68,0x24,0x6d,0x00,0xf8,0xff,0xff]
+          vp2intersectq k4, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vp2intersectq k4, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xf2,0xc7,0x40,0x68,0x61,0x7f]
+          vp2intersectq k4, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vp2intersectq k4, zmm23, qword ptr [rdx - 1024]{1to8}
+// CHECK: encoding: [0x62,0xf2,0xc7,0x50,0x68,0x62,0x80]
+          vp2intersectq k4, zmm23, qword ptr [rdx - 1024]{1to8}
+
diff --git a/llvm/test/MC/X86/x86-64-avx512vp2intersect-att.s b/llvm/test/MC/X86/x86-64-avx512vp2intersect-att.s
new file mode 100644
index 0000000000000..72e907f9b3848
--- /dev/null
+++ b/llvm/test/MC/X86/x86-64-avx512vp2intersect-att.s
@@ -0,0 +1,231 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// v8i64 vectors
+// CHECK: vp2intersectq        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
+vp2intersectq  %zmm2, %zmm1, %k0
+
+// CHECK: vp2intersectq        (%rdi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
+vp2intersectq  (%rdi), %zmm1, %k0
+
+// CHECK: vp2intersectq        (%rdi){1to8}, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
+vp2intersectq  (%rdi){1to8}, %zmm1, %k0
+
+// CHECK: vp2intersectq        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0xc2]
+vp2intersectq  %zmm2, %zmm1, %k1
+
+// CHECK: vp2intersectq        (%rdi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x48,0x68,0x07]
+vp2intersectq  (%rdi), %zmm1, %k1
+
+// CHECK: vp2intersectq        (%rdi){1to8}, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x58,0x68,0x07]
+vp2intersectq  (%rdi){1to8}, %zmm1, %k1
+
+// CHECK: vp2intersectq        %zmm7, %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0xf7]
+vp2intersectq  %zmm7, %zmm9, %k6
+
+// CHECK: vp2intersectq        (%rsi), %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0x36]
+vp2intersectq  (%rsi), %zmm9, %k6
+
+// CHECK: vp2intersectq        (%rsi){1to8}, %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x58,0x68,0x36]
+vp2intersectq  (%rsi){1to8}, %zmm9, %k6
+
+// CHECK: vp2intersectq        %zmm7, %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0xf7]
+vp2intersectq  %zmm7, %zmm9, %k7
+
+// CHECK: vp2intersectq        (%rsi), %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x48,0x68,0x36]
+vp2intersectq  (%rsi), %zmm9, %k7
+
+// CHECK: vp2intersectq        (%rsi){1to8}, %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x58,0x68,0x36]
+vp2intersectq  (%rsi){1to8}, %zmm9, %k7
+
+// v4i64 vectors
+// CHECK: vp2intersectq        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
+vp2intersectq  %ymm2, %ymm1, %k0
+
+// CHECK: vp2intersectq        (%rdi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
+vp2intersectq  (%rdi), %ymm1, %k0
+
+// CHECK: vp2intersectq        (%rdi){1to4}, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
+vp2intersectq  (%rdi){1to4}, %ymm1, %k0
+
+// CHECK: vp2intersectq        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0xc2]
+vp2intersectq  %ymm2, %ymm1, %k1
+
+// CHECK: vp2intersectq        (%rdi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x28,0x68,0x07]
+vp2intersectq  (%rdi), %ymm1, %k1
+
+// CHECK: vp2intersectq        (%rdi){1to4}, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x38,0x68,0x07]
+vp2intersectq  (%rdi){1to4}, %ymm1, %k1
+
+// CHECK: vp2intersectq        %ymm7, %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0xf7]
+vp2intersectq  %ymm7, %ymm9, %k6
+
+// CHECK: vp2intersectq        (%rsi), %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0x36]
+vp2intersectq  (%rsi), %ymm9, %k6
+
+// CHECK: vp2intersectq        (%rsi){1to4}, %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x38,0x68,0x36]
+vp2intersectq  (%rsi){1to4}, %ymm9, %k6
+
+// CHECK: vp2intersectq        %ymm7, %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0xf7]
+vp2intersectq  %ymm7, %ymm9, %k7
+
+// CHECK: vp2intersectq        (%rsi), %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x28,0x68,0x36]
+vp2intersectq  (%rsi), %ymm9, %k7
+
+// v2i64 vectors
+// CHECK: vp2intersectq        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
+vp2intersectq  %xmm2, %xmm1, %k0
+
+// CHECK: vp2intersectq        (%rdi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
+vp2intersectq  (%rdi), %xmm1, %k0
+
+// CHECK: vp2intersectq        (%rdi){1to2}, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x18,0x68,0x07]
+vp2intersectq  (%rdi){1to2}, %xmm1, %k0
+
+// CHECK: vp2intersectq        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0xc2]
+vp2intersectq  %xmm2, %xmm1, %k1
+
+// CHECK: vp2intersectq        (%rdi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0xf7,0x08,0x68,0x07]
+vp2intersectq  (%rdi), %xmm1, %k1
+
+// CHECK: vp2intersectq        %xmm7, %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0xf7]
+vp2intersectq  %xmm7, %xmm9, %k6
+
+// CHECK: vp2intersectq        (%rsi), %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0x36]
+vp2intersectq  (%rsi), %xmm9, %k6
+
+// CHECK: vp2intersectq        %xmm7, %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0xf7]
+vp2intersectq  %xmm7, %xmm9, %k7
+
+// CHECK: vp2intersectq        (%rsi), %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0xb7,0x08,0x68,0x36]
+vp2intersectq  (%rsi), %xmm9, %k7
+
+// v16i32 vectors
+// CHECK: vp2intersectd        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
+vp2intersectd  %zmm2, %zmm1, %k0
+
+// CHECK: vp2intersectd        (%rdi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
+vp2intersectd  (%rdi), %zmm1, %k0
+
+// CHECK: vp2intersectd        %zmm2, %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0xc2]
+vp2intersectd  %zmm2, %zmm1, %k1
+
+// CHECK: vp2intersectd        (%rdi), %zmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x48,0x68,0x07]
+vp2intersectd  (%rdi), %zmm1, %k1
+
+// CHECK: vp2intersectd        %zmm7, %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0xf7]
+vp2intersectd  %zmm7, %zmm9, %k6
+
+// CHECK: vp2intersectd        (%rsi), %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0x36]
+vp2intersectd  (%rsi), %zmm9, %k6
+
+// CHECK: vp2intersectd        %zmm7, %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0xf7]
+vp2intersectd  %zmm7, %zmm9, %k7
+
+// CHECK: vp2intersectd        (%rsi), %zmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x48,0x68,0x36]
+vp2intersectd  (%rsi), %zmm9, %k7
+
+// v8i32 vectors
+// CHECK: vp2intersectd        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
+vp2intersectd  %ymm2, %ymm1, %k0
+
+// CHECK: vp2intersectd        (%rdi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
+vp2intersectd  (%rdi), %ymm1, %k0
+
+// CHECK: vp2intersectd        %ymm2, %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0xc2]
+vp2intersectd  %ymm2, %ymm1, %k1
+
+// CHECK: vp2intersectd        (%rdi), %ymm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x28,0x68,0x07]
+vp2intersectd  (%rdi), %ymm1, %k1
+
+// CHECK: vp2intersectd        %ymm7, %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0xf7]
+vp2intersectd  %ymm7, %ymm9, %k6
+
+// CHECK: vp2intersectd        (%rsi), %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0x36]
+vp2intersectd  (%rsi), %ymm9, %k6
+
+// CHECK: vp2intersectd        %ymm7, %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0xf7]
+vp2intersectd  %ymm7, %ymm9, %k7
+
+// CHECK: vp2intersectd        (%rsi), %ymm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x28,0x68,0x36]
+vp2intersectd  (%rsi), %ymm9, %k7
+
+// v4i32 vectors
+// CHECK: vp2intersectd        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
+vp2intersectd  %xmm2, %xmm1, %k0
+
+// CHECK: vp2intersectd        (%rdi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
+vp2intersectd  (%rdi), %xmm1, %k0
+
+// CHECK: vp2intersectd        %xmm2, %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0xc2]
+vp2intersectd  %xmm2, %xmm1, %k1
+
+// CHECK: vp2intersectd        (%rdi), %xmm1, %k0
+// CHECK: encoding: [0x62,0xf2,0x77,0x08,0x68,0x07]
+vp2intersectd  (%rdi), %xmm1, %k1
+
+// CHECK: vp2intersectd        %xmm7, %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0xf7]
+vp2intersectd  %xmm7, %xmm9, %k6
+
+// CHECK: vp2intersectd        (%rsi), %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0x36]
+vp2intersectd  (%rsi), %xmm9, %k6
+
+// CHECK: vp2intersectd        %xmm7, %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0xf7]
+vp2intersectd  %xmm7, %xmm9, %k7
+
+// CHECK: vp2intersectd        (%rsi), %xmm9, %k6
+// CHECK: encoding: [0x62,0xf2,0x37,0x08,0x68,0x36]
+vp2intersectd  (%rsi), %xmm9, %k7
diff --git a/llvm/test/MC/X86/x86-64-avx512vp2intersectvl-att.s b/llvm/test/MC/X86/x86-64-avx512vp2intersectvl-att.s
new file mode 100644
index 0000000000000..040075b2ebe3a
--- /dev/null
+++ b/llvm/test/MC/X86/x86-64-avx512vp2intersectvl-att.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s  | FileCheck %s
+
+// CHECK: vp2intersectd %ymm24, %ymm23, %k6
+// CHECK: encoding: [0x62,0x92,0x47,0x20,0x68,0xf0]
+          vp2intersectd %ymm24, %ymm23, %k6
+
+// CHECK: vp2intersectd %xmm24, %xmm23, %k6
+// CHECK: encoding: [0x62,0x92,0x47,0x00,0x68,0xf0]
+          vp2intersectd %xmm24, %xmm23, %k6
+
+// CHECK: vp2intersectd  268435456(%rbp,%r14,8), %ymm23, %k6
+// CHECK: encoding: [0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectd  268435456(%rbp,%r14,8), %ymm23, %k6
+
+// CHECK: vp2intersectd  291(%r8,%rax,4), %ymm23, %k6
+// CHECK: encoding: [0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectd  291(%r8,%rax,4), %ymm23, %k6
+
+// CHECK: vp2intersectd  (%rip){1to8}, %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectd  (%rip){1to8}, %ymm23, %k6
+
+// CHECK: vp2intersectd  -1024(,%rbp,2), %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectd  -1024(,%rbp,2), %ymm23, %k6
+
+// CHECK: vp2intersectd  4064(%rcx), %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x71,0x7f]
+          vp2intersectd  4064(%rcx), %ymm23, %k6
+
+// CHECK: vp2intersectd  -512(%rdx){1to8}, %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x72,0x80]
+          vp2intersectd  -512(%rdx){1to8}, %ymm23, %k6
+
+// CHECK: vp2intersectd  268435456(%rbp,%r14,8), %xmm23, %k6
+// CHECK: encoding: [0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectd  268435456(%rbp,%r14,8), %xmm23, %k6
+
+// CHECK: vp2intersectd  291(%r8,%rax,4), %xmm23, %k6
+// CHECK: encoding: [0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectd  291(%r8,%rax,4), %xmm23, %k6
+
+// CHECK: vp2intersectd  (%rip){1to4}, %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectd  (%rip){1to4}, %xmm23, %k6
+
+// CHECK: vp2intersectd  -512(,%rbp,2), %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectd  -512(,%rbp,2), %xmm23, %k6
+
+// CHECK: vp2intersectd  2032(%rcx), %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x71,0x7f]
+          vp2intersectd  2032(%rcx), %xmm23, %k6
+
+// CHECK: vp2intersectd  -512(%rdx){1to4}, %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x72,0x80]
+          vp2intersectd  -512(%rdx){1to4}, %xmm23, %k6
+
+// CHECK: vp2intersectq %ymm24, %ymm23, %k6
+// CHECK: encoding: [0x62,0x92,0xc7,0x20,0x68,0xf0]
+          vp2intersectq %ymm24, %ymm23, %k6
+
+// CHECK: vp2intersectq %xmm24, %xmm23, %k6
+// CHECK: encoding: [0x62,0x92,0xc7,0x00,0x68,0xf0]
+          vp2intersectq %xmm24, %xmm23, %k6
+
+// CHECK: vp2intersectq  268435456(%rbp,%r14,8), %ymm23, %k6
+// CHECK: encoding: [0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectq  268435456(%rbp,%r14,8), %ymm23, %k6
+
+// CHECK: vp2intersectq  291(%r8,%rax,4), %ymm23, %k6
+// CHECK: encoding: [0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectq  291(%r8,%rax,4), %ymm23, %k6
+
+// CHECK: vp2intersectq  (%rip){1to4}, %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectq  (%rip){1to4}, %ymm23, %k6
+
+// CHECK: vp2intersectq  -1024(,%rbp,2), %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectq  -1024(,%rbp,2), %ymm23, %k6
+
+// CHECK: vp2intersectq  4064(%rcx), %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f]
+          vp2intersectq  4064(%rcx), %ymm23, %k6
+
+// CHECK: vp2intersectq  -1024(%rdx){1to4}, %ymm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x72,0x80]
+          vp2intersectq  -1024(%rdx){1to4}, %ymm23, %k6
+
+// CHECK: vp2intersectq  268435456(%rbp,%r14,8), %xmm23, %k6
+// CHECK: encoding: [0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectq  268435456(%rbp,%r14,8), %xmm23, %k6
+
+// CHECK: vp2intersectq  291(%r8,%rax,4), %xmm23, %k6
+// CHECK: encoding: [0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectq  291(%r8,%rax,4), %xmm23, %k6
+
+// CHECK: vp2intersectq  (%rip){1to2}, %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectq  (%rip){1to2}, %xmm23, %k6
+
+// CHECK: vp2intersectq  -512(,%rbp,2), %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectq  -512(,%rbp,2), %xmm23, %k6
+
+// CHECK: vp2intersectq  2032(%rcx), %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f]
+          vp2intersectq  2032(%rcx), %xmm23, %k6
+
+// CHECK: vp2intersectq  -1024(%rdx){1to2}, %xmm23, %k6
+// CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x72,0x80]
+          vp2intersectq  -1024(%rdx){1to2}, %xmm23, %k6
diff --git a/llvm/test/MC/X86/x86-64-avx512vp2intersectvl-intel.s b/llvm/test/MC/X86/x86-64-avx512vp2intersectvl-intel.s
new file mode 100644
index 0000000000000..dd89c3ff71588
--- /dev/null
+++ b/llvm/test/MC/X86/x86-64-avx512vp2intersectvl-intel.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vp2intersectd k6, ymm23, ymm24
+// CHECK: encoding: [0x62,0x92,0x47,0x20,0x68,0xf0]
+          vp2intersectd k6, ymm23, ymm24
+
+// CHECK: vp2intersectd k6, xmm23, xmm24
+// CHECK: encoding: [0x62,0x92,0x47,0x00,0x68,0xf0]
+          vp2intersectd k6, xmm23, xmm24
+
+// CHECK: vp2intersectd k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb2,0x47,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectd k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vp2intersectd k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd2,0x47,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectd k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vp2intersectd k6, ymm23, dword ptr [rip]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectd k6, ymm23, dword ptr [rip]{1to8}
+
+// CHECK: vp2intersectd k6, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectd k6, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vp2intersectd k6, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x47,0x20,0x68,0x71,0x7f]
+          vp2intersectd k6, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vp2intersectd k6, ymm23, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x47,0x30,0x68,0x72,0x80]
+          vp2intersectd k6, ymm23, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vp2intersectd k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb2,0x47,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectd k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vp2intersectd k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd2,0x47,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectd k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vp2intersectd k6, xmm23, dword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectd k6, xmm23, dword ptr [rip]{1to4}
+
+// CHECK: vp2intersectd k6, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectd k6, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vp2intersectd k6, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x47,0x00,0x68,0x71,0x7f]
+          vp2intersectd k6, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vp2intersectd k6, xmm23, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x47,0x10,0x68,0x72,0x80]
+          vp2intersectd k6, xmm23, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vp2intersectq k6, ymm23, ymm24
+// CHECK: encoding: [0x62,0x92,0xc7,0x20,0x68,0xf0]
+          vp2intersectq k6, ymm23, ymm24
+
+// CHECK: vp2intersectq k6, xmm23, xmm24
+// CHECK: encoding: [0x62,0x92,0xc7,0x00,0x68,0xf0]
+          vp2intersectq k6, xmm23, xmm24
+
+// CHECK: vp2intersectq k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb2,0xc7,0x20,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectq k6, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vp2intersectq k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd2,0xc7,0x20,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectq k6, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vp2intersectq k6, ymm23, qword ptr [rip]{1to4}
+// CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectq k6, ymm23, qword ptr [rip]{1to4}
+
+// CHECK: vp2intersectq k6, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x34,0x6d,0x00,0xfc,0xff,0xff]
+          vp2intersectq k6, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vp2intersectq k6, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xf2,0xc7,0x20,0x68,0x71,0x7f]
+          vp2intersectq k6, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vp2intersectq k6, ymm23, qword ptr [rdx - 1024]{1to4}
+// CHECK: encoding: [0x62,0xf2,0xc7,0x30,0x68,0x72,0x80]
+          vp2intersectq k6, ymm23, qword ptr [rdx - 1024]{1to4}
+
+// CHECK: vp2intersectq k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xb2,0xc7,0x00,0x68,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vp2intersectq k6, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vp2intersectq k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xd2,0xc7,0x00,0x68,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vp2intersectq k6, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vp2intersectq k6, xmm23, qword ptr [rip]{1to2}
+// CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x35,0x00,0x00,0x00,0x00]
+          vp2intersectq k6, xmm23, qword ptr [rip]{1to2}
+
+// CHECK: vp2intersectq k6, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x34,0x6d,0x00,0xfe,0xff,0xff]
+          vp2intersectq k6, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vp2intersectq k6, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xf2,0xc7,0x00,0x68,0x71,0x7f]
+          vp2intersectq k6, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vp2intersectq k6, xmm23, qword ptr [rdx - 1024]{1to2}
+// CHECK: encoding: [0x62,0xf2,0xc7,0x10,0x68,0x72,0x80]
+          vp2intersectq k6, xmm23, qword ptr [rdx - 1024]{1to2}
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index 20ac25909b418..ab8a8855c4788 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -932,6 +932,11 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("VK32WM",              TYPE_VK)
   TYPE("VK64",                TYPE_VK)
   TYPE("VK64WM",              TYPE_VK)
+  TYPE("VK1Pair",             TYPE_VK_PAIR)
+  TYPE("VK2Pair",             TYPE_VK_PAIR)
+  TYPE("VK4Pair",             TYPE_VK_PAIR)
+  TYPE("VK8Pair",             TYPE_VK_PAIR)
+  TYPE("VK16Pair",            TYPE_VK_PAIR)
   TYPE("vx64mem",             TYPE_MVSIBX)
   TYPE("vx128mem",            TYPE_MVSIBX)
   TYPE("vx256mem",            TYPE_MVSIBX)
@@ -1016,6 +1021,11 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
   ENCODING("VK16",            ENCODING_RM)
   ENCODING("VK32",            ENCODING_RM)
   ENCODING("VK64",            ENCODING_RM)
+  ENCODING("VK1PAIR",         ENCODING_RM)
+  ENCODING("VK2PAIR",         ENCODING_RM)
+  ENCODING("VK4PAIR",         ENCODING_RM)
+  ENCODING("VK8PAIR",         ENCODING_RM)
+  ENCODING("VK16PAIR",        ENCODING_RM)
   ENCODING("BNDR",            ENCODING_RM)
   errs() << "Unhandled R/M register encoding " << s << "\n";
   llvm_unreachable("Unhandled R/M register encoding");
@@ -1050,6 +1060,11 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
   ENCODING("VK16",            ENCODING_REG)
   ENCODING("VK32",            ENCODING_REG)
   ENCODING("VK64",            ENCODING_REG)
+  ENCODING("VK1Pair",         ENCODING_REG)
+  ENCODING("VK2Pair",         ENCODING_REG)
+  ENCODING("VK4Pair",         ENCODING_REG)
+  ENCODING("VK8Pair",         ENCODING_REG)
+  ENCODING("VK16Pair",        ENCODING_REG)
   ENCODING("VK1WM",           ENCODING_REG)
   ENCODING("VK2WM",           ENCODING_REG)
   ENCODING("VK4WM",           ENCODING_REG)
@@ -1084,6 +1099,11 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
   ENCODING("VK16",            ENCODING_VVVV)
   ENCODING("VK32",            ENCODING_VVVV)
   ENCODING("VK64",            ENCODING_VVVV)
+  ENCODING("VK1PAIR",         ENCODING_VVVV)
+  ENCODING("VK2PAIR",         ENCODING_VVVV)
+  ENCODING("VK4PAIR",         ENCODING_VVVV)
+  ENCODING("VK8PAIR",         ENCODING_VVVV)
+  ENCODING("VK16PAIR",        ENCODING_VVVV)
   errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
   llvm_unreachable("Unhandled VEX.vvvv register encoding");
 }

From 20b80fc4842de363a433fc4bc1a05fa44fd8ec12 Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Fri, 31 May 2019 03:45:11 +0000
Subject: [PATCH 0710/1176] Fix bad go bindings test.

After r362128, the "byval" attribute has a stricter check and will cause an
assertion.  Remove the "byval" test case for now.

llvm-svn: 362189
---
 llvm/bindings/go/llvm/ir_test.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/bindings/go/llvm/ir_test.go b/llvm/bindings/go/llvm/ir_test.go
index 5dd0598c01268..601a22589d70b 100644
--- a/llvm/bindings/go/llvm/ir_test.go
+++ b/llvm/bindings/go/llvm/ir_test.go
@@ -51,7 +51,6 @@ func TestAttributes(t *testing.T) {
 		"sanitize_address",
 		"alwaysinline",
 		"builtin",
-		"byval",
 		"convergent",
 		"inalloca",
 		"inlinehint",

From fc3ed1ec506714abcc5f779b685d149419e1a207 Mon Sep 17 00:00:00 2001
From: Zi Xuan Wu <wuzish@cn.ibm.com>
Date: Fri, 31 May 2019 04:42:13 +0000
Subject: [PATCH 0711/1176] re-commit r361928: [PowerPC] [Clang] Port SSE
 intrinsics to PowerPC

Port xmmintrin.h which include Intel SSE intrinsics implementation to PowerPC platform (using Altivec).

The new headers containing those implemenations are located into a directory named ppc_wrappers
which has higher priority when the platform is PowerPC on Linux. They are mainly developed by Steven Munroe,
with contributions from Paul Clarke, Bill Schmidt, Jinsong Ji and Zixuan Wu.

Patched by: Qiu Chaofan <qiucf@cn.ibm.com>
Reviewed By: Jinsong Ji

Differential Revision: https://reviews.llvm.org/D62121

llvm-svn: 362190
---
 clang/lib/Headers/CMakeLists.txt           |    2 +
 clang/lib/Headers/ppc_wrappers/mm_malloc.h |   48 +
 clang/lib/Headers/ppc_wrappers/xmmintrin.h | 1838 +++++++++++++++++
 clang/test/CodeGen/ppc-mm-malloc-le.c      |   72 +
 clang/test/CodeGen/ppc-mm-malloc.c         |   72 +
 clang/test/CodeGen/ppc-mmintrin.c          |    9 +-
 clang/test/CodeGen/ppc-xmmintrin.c         | 2090 ++++++++++++++++++++
 clang/test/Headers/ppc-intrinsics.c        |   13 -
 clang/test/Headers/ppc-mmx-intrinsics.c    |   11 +
 clang/test/Headers/ppc-sse-intrinsics.c    |   22 +
 10 files changed, 4160 insertions(+), 17 deletions(-)
 create mode 100644 clang/lib/Headers/ppc_wrappers/mm_malloc.h
 create mode 100644 clang/lib/Headers/ppc_wrappers/xmmintrin.h
 create mode 100644 clang/test/CodeGen/ppc-mm-malloc-le.c
 create mode 100644 clang/test/CodeGen/ppc-mm-malloc.c
 create mode 100644 clang/test/CodeGen/ppc-xmmintrin.c
 delete mode 100644 clang/test/Headers/ppc-intrinsics.c
 create mode 100644 clang/test/Headers/ppc-mmx-intrinsics.c
 create mode 100644 clang/test/Headers/ppc-sse-intrinsics.c

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 392ca2ae391c9..f7a3e5410ced5 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -126,6 +126,8 @@ set(cuda_wrapper_files
 
 set(ppc_wrapper_files
   ppc_wrappers/mmintrin.h
+  ppc_wrappers/xmmintrin.h
+  ppc_wrappers/mm_malloc.h
 )
 
 set(openmp_wrapper_files
diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
new file mode 100644
index 0000000000000..36589194b3e2f
--- /dev/null
+++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
@@ -0,0 +1,48 @@
+/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+   may not be visible.  */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment)
+{
+  /* PowerPC64 ELF V2 ABI requires quadword alignment.  */
+  size_t vec_align = sizeof (__vector float);
+  /* Linux GLIBC malloc alignment is at least 2 X ptr size.  */
+  size_t malloc_align = (sizeof (void *) + sizeof (void *));
+  void *ptr;
+
+  if (alignment == malloc_align && alignment == vec_align)
+    return malloc (size);
+  if (alignment < vec_align)
+    alignment = vec_align;
+  if (posix_memalign (&ptr, alignment, size) == 0)
+    return ptr;
+  else
+    return NULL;
+}
+
+static __inline void
+_mm_free (void * ptr)
+{
+  free (ptr);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
new file mode 100644
index 0000000000000..1b322b66519a6
--- /dev/null
+++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
@@ -0,0 +1,1838 @@
+/*===---- xmmintrin.h - Implementation of SSE intrinsics on PowerPC --------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+   User Guide and Reference, version 9.0.  */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header file is to help porting code using Intel intrinsics
+   explicitly from x86_64 to powerpc64/powerpc64le.
+
+   Since X86 SSE intrinsics mainly handles __m128 type, PowerPC
+   VMX/VSX ISA is a good match for vector float SIMD operations.
+   However scalar float operations in vector (XMM) registers require
+   the POWER8 VSX ISA (2.07) level. There are differences for data
+   format and placement of float scalars in the vector register, which
+   require extra steps to match SSE scalar float semantics on POWER.
+
+   It should be noted that there's much difference between X86_64's
+   MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use
+   portable <fenv.h> instead of access MXSCR directly.
+
+   Most SSE scalar float intrinsic operations can be performed more
+   efficiently as C language float scalar operations or optimized to
+   use vector SIMD operations. We recommend this for new applications. */
+#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
+#endif
+
+#ifndef _XMMINTRIN_H_INCLUDED
+#define _XMMINTRIN_H_INCLUDED
+
+/* Define four value permute mask */
+#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
+
+#include <altivec.h>
+
+/* Avoid collisions between altivec.h and strict adherence to C++ and
+   C11 standards.  This should eventually be done inside altivec.h itself,
+   but only after testing a full distro build.  */
+#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \
+				 (defined(__STDC_VERSION__) &&	\
+				  __STDC_VERSION__ >= 201112L))
+#undef vector
+#undef pixel
+#undef bool
+#endif
+
+/* We need type definitions from the MMX header file.  */
+#include <mmintrin.h>
+
+/* Get _mm_malloc () and _mm_free ().  */
+#if __STDC_HOSTED__
+#include <mm_malloc.h>
+#endif
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components.  */
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Unaligned version of the same type.  */
+typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
+				       __aligned__ (1)));
+
+/* Internal data types for implementing the intrinsics.  */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Create an undefined vector.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+  __m128 __Y = __Y;
+  return __Y;
+}
+
+/* Create a vector of zeros.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ps (void)
+{
+  return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
+/* Load four SPFP values from P.  The address must be 16-byte aligned.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps (float const *__P)
+{
+  return ((__m128)vec_ld(0, (__v4sf*)__P));
+}
+
+/* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ps (float const *__P)
+{
+  return (vec_vsx_ld(0, __P));
+}
+
+/* Load four SPFP values in reverse order.  The address must be aligned.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_ps (float const *__P)
+{
+  __v4sf   __tmp;
+  __m128 result;
+  static const __vector unsigned char permute_vector =
+    { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
+	0x17, 0x10, 0x11, 0x12, 0x13 };
+
+  __tmp = vec_ld (0, (__v4sf *) __P);
+  result = (__m128) vec_perm (__tmp, __tmp, permute_vector);
+  return result;
+}
+
+/* Create a vector with all four elements equal to F.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ps (float __F)
+{
+  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps1 (float __F)
+{
+  return _mm_set1_ps (__F);
+}
+
+/* Create the vector [Z Y X W].  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
+}
+
+/* Create the vector [W X Y Z].  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+  return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
+}
+
+/* Store four SPFP values.  The address must be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps (float *__P, __m128 __A)
+{
+  vec_st((__v4sf)__A, 0, (__v4sf*)__P);
+}
+
+/* Store four SPFP values.  The address need not be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ps (float *__P, __m128 __A)
+{
+  *(__m128_u *)__P = __A;
+}
+
+/* Store four SPFP values in reverse order.  The address must be aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_ps (float *__P, __m128 __A)
+{
+  __v4sf   __tmp;
+  static const __vector unsigned char permute_vector =
+    { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
+	0x17, 0x10, 0x11, 0x12, 0x13 };
+
+  __tmp = (__m128) vec_perm (__A, __A, permute_vector);
+
+  _mm_store_ps (__P, __tmp);
+}
+
+/* Store the lower SPFP value across four words.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_ps (float *__P, __m128 __A)
+{
+  __v4sf __va = vec_splat((__v4sf)__A, 0);
+  _mm_store_ps (__P, __va);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps1 (float *__P, __m128 __A)
+{
+  _mm_store1_ps (__P, __A);
+}
+
+/* Create a vector with element 0 as F and the rest zero.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ss (float __F)
+{
+  return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
+}
+
+/* Sets the low SPFP value of A from the low value of B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+
+  return (vec_sel ((__v4sf)__A, (__v4sf)__B, mask));
+}
+
+/* Create a vector with element 0 as *P and the rest zero.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ss (float const *__P)
+{
+  return _mm_set_ss (*__P);
+}
+
+/* Stores the lower SPFP value.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ss (float *__P, __m128 __A)
+{
+  *__P = ((__v4sf)__A)[0];
+}
+
+/* Perform the respective operation on the lower SPFP (single-precision
+   floating-point) values of A and B; the upper three SPFP values are
+   passed through from A.  */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a + b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] + __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a - b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] - __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a * b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] * __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+#ifdef _ARCH_PWR7
+  __m128 a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+     results. So to insure we don't generate spurious exceptions
+     (from the upper double values) we splat the lower double
+     before we to the operation.  */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = a / b;
+  /* Then we merge the lower float result with the original upper
+     float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+#else
+  __A[0] = __A[0] / __B[0];
+  return (__A);
+#endif
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ss (__m128 __A)
+{
+  __m128 a, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper double values) we splat the lower double
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  c = vec_sqrt (a);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+}
+
+/* Perform the respective operation on the four SPFP values in A and B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A + (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A - (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A * (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) ((__v4sf)__A / (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ps (__m128 __A)
+{
+  return (vec_sqrt ((__v4sf)__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ps (__m128 __A)
+{
+  return (vec_re ((__v4sf)__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ps (__m128 __A)
+{
+  return (vec_rsqrte (__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ss (__m128 __A)
+{
+  __m128 a, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper double values) we splat the lower double
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  c = _mm_rcp_ps (a);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ss (__m128 __A)
+{
+  __m128 a, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower double)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper double values) we splat the lower double
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  c = vec_rsqrte (a);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel (__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+  __v4sf a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower float)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper float values) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf)__A, 0);
+  b = vec_splat ((__v4sf)__B, 0);
+  c = vec_min (a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+  __v4sf a, b, c;
+  static const __vector unsigned int mask = {0xffffffff, 0, 0, 0};
+  /* PowerISA VSX does not allow partial (for just lower float)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper float values) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat (__A, 0);
+  b = vec_splat (__B, 0);
+  c = vec_max (a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return (vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+  __vector __bool int m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, m);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+  __vector __bool int m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, m);
+}
+
+/* Perform logical bit-wise operations on 128-bit values.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_and ((__v4sf)__A, (__v4sf)__B));
+//  return __builtin_ia32_andps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_andc ((__v4sf)__B, (__v4sf)__A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_or ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_xor ((__v4sf)__A, (__v4sf)__B));
+}
+
+/* Perform a comparison on the four SPFP values of A and B.  For each
+   element, if the comparison is true, place a mask of all ones in the
+   result, otherwise a mask of zeros.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpeq ((__v4sf)__A,(__v4sf) __B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ps (__m128  __A, __m128  __B)
+{
+  __v4sf temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B);
+  return ((__m128)vec_nor (temp, temp));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+  return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B));
+}
+
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ps (__m128  __A, __m128  __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
+  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
+  return ((__m128 ) vec_and (c, d));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
+  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
+  return ((__m128 ) vec_or (c, d));
+}
+
+/* Perform a comparison on the lower SPFP values of A and B.  If the
+   comparison is true, place a mask of all ones in the result, otherwise a
+   mask of zeros.  The upper three SPFP values are passed through from A.  */
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ss (__m128  __A, __m128  __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpeq(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmplt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmple(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpgt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpge(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpeq(a, b);
+  c = vec_nor (c, c);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpge(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmpgt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we to the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmple(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+  __v4sf a, b, c;
+  /* PowerISA VMX does not allow partial (for just element 0)
+   * results. So to insure we don't generate spurious exceptions
+   * (from the upper elements) we splat the lower float
+   * before we do the operation. */
+  a = vec_splat ((__v4sf) __A, 0);
+  b = vec_splat ((__v4sf) __B, 0);
+  c = (__v4sf) vec_cmplt(a, b);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a);
+  d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b);
+  c = vec_and (c, d);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+  __vector unsigned int a, b;
+  __vector unsigned int c, d;
+  static const __vector unsigned int float_exp_mask =
+    { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
+  static const __vector unsigned int mask =
+    { 0xffffffff, 0, 0, 0 };
+
+  a = (__vector unsigned int) vec_abs ((__v4sf)__A);
+  b = (__vector unsigned int) vec_abs ((__v4sf)__B);
+  c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask);
+  d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask);
+  c = vec_or (c, d);
+  /* Then we merge the lower float result with the original upper
+   * float elements from __A.  */
+  return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask));
+}
+
+/* Compare the lower SPFP values of A and B and return 1 if true
+   and 0 if false.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] == __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] < __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] <= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] > __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] >= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] != __B[0]);
+}
+
+/* FIXME
+ * The __mm_ucomi??_ss implementations below are exactly the same as
+ * __mm_comi??_ss because GCC for PowerPC only generates unordered
+ * compares (scalar and vector).
+ * Technically __mm_comieq_ss et al should be using the ordered
+ * compare and signal for QNaNs.
+ * The __mm_ucomieq_sd et all should be OK, as is.
+ */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] == __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] < __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] <= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] > __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] >= __B[0]);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+  return (__A[0] != __B[0]);
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_f32 (__m128 __A)
+{
+  return ((__v4sf)__A)[0];
+}
+
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+   rounding mode.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si32 (__m128 __A)
+{
+  __m64 res = 0;
+#ifdef _ARCH_PWR8
+  double dtmp;
+  __asm__(
+#ifdef __LITTLE_ENDIAN__
+      "xxsldwi %x0,%x0,%x0,3;\n"
+#endif
+      "xscvspdp %x2,%x0;\n"
+      "fctiw  %2,%2;\n"
+      "mfvsrd  %1,%x2;\n"
+      : "+wa" (__A),
+        "=r" (res),
+        "=f" (dtmp)
+      : );
+#else
+  res = __builtin_rint(__A[0]);
+#endif
+  return (res);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ss2si (__m128 __A)
+{
+  return _mm_cvtss_si32 (__A);
+}
+
+/* Convert the lower SPFP value to a 32-bit integer according to the
+   current rounding mode.  */
+
+/* Intel intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64 (__m128 __A)
+{
+  __m64 res = 0;
+#ifdef _ARCH_PWR8
+  double dtmp;
+  __asm__(
+#ifdef __LITTLE_ENDIAN__
+      "xxsldwi %x0,%x0,%x0,3;\n"
+#endif
+      "xscvspdp %x2,%x0;\n"
+      "fctid  %2,%2;\n"
+      "mfvsrd  %1,%x2;\n"
+      : "+wa" (__A),
+        "=r" (res),
+        "=f" (dtmp)
+      : );
+#else
+  res = __builtin_llrint(__A[0]);
+#endif
+  return (res);
+}
+
+/* Microsoft intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64x (__m128 __A)
+{
+  return _mm_cvtss_si64 ((__v4sf) __A);
+}
+
+/* Constants for use with _mm_prefetch.  */
+enum _mm_hint
+{
+  /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit.  */
+  _MM_HINT_ET0 = 7,
+  _MM_HINT_ET1 = 6,
+  _MM_HINT_T0 = 3,
+  _MM_HINT_T1 = 2,
+  _MM_HINT_T2 = 1,
+  _MM_HINT_NTA = 0
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+   processor.  The selector I specifies the type of prefetch operation.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+  /* Current PowerPC will ignores the hint parameters.  */
+  __builtin_prefetch (__P);
+}
+
+/* Convert the two lower SPFP values to 32-bit integers according to the
+   current rounding mode.  Return the integers in packed form.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi32 (__m128 __A)
+{
+  /* Splat two lower SPFP values to both halves.  */
+  __v4sf temp, rounded;
+  __vector unsigned long long result;
+
+  /* Splat two lower SPFP values to both halves.  */
+  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
+  rounded = vec_rint(temp);
+  result = (__vector unsigned long long) vec_cts (rounded, 0);
+
+  return (__m64) ((__vector long long) result)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ps2pi (__m128 __A)
+{
+  return _mm_cvtps_pi32 (__A);
+}
+
+/* Truncate the lower SPFP value to a 32-bit integer.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si32 (__m128 __A)
+{
+  /* Extract the lower float element.  */
+  float temp = __A[0];
+  /* truncate to 32-bit integer and return.  */
+  return temp;
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ss2si (__m128 __A)
+{
+  return _mm_cvttss_si32 (__A);
+}
+
+/* Intel intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64 (__m128 __A)
+{
+  /* Extract the lower float element.  */
+  float temp = __A[0];
+  /* truncate to 32-bit integer and return.  */
+  return temp;
+}
+
+/* Microsoft intrinsic.  */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64x (__m128 __A)
+{
+  /* Extract the lower float element.  */
+  float temp = __A[0];
+  /* truncate to 32-bit integer and return.  */
+  return temp;
+}
+
+/* Truncate the two lower SPFP values to 32-bit integers.  Return the
+   integers in packed form.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_pi32 (__m128 __A)
+{
+  __v4sf temp;
+  __vector unsigned long long result;
+
+  /* Splat two lower SPFP values to both halves.  */
+  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
+  result = (__vector unsigned long long) vec_cts (temp, 0);
+
+  return (__m64) ((__vector long long) result)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ps2pi (__m128 __A)
+{
+  return _mm_cvttps_pi32 (__A);
+}
+
+/* Convert B to a SPFP value and insert it as element zero in A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+  float temp = __B;
+  __A[0] = temp;
+
+  return __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+  return _mm_cvtsi32_ss (__A, __B);
+}
+
+/* Convert B to a SPFP value and insert it as element zero in A.  */
+/* Intel intrinsic.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+  float temp = __B;
+  __A[0] = temp;
+
+  return __A;
+}
+
+/* Microsoft intrinsic.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+  return _mm_cvtsi64_ss (__A, __B);
+}
+
+/* Convert the two 32-bit values in B to SPFP form and insert them
+   as the two lower elements in A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_ps (__m128        __A, __m64        __B)
+{
+  __vector signed int vm1;
+  __vector float vf1;
+
+  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
+  vf1 = (__vector float) vec_ctf (vm1, 0);
+
+  return ((__m128) (__vector unsigned long long)
+    { ((__vector unsigned long long)vf1) [0],
+	((__vector unsigned long long)__A) [1]});
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+  return _mm_cvtpi32_ps (__A, __B);
+}
+
+/* Convert the four signed 16-bit values in A to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi16_ps (__m64 __A)
+{
+  __vector signed short vs8;
+  __vector signed int vi4;
+  __vector float vf1;
+
+  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
+  vi4 = vec_vupklsh (vs8);
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the four unsigned 16-bit values in A to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu16_ps (__m64 __A)
+{
+  const __vector unsigned short zero =
+    { 0, 0, 0, 0, 0, 0, 0, 0 };
+  __vector unsigned short vs8;
+  __vector unsigned int vi4;
+  __vector float vf1;
+
+  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
+  vi4 = (__vector unsigned int) vec_mergel
+#ifdef __LITTLE_ENDIAN__
+                                           (vs8, zero);
+#else
+                                           (zero, vs8);
+#endif
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the low four signed 8-bit values in A to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi8_ps (__m64 __A)
+{
+  __vector signed char vc16;
+  __vector signed short vs8;
+  __vector signed int vi4;
+  __vector float vf1;
+
+  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
+  vs8 = vec_vupkhsb (vc16);
+  vi4 = vec_vupkhsh (vs8);
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the low four unsigned 8-bit values in A to SPFP form.  */
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
+_mm_cvtpu8_ps (__m64  __A)
+{
+  const __vector unsigned char zero =
+    { 0, 0, 0, 0, 0, 0, 0, 0 };
+  __vector unsigned char vc16;
+  __vector unsigned short vs8;
+  __vector unsigned int vi4;
+  __vector float vf1;
+
+  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
+#ifdef __LITTLE_ENDIAN__
+  vs8 = (__vector unsigned short) vec_mergel (vc16, zero);
+  vi4 = (__vector unsigned int) vec_mergeh (vs8,
+					    (__vector unsigned short) zero);
+#else
+  vs8 = (__vector unsigned short) vec_mergel (zero, vc16);
+  vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero,
+                                            vs8);
+#endif
+  vf1 = (__vector float) vec_ctf (vi4, 0);
+
+  return (__m128) vf1;
+}
+
+/* Convert the four signed 32-bit values in A and B to SPFP form.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32x2_ps (__m64 __A, __m64 __B)
+{
+  __vector signed int vi4;
+  __vector float vf4;
+
+  vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B };
+  vf4 = (__vector float) vec_ctf (vi4, 0);
+  return (__m128) vf4;
+}
+
+/* Convert the four SPFP values in A to four signed 16-bit integers.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi16 (__m128 __A)
+{
+  __v4sf rounded;
+  __vector signed int temp;
+  __vector unsigned long long result;
+
+  rounded = vec_rint(__A);
+  temp = vec_cts (rounded, 0);
+  result = (__vector unsigned long long) vec_pack (temp, temp);
+
+  return (__m64) ((__vector long long) result)[0];
+}
+
+/* Convert the four SPFP values in A to four signed 8-bit integers.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi8 (__m128 __A)
+{
+  __v4sf rounded;
+  __vector signed int tmp_i;
+  static const __vector signed int zero = {0, 0, 0, 0};
+  __vector signed short tmp_s;
+  __vector signed char res_v;
+
+  rounded = vec_rint(__A);
+  tmp_i = vec_cts (rounded, 0);
+  tmp_s = vec_pack (tmp_i, zero);
+  res_v = vec_pack (tmp_s, tmp_s);
+  return (__m64) ((__vector long long) res_v)[0];
+}
+
+/* Selects four specific SPFP values from A and B based on MASK.  */
+extern __inline  __m128  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
+_mm_shuffle_ps (__m128  __A, __m128  __B, int const __mask)
+{
+  unsigned long element_selector_10 = __mask & 0x03;
+  unsigned long element_selector_32 = (__mask >> 2) & 0x03;
+  unsigned long element_selector_54 = (__mask >> 4) & 0x03;
+  unsigned long element_selector_76 = (__mask >> 6) & 0x03;
+  static const unsigned int permute_selectors[4] =
+    {
+#ifdef __LITTLE_ENDIAN__
+      0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+#else
+      0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
+#endif
+    };
+  __vector unsigned int t;
+
+  t[0] = permute_selectors[element_selector_10];
+  t[1] = permute_selectors[element_selector_32];
+  t[2] = permute_selectors[element_selector_54] + 0x10101010;
+  t[3] = permute_selectors[element_selector_76] + 0x10101010;
+  return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t);
+}
+
+/* Selects and interleaves the upper two SPFP values from A and B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_vmrglw ((__v4sf) __A, (__v4sf)__B);
+}
+
+/* Selects and interleaves the lower two SPFP values from A and B.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_vmrghw ((__v4sf) __A, (__v4sf)__B);
+}
+
+/* Sets the upper two SPFP values with 64-bits of data loaded from P;
+   the lower two values are passed through from A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
+  __a [1] = __p [1];
+
+  return (__m128)__a;
+}
+
+/* Stores the upper two SPFP values of A into P.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
+
+  *__P = __a[1];
+}
+
+/* Moves the upper two values of B into the lower two values of A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_mergel ((__vector unsigned long long)__B,
+			      (__vector unsigned long long)__A);
+}
+
+/* Moves the lower two values of B into the upper two values of A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+  return (__m128) vec_mergeh ((__vector unsigned long long)__A,
+			      (__vector unsigned long long)__B);
+}
+
+/* Sets the lower two SPFP values with 64-bits of data loaded from P;
+   the upper two values are passed through from A.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+  __vector unsigned long long __a = (__vector unsigned long long)__A;
+  __vector unsigned long long __p = vec_splats(*__P);
+  __a [0] = __p [0];
+
+  return (__m128)__a;
+}
+
+/* Stores the lower two SPFP values of A into P.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+  __vector unsigned long long __a = (__vector unsigned long long) __A;
+
+  *__P = __a[0];
+}
+
+#ifdef _ARCH_PWR8
+/* Intrinsic functions that require PowerISA 2.07 minimum.  */
+
+/* Creates a 4-bit mask from the most significant bits of the SPFP values.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_ps (__m128  __A)
+{
+  __vector unsigned long long result;
+  static const __vector unsigned int perm_mask =
+    {
+#ifdef __LITTLE_ENDIAN__
+	0x00204060, 0x80808080, 0x80808080, 0x80808080
+#else
+      0x80808080, 0x80808080, 0x80808080, 0x00204060
+#endif
+    };
+
+  result = ((__vector unsigned long long)
+	    vec_vbpermq ((__vector unsigned char) __A,
+			 (__vector unsigned char) perm_mask));
+
+#ifdef __LITTLE_ENDIAN__
+  return result[1];
+#else
+  return result[0];
+#endif
+}
+#endif /* _ARCH_PWR8 */
+
+/* Create a vector with all four elements equal to *P.  */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_ps (float const *__P)
+{
+  return _mm_set1_ps (*__P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps1 (float const *__P)
+{
+  return _mm_load1_ps (__P);
+}
+
+/* Extracts one of the four words of A.  The selector N must be immediate.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+  unsigned int shiftr = __N & 3;
+#ifdef __BIG_ENDIAN__
+  shiftr = 3 - shiftr;
+#endif
+
+  return ((__A >> (shiftr * 16)) & 0xffff);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pextrw (__m64 const __A, int const __N)
+{
+  return _mm_extract_pi16 (__A, __N);
+}
+
+/* Inserts word D into one of four words of A.  The selector N must be
+   immediate.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+  const int shiftl = (__N & 3) * 16;
+  const __m64 shiftD = (const __m64) __D << shiftl;
+  const __m64 mask = 0xffffUL << shiftl;
+  __m64 result = (__A & (~mask)) | (shiftD & mask);
+
+  return (result);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{
+  return _mm_insert_pi16 (__A, __D, __N);
+}
+
+/* Compute the element-wise maximum of signed 16-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector signed short a, b, r;
+  __vector __bool short c;
+
+  a = (__vector signed short)vec_splats (__A);
+  b = (__vector signed short)vec_splats (__B);
+  c = (__vector __bool short)vec_cmpgt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+  res.as_short[0] =
+      (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
+  res.as_short[1] =
+      (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
+  res.as_short[2] =
+      (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
+  res.as_short[3] =
+      (m1.as_short[3] > m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+  return _mm_max_pi16 (__A, __B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector unsigned char a, b, r;
+  __vector __bool char c;
+
+  a = (__vector unsigned char)vec_splats (__A);
+  b = (__vector unsigned char)vec_splats (__B);
+  c = (__vector __bool char)vec_cmpgt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+  long i;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+
+  for (i = 0; i < 8; i++)
+  res.as_char[i] =
+      ((unsigned char) m1.as_char[i] > (unsigned char) m2.as_char[i]) ?
+	  m1.as_char[i] : m2.as_char[i];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+  return _mm_max_pu8 (__A, __B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector signed short a, b, r;
+  __vector __bool short c;
+
+  a = (__vector signed short)vec_splats (__A);
+  b = (__vector signed short)vec_splats (__B);
+  c = (__vector __bool short)vec_cmplt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+  res.as_short[0] =
+      (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
+  res.as_short[1] =
+      (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
+  res.as_short[2] =
+      (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
+  res.as_short[3] =
+      (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminsw (__m64 __A, __m64 __B)
+{
+  return _mm_min_pi16 (__A, __B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+#if _ARCH_PWR8
+  __vector unsigned char a, b, r;
+  __vector __bool char c;
+
+  a = (__vector unsigned char)vec_splats (__A);
+  b = (__vector unsigned char)vec_splats (__B);
+  c = (__vector __bool char)vec_cmplt (a, b);
+  r = vec_sel (b, a, c);
+  return (__m64) ((__vector long long) r)[0];
+#else
+  __m64_union m1, m2, res;
+  long i;
+
+  m1.as_m64 = __A;
+  m2.as_m64 = __B;
+
+
+  for (i = 0; i < 8; i++)
+  res.as_char[i] =
+      ((unsigned char) m1.as_char[i] < (unsigned char) m2.as_char[i]) ?
+	  m1.as_char[i] : m2.as_char[i];
+
+  return (__m64) res.as_m64;
+#endif
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminub (__m64 __A, __m64 __B)
+{
+  return _mm_min_pu8 (__A, __B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+  unsigned long long p =
+#ifdef __LITTLE_ENDIAN__
+                         0x0008101820283038UL; // permute control for sign bits
+#else
+                         0x3830282018100800UL; // permute control for sign bits
+#endif
+  return __builtin_bpermd (p, __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmovmskb (__m64 __A)
+{
+  return _mm_movemask_pi8 (__A);
+}
+
+/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
+   in B and produce the high 16 bits of the 32-bit results.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+  __vector unsigned short a, b;
+  __vector unsigned short c;
+  __vector unsigned int w0, w1;
+  __vector unsigned char xform1 = {
+#ifdef __LITTLE_ENDIAN__
+      0x02, 0x03, 0x12, 0x13,  0x06, 0x07, 0x16, 0x17,
+      0x0A, 0x0B, 0x1A, 0x1B,  0x0E, 0x0F, 0x1E, 0x1F
+#else
+      0x00, 0x01, 0x10, 0x11,  0x04, 0x05, 0x14, 0x15,
+      0x00, 0x01, 0x10, 0x11,  0x04, 0x05, 0x14, 0x15
+#endif
+    };
+
+  a = (__vector unsigned short)vec_splats (__A);
+  b = (__vector unsigned short)vec_splats (__B);
+
+  w0 = vec_vmuleuh (a, b);
+  w1 = vec_vmulouh (a, b);
+  c = (__vector unsigned short)vec_perm (w0, w1, xform1);
+
+  return (__m64) ((__vector long long) c)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+  return _mm_mulhi_pu16 (__A, __B);
+}
+
+/* Return a combination of the four 16-bit values in A.  The selector
+   must be an immediate.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+  unsigned long element_selector_10 = __N & 0x03;
+  unsigned long element_selector_32 = (__N >> 2) & 0x03;
+  unsigned long element_selector_54 = (__N >> 4) & 0x03;
+  unsigned long element_selector_76 = (__N >> 6) & 0x03;
+  static const unsigned short permute_selectors[4] =
+    {
+#ifdef __LITTLE_ENDIAN__
+	      0x0908, 0x0B0A, 0x0D0C, 0x0F0E
+#else
+	      0x0607, 0x0405, 0x0203, 0x0001
+#endif
+    };
+  __m64_union t;
+  __vector unsigned long long a, p, r;
+
+#ifdef __LITTLE_ENDIAN__
+  t.as_short[0] = permute_selectors[element_selector_10];
+  t.as_short[1] = permute_selectors[element_selector_32];
+  t.as_short[2] = permute_selectors[element_selector_54];
+  t.as_short[3] = permute_selectors[element_selector_76];
+#else
+  t.as_short[3] = permute_selectors[element_selector_10];
+  t.as_short[2] = permute_selectors[element_selector_32];
+  t.as_short[1] = permute_selectors[element_selector_54];
+  t.as_short[0] = permute_selectors[element_selector_76];
+#endif
+  p = vec_splats (t.as_m64);
+  a = vec_splats (__A);
+  r = vec_perm (a, a, (__vector unsigned char)p);
+  return (__m64) ((__vector long long) r)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pshufw (__m64 __A, int const __N)
+{
+  return _mm_shuffle_pi16 (__A, __N);
+}
+
+/* Conditionally store byte elements of A into P.  The high bit of each
+   byte in the selector N determines whether the corresponding byte from
+   A is stored.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
+  __m64 hibit = 0x8080808080808080UL;
+  __m64 mask, tmp;
+  __m64 *p = (__m64*)__P;
+
+  tmp = *p;
+  mask = _mm_cmpeq_pi8 ((__N & hibit), hibit);
+  tmp = (tmp & (~mask)) | (__A & mask);
+  *p = tmp;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+  _mm_maskmove_si64 (__A, __N, __P);
+}
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+  __vector unsigned char a, b, c;
+
+  a = (__vector unsigned char)vec_splats (__A);
+  b = (__vector unsigned char)vec_splats (__B);
+  c = vec_avg (a, b);
+  return (__m64) ((__vector long long) c)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgb (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu8 (__A, __B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+  __vector unsigned short a, b, c;
+
+  a = (__vector unsigned short)vec_splats (__A);
+  b = (__vector unsigned short)vec_splats (__B);
+  c = vec_avg (a, b);
+  return (__m64) ((__vector long long) c)[0];
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgw (__m64 __A, __m64 __B)
+{
+  return _mm_avg_pu16 (__A, __B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+   values in A and B.  Return the value in the lower 16-bit word; the
+   upper words are cleared.  */
+extern __inline    __m64    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_pu8 (__m64  __A, __m64  __B)
+{
+  __vector unsigned char a, b;
+  __vector unsigned char vmin, vmax, vabsdiff;
+  __vector signed int vsum;
+  const __vector unsigned int zero =
+    { 0, 0, 0, 0 };
+  __m64_union result = {0};
+
+  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
+  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
+  vmin = vec_min (a, b);
+  vmax = vec_max (a, b);
+  vabsdiff = vec_sub (vmax, vmin);
+  /* Sum four groups of bytes into integers.  */
+  vsum = (__vector signed int) vec_sum4s (vabsdiff, zero);
+  /* Sum across four integers with integer result.  */
+  vsum = vec_sums (vsum, (__vector signed int) zero);
+  /* The sum is in the right most 32-bits of the vector result.
+     Transfer to a GPR and truncate to 16 bits.  */
+  result.as_short[0] = vsum[3];
+  return result.as_m64;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psadbw (__m64 __A, __m64 __B)
+{
+  return _mm_sad_pu8 (__A, __B);
+}
+
+/* Stores the data in A to the address P without polluting the caches.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+  /* Use the data cache block touch for store transient.  */
+  __asm__ (
+    "	dcbtstt	0,%0"
+    :
+    : "b" (__P)
+    : "memory"
+  );
+  *__P = __A;
+}
+
+/* Likewise.  The address must be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ps (float *__P, __m128 __A)
+{
+  /* Use the data cache block touch for store transient.  */
+  __asm__ (
+    "	dcbtstt	0,%0"
+    :
+    : "b" (__P)
+    : "memory"
+  );
+  _mm_store_ps (__P, __A);
+}
+
+/* Guarantees that every preceding store is globally visible before
+   any subsequent store.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sfence (void)
+{
+  /* Generate a light weight sync.  */
+  __atomic_thread_fence (__ATOMIC_RELEASE);
+}
+
+/* The execution of the next instruction is delayed by an implementation
+   specific amount of time.  The instruction does not modify the
+   architectural state.  This is after the pop_options pragma because
+   it does not require SSE support in the processor--the encoding is a
+   nop on processors that do not support it.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+  /* There is no exact match with this construct, but the following is
+     close to the desired effect.  */
+#if _ARCH_PWR8
+  /* On power8 and later processors we can depend on Program Priority
+     (PRI) and associated "very low" PPI setting.  Since we don't know
+     what PPI this thread is running at we: 1) save the current PRI
+     from the PPR SPR into a local GRP, 2) set the PRI to "very low*
+     via the special or 31,31,31 encoding. 3) issue an "isync" to
+     insure the PRI change takes effect before we execute any more
+     instructions.
+     Now we can execute a lwsync (release barrier) while we execute
+     this thread at "very low" PRI.  Finally we restore the original
+     PRI and continue execution.  */
+  unsigned long __PPR;
+
+  __asm__ volatile (
+    "	mfppr	%0;"
+    "   or 31,31,31;"
+    "   isync;"
+    "   lwsync;"
+    "   isync;"
+    "   mtppr	%0;"
+    : "=r" (__PPR)
+    :
+    : "memory"
+  );
+#else
+  /* For older processor where we may not even have Program Priority
+     controls we can only depend on Heavy Weight Sync.  */
+  __atomic_thread_fence (__ATOMIC_SEQ_CST);
+#endif
+}
+
+/* Transpose the 4x4 matrix composed of row[0-3].  */
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)			\
+do {									\
+  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);	\
+  __v4sf __t0 = vec_vmrghw (__r0, __r1);			\
+  __v4sf __t1 = vec_vmrghw (__r2, __r3);			\
+  __v4sf __t2 = vec_vmrglw (__r0, __r1);			\
+  __v4sf __t3 = vec_vmrglw (__r2, __r3);			\
+  (row0) = (__v4sf)vec_mergeh ((__vector long long)__t0, 	\
+			       (__vector long long)__t1);	\
+  (row1) = (__v4sf)vec_mergel ((__vector long long)__t0,	\
+			       (__vector long long)__t1);	\
+  (row2) = (__v4sf)vec_mergeh ((__vector long long)__t2,	\
+			       (__vector long long)__t3);	\
+  (row3) = (__v4sf)vec_mergel ((__vector long long)__t2,	\
+			       (__vector long long)__t3);	\
+} while (0)
+
+/* For backward source compatibility.  */
+//# include <emmintrin.h>
+
+#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/clang/test/CodeGen/ppc-mm-malloc-le.c b/clang/test/CodeGen/ppc-mm-malloc-le.c
new file mode 100644
index 0000000000000..14c1b25d280f6
--- /dev/null
+++ b/clang/test/CodeGen/ppc-mm-malloc-le.c
@@ -0,0 +1,72 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+// UNSUPPORTED: !powerpc64le-
+// The stdlib.h included in mm_malloc.h references native system header
+// like: bits/libc-header-start.h or features.h, cross-compile it may
+// require installing target headers in build env, otherwise expecting
+// failures. So this test will focus on native build only.
+
+// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
+
+#include <mm_malloc.h>
+
+
+void __attribute__((noinline))
+test_mm_malloc() {
+  char *buf = _mm_malloc(100, 16);
+  _mm_free(buf);
+}
+
+// CHECK-LABEL: @test_mm_malloc
+
+// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
+// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
+// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
+// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
+// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG11]]:
+// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
+// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG16]]:
+// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
+// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
+// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG12]]:
+// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
+// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG23]]:
+// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
+// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
+// CHECK: [[REG24]]:
+// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
+// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
+// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
+// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG31]]:
+// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
+// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG32]]:
+// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG19]]:
+// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
+// CHECK-NEXT: ret i8* [[REG34]]
+
+// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
+// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
+// CHECK-NEXT: call void @free(i8* [[REG37]])
+// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/ppc-mm-malloc.c b/clang/test/CodeGen/ppc-mm-malloc.c
new file mode 100644
index 0000000000000..b85d8d98c5982
--- /dev/null
+++ b/clang/test/CodeGen/ppc-mm-malloc.c
@@ -0,0 +1,72 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+// UNSUPPORTED: !powerpc64-
+// The stdlib.h included in mm_malloc.h references native system header
+// like: bits/libc-header-start.h or features.h, cross-compile it may
+// require installing target headers in build env, otherwise expecting
+// failures. So this test will focus on native build only.
+
+// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s
+
+#include <mm_malloc.h>
+
+
+void __attribute__((noinline))
+test_mm_malloc() {
+  char *buf = _mm_malloc(100, 16);
+  _mm_free(buf);
+}
+
+// CHECK-LABEL: @test_mm_malloc
+
+// CHECK: define internal i8* @_mm_malloc(i64 [[REG1:[0-9a-zA-Z_%.]+]], i64 [[REG2:[0-9a-zA-Z_%.]+]])
+// CHECK: [[REG3:[0-9a-zA-Z_%.]+]] = alloca i8*, align 8
+// CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
+// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
+// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG11]]:
+// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
+// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG16]]:
+// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
+// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
+// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG12]]:
+// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
+// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG23]]:
+// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
+// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
+// CHECK: [[REG24]]:
+// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
+// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
+// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = call signext i32 @posix_memalign(i8** [[REG29:[0-9a-zA-Z_%.]+]], i64 [[REG26]], i64 [[REG27]])
+// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = icmp eq i32 [[REG28]], 0
+// CHECK-NEXT: br i1 [[REG30]], label %[[REG31:[0-9a-zA-Z_%.]+]], label %[[REG32:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG31]]:
+// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG29]], align 8
+// CHECK-NEXT: store i8* [[REG33]], i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG32]]:
+// CHECK-NEXT: store i8* null, i8** [[REG3]], align 8
+// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG19]]:
+// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG3]], align 8
+// CHECK-NEXT: ret i8* [[REG34]]
+
+// CHECK: define internal void @_mm_free(i8* [[REG35:[0-9a-zA-Z_%.]+]])
+// CHECK: store i8* [[REG35]], i8** [[REG36:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i8*, i8** [[REG36]], align 8
+// CHECK-NEXT: call void @free(i8* [[REG37]])
+// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/ppc-mmintrin.c b/clang/test/CodeGen/ppc-mmintrin.c
index 212a387ec35b8..019672863331d 100644
--- a/clang/test/CodeGen/ppc-mmintrin.c
+++ b/clang/test/CodeGen/ppc-mmintrin.c
@@ -1,12 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: powerpc-registered-target
 
-// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE
-// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE
-// RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE
-// RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
+// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE
 
 #include <mmintrin.h>
diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/ppc-xmmintrin.c
new file mode 100644
index 0000000000000..fd81937da637e
--- /dev/null
+++ b/clang/test/CodeGen/ppc-xmmintrin.c
@@ -0,0 +1,2090 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+// RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
+// RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+#include <xmmintrin.h>
+
+__m128 res, m1, m2;
+__m64 res64, ms[2];
+float fs[4];
+int i, i2;
+long long i64;
+
+// CHECK-LE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
+// CHECK-BE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2
+
+// CHECK-LE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
+// CHECK-BE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
+
+void __attribute__((noinline))
+test_add() {
+  res = _mm_add_ps(m1, m2);
+  res = _mm_add_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_add
+
+// CHECK: define available_externally <4 x float> @_mm_add_ps(<4 x float> [[REG1:[0-9a-zA-Z_%.]+]], <4 x float> [[REG2:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG1]], <4 x float>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG2]], <4 x float>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG3]], align 16
+// CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG4]], align 16
+// CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG5]], [[REG6]]
+// CHECK-NEXT: ret <4 x float> [[REG7]]
+
+// CHECK: define available_externally <4 x float> @_mm_add_ss(<4 x float> [[REG8:[0-9a-zA-Z_%.]+]], <4 x float> [[REG9:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG8]], <4 x float>* [[REG10:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG9]], <4 x float>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
+// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG12]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG11]], align 16
+// CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG15]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG16]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
+// CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
+// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG18]], [[REG19]]
+// CHECK-NEXT: store <4 x float> [[REG20]], <4 x float>* [[REG21:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
+// CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG21]], align 16
+// CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG22]], <4 x float> [[REG23]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG24]]
+
+void __attribute__((noinline))
+test_avg() {
+  res64 = _mm_avg_pu16(ms[0], ms[1]);
+  res64 = _mm_avg_pu8(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_avg
+
+// CHECK: define available_externally i64 @_mm_avg_pu16
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG25]])
+// CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG28]])
+// CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG29]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG30]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG31]], <8 x i16> [[REG32]])
+// CHECK-NEXT: store <8 x i16> [[REG33]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG34]] to <2 x i64>
+// CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG35]], i32 0
+// CHECK-NEXT: ret i64 [[REG36]]
+
+// CHECK: define available_externally i64 @_mm_avg_pu8
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG37]])
+// CHECK-NEXT: [[REG39:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG38]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG39]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG40]])
+// CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG41]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG42]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG43]], <16 x i8> [[REG44]])
+// CHECK-NEXT: store <16 x i8> [[REG45]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG46]] to <2 x i64>
+// CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG47]], i32 0
+// CHECK-NEXT: ret i64 [[REG48]]
+
+void __attribute__((noinline))
+test_alt_name_avg() {
+  res64 = _m_pavgw(ms[0], ms[1]);
+  res64 = _m_pavgb(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_alt_name_avg
+
+// CHECK: define available_externally i64 @_m_pavgw
+// CHECK: [[REG49:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu16
+// CHECK-NEXT: ret i64 [[REG49]]
+
+// CHECK: define available_externally i64 @_m_pavgb
+// CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu8
+// CHECK-NEXT: ret i64 [[REG50]]
+
+void __attribute__((noinline))
+test_cmp() {
+  res = _mm_cmpeq_ps(m1, m2);
+  res = _mm_cmpeq_ss(m1, m2);
+  res = _mm_cmpge_ps(m1, m2);
+  res = _mm_cmpge_ss(m1, m2);
+  res = _mm_cmpgt_ps(m1, m2);
+  res = _mm_cmpgt_ss(m1, m2);
+  res = _mm_cmple_ps(m1, m2);
+  res = _mm_cmple_ss(m1, m2);
+  res = _mm_cmplt_ps(m1, m2);
+  res = _mm_cmplt_ss(m1, m2);
+  res = _mm_cmpneq_ps(m1, m2);
+  res = _mm_cmpneq_ss(m1, m2);
+  res = _mm_cmpnge_ps(m1, m2);
+  res = _mm_cmpnge_ss(m1, m2);
+  res = _mm_cmpngt_ps(m1, m2);
+  res = _mm_cmpngt_ss(m1, m2);
+  res = _mm_cmpnle_ps(m1, m2);
+  res = _mm_cmpnle_ss(m1, m2);
+  res = _mm_cmpnlt_ps(m1, m2);
+  res = _mm_cmpnlt_ss(m1, m2);
+  res = _mm_cmpord_ps(m1, m2);
+  res = _mm_cmpord_ss(m1, m2);
+  res = _mm_cmpunord_ps(m1, m2);
+  res = _mm_cmpunord_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_cmp
+
+// CHECK: define available_externally <4 x float> @_mm_cmpeq_ps
+// CHECK: [[REG51:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG51]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG52]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpeq_ss
+// CHECK: [[REG53:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG53]], <4 x float>* [[REG54:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG55]], <4 x float>* [[REG56:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG54]], align 16
+// CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG56]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG57]], <4 x float> [[REG58]])
+// CHECK: [[REG59:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG59]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpge_ps
+// CHECK: [[REG60:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG60]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG61]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpge_ss
+// CHECK: [[REG62:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG62]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG64:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG64]], <4 x float>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
+// CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG65]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG66]], <4 x float> [[REG67]])
+// CHECK: [[REG68:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG68]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpgt_ps
+// CHECK: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG69]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG70]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpgt_ss
+// CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG71]], <4 x float>* [[REG72:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG73]], <4 x float>* [[REG74:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG72]], align 16
+// CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG74]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG75]], <4 x float> [[REG76]])
+// CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG77]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmple_ps
+// CHECK: [[REG78:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG78]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG79]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmple_ss
+// CHECK: [[REG80:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG80]], <4 x float>* [[REG81:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG82:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG82]], <4 x float>* [[REG83:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG81]], align 16
+// CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG83]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG84]], <4 x float> [[REG85]])
+// CHECK: [[REG86:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG86]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmplt_ps
+// CHECK: [[REG87:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG87]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG88]]
+
+// CHECK: @_mm_cmplt_ss
+// CHECK: [[REG89:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG89]], <4 x float>* [[REG90:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG90]], align 16
+// CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG93]], <4 x float> [[REG94]])
+// CHECK: [[REG95:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG95]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpneq_ps
+// CHECK: [[REG96:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG96]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[REG97]], <4 x float>* [[REG98:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
+// CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
+// CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_nor(float vector[4], float vector[4])(<4 x float> [[REG99]], <4 x float> [[REG100]])
+// CHECK-NEXT: ret <4 x float> [[REG101]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpneq_ss
+// CHECK: [[REG102:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG102]], <4 x float>* [[REG103:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG104:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG104]], <4 x float>* [[REG105:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG103]], align 16
+// CHECK-NEXT: [[REG107:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG105]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG106]], <4 x float> [[REG107]])
+// CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
+// CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG108]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnge_ps
+// CHECK: [[REG109:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG109]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG110]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnge_ss
+// CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG111]], <4 x float>* [[REG112:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG113:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG113]], <4 x float>* [[REG114:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG112]], align 16
+// CHECK-NEXT: [[REG116:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG114]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG115]], <4 x float> [[REG116]])
+// CHECK: [[REG117:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG117]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpngt_ps
+// CHECK: [[REG118:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG118]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG119]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpngt_ss
+// CHECK: [[REG120:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG120]], <4 x float>* [[REG121:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG122:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG122]], <4 x float>* [[REG123:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG124:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG121]], align 16
+// CHECK-NEXT: [[REG125:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG123]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG124]], <4 x float> [[REG125]])
+// CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG126]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnle_ps
+// CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG128:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG127]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG128]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnle_ss
+// CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG129]], <4 x float>* [[REG130:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG131]], <4 x float>* [[REG132:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG130]], align 16
+// CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG132]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG133]], <4 x float> [[REG134]])
+// CHECK: [[REG135:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG135]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ps
+// CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
+// CHECK-NEXT: [[REG137:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG136]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG137]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpnlt_ss
+// CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG138]], <4 x float>* [[REG139:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG140]], <4 x float>* [[REG141:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG142:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG139]], align 16
+// CHECK-NEXT: [[REG143:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG141]], align 16
+// CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG142]], <4 x float> [[REG143]])
+// CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG144]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpord_ps
+// CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG146:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG145]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG146]], <4 x i32>* [[REG147:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG149:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG148]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG149]], <4 x i32>* [[REG150:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG147]], align 16
+// CHECK-NEXT: [[REG152:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG151]])
+// CHECK-NEXT: store <4 x i32> [[REG152]], <4 x i32>* [[REG153:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG150]], align 16
+// CHECK-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG154]])
+// CHECK-NEXT: store <4 x i32> [[REG155]], <4 x i32>* [[REG156:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG153]], align 16
+// CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG156]], align 16
+// CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> {{[0-9a-zA-Z_%.]+}}, <4 x i32> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: [[REG160:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG159]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG160]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpord_ss
+// CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
+// CHECK-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG161]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG162]], <4 x i32>* [[REG163:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG164:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
+// CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG164]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG165]], <4 x i32>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG163]], align 16
+// CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG167]])
+// CHECK-NEXT: store <4 x i32> [[REG168]], <4 x i32>* [[REG161:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG166]], align 16
+// CHECK-NEXT: [[REG170:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG169]])
+// CHECK-NEXT: store <4 x i32> [[REG170]], <4 x i32>* [[REG171:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
+// CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG171]], align 16
+// CHECK-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG172]], <4 x i32> [[REG173]])
+// CHECK-NEXT: store <4 x i32> [[REG174]], <4 x i32>* [[REG161]], align 16
+// CHECK: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
+// CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG175]] to <4 x float>
+// CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> [[REG176]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG177]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpunord_ps
+// CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG178]])
+// CHECK-NEXT: [[REG180:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG179]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG180]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG181]])
+// CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG182]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG183]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG184:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG184]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG185]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG186]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG187]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG188:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG189:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG188]], <4 x i32> [[REG189]])
+// CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG190]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG191]]
+
+// CHECK: define available_externally <4 x float> @_mm_cmpunord_ss
+// CHECK: [[REG192:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG193:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG192]])
+// CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG193]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG194]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG195]])
+// CHECK-NEXT: [[REG197:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG196]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG197]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG198]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG199]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG200:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG200]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK-NEXT: store <4 x i32> [[REG201]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG204:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG202]], <4 x i32> [[REG203]])
+// CHECK-NEXT: store <4 x i32> [[REG204]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG206]] to <4 x float>
+// CHECK-NEXT: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG205]], <4 x float> [[REG207]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG208]]
+
+void __attribute__((noinline))
+test_comi() {
+  i = _mm_comieq_ss(m1, m2);
+  i = _mm_comige_ss(m1, m2);
+  i = _mm_comigt_ss(m1, m2);
+  i = _mm_comile_ss(m1, m2);
+  i = _mm_comilt_ss(m1, m2);
+  i = _mm_comineq_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_comi
+
+// CHECK: define available_externally signext i32 @_mm_comieq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG210:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG209]], i32 0
+// CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG212:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG211]], i32 0
+// CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG210]], [[REG212]]
+// CHECK-NEXT: [[REG214:[0-9a-zA-Z_%.]+]] = zext i1 [[REG213]] to i32
+// CHECK-NEXT: ret i32 [[REG214]]
+
+// CHECK: define available_externally signext i32 @_mm_comige_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG216:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG215]], i32 0
+// CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG218:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG217]], i32 0
+// CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG216]], [[REG218]]
+// CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = zext i1 [[REG219]] to i32
+// CHECK-NEXT: ret i32 [[REG220]]
+
+// CHECK: define available_externally signext i32 @_mm_comigt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG222:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG221]], i32 0
+// CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG224:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG223]], i32 0
+// CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG222]], [[REG224]]
+// CHECK-NEXT: [[REG226:[0-9a-zA-Z_%.]+]] = zext i1 [[REG225]] to i32
+// CHECK-NEXT: ret i32 [[REG226]]
+
+// CHECK: define available_externally signext i32 @_mm_comile_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG227:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG228:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG227]], i32 0
+// CHECK-NEXT: [[REG229:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG230:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG229]], i32 0
+// CHECK-NEXT: [[REG231:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG228]], [[REG230]]
+// CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = zext i1 [[REG231]] to i32
+// CHECK-NEXT: ret i32 [[REG232]]
+
+// CHECK: define available_externally signext i32 @_mm_comilt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG233]], i32 0
+// CHECK-NEXT: [[REG235:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG235]], i32 0
+// CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG234]], [[REG236]]
+// CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = zext i1 [[REG237]] to i32
+// CHECK-NEXT: ret i32 [[REG238]]
+
+// CHECK: define available_externally signext i32 @_mm_comineq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG239:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG239]], i32 0
+// CHECK-NEXT: [[REG241:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG242:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG241]], i32 0
+// CHECK-NEXT: [[REG243:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG240]], [[REG242]]
+// CHECK-NEXT: [[REG244:[0-9a-zA-Z_%.]+]] = zext i1 [[REG243]] to i32
+// CHECK-NEXT: ret i32 [[REG244]]
+
+void __attribute__((noinline))
+test_convert() {
+  res = _mm_cvt_pi2ps(m1, ms[1]);
+  res64 = _mm_cvt_ps2pi(m1);
+  res = _mm_cvt_si2ss(m1, i);
+  i = _mm_cvt_ss2si(m1);
+  res = _mm_cvtpi16_ps(ms[0]);
+  res = _mm_cvtpi32_ps(m1, ms[1]);
+  res = _mm_cvtpi32x2_ps(ms[0], ms[1]);
+  res = _mm_cvtpi8_ps(ms[0]);
+  res64 = _mm_cvtps_pi16(m1);
+  res64 = _mm_cvtps_pi32(m1);
+  res64 = _mm_cvtps_pi8(m1);
+  res = _mm_cvtpu16_ps(ms[0]);
+  res = _mm_cvtpu8_ps(ms[0]);
+  res = _mm_cvtsi32_ss(m1, i);
+  res = _mm_cvtsi64_ss(m1, i64);
+  fs[0] = _mm_cvtss_f32(m1);
+  i = _mm_cvtss_si32(m1);
+  i64 = _mm_cvtss_si64(m1);
+  res64 = _mm_cvtt_ps2pi(m1);
+  i = _mm_cvtt_ss2si(m1);
+  res64 = _mm_cvttps_pi32(m1);
+  i = _mm_cvttss_si32(m1);
+  i64 = _mm_cvttss_si64(m1);
+}
+
+// CHECK-LABEL: @test_convert
+
+// CHECK: define available_externally <4 x float> @_mm_cvt_pi2ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG246:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtpi32_ps(<4 x float> [[REG245]], i64 [[REG246]])
+// CHECK-NEXT: ret <4 x float> [[REG247]]
+
+// CHECK: define available_externally i64 @_mm_cvt_ps2pi
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtps_pi32(<4 x float> [[REG248]])
+// CHECK-NEXT: ret i64 [[REG249]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvt_si2ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtsi32_ss(<4 x float> [[REG250]], i32 signext [[REG251]])
+// CHECK-NEXT: ret <4 x float> [[REG252]]
+
+// CHECK: define available_externally signext i32 @_mm_cvt_ss2si
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG253:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG254:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtss_si32(<4 x float> [[REG253]])
+// CHECK-NEXT: ret i32 [[REG254]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi16_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG255:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG255]], i32 0
+// CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG256]], i64 [[REG257]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG258]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG259]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG260]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG261:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupklsh(short vector[8])(<8 x i16> [[REG261]])
+// CHECK-NEXT: store <4 x i32> [[REG262]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG263]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG264]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG265:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG265]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi32_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG266]], i32 0
+// CHECK-NEXT: [[REG268:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG267]], i64 [[REG268]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG269]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG270:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG270]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG271]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG273:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG272]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG273]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG274:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG274]] to <2 x i64>
+// CHECK-NEXT: [[REG276:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG275]], i32 0
+// CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG276]], i32 0
+// CHECK-NEXT: [[REG278:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG278]] to <2 x i64>
+// CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG279]], i32 1
+// CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG277]], i64 [[REG280]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG281]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG283:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG282]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG283]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi32x2_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG284:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG284]], i32 0
+// CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG287:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG285]], i64 [[REG286]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG287]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG288:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG288]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG289]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG290]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG291]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG292]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpi8_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG293]], i32 0
+// CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG294]], i64 [[REG295]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG296]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG298:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG297]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG298]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG300:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_vupkhsb(signed char vector[16])(<16 x i8> [[REG299]])
+// CHECK-NEXT: store <8 x i16> [[REG300]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG301:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupkhsh(short vector[8])(<8 x i16> [[REG301]])
+// CHECK-NEXT: store <4 x i32> [[REG302]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG303]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG304]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG305]]
+
+// CHECK: define available_externally i64 @_mm_cvtps_pi16
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG306:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG307:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG306]])
+// CHECK-NEXT: store <4 x float> [[REG307]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG308]], i32 0)
+// CHECK-NEXT: store <4 x i32> [[REG309]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG310]], <4 x i32> [[REG311]])
+// CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG312]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG313]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG314]], i32 0
+// CHECK-NEXT: ret i64 [[REG315]]
+
+// CHECK: define available_externally i64 @_mm_cvtps_pi32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG316:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG316]] to <2 x i64>
+// CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG317]], i32 zeroext 0)
+// CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG318]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[REG319]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG320]])
+// CHECK-NEXT: store <4 x float> [[REG321]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG322:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG322]], i32 0)
+// CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG323]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG324]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG325:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG326:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG325]], i32 0
+// CHECK-NEXT: ret i64 [[REG326]]
+
+// CHECK: define available_externally i64 @_mm_cvtps_pi8
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG328:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG327]])
+// CHECK-NEXT: store <4 x float> [[REG328]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG330:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG329]], i32 0)
+// CHECK-NEXT: store <4 x i32> [[REG330]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG332:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG331]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: store <8 x i16> [[REG332]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG335:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(short vector[8], short vector[8])(<8 x i16> [[REG333]], <8 x i16> [[REG334]])
+// CHECK-NEXT: store <16 x i8> [[REG335]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG337:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG336]] to <2 x i64>
+// CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG337]], i32 0
+// CHECK-NEXT: ret i64 [[REG338]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpu16_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG339]], i32 0
+// CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG340]], i64 [[REG341]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG342]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG343]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG344]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG345]], <8 x i16> zeroinitializer)
+// CHECK-BE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG345]])
+// CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG346]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG347]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG348:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG349:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG348]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG349]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG350]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtpu8_ps
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG351]], i32 0
+// CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG354:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG352]], i64 [[REG353]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG354]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG355]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG356]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG357]], <16 x i8> zeroinitializer)
+// CHECK-BE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> zeroinitializer, <16 x i8> [[REG357]])
+// CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG358]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG359]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG360]], <8 x i16> zeroinitializer)
+// CHECK-BE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG360]])
+// CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG361]] to <4 x i32>
+// CHECK-NEXT: store <4 x i32> [[REG362]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG364:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG363]], i32 0)
+// CHECK-NEXT: store <4 x float> [[REG364]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG365]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtsi32_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = sitofp i32 [[REG366]] to float
+// CHECK-NEXT: store float [[REG367]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG369:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG369]], float [[REG368]], i32 0
+// CHECK-NEXT: store <4 x float> [[REG370]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG371:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG371]]
+
+// CHECK: define available_externally <4 x float> @_mm_cvtsi64_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = sitofp i64 [[REG372]] to float
+// CHECK-NEXT: store float [[REG373]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG374:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG375]], float [[REG374]], i32 0
+// CHECK-NEXT: store <4 x float> [[REG376]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG377]]
+
+// CHECK: define available_externally float @_mm_cvtss_f32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG378]], i32 0
+// CHECK-NEXT: ret float [[REG379]]
+
+// CHECK: define available_externally signext i32 @_mm_cvtss_si32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
+// CHECK-BE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
+// CHECK-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 0
+// CHECK-NEXT: [[REG383:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 1
+// CHECK-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 2
+// CHECK-NEXT: store <4 x float> [[REG382]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 [[REG383]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store double [[REG384]], double* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG385:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG386:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG385]] to i32
+// CHECK-NEXT: ret i32 [[REG386]]
+
+// CHECK: define available_externally i64 @_mm_cvtss_si64
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
+// CHECK-BE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
+// CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 0
+// CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 1
+// CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 2
+// CHECK-NEXT: store <4 x float> [[REG389]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store i64 [[REG390]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store double [[REG391]], double* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: ret i64 [[REG392]]
+
+// CHECK: define available_externally i64 @_mm_cvtt_ps2pi
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK: [[REG393:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvttps_pi32(<4 x float> [[REG393]])
+// CHECK-NEXT: ret i64 [[REG394]]
+
+// CHECK: define available_externally signext i32 @_mm_cvtt_ss2si
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG396:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvttss_si32(<4 x float> [[REG395]])
+// CHECK-NEXT: ret i32 [[REG396]]
+
+// CHECK: define available_externally i64 @_mm_cvttps_pi32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG397]] to <2 x i64>
+// CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG398]], i32 zeroext 0)
+// CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <4 x float>
+// CHECK-NEXT: store <4 x float> [[REG400]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG401:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG401]], i32 0)
+// CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG402]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG403]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG404:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG404]], i32 0
+// CHECK-NEXT: ret i64 [[REG405]]
+
+// CHECK: define available_externally signext i32 @_mm_cvttss_si32
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG407:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG406]], i32 0
+// CHECK-NEXT: store float [[REG407]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG409:[0-9a-zA-Z_%.]+]] = fptosi float [[REG408]] to i32
+// CHECK-NEXT: ret i32 [[REG409]]
+
+// CHECK: define available_externally i64 @_mm_cvttss_si64
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG410:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG410]], i32 0
+// CHECK-NEXT: store float [[REG411]], float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = fptosi float [[REG412]] to i64
+// CHECK-NEXT: ret i64 [[REG413]]
+
+void __attribute__((noinline))
+test_div() {
+  res = _mm_div_ps(m1, m2);
+  res = _mm_div_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_div
+
+// CHECK: define available_externally <4 x float> @_mm_div_ps(<4 x float> [[REG414:[0-9a-zA-Z_%.]+]], <4 x float> [[REG415:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG414]], <4 x float>* [[REG416:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG415]], <4 x float>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG418:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG416]], align 16
+// CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG417]], align 16
+// CHECK-NEXT: [[REG420:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG418]], [[REG419]]
+// CHECK-NEXT: ret <4 x float> [[REG420]]
+
+// CHECK: define available_externally <4 x float> @_mm_div_ss(<4 x float> [[REG421:[0-9a-zA-Z_%.]+]], <4 x float> [[REG422:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG421]], <4 x float>* [[REG423:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG422]], <4 x float>* [[REG424:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
+// CHECK-NEXT: [[REG426:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG425]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG426]], <4 x float>* [[REG427:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG424]], align 16
+// CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG428]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG429]], <4 x float>* [[REG430:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG431:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG427]], align 16
+// CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG430]], align 16
+// CHECK-NEXT: [[REG433:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG431]], [[REG432]]
+// CHECK-NEXT: store <4 x float> [[REG433]], <4 x float>* [[REG434:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
+// CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG434]], align 16
+// CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG435]], <4 x float> [[REG436]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG437]]
+
+void __attribute__((noinline))
+test_extract() {
+  i = _mm_extract_pi16(ms[0], i2);
+  i = _m_pextrw(ms[0], i2);
+}
+
+// CHECK-LABEL: @test_extract
+
+// CHECK: define available_externally signext i32 @_mm_extract_pi16
+// CHECK: [[REG438:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG439:[0-9a-zA-Z_%.]+]] = and i32 [[REG438]], 3
+// CHECK-NEXT: store i32 [[REG439]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-BE: sub i32 3, {{[0-9a-zA-Z_%.]+}}
+// CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = mul i32 [[REG441]], 16
+// CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = zext i32 [[REG442]] to i64
+// CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = lshr i64 [[REG440]], [[REG443]]
+// CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = and i64 [[REG444]], 65535
+// CHECK-NEXT: [[REG446:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG445]] to i32
+// CHECK-NEXT: ret i32 [[REG446]]
+
+// CHECK: define available_externally signext i32 @_m_pextrw
+// CHECK: [[REG447:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_extract_pi16
+// CHECK-NEXT: ret i32 [[REG447]]
+
+void __attribute__((noinline))
+test_insert() {
+  res64 = _mm_insert_pi16(ms[0], i, i2);
+  res64 = _m_pinsrw(ms[0], i, i2);
+}
+
+// CHECK-LABEL: @test_insert
+
+// CHECK: define available_externally i64 @_mm_insert_pi16
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = and i32 [[REG448]], 3
+// CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG449]], 16
+// CHECK-NEXT: store i32 [[REG450]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = sext i32 [[REG451]] to i64
+// CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = zext i32 [[REG453]] to i64
+// CHECK-NEXT: [[REG455:[0-9a-zA-Z_%.]+]] = shl i64 [[REG452]], [[REG454]]
+// CHECK-NEXT: store i64 [[REG455]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = zext i32 [[REG456]] to i64
+// CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = shl i64 65535, [[REG457]]
+// CHECK-NEXT: store i64 [[REG458]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG461:[0-9a-zA-Z_%.]+]] = xor i64 [[REG460]], -1
+// CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = and i64 [[REG459]], [[REG461]]
+// CHECK-NEXT: [[REG463:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG464:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = and i64 [[REG463]], [[REG464]]
+// CHECK-NEXT: [[REG466:[0-9a-zA-Z_%.]+]] = or i64 [[REG462]], [[REG465]]
+// CHECK-NEXT: store i64 [[REG466]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: ret i64 [[REG467]]
+
+// CHECK: define available_externally i64 @_m_pinsrw
+// CHECK: [[REG468:[0-9a-zA-Z_%.]+]] = call i64 @_mm_insert_pi16
+// CHECK-NEXT: ret i64 [[REG468]]
+
+void __attribute__((noinline))
+test_load() {
+  res = _mm_load_ps(fs);
+  res = _mm_load_ps1(fs);
+  res = _mm_load_ss(fs);
+  res = _mm_load1_ps(fs);
+  res = _mm_loadh_pi(m1, &ms[0]);
+  res = _mm_loadl_pi(m1, &ms[0]);
+  res = _mm_loadr_ps(fs);
+  res = _mm_loadu_ps(fs);
+}
+
+// CHECK-LABEL: @test_load
+
+// CHECK: define available_externally <4 x float> @_mm_load_ps
+// CHECK: [[REG469:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(int, float vector[4] const*)
+// CHECK-NEXT: ret <4 x float> [[REG469]]
+
+// CHECK: define available_externally <4 x float> @_mm_load_ps1
+// CHECK: [[REG470:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_load1_ps
+// CHECK-NEXT: ret <4 x float> [[REG470]]
+
+// CHECK: define available_externally <4 x float> @_mm_load_ss
+// CHECK: [[REG471:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set_ss
+// CHECK-NEXT: ret <4 x float> [[REG471]]
+
+// CHECK: define available_externally <4 x float> @_mm_load1_ps
+// CHECK: [[REG472:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps
+// CHECK-NEXT: ret <4 x float> [[REG472]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadh_pi
+// CHECK: [[REG473:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: store <2 x i64> [[REG473]], <2 x i64>* [[REG474:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG474]], align 16
+// CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG475]], i32 1
+// CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG479:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG477]], i64 [[REG476]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG479]], <2 x i64>* [[REG478]], align 16
+// CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478]], align 16
+// CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG480]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG481]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadl_pi
+// CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
+// CHECK-NEXT: store <2 x i64> [[REG482]], <2 x i64>* [[REG483:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG483]], align 16
+// CHECK-NEXT: [[REG485:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG484]], i32 0
+// CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG488:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG486]], i64 [[REG485]], i32 0
+// CHECK-NEXT: store <2 x i64> [[REG488]], <2 x i64>* [[REG487]], align 16
+// CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487]], align 16
+// CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG489]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG490]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadr_ps
+// CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(int, float vector[4] const*)
+// CHECK-NEXT: store <4 x float> [[REG491]], <4 x float>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
+// CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
+// CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG493]], <4 x float> [[REG494]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
+// CHECK-NEXT: store <4 x float> [[REG495]], <4 x float>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG496]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG497]]
+
+// CHECK: define available_externally <4 x float> @_mm_loadu_ps
+// CHECK: [[REG498:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vsx_ld(int, float const*)
+// CHECK-NEXT: ret <4 x float> [[REG498]]
+
+void __attribute__((noinline))
+test_logic() {
+  res = _mm_or_ps(m1, m2);
+  res = _mm_and_ps(m1, m2);
+  res = _mm_andnot_ps(m1, m2);
+  res = _mm_xor_ps(m1, m2);
+}
+
+// CHECK-LABEL: @test_logic
+
+// CHECK: define available_externally <4 x float> @_mm_or_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_or(float vector[4], float vector[4])(<4 x float> [[REG499]], <4 x float> [[REG500]])
+// CHECK-NEXT: ret <4 x float> [[REG501]]
+
+// CHECK: define available_externally <4 x float> @_mm_and_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG504:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_and(float vector[4], float vector[4])(<4 x float> [[REG502]], <4 x float> [[REG503]])
+// CHECK-NEXT: ret <4 x float> [[REG504]]
+
+// CHECK: define available_externally <4 x float> @_mm_andnot_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_andc(float vector[4], float vector[4])(<4 x float> [[REG505]], <4 x float> [[REG506]])
+// CHECK-NEXT: ret <4 x float> [[REG507]]
+
+// CHECK: define available_externally <4 x float> @_mm_xor_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG509:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG510:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> [[REG508]], <4 x float> [[REG509]])
+// CHECK-NEXT: ret <4 x float> [[REG510]]
+
+void __attribute__((noinline))
+test_max() {
+  res = _mm_max_ps(m1, m2);
+  res = _mm_max_ss(m1, m2);
+  res64 = _mm_max_pi16(ms[0], ms[1]);
+  res64 = _mm_max_pu8(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_max
+
+// CHECK: define available_externally <4 x float> @_mm_max_ps(<4 x float> [[REG511:[0-9a-zA-Z_%.]+]], <4 x float> [[REG512:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG511]], <4 x float>* [[REG513:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG512]], <4 x float>* [[REG514:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
+// CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
+// CHECK-NEXT: [[REG517:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG515]], <4 x float> [[REG516]])
+// CHECK-NEXT: store <4 x i32> [[REG517]], <4 x i32>* [[REG518:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG519:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
+// CHECK-NEXT: [[REG520:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
+// CHECK-NEXT: [[REG521:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG518]], align 16
+// CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG519]], <4 x float> [[REG520]], <4 x i32> [[REG521]])
+// CHECK-NEXT: ret <4 x float> [[REG522]]
+
+// CHECK: define available_externally <4 x float> @_mm_max_ss
+// CHECK: [[REG523:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG523]], <4 x float>* [[REG524:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG525:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG525]], <4 x float>* [[REG526:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG524]], align 16
+// CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG526]], align 16
+// CHECK-NEXT: call <4 x float> @vec_max(float vector[4], float vector[4])(<4 x float> [[REG527]], <4 x float> [[REG528]])
+// CHECK: [[REG529:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG529]]
+
+// CHECK: define available_externally i64 @_mm_max_pi16
+// CHECK: [[REG530:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG530]])
+// CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG531]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG532]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG533]])
+// CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG534]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG535]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG537:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG538:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG536]], <8 x i16> [[REG537]])
+// CHECK-NEXT: store <8 x i16> [[REG538]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG539:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG540:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG539]], <8 x i16> [[REG540]], <8 x i16> [[REG541]])
+// CHECK-NEXT: store <8 x i16> [[REG542]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG543]] to <2 x i64>
+// CHECK-NEXT: [[REG545:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG544]], i32 0
+// CHECK-NEXT: ret i64 [[REG545]]
+
+// CHECK: define available_externally i64 @_mm_max_pu8
+// CHECK: [[REG546:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG546]])
+// CHECK-NEXT: [[REG548:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG547]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG548]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG549:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG549]])
+// CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG550]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG551]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG554:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG552]], <16 x i8> [[REG553]])
+// CHECK-NEXT: store <16 x i8> [[REG554]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG555:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG556:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG557:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG558:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG555]], <16 x i8> [[REG556]], <16 x i8> [[REG557]])
+// CHECK-NEXT: store <16 x i8> [[REG558]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG559:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG559]] to <2 x i64>
+// CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG560]], i32 0
+// CHECK-NEXT: ret i64 [[REG561]]
+
+void __attribute__((noinline))
+test_alt_name_max() {
+  res64 = _m_pmaxsw(ms[0], ms[1]);
+  res64 = _m_pmaxub(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_alt_name_max
+
+// CHECK: define available_externally i64 @_m_pmaxsw
+// CHECK: [[REG562:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pi16
+// CHECK-NEXT: ret i64 [[REG562]]
+
+// CHECK: define available_externally i64 @_m_pmaxub
+// CHECK: [[REG563:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pu8
+// CHECK-NEXT: ret i64 [[REG563]]
+
+void __attribute__((noinline))
+test_min() {
+  res = _mm_min_ps(m1, m2);
+  res = _mm_min_ss(m1, m2);
+  res64 = _mm_min_pi16(ms[0], ms[1]);
+  res64 = _mm_min_pu8(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_min
+
+// CHECK: define available_externally <4 x float> @_mm_min_ps(<4 x float> [[REG517:[0-9a-zA-Z_%.]+]], <4 x float> [[REG518:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG517]], <4 x float>* [[REG564:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG518]], <4 x float>* [[REG565:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG566:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
+// CHECK-NEXT: [[REG567:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
+// CHECK-NEXT: [[REG568:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG566]], <4 x float> [[REG567]])
+// CHECK-NEXT: store <4 x i32> [[REG568]], <4 x i32>* [[REG569:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG570:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
+// CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
+// CHECK-NEXT: [[REG572:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG569]], align 16
+// CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG570]], <4 x float> [[REG571]], <4 x i32> [[REG572]])
+// CHECK-NEXT: ret <4 x float> [[REG573]]
+
+// CHECK: define available_externally <4 x float> @_mm_min_ss
+// CHECK: [[REG574:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG574]], <4 x float>* [[REG575:[0-9a-zA-Z_%.]+]], align 16
+// CHECK: [[REG576:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
+// CHECK-NEXT: store <4 x float> [[REG576]], <4 x float>* [[REG577:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG578:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG575]], align 16
+// CHECK-NEXT: [[REG579:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG577]], align 16
+// CHECK-NEXT: call <4 x float> @vec_min(float vector[4], float vector[4])(<4 x float> [[REG578]], <4 x float> [[REG579]])
+// CHECK: [[REG580:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG580]]
+
+// CHECK: define available_externally i64 @_mm_min_pi16
+// CHECK: [[REG581:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG581]])
+// CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG582]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG583]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG584:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG585:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG584]])
+// CHECK-NEXT: [[REG586:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG585]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG586]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG587:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG588:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG589:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> [[REG587]], <8 x i16> [[REG588]])
+// CHECK-NEXT: store <8 x i16> [[REG589]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG593:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG590]], <8 x i16> [[REG591]], <8 x i16> [[REG592]])
+// CHECK-NEXT: store <8 x i16> [[REG593]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG594:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG595:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG594]] to <2 x i64>
+// CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG595]], i32 0
+// CHECK-NEXT: ret i64 [[REG596]]
+
+// CHECK: define available_externally i64 @_mm_min_pu8
+// CHECK: [[REG597:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG598:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG597]])
+// CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG598]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG599]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG601:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG600]])
+// CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG601]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG602]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG603]], <16 x i8> [[REG604]])
+// CHECK-NEXT: store <16 x i8> [[REG605]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG608:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG606]], <16 x i8> [[REG607]], <16 x i8> [[REG608]])
+// CHECK-NEXT: store <16 x i8> [[REG609]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG611:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG610]] to <2 x i64>
+// CHECK-NEXT: [[REG612:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG611]], i32 0
+// CHECK-NEXT: ret i64 [[REG612]]
+
+void __attribute__((noinline))
+test_alt_name_min() {
+  res64 = _m_pminsw(ms[0], ms[1]);
+  res64 = _m_pminub(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_alt_name_min
+
+// CHECK: define available_externally i64 @_m_pminsw
+// CHECK: [[REG613:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pi16
+// CHECK-NEXT: ret i64 [[REG613]]
+
+// CHECK: define available_externally i64 @_m_pminub
+// CHECK: [[REG614:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pu8
+// CHECK-NEXT: ret i64 [[REG614]]
+
+void __attribute__((noinline))
+test_move() {
+  _mm_maskmove_si64(ms[0], ms[1], (char *)&res64);
+  res = _mm_move_ss(m1, m2);
+  res = _mm_movehl_ps(m1, m2);
+  res = _mm_movelh_ps(m1, m2);
+  i = _mm_movemask_pi8(ms[0]);
+  i = _mm_movemask_ps(m1);
+}
+
+// CHECK-LABEL: @test_move
+
+// CHECK: define available_externally void @_mm_maskmove_si64
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 -9187201950435737472, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG615:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = bitcast i8* [[REG615]] to i64*
+// CHECK-NEXT: store i64* [[REG616]], i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG617:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG618:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
+// CHECK-NEXT: store i64 [[REG618]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG619:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG620:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG621:[0-9a-zA-Z_%.]+]] = and i64 [[REG619]], [[REG620]]
+// CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG623:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8(i64 [[REG621]], i64 [[REG622]])
+// CHECK-NEXT: store i64 [[REG623]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = xor i64 [[REG625]], -1
+// CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = and i64 [[REG624]], [[REG626]]
+// CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = and i64 [[REG628]], [[REG629]]
+// CHECK-NEXT: [[REG631:[0-9a-zA-Z_%.]+]] = or i64 [[REG627]], [[REG630]]
+// CHECK-NEXT: store i64 [[REG631]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG633:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG632]], i64* [[REG633]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally <4 x float> @_mm_move_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG634:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG635:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG634]], <4 x float> [[REG635]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG636]]
+
+// CHECK: define available_externally <4 x float> @_mm_movehl_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG638:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG637]] to <2 x i64>
+// CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG639]] to <2 x i64>
+// CHECK-NEXT: [[REG641:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG638]], <2 x i64> [[REG640]])
+// CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG641]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG642]]
+
+// CHECK: define available_externally <4 x float> @_mm_movelh_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG643]] to <2 x i64>
+// CHECK-NEXT: [[REG645:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG646:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG645]] to <2 x i64>
+// CHECK-NEXT: [[REG647:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG644]], <2 x i64> [[REG646]])
+// CHECK-NEXT: [[REG648:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG647]] to <4 x float>
+// CHECK-NEXT: ret <4 x float> [[REG648]]
+
+// CHECK: define available_externally signext i32 @_mm_movemask_pi8
+// CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-LE-NEXT: store i64 2269495618449464, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-BE-NEXT: store i64 4048780183313844224, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG650:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG651:[0-9a-zA-Z_%.]+]] = call i64 @llvm.ppc.bpermd(i64 [[REG649]], i64 [[REG650]])
+// CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG651]] to i32
+// CHECK-NEXT: ret i32 [[REG652]]
+
+// CHECK: define available_externally signext i32 @_mm_movemask_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG654:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG653]] to <16 x i8>
+// CHECK-LE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
+// CHECK-BE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
+// CHECK-NEXT: store <2 x i64> [[REG655]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG656:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-LE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 1
+// CHECK-BE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 0
+// CHECK-NEXT: [[REG658:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG657]] to i32
+// CHECK-NEXT: ret i32 [[REG658]]
+
+void __attribute__((noinline))
+test_alt_name_move() {
+  i = _m_pmovmskb(ms[0]);
+  _m_maskmovq(ms[0], ms[1], (char *)&res64);
+}
+
+// CHECK-LABEL: @test_alt_name_move
+
+// CHECK: define available_externally signext i32 @_m_pmovmskb
+// CHECK: [[REG659:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_movemask_pi8
+// CHECK-NEXT: ret i32 [[REG659]]
+
+// CHECK: define available_externally void @_m_maskmovq
+// CHECK: call void @_mm_maskmove_si64
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_mul() {
+  res = _mm_mul_ps(m1, m2);
+  res = _mm_mul_ss(m1, m2);
+  res64 = _mm_mulhi_pu16(ms[0], ms[1]);
+  res64 = _m_pmulhuw(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_mul
+
+// CHECK: define available_externally <4 x float> @_mm_mul_ps(<4 x float> [[REG660:[0-9a-zA-Z_%.]+]], <4 x float> [[REG661:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG660]], <4 x float>* [[REG662:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG661]], <4 x float>* [[REG663:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG664:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG662]], align 16
+// CHECK-NEXT: [[REG665:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG663]], align 16
+// CHECK-NEXT: [[REG666:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG664]], [[REG665]]
+// CHECK-NEXT: ret <4 x float> [[REG666]]
+
+// CHECK: define available_externally <4 x float> @_mm_mul_ss(<4 x float> [[REG667:[0-9a-zA-Z_%.]+]], <4 x float> [[REG668:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG667]], <4 x float>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG668]], <4 x float>* [[REG670:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG671:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
+// CHECK-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG671]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG672]], <4 x float>* [[REG673:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG670]], align 16
+// CHECK-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG674]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG675]], <4 x float>* [[REG676:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG677:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG673]], align 16
+// CHECK-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG676]], align 16
+// CHECK-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG677]], [[REG678]]
+// CHECK-NEXT: store <4 x float> [[REG679]], <4 x float>* [[REG680:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
+// CHECK-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG680]], align 16
+// CHECK-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG681]], <4 x float> [[REG682]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG683]]
+
+// CHECK: define available_externally i64 @_mm_mulhi_pu16(i64 [[REG684:[0-9a-zA-Z_%.]+]], i64 [[REG685:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG684]], i64* [[REG686:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG685]], i64* [[REG687:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG688:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG686]], align 8
+// CHECK-NEXT: [[REG689:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG688]])
+// CHECK-NEXT: [[REG690:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG689]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG690]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG691:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG687]], align 8
+// CHECK-NEXT: [[REG692:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG691]])
+// CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG692]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG693]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG696:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> [[REG694]], <8 x i16> [[REG695]])
+// CHECK-NEXT: store <4 x i32> [[REG696]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG698:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG699:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> [[REG697]], <8 x i16> [[REG698]])
+// CHECK-NEXT: store <4 x i32> [[REG699]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG702:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> [[REG700]], <4 x i32> [[REG701]], <16 x i8> [[REG702]])
+// CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG703]] to <8 x i16>
+// CHECK-NEXT: store <8 x i16> [[REG704]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG706:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG705]] to <2 x i64>
+// CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG706]], i32 0
+// CHECK-NEXT: ret i64 [[REG707]]
+
+// CHECK: define available_externally i64 @_m_pmulhuw(i64 [[REG708:[0-9a-zA-Z_%.]+]], i64 [[REG709:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG708]], i64* [[REG710:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG709]], i64* [[REG711:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: [[REG712:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG710]], align 8
+// CHECK-NEXT: [[REG713:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG711]], align 8
+// CHECK-NEXT: [[REG714:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pu16(i64 [[REG712]], i64 [[REG713]])
+// CHECK-NEXT: ret i64 [[REG714]]
+
+void __attribute__((noinline))
+test_prefetch() {
+  _mm_prefetch(ms, i);
+}
+
+// CHECK-LABEL: @test_prefetch
+
+// CHECK: define available_externally void @_mm_prefetch
+// CHECK: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
+// CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: call void @llvm.prefetch(i8* [[REG715]], i32 0, i32 3, i32 1)
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_rcp() {
+  res = _mm_rcp_ps(m1);
+  res = _mm_rcp_ss(m1);
+}
+
+// CHECK-LABEL: @test_rcp
+
+// CHECK: define available_externally <4 x float> @_mm_rcp_ps
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG716:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG717:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_re(float vector[4])(<4 x float> [[REG716]])
+// CHECK-NEXT: ret <4 x float> [[REG717]]
+
+// CHECK: define available_externally <4 x float> @_mm_rcp_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG719:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG718]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG719]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG720:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_rcp_ps(<4 x float> [[REG720]])
+// CHECK-NEXT: store <4 x float> [[REG721]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG722]], <4 x float> [[REG723]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG724]]
+
+void __attribute__((noinline))
+test_rsqrt() {
+  res = _mm_rsqrt_ps(m1);
+  res = _mm_rsqrt_ss(m1);
+}
+
+// CHECK-LABEL: @test_rsqrt
+
+// CHECK: define available_externally <4 x float> @_mm_rsqrt_ps
+// CHECK: [[REG725:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: ret <4 x float> [[REG725]]
+
+// CHECK: define available_externally <4 x float> @_mm_rsqrt_ss
+// CHECK: [[REG726:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG726]], <4 x float>* [[REG727:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG727]], align 16
+// CHECK-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> [[REG728]])
+// CHECK-NEXT: store <4 x float> [[REG729]], <4 x float>* [[REG730:[0-9a-zA_Z_%.]+]], align 16
+// CHECK-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG732:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG730]], align 16
+// CHECK-NEXT: [[REG733:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG731]], <4 x float> [[REG732]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG733]]
+
+void __attribute__((noinline))
+test_sad() {
+  res64 = _mm_sad_pu8(ms[0], ms[1]);
+  res64 = _m_psadbw(ms[0], ms[1]);
+}
+
+// CHECK-LABEL: @test_sad
+
+// CHECK: define available_externally i64 @_mm_sad_pu8(i64 [[REG734:[0-9a-zA-Z_%.]+]], i64 [[REG735:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG734]], i64* [[REG736:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i64 [[REG735]], i64* [[REG737:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[REG738]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG736]], align 8
+// CHECK-NEXT: [[REG740:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG739]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG740]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG741:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG741]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG742]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG737]], align 8
+// CHECK-NEXT: [[REG744:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG743]], i32 1
+// CHECK-NEXT: store <2 x i64> [[REG744]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG745:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG746:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG745]] to <16 x i8>
+// CHECK-NEXT: store <16 x i8> [[REG746]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG749:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG747]], <16 x i8> [[REG748]])
+// CHECK-NEXT: store <16 x i8> [[REG749]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG750:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG750]], <16 x i8> [[REG751]])
+// CHECK-NEXT: store <16 x i8> [[REG752]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG753:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG754:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG755:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG753]], <16 x i8> [[REG754]])
+// CHECK-NEXT: store <16 x i8> [[REG755]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG756:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> [[REG756]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: store <4 x i32> [[REG757]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG758:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG759:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sums(<4 x i32> [[REG758]], <4 x i32> zeroinitializer)
+// CHECK-NEXT: store <4 x i32> [[REG759]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG761:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG760]], i32 3
+// CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG761]] to i16
+// CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG763]], i64 0, i64 0
+// CHECK-NEXT: store i16 [[REG762]], i16* [[REG764]], align 8
+// CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
+// CHECK-NEXT: [[REG766:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG765]], align 8
+// CHECK-NEXT: ret i64 [[REG766]]
+
+// CHECK: define available_externally i64 @_m_psadbw
+// CHECK: [[REG767:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sad_pu8
+// CHECK-NEXT: ret i64 [[REG767]]
+
+void __attribute__((noinline))
+test_set() {
+  res = _mm_set_ps(fs[0], fs[1], fs[2], fs[3]);
+  res = _mm_set_ps1(fs[0]);
+  res = _mm_set_ss(fs[0]);
+  res = _mm_set1_ps(fs[0]);
+  res = _mm_setr_ps(fs[0], fs[1], fs[2], fs[3]);
+}
+
+// CHECK-LABEL: @test_set
+
+// CHECK: define available_externally <4 x float> @_mm_set_ps(float [[REG768:[0-9a-zA-Z_%.]+]], float [[REG769:[0-9a-zA-Z_%.]+]], float [[REG770:[0-9a-zA-Z_%.]+]], float [[REG771:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG768]], float* [[REG772:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG769]], float* [[REG773:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG770]], float* [[REG774:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG771]], float* [[REG775:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = load float, float* [[REG775]], align 4
+// CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG776]], i32 0
+// CHECK-NEXT: [[REG778:[0-9a-zA-Z_%.]+]] = load float, float* [[REG774]], align 4
+// CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG777]], float [[REG778]], i32 1
+// CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load float, float* [[REG773]], align 4
+// CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG779]], float [[REG780]], i32 2
+// CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = load float, float* [[REG772]], align 4
+// CHECK-NEXT: [[REG783:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG781]], float [[REG782]], i32 3
+// CHECK-NEXT: store <4 x float> [[REG783]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG784:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG784]]
+
+// CHECK: define available_externally <4 x float> @_mm_set_ps1(float [[REG785:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG785]], float* [[REG786:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG787:[0-9a-zA-Z_%.]+]] = load float, float* [[REG786]], align 4
+// CHECK-NEXT: [[REG788:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps(float [[REG787]])
+// CHECK-NEXT: ret <4 x float> [[REG788]]
+
+// CHECK: define available_externally <4 x float> @_mm_set_ss(float [[REG789:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG789:[0-9a-zA-Z_%.]+]], float* [[REG790:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG791:[0-9a-zA-Z_%.]+]] = load float, float* [[REG790]], align 4
+// CHECK-NEXT: [[REG792:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG791]], i32 0
+// CHECK-NEXT: [[REG793:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG792]], float 0.000000e+00, i32 1
+// CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG793]], float 0.000000e+00, i32 2
+// CHECK-NEXT: [[REG795:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG794]], float 0.000000e+00, i32 3
+// CHECK-NEXT: store <4 x float> [[REG795]], <4 x float>* [[REG796:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG797:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG796]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG797]]
+
+// CHECK: define available_externally <4 x float> @_mm_set1_ps(float [[REG798:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG798]], float* [[REG799:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG800]], i32 0
+// CHECK-NEXT: [[REG802:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG803:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG801]], float [[REG802]], i32 1
+// CHECK-NEXT: [[REG804:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG805:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG803]], float [[REG804]], i32 2
+// CHECK-NEXT: [[REG806:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
+// CHECK-NEXT: [[REG807:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG805]], float [[REG806]], i32 3
+// CHECK-NEXT: store <4 x float> [[REG807]], <4 x float>* [[REG808:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG809:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG808]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG809]]
+
+// CHECK: define available_externally <4 x float> @_mm_setr_ps(float [[REG810:[0-9a-zA-Z_%.]+]], float [[REG811:[0-9a-zA-Z_%.]+]], float [[REG812:[0-9a-zA-Z_%.]+]], float [[REG813:[0-9a-zA-Z_%.]+]])
+// CHECK: store float [[REG810]], float* [[REG814:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG811]], float* [[REG815:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG812]], float* [[REG816:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: store float [[REG813]], float* [[REG817:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG818:[0-9a-zA-Z_%.]+]] = load float, float* [[REG814]], align 4
+// CHECK-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG818]], i32 0
+// CHECK-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = load float, float* [[REG815]], align 4
+// CHECK-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG819]], float [[REG820]], i32 1
+// CHECK-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load float, float* [[REG816]], align 4
+// CHECK-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG821]], float [[REG822]], i32 2
+// CHECK-NEXT: [[REG824:[0-9a-zA-Z_%.]+]] = load float, float* [[REG817]], align 4
+// CHECK-NEXT: [[REG825:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG823]], float [[REG824]], i32 3
+// CHECK-NEXT: store <4 x float> [[REG825]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG826:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: ret <4 x float> [[REG826]]
+
+void __attribute__((noinline))
+test_setzero() {
+  res = _mm_setzero_ps();
+}
+
+// CHECK-LABEL: @test_setzero
+
+// CHECK: define available_externally <4 x float> @_mm_setzero_ps
+// CHECK: store <4 x float> zeroinitializer, <4 x float>* [[REG827:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG827]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG828]]
+
+void __attribute__((noinline))
+test_sfence() {
+  _mm_sfence();
+}
+
+// CHECK-LABEL: @test_sfence
+
+// CHECK: define available_externally void @_mm_sfence
+// CHECK: fence release
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_shuffle() {
+  res64 = _mm_shuffle_pi16(ms[0], i);
+  res = _mm_shuffle_ps(m1, m2, i);
+  res64 = _m_pshufw(ms[0], i);
+}
+
+// CHECK-LABEL: @test_shuffle
+
+// CHECK: define available_externally i64 @_mm_shuffle_pi16(i64 [[REG829:[0-9a-zA-Z_%.]+]], i32 signext [[REG830:[0-9a-zA-Z_%.]+]])
+// CHECK: store i64 [[REG829]], i64* [[REG831:[0-9a-zA-Z_%.]+]], align 8
+// CHECK-NEXT: store i32 [[REG830]], i32* [[REG832:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = and i32 [[REG833]], 3
+// CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = sext i32 [[REG834]] to i64
+// CHECK-NEXT: store i64 [[REG835]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG836:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG836]], 2
+// CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = and i32 [[REG837]], 3
+// CHECK-NEXT: [[REG839:[0-9a-zA-Z_%.]+]] = sext i32 [[REG838]] to i64
+// CHECK-NEXT: store i64 [[REG839]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG840:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG840]], 4
+// CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = and i32 [[REG841]], 3
+// CHECK-NEXT: [[REG843:[0-9a-zA-Z_%.]+]] = sext i32 [[REG842]] to i64
+// CHECK-NEXT: store i64 [[REG843]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
+// CHECK-NEXT: [[REG845:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG844]], 6
+// CHECK-NEXT: [[REG846:[0-9a-zA-Z_%.]+]] = and i32 [[REG845]], 3
+// CHECK-NEXT: [[REG847:[0-9a-zA-Z_%.]+]] = sext i32 [[REG846]] to i64
+// CHECK-NEXT: store i64 [[REG847]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG848:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG849:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG848]]
+// CHECK-NEXT: [[REG850:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG849]], align 2
+// CHECK-NEXT: [[REG851:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 0
+// CHECK-BE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 3
+// CHECK-NEXT: store i16 [[REG850]], i16* [[REG852]]
+// CHECK-NEXT: [[REG853:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG854:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG853]]
+// CHECK-NEXT: [[REG855:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG854]], align 2
+// CHECK-NEXT: [[REG856:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 1
+// CHECK-BE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 2
+// CHECK-NEXT: store i16 [[REG855]], i16* [[REG857]]
+// CHECK-NEXT: [[REG858:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG859:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG858]]
+// CHECK-NEXT: [[REG860:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG859]], align 2
+// CHECK-NEXT: [[REG861:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 2
+// CHECK-BE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 1
+// CHECK-NEXT: store i16 [[REG860]], i16* [[REG862]]
+// CHECK-NEXT: [[REG863:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG864:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG863]]
+// CHECK-NEXT: [[REG865:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG864]], align 2
+// CHECK-NEXT: [[REG866:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
+// CHECK-LE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 3
+// CHECK-BE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 0
+// CHECK-NEXT: store i16 [[REG865]], i16* [[REG867]]
+// CHECK-NEXT: [[REG868:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
+// CHECK-NEXT: [[REG869:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG868]], align 8
+// CHECK-NEXT: [[REG870:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG869]])
+// CHECK-NEXT: store <2 x i64> [[REG870]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG871:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG831]], align 8
+// CHECK-NEXT: [[REG872:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG871]])
+// CHECK-NEXT: store <2 x i64> [[REG872]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG873:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG874:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG875:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG876:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG875]] to <16 x i8>
+// CHECK-NEXT: [[REG877:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])(<2 x i64> [[REG873]], <2 x i64> [[REG874]], <16 x i8> [[REG876]])
+// CHECK-NEXT: store <2 x i64> [[REG877]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG878:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG879:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG878]], i32 0
+// CHECK-NEXT: ret i64 [[REG879]]
+
+// CHECK: define available_externally <4 x float> @_mm_shuffle_ps(<4 x float> [[REG880:[0-9a-zA-Z_%.]+]], <4 x float> [[REG881:[0-9a-zA-Z_%.]+]], i32 signext [[REG882:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG880]], <4 x float>* [[REG883:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG881]], <4 x float>* [[REG884:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store i32 [[REG882]], i32* [[REG885:[0-9a-zA-Z_%.]+]], align 4
+// CHECK-NEXT: [[REG886:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG887:[0-9a-zA-Z_%.]+]] = and i32 [[REG886]], 3
+// CHECK-NEXT: [[REG888:[0-9a-zA-Z_%.]+]] = sext i32 [[REG887]] to i64
+// CHECK-NEXT: store i64 [[REG888]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG889:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG890:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG889]], 2
+// CHECK-NEXT: [[REG891:[0-9a-zA-Z_%.]+]] = and i32 [[REG890]], 3
+// CHECK-NEXT: [[REG892:[0-9a-zA-Z_%.]+]] = sext i32 [[REG891]] to i64
+// CHECK-NEXT: store i64 [[REG892]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG893:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG894:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG893]], 4
+// CHECK-NEXT: [[REG895:[0-9a-zA-Z_%.]+]] = and i32 [[REG894]], 3
+// CHECK-NEXT: [[REG896:[0-9a-zA-Z_%.]+]] = sext i32 [[REG895]] to i64
+// CHECK-NEXT: store i64 [[REG896]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG897:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
+// CHECK-NEXT: [[REG898:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG897]], 6
+// CHECK-NEXT: [[REG899:[0-9a-zA-Z_%.]+]] = and i32 [[REG898]], 3
+// CHECK-NEXT: [[REG900:[0-9a-zA-Z_%.]+]] = sext i32 [[REG899]] to i64
+// CHECK-NEXT: store i64 [[REG900]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG901:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG902:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG901]]
+// CHECK-NEXT: [[REG903:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG902]], align 4
+// CHECK-NEXT: [[REG904:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG905:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG904]], i32 [[REG903]], i32 0
+// CHECK-NEXT: store <4 x i32> [[REG905]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG906:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG907:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG906]]
+// CHECK-NEXT: [[REG908:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG907]], align 4
+// CHECK-NEXT: [[REG909:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG910:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG909]], i32 [[REG908]], i32 1
+// CHECK-NEXT: store <4 x i32> [[REG910]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG911:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG912:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG911]]
+// CHECK-NEXT: [[REG913:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG912]], align 4
+// CHECK-NEXT: [[REG914:[0-9a-zA-Z_%.]+]] = add i32 [[REG913]], 269488144
+// CHECK-NEXT: [[REG915:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG916:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG915]], i32 [[REG914]], i32 2
+// CHECK-NEXT: store <4 x i32> [[REG916]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG917:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG918:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG917]]
+// CHECK-NEXT: [[REG919:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG918]], align 4
+// CHECK-NEXT: [[REG920:[0-9a-zA-Z_%.]+]] = add i32 [[REG919]], 269488144
+// CHECK-NEXT: [[REG921:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG922:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG921]], i32 [[REG920]], i32 3
+// CHECK-NEXT: store <4 x i32> [[REG922]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG923:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG924:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG925:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG926:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG925]] to <16 x i8>
+// CHECK-NEXT: [[REG927:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG923]], <4 x float> [[REG924]], <16 x i8> [[REG926]])
+// CHECK-NEXT: ret <4 x float> [[REG927]]
+
+// CHECK: define available_externally i64 @_m_pshufw
+// CHECK: [[REG928:[0-9a-zA-Z_%.]+]] = call i64 @_mm_shuffle_pi16
+// CHECK-NEXT: ret i64 [[REG928]]
+
+void __attribute__((noinline))
+test_sqrt() {
+  res = _mm_sqrt_ps(m1);
+  res = _mm_sqrt_ss(m1);
+}
+
+// CHECK-LABEL: @test_sqrt
+
+// CHECK: define available_externally <4 x float> @_mm_sqrt_ps
+// CHECK: [[REG929:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
+// CHECK-NEXT: ret <4 x float> [[REG929]]
+
+// CHECK: define available_externally <4 x float> @_mm_sqrt_ss
+// CHECK: [[REG930:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG931:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG930]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG931]], <4 x float>* [[REG932:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG933:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG932]], align 16
+// CHECK-NEXT: [[REG934:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> [[REG933]])
+// CHECK-NEXT: store <4 x float> [[REG934]], <4 x float>* [[REG935:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG936:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG937:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG935]], align 16
+// CHECK-NEXT: [[REG938:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG936]], <4 x float> [[REG937]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG938]]
+
+void __attribute__((noinline))
+test_store() {
+  _mm_store_ps(fs, m1);
+  _mm_store_ps1(fs, m1);
+  _mm_store_ss(fs, m1);
+  _mm_store1_ps(fs, m1);
+  _mm_storeh_pi(ms, m1);
+  _mm_storel_pi(ms, m1);
+  _mm_storer_ps(fs, m1);
+}
+
+// CHECK-LABEL: @test_store
+
+// CHECK: define available_externally void @_mm_store_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG939:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG940:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG941:[0-9a-zA-Z_%.]+]] = bitcast float* [[REG940]] to <4 x float>*
+// CHECK-NEXT: call void @vec_st(float vector[4], int, float vector[4]*)(<4 x float> [[REG939]], i32 signext 0, <4 x float>* [[REG941]])
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_store_ps1
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG942:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG943:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store1_ps(float* [[REG942]], <4 x float> [[REG943]])
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_store_ss
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG944:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG945:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG944]], i32 0
+// CHECK-NEXT: [[REG946:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store float [[REG945]], float* [[REG946]], align 4
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_store1_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG947:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG948:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG947]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG948]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG949:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG950:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store_ps(float* [[REG949]], <4 x float> [[REG950]])
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_storeh_pi
+// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG951:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG952:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG951]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG952]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG953:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG954:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG953]], i32 1
+// CHECK-NEXT: [[REG955:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG954]], i64* [[REG955]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_storel_pi
+// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG956:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG957:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG956]] to <2 x i64>
+// CHECK-NEXT: store <2 x i64> [[REG957]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG958:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG959:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG958]], i32 0
+// CHECK-NEXT: [[REG960:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG959]], i64* [[REG960]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_storer_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG961:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG962:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG963:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG961]], <4 x float> [[REG962]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
+// CHECK-NEXT: store <4 x float> [[REG963]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG964:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG965:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store_ps(float* [[REG964]], <4 x float> [[REG965]])
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_stream() {
+  _mm_stream_pi(&res64, ms[0]);
+  _mm_stream_ps(&fs[0], m1);
+}
+
+// CHECK-LABEL: @test_stream
+
+/// CHECK: define available_externally void @_mm_stream_pi
+// CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG966:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* [[REG966]])
+// CHECK-NEXT: [[REG967:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG968:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store i64 [[REG967]], i64* [[REG968]], align 8
+// CHECK-NEXT: ret void
+
+// CHECK: define available_externally void @_mm_stream_ps
+// CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG969:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(float* [[REG969]])
+// CHECK-NEXT: [[REG970:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
+// CHECK-NEXT: [[REG971:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: call void @_mm_store_ps(float* [[REG970]], <4 x float> [[REG971]])
+// CHECK-NEXT: ret void
+
+void __attribute__((noinline))
+test_sub() {
+  res = _mm_sub_ps(m1, m2);
+  res = _mm_sub_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_sub
+
+// CHECK: define available_externally <4 x float> @_mm_sub_ps(<4 x float> [[REG972:[0-9a-zA-Z_%.]+]], <4 x float> [[REG973:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG972]], <4 x float>* [[REG974:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG973]], <4 x float>* [[REG975:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG976:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG974]], align 16
+// CHECK-NEXT: [[REG977:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG975]], align 16
+// CHECK-NEXT: [[REG978:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG976]], [[REG977]]
+// CHECK-NEXT: ret <4 x float> [[REG978]]
+
+// CHECK: define available_externally <4 x float> @_mm_sub_ss(<4 x float> [[REG979:[0-9a-zA-Z_%.]+]], <4 x float> [[REG980:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG979]], <4 x float>* [[REG981:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG980]], <4 x float>* [[REG982:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG983:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
+// CHECK-NEXT: [[REG984:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG983]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG984]], <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG982]], align 16
+// CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG986]], i32 zeroext 0)
+// CHECK-NEXT: store <4 x float> [[REG987]], <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG990:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG991:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG989]], [[REG990]]
+// CHECK-NEXT: store <4 x float> [[REG991]], <4 x float>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG993:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
+// CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG992]], align 16
+// CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG993]], <4 x float> [[REG994]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
+// CHECK-NEXT: ret <4 x float> [[REG995]]
+
+void __attribute__((noinline))
+test_transpose() {
+  __m128 m3, m4;
+  _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
+}
+
+// CHECK-LABEL: @test_transpose
+
+// CHECK: br label %[[REG996:[0-9a-zA-Z_%.]+]]
+// CHECK: [[REG996]]:
+// CHECK: [[REG997:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK: [[REG998:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
+// CHECK: [[REG999:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+// CHECK: [[REG1000:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
+// CHECK: [[REG1001:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+// CHECK: [[REG1002:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
+// CHECK: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
+// CHECK: [[REG1004:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
+// CHECK: ret void
+
+void __attribute__((noinline))
+test_ucomi() {
+  i = _mm_ucomieq_ss(m1, m2);
+  i = _mm_ucomige_ss(m1, m2);
+  i = _mm_ucomigt_ss(m1, m2);
+  i = _mm_ucomile_ss(m1, m2);
+  i = _mm_ucomilt_ss(m1, m2);
+  i = _mm_ucomineq_ss(m1, m2);
+}
+
+// CHECK-LABEL: @test_ucomi
+
+// CHECK: define available_externally signext i32 @_mm_ucomieq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1005:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1006:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1005]], i32 0
+// CHECK-NEXT: [[REG1007:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1007]], i32 0
+// CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG1006]], [[REG1008]]
+// CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1009]] to i32
+// CHECK-NEXT: ret i32 [[REG1010]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomige_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1011:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1012:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1011]], i32 0
+// CHECK-NEXT: [[REG1013:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1014:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1013]], i32 0
+// CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG1012]], [[REG1014]]
+// CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1015]] to i32
+// CHECK-NEXT: ret i32 [[REG1016]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomigt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1018:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1017]], i32 0
+// CHECK-NEXT: [[REG1019:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1020:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1019]], i32 0
+// CHECK-NEXT: [[REG1021:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG1018]], [[REG1020]]
+// CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1021]] to i32
+// CHECK-NEXT: ret i32 [[REG1022]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomile_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1023]], i32 0
+// CHECK-NEXT: [[REG1025:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1026:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1025]], i32 0
+// CHECK-NEXT: [[REG1027:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG1024]], [[REG1026]]
+// CHECK-NEXT: [[REG1028:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1027]] to i32
+// CHECK-NEXT: ret i32 [[REG1028]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomilt_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1029]], i32 0
+// CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1032:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1031]], i32 0
+// CHECK-NEXT: [[REG1033:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG1030]], [[REG1032]]
+// CHECK-NEXT: [[REG1034:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1033]] to i32
+// CHECK-NEXT: ret i32 [[REG1034]]
+
+// CHECK: define available_externally signext i32 @_mm_ucomineq_ss
+// CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1035:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1035]], i32 0
+// CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
+// CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1037]], i32 0
+// CHECK-NEXT: [[REG1039:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG1036]], [[REG1038]]
+// CHECK-NEXT: [[REG1040:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1039]] to i32
+// CHECK-NEXT: ret i32 [[REG1040]]
+
+void __attribute__((noinline))
+test_undefined() {
+  res = _mm_undefined_ps();
+}
+
+// CHECK-LABEL: @test_undefined
+
+// CHECK: define available_externally <4 x float> @_mm_undefined_ps
+// CHECK: [[REG1041:[0-9a-zA-Z_%.]+]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[REG1042:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
+// CHECK-NEXT: store <4 x float> [[REG1042]], <4 x float>* [[REG1041]], align 16
+// CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
+// CHECK-NEXT: ret <4 x float> [[REG1043]]
+
+void __attribute__((noinline))
+test_unpack() {
+  res = _mm_unpackhi_ps(m1, m2);
+  res = _mm_unpacklo_ps(m1, m2);
+}
+
+// CHECK-LABEL: @test_unpack
+
+// CHECK: define available_externally <4 x float> @_mm_unpackhi_ps(<4 x float> [[REG1044:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1045:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG1044]], <4 x float>* [[REG1046:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG1045]], <4 x float>* [[REG1047:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG1048:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1046]], align 16
+// CHECK-NEXT: [[REG1049:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1047]], align 16
+// CHECK-NEXT: [[REG1050:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])(<4 x float> [[REG1048]], <4 x float> [[REG1049]])
+// CHECK-NEXT: ret <4 x float> [[REG1050]]
+
+// CHECK: define available_externally <4 x float> @_mm_unpacklo_ps(<4 x float> [[REG1051:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1052:[0-9a-zA-Z_%.]+]])
+// CHECK: store <4 x float> [[REG1051]], <4 x float>* [[REG1053:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: store <4 x float> [[REG1052]], <4 x float>* [[REG1054:[0-9a-zA-Z_%.]+]], align 16
+// CHECK-NEXT: [[REG1055:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1053]], align 16
+// CHECK-NEXT: [[REG1056:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1054]], align 16
+// CHECK-NEXT: [[REG1057:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])(<4 x float> [[REG1055]], <4 x float> [[REG1056]])
+// CHECK-NEXT: ret <4 x float> [[REG1057]]
diff --git a/clang/test/Headers/ppc-intrinsics.c b/clang/test/Headers/ppc-intrinsics.c
deleted file mode 100644
index 622ce90c76258..0000000000000
--- a/clang/test/Headers/ppc-intrinsics.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// REQUIRES: powerpc-registered-target
-
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -o - | FileCheck %s
-// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -target powerpc64-gnu-linux %s -Xclang -verify -x c++ -o - | FileCheck %s
-// expected-no-diagnostics
-
-// RUN: not %clang -S -emit-llvm -target powerpc64-gnu-linux %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
-
-#include <mmintrin.h>
-// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
-
-// CHECK: target triple = "powerpc64-
-// CHECK: !llvm.module.flags =
diff --git a/clang/test/Headers/ppc-mmx-intrinsics.c b/clang/test/Headers/ppc-mmx-intrinsics.c
new file mode 100644
index 0000000000000..406694d1ad9a2
--- /dev/null
+++ b/clang/test/Headers/ppc-mmx-intrinsics.c
@@ -0,0 +1,11 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-unknown-linux-gnu %s -Xclang -verify
+// RUN: %clang -S -emit-llvm -DNO_WARN_X86_INTRINSICS -mcpu=pwr7 -target powerpc64-unknown-linux-gnu %s -Xclang -verify -x c++
+// expected-no-diagnostics
+
+// RUN: not %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr7 %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR
+
+#include <mmintrin.h>
+// CHECK-ERROR: mmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
diff --git a/clang/test/Headers/ppc-sse-intrinsics.c b/clang/test/Headers/ppc-sse-intrinsics.c
new file mode 100644
index 0000000000000..91906f0b08c81
--- /dev/null
+++ b/clang/test/Headers/ppc-sse-intrinsics.c
@@ -0,0 +1,22 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: powerpc-registered-target
+
+// Since mm_malloc.h references system native stdlib.h, doing cross-compile
+// testing may cause unexpected problems. This would affect xmmintrin.h and
+// other following intrinsics headers. If there's need to test them using
+// cross-compile, please add -ffreestanding to compiler options, like
+// test/CodeGen/ppc-xmmintrin.c.
+
+// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify
+// RUN: %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm -DNO_WARN_X86_INTRINSICS %s -mcpu=pwr7 -Xclang -verify -x c++
+// expected-no-diagnostics
+
+// RUN: not %clang -target powerpc64-unknown-linux-gnu -S -emit-llvm %s -mcpu=pwr7 -o /dev/null 2>&1 | FileCheck %s -check-prefix=SSE-ERROR
+
+// Don't include mm_malloc.h, it's system specific.
+#define _MM_MALLOC_H_INCLUDED
+
+// Altivec must be enabled.
+#include <xmmintrin.h>
+
+// SSE-ERROR: xmmintrin.h:{{[0-9]+}}:{{[0-9]+}}: error: "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."

From 2a901401fe453ca5b17048f7e6d74d9c8c91cbf9 Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Fri, 31 May 2019 04:49:58 +0000
Subject: [PATCH 0712/1176] [MIR-Canon] Hardening propagateLocalCopies.

This is am almost NFC, it does the following:
- If there is no register class for a COPY's src or dst, bail.
- Fixes uses iterator invalidation bug.

Differential Revision: https://reviews.llvm.org/D62713

llvm-svn: 362191
---
 llvm/lib/CodeGen/MIRCanonicalizerPass.cpp        | 16 ++++++++++++----
 llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir |  9 +++++++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 650240e60fef9..d36c0c89ba0b1 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -343,15 +343,23 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
       continue;
     if (!TargetRegisterInfo::isVirtualRegister(Src))
       continue;
+    // Not folding COPY instructions if regbankselect has not set the RCs.
+    // Why are we only considering Register Classes? Because the verifier
+    // sometimes gets upset if the register classes don't match even if the
+    // types do. A future patch might add COPY folding for matching types in
+    // pre-registerbankselect code.
+    if (!MRI.getRegClassOrNull(Dst))
+      continue;
     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
       continue;
 
-    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
-      MachineOperand *MO = &*UI;
+    std::vector<MachineOperand *> Uses;
+    for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
+      Uses.push_back(&*UI);
+    for (auto *MO : Uses)
       MO->setReg(Src);
-      Changed = true;
-    }
 
+    Changed = true;
     MI->eraseFromParent();
   }
 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
index 65ad2ff41a7e2..005014d5e83f4 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
@@ -1,8 +1,13 @@
 # RUN: llc -o -  -march=amdgcn  -run-pass mir-canonicalizer  -x mir %s | FileCheck %s
 
+# CHECK: %namedVReg4354:vgpr_32 = COPY $vgpr0
 # CHECK: %namedVReg1352:vgpr_32 = COPY %namedVReg4353
-# CHECK: %namedVReg1359:vgpr_32 = COPY %namedVReg1362
-# CHECK: %namedVReg1360:vgpr_32 = COPY %namedVReg1363
+# CHECK-NEXT: %namedVReg1358:vgpr_32 = COPY %namedVReg1361
+# CHECK-NEXT: %namedVReg1359:vgpr_32 = COPY %namedVReg1362
+# CHECK-NEXT: %namedVReg1353:vreg_64 = REG_SEQUENCE %namedVReg4354, %subreg.sub0, %namedVReg1352, %subreg.sub1
+# CHECK-NEXT: %namedVReg1354:sgpr_128 = REG_SEQUENCE %namedVReg4354, %subreg.sub0, %namedVReg1352, %subreg.sub1, %namedVReg1358, %subreg.sub2, %namedVReg1359, %subreg.sub3
+# This tests for the itereator invalidation fix (reviews.llvm.org/D62713)
+# CHECK-NEXT: BUFFER_STORE_DWORD_ADDR64 %namedVReg1352, %namedVReg1353, %namedVReg1354, 0, 0, 0, 0, 0, 0, implicit $exec
 ...
 ---
 name: foo

From c9e27be585582bf3ae8d5831a669c640f0d6aaef Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Fri, 31 May 2019 05:06:54 +0000
Subject: [PATCH 0713/1176] Fix off-by-one error.

The created string is one char too large, so it pulls the terminating NULL as
the last character of the string.  This later causes SocketTest.cpp to fail.

llvm-svn: 362192
---
 lldb/source/Host/posix/DomainSocket.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Host/posix/DomainSocket.cpp b/lldb/source/Host/posix/DomainSocket.cpp
index f4b5f428742bb..2710213efde2f 100644
--- a/lldb/source/Host/posix/DomainSocket.cpp
+++ b/lldb/source/Host/posix/DomainSocket.cpp
@@ -136,7 +136,7 @@ std::string DomainSocket::GetSocketName() const {
       return std::string(saddr_un.sun_path + GetNameOffset(),
                          sock_addr_len -
                              offsetof(struct sockaddr_un, sun_path) -
-                             GetNameOffset());
+                             GetNameOffset() - 1);
   }
   return "";
 }

From 23066033a1b70b5981ea86207cd87e9106b31771 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 05:20:27 +0000
Subject: [PATCH 0714/1176] [X86] Correct the ins operand order for
 MASKPAIR16STORE to match other store instructions.

This makes the 5 address operands come first. And the data operand comes last.

This matches the operand order the instruction is created with. It's also the
expected order in X86MCInstLower. So everything appeared to work, but the
operands didn't match their declared type.

Fixes a -verify-machineinstrs failure.

Also remove the isel patterns from these instructions since they should only
be used for stack spills and reloads. I'm not even sure what types the patterns
were looking for to match.

llvm-svn: 362193
---
 llvm/lib/Target/X86/X86InstrAVX512.td                | 8 ++++----
 llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 20380bb84481f..d93059b44f32d 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12561,10 +12561,10 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
 }
 
 let hasSideEffects = 0 in {
-  def MASKPAIR16STORE : PseudoI<(outs), (ins VK16PAIR:$src, anymem:$dst),
-             [(store VK16PAIR:$src, addr:$dst)]>;
-  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src),
-             [(set VK16PAIR:$dst, (load addr:$src))]>;
+  let mayStore = 1 in
+  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
+  let mayLoad = 1 in
+  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
index a38d95b6f9472..afd213b53a02a 100644
--- a/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
+++ b/llvm/test/CodeGen/X86/vp2intersect_multiple_pairs.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vp2intersect -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
 
 ; Test with more than four live mask pairs
 

From 30935ef0bcdc7703eb2199e3fc5095198497e889 Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Fri, 31 May 2019 05:55:07 +0000
Subject: [PATCH 0715/1176] Fix problem with r362192

The string returned only sometimes ends in NULL.  Explicitly check for the NULL
and pop off the NULL if it is there.

llvm-svn: 362194
---
 lldb/source/Host/posix/DomainSocket.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lldb/source/Host/posix/DomainSocket.cpp b/lldb/source/Host/posix/DomainSocket.cpp
index 2710213efde2f..27872f48129c0 100644
--- a/lldb/source/Host/posix/DomainSocket.cpp
+++ b/lldb/source/Host/posix/DomainSocket.cpp
@@ -132,11 +132,14 @@ std::string DomainSocket::GetSocketName() const {
     saddr_un.sun_family = AF_UNIX;
     socklen_t sock_addr_len = sizeof(struct sockaddr_un);
     if (::getpeername(m_socket, (struct sockaddr *)&saddr_un, &sock_addr_len) ==
-        0)
-      return std::string(saddr_un.sun_path + GetNameOffset(),
-                         sock_addr_len -
-                             offsetof(struct sockaddr_un, sun_path) -
-                             GetNameOffset() - 1);
+        0) {
+      std::string name(saddr_un.sun_path + GetNameOffset(),
+                       sock_addr_len -
+                           offsetof(struct sockaddr_un, sun_path) -
+                           GetNameOffset());
+      if (name.back() == '\0') name.pop_back();
+      return name;
+    }
   }
   return "";
 }

From 0d63cef180ccc6f5afab824cc784949acb137713 Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Fri, 31 May 2019 06:02:38 +0000
Subject: [PATCH 0716/1176] [MIR-Canon] Skip the first N vreg names lazily.

This consolidates the vreg skip code into one function (SkipVRegs()).
SkipVRegs() now knows if it should skip as if it is the first initialization or
subsequent skips.

The first skip is also done the first time createVirtualRegister is called by
the cursor instead of by the cursor's constructor. This prevents verifier
errors on machine functions that have no vregs (where the verifier will
complain that there are vregs when the function uses none).

Differential Revision: https://reviews.llvm.org/D62717

llvm-svn: 362195
---
 llvm/lib/CodeGen/MIRCanonicalizerPass.cpp        | 16 +++++++---------
 .../MIR/AArch64/multiple-lhs-operands.mir        |  3 +++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index d36c0c89ba0b1..a4097232d7d59 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -483,18 +483,14 @@ class NamedVRegCursor {
   unsigned virtualVRegNumber;
 
 public:
-  NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) {
-    unsigned VRegGapIndex = 0;
-    const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
-    unsigned I = MRI.createIncompleteVirtualRegister();
-    const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
-    virtualVRegNumber = E;
-  }
+  NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {}
 
   void SkipVRegs() {
     unsigned VRegGapIndex = 1;
+    if (!virtualVRegNumber) {
+      VRegGapIndex = 0;
+      virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+    }
     const unsigned VR_GAP = (++VRegGapIndex * 1000);
 
     unsigned I = virtualVRegNumber;
@@ -511,6 +507,8 @@ class NamedVRegCursor {
   }
 
   unsigned createVirtualRegister(unsigned VReg) {
+    if (!virtualVRegNumber)
+      SkipVRegs();
     std::string S;
     raw_string_ostream OS(S);
     OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
diff --git a/llvm/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir b/llvm/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
index 5f4bd9897d8b7..1437e975596d8 100644
--- a/llvm/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
@@ -2,6 +2,9 @@
 # This test ensures that the MIR parser can parse multiple register machine
 # operands before '='.
 
+# This tests that a MIR file with no vregs does not get altered by mir-canon.
+# RUN: llc -mtriple=aarch64 -o - -run-pass mir-canonicalizer -verify-machineinstrs %s
+
 --- |
 
   declare void @foo()

From cc3629d545a846fb3680f0e5b4691f3c829fd2dc Mon Sep 17 00:00:00 2001
From: Pengfei Wang <pengfei.wang@intel.com>
Date: Fri, 31 May 2019 06:09:35 +0000
Subject: [PATCH 0717/1176] [X86] Add VP2INTERSECT instructions

Support intel AVX512 VP2INTERSECT instructions in clang

Patch by Xiang Zhang (xiangzhangllvm)

Differential Revision: https://reviews.llvm.org/D62367

llvm-svn: 362196
---
 clang/docs/ClangCommandLineReference.rst      |   2 +
 clang/include/clang/Basic/BuiltinsX86.def     |   6 +
 clang/include/clang/Driver/Options.td         |   2 +
 clang/lib/Basic/Targets/X86.cpp               |   8 +-
 clang/lib/Basic/Targets/X86.h                 |   1 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  42 ++++++
 clang/lib/Headers/CMakeLists.txt              |   2 +
 .../lib/Headers/avx512vlvp2intersectintrin.h  | 121 ++++++++++++++++++
 clang/lib/Headers/avx512vp2intersectintrin.h  |  77 +++++++++++
 clang/lib/Headers/immintrin.h                 |  10 ++
 clang/test/CodeGen/attr-target-x86.c          |   4 +-
 .../test/CodeGen/intel-avx512vlvp2intersect.c |  36 ++++++
 clang/test/CodeGen/intel-avx512vp2intersect.c |  20 +++
 clang/test/Driver/x86-target-features.c       |   5 +
 clang/test/Preprocessor/x86_target_features.c |  10 ++
 15 files changed, 343 insertions(+), 3 deletions(-)
 create mode 100644 clang/lib/Headers/avx512vlvp2intersectintrin.h
 create mode 100644 clang/lib/Headers/avx512vp2intersectintrin.h
 create mode 100644 clang/test/CodeGen/intel-avx512vlvp2intersect.c
 create mode 100644 clang/test/CodeGen/intel-avx512vp2intersect.c

diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst
index ddba3b10b7899..ee58c2d30be53 100644
--- a/clang/docs/ClangCommandLineReference.rst
+++ b/clang/docs/ClangCommandLineReference.rst
@@ -2639,6 +2639,8 @@ X86
 
 .. option:: -mavx512vnni, -mno-avx512vnni
 
+.. option:: -mavx512vp2intersect, -mno-avx512vp2intersect
+
 .. option:: -mavx512vpopcntdq, -mno-avx512vpopcntdq
 
 .. option:: -mbmi, -mno-bmi
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 7db66c7b140dc..47f79b90fc21a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1840,6 +1840,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtneps2bf16_512_mask, "V16sV16fV16sUs", "ncV:512:
 TARGET_BUILTIN(__builtin_ia32_dpbf16ps_128, "V4fV4fV4iV4i", "ncV:128:", "avx512bf16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dpbf16ps_256, "V8fV8fV8iV8i", "ncV:256:", "avx512bf16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dpbf16ps_512, "V16fV16fV16iV16i", "ncV:512:", "avx512bf16")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_512, "vV8LLiV8LLiUc*Uc*", "nV:512:", "avx512vp2intersect")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_256, "vV4LLiV4LLiUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_q_128, "vV2LLiV2LLiUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_512, "vV16iV16iUs*Us*", "nV:512:", "avx512vp2intersect")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_256, "vV8iV8iUc*Uc*", "nV:256:", "avx512vp2intersect,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "avx512vp2intersect,avx512vl")
 
 // generic select intrinsics
 TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 68f415fb31d63..b86d39261e6ef 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2894,6 +2894,8 @@ def mavx512vnni : Flag<["-"], "mavx512vnni">, Group<m_x86_Features_Group>;
 def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group<m_x86_Features_Group>;
 def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
 def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
+def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group<m_x86_Features_Group>;
+def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group<m_x86_Features_Group>;
 def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;
 def mno_adx : Flag<["-"], "mno-adx">, Group<m_x86_Features_Group>;
 def maes : Flag<["-"], "maes">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 7bef7ce9c66e2..02e6ed2db61d0 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -524,6 +524,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
     Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
     Features["avx512bitalg"] = Features["avx512vnni"] = false;
     Features["avx512vbmi2"] = Features["avx512bf16"] = false;
+    Features["avx512vp2intersect"] = false;
     break;
   }
 }
@@ -774,6 +775,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512VBMI2 = true;
     } else if (Feature == "+avx512ifma") {
       HasAVX512IFMA = true;
+    } else if (Feature == "+avx512vp2intersect") {
+      HasAVX512VP2INTERSECT = true;
     } else if (Feature == "+sha") {
       HasSHA = true;
     } else if (Feature == "+mpx") {
@@ -1166,7 +1169,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AVX512VBMI2__");
   if (HasAVX512IFMA)
     Builder.defineMacro("__AVX512IFMA__");
-
+  if (HasAVX512VP2INTERSECT)
+    Builder.defineMacro("__AVX512VP2INTERSECT__");
   if (HasSHA)
     Builder.defineMacro("__SHA__");
 
@@ -1322,6 +1326,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("avx512vbmi", true)
       .Case("avx512vbmi2", true)
       .Case("avx512ifma", true)
+      .Case("avx512vp2intersect", true)
       .Case("bmi", true)
       .Case("bmi2", true)
       .Case("cldemote", true)
@@ -1401,6 +1406,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512vbmi", HasAVX512VBMI)
       .Case("avx512vbmi2", HasAVX512VBMI2)
       .Case("avx512ifma", HasAVX512IFMA)
+      .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
       .Case("bmi", HasBMI)
       .Case("bmi2", HasBMI2)
       .Case("cldemote", HasCLDEMOTE)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index d7a87f8d7c829..d84425d53d806 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -78,6 +78,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAVX512VBMI = false;
   bool HasAVX512VBMI2 = false;
   bool HasAVX512IFMA = false;
+  bool HasAVX512VP2INTERSECT = false;
   bool HasSHA = false;
   bool HasMPX = false;
   bool HasSHSTK = false;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bc798cab11e95..9fe2b315edc2c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -11710,6 +11710,48 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
   }
 
+  case X86::BI__builtin_ia32_vp2intersect_q_512:
+  case X86::BI__builtin_ia32_vp2intersect_q_256:
+  case X86::BI__builtin_ia32_vp2intersect_q_128:
+  case X86::BI__builtin_ia32_vp2intersect_d_512:
+  case X86::BI__builtin_ia32_vp2intersect_d_256:
+  case X86::BI__builtin_ia32_vp2intersect_d_128: {
+    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+    Intrinsic::ID ID;
+
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_vp2intersect_q_512:
+      ID = Intrinsic::x86_avx512_vp2intersect_q_512;
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_q_256:
+      ID = Intrinsic::x86_avx512_vp2intersect_q_256;
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_q_128:
+      ID = Intrinsic::x86_avx512_vp2intersect_q_128;
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_d_512:
+      ID = Intrinsic::x86_avx512_vp2intersect_d_512;
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_d_256:
+      ID = Intrinsic::x86_avx512_vp2intersect_d_256;
+      break;
+    case X86::BI__builtin_ia32_vp2intersect_d_128:
+      ID = Intrinsic::x86_avx512_vp2intersect_d_128;
+      break;
+    }
+
+    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
+    Value *Result = Builder.CreateExtractValue(Call, 0);
+    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
+    Value *Store = Builder.CreateDefaultAlignedStore(Result, Ops[2]);
+
+    Result = Builder.CreateExtractValue(Call, 1);
+    Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
+    Store = Builder.CreateDefaultAlignedStore(Result, Ops[3]);
+    return Store;
+  }
+
   case X86::BI__builtin_ia32_vpmultishiftqb128:
   case X86::BI__builtin_ia32_vpmultishiftqb256:
   case X86::BI__builtin_ia32_vpmultishiftqb512: {
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f7a3e5410ced5..dda76ed4e0dac 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -27,6 +27,8 @@ set(files
   avx512vlcdintrin.h
   avx512vldqintrin.h
   avx512vlintrin.h
+  avx512vp2intersectintrin.h
+  avx512vlvp2intersectintrin.h
   avx512vpopcntdqvlintrin.h
   avx512vnniintrin.h
   avx512vlvnniintrin.h
diff --git a/clang/lib/Headers/avx512vlvp2intersectintrin.h b/clang/lib/Headers/avx512vlvp2intersectintrin.h
new file mode 100644
index 0000000000000..3e0815e5d46ff
--- /dev/null
+++ b/clang/lib/Headers/avx512vlvp2intersectintrin.h
@@ -0,0 +1,121 @@
+/*===------ avx512vlvp2intersectintrin.h - VL VP2INTERSECT intrinsics ------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlvp2intersectintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLVP2INTERSECT_H
+#define _AVX512VLVP2INTERSECT_H
+
+#define __DEFAULT_FN_ATTRS128 \
+  __attribute__((__always_inline__, __nodebug__,  __target__("avx512vl,avx512vp2intersect"), \
+                 __min_vector_width__(128)))
+
+#define __DEFAULT_FN_ATTRS256 \
+  __attribute__((__always_inline__, __nodebug__,  __target__("avx512vl,avx512vp2intersect"), \
+                 __min_vector_width__(256)))
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between dwords in operands __a and __b.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VP2INTERSECTD </c> instruction.
+///
+/// \param __a
+///    A 256-bit vector of [8 x i32].
+/// \param __b
+///    A 256-bit vector of [8 x i32]
+/// \param __m0
+///    A pointer point to 8-bit mask
+/// \param __m1
+///    A pointer point to 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS256
+_mm256_2intersect_epi32(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) {
+  __builtin_ia32_vp2intersect_d_256((__v8si)__a, (__v8si)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between quadwords in operands __a and __b.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VP2INTERSECTQ </c> instruction.
+///
+/// \param __a
+///    A 256-bit vector of [4 x i64].
+/// \param __b
+///    A 256-bit vector of [4 x i64]
+/// \param __m0
+///    A pointer point to 8-bit mask
+/// \param __m1
+///    A pointer point to 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS256
+_mm256_2intersect_epi64(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) {
+  __builtin_ia32_vp2intersect_q_256((__v4di)__a, (__v4di)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between dwords in operands __a and __b.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VP2INTERSECTD </c> instruction.
+///
+/// \param __a
+///    A 128-bit vector of [4 x i32].
+/// \param __b
+///    A 128-bit vector of [4 x i32]
+/// \param __m0
+///    A pointer point to 8-bit mask
+/// \param __m1
+///    A pointer point to 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS128
+_mm_2intersect_epi32(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) {
+  __builtin_ia32_vp2intersect_d_128((__v4si)__a, (__v4si)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between quadwords in operands __a and __b.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VP2INTERSECTQ </c> instruction.
+///
+/// \param __a
+///    A 128-bit vector of [2 x i64].
+/// \param __b
+///    A 128-bit vector of [2 x i64]
+/// \param __m0
+///    A pointer point to 8-bit mask
+/// \param __m1
+///    A pointer point to 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS128
+_mm_2intersect_epi64(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) {
+  __builtin_ia32_vp2intersect_q_128((__v2di)__a, (__v2di)__b, __m0, __m1);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
diff --git a/clang/lib/Headers/avx512vp2intersectintrin.h b/clang/lib/Headers/avx512vp2intersectintrin.h
new file mode 100644
index 0000000000000..5d3cb48cfd20f
--- /dev/null
+++ b/clang/lib/Headers/avx512vp2intersectintrin.h
@@ -0,0 +1,77 @@
+/*===------- avx512vpintersectintrin.h - VP2INTERSECT intrinsics ------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vp2intersect.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VP2INTERSECT_H
+#define _AVX512VP2INTERSECT_H
+
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__,  __target__("avx512vp2intersect"), \
+                 __min_vector_width__(512)))
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between dwords in operands __a and __b.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VP2INTERSECTD </c> instruction.
+///
+/// \param __a
+///    A 512-bit vector of [16 x i32].
+/// \param __b
+///    A 512-bit vector of [16 x i32]
+/// \param __m0
+///    A pointer point to 16-bit mask
+/// \param __m1
+///    A pointer point to 16-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_2intersect_epi32(__m512i __a, __m512i __b, __mmask16 *__m0, __mmask16 *__m1) {
+  __builtin_ia32_vp2intersect_d_512((__v16si)__a, (__v16si)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between quadwords in operands __a and __b.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VP2INTERSECTQ </c> instruction.
+///
+/// \param __a
+///    A 512-bit vector of [8 x i64].
+/// \param __b
+///    A 512-bit vector of [8 x i64]
+/// \param __m0
+///    A pointer point to 8-bit mask
+/// \param __m1
+///    A pointer point to 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_2intersect_epi64(__m512i __a, __m512i __b, __mmask8 *__m0, __mmask8 *__m1) {
+  __builtin_ia32_vp2intersect_q_512((__v8di)__a, (__v8di)__b, __m0, __m1);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index ea009bd88b147..73245f3858ec9 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -421,6 +421,16 @@ _storebe_i64(void * __P, long long __D) {
 #include <invpcidintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+  defined(__AVX512VP2INTERSECT__)
+#include <avx512vp2intersectintrin.h>
+#endif
+
+#if !defined(_MSC_VER) || __has_feature(modules) || \
+  (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
+#include <avx512vlvp2intersectintrin.h>
+#endif
+
 #if defined(_MSC_VER) && __has_extension(gnu_asm)
 /* Define the default attributes for these intrinsics */
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index e3a2cb2e1603b..73486e5ee1fe7 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -50,9 +50,9 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
 // CHECK: use_before_def{{.*}} #7
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
 // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
 // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
diff --git a/clang/test/CodeGen/intel-avx512vlvp2intersect.c b/clang/test/CodeGen/intel-avx512vlvp2intersect.c
new file mode 100644
index 0000000000000..c607a6996928e
--- /dev/null
+++ b/clang/test/CodeGen/intel-avx512vlvp2intersect.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) {
+// CHECK-LABEL: test_mm256_2intersect_epi32
+// CHECK: call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0
+// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1
+  _mm256_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) {
+// CHECK-LABEL: test_mm256_2intersect_epi64
+// CHECK: call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+  _mm256_2intersect_epi64(a, b, m0, m1);
+}
+
+void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) {
+// CHECK-LABEL: test_mm_2intersect_epi32
+// CHECK: call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+// CHECK: extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+  _mm_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) {
+// CHECK-LABEL: test_mm_2intersect_epi64
+// CHECK: call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+// CHECK: extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0
+// CHECK: extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1
+  _mm_2intersect_epi64(a, b, m0, m1);
+}
diff --git a/clang/test/CodeGen/intel-avx512vp2intersect.c b/clang/test/CodeGen/intel-avx512vp2intersect.c
new file mode 100644
index 0000000000000..bcbf6076eec33
--- /dev/null
+++ b/clang/test/CodeGen/intel-avx512vp2intersect.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, __mmask16 *m1) {
+// CHECK-LABEL: test_mm512_2intersect_epi32
+// CHECK: call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+// CHECK: extractvalue { <16 x i1>, <16 x i1> } %{{.*}}, 0
+// CHECK: extractvalue { <16 x i1>, <16 x i1> } %{{.*}}, 1
+  _mm512_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *m0, __mmask8 *m1) {
+// CHECK-LABEL: test_mm512_2intersect_epi64
+// CHECK: call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0
+// CHECK: extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1
+  _mm512_2intersect_epi64(a, b, m0, m1);
+}
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index d925f6824c9e2..5866d38648eb8 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -125,6 +125,11 @@
 // VBMI2: "-target-feature" "+avx512vbmi2"
 // NO-VBMI2: "-target-feature" "-avx512vbmi2"
 
+// RUN: %clang -target i386-linux-gnu -mavx512vp2intersect %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VP2INTERSECT %s
+// RUN: %clang -target i386-linux-gnu -mno-avx512vp2intersect %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VP2INTERSECT %s
+// VP2INTERSECT: "-target-feature" "+avx512vp2intersect"
+// NO-VP2INTERSECT: "-target-feature" "-avx512vp2intersect"
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mrdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=RDPID %s
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-rdpid %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-RDPID %s
 // RDPID: "-target-feature" "+rdpid"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index fa815dc61e88a..2dfca34121c1e 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -458,3 +458,13 @@
 
 // AVX512BF16_NOAVX512VL: #define __AVX512BF16__ 1
 
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vp2intersect -x c -E -dM -o - %s | FileCheck  -check-prefix=VP2INTERSECT %s
+
+// VP2INTERSECT: #define __AVX512F__ 1
+// VP2INTERSECT: #define __AVX512VP2INTERSECT__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512vp2intersect -x c -E -dM -o - %s | FileCheck  -check-prefix=NOVP2INTERSECT %s
+// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512vp2intersect -mno-avx512f -x c -E -dM -o - %s | FileCheck  -check-prefix=NOVP2INTERSECT %s
+
+// NOVP2INTERSECT-NOT: #define __AVX512VP2INTERSECT__ 1
+

From 8cb076ec6e0f49805441c2adc54547c6363adba6 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 06:21:45 +0000
Subject: [PATCH 0718/1176] [X86] Add test case for PR42079. NFC

llvm-svn: 362197
---
 llvm/test/CodeGen/X86/vec_fpext.ll | 45 ++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vec_fpext.ll b/llvm/test/CodeGen/X86/vec_fpext.ll
index 3007c8d71dc86..082a533665785 100644
--- a/llvm/test/CodeGen/X86/vec_fpext.ll
+++ b/llvm/test/CodeGen/X86/vec_fpext.ll
@@ -298,3 +298,48 @@ entry:
   %2  = fpext <2 x float> %1 to <2 x double>
   ret <2 x double> %2
 }
+
+; Make sure we don't narrow a volatile load.
+; FIXME: We incorrectly narrow it for avx512vl.
+define <2 x double> @PR42079(<4 x float>* %x) {
+; X32-SSE-LABEL: PR42079:
+; X32-SSE:       # %bb.0:
+; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X32-SSE-NEXT:    movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00]
+; X32-SSE-NEXT:    cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
+; X32-SSE-NEXT:    retl # encoding: [0xc3]
+;
+; X32-AVX-LABEL: PR42079:
+; X32-AVX:       # %bb.0:
+; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X32-AVX-NEXT:    vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00]
+; X32-AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
+; X32-AVX-NEXT:    retl # encoding: [0xc3]
+;
+; X32-AVX512VL-LABEL: PR42079:
+; X32-AVX512VL:       # %bb.0:
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X32-AVX512VL-NEXT:    vcvtps2pd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x00]
+; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
+;
+; X64-SSE-LABEL: PR42079:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07]
+; X64-SSE-NEXT:    cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0]
+; X64-SSE-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX-LABEL: PR42079:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07]
+; X64-AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
+; X64-AVX-NEXT:    retq # encoding: [0xc3]
+;
+; X64-AVX512VL-LABEL: PR42079:
+; X64-AVX512VL:       # %bb.0:
+; X64-AVX512VL-NEXT:    vcvtps2pd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x07]
+; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
+  %a = load volatile <4 x float>, <4 x float>* %x
+  %b = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
+  %c = fpext <2 x float> %b to <2 x double>
+  ret <2 x double> %c
+}

From 73b07284df2760433da1294f445ea6f8f8eabce9 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 06:21:49 +0000
Subject: [PATCH 0719/1176] [X86] Add test to show missed opportunity to use
 masked vcvtps2pd for vselect+extload.

llvm-svn: 362198
---
 llvm/test/CodeGen/X86/avx512-cvt.ll | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index ff1eadcf9ca85..aa7f533c1d421 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -778,6 +778,30 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl
   ret <4 x double> %c
 }
 
+define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+; NOVL-LABEL: f32to4f64_mask_load:
+; NOVL:       # %bb.0:
+; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm2
+; NOVL-NEXT:    vcmpltpd %zmm1, %zmm0, %k1
+; NOVL-NEXT:    vmovapd %zmm2, %zmm0 {%k1} {z}
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVL-NEXT:    retq
+;
+; VL-LABEL: f32to4f64_mask_load:
+; VL:       # %bb.0:
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm2
+; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
+; VL-NEXT:    vmovapd %ymm2, %ymm0 {%k1} {z}
+; VL-NEXT:    retq
+  %b = load <4 x float>, <4 x float>* %p
+  %a = fpext <4 x float> %b to <4 x double>
+  %mask = fcmp ogt <4 x double> %a1, %b1
+  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
+  ret <4 x double> %c
+}
+
 define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
 ; ALL-LABEL: f32tof64_inreg:
 ; ALL:       # %bb.0:

From b79cc5f8024447ab5235a62192dd1425052f5b7c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 06:21:53 +0000
Subject: [PATCH 0720/1176] [X86] Remove avx512 isel patterns for
 fpextend+load. Prefer to only match fp extloads instead.

DAG combine will usually fold fpextend+load to an fp extload anyway. So the
256 and 512 patterns were probably unnecessary. The 128 bit pattern was special
in that it looked for a v4f32 load, but then used it in an instruction that
only loads 64-bits. This is bad if the load happens to be volatile. We could
probably make the patterns volatile aware, but that's more work for something
that's probably rare. The peephole pass might kick in and save us anyway. We
might also be able to fix this with some additional DAG combines.

This also adds patterns for vselect+extload to enabled masked vcvtps2pd to be
used. Previously we looked for the unlikely vselect+fpextend+load.

llvm-svn: 362199
---
 llvm/lib/Target/X86/X86InstrAVX512.td | 68 ++++++++++++++++++++++-----
 llvm/test/CodeGen/X86/avx512-cvt.ll   |  3 +-
 llvm/test/CodeGen/X86/vec_fpext.ll    |  7 +--
 3 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d93059b44f32d..f6e4e85119200 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7677,19 +7677,73 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
+// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
+multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode,
+                                X86FoldableSchedWrite sched,
+                                string Broadcast = _.BroadcastStr,
+                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+                                RegisterClass MaskRC = _.KRCWM,
+                                PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
+
+  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                         (ins _Src.RC:$src),
+                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
+                         (ins MaskRC:$mask, _Src.RC:$src),
+                          OpcodeStr, "$src", "$src",
+                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+                         (vselect MaskRC:$mask,
+                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+                                  _.RC:$src0),
+                         vselect, "$src0 = $dst">,
+                         EVEX, Sched<[sched]>;
+
+  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins MemOp:$src),
+                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
+                         (ins MaskRC:$mask, MemOp:$src),
+                         OpcodeStr#Alias, "$src", "$src",
+                         (_.VT (LdFrag addr:$src)),
+                         (vselect MaskRC:$mask,
+                                  (_.VT (OpNode (_Src.VT
+                                                 (_Src.LdFrag addr:$src)))),
+                                  _.RC:$src0),
+                         vselect, "$src0 = $dst">,
+                         EVEX, Sched<[sched.Folded]>;
+
+  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _Src.ScalarMemOp:$src),
+                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
+                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
+                         OpcodeStr,
+                         "${src}"##Broadcast, "${src}"##Broadcast,
+                         (_.VT (OpNode (_Src.VT
+                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
+                            )),
+                         (vselect MaskRC:$mask,
+                                  (_.VT
+                                   (OpNode
+                                    (_Src.VT
+                                     (X86VBroadcast
+                                      (_Src.ScalarLdFrag addr:$src))))),
+                                  _.RC:$src0),
+                         vselect, "$src0 = $dst">,
+                         EVEX, EVEX_B, Sched<[sched.Folded]>;
+}
+
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
+    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                             fpextend, sched.ZMM>,
              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
-    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
+    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
-    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                                sched.YMM>, EVEX_V256;
   }
 }
@@ -7784,9 +7838,6 @@ let Predicates = [HasAVX512] in {
                      (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                      v8f32x_info.ImmAllZerosV),
             (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
-
-  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
-              (VCVTPS2PDZrm addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
@@ -7819,11 +7870,6 @@ let Predicates = [HasVLX] in {
                      v4f32x_info.ImmAllZerosV),
             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
 
-  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
-              (VCVTPS2PDZ128rm addr:$src)>;
-  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
-              (VCVTPS2PDZ256rm addr:$src)>;
-
   // Special patterns to allow use of X86vmfpround for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(X86vfpround (v2f64 VR128X:$src)),
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index aa7f533c1d421..48293c08a5cb4 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -791,9 +791,8 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x
 ;
 ; VL-LABEL: f32to4f64_mask_load:
 ; VL:       # %bb.0:
-; VL-NEXT:    vcvtps2pd (%rdi), %ymm2
 ; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
-; VL-NEXT:    vmovapd %ymm2, %ymm0 {%k1} {z}
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}
 ; VL-NEXT:    retq
   %b = load <4 x float>, <4 x float>* %p
   %a = fpext <4 x float> %b to <4 x double>
diff --git a/llvm/test/CodeGen/X86/vec_fpext.ll b/llvm/test/CodeGen/X86/vec_fpext.ll
index 082a533665785..46ad1f16f3c08 100644
--- a/llvm/test/CodeGen/X86/vec_fpext.ll
+++ b/llvm/test/CodeGen/X86/vec_fpext.ll
@@ -300,7 +300,6 @@ entry:
 }
 
 ; Make sure we don't narrow a volatile load.
-; FIXME: We incorrectly narrow it for avx512vl.
 define <2 x double> @PR42079(<4 x float>* %x) {
 ; X32-SSE-LABEL: PR42079:
 ; X32-SSE:       # %bb.0:
@@ -319,7 +318,8 @@ define <2 x double> @PR42079(<4 x float>* %x) {
 ; X32-AVX512VL-LABEL: PR42079:
 ; X32-AVX512VL:       # %bb.0:
 ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2pd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x00]
+; X32-AVX512VL-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: PR42079:
@@ -336,7 +336,8 @@ define <2 x double> @PR42079(<4 x float>* %x) {
 ;
 ; X64-AVX512VL-LABEL: PR42079:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vcvtps2pd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x07]
+; X64-AVX512VL-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a = load volatile <4 x float>, <4 x float>* %x
   %b = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>

From cb0ad5accba29f5e0e53e937e87a7a6a62abfd49 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 07:38:14 +0000
Subject: [PATCH 0721/1176] [X86] Copy a test case from avx512-cvt.ll to
 avx512-cvt-widen.ll. NFC

llvm-svn: 362200
---
 llvm/test/CodeGen/X86/avx512-cvt-widen.ll | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
index 45a26f737e2cc..1cdd9cf496164 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
@@ -776,6 +776,29 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl
   ret <4 x double> %c
 }
 
+define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+; NOVL-LABEL: f32to4f64_mask_load:
+; NOVL:       # %bb.0:
+; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm2
+; NOVL-NEXT:    vcmpltpd %zmm1, %zmm0, %k1
+; NOVL-NEXT:    vmovapd %zmm2, %zmm0 {%k1} {z}
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVL-NEXT:    retq
+;
+; VL-LABEL: f32to4f64_mask_load:
+; VL:       # %bb.0:
+; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}
+; VL-NEXT:    retq
+  %b = load <4 x float>, <4 x float>* %p
+  %a = fpext <4 x float> %b to <4 x double>
+  %mask = fcmp ogt <4 x double> %a1, %b1
+  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
+  ret <4 x double> %c
+}
+
 define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
 ; ALL-LABEL: f32tof64_inreg:
 ; ALL:       # %bb.0:

From 67d43e0744490c3ad7d1e8413e17a46b5fb1a6d7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 07:38:18 +0000
Subject: [PATCH 0722/1176] [X86] Add test cases for a volatile load shrinking
 bug involving cvtdq2pd. NFC

Similar to PR42079

llvm-svn: 362201
---
 llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll | 292 ++++++++++---------
 llvm/test/CodeGen/X86/vec_int_to_fp.ll       | 292 ++++++++++---------
 2 files changed, 308 insertions(+), 276 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
index 73c0eb323b3ae..eb85beb53988f 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
@@ -3155,6 +3155,22 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
   ret <2 x double> %cvt
 }
 
+define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
+; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %ld = load volatile <4 x i32>, <4 x i32> *%a
+  %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %cvt = sitofp <2 x i32> %b to <2 x double>
+  ret <2 x double> %cvt
+}
+
 define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) {
 ; SSE2-LABEL: sitofp_load_2i16_to_2f64:
 ; SSE2:       # %bb.0:
@@ -4375,41 +4391,41 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE2-NEXT:    movdqa 16(%rdi), %xmm0
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_1
+; SSE2-NEXT:    js .LBB77_1
 ; SSE2-NEXT:  # %bb.2:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:    jmp .LBB76_3
-; SSE2-NEXT:  .LBB76_1:
+; SSE2-NEXT:    jmp .LBB77_3
+; SSE2-NEXT:  .LBB77_1:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE2-NEXT:    addss %xmm1, %xmm1
-; SSE2-NEXT:  .LBB76_3:
+; SSE2-NEXT:  .LBB77_3:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_4
+; SSE2-NEXT:    js .LBB77_4
 ; SSE2-NEXT:  # %bb.5:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE2-NEXT:    jmp .LBB76_6
-; SSE2-NEXT:  .LBB76_4:
+; SSE2-NEXT:    jmp .LBB77_6
+; SSE2-NEXT:  .LBB77_4:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE2-NEXT:    addss %xmm3, %xmm3
-; SSE2-NEXT:  .LBB76_6:
+; SSE2-NEXT:  .LBB77_6:
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_7
+; SSE2-NEXT:    js .LBB77_7
 ; SSE2-NEXT:  # %bb.8:
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    jmp .LBB76_9
-; SSE2-NEXT:  .LBB76_7:
+; SSE2-NEXT:    jmp .LBB77_9
+; SSE2-NEXT:  .LBB77_7:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4417,17 +4433,17 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE2-NEXT:    addss %xmm0, %xmm0
-; SSE2-NEXT:  .LBB76_9:
+; SSE2-NEXT:  .LBB77_9:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_10
+; SSE2-NEXT:    js .LBB77_10
 ; SSE2-NEXT:  # %bb.11:
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE2-NEXT:    jmp .LBB76_12
-; SSE2-NEXT:  .LBB76_10:
+; SSE2-NEXT:    jmp .LBB77_12
+; SSE2-NEXT:  .LBB77_10:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4435,7 +4451,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE2-NEXT:    addss %xmm2, %xmm2
-; SSE2-NEXT:  .LBB76_12:
+; SSE2-NEXT:  .LBB77_12:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
@@ -4446,26 +4462,26 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE41-NEXT:    movdqa 16(%rdi), %xmm1
 ; SSE41-NEXT:    pextrq $1, %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_1
+; SSE41-NEXT:    js .LBB77_1
 ; SSE41-NEXT:  # %bb.2:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE41-NEXT:    jmp .LBB76_3
-; SSE41-NEXT:  .LBB76_1:
+; SSE41-NEXT:    jmp .LBB77_3
+; SSE41-NEXT:  .LBB77_1:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE41-NEXT:    addss %xmm2, %xmm2
-; SSE41-NEXT:  .LBB76_3:
+; SSE41-NEXT:  .LBB77_3:
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_4
+; SSE41-NEXT:    js .LBB77_4
 ; SSE41-NEXT:  # %bb.5:
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    jmp .LBB76_6
-; SSE41-NEXT:  .LBB76_4:
+; SSE41-NEXT:    jmp .LBB77_6
+; SSE41-NEXT:  .LBB77_4:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4473,16 +4489,16 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE41-NEXT:    addss %xmm0, %xmm0
-; SSE41-NEXT:  .LBB76_6:
+; SSE41-NEXT:  .LBB77_6:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
 ; SSE41-NEXT:    movq %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_7
+; SSE41-NEXT:    js .LBB77_7
 ; SSE41-NEXT:  # %bb.8:
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE41-NEXT:    jmp .LBB76_9
-; SSE41-NEXT:  .LBB76_7:
+; SSE41-NEXT:    jmp .LBB77_9
+; SSE41-NEXT:  .LBB77_7:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4490,17 +4506,17 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE41-NEXT:    addss %xmm2, %xmm2
-; SSE41-NEXT:  .LBB76_9:
+; SSE41-NEXT:  .LBB77_9:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
 ; SSE41-NEXT:    pextrq $1, %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_10
+; SSE41-NEXT:    js .LBB77_10
 ; SSE41-NEXT:  # %bb.11:
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; SSE41-NEXT:    retq
-; SSE41-NEXT:  .LBB76_10:
+; SSE41-NEXT:  .LBB77_10:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4517,56 +4533,56 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; VEX-NEXT:    vmovdqa 16(%rdi), %xmm0
 ; VEX-NEXT:    vpextrq $1, %xmm2, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_1
+; VEX-NEXT:    js .LBB77_1
 ; VEX-NEXT:  # %bb.2:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
-; VEX-NEXT:    jmp .LBB76_3
-; VEX-NEXT:  .LBB76_1:
+; VEX-NEXT:    jmp .LBB77_3
+; VEX-NEXT:  .LBB77_1:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
-; VEX-NEXT:  .LBB76_3:
+; VEX-NEXT:  .LBB77_3:
 ; VEX-NEXT:    vmovq %xmm2, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_4
+; VEX-NEXT:    js .LBB77_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
-; VEX-NEXT:    jmp .LBB76_6
-; VEX-NEXT:  .LBB76_4:
+; VEX-NEXT:    jmp .LBB77_6
+; VEX-NEXT:  .LBB77_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB76_6:
+; VEX-NEXT:  .LBB77_6:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_7
+; VEX-NEXT:    js .LBB77_7
 ; VEX-NEXT:  # %bb.8:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
-; VEX-NEXT:    jmp .LBB76_9
-; VEX-NEXT:  .LBB76_7:
+; VEX-NEXT:    jmp .LBB77_9
+; VEX-NEXT:  .LBB77_7:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB76_9:
+; VEX-NEXT:  .LBB77_9:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
 ; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_10
+; VEX-NEXT:    js .LBB77_10
 ; VEX-NEXT:  # %bb.11:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; VEX-NEXT:    retq
-; VEX-NEXT:  .LBB76_10:
+; VEX-NEXT:  .LBB77_10:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
@@ -4764,41 +4780,41 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    movdqa 48(%rdi), %xmm1
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_1
+; SSE2-NEXT:    js .LBB81_1
 ; SSE2-NEXT:  # %bb.2:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE2-NEXT:    jmp .LBB80_3
-; SSE2-NEXT:  .LBB80_1:
+; SSE2-NEXT:    jmp .LBB81_3
+; SSE2-NEXT:  .LBB81_1:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE2-NEXT:    addss %xmm3, %xmm3
-; SSE2-NEXT:  .LBB80_3:
+; SSE2-NEXT:  .LBB81_3:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_4
+; SSE2-NEXT:    js .LBB81_4
 ; SSE2-NEXT:  # %bb.5:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm4
-; SSE2-NEXT:    jmp .LBB80_6
-; SSE2-NEXT:  .LBB80_4:
+; SSE2-NEXT:    jmp .LBB81_6
+; SSE2-NEXT:  .LBB81_4:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm4
 ; SSE2-NEXT:    addss %xmm4, %xmm4
-; SSE2-NEXT:  .LBB80_6:
+; SSE2-NEXT:  .LBB81_6:
 ; SSE2-NEXT:    movq %xmm5, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_7
+; SSE2-NEXT:    js .LBB81_7
 ; SSE2-NEXT:  # %bb.8:
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    jmp .LBB80_9
-; SSE2-NEXT:  .LBB80_7:
+; SSE2-NEXT:    jmp .LBB81_9
+; SSE2-NEXT:  .LBB81_7:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4806,30 +4822,30 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE2-NEXT:    addss %xmm0, %xmm0
-; SSE2-NEXT:  .LBB80_9:
+; SSE2-NEXT:  .LBB81_9:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm5, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_10
+; SSE2-NEXT:    js .LBB81_10
 ; SSE2-NEXT:  # %bb.11:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm6
-; SSE2-NEXT:    jmp .LBB80_12
-; SSE2-NEXT:  .LBB80_10:
+; SSE2-NEXT:    jmp .LBB81_12
+; SSE2-NEXT:  .LBB81_10:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm6
 ; SSE2-NEXT:    addss %xmm6, %xmm6
-; SSE2-NEXT:  .LBB80_12:
+; SSE2-NEXT:  .LBB81_12:
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_13
+; SSE2-NEXT:    js .LBB81_13
 ; SSE2-NEXT:  # %bb.14:
 ; SSE2-NEXT:    xorps %xmm5, %xmm5
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm5
-; SSE2-NEXT:    jmp .LBB80_15
-; SSE2-NEXT:  .LBB80_13:
+; SSE2-NEXT:    jmp .LBB81_15
+; SSE2-NEXT:  .LBB81_13:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4837,32 +4853,32 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm5, %xmm5
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm5
 ; SSE2-NEXT:    addss %xmm5, %xmm5
-; SSE2-NEXT:  .LBB80_15:
+; SSE2-NEXT:  .LBB81_15:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_16
+; SSE2-NEXT:    js .LBB81_16
 ; SSE2-NEXT:  # %bb.17:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm7
-; SSE2-NEXT:    jmp .LBB80_18
-; SSE2-NEXT:  .LBB80_16:
+; SSE2-NEXT:    jmp .LBB81_18
+; SSE2-NEXT:  .LBB81_16:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm7
 ; SSE2-NEXT:    addss %xmm7, %xmm7
-; SSE2-NEXT:  .LBB80_18:
+; SSE2-NEXT:  .LBB81_18:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_19
+; SSE2-NEXT:    js .LBB81_19
 ; SSE2-NEXT:  # %bb.20:
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:    jmp .LBB80_21
-; SSE2-NEXT:  .LBB80_19:
+; SSE2-NEXT:    jmp .LBB81_21
+; SSE2-NEXT:  .LBB81_19:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4870,18 +4886,18 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE2-NEXT:    addss %xmm1, %xmm1
-; SSE2-NEXT:  .LBB80_21:
+; SSE2-NEXT:  .LBB81_21:
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_22
+; SSE2-NEXT:    js .LBB81_22
 ; SSE2-NEXT:  # %bb.23:
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE2-NEXT:    jmp .LBB80_24
-; SSE2-NEXT:  .LBB80_22:
+; SSE2-NEXT:    jmp .LBB81_24
+; SSE2-NEXT:  .LBB81_22:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4889,7 +4905,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE2-NEXT:    addss %xmm2, %xmm2
-; SSE2-NEXT:  .LBB80_24:
+; SSE2-NEXT:  .LBB81_24:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE2-NEXT:    retq
@@ -4902,26 +4918,26 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    movdqa 48(%rdi), %xmm2
 ; SSE41-NEXT:    pextrq $1, %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_1
+; SSE41-NEXT:    js .LBB81_1
 ; SSE41-NEXT:  # %bb.2:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE41-NEXT:    jmp .LBB80_3
-; SSE41-NEXT:  .LBB80_1:
+; SSE41-NEXT:    jmp .LBB81_3
+; SSE41-NEXT:  .LBB81_1:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE41-NEXT:    addss %xmm3, %xmm3
-; SSE41-NEXT:  .LBB80_3:
+; SSE41-NEXT:  .LBB81_3:
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_4
+; SSE41-NEXT:    js .LBB81_4
 ; SSE41-NEXT:  # %bb.5:
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    jmp .LBB80_6
-; SSE41-NEXT:  .LBB80_4:
+; SSE41-NEXT:    jmp .LBB81_6
+; SSE41-NEXT:  .LBB81_4:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4929,29 +4945,29 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE41-NEXT:    addss %xmm0, %xmm0
-; SSE41-NEXT:  .LBB80_6:
+; SSE41-NEXT:  .LBB81_6:
 ; SSE41-NEXT:    movq %xmm4, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_7
+; SSE41-NEXT:    js .LBB81_7
 ; SSE41-NEXT:  # %bb.8:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm5
-; SSE41-NEXT:    jmp .LBB80_9
-; SSE41-NEXT:  .LBB80_7:
+; SSE41-NEXT:    jmp .LBB81_9
+; SSE41-NEXT:  .LBB81_7:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm5
 ; SSE41-NEXT:    addss %xmm5, %xmm5
-; SSE41-NEXT:  .LBB80_9:
+; SSE41-NEXT:  .LBB81_9:
 ; SSE41-NEXT:    pextrq $1, %xmm4, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_10
+; SSE41-NEXT:    js .LBB81_10
 ; SSE41-NEXT:  # %bb.11:
 ; SSE41-NEXT:    xorps %xmm4, %xmm4
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm4
-; SSE41-NEXT:    jmp .LBB80_12
-; SSE41-NEXT:  .LBB80_10:
+; SSE41-NEXT:    jmp .LBB81_12
+; SSE41-NEXT:  .LBB81_10:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4959,30 +4975,30 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm4, %xmm4
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm4
 ; SSE41-NEXT:    addss %xmm4, %xmm4
-; SSE41-NEXT:  .LBB80_12:
+; SSE41-NEXT:  .LBB81_12:
 ; SSE41-NEXT:    pextrq $1, %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_13
+; SSE41-NEXT:    js .LBB81_13
 ; SSE41-NEXT:  # %bb.14:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm6
-; SSE41-NEXT:    jmp .LBB80_15
-; SSE41-NEXT:  .LBB80_13:
+; SSE41-NEXT:    jmp .LBB81_15
+; SSE41-NEXT:  .LBB81_13:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm6
 ; SSE41-NEXT:    addss %xmm6, %xmm6
-; SSE41-NEXT:  .LBB80_15:
+; SSE41-NEXT:  .LBB81_15:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
 ; SSE41-NEXT:    movq %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_16
+; SSE41-NEXT:    js .LBB81_16
 ; SSE41-NEXT:  # %bb.17:
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE41-NEXT:    jmp .LBB80_18
-; SSE41-NEXT:  .LBB80_16:
+; SSE41-NEXT:    jmp .LBB81_18
+; SSE41-NEXT:  .LBB81_16:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4990,17 +5006,17 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE41-NEXT:    addss %xmm1, %xmm1
-; SSE41-NEXT:  .LBB80_18:
+; SSE41-NEXT:  .LBB81_18:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[2,3]
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm5[0],xmm0[3]
 ; SSE41-NEXT:    movq %xmm2, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_19
+; SSE41-NEXT:    js .LBB81_19
 ; SSE41-NEXT:  # %bb.20:
 ; SSE41-NEXT:    xorps %xmm3, %xmm3
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE41-NEXT:    jmp .LBB80_21
-; SSE41-NEXT:  .LBB80_19:
+; SSE41-NEXT:    jmp .LBB81_21
+; SSE41-NEXT:  .LBB81_19:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -5008,18 +5024,18 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm3, %xmm3
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE41-NEXT:    addss %xmm3, %xmm3
-; SSE41-NEXT:  .LBB80_21:
+; SSE41-NEXT:  .LBB81_21:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
 ; SSE41-NEXT:    pextrq $1, %xmm2, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_22
+; SSE41-NEXT:    js .LBB81_22
 ; SSE41-NEXT:  # %bb.23:
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
 ; SSE41-NEXT:    retq
-; SSE41-NEXT:  .LBB80_22:
+; SSE41-NEXT:  .LBB81_22:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -5038,121 +5054,121 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; VEX-NEXT:    vmovdqa 48(%rdi), %xmm3
 ; VEX-NEXT:    vpextrq $1, %xmm4, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_1
+; VEX-NEXT:    js .LBB81_1
 ; VEX-NEXT:  # %bb.2:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
-; VEX-NEXT:    jmp .LBB80_3
-; VEX-NEXT:  .LBB80_1:
+; VEX-NEXT:    jmp .LBB81_3
+; VEX-NEXT:  .LBB81_1:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB80_3:
+; VEX-NEXT:  .LBB81_3:
 ; VEX-NEXT:    vmovq %xmm4, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_4
+; VEX-NEXT:    js .LBB81_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm5
-; VEX-NEXT:    jmp .LBB80_6
-; VEX-NEXT:  .LBB80_4:
+; VEX-NEXT:    jmp .LBB81_6
+; VEX-NEXT:  .LBB81_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; VEX-NEXT:    vaddss %xmm4, %xmm4, %xmm5
-; VEX-NEXT:  .LBB80_6:
+; VEX-NEXT:  .LBB81_6:
 ; VEX-NEXT:    vmovq %xmm3, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_7
+; VEX-NEXT:    js .LBB81_7
 ; VEX-NEXT:  # %bb.8:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm4
-; VEX-NEXT:    jmp .LBB80_9
-; VEX-NEXT:  .LBB80_7:
+; VEX-NEXT:    jmp .LBB81_9
+; VEX-NEXT:  .LBB81_7:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm4
 ; VEX-NEXT:    vaddss %xmm4, %xmm4, %xmm4
-; VEX-NEXT:  .LBB80_9:
+; VEX-NEXT:  .LBB81_9:
 ; VEX-NEXT:    vpextrq $1, %xmm3, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_10
+; VEX-NEXT:    js .LBB81_10
 ; VEX-NEXT:  # %bb.11:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm3
-; VEX-NEXT:    jmp .LBB80_12
-; VEX-NEXT:  .LBB80_10:
+; VEX-NEXT:    jmp .LBB81_12
+; VEX-NEXT:  .LBB81_10:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm3
 ; VEX-NEXT:    vaddss %xmm3, %xmm3, %xmm3
-; VEX-NEXT:  .LBB80_12:
+; VEX-NEXT:  .LBB81_12:
 ; VEX-NEXT:    vpextrq $1, %xmm1, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_13
+; VEX-NEXT:    js .LBB81_13
 ; VEX-NEXT:  # %bb.14:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm6
-; VEX-NEXT:    jmp .LBB80_15
-; VEX-NEXT:  .LBB80_13:
+; VEX-NEXT:    jmp .LBB81_15
+; VEX-NEXT:  .LBB81_13:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm6
 ; VEX-NEXT:    vaddss %xmm6, %xmm6, %xmm6
-; VEX-NEXT:  .LBB80_15:
+; VEX-NEXT:  .LBB81_15:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3]
 ; VEX-NEXT:    vmovq %xmm1, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_16
+; VEX-NEXT:    js .LBB81_16
 ; VEX-NEXT:  # %bb.17:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm1
-; VEX-NEXT:    jmp .LBB80_18
-; VEX-NEXT:  .LBB80_16:
+; VEX-NEXT:    jmp .LBB81_18
+; VEX-NEXT:  .LBB81_16:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm1
 ; VEX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
-; VEX-NEXT:  .LBB80_18:
+; VEX-NEXT:  .LBB81_18:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm5 = xmm1[0],xmm6[0],xmm1[2,3]
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm4[0],xmm2[3]
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_19
+; VEX-NEXT:    js .LBB81_19
 ; VEX-NEXT:  # %bb.20:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm2
-; VEX-NEXT:    jmp .LBB80_21
-; VEX-NEXT:  .LBB80_19:
+; VEX-NEXT:    jmp .LBB81_21
+; VEX-NEXT:  .LBB81_19:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB80_21:
+; VEX-NEXT:  .LBB81_21:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm5[0,1],xmm2[0],xmm5[3]
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
 ; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_22
+; VEX-NEXT:    js .LBB81_22
 ; VEX-NEXT:  # %bb.23:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm0
-; VEX-NEXT:    jmp .LBB80_24
-; VEX-NEXT:  .LBB80_22:
+; VEX-NEXT:    jmp .LBB81_24
+; VEX-NEXT:  .LBB81_22:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:  .LBB80_24:
+; VEX-NEXT:  .LBB81_24:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
 ; VEX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; VEX-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 580ab291f9af3..d394383ddc22f 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -3155,6 +3155,22 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
   ret <2 x double> %cvt
 }
 
+define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
+; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %ld = load volatile <4 x i32>, <4 x i32> *%a
+  %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %cvt = sitofp <2 x i32> %b to <2 x double>
+  ret <2 x double> %cvt
+}
+
 define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) {
 ; SSE2-LABEL: sitofp_load_2i16_to_2f64:
 ; SSE2:       # %bb.0:
@@ -4371,41 +4387,41 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE2-NEXT:    movdqa 16(%rdi), %xmm0
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_1
+; SSE2-NEXT:    js .LBB77_1
 ; SSE2-NEXT:  # %bb.2:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:    jmp .LBB76_3
-; SSE2-NEXT:  .LBB76_1:
+; SSE2-NEXT:    jmp .LBB77_3
+; SSE2-NEXT:  .LBB77_1:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE2-NEXT:    addss %xmm1, %xmm1
-; SSE2-NEXT:  .LBB76_3:
+; SSE2-NEXT:  .LBB77_3:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_4
+; SSE2-NEXT:    js .LBB77_4
 ; SSE2-NEXT:  # %bb.5:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE2-NEXT:    jmp .LBB76_6
-; SSE2-NEXT:  .LBB76_4:
+; SSE2-NEXT:    jmp .LBB77_6
+; SSE2-NEXT:  .LBB77_4:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE2-NEXT:    addss %xmm3, %xmm3
-; SSE2-NEXT:  .LBB76_6:
+; SSE2-NEXT:  .LBB77_6:
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_7
+; SSE2-NEXT:    js .LBB77_7
 ; SSE2-NEXT:  # %bb.8:
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    jmp .LBB76_9
-; SSE2-NEXT:  .LBB76_7:
+; SSE2-NEXT:    jmp .LBB77_9
+; SSE2-NEXT:  .LBB77_7:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4413,17 +4429,17 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE2-NEXT:    addss %xmm0, %xmm0
-; SSE2-NEXT:  .LBB76_9:
+; SSE2-NEXT:  .LBB77_9:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB76_10
+; SSE2-NEXT:    js .LBB77_10
 ; SSE2-NEXT:  # %bb.11:
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE2-NEXT:    jmp .LBB76_12
-; SSE2-NEXT:  .LBB76_10:
+; SSE2-NEXT:    jmp .LBB77_12
+; SSE2-NEXT:  .LBB77_10:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4431,7 +4447,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE2-NEXT:    addss %xmm2, %xmm2
-; SSE2-NEXT:  .LBB76_12:
+; SSE2-NEXT:  .LBB77_12:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
@@ -4442,26 +4458,26 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE41-NEXT:    movdqa 16(%rdi), %xmm1
 ; SSE41-NEXT:    pextrq $1, %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_1
+; SSE41-NEXT:    js .LBB77_1
 ; SSE41-NEXT:  # %bb.2:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE41-NEXT:    jmp .LBB76_3
-; SSE41-NEXT:  .LBB76_1:
+; SSE41-NEXT:    jmp .LBB77_3
+; SSE41-NEXT:  .LBB77_1:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE41-NEXT:    addss %xmm2, %xmm2
-; SSE41-NEXT:  .LBB76_3:
+; SSE41-NEXT:  .LBB77_3:
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_4
+; SSE41-NEXT:    js .LBB77_4
 ; SSE41-NEXT:  # %bb.5:
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    jmp .LBB76_6
-; SSE41-NEXT:  .LBB76_4:
+; SSE41-NEXT:    jmp .LBB77_6
+; SSE41-NEXT:  .LBB77_4:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4469,16 +4485,16 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE41-NEXT:    addss %xmm0, %xmm0
-; SSE41-NEXT:  .LBB76_6:
+; SSE41-NEXT:  .LBB77_6:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
 ; SSE41-NEXT:    movq %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_7
+; SSE41-NEXT:    js .LBB77_7
 ; SSE41-NEXT:  # %bb.8:
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE41-NEXT:    jmp .LBB76_9
-; SSE41-NEXT:  .LBB76_7:
+; SSE41-NEXT:    jmp .LBB77_9
+; SSE41-NEXT:  .LBB77_7:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4486,17 +4502,17 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE41-NEXT:    addss %xmm2, %xmm2
-; SSE41-NEXT:  .LBB76_9:
+; SSE41-NEXT:  .LBB77_9:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
 ; SSE41-NEXT:    pextrq $1, %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB76_10
+; SSE41-NEXT:    js .LBB77_10
 ; SSE41-NEXT:  # %bb.11:
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; SSE41-NEXT:    retq
-; SSE41-NEXT:  .LBB76_10:
+; SSE41-NEXT:  .LBB77_10:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4513,56 +4529,56 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; VEX-NEXT:    vmovdqa 16(%rdi), %xmm0
 ; VEX-NEXT:    vpextrq $1, %xmm2, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_1
+; VEX-NEXT:    js .LBB77_1
 ; VEX-NEXT:  # %bb.2:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
-; VEX-NEXT:    jmp .LBB76_3
-; VEX-NEXT:  .LBB76_1:
+; VEX-NEXT:    jmp .LBB77_3
+; VEX-NEXT:  .LBB77_1:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
-; VEX-NEXT:  .LBB76_3:
+; VEX-NEXT:  .LBB77_3:
 ; VEX-NEXT:    vmovq %xmm2, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_4
+; VEX-NEXT:    js .LBB77_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
-; VEX-NEXT:    jmp .LBB76_6
-; VEX-NEXT:  .LBB76_4:
+; VEX-NEXT:    jmp .LBB77_6
+; VEX-NEXT:  .LBB77_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB76_6:
+; VEX-NEXT:  .LBB77_6:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_7
+; VEX-NEXT:    js .LBB77_7
 ; VEX-NEXT:  # %bb.8:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
-; VEX-NEXT:    jmp .LBB76_9
-; VEX-NEXT:  .LBB76_7:
+; VEX-NEXT:    jmp .LBB77_9
+; VEX-NEXT:  .LBB77_7:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB76_9:
+; VEX-NEXT:  .LBB77_9:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
 ; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB76_10
+; VEX-NEXT:    js .LBB77_10
 ; VEX-NEXT:  # %bb.11:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; VEX-NEXT:    retq
-; VEX-NEXT:  .LBB76_10:
+; VEX-NEXT:  .LBB77_10:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
@@ -4760,41 +4776,41 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    movdqa 48(%rdi), %xmm1
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_1
+; SSE2-NEXT:    js .LBB81_1
 ; SSE2-NEXT:  # %bb.2:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE2-NEXT:    jmp .LBB80_3
-; SSE2-NEXT:  .LBB80_1:
+; SSE2-NEXT:    jmp .LBB81_3
+; SSE2-NEXT:  .LBB81_1:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE2-NEXT:    addss %xmm3, %xmm3
-; SSE2-NEXT:  .LBB80_3:
+; SSE2-NEXT:  .LBB81_3:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_4
+; SSE2-NEXT:    js .LBB81_4
 ; SSE2-NEXT:  # %bb.5:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm4
-; SSE2-NEXT:    jmp .LBB80_6
-; SSE2-NEXT:  .LBB80_4:
+; SSE2-NEXT:    jmp .LBB81_6
+; SSE2-NEXT:  .LBB81_4:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm4
 ; SSE2-NEXT:    addss %xmm4, %xmm4
-; SSE2-NEXT:  .LBB80_6:
+; SSE2-NEXT:  .LBB81_6:
 ; SSE2-NEXT:    movq %xmm5, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_7
+; SSE2-NEXT:    js .LBB81_7
 ; SSE2-NEXT:  # %bb.8:
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    jmp .LBB80_9
-; SSE2-NEXT:  .LBB80_7:
+; SSE2-NEXT:    jmp .LBB81_9
+; SSE2-NEXT:  .LBB81_7:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4802,30 +4818,30 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE2-NEXT:    addss %xmm0, %xmm0
-; SSE2-NEXT:  .LBB80_9:
+; SSE2-NEXT:  .LBB81_9:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm5, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_10
+; SSE2-NEXT:    js .LBB81_10
 ; SSE2-NEXT:  # %bb.11:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm6
-; SSE2-NEXT:    jmp .LBB80_12
-; SSE2-NEXT:  .LBB80_10:
+; SSE2-NEXT:    jmp .LBB81_12
+; SSE2-NEXT:  .LBB81_10:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm6
 ; SSE2-NEXT:    addss %xmm6, %xmm6
-; SSE2-NEXT:  .LBB80_12:
+; SSE2-NEXT:  .LBB81_12:
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_13
+; SSE2-NEXT:    js .LBB81_13
 ; SSE2-NEXT:  # %bb.14:
 ; SSE2-NEXT:    xorps %xmm5, %xmm5
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm5
-; SSE2-NEXT:    jmp .LBB80_15
-; SSE2-NEXT:  .LBB80_13:
+; SSE2-NEXT:    jmp .LBB81_15
+; SSE2-NEXT:  .LBB81_13:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4833,32 +4849,32 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm5, %xmm5
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm5
 ; SSE2-NEXT:    addss %xmm5, %xmm5
-; SSE2-NEXT:  .LBB80_15:
+; SSE2-NEXT:  .LBB81_15:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_16
+; SSE2-NEXT:    js .LBB81_16
 ; SSE2-NEXT:  # %bb.17:
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm7
-; SSE2-NEXT:    jmp .LBB80_18
-; SSE2-NEXT:  .LBB80_16:
+; SSE2-NEXT:    jmp .LBB81_18
+; SSE2-NEXT:  .LBB81_16:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm7
 ; SSE2-NEXT:    addss %xmm7, %xmm7
-; SSE2-NEXT:  .LBB80_18:
+; SSE2-NEXT:  .LBB81_18:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_19
+; SSE2-NEXT:    js .LBB81_19
 ; SSE2-NEXT:  # %bb.20:
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:    jmp .LBB80_21
-; SSE2-NEXT:  .LBB80_19:
+; SSE2-NEXT:    jmp .LBB81_21
+; SSE2-NEXT:  .LBB81_19:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4866,18 +4882,18 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE2-NEXT:    addss %xmm1, %xmm1
-; SSE2-NEXT:  .LBB80_21:
+; SSE2-NEXT:  .LBB81_21:
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    js .LBB80_22
+; SSE2-NEXT:    js .LBB81_22
 ; SSE2-NEXT:  # %bb.23:
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
-; SSE2-NEXT:    jmp .LBB80_24
-; SSE2-NEXT:  .LBB80_22:
+; SSE2-NEXT:    jmp .LBB81_24
+; SSE2-NEXT:  .LBB81_22:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
@@ -4885,7 +4901,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE2-NEXT:    addss %xmm2, %xmm2
-; SSE2-NEXT:  .LBB80_24:
+; SSE2-NEXT:  .LBB81_24:
 ; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0]
 ; SSE2-NEXT:    retq
@@ -4898,26 +4914,26 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    movdqa 48(%rdi), %xmm2
 ; SSE41-NEXT:    pextrq $1, %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_1
+; SSE41-NEXT:    js .LBB81_1
 ; SSE41-NEXT:  # %bb.2:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE41-NEXT:    jmp .LBB80_3
-; SSE41-NEXT:  .LBB80_1:
+; SSE41-NEXT:    jmp .LBB81_3
+; SSE41-NEXT:  .LBB81_1:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE41-NEXT:    addss %xmm3, %xmm3
-; SSE41-NEXT:  .LBB80_3:
+; SSE41-NEXT:  .LBB81_3:
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_4
+; SSE41-NEXT:    js .LBB81_4
 ; SSE41-NEXT:  # %bb.5:
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    jmp .LBB80_6
-; SSE41-NEXT:  .LBB80_4:
+; SSE41-NEXT:    jmp .LBB81_6
+; SSE41-NEXT:  .LBB81_4:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4925,29 +4941,29 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE41-NEXT:    addss %xmm0, %xmm0
-; SSE41-NEXT:  .LBB80_6:
+; SSE41-NEXT:  .LBB81_6:
 ; SSE41-NEXT:    movq %xmm4, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_7
+; SSE41-NEXT:    js .LBB81_7
 ; SSE41-NEXT:  # %bb.8:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm5
-; SSE41-NEXT:    jmp .LBB80_9
-; SSE41-NEXT:  .LBB80_7:
+; SSE41-NEXT:    jmp .LBB81_9
+; SSE41-NEXT:  .LBB81_7:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm5
 ; SSE41-NEXT:    addss %xmm5, %xmm5
-; SSE41-NEXT:  .LBB80_9:
+; SSE41-NEXT:  .LBB81_9:
 ; SSE41-NEXT:    pextrq $1, %xmm4, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_10
+; SSE41-NEXT:    js .LBB81_10
 ; SSE41-NEXT:  # %bb.11:
 ; SSE41-NEXT:    xorps %xmm4, %xmm4
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm4
-; SSE41-NEXT:    jmp .LBB80_12
-; SSE41-NEXT:  .LBB80_10:
+; SSE41-NEXT:    jmp .LBB81_12
+; SSE41-NEXT:  .LBB81_10:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4955,30 +4971,30 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm4, %xmm4
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm4
 ; SSE41-NEXT:    addss %xmm4, %xmm4
-; SSE41-NEXT:  .LBB80_12:
+; SSE41-NEXT:  .LBB81_12:
 ; SSE41-NEXT:    pextrq $1, %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_13
+; SSE41-NEXT:    js .LBB81_13
 ; SSE41-NEXT:  # %bb.14:
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm6
-; SSE41-NEXT:    jmp .LBB80_15
-; SSE41-NEXT:  .LBB80_13:
+; SSE41-NEXT:    jmp .LBB81_15
+; SSE41-NEXT:  .LBB81_13:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
 ; SSE41-NEXT:    orq %rcx, %rax
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm6
 ; SSE41-NEXT:    addss %xmm6, %xmm6
-; SSE41-NEXT:  .LBB80_15:
+; SSE41-NEXT:  .LBB81_15:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
 ; SSE41-NEXT:    movq %xmm1, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_16
+; SSE41-NEXT:    js .LBB81_16
 ; SSE41-NEXT:  # %bb.17:
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE41-NEXT:    jmp .LBB80_18
-; SSE41-NEXT:  .LBB80_16:
+; SSE41-NEXT:    jmp .LBB81_18
+; SSE41-NEXT:  .LBB81_16:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -4986,17 +5002,17 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE41-NEXT:    addss %xmm1, %xmm1
-; SSE41-NEXT:  .LBB80_18:
+; SSE41-NEXT:  .LBB81_18:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[2,3]
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm5[0],xmm0[3]
 ; SSE41-NEXT:    movq %xmm2, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_19
+; SSE41-NEXT:    js .LBB81_19
 ; SSE41-NEXT:  # %bb.20:
 ; SSE41-NEXT:    xorps %xmm3, %xmm3
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
-; SSE41-NEXT:    jmp .LBB80_21
-; SSE41-NEXT:  .LBB80_19:
+; SSE41-NEXT:    jmp .LBB81_21
+; SSE41-NEXT:  .LBB81_19:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -5004,18 +5020,18 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; SSE41-NEXT:    xorps %xmm3, %xmm3
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm3
 ; SSE41-NEXT:    addss %xmm3, %xmm3
-; SSE41-NEXT:  .LBB80_21:
+; SSE41-NEXT:  .LBB81_21:
 ; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
 ; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
 ; SSE41-NEXT:    pextrq $1, %xmm2, %rax
 ; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    js .LBB80_22
+; SSE41-NEXT:    js .LBB81_22
 ; SSE41-NEXT:  # %bb.23:
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm2
 ; SSE41-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
 ; SSE41-NEXT:    retq
-; SSE41-NEXT:  .LBB80_22:
+; SSE41-NEXT:  .LBB81_22:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
 ; SSE41-NEXT:    andl $1, %eax
@@ -5034,121 +5050,121 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; VEX-NEXT:    vmovdqa 48(%rdi), %xmm3
 ; VEX-NEXT:    vpextrq $1, %xmm4, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_1
+; VEX-NEXT:    js .LBB81_1
 ; VEX-NEXT:  # %bb.2:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
-; VEX-NEXT:    jmp .LBB80_3
-; VEX-NEXT:  .LBB80_1:
+; VEX-NEXT:    jmp .LBB81_3
+; VEX-NEXT:  .LBB81_1:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB80_3:
+; VEX-NEXT:  .LBB81_3:
 ; VEX-NEXT:    vmovq %xmm4, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_4
+; VEX-NEXT:    js .LBB81_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm5
-; VEX-NEXT:    jmp .LBB80_6
-; VEX-NEXT:  .LBB80_4:
+; VEX-NEXT:    jmp .LBB81_6
+; VEX-NEXT:  .LBB81_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; VEX-NEXT:    vaddss %xmm4, %xmm4, %xmm5
-; VEX-NEXT:  .LBB80_6:
+; VEX-NEXT:  .LBB81_6:
 ; VEX-NEXT:    vmovq %xmm3, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_7
+; VEX-NEXT:    js .LBB81_7
 ; VEX-NEXT:  # %bb.8:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm4
-; VEX-NEXT:    jmp .LBB80_9
-; VEX-NEXT:  .LBB80_7:
+; VEX-NEXT:    jmp .LBB81_9
+; VEX-NEXT:  .LBB81_7:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm4
 ; VEX-NEXT:    vaddss %xmm4, %xmm4, %xmm4
-; VEX-NEXT:  .LBB80_9:
+; VEX-NEXT:  .LBB81_9:
 ; VEX-NEXT:    vpextrq $1, %xmm3, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_10
+; VEX-NEXT:    js .LBB81_10
 ; VEX-NEXT:  # %bb.11:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm3
-; VEX-NEXT:    jmp .LBB80_12
-; VEX-NEXT:  .LBB80_10:
+; VEX-NEXT:    jmp .LBB81_12
+; VEX-NEXT:  .LBB81_10:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm3
 ; VEX-NEXT:    vaddss %xmm3, %xmm3, %xmm3
-; VEX-NEXT:  .LBB80_12:
+; VEX-NEXT:  .LBB81_12:
 ; VEX-NEXT:    vpextrq $1, %xmm1, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_13
+; VEX-NEXT:    js .LBB81_13
 ; VEX-NEXT:  # %bb.14:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm6
-; VEX-NEXT:    jmp .LBB80_15
-; VEX-NEXT:  .LBB80_13:
+; VEX-NEXT:    jmp .LBB81_15
+; VEX-NEXT:  .LBB81_13:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm6, %xmm6
 ; VEX-NEXT:    vaddss %xmm6, %xmm6, %xmm6
-; VEX-NEXT:  .LBB80_15:
+; VEX-NEXT:  .LBB81_15:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3]
 ; VEX-NEXT:    vmovq %xmm1, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_16
+; VEX-NEXT:    js .LBB81_16
 ; VEX-NEXT:  # %bb.17:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm1
-; VEX-NEXT:    jmp .LBB80_18
-; VEX-NEXT:  .LBB80_16:
+; VEX-NEXT:    jmp .LBB81_18
+; VEX-NEXT:  .LBB81_16:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm1
 ; VEX-NEXT:    vaddss %xmm1, %xmm1, %xmm1
-; VEX-NEXT:  .LBB80_18:
+; VEX-NEXT:  .LBB81_18:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm5 = xmm1[0],xmm6[0],xmm1[2,3]
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm4[0],xmm2[3]
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_19
+; VEX-NEXT:    js .LBB81_19
 ; VEX-NEXT:  # %bb.20:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm2
-; VEX-NEXT:    jmp .LBB80_21
-; VEX-NEXT:  .LBB80_19:
+; VEX-NEXT:    jmp .LBB81_21
+; VEX-NEXT:  .LBB81_19:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm2
 ; VEX-NEXT:    vaddss %xmm2, %xmm2, %xmm2
-; VEX-NEXT:  .LBB80_21:
+; VEX-NEXT:  .LBB81_21:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm5[0,1],xmm2[0],xmm5[3]
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0]
 ; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    js .LBB80_22
+; VEX-NEXT:    js .LBB81_22
 ; VEX-NEXT:  # %bb.23:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm0
-; VEX-NEXT:    jmp .LBB80_24
-; VEX-NEXT:  .LBB80_22:
+; VEX-NEXT:    jmp .LBB81_24
+; VEX-NEXT:  .LBB81_22:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
 ; VEX-NEXT:    andl $1, %eax
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm7, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:  .LBB80_24:
+; VEX-NEXT:  .LBB81_24:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
 ; VEX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; VEX-NEXT:    retq

From cded5737109524d7cc756ac364a29cde835942b3 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 07:38:22 +0000
Subject: [PATCH 0723/1176] [X86] Add test cases for failure to use 128-bit
 masked vcvtdq2pd when load starts as v2i32.

llvm-svn: 362202
---
 llvm/test/CodeGen/X86/avx512-cvt-widen.ll | 106 ++++++++++++++++++++++
 llvm/test/CodeGen/X86/avx512-cvt.ll       | 106 ++++++++++++++++++++++
 2 files changed, 212 insertions(+)

diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
index 1cdd9cf496164..5b991847d68d5 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
@@ -2533,3 +2533,109 @@ define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
   %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
   ret <16 x i32> %select
 }
+
+define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
+; SSE-LABEL: sitofp_load_2i32_to_2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_load_2i32_to_2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    retq
+; NOVLDQ-LABEL: test_sito2f64_mask_load:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
+;
+; VLDQ-LABEL: test_sito2f64_mask_load:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
+; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_sito2f64_mask_load:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_sito2f64_mask_load:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
+; DQNOVL-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
+  %mask = icmp slt <2 x i64> %c, zeroinitializer
+  %ld = load <2 x i32>, <2 x i32> *%a
+  %cvt = sitofp <2 x i32> %ld to <2 x double>
+  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
+  ret <2 x double> %sel
+}
+
+define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
+; SSE-LABEL: sitofp_load_2i32_to_2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_load_2i32_to_2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    retq
+; NOVLDQ-LABEL: test_uito2f64_mask_load:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; NOVLDQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
+;
+; VLDQ-LABEL: test_uito2f64_mask_load:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
+; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_uito2f64_mask_load:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLNODQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_uito2f64_mask_load:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; DQNOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
+  %mask = icmp slt <2 x i64> %c, zeroinitializer
+  %ld = load <2 x i32>, <2 x i32> *%a
+  %cvt = uitofp <2 x i32> %ld to <2 x double>
+  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
+  ret <2 x double> %sel
+}
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 48293c08a5cb4..2d1202a9200e4 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -2508,3 +2508,109 @@ define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
   %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
   ret <16 x i32> %select
 }
+
+define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
+; SSE-LABEL: sitofp_load_2i32_to_2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_load_2i32_to_2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    retq
+; NOVLDQ-LABEL: test_sito2f64_mask_load:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
+;
+; VLDQ-LABEL: test_sito2f64_mask_load:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
+; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_sito2f64_mask_load:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_sito2f64_mask_load:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
+; DQNOVL-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
+  %mask = icmp slt <2 x i64> %c, zeroinitializer
+  %ld = load <2 x i32>, <2 x i32> *%a
+  %cvt = sitofp <2 x i32> %ld to <2 x double>
+  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
+  ret <2 x double> %sel
+}
+
+define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
+; SSE-LABEL: sitofp_load_2i32_to_2f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sitofp_load_2i32_to_2f64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    retq
+; NOVLDQ-LABEL: test_uito2f64_mask_load:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; NOVLDQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT:    vzeroupper
+; NOVLDQ-NEXT:    retq
+;
+; VLDQ-LABEL: test_uito2f64_mask_load:
+; VLDQ:       # %bb.0:
+; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
+; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    retq
+;
+; VLNODQ-LABEL: test_uito2f64_mask_load:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
+; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; VLNODQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    retq
+;
+; DQNOVL-LABEL: test_uito2f64_mask_load:
+; DQNOVL:       # %bb.0:
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
+; DQNOVL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; DQNOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; DQNOVL-NEXT:    vzeroupper
+; DQNOVL-NEXT:    retq
+  %mask = icmp slt <2 x i64> %c, zeroinitializer
+  %ld = load <2 x i32>, <2 x i32> *%a
+  %cvt = uitofp <2 x i32> %ld to <2 x double>
+  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
+  ret <2 x double> %sel
+}

From 31d00d80a21ffbc5bc03c7b90de030b29660a3bd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 07:38:26 +0000
Subject: [PATCH 0724/1176] [X86] Remove patterns for
 X86VSintToFP/X86VUintToFP+loadv4f32 to v2f64.

These patterns can incorrectly narrow a volatile load from 128-bits to 64-bits.
Similar to PR42079.

Switch to using (v4i32 (bitcast (v2i64 (scalar_to_vector (loadi64))))) as the
load pattern used in the instructions.

This probably still has issues in 32-bit mode where loadi64 isn't legal. Maybe
we should use VZMOVL for widened loads even when we don't need the upper bits
as zeroes?

llvm-svn: 362203
---
 llvm/lib/Target/X86/X86InstrAVX512.td        | 63 ++++----------------
 llvm/lib/Target/X86/X86InstrSSE.td           | 14 +++--
 llvm/test/CodeGen/X86/avx512-cvt-widen.ll    | 12 ++--
 llvm/test/CodeGen/X86/avx512-cvt.ll          | 12 ++--
 llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll |  6 +-
 llvm/test/CodeGen/X86/vec_int_to_fp.ll       |  6 +-
 6 files changed, 36 insertions(+), 77 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f6e4e85119200..753f1b71b07bc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7608,7 +7608,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                           X86FoldableSchedWrite sched,
                           string Broadcast = _.BroadcastStr,
                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                          RegisterClass MaskRC = _.KRCWM> {
+                          RegisterClass MaskRC = _.KRCWM,
+                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
 
   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _Src.RC:$src),
@@ -7627,8 +7628,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                          (ins MaskRC:$mask, MemOp:$src),
                          OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT
-                             (_Src.LdFrag addr:$src)))),
+                         LdDAG,
                          (vselect MaskRC:$mask,
                                   (_.VT (OpNode (_Src.VT
                                                  (_Src.LdFrag addr:$src)))),
@@ -7683,53 +7683,10 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
                                 X86FoldableSchedWrite sched,
                                 string Broadcast = _.BroadcastStr,
                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                                RegisterClass MaskRC = _.KRCWM,
-                                PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
-
-  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                         (ins _Src.RC:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
-                         (ins MaskRC:$mask, _Src.RC:$src),
-                          OpcodeStr, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, Sched<[sched]>;
-
-  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                         (ins MemOp:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
-                         (ins MaskRC:$mask, MemOp:$src),
-                         OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (LdFrag addr:$src)),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT
-                                                 (_Src.LdFrag addr:$src)))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, Sched<[sched.Folded]>;
-
-  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                         (ins _Src.ScalarMemOp:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
-                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
-                         OpcodeStr,
-                         "${src}"##Broadcast, "${src}"##Broadcast,
-                         (_.VT (OpNode (_Src.VT
-                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
-                            )),
-                         (vselect MaskRC:$mask,
-                                  (_.VT
-                                   (OpNode
-                                    (_Src.VT
-                                     (X86VBroadcast
-                                      (_Src.ScalarLdFrag addr:$src))))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, EVEX_B, Sched<[sched.Folded]>;
-}
+                                RegisterClass MaskRC = _.KRCWM>
+  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
+                   MemOp, MaskRC,
+                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
 
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
@@ -7910,7 +7867,11 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
-                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
+                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+                               (v2f64 (OpNode128 (bc_v4i32
+                                (v2i64
+                                 (scalar_to_vector (loadi64 addr:$src))))))>,
+                               EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                                sched.YMM>, EVEX_V256;
   }
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 18d9af8bdcd10..23aea3ea90817 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1655,7 +1655,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
-                          (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                          (v2f64 (X86VSintToFP
+                                  (bc_v4i32
+                                   (v2i64 (scalar_to_vector
+                                           (loadi64 addr:$src)))))))]>,
                         VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
 def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1679,7 +1682,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                         (v2f64 (X86VSintToFP
+                                 (bc_v4i32
+                                  (v2i64 (scalar_to_vector
+                                          (loadi64 addr:$src)))))))]>,
                        Sched<[WriteCvtI2PDLd]>;
 def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1689,16 +1695,12 @@ def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 
 // AVX register conversion intrinsics
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (VCVTDQ2PDrm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
             (VCVTDQ2PDrm addr:$src)>;
 } // Predicates = [HasAVX, NoVLX]
 
 // SSE2 register conversion intrinsics
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (CVTDQ2PDrm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
             (CVTDQ2PDrm addr:$src)>;
 } // Predicates = [UseSSE2]
diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
index 5b991847d68d5..90631baaf47eb 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
@@ -2558,16 +2558,14 @@ define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_sito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_sito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_sito2f64_mask_load:
@@ -2611,16 +2609,14 @@ define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_uito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_uito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_uito2f64_mask_load:
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 2d1202a9200e4..2b5112650a969 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -2533,16 +2533,14 @@ define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_sito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_sito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_sito2f64_mask_load:
@@ -2586,16 +2584,14 @@ define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_uito2f64_mask_load:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
-; VLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: test_uito2f64_mask_load:
 ; VLNODQ:       # %bb.0:
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT:    vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT:    retq
 ;
 ; DQNOVL-LABEL: test_uito2f64_mask_load:
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
index eb85beb53988f..78bc214358bc6 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
@@ -3158,12 +3158,14 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    vmovaps (%rdi), %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load volatile <4 x i32>, <4 x i32> *%a
   %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index d394383ddc22f..9b543075f3b29 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -3158,12 +3158,14 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT:    vmovaps (%rdi), %xmm0
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %ld = load volatile <4 x i32>, <4 x i32> *%a
   %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>

From f4a6dd28b6a801d4c7dd8b8aaa33f54d2258171d Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Fri, 31 May 2019 08:06:17 +0000
Subject: [PATCH 0725/1176] [MIPS GlobalISel] Lower call for callee that is
 register

Lower call for callee that is register for MIPS32.
Register should contain callee function address.

Differential Revision: https://reviews.llvm.org/D62585

llvm-svn: 362204
---
 llvm/lib/Target/Mips/MipsCallLowering.cpp     | 14 +++++++------
 .../Mips/GlobalISel/irtranslator/call.ll      | 20 +++++++++++++++++++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 0cee6e732ec0b..04aff60b2f23d 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -522,12 +522,8 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   MachineInstrBuilder CallSeqStart =
       MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN);
 
-  // FIXME: Add support for pic calling sequences, long call sequences for O32,
-  //       N32 and N64. First handle the case when Callee.isReg().
-  if (Callee.isReg())
-    return false;
-
-  MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(Mips::JAL);
+  MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(
+      Callee.isReg() ? Mips::JALRPseudo : Mips::JAL);
   MIB.addDef(Mips::SP, RegState::Implicit);
   MIB.add(Callee);
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
@@ -573,6 +569,12 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   CallSeqStart.addImm(NextStackOffset).addImm(0);
 
   MIRBuilder.insertInstr(MIB);
+  if (MIB->getOpcode() == Mips::JALRPseudo) {
+    const MipsSubtarget &STI =
+        static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+    MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+                         *STI.getRegBankInfo());
+  }
 
   if (OrigRet.Reg) {
 
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
index b1ac25827a6e6..1d901db54a8ff 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
@@ -25,3 +25,23 @@ entry:
   %doublez = add i32 %z, %z
   ret i32 %doublez
 }
+
+define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
+  ; MIPS32-LABEL: name: call_reg
+  ; MIPS32: bb.1.entry:
+  ; MIPS32:   liveins: $a0, $a1, $a2
+  ; MIPS32:   [[COPY:%[0-9]+]]:gpr32(p0) = COPY $a0
+  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32:   $a0 = COPY [[COPY1]](s32)
+  ; MIPS32:   $a1 = COPY [[COPY2]](s32)
+  ; MIPS32:   JALRPseudo [[COPY]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32:   $v0 = COPY [[COPY3]](s32)
+  ; MIPS32:   RetRA implicit $v0
+entry:
+  %call = call i32 %f_ptr(i32 %x, i32 %y)
+  ret i32 %call
+}

From b457e430f3fc8579a41d76a01a2272b11be29f79 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 08:10:23 +0000
Subject: [PATCH 0726/1176] [InstructionSimplify] Add missing implementation of
 llvm::SimplifyUnOp. NFC

There are no callers currently, but the function is declared so we should at
least implement it.

llvm-svn: 362205
---
 llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 1f8245d30f6ff..9eb0e908aab43 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4589,6 +4589,10 @@ static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
   }
 }
 
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+  return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
+}
+
 Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
                             const SimplifyQuery &Q) {
   return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);

From 9058b50fb2d33a89ae711071e82e2e85a65d0081 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Fri, 31 May 2019 08:15:28 +0000
Subject: [PATCH 0727/1176] [mips] Move initGlobalBaseReg to MipsFunctionInfo.
 NFC

Move initGlobalBaseReg from MipsSEDAGToDAGISel to MipsFunctionInfo.
This way functions used for handling position independent code during
instruction selection, getGlobalBaseReg and initGlobalBaseReg,
end up in same class.

Differential Revision: https://reviews.llvm.org/D62586

llvm-svn: 362206
---
 llvm/lib/Target/Mips/MipsMachineFunction.cpp | 88 +++++++++++++++++++
 llvm/lib/Target/Mips/MipsMachineFunction.h   |  4 +
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp  | 91 +-------------------
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h    |  4 -
 4 files changed, 93 insertions(+), 94 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 86c90d6538115..57749054b5f94 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -51,6 +51,94 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
   return GlobalBaseReg;
 }
 
+void MipsFunctionInfo::initGlobalBaseReg() {
+  if (!GlobalBaseReg)
+    return;
+
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator I = MBB.begin();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  DebugLoc DL;
+  unsigned V0, V1;
+  const TargetRegisterClass *RC;
+  const MipsABIInfo &ABI =
+      static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
+  RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+
+  V0 = RegInfo.createVirtualRegister(RC);
+  V1 = RegInfo.createVirtualRegister(RC);
+
+  if (ABI.IsN64()) {
+    MF.getRegInfo().addLiveIn(Mips::T9_64);
+    MBB.addLiveIn(Mips::T9_64);
+
+    // lui $v0, %hi(%neg(%gp_rel(fname)))
+    // daddu $v1, $v0, $t9
+    // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+    const GlobalValue *FName = &MF.getFunction();
+    BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+        .addReg(Mips::T9_64);
+    BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+    return;
+  }
+
+  if (!MF.getTarget().isPositionIndependent()) {
+    // Set global register to __gnu_local_gp.
+    //
+    // lui   $v0, %hi(__gnu_local_gp)
+    // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+        .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+        .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+    return;
+  }
+
+  MF.getRegInfo().addLiveIn(Mips::T9);
+  MBB.addLiveIn(Mips::T9);
+
+  if (ABI.IsN32()) {
+    // lui $v0, %hi(%neg(%gp_rel(fname)))
+    // addu $v1, $v0, $t9
+    // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+    const GlobalValue *FName = &MF.getFunction();
+    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+        .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+    return;
+  }
+
+  assert(ABI.IsO32());
+
+  // For O32 ABI, the following instruction sequence is emitted to initialize
+  // the global base register:
+  //
+  //  0. lui   $2, %hi(_gp_disp)
+  //  1. addiu $2, $2, %lo(_gp_disp)
+  //  2. addu  $globalbasereg, $2, $t9
+  //
+  // We emit only the last instruction here.
+  //
+  // GNU linker requires that the first two instructions appear at the beginning
+  // of a function and no instructions be inserted before or between them.
+  // The two instructions are emitted during lowering to MC layer in order to
+  // avoid any reordering.
+  //
+  // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+  // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+  // reads it.
+  MF.getRegInfo().addLiveIn(Mips::V0);
+  MBB.addLiveIn(Mips::V0);
+  BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+      .addReg(Mips::V0).addReg(Mips::T9);
+}
+
 void MipsFunctionInfo::createEhDataRegsFI() {
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   for (int I = 0; I < 4; ++I) {
diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.h b/llvm/lib/Target/Mips/MipsMachineFunction.h
index 9e7b9b704eac5..b2c6c8d4f338e 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -34,6 +34,10 @@ class MipsFunctionInfo : public MachineFunctionInfo {
   bool globalBaseRegSet() const;
   unsigned getGlobalBaseReg();
 
+  // Insert instructions to initialize the global base register in the
+  // first MBB of the function.
+  void initGlobalBaseReg();
+
   int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
   void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
 
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 5308ab8c4b3a3..cc6efe57eff53 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -134,97 +134,8 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
   return true;
 }
 
-void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
-  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
-  if (!MipsFI->globalBaseRegSet())
-    return;
-
-  MachineBasicBlock &MBB = MF.front();
-  MachineBasicBlock::iterator I = MBB.begin();
-  MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-  DebugLoc DL;
-  unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
-  const TargetRegisterClass *RC;
-  const MipsABIInfo &ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
-  RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
-
-  V0 = RegInfo.createVirtualRegister(RC);
-  V1 = RegInfo.createVirtualRegister(RC);
-
-  if (ABI.IsN64()) {
-    MF.getRegInfo().addLiveIn(Mips::T9_64);
-    MBB.addLiveIn(Mips::T9_64);
-
-    // lui $v0, %hi(%neg(%gp_rel(fname)))
-    // daddu $v1, $v0, $t9
-    // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
-    const GlobalValue *FName = &MF.getFunction();
-    BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
-      .addReg(Mips::T9_64);
-    BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
-    return;
-  }
-
-  if (!MF.getTarget().isPositionIndependent()) {
-    // Set global register to __gnu_local_gp.
-    //
-    // lui   $v0, %hi(__gnu_local_gp)
-    // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
-    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
-      .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
-      .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
-    return;
-  }
-
-  MF.getRegInfo().addLiveIn(Mips::T9);
-  MBB.addLiveIn(Mips::T9);
-
-  if (ABI.IsN32()) {
-    // lui $v0, %hi(%neg(%gp_rel(fname)))
-    // addu $v1, $v0, $t9
-    // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
-    const GlobalValue *FName = &MF.getFunction();
-    BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
-    BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
-      .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
-    return;
-  }
-
-  assert(ABI.IsO32());
-
-  // For O32 ABI, the following instruction sequence is emitted to initialize
-  // the global base register:
-  //
-  //  0. lui   $2, %hi(_gp_disp)
-  //  1. addiu $2, $2, %lo(_gp_disp)
-  //  2. addu  $globalbasereg, $2, $t9
-  //
-  // We emit only the last instruction here.
-  //
-  // GNU linker requires that the first two instructions appear at the beginning
-  // of a function and no instructions be inserted before or between them.
-  // The two instructions are emitted during lowering to MC layer in order to
-  // avoid any reordering.
-  //
-  // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
-  // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
-  // reads it.
-  MF.getRegInfo().addLiveIn(Mips::V0);
-  MBB.addLiveIn(Mips::V0);
-  BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
-    .addReg(Mips::V0).addReg(Mips::T9);
-}
-
 void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
-  initGlobalBaseReg(MF);
+  MF.getInfo<MipsFunctionInfo>()->initGlobalBaseReg();
 
   MachineRegisterInfo *MRI = &MF.getRegInfo();
 
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 72cbde83addb5..ce594e1fb4fa5 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -130,10 +130,6 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
 
   void processFunctionAfterISel(MachineFunction &MF) override;
 
-  // Insert instructions to initialize the global base register in the
-  // first MBB of the function.
-  void initGlobalBaseReg(MachineFunction &MF);
-
   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;

From 750d148e8fe0c35f850a51a5f75297110ef008ad Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 31 May 2019 08:20:43 +0000
Subject: [PATCH 0728/1176] [ELF][test] Restore linkerscript/symbol-location.s
 to test getLinkerScriptLocation()

The test (the only test that checks getLinkerScriptLocation()) deleted
by r358652 can be restored by replacing R_X86_64_PLT32 with
R_X86_64_PC32, and changing -pie to -shared (preemptable). Then, the
symbol will not be a link-time constant and a -fPIC error will be
issued.

llvm-svn: 362207
---
 lld/test/ELF/linkerscript/symbol-location.s | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 lld/test/ELF/linkerscript/symbol-location.s

diff --git a/lld/test/ELF/linkerscript/symbol-location.s b/lld/test/ELF/linkerscript/symbol-location.s
new file mode 100644
index 0000000000000..91070d8ec6df9
--- /dev/null
+++ b/lld/test/ELF/linkerscript/symbol-location.s
@@ -0,0 +1,16 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+# RUN: echo 'foo = _start;' > %t.script
+# RUN: not ld.lld -shared -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s
+
+## Here we check that symbol 'foo' location is reported properly.
+
+# CHECK: error: relocation R_X86_64_PC32 cannot be used against symbol foo
+# CHECK: >>> defined in {{.*}}.script:1
+# CHECK: >>> referenced by {{.*}}.o:(.text+0x1)
+
+.text
+.globl _start
+_start:
+  .byte 0xe8
+  .long foo - .

From 7c1ac8269ac48321d6b2e53ae0d3e0af46c498f1 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Fri, 31 May 2019 08:23:48 +0000
Subject: [PATCH 0729/1176] [NFC][Codegen] Add/sub constant-folding: add scalar
 tests too

Just for completeness.

llvm-svn: 362208
---
 .../AArch64/addsub-constant-folding.ll        | 438 ++++++++---
 .../CodeGen/X86/addsub-constant-folding.ll    | 697 +++++++++++++++---
 2 files changed, 953 insertions(+), 182 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
index 47a236e373be2..bb3b2ebe8ecd1 100644
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -1,13 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
 
-declare void @use(<4 x i32> %arg)
+declare void @use(i32 %arg)
+declare void @vec_use(<4 x i32> %arg)
 
 ; (x+c1)+c2
 
-define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
+define i32 @add_const_add_const(i32 %arg) {
 ; CHECK-LABEL: add_const_add_const:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, #10 // =10
+; CHECK-NEXT:    ret
+  %t0 = add i32 %arg, 8
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @add_const_add_const_extrause(i32 %arg) {
+; CHECK-LABEL: add_const_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    add w0, w0, #8 // =8
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    add w0, w19, #10 // =10
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_add_const:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #10
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
@@ -16,8 +46,8 @@ define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_add_const_extrause:
+define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_add_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -26,7 +56,7 @@ define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #10
@@ -34,16 +64,16 @@ define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_add_const_nonsplat:
+define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_add_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI2_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -53,9 +83,38 @@ define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
 
 ; (x+c1)-c2
 
-define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
+define i32 @add_const_sub_const(i32 %arg) {
 ; CHECK-LABEL: add_const_sub_const:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w0, w0, #6 // =6
+; CHECK-NEXT:    ret
+  %t0 = add i32 %arg, 8
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @add_const_sub_const_extrause(i32 %arg) {
+; CHECK-LABEL: add_const_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    add w0, w0, #8 // =8
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    add w0, w19, #6 // =6
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_sub_const:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #6
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
@@ -64,8 +123,8 @@ define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_sub_const_extrause:
+define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_sub_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -74,7 +133,7 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #6
@@ -82,16 +141,16 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_sub_const_nonsplat:
+define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_sub_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    adrp x8, .LCPI9_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI9_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -101,9 +160,40 @@ define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 
 ; c2-(x+c1)
 
-define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
+define i32 @add_const_const_sub(i32 %arg) {
 ; CHECK-LABEL: add_const_const_sub:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-6
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+  %t0 = add i32 %arg, 8
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @add_const_const_sub_extrause(i32 %arg) {
+; CHECK-LABEL: add_const_const_sub_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    add w0, w0, #8 // =8
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    mov w8, #-6
+; CHECK-NEXT:    sub w0, w8, w19
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_const_sub:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mvni v1.4s, #5
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -112,8 +202,8 @@ define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_const_sub_extrause:
+define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_const_sub_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -122,7 +212,7 @@ define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    mvni v0.4s, #5
@@ -130,16 +220,16 @@ define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: add_const_const_sub_nonsplat:
+define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_add_const_const_sub_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -149,9 +239,38 @@ define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
 
 ; (x-c1)+c2
 
-define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
+define i32 @sub_const_add_const(i32 %arg) {
 ; CHECK-LABEL: sub_const_add_const:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, #6 // =6
+; CHECK-NEXT:    ret
+  %t0 = sub i32 %arg, 8
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @sub_const_add_const_extrause(i32 %arg) {
+; CHECK-LABEL: sub_const_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    sub w0, w0, #8 // =8
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    sub w0, w19, #6 // =6
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_add_const:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mvni v1.4s, #5
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
@@ -160,8 +279,8 @@ define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_add_const_extrause:
+define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_add_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -170,7 +289,7 @@ define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    mvni v0.4s, #5
@@ -178,16 +297,16 @@ define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_add_const_nonsplat:
+define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_add_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI11_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    adrp x8, .LCPI19_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -197,9 +316,38 @@ define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
 
 ; (x-c1)-c2
 
-define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
+define i32 @sub_const_sub_const(i32 %arg) {
 ; CHECK-LABEL: sub_const_sub_const:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, #10 // =10
+; CHECK-NEXT:    ret
+  %t0 = sub i32 %arg, 8
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @sub_const_sub_const_extrause(i32 %arg) {
+; CHECK-LABEL: sub_const_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    sub w0, w0, #8 // =8
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    sub w0, w19, #10 // =10
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_sub_const:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #10
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
@@ -208,8 +356,8 @@ define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_sub_const_extrause:
+define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_sub_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -218,7 +366,7 @@ define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #10
@@ -226,16 +374,16 @@ define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_sub_const_nonsplat:
+define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_sub_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT:    adrp x8, .LCPI24_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI24_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -245,9 +393,40 @@ define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
 
 ; c2-(x-c1)
 
-define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
+define i32 @sub_const_const_sub(i32 %arg) {
 ; CHECK-LABEL: sub_const_const_sub:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+  %t0 = sub i32 %arg, 8
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @sub_const_const_sub_extrause(i32 %arg) {
+; CHECK-LABEL: sub_const_const_sub_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    sub w0, w0, #8 // =8
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    sub w0, w8, w19
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_const_sub:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #10
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -256,8 +435,8 @@ define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_const_sub_extrause:
+define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_const_sub_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -266,7 +445,7 @@ define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #2
@@ -274,16 +453,16 @@ define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: sub_const_const_sub_nonsplat:
+define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_sub_const_const_sub_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI17_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT:    adrp x8, .LCPI29_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI29_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
@@ -293,9 +472,41 @@ define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
 
 ; (c1-x)+c2
 
-define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
+define i32 @const_sub_add_const(i32 %arg) {
 ; CHECK-LABEL: const_sub_add_const:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+  %t0 = sub i32 8, %arg
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @const_sub_add_const_extrause(i32 %arg) {
+; CHECK-LABEL: const_sub_add_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    sub w0, w8, w19
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_add_const:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #10
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -304,8 +515,8 @@ define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: const_sub_add_const_extrause:
+define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_add_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -314,7 +525,7 @@ define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #10
@@ -322,16 +533,16 @@ define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: const_sub_add_const_nonsplat:
+define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_add_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI20_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI20_0]
+; CHECK-NEXT:    adrp x8, .LCPI34_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI34_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
@@ -341,9 +552,41 @@ define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
 
 ; (c1-x)-c2
 
-define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
+define i32 @const_sub_sub_const(i32 %arg) {
 ; CHECK-LABEL: const_sub_sub_const:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+  %t0 = sub i32 8, %arg
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @const_sub_sub_const_extrause(i32 %arg) {
+; CHECK-LABEL: const_sub_sub_const_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    mov w8, #6
+; CHECK-NEXT:    sub w0, w8, w19
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_sub_const:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #6
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -352,8 +595,8 @@ define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: const_sub_sub_const_extrause:
+define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_sub_const_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -362,7 +605,7 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #6
@@ -370,16 +613,16 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: const_sub_sub_const_nonsplat:
+define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_sub_const_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI23_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT:    adrp x8, .LCPI39_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI39_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
@@ -389,9 +632,40 @@ define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 
 ; c2-(c1-x)
 
-define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
+define i32 @const_sub_const_sub(i32 %arg) {
 ; CHECK-LABEL: const_sub_const_sub:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w0, w0, #6 // =6
+; CHECK-NEXT:    ret
+  %t0 = sub i32 8, %arg
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @const_sub_const_sub_extrause(i32 %arg) {
+; CHECK-LABEL: const_sub_const_sub_extrause:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x19, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    sub w19, w8, w0
+; CHECK-NEXT:    mov w0, w19
+; CHECK-NEXT:    bl use
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    sub w0, w8, w19
+; CHECK-NEXT:    ldp x19, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_const_sub:
+; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mvni v1.4s, #5
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
@@ -400,8 +674,8 @@ define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
-; CHECK-LABEL: const_sub_const_sub_extrause:
+define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_const_sub_extrause:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32 // =32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -410,7 +684,7 @@ define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    movi v1.4s, #8
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    bl use
+; CHECK-NEXT:    bl vec_use
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    movi v0.4s, #2
@@ -418,16 +692,16 @@ define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-NEXT:    add sp, sp, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
-; CHECK-LABEL: const_sub_const_sub_nonsplat:
+define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: vec_const_sub_const_sub_nonsplat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI26_0
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI26_0]
+; CHECK-NEXT:    adrp x8, .LCPI44_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI44_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
diff --git a/llvm/test/CodeGen/X86/addsub-constant-folding.ll b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
index 3c48494ae39de..09cd2bcea962d 100644
--- a/llvm/test/CodeGen/X86/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/X86/addsub-constant-folding.ll
@@ -2,18 +2,73 @@
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,SSE,X86-SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE,X64-SSE
 
-declare void @use(<4 x i32> %arg)
+declare void @use(i32 %arg)
+declare void @vec_use(<4 x i32> %arg)
 
 ; (x+c1)+c2
 
-define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
+define i32 @add_const_add_const(i32 %arg) {
 ; X86-LABEL: add_const_add_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $10, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_add_const:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal 10(%rdi), %eax
+; X64-NEXT:    retq
+  %t0 = add i32 %arg, 8
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @add_const_add_const_extrause(i32 %arg) {
+; X86-LABEL: add_const_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal 8(%esi), %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    addl $10, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    leal 8(%rbx), %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    leal 10(%rbx), %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_add_const_add_const:
+; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -21,8 +76,8 @@ define <4 x i32> @add_const_add_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: add_const_add_const_extrause:
+define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_add_const_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
@@ -30,14 +85,14 @@ define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X86-NEXT:    paddd %xmm1, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_add_const_extrause:
+; X64-LABEL: vec_add_const_add_const_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
@@ -45,25 +100,25 @@ define <4 x i32> @add_const_add_const_extrause(<4 x i32> %arg) {
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X64-NEXT:    paddd %xmm1, %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: add_const_add_const_nonsplat:
+define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_add_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_add_const_nonsplat:
+; X64-LABEL: vec_add_const_add_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
@@ -74,14 +129,68 @@ define <4 x i32> @add_const_add_const_nonsplat(<4 x i32> %arg) {
 
 ; (x+c1)-c2
 
-define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
+define i32 @add_const_sub_const(i32 %arg) {
 ; X86-LABEL: add_const_sub_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $6, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: add_const_sub_const:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal 6(%rdi), %eax
+; X64-NEXT:    retq
+  %t0 = add i32 %arg, 8
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @add_const_sub_const_extrause(i32 %arg) {
+; X86-LABEL: add_const_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal 8(%esi), %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    addl $6, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    leal 8(%rbx), %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    leal 6(%rbx), %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_sub_const:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_add_const_sub_const:
+; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -89,8 +198,8 @@ define <4 x i32> @add_const_sub_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: add_const_sub_const_extrause:
+define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_sub_const_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
@@ -98,14 +207,14 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X86-NEXT:    paddd %xmm1, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_sub_const_extrause:
+; X64-LABEL: vec_add_const_sub_const_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
@@ -113,25 +222,25 @@ define <4 x i32> @add_const_sub_const_extrause(<4 x i32> %arg) {
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X64-NEXT:    paddd %xmm1, %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: add_const_sub_const_nonsplat:
+define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_sub_const_nonsplat:
+; X64-LABEL: vec_add_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
@@ -142,15 +251,70 @@ define <4 x i32> @add_const_sub_const_nonsplat(<4 x i32> %arg) {
 
 ; c2-(x+c1)
 
-define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
+define i32 @add_const_const_sub(i32 %arg) {
 ; X86-LABEL: add_const_const_sub:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl $-6, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_const_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $-6, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    retq
+  %t0 = add i32 %arg, 8
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @add_const_const_sub_extrause(i32 %arg) {
+; X86-LABEL: add_const_const_sub_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal 8(%esi), %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    movl $-6, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: add_const_const_sub_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    leal 8(%rbx), %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    movl $-6, %eax
+; X64-NEXT:    subl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_const_sub:
+; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_const_sub:
+; X64-LABEL: vec_add_const_const_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = [4294967290,4294967290,4294967290,4294967290]
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -161,8 +325,8 @@ define <4 x i32> @add_const_const_sub(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
-; X86-LABEL: add_const_const_sub_extrause:
+define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_const_sub_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
@@ -170,7 +334,7 @@ define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X86-NEXT:    paddd %xmm1, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
 ; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
 ; X86-NEXT:    psubd %xmm1, %xmm0
@@ -178,7 +342,7 @@ define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_const_sub_extrause:
+; X64-LABEL: vec_add_const_const_sub_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
@@ -186,27 +350,27 @@ define <4 x i32> @add_const_const_sub_extrause(<4 x i32> %arg) {
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X64-NEXT:    paddd %xmm1, %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [4294967290,4294967290,4294967290,4294967290]
 ; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: add_const_const_sub_nonsplat:
+define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_add_const_const_sub_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: add_const_const_sub_nonsplat:
+; X64-LABEL: vec_add_const_const_sub_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = <4294967277,u,u,4294967290>
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -219,14 +383,68 @@ define <4 x i32> @add_const_const_sub_nonsplat(<4 x i32> %arg) {
 
 ; (x-c1)+c2
 
-define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
+define i32 @sub_const_add_const(i32 %arg) {
 ; X86-LABEL: sub_const_add_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $-6, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_add_const:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -6(%rdi), %eax
+; X64-NEXT:    retq
+  %t0 = sub i32 %arg, 8
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @sub_const_add_const_extrause(i32 %arg) {
+; X86-LABEL: sub_const_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal -8(%esi), %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    addl $-6, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    leal -8(%rbx), %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    leal -6(%rbx), %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_add_const:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_sub_const_add_const:
+; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -234,45 +452,45 @@ define <4 x i32> @sub_const_add_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_add_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: sub_const_add_const_extrause:
+define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_add_const_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_add_const_extrause:
+; X64-LABEL: vec_sub_const_add_const_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: sub_const_add_const_nonsplat:
+define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_add_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_add_const_nonsplat:
+; X64-LABEL: vec_sub_const_add_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
@@ -283,14 +501,68 @@ define <4 x i32> @sub_const_add_const_nonsplat(<4 x i32> %arg) {
 
 ; (x-c1)-c2
 
-define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
+define i32 @sub_const_sub_const(i32 %arg) {
 ; X86-LABEL: sub_const_sub_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $-10, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_const_sub_const:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -10(%rdi), %eax
+; X64-NEXT:    retq
+  %t0 = sub i32 %arg, 8
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @sub_const_sub_const_extrause(i32 %arg) {
+; X86-LABEL: sub_const_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal -8(%esi), %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    addl $-10, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    leal -8(%rbx), %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    leal -10(%rbx), %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_sub_const:
+; X86:       # %bb.0:
+; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_sub_const_sub_const:
+; X64:       # %bb.0:
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -298,45 +570,45 @@ define <4 x i32> @sub_const_sub_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_sub_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: sub_const_sub_const_extrause:
+define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_sub_const_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqu (%esp), %xmm0 # 16-byte Reload
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    addl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_sub_const_extrause:
+; X64-LABEL: vec_sub_const_sub_const_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: sub_const_sub_const_nonsplat:
+define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_sub_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_sub_const_nonsplat:
+; X64-LABEL: vec_sub_const_sub_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
@@ -347,15 +619,70 @@ define <4 x i32> @sub_const_sub_const_nonsplat(<4 x i32> %arg) {
 
 ; c2-(x-c1)
 
-define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
+define i32 @sub_const_const_sub(i32 %arg) {
 ; X86-LABEL: sub_const_const_sub:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl $10, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_const_sub:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $10, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    retq
+  %t0 = sub i32 %arg, 8
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @sub_const_const_sub_extrause(i32 %arg) {
+; X86-LABEL: sub_const_const_sub_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    leal -8(%esi), %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    movl $10, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: sub_const_const_sub_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    leal -8(%rbx), %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    movl $10, %eax
+; X64-NEXT:    subl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_const_sub:
+; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_const_sub:
+; X64-LABEL: vec_sub_const_const_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -366,14 +693,14 @@ define <4 x i32> @sub_const_const_sub(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
-; X86-LABEL: sub_const_const_sub_extrause:
+define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_const_sub_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
 ; X86-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
 ; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
 ; X86-NEXT:    psubd %xmm1, %xmm0
@@ -381,33 +708,33 @@ define <4 x i32> @sub_const_const_sub_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_const_sub_extrause:
+; X64-LABEL: vec_sub_const_const_sub_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
 ; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: sub_const_const_sub_nonsplat:
+define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_sub_const_const_sub_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: sub_const_const_sub_nonsplat:
+; X64-LABEL: vec_sub_const_const_sub_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -420,15 +747,72 @@ define <4 x i32> @sub_const_const_sub_nonsplat(<4 x i32> %arg) {
 
 ; (c1-x)+c2
 
-define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
+define i32 @const_sub_add_const(i32 %arg) {
 ; X86-LABEL: const_sub_add_const:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl $10, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_add_const:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $10, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    retq
+  %t0 = sub i32 8, %arg
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @const_sub_add_const_extrause(i32 %arg) {
+; X86-LABEL: const_sub_add_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl $8, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    movl $10, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_add_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    movl $8, %edi
+; X64-NEXT:    subl %ebx, %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    movl $10, %eax
+; X64-NEXT:    subl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_add_const:
+; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_add_const:
+; X64-LABEL: vec_const_sub_add_const:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10]
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -439,8 +823,8 @@ define <4 x i32> @const_sub_add_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: const_sub_add_const_extrause:
+define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_add_const_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
@@ -448,7 +832,7 @@ define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X86-NEXT:    psubd %xmm1, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
 ; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
 ; X86-NEXT:    psubd %xmm1, %xmm0
@@ -456,7 +840,7 @@ define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_add_const_extrause:
+; X64-LABEL: vec_const_sub_add_const_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
@@ -464,27 +848,27 @@ define <4 x i32> @const_sub_add_const_extrause(<4 x i32> %arg) {
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [10,10,10,10]
 ; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: const_sub_add_const_nonsplat:
+define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_add_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_add_const_nonsplat:
+; X64-LABEL: vec_const_sub_add_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = <23,u,u,10>
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -497,15 +881,72 @@ define <4 x i32> @const_sub_add_const_nonsplat(<4 x i32> %arg) {
 
 ; (c1-x)-c2
 
-define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
+define i32 @const_sub_sub_const(i32 %arg) {
 ; X86-LABEL: const_sub_sub_const:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl $6, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_sub_const:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $6, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    retq
+  %t0 = sub i32 8, %arg
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @const_sub_sub_const_extrause(i32 %arg) {
+; X86-LABEL: const_sub_sub_const_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl $8, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    movl $6, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_sub_const_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    movl $8, %edi
+; X64-NEXT:    subl %ebx, %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    movl $6, %eax
+; X64-NEXT:    subl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_sub_const:
+; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_sub_const:
+; X64-LABEL: vec_const_sub_sub_const:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -516,8 +957,8 @@ define <4 x i32> @const_sub_sub_const(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
-; X86-LABEL: const_sub_sub_const_extrause:
+define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_sub_const_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
@@ -525,7 +966,7 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    movdqu %xmm0, (%esp) # 16-byte Spill
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X86-NEXT:    psubd %xmm1, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
 ; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
 ; X86-NEXT:    psubd %xmm1, %xmm0
@@ -533,7 +974,7 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_sub_const_extrause:
+; X64-LABEL: vec_const_sub_sub_const_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
@@ -541,27 +982,27 @@ define <4 x i32> @const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; X64-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [8,8,8,8]
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [6,6,6,6]
 ; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: const_sub_sub_const_nonsplat:
+define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_sub_const_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movdqa {{.*#+}} xmm1 = <19,u,u,6>
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqa %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_sub_const_nonsplat:
+; X64-LABEL: vec_const_sub_sub_const_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movdqa {{.*#+}} xmm1 = <19,u,u,6>
 ; X64-NEXT:    psubd %xmm0, %xmm1
@@ -574,14 +1015,70 @@ define <4 x i32> @const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 
 ; c2-(c1-x)
 
-define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
+define i32 @const_sub_const_sub(i32 %arg) {
 ; X86-LABEL: const_sub_const_sub:
 ; X86:       # %bb.0:
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl $-6, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: const_sub_const_sub:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -6(%rdi), %eax
+; X64-NEXT:    retq
+  %t0 = sub i32 8, %arg
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @const_sub_const_sub_extrause(i32 %arg) {
+; X86-LABEL: const_sub_const_sub_extrause:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl $8, %esi
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-NEXT:    calll use
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-NEXT:    movl $2, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: const_sub_const_sub_extrause:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movl $8, %ebx
+; X64-NEXT:    subl %edi, %ebx
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq use
+; X64-NEXT:    movl $2, %eax
+; X64-NEXT:    subl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_const_sub:
+; X86:       # %bb.0:
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_const_sub_const_sub:
+; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -589,8 +1086,8 @@ define <4 x i32> @const_sub_const_sub(<4 x i32> %arg) {
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
-; X86-LABEL: const_sub_const_sub_extrause:
+define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_const_sub_extrause:
 ; X86:       # %bb.0:
 ; X86-NEXT:    subl $28, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 32
@@ -598,7 +1095,7 @@ define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    psubd %xmm0, %xmm1
 ; X86-NEXT:    movdqu %xmm1, (%esp) # 16-byte Spill
 ; X86-NEXT:    movdqa %xmm1, %xmm0
-; X86-NEXT:    calll use
+; X86-NEXT:    calll vec_use
 ; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
 ; X86-NEXT:    movdqu (%esp), %xmm1 # 16-byte Reload
 ; X86-NEXT:    psubd %xmm1, %xmm0
@@ -606,7 +1103,7 @@ define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_const_sub_extrause:
+; X64-LABEL: vec_const_sub_const_sub_extrause:
 ; X64:       # %bb.0:
 ; X64-NEXT:    subq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 32
@@ -614,25 +1111,25 @@ define <4 x i32> @const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; X64-NEXT:    psubd %xmm0, %xmm1
 ; X64-NEXT:    movdqa %xmm1, (%rsp) # 16-byte Spill
 ; X64-NEXT:    movdqa %xmm1, %xmm0
-; X64-NEXT:    callq use
+; X64-NEXT:    callq vec_use
 ; X64-NEXT:    movdqa {{.*#+}} xmm0 = [2,2,2,2]
 ; X64-NEXT:    psubd (%rsp), %xmm0 # 16-byte Folded Reload
 ; X64-NEXT:    addq $24, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
-  call void @use(<4 x i32> %t0)
+  call void @vec_use(<4 x i32> %t0)
   %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
   ret <4 x i32> %t1
 }
 
-define <4 x i32> @const_sub_const_sub_nonsplat(<4 x i32> %arg) {
-; X86-LABEL: const_sub_const_sub_nonsplat:
+define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
+; X86-LABEL: vec_const_sub_const_sub_nonsplat:
 ; X86:       # %bb.0:
 ; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: const_sub_const_sub_nonsplat:
+; X64-LABEL: vec_const_sub_const_sub_nonsplat:
 ; X64:       # %bb.0:
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq

From d1d915b8da91e3e527706c838e2cb4b338336e7a Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Fri, 31 May 2019 08:24:07 +0000
Subject: [PATCH 0730/1176] [NFC][InstCombine] Copy add/sub constant-folding
 tests from codegen

Last three patterns are missed.

llvm-svn: 362209
---
 .../InstCombine/addsub-constant-folding.ll    | 536 ++++++++++++++++++
 1 file changed, 536 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/addsub-constant-folding.ll

diff --git a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
new file mode 100644
index 0000000000000..fc98f249fc524
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
@@ -0,0 +1,536 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @use(i32 %arg)
+declare void @vec_use(<4 x i32> %arg)
+
+; (x+c1)+c2
+
+define i32 @add_const_add_const(i32 %arg) {
+; CHECK-LABEL: @add_const_add_const(
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], 10
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = add i32 %arg, 8
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @add_const_add_const_extrause(i32 %arg) {
+; CHECK-LABEL: @add_const_add_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add i32 [[ARG:%.*]], 8
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG]], 10
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_add_const(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_add_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_add_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_add_const_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 23, i32 undef, i32 undef, i32 10>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (x+c1)-c2
+
+define i32 @add_const_sub_const(i32 %arg) {
+; CHECK-LABEL: @add_const_sub_const(
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], 6
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = add i32 %arg, 8
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @add_const_sub_const_extrause(i32 %arg) {
+; CHECK-LABEL: @add_const_sub_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add i32 [[ARG:%.*]], 8
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG]], 6
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_sub_const(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_sub_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_add_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_sub_const_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 19, i32 undef, i32 undef, i32 6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; c2-(x+c1)
+
+define i32 @add_const_const_sub(i32 %arg) {
+; CHECK-LABEL: @add_const_const_sub(
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 -6, [[ARG:%.*]]
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = add i32 %arg, 8
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @add_const_const_sub_extrause(i32 %arg) {
+; CHECK-LABEL: @add_const_const_sub_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add i32 [[ARG:%.*]], 8
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 -6, [[ARG]]
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = add i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_const_sub(
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>, [[ARG:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_const_sub_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>, [[ARG]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_add_const_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_add_const_const_sub_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 -19, i32 undef, i32 undef, i32 -6>, [[ARG:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = add <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; (x-c1)+c2
+
+define i32 @sub_const_add_const(i32 %arg) {
+; CHECK-LABEL: @sub_const_add_const(
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], -6
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 %arg, 8
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @sub_const_add_const_extrause(i32 %arg) {
+; CHECK-LABEL: @sub_const_add_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add i32 [[ARG:%.*]], -8
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG]], -6
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_add_const(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_add_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -8, i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_sub_const_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_add_const_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -19, i32 undef, i32 undef, i32 -6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (x-c1)-c2
+
+define i32 @sub_const_sub_const(i32 %arg) {
+; CHECK-LABEL: @sub_const_sub_const(
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], -10
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 %arg, 8
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @sub_const_sub_const_extrause(i32 %arg) {
+; CHECK-LABEL: @sub_const_sub_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add i32 [[ARG:%.*]], -8
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG]], -10
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_sub_const(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -10, i32 -10, i32 -10, i32 -10>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_sub_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -8, i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 -10, i32 -10, i32 -10, i32 -10>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_sub_const_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_sub_const_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -23, i32 undef, i32 undef, i32 -10>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; c2-(x-c1)
+
+define i32 @sub_const_const_sub(i32 %arg) {
+; CHECK-LABEL: @sub_const_const_sub(
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 10, [[ARG:%.*]]
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 %arg, 8
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @sub_const_const_sub_extrause(i32 %arg) {
+; CHECK-LABEL: @sub_const_const_sub_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add i32 [[ARG:%.*]], -8
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 10, [[ARG]]
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 %arg, 8
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_const_sub(
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_const_sub_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -8, i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_sub_const_const_sub_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 23, i32 undef, i32 undef, i32 10>, [[ARG:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> %arg, <i32 21, i32 undef, i32 8, i32 8>
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+; (c1-x)+c2
+; FIXME
+
+define i32 @const_sub_add_const(i32 %arg) {
+; CHECK-LABEL: @const_sub_add_const(
+; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], 2
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 8, %arg
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @const_sub_add_const_extrause(i32 %arg) {
+; CHECK-LABEL: @const_sub_add_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], 2
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = add i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_add_const(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_add_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = add <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_add_const_nonsplat(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 2, i32 3, i32 undef, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = add <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; (c1-x)-c2
+; FIXME
+
+define i32 @const_sub_sub_const(i32 %arg) {
+; CHECK-LABEL: @const_sub_sub_const(
+; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], -2
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 8, %arg
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define i32 @const_sub_sub_const_extrause(i32 %arg) {
+; CHECK-LABEL: @const_sub_sub_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], -2
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = sub i32 %t0, 2
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_sub_const(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 -2, i32 -2, i32 -2, i32 -2>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_sub_const_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 -2, i32 -2, i32 -2, i32 -2>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_sub_const_nonsplat(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 -2, i32 -3, i32 undef, i32 -2>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> %t0, <i32 2, i32 3, i32 undef, i32 2>
+  ret <4 x i32> %t1
+}
+
+; c2-(c1-x)
+; FIXME
+
+define i32 @const_sub_const_sub(i32 %arg) {
+; CHECK-LABEL: @const_sub_const_sub(
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG:%.*]], -6
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 8, %arg
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define i32 @const_sub_const_sub_extrause(i32 %arg) {
+; CHECK-LABEL: @const_sub_const_sub_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
+; CHECK-NEXT:    call void @use(i32 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 2, [[T0]]
+; CHECK-NEXT:    ret i32 [[T1]]
+;
+  %t0 = sub i32 8, %arg
+  call void @use(i32 %t0)
+  %t1 = sub i32 2, %t0
+  ret i32 %t1
+}
+
+define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_const_sub(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_const_sub_extrause(
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, [[T0]]
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
+  call void @vec_use(<4 x i32> %t0)
+  %t1 = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %t0
+  ret <4 x i32> %t1
+}
+
+define <4 x i32> @vec_const_sub_const_sub_nonsplat(<4 x i32> %arg) {
+; CHECK-LABEL: @vec_const_sub_const_sub_nonsplat(
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -19, i32 undef, i32 undef, i32 -6>
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
+  %t1 = sub <4 x i32> <i32 2, i32 3, i32 undef, i32 2>, %t0
+  ret <4 x i32> %t1
+}

From efcd3c000991ba9e98428810b7dffecbd8848f57 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Fri, 31 May 2019 08:27:06 +0000
Subject: [PATCH 0731/1176] [MIPS GlobalISel] Handle position independent code

Handle position independent code for MIPS32.
When callee is global address, lower call will emit callee
as G_GLOBAL_VALUE and add target flag if needed.
Support $gp in getRegBankFromRegClass().
Select G_GLOBAL_VALUE, specially handle case when
there are target flags attached by lowerCall.

Differential Revision: https://reviews.llvm.org/D62589

llvm-svn: 362210
---
 llvm/lib/Target/Mips/MipsCallLowering.cpp     |  23 ++-
 .../Target/Mips/MipsInstructionSelector.cpp   |  65 +++++--
 llvm/lib/Target/Mips/MipsMachineFunction.cpp  |   8 +
 llvm/lib/Target/Mips/MipsMachineFunction.h    |   1 +
 llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp |   1 +
 .../instruction-select/gloal_address_pic.mir  | 165 ++++++++++++++++++
 .../irtranslator/global_address_pic.ll        |  62 +++++++
 .../GlobalISel/llvm-ir/global_address_pic.ll  |  97 ++++++++++
 .../regbankselect/global_address_pic.mir      |  48 +++++
 9 files changed, 451 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/irtranslator/global_address_pic.ll
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/regbankselect/global_address_pic.mir

diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 04aff60b2f23d..50fb986e5d84d 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -14,6 +14,7 @@
 
 #include "MipsCallLowering.h"
 #include "MipsCCState.h"
+#include "MipsMachineFunction.h"
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -522,10 +523,22 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   MachineInstrBuilder CallSeqStart =
       MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN);
 
+  const bool IsCalleeGlobalPIC =
+      Callee.isGlobal() && TM.isPositionIndependent();
+
   MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(
-      Callee.isReg() ? Mips::JALRPseudo : Mips::JAL);
+      Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL);
   MIB.addDef(Mips::SP, RegState::Implicit);
-  MIB.add(Callee);
+  if (IsCalleeGlobalPIC) {
+    unsigned CalleeReg =
+        MF.getRegInfo().createGenericVirtualRegister(LLT::pointer(0, 32));
+    MachineInstr *CalleeGlobalValue =
+        MIRBuilder.buildGlobalValue(CalleeReg, Callee.getGlobal());
+    if (!Callee.getGlobal()->hasLocalLinkage())
+      CalleeGlobalValue->getOperand(1).setTargetFlags(MipsII::MO_GOT_CALL);
+    MIB.addUse(CalleeReg);
+  } else
+    MIB.add(Callee);
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
 
@@ -568,6 +581,12 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   NextStackOffset = alignTo(NextStackOffset, StackAlignment);
   CallSeqStart.addImm(NextStackOffset).addImm(0);
 
+  if (IsCalleeGlobalPIC) {
+    MIRBuilder.buildCopy(
+        Mips::GP,
+        MF.getInfo<MipsFunctionInfo>()->getGlobalBaseRegForGlobalISel());
+    MIB.addDef(Mips::GP, RegState::Implicit);
+  }
   MIRBuilder.insertInstr(MIB);
   if (MIB->getOpcode() == Mips::JALRPseudo) {
     const MipsSubtarget &STI =
diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index a03e7ef57b7bf..442244af609ff 100644
--- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -11,6 +11,7 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
+#include "MipsMachineFunction.h"
 #include "MipsRegisterBankInfo.h"
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -356,29 +357,59 @@ bool MipsInstructionSelector::select(MachineInstr &I,
     return true;
   }
   case G_GLOBAL_VALUE: {
-    if (MF.getTarget().isPositionIndependent())
-      return false;
-
     const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal();
-    unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
-    MachineInstr *LUi, *ADDiu;
+    if (MF.getTarget().isPositionIndependent()) {
+      MachineInstr *LWGOT = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
+                                .addDef(I.getOperand(0).getReg())
+                                .addReg(MF.getInfo<MipsFunctionInfo>()
+                                            ->getGlobalBaseRegForGlobalISel())
+                                .addGlobalAddress(GVal);
+      // Global Values that don't have local linkage are handled differently
+      // when they are part of call sequence. MipsCallLowering::lowerCall
+      // creates G_GLOBAL_VALUE instruction as part of call sequence and adds
+      // MO_GOT_CALL flag when Callee doesn't have local linkage.
+      if (I.getOperand(1).getTargetFlags() == MipsII::MO_GOT_CALL)
+        LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT_CALL);
+      else
+        LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT);
+      LWGOT->addMemOperand(
+          MF, MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF),
+                                      MachineMemOperand::MOLoad, 4, 4));
+      if (!constrainSelectedInstRegOperands(*LWGOT, TII, TRI, RBI))
+        return false;
 
-    LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
-              .addDef(LUiReg)
-              .addGlobalAddress(GVal);
-    LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+      if (GVal->hasLocalLinkage()) {
+        unsigned LWGOTDef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+        LWGOT->getOperand(0).setReg(LWGOTDef);
 
-    ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+        MachineInstr *ADDiu =
+            BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
                 .addDef(I.getOperand(0).getReg())
-                .addUse(LUiReg)
+                .addReg(LWGOTDef)
                 .addGlobalAddress(GVal);
-    ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
-
-    if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
-      return false;
-    if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
-      return false;
+        ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+        if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+          return false;
+      }
+    } else {
+      unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+
+      MachineInstr *LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+                              .addDef(LUiReg)
+                              .addGlobalAddress(GVal);
+      LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+      if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+        return false;
 
+      MachineInstr *ADDiu =
+          BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+              .addDef(I.getOperand(0).getReg())
+              .addUse(LUiReg)
+              .addGlobalAddress(GVal);
+      ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+      if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+        return false;
+    }
     I.eraseFromParent();
     return true;
   }
diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 57749054b5f94..d489fac937ebf 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -51,6 +51,14 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
   return GlobalBaseReg;
 }
 
+unsigned MipsFunctionInfo::getGlobalBaseRegForGlobalISel() {
+  if (!GlobalBaseReg) {
+    getGlobalBaseReg();
+    initGlobalBaseReg();
+  }
+  return GlobalBaseReg;
+}
+
 void MipsFunctionInfo::initGlobalBaseReg() {
   if (!GlobalBaseReg)
     return;
diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.h b/llvm/lib/Target/Mips/MipsMachineFunction.h
index b2c6c8d4f338e..d9d53c8d6318e 100644
--- a/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -33,6 +33,7 @@ class MipsFunctionInfo : public MachineFunctionInfo {
 
   bool globalBaseRegSet() const;
   unsigned getGlobalBaseReg();
+  unsigned getGlobalBaseRegForGlobalISel();
 
   // Insert instructions to initialize the global base register in the
   // first MBB of the function.
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 1192db7e1a143..4814ef4b0397a 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -79,6 +79,7 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
   case Mips::GPRMM16MoveP_and_CPU16Regs_and_GPRMM16ZeroRegClassID:
   case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID:
   case Mips::SP32RegClassID:
+  case Mips::GP32RegClassID:
     return getRegBank(Mips::GPRBRegBankID);
   case Mips::FGRCCRegClassID:
   case Mips::FGR32RegClassID:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir
new file mode 100644
index 0000000000000..d91a36e600425
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/gloal_address_pic.mir
@@ -0,0 +1,165 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+--- |
+
+  @val = global i32 0
+  @val_with_local_linkage = internal global i32 1
+
+  declare i32 @f(i32, i32)
+  define internal void @f_with_local_linkage() {entry: ret void}
+  define void @call_global() {entry: ret void}
+  define void @call_global_with_local_linkage() {entry: ret void}
+  define void @ret_global_int() {entry: ret void}
+  define void @ret_global_int_with_local_linkage() {entry: ret void}
+
+...
+---
+name:            f_with_local_linkage
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1
+
+    ; MIPS32-LABEL: name: f_with_local_linkage
+    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu [[COPY1]], [[COPY]]
+    ; MIPS32: $v0 = COPY [[ADDu]]
+    ; MIPS32: RetRA implicit $v0
+    %0:gprb(s32) = COPY $a0
+    %1:gprb(s32) = COPY $a1
+    %2:gprb(s32) = G_ADD %1, %0
+    $v0 = COPY %2(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            call_global
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $t9, $v0
+
+    ; MIPS32-LABEL: name: call_global
+    ; MIPS32: liveins: $a0, $a1, $t9, $v0, $t9, $v0
+    ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[ADDu1:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+    ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got-call) @f :: (load 4 from got)
+    ; MIPS32: $a0 = COPY [[COPY]]
+    ; MIPS32: $a1 = COPY [[COPY1]]
+    ; MIPS32: $gp = COPY [[ADDu1]]
+    ; MIPS32: JALRPseudo [[LW]], csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+    ; MIPS32: [[COPY2:%[0-9]+]]:gpr32 = COPY $v0
+    ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+    ; MIPS32: $v0 = COPY [[COPY2]]
+    ; MIPS32: RetRA implicit $v0
+    %4:gpr32 = ADDu $v0, $t9
+    %0:gprb(s32) = COPY $a0
+    %1:gprb(s32) = COPY $a1
+    ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+    %3:gpr32(p0) = G_GLOBAL_VALUE target-flags(mips-got-call) @f
+    $a0 = COPY %0(s32)
+    $a1 = COPY %1(s32)
+    $gp = COPY %4
+    JALRPseudo %3(p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+    %2:gprb(s32) = COPY $v0
+    ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+    $v0 = COPY %2(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            call_global_with_local_linkage
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $t9, $v0
+
+    ; MIPS32-LABEL: name: call_global_with_local_linkage
+    ; MIPS32: liveins: $a0, $a1, $t9, $v0, $t9, $v0
+    ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[ADDu1:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+    ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @f_with_local_linkage :: (load 4 from got)
+    ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LW]], target-flags(mips-abs-lo) @f_with_local_linkage
+    ; MIPS32: $a0 = COPY [[COPY]]
+    ; MIPS32: $a1 = COPY [[COPY1]]
+    ; MIPS32: $gp = COPY [[ADDu1]]
+    ; MIPS32: JALRPseudo [[ADDiu]], csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+    ; MIPS32: [[COPY2:%[0-9]+]]:gpr32 = COPY $v0
+    ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+    ; MIPS32: $v0 = COPY [[COPY2]]
+    ; MIPS32: RetRA implicit $v0
+    %4:gpr32 = ADDu $v0, $t9
+    %0:gprb(s32) = COPY $a0
+    %1:gprb(s32) = COPY $a1
+    ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+    %3:gpr32(p0) = G_GLOBAL_VALUE @f_with_local_linkage
+    $a0 = COPY %0(s32)
+    $a1 = COPY %1(s32)
+    $gp = COPY %4
+    JALRPseudo %3(p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+    %2:gprb(s32) = COPY $v0
+    ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+    $v0 = COPY %2(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ret_global_int
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    ; MIPS32-LABEL: name: ret_global_int
+    ; MIPS32: liveins: $t9, $v0
+    ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @val :: (load 4 from got)
+    ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[LW]], 0 :: (load 4 from @val)
+    ; MIPS32: $v0 = COPY [[LW1]]
+    ; MIPS32: RetRA implicit $v0
+    %1:gprb(p0) = G_GLOBAL_VALUE @val
+    %0:gprb(s32) = G_LOAD %1(p0) :: (load 4 from @val)
+    $v0 = COPY %0(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ret_global_int_with_local_linkage
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    ; MIPS32-LABEL: name: ret_global_int_with_local_linkage
+    ; MIPS32: liveins: $t9, $v0
+    ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[LW:%[0-9]+]]:gpr32 = LW [[ADDu]], target-flags(mips-got) @val_with_local_linkage :: (load 4 from got)
+    ; MIPS32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu [[LW]], target-flags(mips-abs-lo) @val_with_local_linkage
+    ; MIPS32: [[LW1:%[0-9]+]]:gpr32 = LW [[ADDiu]], 0 :: (load 4 from @val_with_local_linkage)
+    ; MIPS32: $v0 = COPY [[LW1]]
+    ; MIPS32: RetRA implicit $v0
+    %1:gprb(p0) = G_GLOBAL_VALUE @val_with_local_linkage
+    %0:gprb(s32) = G_LOAD %1(p0) :: (load 4 from @val_with_local_linkage)
+    $v0 = COPY %0(s32)
+    RetRA implicit $v0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/global_address_pic.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/global_address_pic.ll
new file mode 100644
index 0000000000000..1987337789cfb
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/global_address_pic.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -relocation-model=pic -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32_PIC
+
+declare i32 @f(i32 %a, i32 %b);
+
+define internal i32 @f_with_local_linkage(i32 %x, i32 %y) {
+  ; MIPS32_PIC-LABEL: name: f_with_local_linkage
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]]
+  ; MIPS32_PIC:   $v0 = COPY [[ADD]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
+entry:
+  %add = add i32 %y, %x
+  ret i32 %add
+}
+
+define i32 @call_global(i32 %a, i32 %b) {
+  ; MIPS32_PIC-LABEL: name: call_global
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1, $t9, $v0
+  ; MIPS32_PIC:   [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   [[GV:%[0-9]+]]:gpr32(p0) = G_GLOBAL_VALUE target-flags(mips-got-call) @f
+  ; MIPS32_PIC:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32_PIC:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32_PIC:   $gp = COPY [[ADDu]]
+  ; MIPS32_PIC:   JALRPseudo [[GV]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+  ; MIPS32_PIC:   [[COPY2:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32_PIC:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   $v0 = COPY [[COPY2]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
+entry:
+  %call = call i32 @f(i32 %a, i32 %b)
+  ret i32 %call
+}
+
+define i32 @call_global_with_local_linkage(i32 %a, i32 %b) {
+  ; MIPS32_PIC-LABEL: name: call_global_with_local_linkage
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1, $t9, $v0
+  ; MIPS32_PIC:   [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   [[GV:%[0-9]+]]:gpr32(p0) = G_GLOBAL_VALUE @f_with_local_linkage
+  ; MIPS32_PIC:   $a0 = COPY [[COPY]](s32)
+  ; MIPS32_PIC:   $a1 = COPY [[COPY1]](s32)
+  ; MIPS32_PIC:   $gp = COPY [[ADDu]]
+  ; MIPS32_PIC:   JALRPseudo [[GV]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+  ; MIPS32_PIC:   [[COPY2:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32_PIC:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   $v0 = COPY [[COPY2]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
+entry:
+  %call = call i32 @f_with_local_linkage(i32 %a, i32 %b)
+  ret i32 %call
+}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
new file mode 100644
index 0000000000000..e293a565fc707
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -global-isel -relocation-model=pic -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32_PIC
+
+declare i32 @f(i32 %a, i32 %b);
+
+define internal i32 @f_with_local_linkage(i32 %x, i32 %y) {
+; MIPS32_PIC-LABEL: f_with_local_linkage:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    addu $2, $5, $4
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %add = add i32 %y, %x
+  ret i32 %add
+}
+
+define i32 @call_global(i32 %a, i32 %b) {
+; MIPS32_PIC-LABEL: call_global:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    lui $2, %hi(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, -24
+; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
+; MIPS32_PIC-NEXT:    addu $1, $2, $25
+; MIPS32_PIC-NEXT:    lw $25, %call16(f)($1)
+; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    jalr $25
+; MIPS32_PIC-NEXT:    nop
+; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, 24
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %call = call i32 @f(i32 %a, i32 %b)
+  ret i32 %call
+}
+
+define i32 @call_global_with_local_linkage(i32 %a, i32 %b) {
+; MIPS32_PIC-LABEL: call_global_with_local_linkage:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    lui $2, %hi(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, -24
+; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
+; MIPS32_PIC-NEXT:    addu $1, $2, $25
+; MIPS32_PIC-NEXT:    lw $2, %got(f_with_local_linkage)($1)
+; MIPS32_PIC-NEXT:    addiu $25, $2, %lo(f_with_local_linkage)
+; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    jalr $25
+; MIPS32_PIC-NEXT:    nop
+; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, 24
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %call = call i32 @f_with_local_linkage(i32 %a, i32 %b)
+  ret i32 %call
+}
+
+
+@val = global i32 0
+@val_with_local_linkage = internal global i32 1
+
+define i32 @ret_global_int() {
+; MIPS32_PIC-LABEL: ret_global_int:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    lui $2, %hi(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; MIPS32_PIC-NEXT:    addu $1, $2, $25
+; MIPS32_PIC-NEXT:    lw $1, %got(val)($1)
+; MIPS32_PIC-NEXT:    lw $2, 0($1)
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %0 = load i32, i32* @val
+  ret i32 %0
+}
+
+define i32 @ret_global_int_with_local_linkage() {
+; MIPS32_PIC-LABEL: ret_global_int_with_local_linkage:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    lui $2, %hi(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; MIPS32_PIC-NEXT:    addu $1, $2, $25
+; MIPS32_PIC-NEXT:    lw $1, %got(val_with_local_linkage)($1)
+; MIPS32_PIC-NEXT:    addiu $1, $1, %lo(val_with_local_linkage)
+; MIPS32_PIC-NEXT:    lw $2, 0($1)
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %0 = load i32, i32* @val_with_local_linkage
+  ret i32 %0
+}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/global_address_pic.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/global_address_pic.mir
new file mode 100644
index 0000000000000..163c693f2a13f
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/global_address_pic.mir
@@ -0,0 +1,48 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -relocation-model=pic -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+--- |
+
+  declare i32 @f(i32, i32)
+  define void @call_global() {entry: ret void}
+
+...
+---
+name:            call_global
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $t9, $v0
+
+    ; MIPS32-LABEL: name: call_global
+    ; MIPS32: liveins: $a0, $a1, $t9, $v0
+    ; MIPS32: [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
+    ; MIPS32: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+    ; MIPS32: [[GV:%[0-9]+]]:gpr32(p0) = G_GLOBAL_VALUE target-flags(mips-got-call) @f
+    ; MIPS32: $a0 = COPY [[COPY]](s32)
+    ; MIPS32: $a1 = COPY [[COPY1]](s32)
+    ; MIPS32: $gp = COPY [[ADDu]]
+    ; MIPS32: JALRPseudo [[GV]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $v0
+    ; MIPS32: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+    ; MIPS32: $v0 = COPY [[COPY2]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %4:gpr32 = ADDu $v0, $t9
+    %0:_(s32) = COPY $a0
+    %1:_(s32) = COPY $a1
+    ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+    %3:gpr32(p0) = G_GLOBAL_VALUE target-flags(mips-got-call) @f
+    $a0 = COPY %0(s32)
+    $a1 = COPY %1(s32)
+    $gp = COPY %4
+    JALRPseudo %3(p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+    %2:_(s32) = COPY $v0
+    ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+    $v0 = COPY %2(s32)
+    RetRA implicit $v0
+
+...
+

From 3cac8d258acb2d52f50a71e11efc83dc582c538b Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Fri, 31 May 2019 08:39:34 +0000
Subject: [PATCH 0732/1176] Follow up and fix for rL362064

Fix the misleadingly indentation introduced in rL362064. This will get rid of
the compiler warning, and it was actually a bug. This change will be used and
tested in D62669.

llvm-svn: 362211
---
 llvm/utils/TableGen/GlobalISelEmitter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index ad13b33f8b75c..4ec7a81927836 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -3813,10 +3813,11 @@ Error GlobalISelEmitter::importDefaultOperandRenderers(
     if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) {
       if (const DefInit *DefaultDagOperator =
               dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
-        if (DefaultDagOperator->getDef()->isSubClassOf("ValueType"))
+        if (DefaultDagOperator->getDef()->isSubClassOf("ValueType")) {
           OpTyOrNone = MVTToLLT(getValueType(
                                   DefaultDagOperator->getDef()));
           DefaultOp = DefaultDagOp->getArg(0);
+        }
       }
     }
 

From f317debdb803fad8b0d2d665b5b62775981d488a Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Fri, 31 May 2019 08:40:08 +0000
Subject: [PATCH 0733/1176] [MIPS GlobalISel] Add detailed tests for lower call

Test different operand types of callee and their behavior whether
relocation model is pic or not.
Possible operand types are:
Register (function pointer),
External symbol (used for libcalls e.g. __udivdi3 or memcpy),
Global address.

Global address has different handling depending on relocation model
and linkage type. Register and external symbol do not.

Differential Revision: https://reviews.llvm.org/D62590

llvm-svn: 362212
---
 .../Mips/GlobalISel/irtranslator/call.ll      | 136 +++++++++++++-
 .../CodeGen/Mips/GlobalISel/llvm-ir/call.ll   | 167 +++++++++++++++++-
 2 files changed, 298 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
index 1d901db54a8ff..c76750208fc69 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -stop-after=irtranslator -relocation-model=pic -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32_PIC
 
 declare i32 @f(i32 %a, i32 %b);
 
-define i32 @g(i32 %a0, i32 %a1, i32 %x, i32 %y) {
-  ; MIPS32-LABEL: name: g
+define i32 @call_global(i32 %a0, i32 %a1, i32 %x, i32 %y) {
+  ; MIPS32-LABEL: name: call_global
   ; MIPS32: bb.1.entry:
   ; MIPS32:   liveins: $a0, $a1, $a2, $a3
   ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
@@ -20,12 +21,95 @@ define i32 @g(i32 %a0, i32 %a1, i32 %x, i32 %y) {
   ; MIPS32:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY4]]
   ; MIPS32:   $v0 = COPY [[ADD]](s32)
   ; MIPS32:   RetRA implicit $v0
+  ; MIPS32_PIC-LABEL: name: call_global
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1, $a2, $a3, $t9, $v0
+  ; MIPS32_PIC:   [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32_PIC:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32_PIC:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   [[GV:%[0-9]+]]:gpr32(p0) = G_GLOBAL_VALUE target-flags(mips-got-call) @f
+  ; MIPS32_PIC:   $a0 = COPY [[COPY2]](s32)
+  ; MIPS32_PIC:   $a1 = COPY [[COPY3]](s32)
+  ; MIPS32_PIC:   $gp = COPY [[ADDu]]
+  ; MIPS32_PIC:   JALRPseudo [[GV]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+  ; MIPS32_PIC:   [[COPY4:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32_PIC:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY4]]
+  ; MIPS32_PIC:   $v0 = COPY [[ADD]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
 entry:
   %z = call i32 @f(i32 %x, i32 %y)
   %doublez = add i32 %z, %z
   ret i32 %doublez
 }
 
+define internal i32 @f_with_local_linkage(i32 %x, i32 %y) {
+  ; MIPS32-LABEL: name: f_with_local_linkage
+  ; MIPS32: bb.1.entry:
+  ; MIPS32:   liveins: $a0, $a1
+  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]]
+  ; MIPS32:   $v0 = COPY [[ADD]](s32)
+  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32_PIC-LABEL: name: f_with_local_linkage
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY]]
+  ; MIPS32_PIC:   $v0 = COPY [[ADD]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
+entry:
+  %add = add i32 %y, %x
+  ret i32 %add
+}
+
+define i32 @call_global_with_local_linkage(i32 %a0, i32 %a1, i32 %x, i32 %y) {
+  ; MIPS32-LABEL: name: call_global_with_local_linkage
+  ; MIPS32: bb.1.entry:
+  ; MIPS32:   liveins: $a0, $a1, $a2, $a3
+  ; MIPS32:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32:   $a0 = COPY [[COPY2]](s32)
+  ; MIPS32:   $a1 = COPY [[COPY3]](s32)
+  ; MIPS32:   JAL @f_with_local_linkage, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+  ; MIPS32:   [[COPY4:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY4]]
+  ; MIPS32:   $v0 = COPY [[ADD]](s32)
+  ; MIPS32:   RetRA implicit $v0
+  ; MIPS32_PIC-LABEL: name: call_global_with_local_linkage
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1, $a2, $a3, $t9, $v0
+  ; MIPS32_PIC:   [[ADDu:%[0-9]+]]:gpr32 = ADDu $v0, $t9
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32_PIC:   [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+  ; MIPS32_PIC:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   [[GV:%[0-9]+]]:gpr32(p0) = G_GLOBAL_VALUE @f_with_local_linkage
+  ; MIPS32_PIC:   $a0 = COPY [[COPY2]](s32)
+  ; MIPS32_PIC:   $a1 = COPY [[COPY3]](s32)
+  ; MIPS32_PIC:   $gp = COPY [[ADDu]]
+  ; MIPS32_PIC:   JALRPseudo [[GV]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $gp, implicit-def $v0
+  ; MIPS32_PIC:   [[COPY4:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32_PIC:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY4]]
+  ; MIPS32_PIC:   $v0 = COPY [[ADD]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
+entry:
+  %z = call i32 @f_with_local_linkage(i32 %x, i32 %y)
+  %doublez = add i32 %z, %z
+  ret i32 %doublez
+}
+
 define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
   ; MIPS32-LABEL: name: call_reg
   ; MIPS32: bb.1.entry:
@@ -41,7 +125,55 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
   ; MIPS32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
   ; MIPS32:   $v0 = COPY [[COPY3]](s32)
   ; MIPS32:   RetRA implicit $v0
+  ; MIPS32_PIC-LABEL: name: call_reg
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1, $a2
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:gpr32(p0) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+  ; MIPS32_PIC:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32_PIC:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   $a0 = COPY [[COPY1]](s32)
+  ; MIPS32_PIC:   $a1 = COPY [[COPY2]](s32)
+  ; MIPS32_PIC:   JALRPseudo [[COPY]](p0), csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit-def $v0
+  ; MIPS32_PIC:   [[COPY3:%[0-9]+]]:_(s32) = COPY $v0
+  ; MIPS32_PIC:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   $v0 = COPY [[COPY3]](s32)
+  ; MIPS32_PIC:   RetRA implicit $v0
 entry:
   %call = call i32 %f_ptr(i32 %x, i32 %y)
   ret i32 %call
 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg)
+
+define void @call_symbol(i8* nocapture readonly %src, i8* nocapture %dest, i32 signext %length) {
+  ; MIPS32-LABEL: name: call_symbol
+  ; MIPS32: bb.1.entry:
+  ; MIPS32:   liveins: $a0, $a1, $a2
+  ; MIPS32:   [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+  ; MIPS32:   [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+  ; MIPS32:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32:   $a0 = COPY [[COPY1]](p0)
+  ; MIPS32:   $a1 = COPY [[COPY]](p0)
+  ; MIPS32:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32:   JAL &memcpy, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2
+  ; MIPS32:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32:   RetRA
+  ; MIPS32_PIC-LABEL: name: call_symbol
+  ; MIPS32_PIC: bb.1.entry:
+  ; MIPS32_PIC:   liveins: $a0, $a1, $a2
+  ; MIPS32_PIC:   [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+  ; MIPS32_PIC:   [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+  ; MIPS32_PIC:   [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+  ; MIPS32_PIC:   ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   $a0 = COPY [[COPY1]](p0)
+  ; MIPS32_PIC:   $a1 = COPY [[COPY]](p0)
+  ; MIPS32_PIC:   $a2 = COPY [[COPY2]](s32)
+  ; MIPS32_PIC:   JAL &memcpy, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2
+  ; MIPS32_PIC:   ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
+  ; MIPS32_PIC:   RetRA
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %length, i1 false)
+  ret void
+}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
index 2e388e491a00a..f7952e4462361 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -global-isel  -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32
+; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32
+; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -global-isel -relocation-model=pic -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32_PIC
 
 declare i32 @f(i32 %a, i32 %b);
 
-define i32 @g(i32 %a0, i32 %a1, i32 %x, i32 %y) {
-; MIPS32-LABEL: g:
+define i32 @call_global(i32 %a0, i32 %a1, i32 %x, i32 %y) {
+; MIPS32-LABEL: call_global:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -24
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 24
@@ -19,8 +20,168 @@ define i32 @g(i32 %a0, i32 %a1, i32 %x, i32 %y) {
 ; MIPS32-NEXT:    addiu $sp, $sp, 24
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
+;
+; MIPS32_PIC-LABEL: call_global:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    lui $2, %hi(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, -24
+; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
+; MIPS32_PIC-NEXT:    addu $1, $2, $25
+; MIPS32_PIC-NEXT:    lw $25, %call16(f)($1)
+; MIPS32_PIC-NEXT:    move $4, $6
+; MIPS32_PIC-NEXT:    move $5, $7
+; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    jalr $25
+; MIPS32_PIC-NEXT:    nop
+; MIPS32_PIC-NEXT:    addu $2, $2, $2
+; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, 24
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
 entry:
   %z = call i32 @f(i32 %x, i32 %y)
   %doublez = add i32 %z, %z
   ret i32 %doublez
 }
+
+define internal i32 @f_with_local_linkage(i32 %x, i32 %y) {
+; MIPS32-LABEL: f_with_local_linkage:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addu $2, $5, $4
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS32_PIC-LABEL: f_with_local_linkage:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    addu $2, $5, $4
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %add = add i32 %y, %x
+  ret i32 %add
+}
+
+define i32 @call_global_with_local_linkage(i32 %a0, i32 %a1, i32 %x, i32 %y) {
+; MIPS32-LABEL: call_global_with_local_linkage:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    move $4, $6
+; MIPS32-NEXT:    move $5, $7
+; MIPS32-NEXT:    jal f_with_local_linkage
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    addu $2, $2, $2
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS32_PIC-LABEL: call_global_with_local_linkage:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    lui $2, %hi(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $2, $2, %lo(_gp_disp)
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, -24
+; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
+; MIPS32_PIC-NEXT:    addu $1, $2, $25
+; MIPS32_PIC-NEXT:    lw $2, %got(f_with_local_linkage)($1)
+; MIPS32_PIC-NEXT:    addiu $25, $2, %lo(f_with_local_linkage)
+; MIPS32_PIC-NEXT:    move $4, $6
+; MIPS32_PIC-NEXT:    move $5, $7
+; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    jalr $25
+; MIPS32_PIC-NEXT:    nop
+; MIPS32_PIC-NEXT:    addu $2, $2, $2
+; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, 24
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %z = call i32 @f_with_local_linkage(i32 %x, i32 %y)
+  %doublez = add i32 %z, %z
+  ret i32 %doublez
+}
+
+define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
+; MIPS32-LABEL: call_reg:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $4, $5
+; MIPS32-NEXT:    move $5, $6
+; MIPS32-NEXT:    lw $25, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jalr $25
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS32_PIC-LABEL: call_reg:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, -24
+; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
+; MIPS32_PIC-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    move $4, $5
+; MIPS32_PIC-NEXT:    move $5, $6
+; MIPS32_PIC-NEXT:    lw $25, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    jalr $25
+; MIPS32_PIC-NEXT:    nop
+; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, 24
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  %call = call i32 %f_ptr(i32 %x, i32 %y)
+  ret i32 %call
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg)
+
+define void @call_symbol(i8* nocapture readonly %src, i8* nocapture %dest, i32 signext %length) {
+; MIPS32-LABEL: call_symbol:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -24
+; MIPS32-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    .cfi_offset 31, -4
+; MIPS32-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $4, $5
+; MIPS32-NEXT:    lw $5, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    jal memcpy
+; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    addiu $sp, $sp, 24
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+;
+; MIPS32_PIC-LABEL: call_symbol:
+; MIPS32_PIC:       # %bb.0: # %entry
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, -24
+; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
+; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
+; MIPS32_PIC-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    move $4, $5
+; MIPS32_PIC-NEXT:    lw $5, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    jal memcpy
+; MIPS32_PIC-NEXT:    nop
+; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    addiu $sp, $sp, 24
+; MIPS32_PIC-NEXT:    jr $ra
+; MIPS32_PIC-NEXT:    nop
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %length, i1 false)
+  ret void
+}

From 2e870011b6202649872c77c7046956166a3b849d Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 31 May 2019 08:59:40 +0000
Subject: [PATCH 0734/1176] [AArch64][SVE2] Asm: support SVE2 store
 instructions

Summary:
Patch adds support for the following instructions:
    * STNT1B, STNT1H, STNT1S, STNT1D

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62599

llvm-svn: 362213
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 +++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 37 +++++++++
 llvm/test/MC/AArch64/SVE/stnt1b-diagnostics.s |  4 +-
 .../test/MC/AArch64/SVE2/stnt1b-diagnostics.s | 82 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/stnt1b.s            | 80 ++++++++++++++++++
 .../test/MC/AArch64/SVE2/stnt1d-diagnostics.s | 82 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/stnt1d.s            | 44 ++++++++++
 .../test/MC/AArch64/SVE2/stnt1h-diagnostics.s | 82 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/stnt1h.s            | 80 ++++++++++++++++++
 .../test/MC/AArch64/SVE2/stnt1w-diagnostics.s | 82 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/stnt1w.s            | 80 ++++++++++++++++++
 11 files changed, 661 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1b-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1b.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1d-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1d.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1h-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1h.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1w-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/stnt1w.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e89dc90541561..a7d31cc0c6b20 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1357,6 +1357,16 @@ let Predicates = [HasSVE2] in {
   defm SRSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
   defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
   defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+
+  // Non-temporal contiguous stores (vector + register)
+  defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
+  defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
+  defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
+
+  defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
+  defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
+  defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
+  defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
 }
 
 let Predicates = [HasSVE2AES] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f22995489690f..afe845a52e565 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3827,6 +3827,43 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
                  (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
 }
 
+class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
+                             RegisterOperand VecList>
+: I<(outs VecList:$Zt), iops,
+  asm, "\t$Zt, $Pg, [$Zn, $Rm]",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Rm;
+  bits<5> Zn;
+  bits<5> Zt;
+  let Inst{31-25} = 0b1110010;
+  let Inst{24-22} = opc;
+  let Inst{21}    = 0b0;
+  let Inst{20-16} = Rm;
+  let Inst{15-13} = 0b001;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zt;
+
+  let mayStore = 1;
+}
+
+multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
+                             RegisterOperand listty, ZPRRegOp zprty> {
+  def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+                                     asm, listty>;
+
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+                 (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+                 (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+}
+
 class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
                      RegisterOperand VecList, RegisterOperand zprext>
 : I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
diff --git a/llvm/test/MC/AArch64/SVE/stnt1b-diagnostics.s b/llvm/test/MC/AArch64/SVE/stnt1b-diagnostics.s
index 4b7806e5b0e81..0d0488d810e8e 100644
--- a/llvm/test/MC/AArch64/SVE/stnt1b-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/stnt1b-diagnostics.s
@@ -23,12 +23,12 @@ stnt1b z0.h, p0, [x0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 stnt1b z0.s, p0, [x0]
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: stnt1b z0.s, p0, [x0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 stnt1b z0.d, p0, [x0]
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 // CHECK-NEXT: stnt1b z0.d, p0, [x0]
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1b-diagnostics.s b/llvm/test/MC/AArch64/SVE2/stnt1b-diagnostics.s
new file mode 100644
index 0000000000000..dbf30a6cc0fa9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1b-diagnostics.s
@@ -0,0 +1,82 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+stnt1b { z0.b }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1b { z0.b }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1b { z0.h }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1b { z0.h }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+stnt1b { z0.s }, p0, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1b { z0.s }, p0, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1b { z0.d }, p0, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1b { z0.d }, p0, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+stnt1b { z0.d }, p0, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1b { z0.d }, p0, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+stnt1b { z27.d }, p8, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: stnt1b { z27.d }, p8, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+stnt1b { }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: stnt1b { }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1b { z0.d, z1.d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1b { z0.d, z1.d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1b { v0.2d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1b { v0.2d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+stnt1b  { z0.d }, p0, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1b  { z0.d }, p0, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1b  { z0.s }, p0, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1b  { z0.s }, p0, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1b.s b/llvm/test/MC/AArch64/SVE2/stnt1b.s
new file mode 100644
index 0000000000000..cab8a6b7f30e2
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1b.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+stnt1b z0.s, p0, [z1.s]
+// CHECK-INST: stnt1b { z0.s }, p0, [z1.s]
+// CHECK-ENCODING: [0x20,0x20,0x5f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 5f e4 <unknown>
+
+stnt1b z31.s, p7, [z31.s, xzr]
+// CHECK-INST: stnt1b { z31.s }, p7, [z31.s]
+// CHECK-ENCODING: [0xff,0x3f,0x5f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 5f e4 <unknown>
+
+stnt1b z31.s, p7, [z31.s, x0]
+// CHECK-INST: stnt1b { z31.s }, p7, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x40,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 40 e4 <unknown>
+
+stnt1b z0.d, p0, [z1.d]
+// CHECK-INST: stnt1b { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x1f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 1f e4 <unknown>
+
+stnt1b z31.d, p7, [z31.d, xzr]
+// CHECK-INST: stnt1b { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x1f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 1f e4 <unknown>
+
+stnt1b z31.d, p7, [z31.d, x0]
+// CHECK-INST: stnt1b { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x00,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 00 e4 <unknown>
+
+stnt1b { z0.s }, p0, [z1.s]
+// CHECK-INST: stnt1b { z0.s }, p0, [z1.s]
+// CHECK-ENCODING: [0x20,0x20,0x5f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 5f e4 <unknown>
+
+stnt1b { z31.s }, p7, [z31.s, xzr]
+// CHECK-INST: stnt1b { z31.s }, p7, [z31.s]
+// CHECK-ENCODING: [0xff,0x3f,0x5f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 5f e4 <unknown>
+
+stnt1b { z31.s }, p7, [z31.s, x0]
+// CHECK-INST: stnt1b { z31.s }, p7, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x40,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 40 e4 <unknown>
+
+stnt1b { z0.d }, p0, [z1.d]
+// CHECK-INST: stnt1b { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x1f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 1f e4 <unknown>
+
+stnt1b { z31.d }, p7, [z31.d, xzr]
+// CHECK-INST: stnt1b { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x1f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 1f e4 <unknown>
+
+stnt1b { z31.d }, p7, [z31.d, x0]
+// CHECK-INST: stnt1b { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x00,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 00 e4 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1d-diagnostics.s b/llvm/test/MC/AArch64/SVE2/stnt1d-diagnostics.s
new file mode 100644
index 0000000000000..c5be82611c512
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1d-diagnostics.s
@@ -0,0 +1,82 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+stnt1d { z0.b }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1d { z0.b }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1d { z0.h }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1d { z0.h }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1d { z0.s }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1d { z0.s }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+stnt1d { z0.d }, p0, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1d { z0.d }, p0, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+stnt1d { z0.d }, p0, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1d { z0.d }, p0, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+stnt1d { z27.d }, p8, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: stnt1d { z27.d }, p8, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+stnt1d { }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: stnt1d { }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1d { z0.d, z1.d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1d { z0.d, z1.d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1d { v0.2d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1d { v0.2d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+stnt1d  { z0.d }, p0, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1d  { z0.d }, p0, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1d  { z0.d }, p0, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1d  { z0.d }, p0, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1d.s b/llvm/test/MC/AArch64/SVE2/stnt1d.s
new file mode 100644
index 0000000000000..462b8bce0104a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1d.s
@@ -0,0 +1,44 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+stnt1d z0.d, p0, [z1.d]
+// CHECK-INST: stnt1d { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x9f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 9f e5 <unknown>
+
+stnt1d z31.d, p7, [z31.d, xzr]
+// CHECK-INST: stnt1d { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x9f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 9f e5 <unknown>
+
+stnt1d z31.d, p7, [z31.d, x0]
+// CHECK-INST: stnt1d { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x80,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 80 e5 <unknown>
+
+stnt1d { z0.d }, p0, [z1.d]
+// CHECK-INST: stnt1d { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x9f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 9f e5 <unknown>
+
+stnt1d { z31.d }, p7, [z31.d, xzr]
+// CHECK-INST: stnt1d { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x9f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 9f e5 <unknown>
+
+stnt1d { z31.d }, p7, [z31.d, x0]
+// CHECK-INST: stnt1d { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x80,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 80 e5 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1h-diagnostics.s b/llvm/test/MC/AArch64/SVE2/stnt1h-diagnostics.s
new file mode 100644
index 0000000000000..db60d05b7c728
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1h-diagnostics.s
@@ -0,0 +1,82 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+stnt1h { z0.b }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1h { z0.b }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1h { z0.h }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1h { z0.h }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+stnt1h { z0.s }, p0, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1h { z0.s }, p0, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1h { z0.d }, p0, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1h { z0.d }, p0, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+stnt1h { z0.d }, p0, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1h { z0.d }, p0, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+stnt1h { z27.d }, p8, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: stnt1h { z27.d }, p8, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+stnt1h { }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: stnt1h { }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1h { z0.d, z1.d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1h { z0.d, z1.d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1h { v0.2d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1h { v0.2d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+stnt1h  { z0.d }, p0, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1h  { z0.d }, p0, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1h  { z0.s }, p0, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1h  { z0.s }, p0, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1h.s b/llvm/test/MC/AArch64/SVE2/stnt1h.s
new file mode 100644
index 0000000000000..07c9c328636ba
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1h.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+stnt1h z0.s, p0, [z1.s]
+// CHECK-INST: stnt1h { z0.s }, p0, [z1.s]
+// CHECK-ENCODING: [0x20,0x20,0xdf,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 df e4 <unknown>
+
+stnt1h z31.s, p7, [z31.s, xzr]
+// CHECK-INST: stnt1h { z31.s }, p7, [z31.s]
+// CHECK-ENCODING: [0xff,0x3f,0xdf,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f df e4 <unknown>
+
+stnt1h z31.s, p7, [z31.s, x0]
+// CHECK-INST: stnt1h { z31.s }, p7, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x3f,0xc0,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f c0 e4 <unknown>
+
+stnt1h z0.d, p0, [z1.d]
+// CHECK-INST: stnt1h { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x9f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 9f e4 <unknown>
+
+stnt1h z31.d, p7, [z31.d, xzr]
+// CHECK-INST: stnt1h { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x9f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 9f e4 <unknown>
+
+stnt1h z31.d, p7, [z31.d, x0]
+// CHECK-INST: stnt1h { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x80,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 80 e4 <unknown>
+
+stnt1h { z0.s }, p0, [z1.s]
+// CHECK-INST: stnt1h { z0.s }, p0, [z1.s]
+// CHECK-ENCODING: [0x20,0x20,0xdf,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 df e4 <unknown>
+
+stnt1h { z31.s }, p7, [z31.s, xzr]
+// CHECK-INST: stnt1h { z31.s }, p7, [z31.s]
+// CHECK-ENCODING: [0xff,0x3f,0xdf,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f df e4 <unknown>
+
+stnt1h { z31.s }, p7, [z31.s, x0]
+// CHECK-INST: stnt1h { z31.s }, p7, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x3f,0xc0,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f c0 e4 <unknown>
+
+stnt1h { z0.d }, p0, [z1.d]
+// CHECK-INST: stnt1h { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x9f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 9f e4 <unknown>
+
+stnt1h { z31.d }, p7, [z31.d, xzr]
+// CHECK-INST: stnt1h { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x9f,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 9f e4 <unknown>
+
+stnt1h { z31.d }, p7, [z31.d, x0]
+// CHECK-INST: stnt1h { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x80,0xe4]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 80 e4 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1w-diagnostics.s b/llvm/test/MC/AArch64/SVE2/stnt1w-diagnostics.s
new file mode 100644
index 0000000000000..c552e36edcee0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1w-diagnostics.s
@@ -0,0 +1,82 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// --------------------------------------------------------------------------//
+// Invalid result type.
+
+stnt1w { z0.b }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1w { z0.b }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1w { z0.h }, p0, [z0.s]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1w { z0.h }, p0, [z0.s]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid base vector.
+
+stnt1w { z0.s }, p0, [z0.b]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1w { z0.s }, p0, [z0.b]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1w { z0.d }, p0, [z0.h]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: stnt1w { z0.d }, p0, [z0.h]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid offset type.
+
+stnt1w { z0.d }, p0, [z0.d, z1.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1w { z0.d }, p0, [z0.d, z1.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// restricted predicate has range [0, 7].
+
+stnt1w { z27.d }, p8, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: restricted predicate has range [0, 7].
+// CHECK-NEXT: stnt1w { z27.d }, p8, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+stnt1w { }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: stnt1w { }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1w { z0.d, z1.d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1w { z0.d, z1.d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+stnt1w { v0.2d }, p0, [z0.d]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: stnt1w { v0.2d }, p0, [z0.d]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+stnt1w  { z0.d }, p0, [z0.d, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1w  { z0.d }, p0, [z0.d, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+stnt1w  { z0.s }, p0, [z0.s, x0]
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: stnt1w  { z0.s }, p0, [z0.s, x0]
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/stnt1w.s b/llvm/test/MC/AArch64/SVE2/stnt1w.s
new file mode 100644
index 0000000000000..90d4c8e6e142a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/stnt1w.s
@@ -0,0 +1,80 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+stnt1w z0.s, p0, [z1.s]
+// CHECK-INST: stnt1w { z0.s }, p0, [z1.s]
+// CHECK-ENCODING: [0x20,0x20,0x5f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 5f e5 <unknown>
+
+stnt1w z31.s, p7, [z31.s, xzr]
+// CHECK-INST: stnt1w { z31.s }, p7, [z31.s]
+// CHECK-ENCODING: [0xff,0x3f,0x5f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 5f e5 <unknown>
+
+stnt1w z31.s, p7, [z31.s, x0]
+// CHECK-INST: stnt1w { z31.s }, p7, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x40,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 40 e5 <unknown>
+
+stnt1w z0.d, p0, [z1.d]
+// CHECK-INST: stnt1w { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x1f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 1f e5 <unknown>
+
+stnt1w z31.d, p7, [z31.d, xzr]
+// CHECK-INST: stnt1w { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x1f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 1f e5 <unknown>
+
+stnt1w z31.d, p7, [z31.d, x0]
+// CHECK-INST: stnt1w { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x00,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 00 e5 <unknown>
+
+stnt1w { z0.s }, p0, [z1.s]
+// CHECK-INST: stnt1w { z0.s }, p0, [z1.s]
+// CHECK-ENCODING: [0x20,0x20,0x5f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 5f e5 <unknown>
+
+stnt1w { z31.s }, p7, [z31.s, xzr]
+// CHECK-INST: stnt1w { z31.s }, p7, [z31.s]
+// CHECK-ENCODING: [0xff,0x3f,0x5f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 5f e5 <unknown>
+
+stnt1w { z31.s }, p7, [z31.s, x0]
+// CHECK-INST: stnt1w { z31.s }, p7, [z31.s, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x40,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 40 e5 <unknown>
+
+stnt1w { z0.d }, p0, [z1.d]
+// CHECK-INST: stnt1w { z0.d }, p0, [z1.d]
+// CHECK-ENCODING: [0x20,0x20,0x1f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 20 20 1f e5 <unknown>
+
+stnt1w { z31.d }, p7, [z31.d, xzr]
+// CHECK-INST: stnt1w { z31.d }, p7, [z31.d]
+// CHECK-ENCODING: [0xff,0x3f,0x1f,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 1f e5 <unknown>
+
+stnt1w { z31.d }, p7, [z31.d, x0]
+// CHECK-INST: stnt1w { z31.d }, p7, [z31.d, x0]
+// CHECK-ENCODING: [0xff,0x3f,0x00,0xe5]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 3f 00 e5 <unknown>

From 087d1337f8a9d080cd9aaafbbbfcce4eff532784 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 31 May 2019 09:06:53 +0000
Subject: [PATCH 0735/1176] [AArch64][SVE2] Asm: support TBL/TBX instructions

Summary:
A three sources variant of the TBL instruction is added to the existing
SVE instruction in SVE2. This is implemented with minor changes to the
existing TableGen class. TBX is a new instruction with its own
definition.

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62600

llvm-svn: 362214
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  4 ++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 47 ++++++++++++++---
 llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s   | 51 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/tbl.s               | 32 ++++++++++++
 llvm/test/MC/AArch64/SVE2/tbx-diagnostics.s   | 22 ++++++++
 llvm/test/MC/AArch64/SVE2/tbx.s               | 32 ++++++++++++
 6 files changed, 181 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/tbl.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/tbx-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/tbx.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a7d31cc0c6b20..6a6fedd03038c 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1367,6 +1367,10 @@ let Predicates = [HasSVE2] in {
   defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
   defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
   defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
+
+  // SVE table lookup (three sources)
+  defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
+  defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx">;
 }
 
 let Predicates = [HasSVE2AES] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index afe845a52e565..c06b23eca1bc1 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -700,8 +700,8 @@ multiclass sve_int_perm_dup_i<string asm> {
                   (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
 }
 
-class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
-                       RegisterOperand VecList>
+class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm,
+                       ZPRRegOp zprty, RegisterOperand VecList>
 : I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm),
   asm, "\t$Zd, $Zn, $Zm",
   "",
@@ -713,16 +713,18 @@ class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
   let Inst{23-22} = sz8_64;
   let Inst{21}    = 0b1;
   let Inst{20-16} = Zm;
-  let Inst{15-10} = 0b001100;
+  let Inst{15-13} = 0b001;
+  let Inst{12-11} = opc;
+  let Inst{10}    = 0b0;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
 }
 
 multiclass sve_int_perm_tbl<string asm> {
-  def _B : sve_int_perm_tbl<0b00, asm, ZPR8, Z_b>;
-  def _H : sve_int_perm_tbl<0b01, asm, ZPR16, Z_h>;
-  def _S : sve_int_perm_tbl<0b10, asm, ZPR32, Z_s>;
-  def _D : sve_int_perm_tbl<0b11, asm, ZPR64, Z_d>;
+  def _B : sve_int_perm_tbl<0b00, 0b10, asm, ZPR8,  Z_b>;
+  def _H : sve_int_perm_tbl<0b01, 0b10, asm, ZPR16, Z_h>;
+  def _S : sve_int_perm_tbl<0b10, 0b10, asm, ZPR32, Z_s>;
+  def _D : sve_int_perm_tbl<0b11, 0b10, asm, ZPR64, Z_d>;
 
   def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
                  (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>;
@@ -734,6 +736,37 @@ multiclass sve_int_perm_tbl<string asm> {
                  (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>;
 }
 
+multiclass sve2_int_perm_tbl<string asm> {
+  def _B : sve_int_perm_tbl<0b00, 0b01, asm, ZPR8,  ZZ_b>;
+  def _H : sve_int_perm_tbl<0b01, 0b01, asm, ZPR16, ZZ_h>;
+  def _S : sve_int_perm_tbl<0b10, 0b01, asm, ZPR32, ZZ_s>;
+  def _D : sve_int_perm_tbl<0b11, 0b01, asm, ZPR64, ZZ_d>;
+}
+
+class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+  asm, "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zd;
+  bits<5> Zm;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = sz8_64;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b001011;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+}
+
+multiclass sve2_int_perm_tbx<string asm> {
+  def _B : sve2_int_perm_tbx<0b00, asm, ZPR8>;
+  def _H : sve2_int_perm_tbx<0b01, asm, ZPR16>;
+  def _S : sve2_int_perm_tbx<0b10, asm, ZPR32>;
+  def _D : sve2_int_perm_tbx<0b11, asm, ZPR64>;
+}
+
 class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins zprty:$Zn),
   asm, "\t$Zd, $Zn",
diff --git a/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s
new file mode 100644
index 0000000000000..1c74e4bbc8274
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s
@@ -0,0 +1,51 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+tbl z0.b, { z1.b, z2.b }, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: tbl z0.b, { z1.b, z2.b }, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Invalid vector list.
+
+tbl z0.d, { }, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector register expected
+// CHECK-NEXT: tbl z0.d, { }, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+tbl z0.d, { z1.d, z2.d, z3.d }, z4.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: tbl z0.d, { z1.d, z2.d, z3.d }, z4.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+tbl z0.d, { z1.d, z2.b }, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+// CHECK-NEXT: tbl z0.d, { z1.d, z2.b }, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+tbl z0.d, { z1.d, z21.d }, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential
+// CHECK-NEXT: tbl z0.d, { z1.d, z21.d }, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+tbl z0.d, { v0.2d, v1.2d }, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: tbl z0.d, { v0.2d, v1.2d }, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+tbl  z31.d, { z30.d, z31.d }, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: tbl  z31.d, { z30.d, z31.d }, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+tbl  z31.d, { z30.d, z31.d }, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: tbl  z31.d, { z30.d, z31.d }, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/tbl.s b/llvm/test/MC/AArch64/SVE2/tbl.s
new file mode 100644
index 0000000000000..6cb122ed353f0
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/tbl.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+tbl  z28.b, { z29.b, z30.b }, z31.b
+// CHECK-INST: tbl  z28.b, { z29.b, z30.b }, z31.b
+// CHECK-ENCODING: [0xbc,0x2b,0x3f,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bc 2b 3f 05 <unknown>
+
+tbl  z28.h, { z29.h, z30.h }, z31.h
+// CHECK-INST: tbl  z28.h, { z29.h, z30.h }, z31.h
+// CHECK-ENCODING: [0xbc,0x2b,0x7f,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bc 2b 7f 05 <unknown>
+
+tbl  z28.s, { z29.s, z30.s }, z31.s
+// CHECK-INST: tbl  z28.s, { z29.s, z30.s }, z31.s
+// CHECK-ENCODING: [0xbc,0x2b,0xbf,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bc 2b bf 05 <unknown>
+
+tbl  z28.d, { z29.d, z30.d }, z31.d
+// CHECK-INST: tbl  z28.d, { z29.d, z30.d }, z31.d
+// CHECK-ENCODING: [0xbc,0x2b,0xff,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: bc 2b ff 05 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/tbx-diagnostics.s b/llvm/test/MC/AArch64/SVE2/tbx-diagnostics.s
new file mode 100644
index 0000000000000..08aa4f0d0df1b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/tbx-diagnostics.s
@@ -0,0 +1,22 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+tbx z0.b, z1.b, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: tbx z0.b, z1.b, z2.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.d, p0/z, z6.d
+tbx  z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: tbx  z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+tbx  z31.d, z31.d, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: tbx  z31.d, z31.d, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/tbx.s b/llvm/test/MC/AArch64/SVE2/tbx.s
new file mode 100644
index 0000000000000..8e49185873447
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/tbx.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+tbx  z31.b, z31.b, z31.b
+// CHECK-INST: tbx	z31.b, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0x2f,0x3f,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f 3f 05 <unknown>
+
+tbx  z31.h, z31.h, z31.h
+// CHECK-INST: tbx	z31.h, z31.h, z31.h
+// CHECK-ENCODING: [0xff,0x2f,0x7f,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f 7f 05 <unknown>
+
+tbx  z31.s, z31.s, z31.s
+// CHECK-INST: tbx	z31.s, z31.s, z31.s
+// CHECK-ENCODING: [0xff,0x2f,0xbf,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f bf 05 <unknown>
+
+tbx  z31.d, z31.d, z31.d
+// CHECK-INST: tbx	z31.d, z31.d, z31.d
+// CHECK-ENCODING: [0xff,0x2f,0xff,0x05]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 2f ff 05 <unknown>

From 0fc3a0739850fd84ec910046b90514d0c97f2420 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Fri, 31 May 2019 09:13:55 +0000
Subject: [PATCH 0736/1176] [AArch64][SVE2] Asm: support WHILE instructions

Summary:
Patch adds support for the following instructions:
    * WHILEGE, WHILEGT, WHILEHS, WHILEHI, WHILEWR, WHILERW

The specification can be found here:
https://developer.arm.com/docs/ddi0602/latest

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62601

llvm-svn: 362215
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 15 ++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 26 +++++++
 .../MC/AArch64/SVE2/whilege-diagnostics.s     | 29 ++++++++
 llvm/test/MC/AArch64/SVE2/whilege.s           | 68 +++++++++++++++++++
 .../MC/AArch64/SVE2/whilegt-diagnostics.s     | 29 ++++++++
 llvm/test/MC/AArch64/SVE2/whilegt.s           | 68 +++++++++++++++++++
 .../MC/AArch64/SVE2/whilehi-diagnostics.s     | 29 ++++++++
 llvm/test/MC/AArch64/SVE2/whilehi.s           | 68 +++++++++++++++++++
 .../MC/AArch64/SVE2/whilehs-diagnostics.s     | 29 ++++++++
 llvm/test/MC/AArch64/SVE2/whilehs.s           | 68 +++++++++++++++++++
 .../MC/AArch64/SVE2/whilerw-diagnostics.s     | 25 +++++++
 llvm/test/MC/AArch64/SVE2/whilerw.s           | 32 +++++++++
 .../MC/AArch64/SVE2/whilewr-diagnostics.s     | 25 +++++++
 llvm/test/MC/AArch64/SVE2/whilewr.s           | 32 +++++++++
 14 files changed, 543 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilege-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilege.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilegt-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilegt.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilehi-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilehi.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilehs-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilehs.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilerw-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilerw.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilewr-diagnostics.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/whilewr.s

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6a6fedd03038c..79ab42f4c0807 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1371,6 +1371,21 @@ let Predicates = [HasSVE2] in {
   // SVE table lookup (three sources)
   defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx">;
+
+  // SVE integer compare scalar count and limit
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
+  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
+  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">;
+
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">;
+  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
+  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
+
+  // SVE pointer conflict compare
+  defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
+  defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
 }
 
 let Predicates = [HasSVE2AES] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index c06b23eca1bc1..808e594670813 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3270,6 +3270,32 @@ multiclass sve_int_while8_rr<bits<3> opc, string asm> {
   def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
 }
 
+class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
+                        PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins GPR64:$Rn, GPR64:$Rm),
+  asm, "\t$Pd, $Rn, $Rm",
+  "", []>, Sched<[]> {
+  bits<4> Pd;
+  bits<5> Rm;
+  bits<5> Rn;
+  let Inst{31-24} = 0b00100101;
+  let Inst{23-22} = sz8_64;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Rm;
+  let Inst{15-10} = 0b001100;
+  let Inst{9-5}   = Rn;
+  let Inst{4}     = rw;
+  let Inst{3-0}   = Pd;
+
+  let Defs = [NZCV];
+}
+
+multiclass sve2_int_while_rr<bits<1> rw, string asm> {
+  def _B : sve2_int_while_rr<0b00, rw, asm, PPR8>;
+  def _H : sve2_int_while_rr<0b01, rw, asm, PPR16>;
+  def _S : sve2_int_while_rr<0b10, rw, asm, PPR32>;
+  def _D : sve2_int_while_rr<0b11, rw, asm, PPR64>;
+}
 
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Fast Reduction Group
diff --git a/llvm/test/MC/AArch64/SVE2/whilege-diagnostics.s b/llvm/test/MC/AArch64/SVE2/whilege-diagnostics.s
new file mode 100644
index 0000000000000..974deb0597b5e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilege-diagnostics.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilege  p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilege  p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilege  p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilege  p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilege  p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilege  p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+whilege  p15, w0, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: whilege  p15, w0, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/whilege.s b/llvm/test/MC/AArch64/SVE2/whilege.s
new file mode 100644
index 0000000000000..a506794f449d7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilege.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilege  p15.b, xzr, x0
+// CHECK-INST: whilege	p15.b, xzr, x0
+// CHECK-ENCODING: [0xef,0x13,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ef 13 20 25 <unknown>
+
+whilege  p15.b, x0, xzr
+// CHECK-INST: whilege	p15.b, x0, xzr
+// CHECK-ENCODING: [0x0f,0x10,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 10 3f 25 <unknown>
+
+whilege  p15.b, wzr, w0
+// CHECK-INST: whilege	p15.b, wzr, w0
+// CHECK-ENCODING: [0xef,0x03,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ef 03 20 25 <unknown>
+
+whilege  p15.b, w0, wzr
+// CHECK-INST: whilege	p15.b, w0, wzr
+// CHECK-ENCODING: [0x0f,0x00,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 00 3f 25 <unknown>
+
+whilege  p15.h, x0, xzr
+// CHECK-INST: whilege	p15.h, x0, xzr
+// CHECK-ENCODING: [0x0f,0x10,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 10 7f 25 <unknown>
+
+whilege  p15.h, w0, wzr
+// CHECK-INST: whilege	p15.h, w0, wzr
+// CHECK-ENCODING: [0x0f,0x00,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 00 7f 25 <unknown>
+
+whilege  p15.s, x0, xzr
+// CHECK-INST: whilege	p15.s, x0, xzr
+// CHECK-ENCODING: [0x0f,0x10,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 10 bf 25 <unknown>
+
+whilege  p15.s, w0, wzr
+// CHECK-INST: whilege	p15.s, w0, wzr
+// CHECK-ENCODING: [0x0f,0x00,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 00 bf 25 <unknown>
+
+whilege  p15.d, w0, wzr
+// CHECK-INST: whilege	p15.d, w0, wzr
+// CHECK-ENCODING: [0x0f,0x00,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 00 ff 25 <unknown>
+
+whilege  p15.d, x0, xzr
+// CHECK-INST: whilege	p15.d, x0, xzr
+// CHECK-ENCODING: [0x0f,0x10,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 10 ff 25 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/whilegt-diagnostics.s b/llvm/test/MC/AArch64/SVE2/whilegt-diagnostics.s
new file mode 100644
index 0000000000000..4d8e35146a51f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilegt-diagnostics.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilegt  p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilegt  p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilegt  p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilegt  p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilegt  p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilegt  p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+whilegt  p15, w0, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: whilegt  p15, w0, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/whilegt.s b/llvm/test/MC/AArch64/SVE2/whilegt.s
new file mode 100644
index 0000000000000..3f307bcc56a0e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilegt.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilegt  p15.b, xzr, x0
+// CHECK-INST: whilegt	p15.b, xzr, x0
+// CHECK-ENCODING: [0xff,0x13,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 13 20 25 <unknown>
+
+whilegt  p15.b, x0, xzr
+// CHECK-INST: whilegt	p15.b, x0, xzr
+// CHECK-ENCODING: [0x1f,0x10,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 10 3f 25 <unknown>
+
+whilegt  p15.b, wzr, w0
+// CHECK-INST: whilegt	p15.b, wzr, w0
+// CHECK-ENCODING: [0xff,0x03,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 03 20 25 <unknown>
+
+whilegt  p15.b, w0, wzr
+// CHECK-INST: whilegt	p15.b, w0, wzr
+// CHECK-ENCODING: [0x1f,0x00,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 00 3f 25 <unknown>
+
+whilegt  p15.h, x0, xzr
+// CHECK-INST: whilegt	p15.h, x0, xzr
+// CHECK-ENCODING: [0x1f,0x10,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 10 7f 25 <unknown>
+
+whilegt  p15.h, w0, wzr
+// CHECK-INST: whilegt	p15.h, w0, wzr
+// CHECK-ENCODING: [0x1f,0x00,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 00 7f 25 <unknown>
+
+whilegt  p15.s, x0, xzr
+// CHECK-INST: whilegt	p15.s, x0, xzr
+// CHECK-ENCODING: [0x1f,0x10,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 10 bf 25 <unknown>
+
+whilegt  p15.s, w0, wzr
+// CHECK-INST: whilegt	p15.s, w0, wzr
+// CHECK-ENCODING: [0x1f,0x00,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 00 bf 25 <unknown>
+
+whilegt  p15.d, w0, wzr
+// CHECK-INST: whilegt	p15.d, w0, wzr
+// CHECK-ENCODING: [0x1f,0x00,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 00 ff 25 <unknown>
+
+whilegt  p15.d, x0, xzr
+// CHECK-INST: whilegt	p15.d, x0, xzr
+// CHECK-ENCODING: [0x1f,0x10,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 10 ff 25 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/whilehi-diagnostics.s b/llvm/test/MC/AArch64/SVE2/whilehi-diagnostics.s
new file mode 100644
index 0000000000000..d5c278409f859
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilehi-diagnostics.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilehi  p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilehi  p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilehi  p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilehi  p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilehi  p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilehi  p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+whilehi  p15, w0, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: whilehi  p15, w0, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/whilehi.s b/llvm/test/MC/AArch64/SVE2/whilehi.s
new file mode 100644
index 0000000000000..8e6272dd0239d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilehi.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilehi  p15.b, xzr, x0
+// CHECK-INST: whilehi	p15.b, xzr, x0
+// CHECK-ENCODING: [0xff,0x1b,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 1b 20 25 <unknown>
+
+whilehi  p15.b, x0, xzr
+// CHECK-INST: whilehi	p15.b, x0, xzr
+// CHECK-ENCODING: [0x1f,0x18,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 18 3f 25 <unknown>
+
+whilehi  p15.b, wzr, w0
+// CHECK-INST: whilehi	p15.b, wzr, w0
+// CHECK-ENCODING: [0xff,0x0b,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ff 0b 20 25 <unknown>
+
+whilehi  p15.b, w0, wzr
+// CHECK-INST: whilehi	p15.b, w0, wzr
+// CHECK-ENCODING: [0x1f,0x08,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 08 3f 25 <unknown>
+
+whilehi  p15.h, x0, xzr
+// CHECK-INST: whilehi	p15.h, x0, xzr
+// CHECK-ENCODING: [0x1f,0x18,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 18 7f 25 <unknown>
+
+whilehi  p15.h, w0, wzr
+// CHECK-INST: whilehi	p15.h, w0, wzr
+// CHECK-ENCODING: [0x1f,0x08,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 08 7f 25 <unknown>
+
+whilehi  p15.s, x0, xzr
+// CHECK-INST: whilehi	p15.s, x0, xzr
+// CHECK-ENCODING: [0x1f,0x18,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 18 bf 25 <unknown>
+
+whilehi  p15.s, w0, wzr
+// CHECK-INST: whilehi	p15.s, w0, wzr
+// CHECK-ENCODING: [0x1f,0x08,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 08 bf 25 <unknown>
+
+whilehi  p15.d, w0, wzr
+// CHECK-INST: whilehi	p15.d, w0, wzr
+// CHECK-ENCODING: [0x1f,0x08,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 08 ff 25 <unknown>
+
+whilehi  p15.d, x0, xzr
+// CHECK-INST: whilehi	p15.d, x0, xzr
+// CHECK-ENCODING: [0x1f,0x18,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 1f 18 ff 25 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/whilehs-diagnostics.s b/llvm/test/MC/AArch64/SVE2/whilehs-diagnostics.s
new file mode 100644
index 0000000000000..b1ddafeedcad2
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilehs-diagnostics.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilehs  p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilehs  p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilehs  p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilehs  p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilehs  p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilehs  p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+whilehs  p15, w0, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: whilehs  p15, w0, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/whilehs.s b/llvm/test/MC/AArch64/SVE2/whilehs.s
new file mode 100644
index 0000000000000..1e01a1964be45
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilehs.s
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilehs  p15.b, xzr, x0
+// CHECK-INST: whilehs	p15.b, xzr, x0
+// CHECK-ENCODING: [0xef,0x1b,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ef 1b 20 25 <unknown>
+
+whilehs  p15.b, x0, xzr
+// CHECK-INST: whilehs	p15.b, x0, xzr
+// CHECK-ENCODING: [0x0f,0x18,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 18 3f 25 <unknown>
+
+whilehs  p15.b, wzr, w0
+// CHECK-INST: whilehs	p15.b, wzr, w0
+// CHECK-ENCODING: [0xef,0x0b,0x20,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: ef 0b 20 25 <unknown>
+
+whilehs  p15.b, w0, wzr
+// CHECK-INST: whilehs	p15.b, w0, wzr
+// CHECK-ENCODING: [0x0f,0x08,0x3f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 08 3f 25 <unknown>
+
+whilehs  p15.h, x0, xzr
+// CHECK-INST: whilehs	p15.h, x0, xzr
+// CHECK-ENCODING: [0x0f,0x18,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 18 7f 25 <unknown>
+
+whilehs  p15.h, w0, wzr
+// CHECK-INST: whilehs	p15.h, w0, wzr
+// CHECK-ENCODING: [0x0f,0x08,0x7f,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 08 7f 25 <unknown>
+
+whilehs  p15.s, x0, xzr
+// CHECK-INST: whilehs	p15.s, x0, xzr
+// CHECK-ENCODING: [0x0f,0x18,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 18 bf 25 <unknown>
+
+whilehs  p15.s, w0, wzr
+// CHECK-INST: whilehs	p15.s, w0, wzr
+// CHECK-ENCODING: [0x0f,0x08,0xbf,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 08 bf 25 <unknown>
+
+whilehs  p15.d, w0, wzr
+// CHECK-INST: whilehs	p15.d, w0, wzr
+// CHECK-ENCODING: [0x0f,0x08,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 08 ff 25 <unknown>
+
+whilehs  p15.d, x0, xzr
+// CHECK-INST: whilehs	p15.d, x0, xzr
+// CHECK-ENCODING: [0x0f,0x18,0xff,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: 0f 18 ff 25 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/whilerw-diagnostics.s b/llvm/test/MC/AArch64/SVE2/whilerw-diagnostics.s
new file mode 100644
index 0000000000000..5ea3b6d3a2e6a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilerw-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilerw  p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilerw  p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilerw  p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilerw  p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilerw  p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilerw  p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilerw  p15.b, w0, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilerw  p15.b, w0, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/whilerw.s b/llvm/test/MC/AArch64/SVE2/whilerw.s
new file mode 100644
index 0000000000000..b4a5bd0962f3f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilerw.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilerw  p15.b, x30, x30
+// CHECK-INST: whilerw  p15.b, x30, x30
+// CHECK-ENCODING: [0xdf,0x33,0x3e,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 33 3e 25 <unknown>
+
+whilerw  p15.h, x30, x30
+// CHECK-INST: whilerw  p15.h, x30, x30
+// CHECK-ENCODING: [0xdf,0x33,0x7e,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 33 7e 25 <unknown>
+
+whilerw  p15.s, x30, x30
+// CHECK-INST: whilerw  p15.s, x30, x30
+// CHECK-ENCODING: [0xdf,0x33,0xbe,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 33 be 25 <unknown>
+
+whilerw  p15.d, x30, x30
+// CHECK-INST: whilerw  p15.d, x30, x30
+// CHECK-ENCODING: [0xdf,0x33,0xfe,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: df 33 fe 25 <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/whilewr-diagnostics.s b/llvm/test/MC/AArch64/SVE2/whilewr-diagnostics.s
new file mode 100644
index 0000000000000..b2e202985d917
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilewr-diagnostics.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2  2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid scalar registers
+
+whilewr  p15.b, xzr, sp
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilewr  p15.b, xzr, sp
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilewr  p15.b, xzr, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilewr  p15.b, xzr, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilewr  p15.b, w0, x0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilewr  p15.b, w0, x0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+whilewr  p15.b, w0, w0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: whilewr  p15.b, w0, w0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2/whilewr.s b/llvm/test/MC/AArch64/SVE2/whilewr.s
new file mode 100644
index 0000000000000..588d1b2262b34
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/whilewr.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d -mattr=+sve2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2 < %s \
+// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+whilewr  p15.b, x30, x30
+// CHECK-INST: whilewr  p15.b, x30, x30
+// CHECK-ENCODING: [0xcf,0x33,0x3e,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: cf 33 3e 25 <unknown>
+
+whilewr  p15.h, x30, x30
+// CHECK-INST: whilewr  p15.h, x30, x30
+// CHECK-ENCODING: [0xcf,0x33,0x7e,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: cf 33 7e 25 <unknown>
+
+whilewr  p15.s, x30, x30
+// CHECK-INST: whilewr  p15.s, x30, x30
+// CHECK-ENCODING: [0xcf,0x33,0xbe,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: cf 33 be 25 <unknown>
+
+whilewr  p15.d, x30, x30
+// CHECK-INST: whilewr  p15.d, x30, x30
+// CHECK-ENCODING: [0xcf,0x33,0xfe,0x25]
+// CHECK-ERROR: instruction requires: sve2
+// CHECK-UNKNOWN: cf 33 fe 25 <unknown>

From 886c4ef35aacefd33dab1d1b674a872d08036f00 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Fri, 31 May 2019 09:47:04 +0000
Subject: [PATCH 0737/1176] [InstCombine] 'add (sub C1, X), C2 --> sub (add C1,
 C2), X' constant-fold

https://rise4fun.com/Alive/qJQ

llvm-svn: 362216
---
 .../InstCombine/InstCombineAddSub.cpp         |  9 +++++-
 .../InstCombine/addsub-constant-folding.ll    | 28 +++++++------------
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2d608e74b69a2..d422b07d49fe5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -872,7 +872,14 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
   if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
     return NV;
 
-  Value *X, *Y;
+  Value *X;
+  Constant *Op00C;
+
+  // add (sub C1, X), C2 --> sub (add C1, C2), X
+  if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
+    return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);
+
+  Value *Y;
 
   // add (sub X, Y), -1 --> add (not Y), X
   if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
diff --git a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
index fc98f249fc524..66c1e6e91684e 100644
--- a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
+++ b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
@@ -353,12 +353,10 @@ define <4 x i32> @vec_sub_const_const_sub_nonsplat(<4 x i32> %arg) {
 }
 
 ; (c1-x)+c2
-; FIXME
 
 define i32 @const_sub_add_const(i32 %arg) {
 ; CHECK-LABEL: @const_sub_add_const(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], 2
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 10, [[ARG:%.*]]
 ; CHECK-NEXT:    ret i32 [[T1]]
 ;
   %t0 = sub i32 8, %arg
@@ -370,7 +368,7 @@ define i32 @const_sub_add_const_extrause(i32 %arg) {
 ; CHECK-LABEL: @const_sub_add_const_extrause(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
 ; CHECK-NEXT:    call void @use(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], 2
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 10, [[ARG]]
 ; CHECK-NEXT:    ret i32 [[T1]]
 ;
   %t0 = sub i32 8, %arg
@@ -381,8 +379,7 @@ define i32 @const_sub_add_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_add_const(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -394,7 +391,7 @@ define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_add_const_extrause(
 ; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -405,8 +402,7 @@ define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
 
 define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_add_const_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, [[ARG:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 2, i32 3, i32 undef, i32 2>
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 23, i32 undef, i32 undef, i32 10>, [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg
@@ -415,12 +411,10 @@ define <4 x i32> @vec_const_sub_add_const_nonsplat(<4 x i32> %arg) {
 }
 
 ; (c1-x)-c2
-; FIXME
 
 define i32 @const_sub_sub_const(i32 %arg) {
 ; CHECK-LABEL: @const_sub_sub_const(
-; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], -2
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 6, [[ARG:%.*]]
 ; CHECK-NEXT:    ret i32 [[T1]]
 ;
   %t0 = sub i32 8, %arg
@@ -432,7 +426,7 @@ define i32 @const_sub_sub_const_extrause(i32 %arg) {
 ; CHECK-LABEL: @const_sub_sub_const_extrause(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
 ; CHECK-NEXT:    call void @use(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add i32 [[T0]], -2
+; CHECK-NEXT:    [[T1:%.*]] = sub i32 6, [[ARG]]
 ; CHECK-NEXT:    ret i32 [[T1]]
 ;
   %t0 = sub i32 8, %arg
@@ -443,8 +437,7 @@ define i32 @const_sub_sub_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_sub_const(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 -2, i32 -2, i32 -2, i32 -2>
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 6, i32 6, i32 6, i32 6>, [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -456,7 +449,7 @@ define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_sub_const_extrause(
 ; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 -2, i32 -2, i32 -2, i32 -2>
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 6, i32 6, i32 6, i32 6>, [[ARG]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -467,8 +460,7 @@ define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
 
 define <4 x i32> @vec_const_sub_sub_const_nonsplat(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_sub_const_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, [[ARG:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0]], <i32 -2, i32 -3, i32 undef, i32 -2>
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 19, i32 undef, i32 undef, i32 6>, [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 21, i32 undef, i32 8, i32 8>, %arg

From 39390d8317003ffc0627a30ae913bba3b3616efb Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Fri, 31 May 2019 09:47:16 +0000
Subject: [PATCH 0738/1176] [InstCombine] 'C-(C2-X) --> X+(C-C2)' constant-fold

It looks this fold was already partially happening, indirectly
via some other folds, but with one-use limitation.
No other fold here has that restriction.

https://rise4fun.com/Alive/ftR

llvm-svn: 362217
---
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp      | 7 ++++++-
 .../test/Transforms/InstCombine/addsub-constant-folding.ll | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d422b07d49fe5..e6b32ba13a4a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1609,8 +1609,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       if (Instruction *R = foldOpIntoPhi(I, PN))
         return R;
 
-    // C-(X+C2) --> (C-C2)-X
     Constant *C2;
+
+    // C-(C2-X) --> X+(C-C2)
+    if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+      return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
+
+    // C-(X+C2) --> (C-C2)-X
     if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
       return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
   }
diff --git a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
index 66c1e6e91684e..2d9918737d2ac 100644
--- a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
+++ b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
@@ -485,7 +485,7 @@ define i32 @const_sub_const_sub_extrause(i32 %arg) {
 ; CHECK-LABEL: @const_sub_const_sub_extrause(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i32 8, [[ARG:%.*]]
 ; CHECK-NEXT:    call void @use(i32 [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub i32 2, [[T0]]
+; CHECK-NEXT:    [[T1:%.*]] = add i32 [[ARG]], -6
 ; CHECK-NEXT:    ret i32 [[T1]]
 ;
   %t0 = sub i32 8, %arg
@@ -508,7 +508,7 @@ define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_const_sub_extrause(
 ; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 2, i32 2, i32 2, i32 2>, [[T0]]
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 -6, i32 -6, i32 -6, i32 -6>
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg

From 3f29cfd91545594e8a5d018e8d82a9d6bccd341d Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 31 May 2019 10:12:22 +0000
Subject: [PATCH 0739/1176] [ELF] Replace a dead test in getSymVA() with
 assert()

Symbols relative to discarded comdat sections are Undefined instead of
Defined now (after D59649 and D61583). The `== &InputSection::Discarded`
test becomes dead. I cannot find a test related to this behavior.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62725

llvm-svn: 362218
---
 lld/ELF/Symbols.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index e82d1ac1fdf64..8246efcaf1087 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -47,17 +47,11 @@ static uint64_t getSymVA(const Symbol &Sym, int64_t &Addend) {
     auto &D = cast<Defined>(Sym);
     SectionBase *IS = D.Section;
 
-    // According to the ELF spec reference to a local symbol from outside
-    // the group are not allowed. Unfortunately .eh_frame breaks that rule
-    // and must be treated specially. For now we just replace the symbol with
-    // 0.
-    if (IS == &InputSection::Discarded)
-      return 0;
-
     // This is an absolute symbol.
     if (!IS)
       return D.Value;
 
+    assert(IS != &InputSection::Discarded);
     IS = IS->Repl;
 
     uint64_t Offset = D.Value;

From 802c9b59d5c51a787511fe1408f07d6fb5a3bf59 Mon Sep 17 00:00:00 2001
From: Russell Gallop <russell.gallop@gmail.com>
Date: Fri, 31 May 2019 10:14:04 +0000
Subject: [PATCH 0740/1176] ftime-trace: Trace loop passes

These can take a significant amount of time in some builds.

Suggested by Andrea Di Biagio.

Differential Revision: https://reviews.llvm.org/D62666

llvm-svn: 362219
---
 llvm/lib/Analysis/LoopPass.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Analysis/LoopPass.cpp b/llvm/lib/Analysis/LoopPass.cpp
index c57ec2a3e24c7..4ab3798039d84 100644
--- a/llvm/lib/Analysis/LoopPass.cpp
+++ b/llvm/lib/Analysis/LoopPass.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -209,6 +210,8 @@ bool LPPassManager::runOnFunction(Function &F) {
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       LoopPass *P = getContainedPass(Index);
 
+      llvm::TimeTraceScope LoopPassScope("RunLoopPass", P->getPassName());
+
       dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
                    CurrentLoop->getHeader()->getName());
       dumpRequiredSet(P);

From e98baf863126aa579e098e45280442dc6f7e8f59 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 31 May 2019 10:35:45 +0000
Subject: [PATCH 0741/1176] [ELF] Delete GotEntrySize and GotPltEntrySize

GotEntrySize and GotPltEntrySize were added in D22288. Later, with
the introduction of wordsize() (then Config->Wordsize), they become
redundant, because there is no target that sets GotEntrySize or
GotPltEntrySize to a number different from Config->Wordsize.

Reviewed By: grimar, ruiu

Differential Revision: https://reviews.llvm.org/D62727

llvm-svn: 362220
---
 lld/ELF/Arch/AArch64.cpp      |  2 --
 lld/ELF/Arch/AMDGPU.cpp       |  1 -
 lld/ELF/Arch/ARM.cpp          |  2 --
 lld/ELF/Arch/Hexagon.cpp      |  3 +--
 lld/ELF/Arch/Mips.cpp         |  2 --
 lld/ELF/Arch/PPC64.cpp        |  2 --
 lld/ELF/Arch/SPARCV9.cpp      |  1 -
 lld/ELF/Arch/X86.cpp          |  2 --
 lld/ELF/Arch/X86_64.cpp       |  2 --
 lld/ELF/Symbols.cpp           | 12 +++++-------
 lld/ELF/SyntheticSections.cpp | 18 ++++++++----------
 lld/ELF/Target.h              |  2 --
 12 files changed, 14 insertions(+), 35 deletions(-)

diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 02630c337d996..3f35a4f7c739a 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -61,8 +61,6 @@ AArch64::AArch64() {
   PltRel = R_AARCH64_JUMP_SLOT;
   TlsDescRel = R_AARCH64_TLSDESC;
   TlsGotRel = R_AARCH64_TLS_TPREL64;
-  GotEntrySize = 8;
-  GotPltEntrySize = 8;
   PltEntrySize = 16;
   PltHeaderSize = 32;
   DefaultMaxPageSize = 65536;
diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 1798a2997db5b..8f434e23047e6 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -35,7 +35,6 @@ AMDGPU::AMDGPU() {
   RelativeRel = R_AMDGPU_RELATIVE64;
   GotRel = R_AMDGPU_ABS64;
   NoneRel = R_AMDGPU_NONE;
-  GotEntrySize = 8;
 }
 
 static uint32_t getEFlags(InputFile *File) {
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index fe3dc8002bc87..a43063c50439a 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -56,8 +56,6 @@ ARM::ARM() {
   TlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
   TlsOffsetRel = R_ARM_TLS_DTPOFF32;
   GotBaseSymInGotPlt = false;
-  GotEntrySize = 4;
-  GotPltEntrySize = 4;
   PltEntrySize = 16;
   PltHeaderSize = 32;
   TrapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 0ac48c8e01c1b..f0a00b04b9a73 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -40,11 +40,10 @@ Hexagon::Hexagon() {
   PltRel = R_HEX_JMP_SLOT;
   RelativeRel = R_HEX_RELATIVE;
   GotRel = R_HEX_GLOB_DAT;
-  GotEntrySize = 4;
+
   // The zero'th GOT entry is reserved for the address of _DYNAMIC.  The
   // next 3 are reserved for the dynamic loader.
   GotPltHeaderEntriesNum = 4;
-  GotPltEntrySize = 4;
 
   PltEntrySize = 16;
   PltHeaderSize = 32;
diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp
index e1a95efb9f0c9..0f419a62d1ddb 100644
--- a/lld/ELF/Arch/Mips.cpp
+++ b/lld/ELF/Arch/Mips.cpp
@@ -46,8 +46,6 @@ template <class ELFT> class MIPS final : public TargetInfo {
 template <class ELFT> MIPS<ELFT>::MIPS() {
   GotPltHeaderEntriesNum = 2;
   DefaultMaxPageSize = 65536;
-  GotEntrySize = sizeof(typename ELFT::uint);
-  GotPltEntrySize = sizeof(typename ELFT::uint);
   GotBaseSymInGotPlt = false;
   PltEntrySize = 16;
   PltHeaderSize = 32;
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 8a1b4f887b899..ae0d63ab74828 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -288,9 +288,7 @@ PPC64::PPC64() {
   PltRel = R_PPC64_JMP_SLOT;
   RelativeRel = R_PPC64_RELATIVE;
   IRelativeRel = R_PPC64_IRELATIVE;
-  GotEntrySize = 8;
   PltEntrySize = 4;
-  GotPltEntrySize = 8;
   GotBaseSymInGotPlt = false;
   GotHeaderEntriesNum = 1;
   GotPltHeaderEntriesNum = 2;
diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp
index 9be0696596cf6..44bc279341198 100644
--- a/lld/ELF/Arch/SPARCV9.cpp
+++ b/lld/ELF/Arch/SPARCV9.cpp
@@ -37,7 +37,6 @@ SPARCV9::SPARCV9() {
   NoneRel = R_SPARC_NONE;
   PltRel = R_SPARC_JMP_SLOT;
   RelativeRel = R_SPARC_RELATIVE;
-  GotEntrySize = 8;
   PltEntrySize = 32;
   PltHeaderSize = 4 * PltEntrySize;
 
diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp
index 468ca20f5405a..ed748d880b34b 100644
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -55,8 +55,6 @@ X86::X86() {
   TlsGotRel = R_386_TLS_TPOFF;
   TlsModuleIndexRel = R_386_TLS_DTPMOD32;
   TlsOffsetRel = R_386_TLS_DTPOFF32;
-  GotEntrySize = 4;
-  GotPltEntrySize = 4;
   PltEntrySize = 16;
   PltHeaderSize = 16;
   TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index fdaf63b11bc3e..5be9294c0d3bf 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -59,8 +59,6 @@ X86_64::X86_64() {
   TlsGotRel = R_X86_64_TPOFF64;
   TlsModuleIndexRel = R_X86_64_DTPMOD64;
   TlsOffsetRel = R_X86_64_DTPOFF64;
-  GotEntrySize = 8;
-  GotPltEntrySize = 8;
   PltEntrySize = 16;
   PltHeaderSize = 16;
   TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 8246efcaf1087..231ba7c1fd632 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -136,9 +136,7 @@ uint64_t Symbol::getGotVA() const {
   return In.Got->getVA() + getGotOffset();
 }
 
-uint64_t Symbol::getGotOffset() const {
-  return GotIndex * Target->GotEntrySize;
-}
+uint64_t Symbol::getGotOffset() const { return GotIndex * Config->Wordsize; }
 
 uint64_t Symbol::getGotPltVA() const {
   if (IsInIplt)
@@ -148,13 +146,13 @@ uint64_t Symbol::getGotPltVA() const {
 
 uint64_t Symbol::getGotPltOffset() const {
   if (IsInIplt)
-    return PltIndex * Target->GotPltEntrySize;
-  return (PltIndex + Target->GotPltHeaderEntriesNum) * Target->GotPltEntrySize;
+    return PltIndex * Config->Wordsize;
+  return (PltIndex + Target->GotPltHeaderEntriesNum) * Config->Wordsize;
 }
 
 uint64_t Symbol::getPPC64LongBranchOffset() const {
   assert(PPC64BranchltIndex != 0xffff);
-  return PPC64BranchltIndex * Target->GotPltEntrySize;
+  return PPC64BranchltIndex * Config->Wordsize;
 }
 
 uint64_t Symbol::getPltVA() const {
@@ -172,7 +170,7 @@ uint64_t Symbol::getPltVA() const {
 uint64_t Symbol::getPPC64LongBranchTableVA() const {
   assert(PPC64BranchltIndex != 0xffff);
   return In.PPC64LongBranchTarget->getVA() +
-         PPC64BranchltIndex * Target->GotPltEntrySize;
+         PPC64BranchltIndex * Config->Wordsize;
 }
 
 uint64_t Symbol::getSize() const {
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 672e0b2030666..9606eeda86bd4 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -542,8 +542,8 @@ void EhFrameSection::writeTo(uint8_t *Buf) {
 }
 
 GotSection::GotSection()
-    : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
-                       Target->GotEntrySize, ".got") {
+    : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Config->Wordsize,
+                       ".got") {
   // PPC64 saves the ElfSym::GlobalOffsetTable .TOC. as the first entry in the
   // .got. If there are no references to .TOC. in the symbol table,
   // ElfSym::GlobalOffsetTable will not be defined and we won't need to save
@@ -1030,7 +1030,7 @@ void MipsGotSection::writeTo(uint8_t *Buf) {
 GotPltSection::GotPltSection()
     : SyntheticSection(SHF_ALLOC | SHF_WRITE,
                        Config->EMachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS,
-                       Target->GotPltEntrySize,
+                       Config->Wordsize,
                        Config->EMachine == EM_PPC64 ? ".plt" : ".got.plt") {}
 
 void GotPltSection::addEntry(Symbol &Sym) {
@@ -1039,13 +1039,12 @@ void GotPltSection::addEntry(Symbol &Sym) {
 }
 
 size_t GotPltSection::getSize() const {
-  return (Target->GotPltHeaderEntriesNum + Entries.size()) *
-         Target->GotPltEntrySize;
+  return (Target->GotPltHeaderEntriesNum + Entries.size()) * Config->Wordsize;
 }
 
 void GotPltSection::writeTo(uint8_t *Buf) {
   Target->writeGotPltHeader(Buf);
-  Buf += Target->GotPltHeaderEntriesNum * Target->GotPltEntrySize;
+  Buf += Target->GotPltHeaderEntriesNum * Config->Wordsize;
   for (const Symbol *B : Entries) {
     Target->writeGotPlt(Buf, *B);
     Buf += Config->Wordsize;
@@ -1076,7 +1075,7 @@ static StringRef getIgotPltName() {
 IgotPltSection::IgotPltSection()
     : SyntheticSection(SHF_ALLOC | SHF_WRITE,
                        Config->EMachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS,
-                       Target->GotPltEntrySize, getIgotPltName()) {}
+                       Config->Wordsize, getIgotPltName()) {}
 
 void IgotPltSection::addEntry(Symbol &Sym) {
   assert(Sym.PltIndex == Entries.size());
@@ -1084,7 +1083,7 @@ void IgotPltSection::addEntry(Symbol &Sym) {
 }
 
 size_t IgotPltSection::getSize() const {
-  return Entries.size() * Target->GotPltEntrySize;
+  return Entries.size() * Config->Wordsize;
 }
 
 void IgotPltSection::writeTo(uint8_t *Buf) {
@@ -3228,7 +3227,6 @@ size_t PPC64LongBranchTargetSection::getSize() const {
 }
 
 void PPC64LongBranchTargetSection::writeTo(uint8_t *Buf) {
-  assert(Target->GotPltEntrySize == 8);
   // If linking non-pic we have the final addresses of the targets and they get
   // written to the table directly. For pic the dynamic linker will allocate
   // the section and fill it it.
@@ -3241,7 +3239,7 @@ void PPC64LongBranchTargetSection::writeTo(uint8_t *Buf) {
     // must be a local-call.
     write64(Buf,
             Sym->getVA() + getPPC64GlobalEntryToLocalEntryOffset(Sym->StOther));
-    Buf += Target->GotPltEntrySize;
+    Buf += 8;
   }
 }
 
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index ca38a4556467a..b6ccb2bc7d95c 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -99,8 +99,6 @@ class TargetInfo {
   RelType TlsGotRel;
   RelType TlsModuleIndexRel;
   RelType TlsOffsetRel;
-  unsigned GotEntrySize = 0;
-  unsigned GotPltEntrySize = 0;
   unsigned PltEntrySize;
   unsigned PltHeaderSize;
 

From 10c548b83988242e06dbdfd370a3efcf1536a930 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Fri, 31 May 2019 11:51:42 +0000
Subject: [PATCH 0742/1176] gn build: Merge r362190

llvm-svn: 362221
---
 llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index c100abedb2c02..a51af9880bec2 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -157,6 +157,8 @@ copy("Headers") {
     "cuda_wrappers/complex",
     "cuda_wrappers/new",
     "ppc_wrappers/mmintrin.h",
+    "ppc_wrappers/xmmintrin.h",
+    "ppc_wrappers/mm_malloc.h",
     "openmp_wrappers/math.h",
     "openmp_wrappers/cmath",
     "openmp_wrappers/__clang_openmp_math.h",

From f23ae7348f4e13b83a3a7a6d8419e9874e308a80 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Fri, 31 May 2019 11:52:59 +0000
Subject: [PATCH 0743/1176] gn build: Merge r362196

llvm-svn: 362222
---
 llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index a51af9880bec2..7ccefbc9689cd 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -64,6 +64,8 @@ copy("Headers") {
     "avx512vlcdintrin.h",
     "avx512vldqintrin.h",
     "avx512vlintrin.h",
+    "avx512vp2intersectintrin.h",
+    "avx512vlvp2intersectintrin.h",
     "avx512vpopcntdqvlintrin.h",
     "avx512vnniintrin.h",
     "avx512vlvnniintrin.h",

From 155bd6c3b065bce95dd47c0d5d93b0f0938abd3f Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Fri, 31 May 2019 12:07:05 +0000
Subject: [PATCH 0744/1176] gn build: Merge r362160

llvm-svn: 362223
---
 llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn
index e5ac0f508805e..59d15041a5268 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn
@@ -9,5 +9,6 @@ static_library("Remarks") {
     "RemarkParser.cpp",
     "RemarkStringTable.cpp",
     "YAMLRemarkParser.cpp",
+    "YAMLRemarkSerializer.cpp",
   ]
 }

From 60d88e0e9050e232faa80ddee2c7f0b3cae649f8 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Fri, 31 May 2019 13:16:21 +0000
Subject: [PATCH 0745/1176] [llvm-readobj] - Remove excessive `dynamic.test`

dynamic.test is a test that checks dumping of
dynamic tags. It uses precompiled objects as inputs
and it is completely excessive nowadays:

Now we have elf-dynamic-tags-machine-specific.test
and elf-dynamic-tags.test.
(https://github.com/llvm-mirror/llvm/blob/master/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test)
(https://github.com/llvm-mirror/llvm/blob/master/test/tools/llvm-readobj/elf-dynamic-tags.test)

First is used to check target specific tags and second tests the common flags.
These tests use YAML, which is much better than using precompiled binaries.

Note that new reviews tend to update the YAML based
tests to add new tags, e.g. see D62596.

With this patch it became possible to remove
dynamic-table-so.aarch64 binary from the inputs folder.
(other binaries are still used in other tests).

Differential revision: https://reviews.llvm.org/D62728

llvm-svn: 362224
---
 .../Inputs/dynamic-table-so.aarch64           | Bin 8960 -> 0 bytes
 .../tools/llvm-readobj/Inputs/dynamic-table.c |   2 -
 llvm/test/tools/llvm-readobj/dynamic.test     | 246 ------------------
 3 files changed, 248 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/dynamic-table-so.aarch64
 delete mode 100644 llvm/test/tools/llvm-readobj/dynamic.test

diff --git a/llvm/test/tools/llvm-readobj/Inputs/dynamic-table-so.aarch64 b/llvm/test/tools/llvm-readobj/Inputs/dynamic-table-so.aarch64
deleted file mode 100644
index 1e992294dc0643d1c05adf4101f0d5ac501b400d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8960
zcmeHNeT-CB6+g2>p=J445c#ke-V&l)<Gx+Kx{Eepc9vzJ*#%q{YmLd<w==W5)6T~_
z^9FZIBeXWAt+uj-NHoTv{vrHBNi1<uQ?~REF~$TnklK*AMU7Z71)J86=`x;k?>RGX
z-h0z(Li~#h@11*p_k6u`?|pOcd3&&@cY8x91X9BAQ=rz3xf+Q`$7-nnM4%Jq;e9o%
zQoMInyL`35yEP;fl!U&MiK&IUe`=xjLFjmc#t$(SIck#JMV7nBazbbLQG~L2n1eEO
zUEqE%U59;Cgc2{6d3aI1o}a`+Mfin&?k`S)L#P=(<le;n{V4ZA_I7YQg!1Dk`^!*A
z7_Zo_eenF)$R`Sq9sc9%d*1uz_sf5sdgE_TevC{||2=q-9QpUm`L!XK6FEy5g>(sC
z@ZzRx(Vk1!pDsMz`13!#@c7HGKXmVFU;e`CM-G1X@w3m}T(p1SfunE!{;fq1zjbVa
z*z1)rS{e)pLKqffJjHV@P1DN=P}Gmu+5nuSjR5?sh%F7kZN!WKyt5AeBH~T30y>+>
zQnfsX)h1XD?H;^{_;P$MDKZbvD}KM8Q+4p8jC=J{pqD};T&D?%WsfH43*r7aA4i=0
z_v%@$_!qzfvM{hs<!5*WJug_+NG6xH3XYw3EDNkuHsyddj0CKm1ASH^nNN<S3QjUV
z(AS;LWs?K;P&%pmf_tp^xJ?S|bm}o=d~m<j&l<YZcA=0gz;G@H89SAQv7%FebZRJW
z7INnOU?mf_W1~w$g@X3TT_^8UPcoZOb*4xl*1I#>ZEY~ucc`bO8!aTgm_oQ3{Qoo~
z;a|h5tj}YU-^Ll;0Ke4Tm3bWI0iWXWndbQ|_!#3YJZ}V-_S9lM&qAn&)~J+e7cO&(
z`YyO|_xtyv3ulu`)oU)iX6V_U22}3Wf42*F>u+-5?xnqw@p-7EQJhCQd*Iy3*)UAv
z8a_IE;FS~8Ve7K18*X6#VSvL+ZpBJXSGP_Y4Qnp1F~XPR!oJf>8X~2p22g#fJr~<+
zUG4eUj_UT+#pg~wiTZHN4o7dqO7Y`cCnHeRc?fa%bF35rc<CSI@(S|j3ABOu>E>ye
z1iYG8;|1{Rt(|c5B)|anA8x)9D|Mlafw}@^kiYr;?jz0bcYOn|BR@c1@*{dZR*GVs
zd?2~m123z1yxj<;BWQE*d}K=`1e=v#GqDoVm&g}b7Ad__E+6{?UIrZMA3P6R-foPP
zW)GZKF{HSX48<0Vj>!;2N;be5iZRNpP(ELam7XY<(dVs~|BaZ8MY$V(do8@?Q$Vq3
zZ=O8y>YAxjXlJGo&XVoWf=CGlF`g*5^WtYO5pPS%m^%KdHFXZ{$vBEWY41p;)45L@
z59RXd1nRz1?nK~D1nxxOP6Yms5eTs@+Otquph;yP&c~+8{%;lc%YOV`Zp$7)9U2gm
z{fry~rT?8<<s6CJV8gOkFnBW|dxS`XRy<!5_#IQ(E3D+U?0v5Beuc&G=SUYi<4}>k
z%?jPrhZ(HD%l=$s!~f#;4C@oQ2s<jzG0l4D97IL@xmDKBXE1-6+Y;v+-2Nv&Z;9LT
z99rA$yURKEc4<7saq`l7`eWVQpD|kY4HdIav8@+3@%fyw$=qUYXkFi4ROtGR_h|&;
zX*)Y&>`&$ksa)3Bh+OSPi<2*AA2l{??&$2;bRU3On8-Nx5K<?v>8RAQIVWk3WQ*od
zF_liVrV^l9qjq5w%)~?%B{g;Ox`!>*G%W1PC(||waBVE@fT@l#rjs1Ux;o65`J6gj
zn8{IVIB#c?U^;lr(43Z?&)XB4Q|gbzQ8j9{GpRV5%%LK@nM%9Xq{QR7OeUFi>c!KI
zhw-c-d5K!Qp|ZYe@i54`s>SDktiM|PE|7Uzi_Zm_-?cbqd1OJmI>LafMizDr?mvf8
zcNpe_%r6%^;*FI&Q!Va4pK3h|p;s2HOCSu3;7ftHKhL1`G*!-RE;Z=SlFECo7GDY%
zJoyT(=OdVhk~cwBA`HvmG{0}ED8=EUAn&zWd^yNDuom~9->E-bc{^3P$%{E~vs^`V
zZLcE!IDa1T23P#$y{h^B<EU}}xWrLUeY^KS+A~i?&o<9^ovEW|8u1WjIK1(f=b9rg
zbmXJ*8@%Yby!chdJ3ajGkPv2WyzzgJ`Q_ZM*zmRp108bZRb=$F9{>FLxZ?NY+Yl!|
zC0`)=wI2Wc+@tjPan0|aCpqK~8kd8JQ>M?mj-FDuf4w}T@RhJj9Ax^u!mIfx={|z^
zTvr}U@H1Em@<;pcU%%f$J)wYgt@-`);m3;KkCXi%yFXF<{&AmH_|39+UH^*tz4^3D
z%<o<Ae@8qh4tn0shvS}gF^l{LIqNB_a03*S$NMDWYdyTwj55DBZ`r4Cf8KHc@uj#+
z^yUM<SVzw=;zoeoqSE8<_&VZ2dC_O<=>Jw7{21fj`2Ps;puFe_#Do0&MIHRI(&K;c
zL0hqqZ^IO1+ekd#8qenng*MyH$4Bqq)S6CZi{q`hLutbuP@8F1|NA*eLp<-K%p9y6
zhq#?iTVF}qk6Odo+iW3=7SK?^DGm>tam<K5*xhgS?%X?oDbZ@K)^ajdoU*kAkR+`O
zT9-h%M9vyX=Z5UGm2h(Tf@K%c?_6dqoph23vt#q-j$kI08`hg73pYOb2~gW9D^bj3
zCQ!uHQpw~RR^Csw;#M%7MEBPv%=L8JK(~z;I<uYkWtO$QzpJmu>e;nTC41>HrJg9{
ztWi6gpgi=phj(@L?d(Rwn4cDoSnuv=SFg2u`}Vy(1J*!Sw6_P7#{<24t-jsc_F*I1
zwU>|&N^n<`(8|6$sU3;9^3Cp)H0Q5%_T1f3n`Ec7ca088h+{BmSR}b!O@XUSc~BZ%
zxi7<8o1ItNU;k?T-w`@5(oZTbA5~=kDs&y=Lim^@O)#<cE}of0Ue1w1_b{iNr##yR
z{1U12%_1M+iqIK+uTmkqq(kK8d>3I{<PCOEsGL)gO%+xrP1u1oIunY#?5~BkFsJx0
zd_wnOKb<k9E&K2aWqERoY>WR4Ljo(*C-Smi7dp+1VqfIt`Liq^Wj_4T(uL_DH%Xq(
z?q2zE#K<oZ8x%RFvaduIRXDuTgaX#w@`qSX=vakQSDLsjl)m@5<>kAz&|d0v6|eoL
zu+J?ozk@=>j;Qd;Kg;qPm_hbcLM7iK^5Txr7g%1#PtGwyz4pBA@1Z=6iF{9zJdS*?
z_VRn%KSbF@B###rue^-6?yZi$v`=A$zIuwh<cX&F!9Pr%zB^JZL|*p!7ye86b0|ww
zM&xDRCVw9=!-1CP7Jp=1FL><BKJGMs%BA;<yNJ9{I&ZlBm-F2<mXElFu`Y5#>0_8%
zUd|yc?4a<88gKgw>y+o0{ZV@zc@BrDgE!cIdj-)Ik>BmXMP4ZVy@1$uvmRvoZ@NLO
ti=4pJmbot&(J}Rh2>3CI&ujEM(}kBjzr4RFUl(NG;Kb$?k8-d5{{R?egmVA@

diff --git a/llvm/test/tools/llvm-readobj/Inputs/dynamic-table.c b/llvm/test/tools/llvm-readobj/Inputs/dynamic-table.c
index 0af66ca0c53f4..9d45720eb6726 100644
--- a/llvm/test/tools/llvm-readobj/Inputs/dynamic-table.c
+++ b/llvm/test/tools/llvm-readobj/Inputs/dynamic-table.c
@@ -4,8 +4,6 @@
 //       -o dynamic-table-so.mips
 // clang -target mipsel-linux-gnu -lc dynamic-table.c \
 //       -o dynamic-table-exe.mips
-// clang -target aarch64-linux-gnu -fPIC -shared dynamic-table.c\
-//       -o dynamic-table-so.aarch64
 int puts(const char *);
 
 __thread int foo;
diff --git a/llvm/test/tools/llvm-readobj/dynamic.test b/llvm/test/tools/llvm-readobj/dynamic.test
deleted file mode 100644
index 5f3cdabad62ef..0000000000000
--- a/llvm/test/tools/llvm-readobj/dynamic.test
+++ /dev/null
@@ -1,246 +0,0 @@
-// Check dynamic section tags in case of shared library file.
-RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-so.mips \
-RUN:     | FileCheck %s -check-prefix ELF-MIPS
-RUN: llvm-readelf --dynamic-table %p/Inputs/dynamic-table-so.mips \
-RUN:     | FileCheck %s --strict-whitespace -check-prefix ELF-MIPS-GNU
-
-ELF-MIPS: Format: ELF32-mips
-ELF-MIPS: Arch: mipsel
-ELF-MIPS: AddressSize: 32bit
-ELF-MIPS: LoadName:
-ELF-MIPS: DynamicSection [ (23 entries)
-ELF-MIPS:   Tag        Type                 Name/Value
-ELF-MIPS:   0x00000001 NEEDED               Shared library: [libc.so.6]
-ELF-MIPS:   0x0000000C INIT                 0x528
-ELF-MIPS:   0x0000000D FINI                 0x860
-ELF-MIPS:   0x00000004 HASH                 0x210
-ELF-MIPS:   0x00000005 STRTAB               0x3D8
-ELF-MIPS:   0x00000006 SYMTAB               0x2A8
-ELF-MIPS:   0x0000000A STRSZ                231 (bytes)
-ELF-MIPS:   0x0000000B SYMENT               16 (bytes)
-ELF-MIPS:   0x00000003 PLTGOT               0x108E0
-ELF-MIPS:   0x00000011 REL                  0x518
-ELF-MIPS:   0x00000012 RELSZ                16 (bytes)
-ELF-MIPS:   0x00000013 RELENT               8 (bytes)
-ELF-MIPS:   0x70000001 MIPS_RLD_VERSION     1
-ELF-MIPS:   0x70000005 MIPS_FLAGS           NOTPOT
-ELF-MIPS:   0x70000006 MIPS_BASE_ADDRESS    0x0
-ELF-MIPS:   0x7000000A MIPS_LOCAL_GOTNO     10
-ELF-MIPS:   0x70000011 MIPS_SYMTABNO        19
-ELF-MIPS:   0x70000012 MIPS_UNREFEXTNO      26
-ELF-MIPS:   0x70000013 MIPS_GOTSYM          0xD
-ELF-MIPS:   0x6FFFFFFE VERNEED              0x4E8
-ELF-MIPS:   0x6FFFFFFF VERNEEDNUM           1
-ELF-MIPS:   0x6FFFFFF0 VERSYM               0x4C0
-ELF-MIPS:   0x00000000 NULL                 0x0
-ELF-MIPS: ]
-
-ELF-MIPS-GNU:      Dynamic section at offset 0x{{.*}} contains 23 entries:
-ELF-MIPS-GNU-NEXT:   Tag        Type                 Name/Value
-ELF-MIPS-GNU-NEXT:   0x00000001 (NEEDED)             Shared library: [libc.so.6]
-ELF-MIPS-GNU-NEXT:   0x0000000c (INIT)               0x528
-ELF-MIPS-GNU-NEXT:   0x0000000d (FINI)               0x860
-ELF-MIPS-GNU-NEXT:   0x00000004 (HASH)               0x210
-ELF-MIPS-GNU-NEXT:   0x00000005 (STRTAB)             0x3d8
-ELF-MIPS-GNU-NEXT:   0x00000006 (SYMTAB)             0x2a8
-ELF-MIPS-GNU-NEXT:   0x0000000a (STRSZ)              231 (bytes)
-ELF-MIPS-GNU-NEXT:   0x0000000b (SYMENT)             16 (bytes)
-ELF-MIPS-GNU-NEXT:   0x00000003 (PLTGOT)             0x108e0
-ELF-MIPS-GNU-NEXT:   0x00000011 (REL)                0x518
-ELF-MIPS-GNU-NEXT:   0x00000012 (RELSZ)              16 (bytes)
-ELF-MIPS-GNU-NEXT:   0x00000013 (RELENT)             8 (bytes)
-ELF-MIPS-GNU-NEXT:   0x70000001 (MIPS_RLD_VERSION)   1
-ELF-MIPS-GNU-NEXT:   0x70000005 (MIPS_FLAGS)         NOTPOT
-ELF-MIPS-GNU-NEXT:   0x70000006 (MIPS_BASE_ADDRESS)  0x0
-ELF-MIPS-GNU-NEXT:   0x7000000a (MIPS_LOCAL_GOTNO)   10
-ELF-MIPS-GNU-NEXT:   0x70000011 (MIPS_SYMTABNO)      19
-ELF-MIPS-GNU-NEXT:   0x70000012 (MIPS_UNREFEXTNO)    26
-ELF-MIPS-GNU-NEXT:   0x70000013 (MIPS_GOTSYM)        0xd
-ELF-MIPS-GNU-NEXT:   0x6ffffffe (VERNEED)            0x4e8
-ELF-MIPS-GNU-NEXT:   0x6fffffff (VERNEEDNUM)         1
-ELF-MIPS-GNU-NEXT:   0x6ffffff0 (VERSYM)             0x4c0
-ELF-MIPS-GNU-NEXT:   0x00000000 (NULL)               0x0
-
-// Check dynamic section tags in case of non-pic executable file.
-RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-exe.mips \
-RUN:     | FileCheck %s -check-prefix ELF-MIPS-EXE
-
-ELF-MIPS-EXE: Format: ELF32-mips
-ELF-MIPS-EXE: Arch: mipsel
-ELF-MIPS-EXE: AddressSize: 32bit
-ELF-MIPS-EXE: LoadName:
-ELF-MIPS-EXE: DynamicSection [ (26 entries)
-ELF-MIPS-EXE:   Tag        Type                 Name/Value
-ELF-MIPS-EXE:   0x00000001 NEEDED               Shared library: [libc.so.6]
-ELF-MIPS-EXE:   0x0000000C INIT                 0x400418
-ELF-MIPS-EXE:   0x0000000D FINI                 0x4007B0
-ELF-MIPS-EXE:   0x00000004 HASH                 0x4002B8
-ELF-MIPS-EXE:   0x00000005 STRTAB               0x40036C
-ELF-MIPS-EXE:   0x00000006 SYMTAB               0x4002EC
-ELF-MIPS-EXE:   0x0000000A STRSZ                107 (bytes)
-ELF-MIPS-EXE:   0x0000000B SYMENT               16 (bytes)
-ELF-MIPS-EXE:   0x70000016 MIPS_RLD_MAP         0x410880
-ELF-MIPS-EXE:   0x00000015 DEBUG                0x0
-ELF-MIPS-EXE:   0x00000003 PLTGOT               0x410890
-ELF-MIPS-EXE:   0x70000001 MIPS_RLD_VERSION     1
-ELF-MIPS-EXE:   0x70000005 MIPS_FLAGS           NOTPOT
-ELF-MIPS-EXE:   0x70000006 MIPS_BASE_ADDRESS    0x400000
-ELF-MIPS-EXE:   0x7000000A MIPS_LOCAL_GOTNO     5
-ELF-MIPS-EXE:   0x70000011 MIPS_SYMTABNO        8
-ELF-MIPS-EXE:   0x70000012 MIPS_UNREFEXTNO      32
-ELF-MIPS-EXE:   0x70000013 MIPS_GOTSYM          0x7
-ELF-MIPS-EXE:   0x00000014 PLTREL               REL
-ELF-MIPS-EXE:   0x00000017 JMPREL               0x400408
-ELF-MIPS-EXE:   0x00000002 PLTRELSZ             16 (bytes)
-ELF-MIPS-EXE:   0x70000032 MIPS_PLTGOT          0x410854
-ELF-MIPS-EXE:   0x6FFFFFFE VERNEED              0x4003E8
-ELF-MIPS-EXE:   0x6FFFFFFF VERNEEDNUM           1
-ELF-MIPS-EXE:   0x6FFFFFF0 VERSYM               0x4003D8
-ELF-MIPS-EXE:   0x00000000 NULL                 0x0
-ELF-MIPS-EXE: ]
-
-RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-exe.x86 \
-RUN:     | FileCheck %s -check-prefix ELF-X86-EXE
-
-ELF-X86-EXE: Format: ELF32-i386
-ELF-X86-EXE: Arch: i386
-ELF-X86-EXE: AddressSize: 32bit
-ELF-X86-EXE: LoadName:
-ELF-X86-EXE: DynamicSection [ (30 entries)
-ELF-X86-EXE:   Tag        Type                 Name/Value
-ELF-X86-EXE:   0x00000001 NEEDED               Shared library: [libstdc++.so.6]
-ELF-X86-EXE:   0x00000001 NEEDED               Shared library: [libgcc_s.so.1]
-ELF-X86-EXE:   0x00000001 NEEDED               Shared library: [libc.so.6]
-ELF-X86-EXE:   0x0000000C INIT                 0x62C
-ELF-X86-EXE:   0x0000000D FINI                 0x920
-ELF-X86-EXE:   0x00000019 INIT_ARRAY           0x19FC
-ELF-X86-EXE:   0x0000001B INIT_ARRAYSZ         4 (bytes)
-ELF-X86-EXE:   0x0000001A FINI_ARRAY           0x1A00
-ELF-X86-EXE:   0x0000001C FINI_ARRAYSZ         4 (bytes)
-ELF-X86-EXE:   0x00000004 HASH                 0x18C
-ELF-X86-EXE:   0x6FFFFEF5 GNU_HASH             0x1E4
-ELF-X86-EXE:   0x00000005 STRTAB               0x328
-ELF-X86-EXE:   0x00000006 SYMTAB               0x218
-ELF-X86-EXE:   0x0000000A STRSZ                408 (bytes)
-ELF-X86-EXE:   0x0000000B SYMENT               16 (bytes)
-ELF-X86-EXE:   0x00000015 DEBUG                0x0
-ELF-X86-EXE:   0x00000003 PLTGOT               0x1B30
-ELF-X86-EXE:   0x00000002 PLTRELSZ             64 (bytes)
-ELF-X86-EXE:   0x00000014 PLTREL               REL
-ELF-X86-EXE:   0x00000017 JMPREL               0x5EC
-ELF-X86-EXE:   0x00000011 REL                  0x564
-ELF-X86-EXE:   0x00000012 RELSZ                136 (bytes)
-ELF-X86-EXE:   0x00000013 RELENT               8 (bytes)
-ELF-X86-EXE:   0x00000016 TEXTREL
-ELF-X86-EXE:   0x0000001E FLAGS                TEXTREL
-ELF-X86-EXE:   0x6FFFFFFE VERNEED              0x4E4
-ELF-X86-EXE:   0x6FFFFFFF VERNEEDNUM           3
-ELF-X86-EXE:   0x6FFFFFF0 VERSYM               0x4C0
-ELF-X86-EXE:   0x6FFFFFFA RELCOUNT             6
-ELF-X86-EXE:   0x00000000 NULL                 0x0
-ELF-X86-EXE: ]
-
-RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-so.x86 \
-RUN:     | FileCheck %s -check-prefix ELF-X86-SO
-
-ELF-X86-SO: Format: ELF64-x86-64
-ELF-X86-SO: Arch: x86_64
-ELF-X86-SO: AddressSize: 64bit
-ELF-X86-SO: LoadName: 
-ELF-X86-SO: DynamicSection [ ({{[0-9]+}} entries)
-ELF-X86-SO:   Tag                Type                 Name/Value
-ELF-X86-SO:   0x0000000000000001 NEEDED               Shared library: [libc.so.6]
-ELF-X86-SO:   0x0000000000000001 NEEDED               Shared library: [ld-linux-x86-64.so.2]
-ELF-X86-SO:   0x000000007FFFFFFF FILTER               Filter library: [filter.so]
-ELF-X86-SO:   0x000000007FFFFFFD AUXILIARY            Auxiliary library: [aux.so]
-ELF-X86-SO:   0x000000000000000C INIT                 0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x000000000000000D FINI                 0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x0000000000000019 INIT_ARRAY           0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x000000000000001B INIT_ARRAYSZ         8 (bytes)
-ELF-X86-SO:   0x000000000000001A FINI_ARRAY           0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x000000000000001C FINI_ARRAYSZ         8 (bytes)
-ELF-X86-SO:   0x000000006FFFFEF5 GNU_HASH             0x1C8
-ELF-X86-SO:   0x0000000000000005 STRTAB               0x3A0
-ELF-X86-SO:   0x0000000000000006 SYMTAB               0x208
-ELF-X86-SO:   0x000000000000000A STRSZ                {{[0-9]+}} (bytes)
-ELF-X86-SO:   0x000000000000000B SYMENT               24 (bytes)
-ELF-X86-SO:   0x0000000000000003 PLTGOT               0x201000
-ELF-X86-SO:   0x0000000000000002 PLTRELSZ             48 (bytes)
-ELF-X86-SO:   0x0000000000000014 PLTREL               RELA
-ELF-X86-SO:   0x0000000000000017 JMPREL               0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x0000000000000007 RELA                 0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x0000000000000008 RELASZ               240 (bytes)
-ELF-X86-SO:   0x0000000000000009 RELAENT              24 (bytes)
-ELF-X86-SO:   0x000000006FFFFFFE VERNEED              0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x000000006FFFFFFF VERNEEDNUM           2
-ELF-X86-SO:   0x000000006FFFFFF0 VERSYM               0x{{[0-9A-F]+}}
-ELF-X86-SO:   0x000000006FFFFFF9 RELACOUNT            3
-ELF-X86-SO:   0x0000000000000000 NULL                 0x0
-
-RUN: llvm-readobj --dynamic-table %p/Inputs/dynamic-table-so.aarch64 \
-RUN:     | FileCheck %s -check-prefix ELF-AARCH64-SO
-RUN: llvm-readelf --dynamic-table %p/Inputs/dynamic-table-so.aarch64 \
-RUN:     | FileCheck %s --strict-whitespace -check-prefix ELF-AARCH64-SO-GNU
-
-ELF-AARCH64-SO: Format: ELF64-aarch64-little
-ELF-AARCH64-SO: Arch: aarch64
-ELF-AARCH64-SO: AddressSize: 64bit
-ELF-AARCH64-SO: LoadName: 
-ELF-AARCH64-SO: DynamicSection [ (26 entries)
-ELF-AARCH64-SO:   Tag                Type                 Name/Value
-ELF-AARCH64-SO:   0x0000000000000001 NEEDED               Shared library: [libc.so.6]
-ELF-AARCH64-SO:   0x000000000000000C INIT                 0x660
-ELF-AARCH64-SO:   0x000000000000000D FINI                 0x83C
-ELF-AARCH64-SO:   0x0000000000000019 INIT_ARRAY           0x10DB8
-ELF-AARCH64-SO:   0x000000000000001B INIT_ARRAYSZ         8 (bytes)
-ELF-AARCH64-SO:   0x000000000000001A FINI_ARRAY           0x10DC0
-ELF-AARCH64-SO:   0x000000000000001C FINI_ARRAYSZ         8 (bytes)
-ELF-AARCH64-SO:   0x000000006FFFFEF5 GNU_HASH             0x1F0
-ELF-AARCH64-SO:   0x0000000000000005 STRTAB               0x420
-ELF-AARCH64-SO:   0x0000000000000006 SYMTAB               0x240
-ELF-AARCH64-SO:   0x000000000000000A STRSZ                210 (bytes)
-ELF-AARCH64-SO:   0x000000000000000B SYMENT               24 (bytes)
-ELF-AARCH64-SO:   0x0000000000000003 PLTGOT               0x10FE8
-ELF-AARCH64-SO:   0x0000000000000002 PLTRELSZ             96 (bytes)
-ELF-AARCH64-SO:   0x0000000000000014 PLTREL               RELA
-ELF-AARCH64-SO:   0x0000000000000017 JMPREL               0x600
-ELF-AARCH64-SO:   0x000000006FFFFEF6 TLSDESC_PLT          0x6D0
-ELF-AARCH64-SO:   0x000000006FFFFEF7 TLSDESC_GOT          0x10FE0
-ELF-AARCH64-SO:   0x0000000000000007 RELA                 0x540
-ELF-AARCH64-SO:   0x0000000000000008 RELASZ               192 (bytes)
-ELF-AARCH64-SO:   0x0000000000000009 RELAENT              24 (bytes)
-ELF-AARCH64-SO:   0x000000006FFFFFFE VERNEED              0x520
-ELF-AARCH64-SO:   0x000000006FFFFFFF VERNEEDNUM           1
-ELF-AARCH64-SO:   0x000000006FFFFFF0 VERSYM               0x4F2
-ELF-AARCH64-SO:   0x000000006FFFFFF9 RELACOUNT            3
-ELF-AARCH64-SO:   0x0000000000000000 NULL                 0x0
-ELF-AARCH64-SO: ]
-
-ELF-AARCH64-SO-GNU:      Dynamic section at offset 0x{{.*}} contains 26 entries:
-ELF-AARCH64-SO-GNU-NEXT:   Tag                Type                 Name/Value
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000c (INIT)               0x660
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000d (FINI)               0x83c
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000019 (INIT_ARRAY)         0x10db8
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000001b (INIT_ARRAYSZ)       8 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000001a (FINI_ARRAY)         0x10dc0
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000001c (FINI_ARRAYSZ)       8 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffef5 (GNU_HASH)           0x1f0
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000005 (STRTAB)             0x420
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000006 (SYMTAB)             0x240
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000a (STRSZ)              210 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x000000000000000b (SYMENT)             24 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000003 (PLTGOT)             0x10fe8
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000002 (PLTRELSZ)           96 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000014 (PLTREL)             RELA
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000017 (JMPREL)             0x600
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffef6 (TLSDESC_PLT)        0x6d0
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffef7 (TLSDESC_GOT)        0x10fe0
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000007 (RELA)               0x540
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000008 (RELASZ)             192 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000009 (RELAENT)            24 (bytes)
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffffe (VERNEED)            0x520
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006fffffff (VERNEEDNUM)         1
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffff0 (VERSYM)             0x4f2
-ELF-AARCH64-SO-GNU-NEXT:   0x000000006ffffff9 (RELACOUNT)          3
-ELF-AARCH64-SO-GNU-NEXT:   0x0000000000000000 (NULL)               0x0

From 54182eb7b0dbc601787a6c5270626b28796beb48 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Fri, 31 May 2019 14:26:19 +0000
Subject: [PATCH 0746/1176] Fix for PR42089, regression from r362119

The implementation of the NoThrow ExceptionSpecificationType missed a
switch statement for forming the diagnostic when an out-of-line member
redeclaration misses the exception specification.  This patch adds the
correct case statement.

llvm-svn: 362225
---
 clang/lib/Sema/SemaExceptionSpec.cpp       | 9 ++++++++-
 clang/test/SemaCXX/MicrosoftExtensions.cpp | 9 +++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 59b919bb86ce3..e8f559af4da8a 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -381,6 +381,11 @@ bool Sema::CheckEquivalentExceptionSpec(FunctionDecl *Old, FunctionDecl *New) {
     // when declaring a replaceable global allocation function.
     DiagID = diag::ext_missing_exception_specification;
     ReturnValueOnError = false;
+  } else if (ESI.Type == EST_NoThrow) {
+    // Allow missing attribute 'nothrow' in redeclarations, since this is a very
+    // common omission.
+    DiagID = diag::ext_missing_exception_specification;
+    ReturnValueOnError = false;
   } else {
     DiagID = diag::err_missing_exception_specification;
     ReturnValueOnError = true;
@@ -421,7 +426,9 @@ bool Sema::CheckEquivalentExceptionSpec(FunctionDecl *Old, FunctionDecl *New) {
     OldProto->getNoexceptExpr()->printPretty(OS, nullptr, getPrintingPolicy());
     OS << ")";
     break;
-
+  case EST_NoThrow:
+    OS <<"__attribute__((nothrow))";
+    break;
   default:
     llvm_unreachable("This spec type is compatible with none.");
   }
diff --git a/clang/test/SemaCXX/MicrosoftExtensions.cpp b/clang/test/SemaCXX/MicrosoftExtensions.cpp
index 55adb68d2e4f6..8accc88bf37f3 100644
--- a/clang/test/SemaCXX/MicrosoftExtensions.cpp
+++ b/clang/test/SemaCXX/MicrosoftExtensions.cpp
@@ -517,6 +517,15 @@ void PR34109(int* a) {
   delete a;
 }
 
+namespace PR42089 {
+  struct S {
+    __attribute__((nothrow)) void Foo(); // expected-note {{previous declaration is here}}
+    __attribute__((nothrow)) void Bar();
+  };
+  void S::Foo(){} // expected-warning {{is missing exception specification}}
+  __attribute__((nothrow)) void S::Bar(){}
+}
+
 #elif TEST2
 
 // Check that __unaligned is not recognized if MS extensions are not enabled

From 488c509d457d46f5db81a526bef6590eadb2fc0e Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Fri, 31 May 2019 14:38:16 +0000
Subject: [PATCH 0747/1176] [clangd] Add missing license for rename.cpp, NFC.

llvm-svn: 362226
---
 clang-tools-extra/clangd/refactor/Rename.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index 46fd19d29f281..8b21e635c9557 100644
--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -1,3 +1,11 @@
+//===--- Rename.cpp - Symbol-rename refactorings -----------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
 #include "refactor/Rename.h"
 #include "clang/Tooling/Refactoring/RefactoringResultConsumer.h"
 #include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
@@ -65,7 +73,7 @@ renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos,
   // Right now we only support renaming the main file, so we
   // drop replacements not for the main file. In the future, we might
   // also support rename with wider scope.
-  // Rename sometimes returns duplicate edits (which is a bug). A side-effect of 
+  // Rename sometimes returns duplicate edits (which is a bug). A side-effect of
   // adding them to a single Replacements object is these are deduplicated.
   for (const tooling::AtomicChange &Change : ResultCollector.Result->get()) {
     for (const auto &Rep : Change.getReplacements()) {

From 66c25def005a476d970e8c3cca9850f49a8ea84d Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Fri, 31 May 2019 14:49:31 +0000
Subject: [PATCH 0748/1176] [NFC][InstCombine] Add unary FNeg tests to fma.ll

llvm-svn: 362227
---
 llvm/test/Transforms/InstCombine/fma.ll | 96 +++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index 7bb6619b4fd16..f3314c622bcf5 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -19,6 +19,17 @@ define float @fma_fneg_x_fneg_y(float %x, float %y, float %z) {
   ret float %fma
 }
 
+define float @fma_unary_fneg_x_unary_fneg_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @fma_unary_fneg_x_unary_fneg_y(
+; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMA]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fneg float %y
+  %fma = call float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fma
+}
+
 define <2 x float> @fma_fneg_x_fneg_y_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; CHECK-LABEL: @fma_fneg_x_fneg_y_vec(
 ; CHECK-NEXT:    [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]])
@@ -30,6 +41,17 @@ define <2 x float> @fma_fneg_x_fneg_y_vec(<2 x float> %x, <2 x float> %y, <2 x f
   ret <2 x float> %fma
 }
 
+define <2 x float> @fma_unary_fneg_x_unary_fneg_y_vec(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fma_unary_fneg_x_unary_fneg_y_vec(
+; CHECK-NEXT:    [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[FMA]]
+;
+  %xn = fneg <2 x float> %x
+  %yn = fneg <2 x float> %y
+  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %xn, <2 x float> %yn, <2 x float> %z)
+  ret <2 x float> %fma
+}
+
 define <2 x float> @fma_fneg_x_fneg_y_vec_undef(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; CHECK-LABEL: @fma_fneg_x_fneg_y_vec_undef(
 ; CHECK-NEXT:    [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]])
@@ -52,6 +74,17 @@ define float @fma_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
   ret float %fma
 }
 
+define float @fma_unary_fneg_x_unary_fneg_y_fast(float %x, float %y, float %z) {
+; CHECK-LABEL: @fma_unary_fneg_x_unary_fneg_y_fast(
+; CHECK-NEXT:    [[FMA:%.*]] = call fast float @llvm.fma.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMA]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fneg float %y
+  %fma = call fast float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fma
+}
+
 define float @fma_fneg_const_fneg_y(float %y, float %z) {
 ; CHECK-LABEL: @fma_fneg_const_fneg_y(
 ; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[Y:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
@@ -62,6 +95,16 @@ define float @fma_fneg_const_fneg_y(float %y, float %z) {
   ret float %fma
 }
 
+define float @fma_unary_fneg_const_unary_fneg_y(float %y, float %z) {
+; CHECK-LABEL: @fma_unary_fneg_const_unary_fneg_y(
+; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[Y:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMA]]
+;
+  %y.fneg = fneg float %y
+  %fma = call float @llvm.fma.f32(float fneg (float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+  ret float %fma
+}
+
 define float @fma_fneg_x_fneg_const(float %x, float %z) {
 ; CHECK-LABEL: @fma_fneg_x_fneg_const(
 ; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
@@ -72,6 +115,16 @@ define float @fma_fneg_x_fneg_const(float %x, float %z) {
   ret float %fma
 }
 
+define float @fma_unary_fneg_x_unary_fneg_const(float %x, float %z) {
+; CHECK-LABEL: @fma_unary_fneg_x_unary_fneg_const(
+; CHECK-NEXT:    [[FMA:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMA]]
+;
+  %x.fneg = fneg float %x
+  %fma = call float @llvm.fma.f32(float %x.fneg, float fneg (float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+  ret float %fma
+}
+
 define float @fma_fabs_x_fabs_y(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fma_fabs_x_fabs_y(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
@@ -116,6 +169,17 @@ define float @fmuladd_fneg_x_fneg_y(float %x, float %y, float %z) {
   ret float %fmuladd
 }
 
+define float @fmuladd_unary_fneg_x_unary_fneg_y(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmuladd_unary_fneg_x_unary_fneg_y(
+; CHECK-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMULADD]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fneg float %y
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
 define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fmuladd_fneg_x_fneg_y_fast(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
@@ -128,6 +192,18 @@ define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
   ret float %fmuladd
 }
 
+define float @fmuladd_unary_fneg_x_unary_fneg_y_fast(float %x, float %y, float %z) {
+; CHECK-LABEL: @fmuladd_unary_fneg_x_unary_fneg_y_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[FMULADD:%.*]] = fadd fast float [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret float [[FMULADD]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fneg float %y
+  %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
 define float @fmuladd_fneg_const_fneg_y(float %y, float %z) {
 ; CHECK-LABEL: @fmuladd_fneg_const_fneg_y(
 ; CHECK-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[Y:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
@@ -138,6 +214,16 @@ define float @fmuladd_fneg_const_fneg_y(float %y, float %z) {
   ret float %fmuladd
 }
 
+define float @fmuladd_unary_fneg_const_unary_fneg_y(float %y, float %z) {
+; CHECK-LABEL: @fmuladd_unary_fneg_const_unary_fneg_y(
+; CHECK-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[Y:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMULADD]]
+;
+  %y.fneg = fneg float %y
+  %fmuladd = call float @llvm.fmuladd.f32(float fneg (float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
 define float @fmuladd_fneg_x_fneg_const(float %x, float %z) {
 ; CHECK-LABEL: @fmuladd_fneg_x_fneg_const(
 ; CHECK-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
@@ -148,6 +234,16 @@ define float @fmuladd_fneg_x_fneg_const(float %x, float %z) {
   ret float %fmuladd
 }
 
+define float @fmuladd_unary_fneg_x_unary_fneg_const(float %x, float %z) {
+; CHECK-LABEL: @fmuladd_unary_fneg_x_unary_fneg_const(
+; CHECK-NEXT:    [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X:%.*]], float bitcast (i32 ptrtoint (i32* @external to i32) to float), float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[FMULADD]]
+;
+  %x.fneg = fneg float %x
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float fneg (float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+  ret float %fmuladd
+}
+
 define float @fmuladd_fabs_x_fabs_y(float %x, float %y, float %z) {
 ; CHECK-LABEL: @fmuladd_fabs_x_fabs_y(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])

From dc0e6c009b897538c77c81ed7828b2d55d4dd3fa Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 31 May 2019 15:05:06 +0000
Subject: [PATCH 0749/1176] [UpdateTestChecks] Add support for -march=r600 to
 match existing -march=amdgcn support

llvm-svn: 362228
---
 llvm/utils/UpdateTestChecks/asm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 247c301bff4ea..7fb93fab56519 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -236,6 +236,7 @@ def scrub_asm_wasm32(asm, args):
 def get_triple_from_march(march):
   triples = {
       'amdgcn': 'amdgcn',
+      'r600': 'r600',
       'mips': 'mips',
       'sparc': 'sparc',
   }

From 27d6ea9698cdf81c97c3d74d514f3c5b58b46aa0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 31 May 2019 15:06:14 +0000
Subject: [PATCH 0750/1176] [AMDGPU] Regenerate CTLZ tests for an upcoming
 patch

llvm-svn: 362229
---
 llvm/test/CodeGen/AMDGPU/ctlz.ll | 1134 ++++++++++++++++++++++++++----
 1 file changed, 1006 insertions(+), 128 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 612f31fd3af85..dae8402a1ae51 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1,6 +1,7 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s -enable-var-scope -check-prefixes=FUNC,GCN,SI
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -enable-var-scope -check-prefixes=FUNC,GCN,VI
+; RUN: llc < %s -march=r600 -mcpu=cypress -verify-machineinstrs | FileCheck %s -enable-var-scope -check-prefixes=FUNC,EG
 
 declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
 declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
@@ -16,34 +17,109 @@ declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
 
 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
-; FUNC-LABEL: {{^}}s_ctlz_i32:
-; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; GCN-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
-; GCN-DAG: v_cmp_ne_u32_e64 vcc, [[VAL]], 0{{$}}
-; GCN-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
-; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], 32, [[VCTLZ]], vcc
-; GCN: buffer_store_dword [[RESULT]]
-; GCN: s_endpgm
-
-; EG: FFBH_UINT
-; EG: CNDE_INT
 define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+; SI-LABEL: s_ctlz_i32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_flbit_i32_b32 s0, s2
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    v_cmp_ne_u32_e64 vcc, s2, 0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v0, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_ctlz_i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_flbit_i32_b32 s1, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s1
+; VI-NEXT:    v_cmp_ne_u32_e64 vcc, s0, 0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v0, vcc
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: s_ctlz_i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     FFBH_UINT * T0.W, KC0[2].Z,
+; EG-NEXT:     CNDE_INT T0.X, KC0[2].Z, literal.x, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    32(4.484155e-44), 2(2.802597e-45)
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
   store i32 %ctlz, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i32:
-; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
-; GCN: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
-; GCN: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
-; GCN: v_cndmask_b32_e32 [[RESULT:v[0-9]+]], 32, [[CTLZ]], vcc
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
-
-; EG: FFBH_UINT
-; EG: CNDE_INT
 define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v1, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v1, v0
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 3, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:     CNDE_INT T0.X, T0.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    32(4.484155e-44), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep, align 4
@@ -52,18 +128,73 @@ define amdgpu_kernel void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrsp
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_v2i32:
-; GCN: {{buffer|flat}}_load_dwordx2
-; GCN: v_ffbh_u32_e32
-; GCN: v_ffbh_u32_e32
-; GCN: buffer_store_dwordx2
-; GCN: s_endpgm
-
-; EG: FFBH_UINT
-; EG: CNDE_INT
-; EG: FFBH_UINT
-; EG: CNDE_INT
 define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_v2i32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v2, v1
+; SI-NEXT:    v_ffbh_u32_e32 v3, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, 32, v2, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v3, vcc
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_v2i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v2, v1
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_cndmask_b32_e32 v1, 32, v2, vcc
+; VI-NEXT:    v_ffbh_u32_e32 v3, v0
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v3, vcc
+; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_v2i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 6, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.Y,
+; EG-NEXT:     CNDE_INT T0.Y, T0.Y, literal.x, PV.W,
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, T0.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    32(4.484155e-44), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8
@@ -72,28 +203,91 @@ define amdgpu_kernel void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_v4i32:
-; GCN: {{buffer|flat}}_load_dwordx4
-; GCN: v_ffbh_u32_e32
-; GCN: v_ffbh_u32_e32
-; GCN: v_ffbh_u32_e32
-; GCN: v_ffbh_u32_e32
-; GCN: buffer_store_dwordx4
-; GCN: s_endpgm
-
-
-; EG-DAG: FFBH_UINT
-; EG-DAG: CNDE_INT
-
-; EG-DAG: FFBH_UINT
-; EG-DAG: CNDE_INT
-
-; EG-DAG: FFBH_UINT
-; EG-DAG: CNDE_INT
-
-; EG-DAG: FFBH_UINT
-; EG-DAG: CNDE_INT
 define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_v4i32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v4, v3
+; SI-NEXT:    v_ffbh_u32_e32 v5, v2
+; SI-NEXT:    v_ffbh_u32_e32 v6, v1
+; SI-NEXT:    v_ffbh_u32_e32 v7, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; SI-NEXT:    v_cndmask_b32_e32 v3, 32, v4, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, 32, v5, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, 32, v6, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v7, vcc
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_v4i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v4, v3
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; VI-NEXT:    v_cndmask_b32_e32 v3, 32, v4, vcc
+; VI-NEXT:    v_ffbh_u32_e32 v5, v2
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; VI-NEXT:    v_cndmask_b32_e32 v2, 32, v5, vcc
+; VI-NEXT:    v_ffbh_u32_e32 v6, v1
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_cndmask_b32_e32 v1, 32, v6, vcc
+; VI-NEXT:    v_ffbh_u32_e32 v7, v0
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v7, vcc
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_v4i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 12, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T1.W, T0.W,
+; EG-NEXT:     FFBH_UINT T2.W, T0.Z,
+; EG-NEXT:     CNDE_INT * T0.W, T0.W, literal.x, PV.W, BS:VEC_021/SCL_122
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.Z, T0.Z, literal.x, PV.W,
+; EG-NEXT:     FFBH_UINT * T1.W, T0.Y,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.Y, T0.Y, literal.x, PV.W,
+; EG-NEXT:     FFBH_UINT * T1.W, T0.X,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, T0.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    32(4.484155e-44), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16
@@ -102,63 +296,286 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i8:
-; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
-; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
-; VI-DAG: v_ffbh_u32_sdwa [[FFBH:v[0-9]+]], [[VAL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
-; VI: v_cmp_ne_u16_e32 vcc, 0, [[VAL]]
-
-; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 32, [[FFBH]], vcc
-
-; SI: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, 24, [[SELECT]]
-; VI: v_add_u32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]]
-; GCN: buffer_store_byte [[RESULT]],
-; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i8:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s6, s2
+; SI-NEXT:    s_mov_b32 s7, s3
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v1, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; SI-NEXT:    v_subrev_i32_e32 v0, vcc, 24, v0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i8:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_mov_b32 s2, s6
+; VI-NEXT:    s_mov_b32 s3, s7
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
+; VI-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; VI-NEXT:    v_add_u32_e32 v0, vcc, -16, v0
+; VI-NEXT:    v_add_u16_e32 v0, -8, v0
+; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i8:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 15, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:     CNDE_INT T0.W, T0.X, literal.x, PV.W,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].Y, literal.y,
+; EG-NEXT:    32(4.484155e-44), 3(4.203895e-45)
+; EG-NEXT:     ADD_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    -24(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     LSHL * T1.W, T1.W, literal.y,
+; EG-NEXT:    255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT:     LSHL T0.X, PV.W, PS,
+; EG-NEXT:     LSHL * T0.W, literal.x, PS,
+; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %val = load i8, i8 addrspace(1)* %valptr
   %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
   store i8 %ctlz, i8 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}s_ctlz_i64:
-; GCN: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
-; GCN-DAG: v_cmp_eq_u32_e64 vcc, s[[HI]], 0{{$}}
-; GCN-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
-; GCN-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
-; GCN-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
-; GCN-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[ADD]]
-; GCN-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
-; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
-; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
 define amdgpu_kernel void @s_ctlz_i64(i64 addrspace(1)* noalias %out, [8 x i32], i64 %val) nounwind {
+; SI-LABEL: s_ctlz_i64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_flbit_i32_b32 s0, s2
+; SI-NEXT:    s_flbit_i32_b32 s1, s3
+; SI-NEXT:    s_add_i32 s0, s0, 32
+; SI-NEXT:    s_or_b32 s2, s2, s3
+; SI-NEXT:    v_mov_b32_e32 v0, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s0
+; SI-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT:    v_cmp_ne_u32_e64 vcc, s2, 0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 64, v0, vcc
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_ctlz_i64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x4c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_flbit_i32_b32 s2, s0
+; VI-NEXT:    s_flbit_i32_b32 s3, s1
+; VI-NEXT:    s_add_i32 s2, s2, 32
+; VI-NEXT:    v_mov_b32_e32 v0, s3
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 0
+; VI-NEXT:    s_or_b32 s0, s0, s1
+; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT:    v_cmp_ne_u32_e64 vcc, s0, 0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 64, v0, vcc
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: s_ctlz_i64:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     FFBH_UINT * T0.W, KC0[4].W,
+; EG-NEXT:     CNDE_INT * T0.W, KC0[4].W, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     FFBH_UINT T1.W, KC0[5].X,
+; EG-NEXT:     ADD_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, KC0[5].X, PS, PV.W,
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
   store i64 %ctlz, i64 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}s_ctlz_i64_trunc:
 define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+; SI-LABEL: s_ctlz_i64_trunc:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_flbit_i32_b32 s0, s2
+; SI-NEXT:    s_flbit_i32_b32 s1, s3
+; SI-NEXT:    s_add_i32 s0, s0, 32
+; SI-NEXT:    s_or_b32 s2, s2, s3
+; SI-NEXT:    v_mov_b32_e32 v0, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s0
+; SI-NEXT:    v_cmp_eq_u32_e64 vcc, s3, 0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT:    v_cmp_ne_u32_e64 vcc, s2, 0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 64, v0, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_ctlz_i64_trunc:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_flbit_i32_b32 s2, s0
+; VI-NEXT:    s_flbit_i32_b32 s3, s1
+; VI-NEXT:    s_add_i32 s2, s2, 32
+; VI-NEXT:    v_mov_b32_e32 v0, s3
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_cmp_eq_u32_e64 vcc, s1, 0
+; VI-NEXT:    s_or_b32 s0, s0, s1
+; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT:    v_cmp_ne_u32_e64 vcc, s0, 0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 64, v0, vcc
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: s_ctlz_i64_trunc:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 8, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     FFBH_UINT * T0.W, KC0[2].W,
+; EG-NEXT:     CNDE_INT * T0.W, KC0[2].W, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     FFBH_UINT T1.W, KC0[3].X,
+; EG-NEXT:     ADD_INT * T0.W, PV.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, KC0[3].X, PS, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
   %trunc = trunc i64 %ctlz to i32
   store i32 %trunc, i32 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i64:
-; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]]
-; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
-; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
-; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc
-; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
-; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[OR]]
-; GCN-DAG: v_cndmask_b32_e32 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], vcc
-; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI:[0-9]+]]{{\]}}
 define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+; SI-LABEL: v_ctlz_i64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v4, v2
+; SI-NEXT:    v_ffbh_u32_e32 v5, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; SI-NEXT:    v_cndmask_b32_e32 v3, v5, v4, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, 64, v3, vcc
+; SI-NEXT:    v_mov_b32_e32 v3, v1
+; SI-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; VI-NEXT:    v_mov_b32_e32 v5, 0
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v6, s3
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s2, v0
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v0, v2
+; VI-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
+; VI-NEXT:    v_ffbh_u32_e32 v6, v3
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; VI-NEXT:    v_cndmask_b32_e32 v0, 64, v0, vcc
+; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i64:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 10, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T1.W, T0.X,
+; EG-NEXT:     CNDE_INT * T1.W, T0.X, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     FFBH_UINT T2.W, T0.Y,
+; EG-NEXT:     ADD_INT * T1.W, PV.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, T0.Y, PS, PV.W,
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, T0.W,
+; EG-NEXT:     LSHR * T1.X, PV.W, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -168,8 +585,84 @@ define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrsp
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i64_trunc:
 define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+; SI-LABEL: v_ctlz_i64_trunc:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v2, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
+; SI-NEXT:    s_mov_b64 s[10:11], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v0, v3
+; SI-NEXT:    v_ffbh_u32_e32 v5, v4
+; SI-NEXT:    v_or_b32_e32 v3, v3, v4
+; SI-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
+; SI-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
+; SI-NEXT:    v_cndmask_b32_e32 v0, 64, v0, vcc
+; SI-NEXT:    buffer_store_dword v0, v[1:2], s[8:11], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i64_trunc:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    v_mov_b32_e32 v3, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v4, s3
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v1
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, v5, v3, vcc
+; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v4, v0
+; VI-NEXT:    v_add_u32_e32 v4, vcc, 32, v4
+; VI-NEXT:    v_ffbh_u32_e32 v5, v1
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 64, v1, vcc
+; VI-NEXT:    flat_store_dword v[2:3], v0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i64_trunc:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 10, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_64 T1.XY, T1.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T1.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T1.X,
+; EG-NEXT:     CNDE_INT * T0.W, T1.X, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL T0.Z, T0.X, literal.x,
+; EG-NEXT:     FFBH_UINT T1.W, T1.Y,
+; EG-NEXT:     ADD_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    2(2.802597e-45), 32(4.484155e-44)
+; EG-NEXT:     CNDE_INT T0.X, T1.Y, PS, PV.W,
+; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, PV.Z,
+; EG-NEXT:     LSHR * T1.X, PV.W, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
@@ -180,12 +673,62 @@ define amdgpu_kernel void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
-; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
-; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i32_sel_eq_neg1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v0, v0
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i32_sel_eq_neg1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v0, v0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i32_sel_eq_neg1:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 5, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:     CNDE_INT * T0.W, T0.X, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, T0.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    -1(nan), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
@@ -196,12 +739,62 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
-; GCN: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]],
-; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[RESULT]],
-; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i32_sel_ne_neg1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v0, v0
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i32_sel_ne_neg1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v0, v0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i32_sel_ne_neg1:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 5, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:     CNDE_INT * T0.W, T0.X, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, T0.X, literal.x, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    -1(nan), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
@@ -213,13 +806,72 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out
 }
 
 ; TODO: Should be able to eliminate select here as well.
-; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
-; GCN: {{buffer|flat}}_load_dword
-; GCN: v_ffbh_u32_e32
-; GCN: v_cmp
-; GCN: v_cndmask
-; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i32_sel_eq_bitwidth:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v1, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 32, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i32_sel_eq_bitwidth:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v1, v0
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 32, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i32_sel_eq_bitwidth:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 7, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:     CNDE_INT * T0.W, T0.X, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     SETE_INT * T1.W, PV.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, PV.W, T0.W, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    -1(nan), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
@@ -230,13 +882,72 @@ define amdgpu_kernel void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
-; GCN: {{buffer|flat}}_load_dword
-; GCN: v_ffbh_u32_e32
-; GCN: v_cmp
-; GCN: v_cndmask
-; GCN: s_endpgm
 define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i32_sel_ne_bitwidth:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v1, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 32, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i32_sel_ne_bitwidth:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v1, v0
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 32, v1, vcc
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 32, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i32_sel_ne_bitwidth:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 7, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     FFBH_UINT * T0.W, T0.X,
+; EG-NEXT:     CNDE_INT * T0.W, T0.X, literal.x, PV.W,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     SETNE_INT * T1.W, PV.W, literal.x,
+; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.X, PV.W, literal.x, T0.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    -1(nan), 2(2.802597e-45)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
@@ -247,11 +958,65 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:
-; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
-; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
-; GCN: {{buffer|flat}}_store_byte [[FFBH]],
  define amdgpu_kernel void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i8_sel_eq_neg1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_ubyte v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v0, v0
+; SI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i8_sel_eq_neg1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ubyte v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v0, v0
+; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i8_sel_eq_neg1:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, T0.X,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     FFBH_UINT T0.W, T0.X,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].Y, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     LSHL * T1.W, PS, literal.y,
+; EG-NEXT:    255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT:     LSHL T0.X, PV.W, PS,
+; EG-NEXT:     LSHL * T0.W, literal.x, PS,
+; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid
   %val = load i8, i8 addrspace(1)* %valptr.gep
@@ -262,11 +1027,69 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:
-; SI: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]],
-; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
-; SI: buffer_store_short [[FFBH]],
  define amdgpu_kernel void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i16_sel_eq_neg1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s6, s2
+; SI-NEXT:    s_mov_b32 s7, s3
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v0, v0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i16_sel_eq_neg1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_mov_b32 s2, s6
+; VI-NEXT:    s_mov_b32 s3, s7
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v1, v0
+; VI-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, 32, v1, s[0:1]
+; VI-NEXT:    v_add_u32_e32 v0, vcc, -16, v0
+; VI-NEXT:    v_mov_b32_e32 v1, 0xffff
+; VI-NEXT:    v_cndmask_b32_e64 v0, v1, v0, s[0:1]
+; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i16_sel_eq_neg1:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     FFBH_UINT T0.W, T0.X,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].Y, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     LSHL * T1.W, PS, literal.y,
+; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
+; EG-NEXT:     LSHL T0.X, PV.W, PS,
+; EG-NEXT:     LSHL * T0.W, literal.x, PS,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %val = load i16, i16 addrspace(1)* %valptr
   %ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
   %cmp = icmp eq i16 %val, 0
@@ -276,12 +1099,67 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
-; GCN: {{buffer|flat}}_load_ubyte [[VAL:v[0-9]+]],
-; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
-; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
-; GCN: {{buffer|flat}}_store_byte [[TRUNC]],
 define amdgpu_kernel void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
+; SI-LABEL: v_ctlz_i7_sel_eq_neg1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    buffer_load_ubyte v0, v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_ffbh_u32_e32 v0, v0
+; SI-NEXT:    v_and_b32_e32 v0, 0x7f, v0
+; SI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_ctlz_i7_sel_eq_neg1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v0
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ubyte v0, v[0:1]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_ffbh_u32_e32 v0, v0
+; VI-NEXT:    v_and_b32_e32 v0, 0x7f, v0
+; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; EG-LABEL: v_ctlz_i7_sel_eq_neg1:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, T0.X,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     FFBH_UINT T0.W, T0.X,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].Y, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     LSHL * T1.W, PS, literal.y,
+; EG-NEXT:    127(1.779649e-43), 3(4.203895e-45)
+; EG-NEXT:     LSHL T0.X, PV.W, PS,
+; EG-NEXT:     LSHL * T0.W, literal.x, PS,
+; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %tid = call i32 @llvm.r600.read.tidig.x()
   %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid
   %val = load i7, i7 addrspace(1)* %valptr.gep

From db6a1d4f2418dbab602ac162afe8708c5b98ed02 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 31 May 2019 15:06:51 +0000
Subject: [PATCH 0751/1176] [AMDGPU] Regenerate add/sub shrink constant tests
 for an upcoming patch

llvm-svn: 362230
---
 .../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 435 ++++++++++++++++--
 1 file changed, 390 insertions(+), 45 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index a185bc35ee5d7..50a7a96c54a2d 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -1,13 +1,43 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
 
 ; Test that add/sub with a constant is swapped to sub/add with negated
 ; constant to minimize code size.
 
-; GCN-LABEL: {{^}}v_test_i32_x_sub_64:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_x_sub_64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_x_sub_64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_subrev_u32_e32 v2, vcc, 64, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -18,12 +48,47 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrs
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]]
-; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
-; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
 define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_x_sub_64_multi_use:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(1)
+; SI-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_x_sub_64_multi_use:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v4, v[0:1]
+; VI-NEXT:    flat_load_dword v0, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
+; VI-NEXT:    v_subrev_u32_e32 v1, vcc, 64, v4
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_subrev_u32_e32 v0, vcc, 64, v0
+; VI-NEXT:    flat_store_dword v[2:3], v1
+; VI-NEXT:    flat_store_dword v[2:3], v0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -37,10 +102,39 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out,
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_64_sub_x:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
 define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_64_sub_x:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v2, vcc, 64, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_64_sub_x:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v2, vcc, 64, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -51,10 +145,39 @@ define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrs
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_x_sub_65:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_x_sub_65:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffbf, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_x_sub_65:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffbf, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -65,10 +188,39 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrs
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_65_sub_x:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]]
 define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_65_sub_x:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v2, vcc, 0x41, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_65_sub_x:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v2, vcc, 0x41, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -79,10 +231,39 @@ define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrs
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_x_sub_neg16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_i32_e32 v2, vcc, 16, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_x_sub_neg16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 16, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -93,10 +274,39 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 ad
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]]
 define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_neg16_sub_x:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v2, vcc, -16, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_neg16_sub_x:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v2, vcc, -16, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -107,10 +317,39 @@ define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 ad
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]]
 define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_x_sub_neg17:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_add_i32_e32 v2, vcc, 17, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_x_sub_neg17:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 17, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -121,10 +360,39 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 ad
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x:
-; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]]
-; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]]
 define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i32_neg17_sub_x:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v2, vcc, 0xffffffef, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i32_neg17_sub_x:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dword v3, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v2, vcc, 0xffffffef, v3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %tid.ext
@@ -135,19 +403,64 @@ define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 ad
   ret void
 }
 
-; GCN-LABEL: {{^}}s_test_i32_x_sub_64:
-; GCN: s_load_dword [[X:s[0-9]+]]
-; GCN: s_sub_i32 s{{[0-9]+}}, [[X]], 64
 define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 {
+; SI-LABEL: s_test_i32_x_sub_64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_sub_i32 s0, s0, 64
+; SI-NEXT:    ;;#ASMSTART
+; SI-NEXT:    ; use s0
+; SI-NEXT:    ;;#ASMEND
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_test_i32_x_sub_64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s0, s[0:1], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_sub_i32 s0, s0, 64
+; VI-NEXT:    ;;#ASMSTART
+; VI-NEXT:    ; use s0
+; VI-NEXT:    ;;#ASMEND
+; VI-NEXT:    s_endpgm
   %result = sub i32 %x, 64
   call void asm sideeffect "; use $0", "s"(i32 %result)
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i16_x_sub_64:
-; VI: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
-; VI: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
 define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i16_x_sub_64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i16_x_sub_64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ushort v3, v[0:1]
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_subrev_u16_e32 v2, 64, v3
+; VI-NEXT:    flat_store_short v[0:1], v2
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext
@@ -158,15 +471,47 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(i16 addrspace(1)* %out, i16 addrs
   ret void
 }
 
-; GCN-LABEL: {{^}}v_test_i16_x_sub_64_multi_use:
-; GCN: {{buffer|flat}}_load_ushort [[X:v[0-9]+]]
-; GCN: {{buffer|flat}}_load_ushort [[Y:v[0-9]+]]
-; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[X]]
-; VI-DAG: v_subrev_u16_e32 v{{[0-9]+}}, 64, [[Y]]
-
-; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]]
-; SI-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]]
 define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
+; SI-LABEL: v_test_i16_x_sub_64_multi_use:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
+; SI-NEXT:    s_waitcnt vmcnt(1)
+; SI-NEXT:    v_subrev_i32_e32 v2, vcc, 64, v2
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_subrev_i32_e32 v3, vcc, 64, v3
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    buffer_store_short v3, v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_test_i16_x_sub_64_multi_use:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 1, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_ushort v4, v[0:1]
+; VI-NEXT:    flat_load_ushort v0, v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(1)
+; VI-NEXT:    v_subrev_u16_e32 v1, 64, v4
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_subrev_u16_e32 v0, 64, v0
+; VI-NEXT:    flat_store_short v[2:3], v1
+; VI-NEXT:    flat_store_short v[2:3], v0
+; VI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
   %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 %tid.ext

From aea3149e6c7ce0c3aafb6353f8547085b1ded297 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Fri, 31 May 2019 15:10:34 +0000
Subject: [PATCH 0752/1176] [NFC][InstCombine] Add unary FNeg tests to fdiv.ll

llvm-svn: 362231
---
 llvm/test/Transforms/InstCombine/fdiv.ll | 118 +++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 796eef93cdc4a..619554e6253d7 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -198,6 +198,39 @@ define float @fneg_fneg(float %x, float %y) {
   ret float %div
 }
 
+define float @unary_fneg_unary_fneg(float %x, float %y) {
+; CHECK-LABEL: @unary_fneg_unary_fneg(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[DIV]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fneg float %y
+  %div = fdiv float %x.fneg, %y.fneg
+  ret float %div
+}
+
+define float @unary_fneg_fneg(float %x, float %y) {
+; CHECK-LABEL: @unary_fneg_fneg(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[DIV]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fsub float -0.0, %y
+  %div = fdiv float %x.fneg, %y.fneg
+  ret float %div
+}
+
+define float @fneg_unary_fneg(float %x, float %y) {
+; CHECK-LABEL: @fneg_unary_fneg(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[DIV]]
+;
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fneg float %y
+  %div = fdiv float %x.fneg, %y.fneg
+  ret float %div
+}
+
 ; The test above shows that no FMF are needed, but show that we are not dropping FMF.
 
 define float @fneg_fneg_fast(float %x, float %y) {
@@ -211,6 +244,17 @@ define float @fneg_fneg_fast(float %x, float %y) {
   ret float %div
 }
 
+define float @unary_fneg_unary_fneg_fast(float %x, float %y) {
+; CHECK-LABEL: @unary_fneg_unary_fneg_fast(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret float [[DIV]]
+;
+  %x.fneg = fneg float %x
+  %y.fneg = fneg float %y
+  %div = fdiv fast float %x.fneg, %y.fneg
+  ret float %div
+}
+
 define <2 x float> @fneg_fneg_vec(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fneg_fneg_vec(
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -222,6 +266,39 @@ define <2 x float> @fneg_fneg_vec(<2 x float> %x, <2 x float> %y) {
   ret <2 x float> %div
 }
 
+define <2 x float> @unary_fneg_unary_fneg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_fneg_unary_fneg_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %xneg = fneg <2 x float> %x
+  %yneg = fneg <2 x float> %y
+  %div = fdiv <2 x float> %xneg, %yneg
+  ret <2 x float> %div
+}
+
+define <2 x float> @fneg_unary_fneg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_unary_fneg_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %xneg = fsub <2 x float> <float -0.0, float -0.0>, %x
+  %yneg = fneg <2 x float> %y
+  %div = fdiv <2 x float> %xneg, %yneg
+  ret <2 x float> %div
+}
+
+define <2 x float> @unary_fneg_fneg_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_fneg_fneg_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %xneg = fneg <2 x float> %x
+  %yneg = fsub <2 x float> <float -0.0, float -0.0>, %y
+  %div = fdiv <2 x float> %xneg, %yneg
+  ret <2 x float> %div
+}
+
 define <2 x float> @fneg_fneg_vec_undef_elts(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fneg_fneg_vec_undef_elts(
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -243,6 +320,16 @@ define float @fneg_dividend_constant_divisor(float %x) {
   ret  float %div
 }
 
+define float @unary_fneg_dividend_constant_divisor(float %x) {
+; CHECK-LABEL: @unary_fneg_dividend_constant_divisor(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv nsz float [[X:%.*]], -3.000000e+00
+; CHECK-NEXT:    ret float [[DIV]]
+;
+  %neg = fneg float %x
+  %div = fdiv nsz float %neg, 3.0
+  ret  float %div
+}
+
 define float @fneg_divisor_constant_dividend(float %x) {
 ; CHECK-LABEL: @fneg_divisor_constant_dividend(
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv nnan float 3.000000e+00, [[X:%.*]]
@@ -253,6 +340,16 @@ define float @fneg_divisor_constant_dividend(float %x) {
   ret float %div
 }
 
+define float @unary_fneg_divisor_constant_dividend(float %x) {
+; CHECK-LABEL: @unary_fneg_divisor_constant_dividend(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv nnan float 3.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    ret float [[DIV]]
+;
+  %neg = fneg float %x
+  %div = fdiv nnan float -3.0, %neg
+  ret float %div
+}
+
 define <2 x float> @fneg_dividend_constant_divisor_vec(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_dividend_constant_divisor_vec(
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv ninf <2 x float> [[X:%.*]], <float -3.000000e+00, float 8.000000e+00>
@@ -263,6 +360,16 @@ define <2 x float> @fneg_dividend_constant_divisor_vec(<2 x float> %x) {
   ret <2 x float> %div
 }
 
+define <2 x float> @unary_fneg_dividend_constant_divisor_vec(<2 x float> %x) {
+; CHECK-LABEL: @unary_fneg_dividend_constant_divisor_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv ninf <2 x float> [[X:%.*]], <float -3.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %neg = fneg <2 x float> %x
+  %div = fdiv ninf <2 x float> %neg, <float 3.0, float -8.0>
+  ret <2 x float> %div
+}
+
 define <2 x float> @fneg_dividend_constant_divisor_vec_undef_elt(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_dividend_constant_divisor_vec_undef_elt(
 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv ninf <2 x float> [[X:%.*]], <float -3.000000e+00, float 8.000000e+00>
@@ -283,6 +390,17 @@ define <2 x float> @fneg_divisor_constant_dividend_vec(<2 x float> %x) {
   ret <2 x float> %div
 }
 
+define <2 x float> @unary_fneg_divisor_constant_dividend_vec(<2 x float> %x) {
+; CHECK-LABEL: @unary_fneg_divisor_constant_dividend_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv afn <2 x float> <float 3.000000e+00, float -5.000000e+00>, [[X:%.*]]
+; CHECK-NEXT:    ret <2 x float> [[DIV]]
+;
+  %neg = fneg <2 x float> %x
+  %div = fdiv afn <2 x float> <float -3.0, float 5.0>, %neg
+  ret <2 x float> %div
+}
+
+
 ; X / (X * Y) --> 1.0 / Y
 
 define float @div_factor(float %x, float %y) {

From 42d6c268b2716923ce11d62a2eef3265da88fa09 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Fri, 31 May 2019 15:29:55 +0000
Subject: [PATCH 0753/1176] Revise test case due to the change from CUDA 10+.

llvm-svn: 362232
---
 clang/test/Driver/offloading-interoperability.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/offloading-interoperability.c b/clang/test/Driver/offloading-interoperability.c
index 75d2ce9239dd2..9c80d91d1d78c 100644
--- a/clang/test/Driver/offloading-interoperability.c
+++ b/clang/test/Driver/offloading-interoperability.c
@@ -11,7 +11,7 @@
 // NO-OPENMP-FLAGS-FOR-CUDA-DEVICE:      clang{{.*}}" "-cc1" "-triple" "nvptx64-nvidia-cuda"
 // NO-OPENMP-FLAGS-FOR-CUDA-DEVICE-NOT:  -fopenmp
 // NO-OPENMP-FLAGS-FOR-CUDA-DEVICE-NEXT: ptxas" "-m64"
-// NO-OPENMP-FLAGS-FOR-CUDA-DEVICE-NEXT: fatbinary" "--cuda" "-64"
+// NO-OPENMP-FLAGS-FOR-CUDA-DEVICE-NEXT: fatbinary"{{( "--cuda")?}} "-64"
 // NO-OPENMP-FLAGS-FOR-CUDA-DEVICE-NEXT: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux-gnu"
 // NO-OPENMP-FLAGS-FOR-CUDA-DEVICE:      -fopenmp
 // NO-OPENMP-FLAGS-FOR-CUDA-DEVICE-NEXT: {{ld(.exe)?"}} {{.*}}"-m" "elf64lppc"

From 18e7bf5c4dcb16a723654ee89b1952a5f59283bf Mon Sep 17 00:00:00 2001
From: Jinsong Ji <jji@us.ibm.com>
Date: Fri, 31 May 2019 15:35:19 +0000
Subject: [PATCH 0754/1176] [MachinePipeliner][NFC] Add some debug log and
 statistics

This is to add some log and statistics for debugging

Differential Revision: https://reviews.llvm.org/D62165

llvm-svn: 362233
---
 llvm/lib/CodeGen/MachinePipeliner.cpp | 83 +++++++++++++++++++++++----
 1 file changed, 71 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index af159f1c45514..604b3187b71e9 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -96,6 +96,14 @@ using namespace llvm;
 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
 STATISTIC(NumPipelined, "Number of loops software pipelined");
 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
+STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
+STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
+STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
+STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
+STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
+STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
+STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
+STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
 
 /// A command line option to turn software pipelining on or off.
 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
@@ -289,16 +297,28 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
   LI.TBB = nullptr;
   LI.FBB = nullptr;
   LI.BrCond.clear();
-  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
+  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
+    LLVM_DEBUG(
+        dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+    NumFailBranch++;
     return false;
+  }
 
   LI.LoopInductionVar = nullptr;
   LI.LoopCompare = nullptr;
-  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
+  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
+    LLVM_DEBUG(
+        dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+    NumFailLoop++;
     return false;
+  }
 
-  if (!L.getLoopPreheader())
+  if (!L.getLoopPreheader()) {
+    LLVM_DEBUG(
+        dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+    NumFailPreheader++;
     return false;
+  }
 
   // Remove any subregisters from inputs to phi nodes.
   preprocessPhiNodes(*L.getHeader());
@@ -413,12 +433,21 @@ void SwingSchedulerDAG::schedule() {
                     << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
 
   // Can't schedule a loop without a valid MII.
-  if (MII == 0)
+  if (MII == 0) {
+    LLVM_DEBUG(
+        dbgs()
+        << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+    NumFailZeroMII++;
     return;
+  }
 
   // Don't pipeline large loops.
-  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
+  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
+    LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
+                      << ", we don't pipleline large loops\n");
+    NumFailLargeMaxMII++;
     return;
+  }
 
   computeNodeFunctions(NodeSets);
 
@@ -456,17 +485,27 @@ void SwingSchedulerDAG::schedule() {
   SMSchedule Schedule(Pass.MF);
   Scheduled = schedulePipeline(Schedule);
 
-  if (!Scheduled)
+  if (!Scheduled){
+    LLVM_DEBUG(dbgs() << "No schedule found, return\n");
+    NumFailNoSchedule++;
     return;
+  }
 
   unsigned numStages = Schedule.getMaxStageCount();
   // No need to generate pipeline if there are no overlapped iterations.
-  if (numStages == 0)
+  if (numStages == 0) {
+    LLVM_DEBUG(
+        dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+    NumFailZeroStage++;
     return;
-
+  }
   // Check that the maximum stage count is less than user-defined limit.
-  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
+  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
+    LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
+                      << " : too many stages, abort\n");
+    NumFailLargeMaxStage++;
     return;
+  }
 
   generatePipelinedLoop(Schedule);
   ++NumPipelined;
@@ -926,6 +965,7 @@ struct FuncUnitSorter {
 /// instruction cannot be reserved in an existing DFA, we create a new one.
 unsigned SwingSchedulerDAG::calculateResMII() {
 
+  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
   SmallVector<ResourceManager*, 8> Resources;
   MachineBasicBlock *MBB = Loop.getHeader();
   Resources.push_back(new ResourceManager(&MF.getSubtarget()));
@@ -956,6 +996,11 @@ unsigned SwingSchedulerDAG::calculateResMII() {
     unsigned ReservedCycles = 0;
     SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
     SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
+    LLVM_DEBUG({
+      dbgs() << "Trying to reserve resource for " << NumCycles
+             << " cycles for \n";
+      MI->dump();
+    });
     for (unsigned C = 0; C < NumCycles; ++C)
       while (RI != RE) {
         if ((*RI++)->canReserveResources(*MI)) {
@@ -968,8 +1013,13 @@ unsigned SwingSchedulerDAG::calculateResMII() {
       --RI;
       (*RI)->reserveResources(*MI);
     }
+
+    LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+                      << ", NumCycles:" << NumCycles << "\n");
     // Add new DFAs, if needed, to reserve resources.
     for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
+      LLVM_DEBUG(dbgs() << "NewResource created to reserve resources"
+                        << "\n");
       ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
       assert(NewResource->canReserveResources(*MI) && "Reserve error.");
       NewResource->reserveResources(*MI);
@@ -977,6 +1027,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {
     }
   }
   int Resmii = Resources.size();
+  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
   // Delete the memory for each of the DFAs that were created earlier.
   for (ResourceManager *RI : Resources) {
     ResourceManager *D = RI;
@@ -1862,8 +1913,11 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
 /// Process the nodes in the computed order and create the pipelined schedule
 /// of the instructions, if possible. Return true if a schedule is found.
 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
-  if (NodeOrder.empty())
+
+  if (NodeOrder.empty()){
+    LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
     return false;
+  }
 
   bool scheduleFound = false;
   unsigned II = 0;
@@ -1889,13 +1943,14 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
       Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
                             II, this);
       LLVM_DEBUG({
+        dbgs() << "\n";
         dbgs() << "Inst (" << SU->NodeNum << ") ";
         SU->getInstr()->dump();
         dbgs() << "\n";
       });
       LLVM_DEBUG({
-        dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
-               << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
+        dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
+                         LateStart, SchedEnd, SchedStart);
       });
 
       if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
@@ -3244,6 +3299,10 @@ void SwingSchedulerDAG::postprocessDAG() {
 /// the relative values of StartCycle and EndCycle.
 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
   bool forward = true;
+  LLVM_DEBUG({
+    dbgs() << "Trying to insert node between " << StartCycle << " and "
+           << EndCycle << " II: " << II << "\n";
+  });
   if (StartCycle > EndCycle)
     forward = false;
 

From 6d2a4712f3accd3fa34ed2bdecdf2ebf987dccca Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Fri, 31 May 2019 15:40:03 +0000
Subject: [PATCH 0755/1176] [NFC][InstCombine] Add unary FNeg tests to fcmp.ll

llvm-svn: 362234
---
 llvm/test/Transforms/InstCombine/fcmp.ll | 118 +++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index be7aedc7c60fd..cd941fea90e1c 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -68,6 +68,16 @@ define i1 @fneg_constant_swap_pred(float %x) {
   ret i1 %cmp
 }
 
+define i1 @unary_fneg_constant_swap_pred(float %x) {
+; CHECK-LABEL: @unary_fneg_constant_swap_pred(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt float [[X:%.*]], -1.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %neg = fneg float %x
+  %cmp = fcmp ogt float %neg, 1.0
+  ret i1 %cmp
+}
+
 define <2 x i1> @fneg_constant_swap_pred_vec(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_constant_swap_pred_vec(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <2 x float> [[X:%.*]], <float -1.000000e+00, float -2.000000e+00>
@@ -78,6 +88,16 @@ define <2 x i1> @fneg_constant_swap_pred_vec(<2 x float> %x) {
   ret <2 x i1> %cmp
 }
 
+define <2 x i1> @unary_fneg_constant_swap_pred_vec(<2 x float> %x) {
+; CHECK-LABEL: @unary_fneg_constant_swap_pred_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <2 x float> [[X:%.*]], <float -1.000000e+00, float -2.000000e+00>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %neg = fneg <2 x float> %x
+  %cmp = fcmp ogt <2 x float> %neg, <float 1.0, float 2.0>
+  ret <2 x i1> %cmp
+}
+
 define <2 x i1> @fneg_constant_swap_pred_vec_undef(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_constant_swap_pred_vec_undef(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <2 x float> [[X:%.*]], <float -1.000000e+00, float -2.000000e+00>
@@ -100,6 +120,16 @@ define i1 @fneg_fmf(float %x) {
   ret i1 %r
 }
 
+define i1 @unary_fneg_fmf(float %x) {
+; CHECK-LABEL: @unary_fneg_fmf(
+; CHECK-NEXT:    [[R:%.*]] = fcmp fast oeq float [[X:%.*]], -4.200000e+01
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %n = fneg fast float %x
+  %r = fcmp fast oeq float %n, 42.0
+  ret i1 %r
+}
+
 ; The new fcmp should have the same FMF as the original, vector edition.
 
 define <2 x i1> @fcmp_fneg_fmf_vec(<2 x float> %x) {
@@ -123,6 +153,39 @@ define i1 @fneg_fneg_swap_pred(float %x, float %y) {
   ret i1 %cmp
 }
 
+define i1 @unary_fneg_unary_fneg_swap_pred(float %x, float %y) {
+; CHECK-LABEL: @unary_fneg_unary_fneg_swap_pred(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp nnan ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %neg1 = fneg float %x
+  %neg2 = fneg float %y
+  %cmp = fcmp nnan olt float %neg1, %neg2
+  ret i1 %cmp
+}
+
+define i1 @unary_fneg_fneg_swap_pred(float %x, float %y) {
+; CHECK-LABEL: @unary_fneg_fneg_swap_pred(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp nnan ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %neg1 = fneg float %x
+  %neg2 = fsub float -0.0, %y
+  %cmp = fcmp nnan olt float %neg1, %neg2
+  ret i1 %cmp
+}
+
+define i1 @fneg_unary_fneg_swap_pred(float %x, float %y) {
+; CHECK-LABEL: @fneg_unary_fneg_swap_pred(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp nnan ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %neg1 = fsub float -0.0, %x
+  %neg2 = fneg float %y
+  %cmp = fcmp nnan olt float %neg1, %neg2
+  ret i1 %cmp
+}
+
 define <2 x i1> @fneg_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fneg_fneg_swap_pred_vec(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ogt <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -134,6 +197,39 @@ define <2 x i1> @fneg_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) {
   ret <2 x i1> %cmp
 }
 
+define <2 x i1> @unary_fneg_unary_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_fneg_unary_fneg_swap_pred_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %neg1 = fneg <2 x float> %x
+  %neg2 = fneg <2 x float> %y
+  %cmp = fcmp ninf olt <2 x float> %neg1, %neg2
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @unary_fneg_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_fneg_fneg_swap_pred_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %neg1 = fneg <2 x float> %x
+  %neg2 = fsub <2 x float> <float -0.0, float -0.0>, %y
+  %cmp = fcmp ninf olt <2 x float> %neg1, %neg2
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fneg_unary_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_unary_fneg_swap_pred_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %neg1 = fsub <2 x float> <float -0.0, float -0.0>, %x
+  %neg2 = fneg <2 x float> %y
+  %cmp = fcmp ninf olt <2 x float> %neg1, %neg2
+  ret <2 x i1> %cmp
+}
+
 define <2 x i1> @fneg_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fneg_fneg_swap_pred_vec_undef(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]]
@@ -145,6 +241,28 @@ define <2 x i1> @fneg_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) {
   ret <2 x i1> %cmp
 }
 
+define <2 x i1> @unary_fneg_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @unary_fneg_fneg_swap_pred_vec_undef(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %neg1 = fneg <2 x float> %x
+  %neg2 = fsub <2 x float> <float undef, float -0.0>, %y
+  %cmp = fcmp olt <2 x float> %neg1, %neg2
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @fneg_unary_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fneg_unary_fneg_swap_pred_vec_undef(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %neg1 = fsub <2 x float> <float -0.0, float undef>, %x
+  %neg2 = fneg <2 x float> %y
+  %cmp = fcmp olt <2 x float> %neg1, %neg2
+  ret <2 x i1> %cmp
+}
+
 define i1 @test7(float %x) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], 0.000000e+00

From 7477fcd93a8c5492f8a4a8bef2985c0e656ec396 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 31 May 2019 15:41:19 +0000
Subject: [PATCH 0756/1176] [PPC64][test] Delete redundant labels from
 ppc64-relocs.s

llvm-svn: 362235
---
 lld/test/ELF/ppc64-relocs.s | 100 ++++++++----------------------------
 1 file changed, 20 insertions(+), 80 deletions(-)

diff --git a/lld/test/ELF/ppc64-relocs.s b/lld/test/ELF/ppc64-relocs.s
index c3f4b2752ab6d..1804db9253f58 100644
--- a/lld/test/ELF/ppc64-relocs.s
+++ b/lld/test/ELF/ppc64-relocs.s
@@ -30,142 +30,86 @@ _start:
         .tc .LJTI0_0[TC],.LJTI0_0
 
 .section .R_PPC64_TOC16_LO_DS,"ax",@progbits
-.globl .FR_PPC64_TOC16_LO_DS
-.FR_PPC64_TOC16_LO_DS:
   ld 1, .L1@toc@l(2)
 
-# CHECK: Disassembly of section .R_PPC64_TOC16_LO_DS:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_TOC16_LO_DS:
+# CHECK-LABEL: Disassembly of section .R_PPC64_TOC16_LO_DS:
 # CHECK: 1001000c:       ld 1, -32768(2)
 
 .section .R_PPC64_TOC16_LO,"ax",@progbits
-.globl .FR_PPC64_TOC16_LO
-.FR_PPC64_TOC16_LO:
   addi  1, 2, .L1@toc@l
 
-# CHECK: Disassembly of section .R_PPC64_TOC16_LO:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_TOC16_LO:
+# CHECK-LABEL: Disassembly of section .R_PPC64_TOC16_LO:
 # CHECK: 10010010:       addi 1, 2, -32768
 
 .section .R_PPC64_TOC16_HI,"ax",@progbits
-.globl .FR_PPC64_TOC16_HI
-.FR_PPC64_TOC16_HI:
   addis 1, 2, .L1@toc@h
 
-# CHECK: Disassembly of section .R_PPC64_TOC16_HI:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_TOC16_HI:
+# CHECK-LABEL: Disassembly of section .R_PPC64_TOC16_HI:
 # CHECK: 10010014:       addis 1, 2, -1
 
 .section .R_PPC64_TOC16_HA,"ax",@progbits
-.globl .FR_PPC64_TOC16_HA
-.FR_PPC64_TOC16_HA:
   addis 1, 2, .L1@toc@ha
 
-# CHECK: Disassembly of section .R_PPC64_TOC16_HA:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_TOC16_HA:
+# CHECK-LABEL: Disassembly of section .R_PPC64_TOC16_HA:
 # CHECK: 10010018:       addis 1, 2, 0
 
 .section .R_PPC64_REL24,"ax",@progbits
-.globl .FR_PPC64_REL24
-.FR_PPC64_REL24:
-  b .Lfoox
-.section .R_PPC64_REL24_2,"ax",@progbits
-.Lfoox:
-
-# CHECK: Disassembly of section .R_PPC64_REL24:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_REL24:
+  b 1f
+1:
+
+# CHECK-LABEL: Disassembly of section .R_PPC64_REL24:
 # CHECK: 1001001c:       b .+4
 
 .section .R_PPC64_REL14,"ax",@progbits
-.globl .FR_PPC64_REL14
-.FR_PPC64_REL14:
-  beq .Lfooy
-.section .R_PPC64_REL14_2,"ax",@progbits
-.Lfooy:
-
-# CHECK: Disassembly of section .R_PPC64_REL14:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_REL14:
+  beq 1f
+1:
+
+# CHECK-LABEL: Disassembly of section .R_PPC64_REL14:
 # CHECK: 10010020:       bt 2, .+4
 
 .section .R_PPC64_ADDR16_LO,"ax",@progbits
-.globl .FR_PPC64_ADDR16_LO
-.FR_PPC64_ADDR16_LO:
   li 1, .Lfoo@l
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_LO:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_LO:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_LO:
 # CHECK: 10010024:       li 1, 0
 
 .section .R_PPC64_ADDR16_HI,"ax",@progbits
-.globl .FR_PPC64_ADDR16_HI
-.FR_PPC64_ADDR16_HI:
   li 1, .Lfoo@h
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_HI:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_HI:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_HI:
 # CHECK: 10010028:       li 1, 4097
 
 .section .R_PPC64_ADDR16_HA,"ax",@progbits
-.globl .FR_PPC64_ADDR16_HA
-.FR_PPC64_ADDR16_HA:
   li 1, .Lfoo@ha
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_HA:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_HA:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_HA:
 # CHECK: 1001002c:       li 1, 4097
 
 .section .R_PPC64_ADDR16_HIGHER,"ax",@progbits
-.globl .FR_PPC64_ADDR16_HIGHER
-.FR_PPC64_ADDR16_HIGHER:
   li 1, .Lfoo@higher
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_HIGHER:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_HIGHER:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_HIGHER:
 # CHECK: 10010030:       li 1, 0
 
 .section .R_PPC64_ADDR16_HIGHERA,"ax",@progbits
-.globl .FR_PPC64_ADDR16_HIGHERA
-.FR_PPC64_ADDR16_HIGHERA:
   li 1, .Lfoo@highera
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_HIGHERA:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_HIGHERA:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_HIGHERA:
 # CHECK: 10010034:       li 1, 0
 
 .section .R_PPC64_ADDR16_HIGHEST,"ax",@progbits
-.globl .FR_PPC64_ADDR16_HIGHEST
-.FR_PPC64_ADDR16_HIGHEST:
   li 1, .Lfoo@highest
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_HIGHEST:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_HIGHEST:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_HIGHEST:
 # CHECK: 10010038:       li 1, 0
 
 .section .R_PPC64_ADDR16_HIGHESTA,"ax",@progbits
-.globl .FR_PPC64_ADDR16_HIGHESTA
-.FR_PPC64_ADDR16_HIGHESTA:
   li 1, .Lfoo@highesta
 
-# CHECK: Disassembly of section .R_PPC64_ADDR16_HIGHESTA:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_ADDR16_HIGHESTA:
+# CHECK-LABEL: Disassembly of section .R_PPC64_ADDR16_HIGHESTA:
 # CHECK: 1001003c:       li 1, 0
 
 .section  .R_PPC64_REL32, "ax",@progbits
-.globl .FR_PPC64_REL32
-.FR_PPC64_REL32:
   addis 5, 2, .LC0@toc@ha
   ld 5, .LC0@toc@l(5)
 .LBB0_2:
@@ -180,16 +124,12 @@ _start:
 # Address of rodata + value stored at rodata entry
 # should equal address of LBB0_2.
 # 0x10000190 + 0xfeb4 = 0x10010044
-# CHECK: Disassembly of section .R_PPC64_REL32:
-# CHECK-EMPTY:
-# CHECK: .FR_PPC64_REL32:
+# CHECK-LABEL: Disassembly of section .R_PPC64_REL32:
 # CHECK: 10010040:       addis 5, 2, 0
 # CHECK: 10010044:       ld 5, -32736(5)
 # CHECK: 10010048:       add 3, 3, 4
 
 .section .R_PPC64_REL64, "ax",@progbits
-.globl  .FR_PPC64_REL64
-.FR_PPC64_REL64:
         .cfi_startproc
         .cfi_personality 148, __foo
         li 0, 1

From 24016eb3746636448ceb1ad6f01b62be4ab00e56 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Fri, 31 May 2019 15:56:27 +0000
Subject: [PATCH 0757/1176] Suppress nothrow/exception spec conflict warning
 when ES is parsed.

The previously added warning ended up causing false positives when
nothrow was used on member functions, where the exception specification
wasn't yet parsed.  So, throw() and noexcept(true) both were incorrectly
warning.  There doesn't seem to be a good way to force these to be parsed
to identify which they are (and likely should not be), so suppress the warning.

For now, unevaluated/uninstantiated are left as warnings as I am not
creative enough to find a reproducer that causes a false positive for
either.

llvm-svn: 362236
---
 clang/lib/Sema/SemaType.cpp                       |  4 +++-
 clang/test/SemaCXX/nothrow-vs-exception-specs.cpp | 13 +++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index eac7c44c76ae4..43ac435a82498 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -6976,7 +6976,10 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
       case EST_BasicNoexcept:
       case EST_NoexceptTrue:
       case EST_NoThrow:
+      case EST_Unparsed:
         // Exception spec doesn't conflict with nothrow, so don't warn.
+        // Unparsed is included in this, since method signatures aren't parsed
+        // until after the fact.
         break;
 
       case EST_Dynamic:
@@ -6985,7 +6988,6 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
       case EST_DependentNoexcept:
       case EST_Unevaluated:
       case EST_Uninstantiated:
-      case EST_Unparsed:
         S.Diag(attr.getLoc(), diag::warn_nothrow_attribute_ignored);
         break;
       }
diff --git a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
index f9bc90e30ed49..563f604067072 100644
--- a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
+++ b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
@@ -53,3 +53,16 @@ __declspec(nothrow) void foo4() noexcept(noexcept(foo1()));
 __declspec(nothrow) void foo5() noexcept(noexcept(foo2()));
 // expected-warning@+1{{'nothrow' attribute conflicts with exception specification; attribute ignored}}
 __declspec(nothrow) void foo6() noexcept(noexcept(foo3()));
+
+// FIXME: It would be nice to be able to warn on these, however at the time we
+// evaluate the nothrow, these have yet to be parsed, so the data is not yet
+// there.
+struct S {
+  __declspec(nothrow) void f1();
+#ifndef CPP17
+  __declspec(nothrow) void f2() throw();
+  __declspec(nothrow) void f3() throw(int);
+#endif
+  __declspec(nothrow) void f4() noexcept(true);
+  __declspec(nothrow) void f5() noexcept(false);
+};

From c3a24e93d52730d9926ed1f8281b3e4b7aece48e Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot@google.com>
Date: Fri, 31 May 2019 16:11:17 +0000
Subject: [PATCH 0758/1176] [PPC] Correctly adjust branch probability in
 PPCReduceCRLogicals

In PPCReduceCRLogicals after splitting the original MBB into 2, the 2 impacted branches still use original branch probability. This is unreasonable. Suppose we have following code, and the probability of each successor is 50%.

    condc = conda || condb
    br condc, label %target, label %fallthrough

It can be transformed to following,

    br conda, label %target, label %newbb
  newbb:
    br condb, label %target, label %fallthrough

Since each branch has a probability of 50% to each successor, the total probability to %fallthrough is 25% now, and the total probability to %target is 75%. This actually changed the original profiling data. A more reasonable probability can be set to 70% to the false side for each branch instruction, so the total probability to %fallthrough is close to 50%.

This patch assumes the branch target with two incoming edges have same edge frequency and computes new probability fore each target, and keep the total probability to original targets unchanged.

Differential Revision: https://reviews.llvm.org/D62430

llvm-svn: 362237
---
 llvm/include/llvm/Support/BranchProbability.h | 13 +++
 .../Target/PowerPC/PPCReduceCRLogicals.cpp    | 41 +++++++--
 llvm/test/CodeGen/PowerPC/reduce_cr.ll        | 88 +++++++++++++++++++
 llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll  |  2 +-
 4 files changed, 137 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/reduce_cr.ll

diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h
index b7dddd56af78d..cd9d369b4f4e6 100644
--- a/llvm/include/llvm/Support/BranchProbability.h
+++ b/llvm/include/llvm/Support/BranchProbability.h
@@ -118,6 +118,13 @@ class BranchProbability {
     return *this;
   }
 
+  BranchProbability &operator/=(BranchProbability RHS) {
+    assert(N != UnknownN && RHS.N != UnknownN &&
+           "Unknown probability cannot participate in arithmetics.");
+    N = (static_cast<uint64_t>(N) * D + RHS.N / 2) / RHS.N;
+    return *this;
+  }
+
   BranchProbability &operator/=(uint32_t RHS) {
     assert(N != UnknownN &&
            "Unknown probability cannot participate in arithmetics.");
@@ -150,6 +157,12 @@ class BranchProbability {
     return Prob;
   }
 
+  BranchProbability operator/(BranchProbability RHS) const {
+    BranchProbability Prob(*this);
+    Prob /= RHS;
+    return Prob;
+  }
+
   BranchProbability operator/(uint32_t RHS) const {
     BranchProbability Prob(*this);
     Prob /= RHS;
diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 45f8907a08e17..8eaa6dfe2bf71 100644
--- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -166,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) {
                                            : *ThisMBB->succ_begin();
   MachineBasicBlock *NewBRTarget =
       BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
-  BranchProbability ProbToNewTarget =
-      !BSI.MBPI ? BranchProbability::getUnknown()
-                : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+  // It's impossible to know the precise branch probability after the split.
+  // But it still needs to be reasonable, the whole probability to original
+  // targets should not be changed.
+  // After split NewBRTarget will get two incoming edges. Assume P0 is the
+  // original branch probability to NewBRTarget, P1 and P2 are new branch
+  // probabilies to NewBRTarget after split. If the two edge frequencies are
+  // same, then
+  //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
+  //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
+  BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
+  BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
+  ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+  ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+  if (BSI.MBPI) {
+    if (BSI.BranchToFallThrough) {
+      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+      ProbFallThrough = ProbToNewTarget.getCompl();
+      ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+      ProbOrigTarget = ProbOrigFallThrough.getCompl();
+    } else {
+      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+      ProbFallThrough = ProbToNewTarget.getCompl();
+      ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+      ProbOrigFallThrough = ProbOrigTarget.getCompl();
+    }
+  }
 
   // Create a new basic block.
   MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -180,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
   // Move everything after SplitBefore into the new block.
   NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
   NewMBB->transferSuccessors(ThisMBB);
+  if (!ProbOrigTarget.isUnknown()) {
+    auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+    NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+    MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+    NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+  }
 
-  // Add the two successors to ThisMBB. The probabilities come from the
-  // existing blocks if available.
+  // Add the two successors to ThisMBB.
   ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
-  ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+  ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
 
   // Add the branches to ThisMBB.
   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/llvm/test/CodeGen/PowerPC/reduce_cr.ll b/llvm/test/CodeGen/PowerPC/reduce_cr.ll
new file mode 100644
index 0000000000000..6ef00d52149a3
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/reduce_cr.ll
@@ -0,0 +1,88 @@
+; RUN: llc -O2 -ppc-reduce-cr-logicals -print-machine-bfi -o - %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-grtev4-linux-gnu"
+
+; First block frequency info
+;CHECK:      block-frequency-info: loop_test
+;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
+;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
+;CHECK-NEXT: - BB2[test1]: float = 1.6667, int = 21
+;CHECK-NEXT: - BB3[optional1]: float = 0.625, int = 8
+
+;CHECK:      block-frequency-info: loop_test
+;CHECK:      block-frequency-info: loop_test
+;CHECK:      block-frequency-info: loop_test
+
+; Last block frequency info
+;CHECK:      block-frequency-info: loop_test
+;CHECK-NEXT: - BB0[entry]: float = 1.0, int = 12
+;CHECK-NEXT: - BB1[for.check]: float = 2.6667, int = 34
+;CHECK-NEXT: - BB2[for.check]: float = 2.1667, int = 27
+;CHECK-NEXT: - BB3[test1]: float = 1.6667, int = 21
+;CHECK-NEXT: - BB4[optional1]: float = 0.625, int = 8
+
+
+define void @loop_test(i32* %tags, i32 %count) {
+entry:
+  br label %for.check
+for.check:
+  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
+  %done.count = icmp ugt i32 %count.loop, 0
+  %tag_ptr = getelementptr inbounds i32, i32* %tags, i32 %count
+  %tag = load i32, i32* %tag_ptr
+  %done.tag = icmp eq i32 %tag, 0
+  %done = and i1 %done.count, %done.tag
+  br i1 %done, label %test1, label %exit, !prof !1
+test1:
+  %tagbit1 = and i32 %tag, 1
+  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
+  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
+optional1:
+  call void @a()
+  call void @a()
+  call void @a()
+  call void @a()
+  br label %test2
+test2:
+  %tagbit2 = and i32 %tag, 2
+  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
+  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
+optional2:
+  call void @b()
+  call void @b()
+  call void @b()
+  call void @b()
+  br label %test3
+test3:
+  %tagbit3 = and i32 %tag, 4
+  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
+  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
+optional3:
+  call void @c()
+  call void @c()
+  call void @c()
+  call void @c()
+  br label %test4
+test4:
+  %tagbit4 = and i32 %tag, 8
+  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
+  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
+optional4:
+  call void @d()
+  call void @d()
+  call void @d()
+  call void @d()
+  br label %for.latch
+for.latch:
+  %count.sub = sub i32 %count.loop, 1
+  br label %for.check
+exit:
+  ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+
+!1 = !{!"branch_weights", i32 5, i32 3}
diff --git a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
index 51bd26d6828cc..b6251c2bc4621 100644
--- a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
+++ b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs -tail-dup-placement=false < %s | FileCheck %s
 ; RUN: llc -ppc-reduce-cr-logicals -verify-machineinstrs \
 ; RUN:   -ppc-gen-isel=false < %s | FileCheck --check-prefix=CHECK-NO-ISEL %s
 target datalayout = "E-m:e-i64:64-n32:64"

From 8ff009a461a3f2e1cfdf4fed0b042db6473f0bda Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Fri, 31 May 2019 16:17:04 +0000
Subject: [PATCH 0759/1176] [NFC][InstCombine] Add unary FNeg tests to fabs.ll

llvm-svn: 362238
---
 llvm/test/Transforms/InstCombine/fabs.ll | 76 ++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index 2dcdc52210a45..ef4fc25dff1a4 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -327,6 +327,17 @@ define double @select_fcmp_nnan_nsz_olt_zero(double %x) {
   ret double %fabs
 }
 
+define double @select_fcmp_nnan_nsz_olt_zero_unary_fneg(double %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_olt_zero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %ltzero = fcmp nnan nsz olt double %x, 0.0
+  %negx = fneg double %x
+  %fabs = select i1 %ltzero, double %negx, double %x
+  ret double %fabs
+}
+
 ; X < -0.0 ? -X : X --> fabs(X)
 
 define float @select_fcmp_nnan_nsz_olt_negzero(float %x) {
@@ -340,6 +351,17 @@ define float @select_fcmp_nnan_nsz_olt_negzero(float %x) {
   ret float %fabs
 }
 
+define float @select_fcmp_nnan_nsz_olt_negzero_unary_fneg(float %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_olt_negzero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %ltzero = fcmp nnan nsz ninf olt float %x, -0.0
+  %negx = fneg float %x
+  %fabs = select i1 %ltzero, float %negx, float %x
+  ret float %fabs
+}
+
 ; X <= 0.0 ? -X : X --> fabs(X)
 
 define double @select_fcmp_nnan_nsz_ole_zero(double %x) {
@@ -353,6 +375,17 @@ define double @select_fcmp_nnan_nsz_ole_zero(double %x) {
   ret double %fabs
 }
 
+define double @select_fcmp_nnan_nsz_ole_zero_unary_fneg(double %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ole_zero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %lezero = fcmp fast ole double %x, 0.0
+  %negx = fneg double %x
+  %fabs = select i1 %lezero, double %negx, double %x
+  ret double %fabs
+}
+
 ; X <= -0.0 ? -X : X --> fabs(X)
 
 define float @select_fcmp_nnan_nsz_ole_negzero(float %x) {
@@ -366,6 +399,17 @@ define float @select_fcmp_nnan_nsz_ole_negzero(float %x) {
   ret float %fabs
 }
 
+define float @select_fcmp_nnan_nsz_ole_negzero_unary_fneg(float %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ole_negzero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %lezero = fcmp nnan nsz ole float %x, -0.0
+  %negx = fneg float %x
+  %fabs = select i1 %lezero, float %negx, float %x
+  ret float %fabs
+}
+
 ; X > 0.0 ? X : (0.0 - X) --> fabs(X)
 
 define <2 x float> @select_fcmp_nnan_nsz_ogt_zero(<2 x float> %x) {
@@ -379,6 +423,17 @@ define <2 x float> @select_fcmp_nnan_nsz_ogt_zero(<2 x float> %x) {
   ret <2 x float> %fabs
 }
 
+define <2 x float> @select_fcmp_nnan_nsz_ogt_zero_unary_fneg(<2 x float> %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_ogt_zero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz arcp <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
+;
+  %gtzero = fcmp nnan nsz arcp ogt <2 x float> %x, zeroinitializer
+  %negx = fneg <2 x float> %x
+  %fabs = select <2 x i1> %gtzero, <2 x float> %x, <2 x float> %negx
+  ret <2 x float> %fabs
+}
+
 ; X > -0.0 ? X : (0.0 - X) --> fabs(X)
 
 define half @select_fcmp_nnan_nsz_ogt_negzero(half %x) {
@@ -405,6 +460,17 @@ define <2 x double> @select_fcmp_nnan_nsz_oge_zero(<2 x double> %x) {
   ret <2 x double> %fabs
 }
 
+define <2 x double> @select_fcmp_nnan_nsz_oge_zero_unary_fneg(<2 x double> %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_oge_zero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call reassoc nnan nsz <2 x double> @llvm.fabs.v2f64(<2 x double> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %gezero = fcmp nnan nsz reassoc oge <2 x double> %x, zeroinitializer
+  %negx = fneg <2 x double> %x
+  %fabs = select <2 x i1> %gezero, <2 x double> %x, <2 x double> %negx
+  ret <2 x double> %fabs
+}
+
 ; X > -0.0 ? X : (0.0 - X) --> fabs(X)
 
 define half @select_fcmp_nnan_nsz_oge_negzero(half %x) {
@@ -418,3 +484,13 @@ define half @select_fcmp_nnan_nsz_oge_negzero(half %x) {
   ret half %fabs
 }
 
+define half @select_fcmp_nnan_nsz_oge_negzero_unary_fneg(half %x) {
+; CHECK-LABEL: @select_fcmp_nnan_nsz_oge_negzero_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT:    ret half [[TMP1]]
+;
+  %gezero = fcmp nnan nsz oge half %x, -0.0
+  %negx = fneg half %x
+  %fabs = select i1 %gezero, half %x, half %negx
+  ret half %fabs
+}

From fbbe5230f434104d10e56bf54d0dbc5d7b9aad18 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Fri, 31 May 2019 16:19:26 +0000
Subject: [PATCH 0760/1176] [AMDGPU] Use InliningThresholdMultiplier for inline
 hint

AMDGPU uses multiplier 9 for the inline cost. It is taken into account
everywhere except for inline hint threshold. As a result we are penalizing
functions with the inline hint making them less probable to be inlined
than those without the hint. Defaults are 225 for a normal function and
325 for a function with an inline hint. Currently we have effective
threshold 225 * 9 = 2025 for normal functions and just 325 for those with
the hint. That is fixed by this patch.

Differential Revision: https://reviews.llvm.org/D62707

llvm-svn: 362239
---
 llvm/lib/Target/AMDGPU/AMDGPUInline.cpp       |  3 +-
 .../Transforms/Inline/AMDGPU/inline-hint.ll   | 77 +++++++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/Inline/AMDGPU/inline-hint.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index 22c7c0d51f457..72388a195de6b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -111,7 +111,8 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
     Callee->hasFnAttribute(Attribute::InlineHint);
   if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
       && !Caller->hasFnAttribute(Attribute::MinSize))
-    Thres = Params.HintThreshold.getValue();
+    Thres = Params.HintThreshold.getValue() *
+            TTIWP->getTTI(*Callee).getInliningThresholdMultiplier();
 
   const DataLayout &DL = Caller->getParent()->getDataLayout();
   if (!Callee)
diff --git a/llvm/test/Transforms/Inline/AMDGPU/inline-hint.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-hint.ll
new file mode 100644
index 0000000000000..f1cc19bd15725
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/inline-hint.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-inline --inline-threshold=1 --inlinehint-threshold=2 < %s | FileCheck %s
+
+define hidden <16 x i32> @div_hint(<16 x i32> %x, <16 x i32> %y) #0 {
+entry:
+  %div.1 = udiv <16 x i32> %x, %y
+  %div.2 = udiv <16 x i32> %div.1, %y
+  %div.3 = udiv <16 x i32> %div.2, %y
+  %div.4 = udiv <16 x i32> %div.3, %y
+  %div.5 = udiv <16 x i32> %div.4, %y
+  %div.6 = udiv <16 x i32> %div.5, %y
+  %div.7 = udiv <16 x i32> %div.6, %y
+  %div.8 = udiv <16 x i32> %div.7, %y
+  %div.9 = udiv <16 x i32> %div.8, %y
+  %div.10 = udiv <16 x i32> %div.9, %y
+  %div.11 = udiv <16 x i32> %div.10, %y
+  %div.12 = udiv <16 x i32> %div.11, %y
+  %div.13 = udiv <16 x i32> %div.12, %y
+  %div.14 = udiv <16 x i32> %div.13, %y
+  %div.15 = udiv <16 x i32> %div.14, %y
+  %div.16 = udiv <16 x i32> %div.15, %y
+  %div.17 = udiv <16 x i32> %div.16, %y
+  %div.18 = udiv <16 x i32> %div.17, %y
+  %div.19 = udiv <16 x i32> %div.18, %y
+  ret <16 x i32> %div.19
+}
+
+; CHECK-LABEL: define amdgpu_kernel void @caller_hint
+; CHECK-NOT: call
+; CHECK: udiv
+; CHECK: ret void
+define amdgpu_kernel void @caller_hint(<16 x i32> addrspace(1)* nocapture %x, <16 x i32> addrspace(1)* nocapture readonly %y) {
+entry:
+  %tmp = load <16 x i32>, <16 x i32> addrspace(1)* %x, align 4
+  %tmp1 = load <16 x i32>, <16 x i32> addrspace(1)* %y, align 4
+  %div.i = tail call <16 x i32> @div_hint(<16 x i32> %tmp, <16 x i32> %tmp1) #0
+  store <16 x i32> %div.i, <16 x i32> addrspace(1)* %x, align 4
+  ret void
+}
+
+define hidden <16 x i32> @div_nohint(<16 x i32> %x, <16 x i32> %y) {
+entry:
+  %div.1 = udiv <16 x i32> %x, %y
+  %div.2 = udiv <16 x i32> %div.1, %y
+  %div.3 = udiv <16 x i32> %div.2, %y
+  %div.4 = udiv <16 x i32> %div.3, %y
+  %div.5 = udiv <16 x i32> %div.4, %y
+  %div.6 = udiv <16 x i32> %div.5, %y
+  %div.7 = udiv <16 x i32> %div.6, %y
+  %div.8 = udiv <16 x i32> %div.7, %y
+  %div.9 = udiv <16 x i32> %div.8, %y
+  %div.10 = udiv <16 x i32> %div.9, %y
+  %div.11 = udiv <16 x i32> %div.10, %y
+  %div.12 = udiv <16 x i32> %div.11, %y
+  %div.13 = udiv <16 x i32> %div.12, %y
+  %div.14 = udiv <16 x i32> %div.13, %y
+  %div.15 = udiv <16 x i32> %div.14, %y
+  %div.16 = udiv <16 x i32> %div.15, %y
+  %div.17 = udiv <16 x i32> %div.16, %y
+  %div.18 = udiv <16 x i32> %div.17, %y
+  %div.19 = udiv <16 x i32> %div.18, %y
+  ret <16 x i32> %div.19
+}
+
+; CHECK-LABEL: define amdgpu_kernel void @caller_nohint
+; CHECK-NOT: udiv
+; CHECK: tail call <16 x i32> @div_nohint
+; CHECK: ret void
+define amdgpu_kernel void @caller_nohint(<16 x i32> addrspace(1)* nocapture %x, <16 x i32> addrspace(1)* nocapture readonly %y) {
+entry:
+  %tmp = load <16 x i32>, <16 x i32> addrspace(1)* %x
+  %tmp1 = load <16 x i32>, <16 x i32> addrspace(1)* %y
+  %div.i = tail call <16 x i32> @div_nohint(<16 x i32> %tmp, <16 x i32> %tmp1)
+  store <16 x i32> %div.i, <16 x i32> addrspace(1)* %x
+  ret void
+}
+
+attributes #0 = { inlinehint }

From a33964b57006a178ffe0f982b1dec6c3f8d1a6a5 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Fri, 31 May 2019 16:27:44 +0000
Subject: [PATCH 0761/1176] [FormatEntity] Ignore ASCII escape sequences when
 colors are disabled.

This patch makes the FormatEntity honor the debugger's color settings by
not inserting ASCII escape sequences when colors are disabled.

Differential revision: https://reviews.llvm.org/D62714

llvm-svn: 362240
---
 lldb/include/lldb/Core/FormatEntity.h         |  2 +-
 lldb/lit/Settings/Inputs/main.c               |  2 ++
 lldb/lit/Settings/TestFrameFormatColor.test   | 12 +++++++++
 lldb/lit/Settings/TestFrameFormatNoColor.test | 12 +++++++++
 lldb/source/Core/FormatEntity.cpp             | 27 ++++++++++---------
 5 files changed, 42 insertions(+), 13 deletions(-)
 create mode 100644 lldb/lit/Settings/Inputs/main.c
 create mode 100644 lldb/lit/Settings/TestFrameFormatColor.test
 create mode 100644 lldb/lit/Settings/TestFrameFormatNoColor.test

diff --git a/lldb/include/lldb/Core/FormatEntity.h b/lldb/include/lldb/Core/FormatEntity.h
index a0ece6833cd83..634d9df2ad839 100644
--- a/lldb/include/lldb/Core/FormatEntity.h
+++ b/lldb/include/lldb/Core/FormatEntity.h
@@ -41,7 +41,7 @@ class FormatEntity {
       Invalid,
       ParentNumber,
       ParentString,
-      InsertString,
+      EscapeCode,
       Root,
       String,
       Scope,
diff --git a/lldb/lit/Settings/Inputs/main.c b/lldb/lit/Settings/Inputs/main.c
new file mode 100644
index 0000000000000..c029ddd96cd52
--- /dev/null
+++ b/lldb/lit/Settings/Inputs/main.c
@@ -0,0 +1,2 @@
+int foo() { return 0; }
+int main() { return foo(); }
diff --git a/lldb/lit/Settings/TestFrameFormatColor.test b/lldb/lit/Settings/TestFrameFormatColor.test
new file mode 100644
index 0000000000000..87be0a3749f35
--- /dev/null
+++ b/lldb/lit/Settings/TestFrameFormatColor.test
@@ -0,0 +1,12 @@
+# RUN: %clang -g -O0 %S/Inputs/main.c -o %t.out
+# RUN: %lldb -x -b -s %s %t.out | FileCheck %s
+settings set use-color true
+settings set -f frame-format "frame #${frame.index}: \`${ansi.fg.green}{${function.name-with-args}${ansi.normal}\n"
+b foo
+run
+bt
+c
+q
+
+# Check the ASCII escape code
+# CHECK: 
diff --git a/lldb/lit/Settings/TestFrameFormatNoColor.test b/lldb/lit/Settings/TestFrameFormatNoColor.test
new file mode 100644
index 0000000000000..dd1a320f8829c
--- /dev/null
+++ b/lldb/lit/Settings/TestFrameFormatNoColor.test
@@ -0,0 +1,12 @@
+# RUN: %clang -g -O0 %S/Inputs/main.c -o %t.out
+# RUN: %lldb -x -b -s %s %t.out | FileCheck %s
+settings set use-color false
+settings set -f frame-format "frame #${frame.index}: \`${ansi.fg.green}{${function.name-with-args}${ansi.normal}\n"
+b foo
+run
+bt
+c
+q
+
+# Check the ASCII escape code
+# CHECK-NOT: 
diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp
index 6a88f3c541523..6af3a47ca1ce6 100644
--- a/lldb/source/Core/FormatEntity.cpp
+++ b/lldb/source/Core/FormatEntity.cpp
@@ -94,7 +94,7 @@ enum FileKind { FileError = 0, Basename, Dirname, Fullpath };
         static_cast<uint32_t>(llvm::array_lengthof(c)), c, true                \
   }
 #define ENTRY_STRING(n, s)                                                     \
-  { n, s, FormatEntity::Entry::Type::InsertString, 0, 0, nullptr, false }
+  { n, s, FormatEntity::Entry::Type::EscapeCode, 0, 0, nullptr, false }
 static FormatEntity::Entry::Definition g_string_entry[] = {
     ENTRY("*", ParentString)};
 
@@ -307,7 +307,7 @@ const char *FormatEntity::Entry::TypeToCString(Type t) {
     ENUM_TO_CSTR(Invalid);
     ENUM_TO_CSTR(ParentNumber);
     ENUM_TO_CSTR(ParentString);
-    ENUM_TO_CSTR(InsertString);
+    ENUM_TO_CSTR(EscapeCode);
     ENUM_TO_CSTR(Root);
     ENUM_TO_CSTR(String);
     ENUM_TO_CSTR(Scope);
@@ -1102,8 +1102,17 @@ bool FormatEntity::Format(const Entry &entry, Stream &s,
                                   // FormatEntity::Entry::Definition encoding
   case Entry::Type::ParentString: // Only used for
                                   // FormatEntity::Entry::Definition encoding
-  case Entry::Type::InsertString: // Only used for
-                                  // FormatEntity::Entry::Definition encoding
+    return false;
+  case Entry::Type::EscapeCode:
+    if (exe_ctx) {
+      if (Target *target = exe_ctx->GetTargetPtr()) {
+        Debugger &debugger = target->GetDebugger();
+        if (debugger.GetUseColor()) {
+          s.PutCString(entry.string);
+          return true;
+        }
+      }
+    }
     return false;
 
   case Entry::Type::Root:
@@ -1896,7 +1905,7 @@ static Status ParseEntry(const llvm::StringRef &format_str,
         entry.number = entry_def->data;
         return error; // Success
 
-      case FormatEntity::Entry::Type::InsertString:
+      case FormatEntity::Entry::Type::EscapeCode:
         entry.type = entry_def->type;
         entry.string = entry_def->string;
         return error; // Success
@@ -2265,13 +2274,7 @@ Status FormatEntity::ParseInternal(llvm::StringRef &format, Entry &parent_entry,
               return error;
             }
           }
-          // Check if this entry just wants to insert a constant string value
-          // into the parent_entry, if so, insert the string with AppendText,
-          // else append the entry to the parent_entry.
-          if (entry.type == Entry::Type::InsertString)
-            parent_entry.AppendText(entry.string.c_str());
-          else
-            parent_entry.AppendEntry(std::move(entry));
+          parent_entry.AppendEntry(std::move(entry));
         }
       }
       break;

From ac79007205ef96f81acef54c9ded34b6e6a8dcb4 Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal" <kevin.neal@sas.com>
Date: Fri, 31 May 2019 16:32:12 +0000
Subject: [PATCH 0762/1176] Revert revert of r362112 with minor SystemZ test
 file corrections.

[FPEnv] Added a special UnrollVectorOp method to deal with the chain on StrictFP opcodes

This change creates UnrollVectorOp_StrictFP. The purpose of this is to address a failure that consistently occurs when calling StrictFP functions on vectors whose number of elements is 3 + 2n on most platforms, such as PowerPC or SystemZ. The old UnrollVectorOp method does not expect that the vector that it will unroll will have a chain, so it has an assert that prevents it from running if this is the case. This new StrictFP version of the method deals with the chain while unrolling the vector. With this new function in place during vector widending, llc can run vector-constrained-fp-intrinsics.ll for SystemZ successfully.

Submitted by:	Drew Wock <drew.wock@sas.com>
Reviewed by:	Cameron McInally, Kevin P. Neal
Approved by:	Cameron McInally
Differential Revision:	https://reviews.llvm.org/D62546

llvm-svn: 362241
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |    5 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   59 +-
 .../vector-constrained-fp-intrinsics.ll       | 6449 +++++++++++++++++
 3 files changed, 6512 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index a0e7c8a89c187..8beaf145e0f85 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -857,6 +857,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
   SDValue WidenVecOp_VECREDUCE(SDNode *N);
 
+  /// Helper function to generate a set of operations to perform
+  /// a vector operation for a wider type.
+  ///
+  SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
+
   //===--------------------------------------------------------------------===//
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index aefc2aabf64b4..072f15b1f4c3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1318,6 +1318,63 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
   ReplaceValueWith(SDValue(N, 1), Chain);
 }
 
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  unsigned NE = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDLoc dl(N);
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+  // If ResNE is 0, fully unroll the vector op.
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  //The results of each unrolled operation, including the chain.
+  EVT ChainVTs[] = {EltVT, MVT::Other};
+  SmallVector<SDValue, 8> Chains;
+
+  unsigned i;
+  for (i = 0; i != NE; ++i) {
+    Operands[0] = Chain;
+    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+      SDValue Operand = N->getOperand(j);
+      EVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        EVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+                    DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+                          DAG.getDataLayout())));
+      } else {
+        Operands[j] = Operand;
+      }
+    }
+    SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+    Scalar.getNode()->setFlags(N->getFlags());
+
+    //Add in the scalar as well as its chain value to the
+    //result vectors.
+    Scalars.push_back(Scalar);
+    Chains.push_back(Scalar.getValue(1));
+  }
+
+  for (; i < ResNE; ++i)
+    Scalars.push_back(DAG.getUNDEF(EltVT));
+
+  // Build a new factor node to connect the chain back together.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  // Create a new BUILD_VECTOR node
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+  return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
 void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
                                               SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
@@ -2968,7 +3025,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
 
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
-    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+    return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
 
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
new file mode 100644
index 0000000000000..2e3aea01f0b95
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -0,0 +1,6449 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=s390x-linux-gnu  < %s | FileCheck --check-prefix=S390X %s
+; RUN: llc -O3 -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck --check-prefix=SZ13 %s
+
+define <1 x float> @constrained_vector_fdiv_v1f32() {
+; S390X-LABEL: constrained_vector_fdiv_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI0_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI0_1
+; S390X-NEXT:    deb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI0_0
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    deb %f0, 0(%r1)
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %div
+}
+
+define <2 x double> @constrained_vector_fdiv_v2f64() {
+; S390X-LABEL: constrained_vector_fdiv_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI1_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI1_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI1_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ddbr %f0, %f1
+; S390X-NEXT:    ddbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI1_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI1_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+define <3 x float> @constrained_vector_fdiv_v3f32() {
+; S390X-LABEL: constrained_vector_fdiv_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI2_0
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_1
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_2
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_3
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    debr %f0, %f1
+; S390X-NEXT:    debr %f2, %f1
+; S390X-NEXT:    debr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI2_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI2_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    debr %f1, %f0
+; SZ13-NEXT:    vgmf %v2, 2, 8
+; SZ13-NEXT:    vgmf %v3, 1, 1
+; SZ13-NEXT:    debr %f2, %f0
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    debr %f3, %f0
+; SZ13-NEXT:    vmrhf %v0, %v2, %v3
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %div
+}
+
+define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fdiv_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI3_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI3_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI3_0
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ddb %f1, 16(%r2)
+; S390X-NEXT:    ddb %f0, 8(%r2)
+; S390X-NEXT:    ddb %f2, 0(%r2)
+; S390X-NEXT:    std %f1, 16(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
+; S390X-NEXT:    std %f2, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI3_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v0, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI3_1
+; SZ13-NEXT:    ldeb %f1, 0(%r1)
+; SZ13-NEXT:    ddb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %div, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fdiv_v4f64() {
+; S390X-LABEL: constrained_vector_fdiv_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI4_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_4
+; S390X-NEXT:    ldeb %f6, 0(%r1)
+; S390X-NEXT:    ddbr %f0, %f1
+; S390X-NEXT:    ddbr %f2, %f1
+; S390X-NEXT:    ddbr %f4, %f1
+; S390X-NEXT:    ddbr %f6, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI4_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI4_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v24, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI4_2
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %div
+}
+
+define <1 x float> @constrained_vector_frem_v1f32() {
+; S390X-LABEL: constrained_vector_frem_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI5_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI5_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI5_0
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %rem
+}
+
+define <2 x double> @constrained_vector_frem_v2f64() {
+; S390X-LABEL: constrained_vector_frem_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI6_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI6_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI6_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -184
+; SZ13-NEXT:    .cfi_def_cfa_offset 344
+; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI6_0
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %rem
+}
+
+define <3 x float> @constrained_vector_frem_v3f32() {
+; S390X-LABEL: constrained_vector_frem_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI7_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI7_1
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    larl %r1, .LCPI7_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    larl %r1, .LCPI7_3
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI7_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI7_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmf %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %rem
+}
+
+define void @constrained_vector_frem_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_frem_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f2, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI8_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI8_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI8_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v2, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v2, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI8_0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %rem, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_frem_v4f64() {
+; S390X-LABEL: constrained_vector_frem_v4f64:
+; S390X:       # %bb.0:
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    larl %r1, .LCPI9_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI9_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI9_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI9_3
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    larl %r1, .LCPI9_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ldr %f2, %f10
+; S390X-NEXT:    ldr %f4, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v4f64:
+; SZ13:       # %bb.0:
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI9_0
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI9_1
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    larl %r1, .LCPI9_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %rem
+}
+
+define <1 x float> @constrained_vector_fmul_v1f32() {
+; S390X-LABEL: constrained_vector_fmul_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI10_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI10_1
+; S390X-NEXT:    meeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 1, 1
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    meebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 2.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %mul
+}
+
+define <2 x double> @constrained_vector_fmul_v2f64() {
+; S390X-LABEL: constrained_vector_fmul_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI11_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI11_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI11_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    mdbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI11_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI11_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfmdb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %mul
+}
+
+define <3 x float> @constrained_vector_fmul_v3f32() {
+; S390X-LABEL: constrained_vector_fmul_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI12_0
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_1
+; S390X-NEXT:    ler %f0, %f4
+; S390X-NEXT:    meeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_2
+; S390X-NEXT:    ler %f2, %f4
+; S390X-NEXT:    meeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_3
+; S390X-NEXT:    meeb %f4, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 1, 8
+; SZ13-NEXT:    larl %r1, .LCPI12_0
+; SZ13-NEXT:    vgmf %v2, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    meeb %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI12_1
+; SZ13-NEXT:    meebr %f2, %f0
+; SZ13-NEXT:    meeb %f0, 0(%r1)
+; SZ13-NEXT:    vmrhf %v0, %v2, %v0
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
+           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
+                        float 0x7FF0000000000000>,
+           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %mul
+}
+
+define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fmul_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI13_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    mdb %f0, 16(%r2)
+; S390X-NEXT:    mdb %f2, 8(%r2)
+; S390X-NEXT:    mdb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI13_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI13_1
+; SZ13-NEXT:    vfmdb %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    mdb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                        double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %mul, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fmul_v4f64() {
+; S390X-LABEL: constrained_vector_fmul_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI14_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_4
+; S390X-NEXT:    ldeb %f6, 0(%r1)
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    mdbr %f2, %f1
+; S390X-NEXT:    mdbr %f4, %f1
+; S390X-NEXT:    mdbr %f6, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI14_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI14_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI14_2
+; SZ13-NEXT:    vfmdb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfmdb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 2.000000e+00, double 3.000000e+00,
+                         double 4.000000e+00, double 5.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %mul
+}
+
+define <1 x float> @constrained_vector_fadd_v1f32() {
+; S390X-LABEL: constrained_vector_fadd_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI15_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI15_1
+; S390X-NEXT:    aeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    aebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %add
+}
+
+define <2 x double> @constrained_vector_fadd_v2f64() {
+; S390X-LABEL: constrained_vector_fadd_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI16_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI16_1
+; S390X-NEXT:    ld %f2, 0(%r1)
+; S390X-NEXT:    adbr %f0, %f2
+; S390X-NEXT:    larl %r1, .LCPI16_2
+; S390X-NEXT:    adb %f2, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI16_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI16_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %add
+}
+
+define <3 x float> @constrained_vector_fadd_v3f32() {
+; S390X-LABEL: constrained_vector_fadd_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI17_0
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI17_1
+; S390X-NEXT:    ler %f2, %f1
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    aeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI17_2
+; S390X-NEXT:    aeb %f2, 0(%r1)
+; S390X-NEXT:    lzer %f4
+; S390X-NEXT:    aebr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgbm %v0, 15
+; SZ13-NEXT:    vgmf %v2, 1, 1
+; SZ13-NEXT:    vgmf %v3, 2, 8
+; SZ13-NEXT:    lzer %f1
+; SZ13-NEXT:    aebr %f1, %f0
+; SZ13-NEXT:    aebr %f2, %f0
+; SZ13-NEXT:    aebr %f3, %f0
+; SZ13-NEXT:    vmrhf %v0, %v2, %v3
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %add
+}
+
+define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fadd_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI18_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    adb %f0, 16(%r2)
+; S390X-NEXT:    adb %f2, 8(%r2)
+; S390X-NEXT:    adb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI18_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI18_1
+; SZ13-NEXT:    vfadb %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    adb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %add, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fadd_v4f64() {
+; S390X-LABEL: constrained_vector_fadd_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI19_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI19_1
+; S390X-NEXT:    ld %f6, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI19_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    adbr %f0, %f6
+; S390X-NEXT:    larl %r1, .LCPI19_2
+; S390X-NEXT:    ldr %f2, %f6
+; S390X-NEXT:    adb %f2, 0(%r1)
+; S390X-NEXT:    adbr %f4, %f6
+; S390X-NEXT:    larl %r1, .LCPI19_4
+; S390X-NEXT:    adb %f6, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI19_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI19_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI19_2
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfadb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %add
+}
+
+define <1 x float> @constrained_vector_fsub_v1f32() {
+; S390X-LABEL: constrained_vector_fsub_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI20_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI20_1
+; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    sebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %sub
+}
+
+define <2 x double> @constrained_vector_fsub_v2f64() {
+; S390X-LABEL: constrained_vector_fsub_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI21_1
+; S390X-NEXT:    ld %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI21_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f0, %f2
+; S390X-NEXT:    larl %r1, .LCPI21_2
+; S390X-NEXT:    sdb %f2, 0(%r1)
+; S390X-NEXT:    sdbr %f0, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI21_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sub
+}
+
+define <3 x float> @constrained_vector_fsub_v3f32() {
+; S390X-LABEL: constrained_vector_fsub_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI22_0
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI22_1
+; S390X-NEXT:    ler %f0, %f4
+; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI22_2
+; S390X-NEXT:    ler %f2, %f4
+; S390X-NEXT:    seb %f2, 0(%r1)
+; S390X-NEXT:    lzer %f1
+; S390X-NEXT:    sebr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgbm %v2, 15
+; SZ13-NEXT:    lzer %f1
+; SZ13-NEXT:    sebr %f2, %f1
+; SZ13-NEXT:    vgmf %v1, 1, 1
+; SZ13-NEXT:    vgbm %v3, 15
+; SZ13-NEXT:    vgbm %v0, 15
+; SZ13-NEXT:    sebr %f3, %f1
+; SZ13-NEXT:    vgmf %v1, 2, 8
+; SZ13-NEXT:    sebr %f0, %f1
+; SZ13-NEXT:    vmrhf %v0, %v3, %v0
+; SZ13-NEXT:    vrepf %v1, %v2, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %sub
+}
+
+define void @constrained_vector_fsub_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fsub_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI23_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    sdb %f0, 16(%r2)
+; S390X-NEXT:    sdb %f2, 8(%r2)
+; S390X-NEXT:    sdb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    vfsdb %v0, %v1, %v0
+; SZ13-NEXT:    sdb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
+           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %sub, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fsub_v4f64() {
+; S390X-LABEL: constrained_vector_fsub_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI24_1
+; S390X-NEXT:    ld %f6, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI24_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f0, %f6
+; S390X-NEXT:    larl %r1, .LCPI24_2
+; S390X-NEXT:    ldr %f2, %f6
+; S390X-NEXT:    sdb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI24_3
+; S390X-NEXT:    ldeb %f3, 0(%r1)
+; S390X-NEXT:    ldr %f4, %f6
+; S390X-NEXT:    larl %r1, .LCPI24_4
+; S390X-NEXT:    sdb %f6, 0(%r1)
+; S390X-NEXT:    sdbr %f0, %f1
+; S390X-NEXT:    sdbr %f4, %f3
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI24_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    larl %r1, .LCPI24_1
+; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsdb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
+           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %sub
+}
+
+define <1 x float> @constrained_vector_sqrt_v1f32() {
+; S390X-LABEL: constrained_vector_sqrt_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI25_0
+; S390X-NEXT:    sqeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI25_0
+; SZ13-NEXT:    sqeb %f0, 0(%r1)
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
+                              <1 x float> <float 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %sqrt
+}
+
+define <2 x double> @constrained_vector_sqrt_v2f64() {
+; S390X-LABEL: constrained_vector_sqrt_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI26_0
+; S390X-NEXT:    larl %r2, .LCPI26_1
+; S390X-NEXT:    ldeb %f0, 0(%r2)
+; S390X-NEXT:    sqdb %f2, 0(%r1)
+; S390X-NEXT:    sqdbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI26_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %sqrt
+}
+
+define <3 x float> @constrained_vector_sqrt_v3f32() {
+; S390X-LABEL: constrained_vector_sqrt_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI27_0
+; S390X-NEXT:    sqeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI27_1
+; S390X-NEXT:    sqeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI27_2
+; S390X-NEXT:    sqeb %f4, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI27_0
+; SZ13-NEXT:    sqeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI27_1
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    sqeb %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI27_2
+; SZ13-NEXT:    sqeb %f2, 0(%r1)
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sqrt
+}
+
+define void @constrained_vector_sqrt_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_sqrt_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    sqdb %f0, 16(%r2)
+; S390X-NEXT:    sqdb %f1, 8(%r2)
+; S390X-NEXT:    sqdb %f2, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f2, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    sqdb %f1, 16(%r2)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vfsqdb %v0, %v0
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %sqrt, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_sqrt_v4f64() {
+; S390X-LABEL: constrained_vector_sqrt_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI29_0
+; S390X-NEXT:    sqdb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_1
+; S390X-NEXT:    sqdb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_3
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_2
+; S390X-NEXT:    sqdb %f6, 0(%r1)
+; S390X-NEXT:    sqdbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI29_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v24, %v0
+; SZ13-NEXT:    larl %r1, .LCPI29_1
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v26, %v0
+; SZ13-NEXT:    br %r14
+ entry:
+  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %sqrt
+}
+
+define <1 x float> @constrained_vector_pow_v1f32() {
+; S390X-LABEL: constrained_vector_pow_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI30_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI30_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI30_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI30_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
+                             <1 x float> <float 42.0>,
+                             <1 x float> <float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %pow
+}
+
+define <2 x double> @constrained_vector_pow_v2f64() {
+; S390X-LABEL: constrained_vector_pow_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI31_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI31_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI31_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -184
+; SZ13-NEXT:    .cfi_def_cfa_offset 344
+; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI31_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI31_1
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI31_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
+                             <2 x double> <double 42.1, double 42.2>,
+                             <2 x double> <double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %pow
+}
+
+define <3 x float> @constrained_vector_pow_v3f32() {
+; S390X-LABEL: constrained_vector_pow_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI32_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI32_1
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    larl %r1, .LCPI32_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    larl %r1, .LCPI32_3
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI32_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI32_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI32_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI32_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
+                             <3 x float> <float 42.0, float 43.0, float 44.0>,
+                             <3 x float> <float 3.0, float 3.0, float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <3 x float> %pow
+}
+
+define void @constrained_vector_pow_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_pow_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI33_0
+; S390X-NEXT:    ldeb %f9, 0(%r1)
+; S390X-NEXT:    ld %f10, 8(%r2)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f10, 8(%r13)
+; S390X-NEXT:    std %f11, 0(%r13)
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -208
+; SZ13-NEXT:    .cfi_def_cfa_offset 368
+; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    .cfi_offset %f9, -176
+; SZ13-NEXT:    larl %r1, .LCPI33_0
+; SZ13-NEXT:    ldeb %f9, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %pow, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_pow_v4f64() {
+; S390X-LABEL: constrained_vector_pow_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    larl %r1, .LCPI34_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI34_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI34_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI34_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI34_4
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ldr %f2, %f10
+; S390X-NEXT:    ldr %f4, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI34_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI34_1
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI34_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI34_3
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI34_4
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
+                             <4 x double> <double 42.1, double 42.2,
+                                           double 42.3, double 42.4>,
+                             <4 x double> <double 3.0, double 3.0,
+                                           double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %pow
+}
+
+define <1 x float> @constrained_vector_powi_v1f32() {
+; S390X-LABEL: constrained_vector_powi_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI35_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI35_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
+                              <1 x float> <float 42.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %powi
+}
+
+define <2 x double> @constrained_vector_powi_v2f64() {
+; S390X-LABEL: constrained_vector_powi_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI36_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI36_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI36_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI36_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
+                              <2 x double> <double 42.1, double 42.2>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %powi
+}
+
+define <3 x float> @constrained_vector_powi_v3f32() {
+; S390X-LABEL: constrained_vector_powi_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI37_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI37_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI37_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, __powisf2@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI37_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI37_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI37_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %powi
+}
+
+define void @constrained_vector_powi_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_powi_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI38_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI38_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI38_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    std %f0, 8(%r13)
+; S390X-NEXT:    std %f9, 0(%r13)
+; S390X-NEXT:    std %f8, 16(%r13)
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI38_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI38_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI38_2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 280(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          i32 3,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %powi, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_powi_v4f64() {
+; S390X-LABEL: constrained_vector_powi_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI39_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI39_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI39_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    larl %r1, .LCPI39_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI39_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI39_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI39_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI39_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %powi
+}
+
+define <1 x float> @constrained_vector_sin_v1f32() {
+; S390X-LABEL: constrained_vector_sin_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI40_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI40_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %sin
+}
+
+define <2 x double> @constrained_vector_sin_v2f64() {
+; S390X-LABEL: constrained_vector_sin_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI41_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI41_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI41_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI41_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %sin
+}
+
+define <3 x float> @constrained_vector_sin_v3f32() {
+; S390X-LABEL: constrained_vector_sin_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI42_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    larl %r1, .LCPI42_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    larl %r1, .LCPI42_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, sinf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI42_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI42_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI42_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sin
+}
+
+define void @constrained_vector_sin_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_sin_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %sin, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_sin_v4f64() {
+; S390X-LABEL: constrained_vector_sin_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI44_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI44_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI44_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    larl %r1, .LCPI44_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI44_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI44_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI44_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI44_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %sin
+}
+
+define <1 x float> @constrained_vector_cos_v1f32() {
+; S390X-LABEL: constrained_vector_cos_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI45_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI45_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %cos
+}
+
+define <2 x double> @constrained_vector_cos_v2f64() {
+; S390X-LABEL: constrained_vector_cos_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI46_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI46_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI46_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    larl %r1, .LCPI46_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %cos
+}
+
+define <3 x float> @constrained_vector_cos_v3f32() {
+; S390X-LABEL: constrained_vector_cos_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI47_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    larl %r1, .LCPI47_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    larl %r1, .LCPI47_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, cosf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI47_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI47_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI47_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %cos
+}
+
+define void @constrained_vector_cos_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_cos_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %cos, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_cos_v4f64() {
+; S390X-LABEL: constrained_vector_cos_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI49_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI49_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI49_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    larl %r1, .LCPI49_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI49_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    larl %r1, .LCPI49_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI49_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    larl %r1, .LCPI49_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %cos
+}
+
+define <1 x float> @constrained_vector_exp_v1f32() {
+; S390X-LABEL: constrained_vector_exp_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI50_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI50_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp
+}
+
+define <2 x double> @constrained_vector_exp_v2f64() {
+; S390X-LABEL: constrained_vector_exp_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI51_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI51_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI51_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    larl %r1, .LCPI51_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %exp
+}
+
+define <3 x float> @constrained_vector_exp_v3f32() {
+; S390X-LABEL: constrained_vector_exp_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI52_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    larl %r1, .LCPI52_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    larl %r1, .LCPI52_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, expf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI52_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI52_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI52_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp
+}
+
+define void @constrained_vector_exp_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_exp_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %exp, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_exp_v4f64() {
+; S390X-LABEL: constrained_vector_exp_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI54_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI54_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI54_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    larl %r1, .LCPI54_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI54_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    larl %r1, .LCPI54_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI54_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    larl %r1, .LCPI54_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %exp
+}
+
+define <1 x float> @constrained_vector_exp2_v1f32() {
+; S390X-LABEL: constrained_vector_exp2_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI55_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI55_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp2
+}
+
+define <2 x double> @constrained_vector_exp2_v2f64() {
+; S390X-LABEL: constrained_vector_exp2_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI56_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI56_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI56_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI56_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
+                              <2 x double> <double 42.1, double 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %exp2
+}
+
+define <3 x float> @constrained_vector_exp2_v3f32() {
+; S390X-LABEL: constrained_vector_exp2_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI57_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI57_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI57_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI57_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI57_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI57_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp2
+}
+
+define void @constrained_vector_exp2_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_exp2_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %exp2, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_exp2_v4f64() {
+; S390X-LABEL: constrained_vector_exp2_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI59_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI59_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI59_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI59_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI59_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %exp2
+}
+
+define <1 x float> @constrained_vector_log_v1f32() {
+; S390X-LABEL: constrained_vector_log_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI60_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI60_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log
+}
+
+define <2 x double> @constrained_vector_log_v2f64() {
+; S390X-LABEL: constrained_vector_log_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI61_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI61_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI61_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI61_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %log
+}
+
+define <3 x float> @constrained_vector_log_v3f32() {
+; S390X-LABEL: constrained_vector_log_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI62_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    larl %r1, .LCPI62_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    larl %r1, .LCPI62_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI62_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI62_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI62_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log
+}
+
+define void @constrained_vector_log_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log_v4f64() {
+; S390X-LABEL: constrained_vector_log_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI64_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI64_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI64_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI64_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI64_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %log
+}
+
+define <1 x float> @constrained_vector_log10_v1f32() {
+; S390X-LABEL: constrained_vector_log10_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI65_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI65_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log10
+}
+
+define <2 x double> @constrained_vector_log10_v2f64() {
+; S390X-LABEL: constrained_vector_log10_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI66_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI66_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI66_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI66_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
+                               <2 x double> <double 42.0, double 42.1>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <2 x double> %log10
+}
+
+define <3 x float> @constrained_vector_log10_v3f32() {
+; S390X-LABEL: constrained_vector_log10_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI67_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    larl %r1, .LCPI67_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    larl %r1, .LCPI67_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI67_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI67_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI67_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log10
+}
+
+define void @constrained_vector_log10_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log10_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log10, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log10_v4f64() {
+; S390X-LABEL: constrained_vector_log10_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI69_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI69_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI69_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI69_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI69_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
+                               <4 x double> <double 42.0, double 42.1,
+                                             double 42.2, double 42.3>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <4 x double> %log10
+}
+
+define <1 x float> @constrained_vector_log2_v1f32() {
+; S390X-LABEL: constrained_vector_log2_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI70_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI70_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log2
+}
+
+define <2 x double> @constrained_vector_log2_v2f64() {
+; S390X-LABEL: constrained_vector_log2_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI71_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI71_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI71_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI71_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %log2
+}
+
+define <3 x float> @constrained_vector_log2_v3f32() {
+; S390X-LABEL: constrained_vector_log2_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI72_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI72_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    larl %r1, .LCPI72_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI72_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI72_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI72_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log2
+}
+
+define void @constrained_vector_log2_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log2_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log2, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log2_v4f64() {
+; S390X-LABEL: constrained_vector_log2_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI74_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI74_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI74_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI74_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI74_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI74_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI74_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI74_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %log2
+}
+
+define <1 x float> @constrained_vector_rint_v1f32() {
+; S390X-LABEL: constrained_vector_rint_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI75_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    fiebr %f0, 0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI75_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %rint
+}
+
+define <2 x double> @constrained_vector_rint_v2f64() {
+; S390X-LABEL: constrained_vector_rint_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI76_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI76_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f2, 0, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI76_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> <double 42.1, double 42.0>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32() {
+; S390X-LABEL: constrained_vector_rint_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI77_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI77_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI77_2
+; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    fiebr %f0, 0, %f0
+; S390X-NEXT:    fiebr %f2, 0, %f1
+; S390X-NEXT:    fiebr %f4, 0, %f3
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI77_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI77_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI77_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    fiebr %f1, 0, %f1
+; SZ13-NEXT:    fiebr %f2, 0, %f2
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+ entry:
+  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %rint
+}
+
+define void @constrained_vector_rint_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_rint_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f2, 16(%r2)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f1, 0, %f1
+; S390X-NEXT:    fidbr %f2, 0, %f2
+; S390X-NEXT:    std %f2, 16(%r2)
+; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 0, 0
+; SZ13-NEXT:    fidbra %f0, 0, %f0, 0
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %rint, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_rint_v4f64() {
+; S390X-LABEL: constrained_vector_rint_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI79_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_2
+; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_3
+; S390X-NEXT:    ld %f5, 0(%r1)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f2, 0, %f1
+; S390X-NEXT:    fidbr %f4, 0, %f3
+; S390X-NEXT:    fidbr %f6, 0, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI79_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI79_1
+; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v26, %v0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+                        <4 x double> <double 42.1, double 42.2,
+                                      double 42.3, double 42.4>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32() {
+; S390X-LABEL: constrained_vector_nearbyint_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI80_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI80_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
+                               <1 x float> <float 42.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %nearby
+}
+
+define <2 x double> @constrained_vector_nearbyint_v2f64() {
+; S390X-LABEL: constrained_vector_nearbyint_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI81_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI81_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI81_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                                <2 x double> <double 42.1, double 42.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32() {
+; S390X-LABEL: constrained_vector_nearbyint_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI82_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    larl %r1, .LCPI82_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    larl %r1, .LCPI82_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyintf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI82_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI82_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI82_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 0, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 0, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %nearby
+}
+
+define void @constrained_vector_nearbyint_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_nearbyint_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 0
+; SZ13-NEXT:    fidbra %f0, 0, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %nearby, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_nearbyint_v4f64() {
+; S390X-LABEL: constrained_vector_nearbyint_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI84_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI84_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI84_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    larl %r1, .LCPI84_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI84_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI84_1
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v26, %v0, 4, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+                                <4 x double> <double 42.1, double 42.2,
+                                              double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %nearby
+}
+
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; S390X-LABEL: constrained_vector_maxnum_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI85_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI85_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI85_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI85_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; S390X-LABEL: constrained_vector_maxnum_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI86_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI86_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI86_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI86_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI86_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI86_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI86_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI86_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; S390X-LABEL: constrained_vector_maxnum_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI87_0
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI87_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    larl %r1, .LCPI87_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI87_3
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    larl %r1, .LCPI87_4
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmaxf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI87_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI87_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI87_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI87_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI87_4
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmaxf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %max
+}
+
+define void @constrained_vector_log10_maxnum_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log10_maxnum_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI88_0
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI88_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI88_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_maxnum_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI88_0
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI88_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI88_2
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %max, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; S390X-LABEL: constrained_vector_maxnum_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI89_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI89_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI89_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_5
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    larl %r1, .LCPI89_6
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_7
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI89_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI89_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI89_4
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_5
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    larl %r1, .LCPI89_6
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_7
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; S390X-LABEL: constrained_vector_minnum_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI90_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI90_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI90_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI90_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+ entry:
+  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; S390X-LABEL: constrained_vector_minnum_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI91_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI91_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI91_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI91_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI91_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI91_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI91_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI91_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; S390X-LABEL: constrained_vector_minnum_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI92_0
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI92_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    larl %r1, .LCPI92_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI92_3
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    larl %r1, .LCPI92_4
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fminf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI92_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI92_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI92_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI92_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI92_4
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fminf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %min
+}
+
+define void @constrained_vector_minnum_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_minnum_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI93_0
+; S390X-NEXT:    ldeb %f9, 0(%r1)
+; S390X-NEXT:    ld %f10, 8(%r2)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f10, 8(%r13)
+; S390X-NEXT:    std %f11, 0(%r13)
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -208
+; SZ13-NEXT:    .cfi_def_cfa_offset 368
+; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    .cfi_offset %f9, -176
+; SZ13-NEXT:    larl %r1, .LCPI93_0
+; SZ13-NEXT:    ldeb %f9, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+ %b = load <3 x double>, <3 x double>* %a
+ %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %min, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; S390X-LABEL: constrained_vector_minnum_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI94_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI94_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI94_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_5
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    larl %r1, .LCPI94_6
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_7
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI94_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI94_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI94_4
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_5
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    larl %r1, .LCPI94_6
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_7
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %min
+}
+
+define <1 x float> @constrained_vector_fptrunc_v1f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v1f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI95_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v1f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI95_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    wledb %v24, %f0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
+                                <1 x double><double 42.1>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %result
+}
+
+define <2 x float> @constrained_vector_fptrunc_v2f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI96_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI96_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    ledbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI96_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI96_1
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x float> %result
+}
+
+define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %dest) {
+; S390X-LABEL: constrained_vector_fptrunc_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f1, 16(%r2)
+; S390X-NEXT:    ld %f2, 8(%r2)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    lgdr %r0, %f0
+; S390X-NEXT:    nilf %r0, 0
+; S390X-NEXT:    ledbr %f0, %f2
+; S390X-NEXT:    lgdr %r1, %f0
+; S390X-NEXT:    srlg %r1, %r1, 32
+; S390X-NEXT:    lr %r0, %r1
+; S390X-NEXT:    ledbr %f0, %f1
+; S390X-NEXT:    ste %f0, 8(%r3)
+; S390X-NEXT:    stg %r0, 0(%r3)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ledbra %f2, 0, %f1, 0
+; SZ13-NEXT:    vrepg %v1, %v1, 1
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    vmrhf %v1, %v2, %v1
+; SZ13-NEXT:    vmrhg %v1, %v1, %v1
+; SZ13-NEXT:    ste %f0, 8(%r3)
+; SZ13-NEXT:    vsteg %v1, 0(%r3), 0
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %src
+  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
+                                <3 x double> %b,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  store <3 x float> %result, <3 x float>* %dest
+  ret void
+}
+
+define <4 x float> @constrained_vector_fptrunc_v4f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI98_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_2
+; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_3
+; S390X-NEXT:    ld %f5, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    ledbr %f2, %f1
+; S390X-NEXT:    ledbr %f4, %f3
+; S390X-NEXT:    ledbr %f6, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI98_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI98_1
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    larl %r1, .LCPI98_2
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI98_3
+; SZ13-NEXT:    ld %f2, 0(%r1)
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    ledbra %f2, 0, %f2, 0
+; SZ13-NEXT:    vmrhf %v1, %v2, %v1
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x float> %result
+}
+
+define <1 x double> @constrained_vector_fpext_v1f32() {
+; S390X-LABEL: constrained_vector_fpext_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI99_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI99_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    wldeb %v24, %f0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
+                                <1 x float><float 42.0>,
+                                metadata !"fpexcept.strict")
+  ret <1 x double> %result
+}
+
+define <2 x double> @constrained_vector_fpext_v2f32() {
+; S390X-LABEL: constrained_vector_fpext_v2f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI100_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI100_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    ldebr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v2f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI100_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI100_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %result
+}
+
+define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %dest) {
+; S390X-LABEL: constrained_vector_fpext_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    sllg %r1, %r0, 32
+; S390X-NEXT:    ldgr %f1, %r1
+; S390X-NEXT:    nilf %r0, 0
+; S390X-NEXT:    ldgr %f2, %r0
+; S390X-NEXT:    ldebr %f2, %f2
+; S390X-NEXT:    ldebr %f1, %f1
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    std %f0, 16(%r3)
+; S390X-NEXT:    std %f1, 8(%r3)
+; S390X-NEXT:    std %f2, 0(%r3)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vrepf %v2, %v0, 1
+; SZ13-NEXT:    ldebr %f1, %f0
+; SZ13-NEXT:    ldebr %f2, %f2
+; SZ13-NEXT:    vrepf %v0, %v0, 2
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    vmrhg %v1, %v1, %v2
+; SZ13-NEXT:    std %f0, 16(%r3)
+; SZ13-NEXT:    vst %v1, 0(%r3)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x float>, <3 x float>* %src
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
+                              <3 x float> %b,
+                              metadata !"fpexcept.strict")
+  store <3 x double> %result, <3 x double>* %dest
+  ret void
+}
+
+define <4 x double> @constrained_vector_fpext_v4f32() {
+; S390X-LABEL: constrained_vector_fpext_v4f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI102_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_2
+; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_3
+; S390X-NEXT:    le %f5, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    ldebr %f2, %f1
+; S390X-NEXT:    ldebr %f4, %f3
+; S390X-NEXT:    ldebr %f6, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v4f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI102_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI102_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    larl %r1, .LCPI102_2
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI102_3
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %result
+}
+
+define <1 x float> @constrained_vector_ceil_v1f32() {
+; S390X-LABEL: constrained_vector_ceil_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI103_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %ceil
+}
+
+define <2 x double> @constrained_vector_ceil_v2f64() {
+; S390X-LABEL: constrained_vector_ceil_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI104_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    larl %r1, .LCPI104_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI104_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 6
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %ceil
+}
+
+define <3 x float> @constrained_vector_ceil_v3f32() {
+; S390X-LABEL: constrained_vector_ceil_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI105_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    larl %r1, .LCPI105_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    larl %r1, .LCPI105_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, ceilf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI105_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI105_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 6, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 6, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %ceil
+}
+
+define void @constrained_vector_ceil_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_ceil_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, ceil@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 6
+; SZ13-NEXT:    fidbra %f0, 6, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %ceil, <3 x double>* %a
+  ret void
+}
+
+define <1 x float> @constrained_vector_floor_v1f32() {
+; S390X-LABEL: constrained_vector_floor_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI107_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %floor
+}
+
+
+define <2 x double> @constrained_vector_floor_v2f64() {
+; S390X-LABEL: constrained_vector_floor_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI108_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    larl %r1, .LCPI108_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI108_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 7
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %floor
+}
+
+define <3 x float> @constrained_vector_floor_v3f32() {
+; S390X-LABEL: constrained_vector_floor_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI109_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    larl %r1, .LCPI109_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    larl %r1, .LCPI109_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, floorf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI109_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI109_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 7, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 7, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %floor
+}
+
+define void @constrained_vector_floor_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_floor_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, floor@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 7
+; SZ13-NEXT:    fidbra %f0, 7, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %floor, <3 x double>* %a
+  ret void
+}
+
+define <1 x float> @constrained_vector_round_v1f32() {
+; S390X-LABEL: constrained_vector_round_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI111_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %round
+}
+
+define <2 x double> @constrained_vector_round_v2f64() {
+; S390X-LABEL: constrained_vector_round_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI112_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    larl %r1, .LCPI112_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI112_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 1
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %round
+}
+
+define <3 x float> @constrained_vector_round_v3f32() {
+; S390X-LABEL: constrained_vector_round_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI113_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    larl %r1, .LCPI113_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    larl %r1, .LCPI113_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, roundf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI113_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI113_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 1, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 1, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %round
+}
+
+
+define void @constrained_vector_round_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_round_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, round@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 1
+; SZ13-NEXT:    fidbra %f0, 1, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %round, <3 x double>* %a
+  ret void
+}
+
+define <1 x float> @constrained_vector_trunc_v1f32() {
+; S390X-LABEL: constrained_vector_trunc_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI115_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %trunc
+}
+
+define <2 x double> @constrained_vector_trunc_v2f64() {
+; S390X-LABEL: constrained_vector_trunc_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI116_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    larl %r1, .LCPI116_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI116_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 5
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %trunc
+}
+
+define <3 x float> @constrained_vector_trunc_v3f32() {
+; S390X-LABEL: constrained_vector_trunc_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI117_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    larl %r1, .LCPI117_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    larl %r1, .LCPI117_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, truncf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI117_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI117_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 5, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 5, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %trunc
+}
+
+define void @constrained_vector_trunc_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_trunc_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, trunc@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 5
+; SZ13-NEXT:    fidbra %f0, 5, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %trunc, <3 x double>* %a
+  ret void
+}
+
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+
+declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
+declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
+declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+
+declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)

From 1e692d1777ae34dcb93524b5798651a29defae09 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 31 May 2019 16:46:05 +0000
Subject: [PATCH 0763/1176] [CVP] Simplify non-overflowing saturating add/sub

If we can determine that a saturating add/sub will not overflow
based on range analysis, convert it into a simple binary operation.
This is a sibling transform to the existing with.overflow handling.

Differential Revision: https://reviews.llvm.org/D62703

llvm-svn: 362242
---
 .../Scalar/CorrelatedValuePropagation.cpp     | 25 ++++++++++++++++++-
 .../CorrelatedValuePropagation/overflows.ll   | 16 ++++++------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index eab49b5f8b37e..1715838a07eb4 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -63,6 +63,8 @@ STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
 STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
 STATISTIC(NumSRems,     "Number of srem converted to urem");
 STATISTIC(NumOverflows, "Number of overflow checks removed");
+STATISTIC(NumSaturating,
+    "Number of saturating arithmetics converted to normal arithmetics");
 
 static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(true));
 
@@ -413,7 +415,7 @@ static void processOverflowIntrinsic(WithOverflowInst *WO) {
   IRBuilder<> B(WO);
   Value *NewOp = B.CreateBinOp(
       WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
-  // Constant-holing could have happened.
+  // Constant-folding could have happened.
   if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
     if (WO->isSigned())
       Inst->setHasNoSignedWrap();
@@ -428,6 +430,20 @@ static void processOverflowIntrinsic(WithOverflowInst *WO) {
   ++NumOverflows;
 }
 
+static void processSaturatingInst(SaturatingInst *SI) {
+  BinaryOperator *BinOp = BinaryOperator::Create(
+      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+  BinOp->setDebugLoc(SI->getDebugLoc());
+  if (SI->isSigned())
+    BinOp->setHasNoSignedWrap();
+  else
+    BinOp->setHasNoUnsignedWrap();
+
+  SI->replaceAllUsesWith(BinOp);
+  SI->eraseFromParent();
+  ++NumSaturating;
+}
+
 /// Infer nonnull attributes for the arguments at the specified callsite.
 static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
   SmallVector<unsigned, 4> ArgNos;
@@ -440,6 +456,13 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
     }
   }
 
+  if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
+    if (willNotOverflow(SI, LVI)) {
+      processSaturatingInst(SI);
+      return true;
+    }
+  }
+
   // Deopt bundle operands are intended to capture state with minimal
   // perturbance of the code otherwise.  If we can find a constant value for
   // any such operand and remove a use of the original value, that's
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index 860ebafd0749d..b692e0d1ac387 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -739,8 +739,8 @@ define i8 @uadd_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X]], i8 100)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = add nuw i8 [[X]], 100
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp ugt i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -762,8 +762,8 @@ define i8 @sadd_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 20)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = add nsw i8 [[X]], 20
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp sgt i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -785,8 +785,8 @@ define i8 @usub_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X]], i8 100)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = sub nuw i8 [[X]], 100
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp ult i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -808,8 +808,8 @@ define i8 @ssub_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X]], i8 20)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = sub nsw i8 [[X]], 20
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp slt i8 %x, -100
   br i1 %cmp, label %trap, label %cont

From c61762797ea4ec868d06c5a5d90dc17f0b19ea68 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Fri, 31 May 2019 16:46:38 +0000
Subject: [PATCH 0764/1176] Suppress nothrow/Exception spec conflict warning
 when we dont know the ES.

In any situation where the Exception Spec isn't clear, suppress the
warning to avoid false positives.

llvm-svn: 362243
---
 clang/lib/Sema/SemaType.cpp                       | 14 +++++++-------
 clang/test/SemaCXX/nothrow-vs-exception-specs.cpp |  3 +++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 43ac435a82498..d473fb6c8f3dc 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -6976,18 +6976,18 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
       case EST_BasicNoexcept:
       case EST_NoexceptTrue:
       case EST_NoThrow:
-      case EST_Unparsed:
         // Exception spec doesn't conflict with nothrow, so don't warn.
-        // Unparsed is included in this, since method signatures aren't parsed
-        // until after the fact.
+        LLVM_FALLTHROUGH;
+      case EST_Unparsed:
+      case EST_Uninstantiated:
+      case EST_DependentNoexcept:
+      case EST_Unevaluated:
+        // We don't have enough information to properly determine if there is a
+        // conflict, so suppress the warning.
         break;
-
       case EST_Dynamic:
       case EST_MSAny:
       case EST_NoexceptFalse:
-      case EST_DependentNoexcept:
-      case EST_Unevaluated:
-      case EST_Uninstantiated:
         S.Diag(attr.getLoc(), diag::warn_nothrow_attribute_ignored);
         break;
       }
diff --git a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
index 563f604067072..78416d8c0b6e0 100644
--- a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
+++ b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
@@ -54,6 +54,9 @@ __declspec(nothrow) void foo5() noexcept(noexcept(foo2()));
 // expected-warning@+1{{'nothrow' attribute conflicts with exception specification; attribute ignored}}
 __declspec(nothrow) void foo6() noexcept(noexcept(foo3()));
 
+template<typename F>
+__declspec(nothrow) void foo7() noexcept(noexcept(F()));
+
 // FIXME: It would be nice to be able to warn on these, however at the time we
 // evaluate the nothrow, these have yet to be parsed, so the data is not yet
 // there.

From 8dda4a16753dbe8b0b0c1015bc30b58b2dde2cf1 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Fri, 31 May 2019 16:54:38 +0000
Subject: [PATCH 0765/1176] [Tests] Add tests for loop predication of loops
 w/ne latch conditions

llvm-svn: 362244
---
 llvm/test/Transforms/LoopPredication/basic.ll | 172 ++++++++++++++++++
 1 file changed, 172 insertions(+)

diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index 329b13bee002f..0037dcdff91a8 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -1595,3 +1595,175 @@ exit:
   %result = phi i32 [ %loop.acc.next, %loop ]
   ret i32 %result
 }
+
+; NE Check (as produced by LFTR) where we can prove Start < End via simple
+; instruction analysis
+define i32 @ne_latch_zext(i32* %array, i32 %length, i16 %n16) {
+; CHECK-LABEL: @ne_latch_zext(
+; CHECK-NEXT:  loop.preheader:
+; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+loop.preheader:
+  %n = zext i16 %n16 to i32
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ne i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+; NE Check (as produced by LFTR) where we can prove Start < End via the
+; condition guarding the loop entry.
+define i32 @ne_latch_dom_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @ne_latch_dom_check(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ne i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+; NE latch - can't prove (end-start) mod step == 0 (i.e. might wrap
+; around several times or even be infinite)
+define i32 @neg_ne_latch_mod_step(i32* %array, i32 %length, i16 %n16) {
+; CHECK-LABEL: @neg_ne_latch_mod_step(
+; CHECK-NEXT:  loop.preheader:
+; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 3
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+loop.preheader:
+  %n = zext i16 %n16 to i32
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add i32 %i, 3
+  %continue = icmp ne i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+; NE latch - TODO: could prove (end-start) mod step == 0
+define i32 @ne_latch_mod_step(i32* %array, i32 %length) {
+; CHECK-LABEL: @ne_latch_mod_step(
+; CHECK-NEXT:  loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], 400
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+loop.preheader:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add nuw i32 %i, 2
+  %continue = icmp ne i32 %i.next, 400
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+; NE Latch - but end > start so wraps around and not equivelent to a ult
+define i32 @neg_ne_latch_swapped_order(i32* %array, i32 %length) {
+; CHECK-LABEL: @neg_ne_latch_swapped_order(
+; CHECK-NEXT:  loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 400, [[LOOP_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], 0
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+loop.preheader:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 400, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add i32 %i, 1
+  %continue = icmp ne i32 %i.next, 0
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+

From 68fa6ddb9d441fa54e26f3f13372617a35f30d61 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Fri, 31 May 2019 17:00:48 +0000
Subject: [PATCH 0766/1176] Replace 'default' in an enum-over-a-switch with the
 missing list.

This suppressed the Wswitch warning causing me to miss it and write an
assertion failure.

llvm-svn: 362245
---
 clang/lib/Sema/SemaExceptionSpec.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index e8f559af4da8a..8f3ebc29b5207 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -429,7 +429,11 @@ bool Sema::CheckEquivalentExceptionSpec(FunctionDecl *Old, FunctionDecl *New) {
   case EST_NoThrow:
     OS <<"__attribute__((nothrow))";
     break;
-  default:
+  case EST_None:
+  case EST_MSAny:
+  case EST_Unevaluated:
+  case EST_Uninstantiated:
+  case EST_Unparsed:
     llvm_unreachable("This spec type is compatible with none.");
   }
 

From 312f3a2bbf45ab2c0cbbbc9ad04fb5d4c0517aaf Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Fri, 31 May 2019 17:18:34 +0000
Subject: [PATCH 0767/1176] [MCA] Refactor class BottleneckAnalysis. NFCI

The resource pressure distribution computation is now delegated by class
BottleneckAnalysis to an instance of class PressureTracker.
Class PressureTracker is also responsible for:
 - tracking users of processor resource units.
 - tracking the number of delay cycles caused by increases in backpressure.

BottleneckAnalysis internally initializes a dependency graph. Each nodes
represents an instruction in the input code sequence.  Edges of the dependency
graph are critical register/memory/resource dependencies.  Dependencies are only
added to the graph if they are seen as critical by backend pressure events.

The DependencyGraph is currently unused. It is possible to print the dependency
 graph (see method DependencyGraph::dump()) for debugging purposes.
The long term goal is to use the information stored by the dependency graph in
order to do critical path computation.

llvm-svn: 362246
---
 .../llvm-mca/Views/BottleneckAnalysis.cpp     | 303 +++++++++++++++---
 .../tools/llvm-mca/Views/BottleneckAnalysis.h | 180 ++++++++---
 llvm/tools/llvm-mca/llvm-mca.cpp              |   3 +-
 3 files changed, 406 insertions(+), 80 deletions(-)

diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index 182a97aed374a..ea2cb71bae7e1 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Views/BottleneckAnalysis.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/MCA/Support.h"
 #include "llvm/Support/Format.h"
 
@@ -22,18 +21,219 @@ namespace mca {
 
 #define DEBUG_TYPE "llvm-mca"
 
-BottleneckAnalysis::BottleneckAnalysis(const MCSchedModel &Model)
-    : SM(Model), TotalCycles(0), BPI({0, 0, 0, 0, 0}),
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+    : SM(Model),
       ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
-      ProcResourceMasks(Model.getNumProcResourceKinds()),
+      ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
       ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
-      PressureIncreasedBecauseOfResources(false),
-      PressureIncreasedBecauseOfDataDependencies(false),
-      SeenStallCycles(false) {
-  computeProcResourceMasks(SM, ProcResourceMasks);
-  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
-    unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
-    ResIdx2ProcResID[Index] = I;
+      ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+  computeProcResourceMasks(SM, ProcResID2Mask);
+
+  // Ignore the invalid resource at index zero.
+  unsigned NextResourceUsersIdx = 0;
+  for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+    ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
+    NextResourceUsersIdx += ProcResource.NumUnits;
+    uint64_t ResourceMask = ProcResID2Mask[I];
+    ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
+  }
+
+  ResourceUsers.resize(NextResourceUsersIdx);
+  std::fill(ResourceUsers.begin(), ResourceUsers.end(), ~0U);
+}
+
+void PressureTracker::getUniqueUsers(
+    uint64_t ResourceMask, SmallVectorImpl<unsigned> &UniqueUsers) const {
+  unsigned Index = getResourceStateIndex(ResourceMask);
+  unsigned ProcResID = ResIdx2ProcResID[Index];
+  const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+  for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+    unsigned From = getResourceUser(ProcResID, I);
+    if (find(UniqueUsers, From) == UniqueUsers.end())
+      UniqueUsers.emplace_back(From);
+  }
+}
+
+void PressureTracker::handleInstructionEvent(const HWInstructionEvent &Event) {
+  unsigned IID = Event.IR.getSourceIndex();
+  switch (Event.Type) {
+  default:
+    break;
+  case HWInstructionEvent::Dispatched:
+    IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
+    break;
+  case HWInstructionEvent::Executed:
+    IPI.erase(IID);
+    break;
+  case HWInstructionEvent::Issued: {
+    const auto &IIE = static_cast<const HWInstructionIssuedEvent &>(Event);
+    using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
+    using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
+    for (const ResourceUse &Use : IIE.UsedResources) {
+      const ResourceRef &RR = Use.first;
+      unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
+      Index += countTrailingZeros(RR.second);
+      ResourceUsers[Index] = IID;
+    }
+  }
+  }
+}
+
+void PressureTracker::updateResourcePressureDistribution(
+    uint64_t CumulativeMask) {
+  while (CumulativeMask) {
+    uint64_t Current = CumulativeMask & (-CumulativeMask);
+    unsigned ResIdx = getResourceStateIndex(Current);
+    unsigned ProcResID = ResIdx2ProcResID[ResIdx];
+    uint64_t Mask = ProcResID2Mask[ProcResID];
+
+    if (Mask == Current) {
+      ResourcePressureDistribution[ProcResID]++;
+      CumulativeMask ^= Current;
+      continue;
+    }
+
+    Mask ^= Current;
+    while (Mask) {
+      uint64_t SubUnit = Mask & (-Mask);
+      ResIdx = getResourceStateIndex(SubUnit);
+      ProcResID = ResIdx2ProcResID[ResIdx];
+      ResourcePressureDistribution[ProcResID]++;
+      Mask ^= SubUnit;
+    }
+
+    CumulativeMask ^= Current;
+  }
+}
+
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+  assert(Event.Reason != HWPressureEvent::INVALID &&
+         "Unexpected invalid event!");
+
+  switch (Event.Reason) {
+  default:
+    break;
+
+  case HWPressureEvent::RESOURCES: {
+    const uint64_t ResourceMask = Event.ResourceMask;
+    updateResourcePressureDistribution(Event.ResourceMask);
+
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      const Instruction &IS = *IR.getInstruction();
+      unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
+      if (!BusyResources)
+        continue;
+
+      IPI[IR.getSourceIndex()].ResourcePressureCycles++;
+    }
+    break;
+  }
+
+  case HWPressureEvent::REGISTER_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].RegisterPressureCycles++;
+    }
+    break;
+
+  case HWPressureEvent::MEMORY_DEPS:
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      unsigned IID = IR.getSourceIndex();
+      IPI[IID].MemoryPressureCycles++;
+    }
+  }
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const {
+  OS << "\nREG DEPS\n";
+  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+    const DGNode &Node = Nodes[I];
+    for (const DependencyEdge &DE : Node.RegDeps) {
+      bool LoopCarried = I >= DE.IID;
+      OS << " FROM: " << I << " TO: " << DE.IID
+         << (LoopCarried ? " (loop carried)" : "             ")
+         << " - REGISTER: ";
+      MCIP.printRegName(OS, DE.ResourceOrRegID);
+      OS << " - CYCLES: " << DE.Cycles << '\n';
+    }
+  }
+}
+
+void DependencyGraph::dumpMemDeps(raw_ostream &OS) const {
+  OS << "\nMEM DEPS\n";
+  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+    const DGNode &Node = Nodes[I];
+    for (const DependencyEdge &DE : Node.MemDeps) {
+      bool LoopCarried = I >= DE.IID;
+      OS << " FROM: " << I << " TO: " << DE.IID
+         << (LoopCarried ? " (loop carried)" : "             ")
+         << " - MEMORY - CYCLES: " << DE.Cycles << '\n';
+    }
+  }
+}
+
+void DependencyGraph::dumpResDeps(raw_ostream &OS) const {
+  OS << "\nRESOURCE DEPS\n";
+  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+    const DGNode &Node = Nodes[I];
+    for (const DependencyEdge &DE : Node.ResDeps) {
+      bool LoopCarried = I >= DE.IID;
+      OS << " FROM: " << I << " TO: " << DE.IID
+         << (LoopCarried ? "(loop carried)" : "             ")
+         << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+      OS << " - CYCLES: " << DE.Cycles << '\n';
+    }
+  }
+}
+#endif // NDEBUG
+
+void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,
+                                 DependencyEdge &&Dep) {
+  auto It = find_if(Vec, [Dep](DependencyEdge &DE) {
+    return DE.IID == Dep.IID && DE.ResourceOrRegID == Dep.ResourceOrRegID;
+  });
+
+  if (It != Vec.end()) {
+    It->Cycles += Dep.Cycles;
+    return;
+  }
+
+  Vec.emplace_back(Dep);
+  Nodes[Dep.IID].NumPredecessors++;
+}
+
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+                                       MCInstPrinter &Printer,
+                                       ArrayRef<MCInst> Sequence,
+                                       unsigned Executions)
+    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()),
+      DG(Sequence.size()), Source(Sequence), Iterations(Executions),
+      TotalCycles(0), PressureIncreasedBecauseOfResources(false),
+      PressureIncreasedBecauseOfRegisterDependencies(false),
+      PressureIncreasedBecauseOfMemoryDependencies(false),
+      SeenStallCycles(false), BPI() {}
+
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+  Tracker.handleInstructionEvent(Event);
+  if (Event.Type != HWInstructionEvent::Issued)
+    return;
+
+  const unsigned IID = Event.IR.getSourceIndex();
+  const Instruction &IS = *Event.IR.getInstruction();
+  unsigned Cycles = Tracker.getRegisterPressureCycles(IID);
+  unsigned To = IID % Source.size();
+  if (Cycles) {
+    const CriticalDependency &RegDep = IS.getCriticalRegDep();
+    unsigned From = RegDep.IID % Source.size();
+    DG.addRegDep(From, To, RegDep.RegID, Cycles);
+  }
+  Cycles = Tracker.getMemoryPressureCycles(IID);
+  if (Cycles) {
+    const CriticalDependency &MemDep = IS.getCriticalMemDep();
+    unsigned From = MemDep.IID % Source.size();
+    DG.addMemDep(From, To, Cycles);
   }
 }
 
@@ -41,47 +241,67 @@ void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
   assert(Event.Reason != HWPressureEvent::INVALID &&
          "Unexpected invalid event!");
 
+  Tracker.handlePressureEvent(Event);
+
   switch (Event.Reason) {
   default:
     break;
 
   case HWPressureEvent::RESOURCES: {
     PressureIncreasedBecauseOfResources = true;
-    ++BPI.ResourcePressureCycles;
-    uint64_t ResourceMask = Event.ResourceMask;
-    while (ResourceMask) {
-      uint64_t Current = ResourceMask & (-ResourceMask);
-      unsigned Index = getResourceStateIndex(Current);
-      unsigned ProcResID = ResIdx2ProcResID[Index];
-      const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
-      if (!PRDesc.SubUnitsIdxBegin) {
-        ResourcePressureDistribution[Index]++;
-        ResourceMask ^= Current;
-        continue;
-      }
 
-      for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
-        unsigned OtherProcResID = PRDesc.SubUnitsIdxBegin[I];
-        unsigned OtherMask = ProcResourceMasks[OtherProcResID];
-        ResourcePressureDistribution[getResourceStateIndex(OtherMask)]++;
+    SmallVector<unsigned, 4> UniqueUsers;
+    for (const InstRef &IR : Event.AffectedInstructions) {
+      const Instruction &IS = *IR.getInstruction();
+      unsigned To = IR.getSourceIndex() % Source.size();
+      unsigned BusyResources =
+          IS.getCriticalResourceMask() & Event.ResourceMask;
+      while (BusyResources) {
+        uint64_t Current = BusyResources & (-BusyResources);
+        Tracker.getUniqueUsers(Current, UniqueUsers);
+        for (unsigned User : UniqueUsers)
+          DG.addResourceDep(User % Source.size(), To, Current, 1);
+        BusyResources ^= Current;
       }
-
-      ResourceMask ^= Current;
+      UniqueUsers.clear();
     }
+
     break;
   }
 
   case HWPressureEvent::REGISTER_DEPS:
-    PressureIncreasedBecauseOfDataDependencies = true;
-    ++BPI.RegisterDependencyCycles;
+    PressureIncreasedBecauseOfRegisterDependencies = true;
     break;
   case HWPressureEvent::MEMORY_DEPS:
-    PressureIncreasedBecauseOfDataDependencies = true;
-    ++BPI.MemoryDependencyCycles;
+    PressureIncreasedBecauseOfMemoryDependencies = true;
     break;
   }
 }
 
+void BottleneckAnalysis::onCycleEnd() {
+  ++TotalCycles;
+
+  bool PressureIncreasedBecauseOfDataDependencies =
+      PressureIncreasedBecauseOfRegisterDependencies ||
+      PressureIncreasedBecauseOfMemoryDependencies;
+  if (!PressureIncreasedBecauseOfResources &&
+      !PressureIncreasedBecauseOfDataDependencies)
+    return;
+
+  ++BPI.PressureIncreaseCycles;
+  if (PressureIncreasedBecauseOfRegisterDependencies)
+    ++BPI.RegisterDependencyCycles;
+  if (PressureIncreasedBecauseOfMemoryDependencies)
+    ++BPI.MemoryDependencyCycles;
+  if (PressureIncreasedBecauseOfDataDependencies)
+    ++BPI.DataDependencyCycles;
+  if (PressureIncreasedBecauseOfResources)
+    ++BPI.ResourcePressureCycles;
+  PressureIncreasedBecauseOfResources = false;
+  PressureIncreasedBecauseOfRegisterDependencies = false;
+  PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
   if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
     OS << "\nNo resource or data dependency bottlenecks discovered.\n";
@@ -107,12 +327,13 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
      << "% ]";
 
   if (BPI.PressureIncreaseCycles) {
-    for (unsigned I = 0, E = ResourcePressureDistribution.size(); I < E; ++I) {
-      if (ResourcePressureDistribution[I]) {
-        double Frequency =
-            (double)ResourcePressureDistribution[I] * 100 / TotalCycles;
-        unsigned Index = ResIdx2ProcResID[getResourceStateIndex(1ULL << I)];
-        const MCProcResourceDesc &PRDesc = *SM.getProcResource(Index);
+    ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+    const MCSchedModel &SM = STI.getSchedModel();
+    for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+      unsigned ResourceCycles = Distribution[I];
+      if (ResourceCycles) {
+        double Frequency = (double)ResourceCycles * 100 / TotalCycles;
+        const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
         OS << "\n  - " << PRDesc.Name << "  [ "
            << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
       }
@@ -121,11 +342,9 @@ void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
 
   OS << "\n  Data Dependencies:      [ "
      << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
-
   OS << "\n  - Register Dependencies [ "
      << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
      << "% ]";
-
   OS << "\n  - Memory Dependencies   [ "
      << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
      << "% ]\n\n";
@@ -137,6 +356,8 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const {
   printBottleneckHints(TempStream);
   TempStream.flush();
   OS << Buffer;
+  LLVM_DEBUG(DG.dump(OS, MCIP));
 }
+
 } // namespace mca.
 } // namespace llvm
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index 129ffb3d16378..c9f4865e960cb 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -8,10 +8,10 @@
 /// \file
 ///
 /// This file implements the bottleneck analysis view.
-/// 
+///
 /// This view internally observes backend pressure increase events in order to
 /// identify potential sources of bottlenecks.
-/// 
+///
 /// Example of bottleneck analysis report:
 ///
 /// Cycles with backend pressure increase [ 33.40% ]
@@ -29,17 +29,151 @@
 
 #include "Views/View.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 namespace mca {
 
+class PressureTracker {
+  const MCSchedModel &SM;
+
+  // Resource pressure distribution. There is an element for every processor
+  // resource declared by the scheduling model. Quantities are number of cycles.
+  SmallVector<unsigned, 4> ResourcePressureDistribution;
+
+  // Each processor resource is associated with a so-called processor resource
+  // mask. This vector allows to correlate processor resource IDs with processor
+  // resource masks. There is exactly one element per each processor resource
+  // declared by the scheduling model.
+  SmallVector<uint64_t, 4> ProcResID2Mask;
+
+  // Maps processor resource state indices (returned by calls to
+  // `getResourceStateIndex(Mask)` to processor resource identifiers.
+  SmallVector<unsigned, 4> ResIdx2ProcResID;
+
+  // Maps Processor Resource identifiers to ResourceUsers indices.
+  SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex;
+
+  // Identifies the last user of a processor resource unit.
+  // This vector is updated on every instruction issued event.
+  // There is one entry for every processor resource unit declared by the
+  // processor model. An all_ones value is treated like an invalid instruction
+  // identifier.
+  SmallVector<unsigned, 4> ResourceUsers;
+
+  struct InstructionPressureInfo {
+    unsigned RegisterPressureCycles;
+    unsigned MemoryPressureCycles;
+    unsigned ResourcePressureCycles;
+  };
+  DenseMap<unsigned, InstructionPressureInfo> IPI;
+
+  void updateResourcePressureDistribution(uint64_t CumulativeMask);
+
+  unsigned getResourceUser(unsigned ProcResID, unsigned UnitID) const {
+    unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];
+    return ResourceUsers[Index + UnitID];
+  }
+
+public:
+  PressureTracker(const MCSchedModel &Model);
+
+  ArrayRef<unsigned> getResourcePressureDistribution() const {
+    return ResourcePressureDistribution;
+  }
+
+  void getUniqueUsers(uint64_t ResourceMask,
+                      SmallVectorImpl<unsigned> &Users) const;
+
+  unsigned getRegisterPressureCycles(unsigned IID) const {
+    assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+    const InstructionPressureInfo &Info = IPI.find(IID)->second;
+    return Info.RegisterPressureCycles;
+  }
+
+  unsigned getMemoryPressureCycles(unsigned IID) const {
+    assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+    const InstructionPressureInfo &Info = IPI.find(IID)->second;
+    return Info.MemoryPressureCycles;
+  }
+
+  unsigned getResourcePressureCycles(unsigned IID) const {
+    assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+    const InstructionPressureInfo &Info = IPI.find(IID)->second;
+    return Info.ResourcePressureCycles;
+  }
+
+  void handlePressureEvent(const HWPressureEvent &Event);
+  void handleInstructionEvent(const HWInstructionEvent &Event);
+};
+
+class DependencyGraph {
+  struct DependencyEdge {
+    unsigned IID;
+    uint64_t ResourceOrRegID;
+    uint64_t Cycles;
+  };
+
+  struct DGNode {
+    unsigned NumPredecessors;
+    SmallVector<DependencyEdge, 8> RegDeps;
+    SmallVector<DependencyEdge, 8> MemDeps;
+    SmallVector<DependencyEdge, 8> ResDeps;
+  };
+  SmallVector<DGNode, 16> Nodes;
+
+  void addDepImpl(SmallVectorImpl<DependencyEdge> &Vec, DependencyEdge &&DE);
+
+  DependencyGraph(const DependencyGraph &) = delete;
+  DependencyGraph &operator=(const DependencyGraph &) = delete;
+
+public:
+  DependencyGraph(unsigned NumNodes) : Nodes(NumNodes, DGNode()) {}
+
+  void addRegDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy) {
+    addDepImpl(Nodes[From].RegDeps, {To, RegID, Cy});
+  }
+  void addMemDep(unsigned From, unsigned To, unsigned Cy) {
+    addDepImpl(Nodes[From].MemDeps, {To, /* unused */ 0, Cy});
+  }
+  void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy) {
+    addDepImpl(Nodes[From].ResDeps, {To, Mask, Cy});
+  }
+
+#ifndef NDEBUG
+  void dumpRegDeps(raw_ostream &OS, MCInstPrinter &MCIP) const;
+  void dumpMemDeps(raw_ostream &OS) const;
+  void dumpResDeps(raw_ostream &OS) const;
+
+  void dump(raw_ostream &OS, llvm::MCInstPrinter &MCIP) const {
+    dumpRegDeps(OS, MCIP);
+    dumpMemDeps(OS);
+    dumpResDeps(OS);
+  }
+#endif
+};
+
 /// A view that collects and prints a few performance numbers.
 class BottleneckAnalysis : public View {
-  const llvm::MCSchedModel &SM;
+  const MCSubtargetInfo &STI;
+  MCInstPrinter &MCIP;
+  PressureTracker Tracker;
+  DependencyGraph DG;
+
+  ArrayRef<MCInst> Source;
+  unsigned Iterations;
   unsigned TotalCycles;
 
+  bool PressureIncreasedBecauseOfResources;
+  bool PressureIncreasedBecauseOfRegisterDependencies;
+  bool PressureIncreasedBecauseOfMemoryDependencies;
+  // True if throughput was affected by dispatch stalls.
+  bool SeenStallCycles;
+
   struct BackPressureInfo {
     // Cycles where backpressure increased.
     unsigned PressureIncreaseCycles;
@@ -54,47 +188,17 @@ class BottleneckAnalysis : public View {
   };
   BackPressureInfo BPI;
 
-  // Resource pressure distribution. There is an element for every processor
-  // resource declared by the scheduling model. Quantities are number of cycles.
-  llvm::SmallVector<unsigned, 8> ResourcePressureDistribution;
-
-  // Each processor resource is associated with a so-called processor resource
-  // mask. This vector allows to correlate processor resource IDs with processor
-  // resource masks. There is exactly one element per each processor resource
-  // declared by the scheduling model.
-  llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
-
-  // Used to map resource indices to actual processor resource IDs.
-  llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
-
-  // True if resource pressure events were notified during this cycle.
-  bool PressureIncreasedBecauseOfResources;
-  bool PressureIncreasedBecauseOfDataDependencies;
-
-  // True if throughput was affected by dispatch stalls.
-  bool SeenStallCycles;
-
   // Prints a bottleneck message to OS.
-  void printBottleneckHints(llvm::raw_ostream &OS) const;
+  void printBottleneckHints(raw_ostream &OS) const;
 
 public:
-  BottleneckAnalysis(const llvm::MCSchedModel &Model);
-
-  void onCycleEnd() override {
-    ++TotalCycles;
-    if (PressureIncreasedBecauseOfResources ||
-        PressureIncreasedBecauseOfDataDependencies) {
-      ++BPI.PressureIncreaseCycles;
-      if (PressureIncreasedBecauseOfDataDependencies)
-        ++BPI.DataDependencyCycles;
-      PressureIncreasedBecauseOfResources = false;
-      PressureIncreasedBecauseOfDataDependencies = false;
-    }
-  }
+  BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+                     ArrayRef<MCInst> Sequence, unsigned Iterations);
 
+  void onCycleEnd() override;
   void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
-
   void onEvent(const HWPressureEvent &Event) override;
+  void onEvent(const HWInstructionEvent &Event) override;
 
   void printView(llvm::raw_ostream &OS) const override;
 };
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 8ce1d03c785e0..75886a6b31b38 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -487,7 +487,8 @@ int main(int argc, char **argv) {
           llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
 
     if (EnableBottleneckAnalysis)
-      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(SM));
+      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(
+          *STI, *IP, Insts, S.getNumIterations()));
 
     if (PrintInstructionInfoView)
       Printer.addView(

From 3ea6b24f41471358a51134f71177bc7caa2eacfc Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Fri, 31 May 2019 17:34:25 +0000
Subject: [PATCH 0768/1176] [MIR-Canon] Don't do vreg skip for independent
 instructions if there are none.

We don't want to create vregs if there is nothing to use them for. That causes
verifier errors.

Differential Revision: https://reviews.llvm.org/D62740

llvm-svn: 362247
---
 llvm/lib/CodeGen/MIRCanonicalizerPass.cpp                  | 3 ++-
 llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index a4097232d7d59..c7d1131d7b8a4 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -743,7 +743,8 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
   // of the MachineBasicBlock so that they are named in the order that we sorted
   // them alphabetically. Eventually we wont need SkipVRegs because we will use
   // named vregs instead.
-  NVC.SkipVRegs();
+  if (IdempotentInstCount)
+    NVC.SkipVRegs();
 
   auto MII = MBB->begin();
   for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
index abb2dde4d9e0b..629f7aefd6aff 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir
@@ -1,4 +1,5 @@
 # RUN: llc -march=amdgcn -run-pass=none -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn  -run-pass mir-canonicalizer -verify-machineinstrs -o - %s
 
 # Previously getReservedRegs was called before parsing
 # machineFunctionInfo, but the AMDGPU implementation depends on

From 0c84dafd6b52003d6948dbce20d0df61874c037e Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Fri, 31 May 2019 17:40:49 +0000
Subject: [PATCH 0769/1176] [CMake] Feed BUNDLE_PATH through llvm target
 wrappers

This feeds the new llvm_codsign BUNDLE_PATH option through from the llvm target wrapper functions, so that you can specify the BUNDLE_PATH on the target's codesign.

llvm-svn: 362248
---
 llvm/cmake/modules/AddLLVM.cmake | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 874cc76e8e5fe..bd78a9f6783d7 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -390,7 +390,7 @@ endfunction(set_windows_version_resource_properties)
 function(llvm_add_library name)
   cmake_parse_arguments(ARG
     "MODULE;SHARED;STATIC;OBJECT;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME;NO_INSTALL_RPATH"
-    "OUTPUT_NAME;PLUGIN_TOOL;ENTITLEMENTS"
+    "OUTPUT_NAME;PLUGIN_TOOL;ENTITLEMENTS;BUNDLE_PATH"
     "ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS;OBJLIBS"
     ${ARGN})
   list(APPEND LLVM_COMMON_DEPENDS ${ARG_DEPENDS})
@@ -594,7 +594,7 @@ function(llvm_add_library name)
 
   if(ARG_SHARED OR ARG_MODULE)
     llvm_externalize_debuginfo(${name})
-    llvm_codesign(${name} ENTITLEMENTS ${ARG_ENTITLEMENTS})
+    llvm_codesign(${name} ENTITLEMENTS ${ARG_ENTITLEMENTS} BUNDLE_PATH ${ARG_BUNDLE_PATH})
   endif()
   # clang and newer versions of ninja use high-resolutions timestamps,
   # but older versions of libtool on Darwin don't, so the archive will
@@ -716,7 +716,7 @@ endmacro(add_llvm_library name)
 macro(add_llvm_executable name)
   cmake_parse_arguments(ARG
     "DISABLE_LLVM_LINK_LLVM_DYLIB;IGNORE_EXTERNALIZE_DEBUGINFO;NO_INSTALL_RPATH"
-    "ENTITLEMENTS"
+    "ENTITLEMENTS;BUNDLE_PATH"
     "DEPENDS"
     ${ARGN})
 
@@ -798,7 +798,7 @@ macro(add_llvm_executable name)
     target_link_libraries(${name} PRIVATE ${LLVM_PTHREAD_LIB})
   endif()
 
-  llvm_codesign(${name} ENTITLEMENTS ${ARG_ENTITLEMENTS})
+  llvm_codesign(${name} ENTITLEMENTS ${ARG_ENTITLEMENTS} BUNDLE_PATH ${ARG_BUNDLE_PATH})
 endmacro(add_llvm_executable name)
 
 function(export_executable_symbols target)

From 4595f01b062b48290fee50749154022d7d8cf9fd Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Fri, 31 May 2019 17:54:12 +0000
Subject: [PATCH 0770/1176] [analyzer] print() JSONify: ExplodedNode revision

Summary: Revert node-ID removal.

Reviewers: NoQ, xazax.hun, ravikandhadai, baloghadamsoftware, Szelethus

Subscribers: szepet, rnkovacs, a.sidorin, mikhail.ramalho, donat.nagy, dkrupp,
             cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62658

llvm-svn: 362249
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index ad3c5d206a258..b217889390414 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3075,8 +3075,8 @@ struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
     const unsigned int Space = 1;
     ProgramStateRef State = N->getState();
 
-    Out << "{ \"node_id\": \"" << (const void *)N
-        << "\", \"state_id\": " << State->getID()
+    Out << "{ \"node_id\": " << N->getID(G) << ", \"pointer\": \""
+        << (const void *)N << "\", \"state_id\": " << State->getID()
         << ", \"has_report\": " << (nodeHasBugReport(N) ? "true" : "false")
         << ",\\l";
 
@@ -3094,7 +3094,7 @@ struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
           else
             Out << "null }";
         },
-	// Adds a comma and a new-line between each program point.
+        // Adds a comma and a new-line between each program point.
         [&](const ExplodedNode *) { Out << ",\\l"; },
         [&](const ExplodedNode *) { return false; });
 

From 065bd45da9de57668cfaabe1b851a2e08acea215 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Fri, 31 May 2019 18:01:42 +0000
Subject: [PATCH 0771/1176] [MCA] Remove unused fields from BottleneckAnalysis.
 NFC

This should appease the buildbots.

llvm-svn: 362251
---
 llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp | 11 ++++-------
 llvm/tools/llvm-mca/Views/BottleneckAnalysis.h   | 13 +++++++------
 llvm/tools/llvm-mca/llvm-mca.cpp                 |  3 +--
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index ea2cb71bae7e1..cde896fbb5fb0 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -205,12 +205,10 @@ void DependencyGraph::addDepImpl(SmallVectorImpl<DependencyEdge> &Vec,
 }
 
 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
-                                       MCInstPrinter &Printer,
-                                       ArrayRef<MCInst> Sequence,
-                                       unsigned Executions)
-    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()),
-      DG(Sequence.size()), Source(Sequence), Iterations(Executions),
-      TotalCycles(0), PressureIncreasedBecauseOfResources(false),
+                                       ArrayRef<MCInst> Sequence)
+    : STI(sti), Tracker(STI.getSchedModel()), DG(Sequence.size()),
+      Source(Sequence), TotalCycles(0),
+      PressureIncreasedBecauseOfResources(false),
       PressureIncreasedBecauseOfRegisterDependencies(false),
       PressureIncreasedBecauseOfMemoryDependencies(false),
       SeenStallCycles(false), BPI() {}
@@ -356,7 +354,6 @@ void BottleneckAnalysis::printView(raw_ostream &OS) const {
   printBottleneckHints(TempStream);
   TempStream.flush();
   OS << Buffer;
-  LLVM_DEBUG(DG.dump(OS, MCIP));
 }
 
 } // namespace mca.
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index c9f4865e960cb..4c4dc193e1379 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -149,7 +149,7 @@ class DependencyGraph {
   void dumpMemDeps(raw_ostream &OS) const;
   void dumpResDeps(raw_ostream &OS) const;
 
-  void dump(raw_ostream &OS, llvm::MCInstPrinter &MCIP) const {
+  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
     dumpRegDeps(OS, MCIP);
     dumpMemDeps(OS);
     dumpResDeps(OS);
@@ -160,12 +160,10 @@ class DependencyGraph {
 /// A view that collects and prints a few performance numbers.
 class BottleneckAnalysis : public View {
   const MCSubtargetInfo &STI;
-  MCInstPrinter &MCIP;
   PressureTracker Tracker;
   DependencyGraph DG;
 
   ArrayRef<MCInst> Source;
-  unsigned Iterations;
   unsigned TotalCycles;
 
   bool PressureIncreasedBecauseOfResources;
@@ -192,15 +190,18 @@ class BottleneckAnalysis : public View {
   void printBottleneckHints(raw_ostream &OS) const;
 
 public:
-  BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
-                     ArrayRef<MCInst> Sequence, unsigned Iterations);
+  BottleneckAnalysis(const MCSubtargetInfo &STI, ArrayRef<MCInst> Sequence);
 
   void onCycleEnd() override;
   void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
   void onEvent(const HWPressureEvent &Event) override;
   void onEvent(const HWInstructionEvent &Event) override;
 
-  void printView(llvm::raw_ostream &OS) const override;
+  void printView(raw_ostream &OS) const override;
+
+#ifndef NDEBUG
+  void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
+#endif
 };
 
 } // namespace mca
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 75886a6b31b38..a875c70e17fe0 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -487,8 +487,7 @@ int main(int argc, char **argv) {
           llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
 
     if (EnableBottleneckAnalysis)
-      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(
-          *STI, *IP, Insts, S.getNumIterations()));
+      Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(*STI, Insts));
 
     if (PrintInstructionInfoView)
       Printer.addView(

From 7fc6a55688c816f5fc1a5481ae7af25be7500356 Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Fri, 31 May 2019 18:35:30 +0000
Subject: [PATCH 0772/1176] Add include for 'test_macros.h' to all the tests
 that were missing them. Thanks to Zoe for the (big, but simple) patch. NFC
 intended.

llvm-svn: 362252
---
 .../alg.random.shuffle/random_shuffle.cxx1z.pass.cpp            | 2 ++
 libcxx/test/libcxx/algorithms/version.pass.cpp                  | 2 ++
 .../atomics/atomics.order/memory_order.underlying_type.pass.cpp | 2 ++
 libcxx/test/libcxx/atomics/version.pass.cpp                     | 2 ++
 libcxx/test/libcxx/containers/associative/map/at.abort.pass.cpp | 2 ++
 .../libcxx/containers/associative/map/at.const.abort.pass.cpp   | 2 ++
 libcxx/test/libcxx/containers/associative/map/version.pass.cpp  | 2 ++
 .../libcxx/containers/associative/non_const_comparator.pass.cpp | 2 ++
 libcxx/test/libcxx/containers/associative/set/version.pass.cpp  | 2 ++
 .../containers/associative/tree_balance_after_insert.pass.cpp   | 2 ++
 .../libcxx/containers/associative/tree_left_rotate.pass.cpp     | 2 ++
 libcxx/test/libcxx/containers/associative/tree_remove.pass.cpp  | 2 ++
 .../libcxx/containers/associative/tree_right_rotate.pass.cpp    | 2 ++
 .../test/libcxx/containers/associative/undef_min_max.pass.cpp   | 2 ++
 .../libcxx/containers/container.adaptors/queue/version.pass.cpp | 2 ++
 .../libcxx/containers/container.adaptors/stack/version.pass.cpp | 2 ++
 libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp         | 2 ++
 libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp         | 2 ++
 .../containers/sequences/array/array.zero/db_back.pass.cpp      | 1 +
 .../containers/sequences/array/array.zero/db_front.pass.cpp     | 1 +
 .../containers/sequences/array/array.zero/db_indexing.pass.cpp  | 1 +
 libcxx/test/libcxx/containers/sequences/array/version.pass.cpp  | 2 ++
 .../test/libcxx/containers/sequences/deque/incomplete.pass.cpp  | 2 ++
 .../libcxx/containers/sequences/deque/pop_back_empty.pass.cpp   | 2 ++
 libcxx/test/libcxx/containers/sequences/deque/version.pass.cpp  | 2 ++
 .../libcxx/containers/sequences/forwardlist/version.pass.cpp    | 2 ++
 .../libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp | 2 ++
 .../libcxx/containers/sequences/list/list.cons/db_move.pass.cpp | 1 +
 .../sequences/list/list.modifiers/emplace_db1.pass.cpp          | 2 ++
 .../sequences/list/list.modifiers/erase_iter_db1.pass.cpp       | 2 ++
 .../sequences/list/list.modifiers/erase_iter_db2.pass.cpp       | 2 ++
 .../sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp  | 2 ++
 .../sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp  | 2 ++
 .../sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp  | 2 ++
 .../sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp  | 2 ++
 .../list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp      | 1 +
 .../list/list.modifiers/insert_iter_rvalue_db1.pass.cpp         | 2 ++
 .../list/list.modifiers/insert_iter_size_value_db1.pass.cpp     | 2 ++
 .../list/list.modifiers/insert_iter_value_db1.pass.cpp          | 2 ++
 .../sequences/list/list.modifiers/pop_back_db1.pass.cpp         | 2 ++
 .../sequences/list/list.ops/db_splice_pos_list.pass.cpp         | 2 ++
 .../sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp    | 2 ++
 .../list/list.ops/db_splice_pos_list_iter_iter.pass.cpp         | 2 ++
 libcxx/test/libcxx/containers/sequences/list/version.pass.cpp   | 2 ++
 .../containers/sequences/vector/const_value_type.pass.cpp       | 2 ++
 .../libcxx/containers/sequences/vector/pop_back_empty.pass.cpp  | 2 ++
 .../sequences/vector/vector.cons/construct_iter_iter.pass.cpp   | 1 +
 .../vector/vector.cons/construct_iter_iter_alloc.pass.cpp       | 1 +
 libcxx/test/libcxx/containers/sequences/vector/version.pass.cpp | 2 ++
 libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp          | 2 ++
 libcxx/test/libcxx/containers/unord/next_prime.pass.cpp         | 2 ++
 .../test/libcxx/containers/unord/non_const_comparator.pass.cpp  | 2 ++
 libcxx/test/libcxx/containers/unord/unord.map/at.abort.pass.cpp | 2 ++
 .../libcxx/containers/unord/unord.map/at.const.abort.pass.cpp   | 2 ++
 libcxx/test/libcxx/containers/unord/unord.map/version.pass.cpp  | 2 ++
 libcxx/test/libcxx/containers/unord/unord.set/version.pass.cpp  | 2 ++
 .../debug/containers/db_associative_container_tests.pass.cpp    | 1 +
 .../db_sequence_container_iterators.multithread.pass.cpp        | 2 ++
 .../debug/containers/db_sequence_container_iterators.pass.cpp   | 1 +
 .../libcxx/debug/containers/db_unord_container_tests.pass.cpp   | 1 +
 libcxx/test/libcxx/debug/debug_abort.pass.cpp                   | 2 ++
 libcxx/test/libcxx/debug/debug_helper_test.pass.cpp             | 1 +
 libcxx/test/libcxx/debug/debug_register.pass.cpp                | 2 ++
 .../libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.cxx1z.pass.cpp  | 2 ++
 .../depr/depr.function.objects/depr.adaptors.cxx1z.pass.cpp     | 2 ++
 libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp    | 2 ++
 libcxx/test/libcxx/depr/enable_removed_cpp17_features.pass.cpp  | 2 ++
 .../libcxx/depr/exception.unexpected/get_unexpected.pass.cpp    | 2 ++
 .../libcxx/depr/exception.unexpected/set_unexpected.pass.cpp    | 2 ++
 .../test/libcxx/depr/exception.unexpected/unexpected.pass.cpp   | 2 ++
 .../test/libcxx/diagnostics/assertions/version_cassert.pass.cpp | 2 ++
 libcxx/test/libcxx/diagnostics/errno/version_cerrno.pass.cpp    | 2 ++
 libcxx/test/libcxx/diagnostics/nodiscard.pass.cpp               | 2 ++
 libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.pass.cpp    | 2 ++
 libcxx/test/libcxx/diagnostics/std.exceptions/version.pass.cpp  | 2 ++
 libcxx/test/libcxx/diagnostics/syserr/version.pass.cpp          | 2 ++
 .../algorithms/header.algorithm.synop/includes.pass.cpp         | 2 ++
 libcxx/test/libcxx/experimental/algorithms/version.pass.cpp     | 2 ++
 .../libcxx/experimental/diagnostics/syserr/version.pass.cpp     | 2 ++
 libcxx/test/libcxx/experimental/filesystem/version.pass.cpp     | 2 ++
 .../construct_piecewise_pair.pass.cpp                           | 2 ++
 .../memory.polymorphic.allocator.mem/db_deallocate.pass.cpp     | 2 ++
 .../memory.resource.adaptor.mem/db_deallocate.pass.cpp          | 2 ++
 .../header_deque_libcpp_version.pass.cpp                        | 2 ++
 .../header_forward_list_libcpp_version.pass.cpp                 | 2 ++
 .../memory.resource.aliases/header_list_libcpp_version.pass.cpp | 2 ++
 .../memory.resource.aliases/header_map_libcpp_version.pass.cpp  | 2 ++
 .../header_regex_libcpp_version.pass.cpp                        | 2 ++
 .../memory.resource.aliases/header_set_libcpp_version.pass.cpp  | 2 ++
 .../header_string_libcpp_version.pass.cpp                       | 2 ++
 .../header_unordered_map_libcpp_version.pass.cpp                | 2 ++
 .../header_unordered_set_libcpp_version.pass.cpp                | 2 ++
 .../header_vector_libcpp_version.pass.cpp                       | 2 ++
 .../global_memory_resource_lifetime.pass.cpp                    | 2 ++
 .../new_delete_resource_lifetime.pass.cpp                       | 2 ++
 .../experimental/memory/memory.resource.synop/version.pass.cpp  | 2 ++
 .../libcxx/experimental/numerics/numeric.ops/version.pass.cpp   | 2 ++
 .../libcxx/experimental/strings/string.view/version.pass.cpp    | 2 ++
 libcxx/test/libcxx/experimental/utilities/any/version.pass.cpp  | 2 ++
 libcxx/test/libcxx/experimental/utilities/meta/version.pass.cpp | 2 ++
 .../libcxx/experimental/utilities/optional/version.pass.cpp     | 2 ++
 .../test/libcxx/experimental/utilities/ratio/version.pass.cpp   | 2 ++
 libcxx/test/libcxx/experimental/utilities/time/version.pass.cpp | 2 ++
 .../test/libcxx/experimental/utilities/tuple/version.pass.cpp   | 2 ++
 .../test/libcxx/experimental/utilities/utility/version.pass.cpp | 2 ++
 libcxx/test/libcxx/extensions/hash/specializations.pass.cpp     | 2 ++
 .../input.output/file.streams/c.files/version_ccstdio.pass.cpp  | 2 ++
 .../file.streams/c.files/version_cinttypes.pass.cpp             | 2 ++
 .../input.output/file.streams/fstreams/fstream.close.pass.cpp   | 1 +
 .../file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp   | 1 +
 .../fstreams/fstream.members/open_wchar_pointer.pass.cpp        | 1 +
 .../file.streams/fstreams/ifstream.cons/wchar_pointer.pass.cpp  | 2 ++
 .../fstreams/ifstream.members/open_wchar_pointer.pass.cpp       | 2 ++
 .../file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp  | 1 +
 .../fstreams/ofstream.members/open_wchar_pointer.pass.cpp       | 1 +
 .../libcxx/input.output/file.streams/fstreams/version.pass.cpp  | 2 ++
 libcxx/test/libcxx/input.output/filesystems/version.pass.cpp    | 2 ++
 .../input.output/iostream.format/input.streams/version.pass.cpp | 2 ++
 .../iostream.format/output.streams/version.pass.cpp             | 2 ++
 .../input.output/iostream.format/std.manip/version.pass.cpp     | 2 ++
 .../test/libcxx/input.output/iostream.forward/version.pass.cpp  | 2 ++
 .../test/libcxx/input.output/iostream.objects/version.pass.cpp  | 2 ++
 .../iostreams.base/ios/iostate.flags/clear.abort.pass.cpp       | 2 ++
 libcxx/test/libcxx/input.output/iostreams.base/version.pass.cpp | 2 ++
 libcxx/test/libcxx/input.output/stream.buffers/version.pass.cpp | 2 ++
 libcxx/test/libcxx/input.output/string.streams/version.pass.cpp | 2 ++
 libcxx/test/libcxx/iterators/advance.debug1.pass.cpp            | 1 +
 libcxx/test/libcxx/iterators/failed.pass.cpp                    | 2 ++
 libcxx/test/libcxx/iterators/next.debug1.pass.cpp               | 1 +
 libcxx/test/libcxx/iterators/prev.debug1.pass.cpp               | 1 +
 libcxx/test/libcxx/iterators/version.pass.cpp                   | 2 ++
 libcxx/test/libcxx/language.support/cmp/version.pass.cpp        | 2 ++
 libcxx/test/libcxx/language.support/cstdint/version.pass.cpp    | 2 ++
 .../libcxx/language.support/support.dynamic/version.pass.cpp    | 2 ++
 .../libcxx/language.support/support.exception/version.pass.cpp  | 2 ++
 .../libcxx/language.support/support.initlist/version.pass.cpp   | 2 ++
 .../support.limits/c.limits/version_cfloat.pass.cpp             | 2 ++
 .../support.limits/c.limits/version_climits.pass.cpp            | 2 ++
 .../language.support/support.limits/limits/version.pass.cpp     | 2 ++
 .../libcxx/language.support/support.limits/version.pass.cpp     | 2 ++
 .../test/libcxx/language.support/support.rtti/version.pass.cpp  | 2 ++
 .../language.support/support.runtime/version_csetjmp.pass.cpp   | 2 ++
 .../language.support/support.runtime/version_csignal.pass.cpp   | 2 ++
 .../language.support/support.runtime/version_cstdarg.pass.cpp   | 2 ++
 .../language.support/support.runtime/version_cstdbool.pass.cpp  | 2 ++
 .../language.support/support.runtime/version_cstdlib.pass.cpp   | 2 ++
 .../language.support/support.runtime/version_ctime.pass.cpp     | 2 ++
 .../test/libcxx/language.support/support.types/version.pass.cpp | 2 ++
 libcxx/test/libcxx/localization/c.locales/version.pass.cpp      | 2 ++
 .../localization/locale.categories/__scan_keyword.pass.cpp      | 2 ++
 libcxx/test/libcxx/localization/locale.stdcvt/version.pass.cpp  | 2 ++
 libcxx/test/libcxx/localization/locales/locale.abort.pass.cpp   | 2 ++
 .../libcxx/localization/locales/locale.category.abort.pass.cpp  | 2 ++
 .../conversions/conversions.string/ctor_move.pass.cpp           | 2 ++
 .../locales/locale/locale.types/locale.facet/facet.pass.cpp     | 2 ++
 .../locales/locale/locale.types/locale.id/id.pass.cpp           | 2 ++
 .../test/libcxx/localization/locales/use_facet.abort.pass.cpp   | 2 ++
 libcxx/test/libcxx/localization/version.pass.cpp                | 2 ++
 libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp     | 2 ++
 libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp       | 2 ++
 libcxx/test/libcxx/numerics/c.math/ctgmath.pass.cpp             | 2 ++
 libcxx/test/libcxx/numerics/c.math/tgmath_h.pass.cpp            | 2 ++
 libcxx/test/libcxx/numerics/c.math/version_cmath.pass.cpp       | 2 ++
 libcxx/test/libcxx/numerics/cfenv/version.pass.cpp              | 2 ++
 libcxx/test/libcxx/numerics/complex.number/__sqr.pass.cpp       | 2 ++
 .../libcxx/numerics/complex.number/ccmplx/ccomplex.pass.cpp     | 2 ++
 libcxx/test/libcxx/numerics/complex.number/version.pass.cpp     | 2 ++
 libcxx/test/libcxx/numerics/numarray/version.pass.cpp           | 2 ++
 libcxx/test/libcxx/numerics/numeric.ops/version.pass.cpp        | 2 ++
 libcxx/test/libcxx/numerics/rand/rand.synopsis/version.pass.cpp | 2 ++
 .../basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp | 2 ++
 .../basic.string/string.modifiers/insert_iter_char_db1.pass.cpp | 2 ++
 .../string.modifiers/insert_iter_size_char_db1.pass.cpp         | 2 ++
 libcxx/test/libcxx/strings/c.strings/version_cctype.pass.cpp    | 2 ++
 libcxx/test/libcxx/strings/c.strings/version_cstring.pass.cpp   | 2 ++
 libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp    | 2 ++
 libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp    | 2 ++
 libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp   | 2 ++
 libcxx/test/libcxx/strings/version.pass.cpp                     | 2 ++
 .../thread/futures/futures.promise/set_exception.pass.cpp       | 1 +
 .../futures.promise/set_exception_at_thread_exit.pass.cpp       | 1 +
 libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp   | 2 ++
 libcxx/test/libcxx/thread/futures/version.pass.cpp              | 2 ++
 .../PR30202_notify_from_pthread_created_thread.pass.cpp         | 2 ++
 .../thread.condition.condvar/native_handle.pass.cpp             | 2 ++
 libcxx/test/libcxx/thread/thread.condition/version.pass.cpp     | 2 ++
 .../thread.mutex.class/native_handle.pass.cpp                   | 2 ++
 .../thread.mutex.recursive/native_handle.pass.cpp               | 2 ++
 .../thread.mutex/thread_safety_annotations_not_enabled.pass.cpp | 2 ++
 .../thread/thread.mutex/thread_safety_lock_unlock.pass.cpp      | 2 ++
 .../thread.mutex/thread_safety_requires_capability.pass.cpp     | 2 ++
 libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp         | 2 ++
 .../thread.thread.member/native_handle.pass.cpp                 | 2 ++
 .../thread/thread.threads/thread.thread.class/types.pass.cpp    | 2 ++
 .../thread/thread.threads/thread.thread.this/sleep_for.pass.cpp | 2 ++
 libcxx/test/libcxx/thread/thread.threads/version.pass.cpp       | 2 ++
 libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp     | 2 ++
 libcxx/test/libcxx/type_traits/lazy_metafunctions.pass.cpp      | 2 ++
 libcxx/test/libcxx/utilities/any/size_and_alignment.pass.cpp    | 2 ++
 libcxx/test/libcxx/utilities/any/small_type.pass.cpp            | 1 +
 libcxx/test/libcxx/utilities/any/version.pass.cpp               | 2 ++
 .../libcxx/utilities/function.objects/refwrap/binary.pass.cpp   | 2 ++
 .../libcxx/utilities/function.objects/refwrap/unary.pass.cpp    | 2 ++
 libcxx/test/libcxx/utilities/function.objects/version.pass.cpp  | 2 ++
 .../util.dynamic.safety/get_pointer_safety_new_abi.pass.cpp     | 2 ++
 .../utilities/memory/util.smartptr/race_condition.pass.cpp      | 2 ++
 libcxx/test/libcxx/utilities/memory/version.pass.cpp            | 2 ++
 .../meta.unary.prop/__has_operator_addressof.pass.cpp           | 2 ++
 libcxx/test/libcxx/utilities/meta/version.pass.cpp              | 2 ++
 .../optional.object/optional.object.assign/copy.pass.cpp        | 2 ++
 .../optional.object/optional.object.assign/move.pass.cpp        | 2 ++
 .../optional/optional.object/optional.object.ctor/copy.pass.cpp | 2 ++
 .../optional/optional.object/optional.object.ctor/move.pass.cpp | 2 ++
 .../utilities/optional/optional.object/triviality.abi.pass.cpp  | 2 ++
 libcxx/test/libcxx/utilities/optional/version.pass.cpp          | 2 ++
 libcxx/test/libcxx/utilities/ratio/version.pass.cpp             | 2 ++
 libcxx/test/libcxx/utilities/template.bitset/includes.pass.cpp  | 2 ++
 libcxx/test/libcxx/utilities/template.bitset/version.pass.cpp   | 2 ++
 libcxx/test/libcxx/utilities/time/version.pass.cpp              | 2 ++
 .../libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp    | 2 ++
 libcxx/test/libcxx/utilities/tuple/version.pass.cpp             | 2 ++
 libcxx/test/libcxx/utilities/type.index/version.pass.cpp        | 2 ++
 .../test/libcxx/utilities/utility/__is_inplace_index.pass.cpp   | 2 ++
 libcxx/test/libcxx/utilities/utility/__is_inplace_type.pass.cpp | 2 ++
 .../test/libcxx/utilities/utility/pairs/pairs.pair/U_V.pass.cpp | 2 ++
 .../utility/pairs/pairs.pair/assign_tuple_like.pass.cpp         | 2 ++
 .../utility/pairs/pairs.pair/const_first_const_second.pass.cpp  | 2 ++
 .../utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp  | 2 ++
 .../libcxx/utilities/utility/pairs/pairs.pair/default.pass.cpp  | 2 ++
 .../utilities/utility/pairs/pairs.pair/piecewise.pass.cpp       | 2 ++
 .../utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp     | 2 ++
 libcxx/test/libcxx/utilities/utility/version.pass.cpp           | 2 ++
 .../utilities/variant/variant.variant/variant_size.pass.cpp     | 2 ++
 libcxx/test/libcxx/utilities/variant/version.pass.cpp           | 2 ++
 .../alg.modifying.operations/alg.partitions/partition.pass.cpp  | 1 +
 .../alg.random.sample/sample.stable.pass.cpp                    | 1 +
 .../alg.modifying.operations/alg.reverse/reverse.pass.cpp       | 1 +
 .../alg.modifying.operations/alg.swap/iter_swap.pass.cpp        | 2 ++
 .../std/algorithms/alg.sorting/alg.clamp/clamp.comp.pass.cpp    | 2 ++
 libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.pass.cpp | 2 ++
 .../alg.heap.operations/make.heap/make_heap.pass.cpp            | 2 ++
 .../alg.sorting/alg.heap.operations/pop.heap/pop_heap.pass.cpp  | 2 ++
 .../alg.heap.operations/push.heap/push_heap.pass.cpp            | 2 ++
 .../alg.heap.operations/sort.heap/sort_heap.pass.cpp            | 2 ++
 .../std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp | 1 +
 .../std/algorithms/alg.sorting/alg.min.max/max_element.pass.cpp | 1 +
 .../std/algorithms/alg.sorting/alg.min.max/min_element.pass.cpp | 1 +
 .../algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp  | 1 +
 .../algorithms/alg.sorting/alg.nth.element/nth_element.pass.cpp | 2 ++
 .../alg.permutation.generators/next_permutation.pass.cpp        | 1 +
 .../alg.permutation.generators/next_permutation_comp.pass.cpp   | 1 +
 .../alg.permutation.generators/prev_permutation.pass.cpp        | 1 +
 .../alg.permutation.generators/prev_permutation_comp.pass.cpp   | 1 +
 .../alg.set.operations/set.difference/set_difference.pass.cpp   | 1 +
 .../set.difference/set_difference_comp.pass.cpp                 | 1 +
 .../set.symmetric.difference/set_symmetric_difference.pass.cpp  | 1 +
 .../set_symmetric_difference_comp.pass.cpp                      | 1 +
 .../alg.sorting/alg.set.operations/set.union/set_union.pass.cpp | 1 +
 .../alg.set.operations/set.union/set_union_comp.pass.cpp        | 1 +
 .../alg.set.operations/set.union/set_union_move.pass.cpp        | 1 +
 .../alg.sorting/alg.sort/is.sorted/is_sorted.pass.cpp           | 1 +
 .../alg.sorting/alg.sort/is.sorted/is_sorted_comp.pass.cpp      | 1 +
 .../alg.sorting/alg.sort/is.sorted/is_sorted_until.pass.cpp     | 1 +
 .../alg.sort/is.sorted/is_sorted_until_comp.pass.cpp            | 1 +
 .../alg.sort/partial.sort.copy/partial_sort_copy.pass.cpp       | 1 +
 .../alg.sort/partial.sort.copy/partial_sort_copy_comp.pass.cpp  | 1 +
 .../alg.sorting/alg.sort/partial.sort/partial_sort.pass.cpp     | 2 ++
 .../test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp | 2 ++
 .../alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp       | 2 ++
 .../std/atomics/atomics.fences/atomic_signal_fence.pass.cpp     | 2 ++
 .../std/atomics/atomics.fences/atomic_thread_fence.pass.cpp     | 2 ++
 libcxx/test/std/atomics/atomics.flag/atomic_flag_clear.pass.cpp | 2 ++
 .../atomics/atomics.flag/atomic_flag_clear_explicit.pass.cpp    | 2 ++
 .../std/atomics/atomics.flag/atomic_flag_test_and_set.pass.cpp  | 2 ++
 .../atomics.flag/atomic_flag_test_and_set_explicit.pass.cpp     | 2 ++
 libcxx/test/std/atomics/atomics.flag/clear.pass.cpp             | 2 ++
 libcxx/test/std/atomics/atomics.flag/init.pass.cpp              | 2 ++
 libcxx/test/std/atomics/atomics.flag/test_and_set.pass.cpp      | 2 ++
 .../std/atomics/atomics.general/replace_failure_order.pass.cpp  | 2 ++
 .../test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp | 2 ++
 libcxx/test/std/atomics/atomics.lockfree/lockfree.pass.cpp      | 2 ++
 libcxx/test/std/atomics/atomics.order/kill_dependency.pass.cpp  | 2 ++
 libcxx/test/std/atomics/atomics.order/memory_order.pass.cpp     | 2 ++
 libcxx/test/std/atomics/atomics.order/memory_order_new.pass.cpp | 2 ++
 .../std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp | 2 ++
 .../atomics/atomics.types.generic/integral_typedefs.pass.cpp    | 2 ++
 .../atomics/atomics.types.generic/trivially_copyable.pass.cpp   | 2 ++
 .../atomic_compare_exchange_strong.pass.cpp                     | 1 +
 .../atomic_compare_exchange_strong_explicit.pass.cpp            | 1 +
 .../atomic_compare_exchange_weak.pass.cpp                       | 1 +
 .../atomic_compare_exchange_weak_explicit.pass.cpp              | 1 +
 .../atomics.types.operations.req/atomic_exchange.pass.cpp       | 1 +
 .../atomic_exchange_explicit.pass.cpp                           | 1 +
 .../atomics.types.operations.req/atomic_fetch_add.pass.cpp      | 1 +
 .../atomic_fetch_add_explicit.pass.cpp                          | 1 +
 .../atomics.types.operations.req/atomic_fetch_and.pass.cpp      | 1 +
 .../atomic_fetch_and_explicit.pass.cpp                          | 1 +
 .../atomics.types.operations.req/atomic_fetch_or.pass.cpp       | 1 +
 .../atomic_fetch_or_explicit.pass.cpp                           | 1 +
 .../atomics.types.operations.req/atomic_fetch_sub.pass.cpp      | 1 +
 .../atomic_fetch_sub_explicit.pass.cpp                          | 1 +
 .../atomics.types.operations.req/atomic_fetch_xor.pass.cpp      | 1 +
 .../atomic_fetch_xor_explicit.pass.cpp                          | 1 +
 .../atomics.types.operations.req/atomic_init.pass.cpp           | 1 +
 .../atomics.types.operations.req/atomic_is_lock_free.pass.cpp   | 1 +
 .../atomics.types.operations.req/atomic_load.pass.cpp           | 1 +
 .../atomics.types.operations.req/atomic_load_explicit.pass.cpp  | 1 +
 .../atomics.types.operations.req/atomic_store.pass.cpp          | 1 +
 .../atomics.types.operations.req/atomic_store_explicit.pass.cpp | 1 +
 .../atomics.types.operations.req/atomic_var_init.pass.cpp       | 2 ++
 .../atomics.types.operations.req/ctor.pass.cpp                  | 1 +
 libcxx/test/std/containers/associative/map/compare.pass.cpp     | 2 ++
 .../test/std/containers/associative/map/gcc_workaround.pass.cpp | 2 ++
 .../std/containers/associative/map/incomplete_type.pass.cpp     | 2 ++
 .../std/containers/associative/map/map.access/empty.pass.cpp    | 1 +
 .../containers/associative/map/map.access/index_tuple.pass.cpp  | 2 ++
 .../std/containers/associative/map/map.access/size.pass.cpp     | 1 +
 .../test/std/containers/associative/map/map.cons/alloc.pass.cpp | 1 +
 .../associative/map/map.cons/assign_initializer_list.pass.cpp   | 1 +
 .../std/containers/associative/map/map.cons/compare.pass.cpp    | 1 +
 .../containers/associative/map/map.cons/compare_alloc.pass.cpp  | 1 +
 .../std/containers/associative/map/map.cons/copy_alloc.pass.cpp | 1 +
 .../containers/associative/map/map.cons/copy_assign.pass.cpp    | 1 +
 .../std/containers/associative/map/map.cons/default.pass.cpp    | 1 +
 .../associative/map/map.cons/default_recursive.pass.cpp         | 2 ++
 .../associative/map/map.cons/initializer_list.pass.cpp          | 1 +
 .../associative/map/map.cons/initializer_list_compare.pass.cpp  | 1 +
 .../map/map.cons/initializer_list_compare_alloc.pass.cpp        | 1 +
 .../std/containers/associative/map/map.cons/iter_iter.pass.cpp  | 1 +
 .../containers/associative/map/map.cons/iter_iter_comp.pass.cpp | 1 +
 .../test/std/containers/associative/map/map.cons/move.pass.cpp  | 1 +
 .../containers/associative/map/map.cons/move_assign.pass.cpp    | 1 +
 .../containers/associative/map/map.modifiers/emplace.pass.cpp   | 1 +
 .../associative/map/map.modifiers/emplace_hint.pass.cpp         | 1 +
 .../associative/map/map.modifiers/erase_iter.pass.cpp           | 1 +
 .../associative/map/map.modifiers/erase_iter_iter.pass.cpp      | 1 +
 .../containers/associative/map/map.modifiers/erase_key.pass.cpp | 1 +
 .../associative/map/map.modifiers/extract_iterator.pass.cpp     | 1 +
 .../associative/map/map.modifiers/extract_key.pass.cpp          | 1 +
 .../insert_and_emplace_allocator_requirements.pass.cpp          | 1 +
 .../map/map.modifiers/insert_initializer_list.pass.cpp          | 1 +
 .../associative/map/map.modifiers/insert_iter_iter.pass.cpp     | 1 +
 .../associative/map/map.modifiers/insert_node_type.pass.cpp     | 1 +
 .../map/map.modifiers/insert_node_type_hint.pass.cpp            | 1 +
 .../associative/map/map.modifiers/insert_or_assign.pass.cpp     | 2 ++
 .../associative/map/map.modifiers/try.emplace.pass.cpp          | 2 ++
 .../test/std/containers/associative/map/map.ops/count0.pass.cpp | 1 +
 .../containers/associative/map/map.ops/equal_range0.pass.cpp    | 1 +
 .../test/std/containers/associative/map/map.ops/find0.pass.cpp  | 1 +
 .../containers/associative/map/map.ops/lower_bound0.pass.cpp    | 1 +
 .../containers/associative/map/map.ops/upper_bound0.pass.cpp    | 1 +
 .../containers/associative/map/map.special/member_swap.pass.cpp | 1 +
 .../associative/map/map.special/non_member_swap.pass.cpp        | 1 +
 libcxx/test/std/containers/associative/map/types.pass.cpp       | 1 +
 libcxx/test/std/containers/associative/multimap/empty.pass.cpp  | 1 +
 .../containers/associative/multimap/incomplete_type.pass.cpp    | 2 ++
 .../associative/multimap/multimap.cons/alloc.pass.cpp           | 1 +
 .../multimap/multimap.cons/assign_initializer_list.pass.cpp     | 1 +
 .../associative/multimap/multimap.cons/compare.pass.cpp         | 1 +
 .../associative/multimap/multimap.cons/compare_alloc.pass.cpp   | 1 +
 .../associative/multimap/multimap.cons/copy_alloc.pass.cpp      | 1 +
 .../associative/multimap/multimap.cons/copy_assign.pass.cpp     | 1 +
 .../associative/multimap/multimap.cons/default.pass.cpp         | 1 +
 .../multimap/multimap.cons/default_recursive.pass.cpp           | 2 ++
 .../multimap/multimap.cons/initializer_list.pass.cpp            | 1 +
 .../multimap/multimap.cons/initializer_list_compare.pass.cpp    | 1 +
 .../multimap.cons/initializer_list_compare_alloc.pass.cpp       | 1 +
 .../associative/multimap/multimap.cons/iter_iter_comp.pass.cpp  | 1 +
 .../multimap/multimap.cons/iter_iter_comp_alloc.pass.cpp        | 1 +
 .../containers/associative/multimap/multimap.cons/move.pass.cpp | 1 +
 .../associative/multimap/multimap.cons/move_assign.pass.cpp     | 1 +
 .../associative/multimap/multimap.modifiers/emplace.pass.cpp    | 1 +
 .../multimap/multimap.modifiers/emplace_hint.pass.cpp           | 1 +
 .../associative/multimap/multimap.modifiers/erase_iter.pass.cpp | 1 +
 .../multimap/multimap.modifiers/erase_iter_iter.pass.cpp        | 1 +
 .../associative/multimap/multimap.modifiers/erase_key.pass.cpp  | 1 +
 .../multimap/multimap.modifiers/extract_iterator.pass.cpp       | 1 +
 .../multimap/multimap.modifiers/extract_key.pass.cpp            | 1 +
 .../multimap.modifiers/insert_allocator_requirements.pass.cpp   | 1 +
 .../multimap.modifiers/insert_initializer_list.pass.cpp         | 1 +
 .../multimap/multimap.modifiers/insert_iter_iter.pass.cpp       | 1 +
 .../multimap/multimap.modifiers/insert_node_type.pass.cpp       | 1 +
 .../multimap/multimap.modifiers/insert_node_type_hint.pass.cpp  | 1 +
 .../associative/multimap/multimap.ops/count0.pass.cpp           | 1 +
 .../associative/multimap/multimap.ops/equal_range0.pass.cpp     | 1 +
 .../containers/associative/multimap/multimap.ops/find0.pass.cpp | 1 +
 .../associative/multimap/multimap.ops/lower_bound0.pass.cpp     | 1 +
 .../associative/multimap/multimap.ops/upper_bound0.pass.cpp     | 1 +
 .../associative/multimap/multimap.special/member_swap.pass.cpp  | 1 +
 .../multimap/multimap.special/non_member_swap.pass.cpp          | 1 +
 libcxx/test/std/containers/associative/multimap/scary.pass.cpp  | 2 ++
 libcxx/test/std/containers/associative/multimap/size.pass.cpp   | 1 +
 libcxx/test/std/containers/associative/multimap/types.pass.cpp  | 1 +
 .../test/std/containers/associative/multiset/emplace.pass.cpp   | 1 +
 .../std/containers/associative/multiset/emplace_hint.pass.cpp   | 1 +
 libcxx/test/std/containers/associative/multiset/empty.pass.cpp  | 1 +
 .../std/containers/associative/multiset/erase_iter.pass.cpp     | 1 +
 .../containers/associative/multiset/erase_iter_iter.pass.cpp    | 1 +
 .../test/std/containers/associative/multiset/erase_key.pass.cpp | 1 +
 .../containers/associative/multiset/extract_iterator.pass.cpp   | 1 +
 .../std/containers/associative/multiset/extract_key.pass.cpp    | 1 +
 .../containers/associative/multiset/incomplete_type.pass.cpp    | 2 ++
 .../test/std/containers/associative/multiset/insert_cv.pass.cpp | 1 +
 .../multiset/insert_emplace_allocator_requirements.pass.cpp     | 1 +
 .../associative/multiset/insert_initializer_list.pass.cpp       | 1 +
 .../std/containers/associative/multiset/insert_iter_cv.pass.cpp | 1 +
 .../containers/associative/multiset/insert_iter_iter.pass.cpp   | 1 +
 .../std/containers/associative/multiset/insert_iter_rv.pass.cpp | 1 +
 .../containers/associative/multiset/insert_node_type.pass.cpp   | 1 +
 .../associative/multiset/insert_node_type_hint.pass.cpp         | 1 +
 .../test/std/containers/associative/multiset/insert_rv.pass.cpp | 1 +
 .../associative/multiset/multiset.cons/alloc.pass.cpp           | 1 +
 .../multiset/multiset.cons/assign_initializer_list.pass.cpp     | 1 +
 .../associative/multiset/multiset.cons/compare.pass.cpp         | 1 +
 .../associative/multiset/multiset.cons/compare_alloc.pass.cpp   | 1 +
 .../associative/multiset/multiset.cons/copy_alloc.pass.cpp      | 1 +
 .../associative/multiset/multiset.cons/copy_assign.pass.cpp     | 1 +
 .../associative/multiset/multiset.cons/default.pass.cpp         | 1 +
 .../multiset/multiset.cons/initializer_list_compare.pass.cpp    | 1 +
 .../multiset.cons/initializer_list_compare_alloc.pass.cpp       | 1 +
 .../associative/multiset/multiset.cons/iter_iter.pass.cpp       | 1 +
 .../associative/multiset/multiset.cons/iter_iter_comp.pass.cpp  | 1 +
 .../containers/associative/multiset/multiset.cons/move.pass.cpp | 1 +
 .../associative/multiset/multiset.cons/move_assign.pass.cpp     | 1 +
 .../associative/multiset/multiset.special/member_swap.pass.cpp  | 1 +
 .../multiset/multiset.special/non_member_swap.pass.cpp          | 1 +
 libcxx/test/std/containers/associative/multiset/scary.pass.cpp  | 2 ++
 libcxx/test/std/containers/associative/multiset/size.pass.cpp   | 1 +
 libcxx/test/std/containers/associative/multiset/types.pass.cpp  | 1 +
 libcxx/test/std/containers/associative/set/emplace.pass.cpp     | 1 +
 .../test/std/containers/associative/set/emplace_hint.pass.cpp   | 1 +
 libcxx/test/std/containers/associative/set/empty.pass.cpp       | 1 +
 libcxx/test/std/containers/associative/set/erase_iter.pass.cpp  | 1 +
 .../std/containers/associative/set/erase_iter_iter.pass.cpp     | 1 +
 libcxx/test/std/containers/associative/set/erase_key.pass.cpp   | 1 +
 .../std/containers/associative/set/extract_iterator.pass.cpp    | 1 +
 libcxx/test/std/containers/associative/set/extract_key.pass.cpp | 1 +
 .../test/std/containers/associative/set/gcc_workaround.pass.cpp | 2 ++
 .../std/containers/associative/set/incomplete_type.pass.cpp     | 2 ++
 .../set/insert_and_emplace_allocator_requirements.pass.cpp      | 1 +
 libcxx/test/std/containers/associative/set/insert_cv.pass.cpp   | 1 +
 .../containers/associative/set/insert_initializer_list.pass.cpp | 1 +
 .../test/std/containers/associative/set/insert_iter_cv.pass.cpp | 1 +
 .../std/containers/associative/set/insert_iter_iter.pass.cpp    | 1 +
 .../test/std/containers/associative/set/insert_iter_rv.pass.cpp | 1 +
 .../std/containers/associative/set/insert_node_type.pass.cpp    | 1 +
 .../containers/associative/set/insert_node_type_hint.pass.cpp   | 1 +
 libcxx/test/std/containers/associative/set/insert_rv.pass.cpp   | 1 +
 .../test/std/containers/associative/set/set.cons/alloc.pass.cpp | 1 +
 .../associative/set/set.cons/assign_initializer_list.pass.cpp   | 1 +
 .../std/containers/associative/set/set.cons/compare.pass.cpp    | 1 +
 .../containers/associative/set/set.cons/compare_alloc.pass.cpp  | 1 +
 .../std/containers/associative/set/set.cons/copy_alloc.pass.cpp | 1 +
 .../containers/associative/set/set.cons/copy_assign.pass.cpp    | 1 +
 .../std/containers/associative/set/set.cons/default.pass.cpp    | 1 +
 .../associative/set/set.cons/initializer_list.pass.cpp          | 1 +
 .../associative/set/set.cons/initializer_list_compare.pass.cpp  | 1 +
 .../std/containers/associative/set/set.cons/iter_iter.pass.cpp  | 1 +
 .../containers/associative/set/set.cons/iter_iter_comp.pass.cpp | 1 +
 .../test/std/containers/associative/set/set.cons/move.pass.cpp  | 1 +
 .../containers/associative/set/set.cons/move_assign.pass.cpp    | 1 +
 .../containers/associative/set/set.special/member_swap.pass.cpp | 1 +
 .../associative/set/set.special/non_member_swap.pass.cpp        | 1 +
 libcxx/test/std/containers/associative/set/size.pass.cpp        | 1 +
 libcxx/test/std/containers/associative/set/types.pass.cpp       | 1 +
 .../priority.queue/priqueue.cons.alloc/ctor_copy_alloc.pass.cpp | 1 +
 .../priority.queue/priqueue.cons.alloc/ctor_move_alloc.pass.cpp | 1 +
 .../priority.queue/priqueue.cons/assign_copy.pass.cpp           | 2 ++
 .../priority.queue/priqueue.cons/assign_move.pass.cpp           | 1 +
 .../priority.queue/priqueue.cons/ctor_comp.pass.cpp             | 1 +
 .../priority.queue/priqueue.cons/ctor_comp_container.pass.cpp   | 2 ++
 .../priority.queue/priqueue.cons/ctor_comp_rcontainer.pass.cpp  | 1 +
 .../priority.queue/priqueue.cons/ctor_copy.pass.cpp             | 2 ++
 .../priority.queue/priqueue.cons/ctor_default.pass.cpp          | 1 +
 .../priority.queue/priqueue.cons/ctor_iter_iter.pass.cpp        | 2 ++
 .../priority.queue/priqueue.cons/ctor_iter_iter_comp.pass.cpp   | 2 ++
 .../priqueue.cons/ctor_iter_iter_comp_cont.pass.cpp             | 2 ++
 .../priqueue.cons/ctor_iter_iter_comp_rcont.pass.cpp            | 1 +
 .../priority.queue/priqueue.cons/ctor_move.pass.cpp             | 1 +
 .../priority.queue/priqueue.cons/dtor_noexcept.pass.cpp         | 1 +
 .../priority.queue/priqueue.cons/move_assign_noexcept.pass.cpp  | 1 +
 .../priority.queue/priqueue.cons/move_noexcept.pass.cpp         | 1 +
 .../priority.queue/priqueue.members/emplace.pass.cpp            | 1 +
 .../priority.queue/priqueue.members/empty.pass.cpp              | 2 ++
 .../priority.queue/priqueue.members/pop.pass.cpp                | 2 ++
 .../priority.queue/priqueue.members/push.pass.cpp               | 2 ++
 .../priority.queue/priqueue.members/push_rvalue.pass.cpp        | 1 +
 .../priority.queue/priqueue.members/size.pass.cpp               | 2 ++
 .../priority.queue/priqueue.members/swap.pass.cpp               | 2 ++
 .../priority.queue/priqueue.members/top.pass.cpp                | 2 ++
 .../priority.queue/priqueue.special/swap.pass.cpp               | 2 ++
 .../priority.queue/priqueue.special/swap_noexcept.pass.cpp      | 1 +
 .../containers/container.adaptors/priority.queue/types.pass.cpp | 2 ++
 .../queue/queue.cons.alloc/ctor_queue_alloc.pass.cpp            | 1 +
 .../queue/queue.cons.alloc/ctor_rcontainer_alloc.pass.cpp       | 1 +
 .../queue/queue.cons.alloc/ctor_rqueue_alloc.pass.cpp           | 1 +
 .../container.adaptors/queue/queue.cons/ctor_container.pass.cpp | 2 ++
 .../container.adaptors/queue/queue.cons/ctor_copy.pass.cpp      | 2 ++
 .../container.adaptors/queue/queue.cons/ctor_default.pass.cpp   | 1 +
 .../container.adaptors/queue/queue.cons/ctor_move.pass.cpp      | 1 +
 .../queue/queue.cons/ctor_rcontainer.pass.cpp                   | 1 +
 .../container.adaptors/queue/queue.cons/dtor_noexcept.pass.cpp  | 1 +
 .../queue/queue.cons/move_assign_noexcept.pass.cpp              | 1 +
 .../container.adaptors/queue/queue.defn/assign_copy.pass.cpp    | 2 ++
 .../container.adaptors/queue/queue.defn/assign_move.pass.cpp    | 1 +
 .../container.adaptors/queue/queue.defn/back.pass.cpp           | 2 ++
 .../container.adaptors/queue/queue.defn/back_const.pass.cpp     | 2 ++
 .../container.adaptors/queue/queue.defn/empty.pass.cpp          | 2 ++
 .../container.adaptors/queue/queue.defn/front.pass.cpp          | 2 ++
 .../container.adaptors/queue/queue.defn/front_const.pass.cpp    | 2 ++
 .../containers/container.adaptors/queue/queue.defn/pop.pass.cpp | 2 ++
 .../container.adaptors/queue/queue.defn/push.pass.cpp           | 2 ++
 .../container.adaptors/queue/queue.defn/push_rv.pass.cpp        | 1 +
 .../container.adaptors/queue/queue.defn/size.pass.cpp           | 2 ++
 .../container.adaptors/queue/queue.defn/swap.pass.cpp           | 2 ++
 .../container.adaptors/queue/queue.defn/types.pass.cpp          | 2 ++
 .../containers/container.adaptors/queue/queue.ops/eq.pass.cpp   | 2 ++
 .../containers/container.adaptors/queue/queue.ops/lt.pass.cpp   | 2 ++
 .../container.adaptors/queue/queue.special/swap.pass.cpp        | 2 ++
 .../queue/queue.special/swap_noexcept.pass.cpp                  | 1 +
 .../stack/stack.cons.alloc/ctor_copy_alloc.pass.cpp             | 1 +
 .../stack/stack.cons.alloc/ctor_rcontainer_alloc.pass.cpp       | 1 +
 .../stack/stack.cons.alloc/ctor_rqueue_alloc.pass.cpp           | 1 +
 .../container.adaptors/stack/stack.cons/ctor_container.pass.cpp | 2 ++
 .../container.adaptors/stack/stack.cons/ctor_copy.pass.cpp      | 2 ++
 .../container.adaptors/stack/stack.cons/ctor_default.pass.cpp   | 1 +
 .../container.adaptors/stack/stack.cons/ctor_move.pass.cpp      | 1 +
 .../stack/stack.cons/ctor_rcontainer.pass.cpp                   | 1 +
 .../container.adaptors/stack/stack.cons/dtor_noexcept.pass.cpp  | 1 +
 .../stack/stack.cons/move_assign_noexcept.pass.cpp              | 1 +
 .../container.adaptors/stack/stack.defn/assign_copy.pass.cpp    | 2 ++
 .../container.adaptors/stack/stack.defn/assign_move.pass.cpp    | 1 +
 .../container.adaptors/stack/stack.defn/empty.pass.cpp          | 2 ++
 .../containers/container.adaptors/stack/stack.defn/pop.pass.cpp | 2 ++
 .../container.adaptors/stack/stack.defn/push.pass.cpp           | 2 ++
 .../container.adaptors/stack/stack.defn/push_rv.pass.cpp        | 1 +
 .../container.adaptors/stack/stack.defn/size.pass.cpp           | 2 ++
 .../container.adaptors/stack/stack.defn/swap.pass.cpp           | 2 ++
 .../containers/container.adaptors/stack/stack.defn/top.pass.cpp | 2 ++
 .../container.adaptors/stack/stack.defn/top_const.pass.cpp      | 2 ++
 .../container.adaptors/stack/stack.defn/types.pass.cpp          | 2 ++
 .../containers/container.adaptors/stack/stack.ops/eq.pass.cpp   | 2 ++
 .../containers/container.adaptors/stack/stack.ops/lt.pass.cpp   | 2 ++
 .../container.adaptors/stack/stack.special/swap.pass.cpp        | 2 ++
 .../stack/stack.special/swap_noexcept.pass.cpp                  | 1 +
 libcxx/test/std/containers/container.node/node_handle.pass.cpp  | 1 +
 .../std/containers/sequences/array/array.cons/default.pass.cpp  | 1 +
 .../sequences/array/array.cons/initializer_list.pass.cpp        | 1 +
 .../std/containers/sequences/array/array.fill/fill.pass.cpp     | 1 +
 .../std/containers/sequences/array/array.tuple/get_rv.pass.cpp  | 1 +
 .../sequences/array/array.tuple/tuple_element.pass.cpp          | 2 ++
 .../containers/sequences/array/array.tuple/tuple_size.pass.cpp  | 2 ++
 .../sequences/array/array.zero/tested_elsewhere.pass.cpp        | 2 ++
 libcxx/test/std/containers/sequences/array/contiguous.pass.cpp  | 2 ++
 .../std/containers/sequences/deque/deque.cons/alloc.pass.cpp    | 1 +
 .../sequences/deque/deque.cons/assign_initializer_list.pass.cpp | 1 +
 .../containers/sequences/deque/deque.cons/copy_alloc.pass.cpp   | 1 +
 .../std/containers/sequences/deque/deque.cons/default.pass.cpp  | 1 +
 .../sequences/deque/deque.cons/initializer_list.pass.cpp        | 1 +
 .../sequences/deque/deque.cons/initializer_list_alloc.pass.cpp  | 1 +
 .../std/containers/sequences/deque/deque.cons/move.pass.cpp     | 1 +
 .../containers/sequences/deque/deque.cons/move_assign.pass.cpp  | 1 +
 .../std/containers/sequences/deque/deque.cons/op_equal.pass.cpp | 1 +
 .../deque/deque.cons/op_equal_initializer_list.pass.cpp         | 1 +
 .../containers/sequences/deque/deque.cons/size_value.pass.cpp   | 1 +
 .../sequences/deque/deque.cons/size_value_alloc.pass.cpp        | 1 +
 .../containers/sequences/deque/deque.modifiers/emplace.pass.cpp | 1 +
 .../deque/deque.modifiers/erase_iter.invalidation.pass.cpp      | 2 ++
 .../deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp | 2 ++
 .../deque/deque.modifiers/insert_iter_initializer_list.pass.cpp | 1 +
 .../sequences/deque/deque.modifiers/insert_rvalue.pass.cpp      | 1 +
 .../deque/deque.modifiers/pop_back.invalidation.pass.cpp        | 2 ++
 .../sequences/deque/deque.modifiers/pop_back.pass.cpp           | 1 +
 .../deque/deque.modifiers/pop_front.invalidation.pass.cpp       | 2 ++
 .../sequences/deque/deque.modifiers/pop_front.pass.cpp          | 1 +
 .../sequences/deque/deque.modifiers/push_back.pass.cpp          | 1 +
 .../deque/deque.modifiers/push_back_exception_safety.pass.cpp   | 1 +
 .../sequences/deque/deque.modifiers/push_back_rvalue.pass.cpp   | 1 +
 .../sequences/deque/deque.modifiers/push_front.pass.cpp         | 1 +
 .../deque/deque.modifiers/push_front_exception_safety.pass.cpp  | 1 +
 .../sequences/deque/deque.modifiers/push_front_rvalue.pass.cpp  | 1 +
 .../std/containers/sequences/deque/deque.special/copy.pass.cpp  | 1 +
 .../sequences/deque/deque.special/copy_backward.pass.cpp        | 1 +
 .../std/containers/sequences/deque/deque.special/move.pass.cpp  | 1 +
 .../sequences/deque/deque.special/move_backward.pass.cpp        | 1 +
 .../std/containers/sequences/deque/deque.special/swap.pass.cpp  | 1 +
 libcxx/test/std/containers/sequences/deque/types.pass.cpp       | 1 +
 .../sequences/forwardlist/forwardlist.access/front.pass.cpp     | 1 +
 .../sequences/forwardlist/forwardlist.cons/alloc.pass.cpp       | 1 +
 .../sequences/forwardlist/forwardlist.cons/assign_copy.pass.cpp | 1 +
 .../sequences/forwardlist/forwardlist.cons/assign_init.pass.cpp | 1 +
 .../sequences/forwardlist/forwardlist.cons/assign_move.pass.cpp | 1 +
 .../forwardlist/forwardlist.cons/assign_op_init.pass.cpp        | 1 +
 .../forwardlist/forwardlist.cons/assign_range.pass.cpp          | 1 +
 .../forwardlist/forwardlist.cons/assign_size_value.pass.cpp     | 1 +
 .../sequences/forwardlist/forwardlist.cons/copy_alloc.pass.cpp  | 1 +
 .../sequences/forwardlist/forwardlist.cons/default.pass.cpp     | 1 +
 .../forwardlist/forwardlist.cons/default_recursive.pass.cpp     | 2 ++
 .../sequences/forwardlist/forwardlist.cons/init.pass.cpp        | 1 +
 .../sequences/forwardlist/forwardlist.cons/init_alloc.pass.cpp  | 1 +
 .../sequences/forwardlist/forwardlist.cons/move.pass.cpp        | 1 +
 .../sequences/forwardlist/forwardlist.cons/move_alloc.pass.cpp  | 1 +
 .../sequences/forwardlist/forwardlist.cons/range.pass.cpp       | 1 +
 .../sequences/forwardlist/forwardlist.cons/range_alloc.pass.cpp | 1 +
 .../sequences/forwardlist/forwardlist.cons/size_value.pass.cpp  | 1 +
 .../forwardlist/forwardlist.cons/size_value_alloc.pass.cpp      | 1 +
 .../forwardlist/forwardlist.iter/before_begin.pass.cpp          | 1 +
 .../forwardlist/forwardlist.modifiers/emplace_after.pass.cpp    | 1 +
 .../forwardlist/forwardlist.modifiers/erase_after_many.pass.cpp | 1 +
 .../forwardlist/forwardlist.modifiers/erase_after_one.pass.cpp  | 1 +
 .../forwardlist.modifiers/insert_after_const.pass.cpp           | 1 +
 .../forwardlist.modifiers/insert_after_init.pass.cpp            | 1 +
 .../forwardlist.modifiers/insert_after_range.pass.cpp           | 1 +
 .../forwardlist/forwardlist.modifiers/insert_after_rv.pass.cpp  | 1 +
 .../forwardlist.modifiers/insert_after_size_value.pass.cpp      | 1 +
 .../forwardlist/forwardlist.modifiers/pop_front.pass.cpp        | 1 +
 .../forwardlist/forwardlist.modifiers/push_front_const.pass.cpp | 1 +
 .../forwardlist.modifiers/push_front_exception_safety.pass.cpp  | 2 ++
 .../forwardlist/forwardlist.modifiers/push_front_rv.pass.cpp    | 1 +
 .../forwardlist/forwardlist.modifiers/resize_size.pass.cpp      | 1 +
 .../sequences/forwardlist/forwardlist.ops/merge.pass.cpp        | 1 +
 .../sequences/forwardlist/forwardlist.ops/merge_pred.pass.cpp   | 1 +
 .../sequences/forwardlist/forwardlist.ops/remove.pass.cpp       | 1 +
 .../sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp    | 1 +
 .../sequences/forwardlist/forwardlist.ops/reverse.pass.cpp      | 1 +
 .../sequences/forwardlist/forwardlist.ops/sort.pass.cpp         | 1 +
 .../sequences/forwardlist/forwardlist.ops/sort_pred.pass.cpp    | 1 +
 .../forwardlist/forwardlist.ops/splice_after_flist.pass.cpp     | 1 +
 .../forwardlist/forwardlist.ops/splice_after_range.pass.cpp     | 1 +
 .../sequences/forwardlist/forwardlist.ops/unique.pass.cpp       | 1 +
 .../sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp  | 1 +
 .../sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp | 1 +
 .../forwardlist/forwardlist.spec/non_member_swap.pass.cpp       | 1 +
 libcxx/test/std/containers/sequences/forwardlist/types.pass.cpp | 1 +
 .../test/std/containers/sequences/list/incomplete_type.pass.cpp | 2 ++
 .../sequences/list/list.capacity/resize_size.pass.cpp           | 1 +
 .../sequences/list/list.capacity/resize_size_value.pass.cpp     | 1 +
 .../containers/sequences/list/list.cons/assign_copy.pass.cpp    | 1 +
 .../sequences/list/list.cons/assign_initializer_list.pass.cpp   | 1 +
 .../containers/sequences/list/list.cons/assign_move.pass.cpp    | 1 +
 .../std/containers/sequences/list/list.cons/copy_alloc.pass.cpp | 1 +
 .../std/containers/sequences/list/list.cons/default.pass.cpp    | 1 +
 .../sequences/list/list.cons/default_stack_alloc.pass.cpp       | 1 +
 .../sequences/list/list.cons/initializer_list.pass.cpp          | 1 +
 .../sequences/list/list.cons/initializer_list_alloc.pass.cpp    | 1 +
 .../containers/sequences/list/list.cons/input_iterator.pass.cpp | 1 +
 .../test/std/containers/sequences/list/list.cons/move.pass.cpp  | 1 +
 .../std/containers/sequences/list/list.cons/move_alloc.pass.cpp | 1 +
 .../sequences/list/list.cons/op_equal_initializer_list.pass.cpp | 1 +
 .../sequences/list/list.cons/size_value_alloc.pass.cpp          | 1 +
 .../sequences/list/list.modifiers/emplace_front.pass.cpp        | 1 +
 .../sequences/list/list.modifiers/erase_iter.pass.cpp           | 1 +
 .../sequences/list/list.modifiers/erase_iter_iter.pass.cpp      | 1 +
 .../list/list.modifiers/insert_iter_initializer_list.pass.cpp   | 1 +
 .../sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp   | 1 +
 .../containers/sequences/list/list.modifiers/pop_front.pass.cpp | 1 +
 .../containers/sequences/list/list.modifiers/push_back.pass.cpp | 1 +
 .../list/list.modifiers/push_back_exception_safety.pass.cpp     | 2 ++
 .../sequences/list/list.modifiers/push_back_rvalue.pass.cpp     | 1 +
 .../sequences/list/list.modifiers/push_front.pass.cpp           | 1 +
 .../list/list.modifiers/push_front_exception_safety.pass.cpp    | 2 ++
 .../sequences/list/list.modifiers/push_front_rvalue.pass.cpp    | 1 +
 .../test/std/containers/sequences/list/list.ops/merge.pass.cpp  | 1 +
 .../std/containers/sequences/list/list.ops/merge_comp.pass.cpp  | 1 +
 .../std/containers/sequences/list/list.ops/remove_if.pass.cpp   | 1 +
 .../std/containers/sequences/list/list.ops/reverse.pass.cpp     | 1 +
 .../test/std/containers/sequences/list/list.ops/sort.pass.cpp   | 1 +
 .../std/containers/sequences/list/list.ops/sort_comp.pass.cpp   | 1 +
 .../test/std/containers/sequences/list/list.ops/unique.pass.cpp | 1 +
 .../std/containers/sequences/list/list.ops/unique_pred.pass.cpp | 1 +
 .../std/containers/sequences/list/list.special/swap.pass.cpp    | 1 +
 libcxx/test/std/containers/sequences/list/types.pass.cpp        | 1 +
 .../std/containers/sequences/vector.bool/assign_copy.pass.cpp   | 1 +
 .../sequences/vector.bool/assign_initializer_list.pass.cpp      | 1 +
 .../std/containers/sequences/vector.bool/assign_move.pass.cpp   | 1 +
 .../test/std/containers/sequences/vector.bool/capacity.pass.cpp | 1 +
 .../test/std/containers/sequences/vector.bool/emplace.pass.cpp  | 1 +
 .../std/containers/sequences/vector.bool/enabled_hash.pass.cpp  | 1 +
 .../std/containers/sequences/vector.bool/erase_iter.pass.cpp    | 1 +
 .../containers/sequences/vector.bool/erase_iter_iter.pass.cpp   | 1 +
 libcxx/test/std/containers/sequences/vector.bool/find.pass.cpp  | 2 ++
 .../containers/sequences/vector.bool/initializer_list.pass.cpp  | 1 +
 .../sequences/vector.bool/initializer_list_alloc.pass.cpp       | 1 +
 .../sequences/vector.bool/insert_iter_initializer_list.pass.cpp | 1 +
 .../sequences/vector.bool/insert_iter_size_value.pass.cpp       | 1 +
 .../containers/sequences/vector.bool/insert_iter_value.pass.cpp | 1 +
 .../std/containers/sequences/vector.bool/move_alloc.pass.cpp    | 1 +
 .../sequences/vector.bool/op_equal_initializer_list.pass.cpp    | 1 +
 .../std/containers/sequences/vector.bool/push_back.pass.cpp     | 1 +
 .../test/std/containers/sequences/vector.bool/reserve.pass.cpp  | 1 +
 .../std/containers/sequences/vector.bool/resize_size.pass.cpp   | 1 +
 .../containers/sequences/vector.bool/resize_size_value.pass.cpp | 1 +
 .../std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp | 1 +
 libcxx/test/std/containers/sequences/vector.bool/swap.pass.cpp  | 1 +
 libcxx/test/std/containers/sequences/vector.bool/types.pass.cpp | 1 +
 .../std/containers/sequences/vector.bool/vector_bool.pass.cpp   | 1 +
 libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp | 1 +
 libcxx/test/std/containers/sequences/vector/types.pass.cpp      | 1 +
 .../sequences/vector/vector.capacity/capacity.pass.cpp          | 1 +
 .../sequences/vector/vector.capacity/reserve.pass.cpp           | 1 +
 .../sequences/vector/vector.capacity/resize_size_value.pass.cpp | 1 +
 .../sequences/vector/vector.capacity/shrink_to_fit.pass.cpp     | 1 +
 .../containers/sequences/vector/vector.capacity/swap.pass.cpp   | 1 +
 .../sequences/vector/vector.cons/assign_copy.pass.cpp           | 1 +
 .../vector/vector.cons/assign_initializer_list.pass.cpp         | 1 +
 .../sequences/vector/vector.cons/assign_move.pass.cpp           | 1 +
 .../sequences/vector/vector.cons/assign_size_value.pass.cpp     | 1 +
 .../sequences/vector/vector.cons/default.recursive.pass.cpp     | 2 ++
 .../sequences/vector/vector.cons/initializer_list.pass.cpp      | 1 +
 .../vector/vector.cons/initializer_list_alloc.pass.cpp          | 1 +
 .../containers/sequences/vector/vector.cons/move_alloc.pass.cpp | 1 +
 .../sequences/vector/vector.cons/move_assign_noexcept.pass.cpp  | 1 +
 .../sequences/vector/vector.cons/move_noexcept.pass.cpp         | 1 +
 .../vector/vector.cons/op_equal_initializer_list.pass.cpp       | 1 +
 .../std/containers/sequences/vector/vector.data/data.pass.cpp   | 1 +
 .../containers/sequences/vector/vector.data/data_const.pass.cpp | 1 +
 .../sequences/vector/vector.modifiers/emplace_extra.pass.cpp    | 1 +
 .../sequences/vector/vector.modifiers/erase_iter.pass.cpp       | 1 +
 .../sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp  | 1 +
 .../vector.modifiers/insert_iter_initializer_list.pass.cpp      | 1 +
 .../sequences/vector/vector.modifiers/push_back.pass.cpp        | 1 +
 .../sequences/vector/vector.modifiers/push_back_rvalue.pass.cpp | 1 +
 .../containers/sequences/vector/vector.special/swap.pass.cpp    | 1 +
 libcxx/test/std/containers/unord/unord.map/compare.pass.cpp     | 2 ++
 libcxx/test/std/containers/unord/unord.map/count.pass.cpp       | 1 +
 libcxx/test/std/containers/unord/unord.map/eq.pass.cpp          | 1 +
 .../std/containers/unord/unord.map/equal_range_const.pass.cpp   | 1 +
 .../containers/unord/unord.map/equal_range_non_const.pass.cpp   | 1 +
 libcxx/test/std/containers/unord/unord.map/find_const.pass.cpp  | 1 +
 .../test/std/containers/unord/unord.map/find_non_const.pass.cpp | 1 +
 .../std/containers/unord/unord.map/incomplete_type.pass.cpp     | 2 ++
 .../std/containers/unord/unord.map/local_iterators.pass.cpp     | 1 +
 libcxx/test/std/containers/unord/unord.map/types.pass.cpp       | 1 +
 .../unord/unord.map/unord.map.cnstr/assign_init.pass.cpp        | 1 +
 .../unord/unord.map/unord.map.elem/index_tuple.pass.cpp         | 2 ++
 .../unord/unord.map/unord.map.modifiers/emplace.pass.cpp        | 1 +
 .../unord/unord.map/unord.map.modifiers/emplace_hint.pass.cpp   | 1 +
 .../unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp | 2 ++
 .../unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp | 2 ++
 .../unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp  | 2 ++
 .../unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp  | 2 ++
 .../unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp  | 2 ++
 .../unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp  | 2 ++
 .../unord/unord.map/unord.map.modifiers/erase_range.pass.cpp    | 1 +
 .../unord.map/unord.map.modifiers/extract_iterator.pass.cpp     | 1 +
 .../unord/unord.map/unord.map.modifiers/extract_key.pass.cpp    | 1 +
 .../insert_and_emplace_allocator_requirements.pass.cpp          | 1 +
 .../unord.map.modifiers/insert_hint_const_lvalue.pass.cpp       | 1 +
 .../unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp   | 1 +
 .../unord/unord.map/unord.map.modifiers/insert_init.pass.cpp    | 1 +
 .../unord.map/unord.map.modifiers/insert_node_type.pass.cpp     | 1 +
 .../unord.map.modifiers/insert_node_type_hint.pass.cpp          | 1 +
 .../unord.map/unord.map.modifiers/insert_or_assign.pass.cpp     | 2 ++
 .../unord/unord.map/unord.map.modifiers/insert_range.pass.cpp   | 1 +
 .../unord/unord.map/unord.map.modifiers/insert_rvalue.pass.cpp  | 1 +
 .../unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp    | 2 ++
 .../unord/unord.map/unord.map.swap/db_swap_1.pass.cpp           | 2 ++
 libcxx/test/std/containers/unord/unord.multimap/count.pass.cpp  | 1 +
 .../std/containers/unord/unord.multimap/db_iterators_7.pass.cpp | 1 +
 .../std/containers/unord/unord.multimap/db_iterators_8.pass.cpp | 1 +
 .../unord/unord.multimap/db_local_iterators_7.pass.cpp          | 1 +
 .../unord/unord.multimap/db_local_iterators_8.pass.cpp          | 1 +
 libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp     | 1 +
 .../containers/unord/unord.multimap/equal_range_const.pass.cpp  | 1 +
 .../unord/unord.multimap/equal_range_non_const.pass.cpp         | 1 +
 .../std/containers/unord/unord.multimap/find_const.pass.cpp     | 1 +
 .../std/containers/unord/unord.multimap/find_non_const.pass.cpp | 1 +
 .../std/containers/unord/unord.multimap/incomplete.pass.cpp     | 2 ++
 .../std/containers/unord/unord.multimap/load_factor.pass.cpp    | 1 +
 .../containers/unord/unord.multimap/local_iterators.pass.cpp    | 1 +
 .../containers/unord/unord.multimap/max_bucket_count.pass.cpp   | 1 +
 .../containers/unord/unord.multimap/max_load_factor.pass.cpp    | 1 +
 libcxx/test/std/containers/unord/unord.multimap/scary.pass.cpp  | 2 ++
 libcxx/test/std/containers/unord/unord.multimap/types.pass.cpp  | 1 +
 .../unord.multimap/unord.multimap.cnstr/assign_init.pass.cpp    | 1 +
 .../unord.multimap/unord.multimap.modifiers/emplace.pass.cpp    | 1 +
 .../unord.multimap.modifiers/erase_const_iter.pass.cpp          | 1 +
 .../unord.multimap.modifiers/erase_iter_db1.pass.cpp            | 2 ++
 .../unord.multimap.modifiers/erase_iter_db2.pass.cpp            | 2 ++
 .../unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp       | 2 ++
 .../unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp       | 2 ++
 .../unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp       | 2 ++
 .../unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp       | 2 ++
 .../unord.multimap/unord.multimap.modifiers/erase_key.pass.cpp  | 1 +
 .../unord.multimap.modifiers/erase_range.pass.cpp               | 1 +
 .../unord.multimap.modifiers/extract_iterator.pass.cpp          | 1 +
 .../unord.multimap.modifiers/extract_key.pass.cpp               | 1 +
 .../insert_allocator_requirements.pass.cpp                      | 1 +
 .../unord.multimap.modifiers/insert_const_lvalue.pass.cpp       | 1 +
 .../unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp  | 1 +
 .../unord.multimap.modifiers/insert_hint_rvalue.pass.cpp        | 1 +
 .../unord.multimap.modifiers/insert_init.pass.cpp               | 1 +
 .../unord.multimap.modifiers/insert_node_type.pass.cpp          | 1 +
 .../unord.multimap.modifiers/insert_node_type_hint.pass.cpp     | 1 +
 .../unord.multimap.modifiers/insert_range.pass.cpp              | 1 +
 .../unord.multimap.modifiers/insert_rvalue.pass.cpp             | 1 +
 .../unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp | 2 ++
 libcxx/test/std/containers/unord/unord.multiset/count.pass.cpp  | 1 +
 .../std/containers/unord/unord.multiset/db_iterators_7.pass.cpp | 1 +
 .../std/containers/unord/unord.multiset/db_iterators_8.pass.cpp | 1 +
 .../unord/unord.multiset/db_local_iterators_7.pass.cpp          | 1 +
 .../unord/unord.multiset/db_local_iterators_8.pass.cpp          | 1 +
 .../test/std/containers/unord/unord.multiset/emplace.pass.cpp   | 1 +
 .../std/containers/unord/unord.multiset/emplace_hint.pass.cpp   | 1 +
 libcxx/test/std/containers/unord/unord.multiset/eq.pass.cpp     | 1 +
 .../containers/unord/unord.multiset/equal_range_const.pass.cpp  | 1 +
 .../unord/unord.multiset/equal_range_non_const.pass.cpp         | 1 +
 .../containers/unord/unord.multiset/erase_const_iter.pass.cpp   | 1 +
 .../std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp | 2 ++
 .../std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp | 2 ++
 .../unord/unord.multiset/erase_iter_iter_db1.pass.cpp           | 2 ++
 .../unord/unord.multiset/erase_iter_iter_db2.pass.cpp           | 2 ++
 .../unord/unord.multiset/erase_iter_iter_db3.pass.cpp           | 2 ++
 .../unord/unord.multiset/erase_iter_iter_db4.pass.cpp           | 2 ++
 .../test/std/containers/unord/unord.multiset/erase_key.pass.cpp | 1 +
 .../std/containers/unord/unord.multiset/erase_range.pass.cpp    | 1 +
 .../containers/unord/unord.multiset/extract_iterator.pass.cpp   | 1 +
 .../std/containers/unord/unord.multiset/extract_key.pass.cpp    | 1 +
 .../std/containers/unord/unord.multiset/find_const.pass.cpp     | 1 +
 .../std/containers/unord/unord.multiset/find_non_const.pass.cpp | 1 +
 .../std/containers/unord/unord.multiset/incomplete.pass.cpp     | 2 ++
 .../unord/unord.multiset/insert_const_lvalue.pass.cpp           | 1 +
 .../insert_emplace_allocator_requirements.pass.cpp              | 1 +
 .../unord/unord.multiset/insert_hint_const_lvalue.pass.cpp      | 1 +
 .../std/containers/unord/unord.multiset/insert_init.pass.cpp    | 1 +
 .../containers/unord/unord.multiset/insert_node_type.pass.cpp   | 1 +
 .../unord/unord.multiset/insert_node_type_hint.pass.cpp         | 1 +
 .../std/containers/unord/unord.multiset/insert_range.pass.cpp   | 1 +
 .../std/containers/unord/unord.multiset/load_factor.pass.cpp    | 1 +
 .../containers/unord/unord.multiset/local_iterators.pass.cpp    | 1 +
 .../containers/unord/unord.multiset/max_bucket_count.pass.cpp   | 1 +
 .../containers/unord/unord.multiset/max_load_factor.pass.cpp    | 1 +
 libcxx/test/std/containers/unord/unord.multiset/scary.pass.cpp  | 2 ++
 libcxx/test/std/containers/unord/unord.multiset/types.pass.cpp  | 1 +
 .../unord.multiset/unord.multiset.cnstr/assign_init.pass.cpp    | 1 +
 .../unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp | 2 ++
 libcxx/test/std/containers/unord/unord.set/count.pass.cpp       | 1 +
 .../test/std/containers/unord/unord.set/db_iterators_7.pass.cpp | 1 +
 .../test/std/containers/unord/unord.set/db_iterators_8.pass.cpp | 1 +
 .../containers/unord/unord.set/db_local_iterators_7.pass.cpp    | 1 +
 .../containers/unord/unord.set/db_local_iterators_8.pass.cpp    | 1 +
 libcxx/test/std/containers/unord/unord.set/emplace.pass.cpp     | 1 +
 .../test/std/containers/unord/unord.set/emplace_hint.pass.cpp   | 1 +
 libcxx/test/std/containers/unord/unord.set/eq.pass.cpp          | 1 +
 .../std/containers/unord/unord.set/equal_range_const.pass.cpp   | 1 +
 .../containers/unord/unord.set/equal_range_non_const.pass.cpp   | 1 +
 .../std/containers/unord/unord.set/erase_const_iter.pass.cpp    | 1 +
 .../test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp | 2 ++
 .../test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp | 2 ++
 .../std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp | 2 ++
 .../std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp | 2 ++
 .../std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp | 2 ++
 .../std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp | 2 ++
 libcxx/test/std/containers/unord/unord.set/erase_key.pass.cpp   | 1 +
 libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp | 1 +
 .../std/containers/unord/unord.set/extract_iterator.pass.cpp    | 1 +
 libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp | 1 +
 libcxx/test/std/containers/unord/unord.set/find_const.pass.cpp  | 1 +
 .../test/std/containers/unord/unord.set/find_non_const.pass.cpp | 1 +
 libcxx/test/std/containers/unord/unord.set/incomplete.pass.cpp  | 2 ++
 .../insert_and_emplace_allocator_requirements.pass.cpp          | 1 +
 .../std/containers/unord/unord.set/insert_const_lvalue.pass.cpp | 1 +
 .../unord/unord.set/insert_hint_const_lvalue.pass.cpp           | 1 +
 libcxx/test/std/containers/unord/unord.set/insert_init.pass.cpp | 1 +
 .../std/containers/unord/unord.set/insert_node_type.pass.cpp    | 1 +
 .../containers/unord/unord.set/insert_node_type_hint.pass.cpp   | 1 +
 .../test/std/containers/unord/unord.set/insert_range.pass.cpp   | 1 +
 libcxx/test/std/containers/unord/unord.set/load_factor.pass.cpp | 1 +
 .../std/containers/unord/unord.set/local_iterators.pass.cpp     | 1 +
 .../std/containers/unord/unord.set/max_bucket_count.pass.cpp    | 1 +
 .../std/containers/unord/unord.set/max_load_factor.pass.cpp     | 1 +
 libcxx/test/std/containers/unord/unord.set/types.pass.cpp       | 1 +
 .../unord/unord.set/unord.set.cnstr/assign_init.pass.cpp        | 1 +
 .../unord/unord.set/unord.set.cnstr/move_alloc.pass.cpp         | 1 +
 .../unord/unord.set/unord.set.swap/db_swap_1.pass.cpp           | 2 ++
 libcxx/test/std/containers/views/span.tuple/tuple_size.pass.cpp | 2 ++
 .../depr.auto.ptr/auto.ptr/auto.ptr.cons/assignment.pass.cpp    | 1 +
 .../depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert.pass.cpp  | 1 +
 .../auto.ptr/auto.ptr.cons/convert_assignment.pass.cpp          | 1 +
 .../std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/copy.pass.cpp | 1 +
 .../depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/pointer.pass.cpp  | 1 +
 .../auto.ptr/auto.ptr.conv/assign_from_auto_ptr_ref.pass.cpp    | 1 +
 .../auto.ptr/auto.ptr.conv/convert_from_auto_ptr_ref.pass.cpp   | 1 +
 .../auto.ptr/auto.ptr.conv/convert_to_auto_ptr.pass.cpp         | 1 +
 .../auto.ptr/auto.ptr.conv/convert_to_auto_ptr_ref.pass.cpp     | 1 +
 .../depr/depr.auto.ptr/auto.ptr/auto.ptr.members/arrow.pass.cpp | 1 +
 .../depr/depr.auto.ptr/auto.ptr/auto.ptr.members/deref.pass.cpp | 1 +
 .../depr.auto.ptr/auto.ptr/auto.ptr.members/release.pass.cpp    | 1 +
 .../depr/depr.auto.ptr/auto.ptr/auto.ptr.members/reset.pass.cpp | 1 +
 .../test/std/depr/depr.auto.ptr/auto.ptr/element_type.pass.cpp  | 2 ++
 libcxx/test/std/depr/depr.c.headers/assert_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/ciso646.pass.cpp            | 2 ++
 libcxx/test/std/depr/depr.c.headers/complex.h.pass.cpp          | 2 ++
 libcxx/test/std/depr/depr.c.headers/ctype_h.pass.cpp            | 2 ++
 libcxx/test/std/depr/depr.c.headers/errno_h.pass.cpp            | 2 ++
 libcxx/test/std/depr/depr.c.headers/fenv_h.pass.cpp             | 2 ++
 libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp         | 2 ++
 libcxx/test/std/depr/depr.c.headers/iso646_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/limits_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp             | 1 +
 libcxx/test/std/depr/depr.c.headers/setjmp_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/signal_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/stdbool_h.pass.cpp          | 2 ++
 libcxx/test/std/depr/depr.c.headers/stdint_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/string_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/tgmath_h.pass.cpp           | 2 ++
 libcxx/test/std/depr/depr.c.headers/time_h.pass.cpp             | 2 ++
 libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp            | 2 ++
 libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp           | 2 ++
 .../pointer_to_binary_function.pass.cpp                         | 2 ++
 .../pointer_to_unary_function.pass.cpp                          | 2 ++
 .../depr.function.pointer.adaptors/ptr_fun1.pass.cpp            | 2 ++
 .../depr.function.pointer.adaptors/ptr_fun2.pass.cpp            | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun.pass.cpp         | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun1.pass.cpp        | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun1_ref_t.pass.cpp  | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun1_t.pass.cpp      | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun_ref.pass.cpp     | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun_ref1.pass.cpp    | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun_ref_t.pass.cpp   | 2 ++
 .../depr.member.pointer.adaptors/const_mem_fun_t.pass.cpp       | 2 ++
 .../depr.adaptors/depr.member.pointer.adaptors/mem_fun.pass.cpp | 2 ++
 .../depr.member.pointer.adaptors/mem_fun1.pass.cpp              | 2 ++
 .../depr.member.pointer.adaptors/mem_fun1_ref_t.pass.cpp        | 2 ++
 .../depr.member.pointer.adaptors/mem_fun1_t.pass.cpp            | 2 ++
 .../depr.member.pointer.adaptors/mem_fun_ref.pass.cpp           | 2 ++
 .../depr.member.pointer.adaptors/mem_fun_ref1.pass.cpp          | 2 ++
 .../depr.member.pointer.adaptors/mem_fun_ref_t.pass.cpp         | 2 ++
 .../depr.member.pointer.adaptors/mem_fun_t.pass.cpp             | 2 ++
 .../depr.function.objects/depr.base/binary_function.pass.cpp    | 2 ++
 .../depr.function.objects/depr.base/unary_function.pass.cpp     | 2 ++
 .../depr/depr.lib.binders/depr.lib.bind.1st/bind1st.pass.cpp    | 1 +
 .../depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.pass.cpp    | 1 +
 .../depr.lib.binders/depr.lib.binder.1st/binder1st.pass.cpp     | 1 +
 .../depr.lib.binders/depr.lib.binder.2nd/binder2nd.pass.cpp     | 1 +
 .../depr.istrstream/depr.istrstream.cons/ccp.pass.cpp           | 2 ++
 .../depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp      | 2 ++
 .../depr.istrstream/depr.istrstream.cons/cp.pass.cpp            | 2 ++
 .../depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp       | 2 ++
 .../depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp      | 2 ++
 .../depr.istrstream/depr.istrstream.members/str.pass.cpp        | 2 ++
 .../std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp | 2 ++
 .../depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp  | 2 ++
 .../depr.ostrstream/depr.ostrstream.cons/default.pass.cpp       | 2 ++
 .../depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp     | 2 ++
 .../depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp     | 2 ++
 .../depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp      | 2 ++
 .../depr.ostrstream/depr.ostrstream.members/str.pass.cpp        | 2 ++
 .../std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp | 2 ++
 .../depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp    | 2 ++
 .../depr.strstream/depr.strstream.cons/default.pass.cpp         | 2 ++
 .../depr.strstream/depr.strstream.dest/rdbuf.pass.cpp           | 2 ++
 .../depr.strstream/depr.strstream.oper/freeze.pass.cpp          | 2 ++
 .../depr.strstream/depr.strstream.oper/pcount.pass.cpp          | 2 ++
 .../depr.strstream/depr.strstream.oper/str.pass.cpp             | 2 ++
 .../std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp  | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp  | 2 ++
 .../depr.strstreambuf.cons/cp_size_cp.pass.cpp                  | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp | 2 ++
 .../depr.strstreambuf.cons/custom_alloc.pass.cpp                | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp   | 2 ++
 .../depr.strstreambuf.cons/scp_size_scp.pass.cpp                | 2 ++
 .../depr.strstreambuf.cons/ucp_size_ucp.pass.cpp                | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp | 2 ++
 .../depr.strstreambuf.members/overflow.pass.cpp                 | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp | 2 ++
 .../depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp    | 2 ++
 .../depr.strstreambuf.virtuals/overflow.pass.cpp                | 2 ++
 .../depr.strstreambuf.virtuals/pbackfail.pass.cpp               | 2 ++
 .../depr.strstreambuf.virtuals/seekoff.pass.cpp                 | 2 ++
 .../depr.strstreambuf.virtuals/seekpos.pass.cpp                 | 2 ++
 .../depr.strstreambuf.virtuals/setbuf.pass.cpp                  | 2 ++
 .../depr.strstreambuf.virtuals/underflow.pass.cpp               | 2 ++
 .../depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp   | 2 ++
 .../exception.unexpected/set.unexpected/get_unexpected.pass.cpp | 2 ++
 .../exception.unexpected/set.unexpected/set_unexpected.pass.cpp | 2 ++
 .../unexpected.handler/unexpected_handler.pass.cpp              | 2 ++
 .../depr/exception.unexpected/unexpected/unexpected.pass.cpp    | 2 ++
 libcxx/test/std/diagnostics/assertions/cassert.pass.cpp         | 2 ++
 libcxx/test/std/diagnostics/errno/cerrno.pass.cpp               | 2 ++
 .../std.exceptions/domain.error/domain_error.pass.cpp           | 2 ++
 .../std.exceptions/invalid.argument/invalid_argument.pass.cpp   | 2 ++
 .../std.exceptions/length.error/length_error.pass.cpp           | 2 ++
 .../diagnostics/std.exceptions/logic.error/logic_error.pass.cpp | 2 ++
 .../std.exceptions/out.of.range/out_of_range.pass.cpp           | 2 ++
 .../std.exceptions/overflow.error/overflow_error.pass.cpp       | 2 ++
 .../diagnostics/std.exceptions/range.error/range_error.pass.cpp | 2 ++
 .../std.exceptions/runtime.error/runtime_error.pass.cpp         | 2 ++
 .../std.exceptions/underflow.error/underflow_error.pass.cpp     | 2 ++
 libcxx/test/std/diagnostics/syserr/errc.pass.cpp                | 2 ++
 .../syserr/syserr.compare/eq_error_code_error_code.pass.cpp     | 2 ++
 .../syserr/syserr.errcat/syserr.errcat.derived/message.pass.cpp | 2 ++
 .../syserr.errcat.nonvirtuals/default_ctor.pass.cpp             | 2 ++
 .../syserr/syserr.errcat/syserr.errcat.nonvirtuals/eq.pass.cpp  | 2 ++
 .../syserr/syserr.errcat/syserr.errcat.nonvirtuals/lt.pass.cpp  | 2 ++
 .../syserr/syserr.errcat/syserr.errcat.nonvirtuals/neq.pass.cpp | 2 ++
 .../syserr.errcat.overview/error_category.pass.cpp              | 2 ++
 .../syserr.errcat.virtuals/default_error_condition.pass.cpp     | 2 ++
 .../syserr.errcat.virtuals/equivalent_error_code_int.pass.cpp   | 2 ++
 .../equivalent_int_error_condition.pass.cpp                     | 2 ++
 .../syserr.errcode.constructors/ErrorCodeEnum.pass.cpp          | 2 ++
 .../syserr.errcode/syserr.errcode.constructors/default.pass.cpp | 2 ++
 .../syserr.errcode.constructors/int_error_category.pass.cpp     | 2 ++
 .../syserr.errcode.modifiers/ErrorCodeEnum.pass.cpp             | 2 ++
 .../syserr.errcode/syserr.errcode.modifiers/assign.pass.cpp     | 2 ++
 .../syserr.errcode/syserr.errcode.modifiers/clear.pass.cpp      | 2 ++
 .../syserr/syserr.errcode/syserr.errcode.nonmembers/lt.pass.cpp | 2 ++
 .../syserr.errcode.nonmembers/make_error_code.pass.cpp          | 2 ++
 .../syserr.errcode.nonmembers/stream_inserter.pass.cpp          | 2 ++
 .../syserr.errcode/syserr.errcode.observers/bool.pass.cpp       | 2 ++
 .../syserr.errcode/syserr.errcode.observers/category.pass.cpp   | 2 ++
 .../syserr.errcode.observers/default_error_condition.pass.cpp   | 2 ++
 .../syserr.errcode/syserr.errcode.observers/message.pass.cpp    | 2 ++
 .../syserr.errcode/syserr.errcode.observers/value.pass.cpp      | 2 ++
 .../ErrorConditionEnum.pass.cpp                                 | 2 ++
 .../syserr.errcondition.constructors/default.pass.cpp           | 2 ++
 .../int_error_category.pass.cpp                                 | 2 ++
 .../syserr.errcondition.modifiers/ErrorConditionEnum.pass.cpp   | 2 ++
 .../syserr.errcondition.modifiers/assign.pass.cpp               | 2 ++
 .../syserr.errcondition.modifiers/clear.pass.cpp                | 2 ++
 .../syserr.errcondition.nonmembers/lt.pass.cpp                  | 2 ++
 .../make_error_condition.pass.cpp                               | 2 ++
 .../syserr.errcondition.observers/bool.pass.cpp                 | 2 ++
 .../syserr.errcondition.observers/category.pass.cpp             | 2 ++
 .../syserr.errcondition.observers/message.pass.cpp              | 2 ++
 .../syserr.errcondition.observers/value.pass.cpp                | 2 ++
 .../std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp    | 2 ++
 .../syserr.syserr.members/ctor_error_code.pass.cpp              | 2 ++
 .../ctor_error_code_const_char_pointer.pass.cpp                 | 2 ++
 .../syserr.syserr.members/ctor_error_code_string.pass.cpp       | 2 ++
 .../syserr.syserr.members/ctor_int_error_category.pass.cpp      | 2 ++
 .../ctor_int_error_category_const_char_pointer.pass.cpp         | 2 ++
 .../ctor_int_error_category_string.pass.cpp                     | 2 ++
 .../test/std/experimental/algorithms/alg.search/search.pass.cpp | 1 +
 .../experimental/filesystem/fs.req.namespace/namespace.pass.cpp | 2 ++
 .../func.searchers/func.searchers.boyer_moore/default.pass.cpp  | 1 +
 .../func.searchers/func.searchers.boyer_moore/hash.pass.cpp     | 1 +
 .../func.searchers.boyer_moore/hash.pred.pass.cpp               | 1 +
 .../func.searchers/func.searchers.boyer_moore/pred.pass.cpp     | 1 +
 .../func.searchers.boyer_moore_horspool/default.pass.cpp        | 1 +
 .../func.searchers.boyer_moore_horspool/hash.pass.cpp           | 1 +
 .../func.searchers.boyer_moore_horspool/hash.pred.pass.cpp      | 1 +
 .../func.searchers.boyer_moore_horspool/pred.pass.cpp           | 1 +
 .../func/func.searchers/func.searchers.default/default.pass.cpp | 1 +
 .../func.searchers/func.searchers.default/default.pred.pass.cpp | 1 +
 .../make_default_searcher.pass.cpp                              | 1 +
 .../make_default_searcher.pred.pass.cpp                         | 1 +
 .../experimental/func/header.functional.synop/includes.pass.cpp | 2 ++
 libcxx/test/std/experimental/iterator/nothing_to_do.pass.cpp    | 2 ++
 .../coroutine.handle/coroutine.handle.con/assign.pass.cpp       | 2 ++
 .../coroutine.handle/coroutine.handle.con/construct.pass.cpp    | 2 ++
 .../coroutine.handle.export/from_address.pass.cpp               | 2 ++
 .../coroutine.handle.noop/noop_coroutine.pass.cpp               | 2 ++
 .../support.coroutines/coroutine.handle/void_handle.pass.cpp    | 2 ++
 .../support.coroutines/coroutine.traits/promise_type.pass.cpp   | 2 ++
 .../support.coroutines/end.to.end/await_result.pass.cpp         | 2 ++
 .../support.coroutines/end.to.end/bool_await_suspend.pass.cpp   | 2 ++
 .../support.coroutines/end.to.end/expected.pass.cpp             | 2 ++
 .../support.coroutines/end.to.end/generator.pass.cpp            | 1 +
 .../language.support/support.coroutines/end.to.end/go.pass.cpp  | 2 ++
 .../support.coroutines/end.to.end/multishot_func.pass.cpp       | 2 ++
 .../support.coroutines/end.to.end/oneshot_func.pass.cpp         | 2 ++
 .../language.support/support.coroutines/includes.pass.cpp       | 1 +
 .../memory.polymorphic.allocator.ctor/assign.pass.cpp           | 2 ++
 .../memory.polymorphic.allocator.ctor/copy.pass.cpp             | 2 ++
 .../memory.polymorphic.allocator.ctor/default.pass.cpp          | 2 ++
 .../memory_resource_convert.pass.cpp                            | 2 ++
 .../memory.polymorphic.allocator.ctor/other_alloc.pass.cpp      | 2 ++
 .../memory.polymorphic.allocator.eq/equal.pass.cpp              | 2 ++
 .../memory.polymorphic.allocator.eq/not_equal.pass.cpp          | 2 ++
 .../memory.polymorphic.allocator.mem/construct_pair.pass.cpp    | 2 ++
 .../memory.polymorphic.allocator.mem/deallocate.pass.cpp        | 2 ++
 .../memory.polymorphic.allocator.mem/destroy.pass.cpp           | 2 ++
 .../memory.polymorphic.allocator.mem/resource.pass.cpp          | 2 ++
 .../select_on_container_copy_construction.pass.cpp              | 2 ++
 .../memory.resource.adaptor.ctor/alloc_copy.pass.cpp            | 2 ++
 .../memory.resource.adaptor.ctor/alloc_move.pass.cpp            | 2 ++
 .../memory.resource.adaptor.ctor/default.pass.cpp               | 2 ++
 .../memory.resource.adaptor.mem/do_is_equal.pass.cpp            | 2 ++
 .../memory.resource.adaptor.overview/overview.pass.cpp          | 2 ++
 .../memory/memory.resource.aliases/header_deque_synop.pass.cpp  | 2 ++
 .../memory.resource.aliases/header_forward_list_synop.pass.cpp  | 2 ++
 .../memory/memory.resource.aliases/header_list_synop.pass.cpp   | 2 ++
 .../memory/memory.resource.aliases/header_map_synop.pass.cpp    | 2 ++
 .../memory/memory.resource.aliases/header_regex_synop.pass.cpp  | 2 ++
 .../memory/memory.resource.aliases/header_set_synop.pass.cpp    | 2 ++
 .../memory/memory.resource.aliases/header_string_synop.pass.cpp | 2 ++
 .../memory.resource.aliases/header_unordered_map_synop.pass.cpp | 2 ++
 .../memory.resource.aliases/header_unordered_set_synop.pass.cpp | 2 ++
 .../memory/memory.resource.aliases/header_vector_synop.pass.cpp | 2 ++
 .../memory/memory.resource.global/default_resource.pass.cpp     | 2 ++
 .../memory/memory.resource.global/new_delete_resource.pass.cpp  | 2 ++
 .../memory.resource/memory.resource.eq/not_equal.pass.cpp       | 2 ++
 .../memory.resource/memory.resource.public/deallocate.pass.cpp  | 2 ++
 .../memory/memory.resource/memory.resource.public/dtor.pass.cpp | 2 ++
 .../memory.resource/memory.resource.public/is_equal.pass.cpp    | 2 ++
 .../std/experimental/simd/simd.abi/vector_extension.pass.cpp    | 2 ++
 libcxx/test/std/experimental/simd/simd.access/default.pass.cpp  | 2 ++
 libcxx/test/std/experimental/simd/simd.casts/simd_cast.pass.cpp | 2 ++
 .../std/experimental/simd/simd.casts/static_simd_cast.pass.cpp  | 2 ++
 libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp  | 2 ++
 libcxx/test/std/experimental/simd/simd.cons/default.pass.cpp    | 2 ++
 libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp  | 2 ++
 .../std/experimental/simd/simd.traits/abi_for_size.pass.cpp     | 2 ++
 .../propagate_const.assignment/assign.pass.cpp                  | 1 +
 .../assign_convertible_element_type.pass.cpp                    | 1 +
 .../assign_convertible_propagate_const.pass.cpp                 | 1 +
 .../propagate_const.assignment/assign_element_type.pass.cpp     | 1 +
 .../propagate_const.assignment/move_assign.pass.cpp             | 1 +
 .../propagate_const.assignment/move_assign_convertible.pass.cpp | 1 +
 .../move_assign_convertible_propagate_const.pass.cpp            | 1 +
 .../convertible_element_type.explicit.ctor.pass.cpp             | 1 +
 .../convertible_element_type.non-explicit.ctor.pass.cpp         | 1 +
 .../convertible_propagate_const.copy_ctor.pass.cpp              | 1 +
 .../convertible_propagate_const.explicit.move_ctor.pass.cpp     | 1 +
 .../convertible_propagate_const.move_ctor.pass.cpp              | 1 +
 .../propagate_const.ctors/copy_ctor.pass.cpp                    | 1 +
 .../propagate_const.ctors/element_type.explicit.ctor.pass.cpp   | 1 +
 .../element_type.non-explicit.ctor.pass.cpp                     | 1 +
 .../propagate_const.ctors/move_ctor.pass.cpp                    | 1 +
 .../propagate_const.non-const_observers/dereference.pass.cpp    | 1 +
 .../explicit_operator_element_type_ptr.pass.cpp                 | 1 +
 .../propagate_const.non-const_observers/get.pass.cpp            | 1 +
 .../propagate_const.non-const_observers/op_arrow.pass.cpp       | 1 +
 .../operator_element_type_ptr.pass.cpp                          | 1 +
 .../propagate_const.observers/dereference.pass.cpp              | 1 +
 .../explicit_operator_element_type_ptr.pass.cpp                 | 1 +
 .../propagate_const.observers/get.pass.cpp                      | 1 +
 .../propagate_const.observers/op_arrow.pass.cpp                 | 1 +
 .../operator_element_type_ptr.pass.cpp                          | 1 +
 .../propagate_const/propagate_const.class/swap.pass.cpp         | 1 +
 .../propagate_const/propagate_const.nonmembers/hash.pass.cpp    | 1 +
 .../equal_to.pass.cpp                                           | 1 +
 .../greater.pass.cpp                                            | 1 +
 .../greater_equal.pass.cpp                                      | 1 +
 .../propagate_const.comparison_function_objects/less.pass.cpp   | 1 +
 .../less_equal.pass.cpp                                         | 1 +
 .../not_equal_to.pass.cpp                                       | 1 +
 .../propagate_const.relops/equal.pass.cpp                       | 1 +
 .../propagate_const.relops/greater_equal.pass.cpp               | 1 +
 .../propagate_const.relops/greater_than.pass.cpp                | 1 +
 .../propagate_const.relops/less_equal.pass.cpp                  | 1 +
 .../propagate_const.relops/less_than.pass.cpp                   | 1 +
 .../propagate_const.relops/not_equal.pass.cpp                   | 1 +
 .../propagate_const/propagate_const.nonmembers/swap.pass.cpp    | 1 +
 .../utilities/utility/utility.erased.type/erased_type.pass.cpp  | 2 ++
 .../utilities/utility/utility.synop/includes.pass.cpp           | 2 ++
 .../std/input.output/file.streams/c.files/cinttypes.pass.cpp    | 2 ++
 .../file.streams/fstreams/filebuf.assign/member_swap.pass.cpp   | 1 +
 .../file.streams/fstreams/filebuf.assign/move_assign.pass.cpp   | 1 +
 .../fstreams/filebuf.assign/nonmember_swap.pass.cpp             | 1 +
 .../file.streams/fstreams/filebuf.cons/default.pass.cpp         | 2 ++
 .../file.streams/fstreams/filebuf.cons/move.pass.cpp            | 1 +
 .../file.streams/fstreams/filebuf.members/open_path.pass.cpp    | 1 +
 .../file.streams/fstreams/filebuf.members/open_pointer.pass.cpp | 1 +
 .../file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp    | 1 +
 .../file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp   | 1 +
 .../input.output/file.streams/fstreams/filebuf/types.pass.cpp   | 2 ++
 .../file.streams/fstreams/fstream.assign/member_swap.pass.cpp   | 1 +
 .../file.streams/fstreams/fstream.assign/move_assign.pass.cpp   | 1 +
 .../fstreams/fstream.assign/nonmember_swap.pass.cpp             | 1 +
 .../file.streams/fstreams/fstream.cons/default.pass.cpp         | 2 ++
 .../file.streams/fstreams/fstream.cons/move.pass.cpp            | 1 +
 .../file.streams/fstreams/fstream.cons/path.pass.cpp            | 1 +
 .../file.streams/fstreams/fstream.cons/pointer.pass.cpp         | 1 +
 .../file.streams/fstreams/fstream.cons/string.pass.cpp          | 1 +
 .../file.streams/fstreams/fstream.members/close.pass.cpp        | 1 +
 .../file.streams/fstreams/fstream.members/open_path.pass.cpp    | 1 +
 .../file.streams/fstreams/fstream.members/open_pointer.pass.cpp | 1 +
 .../file.streams/fstreams/fstream.members/open_string.pass.cpp  | 1 +
 .../file.streams/fstreams/fstream.members/rdbuf.pass.cpp        | 2 ++
 .../input.output/file.streams/fstreams/fstream/types.pass.cpp   | 2 ++
 .../file.streams/fstreams/ifstream.assign/member_swap.pass.cpp  | 2 ++
 .../file.streams/fstreams/ifstream.assign/move_assign.pass.cpp  | 2 ++
 .../fstreams/ifstream.assign/nonmember_swap.pass.cpp            | 2 ++
 .../file.streams/fstreams/ifstream.cons/default.pass.cpp        | 2 ++
 .../file.streams/fstreams/ifstream.cons/move.pass.cpp           | 2 ++
 .../file.streams/fstreams/ifstream.cons/path.pass.cpp           | 2 ++
 .../file.streams/fstreams/ifstream.cons/pointer.pass.cpp        | 2 ++
 .../file.streams/fstreams/ifstream.cons/string.pass.cpp         | 2 ++
 .../file.streams/fstreams/ifstream.members/close.pass.cpp       | 2 ++
 .../file.streams/fstreams/ifstream.members/open_path.pass.cpp   | 2 ++
 .../fstreams/ifstream.members/open_pointer.pass.cpp             | 2 ++
 .../file.streams/fstreams/ifstream.members/open_string.pass.cpp | 2 ++
 .../file.streams/fstreams/ifstream.members/rdbuf.pass.cpp       | 2 ++
 .../input.output/file.streams/fstreams/ifstream/types.pass.cpp  | 2 ++
 .../file.streams/fstreams/ofstream.assign/member_swap.pass.cpp  | 1 +
 .../file.streams/fstreams/ofstream.assign/move_assign.pass.cpp  | 1 +
 .../fstreams/ofstream.assign/nonmember_swap.pass.cpp            | 1 +
 .../file.streams/fstreams/ofstream.cons/default.pass.cpp        | 2 ++
 .../file.streams/fstreams/ofstream.cons/move.pass.cpp           | 1 +
 .../file.streams/fstreams/ofstream.cons/path.pass.cpp           | 1 +
 .../file.streams/fstreams/ofstream.cons/pointer.pass.cpp        | 1 +
 .../file.streams/fstreams/ofstream.cons/string.pass.cpp         | 1 +
 .../file.streams/fstreams/ofstream.members/close.pass.cpp       | 1 +
 .../file.streams/fstreams/ofstream.members/open_path.pass.cpp   | 1 +
 .../fstreams/ofstream.members/open_pointer.pass.cpp             | 1 +
 .../file.streams/fstreams/ofstream.members/open_string.pass.cpp | 1 +
 .../file.streams/fstreams/ofstream.members/rdbuf.pass.cpp       | 1 +
 .../input.output/file.streams/fstreams/ofstream/types.pass.cpp  | 2 ++
 .../class.directory_entry/directory_entry.cons/default.pass.cpp | 2 ++
 .../directory_entry.cons/default_const.pass.cpp                 | 2 ++
 .../directory_entry.obs/comparisons.pass.cpp                    | 2 ++
 .../directory_entry.obs/file_size.pass.cpp                      | 2 ++
 .../directory_entry.obs/file_type_obs.pass.cpp                  | 2 ++
 .../directory_entry.obs/hard_link_count.pass.cpp                | 2 ++
 .../directory_entry.obs/last_write_time.pass.cpp                | 2 ++
 .../class.directory_entry/directory_entry.obs/path.pass.cpp     | 2 ++
 .../class.directory_entry/directory_entry.obs/status.pass.cpp   | 2 ++
 .../directory_entry.obs/symlink_status.pass.cpp                 | 2 ++
 .../filesystems/class.file_status/file_status.cons.pass.cpp     | 2 ++
 .../filesystems/class.file_status/file_status.mods.pass.cpp     | 2 ++
 .../filesystems/class.file_status/file_status.obs.pass.cpp      | 2 ++
 .../filesystems/fs.req.namespace/namespace.pass.cpp             | 2 ++
 .../input.output/iostream.format/ext.manip/get_money.pass.cpp   | 1 +
 .../input.output/iostream.format/ext.manip/get_time.pass.cpp    | 1 +
 .../input.output/iostream.format/ext.manip/put_money.pass.cpp   | 1 +
 .../input.output/iostream.format/ext.manip/put_time.pass.cpp    | 1 +
 .../iostreamclass/iostream.assign/member_swap.pass.cpp          | 2 ++
 .../iostreamclass/iostream.assign/move_assign.pass.cpp          | 2 ++
 .../input.streams/iostreamclass/iostream.cons/move.pass.cpp     | 2 ++
 .../iostreamclass/iostream.cons/streambuf.pass.cpp              | 2 ++
 .../iostream.format/input.streams/iostreamclass/types.pass.cpp  | 2 ++
 .../istream.formatted/istream_extractors/basic_ios.pass.cpp     | 2 ++
 .../istream.formatted/istream_extractors/ios_base.pass.cpp      | 2 ++
 .../istream.formatted/istream_extractors/istream.pass.cpp       | 2 ++
 .../input.streams/istream.rvalue/rvalue.pass.cpp                | 2 ++
 .../input.streams/istream.unformatted/ignore_0xff.pass.cpp      | 2 ++
 .../input.streams/istream.unformatted/readsome.pass.cpp         | 2 ++
 .../input.streams/istream.unformatted/seekg.pass.cpp            | 2 ++
 .../input.streams/istream.unformatted/seekg_off.pass.cpp        | 2 ++
 .../input.streams/istream.unformatted/tellg.pass.cpp            | 2 ++
 .../input.streams/istream/istream.assign/member_swap.pass.cpp   | 2 ++
 .../input.streams/istream/istream.assign/move_assign.pass.cpp   | 2 ++
 .../input.streams/istream/istream.cons/move.pass.cpp            | 2 ++
 .../input.streams/istream/istream.cons/streambuf.pass.cpp       | 2 ++
 .../input.streams/istream/istream_sentry/ctor.pass.cpp          | 2 ++
 .../iostream.format/input.streams/istream/types.pass.cpp        | 2 ++
 .../output.streams/ostream.assign/member_swap.pass.cpp          | 2 ++
 .../output.streams/ostream.assign/move_assign.pass.cpp          | 2 ++
 .../iostream.format/output.streams/ostream.cons/move.pass.cpp   | 2 ++
 .../output.streams/ostream.cons/streambuf.pass.cpp              | 2 ++
 .../ostream.inserters.arithmetic/bool.pass.cpp                  | 2 ++
 .../ostream.inserters.arithmetic/double.pass.cpp                | 2 ++
 .../ostream.inserters.arithmetic/float.pass.cpp                 | 2 ++
 .../ostream.formatted/ostream.inserters.arithmetic/int.pass.cpp | 2 ++
 .../ostream.inserters.arithmetic/long.pass.cpp                  | 2 ++
 .../ostream.inserters.arithmetic/long_double.pass.cpp           | 2 ++
 .../ostream.inserters.arithmetic/long_long.pass.cpp             | 2 ++
 .../ostream.inserters.arithmetic/minmax_showbase.pass.cpp       | 2 ++
 .../ostream.inserters.arithmetic/minus1.pass.cpp                | 2 ++
 .../ostream.inserters.arithmetic/pointer.pass.cpp               | 2 ++
 .../ostream.inserters.arithmetic/short.pass.cpp                 | 2 ++
 .../ostream.inserters.arithmetic/unsigned_int.pass.cpp          | 2 ++
 .../ostream.inserters.arithmetic/unsigned_long.pass.cpp         | 2 ++
 .../ostream.inserters.arithmetic/unsigned_long_long.pass.cpp    | 2 ++
 .../ostream.inserters.arithmetic/unsigned_short.pass.cpp        | 2 ++
 .../ostream.inserters.character/CharT.pass.cpp                  | 2 ++
 .../ostream.inserters.character/CharT_pointer.pass.cpp          | 2 ++
 .../ostream.formatted/ostream.inserters.character/char.pass.cpp | 2 ++
 .../ostream.inserters.character/char_pointer.pass.cpp           | 2 ++
 .../ostream.inserters.character/char_to_wide.pass.cpp           | 2 ++
 .../ostream.inserters.character/char_to_wide_pointer.pass.cpp   | 2 ++
 .../ostream.inserters.character/signed_char.pass.cpp            | 2 ++
 .../ostream.inserters.character/signed_char_pointer.pass.cpp    | 2 ++
 .../ostream.inserters.character/unsigned_char.pass.cpp          | 2 ++
 .../ostream.inserters.character/unsigned_char_pointer.pass.cpp  | 2 ++
 .../ostream.formatted/ostream.inserters/basic_ios.pass.cpp      | 2 ++
 .../ostream.formatted/ostream.inserters/ios_base.pass.cpp       | 2 ++
 .../ostream.formatted/ostream.inserters/ostream.pass.cpp        | 2 ++
 .../ostream.formatted/ostream.inserters/streambuf.pass.cpp      | 2 ++
 .../iostream.format/output.streams/ostream.manip/endl.pass.cpp  | 2 ++
 .../iostream.format/output.streams/ostream.manip/ends.pass.cpp  | 2 ++
 .../iostream.format/output.streams/ostream.manip/flush.pass.cpp | 2 ++
 .../output.streams/ostream.rvalue/CharT_pointer.pass.cpp        | 2 ++
 .../iostream.format/output.streams/ostream.seeks/seekp.pass.cpp | 2 ++
 .../output.streams/ostream.seeks/seekp2.pass.cpp                | 2 ++
 .../iostream.format/output.streams/ostream.seeks/tellp.pass.cpp | 2 ++
 .../output.streams/ostream.unformatted/flush.pass.cpp           | 2 ++
 .../output.streams/ostream.unformatted/put.pass.cpp             | 2 ++
 .../output.streams/ostream.unformatted/write.pass.cpp           | 2 ++
 .../iostream.format/output.streams/ostream/types.pass.cpp       | 2 ++
 .../output.streams/ostream_sentry/construct.pass.cpp            | 2 ++
 .../input.output/iostream.format/quoted.manip/quoted.pass.cpp   | 2 ++
 .../iostream.format/std.manip/resetiosflags.pass.cpp            | 2 ++
 .../std/input.output/iostream.format/std.manip/setbase.pass.cpp | 2 ++
 .../std/input.output/iostream.format/std.manip/setfill.pass.cpp | 2 ++
 .../input.output/iostream.format/std.manip/setiosflags.pass.cpp | 2 ++
 .../iostream.format/std.manip/setprecision.pass.cpp             | 2 ++
 .../std/input.output/iostream.format/std.manip/setw.pass.cpp    | 2 ++
 libcxx/test/std/input.output/iostream.forward/iosfwd.pass.cpp   | 2 ++
 .../iostream.objects/narrow.stream.objects/cerr.pass.cpp        | 2 ++
 .../iostream.objects/narrow.stream.objects/cin.pass.cpp         | 2 ++
 .../iostream.objects/narrow.stream.objects/clog.pass.cpp        | 2 ++
 .../iostream.objects/narrow.stream.objects/cout.pass.cpp        | 2 ++
 .../iostream.objects/wide.stream.objects/wcerr.pass.cpp         | 2 ++
 .../iostream.objects/wide.stream.objects/wcin.pass.cpp          | 2 ++
 .../iostream.objects/wide.stream.objects/wclog.pass.cpp         | 2 ++
 .../iostream.objects/wide.stream.objects/wcout.pass.cpp         | 2 ++
 .../iostreams.base/fpos/fpos.members/state.pass.cpp             | 2 ++
 .../iostreams.base/fpos/fpos.operations/addition.pass.cpp       | 2 ++
 .../iostreams.base/fpos/fpos.operations/ctor_int.pass.cpp       | 2 ++
 .../iostreams.base/fpos/fpos.operations/difference.pass.cpp     | 2 ++
 .../iostreams.base/fpos/fpos.operations/eq_int.pass.cpp         | 2 ++
 .../iostreams.base/fpos/fpos.operations/offset.pass.cpp         | 2 ++
 .../iostreams.base/fpos/fpos.operations/streamsize.pass.cpp     | 2 ++
 .../iostreams.base/fpos/fpos.operations/subtraction.pass.cpp    | 2 ++
 .../iostreams.base/ios.base/fmtflags.state/flags.pass.cpp       | 2 ++
 .../ios.base/fmtflags.state/flags_fmtflags.pass.cpp             | 2 ++
 .../iostreams.base/ios.base/fmtflags.state/precision.pass.cpp   | 2 ++
 .../ios.base/fmtflags.state/precision_streamsize.pass.cpp       | 2 ++
 .../ios.base/fmtflags.state/setf_fmtflags.pass.cpp              | 2 ++
 .../ios.base/fmtflags.state/setf_fmtflags_mask.pass.cpp         | 2 ++
 .../iostreams.base/ios.base/fmtflags.state/unsetf_mask.pass.cpp | 2 ++
 .../iostreams.base/ios.base/fmtflags.state/width.pass.cpp       | 2 ++
 .../ios.base/fmtflags.state/width_streamsize.pass.cpp           | 2 ++
 .../ios.base/ios.base.callback/register_callback.pass.cpp       | 1 +
 .../iostreams.base/ios.base/ios.base.cons/dtor.pass.cpp         | 2 ++
 .../iostreams.base/ios.base/ios.base.locales/getloc.pass.cpp    | 2 ++
 .../iostreams.base/ios.base/ios.base.locales/imbue.pass.cpp     | 1 +
 .../iostreams.base/ios.base/ios.base.storage/iword.pass.cpp     | 2 ++
 .../iostreams.base/ios.base/ios.base.storage/pword.pass.cpp     | 2 ++
 .../iostreams.base/ios.base/ios.base.storage/xalloc.pass.cpp    | 2 ++
 .../ios.base/ios.members.static/sync_with_stdio.pass.cpp        | 2 ++
 .../ios.types/ios_failure/ctor_char_pointer_error_code.pass.cpp | 2 ++
 .../ios.types/ios_failure/ctor_string_error_code.pass.cpp       | 2 ++
 .../ios.base/ios.types/ios_fmtflags/fmtflags.pass.cpp           | 2 ++
 .../ios.base/ios.types/ios_iostate/iostate.pass.cpp             | 2 ++
 .../ios.base/ios.types/ios_openmode/openmode.pass.cpp           | 2 ++
 .../ios.base/ios.types/ios_seekdir/seekdir.pass.cpp             | 2 ++
 .../input.output/iostreams.base/ios.base/nothing_to_do.pass.cpp | 2 ++
 .../iostreams.base/ios/basic.ios.cons/ctor_streambuf.pass.cpp   | 2 ++
 .../iostreams.base/ios/basic.ios.members/fill.pass.cpp          | 2 ++
 .../ios/basic.ios.members/fill_char_type.pass.cpp               | 2 ++
 .../iostreams.base/ios/basic.ios.members/imbue.pass.cpp         | 1 +
 .../iostreams.base/ios/basic.ios.members/move.pass.cpp          | 1 +
 .../iostreams.base/ios/basic.ios.members/narrow.pass.cpp        | 2 ++
 .../iostreams.base/ios/basic.ios.members/rdbuf.pass.cpp         | 2 ++
 .../ios/basic.ios.members/rdbuf_streambuf.pass.cpp              | 2 ++
 .../iostreams.base/ios/basic.ios.members/swap.pass.cpp          | 1 +
 .../iostreams.base/ios/basic.ios.members/tie.pass.cpp           | 2 ++
 .../iostreams.base/ios/basic.ios.members/tie_ostream.pass.cpp   | 2 ++
 .../iostreams.base/ios/basic.ios.members/widen.pass.cpp         | 2 ++
 .../input.output/iostreams.base/ios/iostate.flags/bad.pass.cpp  | 2 ++
 .../input.output/iostreams.base/ios/iostate.flags/eof.pass.cpp  | 2 ++
 .../iostreams.base/ios/iostate.flags/exceptions.pass.cpp        | 2 ++
 .../input.output/iostreams.base/ios/iostate.flags/fail.pass.cpp | 2 ++
 .../input.output/iostreams.base/ios/iostate.flags/good.pass.cpp | 2 ++
 .../input.output/iostreams.base/ios/iostate.flags/not.pass.cpp  | 2 ++
 .../iostreams.base/ios/iostate.flags/rdstate.pass.cpp           | 2 ++
 libcxx/test/std/input.output/iostreams.base/ios/types.pass.cpp  | 2 ++
 .../std.ios.manip/adjustfield.manip/internal.pass.cpp           | 2 ++
 .../std.ios.manip/adjustfield.manip/left.pass.cpp               | 2 ++
 .../std.ios.manip/adjustfield.manip/right.pass.cpp              | 2 ++
 .../iostreams.base/std.ios.manip/basefield.manip/dec.pass.cpp   | 2 ++
 .../iostreams.base/std.ios.manip/basefield.manip/hex.pass.cpp   | 2 ++
 .../iostreams.base/std.ios.manip/basefield.manip/oct.pass.cpp   | 2 ++
 .../std.ios.manip/error.reporting/iostream_category.pass.cpp    | 2 ++
 .../std.ios.manip/error.reporting/make_error_code.pass.cpp      | 2 ++
 .../std.ios.manip/error.reporting/make_error_condition.pass.cpp | 2 ++
 .../std.ios.manip/floatfield.manip/defaultfloat.pass.cpp        | 2 ++
 .../std.ios.manip/floatfield.manip/fixed.pass.cpp               | 2 ++
 .../std.ios.manip/floatfield.manip/hexfloat.pass.cpp            | 2 ++
 .../std.ios.manip/floatfield.manip/scientific.pass.cpp          | 2 ++
 .../std.ios.manip/fmtflags.manip/boolalpha.pass.cpp             | 2 ++
 .../std.ios.manip/fmtflags.manip/noboolalpha.pass.cpp           | 2 ++
 .../std.ios.manip/fmtflags.manip/noshowbase.pass.cpp            | 2 ++
 .../std.ios.manip/fmtflags.manip/noshowpoint.pass.cpp           | 2 ++
 .../std.ios.manip/fmtflags.manip/noshowpos.pass.cpp             | 2 ++
 .../std.ios.manip/fmtflags.manip/noskipws.pass.cpp              | 2 ++
 .../std.ios.manip/fmtflags.manip/nounitbuf.pass.cpp             | 2 ++
 .../std.ios.manip/fmtflags.manip/nouppercase.pass.cpp           | 2 ++
 .../std.ios.manip/fmtflags.manip/showbase.pass.cpp              | 2 ++
 .../std.ios.manip/fmtflags.manip/showpoint.pass.cpp             | 2 ++
 .../std.ios.manip/fmtflags.manip/showpos.pass.cpp               | 2 ++
 .../iostreams.base/std.ios.manip/fmtflags.manip/skipws.pass.cpp | 2 ++
 .../std.ios.manip/fmtflags.manip/unitbuf.pass.cpp               | 2 ++
 .../std.ios.manip/fmtflags.manip/uppercase.pass.cpp             | 2 ++
 .../input.output/iostreams.base/stream.types/streamoff.pass.cpp | 2 ++
 .../iostreams.base/stream.types/streamsize.pass.cpp             | 2 ++
 .../stream.buffers/streambuf/streambuf.cons/copy.pass.cpp       | 1 +
 .../stream.buffers/streambuf/streambuf.cons/default.pass.cpp    | 1 +
 .../streambuf.members/streambuf.buffer/pubseekoff.pass.cpp      | 2 ++
 .../streambuf.members/streambuf.buffer/pubseekpos.pass.cpp      | 2 ++
 .../streambuf.members/streambuf.buffer/pubsetbuf.pass.cpp       | 2 ++
 .../streambuf.members/streambuf.buffer/pubsync.pass.cpp         | 2 ++
 .../streambuf.members/streambuf.locales/locales.pass.cpp        | 1 +
 .../streambuf.members/streambuf.pub.get/in_avail.pass.cpp       | 2 ++
 .../streambuf.members/streambuf.pub.get/sbumpc.pass.cpp         | 2 ++
 .../streambuf.members/streambuf.pub.get/sgetc.pass.cpp          | 2 ++
 .../streambuf.members/streambuf.pub.get/sgetn.pass.cpp          | 2 ++
 .../streambuf.members/streambuf.pub.get/snextc.pass.cpp         | 2 ++
 .../streambuf.members/streambuf.pub.pback/sputbackc.pass.cpp    | 2 ++
 .../streambuf.members/streambuf.pub.pback/sungetc.pass.cpp      | 2 ++
 .../streambuf.members/streambuf.pub.put/sputc.pass.cpp          | 2 ++
 .../streambuf.members/streambuf.pub.put/sputn.pass.cpp          | 2 ++
 .../streambuf.protected/streambuf.assign/assign.pass.cpp        | 1 +
 .../streambuf.protected/streambuf.assign/swap.pass.cpp          | 1 +
 .../streambuf.protected/streambuf.get.area/gbump.pass.cpp       | 2 ++
 .../streambuf.protected/streambuf.get.area/setg.pass.cpp        | 2 ++
 .../streambuf.protected/streambuf.put.area/pbump.pass.cpp       | 2 ++
 .../streambuf.protected/streambuf.put.area/setp.pass.cpp        | 2 ++
 .../streambuf.virtuals/streambuf.virt.get/showmanyc.pass.cpp    | 2 ++
 .../streambuf.virtuals/streambuf.virt.get/uflow.pass.cpp        | 2 ++
 .../streambuf.virtuals/streambuf.virt.get/underflow.pass.cpp    | 2 ++
 .../streambuf.virtuals/streambuf.virt.get/xsgetn.pass.cpp       | 2 ++
 .../streambuf.virtuals/streambuf.virt.pback/pbackfail.pass.cpp  | 2 ++
 .../streambuf.virtuals/streambuf.virt.put/overflow.pass.cpp     | 2 ++
 .../streambuf.virt.put/xsputn.PR14074.pass.cpp                  | 1 +
 .../streambuf.virtuals/streambuf.virt.put/xsputn.pass.cpp       | 2 ++
 .../std/input.output/stream.buffers/streambuf/types.pass.cpp    | 2 ++
 .../istringstream/istringstream.assign/member_swap.pass.cpp     | 2 ++
 .../istringstream/istringstream.assign/move.pass.cpp            | 2 ++
 .../istringstream/istringstream.assign/nonmember_swap.pass.cpp  | 2 ++
 .../istringstream/istringstream.cons/default.pass.cpp           | 2 ++
 .../istringstream/istringstream.cons/move.pass.cpp              | 2 ++
 .../istringstream/istringstream.cons/string.pass.cpp            | 2 ++
 .../istringstream/istringstream.members/str.pass.cpp            | 2 ++
 .../input.output/string.streams/istringstream/types.pass.cpp    | 2 ++
 .../ostringstream/ostringstream.assign/member_swap.pass.cpp     | 2 ++
 .../ostringstream/ostringstream.assign/move.pass.cpp            | 2 ++
 .../ostringstream/ostringstream.assign/nonmember_swap.pass.cpp  | 2 ++
 .../ostringstream/ostringstream.cons/default.pass.cpp           | 2 ++
 .../ostringstream/ostringstream.cons/move.pass.cpp              | 2 ++
 .../ostringstream/ostringstream.cons/string.pass.cpp            | 2 ++
 .../ostringstream/ostringstream.members/str.pass.cpp            | 2 ++
 .../input.output/string.streams/ostringstream/types.pass.cpp    | 2 ++
 .../stringbuf/stringbuf.assign/member_swap.pass.cpp             | 2 ++
 .../string.streams/stringbuf/stringbuf.assign/move.pass.cpp     | 2 ++
 .../stringbuf/stringbuf.assign/nonmember_swap.pass.cpp          | 2 ++
 .../string.streams/stringbuf/stringbuf.cons/default.pass.cpp    | 2 ++
 .../string.streams/stringbuf/stringbuf.cons/move.pass.cpp       | 2 ++
 .../string.streams/stringbuf/stringbuf.cons/string.pass.cpp     | 2 ++
 .../string.streams/stringbuf/stringbuf.members/str.pass.cpp     | 2 ++
 .../stringbuf/stringbuf.virtuals/overflow.pass.cpp              | 2 ++
 .../stringbuf/stringbuf.virtuals/pbackfail.pass.cpp             | 2 ++
 .../stringbuf/stringbuf.virtuals/seekoff.pass.cpp               | 2 ++
 .../stringbuf/stringbuf.virtuals/seekpos.pass.cpp               | 2 ++
 .../string.streams/stringbuf/stringbuf.virtuals/setbuf.pass.cpp | 2 ++
 .../stringbuf/stringbuf.virtuals/underflow.pass.cpp             | 2 ++
 .../std/input.output/string.streams/stringbuf/types.pass.cpp    | 2 ++
 .../string.streams/stringstream.cons/default.pass.cpp           | 2 ++
 .../input.output/string.streams/stringstream.cons/move.pass.cpp | 2 ++
 .../string.streams/stringstream.cons/move2.pass.cpp             | 2 ++
 .../string.streams/stringstream.cons/string.pass.cpp            | 2 ++
 .../stringstream.cons/stringstream.assign/member_swap.pass.cpp  | 2 ++
 .../stringstream.cons/stringstream.assign/move.pass.cpp         | 2 ++
 .../stringstream.assign/nonmember_swap.pass.cpp                 | 2 ++
 .../string.streams/stringstream.members/str.pass.cpp            | 2 ++
 .../std/input.output/string.streams/stringstream/types.pass.cpp | 2 ++
 .../iterator.primitives/iterator.basic/iterator.pass.cpp        | 2 ++
 .../iterator.primitives/iterator.operations/advance.pass.cpp    | 1 +
 .../iterator.primitives/iterator.operations/distance.pass.cpp   | 1 +
 .../iterator.primitives/iterator.operations/next.pass.cpp       | 1 +
 .../iterator.primitives/iterator.operations/prev.pass.cpp       | 1 +
 .../iterator.primitives/iterator.traits/const_pointer.pass.cpp  | 2 ++
 .../iterator.traits/const_volatile_pointer.pass.cpp             | 2 ++
 .../iterator.primitives/iterator.traits/empty.pass.cpp          | 2 ++
 .../iterator.primitives/iterator.traits/iterator.pass.cpp       | 2 ++
 .../iterator.primitives/iterator.traits/pointer.pass.cpp        | 2 ++
 .../iterator.traits/volatile_pointer.pass.cpp                   | 2 ++
 .../std.iterator.tags/bidirectional_iterator_tag.pass.cpp       | 2 ++
 .../std.iterator.tags/forward_iterator_tag.pass.cpp             | 2 ++
 .../std.iterator.tags/input_iterator_tag.pass.cpp               | 2 ++
 .../std.iterator.tags/output_iterator_tag.pass.cpp              | 2 ++
 .../std.iterator.tags/random_access_iterator_tag.pass.cpp       | 2 ++
 .../back.insert.iter.cons/container.pass.cpp                    | 2 ++
 .../back.insert.iter.ops/back.insert.iter.op++/post.pass.cpp    | 2 ++
 .../back.insert.iter.ops/back.insert.iter.op++/pre.pass.cpp     | 2 ++
 .../back.insert.iter.ops/back.insert.iter.op=/lv_value.pass.cpp | 2 ++
 .../back.insert.iter.ops/back.insert.iter.op=/rv_value.pass.cpp | 2 ++
 .../back.insert.iter.op_astrk/test.pass.cpp                     | 2 ++
 .../back.insert.iter.ops/back.inserter/test.pass.cpp            | 2 ++
 .../insert.iterators/back.insert.iterator/types.pass.cpp        | 2 ++
 .../front.insert.iter.cons/container.pass.cpp                   | 2 ++
 .../front.insert.iter.ops/front.insert.iter.op++/post.pass.cpp  | 2 ++
 .../front.insert.iter.ops/front.insert.iter.op++/pre.pass.cpp   | 2 ++
 .../front.insert.iter.op=/lv_value.pass.cpp                     | 2 ++
 .../front.insert.iter.op=/rv_value.pass.cpp                     | 2 ++
 .../front.insert.iter.op_astrk/test.pass.cpp                    | 2 ++
 .../front.insert.iter.ops/front.inserter/test.pass.cpp          | 2 ++
 .../insert.iterators/front.insert.iterator/types.pass.cpp       | 2 ++
 .../insert.iter.ops/insert.iter.cons/test.pass.cpp              | 2 ++
 .../insert.iter.ops/insert.iter.op++/post.pass.cpp              | 2 ++
 .../insert.iter.ops/insert.iter.op++/pre.pass.cpp               | 2 ++
 .../insert.iter.ops/insert.iter.op=/lv_value.pass.cpp           | 2 ++
 .../insert.iter.ops/insert.iter.op=/rv_value.pass.cpp           | 2 ++
 .../insert.iter.ops/insert.iter.op_astrk/test.pass.cpp          | 2 ++
 .../insert.iterators/insert.iter.ops/inserter/test.pass.cpp     | 2 ++
 .../reverse.iterators/reverse.iterator/types.pass.cpp           | 1 +
 .../istream.iterator/istream.iterator.cons/istream.pass.cpp     | 2 ++
 .../istream.iterator/istream.iterator.ops/arrow.pass.cpp        | 2 ++
 .../istream.iterator/istream.iterator.ops/dereference.pass.cpp  | 2 ++
 .../istream.iterator/istream.iterator.ops/equal.pass.cpp        | 2 ++
 .../istream.iterator.ops/post_increment.pass.cpp                | 2 ++
 .../istream.iterator.ops/pre_increment.pass.cpp                 | 2 ++
 .../istreambuf.iterator.cons/istream.pass.cpp                   | 2 ++
 .../istreambuf.iterator/istreambuf.iterator.cons/proxy.pass.cpp | 2 ++
 .../istreambuf.iterator.cons/streambuf.pass.cpp                 | 2 ++
 .../istreambuf.iterator_equal/equal.pass.cpp                    | 2 ++
 .../istreambuf.iterator_op!=/not_equal.pass.cpp                 | 2 ++
 .../istreambuf.iterator_op++/dereference.pass.cpp               | 2 ++
 .../istreambuf.iterator/istreambuf.iterator_op==/equal.pass.cpp | 2 ++
 .../istreambuf.iterator_op_astrk/post_increment.pass.cpp        | 2 ++
 .../istreambuf.iterator_op_astrk/pre_increment.pass.cpp         | 2 ++
 .../istreambuf.iterator_proxy/proxy.pass.cpp                    | 2 ++
 .../stream.iterators/iterator.range/begin_array.pass.cpp        | 2 ++
 .../stream.iterators/iterator.range/begin_const.pass.cpp        | 2 ++
 .../stream.iterators/iterator.range/begin_non_const.pass.cpp    | 2 ++
 .../stream.iterators/iterator.range/end_array.pass.cpp          | 2 ++
 .../stream.iterators/iterator.range/end_const.pass.cpp          | 2 ++
 .../stream.iterators/iterator.range/end_non_const.pass.cpp      | 2 ++
 .../ostream.iterator/ostream.iterator.cons.des/copy.pass.cpp    | 2 ++
 .../ostream.iterator/ostream.iterator.cons.des/ostream.pass.cpp | 2 ++
 .../ostream.iterator.cons.des/ostream_delim.pass.cpp            | 2 ++
 .../ostream.iterator/ostream.iterator.ops/dereference.pass.cpp  | 2 ++
 .../ostream.iterator/ostream.iterator.ops/increment.pass.cpp    | 2 ++
 .../ostreambuf.iterator/ostreambuf.iter.cons/ostream.pass.cpp   | 2 ++
 .../ostreambuf.iterator/ostreambuf.iter.cons/streambuf.pass.cpp | 2 ++
 .../ostreambuf.iterator/ostreambuf.iter.ops/assign_c.pass.cpp   | 2 ++
 .../ostreambuf.iterator/ostreambuf.iter.ops/deref.pass.cpp      | 2 ++
 .../ostreambuf.iterator/ostreambuf.iter.ops/failed.pass.cpp     | 2 ++
 .../ostreambuf.iterator/ostreambuf.iter.ops/increment.pass.cpp  | 2 ++
 .../std/language.support/cstdint/cstdint.syn/cstdint.pass.cpp   | 2 ++
 .../support.dynamic/alloc.errors/bad.alloc/bad_alloc.pass.cpp   | 2 ++
 .../alloc.errors/new.badlength/bad_array_new_length.pass.cpp    | 2 ++
 .../alloc.errors/new.handler/new_handler.pass.cpp               | 2 ++
 .../alloc.errors/set.new.handler/get_new_handler.pass.cpp       | 2 ++
 .../alloc.errors/set.new.handler/set_new_handler.pass.cpp       | 2 ++
 .../new.delete/new.delete.placement/new.pass.cpp                | 2 ++
 .../new.delete/new.delete.placement/new_array.pass.cpp          | 2 ++
 .../support.exception/bad.exception/bad_exception.pass.cpp      | 2 ++
 .../support.exception/except.nested/rethrow_nested.pass.cpp     | 2 ++
 .../exception.terminate/set.terminate/get_terminate.pass.cpp    | 2 ++
 .../exception.terminate/set.terminate/set_terminate.pass.cpp    | 2 ++
 .../terminate.handler/terminate_handler.pass.cpp                | 2 ++
 .../exception.terminate/terminate/terminate.pass.cpp            | 2 ++
 .../support.exception/exception/exception.pass.cpp              | 2 ++
 .../support.exception/propagation/current_exception.pass.cpp    | 2 ++
 .../support.exception/propagation/exception_ptr.pass.cpp        | 2 ++
 .../support.exception/propagation/make_exception_ptr.pass.cpp   | 2 ++
 .../support.exception/propagation/rethrow_exception.pass.cpp    | 2 ++
 .../support.exception/uncaught/uncaught_exception.pass.cpp      | 2 ++
 .../support.exception/uncaught/uncaught_exceptions.pass.cpp     | 2 ++
 .../language.support/support.initlist/include_cxx03.pass.cpp    | 2 ++
 .../test/std/language.support/support.initlist/types.pass.cpp   | 2 ++
 .../language.support/support.limits/c.limits/climits.pass.cpp   | 2 ++
 .../support.limits/limits/denorm.style/check_values.pass.cpp    | 2 ++
 .../support.limits/limits/is_specialized.pass.cpp               | 2 ++
 .../support.limits/limits/numeric.limits/default.pass.cpp       | 2 ++
 .../support.limits/limits/round.style/check_values.pass.cpp     | 2 ++
 .../test/std/language.support/support.limits/version.pass.cpp   | 2 ++
 .../language.support/support.rtti/bad.cast/bad_cast.pass.cpp    | 2 ++
 .../support.rtti/bad.typeid/bad_typeid.pass.cpp                 | 2 ++
 .../language.support/support.rtti/type.info/type_info.pass.cpp  | 2 ++
 .../support.rtti/type.info/type_info_hash.pass.cpp              | 2 ++
 .../test/std/language.support/support.runtime/csetjmp.pass.cpp  | 2 ++
 .../test/std/language.support/support.runtime/csignal.pass.cpp  | 2 ++
 .../test/std/language.support/support.runtime/cstdbool.pass.cpp | 2 ++
 .../std/language.support/support.start.term/quick_exit.pass.cpp | 2 ++
 libcxx/test/std/language.support/support.types/null.pass.cpp    | 2 ++
 .../support.types/nullptr_t_integral_cast.pass.cpp              | 2 ++
 .../test/std/language.support/support.types/ptrdiff_t.pass.cpp  | 2 ++
 libcxx/test/std/language.support/support.types/size_t.pass.cpp  | 2 ++
 libcxx/test/std/localization/c.locales/clocale.pass.cpp         | 2 ++
 .../category.collate/locale.collate.byname/compare.pass.cpp     | 1 +
 .../category.collate/locale.collate.byname/hash.pass.cpp        | 1 +
 .../category.collate/locale.collate.byname/transform.pass.cpp   | 1 +
 .../category.collate/locale.collate.byname/types.pass.cpp       | 1 +
 .../category.collate/locale.collate/ctor.pass.cpp               | 2 ++
 .../locale.collate/locale.collate.members/compare.pass.cpp      | 2 ++
 .../locale.collate/locale.collate.members/hash.pass.cpp         | 2 ++
 .../locale.collate/locale.collate.members/transform.pass.cpp    | 2 ++
 .../category.collate/locale.collate/types.pass.cpp              | 2 ++
 .../locale.categories/category.ctype/ctype_base.pass.cpp        | 2 ++
 .../facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp     | 2 ++
 .../facet.ctype.special/facet.ctype.char.members/ctor.pass.cpp  | 2 ++
 .../facet.ctype.special/facet.ctype.char.members/is_1.pass.cpp  | 2 ++
 .../facet.ctype.char.members/is_many.pass.cpp                   | 2 ++
 .../facet.ctype.char.members/narrow_1.pass.cpp                  | 2 ++
 .../facet.ctype.char.members/narrow_many.pass.cpp               | 2 ++
 .../facet.ctype.char.members/scan_is.pass.cpp                   | 2 ++
 .../facet.ctype.char.members/scan_not.pass.cpp                  | 2 ++
 .../facet.ctype.special/facet.ctype.char.members/table.pass.cpp | 2 ++
 .../facet.ctype.char.members/tolower_1.pass.cpp                 | 2 ++
 .../facet.ctype.char.members/tolower_many.pass.cpp              | 2 ++
 .../facet.ctype.char.members/toupper_1.pass.cpp                 | 2 ++
 .../facet.ctype.char.members/toupper_many.pass.cpp              | 2 ++
 .../facet.ctype.char.members/widen_1.pass.cpp                   | 2 ++
 .../facet.ctype.char.members/widen_many.pass.cpp                | 2 ++
 .../facet.ctype.char.statics/classic_table.pass.cpp             | 2 ++
 .../category.ctype/facet.ctype.special/types.pass.cpp           | 2 ++
 .../category.ctype/locale.codecvt.byname/ctor_char.pass.cpp     | 1 +
 .../category.ctype/locale.codecvt.byname/ctor_char16_t.pass.cpp | 2 ++
 .../category.ctype/locale.codecvt.byname/ctor_char32_t.pass.cpp | 2 ++
 .../category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp  | 1 +
 .../category.ctype/locale.codecvt/codecvt_base.pass.cpp         | 2 ++
 .../category.ctype/locale.codecvt/ctor_char.pass.cpp            | 2 ++
 .../category.ctype/locale.codecvt/ctor_char16_t.pass.cpp        | 2 ++
 .../category.ctype/locale.codecvt/ctor_char32_t.pass.cpp        | 2 ++
 .../category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp         | 2 ++
 .../locale.codecvt.members/char16_t_always_noconv.pass.cpp      | 2 ++
 .../locale.codecvt.members/char16_t_encoding.pass.cpp           | 2 ++
 .../locale.codecvt/locale.codecvt.members/char16_t_in.pass.cpp  | 2 ++
 .../locale.codecvt.members/char16_t_length.pass.cpp             | 2 ++
 .../locale.codecvt.members/char16_t_max_length.pass.cpp         | 2 ++
 .../locale.codecvt/locale.codecvt.members/char16_t_out.pass.cpp | 2 ++
 .../locale.codecvt.members/char16_t_unshift.pass.cpp            | 2 ++
 .../locale.codecvt.members/char32_t_always_noconv.pass.cpp      | 2 ++
 .../locale.codecvt.members/char32_t_encoding.pass.cpp           | 2 ++
 .../locale.codecvt/locale.codecvt.members/char32_t_in.pass.cpp  | 2 ++
 .../locale.codecvt.members/char32_t_length.pass.cpp             | 2 ++
 .../locale.codecvt.members/char32_t_max_length.pass.cpp         | 2 ++
 .../locale.codecvt/locale.codecvt.members/char32_t_out.pass.cpp | 2 ++
 .../locale.codecvt.members/char32_t_unshift.pass.cpp            | 2 ++
 .../locale.codecvt.members/char_always_noconv.pass.cpp          | 2 ++
 .../locale.codecvt.members/char_encoding.pass.cpp               | 2 ++
 .../locale.codecvt/locale.codecvt.members/char_in.pass.cpp      | 2 ++
 .../locale.codecvt/locale.codecvt.members/char_length.pass.cpp  | 2 ++
 .../locale.codecvt.members/char_max_length.pass.cpp             | 2 ++
 .../locale.codecvt/locale.codecvt.members/char_out.pass.cpp     | 2 ++
 .../locale.codecvt/locale.codecvt.members/char_unshift.pass.cpp | 2 ++
 .../locale.codecvt.members/utf_sanity_check.pass.cpp            | 2 ++
 .../locale.codecvt.members/wchar_t_always_noconv.pass.cpp       | 2 ++
 .../locale.codecvt.members/wchar_t_encoding.pass.cpp            | 2 ++
 .../locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp   | 2 ++
 .../locale.codecvt.members/wchar_t_length.pass.cpp              | 2 ++
 .../locale.codecvt.members/wchar_t_max_length.pass.cpp          | 2 ++
 .../locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp  | 2 ++
 .../locale.codecvt.members/wchar_t_unshift.pass.cpp             | 2 ++
 .../category.ctype/locale.codecvt/types_char.pass.cpp           | 2 ++
 .../category.ctype/locale.codecvt/types_char16_t.pass.cpp       | 2 ++
 .../category.ctype/locale.codecvt/types_char32_t.pass.cpp       | 2 ++
 .../category.ctype/locale.codecvt/types_wchar_t.pass.cpp        | 2 ++
 .../category.ctype/locale.ctype.byname/is_1.pass.cpp            | 1 +
 .../category.ctype/locale.ctype.byname/is_many.pass.cpp         | 1 +
 .../category.ctype/locale.ctype.byname/mask.pass.cpp            | 2 ++
 .../category.ctype/locale.ctype.byname/narrow_1.pass.cpp        | 1 +
 .../category.ctype/locale.ctype.byname/narrow_many.pass.cpp     | 1 +
 .../category.ctype/locale.ctype.byname/scan_is.pass.cpp         | 1 +
 .../category.ctype/locale.ctype.byname/scan_not.pass.cpp        | 1 +
 .../category.ctype/locale.ctype.byname/tolower_1.pass.cpp       | 1 +
 .../category.ctype/locale.ctype.byname/tolower_many.pass.cpp    | 1 +
 .../category.ctype/locale.ctype.byname/toupper_1.pass.cpp       | 1 +
 .../category.ctype/locale.ctype.byname/toupper_many.pass.cpp    | 1 +
 .../category.ctype/locale.ctype.byname/types.pass.cpp           | 1 +
 .../category.ctype/locale.ctype.byname/widen_1.pass.cpp         | 1 +
 .../category.ctype/locale.ctype.byname/widen_many.pass.cpp      | 1 +
 .../locale.categories/category.ctype/locale.ctype/ctor.pass.cpp | 2 ++
 .../locale.ctype/locale.ctype.members/is_1.pass.cpp             | 2 ++
 .../locale.ctype/locale.ctype.members/is_many.pass.cpp          | 2 ++
 .../locale.ctype/locale.ctype.members/narrow_1.pass.cpp         | 2 ++
 .../locale.ctype/locale.ctype.members/narrow_many.pass.cpp      | 2 ++
 .../locale.ctype/locale.ctype.members/scan_is.pass.cpp          | 2 ++
 .../locale.ctype/locale.ctype.members/scan_not.pass.cpp         | 2 ++
 .../locale.ctype/locale.ctype.members/tolower_1.pass.cpp        | 2 ++
 .../locale.ctype/locale.ctype.members/tolower_many.pass.cpp     | 2 ++
 .../locale.ctype/locale.ctype.members/toupper_1.pass.cpp        | 2 ++
 .../locale.ctype/locale.ctype.members/toupper_many.pass.cpp     | 2 ++
 .../locale.ctype/locale.ctype.members/widen_1.pass.cpp          | 2 ++
 .../locale.ctype/locale.ctype.members/widen_many.pass.cpp       | 2 ++
 .../category.ctype/locale.ctype/types.pass.cpp                  | 2 ++
 .../category.messages/locale.messages/ctor.pass.cpp             | 2 ++
 .../locale.messages.members/not_testable.pass.cpp               | 2 ++
 .../category.messages/locale.messages/messages_base.pass.cpp    | 2 ++
 .../category.messages/locale.messages/types.pass.cpp            | 2 ++
 .../category.monetary/locale.money.get/ctor.pass.cpp            | 2 ++
 .../locale.money.get.members/get_long_double_en_US.pass.cpp     | 1 +
 .../locale.money.get.members/get_long_double_ru_RU.pass.cpp     | 1 +
 .../locale.money.get.members/get_long_double_zh_CN.pass.cpp     | 1 +
 .../locale.money.get.members/get_string_en_US.pass.cpp          | 1 +
 .../category.monetary/locale.money.get/types.pass.cpp           | 2 ++
 .../category.monetary/locale.money.put/ctor.pass.cpp            | 2 ++
 .../locale.money.put.members/put_long_double_en_US.pass.cpp     | 1 +
 .../locale.money.put.members/put_long_double_ru_RU.pass.cpp     | 1 +
 .../locale.money.put.members/put_long_double_zh_CN.pass.cpp     | 1 +
 .../locale.money.put.members/put_string_en_US.pass.cpp          | 1 +
 .../category.monetary/locale.money.put/types.pass.cpp           | 2 ++
 .../locale.moneypunct.byname/frac_digits.pass.cpp               | 1 +
 .../locale.moneypunct.byname/grouping.pass.cpp                  | 1 +
 .../locale.moneypunct.byname/neg_format.pass.cpp                | 1 +
 .../locale.moneypunct.byname/negative_sign.pass.cpp             | 1 +
 .../locale.moneypunct.byname/pos_format.pass.cpp                | 1 +
 .../locale.moneypunct.byname/positive_sign.pass.cpp             | 1 +
 .../category.monetary/locale.moneypunct/ctor.pass.cpp           | 2 ++
 .../locale.moneypunct.members/curr_symbol.pass.cpp              | 2 ++
 .../locale.moneypunct.members/decimal_point.pass.cpp            | 2 ++
 .../locale.moneypunct.members/frac_digits.pass.cpp              | 2 ++
 .../locale.moneypunct.members/grouping.pass.cpp                 | 2 ++
 .../locale.moneypunct.members/neg_format.pass.cpp               | 2 ++
 .../locale.moneypunct.members/negative_sign.pass.cpp            | 2 ++
 .../locale.moneypunct.members/pos_format.pass.cpp               | 2 ++
 .../locale.moneypunct.members/positive_sign.pass.cpp            | 2 ++
 .../locale.moneypunct.members/thousands_sep.pass.cpp            | 2 ++
 .../category.monetary/locale.moneypunct/money_base.pass.cpp     | 2 ++
 .../category.monetary/locale.moneypunct/types.pass.cpp          | 2 ++
 .../category.numeric/locale.nm.put/ctor.pass.cpp                | 2 ++
 .../locale.nm.put/facet.num.put.members/put_bool.pass.cpp       | 1 +
 .../locale.nm.put/facet.num.put.members/put_double.pass.cpp     | 1 +
 .../locale.nm.put/facet.num.put.members/put_long.pass.cpp       | 1 +
 .../facet.num.put.members/put_long_double.pass.cpp              | 1 +
 .../locale.nm.put/facet.num.put.members/put_long_long.pass.cpp  | 1 +
 .../locale.nm.put/facet.num.put.members/put_pointer.pass.cpp    | 1 +
 .../facet.num.put.members/put_unsigned_long.pass.cpp            | 1 +
 .../facet.num.put.members/put_unsigned_long_long.pass.cpp       | 1 +
 .../category.numeric/locale.nm.put/types.pass.cpp               | 2 ++
 .../category.numeric/locale.num.get/ctor.pass.cpp               | 2 ++
 .../locale.num.get/facet.num.get.members/get_bool.pass.cpp      | 1 +
 .../locale.num.get/facet.num.get.members/get_double.pass.cpp    | 1 +
 .../locale.num.get/facet.num.get.members/get_float.pass.cpp     | 1 +
 .../locale.num.get/facet.num.get.members/get_long.pass.cpp      | 1 +
 .../facet.num.get.members/get_long_double.pass.cpp              | 1 +
 .../locale.num.get/facet.num.get.members/get_long_long.pass.cpp | 1 +
 .../locale.num.get/facet.num.get.members/get_pointer.pass.cpp   | 1 +
 .../facet.num.get.members/get_unsigned_int.pass.cpp             | 1 +
 .../facet.num.get.members/get_unsigned_long.pass.cpp            | 1 +
 .../facet.num.get.members/get_unsigned_long_long.pass.cpp       | 1 +
 .../facet.num.get.members/get_unsigned_short.pass.cpp           | 1 +
 .../locale.num.get/facet.num.get.members/test_min_max.pass.cpp  | 2 ++
 .../category.numeric/locale.num.get/types.pass.cpp              | 2 ++
 .../category.time/locale.time.get.byname/date_order.pass.cpp    | 1 +
 .../locale.time.get.byname/date_order_wide.pass.cpp             | 1 +
 .../category.time/locale.time.get.byname/get_date.pass.cpp      | 1 +
 .../category.time/locale.time.get.byname/get_date_wide.pass.cpp | 1 +
 .../category.time/locale.time.get.byname/get_monthname.pass.cpp | 1 +
 .../locale.time.get.byname/get_monthname_wide.pass.cpp          | 1 +
 .../category.time/locale.time.get.byname/get_one.pass.cpp       | 1 +
 .../category.time/locale.time.get.byname/get_one_wide.pass.cpp  | 1 +
 .../category.time/locale.time.get.byname/get_time.pass.cpp      | 1 +
 .../category.time/locale.time.get.byname/get_time_wide.pass.cpp | 1 +
 .../category.time/locale.time.get.byname/get_weekday.pass.cpp   | 1 +
 .../locale.time.get.byname/get_weekday_wide.pass.cpp            | 1 +
 .../category.time/locale.time.get.byname/get_year.pass.cpp      | 1 +
 .../category.time/locale.time.get.byname/get_year_wide.pass.cpp | 1 +
 .../category.time/locale.time.get/ctor.pass.cpp                 | 2 ++
 .../locale.time.get/locale.time.get.members/date_order.pass.cpp | 1 +
 .../locale.time.get/locale.time.get.members/get_date.pass.cpp   | 1 +
 .../locale.time.get.members/get_date_wide.pass.cpp              | 1 +
 .../locale.time.get/locale.time.get.members/get_many.pass.cpp   | 1 +
 .../locale.time.get.members/get_monthname.pass.cpp              | 1 +
 .../locale.time.get.members/get_monthname_wide.pass.cpp         | 1 +
 .../locale.time.get/locale.time.get.members/get_one.pass.cpp    | 1 +
 .../locale.time.get/locale.time.get.members/get_time.pass.cpp   | 1 +
 .../locale.time.get.members/get_time_wide.pass.cpp              | 1 +
 .../locale.time.get.members/get_weekday.pass.cpp                | 1 +
 .../locale.time.get.members/get_weekday_wide.pass.cpp           | 1 +
 .../locale.time.get/locale.time.get.members/get_year.pass.cpp   | 1 +
 .../category.time/locale.time.get/time_base.pass.cpp            | 2 ++
 .../category.time/locale.time.get/types.pass.cpp                | 2 ++
 .../category.time/locale.time.put.byname/put1.pass.cpp          | 1 +
 .../category.time/locale.time.put/ctor.pass.cpp                 | 2 ++
 .../locale.time.put/locale.time.put.members/put1.pass.cpp       | 1 +
 .../locale.time.put/locale.time.put.members/put2.pass.cpp       | 1 +
 .../category.time/locale.time.put/types.pass.cpp                | 2 ++
 .../locale.numpunct.byname/decimal_point.pass.cpp               | 1 +
 .../facet.numpunct/locale.numpunct/ctor.pass.cpp                | 2 ++
 .../facet.numpunct.members/decimal_point.pass.cpp               | 2 ++
 .../locale.numpunct/facet.numpunct.members/falsename.pass.cpp   | 2 ++
 .../locale.numpunct/facet.numpunct.members/grouping.pass.cpp    | 2 ++
 .../facet.numpunct.members/thousands_sep.pass.cpp               | 2 ++
 .../locale.numpunct/facet.numpunct.members/truename.pass.cpp    | 2 ++
 .../facet.numpunct/locale.numpunct/types.pass.cpp               | 2 ++
 .../test/std/localization/locale.stdcvt/codecvt_mode.pass.cpp   | 2 ++
 .../test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp  | 2 ++
 .../locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp          | 2 ++
 .../localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp  | 2 ++
 .../std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp    | 2 ++
 .../localization/locale.stdcvt/codecvt_utf16_length.pass.cpp    | 2 ++
 .../locale.stdcvt/codecvt_utf16_max_length.pass.cpp             | 2 ++
 .../std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp   | 2 ++
 .../localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp   | 2 ++
 .../test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp   | 2 ++
 .../locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp           | 2 ++
 .../localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp   | 2 ++
 .../std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp     | 2 ++
 .../std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp | 2 ++
 .../localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp | 2 ++
 .../std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp    | 2 ++
 .../localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp    | 2 ++
 .../locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp     | 2 ++
 .../locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp          | 2 ++
 .../localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp   | 2 ++
 .../locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp            | 2 ++
 .../locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp        | 2 ++
 .../localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp  | 2 ++
 .../locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp           | 2 ++
 .../locales/locale.convenience/classification/isalnum.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isalpha.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/iscntrl.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isdigit.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isgraph.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/islower.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isprint.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/ispunct.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isspace.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isupper.pass.cpp  | 2 ++
 .../locales/locale.convenience/classification/isxdigit.pass.cpp | 2 ++
 .../conversions/conversions.buffer/overflow.pass.cpp            | 2 ++
 .../conversions/conversions.buffer/pbackfail.pass.cpp           | 2 ++
 .../conversions/conversions.buffer/rdbuf.pass.cpp               | 2 ++
 .../conversions/conversions.buffer/seekoff.pass.cpp             | 2 ++
 .../conversions/conversions.buffer/state.pass.cpp               | 2 ++
 .../conversions/conversions.buffer/test.pass.cpp                | 2 ++
 .../conversions/conversions.buffer/underflow.pass.cpp           | 2 ++
 .../conversions/conversions.character/tolower.pass.cpp          | 2 ++
 .../conversions/conversions.character/toupper.pass.cpp          | 2 ++
 .../conversions/conversions.string/converted.pass.cpp           | 2 ++
 .../conversions/conversions.string/ctor_codecvt_state.pass.cpp  | 2 ++
 .../conversions/conversions.string/ctor_copy.pass.cpp           | 2 ++
 .../conversions/conversions.string/from_bytes.pass.cpp          | 2 ++
 .../conversions/conversions.string/state.pass.cpp               | 2 ++
 .../conversions/conversions.string/to_bytes.pass.cpp            | 2 ++
 .../conversions/conversions.string/types.pass.cpp               | 2 ++
 .../locales/locale.global.templates/has_facet.pass.cpp          | 2 ++
 .../std/localization/locales/locale/locale.cons/assign.pass.cpp | 1 +
 .../std/localization/locales/locale/locale.cons/copy.pass.cpp   | 1 +
 .../localization/locales/locale/locale.cons/default.pass.cpp    | 1 +
 .../locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp | 1 +
 .../locales/locale/locale.cons/locale_facetptr.pass.cpp         | 1 +
 .../locales/locale/locale.cons/locale_locale_cat.pass.cpp       | 1 +
 .../locales/locale/locale.cons/locale_string_cat.pass.cpp       | 1 +
 .../std/localization/locales/locale/locale.cons/string.pass.cpp | 1 +
 .../localization/locales/locale/locale.members/name.pass.cpp    | 1 +
 .../locales/locale/locale.operators/compare.pass.cpp            | 2 ++
 .../localization/locales/locale/locale.operators/eq.pass.cpp    | 1 +
 .../localization/locales/locale/locale.statics/classic.pass.cpp | 2 ++
 .../localization/locales/locale/locale.statics/global.pass.cpp  | 1 +
 .../locale/locale.types/locale.category/category.pass.cpp       | 2 ++
 libcxx/test/std/numerics/c.math/ctgmath.pass.cpp                | 2 ++
 libcxx/test/std/numerics/c.math/tgmath_h.pass.cpp               | 2 ++
 libcxx/test/std/numerics/cfenv/cfenv.syn/cfenv.pass.cpp         | 2 ++
 .../test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp   | 2 ++
 libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp | 1 +
 .../test/std/numerics/complex.number/cmplx.over/conj.pass.cpp   | 1 +
 .../test/std/numerics/complex.number/cmplx.over/norm.pass.cpp   | 1 +
 libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp | 1 +
 .../test/std/numerics/complex.number/cmplx.over/proj.pass.cpp   | 1 +
 .../numerics/complex.number/complex.literals/literals1.pass.cpp | 2 ++
 .../numerics/complex.number/complex.literals/literals2.pass.cpp | 2 ++
 .../complex.member.ops/assignment_complex.pass.cpp              | 2 ++
 .../complex.member.ops/assignment_scalar.pass.cpp               | 2 ++
 .../complex.member.ops/divide_equal_complex.pass.cpp            | 2 ++
 .../complex.member.ops/divide_equal_scalar.pass.cpp             | 2 ++
 .../complex.member.ops/minus_equal_complex.pass.cpp             | 2 ++
 .../complex.member.ops/minus_equal_scalar.pass.cpp              | 2 ++
 .../complex.member.ops/plus_equal_complex.pass.cpp              | 2 ++
 .../complex.member.ops/plus_equal_scalar.pass.cpp               | 2 ++
 .../complex.member.ops/times_equal_complex.pass.cpp             | 2 ++
 .../complex.member.ops/times_equal_scalar.pass.cpp              | 2 ++
 .../complex.number/complex.ops/complex_divide_complex.pass.cpp  | 1 +
 .../complex.number/complex.ops/complex_divide_scalar.pass.cpp   | 2 ++
 .../complex.number/complex.ops/complex_minus_complex.pass.cpp   | 2 ++
 .../complex.number/complex.ops/complex_minus_scalar.pass.cpp    | 2 ++
 .../complex.number/complex.ops/complex_plus_complex.pass.cpp    | 2 ++
 .../complex.number/complex.ops/complex_plus_scalar.pass.cpp     | 2 ++
 .../complex.number/complex.ops/complex_times_complex.pass.cpp   | 1 +
 .../complex.number/complex.ops/complex_times_scalar.pass.cpp    | 2 ++
 .../complex.number/complex.ops/scalar_divide_complex.pass.cpp   | 2 ++
 .../complex.number/complex.ops/scalar_minus_complex.pass.cpp    | 2 ++
 .../complex.number/complex.ops/scalar_plus_complex.pass.cpp     | 2 ++
 .../complex.number/complex.ops/scalar_times_complex.pass.cpp    | 2 ++
 .../numerics/complex.number/complex.ops/stream_input.pass.cpp   | 2 ++
 .../numerics/complex.number/complex.ops/stream_output.pass.cpp  | 2 ++
 .../numerics/complex.number/complex.ops/unary_minus.pass.cpp    | 2 ++
 .../std/numerics/complex.number/complex.ops/unary_plus.pass.cpp | 2 ++
 .../complex.number/complex.transcendentals/acos.pass.cpp        | 1 +
 .../complex.number/complex.transcendentals/acosh.pass.cpp       | 1 +
 .../complex.number/complex.transcendentals/asin.pass.cpp        | 1 +
 .../complex.number/complex.transcendentals/asinh.pass.cpp       | 1 +
 .../complex.number/complex.transcendentals/atan.pass.cpp        | 1 +
 .../complex.number/complex.transcendentals/atanh.pass.cpp       | 1 +
 .../complex.number/complex.transcendentals/cos.pass.cpp         | 1 +
 .../complex.number/complex.transcendentals/cosh.pass.cpp        | 1 +
 .../complex.number/complex.transcendentals/exp.pass.cpp         | 1 +
 .../complex.number/complex.transcendentals/log.pass.cpp         | 1 +
 .../complex.number/complex.transcendentals/log10.pass.cpp       | 1 +
 .../complex.transcendentals/pow_complex_complex.pass.cpp        | 1 +
 .../complex.transcendentals/pow_complex_scalar.pass.cpp         | 1 +
 .../complex.transcendentals/pow_scalar_complex.pass.cpp         | 1 +
 .../complex.number/complex.transcendentals/sin.pass.cpp         | 1 +
 .../complex.number/complex.transcendentals/sinh.pass.cpp        | 1 +
 .../complex.number/complex.transcendentals/sqrt.pass.cpp        | 1 +
 .../complex.number/complex.transcendentals/tan.pass.cpp         | 1 +
 .../complex.number/complex.transcendentals/tanh.pass.cpp        | 1 +
 .../std/numerics/complex.number/complex.value.ops/abs.pass.cpp  | 1 +
 .../std/numerics/complex.number/complex.value.ops/arg.pass.cpp  | 1 +
 .../std/numerics/complex.number/complex.value.ops/conj.pass.cpp | 2 ++
 .../std/numerics/complex.number/complex.value.ops/imag.pass.cpp | 2 ++
 .../std/numerics/complex.number/complex.value.ops/norm.pass.cpp | 1 +
 .../numerics/complex.number/complex.value.ops/polar.pass.cpp    | 1 +
 .../std/numerics/complex.number/complex.value.ops/proj.pass.cpp | 1 +
 .../std/numerics/complex.number/complex.value.ops/real.pass.cpp | 2 ++
 libcxx/test/std/numerics/complex.number/complex/types.pass.cpp  | 2 ++
 libcxx/test/std/numerics/complex.number/layout.pass.cpp         | 2 ++
 .../numerics/numarray/class.gslice/gslice.cons/default.pass.cpp | 2 ++
 .../class.gslice/gslice.cons/start_size_stride.pass.cpp         | 2 ++
 .../numerics/numarray/class.slice/cons.slice/default.pass.cpp   | 2 ++
 .../numarray/class.slice/cons.slice/start_size_stride.pass.cpp  | 2 ++
 .../gslice.array.assign/gslice_array.pass.cpp                   | 2 ++
 .../template.gslice.array/gslice.array.assign/valarray.pass.cpp | 2 ++
 .../gslice.array.comp.assign/addition.pass.cpp                  | 2 ++
 .../template.gslice.array/gslice.array.comp.assign/and.pass.cpp | 2 ++
 .../gslice.array.comp.assign/divide.pass.cpp                    | 2 ++
 .../gslice.array.comp.assign/modulo.pass.cpp                    | 2 ++
 .../gslice.array.comp.assign/multiply.pass.cpp                  | 2 ++
 .../template.gslice.array/gslice.array.comp.assign/or.pass.cpp  | 2 ++
 .../gslice.array.comp.assign/shift_left.pass.cpp                | 2 ++
 .../gslice.array.comp.assign/shift_right.pass.cpp               | 2 ++
 .../gslice.array.comp.assign/subtraction.pass.cpp               | 2 ++
 .../template.gslice.array/gslice.array.comp.assign/xor.pass.cpp | 2 ++
 .../gslice.array.fill/assign_value.pass.cpp                     | 2 ++
 .../std/numerics/numarray/template.gslice.array/types.pass.cpp  | 2 ++
 .../indirect.array.assign/indirect_array.pass.cpp               | 2 ++
 .../indirect.array.assign/valarray.pass.cpp                     | 2 ++
 .../indirect.array.comp.assign/addition.pass.cpp                | 2 ++
 .../indirect.array.comp.assign/and.pass.cpp                     | 2 ++
 .../indirect.array.comp.assign/divide.pass.cpp                  | 2 ++
 .../indirect.array.comp.assign/modulo.pass.cpp                  | 2 ++
 .../indirect.array.comp.assign/multiply.pass.cpp                | 2 ++
 .../indirect.array.comp.assign/or.pass.cpp                      | 2 ++
 .../indirect.array.comp.assign/shift_left.pass.cpp              | 2 ++
 .../indirect.array.comp.assign/shift_right.pass.cpp             | 2 ++
 .../indirect.array.comp.assign/subtraction.pass.cpp             | 2 ++
 .../indirect.array.comp.assign/xor.pass.cpp                     | 2 ++
 .../indirect.array.fill/assign_value.pass.cpp                   | 2 ++
 .../numerics/numarray/template.indirect.array/types.pass.cpp    | 2 ++
 .../template.mask.array/mask.array.assign/mask_array.pass.cpp   | 2 ++
 .../template.mask.array/mask.array.assign/valarray.pass.cpp     | 2 ++
 .../mask.array.comp.assign/addition.pass.cpp                    | 2 ++
 .../template.mask.array/mask.array.comp.assign/and.pass.cpp     | 2 ++
 .../template.mask.array/mask.array.comp.assign/divide.pass.cpp  | 2 ++
 .../template.mask.array/mask.array.comp.assign/modulo.pass.cpp  | 2 ++
 .../mask.array.comp.assign/multiply.pass.cpp                    | 2 ++
 .../template.mask.array/mask.array.comp.assign/or.pass.cpp      | 2 ++
 .../mask.array.comp.assign/shift_left.pass.cpp                  | 2 ++
 .../mask.array.comp.assign/shift_right.pass.cpp                 | 2 ++
 .../mask.array.comp.assign/subtraction.pass.cpp                 | 2 ++
 .../template.mask.array/mask.array.comp.assign/xor.pass.cpp     | 2 ++
 .../template.mask.array/mask.array.fill/assign_value.pass.cpp   | 2 ++
 .../std/numerics/numarray/template.mask.array/types.pass.cpp    | 2 ++
 .../template.slice.array/slice.arr.assign/slice_array.pass.cpp  | 2 ++
 .../template.slice.array/slice.arr.assign/valarray.pass.cpp     | 2 ++
 .../slice.arr.comp.assign/addition.pass.cpp                     | 2 ++
 .../template.slice.array/slice.arr.comp.assign/and.pass.cpp     | 2 ++
 .../template.slice.array/slice.arr.comp.assign/divide.pass.cpp  | 2 ++
 .../template.slice.array/slice.arr.comp.assign/modulo.pass.cpp  | 2 ++
 .../slice.arr.comp.assign/multiply.pass.cpp                     | 2 ++
 .../template.slice.array/slice.arr.comp.assign/or.pass.cpp      | 2 ++
 .../slice.arr.comp.assign/shift_left.pass.cpp                   | 2 ++
 .../slice.arr.comp.assign/shift_right.pass.cpp                  | 2 ++
 .../slice.arr.comp.assign/subtraction.pass.cpp                  | 2 ++
 .../template.slice.array/slice.arr.comp.assign/xor.pass.cpp     | 2 ++
 .../template.slice.array/slice.arr.fill/assign_value.pass.cpp   | 2 ++
 .../std/numerics/numarray/template.slice.array/types.pass.cpp   | 2 ++
 .../test/std/numerics/numarray/template.valarray/types.pass.cpp | 2 ++
 .../numarray/template.valarray/valarray.access/access.pass.cpp  | 2 ++
 .../template.valarray/valarray.access/const_access.pass.cpp     | 2 ++
 .../template.valarray/valarray.assign/copy_assign.pass.cpp      | 2 ++
 .../valarray.assign/gslice_array_assign.pass.cpp                | 2 ++
 .../valarray.assign/indirect_array_assign.pass.cpp              | 2 ++
 .../valarray.assign/initializer_list_assign.pass.cpp            | 2 ++
 .../valarray.assign/mask_array_assign.pass.cpp                  | 2 ++
 .../template.valarray/valarray.assign/move_assign.pass.cpp      | 2 ++
 .../valarray.assign/slice_array_assign.pass.cpp                 | 2 ++
 .../template.valarray/valarray.assign/value_assign.pass.cpp     | 2 ++
 .../template.valarray/valarray.cassign/and_valarray.pass.cpp    | 2 ++
 .../template.valarray/valarray.cassign/and_value.pass.cpp       | 2 ++
 .../template.valarray/valarray.cassign/divide_valarray.pass.cpp | 2 ++
 .../template.valarray/valarray.cassign/divide_value.pass.cpp    | 2 ++
 .../template.valarray/valarray.cassign/minus_valarray.pass.cpp  | 2 ++
 .../template.valarray/valarray.cassign/minus_value.pass.cpp     | 2 ++
 .../template.valarray/valarray.cassign/modulo_valarray.pass.cpp | 2 ++
 .../template.valarray/valarray.cassign/modulo_value.pass.cpp    | 2 ++
 .../template.valarray/valarray.cassign/or_valarray.pass.cpp     | 2 ++
 .../template.valarray/valarray.cassign/or_value.pass.cpp        | 2 ++
 .../template.valarray/valarray.cassign/plus_valarray.pass.cpp   | 2 ++
 .../template.valarray/valarray.cassign/plus_value.pass.cpp      | 2 ++
 .../valarray.cassign/shift_left_valarray.pass.cpp               | 2 ++
 .../valarray.cassign/shift_left_value.pass.cpp                  | 2 ++
 .../valarray.cassign/shift_right_valarray.pass.cpp              | 2 ++
 .../valarray.cassign/shift_right_value.pass.cpp                 | 2 ++
 .../template.valarray/valarray.cassign/times_valarray.pass.cpp  | 2 ++
 .../template.valarray/valarray.cassign/times_value.pass.cpp     | 2 ++
 .../template.valarray/valarray.cassign/xor_valarray.pass.cpp    | 2 ++
 .../template.valarray/valarray.cassign/xor_value.pass.cpp       | 2 ++
 .../numarray/template.valarray/valarray.cons/copy.pass.cpp      | 2 ++
 .../numarray/template.valarray/valarray.cons/default.pass.cpp   | 2 ++
 .../template.valarray/valarray.cons/gslice_array.pass.cpp       | 2 ++
 .../template.valarray/valarray.cons/indirect_array.pass.cpp     | 2 ++
 .../template.valarray/valarray.cons/initializer_list.pass.cpp   | 2 ++
 .../template.valarray/valarray.cons/mask_array.pass.cpp         | 2 ++
 .../numarray/template.valarray/valarray.cons/move.pass.cpp      | 2 ++
 .../template.valarray/valarray.cons/pointer_size.pass.cpp       | 2 ++
 .../numarray/template.valarray/valarray.cons/size.pass.cpp      | 2 ++
 .../template.valarray/valarray.cons/slice_array.pass.cpp        | 2 ++
 .../template.valarray/valarray.cons/value_size.pass.cpp         | 2 ++
 .../template.valarray/valarray.members/apply_cref.pass.cpp      | 2 ++
 .../template.valarray/valarray.members/apply_value.pass.cpp     | 2 ++
 .../numarray/template.valarray/valarray.members/cshift.pass.cpp | 2 ++
 .../numarray/template.valarray/valarray.members/max.pass.cpp    | 2 ++
 .../numarray/template.valarray/valarray.members/min.pass.cpp    | 2 ++
 .../numarray/template.valarray/valarray.members/resize.pass.cpp | 2 ++
 .../numarray/template.valarray/valarray.members/shift.pass.cpp  | 2 ++
 .../numarray/template.valarray/valarray.members/size.pass.cpp   | 2 ++
 .../numarray/template.valarray/valarray.members/sum.pass.cpp    | 2 ++
 .../numarray/template.valarray/valarray.members/swap.pass.cpp   | 2 ++
 .../template.valarray/valarray.sub/gslice_const.pass.cpp        | 2 ++
 .../template.valarray/valarray.sub/gslice_non_const.pass.cpp    | 2 ++
 .../valarray.sub/indirect_array_const.pass.cpp                  | 2 ++
 .../valarray.sub/indirect_array_non_const.pass.cpp              | 2 ++
 .../template.valarray/valarray.sub/slice_const.pass.cpp         | 2 ++
 .../template.valarray/valarray.sub/slice_non_const.pass.cpp     | 2 ++
 .../template.valarray/valarray.sub/valarray_bool_const.pass.cpp | 2 ++
 .../valarray.sub/valarray_bool_non_const.pass.cpp               | 2 ++
 .../numarray/template.valarray/valarray.unary/bit_not.pass.cpp  | 2 ++
 .../numarray/template.valarray/valarray.unary/negate.pass.cpp   | 2 ++
 .../numarray/template.valarray/valarray.unary/not.pass.cpp      | 2 ++
 .../numarray/template.valarray/valarray.unary/plus.pass.cpp     | 2 ++
 .../valarray.binary/and_valarray_valarray.pass.cpp              | 2 ++
 .../valarray.binary/and_valarray_value.pass.cpp                 | 2 ++
 .../valarray.binary/and_value_valarray.pass.cpp                 | 2 ++
 .../valarray.binary/divide_valarray_valarray.pass.cpp           | 2 ++
 .../valarray.binary/divide_valarray_value.pass.cpp              | 2 ++
 .../valarray.binary/divide_value_valarray.pass.cpp              | 2 ++
 .../valarray.binary/minus_valarray_valarray.pass.cpp            | 2 ++
 .../valarray.binary/minus_valarray_value.pass.cpp               | 2 ++
 .../valarray.binary/minus_value_valarray.pass.cpp               | 2 ++
 .../valarray.binary/modulo_valarray_valarray.pass.cpp           | 2 ++
 .../valarray.binary/modulo_valarray_value.pass.cpp              | 2 ++
 .../valarray.binary/modulo_value_valarray.pass.cpp              | 2 ++
 .../valarray.binary/or_valarray_valarray.pass.cpp               | 2 ++
 .../valarray.binary/or_valarray_value.pass.cpp                  | 2 ++
 .../valarray.binary/or_value_valarray.pass.cpp                  | 2 ++
 .../valarray.binary/plus_valarray_valarray.pass.cpp             | 2 ++
 .../valarray.binary/plus_valarray_value.pass.cpp                | 2 ++
 .../valarray.binary/plus_value_valarray.pass.cpp                | 2 ++
 .../valarray.binary/shift_left_valarray_valarray.pass.cpp       | 2 ++
 .../valarray.binary/shift_left_valarray_value.pass.cpp          | 2 ++
 .../valarray.binary/shift_left_value_valarray.pass.cpp          | 2 ++
 .../valarray.binary/shift_right_valarray_valarray.pass.cpp      | 2 ++
 .../valarray.binary/shift_right_valarray_value.pass.cpp         | 2 ++
 .../valarray.binary/shift_right_value_valarray.pass.cpp         | 2 ++
 .../valarray.binary/times_valarray_valarray.pass.cpp            | 2 ++
 .../valarray.binary/times_valarray_value.pass.cpp               | 2 ++
 .../valarray.binary/times_value_valarray.pass.cpp               | 2 ++
 .../valarray.binary/xor_valarray_valarray.pass.cpp              | 2 ++
 .../valarray.binary/xor_valarray_value.pass.cpp                 | 2 ++
 .../valarray.binary/xor_value_valarray.pass.cpp                 | 2 ++
 .../valarray.comparison/and_valarray_valarray.pass.cpp          | 2 ++
 .../valarray.comparison/and_valarray_value.pass.cpp             | 2 ++
 .../valarray.comparison/and_value_valarray.pass.cpp             | 2 ++
 .../valarray.comparison/equal_valarray_valarray.pass.cpp        | 2 ++
 .../valarray.comparison/equal_valarray_value.pass.cpp           | 2 ++
 .../valarray.comparison/equal_value_valarray.pass.cpp           | 2 ++
 .../greater_equal_valarray_valarray.pass.cpp                    | 2 ++
 .../valarray.comparison/greater_equal_valarray_value.pass.cpp   | 2 ++
 .../valarray.comparison/greater_equal_value_valarray.pass.cpp   | 2 ++
 .../valarray.comparison/greater_valarray_valarray.pass.cpp      | 2 ++
 .../valarray.comparison/greater_valarray_value.pass.cpp         | 2 ++
 .../valarray.comparison/greater_value_valarray.pass.cpp         | 2 ++
 .../valarray.comparison/less_equal_valarray_valarray.pass.cpp   | 2 ++
 .../valarray.comparison/less_equal_valarray_value.pass.cpp      | 2 ++
 .../valarray.comparison/less_equal_value_valarray.pass.cpp      | 2 ++
 .../valarray.comparison/less_valarray_valarray.pass.cpp         | 2 ++
 .../valarray.comparison/less_valarray_value.pass.cpp            | 2 ++
 .../valarray.comparison/less_value_valarray.pass.cpp            | 2 ++
 .../valarray.comparison/not_equal_valarray_valarray.pass.cpp    | 2 ++
 .../valarray.comparison/not_equal_valarray_value.pass.cpp       | 2 ++
 .../valarray.comparison/not_equal_value_valarray.pass.cpp       | 2 ++
 .../valarray.comparison/or_valarray_valarray.pass.cpp           | 2 ++
 .../valarray.comparison/or_valarray_value.pass.cpp              | 2 ++
 .../valarray.comparison/or_value_valarray.pass.cpp              | 2 ++
 .../numarray/valarray.nonmembers/valarray.special/swap.pass.cpp | 2 ++
 .../valarray.transcend/abs_valarray.pass.cpp                    | 2 ++
 .../valarray.transcend/acos_valarray.pass.cpp                   | 2 ++
 .../valarray.transcend/asin_valarray.pass.cpp                   | 2 ++
 .../valarray.transcend/atan2_valarray_valarray.pass.cpp         | 2 ++
 .../valarray.transcend/atan2_valarray_value.pass.cpp            | 2 ++
 .../valarray.transcend/atan2_value_valarray.pass.cpp            | 2 ++
 .../valarray.transcend/atan_valarray.pass.cpp                   | 2 ++
 .../valarray.transcend/cos_valarray.pass.cpp                    | 2 ++
 .../valarray.transcend/cosh_valarray.pass.cpp                   | 2 ++
 .../valarray.transcend/exp_valarray.pass.cpp                    | 2 ++
 .../valarray.transcend/log10_valarray.pass.cpp                  | 2 ++
 .../valarray.transcend/log_valarray.pass.cpp                    | 2 ++
 .../valarray.transcend/pow_valarray_valarray.pass.cpp           | 2 ++
 .../valarray.transcend/pow_valarray_value.pass.cpp              | 2 ++
 .../valarray.transcend/pow_value_valarray.pass.cpp              | 2 ++
 .../valarray.transcend/sin_valarray.pass.cpp                    | 2 ++
 .../valarray.transcend/sinh_valarray.pass.cpp                   | 2 ++
 .../valarray.transcend/sqrt_valarray.pass.cpp                   | 2 ++
 .../valarray.transcend/tan_valarray.pass.cpp                    | 2 ++
 .../valarray.transcend/tanh_valarray.pass.cpp                   | 2 ++
 .../std/numerics/numarray/valarray.range/begin_const.pass.cpp   | 2 ++
 .../numerics/numarray/valarray.range/begin_non_const.pass.cpp   | 2 ++
 .../std/numerics/numarray/valarray.range/end_const.pass.cpp     | 2 ++
 .../std/numerics/numarray/valarray.range/end_non_const.pass.cpp | 2 ++
 .../std/numerics/numeric.ops/accumulate/accumulate.pass.cpp     | 1 +
 .../std/numerics/numeric.ops/accumulate/accumulate_op.pass.cpp  | 1 +
 .../numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp | 1 +
 .../numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp  | 1 +
 .../numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp | 1 +
 .../numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp       | 1 +
 .../numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp  | 1 +
 .../numerics/numeric.ops/inner.product/inner_product.pass.cpp   | 1 +
 .../numeric.ops/inner.product/inner_product_comp.pass.cpp       | 1 +
 libcxx/test/std/numerics/numeric.ops/numeric.iota/iota.pass.cpp | 1 +
 .../test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp  | 2 ++
 .../std/numerics/numeric.ops/partial.sum/partial_sum.pass.cpp   | 1 +
 .../numerics/numeric.ops/partial.sum/partial_sum_op.pass.cpp    | 1 +
 libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp     | 1 +
 .../test/std/numerics/numeric.ops/reduce/reduce_init.pass.cpp   | 1 +
 .../std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp     | 1 +
 .../transform_exclusive_scan_init_bop_uop.pass.cpp              | 1 +
 .../transform_inclusive_scan_bop_uop.pass.cpp                   | 1 +
 .../transform_inclusive_scan_bop_uop_init.pass.cpp              | 1 +
 .../transform_reduce_iter_iter_init_bop_uop.pass.cpp            | 1 +
 .../transform_reduce_iter_iter_iter_init.pass.cpp               | 1 +
 .../transform_reduce_iter_iter_iter_init_op_op.pass.cpp         | 1 +
 .../numerics/rand/rand.adapt/rand.adapt.disc/assign.pass.cpp    | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.disc/copy.pass.cpp  | 2 ++
 .../rand/rand.adapt/rand.adapt.disc/ctor_engine_copy.pass.cpp   | 2 ++
 .../rand/rand.adapt/rand.adapt.disc/ctor_engine_move.pass.cpp   | 2 ++
 .../rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp   | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.disc/default.pass.cpp   | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.disc/discard.pass.cpp   | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.disc/eval.pass.cpp  | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp    | 2 ++
 .../rand/rand.adapt/rand.adapt.disc/result_type.pass.cpp        | 2 ++
 .../rand/rand.adapt/rand.adapt.disc/seed_result_type.pass.cpp   | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.disc/seed_sseq.pass.cpp | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.ibits/assign.pass.cpp   | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.ibits/copy.pass.cpp | 2 ++
 .../rand/rand.adapt/rand.adapt.ibits/ctor_engine_copy.pass.cpp  | 2 ++
 .../rand/rand.adapt/rand.adapt.ibits/ctor_engine_move.pass.cpp  | 2 ++
 .../rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp  | 2 ++
 .../rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp         | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.ibits/default.pass.cpp  | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.ibits/discard.pass.cpp  | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp   | 2 ++
 .../rand/rand.adapt/rand.adapt.ibits/seed_result_type.pass.cpp  | 2 ++
 .../rand/rand.adapt/rand.adapt.ibits/seed_sseq.pass.cpp         | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.shuf/assign.pass.cpp    | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.shuf/copy.pass.cpp  | 2 ++
 .../rand/rand.adapt/rand.adapt.shuf/ctor_engine_copy.pass.cpp   | 2 ++
 .../rand/rand.adapt/rand.adapt.shuf/ctor_engine_move.pass.cpp   | 2 ++
 .../rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp   | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.shuf/default.pass.cpp   | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.shuf/discard.pass.cpp   | 2 ++
 .../std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp    | 2 ++
 .../rand/rand.adapt/rand.adapt.shuf/seed_result_type.pass.cpp   | 2 ++
 .../numerics/rand/rand.adapt/rand.adapt.shuf/seed_sseq.pass.cpp | 2 ++
 libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp      | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp     | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp       | 2 ++
 .../rand.dist.bern.bernoulli/ctor_double.pass.cpp               | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp         | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp  | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp         | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp        | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp        | 2 ++
 .../rand.dist.bern.bernoulli/param_assign.pass.cpp              | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp   | 2 ++
 .../rand.dist.bern.bernoulli/param_types.pass.cpp               | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp  | 2 ++
 .../rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp      | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp  | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp       | 2 ++
 .../rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp        | 2 ++
 .../rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp     | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp     | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp         | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp      | 2 ++
 .../rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp      | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp       | 2 ++
 .../rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp        | 2 ++
 .../rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp     | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp     | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp       | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp         | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp      | 2 ++
 .../rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp   | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp | 2 ++
 .../rand.dist.bern.negbin/ctor_int_double.pass.cpp              | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp  | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp  | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp    | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp      | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp   | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp     | 2 ++
 .../rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp         | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp | 2 ++
 .../rand.dist.norm.cauchy/ctor_double_double.pass.cpp           | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp      | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp   | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp         | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp      | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp   | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp   | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp       | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp      | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp    | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp         | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp   | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp   | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp  | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp       | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp         | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp       | 2 ++
 .../rand.dist.norm.lognormal/ctor_double_double.pass.cpp        | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp         | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp       | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp         | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp        | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp        | 2 ++
 .../rand.dist.norm.lognormal/param_assign.pass.cpp              | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp   | 2 ++
 .../rand.dist.norm.lognormal/param_types.pass.cpp               | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp      | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp | 2 ++
 .../rand.dist.norm.normal/ctor_double_double.pass.cpp           | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp    | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp      | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp   | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp     | 2 ++
 .../rand.dist.norm/rand.dist.norm.normal/types.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp    | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp        | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp         | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp   | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp   | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp  | 2 ++
 .../rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp       | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp         | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp  | 2 ++
 .../rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp | 2 ++
 .../rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp    | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp      | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp       | 2 ++
 .../rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp    | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp        | 2 ++
 .../rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp         | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp      | 2 ++
 .../rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp         | 2 ++
 .../rand.dist.pois.extreme/ctor_double_double.pass.cpp          | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp         | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp    | 2 ++
 .../rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp        | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp  | 2 ++
 .../rand.dist.pois.gamma/ctor_double_double.pass.cpp            | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp      | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp    | 2 ++
 .../rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp      | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp         | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp         | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp    | 2 ++
 .../rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp        | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp       | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp         | 2 ++
 .../rand.dist.pois.weibull/ctor_double_double.pass.cpp          | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp         | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp | 2 ++
 .../rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp   | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp     | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp  | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp    | 2 ++
 .../rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp        | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp      | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp        | 2 ++
 .../rand.dist.samp.discrete/ctor_default.pass.cpp               | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp   | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp   | 2 ++
 .../rand.dist.samp.discrete/ctor_iterator.pass.cpp              | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp        | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp  | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp         | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp         | 2 ++
 .../rand.dist.samp.discrete/param_assign.pass.cpp               | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp  | 2 ++
 .../rand.dist.samp.discrete/param_ctor_default.pass.cpp         | 2 ++
 .../rand.dist.samp.discrete/param_ctor_func.pass.cpp            | 2 ++
 .../rand.dist.samp.discrete/param_ctor_init.pass.cpp            | 2 ++
 .../rand.dist.samp.discrete/param_ctor_iterator.pass.cpp        | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp    | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp   | 2 ++
 .../rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp       | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp  | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp     | 2 ++
 .../rand.dist.samp.pconst/ctor_init_func.pass.cpp               | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp    | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp  | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp  | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp    | 2 ++
 .../rand.dist.samp.pconst/param_ctor_default.pass.cpp           | 2 ++
 .../rand.dist.samp.pconst/param_ctor_func.pass.cpp              | 2 ++
 .../rand.dist.samp.pconst/param_ctor_init_func.pass.cpp         | 2 ++
 .../rand.dist.samp.pconst/param_ctor_iterator.pass.cpp          | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp      | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp   | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp     | 2 ++
 .../rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp         | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp       | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp         | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp    | 2 ++
 .../rand.dist.samp.plinear/ctor_init_func.pass.cpp              | 2 ++
 .../rand.dist.samp.plinear/ctor_iterator.pass.cpp               | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp  | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp         | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp   | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp    | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp | 2 ++
 .../rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp   | 2 ++
 .../rand.dist.samp.plinear/param_ctor_default.pass.cpp          | 2 ++
 .../rand.dist.samp.plinear/param_ctor_func.pass.cpp             | 2 ++
 .../rand.dist.samp.plinear/param_ctor_init_func.pass.cpp        | 2 ++
 .../rand.dist.samp.plinear/param_ctor_iterator.pass.cpp         | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp     | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp  | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp    | 2 ++
 .../rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp    | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp       | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp         | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp   | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp   | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp  | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp  | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp       | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp         | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp  | 2 ++
 .../rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp     | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp   | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp     | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/ctor_int_int.pass.cpp      | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp        | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp  | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp     | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp        | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp         | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp  | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp | 2 ++
 .../rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp      | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp        | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp        | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp       | 2 ++
 .../rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp         | 2 ++
 .../rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp    | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.lcong/assign.pass.cpp   | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.lcong/copy.pass.cpp     | 2 ++
 .../rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp      | 2 ++
 .../numerics/rand/rand.eng/rand.eng.lcong/ctor_sseq.pass.cpp    | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.lcong/default.pass.cpp  | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.lcong/discard.pass.cpp  | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.lcong/eval.pass.cpp     | 2 ++
 .../test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp  | 2 ++
 .../numerics/rand/rand.eng/rand.eng.lcong/result_type.pass.cpp  | 2 ++
 .../rand/rand.eng/rand.eng.lcong/seed_result_type.pass.cpp      | 2 ++
 .../numerics/rand/rand.eng/rand.eng.lcong/seed_sseq.pass.cpp    | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.mers/assign.pass.cpp    | 2 ++
 .../test/std/numerics/rand/rand.eng/rand.eng.mers/copy.pass.cpp | 2 ++
 .../rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp       | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp | 2 ++
 .../rand/rand.eng/rand.eng.mers/ctor_sseq_all_zero.pass.cpp     | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.mers/default.pass.cpp   | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.mers/discard.pass.cpp   | 2 ++
 .../test/std/numerics/rand/rand.eng/rand.eng.mers/eval.pass.cpp | 2 ++
 .../test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp   | 2 ++
 .../numerics/rand/rand.eng/rand.eng.mers/result_type.pass.cpp   | 2 ++
 .../rand/rand.eng/rand.eng.mers/seed_result_type.pass.cpp       | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.mers/seed_sseq.pass.cpp | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.sub/assign.pass.cpp     | 2 ++
 .../test/std/numerics/rand/rand.eng/rand.eng.sub/copy.pass.cpp  | 2 ++
 .../rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp        | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp  | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.sub/default.pass.cpp    | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.sub/discard.pass.cpp    | 2 ++
 .../test/std/numerics/rand/rand.eng/rand.eng.sub/eval.pass.cpp  | 2 ++
 libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp | 2 ++
 .../numerics/rand/rand.eng/rand.eng.sub/result_type.pass.cpp    | 2 ++
 .../rand/rand.eng/rand.eng.sub/seed_result_type.pass.cpp        | 2 ++
 .../std/numerics/rand/rand.eng/rand.eng.sub/seed_sseq.pass.cpp  | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/knuth_b.pass.cpp      | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/minstd_rand.pass.cpp  | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/minstd_rand0.pass.cpp | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/mt19937.pass.cpp      | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/mt19937_64.pass.cpp   | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/ranlux24.pass.cpp     | 2 ++
 .../test/std/numerics/rand/rand.predef/ranlux24_base.pass.cpp   | 2 ++
 libcxx/test/std/numerics/rand/rand.predef/ranlux48.pass.cpp     | 2 ++
 .../test/std/numerics/rand/rand.predef/ranlux48_base.pass.cpp   | 2 ++
 .../rand.util/rand.util.canonical/generate_canonical.pass.cpp   | 1 +
 .../numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp  | 2 ++
 .../numerics/rand/rand.util/rand.util.seedseq/generate.pass.cpp | 2 ++
 .../rand/rand.util/rand.util.seedseq/initializer_list.pass.cpp  | 2 ++
 .../numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp | 2 ++
 .../numerics/rand/rand.util/rand.util.seedseq/types.pass.cpp    | 2 ++
 .../re/re.alg/re.alg.match/inverted_character_classes.pass.cpp  | 2 ++
 .../std/re/re.alg/re.alg.search/invert_neg_word_search.pass.cpp | 2 ++
 .../test/std/re/re.regex/re.regex.construct/ptr_size.pass.cpp   | 2 ++
 .../test/std/strings/basic.string.hash/enabled_hashes.pass.cpp  | 2 ++
 libcxx/test/std/strings/basic.string.literals/literal1.pass.cpp | 2 ++
 libcxx/test/std/strings/basic.string.literals/literal2.pass.cpp | 2 ++
 libcxx/test/std/strings/basic.string.literals/literal3.pass.cpp | 2 ++
 .../test/std/strings/basic.string/string.access/back.pass.cpp   | 1 +
 .../std/strings/basic.string/string.access/db_back.pass.cpp     | 1 +
 .../std/strings/basic.string/string.access/db_cback.pass.cpp    | 1 +
 .../std/strings/basic.string/string.access/db_cfront.pass.cpp   | 1 +
 .../std/strings/basic.string/string.access/db_cindex.pass.cpp   | 1 +
 .../std/strings/basic.string/string.access/db_front.pass.cpp    | 1 +
 .../std/strings/basic.string/string.access/db_index.pass.cpp    | 1 +
 .../test/std/strings/basic.string/string.access/front.pass.cpp  | 1 +
 .../test/std/strings/basic.string/string.access/index.pass.cpp  | 1 +
 .../std/strings/basic.string/string.capacity/clear.pass.cpp     | 1 +
 .../std/strings/basic.string/string.capacity/length.pass.cpp    | 1 +
 .../std/strings/basic.string/string.capacity/max_size.pass.cpp  | 1 +
 .../strings/basic.string/string.capacity/over_max_size.pass.cpp | 1 +
 .../test/std/strings/basic.string/string.capacity/size.pass.cpp | 1 +
 .../strings/basic.string/string.cons/initializer_list.pass.cpp  | 1 +
 .../string.cons/initializer_list_assignment.pass.cpp            | 1 +
 .../std/strings/basic.string/string.iterators/begin.pass.cpp    | 1 +
 .../std/strings/basic.string/string.iterators/cbegin.pass.cpp   | 1 +
 .../std/strings/basic.string/string.iterators/cend.pass.cpp     | 1 +
 .../std/strings/basic.string/string.iterators/crbegin.pass.cpp  | 1 +
 .../std/strings/basic.string/string.iterators/crend.pass.cpp    | 1 +
 .../basic.string/string.iterators/db_iterators_2.pass.cpp       | 1 +
 .../basic.string/string.iterators/db_iterators_3.pass.cpp       | 1 +
 .../basic.string/string.iterators/db_iterators_4.pass.cpp       | 1 +
 .../basic.string/string.iterators/db_iterators_5.pass.cpp       | 1 +
 .../basic.string/string.iterators/db_iterators_6.pass.cpp       | 1 +
 .../basic.string/string.iterators/db_iterators_7.pass.cpp       | 1 +
 .../basic.string/string.iterators/db_iterators_8.pass.cpp       | 1 +
 .../test/std/strings/basic.string/string.iterators/end.pass.cpp | 1 +
 .../strings/basic.string/string.iterators/iterators.pass.cpp    | 2 ++
 .../std/strings/basic.string/string.iterators/rbegin.pass.cpp   | 1 +
 .../std/strings/basic.string/string.iterators/rend.pass.cpp     | 1 +
 .../string.modifiers/string_append/iterator.pass.cpp            | 1 +
 .../string_insert/iter_initializer_list.pass.cpp                | 1 +
 .../string.modifiers/string_insert/iter_iter_iter.pass.cpp      | 1 +
 .../string_op_plus_equal/initializer_list.pass.cpp              | 1 +
 .../string_replace/iter_iter_initializer_list.pass.cpp          | 1 +
 .../string_replace/iter_iter_iter_iter.pass.cpp                 | 1 +
 .../string.modifiers/string_replace/iter_iter_pointer.pass.cpp  | 1 +
 .../string.nonmembers/string.io/get_line_delim_rv.pass.cpp      | 1 +
 .../string.nonmembers/string.io/get_line_rv.pass.cpp            | 1 +
 .../string.nonmembers/string.io/stream_insert.pass.cpp          | 1 +
 .../string.nonmembers/string_op!=/pointer_string.pass.cpp       | 1 +
 .../string.nonmembers/string_op!=/string_pointer.pass.cpp       | 1 +
 .../string.nonmembers/string_op!=/string_string.pass.cpp        | 1 +
 .../string.nonmembers/string_op!=/string_string_view.pass.cpp   | 1 +
 .../string.nonmembers/string_op!=/string_view_string.pass.cpp   | 1 +
 .../string.nonmembers/string_operator==/pointer_string.pass.cpp | 1 +
 .../string.nonmembers/string_operator==/string_pointer.pass.cpp | 1 +
 .../string.nonmembers/string_operator==/string_string.pass.cpp  | 1 +
 .../string_operator==/string_string_view.pass.cpp               | 1 +
 .../string_operator==/string_view_string.pass.cpp               | 1 +
 .../string.nonmembers/string_opgt/pointer_string.pass.cpp       | 1 +
 .../string.nonmembers/string_opgt/string_pointer.pass.cpp       | 1 +
 .../string.nonmembers/string_opgt/string_string.pass.cpp        | 1 +
 .../string.nonmembers/string_opgt/string_string_view.pass.cpp   | 1 +
 .../string.nonmembers/string_opgt/string_view_string.pass.cpp   | 1 +
 .../string.nonmembers/string_opgt=/pointer_string.pass.cpp      | 1 +
 .../string.nonmembers/string_opgt=/string_pointer.pass.cpp      | 1 +
 .../string.nonmembers/string_opgt=/string_string.pass.cpp       | 1 +
 .../string.nonmembers/string_opgt=/string_string_view.pass.cpp  | 1 +
 .../string.nonmembers/string_opgt=/string_view_string.pass.cpp  | 1 +
 .../string.nonmembers/string_oplt/pointer_string.pass.cpp       | 1 +
 .../string.nonmembers/string_oplt/string_pointer.pass.cpp       | 1 +
 .../string.nonmembers/string_oplt/string_string.pass.cpp        | 1 +
 .../string.nonmembers/string_oplt/string_string_view.pass.cpp   | 1 +
 .../string.nonmembers/string_oplt/string_view_string.pass.cpp   | 1 +
 .../string.nonmembers/string_oplt=/pointer_string.pass.cpp      | 1 +
 .../string.nonmembers/string_oplt=/string_pointer.pass.cpp      | 1 +
 .../string.nonmembers/string_oplt=/string_string.pass.cpp       | 1 +
 .../string.nonmembers/string_oplt=/string_string_view.pass.cpp  | 1 +
 .../string.nonmembers/string_oplt=/string_view_string.pass.cpp  | 1 +
 .../basic.string/string.ops/string.accessors/c_str.pass.cpp     | 1 +
 .../string.ops/string.accessors/get_allocator.pass.cpp          | 1 +
 .../basic.string/string.ops/string_compare/pointer.pass.cpp     | 1 +
 .../basic.string/string.ops/string_compare/string_view.pass.cpp | 1 +
 .../string.ops/string_find.first.not.of/char_size.pass.cpp      | 1 +
 .../string.ops/string_find.first.not.of/pointer_size.pass.cpp   | 1 +
 .../string_find.first.not.of/pointer_size_size.pass.cpp         | 1 +
 .../string_find.first.not.of/string_view_size.pass.cpp          | 1 +
 .../string.ops/string_find.first.of/char_size.pass.cpp          | 1 +
 .../string.ops/string_find.first.of/pointer_size.pass.cpp       | 1 +
 .../string.ops/string_find.first.of/pointer_size_size.pass.cpp  | 1 +
 .../string.ops/string_find.first.of/string_view_size.pass.cpp   | 1 +
 .../string.ops/string_find.last.not.of/char_size.pass.cpp       | 1 +
 .../string.ops/string_find.last.not.of/pointer_size.pass.cpp    | 1 +
 .../string_find.last.not.of/pointer_size_size.pass.cpp          | 1 +
 .../string_find.last.not.of/string_view_size.pass.cpp           | 1 +
 .../string.ops/string_find.last.of/char_size.pass.cpp           | 1 +
 .../string.ops/string_find.last.of/pointer_size.pass.cpp        | 1 +
 .../string.ops/string_find.last.of/pointer_size_size.pass.cpp   | 1 +
 .../string.ops/string_find.last.of/string_view_size.pass.cpp    | 1 +
 .../basic.string/string.ops/string_find/char_size.pass.cpp      | 1 +
 .../basic.string/string.ops/string_find/pointer_size.pass.cpp   | 1 +
 .../string.ops/string_find/pointer_size_size.pass.cpp           | 1 +
 .../string.ops/string_find/string_view_size.pass.cpp            | 1 +
 .../basic.string/string.ops/string_rfind/char_size.pass.cpp     | 1 +
 .../basic.string/string.ops/string_rfind/pointer_size.pass.cpp  | 1 +
 .../string.ops/string_rfind/pointer_size_size.pass.cpp          | 1 +
 .../string.ops/string_rfind/string_view_size.pass.cpp           | 1 +
 .../std/strings/basic.string/string.require/contiguous.pass.cpp | 1 +
 libcxx/test/std/strings/basic.string/types.pass.cpp             | 1 +
 .../char.traits.specializations.char/assign3.pass.cpp           | 2 ++
 .../char.traits.specializations.char/copy.pass.cpp              | 2 ++
 .../char.traits.specializations.char/eof.pass.cpp               | 2 ++
 .../char.traits.specializations.char/eq.pass.cpp                | 2 ++
 .../char.traits.specializations.char/eq_int_type.pass.cpp       | 2 ++
 .../char.traits.specializations.char/lt.pass.cpp                | 2 ++
 .../char.traits.specializations.char/move.pass.cpp              | 2 ++
 .../char.traits.specializations.char/not_eof.pass.cpp           | 2 ++
 .../char.traits.specializations.char/to_char_type.pass.cpp      | 2 ++
 .../char.traits.specializations.char/to_int_type.pass.cpp       | 2 ++
 .../char.traits.specializations.char/types.pass.cpp             | 2 ++
 .../char.traits.specializations.char16_t/assign3.pass.cpp       | 2 ++
 .../char.traits.specializations.char16_t/copy.pass.cpp          | 2 ++
 .../char.traits.specializations.char16_t/eof.pass.cpp           | 2 ++
 .../char.traits.specializations.char16_t/move.pass.cpp          | 2 ++
 .../char.traits.specializations.char16_t/types.pass.cpp         | 2 ++
 .../char.traits.specializations.char32_t/assign3.pass.cpp       | 2 ++
 .../char.traits.specializations.char32_t/copy.pass.cpp          | 2 ++
 .../char.traits.specializations.char32_t/eof.pass.cpp           | 2 ++
 .../char.traits.specializations.char32_t/move.pass.cpp          | 2 ++
 .../char.traits.specializations.char32_t/types.pass.cpp         | 2 ++
 .../char.traits.specializations.char8_t/assign3.pass.cpp        | 2 ++
 .../char.traits.specializations.char8_t/copy.pass.cpp           | 2 ++
 .../char.traits.specializations.char8_t/eof.pass.cpp            | 2 ++
 .../char.traits.specializations.char8_t/move.pass.cpp           | 2 ++
 .../char.traits.specializations.char8_t/types.pass.cpp          | 2 ++
 .../char.traits.specializations.wchar.t/assign3.pass.cpp        | 2 ++
 .../char.traits.specializations.wchar.t/copy.pass.cpp           | 2 ++
 .../char.traits.specializations.wchar.t/eof.pass.cpp            | 2 ++
 .../char.traits.specializations.wchar.t/eq.pass.cpp             | 2 ++
 .../char.traits.specializations.wchar.t/eq_int_type.pass.cpp    | 2 ++
 .../char.traits.specializations.wchar.t/lt.pass.cpp             | 2 ++
 .../char.traits.specializations.wchar.t/move.pass.cpp           | 2 ++
 .../char.traits.specializations.wchar.t/not_eof.pass.cpp        | 2 ++
 .../char.traits.specializations.wchar.t/to_char_type.pass.cpp   | 2 ++
 .../char.traits.specializations.wchar.t/to_int_type.pass.cpp    | 2 ++
 .../char.traits.specializations.wchar.t/types.pass.cpp          | 2 ++
 libcxx/test/std/strings/string.conversions/to_string.pass.cpp   | 2 ++
 libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp  | 2 ++
 .../string.view.comparison/opeq.string_view.string.pass.cpp     | 2 ++
 .../string.view.comparison/opge.string_view.string.pass.cpp     | 2 ++
 .../string.view.comparison/opgt.string_view.string.pass.cpp     | 2 ++
 .../string.view.comparison/ople.string_view.string.pass.cpp     | 2 ++
 .../string.view.comparison/oplt.string_view.string.pass.cpp     | 2 ++
 .../string.view.comparison/opne.string_view.string.pass.cpp     | 2 ++
 .../find_first_not_of_string_view_size.pass.cpp                 | 2 ++
 .../string.view.find/find_first_of_string_view_size.pass.cpp    | 2 ++
 .../string.view.find/find_last_not_of_string_view_size.pass.cpp | 2 ++
 .../string.view.find/find_last_of_string_view_size.pass.cpp     | 2 ++
 .../string.view/string.view.hash/enabled_hashes.pass.cpp        | 2 ++
 .../strings/string.view/string.view.io/stream_insert.pass.cpp   | 2 ++
 .../string.view/string.view.synop/nothing_to_do.pass.cpp        | 2 ++
 .../string.view/string.view.template/nothing_to_do.pass.cpp     | 2 ++
 .../strings/string.view/string_view.literals/literal1.pass.cpp  | 2 ++
 .../strings/string.view/string_view.literals/literal2.pass.cpp  | 2 ++
 .../strings/string.view/string_view.literals/literal3.pass.cpp  | 2 ++
 .../std/thread/futures/futures.async/async_race.38682.pass.cpp  | 2 ++
 .../test/std/thread/futures/futures.async/async_race.pass.cpp   | 2 ++
 .../futures/futures.errors/default_error_condition.pass.cpp     | 2 ++
 .../futures/futures.errors/equivalent_error_code_int.pass.cpp   | 2 ++
 .../futures.errors/equivalent_int_error_condition.pass.cpp      | 2 ++
 .../std/thread/futures/futures.errors/future_category.pass.cpp  | 2 ++
 .../std/thread/futures/futures.errors/make_error_code.pass.cpp  | 2 ++
 .../thread/futures/futures.errors/make_error_condition.pass.cpp | 2 ++
 .../test/std/thread/futures/futures.future_error/types.pass.cpp | 2 ++
 .../std/thread/futures/futures.overview/future_errc.pass.cpp    | 2 ++
 .../std/thread/futures/futures.overview/future_status.pass.cpp  | 2 ++
 .../test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp | 1 +
 libcxx/test/std/thread/futures/futures.promise/default.pass.cpp | 2 ++
 .../std/thread/futures/futures.promise/set_exception.pass.cpp   | 2 ++
 .../futures.promise/set_exception_at_thread_exit.pass.cpp       | 2 ++
 .../futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp  | 2 ++
 .../test/std/thread/futures/futures.promise/set_rvalue.pass.cpp | 2 ++
 .../futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp  | 2 ++
 .../futures.promise/set_value_at_thread_exit_const.pass.cpp     | 2 ++
 .../futures.promise/set_value_at_thread_exit_void.pass.cpp      | 2 ++
 .../std/thread/futures/futures.promise/set_value_void.pass.cpp  | 2 ++
 libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp    | 1 +
 .../std/thread/futures/futures.promise/uses_allocator.pass.cpp  | 1 +
 .../thread/futures/futures.shared_future/ctor_future.pass.cpp   | 2 ++
 .../std/thread/futures/futures.shared_future/default.pass.cpp   | 2 ++
 .../test/std/thread/futures/futures.shared_future/dtor.pass.cpp | 1 +
 .../thread/futures/futures.shared_future/move_assign.pass.cpp   | 2 ++
 .../std/thread/futures/futures.shared_future/move_ctor.pass.cpp | 2 ++
 .../test/std/thread/futures/futures.shared_future/wait.pass.cpp | 2 ++
 .../std/thread/futures/futures.shared_future/wait_for.pass.cpp  | 2 ++
 .../thread/futures/futures.shared_future/wait_until.pass.cpp    | 2 ++
 .../futures.task/futures.task.members/assign_move.pass.cpp      | 2 ++
 .../futures.task/futures.task.members/ctor_default.pass.cpp     | 2 ++
 .../futures.task/futures.task.members/ctor_func.pass.cpp        | 2 ++
 .../futures.task/futures.task.members/ctor_func_alloc.pass.cpp  | 1 +
 .../futures.task/futures.task.members/ctor_move.pass.cpp        | 2 ++
 .../futures/futures.task/futures.task.members/swap.pass.cpp     | 2 ++
 .../futures/futures.task/futures.task.nonmembers/swap.pass.cpp  | 2 ++
 .../futures.task.nonmembers/uses_allocator.pass.cpp             | 1 +
 .../std/thread/futures/futures.unique_future/default.pass.cpp   | 2 ++
 .../test/std/thread/futures/futures.unique_future/dtor.pass.cpp | 1 +
 .../thread/futures/futures.unique_future/move_assign.pass.cpp   | 2 ++
 .../std/thread/futures/futures.unique_future/move_ctor.pass.cpp | 2 ++
 .../std/thread/futures/futures.unique_future/share.pass.cpp     | 2 ++
 .../test/std/thread/futures/futures.unique_future/wait.pass.cpp | 2 ++
 .../std/thread/futures/futures.unique_future/wait_for.pass.cpp  | 2 ++
 .../thread/futures/futures.unique_future/wait_until.pass.cpp    | 2 ++
 libcxx/test/std/thread/macro.pass.cpp                           | 2 ++
 libcxx/test/std/thread/thread.condition/cv_status.pass.cpp      | 2 ++
 .../thread/thread.condition/notify_all_at_thread_exit.pass.cpp  | 2 ++
 .../thread.condition/thread.condition.condvar/default.pass.cpp  | 2 ++
 .../thread.condition.condvar/destructor.pass.cpp                | 2 ++
 .../thread.condition.condvar/notify_all.pass.cpp                | 2 ++
 .../thread.condition.condvar/notify_one.pass.cpp                | 2 ++
 .../thread.condition/thread.condition.condvar/wait.pass.cpp     | 2 ++
 .../thread.condition/thread.condition.condvar/wait_for.pass.cpp | 2 ++
 .../thread.condition.condvar/wait_for_pred.pass.cpp             | 2 ++
 .../thread.condition.condvar/wait_pred.pass.cpp                 | 2 ++
 .../thread.condition.condvar/wait_until.pass.cpp                | 2 ++
 .../thread.condition.condvar/wait_until_pred.pass.cpp           | 2 ++
 .../thread.condition.condvarany/default.pass.cpp                | 2 ++
 .../thread.condition.condvarany/destructor.pass.cpp             | 2 ++
 .../thread.condition.condvarany/notify_all.pass.cpp             | 2 ++
 .../thread.condition.condvarany/notify_one.pass.cpp             | 2 ++
 .../thread.condition/thread.condition.condvarany/wait.pass.cpp  | 2 ++
 .../thread.condition.condvarany/wait_for.pass.cpp               | 2 ++
 .../thread.condition.condvarany/wait_for_pred.pass.cpp          | 2 ++
 .../thread.condition.condvarany/wait_pred.pass.cpp              | 2 ++
 .../thread.condition.condvarany/wait_until.pass.cpp             | 2 ++
 .../thread.condition.condvarany/wait_until_pred.pass.cpp        | 2 ++
 .../thread.lock/thread.lock.guard/adopt_lock.pass.cpp           | 2 ++
 .../thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp   | 2 ++
 .../thread.lock.shared/thread.lock.shared.cons/default.pass.cpp | 2 ++
 .../thread.lock.shared.cons/move_assign.pass.cpp                | 2 ++
 .../thread.lock.shared.cons/move_ctor.pass.cpp                  | 2 ++
 .../thread.lock.shared.cons/mutex_adopt_lock.pass.cpp           | 2 ++
 .../thread.lock.shared.cons/mutex_defer_lock.pass.cpp           | 2 ++
 .../thread.lock.shared.cons/mutex_try_to_lock.pass.cpp          | 2 ++
 .../thread.lock.shared.mod/member_swap.pass.cpp                 | 2 ++
 .../thread.lock.shared.mod/nonmember_swap.pass.cpp              | 2 ++
 .../thread.lock.shared/thread.lock.shared.mod/release.pass.cpp  | 2 ++
 .../thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp    | 2 ++
 .../thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp  | 2 ++
 .../thread.lock.shared.obs/owns_lock.pass.cpp                   | 2 ++
 .../thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp  | 2 ++
 .../thread.lock.unique/thread.lock.unique.cons/default.pass.cpp | 2 ++
 .../thread.lock.unique.cons/move_assign.pass.cpp                | 2 ++
 .../thread.lock.unique.cons/move_ctor.pass.cpp                  | 2 ++
 .../thread.lock.unique.cons/mutex_adopt_lock.pass.cpp           | 2 ++
 .../thread.lock.unique.cons/mutex_defer_lock.pass.cpp           | 2 ++
 .../thread.lock.unique.cons/mutex_duration.pass.cpp             | 2 ++
 .../thread.lock.unique.cons/mutex_time_point.pass.cpp           | 2 ++
 .../thread.lock.unique.cons/mutex_try_to_lock.pass.cpp          | 2 ++
 .../thread.lock.unique.mod/member_swap.pass.cpp                 | 2 ++
 .../thread.lock.unique.mod/nonmember_swap.pass.cpp              | 2 ++
 .../thread.lock.unique/thread.lock.unique.mod/release.pass.cpp  | 2 ++
 .../thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp    | 2 ++
 .../thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp  | 2 ++
 .../thread.lock.unique.obs/owns_lock.pass.cpp                   | 2 ++
 .../thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp  | 2 ++
 libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp  | 2 ++
 .../thread.mutex.class/default.pass.cpp                         | 2 ++
 .../thread.mutex.class/lock.pass.cpp                            | 2 ++
 .../thread.mutex.class/try_lock.pass.cpp                        | 2 ++
 .../thread.mutex.recursive/default.pass.cpp                     | 2 ++
 .../thread.mutex.recursive/lock.pass.cpp                        | 2 ++
 .../thread.mutex.recursive/try_lock.pass.cpp                    | 2 ++
 .../thread.shared_mutex.class/default.pass.cpp                  | 2 ++
 .../thread.shared_mutex.class/try_lock.pass.cpp                 | 2 ++
 .../thread.shared_mutex.class/try_lock_shared.pass.cpp          | 2 ++
 .../thread.sharedtimedmutex.class/default.pass.cpp              | 2 ++
 .../thread.sharedtimedmutex.class/try_lock.pass.cpp             | 2 ++
 .../try_lock_until_deadlock_bug.pass.cpp                        | 2 ++
 .../thread.timedmutex.class/default.pass.cpp                    | 2 ++
 .../thread.timedmutex.class/lock.pass.cpp                       | 2 ++
 .../thread.timedmutex.class/try_lock.pass.cpp                   | 2 ++
 .../thread.timedmutex.class/try_lock_for.pass.cpp               | 2 ++
 .../thread.timedmutex.class/try_lock_until.pass.cpp             | 2 ++
 .../thread.timedmutex.recursive/default.pass.cpp                | 2 ++
 .../thread.timedmutex.recursive/lock.pass.cpp                   | 2 ++
 .../thread.timedmutex.recursive/try_lock.pass.cpp               | 2 ++
 .../thread.timedmutex.recursive/try_lock_for.pass.cpp           | 2 ++
 .../thread.timedmutex.recursive/try_lock_until.pass.cpp         | 2 ++
 .../thread.mutex/thread.once/thread.once.callonce/race.pass.cpp | 2 ++
 .../thread.thread.class/thread.thread.algorithm/swap.pass.cpp   | 2 ++
 .../thread.thread.class/thread.thread.assign/move.pass.cpp      | 2 ++
 .../thread.thread.class/thread.thread.assign/move2.pass.cpp     | 2 ++
 .../thread.thread.class/thread.thread.constr/default.pass.cpp   | 2 ++
 .../thread.thread.class/thread.thread.constr/move.pass.cpp      | 2 ++
 .../thread.thread.class/thread.thread.destr/dtor.pass.cpp       | 2 ++
 .../thread.thread.class/thread.thread.id/assign.pass.cpp        | 2 ++
 .../thread.thread.class/thread.thread.id/copy.pass.cpp          | 2 ++
 .../thread.thread.class/thread.thread.id/default.pass.cpp       | 2 ++
 .../thread.thread.id/enabled_hashes.pass.cpp                    | 2 ++
 .../thread.thread.class/thread.thread.id/eq.pass.cpp            | 2 ++
 .../thread.thread.class/thread.thread.id/lt.pass.cpp            | 2 ++
 .../thread.thread.class/thread.thread.id/stream.pass.cpp        | 2 ++
 .../thread.thread.class/thread.thread.member/get_id.pass.cpp    | 2 ++
 .../thread.thread.class/thread.thread.member/joinable.pass.cpp  | 2 ++
 .../thread.thread.class/thread.thread.member/swap.pass.cpp      | 2 ++
 .../thread.thread.static/hardware_concurrency.pass.cpp          | 2 ++
 .../thread/thread.threads/thread.thread.this/get_id.pass.cpp    | 2 ++
 .../thread.threads/thread.thread.this/sleep_until.pass.cpp      | 2 ++
 .../std/thread/thread.threads/thread.thread.this/yield.pass.cpp | 2 ++
 .../allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp   | 1 +
 .../allocator.adaptor.cnstr/converting_copy.pass.cpp            | 1 +
 .../allocator.adaptor.cnstr/converting_move.pass.cpp            | 1 +
 .../allocator.adaptor/allocator.adaptor.cnstr/copy.pass.cpp     | 1 +
 .../allocator.adaptor/allocator.adaptor.cnstr/default.pass.cpp  | 1 +
 .../allocator.adaptor.members/allocate_size.pass.cpp            | 1 +
 .../allocator.adaptor.members/allocate_size_hint.pass.cpp       | 1 +
 .../allocator.adaptor.members/construct.pass.cpp                | 1 +
 .../allocator.adaptor.members/construct_pair.pass.cpp           | 2 ++
 .../construct_pair_const_lvalue_pair.pass.cpp                   | 2 ++
 .../allocator.adaptor.members/construct_pair_piecewise.pass.cpp | 2 ++
 .../allocator.adaptor.members/construct_pair_rvalue.pass.cpp    | 2 ++
 .../allocator.adaptor.members/construct_pair_values.pass.cpp    | 2 ++
 .../allocator.adaptor.members/construct_type.pass.cpp           | 2 ++
 .../allocator.adaptor.members/deallocate.pass.cpp               | 1 +
 .../allocator.adaptor.members/destroy.pass.cpp                  | 1 +
 .../allocator.adaptor.members/inner_allocator.pass.cpp          | 1 +
 .../allocator.adaptor.members/max_size.pass.cpp                 | 1 +
 .../allocator.adaptor.members/outer_allocator.pass.cpp          | 1 +
 .../select_on_container_copy_construction.pass.cpp              | 1 +
 .../allocator.adaptor.types/allocator_pointers.pass.cpp         | 2 ++
 .../allocator.adaptor.types/inner_allocator_type.pass.cpp       | 1 +
 .../allocator.adaptor.types/is_always_equal.pass.cpp            | 1 +
 .../propagate_on_container_copy_assignment.pass.cpp             | 1 +
 .../propagate_on_container_move_assignment.pass.cpp             | 1 +
 .../propagate_on_container_swap.pass.cpp                        | 1 +
 .../scoped.adaptor.operators/copy_assign.pass.cpp               | 1 +
 .../allocator.adaptor/scoped.adaptor.operators/eq.pass.cpp      | 1 +
 .../scoped.adaptor.operators/move_assign.pass.cpp               | 1 +
 libcxx/test/std/utilities/allocator.adaptor/types.pass.cpp      | 1 +
 .../std/utilities/any/any.class/any.modifiers/reset.pass.cpp    | 1 +
 .../std/utilities/any/any.class/any.modifiers/swap.pass.cpp     | 1 +
 .../utilities/any/any.class/any.observers/has_value.pass.cpp    | 1 +
 .../test/std/utilities/any/any.class/not_literal_type.pass.cpp  | 2 ++
 libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp      | 2 ++
 .../utilities/charconv/charconv.from.chars/integral.pass.cpp    | 1 +
 .../std/utilities/charconv/charconv.to.chars/integral.pass.cpp  | 1 +
 .../function.objects/arithmetic.operations/transparent.pass.cpp | 2 ++
 .../func.bind.bind/PR23141_invoke_not_constexpr.pass.cpp        | 2 ++
 .../bind/func.bind/func.bind.bind/copy.pass.cpp                 | 2 ++
 .../func.bind/func.bind.bind/invoke_function_object.pass.cpp    | 2 ++
 .../bind/func.bind/func.bind.bind/invoke_int_0.pass.cpp         | 2 ++
 .../bind/func.bind/func.bind.bind/invoke_lvalue.pass.cpp        | 2 ++
 .../bind/func.bind/func.bind.bind/invoke_rvalue.pass.cpp        | 2 ++
 .../bind/func.bind/func.bind.bind/invoke_void_0.pass.cpp        | 2 ++
 .../bind/func.bind/func.bind.bind/nested.pass.cpp               | 2 ++
 .../func.bind/func.bind.isbind/is_bind_expression_03.pass.cpp   | 2 ++
 .../function.objects/bitwise.operations/bit_not.pass.cpp        | 2 ++
 .../function.objects/bitwise.operations/transparent.pass.cpp    | 2 ++
 .../utilities/function.objects/comparisons/transparent.pass.cpp | 2 ++
 .../std/utilities/function.objects/func.invoke/invoke.pass.cpp  | 2 ++
 .../utilities/function.objects/func.memfn/member_data.pass.cpp  | 2 ++
 .../function.objects/func.memfn/member_function_const.pass.cpp  | 2 ++
 .../func.memfn/member_function_const_volatile.pass.cpp          | 2 ++
 .../func.memfn/member_function_volatile.pass.cpp                | 2 ++
 .../function.objects/func.require/binary_function.pass.cpp      | 2 ++
 .../function.objects/func.require/unary_function.pass.cpp       | 2 ++
 .../func.search/func.search.bm/default.pass.cpp                 | 1 +
 .../function.objects/func.search/func.search.bm/hash.pass.cpp   | 1 +
 .../func.search/func.search.bm/hash.pred.pass.cpp               | 1 +
 .../function.objects/func.search/func.search.bm/pred.pass.cpp   | 1 +
 .../func.search/func.search.bmh/default.pass.cpp                | 1 +
 .../function.objects/func.search/func.search.bmh/hash.pass.cpp  | 1 +
 .../func.search/func.search.bmh/hash.pred.pass.cpp              | 1 +
 .../function.objects/func.search/func.search.bmh/pred.pass.cpp  | 1 +
 .../func.search/func.search.default/default.pass.cpp            | 1 +
 .../func.search/func.search.default/default.pred.pass.cpp       | 1 +
 .../func.wrap/func.wrap.badcall/bad_function_call.pass.cpp      | 2 ++
 .../func.wrap.badcall.const/bad_function_call_ctor.pass.cpp     | 2 ++
 .../func.wrap.func/func.wrap.func.cap/operator_bool.pass.cpp    | 2 ++
 .../func.wrap.func/func.wrap.func.con/F_incomplete.pass.cpp     | 2 ++
 .../func.wrap/func.wrap.func/func.wrap.func.con/alloc.pass.cpp  | 1 +
 .../func.wrap.func/func.wrap.func.con/alloc_function.pass.cpp   | 1 +
 .../func.wrap.func/func.wrap.func.con/alloc_nullptr.pass.cpp    | 1 +
 .../func.wrap.func/func.wrap.func.con/default.pass.cpp          | 2 ++
 .../func.wrap.func/func.wrap.func.con/nullptr_t.pass.cpp        | 2 ++
 .../func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp | 2 ++
 .../func.wrap/func.wrap.func/func.wrap.func.inv/invoke.pass.cpp | 2 ++
 .../func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp   | 2 ++
 .../func.wrap.func/func.wrap.func.nullptr/operator_==.pass.cpp  | 2 ++
 .../func.wrap.func/func.wrap.func.targ/target.pass.cpp          | 2 ++
 .../func.wrap.func/func.wrap.func.targ/target_type.pass.cpp     | 2 ++
 .../function.objects/func.wrap/func.wrap.func/types.pass.cpp    | 2 ++
 .../function.objects/logical.operations/transparent.pass.cpp    | 2 ++
 .../utilities/function.objects/negators/binary_negate.pass.cpp  | 2 ++
 .../test/std/utilities/function.objects/negators/not1.pass.cpp  | 2 ++
 .../test/std/utilities/function.objects/negators/not2.pass.cpp  | 2 ++
 .../utilities/function.objects/negators/unary_negate.pass.cpp   | 2 ++
 .../function.objects/refwrap/refwrap.access/conversion.pass.cpp | 2 ++
 .../refwrap/refwrap.assign/copy_assign.pass.cpp                 | 2 ++
 .../function.objects/refwrap/refwrap.const/copy_ctor.pass.cpp   | 2 ++
 .../refwrap/refwrap.const/ctor.incomplete.pass.cpp              | 2 ++
 .../function.objects/refwrap/refwrap.const/type_ctor.pass.cpp   | 2 ++
 .../refwrap/refwrap.helpers/cref.incomplete.pass.cpp            | 2 ++
 .../function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp    | 2 ++
 .../function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp    | 2 ++
 .../refwrap/refwrap.helpers/ref.incomplete.pass.cpp             | 2 ++
 .../function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp     | 2 ++
 .../function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp     | 2 ++
 .../function.objects/refwrap/refwrap.invoke/invoke.pass.cpp     | 2 ++
 .../refwrap/refwrap.invoke/invoke_int_0.pass.cpp                | 2 ++
 .../refwrap/refwrap.invoke/invoke_void_0.pass.cpp               | 2 ++
 .../test/std/utilities/function.objects/refwrap/type.pass.cpp   | 2 ++
 .../function.objects/refwrap/unwrap_ref_decay.pass.cpp          | 2 ++
 .../function.objects/refwrap/unwrap_reference.pass.cpp          | 2 ++
 .../std/utilities/function.objects/refwrap/weak_result.pass.cpp | 2 ++
 .../function.objects/unord.hash/enabled_hashes.pass.cpp         | 2 ++
 .../std/utilities/intseq/intseq.general/integer_seq.pass.cpp    | 2 ++
 .../std/utilities/intseq/intseq.intseq/integer_seq.pass.cpp     | 2 ++
 .../std/utilities/intseq/intseq.make/make_integer_seq.pass.cpp  | 2 ++
 .../intseq/intseq.make/make_integer_seq_fallback.pass.cpp       | 2 ++
 .../std/utilities/memory/allocator.tag/allocator_arg.pass.cpp   | 2 ++
 .../allocator.traits/allocator.traits.members/allocate.pass.cpp | 1 +
 .../allocator.traits.members/deallocate.pass.cpp                | 1 +
 .../allocator.traits.types/is_always_equal.pass.cpp             | 2 ++
 .../utilities/memory/allocator.traits/allocator_type.pass.cpp   | 2 ++
 .../std/utilities/memory/allocator.traits/value_type.pass.cpp   | 2 ++
 .../memory/default.allocator/allocator.globals/eq.pass.cpp      | 2 ++
 .../memory/default.allocator/allocator.members/address.pass.cpp | 2 ++
 .../default.allocator/allocator.members/max_size.pass.cpp       | 2 ++
 .../memory/default.allocator/allocator_pointers.pass.cpp        | 2 ++
 .../utilities/memory/default.allocator/allocator_void.pass.cpp  | 2 ++
 .../utilities/memory/pointer.traits/difference_type.pass.cpp    | 2 ++
 .../std/utilities/memory/pointer.traits/element_type.pass.cpp   | 2 ++
 .../test/std/utilities/memory/pointer.traits/pointer.pass.cpp   | 2 ++
 .../pointer.traits/pointer.traits.functions/pointer_to.pass.cpp | 2 ++
 libcxx/test/std/utilities/memory/ptr.align/align.pass.cpp       | 2 ++
 .../specialized.addressof/addressof.pass.cpp                    | 2 ++
 .../specialized.addressof/constexpr_addressof.pass.cpp          | 2 ++
 .../specialized.destroy/destroy_at.pass.cpp                     | 2 ++
 .../std/utilities/memory/temporary.buffer/overaligned.pass.cpp  | 2 ++
 .../utilities/memory/temporary.buffer/temporary_buffer.pass.cpp | 2 ++
 .../utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp  | 2 ++
 .../memory/util.dynamic.safety/declare_no_pointers.pass.cpp     | 2 ++
 .../memory/util.dynamic.safety/declare_reachable.pass.cpp       | 2 ++
 .../memory/util.dynamic.safety/get_pointer_safety.pass.cpp      | 2 ++
 .../util.smartptr/util.smartptr.hash/enabled_hash.pass.cpp      | 2 ++
 .../util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp   | 2 ++
 .../util.smartptr.getdeleter/get_deleter.pass.cpp               | 1 +
 .../util.smartptr.shared.assign/auto_ptr_Y.pass.cpp             | 2 ++
 .../util.smartptr.shared.assign/shared_ptr.pass.cpp             | 2 ++
 .../util.smartptr.shared.assign/shared_ptr_Y.pass.cpp           | 2 ++
 .../util.smartptr.shared.assign/shared_ptr_Y_rv.pass.cpp        | 2 ++
 .../util.smartptr.shared.assign/shared_ptr_rv.pass.cpp          | 2 ++
 .../util.smartptr.shared.assign/unique_ptr_Y.pass.cpp           | 2 ++
 .../util.smartptr.shared.cast/const_pointer_cast.pass.cpp       | 2 ++
 .../util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp     | 2 ++
 .../util.smartptr.shared.cast/static_pointer_cast.pass.cpp      | 2 ++
 .../util.smartptr.shared.cmp/cmp_nullptr.pass.cpp               | 2 ++
 .../util.smartptr.shared/util.smartptr.shared.cmp/eq.pass.cpp   | 2 ++
 .../util.smartptr.shared/util.smartptr.shared.cmp/lt.pass.cpp   | 2 ++
 .../util.smartptr.shared.const/default.pass.cpp                 | 2 ++
 .../util.smartptr.shared.const/nullptr_t.pass.cpp               | 2 ++
 .../util.smartptr.shared.const/nullptr_t_deleter.pass.cpp       | 1 +
 .../nullptr_t_deleter_allocator.pass.cpp                        | 1 +
 .../nullptr_t_deleter_allocator_throw.pass.cpp                  | 1 +
 .../util.smartptr.shared.const/pointer.pass.cpp                 | 2 ++
 .../util.smartptr.shared.const/pointer_deleter.pass.cpp         | 1 +
 .../pointer_deleter_allocator.pass.cpp                          | 1 +
 .../pointer_deleter_allocator_throw.pass.cpp                    | 1 +
 .../util.smartptr.shared.const/pointer_deleter_throw.pass.cpp   | 1 +
 .../util.smartptr.shared.const/pointer_throw.pass.cpp           | 2 ++
 .../util.smartptr.shared.const/shared_ptr.pass.cpp              | 2 ++
 .../util.smartptr.shared.const/shared_ptr_Y.pass.cpp            | 2 ++
 .../util.smartptr.shared.const/shared_ptr_pointer.pass.cpp      | 2 ++
 .../util.smartptr.shared.create/allocate_shared.pass.cpp        | 1 +
 .../util.smartptr.shared.create/allocate_shared_cxx03.pass.cpp  | 1 +
 .../util.smartptr.shared.create/make_shared.volatile.pass.cpp   | 2 ++
 .../util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp    | 2 ++
 .../util.smartptr.shared.mod/reset.pass.cpp                     | 2 ++
 .../util.smartptr.shared.mod/reset_pointer.pass.cpp             | 2 ++
 .../util.smartptr.shared.mod/reset_pointer_deleter.pass.cpp     | 1 +
 .../reset_pointer_deleter_allocator.pass.cpp                    | 1 +
 .../util.smartptr.shared/util.smartptr.shared.mod/swap.pass.cpp | 2 ++
 .../util.smartptr.shared.obs/arrow.pass.cpp                     | 2 ++
 .../util.smartptr.shared.obs/dereference.pass.cpp               | 2 ++
 .../util.smartptr.shared.obs/op_bool.pass.cpp                   | 2 ++
 .../util.smartptr.shared.obs/unique.pass.cpp                    | 2 ++
 .../util.smartptr.shared.spec/swap.pass.cpp                     | 2 ++
 .../memory/util.smartptr/util.smartptr.weak/types.pass.cpp      | 2 ++
 .../util.smartptr.weak.assign/shared_ptr_Y.pass.cpp             | 2 ++
 .../util.smartptr.weak.assign/weak_ptr.pass.cpp                 | 2 ++
 .../util.smartptr.weak.assign/weak_ptr_Y.pass.cpp               | 2 ++
 .../util.smartptr.weak.const/default.pass.cpp                   | 2 ++
 .../util.smartptr.weak.const/shared_ptr_Y.pass.cpp              | 2 ++
 .../util.smartptr.weak.const/weak_ptr_Y.pass.cpp                | 2 ++
 .../util.smartptr.weak/util.smartptr.weak.mod/reset.pass.cpp    | 2 ++
 .../util.smartptr.weak/util.smartptr.weak.mod/swap.pass.cpp     | 2 ++
 .../util.smartptr.weak/util.smartptr.weak.obs/expired.pass.cpp  | 2 ++
 .../util.smartptr.weak/util.smartptr.weak.obs/lock.pass.cpp     | 2 ++
 .../util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp    | 2 ++
 .../util.smartptr/util.smartptr.weakptr/bad_weak_ptr.pass.cpp   | 2 ++
 .../test/std/utilities/meta/meta.logical/conjunction.pass.cpp   | 2 ++
 .../test/std/utilities/meta/meta.logical/disjunction.pass.cpp   | 2 ++
 libcxx/test/std/utilities/meta/meta.logical/negation.pass.cpp   | 2 ++
 .../utilities/meta/meta.rel/is_convertible_fallback.pass.cpp    | 2 ++
 .../std/utilities/meta/meta.rel/is_nothrow_convertible.pass.cpp | 2 ++
 .../utilities/meta/meta.unary/meta.unary.cat/nullptr.pass.cpp   | 2 ++
 .../utilities/meta/meta.unary/meta.unary.comp/array.pass.cpp    | 2 ++
 .../utilities/meta/meta.unary/meta.unary.comp/class.pass.cpp    | 2 ++
 .../std/utilities/meta/meta.unary/meta.unary.comp/enum.pass.cpp | 2 ++
 .../meta/meta.unary/meta.unary.comp/floating_point.pass.cpp     | 2 ++
 .../utilities/meta/meta.unary/meta.unary.comp/function.pass.cpp | 2 ++
 .../utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp | 2 ++
 .../meta/meta.unary/meta.unary.comp/is_bounded_array.pass.cpp   | 2 ++
 .../meta/meta.unary/meta.unary.comp/is_unbounded_array.pass.cpp | 2 ++
 .../meta/meta.unary/meta.unary.comp/lvalue_ref.pass.cpp         | 2 ++
 .../meta.unary/meta.unary.comp/member_function_pointer.pass.cpp | 2 ++
 .../meta.unary/meta.unary.comp/member_object_pointer.pass.cpp   | 2 ++
 .../utilities/meta/meta.unary/meta.unary.comp/pointer.pass.cpp  | 2 ++
 .../meta/meta.unary/meta.unary.comp/rvalue_ref.pass.cpp         | 2 ++
 .../utilities/meta/meta.unary/meta.unary.comp/union.pass.cpp    | 2 ++
 .../std/utilities/meta/meta.unary/meta.unary.comp/void.pass.cpp | 2 ++
 .../optional/optional.bad_optional_access/default.pass.cpp      | 2 ++
 .../optional/optional.bad_optional_access/derive.pass.cpp       | 2 ++
 .../std/utilities/optional/optional.comp_with_t/equal.pass.cpp  | 2 ++
 .../utilities/optional/optional.comp_with_t/greater.pass.cpp    | 2 ++
 .../optional/optional.comp_with_t/greater_equal.pass.cpp        | 2 ++
 .../utilities/optional/optional.comp_with_t/less_equal.pass.cpp | 2 ++
 .../utilities/optional/optional.comp_with_t/less_than.pass.cpp  | 2 ++
 .../utilities/optional/optional.comp_with_t/not_equal.pass.cpp  | 2 ++
 .../std/utilities/optional/optional.hash/enabled_hash.pass.cpp  | 2 ++
 libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp  | 2 ++
 .../test/std/utilities/optional/optional.nullops/equal.pass.cpp | 2 ++
 .../std/utilities/optional/optional.nullops/greater.pass.cpp    | 2 ++
 .../utilities/optional/optional.nullops/greater_equal.pass.cpp  | 2 ++
 .../std/utilities/optional/optional.nullops/less_equal.pass.cpp | 2 ++
 .../std/utilities/optional/optional.nullops/less_than.pass.cpp  | 2 ++
 .../std/utilities/optional/optional.nullops/not_equal.pass.cpp  | 2 ++
 .../std/utilities/optional/optional.nullopt/nullopt_t.pass.cpp  | 2 ++
 .../optional.object/optional.object.ctor/deduct.pass.cpp        | 2 ++
 .../optional.object/optional.object.ctor/nullopt_t.pass.cpp     | 2 ++
 .../optional/optional.object/optional.object.dtor/dtor.pass.cpp | 2 ++
 .../optional/optional.object/optional.object.mod/reset.pass.cpp | 2 ++
 .../optional.object.observe/value_or_const.pass.cpp             | 2 ++
 .../utilities/optional/optional.object/special_members.pass.cpp | 2 ++
 .../std/utilities/optional/optional.object/triviality.pass.cpp  | 2 ++
 .../test/std/utilities/optional/optional.object/types.pass.cpp  | 2 ++
 .../test/std/utilities/optional/optional.relops/equal.pass.cpp  | 2 ++
 .../utilities/optional/optional.relops/greater_equal.pass.cpp   | 2 ++
 .../utilities/optional/optional.relops/greater_than.pass.cpp    | 2 ++
 .../std/utilities/optional/optional.relops/less_equal.pass.cpp  | 2 ++
 .../std/utilities/optional/optional.relops/less_than.pass.cpp   | 2 ++
 .../std/utilities/optional/optional.relops/not_equal.pass.cpp   | 2 ++
 .../optional/optional.specalg/make_optional_explicit.pass.cpp   | 2 ++
 .../optional.syn/optional_includes_initializer_list.pass.cpp    | 2 ++
 .../std/utilities/ratio/ratio.arithmetic/ratio_add.pass.cpp     | 2 ++
 .../std/utilities/ratio/ratio.arithmetic/ratio_divide.pass.cpp  | 2 ++
 .../utilities/ratio/ratio.arithmetic/ratio_multiply.pass.cpp    | 2 ++
 .../utilities/ratio/ratio.arithmetic/ratio_subtract.pass.cpp    | 2 ++
 libcxx/test/std/utilities/ratio/ratio.ratio/ratio.pass.cpp      | 2 ++
 libcxx/test/std/utilities/ratio/typedefs.pass.cpp               | 2 ++
 .../unique.ptr/unique.ptr.class/unique.ptr.asgn/move.pass.cpp   | 1 +
 .../unique.ptr.asgn/move_convert.runtime.pass.cpp               | 1 +
 .../unique.ptr.asgn/move_convert.single.pass.cpp                | 1 +
 .../unique.ptr/unique.ptr.class/unique.ptr.asgn/null.pass.cpp   | 1 +
 .../unique.ptr.class/unique.ptr.asgn/nullptr.pass.cpp           | 1 +
 .../unique.ptr.ctor/move_convert.runtime.pass.cpp               | 1 +
 .../unique.ptr/unique.ptr.class/unique.ptr.dtor/null.pass.cpp   | 2 ++
 .../unique.ptr.class/unique.ptr.modifiers/reset.single.pass.cpp | 1 +
 .../unique.ptr.class/unique.ptr.modifiers/reset_self.pass.cpp   | 2 ++
 .../unique.ptr.observers/dereference.single.pass.cpp            | 2 ++
 .../unique.ptr.observers/op_arrow.single.pass.cpp               | 2 ++
 .../unique.ptr.observers/op_subscript.runtime.pass.cpp          | 2 ++
 .../unique.ptr/unique.ptr.create/make_unique.array.pass.cpp     | 2 ++
 .../unique.ptr/unique.ptr.create/make_unique.single.pass.cpp    | 2 ++
 .../unique.ptr.dltr/unique.ptr.dltr.dflt/convert_ctor.pass.cpp  | 2 ++
 .../unique.ptr.dltr/unique.ptr.dltr.dflt/default.pass.cpp       | 2 ++
 .../unique.ptr.dltr/unique.ptr.dltr.dflt1/convert_ctor.pass.cpp | 2 ++
 .../unique.ptr.dltr/unique.ptr.dltr.dflt1/default.pass.cpp      | 2 ++
 .../smartptr/unique.ptr/unique.ptr.special/cmp_nullptr.pass.cpp | 2 ++
 .../smartptr/unique.ptr/unique.ptr.special/eq.pass.cpp          | 1 +
 .../smartptr/unique.ptr/unique.ptr.special/rel.pass.cpp         | 1 +
 .../utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp | 2 ++
 .../std/utilities/template.bitset/bitset.members/all.pass.cpp   | 2 ++
 .../std/utilities/template.bitset/bitset.members/any.pass.cpp   | 2 ++
 .../std/utilities/template.bitset/bitset.members/none.pass.cpp  | 2 ++
 .../std/utilities/template.bitset/bitset.members/size.pass.cpp  | 2 ++
 .../utilities/template.bitset/bitset.members/to_ullong.pass.cpp | 2 ++
 .../utilities/template.bitset/bitset.members/to_ulong.pass.cpp  | 2 ++
 .../template.bitset/bitset.operators/stream_out.pass.cpp        | 2 ++
 libcxx/test/std/utilities/template.bitset/includes.pass.cpp     | 2 ++
 libcxx/test/std/utilities/time/days.pass.cpp                    | 2 ++
 libcxx/test/std/utilities/time/hours.pass.cpp                   | 2 ++
 libcxx/test/std/utilities/time/microseconds.pass.cpp            | 2 ++
 libcxx/test/std/utilities/time/milliseconds.pass.cpp            | 2 ++
 libcxx/test/std/utilities/time/minutes.pass.cpp                 | 2 ++
 libcxx/test/std/utilities/time/months.pass.cpp                  | 2 ++
 libcxx/test/std/utilities/time/nanoseconds.pass.cpp             | 2 ++
 libcxx/test/std/utilities/time/seconds.pass.cpp                 | 2 ++
 .../time/time.clock/time.clock.file/consistency.pass.cpp        | 2 ++
 .../time/time.clock/time.clock.file/rep_signed.pass.cpp         | 2 ++
 .../time/time.clock/time.clock.hires/consistency.pass.cpp       | 2 ++
 .../std/utilities/time/time.clock/time.clock.hires/now.pass.cpp | 2 ++
 .../time/time.clock/time.clock.steady/consistency.pass.cpp      | 2 ++
 .../utilities/time/time.clock/time.clock.steady/now.pass.cpp    | 2 ++
 .../time/time.clock/time.clock.system/consistency.pass.cpp      | 2 ++
 .../time/time.clock/time.clock.system/from_time_t.pass.cpp      | 2 ++
 .../utilities/time/time.clock/time.clock.system/now.pass.cpp    | 2 ++
 .../time/time.clock/time.clock.system/rep_signed.pass.cpp       | 2 ++
 .../time/time.clock/time.clock.system/to_time_t.pass.cpp        | 2 ++
 .../std/utilities/time/time.duration/default_ratio.pass.cpp     | 2 ++
 .../utilities/time/time.duration/time.duration.alg/abs.pass.cpp | 2 ++
 .../time/time.duration/time.duration.cast/ceil.pass.cpp         | 2 ++
 .../time/time.duration/time.duration.cast/floor.pass.cpp        | 2 ++
 .../time/time.duration/time.duration.cast/round.pass.cpp        | 2 ++
 .../time.duration/time.duration.cons/convert_overflow.pass.cpp  | 2 ++
 .../time.duration/time.duration.literals/literals1.pass.cpp     | 2 ++
 .../time.duration/time.duration.literals/literals2.pass.cpp     | 2 ++
 libcxx/test/std/utilities/time/time.duration/types.pass.cpp     | 2 ++
 .../std/utilities/time/time.point/default_duration.pass.cpp     | 2 ++
 .../std/utilities/time/time.point/time.point.cast/ceil.pass.cpp | 2 ++
 .../utilities/time/time.point/time.point.cast/floor.pass.cpp    | 2 ++
 .../utilities/time/time.point/time.point.cast/round.pass.cpp    | 2 ++
 .../time.traits/time.traits.specializations/duration.pass.cpp   | 2 ++
 .../time.traits/time.traits.specializations/time_point.pass.cpp | 2 ++
 libcxx/test/std/utilities/time/weeks.pass.cpp                   | 2 ++
 libcxx/test/std/utilities/time/years.pass.cpp                   | 2 ++
 .../std/utilities/tuple/tuple.general/tuple.smartptr.pass.cpp   | 2 ++
 .../test/std/utilities/tuple/tuple.tuple/TupleFunction.pass.cpp | 2 ++
 .../tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp | 1 +
 .../tuple/tuple.tuple/tuple.apply/apply_large_arity.pass.cpp    | 2 ++
 .../tuple/tuple.tuple/tuple.assign/const_pair.pass.cpp          | 2 ++
 .../tuple/tuple.tuple/tuple.assign/convert_copy.pass.cpp        | 2 ++
 .../tuple/tuple.tuple/tuple.assign/convert_move.pass.cpp        | 2 ++
 .../std/utilities/tuple/tuple.tuple/tuple.assign/move.pass.cpp  | 1 +
 .../utilities/tuple/tuple.tuple/tuple.assign/move_pair.pass.cpp | 2 ++
 .../tuple.assign/tuple_array_template_depth.pass.cpp            | 2 ++
 .../tuple.cnstr/PR22806_constrain_tuple_like_ctor.pass.cpp      | 2 ++
 .../tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp          | 2 ++
 .../PR27684_contains_ref_to_incomplete_type.pass.cpp            | 2 ++
 .../utilities/tuple/tuple.tuple/tuple.cnstr/PR31384.pass.cpp    | 2 ++
 .../std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc.pass.cpp  | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/alloc_UTypes.pass.cpp         | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/alloc_const_Types.pass.cpp    | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/alloc_const_pair.pass.cpp     | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/alloc_convert_copy.pass.cpp   | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/alloc_convert_move.pass.cpp   | 1 +
 .../utilities/tuple/tuple.tuple/tuple.cnstr/alloc_copy.pass.cpp | 1 +
 .../utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move.pass.cpp | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/alloc_move_pair.pass.cpp      | 1 +
 .../tuple/tuple.tuple/tuple.cnstr/convert_move.pass.cpp         | 2 ++
 .../utilities/tuple/tuple.tuple/tuple.cnstr/default.pass.cpp    | 1 +
 .../std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp   | 2 ++
 .../std/utilities/tuple/tuple.tuple/tuple.cnstr/move.pass.cpp   | 1 +
 .../utilities/tuple/tuple.tuple/tuple.cnstr/move_pair.pass.cpp  | 2 ++
 .../tuple/tuple.tuple/tuple.cnstr/test_lazy_sfinae.pass.cpp     | 2 ++
 .../tuple.tuple/tuple.cnstr/tuple_array_template_depth.pass.cpp | 2 ++
 .../std/utilities/tuple/tuple.tuple/tuple.elem/get_rv.pass.cpp  | 2 ++
 .../tuple/tuple.tuple/tuple.elem/tuple.by.type.pass.cpp         | 2 ++
 .../tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp | 2 ++
 .../tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp     | 2 ++
 .../tuple/tuple.tuple/tuple.helper/tuple_size.pass.cpp          | 2 ++
 .../tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp     | 2 ++
 .../tuple.helper/tuple_size_structured_bindings.pass.cpp        | 2 ++
 .../tuple/tuple.tuple/tuple.helper/tuple_size_v.pass.cpp        | 2 ++
 .../tuple.tuple/tuple.helper/tuple_size_value_sfinae.pass.cpp   | 2 ++
 .../tuple/tuple.tuple/tuple.special/non_member_swap.pass.cpp    | 1 +
 .../utilities/tuple/tuple.tuple/tuple.swap/member_swap.pass.cpp | 1 +
 .../tuple/tuple.tuple/tuple.traits/uses_allocator.pass.cpp      | 2 ++
 .../utilities/type.index/type.index.hash/enabled_hash.pass.cpp  | 2 ++
 .../test/std/utilities/type.index/type.index.hash/hash.pass.cpp | 2 ++
 .../std/utilities/type.index/type.index.members/ctor.pass.cpp   | 2 ++
 .../std/utilities/type.index/type.index.members/eq.pass.cpp     | 2 ++
 .../utilities/type.index/type.index.members/hash_code.pass.cpp  | 2 ++
 .../std/utilities/type.index/type.index.members/lt.pass.cpp     | 2 ++
 .../std/utilities/type.index/type.index.members/name.pass.cpp   | 2 ++
 .../type.index/type.index.overview/copy_assign.pass.cpp         | 2 ++
 .../utilities/type.index/type.index.overview/copy_ctor.pass.cpp | 2 ++
 libcxx/test/std/utilities/utility/as_const/as_const.pass.cpp    | 2 ++
 libcxx/test/std/utilities/utility/operators/rel_ops.pass.cpp    | 2 ++
 .../std/utilities/utility/pairs/pair.astuple/get_rv.pass.cpp    | 2 ++
 .../utilities/utility/pairs/pair.astuple/pairs.by.type.pass.cpp | 2 ++
 .../utilities/utility/pairs/pair.astuple/tuple_element.pass.cpp | 2 ++
 .../utilities/utility/pairs/pair.astuple/tuple_size.pass.cpp    | 2 ++
 .../utility/pairs/pair.piecewise/piecewise_construct.pass.cpp   | 2 ++
 libcxx/test/std/utilities/utility/pairs/pairs.pair/U_V.pass.cpp | 2 ++
 .../std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp | 2 ++
 .../utility/pairs/pairs.pair/assign_pair_cxx03.pass.cpp         | 2 ++
 .../utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp  | 2 ++
 .../utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp        | 2 ++
 .../utility/pairs/pairs.pair/const_first_const_second.pass.cpp  | 2 ++
 .../pairs/pairs.pair/const_first_const_second_cxx03.pass.cpp    | 2 ++
 .../utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp  | 2 ++
 .../utility/pairs/pairs.pair/const_pair_U_V_cxx03.pass.cpp      | 2 ++
 .../std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp   | 2 ++
 .../std/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp | 2 ++
 .../pairs/pairs.pair/special_member_generation_test.pass.cpp    | 2 ++
 .../test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp   | 2 ++
 .../test/std/utilities/utility/pairs/pairs.pair/types.pass.cpp  | 2 ++
 .../utilities/utility/pairs/pairs.spec/non_member_swap.pass.cpp | 2 ++
 libcxx/test/std/utilities/utility/synopsis.pass.cpp             | 2 ++
 .../variant.bad_variant_access/bad_variant_access.pass.cpp      | 2 ++
 .../std/utilities/variant/variant.hash/enabled_hash.pass.cpp    | 2 ++
 .../std/utilities/variant/variant.helpers/variant_size.pass.cpp | 2 ++
 .../std/utilities/variant/variant.monostate/monostate.pass.cpp  | 2 ++
 .../utilities/variant/variant.synopsis/variant_npos.pass.cpp    | 2 ++
 .../test/support/test.support/test_convertible_header.pass.cpp  | 2 ++
 libcxx/test/support/test.support/test_demangle.pass.cpp         | 1 +
 .../support/test.support/test_poisoned_hash_helper.pass.cpp     | 2 ++
 .../test.workarounds/c1xx_broken_is_trivially_copyable.pass.cpp | 1 +
 .../support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp | 1 +
 3295 files changed, 5607 insertions(+)

diff --git a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp
index ec8a265b4574d..2c34f7748cdd2 100644
--- a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp
@@ -31,6 +31,8 @@
 #include <algorithm>
 #include <vector>
 
+#include "test_macros.h"
+
 struct gen
 {
     std::ptrdiff_t operator()(std::ptrdiff_t n)
diff --git a/libcxx/test/libcxx/algorithms/version.pass.cpp b/libcxx/test/libcxx/algorithms/version.pass.cpp
index e7d368789421a..57d84b3ffce83 100644
--- a/libcxx/test/libcxx/algorithms/version.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <algorithm>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/atomics/atomics.order/memory_order.underlying_type.pass.cpp b/libcxx/test/libcxx/atomics/atomics.order/memory_order.underlying_type.pass.cpp
index feae9bbcd06d4..0f31f43e2b7cb 100644
--- a/libcxx/test/libcxx/atomics/atomics.order/memory_order.underlying_type.pass.cpp
+++ b/libcxx/test/libcxx/atomics/atomics.order/memory_order.underlying_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 enum cpp17_memory_order {
   cpp17_memory_order_relaxed, cpp17_memory_order_consume, cpp17_memory_order_acquire,
diff --git a/libcxx/test/libcxx/atomics/version.pass.cpp b/libcxx/test/libcxx/atomics/version.pass.cpp
index 48114a3e5db6d..71617252d6006 100644
--- a/libcxx/test/libcxx/atomics/version.pass.cpp
+++ b/libcxx/test/libcxx/atomics/version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <atomic>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/associative/map/at.abort.pass.cpp b/libcxx/test/libcxx/containers/associative/map/at.abort.pass.cpp
index d34f48f4dae25..d4edbd752d200 100644
--- a/libcxx/test/libcxx/containers/associative/map/at.abort.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/map/at.abort.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <map>
 
+#include "test_macros.h"
+
 
 void exit_success(int) {
     std::_Exit(EXIT_SUCCESS);
diff --git a/libcxx/test/libcxx/containers/associative/map/at.const.abort.pass.cpp b/libcxx/test/libcxx/containers/associative/map/at.const.abort.pass.cpp
index 705ada8693633..6860e34314980 100644
--- a/libcxx/test/libcxx/containers/associative/map/at.const.abort.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/map/at.const.abort.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <map>
 
+#include "test_macros.h"
+
 
 void exit_success(int) {
     std::_Exit(EXIT_SUCCESS);
diff --git a/libcxx/test/libcxx/containers/associative/map/version.pass.cpp b/libcxx/test/libcxx/containers/associative/map/version.pass.cpp
index 8a498c60dbf6c..ebc45e4d4867f 100644
--- a/libcxx/test/libcxx/containers/associative/map/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/map/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/associative/non_const_comparator.pass.cpp b/libcxx/test/libcxx/containers/associative/non_const_comparator.pass.cpp
index 83a1cee11275f..8e3ec7c635898 100644
--- a/libcxx/test/libcxx/containers/associative/non_const_comparator.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/non_const_comparator.pass.cpp
@@ -18,6 +18,8 @@
 #include <map>
 #include <functional>
 
+#include "test_macros.h"
+
 
 template <template <typename ...> class Container>
 void test_set() {
diff --git a/libcxx/test/libcxx/containers/associative/set/version.pass.cpp b/libcxx/test/libcxx/containers/associative/set/version.pass.cpp
index b0d9abd6ac789..f5a76c7743314 100644
--- a/libcxx/test/libcxx/containers/associative/set/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/set/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <set>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/associative/tree_balance_after_insert.pass.cpp b/libcxx/test/libcxx/containers/associative/tree_balance_after_insert.pass.cpp
index e178a408fb723..c3d100e53711b 100644
--- a/libcxx/test/libcxx/containers/associative/tree_balance_after_insert.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/tree_balance_after_insert.pass.cpp
@@ -16,6 +16,8 @@
 #include <__tree>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Node
 {
     Node* __left_;
diff --git a/libcxx/test/libcxx/containers/associative/tree_left_rotate.pass.cpp b/libcxx/test/libcxx/containers/associative/tree_left_rotate.pass.cpp
index 5f775c3f6afed..b766489adc692 100644
--- a/libcxx/test/libcxx/containers/associative/tree_left_rotate.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/tree_left_rotate.pass.cpp
@@ -16,6 +16,8 @@
 #include <__tree>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Node
 {
     Node* __left_;
diff --git a/libcxx/test/libcxx/containers/associative/tree_remove.pass.cpp b/libcxx/test/libcxx/containers/associative/tree_remove.pass.cpp
index c3ec20c55c15a..e9afd845a7e31 100644
--- a/libcxx/test/libcxx/containers/associative/tree_remove.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/tree_remove.pass.cpp
@@ -16,6 +16,8 @@
 #include <__tree>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Node
 {
     Node* __left_;
diff --git a/libcxx/test/libcxx/containers/associative/tree_right_rotate.pass.cpp b/libcxx/test/libcxx/containers/associative/tree_right_rotate.pass.cpp
index 5332d7b16b6d6..04d2b094aad69 100644
--- a/libcxx/test/libcxx/containers/associative/tree_right_rotate.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/tree_right_rotate.pass.cpp
@@ -16,6 +16,8 @@
 #include <__tree>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Node
 {
     Node* __left_;
diff --git a/libcxx/test/libcxx/containers/associative/undef_min_max.pass.cpp b/libcxx/test/libcxx/containers/associative/undef_min_max.pass.cpp
index 53dd87871d309..1408da9740295 100644
--- a/libcxx/test/libcxx/containers/associative/undef_min_max.pass.cpp
+++ b/libcxx/test/libcxx/containers/associative/undef_min_max.pass.cpp
@@ -15,6 +15,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   std::map<int, int> m;
   ((void)m);
diff --git a/libcxx/test/libcxx/containers/container.adaptors/queue/version.pass.cpp b/libcxx/test/libcxx/containers/container.adaptors/queue/version.pass.cpp
index 353c091795cc1..685626291fb71 100644
--- a/libcxx/test/libcxx/containers/container.adaptors/queue/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/container.adaptors/queue/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <queue>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/container.adaptors/stack/version.pass.cpp b/libcxx/test/libcxx/containers/container.adaptors/stack/version.pass.cpp
index e8da8c52b7359..9ee45bcc9cc36 100644
--- a/libcxx/test/libcxx/containers/container.adaptors/stack/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/container.adaptors/stack/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <stack>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp b/libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp
index e27124120684c..bfcb8aeda3f93 100644
--- a/libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp
+++ b/libcxx/test/libcxx/containers/gnu_cxx/hash_map.pass.cpp
@@ -19,6 +19,8 @@ struct equal_to;
 struct unique_ptr;
 #include <ext/hash_map>
 
+#include "test_macros.h"
+
 
 namespace __gnu_cxx {
 template class hash_map<int, int>;
diff --git a/libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp b/libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp
index 65c0e9791bf80..b3e1e19566329 100644
--- a/libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp
+++ b/libcxx/test/libcxx/containers/gnu_cxx/hash_set.pass.cpp
@@ -18,6 +18,8 @@ struct equal_to;
 struct unique_ptr;
 #include <ext/hash_set>
 
+#include "test_macros.h"
+
 namespace __gnu_cxx {
 template class hash_set<int>;
 }
diff --git a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp
index 42c6322b4dd59..1cd95f36149a4 100644
--- a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_back.pass.cpp
@@ -17,6 +17,7 @@
 
 #define _LIBCPP_DEBUG 1
 #include <array>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp
index 83721acd97d43..98149fa20ad15 100644
--- a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_front.pass.cpp
@@ -17,6 +17,7 @@
 
 #define _LIBCPP_DEBUG 1
 #include <array>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp
index 0681067a21e1e..8cbf0d753cd80 100644
--- a/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/array/array.zero/db_indexing.pass.cpp
@@ -17,6 +17,7 @@
 
 #define _LIBCPP_DEBUG 1
 #include <array>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/containers/sequences/array/version.pass.cpp b/libcxx/test/libcxx/containers/sequences/array/version.pass.cpp
index 29b15ad3e374f..8cd0a6eaacea5 100644
--- a/libcxx/test/libcxx/containers/sequences/array/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/array/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <array>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/sequences/deque/incomplete.pass.cpp b/libcxx/test/libcxx/containers/sequences/deque/incomplete.pass.cpp
index 8179768d586f6..5576f2b8f9046 100644
--- a/libcxx/test/libcxx/containers/sequences/deque/incomplete.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/deque/incomplete.pass.cpp
@@ -16,6 +16,8 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A {
   std::deque<A> d;
   std::deque<A>::iterator it;
diff --git a/libcxx/test/libcxx/containers/sequences/deque/pop_back_empty.pass.cpp b/libcxx/test/libcxx/containers/sequences/deque/pop_back_empty.pass.cpp
index 169c0f72d6de6..c9e8167203f4e 100644
--- a/libcxx/test/libcxx/containers/sequences/deque/pop_back_empty.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/deque/pop_back_empty.pass.cpp
@@ -15,6 +15,8 @@
 #include <cstdlib>
 #include <deque>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
     std::deque<int> q;
diff --git a/libcxx/test/libcxx/containers/sequences/deque/version.pass.cpp b/libcxx/test/libcxx/containers/sequences/deque/version.pass.cpp
index 8f05025f1aaba..c55d5b4eb6b51 100644
--- a/libcxx/test/libcxx/containers/sequences/deque/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/deque/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <deque>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/sequences/forwardlist/version.pass.cpp b/libcxx/test/libcxx/containers/sequences/forwardlist/version.pass.cpp
index cbe6d5821c152..94023ecff4e58 100644
--- a/libcxx/test/libcxx/containers/sequences/forwardlist/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/forwardlist/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <forward_list>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp
index da0eb5ce0c303..13555410a4a45 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_copy.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::list<int> l1;
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp
index dcd05ec419bc7..5e7f3c419875b 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.cons/db_move.pass.cpp
@@ -21,6 +21,7 @@
 #include <list>
 #include <cstdlib>
 #include <cassert>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp
index b570fef66a08f..41a7795fcf448 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/emplace_db1.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     int i_;
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp
index c573bf7bed665..fe4452713c404 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db1.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp
index 65cc4b8f8d646..c592b262a9a55 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_db2.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp
index 971f2bd3f5951..cafbd9242029d 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db1.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp
index 131529e5c7d71..2384d109a19ad 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db2.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp
index a9a35056ae7c7..086b4282763a9 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db3.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp
index 642ee4498f5ce..e579600a28474 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/erase_iter_iter_db4.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp
index c7c7f76b9e2f7..27ccaface876f 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_iter_iter_db1.pass.cpp
@@ -21,6 +21,7 @@
 #include <list>
 #include <cstdlib>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp
index 10503bd9cd719..a9b63ba3b445e 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_rvalue_db1.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::list<int> v1(3);
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp
index 7a658e3942de8..67bdfd24ea00f 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_size_value_db1.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::list<int> c1(100);
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp
index cdf01fe9079f2..9f1153a0495b5 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/insert_iter_value_db1.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 int main(int, char**)
 {
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp
index 8649f12a88790..fe607f03419b1 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.modifiers/pop_back_db1.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp
index 23323d8c6fb75..9a65d02b20225 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp
index 37a206d2c97ca..71aad132829c9 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp
index 768c3d6f12200..af85e92fe99fc 100644
--- a/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/list.ops/db_splice_pos_list_iter_iter.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/libcxx/containers/sequences/list/version.pass.cpp b/libcxx/test/libcxx/containers/sequences/list/version.pass.cpp
index 677c085b41f65..8137ca740013b 100644
--- a/libcxx/test/libcxx/containers/sequences/list/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/list/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <list>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/sequences/vector/const_value_type.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/const_value_type.pass.cpp
index d3407e3aa8b16..893cfa0d86e1a 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/const_value_type.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/const_value_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <vector>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::vector<const int> v = {1, 2, 3};
diff --git a/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp
index 1d1e3a1ba3d9a..b35c6dbff6a8d 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/pop_back_empty.pass.cpp
@@ -15,6 +15,8 @@
 #include <cstdlib>
 #include <vector>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
     std::vector<int> v;
diff --git a/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp
index 81263dec0bda4..9e3fb886e6075 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 void test_ctor_under_alloc() {
diff --git a/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp
index 0100507eab041..fa1bd2d4fda32 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/vector.cons/construct_iter_iter_alloc.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 void test_ctor_under_alloc() {
diff --git a/libcxx/test/libcxx/containers/sequences/vector/version.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/version.pass.cpp
index 93fd2e679b544..ef20070cdb76e 100644
--- a/libcxx/test/libcxx/containers/sequences/vector/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/sequences/vector/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <vector>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp b/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp
index d38e0ad8f13b0..82f7f0b908041 100644
--- a/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp
@@ -22,6 +22,8 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 bool
 is_power_of_two(unsigned long n)
diff --git a/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp b/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp
index 6a82ea1d6b222..642e5fa1d63f5 100644
--- a/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp
@@ -19,6 +19,8 @@
 #include <__hash_table>
 #include <cassert>
 
+#include "test_macros.h"
+
 bool
 is_prime(size_t n)
 {
diff --git a/libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp b/libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp
index e2eeda53964e9..e03b47e56014c 100644
--- a/libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp
@@ -18,6 +18,8 @@
 #include <unordered_map>
 #include <functional>
 
+#include "test_macros.h"
+
 
 template <template <typename ...> class Container>
 void test_set() {
diff --git a/libcxx/test/libcxx/containers/unord/unord.map/at.abort.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/at.abort.pass.cpp
index b65af169b9b58..364d24e17295b 100644
--- a/libcxx/test/libcxx/containers/unord/unord.map/at.abort.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/unord.map/at.abort.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstdlib>
 #include <unordered_map>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
     std::signal(SIGABRT, [](int) { std::_Exit(EXIT_SUCCESS); });
diff --git a/libcxx/test/libcxx/containers/unord/unord.map/at.const.abort.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/at.const.abort.pass.cpp
index af2a2cd7691a9..cc4cf2b37dc9b 100644
--- a/libcxx/test/libcxx/containers/unord/unord.map/at.const.abort.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/unord.map/at.const.abort.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstdlib>
 #include <unordered_map>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
     std::signal(SIGABRT, [](int) { std::_Exit(EXIT_SUCCESS); });
diff --git a/libcxx/test/libcxx/containers/unord/unord.map/version.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.map/version.pass.cpp
index 983acde548b8f..09196a89f9843 100644
--- a/libcxx/test/libcxx/containers/unord/unord.map/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/unord.map/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <unordered_map>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/containers/unord/unord.set/version.pass.cpp b/libcxx/test/libcxx/containers/unord/unord.set/version.pass.cpp
index 63144528cf930..7463275f2c277 100644
--- a/libcxx/test/libcxx/containers/unord/unord.set/version.pass.cpp
+++ b/libcxx/test/libcxx/containers/unord/unord.set/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <unordered_set>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp b/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp
index af31c7474a46d..e837da778ab80 100644
--- a/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp
+++ b/libcxx/test/libcxx/debug/containers/db_associative_container_tests.pass.cpp
@@ -23,6 +23,7 @@
 #include <utility>
 #include <cassert>
 #include "container_debug_tests.hpp"
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 using namespace IteratorDebugChecks;
diff --git a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp
index 3cd0ce0315830..75b9c0da5270c 100644
--- a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp
+++ b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.multithread.pass.cpp
@@ -26,6 +26,8 @@
 #include <vector>
 #include "container_debug_tests.hpp"
 
+#include "test_macros.h"
+
 
 template <typename Container>
 Container makeContainer(int size) {
diff --git a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp
index e3932bc0981a5..43f1dd9ce72dc 100644
--- a/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp
+++ b/libcxx/test/libcxx/debug/containers/db_sequence_container_iterators.pass.cpp
@@ -23,6 +23,7 @@
 #include <vector>
 #include <deque>
 #include "container_debug_tests.hpp"
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 using namespace IteratorDebugChecks;
diff --git a/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp b/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp
index 99b4006857157..5ddbfa9b6c297 100644
--- a/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp
+++ b/libcxx/test/libcxx/debug/containers/db_unord_container_tests.pass.cpp
@@ -22,6 +22,7 @@
 #include <utility>
 #include <cassert>
 #include "container_debug_tests.hpp"
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 using namespace IteratorDebugChecks;
diff --git a/libcxx/test/libcxx/debug/debug_abort.pass.cpp b/libcxx/test/libcxx/debug/debug_abort.pass.cpp
index 270f2cb0f43b1..15c9173b285a3 100644
--- a/libcxx/test/libcxx/debug/debug_abort.pass.cpp
+++ b/libcxx/test/libcxx/debug/debug_abort.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <__debug>
 
+#include "test_macros.h"
+
 void signal_handler(int signal)
 {
     if (signal == SIGABRT)
diff --git a/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp b/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp
index 7b8db3d45bfb0..fada39ccbaf3d 100644
--- a/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp
+++ b/libcxx/test/libcxx/debug/debug_helper_test.pass.cpp
@@ -18,6 +18,7 @@
 #define _LIBCPP_DEBUG 1
 
 #include <__debug>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 
diff --git a/libcxx/test/libcxx/debug/debug_register.pass.cpp b/libcxx/test/libcxx/debug/debug_register.pass.cpp
index 1d147af107327..d687fc90a1ca1 100644
--- a/libcxx/test/libcxx/debug/debug_register.pass.cpp
+++ b/libcxx/test/libcxx/debug/debug_register.pass.cpp
@@ -20,6 +20,8 @@
 #include <__debug>
 #include <cassert>
 
+#include "test_macros.h"
+
 void my_debug_function(std::__libcpp_debug_info const& info) {
   assert(info.__msg_ == std::string("foo"));
   std::exit(0);
diff --git a/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.cxx1z.pass.cpp b/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.cxx1z.pass.cpp
index baa4fbf91ed78..67dadd640e40e 100644
--- a/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.cxx1z.pass.cpp
+++ b/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.cxx1z.pass.cpp
@@ -22,6 +22,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::auto_ptr<int> p;
diff --git a/libcxx/test/libcxx/depr/depr.function.objects/depr.adaptors.cxx1z.pass.cpp b/libcxx/test/libcxx/depr/depr.function.objects/depr.adaptors.cxx1z.pass.cpp
index c13edeacd2492..3469c27e05a92 100644
--- a/libcxx/test/libcxx/depr/depr.function.objects/depr.adaptors.cxx1z.pass.cpp
+++ b/libcxx/test/libcxx/depr/depr.function.objects/depr.adaptors.cxx1z.pass.cpp
@@ -20,6 +20,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int identity(int v) { return v; }
 int sum(int a, int b) { return a + b; }
 
diff --git a/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp b/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp
index 148f233cee2c9..59279f7ac6c28 100644
--- a/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp
+++ b/libcxx/test/libcxx/depr/depr.str.strstreams/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <strstream>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.pass.cpp b/libcxx/test/libcxx/depr/enable_removed_cpp17_features.pass.cpp
index 2065b2b4eeaf2..115a5f3b45232 100644
--- a/libcxx/test/libcxx/depr/enable_removed_cpp17_features.pass.cpp
+++ b/libcxx/test/libcxx/depr/enable_removed_cpp17_features.pass.cpp
@@ -13,6 +13,8 @@
 #define _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES
 #include <__config>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS
 #error _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS must be defined
 #endif
diff --git a/libcxx/test/libcxx/depr/exception.unexpected/get_unexpected.pass.cpp b/libcxx/test/libcxx/depr/exception.unexpected/get_unexpected.pass.cpp
index ca14271f02dcc..a40d889cfb424 100644
--- a/libcxx/test/libcxx/depr/exception.unexpected/get_unexpected.pass.cpp
+++ b/libcxx/test/libcxx/depr/exception.unexpected/get_unexpected.pass.cpp
@@ -15,6 +15,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/libcxx/depr/exception.unexpected/set_unexpected.pass.cpp b/libcxx/test/libcxx/depr/exception.unexpected/set_unexpected.pass.cpp
index dd861941a447b..745c3cda5bcff 100644
--- a/libcxx/test/libcxx/depr/exception.unexpected/set_unexpected.pass.cpp
+++ b/libcxx/test/libcxx/depr/exception.unexpected/set_unexpected.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/libcxx/depr/exception.unexpected/unexpected.pass.cpp b/libcxx/test/libcxx/depr/exception.unexpected/unexpected.pass.cpp
index b9bdabe0e76e9..fa7ed3e1d9870 100644
--- a/libcxx/test/libcxx/depr/exception.unexpected/unexpected.pass.cpp
+++ b/libcxx/test/libcxx/depr/exception.unexpected/unexpected.pass.cpp
@@ -14,6 +14,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 void fexit()
 {
     std::exit(0);
diff --git a/libcxx/test/libcxx/diagnostics/assertions/version_cassert.pass.cpp b/libcxx/test/libcxx/diagnostics/assertions/version_cassert.pass.cpp
index f2fb0952402e3..af076cc3a56d0 100644
--- a/libcxx/test/libcxx/diagnostics/assertions/version_cassert.pass.cpp
+++ b/libcxx/test/libcxx/diagnostics/assertions/version_cassert.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cassert>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/diagnostics/errno/version_cerrno.pass.cpp b/libcxx/test/libcxx/diagnostics/errno/version_cerrno.pass.cpp
index a8c51c01337fb..f452b05a52afa 100644
--- a/libcxx/test/libcxx/diagnostics/errno/version_cerrno.pass.cpp
+++ b/libcxx/test/libcxx/diagnostics/errno/version_cerrno.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <cerrno>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/diagnostics/nodiscard.pass.cpp b/libcxx/test/libcxx/diagnostics/nodiscard.pass.cpp
index 1db9a67a8b309..7b768a026c8d0 100644
--- a/libcxx/test/libcxx/diagnostics/nodiscard.pass.cpp
+++ b/libcxx/test/libcxx/diagnostics/nodiscard.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <__config>
 
+#include "test_macros.h"
+
 _LIBCPP_NODISCARD_EXT int foo() { return 42; }
 
 int main(int, char**) {
diff --git a/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.pass.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.pass.cpp
index 959ba4854b871..edf5a25e1db7b 100644
--- a/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.pass.cpp
+++ b/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.pass.cpp
@@ -14,6 +14,8 @@
 #define _LIBCPP_DISABLE_NODISCARD_AFTER_CXX17
 #include <__config>
 
+#include "test_macros.h"
+
 _LIBCPP_NODISCARD_AFTER_CXX17 int foo() { return 6; }
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/diagnostics/std.exceptions/version.pass.cpp b/libcxx/test/libcxx/diagnostics/std.exceptions/version.pass.cpp
index 147f4d2b9b3d2..91d91b406e4dd 100644
--- a/libcxx/test/libcxx/diagnostics/std.exceptions/version.pass.cpp
+++ b/libcxx/test/libcxx/diagnostics/std.exceptions/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <stdexcept>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/diagnostics/syserr/version.pass.cpp b/libcxx/test/libcxx/diagnostics/syserr/version.pass.cpp
index 4b987a668ad68..29e6095576c7c 100644
--- a/libcxx/test/libcxx/diagnostics/syserr/version.pass.cpp
+++ b/libcxx/test/libcxx/diagnostics/syserr/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <system_error>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/algorithms/header.algorithm.synop/includes.pass.cpp b/libcxx/test/libcxx/experimental/algorithms/header.algorithm.synop/includes.pass.cpp
index 271e94347d168..ba207e9f583c6 100644
--- a/libcxx/test/libcxx/experimental/algorithms/header.algorithm.synop/includes.pass.cpp
+++ b/libcxx/test/libcxx/experimental/algorithms/header.algorithm.synop/includes.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/algorithm>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_ALGORITHM
 #  error "<experimental/algorithm> must include <algorithm>"
 #endif
diff --git a/libcxx/test/libcxx/experimental/algorithms/version.pass.cpp b/libcxx/test/libcxx/experimental/algorithms/version.pass.cpp
index c43ad68a96efc..0a96474c83b12 100644
--- a/libcxx/test/libcxx/experimental/algorithms/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/algorithms/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/algorithm>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #  error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/diagnostics/syserr/version.pass.cpp b/libcxx/test/libcxx/experimental/diagnostics/syserr/version.pass.cpp
index 4f6d28c4c3549..357f80f3f9513 100644
--- a/libcxx/test/libcxx/experimental/diagnostics/syserr/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/diagnostics/syserr/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/system_error>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/filesystem/version.pass.cpp b/libcxx/test/libcxx/experimental/filesystem/version.pass.cpp
index d8b2cbbd14f33..98cbeab5b56d5 100644
--- a/libcxx/test/libcxx/experimental/filesystem/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/filesystem/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/filesystem>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp
index e12d31d5ff7aa..730030ffded83 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_piecewise_pair.pass.cpp
@@ -47,6 +47,8 @@
 #include <cstdlib>
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 template <class T>
diff --git a/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/db_deallocate.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/db_deallocate.pass.cpp
index 38fa265f8ed2c..2855ba122878c 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/db_deallocate.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/db_deallocate.pass.cpp
@@ -24,6 +24,8 @@ int AssertCount = 0;
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/db_deallocate.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/db_deallocate.pass.cpp
index d7a56be43877d..e9061928855e8 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/db_deallocate.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/db_deallocate.pass.cpp
@@ -24,6 +24,8 @@ int AssertCount = 0;
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_deque_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_deque_libcpp_version.pass.cpp
index 53423a7a7a8bc..d9d32b0bf4b3b 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_deque_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_deque_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/deque>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_forward_list_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_forward_list_libcpp_version.pass.cpp
index 8f71d91abad8f..86e195558d31d 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_forward_list_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_forward_list_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/forward_list>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_list_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_list_libcpp_version.pass.cpp
index 3cdf4794d3156..699d8a3ed05e3 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_list_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_list_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/list>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_map_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_map_libcpp_version.pass.cpp
index 94b636f66756d..9ce8dc001a20d 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_map_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_map_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/map>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp
index a34c52301b68e..bf2b599934f2e 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_regex_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/regex>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_set_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_set_libcpp_version.pass.cpp
index 70e34c9b0ca39..e1d496bc9ed55 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_set_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_set_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/set>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_string_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_string_libcpp_version.pass.cpp
index 7969b4f25b8f4..aae6af2a8d543 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_string_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_string_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/string>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_map_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_map_libcpp_version.pass.cpp
index 71cfb2c1ea802..2339f29ce3094 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_map_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_map_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/unordered_map>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_set_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_set_libcpp_version.pass.cpp
index 55a992c7135b6..3ee11327b347f 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_set_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_unordered_set_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/unordered_set>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_vector_libcpp_version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_vector_libcpp_version.pass.cpp
index 89a8fb0aab9f6..2375a13ba23b3 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_vector_libcpp_version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.aliases/header_vector_libcpp_version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/vector>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error header must provide _LIBCPP_VERSION
 #endif
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp
index 0405992a93a45..fffa994b2335d 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.global/global_memory_resource_lifetime.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 struct POSType {
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp
index 16fe293831add..fedc0b79718aa 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.global/new_delete_resource_lifetime.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 struct POSType {
diff --git a/libcxx/test/libcxx/experimental/memory/memory.resource.synop/version.pass.cpp b/libcxx/test/libcxx/experimental/memory/memory.resource.synop/version.pass.cpp
index e8d6285818202..32208bc462441 100644
--- a/libcxx/test/libcxx/experimental/memory/memory.resource.synop/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/memory/memory.resource.synop/version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <experimental/memory_resource>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/numerics/numeric.ops/version.pass.cpp b/libcxx/test/libcxx/experimental/numerics/numeric.ops/version.pass.cpp
index f8b642d07b9a2..9a3c44a928af6 100644
--- a/libcxx/test/libcxx/experimental/numerics/numeric.ops/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/numerics/numeric.ops/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/numeric>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/strings/string.view/version.pass.cpp b/libcxx/test/libcxx/experimental/strings/string.view/version.pass.cpp
index 7300a5501ebbf..65eb691315a00 100644
--- a/libcxx/test/libcxx/experimental/strings/string.view/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/strings/string.view/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/string_view>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/any/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/any/version.pass.cpp
index ecfdecfea9333..796d55bb5fe48 100644
--- a/libcxx/test/libcxx/experimental/utilities/any/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/any/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/any>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/meta/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/meta/version.pass.cpp
index 3568c6fe53d3f..3e4f67e86de66 100644
--- a/libcxx/test/libcxx/experimental/utilities/meta/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/meta/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/type_traits>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/optional/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/optional/version.pass.cpp
index 0e84f241a6575..65d8577c94ba2 100644
--- a/libcxx/test/libcxx/experimental/utilities/optional/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/optional/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/optional>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/ratio/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/ratio/version.pass.cpp
index 0357c79efeea1..1c33148f49ada 100644
--- a/libcxx/test/libcxx/experimental/utilities/ratio/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/ratio/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/ratio>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/time/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/time/version.pass.cpp
index 5ff26f7b2dcdb..db542bfe43b04 100644
--- a/libcxx/test/libcxx/experimental/utilities/time/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/time/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/chrono>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/tuple/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/tuple/version.pass.cpp
index 4c1e305a45644..cf9bd4988ee46 100644
--- a/libcxx/test/libcxx/experimental/utilities/tuple/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/tuple/version.pass.cpp
@@ -13,6 +13,8 @@
 #endif
 #include <experimental/tuple>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/experimental/utilities/utility/version.pass.cpp b/libcxx/test/libcxx/experimental/utilities/utility/version.pass.cpp
index 5ba32b1ca207d..ab57eb0704364 100644
--- a/libcxx/test/libcxx/experimental/utilities/utility/version.pass.cpp
+++ b/libcxx/test/libcxx/experimental/utilities/utility/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/utility>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/extensions/hash/specializations.pass.cpp b/libcxx/test/libcxx/extensions/hash/specializations.pass.cpp
index 9397bbc402937..f5a97d309041b 100644
--- a/libcxx/test/libcxx/extensions/hash/specializations.pass.cpp
+++ b/libcxx/test/libcxx/extensions/hash/specializations.pass.cpp
@@ -12,6 +12,8 @@
 #include <ext/hash_map>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     char str[] = "test";
diff --git a/libcxx/test/libcxx/input.output/file.streams/c.files/version_ccstdio.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/c.files/version_ccstdio.pass.cpp
index d8ff6a7ac0a48..99a3a4aa8dacb 100644
--- a/libcxx/test/libcxx/input.output/file.streams/c.files/version_ccstdio.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/c.files/version_ccstdio.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstdio>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/file.streams/c.files/version_cinttypes.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/c.files/version_cinttypes.pass.cpp
index 23cecf9cbef15..0b7150d3eb10e 100644
--- a/libcxx/test/libcxx/input.output/file.streams/c.files/version_cinttypes.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/c.files/version_cinttypes.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cinttypes>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.close.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.close.pass.cpp
index 6af8769815652..90092f371b313 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.close.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.close.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp
index 19442da51ac65..1557a7c89abb1 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp
index 0dead68c97dac..c18ce90098359 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.cons/wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.cons/wchar_pointer.pass.cpp
index 178c7d69a43ec..9dd79a1f87c61 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.cons/wchar_pointer.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.cons/wchar_pointer.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.members/open_wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.members/open_wchar_pointer.pass.cpp
index 2e8b3620ed1ae..226cf8d832715 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.members/open_wchar_pointer.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/ifstream.members/open_wchar_pointer.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifdef _LIBCPP_HAS_OPEN_WITH_WCHAR
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp
index 7d6304cc9c945..15650b36686ee 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp
index 58f08b86533ef..d119ae624186b 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/version.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/version.pass.cpp
index 51532408873f7..d15a9bdebee9d 100644
--- a/libcxx/test/libcxx/input.output/file.streams/fstreams/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <fstream>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/filesystems/version.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/version.pass.cpp
index b0f03174425d6..a9db5b0f02e60 100644
--- a/libcxx/test/libcxx/input.output/filesystems/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/filesystems/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <filesystem>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/iostream.format/input.streams/version.pass.cpp b/libcxx/test/libcxx/input.output/iostream.format/input.streams/version.pass.cpp
index 77ed7563bdaa6..74ef7b1385ce9 100644
--- a/libcxx/test/libcxx/input.output/iostream.format/input.streams/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostream.format/input.streams/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <istream>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/iostream.format/output.streams/version.pass.cpp b/libcxx/test/libcxx/input.output/iostream.format/output.streams/version.pass.cpp
index f16e9a079d012..120cbe41bf55d 100644
--- a/libcxx/test/libcxx/input.output/iostream.format/output.streams/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostream.format/output.streams/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ostream>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/iostream.format/std.manip/version.pass.cpp b/libcxx/test/libcxx/input.output/iostream.format/std.manip/version.pass.cpp
index 498410ed106ff..fa1df70f1627a 100644
--- a/libcxx/test/libcxx/input.output/iostream.format/std.manip/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostream.format/std.manip/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <iomanip>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/iostream.forward/version.pass.cpp b/libcxx/test/libcxx/input.output/iostream.forward/version.pass.cpp
index 70f1ec6626fb1..b843a18f7dac0 100644
--- a/libcxx/test/libcxx/input.output/iostream.forward/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostream.forward/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <iosfwd>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/iostream.objects/version.pass.cpp b/libcxx/test/libcxx/input.output/iostream.objects/version.pass.cpp
index 7081e5abf1618..628970e5b7c82 100644
--- a/libcxx/test/libcxx/input.output/iostream.objects/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostream.objects/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/iostreams.base/ios/iostate.flags/clear.abort.pass.cpp b/libcxx/test/libcxx/input.output/iostreams.base/ios/iostate.flags/clear.abort.pass.cpp
index e6dc1c981002d..a302398feb7f2 100644
--- a/libcxx/test/libcxx/input.output/iostreams.base/ios/iostate.flags/clear.abort.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostreams.base/ios/iostate.flags/clear.abort.pass.cpp
@@ -22,6 +22,8 @@
 #include <ios>
 #include <streambuf>
 
+#include "test_macros.h"
+
 
 void exit_success(int) {
     std::_Exit(EXIT_SUCCESS);
diff --git a/libcxx/test/libcxx/input.output/iostreams.base/version.pass.cpp b/libcxx/test/libcxx/input.output/iostreams.base/version.pass.cpp
index 4b873a92661f4..f64419cffd38e 100644
--- a/libcxx/test/libcxx/input.output/iostreams.base/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostreams.base/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ios>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/stream.buffers/version.pass.cpp b/libcxx/test/libcxx/input.output/stream.buffers/version.pass.cpp
index cc55444b55b79..94019e8464560 100644
--- a/libcxx/test/libcxx/input.output/stream.buffers/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/stream.buffers/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <streambuf>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/input.output/string.streams/version.pass.cpp b/libcxx/test/libcxx/input.output/string.streams/version.pass.cpp
index 03beac77bca89..a0044b8dce5f0 100644
--- a/libcxx/test/libcxx/input.output/string.streams/version.pass.cpp
+++ b/libcxx/test/libcxx/input.output/string.streams/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <sstream>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp b/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp
index 37aa3dfb208c2..572ca3d0f5e4c 100644
--- a/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp
+++ b/libcxx/test/libcxx/iterators/advance.debug1.pass.cpp
@@ -19,6 +19,7 @@
 #define _LIBCPP_DEBUG 0
 
 #include <iterator>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 #include "test_iterators.h"
diff --git a/libcxx/test/libcxx/iterators/failed.pass.cpp b/libcxx/test/libcxx/iterators/failed.pass.cpp
index e44c15ebaa2d3..07c4de132cae3 100644
--- a/libcxx/test/libcxx/iterators/failed.pass.cpp
+++ b/libcxx/test/libcxx/iterators/failed.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/libcxx/iterators/next.debug1.pass.cpp b/libcxx/test/libcxx/iterators/next.debug1.pass.cpp
index 72d9fd4445b84..e3797f2eea02c 100644
--- a/libcxx/test/libcxx/iterators/next.debug1.pass.cpp
+++ b/libcxx/test/libcxx/iterators/next.debug1.pass.cpp
@@ -19,6 +19,7 @@
 #define _LIBCPP_DEBUG 0
 
 #include <iterator>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 #include "test_iterators.h"
diff --git a/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp b/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp
index da7c93113a572..42222de4b6ede 100644
--- a/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp
+++ b/libcxx/test/libcxx/iterators/prev.debug1.pass.cpp
@@ -20,6 +20,7 @@
 #define _LIBCPP_DEBUG 0
 
 #include <iterator>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 #include "test_iterators.h"
diff --git a/libcxx/test/libcxx/iterators/version.pass.cpp b/libcxx/test/libcxx/iterators/version.pass.cpp
index 16668898a4b4e..31c99ca6fb009 100644
--- a/libcxx/test/libcxx/iterators/version.pass.cpp
+++ b/libcxx/test/libcxx/iterators/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <iterator>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/cmp/version.pass.cpp b/libcxx/test/libcxx/language.support/cmp/version.pass.cpp
index 9d2ae8ac11356..859c567065487 100644
--- a/libcxx/test/libcxx/language.support/cmp/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/cmp/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <compare>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/cstdint/version.pass.cpp b/libcxx/test/libcxx/language.support/cstdint/version.pass.cpp
index 9f11f15d118ae..da1d4a11c0708 100644
--- a/libcxx/test/libcxx/language.support/cstdint/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/cstdint/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstdint>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.dynamic/version.pass.cpp b/libcxx/test/libcxx/language.support/support.dynamic/version.pass.cpp
index c3f542ca8407d..40a7f8c26a0db 100644
--- a/libcxx/test/libcxx/language.support/support.dynamic/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.dynamic/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <new>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.exception/version.pass.cpp b/libcxx/test/libcxx/language.support/support.exception/version.pass.cpp
index 495a8cde9697d..e46ab3c65fb45 100644
--- a/libcxx/test/libcxx/language.support/support.exception/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.exception/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <exception>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.initlist/version.pass.cpp b/libcxx/test/libcxx/language.support/support.initlist/version.pass.cpp
index 6f42987b16e03..4691706fe0bb7 100644
--- a/libcxx/test/libcxx/language.support/support.initlist/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.initlist/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <initializer_list>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.limits/c.limits/version_cfloat.pass.cpp b/libcxx/test/libcxx/language.support/support.limits/c.limits/version_cfloat.pass.cpp
index baa925f7d05fb..9b4a33bc7e9b3 100644
--- a/libcxx/test/libcxx/language.support/support.limits/c.limits/version_cfloat.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.limits/c.limits/version_cfloat.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cfloat>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.limits/c.limits/version_climits.pass.cpp b/libcxx/test/libcxx/language.support/support.limits/c.limits/version_climits.pass.cpp
index 208b16ed78072..1aba72d820f2c 100644
--- a/libcxx/test/libcxx/language.support/support.limits/c.limits/version_climits.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.limits/c.limits/version_climits.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <climits>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.limits/limits/version.pass.cpp b/libcxx/test/libcxx/language.support/support.limits/limits/version.pass.cpp
index a17643bc45fad..76076bfe094bf 100644
--- a/libcxx/test/libcxx/language.support/support.limits/limits/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.limits/limits/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <limits>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.limits/version.pass.cpp b/libcxx/test/libcxx/language.support/support.limits/version.pass.cpp
index 4277147f6e191..b4731204c6678 100644
--- a/libcxx/test/libcxx/language.support/support.limits/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.limits/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <version>
 
+#include "test_macros.h"
+
 #if !defined(_LIBCPP_VERSION)
 #error "_LIBCPP_VERSION must be defined after including <version>"
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.rtti/version.pass.cpp b/libcxx/test/libcxx/language.support/support.rtti/version.pass.cpp
index 3d21c8487b9c1..21388012cf1d3 100644
--- a/libcxx/test/libcxx/language.support/support.rtti/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.rtti/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <typeinfo>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.runtime/version_csetjmp.pass.cpp b/libcxx/test/libcxx/language.support/support.runtime/version_csetjmp.pass.cpp
index 9bceaf8238f3f..2069667031f6d 100644
--- a/libcxx/test/libcxx/language.support/support.runtime/version_csetjmp.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.runtime/version_csetjmp.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <csetjmp>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.runtime/version_csignal.pass.cpp b/libcxx/test/libcxx/language.support/support.runtime/version_csignal.pass.cpp
index b93fb0d172c71..f688ffdb23708 100644
--- a/libcxx/test/libcxx/language.support/support.runtime/version_csignal.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.runtime/version_csignal.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <csignal>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.runtime/version_cstdarg.pass.cpp b/libcxx/test/libcxx/language.support/support.runtime/version_cstdarg.pass.cpp
index 0ddd98b52c29d..bdb2c0e9c5dc4 100644
--- a/libcxx/test/libcxx/language.support/support.runtime/version_cstdarg.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.runtime/version_cstdarg.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstdarg>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.runtime/version_cstdbool.pass.cpp b/libcxx/test/libcxx/language.support/support.runtime/version_cstdbool.pass.cpp
index 85f1fb34d83b5..0c17570e27ddf 100644
--- a/libcxx/test/libcxx/language.support/support.runtime/version_cstdbool.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.runtime/version_cstdbool.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstdbool>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.runtime/version_cstdlib.pass.cpp b/libcxx/test/libcxx/language.support/support.runtime/version_cstdlib.pass.cpp
index 9a5a02fb882cb..3932cf049c175 100644
--- a/libcxx/test/libcxx/language.support/support.runtime/version_cstdlib.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.runtime/version_cstdlib.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstdlib>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.runtime/version_ctime.pass.cpp b/libcxx/test/libcxx/language.support/support.runtime/version_ctime.pass.cpp
index bc2d039b3d024..9bf285a2f4b55 100644
--- a/libcxx/test/libcxx/language.support/support.runtime/version_ctime.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.runtime/version_ctime.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ctime>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/language.support/support.types/version.pass.cpp b/libcxx/test/libcxx/language.support/support.types/version.pass.cpp
index 5dd755c0066ca..c263118beccfe 100644
--- a/libcxx/test/libcxx/language.support/support.types/version.pass.cpp
+++ b/libcxx/test/libcxx/language.support/support.types/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstddef>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/localization/c.locales/version.pass.cpp b/libcxx/test/libcxx/localization/c.locales/version.pass.cpp
index 2dfc76dd909c4..e5b6e41700eea 100644
--- a/libcxx/test/libcxx/localization/c.locales/version.pass.cpp
+++ b/libcxx/test/libcxx/localization/c.locales/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <clocale>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/localization/locale.categories/__scan_keyword.pass.cpp b/libcxx/test/libcxx/localization/locale.categories/__scan_keyword.pass.cpp
index b85bd8a580f59..1ecf378de5b61 100644
--- a/libcxx/test/libcxx/localization/locale.categories/__scan_keyword.pass.cpp
+++ b/libcxx/test/libcxx/localization/locale.categories/__scan_keyword.pass.cpp
@@ -38,6 +38,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::ctype<char>& ct = std::use_facet<std::ctype<char> >(std::locale::classic());
diff --git a/libcxx/test/libcxx/localization/locale.stdcvt/version.pass.cpp b/libcxx/test/libcxx/localization/locale.stdcvt/version.pass.cpp
index 738ab5e41f42b..e97819b37e4d8 100644
--- a/libcxx/test/libcxx/localization/locale.stdcvt/version.pass.cpp
+++ b/libcxx/test/libcxx/localization/locale.stdcvt/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <codecvt>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/localization/locales/locale.abort.pass.cpp b/libcxx/test/libcxx/localization/locales/locale.abort.pass.cpp
index 5817ebdfda524..cff67c0573aee 100644
--- a/libcxx/test/libcxx/localization/locales/locale.abort.pass.cpp
+++ b/libcxx/test/libcxx/localization/locales/locale.abort.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <locale>
 
+#include "test_macros.h"
+
 
 void exit_success(int) {
     std::_Exit(EXIT_SUCCESS);
diff --git a/libcxx/test/libcxx/localization/locales/locale.category.abort.pass.cpp b/libcxx/test/libcxx/localization/locales/locale.category.abort.pass.cpp
index cf50415a2c99f..60e5696dd546b 100644
--- a/libcxx/test/libcxx/localization/locales/locale.category.abort.pass.cpp
+++ b/libcxx/test/libcxx/localization/locales/locale.category.abort.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <locale>
 
+#include "test_macros.h"
+
 
 void exit_success(int) {
     std::_Exit(EXIT_SUCCESS);
diff --git a/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp b/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp
index 3aac6f532797a..4ad81b8d9d0b5 100644
--- a/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp
+++ b/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt_utf8<wchar_t> Codecvt;
diff --git a/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/facet.pass.cpp b/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/facet.pass.cpp
index 7be14d6be2bd8..072c85a11d2b8 100644
--- a/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/facet.pass.cpp
+++ b/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.facet/facet.pass.cpp
@@ -22,6 +22,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct my_facet
     : public std::locale::facet
 {
diff --git a/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.id/id.pass.cpp b/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.id/id.pass.cpp
index 758d7f8b81a10..5e0113474c9d1 100644
--- a/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.id/id.pass.cpp
+++ b/libcxx/test/libcxx/localization/locales/locale/locale.types/locale.id/id.pass.cpp
@@ -21,6 +21,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::locale::id id0;
 std::locale::id id2;
 std::locale::id id1;
diff --git a/libcxx/test/libcxx/localization/locales/use_facet.abort.pass.cpp b/libcxx/test/libcxx/localization/locales/use_facet.abort.pass.cpp
index 64700eab9d192..0145e005ba044 100644
--- a/libcxx/test/libcxx/localization/locales/use_facet.abort.pass.cpp
+++ b/libcxx/test/libcxx/localization/locales/use_facet.abort.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <locale>
 
+#include "test_macros.h"
+
 
 struct my_facet : public std::locale::facet {
     static std::locale::id id;
diff --git a/libcxx/test/libcxx/localization/version.pass.cpp b/libcxx/test/libcxx/localization/version.pass.cpp
index 1d1294593d272..7750f4c34ad07 100644
--- a/libcxx/test/libcxx/localization/version.pass.cpp
+++ b/libcxx/test/libcxx/localization/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <locale>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp b/libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp
index 7099eed2e0fbb..21e05fce48603 100644
--- a/libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp
+++ b/libcxx/test/libcxx/memory/aligned_allocation_macro.pass.cpp
@@ -22,6 +22,8 @@
 
 #include <new>
 
+#include "test_macros.h"
+
 
 #ifdef _LIBCPP_HAS_NO_ALIGNED_ALLOCATION
 #   error "libc++ should have aligned allocation in C++17 and up when targeting a platform that supports it"
diff --git a/libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp b/libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp
index 330b3a134f7b2..c85bf7e8fdaab 100644
--- a/libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp
+++ b/libcxx/test/libcxx/numerics/c.math/constexpr-fns.pass.cpp
@@ -22,6 +22,8 @@
 
 #include <cmath>
 
+#include "test_macros.h"
+
 static_assert(std::__libcpp_isnan_or_builtin(0.) == false, "");
 static_assert(std::__libcpp_isinf_or_builtin(0.0) == false, "");
 static_assert(std::__libcpp_isfinite_or_builtin(0.0) == true, "");
diff --git a/libcxx/test/libcxx/numerics/c.math/ctgmath.pass.cpp b/libcxx/test/libcxx/numerics/c.math/ctgmath.pass.cpp
index 81eac056bef66..39b79dba75420 100644
--- a/libcxx/test/libcxx/numerics/c.math/ctgmath.pass.cpp
+++ b/libcxx/test/libcxx/numerics/c.math/ctgmath.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ctgmath>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/c.math/tgmath_h.pass.cpp b/libcxx/test/libcxx/numerics/c.math/tgmath_h.pass.cpp
index d3cd15ca0283d..d564d7327efb3 100644
--- a/libcxx/test/libcxx/numerics/c.math/tgmath_h.pass.cpp
+++ b/libcxx/test/libcxx/numerics/c.math/tgmath_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <tgmath.h>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/c.math/version_cmath.pass.cpp b/libcxx/test/libcxx/numerics/c.math/version_cmath.pass.cpp
index 1b4ab9a6d8ea4..281068f02dc26 100644
--- a/libcxx/test/libcxx/numerics/c.math/version_cmath.pass.cpp
+++ b/libcxx/test/libcxx/numerics/c.math/version_cmath.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cmath>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/cfenv/version.pass.cpp b/libcxx/test/libcxx/numerics/cfenv/version.pass.cpp
index 9ce5b9c4cb230..e166834970993 100644
--- a/libcxx/test/libcxx/numerics/cfenv/version.pass.cpp
+++ b/libcxx/test/libcxx/numerics/cfenv/version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <cfenv>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/complex.number/__sqr.pass.cpp b/libcxx/test/libcxx/numerics/complex.number/__sqr.pass.cpp
index 3a6aec0ac6791..97f4a2419483c 100644
--- a/libcxx/test/libcxx/numerics/complex.number/__sqr.pass.cpp
+++ b/libcxx/test/libcxx/numerics/complex.number/__sqr.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/libcxx/numerics/complex.number/ccmplx/ccomplex.pass.cpp b/libcxx/test/libcxx/numerics/complex.number/ccmplx/ccomplex.pass.cpp
index ff03bd09f841a..da3a4974ea766 100644
--- a/libcxx/test/libcxx/numerics/complex.number/ccmplx/ccomplex.pass.cpp
+++ b/libcxx/test/libcxx/numerics/complex.number/ccmplx/ccomplex.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ccomplex>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/complex.number/version.pass.cpp b/libcxx/test/libcxx/numerics/complex.number/version.pass.cpp
index ec3996e54f3d2..d8310a76f40e7 100644
--- a/libcxx/test/libcxx/numerics/complex.number/version.pass.cpp
+++ b/libcxx/test/libcxx/numerics/complex.number/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <complex>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/numarray/version.pass.cpp b/libcxx/test/libcxx/numerics/numarray/version.pass.cpp
index b921ae247accd..34f666e7e2c98 100644
--- a/libcxx/test/libcxx/numerics/numarray/version.pass.cpp
+++ b/libcxx/test/libcxx/numerics/numarray/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <valarray>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/numeric.ops/version.pass.cpp b/libcxx/test/libcxx/numerics/numeric.ops/version.pass.cpp
index 50a07a639368c..ed051fcda8c61 100644
--- a/libcxx/test/libcxx/numerics/numeric.ops/version.pass.cpp
+++ b/libcxx/test/libcxx/numerics/numeric.ops/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <numeric>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/numerics/rand/rand.synopsis/version.pass.cpp b/libcxx/test/libcxx/numerics/rand/rand.synopsis/version.pass.cpp
index b9f876d0f9c02..60497e704e536 100644
--- a/libcxx/test/libcxx/numerics/rand/rand.synopsis/version.pass.cpp
+++ b/libcxx/test/libcxx/numerics/rand/rand.synopsis/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <random>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp
index b0de5d8792e76..6da77e4365b6f 100644
--- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp
+++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/clear_and_shrink_db1.pass.cpp
@@ -17,6 +17,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::string l = "Long string so that allocation definitely, for sure, absolutely happens. Probably.";
diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp
index 7925e1597b728..eb07fe88a55fe 100644
--- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp
+++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_char_db1.pass.cpp
@@ -18,6 +18,8 @@
 #include <stdexcept>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 int main(int, char**)
 {
diff --git a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp
index 81f888f86950e..4db97609ff51e 100644
--- a/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp
+++ b/libcxx/test/libcxx/strings/basic.string/string.modifiers/insert_iter_size_char_db1.pass.cpp
@@ -17,6 +17,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if _LIBCPP_DEBUG >= 1
diff --git a/libcxx/test/libcxx/strings/c.strings/version_cctype.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cctype.pass.cpp
index 47e8576798a2b..07e91c4e309ba 100644
--- a/libcxx/test/libcxx/strings/c.strings/version_cctype.pass.cpp
+++ b/libcxx/test/libcxx/strings/c.strings/version_cctype.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cctype>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/strings/c.strings/version_cstring.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cstring.pass.cpp
index 21388b41050c2..37721759c2a34 100644
--- a/libcxx/test/libcxx/strings/c.strings/version_cstring.pass.cpp
+++ b/libcxx/test/libcxx/strings/c.strings/version_cstring.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstring>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp
index 8c6a9de70739d..e14340a8e0f41 100644
--- a/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp
+++ b/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <cuchar>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp
index f7539d4e83866..1e2eff5bbf633 100644
--- a/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp
+++ b/libcxx/test/libcxx/strings/c.strings/version_cwchar.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cwchar>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp
index 06aacb1fee870..d82198039c585 100644
--- a/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp
+++ b/libcxx/test/libcxx/strings/c.strings/version_cwctype.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cwctype>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/strings/version.pass.cpp b/libcxx/test/libcxx/strings/version.pass.cpp
index f106780e350a7..9b549a3afec4c 100644
--- a/libcxx/test/libcxx/strings/version.pass.cpp
+++ b/libcxx/test/libcxx/strings/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <string>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp
index de71ee78a16e7..b3621a4e697d0 100644
--- a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp
+++ b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception.pass.cpp
@@ -29,6 +29,7 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp
index 909c735f464f9..49a41362db9a5 100644
--- a/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp
+++ b/libcxx/test/libcxx/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp
@@ -27,6 +27,7 @@
 #include <exception>
 #include <cstdlib>
 #include <cassert>
+#include "test_macros.h"
 #include "debug_mode_helper.h"
 
 
diff --git a/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp b/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp
index 75bf2950331fd..ee6b9199f09f5 100644
--- a/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp
+++ b/libcxx/test/libcxx/thread/futures/futures.task/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <future>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/thread/futures/version.pass.cpp b/libcxx/test/libcxx/thread/futures/version.pass.cpp
index fd2c30433001c..601e6a48e7955 100644
--- a/libcxx/test/libcxx/thread/futures/version.pass.cpp
+++ b/libcxx/test/libcxx/thread/futures/version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <future>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp
index 2ad5c625604c8..04c104c5688d8 100644
--- a/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.condition/PR30202_notify_from_pthread_created_thread.pass.cpp
@@ -29,6 +29,8 @@
 #include <cassert>
 #include <pthread.h>
 
+#include "test_macros.h"
+
 std::condition_variable cv;
 std::mutex mut;
 bool exited = false;
diff --git a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp
index 4b983ff05b201..a4c7342f8a61e 100644
--- a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp
@@ -20,6 +20,8 @@
 #include <condition_variable>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::condition_variable::native_handle_type,
diff --git a/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp
index 2354b3b9fd8ed..552223a6361b4 100644
--- a/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.condition/version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <condition_variable>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp
index b85efcb64ca5b..269de52cbbf80 100644
--- a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/native_handle.pass.cpp
@@ -20,6 +20,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::mutex m;
diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp
index 4a6c53995305f..dda2d94a95c92 100644
--- a/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/native_handle.pass.cpp
@@ -20,6 +20,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::recursive_mutex m;
diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp
index 65a1d6e1225f5..2e181dc3b65e4 100644
--- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_annotations_not_enabled.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   std::mutex m;
   m.lock();
diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp
index e29801228627b..cc355eef515e3 100644
--- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_lock_unlock.pass.cpp
@@ -20,6 +20,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 std::mutex m;
 int foo __attribute__((guarded_by(m)));
 
diff --git a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp
index 2e427f217c85a..538fbcc142de8 100644
--- a/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.mutex/thread_safety_requires_capability.pass.cpp
@@ -20,6 +20,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 std::mutex m;
 int foo __attribute__((guarded_by(m)));
 
diff --git a/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp b/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp
index abe4fda96aa14..9a18ec72f7cfa 100644
--- a/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.mutex/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp
index 1bf7e521d5cbc..0fa0d7ce76427 100644
--- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/thread.thread.member/native_handle.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp
index 4f6bd12064b44..ce47404884c63 100644
--- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.class/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <thread>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::thread::native_handle_type, pthread_t>::value), "");
diff --git a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
index f11f40611335b..56120df98b33b 100644
--- a/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.threads/thread.thread.this/sleep_for.pass.cpp
@@ -32,6 +32,8 @@
 #include <signal.h>
 #include <sys/time.h>
 
+#include "test_macros.h"
+
 void sig_action(int) {}
 
 int main(int, char**)
diff --git a/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp b/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp
index 3d7866228db31..4ced04d5ee2d4 100644
--- a/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp
+++ b/libcxx/test/libcxx/thread/thread.threads/version.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <thread>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp b/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp
index 535268560a935..735609bf4df47 100644
--- a/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp
+++ b/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp
@@ -23,6 +23,8 @@
 
 #include "user_defined_integral.hpp"
 
+#include "test_macros.h"
+
 template <class T>
 struct EnumType
 {
diff --git a/libcxx/test/libcxx/type_traits/lazy_metafunctions.pass.cpp b/libcxx/test/libcxx/type_traits/lazy_metafunctions.pass.cpp
index 2ea1d6891f7b0..59090194009f6 100644
--- a/libcxx/test/libcxx/type_traits/lazy_metafunctions.pass.cpp
+++ b/libcxx/test/libcxx/type_traits/lazy_metafunctions.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class Type>
 struct Identity {
     typedef Type type;
diff --git a/libcxx/test/libcxx/utilities/any/size_and_alignment.pass.cpp b/libcxx/test/libcxx/utilities/any/size_and_alignment.pass.cpp
index 4e3646660c42b..b6220667e2208 100644
--- a/libcxx/test/libcxx/utilities/any/size_and_alignment.pass.cpp
+++ b/libcxx/test/libcxx/utilities/any/size_and_alignment.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <any>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::any;
diff --git a/libcxx/test/libcxx/utilities/any/small_type.pass.cpp b/libcxx/test/libcxx/utilities/any/small_type.pass.cpp
index 9df6efc1402de..78ff35192c0ec 100644
--- a/libcxx/test/libcxx/utilities/any/small_type.pass.cpp
+++ b/libcxx/test/libcxx/utilities/any/small_type.pass.cpp
@@ -13,6 +13,7 @@
 // Check that the size and alignment of any are what we expect.
 
 #include <any>
+#include "test_macros.h"
 #include "any_helpers.h"
 
 constexpr std::size_t BufferSize = (sizeof(void*) * 3);
diff --git a/libcxx/test/libcxx/utilities/any/version.pass.cpp b/libcxx/test/libcxx/utilities/any/version.pass.cpp
index ee5bc9928e1f1..6c5c43e8f9b32 100644
--- a/libcxx/test/libcxx/utilities/any/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/any/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <any>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/function.objects/refwrap/binary.pass.cpp b/libcxx/test/libcxx/utilities/function.objects/refwrap/binary.pass.cpp
index 1f5bbcdef3ba8..8532ca740b631 100644
--- a/libcxx/test/libcxx/utilities/function.objects/refwrap/binary.pass.cpp
+++ b/libcxx/test/libcxx/utilities/function.objects/refwrap/binary.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 class functor1
     : public std::unary_function<int, char>
 {
diff --git a/libcxx/test/libcxx/utilities/function.objects/refwrap/unary.pass.cpp b/libcxx/test/libcxx/utilities/function.objects/refwrap/unary.pass.cpp
index 429722e4711ec..09c304397d00e 100644
--- a/libcxx/test/libcxx/utilities/function.objects/refwrap/unary.pass.cpp
+++ b/libcxx/test/libcxx/utilities/function.objects/refwrap/unary.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 class functor1
     : public std::unary_function<int, char>
 {
diff --git a/libcxx/test/libcxx/utilities/function.objects/version.pass.cpp b/libcxx/test/libcxx/utilities/function.objects/version.pass.cpp
index 6f8540f1a2425..6418733713df6 100644
--- a/libcxx/test/libcxx/utilities/function.objects/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/function.objects/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <functional>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/memory/util.dynamic.safety/get_pointer_safety_new_abi.pass.cpp b/libcxx/test/libcxx/utilities/memory/util.dynamic.safety/get_pointer_safety_new_abi.pass.cpp
index c8e0c7292998e..7a4a4cbf48956 100644
--- a/libcxx/test/libcxx/utilities/memory/util.dynamic.safety/get_pointer_safety_new_abi.pass.cpp
+++ b/libcxx/test/libcxx/utilities/memory/util.dynamic.safety/get_pointer_safety_new_abi.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
   {
diff --git a/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp b/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp
index bf12e145942b6..0f7096d5f17e4 100644
--- a/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp
+++ b/libcxx/test/libcxx/utilities/memory/util.smartptr/race_condition.pass.cpp
@@ -20,6 +20,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::shared_ptr<int> Ptr;
 typedef std::weak_ptr<int> WeakPtr;
 
diff --git a/libcxx/test/libcxx/utilities/memory/version.pass.cpp b/libcxx/test/libcxx/utilities/memory/version.pass.cpp
index 5b10e1425f5ec..cb885560c0291 100644
--- a/libcxx/test/libcxx/utilities/memory/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/memory/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <memory>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/meta/meta.unary/meta.unary.prop/__has_operator_addressof.pass.cpp b/libcxx/test/libcxx/utilities/meta/meta.unary/meta.unary.prop/__has_operator_addressof.pass.cpp
index 80bd2e73861ae..9159b4108f8c5 100644
--- a/libcxx/test/libcxx/utilities/meta/meta.unary/meta.unary.prop/__has_operator_addressof.pass.cpp
+++ b/libcxx/test/libcxx/utilities/meta/meta.unary/meta.unary.prop/__has_operator_addressof.pass.cpp
@@ -17,6 +17,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 struct A
 {
diff --git a/libcxx/test/libcxx/utilities/meta/version.pass.cpp b/libcxx/test/libcxx/utilities/meta/version.pass.cpp
index 7f4cbd841dd81..18f7536f56b78 100644
--- a/libcxx/test/libcxx/utilities/meta/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/meta/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/copy.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/copy.pass.cpp
index 1f559904c88d4..d88c2cb9a2aaa 100644
--- a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/copy.pass.cpp
+++ b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/copy.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <type_traits>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {};
diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/move.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/move.pass.cpp
index 325bcb452a0fa..8eaeb0925a169 100644
--- a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/move.pass.cpp
+++ b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.assign/move.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <utility>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {};
diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp
index 694ab01569cb2..ea3ac4ad65d11 100644
--- a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp
+++ b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <type_traits>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {};
diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
index 383eaa98649be..2d5c7ac5e8473 100644
--- a/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
+++ b/libcxx/test/libcxx/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <utility>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {};
diff --git a/libcxx/test/libcxx/utilities/optional/optional.object/triviality.abi.pass.cpp b/libcxx/test/libcxx/utilities/optional/optional.object/triviality.abi.pass.cpp
index 36f5bb9373921..ce36603d85790 100644
--- a/libcxx/test/libcxx/utilities/optional/optional.object/triviality.abi.pass.cpp
+++ b/libcxx/test/libcxx/utilities/optional/optional.object/triviality.abi.pass.cpp
@@ -26,6 +26,8 @@
 
 #include "archetypes.hpp"
 
+#include "test_macros.h"
+
 template <class T>
 struct SpecialMemberTest {
     using O = std::optional<T>;
diff --git a/libcxx/test/libcxx/utilities/optional/version.pass.cpp b/libcxx/test/libcxx/utilities/optional/version.pass.cpp
index 49b263a37e587..315e8849aef04 100644
--- a/libcxx/test/libcxx/utilities/optional/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/optional/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/ratio/version.pass.cpp b/libcxx/test/libcxx/utilities/ratio/version.pass.cpp
index 112111c37bfd5..26514b194bfe0 100644
--- a/libcxx/test/libcxx/utilities/ratio/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/ratio/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/template.bitset/includes.pass.cpp b/libcxx/test/libcxx/utilities/template.bitset/includes.pass.cpp
index 3ec17b079ebc8..42deaaa4b2d37 100644
--- a/libcxx/test/libcxx/utilities/template.bitset/includes.pass.cpp
+++ b/libcxx/test/libcxx/utilities/template.bitset/includes.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <bitset>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_CSTDDEF
 #error <cstddef> has not been included
 #endif
diff --git a/libcxx/test/libcxx/utilities/template.bitset/version.pass.cpp b/libcxx/test/libcxx/utilities/template.bitset/version.pass.cpp
index 94df4d1bab99e..e055895a99316 100644
--- a/libcxx/test/libcxx/utilities/template.bitset/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/template.bitset/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <bitset>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/time/version.pass.cpp b/libcxx/test/libcxx/utilities/time/version.pass.cpp
index d1093dd4afcf6..5e183169134b3 100644
--- a/libcxx/test/libcxx/utilities/time/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/time/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp b/libcxx/test/libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp
index 20938208794e5..11dd2898135b8 100644
--- a/libcxx/test/libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp
+++ b/libcxx/test/libcxx/utilities/tuple/tuple.tuple/empty_member.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <tuple>
 
+#include "test_macros.h"
+
 struct A {};
 
 struct B {};
diff --git a/libcxx/test/libcxx/utilities/tuple/version.pass.cpp b/libcxx/test/libcxx/utilities/tuple/version.pass.cpp
index 28232a99b13f7..1a13410eb3050 100644
--- a/libcxx/test/libcxx/utilities/tuple/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/tuple/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <tuple>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/type.index/version.pass.cpp b/libcxx/test/libcxx/utilities/type.index/version.pass.cpp
index 38cd0254a5803..aa549fea442c9 100644
--- a/libcxx/test/libcxx/utilities/type.index/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/type.index/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <typeindex>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/utility/__is_inplace_index.pass.cpp b/libcxx/test/libcxx/utilities/utility/__is_inplace_index.pass.cpp
index a2559f208f37c..d196a1840bc96 100644
--- a/libcxx/test/libcxx/utilities/utility/__is_inplace_index.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/__is_inplace_index.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 struct S {};
 
 int main(int, char**) {
diff --git a/libcxx/test/libcxx/utilities/utility/__is_inplace_type.pass.cpp b/libcxx/test/libcxx/utilities/utility/__is_inplace_type.pass.cpp
index 534fb50592660..e477d616a782a 100644
--- a/libcxx/test/libcxx/utilities/utility/__is_inplace_type.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/__is_inplace_type.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 struct S {};
 
 int main(int, char**) {
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/U_V.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/U_V.pass.cpp
index e03fa6d7f3d39..9a4853d4bbaaa 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/U_V.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/U_V.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 
 struct ExplicitT {
     constexpr explicit ExplicitT(int x) : value(x) {}
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/assign_tuple_like.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/assign_tuple_like.pass.cpp
index 5765700fe1360..7895072a84e2c 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/assign_tuple_like.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/assign_tuple_like.pass.cpp
@@ -22,6 +22,8 @@
 
 #include "archetypes.hpp"
 
+#include "test_macros.h"
+
 // Clang warns about missing braces when initializing std::array.
 #if defined(__clang__)
 #pragma clang diagnostic ignored "-Wmissing-braces"
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp
index a2fac173ec7cc..9252e7aaea22b 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 
 struct ExplicitT {
     constexpr explicit ExplicitT(int x) : value(x) {}
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp
index 16d714ab9a7ed..3b644eeb6c9b4 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 
 struct ExplicitT {
     constexpr explicit ExplicitT(int x) : value(x) {}
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/default.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/default.pass.cpp
index a7f0f87645251..2ca9e7a58bbf7 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/default.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <utility>
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 struct ThrowingDefault {
   ThrowingDefault() { }
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp
index e4c953840129e..259aa64b35ce9 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp
@@ -22,6 +22,8 @@
 
 #include "archetypes.hpp"
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
     using NonThrowingConvert = NonThrowingTypes::ConvertingType;
diff --git a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp
index 8cc83f7bd0b3e..93b0281512c87 100644
--- a/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <utility>
 
+#include "test_macros.h"
+
 
 struct ExplicitT {
     constexpr explicit ExplicitT(int x) : value(x) {}
diff --git a/libcxx/test/libcxx/utilities/utility/version.pass.cpp b/libcxx/test/libcxx/utilities/utility/version.pass.cpp
index ca783db7fc13d..52d1c43ed0f04 100644
--- a/libcxx/test/libcxx/utilities/utility/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/utility/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp b/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp
index 1bfe0e9fdff4d..df6f0916f3a0a 100644
--- a/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp
+++ b/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp
@@ -18,6 +18,8 @@
 #include <utility>
 #include <variant>
 
+#include "test_macros.h"
+
 template <class Sequence>
 struct make_variant_imp;
 
diff --git a/libcxx/test/libcxx/utilities/variant/version.pass.cpp b/libcxx/test/libcxx/utilities/variant/version.pass.cpp
index 3ef8ed50c9140..1dcb75d180d8a 100644
--- a/libcxx/test/libcxx/utilities/variant/version.pass.cpp
+++ b/libcxx/test/libcxx/utilities/variant/version.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <variant>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_VERSION
 #error _LIBCPP_VERSION not defined
 #endif
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp
index 97af585b932c5..c88daeeecfc61 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/partition.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct is_odd
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/sample.stable.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/sample.stable.pass.cpp
index 58e6084626a90..59a657796ac33 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/sample.stable.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/sample.stable.pass.cpp
@@ -20,6 +20,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 // Stable if and only if PopulationIterator meets the requirements of a
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp
index 7838a0ca2f12b..e7930a57acf0f 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/reverse.pass.cpp
@@ -16,6 +16,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter>
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
index 419bb4bbbffa7..2fbd905e5be69 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/iter_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int i = 1;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.comp.pass.cpp
index 482af9ef33130..f314f7bebe1f7 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.comp.pass.cpp
@@ -17,6 +17,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Tag {
     Tag() : val(0), tag("Default") {}
     Tag(int a, const char *b) : val(a), tag(b) {}
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.pass.cpp
index 4066a39453e3f..982591ac32c31 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/clamp.pass.cpp
@@ -16,6 +16,8 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Tag {
     Tag() : val(0), tag("Default") {}
     Tag(int a, const char *b) : val(a), tag(b) {}
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/make_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/make_heap.pass.cpp
index 3d862ca4f25ec..76244a9f0b37f 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/make_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/make_heap.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 void test(int N)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/pop_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/pop_heap.pass.cpp
index 2b434983cfcb3..fa0e1120b3093 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/pop_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/pop_heap.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 void test(int N)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/push_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/push_heap.pass.cpp
index 7db79e3bc5dba..208bf6e80f503 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/push_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/push_heap.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 void test(int N)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/sort_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/sort_heap.pass.cpp
index 947affcf086e0..7d48b87aea571 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/sort_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/sort_heap.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 void test(int N)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp
index 208221416812b..cba09f51ecd8b 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.merge/inplace_merge.pass.cpp
@@ -18,6 +18,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #if TEST_STD_VER >= 11
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/max_element.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/max_element.pass.cpp
index cb5341ca04705..db79601d102c1 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/max_element.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/max_element.pass.cpp
@@ -17,6 +17,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/min_element.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/min_element.pass.cpp
index 151bfa8127aa2..e2d5ede408564 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/min_element.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/min_element.pass.cpp
@@ -17,6 +17,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp
index 8b56ac180e507..7ea8bed4c8464 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/minmax_element.pass.cpp
@@ -17,6 +17,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/nth_element.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/nth_element.pass.cpp
index abde620d03876..18c2d13929381 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/nth_element.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/nth_element.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 void
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation.pass.cpp
index 62d5b42e23e00..ba0c3c6532ab9 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation.pass.cpp
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include <cstdio>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation_comp.pass.cpp
index 4416ed1e4a888..71e5313ebf5fc 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/next_permutation_comp.pass.cpp
@@ -18,6 +18,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include <cstdio>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation.pass.cpp
index 044a6444ab012..9d1f0869f8ef7 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation.pass.cpp
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include <cstdio>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation_comp.pass.cpp
index 760daae36baa2..59b314c510434 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/prev_permutation_comp.pass.cpp
@@ -18,6 +18,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include <cstdio>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference.pass.cpp
index 4d1f537b7f157..ec1e671ddaf02 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference.pass.cpp
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class OutIter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference_comp.pass.cpp
index 2597174c6068f..2ed7ca91dfba3 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.difference/set_difference_comp.pass.cpp
@@ -22,6 +22,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class OutIter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference.pass.cpp
index c74d6623bc5fb..f8965dfb22790 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference.pass.cpp
@@ -21,6 +21,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class OutIter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference_comp.pass.cpp
index 99e75b1224a9f..5e67074280ebc 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.symmetric.difference/set_symmetric_difference_comp.pass.cpp
@@ -23,6 +23,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class OutIter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union.pass.cpp
index 827c2c1900107..a4196cab2031f 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union.pass.cpp
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class OutIter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_comp.pass.cpp
index c8d1d28828c26..aa4828b10895a 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_comp.pass.cpp
@@ -22,6 +22,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class OutIter>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_move.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_move.pass.cpp
index 45bd455bb8191..4d6c78703f098 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_move.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.set.operations/set.union/set_union_move.pass.cpp
@@ -25,6 +25,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted.pass.cpp
index 6e2ea5f3aed23..67d7a9eae4cee 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted.pass.cpp
@@ -16,6 +16,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #if TEST_STD_VER > 17
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_comp.pass.cpp
index c5624d994d6c4..103bcf84ed6b3 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_comp.pass.cpp
@@ -17,6 +17,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #if TEST_STD_VER > 17
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until.pass.cpp
index 4396a4fc82025..229ce5821014c 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until.pass.cpp
@@ -16,6 +16,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #if TEST_STD_VER > 17
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until_comp.pass.cpp
index 48696cf083852..617cd7aacbfdb 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/is.sorted/is_sorted_until_comp.pass.cpp
@@ -17,6 +17,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #if TEST_STD_VER > 17
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy.pass.cpp
index 45a6fef6701d1..bcc1975b7992c 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy.pass.cpp
@@ -20,6 +20,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy_comp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy_comp.pass.cpp
index a1c2b0f9c8a3a..65c121d0555b9 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy_comp.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/partial_sort_copy_comp.pass.cpp
@@ -23,6 +23,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/partial_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/partial_sort.pass.cpp
index b41eb12d6aaf5..9d4da70c4c0f0 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/partial_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/partial_sort.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 void
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp
index 8f2845732cc92..1adba43b5ed37 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/sort.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 template <class RI>
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
index c433baab44a6a..aee94c63465f9 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/stable_sort.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mt19937 randomness;
 
 template <class RI>
diff --git a/libcxx/test/std/atomics/atomics.fences/atomic_signal_fence.pass.cpp b/libcxx/test/std/atomics/atomics.fences/atomic_signal_fence.pass.cpp
index bf5325940e41c..323d77d7aced1 100644
--- a/libcxx/test/std/atomics/atomics.fences/atomic_signal_fence.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.fences/atomic_signal_fence.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <atomic>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::atomic_signal_fence(std::memory_order_seq_cst);
diff --git a/libcxx/test/std/atomics/atomics.fences/atomic_thread_fence.pass.cpp b/libcxx/test/std/atomics/atomics.fences/atomic_thread_fence.pass.cpp
index d237f2de11878..ab376868eebc0 100644
--- a/libcxx/test/std/atomics/atomics.fences/atomic_thread_fence.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.fences/atomic_thread_fence.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <atomic>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::atomic_thread_fence(std::memory_order_seq_cst);
diff --git a/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear.pass.cpp b/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear.pass.cpp
index 23cb3d2b6fa40..f89d77a479ad4 100644
--- a/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear_explicit.pass.cpp
index d87291297eed5..53f50ab59cf06 100644
--- a/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/atomic_flag_clear_explicit.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set.pass.cpp b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set.pass.cpp
index d73dc316d96e2..22e4b66d45c5a 100644
--- a/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set_explicit.pass.cpp
index 972a6e84b6ff0..9c1c222ed9438 100644
--- a/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/atomic_flag_test_and_set_explicit.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/atomics/atomics.flag/clear.pass.cpp b/libcxx/test/std/atomics/atomics.flag/clear.pass.cpp
index 33378e4bd87ff..676cfc5ff6236 100644
--- a/libcxx/test/std/atomics/atomics.flag/clear.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/clear.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/atomics/atomics.flag/init.pass.cpp b/libcxx/test/std/atomics/atomics.flag/init.pass.cpp
index a45784d802b3b..52cffea34f788 100644
--- a/libcxx/test/std/atomics/atomics.flag/init.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/init.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::atomic_flag f = ATOMIC_FLAG_INIT;
diff --git a/libcxx/test/std/atomics/atomics.flag/test_and_set.pass.cpp b/libcxx/test/std/atomics/atomics.flag/test_and_set.pass.cpp
index 1a198c1be84f3..393c4627368ce 100644
--- a/libcxx/test/std/atomics/atomics.flag/test_and_set.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.flag/test_and_set.pass.cpp
@@ -18,6 +18,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/atomics/atomics.general/replace_failure_order.pass.cpp b/libcxx/test/std/atomics/atomics.general/replace_failure_order.pass.cpp
index ee23841382bfc..631f97a0582d3 100644
--- a/libcxx/test/std/atomics/atomics.general/replace_failure_order.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.general/replace_failure_order.pass.cpp
@@ -23,6 +23,8 @@
 
 #include <atomic>
 
+#include "test_macros.h"
+
 int main(int, char**) {
     std::atomic<int> i;
     volatile std::atomic<int> v;
diff --git a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp
index 523f477cad43c..a53eb741eae82 100644
--- a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp
@@ -15,6 +15,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 #if !defined(__cpp_lib_atomic_is_always_lock_free)
 # error Feature test macro missing.
 #endif
diff --git a/libcxx/test/std/atomics/atomics.lockfree/lockfree.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/lockfree.pass.cpp
index b86893e0b9cb5..064afcaa7ed67 100644
--- a/libcxx/test/std/atomics/atomics.lockfree/lockfree.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.lockfree/lockfree.pass.cpp
@@ -24,6 +24,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(ATOMIC_BOOL_LOCK_FREE == 0 ||
diff --git a/libcxx/test/std/atomics/atomics.order/kill_dependency.pass.cpp b/libcxx/test/std/atomics/atomics.order/kill_dependency.pass.cpp
index 998b0cef38b25..7cad751841e58 100644
--- a/libcxx/test/std/atomics/atomics.order/kill_dependency.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.order/kill_dependency.pass.cpp
@@ -15,6 +15,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::kill_dependency(5) == 5);
diff --git a/libcxx/test/std/atomics/atomics.order/memory_order.pass.cpp b/libcxx/test/std/atomics/atomics.order/memory_order.pass.cpp
index c756d0b187b81..8e35cf19500b7 100644
--- a/libcxx/test/std/atomics/atomics.order/memory_order.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.order/memory_order.pass.cpp
@@ -19,6 +19,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(static_cast<int>(std::memory_order_relaxed) == 0);
diff --git a/libcxx/test/std/atomics/atomics.order/memory_order_new.pass.cpp b/libcxx/test/std/atomics/atomics.order/memory_order_new.pass.cpp
index e9a571dfa8b3a..771803a428cef 100644
--- a/libcxx/test/std/atomics/atomics.order/memory_order_new.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.order/memory_order_new.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <atomic>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::memory_order_relaxed == std::memory_order::relaxed);
diff --git a/libcxx/test/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp
index a0648ff9c0a18..139b0524a8234 100644
--- a/libcxx/test/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.generic/cstdint_typedefs.pass.cpp
@@ -39,6 +39,8 @@
 #include <type_traits>
 #include <cstdint>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::atomic<  std::int_least8_t>,   std::atomic_int_least8_t>::value), "");
diff --git a/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp
index faa682b8cef25..dd59c301a8e09 100644
--- a/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.generic/integral_typedefs.pass.cpp
@@ -40,6 +40,8 @@
 #include <atomic>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::atomic<char>, std::atomic_char>::value), "");
diff --git a/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp
index 229761eb32685..5e2c46c4f95cf 100644
--- a/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.generic/trivially_copyable.pass.cpp
@@ -58,6 +58,8 @@
 #include <thread> // for thread_id
 #include <chrono> // for nanoseconds
 
+#include "test_macros.h"
+
 struct TriviallyCopyable {
     TriviallyCopyable ( int i ) : i_(i) {}
     int i_;
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong.pass.cpp
index 041845d5be2a2..0c748785846f6 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong.pass.cpp
@@ -23,6 +23,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong_explicit.pass.cpp
index 99a8508863368..e94f191e9532a 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_strong_explicit.pass.cpp
@@ -26,6 +26,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak.pass.cpp
index a2a9e205db8ec..5379f776fa28b 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak.pass.cpp
@@ -24,6 +24,7 @@
 #include <cassert>
 
 #include <cmpxchg_loop.h>
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak_explicit.pass.cpp
index 2ad17f1cbe6e7..8a8a6d61e63da 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_compare_exchange_weak_explicit.pass.cpp
@@ -28,6 +28,7 @@
 
 #include <cmpxchg_loop.h>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange.pass.cpp
index d13238e652839..9fd9e0a6f50e2 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange.pass.cpp
@@ -23,6 +23,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange_explicit.pass.cpp
index 2acbcb20f7ce8..3e75c60b49a80 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_exchange_explicit.pass.cpp
@@ -23,6 +23,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp
index f84a489834867..e584ea955d754 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add.pass.cpp
@@ -31,6 +31,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp
index fbdf3fffcd73e..548101a409e9e 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_add_explicit.pass.cpp
@@ -31,6 +31,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and.pass.cpp
index dfaaaa3e5b461..aad3209e4fab8 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and_explicit.pass.cpp
index d31245a84217c..cbcc2231d0fca 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_and_explicit.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or.pass.cpp
index 741dca00e86c7..f86624a977cae 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or_explicit.pass.cpp
index e56e946f43cb1..12686835aa285 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_or_explicit.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp
index 13fde4ad6a75c..20ec7688bb2ba 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub.pass.cpp
@@ -31,6 +31,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp
index af97bcc60221e..f26cefcbdb074 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_sub_explicit.pass.cpp
@@ -32,6 +32,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor.pass.cpp
index 0e6f99f36291a..4979e0106ff8d 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor_explicit.pass.cpp
index ece15694561e4..639cc230f8249 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_fetch_xor_explicit.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_init.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_init.pass.cpp
index 0e5b920f91e8b..285e1af2ce0cb 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_init.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_init.pass.cpp
@@ -23,6 +23,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_is_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_is_lock_free.pass.cpp
index bfa24dae56244..9ca2207ba85bb 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_is_lock_free.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_is_lock_free.pass.cpp
@@ -21,6 +21,7 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load.pass.cpp
index b775c54673726..da0b955d4a828 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load.pass.cpp
@@ -23,6 +23,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load_explicit.pass.cpp
index 0384baa5a7f4b..350e3bd2cec32 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_load_explicit.pass.cpp
@@ -23,6 +23,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store.pass.cpp
index 0fb3bc7dda936..62d67586db1a0 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store_explicit.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store_explicit.pass.cpp
index 11aa295dee4b2..dd70963df2930 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store_explicit.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_store_explicit.pass.cpp
@@ -22,6 +22,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 template <class T>
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp
index 1588af3273fb6..6861dd6539921 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_var_init.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::atomic<int> v = ATOMIC_VAR_INIT(5);
diff --git a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp
index d692e931b49a4..f5506a0aeac1a 100644
--- a/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.types.operations/atomics.types.operations.req/ctor.pass.cpp
@@ -21,6 +21,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "atomic_helpers.h"
 
 struct UserType {
diff --git a/libcxx/test/std/containers/associative/map/compare.pass.cpp b/libcxx/test/std/containers/associative/map/compare.pass.cpp
index 84de271842be6..1a582a02d8468 100644
--- a/libcxx/test/std/containers/associative/map/compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/compare.pass.cpp
@@ -19,6 +19,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Key {
   template <typename T> Key(const T&) {}
   bool operator< (const Key&) const { return false; }
diff --git a/libcxx/test/std/containers/associative/map/gcc_workaround.pass.cpp b/libcxx/test/std/containers/associative/map/gcc_workaround.pass.cpp
index 6c87e51f79441..e69a24110b663 100644
--- a/libcxx/test/std/containers/associative/map/gcc_workaround.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/gcc_workaround.pass.cpp
@@ -11,6 +11,8 @@
 #include <map>
 std::map<int,int>::iterator it;
 #include <set>
+
+#include "test_macros.h"
 using std::set;
 using std::multiset;
 
diff --git a/libcxx/test/std/containers/associative/map/incomplete_type.pass.cpp b/libcxx/test/std/containers/associative/map/incomplete_type.pass.cpp
index a45c50c32940f..c663f8b618c3a 100644
--- a/libcxx/test/std/containers/associative/map/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/incomplete_type.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 struct A {
     typedef std::map<A, A> Map;
     int data;
diff --git a/libcxx/test/std/containers/associative/map/map.access/empty.pass.cpp b/libcxx/test/std/containers/associative/map/map.access/empty.pass.cpp
index cff13df7af288..a4cd019ee4df3 100644
--- a/libcxx/test/std/containers/associative/map/map.access/empty.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.access/empty.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.access/index_tuple.pass.cpp b/libcxx/test/std/containers/associative/map/map.access/index_tuple.pass.cpp
index bc99f6ef8e3c3..fccd96855fa8e 100644
--- a/libcxx/test/std/containers/associative/map/map.access/index_tuple.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.access/index_tuple.pass.cpp
@@ -21,6 +21,8 @@
 
 #include <tuple>
 
+#include "test_macros.h"
+
 
 int main(int, char**)
 {
diff --git a/libcxx/test/std/containers/associative/map/map.access/size.pass.cpp b/libcxx/test/std/containers/associative/map/map.access/size.pass.cpp
index bb4b14e026fb1..837f720ec84d4 100644
--- a/libcxx/test/std/containers/associative/map/map.access/size.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.access/size.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.cons/alloc.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/alloc.pass.cpp
index 5bb9abc88f7d3..45e867109f8ef 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp
index 612838ef6670a..c277e9e192d87 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.cons/compare.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/compare.pass.cpp
index 40a8e38aef2f5..d51ad324953dd 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/compare.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.cons/compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/compare_alloc.pass.cpp
index 71bc32295e062..7437a7f14506a 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/compare_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/copy_alloc.pass.cpp
index d25504382e4ef..93ff5e20acf54 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/copy_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
index a902e05603be9..578958aa0c1b0 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
@@ -20,6 +20,7 @@
 
 #include <iostream>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.cons/default.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/default.pass.cpp
index 5d3fcaee117ad..55a99d20a53ce 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/default.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp
index af8fbe79f0bdd..610ca109ef5ac 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/default_recursive.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 struct X
 {
     std::map<int, X> m;
diff --git a/libcxx/test/std/containers/associative/map/map.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/initializer_list.pass.cpp
index 1303f7ef2bad4..559f5d8fa672d 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare.pass.cpp
index 9b6a47ac334ca..fdfec0d00e6e7 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare_alloc.pass.cpp
index 0da3115f76f7c..f2ba8dfb63239 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/initializer_list_compare_alloc.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/iter_iter.pass.cpp
index 243800cfd9ba9..fc6a8d2e24695 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.cons/iter_iter_comp.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/iter_iter_comp.pass.cpp
index 12a079ea01c72..25760fa09960d 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/iter_iter_comp.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/iter_iter_comp.pass.cpp
@@ -16,6 +16,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.cons/move.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/move.pass.cpp
index ecf8c9dabfd1d..e62046f7ef293 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp
index 758d0f83f86cb..56473da0e8299 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/emplace.pass.cpp
index 382e5c8badf84..32def964ba60c 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/emplace.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/emplace.pass.cpp
@@ -19,6 +19,7 @@
 #include <cassert>
 #include <tuple>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/emplace_hint.pass.cpp
index 516d88054b776..171a437b74e0f 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/emplace_hint.pass.cpp
@@ -18,6 +18,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter.pass.cpp
index 0f23ef638101a..827f3bb588902 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct TemplateConstructor
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter_iter.pass.cpp
index 71fa96ce11b7e..91ca0da60fb1e 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/erase_iter_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/erase_key.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/erase_key.pass.cpp
index da96499b09fb5..c37cadd6d1dc0 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/erase_key.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp
index f2b67c9ff630a..2024e7c3f0e54 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp
index 018e9acf89863..0e47d047f3301 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
index f3e84002123c9..ea6bd2381c9f3 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
@@ -18,6 +18,7 @@
 
 #include <map>
 
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../../map_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_initializer_list.pass.cpp
index ea6c1380054ae..b4bbdd507aedd 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_iter_iter.pass.cpp
index a6a7763363b04..d59c4810cca27 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type.pass.cpp
index f3f1662600e15..3576ccffb0f95 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <type_traits>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp
index 084f7ee2c74ab..9777989f0af1d 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_or_assign.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_or_assign.pass.cpp
index bd9625a09501b..673db86b7f327 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_or_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_or_assign.pass.cpp
@@ -27,6 +27,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 class Moveable
 {
     Moveable(const Moveable&);
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/try.emplace.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/try.emplace.pass.cpp
index fe9484a3ce3b1..ea7b18b5624dc 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/try.emplace.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/try.emplace.pass.cpp
@@ -25,6 +25,8 @@
 #include <cassert>
 #include <tuple>
 
+#include "test_macros.h"
+
 class Moveable
 {
     Moveable(const Moveable&);
diff --git a/libcxx/test/std/containers/associative/map/map.ops/count0.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/count0.pass.cpp
index cce0444fdc331..163327114f6b0 100644
--- a/libcxx/test/std/containers/associative/map/map.ops/count0.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.ops/count0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.ops/equal_range0.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/equal_range0.pass.cpp
index 22f067a2a70cc..92dcd2d85633b 100644
--- a/libcxx/test/std/containers/associative/map/map.ops/equal_range0.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.ops/equal_range0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.ops/find0.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/find0.pass.cpp
index affc61efb545b..1c843adcb34c2 100644
--- a/libcxx/test/std/containers/associative/map/map.ops/find0.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.ops/find0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.ops/lower_bound0.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/lower_bound0.pass.cpp
index a92790f2bd6fc..cb74648789cb3 100644
--- a/libcxx/test/std/containers/associative/map/map.ops/lower_bound0.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.ops/lower_bound0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.ops/upper_bound0.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/upper_bound0.pass.cpp
index 8f58df61b7d97..6780f6f44ed9d 100644
--- a/libcxx/test/std/containers/associative/map/map.ops/upper_bound0.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.ops/upper_bound0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.special/member_swap.pass.cpp b/libcxx/test/std/containers/associative/map/map.special/member_swap.pass.cpp
index a41e43f7a0663..ddce2225e0c63 100644
--- a/libcxx/test/std/containers/associative/map/map.special/member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.special/member_swap.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/map/map.special/non_member_swap.pass.cpp b/libcxx/test/std/containers/associative/map/map.special/non_member_swap.pass.cpp
index 811acc4573ee8..9a8c86316b604 100644
--- a/libcxx/test/std/containers/associative/map/map.special/non_member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.special/non_member_swap.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/map/types.pass.cpp b/libcxx/test/std/containers/associative/map/types.pass.cpp
index 35fc067433f02..0cfb7c00e813c 100644
--- a/libcxx/test/std/containers/associative/map/types.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/types.pass.cpp
@@ -31,6 +31,7 @@
 #include <map>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/empty.pass.cpp b/libcxx/test/std/containers/associative/multimap/empty.pass.cpp
index 12866a0f2d1c9..45a1d76bd83fa 100644
--- a/libcxx/test/std/containers/associative/multimap/empty.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/empty.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp b/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp
index 0132ce9fe12e7..6e7c8e770202e 100644
--- a/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 struct A {
     typedef std::multimap<A, A> Map;
     int data;
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp
index 6e7e3aa09723d..4c16350ecdf5c 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp
index 0374062515c68..5c1d4c9680ae0 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp
index 54bf998ecdd11..223bb6204b4d6 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp
index 44942036c73bf..3405dd3b14e5c 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_alloc.pass.cpp
index 7144a25f9d44a..42a6f23d0f87b 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_assign.pass.cpp
index 6816a5ee43f0d..c102baa16d15b 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/copy_assign.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/default.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/default.pass.cpp
index 6b33088434db5..5248874c2e48d 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/default.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp
index b51b6b63b7de8..e468ebf023b1a 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/default_recursive.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 struct X
 {
     std::multimap<int, X> m;
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list.pass.cpp
index 2642ba6a8eeb6..78b98bc94f210 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare.pass.cpp
index c8e2d293fa078..591cf1e306c79 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare_alloc.pass.cpp
index 592dec9974974..8eba09a508402 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/initializer_list_compare_alloc.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp.pass.cpp
index d10904a4d448b..0c76fabef1879 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp_alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp_alloc.pass.cpp
index a71c757ffd727..199200947bdea 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/iter_iter_comp_alloc.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/move.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/move.pass.cpp
index cef6857740c79..c7255e00cba7a 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/move_assign.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/move_assign.pass.cpp
index 386c11ecba2c9..e5365438273e1 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace.pass.cpp
index 76d9b171832b5..e5d0ecab52ee8 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace.pass.cpp
@@ -18,6 +18,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace_hint.pass.cpp
index 3ad09f38a70d4..ae9797975ba92 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/emplace_hint.pass.cpp
@@ -18,6 +18,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter.pass.cpp
index a0f70d640bd8a..3da8eebad31dd 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct TemplateConstructor
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter_iter.pass.cpp
index deef1a1461dc6..efaf9a8da7c22 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_iter_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_key.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_key.pass.cpp
index 0ab1d4cb8abfd..ea8f6b2fe43fd 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/erase_key.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp
index fe3c788324421..02ee3aec96db8 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp
index e2a80dab25ff7..f15a0e7af39d9 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp
index 9a791af57e61c..6b76b5c7ecd88 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_allocator_requirements.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../../map_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_initializer_list.pass.cpp
index 33104ca88b535..d43cc9c9483bd 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_iter_iter.pass.cpp
index 9533a62896d74..5bf2cf511f3c2 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type.pass.cpp
index 7fb62a7c16ee1..9fcd741a2a450 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <type_traits>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp
index 847a701369268..3490b7ca8752f 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.ops/count0.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.ops/count0.pass.cpp
index 75f9f22281655..03aa04d8060a4 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.ops/count0.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.ops/count0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.ops/equal_range0.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.ops/equal_range0.pass.cpp
index c01395f0a4122..38575fdee7cb1 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.ops/equal_range0.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.ops/equal_range0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.ops/find0.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.ops/find0.pass.cpp
index 39a8735bccbb0..1be9c26d183c4 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.ops/find0.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.ops/find0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.ops/lower_bound0.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.ops/lower_bound0.pass.cpp
index 1311c9c5a47be..19be658683086 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.ops/lower_bound0.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.ops/lower_bound0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.ops/upper_bound0.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.ops/upper_bound0.pass.cpp
index 28c9ff75a7510..78df2c4968d29 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.ops/upper_bound0.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.ops/upper_bound0.pass.cpp
@@ -23,6 +23,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "is_transparent.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.special/member_swap.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.special/member_swap.pass.cpp
index fe8399713efe0..dfe2831890569 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.special/member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.special/member_swap.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.special/non_member_swap.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.special/non_member_swap.pass.cpp
index 3e75991ee9533..da4a0ef2175c9 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.special/non_member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.special/non_member_swap.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <map>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../test_compare.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multimap/scary.pass.cpp b/libcxx/test/std/containers/associative/multimap/scary.pass.cpp
index faf839b8ee17f..7dcd4e1a09bac 100644
--- a/libcxx/test/std/containers/associative/multimap/scary.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/scary.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <map>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::map<int, int> M1;
diff --git a/libcxx/test/std/containers/associative/multimap/size.pass.cpp b/libcxx/test/std/containers/associative/multimap/size.pass.cpp
index df18f7b58ccad..fdb8f72b5f712 100644
--- a/libcxx/test/std/containers/associative/multimap/size.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/size.pass.cpp
@@ -15,6 +15,7 @@
 #include <map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multimap/types.pass.cpp b/libcxx/test/std/containers/associative/multimap/types.pass.cpp
index 67723f5cdc4a6..c9d2f3da6638d 100644
--- a/libcxx/test/std/containers/associative/multimap/types.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/types.pass.cpp
@@ -31,6 +31,7 @@
 #include <map>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/emplace.pass.cpp b/libcxx/test/std/containers/associative/multiset/emplace.pass.cpp
index 1cabd12fce047..0092242f3df1e 100644
--- a/libcxx/test/std/containers/associative/multiset/emplace.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/emplace.pass.cpp
@@ -18,6 +18,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multiset/emplace_hint.pass.cpp b/libcxx/test/std/containers/associative/multiset/emplace_hint.pass.cpp
index 17db3b4b366ec..35fe887dc0687 100644
--- a/libcxx/test/std/containers/associative/multiset/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/emplace_hint.pass.cpp
@@ -18,6 +18,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multiset/empty.pass.cpp b/libcxx/test/std/containers/associative/multiset/empty.pass.cpp
index 2ca20491fca47..837c990ccc826 100644
--- a/libcxx/test/std/containers/associative/multiset/empty.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/empty.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/erase_iter.pass.cpp b/libcxx/test/std/containers/associative/multiset/erase_iter.pass.cpp
index bcedbf338a668..b8512d14fdb49 100644
--- a/libcxx/test/std/containers/associative/multiset/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/erase_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct TemplateConstructor
diff --git a/libcxx/test/std/containers/associative/multiset/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/multiset/erase_iter_iter.pass.cpp
index 03c7252859454..830c9dd59a433 100644
--- a/libcxx/test/std/containers/associative/multiset/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/erase_iter_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/erase_key.pass.cpp b/libcxx/test/std/containers/associative/multiset/erase_key.pass.cpp
index 4b1db05296091..df7d334e85f15 100644
--- a/libcxx/test/std/containers/associative/multiset/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/erase_key.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp
index ef2a64eded056..bfbcfa6643b5f 100644
--- a/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp b/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp
index d95667def3a7d..4320d5370d9b1 100644
--- a/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/incomplete_type.pass.cpp b/libcxx/test/std/containers/associative/multiset/incomplete_type.pass.cpp
index a118a6230ebcf..4f2970d5840fb 100644
--- a/libcxx/test/std/containers/associative/multiset/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/incomplete_type.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <set>
 
+#include "test_macros.h"
+
 struct A {
     typedef std::multiset<A> Set;
     int data;
diff --git a/libcxx/test/std/containers/associative/multiset/insert_cv.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_cv.pass.cpp
index 856d54da0d4be..8610152e9d9b6 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_cv.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_cv.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp
index 083dea73917a1..93e66695f6654 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_emplace_allocator_requirements.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <set>
 
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../set_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_initializer_list.pass.cpp
index 7f7a00c15f838..7a49ae4b3bcd5 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_initializer_list.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/insert_iter_cv.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_iter_cv.pass.cpp
index e29e7b484eb51..494a4ae1d1437 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_iter_cv.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_iter_cv.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_iter_iter.pass.cpp
index 242b9d7f6cc98..ce5bd3833dc44 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/insert_iter_rv.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_iter_rv.pass.cpp
index e905c5c407276..3b7f5288bfef1 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_iter_rv.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_iter_rv.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/insert_node_type.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_node_type.pass.cpp
index 7cf2cebfb5814..823a592b67b73 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_node_type.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <set>
 #include <type_traits>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp
index d4d6871fb1374..4f30573611eb6 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <set>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/multiset/insert_rv.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_rv.pass.cpp
index 3f73a2813b093..55a3870ca21e1 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_rv.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_rv.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/alloc.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/alloc.pass.cpp
index 9ceac884b64dc..8aa09cae659d0 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/assign_initializer_list.pass.cpp
index c84b042930ae3..eeb9b79bf045a 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/assign_initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/compare.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/compare.pass.cpp
index d35de106f8531..16f7486cf2d4f 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/compare.pass.cpp
@@ -19,6 +19,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/compare_alloc.pass.cpp
index f044b2790b9f5..0b261e2571e26 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/compare_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_alloc.pass.cpp
index 25e6d6efb2dfa..31b8a2eb82b4d 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_assign.pass.cpp
index 7992c7cae95bd..1848cdba48813 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/copy_assign.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/default.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/default.pass.cpp
index 88c5244f8f9e9..793c0f5c7ef9f 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/default.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare.pass.cpp
index cf4c11dcfb622..ada51d28fd3ea 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <set>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare_alloc.pass.cpp
index 5f26864cd2130..2ff3cee132fd2 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/initializer_list_compare_alloc.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <set>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter.pass.cpp
index 9d521c279055e..f71061467af4b 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter_comp.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter_comp.pass.cpp
index 25b4364c182a9..9f4e9edc7387e 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter_comp.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/iter_iter_comp.pass.cpp
@@ -16,6 +16,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "../../../test_compare.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/move.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/move.pass.cpp
index 0d6cc72a66eb0..873fe015ef31d 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/move_assign.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/move_assign.pass.cpp
index 6f584f22cf8af..fdae5051e80f2 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.special/member_swap.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.special/member_swap.pass.cpp
index 9ac0f1709e956..7e7b097fdff16 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.special/member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.special/member_swap.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.special/non_member_swap.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.special/non_member_swap.pass.cpp
index a3bbf551d3d4d..2686b94dce0be 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.special/non_member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.special/non_member_swap.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <set>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../test_compare.h"
 
diff --git a/libcxx/test/std/containers/associative/multiset/scary.pass.cpp b/libcxx/test/std/containers/associative/multiset/scary.pass.cpp
index 5065ab96eaaac..fc4efd46edee3 100644
--- a/libcxx/test/std/containers/associative/multiset/scary.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/scary.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <set>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::set<int> M1;
diff --git a/libcxx/test/std/containers/associative/multiset/size.pass.cpp b/libcxx/test/std/containers/associative/multiset/size.pass.cpp
index bb5616e9a6218..d0350251dc38a 100644
--- a/libcxx/test/std/containers/associative/multiset/size.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/size.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/multiset/types.pass.cpp b/libcxx/test/std/containers/associative/multiset/types.pass.cpp
index 96e8ec4f0b1d2..a6d0dd58100bb 100644
--- a/libcxx/test/std/containers/associative/multiset/types.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/types.pass.cpp
@@ -31,6 +31,7 @@
 #include <set>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/emplace.pass.cpp b/libcxx/test/std/containers/associative/set/emplace.pass.cpp
index e48f2e1e4474f..0c236cf3c361e 100644
--- a/libcxx/test/std/containers/associative/set/emplace.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/emplace.pass.cpp
@@ -18,6 +18,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/set/emplace_hint.pass.cpp b/libcxx/test/std/containers/associative/set/emplace_hint.pass.cpp
index a7ed7266be208..894d21c4934bb 100644
--- a/libcxx/test/std/containers/associative/set/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/emplace_hint.pass.cpp
@@ -18,6 +18,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/set/empty.pass.cpp b/libcxx/test/std/containers/associative/set/empty.pass.cpp
index c00ab68ffc9e4..8bf1e1295eda4 100644
--- a/libcxx/test/std/containers/associative/set/empty.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/empty.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/erase_iter.pass.cpp b/libcxx/test/std/containers/associative/set/erase_iter.pass.cpp
index 49ce4f29e77a3..8e2f6350da502 100644
--- a/libcxx/test/std/containers/associative/set/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/erase_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct TemplateConstructor
diff --git a/libcxx/test/std/containers/associative/set/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/set/erase_iter_iter.pass.cpp
index 86fd52c2cb4e4..5139ec2f37522 100644
--- a/libcxx/test/std/containers/associative/set/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/erase_iter_iter.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/erase_key.pass.cpp b/libcxx/test/std/containers/associative/set/erase_key.pass.cpp
index 3ceec88500b23..2951434e4d5ce 100644
--- a/libcxx/test/std/containers/associative/set/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/erase_key.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp
index 1ba13e318a6c0..1c55757544de5 100644
--- a/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/set/extract_key.pass.cpp b/libcxx/test/std/containers/associative/set/extract_key.pass.cpp
index 4417e86367b11..f2b33bbaa3d1b 100644
--- a/libcxx/test/std/containers/associative/set/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/associative/set/gcc_workaround.pass.cpp b/libcxx/test/std/containers/associative/set/gcc_workaround.pass.cpp
index 23db04405df24..0eb500210ec16 100644
--- a/libcxx/test/std/containers/associative/set/gcc_workaround.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/gcc_workaround.pass.cpp
@@ -11,6 +11,8 @@
 #include <set>
 std::set<int> s;
 #include <map>
+
+#include "test_macros.h"
 using std::map;
 using std::multimap;
 
diff --git a/libcxx/test/std/containers/associative/set/incomplete_type.pass.cpp b/libcxx/test/std/containers/associative/set/incomplete_type.pass.cpp
index d3b93c59972f8..d50b56ff1efcd 100644
--- a/libcxx/test/std/containers/associative/set/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/incomplete_type.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <set>
 
+#include "test_macros.h"
+
 struct A {
     typedef std::set<A> Set;
     int data;
diff --git a/libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp b/libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp
index 11be14b02b076..918c10ec04223 100644
--- a/libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_and_emplace_allocator_requirements.pass.cpp
@@ -17,6 +17,7 @@
 // UNSUPPORTED: c++98, c++03
 
 #include <set>
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../set_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/associative/set/insert_cv.pass.cpp b/libcxx/test/std/containers/associative/set/insert_cv.pass.cpp
index a97e76eb57668..ff1119355ce1c 100644
--- a/libcxx/test/std/containers/associative/set/insert_cv.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_cv.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/associative/set/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/set/insert_initializer_list.pass.cpp
index ce5cc6fd2d745..6d98d6d899aa0 100644
--- a/libcxx/test/std/containers/associative/set/insert_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_initializer_list.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/insert_iter_cv.pass.cpp b/libcxx/test/std/containers/associative/set/insert_iter_cv.pass.cpp
index be27e5e4e0754..b4de0a355d134 100644
--- a/libcxx/test/std/containers/associative/set/insert_iter_cv.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_iter_cv.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/associative/set/insert_iter_iter.pass.cpp
index 35c2dca30f48c..2bde21aafe9fc 100644
--- a/libcxx/test/std/containers/associative/set/insert_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/insert_iter_rv.pass.cpp b/libcxx/test/std/containers/associative/set/insert_iter_rv.pass.cpp
index 08eba9fe60fb1..045c6a4d6bd67 100644
--- a/libcxx/test/std/containers/associative/set/insert_iter_rv.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_iter_rv.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/insert_node_type.pass.cpp b/libcxx/test/std/containers/associative/set/insert_node_type.pass.cpp
index 188aea2bb0af2..babf088c3822d 100644
--- a/libcxx/test/std/containers/associative/set/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_node_type.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <set>
 #include <type_traits>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp
index 6e8c140997c80..3f94b8ea030e0 100644
--- a/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <set>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/associative/set/insert_rv.pass.cpp b/libcxx/test/std/containers/associative/set/insert_rv.pass.cpp
index 092fd8a710b5a..e17391c480897 100644
--- a/libcxx/test/std/containers/associative/set/insert_rv.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_rv.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/set.cons/alloc.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/alloc.pass.cpp
index 591b28c184b9b..751a5e9ab1f0d 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp
index 0127b1d8147bf..e04f49c999326 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.cons/compare.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/compare.pass.cpp
index a4e9718e2b40d..66115b99c2809 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/compare.pass.cpp
@@ -19,6 +19,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.cons/compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/compare_alloc.pass.cpp
index 41c7d02894b1f..fb13c5fbd0c8c 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/compare_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/compare_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/set.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/copy_alloc.pass.cpp
index 6b1010c33ff27..cdc78e86defb6 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/copy_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/set.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/copy_assign.pass.cpp
index c1f37f83dcc86..abea6ba29660e 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/copy_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/copy_assign.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/set.cons/default.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/default.pass.cpp
index 88dc3a262c803..466f5132f3a88 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/default.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/initializer_list.pass.cpp
index e4742bbdae056..0e9fea7baea02 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/initializer_list.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.cons/initializer_list_compare.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/initializer_list_compare.pass.cpp
index cf4b78af9343c..ef3f3a3d48a81 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/initializer_list_compare.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/initializer_list_compare.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <set>
 #include <cassert>
+#include "test_macros.h"
 #include "../../../test_compare.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/iter_iter.pass.cpp
index 25143a7df100b..89cabff4b9082 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/associative/set/set.cons/iter_iter_comp.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/iter_iter_comp.pass.cpp
index f9c2e4a98ef23..24046ff09a80d 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/iter_iter_comp.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/iter_iter_comp.pass.cpp
@@ -16,6 +16,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "../../../test_compare.h"
 
diff --git a/libcxx/test/std/containers/associative/set/set.cons/move.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/move.pass.cpp
index 516274efca400..178d09f268890 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/associative/set/set.cons/move_assign.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/move_assign.pass.cpp
index ba5c767f30bc1..ca615228d2a14 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "../../../test_compare.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/associative/set/set.special/member_swap.pass.cpp b/libcxx/test/std/containers/associative/set/set.special/member_swap.pass.cpp
index 455c34ec89bb8..ee91ca3bcc34e 100644
--- a/libcxx/test/std/containers/associative/set/set.special/member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.special/member_swap.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/set.special/non_member_swap.pass.cpp b/libcxx/test/std/containers/associative/set/set.special/non_member_swap.pass.cpp
index b111de9cac689..63df517b7a031 100644
--- a/libcxx/test/std/containers/associative/set/set.special/non_member_swap.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.special/non_member_swap.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <set>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../test_compare.h"
 
diff --git a/libcxx/test/std/containers/associative/set/size.pass.cpp b/libcxx/test/std/containers/associative/set/size.pass.cpp
index b73d833769c2e..bdce58e98f3fe 100644
--- a/libcxx/test/std/containers/associative/set/size.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/size.pass.cpp
@@ -15,6 +15,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/associative/set/types.pass.cpp b/libcxx/test/std/containers/associative/set/types.pass.cpp
index 5c7bd25a5b558..2d29a800f9c39 100644
--- a/libcxx/test/std/containers/associative/set/types.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/types.pass.cpp
@@ -31,6 +31,7 @@
 #include <set>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_copy_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_copy_alloc.pass.cpp
index c461718430c57..152030ff5546a 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_copy_alloc.pass.cpp
@@ -24,6 +24,7 @@ make(int n)
     return c;
 }
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 template <class T>
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_move_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_move_alloc.pass.cpp
index 98dc207c19990..bd5afa0efd385 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons.alloc/ctor_move_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_copy.pass.cpp
index 5b7760d0546db..8bbee85a75983 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_copy.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <functional>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_move.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_move.pass.cpp
index 20f62d9bf64eb..71b2116e37fe0 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/assign_move.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp.pass.cpp
index 02f1bcaf0592b..b51d96f31e8ec 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp.pass.cpp
@@ -13,6 +13,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_container.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_container.pass.cpp
index 487b86c5f52cc..de395de1cdf31 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_container.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_container.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <functional>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_rcontainer.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_rcontainer.pass.cpp
index 47980032c05d6..af3d449c193a7 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_rcontainer.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_comp_rcontainer.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_copy.pass.cpp
index fa8bae2b91bfe..5ab4337f8152b 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_copy.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_copy.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <functional>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_default.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_default.pass.cpp
index 4c8dd524aebf4..c50a0fbe51d6b 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_default.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_default.pass.cpp
@@ -13,6 +13,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter.pass.cpp
index d2afe72cad5d4..c3838fc97b8cd 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter.pass.cpp
@@ -15,6 +15,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = {3, 5, 2, 0, 6, 8, 1};
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp.pass.cpp
index caee12f0f5e4e..0ae3d102e6e3c 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp.pass.cpp
@@ -16,6 +16,8 @@
 #include <functional>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = {3, 5, 2, 0, 6, 8, 1};
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_cont.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_cont.pass.cpp
index 0b07667924086..adba98aac85f2 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_cont.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_cont.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = {3, 5, 2, 0, 6, 8, 1};
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_rcont.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_rcont.pass.cpp
index 6bc4417f47859..2f847b30489f3 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_rcont.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_iter_iter_comp_rcont.pass.cpp
@@ -17,6 +17,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_move.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_move.pass.cpp
index 4158012445abe..1468265a1c0a5 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_move.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/ctor_move.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/dtor_noexcept.pass.cpp
index af583a9e27a75..2536036cb4171 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/dtor_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/dtor_noexcept.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_assign_noexcept.pass.cpp
index 3fbd53dc43e01..a941cd006e77f 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_assign_noexcept.pass.cpp
@@ -19,6 +19,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_noexcept.pass.cpp
index 7c6b5f2136f35..450f1f71a7eb4 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.cons/move_noexcept.pass.cpp
@@ -19,6 +19,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/emplace.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/emplace.pass.cpp
index 928533075d021..06d786c9d0567 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/emplace.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/emplace.pass.cpp
@@ -17,6 +17,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.pass.cpp
index f8f9279d576c6..9629bd9b16de7 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/pop.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/pop.pass.cpp
index a6fc9509c8bcf..fcba8329d7e63 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/pop.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/pop.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push.pass.cpp
index 01c0ab61832a3..039087c45211c 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push_rvalue.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push_rvalue.pass.cpp
index cf474dec56552..ce3249ff6c553 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/push_rvalue.pass.cpp
@@ -17,6 +17,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/size.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/size.pass.cpp
index 393a97c28a418..6fa21279637d4 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/size.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/size.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/swap.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/swap.pass.cpp
index bc3f453d886ab..295eaf08e0e3a 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/swap.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/swap.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q1;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/top.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/top.pass.cpp
index ea0e489f61704..72153bc532f7f 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/top.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/top.pass.cpp
@@ -15,6 +15,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap.pass.cpp
index bc75df0d384e1..1f5b86dc0ecf9 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap.pass.cpp
@@ -17,6 +17,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::priority_queue<int> q1;
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap_noexcept.pass.cpp
index ad4254c2b79f0..820a588cbb8c6 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.special/swap_noexcept.pass.cpp
@@ -20,6 +20,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/types.pass.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/types.pass.cpp
index 5471281905017..fb801d831e9e8 100644
--- a/libcxx/test/std/containers/container.adaptors/priority.queue/types.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/priority.queue/types.pass.cpp
@@ -32,6 +32,8 @@
 #include <type_traits>
 #include <vector>
 
+#include "test_macros.h"
+
 struct test
     : private std::priority_queue<int>
 {
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_queue_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_queue_alloc.pass.cpp
index 8a66c6f12e7e4..38ba6f7c4a7c2 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_queue_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_queue_alloc.pass.cpp
@@ -14,6 +14,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rcontainer_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rcontainer_alloc.pass.cpp
index 3af4fb0dacc44..6e6c1b5fa7b82 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rcontainer_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rcontainer_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rqueue_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rqueue_alloc.pass.cpp
index 29a742df7d41c..3aec80f5f6ef7 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rqueue_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons.alloc/ctor_rqueue_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_container.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_container.pass.cpp
index dad35d2d1696a..8162176d14d0f 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_container.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_container.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_copy.pass.cpp
index 19e46a23662e5..5c9c012b617a0 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_copy.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_copy.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_default.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_default.pass.cpp
index c5c8b17a6901e..74f78ca78a43e 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_default.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_default.pass.cpp
@@ -13,6 +13,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_move.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_move.pass.cpp
index c275d5d602f8b..cb7714c25ff62 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_move.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_move.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_rcontainer.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_rcontainer.pass.cpp
index 3812ce9169b5d..1719ca458acca 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_rcontainer.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/ctor_rcontainer.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/dtor_noexcept.pass.cpp
index 18e42ea3a7eda..2e91ace799096 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/dtor_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/dtor_noexcept.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/move_assign_noexcept.pass.cpp
index a82ab8fa41c7c..c1620d0b00237 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.cons/move_assign_noexcept.pass.cpp
@@ -18,6 +18,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_copy.pass.cpp
index 98385a6f7cc3e..8e1f82f1b05f0 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_copy.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_move.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_move.pass.cpp
index de30e5cfe2ebc..18ed0a4aa59cd 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/assign_move.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back.pass.cpp
index cb115c7022294..2fa537cae1cd9 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back_const.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back_const.pass.cpp
index 3a6e4c890372f..822d7dbf08bae 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back_const.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/back_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.pass.cpp
index cc0fc56b7c4eb..844e7ce8e9638 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front.pass.cpp
index 9c8d253fb924b..d4cbf1e913fa1 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front_const.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front_const.pass.cpp
index 5ad1ae97aaec4..4e0fc103780af 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front_const.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/front_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/pop.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/pop.pass.cpp
index 128cda5123723..218bec3bcc536 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/pop.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/pop.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push.pass.cpp
index b2a784ccfa856..d772307e36933 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push_rv.pass.cpp
index 17c442b151efe..c7c6a04fef5e1 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push_rv.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/push_rv.pass.cpp
@@ -15,6 +15,7 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/size.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/size.pass.cpp
index fb4fdfcac4d82..de53c5d669d1f 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/size.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/size.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::queue<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/swap.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/swap.pass.cpp
index 3635cea4a91ae..e342aeb558a89 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/swap.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/swap.pass.cpp
@@ -13,6 +13,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/types.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/types.pass.cpp
index 8623710eaa210..bef4060db2da8 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/types.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/types.pass.cpp
@@ -26,6 +26,8 @@
 #include <queue>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct test
     : private std::queue<int>
 {
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.ops/eq.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.ops/eq.pass.cpp
index b4a3327d86b9a..8e7825b3fef17 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.ops/eq.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.ops/eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.ops/lt.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.ops/lt.pass.cpp
index a8eeb1be19747..246632f75b4de 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.ops/lt.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.ops/lt.pass.cpp
@@ -23,6 +23,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap.pass.cpp
index 1adc4f1f474ee..56fbb7418ac9b 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap.pass.cpp
@@ -14,6 +14,8 @@
 #include <queue>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap_noexcept.pass.cpp
index b8cc387d186b2..48a5332acbebe 100644
--- a/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/queue/queue.special/swap_noexcept.pass.cpp
@@ -19,6 +19,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_copy_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_copy_alloc.pass.cpp
index 0d8481228fd8d..1ca3cdbaa2d5a 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_copy_alloc.pass.cpp
@@ -14,6 +14,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rcontainer_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rcontainer_alloc.pass.cpp
index 5181c673916c4..23a05fd4af911 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rcontainer_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rcontainer_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rqueue_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rqueue_alloc.pass.cpp
index c5ff35d2d7a72..e0e2f21cd40c6 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rqueue_alloc.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons.alloc/ctor_rqueue_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_container.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_container.pass.cpp
index c649e238f5bf3..50a7b338267a4 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_container.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_container.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_copy.pass.cpp
index ef3606366ffeb..e38c12b91b0e9 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_copy.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_copy.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_default.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_default.pass.cpp
index 460cf27ec843f..acfacd4e36a51 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_default.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_default.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_move.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_move.pass.cpp
index 86f4414c91e32..8356963a4c9a2 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_move.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_move.pass.cpp
@@ -15,6 +15,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_rcontainer.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_rcontainer.pass.cpp
index 28fb5655f94f7..7bdbb2631d690 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_rcontainer.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/ctor_rcontainer.pass.cpp
@@ -15,6 +15,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/dtor_noexcept.pass.cpp
index 7c5fd648684e7..8dc8663a46998 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/dtor_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/dtor_noexcept.pass.cpp
@@ -15,6 +15,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/move_assign_noexcept.pass.cpp
index 6ed6b8250aa38..59b1d763e4004 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.cons/move_assign_noexcept.pass.cpp
@@ -18,6 +18,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_copy.pass.cpp
index df34e4c63c879..81ba10e1dfc47 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_copy.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_move.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_move.pass.cpp
index ad77defe21434..894d209344ae5 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/assign_move.pass.cpp
@@ -15,6 +15,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.pass.cpp
index a51045e613df5..e278c4a4c7a1d 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::stack<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/pop.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/pop.pass.cpp
index 95472d7e2c6b2..d97dd8f37e085 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/pop.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/pop.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::stack<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push.pass.cpp
index 70c085f17dd4e..91e08bf98a33e 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::stack<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push_rv.pass.cpp
index 8969d237b54c4..1a847e0a3ca1d 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push_rv.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/push_rv.pass.cpp
@@ -15,6 +15,7 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/size.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/size.pass.cpp
index 26f2e22ee8eac..893984ae2297d 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/size.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/size.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::stack<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/swap.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/swap.pass.cpp
index 88ec3cdfe3d28..560cc823d44fa 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/swap.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/swap.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top.pass.cpp
index 6923cc9d27be2..c5ef96dc2bab8 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::stack<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top_const.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top_const.pass.cpp
index a5e8c49fa203f..974e3419c262d 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top_const.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/top_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::stack<int> q;
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/types.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/types.pass.cpp
index 55fc27f8430dc..944fd42ecccbf 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/types.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/types.pass.cpp
@@ -27,6 +27,8 @@
 #include <vector>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct test
     : private std::stack<int>
 {
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.ops/eq.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.ops/eq.pass.cpp
index 306869f0e9b0b..c826046da96aa 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.ops/eq.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.ops/eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.ops/lt.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.ops/lt.pass.cpp
index 3c8734befda21..5ac440e8e0247 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.ops/lt.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.ops/lt.pass.cpp
@@ -23,6 +23,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap.pass.cpp
index cb1323b58737c..1f4a382eaaaa3 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap.pass.cpp
@@ -14,6 +14,8 @@
 #include <stack>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 C
 make(int n)
diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap_noexcept.pass.cpp
index 415ea607e17eb..eb48c25d3b5c1 100644
--- a/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/stack/stack.special/swap_noexcept.pass.cpp
@@ -19,6 +19,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/container.node/node_handle.pass.cpp b/libcxx/test/std/containers/container.node/node_handle.pass.cpp
index 40cd8d0499ad8..0ddcf0b9070f7 100644
--- a/libcxx/test/std/containers/container.node/node_handle.pass.cpp
+++ b/libcxx/test/std/containers/container.node/node_handle.pass.cpp
@@ -12,6 +12,7 @@
 #include <unordered_map>
 #include <set>
 #include <map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 using namespace std;
diff --git a/libcxx/test/std/containers/sequences/array/array.cons/default.pass.cpp b/libcxx/test/std/containers/sequences/array/array.cons/default.pass.cpp
index daa6a52521082..16671e3b5d1b1 100644
--- a/libcxx/test/std/containers/sequences/array/array.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.cons/default.pass.cpp
@@ -15,6 +15,7 @@
 
 // std::array is explicitly allowed to be initialized with A a = { init-list };.
 // Disable the missing braces warning for this reason.
+#include "test_macros.h"
 #include "disable_missing_braces_warning.h"
 
 struct NoDefault {
diff --git a/libcxx/test/std/containers/sequences/array/array.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/array/array.cons/initializer_list.pass.cpp
index e85269796fb92..110b8a7c20a9a 100644
--- a/libcxx/test/std/containers/sequences/array/array.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.cons/initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 
 // std::array is explicitly allowed to be initialized with A a = { init-list };.
 // Disable the missing braces warning for this reason.
+#include "test_macros.h"
 #include "disable_missing_braces_warning.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/array/array.fill/fill.pass.cpp b/libcxx/test/std/containers/sequences/array/array.fill/fill.pass.cpp
index db7363ab9757a..f480d17393d45 100644
--- a/libcxx/test/std/containers/sequences/array/array.fill/fill.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.fill/fill.pass.cpp
@@ -15,6 +15,7 @@
 
 // std::array is explicitly allowed to be initialized with A a = { init-list };.
 // Disable the missing braces warning for this reason.
+#include "test_macros.h"
 #include "disable_missing_braces_warning.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/array/array.tuple/get_rv.pass.cpp b/libcxx/test/std/containers/sequences/array/array.tuple/get_rv.pass.cpp
index d36fcdcc20668..d1d8b28f4c9d7 100644
--- a/libcxx/test/std/containers/sequences/array/array.tuple/get_rv.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.tuple/get_rv.pass.cpp
@@ -19,6 +19,7 @@
 
 // std::array is explicitly allowed to be initialized with A a = { init-list };.
 // Disable the missing braces warning for this reason.
+#include "test_macros.h"
 #include "disable_missing_braces_warning.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/array/array.tuple/tuple_element.pass.cpp b/libcxx/test/std/containers/sequences/array/array.tuple/tuple_element.pass.cpp
index fbf5210f26fcf..fb7aa7cf02ca5 100644
--- a/libcxx/test/std/containers/sequences/array/array.tuple/tuple_element.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.tuple/tuple_element.pass.cpp
@@ -13,6 +13,8 @@
 #include <array>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test()
 {
diff --git a/libcxx/test/std/containers/sequences/array/array.tuple/tuple_size.pass.cpp b/libcxx/test/std/containers/sequences/array/array.tuple/tuple_size.pass.cpp
index dddcbcaff7f37..e31407ac5a7d7 100644
--- a/libcxx/test/std/containers/sequences/array/array.tuple/tuple_size.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.tuple/tuple_size.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <array>
 
+#include "test_macros.h"
+
 template <class T, std::size_t N>
 void test()
 {
diff --git a/libcxx/test/std/containers/sequences/array/array.zero/tested_elsewhere.pass.cpp b/libcxx/test/std/containers/sequences/array/array.zero/tested_elsewhere.pass.cpp
index 966e603d13481..af4ab87048eb4 100644
--- a/libcxx/test/std/containers/sequences/array/array.zero/tested_elsewhere.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/array.zero/tested_elsewhere.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <array>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 
diff --git a/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp b/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp
index e0ab5b61c4434..41a7153e88d46 100644
--- a/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp
@@ -13,6 +13,8 @@
 #include <array>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 void test_contiguous ( const C &c )
 {
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/alloc.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/alloc.pass.cpp
index e2700b958a73d..8e4560e339f0b 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/alloc.pass.cpp
@@ -13,6 +13,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../NotConstructible.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/assign_initializer_list.pass.cpp
index edca369a54ab5..16164ccc3f690 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/assign_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/copy_alloc.pass.cpp
index 4334fd6a539c8..4e75ff72873e4 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/copy_alloc.pass.cpp
@@ -13,6 +13,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/default.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/default.pass.cpp
index f132eb5bee85d..b7c2ef1e9c13f 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/default.pass.cpp
@@ -13,6 +13,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../NotConstructible.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list.pass.cpp
index 02cbadd6c68ee..794a964a88440 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list_alloc.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list_alloc.pass.cpp
index 1450c978b6882..bef50b5296cd6 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/initializer_list_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/move.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/move.pass.cpp
index b8fdc989247ef..287b5063cef59 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/move.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign.pass.cpp
index 5fcfbb8d9baa2..ffc85d5620c71 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal.pass.cpp
index c26ddec0c1949..985d4bf31b65f 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <deque>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal_initializer_list.pass.cpp
index 5f415020128c9..848874c026000 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/op_equal_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/size_value.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/size_value.pass.cpp
index 8c432182a679e..8730975f5b255 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/size_value.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/size_value_alloc.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/size_value_alloc.pass.cpp
index d7e4b3df99165..a6da71ddba087 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/size_value_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/size_value_alloc.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class T, class Allocator>
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/emplace.pass.cpp
index f4713dfdfef7d..12e9d80f37a84 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/emplace.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/emplace.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter.invalidation.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter.invalidation.pass.cpp
index 54395114a0cd8..de80e5b6f3ed0 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter.invalidation.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter.invalidation.pass.cpp
@@ -16,6 +16,8 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename C>
 void del_at_start(C c)
 {
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp
index 3a8a06d58cf4b..4c82b51a06b4a 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp
@@ -18,6 +18,8 @@
 #include <cstdint>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename C>
 void del_at_start(C c, size_t num)
 {
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_iter_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_iter_initializer_list.pass.cpp
index e0da02f7e7e6d..097d807e06474 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_iter_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_iter_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_rvalue.pass.cpp
index eec8e0a499845..144ee2c872487 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_rvalue.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.invalidation.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.invalidation.pass.cpp
index 7b5427b83686d..9e3bfdf53d3d4 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.invalidation.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.invalidation.pass.cpp
@@ -16,6 +16,8 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename C>
 void test(C c)
 {
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.pass.cpp
index b0315eb54be81..541adc2c5945d 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_back.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.invalidation.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.invalidation.pass.cpp
index 3ff1b5b9f176a..d7d32f3ae7465 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.invalidation.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.invalidation.pass.cpp
@@ -16,6 +16,8 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename C>
 void test(C c)
 {
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.pass.cpp
index 9d25d168492e8..642bee18e35b5 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/pop_front.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back.pass.cpp
index d0a73c37f9961..b813de85cf251 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back.pass.cpp
@@ -15,6 +15,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp
index d4c46f0bcbc39..376f042e53b71 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp
@@ -12,6 +12,7 @@
 // void push_back(const value_type& x);
 
 #include <deque>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_rvalue.pass.cpp
index 29354468134c1..b91853afb2f86 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_rvalue.pass.cpp
@@ -17,6 +17,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front.pass.cpp
index dee483c795710..851acbdeee66b 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp
index 103f2c41d58f7..c43839780d2ba 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <deque>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 
 // Flag that makes the copy constructor for CMyClass throw an exception
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_rvalue.pass.cpp
index 7a66554d12701..84cc5734bff5c 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_rvalue.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.special/copy.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.special/copy.pass.cpp
index f861c424af4bd..e2df2cb84c9ca 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.special/copy.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.special/copy.pass.cpp
@@ -17,6 +17,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.special/copy_backward.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.special/copy_backward.pass.cpp
index b5225ae71bc89..b6417f79463f0 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.special/copy_backward.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.special/copy_backward.pass.cpp
@@ -17,6 +17,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.special/move.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.special/move.pass.cpp
index d1c2a3d726a26..ae0a75167bf30 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.special/move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.special/move.pass.cpp
@@ -17,6 +17,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.special/move_backward.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.special/move_backward.pass.cpp
index 9193609d200cd..50da5b790c824 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.special/move_backward.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.special/move_backward.pass.cpp
@@ -17,6 +17,7 @@
 #include <deque>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/deque.special/swap.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.special/swap.pass.cpp
index 33910e419b73b..61fa31c5e05c8 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.special/swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.special/swap.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <deque>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/deque/types.pass.cpp b/libcxx/test/std/containers/sequences/deque/types.pass.cpp
index cfab930f390d9..0881d89fc7de4 100644
--- a/libcxx/test/std/containers/sequences/deque/types.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/types.pass.cpp
@@ -32,6 +32,7 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../Copyable.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.access/front.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.access/front.pass.cpp
index 2509e9b2c4fa4..74eb3e65bb83c 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.access/front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.access/front.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/alloc.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/alloc.pass.cpp
index c362e20515307..d2dda09d82cd9 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/alloc.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../../NotConstructible.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_copy.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_copy.pass.cpp
index e40d405c9328c..07d34a1ac6ba5 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_copy.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_init.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_init.pass.cpp
index 40405dd2e7374..53dc00f1f6385 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_init.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_init.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_move.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_move.pass.cpp
index 36e4ea0ca0c5b..920bd0ea3ffd5 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_move.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_op_init.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_op_init.pass.cpp
index 14c098b660b0e..317d9e8d4ee70 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_op_init.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_op_init.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_range.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_range.pass.cpp
index c0b934445e0a5..989f1654449b4 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_range.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_size_value.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_size_value.pass.cpp
index ea4cc811e4bfa..4824dd8890aaf 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/assign_size_value.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/copy_alloc.pass.cpp
index 9788ca5ff6bac..3eb33a66d15eb 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/copy_alloc.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default.pass.cpp
index 1694faf466b24..7313909af5a4f 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_recursive.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_recursive.pass.cpp
index 98b120f5350b9..affe050564b87 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_recursive.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_recursive.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <forward_list>
 
+#include "test_macros.h"
+
 struct X
 {
     std::forward_list<X> q;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init.pass.cpp
index fda636073a8f4..f82312b4db010 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init_alloc.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init_alloc.pass.cpp
index cdef7c07ef5df..d20ee2fd563ff 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/init_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move.pass.cpp
index eedec348783bc..274309efa0148 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_alloc.pass.cpp
index 7db6a41e85e72..9288d46bcc561 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range.pass.cpp
index ce9cd59d1a3ad..721cbae2c513e 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range_alloc.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range_alloc.pass.cpp
index 1a85d3feebec5..3451a6941f1ce 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/range_alloc.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value.pass.cpp
index e3f247202d430..4da7394a8b66e 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value_alloc.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value_alloc.pass.cpp
index cc5394f5ab382..fb37e8dbdbbd9 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size_value_alloc.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/before_begin.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/before_begin.pass.cpp
index 638a78327df0d..d633a64d6a541 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/before_begin.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.iter/before_begin.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/emplace_after.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/emplace_after.pass.cpp
index f2581239824ae..8eeb2a6ba7c97 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/emplace_after.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/emplace_after.pass.cpp
@@ -16,6 +16,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_many.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_many.pass.cpp
index 0a431a8965d46..05c190f1c87e9 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_many.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_many.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_one.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_one.pass.cpp
index 59e687f6c4c3d..d3820337fb431 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_one.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/erase_after_one.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_const.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_const.pass.cpp
index 3ba4f9e23f190..2e3426a77b35f 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_const.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_const.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_init.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_init.pass.cpp
index be5c6e5176cf2..bce393d465ae4 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_init.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_init.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_range.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_range.pass.cpp
index 4cbc92edd7437..88b8bc07acd36 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_range.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_rv.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_rv.pass.cpp
index 2495a707a9829..7d331d7981a5d 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_rv.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_rv.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_size_value.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_size_value.pass.cpp
index 7898fea9219ad..198054a1de4d1 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/insert_after_size_value.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/pop_front.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/pop_front.pass.cpp
index d28f10e81923b..e820318d0a5d3 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/pop_front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/pop_front.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_const.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_const.pass.cpp
index 192227ee2b11d..273c959d8cef6 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_const.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_const.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_exception_safety.pass.cpp
index 8b122f1f937be..a1590abfea7b6 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_exception_safety.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_exception_safety.pass.cpp
@@ -14,6 +14,8 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
+
 // Flag that makes the copy constructor for CMyClass throw an exception
 static bool gCopyConstructorShouldThow = false;
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_rv.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_rv.pass.cpp
index 268101671cb55..dfbe53e7a3426 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_rv.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/push_front_rv.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/resize_size.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/resize_size.pass.cpp
index ed2de98007293..dc631fe9aa2a4 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/resize_size.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.modifiers/resize_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge.pass.cpp
index 5a55ae963de36..95d21b0c23cce 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_pred.pass.cpp
index 3de61a37f7253..e6255556b8410 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_pred.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_pred.pass.cpp
@@ -15,6 +15,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove.pass.cpp
index ca3ec253da326..3b5e4a21b19ae 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct S {
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp
index a7218f7c05949..2a4f079a3bdee 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "counting_predicates.hpp"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/reverse.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/reverse.pass.cpp
index 82b6813fff082..60f21b737cfd0 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/reverse.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/reverse.pass.cpp
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class C>
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort.pass.cpp
index 50dcdd41f16e7..c0f3f3668b6a7 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort.pass.cpp
@@ -17,6 +17,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort_pred.pass.cpp
index 0e676938169fb..6af5df873f288 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort_pred.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/sort_pred.pass.cpp
@@ -19,6 +19,7 @@
 #include <cassert>
 #include <iostream>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_flist.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_flist.pass.cpp
index e883aee2ce4be..960c9beec60b9 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_flist.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_flist.pass.cpp
@@ -15,6 +15,7 @@
 #include <iterator>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 typedef int T;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp
index 741e58689f726..4f34d7eb6555a 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 typedef ptrdiff_t T;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique.pass.cpp
index 07a4eae97b0d6..6ce2da5736446 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp
index 97d8c6c389c84..84a825e3668f1 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct PredLWG529 {
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp
index 5e0438c62bfe4..1ce4e5576730c 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/member_swap.pass.cpp
@@ -14,6 +14,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp
index 5b9b590d55512..c61068d5997b9 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/non_member_swap.pass.cpp
@@ -15,6 +15,7 @@
 #include <forward_list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/forwardlist/types.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/types.pass.cpp
index 01a7db039d46f..9867bf855e8b2 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/types.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/types.pass.cpp
@@ -27,6 +27,7 @@
 #include <forward_list>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct A { std::forward_list<A> v; }; // incomplete type support
diff --git a/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp b/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp
index e68f06176b7b5..103117fb40fd3 100644
--- a/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <list>
 
+#include "test_macros.h"
+
 struct A {
     std::list<A> l;
     std::list<A>::iterator it;
diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp
index 3c9e240d7523b..671e89ecff5c8 100644
--- a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp
index db1c1419cca90..112287a550b3d 100644
--- a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp
index 91ac1cfa6aa4f..39369d4f8295c 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp
index 54c91ae4a12cc..285d62be5f9b2 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp
index 1e826ed911503..f5492692a61ae 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp
index 14e9585022560..99d7879de15c9 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp
index 348390c458607..32f2729b70960 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp
index 2e0b5203b8e97..67c886b4ee7bc 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp
index bd4ffeb137134..82535b3f5d2e5 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp
index f6eca2112e1d5..ed0d1cc5ffb3d 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp
index f7491f4501f78..3666f32e39195 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp
index a79a3723231cf..8668a282feb89 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp
index d18ea2c4a0f4f..01e5ba4a14be3 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp
index 58ea88f431506..bc5bf41d31f91 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp
index 282de37bbc536..bced58d0208ba 100644
--- a/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp
index eece4186caa10..5328b14c73dda 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp
@@ -16,6 +16,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 class A
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp
index 87486b61690dd..b7cb3003dd390 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp
index 1df39913c53ca..fbd7628bfbce4 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp
index 98dcd0ba28df7..7dbdcec337a22 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp
index 4ded48991a5bf..8d8cd9ea8c19a 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp
index 3decb94c4214f..7f20e0a4c11b6 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp
index e4aa404416f56..29f22a566bf34 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_exception_safety.pass.cpp
index a6bbc256e0beb..e4595ef251279 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_exception_safety.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_exception_safety.pass.cpp
@@ -14,6 +14,8 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
+
 // Flag that makes the copy constructor for CMyClass throw an exception
 static bool gCopyConstructorShouldThow = false;
 
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp
index 5894c2cc889d0..9561b88c89ad6 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp
index 27e39e90a7bc3..2b6393179a209 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_exception_safety.pass.cpp
index 49f86222aeed4..ebc9ac7f44be7 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_exception_safety.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_exception_safety.pass.cpp
@@ -14,6 +14,8 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
+
 // Flag that makes the copy constructor for CMyClass throw an exception
 static bool gCopyConstructorShouldThow = false;
 
diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp
index 0020112858f03..4df0c7a35525f 100644
--- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp
@@ -15,6 +15,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp
index c2a552cf62c10..08a5139f942b0 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp
@@ -14,6 +14,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp
index 911c3d09af18f..594e58643e7b7 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp
@@ -15,6 +15,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp
index ec9b756ce5965..37242355e46d7 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp
@@ -14,6 +14,7 @@
 #include <cassert>
 #include <functional>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "counting_predicates.hpp"
 
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp
index 0cf1242df5913..6ebd7f9b611f7 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp
index 816287288703d..820419ac34e51 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp
index 2f8b08b001601..2c8710a935b01 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp
@@ -17,6 +17,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 std::mt19937 randomness;
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp
index 651ffbc7baee8..90f0fd2f37cd3 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp
index d929a35d325d3..6338324bcfa75 100644
--- a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp
@@ -13,6 +13,7 @@
 #include <list>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 bool g(int x, int y)
diff --git a/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp b/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp
index be7df207bdd19..a82070d5cb854 100644
--- a/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <list>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/list/types.pass.cpp b/libcxx/test/std/containers/sequences/list/types.pass.cpp
index 914f9abab7d07..6e6b07dde5469 100644
--- a/libcxx/test/std/containers/sequences/list/types.pass.cpp
+++ b/libcxx/test/std/containers/sequences/list/types.pass.cpp
@@ -24,6 +24,7 @@
 #include <list>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct A { std::list<A> v; }; // incomplete type support
diff --git a/libcxx/test/std/containers/sequences/vector.bool/assign_copy.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/assign_copy.pass.cpp
index 5aa86839ff4f0..c1233ea8f10c0 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/assign_copy.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/assign_initializer_list.pass.cpp
index bbd980bd32552..968ddfbe61bc3 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/assign_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/assign_move.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/assign_move.pass.cpp
index b70e9cde65b44..e1b66957b2444 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/assign_move.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/capacity.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/capacity.pass.cpp
index e24ebe00c6a04..cb14329067e06 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/capacity.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/capacity.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/emplace.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/emplace.pass.cpp
index 129cbff154ec3..d525371f3a701 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/emplace.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/emplace.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp
index f6631cf2d49c3..9ed0898fa0d02 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/enabled_hash.pass.cpp
@@ -16,6 +16,7 @@
 #include <vector>
 
 #include "poisoned_hash_helper.hpp"
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**) {
diff --git a/libcxx/test/std/containers/sequences/vector.bool/erase_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/erase_iter.pass.cpp
index c3d6bfd5de01f..9cb75bd9a49e5 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/erase_iter.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/erase_iter_iter.pass.cpp
index 89763017e9714..df5c7070092d1 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/erase_iter_iter.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/find.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/find.pass.cpp
index 883b5b4dbca58..0c587e32ff15b 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/find.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/find.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/sequences/vector.bool/initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/initializer_list.pass.cpp
index d510b86fed076..915eeb31baa5c 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/initializer_list_alloc.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/initializer_list_alloc.pass.cpp
index 27d8420a6df7d..a712d1ba4f703 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/initializer_list_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/initializer_list_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_initializer_list.pass.cpp
index 519752da1030b..3f75cfafc3350 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp
index 5774ab5f6ca7b..7019b6d05d52b 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp
index 2502865c78e2b..593b41d8bef69 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/move_alloc.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/move_alloc.pass.cpp
index f2ff5303b0fa5..fbeecd5561ad1 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/move_alloc.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/op_equal_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/op_equal_initializer_list.pass.cpp
index 22384fe8d06c9..42e61c4624a06 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/op_equal_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/op_equal_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/push_back.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/push_back.pass.cpp
index 438869be16cc3..b49dd1151b284 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/push_back.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/push_back.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/reserve.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/reserve.pass.cpp
index 039c1bc18fa34..23bd49d9730ed 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/reserve.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/reserve.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/resize_size.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/resize_size.pass.cpp
index 53e83ac7effad..c9d7f3dbb9ee4 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/resize_size.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/resize_size.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/resize_size_value.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/resize_size_value.pass.cpp
index ef0cb6160c6a3..9cc7e2c98d70c 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/resize_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/resize_size_value.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
index 59714d5fb5ccb..7ec8e66fbea45 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/shrink_to_fit.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector.bool/swap.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/swap.pass.cpp
index 9ff11113d82c8..6dbd08cea9520 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/swap.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector.bool/types.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/types.pass.cpp
index d15973a38b1b8..07053ee4b03cf 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/types.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/types.pass.cpp
@@ -30,6 +30,7 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../Copyable.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector_bool.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector_bool.pass.cpp
index 5f9ae3decb6ec..e2cf53ec6bd3a 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/vector_bool.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/vector_bool.pass.cpp
@@ -21,6 +21,7 @@
 #include <cassert>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp b/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp
index 99d9d6ecae7a3..21b90a698842e 100644
--- a/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/types.pass.cpp b/libcxx/test/std/containers/sequences/vector/types.pass.cpp
index 0a04c25527a63..3dbc9592c3266 100644
--- a/libcxx/test/std/containers/sequences/vector/types.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/types.pass.cpp
@@ -32,6 +32,7 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "../../Copyable.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/capacity.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/capacity.pass.cpp
index a8ee9f2296234..41d885f250e03 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.capacity/capacity.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/capacity.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp
index 4cf3b2d338c05..78cfcc3422cb9 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/resize_size_value.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/resize_size_value.pass.cpp
index 4d9f7931d8aca..726ff8a7abef8 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.capacity/resize_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/resize_size_value.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp
index 36125bb932c38..ec19d6f137911 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/shrink_to_fit.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/swap.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/swap.pass.cpp
index e2fa0d8b92832..33df254c11696 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.capacity/swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/swap.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.pass.cpp
index f6d8dd50b7e50..a318546119060 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_copy.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_initializer_list.pass.cpp
index 4673df9558de8..c245f6739e61d 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.pass.cpp
index 4b70c7843f19a..9f35971575133 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_move.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_size_value.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_size_value.pass.cpp
index b33d5066e4f31..ff590ba99195f 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_size_value.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_size_value.pass.cpp
@@ -14,6 +14,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/default.recursive.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/default.recursive.pass.cpp
index 1558ea4b619f4..449751af11156 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/default.recursive.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/default.recursive.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <vector>
 
+#include "test_macros.h"
+
 struct X
 {
     std::vector<X> q;
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list.pass.cpp
index 168e3b58bd5a4..e94a7119ce2bf 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list_alloc.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list_alloc.pass.cpp
index 633b5c5e2ea12..5112f54c71d30 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/initializer_list_alloc.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move_alloc.pass.cpp
index 2f15a14e8c89d..f091fb5dce32c 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/move_alloc.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <vector>
 #include <cassert>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp
index c5c5e29cda4f7..2488a3b7fdf1d 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp
@@ -20,6 +20,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp
index 1228414289f3e..55356af0c6ad5 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp
@@ -18,6 +18,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/op_equal_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/op_equal_initializer_list.pass.cpp
index 61c20b7c7f062..ee847510a8b75 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/op_equal_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/op_equal_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.data/data.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.data/data.pass.cpp
index 3477c5eb1bd0b..91eb2a2b324c5 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.data/data.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.data/data.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp
index ec5016d2f6cbe..2b76fe42d6e67 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.data/data_const.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp
index ec5f778565a6f..993122a0cb6c0 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp
index aac35f9f8114a..fb9b4bfbe4ef7 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp
index 7682000d75e77..dae920ca008e8 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_initializer_list.pass.cpp
index 30d0cd686a4e0..018d4515d0b9a 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_initializer_list.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
 
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back.pass.cpp
index d22136ddbcbe7..dcb6d4448ed50 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back.pass.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <cassert>
 #include <cstddef>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back_rvalue.pass.cpp
index d876eb6170a12..26ebf1d7db0c4 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/push_back_rvalue.pass.cpp
@@ -15,6 +15,7 @@
 #include <vector>
 #include <cassert>
 #include <cstddef>
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/sequences/vector/vector.special/swap.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.special/swap.pass.cpp
index 2ded4fe750f66..7e89ea9852796 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.special/swap.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.special/swap.pass.cpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <iterator>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 #include "asan_testing.h"
diff --git a/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp b/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp
index 7d6cbf9a8ad16..b9c0c2b81d66f 100644
--- a/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp
@@ -18,6 +18,8 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Key {
   template <typename T> Key(const T&) {}
   bool operator== (const Key&) const { return true; }
diff --git a/libcxx/test/std/containers/unord/unord.map/count.pass.cpp b/libcxx/test/std/containers/unord/unord.map/count.pass.cpp
index 1a1bea90b8268..fc83dc4f9258e 100644
--- a/libcxx/test/std/containers/unord/unord.map/count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/count.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp b/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp
index d284e822ade86..5c924f07e5376 100644
--- a/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp
@@ -22,6 +22,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/equal_range_const.pass.cpp b/libcxx/test/std/containers/unord/unord.map/equal_range_const.pass.cpp
index b9dd9a64e2ec2..3519fb531a1ee 100644
--- a/libcxx/test/std/containers/unord/unord.map/equal_range_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/equal_range_const.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/equal_range_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.map/equal_range_non_const.pass.cpp
index 029222d5caf8e..47dc892de37e2 100644
--- a/libcxx/test/std/containers/unord/unord.map/equal_range_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/equal_range_non_const.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/find_const.pass.cpp b/libcxx/test/std/containers/unord/unord.map/find_const.pass.cpp
index 1d63b4be2ef08..7da7a9c321b64 100644
--- a/libcxx/test/std/containers/unord/unord.map/find_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/find_const.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/find_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.map/find_non_const.pass.cpp
index 58a9cd35afd46..e242a7d2a8d04 100644
--- a/libcxx/test/std/containers/unord/unord.map/find_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/find_non_const.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/incomplete_type.pass.cpp b/libcxx/test/std/containers/unord/unord.map/incomplete_type.pass.cpp
index ddcd6e14c50b8..cf2cc679f704c 100644
--- a/libcxx/test/std/containers/unord/unord.map/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/incomplete_type.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <unordered_map>
 
+#include "test_macros.h"
+
 template <class Tp>
 struct MyHash {
   MyHash() {}
diff --git a/libcxx/test/std/containers/unord/unord.map/local_iterators.pass.cpp b/libcxx/test/std/containers/unord/unord.map/local_iterators.pass.cpp
index e24e1811ac1d8..5da3a0b3d932c 100644
--- a/libcxx/test/std/containers/unord/unord.map/local_iterators.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/local_iterators.pass.cpp
@@ -23,6 +23,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/types.pass.cpp b/libcxx/test/std/containers/unord/unord.map/types.pass.cpp
index e194f69f8ac17..403a0c1599ada 100644
--- a/libcxx/test/std/containers/unord/unord.map/types.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/types.pass.cpp
@@ -30,6 +30,7 @@
 #include <unordered_map>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_init.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_init.pass.cpp
index e9e6289888622..106ab173ab3b1 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_init.pass.cpp
@@ -23,6 +23,7 @@
 #include <cmath>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.elem/index_tuple.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.elem/index_tuple.pass.cpp
index 4719b5583b31e..d88750bcd3353 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.elem/index_tuple.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.elem/index_tuple.pass.cpp
@@ -21,6 +21,8 @@
 #include <unordered_map>
 #include <tuple>
 
+#include "test_macros.h"
+
 using namespace std;
 
 struct my_hash
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace.pass.cpp
index 63a269601f093..6a4588d30e952 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace.pass.cpp
@@ -20,6 +20,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.pass.cpp
index 01e8d9c6fab7b..43f8ce901e990 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp
index 38ba03d5e88f6..9ed47578d0fe6 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db1.pass.cpp
@@ -17,6 +17,8 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp
index 887f7859b3e72..0b44f0be2e961 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <exception>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp
index 15c6745adc95f..6ca0f7a615ad2 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db1.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp
index 0ae0674adb79d..7c714abc69e03 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp
index 134d075ec6ec1..9061bb02a6438 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db3.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp
index 17745175bae1a..0edd67a585188 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_iter_iter_db4.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp
index 839d65733dea5..a14c26f943192 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp
index 3ad30510f92c0..c93647dec521c 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp
index 4d6c24e6e996c..300ee2ec296cb 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
index 71d456e6619d1..383af960b4b63 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_requirements.pass.cpp
@@ -18,6 +18,7 @@
 
 #include <unordered_map>
 
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../../map_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp
index b1b77eb9c184e..10032c090673c 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp
index b7374d1a6eeac..bcfc785ece4fd 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_rvalue.pass.cpp
@@ -25,6 +25,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_init.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_init.pass.cpp
index 27e8747348e7c..136aa101ae2a3 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_init.pass.cpp
@@ -20,6 +20,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type.pass.cpp
index 24d0a23a59ff3..d09941d77d43d 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type.pass.cpp
@@ -15,6 +15,7 @@
 // insert_return_type insert(node_type&&);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp
index 21ccb88caf0bb..432ae387e3523 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp
index 7fc3ff187be43..71d0820f195a5 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp
@@ -25,6 +25,8 @@
 #include <cassert>
 #include <tuple>
 
+#include "test_macros.h"
+
 
 class Moveable
 {
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_range.pass.cpp
index 1d51bdb4fbc43..ac83319554736 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_range.pass.cpp
@@ -19,6 +19,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue.pass.cpp
index 1f8528ab5b167..3725f453ff90a 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp
index c3ee0050a26a1..a4cba68655482 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp
@@ -25,6 +25,8 @@
 #include <cassert>
 #include <tuple>
 
+#include "test_macros.h"
+
 class Moveable
 {
     Moveable(const Moveable&);
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp
index 67a49d4bb8bf9..6a80c09f9a7a9 100644
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/db_swap_1.pass.cpp
@@ -21,6 +21,8 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if _LIBCPP_DEBUG >= 1
diff --git a/libcxx/test/std/containers/unord/unord.multimap/count.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/count.pass.cpp
index 4a6ec5d6576e8..d3a82bed259a6 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/count.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp
index 463b49938efbc..871bd791b9373 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/db_iterators_7.pass.cpp
@@ -21,6 +21,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp
index 38395e5f9605c..821de2553a591 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/db_iterators_8.pass.cpp
@@ -21,6 +21,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp
index 04d8b3ff06c55..9da1b6113be02 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_7.pass.cpp
@@ -21,6 +21,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp
index 69ef06993dcf0..dd56647d140ec 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/db_local_iterators_8.pass.cpp
@@ -21,6 +21,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp
index 5751916875fd1..b47fa32529e0e 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp
@@ -22,6 +22,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/equal_range_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/equal_range_const.pass.cpp
index 148081be7be6a..e936b96296a35 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/equal_range_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/equal_range_const.pass.cpp
@@ -19,6 +19,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/equal_range_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/equal_range_non_const.pass.cpp
index 5da87166bdab4..7549103e82563 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/equal_range_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/equal_range_non_const.pass.cpp
@@ -19,6 +19,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/find_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/find_const.pass.cpp
index 271bf9d0c6085..b35bed3c5644e 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/find_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/find_const.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/find_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/find_non_const.pass.cpp
index 3e642e346f149..2b823723dd7f7 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/find_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/find_non_const.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/incomplete.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/incomplete.pass.cpp
index 6ea4931299b6d..5d76e6f83e989 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/incomplete.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/incomplete.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <unordered_map>
 
+#include "test_macros.h"
+
 template <class Tp>
 struct MyHash {
   MyHash() {}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/load_factor.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/load_factor.pass.cpp
index ae8a8403b8594..4e848ef50f919 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/load_factor.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/load_factor.pass.cpp
@@ -20,6 +20,7 @@
 #include <cfloat>
 #include <cmath>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/local_iterators.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/local_iterators.pass.cpp
index b5dd2d4b0fd03..12b11aac8face 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/local_iterators.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/local_iterators.pass.cpp
@@ -24,6 +24,7 @@
 #include <set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/max_bucket_count.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/max_bucket_count.pass.cpp
index c55f4de8d44de..1dac9024cbedd 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/max_bucket_count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/max_bucket_count.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/max_load_factor.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/max_load_factor.pass.cpp
index ed46b681b7d5b..05a964d7998ad 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/max_load_factor.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/max_load_factor.pass.cpp
@@ -23,6 +23,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/scary.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/scary.pass.cpp
index 4c4b1cd110e0a..b306da2f818ed 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/scary.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/scary.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <unordered_map>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::unordered_map<int, int> M1;
diff --git a/libcxx/test/std/containers/unord/unord.multimap/types.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/types.pass.cpp
index 2cb74a1545d7f..5af2f68b61686 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/types.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/types.pass.cpp
@@ -30,6 +30,7 @@
 #include <unordered_map>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/assign_init.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/assign_init.pass.cpp
index dc0cbd6f48696..fe8666dd44739 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/assign_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/assign_init.pass.cpp
@@ -24,6 +24,7 @@
 #include <cmath>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../check_consecutive.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace.pass.cpp
index 1a20fb876daa0..1a38c816978af 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace.pass.cpp
@@ -20,6 +20,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp
index 3c1cb13b30edb..b237e3a26b9fb 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp
@@ -20,6 +20,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../check_consecutive.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp
index 30fae95f7595f..940db985cbc7e 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db1.pass.cpp
@@ -17,6 +17,8 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp
index 3c0418497b0b7..4da7e60a6fa5a 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <exception>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp
index 6c3a2cf38556f..b14c85dabd1a4 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db1.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp
index 6b0ea3528c741..03be37ae73227 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp
index 0b53c1cdf08f9..cd1892ee17d6e 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db3.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp
index 7cea5e78938b0..7ecf36646db3d 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_iter_iter_db4.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_key.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_key.pass.cpp
index f13f4bd7c08df..25464c2c3495a 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_key.pass.cpp
@@ -20,6 +20,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../check_consecutive.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
index 5cea5a5fa7d58..4a0d607238141 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_range.pass.cpp
@@ -20,6 +20,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../check_consecutive.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp
index a06aca77432c7..2b13def4b1aad 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp
index 272d5acfb76ab..a6e79e1013c15 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp
index 73fe6b49b79c8..31dd4b3851f8b 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_allocator_requirements.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <unordered_map>
 
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../../map_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_const_lvalue.pass.cpp
index 8eaa69509495d..fb3d390aa74ac 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_const_lvalue.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp
index 83cf7b86acb3d..ee7b3374ad1aa 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_const_lvalue.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp
index 2993fe7572aeb..ec54c1152d237 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_hint_rvalue.pass.cpp
@@ -25,6 +25,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_init.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_init.pass.cpp
index e586512974264..ea3726df3cf5e 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_init.pass.cpp
@@ -22,6 +22,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../check_consecutive.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type.pass.cpp
index fd1cfa114cc00..9840748a648df 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(node_type&&);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp
index 70d207551a204..1a301f87c36b5 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <unordered_map>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_range.pass.cpp
index 9ba37088272d7..55067be7ca89f 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_range.pass.cpp
@@ -21,6 +21,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../check_consecutive.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_rvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_rvalue.pass.cpp
index 92b91b07c3184..8f315b537e806 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_rvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_rvalue.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp
index 3e15211d50101..65fb5ae37860a 100644
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.swap/db_swap_1.pass.cpp
@@ -21,6 +21,8 @@
 #include <unordered_map>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if _LIBCPP_DEBUG >= 1
diff --git a/libcxx/test/std/containers/unord/unord.multiset/count.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/count.pass.cpp
index 40cef2af203f4..9feb51c789259 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/count.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp
index 3b5c4582f2a03..0b401e66d708b 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/db_iterators_7.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp
index c86f4a28960dc..88280e6353d3d 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/db_iterators_8.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp
index a04ad804bacb7..5ec15771c49ce 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_7.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp
index 85e1ccacb8d18..0d92b9578166d 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/db_local_iterators_8.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/emplace.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/emplace.pass.cpp
index 67c5d1642066f..21590dbe9280c 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/emplace.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/emplace.pass.cpp
@@ -20,6 +20,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/emplace_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/emplace_hint.pass.cpp
index 61f06edd485b4..ea7886b9cbda2 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/emplace_hint.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/eq.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/eq.pass.cpp
index 761ad7051e7e2..22d3207d0ff74 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/eq.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/eq.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/equal_range_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/equal_range_const.pass.cpp
index ddfd77e18c6cf..6ef53875fcbcb 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/equal_range_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/equal_range_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/equal_range_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/equal_range_non_const.pass.cpp
index a148f65b173d1..3c70d8c89cf9b 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/equal_range_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/equal_range_non_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp
index 0a92f7d06210d..2dee50143df8f 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct TemplateConstructor
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp
index 073043f5a364b..788b5ac4f63b6 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db1.pass.cpp
@@ -17,6 +17,8 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp
index 28768eaf5c2b3..68828d5f84e81 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <exception>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp
index 0a9853d66e36c..118fadf522154 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db1.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp
index cc1ec00961a13..ad6b8cddcde1e 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp
index a1de8cb7c7108..5b5bbbae77f8f 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db3.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp
index a82ecfc3cc364..fbeb8e0752012 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_iter_iter_db4.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_key.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_key.pass.cpp
index ba7248d746885..522b6de2b87b1 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_key.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 #if TEST_STD_VER >= 11
diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp
index c6bb4b597c3f5..fdb65ff025c53 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/erase_range.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp
index 01994120c8a30..2e32fcfcf3166 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp
index 380b39f85e2f1..f718f0766a8e1 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/find_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/find_const.pass.cpp
index efa8bfc12b529..65570dcb51e93 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/find_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/find_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/find_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/find_non_const.pass.cpp
index 4eeb8ac6a8054..a70109bb94448 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/find_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/find_non_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/incomplete.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/incomplete.pass.cpp
index 0aeb246ca437a..a90aa9a7e1975 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/incomplete.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/incomplete.pass.cpp
@@ -15,6 +15,8 @@
 
 #include <unordered_set>
 
+#include "test_macros.h"
+
 template <class Tp>
 struct MyHash {
   MyHash() {}
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_const_lvalue.pass.cpp
index 8200dc2fc94e7..6f7c05bea2bab 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_const_lvalue.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp
index d7474f24e7c0f..0f05fb2a84781 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_emplace_allocator_requirements.pass.cpp
@@ -15,6 +15,7 @@
 // UNSUPPORTED: c++98, c++03
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../set_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp
index ede013ef99e43..981481b6cad79 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_hint_const_lvalue.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_init.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_init.pass.cpp
index 4467c74b4395e..3d45651b23087 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_init.pass.cpp
@@ -19,6 +19,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_node_type.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_node_type.pass.cpp
index c660ab22d6be3..9a02b2ef5df9f 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_node_type.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <unordered_set>
 #include <type_traits>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp
index e95dd31e39ece..8766fa067a777 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_range.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_range.pass.cpp
index b8742f520b922..a3a97494aabb6 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/insert_range.pass.cpp
@@ -18,6 +18,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/load_factor.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/load_factor.pass.cpp
index bb3350e868988..76c8132efe408 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/load_factor.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/load_factor.pass.cpp
@@ -19,6 +19,7 @@
 #include <cfloat>
 #include <cmath>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/local_iterators.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/local_iterators.pass.cpp
index 5aa65756371c8..5d4053272d176 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/local_iterators.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/local_iterators.pass.cpp
@@ -22,6 +22,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/max_bucket_count.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/max_bucket_count.pass.cpp
index 7fb76c41c492a..675101b4d33af 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/max_bucket_count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/max_bucket_count.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/max_load_factor.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/max_load_factor.pass.cpp
index c89aa6d339f13..566ea42d09fe3 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/max_load_factor.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/max_load_factor.pass.cpp
@@ -22,6 +22,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/scary.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/scary.pass.cpp
index 670c6406a2bd6..9c9a16c7b043c 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/scary.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/scary.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <unordered_set>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::unordered_set<int> M1;
diff --git a/libcxx/test/std/containers/unord/unord.multiset/types.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/types.pass.cpp
index 81f8334c28f11..2c229a8680d50 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/types.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/types.pass.cpp
@@ -29,6 +29,7 @@
 #include <unordered_set>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_init.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_init.pass.cpp
index 795370c44fe8b..dc2e46f6bae5e 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/assign_init.pass.cpp
@@ -22,6 +22,7 @@
 #include <cmath>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp
index a546d0d7d1de8..e1f45e17baff6 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.swap/db_swap_1.pass.cpp
@@ -21,6 +21,8 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if _LIBCPP_DEBUG >= 1
diff --git a/libcxx/test/std/containers/unord/unord.set/count.pass.cpp b/libcxx/test/std/containers/unord/unord.set/count.pass.cpp
index 971e126fdc676..fc9fce5991d13 100644
--- a/libcxx/test/std/containers/unord/unord.set/count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/count.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp b/libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp
index 614a0fb9f1a18..0de979ae27a5d 100644
--- a/libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/db_iterators_7.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp b/libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp
index 1fd0e8ef78d74..b6acf233276fb 100644
--- a/libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/db_iterators_8.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp b/libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp
index ec00103e50b27..df0f20bcb074a 100644
--- a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/db_local_iterators_7.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp b/libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp
index 3d31858db8879..71121025192d7 100644
--- a/libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/db_local_iterators_8.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/emplace.pass.cpp b/libcxx/test/std/containers/unord/unord.set/emplace.pass.cpp
index 6616aa51c2cd8..215d833b238c8 100644
--- a/libcxx/test/std/containers/unord/unord.set/emplace.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/emplace.pass.cpp
@@ -20,6 +20,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/emplace_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.set/emplace_hint.pass.cpp
index 3e82bb32dd968..a485f30194ca5 100644
--- a/libcxx/test/std/containers/unord/unord.set/emplace_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/emplace_hint.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../../Emplaceable.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/eq.pass.cpp b/libcxx/test/std/containers/unord/unord.set/eq.pass.cpp
index 5362f57f4f5d2..a0ea30fae0705 100644
--- a/libcxx/test/std/containers/unord/unord.set/eq.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/eq.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/equal_range_const.pass.cpp b/libcxx/test/std/containers/unord/unord.set/equal_range_const.pass.cpp
index 9489deaf2ea44..707b3721cb235 100644
--- a/libcxx/test/std/containers/unord/unord.set/equal_range_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/equal_range_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/equal_range_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.set/equal_range_non_const.pass.cpp
index 6713dbd91e081..a2872fedd2ce8 100644
--- a/libcxx/test/std/containers/unord/unord.set/equal_range_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/equal_range_non_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp
index 3d9cfe6834fa9..cc709132d9102 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 struct TemplateConstructor
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp
index a65086ee8c66a..063ed931738fc 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_iter_db1.pass.cpp
@@ -17,6 +17,8 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp
index c7f64da355cc7..30dd46f11be07 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <exception>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp
index 002a24bf1bc15..a84b0601e8418 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db1.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp
index 59bf0cc33fe6f..9fb3bc365803f 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db2.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp
index c522fce26336c..9f056e8373ac5 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db3.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp
index a7c3c0020756c..f56297dc74b04 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_iter_iter_db4.pass.cpp
@@ -19,6 +19,8 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_key.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_key.pass.cpp
index 912a4ae6b15b9..a50f56b0a3232 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_key.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 #if TEST_STD_VER >= 11
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp
index 907063c6a5a93..4832481ca1c18 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_range.pass.cpp
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp
index 03dfcc6028498..199188080cfe9 100644
--- a/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(const_iterator);
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp
index b2a6f04935b14..d915199422554 100644
--- a/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp
@@ -15,6 +15,7 @@
 // node_type extract(key_type const&);
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "Counter.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/find_const.pass.cpp b/libcxx/test/std/containers/unord/unord.set/find_const.pass.cpp
index f226a69606fd2..96d17873ba697 100644
--- a/libcxx/test/std/containers/unord/unord.set/find_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/find_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/find_non_const.pass.cpp b/libcxx/test/std/containers/unord/unord.set/find_non_const.pass.cpp
index 4b24b2f447c35..d49efd7eea414 100644
--- a/libcxx/test/std/containers/unord/unord.set/find_non_const.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/find_non_const.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/incomplete.pass.cpp b/libcxx/test/std/containers/unord/unord.set/incomplete.pass.cpp
index b77f679fef5c3..d87d080555e23 100644
--- a/libcxx/test/std/containers/unord/unord.set/incomplete.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/incomplete.pass.cpp
@@ -15,6 +15,8 @@
 
 #include <unordered_set>
 
+#include "test_macros.h"
+
 template <class Tp>
 struct MyHash {
   MyHash() {}
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp
index 34905e3c8dcf1..f70dc705139ef 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <unordered_set>
 
+#include "test_macros.h"
 #include "container_test_types.h"
 #include "../../set_allocator_requirement_test_templates.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_const_lvalue.pass.cpp
index 097b221a65bd0..a0f5ed6c98cd9 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_const_lvalue.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp
index e3765ca02cb3e..0f7c8812e9f13 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.pass.cpp
@@ -21,6 +21,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template<class Container>
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_init.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_init.pass.cpp
index c60fcb16bff22..345025249faec 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_init.pass.cpp
@@ -19,6 +19,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_node_type.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_node_type.pass.cpp
index f41c936857bac..a3bdf711a83df 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_node_type.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_node_type.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <unordered_set>
 #include <type_traits>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp
index ae5e8976e48f5..230e314242877 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp
@@ -15,6 +15,7 @@
 // iterator insert(const_iterator hint, node_type&&);
 
 #include <unordered_set>
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class Container>
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_range.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_range.pass.cpp
index cb365483a9db9..164a88120d0ca 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_range.pass.cpp
@@ -18,6 +18,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/containers/unord/unord.set/load_factor.pass.cpp b/libcxx/test/std/containers/unord/unord.set/load_factor.pass.cpp
index c5857b7b47c88..0952a4c3f2134 100644
--- a/libcxx/test/std/containers/unord/unord.set/load_factor.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/load_factor.pass.cpp
@@ -19,6 +19,7 @@
 #include <cfloat>
 #include <cmath>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/local_iterators.pass.cpp b/libcxx/test/std/containers/unord/unord.set/local_iterators.pass.cpp
index ad3de5b0ae0d7..adc0164947bb9 100644
--- a/libcxx/test/std/containers/unord/unord.set/local_iterators.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/local_iterators.pass.cpp
@@ -22,6 +22,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/max_bucket_count.pass.cpp b/libcxx/test/std/containers/unord/unord.set/max_bucket_count.pass.cpp
index 121147a08e7c5..c830f444bbaee 100644
--- a/libcxx/test/std/containers/unord/unord.set/max_bucket_count.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/max_bucket_count.pass.cpp
@@ -17,6 +17,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/max_load_factor.pass.cpp b/libcxx/test/std/containers/unord/unord.set/max_load_factor.pass.cpp
index ac345a1d4ea2b..d6f7727b2807f 100644
--- a/libcxx/test/std/containers/unord/unord.set/max_load_factor.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/max_load_factor.pass.cpp
@@ -22,6 +22,7 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/types.pass.cpp b/libcxx/test/std/containers/unord/unord.set/types.pass.cpp
index 7b1531dc28f6e..8d5a22fdb24cf 100644
--- a/libcxx/test/std/containers/unord/unord.set/types.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/types.pass.cpp
@@ -29,6 +29,7 @@
 #include <unordered_set>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/assign_init.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/assign_init.pass.cpp
index bbb2045cb9cab..9c037186427a3 100644
--- a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/assign_init.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/assign_init.pass.cpp
@@ -22,6 +22,7 @@
 #include <cmath>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.pass.cpp
index bc6629acdb728..0201adcaa94ec 100644
--- a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.pass.cpp
@@ -22,6 +22,7 @@
 #include <cmath>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "../../../test_compare.h"
 #include "../../../test_hash.h"
 #include "test_allocator.h"
diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp
index f14f0ddeb7bcd..7682a274690b6 100644
--- a/libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/unord.set.swap/db_swap_1.pass.cpp
@@ -21,6 +21,8 @@
 #include <unordered_set>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if _LIBCPP_DEBUG >= 1
diff --git a/libcxx/test/std/containers/views/span.tuple/tuple_size.pass.cpp b/libcxx/test/std/containers/views/span.tuple/tuple_size.pass.cpp
index a2394026cc2af..1e8b8d0a9c4a0 100644
--- a/libcxx/test/std/containers/views/span.tuple/tuple_size.pass.cpp
+++ b/libcxx/test/std/containers/views/span.tuple/tuple_size.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <span>
 
+#include "test_macros.h"
+
 template <class T, std::size_t N>
 void test()
 {
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/assignment.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/assignment.pass.cpp
index c7994dab8ed4d..91fb00f07998a 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/assignment.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/assignment.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert.pass.cpp
index bfad7aeaa79d5..d51f03f47ae9d 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../AB.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert_assignment.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert_assignment.pass.cpp
index 6dfb5216e607e..1cc207ee523f7 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert_assignment.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/convert_assignment.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../AB.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/copy.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/copy.pass.cpp
index 7690615a5f3dc..fd85e766d19ff 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/copy.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/copy.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/pointer.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/pointer.pass.cpp
index 625e62c18d625..fb30eb04b3f35 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/pointer.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.cons/pointer.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/assign_from_auto_ptr_ref.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/assign_from_auto_ptr_ref.pass.cpp
index 80a1f039dbfb1..ac7dd7492ef7f 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/assign_from_auto_ptr_ref.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/assign_from_auto_ptr_ref.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_from_auto_ptr_ref.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_from_auto_ptr_ref.pass.cpp
index 7f9a1943537ac..79217f201c829 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_from_auto_ptr_ref.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_from_auto_ptr_ref.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../AB.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr.pass.cpp
index 54f96a657d277..464d3415699d3 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../AB.h"
 
 std::auto_ptr<B>
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr_ref.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr_ref.pass.cpp
index ac4d301bdcc26..b5fc43509ff9e 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr_ref.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.conv/convert_to_auto_ptr_ref.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../AB.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/arrow.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/arrow.pass.cpp
index fe06b12b805a8..39da7ba5258f5 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/arrow.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/arrow.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/deref.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/deref.pass.cpp
index 334b02a7c006d..c3034073d3083 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/deref.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/deref.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/release.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/release.pass.cpp
index c3db0e0e8d4f7..f27857087482d 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/release.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/release.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/reset.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/reset.pass.cpp
index c9879e0a089c4..8a5cb19ba02a1 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/reset.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/auto.ptr.members/reset.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../A.h"
 
 void
diff --git a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/element_type.pass.cpp b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/element_type.pass.cpp
index 27be271525fee..57ea1c2b89851 100644
--- a/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/element_type.pass.cpp
+++ b/libcxx/test/std/depr/depr.auto.ptr/auto.ptr/element_type.pass.cpp
@@ -23,6 +23,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/depr/depr.c.headers/assert_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/assert_h.pass.cpp
index d680f33efccb9..364e931dc8924 100644
--- a/libcxx/test/std/depr/depr.c.headers/assert_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/assert_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <assert.h>
 
+#include "test_macros.h"
+
 #ifndef assert
 #error assert not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/ciso646.pass.cpp b/libcxx/test/std/depr/depr.c.headers/ciso646.pass.cpp
index 3eb4064e69e1f..b619499a5dee7 100644
--- a/libcxx/test/std/depr/depr.c.headers/ciso646.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/ciso646.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ciso646>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 
diff --git a/libcxx/test/std/depr/depr.c.headers/complex.h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/complex.h.pass.cpp
index d92ddb67b0e8d..92dc58c2df0cf 100644
--- a/libcxx/test/std/depr/depr.c.headers/complex.h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/complex.h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <complex.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::complex<double> d;
diff --git a/libcxx/test/std/depr/depr.c.headers/ctype_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/ctype_h.pass.cpp
index 61b539d409395..b5deeee7ae215 100644
--- a/libcxx/test/std/depr/depr.c.headers/ctype_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/ctype_h.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 #ifdef isalnum
 #error isalnum defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/errno_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/errno_h.pass.cpp
index 985cdc7f5c4d2..30f8ac05107b3 100644
--- a/libcxx/test/std/depr/depr.c.headers/errno_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/errno_h.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <errno.h>
 
+#include "test_macros.h"
+
 #ifndef EDOM
 #error EDOM not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/fenv_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/fenv_h.pass.cpp
index 6b38f4e6c5d77..5a1633a2936a0 100644
--- a/libcxx/test/std/depr/depr.c.headers/fenv_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/fenv_h.pass.cpp
@@ -13,6 +13,8 @@
 #include <fenv.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef FE_DIVBYZERO
 #error FE_DIVBYZERO not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp
index a088733119e94..e759a7d3c7564 100644
--- a/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <inttypes.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef INT8_MIN
 #error INT8_MIN not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/iso646_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/iso646_h.pass.cpp
index 77ca126278d91..61510feb3c066 100644
--- a/libcxx/test/std/depr/depr.c.headers/iso646_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/iso646_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <iso646.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     // Nothing to test
diff --git a/libcxx/test/std/depr/depr.c.headers/limits_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/limits_h.pass.cpp
index 5dba10edfb7e1..bd0c7b9e0f535 100644
--- a/libcxx/test/std/depr/depr.c.headers/limits_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/limits_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <limits.h>
 
+#include "test_macros.h"
+
 #ifndef CHAR_BIT
 #error CHAR_BIT not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp
index fd2419bb70757..51af3aebc4d5f 100644
--- a/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/locale_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <locale.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef LC_ALL
 #error LC_ALL not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
index 2077a07233369..94a2fc4d2ff9d 100644
--- a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
@@ -12,6 +12,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "hexfloat.h"
 #include "truncate_fp.h"
 
diff --git a/libcxx/test/std/depr/depr.c.headers/setjmp_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/setjmp_h.pass.cpp
index 1878f4f847078..3c938012bdf53 100644
--- a/libcxx/test/std/depr/depr.c.headers/setjmp_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/setjmp_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <setjmp.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef setjmp
 #error setjmp not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/signal_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/signal_h.pass.cpp
index 463d670ba1eba..07a729eee69d4 100644
--- a/libcxx/test/std/depr/depr.c.headers/signal_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/signal_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <signal.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef SIG_DFL
 #error SIG_DFL not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/stdbool_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdbool_h.pass.cpp
index 132ad9c5cd964..ca580a8db8e99 100644
--- a/libcxx/test/std/depr/depr.c.headers/stdbool_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/stdbool_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <stdbool.h>
 
+#include "test_macros.h"
+
 #ifndef __bool_true_false_are_defined
 #error __bool_true_false_are_defined not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/stdint_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdint_h.pass.cpp
index 68efe7992af93..36f0ae8fd68f7 100644
--- a/libcxx/test/std/depr/depr.c.headers/stdint_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/stdint_h.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     // typedef int8_t
diff --git a/libcxx/test/std/depr/depr.c.headers/string_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/string_h.pass.cpp
index 8ed1513b35659..c5eac1adbf968 100644
--- a/libcxx/test/std/depr/depr.c.headers/string_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/string_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <string.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef NULL
 #error NULL not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/tgmath_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/tgmath_h.pass.cpp
index 28cf93ada0aba..88e42fb3fefda 100644
--- a/libcxx/test/std/depr/depr.c.headers/tgmath_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/tgmath_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <tgmath.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::complex<double> cd;
diff --git a/libcxx/test/std/depr/depr.c.headers/time_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/time_h.pass.cpp
index 5c2cc57bc6907..31aff19c535f0 100644
--- a/libcxx/test/std/depr/depr.c.headers/time_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/time_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <time.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef NULL
 #error NULL not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp
index b964ea76f9424..e9ad90d90e8e5 100644
--- a/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/wchar_h.pass.cpp
@@ -12,6 +12,8 @@
 #include <stdarg.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef NULL
 #error NULL not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp
index 1774a7f0897e6..8ec5bbd4674a5 100644
--- a/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/wctype_h.pass.cpp
@@ -11,6 +11,8 @@
 #include <wctype.h>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef WEOF
 #error WEOF not defined
 #endif
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_binary_function.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_binary_function.pass.cpp
index 25dbcb59b9c1b..1d3f8b099feff 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_binary_function.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_binary_function.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 double binary_f(int i, short j) {return i - j + .75;}
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_unary_function.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_unary_function.pass.cpp
index 9304a27e4340f..67d739c9b4e0a 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_unary_function.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/pointer_to_unary_function.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 double unary_f(int i) {return 0.5 - i;}
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun1.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun1.pass.cpp
index c9d2951136e67..2c3d69cf3d0eb 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun1.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun1.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 double unary_f(int i) {return 0.5 - i;}
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun2.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun2.pass.cpp
index 8dfd15c12f403..b88ac539fc8a0 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun2.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.function.pointer.adaptors/ptr_fun2.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 double binary_f(int i, short j) {return i - j + .75;}
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun.pass.cpp
index b4128d5eba06e..30f47ac5f6b43 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1.pass.cpp
index 5fd9f3b05ba97..d7c2454644a27 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_ref_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_ref_t.pass.cpp
index 0898ae140b644..7afb50fa75b63 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_ref_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_ref_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_t.pass.cpp
index a8cab5ac79649..eea2ad2e46f8a 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun1_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref.pass.cpp
index 62b82afebccff..6675686dde587 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref1.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref1.pass.cpp
index 5cd0a65cca0de..5094c917831c7 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref1.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref1.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref_t.pass.cpp
index a2daa5138b771..9100192ca3533 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_ref_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_t.pass.cpp
index fb7944f6ece4f..458ee72f1fe2e 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/const_mem_fun_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun.pass.cpp
index 93b93355b6c36..2c6cf3f41caec 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1.pass.cpp
index d6db4efc9dffa..845f8b29bdaf1 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_ref_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_ref_t.pass.cpp
index c77a0c0143cff..5d770fb23b528 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_ref_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_ref_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_t.pass.cpp
index 2620e7a8e20f4..7d693b56d92ba 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun1_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref.pass.cpp
index 9a822d816c9a7..39539c3ce2c88 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref1.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref1.pass.cpp
index 74a0f48395a3f..f8e5e3369b598 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref1.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref1.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref_t.pass.cpp
index 68ca30f8a83d0..83252d718e316 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_ref_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_t.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_t.pass.cpp
index 0255845518d78..570e2dc07aa02 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_t.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.adaptors/depr.member.pointer.adaptors/mem_fun_t.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char a1() {return 5;}
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.base/binary_function.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.base/binary_function.pass.cpp
index dbe6d1eb9ef90..1ed25c7d9fd21 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.base/binary_function.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.base/binary_function.pass.cpp
@@ -21,6 +21,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::binary_function<int, unsigned, char>::first_argument_type, int>::value), "");
diff --git a/libcxx/test/std/depr/depr.function.objects/depr.base/unary_function.pass.cpp b/libcxx/test/std/depr/depr.function.objects/depr.base/unary_function.pass.cpp
index 0aaf3fc136c37..fb3f97ddf1aae 100644
--- a/libcxx/test/std/depr/depr.function.objects/depr.base/unary_function.pass.cpp
+++ b/libcxx/test/std/depr/depr.function.objects/depr.base/unary_function.pass.cpp
@@ -20,6 +20,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::unary_function<unsigned, char>::argument_type, unsigned>::value), "");
diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.pass.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.pass.cpp
index 01e4120fc33fa..5a99f6be5bddb 100644
--- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.pass.cpp
+++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.pass.cpp
@@ -18,6 +18,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../test_func.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.pass.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.pass.cpp
index 45ee542b0b8c1..7c9666dbb837f 100644
--- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.pass.cpp
+++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.pass.cpp
@@ -18,6 +18,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../test_func.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.pass.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.pass.cpp
index c5bc30b08fdf7..54ca8f778aa72 100644
--- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.pass.cpp
+++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.pass.cpp
@@ -29,6 +29,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../test_func.h"
 
 class test
diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.pass.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.pass.cpp
index 6c9f6f041ae98..2d7b7adc468fa 100644
--- a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.pass.cpp
+++ b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.pass.cpp
@@ -29,6 +29,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../test_func.h"
 
 class test
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp
index 90288578f1510..9823361b509e5 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp
index 867225bf851d5..0808f45ba52d8 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp
index 106aaa7687a57..681d7b7cf4c0d 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp
index 79a0bfeec2fc7..96f17b0072790 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp
index 68b2ee8c40bb6..80401264f3020 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp
index bae10783837bf..0ee341b226fbe 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp
index 80f5af082d348..93cdbf677ec39 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <strstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::istream, std::istrstream>::value), "");
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp
index 81c84fe321795..64a12ab3620a4 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp
index 5f9e9e6a88ed7..6960f75c59fc5 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ostrstream out;
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp
index 4b3412edc880f..f52ff6c13ce2c 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp
index 73a4bb81e2507..080e05000c888 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp
index ff58af45b204e..df38ea977ed37 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp
index 60ec02f341d24..5de41cd72306e 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp
index 5fde850372300..3e753702065bc 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <strstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::ostream, std::ostrstream>::value), "");
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp
index 2387b1eebbe4a..96c2890c2cc07 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp
index 0e278490ab7a7..907985b3de40d 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <cstring>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::strstream inout;
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp
index b4eb17397a626..210e73057f087 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp
index dab7c04a6bbae..f5de291a87974 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp
index 3a7641b23ea69..abf928ffe5aaa 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp
index f3482c756d0d9..59ee9095ec6a1 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp
index 7944b789996ad..3e9705366428b 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <strstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::iostream, std::strstream>::value), "");
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp
index 08cb61914a619..57969fb374ffa 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp
index 19eb15f948ce8..4fe716006c191 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstring>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp
index 8cba11faca813..001151c367d78 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp
index 9546b74dbd563..b4efb11c9881f 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp
index d7587c0055e03..025bc0b74c8c9 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int called = 0;
 
 void* my_alloc(std::size_t)
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp
index 2764efd0efda6..a7a3fbcf96f42 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp
index fdd46164bbbea..afe34072cb91f 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstring>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp
index 80aafd4481a46..e7b8d6d139cfa 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstring>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp
index 4cbb7b80aa729..54587c5941a3f 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp
index b47a34ed8f706..b7f8ad93ac78d 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp
@@ -20,6 +20,8 @@
 #include <string>
 #include <strstream>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   std::ostrstream oss;
   std::string s;
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp
index 1d2f34d924b2f..1bf17d94ed02a 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp
index 04ecb94cbd645..841334b373f37 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp
index 19cb7abf12f9a..b2267a0aac227 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp
index 59fdb640d53a1..1f92b2057da8b 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test
     : public std::strstreambuf
 {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp
index 8e1f22eeb117d..94b1346ed82f3 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp
@@ -16,6 +16,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp
index 9d13dc4aa826b..f7e96ed4020cb 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp
@@ -16,6 +16,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp
index c78004e211918..e663862d39073 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp
index e8da2957c6a96..6aa4b434253b0 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp
@@ -15,6 +15,8 @@
 #include <strstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test
     : public std::strstreambuf
 {
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp
index 398605af98a5a..bf86406c96f48 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp
@@ -14,6 +14,8 @@
 #include <strstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::streambuf, std::strstreambuf>::value), "");
diff --git a/libcxx/test/std/depr/exception.unexpected/set.unexpected/get_unexpected.pass.cpp b/libcxx/test/std/depr/exception.unexpected/set.unexpected/get_unexpected.pass.cpp
index 7b11c3037f04b..1120471153482 100644
--- a/libcxx/test/std/depr/exception.unexpected/set.unexpected/get_unexpected.pass.cpp
+++ b/libcxx/test/std/depr/exception.unexpected/set.unexpected/get_unexpected.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/std/depr/exception.unexpected/set.unexpected/set_unexpected.pass.cpp b/libcxx/test/std/depr/exception.unexpected/set.unexpected/set_unexpected.pass.cpp
index 38ae81ec9b074..f4e36999690ed 100644
--- a/libcxx/test/std/depr/exception.unexpected/set.unexpected/set_unexpected.pass.cpp
+++ b/libcxx/test/std/depr/exception.unexpected/set.unexpected/set_unexpected.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/std/depr/exception.unexpected/unexpected.handler/unexpected_handler.pass.cpp b/libcxx/test/std/depr/exception.unexpected/unexpected.handler/unexpected_handler.pass.cpp
index e4d4869d77a44..f4ff832ffd220 100644
--- a/libcxx/test/std/depr/exception.unexpected/unexpected.handler/unexpected_handler.pass.cpp
+++ b/libcxx/test/std/depr/exception.unexpected/unexpected.handler/unexpected_handler.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <exception>
 
+#include "test_macros.h"
+
 void f() {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/depr/exception.unexpected/unexpected/unexpected.pass.cpp b/libcxx/test/std/depr/exception.unexpected/unexpected/unexpected.pass.cpp
index 2562b78848710..e3b164eabecab 100644
--- a/libcxx/test/std/depr/exception.unexpected/unexpected/unexpected.pass.cpp
+++ b/libcxx/test/std/depr/exception.unexpected/unexpected/unexpected.pass.cpp
@@ -14,6 +14,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f1()
 {
     std::exit(0);
diff --git a/libcxx/test/std/diagnostics/assertions/cassert.pass.cpp b/libcxx/test/std/diagnostics/assertions/cassert.pass.cpp
index a18a4d0df039f..79e17fc9d9fb3 100644
--- a/libcxx/test/std/diagnostics/assertions/cassert.pass.cpp
+++ b/libcxx/test/std/diagnostics/assertions/cassert.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cassert>
 
+#include "test_macros.h"
+
 #ifndef assert
 #error assert not defined
 #endif
diff --git a/libcxx/test/std/diagnostics/errno/cerrno.pass.cpp b/libcxx/test/std/diagnostics/errno/cerrno.pass.cpp
index 452f99e337afa..233d728493838 100644
--- a/libcxx/test/std/diagnostics/errno/cerrno.pass.cpp
+++ b/libcxx/test/std/diagnostics/errno/cerrno.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <cerrno>
 
+#include "test_macros.h"
+
 #ifndef E2BIG
 #error E2BIG not defined
 #endif
diff --git a/libcxx/test/std/diagnostics/std.exceptions/domain.error/domain_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/domain.error/domain_error.pass.cpp
index 31a320d113aef..d01be6a874fc5 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/domain.error/domain_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/domain.error/domain_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::logic_error, std::domain_error>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/invalid.argument/invalid_argument.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/invalid.argument/invalid_argument.pass.cpp
index 00d9a9296dfa8..a6d78039ab5a9 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/invalid.argument/invalid_argument.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/invalid.argument/invalid_argument.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::logic_error, std::invalid_argument>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/length.error/length_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/length.error/length_error.pass.cpp
index 1e8f1e46c8e2b..3a9096f777ec8 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/length.error/length_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/length.error/length_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::logic_error, std::length_error>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/logic.error/logic_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/logic.error/logic_error.pass.cpp
index e30b3b9f8ee33..48a401e3b536e 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/logic.error/logic_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/logic.error/logic_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::logic_error>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/out.of.range/out_of_range.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/out.of.range/out_of_range.pass.cpp
index 01a5b46b4a45f..ab5192de0808c 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/out.of.range/out_of_range.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/out.of.range/out_of_range.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::logic_error, std::out_of_range>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/overflow.error/overflow_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/overflow.error/overflow_error.pass.cpp
index a9e7fb9942298..a5c3b9cad9e81 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/overflow.error/overflow_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/overflow.error/overflow_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::runtime_error, std::overflow_error>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/range.error/range_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/range.error/range_error.pass.cpp
index 92d1151ff6c1a..7316b1870c63b 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/range.error/range_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/range.error/range_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::runtime_error, std::range_error>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/runtime.error/runtime_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/runtime.error/runtime_error.pass.cpp
index c4b8eea74c8f6..5bb0fff914a45 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/runtime.error/runtime_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/runtime.error/runtime_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::runtime_error>::value),
diff --git a/libcxx/test/std/diagnostics/std.exceptions/underflow.error/underflow_error.pass.cpp b/libcxx/test/std/diagnostics/std.exceptions/underflow.error/underflow_error.pass.cpp
index 88a3f46fbdc8c..03d7356304caa 100644
--- a/libcxx/test/std/diagnostics/std.exceptions/underflow.error/underflow_error.pass.cpp
+++ b/libcxx/test/std/diagnostics/std.exceptions/underflow.error/underflow_error.pass.cpp
@@ -14,6 +14,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::runtime_error, std::underflow_error>::value),
diff --git a/libcxx/test/std/diagnostics/syserr/errc.pass.cpp b/libcxx/test/std/diagnostics/syserr/errc.pass.cpp
index 201878d2dd58c..6a04a3c4f088d 100644
--- a/libcxx/test/std/diagnostics/syserr/errc.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/errc.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <system_error>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(static_cast<int>(std::errc::address_family_not_supported) == EAFNOSUPPORT, "");
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.compare/eq_error_code_error_code.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.compare/eq_error_code_error_code.pass.cpp
index 0e2dbe552bd72..c63bfcd955a69 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.compare/eq_error_code_error_code.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.compare/eq_error_code_error_code.pass.cpp
@@ -20,6 +20,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::error_code e_code1(5, std::generic_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.derived/message.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.derived/message.pass.cpp
index ed580198efc4c..a899638ce169a 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.derived/message.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.derived/message.pass.cpp
@@ -18,6 +18,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat1 = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/default_ctor.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/default_ctor.pass.cpp
index 185f96e26eb9e..6ea32e8c59e67 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/default_ctor.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/default_ctor.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test1
     : public std::error_category
 {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/eq.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/eq.pass.cpp
index ce09481e32488..411feade96576 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/eq.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/eq.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat1 = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/lt.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/lt.pass.cpp
index db6b3b90814e1..6cce942f63dbc 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/lt.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/lt.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat1 = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/neq.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/neq.pass.cpp
index 2826018d3d57c..65bc81b09cc85 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/neq.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.nonvirtuals/neq.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat1 = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.overview/error_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.overview/error_category.pass.cpp
index 112c3944885fd..5108e89c87149 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.overview/error_category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.overview/error_category.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <system_error>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::error_category* p = 0;
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/default_error_condition.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/default_error_condition.pass.cpp
index 07daf6f491279..4c975d1490107 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/default_error_condition.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/default_error_condition.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_error_code_int.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_error_code_int.pass.cpp
index 7682227980ed5..de7599908e509 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_error_code_int.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_error_code_int.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_int_error_condition.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_int_error_condition.pass.cpp
index 7e627d4097a88..9682a9c6d865d 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_int_error_condition.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.virtuals/equivalent_int_error_condition.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat = std::generic_category();
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/ErrorCodeEnum.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/ErrorCodeEnum.pass.cpp
index bfebd012b8d0d..bfd6fd6c8da09 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/ErrorCodeEnum.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/ErrorCodeEnum.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 enum testing
 {
     zero, one, two
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/default.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/default.pass.cpp
index 3a7249edeae74..99f1c0b11bff8 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/default.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::error_code ec;
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/int_error_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/int_error_category.pass.cpp
index 5a2150fbf7ffb..63fac7d6c8a94 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/int_error_category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.constructors/int_error_category.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/ErrorCodeEnum.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/ErrorCodeEnum.pass.cpp
index a98e22944568f..0f5aa8b7bcb96 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/ErrorCodeEnum.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/ErrorCodeEnum.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 enum testing
 {
     zero, one, two
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/assign.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/assign.pass.cpp
index 998cfd3545e18..02aee60882dde 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/assign.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/clear.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/clear.pass.cpp
index c4b7eca258a43..14ba9e0a7339f 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/clear.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.modifiers/clear.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/lt.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/lt.pass.cpp
index 9dc37fc342528..97d1de42150ab 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/lt.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/lt.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/make_error_code.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/make_error_code.pass.cpp
index 1f4603f7c9872..9f65a933fd4f0 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/make_error_code.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/make_error_code.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp
index 37e1d817dbbf5..78f1ca7da52ca 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.nonmembers/stream_inserter.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ostringstream out;
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/bool.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/bool.pass.cpp
index 11bea5a344e64..a67ddb397f24c 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/bool.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/bool.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/category.pass.cpp
index 16197d934a09f..36d10182d097d 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/category.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_code ec(6, std::generic_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/default_error_condition.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/default_error_condition.pass.cpp
index 4c92b488e2054..2c8f3cd6e9e81 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/default_error_condition.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/default_error_condition.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/message.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/message.pass.cpp
index 513eeae77d5be..e787ef9ee9ea0 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/message.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/message.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_code ec(6, std::generic_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/value.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/value.pass.cpp
index f1fcee4146991..5881777f1b07c 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/value.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcode/syserr.errcode.observers/value.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_code ec(6, std::system_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/ErrorConditionEnum.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/ErrorConditionEnum.pass.cpp
index 63e718dda308b..24aa1e7e76790 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/ErrorConditionEnum.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/ErrorConditionEnum.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/default.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/default.pass.cpp
index f39904a12e8f8..8399fbd1f014e 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/default.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::error_condition ec;
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/int_error_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/int_error_category.pass.cpp
index b30c23f1913fc..3daed1077f03f 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/int_error_category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.constructors/int_error_category.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/ErrorConditionEnum.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/ErrorConditionEnum.pass.cpp
index 129e30e3fa319..a263d76a57d72 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/ErrorConditionEnum.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/ErrorConditionEnum.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/assign.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/assign.pass.cpp
index a0e27ee5e7713..18c8b3679c873 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/assign.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/clear.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/clear.pass.cpp
index 5de51aa954a6e..e3cfe60dcf337 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/clear.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.modifiers/clear.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/lt.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/lt.pass.cpp
index f1c24514b60dd..cc1cdf9c04d90 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/lt.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/lt.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/make_error_condition.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/make_error_condition.pass.cpp
index e9e65db5965b5..ec8ce3b3c959d 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/make_error_condition.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.nonmembers/make_error_condition.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/bool.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/bool.pass.cpp
index bccdf5fb0ac73..449d3c263df4a 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/bool.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/bool.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/category.pass.cpp
index f4710a7226c06..18cdedbfb8201 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/category.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_condition ec(6, std::generic_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/message.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/message.pass.cpp
index e533e84ee4bf6..ce75678d64566 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/message.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/message.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_condition ec(6, std::generic_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/value.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/value.pass.cpp
index d78b1787492ba..0cdc327beaba0 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/value.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.errcondition/syserr.errcondition.observers/value.pass.cpp
@@ -15,6 +15,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_condition ec(6, std::system_category());
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp
index c127e900a1ce9..7d1510391fcab 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.hash/enabled_hash.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
   {
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code.pass.cpp
index 3e721c2d2e2ca..c011c5a5afcfb 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::system_error se(static_cast<int>(std::errc::not_a_directory),
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_const_char_pointer.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_const_char_pointer.pass.cpp
index d15ff75bf9c3c..ee679ae4a4a88 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_const_char_pointer.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_const_char_pointer.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::string what_arg("test message");
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_string.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_string.pass.cpp
index 8e2b078005cae..a7df85bcb642f 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_string.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_error_code_string.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::string what_arg("test message");
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category.pass.cpp
index b1ac08e666d3e..153342f68a23f 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::system_error se(static_cast<int>(std::errc::not_a_directory),
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_const_char_pointer.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_const_char_pointer.pass.cpp
index a6d24326783a3..89a4ec3cca2a9 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_const_char_pointer.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_const_char_pointer.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::string what_arg("test message");
diff --git a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_string.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_string.pass.cpp
index 913c675b9a16b..0eb96a41c43a8 100644
--- a/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_string.pass.cpp
+++ b/libcxx/test/std/diagnostics/syserr/syserr.syserr/syserr.syserr.members/ctor_int_error_category_string.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::string what_arg("test message");
diff --git a/libcxx/test/std/experimental/algorithms/alg.search/search.pass.cpp b/libcxx/test/std/experimental/algorithms/alg.search/search.pass.cpp
index 45b1f09729b64..47e041f055b4f 100644
--- a/libcxx/test/std/experimental/algorithms/alg.search/search.pass.cpp
+++ b/libcxx/test/std/experimental/algorithms/alg.search/search.pass.cpp
@@ -20,6 +20,7 @@
 #include <experimental/algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 int searcher_called = 0;
diff --git a/libcxx/test/std/experimental/filesystem/fs.req.namespace/namespace.pass.cpp b/libcxx/test/std/experimental/filesystem/fs.req.namespace/namespace.pass.cpp
index 87086d4295896..c9f89b322ec6d 100644
--- a/libcxx/test/std/experimental/filesystem/fs.req.namespace/namespace.pass.cpp
+++ b/libcxx/test/std/experimental/filesystem/fs.req.namespace/namespace.pass.cpp
@@ -15,6 +15,8 @@
 #include <experimental/filesystem>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   static_assert(std::is_same<
           std::experimental::filesystem::path,
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/default.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/default.pass.cpp
index 35c5e50bbe721..4feda6e22d71f 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/default.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/default.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pass.cpp
index 245b3ddfc7f92..8aad52d57193d 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pred.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pred.pass.cpp
index ad90954758e38..5b637210841ed 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pred.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/hash.pred.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/pred.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/pred.pass.cpp
index a361b90b85267..8dc3d2eddfd50 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/pred.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore/pred.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/default.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/default.pass.cpp
index 95426f81c88f5..ed3d5737869d3 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/default.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/default.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pass.cpp
index 151a0f46b4186..7552382f08be0 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pass.cpp
@@ -34,6 +34,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pred.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pred.pass.cpp
index bcdaa87c4a490..36ed6575dea4f 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pred.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/hash.pred.pass.cpp
@@ -34,6 +34,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/pred.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/pred.pass.cpp
index 06a93c5d7b711..82d4de2001eff 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/pred.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.boyer_moore_horspool/pred.pass.cpp
@@ -34,6 +34,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pass.cpp
index b3f6c66387fcb..d5e7b6092ac6f 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pred.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pred.pass.cpp
index f1573a2e06728..052ce5b05a0ec 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pred.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/default.pred.pass.cpp
@@ -35,6 +35,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pass.cpp
index 5187569ecf382..a846095741a09 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pass.cpp
@@ -20,6 +20,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pred.pass.cpp b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pred.pass.cpp
index a6996437ec1f3..b2465accd5fc7 100644
--- a/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pred.pass.cpp
+++ b/libcxx/test/std/experimental/func/func.searchers/func.searchers.default/func.searchers.default.creation/make_default_searcher.pred.pass.cpp
@@ -20,6 +20,7 @@
 #include <experimental/functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/experimental/func/header.functional.synop/includes.pass.cpp b/libcxx/test/std/experimental/func/header.functional.synop/includes.pass.cpp
index 4b913c2d780e1..e62c52576f0aa 100644
--- a/libcxx/test/std/experimental/func/header.functional.synop/includes.pass.cpp
+++ b/libcxx/test/std/experimental/func/header.functional.synop/includes.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <experimental/functional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
   std::function<int(int)> x;
diff --git a/libcxx/test/std/experimental/iterator/nothing_to_do.pass.cpp b/libcxx/test/std/experimental/iterator/nothing_to_do.pass.cpp
index 782c1a03ee553..1cd92d047c4c7 100644
--- a/libcxx/test/std/experimental/iterator/nothing_to_do.pass.cpp
+++ b/libcxx/test/std/experimental/iterator/nothing_to_do.pass.cpp
@@ -8,6 +8,8 @@
 
 #include <experimental/iterator>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   return 0;
 }
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/assign.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/assign.pass.cpp
index 2af0b717ea698..b1ebf2ab8042b 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/assign.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/assign.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace coro = std::experimental;
 
 template <class C>
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/construct.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/construct.pass.cpp
index 7832856c1cf64..81096376189ce 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/construct.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.con/construct.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace coro = std::experimental;
 
 template <class C>
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.export/from_address.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.export/from_address.pass.cpp
index 9c4a647e5fc8d..398fe725d6417 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.export/from_address.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.export/from_address.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace coro = std::experimental;
 
 template <class C>
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp
index 27b83ce02e15f..bc16f3f72c56f 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/coroutine.handle.noop/noop_coroutine.pass.cpp
@@ -19,6 +19,8 @@
 #include <cassert>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #if __has_builtin(__builtin_coro_noop)
 
 namespace coro = std::experimental::coroutines_v1;
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/void_handle.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/void_handle.pass.cpp
index 8536e23d4624e..8a0f5206391a8 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/void_handle.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.handle/void_handle.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <experimental/coroutine>
 
+#include "test_macros.h"
+
 namespace coro = std::experimental;
 
 struct A {
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.traits/promise_type.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.traits/promise_type.pass.cpp
index 781f264ffd7ac..95248ac664b78 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.traits/promise_type.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/coroutine.traits/promise_type.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <experimental/coroutine>
 
+#include "test_macros.h"
+
 namespace coro = std::experimental;
 
 template <class T, class = typename T::promise_type>
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/await_result.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/await_result.pass.cpp
index c540ca9fb3537..e181c2517c4ee 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/await_result.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/await_result.pass.cpp
@@ -12,6 +12,8 @@
 #include <experimental/coroutine>
 #include <cassert>
 
+#include "test_macros.h"
+
 using namespace std::experimental;
 
 struct coro_t {
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/bool_await_suspend.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/bool_await_suspend.pass.cpp
index fb15ade8bb2ba..0fc02f5e317f2 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/bool_await_suspend.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/bool_await_suspend.pass.cpp
@@ -15,6 +15,8 @@
 #include <experimental/coroutine>
 #include <cassert>
 
+#include "test_macros.h"
+
 using namespace std::experimental;
 
 struct coro_t {
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp
index a899092d03478..07fcee36965d9 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/expected.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <experimental/coroutine>
 #include <cassert>
+
+#include "test_macros.h"
 using namespace std::experimental;
 
 struct error {};
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/generator.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/generator.pass.cpp
index 84b4deb9ce454..c0ac5ecc0b5c6 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/generator.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/generator.pass.cpp
@@ -16,6 +16,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "coroutine_types.h"
 
 using namespace std::experimental;
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/go.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/go.pass.cpp
index 994bd87d6de44..82174d56cf2dd 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/go.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/go.pass.cpp
@@ -12,6 +12,8 @@
 #include <experimental/coroutine>
 #include <cassert>
 
+#include "test_macros.h"
+
 using namespace std::experimental;
 
 bool cancel = false;
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/multishot_func.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/multishot_func.pass.cpp
index fed97ea2308db..a346497b4377c 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/multishot_func.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/multishot_func.pass.cpp
@@ -12,6 +12,8 @@
 #include <experimental/coroutine>
 #include <cassert>
 
+#include "test_macros.h"
+
 using namespace std::experimental;
 
 // This file tests, multishot, movable std::function like thing using coroutine
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/oneshot_func.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/oneshot_func.pass.cpp
index 4bab2dd3bf2f9..91cc13737524c 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/oneshot_func.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/end.to.end/oneshot_func.pass.cpp
@@ -13,6 +13,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 using namespace std::experimental;
 
 // This file tests, one shot, movable std::function like thing using coroutine
diff --git a/libcxx/test/std/experimental/language.support/support.coroutines/includes.pass.cpp b/libcxx/test/std/experimental/language.support/support.coroutines/includes.pass.cpp
index 440ffa113a13f..7997add41d891 100644
--- a/libcxx/test/std/experimental/language.support/support.coroutines/includes.pass.cpp
+++ b/libcxx/test/std/experimental/language.support/support.coroutines/includes.pass.cpp
@@ -19,6 +19,7 @@ int main(int, char**) {
   // std::nothrow is not implicitly defined by the compiler when the include is
   // missing, unlike other parts of <new>. Therefore we use std::nothrow to
   // test for #include <new>
+
   (void)std::nothrow;
 
   return 0;
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/assign.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/assign.pass.cpp
index 35a98b789ef77..320599d089bb1 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/assign.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/copy.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/copy.pass.cpp
index ce3c9be105db9..c7839dcb6991a 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/copy.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/copy.pass.cpp
@@ -18,6 +18,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp
index 59d234eea1ebf..90efbf46c97bd 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/default.pass.cpp
@@ -20,6 +20,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/memory_resource_convert.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/memory_resource_convert.pass.cpp
index 77748ea122a2c..d0d33b3b0023a 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/memory_resource_convert.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/memory_resource_convert.pass.cpp
@@ -20,6 +20,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/other_alloc.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/other_alloc.pass.cpp
index 21a56f67b5f9e..aeb3a554efa7f 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/other_alloc.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.ctor/other_alloc.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp
index 8b9c976f86d62..8b216e1c5460d 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/equal.pass.cpp
@@ -23,6 +23,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp
index e6489be2c36b7..803e9a0ba69d6 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.eq/not_equal.pass.cpp
@@ -23,6 +23,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp
index 4102fa93aa101..de8a26195d537 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/construct_pair.pass.cpp
@@ -23,6 +23,8 @@
 #include <cstdlib>
 #include "uses_alloc_types.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int constructed = 0;
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
index 9de134008c46b..5677b868cb9a6 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
@@ -20,6 +20,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 template <size_t S, size_t Align>
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp
index 2f6853c7848ed..b9b482b21ee2e 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/destroy.pass.cpp
@@ -21,6 +21,8 @@
 #include <cassert>
 #include <cstdlib>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int count = 0;
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp
index 0883b2c5102c3..16f8f6743f7be 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/resource.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp
index 9998e58efba87..e15c3ebd40d07 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/select_on_container_copy_construction.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_copy.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_copy.pass.cpp
index 4466b2b4e5155..e8d9c77bcc2d0 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_copy.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_copy.pass.cpp
@@ -19,6 +19,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_move.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_move.pass.cpp
index b6af8515100cb..959ec44049f24 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_move.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/alloc_move.pass.cpp
@@ -19,6 +19,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/default.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/default.pass.cpp
index 53481ab39f215..2885a39a55866 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/default.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.ctor/default.pass.cpp
@@ -21,6 +21,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/do_is_equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/do_is_equal.pass.cpp
index 71c36693c3c77..f9879780398aa 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/do_is_equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.mem/do_is_equal.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 using std::size_t;
 namespace ex = std::experimental::pmr;
 
diff --git a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.overview/overview.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.overview/overview.pass.cpp
index 873dfd9391cc8..1691025848c6c 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.overview/overview.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.adaptor/memory.resource.adaptor.overview/overview.pass.cpp
@@ -17,6 +17,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp
index 6b5a3fbd54231..7527ac3420d10 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_deque_synop.pass.cpp
@@ -23,6 +23,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp
index 5104c44dc7291..7fd09e496405c 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_forward_list_synop.pass.cpp
@@ -23,6 +23,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp
index e865e28f3b317..39f9cae67415c 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_list_synop.pass.cpp
@@ -23,6 +23,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp
index bb3016f9315a2..d7b0e5e8e1594 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_map_synop.pass.cpp
@@ -27,6 +27,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp
index d0f62e57f3cd8..751847e0ea2c7 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_regex_synop.pass.cpp
@@ -29,6 +29,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 template <class Iter, class PmrTypedef>
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp
index 3bfd5fd6a2352..ce97287e123e2 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_set_synop.pass.cpp
@@ -27,6 +27,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp
index b3fdbde6d3e92..faf1f85f053b9 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_string_synop.pass.cpp
@@ -30,6 +30,8 @@
 
 #include "constexpr_char_traits.hpp"
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 template <class Char, class PmrTypedef>
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp
index 60a7c5c73666f..6487eb09a09ce 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_map_synop.pass.cpp
@@ -27,6 +27,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 template <class T>
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp
index a090c2efa3bdb..a93256797992c 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_unordered_set_synop.pass.cpp
@@ -27,6 +27,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 template <class T>
diff --git a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp
index 9cedde94b0e25..c85b231d123f6 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.aliases/header_vector_synop.pass.cpp
@@ -23,6 +23,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace pmr = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp
index df32a4dcf64a8..ac2726ef9a7f5 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/default_resource.pass.cpp
@@ -33,6 +33,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 using namespace std::experimental::pmr;
 
 int main(int, char**) {
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
index ab0fedcb8f72f..9b9c505e6e420 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
@@ -18,6 +18,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 struct assert_on_compare : public ex::memory_resource
diff --git a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.eq/not_equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.eq/not_equal.pass.cpp
index 037bb1a961377..3f5e35558a2dc 100644
--- a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.eq/not_equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.eq/not_equal.pass.cpp
@@ -18,6 +18,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::pmr;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/deallocate.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/deallocate.pass.cpp
index e8c2d5bfff151..c315508d66f15 100644
--- a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/deallocate.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/deallocate.pass.cpp
@@ -28,6 +28,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 using std::experimental::pmr::memory_resource;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/dtor.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/dtor.pass.cpp
index be5ea2fb008e6..b017c3556624e 100644
--- a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/dtor.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/dtor.pass.cpp
@@ -24,6 +24,8 @@
 
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 using std::experimental::pmr::memory_resource;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/is_equal.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/is_equal.pass.cpp
index f0e517870852a..9f66b8adcb643 100644
--- a/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/is_equal.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource/memory.resource.public/is_equal.pass.cpp
@@ -26,6 +26,8 @@
 #include <cassert>
 #include "test_memory_resource.hpp"
 
+#include "test_macros.h"
+
 using std::experimental::pmr::memory_resource;
 
 int main(int, char**)
diff --git a/libcxx/test/std/experimental/simd/simd.abi/vector_extension.pass.cpp b/libcxx/test/std/experimental/simd/simd.abi/vector_extension.pass.cpp
index 6bd56d2dda7d4..6e9a16611a90c 100644
--- a/libcxx/test/std/experimental/simd/simd.abi/vector_extension.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.abi/vector_extension.pass.cpp
@@ -15,6 +15,8 @@
 #include <experimental/simd>
 #include <cstdint>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 constexpr inline int reg_width() {
diff --git a/libcxx/test/std/experimental/simd/simd.access/default.pass.cpp b/libcxx/test/std/experimental/simd/simd.access/default.pass.cpp
index 6e6e4fe9e9a2a..c617344a5a15d 100644
--- a/libcxx/test/std/experimental/simd/simd.access/default.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.access/default.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstdint>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 template <typename SimdType>
diff --git a/libcxx/test/std/experimental/simd/simd.casts/simd_cast.pass.cpp b/libcxx/test/std/experimental/simd/simd.casts/simd_cast.pass.cpp
index ec4a32bf9d728..1e7fe02f8da1f 100644
--- a/libcxx/test/std/experimental/simd/simd.casts/simd_cast.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.casts/simd_cast.pass.cpp
@@ -17,6 +17,8 @@
 #include <experimental/simd>
 #include <cstdint>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 static_assert(
diff --git a/libcxx/test/std/experimental/simd/simd.casts/static_simd_cast.pass.cpp b/libcxx/test/std/experimental/simd/simd.casts/static_simd_cast.pass.cpp
index 40922ce1153ed..55258bd59bdf0 100644
--- a/libcxx/test/std/experimental/simd/simd.casts/static_simd_cast.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.casts/static_simd_cast.pass.cpp
@@ -17,6 +17,8 @@
 #include <experimental/simd>
 #include <cstdint>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 static_assert(
diff --git a/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp
index 25dd8a4c53410..cf038e3842786 100644
--- a/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdint>
 #include <experimental/simd>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 template <class T, class... Args>
diff --git a/libcxx/test/std/experimental/simd/simd.cons/default.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/default.pass.cpp
index 6eebe0ec0cf83..3e6200bb17165 100644
--- a/libcxx/test/std/experimental/simd/simd.cons/default.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <cstdint>
 #include <experimental/simd>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 int main(int, char**) {
diff --git a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp
index 19880e9d3015d..500ccce817d8c 100644
--- a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp
@@ -19,6 +19,8 @@
 #include <experimental/simd>
 #include <cstdint>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 template <class T, class... Args>
diff --git a/libcxx/test/std/experimental/simd/simd.traits/abi_for_size.pass.cpp b/libcxx/test/std/experimental/simd/simd.traits/abi_for_size.pass.cpp
index 2bf28dc8a211b..ba4e07cf92821 100644
--- a/libcxx/test/std/experimental/simd/simd.traits/abi_for_size.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.traits/abi_for_size.pass.cpp
@@ -18,6 +18,8 @@
 #include <cstdint>
 #include <experimental/simd>
 
+#include "test_macros.h"
+
 namespace ex = std::experimental::parallelism_v2;
 
 static_assert(std::is_same<typename ex::abi_for_size<int, 4>::type,
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign.pass.cpp
index 078c8c7f0679f..be9879e2526a9 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const& propagate_const::operator=(const propagate_const<U>&)=delete;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_element_type.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_element_type.pass.cpp
index 512b5998138e5..d3f5f03c67ee8 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_element_type.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_element_type.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const& propagate_const::operator=(U&&);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_propagate_const.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_propagate_const.pass.cpp
index 0da532ab4a283..1fa0dd08c9079 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_propagate_const.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_convertible_propagate_const.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const& operator=(U&& u); // won't bind to propagate_const
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_element_type.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_element_type.pass.cpp
index 896ef9c90b986..2743f5774266a 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_element_type.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/assign_element_type.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const& propagate_const::operator=(U&&);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign.pass.cpp
index f1546afcc5f28..2d63e3c0fa3b0 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const& propagate_const::operator=(propagate_const<U>&&);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible.pass.cpp
index dfff0bc9350aa..fcd1342b12cf6 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const& propagate_const::operator=(propagate_const<U>&&);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible_propagate_const.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible_propagate_const.pass.cpp
index 894910eaa8765..f382e5ec2ec80 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible_propagate_const.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.assignment/move_assign_convertible_propagate_const.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const& operator=(propagate_const<_Up>&& pu);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.explicit.ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.explicit.ctor.pass.cpp
index 24e27cb6b5608..f9d1a81bfbad6 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.explicit.ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.explicit.ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const& operator=(propagate_const<_Up>&& pu);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.non-explicit.ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.non-explicit.ctor.pass.cpp
index 9cb325db00f1b..f05cc696a2fe8 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.non-explicit.ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_element_type.non-explicit.ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const(propagate_const<_Up>&& pu);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.copy_ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.copy_ctor.pass.cpp
index e440245b5e40f..af798180931b0 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.copy_ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.copy_ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const& operator(propagate_const<_Up>&& pu);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.explicit.move_ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.explicit.move_ctor.pass.cpp
index 3166ebe70c1a1..7ea0c1f33fcc5 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.explicit.move_ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.explicit.move_ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const(propagate_const<_Up>&& pu);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.move_ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.move_ctor.pass.cpp
index ea1ac4262f332..277460da859f5 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.move_ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/convertible_propagate_const.move_ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> constexpr propagate_const(propagate_const<_Up>&& pu);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/copy_ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/copy_ctor.pass.cpp
index 7c0558f398f66..bed1f6b212d27 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/copy_ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/copy_ctor.pass.cpp
@@ -13,6 +13,7 @@
 // propagate_const(const propagate_const&)=delete;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.explicit.ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.explicit.ctor.pass.cpp
index aee637620a701..3eb078e86137a 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.explicit.ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.explicit.ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const(U&&);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.non-explicit.ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.non-explicit.ctor.pass.cpp
index 93e50578a97a7..580d33d205769 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.non-explicit.ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/element_type.non-explicit.ctor.pass.cpp
@@ -13,6 +13,7 @@
 // template <class U> propagate_const(U&&);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/move_ctor.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/move_ctor.pass.cpp
index 662a605f49c3e..24fc665af3305 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/move_ctor.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.ctors/move_ctor.pass.cpp
@@ -13,6 +13,7 @@
 // propagate_const(propagate_const&&)=default;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/dereference.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/dereference.pass.cpp
index 4e47bac3715c7..261d0cbb2f823 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/dereference.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/dereference.pass.cpp
@@ -13,6 +13,7 @@
 // element_type& propagate_const::operator*();
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/explicit_operator_element_type_ptr.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/explicit_operator_element_type_ptr.pass.cpp
index 6ce5d407c58ef..08126f27cf138 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/explicit_operator_element_type_ptr.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/explicit_operator_element_type_ptr.pass.cpp
@@ -13,6 +13,7 @@
 // propagate_const::operator element_type*();
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/get.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/get.pass.cpp
index 1a12c3bdd3d79..79c43255f5377 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/get.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/get.pass.cpp
@@ -13,6 +13,7 @@
 // element_type* propagate_const::get();
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/op_arrow.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/op_arrow.pass.cpp
index bdc6c69373070..ea6fe1645b295 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/op_arrow.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/op_arrow.pass.cpp
@@ -13,6 +13,7 @@
 // element_type* propagate_const::operator->();
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/operator_element_type_ptr.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/operator_element_type_ptr.pass.cpp
index 46244b3917239..176ca7244851f 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/operator_element_type_ptr.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.non-const_observers/operator_element_type_ptr.pass.cpp
@@ -13,6 +13,7 @@
 // propagate_const::operator element_type*();
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/dereference.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/dereference.pass.cpp
index 758bca4694ecc..17b7fa64e320f 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/dereference.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/dereference.pass.cpp
@@ -13,6 +13,7 @@
 // const element_type& propagate_const::operator*() const;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/explicit_operator_element_type_ptr.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/explicit_operator_element_type_ptr.pass.cpp
index 1f98f03d2fa44..d1f4bc55d677e 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/explicit_operator_element_type_ptr.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/explicit_operator_element_type_ptr.pass.cpp
@@ -13,6 +13,7 @@
 // propagate_const::operator const element_type*() const;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <type_traits>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/get.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/get.pass.cpp
index 71aea68edc21f..ce053fdfd5ce9 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/get.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/get.pass.cpp
@@ -13,6 +13,7 @@
 // const element_type* propagate_const::get() const;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/op_arrow.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/op_arrow.pass.cpp
index dcb1b92455706..f8916aecae4e7 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/op_arrow.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/op_arrow.pass.cpp
@@ -13,6 +13,7 @@
 // const element_type* propagate_const::operator->() const;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/operator_element_type_ptr.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/operator_element_type_ptr.pass.cpp
index bfd295a1fa049..652ef2b853faf 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/operator_element_type_ptr.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/propagate_const.observers/operator_element_type_ptr.pass.cpp
@@ -13,6 +13,7 @@
 // propagate_const::operator const element_type*() const;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/swap.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/swap.pass.cpp
index 03b32dd0ec6e2..930f9469caa4f 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/swap.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.class/swap.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> constexpr void propagate_const::swap(propagate_const<T>& x);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp
index 28ac9c22460c3..6cc5dce42ebf5 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct hash<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/equal_to.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/equal_to.pass.cpp
index 85e400504768a..2374a745863f9 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/equal_to.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/equal_to.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct equal_to<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater.pass.cpp
index ab7b5e9a21d5a..7ba3922ab28cd 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct greater<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater_equal.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater_equal.pass.cpp
index f30e0e9d22ec7..04564719c7c68 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater_equal.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/greater_equal.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct greater_equal<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less.pass.cpp
index 75afd95e115f4..4bd7064c1628e 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct less<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less_equal.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less_equal.pass.cpp
index 4f6523a320f04..929b089445e1f 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less_equal.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/less_equal.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct less_equal<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/not_equal_to.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/not_equal_to.pass.cpp
index 1c303ae28dba1..273ab12678e34 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/not_equal_to.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.comparison_function_objects/not_equal_to.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> struct not_equal_to<experimental::fundamentals_v2::propagate_const<T>>;
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/equal.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/equal.pass.cpp
index dafc355a48e43..59b27546cb5a1 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/equal.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/equal.pass.cpp
@@ -15,6 +15,7 @@
 // template <class T> constexpr bool operator==(const propagate_const<T>& x, const T& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_equal.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_equal.pass.cpp
index 36f3747303eb6..685a633570e54 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_equal.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_equal.pass.cpp
@@ -15,6 +15,7 @@
 // template <class T> constexpr bool operator>=(const propagate_const<T>& x, const T& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_than.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_than.pass.cpp
index 6abadb39b238a..9dfad677d8ca6 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_than.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/greater_than.pass.cpp
@@ -15,6 +15,7 @@
 // template <class T> constexpr bool operator>(const propagate_const<T>& x, const T& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_equal.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_equal.pass.cpp
index 703faed74e56f..ac8319e5b0f74 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_equal.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_equal.pass.cpp
@@ -15,6 +15,7 @@
 // template <class T> constexpr bool operator<=(const propagate_const<T>& x, const T& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_than.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_than.pass.cpp
index 7481418d38439..0bdc3377a6333 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_than.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/less_than.pass.cpp
@@ -15,6 +15,7 @@
 // template <class T> constexpr bool operator<(const propagate_const<T>& x, const T& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/not_equal.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/not_equal.pass.cpp
index ebfc623155768..0045690df372d 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/not_equal.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/propagate_const.relops/not_equal.pass.cpp
@@ -15,6 +15,7 @@
 // template <class T> constexpr bool operator!=(const propagate_const<T>& x, const T& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/swap.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/swap.pass.cpp
index 84923f78dc1f0..dcb294adee798 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/swap.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/swap.pass.cpp
@@ -13,6 +13,7 @@
 // template <class T> constexpr void swap(propagate_const<T>& x, propagate_const<T>& y);
 
 #include <experimental/propagate_const>
+#include "test_macros.h"
 #include "propagate_const_helpers.h"
 #include <cassert>
 
diff --git a/libcxx/test/std/experimental/utilities/utility/utility.erased.type/erased_type.pass.cpp b/libcxx/test/std/experimental/utilities/utility/utility.erased.type/erased_type.pass.cpp
index 36bf4f793b4af..3f2243425d39e 100644
--- a/libcxx/test/std/experimental/utilities/utility/utility.erased.type/erased_type.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/utility/utility.erased.type/erased_type.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/utility>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
   std::experimental::erased_type e;
diff --git a/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp b/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp
index ddf053f2429be..7e27adfab1971 100644
--- a/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/utility/utility.synop/includes.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <experimental/utility>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_UTILITY
 #   error "<experimental/utility> must include <utility>"
 #endif
diff --git a/libcxx/test/std/input.output/file.streams/c.files/cinttypes.pass.cpp b/libcxx/test/std/input.output/file.streams/c.files/cinttypes.pass.cpp
index e9e32bb67342b..5a49bb8ba64af 100644
--- a/libcxx/test/std/input.output/file.streams/c.files/cinttypes.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/c.files/cinttypes.pass.cpp
@@ -11,6 +11,8 @@
 #include <cinttypes>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef INT8_MIN
 #error INT8_MIN not defined
 #endif
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/member_swap.pass.cpp
index 9bfcec0a48fa6..c4e67c1f7d262 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/member_swap.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp
index a397e6cc1388b..a6eef7b3a6a45 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/nonmember_swap.pass.cpp
index f23c119af90d5..4b7ec4756197e 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.assign/nonmember_swap.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/default.pass.cpp
index 5efbb03c8a60f..58086904f2379 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp
index 922e514d7f453..b816cc82279ea 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp
index 4f8bacb205d70..4da8f72c159eb 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_path.pass.cpp
@@ -16,6 +16,7 @@
 #include <fstream>
 #include <filesystem>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 namespace fs = std::filesystem;
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_pointer.pass.cpp
index bd662a9f7f825..8b959880209a5 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/open_pointer.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
index 6636a42233242..72d85323893a2 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/overflow.pass.cpp
@@ -17,6 +17,7 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
index 47760a9162774..1e3029a4edcf7 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/underflow.pass.cpp
@@ -18,6 +18,7 @@
 #include <cstddef>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf/types.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf/types.pass.cpp
index 40c010f6f70b5..ad4249f4d8090 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf/types.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_streambuf<char>, std::basic_filebuf<char> >::value), "");
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/member_swap.pass.cpp
index 7c94de924d9dc..d08ca9e04326d 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/member_swap.pass.cpp
@@ -16,6 +16,7 @@
 #include <fstream>
 #include <utility>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 std::pair<std::string, std::string> get_temp_file_names() {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp
index d2cc6ce722f99..a7b78f6984c0a 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/nonmember_swap.pass.cpp
index 071ca5d85e734..e222392ffff97 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.assign/nonmember_swap.pass.cpp
@@ -17,6 +17,7 @@
 #include <fstream>
 #include <utility>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp
index b38bbb4431f66..ac746783cc9e7 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp
index fb639f1c50b78..882a07ffc01d4 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp
index 19a64314ee831..0ff24fe23a7a1 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp
@@ -20,6 +20,7 @@
 #include <fstream>
 #include <filesystem>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 namespace fs = std::filesystem;
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp
index 4cade955b9b41..5ecdcc51e286e 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp
index 15a2c70239366..d07926ac6ae8a 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/string.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/close.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/close.pass.cpp
index 94a06c4670cca..ac708d12e5362 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/close.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/close.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp
index f9de1c7d7623e..27dcc3387fddf 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_path.pass.cpp
@@ -19,6 +19,7 @@
 #include <fstream>
 #include <filesystem>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**) {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_pointer.pass.cpp
index 32f1d0096a301..a406c6afb8493 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_string.pass.cpp
index 90cd5611701bd..854b668e843e2 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/open_string.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/rdbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/rdbuf.pass.cpp
index 0f39fc63b6227..2d86c25eb034d 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/rdbuf.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.members/rdbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream/types.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/fstream/types.pass.cpp
index 783cfa3a7369f..9274b1854bf85 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream/types.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_iostream<char>, std::basic_fstream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/member_swap.pass.cpp
index 17b88140d540e..7832464b8e066 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp
index d5fe0984ee617..ec50ec686bb5f 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/nonmember_swap.pass.cpp
index c4cd592d11e28..587dcc23d2fe6 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.assign/nonmember_swap.pass.cpp
@@ -17,6 +17,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp
index 7e76d6f8aef88..e4fffa7f81b93 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp
index d8a58ac53ceea..ac19eea943d8d 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp
index 2e92735c816ab..52a367047e228 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp
@@ -21,6 +21,8 @@
 #include <filesystem>
 #include <cassert>
 
+#include "test_macros.h"
+
 namespace fs = std::filesystem;
 
 int main(int, char**) {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp
index d44b3be75d15b..577670106cff9 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/pointer.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp
index c4e979e30c6fc..c1ab706dd4f4a 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/string.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/close.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/close.pass.cpp
index e72bd548763f1..9bd4f723c12dc 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/close.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/close.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp
index ecac4896e34f5..9d30e8ece52c1 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_path.pass.cpp
@@ -20,6 +20,8 @@
 #include <filesystem>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   {
     std::ifstream fs;
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_pointer.pass.cpp
index 50ec53fdc7827..fd2adba4398fc 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_pointer.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_string.pass.cpp
index 155ae0e631d3b..b5fd9fc5522a5 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/open_string.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/rdbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/rdbuf.pass.cpp
index 455d2274e2b7c..c09f76161ef8f 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/rdbuf.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.members/rdbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream/types.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream/types.pass.cpp
index 620c3960895b4..15a044021f640 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream/types.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_istream<char>, std::basic_ifstream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/member_swap.pass.cpp
index fcfb94ecf20d1..4917bbf89a0f1 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/member_swap.pass.cpp
@@ -16,6 +16,7 @@
 #include <fstream>
 #include <utility>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 std::pair<std::string, std::string> get_temp_file_names() {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp
index fbc3bf59738c3..4b947af4119e3 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/move_assign.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/nonmember_swap.pass.cpp
index 3cbf508d94e76..1ee265a4ba46e 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.assign/nonmember_swap.pass.cpp
@@ -17,6 +17,7 @@
 #include <fstream>
 #include <utility>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 std::pair<std::string, std::string> get_temp_file_names() {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp
index baa4bfe9d8af3..1b0ad8240e0cf 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp
index 3a3e11e9114ce..c68809210b49a 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/move.pass.cpp
@@ -17,6 +17,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp
index b742f5f6ef71f..4e89f375d3929 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp
@@ -19,6 +19,7 @@
 #include <fstream>
 #include <filesystem>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 namespace fs = std::filesystem;
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp
index ce23d5e36f418..a00f763dfa74d 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp
index c19c278f457de..bd81f19260871 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/string.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/close.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/close.pass.cpp
index 3ea39985ecbee..21c362a57933a 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/close.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/close.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp
index 2061fa5f33bab..c5315b00ee614 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_path.pass.cpp
@@ -19,6 +19,7 @@
 #include <fstream>
 #include <filesystem>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 namespace fs = std::filesystem;
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_pointer.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_pointer.pass.cpp
index 5bf58814d26eb..eb39065c22d73 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_pointer.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_string.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_string.pass.cpp
index 52db618e1050d..e817bbe677709 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_string.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/open_string.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/rdbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/rdbuf.pass.cpp
index a7b51fb35d69d..dab7e3286f35c 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/rdbuf.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.members/rdbuf.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <fstream>
 #include <cassert>
+#include "test_macros.h"
 #include "platform_support.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream/types.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream/types.pass.cpp
index 231807195f1c3..0986d5907bac5 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream/types.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ofstream/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <fstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_ostream<char>, std::basic_ofstream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default.pass.cpp
index 1cb88a3510f68..5d28de35cdf43 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default.pass.cpp
@@ -18,6 +18,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   using namespace fs;
   // Default
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default_const.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default_const.pass.cpp
index 0f681531a0913..39fc5212b6848 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default_const.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/default_const.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   using namespace fs;
   // Default
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp
index 12158349ef868..2cac5470017f5 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp
@@ -24,6 +24,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 #define CHECK_OP(Op) \
   static_assert(std::is_same<decltype(ce. operator Op (ce)), bool>::value, ""); \
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
index f61222bbbb019..2d9d938eb104c 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
@@ -24,6 +24,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 TEST_SUITE(directory_entry_obs_testsuite)
 
 TEST_CASE(signatures) {
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
index 036c879397510..0a0c5ac7a89f5 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
@@ -22,6 +22,8 @@
 #include "filesystem_test_helper.hpp"
 #include "rapid-cxx-test.hpp"
 
+#include "test_macros.h"
+
 TEST_SUITE(directory_entry_obs_testsuite)
 
 TEST_CASE(file_dne) {
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
index e89ccf9ec53b9..207eb6d2d70a3 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
@@ -22,6 +22,8 @@
 #include "filesystem_test_helper.hpp"
 #include "rapid-cxx-test.hpp"
 
+#include "test_macros.h"
+
 TEST_SUITE(directory_entry_obs_testsuite)
 
 TEST_CASE(signatures) {
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
index 8427fd1818164..5da5528ed8b22 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
@@ -22,6 +22,8 @@
 #include "filesystem_test_helper.hpp"
 #include "rapid-cxx-test.hpp"
 
+#include "test_macros.h"
+
 TEST_SUITE(directory_entry_obs_testsuite)
 
 TEST_CASE(signatures) {
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/path.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/path.pass.cpp
index 28bd2752ec0ed..fea35d398facd 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/path.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/path.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 void test_path_method() {
   using namespace fs;
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
index 2a59cdb1822a8..2763c47052f2c 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
@@ -22,6 +22,8 @@
 #include "filesystem_test_helper.hpp"
 #include "rapid-cxx-test.hpp"
 
+#include "test_macros.h"
+
 TEST_SUITE(directory_entry_status_testsuite)
 
 TEST_CASE(test_basic) {
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
index 55821af5eaf0d..bfa16fd57d0ed 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
@@ -22,6 +22,8 @@
 #include "filesystem_test_helper.hpp"
 #include "rapid-cxx-test.hpp"
 
+#include "test_macros.h"
+
 TEST_SUITE(directory_entry_obs_suite)
 
 TEST_CASE(test_signature) {
diff --git a/libcxx/test/std/input.output/filesystems/class.file_status/file_status.cons.pass.cpp b/libcxx/test/std/input.output/filesystems/class.file_status/file_status.cons.pass.cpp
index 74fdaaf208440..6d5b9e6a9e0d2 100644
--- a/libcxx/test/std/input.output/filesystems/class.file_status/file_status.cons.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.file_status/file_status.cons.pass.cpp
@@ -21,6 +21,8 @@
 
 #include "test_convertible.hpp"
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
  using namespace fs;
diff --git a/libcxx/test/std/input.output/filesystems/class.file_status/file_status.mods.pass.cpp b/libcxx/test/std/input.output/filesystems/class.file_status/file_status.mods.pass.cpp
index 0ee9f709ba49a..3e7df00d38ab7 100644
--- a/libcxx/test/std/input.output/filesystems/class.file_status/file_status.mods.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.file_status/file_status.mods.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
   using namespace fs;
diff --git a/libcxx/test/std/input.output/filesystems/class.file_status/file_status.obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.file_status/file_status.obs.pass.cpp
index ec4863139f487..5583d84b3208b 100644
--- a/libcxx/test/std/input.output/filesystems/class.file_status/file_status.obs.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.file_status/file_status.obs.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
   using namespace fs;
diff --git a/libcxx/test/std/input.output/filesystems/fs.req.namespace/namespace.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.req.namespace/namespace.pass.cpp
index 6586433690106..d895024c53472 100644
--- a/libcxx/test/std/input.output/filesystems/fs.req.namespace/namespace.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.req.namespace/namespace.pass.cpp
@@ -15,6 +15,8 @@
 #include <filesystem>
 #include <type_traits>
 
+#include "test_macros.h"
+
 using namespace std::filesystem;
 
 int main(int, char**) {
diff --git a/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp b/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp
index cb1f2c68ee5ce..36868adabe2e7 100644
--- a/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp
@@ -16,6 +16,7 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/ext.manip/get_time.pass.cpp b/libcxx/test/std/input.output/iostream.format/ext.manip/get_time.pass.cpp
index ebf62c08e720b..7fd353c4c0a02 100644
--- a/libcxx/test/std/input.output/iostream.format/ext.manip/get_time.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/ext.manip/get_time.pass.cpp
@@ -16,6 +16,7 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp b/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp
index d924d77bc7d84..5c896892b5a14 100644
--- a/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp
@@ -16,6 +16,7 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/ext.manip/put_time.pass.cpp b/libcxx/test/std/input.output/iostream.format/ext.manip/put_time.pass.cpp
index faa99e0634386..620896788afb9 100644
--- a/libcxx/test/std/input.output/iostream.format/ext.manip/put_time.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/ext.manip/put_time.pass.cpp
@@ -16,6 +16,7 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/member_swap.pass.cpp
index 44b394b8fb013..00e44f9fdea30 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp
index c7918ec8d73de..796b84c82f628 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.assign/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 template <class CharT>
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp
index 611a7a670d267..d055a743c0f90 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 template <class CharT>
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/streambuf.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/streambuf.pass.cpp
index c12abc0e2dcb0..012737fa480de 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/streambuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/iostream.cons/streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/types.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/types.pass.cpp
index 46d7a16aac74c..7985633f94e1d 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/types.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/iostreamclass/types.pass.cpp
@@ -24,6 +24,8 @@
 #include <istream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_istream<char>, std::basic_iostream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/basic_ios.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/basic_ios.pass.cpp
index 704c4997d5290..14f48c02acad2 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/basic_ios.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/basic_ios.pass.cpp
@@ -17,6 +17,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int f_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/ios_base.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/ios_base.pass.cpp
index ec25dc5a7e0e7..d502aa9f48dbe 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/ios_base.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/ios_base.pass.cpp
@@ -16,6 +16,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int f_called = 0;
 
 std::ios_base&
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/istream.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/istream.pass.cpp
index f3829c25ac465..82337fd863d07 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/istream.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.formatted/istream_extractors/istream.pass.cpp
@@ -17,6 +17,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int f_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.rvalue/rvalue.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.rvalue/rvalue.pass.cpp
index 8d0af7347031f..fe4ad9fa741e8 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.rvalue/rvalue.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.rvalue/rvalue.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore_0xff.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore_0xff.pass.cpp
index acf90e560861b..16d5aeacbd76b 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore_0xff.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/ignore_0xff.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int bad=-1;
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/readsome.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/readsome.pass.cpp
index f99752cc1a524..e2fe3788e865f 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/readsome.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/readsome.pass.cpp
@@ -13,6 +13,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg.pass.cpp
index c16a639786365..fc442d4bd2fdc 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg.pass.cpp
@@ -13,6 +13,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp
index 93a7f1912d963..24b5238c50ece 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/seekg_off.pass.cpp
@@ -17,6 +17,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int seekoff_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/tellg.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/tellg.pass.cpp
index 918685b8693ee..32a7787317fcc 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/tellg.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/tellg.pass.cpp
@@ -13,6 +13,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/member_swap.pass.cpp
index dbb2bb69e3687..0e6cdbb74504b 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp
index 455edbf72ac44..eeb6a5fc8e6d9 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.assign/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 template <class CharT>
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp
index 4830d04d48c77..0cc03255a730c 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/streambuf.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/streambuf.pass.cpp
index 339489dfb5e6d..669c1c9e2da4a 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/streambuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream.cons/streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream_sentry/ctor.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream_sentry/ctor.pass.cpp
index fdebd66fb76ea..f4e84378e31f0 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream_sentry/ctor.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/istream_sentry/ctor.pass.cpp
@@ -16,6 +16,8 @@
 #include <istream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int sync_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream/types.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream/types.pass.cpp
index a5362d94b8cb7..7470adfac159a 100644
--- a/libcxx/test/std/input.output/iostream.format/input.streams/istream/types.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <istream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_ios<char>, std::basic_istream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/member_swap.pass.cpp
index 433d78ead195d..0a8279692c9dc 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp
index 4241b02a36d7a..c8704418f4b00 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.assign/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 template <class CharT>
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp
index 811b7fa85664e..86c5192fb9b8a 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 template <class CharT>
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/streambuf.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/streambuf.pass.cpp
index 78a3a53c74e10..d3c837581cc19 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/streambuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.cons/streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/bool.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/bool.pass.cpp
index a0622b3656755..2ee607b372896 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/bool.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/bool.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/double.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/double.pass.cpp
index 2c83723cbfa00..d68be897a524f 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/double.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/double.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/float.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/float.pass.cpp
index 851086abe4272..e2d5ee6bed198 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/float.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/float.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/int.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/int.pass.cpp
index 7dae78f9def6d..3ca0741e71a3f 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/int.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/int.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long.pass.cpp
index 8f2ec631c03f9..b100c02081ed8 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_double.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_double.pass.cpp
index b0c9950bd7ad5..8b9586556aef0 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_double.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_long.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_long.pass.cpp
index d87096f7213db..e1033dc2718a4 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_long.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/long_long.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp
index 6db1b55cca7a1..34dacbc12e142 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minmax_showbase.pass.cpp
@@ -36,6 +36,8 @@
 #include <sstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <typename T>
 static void test(std::ios_base::fmtflags fmt, const char *expected)
 {
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
index c2b188a15f967..730f626267856 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
@@ -29,6 +29,8 @@
 #include <cstdint>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename T>
 void test_octal(const char *expected)
 {
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/pointer.pass.cpp
index f400f33544bc5..61fd0a804ecd3 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/pointer.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/short.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/short.pass.cpp
index c45d5797b5ece..808d9838ecacd 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/short.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/short.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_int.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_int.pass.cpp
index c24381923d1ac..40664e70c314e 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_int.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_int.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long.pass.cpp
index 03b6396430241..bf4b92627fe18 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long_long.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long_long.pass.cpp
index 3c12f148898ce..edc55e2e27f69 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long_long.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_long_long.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_short.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_short.pass.cpp
index 6cc4c71a094a6..001070a831c05 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_short.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/unsigned_short.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp
index 127c0c7dc5b2b..a04a80c1bad29 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp
index 85edde06d4fe1..95615a9e2082e 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/CharT_pointer.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char.pass.cpp
index 5532a68650b92..9fd9eb0a026b0 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_pointer.pass.cpp
index f6e2445faeb7e..baea963f56086 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_pointer.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp
index f12478e538418..7d9772609a719 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp
index 1b11d8550ae2e..0587f6f3c387f 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/char_to_wide_pointer.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char.pass.cpp
index 26f295ec2d083..7616f23db6f26 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char_pointer.pass.cpp
index 83143521bc56c..1812d06138258 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/signed_char_pointer.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char.pass.cpp
index e45281f866fa4..1264942e7f6a3 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char_pointer.pass.cpp
index 55b429b2dc4b2..ba6d1ea2fb120 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.character/unsigned_char_pointer.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/basic_ios.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/basic_ios.pass.cpp
index 921311f99cee0..8e1e0fdf0f0c8 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/basic_ios.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/basic_ios.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ios_base.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ios_base.pass.cpp
index b10330b7c69a5..de4a17f1ba785 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ios_base.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ios_base.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ostream.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ostream.pass.cpp
index e57e5412c39cb..299878b8cd4a1 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ostream.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/ostream.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/streambuf.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/streambuf.pass.cpp
index d2935ca734a1a..daf5ba102a8d4 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/streambuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters/streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/endl.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/endl.pass.cpp
index 03cd411294b53..71f9e8df02968 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/endl.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/endl.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int sync_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/ends.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/ends.pass.cpp
index 5f18aecf907f5..94135f784652d 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/ends.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/ends.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/flush.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/flush.pass.cpp
index 666a92532f8ab..6fcba87a3c4dc 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/flush.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.manip/flush.pass.cpp
@@ -17,6 +17,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int sync_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.rvalue/CharT_pointer.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.rvalue/CharT_pointer.pass.cpp
index 724593f1acda8..7e571e775b6e4 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.rvalue/CharT_pointer.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.rvalue/CharT_pointer.pass.cpp
@@ -20,6 +20,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 template <class CharT>
 class testbuf
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp.pass.cpp
index 7be006f1060f0..b6fb1741c32ab 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int seekpos_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp2.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp2.pass.cpp
index dc8e5ed5cd3ad..06a6beb523295 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp2.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/seekp2.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int seekoff_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/tellp.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/tellp.pass.cpp
index d9361e83978e2..59210426c084b 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/tellp.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.seeks/tellp.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int seekoff_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/flush.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/flush.pass.cpp
index 15a3b59ea0a37..6d91d71419464 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/flush.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/flush.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int sync_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/put.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/put.pass.cpp
index 79f7d9f9fb8b0..08a701829c1f3 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/put.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/put.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/write.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/write.pass.cpp
index 9ebfdf54b5087..c81c770c1d3d3 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/write.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.unformatted/write.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 class testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream/types.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream/types.pass.cpp
index e0e9cddde410a..8a4a5f5372436 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream/types.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <ostream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_ios<char>, std::basic_ostream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream_sentry/construct.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream_sentry/construct.pass.cpp
index c21776a9aa4ed..c865db75157b9 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream_sentry/construct.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream_sentry/construct.pass.cpp
@@ -16,6 +16,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int sync_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp
index b87797da5a1c0..294831e5b021f 100644
--- a/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/quoted.manip/quoted.pass.cpp
@@ -17,6 +17,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, class Traits>
 bool is_skipws ( const std::basic_istream<CharT, Traits>& is ) {
     return ( is.flags() & std::ios_base::skipws ) != 0;
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/resetiosflags.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/resetiosflags.pass.cpp
index 637aa4ee317e5..1603d9fb62533 100644
--- a/libcxx/test/std/input.output/iostream.format/std.manip/resetiosflags.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/resetiosflags.pass.cpp
@@ -15,6 +15,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setbase.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setbase.pass.cpp
index 580ae4d24c3cf..be5edf8bf22fd 100644
--- a/libcxx/test/std/input.output/iostream.format/std.manip/setbase.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/setbase.pass.cpp
@@ -15,6 +15,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setfill.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setfill.pass.cpp
index 4398ff61301cb..68002b9107d5b 100644
--- a/libcxx/test/std/input.output/iostream.format/std.manip/setfill.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/setfill.pass.cpp
@@ -14,6 +14,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setiosflags.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setiosflags.pass.cpp
index ccf605ad7adeb..8b249f46ca101 100644
--- a/libcxx/test/std/input.output/iostream.format/std.manip/setiosflags.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/setiosflags.pass.cpp
@@ -15,6 +15,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setprecision.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setprecision.pass.cpp
index e570faf7cb7be..7063cf3add69c 100644
--- a/libcxx/test/std/input.output/iostream.format/std.manip/setprecision.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/setprecision.pass.cpp
@@ -15,6 +15,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.format/std.manip/setw.pass.cpp b/libcxx/test/std/input.output/iostream.format/std.manip/setw.pass.cpp
index 44aa41e57465f..346d5f4fabc72 100644
--- a/libcxx/test/std/input.output/iostream.format/std.manip/setw.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/std.manip/setw.pass.cpp
@@ -15,6 +15,8 @@
 #include <ostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/iostream.forward/iosfwd.pass.cpp b/libcxx/test/std/input.output/iostream.forward/iosfwd.pass.cpp
index 5c60dcca2869e..3bf880a15edc6 100644
--- a/libcxx/test/std/input.output/iostream.forward/iosfwd.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.forward/iosfwd.pass.cpp
@@ -11,6 +11,8 @@
 #include <iosfwd>
 #include <cwchar>  // for mbstate_t
 
+#include "test_macros.h"
+
 template <class Ptr> void test()
 {
     Ptr p = 0;
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp
index ef3cbf676aa40..5b01f33bf4b0e 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.pass.cpp
@@ -13,6 +13,8 @@
 #include <iostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp
index d28255043b601..0b3672a4585cd 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.pass.cpp
@@ -15,6 +15,8 @@
 #include <iostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp
index 97e67fddf81f6..68e3729475016 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp
index 44ae085779214..f1d53b773ac12 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp
index 0af3f5ee7e521..1683c49fbf6d8 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.pass.cpp
@@ -13,6 +13,8 @@
 #include <iostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp
index 68c1528602b88..c653b2f60678b 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.pass.cpp
@@ -15,6 +15,8 @@
 #include <iostream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp
index ad7e35b515137..f396500890d88 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp
index 5703c61639d32..b6bd1ef4ea18e 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <iostream>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if 0
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.members/state.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.members/state.pass.cpp
index 3938d79804cb6..a793fec615884 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.members/state.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.members/state.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::fpos<int> f;
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/addition.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/addition.pass.cpp
index 30bdabc368c68..1a3adac6fe076 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/addition.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/addition.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::fpos<std::mbstate_t> P;
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/ctor_int.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/ctor_int.pass.cpp
index e27c9068784d6..6e8026bcccd87 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/ctor_int.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/ctor_int.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::fpos<std::mbstate_t> P;
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/difference.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/difference.pass.cpp
index 114e382a71f9b..88890562b3506 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/difference.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/difference.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::fpos<std::mbstate_t> P;
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/eq_int.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/eq_int.pass.cpp
index 1b1a5f33d1c6a..c649f6b15e848 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/eq_int.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/eq_int.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::fpos<std::mbstate_t> P;
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/offset.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/offset.pass.cpp
index a8e763f72454f..5bf28f0ae956b 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/offset.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/offset.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::fpos<std::mbstate_t> P;
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/streamsize.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/streamsize.pass.cpp
index 9d9cd79024e92..1ff971b36c78f 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/streamsize.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/streamsize.pass.cpp
@@ -13,6 +13,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::streamoff o(5);
diff --git a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/subtraction.pass.cpp b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/subtraction.pass.cpp
index b38378b7e8cc8..83d03861b393e 100644
--- a/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/subtraction.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/fpos/fpos.operations/subtraction.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::fpos<std::mbstate_t> P;
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags.pass.cpp
index da147bd82f8b1..cc7da710c08a1 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags_fmtflags.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags_fmtflags.pass.cpp
index ed39a54245a55..a57ff68e92265 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags_fmtflags.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/flags_fmtflags.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision.pass.cpp
index f6387c828f253..c90ee67f90e39 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision_streamsize.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision_streamsize.pass.cpp
index 475ddc47f8ac4..ef18319bd1867 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision_streamsize.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/precision_streamsize.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags.pass.cpp
index d8ca9cc8239eb..96d72570f4be7 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags_mask.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags_mask.pass.cpp
index 6793ced7fe039..dfbf1068da3d0 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags_mask.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/setf_fmtflags_mask.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/unsetf_mask.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/unsetf_mask.pass.cpp
index f20acff1347f3..a11fcac60c7b4 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/unsetf_mask.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/unsetf_mask.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width.pass.cpp
index fc2601a456756..9a864ce76bb31 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width_streamsize.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width_streamsize.pass.cpp
index 3b389e5fd223f..189d1495f6515 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width_streamsize.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/fmtflags.state/width_streamsize.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.callback/register_callback.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.callback/register_callback.pass.cpp
index 316d23a528bca..da28695aada49 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.callback/register_callback.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.callback/register_callback.pass.cpp
@@ -19,6 +19,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class test
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.cons/dtor.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.cons/dtor.pass.cpp
index 7c78ea5667d9d..1db5608737850 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.cons/dtor.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.cons/dtor.pass.cpp
@@ -17,6 +17,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/getloc.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/getloc.pass.cpp
index 06b8d83c79ed2..71135adb58ea4 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/getloc.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/getloc.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/imbue.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/imbue.pass.cpp
index ad8898a170f43..46c0b548e0226 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/imbue.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.locales/imbue.pass.cpp
@@ -19,6 +19,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class test
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/iword.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/iword.pass.cpp
index 84eb18370c3fe..336d4091740b3 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/iword.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/iword.pass.cpp
@@ -19,6 +19,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/pword.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/pword.pass.cpp
index c4594615daab6..4b479a90f79bd 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/pword.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/pword.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstdint>
 
+#include "test_macros.h"
+
 class test
     : public std::ios
 {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/xalloc.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/xalloc.pass.cpp
index fa666c26d748d..93fb0849dc0fe 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/xalloc.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.base.storage/xalloc.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int index = std::ios_base::xalloc();
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.members.static/sync_with_stdio.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.members.static/sync_with_stdio.pass.cpp
index cd219971e549d..0ec24b77560e5 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.members.static/sync_with_stdio.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.members.static/sync_with_stdio.pass.cpp
@@ -13,6 +13,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert( std::ios_base::sync_with_stdio(false));
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_char_pointer_error_code.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_char_pointer_error_code.pass.cpp
index 382aeda4f11ea..2cd66c694ecef 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_char_pointer_error_code.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_char_pointer_error_code.pass.cpp
@@ -17,6 +17,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_string_error_code.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_string_error_code.pass.cpp
index 610e6ad1527be..ed222480c84ab 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_string_error_code.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_failure/ctor_string_error_code.pass.cpp
@@ -17,6 +17,8 @@
 #include <system_error>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     // LWG2462 std::ios_base::failure is overspecified
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_fmtflags/fmtflags.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_fmtflags/fmtflags.pass.cpp
index 2eed477b0dd48..bf964cd0cfd7c 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_fmtflags/fmtflags.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_fmtflags/fmtflags.pass.cpp
@@ -32,6 +32,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::ios_base::boolalpha);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_iostate/iostate.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_iostate/iostate.pass.cpp
index 7e982f25f5f43..e60dea7db8fd8 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_iostate/iostate.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_iostate/iostate.pass.cpp
@@ -18,6 +18,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::ios_base::badbit);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_openmode/openmode.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_openmode/openmode.pass.cpp
index ab21f96b7fcb9..268343947e926 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_openmode/openmode.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_openmode/openmode.pass.cpp
@@ -20,6 +20,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::ios_base::app);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_seekdir/seekdir.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_seekdir/seekdir.pass.cpp
index dfa955c7f5538..c1ae4f9e3e8df 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_seekdir/seekdir.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/ios.types/ios_seekdir/seekdir.pass.cpp
@@ -17,6 +17,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::ios_base::beg != std::ios_base::cur);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios.base/nothing_to_do.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios.base/nothing_to_do.pass.cpp
index c4eff25bcb146..09b469211a20c 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios.base/nothing_to_do.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios.base/nothing_to_do.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ios>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.cons/ctor_streambuf.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.cons/ctor_streambuf.pass.cpp
index 01c0d4679bc9f..a81e11cec18bd 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.cons/ctor_streambuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.cons/ctor_streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill.pass.cpp
index f45c6c8b25c76..b4cfdca50e986 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill_char_type.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill_char_type.pass.cpp
index 1c42a03fb317b..b2d593bbd768d 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill_char_type.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/fill_char_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/imbue.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/imbue.pass.cpp
index ed0df788daa02..579e04ab3b3c3 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/imbue.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/imbue.pass.cpp
@@ -18,6 +18,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/move.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/move.pass.cpp
index 5f99f3db0ae11..a011c1aa6a90b 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/move.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/move.pass.cpp
@@ -18,6 +18,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp
index afab4ec5def5d..5b0c903deec62 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/narrow.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::wios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf.pass.cpp
index f104cada62714..af27d8f9ccc42 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf_streambuf.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf_streambuf.pass.cpp
index 5c4e24a82ebe7..c7ae9656c6757 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf_streambuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/rdbuf_streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/swap.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/swap.pass.cpp
index 40e95bae7047b..a882fb60c8082 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/swap.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/swap.pass.cpp
@@ -19,6 +19,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 struct testbuf
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie.pass.cpp
index c0d7ac173324a..3e76cb3177ca6 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::basic_ios<char> ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie_ostream.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie_ostream.pass.cpp
index 4ce5966add870..57728f6a358c2 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie_ostream.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/tie_ostream.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/widen.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/widen.pass.cpp
index 0ae5637184ceb..0de166e88a424 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/widen.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/basic.ios.members/widen.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/bad.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/bad.pass.cpp
index 2308cfa929f6e..223b82a2eff24 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/bad.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/bad.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/eof.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/eof.pass.cpp
index bf1d0246d4d08..ae9acc9f92d36 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/eof.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/eof.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/exceptions.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/exceptions.pass.cpp
index 4632e004333fd..f5c3c3d96a800 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/exceptions.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/exceptions.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/fail.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/fail.pass.cpp
index 3ae215e45f489..e5f1d91c57b32 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/fail.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/fail.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/good.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/good.pass.cpp
index 19c05edce67b6..ad907192af96e 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/good.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/good.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/not.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/not.pass.cpp
index 20ddb35dccaff..2036699d39805 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/not.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/not.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/rdstate.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/rdstate.pass.cpp
index 37886ac83e2c8..4853dedfdf270 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/rdstate.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/iostate.flags/rdstate.pass.cpp
@@ -15,6 +15,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ios ios(0);
diff --git a/libcxx/test/std/input.output/iostreams.base/ios/types.pass.cpp b/libcxx/test/std/input.output/iostreams.base/ios/types.pass.cpp
index b4a4d7c57d87c..0c4b997715255 100644
--- a/libcxx/test/std/input.output/iostreams.base/ios/types.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/ios/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <ios>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::ios_base, std::basic_ios<char> >::value), "");
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/internal.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/internal.pass.cpp
index fba2e71ca7d3e..a1203bd59e119 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/internal.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/internal.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/left.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/left.pass.cpp
index f89d6b9e78fff..64771c5e5b935 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/left.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/left.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/right.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/right.pass.cpp
index 399d3ba51dcc3..180f60cacb604 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/right.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/adjustfield.manip/right.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/dec.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/dec.pass.cpp
index 98740cdc026b2..878e4b986b918 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/dec.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/dec.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/hex.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/hex.pass.cpp
index 39addcdcf24b5..d8ea0a2e91e75 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/hex.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/hex.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/oct.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/oct.pass.cpp
index 92b2d4ee7efad..cbff40a66ef02 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/oct.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/basefield.manip/oct.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/iostream_category.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/iostream_category.pass.cpp
index e017c632a4d9e..62e52ab3f9b55 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/iostream_category.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/iostream_category.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat1 = std::iostream_category();
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_code.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_code.pass.cpp
index 060b6284cac18..2aaca4cfb1776 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_code.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_code.pass.cpp
@@ -13,6 +13,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_condition.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_condition.pass.cpp
index 3970708bfa023..f4e16f41ab246 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_condition.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/error.reporting/make_error_condition.pass.cpp
@@ -13,6 +13,8 @@
 #include <ios>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/defaultfloat.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/defaultfloat.pass.cpp
index bb8c424a162f4..172a2ccb0eaa3 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/defaultfloat.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/defaultfloat.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/fixed.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/fixed.pass.cpp
index 94cbf1a53d9ce..98b5466cd42bb 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/fixed.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/fixed.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/hexfloat.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/hexfloat.pass.cpp
index c24d7f999d92c..3a2fbbb792a2a 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/hexfloat.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/hexfloat.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/scientific.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/scientific.pass.cpp
index c8a4819447bef..0f5e649e7fdb2 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/scientific.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/floatfield.manip/scientific.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/boolalpha.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/boolalpha.pass.cpp
index 176267dd6fe8e..a52dd4106ea37 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/boolalpha.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/boolalpha.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noboolalpha.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noboolalpha.pass.cpp
index 27d61cee7aa94..92be09a91c8fd 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noboolalpha.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noboolalpha.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowbase.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowbase.pass.cpp
index b730afa88baf9..bbb2ebef121ef 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowbase.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowbase.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpoint.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpoint.pass.cpp
index 0d9f33ea5c1d3..bf7166a4713d5 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpoint.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpoint.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpos.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpos.pass.cpp
index fa54cd647355d..ddf9f7c883584 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpos.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noshowpos.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noskipws.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noskipws.pass.cpp
index 9ee5ea8e61a37..086ff2aabc149 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noskipws.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/noskipws.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nounitbuf.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nounitbuf.pass.cpp
index ce06e12bd1198..ec6d47c5ae720 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nounitbuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nounitbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nouppercase.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nouppercase.pass.cpp
index 8e0554620997f..ce74a6550850f 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nouppercase.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/nouppercase.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showbase.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showbase.pass.cpp
index 7f1338c5dbfd2..531df599322e8 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showbase.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showbase.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpoint.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpoint.pass.cpp
index 03cf312d0f313..e7c90f58a42fa 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpoint.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpoint.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpos.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpos.pass.cpp
index 2fb0d6511549f..dee45800470b5 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpos.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/showpos.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/skipws.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/skipws.pass.cpp
index 2c64cb8dfb158..c6ad7e7d5f0c1 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/skipws.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/skipws.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/unitbuf.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/unitbuf.pass.cpp
index 6acedc6a5c2d4..1c58420546380 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/unitbuf.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/unitbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/uppercase.pass.cpp b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/uppercase.pass.cpp
index e97763fff03c2..1c6433a2fbd4d 100644
--- a/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/uppercase.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/std.ios.manip/fmtflags.manip/uppercase.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct testbuf : public std::streambuf {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/input.output/iostreams.base/stream.types/streamoff.pass.cpp b/libcxx/test/std/input.output/iostreams.base/stream.types/streamoff.pass.cpp
index 20b953d552758..c8c1b4c53b1f0 100644
--- a/libcxx/test/std/input.output/iostreams.base/stream.types/streamoff.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/stream.types/streamoff.pass.cpp
@@ -13,6 +13,8 @@
 #include <ios>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::is_integral<std::streamoff>::value, "");
diff --git a/libcxx/test/std/input.output/iostreams.base/stream.types/streamsize.pass.cpp b/libcxx/test/std/input.output/iostreams.base/stream.types/streamsize.pass.cpp
index 50fa21e8d52c2..f8dcae55524c2 100644
--- a/libcxx/test/std/input.output/iostreams.base/stream.types/streamsize.pass.cpp
+++ b/libcxx/test/std/input.output/iostreams.base/stream.types/streamsize.pass.cpp
@@ -13,6 +13,8 @@
 #include <ios>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::is_integral<std::streamsize>::value, "");
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp
index 405c72995912e..9c47c074d03bd 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/copy.pass.cpp
@@ -18,6 +18,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/default.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/default.pass.cpp
index 15475d3d68426..eaa1a9b03c94e 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.cons/default.pass.cpp
@@ -18,6 +18,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekoff.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekoff.pass.cpp
index b49fc2094ed95..63fd87c44ce87 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekoff.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekoff.pass.cpp
@@ -17,6 +17,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekpos.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekpos.pass.cpp
index 1095c9148c7bd..cd97e95d4d296 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekpos.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubseekpos.pass.cpp
@@ -17,6 +17,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsetbuf.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsetbuf.pass.cpp
index c0efb1710abf9..553613dde6920 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsetbuf.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsetbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsync.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsync.pass.cpp
index 8d7528ee26194..d4ec2482d02e5 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsync.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.buffer/pubsync.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.locales/locales.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.locales/locales.pass.cpp
index 835944fcf310e..97b20f7019dd3 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.locales/locales.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.locales/locales.pass.cpp
@@ -20,6 +20,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/in_avail.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/in_avail.pass.cpp
index 6d11a8e89690a..f8325c6de9f5e 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/in_avail.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/in_avail.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int showmanyc_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sbumpc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sbumpc.pass.cpp
index 4ac2d6fe547f6..e8e2b1346278a 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sbumpc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sbumpc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int uflow_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetc.pass.cpp
index 8baefb279cf33..c0f07eec0a1b2 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int underflow_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetn.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetn.pass.cpp
index 9088ed846b943..8fb942822d45a 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetn.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/sgetn.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int xsgetn_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/snextc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/snextc.pass.cpp
index 54965bca2fc78..85bd0e4b0be24 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/snextc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.get/snextc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int uflow_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sputbackc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sputbackc.pass.cpp
index 3b63ba3cef9cc..e27833ac8a9d6 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sputbackc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sputbackc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int pbackfail_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sungetc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sungetc.pass.cpp
index 07c1600b0b2f4..540d2687cff53 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sungetc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.pback/sungetc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int pbackfail_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputc.pass.cpp
index 989b61d4e191e..177e91d5b8ddc 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int overflow_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputn.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputn.pass.cpp
index 01bd9d487d830..841a61ea0415d 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputn.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.members/streambuf.pub.put/sputn.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int xsputn_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp
index 6109a6aa47168..e16094bacf9ef 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/assign.pass.cpp
@@ -18,6 +18,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp
index 2809d63124fbe..d96a60ab41903 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.assign/swap.pass.cpp
@@ -18,6 +18,7 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/gbump.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/gbump.pass.cpp
index 161461dee9521..2091a9d092c8d 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/gbump.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/gbump.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.pass.cpp
index b303465f02da2..8f01ac9edc109 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.get.area/setg.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump.pass.cpp
index e151d32273c25..346951ff527b8 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.pass.cpp
index 6ca36227b53c5..201a65dff0c7c 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/setp.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct test
     : public std::basic_streambuf<CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/showmanyc.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/showmanyc.pass.cpp
index 5a238e8845522..ca5e0b95ac4b8 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/showmanyc.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/showmanyc.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int showmanyc_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/uflow.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/uflow.pass.cpp
index 2f86c3b1a961f..36a2e8510f25f 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/uflow.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/uflow.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int underflow_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/underflow.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/underflow.pass.cpp
index 1d2ce7c5c2352..b2a215736933e 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/underflow.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/underflow.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test
     : public std::basic_streambuf<char>
 {
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/xsgetn.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/xsgetn.pass.cpp
index f5a95821b28c8..db106574da71b 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/xsgetn.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.get/xsgetn.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include <cstring>
 
+#include "test_macros.h"
+
 struct test
     : public std::basic_streambuf<char>
 {
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.pback/pbackfail.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.pback/pbackfail.pass.cpp
index 217ff8c32fcbb..75a5455123765 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.pback/pbackfail.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.pback/pbackfail.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 int pbackfail_called = 0;
 
 struct test
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/overflow.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/overflow.pass.cpp
index e067088f50b40..b082f4eb1fb6b 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/overflow.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/overflow.pass.cpp
@@ -16,6 +16,8 @@
 #include <streambuf>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test
     : public std::basic_streambuf<char>
 {
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.PR14074.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.PR14074.pass.cpp
index d0b7b1971ac19..3c51bc2af4671 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.PR14074.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.PR14074.pass.cpp
@@ -23,6 +23,7 @@
 #include <fstream>
 #include <sstream>
 #include <string>
+#include "test_macros.h"
 #include "platform_support.h"
 
 
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.pass.cpp
index 1c30c8f2836e4..74ffdc8eab4af 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.virtuals/streambuf.virt.put/xsputn.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include <cstring>
 
+#include "test_macros.h"
+
 struct test
     : public std::basic_streambuf<char>
 {
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/types.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/types.pass.cpp
index 434f8e3ec12ae..a80c23e4a4b00 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/types.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <streambuf>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::streambuf::char_type, char>::value), "");
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/member_swap.pass.cpp
index d0a5863ed8489..e17bf5a6b1d13 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp
index 5677959438ff2..594e6c8bff22e 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/nonmember_swap.pass.cpp
index b8b00310dbe6a..d16b73fd3effb 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.assign/nonmember_swap.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp
index 414c2269581ec..93b7cdd4376f1 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp
index 27eb5a745c818..13a3988cf6be4 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp
index 04733d6042f93..a667869f1898b 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.cons/string.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.pass.cpp
index 4c010e2b4346b..4948f80dcb7ec 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/istringstream.members/str.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/istringstream/types.pass.cpp b/libcxx/test/std/input.output/string.streams/istringstream/types.pass.cpp
index 9731964422974..da1b1de7358d8 100644
--- a/libcxx/test/std/input.output/string.streams/istringstream/types.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/istringstream/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <sstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_istream<char>, std::basic_istringstream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/member_swap.pass.cpp
index 01cc58ae6ae3b..5991f87e5bf57 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp
index bad3c7316d228..a0c1522c6e05f 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/nonmember_swap.pass.cpp
index d251e6e085f07..f387c74dbfe17 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.assign/nonmember_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp
index f772a9040072c..b541fd35f7c22 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp
index 3b562bafd52f4..15c4fb5c215ee 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp
index 98782dce1f6e2..5ff91b4ac7ddc 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.cons/string.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.pass.cpp
index 56a85159ccae1..e141d4f791ee5 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/ostringstream.members/str.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/ostringstream/types.pass.cpp b/libcxx/test/std/input.output/string.streams/ostringstream/types.pass.cpp
index 0da5f98d46ffd..b29419279a37c 100644
--- a/libcxx/test/std/input.output/string.streams/ostringstream/types.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/ostringstream/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <sstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_ostream<char>, std::basic_ostringstream<char> >::value), "");
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/member_swap.pass.cpp
index 6977d3189cf83..d63f077299008 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/move.pass.cpp
index 653edc0d19cb7..bdb3bcabb1610 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/move.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/nonmember_swap.pass.cpp
index 38562fc322057..70324dabd496e 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.assign/nonmember_swap.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp
index 836509f2fddbf..f15e75bb2114e 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template<typename CharT>
 struct testbuf
     : std::basic_stringbuf<CharT>
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp
index a3cccd39c87d2..9fb588d1a4304 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/move.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/string.pass.cpp
index de211c80ea1be..409d293734b20 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.cons/string.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp
index 78b572454a5d3..f536ba25ed703 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.members/str.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/overflow.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/overflow.pass.cpp
index c9fdd0abcc9d1..db50944864bbe 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/overflow.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/overflow.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int overflow_called = 0;
 
 template <class CharT>
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/pbackfail.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/pbackfail.pass.cpp
index 458e393c8ec36..0cd24ded18650 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/pbackfail.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/pbackfail.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_stringbuf<CharT>
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekoff.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekoff.pass.cpp
index 1bee5c216a523..a778d03b3f132 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekoff.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekoff.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekpos.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekpos.pass.cpp
index fde91e74ba035..1980292edf59d 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekpos.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/seekpos.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/setbuf.pass.cpp
index f833debe39da4..0347ef37b3168 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/setbuf.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/setbuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/underflow.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/underflow.pass.cpp
index 23b77d229ca80..a3490e20d4317 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/underflow.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/stringbuf.virtuals/underflow.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 struct testbuf
     : public std::basic_stringbuf<CharT>
diff --git a/libcxx/test/std/input.output/string.streams/stringbuf/types.pass.cpp b/libcxx/test/std/input.output/string.streams/stringbuf/types.pass.cpp
index c27db8535991b..cec616947a04a 100644
--- a/libcxx/test/std/input.output/string.streams/stringbuf/types.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringbuf/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <sstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_streambuf<char>, std::basic_stringbuf<char> >::value), "");
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/default.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/default.pass.cpp
index 11cf288fcc9f1..904690894569a 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/default.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp
index 2a73ad9c2157f..6ed18e289016a 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/move2.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/move2.pass.cpp
index 044e62824ca2c..1dc2f873307a1 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/move2.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/move2.pass.cpp
@@ -21,6 +21,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::vector<std::istringstream> vecis;
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/string.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/string.pass.cpp
index 29c81b3be6272..a3659baf6f944 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/string.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/string.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template<typename T>
 struct NoDefaultAllocator : std::allocator<T>
 {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/member_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/member_swap.pass.cpp
index 2e0f4471a1caf..ddd117ea36417 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/member_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp
index c3088679ed642..5d6c549298c39 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/move.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/nonmember_swap.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/nonmember_swap.pass.cpp
index 06a95dcb39415..6304071639a64 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.cons/stringstream.assign/nonmember_swap.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream.members/str.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream.members/str.pass.cpp
index 1dc655193b8ec..8645e5703e48d 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream.members/str.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream.members/str.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/input.output/string.streams/stringstream/types.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/types.pass.cpp
index 580ccb9a4e6e1..11990e6a57f81 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream/types.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <sstream>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::basic_iostream<char>, std::basic_stringstream<char> >::value), "");
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.basic/iterator.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.basic/iterator.pass.cpp
index b5929ca8c1059..49abf67692f66 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.basic/iterator.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.basic/iterator.pass.cpp
@@ -22,6 +22,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 template <class T>
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp
index 454080563708b..eea2bff77bf15 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp
@@ -22,6 +22,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class It>
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
index bd1b02a484df7..1968aa80b66cc 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/distance.pass.cpp
@@ -19,6 +19,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class It>
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp
index 26ec32cab2959..e79b8ffc520f8 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/next.pass.cpp
@@ -16,6 +16,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class It>
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp
index 8faaf3d05f076..eba18a7c9e3de 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp
@@ -14,6 +14,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class It>
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_pointer.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_pointer.pass.cpp
index 5abf59bf3dea6..b167f58fd5a4d 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_pointer.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_pointer.pass.cpp
@@ -21,6 +21,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_volatile_pointer.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_volatile_pointer.pass.cpp
index 358abb619b140..5bda7e7190dbd 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_volatile_pointer.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/const_volatile_pointer.pass.cpp
@@ -14,6 +14,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/empty.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/empty.pass.cpp
index 35fc877f2067c..b9917e5a51862 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/empty.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/empty.pass.cpp
@@ -15,6 +15,8 @@
 
 #include <iterator>
 
+#include "test_macros.h"
+
 struct not_an_iterator
 {
 };
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iterator.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iterator.pass.cpp
index 1e4d87c757ccb..fc9d5d679b925 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iterator.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/iterator.pass.cpp
@@ -21,6 +21,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 struct test_iterator
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/pointer.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/pointer.pass.cpp
index 6016f6d408815..79deed7b7ba78 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/pointer.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/pointer.pass.cpp
@@ -21,6 +21,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/volatile_pointer.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/volatile_pointer.pass.cpp
index 035360645dc6b..63933cf1b0b07 100644
--- a/libcxx/test/std/iterators/iterator.primitives/iterator.traits/volatile_pointer.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/iterator.traits/volatile_pointer.pass.cpp
@@ -14,6 +14,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/bidirectional_iterator_tag.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/bidirectional_iterator_tag.pass.cpp
index 8380fb6b4e2c9..06a8d7de59acb 100644
--- a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/bidirectional_iterator_tag.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/bidirectional_iterator_tag.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::bidirectional_iterator_tag tag;
diff --git a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/forward_iterator_tag.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/forward_iterator_tag.pass.cpp
index 0afdc3eca0a14..9c30144cf15e6 100644
--- a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/forward_iterator_tag.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/forward_iterator_tag.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::forward_iterator_tag tag;
diff --git a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/input_iterator_tag.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/input_iterator_tag.pass.cpp
index 26de37448d108..59389ee75a2ff 100644
--- a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/input_iterator_tag.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/input_iterator_tag.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::input_iterator_tag tag;
diff --git a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/output_iterator_tag.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/output_iterator_tag.pass.cpp
index 657e6f8ea1847..e6ec3ca30ad0c 100644
--- a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/output_iterator_tag.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/output_iterator_tag.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::output_iterator_tag tag;
diff --git a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/random_access_iterator_tag.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/random_access_iterator_tag.pass.cpp
index 5448a6715738d..2f30b4c8f099e 100644
--- a/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/random_access_iterator_tag.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/std.iterator.tags/random_access_iterator_tag.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::random_access_iterator_tag tag;
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.cons/container.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.cons/container.pass.cpp
index 2aad3fa3f4807..8e1d7d17aa5f2 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.cons/container.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.cons/container.pass.cpp
@@ -16,6 +16,8 @@
 #include <vector>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/post.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/post.pass.cpp
index d36b1ce063078..34d72edc31c7e 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/post.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/post.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/pre.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/pre.pass.cpp
index 512eb1e56b316..551c7be921277 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/pre.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op++/pre.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/lv_value.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/lv_value.pass.cpp
index 2b76a27148e6c..9e8cad2bb387e 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/lv_value.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/lv_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/rv_value.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/rv_value.pass.cpp
index 506b7b6f0005a..7a78b24054ad0 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/rv_value.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op=/rv_value.pass.cpp
@@ -22,6 +22,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op_astrk/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op_astrk/test.pass.cpp
index 460f723a407b6..43b291a8ed7eb 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op_astrk/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.insert.iter.op_astrk/test.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.inserter/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.inserter/test.pass.cpp
index bd6df64488aab..eda9471b43dbd 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.inserter/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iter.ops/back.inserter/test.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/types.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/types.pass.cpp
index 470392bc55147..e1cc4974c66f1 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/types.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/back.insert.iterator/types.pass.cpp
@@ -28,6 +28,8 @@
 #include <type_traits>
 #include <vector>
 
+#include "test_macros.h"
+
 template <class C>
 struct find_container
     : private std::back_insert_iterator<C>
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.cons/container.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.cons/container.pass.cpp
index 2ef4ba8799485..22c97bdb63143 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.cons/container.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.cons/container.pass.cpp
@@ -16,6 +16,8 @@
 #include <list>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/post.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/post.pass.cpp
index 7c9b09ffd2aa5..78bd85fbc4937 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/post.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/post.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/pre.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/pre.pass.cpp
index ea5c024105bea..29c8f5552d29d 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/pre.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op++/pre.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/lv_value.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/lv_value.pass.cpp
index 5e1a86d57f0a9..9cb8be5c40184 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/lv_value.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/lv_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/rv_value.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/rv_value.pass.cpp
index 450f395d49a87..88051325746bb 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/rv_value.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op=/rv_value.pass.cpp
@@ -20,6 +20,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op_astrk/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op_astrk/test.pass.cpp
index 3367229b42220..ac7a1fbc6a3c9 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op_astrk/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.insert.iter.op_astrk/test.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.inserter/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.inserter/test.pass.cpp
index b7436778d1147..2ce42da80928a 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.inserter/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iter.ops/front.inserter/test.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/types.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/types.pass.cpp
index c65a8e6f68d3a..97139ae2acbed 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/types.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/front.insert.iterator/types.pass.cpp
@@ -29,6 +29,8 @@
 #include <type_traits>
 #include <vector>
 
+#include "test_macros.h"
+
 template <class C>
 struct find_container
     : private std::front_insert_iterator<C>
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.cons/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.cons/test.pass.cpp
index 531dac03c9f16..f84df4d0a27e7 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.cons/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.cons/test.pass.cpp
@@ -16,6 +16,8 @@
 #include <vector>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/post.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/post.pass.cpp
index a3148e2e2a4bd..d2d7311681bc0 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/post.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/post.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/pre.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/pre.pass.cpp
index 99c686095002b..a4bd716d47a28 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/pre.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op++/pre.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/lv_value.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/lv_value.pass.cpp
index fe8260b999f6a..783bf918872f3 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/lv_value.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/lv_value.pass.cpp
@@ -19,6 +19,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c1, typename C::difference_type j,
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/rv_value.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/rv_value.pass.cpp
index 7a5addb1ab026..592560095c03c 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/rv_value.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op=/rv_value.pass.cpp
@@ -23,6 +23,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c1, typename C::difference_type j,
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op_astrk/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op_astrk/test.pass.cpp
index 8ef0383eed27c..c8e92404f5cd7 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op_astrk/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/insert.iter.op_astrk/test.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/inserter/test.pass.cpp b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/inserter/test.pass.cpp
index e1ee829bce72a..7d499c04432fc 100644
--- a/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/inserter/test.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/insert.iterators/insert.iter.ops/inserter/test.pass.cpp
@@ -17,6 +17,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 template <class C>
 void
 test(C c)
diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iterator/types.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iterator/types.pass.cpp
index 11ac625258c71..0efda120d7894 100644
--- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iterator/types.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iterator/types.pass.cpp
@@ -27,6 +27,7 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class It>
diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.cons/istream.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.cons/istream.pass.cpp
index a4c47974daee7..bda4a18c972c3 100644
--- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.cons/istream.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.cons/istream.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::istringstream inf(" 1 23");
diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/arrow.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/arrow.pass.cpp
index 5409cc5955b4c..bcb5e039d8a5c 100644
--- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/arrow.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/arrow.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     double d_;
diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/dereference.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/dereference.pass.cpp
index c99e723b04104..7fda2adffd82e 100644
--- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/dereference.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/dereference.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::istringstream inf(" 1 23");
diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/equal.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/equal.pass.cpp
index 616a3ca38520c..373d5ed0170c1 100644
--- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/equal.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/equal.pass.cpp
@@ -22,6 +22,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::istringstream inf1(" 1 23");
diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/post_increment.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/post_increment.pass.cpp
index 83d206e719b3b..80c19a89c57ae 100644
--- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/post_increment.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/post_increment.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::istringstream inf(" 1 23");
diff --git a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/pre_increment.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/pre_increment.pass.cpp
index ab61f57f9dc88..563701080789d 100644
--- a/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/pre_increment.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istream.iterator/istream.iterator.ops/pre_increment.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::istringstream inf(" 1 23");
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/istream.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/istream.pass.cpp
index b51d19a8e5e94..6b52cf5ebcf14 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/istream.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/istream.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/proxy.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/proxy.pass.cpp
index 87afe840d03e0..4fd650637080f 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/proxy.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/proxy.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/streambuf.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/streambuf.pass.cpp
index d92cddde1f155..e851379ca1e55 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/streambuf.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator.cons/streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_equal/equal.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_equal/equal.pass.cpp
index 1fcdf7af146d4..95db77e67cf66 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_equal/equal.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_equal/equal.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op!=/not_equal.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op!=/not_equal.pass.cpp
index d4184aa6ce66c..daf325c8d46b3 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op!=/not_equal.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op!=/not_equal.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op++/dereference.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op++/dereference.pass.cpp
index d60302ad40d74..1309c69c719d1 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op++/dereference.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op++/dereference.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op==/equal.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op==/equal.pass.cpp
index 875989f5df568..1c0c88f3e07ea 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op==/equal.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op==/equal.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/post_increment.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/post_increment.pass.cpp
index e3121494df420..7c52e1bc712a6 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/post_increment.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/post_increment.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/pre_increment.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/pre_increment.pass.cpp
index 9d05cbda340d7..da11ca0317234 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/pre_increment.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_op_astrk/pre_increment.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_proxy/proxy.pass.cpp b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_proxy/proxy.pass.cpp
index 74e1813d512b0..62e7acf01156e 100644
--- a/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_proxy/proxy.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/istreambuf.iterator/istreambuf.iterator_proxy/proxy.pass.cpp
@@ -28,6 +28,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_array.pass.cpp b/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_array.pass.cpp
index 8d7500cf292d6..2d69d7118a97e 100644
--- a/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_array.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_array.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int ia[] = {1, 2, 3};
diff --git a/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_const.pass.cpp b/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_const.pass.cpp
index 06b5e7907e806..3cb82a323f294 100644
--- a/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_const.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int ia[] = {1, 2, 3};
diff --git a/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_non_const.pass.cpp b/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_non_const.pass.cpp
index 75e61d3b41178..6e523dae22877 100644
--- a/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_non_const.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/iterator.range/begin_non_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int ia[] = {1, 2, 3};
diff --git a/libcxx/test/std/iterators/stream.iterators/iterator.range/end_array.pass.cpp b/libcxx/test/std/iterators/stream.iterators/iterator.range/end_array.pass.cpp
index a6721155afd16..cc29b6e122127 100644
--- a/libcxx/test/std/iterators/stream.iterators/iterator.range/end_array.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/iterator.range/end_array.pass.cpp
@@ -13,6 +13,8 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int ia[] = {1, 2, 3};
diff --git a/libcxx/test/std/iterators/stream.iterators/iterator.range/end_const.pass.cpp b/libcxx/test/std/iterators/stream.iterators/iterator.range/end_const.pass.cpp
index 78a6affd8a29c..d9975b8c67002 100644
--- a/libcxx/test/std/iterators/stream.iterators/iterator.range/end_const.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/iterator.range/end_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int ia[] = {1, 2, 3};
diff --git a/libcxx/test/std/iterators/stream.iterators/iterator.range/end_non_const.pass.cpp b/libcxx/test/std/iterators/stream.iterators/iterator.range/end_non_const.pass.cpp
index 9970ec922c384..e8578dc6018dd 100644
--- a/libcxx/test/std/iterators/stream.iterators/iterator.range/end_non_const.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/iterator.range/end_non_const.pass.cpp
@@ -13,6 +13,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int ia[] = {1, 2, 3};
diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/copy.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/copy.pass.cpp
index 491f3bc1cec46..bb1354b0191da 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/copy.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ostringstream outf;
diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream.pass.cpp
index 78abcfab677e2..3c3460857d07a 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct MyTraits : std::char_traits<char> {};
 
 typedef std::basic_ostringstream<char, MyTraits> StringStream;
diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream_delim.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream_delim.pass.cpp
index 2c48189f91a88..766e989b0fd9a 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream_delim.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.cons.des/ostream_delim.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 struct MyTraits : std::char_traits<char> {};
 
diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/dereference.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/dereference.pass.cpp
index 6cb190ab4913c..67d8241dbc39d 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/dereference.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/dereference.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ostringstream os;
diff --git a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/increment.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/increment.pass.cpp
index eedab8115d87c..17169bf7529fd 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/increment.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostream.iterator/ostream.iterator.ops/increment.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ostringstream os;
diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/ostream.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/ostream.pass.cpp
index aa6031a8ad8a5..7be77cb404332 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/ostream.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/ostream.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/streambuf.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/streambuf.pass.cpp
index 2c64dc29fdb26..0fda71b6aac85 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/streambuf.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.cons/streambuf.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/assign_c.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/assign_c.pass.cpp
index fe51fba1fd5f8..b33eb5b06cfe0 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/assign_c.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/assign_c.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/deref.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/deref.pass.cpp
index 4904320b0a004..fcd88b94dc371 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/deref.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/deref.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/failed.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/failed.pass.cpp
index fa67513ad6e28..d2714d1be3730 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/failed.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/failed.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename Char, typename Traits = std::char_traits<Char> >
 struct my_streambuf : public std::basic_streambuf<Char,Traits> {
     typedef typename std::basic_streambuf<Char,Traits>::int_type  int_type;
diff --git a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/increment.pass.cpp b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/increment.pass.cpp
index 81ae55ae7a677..49f9458f9e437 100644
--- a/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/increment.pass.cpp
+++ b/libcxx/test/std/iterators/stream.iterators/ostreambuf.iterator/ostreambuf.iter.ops/increment.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/language.support/cstdint/cstdint.syn/cstdint.pass.cpp b/libcxx/test/std/language.support/cstdint/cstdint.syn/cstdint.pass.cpp
index ec4afd7f69dba..00c09b774f193 100644
--- a/libcxx/test/std/language.support/cstdint/cstdint.syn/cstdint.pass.cpp
+++ b/libcxx/test/std/language.support/cstdint/cstdint.syn/cstdint.pass.cpp
@@ -18,6 +18,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     // typedef std::int8_t
diff --git a/libcxx/test/std/language.support/support.dynamic/alloc.errors/bad.alloc/bad_alloc.pass.cpp b/libcxx/test/std/language.support/support.dynamic/alloc.errors/bad.alloc/bad_alloc.pass.cpp
index f0b2bd21fec69..a4016bff74139 100644
--- a/libcxx/test/std/language.support/support.dynamic/alloc.errors/bad.alloc/bad_alloc.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/alloc.errors/bad.alloc/bad_alloc.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::bad_alloc>::value),
diff --git a/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.badlength/bad_array_new_length.pass.cpp b/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.badlength/bad_array_new_length.pass.cpp
index 35fd130412525..4018383e1a42b 100644
--- a/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.badlength/bad_array_new_length.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.badlength/bad_array_new_length.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::bad_alloc, std::bad_array_new_length>::value),
diff --git a/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.handler/new_handler.pass.cpp b/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.handler/new_handler.pass.cpp
index b69fe15232c0c..b6fb1582e74cb 100644
--- a/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.handler/new_handler.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/alloc.errors/new.handler/new_handler.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f() {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp b/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp
index a9ed3b0cf3687..d20a65d8b6b48 100644
--- a/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp
@@ -11,6 +11,8 @@
 #include <new>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/set_new_handler.pass.cpp b/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/set_new_handler.pass.cpp
index cff382b0ee31c..b0becc9efe8c4 100644
--- a/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/set_new_handler.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/set_new_handler.pass.cpp
@@ -11,6 +11,8 @@
 #include <new>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new.pass.cpp
index 8256b93185dfd..b857b397fc83e 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new.pass.cpp
@@ -11,6 +11,8 @@
 #include <new>
 #include <cassert>
 
+#include "test_macros.h"
+
 int A_constructed = 0;
 
 struct A
diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array.pass.cpp
index 8a78df6bffb30..99a5442b6fb1f 100644
--- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array.pass.cpp
@@ -11,6 +11,8 @@
 #include <new>
 #include <cassert>
 
+#include "test_macros.h"
+
 int A_constructed = 0;
 
 struct A
diff --git a/libcxx/test/std/language.support/support.exception/bad.exception/bad_exception.pass.cpp b/libcxx/test/std/language.support/support.exception/bad.exception/bad_exception.pass.cpp
index e5f4fbe6dcf9c..b6a8f9caa1f6c 100644
--- a/libcxx/test/std/language.support/support.exception/bad.exception/bad_exception.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/bad.exception/bad_exception.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::bad_exception>::value),
diff --git a/libcxx/test/std/language.support/support.exception/except.nested/rethrow_nested.pass.cpp b/libcxx/test/std/language.support/support.exception/except.nested/rethrow_nested.pass.cpp
index 204c3b567e70c..a3bf862f3f1ce 100644
--- a/libcxx/test/std/language.support/support.exception/except.nested/rethrow_nested.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/except.nested/rethrow_nested.pass.cpp
@@ -17,6 +17,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     int data_;
diff --git a/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/get_terminate.pass.cpp b/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/get_terminate.pass.cpp
index 851d93bc0c8ae..debc8b42820d9 100644
--- a/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/get_terminate.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/get_terminate.pass.cpp
@@ -12,6 +12,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/set_terminate.pass.cpp b/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/set_terminate.pass.cpp
index e4464b9afc1a7..860eddc7200da 100644
--- a/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/set_terminate.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/exception.terminate/set.terminate/set_terminate.pass.cpp
@@ -12,6 +12,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f1() {}
 void f2() {}
 
diff --git a/libcxx/test/std/language.support/support.exception/exception.terminate/terminate.handler/terminate_handler.pass.cpp b/libcxx/test/std/language.support/support.exception/exception.terminate/terminate.handler/terminate_handler.pass.cpp
index 2519f0bc9cebf..748e71ac13ba2 100644
--- a/libcxx/test/std/language.support/support.exception/exception.terminate/terminate.handler/terminate_handler.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/exception.terminate/terminate.handler/terminate_handler.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f() {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/language.support/support.exception/exception.terminate/terminate/terminate.pass.cpp b/libcxx/test/std/language.support/support.exception/exception.terminate/terminate/terminate.pass.cpp
index 4243fb5cadf05..f585c77d84b9c 100644
--- a/libcxx/test/std/language.support/support.exception/exception.terminate/terminate/terminate.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/exception.terminate/terminate/terminate.pass.cpp
@@ -12,6 +12,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 void f1()
 {
     std::exit(0);
diff --git a/libcxx/test/std/language.support/support.exception/exception/exception.pass.cpp b/libcxx/test/std/language.support/support.exception/exception/exception.pass.cpp
index 893a7d5b36314..1a04aca654ad2 100644
--- a/libcxx/test/std/language.support/support.exception/exception/exception.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/exception/exception.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::is_polymorphic<std::exception>::value,
diff --git a/libcxx/test/std/language.support/support.exception/propagation/current_exception.pass.cpp b/libcxx/test/std/language.support/support.exception/propagation/current_exception.pass.cpp
index c95368163d41e..de924016c4fcc 100644
--- a/libcxx/test/std/language.support/support.exception/propagation/current_exception.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/propagation/current_exception.pass.cpp
@@ -18,6 +18,8 @@
 #include <exception>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int constructed;
diff --git a/libcxx/test/std/language.support/support.exception/propagation/exception_ptr.pass.cpp b/libcxx/test/std/language.support/support.exception/propagation/exception_ptr.pass.cpp
index 164e7774be9e3..5bd11903a6085 100644
--- a/libcxx/test/std/language.support/support.exception/propagation/exception_ptr.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/propagation/exception_ptr.pass.cpp
@@ -15,6 +15,8 @@
 #include <exception>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::exception_ptr p;
diff --git a/libcxx/test/std/language.support/support.exception/propagation/make_exception_ptr.pass.cpp b/libcxx/test/std/language.support/support.exception/propagation/make_exception_ptr.pass.cpp
index b26212fd136b4..8d7f6d0ead589 100644
--- a/libcxx/test/std/language.support/support.exception/propagation/make_exception_ptr.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/propagation/make_exception_ptr.pass.cpp
@@ -14,6 +14,8 @@
 #include <exception>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int constructed;
diff --git a/libcxx/test/std/language.support/support.exception/propagation/rethrow_exception.pass.cpp b/libcxx/test/std/language.support/support.exception/propagation/rethrow_exception.pass.cpp
index 015dbef224f7d..49c30c3be10d1 100644
--- a/libcxx/test/std/language.support/support.exception/propagation/rethrow_exception.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/propagation/rethrow_exception.pass.cpp
@@ -14,6 +14,8 @@
 #include <exception>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int constructed;
diff --git a/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exception.pass.cpp b/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exception.pass.cpp
index 61cfc8f11c107..004d525521d09 100644
--- a/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exception.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exception.pass.cpp
@@ -12,6 +12,8 @@
 #include <exception>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     ~A()
diff --git a/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp b/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp
index c25e4d2e069ff..c498b5778baef 100644
--- a/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp
+++ b/libcxx/test/std/language.support/support.exception/uncaught/uncaught_exceptions.pass.cpp
@@ -22,6 +22,8 @@
 #include <exception>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Uncaught {
     Uncaught(int depth) : d_(depth) {}
     ~Uncaught() { assert(std::uncaught_exceptions() == d_); }
diff --git a/libcxx/test/std/language.support/support.initlist/include_cxx03.pass.cpp b/libcxx/test/std/language.support/support.initlist/include_cxx03.pass.cpp
index 282636ed05b81..c4643a88b5fea 100644
--- a/libcxx/test/std/language.support/support.initlist/include_cxx03.pass.cpp
+++ b/libcxx/test/std/language.support/support.initlist/include_cxx03.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <initializer_list>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 
diff --git a/libcxx/test/std/language.support/support.initlist/types.pass.cpp b/libcxx/test/std/language.support/support.initlist/types.pass.cpp
index 1b48980a2402d..55f2094c41c22 100644
--- a/libcxx/test/std/language.support/support.initlist/types.pass.cpp
+++ b/libcxx/test/std/language.support/support.initlist/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <initializer_list>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/language.support/support.limits/c.limits/climits.pass.cpp b/libcxx/test/std/language.support/support.limits/c.limits/climits.pass.cpp
index d124f7ca28d40..75015eb570182 100644
--- a/libcxx/test/std/language.support/support.limits/c.limits/climits.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/c.limits/climits.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <climits>
 
+#include "test_macros.h"
+
 #ifndef CHAR_BIT
 #error CHAR_BIT not defined
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/denorm.style/check_values.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/denorm.style/check_values.pass.cpp
index adbd102dc2038..484c84425cd49 100644
--- a/libcxx/test/std/language.support/support.limits/limits/denorm.style/check_values.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/denorm.style/check_values.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <limits>
 
+#include "test_macros.h"
+
 typedef char one;
 struct two {one _[2];};
 
diff --git a/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp
index b836555afc75b..e4546fee4bd91 100644
--- a/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp
@@ -26,6 +26,8 @@
 #include <limits>
 #include <complex>
 
+#include "test_macros.h"
+
 template <class T>
 void test()
 {
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits/default.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits/default.pass.cpp
index 6e258c13a7888..88ca21fbba481 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits/default.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits/default.pass.cpp
@@ -14,6 +14,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     A(int i = 0) : data_(i) {}
diff --git a/libcxx/test/std/language.support/support.limits/limits/round.style/check_values.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/round.style/check_values.pass.cpp
index b1a4e4450a1ee..bb47763f376c8 100644
--- a/libcxx/test/std/language.support/support.limits/limits/round.style/check_values.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/round.style/check_values.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <limits>
 
+#include "test_macros.h"
+
 typedef char one;
 struct two {one _[2];};
 
diff --git a/libcxx/test/std/language.support/support.limits/version.pass.cpp b/libcxx/test/std/language.support/support.limits/version.pass.cpp
index 783af5c728b5a..edcb97678501b 100644
--- a/libcxx/test/std/language.support/support.limits/version.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/version.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <version>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 
diff --git a/libcxx/test/std/language.support/support.rtti/bad.cast/bad_cast.pass.cpp b/libcxx/test/std/language.support/support.rtti/bad.cast/bad_cast.pass.cpp
index 23afd223cc40e..5614752a58fde 100644
--- a/libcxx/test/std/language.support/support.rtti/bad.cast/bad_cast.pass.cpp
+++ b/libcxx/test/std/language.support/support.rtti/bad.cast/bad_cast.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::bad_cast>::value),
diff --git a/libcxx/test/std/language.support/support.rtti/bad.typeid/bad_typeid.pass.cpp b/libcxx/test/std/language.support/support.rtti/bad.typeid/bad_typeid.pass.cpp
index 94424bb03ad07..a9097af96c0d1 100644
--- a/libcxx/test/std/language.support/support.rtti/bad.typeid/bad_typeid.pass.cpp
+++ b/libcxx/test/std/language.support/support.rtti/bad.typeid/bad_typeid.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::bad_typeid>::value),
diff --git a/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp b/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp
index 980bfeeccdcb6..99c354b3d5a0b 100644
--- a/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp
+++ b/libcxx/test/std/language.support/support.rtti/type.info/type_info.pass.cpp
@@ -13,6 +13,8 @@
 #include <cstring>
 #include <cassert>
 
+#include "test_macros.h"
+
 bool test_constructor_explicit(std::type_info const&) { return false; }
 bool test_constructor_explicit(std::string const&) { return true; }
 
diff --git a/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp b/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp
index c65f3bbf7900e..4232770b8ffe1 100644
--- a/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp
+++ b/libcxx/test/std/language.support/support.rtti/type.info/type_info_hash.pass.cpp
@@ -12,6 +12,8 @@
 #include <cstring>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::type_info& t1 = typeid(int);
diff --git a/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp b/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp
index c1fa71b0e376e..0e3d8f69e99c2 100644
--- a/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp
+++ b/libcxx/test/std/language.support/support.runtime/csetjmp.pass.cpp
@@ -11,6 +11,8 @@
 #include <csetjmp>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef setjmp
 #error setjmp not defined
 #endif
diff --git a/libcxx/test/std/language.support/support.runtime/csignal.pass.cpp b/libcxx/test/std/language.support/support.runtime/csignal.pass.cpp
index dcfb4f99f5f24..4548d3417ec43 100644
--- a/libcxx/test/std/language.support/support.runtime/csignal.pass.cpp
+++ b/libcxx/test/std/language.support/support.runtime/csignal.pass.cpp
@@ -11,6 +11,8 @@
 #include <csignal>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef SIG_DFL
 #error SIG_DFL not defined
 #endif
diff --git a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp
index 461e77c99838e..1d0e9b06a43d2 100644
--- a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp
+++ b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <cstdbool>
 
+#include "test_macros.h"
+
 #ifndef __bool_true_false_are_defined
 #error __bool_true_false_are_defined not defined
 #endif
diff --git a/libcxx/test/std/language.support/support.start.term/quick_exit.pass.cpp b/libcxx/test/std/language.support/support.start.term/quick_exit.pass.cpp
index 50d408aa0ba0f..f3e361931d9cc 100644
--- a/libcxx/test/std/language.support/support.start.term/quick_exit.pass.cpp
+++ b/libcxx/test/std/language.support/support.start.term/quick_exit.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <cstdlib>
 
+#include "test_macros.h"
+
 void f() {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/language.support/support.types/null.pass.cpp b/libcxx/test/std/language.support/support.types/null.pass.cpp
index 66ecdbc147c93..d0787ff37d32d 100644
--- a/libcxx/test/std/language.support/support.types/null.pass.cpp
+++ b/libcxx/test/std/language.support/support.types/null.pass.cpp
@@ -8,6 +8,8 @@
 
 #include <cstddef>
 
+#include "test_macros.h"
+
 #ifndef NULL
 #error NULL not defined
 #endif
diff --git a/libcxx/test/std/language.support/support.types/nullptr_t_integral_cast.pass.cpp b/libcxx/test/std/language.support/support.types/nullptr_t_integral_cast.pass.cpp
index b7696df974be1..b21bfc2f0ef75 100644
--- a/libcxx/test/std/language.support/support.types/nullptr_t_integral_cast.pass.cpp
+++ b/libcxx/test/std/language.support/support.types/nullptr_t_integral_cast.pass.cpp
@@ -16,6 +16,8 @@
 #include <cstddef>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ptrdiff_t i = reinterpret_cast<std::ptrdiff_t>(nullptr);
diff --git a/libcxx/test/std/language.support/support.types/ptrdiff_t.pass.cpp b/libcxx/test/std/language.support/support.types/ptrdiff_t.pass.cpp
index de6f7726fe292..518acd77a106c 100644
--- a/libcxx/test/std/language.support/support.types/ptrdiff_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.types/ptrdiff_t.pass.cpp
@@ -9,6 +9,8 @@
 #include <cstddef>
 #include <type_traits>
 
+#include "test_macros.h"
+
 // ptrdiff_t should:
 
 //  1. be in namespace std.
diff --git a/libcxx/test/std/language.support/support.types/size_t.pass.cpp b/libcxx/test/std/language.support/support.types/size_t.pass.cpp
index 5c840457b3701..5bfaebb24f493 100644
--- a/libcxx/test/std/language.support/support.types/size_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.types/size_t.pass.cpp
@@ -9,6 +9,8 @@
 #include <cstddef>
 #include <type_traits>
 
+#include "test_macros.h"
+
 // size_t should:
 
 //  1. be in namespace std.
diff --git a/libcxx/test/std/localization/c.locales/clocale.pass.cpp b/libcxx/test/std/localization/c.locales/clocale.pass.cpp
index d8bd81b2ba3f1..3b53481ccfcd4 100644
--- a/libcxx/test/std/localization/c.locales/clocale.pass.cpp
+++ b/libcxx/test/std/localization/c.locales/clocale.pass.cpp
@@ -11,6 +11,8 @@
 #include <clocale>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef _LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS
 
 #ifndef LC_ALL
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
index 1d76fa6094f0e..32c63633ae8bf 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
@@ -30,6 +30,7 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp
index 40c15d6e9f4ee..eaad7a901bfdb 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp
@@ -20,6 +20,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp
index 0b86979f55d2f..cab2f01a5d1f5 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp
@@ -23,6 +23,7 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/types.pass.cpp
index f4dfd652227a6..364783e4c977f 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/types.pass.cpp
@@ -28,6 +28,7 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/ctor.pass.cpp
index 856074d391ba7..eb8003e826bbd 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/ctor.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 class my_facet
     : public std::collate<C>
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp
index bfbbebe398552..df657da29143c 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp
@@ -16,6 +16,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp
index 07e29b17f6197..b555805a078be 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp
@@ -18,6 +18,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/transform.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/transform.pass.cpp
index 7588a82be4133..0f1bf2296d21c 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/transform.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/transform.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/types.pass.cpp
index 63e2739f4e6ad..1742e6a6d19a5 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/ctype_base.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/ctype_base.pass.cpp
index b7da91b4686cf..525be48e03d30 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/ctype_base.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/ctype_base.pass.cpp
@@ -35,6 +35,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp
index e38af450f7fb7..0f5f8d994a76d 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/ctor.pass.cpp
index 4ec37db021a45..740ee8cdd0e1e 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 class my_facet
     : public std::ctype<char>
 {
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_1.pass.cpp
index 562f6c25e6b93..50612a52cde1b 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_many.pass.cpp
index c073a955e78b2..5caf8cfaf3a6b 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/is_many.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_1.pass.cpp
index d2fa02201a3eb..2323735114af6 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_many.pass.cpp
index 4814695403579..18696f3c0e582 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/narrow_many.pass.cpp
@@ -17,6 +17,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_is.pass.cpp
index 043ca679608c0..0efde61aa3632 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_is.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_is.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_not.pass.cpp
index 066a06a7f4c51..4d815f2e121ff 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_not.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/scan_not.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/table.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/table.pass.cpp
index 6a0fea0b1fde4..d35ab1aa727fd 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/table.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/table.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::ctype<char> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_1.pass.cpp
index ddf4fbdb057f6..59dca09965f4d 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_many.pass.cpp
index b307d46273550..2474122a1e36c 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/tolower_many.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_1.pass.cpp
index 8b5505910eeb2..e54cef7d00be8 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_many.pass.cpp
index 3d1c453cf3983..7d1796661dbcc 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/toupper_many.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_1.pass.cpp
index 81c3ab6b3c41f..9ed46d29e9c4b 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_many.pass.cpp
index 35d9335c252f8..aff494e387cfa 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.members/widen_many.pass.cpp
@@ -17,6 +17,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp
index 7f46238d68246..c824b1bd28ffe 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::ctype<char> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/types.pass.cpp
index c46dbb3e6f108..8a97a5b65dde8 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char.pass.cpp
index 03d17375e5e52..33de9013378c4 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char.pass.cpp
@@ -16,6 +16,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h"
 
 typedef std::codecvt_byname<char, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t.pass.cpp
index eedf192db2693..f18946cd3e04b 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char16_t.pass.cpp
@@ -16,6 +16,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt_byname<char16_t, char, std::mbstate_t> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t.pass.cpp
index 8e5d70356ab2d..732942e84b606 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_char32_t.pass.cpp
@@ -16,6 +16,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt_byname<char32_t, char, std::mbstate_t> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp
index 5503192ca98b4..8b1ae464f4148 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt.byname/ctor_wchar_t.pass.cpp
@@ -18,6 +18,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/codecvt_base.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/codecvt_base.pass.cpp
index c2e40542e0eac..a77dce636d454 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/codecvt_base.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/codecvt_base.pass.cpp
@@ -17,6 +17,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::codecvt_base::ok == 0);
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char.pass.cpp
index 3f0dc9e7cc191..6a2541bd5c868 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t.pass.cpp
index e2df342d4e6b9..acc191175fa7a 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char16_t.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 //#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
 
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t.pass.cpp
index 0df7f3515df72..fc41ad00b091c 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_char32_t.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 //#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
 
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp
index 6917e1b72206e..c5abf8a3b4e18 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/ctor_wchar_t.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_always_noconv.pass.cpp
index e1741aec05e4b..99938a98bc7dd 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_always_noconv.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_encoding.pass.cpp
index f4614984ad74b..64753df0bcc6b 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_encoding.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_in.pass.cpp
index 2a6a07ef95768..d7dafb675303c 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_in.pass.cpp
@@ -19,6 +19,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_length.pass.cpp
index 038ae971136cf..a6d8f31a764c1 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_max_length.pass.cpp
index bcaa7052876dc..f574244f59153 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_max_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_out.pass.cpp
index cff42b0c05492..f459e83621362 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_out.pass.cpp
@@ -21,6 +21,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_unshift.pass.cpp
index 5b027bae7b5b3..5c1536609f5c1 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_unshift.pass.cpp
@@ -18,6 +18,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char16_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_always_noconv.pass.cpp
index 2b2f136e3b44c..d0a28e29cdc91 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_always_noconv.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_encoding.pass.cpp
index 0d2f35fc2c2a4..35fa9164c1869 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_encoding.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_in.pass.cpp
index eb7c53fcea6a0..5c49a1faa5179 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_in.pass.cpp
@@ -19,6 +19,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_length.pass.cpp
index da853068883b0..c0139f50ac8be 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_max_length.pass.cpp
index f31dba747c78b..25af11cea7925 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_max_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_out.pass.cpp
index 7ed5609436c48..f88443250472c 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_out.pass.cpp
@@ -21,6 +21,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_unshift.pass.cpp
index aaf9a6a20e698..c1112a0a2c7c1 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_unshift.pass.cpp
@@ -18,6 +18,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char32_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_always_noconv.pass.cpp
index c253bbed72f42..a5ef0b46ed7bc 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_always_noconv.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_encoding.pass.cpp
index 79c26add6ab68..ec1fd6dc48b67 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_encoding.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_in.pass.cpp
index 1f2cdb6be4587..0845117a4cc84 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_in.pass.cpp
@@ -19,6 +19,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_length.pass.cpp
index ad45cba5f4342..b19a073afd88a 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_max_length.pass.cpp
index 437e72b94cad4..4346621c809b5 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_max_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_out.pass.cpp
index be266746cdab3..942e295029288 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_out.pass.cpp
@@ -19,6 +19,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_unshift.pass.cpp
index a3d9e3d14bdf3..e71caaac64555 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char_unshift.pass.cpp
@@ -18,6 +18,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<char, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp
index eaae7b6e97bde..49b7f9da99c33 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/utf_sanity_check.pass.cpp
@@ -20,6 +20,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt<char32_t, char, std::mbstate_t> F32_8;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp
index 484b2213d24bd..10ea879ed4bc7 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_always_noconv.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp
index 9c075af137288..e46754e0aa09f 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_encoding.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp
index bec0e6cb0ac75..832aedfff33b8 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_in.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp
index 4fd5d32938662..536bab4871318 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp
index 90d91315197f2..8e58726bf919f 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_max_length.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp
index bc12bdbce646e..90caf5e585078 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_out.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstddef>
 #include <cstring>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp
index e0f7c3c9520e8..bc7246f19b109 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/wchar_t_unshift.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char.pass.cpp
index 455cf03ee7161..f474f40ff57e7 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char.pass.cpp
@@ -24,6 +24,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt<char, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t.pass.cpp
index f52c60f1bfd39..defe65cae20fb 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char16_t.pass.cpp
@@ -24,6 +24,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt<char16_t, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t.pass.cpp
index c75de419c8675..0c286f1231b1d 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_char32_t.pass.cpp
@@ -24,6 +24,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt<char32_t, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp
index 07e25be718c3e..da367747ba621 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/types_wchar_t.pass.cpp
@@ -24,6 +24,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt<wchar_t, char, std::mbstate_t> F;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
index 3331c5a67d803..77ba7bd9f436d 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
@@ -18,6 +18,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
index 6751fd60de0b1..31994fc5eb700 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
@@ -21,6 +21,7 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp
index 3a6360eb9dee8..d9ca398d3accf 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/mask.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp
index 6d19377250194..6e4b15a48040e 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_1.pass.cpp
@@ -18,6 +18,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp
index 7ab4874fbcd3a..0c2d477fbf253 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/narrow_many.pass.cpp
@@ -20,6 +20,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
index b736dd7862c8d..02388b84bcfeb 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
@@ -21,6 +21,7 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
index fa7674a8ca085..6bfb6b7a69f29 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
@@ -21,6 +21,7 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
index ab5daa7aa3886..702d84a437f90 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
@@ -17,6 +17,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
index 29403cb10101b..7965239621715 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
index 56304a7558aa6..dd4f80d94da6e 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
@@ -18,6 +18,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
index bfc3bf848ad10..d66b5f1c5a2f3 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/types.pass.cpp
index ce0a0e30ed6cc..9fb4c44c4d826 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/types.pass.cpp
@@ -26,6 +26,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp
index 1dc9b7de80566..1327e7e01bc8f 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp
@@ -21,6 +21,7 @@
 #include <cassert>
 #include <limits.h>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp
index 67a97ba68265d..f1a30dd9605bf 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp
@@ -21,6 +21,7 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp
index f53d4e9645d35..ab7d149fc1e68 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 class my_facet
     : public std::ctype<C>
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp
index 23a1aa9d21a2e..b5c897b95fa35 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp
index d9dd5b58ab461..6f0da92b8fbd1 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/is_many.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp
index 55e1f378b4397..d9732144f265d 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp
index 47c2b51887e3f..9709620dc3672 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/narrow_many.pass.cpp
@@ -17,6 +17,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp
index 1891b155b312a..95921a4c06512 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_is.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp
index 40cc8c0fa0dff..6e778d3807b8d 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/scan_not.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <stdio.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp
index 1ae14410c6f9e..e8fbb514559df 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp
index 711343d3c5aa5..29a4dcddc95d4 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/tolower_many.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp
index fbc28a1b87cb8..15e036f739b95 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp
index 963e894a7e12d..431b01a62f780 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/toupper_many.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp
index c2570a3113643..03d753e0b896a 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp
index a43817a25a1c5..de3027f05a2f3 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/locale.ctype.members/widen_many.pass.cpp
@@ -17,6 +17,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp
index 35b5d3282b947..4dd30bbedb8fc 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/ctor.pass.cpp
index ddbbe66694bed..db8cbd0a0f350 100644
--- a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::messages<char> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/locale.messages.members/not_testable.pass.cpp b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/locale.messages.members/not_testable.pass.cpp
index c0166f80f3429..41fb3d8840dae 100644
--- a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/locale.messages.members/not_testable.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/locale.messages.members/not_testable.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 // As far as I can tell, the messages facet is untestable.  I have a best
 // effort implementation in the hopes that in the future I will learn how
 // to test it.
diff --git a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/messages_base.pass.cpp b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/messages_base.pass.cpp
index ce6d70be72884..9fd30c3689ed9 100644
--- a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/messages_base.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/messages_base.pass.cpp
@@ -17,6 +17,8 @@
 #include <locale>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::messages_base mb;
diff --git a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/types.pass.cpp
index 436290698cc15..58b785478e04c 100644
--- a/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.messages/locale.messages/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <locale>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::messages<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/ctor.pass.cpp
index 360ff3ad61f5f..9b7f6029e613e 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::money_get<char, const char*> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp
index 9fec21f51b96d..2fe66a5c6b790 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp
@@ -19,6 +19,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
index c13849804fcdd..2783afc93767b 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp
@@ -31,6 +31,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp
index 9b006f55b7a28..f75a0bbb3df88 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp
@@ -25,6 +25,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp
index 1b1a471e185c3..5cb20dfe4f71e 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp
@@ -19,6 +19,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/types.pass.cpp
index 7bc04801efaa9..ed4baadbdfaf8 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <locale>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::money_get<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/ctor.pass.cpp
index 309d26c5d74a9..b3f358d4e5650 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::money_put<char, char*> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp
index d6e4d6cc1d9a8..46135895bc07e 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp
@@ -19,6 +19,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
index 36f97b1d8164b..a35922b90ea1c 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp
@@ -31,6 +31,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp
index a300ba8475238..d33d40bcebef5 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp
@@ -25,6 +25,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp
index cd1ff643f8331..b0f5351746dcf 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp
@@ -19,6 +19,7 @@
 #include <ios>
 #include <streambuf>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/types.pass.cpp
index bd797313c85ed..4ac5487b1e959 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <locale>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::money_put<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/frac_digits.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/frac_digits.pass.cpp
index 07d78229a4fe1..d022afeb328bc 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/frac_digits.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/frac_digits.pass.cpp
@@ -21,6 +21,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp
index 2c2da48642e09..5f1bf96143e47 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp
@@ -26,6 +26,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp
index f3f637b60df58..a4b0557c1d7e4 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp
@@ -26,6 +26,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/negative_sign.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/negative_sign.pass.cpp
index 5567fc08eb541..8dfa00a7253e9 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/negative_sign.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/negative_sign.pass.cpp
@@ -21,6 +21,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp
index f7d396a662516..3c4cd6d778dde 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp
@@ -26,6 +26,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/positive_sign.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/positive_sign.pass.cpp
index 43dfa2a2f1831..7aac27bf6fb67 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/positive_sign.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/positive_sign.pass.cpp
@@ -21,6 +21,7 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/ctor.pass.cpp
index 3b52f7afc742a..b9cef9fbbc8ca 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/curr_symbol.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/curr_symbol.pass.cpp
index 7b3b75a19c213..9786fc2106416 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/curr_symbol.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/curr_symbol.pass.cpp
@@ -19,6 +19,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/decimal_point.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/decimal_point.pass.cpp
index 34d02032cc935..6af4826c2d88f 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/decimal_point.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/decimal_point.pass.cpp
@@ -19,6 +19,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/frac_digits.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/frac_digits.pass.cpp
index 50365584d2453..5b1404a88115a 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/frac_digits.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/frac_digits.pass.cpp
@@ -19,6 +19,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/grouping.pass.cpp
index 9e12e322044c5..c4423e518f455 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/grouping.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/grouping.pass.cpp
@@ -19,6 +19,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/neg_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/neg_format.pass.cpp
index cb51199095981..69f124570adb6 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/neg_format.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/neg_format.pass.cpp
@@ -16,6 +16,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/negative_sign.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/negative_sign.pass.cpp
index 6f134e7847e5b..6dab0b34bdad7 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/negative_sign.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/negative_sign.pass.cpp
@@ -20,6 +20,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/pos_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/pos_format.pass.cpp
index bff44a69d88f2..4df85b4177ab5 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/pos_format.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/pos_format.pass.cpp
@@ -16,6 +16,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/positive_sign.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/positive_sign.pass.cpp
index 8686e20051b25..d6db0e96b71de 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/positive_sign.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/positive_sign.pass.cpp
@@ -19,6 +19,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/thousands_sep.pass.cpp
index 42d28d09656ce..2cb8b9ccc1f1e 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/thousands_sep.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/locale.moneypunct.members/thousands_sep.pass.cpp
@@ -19,6 +19,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::moneypunct<char> F;
 
 class Fnf
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/money_base.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/money_base.pass.cpp
index 58f04905a0a66..5ebf0e8caf91d 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/money_base.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/money_base.pass.cpp
@@ -18,6 +18,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::money_base mb; ((void)mb);
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/types.pass.cpp
index 24ddadfb3a7e4..242915b2b4ebd 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct/types.pass.cpp
@@ -26,6 +26,8 @@
 #include <locale>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/ctor.pass.cpp
index 5a3af4b953659..4b8079fc7a4eb 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::num_put<char, char*> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp
index 79aa68dde8b70..2ca1eeb3775dd 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_bool.pass.cpp
@@ -16,6 +16,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp
index 062d5cfd40491..e8084f8f76abc 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp
@@ -20,6 +20,7 @@
 #include <cassert>
 #include <streambuf>
 #include <cmath>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp
index 1aee8bdfe864a..f9034fbf8179a 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long.pass.cpp
@@ -16,6 +16,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
index 3e71a1dfbfb8b..16c7d61cc9364 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
@@ -23,6 +23,7 @@
 #include <cassert>
 #include <streambuf>
 #include <cmath>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp
index a3c49d36ae6d4..ef9685c32f2d0 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_long.pass.cpp
@@ -16,6 +16,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
index d366c3842bf91..d9406e5b93948 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_pointer.pass.cpp
@@ -16,6 +16,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp
index 420d22fb02869..f7a3f9124e069 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long.pass.cpp
@@ -16,6 +16,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp
index 1ad3065c074ba..fb3126c02deef 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_unsigned_long_long.pass.cpp
@@ -16,6 +16,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/types.pass.cpp
index 6011c75c1fe69..a2617ddadfb7c 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::num_put<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/ctor.pass.cpp
index 096939d08420a..cd18e6960cfd8 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::num_get<char, char*> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_bool.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_bool.pass.cpp
index b577d96b73c4e..ceef901e59a99 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_bool.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_bool.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp
index 3980d488d9635..7b26f7f056925 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp
@@ -23,6 +23,7 @@
 #include <cassert>
 #include <streambuf>
 #include <cmath>
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "hexfloat.h"
 
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp
index 1ac313f7d7a37..37385086636ea 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp
@@ -21,6 +21,7 @@
 #include <cassert>
 #include <streambuf>
 #include <cmath>
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "hexfloat.h"
 
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
index 13511728678fc..42126260d980c 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 #include <limits>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp
index 49e8ae750d9cd..06a27e44cf05b 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp
@@ -21,6 +21,7 @@
 #include <cassert>
 #include <streambuf>
 #include <cmath>
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "hexfloat.h"
 
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_long.pass.cpp
index c3a66a9836aa6..5ffd09de438ca 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_long.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_pointer.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_pointer.pass.cpp
index 23b6ad932d6ac..c6ea2d9c97373 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_pointer.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_pointer.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp
index 8a1ee3914f7d4..c7ef8778f831e 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_int.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp
index e97c460c7e403..e9e0d514a4d4d 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp
index a5d57df05cc77..4090a836fe1d0 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_long_long.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp
index 261cac3725f79..699fa76b03d3a 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_unsigned_short.pass.cpp
@@ -17,6 +17,7 @@
 #include <ios>
 #include <cassert>
 #include <streambuf>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::num_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_min_max.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_min_max.pass.cpp
index f267a6cb15267..5a2e081055ba3 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_min_max.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_min_max.pass.cpp
@@ -11,6 +11,8 @@
 #include <cassert>
 #include <iostream>
 
+#include "test_macros.h"
+
 using namespace std;
 
 template <class T>
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/types.pass.cpp
index 42f210d3958d6..ea9ea9fcf0b60 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::num_get<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order.pass.cpp
index 30624ecc8912f..9e5dda43e3331 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order.pass.cpp
@@ -19,6 +19,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp
index d62071b94e280..c3687ae247032 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/date_order_wide.pass.cpp
@@ -19,6 +19,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date.pass.cpp
index e5b5913256d7d..526f4b8e69a7e 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date.pass.cpp
@@ -28,6 +28,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp
index 5506e4cace8d0..c9f6a35016dec 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_date_wide.pass.cpp
@@ -28,6 +28,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
index 5311a85650806..648c079bdf3de 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname.pass.cpp
@@ -20,6 +20,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp
index e45260a6690a3..211c04342e504 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_monthname_wide.pass.cpp
@@ -20,6 +20,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp
index bda40c56d2746..d851574f7631e 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one.pass.cpp
@@ -26,6 +26,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp
index d8715f825d055..de81236503e4d 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_one_wide.pass.cpp
@@ -26,6 +26,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time.pass.cpp
index a007415d03403..2411d211c5f25 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time.pass.cpp
@@ -21,6 +21,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp
index 0e2481d6a84cf..991f73c752aec 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_time_wide.pass.cpp
@@ -21,6 +21,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
index 308b08529eac0..648a01b3cc147 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday.pass.cpp
@@ -24,6 +24,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp
index c52462e37c2bf..719aae3931397 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_weekday_wide.pass.cpp
@@ -24,6 +24,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year.pass.cpp
index 09df423719ab1..7319e8f411fc8 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year.pass.cpp
@@ -20,6 +20,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp
index 78c35e969bdf6..e8a495aebc053 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get.byname/get_year_wide.pass.cpp
@@ -20,6 +20,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/ctor.pass.cpp
index 6fb9899de7b09..babf72e6e3b47 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::time_get<char, const char*> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/date_order.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/date_order.pass.cpp
index 47b06e9df042d..cbb9686f650cd 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/date_order.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/date_order.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::time_get<char, input_iterator<const char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date.pass.cpp
index 1a1dae754f01d..83d2bcd1d54c9 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp
index 1fe184bea2fca..46f013601f190 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_date_wide.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const wchar_t*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_many.pass.cpp
index 2416f6275d730..b2faf2335695f 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_many.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname.pass.cpp
index 8e61dcca876a1..0d460cc51e48a 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp
index a8eb4864ecb00..840db5bb8b98c 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_monthname_wide.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const wchar_t*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_one.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_one.pass.cpp
index 0c6d9085b57ae..fa67180e71ff9 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_one.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_one.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time.pass.cpp
index ca0227973dd11..380d525ab1429 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp
index 98f2a8570ef59..44cd664c03d7e 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_time_wide.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const wchar_t*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday.pass.cpp
index 16e853a49915c..f5b9ed06222db 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp
index 3b7b4e9d83ba6..210a11aca7064 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_weekday_wide.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const wchar_t*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_year.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_year.pass.cpp
index 8a0b840591442..8fa5d04975d7e 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_year.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/locale.time.get.members/get_year.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef input_iterator<const char*> I;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/time_base.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/time_base.pass.cpp
index c1b509b40d539..e9a309a40716d 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/time_base.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/time_base.pass.cpp
@@ -17,6 +17,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::time_base::dateorder d = std::time_base::no_order;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/types.pass.cpp
index bd74f7da8e615..8d8889551444d 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.get/types.pass.cpp
@@ -27,6 +27,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::time_get<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp
index cd70133274ec2..210a9aefa6754 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp
@@ -31,6 +31,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 #include "platform_support.h" // locale name macros
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/ctor.pass.cpp
index 9d0ec1ad7183e..733dcce409596 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::time_put<char, char*> F;
 
 class my_facet
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put1.pass.cpp
index 94faa80f1eff3..5018e3d411b6e 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put1.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::time_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp
index a3b6cf5b25cd8..14bfeb7217698 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <locale>
 #include <cassert>
+#include "test_macros.h"
 #include "test_iterators.h"
 
 typedef std::time_put<char, output_iterator<char*> > F;
diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/types.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/types.pass.cpp
index 4361094f4b0c1..8a661a78cb129 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::locale::facet, std::time_put<char> >::value), "");
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/decimal_point.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/decimal_point.pass.cpp
index f8132872d4ca4..b93d1c432d714 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/decimal_point.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/decimal_point.pass.cpp
@@ -18,6 +18,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/ctor.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/ctor.pass.cpp
index e3a10a5c3a3d5..a48ba0efc48bb 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/ctor.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class C>
 class my_facet
     : public std::numpunct<C>
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/decimal_point.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/decimal_point.pass.cpp
index 5322c8a87f1a0..0c25943f0ff3a 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/decimal_point.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/decimal_point.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/falsename.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/falsename.pass.cpp
index d1f20f3bbdb89..c238604bde56d 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/falsename.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/falsename.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/grouping.pass.cpp
index 4c23c51acde3f..ee84fcb5e3e30 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/grouping.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/grouping.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/thousands_sep.pass.cpp
index 2044c98a1df5b..92a32eda115c0 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/thousands_sep.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/thousands_sep.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/truename.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/truename.pass.cpp
index 359c96bef4ee4..e89c95c89903e 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/truename.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/facet.numpunct.members/truename.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/types.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/types.pass.cpp
index 212670af402a2..eb59310ab7e02 100644
--- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/types.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l = std::locale::classic();
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_mode.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_mode.pass.cpp
index def721c4aac92..09a83047e7196 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_mode.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_mode.pass.cpp
@@ -18,6 +18,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::consume_header == 4);
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp
index a66129610fe42..88892fb76414c 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16.pass.cpp
@@ -24,6 +24,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(globalMemCounter.checkOutstandingNewEq(0));
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp
index 6bd37789d3024..7788fad2eac6f 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_always_noconv.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp
index 2e8a1833c7897..c968cf61882c8 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_encoding.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp
index 4ccf933d601ca..139a4b284625c 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp
@@ -24,6 +24,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp
index 39ecb8f0ea095..caec8ea734713 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp
@@ -22,6 +22,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp
index fa8c3269ad405..29f123c5ebf57 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
index beabf842ec38a..f99f8f4c35724 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
@@ -24,6 +24,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 template <class CharT>
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp
index 2c37e25781568..261363f563057 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_unshift.pass.cpp
@@ -23,6 +23,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp
index 450f525092946..30cb5571ba744 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8.pass.cpp
@@ -24,6 +24,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(globalMemCounter.checkOutstandingNewEq(0));
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp
index 7d7ba19be8d65..2235e7b275a17 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_always_noconv.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp
index d8e689f62351b..70862e4ccd639 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_encoding.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp
index 611d06305c941..9f162d5461bc6 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp
@@ -24,6 +24,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp
index 2df1c96035c51..fde91c15d069e 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp
@@ -22,6 +22,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp
index 57e5f5850c6e0..7bd4d8902565a 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
index f8b56bcb6cf6a..96089fbb05435 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
@@ -24,6 +24,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp
index a41f997977f9a..6aa021a8d1640 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_unshift.pass.cpp
@@ -23,6 +23,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp
index c7fe09caf1306..6b65978ba706a 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_always_noconv.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp
index 595f7888e1ca6..3b4fa34b2a7fc 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_encoding.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
index 482521032afd8..2ec13bb9834dd 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
@@ -24,6 +24,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 template <class CharT>
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp
index 33a4b5f04bdaf..2dc4ce5bef706 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_length.pass.cpp
@@ -22,6 +22,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp
index 5d93d929e5686..6459b2eaabbc4 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_max_length.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
index 89908eb7756a8..4b3f02d6550c0 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
@@ -24,6 +24,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 template <class CharT>
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp
index 79b670055d557..ccecf79110381 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_unshift.pass.cpp
@@ -23,6 +23,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isalnum.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isalnum.pass.cpp
index 2cf9e4b5749a7..f4cb6370cedba 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isalnum.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isalnum.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isalpha.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isalpha.pass.cpp
index 800c26cb918c2..e4e3f43deae01 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isalpha.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isalpha.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/iscntrl.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/iscntrl.pass.cpp
index d5cd4a6b96258..5f4711f46300b 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/iscntrl.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/iscntrl.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isdigit.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isdigit.pass.cpp
index 2e71bd394ac3b..f8a4d32af4980 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isdigit.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isdigit.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isgraph.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isgraph.pass.cpp
index 406b7cbf97367..944b5eb689635 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isgraph.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isgraph.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/islower.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/islower.pass.cpp
index 2fc9ece026d2f..a864d84bec45f 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/islower.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/islower.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isprint.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isprint.pass.cpp
index 36fa16cbc7496..e2f75abbc82d0 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isprint.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isprint.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/ispunct.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/ispunct.pass.cpp
index db11337805c0d..7cbc5066bd736 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/ispunct.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/ispunct.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isspace.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isspace.pass.cpp
index 62e50d2fc25f5..cde85fc42a09b 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isspace.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isspace.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isupper.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isupper.pass.cpp
index c986c7cdddc2c..297207a483bb6 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isupper.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isupper.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/classification/isxdigit.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/classification/isxdigit.pass.cpp
index 245f2ed6033cc..facdb084518fc 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/classification/isxdigit.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/classification/isxdigit.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp
index 7c404288596b0..b29fce578540d 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test_buf
     : public std::wbuffer_convert<std::codecvt_utf8<wchar_t> >
 {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp
index dc4144b159c76..d8e60c0d85ad8 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test_buf
     : public std::wbuffer_convert<std::codecvt_utf8<wchar_t> >
 {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp
index b58d1d8eb7bd4..1d6e34388da0d 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::wbuffer_convert<std::codecvt_utf8<wchar_t> > B;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp
index b50f10cc1714d..33fc9f23bed5b 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp
@@ -22,6 +22,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 class test_codecvt
     : public std::codecvt<wchar_t, char, std::mbstate_t>
 {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp
index 0541dbfb93b43..84a0fdcf1f343 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::wbuffer_convert<std::codecvt_utf8<wchar_t> > B;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp
index e309f3eb65018..2d5564a5bffb1 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp
@@ -15,6 +15,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp
index 6d04935b30112..0b25e139efc37 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct test_buf
     : public std::wbuffer_convert<std::codecvt_utf8<wchar_t> >
 {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/tolower.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/tolower.pass.cpp
index 9885dca53577e..f686e718efdd7 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/tolower.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/tolower.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/toupper.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/toupper.pass.cpp
index 34f675b0592fc..5a0f6d9a91d7d 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/toupper.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.character/toupper.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::locale l;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
index 986b0e31324c4..955bbb09c65d8 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
@@ -16,6 +16,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 template <class CharT>
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp
index 6e2d5ff5e9c03..292777ae4002e 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp
@@ -16,6 +16,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp
index d035c3160eb4d..e8ebb004a09f8 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp
@@ -21,6 +21,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt_utf8<wchar_t> Codecvt;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
index c02f82b51806b..deb4b96cf72d9 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
@@ -19,6 +19,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 template <class CharT>
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp
index a7588dc252293..f6dcd063d7777 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <codecvt>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::codecvt_utf8<wchar_t> Codecvt;
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
index 3736e4dd0e76c..849b99022c9aa 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
@@ -19,6 +19,8 @@
 #include <codecvt>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT, size_t = sizeof(CharT)>
 struct TestHelper;
 template <class CharT>
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp
index c2dea9a906717..a73739e0061c7 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp
@@ -22,6 +22,8 @@
 #include <locale>
 #include <codecvt>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locales/locale.global.templates/has_facet.pass.cpp b/libcxx/test/std/localization/locales/locale.global.templates/has_facet.pass.cpp
index 66539d1eabaf1..00fc8aea10225 100644
--- a/libcxx/test/std/localization/locales/locale.global.templates/has_facet.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.global.templates/has_facet.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct my_facet
     : public std::locale::facet
 {
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp
index 369fee4cb1985..969c94542afa3 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/assign.pass.cpp
@@ -18,6 +18,7 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp
index 4f96ab41554b9..62aee240a7f9c 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/copy.pass.cpp
@@ -17,6 +17,7 @@
 #include <new>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 void check(const std::locale& loc)
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp
index 5c1e922cbfe96..614996bb44ec0 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/default.pass.cpp
@@ -15,6 +15,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 #include "count_new.hpp"
 
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp
index 70f2cb92b797e..7064f6fd2e83b 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_char_pointer_cat.pass.cpp
@@ -22,6 +22,7 @@
 #include <cassert>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 void check(const std::locale& loc)
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp
index 35c06ce50400f..cac3fa9c61efb 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_facetptr.pass.cpp
@@ -17,6 +17,7 @@
 #include <cassert>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp
index ba54e85542edd..fb7d5953d6991 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_locale_cat.pass.cpp
@@ -22,6 +22,7 @@
 #include <cassert>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 void check(const std::locale& loc)
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp
index 3cb3aadac97c4..d495036da7357 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/locale_string_cat.pass.cpp
@@ -22,6 +22,7 @@
 #include <cassert>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 
diff --git a/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp
index 55b2f88c2bef2..ee9ddbdca3e80 100644
--- a/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.cons/string.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 void check(const std::locale& loc)
diff --git a/libcxx/test/std/localization/locales/locale/locale.members/name.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.members/name.pass.cpp
index 96ebdf39147dd..ce354b3ae6493 100644
--- a/libcxx/test/std/localization/locales/locale/locale.members/name.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.members/name.pass.cpp
@@ -15,6 +15,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp
index ea083d1370dab..0710e28c558fb 100644
--- a/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp
@@ -15,6 +15,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/localization/locales/locale/locale.operators/eq.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.operators/eq.pass.cpp
index 1efb487bc8dc3..e87bfba0c44f1 100644
--- a/libcxx/test/std/localization/locales/locale/locale.operators/eq.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.operators/eq.pass.cpp
@@ -15,6 +15,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 int main(int, char**)
diff --git a/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp
index 7594edcc2c633..1ca5364b82d24 100644
--- a/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.statics/classic.pass.cpp
@@ -13,6 +13,8 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
+
 void check(const std::locale& loc)
 {
     assert(std::has_facet<std::collate<char> >(loc));
diff --git a/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp
index 57f55aa1a76d6..4cf3b0b044b3c 100644
--- a/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.statics/global.pass.cpp
@@ -15,6 +15,7 @@
 #include <locale>
 #include <cassert>
 
+#include "test_macros.h"
 #include "platform_support.h" // locale name macros
 
 void check(const std::locale& loc)
diff --git a/libcxx/test/std/localization/locales/locale/locale.types/locale.category/category.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.types/locale.category/category.pass.cpp
index 11c3de2975f3c..b515cf87c5188 100644
--- a/libcxx/test/std/localization/locales/locale/locale.types/locale.category/category.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.types/locale.category/category.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp
index 4cba6031aafb6..2c8d054fbc527 100644
--- a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ctgmath>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::complex<double> cd;
diff --git a/libcxx/test/std/numerics/c.math/tgmath_h.pass.cpp b/libcxx/test/std/numerics/c.math/tgmath_h.pass.cpp
index 3fab28b0425d9..50e14ed9ddc9f 100644
--- a/libcxx/test/std/numerics/c.math/tgmath_h.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/tgmath_h.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <tgmath.h>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 
diff --git a/libcxx/test/std/numerics/cfenv/cfenv.syn/cfenv.pass.cpp b/libcxx/test/std/numerics/cfenv/cfenv.syn/cfenv.pass.cpp
index 61378439298d2..054c813d69800 100644
--- a/libcxx/test/std/numerics/cfenv/cfenv.syn/cfenv.pass.cpp
+++ b/libcxx/test/std/numerics/cfenv/cfenv.syn/cfenv.pass.cpp
@@ -13,6 +13,8 @@
 #include <cfenv>
 #include <type_traits>
 
+#include "test_macros.h"
+
 #ifndef FE_DIVBYZERO
 #error FE_DIVBYZERO not defined
 #endif
diff --git a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp
index ad1f4c423ac68..cc3f8cd6a9beb 100644
--- a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ccomplex>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::complex<double> d;
diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp
index bbc865a5ad668..0152761da67cc 100644
--- a/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp
@@ -16,6 +16,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/conj.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/conj.pass.cpp
index 46bf69aade5af..3367620aaf3be 100644
--- a/libcxx/test/std/numerics/complex.number/cmplx.over/conj.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/cmplx.over/conj.pass.cpp
@@ -18,6 +18,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/norm.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/norm.pass.cpp
index 69a2eada84cd4..8dc6daa93fc5f 100644
--- a/libcxx/test/std/numerics/complex.number/cmplx.over/norm.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/cmplx.over/norm.pass.cpp
@@ -16,6 +16,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp
index 802b9e7735a5b..54a6cba9c0011 100644
--- a/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp
@@ -24,6 +24,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/proj.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/proj.pass.cpp
index 41b82b0d7b953..a395f6d6bdd9b 100644
--- a/libcxx/test/std/numerics/complex.number/cmplx.over/proj.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/cmplx.over/proj.pass.cpp
@@ -18,6 +18,7 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.literals/literals1.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.literals/literals1.pass.cpp
index ba9532a5ee321..6b3e916ba2d5e 100644
--- a/libcxx/test/std/numerics/complex.number/complex.literals/literals1.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.literals/literals1.pass.cpp
@@ -13,6 +13,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::literals;
diff --git a/libcxx/test/std/numerics/complex.number/complex.literals/literals2.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.literals/literals2.pass.cpp
index 0b8d2f9cb3fd6..90239bd3bc933 100644
--- a/libcxx/test/std/numerics/complex.number/complex.literals/literals2.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.literals/literals2.pass.cpp
@@ -13,6 +13,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std;
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_complex.pass.cpp
index d0ccb14f156cd..e6522c0628cbf 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_complex.pass.cpp
@@ -14,6 +14,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T, class X>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_scalar.pass.cpp
index faab37ea4fdcf..176c3c7591c3d 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/assignment_scalar.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_complex.pass.cpp
index 052c2dceeca09..989177b25bff9 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_complex.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_scalar.pass.cpp
index 63d34b0512eeb..100cd34f1064c 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/divide_equal_scalar.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_complex.pass.cpp
index 09cde61241bd6..b47793d09f0a5 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_complex.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_scalar.pass.cpp
index ae5b07157def4..6fccd8bcb9fe5 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/minus_equal_scalar.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_complex.pass.cpp
index 0c86b6750d31b..43357a80d6234 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_complex.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_scalar.pass.cpp
index 498724a36270f..78bf371eeb28d 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/plus_equal_scalar.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_complex.pass.cpp
index fc690072bfa17..aabe9229cb52a 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_complex.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_scalar.pass.cpp
index 6cb95ea660a26..600c79f8d2100 100644
--- a/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.member.ops/times_equal_scalar.pass.cpp
@@ -13,6 +13,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_complex.pass.cpp
index 5166fa57ff14d..363b0189ed1cd 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_complex.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_scalar.pass.cpp
index e7a1d81cf6261..24a0e8cb61a38 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_divide_scalar.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& lhs, const T& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_complex.pass.cpp
index 999a2c91dc0e7..f96cc47a470fb 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_complex.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& lhs, const std::complex<T>& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_scalar.pass.cpp
index 9aea6819c732a..3002085968517 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_minus_scalar.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& lhs, const T& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_complex.pass.cpp
index 5a2fdcfb0da67..963b026f26990 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_complex.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& lhs, const std::complex<T>& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_scalar.pass.cpp
index 4f9dfb1d41667..f4d6b6b9ac569 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_plus_scalar.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& lhs, const T& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_complex.pass.cpp
index f2203d4db93df..bcf85a9aabb83 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_complex.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_scalar.pass.cpp
index 9fface6b75f7b..897d2c290f2d7 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/complex_times_scalar.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& lhs, const T& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_divide_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_divide_complex.pass.cpp
index 01b706dd7b2ab..2b72a1a78d07e 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_divide_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_divide_complex.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const T& lhs, const std::complex<T>& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_minus_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_minus_complex.pass.cpp
index 006572492731e..9cd9207c0323e 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_minus_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_minus_complex.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const T& lhs, const std::complex<T>& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_plus_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_plus_complex.pass.cpp
index d8fc8a6d95841..6f8e0495b475c 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_plus_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_plus_complex.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const T& lhs, const std::complex<T>& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_times_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_times_complex.pass.cpp
index a33347db06a27..3ab8be29f88e6 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/scalar_times_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/scalar_times_complex.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const T& lhs, const std::complex<T>& rhs, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp
index 4f33b97ebc8fc..5c78fe98dd13d 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/stream_input.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp
index 2f1fa91e8dcd0..9e7049ea93bde 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/stream_output.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::complex<double> c(1, 2);
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/unary_minus.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/unary_minus.pass.cpp
index 0249240e830e9..a61218323a616 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/unary_minus.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/unary_minus.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.ops/unary_plus.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.ops/unary_plus.pass.cpp
index c5c2b6de12dfb..4c693a581a492 100644
--- a/libcxx/test/std/numerics/complex.number/complex.ops/unary_plus.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.ops/unary_plus.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp
index ecb6696890519..3158a3bc33d1c 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp
index 4a22dde0287d0..424a3b1b82e1a 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp
index 91ec6e9bdabf9..51da1c002a294 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp
index 18ac1f17a83f8..b53509242c378 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp
index 1816e2f99b036..f0c801649509d 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp
index 5e4bb13f5afbf..a126032bf8c24 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/cos.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/cos.pass.cpp
index 2085a4c85c616..0571363de50c3 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/cos.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/cos.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/cosh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/cosh.pass.cpp
index e95c2968de6c2..ad437bf44b996 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/cosh.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/cosh.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/exp.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/exp.pass.cpp
index fc638d135becd..9abeb319cfc9c 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/exp.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/exp.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp
index 35f0c5c41b47d..562d125e05323 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp
index 676175507c9d6..78818f0de15b2 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp
index d34ab0c7ca1c1..91754fac4d0a8 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp
index 7ffdd6136e66b..4b1aef23281db 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp
index e4b5d3d14b6e5..6022fddfaa755 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sin.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sin.pass.cpp
index 6e33f705454c6..ceececa46f7cf 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sin.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sin.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sinh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sinh.pass.cpp
index 7a9e7989830fc..933ff71d8a09b 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sinh.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sinh.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp
index a0b8433022fca..12fd9a2c0440a 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/tan.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/tan.pass.cpp
index b4bc207fcccb6..5c1f61ef644d3 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/tan.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/tan.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/tanh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/tanh.pass.cpp
index 1be3a2cd0e776..f1859d78fbf7a 100644
--- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/tanh.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/tanh.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/abs.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/abs.pass.cpp
index 7a518fc378ee8..d5ed2a6ba148c 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/abs.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/abs.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp
index 280ccc8cbc4ba..49c54372a8e00 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/conj.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/conj.pass.cpp
index 8c144ffbf2f7a..d24673cc8f33b 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/conj.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/conj.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test(const std::complex<T>& z, std::complex<T> x)
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/imag.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/imag.pass.cpp
index fe7cb3a96809e..0035823dbabd3 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/imag.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/imag.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/norm.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/norm.pass.cpp
index fe197ff21c21a..6e6dc10f1928d 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/norm.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/norm.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/polar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/polar.pass.cpp
index b7450abed6804..2b9c764abb591 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/polar.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/polar.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/proj.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/proj.pass.cpp
index 238429b5520b7..e118613b8dd31 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/proj.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/proj.pass.cpp
@@ -15,6 +15,7 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
 #include "../cases.h"
 
 template <class T>
diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/real.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/real.pass.cpp
index 138785900c3cc..c1d4f401c1c4d 100644
--- a/libcxx/test/std/numerics/complex.number/complex.value.ops/real.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/real.pass.cpp
@@ -15,6 +15,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/complex/types.pass.cpp b/libcxx/test/std/numerics/complex.number/complex/types.pass.cpp
index 517743071dccc..974d02c089881 100644
--- a/libcxx/test/std/numerics/complex.number/complex/types.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/complex/types.pass.cpp
@@ -19,6 +19,8 @@
 #include <complex>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/complex.number/layout.pass.cpp b/libcxx/test/std/numerics/complex.number/layout.pass.cpp
index bcb81189abca4..d39b2de132c13 100644
--- a/libcxx/test/std/numerics/complex.number/layout.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/layout.pass.cpp
@@ -11,6 +11,8 @@
 #include <complex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/default.pass.cpp b/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/default.pass.cpp
index 312425afbf1af..9d9a7fcb9a231 100644
--- a/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/default.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::gslice gs;
diff --git a/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/start_size_stride.pass.cpp b/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/start_size_stride.pass.cpp
index 682bb83fd1cde..1e11213d109bb 100644
--- a/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/start_size_stride.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/class.gslice/gslice.cons/start_size_stride.pass.cpp
@@ -16,6 +16,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::size_t a1[] = {1, 2, 3};
diff --git a/libcxx/test/std/numerics/numarray/class.slice/cons.slice/default.pass.cpp b/libcxx/test/std/numerics/numarray/class.slice/cons.slice/default.pass.cpp
index 92c17b8a2c499..4cb92cdc4c3bc 100644
--- a/libcxx/test/std/numerics/numarray/class.slice/cons.slice/default.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/class.slice/cons.slice/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::slice s;
diff --git a/libcxx/test/std/numerics/numarray/class.slice/cons.slice/start_size_stride.pass.cpp b/libcxx/test/std/numerics/numarray/class.slice/cons.slice/start_size_stride.pass.cpp
index 72bff97571298..c6203492e7c11 100644
--- a/libcxx/test/std/numerics/numarray/class.slice/cons.slice/start_size_stride.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/class.slice/cons.slice/start_size_stride.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::slice s(1, 3, 2);
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/gslice_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/gslice_array.pass.cpp
index 3a916257b88c6..83f2a6291ef08 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/gslice_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/gslice_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/valarray.pass.cpp
index e1aca3b7ca894..e15e2116d94c2 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.assign/valarray.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/addition.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/addition.pass.cpp
index 9c82a6f949166..1d6c6d756703f 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/addition.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/addition.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/and.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/and.pass.cpp
index bfe8ab288df1e..934b4433a5527 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/and.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/and.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/divide.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/divide.pass.cpp
index ec54bc4bb90db..5e2372a9b2393 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/divide.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/divide.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/modulo.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/modulo.pass.cpp
index 63ad3a7778ebb..bf289021ed947 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/modulo.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/modulo.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/multiply.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/multiply.pass.cpp
index b22fd3015d9f1..7f9f2c2c57bda 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/multiply.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/multiply.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/or.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/or.pass.cpp
index 0b068935f91d5..519329d7eca48 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/or.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/or.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_left.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_left.pass.cpp
index 912e48acab336..6837141d471bc 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_left.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_left.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_right.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_right.pass.cpp
index 2c8598f7b1dbd..f7ab2f7c38b5e 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_right.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/shift_right.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/subtraction.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/subtraction.pass.cpp
index 8b1271b04c737..2370ba65db9e3 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/subtraction.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/subtraction.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/xor.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/xor.pass.cpp
index 9a981ece8296f..005b07cc1fafd 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/xor.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.comp.assign/xor.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.fill/assign_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.fill/assign_value.pass.cpp
index c7c0925809359..3e8b9bdf6f0f1 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.fill/assign_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/gslice.array.fill/assign_value.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.gslice.array/types.pass.cpp b/libcxx/test/std/numerics/numarray/template.gslice.array/types.pass.cpp
index 9263c0e051f12..92bb8563c0408 100644
--- a/libcxx/test/std/numerics/numarray/template.gslice.array/types.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.gslice.array/types.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::gslice_array<int>::value_type, int>::value), "");
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/indirect_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/indirect_array.pass.cpp
index c19152bb9bd2b..b0fed049e9898 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/indirect_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/indirect_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/valarray.pass.cpp
index 0bc4b58179ab2..e912cc706c72c 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.assign/valarray.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/addition.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/addition.pass.cpp
index 3ed95f9cb1e21..4073aceb01322 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/addition.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/addition.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/and.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/and.pass.cpp
index 00fd2f18da72a..49dee00b81a12 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/and.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/and.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/divide.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/divide.pass.cpp
index 1a9ca265ad9f2..70a9395e66d89 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/divide.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/divide.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/modulo.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/modulo.pass.cpp
index bad0b950e35bf..885589fabd549 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/modulo.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/modulo.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/multiply.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/multiply.pass.cpp
index 7e78f0a8f39f9..66650228fbefd 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/multiply.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/multiply.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/or.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/or.pass.cpp
index ba32accc64a42..73ba5a2ba57d4 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/or.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/or.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_left.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_left.pass.cpp
index deff80cc3a427..95a362435412a 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_left.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_left.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_right.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_right.pass.cpp
index d2ac739541f9a..262494d48636a 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_right.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/shift_right.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/subtraction.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/subtraction.pass.cpp
index d94422c09276a..b6f85eb6bcd7d 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/subtraction.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/subtraction.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/xor.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/xor.pass.cpp
index 06e06682740ff..57f7af44508a5 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/xor.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.comp.assign/xor.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.fill/assign_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.fill/assign_value.pass.cpp
index e327d2630e84d..49f8540d7d4e5 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.fill/assign_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/indirect.array.fill/assign_value.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.indirect.array/types.pass.cpp b/libcxx/test/std/numerics/numarray/template.indirect.array/types.pass.cpp
index 5d06c5baf677e..d51eaf2699275 100644
--- a/libcxx/test/std/numerics/numarray/template.indirect.array/types.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.indirect.array/types.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::indirect_array<int>::value_type, int>::value), "");
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/mask_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/mask_array.pass.cpp
index 22ce22ad62c01..500341aceed37 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/mask_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/mask_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/valarray.pass.cpp
index e364c442ff7a3..d3f8284dd7000 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.assign/valarray.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/addition.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/addition.pass.cpp
index e8f0958b53c12..6c046516cc7d9 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/addition.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/addition.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/and.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/and.pass.cpp
index ab2937986c2cc..f9c73be08b025 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/and.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/and.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/divide.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/divide.pass.cpp
index cd67632efd052..3c43ded557c32 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/divide.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/divide.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/modulo.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/modulo.pass.cpp
index 7cf8b585d0347..0a1054c20cae6 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/modulo.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/modulo.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/multiply.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/multiply.pass.cpp
index 537bf40d8d688..37936e69cfde7 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/multiply.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/multiply.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/or.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/or.pass.cpp
index d0297b831e899..c3d72c6efc327 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/or.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/or.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_left.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_left.pass.cpp
index eee4c1d461c96..7322edfaffc02 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_left.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_left.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_right.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_right.pass.cpp
index b65c19f99c43e..1b17173e76d11 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_right.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/shift_right.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/subtraction.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/subtraction.pass.cpp
index 40fddd123a08d..173a97d644128 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/subtraction.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/subtraction.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/xor.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/xor.pass.cpp
index a04b6da270557..75553c2a09fd1 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/xor.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.comp.assign/xor.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.fill/assign_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.fill/assign_value.pass.cpp
index bb4fd85912b41..5ea3df4896f16 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.fill/assign_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/mask.array.fill/assign_value.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.mask.array/types.pass.cpp b/libcxx/test/std/numerics/numarray/template.mask.array/types.pass.cpp
index 1d4acea6f11bf..fff3f637e3d5b 100644
--- a/libcxx/test/std/numerics/numarray/template.mask.array/types.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.mask.array/types.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::mask_array<int>::value_type, int>::value), "");
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/slice_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/slice_array.pass.cpp
index 9683c7dfbd2c1..039221a62a556 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/slice_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/slice_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/valarray.pass.cpp
index 88a5b44ab24f0..d1d9fe18bea76 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.assign/valarray.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/addition.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/addition.pass.cpp
index 0433877e749ad..6037471db0046 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/addition.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/addition.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/and.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/and.pass.cpp
index 90bbe4ef33379..1d7d4c9ac5680 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/and.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/and.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/divide.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/divide.pass.cpp
index ae1383b84bdea..b55a1f533dfdb 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/divide.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/divide.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/modulo.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/modulo.pass.cpp
index 89c1acfb3817b..f474438d85870 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/modulo.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/modulo.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/multiply.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/multiply.pass.cpp
index b7c6b13530a18..ebfbcf7cbc0b4 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/multiply.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/multiply.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/or.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/or.pass.cpp
index 0f37579e86d5a..70d3345246094 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/or.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/or.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_left.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_left.pass.cpp
index 547a8cd279a50..2fb286c94ce87 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_left.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_left.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_right.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_right.pass.cpp
index 99c4ef94311bd..5923a24b09fb7 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_right.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/shift_right.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/subtraction.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/subtraction.pass.cpp
index db513bc9629f1..656fbb184e4e4 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/subtraction.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/subtraction.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/xor.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/xor.pass.cpp
index 4ecba47238e20..cd8a1b357edd0 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/xor.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.comp.assign/xor.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.fill/assign_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.fill/assign_value.pass.cpp
index ab2156b462728..56f2a4e3e117b 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.fill/assign_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/slice.arr.fill/assign_value.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.slice.array/types.pass.cpp b/libcxx/test/std/numerics/numarray/template.slice.array/types.pass.cpp
index fccde7edb6bb0..33d6bbd732d9e 100644
--- a/libcxx/test/std/numerics/numarray/template.slice.array/types.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.slice.array/types.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::slice_array<int>::value_type, int>::value), "");
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/types.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/types.pass.cpp
index f37ba0f77171a..3d7e04ab87dd0 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/types.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <valarray>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::valarray<int>::value_type, int>::value), "");
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/access.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/access.pass.cpp
index d92154130b88d..4a72cc73c12fc 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/access.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/access.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/const_access.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/const_access.pass.cpp
index a0174ccb3ff63..ef9ac713af474 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/const_access.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.access/const_access.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/copy_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/copy_assign.pass.cpp
index 777d922a43738..b216c67c172ad 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/copy_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/copy_assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 struct S
 {
     S() : x_(0) { default_ctor_called = true; }
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/gslice_array_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/gslice_array_assign.pass.cpp
index df5ae916225b9..d989d97406926 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/gslice_array_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/gslice_array_assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/indirect_array_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/indirect_array_assign.pass.cpp
index f8b5243b91905..5e207f9a782e9 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/indirect_array_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/indirect_array_assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/initializer_list_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/initializer_list_assign.pass.cpp
index 1f9e5a51f6ecd..1426a7f898643 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/initializer_list_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/initializer_list_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 struct S
 {
     S() : x_(0) { default_ctor_called = true; }
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/mask_array_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/mask_array_assign.pass.cpp
index aeb95a10b74d2..b5134105a3595 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/mask_array_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/mask_array_assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/move_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/move_assign.pass.cpp
index 522c0a2a33e42..ca8e0568243cf 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/move_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/slice_array_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/slice_array_assign.pass.cpp
index 68b0e37d410a7..08f5f66185db5 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/slice_array_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/slice_array_assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/value_assign.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/value_assign.pass.cpp
index 3adb1465cf565..cec900fa57449 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/value_assign.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.assign/value_assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_valarray.pass.cpp
index 60b30715312c7..5824d5344bfd8 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_value.pass.cpp
index 2873721504115..67c193c2ae493 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/and_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_valarray.pass.cpp
index fdb9975d5d3fe..6bca4cf766919 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_value.pass.cpp
index a309767ffba38..e97903744a1fb 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/divide_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_valarray.pass.cpp
index a8ef9152b00f7..80ecabfdaabd8 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_value.pass.cpp
index 263ac820a7201..ffec89f344130 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/minus_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_valarray.pass.cpp
index 79cfeb0c41597..176d38039f60b 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_value.pass.cpp
index b0ea0a2986bdc..f784780916724 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/modulo_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_valarray.pass.cpp
index df962a044f27e..6435f18ec46e0 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_value.pass.cpp
index 1be8942ab7a50..68c20be0fe5c2 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/or_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_valarray.pass.cpp
index 3700e5c4758c8..70879ad63a142 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_value.pass.cpp
index c8c5d1ef9c640..770eb12f66752 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/plus_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_valarray.pass.cpp
index f642ce4316dc2..9c5dcc5d56665 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_value.pass.cpp
index 8cba6b4dac5a0..221abf3911cb8 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_left_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_valarray.pass.cpp
index d50971b394daf..9f8f5bcf823d9 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_value.pass.cpp
index 670599afd3459..2ce87678dbef4 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/shift_right_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_valarray.pass.cpp
index f7e3da5bf9a46..070534c4f751f 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_value.pass.cpp
index 9632799683b6f..8fd9f457a74b9 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/times_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_valarray.pass.cpp
index bf08055117ad8..970013b65ff46 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_value.pass.cpp
index 0a3d3200f90f5..0a1c947a515f8 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cassign/xor_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/copy.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/copy.pass.cpp
index 8a9c6baae3a02..1914efed06d7d 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/copy.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/default.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/default.pass.cpp
index b560398024445..9595fbe89ae63 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/default.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct S {
     S() { ctor_called = true; }
     static bool ctor_called;
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/gslice_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/gslice_array.pass.cpp
index fdab3e3e8e4bb..0b3fecd3e0be6 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/gslice_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/gslice_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/indirect_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/indirect_array.pass.cpp
index 3a62b0a54793f..90a18b42979f6 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/indirect_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/indirect_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/initializer_list.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/initializer_list.pass.cpp
index 1f5986eda1806..1f0554e120f1a 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/initializer_list.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/mask_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/mask_array.pass.cpp
index 4559c36e7cac9..79595710f44ef 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/mask_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/mask_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/move.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/move.pass.cpp
index 0ef6f3cee532f..ceeca1349a988 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/move.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/move.pass.cpp
@@ -19,6 +19,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/pointer_size.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/pointer_size.pass.cpp
index a0b4a31aebe2c..3413bfa4a0f5d 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/pointer_size.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/pointer_size.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp
index 95417e58a63bc..4c87bd4e816ca 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct S {
     S() : x(1) {}
     ~S() { ++cnt_dtor; }
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/slice_array.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/slice_array.pass.cpp
index 332a617153d64..358979f93869a 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/slice_array.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/slice_array.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/value_size.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/value_size.pass.cpp
index 03e4add4648a0..48416f97411dd 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/value_size.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/value_size.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_cref.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_cref.pass.cpp
index 65277870aa57b..5703b99395881 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_cref.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_cref.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef int T;
 
 T f(const T& t) {return t + 5;}
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_value.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_value.pass.cpp
index fd100b5e54e6d..379f28b178d03 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/apply_value.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef int T;
 
 T f(T t) {return t + 5;}
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/cshift.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/cshift.pass.cpp
index 14ca081a1e918..3b9cc2b2cfd86 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/cshift.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/cshift.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/max.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/max.pass.cpp
index bdd84c118367a..fcc57453c05d8 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/max.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/max.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/min.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/min.pass.cpp
index ca04a930853d5..8534e3fd681a1 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/min.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/min.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/resize.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/resize.pass.cpp
index e92e7420b3cb0..43cc84d8853bf 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/resize.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/resize.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/shift.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/shift.pass.cpp
index 1a7628eb34951..06528e953e2c2 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/shift.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/shift.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/size.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/size.pass.cpp
index f790627233de8..5757350c87ca9 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/size.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/size.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/sum.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/sum.pass.cpp
index 084f00fb97ee1..2f7f0ec5afb6c 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/sum.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/sum.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/swap.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/swap.pass.cpp
index 12a7d8fd8ea6f..ce46cbdd52d08 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/swap.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.members/swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_const.pass.cpp
index e30c29bbab5fd..bff8e5ad99a84 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_non_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_non_const.pass.cpp
index 69a6b99b608df..f184bff03cb3e 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_non_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/gslice_non_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_const.pass.cpp
index e2da62d9262f0..d8b99a7d3d874 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_non_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_non_const.pass.cpp
index a5abe4e8afdb5..10a924a5d67f3 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_non_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/indirect_array_non_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_const.pass.cpp
index 15f53ed21a8bf..750251d563181 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_non_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_non_const.pass.cpp
index 4747767ea6ee5..ebc79a45acf43 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_non_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/slice_non_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_const.pass.cpp
index 6424afce6dc87..66d0d2339e6a7 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 int main(int, char**)
 {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_non_const.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_non_const.pass.cpp
index f4d9796f5d317..4fd4593b1853a 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_non_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.sub/valarray_bool_non_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/bit_not.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/bit_not.pass.cpp
index 7f31355d0e040..eac7a7afa48db 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/bit_not.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/bit_not.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/negate.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/negate.pass.cpp
index a89b24d233b13..5bcf4b1adb4bd 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/negate.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/negate.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/not.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/not.pass.cpp
index 3975510fba76c..1abc5112cb60d 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/not.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/not.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/plus.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/plus.pass.cpp
index b1f7f313f3f49..4193efe6e3b04 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/plus.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.unary/plus.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_valarray.pass.cpp
index 3be9074db3c8e..9b5661e6752a4 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_value.pass.cpp
index 4f1bf8ad66b4e..5df2a371209df 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_value_valarray.pass.cpp
index 05990124e2615..7a058b84750d3 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/and_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_valarray.pass.cpp
index 50c6a14848aad..72a57f2cd8c00 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_value.pass.cpp
index f5e0b27271ab7..516025ed33277 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_value_valarray.pass.cpp
index dde6955bbb717..3fd39d45ea8a0 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/divide_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_valarray.pass.cpp
index f1df168b3554a..fe9f34548faf0 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_value.pass.cpp
index 0ea4f0c1d8847..f81eaa91e8980 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_value_valarray.pass.cpp
index f2131d10e0cdc..33760149c1772 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/minus_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_valarray.pass.cpp
index 22d82f4b57e52..e77a19126dc76 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_value.pass.cpp
index f498e7af58888..811ec8c8bc530 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_value_valarray.pass.cpp
index fbd407ce29ab4..79e52dd1fd898 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/modulo_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_valarray.pass.cpp
index f305243d5a5da..2f524b716cbbb 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_value.pass.cpp
index 90fa4b4fd0275..cd2dbcd39bb9a 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_value_valarray.pass.cpp
index 295dd6bdc5fed..bd2e78693ea81 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/or_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_valarray.pass.cpp
index 19a410e4faaea..631b2ee13c58a 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_value.pass.cpp
index 2aef9c1a17451..4fb63633f9b9a 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_value_valarray.pass.cpp
index ba598f62ddc7a..758058e26c082 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/plus_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_valarray.pass.cpp
index e71fa1056ead7..d1e815176d22c 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_value.pass.cpp
index 3945c1bf4547b..bbc63d23cb6f8 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_value_valarray.pass.cpp
index 93276345210fb..46278be314e45 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_left_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_valarray.pass.cpp
index 9422d6be871c7..9f73581e77076 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_value.pass.cpp
index 8a68f30ca6f8a..48069e5a7f486 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_value_valarray.pass.cpp
index 519fd2b3bd9d2..397016bc38b90 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/shift_right_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_valarray.pass.cpp
index bc5e7329c8a1d..0e56d4324c89c 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_value.pass.cpp
index 330f5e0a70b20..bcdb62ddf0376 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_value_valarray.pass.cpp
index 4fa8bb2d20966..caba57868515e 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/times_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_valarray.pass.cpp
index fd4fb084c0a65..db531314bb4cd 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_value.pass.cpp
index c5082f553c734..4ad7a0f04ddd5 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_valarray_value.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_value_valarray.pass.cpp
index 377f03ed38d54..37ce06eafbb11 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.binary/xor_value_valarray.pass.cpp
@@ -16,6 +16,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_valarray.pass.cpp
index 3f3ede056ee38..0f3b3b2d7e0a2 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_value.pass.cpp
index de5808e5710b2..36fc3fa135534 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_value_valarray.pass.cpp
index c73ec1e54cb23..ec4c86eb6efd0 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/and_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_valarray.pass.cpp
index 187126fc8ed1d..ef3ed72f6f8e6 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_value.pass.cpp
index 01c04a62c8002..ecc98d606d079 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_value_valarray.pass.cpp
index b0db6a0d18e22..c3e077e95c48e 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/equal_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_valarray.pass.cpp
index c8de6208aaee4..64db931988146 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_value.pass.cpp
index cf568b5307b26..dac04a34215c1 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_value_valarray.pass.cpp
index c66a60e062c89..c9c693c6d5167 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_equal_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_valarray.pass.cpp
index 351b662be07bb..512f8e6387232 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_value.pass.cpp
index f895b0783b714..9da362fa9c95a 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_value_valarray.pass.cpp
index a54b770256bea..4682896a66281 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/greater_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_valarray.pass.cpp
index c8812f3fd89ec..8a4ffef999d73 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_value.pass.cpp
index 03caf34a60a39..c7118b642209d 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_value_valarray.pass.cpp
index 5026b73d4e21e..76d929049ea0a 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_equal_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_valarray.pass.cpp
index 59943f912e3bd..59210bca72ff4 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_value.pass.cpp
index 9ced475713922..f800228c89d8f 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_value_valarray.pass.cpp
index 770d5a96f7dca..8edd643092f05 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/less_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_valarray.pass.cpp
index 1892a70717639..c3e2c19023a92 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_value.pass.cpp
index 3cdb89739583a..05a8c2fb1e173 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_value_valarray.pass.cpp
index 49ffeda05ae5c..4c9fb808b0dbc 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/not_equal_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_valarray.pass.cpp
index f62cb4f8cc2fc..48b287e3427b0 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_value.pass.cpp
index df73f85e398bf..029e497c3e20e 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_valarray_value.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_value_valarray.pass.cpp
index 3798acc67d951..ff5e6c87f2241 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.comparison/or_value_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.special/swap.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.special/swap.pass.cpp
index 2200ddfe015ae..033f5a49b80df 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.special/swap.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.special/swap.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/abs_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/abs_valarray.pass.cpp
index d721c8442634f..902cb86875241 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/abs_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/abs_valarray.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/acos_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/acos_valarray.pass.cpp
index 18b5bcb5b6c2a..1d670245f64c5 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/acos_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/acos_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/asin_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/asin_valarray.pass.cpp
index 9401200e5c7b1..e11a40cdf283c 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/asin_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/asin_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_valarray.pass.cpp
index fcbd63b8317b3..12d33c03feba3 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_value.pass.cpp
index 59928d447ea7d..1195b765681b1 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_valarray_value.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_value_valarray.pass.cpp
index ed42627d1f911..c670aba31df8c 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan2_value_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan_valarray.pass.cpp
index 7176b9343e535..a491c255047b3 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/atan_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cos_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cos_valarray.pass.cpp
index bc58e4af23cb2..e5faa7edaac48 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cos_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cos_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cosh_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cosh_valarray.pass.cpp
index b453edd08c8c1..e3d8006897c54 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cosh_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/cosh_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/exp_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/exp_valarray.pass.cpp
index 8e95f870410e2..68456ca2260d1 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/exp_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/exp_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log10_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log10_valarray.pass.cpp
index 39514ed6833fe..1550eb16477ff 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log10_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log10_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log_valarray.pass.cpp
index 050d58fa5991b..36d69b5c61b77 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/log_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_valarray.pass.cpp
index 93b8a14cbe69e..e2e92a2f0b719 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_value.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_value.pass.cpp
index 62c140c04da47..145ce85ecb4dd 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_value.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_valarray_value.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_value_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_value_valarray.pass.cpp
index 0c8a76b6dac36..799ac1cf81e6c 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_value_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/pow_value_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sin_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sin_valarray.pass.cpp
index 92d6f4492a5b9..f713b9f9aafce 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sin_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sin_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sinh_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sinh_valarray.pass.cpp
index 190c212ac8bca..0691735ee4a4c 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sinh_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sinh_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sqrt_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sqrt_valarray.pass.cpp
index 805bde633e3e1..9d7fc72120512 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sqrt_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/sqrt_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tan_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tan_valarray.pass.cpp
index 4f5b69d0884bd..6d87fd88f9913 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tan_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tan_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tanh_valarray.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tanh_valarray.pass.cpp
index c63696a838057..20006550d2f3e 100644
--- a/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tanh_valarray.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.nonmembers/valarray.transcend/tanh_valarray.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cstddef>
 
+#include "test_macros.h"
+
 bool is_about(double x, double y, int p)
 {
     std::ostringstream o;
diff --git a/libcxx/test/std/numerics/numarray/valarray.range/begin_const.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.range/begin_const.pass.cpp
index 35e5e4206aa73..fcf55ce74ff88 100644
--- a/libcxx/test/std/numerics/numarray/valarray.range/begin_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.range/begin_const.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.range/begin_non_const.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.range/begin_non_const.pass.cpp
index e0d8e71da260b..dc9cb028bf42e 100644
--- a/libcxx/test/std/numerics/numarray/valarray.range/begin_non_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.range/begin_non_const.pass.cpp
@@ -17,6 +17,8 @@
 #include <valarray>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.range/end_const.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.range/end_const.pass.cpp
index d1424d3f0c170..a7422daf29ce9 100644
--- a/libcxx/test/std/numerics/numarray/valarray.range/end_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.range/end_const.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numarray/valarray.range/end_non_const.pass.cpp b/libcxx/test/std/numerics/numarray/valarray.range/end_non_const.pass.cpp
index 5e1cbd4a80510..73a066942ecb8 100644
--- a/libcxx/test/std/numerics/numarray/valarray.range/end_non_const.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/valarray.range/end_non_const.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate.pass.cpp b/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate.pass.cpp
index 80a048d07ed93..8513635de3e13 100644
--- a/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate.pass.cpp
@@ -17,6 +17,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter, class T>
diff --git a/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate_op.pass.cpp
index c7a55b971e697..f2e3a6bc26c32 100644
--- a/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/accumulate/accumulate_op.pass.cpp
@@ -19,6 +19,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter, class T>
diff --git a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp
index 447ceb61ed8f4..6e4b36ffe18b9 100644
--- a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp
@@ -21,6 +21,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class T, class Iter2>
diff --git a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp
index 46cb0800b7b5b..7b2ad76f905b8 100644
--- a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp
@@ -22,6 +22,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class T, class Op, class Iter2>
diff --git a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp
index 0ab019c5e000f..0f5695f51e672 100644
--- a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp
@@ -21,6 +21,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2>
diff --git a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp
index 88633ac84ede2..bbea5cfeb09a9 100644
--- a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp
@@ -23,6 +23,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class T, class Op, class Iter2>
diff --git a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp
index c6e691aeeb827..7d294f0827501 100644
--- a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp
@@ -22,6 +22,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class T, class Op, class Iter2>
diff --git a/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product.pass.cpp
index fa5c1e8341a7e..5e87325937a08 100644
--- a/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product.pass.cpp
@@ -21,6 +21,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class T>
diff --git a/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product_comp.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product_comp.pass.cpp
index e42e3cea9154d..3f9ed7ee8951b 100644
--- a/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product_comp.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inner.product/inner_product_comp.pass.cpp
@@ -23,6 +23,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter1, class Iter2, class T>
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.iota/iota.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.iota/iota.pass.cpp
index 2c1c08e8b540a..2cf99f8d44e3b 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.iota/iota.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.iota/iota.pass.cpp
@@ -14,6 +14,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class InIter>
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
index bba3780bdc820..22f2a6333014e 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.gcd/gcd.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>    // for rand()
 #include <type_traits>
 
+#include "test_macros.h"
+
 constexpr struct {
   int x;
   int y;
diff --git a/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum.pass.cpp b/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum.pass.cpp
index 4ea410712ca6c..51ef81340d10c 100644
--- a/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum.pass.cpp
@@ -19,6 +19,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class InIter, class OutIter>
diff --git a/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum_op.pass.cpp
index ab51b5b5b2425..9bc79fdcae74b 100644
--- a/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/partial.sum/partial_sum_op.pass.cpp
@@ -21,6 +21,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class InIter, class OutIter>
diff --git a/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp b/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp
index 031a12d290b0e..56d6c8eb451f3 100644
--- a/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/reduce/reduce.pass.cpp
@@ -16,6 +16,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter, class T>
diff --git a/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init.pass.cpp
index 19c6b7d5fc8d2..a2c697f536ec7 100644
--- a/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init.pass.cpp
@@ -15,6 +15,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter, class T>
diff --git a/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp
index adcf928796834..a5e187d6a8be7 100644
--- a/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp
@@ -15,6 +15,7 @@
 #include <numeric>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <class Iter, class T, class Op>
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp
index 528802cb4bfee..2e399e7088a70 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp
@@ -25,6 +25,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct add_one {
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp
index f7a32131719f3..441ef9d47fe95 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp
@@ -26,6 +26,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct add_one {
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp
index 56e5bc6e75c87..5ac618ce61dc1 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp
@@ -25,6 +25,7 @@
 #include <iterator>
 #include <vector>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct add_one {
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp
index 38071531b22ac..71c5f2b11df07 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp
@@ -20,6 +20,7 @@
 #include <utility>
 #include <iterator>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_iterators.h"
 
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp
index d74267c71fb31..f0d34e9495fc5 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp
@@ -18,6 +18,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_iterators.h"
 
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp
index 27bad12ce6191..6328f92e585d9 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp
@@ -20,6 +20,7 @@
 #include <cassert>
 #include <iterator>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "test_iterators.h"
 
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/assign.pass.cpp
index 5deb1d50cfa41..9923d933d38e8 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/copy.pass.cpp
index 443f4f8d86c80..9072ef27f0003 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_copy.pass.cpp
index 57f2bcc76a31f..ea0f4581c735c 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_move.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_move.pass.cpp
index ade8e8d81cff0..59f46eba5b867 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_move.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_engine_move.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp
index 6fe0947631c90..c3f781209fd4f 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_result_type.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp
index fe2d9fea312ca..3cb9df560d539 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/ctor_sseq.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/default.pass.cpp
index 32af7046fdd08..0662fa5e67f2a 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/discard.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/discard.pass.cpp
index a6f4d64e3343a..ef925bc559698 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/discard.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/discard.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/eval.pass.cpp
index 75f07aecae57c..3f35697b545ae 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/eval.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp
index a9fbd7963e86a..9336b29a3c551 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/io.pass.cpp
@@ -27,6 +27,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/result_type.pass.cpp
index a18e09bd15f86..469d380379578 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/result_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_result_type.pass.cpp
index bfa93767eaaa8..a2f80529ad681 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_result_type.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_sseq.pass.cpp
index 5506cffa11735..6e99e8a35feff 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.disc/seed_sseq.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/assign.pass.cpp
index 79205aacc73db..014ed6ef90d51 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/copy.pass.cpp
index 22e68626ede73..d4db4ccafb5a9 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_copy.pass.cpp
index 65fad3e2f8348..aa9569b97c82a 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_move.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_move.pass.cpp
index f3b1d526fde2b..06d4c3483f123 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_move.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_engine_move.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp
index 84817b16ea4df..27f408f4af416 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_result_type.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp
index d2792f98f05d9..37ca482ee0b77 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/ctor_sseq.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/default.pass.cpp
index d7e72e3e72af0..f3f4ecbc6b2f1 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/discard.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/discard.pass.cpp
index 8f292192361e0..cf6a06964f873 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/discard.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/discard.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp
index b1a73193c243b..0fb967acfb43d 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/io.pass.cpp
@@ -27,6 +27,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_result_type.pass.cpp
index 8225044d778be..e633d968ccf02 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_result_type.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_sseq.pass.cpp
index 04374d00098f8..c2cefef953663 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.ibits/seed_sseq.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/assign.pass.cpp
index da08f58edb69b..49a6010b09fb0 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/copy.pass.cpp
index 5788371c36e65..aab2941f33534 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_copy.pass.cpp
index 0cc29d496dc8c..e66976b25a300 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_move.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_move.pass.cpp
index 35474e6a0d6a5..662a16862617d 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_move.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_engine_move.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp
index 5ca51e151e074..e805538630175 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_result_type.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp
index 136e7fe873f93..22823ea91396e 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/ctor_sseq.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/default.pass.cpp
index fff5cee2d0759..3b3cc18a540bb 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/discard.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/discard.pass.cpp
index 1b86048c2e29d..a4ed40633f7fc 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/discard.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/discard.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp
index 5d80b3afcbcd1..6bf4c8a00bdda 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/io.pass.cpp
@@ -27,6 +27,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_result_type.pass.cpp
index 24ccfbbf8e0b8..f5ba1ebe2abdb 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_result_type.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_sseq.pass.cpp
index 8dfbcb00ee71c..615f58b77450c 100644
--- a/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.adapt/rand.adapt.shuf/seed_sseq.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp b/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp
index 539c238ba3a48..e93657c487224 100644
--- a/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.device/entropy.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::random_device r;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp
index 5ed93470d030a..1f037dfba7f2d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp
index e3d866ad46168..53bfdcd99f28a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/copy.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp
index a8f76bb232491..e3abdd785ec72 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_double.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp
index a568cb2b78a03..04ad19e6a2f12 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/ctor_param.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp
index 24babc11829fa..6e32b7c46ce7a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eq.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp
index e28c39099f88f..9a7af92931dd3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp
index bf1d1174337cc..5584a9d164489 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/eval_param.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp
index 0e960d6acbadf..5f16633f66476 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/get_param.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp
index 7c7829796d8f5..68e050aab5807 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/io.pass.cpp
@@ -24,6 +24,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp
index 6f4ac7e3d4758..206b5169446fc 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/max.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp
index 8c369dbf82809..8f36bf7a37fd7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/min.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp
index b4fcd04ecfc34..3b86da19f1ece 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp
index 96ddd4bc4a9c5..61e9f875743df 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp
index 612e6c6860ce3..2666ee1c488f4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_ctor.pass.cpp
@@ -16,6 +16,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp
index cde5611ec37d8..c01c332a77bd6 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_eq.pass.cpp
@@ -16,6 +16,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp
index e1d9532f019ff..b0dc65ad02806 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/param_types.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp
index 03e3d8a8c78d6..c53e0fe2cd138 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/set_param.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp
index 539b809b068a8..b1b0263b1917e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bernoulli/types.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp
index e997198adb6e9..264f620bfef56 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp
index 2c2b65c9441a8..9b4ded238dab7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp
index 26a6e3a6622b8..23fa5f8aed542 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_int_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp
index fa69b91237ac8..77954ddac5146 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp
index dbe086ad4c726..63281c601c51a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp
index fea71ccb87614..7885ff6f7c02e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp
index cd4d006787817..78d6aedde73a5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp
index a3ba48f55832e..49c4946282e62 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp
index cb272152a3bae..7edf05fae8bcf 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp
index c8ca662b630ec..ab6ae0fa90e7d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp
index ce793f67b7f53..22f2f19736f97 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp
index 069d6e440b85a..afbe88239be2b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp
index f2b78e6d8f4d0..403a010d15433 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp
index 8ba09a681b2a5..96f5a13b7acce 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp
index 0ba4381ad185b..e620662ab92b8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp
index ba94d412a9f39..84433504d610d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp
index ec82a93ffc07d..4b9231e327b62 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp
index c52681a1354ec..22e8978b6810b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.bin/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp
index ae49feb3e3d4b..7b1273b562201 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp
index 73ff6d68f9546..1d069c2834432 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp
index 1d4388e4dc34b..5828867bacd3c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp
index 3bcebc36f80ff..009c3f0d642ef 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp
index 47ea0f282476e..0d391d4f4e202 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp
index 46ec881219363..bdeaa205404ab 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp
index 825fa3f5c4d52..16cb7fb0a45f9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp
index 35679a8627e6f..5c2489c530c6a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp
index 36de49f78cfc4..6cb22c8df4598 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp
index 9e785da2eed78..081fad1a19796 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp
index 63f69f25e2abb..17d383700652e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp
index c88af2d68e1a0..6f44e1d1bdd44 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp
index 117b98bc12972..34ceca3051178 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp
index 71710228597d7..d56da92a824bc 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp
index a741f06f603b2..23a2dfb1b2ad2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp
index 2f9efc268c923..34f1d66e9c5ed 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp
index 74d49d5d8e2e9..99bc433c8703f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp
index a62b66134e7bc..17674b1ffe4b2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.geo/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp
index 20f7f9ffd10c8..ed343eb9c6cc3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp
index 73ee12e430ba8..ba525c6119de7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp
index 0b9418b7c26a3..eee225caa054f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_int_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp
index 485a3ae98f9c6..8f71af95a3fc8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp
index b93084e479f43..7424bf2014910 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp
index 4a8f78842466f..6d7d7ea9c78db 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp
index b99f6fb07e5f1..0b03982a737e5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp
index 63fe3804315c5..b15d8f9ac67e2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp
index a26f199061748..e0e43d1a2db7e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp
index 3bc54a4be4d59..1b70b14b69a98 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp
index b2354d77c0e35..cb7a6dcdc03e1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp
index 7968b01e84360..12b826b2a382e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp
index b16818b26a483..4d03324cf6062 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp
index ad7908d314c1c..930711a1bb055 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp
index 793b4361cd944..30c195c7db53f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp
index c0164edc8faad..06d415823047d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp
index 8971be5ee48b6..6d894d2bc2970 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp
index ea177377a8815..142c82cb21d56 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.bern/rand.dist.bern.negbin/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp
index dfe8785ff9fb0..353eec451672d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp
index 85511d161077b..fdc7ac916bca0 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp
index f452b6e0bf377..fedad4b4de0df 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp
index 3a8ed3cc959fc..665d12181f7d5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp
index bc42b94f9ec56..2442d9725e909 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp
index 2b63645ae7d92..198a5cf334368 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <algorithm>
 
+#include "test_macros.h"
+
 double
 f(double x, double a, double b)
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp
index 56921fe0fa729..e34da00b34b68 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <algorithm>
 
+#include "test_macros.h"
+
 double
 f(double x, double a, double b)
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp
index c3c88b8d7e0ab..c9db2fde6e80c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp
index db50cfd5c9383..2f6bace070e94 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp
index 963e8ed379b86..c72cfcc67a231 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp
index 59044b8e27268..d05e40aef25b2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp
index 04d94e74db5fc..c9ee3894b8d5e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp
index 88739df6827cd..deb40ddfb3403 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp
index 8563bafbb8c20..bb6a1baac34b9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp
index 97553701ecf00..caa1cb9bc952d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp
index ca0539c414059..b76bda5cb37c1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp
index ba66ec16ab653..4756e662c89be 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp
index 09f79da51260e..1dbf2dddf8ef4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.cauchy/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp
index 75242c2b9438d..3bf99a337f250 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp
index 3eeba40d01e50..d01fb911e9499 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp
index a778b2face488..ba1fce65afa60 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp
index 922e44f6116f9..cb6d02e99d0f4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp
index 8b9ad4574286c..867ec5cb829fd 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp
index b080886a8d16c..2a8dfd31aa08e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp
index 86ac86ed35084..52864739c9b3c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/eval_param.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp
index 55fda0872c65b..4285caee044be 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp
index 1017861318f2a..06d7b4e4320ec 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp
index 19ab87ca9e3b9..703debcbe8566 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp
index af5b4561728ba..65db7ccbafcd9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp
index 0b13690f2b0f3..ed32f0ac071a1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp
index 22fe4b4ec306a..40419486df894 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp
index b3bbd8668a0a4..fcef96f54d1bb 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp
index f615acad8c5e7..815c6cc8ccede 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp
index a6727c429fa79..3e5512f9d90a9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp
index d497407e1e0e7..5a961ae0d3e5e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp
index e33551debf2ab..a6b914b9c5ead 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.chisq/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp
index c73d46ccb0f3a..f4c9fb20f5966 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp
index 9a9670154b432..4f8726ca8629f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp
index b29664ee48638..5e0e4410be1ae 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp
index 3318d52229fe9..f742193c4905c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp
index b91da051158df..ed8f218380d6a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp
index 9a4cdf175d222..dfb636cf01828 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <algorithm>
 #include <cmath>
 
+#include "test_macros.h"
+
 double fac(double x)
 {
     double r = 1;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp
index 59a19d57ed269..39628b2a33457 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/eval_param.pass.cpp
@@ -21,6 +21,8 @@
 #include <algorithm>
 #include <cmath>
 
+#include "test_macros.h"
+
 double fac(double x)
 {
     double r = 1;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp
index 716d852dd10e5..cc38df6169620 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp
index 870d086ccd650..76d4a48ec02f7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp
index 5e9c2968bd7c7..ba6a001788c0c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp
index 8aca42d94e7ac..41bcb848134e8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp
index 3622aeb3933c7..b9ad99c7d4178 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp
index cc936174c3825..f224b5ce467d7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp
index b7bef507d08ea..cd9dfe5077a35 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp
index 1345723ec1b2d..e8f14ead780a5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp
index 8fdb9fccfcc9a..ebd4e6b3bdeb8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp
index cad5deda3ca77..a78347bf681ec 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp
index 567ed9f127371..92ac0f2adc793 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.f/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp
index 567ae63b8a545..263e52c20c425 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp
index f27ea836a9912..1847a97434aa2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp
index e19839f2a483a..5d0652d60a49f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp
index 09ee798b8fd82..d831ede82a250 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp
index 7257f57b1dcc9..30f2a574d49b5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp
index 90841754881c7..9f25cea6540ec 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp
index 1c40e66b224f5..9c9eb858e5bfa 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp
index e56cae65b364a..49d2b5725c5f5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp
index 204a8f5f3b2d8..cd67c03b6a237 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp
index 2297a4160d340..78970c41c4e99 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp
index 84154865b93ce..cf5a7093d5198 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp
index 9999b8499f04a..28ae7d27f8ab5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp
index 6ad49592fb191..32936fc7f824a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp
index cb1735b790f69..fd5e61582efff 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp
index a6be4db10ad2e..010d7ee8a90e5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp
index 99f13be070ca4..6824fd8cb4e89 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp
index 9924757177d66..4de505ee6f4aa 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp
index 6bff26088a05a..a0b76cd930bca 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.lognormal/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp
index 492a0ea6aefd8..8214621f92e99 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp
index f2326bbc34332..774d5de2d3293 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp
index 2c4462f252a1d..3d0fb91bd73da 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp
index 66331187c52fc..45f80f03cda3e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp
index 87b7c4d7d0d58..feace94b04e5f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp
index 5362aef09f80e..b869fbde93592 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp
index 343bdd7dc6f7c..3b5cae11f6fe0 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/eval_param.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp
index a293371069b4a..46359949a5953 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp
index 601f8d58014b9..ae58744d558d7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp
index 24adfc05c7ca1..0b14fe85e07d7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp
index 0e2c27a183e3f..8f4a21a3b8fbc 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp
index 4a37869230bd6..ee443557c24ea 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp
index 7ae72e6dfd229..5a461506683a0 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp
index e947060e52098..b770bd6f2ad80 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp
index cf7fa39cf31bd..2110e25dcf76e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp
index 2fef65e8c2946..27434dae22634 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp
index 46f1cb2ee0596..e82bd9de83abe 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp
index f532786c9d8d0..a422fa0a02637 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.normal/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp
index bb6ced5e8875d..6e737fe560a85 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp
index a6aa61180fa96..1b9158a979a9a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp
index a133ff30c622b..9ce6f61d07b40 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp
index be11e9224c16c..9c527569da0c1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp
index 2de6ca5513bd8..9a79491d7b0f8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp
index bb1630ea44df1..a73e06bbf03e9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp
index 3b939010a99ab..01ce61a680af6 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp
index 170aed3dbec4a..82b38480093b5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp
index 60f9a6fb3ffc9..2d172dc149c07 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp
index f2fe365443b73..b2df1f6ff2fe5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp
index ab98be4504f64..476c8ec977d5a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp
index 54e9313ae677d..971b6b00ff5a8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp
index a27a735ca41a8..7aad07b387d97 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp
index 897a3e3cff482..b449239b81b8c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp
index cd3a04a341e15..9cd0844766429 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp
index 1acecc1d11032..25e59d0c1b5da 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp
index 85c6a3cb1cda3..e5432ae4cdf26 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp
index f89da9f9ede58..2c786cfd78cb5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.norm/rand.dist.norm.t/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp
index bfee1f279ad84..86481d7a30f96 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp
index 41119b4fefc3c..07601c1376476 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp
index 530d7de9a93cf..a085668685e1b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp
index 174e91f8f3112..94a6e724fa321 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp
index 609f226a54000..dd69d9345a730 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp
index bfe7e8da385a5..8bceb918ecd5d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp
index 00054a76e68f3..016ba27587cf7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/eval_param.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp
index 9cb46bf04f690..c12dc35264f13 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp
index e8613480c6e94..f634f3efe2a2b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp
index 9859883c1257b..e268336f4252d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp
index e1957295213fb..3aac03a875897 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp
index 9958d632b2a82..6dd27505bae40 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp
index 676eac6465bd6..493cd8551a2a9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp
index 8483bf9ff7505..900bb0fef7312 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp
index e36ffe029f7ad..1f1fb9ee2f48a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp
index 1b137acf9a907..93d000883c616 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp
index 7147b313c79fa..7e35150a6ba1a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp
index 289c2f1a4a126..429c5ee0b1312 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.exp/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp
index 15333b04804e9..1c2ad090b509f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp
index a71dd8e31a41c..b8c631ab6bae9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp
index 1123da3f2262d..7d55d03ca81b6 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp
index 4160b0337e3ab..74e74fcfa8027 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp
index ab14c2ac5c466..9f2b07734d346 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp
index c83e78e01f695..9aaf6ce0ab3ae 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp
index bf3df44555986..cb390c0452ad3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp
index 27499a49a5f86..daf4944403a17 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp
index 219d0f1e2da7b..a3434d8544694 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp
index bfa6f24835654..b06381cd59ded 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp
index bd97a0ba3965d..edd7c3bda9f54 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp
index b92b6fbe93396..348644cd5c260 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp
index f64a32030506c..df85d23cfce27 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp
index 906f7160afc7a..2c56e9bbacbf1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp
index c4e4a7060ea9e..8c16696805ba8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp
index 30c46459bb6a4..de504da604ece 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp
index 88fff04fb102d..ab3b1e557b246 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp
index e96c0d4ad4555..bce0dd5f9d4f9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.extreme/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp
index 35eb5c277cb2c..535f3d33ca9d8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp
index 962374efe876e..3c03793cc9797 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp
index 31ce06d15aa9d..4bbd68d8c5f65 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp
index c78821f871dd0..26348cccf52ec 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp
index ee365f8e7c278..488a687cbd8a3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp
index f9e678d1caab2..7af50cdb8b1a9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp
index aeb0bbf31e0f2..a782770153ac5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp
index 82b1c9bfd7823..c7d9aa87ee5e4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp
index 6732ca210ffa3..ee9f20f563fa4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp
index eb9e2b4f2e0f0..78b5a725029bc 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp
index 3eda5a65183cc..0136c4b0f738e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp
index 28d3997c4d61c..5606c35184a52 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp
index dc2b32dd7eecf..f46280c227503 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp
index 333b670f1dae3..54024f9d0cea1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp
index 2ba854163de9c..da7fad8a4a761 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp
index 0fc07ef95843e..a8a06b37db902 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp
index 4af868a270be3..0db448218dd51 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp
index 31a33c0cf8acd..e95cfb1cd8759 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.gamma/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp
index 4379d0bfd0559..cff7a6ee96b7f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp
index 60fa5193360b0..4e7f40344c5f4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp
index e27d13331d25d..2b8d95770e534 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp
index 117adb25a5045..a9eb91f534868 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp
index 0d7dda0d9725b..991f7f1ecf141 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp
index 588eddba2ebc2..4b07d239364f2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp
index 67f726843134a..8d60be4e656e4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/eval_param.pass.cpp
@@ -20,6 +20,8 @@
 #include <vector>
 #include <numeric>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp
index a55a3837c7fcc..0deda61e41d5b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp
index 4aec884f6b90b..afd77131ddc2f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp
index b1cb125084a3f..dd6ad7b33d524 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp
index e65319c56c216..69192aba40c92 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp
index 393153a7c0621..00b1b12893c39 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp
index a02e7250fc71f..24f0e40961e8c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp
index 1e395fdfd22a6..73c1e7019ab99 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp
index 5e9aa27ccf442..ec889bc857a0e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp
index b81d15cf0be15..fd518d0d01069 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp
index d75e6a1daf8e0..67b848265b2b7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp
index 982919268be43..10664c2128fe4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.poisson/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp
index ff81b81208318..6c461c5c1ab4a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp
index 0cfafa7257a91..44faee8dcf155 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp
index 3f4d55eb9bd13..41591c0c99040 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_double_double.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp
index e876a2df0e686..835e9dfe89b94 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp
index 1de323aef1043..cadf59ad1da99 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp
index 88e40b29c5487..641f061d15a65 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp
index 3959f440797d2..a719819f58964 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/eval_param.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp
index c2fbf7b98b9ec..38b40c7b42a17 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp
index 6b4f4e9ddbb66..56b25275e71ae 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp
index 3d9fe0b357afb..e3f15c9b7fbc5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp
index f92384041ac43..f33b750c356b9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp
index add72f68563e4..b2d86eb9939f9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp
index f2b7e95a4c33c..0d989124b62a5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp
index 6a03330c15628..eab03afbad817 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp
index e47f576728c2d..40cfaa531f503 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp
index 08c58d03d1a26..d5491ea9e4abd 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp
index f3c5a20d29e51..b4de398010dc0 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp
index 51b97e081d7fc..b258a80bf4b0f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.pois/rand.dist.pois.weibull/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp
index f1d5b3b24c2d7..34295bf2d0118 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp
index 19f8dc179e287..0264f88c4aef3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp
index f8d769dbda071..c56e6b52012f4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp
index 198b845b3e67e..23068ca937d32 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double fw(double x)
 {
     return x+1;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp
index a9c1e2bb42c74..d77a16cb27be4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_init.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp
index 66912cf4a94eb..1cc5293a3a020 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_iterator.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp
index a25c9fbd31c19..f41cca56a7d1e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp
index 409cc39ebb14c..8a0bf7547a050 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp
index 5dd70d144fa5e..703b5077b003a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval.pass.cpp
@@ -19,6 +19,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp
index 6cc4e90b25eb6..f058e681e41c7 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/eval_param.pass.cpp
@@ -19,6 +19,8 @@
 #include <vector>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp
index 59e33128243fe..9d2af0b6b21fb 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp
index ff9434c8a16ae..f4576e37202f2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp
index d365763914fa6..df7b6d778f5fc 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp
index 259eddc5d0237..78066a4e6cbbf 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp
index c50a2536c2092..82dd841252c41 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp
index 9b7e6b9edcf6b..67c3d5451164d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp
index 680ce405c7e7f..fbbea0ca4ed1e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_default.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp
index 4dd919dc4ecde..74fd9d26a446d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double fw(double x)
 {
     return x+1;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp
index 91adbdb07dae4..b1081259ff340 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_init.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp
index b553ffab127eb..de63323f5b811 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_ctor_iterator.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp
index 3ca6786933fa9..082df23c6950b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp
index fab19160264ca..94ef68a73863f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp
index 9858a30a77872..a76eeaed6012d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp
index d45475ba36fdd..0571a4805b8d2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.discrete/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp
index 3b63601aee9f5..1b0462b41d450 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp
index 82b0d8ef3f935..7f7251fb9f789 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp
index 836b690082a93..8e4466d3d274a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_default.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp
index 7e7537ad80491..cd1b7fdfd87ce 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double fw(double x)
 {
     return 2*x;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp
index 2a5833b76b04b..06419746dc789 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_init_func.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double f(double x)
 {
     return x*2;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp
index 695a7ba5f1686..6b35258fdf384 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_iterator.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp
index 1a10bb31b3608..cd3572c22f865 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp
index 18b5d59bd108c..01c045a999145 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp
index d00be22e551b9..c24d1a4437931 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp
@@ -22,6 +22,8 @@
 #include <algorithm>   // for sort
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp
index c82a6b8515aa1..d9f2628a3f03f 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp
@@ -23,6 +23,8 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp
index 90f469480d9a6..db7c0d6d2e312 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp
index b22fdfa584086..6256e59d55743 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp
index 19c11c66f229a..fe0c23f61ecc9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp
index 8a5fe519fb623..533f3e27017eb 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp
index 7c42c738c145b..a97708dcfd6b1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp
index f9eec8d731f01..80b442a458a91 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp
index ac2f724b08ed3..3485e7b62a349 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp
index d14a50817c63c..4dfdf43c08d3a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double fw(double x)
 {
     return 2*x;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp
index b7e5a49639e5c..4512d51c24977 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_init_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double f(double x)
 {
     return x*2;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp
index 96dda54d2465a..6c5bad55e57c2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_ctor_iterator.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp
index 4571613f98791..1c0de16aeb911 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp
index 3d4c25e882a61..ef8494c5bf6a9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp
index a34187cb2cecd..0cc0142df086c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp
index eec866112d8cf..f0d377235705a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.pconst/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp
index ff478a05b79aa..59740caf3377e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp
index ba5e6d59a17a9..8cd000a9336d3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp
index ded81c974fca7..a8784df20ed86 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_default.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp
index 92f9d414553c5..a5915be2ab6d3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double fw(double x)
 {
     return 2*x;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp
index 60ba16d9faea8..2434e132a782c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_init_func.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double f(double x)
 {
     return x*2;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp
index 541976ad1d54a..502e62a3821e5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_iterator.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp
index 1ecbe87c20db4..6d46b475185ae 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp
index 19eda7357b20d..02ff3e26d139b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp
index 995d86fc8f83c..ea8924effe395 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp
@@ -23,6 +23,8 @@
 #include <cassert>
 #include <limits>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp
index 621320ca49e03..9c9365fa772ab 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp
@@ -23,6 +23,8 @@
 #include <cassert>
 #include <limits>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp
index 7776330c04979..b9450c0952e83 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp
index 258fdb77b8324..92f4d76664810 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp
index ea6530eae0086..e0c68578bc475 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp
index 80c77d6e32727..a0ec3221a3342 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp
index 145e616306d53..47b5c28b7e733 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp
index b409f58f08ec7..b88d9c252f7ea 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp
index 69d4d71f4e4f9..2c593a0561f8e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp
index c6ea33b1f15f9..c65b2102b5a01 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double fw(double x)
 {
     return 2*x;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp
index 3972715d0afb0..d9bf0880258d1 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_init_func.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 double f(double x)
 {
     return x*2;
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp
index 4a51902a00f4d..240c8480ff9d0 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_ctor_iterator.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp
index e7a15d683059d..ee0562965282b 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp
index f6d65ddcc8e77..6797666ff3c7d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp
index c5697d7ce6485..49c3361bd12ab 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp
index 9d8bdf6fca13d..e1b182b0581c4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.samp/rand.dist.samp.plinear/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp
index e34abf08899ed..f266df69e54c2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp
index 796aebacc7dbe..f39d34d6a802c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp
index 8a4fd33589ba7..1e7a35e2af52c 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_int_int.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp
index 2546810ec5716..e6eae47913097 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp
index 47e1c89532d49..09794a12efd77 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
index a4e769bd77e65..e3ab0ab94de18 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp
index 77257a8656a82..234cd3655b833 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval_param.pass.cpp
@@ -19,6 +19,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp
index 62d144fe59528..da7230e86358e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp
index 3e029696508e6..f3b921afec3c9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp
index 58ac8571f5b11..f4740fb127ad6 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp
index 18b9b50457293..ae9609490ad07 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp
index 33d677a91312e..1127b80fe6035 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp
index 1eae36b9632f0..daea7055a2804 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp
index 6fbf499ca5e2c..0f95414bb7e12 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp
index b58a965676a56..aea4f26269137 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp
index b4844a64b01bd..d3a849fe94f33 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp
index 59b7b0728b5e9..4eaed99d95251 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp
index 60ff1cb3303a6..23152ae4cc99a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp
index e348c04ed60dc..fc6c9ee1788e6 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp
index 4fcc9efc3e6a8..8b52398a57ca4 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_int_int.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_int_int.pass.cpp
index c252c5bbbe34e..d377a37432d7d 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_int_int.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_int_int.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp
index 20df543ea3def..1403ebcfbcc3e 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/ctor_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp
index fb318626d5529..8f250ad09fa76 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eq.pass.cpp
@@ -19,6 +19,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp
index 99fe1f825ecaf..b28bc6fc7c1c3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval.pass.cpp
@@ -21,6 +21,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp
index 495f0e9c34a1a..2355c5fa105a5 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/eval_param.pass.cpp
@@ -19,6 +19,8 @@
 #include <numeric>
 #include <cstddef>
 
+#include "test_macros.h"
+
 template <class T>
 inline
 T
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp
index b603755452580..9b2e72d71b9d9 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/get_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp
index 1f6eb580657a6..e5abad268fddb 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/io.pass.cpp
@@ -25,6 +25,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp
index ea75181f80ea1..d33787c11bfb2 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/max.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp
index 46455e8b41664..63f1da5098204 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/min.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp
index ab4d5969377d4..13c588575adee 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp
index 24260405cfc49..12ca132310505 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp
index 15cad423ef00b..3ff34292b4cd3 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp
index 2127aebf482a0..19e05bd5b3b7a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_eq.pass.cpp
@@ -17,6 +17,8 @@
 #include <limits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp
index d2677580e346a..3f2309c651b79 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/param_types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp
index f651d72d1b618..97c14853f58b8 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/set_param.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp
index 431b7374a0bd4..4218f4ccc89bd 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.real/types.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/assign.pass.cpp
index cfaad1a33a5a9..12620848626fc 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T, T a, T c, T m>
 void
 test1()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/copy.pass.cpp
index 35585423442ed..5dac0772cb0e9 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T, T a, T c, T m>
 void
 test1()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp
index 4f2da2f612ad6..a273612460056 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_result_type.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test1()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_sseq.pass.cpp
index 7e82f6395f931..562f86b41728b 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/ctor_sseq.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/default.pass.cpp
index 372d98a656c71..10bc1d71d8e89 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T, T a, T c, T m>
 void
 test1()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/discard.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/discard.pass.cpp
index dcbb3e0656be2..a60bd261af973 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/discard.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/discard.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 rand0()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/eval.pass.cpp
index 3ee4d9113aceb..e7c7f15892630 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/eval.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 randu()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp
index b12de7b019036..6a93427f8e524 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/io.pass.cpp
@@ -27,6 +27,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/result_type.pass.cpp
index e0bb2e02cc17a..ebcc9736d71c3 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/result_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_result_type.pass.cpp
index 3103bf7efb521..f601d9542d3a8 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_result_type.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test1()
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_sseq.pass.cpp
index e7725bd0b581a..23a34be24bccb 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/seed_sseq.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/assign.pass.cpp
index 3e1002b061eb6..5682dd8a42a89 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/copy.pass.cpp
index b1273f1bbc32d..432f73453acbc 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/copy.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp
index 4eca3baad324a..a1f367f6aab54 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_result_type.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp
index b46029a292392..9c98e77ace74f 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq_all_zero.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq_all_zero.pass.cpp
index a2489cf63660a..6fdcb18ef8b00 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq_all_zero.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/ctor_sseq_all_zero.pass.cpp
@@ -27,6 +27,8 @@
 #include <cstddef>
 #if TEST_STD_VER >= 11
 #include <initializer_list>
+
+#include "test_macros.h"
 #endif
 
 struct all_zero_seed_seq {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/default.pass.cpp
index 35364f312a932..f65b99d4e9eec 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/default.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/discard.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/discard.pass.cpp
index 750afb1e8a112..17913f9c6707c 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/discard.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/discard.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/eval.pass.cpp
index 03e87dad8c45f..e08f1ac9c9833 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/eval.pass.cpp
@@ -19,6 +19,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp
index cbc764e8dd6eb..6cd1e34f2b579 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/io.pass.cpp
@@ -33,6 +33,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/result_type.pass.cpp
index 7987d65748e74..b407b092d437b 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/result_type.pass.cpp
@@ -20,6 +20,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_result_type.pass.cpp
index ff7d07374cbf1..3212a8f008304 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_result_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_sseq.pass.cpp
index 8ad2ec289f685..78285ac6baa7f 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.mers/seed_sseq.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/assign.pass.cpp
index e6cce1b2da70f..fa788b590253a 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/assign.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/assign.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/copy.pass.cpp
index c39f39858e417..ce7f382f83431 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/copy.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp
index 5a178e82e6f41..0a4c536aa8112 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_result_type.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp
index 0ed496e71af0c..08c7a3f6251f0 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/ctor_sseq.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/default.pass.cpp
index dbc4c5b4cb82c..0ee6a1522b45d 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/default.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/discard.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/discard.pass.cpp
index 4d039289bffbb..d111571a62bc7 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/discard.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/discard.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/eval.pass.cpp
index d8d03c09ee5a9..b41b9de5e55e8 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/eval.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp
index d3eeda3be4a8f..d5222842294d1 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/io.pass.cpp
@@ -27,6 +27,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/result_type.pass.cpp
index 5312bb1ff3aff..2406b775b8ae4 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/result_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_result_type.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_result_type.pass.cpp
index 637841a679bfd..7eef6b20b9fb1 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_result_type.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_result_type.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_sseq.pass.cpp b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_sseq.pass.cpp
index 85199c0ef3b85..99ce46f0035d3 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_sseq.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.sub/seed_sseq.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 void
 test1()
 {
diff --git a/libcxx/test/std/numerics/rand/rand.predef/knuth_b.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/knuth_b.pass.cpp
index d81f788e5e052..02a142b4d779a 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/knuth_b.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/knuth_b.pass.cpp
@@ -13,6 +13,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::knuth_b e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/minstd_rand.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/minstd_rand.pass.cpp
index d38b009e5f8b1..1115eb885dbf5 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/minstd_rand.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/minstd_rand.pass.cpp
@@ -14,6 +14,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::minstd_rand e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/minstd_rand0.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/minstd_rand0.pass.cpp
index 4e4f07eae781c..993b28c8611af 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/minstd_rand0.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/minstd_rand0.pass.cpp
@@ -14,6 +14,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::minstd_rand0 e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/mt19937.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/mt19937.pass.cpp
index 16390964f58a6..0e47b6cea15ed 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/mt19937.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/mt19937.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::mt19937 e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/mt19937_64.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/mt19937_64.pass.cpp
index c6a3b4ebdf91d..b2bd5d4e3c43e 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/mt19937_64.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/mt19937_64.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::mt19937_64 e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/ranlux24.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/ranlux24.pass.cpp
index 9805d1add87db..ba64854b16179 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/ranlux24.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/ranlux24.pass.cpp
@@ -13,6 +13,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ranlux24 e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/ranlux24_base.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/ranlux24_base.pass.cpp
index 21c68699bfaee..a6227fe0d1980 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/ranlux24_base.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/ranlux24_base.pass.cpp
@@ -13,6 +13,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ranlux24_base e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/ranlux48.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/ranlux48.pass.cpp
index 2ab37e1c72885..2799803f7f293 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/ranlux48.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/ranlux48.pass.cpp
@@ -13,6 +13,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ranlux48 e;
diff --git a/libcxx/test/std/numerics/rand/rand.predef/ranlux48_base.pass.cpp b/libcxx/test/std/numerics/rand/rand.predef/ranlux48_base.pass.cpp
index 93bb14c5c939d..a96bc53faece5 100644
--- a/libcxx/test/std/numerics/rand/rand.predef/ranlux48_base.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.predef/ranlux48_base.pass.cpp
@@ -13,6 +13,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ranlux48_base e;
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp
index a05c0846af70b..1693aa28971f2 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.canonical/generate_canonical.pass.cpp
@@ -14,6 +14,7 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
 #include "truncate_fp.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
index 87608e63c04dc..33f855bab30c3 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::seed_seq s;
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.pass.cpp
index e328777782b8b..c7a4779bff12d 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/initializer_list.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/initializer_list.pass.cpp
index 656981b1a9303..a227ff69028c9 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/initializer_list.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/initializer_list.pass.cpp
@@ -18,6 +18,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::seed_seq s= {5, 4, 3, 2, 1};
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
index 10f7b34727e37..2e2c6365eb4f0 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
@@ -16,6 +16,8 @@
 #include <random>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     unsigned a[5] = {5, 4, 3, 2, 1};
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/types.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/types.pass.cpp
index 50cfa7d59ccd9..82a12517c7b9b 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/types.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/types.pass.cpp
@@ -17,6 +17,8 @@
 #include <random>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::seed_seq::result_type, std::uint_least32_t>::value), "");
diff --git a/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
index 5190357209a13..9f3c44a06d262 100644
--- a/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.match/inverted_character_classes.pass.cpp
@@ -14,6 +14,8 @@
 #include <cassert>
 #include <regex>
 
+#include "test_macros.h"
+
 
 int main(int, char**) {
     assert(std::regex_match("X", std::regex("[X]")));
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/invert_neg_word_search.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/invert_neg_word_search.pass.cpp
index 63fd5ae57aec9..9334bcc5b2b09 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/invert_neg_word_search.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/invert_neg_word_search.pass.cpp
@@ -18,6 +18,8 @@
 #include <regex>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 // PR34310
 int main(int, char**)
diff --git a/libcxx/test/std/re/re.regex/re.regex.construct/ptr_size.pass.cpp b/libcxx/test/std/re/re.regex/re.regex.construct/ptr_size.pass.cpp
index 29fa3ca57ac2d..1d99069f1b0de 100644
--- a/libcxx/test/std/re/re.regex/re.regex.construct/ptr_size.pass.cpp
+++ b/libcxx/test/std/re/re.regex/re.regex.construct/ptr_size.pass.cpp
@@ -15,6 +15,8 @@
 #include <regex>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class CharT>
 void
 test(const CharT* p, std::size_t len, unsigned mc)
diff --git a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
index 0fecb1bb081ff..eba6659da638e 100644
--- a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
   {
diff --git a/libcxx/test/std/strings/basic.string.literals/literal1.pass.cpp b/libcxx/test/std/strings/basic.string.literals/literal1.pass.cpp
index 92777c4f428fe..74d8cb33523f2 100644
--- a/libcxx/test/std/strings/basic.string.literals/literal1.pass.cpp
+++ b/libcxx/test/std/strings/basic.string.literals/literal1.pass.cpp
@@ -12,6 +12,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::literals;
diff --git a/libcxx/test/std/strings/basic.string.literals/literal2.pass.cpp b/libcxx/test/std/strings/basic.string.literals/literal2.pass.cpp
index 6f73ae9a10376..2afc150ddc6b1 100644
--- a/libcxx/test/std/strings/basic.string.literals/literal2.pass.cpp
+++ b/libcxx/test/std/strings/basic.string.literals/literal2.pass.cpp
@@ -12,6 +12,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::literals::string_literals;
diff --git a/libcxx/test/std/strings/basic.string.literals/literal3.pass.cpp b/libcxx/test/std/strings/basic.string.literals/literal3.pass.cpp
index b7a8d55368753..75390e5ae9032 100644
--- a/libcxx/test/std/strings/basic.string.literals/literal3.pass.cpp
+++ b/libcxx/test/std/strings/basic.string.literals/literal3.pass.cpp
@@ -12,6 +12,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std;
diff --git a/libcxx/test/std/strings/basic.string/string.access/back.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/back.pass.cpp
index 69b88cce886ee..e2a0c2679889c 100644
--- a/libcxx/test/std/strings/basic.string/string.access/back.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/back.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp
index e1cf707a6f0b1..548bf01087569 100644
--- a/libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/db_back.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp
index e3e6db525e4c3..47cda4983bf63 100644
--- a/libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/db_cback.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp
index c9b2ba7c88ed4..12b5e511b241d 100644
--- a/libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/db_cfront.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp
index c7b430efc9834..a369cb6234f92 100644
--- a/libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/db_cindex.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp
index 73db22405103a..6f8dcd5df7c99 100644
--- a/libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/db_front.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp
index d3f2e8d28d0eb..bc2502316dab7 100644
--- a/libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/db_index.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.access/front.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/front.pass.cpp
index c1ce626b07f6f..eaaa678126ae4 100644
--- a/libcxx/test/std/strings/basic.string/string.access/front.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/front.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp b/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp
index ec02fa4c32b06..782d92ef5a8dc 100644
--- a/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.access/index.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/clear.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/clear.pass.cpp
index 914842bb7a653..62b3627edf6cd 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/clear.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/clear.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/length.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/length.pass.cpp
index b61ec488e27e3..df3cdc0f86528 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/length.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/length.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp
index 8f8c9a3fb428c..32f0be2775a1c 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp
@@ -21,6 +21,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
index 9832df536c9ad..17d689ee53f76 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
@@ -21,6 +21,7 @@
 #include <cassert>
 #include <stdexcept>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/size.pass.cpp
index f3f89a5a6d861..5c7fe29f7346d 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.cons/initializer_list.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/initializer_list.pass.cpp
index a106203d49ca4..6e21b02895ddf 100644
--- a/libcxx/test/std/strings/basic.string/string.cons/initializer_list.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.cons/initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/strings/basic.string/string.cons/initializer_list_assignment.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/initializer_list_assignment.pass.cpp
index dcb9bb911b439..53440494c0779 100644
--- a/libcxx/test/std/strings/basic.string/string.cons/initializer_list_assignment.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.cons/initializer_list_assignment.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/begin.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/begin.pass.cpp
index fbae9fab7d2f1..8fa5e67f7e839 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/begin.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/begin.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/cbegin.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/cbegin.pass.cpp
index 9886d56bbd98b..78216dcdbd035 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/cbegin.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/cbegin.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/cend.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/cend.pass.cpp
index 1a3d30775f2ef..0af2c0aeffedc 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/cend.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/cend.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/crbegin.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/crbegin.pass.cpp
index 687c34368ae8d..4476f88a055de 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/crbegin.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/crbegin.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/crend.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/crend.pass.cpp
index 86aaad699d4c5..6cd57aa53a410 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/crend.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/crend.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp
index 0b496262fffa7..41323e0458d45 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_2.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp
index 0782461b58433..ea0f2866310c9 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_3.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp
index 1a46f86c28d76..07b1b825c65a8 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_4.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp
index 77caf1b814a16..a183282f40909 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_5.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp
index 126c3661c3925..a24c370dea226 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_6.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp
index f1083a43954c8..16a20878658d5 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_7.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp
index 0420f5ebcdbd8..2275020b4d73b 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/db_iterators_8.pass.cpp
@@ -20,6 +20,7 @@
 #include <exception>
 #include <cstdlib>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/end.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/end.pass.cpp
index 86b00a3706d15..079bfdfef439e 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/end.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/end.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
index 08448b1fd89f5..e2f3d3a7cf98c 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
@@ -20,6 +20,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     { // N3644 testing
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/rbegin.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/rbegin.pass.cpp
index 479584c3708b9..e20b10df4f84c 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/rbegin.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/rbegin.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/rend.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/rend.pass.cpp
index 9b54058a60d8a..fdeb9098e2f3a 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/rend.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/rend.pass.cpp
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp
index 8f280e828d905..d6112601f2e09 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/iterator.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_initializer_list.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_initializer_list.pass.cpp
index 6dd043c6db93a..bbeca11141667 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_initializer_list.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_initializer_list.pass.cpp
@@ -16,6 +16,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp
index 6d4f1a6686e9e..d52a579349df0 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_insert/iter_iter_iter.pass.cpp
@@ -18,6 +18,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_op_plus_equal/initializer_list.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_op_plus_equal/initializer_list.pass.cpp
index 689389eb7a64d..7c961eb90b590 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_op_plus_equal/initializer_list.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_op_plus_equal/initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_initializer_list.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_initializer_list.pass.cpp
index 3df49f32535b9..8f17d6e4c5a26 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_initializer_list.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_initializer_list.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp
index 47a1193be7327..1d4a46b7c886f 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_iter_iter.pass.cpp
@@ -16,6 +16,7 @@
 #include <iterator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "test_iterators.h"
 
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_pointer.pass.cpp
index 8ed1dc3df3059..37d51d45af5b2 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/iter_iter_pointer.pass.cpp
@@ -17,6 +17,7 @@
 #include <algorithm>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp
index b1511f21192d5..a90bd8b8dbe81 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_delim_rv.pass.cpp
@@ -19,6 +19,7 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp
index cf4772c43d355..7abf14e43c610 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line_rv.pass.cpp
@@ -19,6 +19,7 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/stream_insert.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/stream_insert.pass.cpp
index b30fd5afaf5f9..b60fc338b2f56 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/stream_insert.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/stream_insert.pass.cpp
@@ -17,6 +17,7 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/pointer_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/pointer_string.pass.cpp
index bf2cc8413ab12..3de398264d4dc 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/pointer_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/pointer_string.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_pointer.pass.cpp
index 76e0abe23784c..220bde69caf00 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_pointer.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string.pass.cpp
index 30aeb501a5927..51d9f60629cbe 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string_view.pass.cpp
index 5b01455e7f8bb..80ef8daf2c039 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_view_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_view_string.pass.cpp
index 2d6957e3494c3..bd81bb1667065 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_view_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_op!=/string_view_string.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/pointer_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/pointer_string.pass.cpp
index 5db04b3a8d679..0efd829ee0d17 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/pointer_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/pointer_string.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_pointer.pass.cpp
index aa79e17d12193..ae0d3e88d5faa 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_pointer.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string.pass.cpp
index 357a91fd9f595..387c0016dc15b 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string_view.pass.cpp
index 06c16d0707912..a525c752d6cd1 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_view_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_view_string.pass.cpp
index 7ebdc0934930e..793fa2388b274 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_view_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_operator==/string_view_string.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/pointer_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/pointer_string.pass.cpp
index db28cd4d1b464..2e96167b41997 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/pointer_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/pointer_string.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_pointer.pass.cpp
index 56521e91662be..57c979d0caafa 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_pointer.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string.pass.cpp
index 0cfafb80911eb..cc6f331756f96 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string_view.pass.cpp
index 692fbd4fe8168..7f6ad7d3bd210 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_view_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_view_string.pass.cpp
index f6a2fa55588f3..b96ee17aa2e60 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_view_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt/string_view_string.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/pointer_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/pointer_string.pass.cpp
index e868531a6833a..9082f36c94882 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/pointer_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/pointer_string.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_pointer.pass.cpp
index 8aad50782472c..72ed73aa2d8c3 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_pointer.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string.pass.cpp
index f0ab16cbb62af..604698a8d0c10 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string_view.pass.cpp
index c7eb3e3e45ec9..131f83ea44648 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_view_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_view_string.pass.cpp
index b49c1348a9909..45c0ed11eed02 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_view_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_opgt=/string_view_string.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/pointer_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/pointer_string.pass.cpp
index 1609c4d87d997..5e96bf37b3450 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/pointer_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/pointer_string.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_pointer.pass.cpp
index 079344bb8295d..f01b69e620b3f 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_pointer.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string.pass.cpp
index 586faa40f0734..b6a61f7034f6c 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string_view.pass.cpp
index 79393def5ddde..fa81ae88fbeed 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_view_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_view_string.pass.cpp
index 7e8b139c332a3..54f8015442a02 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_view_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt/string_view_string.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/pointer_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/pointer_string.pass.cpp
index cae9233dfb261..5c43125974e95 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/pointer_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/pointer_string.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_pointer.pass.cpp
index 07f9b355185e5..3b1dc2cb8a165 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_pointer.pass.cpp
@@ -14,6 +14,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string.pass.cpp
index 8d40f2f0a5b1f..219ed5101b298 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string.pass.cpp
@@ -15,6 +15,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string_view.pass.cpp
index 3df9e39cc0da6..52806ae60bdd6 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_view_string.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_view_string.pass.cpp
index cb7b1a4c451b7..961fc0e5b87a2 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_view_string.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string_oplt=/string_view_string.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string.accessors/c_str.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string.accessors/c_str.pass.cpp
index 2677230c2d528..ee7f4c4718af7 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string.accessors/c_str.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string.accessors/c_str.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string.accessors/get_allocator.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string.accessors/get_allocator.pass.cpp
index 7edf5a88d9db1..652e995e1c47f 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string.accessors/get_allocator.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string.accessors/get_allocator.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_compare/pointer.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_compare/pointer.pass.cpp
index 0af8ce02f9534..eac5d0b739388 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_compare/pointer.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_compare/pointer.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int sign(int x)
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_compare/string_view.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_compare/string_view.pass.cpp
index 2abc7fc1ac189..f2cec20888094 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_compare/string_view.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_compare/string_view.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int sign(int x)
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/char_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/char_size.pass.cpp
index 7bada8446ffa2..9d2d0f729a263 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/char_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/char_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size.pass.cpp
index bd20ea1bb4a1d..c1556b1b21fca 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size_size.pass.cpp
index 46c5511d9aefa..de629093b66c1 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/pointer_size_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/string_view_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/string_view_size.pass.cpp
index 54ce737ecc608..eea47227e9c7d 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.not.of/string_view_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/char_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/char_size.pass.cpp
index da630f07e762f..9e8dfdafb59c3 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/char_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/char_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size.pass.cpp
index 2162ea14cf1f8..fd37402c5a084 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size_size.pass.cpp
index e0bb0f2fb777d..145471628a168 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/pointer_size_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/string_view_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/string_view_size.pass.cpp
index 8cd272408a02c..33bc971ae479d 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.first.of/string_view_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/char_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/char_size.pass.cpp
index fd77eb89d86fe..c2950250b0876 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/char_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/char_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size.pass.cpp
index 1234ccb4de205..a5ac3e8ec9701 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size_size.pass.cpp
index 4c07f4d6129ba..5bc5a8cf203ce 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/pointer_size_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/string_view_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/string_view_size.pass.cpp
index da5054b7df658..7952906155876 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.not.of/string_view_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/char_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/char_size.pass.cpp
index 4292b1eb8bff9..8ea48fe0dba1f 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/char_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/char_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size.pass.cpp
index 0b86a10c5122d..8045ac056d04a 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size_size.pass.cpp
index 9c5f670a9c723..855139faaa4dd 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/pointer_size_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/string_view_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/string_view_size.pass.cpp
index ccf181faa5559..63f03e74fa01f 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find.last.of/string_view_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find/char_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find/char_size.pass.cpp
index 5700d09575fbb..6f92ef12d551c 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find/char_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find/char_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size.pass.cpp
index 6e7ae3d41f4b5..1cc6cb96a40ba 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size_size.pass.cpp
index fad7507173da2..d640be7218fe3 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find/pointer_size_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_find/string_view_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_find/string_view_size.pass.cpp
index c1e78af10250a..08baa745ea51f 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_find/string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_find/string_view_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/char_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/char_size.pass.cpp
index 46ced31239188..e3484bda79ed2 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/char_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/char_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size.pass.cpp
index 715b5e5785566..b77d808f9baed 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size_size.pass.cpp
index e96700952277f..8e4e32e52a5f9 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/pointer_size_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S>
diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/string_view_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/string_view_size.pass.cpp
index ea4f9c2059709..7d0c4ff5d9be5 100644
--- a/libcxx/test/std/strings/basic.string/string.ops/string_rfind/string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.ops/string_rfind/string_view_size.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 template <class S, class SV>
diff --git a/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp b/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp
index be090c90e1772..af34fc97ff5b2 100644
--- a/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp
@@ -13,6 +13,7 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/strings/basic.string/types.pass.cpp b/libcxx/test/std/strings/basic.string/types.pass.cpp
index 5aa2c7cf8bebd..910b3364d6b89 100644
--- a/libcxx/test/std/strings/basic.string/types.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/types.pass.cpp
@@ -35,6 +35,7 @@
 #include <iterator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_traits.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/assign3.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/assign3.pass.cpp
index e2cbe3d5dbe0d..cc10294b0140d 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/assign3.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/assign3.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     char s2[3] = {0};
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/copy.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/copy.pass.cpp
index 3a8e3bda6bc17..7e76c0bbd3db2 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/copy.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/copy.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     char s1[] = {1, 2, 3};
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eof.pass.cpp
index 0ea0995da1fd0..93eb2a68dbfc0 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eof.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<char>::eof() == EOF);
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq.pass.cpp
index f0cef792d1456..e379848a1acde 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<char>::eq('a', 'a'));
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq_int_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq_int_type.pass.cpp
index 0fbb4389c6015..4c82d650480be 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq_int_type.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/eq_int_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert( std::char_traits<char>::eq_int_type('a', 'a'));
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp
index e62090cf4c467..fa14e666e9e90 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/lt.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert( std::char_traits<char>::lt('\0', 'A'));
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/move.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/move.pass.cpp
index ecbb61901a908..4a132649d5038 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/move.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/move.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     char s1[] = {1, 2, 3};
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/not_eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/not_eof.pass.cpp
index 1c37a7c2efbaf..a03479b08b0c4 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/not_eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/not_eof.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<char>::not_eof('a') == 'a');
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_char_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_char_type.pass.cpp
index d8a957016f9d8..079a83e766b3e 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_char_type.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_char_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<char>::to_char_type('a') == 'a');
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_int_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_int_type.pass.cpp
index dd903f9e6c02d..525395de4afe0 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_int_type.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/to_int_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<char>::to_int_type('a') == 'a');
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/types.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/types.pass.cpp
index 30d31ebc920fb..ffbb73510b1e5 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/types.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char/types.pass.cpp
@@ -19,6 +19,8 @@
 #include <string>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::char_traits<char>::char_type, char>::value), "");
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/assign3.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/assign3.pass.cpp
index 8838b81e14b4f..77207826aabff 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/assign3.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/assign3.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/copy.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/copy.pass.cpp
index 102f15acdee3e..bedc55db0ed31 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/copy.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/copy.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/eof.pass.cpp
index c80b0792a4923..502432bc17ef6 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/eof.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/move.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/move.pass.cpp
index aa55e0d11ce90..f822437809f4b 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/move.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/move.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/types.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/types.pass.cpp
index 41e3fd4d57b1a..415ddd46c3a25 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/types.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char16_t/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cstdint>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/assign3.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/assign3.pass.cpp
index 26985481c1f4f..c63d025438583 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/assign3.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/assign3.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/copy.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/copy.pass.cpp
index 8b2d6ce907428..7f9b383b0879c 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/copy.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/copy.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/eof.pass.cpp
index 5c28f47bb4839..32535fe09e15e 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/eof.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/move.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/move.pass.cpp
index 7cda99bd8300e..6d335fa3c1010 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/move.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/move.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/types.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/types.pass.cpp
index 5ec558f54700c..0297da0ba08cc 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/types.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char32_t/types.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cstdint>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign3.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign3.pass.cpp
index f9c176e9b2f50..426ca075dce74 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign3.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign3.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/copy.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/copy.pass.cpp
index 7bf949197a84c..bfd25c93c44f2 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/copy.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/copy.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/eof.pass.cpp
index e112556886dcf..6b5a0a5fe486d 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/eof.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/move.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/move.pass.cpp
index a5e1359dd4d77..ffb81fd30f107 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/move.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/move.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/types.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/types.pass.cpp
index 245dcd87d99cf..94db0a79ac255 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/types.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cstdint>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp
index d4ed434094581..373d374b33d5a 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/assign3.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     wchar_t s2[3] = {0};
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp
index 309c21316c434..3665297cd1663 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/copy.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     wchar_t s1[] = {1, 2, 3};
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp
index 6190220fe9012..cc4f1eac923f6 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eof.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<wchar_t>::eof() == WEOF);
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp
index 701a6502fc6e2..400c293267b69 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<wchar_t>::eq(L'a', L'a'));
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp
index b218186b43f80..fbb1709000dfa 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/eq_int_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert( std::char_traits<wchar_t>::eq_int_type(L'a', L'a'));
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp
index 9abd9cf59d7a7..15a16b0cd92da 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/lt.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(!std::char_traits<wchar_t>::lt(L'a', L'a'));
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp
index 341a90233a838..67610cb141be7 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/move.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     wchar_t s1[] = {1, 2, 3};
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp
index 92f08b1d52004..6ae2763b83ff5 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/not_eof.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<wchar_t>::not_eof(L'a') == L'a');
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp
index f479bec9daf40..6d5ef1122d549 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_char_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<wchar_t>::to_char_type(L'a') == L'a');
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp
index 11fe2419b1a59..74f8e68dec1ea 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/to_int_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::char_traits<wchar_t>::to_int_type(L'a') == L'a');
diff --git a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp
index 9781d55e392ba..810a142854acc 100644
--- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp
+++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.wchar.t/types.pass.cpp
@@ -19,6 +19,8 @@
 #include <string>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::char_traits<wchar_t>::char_type, wchar_t>::value), "");
diff --git a/libcxx/test/std/strings/string.conversions/to_string.pass.cpp b/libcxx/test/std/strings/string.conversions/to_string.pass.cpp
index 23729cd4f49ed..4dafcabdba8e9 100644
--- a/libcxx/test/std/strings/string.conversions/to_string.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/to_string.pass.cpp
@@ -23,6 +23,8 @@
 #include <cassert>
 #include <sstream>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test_signed()
diff --git a/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp b/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp
index 02a262a0f29ba..53ef0789a5541 100644
--- a/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/to_wstring.pass.cpp
@@ -23,6 +23,8 @@
 #include <cassert>
 #include <sstream>
 
+#include "test_macros.h"
+
 template <class T>
 void
 test_signed()
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/opeq.string_view.string.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/opeq.string_view.string.pass.cpp
index d27d4c4458825..33113218064a8 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/opeq.string_view.string.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/opeq.string_view.string.pass.cpp
@@ -17,6 +17,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const std::string &lhs, S rhs, bool x)
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/opge.string_view.string.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/opge.string_view.string.pass.cpp
index dddaa390ed379..dd81e8092f473 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/opge.string_view.string.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/opge.string_view.string.pass.cpp
@@ -17,6 +17,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& lhs, const typename S::value_type* rhs, bool x, bool y)
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/opgt.string_view.string.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/opgt.string_view.string.pass.cpp
index 84c9478bfc5d5..0d7703aae9bd2 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/opgt.string_view.string.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/opgt.string_view.string.pass.cpp
@@ -17,6 +17,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& lhs, const typename S::value_type* rhs, bool x, bool y)
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/ople.string_view.string.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/ople.string_view.string.pass.cpp
index 80e80757bf9f5..117c042d0fc62 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/ople.string_view.string.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/ople.string_view.string.pass.cpp
@@ -17,6 +17,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& lhs, const typename S::value_type* rhs, bool x, bool y)
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/oplt.string_view.string.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/oplt.string_view.string.pass.cpp
index f611bac7a846b..cfd18f288fa64 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/oplt.string_view.string.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/oplt.string_view.string.pass.cpp
@@ -17,6 +17,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& lhs, const typename S::value_type* rhs, bool x, bool y)
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/opne.string_view.string.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/opne.string_view.string.pass.cpp
index 613eaf7b32f9c..0e987dc446529 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/opne.string_view.string.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/opne.string_view.string.pass.cpp
@@ -17,6 +17,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const std::string &lhs, S rhs, bool x)
diff --git a/libcxx/test/std/strings/string.view/string.view.find/find_first_not_of_string_view_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.find/find_first_not_of_string_view_size.pass.cpp
index 37445b578c72b..1408ffb8cfdb3 100644
--- a/libcxx/test/std/strings/string.view/string.view.find/find_first_not_of_string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.find/find_first_not_of_string_view_size.pass.cpp
@@ -13,6 +13,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& s, const S& str, typename S::size_type pos, typename S::size_type x)
diff --git a/libcxx/test/std/strings/string.view/string.view.find/find_first_of_string_view_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.find/find_first_of_string_view_size.pass.cpp
index c705f0266d867..aad1094538d3e 100644
--- a/libcxx/test/std/strings/string.view/string.view.find/find_first_of_string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.find/find_first_of_string_view_size.pass.cpp
@@ -13,6 +13,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& s, const S& str, typename S::size_type pos, typename S::size_type x)
diff --git a/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_string_view_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_string_view_size.pass.cpp
index e90e38c0d7085..289d5e314d748 100644
--- a/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.find/find_last_not_of_string_view_size.pass.cpp
@@ -13,6 +13,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& s, const S& str, typename S::size_type pos, typename S::size_type x)
diff --git a/libcxx/test/std/strings/string.view/string.view.find/find_last_of_string_view_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.find/find_last_of_string_view_size.pass.cpp
index 02c1184eec8a7..c60fc00347c0b 100644
--- a/libcxx/test/std/strings/string.view/string.view.find/find_last_of_string_view_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.find/find_last_of_string_view_size.pass.cpp
@@ -13,6 +13,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class S>
 void
 test(const S& s, const S& str, typename S::size_type pos, typename S::size_type x)
diff --git a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
index d52f16303b6ab..8363b003e7200 100644
--- a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
   {
diff --git a/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp b/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp
index d4dcbdc6b0a7d..c427e4a84403a 100644
--- a/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.io/stream_insert.pass.cpp
@@ -17,6 +17,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::string_view;
 using std::wstring_view;
 
diff --git a/libcxx/test/std/strings/string.view/string.view.synop/nothing_to_do.pass.cpp b/libcxx/test/std/strings/string.view/string.view.synop/nothing_to_do.pass.cpp
index 45edec7f4b068..9ab36cbdfe9a5 100644
--- a/libcxx/test/std/strings/string.view/string.view.synop/nothing_to_do.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.synop/nothing_to_do.pass.cpp
@@ -8,6 +8,8 @@
 
 #include <string_view>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   return 0;
 }
diff --git a/libcxx/test/std/strings/string.view/string.view.template/nothing_to_do.pass.cpp b/libcxx/test/std/strings/string.view/string.view.template/nothing_to_do.pass.cpp
index 45edec7f4b068..9ab36cbdfe9a5 100644
--- a/libcxx/test/std/strings/string.view/string.view.template/nothing_to_do.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.template/nothing_to_do.pass.cpp
@@ -8,6 +8,8 @@
 
 #include <string_view>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   return 0;
 }
diff --git a/libcxx/test/std/strings/string.view/string_view.literals/literal1.pass.cpp b/libcxx/test/std/strings/string.view/string_view.literals/literal1.pass.cpp
index ba667e09cd189..7e23a70eba486 100644
--- a/libcxx/test/std/strings/string.view/string_view.literals/literal1.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string_view.literals/literal1.pass.cpp
@@ -15,6 +15,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::literals;
diff --git a/libcxx/test/std/strings/string.view/string_view.literals/literal2.pass.cpp b/libcxx/test/std/strings/string.view/string_view.literals/literal2.pass.cpp
index cb49280bfec59..88677d2b433fe 100644
--- a/libcxx/test/std/strings/string.view/string_view.literals/literal2.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string_view.literals/literal2.pass.cpp
@@ -15,6 +15,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::literals::string_view_literals;
diff --git a/libcxx/test/std/strings/string.view/string_view.literals/literal3.pass.cpp b/libcxx/test/std/strings/string.view/string_view.literals/literal3.pass.cpp
index 710933dd95a48..810802365f352 100644
--- a/libcxx/test/std/strings/string.view/string_view.literals/literal3.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string_view.literals/literal3.pass.cpp
@@ -15,6 +15,8 @@
 #include <string_view>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std;
diff --git a/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp b/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp
index 826704a75d363..25708b819ac35 100644
--- a/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.async/async_race.38682.pass.cpp
@@ -29,6 +29,8 @@
 #include <numeric>
 #include <vector>
 
+#include "test_macros.h"
+
 
 static int worker(std::vector<int> const& data) {
   return std::accumulate(data.begin(), data.end(), 0);
diff --git a/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp b/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp
index 9da57e38ae93f..4562169f68dfd 100644
--- a/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.async/async_race.pass.cpp
@@ -28,6 +28,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int f_async() {
     typedef std::chrono::milliseconds ms;
     std::this_thread::sleep_for(ms(200));
diff --git a/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp
index fbb7eb13d92fd..b712254fed127 100644
--- a/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.errors/default_error_condition.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat = std::future_category();
diff --git a/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp
index 3ba3410942dac..379191dac068e 100644
--- a/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.errors/equivalent_error_code_int.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat = std::future_category();
diff --git a/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp
index 9d0e1cf31ba89..9c0dade18e896 100644
--- a/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.errors/equivalent_int_error_condition.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& e_cat = std::future_category();
diff --git a/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp
index 7b9d72344251a..3e5aed2e670b4 100644
--- a/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.errors/future_category.pass.cpp
@@ -16,6 +16,8 @@
 #include <cstring>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::error_category& ec = std::future_category();
diff --git a/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp
index d9e50bf42d347..c28e157704701 100644
--- a/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.errors/make_error_code.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp b/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp
index d055591024657..31997dcd564ea 100644
--- a/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.errors/make_error_condition.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp b/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp
index edf18ba5a9a02..d8ea4ce2de9d5 100644
--- a/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.future_error/types.pass.cpp
@@ -15,6 +15,8 @@
 #include <future>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_convertible<std::future_error*,
diff --git a/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp b/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp
index d7840f45ce6bf..35e1de9dbe2a7 100644
--- a/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.overview/future_errc.pass.cpp
@@ -23,6 +23,8 @@
 
 #include <future>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::future_errc::broken_promise != std::future_errc::future_already_retrieved, "");
diff --git a/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp b/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp
index ceff64f7ebb2d..3b6304d474507 100644
--- a/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.overview/future_status.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <future>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(static_cast<int>(std::future_status::ready) == 0, "");
diff --git a/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp
index ece8b94a5632a..2386510d31f36 100644
--- a/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp
@@ -19,6 +19,7 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp
index 600f99dd992fa..e9b9251906f0d 100644
--- a/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/default.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp
index 030620ad43f85..b634858d22db1 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_exception.pass.cpp
@@ -19,6 +19,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp
index a1a32882beeda..dede79674031f 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp
@@ -19,6 +19,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 void func(std::promise<int> p)
 {
     p.set_exception_at_thread_exit(std::make_exception_ptr(3));
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp
index 9c3b09086b3da..a9bde3e0bf68b 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int i = 0;
 
 void func(std::promise<int&> p)
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp
index 7f54baa8cec92..e5f7d53c8b246 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_rvalue.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     A() {}
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp
index bddd661355699..d6ea501c7e30a 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_rvalue_at_thread_exit.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 void func(std::promise<std::unique_ptr<int>> p)
 {
     p.set_value_at_thread_exit(std::unique_ptr<int>(new int(5)));
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp
index 9258a001149ad..61db7cd97c2b8 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_const.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 void func(std::promise<int> p)
 {
     const int i = 5;
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp
index 1a204421ee929..6314c95a82bf5 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_value_at_thread_exit_void.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int i = 0;
 
 void func(std::promise<void> p)
diff --git a/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp
index d505b3aabf4b9..16f1022b10c4b 100644
--- a/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/set_value_void.pass.cpp
@@ -19,6 +19,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp
index 2b78b1d384b4b..94aadfb328fa4 100644
--- a/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp
@@ -20,6 +20,7 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp
index 1a5028bce3ac2..ad75ff033b676 100644
--- a/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.promise/uses_allocator.pass.cpp
@@ -17,6 +17,7 @@
 //      : true_type { };
 
 #include <future>
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp
index b75450cb9e97d..cd22560d84d8a 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/ctor_future.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp
index 0387b97a70c8c..90f164203724a 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp
index fe49c2208ee97..6eef6150403ec 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp
@@ -19,6 +19,7 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp
index 3940530528db7..e220497094c96 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp
index e1d982d0ee10a..21579b299fd8d 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/move_ctor.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp
index f78ca6bfc4205..34a71660440a5 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 void func1(std::promise<int> p)
 {
     std::this_thread::sleep_for(std::chrono::milliseconds(500));
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp
index 913127af3272a..74d91bf9a1b64 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/wait_for.pass.cpp
@@ -20,6 +20,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::chrono::milliseconds ms;
 
 void func1(std::promise<int> p)
diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp
index 09787fedc3a63..e45125901d934 100644
--- a/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.shared_future/wait_until.pass.cpp
@@ -23,6 +23,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 enum class WorkerThreadState { Uninitialized, AllowedToRun, Exiting };
 typedef std::chrono::milliseconds ms;
 
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp
index 9da7a96e241c1..d1566489a760a 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/assign_move.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     long data_;
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp
index 5472c717adce4..ef820cb9ae89f 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_default.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp
index 20ee8b4b4d2d6..9ac24baf35584 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func.pass.cpp
@@ -19,6 +19,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     long data_;
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp
index 766987ce0dfc3..38b8e4dabea14 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp
@@ -21,6 +21,7 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp
index c517182d3ad78..f7121269a834d 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_move.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     long data_;
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp
index 2cd97900df721..8e9ee59992ac2 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/swap.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     long data_;
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp
index 8c1c19eca6208..23a0c4d6f37fe 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/swap.pass.cpp
@@ -20,6 +20,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     long data_;
diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.pass.cpp
index 5257a7008cd2d..4eb16f52958d8 100644
--- a/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.task/futures.task.nonmembers/uses_allocator.pass.cpp
@@ -25,6 +25,7 @@
 //      : true_type { };
 
 #include <future>
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp
index 60ef645e3c955..d3dc86e3c4095 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp
index ec27219daffbf..2f09ffa02da58 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp
@@ -19,6 +19,7 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp
index b0f0e2b5c54b2..a626ed870ad15 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/move_assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp
index aca5dda64f59b..8ceb9e5a0306f 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/move_ctor.pass.cpp
@@ -17,6 +17,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp
index 979f93cccf7b7..bcf22cfb83284 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/share.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp
index 11fc80868f6d4..956e62eb41aa8 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp
@@ -18,6 +18,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 void func1(std::promise<int> p)
 {
     std::this_thread::sleep_for(std::chrono::milliseconds(500));
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp
index 91f962fd18f81..2bc05664681b5 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/wait_for.pass.cpp
@@ -20,6 +20,8 @@
 #include <future>
 #include <cassert>
 
+#include "test_macros.h"
+
 typedef std::chrono::milliseconds ms;
 
 void func1(std::promise<int> p)
diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp
index 28d9b638aed2d..5ff7da5779fcf 100644
--- a/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp
+++ b/libcxx/test/std/thread/futures/futures.unique_future/wait_until.pass.cpp
@@ -21,6 +21,8 @@
 #include <atomic>
 #include <cassert>
 
+#include "test_macros.h"
+
 enum class WorkerThreadState { Uninitialized, AllowedToRun, Exiting };
 typedef std::chrono::milliseconds ms;
 
diff --git a/libcxx/test/std/thread/macro.pass.cpp b/libcxx/test/std/thread/macro.pass.cpp
index 640db4aaa3ef1..0c16a0d0516df 100644
--- a/libcxx/test/std/thread/macro.pass.cpp
+++ b/libcxx/test/std/thread/macro.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <thread>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 #ifndef __STDCPP_THREADS__
diff --git a/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp b/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp
index af980c3eed365..1de9bc7e22175 100644
--- a/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/cv_status.pass.cpp
@@ -15,6 +15,8 @@
 #include <condition_variable>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(static_cast<int>(std::cv_status::no_timeout) == 0);
diff --git a/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp b/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp
index 9a0e51e3bfcf0..8dd6d630a79c5 100644
--- a/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/notify_all_at_thread_exit.pass.cpp
@@ -23,6 +23,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable cv;
 std::mutex mut;
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp
index aab97f9e14a76..a734ce9d6b424 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <condition_variable>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::condition_variable cv;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp
index 6550109fd7954..1c4eeb94d01b4 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/destructor.pass.cpp
@@ -19,6 +19,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable* cv;
 std::mutex m;
 typedef std::unique_lock<std::mutex> Lock;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp
index 46c53a863f581..f0f614d1c24ed 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_all.pass.cpp
@@ -19,6 +19,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable cv;
 std::mutex mut;
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp
index eb1de67db9fa2..07f9868177a8c 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp
@@ -20,6 +20,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 std::condition_variable cv;
 std::mutex mut;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp
index 03bcfeea94d76..e89bb313b47c4 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait.pass.cpp
@@ -19,6 +19,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable cv;
 std::mutex mut;
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp
index 505997fff4630..1bb74a30d45f4 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for.pass.cpp
@@ -25,6 +25,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable cv;
 std::mutex mut;
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp
index e92ce4583c554..f3322b9b7c041 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_for_pred.pass.cpp
@@ -24,6 +24,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 class Pred
 {
     int& i_;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp
index 0de8524ed1ed8..c4034fc2b215d 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_pred.pass.cpp
@@ -21,6 +21,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable cv;
 std::mutex mut;
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp
index 7f1bdf827cbff..12ccf3f1c06e0 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until.pass.cpp
@@ -23,6 +23,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Clock
 {
     typedef std::chrono::milliseconds duration;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp
index f21b1b54bc66a..fde6f98ab9858 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/wait_until_pred.pass.cpp
@@ -26,6 +26,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Clock
 {
     typedef std::chrono::milliseconds duration;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp
index 0c35da0321b2f..29e8a8ed65371 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <condition_variable>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::condition_variable_any cv;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp
index 35580d4293b40..4b9d719ec5d77 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/destructor.pass.cpp
@@ -19,6 +19,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable_any* cv;
 std::mutex m;
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp
index d12c936028859..0a3328bca9230 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_all.pass.cpp
@@ -19,6 +19,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable_any cv;
 
 typedef std::timed_mutex L0;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp
index 27a0f87e59fe7..38b0cba8fc100 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/notify_one.pass.cpp
@@ -21,6 +21,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable_any cv;
 
 typedef std::timed_mutex L0;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp
index a3b2e87c9e9eb..291fcad827d98 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait.pass.cpp
@@ -20,6 +20,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable_any cv;
 
 typedef std::timed_mutex L0;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp
index d472a698feca6..a8e036d94da6e 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for.pass.cpp
@@ -24,6 +24,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable_any cv;
 
 typedef std::timed_mutex L0;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp
index cbf0193ade392..7b91efaa950bc 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_for_pred.pass.cpp
@@ -23,6 +23,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 class Pred
 {
     int& i_;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp
index eafc434d0c283..41bcd80794edb 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_pred.pass.cpp
@@ -21,6 +21,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::condition_variable_any cv;
 
 typedef std::timed_mutex L0;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp
index e14944906b710..de48880c7683c 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until.pass.cpp
@@ -22,6 +22,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Clock
 {
     typedef std::chrono::milliseconds duration;
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp
index 5eb253a75fc56..7cb48a6f68ec0 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvarany/wait_until_pred.pass.cpp
@@ -26,6 +26,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Clock
 {
     typedef std::chrono::milliseconds duration;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp
index fc76eb34eb4ae..dc0719596c69f 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/adopt_lock.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp
index b9cdb4dec5df2..03d7351b4a5b5 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.guard/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <mutex>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::lock_guard<std::mutex>::mutex_type,
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp
index dfa6ccb14b908..ace3e9d9acf4a 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/default.pass.cpp
@@ -19,6 +19,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::shared_lock<std::shared_timed_mutex> ul;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp
index 616375ae1460c..8145a405b5ec8 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_assign.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 
 int main(int, char**)
 {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp
index ebaa3b69bbc1c..ee5b55d63d933 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/move_ctor.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp
index 924ffed7d6144..573fbfde7218c 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_adopt_lock.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp
index afe2821e6c8b9..053b4dd423cbb 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_defer_lock.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp
index 0894648014bbc..630544b440fe4 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp
@@ -24,6 +24,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_timed_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp
index ce385ddbd6615..010f02b40b9c9 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/member_swap.pass.cpp
@@ -18,6 +18,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct mutex
 {
     void lock_shared() {}
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp
index cec13f0f2f7e1..6e29856c47707 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/nonmember_swap.pass.cpp
@@ -19,6 +19,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct mutex
 {
     void lock_shared() {}
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp
index f2e5820cf0c70..744ae278daebf 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.mod/release.pass.cpp
@@ -18,6 +18,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct mutex
 {
     static int lock_count;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp
index 6bbff79b7bcfb..3f792b5d26a66 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/mutex.pass.cpp
@@ -19,6 +19,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_timed_mutex m;
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp
index b3b0165ac39c6..ec3d296beefc3 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/op_bool.pass.cpp
@@ -19,6 +19,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_timed_mutex m;
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp
index f401afad545be..361f256b5c602 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.obs/owns_lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <shared_mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_timed_mutex m;
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp
index 44d19e8dbb4dc..7f98579f57fbb 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/types.pass.cpp
@@ -23,6 +23,8 @@
 #include <shared_mutex>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::shared_lock<std::mutex>::mutex_type,
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp
index 74b2651410233..7dc374c232388 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::unique_lock<std::mutex> ul;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp
index 1e663766b10ba..3fc00b5906ad2 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_assign.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp
index 8ea0a1f2d461c..f3440a98e306e 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/move_ctor.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp
index d957c6d24b026..e599ca59f5707 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_adopt_lock.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp
index af6853160f620..1224a8c77ec9b 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_defer_lock.pass.cpp
@@ -18,6 +18,8 @@
 #include <cassert>
 #include "nasty_containers.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp
index 8699dd554affb..110d2b91fcb14 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_duration.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::timed_mutex m;
 
 typedef std::chrono::steady_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp
index ab46dacad3c76..50d55427a95f2 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_time_point.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::timed_mutex m;
 
 typedef std::chrono::steady_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp
index 448be8e7e142b..3c068d015251c 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp
index 707755f59a8d6..8641bcbeab733 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/member_swap.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct mutex
 {
     void lock() {}
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp
index 1c05657d6babd..ecb262009b503 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/nonmember_swap.pass.cpp
@@ -18,6 +18,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct mutex
 {
     void lock() {}
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp
index 9751149b9bd91..eb26114bb1034 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.mod/release.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct mutex
 {
     static int lock_count;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp
index 899f965dfe18a..f9f3050b09604 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/mutex.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp
index 1affe8deb19dd..681fc4ba5d799 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/op_bool.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp
index 2c5496b294b34..c25e9a2176875 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.obs/owns_lock.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 int main(int, char**)
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp
index 7dc093ac6390a..85aa6f5d75efc 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <mutex>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::unique_lock<std::mutex>::mutex_type,
diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp
index 150d9b4aafc61..84c39baafa6aa 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.lock/types.pass.cpp
@@ -21,6 +21,8 @@
 #include <mutex>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::defer_lock_t T1;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp
index b5a608eeb0cec..e2fd416b6e43d 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <mutex>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::is_nothrow_default_constructible<std::mutex>::value, "");
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp
index 6999792d7961a..d955ccbe08895 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp
index 71221558331ac..c5abe77172477 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp
index e32c92f0c97b8..2e27d157f2662 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/default.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::recursive_mutex m;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp
index 12ce0e2ce6f50..70061936bfd54 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::recursive_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp
index 092343fe752a4..8a133c1114631 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::recursive_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp
index ecd29b7485a74..e6d35f40eecc0 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/default.pass.cpp
@@ -17,6 +17,8 @@
 
 #include <shared_mutex>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::shared_mutex m;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp
index 9d6b558ffe127..ad2094b1f65ea 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp
index b9538b5649757..fb605f6db573c 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp
@@ -23,6 +23,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp
index 83b30b98dd91e..7b128e5196396 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/default.pass.cpp
@@ -17,6 +17,8 @@
 
 #include <shared_mutex>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::shared_timed_mutex m;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp
index 6b2d9a543e57e..9932bd45b68a7 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_timed_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp
index 865ab92545a6f..d17d91b9e72d0 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_until_deadlock_bug.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::shared_timed_mutex m;
 
 const int total_readers = 2;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp
index c879f192ee18a..8485d73c037f9 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/default.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::timed_mutex m;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp
index c775b3f5386ac..baa1939420e74 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::timed_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp
index 7398b7f6d0b8d..769b22e4685fe 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::timed_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp
index 0103cdf64195e..e6f5455940e92 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_for.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::timed_mutex m;
 
 typedef std::chrono::steady_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp
index 350bb767d48ea..1074904a45226 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock_until.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::timed_mutex m;
 
 typedef std::chrono::steady_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp
index ee6124c027b7e..028a98a84977e 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/default.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <mutex>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::recursive_timed_mutex m;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp
index ac478598596ca..3dabae3b5a95e 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp
@@ -21,6 +21,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::recursive_timed_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp
index 05b22c0deb1ff..d5517b18a12c7 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::recursive_timed_mutex m;
 
 typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp
index 9e5ad5ee9a5da..8d58a6623e9dd 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_for.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::recursive_timed_mutex m;
 
 typedef std::chrono::steady_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp
index f6b9d106a1b76..fa8c9ae704f81 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock_until.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::recursive_timed_mutex m;
 
 typedef std::chrono::steady_clock Clock;
diff --git a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp
index 511aa3e804b05..4ff0274c17a45 100644
--- a/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp
+++ b/libcxx/test/std/thread/thread.mutex/thread.once/thread.once.callonce/race.pass.cpp
@@ -23,6 +23,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 std::once_flag flg0;
 long global = 0;
 
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp
index 68f20d7532b5c..f2cbba6bda5a7 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.algorithm/swap.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp
index cbc32c8c2ce1d..c42c1d07b784d 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move.pass.cpp
@@ -17,6 +17,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp
index 81c6d77e8a719..1d76eefc39344 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.assign/move2.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp
index 135d3ceba9997..2e3af5d14fb5c 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread t;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp
index 25703b2c3cabe..ac606085d6010 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/move.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp
index 320b4459b94c6..34e0e51b1200a 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.destr/dtor.pass.cpp
@@ -20,6 +20,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp
index fb4b7eb5ae054..fddc9538b2f8c 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/assign.pass.cpp
@@ -17,6 +17,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id0;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp
index f95617b4f53ca..7da109752b968 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/copy.pass.cpp
@@ -17,6 +17,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id0;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp
index 32a083ca8aae6..4d2bcfaee6e1a 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp
index 3858508e1c9a1..48cb83b546d74 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/enabled_hashes.pass.cpp
@@ -18,6 +18,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
   {
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp
index 5c557fddc7f91..e58f1d7d0d4a8 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/eq.pass.cpp
@@ -18,6 +18,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id0;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp
index 8af73045a792e..3ef7f362532ae 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/lt.pass.cpp
@@ -20,6 +20,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id0;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp
index a1541c12e2293..9f1f18dd66475 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.id/stream.pass.cpp
@@ -20,6 +20,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id0 = std::this_thread::get_id();
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp
index 006bc1e65c49d..676666c342623 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/get_id.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp
index 6f1308cbad097..a2ffe8bb19a26 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/joinable.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp
index f43805d7fe6f9..a1ab4781d95a2 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.member/swap.pass.cpp
@@ -19,6 +19,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class G
 {
     int alive_;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp
index 5493f27a4940e..65e99e015af95 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp
@@ -17,6 +17,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::thread::hardware_concurrency() > 0);
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp
index 1bf46cdb54f4b..4149f5a43a6ce 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/get_id.pass.cpp
@@ -15,6 +15,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::thread::id id = std::this_thread::get_id();
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp
index c73144db0c15d..16fb12e79e1d2 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp
@@ -18,6 +18,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::system_clock Clock;
diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp
index 6f772b5c2860c..bcb1a2f0f871a 100644
--- a/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp
+++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/yield.pass.cpp
@@ -15,6 +15,8 @@
 #include <thread>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::this_thread::yield();
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp
index 961eda3ac8f06..957ceffea306c 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp
@@ -20,6 +20,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_copy.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_copy.pass.cpp
index d3734cae10e9a..5328f6b4f8a0b 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_copy.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_copy.pass.cpp
@@ -20,6 +20,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_move.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_move.pass.cpp
index 427e299578802..26dfb257cb2ae 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_move.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/converting_move.pass.cpp
@@ -20,6 +20,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/copy.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/copy.pass.cpp
index 69b767b909e2a..48843ba1b999b 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/copy.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/copy.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/default.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/default.pass.cpp
index fdf21cec3e08c..c3fc9730868c8 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/default.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/default.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.pass.cpp
index 05a1649e4a6dd..9bebc4deaed8d 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.pass.cpp
index db9338d31b861..f4aa62fa4bb73 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct.pass.cpp
index 97ae33d9157ae..5201c5c6cab7e 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct.pass.cpp
@@ -19,6 +19,7 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 struct B
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair.pass.cpp
index 7aa45f05add9d..9748d66fb452c 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair.pass.cpp
@@ -25,6 +25,8 @@
 #include "uses_alloc_types.hpp"
 #include "controlled_allocators.hpp"
 
+#include "test_macros.h"
+
 
 void test_no_inner_alloc()
 {
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_const_lvalue_pair.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_const_lvalue_pair.pass.cpp
index d1a03bc05fce0..103f8f4132768 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_const_lvalue_pair.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_const_lvalue_pair.pass.cpp
@@ -25,6 +25,8 @@
 #include "uses_alloc_types.hpp"
 #include "controlled_allocators.hpp"
 
+#include "test_macros.h"
+
 
 void test_no_inner_alloc()
 {
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_piecewise.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_piecewise.pass.cpp
index 14f413bd86715..d8f5e7c787946 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_piecewise.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_piecewise.pass.cpp
@@ -26,6 +26,8 @@
 #include "uses_alloc_types.hpp"
 #include "controlled_allocators.hpp"
 
+#include "test_macros.h"
+
 
 void test_no_inner_alloc()
 {
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_rvalue.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_rvalue.pass.cpp
index c26b4652cc490..5632e2112d9e7 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_rvalue.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_rvalue.pass.cpp
@@ -25,6 +25,8 @@
 #include "uses_alloc_types.hpp"
 #include "controlled_allocators.hpp"
 
+#include "test_macros.h"
+
 
 void test_no_inner_alloc()
 {
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_values.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_values.pass.cpp
index 7a4b14956f034..9ba2d155d2a4f 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_values.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_pair_values.pass.cpp
@@ -25,6 +25,8 @@
 #include "uses_alloc_types.hpp"
 #include "controlled_allocators.hpp"
 
+#include "test_macros.h"
+
 
 void test_no_inner_alloc()
 {
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_type.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_type.pass.cpp
index e93f37f412e5b..2dee95d34dab9 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_type.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/construct_type.pass.cpp
@@ -25,6 +25,8 @@
 #include "uses_alloc_types.hpp"
 #include "controlled_allocators.hpp"
 
+#include "test_macros.h"
+
 // - If uses_allocator_v<T, inner_allocator_type> is false and
 //   is_constructible_v<T, Args...> is true, calls
 //   OUTERMOST_ALLOC_TRAITS(*this)::construct(
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/deallocate.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/deallocate.pass.cpp
index 425f00ad4f43d..ce37c82e60ad2 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/deallocate.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/deallocate.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/destroy.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/destroy.pass.cpp
index 50c9d24f07d3b..1d8ca370b19ae 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/destroy.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/destroy.pass.cpp
@@ -19,6 +19,7 @@
 #include <cassert>
 #include <string>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 struct B
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/inner_allocator.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/inner_allocator.pass.cpp
index 7e73939f8ee05..cbe8a7ec6fb6a 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/inner_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/inner_allocator.pass.cpp
@@ -19,6 +19,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/max_size.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/max_size.pass.cpp
index de9cf4d0cad75..5882213f2416b 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/max_size.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/max_size.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/outer_allocator.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/outer_allocator.pass.cpp
index 9a90d17c0c6bf..7c812c3ad6639 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/outer_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/outer_allocator.pass.cpp
@@ -19,6 +19,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/select_on_container_copy_construction.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/select_on_container_copy_construction.pass.cpp
index 8253fee5518d3..9d79ac2f8398a 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/select_on_container_copy_construction.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/select_on_container_copy_construction.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/allocator_pointers.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/allocator_pointers.pass.cpp
index e17a6e9c5cb19..c3d9175cfd90f 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/allocator_pointers.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/allocator_pointers.pass.cpp
@@ -13,6 +13,8 @@
 #include <cassert>
 
 // #include <memory>
+
+#include "test_macros.h"
 //
 // template <class Alloc>
 // struct allocator_traits
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/inner_allocator_type.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/inner_allocator_type.pass.cpp
index 2aa7a98911477..b04bdf528451c 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/inner_allocator_type.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/inner_allocator_type.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/is_always_equal.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/is_always_equal.pass.cpp
index 628505f65cdc4..55f8482b1a36b 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/is_always_equal.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/is_always_equal.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_copy_assignment.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_copy_assignment.pass.cpp
index fea53af299e7c..c22d6ec0f5f3d 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_copy_assignment.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_copy_assignment.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_move_assignment.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_move_assignment.pass.cpp
index d04ea6f1a4399..bf363e3afa72e 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_move_assignment.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_move_assignment.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_swap.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_swap.pass.cpp
index fcec67879fcac..70c40d5b3481b 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_swap.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.types/propagate_on_container_swap.pass.cpp
@@ -18,6 +18,7 @@
 #include <scoped_allocator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/copy_assign.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/copy_assign.pass.cpp
index 1f871876f1359..ca22e1265953e 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/copy_assign.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/copy_assign.pass.cpp
@@ -19,6 +19,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/eq.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/eq.pass.cpp
index aaf5c1d59add5..c26c7a071a11c 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/eq.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/eq.pass.cpp
@@ -26,6 +26,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/move_assign.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/move_assign.pass.cpp
index c17c6d3843997..9fc1c60a2560f 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/move_assign.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/scoped.adaptor.operators/move_assign.pass.cpp
@@ -19,6 +19,7 @@
 #include <scoped_allocator>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/allocator.adaptor/types.pass.cpp b/libcxx/test/std/utilities/allocator.adaptor/types.pass.cpp
index 7820e29eef27f..a28eface78911 100644
--- a/libcxx/test/std/utilities/allocator.adaptor/types.pass.cpp
+++ b/libcxx/test/std/utilities/allocator.adaptor/types.pass.cpp
@@ -27,6 +27,7 @@
 #include <scoped_allocator>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "allocators.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp
index a8ac0d6012ecb..fd4dee2f2f333 100644
--- a/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/reset.pass.cpp
@@ -17,6 +17,7 @@
 #include <any>
 #include <cassert>
 
+#include "test_macros.h"
 #include "any_helpers.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp
index f49511138dd58..9c85d3f6cf795 100644
--- a/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.modifiers/swap.pass.cpp
@@ -19,6 +19,7 @@
 #include <any>
 #include <cassert>
 
+#include "test_macros.h"
 #include "any_helpers.h"
 
 using std::any;
diff --git a/libcxx/test/std/utilities/any/any.class/any.observers/has_value.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.observers/has_value.pass.cpp
index 54b4153c9749e..3de84270c7979 100644
--- a/libcxx/test/std/utilities/any/any.class/any.observers/has_value.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/any.observers/has_value.pass.cpp
@@ -15,6 +15,7 @@
 #include <any>
 #include <cassert>
 
+#include "test_macros.h"
 #include "any_helpers.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp b/libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp
index 3a275d51e4ac7..1097fc5aec498 100644
--- a/libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.class/not_literal_type.pass.cpp
@@ -15,6 +15,8 @@
 #include <any>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**) {
     static_assert(!std::is_literal_type<std::any>::value, "");
 
diff --git a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp
index 5461a4d0cdf14..1e982235f4a23 100644
--- a/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp
+++ b/libcxx/test/std/utilities/any/any.nonmembers/swap.pass.cpp
@@ -19,6 +19,8 @@
 #include <any>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::any;
 using std::any_cast;
 
diff --git a/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.pass.cpp
index 541db06a175fd..c74935d279418 100644
--- a/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.pass.cpp
+++ b/libcxx/test/std/utilities/charconv/charconv.from.chars/integral.pass.cpp
@@ -24,6 +24,7 @@
 // from_chars_result from_chars(const char* first, const char* last,
 //                              Integral& value, int base = 10)
 
+#include "test_macros.h"
 #include "charconv_test_helpers.h"
 
 template <typename T>
diff --git a/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp
index f6d81dc3f3555..4bfe27d6dac7c 100644
--- a/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp
+++ b/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp
@@ -24,6 +24,7 @@
 // to_chars_result to_chars(char* first, char* last, Integral value,
 //                          int base = 10)
 
+#include "test_macros.h"
 #include "charconv_test_helpers.h"
 
 template <typename T>
diff --git a/libcxx/test/std/utilities/function.objects/arithmetic.operations/transparent.pass.cpp b/libcxx/test/std/utilities/function.objects/arithmetic.operations/transparent.pass.cpp
index 154a0f8877a5f..ec5dd27e80b34 100644
--- a/libcxx/test/std/utilities/function.objects/arithmetic.operations/transparent.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/arithmetic.operations/transparent.pass.cpp
@@ -10,6 +10,8 @@
 #include <functional>
 #include <string>
 
+#include "test_macros.h"
+
 template <class T>
 struct is_transparent
 {
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/PR23141_invoke_not_constexpr.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/PR23141_invoke_not_constexpr.pass.cpp
index 931778f54d91c..b1bc35889dfba 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/PR23141_invoke_not_constexpr.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/PR23141_invoke_not_constexpr.pass.cpp
@@ -19,6 +19,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct Fun
 {
   template<typename T, typename U>
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp
index 8beeb3321d517..dfde0759341cc 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp
@@ -21,6 +21,8 @@
 #include <cmath>
 #include <cassert>
 
+#include "test_macros.h"
+
 float _pow(float a, float b)
 {
     return std::pow(a, b);
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_function_object.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_function_object.pass.cpp
index b87918da11047..d7174df33691a 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_function_object.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_function_object.pass.cpp
@@ -19,6 +19,8 @@
 
 #include <functional>
 
+#include "test_macros.h"
+
 struct DummyUnaryFunction
 {
     template <typename S>
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_int_0.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_int_0.pass.cpp
index a77e1895d6f74..0f04df907e67a 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_int_0.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_int_0.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class R, class F>
 void
 test(F f, R expected)
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_lvalue.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_lvalue.pass.cpp
index 9b81d3301325c..5da40be9a6f0e 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_lvalue.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_lvalue.pass.cpp
@@ -20,6 +20,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int count = 0;
 
 // 1 arg, return void
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_rvalue.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_rvalue.pass.cpp
index 10d2ce017f8fa..d9de4ef6578bd 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_rvalue.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_rvalue.pass.cpp
@@ -20,6 +20,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int count = 0;
 
 // 1 arg, return void
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_void_0.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_void_0.pass.cpp
index 2c8e56f034e8c..20959a5b8aca9 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_void_0.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/invoke_void_0.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int count = 0;
 
 template <class F>
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/nested.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/nested.pass.cpp
index 0d5be34135393..26196d4501455 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/nested.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/nested.pass.cpp
@@ -21,6 +21,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct power
 {
   template <typename T>
diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.isbind/is_bind_expression_03.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.isbind/is_bind_expression_03.pass.cpp
index c1af159b29366..3fe1be7946032 100644
--- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.isbind/is_bind_expression_03.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.isbind/is_bind_expression_03.pass.cpp
@@ -17,6 +17,8 @@
 
 #include <functional>
 
+#include "test_macros.h"
+
 template <class T>
 void test() {
     static_assert(!std::is_bind_expression<T>::value, "");
diff --git a/libcxx/test/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp b/libcxx/test/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp
index 8bfa48f3e70ca..c7c1cf12cdd8b 100644
--- a/libcxx/test/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bitwise.operations/bit_not.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::bit_not<int> F;
diff --git a/libcxx/test/std/utilities/function.objects/bitwise.operations/transparent.pass.cpp b/libcxx/test/std/utilities/function.objects/bitwise.operations/transparent.pass.cpp
index 5ad0f233f19e9..b2353473b5327 100644
--- a/libcxx/test/std/utilities/function.objects/bitwise.operations/transparent.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/bitwise.operations/transparent.pass.cpp
@@ -10,6 +10,8 @@
 #include <functional>
 #include <string>
 
+#include "test_macros.h"
+
 template <class T>
 struct is_transparent
 {
diff --git a/libcxx/test/std/utilities/function.objects/comparisons/transparent.pass.cpp b/libcxx/test/std/utilities/function.objects/comparisons/transparent.pass.cpp
index 4be81925b7f70..cf8fb32bb76e0 100644
--- a/libcxx/test/std/utilities/function.objects/comparisons/transparent.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/comparisons/transparent.pass.cpp
@@ -10,6 +10,8 @@
 #include <functional>
 #include <string>
 
+#include "test_macros.h"
+
 template <class T>
 struct is_transparent
 {
diff --git a/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp b/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp
index 0f25db0399fba..6a385c74c989a 100644
--- a/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.invoke/invoke.pass.cpp
@@ -43,6 +43,8 @@
 #include <utility> // for std::move
 #include <cassert>
 
+#include "test_macros.h"
+
 struct NonCopyable {
     NonCopyable() {}
 private:
diff --git a/libcxx/test/std/utilities/function.objects/func.memfn/member_data.pass.cpp b/libcxx/test/std/utilities/function.objects/func.memfn/member_data.pass.cpp
index 52581881a5b2c..0c340072da5a4 100644
--- a/libcxx/test/std/utilities/function.objects/func.memfn/member_data.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.memfn/member_data.pass.cpp
@@ -13,6 +13,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     double data_;
diff --git a/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const.pass.cpp b/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const.pass.cpp
index dc93196cdc396..006a5bec2d392 100644
--- a/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const.pass.cpp
@@ -14,6 +14,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char test0() const {return 'a';}
diff --git a/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const_volatile.pass.cpp b/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const_volatile.pass.cpp
index 594e1de8ad631..175f2de700556 100644
--- a/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const_volatile.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.memfn/member_function_const_volatile.pass.cpp
@@ -14,6 +14,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char test0() const volatile {return 'a';}
diff --git a/libcxx/test/std/utilities/function.objects/func.memfn/member_function_volatile.pass.cpp b/libcxx/test/std/utilities/function.objects/func.memfn/member_function_volatile.pass.cpp
index 04439387dfec9..7f27c2f1d99ed 100644
--- a/libcxx/test/std/utilities/function.objects/func.memfn/member_function_volatile.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.memfn/member_function_volatile.pass.cpp
@@ -14,6 +14,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     char test0() volatile {return 'a';}
diff --git a/libcxx/test/std/utilities/function.objects/func.require/binary_function.pass.cpp b/libcxx/test/std/utilities/function.objects/func.require/binary_function.pass.cpp
index 79a4855ea02bd..c1d5ba8899c82 100644
--- a/libcxx/test/std/utilities/function.objects/func.require/binary_function.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.require/binary_function.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::binary_function<int, short, bool> bf;
diff --git a/libcxx/test/std/utilities/function.objects/func.require/unary_function.pass.cpp b/libcxx/test/std/utilities/function.objects/func.require/unary_function.pass.cpp
index f68b4b37c7332..ebce76bc35d7f 100644
--- a/libcxx/test/std/utilities/function.objects/func.require/unary_function.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.require/unary_function.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::unary_function<int, bool> uf;
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/default.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/default.pass.cpp
index 55cde8fe77764..e48a5f4ea0949 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/default.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/default.pass.cpp
@@ -36,6 +36,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp
index 106b0d37f61c9..e9d93fc07fdd3 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp
@@ -36,6 +36,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp
index be4db4e5a5418..32f82df85fc00 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp
@@ -36,6 +36,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/pred.pass.cpp
index 3656caa874988..70e182eb4dd00 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/pred.pass.cpp
@@ -36,6 +36,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/default.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/default.pass.cpp
index 0b345deedc1c3..14c295f4036ca 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/default.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/default.pass.cpp
@@ -36,6 +36,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp
index 4106c5d6ae7fb..d364f3e00b8ab 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp
@@ -35,6 +35,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp
index 757bcc75792c8..ac9bdae6e12e3 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp
@@ -35,6 +35,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename T> struct MyHash {
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/pred.pass.cpp
index 3a20b88512fae..4494f7644cb76 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/pred.pass.cpp
@@ -35,6 +35,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pass.cpp
index eaf5eeb8273fc..6757b058748b5 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pass.cpp
@@ -35,6 +35,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 template <typename Iter1, typename Iter2>
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pred.pass.cpp
index 773336f662664..043903d66da21 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.default/default.pred.pass.cpp
@@ -35,6 +35,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "test_iterators.h"
 
 struct count_equal
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/bad_function_call.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/bad_function_call.pass.cpp
index eb223b88e4908..254ad9e2bcf6a 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/bad_function_call.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/bad_function_call.pass.cpp
@@ -19,6 +19,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::bad_function_call>::value), "");
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/func.wrap.badcall.const/bad_function_call_ctor.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/func.wrap.badcall.const/bad_function_call_ctor.pass.cpp
index 385919227e056..e72ca95bae82b 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/func.wrap.badcall.const/bad_function_call_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.badcall/func.wrap.badcall.const/bad_function_call_ctor.pass.cpp
@@ -13,6 +13,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::bad_function_call ex;
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.cap/operator_bool.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.cap/operator_bool.pass.cpp
index ab5eef37270cf..ba19137c5af9e 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.cap/operator_bool.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.cap/operator_bool.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int g(int) {return 0;}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_incomplete.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_incomplete.pass.cpp
index 21c2f216efe8b..5f0fb2faf5691 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_incomplete.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/F_incomplete.pass.cpp
@@ -17,6 +17,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct X{
     typedef std::function<void(X&)> callback_type;
     virtual ~X() {}
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc.pass.cpp
index 3b37ce6ffdc33..3ae14fdea342d 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc.pass.cpp
@@ -18,6 +18,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_function.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_function.pass.cpp
index 583ca162ebf3b..3f3b5c696d75f 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_function.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_function.pass.cpp
@@ -17,6 +17,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 #include "test_allocator.h"
 #include "count_new.hpp"
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_nullptr.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_nullptr.pass.cpp
index 653057f6ec9e0..12de30e4a3424 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_nullptr.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_nullptr.pass.cpp
@@ -18,6 +18,7 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
 #include "min_allocator.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/default.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/default.pass.cpp
index e89bafba775a2..4b7e5f79dcbc4 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/default.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::function<int(int)> f;
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t.pass.cpp
index d58e191c4709a..cfd842345286a 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::function<int(int)> f(nullptr);
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp
index ff81080ffdaf1..00ab664f7f02b 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/nullptr_t_assign.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 class A
 {
     int data_[10];
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.inv/invoke.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.inv/invoke.pass.cpp
index fb67a3abf4ada..60c540fa81ef0 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.inv/invoke.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.inv/invoke.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 int count = 0;
 
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp
index 93997a0b55f8f..8c8f3f53db6d2 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.mod/swap.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 class A {
   int data_[10];
 
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.nullptr/operator_==.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.nullptr/operator_==.pass.cpp
index 698a461c8f031..6730536f76b5a 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.nullptr/operator_==.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.nullptr/operator_==.pass.cpp
@@ -25,6 +25,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int g(int) {return 0;}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp
index d5031ba06e77c..317fbdb0bdaf1 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target.pass.cpp
@@ -24,6 +24,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     int data_[10];
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp
index d9c8fc48d758a..1ff1854e4d67f 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.targ/target_type.pass.cpp
@@ -16,6 +16,8 @@
 #include <typeinfo>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     int data_[10];
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/types.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/types.pass.cpp
index 8083ad83ac0cf..1399415a52d14 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/types.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/types.pass.cpp
@@ -24,6 +24,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 template <typename T>
 class has_argument_type
diff --git a/libcxx/test/std/utilities/function.objects/logical.operations/transparent.pass.cpp b/libcxx/test/std/utilities/function.objects/logical.operations/transparent.pass.cpp
index eb590b31a014e..42d12fc12bacc 100644
--- a/libcxx/test/std/utilities/function.objects/logical.operations/transparent.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/logical.operations/transparent.pass.cpp
@@ -10,6 +10,8 @@
 #include <functional>
 #include <string>
 
+#include "test_macros.h"
+
 template <class T>
 struct is_transparent
 {
diff --git a/libcxx/test/std/utilities/function.objects/negators/binary_negate.pass.cpp b/libcxx/test/std/utilities/function.objects/negators/binary_negate.pass.cpp
index 91c19ece612c7..cc8e4219f8f83 100644
--- a/libcxx/test/std/utilities/function.objects/negators/binary_negate.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/negators/binary_negate.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::binary_negate<std::logical_and<int> > F;
diff --git a/libcxx/test/std/utilities/function.objects/negators/not1.pass.cpp b/libcxx/test/std/utilities/function.objects/negators/not1.pass.cpp
index ccd6ab0673656..2e436a69446af 100644
--- a/libcxx/test/std/utilities/function.objects/negators/not1.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/negators/not1.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::logical_not<int> F;
diff --git a/libcxx/test/std/utilities/function.objects/negators/not2.pass.cpp b/libcxx/test/std/utilities/function.objects/negators/not2.pass.cpp
index 57d41a0d0f4fc..e4b69068c5ffc 100644
--- a/libcxx/test/std/utilities/function.objects/negators/not2.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/negators/not2.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::logical_and<int> F;
diff --git a/libcxx/test/std/utilities/function.objects/negators/unary_negate.pass.cpp b/libcxx/test/std/utilities/function.objects/negators/unary_negate.pass.cpp
index 4875c9ba2b09f..da75720afb01a 100644
--- a/libcxx/test/std/utilities/function.objects/negators/unary_negate.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/negators/unary_negate.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::unary_negate<std::logical_not<int> > F;
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.access/conversion.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.access/conversion.pass.cpp
index c15989f003e70..3a7121cbcbcf3 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.access/conversion.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.access/conversion.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 class functor1
 {
 };
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.assign/copy_assign.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.assign/copy_assign.pass.cpp
index 3ef0cdda8696f..f82ee72de9152 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.assign/copy_assign.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.assign/copy_assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 class functor1
 {
 };
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/copy_ctor.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/copy_ctor.pass.cpp
index 355047d98ceba..a20cf8f3bd3b0 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/copy_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/copy_ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 class functor1
 {
 };
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/ctor.incomplete.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/ctor.incomplete.pass.cpp
index d26e4cb90110e..c0e50022cb4b2 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/ctor.incomplete.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/ctor.incomplete.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 struct Foo;
 
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.pass.cpp
index a43d0fd25109f..4d536a20411dc 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 class functor1
 {
 };
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref.incomplete.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref.incomplete.pass.cpp
index 4aad297adb497..b72a0bbc2701d 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref.incomplete.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref.incomplete.pass.cpp
@@ -19,6 +19,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 struct Foo;
 
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp
index d4cb421f53eb4..2286d949e657f 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int i = 0;
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp
index 093b7e230fd49..b4707f4e4bc30 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const int i = 0;
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref.incomplete.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref.incomplete.pass.cpp
index d5c26b274a4e7..a4f8b5f087aef 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref.incomplete.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref.incomplete.pass.cpp
@@ -19,6 +19,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 struct Foo;
 
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp
index f64d8ad8caeba..41614d3b2e2a1 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int i = 0;
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp
index 4fdaf992c64d0..8e5e7c30e1e79 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "counting_predicates.hpp"
 
+#include "test_macros.h"
+
 bool is5 ( int i ) { return i == 5; }
 
 template <typename T>
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.pass.cpp
index fd31041330dd3..e5f3b13c8a1ef 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 int count = 0;
 
 // 1 arg, return void
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_int_0.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_int_0.pass.cpp
index 67cf51f33596f..57748371a0038 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_int_0.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_int_0.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 // 0 args, return int
 
 int count = 0;
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_void_0.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_void_0.pass.cpp
index 18e655313336d..cfaf75b157d50 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_void_0.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.invoke/invoke_void_0.pass.cpp
@@ -18,6 +18,8 @@
 #include <functional>
 #include <cassert>
 
+#include "test_macros.h"
+
 // 0 args, return void
 
 int count = 0;
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/type.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/type.pass.cpp
index d17ab8100ef0d..31e1b693981dd 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/type.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/type.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 class C {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/unwrap_ref_decay.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/unwrap_ref_decay.pass.cpp
index 1987898393cd1..d41baa7833c33 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/unwrap_ref_decay.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/unwrap_ref_decay.pass.cpp
@@ -19,6 +19,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 template <typename T, typename Result>
 void check() {
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/unwrap_reference.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/unwrap_reference.pass.cpp
index 209d5e2a0bc93..c9967d3a30af0 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/unwrap_reference.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/unwrap_reference.pass.cpp
@@ -19,6 +19,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 template <typename T, typename Expected>
 void check_equal() {
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp
index 5a6a41fdbe0eb..a3577ff064275 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/weak_result.pass.cpp
@@ -15,6 +15,8 @@
 #include <functional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class Arg, class Result>
 struct my_unary_function
 { // std::unary_function was removed in C++17
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/enabled_hashes.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/enabled_hashes.pass.cpp
index 8f6c3e14e2cf9..c871154676476 100644
--- a/libcxx/test/std/utilities/function.objects/unord.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/enabled_hashes.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
 
diff --git a/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp b/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp
index 90b09132d9e4c..978032dce655e 100644
--- a/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp
+++ b/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename AtContainer, typename T, T... I>
 auto extract ( const AtContainer &t, const std::integer_sequence<T, I...> )
 -> decltype ( std::make_tuple ( std::get<I>(t)... ))
diff --git a/libcxx/test/std/utilities/intseq/intseq.intseq/integer_seq.pass.cpp b/libcxx/test/std/utilities/intseq/intseq.intseq/integer_seq.pass.cpp
index a8e14c9a9cdd8..0b23ddbcfd36e 100644
--- a/libcxx/test/std/utilities/intseq/intseq.intseq/integer_seq.pass.cpp
+++ b/libcxx/test/std/utilities/intseq/intseq.intseq/integer_seq.pass.cpp
@@ -22,6 +22,8 @@
 #include <cstddef>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
 //  Make a few of sequences
diff --git a/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq.pass.cpp b/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq.pass.cpp
index 50b49dd725e40..b0d1a1e557362 100644
--- a/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq.pass.cpp
+++ b/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::is_same<std::make_integer_sequence<int, 0>, std::integer_sequence<int>>::value, "");
diff --git a/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq_fallback.pass.cpp b/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq_fallback.pass.cpp
index e6b5a58c9228d..c0097a943c197 100644
--- a/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq_fallback.pass.cpp
+++ b/libcxx/test/std/utilities/intseq/intseq.make/make_integer_seq_fallback.pass.cpp
@@ -15,3 +15,5 @@
 
 #define _LIBCPP_TESTING_FALLBACK_MAKE_INTEGER_SEQUENCE
 #include "make_integer_seq.pass.cpp"
+
+#include "test_macros.h"
diff --git a/libcxx/test/std/utilities/memory/allocator.tag/allocator_arg.pass.cpp b/libcxx/test/std/utilities/memory/allocator.tag/allocator_arg.pass.cpp
index 1a58726a85462..0253243319af4 100644
--- a/libcxx/test/std/utilities/memory/allocator.tag/allocator_arg.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.tag/allocator_arg.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <memory>
 
+#include "test_macros.h"
+
 void test(std::allocator_arg_t) {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.pass.cpp
index 0ac2f266e8155..9f923074d26bf 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.pass.cpp
@@ -19,6 +19,7 @@
 #include <cstdint>
 #include <cassert>
 
+#include "test_macros.h"
 #include "incomplete_type_helper.h"
 
 template <class T>
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/deallocate.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/deallocate.pass.cpp
index c738416e01dce..cc2abda6da0a5 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/deallocate.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/deallocate.pass.cpp
@@ -19,6 +19,7 @@
 #include <cstdint>
 #include <cassert>
 
+#include "test_macros.h"
 #include "incomplete_type_helper.h"
 
 int called = 0;
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.types/is_always_equal.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.types/is_always_equal.pass.cpp
index 42b0fbab19ccd..add8c0b118610 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.types/is_always_equal.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.types/is_always_equal.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 struct A
 {
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator_type.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator_type.pass.cpp
index 840ad820e1fe1..b3b6d7275e205 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/allocator_type.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/allocator_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 struct A
 {
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/value_type.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/value_type.pass.cpp
index 047d40d5c89a1..b7cc2cd250353 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/value_type.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/value_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 struct A
 {
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.globals/eq.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.globals/eq.pass.cpp
index 63412bce1f4b5..30f847a4cad52 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.globals/eq.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.globals/eq.pass.cpp
@@ -21,6 +21,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::allocator<int> a1;
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/address.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/address.pass.cpp
index c4ff55d0eab29..a1333619473da 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/address.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/address.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 void test_address()
 {
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/max_size.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/max_size.pass.cpp
index c2094bc044a5e..2991bdfc31ec6 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/max_size.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/max_size.pass.cpp
@@ -16,6 +16,8 @@
 #include <cstddef>
 #include <cassert>
 
+#include "test_macros.h"
+
 int new_called = 0;
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp
index ad4319410a792..416d16c87bcdc 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_pointers.pass.cpp
@@ -12,6 +12,8 @@
 #include <cassert>
 
 // #include <memory>
+
+#include "test_macros.h"
 //
 // template <class Alloc>
 // struct allocator_traits
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator_void.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator_void.pass.cpp
index 528902d2112d9..7c091d3d97c61 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator_void.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator_void.pass.cpp
@@ -22,6 +22,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::allocator<void>::pointer, void*>::value), "");
diff --git a/libcxx/test/std/utilities/memory/pointer.traits/difference_type.pass.cpp b/libcxx/test/std/utilities/memory/pointer.traits/difference_type.pass.cpp
index 3eaedab16700b..7546df9028820 100644
--- a/libcxx/test/std/utilities/memory/pointer.traits/difference_type.pass.cpp
+++ b/libcxx/test/std/utilities/memory/pointer.traits/difference_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::pointer_traits<double*>::difference_type, std::ptrdiff_t>::value), "");
diff --git a/libcxx/test/std/utilities/memory/pointer.traits/element_type.pass.cpp b/libcxx/test/std/utilities/memory/pointer.traits/element_type.pass.cpp
index 505881ddeeaf0..e4f11c28e29be 100644
--- a/libcxx/test/std/utilities/memory/pointer.traits/element_type.pass.cpp
+++ b/libcxx/test/std/utilities/memory/pointer.traits/element_type.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::pointer_traits<const short*>::element_type, const short>::value), "");
diff --git a/libcxx/test/std/utilities/memory/pointer.traits/pointer.pass.cpp b/libcxx/test/std/utilities/memory/pointer.traits/pointer.pass.cpp
index 110a993dc6605..14054b3c4f65b 100644
--- a/libcxx/test/std/utilities/memory/pointer.traits/pointer.pass.cpp
+++ b/libcxx/test/std/utilities/memory/pointer.traits/pointer.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A
 {
     typedef short element_type;
diff --git a/libcxx/test/std/utilities/memory/pointer.traits/pointer.traits.functions/pointer_to.pass.cpp b/libcxx/test/std/utilities/memory/pointer.traits/pointer.traits.functions/pointer_to.pass.cpp
index 9e6a48930e28d..bd561ddb44711 100644
--- a/libcxx/test/std/utilities/memory/pointer.traits/pointer.traits.functions/pointer_to.pass.cpp
+++ b/libcxx/test/std/utilities/memory/pointer.traits/pointer.traits.functions/pointer_to.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class T>
 struct A
 {
diff --git a/libcxx/test/std/utilities/memory/ptr.align/align.pass.cpp b/libcxx/test/std/utilities/memory/ptr.align/align.pass.cpp
index 3d0216cce5333..d227238f000bd 100644
--- a/libcxx/test/std/utilities/memory/ptr.align/align.pass.cpp
+++ b/libcxx/test/std/utilities/memory/ptr.align/align.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const unsigned N = 20;
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/addressof.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/addressof.pass.cpp
index f6310c7bc9116..dde8a9e2be68f 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/addressof.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/addressof.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     void operator&() const {}
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/constexpr_addressof.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/constexpr_addressof.pass.cpp
index f14a0e7fc898c..2aabf812a75f6 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/constexpr_addressof.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.addressof/constexpr_addressof.pass.cpp
@@ -16,6 +16,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Pointer {
   constexpr Pointer(void* v) : value(v) {}
   void* value;
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.destroy/destroy_at.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.destroy/destroy_at.pass.cpp
index d505222b02d7d..e7b1dbdb6530f 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.destroy/destroy_at.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/specialized.destroy/destroy_at.pass.cpp
@@ -17,6 +17,8 @@
 #include <cstdlib>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Counted {
   static int count;
   static void reset() { count = 0; }
diff --git a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp
index db71c69bf26cd..09cfc74ec190e 100644
--- a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp
+++ b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp
@@ -21,6 +21,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct alignas(32) A {
     int field;
 };
diff --git a/libcxx/test/std/utilities/memory/temporary.buffer/temporary_buffer.pass.cpp b/libcxx/test/std/utilities/memory/temporary.buffer/temporary_buffer.pass.cpp
index 32a58e5a657be..af798186f2499 100644
--- a/libcxx/test/std/utilities/memory/temporary.buffer/temporary_buffer.pass.cpp
+++ b/libcxx/test/std/utilities/memory/temporary.buffer/temporary_buffer.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::pair<int*, std::ptrdiff_t> ip = std::get_temporary_buffer<int>(5);
diff --git a/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp b/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp
index b9b158a9d2b74..9506ed2e34b22 100644
--- a/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp
+++ b/libcxx/test/std/utilities/memory/unique.ptr/unique.ptr.special/io.pass.cpp
@@ -21,6 +21,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::unique_ptr<int> p(new int(3));
diff --git a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp b/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp
index a2b6cf22ed8fe..892479fbeafef 100644
--- a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_no_pointers.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <memory>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     char* p = new char[10];
diff --git a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp b/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp
index c923089fa978d..2d880ef5c0684 100644
--- a/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.dynamic.safety/declare_reachable.pass.cpp
@@ -14,6 +14,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     int* p = new int;
diff --git a/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp b/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp
index 2fea98364b479..709ab4b4994f8 100644
--- a/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.dynamic.safety/get_pointer_safety.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 void test_pr26961() {
   std::pointer_safety d;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/enabled_hash.pass.cpp
index 440fa8ac480d9..677354e19ed91 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/enabled_hash.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp
index 34717ad4256ac..0fcf27fb9295a 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_shared_ptr.pass.cpp
@@ -22,6 +22,8 @@
 #if TEST_STD_VER >= 11
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 struct A {};
 #endif
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp
index 209e3fe71af4d..338f37de37304 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.getdeleter/get_deleter.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct A
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/auto_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/auto_ptr_Y.pass.cpp
index 896e50827dcfa..d71e702eae6f0 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/auto_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/auto_ptr_Y.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr.pass.cpp
index b67407c325aa9..75f58e4411fab 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr.pass.cpp
@@ -18,6 +18,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y.pass.cpp
index 2b666315ef2f8..6fd4c8e074232 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y_rv.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y_rv.pass.cpp
index 6787c33c297f1..e73357a716e54 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y_rv.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_Y_rv.pass.cpp
@@ -18,6 +18,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_rv.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_rv.pass.cpp
index e921a09918985..dbc8ab5c490aa 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_rv.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/shared_ptr_rv.pass.cpp
@@ -18,6 +18,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp
index 4abe371bc0747..349fbec746f1c 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.assign/unique_ptr_Y.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/const_pointer_cast.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/const_pointer_cast.pass.cpp
index 51e2949b6c609..e2bb126fd4709 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/const_pointer_cast.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/const_pointer_cast.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp
index 76009b96abde8..d6d5da1197aca 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/dynamic_pointer_cast.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/static_pointer_cast.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/static_pointer_cast.pass.cpp
index 9ea544fee0123..1fe674fa530eb 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/static_pointer_cast.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cast/static_pointer_cast.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/cmp_nullptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/cmp_nullptr.pass.cpp
index 98b5bbf0d67d9..bea3b4e94e27f 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/cmp_nullptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/cmp_nullptr.pass.cpp
@@ -38,6 +38,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 void do_nothing(int*) {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/eq.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/eq.pass.cpp
index e25ba611744aa..2d1cee8ef895d 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/eq.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/eq.pass.cpp
@@ -16,6 +16,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 void do_nothing(int*) {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/lt.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/lt.pass.cpp
index fdef32d518ac4..478cb6c32e5c7 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/lt.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.cmp/lt.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 void do_nothing(int*) {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/default.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/default.pass.cpp
index 247ca0fa924f7..17b346d3a13f2 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/default.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/default.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::shared_ptr<int> p;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t.pass.cpp
index f29dd1cf7ea81..c9a46dc48f531 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::shared_ptr<int> p(nullptr);
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp
index ee5861122bfc0..666741813ab8a 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct A
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp
index 0881e8cf42d24..23fff8c899547 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp
index 4700df0085c3e..51eccc59b3699 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer.pass.cpp
index b55d764a6c413..6eceb552ec14c 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
index fd98193826e1f..bb41fa89be372 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct A
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
index 0ec18a7aebd52..e17ae6f34312d 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp
index 5f2984c7e22d2..e39bda017a64e 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp
@@ -13,6 +13,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp
index da12e42ca0b2c..9fa32714bc1d7 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_throw.pass.cpp
@@ -21,6 +21,7 @@
 #include <cstdlib>
 
 #include "count_new.hpp"
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct A
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp
index 15e776003d9dd..b7b5d145093d5 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_throw.pass.cpp
@@ -21,6 +21,8 @@
 
 #include "count_new.hpp"
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr.pass.cpp
index 091782f02f3aa..c9bd0f0331158 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_Y.pass.cpp
index 01a74898f9cb3..6f770ef0dd2cb 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_Y.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_pointer.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_pointer.pass.cpp
index 83a9a9720dc41..3a600f8e30bbb 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_pointer.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/shared_ptr_pointer.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp
index 06c2bba6705a8..40d4639d88102 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp
@@ -19,6 +19,7 @@
 #include <new>
 #include <cstdlib>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_cxx03.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_cxx03.pass.cpp
index 00f79cc19d2c5..13c929dd4f724 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_cxx03.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_cxx03.pass.cpp
@@ -18,6 +18,7 @@
 #include <new>
 #include <cstdlib>
 #include <cassert>
+#include "test_macros.h"
 #include "test_allocator.h"
 #include "min_allocator.h"
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.volatile.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.volatile.pass.cpp
index aa038f7475f83..eb7f75aed5115 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.volatile.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared.volatile.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <typename T>
 void test(const T &t0)
 {
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp
index b09550a6eab27..8903b75ae4e25 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.io/io.pass.cpp
@@ -18,6 +18,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::shared_ptr<int> p(new int(3));
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset.pass.cpp
index c9df003192b91..55c22414d63a3 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer.pass.cpp
index c3582416584a0..789013fdaac95 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter.pass.cpp
index e7d457fa13988..97cd298cf2f36 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct B
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp
index 9e2bd1012900f..ee7f3762de081 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp
@@ -14,6 +14,7 @@
 
 #include <memory>
 #include <cassert>
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "test_allocator.h"
 
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/swap.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/swap.pass.cpp
index a27949ebb8039..5070dbc1112b4 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/swap.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/swap.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/arrow.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/arrow.pass.cpp
index 77bf3a22e0806..b968bd443176f 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/arrow.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/arrow.pass.cpp
@@ -16,6 +16,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::shared_ptr<std::pair<int, int> > p(new std::pair<int, int>(3, 4));
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/dereference.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/dereference.pass.cpp
index a6f75533acb9c..c8455e78d5625 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/dereference.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/dereference.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::shared_ptr<int> p(new int(32));
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/op_bool.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/op_bool.pass.cpp
index 247deb07bb99a..69b0ff3ed5c91 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/op_bool.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/op_bool.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/unique.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/unique.pass.cpp
index dfad31385173c..0b7d29dbcc06d 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/unique.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.obs/unique.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::shared_ptr<int> p(new int(32));
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp
index b0bfcae9ae4cf..47ae5dd8f7297 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.spec/swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/types.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/types.pass.cpp
index f5bdb876b466e..0666a98828d9f 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/types.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/types.pass.cpp
@@ -17,6 +17,8 @@
 
 #include <memory>
 
+#include "test_macros.h"
+
 struct A;  // purposefully incomplete
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/shared_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/shared_ptr_Y.pass.cpp
index 02f180d3d6431..6d6f4e13556d0 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/shared_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/shared_ptr_Y.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr.pass.cpp
index f41c391c4c24c..ae166a5662659 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr_Y.pass.cpp
index 33b2ddd422aff..257e5ef189eb9 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.assign/weak_ptr_Y.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/default.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/default.pass.cpp
index e5a70abe417d5..9d6c72db45a81 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/default.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A;
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/shared_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/shared_ptr_Y.pass.cpp
index 45be55e66f8e1..24f57d3449fb3 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/shared_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/shared_ptr_Y.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/weak_ptr_Y.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/weak_ptr_Y.pass.cpp
index 4268fda3aba42..478af1e1b5a96 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/weak_ptr_Y.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.const/weak_ptr_Y.pass.cpp
@@ -17,6 +17,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/reset.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/reset.pass.cpp
index eae249ca1a534..93be75b3360a6 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/reset.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/reset.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/swap.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/swap.pass.cpp
index 76703d0ddd755..73453d20ee807 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/swap.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.mod/swap.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/expired.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/expired.pass.cpp
index 5fb2dd4f95bf2..bc0eb6e1a481c 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/expired.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/expired.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/lock.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/lock.pass.cpp
index 50ff84318efcf..107a587c7de49 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/lock.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.obs/lock.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp
index 53bc3eb9cd86a..98429fd740638 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weak/util.smartptr.weak.spec/swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weakptr/bad_weak_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weakptr/bad_weak_ptr.pass.cpp
index f3e26dee54e83..49a466b903bcb 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weakptr/bad_weak_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.weakptr/bad_weak_ptr.pass.cpp
@@ -20,6 +20,8 @@
 #include <cassert>
 #include <cstring>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_base_of<std::exception, std::bad_weak_ptr>::value), "");
diff --git a/libcxx/test/std/utilities/meta/meta.logical/conjunction.pass.cpp b/libcxx/test/std/utilities/meta/meta.logical/conjunction.pass.cpp
index e37769576d2fe..47c6361361b32 100644
--- a/libcxx/test/std/utilities/meta/meta.logical/conjunction.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.logical/conjunction.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct True  { static constexpr bool value = true; };
 struct False { static constexpr bool value = false; };
 
diff --git a/libcxx/test/std/utilities/meta/meta.logical/disjunction.pass.cpp b/libcxx/test/std/utilities/meta/meta.logical/disjunction.pass.cpp
index baaed6f03ceea..dc12e369092f8 100644
--- a/libcxx/test/std/utilities/meta/meta.logical/disjunction.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.logical/disjunction.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct True  { static constexpr bool value = true; };
 struct False { static constexpr bool value = false; };
 
diff --git a/libcxx/test/std/utilities/meta/meta.logical/negation.pass.cpp b/libcxx/test/std/utilities/meta/meta.logical/negation.pass.cpp
index 88ca693d31b39..3dc775688af61 100644
--- a/libcxx/test/std/utilities/meta/meta.logical/negation.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.logical/negation.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct True  { static constexpr bool value = true; };
 struct False { static constexpr bool value = false; };
 
diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp
index 057d3b6582320..b75b4468a70a7 100644
--- a/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp
@@ -23,3 +23,5 @@
 #define _LIBCPP_USE_IS_CONVERTIBLE_FALLBACK
 #include "is_convertible.pass.cpp"
 
+#include "test_macros.h"
+
diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_convertible.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_convertible.pass.cpp
index 2a953d90b8e18..00b533a8c4160 100644
--- a/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_convertible.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.rel/is_nothrow_convertible.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 struct B {
 public:
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/nullptr.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/nullptr.pass.cpp
index 0b25ac1ba957c..7f36df27b24e2 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/nullptr.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/nullptr.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cstddef>        // for std::nullptr_t
 
+#include "test_macros.h"
+
 template <class T>
 void test_nullptr_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/array.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/array.pass.cpp
index 487e14446c32c..c702e3646c7b8 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/array.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/array.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_array_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/class.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/class.pass.cpp
index bc072198f2ebd..6368e2ada8341 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/class.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/class.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_class_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/enum.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/enum.pass.cpp
index 71c74f2ba045b..9bca19c82a01b 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/enum.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/enum.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_enum_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/floating_point.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/floating_point.pass.cpp
index 957473c2a91b8..cff884bbd4df5 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/floating_point.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/floating_point.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_floating_point_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/function.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/function.pass.cpp
index c27b1237ebf90..c17be80643b76 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/function.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/function.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_function_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp
index cac606a516e44..0cbeb4a320532 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_integral_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_bounded_array.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_bounded_array.pass.cpp
index 7a46b97fbd089..da9c7bc1b831c 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_bounded_array.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_bounded_array.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, bool B>
 void test_array_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_unbounded_array.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_unbounded_array.pass.cpp
index 3a561b0b01637..5e587f4058d73 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_unbounded_array.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/is_unbounded_array.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, bool B>
 void test_array_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/lvalue_ref.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/lvalue_ref.pass.cpp
index 44027dadb2648..b9c206e5c5ce0 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/lvalue_ref.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/lvalue_ref.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_lvalue_ref()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_function_pointer.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_function_pointer.pass.cpp
index fda2e81818a24..005228c0896a4 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_function_pointer.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_function_pointer.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_member_function_pointer_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_object_pointer.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_object_pointer.pass.cpp
index 3e8117b6a01a4..67a79cb29eba9 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_object_pointer.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/member_object_pointer.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_member_object_pointer_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/pointer.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/pointer.pass.cpp
index f5677b95b679d..2c45167a91221 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/pointer.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/pointer.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_pointer_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/rvalue_ref.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/rvalue_ref.pass.cpp
index 341b9462692a0..f00307aa96951 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/rvalue_ref.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/rvalue_ref.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_rvalue_ref()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/union.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/union.pass.cpp
index fb48a70f35fd2..d72f4db8a00b2 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/union.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/union.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_union_imp()
 {
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/void.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/void.pass.cpp
index 657f72774f49b..3cd58d368e866 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/void.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/void.pass.cpp
@@ -12,6 +12,8 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T>
 void test_void_imp()
 {
diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp
index 6f119b274231b..b907dd45ea819 100644
--- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/default.pass.cpp
@@ -17,6 +17,8 @@
 #include <optional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::bad_optional_access;
diff --git a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp
index 975d8678b89ce..135e2b28fdb4c 100644
--- a/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.bad_optional_access/derive.pass.cpp
@@ -16,6 +16,8 @@
 #include <optional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::bad_optional_access;
diff --git a/libcxx/test/std/utilities/optional/optional.comp_with_t/equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.comp_with_t/equal.pass.cpp
index 4f7aedbcb0f06..e79ec051ec6b3 100644
--- a/libcxx/test/std/utilities/optional/optional.comp_with_t/equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.comp_with_t/equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.comp_with_t/greater.pass.cpp b/libcxx/test/std/utilities/optional/optional.comp_with_t/greater.pass.cpp
index 373634f68c2dc..073ff324a8f3f 100644
--- a/libcxx/test/std/utilities/optional/optional.comp_with_t/greater.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.comp_with_t/greater.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.comp_with_t/greater_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.comp_with_t/greater_equal.pass.cpp
index 5d4839734bb90..abbfbfeaf2e22 100644
--- a/libcxx/test/std/utilities/optional/optional.comp_with_t/greater_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.comp_with_t/greater_equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.comp_with_t/less_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.comp_with_t/less_equal.pass.cpp
index a601939c5b39d..679bd69e72198 100644
--- a/libcxx/test/std/utilities/optional/optional.comp_with_t/less_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.comp_with_t/less_equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.comp_with_t/less_than.pass.cpp b/libcxx/test/std/utilities/optional/optional.comp_with_t/less_than.pass.cpp
index 7320955908a4d..0aa17a1560bed 100644
--- a/libcxx/test/std/utilities/optional/optional.comp_with_t/less_than.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.comp_with_t/less_than.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.comp_with_t/not_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.comp_with_t/not_equal.pass.cpp
index 0d14f1e97a952..c3bdeb8256c50 100644
--- a/libcxx/test/std/utilities/optional/optional.comp_with_t/not_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.comp_with_t/not_equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/optional/optional.hash/enabled_hash.pass.cpp
index 66ab089dc3077..fc84e6dc92a59 100644
--- a/libcxx/test/std/utilities/optional/optional.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.hash/enabled_hash.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
   {
diff --git a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
index aa89a51d8d402..d4f04164f60fd 100644
--- a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
@@ -18,6 +18,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 struct A {};
 struct B {};
 
diff --git a/libcxx/test/std/utilities/optional/optional.nullops/equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullops/equal.pass.cpp
index 5894462205abc..232969011b917 100644
--- a/libcxx/test/std/utilities/optional/optional.nullops/equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullops/equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.nullops/greater.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullops/greater.pass.cpp
index 59dc62fce17d5..8e5a6ec6784ea 100644
--- a/libcxx/test/std/utilities/optional/optional.nullops/greater.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullops/greater.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.nullops/greater_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullops/greater_equal.pass.cpp
index e23e8794f441d..8ff32bd6b707f 100644
--- a/libcxx/test/std/utilities/optional/optional.nullops/greater_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullops/greater_equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.nullops/less_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullops/less_equal.pass.cpp
index 96f0754e40b3b..c856cf1def2d9 100644
--- a/libcxx/test/std/utilities/optional/optional.nullops/less_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullops/less_equal.pass.cpp
@@ -15,6 +15,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.nullops/less_than.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullops/less_than.pass.cpp
index 872f3159a92fe..e95d35398f846 100644
--- a/libcxx/test/std/utilities/optional/optional.nullops/less_than.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullops/less_than.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.nullops/not_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullops/not_equal.pass.cpp
index 7eea0fa8856eb..1de366a054af8 100644
--- a/libcxx/test/std/utilities/optional/optional.nullops/not_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullops/not_equal.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.nullopt/nullopt_t.pass.cpp b/libcxx/test/std/utilities/optional/optional.nullopt/nullopt_t.pass.cpp
index c9d843e9c2af6..bc5a24c5a4933 100644
--- a/libcxx/test/std/utilities/optional/optional.nullopt/nullopt_t.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.nullopt/nullopt_t.pass.cpp
@@ -19,6 +19,8 @@
 #include <optional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 using std::nullopt_t;
 using std::nullopt;
 
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/deduct.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/deduct.pass.cpp
index fa2edfcdd69bf..919dc22468e67 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/deduct.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/deduct.pass.cpp
@@ -21,6 +21,8 @@
 #include <optional>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/nullopt_t.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/nullopt_t.pass.cpp
index 927ac19ead121..12b19a72cafdb 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/nullopt_t.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.ctor/nullopt_t.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "archetypes.hpp"
 
+#include "test_macros.h"
+
 using std::optional;
 using std::nullopt_t;
 using std::nullopt;
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp
index 23497bc4c4786..61da8672b08d2 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.dtor/dtor.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct PODType {
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp
index 704606c6f75c7..05153a07f2f3d 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.mod/reset.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X
diff --git a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp
index 736fe791af62e..71f7905ad9113 100644
--- a/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/optional.object.observe/value_or_const.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct Y
diff --git a/libcxx/test/std/utilities/optional/optional.object/special_members.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/special_members.pass.cpp
index 28783264d715f..2449801be1878 100644
--- a/libcxx/test/std/utilities/optional/optional.object/special_members.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/special_members.pass.cpp
@@ -18,6 +18,8 @@
 
 #include "archetypes.hpp"
 
+#include "test_macros.h"
+
 
 template <class T>
 struct SpecialMemberTest {
diff --git a/libcxx/test/std/utilities/optional/optional.object/triviality.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/triviality.pass.cpp
index f53d86000e7c7..3af60a6bf9f1f 100644
--- a/libcxx/test/std/utilities/optional/optional.object/triviality.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/triviality.pass.cpp
@@ -24,6 +24,8 @@
 
 #include "archetypes.hpp"
 
+#include "test_macros.h"
+
 
 constexpr bool implies(bool p, bool q) {
     return !p || q;
diff --git a/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp b/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp
index 7c32d1857cd00..09107243abea9 100644
--- a/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.object/types.pass.cpp
@@ -19,6 +19,8 @@
 #include <optional>
 #include <type_traits>
 
+#include "test_macros.h"
+
 using std::optional;
 
 template <class Opt, class T>
diff --git a/libcxx/test/std/utilities/optional/optional.relops/equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.relops/equal.pass.cpp
index 4fc85157a7685..e7c9819853121 100644
--- a/libcxx/test/std/utilities/optional/optional.relops/equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.relops/equal.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.relops/greater_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.relops/greater_equal.pass.cpp
index 4bc9720aa7f7a..c7cbc1538aae9 100644
--- a/libcxx/test/std/utilities/optional/optional.relops/greater_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.relops/greater_equal.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.relops/greater_than.pass.cpp b/libcxx/test/std/utilities/optional/optional.relops/greater_than.pass.cpp
index d168cd70635a5..010812042007b 100644
--- a/libcxx/test/std/utilities/optional/optional.relops/greater_than.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.relops/greater_than.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.relops/less_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.relops/less_equal.pass.cpp
index 835be64f8fbee..d1c232e1fe47e 100644
--- a/libcxx/test/std/utilities/optional/optional.relops/less_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.relops/less_equal.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.relops/less_than.pass.cpp b/libcxx/test/std/utilities/optional/optional.relops/less_than.pass.cpp
index 832de4b2981a6..09adb21736a99 100644
--- a/libcxx/test/std/utilities/optional/optional.relops/less_than.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.relops/less_than.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.relops/not_equal.pass.cpp b/libcxx/test/std/utilities/optional/optional.relops/not_equal.pass.cpp
index ab00b7aab18dc..643425d1b9eb5 100644
--- a/libcxx/test/std/utilities/optional/optional.relops/not_equal.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.relops/not_equal.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 using std::optional;
 
 struct X {
diff --git a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp
index d3461542b93e8..e736eb4a3551b 100644
--- a/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.specalg/make_optional_explicit.pass.cpp
@@ -17,6 +17,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/optional/optional.syn/optional_includes_initializer_list.pass.cpp b/libcxx/test/std/utilities/optional/optional.syn/optional_includes_initializer_list.pass.cpp
index daaad5664b6b8..663e1ca8395c1 100644
--- a/libcxx/test/std/utilities/optional/optional.syn/optional_includes_initializer_list.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.syn/optional_includes_initializer_list.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <optional>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using std::optional;
diff --git a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_add.pass.cpp b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_add.pass.cpp
index c62f75a0d875f..50cea314c0e46 100644
--- a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_add.pass.cpp
+++ b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_add.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_divide.pass.cpp b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_divide.pass.cpp
index ce7f69473bc79..79a660f3d98ff 100644
--- a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_divide.pass.cpp
+++ b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_divide.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_multiply.pass.cpp b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_multiply.pass.cpp
index e20f234434743..dcb1684e5610b 100644
--- a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_multiply.pass.cpp
+++ b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_multiply.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_subtract.pass.cpp b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_subtract.pass.cpp
index e3871f7a7cfc9..bba506f5b96ee 100644
--- a/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_subtract.pass.cpp
+++ b/libcxx/test/std/utilities/ratio/ratio.arithmetic/ratio_subtract.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/ratio/ratio.ratio/ratio.pass.cpp b/libcxx/test/std/utilities/ratio/ratio.ratio/ratio.pass.cpp
index 336d7d8e5b58d..c14c7d93b8a0a 100644
--- a/libcxx/test/std/utilities/ratio/ratio.ratio/ratio.pass.cpp
+++ b/libcxx/test/std/utilities/ratio/ratio.ratio/ratio.pass.cpp
@@ -11,6 +11,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 template <long long N, long long D, long long eN, long long eD>
 void test()
 {
diff --git a/libcxx/test/std/utilities/ratio/typedefs.pass.cpp b/libcxx/test/std/utilities/ratio/typedefs.pass.cpp
index 8e24ff9749d35..06e9682b67c8d 100644
--- a/libcxx/test/std/utilities/ratio/typedefs.pass.cpp
+++ b/libcxx/test/std/utilities/ratio/typedefs.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <ratio>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::atto::num == 1 && std::atto::den == 1000000000000000000ULL, "");
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move.pass.cpp
index bc42afda3094b..89364b058f188 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move.pass.cpp
@@ -21,6 +21,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "unique_ptr_test_helper.h"
 
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.runtime.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.runtime.pass.cpp
index ce912546533f7..aa766c6336a51 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.runtime.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.runtime.pass.cpp
@@ -18,6 +18,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "unique_ptr_test_helper.h"
 
 template <class APtr, class BPtr>
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.single.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.single.pass.cpp
index d5f46935a19ac..0b2ead49e5de8 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.single.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/move_convert.single.pass.cpp
@@ -18,6 +18,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "unique_ptr_test_helper.h"
 
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/null.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/null.pass.cpp
index ecba79dfd3278..1af7eccb38c5e 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/null.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/null.pass.cpp
@@ -15,6 +15,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "unique_ptr_test_helper.h"
 
 // test assignment from null
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/nullptr.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/nullptr.pass.cpp
index 5cd44b2f64701..4a44a92dd2a86 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/nullptr.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.asgn/nullptr.pass.cpp
@@ -15,6 +15,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "unique_ptr_test_helper.h"
 
 // test assignment from null
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.ctor/move_convert.runtime.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.ctor/move_convert.runtime.pass.cpp
index bcf85b11031ec..632af11250083 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.ctor/move_convert.runtime.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.ctor/move_convert.runtime.pass.cpp
@@ -17,6 +17,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "deleter_types.h"
 #include "unique_ptr_test_helper.h"
 
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.dtor/null.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.dtor/null.pass.cpp
index 9ef48b2e4af76..c067f642e6766 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.dtor/null.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.dtor/null.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 class Deleter {
   int state_;
 
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset.single.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset.single.pass.cpp
index 4f5a519b798ba..583cbcf50c1be 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset.single.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset.single.pass.cpp
@@ -15,6 +15,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "unique_ptr_test_helper.h"
 
 int main(int, char**) {
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset_self.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset_self.pass.cpp
index d5e15aafe24c8..7f60e416b8d6c 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset_self.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.modifiers/reset_self.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <memory>
 
+#include "test_macros.h"
+
 struct A {
   std::unique_ptr<A> ptr_;
 
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/dereference.single.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/dereference.single.pass.cpp
index 254d88bb6cf66..fae3576b1252d 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/dereference.single.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/dereference.single.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   std::unique_ptr<int> p(new int(3));
   assert(*p == 3);
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_arrow.single.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_arrow.single.pass.cpp
index f31ca6b286d12..4a50d1f29d17a 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_arrow.single.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_arrow.single.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A {
   int i_;
 
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_subscript.runtime.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_subscript.runtime.pass.cpp
index 21e7e661688ab..1b11d695e96b5 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_subscript.runtime.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.class/unique.ptr.observers/op_subscript.runtime.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A {
   int state_;
   static int next_;
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.array.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.array.pass.cpp
index 715335eb1cd06..bf991db830e61 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.array.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.array.pass.cpp
@@ -11,6 +11,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 //    The only way to create an unique_ptr<T[]> is to default construct them.
 
 class foo {
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.single.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.single.pass.cpp
index 08062c122e185..0360af09f970d 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.single.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.single.pass.cpp
@@ -11,6 +11,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/convert_ctor.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/convert_ctor.pass.cpp
index 6b8407c571a4e..83a5f3132e784 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/convert_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/convert_ctor.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/default.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/default.pass.cpp
index e7cbeaba5e0c1..a5101b9889dc6 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/default.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt/default.pass.cpp
@@ -13,6 +13,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/convert_ctor.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/convert_ctor.pass.cpp
index 14e210598c11a..0eae429fd4098 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/convert_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/convert_ctor.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::default_delete<int[]> d1;
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/default.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/default.pass.cpp
index 9b220462b67d0..1693d11418d8b 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/default.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.dltr/unique.ptr.dltr.dflt1/default.pass.cpp
@@ -15,6 +15,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     static int count;
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/cmp_nullptr.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/cmp_nullptr.pass.cpp
index 52c399e64f721..ad3e1e2ddb190 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/cmp_nullptr.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/cmp_nullptr.pass.cpp
@@ -38,6 +38,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 void do_nothing(int*) {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/eq.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/eq.pass.cpp
index ce83b575031e6..0d0ac2709b7c3 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/eq.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/eq.pass.cpp
@@ -21,6 +21,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct A
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/rel.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/rel.pass.cpp
index 5fad4beb6c6ea..bc411ca545c29 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/rel.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.special/rel.pass.cpp
@@ -29,6 +29,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "deleter_types.h"
 
 struct A
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp
index a499b66fdb584..5adc060c8aec7 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.hash/enabled_hash.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
   {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/all.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/all.pass.cpp
index fe9e0e0a363b6..f956e06bf6374 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.members/all.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.members/all.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <std::size_t N>
 void test_all()
 {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/any.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/any.pass.cpp
index 95b640178b973..ac9a84b3a9bac 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.members/any.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.members/any.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <std::size_t N>
 void test_any()
 {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/none.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/none.pass.cpp
index 2588ac60f7b37..9b0d107077849 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.members/none.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.members/none.pass.cpp
@@ -12,6 +12,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <std::size_t N>
 void test_none()
 {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/size.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/size.pass.cpp
index 41318d99eef88..336e3e8dd4bce 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.members/size.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.members/size.pass.cpp
@@ -11,6 +11,8 @@
 #include <bitset>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <std::size_t N>
 void test_size()
 {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp
index c43ef90aeefbe..670a680433fa4 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp
@@ -14,6 +14,8 @@
 #include <climits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <std::size_t N>
 void test_to_ullong()
 {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp
index c6cf6b19a1851..b01d7948d65a5 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp
@@ -15,6 +15,8 @@
 #include <climits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <std::size_t N>
 void test_to_ulong()
 {
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp
index 43c20f848bb01..3ef0eb8bf9bf5 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.operators/stream_out.pass.cpp
@@ -16,6 +16,8 @@
 #include <sstream>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::ostringstream os;
diff --git a/libcxx/test/std/utilities/template.bitset/includes.pass.cpp b/libcxx/test/std/utilities/template.bitset/includes.pass.cpp
index 90695ed3dccc5..43d9e08b66ac3 100644
--- a/libcxx/test/std/utilities/template.bitset/includes.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/includes.pass.cpp
@@ -10,6 +10,8 @@
 
 #include <bitset>
 
+#include "test_macros.h"
+
 template <class> void test_typedef() {}
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/time/days.pass.cpp b/libcxx/test/std/utilities/time/days.pass.cpp
index 43b53ea33cb89..52b9ea00c35ea 100644
--- a/libcxx/test/std/utilities/time/days.pass.cpp
+++ b/libcxx/test/std/utilities/time/days.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::days D;
diff --git a/libcxx/test/std/utilities/time/hours.pass.cpp b/libcxx/test/std/utilities/time/hours.pass.cpp
index 97fc2621b6acb..1372f9a3108bd 100644
--- a/libcxx/test/std/utilities/time/hours.pass.cpp
+++ b/libcxx/test/std/utilities/time/hours.pass.cpp
@@ -14,6 +14,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::hours D;
diff --git a/libcxx/test/std/utilities/time/microseconds.pass.cpp b/libcxx/test/std/utilities/time/microseconds.pass.cpp
index ded1c22fe14f9..ca21ace5d19c5 100644
--- a/libcxx/test/std/utilities/time/microseconds.pass.cpp
+++ b/libcxx/test/std/utilities/time/microseconds.pass.cpp
@@ -14,6 +14,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::microseconds D;
diff --git a/libcxx/test/std/utilities/time/milliseconds.pass.cpp b/libcxx/test/std/utilities/time/milliseconds.pass.cpp
index b1fe99e1209f3..231375b5ab1b5 100644
--- a/libcxx/test/std/utilities/time/milliseconds.pass.cpp
+++ b/libcxx/test/std/utilities/time/milliseconds.pass.cpp
@@ -14,6 +14,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::milliseconds D;
diff --git a/libcxx/test/std/utilities/time/minutes.pass.cpp b/libcxx/test/std/utilities/time/minutes.pass.cpp
index 23f0bf2877dee..14e79f4ab6373 100644
--- a/libcxx/test/std/utilities/time/minutes.pass.cpp
+++ b/libcxx/test/std/utilities/time/minutes.pass.cpp
@@ -14,6 +14,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::minutes D;
diff --git a/libcxx/test/std/utilities/time/months.pass.cpp b/libcxx/test/std/utilities/time/months.pass.cpp
index b14b2fab533ed..2212e9347373b 100644
--- a/libcxx/test/std/utilities/time/months.pass.cpp
+++ b/libcxx/test/std/utilities/time/months.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::months D;
diff --git a/libcxx/test/std/utilities/time/nanoseconds.pass.cpp b/libcxx/test/std/utilities/time/nanoseconds.pass.cpp
index d58a375c2a59b..29c973223124b 100644
--- a/libcxx/test/std/utilities/time/nanoseconds.pass.cpp
+++ b/libcxx/test/std/utilities/time/nanoseconds.pass.cpp
@@ -14,6 +14,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::nanoseconds D;
diff --git a/libcxx/test/std/utilities/time/seconds.pass.cpp b/libcxx/test/std/utilities/time/seconds.pass.cpp
index 45a3f1d8e7fb2..57fbb1d27e3fe 100644
--- a/libcxx/test/std/utilities/time/seconds.pass.cpp
+++ b/libcxx/test/std/utilities/time/seconds.pass.cpp
@@ -14,6 +14,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::seconds D;
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.file/consistency.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.file/consistency.pass.cpp
index 34244a8719b99..34a4f60e2acfd 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.file/consistency.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.file/consistency.pass.cpp
@@ -20,6 +20,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.file/rep_signed.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.file/rep_signed.pass.cpp
index c87fad2588cfd..bed859a14281a 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.file/rep_signed.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.file/rep_signed.pass.cpp
@@ -17,6 +17,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert(std::is_signed<std::chrono::file_clock::rep>::value, "");
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.hires/consistency.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.hires/consistency.pass.cpp
index 1650d3b6b5f3e..1f8a887387000 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.hires/consistency.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.hires/consistency.pass.cpp
@@ -24,6 +24,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.hires/now.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.hires/now.pass.cpp
index ddf3ced87dac1..db1fb55df9072 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.hires/now.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.hires/now.pass.cpp
@@ -15,6 +15,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::high_resolution_clock C;
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.steady/consistency.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.steady/consistency.pass.cpp
index 0797f2cb5bcf1..4e170baa37c0d 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.steady/consistency.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.steady/consistency.pass.cpp
@@ -26,6 +26,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp
index 7d268fd67ded9..4b8104dd1a6f1 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.steady/now.pass.cpp
@@ -17,6 +17,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::steady_clock C;
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.system/consistency.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.system/consistency.pass.cpp
index b92652134ad6a..06596f8975ffa 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.system/consistency.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.system/consistency.pass.cpp
@@ -24,6 +24,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 template <class T>
 void test(const T &) {}
 
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.system/from_time_t.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.system/from_time_t.pass.cpp
index 54252718d4af1..70dd8117e6cef 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.system/from_time_t.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.system/from_time_t.pass.cpp
@@ -15,6 +15,8 @@
 #include <chrono>
 #include <ctime>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::system_clock C;
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.system/now.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.system/now.pass.cpp
index 9d74541f98960..dade6bafa471b 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.system/now.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.system/now.pass.cpp
@@ -15,6 +15,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::system_clock C;
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.system/rep_signed.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.system/rep_signed.pass.cpp
index 967af52e11d23..69546a6518e28 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.system/rep_signed.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.system/rep_signed.pass.cpp
@@ -15,6 +15,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     assert(std::chrono::system_clock::duration::min() <
diff --git a/libcxx/test/std/utilities/time/time.clock/time.clock.system/to_time_t.pass.cpp b/libcxx/test/std/utilities/time/time.clock/time.clock.system/to_time_t.pass.cpp
index 86b37bb6bd1bd..bf4339c32d1ca 100644
--- a/libcxx/test/std/utilities/time/time.clock/time.clock.system/to_time_t.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.clock/time.clock.system/to_time_t.pass.cpp
@@ -15,6 +15,8 @@
 #include <chrono>
 #include <ctime>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::system_clock C;
diff --git a/libcxx/test/std/utilities/time/time.duration/default_ratio.pass.cpp b/libcxx/test/std/utilities/time/time.duration/default_ratio.pass.cpp
index 08870488c16a5..d59d59a6bbd3e 100644
--- a/libcxx/test/std/utilities/time/time.duration/default_ratio.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/default_ratio.pass.cpp
@@ -18,6 +18,8 @@
 #include <chrono>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::chrono::duration<int, std::ratio<1> >,
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.alg/abs.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.alg/abs.pass.cpp
index 06f9a7c7f036a..605e27cd982c5 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.alg/abs.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.alg/abs.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class Duration>
 void
 test(const Duration& f, const Duration& d)
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.cast/ceil.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.cast/ceil.pass.cpp
index a6e1982d04458..85d7338347cd7 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.cast/ceil.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.cast/ceil.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class ToDuration, class FromDuration>
 void
 test(const FromDuration& f, const ToDuration& d)
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.cast/floor.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.cast/floor.pass.cpp
index 6783b1f67eda5..d1c29be1bc9e1 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.cast/floor.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.cast/floor.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class ToDuration, class FromDuration>
 void
 test(const FromDuration& f, const ToDuration& d)
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.cast/round.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.cast/round.pass.cpp
index ebd2e3194a4ac..274c91280c372 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.cast/round.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.cast/round.pass.cpp
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class ToDuration, class FromDuration>
 void
 test(const FromDuration& f, const ToDuration& d)
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.cons/convert_overflow.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.cons/convert_overflow.pass.cpp
index 5b963f2b1f4cb..d76018dbd4980 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.cons/convert_overflow.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.cons/convert_overflow.pass.cpp
@@ -18,6 +18,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 bool called = false;
 
 void f(std::chrono::milliseconds);
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals1.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals1.pass.cpp
index 2e5b7bbb87b6c..3961e2110cf84 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals1.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals1.pass.cpp
@@ -11,6 +11,8 @@
 #include <chrono>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::chrono;
diff --git a/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals2.pass.cpp b/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals2.pass.cpp
index d0b8b33e4934c..2ab1c930d34e6 100644
--- a/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals2.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/time.duration.literals/literals2.pass.cpp
@@ -13,6 +13,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     using namespace std::literals;
diff --git a/libcxx/test/std/utilities/time/time.duration/types.pass.cpp b/libcxx/test/std/utilities/time/time.duration/types.pass.cpp
index 250e532859e9d..30f92863be2d7 100644
--- a/libcxx/test/std/utilities/time/time.duration/types.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.duration/types.pass.cpp
@@ -18,6 +18,8 @@
 #include <chrono>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::duration<long, std::ratio<3, 2> > D;
diff --git a/libcxx/test/std/utilities/time/time.point/default_duration.pass.cpp b/libcxx/test/std/utilities/time/time.point/default_duration.pass.cpp
index 8a58413a0aafd..5e95fe03ac9bb 100644
--- a/libcxx/test/std/utilities/time/time.point/default_duration.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.point/default_duration.pass.cpp
@@ -18,6 +18,8 @@
 #include <chrono>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     static_assert((std::is_same<std::chrono::system_clock::duration,
diff --git a/libcxx/test/std/utilities/time/time.point/time.point.cast/ceil.pass.cpp b/libcxx/test/std/utilities/time/time.point/time.point.cast/ceil.pass.cpp
index 8dfd1bdaa38f8..e2dd87873849c 100644
--- a/libcxx/test/std/utilities/time/time.point/time.point.cast/ceil.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.point/time.point.cast/ceil.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class FromDuration, class ToDuration>
 void
 test(const FromDuration& df, const ToDuration& d)
diff --git a/libcxx/test/std/utilities/time/time.point/time.point.cast/floor.pass.cpp b/libcxx/test/std/utilities/time/time.point/time.point.cast/floor.pass.cpp
index d50fff4a1c88b..84611109a1c28 100644
--- a/libcxx/test/std/utilities/time/time.point/time.point.cast/floor.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.point/time.point.cast/floor.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class FromDuration, class ToDuration>
 void
 test(const FromDuration& df, const ToDuration& d)
diff --git a/libcxx/test/std/utilities/time/time.point/time.point.cast/round.pass.cpp b/libcxx/test/std/utilities/time/time.point/time.point.cast/round.pass.cpp
index d8bb1b505b148..a9a91c5e05995 100644
--- a/libcxx/test/std/utilities/time/time.point/time.point.cast/round.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.point/time.point.cast/round.pass.cpp
@@ -19,6 +19,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class FromDuration, class ToDuration>
 void
 test(const FromDuration& df, const ToDuration& d)
diff --git a/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/duration.pass.cpp b/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/duration.pass.cpp
index 3dde540472705..93da8b9b300d7 100644
--- a/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/duration.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/duration.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 template <class D1, class D2, class De>
 void
 test()
diff --git a/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/time_point.pass.cpp b/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/time_point.pass.cpp
index d73bb8ae6fdaf..ea20bf856b0af 100644
--- a/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/time_point.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.traits/time.traits.specializations/time_point.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <chrono>
 
+#include "test_macros.h"
+
 template <class D1, class D2, class De>
 void
 test()
diff --git a/libcxx/test/std/utilities/time/weeks.pass.cpp b/libcxx/test/std/utilities/time/weeks.pass.cpp
index 5a0cf3417f51f..82ca68ad21752 100644
--- a/libcxx/test/std/utilities/time/weeks.pass.cpp
+++ b/libcxx/test/std/utilities/time/weeks.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::weeks D;
diff --git a/libcxx/test/std/utilities/time/years.pass.cpp b/libcxx/test/std/utilities/time/years.pass.cpp
index 5016369268a48..b08e77fe854a0 100644
--- a/libcxx/test/std/utilities/time/years.pass.cpp
+++ b/libcxx/test/std/utilities/time/years.pass.cpp
@@ -15,6 +15,8 @@
 #include <type_traits>
 #include <limits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::chrono::years D;
diff --git a/libcxx/test/std/utilities/tuple/tuple.general/tuple.smartptr.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.general/tuple.smartptr.pass.cpp
index d57e7ad1825a0..43448409393fb 100644
--- a/libcxx/test/std/utilities/tuple/tuple.general/tuple.smartptr.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.general/tuple.smartptr.pass.cpp
@@ -15,6 +15,8 @@
 #include <tuple>
 #include <memory>
 
+#include "test_macros.h"
+
 int main(int, char**) {
     {
     std::tuple<std::unique_ptr<char>> up;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/TupleFunction.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/TupleFunction.pass.cpp
index ede72c2a50fc0..a2e69baa9b258 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/TupleFunction.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/TupleFunction.pass.cpp
@@ -13,6 +13,8 @@
 #include <tuple>
 #include <functional>
 
+#include "test_macros.h"
+
 struct X
 {
     X() {}
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp
index 851a535eb49c5..5d7c28bf24f3f 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp
@@ -23,6 +23,7 @@
 
 // std::array is explicitly allowed to be initialized with A a = { init-list };.
 // Disable the missing braces warning for this reason.
+#include "test_macros.h"
 #include "disable_missing_braces_warning.h"
 
 int count = 0;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_large_arity.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_large_arity.pass.cpp
index 004a5d46435c4..ce9f177e4bd56 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_large_arity.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/apply_large_arity.pass.cpp
@@ -19,6 +19,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 ////////////////////////////////////////////////////////////////////////////////
 template <class T, std::size_t Dummy = 0>
 struct always_imp
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/const_pair.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/const_pair.pass.cpp
index 9353add370fb4..cb0b6245dbe31 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/const_pair.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/const_pair.pass.cpp
@@ -19,6 +19,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_copy.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_copy.pass.cpp
index 8b9447c990019..59bc5e758112d 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_copy.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_copy.pass.cpp
@@ -19,6 +19,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     int id_;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_move.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_move.pass.cpp
index 71855a309c596..7b51b6f3cf0c0 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_move.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/convert_move.pass.cpp
@@ -21,6 +21,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     int id_;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move.pass.cpp
index 575c3b1dfad6e..3ed3037217a7a 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move.pass.cpp
@@ -19,6 +19,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 struct NonAssignable {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move_pair.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move_pair.pass.cpp
index 9681a238a9d87..ea77a3eb46b34 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move_pair.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/move_pair.pass.cpp
@@ -20,6 +20,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     int id_;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/tuple_array_template_depth.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/tuple_array_template_depth.pass.cpp
index 5796e8dbe0800..e37c25280358b 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/tuple_array_template_depth.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.assign/tuple_array_template_depth.pass.cpp
@@ -21,6 +21,8 @@
 #include <array>
 #include <tuple>
 
+#include "test_macros.h"
+
 // Use 1256 to try and blow the template instantiation depth for all compilers.
 typedef std::array<char, 1256> array_t;
 typedef std::tuple<array_t> tuple_t;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR22806_constrain_tuple_like_ctor.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR22806_constrain_tuple_like_ctor.pass.cpp
index 1e1b0846cc28f..9f262be73a462 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR22806_constrain_tuple_like_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR22806_constrain_tuple_like_ctor.pass.cpp
@@ -24,6 +24,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class Tp>
 using uncvref_t = typename std::remove_cv<typename std::remove_reference<Tp>::type>::type;
 
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp
index 919d88e46a3e1..7376a38c9b6ac 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR23256_constrain_UTypes_ctor.pass.cpp
@@ -24,6 +24,8 @@
 #include <memory>
 #include <type_traits>
 
+#include "test_macros.h"
+
 
 struct UnconstrainedCtor {
   int value_;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR27684_contains_ref_to_incomplete_type.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR27684_contains_ref_to_incomplete_type.pass.cpp
index 1493f4f81609a..f9c4a7d1461d3 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR27684_contains_ref_to_incomplete_type.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR27684_contains_ref_to_incomplete_type.pass.cpp
@@ -23,6 +23,8 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct IncompleteType;
 extern IncompleteType inc1;
 extern IncompleteType inc2;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR31384.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR31384.pass.cpp
index 6c44f7027eedd..26013d8048990 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR31384.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/PR31384.pass.cpp
@@ -17,6 +17,8 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
+
 int count = 0;
 
 struct Explicit {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc.pass.cpp
index c5f52a92846c5..b0f957e3a374b 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc.pass.cpp
@@ -22,6 +22,7 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
 #include "DefaultOnly.h"
 #include "allocators.h"
 #include "../alloc_first.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_UTypes.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_UTypes.pass.cpp
index 57e2f1b41887e..050a4b0d5d5b6 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_UTypes.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_UTypes.pass.cpp
@@ -18,6 +18,7 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "allocators.h"
 #include "../alloc_first.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_Types.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_Types.pass.cpp
index 3b5b27f7b1509..d1cd59600bd7d 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_Types.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_Types.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "../alloc_first.h"
 #include "../alloc_last.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_pair.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_pair.pass.cpp
index a7cffa72db6e8..f3dc0a27e1519 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_pair.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_const_pair.pass.cpp
@@ -19,6 +19,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "../alloc_first.h"
 #include "../alloc_last.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_copy.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_copy.pass.cpp
index 083e15797f645..86989a8fb15d1 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_copy.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_copy.pass.cpp
@@ -19,6 +19,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "../alloc_first.h"
 #include "../alloc_last.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_move.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_move.pass.cpp
index 1f33ef2fc6e3a..6973616a53a3e 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_move.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_convert_move.pass.cpp
@@ -20,6 +20,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "../alloc_first.h"
 #include "../alloc_last.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_copy.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_copy.pass.cpp
index 1db842b8d91f0..37b0c7bd1df86 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_copy.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_copy.pass.cpp
@@ -18,6 +18,7 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "../alloc_first.h"
 #include "../alloc_last.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move.pass.cpp
index fc25a4fc6a4bb..e4697becfdd30 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move.pass.cpp
@@ -18,6 +18,7 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 #include "allocators.h"
 #include "../alloc_first.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move_pair.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move_pair.pass.cpp
index e45702d88b78a..f1492f9c4a67d 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move_pair.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/alloc_move_pair.pass.cpp
@@ -20,6 +20,7 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
 #include "allocators.h"
 #include "../alloc_first.h"
 #include "../alloc_last.h"
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_move.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_move.pass.cpp
index 071f13cf9136b..f343201447be1 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_move.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_move.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct Explicit {
   int value;
   explicit Explicit(int x) : value(x) {}
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/default.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/default.pass.cpp
index ae296f73945c0..46fd35ad4ddd7 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/default.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/default.pass.cpp
@@ -19,6 +19,7 @@
 #include <cassert>
 #include <type_traits>
 
+#include "test_macros.h"
 #include "DefaultOnly.h"
 
 struct NoDefault {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp
index 80b09b8718082..4c0b2945fa41b 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp
@@ -23,6 +23,8 @@
 #include <cassert>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
   static_assert(std::is_trivially_destructible<
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move.pass.cpp
index 977dc4c326419..c298c4e45bfbe 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move.pass.cpp
@@ -18,6 +18,7 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 struct ConstructsWithTupleLeaf
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move_pair.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move_pair.pass.cpp
index 635be614b9a42..ee7cd066f7dc5 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move_pair.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/move_pair.pass.cpp
@@ -19,6 +19,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct B
 {
     int id_;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/test_lazy_sfinae.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/test_lazy_sfinae.pass.cpp
index bdbe4fc4b44d2..15c4859615c7a 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/test_lazy_sfinae.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/test_lazy_sfinae.pass.cpp
@@ -16,6 +16,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 template <class ConstructFrom>
 struct ConstructibleFromT {
   ConstructibleFromT() = default;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/tuple_array_template_depth.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/tuple_array_template_depth.pass.cpp
index 2f9447f2a7d44..318a4a3ddb807 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/tuple_array_template_depth.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/tuple_array_template_depth.pass.cpp
@@ -24,6 +24,8 @@
 #include <array>
 #include <tuple>
 
+#include "test_macros.h"
+
 // Use 1256 to try and blow the template instantiation depth for all compilers.
 typedef std::array<char, 1256> array_t;
 typedef std::tuple<array_t> tuple_t;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/get_rv.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/get_rv.pass.cpp
index ae968403b0151..c0d5698d1b9e5 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/get_rv.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/get_rv.pass.cpp
@@ -21,6 +21,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/tuple.by.type.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/tuple.by.type.pass.cpp
index 7dd4e8f10adb6..f0ea3f5c440d6 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/tuple.by.type.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.elem/tuple.by.type.pass.cpp
@@ -17,6 +17,8 @@
 
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::complex<float> cf;
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp
index c3619f7949e75..2236b6d1c29c6 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp
@@ -22,6 +22,8 @@
 #include <array>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, std::size_t N, class U, size_t idx>
 void test()
 {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp
index fdfb8b8b0a8fe..458e4ffbc8c92 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp
@@ -21,6 +21,8 @@
 #include <utility>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, std::size_t N, class U, size_t idx>
 void test()
 {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size.pass.cpp
index f27c7eb470a5b..c7a31e9ce49f4 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size.pass.cpp
@@ -19,6 +19,8 @@
 #include <tuple>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, std::size_t N>
 void test()
 {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp
index 32bad3317bcfa..f83ce7f13e7ab 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp
@@ -21,6 +21,8 @@
 #include <array>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, size_t Size = sizeof(std::tuple_size<T>)>
 constexpr bool is_complete(int) { static_assert(Size > 0, ""); return true; }
 template <class> constexpr bool is_complete(long) { return false; }
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp
index 28186a7914e2f..bbcdf616001ee 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp
@@ -22,6 +22,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct S { int x; };
 
 void test_decomp_user_type() {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.pass.cpp
index bd01f49497c02..e5113e3a55320 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_v.pass.cpp
@@ -16,6 +16,8 @@
 #include <utility>
 #include <array>
 
+#include "test_macros.h"
+
 template <class Tuple, int Expect>
 void test()
 {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_value_sfinae.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_value_sfinae.pass.cpp
index 2efbfa50e1561..aad003039ecda 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_value_sfinae.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_value_sfinae.pass.cpp
@@ -20,6 +20,8 @@
 #include <tuple>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, class = decltype(std::tuple_size<T>::value)>
 constexpr bool has_value(int) { return true; }
 template <class> constexpr bool has_value(long) { return false; }
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.special/non_member_swap.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.special/non_member_swap.pass.cpp
index eee8f18196afb..24320848d262c 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.special/non_member_swap.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.special/non_member_swap.pass.cpp
@@ -18,6 +18,7 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.swap/member_swap.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.swap/member_swap.pass.cpp
index 951a88726098b..3ebe950381ba4 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.swap/member_swap.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.swap/member_swap.pass.cpp
@@ -17,6 +17,7 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
 #include "MoveOnly.h"
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.traits/uses_allocator.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.traits/uses_allocator.pass.cpp
index b04c491ede6f6..b22d09293f280 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.traits/uses_allocator.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.traits/uses_allocator.pass.cpp
@@ -18,6 +18,8 @@
 #include <tuple>
 #include <type_traits>
 
+#include "test_macros.h"
+
 struct A {};
 
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/type.index/type.index.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.hash/enabled_hash.pass.cpp
index 710b33878d9d6..e663071e71e10 100644
--- a/libcxx/test/std/utilities/type.index/type.index.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.hash/enabled_hash.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
 
diff --git a/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp
index 8192a9020f708..0154ae5542754 100644
--- a/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.hash/hash.pass.cpp
@@ -21,6 +21,8 @@
 #include <type_traits>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::hash<std::type_index> H;
diff --git a/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp
index c133130f10e2e..fd1ce7a36bdd6 100644
--- a/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.members/ctor.pass.cpp
@@ -16,6 +16,8 @@
 #include <typeindex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::type_info const & info = typeid(int);
diff --git a/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp
index 97f6448b73e85..80ca023cb8de4 100644
--- a/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.members/eq.pass.cpp
@@ -16,6 +16,8 @@
 #include <typeindex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::type_index t1 = typeid(int);
diff --git a/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp
index 0619ff754439d..f1ac930ef0a47 100644
--- a/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.members/hash_code.pass.cpp
@@ -15,6 +15,8 @@
 #include <typeindex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::type_info& ti = typeid(int);
diff --git a/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp
index e24b3975e66dd..1697624f3e689 100644
--- a/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.members/lt.pass.cpp
@@ -18,6 +18,8 @@
 #include <typeindex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::type_index t1 = typeid(int);
diff --git a/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp
index ee91629b35cf4..739e5d14174e7 100644
--- a/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.members/name.pass.cpp
@@ -16,6 +16,8 @@
 #include <string>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     const std::type_info& ti = typeid(int);
diff --git a/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp
index 72cae39d045eb..94aa22e961c85 100644
--- a/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.overview/copy_assign.pass.cpp
@@ -15,6 +15,8 @@
 #include <typeindex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::type_index t1(typeid(int));
diff --git a/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp b/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp
index df0df2e8c92bf..ebb56fae5429e 100644
--- a/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/type.index/type.index.overview/copy_ctor.pass.cpp
@@ -15,6 +15,8 @@
 #include <typeindex>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::type_index t1(typeid(int));
diff --git a/libcxx/test/std/utilities/utility/as_const/as_const.pass.cpp b/libcxx/test/std/utilities/utility/as_const/as_const.pass.cpp
index 32d240a0b41b9..53cb11ef7e149 100644
--- a/libcxx/test/std/utilities/utility/as_const/as_const.pass.cpp
+++ b/libcxx/test/std/utilities/utility/as_const/as_const.pass.cpp
@@ -14,6 +14,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct S {int i;};
 bool operator==(const S& x, const S& y) { return x.i == y.i; }
 bool operator==(const volatile S& x, const volatile S& y) { return x.i == y.i; }
diff --git a/libcxx/test/std/utilities/utility/operators/rel_ops.pass.cpp b/libcxx/test/std/utilities/utility/operators/rel_ops.pass.cpp
index 42e808665628d..52ed642274114 100644
--- a/libcxx/test/std/utilities/utility/operators/rel_ops.pass.cpp
+++ b/libcxx/test/std/utilities/utility/operators/rel_ops.pass.cpp
@@ -11,6 +11,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct A
 {
     int data_;
diff --git a/libcxx/test/std/utilities/utility/pairs/pair.astuple/get_rv.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pair.astuple/get_rv.pass.cpp
index e0ce55bba90f8..efa97c4aca0b7 100644
--- a/libcxx/test/std/utilities/utility/pairs/pair.astuple/get_rv.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pair.astuple/get_rv.pass.cpp
@@ -20,6 +20,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/utility/pairs/pair.astuple/pairs.by.type.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pair.astuple/pairs.by.type.pass.cpp
index f2d3359e7a57b..62a71bc84278b 100644
--- a/libcxx/test/std/utilities/utility/pairs/pair.astuple/pairs.by.type.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pair.astuple/pairs.by.type.pass.cpp
@@ -16,6 +16,8 @@
 
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::complex<float> cf;
diff --git a/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_element.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_element.pass.cpp
index 1e41e3fda5d44..9ff5e0fdf71e3 100644
--- a/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_element.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_element.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 template <class T1, class T2>
 void test()
 {
diff --git a/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_size.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_size.pass.cpp
index 3b95b4749dd74..104f89811086a 100644
--- a/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_size.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pair.astuple/tuple_size.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/utility/pairs/pair.piecewise/piecewise_construct.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pair.piecewise/piecewise_construct.pass.cpp
index 98f864caf1f03..d70d060deb056 100644
--- a/libcxx/test/std/utilities/utility/pairs/pair.piecewise/piecewise_construct.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pair.piecewise/piecewise_construct.pass.cpp
@@ -19,6 +19,8 @@
 #include <tuple>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     int i_;
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/U_V.pass.cpp
index 0f22808de482c..dc4cd31764871 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/U_V.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/U_V.pass.cpp
@@ -21,6 +21,8 @@
 
 #include "archetypes.hpp"
 #include "test_convertible.hpp"
+
+#include "test_macros.h"
 using namespace ImplicitTypes; // Get implicitly archetypes
 
 template <class T1, class T1Arg,
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp
index f4dfe5e1b941c..55ce0fced101d 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 struct NonAssignable {
   NonAssignable& operator=(NonAssignable const&) = delete;
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair_cxx03.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair_cxx03.pass.cpp
index 47f85eaa6b4f4..96b4122cd5bdb 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair_cxx03.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_pair_cxx03.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct NonAssignable {
   NonAssignable() {}
 private:
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp
index b4f0c01094d09..5ace94d971d39 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair.pass.cpp
@@ -18,6 +18,8 @@
 #include <memory>
 #include <cassert>
 
+#include "test_macros.h"
+
 
 struct NonAssignable {
   NonAssignable& operator=(NonAssignable const&) = delete;
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp
index 0be0a4e9526c4..64e3adb56b7b8 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/assign_rv_pair_U_V.pass.cpp
@@ -19,6 +19,8 @@
 #include <cassert>
 #include <archetypes.hpp>
 
+#include "test_macros.h"
+
 struct Base
 {
     virtual ~Base() {}
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp
index e147d75585684..53c6cc8a6f296 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second.pass.cpp
@@ -19,6 +19,8 @@
 
 #include "archetypes.hpp"
 #include "test_convertible.hpp"
+
+#include "test_macros.h"
 using namespace ImplicitTypes; // Get implicitly archetypes
 
 struct ExplicitT {
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second_cxx03.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second_cxx03.pass.cpp
index 880179723fbd2..394ee0fe5687e 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second_cxx03.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_first_const_second_cxx03.pass.cpp
@@ -15,6 +15,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 class A
 {
     int data_;
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp
index ce1e86c1a8bb2..5a592c35cbf18 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V.pass.cpp
@@ -19,6 +19,8 @@
 
 #include "archetypes.hpp"
 #include "test_convertible.hpp"
+
+#include "test_macros.h"
 using namespace ImplicitTypes; // Get implicitly archetypes
 
 template <class T1, class U1,
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V_cxx03.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V_cxx03.pass.cpp
index 9f6498806f31a..b18ced0f4989e 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V_cxx03.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/const_pair_U_V_cxx03.pass.cpp
@@ -15,6 +15,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp
index 26b02f383ad5b..5c6eb6e640d5b 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/piecewise.pass.cpp
@@ -20,6 +20,8 @@
 #include <tuple>
 #include <utility>
 
+#include "test_macros.h"
+
 
 int main(int, char**)
 {
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp
index 0e3d9a1cb70bc..3d12398d6c9f4 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/rv_pair_U_V.pass.cpp
@@ -20,6 +20,8 @@
 
 #include "archetypes.hpp"
 #include "test_convertible.hpp"
+
+#include "test_macros.h"
 using namespace ImplicitTypes; // Get implicitly archetypes
 
 template <class T1, class U1,
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/special_member_generation_test.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/special_member_generation_test.pass.cpp
index db174e829bd4d..b1082ccfa6550 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/special_member_generation_test.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/special_member_generation_test.pass.cpp
@@ -25,6 +25,8 @@
 #include <tuple>
 
 #include "archetypes.hpp"
+
+#include "test_macros.h"
 using namespace ImplicitTypes; // Get implicitly archetypes
 
 namespace ConstructorTest {
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp
index faaae1bc255aa..9a497e5ac532d 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/swap.pass.cpp
@@ -15,6 +15,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 struct S {
     int i;
     S() : i(0) {}
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/types.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/types.pass.cpp
index 25108de5bbeef..cf62ec4020599 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.pair/types.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/types.pass.cpp
@@ -17,6 +17,8 @@
 #include <utility>
 #include <type_traits>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     typedef std::pair<float, short*> P;
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.spec/non_member_swap.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.spec/non_member_swap.pass.cpp
index 87432767217b7..9aac80468d018 100644
--- a/libcxx/test/std/utilities/utility/pairs/pairs.spec/non_member_swap.pass.cpp
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.spec/non_member_swap.pass.cpp
@@ -16,6 +16,8 @@
 #include <utility>
 #include <cassert>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     {
diff --git a/libcxx/test/std/utilities/utility/synopsis.pass.cpp b/libcxx/test/std/utilities/utility/synopsis.pass.cpp
index 5a703e1b2e580..e13a6bcf7c706 100644
--- a/libcxx/test/std/utilities/utility/synopsis.pass.cpp
+++ b/libcxx/test/std/utilities/utility/synopsis.pass.cpp
@@ -13,6 +13,8 @@
 
 #include <utility>
 
+#include "test_macros.h"
+
 int main(int, char**)
 {
     std::initializer_list<int> x;
diff --git a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp
index 4cb79a22c7ae7..5caff3b1d19d7 100644
--- a/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.bad_variant_access/bad_variant_access.pass.cpp
@@ -29,6 +29,8 @@
 #include <type_traits>
 #include <variant>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   static_assert(std::is_base_of<std::exception, std::bad_variant_access>::value,
                 "");
diff --git a/libcxx/test/std/utilities/variant/variant.hash/enabled_hash.pass.cpp b/libcxx/test/std/utilities/variant/variant.hash/enabled_hash.pass.cpp
index 7e9ffbfa06ebe..2649dd6c4ebf0 100644
--- a/libcxx/test/std/utilities/variant/variant.hash/enabled_hash.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.hash/enabled_hash.pass.cpp
@@ -17,6 +17,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 int main(int, char**) {
   test_library_hash_specializations_available();
 
diff --git a/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp b/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp
index fb027fb6380a9..654e5a9dfe9ea 100644
--- a/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp
@@ -22,6 +22,8 @@
 #include <type_traits>
 #include <variant>
 
+#include "test_macros.h"
+
 template <class V, size_t E> void test() {
   static_assert(std::variant_size<V>::value == E, "");
   static_assert(std::variant_size<const V>::value == E, "");
diff --git a/libcxx/test/std/utilities/variant/variant.monostate/monostate.pass.cpp b/libcxx/test/std/utilities/variant/variant.monostate/monostate.pass.cpp
index 1ba75a7790794..c726f684c35a2 100644
--- a/libcxx/test/std/utilities/variant/variant.monostate/monostate.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.monostate/monostate.pass.cpp
@@ -16,6 +16,8 @@
 #include <type_traits>
 #include <variant>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   using M = std::monostate;
   static_assert(std::is_trivially_default_constructible<M>::value, "");
diff --git a/libcxx/test/std/utilities/variant/variant.synopsis/variant_npos.pass.cpp b/libcxx/test/std/utilities/variant/variant.synopsis/variant_npos.pass.cpp
index 310b6980c1adb..087c574a496e7 100644
--- a/libcxx/test/std/utilities/variant/variant.synopsis/variant_npos.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.synopsis/variant_npos.pass.cpp
@@ -15,6 +15,8 @@
 
 #include <variant>
 
+#include "test_macros.h"
+
 int main(int, char**) {
   static_assert(std::variant_npos == static_cast<std::size_t>(-1), "");
 
diff --git a/libcxx/test/support/test.support/test_convertible_header.pass.cpp b/libcxx/test/support/test.support/test_convertible_header.pass.cpp
index f2923d50cd41a..fafa966bdd2eb 100644
--- a/libcxx/test/support/test.support/test_convertible_header.pass.cpp
+++ b/libcxx/test/support/test.support/test_convertible_header.pass.cpp
@@ -12,6 +12,8 @@
 
 #include "test_convertible.hpp"
 
+#include "test_macros.h"
+
 struct ImplicitDefault {
   ImplicitDefault() {}
 };
diff --git a/libcxx/test/support/test.support/test_demangle.pass.cpp b/libcxx/test/support/test.support/test_demangle.pass.cpp
index 2f1b16be9bb93..fd67a3d61dbae 100644
--- a/libcxx/test/support/test.support/test_demangle.pass.cpp
+++ b/libcxx/test/support/test.support/test_demangle.pass.cpp
@@ -6,6 +6,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "test_macros.h"
 #include "demangle.h"
 #include <typeinfo>
 #include <cassert>
diff --git a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp b/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp
index 692854b3d26ed..8c4d39e7bd475 100644
--- a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp
+++ b/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp
@@ -15,6 +15,8 @@
 
 #include "poisoned_hash_helper.hpp"
 
+#include "test_macros.h"
+
 template <class T, size_t = sizeof(T)>
 constexpr bool is_complete_imp(int) { return true; }
 template <class> constexpr bool is_complete_imp(long) { return false; }
diff --git a/libcxx/test/support/test.workarounds/c1xx_broken_is_trivially_copyable.pass.cpp b/libcxx/test/support/test.workarounds/c1xx_broken_is_trivially_copyable.pass.cpp
index 1b2fd1462e291..ef6a82c80f0c6 100644
--- a/libcxx/test/support/test.workarounds/c1xx_broken_is_trivially_copyable.pass.cpp
+++ b/libcxx/test/support/test.workarounds/c1xx_broken_is_trivially_copyable.pass.cpp
@@ -16,6 +16,7 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_workarounds.h"
 
 struct S {
diff --git a/libcxx/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp b/libcxx/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp
index 688a0f798c36a..2ecf0e7d272e4 100644
--- a/libcxx/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp
+++ b/libcxx/test/support/test.workarounds/c1xx_broken_za_ctor_check.pass.cpp
@@ -12,6 +12,7 @@
 
 #include <type_traits>
 
+#include "test_macros.h"
 #include "test_workarounds.h"
 
 struct X {

From 51e0de6954a993f4647d16a243494139043dc105 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Fri, 31 May 2019 18:54:44 +0000
Subject: [PATCH 0773/1176] [NFC][InstCombine] Add unary FNeg to cos-1.ll
 cos-2.ll cos-sin-intrinsic.ll

llvm-svn: 362253
---
 llvm/test/Transforms/InstCombine/cos-1.ll     | 162 ++++++++++++++++++
 llvm/test/Transforms/InstCombine/cos-2.ll     |   7 +
 .../InstCombine/cos-sin-intrinsic.ll          |  73 ++++++++
 3 files changed, 242 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/cos-1.ll b/llvm/test/Transforms/InstCombine/cos-1.ll
index 50db2a98e83bd..12a64403e950c 100644
--- a/llvm/test/Transforms/InstCombine/cos-1.ll
+++ b/llvm/test/Transforms/InstCombine/cos-1.ll
@@ -29,6 +29,16 @@ define double @cos_negated_arg(double %x) {
   ret double %r
 }
 
+define double @cos_unary_negated_arg(double %x) {
+; ANY-LABEL: @cos_unary_negated_arg(
+; ANY-NEXT:    [[COS:%.*]] = call double @cos(double [[X:%.*]])
+; ANY-NEXT:    ret double [[COS]]
+;
+  %neg = fneg double %x
+  %r = call double @cos(double %neg)
+  ret double %r
+}
+
 define float @cosf_negated_arg(float %x) {
 ; ANY-LABEL: @cosf_negated_arg(
 ; ANY-NEXT:    [[COS:%.*]] = call float @cosf(float [[X:%.*]])
@@ -39,6 +49,16 @@ define float @cosf_negated_arg(float %x) {
   ret float %r
 }
 
+define float @cosf_unary_negated_arg(float %x) {
+; ANY-LABEL: @cosf_unary_negated_arg(
+; ANY-NEXT:    [[COS:%.*]] = call float @cosf(float [[X:%.*]])
+; ANY-NEXT:    ret float [[COS]]
+;
+  %neg = fneg float %x
+  %r = call float @cosf(float %neg)
+  ret float %r
+}
+
 define float @cosf_negated_arg_FMF(float %x) {
 ; ANY-LABEL: @cosf_negated_arg_FMF(
 ; ANY-NEXT:    [[COS:%.*]] = call reassoc nnan float @cosf(float [[X:%.*]])
@@ -49,6 +69,16 @@ define float @cosf_negated_arg_FMF(float %x) {
   ret float %r
 }
 
+define float @cosf_unary_negated_arg_FMF(float %x) {
+; ANY-LABEL: @cosf_unary_negated_arg_FMF(
+; ANY-NEXT:    [[COS:%.*]] = call reassoc nnan float @cosf(float [[X:%.*]])
+; ANY-NEXT:    ret float [[COS]]
+;
+  %neg = fneg float %x
+  %r = call nnan reassoc float @cosf(float %neg)
+  ret float %r
+}
+
 ; sin(-x) -> -sin(x);
 
 define double @sin_negated_arg(double %x) {
@@ -62,6 +92,17 @@ define double @sin_negated_arg(double %x) {
   ret double %r
 }
 
+define double @sin_unary_negated_arg(double %x) {
+; ANY-LABEL: @sin_unary_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call double @sin(double [[X:%.*]])
+; ANY-NEXT:    [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; ANY-NEXT:    ret double [[TMP2]]
+;
+  %neg = fneg double %x
+  %r = call double @sin(double %neg)
+  ret double %r
+}
+
 define float @sinf_negated_arg(float %x) {
 ; ANY-LABEL: @sinf_negated_arg(
 ; ANY-NEXT:    [[TMP1:%.*]] = call float @sinf(float [[X:%.*]])
@@ -73,6 +114,17 @@ define float @sinf_negated_arg(float %x) {
   ret float %r
 }
 
+define float @sinf_unary_negated_arg(float %x) {
+; ANY-LABEL: @sinf_unary_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call float @sinf(float [[X:%.*]])
+; ANY-NEXT:    [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
+; ANY-NEXT:    ret float [[TMP2]]
+;
+  %neg = fneg float %x
+  %r = call float @sinf(float %neg)
+  ret float %r
+}
+
 define float @sinf_negated_arg_FMF(float %x) {
 ; ANY-LABEL: @sinf_negated_arg_FMF(
 ; ANY-NEXT:    [[TMP1:%.*]] = call nnan afn float @sinf(float [[X:%.*]])
@@ -84,6 +136,17 @@ define float @sinf_negated_arg_FMF(float %x) {
   ret float %r
 }
 
+define float @sinf_unary_negated_arg_FMF(float %x) {
+; ANY-LABEL: @sinf_unary_negated_arg_FMF(
+; ANY-NEXT:    [[TMP1:%.*]] = call nnan afn float @sinf(float [[X:%.*]])
+; ANY-NEXT:    [[TMP2:%.*]] = fsub nnan afn float -0.000000e+00, [[TMP1]]
+; ANY-NEXT:    ret float [[TMP2]]
+;
+  %neg = fneg ninf float %x
+  %r = call afn nnan float @sinf(float %neg)
+  ret float %r
+}
+
 declare void @use(double)
 
 define double @sin_negated_arg_extra_use(double %x) {
@@ -99,6 +162,19 @@ define double @sin_negated_arg_extra_use(double %x) {
   ret double %r
 }
 
+define double @sin_unary_negated_arg_extra_use(double %x) {
+; ANY-LABEL: @sin_unary_negated_arg_extra_use(
+; ANY-NEXT:    [[NEG:%.*]] = fneg double [[X:%.*]]
+; ANY-NEXT:    [[R:%.*]] = call double @sin(double [[NEG]])
+; ANY-NEXT:    call void @use(double [[NEG]])
+; ANY-NEXT:    ret double [[R]]
+;
+  %neg = fneg double %x
+  %r = call double @sin(double %neg)
+  call void @use(double %neg)
+  ret double %r
+}
+
 ; -sin(-x) --> sin(x)
 ; PR38458: https://bugs.llvm.org/show_bug.cgi?id=38458
 
@@ -113,6 +189,39 @@ define double @neg_sin_negated_arg(double %x) {
   ret double %rn
 }
 
+define double @unary_neg_sin_unary_negated_arg(double %x) {
+; ANY-LABEL: @unary_neg_sin_unary_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call double @sin(double [[X:%.*]])
+; ANY-NEXT:    ret double [[TMP1]]
+;
+  %neg = fneg double %x
+  %r = call double @sin(double %neg)
+  %rn = fneg double %r
+  ret double %rn
+}
+
+define double @neg_sin_unary_negated_arg(double %x) {
+; ANY-LABEL: @neg_sin_unary_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call double @sin(double [[X:%.*]])
+; ANY-NEXT:    ret double [[TMP1]]
+;
+  %neg = fsub double -0.0, %x
+  %r = call double @sin(double %neg)
+  %rn = fneg double %r
+  ret double %rn
+}
+
+define double @unary_neg_sin_negated_arg(double %x) {
+; ANY-LABEL: @unary_neg_sin_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call double @sin(double [[X:%.*]])
+; ANY-NEXT:    ret double [[TMP1]]
+;
+  %neg = fneg double %x
+  %r = call double @sin(double %neg)
+  %rn = fsub double -0.0, %r
+  ret double %rn
+}
+
 ; tan(-x) -> -tan(x);
 
 define double @tan_negated_arg(double %x) {
@@ -126,6 +235,17 @@ define double @tan_negated_arg(double %x) {
   ret double %r
 }
 
+define double @tan_unary_negated_arg(double %x) {
+; ANY-LABEL: @tan_unary_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call double @tan(double [[X:%.*]])
+; ANY-NEXT:    [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
+; ANY-NEXT:    ret double [[TMP2]]
+;
+  %neg = fneg double %x
+  %r = call double @tan(double %neg)
+  ret double %r
+}
+
 ; tanl(-x) -> -tanl(x);
 
 define fp128 @tanl_negated_arg(fp128 %x) {
@@ -139,6 +259,17 @@ define fp128 @tanl_negated_arg(fp128 %x) {
   ret fp128 %r
 }
 
+define fp128 @tanl_unary_negated_arg(fp128 %x) {
+; ANY-LABEL: @tanl_unary_negated_arg(
+; ANY-NEXT:    [[TMP1:%.*]] = call fp128 @tanl(fp128 [[X:%.*]])
+; ANY-NEXT:    [[TMP2:%.*]] = fsub fp128 0xL00000000000000008000000000000000, [[TMP1]]
+; ANY-NEXT:    ret fp128 [[TMP2]]
+;
+  %neg = fneg fp128 %x
+  %r = call fp128 @tanl(fp128 %neg)
+  ret fp128 %r
+}
+
 define float @negated_and_shrinkable_libcall(float %f) {
 ; NO-FLOAT-SHRINK-LABEL: @negated_and_shrinkable_libcall(
 ; NO-FLOAT-SHRINK-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
@@ -157,6 +288,24 @@ define float @negated_and_shrinkable_libcall(float %f) {
   ret float %conv2
 }
 
+define float @unary_negated_and_shrinkable_libcall(float %f) {
+; NO-FLOAT-SHRINK-LABEL: @unary_negated_and_shrinkable_libcall(
+; NO-FLOAT-SHRINK-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
+; NO-FLOAT-SHRINK-NEXT:    [[COS1:%.*]] = call double @cos(double [[CONV1]])
+; NO-FLOAT-SHRINK-NEXT:    [[CONV2:%.*]] = fptrunc double [[COS1]] to float
+; NO-FLOAT-SHRINK-NEXT:    ret float [[CONV2]]
+;
+; DO-FLOAT-SHRINK-LABEL: @unary_negated_and_shrinkable_libcall(
+; DO-FLOAT-SHRINK-NEXT:    [[COSF:%.*]] = call float @cosf(float [[F:%.*]])
+; DO-FLOAT-SHRINK-NEXT:    ret float [[COSF]]
+;
+  %conv1 = fpext float %f to double
+  %neg = fneg double %conv1
+  %cos = call double @cos(double %neg)
+  %conv2 = fptrunc double %cos to float
+  ret float %conv2
+}
+
 ; TODO: It was ok to shrink the libcall, so the intrinsic should shrink too?
 
 define float @negated_and_shrinkable_intrinsic(float %f) {
@@ -173,3 +322,16 @@ define float @negated_and_shrinkable_intrinsic(float %f) {
   ret float %conv2
 }
 
+define float @unary_negated_and_shrinkable_intrinsic(float %f) {
+; ANY-LABEL: @unary_negated_and_shrinkable_intrinsic(
+; ANY-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
+; ANY-NEXT:    [[COS:%.*]] = call double @llvm.cos.f64(double [[CONV1]])
+; ANY-NEXT:    [[CONV2:%.*]] = fptrunc double [[COS]] to float
+; ANY-NEXT:    ret float [[CONV2]]
+;
+  %conv1 = fpext float %f to double
+  %neg = fneg double %conv1
+  %cos = call double @llvm.cos.f64(double %neg)
+  %conv2 = fptrunc double %cos to float
+  ret float %conv2
+}
diff --git a/llvm/test/Transforms/InstCombine/cos-2.ll b/llvm/test/Transforms/InstCombine/cos-2.ll
index a85cc8fa6bde4..47c894f947f8d 100644
--- a/llvm/test/Transforms/InstCombine/cos-2.ll
+++ b/llvm/test/Transforms/InstCombine/cos-2.ll
@@ -15,6 +15,13 @@ define float @test_no_simplify1(double %d) {
   ret float %cos
 }
 
+define float @test_no_simplify2(double %d) {
+; CHECK-LABEL: @test_no_simplify2(
+  %neg = fneg double %d
+  %cos = call float @cos(double %neg)
+; CHECK: call float @cos(double %neg)
+  ret float %cos
+}
 
 define i8 @bogus_sqrt() {
   %fake_sqrt = call signext i8 (...) @sqrt()
diff --git a/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll b/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll
index ef5513d086e1c..2e91862668681 100644
--- a/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/cos-sin-intrinsic.ll
@@ -38,6 +38,16 @@ define float @fneg_f32(float %x) {
   ret float %cos
 }
 
+define float @unary_fneg_f32(float %x) {
+; CHECK-LABEL: @unary_fneg_f32(
+; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[COS]]
+;
+  %x.fneg = fneg float %x
+  %cos = call float @llvm.cos.f32(float %x.fneg)
+  ret float %cos
+}
+
 define <2 x float> @fneg_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @fneg_v2f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
@@ -48,6 +58,16 @@ define <2 x float> @fneg_v2f32(<2 x float> %x) {
   ret <2 x float> %cos
 }
 
+define <2 x float> @unary_fneg_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @unary_fneg_v2f32(
+; CHECK-NEXT:    [[COS:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[COS]]
+;
+  %x.fneg = fneg <2 x float> %x
+  %cos = call <2 x float> @llvm.cos.v2f32(<2 x float> %x.fneg)
+  ret <2 x float> %cos
+}
+
 ; FMF are not required, but they should propagate.
 
 define <2 x float> @fneg_cos_fmf(<2 x float> %x){
@@ -60,6 +80,16 @@ define <2 x float> @fneg_cos_fmf(<2 x float> %x){
   ret <2 x float> %r
 }
 
+define <2 x float> @unary_fneg_cos_fmf(<2 x float> %x){
+; CHECK-LABEL: @unary_fneg_cos_fmf(
+; CHECK-NEXT:    [[R:%.*]] = call nnan afn <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %negx = fneg fast <2 x float> %x
+  %r = call nnan afn <2 x float> @llvm.cos.v2f32(<2 x float> %negx)
+  ret <2 x float> %r
+}
+
 define float @fabs_f32(float %x) {
 ; CHECK-LABEL: @fabs_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
@@ -81,6 +111,17 @@ define float @fabs_fneg_f32(float %x) {
   ret float %cos
 }
 
+define float @fabs_unary_fneg_f32(float %x) {
+; CHECK-LABEL: @fabs_unary_fneg_f32(
+; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.cos.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[COS]]
+;
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %x.fabs.fneg = fneg float %x.fabs
+  %cos = call float @llvm.cos.f32(float %x.fabs.fneg)
+  ret float %cos
+}
+
 define <2 x float> @fabs_fneg_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @fabs_fneg_v2f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
@@ -92,6 +133,17 @@ define <2 x float> @fabs_fneg_v2f32(<2 x float> %x) {
   ret <2 x float> %cos
 }
 
+define <2 x float> @fabs_unary_fneg_v2f32(<2 x float> %x) {
+; CHECK-LABEL: @fabs_unary_fneg_v2f32(
+; CHECK-NEXT:    [[COS:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    ret <2 x float> [[COS]]
+;
+  %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
+  %x.fabs.fneg = fneg <2 x float> %x.fabs
+  %cos = call <2 x float> @llvm.cos.v2f32(<2 x float> %x.fabs.fneg)
+  ret <2 x float> %cos
+}
+
 ; Negate is canonicalized after sin.
 
 declare <2 x float> @llvm.sin.v2f32(<2 x float>)
@@ -107,6 +159,17 @@ define <2 x float> @fneg_sin(<2 x float> %x){
   ret <2 x float> %r
 }
 
+define <2 x float> @unary_fneg_sin(<2 x float> %x){
+; CHECK-LABEL: @unary_fneg_sin(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %negx = fneg <2 x float> %x
+  %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %negx)
+  ret <2 x float> %r
+}
+
 ; FMF are not required, but they should propagate.
 
 define <2 x float> @fneg_sin_fmf(<2 x float> %x){
@@ -120,3 +183,13 @@ define <2 x float> @fneg_sin_fmf(<2 x float> %x){
   ret <2 x float> %r
 }
 
+define <2 x float> @unary_fneg_sin_fmf(<2 x float> %x){
+; CHECK-LABEL: @unary_fneg_sin_fmf(
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan arcp afn <2 x float> @llvm.sin.v2f32(<2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fsub nnan arcp afn <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %negx = fneg fast <2 x float> %x
+  %r = call nnan arcp afn <2 x float> @llvm.sin.v2f32(<2 x float> %negx)
+  ret <2 x float> %r
+}

From ccb63e0bfe169a9adadccc01ec0c9f2bdb0f6e86 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 31 May 2019 19:04:47 +0000
Subject: [PATCH 0774/1176] Revert "[CVP] Simplify non-overflowing saturating
 add/sub"

This reverts commit 1e692d1777ae34dcb93524b5798651a29defae09.

Causes assertion failure in builtins-wasm.c clang test.

llvm-svn: 362254
---
 .../Scalar/CorrelatedValuePropagation.cpp     | 25 +------------------
 .../CorrelatedValuePropagation/overflows.ll   | 16 ++++++------
 2 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 1715838a07eb4..eab49b5f8b37e 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -63,8 +63,6 @@ STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
 STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
 STATISTIC(NumSRems,     "Number of srem converted to urem");
 STATISTIC(NumOverflows, "Number of overflow checks removed");
-STATISTIC(NumSaturating,
-    "Number of saturating arithmetics converted to normal arithmetics");
 
 static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(true));
 
@@ -415,7 +413,7 @@ static void processOverflowIntrinsic(WithOverflowInst *WO) {
   IRBuilder<> B(WO);
   Value *NewOp = B.CreateBinOp(
       WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
-  // Constant-folding could have happened.
+  // Constant-holing could have happened.
   if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
     if (WO->isSigned())
       Inst->setHasNoSignedWrap();
@@ -430,20 +428,6 @@ static void processOverflowIntrinsic(WithOverflowInst *WO) {
   ++NumOverflows;
 }
 
-static void processSaturatingInst(SaturatingInst *SI) {
-  BinaryOperator *BinOp = BinaryOperator::Create(
-      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
-  BinOp->setDebugLoc(SI->getDebugLoc());
-  if (SI->isSigned())
-    BinOp->setHasNoSignedWrap();
-  else
-    BinOp->setHasNoUnsignedWrap();
-
-  SI->replaceAllUsesWith(BinOp);
-  SI->eraseFromParent();
-  ++NumSaturating;
-}
-
 /// Infer nonnull attributes for the arguments at the specified callsite.
 static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
   SmallVector<unsigned, 4> ArgNos;
@@ -456,13 +440,6 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
     }
   }
 
-  if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
-    if (willNotOverflow(SI, LVI)) {
-      processSaturatingInst(SI);
-      return true;
-    }
-  }
-
   // Deopt bundle operands are intended to capture state with minimal
   // perturbance of the code otherwise.  If we can find a constant value for
   // any such operand and remove a use of the original value, that's
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index b692e0d1ac387..860ebafd0749d 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -739,8 +739,8 @@ define i8 @uadd_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES1:%.*]] = add nuw i8 [[X]], 100
-; CHECK-NEXT:    ret i8 [[RES1]]
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    ret i8 [[RES]]
 ;
   %cmp = icmp ugt i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -762,8 +762,8 @@ define i8 @sadd_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES1:%.*]] = add nsw i8 [[X]], 20
-; CHECK-NEXT:    ret i8 [[RES1]]
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 20)
+; CHECK-NEXT:    ret i8 [[RES]]
 ;
   %cmp = icmp sgt i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -785,8 +785,8 @@ define i8 @usub_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES1:%.*]] = sub nuw i8 [[X]], 100
-; CHECK-NEXT:    ret i8 [[RES1]]
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    ret i8 [[RES]]
 ;
   %cmp = icmp ult i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -808,8 +808,8 @@ define i8 @ssub_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES1:%.*]] = sub nsw i8 [[X]], 20
-; CHECK-NEXT:    ret i8 [[RES1]]
+; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X]], i8 20)
+; CHECK-NEXT:    ret i8 [[RES]]
 ;
   %cmp = icmp slt i8 %x, -100
   br i1 %cmp, label %trap, label %cont

From 5594ee0a3e2ba9ef67e92f69b6291f1895962fe1 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Fri, 31 May 2019 19:12:59 +0000
Subject: [PATCH 0775/1176] [NFC][InstCombine] Add unary FNeg tests to
 AMDGPU/amdgcn-intrinsics.ll

llvm-svn: 362255
---
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 9abb489f1adcb..ca5dd63bd5ca8 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -769,6 +769,16 @@ define float @cos_fneg_f32(float %x) {
   ret float %cos
 }
 
+define float @cos_unary_fneg_f32(float %x) {
+; CHECK-LABEL: @cos_unary_fneg_f32(
+; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[COS]]
+;
+  %x.fneg = fneg float %x
+  %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
+  ret float %cos
+}
+
 define float @cos_fabs_f32(float %x) {
 ; CHECK-LABEL: @cos_fabs_f32(
 ; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
@@ -790,6 +800,17 @@ define float @cos_fabs_fneg_f32(float %x) {
   ret float %cos
 }
 
+define float @cos_fabs_unary_fneg_f32(float %x) {
+; CHECK-LABEL: @cos_fabs_unary_fneg_f32(
+; CHECK-NEXT:    [[COS:%.*]] = call float @llvm.amdgcn.cos.f32(float [[X:%.*]])
+; CHECK-NEXT:    ret float [[COS]]
+;
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %x.fabs.fneg = fneg float %x.fabs
+  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
+  ret float %cos
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.cvt.pkrtz
 ; --------------------------------------------------------------------

From c669629e6c01aa55f17f91e64905aac397f1f540 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 19:18:07 +0000
Subject: [PATCH 0776/1176] [X86] Resync Host.cpp with compiler-rt's
 cpu_model.c to enable 0x55 to be identified as cascadelake when avx512vnni is
 detected.

Some other formatting changes.

llvm-svn: 362256
---
 llvm/lib/Support/Host.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 1aa2b82ce60a2..221a0af76d11b 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -661,10 +661,10 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
       break;
 
     // Skylake:
-    case 0x4e: // Skylake mobile
-    case 0x5e: // Skylake desktop
-    case 0x8e: // Kaby Lake mobile
-    case 0x9e: // Kaby Lake desktop
+    case 0x4e:              // Skylake mobile
+    case 0x5e:              // Skylake desktop
+    case 0x8e:              // Kaby Lake mobile
+    case 0x9e:              // Kaby Lake desktop
       *Type = X86::INTEL_COREI7; // "skylake"
       *Subtype = X86::INTEL_COREI7_SKYLAKE;
       break;
@@ -672,7 +672,10 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     // Skylake Xeon:
     case 0x55:
       *Type = X86::INTEL_COREI7;
-      *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+      if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32)))
+        *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake"
+      else
+        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
       break;
 
     // Cannonlake:
@@ -723,9 +726,11 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     case 0x86:
       *Type = X86::INTEL_TREMONT;
       break;
+
     case 0x57:
       *Type = X86::INTEL_KNL; // knl
       break;
+
     case 0x85:
       *Type = X86::INTEL_KNM; // knm
       break;

From aa8a976174c7ac08676bbc7bb647f6bc0efd2e72 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm@meinersbur.de>
Date: Fri, 31 May 2019 19:26:57 +0000
Subject: [PATCH 0777/1176] [ScheduleOptimizer] Hoist extension nodes after
 schedule optimization.

Extension nodes make schedule trees are less flexible: Many operations,
such as rescheduling, do not work on such schedule trees with extension.
As such, some functionality such as determining parallel loops in isl's
AST are disabled.

Currently, only the pattern-matching generalized matrix-matrix
multiplication optimization adds extension nodes (to add copy-in
statements).

This patch removes all extension nodes as the last step of the schedule
optimization by hoisting the extension node's added domain up to the
root domain node. All following passes can assume that schedule trees
work without restrictions, including the parallelism test. Mark the
outermost loop of the optimized matrix-matrix multiplication as parallel
such that -polly-parallel is able to parallelize that loop.

Differential Revision: https://reviews.llvm.org/D58202

llvm-svn: 362257
---
 polly/include/polly/ScheduleTreeTransform.h   |  26 +
 polly/include/polly/ScopInfo.h                |   5 -
 polly/lib/Analysis/ScopInfo.cpp               |  18 -
 polly/lib/CMakeLists.txt                      |   1 +
 polly/lib/CodeGen/IslAst.cpp                  |   6 -
 polly/lib/Transform/ScheduleOptimizer.cpp     |   8 +-
 polly/lib/Transform/ScheduleTreeTransform.cpp | 510 ++++++++++++++++++
 .../pattern-matching-based-opts.ll            |   3 +
 .../pattern-matching-based-opts_5.ll          | 232 ++++++--
 9 files changed, 724 insertions(+), 85 deletions(-)
 create mode 100644 polly/include/polly/ScheduleTreeTransform.h
 create mode 100644 polly/lib/Transform/ScheduleTreeTransform.cpp

diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h
new file mode 100644
index 0000000000000..9b2e722568020
--- /dev/null
+++ b/polly/include/polly/ScheduleTreeTransform.h
@@ -0,0 +1,26 @@
+//===- polly/ScheduleTreeTransform.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Make changes to isl's schedule tree data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POLLY_SCHEDULETREETRANSFORM_H
+#define POLLY_SCHEDULETREETRANSFORM_H
+
+#include "isl/isl-noexceptions.h"
+
+namespace polly {
+/// Hoist all domains from extension into the root domain node, such that there
+/// are no more extension nodes (which isl does not support for some
+/// operations). This assumes that domains added by to extension nodes do not
+/// overlap.
+isl::schedule hoistExtensionNodes(isl::schedule Sched);
+} // namespace polly
+
+#endif // POLLY_SCHEDULETREETRANSFORM_H
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index bf03899a1bb0c..bae105b7f112c 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2975,11 +2975,6 @@ class Scop {
   ///        that has name @p Name.
   ScopArrayInfo *getArrayInfoByName(const std::string BaseName);
 
-  /// Check whether @p Schedule contains extension nodes.
-  ///
-  /// @return true if @p Schedule contains extension nodes.
-  static bool containsExtensionNode(isl::schedule Schedule);
-
   /// Simplify the SCoP representation.
   ///
   /// @param AfterHoisting Whether it is called after invariant load hoisting.
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 65e088e323efc..24e26c5078802 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -4402,26 +4402,8 @@ isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
       [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
 }
 
-// Check whether @p Node is an extension node.
-//
-// @return true if @p Node is an extension node.
-isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
-  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
-    return isl_bool_error;
-  else
-    return isl_bool_true;
-}
-
-bool Scop::containsExtensionNode(isl::schedule Schedule) {
-  return isl_schedule_foreach_schedule_node_top_down(
-             Schedule.get(), isNotExtNode, nullptr) == isl_stat_error;
-}
-
 isl::union_map Scop::getSchedule() const {
   auto Tree = getScheduleTree();
-  if (containsExtensionNode(Tree))
-    return nullptr;
-
   return Tree.get_map();
 }
 
diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
index 41f19087c971f..5bbc4dcf10904 100644
--- a/polly/lib/CMakeLists.txt
+++ b/polly/lib/CMakeLists.txt
@@ -58,6 +58,7 @@ add_library(PollyCore OBJECT
   Transform/CodePreparation.cpp
   Transform/DeadCodeElimination.cpp
   Transform/ScheduleOptimizer.cpp
+  Transform/ScheduleTreeTransform.cpp
   Transform/FlattenSchedule.cpp
   Transform/FlattenAlgo.cpp
   Transform/ForwardOpTree.cpp
diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp
index 8ccbeda6a7369..94367bd331b04 100644
--- a/polly/lib/CodeGen/IslAst.cpp
+++ b/polly/lib/CodeGen/IslAst.cpp
@@ -521,13 +521,7 @@ IslAst::~IslAst() {
 void IslAst::init(const Dependences &D) {
   bool PerformParallelTest = PollyParallel || DetectParallel ||
                              PollyVectorizerChoice != VECTORIZER_NONE;
-
-  // We can not perform the dependence analysis and, consequently,
-  // the parallel code generation in case the schedule tree contains
-  // extension nodes.
   auto ScheduleTree = S.getScheduleTree();
-  PerformParallelTest =
-      PerformParallelTest && !S.containsExtensionNode(ScheduleTree);
 
   // Skip AST and code generation if there was no benefit achieved.
   if (!benefitsFromPolly(S, PerformParallelTest))
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 5554154924ca2..230317869f71c 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -50,6 +50,7 @@
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
 #include "polly/Options.h"
+#include "polly/ScheduleTreeTransform.h"
 #include "polly/ScopInfo.h"
 #include "polly/ScopPass.h"
 #include "polly/Simplify.h"
@@ -844,6 +845,10 @@ isl::schedule_node ScheduleTreeOptimizer::createMacroKernel(
   Node = Node.parent().parent();
   Node = permuteBandNodeDimensions(Node, DimOutNum - 2, DimOutNum - 1);
   Node = permuteBandNodeDimensions(Node, DimOutNum - 3, DimOutNum - 1);
+
+  // Mark the outermost loop as parallelizable.
+  Node = Node.band_member_set_coincident(0, true);
+
   return Node.child(0).child(0);
 }
 
@@ -1366,8 +1371,6 @@ bool ScheduleTreeOptimizer::isProfitableSchedule(Scop &S,
   // optimizations, by comparing (yet to be defined) performance metrics
   // before/after the scheduling optimizer
   // (e.g., #stride-one accesses)
-  if (S.containsExtensionNode(NewSchedule))
-    return true;
   auto NewScheduleMap = NewSchedule.get_map();
   auto OldSchedule = S.getSchedule();
   assert(OldSchedule && "Only IslScheduleOptimizer can insert extension nodes "
@@ -1615,6 +1618,7 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) {
   auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
   const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)};
   auto NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
+  NewSchedule = hoistExtensionNodes(NewSchedule);
   walkScheduleTreeForStatistics(NewSchedule, 2);
 
   if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule))
diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp
new file mode 100644
index 0000000000000..d571260affb49
--- /dev/null
+++ b/polly/lib/Transform/ScheduleTreeTransform.cpp
@@ -0,0 +1,510 @@
+//===- polly/ScheduleTreeTransform.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Make changes to isl's schedule tree data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/ScheduleTreeTransform.h"
+#include "polly/Support/ISLTools.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace polly;
+
+namespace {
+
+/// This class defines a simple visitor class that may be used for
+/// various schedule tree analysis purposes.
+template <typename Derived, typename RetTy = void, typename... Args>
+struct ScheduleTreeVisitor {
+  Derived &getDerived() { return *static_cast<Derived *>(this); }
+  const Derived &getDerived() const {
+    return *static_cast<const Derived *>(this);
+  }
+
+  RetTy visit(const isl::schedule_node &Node, Args... args) {
+    assert(!Node.is_null());
+    switch (isl_schedule_node_get_type(Node.get())) {
+    case isl_schedule_node_domain:
+      assert(isl_schedule_node_n_children(Node.get()) == 1);
+      return getDerived().visitDomain(Node, std::forward<Args>(args)...);
+    case isl_schedule_node_band:
+      assert(isl_schedule_node_n_children(Node.get()) == 1);
+      return getDerived().visitBand(Node, std::forward<Args>(args)...);
+    case isl_schedule_node_sequence:
+      assert(isl_schedule_node_n_children(Node.get()) >= 2);
+      return getDerived().visitSequence(Node, std::forward<Args>(args)...);
+    case isl_schedule_node_set:
+      return getDerived().visitSet(Node, std::forward<Args>(args)...);
+      assert(isl_schedule_node_n_children(Node.get()) >= 2);
+    case isl_schedule_node_leaf:
+      assert(isl_schedule_node_n_children(Node.get()) == 0);
+      return getDerived().visitLeaf(Node, std::forward<Args>(args)...);
+    case isl_schedule_node_mark:
+      assert(isl_schedule_node_n_children(Node.get()) == 1);
+      return getDerived().visitMark(Node, std::forward<Args>(args)...);
+    case isl_schedule_node_extension:
+      assert(isl_schedule_node_n_children(Node.get()) == 1);
+      return getDerived().visitExtension(Node, std::forward<Args>(args)...);
+    case isl_schedule_node_filter:
+      assert(isl_schedule_node_n_children(Node.get()) == 1);
+      return getDerived().visitFilter(Node, std::forward<Args>(args)...);
+    default:
+      llvm_unreachable("unimplemented schedule node type");
+    }
+  }
+
+  RetTy visitDomain(const isl::schedule_node &Domain, Args... args) {
+    return getDerived().visitSingleChild(Domain, std::forward<Args>(args)...);
+  }
+
+  RetTy visitBand(const isl::schedule_node &Band, Args... args) {
+    return getDerived().visitSingleChild(Band, std::forward<Args>(args)...);
+  }
+
+  RetTy visitSequence(const isl::schedule_node &Sequence, Args... args) {
+    return getDerived().visitMultiChild(Sequence, std::forward<Args>(args)...);
+  }
+
+  RetTy visitSet(const isl::schedule_node &Set, Args... args) {
+    return getDerived().visitMultiChild(Set, std::forward<Args>(args)...);
+  }
+
+  RetTy visitLeaf(const isl::schedule_node &Leaf, Args... args) {
+    return getDerived().visitNode(Leaf, std::forward<Args>(args)...);
+  }
+
+  RetTy visitMark(const isl::schedule_node &Mark, Args... args) {
+    return getDerived().visitSingleChild(Mark, std::forward<Args>(args)...);
+  }
+
+  RetTy visitExtension(const isl::schedule_node &Extension, Args... args) {
+    return getDerived().visitSingleChild(Extension,
+                                         std::forward<Args>(args)...);
+  }
+
+  RetTy visitFilter(const isl::schedule_node &Extension, Args... args) {
+    return getDerived().visitSingleChild(Extension,
+                                         std::forward<Args>(args)...);
+  }
+
+  RetTy visitSingleChild(const isl::schedule_node &Node, Args... args) {
+    return getDerived().visitNode(Node, std::forward<Args>(args)...);
+  }
+
+  RetTy visitMultiChild(const isl::schedule_node &Node, Args... args) {
+    return getDerived().visitNode(Node, std::forward<Args>(args)...);
+  }
+
+  RetTy visitNode(const isl::schedule_node &Node, Args... args) {
+    llvm_unreachable("Unimplemented other");
+  }
+};
+
+/// Recursively visit all nodes of a schedule tree.
+template <typename Derived, typename RetTy = void, typename... Args>
+struct RecursiveScheduleTreeVisitor
+    : public ScheduleTreeVisitor<Derived, RetTy, Args...> {
+  using BaseTy = ScheduleTreeVisitor<Derived, RetTy, Args...>;
+  BaseTy &getBase() { return *this; }
+  const BaseTy &getBase() const { return *this; }
+  Derived &getDerived() { return *static_cast<Derived *>(this); }
+  const Derived &getDerived() const {
+    return *static_cast<const Derived *>(this);
+  }
+
+  /// When visiting an entire schedule tree, start at its root node.
+  RetTy visit(const isl::schedule &Schedule, Args... args) {
+    return getDerived().visit(Schedule.get_root(), std::forward<Args>(args)...);
+  }
+
+  // Necessary to allow overload resolution with the added visit(isl::schedule)
+  // overload.
+  RetTy visit(const isl::schedule_node &Node, Args... args) {
+    return getBase().visit(Node, std::forward<Args>(args)...);
+  }
+
+  RetTy visitNode(const isl::schedule_node &Node, Args... args) {
+    int NumChildren = isl_schedule_node_n_children(Node.get());
+    for (int i = 0; i < NumChildren; i += 1)
+      getDerived().visit(Node.child(i), std::forward<Args>(args)...);
+    return RetTy();
+  }
+};
+
+/// Recursively visit all nodes of a schedule tree while allowing changes.
+///
+/// The visit methods return an isl::schedule_node that is used to continue
+/// visiting the tree. Structural changes such as returning a different node
+/// will confuse the visitor.
+template <typename Derived, typename... Args>
+struct ScheduleNodeRewriter
+    : public RecursiveScheduleTreeVisitor<Derived, isl::schedule_node,
+                                          Args...> {
+  Derived &getDerived() { return *static_cast<Derived *>(this); }
+  const Derived &getDerived() const {
+    return *static_cast<const Derived *>(this);
+  }
+
+  isl::schedule_node visitNode(const isl::schedule_node &Node, Args... args) {
+    if (!Node.has_children())
+      return Node;
+
+    isl::schedule_node It = Node.first_child();
+    while (true) {
+      It = getDerived().visit(It, std::forward<Args>(args)...);
+      if (!It.has_next_sibling())
+        break;
+      It = It.next_sibling();
+    }
+    return It.parent();
+  }
+};
+
+/// Rewrite a schedule tree by reconstructing it bottom-up.
+///
+/// By default, the original schedule tree is reconstructed. To build a
+/// different tree, redefine visitor methods in a derived class (CRTP).
+///
+/// Note that AST build options are not applied; Setting the isolate[] option
+/// makes the schedule tree 'anchored' and cannot be modified afterwards. Hence,
+/// AST build options must be set after the tree has been constructed.
+template <typename Derived, typename... Args>
+struct ScheduleTreeRewriter
+    : public RecursiveScheduleTreeVisitor<Derived, isl::schedule, Args...> {
+  Derived &getDerived() { return *static_cast<Derived *>(this); }
+  const Derived &getDerived() const {
+    return *static_cast<const Derived *>(this);
+  }
+
+  isl::schedule visitDomain(const isl::schedule_node &Node, Args... args) {
+    // Every schedule_tree already has a domain node, no need to add one.
+    return getDerived().visit(Node.first_child(), std::forward<Args>(args)...);
+  }
+
+  isl::schedule visitBand(const isl::schedule_node &Band, Args... args) {
+    isl::multi_union_pw_aff PartialSched =
+        isl::manage(isl_schedule_node_band_get_partial_schedule(Band.get()));
+    isl::schedule NewChild =
+        getDerived().visit(Band.child(0), std::forward<Args>(args)...);
+    isl::schedule_node NewNode =
+        NewChild.insert_partial_schedule(PartialSched).get_root().get_child(0);
+
+    // Reapply permutability and coincidence attributes.
+    NewNode = isl::manage(isl_schedule_node_band_set_permutable(
+        NewNode.release(), isl_schedule_node_band_get_permutable(Band.get())));
+    unsigned BandDims = isl_schedule_node_band_n_member(Band.get());
+    for (unsigned i = 0; i < BandDims; i += 1)
+      NewNode = isl::manage(isl_schedule_node_band_member_set_coincident(
+          NewNode.release(), i,
+          isl_schedule_node_band_member_get_coincident(Band.get(), i)));
+
+    return NewNode.get_schedule();
+  }
+
+  isl::schedule visitSequence(const isl::schedule_node &Sequence,
+                              Args... args) {
+    int NumChildren = isl_schedule_node_n_children(Sequence.get());
+    isl::schedule Result =
+        getDerived().visit(Sequence.child(0), std::forward<Args>(args)...);
+    for (int i = 1; i < NumChildren; i += 1)
+      Result = Result.sequence(
+          getDerived().visit(Sequence.child(i), std::forward<Args>(args)...));
+    return Result;
+  }
+
+  isl::schedule visitSet(const isl::schedule_node &Set, Args... args) {
+    int NumChildren = isl_schedule_node_n_children(Set.get());
+    isl::schedule Result =
+        getDerived().visit(Set.child(0), std::forward<Args>(args)...);
+    for (int i = 1; i < NumChildren; i += 1)
+      Result = isl::manage(
+          isl_schedule_set(Result.release(),
+                           getDerived()
+                               .visit(Set.child(i), std::forward<Args>(args)...)
+                               .release()));
+    return Result;
+  }
+
+  isl::schedule visitLeaf(const isl::schedule_node &Leaf, Args... args) {
+    return isl::schedule::from_domain(Leaf.get_domain());
+  }
+
+  isl::schedule visitMark(const isl::schedule_node &Mark, Args... args) {
+    isl::id TheMark = Mark.mark_get_id();
+    isl::schedule_node NewChild =
+        getDerived()
+            .visit(Mark.first_child(), std::forward<Args>(args)...)
+            .get_root()
+            .first_child();
+    return NewChild.insert_mark(TheMark).get_schedule();
+  }
+
+  isl::schedule visitExtension(const isl::schedule_node &Extension,
+                               Args... args) {
+    isl::union_map TheExtension = Extension.extension_get_extension();
+    isl::schedule_node NewChild = getDerived()
+                                      .visit(Extension.child(0), args...)
+                                      .get_root()
+                                      .first_child();
+    isl::schedule_node NewExtension =
+        isl::schedule_node::from_extension(TheExtension);
+    return NewChild.graft_before(NewExtension).get_schedule();
+  }
+
+  isl::schedule visitFilter(const isl::schedule_node &Filter, Args... args) {
+    isl::union_set FilterDomain = Filter.filter_get_filter();
+    isl::schedule NewSchedule =
+        getDerived().visit(Filter.child(0), std::forward<Args>(args)...);
+    return NewSchedule.intersect_domain(FilterDomain);
+  }
+
+  isl::schedule visitNode(const isl::schedule_node &Node, Args... args) {
+    llvm_unreachable("Not implemented");
+  }
+};
+
+/// Rewrite a schedule tree to an equivalent one without extension nodes.
+///
+/// Each visit method takes two additional arguments:
+///
+///  * The new domain the node, which is the inherited domain plus any domains
+///    added by extension nodes.
+///
+///  * A map of extension domains of all children is returned; it is required by
+///    band nodes to schedule the additional domains at the same position as the
+///    extension node would.
+///
+struct ExtensionNodeRewriter
+    : public ScheduleTreeRewriter<ExtensionNodeRewriter, const isl::union_set &,
+                                  isl::union_map &> {
+  using BaseTy = ScheduleTreeRewriter<ExtensionNodeRewriter,
+                                      const isl::union_set &, isl::union_map &>;
+  BaseTy &getBase() { return *this; }
+  const BaseTy &getBase() const { return *this; }
+
+  isl::schedule visitSchedule(const isl::schedule &Schedule) {
+    isl::union_map Extensions;
+    isl::schedule Result =
+        visit(Schedule.get_root(), Schedule.get_domain(), Extensions);
+    assert(Extensions && Extensions.is_empty());
+    return Result;
+  }
+
+  isl::schedule visitSequence(const isl::schedule_node &Sequence,
+                              const isl::union_set &Domain,
+                              isl::union_map &Extensions) {
+    int NumChildren = isl_schedule_node_n_children(Sequence.get());
+    isl::schedule NewNode = visit(Sequence.first_child(), Domain, Extensions);
+    for (int i = 1; i < NumChildren; i += 1) {
+      isl::schedule_node OldChild = Sequence.child(i);
+      isl::union_map NewChildExtensions;
+      isl::schedule NewChildNode = visit(OldChild, Domain, NewChildExtensions);
+      NewNode = NewNode.sequence(NewChildNode);
+      Extensions = Extensions.unite(NewChildExtensions);
+    }
+    return NewNode;
+  }
+
+  isl::schedule visitSet(const isl::schedule_node &Set,
+                         const isl::union_set &Domain,
+                         isl::union_map &Extensions) {
+    int NumChildren = isl_schedule_node_n_children(Set.get());
+    isl::schedule NewNode = visit(Set.first_child(), Domain, Extensions);
+    for (int i = 1; i < NumChildren; i += 1) {
+      isl::schedule_node OldChild = Set.child(i);
+      isl::union_map NewChildExtensions;
+      isl::schedule NewChildNode = visit(OldChild, Domain, NewChildExtensions);
+      NewNode = isl::manage(
+          isl_schedule_set(NewNode.release(), NewChildNode.release()));
+      Extensions = Extensions.unite(NewChildExtensions);
+    }
+    return NewNode;
+  }
+
+  isl::schedule visitLeaf(const isl::schedule_node &Leaf,
+                          const isl::union_set &Domain,
+                          isl::union_map &Extensions) {
+    isl::ctx Ctx = Leaf.get_ctx();
+    Extensions = isl::union_map::empty(isl::space::params_alloc(Ctx, 0));
+    return isl::schedule::from_domain(Domain);
+  }
+
+  isl::schedule visitBand(const isl::schedule_node &OldNode,
+                          const isl::union_set &Domain,
+                          isl::union_map &OuterExtensions) {
+    isl::schedule_node OldChild = OldNode.first_child();
+    isl::multi_union_pw_aff PartialSched =
+        isl::manage(isl_schedule_node_band_get_partial_schedule(OldNode.get()));
+
+    isl::union_map NewChildExtensions;
+    isl::schedule NewChild = visit(OldChild, Domain, NewChildExtensions);
+
+    // Add the extensions to the partial schedule.
+    OuterExtensions = isl::union_map::empty(NewChildExtensions.get_space());
+    isl::union_map NewPartialSchedMap = isl::union_map::from(PartialSched);
+    unsigned BandDims = isl_schedule_node_band_n_member(OldNode.get());
+    for (isl::map Ext : NewChildExtensions.get_map_list()) {
+      unsigned ExtDims = Ext.dim(isl::dim::in);
+      assert(ExtDims >= BandDims);
+      unsigned OuterDims = ExtDims - BandDims;
+
+      isl::map BandSched =
+          Ext.project_out(isl::dim::in, 0, OuterDims).reverse();
+      NewPartialSchedMap = NewPartialSchedMap.unite(BandSched);
+
+      // There might be more outer bands that have to schedule the extensions.
+      if (OuterDims > 0) {
+        isl::map OuterSched =
+            Ext.project_out(isl::dim::in, OuterDims, BandDims);
+        OuterExtensions = OuterExtensions.add_map(OuterSched);
+      }
+    }
+    isl::multi_union_pw_aff NewPartialSchedAsAsMultiUnionPwAff =
+        isl::multi_union_pw_aff::from_union_map(NewPartialSchedMap);
+    isl::schedule_node NewNode =
+        NewChild.insert_partial_schedule(NewPartialSchedAsAsMultiUnionPwAff)
+            .get_root()
+            .get_child(0);
+
+    // Reapply permutability and coincidence attributes.
+    NewNode = isl::manage(isl_schedule_node_band_set_permutable(
+        NewNode.release(),
+        isl_schedule_node_band_get_permutable(OldNode.get())));
+    for (unsigned i = 0; i < BandDims; i += 1) {
+      NewNode = isl::manage(isl_schedule_node_band_member_set_coincident(
+          NewNode.release(), i,
+          isl_schedule_node_band_member_get_coincident(OldNode.get(), i)));
+    }
+
+    return NewNode.get_schedule();
+  }
+
+  isl::schedule visitFilter(const isl::schedule_node &Filter,
+                            const isl::union_set &Domain,
+                            isl::union_map &Extensions) {
+    isl::union_set FilterDomain = Filter.filter_get_filter();
+    isl::union_set NewDomain = Domain.intersect(FilterDomain);
+
+    // A filter is added implicitly if necessary when joining schedule trees.
+    return visit(Filter.first_child(), NewDomain, Extensions);
+  }
+
+  isl::schedule visitExtension(const isl::schedule_node &Extension,
+                               const isl::union_set &Domain,
+                               isl::union_map &Extensions) {
+    isl::union_map ExtDomain = Extension.extension_get_extension();
+    isl::union_set NewDomain = Domain.unite(ExtDomain.range());
+    isl::union_map ChildExtensions;
+    isl::schedule NewChild =
+        visit(Extension.first_child(), NewDomain, ChildExtensions);
+    Extensions = ChildExtensions.unite(ExtDomain);
+    return NewChild;
+  }
+};
+
+/// Collect all AST build options in any schedule tree band.
+///
+/// ScheduleTreeRewriter cannot apply the schedule tree options. This class
+/// collects these options to apply them later.
+struct CollectASTBuildOptions
+    : public RecursiveScheduleTreeVisitor<CollectASTBuildOptions> {
+  using BaseTy = RecursiveScheduleTreeVisitor<CollectASTBuildOptions>;
+  BaseTy &getBase() { return *this; }
+  const BaseTy &getBase() const { return *this; }
+
+  llvm::SmallVector<isl::union_set, 8> ASTBuildOptions;
+
+  void visitBand(const isl::schedule_node &Band) {
+    ASTBuildOptions.push_back(
+        isl::manage(isl_schedule_node_band_get_ast_build_options(Band.get())));
+    return getBase().visitBand(Band);
+  }
+};
+
+/// Apply AST build options to the bands in a schedule tree.
+///
+/// This rewrites a schedule tree with the AST build options applied. We assume
+/// that the band nodes are visited in the same order as they were when the
+/// build options were collected, typically by CollectASTBuildOptions.
+struct ApplyASTBuildOptions
+    : public ScheduleNodeRewriter<ApplyASTBuildOptions> {
+  using BaseTy = ScheduleNodeRewriter<ApplyASTBuildOptions>;
+  BaseTy &getBase() { return *this; }
+  const BaseTy &getBase() const { return *this; }
+
+  int Pos;
+  llvm::ArrayRef<isl::union_set> ASTBuildOptions;
+
+  ApplyASTBuildOptions(llvm::ArrayRef<isl::union_set> ASTBuildOptions)
+      : ASTBuildOptions(ASTBuildOptions) {}
+
+  isl::schedule visitSchedule(const isl::schedule &Schedule) {
+    Pos = 0;
+    isl::schedule Result = visit(Schedule).get_schedule();
+    assert(Pos == ASTBuildOptions.size() &&
+           "AST build options must match to band nodes");
+    return Result;
+  }
+
+  isl::schedule_node visitBand(const isl::schedule_node &Band) {
+    isl::schedule_node Result =
+        Band.band_set_ast_build_options(ASTBuildOptions[Pos]);
+    Pos += 1;
+    return getBase().visitBand(Result);
+  }
+};
+
+} // namespace
+
+/// Return whether the schedule contains an extension node.
+static bool containsExtensionNode(isl::schedule Schedule) {
+  assert(!Schedule.is_null());
+
+  auto Callback = [](__isl_keep isl_schedule_node *Node,
+                     void *User) -> isl_bool {
+    if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension) {
+      // Stop walking the schedule tree.
+      return isl_bool_error;
+    }
+
+    // Continue searching the subtree.
+    return isl_bool_true;
+  };
+  isl_stat RetVal = isl_schedule_foreach_schedule_node_top_down(
+      Schedule.get(), Callback, nullptr);
+
+  // We assume that the traversal itself does not fail, i.e. the only reason to
+  // return isl_stat_error is that an extension node was found.
+  return RetVal == isl_stat_error;
+}
+
+isl::schedule polly::hoistExtensionNodes(isl::schedule Sched) {
+  // If there is no extension node in the first place, return the original
+  // schedule tree.
+  if (!containsExtensionNode(Sched))
+    return Sched;
+
+  // Build options can anchor schedule nodes, such that the schedule tree cannot
+  // be modified anymore. Therefore, apply build options after the tree has been
+  // created.
+  CollectASTBuildOptions Collector;
+  Collector.visit(Sched);
+
+  // Rewrite the schedule tree without extension nodes.
+  ExtensionNodeRewriter Rewriter;
+  isl::schedule NewSched = Rewriter.visitSchedule(Sched);
+
+  // Reapply the AST build options. The rewriter must not change the iteration
+  // order of bands. Any other node type is ignored.
+  ApplyASTBuildOptions Applicator(Collector.ASTBuildOptions);
+  NewSched = Applicator.visitSchedule(NewSched);
+
+  return NewSched;
+}
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll
index a7d77050ff284..cc65645e32f58 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll
@@ -1,6 +1,7 @@
 ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=false \
 ; RUN: -debug < %s 2>&1| FileCheck %s
 ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -debug < %s 2>&1| FileCheck %s --check-prefix=PATTERN-MATCHING-OPTS
+; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-ast-detect-parallel -polly-ast -analyze < %s | FileCheck %s --check-prefix=PARALLEL-AST
 ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -stats -disable-output < %s 2>&1| FileCheck %s --check-prefix=STATS -match-full-lines
 ; REQUIRES: asserts
 ;
@@ -15,6 +16,8 @@
 ;
 ; CHECK-NOT: The matrix multiplication pattern was detected
 ; PATTERN-MATCHING-OPTS: The matrix multiplication pattern was detected
+; PARALLEL-AST: #pragma known-parallel
+; PARALLEL-AST: #pragma known-parallel
 ; STATS:  1 polly-opt-isl    - Number of matrix multiplication patterns detected and optimized
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
index 55aa0dc977d71..1875aac6c3379 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
@@ -93,44 +93,168 @@
 ; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
 ; CHECK-NEXT:                            }
 ; CHECK-NEXT:                          }
-; CHECK-NEXT:                        if (96 * c2 + 95 >= ni && ni % 4 >= 1)
+; CHECK-NEXT:                        if ((ni >= 96 * c2 + 5 && 96 * c2 + 7 >= ni && c3 >= 0) || (ni >= 96 * c2 + 8 && 96 * c2 + 95 >= ni && ni % 4 >= 1)) {
+; CHECK-NEXT:                          if (96 * c2 + 7 >= ni) {
+; CHECK-NEXT:                            for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
+; CHECK-NEXT:                              // Loop Vectorizer Disabled
+; CHECK-NEXT:                              // Register tiling - Points
+; CHECK-NEXT:                              {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                if (ni >= 96 * c2 + 6) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 5, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (96 * c2 + 7 == ni) {
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                            }
+; CHECK-NEXT:                          } else {
+; CHECK-NEXT:                            for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
+; CHECK-NEXT:                              // Loop Vectorizer Disabled
+; CHECK-NEXT:                              // Register tiling - Points
+; CHECK-NEXT:                              {
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni - 1) % 4) + ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                if (ni % 4 >= 2) {
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(-((ni - 1) % 4) + ni, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                  if ((ni + 1) % 4 == 0) {
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                            }
+; CHECK-NEXT:                          }
+; CHECK-NEXT:                        }
+; CHECK-NEXT:                      }
+; CHECK-NEXT:                    if (96 * c2 + 3 >= ni || 2048 * c0 + 7 >= nj || (2048 * c0 + 2047 >= nj && nj % 8 >= 1)) {
+; CHECK-NEXT:                      if (2048 * c0 + 7 >= nj) {
+; CHECK-NEXT:                        for (int c4 = 0; c4 <= min(23, -24 * c2 + (ni - 1) / 4); c4 += 1)
 ; CHECK-NEXT:                          for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
 ; CHECK-NEXT:                            // Loop Vectorizer Disabled
 ; CHECK-NEXT:                            // Register tiling - Points
 ; CHECK-NEXT:                            {
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
-; CHECK-NEXT:                              if (ni % 4 >= 2) {
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
-; CHECK-NEXT:                                if ((ni + 1) % 4 == 0) {
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0, 256 * c1 + c5);
+; CHECK-NEXT:                              if (nj >= 2048 * c0 + 2) {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                if (nj >= 2048 * c0 + 3) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj >= 2048 * c0 + 4) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 5) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 6) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (2048 * c0 + 7 == nj)
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                              if (ni >= 96 * c2 + 4 * c4 + 2) {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0, 256 * c1 + c5);
+; CHECK-NEXT:                                if (nj >= 2048 * c0 + 2) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj >= 2048 * c0 + 3) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 4) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 5) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj >= 2048 * c0 + 6) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (2048 * c0 + 7 == nj)
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 1, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                                if (ni >= 96 * c2 + 4 * c4 + 3) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj >= 2048 * c0 + 2) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 3) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 4) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj >= 2048 * c0 + 5) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj >= 2048 * c0 + 6) {
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                            if (2048 * c0 + 7 == nj)
+; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                          }
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                  if (ni >= 96 * c2 + 4 * c4 + 4) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 2) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 3) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj >= 2048 * c0 + 4) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj >= 2048 * c0 + 5) {
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                            if (nj >= 2048 * c0 + 6) {
+; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                              if (2048 * c0 + 7 == nj)
+; CHECK-NEXT:                                                Stmt_for_body6(96 * c2 + 4 * c4 + 3, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                            }
+; CHECK-NEXT:                                          }
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
 ; CHECK-NEXT:                                }
 ; CHECK-NEXT:                              }
 ; CHECK-NEXT:                            }
 ; CHECK-NEXT:                          }
-; CHECK-NEXT:                      }
-; CHECK-NEXT:                    if (96 * c2 + 3 >= ni || (2048 * c0 + 2047 >= nj && nj % 8 >= 1)) {
-; CHECK-NEXT:                      if (96 * c2 + 3 >= ni) {
+; CHECK-NEXT:                      } else if (96 * c2 + 3 >= ni) {
 ; CHECK-NEXT:                        for (int c3 = 0; c3 <= min(255, -256 * c0 + (nj - 1) / 8); c3 += 1)
 ; CHECK-NEXT:                          for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
 ; CHECK-NEXT:                            // Loop Vectorizer Disabled
@@ -211,17 +335,17 @@
 ; CHECK-NEXT:                            // Loop Vectorizer Disabled
 ; CHECK-NEXT:                            // Register tiling - Points
 ; CHECK-NEXT:                            {
-; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, -((nj - 1) % 8) + nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                              if (nj % 8 >= 2) {
-; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4, -((nj - 1) % 8) + nj, 256 * c1 + c5);
 ; CHECK-NEXT:                                if (nj % 8 >= 3) {
-; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4, -((nj - 1) % 8) + nj + 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                  if (nj % 8 >= 4) {
-; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4, -((nj - 1) % 8) + nj + 2, 256 * c1 + c5);
 ; CHECK-NEXT:                                    if (nj % 8 >= 5) {
-; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4, -((nj - 1) % 8) + nj + 3, 256 * c1 + c5);
 ; CHECK-NEXT:                                      if (nj % 8 >= 6) {
-; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4, -((nj - 1) % 8) + nj + 4, 256 * c1 + c5);
 ; CHECK-NEXT:                                        if ((nj + 1) % 8 == 0)
 ; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4, nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                      }
@@ -230,17 +354,17 @@
 ; CHECK-NEXT:                                }
 ; CHECK-NEXT:                              }
 ; CHECK-NEXT:                              if (ni >= 96 * c2 + 4 * c4 + 2) {
-; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4 + 1, -((nj - 1) % 8) + nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                if (nj % 8 >= 2) {
-; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 1, -((nj - 1) % 8) + nj, 256 * c1 + c5);
 ; CHECK-NEXT:                                  if (nj % 8 >= 3) {
-; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, -((nj - 1) % 8) + nj + 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                    if (nj % 8 >= 4) {
-; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 1, -((nj - 1) % 8) + nj + 2, 256 * c1 + c5);
 ; CHECK-NEXT:                                      if (nj % 8 >= 5) {
-; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, -((nj - 1) % 8) + nj + 3, 256 * c1 + c5);
 ; CHECK-NEXT:                                        if (nj % 8 >= 6) {
-; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 1, -((nj - 1) % 8) + nj + 4, 256 * c1 + c5);
 ; CHECK-NEXT:                                          if ((nj + 1) % 8 == 0)
 ; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 1, nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                        }
@@ -249,17 +373,17 @@
 ; CHECK-NEXT:                                  }
 ; CHECK-NEXT:                                }
 ; CHECK-NEXT:                                if (ni >= 96 * c2 + 4 * c4 + 3) {
-; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 2, -((nj - 1) % 8) + nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                  if (nj % 8 >= 2) {
-; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, -((nj - 1) % 8) + nj, 256 * c1 + c5);
 ; CHECK-NEXT:                                    if (nj % 8 >= 3) {
-; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 2, -((nj - 1) % 8) + nj + 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                      if (nj % 8 >= 4) {
-; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, -((nj - 1) % 8) + nj + 2, 256 * c1 + c5);
 ; CHECK-NEXT:                                        if (nj % 8 >= 5) {
-; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 2, -((nj - 1) % 8) + nj + 3, 256 * c1 + c5);
 ; CHECK-NEXT:                                          if (nj % 8 >= 6) {
-; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 2, -((nj - 1) % 8) + nj + 4, 256 * c1 + c5);
 ; CHECK-NEXT:                                            if ((nj + 1) % 8 == 0)
 ; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                          }
@@ -268,17 +392,17 @@
 ; CHECK-NEXT:                                    }
 ; CHECK-NEXT:                                  }
 ; CHECK-NEXT:                                  if (ni >= 96 * c2 + 4 * c4 + 4) {
-; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, -((nj - 1) % 8) + nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                    if (nj % 8 >= 2) {
-; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 3, -((nj - 1) % 8) + nj, 256 * c1 + c5);
 ; CHECK-NEXT:                                      if (nj % 8 >= 3) {
-; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, -((nj - 1) % 8) + nj + 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                        if (nj % 8 >= 4) {
-; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 3, -((nj - 1) % 8) + nj + 2, 256 * c1 + c5);
 ; CHECK-NEXT:                                          if (nj % 8 >= 5) {
-; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 3, -((nj - 1) % 8) + nj + 3, 256 * c1 + c5);
 ; CHECK-NEXT:                                            if (nj % 8 >= 6) {
-; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, -((nj - 1) % 8) + nj + 4, 256 * c1 + c5);
 ; CHECK-NEXT:                                              if ((nj + 1) % 8 == 0)
 ; CHECK-NEXT:                                                Stmt_for_body6(96 * c2 + 4 * c4 + 3, nj - 1, 256 * c1 + c5);
 ; CHECK-NEXT:                                            }

From c16ab9dd886920bf56bd51cbcf7218a7186e99dd Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm@meinersbur.de>
Date: Fri, 31 May 2019 19:40:20 +0000
Subject: [PATCH 0778/1176] [ScopBuilder] Move verifyInvariantLoads function
 from ScopInfo. NFC.

Refactor Scop and ScopBuilder class. Move verifyInvariantLoads from Scop
class to ScopBuilder class.

Patch by: Dominik Adamski <adamski.dominik@gmail.com>

Differential Revision: https://reviews.llvm.org/D62628

llvm-svn: 362258
---
 polly/include/polly/ScopBuilder.h  | 15 +++++++++++++++
 polly/include/polly/ScopInfo.h     | 15 ---------------
 polly/lib/Analysis/ScopBuilder.cpp | 16 +++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 14 --------------
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index cd17beacd6322..f24a46a7e905a 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -360,6 +360,21 @@ class ScopBuilder {
   /// potential reduction.
   void checkForReductions(ScopStmt &Stmt);
 
+  /// Verify that all required invariant loads have been hoisted.
+  ///
+  /// Invariant load hoisting is not guaranteed to hoist all loads that were
+  /// assumed to be scop invariant during scop detection. This function checks
+  /// for cases where the hoisting failed, but where it would have been
+  /// necessary for our scop modeling to be correct. In case of insufficient
+  /// hoisting the scop is marked as invalid.
+  ///
+  /// In the example below Bound[1] is required to be invariant:
+  ///
+  /// for (int i = 1; i < Bound[0]; i++)
+  ///   for (int j = 1; j < Bound[1]; j++)
+  ///     ...
+  void verifyInvariantLoads();
+
   /// Collect loads which might form a reduction chain with @p StoreMA.
   ///
   /// Check if the stored value for @p StoreMA is a binary operator with one or
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index bae105b7f112c..05d4d9cfcd715 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2040,21 +2040,6 @@ class Scop {
   ///         nullptr if it cannot be hoisted at all.
   isl::set getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes);
 
-  /// Verify that all required invariant loads have been hoisted.
-  ///
-  /// Invariant load hoisting is not guaranteed to hoist all loads that were
-  /// assumed to be scop invariant during scop detection. This function checks
-  /// for cases where the hoisting failed, but where it would have been
-  /// necessary for our scop modeling to be correct. In case of insufficient
-  /// hoisting the scop is marked as invalid.
-  ///
-  /// In the example below Bound[1] is required to be invariant:
-  ///
-  /// for (int i = 1; i < Bound[0]; i++)
-  ///   for (int j = 1; j < Bound[1]; j++)
-  ///     ...
-  void verifyInvariantLoads();
-
   /// Hoist invariant memory loads and check for required ones.
   ///
   /// We first identify "common" invariant loads, thus loads that are invariant
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index b1de18fa16378..51c9472687d9e 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -1306,6 +1306,20 @@ void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
   }
 }
 
+void ScopBuilder::verifyInvariantLoads() {
+  auto &RIL = scop->getRequiredInvariantLoads();
+  for (LoadInst *LI : RIL) {
+    assert(LI && scop->contains(LI));
+    // If there exists a statement in the scop which has a memory access for
+    // @p LI, then mark this scop as infeasible for optimization.
+    for (ScopStmt &Stmt : *scop)
+      if (Stmt.getArrayAccessOrNULLFor(LI)) {
+        scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
+        return;
+      }
+  }
+}
+
 void ScopBuilder::collectCandidateReductionLoads(
     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
   ScopStmt *Stmt = StoreMA->getStatement();
@@ -1588,7 +1602,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
 
   scop->hoistInvariantLoads();
   scop->canonicalizeDynamicBasePtrs();
-  scop->verifyInvariantLoads();
+  verifyInvariantLoads();
   scop->simplifySCoP(true);
 
   // Check late for a feasible runtime context because profitability did not
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 24e26c5078802..f7712c3efb886 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3778,20 +3778,6 @@ isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
   return WrittenCtx;
 }
 
-void Scop::verifyInvariantLoads() {
-  auto &RIL = getRequiredInvariantLoads();
-  for (LoadInst *LI : RIL) {
-    assert(LI && contains(LI));
-    // If there exists a statement in the scop which has a memory access for
-    // @p LI, then mark this scop as infeasible for optimization.
-    for (ScopStmt &Stmt : Stmts)
-      if (Stmt.getArrayAccessOrNULLFor(LI)) {
-        invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
-        return;
-      }
-  }
-}
-
 void Scop::hoistInvariantLoads() {
   if (!PollyInvariantLoadHoisting)
     return;

From 4dc0acc915d1a0d1348ff83596979c44a89baeb6 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Fri, 31 May 2019 20:17:21 +0000
Subject: [PATCH 0779/1176] [Target] Remove ABI's dependence on
 ExpressionParser

llvm-svn: 362259
---
 lldb/source/Target/ABI.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lldb/source/Target/ABI.cpp b/lldb/source/Target/ABI.cpp
index d1fc274e2ed46..d71cee7f58054 100644
--- a/lldb/source/Target/ABI.cpp
+++ b/lldb/source/Target/ABI.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/Target/ABI.h"
-#include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
 #include "lldb/Core/PluginManager.h"
 #include "lldb/Core/Value.h"
 #include "lldb/Core/ValueObjectConstResult.h"
+#include "lldb/Expression/ExpressionVariable.h"
 #include "lldb/Symbol/CompilerType.h"
 #include "lldb/Symbol/TypeSystem.h"
 #include "lldb/Target/Target.h"
@@ -142,16 +142,16 @@ ValueObjectSP ABI::GetReturnValueObject(Thread &thread, CompilerType &ast_type,
     case Value::eValueTypeScalar:
     case Value::eValueTypeVector:
       clang_expr_variable_sp->m_flags |=
-          ClangExpressionVariable::EVIsFreezeDried;
+          ExpressionVariable::EVIsFreezeDried;
       clang_expr_variable_sp->m_flags |=
-          ClangExpressionVariable::EVIsLLDBAllocated;
+          ExpressionVariable::EVIsLLDBAllocated;
       clang_expr_variable_sp->m_flags |=
-          ClangExpressionVariable::EVNeedsAllocation;
+          ExpressionVariable::EVNeedsAllocation;
       break;
     case Value::eValueTypeLoadAddress:
       clang_expr_variable_sp->m_live_sp = live_valobj_sp;
       clang_expr_variable_sp->m_flags |=
-          ClangExpressionVariable::EVIsProgramReference;
+          ExpressionVariable::EVIsProgramReference;
       break;
     }
 

From f711d594272e77dda08764d2f5bf2d8d8508ee92 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Fri, 31 May 2019 20:34:57 +0000
Subject: [PATCH 0780/1176] [Tests] Add ne icmp tests w/preinc forms for
 LoopPredication

Turns out this is substaintially easier to match then the post increment form, so let's start there.

llvm-svn: 362260
---
 llvm/test/Transforms/LoopPredication/basic.ll | 86 +++++++++++++++++--
 1 file changed, 80 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index 0037dcdff91a8..b9b2314ba7813 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -1630,6 +1630,39 @@ exit:
   ret i32 0
 }
 
+; Same as previous, but with a pre-increment test since this is easier to match
+define i32 @ne_latch_zext_preinc(i32* %array, i32 %length, i16 %n16) {
+; CHECK-LABEL: @ne_latch_zext_preinc(
+; CHECK-NEXT:  loop.preheader:
+; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+loop.preheader:
+  %n = zext i16 %n16 to i32
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ne i32 %i, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
 ; NE Check (as produced by LFTR) where we can prove Start < End via the
 ; condition guarding the loop entry.
 define i32 @ne_latch_dom_check(i32* %array, i32 %length, i32 %n) {
@@ -1671,6 +1704,47 @@ exit:
   ret i32 0
 }
 
+; Same as previous, but easier to match
+define i32 @ne_latch_dom_check_preinc(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @ne_latch_dom_check_preinc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %tmp5 = icmp sle i32 %n, 0
+  br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+  %within.bounds = icmp ult i32 %i, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  %i.next = add nuw i32 %i, 1
+  %continue = icmp ne i32 %i, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+
 ; NE latch - can't prove (end-start) mod step == 0 (i.e. might wrap
 ; around several times or even be infinite)
 define i32 @neg_ne_latch_mod_step(i32* %array, i32 %length, i16 %n16) {
@@ -1683,7 +1757,7 @@ define i32 @neg_ne_latch_mod_step(i32* %array, i32 %length, i16 %n16) {
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 3
-; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
@@ -1698,7 +1772,7 @@ loop:
   call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
 
   %i.next = add i32 %i, 3
-  %continue = icmp ne i32 %i.next, %n
+  %continue = icmp ne i32 %i, %n
   br i1 %continue, label %loop, label %exit
 
 exit:
@@ -1715,7 +1789,7 @@ define i32 @ne_latch_mod_step(i32* %array, i32 %length) {
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 2
-; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], 400
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], 400
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
@@ -1729,7 +1803,7 @@ loop:
   call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
 
   %i.next = add nuw i32 %i, 2
-  %continue = icmp ne i32 %i.next, 400
+  %continue = icmp ne i32 %i, 400
   br i1 %continue, label %loop, label %exit
 
 exit:
@@ -1746,7 +1820,7 @@ define i32 @neg_ne_latch_swapped_order(i32* %array, i32 %length) {
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
-; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], 0
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], 0
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
@@ -1760,7 +1834,7 @@ loop:
   call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
 
   %i.next = add i32 %i, 1
-  %continue = icmp ne i32 %i.next, 0
+  %continue = icmp ne i32 %i, 0
   br i1 %continue, label %loop, label %exit
 
 exit:

From 23a02f6a5fa5892136fb57e573ae2e3bcc929d10 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 31 May 2019 20:42:07 +0000
Subject: [PATCH 0781/1176] [CVP] Fix assertion failure on vector with.overflow

Noticed on D62703. LVI only handles plain integers, not vectors of
integers. This was previously not an issue, because vector support
for with.overflow is only a relatively recent addition.

llvm-svn: 362261
---
 .../Transforms/Scalar/CorrelatedValuePropagation.cpp |  2 +-
 .../CorrelatedValuePropagation/overflows.ll          | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index eab49b5f8b37e..9ec2afb99d683 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -434,7 +434,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
   unsigned ArgNo = 0;
 
   if (auto *WO = dyn_cast<WithOverflowInst>(CS.getInstruction())) {
-    if (willNotOverflow(WO, LVI)) {
+    if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
       processOverflowIntrinsic(WO);
       return true;
     }
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index 860ebafd0749d..a0d268c059f9b 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -21,6 +21,8 @@ declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
 
 declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)
 
+declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32>, <2 x i32>)
+
 declare i8 @llvm.uadd.sat.i8(i8, i8)
 declare i8 @llvm.sadd.sat.i8(i8, i8)
 declare i8 @llvm.usub.sat.i8(i8, i8)
@@ -731,6 +733,16 @@ define { i8, i1 } @signed_mul_constant_folding() {
   ret { i8, i1 } %mul
 }
 
+define { <2 x i32>, <2 x i1> } @uaddo_vec(<2 x i32> %a) {
+; CHECK-LABEL: @uaddo_vec(
+; CHECK-NEXT:    [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[ADD]]
+;
+  %add = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 1, i32 1>)
+  ret { <2 x i32>, <2 x i1> } %add
+}
+
+
 define i8 @uadd_sat_no_overflow(i8 %x) {
 ; CHECK-LABEL: @uadd_sat_no_overflow(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X:%.*]], 100

From d43509305674e89e6ac482b5a76c2ac20c2de1c4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 31 May 2019 20:42:13 +0000
Subject: [PATCH 0782/1176] [CVP] Add vector saturating add test; NFC

Extra test for the assertion failure from D62703.

llvm-svn: 362262
---
 .../Transforms/CorrelatedValuePropagation/overflows.ll | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index a0d268c059f9b..66cd85abe9751 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -27,6 +27,7 @@ declare i8 @llvm.uadd.sat.i8(i8, i8)
 declare i8 @llvm.sadd.sat.i8(i8, i8)
 declare i8 @llvm.usub.sat.i8(i8, i8)
 declare i8 @llvm.ssub.sat.i8(i8, i8)
+declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
 
 declare void @llvm.trap()
 
@@ -834,3 +835,12 @@ cont:
   %res = call i8 @llvm.ssub.sat.i8(i8 %x, i8 20)
   ret i8 %res
 }
+
+define <2 x i8> @uadd_sat_vec(<2 x i8> %a) {
+; CHECK-LABEL: @uadd_sat_vec(
+; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 1, i8 1>)
+; CHECK-NEXT:    ret <2 x i8> [[ADD]]
+;
+  %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 1, i8 1>)
+  ret <2 x i8> %add
+}

From 7bafae55c01dda8478aeff74a8d657a4148630af Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Fri, 31 May 2019 20:48:26 +0000
Subject: [PATCH 0783/1176] Reapply [CVP] Simplify non-overflowing saturating
 add/sub

If we can determine that a saturating add/sub will not overflow based
on range analysis, convert it into a simple binary operation. This is
a sibling transform to the existing with.overflow handling.

Reapplying this with an additional check that the saturating intrinsic
has integer type, as LVI currently does not support vector types.

Differential Revision: https://reviews.llvm.org/D62703

llvm-svn: 362263
---
 .../Scalar/CorrelatedValuePropagation.cpp     | 25 ++++++++++++++++++-
 .../CorrelatedValuePropagation/overflows.ll   | 16 ++++++------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9ec2afb99d683..5bfdf17c9a03f 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -63,6 +63,8 @@ STATISTIC(NumUDivs,     "Number of udivs whose width was decreased");
 STATISTIC(NumAShrs,     "Number of ashr converted to lshr");
 STATISTIC(NumSRems,     "Number of srem converted to urem");
 STATISTIC(NumOverflows, "Number of overflow checks removed");
+STATISTIC(NumSaturating,
+    "Number of saturating arithmetics converted to normal arithmetics");
 
 static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(true));
 
@@ -413,7 +415,7 @@ static void processOverflowIntrinsic(WithOverflowInst *WO) {
   IRBuilder<> B(WO);
   Value *NewOp = B.CreateBinOp(
       WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
-  // Constant-holing could have happened.
+  // Constant-folding could have happened.
   if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
     if (WO->isSigned())
       Inst->setHasNoSignedWrap();
@@ -428,6 +430,20 @@ static void processOverflowIntrinsic(WithOverflowInst *WO) {
   ++NumOverflows;
 }
 
+static void processSaturatingInst(SaturatingInst *SI) {
+  BinaryOperator *BinOp = BinaryOperator::Create(
+      SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+  BinOp->setDebugLoc(SI->getDebugLoc());
+  if (SI->isSigned())
+    BinOp->setHasNoSignedWrap();
+  else
+    BinOp->setHasNoUnsignedWrap();
+
+  SI->replaceAllUsesWith(BinOp);
+  SI->eraseFromParent();
+  ++NumSaturating;
+}
+
 /// Infer nonnull attributes for the arguments at the specified callsite.
 static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
   SmallVector<unsigned, 4> ArgNos;
@@ -440,6 +456,13 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
     }
   }
 
+  if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
+    if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
+      processSaturatingInst(SI);
+      return true;
+    }
+  }
+
   // Deopt bundle operands are intended to capture state with minimal
   // perturbance of the code otherwise.  If we can find a constant value for
   // any such operand and remove a use of the original value, that's
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index 66cd85abe9751..04b1471ebb44e 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -752,8 +752,8 @@ define i8 @uadd_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X]], i8 100)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = add nuw i8 [[X]], 100
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp ugt i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -775,8 +775,8 @@ define i8 @sadd_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X]], i8 20)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = add nsw i8 [[X]], 20
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp sgt i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -798,8 +798,8 @@ define i8 @usub_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X]], i8 100)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = sub nuw i8 [[X]], 100
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp ult i8 %x, 100
   br i1 %cmp, label %trap, label %cont
@@ -821,8 +821,8 @@ define i8 @ssub_sat_no_overflow(i8 %x) {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cont:
-; CHECK-NEXT:    [[RES:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X]], i8 20)
-; CHECK-NEXT:    ret i8 [[RES]]
+; CHECK-NEXT:    [[RES1:%.*]] = sub nsw i8 [[X]], 20
+; CHECK-NEXT:    ret i8 [[RES1]]
 ;
   %cmp = icmp slt i8 %x, -100
   br i1 %cmp, label %trap, label %cont

From e98cf5fe47f2585363588d7cdd19c5bfe57f2c13 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 31 May 2019 20:55:31 +0000
Subject: [PATCH 0784/1176] [codeview] Fix inline line table accuracy for
 discontiguous segments

After improving the inline line table dumper in llvm-pdbutil and looking
at MSVC's inline line tables, it is clear that setting the length of the
inlined code region does not update the code offset. This means that the
delta to the beginning of a new discontiguous inlined code region should
be calculated relative to the last code offset, excluding the length.
Implementing this is a one line fix for MC: simply don't update
LastLabel.

While I'm updating these test cases, switch them to use llvm-objdump -d
and llvm-pdbutil. This allows us to show offsets of each instruction and
correlate the line table offsets to the actual code.

llvm-svn: 362264
---
 llvm/lib/MC/MCCodeView.cpp                    |  1 -
 .../MC/COFF/cv-inline-linetable-unlikely.s    | 40 +++++++++++-------
 llvm/test/MC/COFF/cv-inline-linetable.s       |  4 +-
 llvm/test/MC/COFF/cv-loc-unreachable-2.s      | 41 ++++++++++--------
 llvm/test/MC/COFF/cv-loc-unreachable.s        | 42 +++++++++++--------
 5 files changed, 77 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index 1a71b542bd06d..b2983c11e3423 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -535,7 +535,6 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
           unsigned Length = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
           compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
           compressAnnotation(Length, Buffer);
-          LastLabel = Loc.getLabel();
         }
         HaveOpenRange = false;
         continue;
diff --git a/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s b/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s
index 0ad3559beb7b5..3f693ee63294b 100644
--- a/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s
+++ b/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s
@@ -1,4 +1,6 @@
-# RUN: llvm-mc -triple=x86_64-windows -filetype=obj < %s | llvm-readobj --codeview | FileCheck %s
+# RUN: llvm-mc -triple=x86_64-windows -filetype=obj < %s -o %t.obj
+# RUN: llvm-objdump -d %t.obj | FileCheck %s --check-prefix=ASM
+# RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=CODEVIEW
 
 # C source to generate the assembly:
 # volatile int unlikely_cond = 0;
@@ -19,20 +21,28 @@
 # calls to __asan_report*, for which it is very important to have an accurate
 # stack trace.
 
-# CHECK:    GlobalProcIdSym {
-# CHECK:      FunctionType: g (0x1003)
-# CHECK:      CodeOffset: g+0x0
-# CHECK:      DisplayName: g
-# CHECK:      LinkageName: g
-# CHECK:    }
-# CHECK:    InlineSiteSym {
-# CHECK:      Inlinee: f (0x1002)
-# CHECK:      BinaryAnnotations [
-# CHECK-NEXT:   ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xE, LineOffset: 1}
-# CHECK-NEXT:   ChangeCodeLength: 0x9
-# CHECK-NEXT:   ChangeCodeOffsetAndLineOffset: {CodeOffset: 0xF, LineOffset: 1}
-# CHECK-NEXT:   ChangeCodeLength: 0x7
-# CHECK-NEXT: ]
+# ASM:      0000000000000000 g:
+# ASM-NEXT:        0: 48 83 ec 28                   subq    $40, %rsp
+# ASM-NEXT:        4: c7 05 fc ff ff ff 00 00 00 00 movl    $0, -4(%rip)
+#  Begin inline loc (matches cv_loc below)
+# ASM-NEXT:        e: 83 3d ff ff ff ff 00          cmpl    $0, -1(%rip)
+# ASM-NEXT:       15: 75 0f                         jne     15 <g+0x26>
+#  End inline loc
+# ASM-NEXT:       17: c7 05 fc ff ff ff 00 00 00 00 movl    $0, -4(%rip)
+# ASM-NEXT:       21: 48 83 c4 28                   addq    $40, %rsp
+# ASM-NEXT:       25: c3                            retq
+#  Begin inline loc (matches cv_loc below)
+# ASM-NEXT:       26: e8 00 00 00 00                callq   0 <g+0x2b>
+# ASM-NEXT:       2b: 0f 0b                         ud2
+#  End inline loc
+
+# CODEVIEW:      S_INLINESITE [size = 26]
+# CODEVIEW-NEXT: inlinee = 0x1002 (f), parent = 0, end = 0
+# CODEVIEW-NEXT:   0B2E      code 0xE (+0xE) line 1 (+1)
+# CODEVIEW-NEXT:   0409      code end 0x17 (+0x9)
+# CODEVIEW-NEXT:   0602      line 2 (+1)
+# CODEVIEW-NEXT:   0318      code 0x26 (+0x18)
+# CODEVIEW-NEXT:   0407      code end 0x2D (+0x7)
 
 	.text
 	.globl	g
diff --git a/llvm/test/MC/COFF/cv-inline-linetable.s b/llvm/test/MC/COFF/cv-inline-linetable.s
index f226d6fe21f44..08f250eb43b9e 100644
--- a/llvm/test/MC/COFF/cv-inline-linetable.s
+++ b/llvm/test/MC/COFF/cv-inline-linetable.s
@@ -123,7 +123,9 @@ Ltmp3:
 	.short	Ltmp5-Ltmp4
 Ltmp4:
 	.short	4429
-	.asciz	"\000\000\000\000\000\000\000\000\003\020\000"
+	.long 0 # parent
+	.long 0 # end
+	.long 0x1003 # inlinee, bar
 	.cv_inline_linetable	1 1 9 Lfunc_begin0 Lfunc_end0
 # CHECK:    InlineSiteSym {
 # CHECK:      PtrParent: 0x0
diff --git a/llvm/test/MC/COFF/cv-loc-unreachable-2.s b/llvm/test/MC/COFF/cv-loc-unreachable-2.s
index be96e82fa1bd5..e13ad84a8142c 100644
--- a/llvm/test/MC/COFF/cv-loc-unreachable-2.s
+++ b/llvm/test/MC/COFF/cv-loc-unreachable-2.s
@@ -1,26 +1,33 @@
-# RUN: llvm-mc < %s -triple=i686-pc-win32 -filetype=obj | llvm-readobj - --codeview | FileCheck %s
+# RUN: llvm-mc %s -triple=i686-pc-win32 -filetype=obj -o %t.obj
+# RUN: llvm-objdump -d %t.obj | FileCheck %s --check-prefix=ASM
+# RUN: llvm-pdbutil dump -il -symbols %t.obj | FileCheck %s --check-prefix=CODEVIEW
 
 # Based on the other cv-loc-unreachable-2.s, but with other code in the same
 # section afterwards. We had negative label difference assertions when .cv_loc
 # bound tightly to the next instruction.
 
-# CHECK-LABEL: InlineeSourceLine {
-# CHECK:   Inlinee: do_exit (0x1002)
-# CHECK:   FileID: C:\src\llvm-project\build\t.cpp (0x0)
-# CHECK:   SourceLineNum: 3
-# CHECK: }
+# ASM:      0000000000000000 _callit:
+#   begin inline {
+# ASM-NEXT:        0: e8 00 00 00 00                calll   0 <_callit+0x5>
+# ASM-NEXT:        5: 85 c0                         testl   %eax, %eax
+# ASM-NEXT:        7: 75 01                         jne     1 <_callit+0xa>
+#   } end inline
+# ASM-NEXT:        9: c3                            retl
+#   begin inline {
+# ASM-NEXT:        a: 6a 20                         pushl   $32
+# ASM-NEXT:        c: ff 15 00 00 00 00             calll   *0
+#   } end inline
 
-# CHECK-LABEL: InlineSiteSym {
-# CHECK:   Kind: S_INLINESITE (0x114D)
-# CHECK:   Inlinee: do_exit (0x1002)
-# CHECK:   BinaryAnnotations [
-# CHECK-NEXT:     ChangeLineOffset: 1
-# CHECK-NEXT:     ChangeCodeLength: 0x9
-# CHECK-NEXT:     ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x1, LineOffset: 1}
-# CHECK-NEXT:     ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x8, LineOffset: 1}
-# CHECK-NEXT:     ChangeCodeLength: 0x0
-# CHECK-NEXT:   ]
-# CHECK: }
+# CODEVIEW: Inlinee |  Line | Source File
+# CODEVIEW:  0x1002 |     3 | C:\src\llvm-project\build\t.cpp (MD5: 0BC092F354CE14FDC2FA78F8EDE7426E)
+
+# CODEVIEW:      S_INLINESITE [size = 26]
+# CODEVIEW-NEXT: inlinee = 0x1002 (do_exit), parent = 0, end = 0
+# CODEVIEW-NEXT:   0602      line 1 (+1)
+# CODEVIEW-NEXT:   0409      code end 0x9 (+0x9)
+# CODEVIEW-NEXT:   0B2A      code 0xA (+0xA) line 2 (+1)
+# CODEVIEW-NEXT:   0B28      code 0x12 (+0x8) line 3 (+1)
+# CODEVIEW-NEXT:   0400      code end 0x12 (+0x0)
 
 	.text
 	.def	 _callit; .scl	2; .type	32; .endef
diff --git a/llvm/test/MC/COFF/cv-loc-unreachable.s b/llvm/test/MC/COFF/cv-loc-unreachable.s
index 945e519f50666..bd6f8f75a8cf6 100644
--- a/llvm/test/MC/COFF/cv-loc-unreachable.s
+++ b/llvm/test/MC/COFF/cv-loc-unreachable.s
@@ -1,4 +1,6 @@
-# RUN: llvm-mc < %s -triple=i686-pc-win32 -filetype=obj | llvm-readobj - --codeview | FileCheck %s
+# RUN: llvm-mc %s -triple=i686-pc-win32 -filetype=obj -o %t.obj
+# RUN: llvm-objdump -d %t.obj | FileCheck %s --check-prefix=ASM
+# RUN: llvm-pdbutil dump -il -symbols %t.obj | FileCheck %s --check-prefix=CODEVIEW
 
 # Original source, slightly modified with an extra .cv_loc directive (at EXTRA
 # below) that was causing assertions:
@@ -14,23 +16,29 @@
 #   do_exit();
 # }
 
-# CHECK-LABEL: InlineeSourceLine {
-# CHECK:   Inlinee: do_exit (0x1002)
-# CHECK:   FileID: C:\src\llvm-project\build\t.cpp (0x0)
-# CHECK:   SourceLineNum: 3
-# CHECK: }
 
-# CHECK-LABEL: InlineSiteSym {
-# CHECK:   Kind: S_INLINESITE (0x114D)
-# CHECK:   Inlinee: do_exit (0x1002)
-# CHECK:   BinaryAnnotations [
-# CHECK-NEXT:     ChangeLineOffset: 1
-# CHECK-NEXT:     ChangeCodeLength: 0x9
-# CHECK-NEXT:     ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x1, LineOffset: 1}
-# CHECK-NEXT:     ChangeCodeOffsetAndLineOffset: {CodeOffset: 0x8, LineOffset: 1}
-# CHECK-NEXT:     ChangeCodeLength: 0x0
-# CHECK-NEXT:   ]
-# CHECK: }
+# ASM:      0000000000000000 _callit:
+#   begin inline {
+# ASM-NEXT:        0: e8 00 00 00 00                calll   0 <_callit+0x5>
+# ASM-NEXT:        5: 85 c0                         testl   %eax, %eax
+# ASM-NEXT:        7: 75 01                         jne     1 <_callit+0xa>
+#   } end inline
+# ASM-NEXT:        9: c3                            retl
+#   begin inline {
+# ASM-NEXT:        a: 6a 20                         pushl   $32
+# ASM-NEXT:        c: ff 15 00 00 00 00             calll   *0
+#   } end inline
+
+# CODEVIEW: Inlinee |  Line | Source File
+# CODEVIEW:  0x1002 |     3 | C:\src\llvm-project\build\t.cpp (MD5: 0BC092F354CE14FDC2FA78F8EDE7426E)
+
+# CODEVIEW:      S_INLINESITE [size = 26]
+# CODEVIEW-NEXT: inlinee = 0x1002 (do_exit), parent = 0, end = 0
+# CODEVIEW-NEXT:   0602      line 1 (+1)
+# CODEVIEW-NEXT:   0409      code end 0x9 (+0x9)
+# CODEVIEW-NEXT:   0B2A      code 0xA (+0xA) line 2 (+1)
+# CODEVIEW-NEXT:   0B28      code 0x12 (+0x8) line 3 (+1)
+# CODEVIEW-NEXT:   0400      code end 0x12 (+0x0)
 
 	.text
 	.def	 _callit; .scl	2; .type	32; .endef

From 103bd108a71ccf40434b678534c837cbfe43319e Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 31 May 2019 21:20:13 +0000
Subject: [PATCH 0785/1176] [RegisterCoalescer] fix potential use of undef
 value. NFC

Summary:
Fixes a warning produced from scan-build (llvm.org/reports/scan-build/),
further warnings found by annotation isMoveInstr [[nodiscard]].

isMoveInstr potentially does not assign to its parameters, so if they
were uninitialized, they will potentially stay uninitialized.  It seems
most call sites pass references to uninitialized values, then use them
without checking the return value.

Reviewers: wmi

Reviewed By: wmi

Subscribers: MatzeB, qcolombet, hiraditya, tpr, llvm-commits, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62109

llvm-svn: 362265
---
 llvm/lib/CodeGen/RegisterCoalescer.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 58e29d613f0e9..1f0046ab164e3 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -358,9 +358,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                     "Simple Register Coalescing", false, false)
 
-static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
-                        unsigned &Src, unsigned &Dst,
-                        unsigned &SrcSub, unsigned &DstSub) {
+LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
+                                       const MachineInstr *MI, unsigned &Src,
+                                       unsigned &Dst, unsigned &SrcSub,
+                                       unsigned &DstSub) {
   if (MI->isCopy()) {
     Dst = MI->getOperand(0).getReg();
     DstSub = MI->getOperand(0).getSubReg();
@@ -1516,7 +1517,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
   // CoalescerPair may have a new register class with adjusted subreg indices
   // at this point.
   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-  isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+  if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+    return nullptr;
 
   SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
   const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
@@ -3514,7 +3516,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
   if (!UseTerminalRule)
     return false;
   unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
-  isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
+  if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+    return false;
   // Check if the destination of this copy has any other affinity.
   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
       // If SrcReg is a physical register, the copy won't be coalesced.
@@ -3538,8 +3541,9 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
       continue;
     unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
-    isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
-                OtherSubReg);
+    if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+                OtherSubReg))
+      return false;
     if (OtherReg == SrcReg)
       OtherReg = OtherSrcReg;
     // Check if OtherReg is a non-terminal.

From 30a58f63af49d338960ff4f6a7f9d2238f527c9f Mon Sep 17 00:00:00 2001
From: Aaron Puchert <aaron.puchert@sap.com>
Date: Fri, 31 May 2019 21:27:39 +0000
Subject: [PATCH 0786/1176] Clarify when fix-it hints on warnings are
 appropriate

Summary:
This is not a change in the rules, it's meant as a clarification about
warnings. Since the recovery from warnings is a no-op, the fix-it hints
on warnings shouldn't change anything. Anything that doesn't just
suppress the warning and changes the meaning of the code (even if it's
for the better) should be on an additional note.

Reviewers: rsmith, aaron.ballman

Reviewed By: aaron.ballman

Subscribers: cfe-commits, thakis

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62470

llvm-svn: 362266
---
 clang/docs/InternalsManual.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst
index 7779310ee4979..e1b5bd7c78f24 100644
--- a/clang/docs/InternalsManual.rst
+++ b/clang/docs/InternalsManual.rst
@@ -423,6 +423,9 @@ Fix-it hints on errors and warnings need to obey these rules:
   driver, they should only be used when it's very likely they match the user's
   intent.
 * Clang must recover from errors as if the fix-it had been applied.
+* Fix-it hints on a warning must not change the meaning of the code.
+  However, a hint may clarify the meaning as intentional, for example by adding
+  parentheses when the precedence of operators isn't obvious.
 
 If a fix-it can't obey these rules, put the fix-it on a note.  Fix-its on notes
 are not applied automatically.

From 8b1f64f63d7793af2ed14bb19bade1221d10c3d8 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 31 May 2019 21:36:21 +0000
Subject: [PATCH 0787/1176] [Bugpoint] fix another use-after-move. NFC

Summary:
This was flagged in https://www.viva64.com/en/b/0629/ under "Snippet No.
7".

These statements are order independent, short of the use-after-move.

Reviewers: echristo, srhines, RKSimon

Reviewed By: RKSimon

Subscribers: dblaikie, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62114

llvm-svn: 362267
---
 llvm/tools/bugpoint/Miscompilation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/bugpoint/Miscompilation.cpp b/llvm/tools/bugpoint/Miscompilation.cpp
index 56fb015bdcc59..1621a51c91d6d 100644
--- a/llvm/tools/bugpoint/Miscompilation.cpp
+++ b/llvm/tools/bugpoint/Miscompilation.cpp
@@ -705,8 +705,8 @@ static Expected<bool> TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
   if (!Optimized) {
     errs() << " Error running this sequence of passes"
            << " on the input program!\n";
-    BD.setNewProgram(std::move(Test));
     BD.EmitProgressBitcode(*Test, "pass-error", false);
+    BD.setNewProgram(std::move(Test));
     if (Error E = BD.debugOptimizerCrash())
       return std::move(E);
     return false;

From e8ee5b93511e0e16a097beddc9abf176f36a2237 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Fri, 31 May 2019 22:15:29 +0000
Subject: [PATCH 0788/1176] [Commands] Stop hardcoding languages in
 CommandObjectType

llvm-svn: 362268
---
 lldb/source/Commands/CommandObjectType.cpp | 23 +++++++++-------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp
index 8619818cd12aa..650a8dd216ff6 100644
--- a/lldb/source/Commands/CommandObjectType.cpp
+++ b/lldb/source/Commands/CommandObjectType.cpp
@@ -2806,17 +2806,11 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
       return m_cmd_help_long;
 
     StreamString stream;
-    // FIXME: hardcoding languages is not good
-    lldb::LanguageType languages[] = {eLanguageTypeObjC,
-                                      eLanguageTypeC_plus_plus};
-
-    for (const auto lang_type : languages) {
-      if (auto language = Language::FindPlugin(lang_type)) {
-        if (const char *help = language->GetLanguageSpecificTypeLookupHelp()) {
-          stream.Printf("%s\n", help);
-        }
-      }
-    }
+    Language::ForEach([&](Language *lang) {
+      if (const char *help = lang->GetLanguageSpecificTypeLookupHelp())
+        stream.Printf("%s\n", help);
+      return true;
+    });
 
     m_cmd_help_long = stream.GetString();
     return m_cmd_help_long;
@@ -2852,9 +2846,10 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
 
     if ((is_global_search =
              (m_command_options.m_language == eLanguageTypeUnknown))) {
-      // FIXME: hardcoding languages is not good
-      languages.push_back(Language::FindPlugin(eLanguageTypeObjC));
-      languages.push_back(Language::FindPlugin(eLanguageTypeC_plus_plus));
+      Language::ForEach([&](Language *lang) {
+        languages.push_back(lang);
+        return true;
+      });
     } else {
       languages.push_back(Language::FindPlugin(m_command_options.m_language));
     }

From bc9e04d0c38844c75f736728be4d95dd24080714 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Fri, 31 May 2019 22:18:45 +0000
Subject: [PATCH 0789/1176] [SelectionDAG] Make the code in mutateStrictFPToFP
 less aware of how many operands each node has. NFCI

Just copy all of the operands except the chain and call MorphNode on that.
This removes the IsUnary and IsTernary flags.

Also always get the result type from the result type of the original
nodes. Previously we got it from the operand except for two nodes
where that didn't work.

llvm-svn: 362269
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 89 +++++++------------
 1 file changed, 34 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 367b480c2114e..7cb7e17d55a2b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7690,71 +7690,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
 SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   unsigned OrigOpc = Node->getOpcode();
   unsigned NewOpc;
-  bool IsUnary = false;
-  bool IsTernary = false;
   switch (OrigOpc) {
   default:
     llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
-  case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
-  case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
-  case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
-  case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
-  case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
-  case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
-  case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
-  case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
-  case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
-  case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
-  case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
-  case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
-  case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
-  case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
-  case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
-  case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
-  case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
-  case ISD::STRICT_FNEARBYINT:
-    NewOpc = ISD::FNEARBYINT;
-    IsUnary = true;
-    break;
-  case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
-  case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
-  case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
-  case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
-  case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
-  case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
-  // STRICT_FP_ROUND takes an extra argument describing whether or not
-  // the value will be changed by this node. See ISDOpcodes.h for details.
-  case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
-  case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; IsUnary = true; break;
-  }
+  case ISD::STRICT_FADD:       NewOpc = ISD::FADD;       break;
+  case ISD::STRICT_FSUB:       NewOpc = ISD::FSUB;       break;
+  case ISD::STRICT_FMUL:       NewOpc = ISD::FMUL;       break;
+  case ISD::STRICT_FDIV:       NewOpc = ISD::FDIV;       break;
+  case ISD::STRICT_FREM:       NewOpc = ISD::FREM;       break;
+  case ISD::STRICT_FMA:        NewOpc = ISD::FMA;        break;
+  case ISD::STRICT_FSQRT:      NewOpc = ISD::FSQRT;      break;
+  case ISD::STRICT_FPOW:       NewOpc = ISD::FPOW;       break;
+  case ISD::STRICT_FPOWI:      NewOpc = ISD::FPOWI;      break;
+  case ISD::STRICT_FSIN:       NewOpc = ISD::FSIN;       break;
+  case ISD::STRICT_FCOS:       NewOpc = ISD::FCOS;       break;
+  case ISD::STRICT_FEXP:       NewOpc = ISD::FEXP;       break;
+  case ISD::STRICT_FEXP2:      NewOpc = ISD::FEXP2;      break;
+  case ISD::STRICT_FLOG:       NewOpc = ISD::FLOG;       break;
+  case ISD::STRICT_FLOG10:     NewOpc = ISD::FLOG10;     break;
+  case ISD::STRICT_FLOG2:      NewOpc = ISD::FLOG2;      break;
+  case ISD::STRICT_FRINT:      NewOpc = ISD::FRINT;      break;
+  case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
+  case ISD::STRICT_FMAXNUM:    NewOpc = ISD::FMAXNUM;    break;
+  case ISD::STRICT_FMINNUM:    NewOpc = ISD::FMINNUM;    break;
+  case ISD::STRICT_FCEIL:      NewOpc = ISD::FCEIL;      break;
+  case ISD::STRICT_FFLOOR:     NewOpc = ISD::FFLOOR;     break;
+  case ISD::STRICT_FROUND:     NewOpc = ISD::FROUND;     break;
+  case ISD::STRICT_FTRUNC:     NewOpc = ISD::FTRUNC;     break;
+  case ISD::STRICT_FP_ROUND:   NewOpc = ISD::FP_ROUND;   break;
+  case ISD::STRICT_FP_EXTEND:  NewOpc = ISD::FP_EXTEND;  break;
+  }
+
+  assert(Node->getNumValues() == 2 && "Unexpected number of results!");
 
   // We're taking this node out of the chain, so we need to re-link things.
   SDValue InputChain = Node->getOperand(0);
   SDValue OutputChain = SDValue(Node, 1);
   ReplaceAllUsesOfValueWith(OutputChain, InputChain);
 
-  SDVTList VTs;
-  SDNode *Res = nullptr;
-
-  switch (OrigOpc) {
-  default:
-    VTs = getVTList(Node->getOperand(1).getValueType());
-    break;
-  case ISD::STRICT_FP_ROUND:
-  case ISD::STRICT_FP_EXTEND:
-    VTs = getVTList(Node->getValueType(0));
-    break;
-  }
+  SmallVector<SDValue, 3> Ops;
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(Node->getOperand(i));
 
-  if (IsUnary)
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
-  else if (IsTernary)
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
-                                           Node->getOperand(2),
-                                           Node->getOperand(3)});
-  else
-    Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
-                                           Node->getOperand(2) });
+  SDVTList VTs = getVTList(Node->getValueType(0));
+  SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);
 
   // MorphNodeTo can operate in two ways: if an existing node with the
   // specified operands exists, it can just return it.  Otherwise, it

From fa6bcd0b966ee67696d40256971d5325117875d5 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Fri, 31 May 2019 22:22:29 +0000
Subject: [PATCH 0790/1176] [Tests] Better represent the postinc form produced
 by LFTR in LoopPred tests

llvm-svn: 362270
---
 llvm/test/Transforms/LoopPredication/basic.ll | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index b9b2314ba7813..e71a8b113a832 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -1602,19 +1602,21 @@ define i32 @ne_latch_zext(i32* %array, i32 %length, i16 %n16) {
 ; CHECK-LABEL: @ne_latch_zext(
 ; CHECK-NEXT:  loop.preheader:
 ; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
+; CHECK-NEXT:    [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
-; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
-; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[NPLUS1]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
 ;
 loop.preheader:
   %n = zext i16 %n16 to i32
+  %nplus1 = add nsw nuw i32 %n, 1
   br label %loop
 
 loop:
@@ -1622,8 +1624,8 @@ loop:
   %within.bounds = icmp ult i32 %i, %length
   call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
 
-  %i.next = add nuw i32 %i, 1
-  %continue = icmp ne i32 %i.next, %n
+  %i.next = add nsw nuw i32 %i, 1
+  %continue = icmp ne i32 %i.next, %nplus1
   br i1 %continue, label %loop, label %exit
 
 exit:
@@ -1669,6 +1671,7 @@ define i32 @ne_latch_dom_check(i32* %array, i32 %length, i32 %n) {
 ; CHECK-LABEL: @ne_latch_dom_check(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0
+; CHECK-NEXT:    [[NPLUS1:%.*]] = add nuw i32 [[N]], 1
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       loop.preheader:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -1676,8 +1679,8 @@ define i32 @ne_latch_dom_check(i32* %array, i32 %length, i32 %n) {
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
-; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
-; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[NPLUS1]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK:       exit.loopexit:
 ; CHECK-NEXT:    br label [[EXIT]]
@@ -1686,6 +1689,7 @@ define i32 @ne_latch_dom_check(i32* %array, i32 %length, i32 %n) {
 ;
 entry:
   %tmp5 = icmp sle i32 %n, 0
+  %nplus1 = add nuw i32 %n, 1
   br i1 %tmp5, label %exit, label %loop.preheader
 
 loop.preheader:
@@ -1696,8 +1700,8 @@ loop:
   %within.bounds = icmp ult i32 %i, %length
   call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
 
-  %i.next = add nuw i32 %i, 1
-  %continue = icmp ne i32 %i.next, %n
+  %i.next = add nsw nuw i32 %i, 1
+  %continue = icmp ne i32 %i.next, %nplus1
   br i1 %continue, label %loop, label %exit
 
 exit:

From 5234921119f95138e2a0802369b7b092ddb7cc4a Mon Sep 17 00:00:00 2001
From: Erik Pilkington <erik.pilkington@gmail.com>
Date: Fri, 31 May 2019 22:41:31 +0000
Subject: [PATCH 0791/1176] NFC: Pull out a function to reduce some duplication

Part of https://reviews.llvm.org/D62358

llvm-svn: 362271
---
 .../llvm/Transforms/Utils/BuildLibCalls.h     |  14 +-
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp   | 167 ++++++------------
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp |  22 ++-
 3 files changed, 82 insertions(+), 121 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index 0c97f7dbca41e..ba22c7a7f9875 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -70,12 +70,22 @@ namespace llvm {
   /// Emit a call to the strcpy function to the builder, for the specified
   /// pointer arguments.
   Value *emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                    const TargetLibraryInfo *TLI, StringRef Name = "strcpy");
+                    const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the stpcpy function to the builder, for the specified
+  /// pointer arguments.
+  Value *emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                    const TargetLibraryInfo *TLI);
 
   /// Emit a call to the strncpy function to the builder, for the specified
   /// pointer arguments and length.
   Value *emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
-                     const TargetLibraryInfo *TLI, StringRef Name = "strncpy");
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the stpncpy function to the builder, for the specified
+  /// pointer arguments and length.
+  Value *emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
 
   /// Emit a call to the __memcpy_chk function to the builder. This expects that
   /// the Len and ObjSize have type 'intptr_t' and Dst/Src are pointers.
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index fe6c602ecd427..a44a8946addb1 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -789,100 +789,76 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
   return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
 }
 
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
-                        const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strlen))
+static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
+                          ArrayRef<Type *> ParamTypes,
+                          ArrayRef<Value *> Operands, IRBuilder<> &B,
+                          const TargetLibraryInfo *TLI,
+                          bool IsVaArgs = false) {
+  if (!TLI->has(TheLibFunc))
     return nullptr;
 
   Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrlenName = TLI->getName(LibFunc_strlen);
-  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  FunctionCallee StrLen = M->getOrInsertFunction(
-      StrlenName, DL.getIntPtrType(Context), B.getInt8PtrTy());
-  inferLibFuncAttributes(M, StrlenName, *TLI);
-  CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), StrlenName);
+  StringRef FuncName = TLI->getName(TheLibFunc);
+  FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
+  FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
+  inferLibFuncAttributes(M, FuncName, *TLI);
+  CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
   if (const Function *F =
-          dyn_cast<Function>(StrLen.getCallee()->stripPointerCasts()))
+          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
-
   return CI;
 }
 
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
                         const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strchr))
-    return nullptr;
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
+                     B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrChrName = TLI->getName(LibFunc_strchr);
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
-  FunctionCallee StrChr =
-      M->getOrInsertFunction(StrChrName, I8Ptr, I8Ptr, I32Ty);
-  inferLibFuncAttributes(M, StrChrName, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, StrChrName);
-  if (const Function *F =
-          dyn_cast<Function>(StrChr.getCallee()->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty},
+                     {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
 }
 
 Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_strncmp))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef StrNCmpName = TLI->getName(LibFunc_strncmp);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  FunctionCallee StrNCmp =
-      M->getOrInsertFunction(StrNCmpName, B.getInt32Ty(), B.getInt8PtrTy(),
-                             B.getInt8PtrTy(), DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, StrNCmpName, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, StrNCmpName);
-
-  if (const Function *F =
-          dyn_cast<Function>(StrNCmp.getCallee()->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
+  return emitLibCall(
+      LibFunc_strncmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }
 
 Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                        const TargetLibraryInfo *TLI, StringRef Name) {
-  if (!TLI->has(LibFunc_strcpy))
-    return nullptr;
+                        const TargetLibraryInfo *TLI) {
+  Type *I8Ptr = B.getInt8PtrTy();
+  return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
+                     {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
-  FunctionCallee StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
-  inferLibFuncAttributes(M, Name, *TLI);
-  CallInst *CI =
-      B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
-  if (const Function *F =
-          dyn_cast<Function>(StrCpy.getCallee()->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
+                     {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
 }
 
 Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
-                         const TargetLibraryInfo *TLI, StringRef Name) {
-  if (!TLI->has(LibFunc_strncpy))
-    return nullptr;
+                         const TargetLibraryInfo *TLI) {
+  Type *I8Ptr = B.getInt8PtrTy();
+  return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+                     {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
 
-  Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
   Type *I8Ptr = B.getInt8PtrTy();
-  FunctionCallee StrNCpy =
-      M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, Len->getType());
-  inferLibFuncAttributes(M, Name, *TLI);
-  CallInst *CI = B.CreateCall(
-      StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, Name);
-  if (const Function *F =
-          dyn_cast<Function>(StrNCpy.getCallee()->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-  return CI;
+  return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+                     {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
 }
 
 Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -911,58 +887,29 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
 
 Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  if (!TLI->has(LibFunc_memchr))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef MemChrName = TLI->getName(LibFunc_memchr);
-  LLVMContext &Context = B.GetInsertBlock()->getContext();
-  FunctionCallee MemChr =
-      M->getOrInsertFunction(MemChrName, B.getInt8PtrTy(), B.getInt8PtrTy(),
-                             B.getInt32Ty(), DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, MemChrName, *TLI);
-  CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, MemChrName);
-
-  if (const Function *F =
-          dyn_cast<Function>(MemChr.getCallee()->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
-}
-
-// Common code for memcmp() and bcmp(), which have the exact same properties,
-// just a slight difference in semantics.
-static Value *emitMemCmpOrBcmp(llvm::LibFunc libfunc, Value *Ptr1, Value *Ptr2,
-                               Value *Len, IRBuilder<> &B, const DataLayout &DL,
-                               const TargetLibraryInfo *TLI) {
-  if (!TLI->has(libfunc))
-    return nullptr;
-
-  Module *M = B.GetInsertBlock()->getModule();
-  StringRef CmpFnName = TLI->getName(libfunc);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
-  FunctionCallee CmpFn =
-      M->getOrInsertFunction(CmpFnName, B.getInt32Ty(), B.getInt8PtrTy(),
-                             B.getInt8PtrTy(), DL.getIntPtrType(Context));
-  inferLibFuncAttributes(M, CmpFnName, *TLI);
-  CallInst *CI = B.CreateCall(
-      CmpFn, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, CmpFnName);
-
-  if (const Function *F =
-          dyn_cast<Function>(CmpFn.getCallee()->stripPointerCasts()))
-    CI->setCallingConv(F->getCallingConv());
-
-  return CI;
+  return emitLibCall(
+      LibFunc_memchr, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr, B), Val, Len}, B, TLI);
 }
 
 Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                         const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  return emitMemCmpOrBcmp(LibFunc_memcmp, Ptr1, Ptr2, Len, B, DL, TLI);
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_memcmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }
 
 Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                       const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  return emitMemCmpOrBcmp(LibFunc_bcmp, Ptr1, Ptr2, Len, B, DL, TLI);
+  LLVMContext &Context = B.GetInsertBlock()->getContext();
+  return emitLibCall(
+      LibFunc_bcmp, B.getInt32Ty(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+      {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }
 
 /// Append a suffix to the function name according to the type of 'Op'.
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index cc0b6e0f46984..f5e3d76faa043 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2887,8 +2887,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
                                                       IRBuilder<> &B,
                                                       LibFunc Func) {
-  Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
   const DataLayout &DL = CI->getModule()->getDataLayout();
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
         *ObjSize = CI->getArgOperand(2);
@@ -2904,8 +2902,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
   // TODO: It might be nice to get a maximum length out of the possible
   // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true))
-    return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
+    if (Func == LibFunc_strcpy_chk)
+      return emitStrCpy(Dst, Src, B, TLI);
+    else
+      return emitStpCpy(Dst, Src, B, TLI);
+  }
 
   if (OnlyLowerUnknownSize)
     return nullptr;
@@ -2928,13 +2930,15 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
                                                        IRBuilder<> &B,
                                                        LibFunc Func) {
-  Function *Callee = CI->getCalledFunction();
-  StringRef Name = Callee->getName();
   if (isFortifiedCallFoldable(CI, 3, 2, false)) {
-    Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                             CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
-    return Ret;
+    if (Func == LibFunc_strncpy_chk)
+      return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(2), B, TLI);
+    else
+      return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(2), B, TLI);
   }
+
   return nullptr;
 }
 

From abb2a93c5327ad21d057164539ee06d27f733b71 Mon Sep 17 00:00:00 2001
From: Erik Pilkington <erik.pilkington@gmail.com>
Date: Fri, 31 May 2019 22:41:36 +0000
Subject: [PATCH 0792/1176] [SimplifyLibCalls] Fold more fortified functions
 into non-fortified variants

When the object size argument is -1, no checking can be done, so calling the
_chk variant is unnecessary. We already did this for a bunch of these
functions.

rdar://50797197

Differential revision: https://reviews.llvm.org/D62358

llvm-svn: 362272
---
 .../llvm/Analysis/TargetLibraryInfo.def       |  40 ++++
 .../llvm/Transforms/Utils/BuildLibCalls.h     |  37 ++++
 .../llvm/Transforms/Utils/SimplifyLibCalls.h  |  31 ++-
 llvm/lib/Analysis/TargetLibraryInfo.cpp       |  53 +++++
 llvm/lib/Transforms/Utils/BuildLibCalls.cpp   |  71 ++++++
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 147 +++++++++++--
 .../Transforms/InstCombine/fortify-folding.ll | 207 ++++++++++++++++++
 .../Analysis/TargetLibraryInfoTest.cpp        |  11 +
 8 files changed, 580 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/fortify-folding.ll

diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 2e92d1de0316b..bbccca4ca664d 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -329,6 +329,10 @@ TLI_DEFINE_STRING_INTERNAL("__logf_finite")
 /// long double __logl_finite(long double x);
 TLI_DEFINE_ENUM_INTERNAL(logl_finite)
 TLI_DEFINE_STRING_INTERNAL("__logl_finite")
+/// void *__memccpy_chk(void *dst, const void *src, int c, size_t n,
+///                     size_t dstsize)
+TLI_DEFINE_ENUM_INTERNAL(memccpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__memccpy_chk")
 /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(memcpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__memcpy_chk")
@@ -381,6 +385,14 @@ TLI_DEFINE_STRING_INTERNAL("__small_printf")
 /// int __small_sprintf(char *str, const char *format, ...);
 TLI_DEFINE_ENUM_INTERNAL(small_sprintf)
 TLI_DEFINE_STRING_INTERNAL("__small_sprintf")
+/// int __snprintf_chk(char *s, size_t n, int flags, size_t slen,
+///                    const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(snprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__snprintf_chk")
+/// int __sprintf_chk(char *str, int flags, size_t str_len,
+///                   const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(sprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__sprintf_chk")
 /// double __sqrt_finite(double x);
 TLI_DEFINE_ENUM_INTERNAL(sqrt_finite)
 TLI_DEFINE_STRING_INTERNAL("__sqrt_finite")
@@ -396,12 +408,26 @@ TLI_DEFINE_STRING_INTERNAL("__stpcpy_chk")
 /// char *__stpncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(stpncpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__stpncpy_chk")
+/// char *__strcat_chk(char *s1, const char *s2, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strcat_chk")
 /// char *__strcpy_chk(char *s1, const char *s2, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(strcpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__strcpy_chk")
 /// char * __strdup(const char *s);
 TLI_DEFINE_ENUM_INTERNAL(dunder_strdup)
 TLI_DEFINE_STRING_INTERNAL("__strdup")
+/// size_t __strlcat_chk(char *dst, const char *src, size_t size,
+///                      size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcat_chk")
+/// size_t __strlcpy_chk(char *dst, const char *src, size_t size,
+///                      size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcpy_chk")
+/// char *strncat_chk(char *s1, const char *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strncat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strncat_chk")
 /// char *__strncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
 TLI_DEFINE_ENUM_INTERNAL(strncpy_chk)
 TLI_DEFINE_STRING_INTERNAL("__strncpy_chk")
@@ -411,6 +437,14 @@ TLI_DEFINE_STRING_INTERNAL("__strndup")
 /// char * __strtok_r(char *s, const char *delim, char **save_ptr);
 TLI_DEFINE_ENUM_INTERNAL(dunder_strtok_r)
 TLI_DEFINE_STRING_INTERNAL("__strtok_r")
+/// int __vsnprintf_chk(char *s, size_t n, int flags, size_t slen,
+///                     const char *format, va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsnprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsnprintf_chk")
+/// int __vsprintf_chk(char *s, int flags, size_t slen, const char *format,
+///                    va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsprintf_chk")
 /// int abs(int j);
 TLI_DEFINE_ENUM_INTERNAL(abs)
 TLI_DEFINE_STRING_INTERNAL("abs")
@@ -1200,6 +1234,12 @@ TLI_DEFINE_STRING_INTERNAL("strcspn")
 /// char *strdup(const char *s1);
 TLI_DEFINE_ENUM_INTERNAL(strdup)
 TLI_DEFINE_STRING_INTERNAL("strdup")
+/// size_t strlcat(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcat)
+TLI_DEFINE_STRING_INTERNAL("strlcat")
+/// size_t strlcpy(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy)
+TLI_DEFINE_STRING_INTERNAL("strlcpy")
 /// size_t strlen(const char *s);
 TLI_DEFINE_ENUM_INTERNAL(strlen)
 TLI_DEFINE_STRING_INTERNAL("strlen")
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index ba22c7a7f9875..8421c31a36da3 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -106,6 +106,43 @@ namespace llvm {
   Value *emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
                   const DataLayout &DL, const TargetLibraryInfo *TLI);
 
+  /// Emit a call to the memccpy function.
+  Value *emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+                     IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the snprintf function.
+  Value *emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+                      ArrayRef<Value *> Args, IRBuilder<> &B,
+                      const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the sprintf function.
+  Value *emitSPrintf(Value *Dest, Value *Fmt, ArrayRef<Value *> VariadicArgs,
+                     IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strcat function.
+  Value *emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+                    const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strlcpy function.
+  Value *emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strlcat function.
+  Value *emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the strncat function.
+  Value *emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                     const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the vsnprintf function.
+  Value *emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+                       IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+  /// Emit a call to the vsprintf function.
+  Value *emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, IRBuilder<> &B,
+                      const TargetLibraryInfo *TLI);
+
   /// Emit a call to the unary function named 'Name' (e.g.  'floor'). This
   /// function is known to take a single of type matching 'Op' and returns one
   /// value with the same type. If 'Op' is a long double, 'l' is added as the
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index e77dd43558ae1..2572094ddac82 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -57,14 +57,41 @@ class FortifiedLibCallSimplifier {
   Value *optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemSetChk(CallInst *CI, IRBuilder<> &B);
 
-  // Str/Stp cpy are similar enough to be handled in the same functions.
+  /// Str/Stp cpy are similar enough to be handled in the same functions.
   Value *optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func);
   Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func);
+  Value *optimizeMemCCpyChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeSNPrintfChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeSPrintfChk(CallInst *CI,IRBuilder<> &B);
+  Value *optimizeStrCatChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeStrLCat(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeStrNCatChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeStrLCpyChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeVSNPrintfChk(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeVSPrintfChk(CallInst *CI, IRBuilder<> &B);
 
   /// Checks whether the call \p CI to a fortified libcall is foldable
   /// to the non-fortified version.
+  ///
+  /// \param CI the call to the fortified libcall.
+  ///
+  /// \param ObjSizeOp the index of the object size parameter of this chk
+  /// function. Not optional since this is mandatory.
+  ///
+  /// \param SizeOp optionally set to the parameter index of an explicit buffer
+  /// size argument. For instance, set to '2' for __strncpy_chk.
+  ///
+  /// \param StrOp optionally set to the parameter index of the source string
+  /// parameter to strcpy-like functions, where only the strlen of the source
+  /// will be writtin into the destination.
+  ///
+  /// \param FlagsOp optionally set to the parameter index of a 'flags'
+  /// parameter. These are used by an implementation to opt-into stricter
+  /// checking.
   bool isFortifiedCallFoldable(CallInst *CI, unsigned ObjSizeOp,
-                               unsigned SizeOp, bool isString);
+                               Optional<unsigned> SizeOp = None,
+                               Optional<unsigned> StrOp = None,
+                               Optional<unsigned> FlagsOp = None);
 };
 
 /// LibCallSimplifier - This class implements a collection of optimizations
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index f154c6884ceed..e5cb2544f2d55 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -691,11 +691,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
     return ((NumParams == 2 || NumParams == 3) &&
             FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy());
+  case LibFunc_strcat_chk:
+    --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
   case LibFunc_strcat:
     return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
             FTy.getParamType(0) == FTy.getReturnType() &&
             FTy.getParamType(1) == FTy.getReturnType());
 
+  case LibFunc_strncat_chk:
+    --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
   case LibFunc_strncat:
     return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
             FTy.getParamType(0) == FTy.getReturnType() &&
@@ -714,6 +724,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
             FTy.getParamType(0) == FTy.getParamType(1) &&
             FTy.getParamType(0) == PCharTy);
 
+  case LibFunc_strlcat_chk:
+  case LibFunc_strlcpy_chk:
+    --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
+  case LibFunc_strlcat:
+  case LibFunc_strlcpy:
+    return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) &&
+           FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(1)->isPointerTy() &&
+           IsSizeTTy(FTy.getParamType(2));
+
   case LibFunc_strncpy_chk:
   case LibFunc_stpncpy_chk:
     --NumParams;
@@ -784,10 +807,27 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
     return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy() &&
             FTy.getReturnType()->isIntegerTy(32));
+
+  case LibFunc_sprintf_chk:
+    return NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(1)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(2)) &&
+           FTy.getParamType(3)->isPointerTy() &&
+           FTy.getReturnType()->isIntegerTy(32);
+
   case LibFunc_snprintf:
     return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(2)->isPointerTy() &&
             FTy.getReturnType()->isIntegerTy(32));
+
+  case LibFunc_snprintf_chk:
+    return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+           IsSizeTTy(FTy.getParamType(1)) &&
+           FTy.getParamType(2)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(3)) &&
+           FTy.getParamType(4)->isPointerTy() &&
+           FTy.getReturnType()->isIntegerTy(32);
+
   case LibFunc_setitimer:
     return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
             FTy.getParamType(2)->isPointerTy());
@@ -836,6 +876,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
             FTy.getParamType(1)->isIntegerTy() &&
             IsSizeTTy(FTy.getParamType(2)));
 
+  case LibFunc_memccpy_chk:
+      --NumParams;
+    if (!IsSizeTTy(FTy.getParamType(NumParams)))
+      return false;
+    LLVM_FALLTHROUGH;
   case LibFunc_memccpy:
     return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
   case LibFunc_memalign:
@@ -1004,9 +1049,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
   case LibFunc_vsprintf:
     return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(1)->isPointerTy());
+  case LibFunc_vsprintf_chk:
+    return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(1)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy();
   case LibFunc_vsnprintf:
     return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
             FTy.getParamType(2)->isPointerTy());
+  case LibFunc_vsnprintf_chk:
+    return NumParams == 6 && FTy.getParamType(0)->isPointerTy() &&
+           FTy.getParamType(2)->isIntegerTy(32) &&
+           IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy();
   case LibFunc_open:
     return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
   case LibFunc_opendir:
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index a44a8946addb1..d486d91599854 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -912,6 +912,77 @@ Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
       {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
 }
 
+Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+                         IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(
+      LibFunc_memccpy, B.getInt8PtrTy(),
+      {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
+      {Ptr1, Ptr2, Val, Len}, B, TLI);
+}
+
+Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+                          ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+                          const TargetLibraryInfo *TLI) {
+  SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+  Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+  return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
+                     Args, B, TLI, /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
+                         ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+  Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+  return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
+                     /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+                        const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy()},
+                     {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strlcpy, Size->getType(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strlcat, Size->getType(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+                         const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+                     {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+                           IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(
+      LibFunc_vsnprintf, B.getInt32Ty(),
+      {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
+      {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
+                          IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+  return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
+                     {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
+                     {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
 /// Append a suffix to the function name according to the type of 'Op'.
 static void appendTypeSuffix(Value *Op, StringRef &Name,
                              SmallString<20> &NameBuffer) {
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index f5e3d76faa043..0c95f4ce4857c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2825,12 +2825,23 @@ void LibCallSimplifier::eraseFromParent(Instruction *I) {
 // Fortified Library Call Optimizations
 //===----------------------------------------------------------------------===//
 
-bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
-                                                         unsigned ObjSizeOp,
-                                                         unsigned SizeOp,
-                                                         bool isString) {
-  if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
+bool
+FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
+                                                    unsigned ObjSizeOp,
+                                                    Optional<unsigned> SizeOp,
+                                                    Optional<unsigned> StrOp,
+                                                    Optional<unsigned> FlagOp) {
+  // If this function takes a flag argument, the implementation may use it to
+  // perform extra checks. Don't fold into the non-checking variant.
+  if (FlagOp) {
+    ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
+    if (!Flag || !Flag->isZero())
+      return false;
+  }
+
+  if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
     return true;
+
   if (ConstantInt *ObjSizeCI =
           dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
     if (ObjSizeCI->isMinusOne())
@@ -2838,23 +2849,27 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
     // If the object size wasn't -1 (unknown), bail out if we were asked to.
     if (OnlyLowerUnknownSize)
       return false;
-    if (isString) {
-      uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+    if (StrOp) {
+      uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
       // If the length is 0 we don't know how long it is and so we can't
       // remove the check.
       if (Len == 0)
         return false;
       return ObjSizeCI->getZExtValue() >= Len;
     }
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
-      return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+
+    if (SizeOp) {
+      if (ConstantInt *SizeCI =
+              dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
+        return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+    }
   }
   return false;
 }
 
 Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
                                                      IRBuilder<> &B) {
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
                    CI->getArgOperand(2));
     return CI->getArgOperand(0);
@@ -2864,7 +2879,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
 
 Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
                                                       IRBuilder<> &B) {
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
                     CI->getArgOperand(2));
     return CI->getArgOperand(0);
@@ -2876,7 +2891,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
                                                      IRBuilder<> &B) {
   // TODO: Try foldMallocMemset() here.
 
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
     B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
     return CI->getArgOperand(0);
@@ -2902,7 +2917,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
   // TODO: It might be nice to get a maximum length out of the possible
   // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
+  if (isFortifiedCallFoldable(CI, 2, None, 1)) {
     if (Func == LibFunc_strcpy_chk)
       return emitStrCpy(Dst, Src, B, TLI);
     else
@@ -2930,10 +2945,10 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
                                                        IRBuilder<> &B,
                                                        LibFunc Func) {
-  if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+  if (isFortifiedCallFoldable(CI, 3, 2)) {
     if (Func == LibFunc_strncpy_chk)
       return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                         CI->getArgOperand(2), B, TLI);
+                               CI->getArgOperand(2), B, TLI);
     else
       return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
                          CI->getArgOperand(2), B, TLI);
@@ -2942,6 +2957,90 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
   return nullptr;
 }
 
+Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 4, 3))
+    return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
+                                                       IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
+    SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
+    return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+                        CI->getArgOperand(4), VariadicArgs, B, TLI);
+  }
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
+    SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
+    return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
+                       B, TLI);
+  }
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
+                                                     IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 2))
+    return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
+                                                   IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3))
+    return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3))
+    return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
+                                                      IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3))
+    return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
+                                                        IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
+    return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
+
+  return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
+                                                       IRBuilder<> &B) {
+  if (isFortifiedCallFoldable(CI, 2, None, None, 1))
+    return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+                        CI->getArgOperand(4), B, TLI);
+
+  return nullptr;
+}
+
 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
   // Some clang users checked for _chk libcall availability using:
@@ -2986,6 +3085,24 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   case LibFunc_stpncpy_chk:
   case LibFunc_strncpy_chk:
     return optimizeStrpNCpyChk(CI, Builder, Func);
+  case LibFunc_memccpy_chk:
+    return optimizeMemCCpyChk(CI, Builder);
+  case LibFunc_snprintf_chk:
+    return optimizeSNPrintfChk(CI, Builder);
+  case LibFunc_sprintf_chk:
+    return optimizeSPrintfChk(CI, Builder);
+  case LibFunc_strcat_chk:
+    return optimizeStrCatChk(CI, Builder);
+  case LibFunc_strlcat_chk:
+    return optimizeStrLCat(CI, Builder);
+  case LibFunc_strncat_chk:
+    return optimizeStrNCatChk(CI, Builder);
+  case LibFunc_strlcpy_chk:
+    return optimizeStrLCpyChk(CI, Builder);
+  case LibFunc_vsnprintf_chk:
+    return optimizeVSNPrintfChk(CI, Builder);
+  case LibFunc_vsprintf_chk:
+    return optimizeVSPrintfChk(CI, Builder);
   default:
     break;
   }
diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll
new file mode 100644
index 0000000000000..68be219ed1ad9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll
@@ -0,0 +1,207 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s --dump-input-on-failure
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define i8* @test_memccpy() {
+  ; CHECK-LABEL: define i8* @test_memccpy
+  ; CHECK-NEXT: call i8* @memccpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60)
+  ; CHECK-NEXT: ret i8*
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1)
+  ret i8* %ret
+}
+
+define i8* @test_not_memccpy() {
+  ; CHECK-LABEL: define i8* @test_not_memccpy
+  ; CHECK-NEXT: call i8* @__memccpy_chk
+  ; CHECK-NEXT: ret i8*
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 59)
+  ret i8* %ret
+}
+
+define i32 @test_snprintf() {
+  ; CHECK-LABEL: define i32 @test_snprintf
+  ; CHECK-NEXT: call i32 (i8*, i64, i8*, ...) @snprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
+  ; CHECK-NEXT: ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 -1, i8* %fmt)
+  ret i32 %ret
+}
+
+define i32 @test_not_snprintf() {
+  ; CHECK-LABEL: define i32 @test_not_snprintf
+  ; CHECK-NEXT: call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk
+  ; CHECK-NEXT: call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk
+  ; CHECK-NEXT: ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 59, i8* %fmt)
+  %ign = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 1, i64 -1, i8* %fmt)
+  ret i32 %ret
+}
+
+define i32 @test_sprintf() {
+  ; CHECK-LABEL: define i32 @test_sprintf
+  ; CHECK-NEXT: call i32 (i8*, i8*, ...) @sprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
+  ; CHECK-NEXT: ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 -1, i8* %fmt)
+  ret i32 %ret
+}
+
+define i32 @test_not_sprintf() {
+  ; CHECK-LABEL: define i32 @test_not_sprintf
+  ; CHECK-NEXT: call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk
+  ; CHECK-NEXT: call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk
+  ; CHECK-NEXT: ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 59, i8* %fmt)
+  %ignored = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 1, i64 -1, i8* %fmt)
+  ret i32 %ret
+}
+
+define i8* @test_strcat() {
+  ; CHECK-LABEL: define i8* @test_strcat
+  ; CHECK-NEXT: call i8* @strcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0))
+  ; CHECK-NEXT: ret i8*
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__strcat_chk(i8* %dst, i8* %src, i64 -1)
+  ret i8* %ret
+}
+
+define i8* @test_not_strcat() {
+  ; CHECK-LABEL: define i8* @test_not_strcat
+  ; CHECK-NEXT: call i8* @__strcat_chk
+  ; CHECK-NEXT: ret i8*
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__strcat_chk(i8* %dst, i8* %src, i64 0)
+  ret i8* %ret
+}
+
+define i64 @test_strlcat() {
+  ; CHECK-LABEL: define i64 @test_strlcat
+  ; CHECK-NEXT: call i64 @strlcat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
+  ; CHECK-NEXT: ret i64
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 -1)
+  ret i64 %ret
+}
+
+define i64 @test_not_strlcat() {
+  ; CHECK-LABEL: define i64 @test_not_strlcat
+  ; CHECK-NEXT: call i64 @__strlcat_chk
+  ; CHECK-NEXT: ret i64
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 0)
+  ret i64 %ret
+}
+
+define i8* @test_strncat() {
+  ; CHECK-LABEL: define i8* @test_strncat
+  ; CHECK-NEXT: call i8* @strncat(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
+  ; CHECK-NEXT: ret i8*
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 -1)
+  ret i8* %ret
+}
+
+define i8* @test_not_strncat() {
+  ; CHECK-LABEL: define i8* @test_not_strncat
+  ; CHECK-NEXT: call i8* @__strncat_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22, i64 3)
+  ; CHECK-NEXT: ret i8*
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 3)
+  ret i8* %ret
+}
+
+define i64 @test_strlcpy() {
+  ; CHECK-LABEL: define i64 @test_strlcpy
+  ; CHECK-NEXT: call i64 @strlcpy(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22)
+  ; CHECK-NEXT: ret i64
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 -1)
+  ret i64 %ret
+}
+
+define i64 @test_not_strlcpy() {
+  ; CHECK-LABEL: define i64 @test_not_strlcpy
+  ; CHECK-NEXT: call i64 @__strlcpy_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22, i64 2)
+  ; CHECK-NEXT: ret i64
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 2)
+  ret i64 %ret
+}
+
+define i32 @test_vsnprintf() {
+  ; CHECK-LABEL: define i32 @test_vsnprintf
+  ; CHECK-NEXT: call i32 @vsnprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+  ; ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null)
+  ret i32 %ret
+}
+
+define i32 @test_not_vsnprintf() {
+  ; CHECK-LABEL: define i32 @test_not_vsnprintf
+  ; CHECK-NEXT: call i32 @__vsnprintf_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i32 0, i64 3, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+  ; CHECK-NEXT: call i32 @__vsnprintf_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i32 1, i64 -1, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+  ; ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 3, i8* %src, %struct.__va_list_tag* null)
+  %ign = call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 1, i64 -1, i8* %src, %struct.__va_list_tag* null)
+  ret i32 %ret
+}
+
+define i32 @test_vsprintf() {
+  ; CHECK-LABEL: define i32 @test_vsprintf
+  ; CHECK-NEXT: call i32 @vsprintf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+  ; ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null)
+  ret i32 %ret
+}
+
+define i32 @test_not_vsprintf() {
+  ; CHECK-LABEL: define i32 @test_not_vsprintf
+  ; CHECK-NEXT: call i32 @__vsprintf_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i32 0, i64 3, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+  ; CHECK-NEXT: call i32 @__vsprintf_chk(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i32 1, i64 -1, i8* getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null)
+  ; ret i32
+  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
+  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
+  %ret = call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 3, i8* %src, %struct.__va_list_tag* null)
+  %ign = call i32 @__vsprintf_chk(i8* %dst, i32 1, i64 -1, i8* %src, %struct.__va_list_tag* null)
+  ret i32 %ret
+}
+
+declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64)
+declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...)
+declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
+declare i8* @__strcat_chk(i8*, i8*, i64)
+declare i64 @__strlcat_chk(i8*, i8*, i64, i64)
+declare i8* @__strncat_chk(i8*, i8*, i64, i64)
+declare i64 @__strlcpy_chk(i8*, i8*, i64, i64)
+declare i32 @__vsnprintf_chk(i8*, i64, i32, i64, i8*, %struct.__va_list_tag*)
+declare i32 @__vsprintf_chk(i8*, i32, i64, i8*, %struct.__va_list_tag*)
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 43776f607a1d2..00b1e94863b76 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -313,6 +313,8 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare i8* @strtok(i8*, i8*)\n"
       "declare i8* @strtok_r(i8*, i8*, i8**)\n"
       "declare i64 @strtol(i8*, i8**, i32)\n"
+      "declare i64 @strlcat(i8*, i8**, i64)\n"
+      "declare i64 @strlcpy(i8*, i8**, i64)\n"
       "declare x86_fp80 @strtold(i8*, i8**)\n"
       "declare i64 @strtoll(i8*, i8**, i32)\n"
       "declare i64 @strtoul(i8*, i8**, i32)\n"
@@ -467,6 +469,15 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare i8* @__stpncpy_chk(i8*, i8*, i64, i64)\n"
       "declare i8* @__strcpy_chk(i8*, i8*, i64)\n"
       "declare i8* @__strncpy_chk(i8*, i8*, i64, i64)\n"
+      "declare i8* @__memccpy_chk(i8*, i8*, i32, i64)\n"
+      "declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...)\n"
+      "declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)\n"
+      "declare i8* @__strcat_chk(i8*, i8*, i64)\n"
+      "declare i64 @__strlcat_chk(i8*, i8*, i64, i64)\n"
+      "declare i8* @__strncat_chk(i8*, i8*, i64, i64)\n"
+      "declare i64 @__strlcpy_chk(i8*, i8*, i64, i64)\n"
+      "declare i32 @__vsnprintf_chk(i8*, i64, i32, i64, i8*, %struct*)\n"
+      "declare i32 @__vsprintf_chk(i8*, i32, i64, i8*, %struct*)\n"
 
       "declare i8* @memalign(i64, i64)\n"
       "declare i8* @mempcpy(i8*, i8*, i64)\n"

From 302eedcbfae15e2d1606802a818358d5b7c219ca Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 31 May 2019 22:47:36 +0000
Subject: [PATCH 0793/1176] AMDGPU: Fix not adding ImplicitBufferPtr as a
 live-in

Fixes missing test from r293000.

llvm-svn: 362275
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp |  5 ++++-
 llvm/test/CodeGen/AMDGPU/mesa3d.ll         | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/mesa3d.ll

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index e333154f83bfd..4b2124b14c054 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -419,7 +419,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
       }
     }
     MF.getRegInfo().addLiveIn(GitPtrLo);
-    MF.front().addLiveIn(GitPtrLo);
+    MBB.addLiveIn(GitPtrLo);
     BuildMI(MBB, I, DL, SMovB32, RsrcLo)
       .addReg(GitPtrLo)
       .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
@@ -487,6 +487,9 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
           .addImm(0) // dlc
           .addMemOperand(MMO)
           .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
+        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
       }
     } else {
       unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
diff --git a/llvm/test/CodeGen/AMDGPU/mesa3d.ll b/llvm/test/CodeGen/AMDGPU/mesa3d.ll
new file mode 100644
index 0000000000000..4f09b3f748045
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mesa3d.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}scratch_ps:
+; GCN: s_load_dwordx2 s[4:5], s[0:1], 0x0{{$}}
+; GCN-DAG: s_mov_b32 s6, -1{{$}}
+; GCN-DAG: s_mov_b32 s7, 0xe8f000
+; GCN-DAG: v_mov_b32_e32 [[V:v[0-9]+]], 2
+; GCN: buffer_store_dword [[V]], off, s[4:7], s2 offset:4
+define amdgpu_ps void @scratch_ps(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 2, i32 addrspace(5)* %alloca
+  ret void
+}

From 7d4ec5af6c27c2c7225afbfd7b6460930a384ac6 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Fri, 31 May 2019 22:51:59 +0000
Subject: [PATCH 0794/1176] [WebAssembly] Don't export __data_end and
 __heap_base by default.

These can still be exported via --export if needed.

Differential Revision: https://reviews.llvm.org/D62744

llvm-svn: 362276
---
 lld/test/wasm/alias.ll               | 18 ------------------
 lld/test/wasm/archive-export.ll      | 12 ------------
 lld/test/wasm/call-indirect.ll       | 20 +-------------------
 lld/test/wasm/comdats.ll             | 20 +-------------------
 lld/test/wasm/cxx-mangling.ll        |  6 ------
 lld/test/wasm/data-layout.ll         | 10 ++++++++--
 lld/test/wasm/entry.ll               | 12 ------------
 lld/test/wasm/export.ll              |  6 ------
 lld/test/wasm/local-symbols.ll       | 20 +-------------------
 lld/test/wasm/locals-duplicate.test  | 24 +++---------------------
 lld/test/wasm/lto/export.ll          |  6 ------
 lld/test/wasm/stack-first.test       | 11 ++++++-----
 lld/test/wasm/undefined-weak-call.ll | 18 ------------------
 lld/test/wasm/visibility-hidden.ll   | 12 ------------
 lld/test/wasm/weak-alias-overide.ll  | 18 ------------------
 lld/test/wasm/weak-alias.ll          | 18 ------------------
 lld/test/wasm/weak-symbols.ll        | 20 +-------------------
 lld/test/wasm/weak-undefined.ll      | 18 ------------------
 lld/wasm/Config.h                    |  1 +
 lld/wasm/Driver.cpp                  | 22 ++++++++++------------
 lld/wasm/SymbolTable.cpp             |  5 ++++-
 lld/wasm/SymbolTable.h               |  4 ++--
 lld/wasm/Writer.cpp                  |  9 ++++-----
 23 files changed, 42 insertions(+), 268 deletions(-)

diff --git a/lld/test/wasm/alias.ll b/lld/test/wasm/alias.ll
index a7268ad4397af..e20e648731cb9 100644
--- a/lld/test/wasm/alias.ll
+++ b/lld/test/wasm/alias.ll
@@ -41,29 +41,11 @@ entry:
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           1
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            _start
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
diff --git a/lld/test/wasm/archive-export.ll b/lld/test/wasm/archive-export.ll
index 750b999866c04..664f7761e61fa 100644
--- a/lld/test/wasm/archive-export.ll
+++ b/lld/test/wasm/archive-export.ll
@@ -14,12 +14,6 @@ CHECK:         Exports:
 CHECK-NEXT:       - Name:            memory
 CHECK-NEXT:         Kind:            MEMORY
 CHECK-NEXT:         Index:           0
-CHECK-NEXT:       - Name:            __heap_base
-CHECK-NEXT:         Kind:            GLOBAL
-CHECK-NEXT:         Index:           1
-CHECK-NEXT:       - Name:            __data_end
-CHECK-NEXT:         Kind:            GLOBAL
-CHECK-NEXT:         Index:           2
 CHECK-NEXT:       - Name:            foo
 CHECK-NEXT:         Kind:            FUNCTION
 CHECK-NEXT:         Index:           1
@@ -38,12 +32,6 @@ NOEXPORT:         Exports:
 NOEXPORT-NEXT:       - Name:            memory
 NOEXPORT-NEXT:         Kind:            MEMORY
 NOEXPORT-NEXT:         Index:           0
-NOEXPORT-NEXT:       - Name:            __heap_base
-NOEXPORT-NEXT:         Kind:            GLOBAL
-NOEXPORT-NEXT:         Index:           1
-NOEXPORT-NEXT:       - Name:            __data_end
-NOEXPORT-NEXT:         Kind:            GLOBAL
-NOEXPORT-NEXT:         Index:           2
 NOEXPORT-NEXT:       - Name:            _start
 NOEXPORT-NEXT:         Kind:            FUNCTION
 NOEXPORT-NEXT:         Index:           0
diff --git a/lld/test/wasm/call-indirect.ll b/lld/test/wasm/call-indirect.ll
index bbc6c0bcaa5e4..b0bbc4aeb44ca 100644
--- a/lld/test/wasm/call-indirect.ll
+++ b/lld/test/wasm/call-indirect.ll
@@ -81,30 +81,12 @@ define void @call_ptr(i64 (i64)* %arg) {
 ; CHECK-NEXT:         Mutable:         false
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66576
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1036
-; CHECK-NEXT:       - Index:           3
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           1032
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            bar
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
@@ -119,7 +101,7 @@ define void @call_ptr(i64 (i64)* %arg) {
 ; CHECK-NEXT:         Index:           3
 ; CHECK-NEXT:       - Name:            indirect_func
 ; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           3
+; CHECK-NEXT:         Index:           1
 ; CHECK-NEXT:       - Name:            call_ptr
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           4
diff --git a/lld/test/wasm/comdats.ll b/lld/test/wasm/comdats.ll
index 1dfedbb7a66ee..8528238b9bd72 100644
--- a/lld/test/wasm/comdats.ll
+++ b/lld/test/wasm/comdats.ll
@@ -27,30 +27,12 @@ entry:
 ; CHECK-NEXT:        Mutable:         false
 ; CHECK-NEXT:        InitExpr:
 ; CHECK-NEXT:          Opcode:          I32_CONST
-; CHECK-NEXT:          Value:           66576
-; CHECK-NEXT:      - Index:           2
-; CHECK-NEXT:        Type:            I32
-; CHECK-NEXT:        Mutable:         false
-; CHECK-NEXT:        InitExpr:
-; CHECK-NEXT:          Opcode:          I32_CONST
-; CHECK-NEXT:          Value:           1027
-; CHECK-NEXT:      - Index:           3
-; CHECK-NEXT:        Type:            I32
-; CHECK-NEXT:        Mutable:         false
-; CHECK-NEXT:        InitExpr:
-; CHECK-NEXT:          Opcode:          I32_CONST
 ; CHECK-NEXT:          Value:           1024
 ; CHECK-NEXT:  - Type:            EXPORT
 ; CHECK-NEXT:    Exports:
 ; CHECK-NEXT:      - Name:            memory
 ; CHECK-NEXT:        Kind:            MEMORY
 ; CHECK-NEXT:        Index:           0
-; CHECK-NEXT:      - Name:            __heap_base
-; CHECK-NEXT:        Kind:            GLOBAL
-; CHECK-NEXT:        Index:           1
-; CHECK-NEXT:      - Name:            __data_end
-; CHECK-NEXT:        Kind:            GLOBAL
-; CHECK-NEXT:        Index:           2
 ; CHECK-NEXT:      - Name:            _start
 ; CHECK-NEXT:        Kind:            FUNCTION
 ; CHECK-NEXT:        Index:           0
@@ -59,7 +41,7 @@ entry:
 ; CHECK-NEXT:        Index:           1
 ; CHECK-NEXT:      - Name:            constantData
 ; CHECK-NEXT:        Kind:            GLOBAL
-; CHECK-NEXT:        Index:           3
+; CHECK-NEXT:        Index:           1
 ; CHECK-NEXT:      - Name:            callInline1
 ; CHECK-NEXT:        Kind:            FUNCTION
 ; CHECK-NEXT:        Index:           2
diff --git a/lld/test/wasm/cxx-mangling.ll b/lld/test/wasm/cxx-mangling.ll
index 854383b43726b..415ad2f925243 100644
--- a/lld/test/wasm/cxx-mangling.ll
+++ b/lld/test/wasm/cxx-mangling.ll
@@ -26,12 +26,6 @@ define void @_start() {
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            _Z3fooi
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           1
diff --git a/lld/test/wasm/data-layout.ll b/lld/test/wasm/data-layout.ll
index 4feaf8c0e4cea..20fe30445cb7d 100644
--- a/lld/test/wasm/data-layout.ll
+++ b/lld/test/wasm/data-layout.ll
@@ -13,7 +13,7 @@ target triple = "wasm32-unknown-unknown"
 @local_struct = hidden global %struct.s zeroinitializer, align 4
 @local_struct_internal_ptr = hidden local_unnamed_addr global i32* getelementptr inbounds (%struct.s, %struct.s* @local_struct, i32 0, i32 1), align 4
 
-; RUN: wasm-ld -no-gc-sections --allow-undefined --no-entry -o %t.wasm %t.o %t.hello.o
+; RUN: wasm-ld -no-gc-sections --export=__data_end --export=__heap_base --allow-undefined --no-entry -o %t.wasm %t.o %t.hello.o
 ; RUN: obj2yaml %t.wasm | FileCheck %s
 
 ; CHECK:        - Type:            MEMORY
@@ -32,9 +32,15 @@ target triple = "wasm32-unknown-unknown"
 ; CHECK-NEXT:         Mutable:         false
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
+; CHECK-NEXT:           Value:           1071
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Type:            I32
+; CHECK-NEXT:         Mutable:         false
+; CHECK-NEXT:         InitExpr:
+; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           66608
 
-; CHECK:         - Type:            DATA
+; CHECK:        - Type:            DATA
 ; CHECK-NEXT:     Segments:
 ; CHECK-NEXT:       - SectionOffset:   7
 ; CHECK-NEXT:         InitFlags:       0
diff --git a/lld/test/wasm/entry.ll b/lld/test/wasm/entry.ll
index a2fba3e98e732..21779a01bfec7 100644
--- a/lld/test/wasm/entry.ll
+++ b/lld/test/wasm/entry.ll
@@ -17,12 +17,6 @@ entry:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            entry
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
@@ -41,10 +35,4 @@ entry:
 ; CHECK-CTOR-NEXT:       - Name:            __wasm_call_ctors
 ; CHECK-CTOR-NEXT:         Kind:            FUNCTION
 ; CHECK-CTOR-NEXT:         Index:           0
-; CHECK-CTOR-NEXT:       - Name:            __heap_base
-; CHECK-CTOR-NEXT:         Kind:            GLOBAL
-; CHECK-CTOR-NEXT:         Index:           1
-; CHECK-CTOR-NEXT:       - Name:            __data_end
-; CHECK-CTOR-NEXT:         Kind:            GLOBAL
-; CHECK-CTOR-NEXT:         Index:           2
 ; CHECK-CTOR-NEXT:   - Type:
diff --git a/lld/test/wasm/export.ll b/lld/test/wasm/export.ll
index feda87f4d58b1..8dc14ae861ed4 100644
--- a/lld/test/wasm/export.ll
+++ b/lld/test/wasm/export.ll
@@ -40,12 +40,6 @@ entry:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            hidden_function
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
diff --git a/lld/test/wasm/local-symbols.ll b/lld/test/wasm/local-symbols.ll
index 284b8423cb9f0..58d0ddbf70b9a 100644
--- a/lld/test/wasm/local-symbols.ll
+++ b/lld/test/wasm/local-symbols.ll
@@ -57,36 +57,18 @@ entry:
 ; CHECK-NEXT:         Mutable:         false
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66576
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1032
-; CHECK-NEXT:       - Index:           3
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            _start
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           1
 ; CHECK-NEXT:       - Name:            foo
 ; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           3
+; CHECK-NEXT:         Index:           1
 ; CHECK-NEXT:   - Type:            CODE
 ; CHECK-NEXT:     Functions:
 ; CHECK-NEXT:       - Index:           0
diff --git a/lld/test/wasm/locals-duplicate.test b/lld/test/wasm/locals-duplicate.test
index 28d7fd7883f4a..ce02c7fc3bf81 100644
--- a/lld/test/wasm/locals-duplicate.test
+++ b/lld/test/wasm/locals-duplicate.test
@@ -38,20 +38,8 @@
 ; CHECK-NEXT:         Mutable:         false
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66592
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1048
-; CHECK-NEXT:       - Index:           3
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           1028
-; CHECK-NEXT:       - Index:           4
+; CHECK-NEXT:       - Index:           2
 ; CHECK-NEXT:         Type:            I32
 ; CHECK-NEXT:         Mutable:         false
 ; CHECK-NEXT:         InitExpr:
@@ -62,12 +50,6 @@
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            colliding_func2
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           1
@@ -79,7 +61,7 @@
 ; CHECK-NEXT:         Index:           4
 ; CHECK-NEXT:       - Name:            colliding_global2
 ; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           3
+; CHECK-NEXT:         Index:           1
 ; CHECK-NEXT:       - Name:            get_global3A
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           5
@@ -100,7 +82,7 @@
 ; CHECK-NEXT:         Index:           12
 ; CHECK-NEXT:       - Name:            colliding_global1
 ; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           4
+; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            get_global2B
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           13
diff --git a/lld/test/wasm/lto/export.ll b/lld/test/wasm/lto/export.ll
index 9a1b6dad68c03..b6dba4de58335 100644
--- a/lld/test/wasm/lto/export.ll
+++ b/lld/test/wasm/lto/export.ll
@@ -23,12 +23,6 @@ entry:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            hidden_function
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
diff --git a/lld/test/wasm/stack-first.test b/lld/test/wasm/stack-first.test
index 71d1e9dde858f..bc45023c0db63 100644
--- a/lld/test/wasm/stack-first.test
+++ b/lld/test/wasm/stack-first.test
@@ -4,8 +4,7 @@
 ; global should be initialized to 512.
 
 RUN: llc -filetype=obj %p/Inputs/start.ll -o %t.o
-
-RUN: wasm-ld -z stack-size=512 --stack-first --allow-undefined -o %t.wasm %t.o
+RUN: wasm-ld -z stack-size=512 --stack-first --export=__data_end --export=__heap_base -o %t.wasm %t.o
 RUN: obj2yaml %t.wasm | FileCheck %s
 
 CHECK:        - Type:            GLOBAL
@@ -33,10 +32,12 @@ CHECK-NEXT:     Exports:
 CHECK-NEXT:       - Name:            memory
 CHECK-NEXT:         Kind:            MEMORY
 CHECK-NEXT:         Index:           0
-CHECK-NEXT:       - Name:            __heap_base
+CHECK-NEXT:       - Name:            __data_end
 CHECK-NEXT:         Kind:            GLOBAL
 CHECK-NEXT:         Index:           1
-CHECK-NEXT:       - Name:            __data_end
+CHECK-NEXT:       - Name:            __heap_base
 CHECK-NEXT:         Kind:            GLOBAL
 CHECK-NEXT:         Index:           2
-
+CHECK-NEXT:       - Name:            _start
+CHECK-NEXT:         Kind:            FUNCTION
+CHECK-NEXT:         Index:           0
diff --git a/lld/test/wasm/undefined-weak-call.ll b/lld/test/wasm/undefined-weak-call.ll
index a95a32b0f30f0..badc77f54f01e 100644
--- a/lld/test/wasm/undefined-weak-call.ll
+++ b/lld/test/wasm/undefined-weak-call.ll
@@ -61,29 +61,11 @@ define i32 @callWeakFuncs() {
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           1
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            callWeakFuncs
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           3
diff --git a/lld/test/wasm/visibility-hidden.ll b/lld/test/wasm/visibility-hidden.ll
index 64ed0112bc757..f5731c4e964ad 100644
--- a/lld/test/wasm/visibility-hidden.ll
+++ b/lld/test/wasm/visibility-hidden.ll
@@ -43,12 +43,6 @@ entry:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            objectDefault
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           1
@@ -66,12 +60,6 @@ entry:
 ; NO-DEFAULT-NEXT:       - Name:            memory
 ; NO-DEFAULT-NEXT:         Kind:            MEMORY
 ; NO-DEFAULT-NEXT:         Index:           0
-; NO-DEFAULT-NEXT:       - Name:            __heap_base
-; NO-DEFAULT-NEXT:         Kind:            GLOBAL
-; NO-DEFAULT-NEXT:         Index:           1
-; NO-DEFAULT-NEXT:       - Name:            __data_end
-; NO-DEFAULT-NEXT:         Kind:            GLOBAL
-; NO-DEFAULT-NEXT:         Index:           2
 ; NO-DEFAULT-NEXT:       - Name:            _start
 ; NO-DEFAULT-NEXT:         Kind:            FUNCTION
 ; NO-DEFAULT-NEXT:         Index:           2
diff --git a/lld/test/wasm/weak-alias-overide.ll b/lld/test/wasm/weak-alias-overide.ll
index 0338f43f1ffac..913e53cba877b 100644
--- a/lld/test/wasm/weak-alias-overide.ll
+++ b/lld/test/wasm/weak-alias-overide.ll
@@ -51,29 +51,11 @@ entry:
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           1
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            alias_fn
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
diff --git a/lld/test/wasm/weak-alias.ll b/lld/test/wasm/weak-alias.ll
index 6e29ad08bafbe..6d79b70249975 100644
--- a/lld/test/wasm/weak-alias.ll
+++ b/lld/test/wasm/weak-alias.ll
@@ -48,29 +48,11 @@ entry:
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           1
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            _start
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
diff --git a/lld/test/wasm/weak-symbols.ll b/lld/test/wasm/weak-symbols.ll
index 4fa9fa3d0d09e..1ae28b350bd3a 100644
--- a/lld/test/wasm/weak-symbols.ll
+++ b/lld/test/wasm/weak-symbols.ll
@@ -53,30 +53,12 @@ entry:
 ; CHECK-NEXT:         Mutable:         false
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66576
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1028
-; CHECK-NEXT:       - Index:           3
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            _start
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           0
@@ -88,7 +70,7 @@ entry:
 ; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            weakGlobal
 ; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           3
+; CHECK-NEXT:         Index:           1
 ; CHECK-NEXT:       - Name:            exportWeak2
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           3
diff --git a/lld/test/wasm/weak-undefined.ll b/lld/test/wasm/weak-undefined.ll
index 290ee81ec8cd2..65f723de04219 100644
--- a/lld/test/wasm/weak-undefined.ll
+++ b/lld/test/wasm/weak-undefined.ll
@@ -59,29 +59,11 @@ entry:
 ; CHECK-NEXT:         InitExpr:
 ; CHECK-NEXT:           Opcode:          I32_CONST
 ; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           1
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           66560
-; CHECK-NEXT:       - Index:           2
-; CHECK-NEXT:         Type:            I32
-; CHECK-NEXT:         Mutable:         false
-; CHECK-NEXT:         InitExpr:
-; CHECK-NEXT:           Opcode:          I32_CONST
-; CHECK-NEXT:           Value:           1024
 ; CHECK-NEXT:   - Type:            EXPORT
 ; CHECK-NEXT:     Exports:
 ; CHECK-NEXT:       - Name:            memory
 ; CHECK-NEXT:         Kind:            MEMORY
 ; CHECK-NEXT:         Index:           0
-; CHECK-NEXT:       - Name:            __heap_base
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           1
-; CHECK-NEXT:       - Name:            __data_end
-; CHECK-NEXT:         Kind:            GLOBAL
-; CHECK-NEXT:         Index:           2
 ; CHECK-NEXT:       - Name:            _start
 ; CHECK-NEXT:         Kind:            FUNCTION
 ; CHECK-NEXT:         Index:           2
diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 6650e9e89df5a..b6e72dcc721f9 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -59,6 +59,7 @@ struct Configuration {
   llvm::StringRef ThinLTOCacheDir;
 
   llvm::StringSet<> AllowUndefinedSymbols;
+  llvm::StringSet<> ExportedSymbols;
   std::vector<llvm::StringRef> SearchPaths;
   llvm::CachePruningPolicy ThinLTOCachePolicy;
   llvm::Optional<std::vector<std::string>> Features;
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 5d27ac4bec6d1..ccf59f4fb3390 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -481,13 +481,8 @@ static void createSyntheticSymbols() {
     // See: https://github.com/WebAssembly/mutable-global
     WasmSym::StackPointer = Symtab->addSyntheticGlobal(
         "__stack_pointer", WASM_SYMBOL_VISIBILITY_HIDDEN, StackPointer);
-    WasmSym::HeapBase = Symtab->addSyntheticDataSymbol("__heap_base", 0);
-    WasmSym::DataEnd = Symtab->addSyntheticDataSymbol("__data_end", 0);
-
-    // These two synthetic symbols exist purely for the embedder so we always
-    // want to export them.
-    WasmSym::HeapBase->ForceExport = true;
-    WasmSym::DataEnd->ForceExport = true;
+    WasmSym::DataEnd = Symtab->addOptionalDataSymbol("__data_end");
+    WasmSym::HeapBase = Symtab->addOptionalDataSymbol("__heap_base");
   }
 
   if (Config->Pic) {
@@ -670,6 +665,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   for (auto *Arg : Args.filtered(OPT_trace_symbol))
     Symtab->trace(Arg->getValue());
 
+  for (auto *Arg : Args.filtered(OPT_export))
+    Config->ExportedSymbols.insert(Arg->getValue());
+
   if (!Config->Relocatable)
     createSyntheticSymbols();
 
@@ -688,6 +686,11 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   for (auto *Arg : Args.filtered(OPT_undefined))
     handleUndefined(Arg->getValue());
 
+  // Handle the `--export <sym>` options
+  // This works like --undefined but also exports the symbol if its found
+  for (auto *Arg : Args.filtered(OPT_export))
+    handleUndefined(Arg->getValue());
+
   Symbol *EntrySym = nullptr;
   if (!Config->Relocatable && !Config->Entry.empty()) {
     EntrySym = handleUndefined(Config->Entry);
@@ -701,11 +704,6 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   if (errorCount())
     return;
 
-  // Handle the `--export <sym>` options
-  // This works like --undefined but also exports the symbol if its found
-  for (auto *Arg : Args.filtered(OPT_export))
-    handleUndefined(Arg->getValue());
-
   // Create wrapped symbols for -wrap option.
   std::vector<WrappedSymbol> Wrapped = addWrappedSymbols(Args);
 
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index dec59cb98c71d..d10952797d55c 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -202,7 +202,10 @@ DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name,
 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef Name, uint32_t Value,
                                                 uint32_t Flags) {
   Symbol *S = find(Name);
-  if (!S || S->isDefined())
+  // Enable --export of optional symbols
+  if (!S && (Config->ExportAll || Config->ExportedSymbols.count(Name) != 0))
+    S = insertName(Name).first;
+  else if (!S || S->isDefined())
     return nullptr;
   LLVM_DEBUG(dbgs() << "addOptionalDataSymbol: " << Name << "\n");
   auto *rtn = replaceSymbol<DefinedData>(S, Name, Flags);
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index 8c96c616330e2..01f0d22d5a8e4 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -79,8 +79,8 @@ class SymbolTable {
                                     InputGlobal *Global);
   DefinedFunction *addSyntheticFunction(StringRef Name, uint32_t Flags,
                                         InputFunction *Function);
-  DefinedData *addOptionalDataSymbol(StringRef Name, uint32_t Value,
-                                     uint32_t Flags);
+  DefinedData *addOptionalDataSymbol(StringRef Name, uint32_t Value = 0,
+                                     uint32_t Flags = 0);
 
   void handleSymbolVariants();
   void handleWeakUndefines();
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index b7f5afc2dcbef..0aa1180cb2b14 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -258,10 +258,9 @@ void Writer::layoutMemory() {
   // Set `__heap_base` to directly follow the end of the stack or global data.
   // The fact that this comes last means that a malloc/brk implementation
   // can grow the heap at runtime.
-  if (!Config->Relocatable) {
+  log("mem: heap base   = " + Twine(MemoryPtr));
+  if (WasmSym::HeapBase)
     WasmSym::HeapBase->setVirtualAddress(MemoryPtr);
-    log("mem: heap base   = " + Twine(MemoryPtr));
-  }
 
   if (Config->InitialMemory != 0) {
     if (Config->InitialMemory != alignTo(Config->InitialMemory, WasmPageSize))
@@ -307,8 +306,8 @@ void Writer::addStartStopSymbols(const InputSegment *Seg) {
     return;
   uint32_t Start = Seg->OutputSeg->StartVA + Seg->OutputSegmentOffset;
   uint32_t Stop = Start + Seg->getSize();
-  Symtab->addOptionalDataSymbol(Saver.save("__start_" + S), Start, 0);
-  Symtab->addOptionalDataSymbol(Saver.save("__stop_" + S), Stop, 0);
+  Symtab->addOptionalDataSymbol(Saver.save("__start_" + S), Start);
+  Symtab->addOptionalDataSymbol(Saver.save("__stop_" + S), Stop);
 }
 
 void Writer::addSections() {

From eddd6c25b567bf2b75b38e9e09b80f69f87d7017 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Fri, 31 May 2019 22:55:03 +0000
Subject: [PATCH 0795/1176] [codeview] Revert inline line table change of
 r362264

Testing with debuggers shows that our previous behavior was correct.
The reason I thought MSVC did things differently is that MSVC prefers to
use the 0xB combined code offset and code length update opcode when
inline sites are discontiguous.

Keep the test changes, and update the llvm-pdbutil inline line table
dumper to account for this new interpretation of the opcodes.

llvm-svn: 362277
---
 llvm/lib/MC/MCCodeView.cpp                       | 1 +
 llvm/test/MC/COFF/cv-inline-linetable-unlikely.s | 5 ++---
 llvm/test/MC/COFF/cv-loc-unreachable-2.s         | 2 +-
 llvm/test/MC/COFF/cv-loc-unreachable.s           | 2 +-
 llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp  | 4 ++++
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index b2983c11e3423..1a71b542bd06d 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -535,6 +535,7 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
           unsigned Length = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
           compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
           compressAnnotation(Length, Buffer);
+          LastLabel = Loc.getLabel();
         }
         HaveOpenRange = false;
         continue;
diff --git a/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s b/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s
index 3f693ee63294b..a12f6d32ad264 100644
--- a/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s
+++ b/llvm/test/MC/COFF/cv-inline-linetable-unlikely.s
@@ -36,12 +36,11 @@
 # ASM-NEXT:       2b: 0f 0b                         ud2
 #  End inline loc
 
-# CODEVIEW:      S_INLINESITE [size = 26]
+# CODEVIEW:      S_INLINESITE [size = 24]
 # CODEVIEW-NEXT: inlinee = 0x1002 (f), parent = 0, end = 0
 # CODEVIEW-NEXT:   0B2E      code 0xE (+0xE) line 1 (+1)
 # CODEVIEW-NEXT:   0409      code end 0x17 (+0x9)
-# CODEVIEW-NEXT:   0602      line 2 (+1)
-# CODEVIEW-NEXT:   0318      code 0x26 (+0x18)
+# CODEVIEW-NEXT:   0B2F      code 0x26 (+0xF) line 2 (+1)
 # CODEVIEW-NEXT:   0407      code end 0x2D (+0x7)
 
 	.text
diff --git a/llvm/test/MC/COFF/cv-loc-unreachable-2.s b/llvm/test/MC/COFF/cv-loc-unreachable-2.s
index e13ad84a8142c..66e8da4fa140d 100644
--- a/llvm/test/MC/COFF/cv-loc-unreachable-2.s
+++ b/llvm/test/MC/COFF/cv-loc-unreachable-2.s
@@ -25,7 +25,7 @@
 # CODEVIEW-NEXT: inlinee = 0x1002 (do_exit), parent = 0, end = 0
 # CODEVIEW-NEXT:   0602      line 1 (+1)
 # CODEVIEW-NEXT:   0409      code end 0x9 (+0x9)
-# CODEVIEW-NEXT:   0B2A      code 0xA (+0xA) line 2 (+1)
+# CODEVIEW-NEXT:   0B21      code 0xA (+0x1) line 2 (+1)
 # CODEVIEW-NEXT:   0B28      code 0x12 (+0x8) line 3 (+1)
 # CODEVIEW-NEXT:   0400      code end 0x12 (+0x0)
 
diff --git a/llvm/test/MC/COFF/cv-loc-unreachable.s b/llvm/test/MC/COFF/cv-loc-unreachable.s
index bd6f8f75a8cf6..7a14a2d6002f5 100644
--- a/llvm/test/MC/COFF/cv-loc-unreachable.s
+++ b/llvm/test/MC/COFF/cv-loc-unreachable.s
@@ -36,7 +36,7 @@
 # CODEVIEW-NEXT: inlinee = 0x1002 (do_exit), parent = 0, end = 0
 # CODEVIEW-NEXT:   0602      line 1 (+1)
 # CODEVIEW-NEXT:   0409      code end 0x9 (+0x9)
-# CODEVIEW-NEXT:   0B2A      code 0xA (+0xA) line 2 (+1)
+# CODEVIEW-NEXT:   0B21      code 0xA (+0x1) line 2 (+1)
 # CODEVIEW-NEXT:   0B28      code 0x12 (+0x8) line 3 (+1)
 # CODEVIEW-NEXT:   0400      code end 0x12 (+0x0)
 
diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index d3c3f3da9c069..720d7396601d4 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -689,6 +689,10 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &IS) {
       break;
     case BinaryAnnotationsOpCode::ChangeCodeLength:
       formatCodeLength(Annot.U1);
+      // Apparently this annotation updates the code offset. It's hard to make
+      // MSVC produce this opcode, but clang uses it, and debuggers seem to use
+      // this interpretation.
+      CodeOffset += Annot.U1;
       break;
     case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
       formatCodeOffset(Annot.U1);

From 7fcad2f17111d2bfeb607d8ed088c97473237c48 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 31 May 2019 23:02:13 +0000
Subject: [PATCH 0796/1176] [PowerPC] check for INLINEASM_BR along w/ INLINEASM

Summary:
It looks like since INLINEASM_BR was created off of INLINEASM (r353563),
a few checks for INLINEASM needed to be updated to check for either
case.

pr/41999

Reviewers: hfinkel

Reviewed By: hfinkel

Subscribers: nemanjai, hiraditya, kbarton, jsji, llvm-commits, craig.topper, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62403

llvm-svn: 362278
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 2 +-
 llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 25f4c9aa8ebd1..cacbe4eecc5f9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2013,7 +2013,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
 unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   unsigned Opcode = MI.getOpcode();
 
-  if (Opcode == PPC::INLINEASM) {
+  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
     const MachineFunction *MF = MI.getParent()->getParent();
     const char *AsmStr = MI.getOperand(0).getSymbolName();
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index b1ddbb6b837ca..44175af7f9b60 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -422,6 +422,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       // of opcodes having a common attribute in TableGen.  Should this
       // change, this is a prime candidate to use such a mechanism.
       case PPC::INLINEASM:
+      case PPC::INLINEASM_BR:
       case PPC::EXTRACT_SUBREG:
       case PPC::INSERT_SUBREG:
       case PPC::COPY_TO_REGCLASS:

From ec1982f07f5bdc60a0e982b992c7143cb879a996 Mon Sep 17 00:00:00 2001
From: Stephane Moore <mog@google.com>
Date: Fri, 31 May 2019 23:41:15 +0000
Subject: [PATCH 0797/1176] Revise the google-objc-global-variable-declaration
 check to match the style guide.

Summary:
Revise the google-objc-global-variable-declaration check to match the style guide.

This commit updates the check as follows:
(1) Do not emit fixes for extern global constants.
(2) Allow the second character of prefixes for constants to be numeric (the new guideline is that global constants should generally be named with a prefix that begins with a capital letter followed by one or more capital letters or numbers).

https://google.github.io/styleguide/objcguide.html#prefixes

This is an amended re-submission of https://reviews.llvm.org/rG12e3726fadb0b2a4d8aeed0a2817b5159f9d029d.

Contributed By: yaqiji

Reviewers: Wizard, benhamilton, stephanemoore

Reviewed By: benhamilton, stephanemoore

Subscribers: mgorny, cfe-commits, yaqiji

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62045

llvm-svn: 362279
---
 .../google/GlobalVariableDeclarationCheck.cpp | 22 +++++++++++-------
 .../google-objc-global-variable-declaration.m | 23 ++++++++++++++++---
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
index ce833906dd5c5..30ab04c08c008 100644
--- a/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/GlobalVariableDeclarationCheck.cpp
@@ -23,29 +23,35 @@ namespace objc {
 
 namespace {
 
-AST_MATCHER(VarDecl, isLocalVariable) {
-  return Node.isLocalVarDecl();
-}
+AST_MATCHER(VarDecl, isLocalVariable) { return Node.isLocalVarDecl(); }
 
 FixItHint generateFixItHint(const VarDecl *Decl, bool IsConst) {
+  if (IsConst && (Decl->getStorageClass() != SC_Static)) {
+    // No fix available if it is not a static constant, since it is difficult
+    // to determine the proper fix in this case.
+    return FixItHint();
+  }
+
   char FC = Decl->getName()[0];
   if (!llvm::isAlpha(FC) || Decl->getName().size() == 1) {
     // No fix available if first character is not alphabetical character, or it
-    // is a single-character variable, since it is difficult to determine the 
+    // is a single-character variable, since it is difficult to determine the
     // proper fix in this case. Users should create a proper variable name by
     // their own.
     return FixItHint();
   }
   char SC = Decl->getName()[1];
   if ((FC == 'k' || FC == 'g') && !llvm::isAlpha(SC)) {
-    // No fix available if the prefix is correct but the second character is not
-    // alphabetical, since it is difficult to determine the proper fix in this
-    // case.
+    // No fix available if the prefix is correct but the second character is
+    // not alphabetical, since it is difficult to determine the proper fix in
+    // this case.
     return FixItHint();
   }
+
   auto NewName = (IsConst ? "k" : "g") +
                  llvm::StringRef(std::string(1, FC)).upper() +
                  Decl->getName().substr(1).str();
+
   return FixItHint::CreateReplacement(
       CharSourceRange::getTokenRange(SourceRange(Decl->getLocation())),
       llvm::StringRef(NewName));
@@ -71,7 +77,7 @@ void GlobalVariableDeclarationCheck::registerMatchers(MatchFinder *Finder) {
       this);
   Finder->addMatcher(varDecl(hasGlobalStorage(), hasType(isConstQualified()),
                              unless(isLocalVariable()),
-                             unless(matchesName("::(k[A-Z]|[A-Z]{2,})")))
+                             unless(matchesName("::(k[A-Z])|([A-Z][A-Z0-9])")))
                          .bind("global_const"),
                      this);
 }
diff --git a/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m b/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
index 346ddeca7db93..d807a39d274d9 100644
--- a/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
+++ b/clang-tools-extra/test/clang-tidy/google-objc-global-variable-declaration.m
@@ -1,10 +1,14 @@
 // RUN: %check_clang_tidy %s google-objc-global-variable-declaration %t
 
 @class NSString;
+
 static NSString* const myConstString = @"hello";
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'myConstString' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const kMyConstString = @"hello";
 
+extern NSString* const GlobalConstant = @"hey";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'GlobalConstant' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
+
 static NSString* MyString = @"hi";
 // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: non-const global variable 'MyString' must have a name which starts with 'g[A-Z]' [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* gMyString = @"hi";
@@ -25,12 +29,25 @@
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable '_notAlpha' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const _notAlpha = @"NotBeginWithAlpha";
 
+static NSString* const notCap = @"NotBeginWithCap";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'notCap' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
+// CHECK-FIXES: static NSString* const kNotCap = @"NotBeginWithCap";
+
 static NSString* const k_Alpha = @"SecondNotAlpha";
 // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'k_Alpha' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
 // CHECK-FIXES: static NSString* const k_Alpha = @"SecondNotAlpha";
 
+static NSString* const SecondNotCap = @"SecondNotCapOrNumber";
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: const global variable 'SecondNotCap' must have a name which starts with an appropriate prefix [google-objc-global-variable-declaration]
+// CHECK-FIXES: static NSString* const kSecondNotCap = @"SecondNotCapOrNumber";
+
+extern NSString* Y2Bad;
+// CHECK-MESSAGES: :[[@LINE-1]]:18: warning: non-const global variable 'Y2Bad' must have a name which starts with 'g[A-Z]' [google-objc-global-variable-declaration]
+// CHECK-FIXES: extern NSString* gY2Bad;
+
 static NSString* const kGood = @"hello";
 static NSString* const XYGood = @"hello";
+static NSString* const X1Good = @"hello";
 static NSString* gMyIntGood = 0;
 
 extern NSString* const GTLServiceErrorDomain;
@@ -42,8 +59,8 @@
 
 @implementation Foo
 - (void)f {
-    int x = 0;
-    static int bar;
-    static const int baz = 42;
+  int x = 0;
+  static int bar;
+  static const int baz = 42;
 }
 @end

From eb4d6142dcd53d79d8f8a86908a035582965fc52 Mon Sep 17 00:00:00 2001
From: Tom Tan <Tom.Tan@microsoft.com>
Date: Fri, 31 May 2019 23:43:31 +0000
Subject: [PATCH 0798/1176] [COFF, ARM64] Add CodeView register mapping

CodeView has its own register map which is defined in cvconst.h. Missing this
mapping before saving register to CodeView causes debugger to show incorrect
value for all register based variables, like variables in register and local
variables addressed by register (stack pointer + offset).

This change added mapping between LLVM register and CodeView register so the
correct register number will be stored to CodeView/PDB, it aso fixed the
mapping from CodeView register number to register name based on current
CPUType but print PDB to yaml still assumes X86 CPU and needs to be fixed.

Differential Revision: https://reviews.llvm.org/D62608

llvm-svn: 362280
---
 .../llvm/DebugInfo/CodeView/CodeView.h        |  14 ++
 .../DebugInfo/CodeView/CodeViewRegisters.def  | 203 +++++++++++++++++-
 .../llvm/DebugInfo/CodeView/EnumTables.h      |   2 +-
 llvm/include/llvm/DebugInfo/PDB/PDBExtras.h   |   3 +-
 llvm/include/llvm/DebugInfo/PDB/PDBTypes.h    |   1 +
 llvm/lib/DebugInfo/CodeView/EnumTables.cpp    |  20 +-
 llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp  |  19 +-
 llvm/lib/DebugInfo/PDB/PDBExtras.cpp          |  32 ++-
 llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp   |   2 +-
 .../MCTargetDesc/AArch64MCTargetDesc.cpp      | 177 ++++++++++++++-
 .../COFF/register-variables-arm64.ll          | 149 +++++++++++++
 .../llvm-pdbutil/MinimalSymbolDumper.cpp      |  54 +++--
 llvm/tools/llvm-pdbutil/PdbYaml.cpp           |   1 +
 .../llvm-pdbutil/PrettyFunctionDumper.cpp     |   3 +-
 14 files changed, 638 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/DebugInfo/COFF/register-variables-arm64.ll

diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
index b610a43ded2d9..ff25972e6e474 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -509,9 +509,23 @@ enum class FrameCookieKind : uint8_t {
 
 // Corresponds to CV_HREG_e enum.
 enum class RegisterId : uint16_t {
+#define CV_REGISTERS_ALL
 #define CV_REGISTER(name, value) name = value,
 #include "CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_ALL
+};
+
+// Register Ids are shared between architectures in CodeView. CPUType is needed
+// to map register Id to name.
+struct CPURegister {
+  CPURegister() = delete;
+  CPURegister(CPUType Cpu, codeview::RegisterId Reg) {
+    this->Cpu = Cpu;
+    this->Reg = Reg;
+  }
+  CPUType Cpu;
+  RegisterId Reg;
 };
 
 /// Two-bit value indicating which register is the designated frame pointer
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index 0593bc0571c60..9767e49c44f59 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -14,8 +14,15 @@
 #define CV_REGISTER(name, value)
 #endif
 
+#if !defined(CV_REGISTERS_ALL) && !defined(CV_REGISTERS_X86) &&                \
+    !defined(CV_REGISTERS_ARM64)
+#error Need include at least one register set.
+#endif
+
 // This currently only contains the "register subset shared by all processor
-// types" (ERR etc.) and the x86 registers.
+// types" (ERR etc.) and the x86/arm64 registers.
+
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
 
 // Some system headers define macros that conflict with our enums. Every
 // compiler supported by LLVM has the push_macro and pop_macro pragmas, so use
@@ -356,3 +363,197 @@ CV_REGISTER(AMD64_K7, 765)
 #pragma pop_macro("CR2")
 #pragma pop_macro("CR3")
 #pragma pop_macro("CR4")
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
+
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
+
+// ARM64 registers
+
+CV_REGISTER(ARM64_NOREG, 0)
+
+// General purpose 32-bit integer registers
+
+CV_REGISTER(ARM64_W0, 10)
+CV_REGISTER(ARM64_W1, 11)
+CV_REGISTER(ARM64_W2, 12)
+CV_REGISTER(ARM64_W3, 13)
+CV_REGISTER(ARM64_W4, 14)
+CV_REGISTER(ARM64_W5, 15)
+CV_REGISTER(ARM64_W6, 16)
+CV_REGISTER(ARM64_W7, 17)
+CV_REGISTER(ARM64_W8, 18)
+CV_REGISTER(ARM64_W9, 19)
+CV_REGISTER(ARM64_W10, 20)
+CV_REGISTER(ARM64_W11, 21)
+CV_REGISTER(ARM64_W12, 22)
+CV_REGISTER(ARM64_W13, 23)
+CV_REGISTER(ARM64_W14, 24)
+CV_REGISTER(ARM64_W15, 25)
+CV_REGISTER(ARM64_W16, 26)
+CV_REGISTER(ARM64_W17, 27)
+CV_REGISTER(ARM64_W18, 28)
+CV_REGISTER(ARM64_W19, 29)
+CV_REGISTER(ARM64_W20, 30)
+CV_REGISTER(ARM64_W21, 31)
+CV_REGISTER(ARM64_W22, 32)
+CV_REGISTER(ARM64_W23, 33)
+CV_REGISTER(ARM64_W24, 34)
+CV_REGISTER(ARM64_W25, 35)
+CV_REGISTER(ARM64_W26, 36)
+CV_REGISTER(ARM64_W27, 37)
+CV_REGISTER(ARM64_W28, 38)
+CV_REGISTER(ARM64_W29, 39)
+CV_REGISTER(ARM64_W30, 40)
+CV_REGISTER(ARM64_WZR, 41)
+
+// General purpose 64-bit integer registers
+
+CV_REGISTER(ARM64_X0, 50)
+CV_REGISTER(ARM64_X1, 51)
+CV_REGISTER(ARM64_X2, 52)
+CV_REGISTER(ARM64_X3, 53)
+CV_REGISTER(ARM64_X4, 54)
+CV_REGISTER(ARM64_X5, 55)
+CV_REGISTER(ARM64_X6, 56)
+CV_REGISTER(ARM64_X7, 57)
+CV_REGISTER(ARM64_X8, 58)
+CV_REGISTER(ARM64_X9, 59)
+CV_REGISTER(ARM64_X10, 60)
+CV_REGISTER(ARM64_X11, 61)
+CV_REGISTER(ARM64_X12, 62)
+CV_REGISTER(ARM64_X13, 63)
+CV_REGISTER(ARM64_X14, 64)
+CV_REGISTER(ARM64_X15, 65)
+CV_REGISTER(ARM64_X16, 66)
+CV_REGISTER(ARM64_X17, 67)
+CV_REGISTER(ARM64_X18, 68)
+CV_REGISTER(ARM64_X19, 69)
+CV_REGISTER(ARM64_X20, 70)
+CV_REGISTER(ARM64_X21, 71)
+CV_REGISTER(ARM64_X22, 72)
+CV_REGISTER(ARM64_X23, 73)
+CV_REGISTER(ARM64_X24, 74)
+CV_REGISTER(ARM64_X25, 75)
+CV_REGISTER(ARM64_X26, 76)
+CV_REGISTER(ARM64_X27, 77)
+CV_REGISTER(ARM64_X28, 78)
+CV_REGISTER(ARM64_FP, 79)
+CV_REGISTER(ARM64_LR, 80)
+CV_REGISTER(ARM64_SP, 81)
+CV_REGISTER(ARM64_ZR, 82)
+
+// status register
+
+CV_REGISTER(ARM64_NZCV, 90)
+
+// 32-bit floating point registers
+
+CV_REGISTER(ARM64_S0, 100)
+CV_REGISTER(ARM64_S1, 101)
+CV_REGISTER(ARM64_S2, 102)
+CV_REGISTER(ARM64_S3, 103)
+CV_REGISTER(ARM64_S4, 104)
+CV_REGISTER(ARM64_S5, 105)
+CV_REGISTER(ARM64_S6, 106)
+CV_REGISTER(ARM64_S7, 107)
+CV_REGISTER(ARM64_S8, 108)
+CV_REGISTER(ARM64_S9, 109)
+CV_REGISTER(ARM64_S10, 110)
+CV_REGISTER(ARM64_S11, 111)
+CV_REGISTER(ARM64_S12, 112)
+CV_REGISTER(ARM64_S13, 113)
+CV_REGISTER(ARM64_S14, 114)
+CV_REGISTER(ARM64_S15, 115)
+CV_REGISTER(ARM64_S16, 116)
+CV_REGISTER(ARM64_S17, 117)
+CV_REGISTER(ARM64_S18, 118)
+CV_REGISTER(ARM64_S19, 119)
+CV_REGISTER(ARM64_S20, 120)
+CV_REGISTER(ARM64_S21, 121)
+CV_REGISTER(ARM64_S22, 122)
+CV_REGISTER(ARM64_S23, 123)
+CV_REGISTER(ARM64_S24, 124)
+CV_REGISTER(ARM64_S25, 125)
+CV_REGISTER(ARM64_S26, 126)
+CV_REGISTER(ARM64_S27, 127)
+CV_REGISTER(ARM64_S28, 128)
+CV_REGISTER(ARM64_S29, 129)
+CV_REGISTER(ARM64_S30, 130)
+CV_REGISTER(ARM64_S31, 131)
+
+// 64-bit floating point registers
+
+CV_REGISTER(ARM64_D0, 140)
+CV_REGISTER(ARM64_D1, 141)
+CV_REGISTER(ARM64_D2, 142)
+CV_REGISTER(ARM64_D3, 143)
+CV_REGISTER(ARM64_D4, 144)
+CV_REGISTER(ARM64_D5, 145)
+CV_REGISTER(ARM64_D6, 146)
+CV_REGISTER(ARM64_D7, 147)
+CV_REGISTER(ARM64_D8, 148)
+CV_REGISTER(ARM64_D9, 149)
+CV_REGISTER(ARM64_D10, 150)
+CV_REGISTER(ARM64_D11, 151)
+CV_REGISTER(ARM64_D12, 152)
+CV_REGISTER(ARM64_D13, 153)
+CV_REGISTER(ARM64_D14, 154)
+CV_REGISTER(ARM64_D15, 155)
+CV_REGISTER(ARM64_D16, 156)
+CV_REGISTER(ARM64_D17, 157)
+CV_REGISTER(ARM64_D18, 158)
+CV_REGISTER(ARM64_D19, 159)
+CV_REGISTER(ARM64_D20, 160)
+CV_REGISTER(ARM64_D21, 161)
+CV_REGISTER(ARM64_D22, 162)
+CV_REGISTER(ARM64_D23, 163)
+CV_REGISTER(ARM64_D24, 164)
+CV_REGISTER(ARM64_D25, 165)
+CV_REGISTER(ARM64_D26, 166)
+CV_REGISTER(ARM64_D27, 167)
+CV_REGISTER(ARM64_D28, 168)
+CV_REGISTER(ARM64_D29, 169)
+CV_REGISTER(ARM64_D30, 170)
+CV_REGISTER(ARM64_D31, 171)
+
+// 128-bit SIMD registers
+
+CV_REGISTER(ARM64_Q0, 180)
+CV_REGISTER(ARM64_Q1, 181)
+CV_REGISTER(ARM64_Q2, 182)
+CV_REGISTER(ARM64_Q3, 183)
+CV_REGISTER(ARM64_Q4, 184)
+CV_REGISTER(ARM64_Q5, 185)
+CV_REGISTER(ARM64_Q6, 186)
+CV_REGISTER(ARM64_Q7, 187)
+CV_REGISTER(ARM64_Q8, 188)
+CV_REGISTER(ARM64_Q9, 189)
+CV_REGISTER(ARM64_Q10, 190)
+CV_REGISTER(ARM64_Q11, 191)
+CV_REGISTER(ARM64_Q12, 192)
+CV_REGISTER(ARM64_Q13, 193)
+CV_REGISTER(ARM64_Q14, 194)
+CV_REGISTER(ARM64_Q15, 195)
+CV_REGISTER(ARM64_Q16, 196)
+CV_REGISTER(ARM64_Q17, 197)
+CV_REGISTER(ARM64_Q18, 198)
+CV_REGISTER(ARM64_Q19, 199)
+CV_REGISTER(ARM64_Q20, 200)
+CV_REGISTER(ARM64_Q21, 201)
+CV_REGISTER(ARM64_Q22, 202)
+CV_REGISTER(ARM64_Q23, 203)
+CV_REGISTER(ARM64_Q24, 204)
+CV_REGISTER(ARM64_Q25, 205)
+CV_REGISTER(ARM64_Q26, 206)
+CV_REGISTER(ARM64_Q27, 207)
+CV_REGISTER(ARM64_Q28, 208)
+CV_REGISTER(ARM64_Q29, 209)
+CV_REGISTER(ARM64_Q30, 210)
+CV_REGISTER(ARM64_Q31, 211)
+
+// Floating point status register
+
+CV_REGISTER(ARM64_FPSR, 220)
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
diff --git a/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h b/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
index 74f8c7176811d..ed126ed9e2ffd 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -20,7 +20,7 @@ namespace codeview {
 
 ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames();
 ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames();
-ArrayRef<EnumEntry<uint16_t>> getRegisterNames();
+ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu);
 ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames();
 ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames();
 ArrayRef<EnumEntry<uint16_t>> getLocalFlagNames();
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
index b9a8d8f6ac020..f5c3a5fcc99ff 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -27,7 +27,8 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_BuiltinType &Type);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_DataKind &Data);
-raw_ostream &operator<<(raw_ostream &OS, const codeview::RegisterId &Reg);
+raw_ostream &operator<<(raw_ostream &OS,
+                        const llvm::codeview::CPURegister &CpuReg);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_LocType &Loc);
 raw_ostream &operator<<(raw_ostream &OS, const codeview::ThunkOrdinal &Thunk);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_Checksum &Checksum);
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index 6d26b64ddf726..742cb857a3360 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -126,6 +126,7 @@ enum class PDB_Machine {
   Am33 = 0x13,
   Amd64 = 0x8664,
   Arm = 0x1C0,
+  Arm64 = 0xaa64,
   ArmNT = 0x1C4,
   Ebc = 0xEBC,
   x86 = 0x14C,
diff --git a/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index 4f3ddc442aea5..54e68ae4ea9fd 100644
--- a/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -31,10 +31,20 @@ static const EnumEntry<TypeLeafKind> TypeLeafNames[] = {
 #undef CV_TYPE
 };
 
-static const EnumEntry<uint16_t> RegisterNames[] = {
+static const EnumEntry<uint16_t> RegisterNames_X86[] = {
+#define CV_REGISTERS_X86
 #define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
 #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_X86
+};
+
+static const EnumEntry<uint16_t> RegisterNames_ARM64[] = {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
 };
 
 static const EnumEntry<uint32_t> PublicSymFlagNames[] = {
@@ -171,6 +181,7 @@ static const EnumEntry<unsigned> CPUTypeNames[] = {
     CV_ENUM_CLASS_ENT(CPUType, ARM_XMAC),
     CV_ENUM_CLASS_ENT(CPUType, ARM_WMMX),
     CV_ENUM_CLASS_ENT(CPUType, ARM7),
+    CV_ENUM_CLASS_ENT(CPUType, ARM64),
     CV_ENUM_CLASS_ENT(CPUType, Omni),
     CV_ENUM_CLASS_ENT(CPUType, Ia64),
     CV_ENUM_CLASS_ENT(CPUType, Ia64_2),
@@ -300,8 +311,11 @@ ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames() {
   return makeArrayRef(TypeLeafNames);
 }
 
-ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
-  return makeArrayRef(RegisterNames);
+ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu) {
+  if (Cpu == CPUType::ARM64) {
+    return makeArrayRef(RegisterNames_ARM64);
+  }
+  return makeArrayRef(RegisterNames_X86);
 }
 
 ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() {
diff --git a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 44ce04a49e998..27cb7e35234b4 100644
--- a/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -325,7 +325,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
 Error CVSymbolDumperImpl::visitKnownRecord(
     CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) {
   W.printEnum("BaseRegister", uint16_t(DefRangeRegisterRel.Hdr.Register),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printBoolean("HasSpilledUDTMember",
                  DefRangeRegisterRel.hasSpilledUDTMember());
   W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent());
@@ -339,7 +339,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
 Error CVSymbolDumperImpl::visitKnownRecord(
     CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
   W.printEnum("Register", uint16_t(DefRangeRegister.Hdr.Register),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
   printLocalVariableAddrRange(DefRangeRegister.Range,
                               DefRangeRegister.getRelocationOffset());
@@ -350,7 +350,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
 Error CVSymbolDumperImpl::visitKnownRecord(
     CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
   W.printEnum("Register", uint16_t(DefRangeSubfieldRegister.Hdr.Register),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
   W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent);
   printLocalVariableAddrRange(DefRangeSubfieldRegister.Range,
@@ -403,7 +403,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                      FrameCookie.getRelocationOffset(),
                                      FrameCookie.CodeOffset, &LinkageName);
   }
-  W.printEnum("Register", uint16_t(FrameCookie.Register), getRegisterNames());
+  W.printEnum("Register", uint16_t(FrameCookie.Register),
+              getRegisterNames(CompilationCPUType));
   W.printEnum("CookieKind", uint16_t(FrameCookie.CookieKind),
               getFrameCookieKindNames());
   W.printHex("Flags", FrameCookie.Flags);
@@ -424,10 +425,10 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                getFrameProcSymFlagNames());
   W.printEnum("LocalFramePtrReg",
               uint16_t(FrameProc.getLocalFramePtrReg(CompilationCPUType)),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   W.printEnum("ParamFramePtrReg",
               uint16_t(FrameProc.getParamFramePtrReg(CompilationCPUType)),
-              getRegisterNames());
+              getRegisterNames(CompilationCPUType));
   return Error::success();
 }
 
@@ -505,7 +506,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
 Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                            RegisterSym &Register) {
   printTypeIndex("Type", Register.Index);
-  W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames());
+  W.printEnum("Seg", uint16_t(Register.Register),
+              getRegisterNames(CompilationCPUType));
   W.printString("Name", Register.Name);
   return Error::success();
 }
@@ -599,7 +601,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
                                            RegRelativeSym &RegRel) {
   W.printHex("Offset", RegRel.Offset);
   printTypeIndex("Type", RegRel.Type);
-  W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames());
+  W.printEnum("Register", uint16_t(RegRel.Register),
+              getRegisterNames(CompilationCPUType));
   W.printString("VarName", RegRel.Name);
   return Error::success();
 }
diff --git a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index 71552276153f0..59eadd71856c3 100644
--- a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -117,13 +117,37 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_DataKind &Data) {
 }
 
 raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
-                                   const codeview::RegisterId &Reg) {
-  switch (Reg) {
-#define CV_REGISTER(name, val) case codeview::RegisterId::name: OS << #name; return OS;
+                                   const llvm::codeview::CPURegister &CpuReg) {
+  if (CpuReg.Cpu == llvm::codeview::CPUType::ARM64) {
+    switch (CpuReg.Reg) {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val)                                                 \
+  case codeview::RegisterId::name:                                             \
+    OS << #name;                                                               \
+    return OS;
 #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+    default:
+      break;
+    }
+  } else {
+    switch (CpuReg.Reg) {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val)                                                 \
+  case codeview::RegisterId::name:                                             \
+    OS << #name;                                                               \
+    return OS;
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+    default:
+      break;
+    }
   }
-  OS << static_cast<int>(Reg);
+  OS << static_cast<int>(CpuReg.Reg);
   return OS;
 }
 
diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index bbaa5ed75ad2b..227107c051ddc 100644
--- a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -147,7 +147,7 @@ void ScalarEnumerationTraits<CPUType>::enumeration(IO &io, CPUType &Cpu) {
 }
 
 void ScalarEnumerationTraits<RegisterId>::enumeration(IO &io, RegisterId &Reg) {
-  auto RegNames = getRegisterNames();
+  auto RegNames = getRegisterNames(CPUType::X64);
   for (const auto &E : RegNames) {
     io.enumCase(Reg, E.Name.str().c_str(), static_cast<RegisterId>(E.Value));
   }
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index d6ef79ee4a867..26dd5e5adccde 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -17,6 +17,7 @@
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64InstPrinter.h"
 #include "TargetInfo/AArch64TargetInfo.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCInstrAnalysis.h"
@@ -56,11 +57,177 @@ createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
 }
 
 void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
-  for (unsigned Reg = AArch64::NoRegister + 1;
-       Reg < AArch64::NUM_TARGET_REGS; ++Reg) {
-    unsigned CV = MRI->getEncodingValue(Reg);
-    MRI->mapLLVMRegToCVReg(Reg, CV);
-  }
+  // Mapping from CodeView to MC register id.
+  static const struct {
+    codeview::RegisterId CVReg;
+    MCPhysReg Reg;
+  } RegMap[] = {
+      {codeview::RegisterId::ARM64_W0, AArch64::W0},
+      {codeview::RegisterId::ARM64_W1, AArch64::W1},
+      {codeview::RegisterId::ARM64_W2, AArch64::W2},
+      {codeview::RegisterId::ARM64_W3, AArch64::W3},
+      {codeview::RegisterId::ARM64_W4, AArch64::W4},
+      {codeview::RegisterId::ARM64_W5, AArch64::W5},
+      {codeview::RegisterId::ARM64_W6, AArch64::W6},
+      {codeview::RegisterId::ARM64_W7, AArch64::W7},
+      {codeview::RegisterId::ARM64_W8, AArch64::W8},
+      {codeview::RegisterId::ARM64_W9, AArch64::W9},
+      {codeview::RegisterId::ARM64_W10, AArch64::W10},
+      {codeview::RegisterId::ARM64_W11, AArch64::W11},
+      {codeview::RegisterId::ARM64_W12, AArch64::W12},
+      {codeview::RegisterId::ARM64_W13, AArch64::W13},
+      {codeview::RegisterId::ARM64_W14, AArch64::W14},
+      {codeview::RegisterId::ARM64_W15, AArch64::W15},
+      {codeview::RegisterId::ARM64_W16, AArch64::W16},
+      {codeview::RegisterId::ARM64_W17, AArch64::W17},
+      {codeview::RegisterId::ARM64_W18, AArch64::W18},
+      {codeview::RegisterId::ARM64_W19, AArch64::W19},
+      {codeview::RegisterId::ARM64_W20, AArch64::W20},
+      {codeview::RegisterId::ARM64_W21, AArch64::W21},
+      {codeview::RegisterId::ARM64_W22, AArch64::W22},
+      {codeview::RegisterId::ARM64_W23, AArch64::W23},
+      {codeview::RegisterId::ARM64_W24, AArch64::W24},
+      {codeview::RegisterId::ARM64_W25, AArch64::W25},
+      {codeview::RegisterId::ARM64_W26, AArch64::W26},
+      {codeview::RegisterId::ARM64_W27, AArch64::W27},
+      {codeview::RegisterId::ARM64_W28, AArch64::W28},
+      {codeview::RegisterId::ARM64_W29, AArch64::W29},
+      {codeview::RegisterId::ARM64_W30, AArch64::W30},
+      {codeview::RegisterId::ARM64_WZR, AArch64::WZR},
+      {codeview::RegisterId::ARM64_X0, AArch64::X0},
+      {codeview::RegisterId::ARM64_X1, AArch64::X1},
+      {codeview::RegisterId::ARM64_X2, AArch64::X2},
+      {codeview::RegisterId::ARM64_X3, AArch64::X3},
+      {codeview::RegisterId::ARM64_X4, AArch64::X4},
+      {codeview::RegisterId::ARM64_X5, AArch64::X5},
+      {codeview::RegisterId::ARM64_X6, AArch64::X6},
+      {codeview::RegisterId::ARM64_X7, AArch64::X7},
+      {codeview::RegisterId::ARM64_X8, AArch64::X8},
+      {codeview::RegisterId::ARM64_X9, AArch64::X9},
+      {codeview::RegisterId::ARM64_X10, AArch64::X10},
+      {codeview::RegisterId::ARM64_X11, AArch64::X11},
+      {codeview::RegisterId::ARM64_X12, AArch64::X12},
+      {codeview::RegisterId::ARM64_X13, AArch64::X13},
+      {codeview::RegisterId::ARM64_X14, AArch64::X14},
+      {codeview::RegisterId::ARM64_X15, AArch64::X15},
+      {codeview::RegisterId::ARM64_X16, AArch64::X16},
+      {codeview::RegisterId::ARM64_X17, AArch64::X17},
+      {codeview::RegisterId::ARM64_X18, AArch64::X18},
+      {codeview::RegisterId::ARM64_X19, AArch64::X19},
+      {codeview::RegisterId::ARM64_X20, AArch64::X20},
+      {codeview::RegisterId::ARM64_X21, AArch64::X21},
+      {codeview::RegisterId::ARM64_X22, AArch64::X22},
+      {codeview::RegisterId::ARM64_X23, AArch64::X23},
+      {codeview::RegisterId::ARM64_X24, AArch64::X24},
+      {codeview::RegisterId::ARM64_X25, AArch64::X25},
+      {codeview::RegisterId::ARM64_X26, AArch64::X26},
+      {codeview::RegisterId::ARM64_X27, AArch64::X27},
+      {codeview::RegisterId::ARM64_X28, AArch64::X28},
+      {codeview::RegisterId::ARM64_FP, AArch64::FP},
+      {codeview::RegisterId::ARM64_LR, AArch64::LR},
+      {codeview::RegisterId::ARM64_SP, AArch64::SP},
+      {codeview::RegisterId::ARM64_ZR, AArch64::XZR},
+      {codeview::RegisterId::ARM64_NZCV, AArch64::NZCV},
+      {codeview::RegisterId::ARM64_S0, AArch64::S0},
+      {codeview::RegisterId::ARM64_S1, AArch64::S1},
+      {codeview::RegisterId::ARM64_S2, AArch64::S2},
+      {codeview::RegisterId::ARM64_S3, AArch64::S3},
+      {codeview::RegisterId::ARM64_S4, AArch64::S4},
+      {codeview::RegisterId::ARM64_S5, AArch64::S5},
+      {codeview::RegisterId::ARM64_S6, AArch64::S6},
+      {codeview::RegisterId::ARM64_S7, AArch64::S7},
+      {codeview::RegisterId::ARM64_S8, AArch64::S8},
+      {codeview::RegisterId::ARM64_S9, AArch64::S9},
+      {codeview::RegisterId::ARM64_S10, AArch64::S10},
+      {codeview::RegisterId::ARM64_S11, AArch64::S11},
+      {codeview::RegisterId::ARM64_S12, AArch64::S12},
+      {codeview::RegisterId::ARM64_S13, AArch64::S13},
+      {codeview::RegisterId::ARM64_S14, AArch64::S14},
+      {codeview::RegisterId::ARM64_S15, AArch64::S15},
+      {codeview::RegisterId::ARM64_S16, AArch64::S16},
+      {codeview::RegisterId::ARM64_S17, AArch64::S17},
+      {codeview::RegisterId::ARM64_S18, AArch64::S18},
+      {codeview::RegisterId::ARM64_S19, AArch64::S19},
+      {codeview::RegisterId::ARM64_S20, AArch64::S20},
+      {codeview::RegisterId::ARM64_S21, AArch64::S21},
+      {codeview::RegisterId::ARM64_S22, AArch64::S22},
+      {codeview::RegisterId::ARM64_S23, AArch64::S23},
+      {codeview::RegisterId::ARM64_S24, AArch64::S24},
+      {codeview::RegisterId::ARM64_S25, AArch64::S25},
+      {codeview::RegisterId::ARM64_S26, AArch64::S26},
+      {codeview::RegisterId::ARM64_S27, AArch64::S27},
+      {codeview::RegisterId::ARM64_S28, AArch64::S28},
+      {codeview::RegisterId::ARM64_S29, AArch64::S29},
+      {codeview::RegisterId::ARM64_S30, AArch64::S30},
+      {codeview::RegisterId::ARM64_S31, AArch64::S31},
+      {codeview::RegisterId::ARM64_D0, AArch64::D0},
+      {codeview::RegisterId::ARM64_D1, AArch64::D1},
+      {codeview::RegisterId::ARM64_D2, AArch64::D2},
+      {codeview::RegisterId::ARM64_D3, AArch64::D3},
+      {codeview::RegisterId::ARM64_D4, AArch64::D4},
+      {codeview::RegisterId::ARM64_D5, AArch64::D5},
+      {codeview::RegisterId::ARM64_D6, AArch64::D6},
+      {codeview::RegisterId::ARM64_D7, AArch64::D7},
+      {codeview::RegisterId::ARM64_D8, AArch64::D8},
+      {codeview::RegisterId::ARM64_D9, AArch64::D9},
+      {codeview::RegisterId::ARM64_D10, AArch64::D10},
+      {codeview::RegisterId::ARM64_D11, AArch64::D11},
+      {codeview::RegisterId::ARM64_D12, AArch64::D12},
+      {codeview::RegisterId::ARM64_D13, AArch64::D13},
+      {codeview::RegisterId::ARM64_D14, AArch64::D14},
+      {codeview::RegisterId::ARM64_D15, AArch64::D15},
+      {codeview::RegisterId::ARM64_D16, AArch64::D16},
+      {codeview::RegisterId::ARM64_D17, AArch64::D17},
+      {codeview::RegisterId::ARM64_D18, AArch64::D18},
+      {codeview::RegisterId::ARM64_D19, AArch64::D19},
+      {codeview::RegisterId::ARM64_D20, AArch64::D20},
+      {codeview::RegisterId::ARM64_D21, AArch64::D21},
+      {codeview::RegisterId::ARM64_D22, AArch64::D22},
+      {codeview::RegisterId::ARM64_D23, AArch64::D23},
+      {codeview::RegisterId::ARM64_D24, AArch64::D24},
+      {codeview::RegisterId::ARM64_D25, AArch64::D25},
+      {codeview::RegisterId::ARM64_D26, AArch64::D26},
+      {codeview::RegisterId::ARM64_D27, AArch64::D27},
+      {codeview::RegisterId::ARM64_D28, AArch64::D28},
+      {codeview::RegisterId::ARM64_D29, AArch64::D29},
+      {codeview::RegisterId::ARM64_D30, AArch64::D30},
+      {codeview::RegisterId::ARM64_D31, AArch64::D31},
+      {codeview::RegisterId::ARM64_Q0, AArch64::Q0},
+      {codeview::RegisterId::ARM64_Q1, AArch64::Q1},
+      {codeview::RegisterId::ARM64_Q2, AArch64::Q2},
+      {codeview::RegisterId::ARM64_Q3, AArch64::Q3},
+      {codeview::RegisterId::ARM64_Q4, AArch64::Q4},
+      {codeview::RegisterId::ARM64_Q5, AArch64::Q5},
+      {codeview::RegisterId::ARM64_Q6, AArch64::Q6},
+      {codeview::RegisterId::ARM64_Q7, AArch64::Q7},
+      {codeview::RegisterId::ARM64_Q8, AArch64::Q8},
+      {codeview::RegisterId::ARM64_Q9, AArch64::Q9},
+      {codeview::RegisterId::ARM64_Q10, AArch64::Q10},
+      {codeview::RegisterId::ARM64_Q11, AArch64::Q11},
+      {codeview::RegisterId::ARM64_Q12, AArch64::Q12},
+      {codeview::RegisterId::ARM64_Q13, AArch64::Q13},
+      {codeview::RegisterId::ARM64_Q14, AArch64::Q14},
+      {codeview::RegisterId::ARM64_Q15, AArch64::Q15},
+      {codeview::RegisterId::ARM64_Q16, AArch64::Q16},
+      {codeview::RegisterId::ARM64_Q17, AArch64::Q17},
+      {codeview::RegisterId::ARM64_Q18, AArch64::Q18},
+      {codeview::RegisterId::ARM64_Q19, AArch64::Q19},
+      {codeview::RegisterId::ARM64_Q20, AArch64::Q20},
+      {codeview::RegisterId::ARM64_Q21, AArch64::Q21},
+      {codeview::RegisterId::ARM64_Q22, AArch64::Q22},
+      {codeview::RegisterId::ARM64_Q23, AArch64::Q23},
+      {codeview::RegisterId::ARM64_Q24, AArch64::Q24},
+      {codeview::RegisterId::ARM64_Q25, AArch64::Q25},
+      {codeview::RegisterId::ARM64_Q26, AArch64::Q26},
+      {codeview::RegisterId::ARM64_Q27, AArch64::Q27},
+      {codeview::RegisterId::ARM64_Q28, AArch64::Q28},
+      {codeview::RegisterId::ARM64_Q29, AArch64::Q29},
+      {codeview::RegisterId::ARM64_Q30, AArch64::Q30},
+      {codeview::RegisterId::ARM64_Q31, AArch64::Q31},
+
+  };
+  for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
+    MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
 }
 
 static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
diff --git a/llvm/test/DebugInfo/COFF/register-variables-arm64.ll b/llvm/test/DebugInfo/COFF/register-variables-arm64.ll
new file mode 100644
index 0000000000000..c615e79331d53
--- /dev/null
+++ b/llvm/test/DebugInfo/COFF/register-variables-arm64.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -filetype=obj | llvm-readobj --codeview - | FileCheck %s --check-prefix=OBJ
+
+; Generated from:
+; volatile int x;
+; int getint(void);
+; void putint(int);
+; static inline int inlineinc(int a) {
+;   int b = a + 1;
+;   ++x;
+;   return b;
+; }
+; void f(int p) {
+;   if (p) {
+;     int a = getint();
+;     int b = inlineinc(a);
+;     putint(b);
+;   } else {
+;     int c = getint();
+;     putint(c);
+;   }
+; }
+
+
+; OBJ:   DefRangeRegisterRelSym {
+; OBJ:     Kind: S_DEFRANGE_REGISTER_REL (0x1145)
+; OBJ:     BaseRegister: ARM64_SP (0x51)
+; OBJ:     HasSpilledUDTMember: No
+; OBJ:     OffsetInParent: 0
+; OBJ:     BasePointerOffset: 12
+; OBJ:     LocalVariableAddrRange {
+; OBJ:       OffsetStart: .text+0x10
+; OBJ:       ISectStart: 0x0
+; OBJ:       Range: 0x2C
+; OBJ:     }
+; OBJ:   }
+
+; ModuleID = 't.cpp'
+source_filename = "test/DebugInfo/COFF/register-variables-arm64.ll"
+target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-unknown-windows-msvc19.16.27023"
+
+@x = common dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @f(i32 %p) #0 !dbg !12 {
+entry:
+  %p.addr = alloca i32, align 4
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 %p, i32* %p.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %p.addr, metadata !15, metadata !DIExpression()), !dbg !16
+  %0 = load i32, i32* %p.addr, align 4, !dbg !17
+  %tobool = icmp ne i32 %0, 0, !dbg !17
+  br i1 %tobool, label %if.then, label %if.else, !dbg !17
+
+if.then:                                          ; preds = %entry
+  call void @llvm.dbg.declare(metadata i32* %a, metadata !18, metadata !DIExpression()), !dbg !21
+  %call = call i32 @getint(), !dbg !21
+  store i32 %call, i32* %a, align 4, !dbg !21
+  call void @llvm.dbg.declare(metadata i32* %b, metadata !22, metadata !DIExpression()), !dbg !23
+  %1 = load i32, i32* %a, align 4, !dbg !23
+  %call1 = call i32 @inlineinc(i32 %1), !dbg !23
+  store i32 %call1, i32* %b, align 4, !dbg !23
+  %2 = load i32, i32* %b, align 4, !dbg !24
+  call void @putint(i32 %2), !dbg !24
+  br label %if.end, !dbg !25
+
+if.else:                                          ; preds = %entry
+  call void @llvm.dbg.declare(metadata i32* %c, metadata !26, metadata !DIExpression()), !dbg !28
+  %call2 = call i32 @getint(), !dbg !28
+  store i32 %call2, i32* %c, align 4, !dbg !28
+  %3 = load i32, i32* %c, align 4, !dbg !29
+  call void @putint(i32 %3), !dbg !29
+  br label %if.end, !dbg !30
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void, !dbg !31
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare dso_local i32 @getint() #2
+
+; Function Attrs: noinline nounwind optnone uwtable
+define internal i32 @inlineinc(i32 %a) #0 !dbg !32 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !35, metadata !DIExpression()), !dbg !36
+  call void @llvm.dbg.declare(metadata i32* %b, metadata !37, metadata !DIExpression()), !dbg !38
+  %0 = load i32, i32* %a.addr, align 4, !dbg !38
+  %add = add nsw i32 %0, 1, !dbg !38
+  store i32 %add, i32* %b, align 4, !dbg !38
+  %1 = load volatile i32, i32* @x, align 4, !dbg !39
+  %inc = add nsw i32 %1, 1, !dbg !39
+  store volatile i32 %inc, i32* @x, align 4, !dbg !39
+  %2 = load i32, i32* %b, align 4, !dbg !40
+  ret i32 %2, !dbg !40
+}
+
+declare dso_local void @putint(i32) #2
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!8, !9, !10}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (trunk 361867) (llvm/trunk 361866)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: None)
+!3 = !DIFile(filename: "t.c", directory: "S:\5CLLVM\5Csvn\5Csbr\5Cbin", checksumkind: CSK_MD5, checksum: "734c448e95a6204a439a847ed063e5ce")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"CodeView", i32 1}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{i32 1, !"wchar_size", i32 2}
+!11 = !{!"clang version 9.0.0 (trunk 361867) (llvm/trunk 361866)"}
+!12 = distinct !DISubprogram(name: "f", scope: !3, file: !3, line: 9, type: !13, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !7}
+!15 = !DILocalVariable(name: "p", arg: 1, scope: !12, file: !3, line: 9, type: !7)
+!16 = !DILocation(line: 9, scope: !12)
+!17 = !DILocation(line: 10, scope: !12)
+!18 = !DILocalVariable(name: "a", scope: !19, file: !3, line: 11, type: !7)
+!19 = distinct !DILexicalBlock(scope: !20, file: !3, line: 10)
+!20 = distinct !DILexicalBlock(scope: !12, file: !3, line: 10)
+!21 = !DILocation(line: 11, scope: !19)
+!22 = !DILocalVariable(name: "b", scope: !19, file: !3, line: 12, type: !7)
+!23 = !DILocation(line: 12, scope: !19)
+!24 = !DILocation(line: 13, scope: !19)
+!25 = !DILocation(line: 14, scope: !19)
+!26 = !DILocalVariable(name: "c", scope: !27, file: !3, line: 15, type: !7)
+!27 = distinct !DILexicalBlock(scope: !20, file: !3, line: 14)
+!28 = !DILocation(line: 15, scope: !27)
+!29 = !DILocation(line: 16, scope: !27)
+!30 = !DILocation(line: 17, scope: !27)
+!31 = !DILocation(line: 18, scope: !12)
+!32 = distinct !DISubprogram(name: "inlineinc", scope: !3, file: !3, line: 4, type: !33, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !2, retainedNodes: !4)
+!33 = !DISubroutineType(types: !34)
+!34 = !{!7, !7}
+!35 = !DILocalVariable(name: "a", arg: 1, scope: !32, file: !3, line: 4, type: !7)
+!36 = !DILocation(line: 4, scope: !32)
+!37 = !DILocalVariable(name: "b", scope: !32, file: !3, line: 5, type: !7)
+!38 = !DILocation(line: 5, scope: !32)
+!39 = !DILocation(line: 6, scope: !32)
+!40 = !DILocation(line: 7, scope: !32)
diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 720d7396601d4..e5ae47050678d 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -287,21 +287,39 @@ static std::string formatCookieKind(FrameCookieKind Kind) {
   return formatUnknownEnum(Kind);
 }
 
-static std::string formatRegisterId(RegisterId Id) {
-  switch (Id) {
+static std::string formatRegisterId(RegisterId Id, CPUType Cpu) {
+  if (Cpu == CPUType::ARM64) {
+    switch (Id) {
+#define CV_REGISTERS_ARM64
 #define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
 #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
 #undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+    default:
+      break;
+    }
+  } else {
+    switch (Id) {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+    default:
+      break;
+    }
   }
   return formatUnknownEnum(Id);
 }
 
-static std::string formatRegisterId(uint16_t Reg16) {
-  return formatRegisterId(RegisterId(Reg16));
+static std::string formatRegisterId(uint16_t Reg16, CPUType Cpu) {
+  return formatRegisterId(RegisterId(Reg16), Cpu);
 }
 
-static std::string formatRegisterId(ulittle16_t &Reg16) {
-  return formatRegisterId(uint16_t(Reg16));
+static std::string formatRegisterId(ulittle16_t &Reg16, CPUType Cpu) {
+  return formatRegisterId(uint16_t(Reg16), Cpu);
 }
 
 static std::string formatRange(LocalVariableAddrRange Range) {
@@ -562,7 +580,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   AutoIndent Indent(P, 7);
   P.formatLine("register = {0}, offset = {1}, offset in parent = {2}, has "
                "spilled udt = {3}",
-               formatRegisterId(Def.Hdr.Register),
+               formatRegisterId(Def.Hdr.Register, CompilationCPU),
                int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(),
                Def.hasSpilledUDTMember());
   P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
@@ -575,7 +593,7 @@ Error MinimalSymbolDumper::visitKnownRecord(
   AutoIndent Indent(P, 7);
   P.formatLine("register = {0}, may have no name = {1}, range start = "
                "{2}, length = {3}",
-               formatRegisterId(DefRangeRegister.Hdr.Register),
+               formatRegisterId(DefRangeRegister.Hdr.Register, CompilationCPU),
                bool(DefRangeRegister.Hdr.MayHaveNoName),
                formatSegmentOffset(DefRangeRegister.Range.ISectStart,
                                    DefRangeRegister.Range.OffsetStart),
@@ -590,7 +608,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   AutoIndent Indent(P, 7);
   bool NoName = !!(Def.Hdr.MayHaveNoName == 0);
   P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
-               formatRegisterId(Def.Hdr.Register), NoName,
+               formatRegisterId(Def.Hdr.Register, CompilationCPU), NoName,
                uint32_t(Def.Hdr.OffsetInParent));
   P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
                formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -617,7 +635,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
 Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
   AutoIndent Indent(P, 7);
   P.formatLine("code offset = {0}, Register = {1}, kind = {2}, flags = {3}",
-               FC.CodeOffset, formatRegisterId(FC.Register),
+               FC.CodeOffset, formatRegisterId(FC.Register, CompilationCPU),
                formatCookieKind(FC.CookieKind), FC.Flags);
   return Error::success();
 }
@@ -631,9 +649,10 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
                FP.BytesOfCalleeSavedRegisters,
                formatSegmentOffset(FP.SectionIdOfExceptionHandler,
                                    FP.OffsetOfExceptionHandler));
-  P.formatLine("local fp reg = {0}, param fp reg = {1}",
-               formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU)),
-               formatRegisterId(FP.getParamFramePtrReg(CompilationCPU)));
+  P.formatLine(
+      "local fp reg = {0}, param fp reg = {1}",
+      formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU), CompilationCPU),
+      formatRegisterId(FP.getParamFramePtrReg(CompilationCPU), CompilationCPU));
   P.formatLine("flags = {0}",
                formatFrameProcedureOptions(P.getIndentLevel() + 9, FP.Flags));
   return Error::success();
@@ -741,7 +760,8 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   P.format(" `{0}`", Register.Name);
   AutoIndent Indent(P, 7);
   P.formatLine("register = {0}, type = {1}",
-               formatRegisterId(Register.Register), typeIndex(Register.Index));
+               formatRegisterId(Register.Register, CompilationCPU),
+               typeIndex(Register.Index));
   return Error::success();
 }
 
@@ -829,9 +849,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
                                             RegRelativeSym &RegRel) {
   P.format(" `{0}`", RegRel.Name);
   AutoIndent Indent(P, 7);
-  P.formatLine("type = {0}, register = {1}, offset = {2}",
-               typeIndex(RegRel.Type), formatRegisterId(RegRel.Register),
-               RegRel.Offset);
+  P.formatLine(
+      "type = {0}, register = {1}, offset = {2}", typeIndex(RegRel.Type),
+      formatRegisterId(RegRel.Register, CompilationCPU), RegRel.Offset);
   return Error::success();
 }
 
diff --git a/llvm/tools/llvm-pdbutil/PdbYaml.cpp b/llvm/tools/llvm-pdbutil/PdbYaml.cpp
index ff634fbb04875..a26241967b5ad 100644
--- a/llvm/tools/llvm-pdbutil/PdbYaml.cpp
+++ b/llvm/tools/llvm-pdbutil/PdbYaml.cpp
@@ -50,6 +50,7 @@ template <> struct ScalarEnumerationTraits<llvm::pdb::PDB_Machine> {
     io.enumCase(Value, "SH3DSP", PDB_Machine::SH3DSP);
     io.enumCase(Value, "Thumb", PDB_Machine::Thumb);
     io.enumCase(Value, "WceMipsV2", PDB_Machine::WceMipsV2);
+    io.enumCase(Value, "Arm64", PDB_Machine::Arm64);
   }
 };
 
diff --git a/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index a5621fb3086fc..b820ca3339654 100644
--- a/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -138,7 +138,8 @@ void FunctionDumper::start(const PDBSymbolFunc &Symbol, PointerType Pointer) {
 
   if (Symbol.hasFramePointer()) {
     WithColor(Printer, PDB_ColorItem::Register).get()
-        << Symbol.getLocalBasePointerRegisterId();
+        << CPURegister{Symbol.getRawSymbol().getPlatform(),
+                       Symbol.getLocalBasePointerRegisterId()};
   } else {
     WithColor(Printer, PDB_ColorItem::Register).get() << "FPO";
   }

From d8e8722791e4ce6694673d450fdcaf99e5edcbf9 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Sat, 1 Jun 2019 00:08:54 +0000
Subject: [PATCH 0799/1176] [CodeGen] Fix hashing for MO_ExternalSymbol
 MachineOperands.

We were hashing the string pointer, not the string, so two instructions
could be identical (isIdenticalTo), but have different hash codes.

This showed up as a very rare, non-deterministic assertion failure
rehashing a DenseMap constructed by MachineOutliner.  So there's no
"real" testcase, just a unittest which checks that the hash function
behaves correctly.

I'm a little scared fixing this is going to cause a regression in
outlining or MachineCSE, but hopefully we won't run into any issues.

Differential Revision: https://reviews.llvm.org/D61975

llvm-svn: 362281
---
 llvm/lib/CodeGen/MachineOperand.cpp           |  2 +-
 llvm/unittests/CodeGen/MachineOperandTest.cpp | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index a83459e691709..9458745733f64 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -361,7 +361,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
   case MachineOperand::MO_ExternalSymbol:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
-                        MO.getSymbolName());
+                        StringRef(MO.getSymbolName()));
   case MachineOperand::MO_GlobalAddress:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
                         MO.getOffset());
diff --git a/llvm/unittests/CodeGen/MachineOperandTest.cpp b/llvm/unittests/CodeGen/MachineOperandTest.cpp
index cfedfa9c8835b..faa471f2260c7 100644
--- a/llvm/unittests/CodeGen/MachineOperandTest.cpp
+++ b/llvm/unittests/CodeGen/MachineOperandTest.cpp
@@ -398,4 +398,14 @@ TEST(MachineOperandTest, PrintPredicate) {
   ASSERT_TRUE(OS.str() == "intpred(eq)");
 }
 
+TEST(MachineOperandTest, HashValue) {
+  char SymName1[] = "test";
+  char SymName2[] = "test";
+  MachineOperand MO1 = MachineOperand::CreateES(SymName1);
+  MachineOperand MO2 = MachineOperand::CreateES(SymName2);
+  ASSERT_NE(SymName1, SymName2);
+  ASSERT_EQ(hash_value(MO1), hash_value(MO2));
+  ASSERT_TRUE(MO1.isIdenticalTo(MO2));
+}
+
 } // end namespace

From 099eca832e7ef22c7229bc707789bc680ea228bd Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Sat, 1 Jun 2019 00:31:58 +0000
Subject: [PATCH 0800/1176] [LoopPred] Handle a subset of NE comparison based
 latches

At the moment, LoopPredication completely bails out if it sees a latch of the form:
%cmp = icmp ne %iv, %N
br i1 %cmp, label %loop, label %exit
OR
%cmp = icmp ne %iv.next, %NPlus1
br i1 %cmp, label %loop, label %exit

This is unfortunate since this is exactly the form that LFTR likes to produce. So, go ahead and recognize simple cases where we can.

For pre-increment loops, we leverage the fact that LFTR likes canonical counters (i.e. those starting at zero) and a (presumed) range fact on RHS to discharge the check trivially.

For post-increment forms, the key insight is in remembering that LFTR had to insert a (N+1) for the RHS. CVP can hopefully prove that add nsw/nuw (if there's appropriate range on N to start with). This leaves us both with the post-inc IV and the RHS involving an nsw/nuw add, and SCEV can discharge that with no problem.

This does still need to be extended to handle non-one steps, or other harder patterns of variable (but range restricted) starting values. That'll come later.

Differential Revision: https://reviews.llvm.org/D62748

llvm-svn: 362282
---
 .../lib/Transforms/Scalar/LoopPredication.cpp | 54 +++++++++++--------
 llvm/test/Transforms/LoopPredication/basic.ll | 20 ++++---
 2 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 15e5b6433af24..1503b5000c14b 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -230,23 +230,23 @@ static cl::opt<bool> PredicateWidenableBranchGuards(
     cl::init(true));
 
 namespace {
-class LoopPredication {
-  /// Represents an induction variable check:
-  ///   icmp Pred, <induction variable>, <loop invariant limit>
-  struct LoopICmp {
-    ICmpInst::Predicate Pred;
-    const SCEVAddRecExpr *IV;
-    const SCEV *Limit;
-    LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
-             const SCEV *Limit)
-        : Pred(Pred), IV(IV), Limit(Limit) {}
-    LoopICmp() {}
-    void dump() {
-      dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
-             << ", Limit = " << *Limit << "\n";
-    }
-  };
+/// Represents an induction variable check:
+///   icmp Pred, <induction variable>, <loop invariant limit>
+struct LoopICmp {
+  ICmpInst::Predicate Pred;
+  const SCEVAddRecExpr *IV;
+  const SCEV *Limit;
+  LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+           const SCEV *Limit)
+    : Pred(Pred), IV(IV), Limit(Limit) {}
+  LoopICmp() {}
+  void dump() {
+    dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
+           << ", Limit = " << *Limit << "\n";
+  }
+};
 
+class LoopPredication {
   AliasAnalysis *AA;
   ScalarEvolution *SE;
   BranchProbabilityInfo *BPI;
@@ -382,7 +382,7 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
   return getLoopPassPreservedAnalyses();
 }
 
-Optional<LoopPredication::LoopICmp>
+Optional<LoopICmp>
 LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
                                Value *RHS) {
   const SCEV *LHSS = SE->getSCEV(LHS);
@@ -428,7 +428,7 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
   return Builder.CreateICmp(Pred, LHSV, RHSV);
 }
 
-Optional<LoopPredication::LoopICmp>
+Optional<LoopICmp>
 LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
 
   auto *LatchType = LatchCheck.IV->getType();
@@ -518,7 +518,7 @@ bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
 }
 
 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
-    LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
+    LoopICmp LatchCheck, LoopICmp RangeCheck,
     SCEVExpander &Expander, Instruction *Guard) {
   auto *Ty = RangeCheck.IV->getType();
   // Generate the widened condition for the forward loop:
@@ -567,7 +567,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
 }
 
 Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
-    LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
+    LoopICmp LatchCheck, LoopICmp RangeCheck,
     SCEVExpander &Expander, Instruction *Guard) {
   auto *Ty = RangeCheck.IV->getType();
   const SCEV *GuardStart = RangeCheck.IV->getStart();
@@ -614,6 +614,17 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
+static void normalizePredicate(ScalarEvolution *SE, Loop *L,
+                               LoopICmp& RC) {
+  // LFTR canonicalizes checks to the ICMP_NE form instead of an ULT/SLT form.
+  // Normalize back to the ULT/SLT form for ease of handling.
+  if (RC.Pred == ICmpInst::ICMP_NE &&
+      RC.IV->getStepRecurrence(*SE)->isOne() &&
+      SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
+    RC.Pred = ICmpInst::ICMP_ULT;
+}
+
+
 /// If ICI can be widened to a loop invariant condition emits the loop
 /// invariant condition in the loop preheader and return it, otherwise
 /// returns None.
@@ -798,7 +809,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
   return true;
 }
 
-Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
+Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
   using namespace PatternMatch;
 
   BasicBlock *LoopLatch = L->getLoopLatch();
@@ -852,6 +863,7 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
     }
   };
 
+  normalizePredicate(SE, L, *Result);
   if (IsUnsupportedPredicate(Step, Result->Pred)) {
     LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
                       << ")!\n");
diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index e71a8b113a832..6f294b5b67ee5 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -1603,11 +1603,13 @@ define i32 @ne_latch_zext(i32* %array, i32 %length, i16 %n16) {
 ; CHECK-NEXT:  loop.preheader:
 ; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
 ; CHECK-NEXT:    [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[NPLUS1]], [[LENGTH:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[NPLUS1]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
@@ -1637,11 +1639,14 @@ define i32 @ne_latch_zext_preinc(i32* %array, i32 %length, i16 %n16) {
 ; CHECK-LABEL: @ne_latch_zext_preinc(
 ; CHECK-NEXT:  loop.preheader:
 ; CHECK-NEXT:    [[N:%.*]] = zext i16 [[N16:%.*]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
@@ -1715,11 +1720,14 @@ define i32 @ne_latch_dom_check_preinc(i32* %array, i32 %length, i32 %n) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sle i32 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i32 [[N]], [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 0, [[LENGTH]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP3]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I]], [[N]]
 ; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]

From 2258ecc2aaef25f06e0147e0555c186d9afcb172 Mon Sep 17 00:00:00 2001
From: Tom Tan <Tom.Tan@microsoft.com>
Date: Sat, 1 Jun 2019 02:38:08 +0000
Subject: [PATCH 0801/1176] [COFF, ARM64] Fix location of ARM64 CodeView test

ARM64 CodeView test was incorrectly put under test/DebugInfo/COFF folder which
runs for all all architectures. This fix moves it to a subfolder AArch64 with
lit.local.cfg which specify it supports AArch64 only.

llvm-svn: 362283
---
 .../arm64-register-variables.ll}                               | 2 +-
 llvm/test/DebugInfo/COFF/AArch64/lit.local.cfg                 | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)
 rename llvm/test/DebugInfo/COFF/{register-variables-arm64.ll => AArch64/arm64-register-variables.ll} (98%)
 create mode 100644 llvm/test/DebugInfo/COFF/AArch64/lit.local.cfg

diff --git a/llvm/test/DebugInfo/COFF/register-variables-arm64.ll b/llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll
similarity index 98%
rename from llvm/test/DebugInfo/COFF/register-variables-arm64.ll
rename to llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll
index c615e79331d53..daa7da201d535 100644
--- a/llvm/test/DebugInfo/COFF/register-variables-arm64.ll
+++ b/llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -filetype=obj | llvm-readobj --codeview - | FileCheck %s --check-prefix=OBJ
+; RUN: llc < %s -mtriple=arm64-windows -filetype=obj | llvm-readobj --codeview - | FileCheck %s --check-prefix=OBJ
 
 ; Generated from:
 ; volatile int x;
diff --git a/llvm/test/DebugInfo/COFF/AArch64/lit.local.cfg b/llvm/test/DebugInfo/COFF/AArch64/lit.local.cfg
new file mode 100644
index 0000000000000..4004363241deb
--- /dev/null
+++ b/llvm/test/DebugInfo/COFF/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+

From 19afdf74bb91dca9dab9ef2bf1112c6748056c93 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Sat, 1 Jun 2019 03:09:28 +0000
Subject: [PATCH 0802/1176] [LoopPred] Eliminate a redundant/confusing cover
 function [NFC]

llvm-svn: 362284
---
 .../lib/Transforms/Scalar/LoopPredication.cpp | 39 ++++++++++---------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 1503b5000c14b..dfecacfcb7842 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -257,13 +257,7 @@ class LoopPredication {
   LoopICmp LatchCheck;
 
   bool isSupportedStep(const SCEV* Step);
-  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
-    return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
-                         ICI->getOperand(1));
-  }
-  Optional<LoopICmp> parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
-                                   Value *RHS);
-
+  Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
   Optional<LoopICmp> parseLoopLatchICmp();
 
   /// Return an insertion point suitable for inserting a safe to speculate
@@ -383,8 +377,11 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
 }
 
 Optional<LoopICmp>
-LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
-                               Value *RHS) {
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
+  auto Pred = ICI->getPredicate();
+  auto *LHS = ICI->getOperand(0);
+  auto *RHS = ICI->getOperand(1);
+
   const SCEV *LHSS = SE->getSCEV(LHS);
   if (isa<SCEVCouldNotCompute>(LHSS))
     return None;
@@ -818,27 +815,30 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
     return None;
   }
 
-  ICmpInst::Predicate Pred;
-  Value *LHS, *RHS;
-  BasicBlock *TrueDest, *FalseDest;
-
-  if (!match(LoopLatch->getTerminator(),
-             m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest,
-                  FalseDest))) {
+  auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+  if (!BI) {
     LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
     return None;
   }
+  BasicBlock *TrueDest = BI->getSuccessor(0);
+  BasicBlock *FalseDest = BI->getSuccessor(1);
   assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
          "One of the latch's destinations must be the header");
-  if (TrueDest != L->getHeader())
-    Pred = ICmpInst::getInversePredicate(Pred);
 
-  auto Result = parseLoopICmp(Pred, LHS, RHS);
+  auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!ICI || !BI->isConditional()) {
+    LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
+    return None;
+  }
+  auto Result = parseLoopICmp(ICI);
   if (!Result) {
     LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
     return None;
   }
 
+  if (TrueDest != L->getHeader())
+    Result->Pred = ICmpInst::getInversePredicate(Result->Pred);
+
   // Check affine first, so if it's not we don't try to compute the step
   // recurrence.
   if (!Result->IV->isAffine()) {
@@ -869,6 +869,7 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
                       << ")!\n");
     return None;
   }
+
   return Result;
 }
 

From 4e875464df0ff84f7edb5d94be453e7ed28f06ad Mon Sep 17 00:00:00 2001
From: Richard Trieu <rtrieu@google.com>
Date: Sat, 1 Jun 2019 03:32:20 +0000
Subject: [PATCH 0803/1176] Inline variable into assert to fix unused variable
 warning.

llvm-svn: 362285
---
 llvm/lib/Transforms/Scalar/LoopPredication.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index dfecacfcb7842..62a79426e25e1 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -821,9 +821,9 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
     return None;
   }
   BasicBlock *TrueDest = BI->getSuccessor(0);
-  BasicBlock *FalseDest = BI->getSuccessor(1);
-  assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
-         "One of the latch's destinations must be the header");
+  assert(
+      (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
+      "One of the latch's destinations must be the header");
 
   auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
   if (!ICI || !BI->isConditional()) {

From b380846a125d2bd1e9b92882c56941137fa765f6 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Sat, 1 Jun 2019 04:51:26 +0000
Subject: [PATCH 0804/1176] [RuntimeDyld] fix too-small-bitmask error

Summary:
This was flagged in https://www.viva64.com/en/b/0629/ under "Snippet No.
33".

It seems that this statement is doing the standard bitwise trick for
adjusting a value to have a specific alignment.

The issue is that getStubAlignment() returns an unsigned, while DataSize
is declared a uint64_t. The right hand side of the expression is not
extended to 64b before bitwise negation, resulting in the top half of
the mask being 0s, which is not correct for realignment.

Reviewers: lhames, MaskRay

Reviewed By: MaskRay

Subscribers: RKSimon, MaskRay, hiraditya, llvm-commits, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62227

llvm-svn: 362286
---
 llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index e0642adbd31c7..e26e6ce45db4c 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -842,7 +842,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
       // Align DataSize to stub alignment if we have any stubs (PaddingSize will
       // have been increased above to account for this).
       if (StubBufSize > 0)
-        DataSize &= ~(getStubAlignment() - 1);
+        DataSize &= -(uint64_t)getStubAlignment();
     }
 
     LLVM_DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: "

From 48fdb61766a8f953ba16160ddeaae64cb0c603ac Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 1 Jun 2019 06:20:55 +0000
Subject: [PATCH 0805/1176] [X86] Make the X86FoldTablesEmitter functional
 again. Fix the spacing in the output to make it easier to diff.

Fix a few other formatting issues in the manual table. And remove some
old FIXMEs.

llvm-svn: 362287
---
 llvm/lib/Target/X86/X86InstrFoldTables.cpp   |  6 +-
 llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 61 ++++++++++++--------
 2 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 59e62da55f25a..7ab4dc4df7c08 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -248,7 +248,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
   { X86::XOR64rr,     X86::XOR64mr,    0 },
   { X86::XOR8ri,      X86::XOR8mi,     0 },
   { X86::XOR8ri8,     X86::XOR8mi8,    0 },
-  { X86::XOR8rr,      X86::XOR8mr,     0 }
+  { X86::XOR8rr,      X86::XOR8mr,     0 },
 };
 
 static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
@@ -614,7 +614,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::SQRTSSr,              X86::SQRTSSm,              0 },
   { X86::T1MSKC32rr,           X86::T1MSKC32rm,           0 },
   { X86::T1MSKC64rr,           X86::T1MSKC64rm,           0 },
-  // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
   { X86::TZCNT16rr,            X86::TZCNT16rm,            0 },
   { X86::TZCNT32rr,            X86::TZCNT32rm,            0 },
   { X86::TZCNT64rr,            X86::TZCNT64rm,            0 },
@@ -649,7 +648,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VCOMISSrr_Int,        X86::VCOMISSrm_Int,        TB_NO_REVERSE },
   { X86::VCVTDQ2PDYrr,         X86::VCVTDQ2PDYrm,         0 },
   { X86::VCVTDQ2PDZ128rr,      X86::VCVTDQ2PDZ128rm,      TB_NO_REVERSE },
-  { X86::VCVTDQ2PDZ256rr,      X86::VCVTDQ2PDZ256rm,       0 },
+  { X86::VCVTDQ2PDZ256rr,      X86::VCVTDQ2PDZ256rm,      0 },
   { X86::VCVTDQ2PDZrr,         X86::VCVTDQ2PDZrm,         0 },
   { X86::VCVTDQ2PDrr,          X86::VCVTDQ2PDrm,          TB_NO_REVERSE },
   { X86::VCVTDQ2PSYrr,         X86::VCVTDQ2PSYrm,         0 },
@@ -1518,7 +1517,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::SUBSDrr_Int,              X86::SUBSDrm_Int,              TB_NO_REVERSE },
   { X86::SUBSSrr,                  X86::SUBSSrm,                  0 },
   { X86::SUBSSrr_Int,              X86::SUBSSrm_Int,              TB_NO_REVERSE },
-  // FIXME: TEST*rr -> swapped      operand of TEST      *mr.
   { X86::UNPCKHPDrr,               X86::UNPCKHPDrm,               TB_ALIGN_16 },
   { X86::UNPCKHPSrr,               X86::UNPCKHPSrm,               TB_ALIGN_16 },
   { X86::UNPCKLPDrr,               X86::UNPCKLPDrm,               TB_ALIGN_16 },
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 0f646470f6aee..2c15e35f234d4 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -13,6 +13,7 @@
 
 #include "CodeGenTarget.h"
 #include "X86RecognizableInstr.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
@@ -108,23 +109,23 @@ class X86FoldTablesEmitter {
                       const CodeGenInstruction *MemInst)
         : RegInst(RegInst), MemInst(MemInst) {}
 
-    friend raw_ostream &operator<<(raw_ostream &OS,
-                                   const X86FoldTableEntry &E) {
-      OS << "{ X86::" << E.RegInst->TheDef->getName()
-         << ", X86::" << E.MemInst->TheDef->getName() << ", ";
+    void print(formatted_raw_ostream &OS) const {
+      OS.indent(2);
+      OS << "{ X86::" << RegInst->TheDef->getName() << ",";
+      OS.PadToColumn(40);
+      OS  << "X86::" << MemInst->TheDef->getName() << ",";
+      OS.PadToColumn(75);
 
-      if (E.IsLoad)
+      if (IsLoad)
         OS << "TB_FOLDED_LOAD | ";
-      if (E.IsStore)
+      if (IsStore)
         OS << "TB_FOLDED_STORE | ";
-      if (E.CannotUnfold)
+      if (CannotUnfold)
         OS << "TB_NO_REVERSE | ";
-      if (E.IsAligned)
-        OS << "TB_ALIGN_" << E.Alignment << " | ";
+      if (IsAligned)
+        OS << "TB_ALIGN_" << Alignment << " | ";
 
       OS << "0 },\n";
-
-      return OS;
     }
   };
 
@@ -144,7 +145,7 @@ class X86FoldTablesEmitter {
   X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
 
   // run - Generate the 6 X86 memory fold tables.
-  void run(raw_ostream &OS);
+  void run(formatted_raw_ostream &OS);
 
 private:
   // Decides to which table to add the entry with the given instructions.
@@ -162,21 +163,21 @@ class X86FoldTablesEmitter {
   // Print the given table as a static const C++ array of type
   // X86MemoryFoldTableEntry.
   void printTable(const FoldTable &Table, StringRef TableName,
-                  raw_ostream &OS) {
+                  formatted_raw_ostream &OS) {
     OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
        << "[] = {\n";
 
     for (const X86FoldTableEntry &E : Table)
-      OS << E;
+      E.print(OS);
 
-    OS << "};\n";
+    OS << "};\n\n";
   }
 };
 
 // Return true if one of the instruction's operands is a RST register class
 static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
   return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
-    return OpIn.Rec->getName() == "RST";
+    return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
   });
 }
 
@@ -347,10 +348,18 @@ class IsMatch {
             MemRec->getValueAsBit("hasLockPrefix") ||
         RegRec->getValueAsBit("hasNoTrackPrefix") !=
             MemRec->getValueAsBit("hasNoTrackPrefix") ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"),
-                        MemRec->getValueAsBitsInit("EVEX_LL")) ||
-        !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"),
-                        MemRec->getValueAsBitsInit("VEX_WPrefix")) ||
+        RegRec->getValueAsBit("hasVEX_L") !=
+            MemRec->getValueAsBit("hasVEX_L") ||
+        RegRec->getValueAsBit("hasEVEX_L2") !=
+            MemRec->getValueAsBit("hasEVEX_L2") ||
+        RegRec->getValueAsBit("ignoresVEX_L") !=
+            MemRec->getValueAsBit("ignoresVEX_L") ||
+        RegRec->getValueAsBit("HasVEX_W") !=
+            MemRec->getValueAsBit("HasVEX_W") ||
+        RegRec->getValueAsBit("IgnoresVEX_W") !=
+            MemRec->getValueAsBit("IgnoresVEX_W") ||
+        RegRec->getValueAsBit("EVEX_W1_VEX_W0") !=
+            MemRec->getValueAsBit("EVEX_W1_VEX_W0") ||
         // Instruction's format - The register form's "Form" field should be
         // the opposite of the memory form's "Form" field.
         !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"),
@@ -423,6 +432,7 @@ class IsMatch {
         (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) ||
         (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) ||
         (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) ||
+        (MemFormNum == X86Local::MRMXmCC && RegFormNum == X86Local::MRMXrCC) ||
         (MemFormNum == X86Local::MRMDestMem &&
          RegFormNum == X86Local::MRMDestReg) ||
         (MemFormNum == X86Local::MRMSrcMem &&
@@ -430,7 +440,9 @@ class IsMatch {
         (MemFormNum == X86Local::MRMSrcMem4VOp3 &&
          RegFormNum == X86Local::MRMSrcReg4VOp3) ||
         (MemFormNum == X86Local::MRMSrcMemOp4 &&
-         RegFormNum == X86Local::MRMSrcRegOp4))
+         RegFormNum == X86Local::MRMSrcRegOp4) ||
+        (MemFormNum == X86Local::MRMSrcMemCC &&
+         RegFormNum == X86Local::MRMSrcRegCC))
       return true;
 
     return false;
@@ -560,7 +572,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
   return;
 }
 
-void X86FoldTablesEmitter::run(raw_ostream &OS) {
+void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
   emitSourceFileHeader("X86 fold tables", OS);
 
   // Holds all memory instructions
@@ -641,7 +653,7 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
                  &(Target.getInstruction(MemInstIter)), Entry.Strategy);
   }
 
-  // Print all tables to raw_ostream OS.
+  // Print all tables.
   printTable(Table2Addr, "Table2Addr", OS);
   printTable(Table0, "Table0", OS);
   printTable(Table1, "Table1", OS);
@@ -652,7 +664,8 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
 
 namespace llvm {
 
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) {
+void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &o) {
+  formatted_raw_ostream OS(o);
   X86FoldTablesEmitter(RK).run(OS);
 }
 } // namespace llvm

From c288a19bb711306dd0ceae98525d8861d3894175 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 1 Jun 2019 06:20:59 +0000
Subject: [PATCH 0806/1176] [X86] Add AVX512BF16 and AVX512VP2INTERSECT
 instructions to the loading folding tables.

llvm-svn: 362288
---
 llvm/lib/Target/X86/X86InstrFoldTables.cpp    |  33 ++
 .../CodeGen/X86/stack-folding-avx512bf16.ll   | 297 ++++++++++++++++++
 .../X86/stack-folding-avx512vp2intersect.ll   |  88 ++++++
 3 files changed, 418 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll
 create mode 100644 llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll

diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 7ab4dc4df7c08..0bf9f851dffb6 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -656,6 +656,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
   { X86::VCVTDQ2PSZ256rr,      X86::VCVTDQ2PSZ256rm,      0 },
   { X86::VCVTDQ2PSZrr,         X86::VCVTDQ2PSZrm,         0 },
   { X86::VCVTDQ2PSrr,          X86::VCVTDQ2PSrm,          0 },
+  { X86::VCVTNEPS2BF16Z128rr,  X86::VCVTNEPS2BF16Z128rm,  0 },
+  { X86::VCVTNEPS2BF16Z256rr,  X86::VCVTNEPS2BF16Z256rm,  0 },
+  { X86::VCVTNEPS2BF16Zrr,     X86::VCVTNEPS2BF16Zrm,     0 },
   { X86::VCVTPD2DQYrr,         X86::VCVTPD2DQYrm,         0 },
   { X86::VCVTPD2DQZ128rr,      X86::VCVTPD2DQZ128rm,      0 },
   { X86::VCVTPD2DQZ256rr,      X86::VCVTPD2DQZ256rm,      0 },
@@ -1637,6 +1640,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::VCVTDQ2PSZ128rrkz,        X86::VCVTDQ2PSZ128rmkz,        0 },
   { X86::VCVTDQ2PSZ256rrkz,        X86::VCVTDQ2PSZ256rmkz,        0 },
   { X86::VCVTDQ2PSZrrkz,           X86::VCVTDQ2PSZrmkz,           0 },
+  { X86::VCVTNE2PS2BF16Z128rr,     X86::VCVTNE2PS2BF16Z128rm,     0 },
+  { X86::VCVTNE2PS2BF16Z256rr,     X86::VCVTNE2PS2BF16Z256rm,     0 },
+  { X86::VCVTNE2PS2BF16Zrr,        X86::VCVTNE2PS2BF16Zrm,        0 },
+  { X86::VCVTNEPS2BF16Z128rrkz,    X86::VCVTNEPS2BF16Z128rmkz,    0 },
+  { X86::VCVTNEPS2BF16Z256rrkz,    X86::VCVTNEPS2BF16Z256rmkz,    0 },
+  { X86::VCVTNEPS2BF16Zrrkz,       X86::VCVTNEPS2BF16Zrmkz,       0 },
   { X86::VCVTPD2DQZ128rrkz,        X86::VCVTPD2DQZ128rmkz,        0 },
   { X86::VCVTPD2DQZ256rrkz,        X86::VCVTPD2DQZ256rmkz,        0 },
   { X86::VCVTPD2DQZrrkz,           X86::VCVTPD2DQZrmkz,           0 },
@@ -2012,6 +2021,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
   { X86::VORPSZ256rr,              X86::VORPSZ256rm,              0 },
   { X86::VORPSZrr,                 X86::VORPSZrm,                 0 },
   { X86::VORPSrr,                  X86::VORPSrm,                  0 },
+  { X86::VP2INTERSECTDZ128rr,      X86::VP2INTERSECTDZ128rm,      0 },
+  { X86::VP2INTERSECTDZ256rr,      X86::VP2INTERSECTDZ256rm,      0 },
+  { X86::VP2INTERSECTDZrr,         X86::VP2INTERSECTDZrm,         0 },
+  { X86::VP2INTERSECTQZ128rr,      X86::VP2INTERSECTQZ128rm,      0 },
+  { X86::VP2INTERSECTQZ256rr,      X86::VP2INTERSECTQZ256rm,      0 },
+  { X86::VP2INTERSECTQZrr,         X86::VP2INTERSECTQZrm,         0 },
   { X86::VPABSBZ128rrkz,           X86::VPABSBZ128rmkz,           0 },
   { X86::VPABSBZ256rrkz,           X86::VPABSBZ256rmkz,           0 },
   { X86::VPABSBZrrkz,              X86::VPABSBZrmkz,              0 },
@@ -3014,6 +3029,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
   { X86::VCVTDQ2PSZ128rrk,           X86::VCVTDQ2PSZ128rmk,           0 },
   { X86::VCVTDQ2PSZ256rrk,           X86::VCVTDQ2PSZ256rmk,           0 },
   { X86::VCVTDQ2PSZrrk,              X86::VCVTDQ2PSZrmk,              0 },
+  { X86::VCVTNE2PS2BF16Z128rrkz,     X86::VCVTNE2PS2BF16Z128rmkz,     0 },
+  { X86::VCVTNE2PS2BF16Z256rrkz,     X86::VCVTNE2PS2BF16Z256rmkz,     0 },
+  { X86::VCVTNE2PS2BF16Zrrkz,        X86::VCVTNE2PS2BF16Zrmkz,        0 },
+  { X86::VCVTNEPS2BF16Z128rrk,       X86::VCVTNEPS2BF16Z128rmk,       0 },
+  { X86::VCVTNEPS2BF16Z256rrk,       X86::VCVTNEPS2BF16Z256rmk,       0 },
+  { X86::VCVTNEPS2BF16Zrrk,          X86::VCVTNEPS2BF16Zrmk,          0 },
   { X86::VCVTPD2DQZ128rrk,           X86::VCVTPD2DQZ128rmk,           0 },
   { X86::VCVTPD2DQZ256rrk,           X86::VCVTPD2DQZ256rmk,           0 },
   { X86::VCVTPD2DQZrrk,              X86::VCVTPD2DQZrmk,              0 },
@@ -3102,6 +3123,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
   { X86::VDIVPSZrrkz,                X86::VDIVPSZrmkz,                0 },
   { X86::VDIVSDZrr_Intkz,            X86::VDIVSDZrm_Intkz,            TB_NO_REVERSE },
   { X86::VDIVSSZrr_Intkz,            X86::VDIVSSZrm_Intkz,            TB_NO_REVERSE },
+  { X86::VDPBF16PSZ128r,             X86::VDPBF16PSZ128m,             0 },
+  { X86::VDPBF16PSZ256r,             X86::VDPBF16PSZ256m,             0 },
+  { X86::VDPBF16PSZr,                X86::VDPBF16PSZm,                0 },
   { X86::VEXP2PDZrk,                 X86::VEXP2PDZmk,                 0 },
   { X86::VEXP2PSZrk,                 X86::VEXP2PSZmk,                 0 },
   { X86::VEXPANDPDZ128rrk,           X86::VEXPANDPDZ128rmk,           TB_NO_REVERSE },
@@ -4316,6 +4340,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
   { X86::VANDPSZ128rrk,             X86::VANDPSZ128rmk,             0 },
   { X86::VANDPSZ256rrk,             X86::VANDPSZ256rmk,             0 },
   { X86::VANDPSZrrk,                X86::VANDPSZrmk,                0 },
+  { X86::VCVTNE2PS2BF16Z128rrk,     X86::VCVTNE2PS2BF16Z128rmk,     0 },
+  { X86::VCVTNE2PS2BF16Z256rrk,     X86::VCVTNE2PS2BF16Z256rmk,     0 },
+  { X86::VCVTNE2PS2BF16Zrrk,        X86::VCVTNE2PS2BF16Zrmk,        0 },
   { X86::VCVTSD2SSZrr_Intk,         X86::VCVTSD2SSZrm_Intk,         TB_NO_REVERSE },
   { X86::VCVTSS2SDZrr_Intk,         X86::VCVTSS2SDZrm_Intk,         TB_NO_REVERSE },
   { X86::VDBPSADBWZ128rrik,         X86::VDBPSADBWZ128rmik,         0 },
@@ -4329,6 +4356,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
   { X86::VDIVPSZrrk,                X86::VDIVPSZrmk,                0 },
   { X86::VDIVSDZrr_Intk,            X86::VDIVSDZrm_Intk,            TB_NO_REVERSE },
   { X86::VDIVSSZrr_Intk,            X86::VDIVSSZrm_Intk,            TB_NO_REVERSE },
+  { X86::VDPBF16PSZ128rk,           X86::VDPBF16PSZ128mk,           0 },
+  { X86::VDPBF16PSZ128rkz,          X86::VDPBF16PSZ128mkz,          0 },
+  { X86::VDPBF16PSZ256rk,           X86::VDPBF16PSZ256mk,           0 },
+  { X86::VDPBF16PSZ256rkz,          X86::VDPBF16PSZ256mkz,          0 },
+  { X86::VDPBF16PSZrk,              X86::VDPBF16PSZmk,              0 },
+  { X86::VDPBF16PSZrkz,             X86::VDPBF16PSZmkz,             0 },
   { X86::VFIXUPIMMPDZ128rrik,       X86::VFIXUPIMMPDZ128rmik,       0 },
   { X86::VFIXUPIMMPDZ128rrikz,      X86::VFIXUPIMMPDZ128rmikz,      0 },
   { X86::VFIXUPIMMPDZ256rrik,       X86::VFIXUPIMMPDZ256rmik,       0 },
diff --git a/llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll b/llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll
new file mode 100644
index 0000000000000..16c0c5be00a44
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-folding-avx512bf16.ll
@@ -0,0 +1,297 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16,+avx512vl < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+
+define <32 x i16> @stack_fold_cvtne2ps2bf16(<16 x float> %a0, <16 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_cvtne2ps2bf16:
+  ;CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a0, <16 x float> %a1)
+  ret <32 x i16> %2
+}
+declare <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float>, <16 x float>)
+
+define <32 x i16> @stack_fold_cvtne2ps2bf16_mask(<16 x float> %a0, <16 x float> %a1, <32 x i16>* %passthru, i32 %U) {
+; CHECK-LABEL: stack_fold_cvtne2ps2bf16_mask:
+; CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a0, <16 x float> %a1)
+  %3 = bitcast i32 %U to <32 x i1>
+  ; load needed to keep the operation from being scheduled above the asm block
+  %4 = load <32 x i16>, <32 x i16>* %passthru
+  %5 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> %4
+  ret <32 x i16> %5
+}
+
+define <32 x i16> @stack_fold_cvtne2ps2bf16_maskz(<16 x float> %a0, <16 x float> %a1, i32 %U) {
+; CHECK-LABEL: stack_fold_cvtne2ps2bf16_maskz:
+; CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a0, <16 x float> %a1)
+  %3 = bitcast i32 %U to <32 x i1>
+  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
+  ret <32 x i16> %4
+}
+
+define <16 x i16> @stack_fold_cvtneps2bf16(<16 x float> %a0) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16:
+; CHECK:       vcvtneps2bf16 {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %a0)
+  ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float>)
+
+define <16 x i16> @stack_fold_cvtneps2bf16_mask(<16 x float> %a0, <16 x i16>* %passthru, i16 %U) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_mask:
+; CHECK:       vcvtneps2bf16 {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %a0)
+  %3 = bitcast i16 %U to <16 x i1>
+  ; load needed to keep the operation from being scheduled above the asm block
+  %4 = load <16 x i16>, <16 x i16>* %passthru
+  %5 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> %4
+  ret <16 x i16> %5
+}
+
+define <16 x i16> @stack_fold_cvtneps2bf16_maskz(<16 x float> %a0, i16 %U) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_maskz:
+; CHECK:       vcvtneps2bf16 {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %a0)
+  %3 = bitcast i16 %U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
+  ret <16 x i16> %4
+}
+
+define <16 x float> @stack_fold_vdpbf16ps(<16 x float> %a0, <16 x i32> %a1, <16 x i32> %a2) {
+; CHECK-LABEL: stack_fold_vdpbf16ps:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %a0, <16 x i32> %a1, <16 x i32> %a2)
+  ret <16 x float> %2
+}
+declare <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float>, <16 x i32>, <16 x i32>)
+
+define <16 x float> @stack_fold_vdpbf16ps_mask(<16 x float>* %a0, <16 x i32> %a1, <16 x i32> %a2, <16 x float>* %passthru, i16 %U) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_mask:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ; load needed to keep the operation from being scheduled above the asm block
+  %2 = load <16 x float>, <16 x float>* %a0
+  %3 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %2, <16 x i32> %a1, <16 x i32> %a2)
+  %4 = bitcast i16 %U to <16 x i1>
+  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %2
+  ret <16 x float> %5
+}
+
+define <16 x float> @stack_fold_vdpbf16ps_maskz(<16 x float> %a0, <16 x i32> %a1, <16 x i32> %a2, i16* %U) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_maskz:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %a0, <16 x i32> %a1, <16 x i32> %a2)
+  %3 = load i16, i16* %U
+  %4 = bitcast i16 %3 to <16 x i1>
+  %5 = select <16 x i1> %4, <16 x float> %2, <16 x float> zeroinitializer
+  ret <16 x float> %5
+}
+
+
+
+define <16 x i16> @stack_fold_cvtne2ps2bf16_ymm(<8 x float> %a0, <8 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_cvtne2ps2bf16_ymm:
+  ;CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %a0, <8 x float> %a1)
+  ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float>, <8 x float>)
+
+define <16 x i16> @stack_fold_cvtne2ps2bf16_mask_ymm(<8 x float> %a0, <8 x float> %a1, <16 x i16>* %passthru, i16 %U) {
+; CHECK-LABEL: stack_fold_cvtne2ps2bf16_mask_ymm:
+; CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %a0, <8 x float> %a1)
+  %3 = bitcast i16 %U to <16 x i1>
+  ; load needed to keep the operation from being scheduled above the asm block
+  %4 = load <16 x i16>, <16 x i16>* %passthru
+  %5 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> %4
+  ret <16 x i16> %5
+}
+
+define <16 x i16> @stack_fold_cvtne2ps2bf16_maskz_ymm(<8 x float> %a0, <8 x float> %a1, i16 %U) {
+; CHECK-LABEL: stack_fold_cvtne2ps2bf16_maskz_ymm:
+; CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %a0, <8 x float> %a1)
+  %3 = bitcast i16 %U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> zeroinitializer
+  ret <16 x i16> %4
+}
+
+define <8 x i16> @stack_fold_cvtneps2bf16_ymm(<8 x float> %a0) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_ymm:
+; CHECK:       vcvtneps2bf16y {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %a0)
+  ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>)
+
+define <8 x i16> @stack_fold_cvtneps2bf16_mask_ymm(<8 x float> %a0, <8 x i16>* %passthru, i8 %U) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_mask_ymm:
+; CHECK:       vcvtneps2bf16y {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %a0)
+  %3 = bitcast i8 %U to <8 x i1>
+  ; load needed to keep the operation from being scheduled above the asm block
+  %4 = load <8 x i16>, <8 x i16>* %passthru
+  %5 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> %4
+  ret <8 x i16> %5
+}
+
+define <8 x i16> @stack_fold_cvtneps2bf16_maskz_ymm(<8 x float> %a0, i8 %U) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_maskz_ymm:
+; CHECK:       vcvtneps2bf16y {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %a0)
+  %3 = bitcast i8 %U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
+  ret <8 x i16> %4
+}
+
+define <8 x float> @stack_fold_vdpbf16ps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> %a2) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_ymm:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %a0, <8 x i32> %a1, <8 x i32> %a2)
+  ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float>, <8 x i32>, <8 x i32>)
+
+define <8 x float> @stack_fold_vdpbf16ps_mask_ymm(<8 x float>* %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x float>* %passthru, i8 %U) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_mask_ymm:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ; load needed to keep the operation from being scheduled above the asm block
+  %2 = load <8 x float>, <8 x float>* %a0
+  %3 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %2, <8 x i32> %a1, <8 x i32> %a2)
+  %4 = bitcast i8 %U to <8 x i1>
+  %5 = select <8 x i1> %4, <8 x float> %3, <8 x float> %2
+  ret <8 x float> %5
+}
+
+define <8 x float> @stack_fold_vdpbf16ps_maskz_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> %a2, i8* %U) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_maskz_ymm:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %a0, <8 x i32> %a1, <8 x i32> %a2)
+  %3 = load i8, i8* %U
+  %4 = bitcast i8 %3 to <8 x i1>
+  %5 = select <8 x i1> %4, <8 x float> %2, <8 x float> zeroinitializer
+  ret <8 x float> %5
+}
+
+
+
+
+define <8 x i16> @stack_fold_cvtne2ps2bf16_xmm(<4 x float> %a0, <4 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_cvtne2ps2bf16_xmm:
+  ;CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %a0, <4 x float> %a1)
+  ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>)
+
+define <8 x i16> @stack_fold_cvtne2ps2bf16_mask_xmm(<4 x float> %a0, <4 x float> %a1, <8 x i16>* %passthru, i8 %U) {
+; CHECK-LABEL: stack_fold_cvtne2ps2bf16_mask_xmm:
+; CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %a0, <4 x float> %a1)
+  %3 = bitcast i8 %U to <8 x i1>
+  ; load needed to keep the operation from being scheduled above the asm block
+  %4 = load <8 x i16>, <8 x i16>* %passthru
+  %5 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> %4
+  ret <8 x i16> %5
+}
+
+define <8 x i16> @stack_fold_cvtne2ps2bf16_maskz_xmm(<4 x float> %a0, <4 x float> %a1, i8 %U) {
+; CHECK-LABEL: stack_fold_cvtne2ps2bf16_maskz_xmm:
+; CHECK:       vcvtne2ps2bf16 {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %a0, <4 x float> %a1)
+  %3 = bitcast i8 %U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> zeroinitializer
+  ret <8 x i16> %4
+}
+
+define <8 x i16> @stack_fold_cvtneps2bf16_xmm(<4 x float> %a0) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_xmm:
+; CHECK:       vcvtneps2bf16x {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %a0, <8 x i16> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+  ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float>, <8 x i16>, <4 x i1>)
+
+define <8 x i16> @stack_fold_cvtneps2bf16_mask_xmm(<4 x float> %a0, <8 x i16>* %passthru, i8 %U) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_mask_xmm:
+; CHECK:       vcvtneps2bf16x {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <8 x i16>, <8 x i16>* %passthru
+  %3 = bitcast i8 %U to <8 x i1>
+  %4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %5 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %a0, <8 x i16> %2, <4 x i1> %4)
+  ret <8 x i16> %5
+}
+
+define <8 x i16> @stack_fold_cvtneps2bf16_maskz_xmm(<4 x float> %a0, i8 %U) {
+; CHECK-LABEL: stack_fold_cvtneps2bf16_maskz_xmm:
+; CHECK:       vcvtneps2bf16x {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = bitcast i8 %U to <8 x i1>
+  %3 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %a0, <8 x i16> zeroinitializer, <4 x i1> %3)
+  ret <8 x i16> %4
+}
+
+define <4 x float> @stack_fold_vdpbf16ps_xmm(<4 x float> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_xmm:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %a0, <4 x i32> %a1, <4 x i32> %a2)
+  ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float>, <4 x i32>, <4 x i32>)
+
+define <4 x float> @stack_fold_vdpbf16ps_mask_xmm(<4 x float>* %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x float>* %passthru, i8 %U) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_mask_xmm:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  ; load needed to keep the operation from being scheduled above the asm block
+  %2 = load <4 x float>, <4 x float>* %a0
+  %3 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %2, <4 x i32> %a1, <4 x i32> %a2)
+  %4 = bitcast i8 %U to <8 x i1>
+  %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %6 = select <4 x i1> %5, <4 x float> %3, <4 x float> %2
+  ret <4 x float> %6
+}
+
+define <4 x float> @stack_fold_vdpbf16ps_maskz_xmm(<4 x float> %a0, <4 x i32> %a1, <4 x i32> %a2, i8* %U) {
+; CHECK-LABEL: stack_fold_vdpbf16ps_maskz_xmm:
+; CHECK:       vdpbf16ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{{%k[0-7]}}} {z} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %a0, <4 x i32> %a1, <4 x i32> %a2)
+  %3 = load i8, i8* %U
+  %4 = bitcast i8 %3 to <8 x i1>
+  %5 = shufflevector <8 x i1> %4, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %6 = select <4 x i1> %5, <4 x float> %2, <4 x float> zeroinitializer
+  ret <4 x float> %6
+}
diff --git a/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll b/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
new file mode 100644
index 0000000000000..1d60504b5ac08
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-folding-avx512vp2intersect.ll
@@ -0,0 +1,88 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vp2intersect,+avx512vl < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define void @stack_fold_vp2intersectd(<16 x i32>* %a, <16 x i32> %b, <16 x i1>* nocapture %m0, <16 x i1>* nocapture %m1) {
+; CHECK-LABEL: stack_fold_vp2intersectd:
+; CHECK:    vp2intersectd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <16 x i32>, <16 x i32>* %a
+  %3 = tail call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %2, <16 x i32> %b)
+  %4 = extractvalue { <16 x i1>, <16 x i1> } %3, 0
+  store <16 x i1> %4, <16 x i1>* %m0
+  %5 = extractvalue { <16 x i1>, <16 x i1> } %3, 1
+  store <16 x i1> %5, <16 x i1>* %m1
+  ret void
+}
+declare { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32>, <16 x i32>)
+
+define void @stack_fold_vp2intersectq(<8 x i64>* %a, <8 x i64> %b, <8 x i1>* nocapture %m0, <8 x i1>* nocapture %m1) {
+; CHECK-LABEL: stack_fold_vp2intersectq:
+; CHECK:    vp2intersectq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <8 x i64>, <8 x i64>* %a
+  %3 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %2, <8 x i64> %b)
+  %4 = extractvalue { <8 x i1>, <8 x i1> } %3, 0
+  store <8 x i1> %4, <8 x i1>* %m0
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %3, 1
+  store <8 x i1> %5, <8 x i1>* %m1
+  ret void
+}
+declare { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64>, <8 x i64>)
+
+define void @stack_fold_vp2intersectd_256(<8 x i32>* %a, <8 x i32> %b, <8 x i1>* nocapture %m0, <8 x i1>* nocapture %m1) {
+; CHECK-LABEL: stack_fold_vp2intersectd_256:
+; CHECK:    vp2intersectd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <8 x i32>, <8 x i32>* %a
+  %3 = tail call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %2, <8 x i32> %b)
+  %4 = extractvalue { <8 x i1>, <8 x i1> } %3, 0
+  store <8 x i1> %4, <8 x i1>* %m0
+  %5 = extractvalue { <8 x i1>, <8 x i1> } %3, 1
+  store <8 x i1> %5, <8 x i1>* %m1
+  ret void
+}
+declare { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32>, <8 x i32>)
+
+define void @stack_fold_vp2intersectq_256(<4 x i64>* %a, <4 x i64> %b, <4 x i1>* nocapture %m0, <4 x i1>* nocapture %m1) {
+; CHECK-LABEL: stack_fold_vp2intersectq_256:
+; CHECK:    vp2intersectq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <4 x i64>, <4 x i64>* %a
+  %3 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %2, <4 x i64> %b)
+  %4 = extractvalue { <4 x i1>, <4 x i1> } %3, 0
+  store <4 x i1> %4, <4 x i1>* %m0
+  %5 = extractvalue { <4 x i1>, <4 x i1> } %3, 1
+  store <4 x i1> %5, <4 x i1>* %m1
+  ret void
+}
+declare { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64>, <4 x i64>)
+
+define void @stack_fold_vp2intersectd_128(<4 x i32>* %a, <4 x i32> %b, <4 x i1>* nocapture %m0, <4 x i1>* nocapture %m1) {
+; CHECK-LABEL: stack_fold_vp2intersectd_128:
+; CHECK:    vp2intersectd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <4 x i32>, <4 x i32>* %a
+  %3 = tail call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %2, <4 x i32> %b)
+  %4 = extractvalue { <4 x i1>, <4 x i1> } %3, 0
+  store <4 x i1> %4, <4 x i1>* %m0
+  %5 = extractvalue { <4 x i1>, <4 x i1> } %3, 1
+  store <4 x i1> %5, <4 x i1>* %m1
+  ret void
+}
+declare { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32>, <4 x i32>)
+
+define void @stack_fold_vp2intersectq_128(<2 x i64>* %a, <2 x i64> %b, <2 x i1>* nocapture %m0, <2 x i1>* nocapture %m1) {
+; CHECK-LABEL: stack_fold_vp2intersectq_128:
+; CHECK:    vp2intersectq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%k[0-7]}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = load <2 x i64>, <2 x i64>* %a
+  %3 = tail call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %2, <2 x i64> %b)
+  %4 = extractvalue { <2 x i1>, <2 x i1> } %3, 0
+  store <2 x i1> %4, <2 x i1>* %m0
+  %5 = extractvalue { <2 x i1>, <2 x i1> } %3, 1
+  store <2 x i1> %5, <2 x i1>* %m1
+  ret void
+}
+declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)

From b3498346fd2c98fb09687e83bd645a75105b6a3d Mon Sep 17 00:00:00 2001
From: Alex Brachet <alexbrachetmialot@gmail.com>
Date: Sat, 1 Jun 2019 07:36:57 +0000
Subject: [PATCH 0807/1176] [llvm-objcopy] test commit

llvm-svn: 362289
---
 llvm/tools/llvm-objcopy/llvm-objcopy.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index db29128f95eb5..21f0b7f231817 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -8,9 +8,9 @@
 
 #include "llvm-objcopy.h"
 #include "Buffer.h"
-#include "COFF/COFFObjcopy.h"
 #include "CopyConfig.h"
 #include "ELF/ELFObjcopy.h"
+#include "COFF/COFFObjcopy.h"
 #include "MachO/MachOObjcopy.h"
 
 #include "llvm/ADT/STLExtras.h"

From 038e3b9f57434068f6bfa79cde577813b463d8e6 Mon Sep 17 00:00:00 2001
From: Dylan McKay <me@dylanmckay.io>
Date: Sat, 1 Jun 2019 09:18:26 +0000
Subject: [PATCH 0808/1176] Extend the DWARFExpression address handling to
 support 16-bit addresses

This allows the DWARFExpression class to handle addresses without
crashing on targets with 16-bit pointers like AVR.

This is required in order to generate assembly from clang via the '-S'
flag.

This fixes an error with the following message:

clang: llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h:132: llvm::DWARFExpression::DWARFExpression(llvm::DataExtractor, uint16_t, uint8_t):
       Assertion `AddressSize == 8 || AddressSize == 4' failed.
llvm-svn: 362290
---
 .../llvm/DebugInfo/DWARF/DWARFExpression.h    |  2 +-
 llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp  | 12 ++++++++----
 llvm/test/MC/AVR/dwarf-asm-no-code.s          | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/MC/AVR/dwarf-asm-no-code.s

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 21b138676bd3d..f066dd58d6069 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -129,7 +129,7 @@ class DWARFExpression {
 
   DWARFExpression(DataExtractor Data, uint16_t Version, uint8_t AddressSize)
       : Data(Data), Version(Version), AddressSize(AddressSize) {
-    assert(AddressSize == 8 || AddressSize == 4);
+    assert(AddressSize == 8 || AddressSize == 4 || AddressSize == 2);
   }
 
   iterator begin() const { return iterator(this, 0); }
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 133b85a096985..644075011a3d2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -155,17 +155,21 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
     case Operation::SizeAddr:
       if (AddressSize == 8) {
         Operands[Operand] = Data.getU64(&Offset);
-      } else {
-        assert(AddressSize == 4);
+      } else if (AddressSize == 4) {
         Operands[Operand] = Data.getU32(&Offset);
+      } else {
+        assert(AddressSize == 2);
+        Operands[Operand] = Data.getU16(&Offset);
       }
       break;
     case Operation::SizeRefAddr:
       if (getRefAddrSize(AddressSize, Version) == 8) {
         Operands[Operand] = Data.getU64(&Offset);
-      } else {
-        assert(getRefAddrSize(AddressSize, Version) == 4);
+      } else if (getRefAddrSize(AddressSize, Version) == 4) {
         Operands[Operand] = Data.getU32(&Offset);
+      } else {
+        assert(getRefAddrSize(AddressSize, Version) == 2);
+        Operands[Operand] = Data.getU16(&Offset);
       }
       break;
     case Operation::SizeLEB:
diff --git a/llvm/test/MC/AVR/dwarf-asm-no-code.s b/llvm/test/MC/AVR/dwarf-asm-no-code.s
new file mode 100644
index 0000000000000..cbe0bf49ad1e3
--- /dev/null
+++ b/llvm/test/MC/AVR/dwarf-asm-no-code.s
@@ -0,0 +1,19 @@
+// RUN: llvm-mc < %s -triple=avr -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
+// RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+
+// If there is no code in an assembly file, no debug info is produced
+
+.section .data, "aw"
+a:
+.long 42
+
+// DWARF: ELF32-avr
+// DWARF-NOT: contents:
+// DWARF: .debug_line contents:
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_info]:
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]:
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_aranges]:

From 2b1d799a595213fd6524a0222bf59a8eb9b04b0a Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 1 Jun 2019 09:40:09 +0000
Subject: [PATCH 0809/1176] [IndVarSimplify] Add additional PR33181 tests; NFC

Two more tests with a switch to a dynamically dead IV, with poison
occuring on the first or second iteration.

llvm-svn: 362291
---
 .../{pr31181.ll => lftr-pr31181.ll}           | 94 ++++++++++++++++++-
 1 file changed, 90 insertions(+), 4 deletions(-)
 rename llvm/test/Transforms/IndVarSimplify/{pr31181.ll => lftr-pr31181.ll} (72%)

diff --git a/llvm/test/Transforms/IndVarSimplify/pr31181.ll b/llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll
similarity index 72%
rename from llvm/test/Transforms/IndVarSimplify/pr31181.ll
rename to llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll
index 453bb84192983..8316d8dbce2fe 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr31181.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll
@@ -180,8 +180,8 @@ exit:
 
 ; Adopted from D30446.
 ; We switch from %iv to %iv2 and need to change nsw to nuw in the process.
-define i32 @switch_to_different_iv_pos_inc(i32* %ptr, i1 %always_false) {
-; CHECK-LABEL: @switch_to_different_iv_pos_inc(
+define i32 @switch_to_different_iv_post_inc(i32* %ptr, i1 %always_false) {
+; CHECK-LABEL: @switch_to_different_iv_post_inc(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
@@ -215,7 +215,7 @@ never_taken:
 
 always_taken:
   %iv.inc = add nsw i32 %iv, 1
-  %iv2.inc = add nsw i32 %iv2, 1
+  %iv2.inc = add nuw nsw i32 %iv2, 1
   %cmp = icmp slt i32 %iv, 20
   br i1 %cmp, label %for.cond, label %for.end
 
@@ -266,9 +266,95 @@ never_taken:
 
 always_taken:
   %iv.inc = add nsw i32 %iv, 1
-  %iv2.inc = add nsw i32 %iv2, 1
+  %iv2.inc = add nuw nsw i32 %iv2, 1
   br label %for.cond
 
 for.end:
   ret i32 0
 }
+
+define i32 @switch_to_different_iv_first_poison(i32* %ptr, i1 %always_false) {
+; CHECK-LABEL: @switch_to_different_iv_first_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[IV2:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[IV2_INC:%.*]], [[ALWAYS_TAKEN:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ -2147483648, [[ENTRY]] ], [ [[IV_INC:%.*]], [[ALWAYS_TAKEN]] ]
+; CHECK-NEXT:    store i32 [[IV]], i32* [[PTR:%.*]]
+; CHECK-NEXT:    br i1 [[ALWAYS_FALSE:%.*]], label [[NEVER_TAKEN:%.*]], label [[ALWAYS_TAKEN]]
+; CHECK:       never_taken:
+; CHECK-NEXT:    store volatile i32 [[IV2]], i32* [[PTR]]
+; CHECK-NEXT:    br label [[ALWAYS_TAKEN]]
+; CHECK:       always_taken:
+; CHECK-NEXT:    [[IV2_INC]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-NEXT:    [[IV_INC]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV2_INC]], -2147483628
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %iv2 = phi i32 [ -1, %entry ], [ %iv2.inc, %always_taken ]
+  %iv = phi i32 [ -2147483648, %entry ], [ %iv.inc, %always_taken ]
+  store i32 %iv, i32* %ptr
+  br i1 %always_false, label %never_taken, label %always_taken
+
+never_taken:
+  store volatile i32 %iv2, i32* %ptr
+  br label %always_taken
+
+always_taken:
+  %iv2.inc = add nuw nsw i32 %iv2, 1
+  %iv.inc = add nsw i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 20
+  br i1 %cmp, label %for.cond, label %for.end
+
+for.end:
+  ret i32 0
+}
+
+define i32 @switch_to_different_iv_second_poison(i32* %ptr, i1 %always_false) {
+; CHECK-LABEL: @switch_to_different_iv_second_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[IV2:%.*]] = phi i32 [ -2, [[ENTRY:%.*]] ], [ [[IV2_INC:%.*]], [[ALWAYS_TAKEN:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ -2147483648, [[ENTRY]] ], [ [[IV_INC:%.*]], [[ALWAYS_TAKEN]] ]
+; CHECK-NEXT:    store i32 [[IV]], i32* [[PTR:%.*]]
+; CHECK-NEXT:    br i1 [[ALWAYS_FALSE:%.*]], label [[NEVER_TAKEN:%.*]], label [[ALWAYS_TAKEN]]
+; CHECK:       never_taken:
+; CHECK-NEXT:    store volatile i32 [[IV2]], i32* [[PTR]]
+; CHECK-NEXT:    br label [[ALWAYS_TAKEN]]
+; CHECK:       always_taken:
+; CHECK-NEXT:    [[IV2_INC]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-NEXT:    [[IV_INC]] = add nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV2_INC]], -2147483629
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %iv2 = phi i32 [ -2, %entry ], [ %iv2.inc, %always_taken ]
+  %iv = phi i32 [ -2147483648, %entry ], [ %iv.inc, %always_taken ]
+  store i32 %iv, i32* %ptr
+  br i1 %always_false, label %never_taken, label %always_taken
+
+never_taken:
+  store volatile i32 %iv2, i32* %ptr
+  br label %always_taken
+
+always_taken:
+  %iv2.inc = add nuw nsw i32 %iv2, 1
+  %iv.inc = add nsw i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 20
+  br i1 %cmp, label %for.cond, label %for.end
+
+for.end:
+  ret i32 0
+}

From 46d4dba6e64eb47dfa04beb8d9c623a29b3375df Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 1 Jun 2019 09:40:18 +0000
Subject: [PATCH 0810/1176] [IndVarSimplify] Fixup nowrap flags during LFTR
 (PR31181)

Fix for https://bugs.llvm.org/show_bug.cgi?id=31181 and partial fix
for LFTR poison handling issues in general.

When LFTR moves a condition from pre-inc to post-inc, it may now
depend on value that is poison due to nowrap flags. To avoid this,
we clear any nowrap flag that SCEV cannot prove for the post-inc
addrec.

Additionally, LFTR may switch to a different IV that is dynamically
dead and as such may be arbitrarily poison. This patch will correct
nowrap flags in some but not all cases where this happens. This is
related to the adoption of IR nowrap flags for the pre-inc addrec.
(See some of the switch_to_different_iv tests, where flags are not
dropped or insufficiently dropped.)

Finally, there are likely similar issues with the handling of GEP
inbounds, but we don't have a test case for this yet.

Differential Revision: https://reviews.llvm.org/D60935

llvm-svn: 362292
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 21 +++++++++++++++++++
 .../Transforms/IndVarSimplify/lftr-pr31181.ll | 12 +++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 0542eca0e78e0..0261b68e97ab0 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -2372,6 +2372,27 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
     CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
   }
 
+  // It may be necessary to drop nowrap flags on the incrementing instruction
+  // if either LFTR moves from a pre-inc check to a post-inc check (in which
+  // case the increment might have previously been poison on the last iteration
+  // only) or if LFTR switches to a different IV that was previously dynamically
+  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
+  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
+  // check), because the pre-inc addrec flags may be adopted from the original
+  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
+  // TODO: This handling is inaccurate for one case: If we switch to a
+  // dynamically dead IV that wraps on the first loop iteration only, which is
+  // not covered by the post-inc addrec. (If the new IV was not dynamically
+  // dead, it could not be poison on the first iteration in the first place.)
+  Value *IncVar = IndVar->getIncomingValueForBlock(L->getLoopLatch());
+  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
+    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
+    if (BO->hasNoUnsignedWrap())
+      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
+    if (BO->hasNoSignedWrap())
+      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
+  }
+
   Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
   assert(ExitCnt->getType()->isPointerTy() ==
              IndVar->getType()->isPointerTy() &&
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll b/llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll
index 8316d8dbce2fe..6032b1d13cef0 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-pr31181.ll
@@ -15,7 +15,7 @@ define i32 @test_drop_nuw() {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ -2, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store i32 [[STOREMERGE]], i32* @a
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[STOREMERGE]], 1
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[STOREMERGE]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 0
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
@@ -42,7 +42,7 @@ define i32 @test_drop_nsw() {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store i32 [[STOREMERGE]], i32* @a
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[STOREMERGE]], 1
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[STOREMERGE]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], -2147483648
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
@@ -155,7 +155,7 @@ define i32 @test_drop_nsw_var_lim(i32 %lim) {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    store i32 [[STOREMERGE]], i32* @a
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[STOREMERGE]], 1
+; CHECK-NEXT:    [[INC]] = add nuw i32 [[STOREMERGE]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; CHECK:       exit.loopexit:
@@ -194,7 +194,7 @@ define i32 @switch_to_different_iv_post_inc(i32* %ptr, i1 %always_false) {
 ; CHECK-NEXT:    br label [[ALWAYS_TAKEN]]
 ; CHECK:       always_taken:
 ; CHECK-NEXT:    [[IV_INC]] = add nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[IV2_INC]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-NEXT:    [[IV2_INC]] = add nuw i32 [[IV2]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV2_INC]], -2147483627
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
@@ -242,7 +242,7 @@ define i32 @switch_to_different_iv_pre_inc(i32* %ptr, i1 %always_false) {
 ; CHECK-NEXT:    br label [[ALWAYS_TAKEN]]
 ; CHECK:       always_taken:
 ; CHECK-NEXT:    [[IV_INC]] = add nsw i32 [[IV]], 1
-; CHECK-NEXT:    [[IV2_INC]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-NEXT:    [[IV2_INC]] = add nuw i32 [[IV2]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret i32 0
@@ -329,7 +329,7 @@ define i32 @switch_to_different_iv_second_poison(i32* %ptr, i1 %always_false) {
 ; CHECK-NEXT:    store volatile i32 [[IV2]], i32* [[PTR]]
 ; CHECK-NEXT:    br label [[ALWAYS_TAKEN]]
 ; CHECK:       always_taken:
-; CHECK-NEXT:    [[IV2_INC]] = add nuw nsw i32 [[IV2]], 1
+; CHECK-NEXT:    [[IV2_INC]] = add nsw i32 [[IV2]], 1
 ; CHECK-NEXT:    [[IV_INC]] = add nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV2_INC]], -2147483629
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END:%.*]]

From 73f05841992c5a70631968029e526c6a571383bf Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sat, 1 Jun 2019 10:12:07 +0000
Subject: [PATCH 0811/1176] msabi: Fix exponential mangling time for certain
 pathological inputs

Template back references used to be recursively recomputed, add a
memoization cache to cut down on this.

Since there are now two different types of argument maps, rename the
existing TypeBackReferences to FunArgBackReferences, and rename
mangleArgumentType() to mangleFunctionArgumentType().

Fixes PR42091, the input there now takes 50ms instead of 7s to compile.

No intended behavior change.

Differential Revision: https://reviews.llvm.org/D62746

llvm-svn: 362293
---
 clang/lib/AST/MicrosoftMangle.cpp             | 93 ++++++++++++-------
 .../mangle-ms-back-references-pr13207.cpp     | 41 ++++++++
 2 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index a021e7162e442..ddc6e12d1d18e 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -265,7 +265,8 @@ class MicrosoftCXXNameMangler {
   BackRefVec NameBackReferences;
 
   typedef llvm::DenseMap<const void *, unsigned> ArgBackRefMap;
-  ArgBackRefMap TypeBackReferences;
+  ArgBackRefMap FunArgBackReferences;
+  ArgBackRefMap TemplateArgBackReferences;
 
   typedef std::set<std::pair<int, bool>> PassObjectSizeArgsSet;
   PassObjectSizeArgsSet PassObjectSizeArgs;
@@ -343,7 +344,7 @@ class MicrosoftCXXNameMangler {
                                   const TemplateArgumentList &TemplateArgs);
   void mangleObjCMethodName(const ObjCMethodDecl *MD);
 
-  void mangleArgumentType(QualType T, SourceRange Range);
+  void mangleFunctionArgumentType(QualType T, SourceRange Range);
   void manglePassObjectSizeArg(const PassObjectSizeAttr *POSA);
 
   bool isArtificialTagType(QualType T) const;
@@ -793,7 +794,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
     // the X<Y> part is aliased. However, if you need to mangle
     //   void foo(A::X<A::Y>, A::X<B::Y>),
     // the A::X<> part is not aliased.
-    // That said, from the mangler's perspective we have a structure like this:
+    // That is, from the mangler's perspective we have a structure like this:
     //   namespace[s] -> type[ -> template-parameters]
     // but from the Clang perspective we have
     //   type [ -> template-parameters]
@@ -803,12 +804,30 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
     // the mangled type name as a key to check the mangling of different types
     // for aliasing.
 
-    llvm::SmallString<64> TemplateMangling;
-    llvm::raw_svector_ostream Stream(TemplateMangling);
-    MicrosoftCXXNameMangler Extra(Context, Stream);
-    Extra.mangleTemplateInstantiationName(TD, *TemplateArgs);
-
-    mangleSourceName(TemplateMangling);
+    // It's important to key cache reads off ND, not TD -- the same TD can
+    // be used with different TemplateArgs, but ND uniquely identifies
+    // TD / TemplateArg pairs.
+    ArgBackRefMap::iterator Found = TemplateArgBackReferences.find(ND);
+    if (Found == TemplateArgBackReferences.end()) {
+      // Mangle full template name into temporary buffer.
+      llvm::SmallString<64> TemplateMangling;
+      llvm::raw_svector_ostream Stream(TemplateMangling);
+      MicrosoftCXXNameMangler Extra(Context, Stream);
+      Extra.mangleTemplateInstantiationName(TD, *TemplateArgs);
+
+      // Use the string backref vector to possibly get a back reference.
+      mangleSourceName(TemplateMangling);
+
+      // Memoize back reference for this type.
+      BackRefVec::iterator StringFound =
+          llvm::find(NameBackReferences, TemplateMangling);
+      if (StringFound != NameBackReferences.end()) {
+        TemplateArgBackReferences[ND] =
+            StringFound - NameBackReferences.begin();
+      }
+    } else {
+      Out << Found->second;
+    }
     return;
   }
 
@@ -1282,11 +1301,13 @@ void MicrosoftCXXNameMangler::mangleTemplateInstantiationName(
   // Always start with the unqualified name.
 
   // Templates have their own context for back references.
-  ArgBackRefMap OuterArgsContext;
+  ArgBackRefMap OuterFunArgsContext;
+  ArgBackRefMap OuterTemplateArgsContext;
   BackRefVec OuterTemplateContext;
   PassObjectSizeArgsSet OuterPassObjectSizeArgs;
   NameBackReferences.swap(OuterTemplateContext);
-  TypeBackReferences.swap(OuterArgsContext);
+  FunArgBackReferences.swap(OuterFunArgsContext);
+  TemplateArgBackReferences.swap(OuterTemplateArgsContext);
   PassObjectSizeArgs.swap(OuterPassObjectSizeArgs);
 
   mangleUnscopedTemplateName(TD);
@@ -1294,7 +1315,8 @@ void MicrosoftCXXNameMangler::mangleTemplateInstantiationName(
 
   // Restore the previous back reference contexts.
   NameBackReferences.swap(OuterTemplateContext);
-  TypeBackReferences.swap(OuterArgsContext);
+  FunArgBackReferences.swap(OuterFunArgsContext);
+  TemplateArgBackReferences.swap(OuterTemplateArgsContext);
   PassObjectSizeArgs.swap(OuterPassObjectSizeArgs);
 }
 
@@ -1699,8 +1721,8 @@ void MicrosoftCXXNameMangler::manglePointerCVQualifiers(Qualifiers Quals) {
   }
 }
 
-void MicrosoftCXXNameMangler::mangleArgumentType(QualType T,
-                                                 SourceRange Range) {
+void MicrosoftCXXNameMangler::mangleFunctionArgumentType(QualType T,
+                                                         SourceRange Range) {
   // MSVC will backreference two canonically equivalent types that have slightly
   // different manglings when mangled alone.
 
@@ -1730,9 +1752,9 @@ void MicrosoftCXXNameMangler::mangleArgumentType(QualType T,
     TypePtr = T.getCanonicalType().getAsOpaquePtr();
   }
 
-  ArgBackRefMap::iterator Found = TypeBackReferences.find(TypePtr);
+  ArgBackRefMap::iterator Found = FunArgBackReferences.find(TypePtr);
 
-  if (Found == TypeBackReferences.end()) {
+  if (Found == FunArgBackReferences.end()) {
     size_t OutSizeBefore = Out.tell();
 
     mangleType(T, Range, QMM_Drop);
@@ -1741,9 +1763,9 @@ void MicrosoftCXXNameMangler::mangleArgumentType(QualType T,
     // Only types longer than 1 character are considered
     // and only 10 back references slots are available:
     bool LongerThanOneChar = (Out.tell() - OutSizeBefore > 1);
-    if (LongerThanOneChar && TypeBackReferences.size() < 10) {
-      size_t Size = TypeBackReferences.size();
-      TypeBackReferences[TypePtr] = Size;
+    if (LongerThanOneChar && FunArgBackReferences.size() < 10) {
+      size_t Size = FunArgBackReferences.size();
+      FunArgBackReferences[TypePtr] = Size;
     }
   } else {
     Out << Found->second;
@@ -1757,16 +1779,16 @@ void MicrosoftCXXNameMangler::manglePassObjectSizeArg(
 
   auto Iter = PassObjectSizeArgs.insert({Type, Dynamic}).first;
   auto *TypePtr = (const void *)&*Iter;
-  ArgBackRefMap::iterator Found = TypeBackReferences.find(TypePtr);
+  ArgBackRefMap::iterator Found = FunArgBackReferences.find(TypePtr);
 
-  if (Found == TypeBackReferences.end()) {
+  if (Found == FunArgBackReferences.end()) {
     std::string Name =
         Dynamic ? "__pass_dynamic_object_size" : "__pass_object_size";
     mangleArtificialTagType(TTK_Enum, Name + llvm::utostr(Type), {"__clang"});
 
-    if (TypeBackReferences.size() < 10) {
-      size_t Size = TypeBackReferences.size();
-      TypeBackReferences[TypePtr] = Size;
+    if (FunArgBackReferences.size() < 10) {
+      size_t Size = FunArgBackReferences.size();
+      FunArgBackReferences[TypePtr] = Size;
     }
   } else {
     Out << Found->second;
@@ -2192,12 +2214,12 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
         Out << 'X';
       } else if (StructorType == Ctor_CopyingClosure) {
         // Copy constructor closure always takes an unqualified reference.
-        mangleArgumentType(getASTContext().getLValueReferenceType(
-                               Proto->getParamType(0)
-                                   ->getAs<LValueReferenceType>()
-                                   ->getPointeeType(),
-                               /*SpelledAsLValue=*/true),
-                           Range);
+        mangleFunctionArgumentType(getASTContext().getLValueReferenceType(
+                                       Proto->getParamType(0)
+                                           ->getAs<LValueReferenceType>()
+                                           ->getPointeeType(),
+                                       /*SpelledAsLValue=*/true),
+                                   Range);
         Out << '@';
       } else {
         llvm_unreachable("unexpected constructor closure!");
@@ -2239,7 +2261,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
   } else {
     // Happens for function pointer type arguments for example.
     for (unsigned I = 0, E = Proto->getNumParams(); I != E; ++I) {
-      mangleArgumentType(Proto->getParamType(I), Range);
+      mangleFunctionArgumentType(Proto->getParamType(I), Range);
       // Mangle each pass_object_size parameter as if it's a parameter of enum
       // type passed directly after the parameter with the pass_object_size
       // attribute. The aforementioned enum's name is __pass_object_size, and we
@@ -2731,10 +2753,12 @@ void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T,
   if (T->qual_empty() && !T->isSpecialized())
     return mangleType(T->getBaseType(), Range, QMM_Drop);
 
-  ArgBackRefMap OuterArgsContext;
+  ArgBackRefMap OuterFunArgsContext;
+  ArgBackRefMap OuterTemplateArgsContext;
   BackRefVec OuterTemplateContext;
 
-  TypeBackReferences.swap(OuterArgsContext);
+  FunArgBackReferences.swap(OuterFunArgsContext);
+  TemplateArgBackReferences.swap(OuterTemplateArgsContext);
   NameBackReferences.swap(OuterTemplateContext);
 
   mangleTagTypeKind(TTK_Struct);
@@ -2758,7 +2782,8 @@ void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T,
 
   Out << '@';
 
-  TypeBackReferences.swap(OuterArgsContext);
+  FunArgBackReferences.swap(OuterFunArgsContext);
+  TemplateArgBackReferences.swap(OuterTemplateArgsContext);
   NameBackReferences.swap(OuterTemplateContext);
 }
 
diff --git a/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp b/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp
index c68b97e68c05e..653937917b8d9 100644
--- a/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp
@@ -191,3 +191,44 @@ void fun_instantiate2() {
 // CHECK: "??$fun_tmpl_recurse@H$1??$fun_tmpl_recurse@H$1?ident@fn_space@@YA?AURetVal@2@H@Z@fn_space@@YA?AURetVal@1@H@Z@fn_space@@YA?AURetVal@0@H@Z"
 // CHECK: "??$fun_tmpl_recurse@H$1?ident@fn_space@@YA?AURetVal@2@H@Z@fn_space@@YA?AURetVal@0@H@Z"
 }
+
+
+template <class T1, class T2, class T3, class T4, class T5, class T6, class T7,
+          class T8, class T9, class T10>
+struct Fooob {};
+
+using A0 = Fooob<int, int, int, int, int, int, int, int, int, int>;
+using A1 = Fooob<A0, A0, A0, A0, A0, A0, A0, A0, A0, A0>;
+using A2 = Fooob<A1, A1, A1, A1, A1, A1, A1, A1, A1, A1>;
+using A3 = Fooob<A2, A2, A2, A2, A2, A2, A2, A2, A2, A2>;
+using A4 = Fooob<A3, A3, A3, A3, A3, A3, A3, A3, A3, A3>;
+using A5 = Fooob<A4, A4, A4, A4, A4, A4, A4, A4, A4, A4>;
+using A6 = Fooob<A5, A5, A5, A5, A5, A5, A5, A5, A5, A5>;
+using A7 = Fooob<A6, A6, A6, A6, A6, A6, A6, A6, A6, A6>;
+using A8 = Fooob<A7, A7, A7, A7, A7, A7, A7, A7, A7, A7>;
+using A9 = Fooob<A8, A8, A8, A8, A8, A8, A8, A8, A8, A8>;
+using A10 = Fooob<A9, A9, A9, A9, A9, A9, A9, A9, A9, A9>;
+
+// This should take milliseconds, not minutes.
+void f(A9 a) {}
+// CHECK: "?f@@YAXU?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@U?$Fooob@HHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@@Z"
+
+
+template <class T1, class T2, class T3, class T4, class T5, class T6, class T7,
+          class T8, class T9, class T10, class T11, class T12, class T13,
+          class T14, class T15, class T16, class T17, class T18, class T19,
+          class T20>
+struct Food {};
+
+using B0 = Food<int, int, int, int, int, int, int, int, int, int,  int, int, int, int, int, int, int, int, int, int>;
+using B1 = Food<B0, B0, B0, B0, B0, B0, B0, B0, B0, B0,  B0, B0, B0, B0, B0, B0, B0, B0, B0, B0>;
+using B2 = Food<B1, B0, B0, B0, B0, B0, B0, B0, B0, B0,  B1, B1, B1, B1, B1, B1, B1, B1, B1, B1>;
+using B3 = Food<B2, B1, B0, B0, B0, B0, B0, B0, B0, B0,  B2, B2, B2, B2, B2, B2, B2, B2, B2, B2>;
+using B4 = Food<B3, B2, B1, B0, B0, B0, B0, B0, B0, B0,  B3, B3, B3, B3, B3, B3, B3, B3, B3, B3>;
+using B5 = Food<B4, B3, B2, B1, B0, B0, B0, B0, B0, B0,  B4, B4, B4, B4, B4, B4, B4, B4, B4, B4>;
+using B6 = Food<B5, B4, B3, B2, B1, B0, B0, B0, B0, B0,  B5, B5, B5, B5, B5, B5, B5, B5, B5, B5>;
+
+// This too should take milliseconds, not minutes.
+void f(B6 a) {}
+
+// CHECK: "?f@@YAXU?$Food@U?$Food@U?$Food@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U4@U4@U4@U4@U4@U4@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U5@U5@U5@U5@U5@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U4@U4@U4@U4@U4@U4@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U6@U6@U6@U6@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@@Z"

From bcc0bd7e2a1893ddb7ab1ec612f3c71fd62d1801 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sat, 1 Jun 2019 11:01:26 +0000
Subject: [PATCH 0812/1176] [ELF][test] Reorganize some AArch64 tests

Delete aarch64-got.s because it is covered by aarch64-tls-iele.s
Merge got-aarch64.s into aarch64-fpic-got.s by adding disassembly to the latter
Create aarch64-gnu-ifunc-nonpreemptable to unify aarch64-gnu-ifunc3.s (position-dependent executable) and aarch64-gnu-ifunc-address-pie.s (PIE)
Rename aarch64-got-reloc.s to aarch64-got-weak-undef.s
Add --no-show-raw-insn to llvm-objdump -d RUN lines
Add -pie test to arch64-tls-iele.s
Delete aarch64-tls-pie.s: it is covered by arch64-tls-iele.s and aarch64-tls-le.s
Rename aarch64-copy2.s to aarch64-nopic-plt.s: "copy2" gives false impression that the test is related to copy relocation

llvm-svn: 362294
---
 lld/test/ELF/aarch64-fpic-got.s               | 16 +++--
 lld/test/ELF/aarch64-gnu-ifunc-address-pie.s  | 49 -------------
 lld/test/ELF/aarch64-gnu-ifunc-address.s      |  8 +--
 .../ELF/aarch64-gnu-ifunc-nonpreemptable.s    | 72 +++++++++++++++++++
 lld/test/ELF/aarch64-gnu-ifunc3.s             | 57 ---------------
 lld/test/ELF/aarch64-got-reloc.s              | 30 --------
 lld/test/ELF/aarch64-got-weak-undef.s         | 18 +++++
 lld/test/ELF/aarch64-got.s                    | 18 -----
 .../{aarch64-copy2.s => aarch64-nopic-plt.s}  |  0
 lld/test/ELF/aarch64-tls-gdie.s               | 12 ++--
 lld/test/ELF/aarch64-tls-gdle.s               | 18 ++---
 lld/test/ELF/aarch64-tls-ie.s                 | 66 +++++++++--------
 lld/test/ELF/aarch64-tls-iele.s               | 16 +++--
 lld/test/ELF/aarch64-tls-pie.s                | 28 --------
 lld/test/ELF/aarch64-tlsld-ldst.s             | 24 +++----
 lld/test/ELF/got-aarch64.s                    | 40 -----------
 16 files changed, 175 insertions(+), 297 deletions(-)
 delete mode 100644 lld/test/ELF/aarch64-gnu-ifunc-address-pie.s
 create mode 100644 lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
 delete mode 100644 lld/test/ELF/aarch64-gnu-ifunc3.s
 delete mode 100644 lld/test/ELF/aarch64-got-reloc.s
 create mode 100644 lld/test/ELF/aarch64-got-weak-undef.s
 delete mode 100644 lld/test/ELF/aarch64-got.s
 rename lld/test/ELF/{aarch64-copy2.s => aarch64-nopic-plt.s} (100%)
 delete mode 100644 lld/test/ELF/aarch64-tls-pie.s
 delete mode 100644 lld/test/ELF/got-aarch64.s

diff --git a/lld/test/ELF/aarch64-fpic-got.s b/lld/test/ELF/aarch64-fpic-got.s
index fbf28b8cc36fa..b4b685acd9c60 100644
--- a/lld/test/ELF/aarch64-fpic-got.s
+++ b/lld/test/ELF/aarch64-fpic-got.s
@@ -3,14 +3,22 @@
 # RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux %p/Inputs/shared.s -o %t-lib.o
 # RUN: ld.lld -shared %t-lib.o -o %t-lib.so
+
 # RUN: ld.lld %t-lib.so %t.o -o %t.exe
-# RUN: llvm-readobj --dyn-relocations %t.exe | FileCheck %s
+# RUN: llvm-readobj -r %t.exe | FileCheck --check-prefix=RELOC %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.exe | FileCheck --check-prefix=DIS %s
 
 ## Checks if got access to dynamic objects is done through a got relative
 ## dynamic relocation and not using plt relative (R_AARCH64_JUMP_SLOT).
-# CHECK:       Dynamic Relocations {
-# CHECK-NEXT:    0x{{[0-9A-F]+}}  R_AARCH64_GLOB_DAT bar 0x0
-# CHECK-NEXT:  }
+# RELOC:      .rela.dyn {
+# RELOC-NEXT:   0x2200C0 R_AARCH64_GLOB_DAT bar 0x0
+# RELOC-NEXT: }
+
+## page(0x2200C0) - page(0x210000) = 65536
+## page(0x2200C0) & 0xff8 = 192
+# DIS:      _start:
+# DIS-NEXT: 210000: adrp x0, #65536
+# DIS-NEXT: 210004: ldr x0, [x0, #192]
 
 .globl _start
 _start:
diff --git a/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s b/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s
deleted file mode 100644
index 9e6a940115239..0000000000000
--- a/lld/test/ELF/aarch64-gnu-ifunc-address-pie.s
+++ /dev/null
@@ -1,49 +0,0 @@
-# REQUIRES: aarch64
-# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
-# RUN: ld.lld -pie %t.o -o %tout
-# RUN: llvm-objdump -D %tout | FileCheck %s
-# RUN: llvm-readobj -r %tout | FileCheck %s -check-prefix=CHECK-RELOCS
-
-# Test that when we take the address of a preemptible ifunc using -fpie, we can
-# handle the case when the ifunc is in the same translation unit as the address
-# taker. In this case the compiler knows that ifunc is not defined in a shared
-# library so it can use a non got generating relative reference.
-.text
-.globl myfunc
-.type myfunc,@gnu_indirect_function
-myfunc:
-.globl myfunc_resolver
-.type myfunc_resolver,@function
-myfunc_resolver:
- ret
-
-.text
-.globl main
-.type main,@function
-main:
- adrp x8, myfunc
- add  x8, x8, :lo12: myfunc
- ret
-
-# CHECK: 0000000000010000 myfunc_resolver:
-# CHECK-NEXT:    10000:	c0 03 5f d6 	ret
-# CHECK: 0000000000010004 main:
-# CHECK-NEXT:    10004:	08 00 00 90 	adrp	x8, #0
-# x8 = 0x10000
-# CHECK-NEXT:    10008:	08 41 00 91 	add	x8, x8, #16
-# x8 = 0x10010 = .plt for myfunc
-# CHECK-NEXT:    1000c:	c0 03 5f d6 	ret
-# CHECK-EMPTY:
-# CHECK-NEXT: Disassembly of section .plt:
-# CHECK-EMPTY:
-# CHECK-NEXT: 0000000000010010 myfunc:
-# CHECK-NEXT:    10010:	10 01 00 90 	adrp	x16, #131072
-# CHECK-NEXT:    10014:	11 02 40 f9 	ldr	x17, [x16]
-# CHECK-NEXT:    10018:	10 02 00 91 	add	x16, x16, #0
-# CHECK-NEXT:    1001c:	20 02 1f d6 	br	x17
-
-# CHECK-RELOCS: Relocations [
-# CHECK-RELOCS-NEXT:   Section {{.*}} .rela.plt {
-# CHECK-RELOCS-NEXT:     0x30000 R_AARCH64_IRELATIVE - 0x10000
-# CHECK-RELOCS-NEXT:   }
-# CHECK-RELOCS-NEXT: ]
diff --git a/lld/test/ELF/aarch64-gnu-ifunc-address.s b/lld/test/ELF/aarch64-gnu-ifunc-address.s
index de60b22082df5..ad70ecbc84ec9 100644
--- a/lld/test/ELF/aarch64-gnu-ifunc-address.s
+++ b/lld/test/ELF/aarch64-gnu-ifunc-address.s
@@ -1,7 +1,7 @@
 # REQUIRES: aarch64
 # RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
 # RUN: ld.lld -shared %t.o -o %tout
-# RUN: llvm-objdump -D %tout | FileCheck %s
+# RUN: llvm-objdump -D --no-show-raw-insn %tout | FileCheck %s
 # RUN: llvm-readobj -r %tout | FileCheck %s --check-prefix=CHECK-RELOCS
 
 # Test that when we take the address of a preemptible ifunc in a shared object
@@ -22,10 +22,10 @@ main:
  ret
 # CHECK:   0000000000010004 main:
 # x8 = 0x20000
-# CHECK-NEXT:    10004: 88 00 00 90     adrp    x8, #65536
+# CHECK-NEXT:    10004: adrp    x8, #65536
 # x8 = 0x200a0 = .got entry for myfunc with R_AARCH64_GLOB_DAT
-# CHECK-NEXT:    10008: 08 51 40 f9     ldr     x8, [x8, #160]
-# CHECK-NEXT:    1000c: c0 03 5f d6     ret
+# CHECK-NEXT:    10008: ldr     x8, [x8, #160]
+# CHECK-NEXT:    1000c: ret
 
 # CHECK: Disassembly of section .got:
 # CHECK-EMPTY:
diff --git a/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
new file mode 100644
index 0000000000000..571d7c29fac68
--- /dev/null
+++ b/lld/test/ELF/aarch64-gnu-ifunc-nonpreemptable.s
@@ -0,0 +1,72 @@
+# REQUIRES: aarch64
+# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
+
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PDE
+# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PDE-RELOC
+
+# RUN: ld.lld -pie %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s --check-prefix=PIE
+# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=PIE-RELOC
+
+## When compiling with -fno-PIE or -fPIE, if the ifunc is in the same
+## translation unit as the address taker, the compiler knows that ifunc is not
+## defined in a shared library so it can use a non GOT generating relative reference.
+.text
+.globl myfunc
+.type myfunc,@gnu_indirect_function
+myfunc:
+.globl myfunc_resolver
+.type myfunc_resolver,@function
+myfunc_resolver:
+ ret
+
+.text
+.globl main
+.type main,@function
+main:
+ adrp x8, myfunc
+ add  x8, x8, :lo12: myfunc
+ ret
+
+## The address of myfunc is the address of the PLT entry for myfunc.
+# PDE:      myfunc_resolver:
+# PDE-NEXT:   210000:   ret
+# PDE:      main:
+# PDE-NEXT:   210004:   adrp    x8, #0
+# PDE-NEXT:   210008:   add     x8, x8, #16
+# PDE-NEXT:   21000c:   ret
+# PDE-EMPTY:
+# PDE-NEXT: Disassembly of section .plt:
+# PDE-EMPTY:
+# PDE-NEXT: myfunc:
+## page(.got.plt) - page(0x210010) = 65536
+# PDE-NEXT:   210010: adrp    x16, #65536
+# PDE-NEXT:   210014: ldr     x17, [x16]
+# PDE-NEXT:   210018: add     x16, x16, #0
+# PDE-NEXT:   21001c: br      x17
+
+## The adrp to myfunc should generate a PLT entry and a GOT entry with an
+## irelative relocation.
+# PDE-RELOC:      .rela.plt {
+# PDE-RELOC-NEXT:   0x220000 R_AARCH64_IRELATIVE - 0x210000
+# PDE-RELOC-NEXT: }
+
+# PIE:      myfunc_resolver:
+# PIE-NEXT:    10000: ret
+# PIE:      main:
+# PIE-NEXT:    10004: adrp    x8, #0
+# PIE-NEXT:    10008: add     x8, x8, #16
+# PIE-NEXT:    1000c: ret
+# PIE-EMPTY:
+# PIE-NEXT: Disassembly of section .plt:
+# PIE-EMPTY:
+# PIE-NEXT: myfunc:
+# PIE-NEXT:    10010: adrp    x16, #131072
+# PIE-NEXT:    10014: ldr     x17, [x16]
+# PIE-NEXT:    10018: add     x16, x16, #0
+# PIE-NEXT:    1001c: br      x17
+
+# PIE-RELOC:      .rela.plt {
+# PIE-RELOC-NEXT:   0x30000 R_AARCH64_IRELATIVE - 0x10000
+# PIE-RELOC-NEXT: }
diff --git a/lld/test/ELF/aarch64-gnu-ifunc3.s b/lld/test/ELF/aarch64-gnu-ifunc3.s
deleted file mode 100644
index 1c032924d419d..0000000000000
--- a/lld/test/ELF/aarch64-gnu-ifunc3.s
+++ /dev/null
@@ -1,57 +0,0 @@
-# REQUIRES: aarch64
-# RUN: llvm-mc -filetype=obj -triple=aarch64-none-linux-gnu %s -o %t.o
-# RUN: ld.lld -static %t.o -o %tout
-# RUN: llvm-objdump -D %tout | FileCheck %s
-# RUN: llvm-readobj -r %tout | FileCheck %s --check-prefix=RELOC
-
-# The address of myfunc is the address of the PLT entry for myfunc.
-# The adrp to myfunc should generate a PLT entry and a got entry with an
-# irelative relocation.
-.text
-.globl myfunc
-.type myfunc,@gnu_indirect_function
-myfunc:
-.globl myfunc_resolver
-.type myfunc_resolver,@function
-myfunc_resolver:
- ret
-
-.text
-.globl _start
-.type _start,@function
-_start:
- adrp x8, myfunc
- add  x8, x8, :lo12:myfunc
- ret
-
-# CHECK: Disassembly of section .text:
-# CHECK-EMPTY:
-# CHECK-NEXT: myfunc_resolver:
-# CHECK-NEXT:   210000:	c0 03 5f d6 	ret
-# CHECK: _start:
-# adrp x8, 0x210000 + 0x10 from add == .plt entry
-# CHECK-NEXT:   210004:	08 00 00 90 	adrp	x8, #0
-# CHECK-NEXT:   210008:	08 41 00 91 	add	x8, x8, #16
-# CHECK-NEXT:   21000c:	c0 03 5f d6 	ret
-# CHECK-EMPTY:
-# CHECK-NEXT: Disassembly of section .plt:
-# CHECK-EMPTY:
-# CHECK-NEXT: myfunc:
-# adrp x16, 0x220000, 0x220000 == address in .got.plt
-# CHECK-NEXT:   210010:	90 00 00 90 	adrp	x16, #65536
-# CHECK-NEXT:   210014:	11 02 40 f9 	ldr	x17, [x16]
-# CHECK-NEXT:   210018:	10 02 00 91 	add	x16, x16, #0
-# CHECK-NEXT:   21001c:	20 02 1f d6 	br	x17
-# CHECK-EMPTY:
-# CHECK-NEXT: Disassembly of section .got.plt:
-# CHECK-EMPTY:
-# CHECK-NEXT: .got.plt:
-# 0x210010 == address in .plt
-# CHECK-NEXT:   220000:	10 00 21 00
-# CHECK-NEXT:   220004:	00 00 00 00
-
-# RELOC: Relocations [
-# RELOC-NEXT:  Section (1) .rela.plt {
-# RELOC-NEXT:    0x220000 R_AARCH64_IRELATIVE - 0x210000
-# RELOC-NEXT:  }
-# RELOC-NEXT: ]
diff --git a/lld/test/ELF/aarch64-got-reloc.s b/lld/test/ELF/aarch64-got-reloc.s
deleted file mode 100644
index f205e0799335c..0000000000000
--- a/lld/test/ELF/aarch64-got-reloc.s
+++ /dev/null
@@ -1,30 +0,0 @@
-// REQUIRES: aarch64
-// RUN: llvm-mc -filetype=obj -triple=aarch64-none-freebsd %s -o %t.o
-// RUN: ld.lld %t.o -o %t
-// RUN: llvm-readobj -S --section-data  %t | FileCheck %s
-
-// CHECK:      Name: .got
-// CHECK-NEXT: Type: SHT_PROGBITS
-// CHECK-NEXT:  Flags [
-// CHECK-NEXT:    SHF_ALLOC
-// CHECK-NEXT:    SHF_WRITE
-// CHECK-NEXT:  ]
-// CHECK-NEXT:  Address:
-// CHECK-NEXT:  Offset:
-// CHECK-NEXT:  Size: 8
-// CHECK-NEXT:  Link: 0
-// CHECK-NEXT:  Info: 0
-// CHECK-NEXT:  AddressAlignment: 8
-// CHECK-NEXT:  EntrySize: 0
-// CHECK-NEXT:  SectionData (
-// CHECK-NEXT:    0000: 00000000 00000000                    |........|
-// CHECK-NEXT:  )
-
-        .globl  _start
-_start:
-        adrp    x8, :got:foo
-        ldr     x8, [x8, :got_lo12:foo]
-        ldr     w0, [x8]
-        ret
-
-        .weak   foo
diff --git a/lld/test/ELF/aarch64-got-weak-undef.s b/lld/test/ELF/aarch64-got-weak-undef.s
new file mode 100644
index 0000000000000..fcdce49fb4c86
--- /dev/null
+++ b/lld/test/ELF/aarch64-got-weak-undef.s
@@ -0,0 +1,18 @@
+// REQUIRES: aarch64
+// RUN: llvm-mc -filetype=obj -triple=aarch64-none-freebsd %s -o %t.o
+// RUN: ld.lld %t.o -o %t
+// RUN: llvm-readelf -r %t | FileCheck --check-prefix=RELOC %s
+// RUN: llvm-readelf -x .got %t | FileCheck %s
+
+// RELOC: no relocations
+
+// CHECK: 0x00220000 00000000 00000000
+
+        .globl  _start
+_start:
+        adrp    x8, :got:foo
+        ldr     x8, [x8, :got_lo12:foo]
+        ldr     w0, [x8]
+        ret
+
+        .weak   foo
diff --git a/lld/test/ELF/aarch64-got.s b/lld/test/ELF/aarch64-got.s
deleted file mode 100644
index 54ba7d735afce..0000000000000
--- a/lld/test/ELF/aarch64-got.s
+++ /dev/null
@@ -1,18 +0,0 @@
-# REQUIRES: aarch64
-# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %t.o
-# RUN: ld.lld %t.o -o %t
-# RUN: llvm-readobj -S %t | FileCheck %s
-
-# CHECK-NOT: Name: .got
-
-.globl _start
-_start:
- adrp    x0, :gottprel:foo
-
-	.global foo
- .section .tdata,"awT",%progbits
- .align 2
- .type foo, %object
- .size foo, 4
-foo:
- .word 5
diff --git a/lld/test/ELF/aarch64-copy2.s b/lld/test/ELF/aarch64-nopic-plt.s
similarity index 100%
rename from lld/test/ELF/aarch64-copy2.s
rename to lld/test/ELF/aarch64-nopic-plt.s
diff --git a/lld/test/ELF/aarch64-tls-gdie.s b/lld/test/ELF/aarch64-tls-gdie.s
index 6563dd5a0dfa0..588a46acfce4b 100644
--- a/lld/test/ELF/aarch64-tls-gdie.s
+++ b/lld/test/ELF/aarch64-tls-gdie.s
@@ -4,7 +4,7 @@
 // RUN: ld.lld %t2.o -o %t2.so -shared
 // RUN: ld.lld --hash-style=sysv %t.o %t2.so -o %t
 // RUN: llvm-readobj -S %t | FileCheck --check-prefix=SEC %s
-// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 
         .globl  _start
 _start:
@@ -27,8 +27,8 @@ _start:
 // 0x0B0 = 176
 
 // CHECK:      _start:
-// CHECK-NEXT: 210000: {{.*}} nop
-// CHECK-NEXT: 210004: {{.*}} adrp       x0, #65536
-// CHECK-NEXT: 210008: {{.*}} ldr        x0, [x0, #176]
-// CHECK-NEXT: 21000c: {{.*}} nop
-// CHECK-NEXT: 210010: {{.*}} nop
+// CHECK-NEXT: 210000: nop
+// CHECK-NEXT: 210004: adrp    x0, #65536
+// CHECK-NEXT: 210008: ldr     x0, [x0, #176]
+// CHECK-NEXT: 21000c: nop
+// CHECK-NEXT: 210010: nop
diff --git a/lld/test/ELF/aarch64-tls-gdle.s b/lld/test/ELF/aarch64-tls-gdle.s
index 30ceb83fbc92f..533955fb375e0 100644
--- a/lld/test/ELF/aarch64-tls-gdle.s
+++ b/lld/test/ELF/aarch64-tls-gdle.s
@@ -2,21 +2,21 @@
 # RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %p/Inputs/aarch64-tls-ie.s -o %ttlsie.o
 # RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %s -o %tmain.o
 # RUN: ld.lld %tmain.o %ttlsie.o -o %tout
-# RUN: llvm-objdump -d %tout | FileCheck %s
-# RUN: llvm-readobj -S -r %tout | FileCheck -check-prefix=RELOC %s
+# RUN: llvm-objdump -d --no-show-raw-insn %tout | FileCheck %s
+# RUN: llvm-readobj -r %tout | FileCheck -check-prefix=RELOC %s
 
-#Local-Dynamic to Local-Exec relax creates no
-#RELOC:      Relocations [
-#RELOC-NEXT: ]
+## Local-Dynamic to Local-Exec relax creates no dynamic relocations.
+# RELOC:      Relocations [
+# RELOC-NEXT: ]
 
 # TCB size = 0x16 and foo is first element from TLS register.
 # CHECK: Disassembly of section .text:
 # CHECK-EMPTY:
 # CHECK: _start:
-# CHECK:  210000:	00 00 a0 d2	movz	x0, #0, lsl #16
-# CHECK:  210004:	00 02 80 f2 	movk	x0, #16
-# CHECK:  210008:	1f 20 03 d5 	nop
-# CHECK:  21000c:	1f 20 03 d5 	nop
+# CHECK:  210000: movz    x0, #0, lsl #16
+# CHECK:  210004: movk    x0, #16
+# CHECK:  210008: nop
+# CHECK:  21000c: nop
 
 .globl _start
 _start:
diff --git a/lld/test/ELF/aarch64-tls-ie.s b/lld/test/ELF/aarch64-tls-ie.s
index deb33b35dafcc..3d35876c90577 100644
--- a/lld/test/ELF/aarch64-tls-ie.s
+++ b/lld/test/ELF/aarch64-tls-ie.s
@@ -1,45 +1,43 @@
-// REQUIRES: aarch64
 # REQUIRES: aarch64
 # RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %p/Inputs/aarch64-tls-ie.s -o %tdso.o
 # RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %tmain.o
 # RUN: ld.lld -shared %tdso.o -o %tdso.so
 # RUN: ld.lld --hash-style=sysv %tmain.o %tdso.so -o %tout
-# RUN: llvm-objdump -d %tout | FileCheck %s
+# RUN: llvm-objdump -d --no-show-raw-insn %tout | FileCheck %s
 # RUN: llvm-readobj -S -r %tout | FileCheck -check-prefix=RELOC %s
 
-#RELOC:      Section {
-#RELOC:        Index:
-#RELOC:        Name: .got
-#RELOC-NEXT:   Type: SHT_PROGBITS
-#RELOC-NEXT:   Flags [
-#RELOC-NEXT:     SHF_ALLOC
-#RELOC-NEXT:     SHF_WRITE
-#RELOC-NEXT:   ]
-#RELOC-NEXT:   Address: 0x2200B0
-#RELOC-NEXT:   Offset: 0x200B0
-#RELOC-NEXT:   Size: 16
-#RELOC-NEXT:   Link: 0
-#RELOC-NEXT:   Info: 0
-#RELOC-NEXT:   AddressAlignment: 8
-#RELOC-NEXT:   EntrySize: 0
-#RELOC-NEXT: }
-#RELOC:      Relocations [
-#RELOC-NEXT:  Section ({{.*}}) .rela.dyn {
-#RELOC-NEXT:    0x2200B8 R_AARCH64_TLS_TPREL64 bar 0x0
-#RELOC-NEXT:    0x2200B0 R_AARCH64_TLS_TPREL64 foo 0x0
-#RELOC-NEXT:  }
-#RELOC-NEXT:]
+# RELOC:      Section {
+# RELOC:        Index:
+# RELOC:        Name: .got
+# RELOC-NEXT:   Type: SHT_PROGBITS
+# RELOC-NEXT:   Flags [
+# RELOC-NEXT:     SHF_ALLOC
+# RELOC-NEXT:     SHF_WRITE
+# RELOC-NEXT:   ]
+# RELOC-NEXT:   Address: 0x2200B0
+# RELOC-NEXT:   Offset: 0x200B0
+# RELOC-NEXT:   Size: 16
+# RELOC-NEXT:   Link: 0
+# RELOC-NEXT:   Info: 0
+# RELOC-NEXT:   AddressAlignment: 8
+# RELOC-NEXT:   EntrySize: 0
+# RELOC-NEXT: }
+# RELOC:      Relocations [
+# RELOC-NEXT:  Section ({{.*}}) .rela.dyn {
+# RELOC-NEXT:    0x2200B8 R_AARCH64_TLS_TPREL64 bar 0x0
+# RELOC-NEXT:    0x2200B0 R_AARCH64_TLS_TPREL64 foo 0x0
+# RELOC-NEXT:  }
+# RELOC-NEXT:]
 
-# Page(0x2200B0) - Page(0x210000) = 0x10000 = 65536
-# 0x2200B0 & 0xff8 = 0xB0 = 176
-# Page(0x2200B8) - Page(0x210000) = 0x10000 = 65536
-# 0x2200B8 & 0xff8 = 0xB8 = 184
-#CHECK: Disassembly of section .text:
-#CHECK: _start:
-#CHECK:  210000: 80 00 00 90 adrp x0, #65536
-#CHECK:  210004: 00 58 40 f9 ldr  x0, [x0, #176]
-#CHECK:  210008: 80 00 00 90 adrp x0, #65536
-#CHECK:  21000c: 00 5c 40 f9 ldr  x0, [x0, #184]
+## Page(0x2200B0) - Page(0x210000) = 0x10000 = 65536
+## 0x2200B0 & 0xff8 = 0xB0 = 176
+## Page(0x2200B8) - Page(0x210000) = 0x10000 = 65536
+## 0x2200B8 & 0xff8 = 0xB8 = 184
+# CHECK:     _start:
+# CHECK-NEXT: 210000: adrp x0, #65536
+# CHECK-NEXT: 210004: ldr  x0, [x0, #176]
+# CHECK-NEXT: 210008: adrp x0, #65536
+# CHECK-NEXT: 21000c: ldr  x0, [x0, #184]
 
 .globl _start
 _start:
diff --git a/lld/test/ELF/aarch64-tls-iele.s b/lld/test/ELF/aarch64-tls-iele.s
index 3fac240bc8282..928ea450e9e73 100644
--- a/lld/test/ELF/aarch64-tls-iele.s
+++ b/lld/test/ELF/aarch64-tls-iele.s
@@ -2,8 +2,12 @@
 # RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %p/Inputs/aarch64-tls-ie.s -o %ttlsie.o
 # RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %s -o %tmain.o
 # RUN: ld.lld %tmain.o %ttlsie.o -o %tout
-# RUN: llvm-objdump -d %tout | FileCheck %s
-# RUN: llvm-readobj -S -r %tout | FileCheck -check-prefix=RELOC %s
+# RUN: llvm-objdump -d --no-show-raw-insn %tout | FileCheck %s
+# RUN: llvm-readobj -r %tout | FileCheck -check-prefix=RELOC %s
+
+# RUN: ld.lld -pie %tmain.o %ttlsie.o -o %tout
+# RUN: llvm-objdump -d --no-show-raw-insn %tout | FileCheck %s
+# RUN: llvm-readobj -r %tout | FileCheck -check-prefix=RELOC %s
 
 # Initial-Exec to Local-Exec relax creates no dynamic relocations.
 # RELOC:      Relocations [
@@ -13,10 +17,10 @@
 # CHECK: Disassembly of section .text:
 # CHECK-EMPTY:
 # CHECK: _start:
-# CHECK-NEXT: 210000:  00 00 a0 d2   movz   x0, #0, lsl #16
-# CHECK-NEXT: 210004:  80 02 80 f2   movk   x0, #20
-# CHECK-NEXT: 210008:  00 00 a0 d2   movz   x0, #0, lsl #16
-# CHECK-NEXT: 21000c:  00 02 80 f2   movk   x0, #16
+# CHECK-NEXT: movz   x0, #0, lsl #16
+# CHECK-NEXT: movk   x0, #20
+# CHECK-NEXT: movz   x0, #0, lsl #16
+# CHECK-NEXT: movk   x0, #16
 
 .section .tdata
 .align 2
diff --git a/lld/test/ELF/aarch64-tls-pie.s b/lld/test/ELF/aarch64-tls-pie.s
deleted file mode 100644
index 466045d6765da..0000000000000
--- a/lld/test/ELF/aarch64-tls-pie.s
+++ /dev/null
@@ -1,28 +0,0 @@
-# REQUIRES: aarch64
-# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-cloudabi %s -o %t1.o
-# RUN: ld.lld -pie %t1.o -o %t
-# RUN: llvm-readobj -r %t | FileCheck %s
-
-# Similar to bug 27174: R_AARCH64_TLSLE_*TPREL* relocations should be
-# eliminated when building a PIE executable, as the static TLS layout is
-# fixed.
-#
-# CHECK:      Relocations [
-# CHECK-NEXT: ]
-
-	.globl	_start
-_start:
-	# Accessing the variable directly.
-	add	x11, x8, :tprel_hi12:i
-	add	x11, x11, :tprel_lo12_nc:i
-
-	# Accessing the variable through the GOT.
-	adrp	x10, :gottprel:i
-	mrs	x8, TPIDR_EL0
-	ldr	x10, [x10, :gottprel_lo12:i]
-
-	.section	.tbss.i,"awT",@nobits
-	.globl	i
-i:
-	.word	0
-	.size	i, 4
diff --git a/lld/test/ELF/aarch64-tlsld-ldst.s b/lld/test/ELF/aarch64-tlsld-ldst.s
index 3144ca5d99afd..3f00630bd95c7 100644
--- a/lld/test/ELF/aarch64-tlsld-ldst.s
+++ b/lld/test/ELF/aarch64-tlsld-ldst.s
@@ -1,7 +1,7 @@
 // REQUIRES: aarch64
 // RUN: llvm-mc -triple=aarch64-linux-gnu -filetype=obj %s -o %t.o
 // RUN: ld.lld %t.o -o %t
-// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 // RUN: llvm-readelf --symbols %t | FileCheck -check-prefix CHECK-SYMS %s
 
         .text
@@ -25,22 +25,22 @@ _start:  mrs x8, TPIDR_EL0
         ldrb w0, [x8, :tprel_lo12_nc:var4]
 
 // CHECK: _start:
-// CHECK-NEXT:    210000:       48 d0 3b d5     mrs     x8, TPIDR_EL0
+// CHECK-NEXT:    210000:       mrs     x8, TPIDR_EL0
 // 0x0 + c10 = 0xc10       = tcb (16-bytes) + var0
-// CHECK-NEXT:    210004:       08 01 40 91     add     x8, x8, #0, lsl #12
-// CHECK-NEXT:    210008:       14 05 c3 3d     ldr     q20, [x8, #3088]
+// CHECK-NEXT:    210004:       add     x8, x8, #0, lsl #12
+// CHECK-NEXT:    210008:       ldr     q20, [x8, #3088]
 // 0x1000 + 0x820 = 0x1820 = tcb + var1
-// CHECK-NEXT:    21000c:       08 05 40 91     add     x8, x8, #1, lsl #12
-// CHECK-NEXT:    210010:       00 11 44 f9     ldr     x0, [x8, #2080]
+// CHECK-NEXT:    21000c:       add     x8, x8, #1, lsl #12
+// CHECK-NEXT:    210010:       ldr     x0, [x8, #2080]
 // 0x2000 + 0x428 = 0x2428 = tcb + var2
-// CHECK-NEXT:    210014:       08 09 40 91     add     x8, x8, #2, lsl #12
-// CHECK-NEXT:    210018:       00 29 44 b9     ldr     w0, [x8, #1064]
+// CHECK-NEXT:    210014:       add     x8, x8, #2, lsl #12
+// CHECK-NEXT:    210018:       ldr     w0, [x8, #1064]
 // 0x3000 + 0x2c  = 0x302c = tcb + var3
-// CHECK-NEXT:    21001c:       08 0d 40 91     add     x8, x8, #3, lsl #12
-// CHECK-NEXT:    210020:       00 59 40 79     ldrh    w0, [x8, #44]
+// CHECK-NEXT:    21001c:       add     x8, x8, #3, lsl #12
+// CHECK-NEXT:    210020:       ldrh    w0, [x8, #44]
 // 0x3000 + 0xc2e = 0x32ce = tcb + var4
-// CHECK-NEXT:    210024:       08 0d 40 91     add     x8, x8, #3, lsl #12
-// CHECK-NEXT:    210028:       00 b9 70 39     ldrb    w0, [x8, #3118]
+// CHECK-NEXT:    210024:       add     x8, x8, #3, lsl #12
+// CHECK-NEXT:    210028:       ldrb    w0, [x8, #3118]
 
 // CHECK-SYMS:      0000000000000c00     0 TLS     GLOBAL DEFAULT    2 var0
 // CHECK-SYMS-NEXT: 0000000000001810     4 TLS     GLOBAL DEFAULT    2 var1
diff --git a/lld/test/ELF/got-aarch64.s b/lld/test/ELF/got-aarch64.s
deleted file mode 100644
index e56870433af3d..0000000000000
--- a/lld/test/ELF/got-aarch64.s
+++ /dev/null
@@ -1,40 +0,0 @@
-// REQUIRES: aarch64
-// RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-linux %s -o %t.o
-// RUN: ld.lld --hash-style=sysv -shared %t.o -o %t.so
-// RUN: llvm-readobj -S -r %t.so | FileCheck %s
-// RUN: llvm-objdump -d %t.so | FileCheck --check-prefix=DISASM %s
-
-// CHECK:      Name: .got
-// CHECK-NEXT: Type: SHT_PROGBITS
-// CHECK-NEXT: Flags [
-// CHECK-NEXT:   SHF_ALLOC
-// CHECK-NEXT:   SHF_WRITE
-// CHECK-NEXT: ]
-// CHECK-NEXT: Address: 0x20090
-// CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 8
-// CHECK-NEXT: Link: 0
-// CHECK-NEXT: Info: 0
-// CHECK-NEXT: AddressAlignment: 8
-
-// CHECK:      Relocations [
-// CHECK-NEXT:   Section ({{.*}}) .rela.dyn {
-// CHECK-NEXT:     0x20090 R_AARCH64_GLOB_DAT dat 0x0
-// CHECK-NEXT:   }
-// CHECK-NEXT: ]
-
-// Page(0x20098) - Page(0x10000) = 0x10000 = 65536
-// 0x20098 & 0xff8 = 0x98 = 152
-
-// DISASM: main:
-// DISASM-NEXT:   10000:  80 00 00 90   adrp  x0, #65536
-// DISASM-NEXT:   10004: 00 48 40 f9   ldr x0, [x0, #144]
-
-.global main,foo,dat
-.text
-main:
-    adrp x0, :got:dat
-    ldr x0, [x0, :got_lo12:dat]
-.data
-dat:
-    .word 42

From 1aaa23c0fc5f8a439ee9e951157de1a9869268ab Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sat, 1 Jun 2019 11:08:29 +0000
Subject: [PATCH 0813/1176] [NFC][Codegen] shift-amount-mod.ll: drop innermost
 operation

I have initially added it in for test to display both
whether the binop w/ constant is sinked or hoisted.
But as it can be seen from the 'sub (sub C, %x), %y'
test, that actually conceals the issues it is supposed to test.

At least two more patterns are unhandled:
* 'add (sub C, %x), %y' - D62266
* 'sub (sub C, %x), %y'

llvm-svn: 362295
---
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 266 ++++++--------
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 330 +++++++-----------
 2 files changed, 232 insertions(+), 364 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 0e1a426c77f29..2dc1d244481f9 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -6,170 +6,148 @@
 ; add (add %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define i32 @sink_add_of_const_to_add0(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_add0(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_add_of_const_to_add0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    add w8, w8, w2
 ; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = add i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = add i32 %t1, %c
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = add i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_add_of_const_to_add1(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_add1(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_add_of_const_to_add1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    add w8, w8, w2
 ; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = add i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = add i32 %c, %t1
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = add i32 %b, %t0
   ret i32 %r
 }
 
 ; add (sub %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define i32 @sink_sub_of_const_to_add0(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_add0(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_of_const_to_add0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    add w8, w8, w2
 ; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = add i32 %t1, %c
+  %t0 = sub i32 %a, 32
+  %r = add i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_of_const_to_add1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    add w8, w8, w2
 ; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = add i32 %c, %t1
+  %t0 = sub i32 %a, 32
+  %r = add i32 %b, %t0
   ret i32 %r
 }
 
 ; add (sub C, %x), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_add0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    mov w9, #32
-; CHECK-NEXT:    sub w8, w9, w8
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    sub w8, w8, w0
+; CHECK-NEXT:    add w0, w8, w1
 ; CHECK-NEXT:    ret
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = add i32 %t1, %c
+  %t0 = sub i32 32, %a
+  %r = add i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_add1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    mov w9, #32
-; CHECK-NEXT:    sub w8, w9, w8
-; CHECK-NEXT:    add w0, w2, w8
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    sub w8, w8, w0
+; CHECK-NEXT:    add w0, w1, w8
 ; CHECK-NEXT:    ret
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = add i32 %c, %t1
+  %t0 = sub i32 32, %a
+  %r = add i32 %b, %t0
   ret i32 %r
 }
 
 ; sub (add %x, C), %y
 ; sub %y, (add %x, C)
 
-define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = sub i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = sub i32 %t1, %c
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = sub i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w2, w8
 ; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = sub i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = sub i32 %c, %t1
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = sub i32 %b, %t0
   ret i32 %r
 }
 
 ; sub (sub %x, C), %y
 ; sub %y, (sub %x, C)
 
-define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = sub i32 %t1, %c
+  %t0 = sub i32 %a, 32
+  %r = sub i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    add w8, w8, w2
-; CHECK-NEXT:    add w0, w8, #32 // =32
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    sub w8, w8, w0
+; CHECK-NEXT:    add w0, w1, w8
 ; CHECK-NEXT:    ret
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = sub i32 %c, %t1
+  %t0 = sub i32 %a, 32
+  %r = sub i32 %b, %t0
   ret i32 %r
 }
 
 ; sub (sub C, %x), %y
 ; sub %y, (sub C, %x)
 
-define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w1, w0
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    add w0, w8, #32 // =32
+; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    sub w8, w8, w0
+; CHECK-NEXT:    sub w0, w8, w1
 ; CHECK-NEXT:    ret
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = sub i32 %t1, %c
+  %t0 = sub i32 32, %a
+  %r = sub i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub w8, w0, w1
-; CHECK-NEXT:    add w8, w8, w2
+; CHECK-NEXT:    add w8, w0, w1
 ; CHECK-NEXT:    sub w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = sub i32 %c, %t1
+  %t0 = sub i32 32, %a
+  %r = sub i32 %b, %t0
   ret i32 %r
 }
 
@@ -180,191 +158,167 @@ define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; add (add %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_add0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI12_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = add <4 x i32> %t1, %c
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_add1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI13_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = add <4 x i32> %c, %t1
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; add (sub %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_add0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = add <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_add1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = add <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; add (sub C, %x), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_add0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v3.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = add <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_add1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI17_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v3.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = add <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; sub (add %x, C), %y
 ; sub %y, (add %x, C)
 
-define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = sub <4 x i32> %t1, %c
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = sub <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_sub2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI19_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI19_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
 ; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = sub <4 x i32> %c, %t1
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; sub (sub %x, C), %y
 ; sub %y, (sub %x, C)
 
-define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = sub <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = sub <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_sub2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI21_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI21_0]
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
+; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = sub <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; sub (sub C, %x), %y
 ; sub %y, (sub C, %x)
 
-define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI22_0]
-; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = sub <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = sub <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_sub2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI23_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = sub <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 4544707d07a2f..7c0a22d4eb629 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -7,156 +7,135 @@
 ; add (add %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define i32 @sink_add_of_const_to_add0(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_add0(i32 %a, i32 %b) {
 ; X32-LABEL: sink_add_of_const_to_add0:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    addl %esi, %edi
-; X64-NEXT:    leal 32(%rdx,%rdi), %eax
+; X64-NEXT:    leal 32(%rdi,%rsi), %eax
 ; X64-NEXT:    retq
-  %t0 = add i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = add i32 %t1, %c
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = add i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_add_of_const_to_add1(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_add1(i32 %a, i32 %b) {
 ; X32-LABEL: sink_add_of_const_to_add1:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    addl %esi, %edi
-; X64-NEXT:    leal 32(%rdx,%rdi), %eax
+; X64-NEXT:    leal 32(%rdi,%rsi), %eax
 ; X64-NEXT:    retq
-  %t0 = add i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = add i32 %c, %t1
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = add i32 %b, %t0
   ret i32 %r
 }
 
 ; add (sub %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define i32 @sink_sub_of_const_to_add0(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_add0(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_of_const_to_add0:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    addl %esi, %edi
-; X64-NEXT:    leal -32(%rdx,%rdi), %eax
+; X64-NEXT:    leal -32(%rdi,%rsi), %eax
 ; X64-NEXT:    retq
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = add i32 %t1, %c
+  %t0 = sub i32 %a, 32
+  %r = add i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_of_const_to_add1:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    addl %esi, %edi
-; X64-NEXT:    leal -32(%rdx,%rdi), %eax
+; X64-NEXT:    leal -32(%rdi,%rsi), %eax
 ; X64-NEXT:    retq
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = add i32 %c, %t1
+  %t0 = sub i32 %a, 32
+  %r = add i32 %b, %t0
   ret i32 %r
 }
 
 ; add (sub C, %x), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_add0:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    subl %ecx, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    addl %esi, %edi
 ; X64-NEXT:    movl $32, %eax
 ; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %edx, %eax
+; X64-NEXT:    addl %esi, %eax
 ; X64-NEXT:    retq
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = add i32 %t1, %c
+  %t0 = sub i32 32, %a
+  %r = add i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_add1:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    subl %ecx, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    addl %esi, %edi
 ; X64-NEXT:    movl $32, %eax
 ; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %edx, %eax
+; X64-NEXT:    addl %esi, %eax
 ; X64-NEXT:    retq
-  %t0 = add i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = add i32 %c, %t1
+  %t0 = sub i32 32, %a
+  %r = add i32 %b, %t0
   ret i32 %r
 }
 
 ; sub (add %x, C), %y
 ; sub %y, (add %x, C)
 
-define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b) {
 ; X32-LABEL: sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
@@ -164,46 +143,39 @@ define i32 @sink_add_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal 32(%rdi), %eax
 ; X64-NEXT:    retq
-  %t0 = sub i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = sub i32 %t1, %c
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = sub i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_add_of_const_to_sub2(i32 %a, i32 %b) {
 ; X32-LABEL: sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl %ecx, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    leal -32(%rdx,%rsi), %eax
+; X64-NEXT:    leal -32(%rsi), %eax
 ; X64-NEXT:    retq
-  %t0 = sub i32 %a, %b
-  %t1 = add i32 %t0, 32 ; constant always on RHS
-  %r = sub i32 %c, %t1
+  %t0 = add i32 %a, 32 ; constant always on RHS
+  %r = sub i32 %b, %t0
   ret i32 %r
 }
 
 ; sub (sub %x, C), %y
 ; sub %y, (sub %x, C)
 
-define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
@@ -211,81 +183,69 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b, i32 %c) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    subl %edx, %edi
 ; X64-NEXT:    leal -32(%rdi), %eax
 ; X64-NEXT:    retq
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = sub i32 %t1, %c
+  %t0 = sub i32 %a, 32
+  %r = sub i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl %ecx, %eax
-; X32-NEXT:    addl $32, %eax
+; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    leal 32(%rsi,%rdx), %eax
+; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    addl %esi, %eax
 ; X64-NEXT:    retq
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 %t0, 32
-  %r = sub i32 %c, %t1
+  %t0 = sub i32 %a, 32
+  %r = sub i32 %b, %t0
   ret i32 %r
 }
 
 ; sub (sub C, %x), %y
 ; sub %y, (sub C, %x)
 
-define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl $32, %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    subl %edi, %esi
-; X64-NEXT:    subl %edx, %esi
-; X64-NEXT:    leal 32(%rsi), %eax
+; X64-NEXT:    movl $32, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
 ; X64-NEXT:    retq
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = sub i32 %t1, %c
+  %t0 = sub i32 32, %a
+  %r = sub i32 %t0, %b
   ret i32 %r
 }
-define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b, i32 %c) {
+define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    addl %ecx, %eax
 ; X32-NEXT:    addl $-32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    subl %esi, %edi
-; X64-NEXT:    leal -32(%rdi,%rdx), %eax
+; X64-NEXT:    leal -32(%rdi,%rsi), %eax
 ; X64-NEXT:    retq
-  %t0 = sub i32 %a, %b
-  %t1 = sub i32 32, %t0
-  %r = sub i32 %c, %t1
+  %t0 = sub i32 32, %a
+  %r = sub i32 %b, %t0
   ret i32 %r
 }
 
@@ -296,146 +256,126 @@ define i32 @sink_sub_from_const_to_sub2(i32 %a, i32 %b, i32 %c) {
 ; add (add %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_add_of_const_to_add0:
 ; X32:       # %bb.0:
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = add <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = add <4 x i32> %t1, %c
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_add_of_const_to_add1:
 ; X32:       # %bb.0:
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = add <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = add <4 x i32> %c, %t1
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; add (sub %x, C), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add0:
 ; X32:       # %bb.0:
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = add <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add1:
 ; X32:       # %bb.0:
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = add <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; add (sub C, %x), %y
 ; Outer 'add' is commutative - 2 variants.
 
-define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; ALL-LABEL: vec_sink_sub_from_const_to_add0:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    paddd %xmm1, %xmm0
-; ALL-NEXT:    movdqa {{.*#+}} xmm1 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm1
-; ALL-NEXT:    paddd %xmm2, %xmm1
-; ALL-NEXT:    movdqa %xmm1, %xmm0
+; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
+; ALL-NEXT:    psubd %xmm0, %xmm2
+; ALL-NEXT:    paddd %xmm1, %xmm2
+; ALL-NEXT:    movdqa %xmm2, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = add <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; ALL-LABEL: vec_sink_sub_from_const_to_add1:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    paddd %xmm1, %xmm0
-; ALL-NEXT:    movdqa {{.*#+}} xmm1 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm1
-; ALL-NEXT:    paddd %xmm2, %xmm1
-; ALL-NEXT:    movdqa %xmm1, %xmm0
+; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
+; ALL-NEXT:    psubd %xmm0, %xmm2
+; ALL-NEXT:    paddd %xmm1, %xmm2
+; ALL-NEXT:    movdqa %xmm2, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
-  %t0 = add <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = add <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; sub (add %x, C), %y
 ; sub %y, (add %x, C)
 
-define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_add_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = sub <4 x i32> %t1, %c
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = sub <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_add_of_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm1
 ; X32-NEXT:    movdqa %xmm1, %xmm0
 ; X32-NEXT:    retl
@@ -443,100 +383,74 @@ define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x
 ; X64-LABEL: vec_sink_add_of_const_to_sub2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    movdqa %xmm1, %xmm0
 ; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = add <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
-  %r = sub <4 x i32> %c, %t1
+  %t0 = add <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46> ; constant always on RHS
+  %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; sub (sub %x, C), %y
 ; sub %y, (sub %x, C)
 
-define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_sub_of_const_to_sub:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd %xmm2, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_sub:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd %xmm2, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = sub <4 x i32> %t1, %c
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = sub <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; X32-LABEL: vec_sink_sub_of_const_to_sub2:
-; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    paddd %xmm2, %xmm1
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    movdqa %xmm1, %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: vec_sink_sub_of_const_to_sub2:
-; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    paddd %xmm2, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    movdqa %xmm1, %xmm0
-; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
-  %r = sub <4 x i32> %c, %t1
+define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: vec_sink_sub_of_const_to_sub2:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
+; ALL-NEXT:    psubd %xmm0, %xmm2
+; ALL-NEXT:    paddd %xmm1, %xmm2
+; ALL-NEXT:    movdqa %xmm2, %xmm0
+; ALL-NEXT:    ret{{[l|q]}}
+  %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
+  %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
 }
 
 ; sub (sub C, %x), %y
 ; sub %y, (sub C, %x)
 
-define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; X32-LABEL: vec_sink_sub_from_const_to_sub:
-; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm0, %xmm1
-; X32-NEXT:    psubd %xmm2, %xmm1
-; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X32-NEXT:    movdqa %xmm1, %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: vec_sink_sub_from_const_to_sub:
-; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm0, %xmm1
-; X64-NEXT:    psubd %xmm2, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    movdqa %xmm1, %xmm0
-; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = sub <4 x i32> %t1, %c
+define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: vec_sink_sub_from_const_to_sub:
+; ALL:       # %bb.0:
+; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
+; ALL-NEXT:    psubd %xmm0, %xmm2
+; ALL-NEXT:    psubd %xmm1, %xmm2
+; ALL-NEXT:    movdqa %xmm2, %xmm0
+; ALL-NEXT:    ret{{[l|q]}}
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = sub <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
-define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; X32-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
+; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
-  %t0 = sub <4 x i32> %a, %b
-  %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0
-  %r = sub <4 x i32> %c, %t1
+  %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
+  %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r
 }

From 2ef83571f264a4e03bdd94af29949b49092b71db Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 1 Jun 2019 12:10:29 +0000
Subject: [PATCH 0814/1176] [SLPVectorizer][X86] This test was from PR28474

llvm-svn: 362296
---
 llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
index 0f0bbf9a2ad2c..eaa230e9e11ac 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.10.0 -mattr=+sse4.2 | FileCheck %s
 
-
+; PR28474
 define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:

From e6d1a80370f2e465c5aa61f6ba5f16de513aa393 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 1 Jun 2019 12:35:03 +0000
Subject: [PATCH 0815/1176] [SLPVectorizer][X86] Add other tests described in
 PR28474

llvm-svn: 362297
---
 .../SLPVectorizer/X86/reduction_loads.ll      | 256 ++++++++++++++++++
 1 file changed, 256 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
index eaa230e9e11ac..56539ab928eab 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -2,6 +2,23 @@
 ; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.10.0 -mattr=+sse4.2 | FileCheck %s
 
 ; PR28474
+
+;void foo();
+;
+;int test1(unsigned int *p) {
+;  int sum = 0;
+;  #pragma nounroll
+;  for (int y = 0; y < 2; y++) {
+;    // Inner loop gets unrolled
+;    for (int x = 0; x < 8; x++) {
+;      sum += p[x] * 42;
+;    }
+;    // Dummy call to keep outer loop alive
+;    foo();
+;  }
+;  return sum;
+;}
+
 define i32 @test(i32* nocapture readonly %p) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
@@ -79,3 +96,242 @@ for.body:
 for.end:
   ret i32 %add.7
 }
+
+;void foo();
+;
+;int test2(unsigned int *p, unsigned int *q) {
+;  int sum = 0;
+;  #pragma nounroll
+;  for (int y = 0; y < 2; y++) {
+;    // Inner loop gets unrolled
+;    for (int x = 0; x < 8; x++) {
+;      sum += p[x] * q[x];
+;    }
+;    // Dummy call to keep outer loop alive
+;    foo();
+;  }
+;  return sum;
+;}
+
+define i32 @test2(i32* nocapture readonly %p, i32* nocapture readonly %q) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX_P_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX_P_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX_P_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX_P_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX_P_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX_P_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX_P_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX_Q_1:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX_Q_2:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX_Q_3:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX_Q_4:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX_Q_5:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX_Q_6:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX_Q_7:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 7
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
+; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
+;
+entry:
+  %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1
+  %arrayidx.p.2 = getelementptr inbounds i32, i32* %p, i64 2
+  %arrayidx.p.3 = getelementptr inbounds i32, i32* %p, i64 3
+  %arrayidx.p.4 = getelementptr inbounds i32, i32* %p, i64 4
+  %arrayidx.p.5 = getelementptr inbounds i32, i32* %p, i64 5
+  %arrayidx.p.6 = getelementptr inbounds i32, i32* %p, i64 6
+  %arrayidx.p.7 = getelementptr inbounds i32, i32* %p, i64 7
+
+  %arrayidx.q.1 = getelementptr inbounds i32, i32* %q, i64 1
+  %arrayidx.q.2 = getelementptr inbounds i32, i32* %q, i64 2
+  %arrayidx.q.3 = getelementptr inbounds i32, i32* %q, i64 3
+  %arrayidx.q.4 = getelementptr inbounds i32, i32* %q, i64 4
+  %arrayidx.q.5 = getelementptr inbounds i32, i32* %q, i64 5
+  %arrayidx.q.6 = getelementptr inbounds i32, i32* %q, i64 6
+  %arrayidx.q.7 = getelementptr inbounds i32, i32* %q, i64 7
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
+  %tmpp = load i32, i32* %p, align 4
+  %tmpq = load i32, i32* %q, align 4
+  %mul = mul i32 %tmpp, %tmpq
+  %add = add i32 %mul, %sum
+  %tmp5p = load i32, i32* %arrayidx.p.1, align 4
+  %tmp5q = load i32, i32* %arrayidx.q.1, align 4
+  %mul.1 = mul i32 %tmp5p, %tmp5q
+  %add.1 = add i32 %mul.1, %add
+  %tmp6p = load i32, i32* %arrayidx.p.2, align 4
+  %tmp6q = load i32, i32* %arrayidx.q.2, align 4
+  %mul.2 = mul i32 %tmp6p, %tmp6q
+  %add.2 = add i32 %mul.2, %add.1
+  %tmp7p = load i32, i32* %arrayidx.p.3, align 4
+  %tmp7q = load i32, i32* %arrayidx.q.3, align 4
+  %mul.3 = mul i32 %tmp7p, %tmp7q
+  %add.3 = add i32 %mul.3, %add.2
+  %tmp8p = load i32, i32* %arrayidx.p.4, align 4
+  %tmp8q = load i32, i32* %arrayidx.q.4, align 4
+  %mul.4 = mul i32 %tmp8p, %tmp8q
+  %add.4 = add i32 %mul.4, %add.3
+  %tmp9p = load i32, i32* %arrayidx.p.5, align 4
+  %tmp9q = load i32, i32* %arrayidx.q.5, align 4
+  %mul.5 = mul i32 %tmp9p, %tmp9q
+  %add.5 = add i32 %mul.5, %add.4
+  %tmp10p = load i32, i32* %arrayidx.p.6, align 4
+  %tmp10q = load i32, i32* %arrayidx.q.6, align 4
+  %mul.6 = mul i32 %tmp10p, %tmp10q
+  %add.6 = add i32 %mul.6, %add.5
+  %tmp11p = load i32, i32* %arrayidx.p.7, align 4
+  %tmp11q = load i32, i32* %arrayidx.q.7, align 4
+  %mul.7 = mul i32 %tmp11p, %tmp11q
+  %add.7 = add i32 %mul.7, %add.6
+  br i1 true, label %for.end, label %for.body
+
+for.end:
+  ret i32 %add.7
+}
+
+;void foo();
+;
+;int test3(unsigned int *p, unsigned int *q) {
+;  int sum = 0;
+;  #pragma nounroll
+;  for (int y = 0; y < 2; y++) {
+;    // Inner loop gets unrolled
+;    for (int x = 0; x < 8; x++) {
+;      sum += p[x] * q[7-x];
+;    }
+;    // Dummy call to keep outer loop alive
+;    foo();
+;  }
+;  return sum;
+;}
+
+define i32 @test3(i32* nocapture readonly %p, i32* nocapture readonly %q) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX_P_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX_P_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX_P_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX_P_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX_P_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX_P_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX_P_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
+; CHECK-NEXT:    [[ARRAYIDX_Q_1:%.*]] = getelementptr inbounds i32, i32* [[Q:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX_Q_2:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 2
+; CHECK-NEXT:    [[ARRAYIDX_Q_3:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 3
+; CHECK-NEXT:    [[ARRAYIDX_Q_4:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 4
+; CHECK-NEXT:    [[ARRAYIDX_Q_5:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 5
+; CHECK-NEXT:    [[ARRAYIDX_Q_6:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 6
+; CHECK-NEXT:    [[ARRAYIDX_Q_7:%.*]] = getelementptr inbounds i32, i32* [[Q]], i64 7
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
+; CHECK-NEXT:    [[REORDER_SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[Q]] to <8 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <8 x i32> [[REORDER_SHUFFLE]], [[TMP3]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 undef, [[SUM]]
+; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 undef, [[ADD]]
+; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
+; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
+; CHECK-NEXT:    [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
+; CHECK-NEXT:    [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
+; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <8 x i32> [[TMP4]], [[RDX_SHUF]]
+; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
+; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
+; CHECK-NEXT:    [[OP_EXTRA]] = add i32 [[TMP5]], [[SUM]]
+; CHECK-NEXT:    [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
+; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret i32 [[OP_EXTRA]]
+;
+entry:
+  %arrayidx.p.1 = getelementptr inbounds i32, i32* %p, i64 1
+  %arrayidx.p.2 = getelementptr inbounds i32, i32* %p, i64 2
+  %arrayidx.p.3 = getelementptr inbounds i32, i32* %p, i64 3
+  %arrayidx.p.4 = getelementptr inbounds i32, i32* %p, i64 4
+  %arrayidx.p.5 = getelementptr inbounds i32, i32* %p, i64 5
+  %arrayidx.p.6 = getelementptr inbounds i32, i32* %p, i64 6
+  %arrayidx.p.7 = getelementptr inbounds i32, i32* %p, i64 7
+
+  %arrayidx.q.1 = getelementptr inbounds i32, i32* %q, i64 1
+  %arrayidx.q.2 = getelementptr inbounds i32, i32* %q, i64 2
+  %arrayidx.q.3 = getelementptr inbounds i32, i32* %q, i64 3
+  %arrayidx.q.4 = getelementptr inbounds i32, i32* %q, i64 4
+  %arrayidx.q.5 = getelementptr inbounds i32, i32* %q, i64 5
+  %arrayidx.q.6 = getelementptr inbounds i32, i32* %q, i64 6
+  %arrayidx.q.7 = getelementptr inbounds i32, i32* %q, i64 7
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
+  %tmpp = load i32, i32* %p, align 4
+  %tmpq = load i32, i32* %arrayidx.q.7, align 4
+  %mul = mul i32 %tmpp, %tmpq
+  %add = add i32 %mul, %sum
+  %tmp5p = load i32, i32* %arrayidx.p.1, align 4
+  %tmp5q = load i32, i32* %arrayidx.q.6, align 4
+  %mul.1 = mul i32 %tmp5p, %tmp5q
+  %add.1 = add i32 %mul.1, %add
+  %tmp6p = load i32, i32* %arrayidx.p.2, align 4
+  %tmp6q = load i32, i32* %arrayidx.q.5, align 4
+  %mul.2 = mul i32 %tmp6p, %tmp6q
+  %add.2 = add i32 %mul.2, %add.1
+  %tmp7p = load i32, i32* %arrayidx.p.3, align 4
+  %tmp7q = load i32, i32* %arrayidx.q.4, align 4
+  %mul.3 = mul i32 %tmp7p, %tmp7q
+  %add.3 = add i32 %mul.3, %add.2
+  %tmp8p = load i32, i32* %arrayidx.p.4, align 4
+  %tmp8q = load i32, i32* %arrayidx.q.3, align 4
+  %mul.4 = mul i32 %tmp8p, %tmp8q
+  %add.4 = add i32 %mul.4, %add.3
+  %tmp9p = load i32, i32* %arrayidx.p.5, align 4
+  %tmp9q = load i32, i32* %arrayidx.q.2, align 4
+  %mul.5 = mul i32 %tmp9p, %tmp9q
+  %add.5 = add i32 %mul.5, %add.4
+  %tmp10p = load i32, i32* %arrayidx.p.6, align 4
+  %tmp10q = load i32, i32* %arrayidx.q.1, align 4
+  %mul.6 = mul i32 %tmp10p, %tmp10q
+  %add.6 = add i32 %mul.6, %add.5
+  %tmp11p = load i32, i32* %arrayidx.p.7, align 4
+  %tmp11q = load i32, i32* %q, align 4
+  %mul.7 = mul i32 %tmp11p, %tmp11q
+  %add.7 = add i32 %mul.7, %add.6
+  br i1 true, label %for.end, label %for.body
+
+for.end:
+  ret i32 %add.7
+}

From 45eb4c7e55341c0b83a21dedecc092e273795eda Mon Sep 17 00:00:00 2001
From: Dylan McKay <me@dylanmckay.io>
Date: Sat, 1 Jun 2019 12:38:56 +0000
Subject: [PATCH 0816/1176] [AVR] Disable register coalescing to the
 PTRDISPREGS class

If we would allow register coalescing on PTRDISPREGS class then register
allocator can lock Z register to some virtual register. Larger instructions
requiring a memory acces then fail during the register allocation phase since
there is no available register to hold a pointer if Y register was already
taken for a stack frame. This patch prevents it by keeping Z register
spillable. It does it by not allowing coalescer to lock it.

Original discussion on https://github.com/avr-rust/rust/issues/128.

llvm-svn: 362298
---
 llvm/lib/Target/AVR/AVRRegisterInfo.cpp | 15 +++++++++++++++
 llvm/lib/Target/AVR/AVRRegisterInfo.h   |  7 +++++++
 llvm/test/CodeGen/AVR/PR37143.ll        |  6 +++---
 llvm/test/CodeGen/AVR/store.ll          |  4 ++--
 4 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 8dc31fe066b96..0aae34d7dfd0f 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 
@@ -272,4 +273,18 @@ void AVRRegisterInfo::splitReg(unsigned Reg,
     HiReg = getSubReg(Reg, AVR::sub_hi);
 }
 
+bool AVRRegisterInfo::shouldCoalesce(MachineInstr *MI,
+                                     const TargetRegisterClass *SrcRC,
+                                     unsigned SubReg,
+                                     const TargetRegisterClass *DstRC,
+                                     unsigned DstSubReg,
+                                     const TargetRegisterClass *NewRC,
+                                     LiveIntervals &LIS) const {
+  if(this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
+    return false;
+  }
+
+  return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg, NewRC, LIS);
+}
+
 } // end of namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 2365039dbe32a..e8354925fed88 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -55,6 +55,13 @@ class AVRRegisterInfo : public AVRGenRegisterInfo {
     return true;
   }
 
+  bool shouldCoalesce(MachineInstr *MI,
+                      const TargetRegisterClass *SrcRC,
+                      unsigned SubReg,
+                      const TargetRegisterClass *DstRC,
+                      unsigned DstSubReg,
+                      const TargetRegisterClass *NewRC,
+                      LiveIntervals &LIS) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AVR/PR37143.ll b/llvm/test/CodeGen/AVR/PR37143.ll
index db157edc22f08..72f4a2fd3722c 100644
--- a/llvm/test/CodeGen/AVR/PR37143.ll
+++ b/llvm/test/CodeGen/AVR/PR37143.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -mattr=avr6,sram < %s -march=avr | FileCheck %s
 
-; CHECK: ld {{r[0-9]+}}, [[PTR:[YZ]]]
+; CHECK: ld {{r[0-9]+}}, [[PTR:[XYZ]]]
 ; CHECK: ldd {{r[0-9]+}}, [[PTR]]+1
-; CHECK: st [[PTR]], {{r[0-9]+}}
-; CHECK: std [[PTR]]+1, {{r[0-9]+}}
+; CHECK: st [[PTR2:[XYZ]]], {{r[0-9]+}}
+; CHECK: std [[PTR2]]+1, {{r[0-9]+}}
 define void @load_store_16(i16* nocapture %ptr) local_unnamed_addr #1 {
 entry:
   %0 = load i16, i16* %ptr, align 2
diff --git a/llvm/test/CodeGen/AVR/store.ll b/llvm/test/CodeGen/AVR/store.ll
index bad3f61a0135c..81bad77538745 100644
--- a/llvm/test/CodeGen/AVR/store.ll
+++ b/llvm/test/CodeGen/AVR/store.ll
@@ -45,9 +45,9 @@ define void @store16disp(i16* %x, i16 %y) {
 
 define void @store16nodisp(i16* %x, i16 %y) {
 ; CHECK-LABEL: store16nodisp:
+; CHECK: subi r24, 192
+; CHECK: sbci r25, 255
 ; CHECK: movw r30, r24
-; CHECK: subi r30, 192
-; CHECK: sbci r31, 255
 ; CHECK: st {{[YZ]}}, r22
 ; CHECK: std {{[YZ]}}+1, r23
   %arrayidx = getelementptr inbounds i16, i16* %x, i16 32

From 25694e0084440f913f89c985b950948819215820 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Sat, 1 Jun 2019 13:55:18 +0000
Subject: [PATCH 0817/1176] [mips] Extend range of register indexes accepted by
 cfcmsa/ctcmsa

The `cfcmsa` and `ctcmsa` instructions accept index of MSA control
register. The MIPS64 SIMD Architecture define eight MSA control
registers. But register index for `cfcmsa` and `ctcmsa` instructions
might be any number in 0..31 range. If the index is greater then 7,
`cfcmsa` writes zero to the destination registers and `ctcmsa` does
nothing [1].

[1] MIPS Architecture for Programmers Volume IV-j:
    The MIPS64 SIMD Architecture Module
https://www.mips.com/?do-download=the-mips64-simd-architecture-module

Differential Revision: https://reviews.llvm.org/D62597

llvm-svn: 362299
---
 llvm/lib/Target/Mips/MipsRegisterInfo.td    |  8 +++++++-
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 14 ++------------
 llvm/test/CodeGen/Mips/msa/elm_cxcmsa.ll    | 18 ++++++++++++++++++
 3 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.td b/llvm/lib/Target/Mips/MipsRegisterInfo.td
index 7dca8835aad23..8a6279da46b76 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -258,6 +258,11 @@ let Namespace = "Mips" in {
   def MSARequest : MipsReg<5, "5">;
   def MSAMap     : MipsReg<6, "6">;
   def MSAUnmap   : MipsReg<7, "7">;
+  // MSA-ASE fake control registers.
+  // These registers do not exist, but instructions like `cfcmsa`
+  // and `ctcmsa` allows to specify them.
+  foreach I = 8-31 in
+  def MSA#I : MipsReg<#I, ""#I>;
 
   // Octeon multiplier and product registers
   def MPL0 : MipsReg<0, "mpl0">;
@@ -438,7 +443,8 @@ def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
                                 (decimate (sequence "W%u", 0, 31), 2)>;
 
 def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
-  MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
+  MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap,
+  (sequence "MSA%u", 8, 31))>, Unallocatable;
 
 // Hi/Lo Registers
 def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>;
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cc6efe57eff53..c50e4c215a4df 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -75,18 +75,8 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
 }
 
 unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const {
-  switch (cast<ConstantSDNode>(RegIdx)->getZExtValue()) {
-  default:
-    llvm_unreachable("Could not map int to register");
-  case 0: return Mips::MSAIR;
-  case 1: return Mips::MSACSR;
-  case 2: return Mips::MSAAccess;
-  case 3: return Mips::MSASave;
-  case 4: return Mips::MSAModify;
-  case 5: return Mips::MSARequest;
-  case 6: return Mips::MSAMap;
-  case 7: return Mips::MSAUnmap;
-  }
+  uint64_t RegNum = cast<ConstantSDNode>(RegIdx)->getZExtValue();
+  return Mips::MSACtrlRegClass.getRegister(RegNum);
 }
 
 bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
diff --git a/llvm/test/CodeGen/Mips/msa/elm_cxcmsa.ll b/llvm/test/CodeGen/Mips/msa/elm_cxcmsa.ll
index b96499c152359..c10c206255f7a 100644
--- a/llvm/test/CodeGen/Mips/msa/elm_cxcmsa.ll
+++ b/llvm/test/CodeGen/Mips/msa/elm_cxcmsa.ll
@@ -84,6 +84,15 @@ entry:
 ; CHECK: cfcmsa $[[R1:[0-9]+]], $7
 ; CHECK: .size msa_unmap_cfcmsa_test
 ;
+define i32 @msa_invalid_reg_cfcmsa_test() nounwind {
+entry:
+  %0 = tail call i32 @llvm.mips.cfcmsa(i32 8)
+  ret i32 %0
+}
+
+; CHECK-LABEL: msa_invalid_reg_cfcmsa_test:
+; CHECK: cfcmsa ${{[0-9]+}}, $8
+;
 define void @msa_ir_ctcmsa_test() nounwind {
 entry:
   tail call void @llvm.mips.ctcmsa(i32 0, i32 1)
@@ -164,5 +173,14 @@ entry:
 ; CHECK: ctcmsa $7
 ; CHECK: .size msa_unmap_ctcmsa_test
 ;
+define void @msa_invalid_reg_ctcmsa_test() nounwind {
+entry:
+  tail call void @llvm.mips.ctcmsa(i32 8, i32 1)
+  ret void
+}
+
+; CHECK: msa_invalid_reg_ctcmsa_test:
+; CHECK: ctcmsa $8
+;
 declare i32 @llvm.mips.cfcmsa(i32) nounwind
 declare void @llvm.mips.ctcmsa(i32, i32) nounwind

From 0d4a0405104be20ce03f53ea762dc4e9ccc8a558 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 1 Jun 2019 14:05:46 +0000
Subject: [PATCH 0818/1176] [X86][AVX] Add tests for
 CONCAT(MOVDDUP(x),MOVDDUP(y))

llvm-svn: 362300
---
 .../test/CodeGen/X86/vector-shuffle-256-v4.ll | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 1b9bc124c1ba9..ea0b9b1b06dec 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -686,6 +686,47 @@ define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
   ret <4 x double> %1
 }
 
+define <4 x double> @shuffle_v4f64_0044(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0044:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0044:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
+; AVX2-NEXT:    retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_v4f64_0044:
+; AVX512VL-SLOW:       # %bb.0:
+; AVX512VL-SLOW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
+; AVX512VL-SLOW-NEXT:    retq
+;
+; AVX512VL-FAST-LABEL: shuffle_v4f64_0044:
+; AVX512VL-FAST:       # %bb.0:
+; AVX512VL-FAST-NEXT:    vmovapd {{.*#+}} ymm2 = [0,0,4,4]
+; AVX512VL-FAST-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
+; AVX512VL-FAST-NEXT:    retq
+  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+  ret <4 x double> %1
+}
+
+define <4 x double> @shuffle_v4f64_0044_v2f64(<2 x double> %a, <2 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0044_v2f64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; ALL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
+  %1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  %2 = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %3
+}
+
 define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_0000:
 ; AVX1:       # %bb.0:

From a881ffeae432f89f7a204d41080cd14ce7127794 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 1 Jun 2019 14:58:36 +0000
Subject: [PATCH 0819/1176] [APInt] Add PR40897 test case

In reality APInt::getBitsNeeded(INT_MIN, base) cases require one less bit than is returned

llvm-svn: 362301
---
 llvm/unittests/ADT/APIntTest.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp
index a92a654ac17b5..b69dce1bd8a1d 100644
--- a/llvm/unittests/ADT/APIntTest.cpp
+++ b/llvm/unittests/ADT/APIntTest.cpp
@@ -1262,6 +1262,9 @@ TEST(APIntTest, StringBitsNeeded10) {
   EXPECT_EQ(5U, APInt::getBitsNeeded("-10", 10));
   EXPECT_EQ(6U, APInt::getBitsNeeded("-19", 10));
   EXPECT_EQ(6U, APInt::getBitsNeeded("-20", 10));
+
+  // TODO: INT_MIN cases need 1 less bit (PR40897)
+  EXPECT_EQ(9U, APInt::getBitsNeeded("-128", 10));
 }
 
 TEST(APIntTest, StringBitsNeeded16) {

From 6a989c358cc79928a8cff0b51913d11417866dc4 Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Date: Sat, 1 Jun 2019 15:22:37 +0000
Subject: [PATCH 0820/1176] [MCA][Scheduler] Change how memory instructions are
 dispatched to the pending set. NFCI

llvm-svn: 362302
---
 llvm/include/llvm/MCA/HardwareUnits/LSUnit.h | 33 ++++++++----
 llvm/lib/MCA/HardwareUnits/Scheduler.cpp     | 53 +++++++++-----------
 2 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
index e55b700884295..f2a5cf86ca49a 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -70,7 +70,7 @@ class MemoryGroup {
   unsigned getNumExecuting() const { return NumExecuting; }
   unsigned getNumExecuted() const { return NumExecuted; }
 
-  const InstRef &getCriticalMemoryInstruction() const { 
+  const InstRef &getCriticalMemoryInstruction() const {
     return CriticalMemoryInstruction;
   }
   const CriticalDependency &getCriticalPredecessor() const {
@@ -96,7 +96,7 @@ class MemoryGroup {
   }
   bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
   bool isExecuting() const {
-    return NumExecuting == NumInstructions - NumExecuted;
+    return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
   }
   bool isExecuted() const { return NumInstructions == NumExecuted; }
 
@@ -247,22 +247,32 @@ class LSUnitBase : public HardwareUnit {
   /// Check if a peviously dispatched instruction IR is now ready for execution.
   bool isReady(const InstRef &IR) const {
     unsigned GroupID = IR.getInstruction()->getLSUTokenID();
-    assert(isValidGroupID(GroupID) &&
-           "Invalid group associated with this instruction!");
-    const MemoryGroup &Group = *Groups.find(GroupID)->second;
+    const MemoryGroup &Group = getGroup(GroupID);
     return Group.isReady();
   }
 
-  /// Check if a previously dispatched instruction IR only depends on
-  /// instructions that are currently executing.
+  /// Check if instruction IR only depends on memory instructions that are
+  /// currently executing.
   bool isPending(const InstRef &IR) const {
     unsigned GroupID = IR.getInstruction()->getLSUTokenID();
-    assert(isValidGroupID(GroupID) &&
-           "Invalid group associated with this instruction!");
-    const MemoryGroup &Group = *Groups.find(GroupID)->second;
+    const MemoryGroup &Group = getGroup(GroupID);
     return Group.isPending();
   }
 
+  /// Check if instruction IR is still waiting on memory operations, and the
+  /// wait time is still unknown.
+  bool isWaiting(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    const MemoryGroup &Group = getGroup(GroupID);
+    return Group.isWaiting();
+  }
+
+  bool hasDependentUsers(const InstRef &IR) const {
+    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+    const MemoryGroup &Group = getGroup(GroupID);
+    return !Group.isExecuted() && Group.getNumSuccessors();
+  }
+
   const MemoryGroup &getGroup(unsigned Index) const {
     assert(isValidGroupID(Index) && "Group doesn't exist!");
     return *Groups.find(Index)->second;
@@ -274,7 +284,8 @@ class LSUnitBase : public HardwareUnit {
   }
 
   unsigned createMemoryGroup() {
-    Groups.insert(std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+    Groups.insert(
+        std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
     return NextGroupID++;
   }
 
diff --git a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
index 3afc0ac89ef02..c7091203595c7 100644
--- a/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/llvm/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -104,6 +104,7 @@ void Scheduler::issueInstruction(
     SmallVectorImpl<InstRef> &ReadyInstructions) {
   const Instruction &Inst = *IR.getInstruction();
   bool HasDependentUsers = Inst.hasDependentUsers();
+  HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR);
 
   Resources->releaseBuffers(Inst.getDesc().Buffers);
   issueInstructionImpl(IR, UsedResources);
@@ -111,14 +112,9 @@ void Scheduler::issueInstruction(
   // other dependent instructions. Dependent instructions may be issued during
   // this same cycle if operands have ReadAdvance entries.  Promote those
   // instructions to the ReadySet and notify the caller that those are ready.
-  // If IR is a memory operation, then always call method `promoteToReadySet()`
-  // to notify any dependent memory operations that IR started execution.
-  bool ShouldPromoteInstructions = Inst.isMemOp();
   if (HasDependentUsers)
-    ShouldPromoteInstructions |= promoteToPendingSet(PendingInstructions);
-
-  if (ShouldPromoteInstructions)
-    promoteToReadySet(ReadyInstructions);
+    if (promoteToPendingSet(PendingInstructions))
+      promoteToReadySet(ReadyInstructions);
 }
 
 bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
@@ -130,18 +126,18 @@ bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
     if (!IR)
       break;
 
-    // Check if there are unsolved memory dependencies.
+    // Check if there are unsolved register dependencies.
     Instruction &IS = *IR.getInstruction();
-    if (IS.isMemOp() && !LSU.isReady(IR)) {
+    if (!IS.isReady() && !IS.updatePending()) {
       ++I;
       continue;
     }
-
-    // Check if there are unsolved register dependencies.
-    if (!IS.isReady() && !IS.updatePending()) {
+    // Check if there are unsolved memory dependencies.
+    if (IS.isMemOp() && !LSU.isReady(IR)) {
       ++I;
       continue;
     }
+
     LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
                       << " promoted to the READY set.\n");
 
@@ -173,6 +169,12 @@ bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) {
       ++I;
       continue;
     }
+
+    if (IS.isMemOp() && LSU.isWaiting(IR)) {
+      ++I;
+      continue;
+    }
+
     LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
                       << " promoted to the PENDING set.\n");
 
@@ -251,13 +253,8 @@ void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
     if (Resources->checkAvailability(IS.getDesc()))
       continue;
 
-    if (IS.isMemOp()) {
-      const MemoryGroup &Group = LSU.getGroup(IS.getLSUTokenID());
-      if (Group.isWaiting())
-        continue;
-      if (Group.isPending())
-        MemDeps.emplace_back(IR);
-    }
+    if (IS.isMemOp() && LSU.isPending(IR))
+      MemDeps.emplace_back(IR);
 
     if (IS.isPending())
       RegDeps.emplace_back(IR);
@@ -309,7 +306,13 @@ bool Scheduler::dispatch(InstRef &IR) {
   if (IS.isMemOp())
     IS.setLSUTokenID(LSU.dispatch(IR));
 
-  if (IS.isPending()) {
+  if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) {
+    LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
+    WaitSet.push_back(IR);
+    return false;
+  }
+
+  if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) {
     LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
                       << " to the PendingSet\n");
     PendingSet.push_back(IR);
@@ -317,14 +320,8 @@ bool Scheduler::dispatch(InstRef &IR) {
     return false;
   }
 
-  // Memory operations that still have unsolved memory dependencies are
-  // initially dispatched to the WaitSet.
-  if (!IS.isReady() || (IS.isMemOp() && !LSU.isReady(IR))) {
-    LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
-    WaitSet.push_back(IR);
-    return false;
-  }
-
+  assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) &&
+         "Unexpected internal state found!");
   // Don't add a zero-latency instruction to the Ready queue.
   // A zero-latency instruction doesn't consume any scheduler resources. That is
   // because it doesn't need to be executed, and it is often removed at register

From cd1878d0f957f72e34b378d25742dbc886f079bc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 1 Jun 2019 18:27:06 +0000
Subject: [PATCH 0821/1176] [AMDGPU] Regenerate SDIV tests for an upcoming
 patch

llvm-svn: 362303
---
 llvm/test/CodeGen/AMDGPU/sdiv.ll | 2390 +++++++++++++++++++++++++++++-
 1 file changed, 2353 insertions(+), 37 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/sdiv.ll b/llvm/test/CodeGen/AMDGPU/sdiv.ll
index a67e83035a67c..03784dcba51f0 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv.ll
@@ -1,7 +1,8 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=amdgcn | FileCheck %s -check-prefixes=FUNC,SI,GCN
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s -check-prefixes=FUNC,SI,TONGA
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global | FileCheck %s -check-prefixes=FUNC,SI,GFX9
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood | FileCheck %s -check-prefixes=FUNC,EG
 
 ; The code generated by sdiv is long and complex and may frequently change.
 ; The goal of this test is to make sure the ISel doesn't fail.
@@ -12,9 +13,198 @@
 ; This was fixed by adding an additional pattern in R600Instructions.td to
 ; match this pattern with a CNDGE_INT.
 
-; FUNC-LABEL: {{^}}sdiv_i32:
-; EG: CF_END
 define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+; GCN-LABEL: sdiv_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GCN-NEXT:    v_xor_b32_e32 v4, v2, v3
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; GCN-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GCN-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GCN-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GCN-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GCN-NEXT:    v_mul_hi_u32 v3, v2, v1
+; GCN-NEXT:    v_mul_lo_u32 v5, v2, v1
+; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v5
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v3
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, v6, s[0:1]
+; GCN-NEXT:    v_mul_hi_u32 v3, v3, v2
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, v3, v2
+; GCN-NEXT:    v_subrev_i32_e32 v2, vcc, v3, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[0:1]
+; GCN-NEXT:    v_mul_hi_u32 v2, v2, v0
+; GCN-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, -1, v2
+; GCN-NEXT:    v_subrev_i32_e32 v7, vcc, v3, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[0:1], v7, v1
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, v5, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GCN-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: sdiv_i32:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s7, 0xf000
+; TONGA-NEXT:    s_mov_b32 s6, -1
+; TONGA-NEXT:    s_mov_b32 s2, s6
+; TONGA-NEXT:    s_mov_b32 s3, s7
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s10
+; TONGA-NEXT:    s_mov_b32 s1, s11
+; TONGA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; TONGA-NEXT:    s_mov_b32 s4, s8
+; TONGA-NEXT:    s_mov_b32 s5, s9
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
+; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v2
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v6, v0
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v6
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; TONGA-NEXT:    v_xor_b32_e32 v2, v6, v2
+; TONGA-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; TONGA-NEXT:    v_mul_lo_u32 v4, v3, v1
+; TONGA-NEXT:    v_mul_hi_u32 v5, v3, v1
+; TONGA-NEXT:    v_sub_u32_e32 v7, vcc, 0, v4
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v5
+; TONGA-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
+; TONGA-NEXT:    v_mul_hi_u32 v4, v4, v3
+; TONGA-NEXT:    v_add_u32_e32 v5, vcc, v4, v3
+; TONGA-NEXT:    v_subrev_u32_e32 v3, vcc, v4, v3
+; TONGA-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[0:1]
+; TONGA-NEXT:    v_mul_hi_u32 v3, v3, v0
+; TONGA-NEXT:    v_mul_lo_u32 v4, v3, v1
+; TONGA-NEXT:    v_add_u32_e32 v5, vcc, 1, v3
+; TONGA-NEXT:    v_add_u32_e32 v6, vcc, -1, v3
+; TONGA-NEXT:    v_subrev_u32_e32 v7, vcc, v4, v0
+; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v7, v1
+; TONGA-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; TONGA-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[0:1]
+; TONGA-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v2
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
+; TONGA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_mov_b32 s10, s6
+; GFX9-NEXT:    s_mov_b32 s11, s7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s2
+; GFX9-NEXT:    s_mov_b32 s9, s3
+; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
+; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v1
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v1
+; GFX9-NEXT:    v_sub_u32_e32 v6, 0, v4
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v4, v4, v3
+; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v5
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v5
+; GFX9-NEXT:    v_add_u32_e32 v6, v3, v4
+; GFX9-NEXT:    v_sub_u32_e32 v3, v3, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v3, v3, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, v5, v2
+; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v1
+; GFX9-NEXT:    v_add_u32_e32 v5, 1, v3
+; GFX9-NEXT:    v_add_u32_e32 v6, -1, v3
+; GFX9-NEXT:    v_sub_u32_e32 v7, v0, v4
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[0:1], v7, v1
+; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v2
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: sdiv_i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 30, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     SETGT_INT * T0.W, 0.0, T0.Y,
+; EG-NEXT:     ADD_INT * T1.W, T0.Y, PV.W,
+; EG-NEXT:     XOR_INT * T1.W, PV.W, T0.W,
+; EG-NEXT:     RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT:     MULLO_INT * T0.Z, PS, T1.W,
+; EG-NEXT:     SUB_INT T2.W, 0.0, PS,
+; EG-NEXT:     MULHI * T1.X, T0.Y, T1.W,
+; EG-NEXT:     CNDE_INT T2.W, PS, PV.W, T0.Z,
+; EG-NEXT:     SETGT_INT * T3.W, 0.0, T0.X,
+; EG-NEXT:     MULHI * T0.Z, PV.W, T0.Y,
+; EG-NEXT:     ADD_INT T1.Z, T0.X, T3.W,
+; EG-NEXT:     ADD_INT T2.W, T0.Y, PS,
+; EG-NEXT:     SUB_INT * T4.W, T0.Y, PS,
+; EG-NEXT:     CNDE_INT T2.W, T1.X, PV.W, PS,
+; EG-NEXT:     XOR_INT * T4.W, PV.Z, T3.W,
+; EG-NEXT:     MULHI * T0.X, PV.W, PS,
+; EG-NEXT:     MULLO_INT * T0.Y, PS, T1.W,
+; EG-NEXT:     SUB_INT * T2.W, T4.W, PS,
+; EG-NEXT:     SETGE_UINT T1.W, PV.W, T1.W,
+; EG-NEXT:     SETGE_UINT * T2.W, T4.W, T0.Y,
+; EG-NEXT:     AND_INT T1.W, PV.W, PS,
+; EG-NEXT:     ADD_INT * T4.W, T0.X, 1,
+; EG-NEXT:     CNDE_INT T1.W, PV.W, T0.X, PS,
+; EG-NEXT:     ADD_INT * T4.W, T0.X, literal.x,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T1.W, T2.W, PS, PV.W,
+; EG-NEXT:     XOR_INT * T0.W, T3.W, T0.W,
+; EG-NEXT:     XOR_INT * T1.W, PV.W, PS,
+; EG-NEXT:     SUB_INT T0.X, PV.W, T0.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
   %num = load i32, i32 addrspace(1) * %in
   %den = load i32, i32 addrspace(1) * %den_ptr
@@ -23,8 +213,91 @@ define amdgpu_kernel void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %i
   ret void
 }
 
-; FUNC-LABEL: {{^}}sdiv_i32_4:
 define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+; GCN-LABEL: sdiv_i32_4:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT:    v_lshrrev_b32_e32 v1, 30, v1
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: sdiv_i32_4:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    buffer_load_dword v0, off, s[4:7], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; TONGA-NEXT:    v_lshrrev_b32_e32 v1, 30, v1
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; TONGA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_i32_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    buffer_load_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 30, v1
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: sdiv_i32_4:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 7, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     ASHR * T0.W, T0.X, literal.x,
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR * T0.W, PV.W, literal.x,
+; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.W, T0.X, PV.W,
+; EG-NEXT:     ASHR T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %num = load i32, i32 addrspace(1) * %in
   %result = sdiv i32 %num, 4
   store i32 %result, i32 addrspace(1)* %out
@@ -34,17 +307,98 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)*
 ; Multiply by a weird constant to make sure setIntDivIsCheap is
 ; working.
 
-; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
-; SI-DAG: s_mov_b32 [[MAGIC:s[0-9]+]], 0x98a1930b
-; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
-; SI: v_add_{{[iu]}}32
-; SI: v_lshrrev_b32
-; SI: v_ashrrev_i32
-; SI: v_add_{{[iu]}}32
-; SI: buffer_store_dword
-; SI: s_endpgm
 define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+; GCN-LABEL: slow_sdiv_i32_3435:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GCN-NEXT:    s_mov_b32 s2, 0x98a1930b
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mul_hi_i32 v1, v0, s2
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v0, 11, v0
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: slow_sdiv_i32_3435:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_mov_b32 s10, s2
+; TONGA-NEXT:    s_mov_b32 s11, s3
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s8, s6
+; TONGA-NEXT:    s_mov_b32 s9, s7
+; TONGA-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; TONGA-NEXT:    s_mov_b32 s0, 0x98a1930b
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_mul_hi_i32 v1, v0, s0
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; TONGA-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 11, v0
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v1, v0
+; TONGA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: slow_sdiv_i32_3435:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_mov_b32 s10, s2
+; GFX9-NEXT:    s_mov_b32 s11, s3
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s6
+; GFX9-NEXT:    s_mov_b32 s9, s7
+; GFX9-NEXT:    buffer_load_dword v0, off, s[8:11], 0
+; GFX9-NEXT:    s_mov_b32 s0, 0x98a1930b
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_hi_i32 v1, v0, s0
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    v_add_u32_e32 v0, v1, v0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 31, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 11, v0
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: slow_sdiv_i32_3435:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 8, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MULHI_INT * T0.Y, T0.X, literal.x,
+; EG-NEXT:    -1734241525(-4.176600e-24), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT:     ASHR T1.W, PV.W, literal.x,
+; EG-NEXT:     LSHR * T0.W, PV.W, literal.y,
+; EG-NEXT:    11(1.541428e-44), 31(4.344025e-44)
+; EG-NEXT:     ADD_INT T0.X, PV.W, PS,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %num = load i32, i32 addrspace(1) * %in
   %result = sdiv i32 %num, 3435
   store i32 %result, i32 addrspace(1)* %out
@@ -52,6 +406,326 @@ define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrsp
 }
 
 define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; GCN-LABEL: sdiv_v2i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s11, 0xf000
+; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_mov_b32 s6, s10
+; GCN-NEXT:    s_mov_b32 s7, s11
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s4, s2
+; GCN-NEXT:    s_mov_b32 s5, s3
+; GCN-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NEXT:    s_mov_b32 s2, 0x4f800000
+; GCN-NEXT:    s_mov_b32 s8, s0
+; GCN-NEXT:    s_mov_b32 s9, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; GCN-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GCN-NEXT:    v_xor_b32_e32 v8, v4, v5
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; GCN-NEXT:    v_xor_b32_e32 v9, v6, v7
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
+; GCN-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GCN-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GCN-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GCN-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GCN-NEXT:    v_cvt_f32_u32_e32 v4, v2
+; GCN-NEXT:    v_cvt_f32_u32_e32 v5, v3
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; GCN-NEXT:    v_mul_f32_e32 v4, s2, v4
+; GCN-NEXT:    v_mul_f32_e32 v5, s2, v5
+; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GCN-NEXT:    v_mul_hi_u32 v6, v4, v2
+; GCN-NEXT:    v_mul_lo_u32 v7, v4, v2
+; GCN-NEXT:    v_mul_hi_u32 v10, v5, v3
+; GCN-NEXT:    v_mul_lo_u32 v11, v5, v3
+; GCN-NEXT:    v_sub_i32_e32 v12, vcc, 0, v7
+; GCN-NEXT:    v_sub_i32_e32 v13, vcc, 0, v11
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
+; GCN-NEXT:    v_cndmask_b32_e64 v6, v7, v12, s[0:1]
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], 0, v10
+; GCN-NEXT:    v_cndmask_b32_e64 v7, v11, v13, s[2:3]
+; GCN-NEXT:    v_mul_hi_u32 v6, v6, v4
+; GCN-NEXT:    v_mul_hi_u32 v7, v7, v5
+; GCN-NEXT:    v_add_i32_e32 v10, vcc, v6, v4
+; GCN-NEXT:    v_subrev_i32_e32 v4, vcc, v6, v4
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v5
+; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, v7, v5
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[2:3]
+; GCN-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GCN-NEXT:    v_mul_hi_u32 v5, v5, v1
+; GCN-NEXT:    v_mul_lo_u32 v6, v4, v2
+; GCN-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
+; GCN-NEXT:    v_add_i32_e32 v10, vcc, -1, v4
+; GCN-NEXT:    v_mul_lo_u32 v11, v5, v3
+; GCN-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
+; GCN-NEXT:    v_add_i32_e32 v13, vcc, -1, v5
+; GCN-NEXT:    v_subrev_i32_e32 v14, vcc, v6, v0
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v6
+; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, v11, v1
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v11
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v14, v2
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[2:3]
+; GCN-NEXT:    s_and_b64 s[2:3], s[4:5], vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[2:3]
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
+; GCN-NEXT:    v_xor_b32_e32 v0, v0, v8
+; GCN-NEXT:    v_xor_b32_e32 v1, v1, v9
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GCN-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: sdiv_v2i32:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s11, 0xf000
+; TONGA-NEXT:    s_mov_b32 s10, -1
+; TONGA-NEXT:    s_mov_b32 s4, 0x4f800000
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s8, s0
+; TONGA-NEXT:    s_mov_b32 s9, s1
+; TONGA-NEXT:    s_mov_b32 s0, s2
+; TONGA-NEXT:    s_mov_b32 s1, s3
+; TONGA-NEXT:    s_mov_b32 s2, s10
+; TONGA-NEXT:    s_mov_b32 s3, s11
+; TONGA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; TONGA-NEXT:    v_add_u32_e32 v2, vcc, v5, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v7, v3
+; TONGA-NEXT:    v_xor_b32_e32 v2, v2, v5
+; TONGA-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; TONGA-NEXT:    v_xor_b32_e32 v8, v4, v5
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v5, v2
+; TONGA-NEXT:    v_xor_b32_e32 v3, v3, v7
+; TONGA-NEXT:    v_xor_b32_e32 v9, v6, v7
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v4
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; TONGA-NEXT:    v_mul_f32_e32 v4, s4, v5
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v6, v1
+; TONGA-NEXT:    v_mul_f32_e32 v5, s4, v7
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v6
+; TONGA-NEXT:    v_mul_hi_u32 v6, v4, v2
+; TONGA-NEXT:    v_mul_lo_u32 v7, v4, v2
+; TONGA-NEXT:    v_mul_hi_u32 v10, v5, v3
+; TONGA-NEXT:    v_mul_lo_u32 v11, v5, v3
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
+; TONGA-NEXT:    v_sub_u32_e32 v12, vcc, 0, v7
+; TONGA-NEXT:    v_cndmask_b32_e64 v6, v7, v12, s[0:1]
+; TONGA-NEXT:    v_sub_u32_e32 v13, vcc, 0, v11
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], 0, v10
+; TONGA-NEXT:    v_cndmask_b32_e64 v7, v11, v13, s[2:3]
+; TONGA-NEXT:    v_mul_hi_u32 v6, v6, v4
+; TONGA-NEXT:    v_mul_hi_u32 v7, v7, v5
+; TONGA-NEXT:    v_add_u32_e32 v10, vcc, v6, v4
+; TONGA-NEXT:    v_subrev_u32_e32 v4, vcc, v6, v4
+; TONGA-NEXT:    v_cndmask_b32_e64 v4, v4, v10, s[0:1]
+; TONGA-NEXT:    v_add_u32_e32 v6, vcc, v7, v5
+; TONGA-NEXT:    v_subrev_u32_e32 v5, vcc, v7, v5
+; TONGA-NEXT:    v_mul_hi_u32 v4, v4, v0
+; TONGA-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[2:3]
+; TONGA-NEXT:    v_mul_hi_u32 v5, v5, v1
+; TONGA-NEXT:    v_mul_lo_u32 v6, v4, v2
+; TONGA-NEXT:    v_add_u32_e32 v7, vcc, 1, v4
+; TONGA-NEXT:    v_mul_lo_u32 v11, v5, v3
+; TONGA-NEXT:    v_add_u32_e32 v10, vcc, -1, v4
+; TONGA-NEXT:    v_subrev_u32_e32 v14, vcc, v6, v0
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v6
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v14, v2
+; TONGA-NEXT:    v_subrev_u32_e32 v0, vcc, v11, v1
+; TONGA-NEXT:    v_add_u32_e32 v12, vcc, 1, v5
+; TONGA-NEXT:    v_add_u32_e32 v13, vcc, -1, v5
+; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v11
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; TONGA-NEXT:    s_and_b64 s[2:3], s[2:3], s[0:1]
+; TONGA-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[2:3]
+; TONGA-NEXT:    s_and_b64 s[2:3], s[4:5], vcc
+; TONGA-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[2:3]
+; TONGA-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[0:1]
+; TONGA-NEXT:    v_cndmask_b32_e32 v1, v13, v1, vcc
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v8
+; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v9
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v8
+; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v1, v9
+; TONGA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_v2i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s11, 0xf000
+; GFX9-NEXT:    s_mov_b32 s10, -1
+; GFX9-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s0
+; GFX9-NEXT:    s_mov_b32 s9, s1
+; GFX9-NEXT:    s_mov_b32 s0, s2
+; GFX9-NEXT:    s_mov_b32 s1, s3
+; GFX9-NEXT:    s_mov_b32 s2, s10
+; GFX9-NEXT:    s_mov_b32 s3, s11
+; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
+; GFX9-NEXT:    v_add_u32_e32 v2, v2, v5
+; GFX9-NEXT:    v_add_u32_e32 v3, v3, v6
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, v2
+; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v8, v3
+; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v4
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GFX9-NEXT:    v_xor_b32_e32 v5, v4, v5
+; GFX9-NEXT:    v_mul_f32_e32 v7, s4, v7
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GFX9-NEXT:    v_mul_f32_e32 v8, s4, v8
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX9-NEXT:    v_mul_lo_u32 v4, v7, v2
+; GFX9-NEXT:    v_mul_hi_u32 v11, v7, v2
+; GFX9-NEXT:    v_mul_lo_u32 v10, v8, v3
+; GFX9-NEXT:    v_mul_hi_u32 v12, v8, v3
+; GFX9-NEXT:    v_sub_u32_e32 v13, 0, v4
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v11
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v13, vcc
+; GFX9-NEXT:    v_sub_u32_e32 v14, 0, v10
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v12
+; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v14, s[0:1]
+; GFX9-NEXT:    v_mul_hi_u32 v4, v4, v7
+; GFX9-NEXT:    v_mul_hi_u32 v10, v10, v8
+; GFX9-NEXT:    v_ashrrev_i32_e32 v9, 31, v1
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v9
+; GFX9-NEXT:    v_xor_b32_e32 v6, v9, v6
+; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v9
+; GFX9-NEXT:    v_add_u32_e32 v9, v7, v4
+; GFX9-NEXT:    v_sub_u32_e32 v4, v7, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc
+; GFX9-NEXT:    v_add_u32_e32 v7, v8, v10
+; GFX9-NEXT:    v_sub_u32_e32 v8, v8, v10
+; GFX9-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GFX9-NEXT:    v_cndmask_b32_e64 v7, v8, v7, s[0:1]
+; GFX9-NEXT:    v_mul_hi_u32 v7, v7, v1
+; GFX9-NEXT:    v_mul_lo_u32 v8, v4, v2
+; GFX9-NEXT:    v_add_u32_e32 v9, 1, v4
+; GFX9-NEXT:    v_mul_lo_u32 v11, v7, v3
+; GFX9-NEXT:    v_add_u32_e32 v12, 1, v7
+; GFX9-NEXT:    v_sub_u32_e32 v14, v0, v8
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[2:3], v14, v2
+; GFX9-NEXT:    v_sub_u32_e32 v0, v1, v11
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[0:1], v1, v11
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
+; GFX9-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, v9, s[2:3]
+; GFX9-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v10, -1, v4
+; GFX9-NEXT:    v_add_u32_e32 v13, -1, v7
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v7, v12, s[2:3]
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[0:1]
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v5
+; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v5
+; GFX9-NEXT:    v_sub_u32_e32 v1, v1, v6
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: sdiv_v2i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @6
+; EG-NEXT:    ALU 59, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_64 T1.XY, T0.X, 8, #1
+; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     SETGT_INT * T0.W, 0.0, T1.Y,
+; EG-NEXT:     ADD_INT * T1.W, T1.Y, PV.W,
+; EG-NEXT:     XOR_INT T1.W, PV.W, T0.W,
+; EG-NEXT:     SETGT_INT * T2.W, 0.0, T1.X,
+; EG-NEXT:     ADD_INT T3.W, T1.X, PS,
+; EG-NEXT:     RECIP_UINT * T0.Z, PV.W,
+; EG-NEXT:     XOR_INT T3.W, PV.W, T2.W, BS:VEC_021/SCL_122
+; EG-NEXT:     MULLO_INT * T1.X, PS, T1.W,
+; EG-NEXT:     RECIP_UINT * T1.Y, PV.W,
+; EG-NEXT:     MULLO_INT * T1.Z, PS, T3.W,
+; EG-NEXT:     SUB_INT T4.W, 0.0, PS,
+; EG-NEXT:     MULHI * T2.X, T1.Y, T3.W,
+; EG-NEXT:     CNDE_INT T1.Z, PS, PV.W, T1.Z, BS:VEC_021/SCL_122
+; EG-NEXT:     SUB_INT T4.W, 0.0, T1.X,
+; EG-NEXT:     MULHI * T2.Y, T0.Z, T1.W,
+; EG-NEXT:     CNDE_INT T2.Z, PS, PV.W, T1.X,
+; EG-NEXT:     SETGT_INT T4.W, 0.0, T0.X,
+; EG-NEXT:     MULHI * T1.X, PV.Z, T1.Y,
+; EG-NEXT:     SETGT_INT T3.X, 0.0, T0.Y,
+; EG-NEXT:     ADD_INT T3.Y, T0.X, PV.W,
+; EG-NEXT:     ADD_INT T1.Z, T1.Y, PS,
+; EG-NEXT:     SUB_INT T5.W, T1.Y, PS,
+; EG-NEXT:     MULHI * T0.X, PV.Z, T0.Z,
+; EG-NEXT:     CNDE_INT T1.X, T2.X, PV.Z, PV.W,
+; EG-NEXT:     XOR_INT T1.Y, PV.Y, T4.W,
+; EG-NEXT:     ADD_INT T1.Z, T0.Y, PV.X,
+; EG-NEXT:     ADD_INT T5.W, T0.Z, PS,
+; EG-NEXT:     SUB_INT * T6.W, T0.Z, PS,
+; EG-NEXT:     CNDE_INT T0.Z, T2.Y, PV.W, PS,
+; EG-NEXT:     XOR_INT T5.W, PV.Z, T3.X,
+; EG-NEXT:     MULHI * T0.X, PV.X, PV.Y,
+; EG-NEXT:     MULHI * T0.Y, PV.Z, PV.W,
+; EG-NEXT:     MULLO_INT * T0.Z, PS, T1.W,
+; EG-NEXT:     SUB_INT T6.W, T5.W, PS,
+; EG-NEXT:     MULLO_INT * T1.X, T0.X, T3.W,
+; EG-NEXT:     SUB_INT T1.Z, T1.Y, PS,
+; EG-NEXT:     SETGE_UINT T1.W, PV.W, T1.W,
+; EG-NEXT:     SETGE_UINT * T5.W, T5.W, T0.Z,
+; EG-NEXT:     AND_INT T2.Y, PV.W, PS,
+; EG-NEXT:     ADD_INT T0.Z, T0.Y, 1,
+; EG-NEXT:     SETGE_UINT T1.W, PV.Z, T3.W,
+; EG-NEXT:     SETGE_UINT * T3.W, T1.Y, T1.X,
+; EG-NEXT:     AND_INT T1.Y, PV.W, PS,
+; EG-NEXT:     ADD_INT T1.Z, T0.X, 1,
+; EG-NEXT:     CNDE_INT T1.W, PV.Y, T0.Y, PV.Z,
+; EG-NEXT:     ADD_INT * T6.W, T0.Y, literal.x,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.Y, T5.W, PS, PV.W,
+; EG-NEXT:     XOR_INT T0.Z, T3.X, T0.W,
+; EG-NEXT:     CNDE_INT T0.W, PV.Y, T0.X, PV.Z,
+; EG-NEXT:     ADD_INT * T1.W, T0.X, literal.x,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T1.Z, T3.W, PS, PV.W,
+; EG-NEXT:     XOR_INT T0.W, T4.W, T2.W, BS:VEC_120/SCL_212
+; EG-NEXT:     XOR_INT * T1.W, PV.Y, PV.Z,
+; EG-NEXT:     SUB_INT T0.Y, PS, T0.Z,
+; EG-NEXT:     XOR_INT * T1.W, PV.Z, PV.W,
+; EG-NEXT:     SUB_INT T0.X, PV.W, T0.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
   %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
   %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
@@ -61,6 +735,108 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 }
 
 define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; GCN-LABEL: sdiv_v2i32_4:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GCN-NEXT:    v_lshrrev_b32_e32 v2, 30, v2
+; GCN-NEXT:    v_lshrrev_b32_e32 v3, 30, v3
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: sdiv_v2i32_4:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; TONGA-NEXT:    v_lshrrev_b32_e32 v2, 30, v2
+; TONGA-NEXT:    v_lshrrev_b32_e32 v3, 30, v3
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v3, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; TONGA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_v2i32_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 30, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 30, v3
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: sdiv_v2i32_4:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 13, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     ASHR * T0.W, T0.Y, literal.x,
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T0.W, PV.W, literal.x,
+; EG-NEXT:     ASHR * T1.W, T0.X, literal.y,
+; EG-NEXT:    30(4.203895e-44), 31(4.344025e-44)
+; EG-NEXT:     LSHR T1.W, PS, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, T0.Y, PV.W,
+; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.Y, PS, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, T0.X, PV.W,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
   %result = sdiv <2 x i32> %num, <i32 4, i32 4>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -68,6 +844,586 @@ define amdgpu_kernel void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32>
 }
 
 define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; GCN-LABEL: sdiv_v4i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s19, 0xf000
+; GCN-NEXT:    s_mov_b32 s18, -1
+; GCN-NEXT:    s_mov_b32 s2, s18
+; GCN-NEXT:    s_mov_b32 s3, s19
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s0, s10
+; GCN-NEXT:    s_mov_b32 s1, s11
+; GCN-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GCN-NEXT:    s_mov_b32 s6, 0x4f800000
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_ashrrev_i32_e32 v8, 31, v0
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_ashrrev_i32_e32 v9, 31, v4
+; GCN-NEXT:    v_ashrrev_i32_e32 v10, 31, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v11, 31, v5
+; GCN-NEXT:    v_ashrrev_i32_e32 v12, 31, v2
+; GCN-NEXT:    v_ashrrev_i32_e32 v13, 31, v6
+; GCN-NEXT:    v_ashrrev_i32_e32 v14, 31, v3
+; GCN-NEXT:    v_ashrrev_i32_e32 v15, 31, v7
+; GCN-NEXT:    v_xor_b32_e32 v16, v8, v9
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, v14, v3
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, v9, v4
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, v11, v5
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, v13, v6
+; GCN-NEXT:    v_add_i32_e32 v7, vcc, v15, v7
+; GCN-NEXT:    v_xor_b32_e32 v17, v10, v11
+; GCN-NEXT:    v_xor_b32_e32 v18, v12, v13
+; GCN-NEXT:    v_xor_b32_e32 v19, v14, v15
+; GCN-NEXT:    v_xor_b32_e32 v0, v0, v8
+; GCN-NEXT:    v_xor_b32_e32 v4, v4, v9
+; GCN-NEXT:    v_xor_b32_e32 v1, v1, v10
+; GCN-NEXT:    v_xor_b32_e32 v5, v5, v11
+; GCN-NEXT:    v_xor_b32_e32 v2, v2, v12
+; GCN-NEXT:    v_xor_b32_e32 v6, v6, v13
+; GCN-NEXT:    v_xor_b32_e32 v3, v3, v14
+; GCN-NEXT:    v_xor_b32_e32 v7, v7, v15
+; GCN-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GCN-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GCN-NEXT:    v_cvt_f32_u32_e32 v10, v6
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v9, v9
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v10, v10
+; GCN-NEXT:    v_mul_f32_e32 v8, s6, v8
+; GCN-NEXT:    v_mul_f32_e32 v9, s6, v9
+; GCN-NEXT:    v_mul_f32_e32 v10, s6, v10
+; GCN-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GCN-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GCN-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; GCN-NEXT:    v_mul_hi_u32 v11, v8, v4
+; GCN-NEXT:    v_mul_lo_u32 v12, v8, v4
+; GCN-NEXT:    v_mul_hi_u32 v13, v9, v5
+; GCN-NEXT:    v_mul_lo_u32 v14, v9, v5
+; GCN-NEXT:    v_sub_i32_e32 v15, vcc, 0, v12
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
+; GCN-NEXT:    v_mul_hi_u32 v11, v10, v6
+; GCN-NEXT:    v_cndmask_b32_e64 v12, v12, v15, s[0:1]
+; GCN-NEXT:    v_sub_i32_e32 v15, vcc, 0, v14
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[2:3], 0, v13
+; GCN-NEXT:    v_mul_lo_u32 v13, v10, v6
+; GCN-NEXT:    v_cndmask_b32_e64 v14, v14, v15, s[2:3]
+; GCN-NEXT:    v_sub_i32_e32 v15, vcc, 0, v13
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
+; GCN-NEXT:    v_cvt_f32_u32_e32 v11, v7
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v11, v11
+; GCN-NEXT:    v_mul_f32_e32 v11, s6, v11
+; GCN-NEXT:    v_cvt_u32_f32_e32 v11, v11
+; GCN-NEXT:    v_cndmask_b32_e64 v13, v13, v15, s[4:5]
+; GCN-NEXT:    v_mul_hi_u32 v15, v11, v7
+; GCN-NEXT:    v_mul_lo_u32 v20, v11, v7
+; GCN-NEXT:    v_sub_i32_e32 v21, vcc, 0, v20
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[6:7], 0, v15
+; GCN-NEXT:    v_cndmask_b32_e64 v15, v20, v21, s[6:7]
+; GCN-NEXT:    v_mul_hi_u32 v12, v12, v8
+; GCN-NEXT:    v_add_i32_e32 v20, vcc, v12, v8
+; GCN-NEXT:    v_subrev_i32_e32 v8, vcc, v12, v8
+; GCN-NEXT:    v_mul_hi_u32 v12, v14, v9
+; GCN-NEXT:    v_add_i32_e32 v14, vcc, v12, v9
+; GCN-NEXT:    v_subrev_i32_e32 v9, vcc, v12, v9
+; GCN-NEXT:    v_mul_hi_u32 v12, v13, v10
+; GCN-NEXT:    v_add_i32_e32 v13, vcc, v12, v10
+; GCN-NEXT:    v_subrev_i32_e32 v10, vcc, v12, v10
+; GCN-NEXT:    v_mul_hi_u32 v12, v15, v11
+; GCN-NEXT:    v_add_i32_e32 v15, vcc, v12, v11
+; GCN-NEXT:    v_subrev_i32_e32 v11, vcc, v12, v11
+; GCN-NEXT:    s_mov_b32 s16, s8
+; GCN-NEXT:    s_mov_b32 s17, s9
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, v20, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v9, v9, v14, s[2:3]
+; GCN-NEXT:    v_cndmask_b32_e64 v10, v10, v13, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v11, v11, v15, s[6:7]
+; GCN-NEXT:    v_mul_hi_u32 v8, v8, v0
+; GCN-NEXT:    v_mul_hi_u32 v9, v9, v1
+; GCN-NEXT:    v_mul_hi_u32 v10, v10, v2
+; GCN-NEXT:    v_mul_hi_u32 v11, v11, v3
+; GCN-NEXT:    v_mul_lo_u32 v12, v8, v4
+; GCN-NEXT:    v_add_i32_e32 v13, vcc, 1, v8
+; GCN-NEXT:    v_add_i32_e32 v14, vcc, -1, v8
+; GCN-NEXT:    v_mul_lo_u32 v15, v9, v5
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v12
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v12
+; GCN-NEXT:    v_add_i32_e32 v12, vcc, 1, v9
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v0, v4
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, -1, v9
+; GCN-NEXT:    v_mul_lo_u32 v4, v10, v6
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v15
+; GCN-NEXT:    v_sub_i32_e32 v1, vcc, v1, v15
+; GCN-NEXT:    v_add_i32_e32 v15, vcc, 1, v10
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[6:7], v1, v5
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, -1, v10
+; GCN-NEXT:    v_mul_lo_u32 v5, v11, v7
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v4
+; GCN-NEXT:    v_sub_i32_e32 v2, vcc, v2, v4
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, -1, v11
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[10:11], v3, v5
+; GCN-NEXT:    v_sub_i32_e32 v3, vcc, v3, v5
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, 1, v11
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v6
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[12:13], v3, v7
+; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v8, v13, s[2:3]
+; GCN-NEXT:    s_and_b64 s[2:3], s[6:7], s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v9, v12, s[2:3]
+; GCN-NEXT:    s_and_b64 vcc, vcc, s[8:9]
+; GCN-NEXT:    v_cndmask_b32_e32 v6, v10, v15, vcc
+; GCN-NEXT:    s_and_b64 vcc, s[12:13], s[10:11]
+; GCN-NEXT:    v_cndmask_b32_e32 v5, v11, v5, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v14, v2, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[8:9]
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v5, s[10:11]
+; GCN-NEXT:    v_xor_b32_e32 v2, v2, v16
+; GCN-NEXT:    v_xor_b32_e32 v4, v0, v17
+; GCN-NEXT:    v_xor_b32_e32 v5, v1, v18
+; GCN-NEXT:    v_xor_b32_e32 v3, v3, v19
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v2, v16
+; GCN-NEXT:    v_sub_i32_e32 v1, vcc, v4, v17
+; GCN-NEXT:    v_sub_i32_e32 v2, vcc, v5, v18
+; GCN-NEXT:    v_sub_i32_e32 v3, vcc, v3, v19
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: sdiv_v4i32:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s11, 0xf000
+; TONGA-NEXT:    s_mov_b32 s10, -1
+; TONGA-NEXT:    s_mov_b32 s2, s10
+; TONGA-NEXT:    s_mov_b32 s3, s11
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s14
+; TONGA-NEXT:    s_mov_b32 s1, s15
+; TONGA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; TONGA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; TONGA-NEXT:    s_mov_b32 s14, 0x4f800000
+; TONGA-NEXT:    s_mov_b32 s8, s12
+; TONGA-NEXT:    s_mov_b32 s9, s13
+; TONGA-NEXT:    s_waitcnt vmcnt(1)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v9, 31, v4
+; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v9, v4
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v8, 31, v0
+; TONGA-NEXT:    v_xor_b32_e32 v4, v4, v9
+; TONGA-NEXT:    v_xor_b32_e32 v15, v8, v9
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v9, v4
+; TONGA-NEXT:    v_ashrrev_i32_e32 v11, 31, v5
+; TONGA-NEXT:    v_add_u32_e32 v5, vcc, v11, v5
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v8, v0
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v9, v9
+; TONGA-NEXT:    v_xor_b32_e32 v5, v5, v11
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v8
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v8, v5
+; TONGA-NEXT:    v_mul_f32_e32 v9, s14, v9
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; TONGA-NEXT:    v_ashrrev_i32_e32 v10, 31, v1
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v10, v1
+; TONGA-NEXT:    v_xor_b32_e32 v16, v10, v11
+; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v10
+; TONGA-NEXT:    v_mul_f32_e32 v8, s14, v8
+; TONGA-NEXT:    v_mul_hi_u32 v11, v9, v4
+; TONGA-NEXT:    v_mul_lo_u32 v10, v9, v4
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; TONGA-NEXT:    v_ashrrev_i32_e32 v12, 31, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v13, 31, v6
+; TONGA-NEXT:    v_add_u32_e32 v2, vcc, v12, v2
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v11
+; TONGA-NEXT:    v_xor_b32_e32 v17, v12, v13
+; TONGA-NEXT:    v_xor_b32_e32 v2, v2, v12
+; TONGA-NEXT:    v_sub_u32_e32 v12, vcc, 0, v10
+; TONGA-NEXT:    v_cndmask_b32_e64 v10, v10, v12, s[0:1]
+; TONGA-NEXT:    v_mul_hi_u32 v12, v8, v5
+; TONGA-NEXT:    v_add_u32_e32 v6, vcc, v13, v6
+; TONGA-NEXT:    v_xor_b32_e32 v6, v6, v13
+; TONGA-NEXT:    v_mul_lo_u32 v11, v8, v5
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[2:3], 0, v12
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v12, v6
+; TONGA-NEXT:    v_mul_hi_u32 v10, v10, v9
+; TONGA-NEXT:    v_sub_u32_e32 v13, vcc, 0, v11
+; TONGA-NEXT:    v_cndmask_b32_e64 v11, v11, v13, s[2:3]
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v12, v12
+; TONGA-NEXT:    v_ashrrev_i32_e32 v14, 31, v7
+; TONGA-NEXT:    v_add_u32_e32 v7, vcc, v14, v7
+; TONGA-NEXT:    v_xor_b32_e32 v7, v7, v14
+; TONGA-NEXT:    v_mul_f32_e32 v12, s14, v12
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v12, v12
+; TONGA-NEXT:    v_mul_hi_u32 v18, v12, v6
+; TONGA-NEXT:    v_mul_lo_u32 v13, v12, v6
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v18
+; TONGA-NEXT:    v_add_u32_e32 v18, vcc, v10, v9
+; TONGA-NEXT:    v_subrev_u32_e32 v9, vcc, v10, v9
+; TONGA-NEXT:    v_mul_hi_u32 v10, v11, v8
+; TONGA-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s[0:1]
+; TONGA-NEXT:    v_mul_hi_u32 v9, v9, v0
+; TONGA-NEXT:    v_sub_u32_e32 v19, vcc, 0, v13
+; TONGA-NEXT:    v_add_u32_e32 v11, vcc, v10, v8
+; TONGA-NEXT:    v_subrev_u32_e32 v8, vcc, v10, v8
+; TONGA-NEXT:    v_cndmask_b32_e64 v13, v13, v19, s[4:5]
+; TONGA-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s[2:3]
+; TONGA-NEXT:    v_mul_hi_u32 v10, v13, v12
+; TONGA-NEXT:    v_mul_lo_u32 v11, v9, v4
+; TONGA-NEXT:    v_mul_hi_u32 v8, v8, v1
+; TONGA-NEXT:    v_add_u32_e32 v13, vcc, v10, v12
+; TONGA-NEXT:    v_subrev_u32_e32 v10, vcc, v10, v12
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v11
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v11
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v0, v4
+; TONGA-NEXT:    v_cndmask_b32_e64 v10, v10, v13, s[4:5]
+; TONGA-NEXT:    v_mul_lo_u32 v0, v8, v5
+; TONGA-NEXT:    v_mul_hi_u32 v4, v10, v2
+; TONGA-NEXT:    v_add_u32_e32 v12, vcc, -1, v9
+; TONGA-NEXT:    v_add_u32_e32 v10, vcc, -1, v8
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v0
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v1, v0
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[6:7], v0, v5
+; TONGA-NEXT:    v_mul_lo_u32 v5, v4, v6
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, 1, v9
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, 1, v8
+; TONGA-NEXT:    s_and_b64 vcc, s[2:3], s[0:1]
+; TONGA-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
+; TONGA-NEXT:    v_sub_u32_e32 v9, vcc, v2, v5
+; TONGA-NEXT:    s_and_b64 vcc, s[6:7], s[4:5]
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v11, v7
+; TONGA-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; TONGA-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[0:1]
+; TONGA-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
+; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v15
+; TONGA-NEXT:    v_xor_b32_e32 v8, v0, v16
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v1, v15
+; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v8, v16
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v8, v11
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v9, v6
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v2, v5
+; TONGA-NEXT:    v_ashrrev_i32_e32 v10, 31, v3
+; TONGA-NEXT:    v_mul_f32_e32 v8, s14, v8
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v10, v3
+; TONGA-NEXT:    v_xor_b32_e32 v3, v3, v10
+; TONGA-NEXT:    v_add_u32_e32 v6, vcc, -1, v4
+; TONGA-NEXT:    v_mul_lo_u32 v5, v8, v7
+; TONGA-NEXT:    v_mul_hi_u32 v9, v8, v7
+; TONGA-NEXT:    v_add_u32_e32 v2, vcc, 1, v4
+; TONGA-NEXT:    v_sub_u32_e32 v11, vcc, 0, v5
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; TONGA-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[4:5]
+; TONGA-NEXT:    v_mul_hi_u32 v5, v5, v8
+; TONGA-NEXT:    v_add_u32_e32 v9, vcc, v5, v8
+; TONGA-NEXT:    v_subrev_u32_e32 v5, vcc, v5, v8
+; TONGA-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s[4:5]
+; TONGA-NEXT:    v_mul_hi_u32 v5, v5, v3
+; TONGA-NEXT:    s_and_b64 vcc, s[0:1], s[2:3]
+; TONGA-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
+; TONGA-NEXT:    v_cndmask_b32_e64 v2, v6, v2, s[2:3]
+; TONGA-NEXT:    v_mul_lo_u32 v4, v5, v7
+; TONGA-NEXT:    v_xor_b32_e32 v2, v2, v17
+; TONGA-NEXT:    v_sub_u32_e32 v2, vcc, v2, v17
+; TONGA-NEXT:    v_xor_b32_e32 v6, v10, v14
+; TONGA-NEXT:    v_sub_u32_e32 v8, vcc, v3, v4
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v8, v7
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v3, v4
+; TONGA-NEXT:    v_add_u32_e32 v7, vcc, -1, v5
+; TONGA-NEXT:    v_add_u32_e32 v3, vcc, 1, v5
+; TONGA-NEXT:    s_and_b64 vcc, s[0:1], s[2:3]
+; TONGA-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; TONGA-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s[2:3]
+; TONGA-NEXT:    v_xor_b32_e32 v3, v3, v6
+; TONGA-NEXT:    v_sub_u32_e32 v3, vcc, v3, v6
+; TONGA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_v4i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s15, 0xf000
+; GFX9-NEXT:    s_mov_b32 s14, -1
+; GFX9-NEXT:    s_mov_b32 s2, s14
+; GFX9-NEXT:    s_mov_b32 s3, s15
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s10
+; GFX9-NEXT:    s_mov_b32 s1, s11
+; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; GFX9-NEXT:    s_mov_b32 s4, 0x4f800000
+; GFX9-NEXT:    s_mov_b32 s12, s8
+; GFX9-NEXT:    s_mov_b32 s13, s9
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v9, 31, v4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v8, 31, v0
+; GFX9-NEXT:    v_add_u32_e32 v4, v4, v9
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v8
+; GFX9-NEXT:    v_xor_b32_e32 v4, v4, v9
+; GFX9-NEXT:    v_xor_b32_e32 v16, v8, v9
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v8
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v8, v4
+; GFX9-NEXT:    v_ashrrev_i32_e32 v11, 31, v5
+; GFX9-NEXT:    v_add_u32_e32 v5, v5, v11
+; GFX9-NEXT:    v_xor_b32_e32 v5, v5, v11
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v9, v5
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GFX9-NEXT:    v_ashrrev_i32_e32 v13, 31, v6
+; GFX9-NEXT:    v_ashrrev_i32_e32 v10, 31, v1
+; GFX9-NEXT:    v_add_u32_e32 v6, v6, v13
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v10
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v9, v9
+; GFX9-NEXT:    v_mul_f32_e32 v8, s4, v8
+; GFX9-NEXT:    v_xor_b32_e32 v6, v6, v13
+; GFX9-NEXT:    v_xor_b32_e32 v17, v10, v11
+; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v10
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v10, v6
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v8
+; GFX9-NEXT:    v_ashrrev_i32_e32 v12, 31, v2
+; GFX9-NEXT:    v_add_u32_e32 v2, v2, v12
+; GFX9-NEXT:    v_mul_f32_e32 v9, s4, v9
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v10, v10
+; GFX9-NEXT:    v_xor_b32_e32 v18, v12, v13
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v12
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v9
+; GFX9-NEXT:    v_mul_hi_u32 v12, v8, v4
+; GFX9-NEXT:    v_mul_lo_u32 v11, v8, v4
+; GFX9-NEXT:    v_mul_f32_e32 v10, s4, v10
+; GFX9-NEXT:    v_mul_lo_u32 v13, v9, v5
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v12
+; GFX9-NEXT:    v_mul_hi_u32 v12, v9, v5
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v10, v10
+; GFX9-NEXT:    v_sub_u32_e32 v19, 0, v11
+; GFX9-NEXT:    v_cndmask_b32_e32 v11, v11, v19, vcc
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v12
+; GFX9-NEXT:    v_sub_u32_e32 v19, 0, v13
+; GFX9-NEXT:    v_cndmask_b32_e64 v13, v13, v19, s[0:1]
+; GFX9-NEXT:    v_mul_hi_u32 v19, v10, v6
+; GFX9-NEXT:    v_ashrrev_i32_e32 v15, 31, v7
+; GFX9-NEXT:    v_add_u32_e32 v7, v7, v15
+; GFX9-NEXT:    v_xor_b32_e32 v7, v7, v15
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[2:3], 0, v19
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v19, v7
+; GFX9-NEXT:    v_mul_hi_u32 v11, v11, v8
+; GFX9-NEXT:    v_mul_lo_u32 v12, v10, v6
+; GFX9-NEXT:    v_ashrrev_i32_e32 v14, 31, v3
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v19, v19
+; GFX9-NEXT:    v_add_u32_e32 v3, v3, v14
+; GFX9-NEXT:    v_sub_u32_e32 v20, 0, v12
+; GFX9-NEXT:    v_cndmask_b32_e64 v12, v12, v20, s[2:3]
+; GFX9-NEXT:    v_mul_f32_e32 v19, s4, v19
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v19, v19
+; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v14
+; GFX9-NEXT:    v_mul_hi_u32 v21, v19, v7
+; GFX9-NEXT:    v_mul_lo_u32 v20, v19, v7
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v21
+; GFX9-NEXT:    v_add_u32_e32 v21, v8, v11
+; GFX9-NEXT:    v_sub_u32_e32 v8, v8, v11
+; GFX9-NEXT:    v_mul_hi_u32 v11, v13, v9
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v21, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v8, v8, v0
+; GFX9-NEXT:    v_sub_u32_e32 v22, 0, v20
+; GFX9-NEXT:    v_add_u32_e32 v13, v9, v11
+; GFX9-NEXT:    v_sub_u32_e32 v9, v9, v11
+; GFX9-NEXT:    v_mul_hi_u32 v11, v12, v10
+; GFX9-NEXT:    v_cndmask_b32_e64 v9, v9, v13, s[0:1]
+; GFX9-NEXT:    v_mul_hi_u32 v9, v9, v1
+; GFX9-NEXT:    v_cndmask_b32_e64 v20, v20, v22, s[4:5]
+; GFX9-NEXT:    v_add_u32_e32 v12, v10, v11
+; GFX9-NEXT:    v_sub_u32_e32 v10, v10, v11
+; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v12, s[2:3]
+; GFX9-NEXT:    v_mul_lo_u32 v12, v8, v4
+; GFX9-NEXT:    v_mul_hi_u32 v11, v20, v19
+; GFX9-NEXT:    v_mul_hi_u32 v10, v10, v2
+; GFX9-NEXT:    v_add_u32_e32 v13, 1, v8
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v12
+; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v12
+; GFX9-NEXT:    v_mul_lo_u32 v12, v9, v5
+; GFX9-NEXT:    v_add_u32_e32 v20, v19, v11
+; GFX9-NEXT:    v_sub_u32_e32 v11, v19, v11
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, v11, v20, s[4:5]
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[2:3], v1, v12
+; GFX9-NEXT:    v_sub_u32_e32 v1, v1, v12
+; GFX9-NEXT:    v_mul_lo_u32 v12, v10, v6
+; GFX9-NEXT:    v_mul_hi_u32 v11, v11, v3
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v4
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v5
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v2, v12
+; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v12
+; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[8:9], v2, v6
+; GFX9-NEXT:    v_cndmask_b32_e64 v2, v8, v13, s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v0, 1, v9
+; GFX9-NEXT:    s_and_b64 s[0:1], s[4:5], s[2:3]
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v9, v0, s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v1, 1, v10
+; GFX9-NEXT:    s_and_b64 s[0:1], s[8:9], s[6:7]
+; GFX9-NEXT:    v_mul_lo_u32 v12, v11, v7
+; GFX9-NEXT:    v_add_u32_e32 v19, -1, v8
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v10, v1, s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v5, -1, v10
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v19, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[6:7]
+; GFX9-NEXT:    v_add_u32_e32 v4, -1, v9
+; GFX9-NEXT:    v_cndmask_b32_e64 v0, v4, v0, s[2:3]
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v16
+; GFX9-NEXT:    v_xor_b32_e32 v5, v1, v18
+; GFX9-NEXT:    v_xor_b32_e32 v4, v0, v17
+; GFX9-NEXT:    v_sub_u32_e32 v0, v2, v16
+; GFX9-NEXT:    v_sub_u32_e32 v2, v5, v18
+; GFX9-NEXT:    v_sub_u32_e32 v5, v3, v12
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v7
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[0:1], v3, v12
+; GFX9-NEXT:    v_add_u32_e32 v3, 1, v11
+; GFX9-NEXT:    s_and_b64 vcc, vcc, s[0:1]
+; GFX9-NEXT:    v_add_u32_e32 v5, -1, v11
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v11, v3, vcc
+; GFX9-NEXT:    v_sub_u32_e32 v1, v4, v17
+; GFX9-NEXT:    v_xor_b32_e32 v4, v14, v15
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[0:1]
+; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GFX9-NEXT:    v_sub_u32_e32 v3, v3, v4
+; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[12:15], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: sdiv_v4i32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 2, @13, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @10
+; EG-NEXT:    ALU 114, @16, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T3.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 13:
+; EG-NEXT:     SETGT_INT * T0.W, 0.0, T1.Z,
+; EG-NEXT:     ADD_INT * T2.W, T1.Z, PV.W,
+; EG-NEXT:     XOR_INT * T2.W, PV.W, T0.W,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     RECIP_UINT * T0.X, T2.W,
+; EG-NEXT:     MULLO_INT * T0.Y, PS, T2.W,
+; EG-NEXT:     SUB_INT T4.W, 0.0, PS,
+; EG-NEXT:     MULHI * T0.Z, T0.X, T2.W,
+; EG-NEXT:     CNDE_INT T4.W, PS, PV.W, T0.Y,
+; EG-NEXT:     SETGT_INT * T5.W, 0.0, T3.Z,
+; EG-NEXT:     MULHI * T0.Y, PV.W, T0.X,
+; EG-NEXT:     SETGT_INT T2.Y, 0.0, T1.W,
+; EG-NEXT:     ADD_INT T1.Z, T3.Z, T5.W, BS:VEC_021/SCL_122
+; EG-NEXT:     ADD_INT T4.W, T0.X, PS,
+; EG-NEXT:     SUB_INT * T6.W, T0.X, PS,
+; EG-NEXT:     CNDE_INT T0.Z, T0.Z, PV.W, PS,
+; EG-NEXT:     XOR_INT T4.W, PV.Z, T5.W,
+; EG-NEXT:     ADD_INT * T1.W, T1.W, PV.Y,
+; EG-NEXT:     XOR_INT T1.W, PS, T2.Y,
+; EG-NEXT:     MULHI * T0.X, PV.Z, PV.W,
+; EG-NEXT:     SETGT_INT T6.W, 0.0, T1.Y,
+; EG-NEXT:     RECIP_UINT * T0.Y, PV.W,
+; EG-NEXT:     ADD_INT T7.W, T1.Y, PV.W,
+; EG-NEXT:     MULLO_INT * T0.Z, PS, T1.W,
+; EG-NEXT:     XOR_INT T1.Z, PV.W, T6.W, BS:VEC_021/SCL_122
+; EG-NEXT:     SUB_INT T7.W, 0.0, PS,
+; EG-NEXT:     MULHI * T1.Y, T0.Y, T1.W,
+; EG-NEXT:     CNDE_INT T7.W, PS, PV.W, T0.Z,
+; EG-NEXT:     RECIP_UINT * T0.Z, PV.Z,
+; EG-NEXT:     SETGT_INT T8.W, 0.0, T3.W,
+; EG-NEXT:     MULHI * T2.X, PV.W, T0.Y,
+; EG-NEXT:     ADD_INT T4.Y, T3.W, PV.W,
+; EG-NEXT:     ADD_INT T2.Z, T0.Y, PS,
+; EG-NEXT:     SUB_INT T3.W, T0.Y, PS,
+; EG-NEXT:     MULLO_INT * T0.Y, T0.Z, T1.Z,
+; EG-NEXT:     CNDE_INT T2.X, T1.Y, PV.Z, PV.W,
+; EG-NEXT:     XOR_INT T1.Y, PV.Y, T8.W,
+; EG-NEXT:     SETGT_INT T2.Z, 0.0, T1.X,
+; EG-NEXT:     SUB_INT T3.W, 0.0, PS,
+; EG-NEXT:     MULHI * T3.Z, T0.Z, T1.Z,
+; EG-NEXT:     CNDE_INT T4.Z, PS, PV.W, T0.Y,
+; EG-NEXT:     ADD_INT T3.W, T1.X, PV.Z,
+; EG-NEXT:     MULHI * T0.Y, PV.X, PV.Y,
+; EG-NEXT:     XOR_INT T3.W, PV.W, T2.Z, BS:VEC_021/SCL_122
+; EG-NEXT:     MULHI * T1.X, PV.Z, T0.Z,
+; EG-NEXT:     RECIP_UINT * T2.X, PV.W,
+; EG-NEXT:     MULLO_INT * T4.X, PS, T3.W,
+; EG-NEXT:     SETGT_INT T4.Z, 0.0, T3.Y,
+; EG-NEXT:     SUB_INT T7.W, 0.0, PS,
+; EG-NEXT:     MULHI * T4.Y, T2.X, T3.W,
+; EG-NEXT:     CNDE_INT T4.X, PS, PV.W, T4.X,
+; EG-NEXT:     ADD_INT T3.Y, T3.Y, PV.Z,
+; EG-NEXT:     ADD_INT T5.Z, T0.Z, T1.X,
+; EG-NEXT:     SUB_INT T7.W, T0.Z, T1.X,
+; EG-NEXT:     MULLO_INT * T0.Z, T0.Y, T1.W,
+; EG-NEXT:     CNDE_INT T5.Y, T3.Z, PV.Z, PV.W,
+; EG-NEXT:     XOR_INT T3.Z, PV.Y, T4.Z,
+; EG-NEXT:     SUB_INT T7.W, T1.Y, PS,
+; EG-NEXT:     MULHI * T1.X, PV.X, T2.X,
+; EG-NEXT:     SETGE_UINT T5.Z, PV.W, T1.W,
+; EG-NEXT:     SETGE_UINT T1.W, T1.Y, T0.Z,
+; EG-NEXT:     MULHI * T0.Z, PV.Y, PV.Z,
+; EG-NEXT:     AND_INT T1.Y, PV.Z, PV.W,
+; EG-NEXT:     ADD_INT T5.Z, T0.Y, 1,
+; EG-NEXT:     SETGT_INT T7.W, 0.0, T3.X,
+; EG-NEXT:     MULLO_INT * T3.Y, PS, T1.Z,
+; EG-NEXT:     SUB_INT T4.X, T3.Z, PS,
+; EG-NEXT:     ADD_INT T5.Y, T3.X, PV.W,
+; EG-NEXT:     ADD_INT T6.Z, T2.X, T1.X, BS:VEC_120/SCL_212
+; EG-NEXT:     SUB_INT * T9.W, T2.X, T1.X, BS:VEC_120/SCL_212
+; EG-NEXT:     MULLO_INT * T1.X, T0.X, T2.W,
+; EG-NEXT:     CNDE_INT T2.X, T4.Y, T6.Z, T9.W,
+; EG-NEXT:     XOR_INT T4.Y, T5.Y, T7.W, BS:VEC_201
+; EG-NEXT:     SUB_INT T6.Z, T4.W, PS, BS:VEC_120/SCL_212
+; EG-NEXT:     SETGE_UINT T9.W, T4.X, T1.Z, BS:VEC_102/SCL_221
+; EG-NEXT:     SETGE_UINT * T10.W, T3.Z, T3.Y,
+; EG-NEXT:     AND_INT T3.X, PV.W, PS,
+; EG-NEXT:     ADD_INT T3.Y, T0.Z, 1,
+; EG-NEXT:     SETGE_UINT T1.Z, PV.Z, T2.W,
+; EG-NEXT:     SETGE_UINT T2.W, T4.W, T1.X,
+; EG-NEXT:     MULHI * T1.X, PV.X, PV.Y,
+; EG-NEXT:     AND_INT T2.X, PV.Z, PV.W,
+; EG-NEXT:     ADD_INT T5.Y, T0.X, 1,
+; EG-NEXT:     CNDE_INT T1.Z, PV.X, T0.Z, PV.Y,
+; EG-NEXT:     ADD_INT T4.W, T0.Z, literal.x,
+; EG-NEXT:     MULLO_INT * T0.Z, PS, T3.W,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T3.X, T10.W, PV.W, PV.Z,
+; EG-NEXT:     CNDE_INT T3.Y, PV.X, T0.X, PV.Y,
+; EG-NEXT:     CNDE_INT T1.Z, T1.Y, T0.Y, T5.Z,
+; EG-NEXT:     ADD_INT T4.W, T0.Y, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     SUB_INT * T9.W, T4.Y, PS,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T0.X, T0.X, literal.x,
+; EG-NEXT:     SETGE_UINT T0.Y, PS, T3.W,
+; EG-NEXT:     SETGE_UINT T0.Z, T4.Y, T0.Z,
+; EG-NEXT:     CNDE_INT T1.W, T1.W, PV.W, PV.Z,
+; EG-NEXT:     XOR_INT * T3.W, T8.W, T2.Y,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     XOR_INT T2.X, PV.W, PS,
+; EG-NEXT:     AND_INT T0.Y, PV.Y, PV.Z,
+; EG-NEXT:     ADD_INT T1.Z, T1.X, 1,
+; EG-NEXT:     CNDE_INT T1.W, T2.W, PV.X, T3.Y,
+; EG-NEXT:     XOR_INT * T0.W, T5.W, T0.W,
+; EG-NEXT:     XOR_INT T0.X, T4.Z, T6.W, BS:VEC_021/SCL_122
+; EG-NEXT:     XOR_INT T1.Y, PV.W, PS,
+; EG-NEXT:     CNDE_INT T1.Z, PV.Y, T1.X, PV.Z,
+; EG-NEXT:     ADD_INT T1.W, T1.X, literal.x,
+; EG-NEXT:     SUB_INT * T3.W, PV.X, T3.W,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.Y, T0.Z, PV.W, PV.Z,
+; EG-NEXT:     SUB_INT T3.Z, PV.Y, T0.W,
+; EG-NEXT:     XOR_INT T0.W, T7.W, T2.Z,
+; EG-NEXT:     XOR_INT * T1.W, T3.X, PV.X,
+; EG-NEXT:     SUB_INT T3.Y, PS, T0.X,
+; EG-NEXT:     XOR_INT * T1.W, PV.Y, PV.W,
+; EG-NEXT:     SUB_INT T3.X, PV.W, T0.W,
+; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
   %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
@@ -77,17 +1433,284 @@ define amdgpu_kernel void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad
 }
 
 define amdgpu_kernel void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; GCN-LABEL: sdiv_v4i32_4:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; GCN-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GCN-NEXT:    v_lshrrev_b32_e32 v4, 30, v4
+; GCN-NEXT:    v_lshrrev_b32_e32 v5, 30, v5
+; GCN-NEXT:    v_lshrrev_b32_e32 v6, 30, v6
+; GCN-NEXT:    v_lshrrev_b32_e32 v7, 30, v7
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, v7, v3
+; GCN-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 2, v2
+; GCN-NEXT:    v_ashrrev_i32_e32 v3, 2, v3
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: sdiv_v4i32_4:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; TONGA-NEXT:    v_lshrrev_b32_e32 v4, 30, v4
+; TONGA-NEXT:    v_lshrrev_b32_e32 v5, 30, v5
+; TONGA-NEXT:    v_lshrrev_b32_e32 v6, 30, v6
+; TONGA-NEXT:    v_lshrrev_b32_e32 v7, 30, v7
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v5, v1
+; TONGA-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
+; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v7, v3
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v2, 2, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v3, 2, v3
+; TONGA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_v4i32_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 30, v4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 30, v5
+; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 30, v6
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 30, v7
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v4
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v5
+; GFX9-NEXT:    v_add_u32_e32 v2, v2, v6
+; GFX9-NEXT:    v_add_u32_e32 v3, v3, v7
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 2, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 2, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 2, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 2, v3
+; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: sdiv_v4i32_4:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 24, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     ASHR T1.W, T0.W, literal.x,
+; EG-NEXT:     ASHR * T2.W, T0.Z, literal.x,
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR * T1.W, PV.W, literal.x,
+; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T1.Z, T0.W, PV.W,
+; EG-NEXT:     LSHR T0.W, T2.W, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:     ASHR * T1.W, T0.Y, literal.y,
+; EG-NEXT:    30(4.203895e-44), 31(4.344025e-44)
+; EG-NEXT:     LSHR T1.Y, PS, literal.x,
+; EG-NEXT:     ASHR T2.Z, T0.X, literal.y,
+; EG-NEXT:     ADD_INT T0.W, T0.Z, PV.W,
+; EG-NEXT:     ASHR * T1.W, PV.Z, literal.z,
+; EG-NEXT:    30(4.203895e-44), 31(4.344025e-44)
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR T1.Z, PV.W, literal.x,
+; EG-NEXT:     LSHR T0.W, PV.Z, literal.y,
+; EG-NEXT:     ADD_INT * T2.W, T0.Y, PV.Y,
+; EG-NEXT:    2(2.802597e-45), 30(4.203895e-44)
+; EG-NEXT:     ASHR T1.Y, PS, literal.x,
+; EG-NEXT:     ADD_INT * T0.W, T0.X, PV.W,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR T1.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_sdiv_i8:
-; SI: v_rcp_iflag_f32
-; SI: v_bfe_i32 [[BFE:v[0-9]+]], v{{[0-9]+}}, 0, 8
-; SI: buffer_store_dword [[BFE]]
 define amdgpu_kernel void @v_sdiv_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+; GCN-LABEL: v_sdiv_i8:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_sbyte v0, off, s[8:11], 0
+; GCN-NEXT:    buffer_load_sbyte v1, off, s[8:11], 0 offset:1
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v2, v0, v1
+; GCN-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 30, v2
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v3, v1
+; GCN-NEXT:    v_or_b32_e32 v2, 1, v2
+; GCN-NEXT:    v_mul_f32_e32 v3, v0, v3
+; GCN-NEXT:    v_trunc_f32_e32 v3, v3
+; GCN-NEXT:    v_mad_f32 v0, -v3, v1, v0
+; GCN-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, |v1|
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: v_sdiv_i8:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0 offset:1
+; TONGA-NEXT:    buffer_load_sbyte v2, off, s[4:7], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(1)
+; TONGA-NEXT:    v_cvt_f32_i32_e32 v1, v0
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_cvt_f32_i32_e32 v3, v2
+; TONGA-NEXT:    v_xor_b32_e32 v0, v2, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v4, v1
+; TONGA-NEXT:    v_or_b32_e32 v0, 1, v0
+; TONGA-NEXT:    v_mul_f32_e32 v2, v3, v4
+; TONGA-NEXT:    v_trunc_f32_e32 v2, v2
+; TONGA-NEXT:    v_mad_f32 v3, -v2, v1, v3
+; TONGA-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; TONGA-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v1|
+; TONGA-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; TONGA-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; TONGA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_sdiv_i8:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0 offset:1
+; GFX9-NEXT:    buffer_load_sbyte v2, off, s[4:7], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, v0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, v2
+; GFX9-NEXT:    v_xor_b32_e32 v0, v2, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v1
+; GFX9-NEXT:    v_or_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_mul_f32_e32 v2, v3, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v2
+; GFX9-NEXT:    v_mad_f32 v2, -v2, v1, v3
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, |v1|
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_add_u32_e32 v0, v4, v0
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 8
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: v_sdiv_i8:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @6
+; EG-NEXT:    ALU 21, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T1.X, T0.X, 1, #1
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     BFE_INT * T0.W, T1.X, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     INT_TO_FLT * T0.Y, PV.W,
+; EG-NEXT:     BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT:     RECIP_IEEE * T0.X, PS,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     INT_TO_FLT * T0.Z, PV.W,
+; EG-NEXT:     MUL_IEEE * T2.W, PS, T0.X,
+; EG-NEXT:     TRUNC T2.W, PV.W,
+; EG-NEXT:     XOR_INT * T0.W, T1.W, T0.W,
+; EG-NEXT:     ASHR T0.W, PS, literal.x,
+; EG-NEXT:     MULADD_IEEE * T1.W, -PV.W, T0.Y, T0.Z,
+; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
+; EG-NEXT:     TRUNC T0.Z, T2.W,
+; EG-NEXT:     SETGE T1.W, |PS|, |T0.Y|,
+; EG-NEXT:     OR_INT * T0.W, PV.W, 1,
+; EG-NEXT:     CNDE T0.W, PV.W, 0.0, PS,
+; EG-NEXT:     FLT_TO_INT * T1.W, PV.Z,
+; EG-NEXT:     ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT:     BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
   %num = load i8, i8 addrspace(1) * %in
   %den = load i8, i8 addrspace(1) * %den_ptr
@@ -97,11 +1720,184 @@ define amdgpu_kernel void @v_sdiv_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %i
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_sdiv_i23:
-; SI: v_rcp_iflag_f32
-; SI: v_bfe_i32 [[BFE:v[0-9]+]], v{{[0-9]+}}, 0, 23
-; SI: buffer_store_dword [[BFE]]
 define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)* %in) {
+; GCN-LABEL: v_sdiv_i23:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:2
+; GCN-NEXT:    buffer_load_ubyte v3, off, s[8:11], 0 offset:6
+; GCN-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
+; GCN-NEXT:    buffer_load_ushort v2, off, s[8:11], 0 offset:4
+; GCN-NEXT:    s_waitcnt vmcnt(3)
+; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GCN-NEXT:    s_waitcnt vmcnt(2)
+; GCN-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_or_b32_e32 v1, v2, v3
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 23
+; GCN-NEXT:    v_bfe_i32 v1, v1, 0, 23
+; GCN-NEXT:    v_xor_b32_e32 v2, v0, v1
+; GCN-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 30, v2
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v3, v1
+; GCN-NEXT:    v_or_b32_e32 v2, 1, v2
+; GCN-NEXT:    v_mul_f32_e32 v3, v0, v3
+; GCN-NEXT:    v_trunc_f32_e32 v3, v3
+; GCN-NEXT:    v_mad_f32 v0, -v3, v1, v0
+; GCN-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, |v1|
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 23
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: v_sdiv_i23:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0 offset:2
+; TONGA-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:4
+; TONGA-NEXT:    buffer_load_ubyte v2, off, s[4:7], 0 offset:6
+; TONGA-NEXT:    buffer_load_ushort v3, off, s[4:7], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(3)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; TONGA-NEXT:    s_waitcnt vmcnt(1)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; TONGA-NEXT:    v_or_b32_e32 v1, v1, v2
+; TONGA-NEXT:    v_bfe_i32 v1, v1, 0, 23
+; TONGA-NEXT:    v_cvt_f32_i32_e32 v2, v1
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_or_b32_e32 v0, v3, v0
+; TONGA-NEXT:    v_bfe_i32 v0, v0, 0, 23
+; TONGA-NEXT:    v_cvt_f32_i32_e32 v3, v0
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v4, v2
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; TONGA-NEXT:    v_or_b32_e32 v0, 1, v0
+; TONGA-NEXT:    v_mul_f32_e32 v1, v3, v4
+; TONGA-NEXT:    v_trunc_f32_e32 v1, v1
+; TONGA-NEXT:    v_mad_f32 v3, -v1, v2, v3
+; TONGA-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; TONGA-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
+; TONGA-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; TONGA-NEXT:    v_bfe_i32 v0, v0, 0, 23
+; TONGA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_sdiv_i23:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0 offset:2
+; GFX9-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:4
+; GFX9-NEXT:    buffer_load_ubyte v2, off, s[4:7], 0 offset:6
+; GFX9-NEXT:    buffer_load_ushort v3, off, s[4:7], 0
+; GFX9-NEXT:    s_waitcnt vmcnt(3)
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
+; GFX9-NEXT:    v_bfe_i32 v1, v1, 0, 23
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v2, v1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_or_b32_e32 v0, v3, v0
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 23
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, v0
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v2
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; GFX9-NEXT:    v_or_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_mul_f32_e32 v1, v3, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v1
+; GFX9-NEXT:    v_mad_f32 v1, -v1, v2, v3
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v2|
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_add_u32_e32 v0, v4, v0
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 23
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: v_sdiv_i23:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 3 @6
+; EG-NEXT:    ALU 33, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T1.X, T0.X, 6, #1
+; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_8 T3.X, T0.X, 2, #1
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 4, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 15:
+; EG-NEXT:     LSHL * T0.W, T1.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT T0.W, T0.X, PV.W,
+; EG-NEXT:     LSHL * T1.W, T3.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.W, PV.W, literal.x,
+; EG-NEXT:     OR_INT * T1.W, T2.X, T1.W,
+; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL T1.W, PS, literal.x,
+; EG-NEXT:     INT_TO_FLT * T0.X, PV.W,
+; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T1.W, PV.W, literal.x,
+; EG-NEXT:     RECIP_IEEE * T0.Y, PS,
+; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; EG-NEXT:     INT_TO_FLT * T0.Z, PV.W,
+; EG-NEXT:     MUL_IEEE * T2.W, PS, T0.Y,
+; EG-NEXT:     TRUNC T2.W, PV.W,
+; EG-NEXT:     XOR_INT * T0.W, T1.W, T0.W,
+; EG-NEXT:     ASHR T0.W, PS, literal.x,
+; EG-NEXT:     MULADD_IEEE * T1.W, -PV.W, T0.X, T0.Z,
+; EG-NEXT:    30(4.203895e-44), 0(0.000000e+00)
+; EG-NEXT:     TRUNC T0.Z, T2.W,
+; EG-NEXT:     SETGE T1.W, |PS|, |T0.X|,
+; EG-NEXT:     OR_INT * T0.W, PV.W, 1,
+; EG-NEXT:     CNDE T0.W, PV.W, 0.0, PS,
+; EG-NEXT:     FLT_TO_INT * T1.W, PV.Z,
+; EG-NEXT:     ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    9(1.261169e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    9(1.261169e-44), 2(2.802597e-45)
   %den_ptr = getelementptr i23, i23 addrspace(1)* %in, i23 1
   %num = load i23, i23 addrspace(1) * %in
   %den = load i23, i23 addrspace(1) * %den_ptr
@@ -111,11 +1907,187 @@ define amdgpu_kernel void @v_sdiv_i23(i32 addrspace(1)* %out, i23 addrspace(1)*
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_sdiv_i24:
-; SI: v_rcp_iflag_f32
-; SI: v_bfe_i32 [[BFE:v[0-9]+]], v{{[0-9]+}}, 0, 24
-; SI: buffer_store_dword [[BFE]]
 define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)* %in) {
+; GCN-LABEL: v_sdiv_i24:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_sbyte v1, off, s[8:11], 0 offset:2
+; GCN-NEXT:    buffer_load_sbyte v3, off, s[8:11], 0 offset:6
+; GCN-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
+; GCN-NEXT:    buffer_load_ushort v2, off, s[8:11], 0 offset:4
+; GCN-NEXT:    s_waitcnt vmcnt(3)
+; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GCN-NEXT:    s_waitcnt vmcnt(2)
+; GCN-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_or_b32_e32 v1, v2, v3
+; GCN-NEXT:    v_xor_b32_e32 v2, v0, v1
+; GCN-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GCN-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 30, v2
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v3, v1
+; GCN-NEXT:    v_or_b32_e32 v2, 1, v2
+; GCN-NEXT:    v_mul_f32_e32 v3, v0, v3
+; GCN-NEXT:    v_trunc_f32_e32 v3, v3
+; GCN-NEXT:    v_mad_f32 v0, -v3, v1, v0
+; GCN-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, |v1|
+; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: v_sdiv_i24:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0 offset:2
+; TONGA-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:4
+; TONGA-NEXT:    buffer_load_sbyte v2, off, s[4:7], 0 offset:6
+; TONGA-NEXT:    buffer_load_ushort v3, off, s[4:7], 0
+; TONGA-NEXT:    s_waitcnt vmcnt(3)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; TONGA-NEXT:    s_waitcnt vmcnt(1)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; TONGA-NEXT:    v_or_b32_e32 v1, v1, v2
+; TONGA-NEXT:    v_cvt_f32_i32_e32 v2, v1
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_or_b32_e32 v0, v3, v0
+; TONGA-NEXT:    v_cvt_f32_i32_e32 v3, v0
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v1
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v4, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; TONGA-NEXT:    v_or_b32_e32 v0, 1, v0
+; TONGA-NEXT:    v_mul_f32_e32 v1, v3, v4
+; TONGA-NEXT:    v_trunc_f32_e32 v1, v1
+; TONGA-NEXT:    v_mad_f32 v3, -v1, v2, v3
+; TONGA-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; TONGA-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
+; TONGA-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
+; TONGA-NEXT:    v_bfe_i32 v0, v0, 0, 24
+; TONGA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_sdiv_i24:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
+; GFX9-NEXT:    buffer_load_sbyte v1, off, s[4:7], 0 offset:2
+; GFX9-NEXT:    buffer_load_ushort v2, off, s[4:7], 0 offset:4
+; GFX9-NEXT:    buffer_load_sbyte v3, off, s[4:7], 0 offset:6
+; GFX9-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GFX9-NEXT:    v_or_b32_e32 v2, v2, v3
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, v2
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, v0
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 30, v0
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v3
+; GFX9-NEXT:    v_or_b32_e32 v0, 1, v0
+; GFX9-NEXT:    v_mul_f32_e32 v2, v1, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v2
+; GFX9-NEXT:    v_mad_f32 v1, -v2, v3, v1
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v3|
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_add_u32_e32 v0, v4, v0
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 24
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: v_sdiv_i24:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 3 @6
+; EG-NEXT:    ALU 43, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T1.X, T0.X, 6, #1
+; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_8 T3.X, T0.X, 2, #1
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 4, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:    ALU clause starting at 15:
+; EG-NEXT:     BFE_INT * T0.W, T1.X, 0.0, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT * T0.W, T0.X, PV.W,
+; EG-NEXT:     SETGT_INT * T1.W, 0.0, PV.W,
+; EG-NEXT:     ADD_INT * T0.W, T0.W, PV.W,
+; EG-NEXT:     XOR_INT * T0.W, PV.W, T1.W,
+; EG-NEXT:     RECIP_UINT * T0.X, PV.W,
+; EG-NEXT:     BFE_INT T2.W, T3.X, 0.0, literal.x,
+; EG-NEXT:     MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHL T0.Z, PV.W, literal.x,
+; EG-NEXT:     SUB_INT T2.W, 0.0, PS,
+; EG-NEXT:     MULHI * T1.X, T0.X, T0.W,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T2.W, PS, PV.W, T0.Y,
+; EG-NEXT:     OR_INT * T3.W, T2.X, PV.Z,
+; EG-NEXT:     SETGT_INT T4.W, 0.0, PS,
+; EG-NEXT:     MULHI * T0.Y, PV.W, T0.X,
+; EG-NEXT:     ADD_INT T0.Z, T3.W, PV.W,
+; EG-NEXT:     ADD_INT T2.W, T0.X, PS,
+; EG-NEXT:     SUB_INT * T3.W, T0.X, PS,
+; EG-NEXT:     CNDE_INT T2.W, T1.X, PV.W, PS,
+; EG-NEXT:     XOR_INT * T3.W, PV.Z, T4.W,
+; EG-NEXT:     MULHI * T0.X, PV.W, PS,
+; EG-NEXT:     MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT:     SUB_INT * T2.W, T3.W, PS,
+; EG-NEXT:     SETGE_UINT T0.W, PV.W, T0.W,
+; EG-NEXT:     SETGE_UINT * T2.W, T3.W, T0.Y,
+; EG-NEXT:     AND_INT T0.W, PV.W, PS,
+; EG-NEXT:     ADD_INT * T3.W, T0.X, 1,
+; EG-NEXT:     CNDE_INT T0.W, PV.W, T0.X, PS,
+; EG-NEXT:     ADD_INT * T3.W, T0.X, literal.x,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.W, T2.W, PS, PV.W,
+; EG-NEXT:     XOR_INT * T1.W, T4.W, T1.W,
+; EG-NEXT:     XOR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     SUB_INT * T0.W, PV.W, T1.W,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
   %den_ptr = getelementptr i24, i24 addrspace(1)* %in, i24 1
   %num = load i24, i24 addrspace(1) * %in
   %den = load i24, i24 addrspace(1) * %den_ptr
@@ -125,9 +2097,220 @@ define amdgpu_kernel void @v_sdiv_i24(i32 addrspace(1)* %out, i24 addrspace(1)*
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_sdiv_i25:
-; SI-NOT: v_rcp_f32
 define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)* %in) {
+; GCN-LABEL: v_sdiv_i25:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s10, s6
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s8, s2
+; GCN-NEXT:    s_mov_b32 s9, s3
+; GCN-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_bfe_i32 v2, v0, 0, 25
+; GCN-NEXT:    v_bfe_i32 v3, v1, 0, 25
+; GCN-NEXT:    v_bfe_i32 v0, v0, 24, 1
+; GCN-NEXT:    v_bfe_i32 v1, v1, 24, 1
+; GCN-NEXT:    v_xor_b32_e32 v4, v0, v1
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, v0, v2
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, v1, v3
+; GCN-NEXT:    v_xor_b32_e32 v0, v2, v0
+; GCN-NEXT:    v_xor_b32_e32 v1, v3, v1
+; GCN-NEXT:    v_cvt_f32_u32_e32 v2, v1
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GCN-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GCN-NEXT:    v_mul_hi_u32 v3, v2, v1
+; GCN-NEXT:    v_mul_lo_u32 v5, v2, v1
+; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v5
+; GCN-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v3
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, v6, s[0:1]
+; GCN-NEXT:    v_mul_hi_u32 v3, v3, v2
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, v3, v2
+; GCN-NEXT:    v_subrev_i32_e32 v2, vcc, v3, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[0:1]
+; GCN-NEXT:    v_mul_hi_u32 v2, v2, v0
+; GCN-NEXT:    v_mul_lo_u32 v3, v2, v1
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, -1, v2
+; GCN-NEXT:    v_subrev_i32_e32 v7, vcc, v3, v0
+; GCN-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; GCN-NEXT:    v_cmp_ge_u32_e64 s[0:1], v7, v1
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, v5, s[0:1]
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GCN-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 25
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: v_sdiv_i25:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s7, 0xf000
+; TONGA-NEXT:    s_mov_b32 s6, -1
+; TONGA-NEXT:    s_mov_b32 s2, s6
+; TONGA-NEXT:    s_mov_b32 s3, s7
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s10
+; TONGA-NEXT:    s_mov_b32 s1, s11
+; TONGA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
+; TONGA-NEXT:    s_mov_b32 s4, s8
+; TONGA-NEXT:    s_mov_b32 s5, s9
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_bfe_i32 v2, v1, 0, 25
+; TONGA-NEXT:    v_bfe_i32 v1, v1, 24, 1
+; TONGA-NEXT:    v_add_u32_e32 v2, vcc, v1, v2
+; TONGA-NEXT:    v_xor_b32_e32 v2, v2, v1
+; TONGA-NEXT:    v_cvt_f32_u32_e32 v3, v2
+; TONGA-NEXT:    v_bfe_i32 v4, v0, 0, 25
+; TONGA-NEXT:    v_bfe_i32 v0, v0, 24, 1
+; TONGA-NEXT:    v_add_u32_e32 v4, vcc, v0, v4
+; TONGA-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; TONGA-NEXT:    v_xor_b32_e32 v4, v4, v0
+; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v1
+; TONGA-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; TONGA-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; TONGA-NEXT:    v_mul_lo_u32 v5, v3, v2
+; TONGA-NEXT:    v_mul_hi_u32 v6, v3, v2
+; TONGA-NEXT:    v_sub_u32_e32 v7, vcc, 0, v5
+; TONGA-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
+; TONGA-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[0:1]
+; TONGA-NEXT:    v_mul_hi_u32 v5, v5, v3
+; TONGA-NEXT:    v_add_u32_e32 v6, vcc, v5, v3
+; TONGA-NEXT:    v_subrev_u32_e32 v3, vcc, v5, v3
+; TONGA-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[0:1]
+; TONGA-NEXT:    v_mul_hi_u32 v3, v3, v4
+; TONGA-NEXT:    v_mul_lo_u32 v1, v3, v2
+; TONGA-NEXT:    v_add_u32_e32 v5, vcc, 1, v3
+; TONGA-NEXT:    v_add_u32_e32 v6, vcc, -1, v3
+; TONGA-NEXT:    v_subrev_u32_e32 v7, vcc, v1, v4
+; TONGA-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v1
+; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v7, v2
+; TONGA-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; TONGA-NEXT:    v_cndmask_b32_e64 v1, v3, v5, s[0:1]
+; TONGA-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v0
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v1, v0
+; TONGA-NEXT:    v_bfe_i32 v0, v0, 0, 25
+; TONGA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v_sdiv_i25:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s7, 0xf000
+; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_mov_b32 s10, s6
+; GFX9-NEXT:    s_mov_b32 s11, s7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s8, s2
+; GFX9-NEXT:    s_mov_b32 s9, s3
+; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GFX9-NEXT:    s_mov_b32 s4, s0
+; GFX9-NEXT:    s_mov_b32 s5, s1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_bfe_i32 v2, v1, 0, 25
+; GFX9-NEXT:    v_bfe_i32 v1, v1, 24, 1
+; GFX9-NEXT:    v_add_u32_e32 v2, v2, v1
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v1
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v2
+; GFX9-NEXT:    v_bfe_i32 v6, v0, 0, 25
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 24, 1
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v2
+; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v2
+; GFX9-NEXT:    v_sub_u32_e32 v7, 0, v4
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v4, v4, v3
+; GFX9-NEXT:    v_add_u32_e32 v5, v6, v0
+; GFX9-NEXT:    v_xor_b32_e32 v5, v5, v0
+; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX9-NEXT:    v_add_u32_e32 v6, v3, v4
+; GFX9-NEXT:    v_sub_u32_e32 v3, v3, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v3, v3, v5
+; GFX9-NEXT:    v_mul_lo_u32 v4, v3, v2
+; GFX9-NEXT:    v_add_u32_e32 v1, 1, v3
+; GFX9-NEXT:    v_add_u32_e32 v6, -1, v3
+; GFX9-NEXT:    v_sub_u32_e32 v7, v5, v4
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v5, v4
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[0:1], v7, v2
+; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, v1, s[0:1]
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
+; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v0
+; GFX9-NEXT:    v_sub_u32_e32 v0, v1, v0
+; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 25
+; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: v_sdiv_i25:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @6
+; EG-NEXT:    ALU 41, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 4, #1
+; EG-NEXT:     VTX_READ_32 T1.X, T1.X, 0, #1
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.X, KC0[2].Z,
+; EG-NEXT:     MOV * T1.X, PV.X,
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
+; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR * T0.W, PV.W, literal.x,
+; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
+; EG-NEXT:     SETGT_INT * T1.W, 0.0, PV.W,
+; EG-NEXT:     ADD_INT * T0.W, T0.W, PV.W,
+; EG-NEXT:     XOR_INT * T0.W, PV.W, T1.W,
+; EG-NEXT:     RECIP_UINT * T0.X, PV.W,
+; EG-NEXT:     MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT:     LSHL T0.Z, T1.X, literal.x,
+; EG-NEXT:     SUB_INT T2.W, 0.0, PS,
+; EG-NEXT:     MULHI * T1.X, T0.X, T0.W,
+; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T2.W, PS, PV.W, T0.Y,
+; EG-NEXT:     ASHR * T3.W, PV.Z, literal.x,
+; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
+; EG-NEXT:     SETGT_INT T4.W, 0.0, PS,
+; EG-NEXT:     MULHI * T0.Y, PV.W, T0.X,
+; EG-NEXT:     ADD_INT T0.Z, T3.W, PV.W,
+; EG-NEXT:     ADD_INT T2.W, T0.X, PS,
+; EG-NEXT:     SUB_INT * T3.W, T0.X, PS,
+; EG-NEXT:     CNDE_INT T2.W, T1.X, PV.W, PS,
+; EG-NEXT:     XOR_INT * T3.W, PV.Z, T4.W,
+; EG-NEXT:     MULHI * T0.X, PV.W, PS,
+; EG-NEXT:     MULLO_INT * T0.Y, PS, T0.W,
+; EG-NEXT:     SUB_INT * T2.W, T3.W, PS,
+; EG-NEXT:     SETGE_UINT T0.W, PV.W, T0.W,
+; EG-NEXT:     SETGE_UINT * T2.W, T3.W, T0.Y,
+; EG-NEXT:     AND_INT T0.W, PV.W, PS,
+; EG-NEXT:     ADD_INT * T3.W, T0.X, 1,
+; EG-NEXT:     CNDE_INT T0.W, PV.W, T0.X, PS,
+; EG-NEXT:     ADD_INT * T3.W, T0.X, literal.x,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT T0.W, T2.W, PS, PV.W,
+; EG-NEXT:     XOR_INT * T1.W, T4.W, T1.W,
+; EG-NEXT:     XOR_INT * T0.W, PV.W, PS,
+; EG-NEXT:     SUB_INT * T0.W, PV.W, T1.W,
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    7(9.809089e-45), 0(0.000000e+00)
+; EG-NEXT:     ASHR T0.X, PV.W, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    7(9.809089e-45), 2(2.802597e-45)
   %den_ptr = getelementptr i25, i25 addrspace(1)* %in, i25 1
   %num = load i25, i25 addrspace(1) * %in
   %den = load i25, i25 addrspace(1) * %den_ptr
@@ -158,13 +2341,146 @@ define amdgpu_kernel void @v_sdiv_i25(i32 addrspace(1)* %out, i25 addrspace(1)*
 ;   ret void
 ; }
 
-; FUNC-LABEL: @scalarize_mulhs_4xi32
-; SI: v_mul_hi_i32
-; SI: v_mul_hi_i32
-; SI: v_mul_hi_i32
-; SI: v_mul_hi_i32
-
 define amdgpu_kernel void @scalarize_mulhs_4xi32(<4 x i32> addrspace(1)* nocapture readonly %in, <4 x i32> addrspace(1)* nocapture %out) {
+; GCN-LABEL: scalarize_mulhs_4xi32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NEXT:    s_mov_b32 s0, 0x1389c755
+; GCN-NEXT:    s_mov_b32 s4, s2
+; GCN-NEXT:    s_mov_b32 s5, s3
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mul_hi_i32 v0, v0, s0
+; GCN-NEXT:    v_mul_hi_i32 v1, v1, s0
+; GCN-NEXT:    v_mul_hi_i32 v2, v2, s0
+; GCN-NEXT:    v_mul_hi_i32 v3, v3, s0
+; GCN-NEXT:    v_lshrrev_b32_e32 v4, 31, v0
+; GCN-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
+; GCN-NEXT:    v_lshrrev_b32_e32 v5, 31, v1
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 12, v1
+; GCN-NEXT:    v_lshrrev_b32_e32 v6, 31, v2
+; GCN-NEXT:    v_ashrrev_i32_e32 v2, 12, v2
+; GCN-NEXT:    v_lshrrev_b32_e32 v7, 31, v3
+; GCN-NEXT:    v_ashrrev_i32_e32 v3, 12, v3
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; TONGA-LABEL: scalarize_mulhs_4xi32:
+; TONGA:       ; %bb.0:
+; TONGA-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; TONGA-NEXT:    s_mov_b32 s3, 0xf000
+; TONGA-NEXT:    s_mov_b32 s2, -1
+; TONGA-NEXT:    s_waitcnt lgkmcnt(0)
+; TONGA-NEXT:    s_mov_b32 s0, s4
+; TONGA-NEXT:    s_mov_b32 s1, s5
+; TONGA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; TONGA-NEXT:    s_mov_b32 s0, 0x1389c755
+; TONGA-NEXT:    s_mov_b32 s4, s6
+; TONGA-NEXT:    s_mov_b32 s5, s7
+; TONGA-NEXT:    s_mov_b32 s6, s2
+; TONGA-NEXT:    s_mov_b32 s7, s3
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_mul_hi_i32 v0, v0, s0
+; TONGA-NEXT:    v_mul_hi_i32 v1, v1, s0
+; TONGA-NEXT:    v_mul_hi_i32 v2, v2, s0
+; TONGA-NEXT:    v_mul_hi_i32 v3, v3, s0
+; TONGA-NEXT:    v_lshrrev_b32_e32 v4, 31, v0
+; TONGA-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
+; TONGA-NEXT:    v_lshrrev_b32_e32 v5, 31, v1
+; TONGA-NEXT:    v_ashrrev_i32_e32 v1, 12, v1
+; TONGA-NEXT:    v_lshrrev_b32_e32 v6, 31, v2
+; TONGA-NEXT:    v_ashrrev_i32_e32 v2, 12, v2
+; TONGA-NEXT:    v_lshrrev_b32_e32 v7, 31, v3
+; TONGA-NEXT:    v_ashrrev_i32_e32 v3, 12, v3
+; TONGA-NEXT:    v_add_u32_e32 v0, vcc, v0, v4
+; TONGA-NEXT:    v_add_u32_e32 v1, vcc, v1, v5
+; TONGA-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
+; TONGA-NEXT:    v_add_u32_e32 v3, vcc, v3, v7
+; TONGA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; TONGA-NEXT:    s_endpgm
+;
+; GFX9-LABEL: scalarize_mulhs_4xi32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s3, 0xf000
+; GFX9-NEXT:    s_mov_b32 s2, -1
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s0, s4
+; GFX9-NEXT:    s_mov_b32 s1, s5
+; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+; GFX9-NEXT:    s_mov_b32 s0, 0x1389c755
+; GFX9-NEXT:    s_mov_b32 s4, s6
+; GFX9-NEXT:    s_mov_b32 s5, s7
+; GFX9-NEXT:    s_mov_b32 s6, s2
+; GFX9-NEXT:    s_mov_b32 s7, s3
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mul_hi_i32 v0, v0, s0
+; GFX9-NEXT:    v_mul_hi_i32 v1, v1, s0
+; GFX9-NEXT:    v_mul_hi_i32 v2, v2, s0
+; GFX9-NEXT:    v_mul_hi_i32 v3, v3, s0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v0
+; GFX9-NEXT:    v_ashrrev_i32_e32 v0, 12, v0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 31, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v1, 12, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 31, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 12, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 31, v3
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 12, v3
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v4
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v5
+; GFX9-NEXT:    v_add_u32_e32 v2, v2, v6
+; GFX9-NEXT:    v_add_u32_e32 v3, v3, v7
+; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: scalarize_mulhs_4xi32:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 25, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, KC0[2].Y,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MULHI_INT * T0.W, T0.W, literal.x,
+; EG-NEXT:    327796565(3.478022e-27), 0(0.000000e+00)
+; EG-NEXT:     ASHR T1.Z, PS, literal.x,
+; EG-NEXT:     LSHR T0.W, PS, literal.y,
+; EG-NEXT:     MULHI_INT * T0.Z, T0.Z, literal.z,
+; EG-NEXT:    12(1.681558e-44), 31(4.344025e-44)
+; EG-NEXT:    327796565(3.478022e-27), 0(0.000000e+00)
+; EG-NEXT:     ASHR T1.Y, PS, literal.x,
+; EG-NEXT:     LSHR T0.Z, PS, literal.y,
+; EG-NEXT:     ADD_INT T0.W, PV.Z, PV.W,
+; EG-NEXT:     MULHI_INT * T0.Y, T0.Y, literal.z,
+; EG-NEXT:    12(1.681558e-44), 31(4.344025e-44)
+; EG-NEXT:    327796565(3.478022e-27), 0(0.000000e+00)
+; EG-NEXT:     ASHR T2.Y, PS, literal.x,
+; EG-NEXT:     ADD_INT T0.Z, PV.Y, PV.Z,
+; EG-NEXT:     LSHR T1.W, PS, literal.y,
+; EG-NEXT:     MULHI_INT * T0.X, T0.X, literal.z,
+; EG-NEXT:    12(1.681558e-44), 31(4.344025e-44)
+; EG-NEXT:    327796565(3.478022e-27), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T0.Y, PV.Y, PV.W,
+; EG-NEXT:     ASHR T1.W, PS, literal.x,
+; EG-NEXT:     LSHR * T2.W, PS, literal.y,
+; EG-NEXT:    12(1.681558e-44), 31(4.344025e-44)
+; EG-NEXT:     ADD_INT T0.X, PV.W, PS,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
   %2 = sdiv <4 x i32> %1, <i32 53668, i32 53668, i32 53668, i32 53668>
   store <4 x i32> %2, <4 x i32> addrspace(1)* %out, align 16

From 7cebf0af4076c7d198ef8ef90b79d1ff422a42cd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 1 Jun 2019 19:40:07 +0000
Subject: [PATCH 0822/1176] [InlineCost] Don't add the soft float function call
 cost for the fneg idiom, fsub -0.0, %x

Summary: Fneg can be implemented with an xor rather than a function call so we don't need to add the function call overhead. This was pointed out in D62699

Reviewers: efriedma, cameron.mcinally

Reviewed By: efriedma

Subscribers: javed.absar, eraman, hiraditya, haicheng, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62747

llvm-svn: 362304
---
 llvm/lib/Analysis/InlineCost.cpp             |  7 ++++--
 llvm/test/Transforms/Inline/ARM/inline-fp.ll | 25 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index ced30d6e3b91c..a332a43900716 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -1095,9 +1096,11 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
 
   // If the instruction is floating point, and the target says this operation
   // is expensive, this may eventually become a library call. Treat the cost
-  // as such.
+  // as such. Unless it's fneg which can be implemented with an xor.
+  using namespace llvm::PatternMatch;
   if (I.getType()->isFloatingPointTy() &&
-      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
+      !match(&I, m_FNeg(m_Value())))
     addCost(InlineConstants::CallPenalty);
 
   return false;
diff --git a/llvm/test/Transforms/Inline/ARM/inline-fp.ll b/llvm/test/Transforms/Inline/ARM/inline-fp.ll
index fdc066c9ba685..1d74dfd15b187 100644
--- a/llvm/test/Transforms/Inline/ARM/inline-fp.ll
+++ b/llvm/test/Transforms/Inline/ARM/inline-fp.ll
@@ -12,6 +12,8 @@
 ; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
 ; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
 ; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75)
+; NOFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75)
 
 ; FULLFP-DAG: single inlined into test_single with (cost=0, threshold=75)
 ; FULLFP-DAG: single inlined into test_single with (cost=-15000, threshold=75)
@@ -21,6 +23,8 @@
 ; FULLFP-DAG: double inlined into test_double with (cost=-15000, threshold=75)
 ; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
 ; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; FULLFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75)
+; FULLFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75)
 
 ; SINGLEFP-DAG: single inlined into test_single with (cost=0, threshold=75)
 ; SINGLEFP-DAG: single inlined into test_single with (cost=-15000, threshold=75)
@@ -30,6 +34,8 @@
 ; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
 ; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
 ; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75)
+; SINGLEFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75)
 
 define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
   %call = call float @single(i32 %a, i8 zeroext %b)
@@ -55,6 +61,12 @@ define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
   ret i32 0
 }
 
+define i32 @test_single_force_soft_fneg(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
+  %call = call float @single_force_soft_fneg(i32 %a, i8 zeroext %b) #1
+  %call2 = call float @single_force_soft_fneg(i32 %c, i8 zeroext %d) #1
+  ret i32 0
+}
+
 define internal float @single(i32 %response, i8 zeroext %value1) #0 {
 entry:
   %conv = zext i8 %value1 to i32
@@ -106,6 +118,19 @@ entry:
   ret float %div
 }
 
+define internal float @single_force_soft_fneg(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fsub float -0.0, %0
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
 declare float @llvm.pow.f32(float, float) optsize minsize
 declare double @llvm.pow.f64(double, double) optsize minsize
 

From 900578d1c1273fcd1a157f12955cc21cc588b1c6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sat, 1 Jun 2019 20:21:53 +0000
Subject: [PATCH 0823/1176] [SimplifyIndVar] Refactor overflow check
 elimination code; NFC

Extract a willNotOverflow() helper function that is shared between
eliminateOverflowIntrinsic() and strengthenOverflowingOperation().
Use WithOverflowInst for the former.

We'll be able to reuse the same code for saturating intrinsics as
well.

llvm-svn: 362305
---
 llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 140 ++++++-------------
 1 file changed, 43 insertions(+), 97 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 6d634d8e917d9..9a4ebd3255d74 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -79,7 +80,7 @@ namespace {
     bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
     bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
 
-    bool eliminateOverflowIntrinsic(CallInst *CI);
+    bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
     bool eliminateTrunc(TruncInst *TI);
     bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
@@ -400,61 +401,29 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
   replaceSRemWithURem(Rem);
 }
 
-bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
-  auto *F = CI->getCalledFunction();
-  if (!F)
-    return false;
-
-  typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
-      const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
-  typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
-      const SCEV *, Type *, unsigned);
-
-  OperationFunctionTy Operation;
-  ExtensionFunctionTy Extension;
-
-  Instruction::BinaryOps RawOp;
-
-  // We always have exactly one of nsw or nuw.  If NoSignedOverflow is false, we
-  // have nuw.
-  bool NoSignedOverflow;
-
-  switch (F->getIntrinsicID()) {
+static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
+                            bool Signed, const SCEV *LHS, const SCEV *RHS) {
+  const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+                                            SCEV::NoWrapFlags, unsigned);
+  switch (BinOp) {
   default:
-    return false;
-
-  case Intrinsic::sadd_with_overflow:
-    Operation = &ScalarEvolution::getAddExpr;
-    Extension = &ScalarEvolution::getSignExtendExpr;
-    RawOp = Instruction::Add;
-    NoSignedOverflow = true;
-    break;
-
-  case Intrinsic::uadd_with_overflow:
+    llvm_unreachable("Unsupported binary op");
+  case Instruction::Add:
     Operation = &ScalarEvolution::getAddExpr;
-    Extension = &ScalarEvolution::getZeroExtendExpr;
-    RawOp = Instruction::Add;
-    NoSignedOverflow = false;
     break;
-
-  case Intrinsic::ssub_with_overflow:
+  case Instruction::Sub:
     Operation = &ScalarEvolution::getMinusSCEV;
-    Extension = &ScalarEvolution::getSignExtendExpr;
-    RawOp = Instruction::Sub;
-    NoSignedOverflow = true;
     break;
-
-  case Intrinsic::usub_with_overflow:
-    Operation = &ScalarEvolution::getMinusSCEV;
-    Extension = &ScalarEvolution::getZeroExtendExpr;
-    RawOp = Instruction::Sub;
-    NoSignedOverflow = false;
+  case Instruction::Mul:
+    Operation = &ScalarEvolution::getMulExpr;
     break;
   }
 
-  const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
-  const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+  const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
+      Signed ? &ScalarEvolution::getSignExtendExpr
+             : &ScalarEvolution::getZeroExtendExpr;
 
+  // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
   auto *NarrowTy = cast<IntegerType>(LHS->getType());
   auto *WideTy =
     IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
@@ -465,27 +434,32 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
   const SCEV *B =
       (SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
                        (SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
+  return A == B;
+}
 
-  if (A != B)
+bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
+  const SCEV *LHS = SE->getSCEV(WO->getLHS());
+  const SCEV *RHS = SE->getSCEV(WO->getRHS());
+  if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
     return false;
 
   // Proved no overflow, nuke the overflow check and, if possible, the overflow
   // intrinsic as well.
 
   BinaryOperator *NewResult = BinaryOperator::Create(
-      RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+      WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
 
-  if (NoSignedOverflow)
+  if (WO->isSigned())
     NewResult->setHasNoSignedWrap(true);
   else
     NewResult->setHasNoUnsignedWrap(true);
 
   SmallVector<ExtractValueInst *, 4> ToDelete;
 
-  for (auto *U : CI->users()) {
+  for (auto *U : WO->users()) {
     if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
       if (EVI->getIndices()[0] == 1)
-        EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+        EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
       else {
         assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
         EVI->replaceAllUsesWith(NewResult);
@@ -497,8 +471,8 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
   for (auto *EVI : ToDelete)
     EVI->eraseFromParent();
 
-  if (CI->use_empty())
-    CI->eraseFromParent();
+  if (WO->use_empty())
+    WO->eraseFromParent();
 
   return true;
 }
@@ -635,8 +609,8 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
       return eliminateSDiv(Bin);
   }
 
-  if (auto *CI = dyn_cast<CallInst>(UseInst))
-    if (eliminateOverflowIntrinsic(CI))
+  if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
+    if (eliminateOverflowIntrinsic(WO))
       return true;
 
   if (auto *TI = dyn_cast<TruncInst>(UseInst))
@@ -729,59 +703,31 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
 /// unsigned-overflow.  Returns true if anything changed, false otherwise.
 bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
                                                     Value *IVOperand) {
-
   // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
   if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
     return false;
 
-  const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
-                                               SCEV::NoWrapFlags, unsigned);
-  switch (BO->getOpcode()) {
-  default:
+  if (BO->getOpcode() != Instruction::Add &&
+      BO->getOpcode() != Instruction::Sub &&
+      BO->getOpcode() != Instruction::Mul)
     return false;
 
-  case Instruction::Add:
-    GetExprForBO = &ScalarEvolution::getAddExpr;
-    break;
-
-  case Instruction::Sub:
-    GetExprForBO = &ScalarEvolution::getMinusSCEV;
-    break;
-
-  case Instruction::Mul:
-    GetExprForBO = &ScalarEvolution::getMulExpr;
-    break;
-  }
-
-  unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
-  Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
   const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
   const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
-
   bool Changed = false;
 
-  if (!BO->hasNoUnsignedWrap()) {
-    const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
-    const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
-      SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
-      SCEV::FlagAnyWrap, 0u);
-    if (ExtendAfterOp == OpAfterExtend) {
-      BO->setHasNoUnsignedWrap();
-      SE->forgetValue(BO);
-      Changed = true;
-    }
+  if (!BO->hasNoUnsignedWrap() &&
+      willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) {
+    BO->setHasNoUnsignedWrap();
+    SE->forgetValue(BO);
+    Changed = true;
   }
 
-  if (!BO->hasNoSignedWrap()) {
-    const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
-    const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
-      SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
-      SCEV::FlagAnyWrap, 0u);
-    if (ExtendAfterOp == OpAfterExtend) {
-      BO->setHasNoSignedWrap();
-      SE->forgetValue(BO);
-      Changed = true;
-    }
+  if (!BO->hasNoSignedWrap() &&
+      willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) {
+    BO->setHasNoSignedWrap();
+    SE->forgetValue(BO);
+    Changed = true;
   }
 
   return Changed;

From 5a2a054028c27d0716faf9b513f3fb5c8723400f Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Sat, 1 Jun 2019 21:47:44 +0000
Subject: [PATCH 0824/1176] =?UTF-8?q?Silence=20'warning:=20extra=20?=
 =?UTF-8?q?=E2=80=98;=E2=80=99=20[-Wpedantic]'=20with=20GCC=207.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

llvm-svn: 362306
---
 lldb/source/Host/common/TCPSocket.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Host/common/TCPSocket.cpp b/lldb/source/Host/common/TCPSocket.cpp
index 201ddd352ddae..58f99f7832fe2 100644
--- a/lldb/source/Host/common/TCPSocket.cpp
+++ b/lldb/source/Host/common/TCPSocket.cpp
@@ -124,7 +124,7 @@ std::string TCPSocket::GetRemoteConnectionURI() const {
                          GetRemotePortNumber());
   }
   return "";
-};
+}
 
 Status TCPSocket::CreateSocket(int domain) {
   Status error;

From eeaecc63e933335c305263702eb1e3f9caab1f6c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 00:07:48 +0000
Subject: [PATCH 0825/1176] [X86] Add avx512 command lines and test cases to
 machine-combiner.ll

llvm-svn: 362307
---
 llvm/test/CodeGen/X86/machine-combiner.ll | 591 ++++++++++++++++++++--
 1 file changed, 543 insertions(+), 48 deletions(-)

diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index a1b2fba1e49c3..162cda8c86794 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,10 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
 
 ; Incremental updates of the instruction depths should be enough for this test
 ; case.
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
 
 ; Verify that the first two adds are independent regardless of how the inputs are
 ; commuted. The destination registers are used as source registers for the third add.
@@ -225,12 +228,18 @@ define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4
 ; SSE-NEXT:    addps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_adds_v4f32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
-; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: reassociate_adds_v4f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vaddps %xmm3, %xmm2, %xmm1
+; AVX1-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_adds_v4f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; AVX512-NEXT:    vaddps %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    retq
   %t0 = fmul <4 x float> %x0, %x1
   %t1 = fadd <4 x float> %x2, %t0
   %t2 = fadd <4 x float> %x3, %t1
@@ -247,12 +256,18 @@ define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1,
 ; SSE-NEXT:    addpd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_adds_v2f64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
-; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: reassociate_adds_v2f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
+; AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_adds_v2f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
+; AVX512-NEXT:    vaddpd %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    retq
   %t0 = fmul <2 x double> %x0, %x1
   %t1 = fadd <2 x double> %x2, %t0
   %t2 = fadd <2 x double> %x3, %t1
@@ -306,12 +321,28 @@ define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1,
 ; Verify that AVX 256-bit vector single-precision adds are reassociated.
 
 define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
-; AVX-LABEL: reassociate_adds_v8f32:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
-; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    retq
+; SSE-LABEL: reassociate_adds_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulps %xmm2, %xmm0
+; SSE-NEXT:    mulps %xmm3, %xmm1
+; SSE-NEXT:    addps %xmm6, %xmm4
+; SSE-NEXT:    addps %xmm4, %xmm0
+; SSE-NEXT:    addps %xmm7, %xmm5
+; SSE-NEXT:    addps %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_adds_v8f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vaddps %ymm3, %ymm2, %ymm1
+; AVX1-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_adds_v8f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
+; AVX512-NEXT:    vaddps %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    retq
   %t0 = fmul <8 x float> %x0, %x1
   %t1 = fadd <8 x float> %x2, %t0
   %t2 = fadd <8 x float> %x3, %t1
@@ -321,12 +352,28 @@ define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8
 ; Verify that AVX 256-bit vector double-precision adds are reassociated.
 
 define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
-; AVX-LABEL: reassociate_adds_v4f64:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
-; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    retq
+; SSE-LABEL: reassociate_adds_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulpd %xmm2, %xmm0
+; SSE-NEXT:    mulpd %xmm3, %xmm1
+; SSE-NEXT:    addpd %xmm6, %xmm4
+; SSE-NEXT:    addpd %xmm4, %xmm0
+; SSE-NEXT:    addpd %xmm7, %xmm5
+; SSE-NEXT:    addpd %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_adds_v4f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
+; AVX1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_adds_v4f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
+; AVX512-NEXT:    vaddpd %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    retq
   %t0 = fmul <4 x double> %x0, %x1
   %t1 = fadd <4 x double> %x2, %t0
   %t2 = fadd <4 x double> %x3, %t1
@@ -336,6 +383,16 @@ define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1,
 ; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
 
 define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; SSE-LABEL: reassociate_muls_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm2, %xmm0
+; SSE-NEXT:    addps %xmm3, %xmm1
+; SSE-NEXT:    mulps %xmm6, %xmm4
+; SSE-NEXT:    mulps %xmm4, %xmm0
+; SSE-NEXT:    mulps %xmm7, %xmm5
+; SSE-NEXT:    mulps %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_muls_v8f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
@@ -351,6 +408,16 @@ define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8
 ; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
 
 define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; SSE-LABEL: reassociate_muls_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm2, %xmm0
+; SSE-NEXT:    addpd %xmm3, %xmm1
+; SSE-NEXT:    mulpd %xmm6, %xmm4
+; SSE-NEXT:    mulpd %xmm4, %xmm0
+; SSE-NEXT:    mulpd %xmm7, %xmm5
+; SSE-NEXT:    mulpd %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_muls_v4f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
@@ -363,6 +430,168 @@ define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1,
   ret <4 x double> %t2
 }
 
+; Verify that AVX512 512-bit vector single-precision adds are reassociated.
+
+define <16 x float> @reassociate_adds_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
+; SSE-LABEL: reassociate_adds_v16f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulps %xmm4, %xmm0
+; SSE-NEXT:    mulps %xmm5, %xmm1
+; SSE-NEXT:    mulps %xmm6, %xmm2
+; SSE-NEXT:    mulps %xmm7, %xmm3
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_adds_v16f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmulps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmulps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vaddps %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddps %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vaddps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_adds_v16f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; AVX512-NEXT:    vaddps %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fmul <16 x float> %x0, %x1
+  %t1 = fadd <16 x float> %x2, %t0
+  %t2 = fadd <16 x float> %x3, %t1
+  ret <16 x float> %t2
+}
+
+; Verify that AVX512 512-bit vector double-precision adds are reassociated.
+
+define <8 x double> @reassociate_adds_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
+; SSE-LABEL: reassociate_adds_v8f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    mulpd %xmm4, %xmm0
+; SSE-NEXT:    mulpd %xmm5, %xmm1
+; SSE-NEXT:    mulpd %xmm6, %xmm2
+; SSE-NEXT:    mulpd %xmm7, %xmm3
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    addpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_adds_v8f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vaddpd %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddpd %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vaddpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_adds_v8f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
+; AVX512-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fmul <8 x double> %x0, %x1
+  %t1 = fadd <8 x double> %x2, %t0
+  %t2 = fadd <8 x double> %x3, %t1
+  ret <8 x double> %t2
+}
+
+; Verify that AVX512 512-bit vector single-precision multiplies are reassociated.
+
+define <16 x float> @reassociate_muls_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
+; SSE-LABEL: reassociate_muls_v16f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm4, %xmm0
+; SSE-NEXT:    addps %xmm5, %xmm1
+; SSE-NEXT:    addps %xmm6, %xmm2
+; SSE-NEXT:    addps %xmm7, %xmm3
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    mulps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_muls_v16f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vmulps %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vmulps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmulps %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vmulps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_muls_v16f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmulps %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fadd <16 x float> %x0, %x1
+  %t1 = fmul <16 x float> %x2, %t0
+  %t2 = fmul <16 x float> %x3, %t1
+  ret <16 x float> %t2
+}
+
+; Verify that AVX512 512-bit vector double-precision multiplies are reassociated.
+
+define <8 x double> @reassociate_muls_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
+; SSE-LABEL: reassociate_muls_v8f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm4, %xmm0
+; SSE-NEXT:    addpd %xmm5, %xmm1
+; SSE-NEXT:    addpd %xmm6, %xmm2
+; SSE-NEXT:    addpd %xmm7, %xmm3
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    mulpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_muls_v8f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vmulpd %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmulpd %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vmulpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_muls_v8f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmulpd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fadd <8 x double> %x0, %x1
+  %t1 = fmul <8 x double> %x2, %t0
+  %t2 = fmul <8 x double> %x3, %t1
+  ret <8 x double> %t2
+}
+
 ; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
 
 define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
@@ -558,6 +787,16 @@ define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1,
 ; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.
 
 define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; SSE-LABEL: reassociate_mins_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm2, %xmm0
+; SSE-NEXT:    addps %xmm3, %xmm1
+; SSE-NEXT:    minps %xmm6, %xmm4
+; SSE-NEXT:    minps %xmm4, %xmm0
+; SSE-NEXT:    minps %xmm7, %xmm5
+; SSE-NEXT:    minps %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_mins_v8f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
@@ -575,6 +814,16 @@ define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8
 ; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.
 
 define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; SSE-LABEL: reassociate_maxs_v8f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm2, %xmm0
+; SSE-NEXT:    addps %xmm3, %xmm1
+; SSE-NEXT:    maxps %xmm6, %xmm4
+; SSE-NEXT:    maxps %xmm4, %xmm0
+; SSE-NEXT:    maxps %xmm7, %xmm5
+; SSE-NEXT:    maxps %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_maxs_v8f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
@@ -592,6 +841,16 @@ define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8
 ; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.
 
 define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; SSE-LABEL: reassociate_mins_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm2, %xmm0
+; SSE-NEXT:    addpd %xmm3, %xmm1
+; SSE-NEXT:    minpd %xmm6, %xmm4
+; SSE-NEXT:    minpd %xmm4, %xmm0
+; SSE-NEXT:    minpd %xmm7, %xmm5
+; SSE-NEXT:    minpd %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_mins_v4f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
@@ -609,6 +868,16 @@ define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1,
 ; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.
 
 define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; SSE-LABEL: reassociate_maxs_v4f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm2, %xmm0
+; SSE-NEXT:    addpd %xmm3, %xmm1
+; SSE-NEXT:    maxpd %xmm6, %xmm4
+; SSE-NEXT:    maxpd %xmm4, %xmm0
+; SSE-NEXT:    maxpd %xmm7, %xmm5
+; SSE-NEXT:    maxpd %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_maxs_v4f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
@@ -623,24 +892,223 @@ define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1,
   ret <4 x double> %sel2
 }
 
+; Verify that AVX512 512-bit vector single-precision minimum ops are reassociated.
+
+define <16 x float> @reassociate_mins_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
+; SSE-LABEL: reassociate_mins_v16f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm4, %xmm0
+; SSE-NEXT:    addps %xmm5, %xmm1
+; SSE-NEXT:    addps %xmm6, %xmm2
+; SSE-NEXT:    addps %xmm7, %xmm3
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    minps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_mins_v16f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vminps %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vminps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vminps %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vminps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_mins_v16f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vminps %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fadd <16 x float> %x0, %x1
+  %cmp1 = fcmp olt <16 x float> %x2, %t0
+  %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
+  %cmp2 = fcmp olt <16 x float> %x3, %sel1
+  %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
+  ret <16 x float> %sel2
+}
+
+; Verify that AVX512 512-bit vector single-precision maximum ops are reassociated.
+
+define <16 x float> @reassociate_maxs_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
+; SSE-LABEL: reassociate_maxs_v16f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addps %xmm4, %xmm0
+; SSE-NEXT:    addps %xmm5, %xmm1
+; SSE-NEXT:    addps %xmm6, %xmm2
+; SSE-NEXT:    addps %xmm7, %xmm3
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    maxps {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_maxs_v16f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vaddps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vmaxps %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vmaxps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmaxps %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vmaxps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_maxs_v16f32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmaxps %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fadd <16 x float> %x0, %x1
+  %cmp1 = fcmp ogt <16 x float> %x2, %t0
+  %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
+  %cmp2 = fcmp ogt <16 x float> %x3, %sel1
+  %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
+  ret <16 x float> %sel2
+}
+
+; Verify that AVX512 512-bit vector double-precision minimum ops are reassociated.
+
+define <8 x double> @reassociate_mins_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
+; SSE-LABEL: reassociate_mins_v8f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm4, %xmm0
+; SSE-NEXT:    addpd %xmm5, %xmm1
+; SSE-NEXT:    addpd %xmm6, %xmm2
+; SSE-NEXT:    addpd %xmm7, %xmm3
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    minpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_mins_v8f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vminpd %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vminpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vminpd %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vminpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_mins_v8f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vminpd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fadd <8 x double> %x0, %x1
+  %cmp1 = fcmp olt <8 x double> %x2, %t0
+  %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
+  %cmp2 = fcmp olt <8 x double> %x3, %sel1
+  %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
+  ret <8 x double> %sel2
+}
+
+; Verify that AVX512 512-bit vector double-precision maximum ops are reassociated.
+
+define <8 x double> @reassociate_maxs_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
+; SSE-LABEL: reassociate_maxs_v8f64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    addpd %xmm4, %xmm0
+; SSE-NEXT:    addpd %xmm5, %xmm1
+; SSE-NEXT:    addpd %xmm6, %xmm2
+; SSE-NEXT:    addpd %xmm7, %xmm3
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    maxpd {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: reassociate_maxs_v8f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    vmaxpd %ymm6, %ymm4, %ymm2
+; AVX1-NEXT:    vmaxpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmaxpd %ymm7, %ymm5, %ymm2
+; AVX1-NEXT:    vmaxpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_maxs_v8f64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmaxpd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+  %t0 = fadd <8 x double> %x0, %x1
+  %cmp1 = fcmp ogt <8 x double> %x2, %t0
+  %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
+  %cmp2 = fcmp ogt <8 x double> %x3, %sel1
+  %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
+  ret <8 x double> %sel2
+}
+
 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 ; Verify that reassociation is not happening needlessly or wrongly.
 
 declare double @bar()
 
 define double @reassociate_adds_from_calls() {
+; SSE-LABEL: reassociate_adds_from_calls:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subq $24, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 32
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; SSE-NEXT:    # xmm1 = mem[0],zero
+; SSE-NEXT:    addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
+; SSE-NEXT:    addsd (%rsp), %xmm0 # 8-byte Folded Reload
+; SSE-NEXT:    addsd %xmm1, %xmm0
+; SSE-NEXT:    addq $24, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_adds_from_calls:
-; AVX:       callq   bar
-; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
-; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
-; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
-; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
-; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
-; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
-; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
+; AVX:       # %bb.0:
+; AVX-NEXT:    subq $24, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 32
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd %xmm0, (%rsp) # 8-byte Spill
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; AVX-NEXT:    # xmm1 = mem[0],zero
+; AVX-NEXT:    vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
+; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    addq $24, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 8
+; AVX-NEXT:    retq
 
   %x0 = call double @bar()
   %x1 = call double @bar()
@@ -653,18 +1121,45 @@ define double @reassociate_adds_from_calls() {
 }
 
 define double @already_reassociated() {
+; SSE-LABEL: already_reassociated:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subq $24, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 32
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; SSE-NEXT:    callq bar
+; SSE-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; SSE-NEXT:    # xmm1 = mem[0],zero
+; SSE-NEXT:    addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
+; SSE-NEXT:    addsd (%rsp), %xmm0 # 8-byte Folded Reload
+; SSE-NEXT:    addsd %xmm1, %xmm0
+; SSE-NEXT:    addq $24, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: already_reassociated:
-; AVX:       callq   bar
-; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
-; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
-; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
-; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
-; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
-; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
-; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
+; AVX:       # %bb.0:
+; AVX-NEXT:    subq $24, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 32
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd %xmm0, (%rsp) # 8-byte Spill
+; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; AVX-NEXT:    # xmm1 = mem[0],zero
+; AVX-NEXT:    vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
+; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    addq $24, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 8
+; AVX-NEXT:    retq
 
   %x0 = call double @bar()
   %x1 = call double @bar()

From 4721fad972adbfb7347c4c1f8040fed2aee84c0d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 00:07:52 +0000
Subject: [PATCH 0826/1176] [X86] Simplify the CHECK lines in
 vector-reduce-and/or/xor.

The AVX512BW and AVX512VL checks were never used. And AVX512 is the same
as AVX on all tests that weren't already split for AVX1 and AVX2.

llvm-svn: 362308
---
 llvm/test/CodeGen/X86/vector-reduce-and.ll | 101 +--------------------
 llvm/test/CodeGen/X86/vector-reduce-or.ll  | 101 +--------------------
 llvm/test/CodeGen/X86/vector-reduce-xor.ll | 101 +--------------------
 3 files changed, 6 insertions(+), 297 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll
index e90307b9841cf..9a9f5f21af509 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
 
 ;
 ; vXi64
@@ -24,13 +24,6 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
@@ -200,13 +193,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -229,15 +215,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -433,14 +410,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
@@ -465,16 +434,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
@@ -504,18 +463,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
@@ -758,14 +705,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
@@ -800,16 +739,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
@@ -852,18 +781,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
@@ -914,20 +831,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll
index f9c08cb0e9879..ba9c3c6a08cee 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
 
 ;
 ; vXi64
@@ -24,13 +24,6 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
@@ -200,13 +193,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -229,15 +215,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -433,14 +410,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
@@ -465,16 +434,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
@@ -504,18 +463,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
@@ -758,14 +705,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
@@ -800,16 +739,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
@@ -852,18 +781,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
@@ -914,20 +831,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
index 870fed03e2656..85eafac30efa4 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
 
 ;
 ; vXi64
@@ -24,13 +24,6 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
@@ -200,13 +193,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -229,15 +215,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -433,14 +410,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
@@ -465,16 +434,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
@@ -504,18 +463,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
@@ -758,14 +705,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
@@ -800,16 +739,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
@@ -852,18 +781,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
@@ -914,20 +831,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> %a0)
   ret i8 %1
 }

From 396a915c2651052dda39971c174c2bf41ebf266d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 00:42:58 +0000
Subject: [PATCH 0827/1176] [X86] Add the SSE versions of PMULLW and PMULLD to
 isAssociativeAndCommutative.

llvm-svn: 362309
---
 llvm/lib/Target/X86/X86InstrInfo.cpp          |   2 +
 .../CodeGen/X86/vector-reduce-mul-widen.ll    | 278 +++++++++---------
 llvm/test/CodeGen/X86/vector-reduce-mul.ll    | 278 +++++++++---------
 3 files changed, 280 insertions(+), 278 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 124ad5dfdf4c0..2beb513d1a950 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6920,6 +6920,8 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case X86::PADDWrr:
   case X86::PADDDrr:
   case X86::PADDQrr:
+  case X86::PMULLWrr:
+  case X86::PMULLDrr:
   case X86::VPANDrr:
   case X86::VPANDYrr:
   case X86::VPANDDZ128rr:
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
index 76ace68eb8c03..62143fb1ffe62 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul-widen.ll
@@ -955,13 +955,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmulld %xmm3, %xmm1
-; SSE41-NEXT:    pmulld %xmm2, %xmm0
-; SSE41-NEXT:    pmulld %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmulld %xmm2, %xmm1
 ; SSE41-NEXT:    pmulld %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pmulld %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmulld %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -1052,14 +1052,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmulld %xmm6, %xmm2
-; SSE41-NEXT:    pmulld %xmm4, %xmm0
-; SSE41-NEXT:    pmulld %xmm2, %xmm0
 ; SSE41-NEXT:    pmulld %xmm7, %xmm3
-; SSE41-NEXT:    pmulld %xmm5, %xmm1
-; SSE41-NEXT:    pmulld %xmm3, %xmm1
-; SSE41-NEXT:    pmulld %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmulld %xmm1, %xmm0
+; SSE41-NEXT:    pmulld %xmm5, %xmm3
+; SSE41-NEXT:    pmulld %xmm1, %xmm3
+; SSE41-NEXT:    pmulld %xmm4, %xmm2
+; SSE41-NEXT:    pmulld %xmm3, %xmm2
+; SSE41-NEXT:    pmulld %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmulld %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmulld %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1295,16 +1295,16 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE-LABEL: test_v32i16:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pmullw %xmm3, %xmm1
-; SSE-NEXT:    pmullw %xmm2, %xmm0
-; SSE-NEXT:    pmullw %xmm1, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pmullw %xmm2, %xmm1
 ; SSE-NEXT:    pmullw %xmm0, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE-NEXT:    pmullw %xmm1, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE-NEXT:    pmullw %xmm0, %xmm1
-; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrld $16, %xmm0
+; SSE-NEXT:    pmullw %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
 ; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE-NEXT:    retq
 ;
@@ -1415,14 +1415,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE-LABEL: test_v64i16:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pmullw %xmm6, %xmm2
-; SSE-NEXT:    pmullw %xmm4, %xmm0
-; SSE-NEXT:    pmullw %xmm2, %xmm0
 ; SSE-NEXT:    pmullw %xmm7, %xmm3
-; SSE-NEXT:    pmullw %xmm5, %xmm1
-; SSE-NEXT:    pmullw %xmm3, %xmm1
-; SSE-NEXT:    pmullw %xmm0, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE-NEXT:    pmullw %xmm1, %xmm0
+; SSE-NEXT:    pmullw %xmm5, %xmm3
+; SSE-NEXT:    pmullw %xmm1, %xmm3
+; SSE-NEXT:    pmullw %xmm4, %xmm2
+; SSE-NEXT:    pmullw %xmm3, %xmm2
+; SSE-NEXT:    pmullw %xmm0, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    pmullw %xmm2, %xmm0
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE-NEXT:    pmullw %xmm0, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
@@ -1958,9 +1958,9 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm2, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm3, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pmullw %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -1992,30 +1992,30 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm3
 ; SSE41-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    pmullw %xmm0, %xmm2
-; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmullw %xmm1, %xmm3
+; SSE41-NEXT:    pmullw %xmm0, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm3, %xmm2
-; SSE41-NEXT:    pand %xmm1, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm3, %xmm2
-; SSE41-NEXT:    pand %xmm1, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm1, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm1, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, %xmm0
 ; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
+; SSE41-NEXT:    pmullw %xmm3, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2234,18 +2234,18 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-NEXT:    pmullw %xmm4, %xmm5
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
-; SSE2-NEXT:    movdqa %xmm0, %xmm6
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm4, %xmm6
-; SSE2-NEXT:    pmullw %xmm5, %xmm6
+; SSE2-NEXT:    pmullw %xmm5, %xmm4
+; SSE2-NEXT:    movdqa %xmm0, %xmm5
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm3, %xmm1
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; SSE2-NEXT:    pmullw %xmm2, %xmm0
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pmullw %xmm6, %xmm0
+; SSE2-NEXT:    pmullw %xmm4, %xmm0
+; SSE2-NEXT:    pmullw %xmm5, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -2280,40 +2280,40 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE41-NEXT:    pmullw %xmm3, %xmm1
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
 ; SSE41-NEXT:    pmullw %xmm3, %xmm6
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT:    pshufb %xmm1, %xmm6
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm3, %xmm6
 ; SSE41-NEXT:    pmullw %xmm4, %xmm5
-; SSE41-NEXT:    pshufb %xmm1, %xmm5
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pshufb %xmm3, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm4, %xmm5
+; SSE41-NEXT:    pshufb %xmm3, %xmm5
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; SSE41-NEXT:    pmullw %xmm3, %xmm4
-; SSE41-NEXT:    pshufb %xmm1, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
+; SSE41-NEXT:    pmullw %xmm2, %xmm3
+; SSE41-NEXT:    pmullw %xmm0, %xmm3
+; SSE41-NEXT:    pand %xmm1, %xmm3
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    packuswb %xmm0, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    packuswb %xmm0, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm2, %xmm3
+; SSE41-NEXT:    pand %xmm1, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm2, %xmm3
+; SSE41-NEXT:    pand %xmm1, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, %xmm0
 ; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
+; SSE41-NEXT:    pmullw %xmm3, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2611,40 +2611,40 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-NEXT:    movdqa %xmm2, %xmm9
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm8, %xmm9
-; SSE2-NEXT:    movdqa %xmm4, %xmm8
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm10
+; SSE2-NEXT:    movdqa %xmm4, %xmm10
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm10 = xmm10[8],xmm0[8],xmm10[9],xmm0[9],xmm10[10],xmm0[10],xmm10[11],xmm0[11],xmm10[12],xmm0[12],xmm10[13],xmm0[13],xmm10[14],xmm0[14],xmm10[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm8, %xmm10
 ; SSE2-NEXT:    pmullw %xmm9, %xmm10
+; SSE2-NEXT:    movdqa %xmm0, %xmm9
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
 ; SSE2-NEXT:    movdqa %xmm7, %xmm8
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm3, %xmm9
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm8, %xmm9
-; SSE2-NEXT:    movdqa %xmm5, %xmm11
+; SSE2-NEXT:    movdqa %xmm3, %xmm11
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm0[8],xmm11[9],xmm0[9],xmm11[10],xmm0[10],xmm11[11],xmm0[11],xmm11[12],xmm0[12],xmm11[13],xmm0[13],xmm11[14],xmm0[14],xmm11[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm8, %xmm11
+; SSE2-NEXT:    movdqa %xmm5, %xmm12
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm12 = xmm12[8],xmm0[8],xmm12[9],xmm0[9],xmm12[10],xmm0[10],xmm12[11],xmm0[11],xmm12[12],xmm0[12],xmm12[13],xmm0[13],xmm12[14],xmm0[14],xmm12[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm11, %xmm12
 ; SSE2-NEXT:    movdqa %xmm1, %xmm8
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm11, %xmm8
-; SSE2-NEXT:    pmullw %xmm9, %xmm8
+; SSE2-NEXT:    pmullw %xmm12, %xmm8
 ; SSE2-NEXT:    pmullw %xmm10, %xmm8
+; SSE2-NEXT:    pmullw %xmm9, %xmm8
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm6, %xmm2
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm2, %xmm4
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm4, %xmm0
-; SSE2-NEXT:    pmullw %xmm2, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm7, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm3, %xmm5
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm5, %xmm1
-; SSE2-NEXT:    pmullw %xmm3, %xmm1
-; SSE2-NEXT:    pmullw %xmm0, %xmm1
+; SSE2-NEXT:    pmullw %xmm4, %xmm1
 ; SSE2-NEXT:    pmullw %xmm8, %xmm1
+; SSE2-NEXT:    pmullw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    pand %xmm0, %xmm2
@@ -2679,65 +2679,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-NEXT:    pmullw %xmm6, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm10 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    pmullw %xmm2, %xmm4
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm4, %xmm0
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm11 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm7, %xmm3
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; SSE41-NEXT:    pmullw %xmm3, %xmm5
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm5, %xmm1
-; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pmullw %xmm7, %xmm2
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT:    pshufb %xmm0, %xmm2
-; SSE41-NEXT:    pmullw %xmm11, %xmm4
-; SSE41-NEXT:    pshufb %xmm0, %xmm4
-; SSE41-NEXT:    pmullw %xmm10, %xmm6
-; SSE41-NEXT:    pshufb %xmm0, %xmm6
+; SSE41-NEXT:    pmullw %xmm4, %xmm1
+; SSE41-NEXT:    pmullw %xmm7, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm5, %xmm3
+; SSE41-NEXT:    pmullw %xmm11, %xmm6
+; SSE41-NEXT:    pshufb %xmm5, %xmm6
+; SSE41-NEXT:    pmullw %xmm10, %xmm2
+; SSE41-NEXT:    pshufb %xmm5, %xmm2
 ; SSE41-NEXT:    pmullw %xmm8, %xmm9
-; SSE41-NEXT:    pshufb %xmm0, %xmm9
-; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero
+; SSE41-NEXT:    pshufb %xmm5, %xmm9
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmullw %xmm7, %xmm2
+; SSE41-NEXT:    pshufb %xmm5, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; SSE41-NEXT:    pmullw %xmm5, %xmm6
-; SSE41-NEXT:    pshufb %xmm0, %xmm6
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmullw %xmm6, %xmm3
+; SSE41-NEXT:    pshufb %xmm5, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    pmullw %xmm4, %xmm2
-; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    pmullw %xmm3, %xmm2
+; SSE41-NEXT:    pshufb %xmm5, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; SSE41-NEXT:    pmullw %xmm2, %xmm4
-; SSE41-NEXT:    pshufb %xmm0, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm1
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
+; SSE41-NEXT:    pmullw %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm4, %xmm2
+; SSE41-NEXT:    pxor %xmm0, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
+; SSE41-NEXT:    pand %xmm4, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
+; SSE41-NEXT:    pand %xmm4, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pextrb $0, %xmm1, %eax
+; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 662415bf1d2c5..64c6265530c49 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -948,13 +948,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmulld %xmm3, %xmm1
-; SSE41-NEXT:    pmulld %xmm2, %xmm0
-; SSE41-NEXT:    pmulld %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmulld %xmm2, %xmm1
 ; SSE41-NEXT:    pmulld %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pmulld %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmulld %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -1045,14 +1045,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmulld %xmm6, %xmm2
-; SSE41-NEXT:    pmulld %xmm4, %xmm0
-; SSE41-NEXT:    pmulld %xmm2, %xmm0
 ; SSE41-NEXT:    pmulld %xmm7, %xmm3
-; SSE41-NEXT:    pmulld %xmm5, %xmm1
-; SSE41-NEXT:    pmulld %xmm3, %xmm1
-; SSE41-NEXT:    pmulld %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmulld %xmm1, %xmm0
+; SSE41-NEXT:    pmulld %xmm5, %xmm3
+; SSE41-NEXT:    pmulld %xmm1, %xmm3
+; SSE41-NEXT:    pmulld %xmm4, %xmm2
+; SSE41-NEXT:    pmulld %xmm3, %xmm2
+; SSE41-NEXT:    pmulld %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmulld %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmulld %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1302,16 +1302,16 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE-LABEL: test_v32i16:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pmullw %xmm3, %xmm1
-; SSE-NEXT:    pmullw %xmm2, %xmm0
-; SSE-NEXT:    pmullw %xmm1, %xmm0
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT:    pmullw %xmm2, %xmm1
 ; SSE-NEXT:    pmullw %xmm0, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE-NEXT:    pmullw %xmm1, %xmm0
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE-NEXT:    pmullw %xmm0, %xmm1
-; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrld $16, %xmm0
+; SSE-NEXT:    pmullw %xmm1, %xmm0
+; SSE-NEXT:    movd %xmm0, %eax
 ; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE-NEXT:    retq
 ;
@@ -1422,14 +1422,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE-LABEL: test_v64i16:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pmullw %xmm6, %xmm2
-; SSE-NEXT:    pmullw %xmm4, %xmm0
-; SSE-NEXT:    pmullw %xmm2, %xmm0
 ; SSE-NEXT:    pmullw %xmm7, %xmm3
-; SSE-NEXT:    pmullw %xmm5, %xmm1
-; SSE-NEXT:    pmullw %xmm3, %xmm1
-; SSE-NEXT:    pmullw %xmm0, %xmm1
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE-NEXT:    pmullw %xmm1, %xmm0
+; SSE-NEXT:    pmullw %xmm5, %xmm3
+; SSE-NEXT:    pmullw %xmm1, %xmm3
+; SSE-NEXT:    pmullw %xmm4, %xmm2
+; SSE-NEXT:    pmullw %xmm3, %xmm2
+; SSE-NEXT:    pmullw %xmm0, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE-NEXT:    pmullw %xmm2, %xmm0
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE-NEXT:    pmullw %xmm0, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm0
@@ -1922,9 +1922,9 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm2, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm3, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pmullw %xmm3, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -1956,30 +1956,30 @@ define i8 @test_v32i8(<32 x i8> %a0) {
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm3
 ; SSE41-NEXT:    pshufb {{.*#+}} xmm3 = xmm3[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    pmullw %xmm0, %xmm2
-; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmullw %xmm1, %xmm3
+; SSE41-NEXT:    pmullw %xmm0, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm3, %xmm2
-; SSE41-NEXT:    pand %xmm1, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm3, %xmm2
-; SSE41-NEXT:    pand %xmm1, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm1, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm1, %xmm3
+; SSE41-NEXT:    pand %xmm2, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, %xmm0
 ; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
+; SSE41-NEXT:    pmullw %xmm3, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2196,18 +2196,18 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-NEXT:    pmullw %xmm4, %xmm5
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
-; SSE2-NEXT:    movdqa %xmm0, %xmm6
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
-; SSE2-NEXT:    pmullw %xmm4, %xmm6
-; SSE2-NEXT:    pmullw %xmm5, %xmm6
+; SSE2-NEXT:    pmullw %xmm5, %xmm4
+; SSE2-NEXT:    movdqa %xmm0, %xmm5
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm3, %xmm1
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm1, %xmm2
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; SSE2-NEXT:    pmullw %xmm2, %xmm0
-; SSE2-NEXT:    pmullw %xmm1, %xmm0
-; SSE2-NEXT:    pmullw %xmm6, %xmm0
+; SSE2-NEXT:    pmullw %xmm4, %xmm0
+; SSE2-NEXT:    pmullw %xmm5, %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    pand %xmm1, %xmm2
@@ -2242,40 +2242,40 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE41-NEXT:    pmullw %xmm3, %xmm1
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
 ; SSE41-NEXT:    pmullw %xmm3, %xmm6
-; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT:    pshufb %xmm1, %xmm6
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm3, %xmm6
 ; SSE41-NEXT:    pmullw %xmm4, %xmm5
-; SSE41-NEXT:    pshufb %xmm1, %xmm5
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pshufb %xmm3, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
+; SSE41-NEXT:    pmullw %xmm4, %xmm5
+; SSE41-NEXT:    pshufb %xmm3, %xmm5
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; SSE41-NEXT:    pmullw %xmm3, %xmm4
-; SSE41-NEXT:    pshufb %xmm1, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
+; SSE41-NEXT:    pmullw %xmm2, %xmm3
+; SSE41-NEXT:    pmullw %xmm0, %xmm3
+; SSE41-NEXT:    pand %xmm1, %xmm3
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    packuswb %xmm0, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    packuswb %xmm0, %xmm1
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pand %xmm2, %xmm1
-; SSE41-NEXT:    packuswb %xmm0, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm2, %xmm3
+; SSE41-NEXT:    pand %xmm1, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm2, %xmm3
+; SSE41-NEXT:    pand %xmm1, %xmm3
+; SSE41-NEXT:    packuswb %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, %xmm0
 ; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
+; SSE41-NEXT:    pmullw %xmm3, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2567,40 +2567,40 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-NEXT:    movdqa %xmm2, %xmm9
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
 ; SSE2-NEXT:    pmullw %xmm8, %xmm9
-; SSE2-NEXT:    movdqa %xmm4, %xmm8
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm0, %xmm10
+; SSE2-NEXT:    movdqa %xmm4, %xmm10
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm10 = xmm10[8],xmm0[8],xmm10[9],xmm0[9],xmm10[10],xmm0[10],xmm10[11],xmm0[11],xmm10[12],xmm0[12],xmm10[13],xmm0[13],xmm10[14],xmm0[14],xmm10[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm8, %xmm10
 ; SSE2-NEXT:    pmullw %xmm9, %xmm10
+; SSE2-NEXT:    movdqa %xmm0, %xmm9
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
 ; SSE2-NEXT:    movdqa %xmm7, %xmm8
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    movdqa %xmm3, %xmm9
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm8, %xmm9
-; SSE2-NEXT:    movdqa %xmm5, %xmm11
+; SSE2-NEXT:    movdqa %xmm3, %xmm11
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm11 = xmm11[8],xmm0[8],xmm11[9],xmm0[9],xmm11[10],xmm0[10],xmm11[11],xmm0[11],xmm11[12],xmm0[12],xmm11[13],xmm0[13],xmm11[14],xmm0[14],xmm11[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm8, %xmm11
+; SSE2-NEXT:    movdqa %xmm5, %xmm12
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm12 = xmm12[8],xmm0[8],xmm12[9],xmm0[9],xmm12[10],xmm0[10],xmm12[11],xmm0[11],xmm12[12],xmm0[12],xmm12[13],xmm0[13],xmm12[14],xmm0[14],xmm12[15],xmm0[15]
+; SSE2-NEXT:    pmullw %xmm11, %xmm12
 ; SSE2-NEXT:    movdqa %xmm1, %xmm8
 ; SSE2-NEXT:    punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm0[8],xmm8[9],xmm0[9],xmm8[10],xmm0[10],xmm8[11],xmm0[11],xmm8[12],xmm0[12],xmm8[13],xmm0[13],xmm8[14],xmm0[14],xmm8[15],xmm0[15]
-; SSE2-NEXT:    pmullw %xmm11, %xmm8
-; SSE2-NEXT:    pmullw %xmm9, %xmm8
+; SSE2-NEXT:    pmullw %xmm12, %xmm8
 ; SSE2-NEXT:    pmullw %xmm10, %xmm8
+; SSE2-NEXT:    pmullw %xmm9, %xmm8
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm6, %xmm2
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm2, %xmm4
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw %xmm4, %xmm0
-; SSE2-NEXT:    pmullw %xmm2, %xmm0
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm7, %xmm3
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2-NEXT:    pmullw %xmm3, %xmm5
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE2-NEXT:    pmullw %xmm5, %xmm1
-; SSE2-NEXT:    pmullw %xmm3, %xmm1
-; SSE2-NEXT:    pmullw %xmm0, %xmm1
+; SSE2-NEXT:    pmullw %xmm4, %xmm1
 ; SSE2-NEXT:    pmullw %xmm8, %xmm1
+; SSE2-NEXT:    pmullw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    pand %xmm0, %xmm2
@@ -2635,65 +2635,65 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-NEXT:    pmullw %xmm6, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm10 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT:    pmullw %xmm2, %xmm4
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw %xmm4, %xmm0
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm11 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm0[8],xmm7[9],xmm0[9],xmm7[10],xmm0[10],xmm7[11],xmm0[11],xmm7[12],xmm0[12],xmm7[13],xmm0[13],xmm7[14],xmm0[14],xmm7[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm7, %xmm3
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; SSE41-NEXT:    pmullw %xmm3, %xmm5
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
 ; SSE41-NEXT:    pmullw %xmm5, %xmm1
-; SSE41-NEXT:    pmullw %xmm3, %xmm1
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pmullw %xmm7, %xmm2
-; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT:    pshufb %xmm0, %xmm2
-; SSE41-NEXT:    pmullw %xmm11, %xmm4
-; SSE41-NEXT:    pshufb %xmm0, %xmm4
-; SSE41-NEXT:    pmullw %xmm10, %xmm6
-; SSE41-NEXT:    pshufb %xmm0, %xmm6
+; SSE41-NEXT:    pmullw %xmm4, %xmm1
+; SSE41-NEXT:    pmullw %xmm7, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT:    pshufb %xmm5, %xmm3
+; SSE41-NEXT:    pmullw %xmm11, %xmm6
+; SSE41-NEXT:    pshufb %xmm5, %xmm6
+; SSE41-NEXT:    pmullw %xmm10, %xmm2
+; SSE41-NEXT:    pshufb %xmm5, %xmm2
 ; SSE41-NEXT:    pmullw %xmm8, %xmm9
-; SSE41-NEXT:    pshufb %xmm0, %xmm9
-; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm5 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero
+; SSE41-NEXT:    pshufb %xmm5, %xmm9
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm7 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    pmullw %xmm7, %xmm2
+; SSE41-NEXT:    pshufb %xmm5, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; SSE41-NEXT:    pmullw %xmm5, %xmm6
-; SSE41-NEXT:    pshufb %xmm0, %xmm6
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
+; SSE41-NEXT:    pmullw %xmm6, %xmm3
+; SSE41-NEXT:    pshufb %xmm5, %xmm3
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    pmullw %xmm4, %xmm2
-; SSE41-NEXT:    pshufb %xmm0, %xmm2
+; SSE41-NEXT:    pmullw %xmm3, %xmm2
+; SSE41-NEXT:    pshufb %xmm5, %xmm2
 ; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
-; SSE41-NEXT:    pmullw %xmm2, %xmm4
-; SSE41-NEXT:    pshufb %xmm0, %xmm4
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
-; SSE41-NEXT:    pmullw %xmm1, %xmm0
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm1
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
+; SSE41-NEXT:    pmullw %xmm0, %xmm2
+; SSE41-NEXT:    pand %xmm4, %xmm2
+; SSE41-NEXT:    pxor %xmm0, %xmm0
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
+; SSE41-NEXT:    pand %xmm4, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE41-NEXT:    pmullw %xmm1, %xmm2
+; SSE41-NEXT:    pand %xmm4, %xmm2
+; SSE41-NEXT:    packuswb %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
 ; SSE41-NEXT:    pmullw %xmm2, %xmm0
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pmullw %xmm0, %xmm1
-; SSE41-NEXT:    pextrb $0, %xmm1, %eax
+; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
 ;

From fe699c32a2081bde203e99593ed906fce651b3dd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 00:43:02 +0000
Subject: [PATCH 0828/1176] [X86] Simplify the CHECK lines in
 vector-reduce-and/or/xor-widen.ll in similar way to r362308.

Forgot to do the widen forms when I was doing the others.

llvm-svn: 362310
---
 .../CodeGen/X86/vector-reduce-and-widen.ll    | 101 +-----------------
 .../CodeGen/X86/vector-reduce-or-widen.ll     | 101 +-----------------
 .../CodeGen/X86/vector-reduce-xor-widen.ll    | 101 +-----------------
 3 files changed, 6 insertions(+), 297 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll
index a00f81cc63d17..16c45da9c338b 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-widen.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
 
 ;
 ; vXi64
@@ -24,13 +24,6 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
@@ -200,13 +193,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -229,15 +215,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -434,14 +411,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
@@ -467,16 +436,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
@@ -506,18 +465,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
@@ -762,14 +709,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
@@ -808,16 +747,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
@@ -862,18 +791,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
@@ -924,20 +841,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll
index ef060acfb6782..b5abc094dfdc9 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-widen.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
 
 ;
 ; vXi64
@@ -24,13 +24,6 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
@@ -200,13 +193,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -229,15 +215,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -434,14 +411,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
@@ -467,16 +436,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
@@ -506,18 +465,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
@@ -762,14 +709,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
@@ -808,16 +747,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
@@ -862,18 +791,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
@@ -924,20 +841,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> %a0)
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll
index 62bbba55f5ca1..7486a190f9f86 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-widen.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
-; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512
 
 ;
 ; vXi64
@@ -24,13 +24,6 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovq %xmm0, %rax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i64:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovq %xmm0, %rax
-; AVX512-NEXT:    retq
   %1 = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %a0)
   ret i64 %1
 }
@@ -200,13 +193,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -229,15 +215,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    retq
   %1 = call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -434,14 +411,6 @@ define i16 @test_v2i16(<2 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v2i16(<2 x i16> %a0)
   ret i16 %1
 }
@@ -467,16 +436,6 @@ define i16 @test_v4i16(<4 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v4i16(<4 x i16> %a0)
   ret i16 %1
 }
@@ -506,18 +465,6 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i16:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
-; AVX512-NEXT:    retq
   %1 = call i16 @llvm.experimental.vector.reduce.xor.i16.v8i16(<8 x i16> %a0)
   ret i16 %1
 }
@@ -762,14 +709,6 @@ define i8 @test_v2i8(<2 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v2i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v2i8(<2 x i8> %a0)
   ret i8 %1
 }
@@ -808,16 +747,6 @@ define i8 @test_v4i8(<4 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v4i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v4i8(<4 x i8> %a0)
   ret i8 %1
 }
@@ -862,18 +791,6 @@ define i8 @test_v8i8(<8 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v8i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v8i8(<8 x i8> %a0)
   ret i8 %1
 }
@@ -924,20 +841,6 @@ define i8 @test_v16i8(<16 x i8> %a0) {
 ; AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; AVX-NEXT:    # kill: def $al killed $al killed $eax
 ; AVX-NEXT:    retq
-;
-; AVX512-LABEL: test_v16i8:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
-; AVX512-NEXT:    # kill: def $al killed $al killed $eax
-; AVX512-NEXT:    retq
   %1 = call i8 @llvm.experimental.vector.reduce.xor.i8.v16i8(<16 x i8> %a0)
   ret i8 %1
 }

From 737de4d363ede4b90dd5609af0494fb39af53865 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Sun, 2 Jun 2019 01:14:31 +0000
Subject: [PATCH 0829/1176] [libcxx] Use libtool when merging archives on Apple
 platforms

ar doesn't produce the correct results when used for linking static
archives on Apple platforms, so instead use libtool -static which is
the official way to build static archives on those platforms.

Differential Revision: https://reviews.llvm.org/D62770

llvm-svn: 362311
---
 libcxx/src/CMakeLists.txt      |  4 ++++
 libcxx/utils/merge_archives.py | 22 ++++++++++++++++++++--
 libcxxabi/src/CMakeLists.txt   |  5 +++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 4fe4db47d2add..2a8ff2c2d89b7 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -375,12 +375,16 @@ if (LIBCXX_ENABLE_STATIC)
       set(MERGE_ARCHIVES_ABI_TARGET
         "${CMAKE_STATIC_LIBRARY_PREFIX}${LIBCXX_CXX_STATIC_ABI_LIBRARY}${CMAKE_STATIC_LIBRARY_SUFFIX}")
     endif()
+    if (APPLE)
+      set(MERGE_ARCHIVES_LIBTOOL "--use-libtool" "--libtool" "${CMAKE_LIBTOOL}")
+    endif()
     add_custom_command(TARGET cxx_static POST_BUILD
     COMMAND
       ${PYTHON_EXECUTABLE} ${LIBCXX_SOURCE_DIR}/utils/merge_archives.py
     ARGS
       -o $<TARGET_LINKER_FILE:cxx_static>
       --ar "${CMAKE_AR}"
+      ${MERGE_ARCHIVES_LIBTOOL}
       "$<TARGET_LINKER_FILE:cxx_static>"
       "${MERGE_ARCHIVES_ABI_TARGET}"
       "${MERGE_ARCHIVES_SEARCH_PATHS}"
diff --git a/libcxx/utils/merge_archives.py b/libcxx/utils/merge_archives.py
index 75d7a54e47d85..5c04bc915a4ad 100755
--- a/libcxx/utils/merge_archives.py
+++ b/libcxx/utils/merge_archives.py
@@ -97,6 +97,12 @@ def main():
         '--ar', dest='ar_exe', required=False,
         help='The ar executable to use, finds \'ar\' in the path if not given',
         type=str, action='store')
+    parser.add_argument(
+        '--use-libtool', dest='use_libtool', action='store_true', default=False)
+    parser.add_argument(
+        '--libtool', dest='libtool_exe', required=False,
+        help='The libtool executable to use, finds \'libtool\' in the path if not given',
+        type=str, action='store')
     parser.add_argument(
         'archives', metavar='archives',  nargs='+',
         help='The archives to merge')
@@ -109,6 +115,13 @@ def main():
     if not ar_exe:
         print_and_exit("failed to find 'ar' executable")
 
+    if args.use_libtool:
+        libtool_exe = args.libtool_exe
+        if not libtool_exe:
+            libtool_exe = distutils.spawn.find_executable('libtool')
+        if not libtool_exe:
+            print_and_exit("failed to find 'libtool' executable")
+
     if len(args.archives) < 2:
         print_and_exit('fewer than 2 inputs provided')
     archives = [find_and_diagnose_missing(ar, args.search_paths)
@@ -127,8 +140,13 @@ def main():
         out = execute_command_verbose([ar_exe, 't', arc])
         files.extend(out.splitlines())
 
-    execute_command_verbose([ar_exe, 'rcs', args.output] + files,
-                            cwd=temp_directory_root, verbose=args.verbose)
+    if args.use_libtool:
+        files = [f for f in files if not f.startswith('__.SYMDEF')]
+        execute_command_verbose([libtool_exe, '-static', '-o', args.output] + files,
+                                cwd=temp_directory_root, verbose=args.verbose)
+    else:
+        execute_command_verbose([ar_exe, 'rcs', args.output] + files,
+                                cwd=temp_directory_root, verbose=args.verbose)
 
 
 if __name__ == '__main__':
diff --git a/libcxxabi/src/CMakeLists.txt b/libcxxabi/src/CMakeLists.txt
index e4e1fac34bbcc..77f00987097f8 100644
--- a/libcxxabi/src/CMakeLists.txt
+++ b/libcxxabi/src/CMakeLists.txt
@@ -226,6 +226,10 @@ if (LIBCXXABI_ENABLE_STATIC)
     list(APPEND LIBCXXABI_INSTALL_TARGETS "cxxabi_static")
   endif()
 
+  if (APPLE)
+    set(MERGE_ARCHIVES_LIBTOOL "--use-libtool" "--libtool" "${CMAKE_LIBTOOL}")
+  endif()
+
   # Merge the the libc++abi.a and libunwind.a into one.
   if(LIBCXXABI_USE_LLVM_UNWINDER AND LIBCXXABI_STATICALLY_LINK_UNWINDER_IN_STATIC_LIBRARY)
     add_custom_command(TARGET cxxabi_static POST_BUILD
@@ -233,6 +237,7 @@ if (LIBCXXABI_ENABLE_STATIC)
       ARGS
         -o "$<TARGET_LINKER_FILE:cxxabi_static>"
         --ar "${CMAKE_AR}"
+        ${MERGE_ARCHIVES_LIBTOOL}
         "$<TARGET_LINKER_FILE:cxxabi_static>"
         "$<TARGET_LINKER_FILE:unwind_static>"
       WORKING_DIRECTORY ${LIBCXXABI_BUILD_DIR}

From 78c794a70bc89dca8d9fec986b42135feacb605b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 01:36:48 +0000
Subject: [PATCH 0830/1176] [X86] Fix several places that weren't passing what
 they though they were to MachineInstr::print

Over a year ago, MachineInstr gained a fourth boolean parameter that occurs
before the TII pointer. When this happened, several places started accidentally
passing TII into this boolean parameter instead of the TII parameter.

llvm-svn: 362312
---
 llvm/lib/CodeGen/LiveDebugValues.cpp        | 12 +++++++++---
 llvm/lib/CodeGen/MachineCombiner.cpp        |  6 ++++--
 llvm/lib/CodeGen/MachineInstr.cpp           |  2 +-
 llvm/unittests/CodeGen/MachineInstrTest.cpp |  3 ++-
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp
index 2ac3fe20fffb2..7f95e12186f35 100644
--- a/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -457,7 +457,9 @@ void LiveDebugValues::insertTransferDebugPair(
     VarLoc VL(*NewDebugInstr, LS);
     ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
-               NewDebugInstr->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+                                    /*SkipOpers*/false, /*SkipDebugLoc*/false,
+                                    /*AddNewLine*/true, TII));
     return;
   }
   case TransferKind::TransferSpill: {
@@ -474,7 +476,9 @@ void LiveDebugValues::insertTransferDebugPair(
               SpillLocation.SpillOffset, LS);
     ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
-               NewDebugInstr->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+                                    /*SkipOpers*/false, /*SkipDebugLoc*/false,
+                                    /*AddNewLine*/true, TII));
     return;
   }
   case TransferKind::TransferRestore: {
@@ -488,7 +492,9 @@ void LiveDebugValues::insertTransferDebugPair(
     VarLoc VL(*NewDebugInstr, LS);
     ProcessVarLoc(VL, NewDebugInstr);
     LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
-               NewDebugInstr->print(dbgs(), false, false, false, TII));
+               NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+                                    /*SkipOpers*/false, /*SkipDebugLoc*/false,
+                                    /*AddNewLine*/true, TII));
     return;
   }
   }
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 218484715db63..0584ec0bd2b31 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -561,10 +561,12 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
         dbgs() << "\tFor the Pattern (" << (int)P
                << ") these instructions could be removed\n";
         for (auto const *InstrPtr : DelInstrs)
-          InstrPtr->print(dbgs(), false, false, false, TII);
+          InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+                          /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
         dbgs() << "\tThese instructions could replace the removed ones\n";
         for (auto const *InstrPtr : InsInstrs)
-          InstrPtr->print(dbgs(), false, false, false, TII);
+          InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+                          /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
       });
 
       bool SubstituteAlways = false;
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index be39a72614029..894d0abea3eb0 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1472,7 +1472,7 @@ void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
   ModuleSlotTracker MST(M);
   if (F)
     MST.incorporateFunction(*F);
-  print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII);
+  print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, AddNewLine, TII);
 }
 
 void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
diff --git a/llvm/unittests/CodeGen/MachineInstrTest.cpp b/llvm/unittests/CodeGen/MachineInstrTest.cpp
index bfdd940bf376d..21b5eb6f4e14a 100644
--- a/llvm/unittests/CodeGen/MachineInstrTest.cpp
+++ b/llvm/unittests/CodeGen/MachineInstrTest.cpp
@@ -265,7 +265,8 @@ TEST(MachineInstrPrintingTest, DebugLocPrinting) {
 
   std::string str;
   raw_string_ostream OS(str);
-  MI->print(OS);
+  MI->print(OS, /*IsStandalone*/true, /*SkipOpers*/false, /*SkipDebugLoc*/false,
+            /*AddNewLine*/false);
   ASSERT_TRUE(
       StringRef(OS.str()).startswith("$noreg = UNKNOWN debug-location "));
   ASSERT_TRUE(

From 84254dd8abb22c028191484371aab378ecf3cfd9 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Sun, 2 Jun 2019 02:05:01 +0000
Subject: [PATCH 0831/1176] [CMake] Use libtool for runtimes when building for
 Apple platform

LLVM CMake build already uses libtool instead of ar when building
for Apple platform and we should be using the same when building
runtimes. To do so, this change extracts the logic for finding
libtool into a separate file and then uses it from both the LLVM
build as well as the LLVM runtimes build.

Differential Revision: https://reviews.llvm.org/D62769

llvm-svn: 362313
---
 llvm/CMakeLists.txt                 | 60 +++--------------------------
 llvm/cmake/modules/UseLibtool.cmake | 50 ++++++++++++++++++++++++
 llvm/runtimes/CMakeLists.txt        |  5 +++
 3 files changed, 60 insertions(+), 55 deletions(-)
 create mode 100644 llvm/cmake/modules/UseLibtool.cmake

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 895f9ab7189d1..a9addfc5a31da 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -49,61 +49,6 @@ if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type (default Debug)" FORCE)
 endif()
 
-# This should only apply if you are both on an Apple host, and targeting Apple.
-if(CMAKE_HOST_APPLE AND APPLE)
-  # if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program
-  if(NOT CMAKE_LIBTOOL)
-    if(NOT CMAKE_XCRUN)
-      find_program(CMAKE_XCRUN NAMES xcrun)
-    endif()
-    if(CMAKE_XCRUN)
-      execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
-        OUTPUT_VARIABLE CMAKE_LIBTOOL
-        OUTPUT_STRIP_TRAILING_WHITESPACE)
-    endif()
-
-    if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
-      find_program(CMAKE_LIBTOOL NAMES libtool)
-    endif()
-  endif()
-
-  get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
-  if(CMAKE_LIBTOOL)
-    set(CMAKE_LIBTOOL ${CMAKE_LIBTOOL} CACHE PATH "libtool executable")
-    message(STATUS "Found libtool - ${CMAKE_LIBTOOL}")
-
-    execute_process(COMMAND ${CMAKE_LIBTOOL} -V
-      OUTPUT_VARIABLE LIBTOOL_V_OUTPUT
-      OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if("${LIBTOOL_V_OUTPUT}" MATCHES ".*cctools-([0-9.]+).*")
-      string(REGEX REPLACE ".*cctools-([0-9.]+).*" "\\1" LIBTOOL_VERSION
-        ${LIBTOOL_V_OUTPUT})
-      if(NOT LIBTOOL_VERSION VERSION_LESS "862")
-        set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols")
-      endif()
-    endif()
-
-    foreach(lang ${languages})
-      set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
-        "\"${CMAKE_LIBTOOL}\" -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> \
-        <LINK_FLAGS> <OBJECTS> ")
-    endforeach()
-  endif()
-
-  # If DYLD_LIBRARY_PATH is set we need to set it on archiver commands
-  if(DYLD_LIBRARY_PATH)
-    set(dyld_envar "DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}")
-    foreach(lang ${languages})
-      foreach(cmd ${CMAKE_${lang}_CREATE_STATIC_LIBRARY})
-        list(APPEND CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW
-             "${dyld_envar} ${cmd}")
-      endforeach()
-      set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
-        ${CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW})
-    endforeach()
-  endif()
-endif()
-
 # Side-by-side subprojects layout: automatically set the
 # LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS
 # This allows an easy way of setting up a build directory for llvm and another
@@ -648,6 +593,11 @@ if (LLVM_BUILD_STATIC)
   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
 endif()
 
+# Use libtool instead of ar if you are both on an Apple host, and targeting Apple.
+if(CMAKE_HOST_APPLE AND APPLE)
+  include(UseLibtool)
+endif()
+
 # Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV.
 set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. Disabled by blank.")
 mark_as_advanced(LLVM_TARGET_TRIPLE_ENV)
diff --git a/llvm/cmake/modules/UseLibtool.cmake b/llvm/cmake/modules/UseLibtool.cmake
new file mode 100644
index 0000000000000..38d197d4846fd
--- /dev/null
+++ b/llvm/cmake/modules/UseLibtool.cmake
@@ -0,0 +1,50 @@
+# if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program
+if(NOT CMAKE_LIBTOOL)
+  if(NOT CMAKE_XCRUN)
+    find_program(CMAKE_XCRUN NAMES xcrun)
+  endif()
+  if(CMAKE_XCRUN)
+    execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
+      OUTPUT_VARIABLE CMAKE_LIBTOOL
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+  endif()
+
+  if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
+    find_program(CMAKE_LIBTOOL NAMES libtool)
+  endif()
+endif()
+
+get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
+if(CMAKE_LIBTOOL)
+  set(CMAKE_LIBTOOL ${CMAKE_LIBTOOL} CACHE PATH "libtool executable")
+  message(STATUS "Found libtool - ${CMAKE_LIBTOOL}")
+
+  execute_process(COMMAND ${CMAKE_LIBTOOL} -V
+    OUTPUT_VARIABLE LIBTOOL_V_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if("${LIBTOOL_V_OUTPUT}" MATCHES ".*cctools-([0-9.]+).*")
+    string(REGEX REPLACE ".*cctools-([0-9.]+).*" "\\1" LIBTOOL_VERSION
+      ${LIBTOOL_V_OUTPUT})
+    if(NOT LIBTOOL_VERSION VERSION_LESS "862")
+      set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols")
+    endif()
+  endif()
+
+  foreach(lang ${languages})
+    set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
+      "\"${CMAKE_LIBTOOL}\" -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> <LINK_FLAGS> <OBJECTS>")
+  endforeach()
+endif()
+
+# If DYLD_LIBRARY_PATH is set we need to set it on archiver commands
+if(DYLD_LIBRARY_PATH)
+  set(dyld_envar "DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}")
+  foreach(lang ${languages})
+    foreach(cmd ${CMAKE_${lang}_CREATE_STATIC_LIBRARY})
+      list(APPEND CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW
+           "${dyld_envar} ${cmd}")
+    endforeach()
+    set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
+      ${CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW})
+  endforeach()
+endif()
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 6ac92cc6703cc..218c5a8283369 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -117,6 +117,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR})
   # Remove the -nostdlib++ option we've added earlier.
   string(REPLACE "-nostdlib++" "" CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
 
+  # Use libtool instead of ar if you are both on an Apple host, and targeting Apple.
+  if(CMAKE_HOST_APPLE AND APPLE)
+    include(UseLibtool)
+  endif()
+
   # This can be used to detect whether we're in the runtimes build.
   set(RUNTIMES_BUILD ON)
 

From 0a6bababa830e325014eb5ebde1b39050afcb2a3 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sun, 2 Jun 2019 02:43:38 +0000
Subject: [PATCH 0832/1176] [ELF][MIPS] Delete dead !Sym->isDefined() check in
 addAbsolute()

llvm-svn: 362314
---
 lld/ELF/Writer.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 975fddf618bd1..bc3abf7837335 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -188,8 +188,6 @@ static Defined *addOptionalRegular(StringRef Name, SectionBase *Sec,
 static Defined *addAbsolute(StringRef Name) {
   Symbol *Sym = Symtab->addSymbol(Defined{nullptr, Name, STB_GLOBAL, STV_HIDDEN,
                                           STT_NOTYPE, 0, 0, nullptr});
-  if (!Sym->isDefined())
-    error("duplicate symbol: " + toString(*Sym));
   return cast<Defined>(Sym);
 }
 

From f58ef87bb789bece7cba04925756814b903b3740 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 03:31:01 +0000
Subject: [PATCH 0833/1176] [DAGCombiner] Replace two unchecked dyn_casts with
 casts.

The results of the dyn_casts were immediately dereferenced on the next line
so they had better not be null.

I don't think there's any way for these dyn_casts to fail, so use a cast
of adding null check.

llvm-svn: 362315
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 773e0281b173b..4ed17440abc03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8068,7 +8068,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   if (Level >= AfterLegalizeTypes)
     return SDValue();
 
-  MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
   SDValue Data  = MST->getValue();
   EVT VT = Data.getValueType();
@@ -8219,7 +8219,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   if (Level >= AfterLegalizeTypes)
     return SDValue();
 
-  MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
   SDValue Mask = MLD->getMask();
   SDLoc DL(N);
 

From 2e2c02c0cdd45a59faf81bd5c87688b21a5a0d9a Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Sun, 2 Jun 2019 04:00:38 +0000
Subject: [PATCH 0834/1176] Add script to update OpenMP -ast-dump test
 expectations, and use it to regenerate the test expectations.

(Only two tests change, as a result of no longer matching the 0x in a
pointer; the other tests were already excluding that.)

llvm-svn: 362316
---
 clang/test/AST/ast-dump-openmp-for.c      | 390 +++++++++++-----------
 clang/test/AST/ast-dump-openmp-parallel.c |  20 +-
 clang/utils/make-ast-dump-check.sh        |  79 +++++
 3 files changed, 284 insertions(+), 205 deletions(-)
 create mode 100755 clang/utils/make-ast-dump-check.sh

diff --git a/clang/test/AST/ast-dump-openmp-for.c b/clang/test/AST/ast-dump-openmp-for.c
index 557424d1926e3..a19a3e4951f10 100644
--- a/clang/test/AST/ast-dump-openmp-for.c
+++ b/clang/test/AST/ast-dump-openmp-for.c
@@ -35,208 +35,208 @@ void test_five(int x, int y, int z) {
         ;
 }
 
-// CHECK: TranslationUnitDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK: |-FunctionDecl 0x{{.*}} <{{.*}}ast-dump-openmp-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:15, col:19> col:19 used x 'int'
-// CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:22, line:7:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:4:1, col:16>
-// CHECK-NEXT: |     `-CapturedStmt 0x{{.*}} <line:5:3, line:6:5>
-// CHECK-NEXT: |       |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK-NEXT: |       | |-ForStmt 0x{{.*}} <line:5:3, line:6:5>
-// CHECK-NEXT: |       | | |-DeclStmt 0x{{.*}} <line:5:8, col:17>
-// CHECK-NEXT: |       | | | `-VarDecl 0x{{.*}} <col:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
+// CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
+// CHECK-NEXT: | `-CompoundStmt {{.*}} <col:22, line:7:1>
+// CHECK-NEXT: |   `-OMPForDirective {{.*}} <line:4:1, col:16>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:5:3, line:6:5>
+// CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK-NEXT: |       | |-ForStmt {{.*}} <line:5:3, line:6:5>
+// CHECK-NEXT: |       | | |-DeclStmt {{.*}} <line:5:8, col:17>
+// CHECK-NEXT: |       | | | `-VarDecl {{.*}} <col:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |-<<<NULL>>>
-// CHECK-NEXT: |       | | |-BinaryOperator 0x{{.*}} <col:19, col:23> 'int' '<'
-// CHECK-NEXT: |       | | | |-ImplicitCastExpr 0x{{.*}} <col:19> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | | | `-DeclRefExpr 0x{{.*}} <col:19> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | | `-ImplicitCastExpr 0x{{.*}} <col:23> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | |   `-DeclRefExpr 0x{{.*}} <col:23> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | |-UnaryOperator 0x{{.*}} <col:26, col:27> 'int' postfix '++'
-// CHECK-NEXT: |       | | | `-DeclRefExpr 0x{{.*}} <col:26> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | `-NullStmt 0x{{.*}} <line:6:5> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:4:1) *const restrict'
-// CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
-// CHECK-NEXT: |       `-DeclRefExpr 0x{{.*}} <col:3> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |-FunctionDecl 0x{{.*}} <line:9:1, line:14:1> line:9:6 test_two 'void (int, int)'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:15, col:19> col:19 used x 'int'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:22, col:26> col:26 used y 'int'
-// CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:29, line:14:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:10:1, col:16>
-// CHECK-NEXT: |     `-CapturedStmt 0x{{.*}} <line:11:3, line:13:7>
-// CHECK-NEXT: |       |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK-NEXT: |       | |-ForStmt 0x{{.*}} <line:11:3, line:13:7>
-// CHECK-NEXT: |       | | |-DeclStmt 0x{{.*}} <line:11:8, col:17>
-// CHECK-NEXT: |       | | | `-VarDecl 0x{{.*}} <col:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | | |-BinaryOperator {{.*}} <col:19, col:23> 'int' '<'
+// CHECK-NEXT: |       | | | |-ImplicitCastExpr {{.*}} <col:19> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:19> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | | `-ImplicitCastExpr {{.*}} <col:23> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
+// CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | `-NullStmt {{.*}} <line:6:5> openmp_structured_block
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:4:1) *const restrict'
+// CHECK-NEXT: |       | `-VarDecl {{.*}} <line:5:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       `-DeclRefExpr {{.*}} <col:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |-FunctionDecl {{.*}} <line:9:1, line:14:1> line:9:6 test_two 'void (int, int)'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:15, col:19> col:19 used x 'int'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:22, col:26> col:26 used y 'int'
+// CHECK-NEXT: | `-CompoundStmt {{.*}} <col:29, line:14:1>
+// CHECK-NEXT: |   `-OMPForDirective {{.*}} <line:10:1, col:16>
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:11:3, line:13:7>
+// CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK-NEXT: |       | |-ForStmt {{.*}} <line:11:3, line:13:7>
+// CHECK-NEXT: |       | | |-DeclStmt {{.*}} <line:11:8, col:17>
+// CHECK-NEXT: |       | | | `-VarDecl {{.*}} <col:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |-<<<NULL>>>
-// CHECK-NEXT: |       | | |-BinaryOperator 0x{{.*}} <col:19, col:23> 'int' '<'
-// CHECK-NEXT: |       | | | |-ImplicitCastExpr 0x{{.*}} <col:19> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | | | `-DeclRefExpr 0x{{.*}} <col:19> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | | `-ImplicitCastExpr 0x{{.*}} <col:23> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | |   `-DeclRefExpr 0x{{.*}} <col:23> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | |-UnaryOperator 0x{{.*}} <col:26, col:27> 'int' postfix '++'
-// CHECK-NEXT: |       | | | `-DeclRefExpr 0x{{.*}} <col:26> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | `-ForStmt 0x{{.*}} <line:12:5, line:13:7> openmp_structured_block
-// CHECK-NEXT: |       | |   |-DeclStmt 0x{{.*}} <line:12:10, col:19>
-// CHECK-NEXT: |       | |   | `-VarDecl 0x{{.*}} <col:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT: |       | |   |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
+// CHECK-NEXT: |       | | |-BinaryOperator {{.*}} <col:19, col:23> 'int' '<'
+// CHECK-NEXT: |       | | | |-ImplicitCastExpr {{.*}} <col:19> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:19> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | | `-ImplicitCastExpr {{.*}} <col:23> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
+// CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | `-ForStmt {{.*}} <line:12:5, line:13:7> openmp_structured_block
+// CHECK-NEXT: |       | |   |-DeclStmt {{.*}} <line:12:10, col:19>
+// CHECK-NEXT: |       | |   | `-VarDecl {{.*}} <col:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT: |       | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | |   |-<<<NULL>>>
-// CHECK-NEXT: |       | |   |-BinaryOperator 0x{{.*}} <col:21, col:25> 'int' '<'
-// CHECK-NEXT: |       | |   | |-ImplicitCastExpr 0x{{.*}} <col:21> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | |   | | `-DeclRefExpr 0x{{.*}} <col:21> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | |   | `-ImplicitCastExpr 0x{{.*}} <col:25> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | |   |   `-DeclRefExpr 0x{{.*}} <col:25> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT: |       | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
-// CHECK-NEXT: |       | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | |   `-NullStmt 0x{{.*}} <line:13:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:10:1) *const restrict'
-// CHECK-NEXT: |       | |-VarDecl 0x{{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
-// CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT: |       |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
-// CHECK-NEXT: |       |-DeclRefExpr 0x{{.*}} <line:11:3> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       `-DeclRefExpr 0x{{.*}} <line:12:25> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT: |-FunctionDecl 0x{{.*}} <line:16:1, line:21:1> line:16:6 test_three 'void (int, int)'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:17, col:21> col:21 used x 'int'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:24, col:28> col:28 used y 'int'
-// CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:31, line:21:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:17:1, col:28>
-// CHECK-NEXT: |     |-OMPCollapseClause 0x{{.*}} <col:17, col:27>
-// CHECK-NEXT: |     | `-ConstantExpr 0x{{.*}} <col:26> 'int'
-// CHECK-NEXT: |     |   `-IntegerLiteral 0x{{.*}} <col:26> 'int' 1
-// CHECK-NEXT: |     `-CapturedStmt 0x{{.*}} <line:18:3, line:20:7>
-// CHECK-NEXT: |       |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK-NEXT: |       | |-ForStmt 0x{{.*}} <line:18:3, line:20:7>
-// CHECK-NEXT: |       | | |-DeclStmt 0x{{.*}} <line:18:8, col:17>
-// CHECK-NEXT: |       | | | `-VarDecl 0x{{.*}} <col:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | |   |-BinaryOperator {{.*}} <col:21, col:25> 'int' '<'
+// CHECK-NEXT: |       | |   | |-ImplicitCastExpr {{.*}} <col:21> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | |   | | `-DeclRefExpr {{.*}} <col:21> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | |   | `-ImplicitCastExpr {{.*}} <col:25> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | |   |   `-DeclRefExpr {{.*}} <col:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
+// CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:13:7>
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:10:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:10:1) *const restrict'
+// CHECK-NEXT: |       | |-VarDecl {{.*}} <line:11:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | `-VarDecl {{.*}} <line:12:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
+// CHECK-NEXT: |       |-DeclRefExpr {{.*}} <line:11:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       `-DeclRefExpr {{.*}} <line:12:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT: |-FunctionDecl {{.*}} <line:16:1, line:21:1> line:16:6 test_three 'void (int, int)'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:17, col:21> col:21 used x 'int'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:24, col:28> col:28 used y 'int'
+// CHECK-NEXT: | `-CompoundStmt {{.*}} <col:31, line:21:1>
+// CHECK-NEXT: |   `-OMPForDirective {{.*}} <line:17:1, col:28>
+// CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:17, col:27>
+// CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:26> 'int'
+// CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:26> 'int' 1
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:18:3, line:20:7>
+// CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK-NEXT: |       | |-ForStmt {{.*}} <line:18:3, line:20:7>
+// CHECK-NEXT: |       | | |-DeclStmt {{.*}} <line:18:8, col:17>
+// CHECK-NEXT: |       | | | `-VarDecl {{.*}} <col:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |-<<<NULL>>>
-// CHECK-NEXT: |       | | |-BinaryOperator 0x{{.*}} <col:19, col:23> 'int' '<'
-// CHECK-NEXT: |       | | | |-ImplicitCastExpr 0x{{.*}} <col:19> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | | | `-DeclRefExpr 0x{{.*}} <col:19> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | | `-ImplicitCastExpr 0x{{.*}} <col:23> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | |   `-DeclRefExpr 0x{{.*}} <col:23> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | |-UnaryOperator 0x{{.*}} <col:26, col:27> 'int' postfix '++'
-// CHECK-NEXT: |       | | | `-DeclRefExpr 0x{{.*}} <col:26> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | `-ForStmt 0x{{.*}} <line:19:5, line:20:7> openmp_structured_block
-// CHECK-NEXT: |       | |   |-DeclStmt 0x{{.*}} <line:19:10, col:19>
-// CHECK-NEXT: |       | |   | `-VarDecl 0x{{.*}} <col:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT: |       | |   |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
+// CHECK-NEXT: |       | | |-BinaryOperator {{.*}} <col:19, col:23> 'int' '<'
+// CHECK-NEXT: |       | | | |-ImplicitCastExpr {{.*}} <col:19> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:19> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | | `-ImplicitCastExpr {{.*}} <col:23> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
+// CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | `-ForStmt {{.*}} <line:19:5, line:20:7> openmp_structured_block
+// CHECK-NEXT: |       | |   |-DeclStmt {{.*}} <line:19:10, col:19>
+// CHECK-NEXT: |       | |   | `-VarDecl {{.*}} <col:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT: |       | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | |   |-<<<NULL>>>
-// CHECK-NEXT: |       | |   |-BinaryOperator 0x{{.*}} <col:21, col:25> 'int' '<'
-// CHECK-NEXT: |       | |   | |-ImplicitCastExpr 0x{{.*}} <col:21> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | |   | | `-DeclRefExpr 0x{{.*}} <col:21> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | |   | `-ImplicitCastExpr 0x{{.*}} <col:25> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | |   |   `-DeclRefExpr 0x{{.*}} <col:25> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT: |       | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
-// CHECK-NEXT: |       | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | |   `-NullStmt 0x{{.*}} <line:20:7>
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:17:1) *const restrict'
-// CHECK-NEXT: |       | |-VarDecl 0x{{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
-// CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT: |       |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
-// CHECK-NEXT: |       |-DeclRefExpr 0x{{.*}} <line:18:3> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       `-DeclRefExpr 0x{{.*}} <line:19:25> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT: |-FunctionDecl 0x{{.*}} <line:23:1, line:28:1> line:23:6 test_four 'void (int, int)'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:16, col:20> col:20 used x 'int'
-// CHECK-NEXT: | |-ParmVarDecl 0x{{.*}} <col:23, col:27> col:27 used y 'int'
-// CHECK-NEXT: | `-CompoundStmt 0x{{.*}} <col:30, line:28:1>
-// CHECK-NEXT: |   `-OMPForDirective 0x{{.*}} <line:24:1, col:28>
-// CHECK-NEXT: |     |-OMPCollapseClause 0x{{.*}} <col:17, col:27>
-// CHECK-NEXT: |     | `-ConstantExpr 0x{{.*}} <col:26> 'int'
-// CHECK-NEXT: |     |   `-IntegerLiteral 0x{{.*}} <col:26> 'int' 2
-// CHECK-NEXT: |     `-CapturedStmt 0x{{.*}} <line:25:3, line:27:7>
-// CHECK-NEXT: |       |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK-NEXT: |       | |-ForStmt 0x{{.*}} <line:25:3, line:27:7>
-// CHECK-NEXT: |       | | |-DeclStmt 0x{{.*}} <line:25:8, col:17>
-// CHECK-NEXT: |       | | | `-VarDecl 0x{{.*}} <col:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | |   |-BinaryOperator {{.*}} <col:21, col:25> 'int' '<'
+// CHECK-NEXT: |       | |   | |-ImplicitCastExpr {{.*}} <col:21> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | |   | | `-DeclRefExpr {{.*}} <col:21> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | |   | `-ImplicitCastExpr {{.*}} <col:25> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | |   |   `-DeclRefExpr {{.*}} <col:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
+// CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:20:7>
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:17:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:17:1) *const restrict'
+// CHECK-NEXT: |       | |-VarDecl {{.*}} <line:18:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | `-VarDecl {{.*}} <line:19:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
+// CHECK-NEXT: |       |-DeclRefExpr {{.*}} <line:18:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       `-DeclRefExpr {{.*}} <line:19:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT: |-FunctionDecl {{.*}} <line:23:1, line:28:1> line:23:6 test_four 'void (int, int)'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
+// CHECK-NEXT: | |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
+// CHECK-NEXT: | `-CompoundStmt {{.*}} <col:30, line:28:1>
+// CHECK-NEXT: |   `-OMPForDirective {{.*}} <line:24:1, col:28>
+// CHECK-NEXT: |     |-OMPCollapseClause {{.*}} <col:17, col:27>
+// CHECK-NEXT: |     | `-ConstantExpr {{.*}} <col:26> 'int'
+// CHECK-NEXT: |     |   `-IntegerLiteral {{.*}} <col:26> 'int' 2
+// CHECK-NEXT: |     `-CapturedStmt {{.*}} <line:25:3, line:27:7>
+// CHECK-NEXT: |       |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK-NEXT: |       | |-ForStmt {{.*}} <line:25:3, line:27:7>
+// CHECK-NEXT: |       | | |-DeclStmt {{.*}} <line:25:8, col:17>
+// CHECK-NEXT: |       | | | `-VarDecl {{.*}} <col:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT: |       | | |-<<<NULL>>>
-// CHECK-NEXT: |       | | |-BinaryOperator 0x{{.*}} <col:19, col:23> 'int' '<'
-// CHECK-NEXT: |       | | | |-ImplicitCastExpr 0x{{.*}} <col:19> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | | | `-DeclRefExpr 0x{{.*}} <col:19> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | | `-ImplicitCastExpr 0x{{.*}} <col:23> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | | |   `-DeclRefExpr 0x{{.*}} <col:23> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       | | |-UnaryOperator 0x{{.*}} <col:26, col:27> 'int' postfix '++'
-// CHECK-NEXT: |       | | | `-DeclRefExpr 0x{{.*}} <col:26> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | | `-ForStmt 0x{{.*}} <line:26:5, line:27:7>
-// CHECK-NEXT: |       | |   |-DeclStmt 0x{{.*}} <line:26:10, col:19>
-// CHECK-NEXT: |       | |   | `-VarDecl 0x{{.*}} <col:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT: |       | |   |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
+// CHECK-NEXT: |       | | |-BinaryOperator {{.*}} <col:19, col:23> 'int' '<'
+// CHECK-NEXT: |       | | | |-ImplicitCastExpr {{.*}} <col:19> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | | | `-DeclRefExpr {{.*}} <col:19> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | | `-ImplicitCastExpr {{.*}} <col:23> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
+// CHECK-NEXT: |       | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | | `-ForStmt {{.*}} <line:26:5, line:27:7>
+// CHECK-NEXT: |       | |   |-DeclStmt {{.*}} <line:26:10, col:19>
+// CHECK-NEXT: |       | |   | `-VarDecl {{.*}} <col:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT: |       | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT: |       | |   |-<<<NULL>>>
-// CHECK-NEXT: |       | |   |-BinaryOperator 0x{{.*}} <col:21, col:25> 'int' '<'
-// CHECK-NEXT: |       | |   | |-ImplicitCastExpr 0x{{.*}} <col:21> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | |   | | `-DeclRefExpr 0x{{.*}} <col:21> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | |   | `-ImplicitCastExpr 0x{{.*}} <col:25> 'int' <LValueToRValue>
-// CHECK-NEXT: |       | |   |   `-DeclRefExpr 0x{{.*}} <col:25> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT: |       | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
-// CHECK-NEXT: |       | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT: |       | |   `-NullStmt 0x{{.*}} <line:27:7> openmp_structured_block
-// CHECK-NEXT: |       | |-ImplicitParamDecl 0x{{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:24:1) *const restrict'
-// CHECK-NEXT: |       | |-VarDecl 0x{{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT: |       | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
-// CHECK-NEXT: |       | `-VarDecl 0x{{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT: |       |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
-// CHECK-NEXT: |       |-DeclRefExpr 0x{{.*}} <line:25:3> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT: |       `-DeclRefExpr 0x{{.*}} <line:26:5> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT: `-FunctionDecl 0x{{.*}} <line:30:1, line:36:1> line:30:6 test_five 'void (int, int, int)'
-// CHECK-NEXT:   |-ParmVarDecl 0x{{.*}} <col:16, col:20> col:20 used x 'int'
-// CHECK-NEXT:   |-ParmVarDecl 0x{{.*}} <col:23, col:27> col:27 used y 'int'
-// CHECK-NEXT:   |-ParmVarDecl 0x{{.*}} <col:30, col:34> col:34 used z 'int'
-// CHECK-NEXT:   `-CompoundStmt 0x{{.*}} <col:37, line:36:1>
-// CHECK-NEXT:     `-OMPForDirective 0x{{.*}} <line:31:1, col:28>
-// CHECK-NEXT:       |-OMPCollapseClause 0x{{.*}} <col:17, col:27>
-// CHECK-NEXT:       | `-ConstantExpr 0x{{.*}} <col:26> 'int'
-// CHECK-NEXT:       |   `-IntegerLiteral 0x{{.*}} <col:26> 'int' 2
-// CHECK-NEXT:       `-CapturedStmt 0x{{.*}} <line:32:3, line:35:9>
-// CHECK-NEXT:         |-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK-NEXT:         | |-ForStmt 0x{{.*}} <line:32:3, line:35:9>
-// CHECK-NEXT:         | | |-DeclStmt 0x{{.*}} <line:32:8, col:17>
-// CHECK-NEXT:         | | | `-VarDecl 0x{{.*}} <col:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT:         | | |   `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | |   |-BinaryOperator {{.*}} <col:21, col:25> 'int' '<'
+// CHECK-NEXT: |       | |   | |-ImplicitCastExpr {{.*}} <col:21> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | |   | | `-DeclRefExpr {{.*}} <col:21> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | |   | `-ImplicitCastExpr {{.*}} <col:25> 'int' <LValueToRValue>
+// CHECK-NEXT: |       | |   |   `-DeclRefExpr {{.*}} <col:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT: |       | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
+// CHECK-NEXT: |       | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT: |       | |   `-NullStmt {{.*}} <line:27:7> openmp_structured_block
+// CHECK-NEXT: |       | |-ImplicitParamDecl {{.*}} <line:24:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:24:1) *const restrict'
+// CHECK-NEXT: |       | |-VarDecl {{.*}} <line:25:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT: |       | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
+// CHECK-NEXT: |       | `-VarDecl {{.*}} <line:26:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT: |       |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
+// CHECK-NEXT: |       |-DeclRefExpr {{.*}} <line:25:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT: |       `-DeclRefExpr {{.*}} <line:26:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT: `-FunctionDecl {{.*}} <line:30:1, line:36:1> line:30:6 test_five 'void (int, int, int)'
+// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:16, col:20> col:20 used x 'int'
+// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:23, col:27> col:27 used y 'int'
+// CHECK-NEXT:   |-ParmVarDecl {{.*}} <col:30, col:34> col:34 used z 'int'
+// CHECK-NEXT:   `-CompoundStmt {{.*}} <col:37, line:36:1>
+// CHECK-NEXT:     `-OMPForDirective {{.*}} <line:31:1, col:28>
+// CHECK-NEXT:       |-OMPCollapseClause {{.*}} <col:17, col:27>
+// CHECK-NEXT:       | `-ConstantExpr {{.*}} <col:26> 'int'
+// CHECK-NEXT:       |   `-IntegerLiteral {{.*}} <col:26> 'int' 2
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:32:3, line:35:9>
+// CHECK-NEXT:         |-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK-NEXT:         | |-ForStmt {{.*}} <line:32:3, line:35:9>
+// CHECK-NEXT:         | | |-DeclStmt {{.*}} <line:32:8, col:17>
+// CHECK-NEXT:         | | | `-VarDecl {{.*}} <col:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT:         | | |   `-IntegerLiteral {{.*}} <col:16> 'int' 0
 // CHECK-NEXT:         | | |-<<<NULL>>>
-// CHECK-NEXT:         | | |-BinaryOperator 0x{{.*}} <col:19, col:23> 'int' '<'
-// CHECK-NEXT:         | | | |-ImplicitCastExpr 0x{{.*}} <col:19> 'int' <LValueToRValue>
-// CHECK-NEXT:         | | | | `-DeclRefExpr 0x{{.*}} <col:19> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT:         | | | `-ImplicitCastExpr 0x{{.*}} <col:23> 'int' <LValueToRValue>
-// CHECK-NEXT:         | | |   `-DeclRefExpr 0x{{.*}} <col:23> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT:         | | |-UnaryOperator 0x{{.*}} <col:26, col:27> 'int' postfix '++'
-// CHECK-NEXT:         | | | `-DeclRefExpr 0x{{.*}} <col:26> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT:         | | `-ForStmt 0x{{.*}} <line:33:5, line:35:9>
-// CHECK-NEXT:         | |   |-DeclStmt 0x{{.*}} <line:33:10, col:19>
-// CHECK-NEXT:         | |   | `-VarDecl 0x{{.*}} <col:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT:         | |   |   `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
+// CHECK-NEXT:         | | |-BinaryOperator {{.*}} <col:19, col:23> 'int' '<'
+// CHECK-NEXT:         | | | |-ImplicitCastExpr {{.*}} <col:19> 'int' <LValueToRValue>
+// CHECK-NEXT:         | | | | `-DeclRefExpr {{.*}} <col:19> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT:         | | | `-ImplicitCastExpr {{.*}} <col:23> 'int' <LValueToRValue>
+// CHECK-NEXT:         | | |   `-DeclRefExpr {{.*}} <col:23> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT:         | | |-UnaryOperator {{.*}} <col:26, col:27> 'int' postfix '++'
+// CHECK-NEXT:         | | | `-DeclRefExpr {{.*}} <col:26> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT:         | | `-ForStmt {{.*}} <line:33:5, line:35:9>
+// CHECK-NEXT:         | |   |-DeclStmt {{.*}} <line:33:10, col:19>
+// CHECK-NEXT:         | |   | `-VarDecl {{.*}} <col:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT:         | |   |   `-IntegerLiteral {{.*}} <col:18> 'int' 0
 // CHECK-NEXT:         | |   |-<<<NULL>>>
-// CHECK-NEXT:         | |   |-BinaryOperator 0x{{.*}} <col:21, col:25> 'int' '<'
-// CHECK-NEXT:         | |   | |-ImplicitCastExpr 0x{{.*}} <col:21> 'int' <LValueToRValue>
-// CHECK-NEXT:         | |   | | `-DeclRefExpr 0x{{.*}} <col:21> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT:         | |   | `-ImplicitCastExpr 0x{{.*}} <col:25> 'int' <LValueToRValue>
-// CHECK-NEXT:         | |   |   `-DeclRefExpr 0x{{.*}} <col:25> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT:         | |   |-UnaryOperator 0x{{.*}} <col:28, col:29> 'int' postfix '++'
-// CHECK-NEXT:         | |   | `-DeclRefExpr 0x{{.*}} <col:28> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT:         | |   `-ForStmt 0x{{.*}} <line:34:7, line:35:9> openmp_structured_block
-// CHECK-NEXT:         | |     |-DeclStmt 0x{{.*}} <line:34:12, col:21>
-// CHECK-NEXT:         | |     | `-VarDecl 0x{{.*}} <col:12, col:20> col:16 used i 'int' cinit
-// CHECK-NEXT:         | |     |   `-IntegerLiteral 0x{{.*}} <col:20> 'int' 0
+// CHECK-NEXT:         | |   |-BinaryOperator {{.*}} <col:21, col:25> 'int' '<'
+// CHECK-NEXT:         | |   | |-ImplicitCastExpr {{.*}} <col:21> 'int' <LValueToRValue>
+// CHECK-NEXT:         | |   | | `-DeclRefExpr {{.*}} <col:21> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT:         | |   | `-ImplicitCastExpr {{.*}} <col:25> 'int' <LValueToRValue>
+// CHECK-NEXT:         | |   |   `-DeclRefExpr {{.*}} <col:25> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT:         | |   |-UnaryOperator {{.*}} <col:28, col:29> 'int' postfix '++'
+// CHECK-NEXT:         | |   | `-DeclRefExpr {{.*}} <col:28> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT:         | |   `-ForStmt {{.*}} <line:34:7, line:35:9> openmp_structured_block
+// CHECK-NEXT:         | |     |-DeclStmt {{.*}} <line:34:12, col:21>
+// CHECK-NEXT:         | |     | `-VarDecl {{.*}} <col:12, col:20> col:16 used i 'int' cinit
+// CHECK-NEXT:         | |     |   `-IntegerLiteral {{.*}} <col:20> 'int' 0
 // CHECK-NEXT:         | |     |-<<<NULL>>>
-// CHECK-NEXT:         | |     |-BinaryOperator 0x{{.*}} <col:23, col:27> 'int' '<'
-// CHECK-NEXT:         | |     | |-ImplicitCastExpr 0x{{.*}} <col:23> 'int' <LValueToRValue>
-// CHECK-NEXT:         | |     | | `-DeclRefExpr 0x{{.*}} <col:23> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT:         | |     | `-ImplicitCastExpr 0x{{.*}} <col:27> 'int' <LValueToRValue>
-// CHECK-NEXT:         | |     |   `-DeclRefExpr 0x{{.*}} <col:27> 'int' lvalue ParmVar 0x{{.*}} 'z' 'int'
-// CHECK-NEXT:         | |     |-UnaryOperator 0x{{.*}} <col:30, col:31> 'int' postfix '++'
-// CHECK-NEXT:         | |     | `-DeclRefExpr 0x{{.*}} <col:30> 'int' lvalue Var 0x{{.*}} 'i' 'int'
-// CHECK-NEXT:         | |     `-NullStmt 0x{{.*}} <line:35:9>
-// CHECK-NEXT:         | |-ImplicitParamDecl 0x{{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:31:1) *const restrict'
-// CHECK-NEXT:         | |-VarDecl 0x{{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
-// CHECK-NEXT:         | | `-IntegerLiteral 0x{{.*}} <col:16> 'int' 0
-// CHECK-NEXT:         | |-VarDecl 0x{{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
-// CHECK-NEXT:         | | `-IntegerLiteral 0x{{.*}} <col:18> 'int' 0
-// CHECK-NEXT:         | `-VarDecl 0x{{.*}} <line:34:12, col:20> col:16 used i 'int' cinit
-// CHECK-NEXT:         |   `-IntegerLiteral 0x{{.*}} <col:20> 'int' 0
-// CHECK-NEXT:         |-DeclRefExpr 0x{{.*}} <line:32:3> 'int' lvalue ParmVar 0x{{.*}} 'x' 'int'
-// CHECK-NEXT:         |-DeclRefExpr 0x{{.*}} <line:33:5> 'int' lvalue ParmVar 0x{{.*}} 'y' 'int'
-// CHECK-NEXT:         `-DeclRefExpr 0x{{.*}} <line:34:27> 'int' lvalue ParmVar 0x{{.*}} 'z' 'int'
+// CHECK-NEXT:         | |     |-BinaryOperator {{.*}} <col:23, col:27> 'int' '<'
+// CHECK-NEXT:         | |     | |-ImplicitCastExpr {{.*}} <col:23> 'int' <LValueToRValue>
+// CHECK-NEXT:         | |     | | `-DeclRefExpr {{.*}} <col:23> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT:         | |     | `-ImplicitCastExpr {{.*}} <col:27> 'int' <LValueToRValue>
+// CHECK-NEXT:         | |     |   `-DeclRefExpr {{.*}} <col:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
+// CHECK-NEXT:         | |     |-UnaryOperator {{.*}} <col:30, col:31> 'int' postfix '++'
+// CHECK-NEXT:         | |     | `-DeclRefExpr {{.*}} <col:30> 'int' lvalue Var {{.*}} 'i' 'int'
+// CHECK-NEXT:         | |     `-NullStmt {{.*}} <line:35:9>
+// CHECK-NEXT:         | |-ImplicitParamDecl {{.*}} <line:31:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-for.c:31:1) *const restrict'
+// CHECK-NEXT:         | |-VarDecl {{.*}} <line:32:8, col:16> col:12 used i 'int' cinit
+// CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:16> 'int' 0
+// CHECK-NEXT:         | |-VarDecl {{.*}} <line:33:10, col:18> col:14 used i 'int' cinit
+// CHECK-NEXT:         | | `-IntegerLiteral {{.*}} <col:18> 'int' 0
+// CHECK-NEXT:         | `-VarDecl {{.*}} <line:34:12, col:20> col:16 used i 'int' cinit
+// CHECK-NEXT:         |   `-IntegerLiteral {{.*}} <col:20> 'int' 0
+// CHECK-NEXT:         |-DeclRefExpr {{.*}} <line:32:3> 'int' lvalue ParmVar {{.*}} 'x' 'int'
+// CHECK-NEXT:         |-DeclRefExpr {{.*}} <line:33:5> 'int' lvalue ParmVar {{.*}} 'y' 'int'
+// CHECK-NEXT:         `-DeclRefExpr {{.*}} <line:34:27> 'int' lvalue ParmVar {{.*}} 'z' 'int'
diff --git a/clang/test/AST/ast-dump-openmp-parallel.c b/clang/test/AST/ast-dump-openmp-parallel.c
index ba33546681b5d..ff599d68098a8 100644
--- a/clang/test/AST/ast-dump-openmp-parallel.c
+++ b/clang/test/AST/ast-dump-openmp-parallel.c
@@ -5,13 +5,13 @@ void test() {
   ;
 }
 
-// CHECK: TranslationUnitDecl 0x{{.*}} <<invalid sloc>> <invalid sloc>
-// CHECK: `-FunctionDecl 0x{{.*}} <{{.*}}ast-dump-openmp-parallel.c:3:1, line:6:1> line:3:6 test 'void ()'
-// CHECK-NEXT:   `-CompoundStmt 0x{{.*}} <col:13, line:6:1>
-// CHECK-NEXT:     `-OMPParallelDirective 0x{{.*}} <line:4:1, col:21>
-// CHECK-NEXT:       `-CapturedStmt 0x{{.*}} <line:5:3>
-// CHECK-NEXT:         `-CapturedDecl 0x{{.*}} <<invalid sloc>> <invalid sloc> nothrow
-// CHECK-NEXT:           |-NullStmt 0x{{.*}} <col:3> openmp_structured_block
-// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT:           |-ImplicitParamDecl 0x{{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT:           `-ImplicitParamDecl 0x{{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel.c:4:1) *const restrict'
+// CHECK: TranslationUnitDecl {{.*}} <<invalid sloc>> <invalid sloc>
+// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel.c:3:1, line:6:1> line:3:6 test 'void ()'
+// CHECK-NEXT:   `-CompoundStmt {{.*}} <col:13, line:6:1>
+// CHECK-NEXT:     `-OMPParallelDirective {{.*}} <line:4:1, col:21>
+// CHECK-NEXT:       `-CapturedStmt {{.*}} <line:5:3>
+// CHECK-NEXT:         `-CapturedDecl {{.*}} <<invalid sloc>> <invalid sloc> nothrow
+// CHECK-NEXT:           |-NullStmt {{.*}} <col:3> openmp_structured_block
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <line:4:1> col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT:           |-ImplicitParamDecl {{.*}} <col:1> col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT:           `-ImplicitParamDecl {{.*}} <col:1> col:1 implicit __context 'struct (anonymous at {{.*}}ast-dump-openmp-parallel.c:4:1) *const restrict'
diff --git a/clang/utils/make-ast-dump-check.sh b/clang/utils/make-ast-dump-check.sh
new file mode 100755
index 0000000000000..2a9cf40a884cd
--- /dev/null
+++ b/clang/utils/make-ast-dump-check.sh
@@ -0,0 +1,79 @@
+#! /bin/bash
+
+# This script is intended as a FileCheck replacement to update the test
+# expectations in a -ast-dump test.
+#
+# Usage:
+#
+# $ lit -DFileCheck=$PWD/utils/make-ast-dump-check.sh test/AST/ast-dump-openmp-*
+
+prefix=CHECK
+
+while [[ "$#" -ne 0 ]]; do
+  case "$1" in
+  --check-prefix)
+    shift
+    prefix="$1"
+    ;;
+  --implicit-check-not)
+    shift
+    ;;
+  -*)
+    ;;
+  *)
+    file="$1"
+    ;;
+  esac
+  shift
+done
+
+testdir="$(dirname "$file")"
+
+read -r -d '' script <<REWRITE
+BEGIN {
+  skipping_builtins = 0
+  matched_last_line = 0
+}
+
+/^[\`|].* line:/ {
+  skipping_builtins = 0
+}
+
+{
+  if (skipping_builtins == 1) {
+    matched_last_line = 0
+    next
+  }
+}
+
+/TranslationUnitDecl/ {
+  skipping_builtins = 1
+}
+
+{
+  s = \$0
+  gsub("0x[0-9a-fA-F]+", "{{.*}}", s)
+  gsub("$testdir/", "{{.*}}", s)
+}
+
+matched_last_line == 0 {
+  print "// ${prefix}: " s
+}
+
+matched_last_line == 1 {
+  print "// ${prefix}-NEXT: " s
+}
+
+{
+  matched_last_line = 1
+}
+REWRITE
+
+echo "$script"
+
+{
+  cat "$file" | grep -v "$prefix"
+  awk "$script"
+} > "$file.new"
+
+mv "$file.new" "$file"

From e518235aca345ad5dd708dd26c632e06122ffb09 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Sun, 2 Jun 2019 04:00:43 +0000
Subject: [PATCH 0835/1176] Factor out commonality between variable capture
 initialization and 'this' capture initialization.

llvm-svn: 362317
---
 clang/include/clang/Sema/Initialization.h |   4 +-
 clang/include/clang/Sema/Sema.h           |  10 +-
 clang/lib/Sema/SemaLambda.cpp             | 153 ++++++++++++----------
 clang/lib/Sema/SemaStmt.cpp               |  40 +++---
 clang/test/AST/ast-dump-expr-json.cpp     |  56 +++++---
 clang/test/AST/ast-dump-expr.cpp          |   1 +
 6 files changed, 149 insertions(+), 115 deletions(-)

diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index 8efa2e7597cb0..14d8aa8dabf30 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -386,6 +386,8 @@ class alignas(8) InitializedEntity {
   }
 
   /// Create the initialization entity for a lambda capture.
+  ///
+  /// \p VarID The name of the entity being captured, or nullptr for 'this'.
   static InitializedEntity InitializeLambdaCapture(IdentifierInfo *VarID,
                                                    QualType FieldType,
                                                    SourceLocation Loc) {
@@ -509,7 +511,7 @@ class alignas(8) InitializedEntity {
   /// For a lambda capture, return the capture's name.
   StringRef getCapturedVarName() const {
     assert(getKind() == EK_LambdaCapture && "Not a lambda capture!");
-    return Capture.VarID->getName();
+    return Capture.VarID ? Capture.VarID->getName() : "this";
   }
 
   /// Determine the location of the capture when initializing
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 3e128df7fba2a..a6db2f046bbd9 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5291,11 +5291,6 @@ class Sema {
       const unsigned *const FunctionScopeIndexToStopAt = nullptr,
       bool ByCopy = false);
 
-  /// Initialize the given 'this' capture with a suitable 'this' or '*this'
-  /// expression.
-  ExprResult performThisCaptureInitialization(const sema::Capture &Capture,
-                                              bool IsImplicit);
-
   /// Determine whether the given type is the type of *this that is used
   /// outside of the body of a member function for a type that is currently
   /// being defined.
@@ -5808,6 +5803,11 @@ class Sema {
   /// Build a FieldDecl suitable to hold the given capture.
   FieldDecl *BuildCaptureField(RecordDecl *RD, const sema::Capture &Capture);
 
+  /// Initialize the given capture with a suitable expression.
+  ExprResult BuildCaptureInit(const sema::Capture &Capture,
+                              SourceLocation ImplicitCaptureLoc,
+                              bool IsOpenMPMapping = false);
+
   /// Complete a lambda-expression having processed and attached the
   /// lambda body.
   ExprResult BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index d3f3b60926fa3..4b832f5653025 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1431,32 +1431,22 @@ static void addBlockPointerConversion(Sema &S,
   Class->addDecl(Conversion);
 }
 
-ExprResult Sema::performThisCaptureInitialization(const Capture &Cap,
-                                                  bool IsImplicit) {
-  QualType ThisTy = getCurrentThisType();
-  SourceLocation Loc = Cap.getLocation();
-  Expr *This = BuildCXXThisExpr(Loc, ThisTy, IsImplicit);
-  if (Cap.isReferenceCapture())
-    return This;
-
-  // Capture (by copy) of '*this'.
-  Expr *StarThis = CreateBuiltinUnaryOp(Loc, UO_Deref, This).get();
-  InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture(
-      nullptr, Cap.getCaptureType(), Loc);
-  InitializationKind InitKind =
-      InitializationKind::CreateDirect(Loc, Loc, Loc);
-  InitializationSequence Init(*this, Entity, InitKind, StarThis);
-  return Init.Perform(*this, Entity, InitKind, StarThis);
-}
-
-static ExprResult performLambdaVarCaptureInitialization(
-    Sema &S, const Capture &Capture, FieldDecl *Field,
-    SourceLocation ImplicitCaptureLoc, bool IsImplicitCapture) {
-  assert(Capture.isVariableCapture() && "not a variable capture");
-
-  auto *Var = Capture.getVariable();
+ExprResult Sema::BuildCaptureInit(const Capture &Cap,
+                                  SourceLocation ImplicitCaptureLoc,
+                                  bool IsOpenMPMapping) {
+  // VLA captures don't have a stored initialization expression.
+  if (Cap.isVLATypeCapture())
+    return ExprResult();
+
+  // An init-capture is initialized directly from its stored initializer.
+  if (Cap.isInitCapture())
+    return Cap.getVariable()->getInit();
+
+  // For anything else, build an initialization expression. For an implicit
+  // capture, the capture notionally happens at the capture-default, so use
+  // that location here.
   SourceLocation Loc =
-      IsImplicitCapture ? ImplicitCaptureLoc : Capture.getLocation();
+      ImplicitCaptureLoc.isValid() ? ImplicitCaptureLoc : Cap.getLocation();
 
   // C++11 [expr.prim.lambda]p21:
   //   When the lambda-expression is evaluated, the entities that
@@ -1470,17 +1460,39 @@ static ExprResult performLambdaVarCaptureInitialization(
   // C++ [expr.prim.lambda]p12:
   //   An entity captured by a lambda-expression is odr-used (3.2) in
   //   the scope containing the lambda-expression.
-  ExprResult RefResult = S.BuildDeclarationNameExpr(
+  ExprResult Init;
+  IdentifierInfo *Name = nullptr;
+  if (Cap.isThisCapture()) {
+    QualType ThisTy = getCurrentThisType();
+    Expr *This = BuildCXXThisExpr(Loc, ThisTy, ImplicitCaptureLoc.isValid());
+    if (Cap.isCopyCapture())
+      Init = CreateBuiltinUnaryOp(Loc, UO_Deref, This);
+    else
+      Init = This;
+  } else {
+    assert(Cap.isVariableCapture() && "unknown kind of capture");
+    VarDecl *Var = Cap.getVariable();
+    Name = Var->getIdentifier();
+    Init = BuildDeclarationNameExpr(
       CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
-  if (RefResult.isInvalid())
+  }
+
+  // In OpenMP, the capture kind doesn't actually describe how to capture:
+  // variables are "mapped" onto the device in a process that does not formally
+  // make a copy, even for a "copy capture".
+  if (IsOpenMPMapping)
+    return Init;
+
+  if (Init.isInvalid())
     return ExprError();
-  Expr *Ref = RefResult.get();
 
-  auto Entity = InitializedEntity::InitializeLambdaCapture(
-      Var->getIdentifier(), Field->getType(), Loc);
-  InitializationKind InitKind = InitializationKind::CreateDirect(Loc, Loc, Loc);
-  InitializationSequence Init(S, Entity, InitKind, Ref);
-  return Init.Perform(S, Entity, InitKind, Ref);
+  Expr *InitExpr = Init.get();
+  InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture(
+      Name, Cap.getCaptureType(), Loc);
+  InitializationKind InitKind =
+      InitializationKind::CreateDirect(Loc, Loc, Loc);
+  InitializationSequence InitSeq(*this, Entity, InitKind, InitExpr);
+  return InitSeq.Perform(*this, Entity, InitKind, InitExpr);
 }
 
 ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body,
@@ -1647,14 +1659,18 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
 
       assert(!From.isBlockCapture() && "Cannot capture __block variables");
       bool IsImplicit = I >= LSI->NumExplicitCaptures;
+      SourceLocation ImplicitCaptureLoc =
+          IsImplicit ? CaptureDefaultLoc : SourceLocation();
 
       // Use source ranges of explicit captures for fixits where available.
       SourceRange CaptureRange = LSI->ExplicitCaptureRanges[I];
 
       // Warn about unused explicit captures.
       bool IsCaptureUsed = true;
-      if (!CurContext->isDependentContext() && !IsImplicit && !From.isODRUsed()) {
+      if (!CurContext->isDependentContext() && !IsImplicit &&
+          !From.isODRUsed()) {
         // Initialized captures that are non-ODR used may not be eliminated.
+        // FIXME: Where did the IsGenericLambda here come from?
         bool NonODRUsedInitCapture =
             IsGenericLambda && From.isNonODRUsed() && From.isInitCapture();
         if (!NonODRUsedInitCapture) {
@@ -1682,46 +1698,43 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
         PrevCaptureLoc = CaptureRange.getEnd();
       }
 
-      // Add a FieldDecl for the capture.
-      FieldDecl *Field = BuildCaptureField(Class, From);
-
-      // Handle 'this' capture.
-      if (From.isThisCapture()) {
-        // Capturing 'this' implicitly with a default of '[=]' is deprecated,
-        // because it results in a reference capture. Don't warn prior to
-        // C++2a; there's nothing that can be done about it before then.
-        if (getLangOpts().CPlusPlus2a && IsImplicit &&
-            CaptureDefault == LCD_ByCopy) {
-          Diag(From.getLocation(), diag::warn_deprecated_this_capture);
-          Diag(CaptureDefaultLoc, diag::note_deprecated_this_capture)
-              << FixItHint::CreateInsertion(
-                     getLocForEndOfToken(CaptureDefaultLoc), ", this");
+      // Map the capture to our AST representation.
+      LambdaCapture Capture = [&] {
+        if (From.isThisCapture()) {
+          // Capturing 'this' implicitly with a default of '[=]' is deprecated,
+          // because it results in a reference capture. Don't warn prior to
+          // C++2a; there's nothing that can be done about it before then.
+          if (getLangOpts().CPlusPlus2a && IsImplicit &&
+              CaptureDefault == LCD_ByCopy) {
+            Diag(From.getLocation(), diag::warn_deprecated_this_capture);
+            Diag(CaptureDefaultLoc, diag::note_deprecated_this_capture)
+                << FixItHint::CreateInsertion(
+                       getLocForEndOfToken(CaptureDefaultLoc), ", this");
+          }
+          return LambdaCapture(From.getLocation(), IsImplicit,
+                               From.isCopyCapture() ? LCK_StarThis : LCK_This);
+        } else if (From.isVLATypeCapture()) {
+          return LambdaCapture(From.getLocation(), IsImplicit, LCK_VLAType);
+        } else {
+          assert(From.isVariableCapture() && "unknown kind of capture");
+          VarDecl *Var = From.getVariable();
+          LambdaCaptureKind Kind =
+              From.isCopyCapture() ? LCK_ByCopy : LCK_ByRef;
+          return LambdaCapture(From.getLocation(), IsImplicit, Kind, Var,
+                               From.getEllipsisLoc());
         }
+      }();
 
-        ExprResult Init = performThisCaptureInitialization(From, IsImplicit);
-        Captures.push_back(
-            LambdaCapture(From.getLocation(), IsImplicit,
-                          From.isCopyCapture() ? LCK_StarThis : LCK_This));
-        CaptureInits.push_back(Init.get());
-        continue;
-      }
-      if (From.isVLATypeCapture()) {
-        Captures.push_back(
-            LambdaCapture(From.getLocation(), IsImplicit, LCK_VLAType));
-        CaptureInits.push_back(nullptr);
-        continue;
-      }
+      // Form the initializer for the capture field.
+      ExprResult Init = BuildCaptureInit(From, ImplicitCaptureLoc);
 
-      VarDecl *Var = From.getVariable();
-      LambdaCaptureKind Kind = From.isCopyCapture() ? LCK_ByCopy : LCK_ByRef;
-      Captures.push_back(LambdaCapture(From.getLocation(), IsImplicit, Kind,
-                                       Var, From.getEllipsisLoc()));
+      // FIXME: Skip this capture if the capture is not used, the initializer
+      // has no side-effects, the type of the capture is trivial, and the
+      // lambda is not externally visible.
 
-      ExprResult Init =
-          From.isInitCapture()
-              ? Var->getInit()
-              : performLambdaVarCaptureInitialization(
-                    *this, From, Field, CaptureDefaultLoc, IsImplicit);
+      // Add a FieldDecl for the capture and form its initializer.
+      BuildCaptureField(Class, From);
+      Captures.push_back(Capture);
       CaptureInits.push_back(Init.get());
     }
 
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 3a7acd20274ec..bc1e8f27090af 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -4231,39 +4231,35 @@ buildCapturedStmtCaptureList(Sema &S, CapturedRegionScopeInfo *RSI,
     if (Cap.isInvalid())
       continue;
 
+    // Form the initializer for the capture.
+    ExprResult Init = S.BuildCaptureInit(Cap, Cap.getLocation(),
+                                         RSI->CapRegionKind == CR_OpenMP);
+
+    // FIXME: Bail out now if the capture is not used and the initializer has
+    // no side-effects.
+
     // Create a field for this capture.
     FieldDecl *Field = S.BuildCaptureField(RSI->TheRecordDecl, Cap);
 
+    // Add the capture to our list of captures.
     if (Cap.isThisCapture()) {
-      ExprResult Init =
-          S.performThisCaptureInitialization(Cap, /*Implicit*/ true);
       Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
                                                CapturedStmt::VCK_This));
-      CaptureInits.push_back(Init.get());
-      continue;
     } else if (Cap.isVLATypeCapture()) {
       Captures.push_back(
           CapturedStmt::Capture(Cap.getLocation(), CapturedStmt::VCK_VLAType));
-      CaptureInits.push_back(nullptr);
-      continue;
-    }
-
-    if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
-      S.setOpenMPCaptureKind(Field, Cap.getVariable(), RSI->OpenMPLevel);
-
-    VarDecl *Var = Cap.getVariable();
-    SourceLocation Loc = Cap.getLocation();
+    } else {
+      assert(Cap.isVariableCapture() && "unknown kind of capture");
 
-    // FIXME: For a non-reference capture, we need to build an expression to
-    // perform a copy here!
-    ExprResult Init = S.BuildDeclarationNameExpr(
-        CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
+      if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP)
+        S.setOpenMPCaptureKind(Field, Cap.getVariable(), RSI->OpenMPLevel);
 
-    Captures.push_back(CapturedStmt::Capture(Loc,
-                                             Cap.isReferenceCapture()
-                                                 ? CapturedStmt::VCK_ByRef
-                                                 : CapturedStmt::VCK_ByCopy,
-                                             Var));
+      Captures.push_back(CapturedStmt::Capture(Cap.getLocation(),
+                                               Cap.isReferenceCapture()
+                                                   ? CapturedStmt::VCK_ByRef
+                                                   : CapturedStmt::VCK_ByCopy,
+                                               Cap.getVariable()));
+    }
     CaptureInits.push_back(Init.get());
   }
   return false;
diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp
index 3a8e745e50f3e..d56d79d8c1671 100644
--- a/clang/test/AST/ast-dump-expr-json.cpp
+++ b/clang/test/AST/ast-dump-expr-json.cpp
@@ -3924,24 +3924,46 @@ void TestNonADLCall3() {
 // CHECK-NEXT:                  ]
 // CHECK-NEXT:                 },
 // CHECK-NEXT:                 {
-// CHECK-NEXT:                  "id": "0x{{.*}}",
-// CHECK-NEXT:                  "kind": "CXXThisExpr",
-// CHECK-NEXT:                  "range": {
-// CHECK-NEXT:                   "begin": {
-// CHECK-NEXT:                    "col": 8,
-// CHECK-NEXT:                    "file": "{{.*}}",
-// CHECK-NEXT:                    "line": 98
+// CHECK-NEXT:                   "id": "0x{{.*}}",
+// CHECK-NEXT:                   "kind": "ParenListExpr",
+// CHECK-NEXT:                   "range": {
+// CHECK-NEXT:                     "begin": {
+// CHECK-NEXT:                       "col": 8,
+// CHECK-NEXT:                       "file": "{{.*}}",
+// CHECK-NEXT:                       "line": 98
+// CHECK-NEXT:                     },
+// CHECK-NEXT:                     "end": {
+// CHECK-NEXT:                       "col": 8,
+// CHECK-NEXT:                       "file": "{{.*}}",
+// CHECK-NEXT:                       "line": 98
+// CHECK-NEXT:                     }
 // CHECK-NEXT:                   },
-// CHECK-NEXT:                   "end": {
-// CHECK-NEXT:                    "col": 8,
-// CHECK-NEXT:                    "file": "{{.*}}",
-// CHECK-NEXT:                    "line": 98
-// CHECK-NEXT:                   }
-// CHECK-NEXT:                  },
-// CHECK-NEXT:                  "type": {
-// CHECK-NEXT:                   "qualType": "V *"
-// CHECK-NEXT:                  },
-// CHECK-NEXT:                  "valueCategory": "rvalue"
+// CHECK-NEXT:                   "type": {
+// CHECK-NEXT:                     "qualType": "NULL TYPE"
+// CHECK-NEXT:                   },
+// CHECK-NEXT:                   "valueCategory": "rvalue",
+// CHECK-NEXT:                   "inner": [
+// CHECK-NEXT:                     {
+// CHECK-NEXT:                       "id": "0x{{.*}}",
+// CHECK-NEXT:                       "kind": "CXXThisExpr",
+// CHECK-NEXT:                       "range": {
+// CHECK-NEXT:                         "begin": {
+// CHECK-NEXT:                           "col": 8,
+// CHECK-NEXT:                           "file": "{{.*}}",
+// CHECK-NEXT:                           "line": 98
+// CHECK-NEXT:                         },
+// CHECK-NEXT:                         "end": {
+// CHECK-NEXT:                           "col": 8,
+// CHECK-NEXT:                           "file": "{{.*}}",
+// CHECK-NEXT:                           "line": 98
+// CHECK-NEXT:                         }
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "type": {
+// CHECK-NEXT:                         "qualType": "V *"
+// CHECK-NEXT:                       },
+// CHECK-NEXT:                       "valueCategory": "rvalue"
+// CHECK-NEXT:                     }
+// CHECK-NEXT:                   ]
 // CHECK-NEXT:                 },
 // CHECK-NEXT:                 {
 // CHECK-NEXT:                  "id": "0x{{.*}}",
diff --git a/clang/test/AST/ast-dump-expr.cpp b/clang/test/AST/ast-dump-expr.cpp
index 47f69a882ecba..f04c311c63470 100644
--- a/clang/test/AST/ast-dump-expr.cpp
+++ b/clang/test/AST/ast-dump-expr.cpp
@@ -255,6 +255,7 @@ void PrimaryExpressions(Ts... a) {
       // CHECK-NEXT: CXXMethodDecl
       // CHECK-NEXT: CompoundStmt
       // CHECK-NEXT: FieldDecl 0x{{[^ ]*}} <col:8> col:8 implicit 'V *'
+      // CHECK-NEXT: ParenListExpr
       // CHECK-NEXT: CXXThisExpr 0x{{[^ ]*}} <col:8> 'V *' this
 
       [*this]{};

From 591ede411d2e85ab66496acbb9fc64f9977cd946 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Sun, 2 Jun 2019 06:03:05 +0000
Subject: [PATCH 0836/1176] [Target] Adjust header in Thread

llvm-svn: 362318
---
 lldb/source/Target/Thread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp
index f248e6b525ef0..7a6b49e552524 100644
--- a/lldb/source/Target/Thread.cpp
+++ b/lldb/source/Target/Thread.cpp
@@ -22,7 +22,7 @@
 #include "lldb/Target/ABI.h"
 #include "lldb/Target/DynamicLoader.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
+#include "lldb/Target/LanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/Target/StackFrameRecognizer.h"

From 0bfa9359b0df7cdb2a1f9af0da04c9e14deadc52 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sun, 2 Jun 2019 08:05:24 +0000
Subject: [PATCH 0837/1176] [NFC][X86] extract-lowbits.ll: add patterns with
 truncation too

If we look past truncations of X too eagerly (D62786), we may
end up with 64-bit 'BEXTR', even though 32-bit-one would suffice.

llvm-svn: 362319
---
 llvm/test/CodeGen/X86/extract-lowbits.ll | 1328 ++++++++++++++++++----
 1 file changed, 1084 insertions(+), 244 deletions(-)

diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 47c611ebef696..e5aecb07708d3 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -730,6 +730,302 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
   ret i64 %masked
 }
 
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_a0:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %edx
+; X86-NOBMI-NEXT:    shll %cl, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB10_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:  .LBB10_2:
+; X86-NOBMI-NEXT:    decl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_a0:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $1, %edx
+; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
+; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB10_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
+; X86-BMI1NOTBM-NEXT:  .LBB10_2:
+; X86-BMI1NOTBM-NEXT:    decl %eax
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_a0:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    jne .LBB10_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    movl $1, %eax
+; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT:  .LBB10_2:
+; X86-BMI1BMI2-NEXT:    decl %eax
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_a0:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    decl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_a0:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movl $1, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shlq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    decl %eax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_a0:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    movl $1, %eax
+; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    decl %eax
+; X64-BMI1BMI2-NEXT:    andl %edi, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %onebit = shl i64 1, %numlowbits
+  %mask = add nsw i64 %onebit, -1
+  %masked = and i64 %mask, %val
+  %res = trunc i64 %masked to i32
+  ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_a1:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    decl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_a1:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_a1:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_a1:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    decl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_a1:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_a1:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    retq
+  %truncval = trunc i64 %val to i32
+  %onebit = shl i32 1, %numlowbits
+  %mask = add nsw i32 %onebit, -1
+  %masked = and i32 %mask, %truncval
+  ret i32 %masked
+}
+
+; Shifting happens in 64-bit, then truncation (with extra use).
+; Masking is 32-bit.
+define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    movl %ebx, %ecx
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    decl %eax
+; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    shll $8, %ebx
+; X86-BMI1NOTBM-NEXT:    bextrl %ebx, %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl %esi, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
+; X64-NOBMI-NEXT:    movl %esi, %ebp
+; X64-NOBMI-NEXT:    movq %rdi, %rbx
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    movl %ebp, %ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    decl %eax
+; X64-NOBMI-NEXT:    andl %ebx, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ebx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    shll $8, %ebx
+; X64-BMI1NOTBM-NEXT:    bextrl %ebx, %r14d, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %esi, %ebp
+; X64-BMI1BMI2-NEXT:    movq %rdi, %rbx
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebp, %ebx, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
+; X64-BMI1BMI2-NEXT:    retq
+  %truncval = trunc i64 %val to i32
+  call void @use32(i32 %truncval)
+  %onebit = shl i32 1, %numlowbits
+  %mask = add nsw i32 %onebit, -1
+  %masked = and i32 %mask, %truncval
+  ret i32 %masked
+}
+
+; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_a2:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    decl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_a2:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_a2:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_a2:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    decl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_a2:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_a2:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    retq
+  %onebit = shl i32 1, %numlowbits
+  %mask = add nsw i32 %onebit, -1
+  %zextmask = zext i32 %mask to i64
+  %masked = and i64 %zextmask, %val
+  %truncmasked = trunc i64 %masked to i32
+  ret i32 %truncmasked
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern b. 32-bit
 ; ---------------------------------------------------------------------------- ;
@@ -1000,11 +1296,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB15_2
+; X86-NOBMI-NEXT:    je .LBB19_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB15_2:
+; X86-NOBMI-NEXT:  .LBB19_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -1020,11 +1316,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB15_2
+; X86-BMI1NOTBM-NEXT:    je .LBB19_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB15_2:
+; X86-BMI1NOTBM-NEXT:  .LBB19_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1038,11 +1334,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB15_2
+; X86-BMI1BMI2-NEXT:    je .LBB19_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB15_2:
+; X86-BMI1BMI2-NEXT:  .LBB19_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1083,11 +1379,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB16_2
+; X86-NOBMI-NEXT:    je .LBB20_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB16_2:
+; X86-NOBMI-NEXT:  .LBB20_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -1103,11 +1399,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB16_2
+; X86-BMI1NOTBM-NEXT:    je .LBB20_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB16_2:
+; X86-BMI1NOTBM-NEXT:  .LBB20_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1121,11 +1417,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB16_2
+; X86-BMI1BMI2-NEXT:    je .LBB20_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB16_2:
+; X86-BMI1BMI2-NEXT:  .LBB20_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1171,11 +1467,11 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB17_2
+; X86-NOBMI-NEXT:    je .LBB21_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB17_2:
+; X86-NOBMI-NEXT:  .LBB21_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl 4(%esi), %edx
@@ -1193,11 +1489,11 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB17_2
+; X86-BMI1NOTBM-NEXT:    je .LBB21_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB17_2:
+; X86-BMI1NOTBM-NEXT:  .LBB21_2:
 ; X86-BMI1NOTBM-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1212,11 +1508,11 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB17_2
+; X86-BMI1BMI2-NEXT:    je .LBB21_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB17_2:
+; X86-BMI1BMI2-NEXT:  .LBB21_2:
 ; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1260,11 +1556,11 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB18_2
+; X86-NOBMI-NEXT:    je .LBB22_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB18_2:
+; X86-NOBMI-NEXT:  .LBB22_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl 4(%esi), %edx
@@ -1282,11 +1578,11 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB18_2
+; X86-BMI1NOTBM-NEXT:    je .LBB22_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB18_2:
+; X86-BMI1NOTBM-NEXT:  .LBB22_2:
 ; X86-BMI1NOTBM-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1301,11 +1597,11 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB18_2
+; X86-BMI1BMI2-NEXT:    je .LBB22_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB18_2:
+; X86-BMI1BMI2-NEXT:  .LBB22_2:
 ; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1350,11 +1646,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB19_2
+; X86-NOBMI-NEXT:    je .LBB23_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB19_2:
+; X86-NOBMI-NEXT:  .LBB23_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -1370,11 +1666,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB19_2
+; X86-BMI1NOTBM-NEXT:    je .LBB23_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB19_2:
+; X86-BMI1NOTBM-NEXT:  .LBB23_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1388,11 +1684,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB19_2
+; X86-BMI1BMI2-NEXT:    je .LBB23_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB19_2:
+; X86-BMI1BMI2-NEXT:  .LBB23_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1408,20 +1704,207 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_b4_commutative:
+; X64-BMI1NOTBM-LABEL: bzhi64_b4_commutative:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    retq
+  %notmask = shl i64 -1, %numlowbits
+  %mask = xor i64 %notmask, -1
+  %masked = and i64 %val, %mask ; swapped order
+  ret i64 %masked
+}
+
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_b0:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-1, %edx
+; X86-NOBMI-NEXT:    shll %cl, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB24_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:  .LBB24_2:
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_b0:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
+; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
+; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB24_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
+; X86-BMI1NOTBM-NEXT:  .LBB24_2:
+; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_b0:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI1BMI2-NEXT:    testb $32, %al
+; X86-BMI1BMI2-NEXT:    jne .LBB24_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI1BMI2-NEXT:  .LBB24_2:
+; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_b0:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq $-1, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_b0:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shlq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_b0:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT:    movq $-1, %rax
+; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT:    retq
+  %widenumlowbits = zext i8 %numlowbits to i64
+  %notmask = shl nsw i64 -1, %widenumlowbits
+  %mask = xor i64 %notmask, -1
+  %wideres = and i64 %val, %mask
+  %res = trunc i64 %wideres to i32
+  ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_b1:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_b1:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_b1:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_b1:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_b1:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_b1:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    retq
+  %truncval = trunc i64 %val to i32
+  %widenumlowbits = zext i8 %numlowbits to i32
+  %notmask = shl nsw i32 -1, %widenumlowbits
+  %mask = xor i32 %notmask, -1
+  %res = and i32 %truncval, %mask
+  ret i32 %res
+}
+
+; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_b2:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_b2:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_b2:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_b2:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_b2:
 ; X64-BMI1NOTBM:       # %bb.0:
 ; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
+; X64-BMI1BMI2-LABEL: bzhi64_32_b2:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
-  %notmask = shl i64 -1, %numlowbits
-  %mask = xor i64 %notmask, -1
-  %masked = and i64 %val, %mask ; swapped order
-  ret i64 %masked
+  %widenumlowbits = zext i8 %numlowbits to i32
+  %notmask = shl nsw i32 -1, %widenumlowbits
+  %mask = xor i32 %notmask, -1
+  %zextmask = zext i32 %mask to i64
+  %wideres = and i64 %val, %zextmask
+  %res = trunc i64 %wideres to i32
+  ret i32 %res
 }
 
 ; ---------------------------------------------------------------------------- ;
@@ -2012,11 +2495,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB25_2
+; X86-NOBMI-NEXT:    je .LBB32_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB25_2:
+; X86-NOBMI-NEXT:  .LBB32_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2043,11 +2526,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB25_2
+; X86-BMI1NOTBM-NEXT:    je .LBB32_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB25_2:
+; X86-BMI1NOTBM-NEXT:  .LBB32_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -2073,11 +2556,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB25_2
+; X86-BMI1BMI2-NEXT:    je .LBB32_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB25_2:
+; X86-BMI1BMI2-NEXT:  .LBB32_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -2169,11 +2652,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB26_2
+; X86-NOBMI-NEXT:    je .LBB33_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB26_2:
+; X86-NOBMI-NEXT:  .LBB33_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2200,11 +2683,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB26_2
+; X86-BMI1NOTBM-NEXT:    je .LBB33_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB26_2:
+; X86-BMI1NOTBM-NEXT:  .LBB33_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -2230,11 +2713,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB26_2
+; X86-BMI1BMI2-NEXT:    je .LBB33_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB26_2:
+; X86-BMI1BMI2-NEXT:  .LBB33_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -2328,11 +2811,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB27_2
+; X86-NOBMI-NEXT:    je .LBB34_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB27_2:
+; X86-NOBMI-NEXT:  .LBB34_2:
 ; X86-NOBMI-NEXT:    movl (%edx), %esi
 ; X86-NOBMI-NEXT:    andl %eax, %esi
 ; X86-NOBMI-NEXT:    movl 4(%edx), %edi
@@ -2362,11 +2845,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB27_2
+; X86-BMI1NOTBM-NEXT:    je .LBB34_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB27_2:
+; X86-BMI1NOTBM-NEXT:  .LBB34_2:
 ; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
 ; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
@@ -2395,11 +2878,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB27_2
+; X86-BMI1BMI2-NEXT:    je .LBB34_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB27_2:
+; X86-BMI1BMI2-NEXT:  .LBB34_2:
 ; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
 ; X86-BMI1BMI2-NEXT:    andl %eax, %esi
 ; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
@@ -2482,11 +2965,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB28_2
+; X86-NOBMI-NEXT:    je .LBB35_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB28_2:
+; X86-NOBMI-NEXT:  .LBB35_2:
 ; X86-NOBMI-NEXT:    movl (%edx), %esi
 ; X86-NOBMI-NEXT:    andl %eax, %esi
 ; X86-NOBMI-NEXT:    movl 4(%edx), %edi
@@ -2516,11 +2999,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB28_2
+; X86-BMI1NOTBM-NEXT:    je .LBB35_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB28_2:
+; X86-BMI1NOTBM-NEXT:  .LBB35_2:
 ; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
 ; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
@@ -2549,11 +3032,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB28_2
+; X86-BMI1BMI2-NEXT:    je .LBB35_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB28_2:
+; X86-BMI1BMI2-NEXT:  .LBB35_2:
 ; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
 ; X86-BMI1BMI2-NEXT:    andl %eax, %esi
 ; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
@@ -2637,11 +3120,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB29_2
+; X86-NOBMI-NEXT:    je .LBB36_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB29_2:
+; X86-NOBMI-NEXT:  .LBB36_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2656,129 +3139,325 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
+; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
+; X86-BMI1NOTBM-NEXT:    movb $64, %cl
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    je .LBB36_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
+; X86-BMI1NOTBM-NEXT:  .LBB36_2:
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movb $64, %cl
+; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $-1, %esi
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB36_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    movl %edi, %esi
+; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI1BMI2-NEXT:  .LBB36_2:
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_c4_commutative:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %r14
+; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    movq $-1, %rbx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
+; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
+; X64-BMI1BMI2-NEXT:    retq
+  %numhighbits = sub i64 64, %numlowbits
+  %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
+  %masked = and i64 %val, %mask ; swapped order
+  ret i64 %masked
+}
+
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_c0:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb $64, %cl
+; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-1, %edx
+; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edx
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB37_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:  .LBB37_2:
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_c0:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb $64, %cl
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edx
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB37_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
+; X86-BMI1NOTBM-NEXT:  .LBB37_2:
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_c0:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb $64, %cl
+; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl $-1, %edx
+; X86-BMI1BMI2-NEXT:    movl $-1, %eax
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB37_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
+; X86-BMI1BMI2-NEXT:  .LBB37_2:
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_c0:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    movq $-1, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_c0:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_c0:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    negb %sil
+; X64-BMI1BMI2-NEXT:    movq $-1, %rax
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    andl %edi, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %numhighbits = sub i64 64, %numlowbits
+  %mask = lshr i64 -1, %numhighbits
+  %masked = and i64 %mask, %val
+  %res = trunc i64 %masked to i32
+  ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_c1:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_c1:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_c1:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_c1:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_c1:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_c1:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    retq
+  %truncval = trunc i64 %val to i32
+  %numhighbits = sub i32 32, %numlowbits
+  %mask = lshr i32 -1, %numhighbits
+  %masked = and i32 %mask, %truncval
+  ret i32 %masked
+}
+
+; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_c2:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_c2:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB29_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB29_2:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    calll use64
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative:
+; X86-BMI1BMI2-LABEL: bzhi64_32_c2:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB29_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB29_2:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    calll use64
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
-; X64-NOBMI-LABEL: bzhi64_c4_commutative:
+; X64-NOBMI-LABEL: bzhi64_32_c2:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    pushq %r14
-; X64-NOBMI-NEXT:    pushq %rbx
-; X64-NOBMI-NEXT:    pushq %rax
-; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq %rdi, %r14
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    movq $-1, %rbx
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rbx
-; X64-NOBMI-NEXT:    movq %rbx, %rdi
-; X64-NOBMI-NEXT:    callq use64
-; X64-NOBMI-NEXT:    andq %r14, %rbx
-; X64-NOBMI-NEXT:    movq %rbx, %rax
-; X64-NOBMI-NEXT:    addq $8, %rsp
-; X64-NOBMI-NEXT:    popq %rbx
-; X64-NOBMI-NEXT:    popq %r14
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
+; X64-BMI1NOTBM-LABEL: bzhi64_32_c2:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative:
+; X64-BMI1BMI2-LABEL: bzhi64_32_c2:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
-  %numhighbits = sub i64 64, %numlowbits
-  %mask = lshr i64 -1, %numhighbits
-  call void @use64(i64 %mask)
-  %masked = and i64 %val, %mask ; swapped order
-  ret i64 %masked
+  %numhighbits = sub i32 32, %numlowbits
+  %mask = lshr i32 -1, %numhighbits
+  %zextmask = zext i32 %mask to i64
+  %masked = and i64 %zextmask, %val
+  %truncmasked = trunc i64 %masked to i32
+  ret i32 %truncmasked
 }
 
 ; ---------------------------------------------------------------------------- ;
@@ -3012,26 +3691,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB34_2
+; X86-NOBMI-NEXT:    jne .LBB44_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB34_2:
+; X86-NOBMI-NEXT:  .LBB44_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB34_4
+; X86-NOBMI-NEXT:    jne .LBB44_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB34_4:
+; X86-NOBMI-NEXT:  .LBB44_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB34_6
+; X86-NOBMI-NEXT:    jne .LBB44_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB34_6:
+; X86-NOBMI-NEXT:  .LBB44_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3051,26 +3730,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB34_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB44_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB34_2:
+; X86-BMI1NOTBM-NEXT:  .LBB44_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB34_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB44_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB34_4:
+; X86-BMI1NOTBM-NEXT:  .LBB44_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB34_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB44_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB34_6:
+; X86-BMI1NOTBM-NEXT:  .LBB44_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3088,22 +3767,22 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB34_2
+; X86-BMI1BMI2-NEXT:    je .LBB44_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB34_2:
+; X86-BMI1BMI2-NEXT:  .LBB44_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB34_4
+; X86-BMI1BMI2-NEXT:    jne .LBB44_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB34_4:
+; X86-BMI1BMI2-NEXT:  .LBB44_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB34_6
+; X86-BMI1BMI2-NEXT:    jne .LBB44_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB34_6:
+; X86-BMI1BMI2-NEXT:  .LBB44_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3149,26 +3828,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB35_2
+; X86-NOBMI-NEXT:    jne .LBB45_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB35_2:
+; X86-NOBMI-NEXT:  .LBB45_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB35_4
+; X86-NOBMI-NEXT:    jne .LBB45_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB35_4:
+; X86-NOBMI-NEXT:  .LBB45_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB35_6
+; X86-NOBMI-NEXT:    jne .LBB45_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB35_6:
+; X86-NOBMI-NEXT:  .LBB45_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3188,26 +3867,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB35_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB45_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB35_2:
+; X86-BMI1NOTBM-NEXT:  .LBB45_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB35_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB45_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB35_4:
+; X86-BMI1NOTBM-NEXT:  .LBB45_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB35_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB45_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB35_6:
+; X86-BMI1NOTBM-NEXT:  .LBB45_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3225,22 +3904,22 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB35_2
+; X86-BMI1BMI2-NEXT:    je .LBB45_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB35_2:
+; X86-BMI1BMI2-NEXT:  .LBB45_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB35_4
+; X86-BMI1BMI2-NEXT:    jne .LBB45_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB35_4:
+; X86-BMI1BMI2-NEXT:  .LBB45_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB35_6
+; X86-BMI1BMI2-NEXT:    jne .LBB45_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB35_6:
+; X86-BMI1BMI2-NEXT:  .LBB45_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3290,26 +3969,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB36_2
+; X86-NOBMI-NEXT:    jne .LBB46_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB36_2:
+; X86-NOBMI-NEXT:  .LBB46_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB36_4
+; X86-NOBMI-NEXT:    jne .LBB46_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB36_4:
+; X86-NOBMI-NEXT:  .LBB46_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB36_6
+; X86-NOBMI-NEXT:    jne .LBB46_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB36_6:
+; X86-NOBMI-NEXT:  .LBB46_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3330,26 +4009,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB36_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB36_2:
+; X86-BMI1NOTBM-NEXT:  .LBB46_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB36_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB36_4:
+; X86-BMI1NOTBM-NEXT:  .LBB46_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB36_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB36_6:
+; X86-BMI1NOTBM-NEXT:  .LBB46_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3368,22 +4047,22 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB36_2
+; X86-BMI1BMI2-NEXT:    je .LBB46_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB36_2:
+; X86-BMI1BMI2-NEXT:  .LBB46_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB36_4
+; X86-BMI1BMI2-NEXT:    jne .LBB46_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB36_4:
+; X86-BMI1BMI2-NEXT:  .LBB46_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB36_6
+; X86-BMI1BMI2-NEXT:    jne .LBB46_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB36_6:
+; X86-BMI1BMI2-NEXT:  .LBB46_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3431,26 +4110,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB37_2
+; X86-NOBMI-NEXT:    jne .LBB47_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB37_2:
+; X86-NOBMI-NEXT:  .LBB47_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB37_4
+; X86-NOBMI-NEXT:    jne .LBB47_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB37_4:
+; X86-NOBMI-NEXT:  .LBB47_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB37_6
+; X86-NOBMI-NEXT:    jne .LBB47_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB37_6:
+; X86-NOBMI-NEXT:  .LBB47_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3471,26 +4150,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB37_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB37_2:
+; X86-BMI1NOTBM-NEXT:  .LBB47_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB37_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB37_4:
+; X86-BMI1NOTBM-NEXT:  .LBB47_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB37_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB37_6:
+; X86-BMI1NOTBM-NEXT:  .LBB47_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3509,22 +4188,22 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB37_2
+; X86-BMI1BMI2-NEXT:    je .LBB47_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB37_2:
+; X86-BMI1BMI2-NEXT:  .LBB47_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB37_4
+; X86-BMI1BMI2-NEXT:    jne .LBB47_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB37_4:
+; X86-BMI1BMI2-NEXT:  .LBB47_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB37_6
+; X86-BMI1BMI2-NEXT:    jne .LBB47_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB37_6:
+; X86-BMI1BMI2-NEXT:  .LBB47_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3559,6 +4238,167 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
   ret i64 %masked
 }
 
+; 64-bit, but with 32-bit output
+
+; Everything done in 64-bit, truncation happens last.
+define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_d0:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movb $64, %cl
+; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl %esi, %edx
+; X86-NOBMI-NEXT:    shll %cl, %edx
+; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    je .LBB48_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:  .LBB48_2:
+; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB48_4
+; X86-NOBMI-NEXT:  # %bb.3:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:  .LBB48_4:
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_d0:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movb $64, %cl
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
+; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
+; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    je .LBB48_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
+; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:  .LBB48_2:
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
+; X86-BMI1NOTBM-NEXT:  # %bb.3:
+; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
+; X86-BMI1NOTBM-NEXT:  .LBB48_4:
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_d0:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    movb $64, %cl
+; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB48_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    movl %eax, %edx
+; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI1BMI2-NEXT:  .LBB48_2:
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB48_4
+; X86-BMI1BMI2-NEXT:  # %bb.3:
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
+; X86-BMI1BMI2-NEXT:  .LBB48_4:
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_d0:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_d0:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_d0:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %numhighbits = sub i64 64, %numlowbits
+  %highbitscleared = shl i64 %val, %numhighbits
+  %masked = lshr i64 %highbitscleared, %numhighbits
+  %res = trunc i64 %masked to i32
+  ret i32 %res
+}
+
+; Shifting happens in 64-bit, then truncation. Masking is 32-bit.
+define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_d1:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_d1:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    shll $8, %eax
+; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_d1:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_d1:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_d1:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    shll $8, %esi
+; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_d1:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    retq
+  %truncval = trunc i64 %val to i32
+  %numhighbits = sub i32 32, %numlowbits
+  %highbitscleared = shl i32 %truncval, %numhighbits
+  %masked = lshr i32 %highbitscleared, %numhighbits
+  ret i32 %masked
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Constant mask
 ; ---------------------------------------------------------------------------- ;

From eb375098322ff28bbf1f1789bb659e2d7c88e646 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 2 Jun 2019 08:49:35 +0000
Subject: [PATCH 0838/1176] [IndVarSimplify] Add tests for saturating math on
 IV; NFC

These saturating math ops can be replaced with simple math.

llvm-svn: 362320
---
 .../IndVarSimplify/eliminate-sat.ll           | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll

diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll
new file mode 100644
index 0000000000000..b7c38041040ec
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/eliminate-sat.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -indvars | FileCheck %s
+
+declare i32 @llvm.uadd.sat.i32(i32, i32)
+declare i32 @llvm.sadd.sat.i32(i32, i32)
+declare i32 @llvm.usub.sat.i32(i32, i32)
+declare i32 @llvm.ssub.sat.i32(i32, i32)
+
+define void @uadd_sat(i32* %p) {
+; CHECK-LABEL: @uadd_sat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SAT:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[I]], i32 1)
+; CHECK-NEXT:    store volatile i32 [[SAT]], i32* [[P:%.*]]
+; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %sat = call i32 @llvm.uadd.sat.i32(i32 %i, i32 1)
+  store volatile i32 %sat, i32* %p
+  %i.inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, 100
+  br i1 %cmp, label %loop, label %end
+
+end:
+  ret void
+}
+
+define void @sadd_sat(i32* %p) {
+; CHECK-LABEL: @sadd_sat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SAT:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[I]], i32 1)
+; CHECK-NEXT:    store volatile i32 [[SAT]], i32* [[P:%.*]]
+; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %sat = call i32 @llvm.sadd.sat.i32(i32 %i, i32 1)
+  store volatile i32 %sat, i32* %p
+  %i.inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, 100
+  br i1 %cmp, label %loop, label %end
+
+end:
+  ret void
+}
+
+define void @usub_sat(i32* %p) {
+; CHECK-LABEL: @usub_sat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SAT:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[I]], i32 1)
+; CHECK-NEXT:    store volatile i32 [[SAT]], i32* [[P:%.*]]
+; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ]
+  %sat = call i32 @llvm.usub.sat.i32(i32 %i, i32 1)
+  store volatile i32 %sat, i32* %p
+  %i.inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, 100
+  br i1 %cmp, label %loop, label %end
+
+end:
+  ret void
+}
+
+define void @ssub_sat(i32* %p) {
+; CHECK-LABEL: @ssub_sat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SAT:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[I]], i32 1)
+; CHECK-NEXT:    store volatile i32 [[SAT]], i32* [[P:%.*]]
+; CHECK-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[I_INC]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %sat = call i32 @llvm.ssub.sat.i32(i32 %i, i32 1)
+  store volatile i32 %sat, i32* %p
+  %i.inc = add nuw nsw i32 %i, 1
+  %cmp = icmp ne i32 %i.inc, 100
+  br i1 %cmp, label %loop, label %end
+
+end:
+  ret void
+}

From 30a6caa3e72de6e85c581c6c4e658798e4c0764e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 10:03:56 +0000
Subject: [PATCH 0839/1176] [TargetLowering] SimplifyDemandedVectorElts - use
 same arg names as SimplifyDemandedBits. NFCI.

Helps with debugging as we recurse between them.

llvm-svn: 362321
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4ad578d80fab6..43251d06ef1d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1801,11 +1801,11 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
 }
 
 bool TargetLowering::SimplifyDemandedVectorElts(
-    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
+    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
     bool AssumeSingleUse) const {
   EVT VT = Op.getValueType();
-  APInt DemandedElts = DemandedEltMask;
+  APInt DemandedElts = OriginalDemandedElts;
   unsigned NumElts = DemandedElts.getBitWidth();
   assert(VT.isVector() && "Expected vector op");
   assert(VT.getVectorNumElements() == NumElts &&
@@ -2237,8 +2237,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     } else {
       KnownBits Known;
       APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
-      if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
-                               Depth, AssumeSingleUse))
+      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
+                               TLO, Depth, AssumeSingleUse))
         return true;
     }
     break;

From 88522ce38892808818c835497e850fba6ae3448c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 10:12:55 +0000
Subject: [PATCH 0840/1176] [TargetLowering] SimplifyDemandedBits - don't use
 OriginalDemanded variables in analysis.

These might have been replaced in multiple use cases.

llvm-svn: 362322
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 43251d06ef1d9..785530b3605b7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -676,27 +676,27 @@ bool TargetLowering::SimplifyDemandedBits(
 
     // If index isn't constant, assume we need all vector elements AND the
     // inserted element.
-    APInt DemandedVecElts(OriginalDemandedElts);
+    APInt DemandedVecElts(DemandedElts);
     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
       unsigned Idx = CIdx->getZExtValue();
       DemandedVecElts.clearBit(Idx);
 
       // Inserted element is not required.
-      if (!OriginalDemandedElts[Idx])
+      if (!DemandedElts[Idx])
         return TLO.CombineTo(Op, Vec);
     }
 
     KnownBits KnownScl;
     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
-    APInt DemandedSclBits = OriginalDemandedBits.zextOrTrunc(NumSclBits);
+    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
       return true;
 
     Known = KnownScl.zextOrTrunc(BitWidth, false);
 
     KnownBits KnownVec;
-    if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
-                             KnownVec, TLO, Depth + 1))
+    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
+                             Depth + 1))
       return true;
 
     if (!!DemandedVecElts) {

From ffb4d2bff7f1024aa3accd1c387017003b8ce94d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 11:56:39 +0000
Subject: [PATCH 0841/1176] [DAG] isBitwiseNot / isConstOrConstSplat - add
 support for build vector undefs + truncation (PR41020)

Add (opt-in) support for implicit truncation to isConstOrConstSplat, which allows us to match truncated 'all ones' cases in isBitwiseNot.

PR41020 compares against using ISD::isBuildVectorAllOnes() instead, but that predicate silently accepts any UNDEF elements in the build vector which might not be what we want in isBitwiseNot - so I've added an opt-in 'AllowUndefs' flag that is set to false by default but will allow us to enable it on individual cases where its safe.

Differential Revision: https://reviews.llvm.org/D62783

llvm-svn: 362323
---
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h |  8 ++--
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 41 +++++++++++++------
 llvm/test/CodeGen/AArch64/sat-add.ll          | 24 ++++-------
 3 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 4e4c0e57d6325..370c3a438d12b 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1649,15 +1649,17 @@ SDValue peekThroughExtractSubvectors(SDValue V);
 
 /// Returns true if \p V is a bitwise not operation. Assumes that an all ones
 /// constant is canonicalized to be operand 1.
-bool isBitwiseNot(SDValue V);
+bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
+                                    bool AllowTruncation = false);
 
 /// Returns the SDNode if it is a demanded constant splat BuildVector or
 /// constant int.
 ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
-                                    bool AllowUndefs = false);
+                                    bool AllowUndefs = false,
+                                    bool AllowTruncation = false);
 
 /// Returns the SDNode if it is a constant splat BuildVector or constant float.
 ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7cb7e17d55a2b..d6d8cf54cb01d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8640,14 +8640,18 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
   return V;
 }
 
-bool llvm::isBitwiseNot(SDValue V) {
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
   if (V.getOpcode() != ISD::XOR)
     return false;
-  ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
-  return C && C->isAllOnesValue();
+  V = peekThroughBitcasts(V.getOperand(1));
+  unsigned NumBits = V.getScalarValueSizeInBits();
+  ConstantSDNode *C =
+      isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+  return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
 }
 
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+                                          bool AllowTruncation) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;
 
@@ -8655,17 +8659,23 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
     BitVector UndefElements;
     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
 
-    // BuildVectors can truncate their operands. Ignore that case here.
-    if (CN && (UndefElements.none() || AllowUndefs) &&
-        CN->getValueType(0) == N.getValueType().getScalarType())
-      return CN;
+    // BuildVectors can truncate their operands. Ignore that case here unless
+    // AllowTruncation is set.
+    if (CN && (UndefElements.none() || AllowUndefs)) {
+      EVT CVT = CN->getValueType(0);
+      EVT NSVT = N.getValueType().getScalarType();
+      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+      if (AllowTruncation || (CVT == NSVT))
+        return CN;
+    }
   }
 
   return nullptr;
 }
 
 ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
-                                          bool AllowUndefs) {
+                                          bool AllowUndefs,
+                                          bool AllowTruncation) {
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
     return CN;
 
@@ -8673,10 +8683,15 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
     BitVector UndefElements;
     ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
 
-    // BuildVectors can truncate their operands. Ignore that case here.
-    if (CN && (UndefElements.none() || AllowUndefs) &&
-        CN->getValueType(0) == N.getValueType().getScalarType())
-      return CN;
+    // BuildVectors can truncate their operands. Ignore that case here unless
+    // AllowTruncation is set.
+    if (CN && (UndefElements.none() || AllowUndefs)) {
+      EVT CVT = CN->getValueType(0);
+      EVT NSVT = N.getValueType().getScalarType();
+      assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+      if (AllowTruncation || (CVT == NSVT))
+        return CN;
+    }
   }
 
   return nullptr;
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
index 36e63f3594b4a..8e54d91662775 100644
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -364,8 +364,7 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
 ; CHECK-NEXT:    movi v1.16b, #42
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
   %c = icmp ugt <16 x i8> %x, %a
@@ -380,8 +379,7 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
 ; CHECK-NEXT:    movi v2.16b, #213
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
   %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
@@ -409,8 +407,7 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
 ; CHECK-NEXT:    movi v1.8h, #42
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
   %c = icmp ugt <8 x i16> %x, %a
@@ -425,8 +422,7 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
 ; CHECK-NEXT:    mvni v2.8h, #42
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
   %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
@@ -545,8 +541,7 @@ define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %a = add <16 x i8> %x, %y
   %c = icmp ugt <16 x i8> %x, %a
@@ -560,8 +555,7 @@ define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16
 ; CHECK-NEXT:    mvn v2.16b, v1.16b
 ; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmhi v0.16b, v0.16b, v2.16b
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %a = add <16 x i8> %x, %y
@@ -589,8 +583,7 @@ define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %a = add <8 x i16> %x, %y
   %c = icmp ugt <8 x i16> %x, %a
@@ -604,8 +597,7 @@ define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8
 ; CHECK-NEXT:    mvn v2.16b, v1.16b
 ; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    cmhi v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %a = add <8 x i16> %x, %y

From 7a869e70367757ca322cdec534504881f11ec731 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 14:42:11 +0000
Subject: [PATCH 0842/1176] [DAGCombine] Fold
 insert_subvector(bitcast(x),bitcast(y),c1) ->
 bitcast(insert_subvector(x,y),c2)

Move this combine from x86 into generic DAGCombine, which currently only manages cases where the bitcast is between types of the same scalarsize.

Differential Revision: https://reviews.llvm.org/D59188

llvm-svn: 362324
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 37 +++++++++++++++++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 36 ------------------
 2 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4ed17440abc03..949c14f3ce46b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18780,6 +18780,43 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
 
   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
 
+  // Push subvector bitcasts to the output, adjusting the index as we go.
+  // insert_subvector(bitcast(v), bitcast(s), c1)
+  // -> bitcast(insert_subvector(v, s, c2))
+  if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+      N1.getOpcode() == ISD::BITCAST) {
+    SDValue N0Src = peekThroughBitcasts(N0);
+    SDValue N1Src = peekThroughBitcasts(N1);
+    EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+    EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+    if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+        N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+      EVT NewVT;
+      SDLoc DL(N);
+      SDValue NewIdx;
+      MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned NumElts = VT.getVectorNumElements();
+      unsigned EltSizeInBits = VT.getScalarSizeInBits();
+      if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+        unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+        NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+        NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+      } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+        unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+        if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+          NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+          NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+        }
+      }
+      if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+        SDValue Res = DAG.getBitcast(NewVT, N0Src);
+        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+        return DAG.getBitcast(VT, Res);
+      }
+    }
+  }
+
   // Canonicalize insert_subvector dag nodes.
   // Example:
   // (insert_subvector (insert_subvector A, Idx0), Idx1)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 253f4487976c5..5bdcb89b8f501 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43066,42 +43066,6 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Push subvector bitcasts to the output, adjusting the index as we go.
-  // insert_subvector(bitcast(v), bitcast(s), c1) ->
-  // bitcast(insert_subvector(v,s,c2))
-  // TODO: Move this to generic - which only supports same scalar sizes.
-  if ((Vec.isUndef() || Vec.getOpcode() == ISD::BITCAST) &&
-      SubVec.getOpcode() == ISD::BITCAST) {
-    SDValue VecSrc = peekThroughBitcasts(Vec);
-    SDValue SubVecSrc = peekThroughBitcasts(SubVec);
-    MVT VecSrcSVT = VecSrc.getSimpleValueType().getScalarType();
-    MVT SubVecSrcSVT = SubVecSrc.getSimpleValueType().getScalarType();
-    if (Vec.isUndef() || VecSrcSVT == SubVecSrcSVT) {
-      MVT NewOpVT;
-      SDValue NewIdx;
-      unsigned NumElts = OpVT.getVectorNumElements();
-      unsigned EltSizeInBits = OpVT.getScalarSizeInBits();
-      if ((EltSizeInBits % SubVecSrcSVT.getSizeInBits()) == 0) {
-        unsigned Scale = EltSizeInBits / SubVecSrcSVT.getSizeInBits();
-        NewOpVT = MVT::getVectorVT(SubVecSrcSVT, NumElts * Scale);
-        NewIdx = DAG.getIntPtrConstant(IdxVal * Scale, dl);
-      } else if ((SubVecSrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
-        unsigned Scale = SubVecSrcSVT.getSizeInBits() / EltSizeInBits;
-        if ((IdxVal % Scale) == 0) {
-          NewOpVT = MVT::getVectorVT(SubVecSrcSVT, NumElts / Scale);
-          NewIdx = DAG.getIntPtrConstant(IdxVal / Scale, dl);
-        }
-      }
-      if (NewIdx && DAG.getTargetLoweringInfo().isOperationLegal(
-                        ISD::INSERT_SUBVECTOR, NewOpVT)) {
-        SDValue Res = DAG.getBitcast(NewOpVT, VecSrc);
-        Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewOpVT, Res, SubVecSrc,
-                          NewIdx);
-        return DAG.getBitcast(OpVT, Res);
-      }
-    }
-  }
-
   // Match concat_vector style patterns.
   SmallVector<SDValue, 2> SubVectorOps;
   if (collectConcatOps(N, SubVectorOps))

From 2065ddfd79bb938f16e5662f68c70da3d5bdcd10 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sun, 2 Jun 2019 15:07:49 +0000
Subject: [PATCH 0843/1176] [NFC][X86] extract-lowbits.ll: add one more pattern
 a with truncation

We are also free to interpret this as 'BZHI'/'BEXTR'.
https://rise4fun.com/Alive/dD6

llvm-svn: 362325
---
 llvm/test/CodeGen/X86/extract-bits.ll    | 615 ++++++++++++++---------
 llvm/test/CodeGen/X86/extract-lowbits.ll | 326 +++++++-----
 2 files changed, 581 insertions(+), 360 deletions(-)

diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index 7921e9685cefc..6a594c18d4e05 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -3780,6 +3780,141 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
   ret i32 %res
 }
 
+; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bextr64_32_b3:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    movl %edi, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrdl %cl, %edi, %esi
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB33_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:  .LBB33_2:
+; X86-NOBMI-NEXT:    movl $-1, %esi
+; X86-NOBMI-NEXT:    movl %edx, %ecx
+; X86-NOBMI-NEXT:    shll %cl, %esi
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
+; X86-NOBMI-NEXT:    testb $32, %dl
+; X86-NOBMI-NEXT:    jne .LBB33_4
+; X86-NOBMI-NEXT:  # %bb.3:
+; X86-NOBMI-NEXT:    movl %esi, %ecx
+; X86-NOBMI-NEXT:  .LBB33_4:
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl %ecx, %eax
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bextr64_32_b3:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB33_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
+; X86-BMI1NOTBM-NEXT:  .LBB33_2:
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
+; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    testb $32, %al
+; X86-BMI1NOTBM-NEXT:    jne .LBB33_4
+; X86-BMI1NOTBM-NEXT:  # %bb.3:
+; X86-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X86-BMI1NOTBM-NEXT:  .LBB33_4:
+; X86-BMI1NOTBM-NEXT:    andnl %edx, %ecx, %eax
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bextr64_32_b3:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB33_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI1BMI2-NEXT:  .LBB33_2:
+; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI1BMI2-NEXT:    testb $32, %al
+; X86-BMI1BMI2-NEXT:    jne .LBB33_4
+; X86-BMI1BMI2-NEXT:  # %bb.3:
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI1BMI2-NEXT:  .LBB33_4:
+; X86-BMI1BMI2-NEXT:    andnl %edx, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bextr64_32_b3:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rdi
+; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NOBMI-NEXT:    movl $4294967295, %esi # imm = 0xFFFFFFFF
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    shlq %cl, %rsi
+; X64-NOBMI-NEXT:    xorl %esi, %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bextr64_32_b3:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
+; X64-BMI1NOTBM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1NOTBM-NEXT:    movl $4294967295, %esi # imm = 0xFFFFFFFF
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shlq %cl, %rsi
+; X64-BMI1NOTBM-NEXT:    xorl %esi, %eax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bextr64_32_b3:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-BMI1BMI2-NEXT:    shlxq %rdx, %rcx, %rdx
+; X64-BMI1BMI2-NEXT:    xorl %edx, %ecx
+; X64-BMI1BMI2-NEXT:    andl %ecx, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %shiftedval = lshr i64 %val, %numskipbits
+  %widenumlowbits = zext i8 %numlowbits to i64
+  %notmask = shl nsw i64 4294967295, %widenumlowbits
+  %mask = xor i64 %notmask, 4294967295
+  %wideres = and i64 %shiftedval, %mask
+  %res = trunc i64 %wideres to i32
+  ret i32 %res
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern c. 32-bit
 ; ---------------------------------------------------------------------------- ;
@@ -4641,11 +4776,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB39_2
+; X86-NOBMI-NEXT:    je .LBB40_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB39_2:
+; X86-NOBMI-NEXT:  .LBB40_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -4653,11 +4788,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB39_4
+; X86-NOBMI-NEXT:    je .LBB40_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB39_4:
+; X86-NOBMI-NEXT:  .LBB40_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -4688,11 +4823,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB39_2
+; X86-BMI1NOTBM-NEXT:    je .LBB40_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB39_2:
+; X86-BMI1NOTBM-NEXT:  .LBB40_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -4700,11 +4835,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB39_4
+; X86-BMI1NOTBM-NEXT:    je .LBB40_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB39_4:
+; X86-BMI1NOTBM-NEXT:  .LBB40_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -4734,22 +4869,22 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB39_2
+; X86-BMI1BMI2-NEXT:    je .LBB40_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB39_2:
+; X86-BMI1BMI2-NEXT:  .LBB40_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB39_4
+; X86-BMI1BMI2-NEXT:    je .LBB40_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB39_4:
+; X86-BMI1BMI2-NEXT:  .LBB40_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -4850,11 +4985,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB40_2
+; X86-NOBMI-NEXT:    je .LBB41_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB40_2:
+; X86-NOBMI-NEXT:  .LBB41_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -4862,11 +4997,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB40_4
+; X86-NOBMI-NEXT:    je .LBB41_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB40_4:
+; X86-NOBMI-NEXT:  .LBB41_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -4897,11 +5032,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB40_2
+; X86-BMI1NOTBM-NEXT:    je .LBB41_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB40_2:
+; X86-BMI1NOTBM-NEXT:  .LBB41_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -4909,11 +5044,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB40_4
+; X86-BMI1NOTBM-NEXT:    je .LBB41_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB40_4:
+; X86-BMI1NOTBM-NEXT:  .LBB41_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -4943,22 +5078,22 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB40_2
+; X86-BMI1BMI2-NEXT:    je .LBB41_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB40_2:
+; X86-BMI1BMI2-NEXT:  .LBB41_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB40_4
+; X86-BMI1BMI2-NEXT:    je .LBB41_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB40_4:
+; X86-BMI1BMI2-NEXT:  .LBB41_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5063,11 +5198,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB41_2
+; X86-NOBMI-NEXT:    je .LBB42_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB41_2:
+; X86-NOBMI-NEXT:  .LBB42_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -5075,11 +5210,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB41_4
+; X86-NOBMI-NEXT:    je .LBB42_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB41_4:
+; X86-NOBMI-NEXT:  .LBB42_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5111,11 +5246,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB41_2
+; X86-BMI1NOTBM-NEXT:    je .LBB42_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB41_2:
+; X86-BMI1NOTBM-NEXT:  .LBB42_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5123,11 +5258,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB41_4
+; X86-BMI1NOTBM-NEXT:    je .LBB42_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB41_4:
+; X86-BMI1NOTBM-NEXT:  .LBB42_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5158,22 +5293,22 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB41_2
+; X86-BMI1BMI2-NEXT:    je .LBB42_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB41_2:
+; X86-BMI1BMI2-NEXT:  .LBB42_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB41_4
+; X86-BMI1BMI2-NEXT:    je .LBB42_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB41_4:
+; X86-BMI1BMI2-NEXT:  .LBB42_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5276,11 +5411,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB42_2
+; X86-NOBMI-NEXT:    je .LBB43_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB42_2:
+; X86-NOBMI-NEXT:  .LBB43_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -5288,11 +5423,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB42_4
+; X86-NOBMI-NEXT:    je .LBB43_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB42_4:
+; X86-NOBMI-NEXT:  .LBB43_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5324,11 +5459,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB42_2
+; X86-BMI1NOTBM-NEXT:    je .LBB43_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB42_2:
+; X86-BMI1NOTBM-NEXT:  .LBB43_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5336,11 +5471,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB42_4
+; X86-BMI1NOTBM-NEXT:    je .LBB43_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB42_4:
+; X86-BMI1NOTBM-NEXT:  .LBB43_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5371,22 +5506,22 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB42_2
+; X86-BMI1BMI2-NEXT:    je .LBB43_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB42_2:
+; X86-BMI1BMI2-NEXT:  .LBB43_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB42_4
+; X86-BMI1BMI2-NEXT:    je .LBB43_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB42_4:
+; X86-BMI1BMI2-NEXT:  .LBB43_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5491,11 +5626,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB43_2
+; X86-NOBMI-NEXT:    je .LBB44_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB43_2:
+; X86-NOBMI-NEXT:  .LBB44_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -5503,11 +5638,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB43_4
+; X86-NOBMI-NEXT:    je .LBB44_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB43_4:
+; X86-NOBMI-NEXT:  .LBB44_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5538,11 +5673,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB43_2
+; X86-BMI1NOTBM-NEXT:    je .LBB44_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB43_2:
+; X86-BMI1NOTBM-NEXT:  .LBB44_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5550,11 +5685,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB43_4
+; X86-BMI1NOTBM-NEXT:    je .LBB44_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB43_4:
+; X86-BMI1NOTBM-NEXT:  .LBB44_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5584,22 +5719,22 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB43_2
+; X86-BMI1BMI2-NEXT:    je .LBB44_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB43_2:
+; X86-BMI1BMI2-NEXT:  .LBB44_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB43_4
+; X86-BMI1BMI2-NEXT:    je .LBB44_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB43_4:
+; X86-BMI1BMI2-NEXT:  .LBB44_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5700,11 +5835,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB44_2
+; X86-NOBMI-NEXT:    je .LBB45_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB44_2:
+; X86-NOBMI-NEXT:  .LBB45_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
@@ -5712,11 +5847,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrl %cl, %ebp
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB44_4
+; X86-NOBMI-NEXT:    je .LBB45_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebp, %ebx
 ; X86-NOBMI-NEXT:    xorl %ebp, %ebp
-; X86-NOBMI-NEXT:  .LBB44_4:
+; X86-NOBMI-NEXT:  .LBB45_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebp
 ; X86-NOBMI-NEXT:    pushl %ebx
@@ -5752,11 +5887,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB44_2
+; X86-BMI1NOTBM-NEXT:    je .LBB45_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB44_2:
+; X86-BMI1NOTBM-NEXT:  .LBB45_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
@@ -5764,11 +5899,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebp
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB44_4
+; X86-BMI1NOTBM-NEXT:    je .LBB45_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB44_4:
+; X86-BMI1NOTBM-NEXT:  .LBB45_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
@@ -5803,22 +5938,22 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB44_2
+; X86-BMI1BMI2-NEXT:    je .LBB45_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB44_2:
+; X86-BMI1BMI2-NEXT:  .LBB45_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB44_4
+; X86-BMI1BMI2-NEXT:    je .LBB45_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB44_4:
+; X86-BMI1BMI2-NEXT:  .LBB45_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
@@ -5932,10 +6067,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB45_2
+; X86-NOBMI-NEXT:    jne .LBB46_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB45_2:
+; X86-NOBMI-NEXT:  .LBB46_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %esi
@@ -5943,10 +6078,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB45_4
+; X86-NOBMI-NEXT:    jne .LBB46_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB45_4:
+; X86-NOBMI-NEXT:  .LBB46_4:
 ; X86-NOBMI-NEXT:    andl %edx, %eax
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
@@ -5961,10 +6096,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB45_2:
+; X86-BMI1NOTBM-NEXT:  .LBB46_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
@@ -5972,10 +6107,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB45_4:
+; X86-BMI1NOTBM-NEXT:  .LBB46_4:
 ; X86-BMI1NOTBM-NEXT:    andl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
@@ -5988,20 +6123,20 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_2
+; X86-BMI1BMI2-NEXT:    je .LBB46_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB45_2:
+; X86-BMI1BMI2-NEXT:  .LBB46_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %esi
 ; X86-BMI1BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_4
+; X86-BMI1BMI2-NEXT:    je .LBB46_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB45_4:
+; X86-BMI1BMI2-NEXT:  .LBB46_4:
 ; X86-BMI1BMI2-NEXT:    andl %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6061,10 +6196,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB46_2
+; X86-NOBMI-NEXT:    jne .LBB47_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB46_2:
+; X86-NOBMI-NEXT:  .LBB47_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -6085,10 +6220,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB46_2:
+; X86-BMI1NOTBM-NEXT:  .LBB47_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -6104,10 +6239,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_2
+; X86-BMI1BMI2-NEXT:    je .LBB47_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB46_2:
+; X86-BMI1BMI2-NEXT:  .LBB47_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6160,10 +6295,10 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB47_2
+; X86-NOBMI-NEXT:    jne .LBB48_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB47_2:
+; X86-NOBMI-NEXT:  .LBB48_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -6184,10 +6319,10 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
+; X86-BMI1NOTBM-NEXT:  .LBB48_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -6203,10 +6338,10 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
+; X86-BMI1BMI2-NEXT:    je .LBB48_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
+; X86-BMI1BMI2-NEXT:  .LBB48_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6627,36 +6762,36 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB53_2
+; X86-NOBMI-NEXT:    je .LBB54_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB53_2:
+; X86-NOBMI-NEXT:  .LBB54_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB53_4
+; X86-NOBMI-NEXT:    jne .LBB54_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB53_4:
+; X86-NOBMI-NEXT:  .LBB54_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB53_6
+; X86-NOBMI-NEXT:    jne .LBB54_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB53_6:
+; X86-NOBMI-NEXT:  .LBB54_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB53_8
+; X86-NOBMI-NEXT:    jne .LBB54_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB53_8:
+; X86-NOBMI-NEXT:  .LBB54_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -6675,36 +6810,36 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB53_2
+; X86-BMI1NOTBM-NEXT:    je .LBB54_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB53_2:
+; X86-BMI1NOTBM-NEXT:  .LBB54_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB53_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB54_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB53_4:
+; X86-BMI1NOTBM-NEXT:  .LBB54_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB53_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB54_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB53_6:
+; X86-BMI1NOTBM-NEXT:  .LBB54_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB53_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB54_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB53_8:
+; X86-BMI1NOTBM-NEXT:  .LBB54_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -6721,32 +6856,32 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB53_2
+; X86-BMI1BMI2-NEXT:    je .LBB54_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB53_2:
+; X86-BMI1BMI2-NEXT:  .LBB54_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB53_4
+; X86-BMI1BMI2-NEXT:    je .LBB54_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB53_4:
+; X86-BMI1BMI2-NEXT:  .LBB54_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB53_6
+; X86-BMI1BMI2-NEXT:    jne .LBB54_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB53_6:
+; X86-BMI1BMI2-NEXT:  .LBB54_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB53_8
+; X86-BMI1BMI2-NEXT:    jne .LBB54_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB53_8:
+; X86-BMI1BMI2-NEXT:  .LBB54_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6797,36 +6932,36 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB54_2
+; X86-NOBMI-NEXT:    je .LBB55_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB54_2:
+; X86-NOBMI-NEXT:  .LBB55_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB54_4
+; X86-NOBMI-NEXT:    jne .LBB55_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB54_4:
+; X86-NOBMI-NEXT:  .LBB55_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB54_6
+; X86-NOBMI-NEXT:    jne .LBB55_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB54_6:
+; X86-NOBMI-NEXT:  .LBB55_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB54_8
+; X86-NOBMI-NEXT:    jne .LBB55_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB54_8:
+; X86-NOBMI-NEXT:  .LBB55_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -6845,36 +6980,36 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB54_2
+; X86-BMI1NOTBM-NEXT:    je .LBB55_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB54_2:
+; X86-BMI1NOTBM-NEXT:  .LBB55_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB54_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB55_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB54_4:
+; X86-BMI1NOTBM-NEXT:  .LBB55_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB54_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB55_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB54_6:
+; X86-BMI1NOTBM-NEXT:  .LBB55_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB54_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB55_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB54_8:
+; X86-BMI1NOTBM-NEXT:  .LBB55_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -6891,32 +7026,32 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB54_2
+; X86-BMI1BMI2-NEXT:    je .LBB55_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB54_2:
+; X86-BMI1BMI2-NEXT:  .LBB55_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB54_4
+; X86-BMI1BMI2-NEXT:    je .LBB55_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB54_4:
+; X86-BMI1BMI2-NEXT:  .LBB55_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB54_6
+; X86-BMI1BMI2-NEXT:    jne .LBB55_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB54_6:
+; X86-BMI1BMI2-NEXT:  .LBB55_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB54_8
+; X86-BMI1BMI2-NEXT:    jne .LBB55_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB54_8:
+; X86-BMI1BMI2-NEXT:  .LBB55_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6972,36 +7107,36 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB55_2
+; X86-NOBMI-NEXT:    je .LBB56_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB55_2:
+; X86-NOBMI-NEXT:  .LBB56_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB55_4
+; X86-NOBMI-NEXT:    jne .LBB56_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB55_4:
+; X86-NOBMI-NEXT:  .LBB56_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB55_6
+; X86-NOBMI-NEXT:    jne .LBB56_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB55_6:
+; X86-NOBMI-NEXT:  .LBB56_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB55_8
+; X86-NOBMI-NEXT:    jne .LBB56_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB55_8:
+; X86-NOBMI-NEXT:  .LBB56_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7021,36 +7156,36 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB55_2
+; X86-BMI1NOTBM-NEXT:    je .LBB56_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB55_2:
+; X86-BMI1NOTBM-NEXT:  .LBB56_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB55_4:
+; X86-BMI1NOTBM-NEXT:  .LBB56_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB55_6:
+; X86-BMI1NOTBM-NEXT:  .LBB56_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB55_8:
+; X86-BMI1NOTBM-NEXT:  .LBB56_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7068,32 +7203,32 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB55_2
+; X86-BMI1BMI2-NEXT:    je .LBB56_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB55_2:
+; X86-BMI1BMI2-NEXT:  .LBB56_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB55_4
+; X86-BMI1BMI2-NEXT:    je .LBB56_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB55_4:
+; X86-BMI1BMI2-NEXT:  .LBB56_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB55_6
+; X86-BMI1BMI2-NEXT:    jne .LBB56_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB55_6:
+; X86-BMI1BMI2-NEXT:  .LBB56_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB55_8
+; X86-BMI1BMI2-NEXT:    jne .LBB56_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB55_8:
+; X86-BMI1BMI2-NEXT:  .LBB56_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7146,36 +7281,36 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB56_2
+; X86-NOBMI-NEXT:    je .LBB57_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB56_2:
+; X86-NOBMI-NEXT:  .LBB57_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB56_4
+; X86-NOBMI-NEXT:    jne .LBB57_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB56_4:
+; X86-NOBMI-NEXT:  .LBB57_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB56_6
+; X86-NOBMI-NEXT:    jne .LBB57_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB56_6:
+; X86-NOBMI-NEXT:  .LBB57_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB56_8
+; X86-NOBMI-NEXT:    jne .LBB57_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB56_8:
+; X86-NOBMI-NEXT:  .LBB57_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7195,36 +7330,36 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB56_2
+; X86-BMI1NOTBM-NEXT:    je .LBB57_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_2:
+; X86-BMI1NOTBM-NEXT:  .LBB57_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB56_4:
+; X86-BMI1NOTBM-NEXT:  .LBB57_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB56_6:
+; X86-BMI1NOTBM-NEXT:  .LBB57_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_8:
+; X86-BMI1NOTBM-NEXT:  .LBB57_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7242,32 +7377,32 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_2
+; X86-BMI1BMI2-NEXT:    je .LBB57_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB56_2:
+; X86-BMI1BMI2-NEXT:  .LBB57_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_4
+; X86-BMI1BMI2-NEXT:    je .LBB57_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB56_4:
+; X86-BMI1BMI2-NEXT:  .LBB57_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB56_6
+; X86-BMI1BMI2-NEXT:    jne .LBB57_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB56_6:
+; X86-BMI1BMI2-NEXT:  .LBB57_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB56_8
+; X86-BMI1BMI2-NEXT:    jne .LBB57_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB56_8:
+; X86-BMI1BMI2-NEXT:  .LBB57_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7326,37 +7461,37 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %al
-; X86-NOBMI-NEXT:    je .LBB57_2
+; X86-NOBMI-NEXT:    je .LBB58_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:  .LBB57_2:
+; X86-NOBMI-NEXT:  .LBB58_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
-; X86-NOBMI-NEXT:    jne .LBB57_4
+; X86-NOBMI-NEXT:    jne .LBB58_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebp
-; X86-NOBMI-NEXT:  .LBB57_4:
+; X86-NOBMI-NEXT:  .LBB58_4:
 ; X86-NOBMI-NEXT:    movl %ebp, %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edi
-; X86-NOBMI-NEXT:    jne .LBB57_6
+; X86-NOBMI-NEXT:    jne .LBB58_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %edx
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:  .LBB57_6:
+; X86-NOBMI-NEXT:  .LBB58_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    jne .LBB57_8
+; X86-NOBMI-NEXT:    jne .LBB58_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %edx, %esi
-; X86-NOBMI-NEXT:  .LBB57_8:
+; X86-NOBMI-NEXT:  .LBB58_8:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ecx
 ; X86-NOBMI-NEXT:    pushl %eax
@@ -7387,37 +7522,37 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB57_2
+; X86-BMI1NOTBM-NEXT:    je .LBB58_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB57_2:
+; X86-BMI1NOTBM-NEXT:  .LBB58_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB57_4:
+; X86-BMI1NOTBM-NEXT:  .LBB58_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebp, %esi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB57_6:
+; X86-BMI1NOTBM-NEXT:  .LBB58_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB57_8:
+; X86-BMI1NOTBM-NEXT:  .LBB58_8:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ecx
 ; X86-BMI1NOTBM-NEXT:    pushl %eax
@@ -7445,33 +7580,33 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB57_2
+; X86-BMI1BMI2-NEXT:    je .LBB58_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB57_2:
+; X86-BMI1BMI2-NEXT:  .LBB58_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_4
+; X86-BMI1BMI2-NEXT:    je .LBB58_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
 ; X86-BMI1BMI2-NEXT:    movl $0, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB57_4:
+; X86-BMI1BMI2-NEXT:  .LBB58_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edi
-; X86-BMI1BMI2-NEXT:    jne .LBB57_6
+; X86-BMI1BMI2-NEXT:    jne .LBB58_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB57_6:
+; X86-BMI1BMI2-NEXT:  .LBB58_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    jne .LBB57_8
+; X86-BMI1BMI2-NEXT:    jne .LBB58_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
-; X86-BMI1BMI2-NEXT:  .LBB57_8:
+; X86-BMI1BMI2-NEXT:  .LBB58_8:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ecx
 ; X86-BMI1BMI2-NEXT:    pushl %eax
@@ -7545,28 +7680,28 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB58_2
+; X86-NOBMI-NEXT:    je .LBB59_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB58_2:
+; X86-NOBMI-NEXT:  .LBB59_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB58_4
+; X86-NOBMI-NEXT:    je .LBB59_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB58_4:
+; X86-NOBMI-NEXT:  .LBB59_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB58_6
+; X86-NOBMI-NEXT:    jne .LBB59_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB58_6:
+; X86-NOBMI-NEXT:  .LBB59_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -7580,28 +7715,28 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB58_2
+; X86-BMI1NOTBM-NEXT:    je .LBB59_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB58_2:
+; X86-BMI1NOTBM-NEXT:  .LBB59_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB58_4
+; X86-BMI1NOTBM-NEXT:    je .LBB59_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB58_4:
+; X86-BMI1NOTBM-NEXT:  .LBB59_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB58_6:
+; X86-BMI1NOTBM-NEXT:  .LBB59_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -7613,27 +7748,27 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_2
+; X86-BMI1BMI2-NEXT:    je .LBB59_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB58_2:
+; X86-BMI1BMI2-NEXT:  .LBB59_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_4
+; X86-BMI1BMI2-NEXT:    je .LBB59_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB58_4:
+; X86-BMI1BMI2-NEXT:  .LBB59_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_6
+; X86-BMI1BMI2-NEXT:    je .LBB59_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB58_6:
+; X86-BMI1BMI2-NEXT:  .LBB59_6:
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_d0:
@@ -7684,10 +7819,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB59_2
+; X86-NOBMI-NEXT:    jne .LBB60_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB59_2:
+; X86-NOBMI-NEXT:  .LBB60_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -7708,10 +7843,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB60_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB59_2:
+; X86-BMI1NOTBM-NEXT:  .LBB60_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -7727,10 +7862,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_2
+; X86-BMI1BMI2-NEXT:    je .LBB60_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB59_2:
+; X86-BMI1BMI2-NEXT:  .LBB60_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index e5aecb07708d3..5757d6f6bc581 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -1907,6 +1907,92 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
   ret i32 %res
 }
 
+; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_b3:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-1, %edx
+; X86-NOBMI-NEXT:    shll %cl, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB27_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:  .LBB27_2:
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_b3:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
+; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
+; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB27_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
+; X86-BMI1NOTBM-NEXT:  .LBB27_2:
+; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_b3:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI1BMI2-NEXT:    testb $32, %al
+; X86-BMI1BMI2-NEXT:    jne .LBB27_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI1BMI2-NEXT:  .LBB27_2:
+; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_b3:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NOBMI-NEXT:    movl $4294967295, %edx # imm = 0xFFFFFFFF
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shlq %cl, %rdx
+; X64-NOBMI-NEXT:    xorl %edx, %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_b3:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1NOTBM-NEXT:    movl $4294967295, %edx # imm = 0xFFFFFFFF
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shlq %cl, %rdx
+; X64-BMI1NOTBM-NEXT:    xorl %edx, %eax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_b3:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rcx
+; X64-BMI1BMI2-NEXT:    xorl %ecx, %eax
+; X64-BMI1BMI2-NEXT:    andl %edi, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %widenumlowbits = zext i8 %numlowbits to i64
+  %notmask = shl nsw i64 4294967295, %widenumlowbits
+  %mask = xor i64 %notmask, 4294967295
+  %wideres = and i64 %val, %mask
+  %res = trunc i64 %wideres to i32
+  ret i32 %res
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern c. 32-bit
 ; ---------------------------------------------------------------------------- ;
@@ -2495,11 +2581,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB32_2
+; X86-NOBMI-NEXT:    je .LBB33_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB32_2:
+; X86-NOBMI-NEXT:  .LBB33_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2526,11 +2612,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB32_2
+; X86-BMI1NOTBM-NEXT:    je .LBB33_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB32_2:
+; X86-BMI1NOTBM-NEXT:  .LBB33_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -2556,11 +2642,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB32_2
+; X86-BMI1BMI2-NEXT:    je .LBB33_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB32_2:
+; X86-BMI1BMI2-NEXT:  .LBB33_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -2652,11 +2738,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB33_2
+; X86-NOBMI-NEXT:    je .LBB34_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB33_2:
+; X86-NOBMI-NEXT:  .LBB34_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2683,11 +2769,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB33_2
+; X86-BMI1NOTBM-NEXT:    je .LBB34_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB33_2:
+; X86-BMI1NOTBM-NEXT:  .LBB34_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -2713,11 +2799,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB33_2
+; X86-BMI1BMI2-NEXT:    je .LBB34_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB33_2:
+; X86-BMI1BMI2-NEXT:  .LBB34_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -2811,11 +2897,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB34_2
+; X86-NOBMI-NEXT:    je .LBB35_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB34_2:
+; X86-NOBMI-NEXT:  .LBB35_2:
 ; X86-NOBMI-NEXT:    movl (%edx), %esi
 ; X86-NOBMI-NEXT:    andl %eax, %esi
 ; X86-NOBMI-NEXT:    movl 4(%edx), %edi
@@ -2845,11 +2931,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB34_2
+; X86-BMI1NOTBM-NEXT:    je .LBB35_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB34_2:
+; X86-BMI1NOTBM-NEXT:  .LBB35_2:
 ; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
 ; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
@@ -2878,11 +2964,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB34_2
+; X86-BMI1BMI2-NEXT:    je .LBB35_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB34_2:
+; X86-BMI1BMI2-NEXT:  .LBB35_2:
 ; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
 ; X86-BMI1BMI2-NEXT:    andl %eax, %esi
 ; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
@@ -2965,11 +3051,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB35_2
+; X86-NOBMI-NEXT:    je .LBB36_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB35_2:
+; X86-NOBMI-NEXT:  .LBB36_2:
 ; X86-NOBMI-NEXT:    movl (%edx), %esi
 ; X86-NOBMI-NEXT:    andl %eax, %esi
 ; X86-NOBMI-NEXT:    movl 4(%edx), %edi
@@ -2999,11 +3085,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB35_2
+; X86-BMI1NOTBM-NEXT:    je .LBB36_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB35_2:
+; X86-BMI1NOTBM-NEXT:  .LBB36_2:
 ; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
 ; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
@@ -3032,11 +3118,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB35_2
+; X86-BMI1BMI2-NEXT:    je .LBB36_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB35_2:
+; X86-BMI1BMI2-NEXT:  .LBB36_2:
 ; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
 ; X86-BMI1BMI2-NEXT:    andl %eax, %esi
 ; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
@@ -3120,11 +3206,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB36_2
+; X86-NOBMI-NEXT:    je .LBB37_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB36_2:
+; X86-NOBMI-NEXT:  .LBB37_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -3151,11 +3237,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB36_2
+; X86-BMI1NOTBM-NEXT:    je .LBB37_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB36_2:
+; X86-BMI1NOTBM-NEXT:  .LBB37_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -3181,11 +3267,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB36_2
+; X86-BMI1BMI2-NEXT:    je .LBB37_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB36_2:
+; X86-BMI1BMI2-NEXT:  .LBB37_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -3277,10 +3363,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB37_2
+; X86-NOBMI-NEXT:    jne .LBB38_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB37_2:
+; X86-NOBMI-NEXT:  .LBB38_2:
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -3293,10 +3379,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB37_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB38_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB37_2:
+; X86-BMI1NOTBM-NEXT:  .LBB38_2:
 ; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -3308,10 +3394,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB37_2
+; X86-BMI1BMI2-NEXT:    je .LBB38_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB37_2:
+; X86-BMI1BMI2-NEXT:  .LBB38_2:
 ; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
@@ -3691,26 +3777,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB44_2
+; X86-NOBMI-NEXT:    jne .LBB45_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB44_2:
+; X86-NOBMI-NEXT:  .LBB45_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB44_4
+; X86-NOBMI-NEXT:    jne .LBB45_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB44_4:
+; X86-NOBMI-NEXT:  .LBB45_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB44_6
+; X86-NOBMI-NEXT:    jne .LBB45_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB44_6:
+; X86-NOBMI-NEXT:  .LBB45_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3730,26 +3816,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB44_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB45_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB44_2:
+; X86-BMI1NOTBM-NEXT:  .LBB45_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB44_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB45_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB44_4:
+; X86-BMI1NOTBM-NEXT:  .LBB45_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB44_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB45_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB44_6:
+; X86-BMI1NOTBM-NEXT:  .LBB45_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3767,22 +3853,22 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB44_2
+; X86-BMI1BMI2-NEXT:    je .LBB45_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB44_2:
+; X86-BMI1BMI2-NEXT:  .LBB45_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB44_4
+; X86-BMI1BMI2-NEXT:    jne .LBB45_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB44_4:
+; X86-BMI1BMI2-NEXT:  .LBB45_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB44_6
+; X86-BMI1BMI2-NEXT:    jne .LBB45_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB44_6:
+; X86-BMI1BMI2-NEXT:  .LBB45_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3828,26 +3914,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB45_2
+; X86-NOBMI-NEXT:    jne .LBB46_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB45_2:
+; X86-NOBMI-NEXT:  .LBB46_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB45_4
+; X86-NOBMI-NEXT:    jne .LBB46_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB45_4:
+; X86-NOBMI-NEXT:  .LBB46_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB45_6
+; X86-NOBMI-NEXT:    jne .LBB46_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB45_6:
+; X86-NOBMI-NEXT:  .LBB46_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3867,26 +3953,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB45_2:
+; X86-BMI1NOTBM-NEXT:  .LBB46_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB45_4:
+; X86-BMI1NOTBM-NEXT:  .LBB46_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB45_6:
+; X86-BMI1NOTBM-NEXT:  .LBB46_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3904,22 +3990,22 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_2
+; X86-BMI1BMI2-NEXT:    je .LBB46_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB45_2:
+; X86-BMI1BMI2-NEXT:  .LBB46_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB45_4
+; X86-BMI1BMI2-NEXT:    jne .LBB46_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB45_4:
+; X86-BMI1BMI2-NEXT:  .LBB46_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB45_6
+; X86-BMI1BMI2-NEXT:    jne .LBB46_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB45_6:
+; X86-BMI1BMI2-NEXT:  .LBB46_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3969,26 +4055,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB46_2
+; X86-NOBMI-NEXT:    jne .LBB47_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB46_2:
+; X86-NOBMI-NEXT:  .LBB47_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB46_4
+; X86-NOBMI-NEXT:    jne .LBB47_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB46_4:
+; X86-NOBMI-NEXT:  .LBB47_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB46_6
+; X86-NOBMI-NEXT:    jne .LBB47_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB46_6:
+; X86-NOBMI-NEXT:  .LBB47_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4009,26 +4095,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB46_2:
+; X86-BMI1NOTBM-NEXT:  .LBB47_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB46_4:
+; X86-BMI1NOTBM-NEXT:  .LBB47_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB46_6:
+; X86-BMI1NOTBM-NEXT:  .LBB47_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4047,22 +4133,22 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_2
+; X86-BMI1BMI2-NEXT:    je .LBB47_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB46_2:
+; X86-BMI1BMI2-NEXT:  .LBB47_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB46_4
+; X86-BMI1BMI2-NEXT:    jne .LBB47_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB46_4:
+; X86-BMI1BMI2-NEXT:  .LBB47_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB46_6
+; X86-BMI1BMI2-NEXT:    jne .LBB47_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB46_6:
+; X86-BMI1BMI2-NEXT:  .LBB47_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4110,26 +4196,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB47_2
+; X86-NOBMI-NEXT:    jne .LBB48_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB47_2:
+; X86-NOBMI-NEXT:  .LBB48_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB47_4
+; X86-NOBMI-NEXT:    jne .LBB48_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB47_4:
+; X86-NOBMI-NEXT:  .LBB48_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB47_6
+; X86-NOBMI-NEXT:    jne .LBB48_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB47_6:
+; X86-NOBMI-NEXT:  .LBB48_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4150,26 +4236,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
+; X86-BMI1NOTBM-NEXT:  .LBB48_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_4:
+; X86-BMI1NOTBM-NEXT:  .LBB48_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB47_6:
+; X86-BMI1NOTBM-NEXT:  .LBB48_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4188,22 +4274,22 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
+; X86-BMI1BMI2-NEXT:    je .LBB48_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
+; X86-BMI1BMI2-NEXT:  .LBB48_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB47_4
+; X86-BMI1BMI2-NEXT:    jne .LBB48_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_4:
+; X86-BMI1BMI2-NEXT:  .LBB48_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB47_6
+; X86-BMI1BMI2-NEXT:    jne .LBB48_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB47_6:
+; X86-BMI1BMI2-NEXT:  .LBB48_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4253,18 +4339,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB48_2
+; X86-NOBMI-NEXT:    je .LBB49_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB48_2:
+; X86-NOBMI-NEXT:  .LBB49_2:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB48_4
+; X86-NOBMI-NEXT:    jne .LBB49_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB48_4:
+; X86-NOBMI-NEXT:  .LBB49_4:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -4279,18 +4365,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB48_2
+; X86-BMI1NOTBM-NEXT:    je .LBB49_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB48_2:
+; X86-BMI1NOTBM-NEXT:  .LBB49_2:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB48_4:
+; X86-BMI1NOTBM-NEXT:  .LBB49_4:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -4303,17 +4389,17 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_2
+; X86-BMI1BMI2-NEXT:    je .LBB49_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB48_2:
+; X86-BMI1BMI2-NEXT:  .LBB49_2:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_4
+; X86-BMI1BMI2-NEXT:    je .LBB49_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB48_4:
+; X86-BMI1BMI2-NEXT:  .LBB49_4:
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_d0:

From b0dc262ffbcb577c58a659192189b7d43412f7d7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 15:33:28 +0000
Subject: [PATCH 0844/1176] [X86] Add AVX2 'fast-variable-shuffle' PHADD tests
 (PR39921)

Haswell etc. will combine shuffles to a extract_subvector(permd(x)) before isHorizontalBinOp can match it.

llvm-svn: 362326
---
 llvm/test/CodeGen/X86/phaddsub.ll | 71 ++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/X86/phaddsub.ll b/llvm/test/CodeGen/X86/phaddsub.ll
index 454256a3ec48b..cadde6873b19e 100644
--- a/llvm/test/CodeGen/X86/phaddsub.ll
+++ b/llvm/test/CodeGen/X86/phaddsub.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops   | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1,AVX1-FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2            | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX2,AVX2-SLOW
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops  | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX2,AVX2-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-variable-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SHUF
 
 define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
 ; SSSE3-LABEL: phaddw1:
@@ -140,6 +141,12 @@ define <4 x i32> @phaddd6(<4 x i32> %x) {
 ; AVX-FAST:       # %bb.0:
 ; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: phaddd6:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-SHUF-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-SHUF-NEXT:    retq
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
   %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
   %r = add <4 x i32> %a, %b
@@ -248,6 +255,12 @@ define <4 x i32> @phsubd4(<4 x i32> %x) {
 ; AVX-FAST:       # %bb.0:
 ; AVX-FAST-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: phsubd4:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-SHUF-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-SHUF-NEXT:    retq
   %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
   %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
   %r = sub <4 x i32> %a, %b
@@ -384,6 +397,12 @@ define <4 x i32> @phaddd_single_source4(<4 x i32> %x) {
 ; AVX-FAST:       # %bb.0:
 ; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: phaddd_single_source4:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,2,2]
+; AVX2-SHUF-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SHUF-NEXT:    retq
   %l = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
   %add = add <4 x i32> %l, %x
   ret <4 x i32> %add
@@ -415,6 +434,13 @@ define <4 x i32> @phaddd_single_source5(<4 x i32> %x) {
 ; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: phaddd_single_source5:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,1,2,2]
+; AVX2-SHUF-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX2-SHUF-NEXT:    retq
   %l = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
   %add = add <4 x i32> %l, %x
   %shuffle2 = shufflevector <4 x i32> %add, <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
@@ -464,12 +490,25 @@ define <8 x i16> @phaddw_single_source2(<8 x i16> %x) {
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
 ; SSSE3-NEXT:    retq
 ;
-; AVX-LABEL: phaddw_single_source2:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,6,7]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; AVX-NEXT:    retq
+; AVX-SLOW-LABEL: phaddw_single_source2:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,6,7]
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: phaddw_single_source2:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,6,7]
+; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: phaddw_single_source2:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX2-SHUF-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,4,5,6,7,10,11,8,9,12,13,14,15]
+; AVX2-SHUF-NEXT:    retq
   %l = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 2, i32 4, i32 6>
   %r = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7>
   %add = add <8 x i16> %l, %r
@@ -517,6 +556,12 @@ define <8 x i16> @phaddw_single_source4(<8 x i16> %x) {
 ; AVX-FAST:       # %bb.0:
 ; AVX-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: phaddw_single_source4:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vpslld $16, %xmm0, %xmm1
+; AVX2-SHUF-NEXT:    vpaddw %xmm0, %xmm1, %xmm0
+; AVX2-SHUF-NEXT:    retq
   %l = shufflevector <8 x i16> %x, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6>
   %add = add <8 x i16> %l, %x
   ret <8 x i16> %add
@@ -601,6 +646,20 @@ define i32 @PR39936_v8i32(<8 x i32>) {
 ; AVX2-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX2-FAST-NEXT:    vzeroupper
 ; AVX2-FAST-NEXT:    retq
+;
+; AVX2-SHUF-LABEL: PR39936_v8i32:
+; AVX2-SHUF:       # %bb.0:
+; AVX2-SHUF-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
+; AVX2-SHUF-NEXT:    vpermd %ymm0, %ymm1, %ymm1
+; AVX2-SHUF-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,3,5,7,5,7,6,7]
+; AVX2-SHUF-NEXT:    vpermd %ymm0, %ymm2, %ymm0
+; AVX2-SHUF-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SHUF-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-SHUF-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SHUF-NEXT:    vmovd %xmm0, %eax
+; AVX2-SHUF-NEXT:    vzeroupper
+; AVX2-SHUF-NEXT:    retq
   %2 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
   %3 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
   %4 = add <8 x i32> %2, %3

From 71a39bcf68c3fd8b578c66fca8ad044793ad18e5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 15:47:49 +0000
Subject: [PATCH 0845/1176] [X86] isHorizontalBinOp - add
 extract_subvector(shuffle(x)) handling (PR39921)

Let's us match horizontal op patterns on fast-variable-shuffle targets (Haswell etc.)

llvm-svn: 362327
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 27 ++++++++++++++++++++-----
 llvm/test/CodeGen/X86/phaddsub.ll       |  7 ++-----
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5bdcb89b8f501..1f6533c5c57f0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39666,6 +39666,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
          "Unsupported vector type for horizontal add/sub");
   unsigned NumElts = VT.getVectorNumElements();
 
+  // TODO - can we make a general helper method that does all of this for us?
   auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
                         SmallVectorImpl<int> &ShuffleMask) {
     if (Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
@@ -39677,17 +39678,33 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
       ShuffleMask.append(Mask.begin(), Mask.end());
       return;
     }
+    bool UseSubVector = false;
+    if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        Op.getOperand(0).getValueType().is256BitVector() &&
+        llvm::isNullConstant(Op.getOperand(1))) {
+      Op = Op.getOperand(0);
+      UseSubVector = true;
+    }
     bool IsUnary;
     SmallVector<SDValue, 2> SrcOps;
     SmallVector<int, 16> SrcShuffleMask;
     SDValue BC = peekThroughBitcasts(Op);
     if (isTargetShuffle(BC.getOpcode()) &&
         getTargetShuffleMask(BC.getNode(), BC.getSimpleValueType(), false,
-                             SrcOps, SrcShuffleMask, IsUnary) &&
-        SrcOps.size() <= 2 && SrcShuffleMask.size() == NumElts) {
-      N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
-      N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
-      ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end());
+                             SrcOps, SrcShuffleMask, IsUnary)) {
+      if (!UseSubVector && SrcShuffleMask.size() == NumElts &&
+          SrcOps.size() <= 2) {
+        N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
+        N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
+        ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end());
+      }
+      if (UseSubVector && (SrcShuffleMask.size() == (NumElts * 2)) &&
+          SrcOps.size() == 1) {
+        N0 = extract128BitVector(SrcOps[0], 0, DAG, SDLoc(Op));
+        N1 = extract128BitVector(SrcOps[0], NumElts, DAG, SDLoc(Op));
+        ArrayRef<int> Mask = ArrayRef<int>(SrcShuffleMask).slice(0, NumElts);
+        ShuffleMask.append(Mask.begin(), Mask.end());
+      }
     }
   };
 
diff --git a/llvm/test/CodeGen/X86/phaddsub.ll b/llvm/test/CodeGen/X86/phaddsub.ll
index cadde6873b19e..08cacce6abd80 100644
--- a/llvm/test/CodeGen/X86/phaddsub.ll
+++ b/llvm/test/CodeGen/X86/phaddsub.ll
@@ -649,11 +649,8 @@ define i32 @PR39936_v8i32(<8 x i32>) {
 ;
 ; AVX2-SHUF-LABEL: PR39936_v8i32:
 ; AVX2-SHUF:       # %bb.0:
-; AVX2-SHUF-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
-; AVX2-SHUF-NEXT:    vpermd %ymm0, %ymm1, %ymm1
-; AVX2-SHUF-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,3,5,7,5,7,6,7]
-; AVX2-SHUF-NEXT:    vpermd %ymm0, %ymm2, %ymm0
-; AVX2-SHUF-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-SHUF-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-SHUF-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
 ; AVX2-SHUF-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX2-SHUF-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; AVX2-SHUF-NEXT:    vpaddd %xmm0, %xmm1, %xmm0

From ccbda6b0003b49d030f668c7f27dedca6d11ebda Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Sun, 2 Jun 2019 15:53:43 +0000
Subject: [PATCH 0846/1176] [test] Fix plugin tests

Recommit of r361790 that was temporarily reverted in r361793 due to bot breakage.

Summary:
The following changes were required to fix these tests:

1) Change LLVM_ENABLE_PLUGINS to an option and move it to
   llvm/CMakeLists.txt with an appropriate default -- which matches
   the original default behavior.

2) Move the plugins directory from clang/test/Analysis
   clang/lib/Analysis.  It's not enough to add an exclude to the
   lit.local.cfg file because add_lit_testsuites recurses the tree and
   automatically adds the appropriate `check-` targets, which don't
   make sense for the plugins because they aren't tests and don't
   have `RUN` statements.

   Here's a list of the `clang-check-anlysis*` targets with this
   change:

```
  $ ninja -t targets all| sed -n "s/.*\/\(check[^:]*\):.*/\1/p" | sort -u | grep clang-analysis
  check-clang-analysis
  check-clang-analysis-checkers
  check-clang-analysis-copypaste
  check-clang-analysis-diagnostics
  check-clang-analysis-engine
  check-clang-analysis-exploration_order
  check-clang-analysis-html_diagnostics
  check-clang-analysis-html_diagnostics-relevant_lines
  check-clang-analysis-inlining
  check-clang-analysis-objc
  check-clang-analysis-unified-sources
  check-clang-analysis-z3
```

3) Simplify the logic and only include the subdirectories under
   clang/lib/Analysis/plugins if LLVM_ENABLE_PLUGINS is set.

Reviewed By: NoQ

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62445

llvm-svn: 362328
---
 clang/lib/Analysis/CMakeLists.txt             |  2 ++
 clang/lib/Analysis/plugins/CMakeLists.txt     |  5 +++
 .../CheckerDependencyHandling/CMakeLists.txt  | 15 ++++-----
 .../CheckerDependencyHandling.cpp             |  0
 ...erDependencyHandlingAnalyzerPlugin.exports |  0
 .../CheckerOptionHandling/CMakeLists.txt      | 15 ++++-----
 .../CheckerOptionHandling.cpp                 |  0
 ...heckerOptionHandlingAnalyzerPlugin.exports |  0
 .../plugins/SampleAnalyzer/CMakeLists.txt     | 10 ++++++
 .../SampleAnalyzer/MainCallChecker.cpp        |  0
 .../SampleAnalyzerPlugin.exports              |  0
 clang/test/Analysis/lit.local.cfg             |  2 --
 clang/test/Analysis/plugins/CMakeLists.txt    | 12 -------
 .../plugins/SampleAnalyzer/CMakeLists.txt     | 11 -------
 clang/test/CMakeLists.txt                     | 32 ++++---------------
 llvm/CMakeLists.txt                           | 11 +++++++
 llvm/cmake/modules/HandleLLVMOptions.cmake    |  8 -----
 17 files changed, 48 insertions(+), 75 deletions(-)
 create mode 100644 clang/lib/Analysis/plugins/CMakeLists.txt
 rename clang/{test => lib}/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt (51%)
 rename clang/{test => lib}/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp (100%)
 rename clang/{test => lib}/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports (100%)
 rename clang/{test => lib}/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt (50%)
 rename clang/{test => lib}/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp (100%)
 rename clang/{test => lib}/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports (100%)
 create mode 100644 clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
 rename clang/{test => lib}/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp (100%)
 rename clang/{test => lib}/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports (100%)
 delete mode 100644 clang/test/Analysis/plugins/CMakeLists.txt
 delete mode 100644 clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt

diff --git a/clang/lib/Analysis/CMakeLists.txt b/clang/lib/Analysis/CMakeLists.txt
index 940a3dfe6f60d..92717143467d5 100644
--- a/clang/lib/Analysis/CMakeLists.txt
+++ b/clang/lib/Analysis/CMakeLists.txt
@@ -34,3 +34,5 @@ add_clang_library(clangAnalysis
   clangBasic
   clangLex
   )
+
+add_subdirectory(plugins)
diff --git a/clang/lib/Analysis/plugins/CMakeLists.txt b/clang/lib/Analysis/plugins/CMakeLists.txt
new file mode 100644
index 0000000000000..f7dbc936952cc
--- /dev/null
+++ b/clang/lib/Analysis/plugins/CMakeLists.txt
@@ -0,0 +1,5 @@
+if(LLVM_ENABLE_PLUGINS)
+  add_subdirectory(SampleAnalyzer)
+  add_subdirectory(CheckerDependencyHandling)
+  add_subdirectory(CheckerOptionHandling)
+endif()
diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
similarity index 51%
rename from clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
rename to clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
index 80e2cdbd3a258..0a8ff48755f17 100644
--- a/clang/test/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
@@ -1,11 +1,10 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerDependencyHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE CheckerDependencyHandling.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
-  target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
-    clangAnalysis
-    clangAST
-    clangStaticAnalyzerCore
-    LLVMSupport
-    )
-endif()
+target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
+  clangAnalysis
+  clangAST
+  clangStaticAnalyzerCore
+  clangStaticAnalyzerFrontend
+  LLVMSupport
+  )
diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
rename to clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
diff --git a/clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
rename to clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandlingAnalyzerPlugin.exports
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
similarity index 50%
rename from clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
rename to clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
index 6a1d5e8527941..6e289933c2dd4 100644
--- a/clang/test/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
@@ -1,11 +1,10 @@
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerOptionHandlingAnalyzerPlugin.exports)
 add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE CheckerOptionHandling.cpp PLUGIN_TOOL clang)
 
-if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
-  target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
-    clangAnalysis
-    clangAST
-    clangStaticAnalyzerCore
-    LLVMSupport
-    )
-endif()
+target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
+  clangAnalysis
+  clangAST
+  clangStaticAnalyzerCore
+  clangStaticAnalyzerFrontend
+  LLVMSupport
+  )
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp b/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
rename to clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
diff --git a/clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports b/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
similarity index 100%
rename from clang/test/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
rename to clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandlingAnalyzerPlugin.exports
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
new file mode 100644
index 0000000000000..639a97f253112
--- /dev/null
+++ b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
+add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
+
+target_link_libraries(SampleAnalyzerPlugin PRIVATE
+  clangAnalysis
+  clangAST
+  clangStaticAnalyzerCore
+  clangStaticAnalyzerFrontend
+  LLVMSupport
+  )
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp b/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
similarity index 100%
rename from clang/test/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
rename to clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports b/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
similarity index 100%
rename from clang/test/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
rename to clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.exports
diff --git a/clang/test/Analysis/lit.local.cfg b/clang/test/Analysis/lit.local.cfg
index b77cae8ecebec..84f7569152c9f 100644
--- a/clang/test/Analysis/lit.local.cfg
+++ b/clang/test/Analysis/lit.local.cfg
@@ -18,7 +18,5 @@ config.substitutions.append(('%diff_plist',
 config.substitutions.append(('%diff_sarif',
     '''diff -U1 -w -I ".*file:.*%basename_t" -I '"version":' -I "2\.0\.0\-csd\.[0-9]*\.beta\."'''))
 
-config.excludes.add('plugins')
-
 if not config.root.clang_staticanalyzer:
     config.unsupported = True
diff --git a/clang/test/Analysis/plugins/CMakeLists.txt b/clang/test/Analysis/plugins/CMakeLists.txt
deleted file mode 100644
index 8d4333f99a4d3..0000000000000
--- a/clang/test/Analysis/plugins/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-add_subdirectory(SampleAnalyzer)
-add_subdirectory(CheckerDependencyHandling)
-add_subdirectory(CheckerOptionHandling)
-
-set(CLANG_ANALYZER_PLUGIN_DEPS
-  SampleAnalyzerPlugin
-  CheckerDependencyHandlingAnalyzerPlugin
-  CheckerOptionHandlingAnalyzerPlugin
-  )
-
-add_custom_target(clang-analyzer-plugin
-  DEPENDS ${CLANG_ANALYZER_PLUGIN_DEPS})
diff --git a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
deleted file mode 100644
index 7c7b2aec1988d..0000000000000
--- a/clang/test/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
-add_llvm_library(SampleAnalyzerPlugin MODULE MainCallChecker.cpp PLUGIN_TOOL clang)
-
-if(LLVM_ENABLE_PLUGINS AND (WIN32 OR CYGWIN))
-  target_link_libraries(SampleAnalyzerPlugin PRIVATE
-    clangAnalysis
-    clangAST
-    clangStaticAnalyzerCore
-    LLVMSupport
-    )
-endif()
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 339f637847deb..04dbaf2e61949 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -83,12 +83,6 @@ if (CLANG_BUILD_EXAMPLES)
     )
 endif ()
 
-if (CLANG_ENABLE_STATIC_ANALYZER AND CLANG_BUILD_EXAMPLES)
-  list(APPEND CLANG_TEST_DEPS
-    SampleAnalyzerPlugin
-    )
-endif ()
-
 set(CLANG_TEST_PARAMS
   clang_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
   USE_Z3_SOLVER=0
@@ -126,27 +120,13 @@ if( NOT CLANG_BUILT_STANDALONE )
 endif()
 
 if (CLANG_ENABLE_STATIC_ANALYZER)
-  add_subdirectory(Analysis/plugins)
-  list(APPEND CLANG_TEST_DEPS clang-analyzer-plugin)
-
-  # check-all would launch those tests via check-clang.
-  set(EXCLUDE_FROM_ALL ON)
-
-  add_lit_testsuite(check-clang-analyzer "Running the Clang analyzer tests"
-    ${CMAKE_CURRENT_BINARY_DIR}/Analysis
-    PARAMS ${ANALYZER_TEST_PARAMS}
-    DEPENDS ${CLANG_TEST_DEPS})
-  set_target_properties(check-clang-analyzer PROPERTIES FOLDER "Clang tests")
-
-  if (LLVM_WITH_Z3)
-    add_lit_testsuite(check-clang-analyzer-z3 "Running the Clang analyzer tests, using Z3 as a solver"
-      ${CMAKE_CURRENT_BINARY_DIR}/Analysis
-      PARAMS ${ANALYZER_TEST_PARAMS_Z3}
-      DEPENDS ${CLANG_TEST_DEPS})
-    set_target_properties(check-clang-analyzer-z3 PROPERTIES FOLDER "Clang tests")
+  if (LLVM_ENABLE_PLUGINS)
+    set(CLANG_TEST_DEPS
+      SampleAnalyzerPlugin
+      CheckerDependencyHandlingAnalyzerPlugin
+      CheckerOptionHandlingAnalyzerPlugin
+      )
   endif()
-
-  set(EXCLUDE_FROM_ALL OFF)
 endif()
 
 add_custom_target(clang-test-depends DEPENDS ${CLANG_TEST_DEPS})
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index a9addfc5a31da..0bb3edfa8959a 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -622,6 +622,17 @@ set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}")
 message(STATUS "LLVM host triple: ${LLVM_HOST_TRIPLE}")
 message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}")
 
+if(WIN32 OR CYGWIN)
+  if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
+    set(LLVM_ENABLE_PLUGINS_default ON)
+  else()
+    set(LLVM_ENABLE_PLUGINS_default OFF)
+  endif()
+else()
+  set(LLVM_ENABLE_PLUGINS_default ON)
+endif()
+option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default})
+
 include(HandleLLVMOptions)
 
 # Verify that we can find a Python 2 interpreter.  Python 3 is unsupported.
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 1aa0804a3c0d2..80ef69f36e0e8 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -912,14 +912,6 @@ if(LLVM_LINK_LLVM_DYLIB AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)
   message(FATAL_ERROR "LLVM_LINK_LLVM_DYLIB not compatible with LLVM_EXPORT_SYMBOLS_FOR_PLUGINS")
 endif()
 
-# Plugin support
-# FIXME: Make this configurable.
-if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB)
-  set(LLVM_ENABLE_PLUGINS ON)
-else()
-  set(LLVM_ENABLE_PLUGINS OFF)
-endif()
-
 # By default we should enable LLVM_ENABLE_IDE only for multi-configuration
 # generators. This option disables optional build system features that make IDEs
 # less usable.

From d0d32c35d93f4ad8063cb5d2ebc4822a82c0cb96 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 2 Jun 2019 16:47:07 +0000
Subject: [PATCH 0847/1176] Add test coverage for __pascal mangling

llvm-svn: 362329
---
 clang/test/CodeGenCXX/mangle-windows.cpp | 4 ++++
 llvm/test/Demangle/ms-windows.test       | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/clang/test/CodeGenCXX/mangle-windows.cpp b/clang/test/CodeGenCXX/mangle-windows.cpp
index a9d7be1197cc8..3d5a1e9a868ef 100644
--- a/clang/test/CodeGenCXX/mangle-windows.cpp
+++ b/clang/test/CodeGenCXX/mangle-windows.cpp
@@ -20,6 +20,10 @@ extern "C" void __fastcall f4(void) {}
 // WIN: define dso_local x86_fastcallcc void @"\01@f4@0"
 // ITANIUM: define dso_local x86_fastcallcc void @"\01@f4@0"
 
+void __pascal f5(void) {}
+// WIN: define dso_local void @"?f5@@YCXXZ"
+// ITANIUM: define dso_local void @_Z2f5v
+
 struct Foo {
   void __stdcall foo();
   static void __stdcall bar();
diff --git a/llvm/test/Demangle/ms-windows.test b/llvm/test/Demangle/ms-windows.test
index bc80090358379..d083f51717066 100644
--- a/llvm/test/Demangle/ms-windows.test
+++ b/llvm/test/Demangle/ms-windows.test
@@ -15,3 +15,6 @@
 
 ?f1@@YGXXZ
 ; CHECK: void __stdcall f1(void)
+
+?f5@@YCXXZ
+; CHECK: void __pascal f5(void)

From 420f5df1c325d109a56d0f164554518c0d7efe76 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Sun, 2 Jun 2019 17:11:21 +0000
Subject: [PATCH 0848/1176] [NFC][X86] extract-{low,}bits.ll: one more pattern
 a with truncation

llvm-svn: 362330
---
 llvm/test/CodeGen/X86/extract-bits.ll    | 829 +++++++++++++----------
 llvm/test/CodeGen/X86/extract-lowbits.ll | 408 ++++++-----
 2 files changed, 727 insertions(+), 510 deletions(-)

diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index 6a594c18d4e05..ed0bdf3efc0e0 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -2093,6 +2093,139 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
   ret i32 %truncmasked
 }
 
+; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bextr64_32_a3:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    movl %edi, %esi
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    shrdl %cl, %edi, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB18_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %eax, %esi
+; X86-NOBMI-NEXT:  .LBB18_2:
+; X86-NOBMI-NEXT:    movl $1, %edi
+; X86-NOBMI-NEXT:    movl %edx, %ecx
+; X86-NOBMI-NEXT:    shll %cl, %edi
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:    testb $32, %dl
+; X86-NOBMI-NEXT:    jne .LBB18_4
+; X86-NOBMI-NEXT:  # %bb.3:
+; X86-NOBMI-NEXT:    movl %edi, %eax
+; X86-NOBMI-NEXT:  .LBB18_4:
+; X86-NOBMI-NEXT:    decl %eax
+; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bextr64_32_a3:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB18_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
+; X86-BMI1NOTBM-NEXT:  .LBB18_2:
+; X86-BMI1NOTBM-NEXT:    movl $1, %edi
+; X86-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %dl
+; X86-BMI1NOTBM-NEXT:    jne .LBB18_4
+; X86-BMI1NOTBM-NEXT:  # %bb.3:
+; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
+; X86-BMI1NOTBM-NEXT:  .LBB18_4:
+; X86-BMI1NOTBM-NEXT:    decl %eax
+; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bextr64_32_a3:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB18_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI1BMI2-NEXT:  .LBB18_2:
+; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI1BMI2-NEXT:    testb $32, %bl
+; X86-BMI1BMI2-NEXT:    jne .LBB18_4
+; X86-BMI1BMI2-NEXT:  # %bb.3:
+; X86-BMI1BMI2-NEXT:    movl $1, %eax
+; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI1BMI2-NEXT:  .LBB18_4:
+; X86-BMI1BMI2-NEXT:    decl %eax
+; X86-BMI1BMI2-NEXT:    andl %edx, %eax
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bextr64_32_a3:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rdi
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    decl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bextr64_32_a3:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
+; X64-BMI1NOTBM-NEXT:    movl $1, %eax
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shlq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    decl %eax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bextr64_32_a3:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    movl $1, %ecx
+; X64-BMI1BMI2-NEXT:    shlxq %rdx, %rcx, %rcx
+; X64-BMI1BMI2-NEXT:    decl %ecx
+; X64-BMI1BMI2-NEXT:    andl %ecx, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %shifted = lshr i64 %val, %numskipbits
+  %onebit = shl i64 1, %numlowbits
+  %mask = add nsw i64 %onebit, 4294967295
+  %masked = and i64 %mask, %shifted
+  %truncmasked = trunc i64 %masked to i32
+  ret i32 %truncmasked
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern b. 32-bit
 ; ---------------------------------------------------------------------------- ;
@@ -2554,22 +2687,22 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB24_2
+; X86-NOBMI-NEXT:    je .LBB25_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB24_2:
+; X86-NOBMI-NEXT:  .LBB25_2:
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movb %ch, %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %ch
-; X86-NOBMI-NEXT:    je .LBB24_4
+; X86-NOBMI-NEXT:    je .LBB25_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB24_4:
+; X86-NOBMI-NEXT:  .LBB25_4:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    andl %edi, %edx
 ; X86-NOBMI-NEXT:    notl %eax
@@ -2591,22 +2724,22 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB24_2
+; X86-BMI1NOTBM-NEXT:    je .LBB25_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB24_2:
+; X86-BMI1NOTBM-NEXT:  .LBB25_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB24_4
+; X86-BMI1NOTBM-NEXT:    je .LBB25_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB24_4:
+; X86-BMI1NOTBM-NEXT:  .LBB25_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -2626,21 +2759,21 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB24_2
+; X86-BMI1BMI2-NEXT:    je .LBB25_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB24_2:
+; X86-BMI1BMI2-NEXT:  .LBB25_2:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %edi
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB24_4
+; X86-BMI1BMI2-NEXT:    je .LBB25_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB24_4:
+; X86-BMI1BMI2-NEXT:  .LBB25_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -2693,22 +2826,22 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB25_2
+; X86-NOBMI-NEXT:    je .LBB26_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB25_2:
+; X86-NOBMI-NEXT:  .LBB26_2:
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movb %ch, %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %ch
-; X86-NOBMI-NEXT:    je .LBB25_4
+; X86-NOBMI-NEXT:    je .LBB26_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB25_4:
+; X86-NOBMI-NEXT:  .LBB26_4:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    andl %edi, %edx
 ; X86-NOBMI-NEXT:    notl %eax
@@ -2730,22 +2863,22 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB25_2
+; X86-BMI1NOTBM-NEXT:    je .LBB26_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB25_2:
+; X86-BMI1NOTBM-NEXT:  .LBB26_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB25_4
+; X86-BMI1NOTBM-NEXT:    je .LBB26_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB25_4:
+; X86-BMI1NOTBM-NEXT:  .LBB26_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -2765,21 +2898,21 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB25_2
+; X86-BMI1BMI2-NEXT:    je .LBB26_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB25_2:
+; X86-BMI1BMI2-NEXT:  .LBB26_2:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %edi
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB25_4
+; X86-BMI1BMI2-NEXT:    je .LBB26_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB25_4:
+; X86-BMI1BMI2-NEXT:  .LBB26_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -2837,22 +2970,22 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB26_2
+; X86-NOBMI-NEXT:    je .LBB27_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB26_2:
+; X86-NOBMI-NEXT:  .LBB27_2:
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movb %ch, %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %ch
-; X86-NOBMI-NEXT:    je .LBB26_4
+; X86-NOBMI-NEXT:    je .LBB27_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB26_4:
+; X86-NOBMI-NEXT:  .LBB27_4:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    andl %edi, %edx
 ; X86-NOBMI-NEXT:    notl %eax
@@ -2875,22 +3008,22 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB26_2
+; X86-BMI1NOTBM-NEXT:    je .LBB27_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB26_2:
+; X86-BMI1NOTBM-NEXT:  .LBB27_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB26_4
+; X86-BMI1NOTBM-NEXT:    je .LBB27_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB26_4:
+; X86-BMI1NOTBM-NEXT:  .LBB27_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -2911,21 +3044,21 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB26_2
+; X86-BMI1BMI2-NEXT:    je .LBB27_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB26_2:
+; X86-BMI1BMI2-NEXT:  .LBB27_2:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %edi
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB26_4
+; X86-BMI1BMI2-NEXT:    je .LBB27_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB26_4:
+; X86-BMI1BMI2-NEXT:  .LBB27_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -2981,22 +3114,22 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB27_2
+; X86-NOBMI-NEXT:    je .LBB28_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB27_2:
+; X86-NOBMI-NEXT:  .LBB28_2:
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movb %ch, %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %ch
-; X86-NOBMI-NEXT:    je .LBB27_4
+; X86-NOBMI-NEXT:    je .LBB28_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB27_4:
+; X86-NOBMI-NEXT:  .LBB28_4:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    andl %edi, %edx
 ; X86-NOBMI-NEXT:    notl %eax
@@ -3019,22 +3152,22 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB27_2
+; X86-BMI1NOTBM-NEXT:    je .LBB28_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB27_2:
+; X86-BMI1NOTBM-NEXT:  .LBB28_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB27_4
+; X86-BMI1NOTBM-NEXT:    je .LBB28_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB27_4:
+; X86-BMI1NOTBM-NEXT:  .LBB28_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -3055,21 +3188,21 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB27_2
+; X86-BMI1BMI2-NEXT:    je .LBB28_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB27_2:
+; X86-BMI1BMI2-NEXT:  .LBB28_2:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %edi
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB27_4
+; X86-BMI1BMI2-NEXT:    je .LBB28_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB27_4:
+; X86-BMI1BMI2-NEXT:  .LBB28_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -3128,22 +3261,22 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB28_2
+; X86-NOBMI-NEXT:    je .LBB29_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB28_2:
+; X86-NOBMI-NEXT:  .LBB29_2:
 ; X86-NOBMI-NEXT:    movl $-1, %edi
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    movb %ch, %cl
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %edi
 ; X86-NOBMI-NEXT:    testb $32, %ch
-; X86-NOBMI-NEXT:    je .LBB28_4
+; X86-NOBMI-NEXT:    je .LBB29_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:  .LBB28_4:
+; X86-NOBMI-NEXT:  .LBB29_4:
 ; X86-NOBMI-NEXT:    notl %edi
 ; X86-NOBMI-NEXT:    andl %edi, %edx
 ; X86-NOBMI-NEXT:    notl %esi
@@ -3165,22 +3298,22 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB28_2
+; X86-BMI1NOTBM-NEXT:    je .LBB29_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB28_2:
+; X86-BMI1NOTBM-NEXT:  .LBB29_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB28_4
+; X86-BMI1NOTBM-NEXT:    je .LBB29_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB28_4:
+; X86-BMI1NOTBM-NEXT:  .LBB29_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -3200,21 +3333,21 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB28_2
+; X86-BMI1BMI2-NEXT:    je .LBB29_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB28_2:
+; X86-BMI1BMI2-NEXT:  .LBB29_2:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %edi
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB28_4
+; X86-BMI1BMI2-NEXT:    je .LBB29_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB28_4:
+; X86-BMI1BMI2-NEXT:  .LBB29_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    andnl %esi, %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -3271,22 +3404,22 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrl %cl, %ebp
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %al
-; X86-NOBMI-NEXT:    je .LBB29_2
+; X86-NOBMI-NEXT:    je .LBB30_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebp, %ebx
 ; X86-NOBMI-NEXT:    xorl %ebp, %ebp
-; X86-NOBMI-NEXT:  .LBB29_2:
+; X86-NOBMI-NEXT:  .LBB30_2:
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    movl $-1, %edi
 ; X86-NOBMI-NEXT:    movl %edx, %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    shldl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %dl
-; X86-NOBMI-NEXT:    je .LBB29_4
+; X86-NOBMI-NEXT:    je .LBB30_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB29_4:
+; X86-NOBMI-NEXT:  .LBB30_4:
 ; X86-NOBMI-NEXT:    notl %esi
 ; X86-NOBMI-NEXT:    andl %ebp, %esi
 ; X86-NOBMI-NEXT:    notl %edi
@@ -3321,22 +3454,22 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB29_2
+; X86-BMI1NOTBM-NEXT:    je .LBB30_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB29_2:
+; X86-BMI1NOTBM-NEXT:  .LBB30_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebp
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %dl
-; X86-BMI1NOTBM-NEXT:    je .LBB29_4
+; X86-BMI1NOTBM-NEXT:    je .LBB30_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB29_4:
+; X86-BMI1NOTBM-NEXT:  .LBB30_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    andnl %edi, %ebp, %edi
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
@@ -3368,21 +3501,21 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB29_2
+; X86-BMI1BMI2-NEXT:    je .LBB30_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB29_2:
+; X86-BMI1BMI2-NEXT:  .LBB30_2:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
 ; X86-BMI1BMI2-NEXT:    shlxl %edx, %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    movl %edx, %ecx
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:    testb $32, %dl
-; X86-BMI1BMI2-NEXT:    je .LBB29_4
+; X86-BMI1BMI2-NEXT:    je .LBB30_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB29_4:
+; X86-BMI1BMI2-NEXT:  .LBB30_4:
 ; X86-BMI1BMI2-NEXT:    andnl %esi, %ebp, %esi
 ; X86-BMI1BMI2-NEXT:    andnl %edi, %ebx, %edi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
@@ -3462,19 +3595,19 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB30_2
+; X86-NOBMI-NEXT:    jne .LBB31_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB30_2:
+; X86-NOBMI-NEXT:  .LBB31_2:
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    movl %edx, %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    testb $32, %dl
-; X86-NOBMI-NEXT:    jne .LBB30_4
+; X86-NOBMI-NEXT:    jne .LBB31_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ecx
-; X86-NOBMI-NEXT:  .LBB30_4:
+; X86-NOBMI-NEXT:  .LBB31_4:
 ; X86-NOBMI-NEXT:    notl %ecx
 ; X86-NOBMI-NEXT:    andl %ecx, %eax
 ; X86-NOBMI-NEXT:    popl %esi
@@ -3493,19 +3626,19 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB30_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB31_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB30_2:
+; X86-BMI1NOTBM-NEXT:  .LBB31_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    jne .LBB30_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB31_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X86-BMI1NOTBM-NEXT:  .LBB30_4:
+; X86-BMI1NOTBM-NEXT:  .LBB31_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %ecx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
@@ -3520,17 +3653,17 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB30_2
+; X86-BMI1BMI2-NEXT:    je .LBB31_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB30_2:
+; X86-BMI1BMI2-NEXT:  .LBB31_2:
 ; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB30_4
+; X86-BMI1BMI2-NEXT:    jne .LBB31_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB30_4:
+; X86-BMI1BMI2-NEXT:  .LBB31_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3590,10 +3723,10 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB31_2
+; X86-NOBMI-NEXT:    jne .LBB32_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:  .LBB31_2:
+; X86-NOBMI-NEXT:  .LBB32_2:
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movl %edx, %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -3615,10 +3748,10 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB31_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB32_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB31_2:
+; X86-BMI1NOTBM-NEXT:  .LBB32_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -3634,10 +3767,10 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB31_2
+; X86-BMI1BMI2-NEXT:    je .LBB32_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB31_2:
+; X86-BMI1BMI2-NEXT:  .LBB32_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3692,10 +3825,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB32_2
+; X86-NOBMI-NEXT:    jne .LBB33_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:  .LBB32_2:
+; X86-NOBMI-NEXT:  .LBB33_2:
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movl %edx, %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -3717,10 +3850,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB32_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB33_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB32_2:
+; X86-BMI1NOTBM-NEXT:  .LBB33_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -3736,10 +3869,10 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB32_2
+; X86-BMI1BMI2-NEXT:    je .LBB33_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB32_2:
+; X86-BMI1BMI2-NEXT:  .LBB33_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3795,19 +3928,19 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB33_2
+; X86-NOBMI-NEXT:    jne .LBB34_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB33_2:
+; X86-NOBMI-NEXT:  .LBB34_2:
 ; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    movl %edx, %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    testb $32, %dl
-; X86-NOBMI-NEXT:    jne .LBB33_4
+; X86-NOBMI-NEXT:    jne .LBB34_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ecx
-; X86-NOBMI-NEXT:  .LBB33_4:
+; X86-NOBMI-NEXT:  .LBB34_4:
 ; X86-NOBMI-NEXT:    notl %ecx
 ; X86-NOBMI-NEXT:    andl %ecx, %eax
 ; X86-NOBMI-NEXT:    popl %esi
@@ -3826,19 +3959,19 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB33_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB34_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB33_2:
+; X86-BMI1NOTBM-NEXT:  .LBB34_2:
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    jne .LBB33_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB34_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X86-BMI1NOTBM-NEXT:  .LBB33_4:
+; X86-BMI1NOTBM-NEXT:  .LBB34_4:
 ; X86-BMI1NOTBM-NEXT:    andnl %edx, %ecx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
@@ -3853,17 +3986,17 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB33_2
+; X86-BMI1BMI2-NEXT:    je .LBB34_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB33_2:
+; X86-BMI1BMI2-NEXT:  .LBB34_2:
 ; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB33_4
+; X86-BMI1BMI2-NEXT:    jne .LBB34_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB33_4:
+; X86-BMI1BMI2-NEXT:  .LBB34_4:
 ; X86-BMI1BMI2-NEXT:    andnl %edx, %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4776,11 +4909,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB40_2
+; X86-NOBMI-NEXT:    je .LBB41_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB40_2:
+; X86-NOBMI-NEXT:  .LBB41_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -4788,11 +4921,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB40_4
+; X86-NOBMI-NEXT:    je .LBB41_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB40_4:
+; X86-NOBMI-NEXT:  .LBB41_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -4823,11 +4956,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB40_2
+; X86-BMI1NOTBM-NEXT:    je .LBB41_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB40_2:
+; X86-BMI1NOTBM-NEXT:  .LBB41_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -4835,11 +4968,11 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB40_4
+; X86-BMI1NOTBM-NEXT:    je .LBB41_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB40_4:
+; X86-BMI1NOTBM-NEXT:  .LBB41_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -4869,22 +5002,22 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB40_2
+; X86-BMI1BMI2-NEXT:    je .LBB41_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB40_2:
+; X86-BMI1BMI2-NEXT:  .LBB41_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB40_4
+; X86-BMI1BMI2-NEXT:    je .LBB41_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB40_4:
+; X86-BMI1BMI2-NEXT:  .LBB41_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -4985,11 +5118,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB41_2
+; X86-NOBMI-NEXT:    je .LBB42_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB41_2:
+; X86-NOBMI-NEXT:  .LBB42_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -4997,11 +5130,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB41_4
+; X86-NOBMI-NEXT:    je .LBB42_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB41_4:
+; X86-NOBMI-NEXT:  .LBB42_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5032,11 +5165,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB41_2
+; X86-BMI1NOTBM-NEXT:    je .LBB42_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB41_2:
+; X86-BMI1NOTBM-NEXT:  .LBB42_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5044,11 +5177,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB41_4
+; X86-BMI1NOTBM-NEXT:    je .LBB42_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB41_4:
+; X86-BMI1NOTBM-NEXT:  .LBB42_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5078,22 +5211,22 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB41_2
+; X86-BMI1BMI2-NEXT:    je .LBB42_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB41_2:
+; X86-BMI1BMI2-NEXT:  .LBB42_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB41_4
+; X86-BMI1BMI2-NEXT:    je .LBB42_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB41_4:
+; X86-BMI1BMI2-NEXT:  .LBB42_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5198,11 +5331,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB42_2
+; X86-NOBMI-NEXT:    je .LBB43_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB42_2:
+; X86-NOBMI-NEXT:  .LBB43_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -5210,11 +5343,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB42_4
+; X86-NOBMI-NEXT:    je .LBB43_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB42_4:
+; X86-NOBMI-NEXT:  .LBB43_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5246,11 +5379,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB42_2
+; X86-BMI1NOTBM-NEXT:    je .LBB43_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB42_2:
+; X86-BMI1NOTBM-NEXT:  .LBB43_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5258,11 +5391,11 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB42_4
+; X86-BMI1NOTBM-NEXT:    je .LBB43_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB42_4:
+; X86-BMI1NOTBM-NEXT:  .LBB43_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5293,22 +5426,22 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB42_2
+; X86-BMI1BMI2-NEXT:    je .LBB43_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB42_2:
+; X86-BMI1BMI2-NEXT:  .LBB43_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB42_4
+; X86-BMI1BMI2-NEXT:    je .LBB43_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB42_4:
+; X86-BMI1BMI2-NEXT:  .LBB43_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5411,11 +5544,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB43_2
+; X86-NOBMI-NEXT:    je .LBB44_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB43_2:
+; X86-NOBMI-NEXT:  .LBB44_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -5423,11 +5556,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB43_4
+; X86-NOBMI-NEXT:    je .LBB44_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB43_4:
+; X86-NOBMI-NEXT:  .LBB44_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5459,11 +5592,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB43_2
+; X86-BMI1NOTBM-NEXT:    je .LBB44_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB43_2:
+; X86-BMI1NOTBM-NEXT:  .LBB44_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5471,11 +5604,11 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB43_4
+; X86-BMI1NOTBM-NEXT:    je .LBB44_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB43_4:
+; X86-BMI1NOTBM-NEXT:  .LBB44_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5506,22 +5639,22 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB43_2
+; X86-BMI1BMI2-NEXT:    je .LBB44_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB43_2:
+; X86-BMI1BMI2-NEXT:  .LBB44_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB43_4
+; X86-BMI1BMI2-NEXT:    je .LBB44_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB43_4:
+; X86-BMI1BMI2-NEXT:  .LBB44_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5626,11 +5759,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB44_2
+; X86-NOBMI-NEXT:    je .LBB45_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB44_2:
+; X86-NOBMI-NEXT:  .LBB45_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebp
@@ -5638,11 +5771,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB44_4
+; X86-NOBMI-NEXT:    je .LBB45_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB44_4:
+; X86-NOBMI-NEXT:  .LBB45_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %ebp
@@ -5673,11 +5806,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB44_2
+; X86-BMI1NOTBM-NEXT:    je .LBB45_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB44_2:
+; X86-BMI1NOTBM-NEXT:  .LBB45_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
@@ -5685,11 +5818,11 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB44_4
+; X86-BMI1NOTBM-NEXT:    je .LBB45_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB44_4:
+; X86-BMI1NOTBM-NEXT:  .LBB45_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
@@ -5719,22 +5852,22 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB44_2
+; X86-BMI1BMI2-NEXT:    je .LBB45_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB44_2:
+; X86-BMI1BMI2-NEXT:  .LBB45_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB44_4
+; X86-BMI1BMI2-NEXT:    je .LBB45_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB44_4:
+; X86-BMI1BMI2-NEXT:  .LBB45_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
@@ -5835,11 +5968,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB45_2
+; X86-NOBMI-NEXT:    je .LBB46_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB45_2:
+; X86-NOBMI-NEXT:  .LBB46_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
@@ -5847,11 +5980,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrl %cl, %ebp
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB45_4
+; X86-NOBMI-NEXT:    je .LBB46_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %ebp, %ebx
 ; X86-NOBMI-NEXT:    xorl %ebp, %ebp
-; X86-NOBMI-NEXT:  .LBB45_4:
+; X86-NOBMI-NEXT:  .LBB46_4:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebp
 ; X86-NOBMI-NEXT:    pushl %ebx
@@ -5887,11 +6020,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB45_2
+; X86-BMI1NOTBM-NEXT:    je .LBB46_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB45_2:
+; X86-BMI1NOTBM-NEXT:  .LBB46_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
@@ -5899,11 +6032,11 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebp
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB45_4
+; X86-BMI1NOTBM-NEXT:    je .LBB46_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB45_4:
+; X86-BMI1NOTBM-NEXT:  .LBB46_4:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebx
@@ -5938,22 +6071,22 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_2
+; X86-BMI1BMI2-NEXT:    je .LBB46_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB45_2:
+; X86-BMI1BMI2-NEXT:  .LBB46_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebp, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_4
+; X86-BMI1BMI2-NEXT:    je .LBB46_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB45_4:
+; X86-BMI1BMI2-NEXT:  .LBB46_4:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
@@ -6067,10 +6200,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB46_2
+; X86-NOBMI-NEXT:    jne .LBB47_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB46_2:
+; X86-NOBMI-NEXT:  .LBB47_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %esi
@@ -6078,10 +6211,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB46_4
+; X86-NOBMI-NEXT:    jne .LBB47_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB46_4:
+; X86-NOBMI-NEXT:  .LBB47_4:
 ; X86-NOBMI-NEXT:    andl %edx, %eax
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
@@ -6096,10 +6229,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB46_2:
+; X86-BMI1NOTBM-NEXT:  .LBB47_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
@@ -6107,10 +6240,10 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB46_4:
+; X86-BMI1NOTBM-NEXT:  .LBB47_4:
 ; X86-BMI1NOTBM-NEXT:    andl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
@@ -6123,20 +6256,20 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_2
+; X86-BMI1BMI2-NEXT:    je .LBB47_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB46_2:
+; X86-BMI1BMI2-NEXT:  .LBB47_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %esi
 ; X86-BMI1BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_4
+; X86-BMI1BMI2-NEXT:    je .LBB47_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB46_4:
+; X86-BMI1BMI2-NEXT:  .LBB47_4:
 ; X86-BMI1BMI2-NEXT:    andl %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6196,10 +6329,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB47_2
+; X86-NOBMI-NEXT:    jne .LBB48_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB47_2:
+; X86-NOBMI-NEXT:  .LBB48_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -6220,10 +6353,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
+; X86-BMI1NOTBM-NEXT:  .LBB48_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -6239,10 +6372,10 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
+; X86-BMI1BMI2-NEXT:    je .LBB48_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
+; X86-BMI1BMI2-NEXT:  .LBB48_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6295,10 +6428,10 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB48_2
+; X86-NOBMI-NEXT:    jne .LBB49_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB48_2:
+; X86-NOBMI-NEXT:  .LBB49_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -6319,10 +6452,10 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB48_2:
+; X86-BMI1NOTBM-NEXT:  .LBB49_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -6338,10 +6471,10 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_2
+; X86-BMI1BMI2-NEXT:    je .LBB49_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB48_2:
+; X86-BMI1BMI2-NEXT:  .LBB49_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6762,36 +6895,36 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB54_2
+; X86-NOBMI-NEXT:    je .LBB55_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB54_2:
+; X86-NOBMI-NEXT:  .LBB55_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB54_4
+; X86-NOBMI-NEXT:    jne .LBB55_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB54_4:
+; X86-NOBMI-NEXT:  .LBB55_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB54_6
+; X86-NOBMI-NEXT:    jne .LBB55_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB54_6:
+; X86-NOBMI-NEXT:  .LBB55_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB54_8
+; X86-NOBMI-NEXT:    jne .LBB55_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB54_8:
+; X86-NOBMI-NEXT:  .LBB55_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -6810,36 +6943,36 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB54_2
+; X86-BMI1NOTBM-NEXT:    je .LBB55_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB54_2:
+; X86-BMI1NOTBM-NEXT:  .LBB55_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB54_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB55_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB54_4:
+; X86-BMI1NOTBM-NEXT:  .LBB55_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB54_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB55_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB54_6:
+; X86-BMI1NOTBM-NEXT:  .LBB55_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB54_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB55_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB54_8:
+; X86-BMI1NOTBM-NEXT:  .LBB55_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -6856,32 +6989,32 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB54_2
+; X86-BMI1BMI2-NEXT:    je .LBB55_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB54_2:
+; X86-BMI1BMI2-NEXT:  .LBB55_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB54_4
+; X86-BMI1BMI2-NEXT:    je .LBB55_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB54_4:
+; X86-BMI1BMI2-NEXT:  .LBB55_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB54_6
+; X86-BMI1BMI2-NEXT:    jne .LBB55_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB54_6:
+; X86-BMI1BMI2-NEXT:  .LBB55_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB54_8
+; X86-BMI1BMI2-NEXT:    jne .LBB55_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB54_8:
+; X86-BMI1BMI2-NEXT:  .LBB55_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -6932,36 +7065,36 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB55_2
+; X86-NOBMI-NEXT:    je .LBB56_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB55_2:
+; X86-NOBMI-NEXT:  .LBB56_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB55_4
+; X86-NOBMI-NEXT:    jne .LBB56_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB55_4:
+; X86-NOBMI-NEXT:  .LBB56_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB55_6
+; X86-NOBMI-NEXT:    jne .LBB56_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB55_6:
+; X86-NOBMI-NEXT:  .LBB56_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB55_8
+; X86-NOBMI-NEXT:    jne .LBB56_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB55_8:
+; X86-NOBMI-NEXT:  .LBB56_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -6980,36 +7113,36 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB55_2
+; X86-BMI1NOTBM-NEXT:    je .LBB56_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB55_2:
+; X86-BMI1NOTBM-NEXT:  .LBB56_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB55_4:
+; X86-BMI1NOTBM-NEXT:  .LBB56_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB55_6:
+; X86-BMI1NOTBM-NEXT:  .LBB56_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB55_8:
+; X86-BMI1NOTBM-NEXT:  .LBB56_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7026,32 +7159,32 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB55_2
+; X86-BMI1BMI2-NEXT:    je .LBB56_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB55_2:
+; X86-BMI1BMI2-NEXT:  .LBB56_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB55_4
+; X86-BMI1BMI2-NEXT:    je .LBB56_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB55_4:
+; X86-BMI1BMI2-NEXT:  .LBB56_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB55_6
+; X86-BMI1BMI2-NEXT:    jne .LBB56_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB55_6:
+; X86-BMI1BMI2-NEXT:  .LBB56_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB55_8
+; X86-BMI1BMI2-NEXT:    jne .LBB56_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB55_8:
+; X86-BMI1BMI2-NEXT:  .LBB56_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7107,36 +7240,36 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB56_2
+; X86-NOBMI-NEXT:    je .LBB57_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB56_2:
+; X86-NOBMI-NEXT:  .LBB57_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB56_4
+; X86-NOBMI-NEXT:    jne .LBB57_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB56_4:
+; X86-NOBMI-NEXT:  .LBB57_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB56_6
+; X86-NOBMI-NEXT:    jne .LBB57_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB56_6:
+; X86-NOBMI-NEXT:  .LBB57_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB56_8
+; X86-NOBMI-NEXT:    jne .LBB57_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB56_8:
+; X86-NOBMI-NEXT:  .LBB57_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7156,36 +7289,36 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB56_2
+; X86-BMI1NOTBM-NEXT:    je .LBB57_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_2:
+; X86-BMI1NOTBM-NEXT:  .LBB57_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB56_4:
+; X86-BMI1NOTBM-NEXT:  .LBB57_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB56_6:
+; X86-BMI1NOTBM-NEXT:  .LBB57_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_8:
+; X86-BMI1NOTBM-NEXT:  .LBB57_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7203,32 +7336,32 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_2
+; X86-BMI1BMI2-NEXT:    je .LBB57_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB56_2:
+; X86-BMI1BMI2-NEXT:  .LBB57_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_4
+; X86-BMI1BMI2-NEXT:    je .LBB57_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB56_4:
+; X86-BMI1BMI2-NEXT:  .LBB57_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB56_6
+; X86-BMI1BMI2-NEXT:    jne .LBB57_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB56_6:
+; X86-BMI1BMI2-NEXT:  .LBB57_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB56_8
+; X86-BMI1BMI2-NEXT:    jne .LBB57_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB56_8:
+; X86-BMI1BMI2-NEXT:  .LBB57_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7281,36 +7414,36 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB57_2
+; X86-NOBMI-NEXT:    je .LBB58_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB57_2:
+; X86-NOBMI-NEXT:  .LBB58_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB57_4
+; X86-NOBMI-NEXT:    jne .LBB58_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB57_4:
+; X86-NOBMI-NEXT:  .LBB58_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB57_6
+; X86-NOBMI-NEXT:    jne .LBB58_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB57_6:
+; X86-NOBMI-NEXT:  .LBB58_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB57_8
+; X86-NOBMI-NEXT:    jne .LBB58_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB57_8:
+; X86-NOBMI-NEXT:  .LBB58_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7330,36 +7463,36 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB57_2
+; X86-BMI1NOTBM-NEXT:    je .LBB58_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB57_2:
+; X86-BMI1NOTBM-NEXT:  .LBB58_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB57_4:
+; X86-BMI1NOTBM-NEXT:  .LBB58_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB57_6:
+; X86-BMI1NOTBM-NEXT:  .LBB58_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB57_8:
+; X86-BMI1NOTBM-NEXT:  .LBB58_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7377,32 +7510,32 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_2
+; X86-BMI1BMI2-NEXT:    je .LBB58_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB57_2:
+; X86-BMI1BMI2-NEXT:  .LBB58_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_4
+; X86-BMI1BMI2-NEXT:    je .LBB58_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB57_4:
+; X86-BMI1BMI2-NEXT:  .LBB58_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB57_6
+; X86-BMI1BMI2-NEXT:    jne .LBB58_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB57_6:
+; X86-BMI1BMI2-NEXT:  .LBB58_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB57_8
+; X86-BMI1BMI2-NEXT:    jne .LBB58_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB57_8:
+; X86-BMI1BMI2-NEXT:  .LBB58_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7461,37 +7594,37 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %al
-; X86-NOBMI-NEXT:    je .LBB58_2
+; X86-NOBMI-NEXT:    je .LBB59_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:  .LBB58_2:
+; X86-NOBMI-NEXT:  .LBB59_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
-; X86-NOBMI-NEXT:    jne .LBB58_4
+; X86-NOBMI-NEXT:    jne .LBB59_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebp
-; X86-NOBMI-NEXT:  .LBB58_4:
+; X86-NOBMI-NEXT:  .LBB59_4:
 ; X86-NOBMI-NEXT:    movl %ebp, %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edi
-; X86-NOBMI-NEXT:    jne .LBB58_6
+; X86-NOBMI-NEXT:    jne .LBB59_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %edx
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:  .LBB58_6:
+; X86-NOBMI-NEXT:  .LBB59_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    jne .LBB58_8
+; X86-NOBMI-NEXT:    jne .LBB59_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %edx, %esi
-; X86-NOBMI-NEXT:  .LBB58_8:
+; X86-NOBMI-NEXT:  .LBB59_8:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ecx
 ; X86-NOBMI-NEXT:    pushl %eax
@@ -7522,37 +7655,37 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB58_2
+; X86-BMI1NOTBM-NEXT:    je .LBB59_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB58_2:
+; X86-BMI1NOTBM-NEXT:  .LBB59_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB58_4:
+; X86-BMI1NOTBM-NEXT:  .LBB59_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebp, %esi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB58_6:
+; X86-BMI1NOTBM-NEXT:  .LBB59_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB58_8:
+; X86-BMI1NOTBM-NEXT:  .LBB59_8:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ecx
 ; X86-BMI1NOTBM-NEXT:    pushl %eax
@@ -7580,33 +7713,33 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB58_2
+; X86-BMI1BMI2-NEXT:    je .LBB59_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB58_2:
+; X86-BMI1BMI2-NEXT:  .LBB59_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_4
+; X86-BMI1BMI2-NEXT:    je .LBB59_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
 ; X86-BMI1BMI2-NEXT:    movl $0, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB58_4:
+; X86-BMI1BMI2-NEXT:  .LBB59_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edi
-; X86-BMI1BMI2-NEXT:    jne .LBB58_6
+; X86-BMI1BMI2-NEXT:    jne .LBB59_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB58_6:
+; X86-BMI1BMI2-NEXT:  .LBB59_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    jne .LBB58_8
+; X86-BMI1BMI2-NEXT:    jne .LBB59_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
-; X86-BMI1BMI2-NEXT:  .LBB58_8:
+; X86-BMI1BMI2-NEXT:  .LBB59_8:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ecx
 ; X86-BMI1BMI2-NEXT:    pushl %eax
@@ -7680,28 +7813,28 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB59_2
+; X86-NOBMI-NEXT:    je .LBB60_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB59_2:
+; X86-NOBMI-NEXT:  .LBB60_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB59_4
+; X86-NOBMI-NEXT:    je .LBB60_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB59_4:
+; X86-NOBMI-NEXT:  .LBB60_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB59_6
+; X86-NOBMI-NEXT:    jne .LBB60_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB59_6:
+; X86-NOBMI-NEXT:  .LBB60_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -7715,28 +7848,28 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB59_2
+; X86-BMI1NOTBM-NEXT:    je .LBB60_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB59_2:
+; X86-BMI1NOTBM-NEXT:  .LBB60_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB59_4
+; X86-BMI1NOTBM-NEXT:    je .LBB60_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB59_4:
+; X86-BMI1NOTBM-NEXT:  .LBB60_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB60_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB59_6:
+; X86-BMI1NOTBM-NEXT:  .LBB60_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -7748,27 +7881,27 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_2
+; X86-BMI1BMI2-NEXT:    je .LBB60_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB59_2:
+; X86-BMI1BMI2-NEXT:  .LBB60_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_4
+; X86-BMI1BMI2-NEXT:    je .LBB60_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB59_4:
+; X86-BMI1BMI2-NEXT:  .LBB60_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_6
+; X86-BMI1BMI2-NEXT:    je .LBB60_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB59_6:
+; X86-BMI1BMI2-NEXT:  .LBB60_6:
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_d0:
@@ -7819,10 +7952,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB60_2
+; X86-NOBMI-NEXT:    jne .LBB61_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB60_2:
+; X86-NOBMI-NEXT:  .LBB61_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -7843,10 +7976,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB60_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB61_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB60_2:
+; X86-BMI1NOTBM-NEXT:  .LBB61_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -7862,10 +7995,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB60_2
+; X86-BMI1BMI2-NEXT:    je .LBB61_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB60_2:
+; X86-BMI1BMI2-NEXT:  .LBB61_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 5757d6f6bc581..6564486bd526e 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -1026,6 +1026,90 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
   ret i32 %truncmasked
 }
 
+; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_32_a3:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %edx
+; X86-NOBMI-NEXT:    shll %cl, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB14_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %edx, %eax
+; X86-NOBMI-NEXT:  .LBB14_2:
+; X86-NOBMI-NEXT:    decl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bzhi64_32_a3:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $1, %edx
+; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
+; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB14_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
+; X86-BMI1NOTBM-NEXT:  .LBB14_2:
+; X86-BMI1NOTBM-NEXT:    decl %eax
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bzhi64_32_a3:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    jne .LBB14_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    movl $1, %eax
+; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT:  .LBB14_2:
+; X86-BMI1BMI2-NEXT:    decl %eax
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_a3:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    decl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_a3:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movl $1, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shlq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    decl %eax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_a3:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    movl $1, %eax
+; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    decl %eax
+; X64-BMI1BMI2-NEXT:    andl %edi, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %onebit = shl i64 1, %numlowbits
+  %mask = add nsw i64 %onebit, 4294967295
+  %masked = and i64 %mask, %val
+  %truncmasked = trunc i64 %masked to i32
+  ret i32 %truncmasked
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern b. 32-bit
 ; ---------------------------------------------------------------------------- ;
@@ -1296,11 +1380,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB19_2
+; X86-NOBMI-NEXT:    je .LBB20_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB19_2:
+; X86-NOBMI-NEXT:  .LBB20_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -1316,11 +1400,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB19_2
+; X86-BMI1NOTBM-NEXT:    je .LBB20_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB19_2:
+; X86-BMI1NOTBM-NEXT:  .LBB20_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1334,11 +1418,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB19_2
+; X86-BMI1BMI2-NEXT:    je .LBB20_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB19_2:
+; X86-BMI1BMI2-NEXT:  .LBB20_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1379,11 +1463,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB20_2
+; X86-NOBMI-NEXT:    je .LBB21_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB20_2:
+; X86-NOBMI-NEXT:  .LBB21_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -1399,11 +1483,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB20_2
+; X86-BMI1NOTBM-NEXT:    je .LBB21_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB20_2:
+; X86-BMI1NOTBM-NEXT:  .LBB21_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1417,11 +1501,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB20_2
+; X86-BMI1BMI2-NEXT:    je .LBB21_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB20_2:
+; X86-BMI1BMI2-NEXT:  .LBB21_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1467,11 +1551,11 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB21_2
+; X86-NOBMI-NEXT:    je .LBB22_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB21_2:
+; X86-NOBMI-NEXT:  .LBB22_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl 4(%esi), %edx
@@ -1489,11 +1573,11 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB21_2
+; X86-BMI1NOTBM-NEXT:    je .LBB22_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB21_2:
+; X86-BMI1NOTBM-NEXT:  .LBB22_2:
 ; X86-BMI1NOTBM-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1508,11 +1592,11 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB21_2
+; X86-BMI1BMI2-NEXT:    je .LBB22_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB21_2:
+; X86-BMI1BMI2-NEXT:  .LBB22_2:
 ; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1556,11 +1640,11 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB22_2
+; X86-NOBMI-NEXT:    je .LBB23_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB22_2:
+; X86-NOBMI-NEXT:  .LBB23_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl 4(%esi), %edx
@@ -1578,11 +1662,11 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB22_2
+; X86-BMI1NOTBM-NEXT:    je .LBB23_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB22_2:
+; X86-BMI1NOTBM-NEXT:  .LBB23_2:
 ; X86-BMI1NOTBM-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1597,11 +1681,11 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB22_2
+; X86-BMI1BMI2-NEXT:    je .LBB23_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB22_2:
+; X86-BMI1BMI2-NEXT:  .LBB23_2:
 ; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
 ; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1646,11 +1730,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB23_2
+; X86-NOBMI-NEXT:    je .LBB24_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB23_2:
+; X86-NOBMI-NEXT:  .LBB24_2:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -1666,11 +1750,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB23_2
+; X86-BMI1NOTBM-NEXT:    je .LBB24_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB23_2:
+; X86-BMI1NOTBM-NEXT:  .LBB24_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -1684,11 +1768,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB23_2
+; X86-BMI1BMI2-NEXT:    je .LBB24_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB23_2:
+; X86-BMI1BMI2-NEXT:  .LBB24_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
@@ -1731,10 +1815,10 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB24_2
+; X86-NOBMI-NEXT:    jne .LBB25_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB24_2:
+; X86-NOBMI-NEXT:  .LBB25_2:
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
@@ -1746,10 +1830,10 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB24_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB25_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB24_2:
+; X86-BMI1NOTBM-NEXT:  .LBB25_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -1758,11 +1842,11 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB24_2
+; X86-BMI1BMI2-NEXT:    jne .LBB25_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB24_2:
+; X86-BMI1BMI2-NEXT:  .LBB25_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
@@ -1917,10 +2001,10 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB27_2
+; X86-NOBMI-NEXT:    jne .LBB28_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB27_2:
+; X86-NOBMI-NEXT:  .LBB28_2:
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
@@ -1932,10 +2016,10 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB27_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB28_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB27_2:
+; X86-BMI1NOTBM-NEXT:  .LBB28_2:
 ; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -1944,11 +2028,11 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB27_2
+; X86-BMI1BMI2-NEXT:    jne .LBB28_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB27_2:
+; X86-BMI1BMI2-NEXT:  .LBB28_2:
 ; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
@@ -2581,11 +2665,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB33_2
+; X86-NOBMI-NEXT:    je .LBB34_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB33_2:
+; X86-NOBMI-NEXT:  .LBB34_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2612,11 +2696,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB33_2
+; X86-BMI1NOTBM-NEXT:    je .LBB34_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB33_2:
+; X86-BMI1NOTBM-NEXT:  .LBB34_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -2642,11 +2726,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB33_2
+; X86-BMI1BMI2-NEXT:    je .LBB34_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB33_2:
+; X86-BMI1BMI2-NEXT:  .LBB34_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -2738,11 +2822,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB34_2
+; X86-NOBMI-NEXT:    je .LBB35_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB34_2:
+; X86-NOBMI-NEXT:  .LBB35_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -2769,11 +2853,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB34_2
+; X86-BMI1NOTBM-NEXT:    je .LBB35_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB34_2:
+; X86-BMI1NOTBM-NEXT:  .LBB35_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -2799,11 +2883,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB34_2
+; X86-BMI1BMI2-NEXT:    je .LBB35_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB34_2:
+; X86-BMI1BMI2-NEXT:  .LBB35_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -2897,11 +2981,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB35_2
+; X86-NOBMI-NEXT:    je .LBB36_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB35_2:
+; X86-NOBMI-NEXT:  .LBB36_2:
 ; X86-NOBMI-NEXT:    movl (%edx), %esi
 ; X86-NOBMI-NEXT:    andl %eax, %esi
 ; X86-NOBMI-NEXT:    movl 4(%edx), %edi
@@ -2931,11 +3015,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB35_2
+; X86-BMI1NOTBM-NEXT:    je .LBB36_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB35_2:
+; X86-BMI1NOTBM-NEXT:  .LBB36_2:
 ; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
 ; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
@@ -2964,11 +3048,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB35_2
+; X86-BMI1BMI2-NEXT:    je .LBB36_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB35_2:
+; X86-BMI1BMI2-NEXT:  .LBB36_2:
 ; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
 ; X86-BMI1BMI2-NEXT:    andl %eax, %esi
 ; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
@@ -3051,11 +3135,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB36_2
+; X86-NOBMI-NEXT:    je .LBB37_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB36_2:
+; X86-NOBMI-NEXT:  .LBB37_2:
 ; X86-NOBMI-NEXT:    movl (%edx), %esi
 ; X86-NOBMI-NEXT:    andl %eax, %esi
 ; X86-NOBMI-NEXT:    movl 4(%edx), %edi
@@ -3085,11 +3169,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB36_2
+; X86-BMI1NOTBM-NEXT:    je .LBB37_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB36_2:
+; X86-BMI1NOTBM-NEXT:  .LBB37_2:
 ; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
 ; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
@@ -3118,11 +3202,11 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB36_2
+; X86-BMI1BMI2-NEXT:    je .LBB37_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB36_2:
+; X86-BMI1BMI2-NEXT:  .LBB37_2:
 ; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
 ; X86-BMI1BMI2-NEXT:    andl %eax, %esi
 ; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
@@ -3206,11 +3290,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB37_2
+; X86-NOBMI-NEXT:    je .LBB38_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    xorl %edi, %edi
-; X86-NOBMI-NEXT:  .LBB37_2:
+; X86-NOBMI-NEXT:  .LBB38_2:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
@@ -3237,11 +3321,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB37_2
+; X86-BMI1NOTBM-NEXT:    je .LBB38_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB37_2:
+; X86-BMI1NOTBM-NEXT:  .LBB38_2:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
@@ -3267,11 +3351,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB37_2
+; X86-BMI1BMI2-NEXT:    je .LBB38_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB37_2:
+; X86-BMI1BMI2-NEXT:  .LBB38_2:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
@@ -3363,10 +3447,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB38_2
+; X86-NOBMI-NEXT:    jne .LBB39_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB38_2:
+; X86-NOBMI-NEXT:  .LBB39_2:
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -3379,10 +3463,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB38_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB39_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB38_2:
+; X86-BMI1NOTBM-NEXT:  .LBB39_2:
 ; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -3394,10 +3478,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB38_2
+; X86-BMI1BMI2-NEXT:    je .LBB39_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB38_2:
+; X86-BMI1BMI2-NEXT:  .LBB39_2:
 ; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
@@ -3777,26 +3861,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB45_2
+; X86-NOBMI-NEXT:    jne .LBB46_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB45_2:
+; X86-NOBMI-NEXT:  .LBB46_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB45_4
+; X86-NOBMI-NEXT:    jne .LBB46_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB45_4:
+; X86-NOBMI-NEXT:  .LBB46_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB45_6
+; X86-NOBMI-NEXT:    jne .LBB46_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB45_6:
+; X86-NOBMI-NEXT:  .LBB46_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3816,26 +3900,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB45_2:
+; X86-BMI1NOTBM-NEXT:  .LBB46_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB45_4:
+; X86-BMI1NOTBM-NEXT:  .LBB46_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB45_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB46_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB45_6:
+; X86-BMI1NOTBM-NEXT:  .LBB46_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3853,22 +3937,22 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_2
+; X86-BMI1BMI2-NEXT:    je .LBB46_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB45_2:
+; X86-BMI1BMI2-NEXT:  .LBB46_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB45_4
+; X86-BMI1BMI2-NEXT:    jne .LBB46_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB45_4:
+; X86-BMI1BMI2-NEXT:  .LBB46_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB45_6
+; X86-BMI1BMI2-NEXT:    jne .LBB46_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB45_6:
+; X86-BMI1BMI2-NEXT:  .LBB46_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3914,26 +3998,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB46_2
+; X86-NOBMI-NEXT:    jne .LBB47_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB46_2:
+; X86-NOBMI-NEXT:  .LBB47_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB46_4
+; X86-NOBMI-NEXT:    jne .LBB47_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB46_4:
+; X86-NOBMI-NEXT:  .LBB47_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB46_6
+; X86-NOBMI-NEXT:    jne .LBB47_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB46_6:
+; X86-NOBMI-NEXT:  .LBB47_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3953,26 +4037,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB46_2:
+; X86-BMI1NOTBM-NEXT:  .LBB47_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB46_4:
+; X86-BMI1NOTBM-NEXT:  .LBB47_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB46_6:
+; X86-BMI1NOTBM-NEXT:  .LBB47_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3990,22 +4074,22 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_2
+; X86-BMI1BMI2-NEXT:    je .LBB47_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB46_2:
+; X86-BMI1BMI2-NEXT:  .LBB47_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB46_4
+; X86-BMI1BMI2-NEXT:    jne .LBB47_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB46_4:
+; X86-BMI1BMI2-NEXT:  .LBB47_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB46_6
+; X86-BMI1BMI2-NEXT:    jne .LBB47_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB46_6:
+; X86-BMI1BMI2-NEXT:  .LBB47_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4055,26 +4139,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB47_2
+; X86-NOBMI-NEXT:    jne .LBB48_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB47_2:
+; X86-NOBMI-NEXT:  .LBB48_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB47_4
+; X86-NOBMI-NEXT:    jne .LBB48_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB47_4:
+; X86-NOBMI-NEXT:  .LBB48_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB47_6
+; X86-NOBMI-NEXT:    jne .LBB48_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB47_6:
+; X86-NOBMI-NEXT:  .LBB48_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4095,26 +4179,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
+; X86-BMI1NOTBM-NEXT:  .LBB48_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_4:
+; X86-BMI1NOTBM-NEXT:  .LBB48_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB47_6:
+; X86-BMI1NOTBM-NEXT:  .LBB48_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4133,22 +4217,22 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
+; X86-BMI1BMI2-NEXT:    je .LBB48_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
+; X86-BMI1BMI2-NEXT:  .LBB48_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB47_4
+; X86-BMI1BMI2-NEXT:    jne .LBB48_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_4:
+; X86-BMI1BMI2-NEXT:  .LBB48_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB47_6
+; X86-BMI1BMI2-NEXT:    jne .LBB48_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB47_6:
+; X86-BMI1BMI2-NEXT:  .LBB48_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4196,26 +4280,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB48_2
+; X86-NOBMI-NEXT:    jne .LBB49_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB48_2:
+; X86-NOBMI-NEXT:  .LBB49_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB48_4
+; X86-NOBMI-NEXT:    jne .LBB49_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB48_4:
+; X86-NOBMI-NEXT:  .LBB49_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB48_6
+; X86-NOBMI-NEXT:    jne .LBB49_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB48_6:
+; X86-NOBMI-NEXT:  .LBB49_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4236,26 +4320,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB48_2:
+; X86-BMI1NOTBM-NEXT:  .LBB49_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB48_4:
+; X86-BMI1NOTBM-NEXT:  .LBB49_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB48_6:
+; X86-BMI1NOTBM-NEXT:  .LBB49_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4274,22 +4358,22 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_2
+; X86-BMI1BMI2-NEXT:    je .LBB49_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB48_2:
+; X86-BMI1BMI2-NEXT:  .LBB49_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB48_4
+; X86-BMI1BMI2-NEXT:    jne .LBB49_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB48_4:
+; X86-BMI1BMI2-NEXT:  .LBB49_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB48_6
+; X86-BMI1BMI2-NEXT:    jne .LBB49_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB48_6:
+; X86-BMI1BMI2-NEXT:  .LBB49_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4339,18 +4423,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB49_2
+; X86-NOBMI-NEXT:    je .LBB50_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB49_2:
+; X86-NOBMI-NEXT:  .LBB50_2:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB49_4
+; X86-NOBMI-NEXT:    jne .LBB50_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB49_4:
+; X86-NOBMI-NEXT:  .LBB50_4:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -4365,18 +4449,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB49_2
+; X86-BMI1NOTBM-NEXT:    je .LBB50_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB49_2:
+; X86-BMI1NOTBM-NEXT:  .LBB50_2:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB50_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB49_4:
+; X86-BMI1NOTBM-NEXT:  .LBB50_4:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -4389,17 +4473,17 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB49_2
+; X86-BMI1BMI2-NEXT:    je .LBB50_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB49_2:
+; X86-BMI1BMI2-NEXT:  .LBB50_2:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB49_4
+; X86-BMI1BMI2-NEXT:    je .LBB50_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB49_4:
+; X86-BMI1BMI2-NEXT:  .LBB50_4:
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_d0:

From dfe02bc4e9fc80398614b7acd166c01cd02f1621 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 2 Jun 2019 17:23:53 +0000
Subject: [PATCH 0849/1176] Add mangling test coverage for non-volatile const
 member pointers

llvm-svn: 362331
---
 clang/test/CodeGenCXX/mangle-ms.cpp |  4 ++++
 llvm/test/Demangle/ms-mangle.test   | 10 ++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenCXX/mangle-ms.cpp b/clang/test/CodeGenCXX/mangle-ms.cpp
index 75ca3af8250a1..4bb376df428aa 100644
--- a/clang/test/CodeGenCXX/mangle-ms.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms.cpp
@@ -120,6 +120,10 @@ FunT FunArr[10][20];
 int (__stdcall *j)(signed char, unsigned char);
 // CHECK-DAG: @"?j@@3P6GHCE@ZA"
 
+const char foo2::*m;
+// CHECK-DAG: @"?m@@3PRfoo@@DR1@"
+// X64-DAG:   @"?m@@3PERfoo@@DER1@"
+
 const volatile char foo2::*k;
 // CHECK-DAG: @"?k@@3PTfoo@@DT1@"
 // X64-DAG:   @"?k@@3PETfoo@@DET1@"
diff --git a/llvm/test/Demangle/ms-mangle.test b/llvm/test/Demangle/ms-mangle.test
index cd65d8306a1ae..e7badeee64223 100644
--- a/llvm/test/Demangle/ms-mangle.test
+++ b/llvm/test/Demangle/ms-mangle.test
@@ -103,6 +103,12 @@
 ?funptr@@YAP6AHXZXZ
 ; CHECK: int (__cdecl * __cdecl funptr(void))(void)
 
+?m@@3PRfoo@@DR1@
+; CHECK: char const foo::*m
+
+?m@@3PERfoo@@DER1@
+; CHECK: char const foo::*m
+
 ?k@@3PTfoo@@DT1@
 ; CHECK: char const volatile foo::*k
 
@@ -393,5 +399,5 @@
 ??0?$L@V?$H@PAH@PR26029@@@PR26029@@QAE@XZ
 ; CHECK: __thiscall PR26029::L<class PR26029::H<int *>>::L<class PR26029::H<int *>>(void)
 
-; ??$emplace_back@ABH@?$vector@HV?$allocator@H@std@@@std@@QAE?A?<decltype-auto>@@ABH@Z
-<decltype-auto> __thiscall std::vector<int, class std::allocator<int>>::emplace_back<int const &>(int const &)
\ No newline at end of file
+??$emplace_back@ABH@?$vector@HV?$allocator@H@std@@@std@@QAE?A?<decltype-auto>@@ABH@Z
+; CHECK: <decltype-auto> __thiscall std::vector<int, class std::allocator<int>>::emplace_back<int const &>(int const &)

From 869308dd55bf702d8fa2db08d83e05e27b72cc8a Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 2 Jun 2019 17:29:26 +0000
Subject: [PATCH 0850/1176] Add demangling test coverage for unsigned short,
 unsigned long

llvm-svn: 362332
---
 llvm/test/Demangle/ms-basic.test | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/test/Demangle/ms-basic.test b/llvm/test/Demangle/ms-basic.test
index c3ef53afdaeef..b80d87c6455a9 100644
--- a/llvm/test/Demangle/ms-basic.test
+++ b/llvm/test/Demangle/ms-basic.test
@@ -23,6 +23,12 @@
 ?x@@3PEAEEA
 ; CHECK: unsigned char *x
 
+?y@@3PEAGEA
+; CHECK: unsigned short *y
+
+?z@@3PEAKEA
+; CHECK: unsigned long *z
+
 ?x@@3PEAY1NKM@5HEA
 ; CHECK: int (*x)[3500][6]
 

From b5cd6163f43ef86e83a2192fd6483f56bba7ebe3 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 2 Jun 2019 17:41:07 +0000
Subject: [PATCH 0851/1176] Remove code path that's dead after r358835

llvm-svn: 362333
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 425c7d3fd19bc..ed9052f40570a 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -1086,10 +1086,8 @@ static void writeHexDigit(char *Buffer, uint8_t Digit) {
 }
 
 static void outputHex(OutputStream &OS, unsigned C) {
-  if (C == 0) {
-    OS << "\\x00";
-    return;
-  }
+  assert (C != 0);
+
   // It's easier to do the math if we can work from right to left, but we need
   // to print the numbers from left to right.  So render this into a temporary
   // buffer first, then output the temporary buffer.  Each byte is of the form

From 232a8785c0110a95974b65bddba4884427f959b2 Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Sun, 2 Jun 2019 17:56:26 +0000
Subject: [PATCH 0852/1176] Fix bug in r362328 -- append to dependency list
 instead of overwrite it.

llvm-svn: 362334
---
 clang/test/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 04dbaf2e61949..99524a5219ea9 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -121,7 +121,7 @@ endif()
 
 if (CLANG_ENABLE_STATIC_ANALYZER)
   if (LLVM_ENABLE_PLUGINS)
-    set(CLANG_TEST_DEPS
+    list(APPEND CLANG_TEST_DEPS
       SampleAnalyzerPlugin
       CheckerDependencyHandlingAnalyzerPlugin
       CheckerOptionHandlingAnalyzerPlugin

From 59a8db628b7f1ba9905f45883ea8810e4dcee924 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 18:06:42 +0000
Subject: [PATCH 0853/1176] [TTI][X86] Cleanup getMaskedMemoryOpCost. NFCI.

Prep work before resurrecting D61257.

llvm-svn: 362335
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7501834ea4cb3..edd6384625847 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2346,6 +2346,9 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
                                       unsigned Alignment,
                                       unsigned AddressSpace) {
+  bool IsLoad = (Instruction::Load == Opcode);
+  bool IsStore = (Instruction::Store == Opcode);
+
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
@@ -2353,10 +2356,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
 
   unsigned NumElem = SrcVTy->getVectorNumElements();
   VectorType *MaskTy =
-    VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
-  if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) ||
-      (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) ||
-      !isPowerOf2_32(NumElem)) {
+      VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
+  if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) ||
+      (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) {
     // Scalarization
     int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
     int ScalarCompareCost = getCmpSelInstrCost(
@@ -2364,8 +2366,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     int BranchCost = getCFInstrCost(Instruction::Br);
     int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
 
-    int ValueSplitCost = getScalarizationOverhead(
-        SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
+    int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
     int MemopCost =
         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
                                          Alignment, AddressSpace);
@@ -2388,11 +2389,13 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     // Expanding requires fill mask with zeroes
     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
   }
+
+  // Pre-AVX512 - each maskmov costs 4.
   if (!ST->hasAVX512())
-    return Cost + LT.first*4; // Each maskmov costs 4
+    return Cost + LT.first * 4;
 
   // AVX-512 masked load/store is cheapper
-  return Cost+LT.first;
+  return Cost + LT.first;
 }
 
 int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,

From 87346a15db59fbbb9828f232e8025dca4e610163 Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Sun, 2 Jun 2019 18:53:44 +0000
Subject: [PATCH 0854/1176] Transform lambda expression captures when
 transforming an expression to potentially-evaluated.

This ensures that every potentially-evaluated expression is built in a
potentially-evaluated context. No functionality change intended.

llvm-svn: 362336
---
 clang/include/clang/Sema/Sema.h | 12 ++++++------
 clang/lib/Sema/SemaExpr.cpp     | 17 ++++++-----------
 clang/lib/Sema/SemaLambda.cpp   | 27 +++++++++++++++------------
 clang/lib/Sema/TreeTransform.h  | 28 +++++++++++++++++++++++-----
 4 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a6db2f046bbd9..b4f721c091977 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -5710,12 +5710,12 @@ class Sema {
                                          LambdaCaptureDefault CaptureDefault);
 
   /// Start the definition of a lambda expression.
-  CXXMethodDecl *startLambdaDefinition(CXXRecordDecl *Class,
-                                       SourceRange IntroducerRange,
-                                       TypeSourceInfo *MethodType,
-                                       SourceLocation EndLoc,
-                                       ArrayRef<ParmVarDecl *> Params,
-                                       bool IsConstexprSpecified);
+  CXXMethodDecl *
+  startLambdaDefinition(CXXRecordDecl *Class, SourceRange IntroducerRange,
+                        TypeSourceInfo *MethodType, SourceLocation EndLoc,
+                        ArrayRef<ParmVarDecl *> Params,
+                        bool IsConstexprSpecified,
+                        Optional<std::pair<unsigned, Decl *>> Mangling = None);
 
   /// Endow the lambda scope info with the relevant properties.
   void buildLambdaScope(sema::LambdaScopeInfo *LSI,
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5746a102b7124..72b61b8e5842f 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14579,6 +14579,7 @@ namespace {
 
     // Make sure we redo semantic analysis
     bool AlwaysRebuild() { return true; }
+    bool ReplacingOriginal() { return true; }
 
     // We need to special-case DeclRefExprs referring to FieldDecls which
     // are not part of a member pointer formation; normal TreeTransforming
@@ -14605,10 +14606,11 @@ namespace {
       return BaseTransform::TransformUnaryOperator(E);
     }
 
-    ExprResult TransformLambdaExpr(LambdaExpr *E) {
-      // Lambdas never need to be transformed.
-      return E;
-    }
+    // The body of a lambda-expression is in a separate expression evaluation
+    // context so never needs to be transformed.
+    // FIXME: Ideally we wouldn't transform the closure type either, and would
+    // just recreate the capture expressions and lambda expression.
+    StmtResult TransformLambdaBody(Stmt *Body) { return Body; }
   };
 }
 
@@ -14715,13 +14717,6 @@ void Sema::PopExpressionEvaluationContext() {
 
       for (const auto *L : Rec.Lambdas)
         Diag(L->getBeginLoc(), D);
-    } else {
-      // Mark the capture expressions odr-used. This was deferred
-      // during lambda expression creation.
-      for (auto *Lambda : Rec.Lambdas) {
-        for (auto *C : Lambda->capture_inits())
-          MarkDeclarationsReferencedInExpr(C);
-      }
     }
   }
 
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 4b832f5653025..ccc8f6f42a5a2 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -367,12 +367,11 @@ Sema::ExpressionEvaluationContextRecord::getMangleNumberingContext(
   return *MangleNumbering;
 }
 
-CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
-                                           SourceRange IntroducerRange,
-                                           TypeSourceInfo *MethodTypeInfo,
-                                           SourceLocation EndLoc,
-                                           ArrayRef<ParmVarDecl *> Params,
-                                           const bool IsConstexprSpecified) {
+CXXMethodDecl *Sema::startLambdaDefinition(
+    CXXRecordDecl *Class, SourceRange IntroducerRange,
+    TypeSourceInfo *MethodTypeInfo, SourceLocation EndLoc,
+    ArrayRef<ParmVarDecl *> Params, const bool IsConstexprSpecified,
+    Optional<std::pair<unsigned, Decl *>> Mangling) {
   QualType MethodType = MethodTypeInfo->getType();
   TemplateParameterList *TemplateParams =
             getGenericLambdaTemplateParameterList(getCurLambda(), *this);
@@ -438,12 +437,16 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
       P->setOwningFunction(Method);
   }
 
-  Decl *ManglingContextDecl;
-  if (MangleNumberingContext *MCtx =
-          getCurrentMangleNumberContext(Class->getDeclContext(),
-                                        ManglingContextDecl)) {
-    unsigned ManglingNumber = MCtx->getManglingNumber(Method);
-    Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
+  if (Mangling) {
+    Class->setLambdaMangling(Mangling->first, Mangling->second);
+  } else {
+    Decl *ManglingContextDecl;
+    if (MangleNumberingContext *MCtx =
+            getCurrentMangleNumberContext(Class->getDeclContext(),
+                                          ManglingContextDecl)) {
+      unsigned ManglingNumber = MCtx->getManglingNumber(Method);
+      Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
+    }
   }
 
   return Method;
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 6620885f23961..f46193502b6a6 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -148,6 +148,11 @@ class TreeTransform {
   /// statement node appears at most once in its containing declaration.
   bool AlwaysRebuild() { return SemaRef.ArgumentPackSubstitutionIndex != -1; }
 
+  /// Whether the transformation is forming an expression or statement that
+  /// replaces the original. In this case, we'll reuse mangling numbers from
+  /// existing lambdas.
+  bool ReplacingOriginal() { return false; }
+
   /// Returns the location of the entity being transformed, if that
   /// information was not available elsewhere in the AST.
   ///
@@ -654,6 +659,9 @@ class TreeTransform {
                                           Optional<unsigned> NumExpansions,
                                           bool ExpectParameterPack);
 
+  /// Transform the body of a lambda-expression.
+  StmtResult TransformLambdaBody(Stmt *Body);
+
   QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL);
 
   StmtResult TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr);
@@ -11197,8 +11205,6 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
 
     auto SubstInitCapture = [&](SourceLocation EllipsisLoc,
                                 Optional<unsigned> NumExpansions) {
-      EnterExpressionEvaluationContext EEEC(
-          getSema(), Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
       ExprResult NewExprInitResult = getDerived().TransformInitializer(
           OldVD->getInit(), OldVD->getInitStyle() == VarDecl::CallInit);
 
@@ -11289,19 +11295,25 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
   LSI->GLTemplateParameterList = TPL;
 
   // Create the local class that will describe the lambda.
+  CXXRecordDecl *OldClass = E->getLambdaClass();
   CXXRecordDecl *Class
     = getSema().createLambdaClosureType(E->getIntroducerRange(),
                                         NewCallOpTSI,
                                         /*KnownDependent=*/false,
                                         E->getCaptureDefault());
-  getDerived().transformedLocalDecl(E->getLambdaClass(), {Class});
+  getDerived().transformedLocalDecl(OldClass, {Class});
+
+  Optional<std::pair<unsigned, Decl*>> Mangling;
+  if (getDerived().ReplacingOriginal())
+    Mangling = std::make_pair(OldClass->getLambdaManglingNumber(),
+                              OldClass->getLambdaContextDecl());
 
   // Build the call operator.
   CXXMethodDecl *NewCallOperator = getSema().startLambdaDefinition(
       Class, E->getIntroducerRange(), NewCallOpTSI,
       E->getCallOperator()->getEndLoc(),
       NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams(),
-      E->getCallOperator()->isConstexpr());
+      E->getCallOperator()->isConstexpr(), Mangling);
 
   LSI->CallOperator = NewCallOperator;
 
@@ -11465,7 +11477,7 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
 
   // Instantiate the body of the lambda expression.
   StmtResult Body =
-      Invalid ? StmtError() : getDerived().TransformStmt(E->getBody());
+      Invalid ? StmtError() : getDerived().TransformLambdaBody(E->getBody());
 
   // ActOnLambda* will pop the function scope for us.
   FuncScopeCleanup.disable();
@@ -11489,6 +11501,12 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
                                    &LSICopy);
 }
 
+template<typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformLambdaBody(Stmt *S) {
+  return TransformStmt(S);
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr(

From a7bc31ebc6deddc99bcfc4ca15fc6d2573544f1c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 18:58:46 +0000
Subject: [PATCH 0855/1176] [DAGCombiner] Replace masked loads with a zero mask
 with the passthru value

Similar to what was recently done for gathers in r362015.

llvm-svn: 362337
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++--
 .../CodeGen/X86/avx512-intrinsics-upgrade.ll  | 15 ++-----
 llvm/test/CodeGen/X86/masked_load.ll          | 12 ++++++
 llvm/test/CodeGen/X86/vmaskmov-offset.ll      | 40 ++++++++++++-------
 4 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 949c14f3ce46b..a866dbb250a5c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8216,13 +8216,17 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
   SDValue Mask = MLD->getMask();
   SDLoc DL(N);
 
+  // Zap masked loads with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MLOAD result requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index 8c2e07cf1f7cd..0a1850e5020c2 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -8264,18 +8264,9 @@ define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
 
 ; Make sure we don't crash if you pass 0 to the mask.
 define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
-; X86-LABEL: test_zero_mask_expand_load_pd_512:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
-; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
-; X86-NEXT:    retl ## encoding: [0xc3]
-;
-; X64-LABEL: test_zero_mask_expand_load_pd_512:
-; X64:       ## %bb.0:
-; X64-NEXT:    kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
-; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
-; X64-NEXT:    retq ## encoding: [0xc3]
+; CHECK-LABEL: test_zero_mask_expand_load_pd_512:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
   ret <8 x double> %res
 }
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index 4767669b93c69..ccd034eb68b89 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -7250,6 +7250,18 @@ define i32 @pr38986(i1 %c, i32* %p) {
  ret i32 %ret
 }
 
+define <2 x double> @zero_mask(<2 x double>* %addr, <2 x double> %dst) {
+; SSE-LABEL: zero_mask:
+; SSE:       ## %bb.0:
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: zero_mask:
+; AVX:       ## %bb.0:
+; AVX-NEXT:    retq
+  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> zeroinitializer, <2 x double> %dst)
+  ret <2 x double> %res
+}
+
 declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
 declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
diff --git a/llvm/test/CodeGen/X86/vmaskmov-offset.ll b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
index a8dbbd0c1655a..4137988614131 100644
--- a/llvm/test/CodeGen/X86/vmaskmov-offset.ll
+++ b/llvm/test/CodeGen/X86/vmaskmov-offset.ll
@@ -4,39 +4,51 @@
 declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
 declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
 
-define void @test_v16f() local_unnamed_addr {
+define void @test_v16f(<16 x i32> %x) {
   ; CHECK-LABEL: name: test_v16f
   ; CHECK: bb.0.bb:
+  ; CHECK:   liveins: $ymm0, $ymm1
+  ; CHECK:   [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
+  ; CHECK:   [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
   ; CHECK:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
-  ; CHECK:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
-  ; CHECK:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
-  ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
-  ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPSYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
+  ; CHECK:   [[VPCMPEQDYrr:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY]], [[AVX_SET0_]]
+  ; CHECK:   [[VPCMPEQDYrr1:%[0-9]+]]:vr256 = VPCMPEQDYrr [[COPY1]], [[AVX_SET0_]]
+  ; CHECK:   [[VMASKMOVPSYrm:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
+  ; CHECK:   [[VMASKMOVPSYrm1:%[0-9]+]]:vr256 = VMASKMOVPSYrm [[VPCMPEQDYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
+  ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQDYrr]], killed [[VMASKMOVPSYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
+  ; CHECK:   VMASKMOVPSYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQDYrr1]], killed [[VMASKMOVPSYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
   ; CHECK:   RET 0
 bb:
   %stack_input_vec = alloca <16 x float>, align 64
   %stack_output_vec = alloca <16 x float>, align 64
-  %masked_loaded_vec = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* nonnull %stack_input_vec, i32 4, <16 x i1> undef, <16 x float> undef)
-  call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %masked_loaded_vec, <16 x float>* nonnull %stack_output_vec, i32 4, <16 x i1> undef)
+  %mask = icmp eq <16 x i32> %x, zeroinitializer
+  %masked_loaded_vec = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* nonnull %stack_input_vec, i32 4, <16 x i1> %mask, <16 x float> undef)
+  call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %masked_loaded_vec, <16 x float>* nonnull %stack_output_vec, i32 4, <16 x i1> %mask)
   ret void
 }
 
 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
 declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
 
-define void @test_v8d() local_unnamed_addr {
+define void @test_v8d(<8 x i64> %x) {
   ; CHECK-LABEL: name: test_v8d
   ; CHECK: bb.0.bb:
+  ; CHECK:   liveins: $ymm0, $ymm1
+  ; CHECK:   [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
+  ; CHECK:   [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
   ; CHECK:   [[AVX_SET0_:%[0-9]+]]:vr256 = AVX_SET0
-  ; CHECK:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
-  ; CHECK:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[AVX_SET0_]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
-  ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
-  ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[AVX_SET0_]], killed [[VMASKMOVPDYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
+  ; CHECK:   [[VPCMPEQQYrr:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY]], [[AVX_SET0_]]
+  ; CHECK:   [[VPCMPEQQYrr1:%[0-9]+]]:vr256 = VPCMPEQQYrr [[COPY1]], [[AVX_SET0_]]
+  ; CHECK:   [[VMASKMOVPDYrm:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr1]], %stack.0.stack_input_vec, 1, $noreg, 0, $noreg :: (load 32 from %ir.stack_input_vec, align 4)
+  ; CHECK:   [[VMASKMOVPDYrm1:%[0-9]+]]:vr256 = VMASKMOVPDYrm [[VPCMPEQQYrr]], %stack.0.stack_input_vec, 1, $noreg, 32, $noreg :: (load 32 from %ir.stack_input_vec + 32, align 4)
+  ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 32, $noreg, [[VPCMPEQQYrr]], killed [[VMASKMOVPDYrm1]] :: (store 32 into %ir.stack_output_vec + 32, align 4)
+  ; CHECK:   VMASKMOVPDYmr %stack.1.stack_output_vec, 1, $noreg, 0, $noreg, [[VPCMPEQQYrr1]], killed [[VMASKMOVPDYrm]] :: (store 32 into %ir.stack_output_vec, align 4)
   ; CHECK:   RET 0
 bb:
   %stack_input_vec = alloca <8 x double>, align 64
   %stack_output_vec = alloca <8 x double>, align 64
-  %masked_loaded_vec = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull %stack_input_vec, i32 4, <8 x i1> undef, <8 x double> undef)
-  call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %masked_loaded_vec, <8 x double>* nonnull %stack_output_vec, i32 4, <8 x i1> undef)
+  %mask = icmp eq <8 x i64> %x, zeroinitializer
+  %masked_loaded_vec = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* nonnull %stack_input_vec, i32 4, <8 x i1> %mask, <8 x double> undef)
+  call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %masked_loaded_vec, <8 x double>* nonnull %stack_output_vec, i32 4, <8 x i1> %mask)
   ret void
 }

From 8a32ca381d1ecbb04b456a109bcb4f4ab846380b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 2 Jun 2019 20:37:02 +0000
Subject: [PATCH 0856/1176] [CostModel][X86] Improve masked load/store
 AVX1/AVX2 costs

A mixture of internal tests and review of the scheduler models indicates we're overestimating the cost of a masked load, which we're estimating at 4x regular memory ops - more realistic values indicates that its closer to 2x. Masked stores costs are a lot more diverse but 8x is roughly in the middle of the range.

e.g. SandyBridge
defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;

e.g. Btver2
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 4, 4], 2>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;

Differential Revision: https://reviews.llvm.org/D61257

llvm-svn: 362338
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp |    4 +-
 .../X86/masked-intrinsic-cost-widen.ll        |   76 +-
 .../CostModel/X86/masked-intrinsic-cost.ll    |   76 +-
 .../LoopVectorize/X86/masked_load_store.ll    | 1422 +++++++++++------
 4 files changed, 968 insertions(+), 610 deletions(-)

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index edd6384625847..2b9a61d4c8770 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2390,9 +2390,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
   }
 
-  // Pre-AVX512 - each maskmov costs 4.
+  // Pre-AVX512 - each maskmov load costs 2 + store costs ~8.
   if (!ST->hasAVX512())
-    return Cost + LT.first * 4;
+    return Cost + LT.first * (IsLoad ? 2 : 8);
 
   // AVX-512 masked load/store is cheapper
   return Cost + LT.first;
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-widen.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-widen.ll
index b3e33cb828194..2e89c3c036415 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-widen.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-widen.ll
@@ -37,22 +37,22 @@ define i32 @masked_load() {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_load'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
@@ -179,22 +179,22 @@ define i32 @masked_store() {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_store'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
@@ -873,7 +873,7 @@ define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
 ;
 ; AVX-LABEL: 'test1'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; AVX512-LABEL: 'test1'
@@ -894,7 +894,7 @@ define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
 ;
 ; AVX-LABEL: 'test2'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; AVX512-LABEL: 'test2'
@@ -915,7 +915,7 @@ define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
 ;
 ; AVX-LABEL: 'test3'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test3'
@@ -936,17 +936,17 @@ define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %d
 ;
 ; AVX1-LABEL: 'test4'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
 ;
 ; AVX2-LABEL: 'test4'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
 ;
 ; SKL-LABEL: 'test4'
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
 ;
 ; AVX512-LABEL: 'test4'
@@ -967,7 +967,7 @@ define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
 ;
 ; AVX-LABEL: 'test5'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test5'
@@ -988,7 +988,7 @@ define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
 ;
 ; AVX-LABEL: 'test6'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test6'
@@ -1009,7 +1009,7 @@ define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %d
 ;
 ; AVX-LABEL: 'test7'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
 ;
 ; AVX512-LABEL: 'test7'
@@ -1030,7 +1030,7 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
 ;
 ; AVX-LABEL: 'test8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
 ;
 ; AVX512-LABEL: 'test8'
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 0958550a31068..050a83364989c 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -37,22 +37,22 @@ define i32 @masked_load() {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_load'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 1, <2 x i1> undef, <2 x double> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0v1f64(<1 x double>* undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0v8i64(<8 x i64>* undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 1, <2 x i1> undef, <2 x i64> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0v1i64(<1 x i64>* undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef)
@@ -179,22 +179,22 @@ define i32 @masked_store() {
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_store'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> undef, <8 x double>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> undef, <4 x double>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> undef, <2 x double>* undef, i32 1, <2 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> undef, <1 x double>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> undef, <16 x float>* undef, i32 1, <16 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> undef, <8 x float>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> undef, <4 x float>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> undef, <2 x float>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0v8i64(<8 x i64> undef, <8 x i64>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> undef, <4 x i64>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> undef, <2 x i64>* undef, i32 1, <2 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0v1i64(<1 x i64> undef, <1 x i64>* undef, i32 1, <1 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> undef, <16 x i32>* undef, i32 1, <16 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef)
@@ -873,7 +873,7 @@ define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
 ;
 ; AVX-LABEL: 'test1'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %res
 ;
 ; AVX512-LABEL: 'test1'
@@ -894,7 +894,7 @@ define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
 ;
 ; AVX-LABEL: 'test2'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; AVX512-LABEL: 'test2'
@@ -915,7 +915,7 @@ define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
 ;
 ; AVX-LABEL: 'test3'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test3'
@@ -936,17 +936,17 @@ define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %d
 ;
 ; AVX1-LABEL: 'test4'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
 ;
 ; AVX2-LABEL: 'test4'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
 ;
 ; SKL-LABEL: 'test4'
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
-; SKL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SKL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
 ; SKL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %res
 ;
 ; AVX512-LABEL: 'test4'
@@ -972,7 +972,7 @@ define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
 ;
 ; AVX-LABEL: 'test5'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %addr, i32 4, <2 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test5'
@@ -998,7 +998,7 @@ define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
 ;
 ; AVX-LABEL: 'test6'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %addr, i32 4, <2 x i1> %mask)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'test6'
@@ -1024,7 +1024,7 @@ define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %d
 ;
 ; AVX-LABEL: 'test7'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %res
 ;
 ; AVX512-LABEL: 'test7'
@@ -1050,7 +1050,7 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
 ;
 ; AVX-LABEL: 'test8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %res
 ;
 ; AVX512-LABEL: 'test8'
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 8a819743bd0ec..4c0424f6044f4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -717,131 +717,206 @@ for.end:                                          ; preds = %for.inc
 ;}
 
 define void @foo2(float* nocapture %A, float* nocapture readonly %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
-; AVX-LABEL: @foo2(
-; AVX-NEXT:  entry:
-; AVX-NEXT:    [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
-; AVX-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
-; AVX-NEXT:    [[B6:%.*]] = bitcast float* [[B:%.*]] to i8*
-; AVX-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
-; AVX:       vector.memcheck:
-; AVX-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 10000
-; AVX-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
-; AVX-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
-; AVX-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
-; AVX-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[B]], i64 10000
-; AVX-NEXT:    [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
-; AVX-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
-; AVX-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
-; AVX-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; AVX-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
-; AVX-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
-; AVX-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
-; AVX-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
-; AVX-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
-; AVX-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
-; AVX:       vector.ph:
-; AVX-NEXT:    br label [[VECTOR_BODY:%.*]]
-; AVX:       vector.body:
-; AVX-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
-; AVX-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
-; AVX-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
-; AVX-NEXT:    [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
-; AVX-NEXT:    [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
-; AVX-NEXT:    [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
-; AVX-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; AVX-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 8
-; AVX-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 16
-; AVX-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 24
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !21
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8
-; AVX-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !21
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
-; AVX-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !21
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24
-; AVX-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !21
-; AVX-NEXT:    [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0
-; AVX-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <8 x float>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x float> undef), !alias.scope !24
-; AVX-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 8
-; AVX-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x float> undef), !alias.scope !24
-; AVX-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 16
-; AVX-NEXT:    [[TMP29:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x float> undef), !alias.scope !24
-; AVX-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 24
-; AVX-NEXT:    [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x float> undef), !alias.scope !24
-; AVX-NEXT:    [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
-; AVX-NEXT:    [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD15]] to <8 x float>
-; AVX-NEXT:    [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD16]] to <8 x float>
-; AVX-NEXT:    [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD17]] to <8 x float>
-; AVX-NEXT:    [[TMP36:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP32]]
-; AVX-NEXT:    [[TMP37:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD18]], [[TMP33]]
-; AVX-NEXT:    [[TMP38:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD19]], [[TMP34]]
-; AVX-NEXT:    [[TMP39:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD20]], [[TMP35]]
-; AVX-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP43:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 0
-; AVX-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to <8 x float>*
-; AVX-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP36]], <8 x float>* [[TMP45]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !26, !noalias !28
-; AVX-NEXT:    [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 8
-; AVX-NEXT:    [[TMP47:%.*]] = bitcast float* [[TMP46]] to <8 x float>*
-; AVX-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP37]], <8 x float>* [[TMP47]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !26, !noalias !28
-; AVX-NEXT:    [[TMP48:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 16
-; AVX-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP48]] to <8 x float>*
-; AVX-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP38]], <8 x float>* [[TMP49]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !26, !noalias !28
-; AVX-NEXT:    [[TMP50:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 24
-; AVX-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP50]] to <8 x float>*
-; AVX-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP39]], <8 x float>* [[TMP51]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !26, !noalias !28
-; AVX-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 32
-; AVX-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
-; AVX-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !29
-; AVX:       middle.block:
-; AVX-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
-; AVX-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; AVX:       scalar.ph:
-; AVX-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; AVX-NEXT:    br label [[FOR_BODY:%.*]]
-; AVX:       for.body:
-; AVX-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; AVX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; AVX-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100
-; AVX-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
-; AVX:       if.then:
-; AVX-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP54:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; AVX-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP53]] to float
-; AVX-NEXT:    [[ADD:%.*]] = fadd float [[TMP54]], [[CONV]]
-; AVX-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
-; AVX-NEXT:    br label [[FOR_INC]]
-; AVX:       for.inc:
-; AVX-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; AVX-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
-; AVX-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !30
-; AVX:       for.end:
-; AVX-NEXT:    ret void
+; AVX1-LABEL: @foo2(
+; AVX1-NEXT:  entry:
+; AVX1-NEXT:    [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
+; AVX1-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
+; AVX1-NEXT:    [[B6:%.*]] = bitcast float* [[B:%.*]] to i8*
+; AVX1-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; AVX1:       vector.memcheck:
+; AVX1-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 10000
+; AVX1-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
+; AVX1-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
+; AVX1-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
+; AVX1-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[B]], i64 10000
+; AVX1-NEXT:    [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
+; AVX1-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
+; AVX1-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
+; AVX1-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; AVX1-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
+; AVX1-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
+; AVX1-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; AVX1-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; AVX1-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
+; AVX1-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; AVX1:       vector.ph:
+; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX1:       vector.body:
+; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
+; AVX1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX1-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+; AVX1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; AVX1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
+; AVX1-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <8 x i32>*
+; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP3]], align 4, !alias.scope !21
+; AVX1-NEXT:    [[TMP4:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
+; AVX1-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <8 x float>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP7]], i32 4, <8 x i1> [[TMP4]], <8 x float> undef), !alias.scope !24
+; AVX1-NEXT:    [[TMP8:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
+; AVX1-NEXT:    [[TMP9:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP8]]
+; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP10]], i32 0
+; AVX1-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <8 x float>*
+; AVX1-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP9]], <8 x float>* [[TMP12]], i32 4, <8 x i1> [[TMP4]]), !alias.scope !26, !noalias !28
+; AVX1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; AVX1-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; AVX1-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !29
+; AVX1:       middle.block:
+; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 10000
+; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; AVX1:       scalar.ph:
+; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX1:       for.body:
+; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP14]], 100
+; AVX1-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; AVX1:       if.then:
+; AVX1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP15:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; AVX1-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP14]] to float
+; AVX1-NEXT:    [[ADD:%.*]] = fadd float [[TMP15]], [[CONV]]
+; AVX1-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
+; AVX1-NEXT:    br label [[FOR_INC]]
+; AVX1:       for.inc:
+; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
+; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !30
+; AVX1:       for.end:
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @foo2(
+; AVX2-NEXT:  entry:
+; AVX2-NEXT:    [[A1:%.*]] = bitcast float* [[A:%.*]] to i8*
+; AVX2-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
+; AVX2-NEXT:    [[B6:%.*]] = bitcast float* [[B:%.*]] to i8*
+; AVX2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; AVX2:       vector.memcheck:
+; AVX2-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A]], i64 10000
+; AVX2-NEXT:    [[SCEVGEP2:%.*]] = bitcast float* [[SCEVGEP]] to i8*
+; AVX2-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000
+; AVX2-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
+; AVX2-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[B]], i64 10000
+; AVX2-NEXT:    [[SCEVGEP78:%.*]] = bitcast float* [[SCEVGEP7]] to i8*
+; AVX2-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]]
+; AVX2-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
+; AVX2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; AVX2-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP78]]
+; AVX2-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[B6]], [[SCEVGEP2]]
+; AVX2-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; AVX2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; AVX2-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
+; AVX2-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; AVX2:       vector.ph:
+; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX2:       vector.body:
+; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
+; AVX2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
+; AVX2-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
+; AVX2-NEXT:    [[INDUCTION12:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+; AVX2-NEXT:    [[INDUCTION13:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23>
+; AVX2-NEXT:    [[INDUCTION14:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>
+; AVX2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; AVX2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 8
+; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 16
+; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 24
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !21
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8
+; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !21
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16
+; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD16:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !21
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24
+; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !21
+; AVX2-NEXT:    [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD15]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD16]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD17]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 0
+; AVX2-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <8 x float>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP25]], i32 4, <8 x i1> [[TMP16]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 8
+; AVX2-NEXT:    [[TMP27:%.*]] = bitcast float* [[TMP26]] to <8 x float>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD18:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP27]], i32 4, <8 x i1> [[TMP17]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 16
+; AVX2-NEXT:    [[TMP29:%.*]] = bitcast float* [[TMP28]] to <8 x float>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD19:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP29]], i32 4, <8 x i1> [[TMP18]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 24
+; AVX2-NEXT:    [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD20:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP19]], <8 x float> undef), !alias.scope !24
+; AVX2-NEXT:    [[TMP32:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x float>
+; AVX2-NEXT:    [[TMP33:%.*]] = sitofp <8 x i32> [[WIDE_LOAD15]] to <8 x float>
+; AVX2-NEXT:    [[TMP34:%.*]] = sitofp <8 x i32> [[WIDE_LOAD16]] to <8 x float>
+; AVX2-NEXT:    [[TMP35:%.*]] = sitofp <8 x i32> [[WIDE_LOAD17]] to <8 x float>
+; AVX2-NEXT:    [[TMP36:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP32]]
+; AVX2-NEXT:    [[TMP37:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD18]], [[TMP33]]
+; AVX2-NEXT:    [[TMP38:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD19]], [[TMP34]]
+; AVX2-NEXT:    [[TMP39:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD20]], [[TMP35]]
+; AVX2-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP42:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 0
+; AVX2-NEXT:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to <8 x float>*
+; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP36]], <8 x float>* [[TMP45]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !26, !noalias !28
+; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 8
+; AVX2-NEXT:    [[TMP47:%.*]] = bitcast float* [[TMP46]] to <8 x float>*
+; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP37]], <8 x float>* [[TMP47]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !26, !noalias !28
+; AVX2-NEXT:    [[TMP48:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 16
+; AVX2-NEXT:    [[TMP49:%.*]] = bitcast float* [[TMP48]] to <8 x float>*
+; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP38]], <8 x float>* [[TMP49]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !26, !noalias !28
+; AVX2-NEXT:    [[TMP50:%.*]] = getelementptr inbounds float, float* [[TMP40]], i32 24
+; AVX2-NEXT:    [[TMP51:%.*]] = bitcast float* [[TMP50]] to <8 x float>*
+; AVX2-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP39]], <8 x float>* [[TMP51]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !26, !noalias !28
+; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 32
+; AVX2-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
+; AVX2-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !29
+; AVX2:       middle.block:
+; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, 9984
+; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; AVX2:       scalar.ph:
+; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX2:       for.body:
+; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP53]], 100
+; AVX2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; AVX2:       if.then:
+; AVX2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP54:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; AVX2-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP53]] to float
+; AVX2-NEXT:    [[ADD:%.*]] = fadd float [[TMP54]], [[CONV]]
+; AVX2-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    store float [[ADD]], float* [[ARRAYIDX7]], align 4
+; AVX2-NEXT:    br label [[FOR_INC]]
+; AVX2:       for.inc:
+; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
+; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !30
+; AVX2:       for.end:
+; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @foo2(
 ; AVX512-NEXT:  entry:
@@ -1506,155 +1581,178 @@ for.end:                                          ; preds = %for.inc
 ;}
 
 define void @foo6(double* nocapture readonly %in, double* nocapture %out, i32 %size, i32* nocapture readonly %trigger) local_unnamed_addr #0 {
-; AVX-LABEL: @foo6(
-; AVX-NEXT:  entry:
-; AVX-NEXT:    [[OUT1:%.*]] = bitcast double* [[OUT:%.*]] to i8*
-; AVX-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
-; AVX-NEXT:    [[IN6:%.*]] = bitcast double* [[IN:%.*]] to i8*
-; AVX-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
-; AVX:       vector.memcheck:
-; AVX-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[OUT]], i64 4096
-; AVX-NEXT:    [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
-; AVX-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 4096
-; AVX-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
-; AVX-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, double* [[IN]], i64 4096
-; AVX-NEXT:    [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
-; AVX-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP45]]
-; AVX-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
-; AVX-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; AVX-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP78]]
-; AVX-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[IN6]], [[SCEVGEP2]]
-; AVX-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
-; AVX-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
-; AVX-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
-; AVX-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
-; AVX:       vector.ph:
-; AVX-NEXT:    br label [[VECTOR_BODY:%.*]]
-; AVX:       vector.body:
-; AVX-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
-; AVX-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0
-; AVX-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 -1, i64 -2, i64 -3>
-; AVX-NEXT:    [[INDUCTION12:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
-; AVX-NEXT:    [[INDUCTION13:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -8, i64 -9, i64 -10, i64 -11>
-; AVX-NEXT:    [[INDUCTION14:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -12, i64 -13, i64 -14, i64 -15>
-; AVX-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; AVX-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -4
-; AVX-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -8
-; AVX-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -12
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3
-; AVX-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41
-; AVX-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3
-; AVX-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41
-; AVX-NEXT:    [[REVERSE16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD15]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
-; AVX-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3
-; AVX-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41
-; AVX-NEXT:    [[REVERSE18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD17]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12
-; AVX-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3
-; AVX-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
-; AVX-NEXT:    [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41
-; AVX-NEXT:    [[REVERSE20:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD19]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer
-; AVX-NEXT:    [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE16]], zeroinitializer
-; AVX-NEXT:    [[TMP22:%.*]] = icmp sgt <4 x i32> [[REVERSE18]], zeroinitializer
-; AVX-NEXT:    [[TMP23:%.*]] = icmp sgt <4 x i32> [[REVERSE20]], zeroinitializer
-; AVX-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0
-; AVX-NEXT:    [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3
-; AVX-NEXT:    [[REVERSE21:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !44
-; AVX-NEXT:    [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4
-; AVX-NEXT:    [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3
-; AVX-NEXT:    [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
-; AVX-NEXT:    [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
-; AVX-NEXT:    [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
-; AVX-NEXT:    [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
-; AVX-NEXT:    [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
-; AVX-NEXT:    [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
-; AVX-NEXT:    [[REVERSE29:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !44
-; AVX-NEXT:    [[REVERSE31:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD30]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP40:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX-NEXT:    [[TMP41:%.*]] = fadd <4 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX-NEXT:    [[TMP42:%.*]] = fadd <4 x double> [[REVERSE28]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX-NEXT:    [[TMP43:%.*]] = fadd <4 x double> [[REVERSE31]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
-; AVX-NEXT:    [[REVERSE32:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
-; AVX-NEXT:    [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3
-; AVX-NEXT:    [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !46, !noalias !48
-; AVX-NEXT:    [[REVERSE34:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4
-; AVX-NEXT:    [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3
-; AVX-NEXT:    [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48
-; AVX-NEXT:    [[REVERSE36:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
-; AVX-NEXT:    [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3
-; AVX-NEXT:    [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !46, !noalias !48
-; AVX-NEXT:    [[REVERSE38:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT:    [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12
-; AVX-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3
-; AVX-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !46, !noalias !48
-; AVX-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; AVX-NEXT:    [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; AVX-NEXT:    br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !49
-; AVX:       middle.block:
-; AVX-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
-; AVX-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; AVX:       scalar.ph:
-; AVX-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ]
-; AVX-NEXT:    br label [[FOR_BODY:%.*]]
-; AVX:       for.body:
-; AVX-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; AVX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP61:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; AVX-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP61]], 0
-; AVX-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
-; AVX:       if.then:
-; AVX-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP62:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; AVX-NEXT:    [[ADD:%.*]] = fadd double [[TMP62]], 5.000000e-01
-; AVX-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    store double [[ADD]], double* [[ARRAYIDX5]], align 8
-; AVX-NEXT:    br label [[FOR_INC]]
-; AVX:       for.inc:
-; AVX-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; AVX-NEXT:    [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; AVX-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !50
-; AVX:       for.end:
-; AVX-NEXT:    ret void
+; AVX1-LABEL: @foo6(
+; AVX1-NEXT:  entry:
+; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX1:       for.body:
+; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 4095, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX1-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+; AVX1-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; AVX1:       if.then:
+; AVX1-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN:%.*]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX3]], align 8
+; AVX1-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e-01
+; AVX1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    store double [[ADD]], double* [[ARRAYIDX5]], align 8
+; AVX1-NEXT:    br label [[FOR_INC]]
+; AVX1:       for.inc:
+; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; AVX1-NEXT:    [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; AVX1-NEXT:    br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; AVX1:       for.end:
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @foo6(
+; AVX2-NEXT:  entry:
+; AVX2-NEXT:    [[OUT1:%.*]] = bitcast double* [[OUT:%.*]] to i8*
+; AVX2-NEXT:    [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8*
+; AVX2-NEXT:    [[IN6:%.*]] = bitcast double* [[IN:%.*]] to i8*
+; AVX2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; AVX2:       vector.memcheck:
+; AVX2-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[OUT]], i64 4096
+; AVX2-NEXT:    [[SCEVGEP2:%.*]] = bitcast double* [[SCEVGEP]] to i8*
+; AVX2-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 4096
+; AVX2-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
+; AVX2-NEXT:    [[SCEVGEP7:%.*]] = getelementptr double, double* [[IN]], i64 4096
+; AVX2-NEXT:    [[SCEVGEP78:%.*]] = bitcast double* [[SCEVGEP7]] to i8*
+; AVX2-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP45]]
+; AVX2-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]]
+; AVX2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; AVX2-NEXT:    [[BOUND09:%.*]] = icmp ult i8* [[OUT1]], [[SCEVGEP78]]
+; AVX2-NEXT:    [[BOUND110:%.*]] = icmp ult i8* [[IN6]], [[SCEVGEP2]]
+; AVX2-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; AVX2-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; AVX2-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[CONFLICT_RDX]], true
+; AVX2-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; AVX2:       vector.ph:
+; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX2:       vector.body:
+; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX2-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
+; AVX2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0
+; AVX2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 -1, i64 -2, i64 -3>
+; AVX2-NEXT:    [[INDUCTION12:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
+; AVX2-NEXT:    [[INDUCTION13:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -8, i64 -9, i64 -10, i64 -11>
+; AVX2-NEXT:    [[INDUCTION14:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 -12, i64 -13, i64 -14, i64 -15>
+; AVX2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; AVX2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -4
+; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -8
+; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -12
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3
+; AVX2-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41
+; AVX2-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3
+; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41
+; AVX2-NEXT:    [[REVERSE16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD15]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
+; AVX2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3
+; AVX2-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41
+; AVX2-NEXT:    [[REVERSE18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD17]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12
+; AVX2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3
+; AVX2-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
+; AVX2-NEXT:    [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41
+; AVX2-NEXT:    [[REVERSE20:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD19]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer
+; AVX2-NEXT:    [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE16]], zeroinitializer
+; AVX2-NEXT:    [[TMP22:%.*]] = icmp sgt <4 x i32> [[REVERSE18]], zeroinitializer
+; AVX2-NEXT:    [[TMP23:%.*]] = icmp sgt <4 x i32> [[REVERSE20]], zeroinitializer
+; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 0
+; AVX2-NEXT:    [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3
+; AVX2-NEXT:    [[REVERSE21:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT:    [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4
+; AVX2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3
+; AVX2-NEXT:    [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT:    [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
+; AVX2-NEXT:    [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
+; AVX2-NEXT:    [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT:    [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
+; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
+; AVX2-NEXT:    [[REVERSE29:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT:    [[REVERSE31:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD30]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP40:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+; AVX2-NEXT:    [[TMP41:%.*]] = fadd <4 x double> [[REVERSE25]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+; AVX2-NEXT:    [[TMP42:%.*]] = fadd <4 x double> [[REVERSE28]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+; AVX2-NEXT:    [[TMP43:%.*]] = fadd <4 x double> [[REVERSE31]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
+; AVX2-NEXT:    [[REVERSE32:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
+; AVX2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3
+; AVX2-NEXT:    [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT:    [[REVERSE34:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4
+; AVX2-NEXT:    [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3
+; AVX2-NEXT:    [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT:    [[REVERSE36:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
+; AVX2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3
+; AVX2-NEXT:    [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT:    [[REVERSE38:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; AVX2-NEXT:    [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12
+; AVX2-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3
+; AVX2-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; AVX2-NEXT:    [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
+; AVX2-NEXT:    br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !49
+; AVX2:       middle.block:
+; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 4096, 4096
+; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; AVX2:       scalar.ph:
+; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 4095, [[ENTRY:%.*]] ], [ 4095, [[VECTOR_MEMCHECK]] ]
+; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX2:       for.body:
+; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP61:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; AVX2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP61]], 0
+; AVX2-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; AVX2:       if.then:
+; AVX2-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[IN]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP62:%.*]] = load double, double* [[ARRAYIDX3]], align 8
+; AVX2-NEXT:    [[ADD:%.*]] = fadd double [[TMP62]], 5.000000e-01
+; AVX2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    store double [[ADD]], double* [[ARRAYIDX5]], align 8
+; AVX2-NEXT:    br label [[FOR_INC]]
+; AVX2:       for.inc:
+; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; AVX2-NEXT:    [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; AVX2-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !50
+; AVX2:       for.end:
+; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @foo6(
 ; AVX512-NEXT:  entry:
@@ -1842,135 +1940,265 @@ for.end:                                          ; preds = %for.inc
 ; }
 
 define void @foo7(double* noalias nocapture %out, double** noalias nocapture readonly %in, i8* noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 {
-; AVX-LABEL: @foo7(
-; AVX-NEXT:  entry:
-; AVX-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
-; AVX-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
-; AVX:       for.body.preheader:
-; AVX-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
-; AVX-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
-; AVX-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; AVX:       vector.ph:
-; AVX-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
-; AVX-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
-; AVX-NEXT:    br label [[VECTOR_BODY:%.*]]
-; AVX:       vector.body:
-; AVX-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
-; AVX-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; AVX-NEXT:    [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
-; AVX-NEXT:    [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
-; AVX-NEXT:    [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
-; AVX-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; AVX-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
-; AVX-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
-; AVX-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
-; AVX-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
-; AVX-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
-; AVX-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
-; AVX-NEXT:    [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
-; AVX-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
-; AVX-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
-; AVX-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
-; AVX-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double*, double** [[IN:%.*]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
-; AVX-NEXT:    [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
-; AVX-NEXT:    [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
-; AVX-NEXT:    [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
-; AVX-NEXT:    [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
-; AVX-NEXT:    [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
-; AVX-NEXT:    [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
-; AVX-NEXT:    [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
-; AVX-NEXT:    [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
-; AVX-NEXT:    [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD7]], zeroinitializer
-; AVX-NEXT:    [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD8]], zeroinitializer
-; AVX-NEXT:    [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD9]], zeroinitializer
-; AVX-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
-; AVX-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
-; AVX-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
-; AVX-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
-; AVX-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
-; AVX-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
-; AVX-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
-; AVX-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
-; AVX-NEXT:    [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
-; AVX-NEXT:    [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
-; AVX-NEXT:    [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
-; AVX-NEXT:    [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
-; AVX-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; AVX-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX-NEXT:    br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
-; AVX:       middle.block:
-; AVX-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
-; AVX-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
-; AVX:       scalar.ph:
-; AVX-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; AVX-NEXT:    br label [[FOR_BODY:%.*]]
-; AVX:       for.body:
-; AVX-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; AVX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; AVX-NEXT:    [[TMP66:%.*]] = and i8 [[TMP65]], 1
-; AVX-NEXT:    [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
-; AVX-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
-; AVX:       land.lhs.true:
-; AVX-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
-; AVX-NEXT:    [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
-; AVX-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
-; AVX:       if.then:
-; AVX-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
-; AVX-NEXT:    br label [[FOR_INC]]
-; AVX:       for.inc:
-; AVX-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; AVX-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
-; AVX:       for.end.loopexit:
-; AVX-NEXT:    br label [[FOR_END]]
-; AVX:       for.end:
-; AVX-NEXT:    ret void
+; AVX1-LABEL: @foo7(
+; AVX1-NEXT:  entry:
+; AVX1-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
+; AVX1-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; AVX1:       for.body.preheader:
+; AVX1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
+; AVX1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
+; AVX1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; AVX1:       vector.ph:
+; AVX1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
+; AVX1-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX1:       vector.body:
+; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
+; AVX1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; AVX1-NEXT:    [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
+; AVX1-NEXT:    [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
+; AVX1-NEXT:    [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
+; AVX1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; AVX1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; AVX1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
+; AVX1-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
+; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
+; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
+; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
+; AVX1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
+; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
+; AVX1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
+; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
+; AVX1-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
+; AVX1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
+; AVX1-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
+; AVX1-NEXT:    [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
+; AVX1-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
+; AVX1-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
+; AVX1-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
+; AVX1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double*, double** [[IN:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP1]]
+; AVX1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP3]]
+; AVX1-NEXT:    [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
+; AVX1-NEXT:    [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
+; AVX1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
+; AVX1-NEXT:    [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
+; AVX1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
+; AVX1-NEXT:    [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
+; AVX1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
+; AVX1-NEXT:    [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
+; AVX1-NEXT:    [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
+; AVX1-NEXT:    [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD7]], zeroinitializer
+; AVX1-NEXT:    [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD8]], zeroinitializer
+; AVX1-NEXT:    [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD9]], zeroinitializer
+; AVX1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
+; AVX1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
+; AVX1-NEXT:    [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
+; AVX1-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
+; AVX1-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
+; AVX1-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
+; AVX1-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
+; AVX1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
+; AVX1-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
+; AVX1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
+; AVX1-NEXT:    [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
+; AVX1-NEXT:    [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
+; AVX1-NEXT:    [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
+; AVX1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; AVX1-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; AVX1-NEXT:    br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !41
+; AVX1:       middle.block:
+; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; AVX1:       scalar.ph:
+; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX1:       for.body:
+; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; AVX1-NEXT:    [[TMP66:%.*]] = and i8 [[TMP65]], 1
+; AVX1-NEXT:    [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
+; AVX1-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
+; AVX1:       land.lhs.true:
+; AVX1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
+; AVX1-NEXT:    [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
+; AVX1-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
+; AVX1:       if.then:
+; AVX1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
+; AVX1-NEXT:    br label [[FOR_INC]]
+; AVX1:       for.inc:
+; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !42
+; AVX1:       for.end.loopexit:
+; AVX1-NEXT:    br label [[FOR_END]]
+; AVX1:       for.end:
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @foo7(
+; AVX2-NEXT:  entry:
+; AVX2-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
+; AVX2-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; AVX2:       for.body.preheader:
+; AVX2-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
+; AVX2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
+; AVX2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; AVX2:       vector.ph:
+; AVX2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
+; AVX2-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX2:       vector.body:
+; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
+; AVX2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; AVX2-NEXT:    [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
+; AVX2-NEXT:    [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
+; AVX2-NEXT:    [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
+; AVX2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; AVX2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
+; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
+; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
+; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
+; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
+; AVX2-NEXT:    [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
+; AVX2-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
+; AVX2-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
+; AVX2-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
+; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double*, double** [[IN:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 0
+; AVX2-NEXT:    [[TMP33:%.*]] = bitcast double** [[TMP32]] to <4 x double*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x double*> undef)
+; AVX2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 4
+; AVX2-NEXT:    [[TMP35:%.*]] = bitcast double** [[TMP34]] to <4 x double*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x double*> undef)
+; AVX2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 8
+; AVX2-NEXT:    [[TMP37:%.*]] = bitcast double** [[TMP36]] to <4 x double*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x double*> undef)
+; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds double*, double** [[TMP24]], i32 12
+; AVX2-NEXT:    [[TMP39:%.*]] = bitcast double** [[TMP38]] to <4 x double*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double*> @llvm.masked.load.v4p0f64.p0v4p0f64(<4 x double*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x double*> undef)
+; AVX2-NEXT:    [[TMP40:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD]], zeroinitializer
+; AVX2-NEXT:    [[TMP41:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD7]], zeroinitializer
+; AVX2-NEXT:    [[TMP42:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD8]], zeroinitializer
+; AVX2-NEXT:    [[TMP43:%.*]] = icmp eq <4 x double*> [[WIDE_MASKED_LOAD9]], zeroinitializer
+; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
+; AVX2-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
+; AVX2-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
+; AVX2-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
+; AVX2-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
+; AVX2-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
+; AVX2-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
+; AVX2-NEXT:    [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
+; AVX2-NEXT:    [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
+; AVX2-NEXT:    [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
+; AVX2-NEXT:    [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
+; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; AVX2-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; AVX2-NEXT:    br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
+; AVX2:       middle.block:
+; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; AVX2:       scalar.ph:
+; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX2:       for.body:
+; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; AVX2-NEXT:    [[TMP66:%.*]] = and i8 [[TMP65]], 1
+; AVX2-NEXT:    [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
+; AVX2-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
+; AVX2:       land.lhs.true:
+; AVX2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double*, double** [[IN]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP67:%.*]] = load double*, double** [[ARRAYIDX2]], align 8
+; AVX2-NEXT:    [[CMP3:%.*]] = icmp eq double* [[TMP67]], null
+; AVX2-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
+; AVX2:       if.then:
+; AVX2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
+; AVX2-NEXT:    br label [[FOR_INC]]
+; AVX2:       for.inc:
+; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
+; AVX2:       for.end.loopexit:
+; AVX2-NEXT:    br label [[FOR_END]]
+; AVX2:       for.end:
+; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @foo7(
 ; AVX512-NEXT:  entry:
@@ -2147,135 +2375,265 @@ for.end:                                          ; preds = %for.inc, %entry
 ;}
 
 define void @foo8(double* noalias nocapture %out, i32 ()** noalias nocapture readonly %in, i8* noalias nocapture readonly %trigger, i32 %size) local_unnamed_addr #0 {
-; AVX-LABEL: @foo8(
-; AVX-NEXT:  entry:
-; AVX-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
-; AVX-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
-; AVX:       for.body.preheader:
-; AVX-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
-; AVX-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
-; AVX-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; AVX:       vector.ph:
-; AVX-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
-; AVX-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
-; AVX-NEXT:    br label [[VECTOR_BODY:%.*]]
-; AVX:       vector.body:
-; AVX-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AVX-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
-; AVX-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
-; AVX-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; AVX-NEXT:    [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
-; AVX-NEXT:    [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
-; AVX-NEXT:    [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
-; AVX-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; AVX-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
-; AVX-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
-; AVX-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
-; AVX-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
-; AVX-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
-; AVX-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
-; AVX-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
-; AVX-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
-; AVX-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
-; AVX-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
-; AVX-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
-; AVX-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
-; AVX-NEXT:    [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
-; AVX-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
-; AVX-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
-; AVX-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
-; AVX-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
-; AVX-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
-; AVX-NEXT:    [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
-; AVX-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
-; AVX-NEXT:    [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
-; AVX-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
-; AVX-NEXT:    [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
-; AVX-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
-; AVX-NEXT:    [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
-; AVX-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
-; AVX-NEXT:    [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
-; AVX-NEXT:    [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD7]], zeroinitializer
-; AVX-NEXT:    [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD8]], zeroinitializer
-; AVX-NEXT:    [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD9]], zeroinitializer
-; AVX-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
-; AVX-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
-; AVX-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
-; AVX-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
-; AVX-NEXT:    [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
-; AVX-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
-; AVX-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
-; AVX-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
-; AVX-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
-; AVX-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
-; AVX-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
-; AVX-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
-; AVX-NEXT:    [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
-; AVX-NEXT:    [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
-; AVX-NEXT:    [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
-; AVX-NEXT:    [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
-; AVX-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
-; AVX-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; AVX-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX-NEXT:    br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !54
-; AVX:       middle.block:
-; AVX-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
-; AVX-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
-; AVX:       scalar.ph:
-; AVX-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; AVX-NEXT:    br label [[FOR_BODY:%.*]]
-; AVX:       for.body:
-; AVX-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; AVX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
-; AVX-NEXT:    [[TMP66:%.*]] = and i8 [[TMP65]], 1
-; AVX-NEXT:    [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
-; AVX-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
-; AVX:       land.lhs.true:
-; AVX-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
-; AVX-NEXT:    [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
-; AVX-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
-; AVX:       if.then:
-; AVX-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
-; AVX-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
-; AVX-NEXT:    br label [[FOR_INC]]
-; AVX:       for.inc:
-; AVX-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; AVX-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !55
-; AVX:       for.end.loopexit:
-; AVX-NEXT:    br label [[FOR_END]]
-; AVX:       for.end:
-; AVX-NEXT:    ret void
+; AVX1-LABEL: @foo8(
+; AVX1-NEXT:  entry:
+; AVX1-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
+; AVX1-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; AVX1:       for.body.preheader:
+; AVX1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
+; AVX1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
+; AVX1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; AVX1:       vector.ph:
+; AVX1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
+; AVX1-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; AVX1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX1:       vector.body:
+; AVX1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
+; AVX1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX1-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; AVX1-NEXT:    [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
+; AVX1-NEXT:    [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
+; AVX1-NEXT:    [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
+; AVX1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; AVX1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; AVX1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
+; AVX1-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
+; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
+; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
+; AVX1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
+; AVX1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
+; AVX1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
+; AVX1-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
+; AVX1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
+; AVX1-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
+; AVX1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
+; AVX1-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
+; AVX1-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
+; AVX1-NEXT:    [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
+; AVX1-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
+; AVX1-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
+; AVX1-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
+; AVX1-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
+; AVX1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
+; AVX1-NEXT:    [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
+; AVX1-NEXT:    [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
+; AVX1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
+; AVX1-NEXT:    [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
+; AVX1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
+; AVX1-NEXT:    [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
+; AVX1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
+; AVX1-NEXT:    [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
+; AVX1-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
+; AVX1-NEXT:    [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
+; AVX1-NEXT:    [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD7]], zeroinitializer
+; AVX1-NEXT:    [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD8]], zeroinitializer
+; AVX1-NEXT:    [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD9]], zeroinitializer
+; AVX1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
+; AVX1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
+; AVX1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
+; AVX1-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
+; AVX1-NEXT:    [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
+; AVX1-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
+; AVX1-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
+; AVX1-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
+; AVX1-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
+; AVX1-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
+; AVX1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
+; AVX1-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
+; AVX1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
+; AVX1-NEXT:    [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
+; AVX1-NEXT:    [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
+; AVX1-NEXT:    [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
+; AVX1-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; AVX1-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; AVX1-NEXT:    br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !44
+; AVX1:       middle.block:
+; AVX1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; AVX1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; AVX1:       scalar.ph:
+; AVX1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; AVX1-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX1:       for.body:
+; AVX1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; AVX1-NEXT:    [[TMP66:%.*]] = and i8 [[TMP65]], 1
+; AVX1-NEXT:    [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
+; AVX1-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
+; AVX1:       land.lhs.true:
+; AVX1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
+; AVX1-NEXT:    [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
+; AVX1-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
+; AVX1:       if.then:
+; AVX1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
+; AVX1-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
+; AVX1-NEXT:    br label [[FOR_INC]]
+; AVX1:       for.inc:
+; AVX1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; AVX1-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; AVX1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !45
+; AVX1:       for.end.loopexit:
+; AVX1-NEXT:    br label [[FOR_END]]
+; AVX1:       for.end:
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @foo8(
+; AVX2-NEXT:  entry:
+; AVX2-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[SIZE:%.*]], 0
+; AVX2-NEXT:    br i1 [[CMP5]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; AVX2:       for.body.preheader:
+; AVX2-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
+; AVX2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
+; AVX2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; AVX2:       vector.ph:
+; AVX2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
+; AVX2-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; AVX2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; AVX2:       vector.body:
+; AVX2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; AVX2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
+; AVX2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
+; AVX2-NEXT:    [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; AVX2-NEXT:    [[INDUCTION1:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 4, i64 5, i64 6, i64 7>
+; AVX2-NEXT:    [[INDUCTION2:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 8, i64 9, i64 10, i64 11>
+; AVX2-NEXT:    [[INDUCTION3:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 12, i64 13, i64 14, i64 15>
+; AVX2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; AVX2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; AVX2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
+; AVX2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 12
+; AVX2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0
+; AVX2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP9]], align 1
+; AVX2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 4
+; AVX2-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
+; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 8
+; AVX2-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP13]], align 1
+; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 12
+; AVX2-NEXT:    [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <4 x i8>*
+; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, <4 x i8>* [[TMP15]], align 1
+; AVX2-NEXT:    [[TMP16:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP17:%.*]] = and <4 x i8> [[WIDE_LOAD4]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP18:%.*]] = and <4 x i8> [[WIDE_LOAD5]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP19:%.*]] = and <4 x i8> [[WIDE_LOAD6]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP16]], zeroinitializer
+; AVX2-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i8> [[TMP17]], zeroinitializer
+; AVX2-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i8> [[TMP18]], zeroinitializer
+; AVX2-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
+; AVX2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP28:%.*]] = xor <4 x i1> [[TMP20]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP29:%.*]] = xor <4 x i1> [[TMP21]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[TMP22]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP31:%.*]] = xor <4 x i1> [[TMP23]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 0
+; AVX2-NEXT:    [[TMP33:%.*]] = bitcast i32 ()** [[TMP32]] to <4 x i32 ()*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP33]], i32 8, <4 x i1> [[TMP28]], <4 x i32 ()*> undef)
+; AVX2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 4
+; AVX2-NEXT:    [[TMP35:%.*]] = bitcast i32 ()** [[TMP34]] to <4 x i32 ()*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP35]], i32 8, <4 x i1> [[TMP29]], <4 x i32 ()*> undef)
+; AVX2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 8
+; AVX2-NEXT:    [[TMP37:%.*]] = bitcast i32 ()** [[TMP36]] to <4 x i32 ()*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD8:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP37]], i32 8, <4 x i1> [[TMP30]], <4 x i32 ()*> undef)
+; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[TMP24]], i32 12
+; AVX2-NEXT:    [[TMP39:%.*]] = bitcast i32 ()** [[TMP38]] to <4 x i32 ()*>*
+; AVX2-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32 ()*> @llvm.masked.load.v4p0f_i32f.p0v4p0f_i32f(<4 x i32 ()*>* [[TMP39]], i32 8, <4 x i1> [[TMP31]], <4 x i32 ()*> undef)
+; AVX2-NEXT:    [[TMP40:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD]], zeroinitializer
+; AVX2-NEXT:    [[TMP41:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD7]], zeroinitializer
+; AVX2-NEXT:    [[TMP42:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD8]], zeroinitializer
+; AVX2-NEXT:    [[TMP43:%.*]] = icmp eq <4 x i32 ()*> [[WIDE_MASKED_LOAD9]], zeroinitializer
+; AVX2-NEXT:    [[TMP44:%.*]] = getelementptr inbounds double, double* [[OUT:%.*]], i64 [[TMP0]]
+; AVX2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP1]]
+; AVX2-NEXT:    [[TMP46:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP2]]
+; AVX2-NEXT:    [[TMP47:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[TMP3]]
+; AVX2-NEXT:    [[TMP48:%.*]] = xor <4 x i1> [[TMP40]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP49:%.*]] = xor <4 x i1> [[TMP41]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP50:%.*]] = xor <4 x i1> [[TMP42]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP51:%.*]] = xor <4 x i1> [[TMP43]], <i1 true, i1 true, i1 true, i1 true>
+; AVX2-NEXT:    [[TMP52:%.*]] = and <4 x i1> [[TMP48]], [[TMP28]]
+; AVX2-NEXT:    [[TMP53:%.*]] = and <4 x i1> [[TMP49]], [[TMP29]]
+; AVX2-NEXT:    [[TMP54:%.*]] = and <4 x i1> [[TMP50]], [[TMP30]]
+; AVX2-NEXT:    [[TMP55:%.*]] = and <4 x i1> [[TMP51]], [[TMP31]]
+; AVX2-NEXT:    [[TMP56:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
+; AVX2-NEXT:    [[TMP57:%.*]] = bitcast double* [[TMP56]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP57]], i32 8, <4 x i1> [[TMP52]])
+; AVX2-NEXT:    [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 4
+; AVX2-NEXT:    [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP59]], i32 8, <4 x i1> [[TMP53]])
+; AVX2-NEXT:    [[TMP60:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 8
+; AVX2-NEXT:    [[TMP61:%.*]] = bitcast double* [[TMP60]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP61]], i32 8, <4 x i1> [[TMP54]])
+; AVX2-NEXT:    [[TMP62:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 12
+; AVX2-NEXT:    [[TMP63:%.*]] = bitcast double* [[TMP62]] to <4 x double>*
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
+; AVX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
+; AVX2-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; AVX2-NEXT:    br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !54
+; AVX2:       middle.block:
+; AVX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; AVX2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; AVX2:       scalar.ph:
+; AVX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; AVX2-NEXT:    br label [[FOR_BODY:%.*]]
+; AVX2:       for.body:
+; AVX2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; AVX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TRIGGER]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP65:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; AVX2-NEXT:    [[TMP66:%.*]] = and i8 [[TMP65]], 1
+; AVX2-NEXT:    [[TOBOOL:%.*]] = icmp eq i8 [[TMP66]], 0
+; AVX2-NEXT:    br i1 [[TOBOOL]], label [[FOR_INC]], label [[LAND_LHS_TRUE:%.*]]
+; AVX2:       land.lhs.true:
+; AVX2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32 ()*, i32 ()** [[IN]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    [[TMP67:%.*]] = load i32 ()*, i32 ()** [[ARRAYIDX2]], align 8
+; AVX2-NEXT:    [[CMP3:%.*]] = icmp eq i32 ()* [[TMP67]], null
+; AVX2-NEXT:    br i1 [[CMP3]], label [[FOR_INC]], label [[IF_THEN:%.*]]
+; AVX2:       if.then:
+; AVX2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[OUT]], i64 [[INDVARS_IV]]
+; AVX2-NEXT:    store double 5.000000e-01, double* [[ARRAYIDX5]], align 8
+; AVX2-NEXT:    br label [[FOR_INC]]
+; AVX2:       for.inc:
+; AVX2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; AVX2-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; AVX2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !55
+; AVX2:       for.end.loopexit:
+; AVX2-NEXT:    br label [[FOR_END]]
+; AVX2:       for.end:
+; AVX2-NEXT:    ret void
 ;
 ; AVX512-LABEL: @foo8(
 ; AVX512-NEXT:  entry:

From fde26d222da4dcd947d0dea4baeade8fb69ae1a7 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Sun, 2 Jun 2019 21:11:21 +0000
Subject: [PATCH 0857/1176] [Commands] Remove unused header

llvm-svn: 362339
---
 lldb/source/Commands/CommandObjectExpression.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp
index dc77a2eae6588..29e4ab6955225 100644
--- a/lldb/source/Commands/CommandObjectExpression.cpp
+++ b/lldb/source/Commands/CommandObjectExpression.cpp
@@ -10,7 +10,6 @@
 #include "llvm/ADT/StringRef.h"
 
 #include "CommandObjectExpression.h"
-#include "Plugins/ExpressionParser/Clang/ClangExpressionVariable.h"
 #include "lldb/Core/Debugger.h"
 #include "lldb/Core/Value.h"
 #include "lldb/Core/ValueObjectVariable.h"

From 162360774ed9c1922820d2afa2d6cd39983e90d5 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Sun, 2 Jun 2019 21:40:53 +0000
Subject: [PATCH 0858/1176] [analyzer] exploded-graph-rewriter: Initial commit.

This is a utility to improve readability and generally manipulate
GraphViz dumps of the analysis graph. Such dumps are often huge and
not only hard to read, but also often hang the viewer apps with their
mere size. Such script should significantly improve debugging experience.

Differential Revision: https://reviews.llvm.org/D62638

llvm-svn: 362340
---
 .../Analysis/exploded-graph-rewriter/edge.dot |  12 +
 .../exploded-graph-rewriter/empty.dot         |   9 +
 .../exploded-graph-rewriter/environment.dot   |  50 +++
 .../exploded-graph-rewriter/lit.local.cfg     |  13 +
 .../program_points.dot                        |  60 +++
 .../exploded-graph-rewriter/store.dot         |  42 ++
 .../utils/analyzer/exploded-graph-rewriter.py | 398 ++++++++++++++++++
 7 files changed, 584 insertions(+)
 create mode 100644 clang/test/Analysis/exploded-graph-rewriter/edge.dot
 create mode 100644 clang/test/Analysis/exploded-graph-rewriter/empty.dot
 create mode 100644 clang/test/Analysis/exploded-graph-rewriter/environment.dot
 create mode 100644 clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
 create mode 100644 clang/test/Analysis/exploded-graph-rewriter/program_points.dot
 create mode 100644 clang/test/Analysis/exploded-graph-rewriter/store.dot
 create mode 100755 clang/utils/analyzer/exploded-graph-rewriter.py

diff --git a/clang/test/Analysis/exploded-graph-rewriter/edge.dot b/clang/test/Analysis/exploded-graph-rewriter/edge.dot
new file mode 100644
index 0000000000000..5fc695c1ce018
--- /dev/null
+++ b/clang/test/Analysis/exploded-graph-rewriter/edge.dot
@@ -0,0 +1,12 @@
+// RUN: %exploded_graph_rewriter %s | FileCheck %s
+
+Node0x1 [shape=record,label=
+ "{{ "node_id": 1, "pointer": "0x1",
+     "program_state": null, "program_points": []}\l}"];
+
+// CHECK: Node0x1 -> Node0x2;
+Node0x1 -> Node0x2;
+
+Node0x2 [shape=record,label=
+ "{{ "node_id": 2, "pointer": "0x2",
+     "program_state": null, "program_points": []}\l}"];
diff --git a/clang/test/Analysis/exploded-graph-rewriter/empty.dot b/clang/test/Analysis/exploded-graph-rewriter/empty.dot
new file mode 100644
index 0000000000000..85c227dbab8a0
--- /dev/null
+++ b/clang/test/Analysis/exploded-graph-rewriter/empty.dot
@@ -0,0 +1,9 @@
+// RUN: %exploded_graph_rewriter %s | FileCheck %s
+
+digraph "Exploded Graph" {
+  label="Exploded Graph";
+}
+
+// CHECK:      digraph "ExplodedGraph" {
+// CHECK-NEXT:   label="";
+// CHECK-NEXT: }
diff --git a/clang/test/Analysis/exploded-graph-rewriter/environment.dot b/clang/test/Analysis/exploded-graph-rewriter/environment.dot
new file mode 100644
index 0000000000000..31d2d72551d41
--- /dev/null
+++ b/clang/test/Analysis/exploded-graph-rewriter/environment.dot
@@ -0,0 +1,50 @@
+// RUN: %exploded_graph_rewriter %s | FileCheck %s
+
+// CHECK: <b>Environment: </b>
+// CHECK-SAME: <table border="0">
+// CHECK-SAME:   <tr>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       <b>#0 Call</b>
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       <font color="grey60">foo </font>(line 4)
+// CHECK-SAME:     </td>
+// CHECK-SAME:   </tr>
+// CHECK-SAME:   <tr>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       <i>S5</i>
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       bar()
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       Unknown
+// CHECK-SAME:     </td>
+// CHECK-SAME:   </tr>
+// CHECK-SAME: </table>
+Node0x1 [shape=record,label=
+ "{
+    { "node_id": 1,
+      "pointer": "0x1",
+      "state_id": 2,
+      "program_points": [],
+      "program_state": {
+        "store": null,
+        "environment": [
+          {
+            "location_context": "#0 Call",
+            "lctx_id": 3,
+            "calling": "foo",
+            "call_line": 4,
+            "items": [
+              {
+                "stmt_id": 5,
+                "pretty": "bar()",
+                "value": "Unknown"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];
diff --git a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
new file mode 100644
index 0000000000000..1ebb25a4001a6
--- /dev/null
+++ b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
@@ -0,0 +1,13 @@
+import lit.util
+import lit.formats
+import os
+
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+config.test_format = lit.formats.ShTest(use_lit_shell == "0")
+
+config.substitutions.append(('%exploded_graph_rewriter',
+                             lit.util.which('exploded-graph-rewriter.py',
+                                            os.path.join(config.clang_src_dir,
+                                                         'utils', 'analyzer'))))
+
+config.suffixes = ['.dot']
diff --git a/clang/test/Analysis/exploded-graph-rewriter/program_points.dot b/clang/test/Analysis/exploded-graph-rewriter/program_points.dot
new file mode 100644
index 0000000000000..a7696d27a9f7f
--- /dev/null
+++ b/clang/test/Analysis/exploded-graph-rewriter/program_points.dot
@@ -0,0 +1,60 @@
+// RUN: %exploded_graph_rewriter %s | FileCheck %s
+
+// CHECK: <b>Program point:</b>
+// CHECK-SAME: <table border="0" align="left" width="0">
+// CHECK-SAME:   <tr>
+// CHECK-SAME:     <td width="0">
+// CHECK-SAME:       -
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left" width="0">
+// CHECK-SAME:       <font color="gold3">Edge</font>
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:     [B0] -&gt; [B1]
+// CHECK-SAME:     </td>
+// CHECK-SAME:   </tr>
+// CHECK-SAME: </table>
+Node0x1 [shape=record,label=
+ "{
+    { "node_id": 1, "pointer": "0x1",
+      "program_state": null, "program_points": [
+      {
+        "kind": "Edge",
+        "src_id": 0,
+        "dst_id": 1,
+        "terminator": null,
+        "term_kind": null,
+        "tag": null }
+    ]}
+\l}"];
+
+// CHECK-NEXT: <b>Program point:</b>
+// CHECK-SAME: <table border="0" align="left" width="0">
+// CHECK-SAME:   <tr>
+// CHECK-SAME:     <td align="left" width="0">
+// CHECK-SAME:       (main file):<b>4</b>:<b>5</b>:
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left" width="0">
+// CHECK-SAME:       <font color="cyan3">DeclRefExpr</font>
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td>x</td>
+// CHECK-SAME:   </tr>
+// CHECK-SAME: </table>
+Node0x2 [shape=record,label=
+ "{
+    { "node_id": 2, "pointer": "0x2",
+      "program_state": null, "program_points": [
+      {
+        "kind": "Statement",
+        "stmt_kind": "DeclRefExpr",
+        "stmd_id": 3,
+        "pointer": "0x3",
+        "pretty": "x",
+        "location": {
+          "line": 4,
+          "column": 5
+        },
+        "tag": null
+      }
+    ]}
+\l}"];
diff --git a/clang/test/Analysis/exploded-graph-rewriter/store.dot b/clang/test/Analysis/exploded-graph-rewriter/store.dot
new file mode 100644
index 0000000000000..0f0fa928b288a
--- /dev/null
+++ b/clang/test/Analysis/exploded-graph-rewriter/store.dot
@@ -0,0 +1,42 @@
+// RUN: %exploded_graph_rewriter %s | FileCheck %s
+
+// CHECK: <b>Store: </b>
+// CHECK-SAME: <table border="0">
+// CHECK-SAME:   <tr>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       x
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       0
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       (<i>Default</i>)
+// CHECK-SAME:     </td>
+// CHECK-SAME:     <td align="left">
+// CHECK-SAME:       Undefined
+// CHECK-SAME:     </td>
+// CHECK-SAME:   </tr>
+// CHECK-SAME: </table>
+Node0x1 [shape=record,label=
+ "{
+    { "node_id": 1,
+      "pointer": "0x1",
+      "state_id": 2,
+      "program_points": [],
+      "program_state": {
+        "environment": null,
+        "store": [
+          {
+            "cluster": "x",
+            "items": [
+              {
+                "kind": "Default",
+                "offset": 0,
+                "value": "Undefined"
+              }
+            ]
+          }
+        ]
+      }
+    }
+\l}"];
diff --git a/clang/utils/analyzer/exploded-graph-rewriter.py b/clang/utils/analyzer/exploded-graph-rewriter.py
new file mode 100755
index 0000000000000..355fc8632a574
--- /dev/null
+++ b/clang/utils/analyzer/exploded-graph-rewriter.py
@@ -0,0 +1,398 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import collections
+import json
+import logging
+import re
+
+
+# A deserialized source location.
+class SourceLocation(object):
+    def __init__(self, json_loc):
+        super(SourceLocation, self).__init__()
+        self.line = json_loc['line']
+        self.col = json_loc['column']
+        self.filename = json_loc['filename'] \
+            if 'filename' in json_loc else '(main file)'
+
+
+# A deserialized program point.
+class ProgramPoint(object):
+    def __init__(self, json_pp):
+        super(ProgramPoint, self).__init__()
+        self.kind = json_pp['kind']
+        self.tag = json_pp['tag']
+        if self.kind == 'Edge':
+            self.src_id = json_pp['src_id']
+            self.dst_id = json_pp['dst_id']
+        elif self.kind == 'Statement':
+            self.stmt_kind = json_pp['stmt_kind']
+            self.pointer = json_pp['pointer']
+            self.pretty = json_pp['pretty']
+            self.loc = SourceLocation(json_pp['location']) \
+                if json_pp['location'] is not None else None
+        elif self.kind == 'BlockEntrance':
+            self.block_id = json_pp['block_id']
+
+
+# A value of a single expression in a deserialized Environment.
+class EnvironmentBinding(object):
+    def __init__(self, json_eb):
+        super(EnvironmentBinding, self).__init__()
+        self.stmt_id = json_eb['stmt_id']
+        self.pretty = json_eb['pretty']
+        self.value = json_eb['value']
+
+
+# Deserialized description of a location context.
+class LocationContext(object):
+    def __init__(self, json_frame):
+        super(LocationContext, self).__init__()
+        self.lctx_id = json_frame['lctx_id']
+        self.caption = json_frame['location_context']
+        self.decl = json_frame['calling']
+        self.line = json_frame['call_line']
+
+
+# A group of deserialized Environment bindings that correspond to a specific
+# location context.
+class EnvironmentFrame(object):
+    def __init__(self, json_frame):
+        super(EnvironmentFrame, self).__init__()
+        self.location_context = LocationContext(json_frame)
+        self.bindings = [EnvironmentBinding(b) for b in json_frame['items']] \
+            if json_frame['items'] is not None else []
+
+
+# A deserialized Environment.
+class Environment(object):
+    def __init__(self, json_e):
+        super(Environment, self).__init__()
+        self.frames = [EnvironmentFrame(f) for f in json_e]
+
+
+# A single binding in a deserialized RegionStore cluster.
+class StoreBinding(object):
+    def __init__(self, json_sb):
+        super(StoreBinding, self).__init__()
+        self.kind = json_sb['kind']
+        self.offset = json_sb['offset']
+        self.value = json_sb['value']
+
+
+# A single cluster of the deserialized RegionStore.
+class StoreCluster(object):
+    def __init__(self, json_sc):
+        super(StoreCluster, self).__init__()
+        self.base_region = json_sc['cluster']
+        self.bindings = [StoreBinding(b) for b in json_sc['items']]
+
+
+# A deserialized RegionStore.
+class Store(object):
+    def __init__(self, json_s):
+        super(Store, self).__init__()
+        self.clusters = [StoreCluster(c) for c in json_s]
+
+
+# A deserialized program state.
+class ProgramState(object):
+    def __init__(self, state_id, json_ps):
+        super(ProgramState, self).__init__()
+        logging.debug('Adding ProgramState ' + str(state_id))
+
+        self.state_id = state_id
+        self.store = Store(json_ps['store']) \
+            if json_ps['store'] is not None else None
+        self.environment = Environment(json_ps['environment']) \
+            if json_ps['environment'] is not None else None
+        # TODO: Objects under construction.
+        # TODO: Constraint ranges.
+        # TODO: Dynamic types of objects.
+        # TODO: Checker messages.
+
+
+# A deserialized exploded graph node. Has a default constructor because it
+# may be referenced as part of an edge before its contents are deserialized,
+# and in this moment we already need a room for predecessors and successors.
+class ExplodedNode(object):
+    def __init__(self):
+        super(ExplodedNode, self).__init__()
+        self.predecessors = []
+        self.successors = []
+
+    def construct(self, node_id, json_node):
+        logging.debug('Adding ' + node_id)
+        self.node_id = json_node['node_id']
+        self.ptr = json_node['pointer']
+        self.points = [ProgramPoint(p) for p in json_node['program_points']]
+        self.state = ProgramState(json_node['state_id'],
+                                  json_node['program_state']) \
+            if json_node['program_state'] is not None else None
+
+        assert self.node_name() == node_id
+
+    def node_name(self):
+        return 'Node' + self.ptr
+
+
+# A deserialized ExplodedGraph. Constructed by consuming a .dot file
+# line-by-line.
+class ExplodedGraph(object):
+    # Parse .dot files with regular expressions.
+    node_re = re.compile(
+        '^(Node0x[0-9a-f]*) \\[shape=record,.*label="{(.*)\\\\l}"\\];$')
+    edge_re = re.compile(
+        '^(Node0x[0-9a-f]*) -> (Node0x[0-9a-f]*);$')
+
+    def __init__(self):
+        super(ExplodedGraph, self).__init__()
+        self.nodes = collections.defaultdict(ExplodedNode)
+        self.root_id = None
+        self.incomplete_line = ''
+
+    def add_raw_line(self, raw_line):
+        if raw_line.startswith('//'):
+            return
+
+        # Allow line breaks by waiting for ';'. This is not valid in
+        # a .dot file, but it is useful for writing tests.
+        if len(raw_line) > 0 and raw_line[-1] != ';':
+            self.incomplete_line += raw_line
+            return
+        raw_line = self.incomplete_line + raw_line
+        self.incomplete_line = ''
+
+        # Apply regexps one by one to see if it's a node or an edge
+        # and extract contents if necessary.
+        logging.debug('Line: ' + raw_line)
+        result = self.edge_re.match(raw_line)
+        if result is not None:
+            logging.debug('Classified as edge line.')
+            pred = result.group(1)
+            succ = result.group(2)
+            self.nodes[pred].successors.append(succ)
+            self.nodes[succ].predecessors.append(pred)
+            return
+        result = self.node_re.match(raw_line)
+        if result is not None:
+            logging.debug('Classified as node line.')
+            node_id = result.group(1)
+            if len(self.nodes) == 0:
+                self.root_id = node_id
+            # Note: when writing tests you don't need to escape everything,
+            # even though in a valid dot file everything is escaped.
+            node_label = result.group(2).replace('\\l', '') \
+                                        .replace('&nbsp;', '') \
+                                        .replace('\\"', '"') \
+                                        .replace('\\{', '{') \
+                                        .replace('\\}', '}') \
+                                        .replace('\\<', '\\\\<') \
+                                        .replace('\\>', '\\\\>') \
+                                        .rstrip(',')
+            logging.debug(node_label)
+            json_node = json.loads(node_label)
+            self.nodes[node_id].construct(node_id, json_node)
+            return
+        logging.debug('Skipping.')
+
+
+# A visitor that dumps the ExplodedGraph into a DOT file with fancy HTML-based
+# syntax highlighing.
+class DotDumpVisitor(object):
+    def __init__(self):
+        super(DotDumpVisitor, self).__init__()
+
+    @staticmethod
+    def _dump_raw(s):
+        print(s, end='')
+
+    @staticmethod
+    def _dump(s):
+        print(s.replace('&', '&amp;')
+               .replace('{', '\\{')
+               .replace('}', '\\}')
+               .replace('\\<', '&lt;')
+               .replace('\\>', '&gt;')
+               .replace('\\l', '<br />')
+               .replace('|', ''), end='')
+
+    def visit_begin_graph(self, graph):
+        self._graph = graph
+        self._dump_raw('digraph "ExplodedGraph" {\n')
+        self._dump_raw('label="";\n')
+
+    def visit_program_point(self, p):
+        if p.kind in ['Edge', 'BlockEntrance', 'BlockExit']:
+            color = 'gold3'
+        elif p.kind in ['PreStmtPurgeDeadSymbols',
+                        'PostStmtPurgeDeadSymbols']:
+            color = 'red'
+        elif p.kind in ['CallEnter', 'CallExitBegin', 'CallExitEnd']:
+            color = 'blue'
+        elif p.kind in ['Statement']:
+            color = 'cyan3'
+        else:
+            color = 'forestgreen'
+
+        if p.kind == 'Statement':
+            if p.loc is not None:
+                self._dump('<tr><td align="left" width="0">'
+                           '%s:<b>%s</b>:<b>%s</b>:</td>'
+                           '<td align="left" width="0"><font color="%s">'
+                           '%s</font></td><td>%s</td></tr>'
+                           % (p.loc.filename, p.loc.line,
+                              p.loc.col, color, p.stmt_kind, p.pretty))
+            else:
+                self._dump('<tr><td align="left" width="0">'
+                           '<i>Invalid Source Location</i>:</td>'
+                           '<td align="left" width="0">'
+                           '<font color="%s">%s</font></td><td>%s</td></tr>'
+                           % (color, p.stmt_kind, p.pretty))
+        elif p.kind == 'Edge':
+            self._dump('<tr><td width="0">-</td>'
+                       '<td align="left" width="0">'
+                       '<font color="%s">%s</font></td><td align="left">'
+                       '[B%d] -\\> [B%d]</td></tr>'
+                       % (color, p.kind, p.src_id, p.dst_id))
+        else:
+            # TODO: Print more stuff for other kinds of points.
+            self._dump('<tr><td width="0">-</td>'
+                       '<td align="left" width="0" colspan="2">'
+                       '<font color="%s">%s</font></td></tr>'
+                       % (color, p.kind))
+
+    def visit_environment(self, e):
+        self._dump('<table border="0">')
+
+        for f in e.frames:
+            self._dump('<tr><td align="left"><b>%s</b></td>'
+                       '<td align="left"><font color="grey60">%s </font>'
+                       '%s</td></tr>'
+                       % (f.location_context.caption,
+                          f.location_context.decl,
+                          ('(line %s)' % f.location_context.line)
+                          if f.location_context.line is not None else ''))
+            for b in f.bindings:
+                self._dump('<tr><td align="left"><i>S%s</i></td>'
+                           '<td align="left">%s</td>'
+                           '<td align="left">%s</td></tr>'
+                           % (b.stmt_id, b.pretty, b.value))
+
+        self._dump('</table>')
+
+    def visit_store(self, s):
+        self._dump('<table border="0">')
+
+        for c in s.clusters:
+            for b in c.bindings:
+                self._dump('<tr><td align="left">%s</td>'
+                           '<td align="left">%s</td>'
+                           '<td align="left">%s</td>'
+                           '<td align="left">%s</td></tr>'
+                           % (c.base_region, b.offset,
+                              '(<i>Default</i>)' if b.kind == 'Default'
+                              else '',
+                              b.value))
+
+        self._dump('</table>')
+
+    def visit_state(self, s):
+        self._dump('<tr><td align="left">'
+                   '<b>Store: </b>')
+        if s.store is None:
+            self._dump('<i> Nothing!</i>')
+        else:
+            self._dump('</td></tr>'
+                       '<tr><td align="left">')
+            self.visit_store(s.store)
+
+        self._dump('</td></tr><hr />'
+                   '<tr><td align="left">'
+                   '<b>Environment: </b>')
+        if s.environment is None:
+            self._dump('<i> Nothing!</i>')
+        else:
+            self._dump('</td></tr>'
+                       '<tr><td align="left">')
+            self.visit_environment(s.environment)
+
+        self._dump('</td></tr>')
+
+    def visit_node(self, node):
+        self._dump('%s [shape=record,label=<<table border="0">'
+                   % (node.node_name()))
+
+        self._dump('<tr><td bgcolor="grey"><b>Node %d (%s) - '
+                   'State %s</b></td></tr>'
+                   % (node.node_id, node.ptr, node.state.state_id
+                      if node.state is not None else 'Unspecified'))
+        self._dump('<tr><td align="left" width="0">')
+        if len(node.points) > 1:
+            self._dump('<b>Program points:</b></td></tr>')
+        else:
+            self._dump('<b>Program point:</b></td></tr>')
+        self._dump('<tr><td align="left" width="0">'
+                   '<table border="0" align="left" width="0">')
+        for p in node.points:
+            self.visit_program_point(p)
+        self._dump('</table></td></tr>')
+
+        if node.state is not None:
+            self._dump('<hr />')
+            self.visit_state(node.state)
+        self._dump_raw('</table>>];\n')
+
+    def visit_edge(self, pred, succ):
+        self._dump_raw('%s -> %s;\n' % (pred.node_name(), succ.node_name()))
+
+    def visit_end_of_graph(self):
+        self._dump_raw('}\n')
+
+
+# A class that encapsulates traversal of the ExplodedGraph. Different explorer
+# kinds could potentially traverse specific sub-graphs.
+class Explorer(object):
+    def __init__(self):
+        super(Explorer, self).__init__()
+
+    def explore(self, graph, visitor):
+        visitor.visit_begin_graph(graph)
+        for node in sorted(graph.nodes):
+            logging.debug('Visiting ' + node)
+            visitor.visit_node(graph.nodes[node])
+            for succ in sorted(graph.nodes[node].successors):
+                logging.debug('Visiting edge: %s -> %s ' % (node, succ))
+                visitor.visit_edge(graph.nodes[node], graph.nodes[succ])
+        visitor.visit_end_of_graph()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filename', type=str)
+    parser.add_argument('-d', '--debug', action='store_const', dest='loglevel',
+                        const=logging.DEBUG, default=logging.WARNING,
+                        help='enable debug prints')
+    parser.add_argument('-v', '--verbose', action='store_const',
+                        dest='loglevel', const=logging.INFO,
+                        default=logging.WARNING,
+                        help='enable info prints')
+    args = parser.parse_args()
+    logging.basicConfig(level=args.loglevel)
+
+    graph = ExplodedGraph()
+    with open(args.filename) as fd:
+        for raw_line in fd:
+            raw_line = raw_line.strip()
+            graph.add_raw_line(raw_line)
+
+    explorer = Explorer()
+    visitor = DotDumpVisitor()
+    explorer.explore(graph, visitor)
+
+
+if __name__ == '__main__':
+    main()

From 5f79d749466239f1d9cc55871c9f5c6cc2451bab Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 22:52:34 +0000
Subject: [PATCH 0859/1176] [X86] Add test cases for masked store and masked
 scatter with an all zeroes mask. Fix bug in ScalarizeMaskedMemIntrin

Need to cast only to Constant instead of ConstantVector to allow
ConstantAggregateZero.

llvm-svn: 362341
---
 llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp |  2 +-
 .../test/CodeGen/X86/masked_gather_scatter.ll | 37 +++++++++++++++++++
 llvm/test/CodeGen/X86/masked_store.ll         | 28 ++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index e2ee9f28f3b55..7776dffb4e9c8 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -488,7 +488,7 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
   // Shorten the way if the mask is a vector of constants.
   if (isConstantIntVector(Mask)) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-      if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
       Value *OneElt =
           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 8ee23d6feff13..2d6b19b334e02 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2964,3 +2964,40 @@ define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %f
   %res3 = fadd <16 x float> %res2, %res
   ret <16 x float>%res3
 }
+
+define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
+; KNL_64-LABEL: zero_mask:
+; KNL_64:       # %bb.0:
+; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT:    kxorw %k0, %k0, %k1
+; KNL_64-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_64-NEXT:    vzeroupper
+; KNL_64-NEXT:    retq
+;
+; KNL_32-LABEL: zero_mask:
+; KNL_32:       # %bb.0:
+; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT:    vpsllq $32, %xmm1, %xmm1
+; KNL_32-NEXT:    vpsraq $32, %zmm1, %zmm1
+; KNL_32-NEXT:    kxorw %k0, %k0, %k1
+; KNL_32-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_32-NEXT:    vzeroupper
+; KNL_32-NEXT:    retl
+;
+; SKX-LABEL: zero_mask:
+; SKX:       # %bb.0:
+; SKX-NEXT:    kxorw %k0, %k0, %k1
+; SKX-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX-NEXT:    retq
+;
+; SKX_32-LABEL: zero_mask:
+; SKX_32:       # %bb.0:
+; SKX_32-NEXT:    vpsllq $32, %xmm1, %xmm1
+; SKX_32-NEXT:    vpsraq $32, %xmm1, %xmm1
+; SKX_32-NEXT:    kxorw %k0, %k0, %k1
+; SKX_32-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX_32-NEXT:    retl
+  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index efbb1ef8cc667..180197ccbcfbe 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -5505,6 +5505,34 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
   ret void
 }
 
+define void @zero_mask(<2 x double>* %addr, <2 x double> %val) {
+; SSE-LABEL: zero_mask:
+; SSE:       ## %bb.0:
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: zero_mask:
+; AVX1OR2:       ## %bb.0:
+; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmaskmovpd %xmm0, %xmm1, (%rdi)
+; AVX1OR2-NEXT:    retq
+;
+; AVX512F-LABEL: zero_mask:
+; AVX512F:       ## %bb.0:
+; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    kxorw %k0, %k0, %k1
+; AVX512F-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: zero_mask:
+; AVX512VL:       ## %bb.0:
+; AVX512VL-NEXT:    kxorw %k0, %k0, %k1
+; AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT:    retq
+  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer)
+  ret void
+}
+
 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
 declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)

From 50b35caf30513174640493c72047d9f9bdc4d985 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 2 Jun 2019 22:52:38 +0000
Subject: [PATCH 0860/1176] [DAGCombiner][X86] Fold away masked store and
 scatter with all zeroes mask.

Similar to what was done for masked load and gather.

llvm-svn: 362342
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 29 +++++++++------
 .../test/CodeGen/X86/masked_gather_scatter.ll | 35 ++-----------------
 llvm/test/CodeGen/X86/masked_store.ll         | 22 ++----------
 3 files changed, 24 insertions(+), 62 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a866dbb250a5c..5da66eb8dc4d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8006,14 +8006,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 }
 
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
-  SDValue Data  = MSC->getValue();
+  SDValue Data = MSC->getValue();
+  SDValue Chain = MSC->getChain();
   SDLoc DL(N);
 
+  // Zap scatters with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MSCATTER data type requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -8031,8 +8036,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   EVT LoVT, HiVT;
   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
 
-  SDValue Chain = MSC->getChain();
-
   EVT MemoryVT = MSC->getMemoryVT();
   unsigned Alignment = MSC->getOriginalAlignment();
 
@@ -8065,15 +8068,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data  = MST->getValue();
+  SDValue Data = MST->getValue();
+  SDValue Chain = MST->getChain();
   EVT VT = Data.getValueType();
   SDLoc DL(N);
 
+  // Zap masked stores with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MSTORE data type requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -8087,7 +8095,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
     SDValue MaskLo, MaskHi, Lo, Hi;
     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
 
-    SDValue Chain = MST->getChain();
     SDValue Ptr   = MST->getBasePtr();
 
     EVT MemoryVT = MST->getMemoryVT();
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 2d6b19b334e02..2c4294e07db64 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2966,38 +2966,9 @@ define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %f
 }
 
 define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
-; KNL_64-LABEL: zero_mask:
-; KNL_64:       # %bb.0:
-; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL_64-NEXT:    kxorw %k0, %k0, %k1
-; KNL_64-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
-; KNL_64-NEXT:    vzeroupper
-; KNL_64-NEXT:    retq
-;
-; KNL_32-LABEL: zero_mask:
-; KNL_32:       # %bb.0:
-; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL_32-NEXT:    vpsllq $32, %xmm1, %xmm1
-; KNL_32-NEXT:    vpsraq $32, %zmm1, %zmm1
-; KNL_32-NEXT:    kxorw %k0, %k0, %k1
-; KNL_32-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
-; KNL_32-NEXT:    vzeroupper
-; KNL_32-NEXT:    retl
-;
-; SKX-LABEL: zero_mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    kxorw %k0, %k0, %k1
-; SKX-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
-; SKX-NEXT:    retq
-;
-; SKX_32-LABEL: zero_mask:
-; SKX_32:       # %bb.0:
-; SKX_32-NEXT:    vpsllq $32, %xmm1, %xmm1
-; SKX_32-NEXT:    vpsraq $32, %xmm1, %xmm1
-; SKX_32-NEXT:    kxorw %k0, %k0, %k1
-; SKX_32-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
-; SKX_32-NEXT:    retl
+; ALL-LABEL: zero_mask:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ret{{[l|q]}}
   call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 180197ccbcfbe..984b995877ff2 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -5510,25 +5510,9 @@ define void @zero_mask(<2 x double>* %addr, <2 x double> %val) {
 ; SSE:       ## %bb.0:
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: zero_mask:
-; AVX1OR2:       ## %bb.0:
-; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vmaskmovpd %xmm0, %xmm1, (%rdi)
-; AVX1OR2-NEXT:    retq
-;
-; AVX512F-LABEL: zero_mask:
-; AVX512F:       ## %bb.0:
-; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    kxorw %k0, %k0, %k1
-; AVX512F-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
-; AVX512F-NEXT:    vzeroupper
-; AVX512F-NEXT:    retq
-;
-; AVX512VL-LABEL: zero_mask:
-; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    kxorw %k0, %k0, %k1
-; AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) {%k1}
-; AVX512VL-NEXT:    retq
+; AVX-LABEL: zero_mask:
+; AVX:       ## %bb.0:
+; AVX-NEXT:    retq
   call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer)
   ret void
 }

From 0ac4ab48fcf9ec1eb56162cfac97cd1a044b8bc3 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Sun, 2 Jun 2019 23:17:56 +0000
Subject: [PATCH 0861/1176] [analyzer] exploded-graph-rewriter: An attempt to
 fix Windows buildbots.

Breakage caused by r362340.

llvm-svn: 362343
---
 clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
index 1ebb25a4001a6..98b9da8eb4588 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
+++ b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
@@ -7,7 +7,7 @@ config.test_format = lit.formats.ShTest(use_lit_shell == "0")
 
 config.substitutions.append(('%exploded_graph_rewriter',
                              lit.util.which('exploded-graph-rewriter.py',
-                                            os.path.join(config.clang_src_dir,
-                                                         'utils', 'analyzer'))))
+                                            config.clang_src_dir +
+                                            '/utils/analyzer')))
 
 config.suffixes = ['.dot']

From 54362477c7bc0d728c323815b689017800c1e7d5 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 2 Jun 2019 23:26:57 +0000
Subject: [PATCH 0862/1176] llvm-undname; Add more test coverage for
 demangleFunctionClass()

Also add two FC_Far that seem to be missing, by symmetry from
the public and protected cases. (But FC_Far isn't really a thing
anymore, so this doesn't really have an observable effect.)

llvm-svn: 362344
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp |  4 ++--
 llvm/test/Demangle/ms-mangle.test       | 28 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index ed9052f40570a..b93a84ea91d1f 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -1587,11 +1587,11 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
   case 'C':
     return FuncClass(FC_Private | FC_Static);
   case 'D':
-    return FuncClass(FC_Private | FC_Static);
+    return FuncClass(FC_Private | FC_Static | FC_Far);
   case 'E':
     return FuncClass(FC_Private | FC_Virtual);
   case 'F':
-    return FuncClass(FC_Private | FC_Virtual);
+    return FuncClass(FC_Private | FC_Virtual | FC_Far);
   case 'G':
     return FuncClass(FC_Private | FC_StaticThisAdjust);
   case 'H':
diff --git a/llvm/test/Demangle/ms-mangle.test b/llvm/test/Demangle/ms-mangle.test
index e7badeee64223..bbac3ebb99719 100644
--- a/llvm/test/Demangle/ms-mangle.test
+++ b/llvm/test/Demangle/ms-mangle.test
@@ -401,3 +401,31 @@
 
 ??$emplace_back@ABH@?$vector@HV?$allocator@H@std@@@std@@QAE?A?<decltype-auto>@@ABH@Z
 ; CHECK: <decltype-auto> __thiscall std::vector<int, class std::allocator<int>>::emplace_back<int const &>(int const &)
+
+
+?pub_foo@S@@QAEXXZ
+; CHECK: public: void __thiscall S::pub_foo(void)
+
+?pub_stat_foo@S@@SAXXZ
+; CHECK: public: static void __cdecl S::pub_stat_foo(void)
+
+?pub_virt_foo@S@@UAEXXZ
+; CHECK: public: virtual void __thiscall S::pub_virt_foo(void)
+
+?prot_foo@S@@IAEXXZ
+; CHECK: protected: void __thiscall S::prot_foo(void)
+
+?prot_stat_foo@S@@KAXXZ
+; CHECK: protected: static void __cdecl S::prot_stat_foo(void)
+
+?prot_virt_foo@S@@MAEXXZ
+; CHECK: protected: virtual void __thiscall S::prot_virt_foo(void)
+
+?priv_foo@S@@AAEXXZ
+; CHECK: private: void __thiscall S::priv_foo(void)
+
+?priv_stat_foo@S@@CAXXZ
+; CHECK: private: static void __cdecl S::priv_stat_foo(void)
+
+?priv_virt_foo@S@@EAEXXZ
+; CHECK: private: virtual void __thiscall S::priv_virt_foo(void)

From 2d59bab568feb3d0a69cb65dd31a6bcba6b4c3aa Mon Sep 17 00:00:00 2001
From: Mike Spertus <mike@spertus.com>
Date: Sun, 2 Jun 2019 23:33:32 +0000
Subject: [PATCH 0863/1176] Update MSVC Visualizer to reflect new variadic
 PointerUnion

This changed updates the MSVC Visualizer to work with the recent change
of PointerUnion into a variadic template. As an extra bonus, we
fix some bit rot in the SmallPtrSet visualizer as well

llvm-svn: 362345
---
 llvm/utils/LLVMVisualizers/llvm.natvis | 109 ++++++++-----------------
 1 file changed, 36 insertions(+), 73 deletions(-)

diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis
index 42d3a122bece5..edc533f93770f 100644
--- a/llvm/utils/LLVMVisualizers/llvm.natvis
+++ b/llvm/utils/LLVMVisualizers/llvm.natvis
@@ -85,98 +85,61 @@ For later versions of Visual Studio, no setup is required.
     </Expand>
   </Type>
 
-  <Type Name="llvm::PointerIntPair&lt;*,*,*,*&gt;">
+  <Type Name="llvm::PointerIntPair&lt;*&gt;">
     <DisplayString>{$T5::IntMask}: {($T1)(Value &amp; $T5::PointerBitMask)} [{($T3)((Value &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)}]</DisplayString>
     <Expand>
       <Item Name="[ptr]">($T1)(Value &amp; $T5::PointerBitMask)</Item>
       <Item Name="[int]">($T3)((Value &gt;&gt; $T5::IntShift) &amp; $T5::IntMask)</Item>
     </Expand>
   </Type>
-
   <!-- PointerUnion types - In addition to the regular view, which displays the pointer, there is a "deref" view that
        displays the pointed to object, which is often needed by other visualizers -->
-  <Type Name="llvm::PointerUnion&lt;*,*&gt;">
-    <DisplayString  Optional="true" IncludeView="deref" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) == 0">{*($T1)(Val.Value &amp; ValTy::InfoTy::PointerBitMask)}</DisplayString>
-    <DisplayString  Optional="true" IncludeView="deref" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) != 0">{*($T2)(Val.Value &amp; ValTy::InfoTy::PointerBitMask)}</DisplayString>
-    <DisplayString  Optional="true" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) == 0">{"$T1", s8b}: {($T1)(Val.Value &amp; ValTy::InfoTy::PointerBitMask)}</DisplayString>
-    <DisplayString  Optional="true" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) != 0">{"$T2", s8b}: {($T2)(Val.Value &amp; ValTy::InfoTy::PointerBitMask)}</DisplayString>
-    <Expand>
-      <Item Name="[Holds]" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) == 0">"$T1", s8b</Item>
-      <Item Name="[Ptr]" Optional="true"  Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) == 0">($T1)(Val.Value &amp; ValTy::InfoTy::PointerBitMask)</Item>
-      <Item Name="[Holds]" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) != 0">"$T2", s8b</Item>
-      <Item Name="[Ptr]" Optional="true" Condition="((Val.Value &gt;&gt; ValTy::InfoTy::IntShift) &amp; ValTy::InfoTy::IntMask) != 0">($T2)(Val.Value &amp; ValTy::InfoTy::PointerBitMask)</Item>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::PointerUnion3&lt;*,*,*&gt;">
-    <DisplayString Optional="true" IncludeView="deref" Condition="(Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1">{Val,view(deref)}</DisplayString>
-    <DisplayString Optional="true" IncludeView="deref">{*(InnerUnion*)&amp;Val.Val.Value,view(deref)}</DisplayString>
-    <DisplayString Condition="(Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1">{Val}</DisplayString>
-    <DisplayString>{*(InnerUnion*)&amp;Val.Val.Value}</DisplayString>
-    <Expand>
-      <Item Name="[Holds]" Condition="(Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1">"$T3", s8b</Item>
-      <Item Name="[Ptr]" Condition="(Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1">($T3)(Val.Val.Value &amp; ValTy::ValTy::InfoTy::PointerBitMask)</Item>
-      <ExpandedItem Condition="!((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1)">*(InnerUnion*)&amp;Val.Val.Value</ExpandedItem>
-    </Expand>
-  </Type>
-
-  <Type Name="llvm::PointerUnion4&lt;*,*,*,*&gt;">
-    <DisplayString Optional="true" IncludeView="deref" 
-                   Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-      {*($T1)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)}
+  <Type Name="llvm::pointer_union_detail::PointerUnionMembers&lt;*&gt;">
+    <DisplayString Optional="true" IncludeView="deref" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
+      {*($T4)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString Optional="true" IncludeView="deref" 
-                   Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-      {*($T2)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
+      {($T4)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString  Optional="true" IncludeView="deref" 
-                    Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-      {*($T3)(Val.Val.Value &amp; InnerUnion2::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" IncludeView="deref" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
+      {*($T5)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString  Optional="true" IncludeView="deref" 
-                    Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-      {*($T4)(Val.Val.Value &amp; InnerUnion2::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
+      {($T5)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-      {"$T1", s8b}: {($T1)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" IncludeView="deref" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 2">
+      {*($T6)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-      {"$T2", s8b}: {($T2)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 2">
+      {($T6)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-      {"$T3", s8b}: {($T3)(Val.Val.Value &amp; InnerUnion2::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" IncludeView="deref" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 3">
+      {*($T7)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
-    <DisplayString Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-      {"$T4", s8b}: {($T4)(Val.Val.Value &amp; InnerUnion2::ValTy::InfoTy::PointerBitMask)}
+    <DisplayString Optional="true" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 3">
+      {($T7)(Val.Value &amp; $T2::InfoTy::PointerBitMask)}
     </DisplayString>
+    <DisplayString>Unexpected index in PointerUnion: {(Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask}</DisplayString>
     <Expand>
-      <Item Name="[Holds]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-        "$T1", s8b
-      </Item>
-      <Item Name="[Ptr]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-        ($T1)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)
-      </Item>
-      <Item Name="[Holds]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-        "$T2", s8b
+      <Item Name="[Holds]" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">"$T4",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 0">
+        ($T4)(Val.Value &amp; $T2::InfoTy::PointerBitMask)
       </Item>
-      <Item Name="[Ptr]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 0 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion1::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-        ($T2)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)
+      <Item Name="[Holds]" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">"$T5",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 1">
+        ($T5)(Val.Value &amp; $T2::InfoTy::PointerBitMask)
       </Item>
-      <Item Name="[Holds]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-        "$T3", s8b
+      <Item Name="[Holds]" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 2">"$T6",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 2">
+        ($T6)(Val.Value &amp; $T2::InfoTy::PointerBitMask)
       </Item>
-      <Item Name="[Ptr]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 0">
-        ($T3)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)
-      </Item>
-      <Item Name="[Holds]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-        "$T4", s8b
-      </Item>
-      <Item Name="[Ptr]" Condition="((Val.Val.Value &gt;&gt; ValTy::ValTy::InfoTy::IntShift) &amp; 1) == 1 &amp;&amp; ((Val.Val.Value &gt;&gt; InnerUnion2::ValTy::InfoTy::IntShift) &amp; 1) == 1">
-        ($T4)(Val.Val.Value &amp; InnerUnion1::ValTy::InfoTy::PointerBitMask)
+      <Item Name="[Holds]" Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 3">"$T6",s8b</Item>
+      <Item Name="[Ptr]" Optional="true"  Condition="((Val.Value&gt;&gt;$T2::InfoTy::IntShift) &amp; $T2::InfoTy::IntMask) == 3">
+        ($T7)(Val.Value &amp; $T2::InfoTy::PointerBitMask)
       </Item>
     </Expand>
   </Type>
-
+ 
   <Type Name="llvm::iplist&lt;*,*&gt;">
     <DisplayString Condition="Head == 0">{{ empty }}</DisplayString>
     <DisplayString Condition="Head != 0">{{ head={Head} }}</DisplayString>
@@ -200,13 +163,13 @@ For later versions of Visual Studio, no setup is required.
   </Type>
 
   <Type Name="llvm::SmallPtrSet&lt;*,*&gt;">
-    <DisplayString Condition="CurArray == SmallArray">{{ [Small Mode] size={NumElements}, capacity={CurArraySize} }}</DisplayString>
-    <DisplayString Condition="CurArray != SmallArray">{{ [Big Mode] size={NumElements}, capacity={CurArraySize} }}</DisplayString>
+    <DisplayString Condition="CurArray == SmallArray">{{ [Small Mode] size={NumNonEmpty}, capacity={CurArraySize} }}</DisplayString>
+    <DisplayString Condition="CurArray != SmallArray">{{ [Big Mode] size={NumNonEmpty}, capacity={CurArraySize} }}</DisplayString>
     <Expand>
-      <Item Name="[size]">NumElements</Item>
+      <Item Name="[size]">NumNonEmpty</Item>
       <Item Name="[capacity]">CurArraySize</Item>
       <ArrayItems>
-        <Size>CurArraySize</Size>
+        <Size>NumNonEmpty</Size>
         <ValuePointer>($T1*)CurArray</ValuePointer>
       </ArrayItems>
     </Expand>

From 3cbb8b83917257fda2f8cc81d97843823eaf884b Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Sun, 2 Jun 2019 23:48:28 +0000
Subject: [PATCH 0864/1176] llvm-undname: Add coverage for some error paths

llvm-svn: 362346
---
 llvm/test/Demangle/invalid-manglings.test | 50 +++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index 6cd025a1eeca0..8490c02501f61 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -174,3 +174,53 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ??$foo@$1??_C@_02PCEFGMJL@hi?$AA@@
 ; CHECK-NEXT: error: Invalid mangled name
+
+??_C@
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_3
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_3
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_01
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_01
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_0101234567@
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_0101234567@
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_0101234567@?
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_0101234567@?
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_0101234567@?$
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_0101234567@?$
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_0101234567@?$za
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_0101234567@?$za
+; CHECK-NEXT: error: Invalid mangled name
+
+??_C@_0101234567@?$az
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_0101234567@?$az
+; CHECK-NEXT: error: Invalid mangled name
+
+??@foo
+; CHECK-EMPTY:
+; CHECK-NEXT: ??@foo
+; CHECK-NEXT: error: Invalid mangled name

From c3236cd200ae19618ab75a7cc6c71472512730e5 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Mon, 3 Jun 2019 00:21:00 +0000
Subject: [PATCH 0865/1176] Revert "[analyzer] exploded-graph-rewriter: An
 attempt to fix Windows buildbots."

This reverts commit r362343.

Instead, disable tests on Windows for now.

llvm-svn: 362347
---
 clang/test/Analysis/exploded-graph-rewriter/edge.dot          | 3 +++
 clang/test/Analysis/exploded-graph-rewriter/empty.dot         | 3 +++
 clang/test/Analysis/exploded-graph-rewriter/environment.dot   | 3 +++
 clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg     | 4 ++--
 .../test/Analysis/exploded-graph-rewriter/program_points.dot  | 3 +++
 clang/test/Analysis/exploded-graph-rewriter/store.dot         | 3 +++
 6 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/clang/test/Analysis/exploded-graph-rewriter/edge.dot b/clang/test/Analysis/exploded-graph-rewriter/edge.dot
index 5fc695c1ce018..fa4b017e8a971 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/edge.dot
+++ b/clang/test/Analysis/exploded-graph-rewriter/edge.dot
@@ -1,5 +1,8 @@
 // RUN: %exploded_graph_rewriter %s | FileCheck %s
 
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
 Node0x1 [shape=record,label=
  "{{ "node_id": 1, "pointer": "0x1",
      "program_state": null, "program_points": []}\l}"];
diff --git a/clang/test/Analysis/exploded-graph-rewriter/empty.dot b/clang/test/Analysis/exploded-graph-rewriter/empty.dot
index 85c227dbab8a0..3e0733c5173e7 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/empty.dot
+++ b/clang/test/Analysis/exploded-graph-rewriter/empty.dot
@@ -1,5 +1,8 @@
 // RUN: %exploded_graph_rewriter %s | FileCheck %s
 
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
 digraph "Exploded Graph" {
   label="Exploded Graph";
 }
diff --git a/clang/test/Analysis/exploded-graph-rewriter/environment.dot b/clang/test/Analysis/exploded-graph-rewriter/environment.dot
index 31d2d72551d41..7271684642a28 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/environment.dot
+++ b/clang/test/Analysis/exploded-graph-rewriter/environment.dot
@@ -1,5 +1,8 @@
 // RUN: %exploded_graph_rewriter %s | FileCheck %s
 
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
 // CHECK: <b>Environment: </b>
 // CHECK-SAME: <table border="0">
 // CHECK-SAME:   <tr>
diff --git a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
index 98b9da8eb4588..1ebb25a4001a6 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
+++ b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
@@ -7,7 +7,7 @@ config.test_format = lit.formats.ShTest(use_lit_shell == "0")
 
 config.substitutions.append(('%exploded_graph_rewriter',
                              lit.util.which('exploded-graph-rewriter.py',
-                                            config.clang_src_dir +
-                                            '/utils/analyzer')))
+                                            os.path.join(config.clang_src_dir,
+                                                         'utils', 'analyzer'))))
 
 config.suffixes = ['.dot']
diff --git a/clang/test/Analysis/exploded-graph-rewriter/program_points.dot b/clang/test/Analysis/exploded-graph-rewriter/program_points.dot
index a7696d27a9f7f..aadabf3955641 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/program_points.dot
+++ b/clang/test/Analysis/exploded-graph-rewriter/program_points.dot
@@ -1,5 +1,8 @@
 // RUN: %exploded_graph_rewriter %s | FileCheck %s
 
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
 // CHECK: <b>Program point:</b>
 // CHECK-SAME: <table border="0" align="left" width="0">
 // CHECK-SAME:   <tr>
diff --git a/clang/test/Analysis/exploded-graph-rewriter/store.dot b/clang/test/Analysis/exploded-graph-rewriter/store.dot
index 0f0fa928b288a..8152a9929fe90 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/store.dot
+++ b/clang/test/Analysis/exploded-graph-rewriter/store.dot
@@ -1,5 +1,8 @@
 // RUN: %exploded_graph_rewriter %s | FileCheck %s
 
+// FIXME: Substitution doesn't seem to work on Windows.
+// UNSUPPORTED: system-windows
+
 // CHECK: <b>Store: </b>
 // CHECK-SAME: <table border="0">
 // CHECK-SAME:   <tr>

From 38f11825d18eab5bc140f891dc4c024e45283a90 Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Mon, 3 Jun 2019 00:47:32 +0000
Subject: [PATCH 0866/1176] [coroutines][PR41909] Don't build dependent
 coroutine statements for generic lambda

Summary:
https://bugs.llvm.org/show_bug.cgi?id=41909 describes an issue in which
a generic lambda that takes a dependent argument `auto set` causes the
template instantiation machinery for coroutine body statements to crash
with an ICE. The issue is two-fold:

1. The paths taken by the template instantiator contain several asserts
   that the coroutine promise must not have a dependent type.
2. The template instantiator unconditionally builds corotuine statements
   that depend on the promise type, which cannot be dependent.

To work around the issue, prevent the template instantiator from building
dependent coroutine statements if the coroutine promise type is dependent.
Since we only expect this to occur in the case of a generic lambda, limit
the workaround behavior to just that case.

Reviewers: GorNishanov, EricWF, lewissbaker, tks2103

Reviewed By: GorNishanov

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62550

llvm-svn: 362348
---
 clang/lib/Sema/TreeTransform.h    | 23 ++++++++++++++++-------
 clang/test/SemaCXX/coroutines.cpp | 10 ++++++++++
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index f46193502b6a6..592787a5870ce 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -7163,13 +7163,22 @@ TreeTransform<Derived>::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) {
   Builder.ReturnValue = Res.get();
 
   if (S->hasDependentPromiseType()) {
-    assert(!Promise->getType()->isDependentType() &&
-           "the promise type must no longer be dependent");
-    assert(!S->getFallthroughHandler() && !S->getExceptionHandler() &&
-           !S->getReturnStmtOnAllocFailure() && !S->getDeallocate() &&
-           "these nodes should not have been built yet");
-    if (!Builder.buildDependentStatements())
-      return StmtError();
+    // PR41909: We may find a generic coroutine lambda definition within a
+    // template function that is being instantiated. In this case, the lambda
+    // will have a dependent promise type, until it is used in an expression
+    // that creates an instantiation with a non-dependent promise type. We
+    // should not assert or build coroutine dependent statements for such a
+    // generic lambda.
+    auto *MD = dyn_cast_or_null<CXXMethodDecl>(FD);
+    if (!MD || !MD->getParent()->isGenericLambda()) {
+      assert(!Promise->getType()->isDependentType() &&
+             "the promise type must no longer be dependent");
+      assert(!S->getFallthroughHandler() && !S->getExceptionHandler() &&
+             !S->getReturnStmtOnAllocFailure() && !S->getDeallocate() &&
+             "these nodes should not have been built yet");
+      if (!Builder.buildDependentStatements())
+        return StmtError();
+    }
   } else {
     if (auto *OnFallthrough = S->getFallthroughHandler()) {
       StmtResult Res = getDerived().TransformStmt(OnFallthrough);
diff --git a/clang/test/SemaCXX/coroutines.cpp b/clang/test/SemaCXX/coroutines.cpp
index 99964ef6bcb1b..1286ca4628b7b 100644
--- a/clang/test/SemaCXX/coroutines.cpp
+++ b/clang/test/SemaCXX/coroutines.cpp
@@ -720,6 +720,16 @@ coro<good_promise_1> ok_static_coawait() {
   co_await 42;
 }
 
+template<typename T> void ok_generic_lambda_coawait_PR41909() {
+  [](auto& arg) -> coro<good_promise_1> { // expected-warning {{expression result unused}}
+    co_await 12;
+  };
+  [](auto &arg) -> coro<good_promise_1> {
+    co_await 24;
+  }("argument");
+}
+template void ok_generic_lambda_coawait_PR41909<int>(); // expected-note {{in instantiation of function template specialization 'ok_generic_lambda_coawait_PR41909<int>' requested here}}
+
 template<> struct std::experimental::coroutine_traits<int, int, const char**>
 { using promise_type = promise; };
 

From 382320ea025ff5c2c4d54389c822c6b817d67d4a Mon Sep 17 00:00:00 2001
From: Tom Tan <Tom.Tan@microsoft.com>
Date: Mon, 3 Jun 2019 00:48:16 +0000
Subject: [PATCH 0867/1176] [COFF, ARM64] Fix CodeView API change for
 getRegisterNames

Change rL362280 changed CodeView API getRegisterNames() by adding an input
parameter in CPUType. It is called in LLDB and needs to be updated.

Differential Revision: https://reviews.llvm.org/D62772

llvm-svn: 362349
---
 .../NativePDB/PdbFPOProgramToDWARFExpression.cpp    | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpression.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpression.cpp
index 405d0157a1c46..79dd010ff3119 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpression.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpression.cpp
@@ -25,8 +25,19 @@ using namespace lldb_private::postfix;
 
 static uint32_t ResolveLLDBRegisterNum(llvm::StringRef reg_name, llvm::Triple::ArchType arch_type) {
   // lookup register name to get lldb register number
+  llvm::codeview::CPUType cpu_type;
+  switch (arch_type) {
+    case llvm::Triple::ArchType::aarch64:
+      cpu_type = llvm::codeview::CPUType::ARM64;
+      break;
+
+    default:
+      cpu_type = llvm::codeview::CPUType::X64;
+      break;
+  }
+
   llvm::ArrayRef<llvm::EnumEntry<uint16_t>> register_names =
-      llvm::codeview::getRegisterNames();
+      llvm::codeview::getRegisterNames(cpu_type);
   auto it = llvm::find_if(
       register_names,
       [&reg_name](const llvm::EnumEntry<uint16_t> &register_entry) {

From e71963c850d42f6d318af984d88afe859c2ba5ff Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Mon, 3 Jun 2019 01:30:19 +0000
Subject: [PATCH 0868/1176] Recommit r360171: [DAGCombiner] Avoid creating
 large tokenfactors in visitTokenFactor.

If we hit the limit, we do expand the outstanding tokenfactors.
Otherwise, we might drop nodes with users in the unexpanded
tokenfactors. This fixes the crashes reported by Jordan Rupprecht.

Reviewers: niravd, spatel, craig.topper, rupprecht

Reviewed By: niravd

Differential Revision: https://reviews.llvm.org/D62633

llvm-svn: 362350
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +++++++-
 .../X86/dagcombine-tokenfactor-limit-crash.ll | 59 +++++++++++++++++++
 2 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/dagcombine-tokenfactor-limit-crash.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5da66eb8dc4d0..27e56a3bbc71d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -111,6 +111,10 @@ static cl::opt<bool>
   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                     cl::desc("DAG combiner may split indexing from loads"));
 
+static cl::opt<unsigned> TokenFactorInlineLimit(
+    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+    cl::desc("Limit the number of operands to inline for Token Factors"));
+
 namespace {
 
   class DAGCombiner {
@@ -1801,8 +1805,19 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
   // Iterate through token factors.  The TFs grows when new token factors are
   // encountered.
   for (unsigned i = 0; i < TFs.size(); ++i) {
-    SDNode *TF = TFs[i];
+    // Limit number of nodes to inline, to avoid quadratic compile times.
+    // We have to add the outstanding Token Factors to Ops, otherwise we might
+    // drop Ops from the resulting Token Factors.
+    if (Ops.size() > TokenFactorInlineLimit) {
+      for (unsigned j = i; j < TFs.size(); j++)
+        Ops.emplace_back(TFs[j], 0);
+      // Drop unprocessed Token Factors from TFs, so we do not add them to the
+      // combiner worklist later.
+      TFs.resize(i);
+      break;
+    }
 
+    SDNode *TF = TFs[i];
     // Check each of the operands.
     for (const SDValue &Op : TF->op_values()) {
       switch (Op.getOpcode()) {
@@ -1816,8 +1831,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
           // Queue up for processing.
           TFs.push_back(Op.getNode());
-          // Clean up in case the token factor is removed.
-          AddToWorklist(Op.getNode());
           Changed = true;
           break;
         }
@@ -1834,6 +1847,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
     }
   }
 
+  // Re-visit inlined Token Factors, to clean them up in case they have been
+  // removed. Skip the first Token Factor, as this is the current node.
+  for (unsigned i = 1, e = TFs.size(); i < e; i++)
+    AddToWorklist(TFs[i]);
+
   // Remove Nodes that are chained to another node in the list. Do so
   // by walking up chains breath-first stopping when we've seen
   // another operand. In general we must climb to the EntryNode, but we can exit
diff --git a/llvm/test/CodeGen/X86/dagcombine-tokenfactor-limit-crash.ll b/llvm/test/CodeGen/X86/dagcombine-tokenfactor-limit-crash.ll
new file mode 100644
index 0000000000000..412eb7126f5fe
--- /dev/null
+++ b/llvm/test/CodeGen/X86/dagcombine-tokenfactor-limit-crash.ll
@@ -0,0 +1,59 @@
+; RUN: llc %s -combiner-tokenfactor-inline-limit=5 -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.snork = type { i8 }
+%struct.wombat = type { [15 x i32] }
+
+; CHECK:          pushq   %rbx
+; CHECK-NEXT:     andq    $-32, %rsp
+; CHECK-NEXT:     subq    $66144, %rsp            # imm = 0x10260
+; CHECK-NEXT:     .cfi_offset %rbx, -24
+; CHECK-NEXT:     movabsq $-868076584853899022, %rax # imm = 0xF3F3F8F201F2F8F2
+; CHECK-NEXT:     movq    %rax, (%rsp)
+; CHECK-NEXT:     movb    $-13, 8263(%rsp)
+; CHECK-NEXT:     movq    %rdi, %rbx
+; CHECK-NEXT:     callq   hoge
+; CHECK-NEXT:     movq    %rbx, %rdi
+; CHECK-NEXT:     callq   hoge
+; CHECK-NEXT:     callq   hoge
+; CHECK-NEXT:     callq   hoge
+; CHECK-NEXT:     callq   eggs
+; CHECK-NEXT:     callq   hoge
+; CHECK-NEXT:     movq    %rbx, %rax
+; CHECK-NEXT:     leaq    -8(%rbp), %rsp
+; CHECK-NEXT:     popq    %rbx
+; CHECK-NEXT:     popq    %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+define void @spam(%struct.snork* noalias sret %arg, %struct.snork* %arg2) {
+bb:
+  %tmp = alloca i8, i64 66112, align 32
+  %tmp7 = ptrtoint i8* %tmp to i64
+  %tmp914 = inttoptr i64 undef to %struct.wombat*
+  %tmp915 = inttoptr i64 undef to %struct.snork*
+  %tmp916 = inttoptr i64 undef to %struct.snork*
+  %tmp917 = inttoptr i64 undef to %struct.wombat*
+  %tmp918 = inttoptr i64 undef to %struct.snork*
+  %tmp921 = inttoptr i64 undef to %struct.snork*
+  %tmp2055 = inttoptr i64 %tmp7 to i64*
+  store i64 -868076584853899022, i64* %tmp2055, align 1
+  %tmp2056 = add i64 %tmp7, 8263
+  %tmp2057 = inttoptr i64 %tmp2056 to i8*
+  store i8 -13, i8* %tmp2057, align 1
+  br label %bb2058
+
+bb2058:                                           ; preds = %bb
+  call void @hoge(%struct.snork* %arg)
+  call void @hoge(%struct.snork* %arg)
+  call void @hoge(%struct.snork* %tmp915)
+  call void @hoge(%struct.snork* %tmp916)
+  call void @eggs(%struct.snork* %tmp918, %struct.wombat* %tmp914, %struct.wombat* %tmp917)
+  call void @hoge(%struct.snork* %tmp921)
+  ret void
+}
+
+declare void @hoge(%struct.snork*)
+
+declare void @eggs(%struct.snork*, %struct.wombat*, %struct.wombat*)

From 20b14dacbbbf9fedc90c732b6dde9361b7b2283c Mon Sep 17 00:00:00 2001
From: Jim Lin <tclin914@gmail.com>
Date: Mon, 3 Jun 2019 02:31:07 +0000
Subject: [PATCH 0869/1176] [AVR] Fix incorrect source regclass of LDWRdPtr

Summary:
LDWRdPtr would be expanded to ld+ldd. ldd only accepts the pointer register is Y or Z.
So the register class of pointer of LDWRdPtr should be PTRDISPREGS instead of PTRREGS.

Reviewers: dylanmckay

Reviewed By: dylanmckay

Subscribers: dylanmckay, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62300

llvm-svn: 362351
---
 llvm/lib/Target/AVR/AVRInstrInfo.td           |  8 ++---
 llvm/lib/Target/AVR/AVRRegisterInfo.td        |  4 +--
 llvm/test/CodeGen/AVR/load.ll                 | 10 +++---
 .../AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir    | 35 -------------------
 llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir   |  6 ++--
 5 files changed, 14 insertions(+), 49 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir

diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c458fe7de068e..caca9b6176091 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1159,11 +1159,11 @@ isReMaterializable = 1 in
   // LDW Rd+1:Rd, P
   //
   // Expands to:
-  // ld Rd,   P+
-  // ld Rd+1, P
+  // ld  Rd,   P
+  // ldd Rd+1, P+1
   let Constraints = "@earlyclobber $reg" in
   def LDWRdPtr : Pseudo<(outs DREGS:$reg),
-                        (ins PTRREGS:$ptrreg),
+                        (ins PTRDISPREGS:$ptrreg),
                         "ldw\t$reg, $ptrreg",
                         [(set i16:$reg, (load i16:$ptrreg))]>,
                  Requires<[HasSRAM]>;
@@ -1230,7 +1230,7 @@ isReMaterializable = 1 in
   // ldd Rd,   P+q
   // ldd Rd+1, P+q+1
   let Constraints = "@earlyclobber $dst" in
-  def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_Z_WORKAROUND:$dst),
+  def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND:$dst),
                           (ins memri:$memri),
                           "lddw\t$dst, $memri",
                           [(set i16:$dst, (load addr:$memri))]>,
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td
index e20f69beabe7d..ea38fedd22ce9 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -165,14 +165,14 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
 // cannot use Z; it's simply a workaround a regalloc bug.
 //
 // More information can be found in PR39553.
-def DREGS_WITHOUT_Z_WORKAROUND : RegisterClass<"AVR", [i16], 8,
+def DREGS_WITHOUT_YZ_WORKAROUND : RegisterClass<"AVR", [i16], 8,
   (
     // Return value and arguments.
     add R25R24, R19R18, R21R20, R23R22,
     // Scratch registers.
     R27R26,
     // Callee saved registers.
-    R29R28, R17R16, R15R14, R13R12, R11R10,
+    R17R16, R15R14, R13R12, R11R10,
     R9R8, R7R6, R5R4, R3R2, R1R0
   )>;
 
diff --git a/llvm/test/CodeGen/AVR/load.ll b/llvm/test/CodeGen/AVR/load.ll
index f58edeb425a72..ac5691376cbfd 100644
--- a/llvm/test/CodeGen/AVR/load.ll
+++ b/llvm/test/CodeGen/AVR/load.ll
@@ -9,7 +9,7 @@ define i8 @load8(i8* %x) {
 
 define i16 @load16(i16* %x) {
 ; CHECK-LABEL: load16:
-; CHECK: ld r24,  [[PTR:[XYZ]]]
+; CHECK: ld r24,  [[PTR:[YZ]]]
 ; CHECK: ldd r25, [[PTR]]+1
   %1 = load i16, i16* %x
   ret i16 %1
@@ -45,10 +45,10 @@ define i16 @load16disp(i16* %x) {
 
 define i16 @load16nodisp(i16* %x) {
 ; CHECK-LABEL: load16nodisp:
-; CHECK: movw r26, r24
-; CHECK: subi r26, 192
-; CHECK: sbci r27, 255
-; CHECK: ld r24,  [[PTR:[XYZ]]]
+; CHECK: movw r30, r24
+; CHECK: subi r30, 192
+; CHECK: sbci r31, 255
+; CHECK: ld r24,  [[PTR:[YZ]]]
 ; CHECK: ldd r25, [[PTR]]+1
   %1 = getelementptr inbounds i16, i16* %x, i64 32
   %2 = load i16, i16* %1
diff --git a/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir b/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir
deleted file mode 100644
index 72b20d39d68fc..0000000000000
--- a/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir
+++ /dev/null
@@ -1,35 +0,0 @@
-# RUN: llc -O0  %s -o - -march=avr | FileCheck %s
-
-# This test checks the expansion of the 16-bit 'LDDWRdPtrQ' pseudo instruction.
-#
-# This test ensures that the pseudo expander can correctly handle the case
-# where we are expanding a 16-bit LDD instruction where the source and
-# destination registers are the same.
-#
-# The instruction itself is earlyclobber and so ISel will never produce an
-# instruction like this, but the stack slot loading can and will.
-
---- |
-  target triple = "avr--"
-  define void @test_lddwrdptrq() {
-  entry:
-    ret void
-  }
-...
-
----
-name:            test_lddwrdptrq
-tracksRegLiveness: true
-body: |
-  bb.0.entry:
-
-    ; CHECK-LABEL: test_lddwrdptrq
-
-    ; CHECK:      ldd [[SCRATCH:r[0-9]+]], Y+10
-    ; CHECK-NEXT: push [[SCRATCH]]
-    ; CHECK-NEXT: ldd [[SCRATCH]], Y+11
-    ; CHECK-NEXT: mov r29, [[SCRATCH]]
-    ; CHECK-NEXT: pop r28
-
-    early-clobber $r29r28 = LDDWRdPtrQ undef $r29r28, 10
-...
diff --git a/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir b/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir
index 96d3809ed2d71..ef8519ed9de40 100644
--- a/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir
+++ b/llvm/test/CodeGen/AVR/pseudo/LDDWRdPtrQ.mir
@@ -18,8 +18,8 @@ body: |
 
     ; CHECK-LABEL: test_lddwrdptrq
 
-    ; CHECK:      ldd     r28, Z+10
-    ; CHECK-NEXT: ldd     r29, Z+11
+    ; CHECK:      ldd     r24, Z+10
+    ; CHECK-NEXT: ldd     r25, Z+11
 
-    early-clobber $r29r28 = LDDWRdPtrQ undef $r31r30, 10
+    early-clobber $r25r24 = LDDWRdPtrQ undef $r31r30, 10
 ...

From 3fc299df3d337c8a61d15f42f2537508f2feb92a Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Mon, 3 Jun 2019 04:55:46 +0000
Subject: [PATCH 0870/1176] [clangd] Add RelationSlab

Summary:
RelationSlab is a new index data structure that stores relations between
symbols.

Reviewers: kadircet

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D59407

llvm-svn: 362352
---
 clang-tools-extra/clangd/CMakeLists.txt       |  1 +
 clang-tools-extra/clangd/index/Index.h        |  1 +
 clang-tools-extra/clangd/index/Relation.cpp   | 40 +++++++++
 clang-tools-extra/clangd/index/Relation.h     | 88 +++++++++++++++++++
 .../clangd/unittests/IndexTests.cpp           | 39 ++++++++
 5 files changed, 169 insertions(+)
 create mode 100644 clang-tools-extra/clangd/index/Relation.cpp
 create mode 100644 clang-tools-extra/clangd/index/Relation.h

diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 55c7ee5b805a9..68bf0aa50b479 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -77,6 +77,7 @@ add_clang_library(clangDaemon
   index/MemIndex.cpp
   index/Merge.cpp
   index/Ref.cpp
+  index/Relation.cpp
   index/Serialization.cpp
   index/Symbol.cpp
   index/SymbolCollector.cpp
diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h
index 0a271a7cf7845..87a777e72df88 100644
--- a/clang-tools-extra/clangd/index/Index.h
+++ b/clang-tools-extra/clangd/index/Index.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
 
 #include "Ref.h"
+#include "Relation.h"
 #include "Symbol.h"
 #include "SymbolID.h"
 #include "llvm/ADT/DenseSet.h"
diff --git a/clang-tools-extra/clangd/index/Relation.cpp b/clang-tools-extra/clangd/index/Relation.cpp
new file mode 100644
index 0000000000000..e46aa23664151
--- /dev/null
+++ b/clang-tools-extra/clangd/index/Relation.cpp
@@ -0,0 +1,40 @@
+//===--- Relation.cpp --------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Relation.h"
+
+#include <algorithm>
+
+namespace clang {
+namespace clangd {
+
+llvm::iterator_range<RelationSlab::iterator>
+RelationSlab::lookup(const SymbolID &Subject,
+                     index::SymbolRole Predicate) const {
+  auto IterPair = std::equal_range(Relations.begin(), Relations.end(),
+                                   Relation{Subject, Predicate, SymbolID{}},
+                                   [](const Relation &A, const Relation &B) {
+                                     return std::tie(A.Subject, A.Predicate) <
+                                            std::tie(B.Subject, B.Predicate);
+                                   });
+  return {IterPair.first, IterPair.second};
+}
+
+RelationSlab RelationSlab::Builder::build() && {
+  // Sort in SPO order.
+  std::sort(Relations.begin(), Relations.end());
+
+  // Remove duplicates.
+  Relations.erase(std::unique(Relations.begin(), Relations.end()),
+                  Relations.end());
+
+  return RelationSlab{std::move(Relations)};
+}
+
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/index/Relation.h b/clang-tools-extra/clangd/index/Relation.h
new file mode 100644
index 0000000000000..deca3d7a2e7e0
--- /dev/null
+++ b/clang-tools-extra/clangd/index/Relation.h
@@ -0,0 +1,88 @@
+//===--- Relation.h ----------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RELATION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RELATION_H
+
+#include "SymbolID.h"
+#include "SymbolLocation.h"
+#include "clang/Index/IndexSymbol.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cstdint>
+#include <utility>
+
+namespace clang {
+namespace clangd {
+
+/// Represents a relation between two symbols.
+/// For an example "A is a base class of B" may be represented
+/// as { Subject = A, Predicate = RelationBaseOf, Object = B }.
+struct Relation {
+  SymbolID Subject;
+  index::SymbolRole Predicate;
+  SymbolID Object;
+
+  bool operator==(const Relation &Other) const {
+    return std::tie(Subject, Predicate, Object) ==
+           std::tie(Other.Subject, Other.Predicate, Other.Object);
+  }
+  // SPO order
+  bool operator<(const Relation &Other) const {
+    return std::tie(Subject, Predicate, Object) <
+           std::tie(Other.Subject, Other.Predicate, Other.Object);
+  }
+};
+
+class RelationSlab {
+public:
+  using value_type = Relation;
+  using const_iterator = std::vector<value_type>::const_iterator;
+  using iterator = const_iterator;
+
+  RelationSlab() = default;
+  RelationSlab(RelationSlab &&Slab) = default;
+  RelationSlab &operator=(RelationSlab &&RHS) = default;
+
+  const_iterator begin() const { return Relations.begin(); }
+  const_iterator end() const { return Relations.end(); }
+  size_t size() const { return Relations.size(); }
+  bool empty() const { return Relations.empty(); }
+
+  size_t bytes() const {
+    return sizeof(*this) + sizeof(value_type) * Relations.capacity();
+  }
+
+  /// Lookup all relations matching the given subject and predicate.
+  llvm::iterator_range<iterator> lookup(const SymbolID &Subject,
+                                        index::SymbolRole Predicate) const;
+
+  /// RelationSlab::Builder is a mutable container that can 'freeze' to
+  /// RelationSlab.
+  class Builder {
+  public:
+    /// Adds a relation to the slab.
+    void insert(const Relation &R) { Relations.push_back(R); }
+
+    /// Consumes the builder to finalize the slab.
+    RelationSlab build() &&;
+
+  private:
+    std::vector<Relation> Relations;
+  };
+
+private:
+  RelationSlab(std::vector<Relation> Relations)
+      : Relations(std::move(Relations)) {}
+
+  std::vector<Relation> Relations;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RELATION_H
diff --git a/clang-tools-extra/clangd/unittests/IndexTests.cpp b/clang-tools-extra/clangd/unittests/IndexTests.cpp
index 8d8c8da771562..27b23d2f75748 100644
--- a/clang-tools-extra/clangd/unittests/IndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/IndexTests.cpp
@@ -76,6 +76,45 @@ TEST(SymbolSlab, FindAndIterate) {
     EXPECT_THAT(*S.find(SymbolID(Sym)), Named(Sym));
 }
 
+TEST(RelationSlab, Lookup) {
+  SymbolID A{"A"};
+  SymbolID B{"B"};
+  SymbolID C{"C"};
+  SymbolID D{"D"};
+
+  RelationSlab::Builder Builder;
+  Builder.insert(Relation{A, index::SymbolRole::RelationBaseOf, B});
+  Builder.insert(Relation{A, index::SymbolRole::RelationBaseOf, C});
+  Builder.insert(Relation{B, index::SymbolRole::RelationBaseOf, D});
+  Builder.insert(Relation{C, index::SymbolRole::RelationBaseOf, D});
+  Builder.insert(Relation{B, index::SymbolRole::RelationChildOf, A});
+  Builder.insert(Relation{C, index::SymbolRole::RelationChildOf, A});
+  Builder.insert(Relation{D, index::SymbolRole::RelationChildOf, B});
+  Builder.insert(Relation{D, index::SymbolRole::RelationChildOf, C});
+
+  RelationSlab Slab = std::move(Builder).build();
+  EXPECT_THAT(
+      Slab.lookup(A, index::SymbolRole::RelationBaseOf),
+      UnorderedElementsAre(Relation{A, index::SymbolRole::RelationBaseOf, B},
+                           Relation{A, index::SymbolRole::RelationBaseOf, C}));
+}
+
+TEST(RelationSlab, Duplicates) {
+  SymbolID A{"A"};
+  SymbolID B{"B"};
+  SymbolID C{"C"};
+
+  RelationSlab::Builder Builder;
+  Builder.insert(Relation{A, index::SymbolRole::RelationBaseOf, B});
+  Builder.insert(Relation{A, index::SymbolRole::RelationBaseOf, C});
+  Builder.insert(Relation{A, index::SymbolRole::RelationBaseOf, B});
+
+  RelationSlab Slab = std::move(Builder).build();
+  EXPECT_THAT(Slab, UnorderedElementsAre(
+                        Relation{A, index::SymbolRole::RelationBaseOf, B},
+                        Relation{A, index::SymbolRole::RelationBaseOf, C}));
+}
+
 TEST(SwapIndexTest, OldIndexRecycled) {
   auto Token = std::make_shared<int>();
   std::weak_ptr<int> WeakToken = Token;

From 92524f9bf84de92a0a61fcdd7305261d7f9501ca Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Mon, 3 Jun 2019 05:07:52 +0000
Subject: [PATCH 0871/1176] [clangd] Serialization support for RelationSlab

Summary: This builds on D59407 to provide YAML and RIFF serialization support.

Reviewers: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62459

llvm-svn: 362353
---
 .../clangd/index/Serialization.cpp            | 83 ++++++++++++++++++-
 .../clangd/index/Serialization.h              | 11 ++-
 .../clangd/index/YAMLSerialization.cpp        | 63 +++++++++++++-
 .../clangd/unittests/SerializationTests.cpp   | 29 ++++++-
 4 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index 12993eb631ca6..521e5bd04ad71 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -24,6 +24,29 @@ llvm::Error makeError(const llvm::Twine &Msg) {
   return llvm::make_error<llvm::StringError>(Msg,
                                              llvm::inconvertibleErrorCode());
 }
+} // namespace
+
+RelationKind symbolRoleToRelationKind(index::SymbolRole Role) {
+  // SymbolRole is used to record relations in the index.
+  // Only handle the relations we actually store currently.
+  // If we start storing more relations, this list can be expanded.
+  switch (Role) {
+  case index::SymbolRole::RelationBaseOf:
+    return RelationKind::BaseOf;
+  default:
+    llvm_unreachable("Unsupported symbol role");
+  }
+}
+
+index::SymbolRole relationKindToSymbolRole(RelationKind Kind) {
+  switch (Kind) {
+  case RelationKind::BaseOf:
+    return index::SymbolRole::RelationBaseOf;
+  }
+  llvm_unreachable("Invalid relation kind");
+}
+
+namespace {
 
 // IO PRIMITIVES
 // We use little-endian 32 bit ints, sometimes with variable-length encoding.
@@ -358,6 +381,28 @@ readRefs(Reader &Data, llvm::ArrayRef<llvm::StringRef> Strings) {
   return Result;
 }
 
+// RELATIONS ENCODING
+// A relations section is a flat list of relations. Each relation has:
+//  - SymbolID (subject): 8 bytes
+//  - relation kind (predicate): 1 byte
+//  - SymbolID (object): 8 bytes
+// In the future, we might prefer a packed representation if the need arises.
+
+void writeRelation(const Relation &R, llvm::raw_ostream &OS) {
+  OS << R.Subject.raw();
+  RelationKind Kind = symbolRoleToRelationKind(R.Predicate);
+  OS.write(static_cast<uint8_t>(Kind));
+  OS << R.Object.raw();
+}
+
+Relation readRelation(Reader &Data) {
+  SymbolID Subject = Data.consumeID();
+  index::SymbolRole Predicate =
+      relationKindToSymbolRole(static_cast<RelationKind>(Data.consume8()));
+  SymbolID Object = Data.consumeID();
+  return {Subject, Predicate, Object};
+}
+
 // FILE ENCODING
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
@@ -434,6 +479,17 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
       return makeError("malformed or truncated refs");
     Result.Refs = std::move(Refs).build();
   }
+  if (Chunks.count("rela")) {
+    Reader RelationsReader(Chunks.lookup("rela"));
+    RelationSlab::Builder Relations;
+    while (!RelationsReader.eof()) {
+      auto Relation = readRelation(RelationsReader);
+      Relations.insert(Relation);
+    }
+    if (RelationsReader.err())
+      return makeError("malformed or truncated relations");
+    Result.Relations = std::move(Relations).build();
+  }
   return std::move(Result);
 }
 
@@ -483,6 +539,14 @@ void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
     }
   }
 
+  std::vector<Relation> Relations;
+  if (Data.Relations) {
+    for (const auto &Relation : *Data.Relations) {
+      Relations.emplace_back(Relation);
+      // No strings to be interned in relations.
+    }
+  }
+
   std::string StringSection;
   {
     llvm::raw_string_ostream StringOS(StringSection);
@@ -508,6 +572,16 @@ void writeRIFF(const IndexFileOut &Data, llvm::raw_ostream &OS) {
     RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
   }
 
+  std::string RelationSection;
+  if (Data.Relations) {
+    {
+      llvm::raw_string_ostream RelationOS{RelationSection};
+      for (const auto &Relation : Relations)
+        writeRelation(Relation, RelationOS);
+    }
+    RIFF.Chunks.push_back({riff::fourCC("rela"), RelationSection});
+  }
+
   std::string SrcsSection;
   {
     {
@@ -561,6 +635,7 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
 
   SymbolSlab Symbols;
   RefSlab Refs;
+  RelationSlab Relations;
   {
     trace::Span Tracer("ParseIndex");
     if (auto I = readIndexFile(Buffer->get()->getBuffer())) {
@@ -568,6 +643,8 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
         Symbols = std::move(*I->Symbols);
       if (I->Refs)
         Refs = std::move(*I->Refs);
+      if (I->Relations)
+        Relations = std::move(*I->Relations);
     } else {
       llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n";
       return nullptr;
@@ -576,15 +653,17 @@ std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
 
   size_t NumSym = Symbols.size();
   size_t NumRefs = Refs.numRefs();
+  size_t NumRelations = Relations.size();
 
   trace::Span Tracer("BuildIndex");
   auto Index = UseDex ? dex::Dex::build(std::move(Symbols), std::move(Refs))
                       : MemIndex::build(std::move(Symbols), std::move(Refs));
   vlog("Loaded {0} from {1} with estimated memory usage {2} bytes\n"
        "  - number of symbols: {3}\n"
-       "  - number of refs: {4}\n",
+       "  - number of refs: {4}\n"
+       "  - numnber of relations: {5}",
        UseDex ? "Dex" : "MemIndex", SymbolFilename,
-       Index->estimateMemoryUsage(), NumSym, NumRefs);
+       Index->estimateMemoryUsage(), NumSym, NumRefs, NumRelations);
   return Index;
 }
 
diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index 3788693e04246..03e5e387c7bcd 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -41,6 +41,7 @@ enum class IndexFileFormat {
 struct IndexFileIn {
   llvm::Optional<SymbolSlab> Symbols;
   llvm::Optional<RefSlab> Refs;
+  llvm::Optional<RelationSlab> Relations;
   // Keys are URIs of the source files.
   llvm::Optional<IncludeGraph> Sources;
 };
@@ -51,6 +52,7 @@ llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
 struct IndexFileOut {
   const SymbolSlab *Symbols = nullptr;
   const RefSlab *Refs = nullptr;
+  const RelationSlab *Relations = nullptr;
   // Keys are URIs of the source files.
   const IncludeGraph *Sources = nullptr;
   // TODO: Support serializing Dex posting lists.
@@ -59,7 +61,8 @@ struct IndexFileOut {
   IndexFileOut() = default;
   IndexFileOut(const IndexFileIn &I)
       : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr),
-        Refs(I.Refs ? I.Refs.getPointer() : nullptr) {}
+        Refs(I.Refs ? I.Refs.getPointer() : nullptr),
+        Relations(I.Relations ? I.Relations.getPointer() : nullptr) {}
 };
 // Serializes an index file.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
@@ -67,12 +70,18 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O);
 // Convert a single symbol to YAML, a nice debug representation.
 std::string toYAML(const Symbol &);
 std::string toYAML(const std::pair<SymbolID, ArrayRef<Ref>> &);
+std::string toYAML(const Relation &);
 
 // Build an in-memory static index from an index file.
 // The size should be relatively small, so data can be managed in memory.
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef Filename,
                                        bool UseDex = true);
 
+// Used for serializing SymbolRole as used in Relation.
+enum class RelationKind : uint8_t { BaseOf };
+RelationKind symbolRoleToRelationKind(index::SymbolRole);
+index::SymbolRole relationKindToSymbolRole(RelationKind);
+
 } // namespace clangd
 } // namespace clang
 
diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
index 6bf0cb789ca42..4e30abaa60e0e 100644
--- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp
+++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Index.h"
+#include "Relation.h"
 #include "Serialization.h"
 #include "SymbolLocation.h"
 #include "SymbolOrigin.h"
@@ -35,10 +36,11 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref)
 namespace {
 using RefBundle =
     std::pair<clang::clangd::SymbolID, std::vector<clang::clangd::Ref>>;
-// This is a pale imitation of std::variant<Symbol, RefBundle>
+// This is a pale imitation of std::variant<Symbol, RefBundle, Relation>
 struct VariantEntry {
   llvm::Optional<clang::clangd::Symbol> Symbol;
   llvm::Optional<RefBundle> Refs;
+  llvm::Optional<clang::clangd::Relation> Relation;
 };
 // A class helps YAML to serialize the 32-bit encoded position (Line&Column),
 // as YAMLIO can't directly map bitfields.
@@ -53,6 +55,8 @@ namespace yaml {
 
 using clang::clangd::Ref;
 using clang::clangd::RefKind;
+using clang::clangd::Relation;
+using clang::clangd::RelationKind;
 using clang::clangd::Symbol;
 using clang::clangd::SymbolID;
 using clang::clangd::SymbolLocation;
@@ -60,6 +64,7 @@ using clang::clangd::SymbolOrigin;
 using clang::index::SymbolInfo;
 using clang::index::SymbolKind;
 using clang::index::SymbolLanguage;
+using clang::index::SymbolRole;
 
 // Helper to (de)serialize the SymbolID. We serialize it as a hex string.
 struct NormalizedSymbolID {
@@ -275,6 +280,37 @@ template <> struct MappingTraits<Ref> {
   }
 };
 
+struct NormalizedSymbolRole {
+  NormalizedSymbolRole(IO &) {}
+  NormalizedSymbolRole(IO &IO, SymbolRole R) {
+    Kind = static_cast<uint8_t>(clang::clangd::symbolRoleToRelationKind(R));
+  }
+
+  SymbolRole denormalize(IO &IO) {
+    return clang::clangd::relationKindToSymbolRole(
+        static_cast<RelationKind>(Kind));
+  }
+
+  uint8_t Kind = 0;
+};
+
+template <> struct MappingTraits<SymbolID> {
+  static void mapping(IO &IO, SymbolID &ID) {
+    MappingNormalization<NormalizedSymbolID, SymbolID> NSymbolID(IO, ID);
+    IO.mapRequired("ID", NSymbolID->HexString);
+  }
+};
+
+template <> struct MappingTraits<Relation> {
+  static void mapping(IO &IO, Relation &Relation) {
+    MappingNormalization<NormalizedSymbolRole, SymbolRole> NRole(
+        IO, Relation.Predicate);
+    IO.mapRequired("Subject", Relation.Subject);
+    IO.mapRequired("Predicate", NRole->Kind);
+    IO.mapRequired("Object", Relation.Object);
+  }
+};
+
 template <> struct MappingTraits<VariantEntry> {
   static void mapping(IO &IO, VariantEntry &Variant) {
     if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) {
@@ -285,6 +321,10 @@ template <> struct MappingTraits<VariantEntry> {
       if (!IO.outputting())
         Variant.Refs.emplace();
       MappingTraits<RefBundle>::mapping(IO, *Variant.Refs);
+    } else if (IO.mapTag("!Relations", Variant.Relation.hasValue())) {
+      if (!IO.outputting())
+        Variant.Relation.emplace();
+      MappingTraits<Relation>::mapping(IO, *Variant.Relation);
     }
   }
 };
@@ -308,11 +348,18 @@ void writeYAML(const IndexFileOut &O, llvm::raw_ostream &OS) {
       Entry.Refs = Sym;
       Yout << Entry;
     }
+  if (O.Relations)
+    for (auto &R : *O.Relations) {
+      VariantEntry Entry;
+      Entry.Relation = R;
+      Yout << Entry;
+    }
 }
 
 llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data) {
   SymbolSlab::Builder Symbols;
   RefSlab::Builder Refs;
+  RelationSlab::Builder Relations;
   llvm::BumpPtrAllocator
       Arena; // store the underlying data of Position::FileURI.
   llvm::UniqueStringSaver Strings(Arena);
@@ -329,12 +376,15 @@ llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data) {
     if (Variant.Refs)
       for (const auto &Ref : Variant.Refs->second)
         Refs.insert(Variant.Refs->first, Ref);
+    if (Variant.Relation)
+      Relations.insert(*Variant.Relation);
     Yin.nextDocument();
   }
 
   IndexFileIn Result;
   Result.Symbols.emplace(std::move(Symbols).build());
   Result.Refs.emplace(std::move(Refs).build());
+  Result.Relations.emplace(std::move(Relations).build());
   return std::move(Result);
 }
 
@@ -360,5 +410,16 @@ std::string toYAML(const std::pair<SymbolID, llvm::ArrayRef<Ref>> &Data) {
   return Buf;
 }
 
+std::string toYAML(const Relation &R) {
+  std::string Buf;
+  {
+    llvm::raw_string_ostream OS(Buf);
+    llvm::yaml::Output Yout(OS);
+    Relation Rel = R; // copy: Yout<< requires mutability.
+    Yout << Rel;
+  }
+  return Buf;
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp
index 792da77031039..1723bb867bc35 100644
--- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp
@@ -82,6 +82,14 @@ ID: 057557CEBF6E6B2D
       End:
         Line: 5
         Column: 8
+...
+--- !Relations
+Subject:
+  ID:              6481EE7AF2841756
+Predicate:       0
+Object:
+  ID:              6512AEC512EA3A2D
+...
 )";
 
 MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
@@ -139,6 +147,13 @@ TEST(SerializationTest, YAMLConversions) {
   auto Ref1 = ParsedYAML->Refs->begin()->second.front();
   EXPECT_EQ(Ref1.Kind, RefKind::Reference);
   EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
+
+  SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
+  SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
+  ASSERT_TRUE(bool(ParsedYAML->Relations));
+  EXPECT_THAT(*ParsedYAML->Relations,
+              UnorderedElementsAre(
+                  Relation{Base, index::SymbolRole::RelationBaseOf, Derived}));
 }
 
 std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
@@ -149,8 +164,15 @@ std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
 }
 std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
   std::vector<std::string> Result;
-  for (const auto &Sym : Slab)
-    Result.push_back(toYAML(Sym));
+  for (const auto &Refs : Slab)
+    Result.push_back(toYAML(Refs));
+  return Result;
+}
+
+std::vector<std::string> YAMLFromRelations(const RelationSlab &Slab) {
+  std::vector<std::string> Result;
+  for (const auto &Rel : Slab)
+    Result.push_back(toYAML(Rel));
   return Result;
 }
 
@@ -167,12 +189,15 @@ TEST(SerializationTest, BinaryConversions) {
   ASSERT_TRUE(bool(In2)) << In.takeError();
   ASSERT_TRUE(In2->Symbols);
   ASSERT_TRUE(In2->Refs);
+  ASSERT_TRUE(In2->Relations);
 
   // Assert the YAML serializations match, for nice comparisons and diffs.
   EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
               UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
   EXPECT_THAT(YAMLFromRefs(*In2->Refs),
               UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+  EXPECT_THAT(YAMLFromRelations(*In2->Relations),
+              UnorderedElementsAreArray(YAMLFromRelations(*In->Relations)));
 }
 
 TEST(SerializationTest, SrcsTest) {

From 471f11805f741b0b22328e0fd111be4329f245a3 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Mon, 3 Jun 2019 05:11:44 +0000
Subject: [PATCH 0872/1176] Add --sort-common to the man page.

Differential Revision: https://reviews.llvm.org/D62799

llvm-svn: 362354
---
 lld/docs/ld.lld.1 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 1a2fe614d2434..b398c19a27939 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -429,6 +429,8 @@ Set
 .Dv DT_SONAME
 to
 .Ar value .
+.It Fl -sort-common
+This option is ignored for GNU compatibility.
 .It Fl -sort-section Ns = Ns Ar value
 Specifies sections sorting rule when linkerscript is used.
 .It Fl -start-lib

From c78c999a9cd7a77b9d13c610c9faebac5d560a55 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Mon, 3 Jun 2019 05:25:03 +0000
Subject: [PATCH 0873/1176] [ELF] Simplify the condition to create .interp

(1) {gcc,clang} -fuse-ld=bfd -pie -fPIE -nostdlib a.c => .interp created
(2) {gcc,clang} -fuse-ld=lld -pie -fPIE -nostdlib a.c => .interp not created
(3) {gcc,clang} -fuse-ld=lld -pie -fPIE -nostdlib a.c a.so => .interp created

The inconsistency of (2) is due to the condition `!Config->SharedFiles.empty()`.
To make lld behave more like ld.bfd, we could change the condition to:

    Config->HasDynSymTab && !Config->DynamicLinker.empty() && Script->needsInterpSection();

However, that would bring another inconsistency as can be observed with:

(4) {gcc,clang} -fuse-ld=bfd -no-pie -nostdlib a.c => .interp not created

So instead, use `!Config->DynamicLinker.empty() && Script->needsInterpSection()`,
which is both simple and consistent in these cases.

The inconsistency of (4) likely originated from ld.bfd and gold's choice to have a default --dynamic-linker.
Their condition to create .interp is ANDed with (not -shared).
Since lld doesn't have a default --dynamic-linker,
compiler drivers (gcc/clang) don't pass --dynamic-linker for -shared,
and direct ld users are not supposed to specify --dynamic-linker for -shared,
we do not need the condition !Config->Shared.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62765

llvm-svn: 362355
---
 lld/ELF/Writer.cpp            |  3 +--
 lld/test/ELF/dynamic-linker.s | 16 +++++++---------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index bc3abf7837335..ec5be8c8fd70d 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -138,8 +138,7 @@ StringRef elf::getOutputSectionName(const InputSectionBase *S) {
 }
 
 static bool needsInterpSection() {
-  return !SharedFiles.empty() && !Config->DynamicLinker.empty() &&
-         Script->needsInterpSection();
+  return !Config->DynamicLinker.empty() && Script->needsInterpSection();
 }
 
 template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); }
diff --git a/lld/test/ELF/dynamic-linker.s b/lld/test/ELF/dynamic-linker.s
index 3faf8e8a169e5..ac7ec6a3b3e95 100644
--- a/lld/test/ELF/dynamic-linker.s
+++ b/lld/test/ELF/dynamic-linker.s
@@ -1,21 +1,19 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/shared.s -o %t1.o
-# RUN: ld.lld -shared %t1.o -o %t.so
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 
-# RUN: ld.lld --dynamic-linker foo %t.o %t.so -o %t
-# RUN: llvm-readelf -program-headers %t | FileCheck %s
+# RUN: ld.lld --dynamic-linker foo %t.o -o %t
+# RUN: llvm-readelf --program-headers %t | FileCheck %s
 
-# RUN: ld.lld --dynamic-linker=foo %t.o %t.so -o %t
-# RUN: llvm-readelf -program-headers %t | FileCheck %s
+# RUN: ld.lld --dynamic-linker=foo %t.o -o %t
+# RUN: llvm-readelf --program-headers %t | FileCheck %s
 
 # CHECK: [Requesting program interpreter: foo]
 
-# RUN: ld.lld %t.o %t.so -o %t
+# RUN: ld.lld %t.o -o %t
 # RUN: llvm-readelf -program-headers %t | FileCheck --check-prefix=NO %s
 
-# RUN: ld.lld --dynamic-linker foo --no-dynamic-linker %t.o %t.so -o %t
-# RUN: llvm-readelf -program-headers %t | FileCheck --check-prefix=NO %s
+# RUN: ld.lld --dynamic-linker foo --no-dynamic-linker %t.o -o %t
+# RUN: llvm-readelf --program-headers %t | FileCheck --check-prefix=NO %s
 
 # NO-NOT: PT_INTERP
 

From abb7484c31d5346c5f7b06fb65549b0fef75dccf Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Mon, 3 Jun 2019 05:34:25 +0000
Subject: [PATCH 0874/1176] [ELF] Don't create an output section named
 `/DISCARD/` if it is assigned to the special phdr `NONE`

Fixes the remaining issue of PR41673 after D61186: with `/DISCARD/ { ... } :NONE`,
we may create an output section named `/DISCARD/`.

Note, if an input section is named `/DISCARD/`, ld.bfd discards it but
lld keeps it. It is probably not worth copying this behavior as it is unrealistic.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62768

llvm-svn: 362356
---
 lld/ELF/LinkerScript.cpp                 | 3 +++
 lld/test/ELF/linkerscript/discard-phdr.s | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index a6354c841c264..3f68e6eff87c6 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -824,6 +824,9 @@ void LinkerScript::assignOffsets(OutputSection *Sec) {
 }
 
 static bool isDiscardable(OutputSection &Sec) {
+  if (Sec.Name == "/DISCARD/")
+    return true;
+
   // We do not remove empty sections that are explicitly
   // assigned to any segment.
   if (!Sec.Phdrs.empty())
diff --git a/lld/test/ELF/linkerscript/discard-phdr.s b/lld/test/ELF/linkerscript/discard-phdr.s
index 311dcbaad1ff9..dc2c8235fcd17 100644
--- a/lld/test/ELF/linkerscript/discard-phdr.s
+++ b/lld/test/ELF/linkerscript/discard-phdr.s
@@ -10,7 +10,7 @@
 # RUN:  /DISCARD/ : { *(.discard) } :NONE \
 # RUN: }" > %t.script
 # RUN: ld.lld -o %t --script %t.script %t.o
-# RUN: llvm-readelf -S -l %t | FileCheck %s
+# RUN: llvm-readelf -S -l %t | FileCheck --implicit-check-not=/DISCARD/ %s
 
 ## Check that /DISCARD/ does not interfere with the assignment of segments to
 ## sections.

From ce1534b4055d52e007aff99dee521f6b7a7dba44 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Mon, 3 Jun 2019 05:41:31 +0000
Subject: [PATCH 0875/1176] [ELF][PPC64] Don't apply LD->LE relaxation on
 R_PPC64_GOT_DTPREL16*

In ELF v2 ABI, R_PPC64_GOT_DTPREL16* are not relaxed.

This family of relocation types are used for variables outside of 2GiB
of the TLS block. 2 instructions cannot materialize a DTPREL offset that
is not 32-bit.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62737

llvm-svn: 362357
---
 lld/ELF/Arch/PPC64.cpp             |  4 ---
 lld/ELF/Relocations.cpp            |  7 +----
 lld/test/ELF/ppc64-dtprel.s        | 26 ++----------------
 lld/test/ELF/ppc64-ld-got-dtprel.s | 44 ++++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 34 deletions(-)
 create mode 100644 lld/test/ELF/ppc64-ld-got-dtprel.s

diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index ae0d63ab74828..a79e9c80242fa 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -445,10 +445,6 @@ void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
   case R_PPC64_DTPREL16_DS:
   case R_PPC64_DTPREL16_LO:
   case R_PPC64_DTPREL16_LO_DS:
-  case R_PPC64_GOT_DTPREL16_HA:
-  case R_PPC64_GOT_DTPREL16_LO_DS:
-  case R_PPC64_GOT_DTPREL16_DS:
-  case R_PPC64_GOT_DTPREL16_HI:
     relocateOne(Loc, Type, Val);
     break;
   default:
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index b02e1cc2cae13..fe10234be641a 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -257,13 +257,8 @@ handleTlsRelocation(RelType Type, Symbol &Sym, InputSectionBase &C,
   }
 
   // Local-Dynamic sequence where offset of tls variable relative to dynamic
-  // thread pointer is stored in the got.
+  // thread pointer is stored in the got. This cannot be relaxed to Local-Exec.
   if (Expr == R_TLSLD_GOT_OFF) {
-    // Local-Dynamic relocs can be relaxed to local-exec
-    if (!Config->Shared) {
-      C.Relocations.push_back({R_RELAX_TLS_LD_TO_LE, Type, Offset, Addend, &Sym});
-      return 1;
-    }
     if (!Sym.isInGot()) {
       In.Got->addEntry(Sym);
       uint64_t Off = Sym.getGotOffset();
diff --git a/lld/test/ELF/ppc64-dtprel.s b/lld/test/ELF/ppc64-dtprel.s
index a7ece1d78ff96..4bb767a488162 100644
--- a/lld/test/ELF/ppc64-dtprel.s
+++ b/lld/test/ELF/ppc64-dtprel.s
@@ -97,15 +97,6 @@ test_not_adjusted:
         mtlr 0
         blr
 
-        .globl test_got_dtprel
-        .p2align 4
-        .type test_got_dtprel,@function
-test_got_dtprel:
-         addis 3, 2, i@got@dtprel@ha
-         ld 3, i@got@dtprel@l(3)
-         addis 3, 2, i@got@dtprel@h
-         addi 3, 2, i@got@dtprel
-
         .section        .debug_addr,"",@progbits
         .quad   i@dtprel+32768
 
@@ -137,10 +128,6 @@ k:
 // InputRelocs: R_PPC64_DTPREL16_HIGHER   {{[0-9a-f]+}} k + 0
 // InputRelocs: R_PPC64_DTPREL16_HI       {{[0-9a-f]+}} k + 0
 // InputRelocs: R_PPC64_DTPREL16_LO       {{[0-9a-f]+}} k + 0
-// InputRelocs: R_PPC64_GOT_DTPREL16_HA    {{[0-9a-f]+}} i + 0
-// InputRelocs: R_PPC64_GOT_DTPREL16_LO_DS {{[0-9a-f]+}} i + 0
-// InputRelocs: R_PPC64_GOT_DTPREL16_HI    {{[0-9a-f]+}} i + 0
-// InputRelocs: R_PPC64_GOT_DTPREL16_DS    {{[0-9a-f]+}} i + 0
 // InputRelocs: Relocation section '.rela.debug_addr'
 // InputRelocs: R_PPC64_DTPREL64          {{[0-9a-f]+}} i + 8000
 
@@ -154,11 +141,11 @@ k:
 // i@dtprel = 1024 - 0x8000 = -31744 = 0xffffffffffff8400
 // HEX-LE:      section '.got':
 // HEX-LE-NEXT: 4204f8 f8844200 00000000 00000000 00000000
-// HEX-LE-NEXT: 420508 00000000 00000000 0084ffff ffffffff
+// HEX-LE-NEXT: 420508 00000000 00000000
 
 // HEX-BE:      section '.got':
 // HEX-BE-NEXT: 4204f8 00000000 004284f8 00000000 00000000
-// HEX-BE-NEXT: 420508 00000000 00000000 ffffffff ffff8400
+// HEX-BE-NEXT: 420508 00000000 00000000
 
 // Dis:     test:
 // Dis:      addi 4, 3, -31744
@@ -186,12 +173,3 @@ k:
 // Dis:    oris 4, 4, 63
 // Dis:    ori 4, 4, 33796
 
-// #ha(i@got@dtprel) = ((0x420510 - (.got+0x8000) + 0x8000) >> 16) & 0xffff = 0
-// #lo(i@got@dtprel) = (0x420510 - (.got+0x8000)) & 0xffff = -32744
-// #hi(i@got@dtprel) = ((0x420510 - (.got+0x8000)) >> 16) & 0xffff = -1
-// i@got@dtprel --> 0x420510 - (.got+0x8000) = -32744
-// Dis: test_got_dtprel:
-// Dis:    addis 3, 2, 0
-// Dis:    ld 3, -32744(3)
-// Dis:    addis 3, 2, -1
-// Dis:    addi 3, 2, -32744
diff --git a/lld/test/ELF/ppc64-ld-got-dtprel.s b/lld/test/ELF/ppc64-ld-got-dtprel.s
new file mode 100644
index 0000000000000..319c14e42cb94
--- /dev/null
+++ b/lld/test/ELF/ppc64-ld-got-dtprel.s
@@ -0,0 +1,44 @@
+# REQUIRES: ppc
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
+# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOCS %s
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s
+## Check LD->LE relaxation does not affect R_PPC64_GOT_DTPREL16*.
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
+# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=RELOCS %s
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck %s
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+# RELOCS:      .rela.text {
+# RELOCS-NEXT:   R_PPC64_GOT_DTPREL16_HA i 0x0
+# RELOCS-NEXT:   R_PPC64_GOT_DTPREL16_LO_DS i 0x0
+# RELOCS-NEXT:   R_PPC64_GOT_DTPREL16_HI j 0x0
+# RELOCS-NEXT:   R_PPC64_GOT_DTPREL16_DS j 0x0
+# RELOCS-NEXT: }
+
+## ha(i@got@dtprel) = (&.got[0] - (.got+0x8000) + 0x8000 >> 16) & 0xffff = 0
+## lo(i@got@dtprel) = &.got[0] - (.got+0x8000) & 0xffff = -32768
+## hi(j@got@dtprel) = (&.got[1] - (.got+0x8000) >> 16) & 0xffff = -1
+## j@got@dtprel = &.got[1] - (.got+0x8000) = -32760
+# CHECK:      addis 3, 2, 0
+# CHECK-NEXT: ld 3, -32768(3)
+# CHECK-NEXT: addis 3, 2, -1
+# CHECK-NEXT: addi 3, 2, -32760
+
+  addis 3, 2, i@got@dtprel@ha
+  ld 3, i@got@dtprel@l(3)
+  addis 3, 2, j@got@dtprel@h
+  addi 3, 2, j@got@dtprel
+
+.section .tbss,"awT",@progbits
+.p2align 2
+i:
+  .long 0
+j:
+  .long 0

From ea0c66be55114087d2151401756612b49f3589eb Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Mon, 3 Jun 2019 06:02:10 +0000
Subject: [PATCH 0876/1176] PR42104: Support instantiations of lambdas that
 implicitly capture packs.

Two changes:
 * Track odr-use via FunctionParmPackExprs to properly handle dependent
   odr-uses of packs in generic lambdas.
 * Do not instantiate implicit captures; instead, regenerate them by
   instantiating the body of the lambda. This is necessary to
   distinguish between cases where only one element of a pack is
   captured and cases where the entire pack is captured.

llvm-svn: 362358
---
 clang/include/clang/Sema/ScopeInfo.h          | 19 +++---
 clang/include/clang/Sema/Sema.h               |  1 +
 clang/lib/Sema/ScopeInfo.cpp                  | 28 ++++-----
 clang/lib/Sema/SemaExpr.cpp                   | 50 +++++++++------
 clang/lib/Sema/SemaExprCXX.cpp                | 11 +---
 clang/lib/Sema/SemaTemplateInstantiate.cpp    | 14 +++--
 clang/lib/Sema/TreeTransform.h                | 63 +++++++++++++++----
 .../cxx1y-generic-lambdas-capturing.cpp       | 27 +++++++-
 .../test/SemaTemplate/lambda-capture-pack.cpp | 17 +++++
 9 files changed, 161 insertions(+), 69 deletions(-)
 create mode 100644 clang/test/SemaTemplate/lambda-capture-pack.cpp

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 177c88d7e8475..ea2595113d589 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_SEMA_SCOPEINFO_H
 
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/CapturedStmt.h"
 #include "clang/Basic/LLVM.h"
@@ -913,7 +914,8 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
   ///   };
   /// }
   void addPotentialCapture(Expr *VarExpr) {
-    assert(isa<DeclRefExpr>(VarExpr) || isa<MemberExpr>(VarExpr));
+    assert(isa<DeclRefExpr>(VarExpr) || isa<MemberExpr>(VarExpr) ||
+           isa<FunctionParmPackExpr>(VarExpr));
     PotentiallyCapturingExprs.push_back(VarExpr);
   }
 
@@ -965,13 +967,15 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
   ///  building such a node. So we need a rule that anyone can implement and get
   ///  exactly the same result".
   void markVariableExprAsNonODRUsed(Expr *CapturingVarExpr) {
-    assert(isa<DeclRefExpr>(CapturingVarExpr)
-        || isa<MemberExpr>(CapturingVarExpr));
+    assert(isa<DeclRefExpr>(CapturingVarExpr) ||
+           isa<MemberExpr>(CapturingVarExpr) ||
+           isa<FunctionParmPackExpr>(CapturingVarExpr));
     NonODRUsedCapturingExprs.insert(CapturingVarExpr);
   }
   bool isVariableExprMarkedAsNonODRUsed(Expr *CapturingVarExpr) const {
-    assert(isa<DeclRefExpr>(CapturingVarExpr)
-      || isa<MemberExpr>(CapturingVarExpr));
+    assert(isa<DeclRefExpr>(CapturingVarExpr) ||
+           isa<MemberExpr>(CapturingVarExpr) ||
+           isa<FunctionParmPackExpr>(CapturingVarExpr));
     return NonODRUsedCapturingExprs.count(CapturingVarExpr);
   }
   void removePotentialCapture(Expr *E) {
@@ -993,9 +997,8 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
                                   PotentialThisCaptureLocation.isValid();
   }
 
-  // When passed the index, returns the VarDecl and Expr associated
-  // with the index.
-  void getPotentialVariableCapture(unsigned Idx, VarDecl *&VD, Expr *&E) const;
+  void visitPotentialCaptures(
+      llvm::function_ref<void(VarDecl *, Expr *)> Callback) const;
 };
 
 FunctionScopeInfo::WeakObjectProfileTy::WeakObjectProfileTy()
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b4f721c091977..096bebf162170 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4179,6 +4179,7 @@ class Sema {
   void MarkVariableReferenced(SourceLocation Loc, VarDecl *Var);
   void MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base = nullptr);
   void MarkMemberReferenced(MemberExpr *E);
+  void MarkFunctionParmPackReferenced(FunctionParmPackExpr *E);
   void MarkCaptureUsedInEnclosingContext(VarDecl *Capture, SourceLocation Loc,
                                          unsigned CapturingScopeIndex);
 
diff --git a/clang/lib/Sema/ScopeInfo.cpp b/clang/lib/Sema/ScopeInfo.cpp
index e84e592a4827e..b2a26af9b4a57 100644
--- a/clang/lib/Sema/ScopeInfo.cpp
+++ b/clang/lib/Sema/ScopeInfo.cpp
@@ -229,20 +229,20 @@ bool CapturingScopeInfo::isVLATypeCaptured(const VariableArrayType *VAT) const {
   return false;
 }
 
-void LambdaScopeInfo::getPotentialVariableCapture(unsigned Idx, VarDecl *&VD,
-                                                  Expr *&E) const {
-  assert(Idx < getNumPotentialVariableCaptures() &&
-         "Index of potential capture must be within 0 to less than the "
-         "number of captures!");
-  E = PotentiallyCapturingExprs[Idx];
-  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E))
-    VD = dyn_cast<VarDecl>(DRE->getFoundDecl());
-  else if (MemberExpr *ME = dyn_cast<MemberExpr>(E))
-    VD = dyn_cast<VarDecl>(ME->getMemberDecl());
-  else
-    llvm_unreachable("Only DeclRefExprs or MemberExprs should be added for "
-    "potential captures");
-  assert(VD);
+void LambdaScopeInfo::visitPotentialCaptures(
+    llvm::function_ref<void(VarDecl *, Expr *)> Callback) const {
+  for (Expr *E : PotentiallyCapturingExprs) {
+    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+      Callback(cast<VarDecl>(DRE->getFoundDecl()), E);
+    } else if (auto *ME = dyn_cast<MemberExpr>(E)) {
+      Callback(cast<VarDecl>(ME->getMemberDecl()), E);
+    } else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
+      for (VarDecl *VD : *FP)
+        Callback(VD, E);
+    } else {
+      llvm_unreachable("unexpected expression in potential captures list");
+    }
+  }
 }
 
 FunctionScopeInfo::~FunctionScopeInfo() { }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 72b61b8e5842f..d0b2760284670 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14610,7 +14610,9 @@ namespace {
     // context so never needs to be transformed.
     // FIXME: Ideally we wouldn't transform the closure type either, and would
     // just recreate the capture expressions and lambda expression.
-    StmtResult TransformLambdaBody(Stmt *Body) { return Body; }
+    StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body) {
+      return SkipLambdaBody(E, Body);
+    }
   };
 }
 
@@ -15054,7 +15056,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
 ///    *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack.
 static void
 MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
-                   const unsigned *const FunctionScopeIndexToStopAt) {
+                   const unsigned *const FunctionScopeIndexToStopAt = nullptr) {
   // Keep track of used but undefined variables.
   // FIXME: We shouldn't suppress this warning for static data members.
   if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly &&
@@ -15735,14 +15737,19 @@ void Sema::UpdateMarkingForLValueToRValue(Expr *E) {
   // variable.
   if (LambdaScopeInfo *LSI = getCurLambda()) {
     Expr *SansParensExpr = E->IgnoreParens();
-    VarDecl *Var = nullptr;
+    VarDecl *Var;
+    ArrayRef<VarDecl *> Vars = None;
     if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(SansParensExpr))
-      Var = dyn_cast<VarDecl>(DRE->getFoundDecl());
+      Vars = Var = dyn_cast<VarDecl>(DRE->getFoundDecl());
     else if (MemberExpr *ME = dyn_cast<MemberExpr>(SansParensExpr))
-      Var = dyn_cast<VarDecl>(ME->getMemberDecl());
+      Vars = Var = dyn_cast<VarDecl>(ME->getMemberDecl());
+    else if (auto *FPPE = dyn_cast<FunctionParmPackExpr>(SansParensExpr))
+      Vars = llvm::makeArrayRef(FPPE->begin(), FPPE->end());
 
-    if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context))
-      LSI->markVariableExprAsNonODRUsed(SansParensExpr);
+    for (VarDecl *VD : Vars) {
+      if (Var && IsVariableNonDependentAndAConstantExpression(VD, Context))
+        LSI->markVariableExprAsNonODRUsed(SansParensExpr);
+    }
   }
 }
 
@@ -15767,20 +15774,18 @@ void Sema::CleanupVarDeclMarking() {
   std::swap(LocalMaybeODRUseExprs, MaybeODRUseExprs);
 
   for (Expr *E : LocalMaybeODRUseExprs) {
-    VarDecl *Var;
-    SourceLocation Loc;
-    if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
-      Var = cast<VarDecl>(DRE->getDecl());
-      Loc = DRE->getLocation();
-    } else if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
-      Var = cast<VarDecl>(ME->getMemberDecl());
-      Loc = ME->getMemberLoc();
+    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+      MarkVarDeclODRUsed(cast<VarDecl>(DRE->getDecl()),
+                         DRE->getLocation(), *this);
+    } else if (auto *ME = dyn_cast<MemberExpr>(E)) {
+      MarkVarDeclODRUsed(cast<VarDecl>(ME->getMemberDecl()), ME->getMemberLoc(),
+                         *this);
+    } else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
+      for (VarDecl *VD : *FP)
+        MarkVarDeclODRUsed(VD, FP->getParameterPackLocation(), *this);
     } else {
       llvm_unreachable("Unexpected expression");
     }
-
-    MarkVarDeclODRUsed(Var, Loc, *this,
-                       /*MaxFunctionScopeIndex Pointer*/ nullptr);
   }
 
   assert(MaybeODRUseExprs.empty() &&
@@ -15789,7 +15794,8 @@ void Sema::CleanupVarDeclMarking() {
 
 static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
                                     VarDecl *Var, Expr *E) {
-  assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E)) &&
+  assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E) ||
+          isa<FunctionParmPackExpr>(E)) &&
          "Invalid Expr argument to DoMarkVarDeclReferenced");
   Var->setReferenced();
 
@@ -16022,6 +16028,12 @@ void Sema::MarkMemberReferenced(MemberExpr *E) {
   MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse);
 }
 
+/// Perform reference-marking and odr-use handling for a FunctionParmPackExpr.
+void Sema::MarkFunctionParmPackReferenced(FunctionParmPackExpr *E) {
+  for (VarDecl *VD : *E)
+    MarkExprReferenced(*this, E->getParameterPackLocation(), VD, E, true);
+}
+
 /// Perform marking for a reference to an arbitrary declaration.  It
 /// marks the declaration referenced, and performs odr-use checking for
 /// functions and variables. This method should not be used when building a
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index ac050fa1ef55c..5884cf906fd15 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7427,12 +7427,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
   // All the potentially captureable variables in the current nested
   // lambda (within a generic outer lambda), must be captured by an
   // outer lambda that is enclosed within a non-dependent context.
-  const unsigned NumPotentialCaptures =
-      CurrentLSI->getNumPotentialVariableCaptures();
-  for (unsigned I = 0; I != NumPotentialCaptures; ++I) {
-    Expr *VarExpr = nullptr;
-    VarDecl *Var = nullptr;
-    CurrentLSI->getPotentialVariableCapture(I, Var, VarExpr);
+  CurrentLSI->visitPotentialCaptures([&] (VarDecl *Var, Expr *VarExpr) {
     // If the variable is clearly identified as non-odr-used and the full
     // expression is not instantiation dependent, only then do we not
     // need to check enclosing lambda's for speculative captures.
@@ -7446,7 +7441,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
     // }
     if (CurrentLSI->isVariableExprMarkedAsNonODRUsed(VarExpr) &&
         !IsFullExprInstantiationDependent)
-      continue;
+      return;
 
     // If we have a capture-capable lambda for the variable, go ahead and
     // capture the variable in that lambda (and all its enclosing lambdas).
@@ -7478,7 +7473,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
                           DeclRefType, nullptr);
       }
     }
-  }
+  });
 
   // Check if 'this' needs to be captured.
   if (CurrentLSI->hasPotentialThisCapture()) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index ba54d5010bab4..973f564d30583 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1368,9 +1368,11 @@ TemplateInstantiator::TransformFunctionParmPackExpr(FunctionParmPackExpr *E) {
     Vars.push_back(D);
   }
 
-  return FunctionParmPackExpr::Create(getSema().Context, T,
-                                      E->getParameterPack(),
-                                      E->getParameterPackLocation(), Vars);
+  auto *PackExpr =
+      FunctionParmPackExpr::Create(getSema().Context, T, E->getParameterPack(),
+                                   E->getParameterPackLocation(), Vars);
+  getSema().MarkFunctionParmPackReferenced(PackExpr);
+  return PackExpr;
 }
 
 ExprResult
@@ -1389,8 +1391,10 @@ TemplateInstantiator::TransformFunctionParmPackRefExpr(DeclRefExpr *E,
       QualType T = TransformType(E->getType());
       if (T.isNull())
         return ExprError();
-      return FunctionParmPackExpr::Create(getSema().Context, T, PD,
-                                          E->getExprLoc(), *Pack);
+      auto *PackExpr = FunctionParmPackExpr::Create(getSema().Context, T, PD,
+                                                    E->getExprLoc(), *Pack);
+      getSema().MarkFunctionParmPackReferenced(PackExpr);
+      return PackExpr;
     }
 
     TransformedDecl = (*Pack)[getSema().ArgumentPackSubstitutionIndex];
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 592787a5870ce..a1a9aaedee443 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -660,7 +660,10 @@ class TreeTransform {
                                           bool ExpectParameterPack);
 
   /// Transform the body of a lambda-expression.
-  StmtResult TransformLambdaBody(Stmt *Body);
+  StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body);
+  /// Alternative implementation of TransformLambdaBody that skips transforming
+  /// the body.
+  StmtResult SkipLambdaBody(LambdaExpr *E, Stmt *Body);
 
   QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL);
 
@@ -11358,16 +11361,13 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
   bool Invalid = false;
 
   // Transform captures.
-  bool FinishedExplicitCaptures = false;
   for (LambdaExpr::capture_iterator C = E->capture_begin(),
                                  CEnd = E->capture_end();
        C != CEnd; ++C) {
     // When we hit the first implicit capture, tell Sema that we've finished
     // the list of explicit captures.
-    if (!FinishedExplicitCaptures && C->isImplicit()) {
-      getSema().finishLambdaExplicitCaptures(LSI);
-      FinishedExplicitCaptures = true;
-    }
+    if (C->isImplicit())
+      break;
 
     // Capturing 'this' is trivial.
     if (C->capturesThis()) {
@@ -11476,17 +11476,16 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
     getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind,
                                  EllipsisLoc);
   }
-  if (!FinishedExplicitCaptures)
-    getSema().finishLambdaExplicitCaptures(LSI);
+  getSema().finishLambdaExplicitCaptures(LSI);
 
-  // Enter a new evaluation context to insulate the lambda from any
-  // cleanups from the enclosing full-expression.
+  // FIXME: Sema's lambda-building mechanism expects us to push an expression
+  // evaluation context even if we're not transforming the function body.
   getSema().PushExpressionEvaluationContext(
       Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
 
   // Instantiate the body of the lambda expression.
   StmtResult Body =
-      Invalid ? StmtError() : getDerived().TransformLambdaBody(E->getBody());
+      Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody());
 
   // ActOnLambda* will pop the function scope for us.
   FuncScopeCleanup.disable();
@@ -11512,10 +11511,50 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
 
 template<typename Derived>
 StmtResult
-TreeTransform<Derived>::TransformLambdaBody(Stmt *S) {
+TreeTransform<Derived>::TransformLambdaBody(LambdaExpr *E, Stmt *S) {
   return TransformStmt(S);
 }
 
+template<typename Derived>
+StmtResult
+TreeTransform<Derived>::SkipLambdaBody(LambdaExpr *E, Stmt *S) {
+  // Transform captures.
+  for (LambdaExpr::capture_iterator C = E->capture_begin(),
+                                 CEnd = E->capture_end();
+       C != CEnd; ++C) {
+    // When we hit the first implicit capture, tell Sema that we've finished
+    // the list of explicit captures.
+    if (!C->isImplicit())
+      continue;
+
+    // Capturing 'this' is trivial.
+    if (C->capturesThis()) {
+      getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
+                                    /*BuildAndDiagnose*/ true, nullptr,
+                                    C->getCaptureKind() == LCK_StarThis);
+      continue;
+    }
+    // Captured expression will be recaptured during captured variables
+    // rebuilding.
+    if (C->capturesVLAType())
+      continue;
+
+    assert(C->capturesVariable() && "unexpected kind of lambda capture");
+    assert(!E->isInitCapture(C) && "implicit init-capture?");
+
+    // Transform the captured variable.
+    VarDecl *CapturedVar = cast_or_null<VarDecl>(
+        getDerived().TransformDecl(C->getLocation(), C->getCapturedVar()));
+    if (!CapturedVar || CapturedVar->isInvalidDecl())
+      return StmtError();
+
+    // Capture the transformed variable.
+    getSema().tryCaptureVariable(CapturedVar, C->getLocation());
+  }
+
+  return S;
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr(
diff --git a/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp b/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
index eaed45acd11be..a98366c8794a1 100644
--- a/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
+++ b/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -emit-llvm-only %s
+// RUN: %clang_cc1 -std=c++2a -verify -fsyntax-only -fblocks -emit-llvm-only %s
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fdelayed-template-parsing %s -DDELAYED_TEMPLATE_PARSING
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fms-extensions %s -DMS_EXTENSIONS
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fdelayed-template-parsing -fms-extensions %s -DMS_EXTENSIONS -DDELAYED_TEMPLATE_PARSING
@@ -176,7 +177,13 @@ void doit() {
     sample::X cx{5};
     auto L = [=](auto a) { 
       const int z = 3;
+      // FIXME: The warning below is correct but for some reason doesn't show
+      // up in C++17 mode.
       return [&,a](auto b) {
+#if __cplusplus > 201702L
+        // expected-warning@-2 {{address of stack memory associated with local variable 'z' returned}}
+        // expected-note@#call {{in instantiation of}}
+#endif
         const int y = 5;    
         return [=](auto c) { 
           int d[sizeof(a) == sizeof(c) || sizeof(c) == sizeof(b) ? 2 : 1];
@@ -189,7 +196,7 @@ void doit() {
         }; 
       };
     };
-    auto M = L(3)(3.5);
+    auto M = L(3)(3.5); // #call
     M(3.14);
   }
 }
@@ -1519,6 +1526,20 @@ void test() {
 
 } // end ns5
 
-
-
 } // end PR34266
+
+namespace capture_pack {
+#if __cplusplus >= 201702L
+  constexpr
+#endif
+  auto v =
+    [](auto ...a) {
+      [&](auto ...b) {
+        ((a = b), ...); // expected-warning 0-1{{extension}}
+      }(100, 20, 3);
+      return (a + ...); // expected-warning 0-1{{extension}}
+    }(400, 50, 6);
+#if __cplusplus >= 201702L
+  static_assert(v == 123);
+#endif
+}
diff --git a/clang/test/SemaTemplate/lambda-capture-pack.cpp b/clang/test/SemaTemplate/lambda-capture-pack.cpp
new file mode 100644
index 0000000000000..2fe576769dbdf
--- /dev/null
+++ b/clang/test/SemaTemplate/lambda-capture-pack.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -std=c++2a -verify %s
+// expected-no-diagnostics
+
+template<typename ...T, typename ...Lambda> void check_sizes(Lambda ...L) {
+  static_assert(((sizeof(T) == sizeof(Lambda)) && ...));
+}
+
+template<typename ...T> void f(T ...v) {
+  // Pack expansion of lambdas: each lambda captures only one pack element.
+  check_sizes<T...>([=] { (void)&v; } ...);
+
+  // Pack expansion inside lambda: captures all pack elements.
+  auto l = [=] { ((void)&v, ...); };
+  static_assert(sizeof(l) >= (sizeof(T) + ...));
+}
+
+template void f(int, char, double);

From 8522d579b894b8e43ed0bfee5d3c9c2238805234 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Mon, 3 Jun 2019 06:21:33 +0000
Subject: [PATCH 0877/1176] [ELF][PPC64] Rename some PPC64 ELFv2 specific
 RelExpr from R_PPC_* to R_PPC64_*

The following abstract relocation types (RelExpr) are PPC64 ELFv2 ABI specific,
not used by PPC32. So rename them to prevent confusion when the PPC32 port is improved.

* R_PPC_CALL R_PPC_CALL_PLT:
  R_PPC_CALL_PLT represents R_PPC64_REL14 and R_PPC64_REL24.
  If the function is not preemptable, R_PPC_CALL_PLT can be optimized to R_PPC_CALL:
  the formula adjusts the symbol VA from the global entry point to the local entry point.
* R_PPC_TOC: represents R_PPC64_TOC.  We don't have a test. Add one to ppc64-relocs.s
  Rename it to R_PPC64_TOCBASE because `@tocbase` is the assembly form.

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D62800

llvm-svn: 362359
---
 lld/ELF/Arch/PPC64.cpp      |  4 ++--
 lld/ELF/InputSection.cpp    |  8 ++++----
 lld/ELF/Relocations.cpp     | 20 +++++++++++---------
 lld/ELF/Relocations.h       |  6 +++---
 lld/test/ELF/ppc64-relocs.s | 22 +++++++++++++++++-----
 5 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index a79e9c80242fa..9e2543e9eb860 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -545,10 +545,10 @@ RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
   case R_PPC64_TOC16_LO_DS:
     return Config->TocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL;
   case R_PPC64_TOC:
-    return R_PPC_TOC;
+    return R_PPC64_TOCBASE;
   case R_PPC64_REL14:
   case R_PPC64_REL24:
-    return R_PPC_CALL_PLT;
+    return R_PPC64_CALL_PLT;
   case R_PPC64_REL16_LO:
   case R_PPC64_REL16_HA:
   case R_PPC64_REL32:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 45bafd321ded8..74a8028e229bd 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -717,9 +717,9 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
   case R_PLT:
     return Sym.getPltVA() + A;
   case R_PLT_PC:
-  case R_PPC_CALL_PLT:
+  case R_PPC64_CALL_PLT:
     return Sym.getPltVA() + A - P;
-  case R_PPC_CALL: {
+  case R_PPC64_CALL: {
     uint64_t SymVA = Sym.getVA(A);
     // If we have an undefined weak symbol, we might get here with a symbol
     // address of zero. That could overflow, but the code must be unreachable,
@@ -735,7 +735,7 @@ static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
     // branching to the local entry point.
     return SymVA - P + getPPC64GlobalEntryToLocalEntryOffset(Sym.StOther);
   }
-  case R_PPC_TOC:
+  case R_PPC64_TOCBASE:
     return getPPC64TocBase() + A;
   case R_RELAX_GOT_PC:
     return Sym.getVA(A) - P;
@@ -922,7 +922,7 @@ void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
     case R_RELAX_TLS_GD_TO_IE_GOTPLT:
       Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
       break;
-    case R_PPC_CALL:
+    case R_PPC64_CALL:
       // If this is a call to __tls_get_addr, it may be part of a TLS
       // sequence that has been relaxed and turned into a nop. In this
       // case, we don't want to handle it as a call.
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index fe10234be641a..5f1149a074728 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -363,7 +363,7 @@ static bool isAbsoluteValue(const Symbol &Sym) {
 
 // Returns true if Expr refers a PLT entry.
 static bool needsPlt(RelExpr Expr) {
-  return oneof<R_PLT_PC, R_PPC_CALL_PLT, R_PLT>(Expr);
+  return oneof<R_PLT_PC, R_PPC64_CALL_PLT, R_PLT>(Expr);
 }
 
 // Returns true if Expr refers a GOT entry. Note that this function
@@ -378,8 +378,9 @@ static bool needsGot(RelExpr Expr) {
 // True if this expression is of the form Sym - X, where X is a position in the
 // file (PC, or GOT for example).
 static bool isRelExpr(RelExpr Expr) {
-  return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC_CALL,
-               R_PPC64_RELAX_TOC, R_PPC_CALL_PLT, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC>(Expr);
+  return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC64_CALL,
+               R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_AARCH64_PAGE_PC,
+               R_RELAX_GOT_PC>(Expr);
 }
 
 // Returns true if a given relocation can be computed at link-time.
@@ -398,7 +399,7 @@ static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
             R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL, R_MIPS_GOT_OFF,
             R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC, R_MIPS_TLSGD,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
-            R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC_CALL_PLT,
+            R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC64_CALL_PLT,
             R_PPC64_RELAX_TOC, R_TLSDESC_CALL, R_TLSDESC_PC,
             R_AARCH64_TLSDESC_PAGE, R_HINT, R_TLSLD_HINT, R_TLSIE_HINT>(E))
     return true;
@@ -452,8 +453,8 @@ static bool isStaticLinkTimeConstant(RelExpr E, RelType Type, const Symbol &Sym,
 
 static RelExpr toPlt(RelExpr Expr) {
   switch (Expr) {
-  case R_PPC_CALL:
-    return R_PPC_CALL_PLT;
+  case R_PPC64_CALL:
+    return R_PPC64_CALL_PLT;
   case R_PC:
     return R_PLT_PC;
   case R_ABS:
@@ -469,8 +470,8 @@ static RelExpr fromPlt(RelExpr Expr) {
   switch (Expr) {
   case R_PLT_PC:
     return R_PC;
-  case R_PPC_CALL_PLT:
-    return R_PPC_CALL;
+  case R_PPC64_CALL_PLT:
+    return R_PPC64_CALL;
   case R_PLT:
     return R_ABS;
   default:
@@ -1125,7 +1126,8 @@ static void scanReloc(InputSectionBase &Sec, OffsetGetter &GetOffset, RelTy *&I,
   // The 4 types that relative GOTPLT are all x86 and x86-64 specific.
   if (oneof<R_GOTPLTONLY_PC, R_GOTPLTREL, R_GOTPLT, R_TLSGD_GOTPLT>(Expr)) {
     In.GotPlt->HasGotPltOffRel = true;
-  } else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC_TOC, R_PPC64_RELAX_TOC>(Expr)) {
+  } else if (oneof<R_GOTONLY_PC, R_GOTREL, R_PPC64_TOCBASE, R_PPC64_RELAX_TOC>(
+                 Expr)) {
     In.Got->HasGotOffRel = true;
   }
 
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index 00156f5c3731b..064feb0687c23 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -91,10 +91,10 @@ enum RelExpr {
   R_MIPS_GOT_OFF32,
   R_MIPS_TLSGD,
   R_MIPS_TLSLD,
-  R_PPC_CALL,
-  R_PPC_CALL_PLT,
-  R_PPC_TOC,
+  R_PPC64_CALL,
+  R_PPC64_CALL_PLT,
   R_PPC64_RELAX_TOC,
+  R_PPC64_TOCBASE,
   R_RISCV_PC_INDIRECT,
 };
 
diff --git a/lld/test/ELF/ppc64-relocs.s b/lld/test/ELF/ppc64-relocs.s
index 1804db9253f58..5e8c529e96144 100644
--- a/lld/test/ELF/ppc64-relocs.s
+++ b/lld/test/ELF/ppc64-relocs.s
@@ -2,12 +2,12 @@
 
 # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
 # RUN: ld.lld --no-toc-optimize %t.o -o %t
-# RUN: llvm-readelf -x .rodata -x .eh_frame %t | FileCheck %s --check-prefix=DATALE
+# RUN: llvm-readelf -x .rodata -x .R_PPC64_TOC -x .eh_frame %t | FileCheck %s --check-prefix=DATALE
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 
 # RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
 # RUN: ld.lld --no-toc-optimize %t.o -o %t
-# RUN: llvm-readelf -x .rodata -x .eh_frame %t | FileCheck %s --check-prefix=DATABE
+# RUN: llvm-readelf -x .rodata -x .R_PPC64_TOC -x .eh_frame %t | FileCheck %s --check-prefix=DATABE
 # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
 
 .text
@@ -139,14 +139,26 @@ _start:
 __foo:
   li 3,0
 
+.section .R_PPC64_TOC,"a",@progbits
+  .quad .TOC.@tocbase
+
+# SEC: .got PROGBITS 0000000010020000
+
+## tocbase = .got+0x8000 = 0x10028000
+# DATALE-LABEL: section '.R_PPC64_TOC':
+# DATALE: 00800210 00000000
+
+# DATABE-LABEL: section '.R_PPC64_TOC':
+# DATABE: 00000000 10028000
+
 # Check that the personality (relocated by R_PPC64_REL64) in the .eh_frame
 # equals the address of __foo.
-# 0x100001e2 + 0x76fe = 0x10010058
+# 0x100001ea + 0xfe6e = 0x10010058
 # DATALE: section '.eh_frame':
-# DATALE: 0x100001e0 {{....}}76fe
+# DATALE: 0x100001e8 {{....}}6efe
 
 # DATABE: section '.eh_frame':
-# DATABE: 0x100001e0 {{[0-9a-f]+ [0-9a-f]+}} fe76{{....}}
+# DATABE: 0x100001e8 {{[0-9a-f]+ [0-9a-f]+}} fe6e{{....}}
 
 # CHECK: __foo
 # CHECK-NEXT: 10010058:       li 3, 0

From 404a679e1d0c19bf504776fd10aaca411462da5e Mon Sep 17 00:00:00 2001
From: Mikael Holmen <mikael.holmen@ericsson.com>
Date: Mon, 3 Jun 2019 06:38:01 +0000
Subject: [PATCH 0878/1176] [TableGen] Fix std::array initializer to avoid
 warnings with older tool chains. NFC

A std::array is implemented as a template with an array inside a struct.
Older versions of clang, like 3.6, require an extra set of curly braces
around std::array initializations to avoid warnings.

The C++ language was changed regarding this by CWG 1270. So more modern
tool chains does not complain even if leaving out one level of braces.

llvm-svn: 362360
---
 llvm/utils/TableGen/DAGISelMatcherGen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 3d3ae9c21ebef..1f0db4fd1239a 100644
--- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -692,7 +692,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
     }
 
     if (Def->getName() == "undef_tied_input") {
-      std::array<MVT::SimpleValueType, 1> ResultVTs = { N->getSimpleType(0) };
+      std::array<MVT::SimpleValueType, 1> ResultVTs = {{ N->getSimpleType(0) }};
       std::array<unsigned, 0> InstOps;
       auto IDOperandNo = NextRecordedOperandNo++;
       AddMatcher(new EmitNodeMatcher("TargetOpcode::IMPLICIT_DEF",

From ceb0cc54f9d647c2fed3ae878ba91b95f9740777 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein@google.com>
Date: Mon, 3 Jun 2019 08:14:15 +0000
Subject: [PATCH 0879/1176] [clang-tidy] Fix make-unique check to work in C++17
 mode.

Summary:
Previously, we intended to omit the check fix to the case when constructor has
any braced-init-list argument. But the HasListInitializedArgument was not
correct to handle all cases (Foo(Bar{1, 2}) will return false in C++14
mode).

This patch fixes it, corrects the tests, and makes the check to run at C++17 mode.

Reviewers: gribozavr

Subscribers: xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62736

llvm-svn: 362361
---
 .../clang-tidy/modernize/MakeSmartPtrCheck.cpp    | 15 ++++++++++++---
 .../test/clang-tidy/modernize-make-unique.cpp     | 12 ++++++------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp
index 179a09745e87d..5fbc7be3749b6 100644
--- a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp
@@ -298,11 +298,20 @@ bool MakeSmartPtrCheck::replaceNew(DiagnosticBuilder &Diag,
         return true;
       // Check whether we implicitly construct a class from a
       // std::initializer_list.
-      if (const auto *ImplicitCE = dyn_cast<CXXConstructExpr>(Arg)) {
-        if (ImplicitCE->isStdInitListInitialization())
+      if (const auto *CEArg = dyn_cast<CXXConstructExpr>(Arg)) {
+        // Strip the elidable move constructor, it is present in the AST for
+        // C++11/14, e.g. Foo(Bar{1, 2}), the move constructor is around the
+        // init-list constructor.
+        if (CEArg->isElidable()) {
+          if (const auto *TempExp = CEArg->getArg(0)) {
+            if (const auto *UnwrappedCE =
+                    dyn_cast<CXXConstructExpr>(TempExp->IgnoreImplicit()))
+              CEArg = UnwrappedCE;
+          }
+        }
+        if (CEArg->isStdInitListInitialization())
           return true;
       }
-      return false;
     }
     return false;
   };
diff --git a/clang-tools-extra/test/clang-tidy/modernize-make-unique.cpp b/clang-tools-extra/test/clang-tidy/modernize-make-unique.cpp
index 3b7a3de43ac5f..2920596e67c2f 100644
--- a/clang-tools-extra/test/clang-tidy/modernize-make-unique.cpp
+++ b/clang-tools-extra/test/clang-tidy/modernize-make-unique.cpp
@@ -1,5 +1,5 @@
-// RUN: %check_clang_tidy -std=c++14 %s modernize-make-unique %t -- -- -I %S/Inputs/modernize-smart-ptr
-// FIXME: Fix the checker to work in C++17 mode.
+// RUN: %check_clang_tidy -std=c++14,c++17 %s modernize-make-unique %t -- -- -I %S/Inputs/modernize-smart-ptr
+// FIXME: Fix the test code in C++2a mode.
 
 #include "unique_ptr.h"
 #include "initializer_list.h"
@@ -455,10 +455,10 @@ void initialization(int T, Base b) {
 
   std::unique_ptr<J> PJ2 = std::unique_ptr<J>(new J(E{1, 2}, 1));
   // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: use std::make_unique instead
-  // CHECK-FIXES: std::unique_ptr<J> PJ2 = std::make_unique<J>(E{1, 2}, 1);
+  // CHECK-FIXES: std::unique_ptr<J> PJ2 = std::unique_ptr<J>(new J(E{1, 2}, 1));
   PJ2.reset(new J(E{1, 2}, 1));
   // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use std::make_unique instead
-  // CHECK-FIXES: PJ2 = std::make_unique<J>(E{1, 2}, 1);
+  // CHECK-FIXES: PJ2.reset(new J(E{1, 2}, 1));
 
   std::unique_ptr<J> PJ3 = std::unique_ptr<J>(new J{ {1, 2}, 1 });
   // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: use std::make_unique instead
@@ -469,10 +469,10 @@ void initialization(int T, Base b) {
 
   std::unique_ptr<J> PJ4 = std::unique_ptr<J>(new J{E{1, 2}, 1});
   // CHECK-MESSAGES: :[[@LINE-1]]:28: warning: use std::make_unique instead
-  // CHECK-FIXES: std::unique_ptr<J> PJ4 = std::make_unique<J>(E{1, 2}, 1);
+  // CHECK-FIXES: std::unique_ptr<J> PJ4 = std::unique_ptr<J>(new J{E{1, 2}, 1});
   PJ4.reset(new J{E{1, 2}, 1});
   // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use std::make_unique instead
-  // CHECK-FIXES:  PJ4 = std::make_unique<J>(E{1, 2}, 1);
+  // CHECK-FIXES: PJ4.reset(new J{E{1, 2}, 1});
 
   std::unique_ptr<Foo> FF = std::unique_ptr<Foo>(new Foo());
   // CHECK-MESSAGES: :[[@LINE-1]]:29: warning:

From 209adba440537a0803ec298f3c8c9af2344d9020 Mon Sep 17 00:00:00 2001
From: Mikhail Dvorskiy <mikhail.dvorskiy@intel.com>
Date: Mon, 3 Jun 2019 08:23:30 +0000
Subject: [PATCH 0880/1176] A test commit from Mikhail Dvorskiy (blank line, to
 pstl/trunk) according 'Obtaining Commit Access' rules
 (https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access)

llvm-svn: 362362
---
 pstl/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pstl/README.md b/pstl/README.md
index d81a7210a5ef7..5c862cbde4a74 100644
--- a/pstl/README.md
+++ b/pstl/README.md
@@ -32,3 +32,4 @@ To use Parallel STL, you must have the following software installed:
 * The following algorithms require additional O(n) memory space for parallel execution: `copy_if`, `inplace_merge`,
   `partial_sort`, `partial_sort_copy`, `partition_copy`, `remove`, `remove_if`, `rotate`, `sort`, `stable_sort`,
   `unique`, `unique_copy`.
+

From 1a44584588b7e54856b9f16a68251800bda805e7 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Mon, 3 Jun 2019 08:34:25 +0000
Subject: [PATCH 0881/1176] [CodeComplete] Add a bit more whitespace to
 completed patterns

Summary:
E.g. we now turn `while(<#cond#>){` into `while (<#cond#>) {`

This slightly improves the final output. Should not affect clients that
format the result on their own.

Reviewers: gribozavr

Reviewed By: gribozavr

Subscribers: jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62616

llvm-svn: 362363
---
 .../clangd/unittests/CodeCompleteTests.cpp    |  2 +-
 clang/lib/Sema/SemaCodeComplete.cpp           | 17 ++++++++++++
 .../CodeCompletion/ordinary-name-cxx11.cpp    | 24 ++++++++---------
 clang/test/CodeCompletion/ordinary-name.cpp   | 26 +++++++++----------
 4 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index 202757aff412b..e90124ceb4330 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -2436,7 +2436,7 @@ TEST(CompletionTest, CursorInSnippets) {
   EXPECT_THAT(
       Results.Completions,
       Contains(AllOf(Named("while"),
-                     SnippetSuffix("(${1:condition}){\n${0:statements}\n}"))));
+                     SnippetSuffix(" (${1:condition}) {\n${0:statements}\n}"))));
   // However, snippets for functions must *not* end with $0.
   EXPECT_THAT(Results.Completions,
               Contains(AllOf(Named("while_foo"),
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 1a30573f3566d..f530601aba17d 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1900,6 +1900,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
         Builder.AddTypedTextChunk("namespace");
         Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
         Builder.AddPlaceholderChunk("identifier");
+        Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
         Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
         Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
         Builder.AddPlaceholderChunk("declarations");
@@ -2048,15 +2049,19 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
     if (SemaRef.getLangOpts().CPlusPlus && Results.includeCodePatterns() &&
         SemaRef.getLangOpts().CXXExceptions) {
       Builder.AddTypedTextChunk("try");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddTextChunk("catch");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftParen);
       Builder.AddPlaceholderChunk("declaration");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
@@ -2070,12 +2075,14 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
     if (Results.includeCodePatterns()) {
       // if (condition) { statements }
       Builder.AddTypedTextChunk("if");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftParen);
       if (SemaRef.getLangOpts().CPlusPlus)
         Builder.AddPlaceholderChunk("condition");
       else
         Builder.AddPlaceholderChunk("expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
@@ -2085,12 +2092,14 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
 
       // switch (condition) { }
       Builder.AddTypedTextChunk("switch");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftParen);
       if (SemaRef.getLangOpts().CPlusPlus)
         Builder.AddPlaceholderChunk("condition");
       else
         Builder.AddPlaceholderChunk("expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("cases");
@@ -2118,12 +2127,14 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
     if (Results.includeCodePatterns()) {
       /// while (condition) { statements }
       Builder.AddTypedTextChunk("while");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftParen);
       if (SemaRef.getLangOpts().CPlusPlus)
         Builder.AddPlaceholderChunk("condition");
       else
         Builder.AddPlaceholderChunk("expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
@@ -2133,12 +2144,14 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
 
       // do { statements } while ( expression );
       Builder.AddTypedTextChunk("do");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddChunk(CodeCompletionString::CK_RightBrace);
       Builder.AddTextChunk("while");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftParen);
       Builder.AddPlaceholderChunk("expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
@@ -2146,16 +2159,20 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
 
       // for ( for-init-statement ; condition ; expression ) { statements }
       Builder.AddTypedTextChunk("for");
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftParen);
       if (SemaRef.getLangOpts().CPlusPlus || SemaRef.getLangOpts().C99)
         Builder.AddPlaceholderChunk("init-statement");
       else
         Builder.AddPlaceholderChunk("init-expression");
       Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddPlaceholderChunk("condition");
       Builder.AddChunk(CodeCompletionString::CK_SemiColon);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddPlaceholderChunk("inc-expression");
       Builder.AddChunk(CodeCompletionString::CK_RightParen);
+      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddChunk(CodeCompletionString::CK_LeftBrace);
       Builder.AddChunk(CodeCompletionString::CK_VerticalSpace);
       Builder.AddPlaceholderChunk("statements");
diff --git a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
index f955c421f1618..7696c7505f20a 100644
--- a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
+++ b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
@@ -14,7 +14,7 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-CC1: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : do{
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : do {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: double
@@ -24,9 +24,9 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#bool#]false
   // CHECK-CC1-NEXT: COMPLETION: float
   // CHECK-CC1-NEXT: COMPLETION: foo : [#void#]foo()
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : for (<#init-statement#>; <#condition#>; <#inc-expression#>) {
   // CHECK-CC1: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : if (<#condition#>) {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: int
@@ -46,12 +46,12 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>);
   // CHECK-CC1-NEXT: COMPLETION: Pattern : static_cast<<#type#>>(<#expression#>)
   // CHECK-CC1-NEXT: COMPLETION: struct
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : switch(<#condition#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : switch (<#condition#>) {
   // CHECK-CC1: COMPLETION: t : t
   // CHECK-CC1-NEXT: COMPLETION: thread_local
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]throw <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#bool#]true
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : try{
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : try {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: TYPEDEF : TYPEDEF
@@ -66,7 +66,7 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: void
   // CHECK-CC1-NEXT: COMPLETION: volatile
   // CHECK-CC1-NEXT: COMPLETION: wchar_t
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : while(<#condition#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : while (<#condition#>) {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: X : X
@@ -91,7 +91,7 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: inline
   // CHECK-CC2-NEXT: COMPLETION: int
   // CHECK-CC2-NEXT: COMPLETION: long
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#>{
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#> {
   // CHECK-CC2-NEXT: <#declarations#>
   // CHECK-CC2-NEXT: }
   // CHECK-CC2: COMPLETION: Pattern : namespace <#name#> = <#namespace#>;
@@ -219,7 +219,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do{
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do {
   // CHECK-NO-RTTI-NEXT: <#statements#>
   // CHECK-NO-RTTI-NEXT: }
   // CHECK-NO-RTTI: COMPLETION: double
@@ -229,9 +229,9 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#bool#]false
   // CHECK-NO-RTTI-NEXT: COMPLETION: float
   // CHECK-NO-RTTI-NEXT: COMPLETION: foo : [#void#]foo()
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : for (<#init-statement#>; <#condition#>; <#inc-expression#>) {
   // CHECK-NO-RTTI: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if (<#condition#>) {
   // CHECK-NO-RTTI-NEXT: <#statements#>
   // CHECK-NO-RTTI-NEXT: }
   // CHECK-NO-RTTI: COMPLETION: int
@@ -251,7 +251,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : static_assert(<#expression#>, <#message#>);
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : static_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI-NEXT: COMPLETION: struct
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : switch(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : switch (<#condition#>) {
   // CHECK-NO-RTTI: COMPLETION: t : t
   // CHECK-NO-RTTI-NOT: throw
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#bool#]true
@@ -268,7 +268,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: void
   // CHECK-NO-RTTI-NEXT: COMPLETION: volatile
   // CHECK-NO-RTTI-NEXT: COMPLETION: wchar_t
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while (<#condition#>) {
   // CHECK-NO-RTTI: COMPLETION: X : X
   // CHECK-NO-RTTI-NEXT: COMPLETION: y : [#int#]y
   // CHECK-NO-RTTI-NEXT: COMPLETION: z : [#void#]z(<#int#>)
diff --git a/clang/test/CodeCompletion/ordinary-name.cpp b/clang/test/CodeCompletion/ordinary-name.cpp
index 5c700461a664b..99cb69093a5ee 100644
--- a/clang/test/CodeCompletion/ordinary-name.cpp
+++ b/clang/test/CodeCompletion/ordinary-name.cpp
@@ -12,7 +12,7 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-CC1: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : do{
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : do {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: double
@@ -22,11 +22,11 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#bool#]false
   // CHECK-CC1-NEXT: COMPLETION: float
   // CHECK-CC1-NEXT: COMPLETION: foo : [#void#]foo()
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : for (<#init-statement#>; <#condition#>; <#inc-expression#>) {
   // CHECK-CC1-NEXT: <#statements#>{{$}}
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : if (<#condition#>) {
   // CHECK-CC1-NEXT: <#statements#>{{$}}
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: int
@@ -42,13 +42,13 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: static
   // CHECK-CC1-NEXT: COMPLETION: Pattern : static_cast<<#type#>>(<#expression#>)
   // CHECK-CC1-NEXT: COMPLETION: struct
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : switch(<#condition#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : switch (<#condition#>) {
   // CHECK-CC1: COMPLETION: t : t
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#void#]throw <#expression#>
   // CHECK-CC1-NEXT: COMPLETION: Pattern : [#bool#]true
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : try{
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : try {
   // CHECK-CC1-NEXT: <#statements#>
-  // CHECK-CC1-NEXT: }catch(<#declaration#>){
+  // CHECK-CC1-NEXT: } catch (<#declaration#>) {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: TYPEDEF : TYPEDEF
@@ -63,7 +63,7 @@ void foo() {
   // CHECK-CC1-NEXT: COMPLETION: void
   // CHECK-CC1-NEXT: COMPLETION: volatile
   // CHECK-CC1-NEXT: COMPLETION: wchar_t
-  // CHECK-CC1-NEXT: COMPLETION: Pattern : while(<#condition#>){
+  // CHECK-CC1-NEXT: COMPLETION: Pattern : while (<#condition#>) {
   // CHECK-CC1-NEXT: <#statements#>
   // CHECK-CC1-NEXT: }
   // CHECK-CC1: COMPLETION: X : X
@@ -83,7 +83,7 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: inline
   // CHECK-CC2-NEXT: COMPLETION: int
   // CHECK-CC2-NEXT: COMPLETION: long
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#>{
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : namespace <#identifier#> {
   // CHECK-CC2-NEXT: <#declarations#>
   // CHECK-CC2-NEXT: }
   // CHECK-CC2: COMPLETION: Pattern : namespace <#name#> = <#namespace#>;
@@ -195,7 +195,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : const_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#void#]delete <#expression#>
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#void#]delete [] <#expression#>
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do{
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : do {
   // CHECK-NO-RTTI: COMPLETION: double
   // CHECK-NO-RTTI-NOT: dynamic_cast
   // CHECK-NO-RTTI: COMPLETION: enum
@@ -203,9 +203,9 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : [#bool#]false
   // CHECK-NO-RTTI-NEXT: COMPLETION: float
   // CHECK-NO-RTTI-NEXT: COMPLETION: foo : [#void#]foo()
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : for(<#init-statement#>;<#condition#>;<#inc-expression#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : for (<#init-statement#>; <#condition#>; <#inc-expression#>) {
   // CHECK-NO-RTTI: COMPLETION: Pattern : goto <#label#>;
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : if (<#condition#>) {
   // CHECK-NO-RTTI: COMPLETION: int
   // CHECK-NO-RTTI-NEXT: COMPLETION: long
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : new <#type#>(<#expressions#>)
@@ -219,7 +219,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: static
   // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : static_cast<<#type#>>(<#expression#>)
   // CHECK-NO-RTTI-NEXT: COMPLETION: struct
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : switch(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : switch (<#condition#>) {
   // CHECK-NO-RTTI: COMPLETION: t : t
   // CHECK-NO-RTTI-NOT: throw
   // CHECK-NO-RTTI: COMPLETION: Pattern : [#bool#]true
@@ -236,7 +236,7 @@ void foo() {
   // CHECK-NO-RTTI-NEXT: COMPLETION: void
   // CHECK-NO-RTTI-NEXT: COMPLETION: volatile
   // CHECK-NO-RTTI-NEXT: COMPLETION: wchar_t
-  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while(<#condition#>){
+  // CHECK-NO-RTTI-NEXT: COMPLETION: Pattern : while (<#condition#>) {
   // CHECK-NO-RTTI: COMPLETION: X : X
   // CHECK-NO-RTTI-NEXT: COMPLETION: y : [#int#]y
   // CHECK-NO-RTTI-NEXT: COMPLETION: z : [#void#]z(<#int#>)

From bcd542881ddcfc6647d7c0892f7c8a6f4fdc5f49 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 3 Jun 2019 08:44:09 +0000
Subject: [PATCH 0882/1176] [NFC][X86] extract-{low,}bits.ll: one more pattern
 c with truncation

llvm-svn: 362364
---
 llvm/test/CodeGen/X86/extract-bits.ll    | 416 +++++++++++++++--------
 llvm/test/CodeGen/X86/extract-lowbits.ll | 223 +++++++-----
 2 files changed, 411 insertions(+), 228 deletions(-)

diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index ed0bdf3efc0e0..254e006b1ce30 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -6515,6 +6515,134 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
   ret i32 %truncmasked
 }
 
+; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bextr64_32_c3:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl %esi, %edx
+; X86-NOBMI-NEXT:    shrl %cl, %edx
+; X86-NOBMI-NEXT:    shrdl %cl, %esi, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB50_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:  .LBB50_2:
+; X86-NOBMI-NEXT:    movb $64, %cl
+; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
+; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB50_4
+; X86-NOBMI-NEXT:  # %bb.3:
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:  .LBB50_4:
+; X86-NOBMI-NEXT:    andl %edx, %eax
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1NOTBM-LABEL: bextr64_32_c3:
+; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB50_2
+; X86-BMI1NOTBM-NEXT:  # %bb.1:
+; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
+; X86-BMI1NOTBM-NEXT:  .LBB50_2:
+; X86-BMI1NOTBM-NEXT:    movb $64, %cl
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
+; X86-BMI1NOTBM-NEXT:    jne .LBB50_4
+; X86-BMI1NOTBM-NEXT:  # %bb.3:
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:  .LBB50_4:
+; X86-BMI1NOTBM-NEXT:    andl %edx, %eax
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    retl
+;
+; X86-BMI1BMI2-LABEL: bextr64_32_c3:
+; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    je .LBB50_2
+; X86-BMI1BMI2-NEXT:  # %bb.1:
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI1BMI2-NEXT:  .LBB50_2:
+; X86-BMI1BMI2-NEXT:    movb $64, %cl
+; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %esi
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
+; X86-BMI1BMI2-NEXT:    jne .LBB50_4
+; X86-BMI1BMI2-NEXT:  # %bb.3:
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:  .LBB50_4:
+; X86-BMI1BMI2-NEXT:    andl %edx, %eax
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bextr64_32_c3:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rdi
+; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bextr64_32_c3:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
+; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bextr64_32_c3:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rcx
+; X64-BMI1BMI2-NEXT:    negb %dl
+; X64-BMI1BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1BMI2-NEXT:    shrxq %rdx, %rax, %rax
+; X64-BMI1BMI2-NEXT:    andl %ecx, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %shifted = lshr i64 %val, %numskipbits
+  %numhighbits = sub i64 64, %numlowbits
+  %mask = lshr i64 4294967295, %numhighbits
+  %masked = and i64 %mask, %shifted
+  %truncmasked = trunc i64 %masked to i32
+  ret i32 %truncmasked
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern d. 32-bit.
 ; ---------------------------------------------------------------------------- ;
@@ -6895,36 +7023,36 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB55_2
+; X86-NOBMI-NEXT:    je .LBB56_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB55_2:
+; X86-NOBMI-NEXT:  .LBB56_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB55_4
+; X86-NOBMI-NEXT:    jne .LBB56_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB55_4:
+; X86-NOBMI-NEXT:  .LBB56_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB55_6
+; X86-NOBMI-NEXT:    jne .LBB56_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB55_6:
+; X86-NOBMI-NEXT:  .LBB56_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB55_8
+; X86-NOBMI-NEXT:    jne .LBB56_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB55_8:
+; X86-NOBMI-NEXT:  .LBB56_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -6943,36 +7071,36 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB55_2
+; X86-BMI1NOTBM-NEXT:    je .LBB56_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB55_2:
+; X86-BMI1NOTBM-NEXT:  .LBB56_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB55_4:
+; X86-BMI1NOTBM-NEXT:  .LBB56_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB55_6:
+; X86-BMI1NOTBM-NEXT:  .LBB56_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB55_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB55_8:
+; X86-BMI1NOTBM-NEXT:  .LBB56_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -6989,32 +7117,32 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB55_2
+; X86-BMI1BMI2-NEXT:    je .LBB56_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB55_2:
+; X86-BMI1BMI2-NEXT:  .LBB56_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB55_4
+; X86-BMI1BMI2-NEXT:    je .LBB56_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB55_4:
+; X86-BMI1BMI2-NEXT:  .LBB56_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB55_6
+; X86-BMI1BMI2-NEXT:    jne .LBB56_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB55_6:
+; X86-BMI1BMI2-NEXT:  .LBB56_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB55_8
+; X86-BMI1BMI2-NEXT:    jne .LBB56_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB55_8:
+; X86-BMI1BMI2-NEXT:  .LBB56_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7065,36 +7193,36 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB56_2
+; X86-NOBMI-NEXT:    je .LBB57_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB56_2:
+; X86-NOBMI-NEXT:  .LBB57_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB56_4
+; X86-NOBMI-NEXT:    jne .LBB57_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB56_4:
+; X86-NOBMI-NEXT:  .LBB57_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB56_6
+; X86-NOBMI-NEXT:    jne .LBB57_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB56_6:
+; X86-NOBMI-NEXT:  .LBB57_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB56_8
+; X86-NOBMI-NEXT:    jne .LBB57_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB56_8:
+; X86-NOBMI-NEXT:  .LBB57_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7113,36 +7241,36 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB56_2
+; X86-BMI1NOTBM-NEXT:    je .LBB57_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_2:
+; X86-BMI1NOTBM-NEXT:  .LBB57_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB56_4:
+; X86-BMI1NOTBM-NEXT:  .LBB57_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB56_6:
+; X86-BMI1NOTBM-NEXT:  .LBB57_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_8:
+; X86-BMI1NOTBM-NEXT:  .LBB57_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7159,32 +7287,32 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_2
+; X86-BMI1BMI2-NEXT:    je .LBB57_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB56_2:
+; X86-BMI1BMI2-NEXT:  .LBB57_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_4
+; X86-BMI1BMI2-NEXT:    je .LBB57_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB56_4:
+; X86-BMI1BMI2-NEXT:  .LBB57_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB56_6
+; X86-BMI1BMI2-NEXT:    jne .LBB57_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB56_6:
+; X86-BMI1BMI2-NEXT:  .LBB57_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB56_8
+; X86-BMI1BMI2-NEXT:    jne .LBB57_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB56_8:
+; X86-BMI1BMI2-NEXT:  .LBB57_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7240,36 +7368,36 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB57_2
+; X86-NOBMI-NEXT:    je .LBB58_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB57_2:
+; X86-NOBMI-NEXT:  .LBB58_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB57_4
+; X86-NOBMI-NEXT:    jne .LBB58_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB57_4:
+; X86-NOBMI-NEXT:  .LBB58_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB57_6
+; X86-NOBMI-NEXT:    jne .LBB58_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB57_6:
+; X86-NOBMI-NEXT:  .LBB58_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB57_8
+; X86-NOBMI-NEXT:    jne .LBB58_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB57_8:
+; X86-NOBMI-NEXT:  .LBB58_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7289,36 +7417,36 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB57_2
+; X86-BMI1NOTBM-NEXT:    je .LBB58_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB57_2:
+; X86-BMI1NOTBM-NEXT:  .LBB58_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB57_4:
+; X86-BMI1NOTBM-NEXT:  .LBB58_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB57_6:
+; X86-BMI1NOTBM-NEXT:  .LBB58_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB58_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB57_8:
+; X86-BMI1NOTBM-NEXT:  .LBB58_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7336,32 +7464,32 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_2
+; X86-BMI1BMI2-NEXT:    je .LBB58_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB57_2:
+; X86-BMI1BMI2-NEXT:  .LBB58_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_4
+; X86-BMI1BMI2-NEXT:    je .LBB58_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB57_4:
+; X86-BMI1BMI2-NEXT:  .LBB58_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB57_6
+; X86-BMI1BMI2-NEXT:    jne .LBB58_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB57_6:
+; X86-BMI1BMI2-NEXT:  .LBB58_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB57_8
+; X86-BMI1BMI2-NEXT:    jne .LBB58_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB57_8:
+; X86-BMI1BMI2-NEXT:  .LBB58_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7414,36 +7542,36 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB58_2
+; X86-NOBMI-NEXT:    je .LBB59_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB58_2:
+; X86-NOBMI-NEXT:  .LBB59_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edi, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %edi, %ebx
-; X86-NOBMI-NEXT:    jne .LBB58_4
+; X86-NOBMI-NEXT:    jne .LBB59_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:  .LBB58_4:
+; X86-NOBMI-NEXT:  .LBB59_4:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB58_6
+; X86-NOBMI-NEXT:    jne .LBB59_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edi, %esi
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB58_6:
+; X86-NOBMI-NEXT:  .LBB59_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB58_8
+; X86-NOBMI-NEXT:    jne .LBB59_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:  .LBB58_8:
+; X86-NOBMI-NEXT:  .LBB59_8:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -7463,36 +7591,36 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB58_2
+; X86-BMI1NOTBM-NEXT:    je .LBB59_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB58_2:
+; X86-BMI1NOTBM-NEXT:  .LBB59_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB58_4:
+; X86-BMI1NOTBM-NEXT:  .LBB59_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB58_6:
+; X86-BMI1NOTBM-NEXT:  .LBB59_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB59_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB58_8:
+; X86-BMI1NOTBM-NEXT:  .LBB59_8:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -7510,32 +7638,32 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_2
+; X86-BMI1BMI2-NEXT:    je .LBB59_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB58_2:
+; X86-BMI1BMI2-NEXT:  .LBB59_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_4
+; X86-BMI1BMI2-NEXT:    je .LBB59_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB58_4:
+; X86-BMI1BMI2-NEXT:  .LBB59_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB58_6
+; X86-BMI1BMI2-NEXT:    jne .LBB59_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB58_6:
+; X86-BMI1BMI2-NEXT:  .LBB59_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB58_8
+; X86-BMI1BMI2-NEXT:    jne .LBB59_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB58_8:
+; X86-BMI1BMI2-NEXT:  .LBB59_8:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -7594,37 +7722,37 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %al
-; X86-NOBMI-NEXT:    je .LBB59_2
+; X86-NOBMI-NEXT:    je .LBB60_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:  .LBB59_2:
+; X86-NOBMI-NEXT:  .LBB60_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %ebx, %esi
 ; X86-NOBMI-NEXT:    shll %cl, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %ebx, %ebp
-; X86-NOBMI-NEXT:    jne .LBB59_4
+; X86-NOBMI-NEXT:    jne .LBB60_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebp
-; X86-NOBMI-NEXT:  .LBB59_4:
+; X86-NOBMI-NEXT:  .LBB60_4:
 ; X86-NOBMI-NEXT:    movl %ebp, %esi
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edi
-; X86-NOBMI-NEXT:    jne .LBB59_6
+; X86-NOBMI-NEXT:    jne .LBB60_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %edx
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:  .LBB59_6:
+; X86-NOBMI-NEXT:  .LBB60_6:
 ; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    jne .LBB59_8
+; X86-NOBMI-NEXT:    jne .LBB60_8
 ; X86-NOBMI-NEXT:  # %bb.7:
 ; X86-NOBMI-NEXT:    movl %edx, %esi
-; X86-NOBMI-NEXT:  .LBB59_8:
+; X86-NOBMI-NEXT:  .LBB60_8:
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ecx
 ; X86-NOBMI-NEXT:    pushl %eax
@@ -7655,37 +7783,37 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB59_2
+; X86-BMI1NOTBM-NEXT:    je .LBB60_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB59_2:
+; X86-BMI1NOTBM-NEXT:  .LBB60_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %ebx, %esi
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB60_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB59_4:
+; X86-BMI1NOTBM-NEXT:  .LBB60_4:
 ; X86-BMI1NOTBM-NEXT:    movl %ebp, %esi
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB60_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB59_6:
+; X86-BMI1NOTBM-NEXT:  .LBB60_6:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_8
+; X86-BMI1NOTBM-NEXT:    jne .LBB60_8
 ; X86-BMI1NOTBM-NEXT:  # %bb.7:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB59_8:
+; X86-BMI1NOTBM-NEXT:  .LBB60_8:
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ecx
 ; X86-BMI1NOTBM-NEXT:    pushl %eax
@@ -7713,33 +7841,33 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-BMI1BMI2-NEXT:    shrxl %eax, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB59_2
+; X86-BMI1BMI2-NEXT:    je .LBB60_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB59_2:
+; X86-BMI1BMI2-NEXT:  .LBB60_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edi, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_4
+; X86-BMI1BMI2-NEXT:    je .LBB60_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
 ; X86-BMI1BMI2-NEXT:    movl $0, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB59_4:
+; X86-BMI1BMI2-NEXT:  .LBB60_4:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edi
-; X86-BMI1BMI2-NEXT:    jne .LBB59_6
+; X86-BMI1BMI2-NEXT:    jne .LBB60_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB59_6:
+; X86-BMI1BMI2-NEXT:  .LBB60_6:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    jne .LBB59_8
+; X86-BMI1BMI2-NEXT:    jne .LBB60_8
 ; X86-BMI1BMI2-NEXT:  # %bb.7:
 ; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
-; X86-BMI1BMI2-NEXT:  .LBB59_8:
+; X86-BMI1BMI2-NEXT:  .LBB60_8:
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    pushl %ecx
 ; X86-BMI1BMI2-NEXT:    pushl %eax
@@ -7813,28 +7941,28 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB60_2
+; X86-NOBMI-NEXT:    je .LBB61_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB60_2:
+; X86-NOBMI-NEXT:  .LBB61_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB60_4
+; X86-NOBMI-NEXT:    je .LBB61_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB60_4:
+; X86-NOBMI-NEXT:  .LBB61_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB60_6
+; X86-NOBMI-NEXT:    jne .LBB61_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB60_6:
+; X86-NOBMI-NEXT:  .LBB61_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -7848,28 +7976,28 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB60_2
+; X86-BMI1NOTBM-NEXT:    je .LBB61_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB60_2:
+; X86-BMI1NOTBM-NEXT:  .LBB61_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB60_4
+; X86-BMI1NOTBM-NEXT:    je .LBB61_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB60_4:
+; X86-BMI1NOTBM-NEXT:  .LBB61_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB60_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB61_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB60_6:
+; X86-BMI1NOTBM-NEXT:  .LBB61_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -7881,27 +8009,27 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB60_2
+; X86-BMI1BMI2-NEXT:    je .LBB61_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edx, %eax
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB60_2:
+; X86-BMI1BMI2-NEXT:  .LBB61_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB60_4
+; X86-BMI1BMI2-NEXT:    je .LBB61_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB60_4:
+; X86-BMI1BMI2-NEXT:  .LBB61_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB60_6
+; X86-BMI1BMI2-NEXT:    je .LBB61_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB60_6:
+; X86-BMI1BMI2-NEXT:  .LBB61_6:
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_d0:
@@ -7952,10 +8080,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB61_2
+; X86-NOBMI-NEXT:    jne .LBB62_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB61_2:
+; X86-NOBMI-NEXT:  .LBB62_2:
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
@@ -7976,10 +8104,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB61_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB62_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB61_2:
+; X86-BMI1NOTBM-NEXT:  .LBB62_2:
 ; X86-BMI1NOTBM-NEXT:    shll $8, %eax
 ; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    popl %esi
@@ -7995,10 +8123,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB61_2
+; X86-BMI1BMI2-NEXT:    je .LBB62_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB61_2:
+; X86-BMI1BMI2-NEXT:  .LBB62_2:
 ; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 6564486bd526e..7e65c1d5cc904 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -3630,6 +3630,61 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
   ret i32 %truncmasked
 }
 
+; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
+; Masking is 64-bit. Then truncation.
+define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
+; X86-LABEL: bzhi64_32_c3:
+; X86:       # %bb.0:
+; X86-NEXT:    movb $64, %cl
+; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    jne .LBB42_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB42_2:
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-NOBMI-LABEL: bzhi64_32_c3:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    negb %cl
+; X64-NOBMI-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1NOTBM-LABEL: bzhi64_32_c3:
+; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1NOTBM-NEXT:    retq
+;
+; X64-BMI1BMI2-LABEL: bzhi64_32_c3:
+; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    negb %sil
+; X64-BMI1BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    andl %edi, %eax
+; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1BMI2-NEXT:    retq
+  %numhighbits = sub i64 64, %numlowbits
+  %mask = lshr i64 4294967295, %numhighbits
+  %masked = and i64 %mask, %val
+  %truncmasked = trunc i64 %masked to i32
+  ret i32 %truncmasked
+}
+
 ; ---------------------------------------------------------------------------- ;
 ; Pattern d. 32-bit.
 ; ---------------------------------------------------------------------------- ;
@@ -3861,26 +3916,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB46_2
+; X86-NOBMI-NEXT:    jne .LBB47_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB46_2:
+; X86-NOBMI-NEXT:  .LBB47_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB46_4
+; X86-NOBMI-NEXT:    jne .LBB47_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB46_4:
+; X86-NOBMI-NEXT:  .LBB47_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB46_6
+; X86-NOBMI-NEXT:    jne .LBB47_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB46_6:
+; X86-NOBMI-NEXT:  .LBB47_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3900,26 +3955,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB46_2:
+; X86-BMI1NOTBM-NEXT:  .LBB47_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB46_4:
+; X86-BMI1NOTBM-NEXT:  .LBB47_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB46_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB46_6:
+; X86-BMI1NOTBM-NEXT:  .LBB47_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -3937,22 +3992,22 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_2
+; X86-BMI1BMI2-NEXT:    je .LBB47_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB46_2:
+; X86-BMI1BMI2-NEXT:  .LBB47_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB46_4
+; X86-BMI1BMI2-NEXT:    jne .LBB47_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB46_4:
+; X86-BMI1BMI2-NEXT:  .LBB47_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB46_6
+; X86-BMI1BMI2-NEXT:    jne .LBB47_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB46_6:
+; X86-BMI1BMI2-NEXT:  .LBB47_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -3998,26 +4053,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB47_2
+; X86-NOBMI-NEXT:    jne .LBB48_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB47_2:
+; X86-NOBMI-NEXT:  .LBB48_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB47_4
+; X86-NOBMI-NEXT:    jne .LBB48_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB47_4:
+; X86-NOBMI-NEXT:  .LBB48_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB47_6
+; X86-NOBMI-NEXT:    jne .LBB48_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB47_6:
+; X86-NOBMI-NEXT:  .LBB48_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4037,26 +4092,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
+; X86-BMI1NOTBM-NEXT:  .LBB48_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_4:
+; X86-BMI1NOTBM-NEXT:  .LBB48_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB48_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB47_6:
+; X86-BMI1NOTBM-NEXT:  .LBB48_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4074,22 +4129,22 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
+; X86-BMI1BMI2-NEXT:    je .LBB48_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
+; X86-BMI1BMI2-NEXT:  .LBB48_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB47_4
+; X86-BMI1BMI2-NEXT:    jne .LBB48_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_4:
+; X86-BMI1BMI2-NEXT:  .LBB48_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB47_6
+; X86-BMI1BMI2-NEXT:    jne .LBB48_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB47_6:
+; X86-BMI1BMI2-NEXT:  .LBB48_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4139,26 +4194,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB48_2
+; X86-NOBMI-NEXT:    jne .LBB49_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB48_2:
+; X86-NOBMI-NEXT:  .LBB49_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB48_4
+; X86-NOBMI-NEXT:    jne .LBB49_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB48_4:
+; X86-NOBMI-NEXT:  .LBB49_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB48_6
+; X86-NOBMI-NEXT:    jne .LBB49_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB48_6:
+; X86-NOBMI-NEXT:  .LBB49_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4179,26 +4234,26 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB48_2:
+; X86-BMI1NOTBM-NEXT:  .LBB49_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB48_4:
+; X86-BMI1NOTBM-NEXT:  .LBB49_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB49_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB48_6:
+; X86-BMI1NOTBM-NEXT:  .LBB49_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4217,22 +4272,22 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_2
+; X86-BMI1BMI2-NEXT:    je .LBB49_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB48_2:
+; X86-BMI1BMI2-NEXT:  .LBB49_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB48_4
+; X86-BMI1BMI2-NEXT:    jne .LBB49_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB48_4:
+; X86-BMI1BMI2-NEXT:  .LBB49_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB48_6
+; X86-BMI1BMI2-NEXT:    jne .LBB49_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB48_6:
+; X86-BMI1BMI2-NEXT:  .LBB49_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4280,26 +4335,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB49_2
+; X86-NOBMI-NEXT:    jne .LBB50_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB49_2:
+; X86-NOBMI-NEXT:  .LBB50_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB49_4
+; X86-NOBMI-NEXT:    jne .LBB50_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB49_4:
+; X86-NOBMI-NEXT:  .LBB50_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB49_6
+; X86-NOBMI-NEXT:    jne .LBB50_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB49_6:
+; X86-NOBMI-NEXT:  .LBB50_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -4320,26 +4375,26 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_2
+; X86-BMI1NOTBM-NEXT:    jne .LBB50_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB49_2:
+; X86-BMI1NOTBM-NEXT:  .LBB50_2:
 ; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB50_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
 ; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB49_4:
+; X86-BMI1NOTBM-NEXT:  .LBB50_4:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_6
+; X86-BMI1NOTBM-NEXT:    jne .LBB50_6
 ; X86-BMI1NOTBM-NEXT:  # %bb.5:
 ; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB49_6:
+; X86-BMI1NOTBM-NEXT:  .LBB50_6:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    popl %ebx
@@ -4358,22 +4413,22 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB49_2
+; X86-BMI1BMI2-NEXT:    je .LBB50_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB49_2:
+; X86-BMI1BMI2-NEXT:  .LBB50_2:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB49_4
+; X86-BMI1BMI2-NEXT:    jne .LBB50_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB49_4:
+; X86-BMI1BMI2-NEXT:  .LBB50_4:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB49_6
+; X86-BMI1BMI2-NEXT:    jne .LBB50_6
 ; X86-BMI1BMI2-NEXT:  # %bb.5:
 ; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB49_6:
+; X86-BMI1BMI2-NEXT:  .LBB50_6:
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
@@ -4423,18 +4478,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB50_2
+; X86-NOBMI-NEXT:    je .LBB51_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB50_2:
+; X86-NOBMI-NEXT:  .LBB51_2:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB50_4
+; X86-NOBMI-NEXT:    jne .LBB51_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB50_4:
+; X86-NOBMI-NEXT:  .LBB51_4:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -4449,18 +4504,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
 ; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB50_2
+; X86-BMI1NOTBM-NEXT:    je .LBB51_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
 ; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB50_2:
+; X86-BMI1NOTBM-NEXT:  .LBB51_2:
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB50_4
+; X86-BMI1NOTBM-NEXT:    jne .LBB51_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
 ; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB50_4:
+; X86-BMI1NOTBM-NEXT:  .LBB51_4:
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
@@ -4473,17 +4528,17 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB50_2
+; X86-BMI1BMI2-NEXT:    je .LBB51_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
 ; X86-BMI1BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB50_2:
+; X86-BMI1BMI2-NEXT:  .LBB51_2:
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB50_4
+; X86-BMI1BMI2-NEXT:    je .LBB51_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
 ; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB50_4:
+; X86-BMI1BMI2-NEXT:  .LBB51_4:
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_d0:

From a0bd6f8a1ae73887fc32b85cd44e85435310f9d3 Mon Sep 17 00:00:00 2001
From: Sam Parker <sam.parker@arm.com>
Date: Mon, 3 Jun 2019 08:49:17 +0000
Subject: [PATCH 0883/1176] [AArch64] Check for simple type in FPToUInt

DAGCombiner was hitting a SimpleType assertion when trying to combine
a v3f32 before type legalization.

bugzilla: https://bugs.llvm.org/show_bug.cgi?id=41916

Differential Revision: https://reviews.llvm.org/D62734

llvm-svn: 362365
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp |  3 +++
 llvm/test/CodeGen/AArch64/v3f-to-int.ll         | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/v3f-to-int.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d8e52929ffb3c..ba8bbd251597a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9206,6 +9206,9 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget->hasNEON())
     return SDValue();
 
+  if (!N->getValueType(0).isSimple())
+    return SDValue();
+
   SDValue Op = N->getOperand(0);
   if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
       Op.getOpcode() != ISD::FMUL)
diff --git a/llvm/test/CodeGen/AArch64/v3f-to-int.ll b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
new file mode 100644
index 0000000000000..9c9dd5ed7e98e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64--linux-eabi %s -o - | FileCheck %s
+
+; CHECK-LABEL: convert_v3f32
+; CHECK: strb
+; CHECK: strh
+define void @convert_v3f32() {
+entry:
+  br label %bb
+
+bb:
+  %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %1 = fmul reassoc nnan ninf nsz contract afn <3 x float> %0, <float 2.550000e+02, float 2.550000e+02, float 2.550000e+02>
+  %2 = fptoui <3 x float> %1 to <3 x i8>
+  %3 = bitcast i8* undef to <3 x i8>*
+  store <3 x i8> %2, <3 x i8>* %3, align 1
+  ret void
+}

From df92f841105e23be1f2c3fe6bcc543d53cbf7576 Mon Sep 17 00:00:00 2001
From: "Diogo N. Sampaio" <diogo.sampaio@arm.com>
Date: Mon, 3 Jun 2019 08:58:05 +0000
Subject: [PATCH 0884/1176] [ARM][FIX] Ran out of registers due tail recursion

Summary:
- pr42062
When compiling for MinSize,
ARMTargetLowering::LowerCall decides to indirect
multiple calls to a same function. However,
it disconsiders the limitation that thumb1
indirect calls require the callee to be in a
register from r0 to r3 (llvm limiation).
If all those registers are used by arguments, the
compiler dies with "error: run out of registers
during register allocation".
This patch tells the function
IsEligibleForTailCallOptimization if we intend to
perform indirect calls, as to avoid tail call
optimization.

Reviewers: dmgreen, efriedma

Reviewed By: efriedma

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62683

llvm-svn: 362366
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 77 ++++++++++++-------------
 llvm/lib/Target/ARM/ARMISelLowering.h   | 16 +++--
 llvm/test/CodeGen/ARM/pr42062.ll        | 38 ++++++++++++
 3 files changed, 82 insertions(+), 49 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/pr42062.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9231ad20aa3ab..ad84f036f241a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1832,29 +1832,40 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   bool isVarArg                         = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
-  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
-  bool isThisReturn   = false;
-  bool isSibCall      = false;
+  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+  bool isThisReturn = false;
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
+  bool PreferIndirect = false;
 
   // Disable tail calls if they're not supported.
   if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
     isTailCall = false;
 
+  if (isa<GlobalAddressSDNode>(Callee)) {
+    // If we're optimizing for minimum size and the function is called three or
+    // more times in this block, we can improve codesize by calling indirectly
+    // as BLXr has a 16-bit encoding.
+    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+    auto *BB = CLI.CS.getParent();
+    PreferIndirect =
+        Subtarget->isThumb() && Subtarget->hasMinSize() &&
+        count_if(GV->users(), [&BB](const User *U) {
+          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
+        }) > 2;
+  }
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
-                                                   Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, isVarArg, isStructRet,
+        MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
+        PreferIndirect);
     if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
       report_fatal_error("failed to perform tail call elimination on a call "
                          "site marked musttail");
     // We don't support GuaranteedTailCallOpt for ARM, only automatically
     // detected sibcalls.
-    if (isTailCall) {
+    if (isTailCall)
       ++NumTailCalls;
-      isSibCall = true;
-    }
   }
 
   // Analyze operands of the call, assigning locations to each operand.
@@ -1866,14 +1877,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
-  // For tail calls, memory operands are available in our caller's stack.
-  if (isSibCall)
+  if (isTailCall) {
+    // For tail calls, memory operands are available in our caller's stack.
     NumBytes = 0;
-
-  // Adjust the stack pointer for the new arguments...
-  // These operations are automatically eliminated by the prolog/epilog pass
-  if (!isSibCall)
+  } else {
+    // Adjust the stack pointer for the new arguments...
+    // These operations are automatically eliminated by the prolog/epilog pass
     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  }
 
   SDValue StackPtr =
       DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@@ -1995,7 +2006,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                           Ops));
       }
-    } else if (!isSibCall) {
+    } else if (!isTailCall) {
       assert(VA.isMemLoc());
 
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -2067,17 +2078,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
     }
   } else if (isa<GlobalAddressSDNode>(Callee)) {
-    // If we're optimizing for minimum size and the function is called three or
-    // more times in this block, we can improve codesize by calling indirectly
-    // as BLXr has a 16-bit encoding.
-    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
-    auto *BB = CLI.CS.getParent();
-    bool PreferIndirect =
-        Subtarget->isThumb() && Subtarget->hasMinSize() &&
-        count_if(GV->users(), [&BB](const User *U) {
-          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
-        }) > 2;
-
     if (!PreferIndirect) {
       isDirect = true;
       bool isDef = GV->isStrongDefinitionForLinker();
@@ -2309,28 +2309,25 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function.
-bool
-ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
-                                                     CallingConv::ID CalleeCC,
-                                                     bool isVarArg,
-                                                     bool isCalleeStructRet,
-                                                     bool isCallerStructRet,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                                     SelectionDAG& DAG) const {
+bool ARMTargetLowering::IsEligibleForTailCallOptimization(
+    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+    bool isCalleeStructRet, bool isCallerStructRet,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+    const bool isIndirect) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
 
   assert(Subtarget->supportsTailCall());
 
-  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+  // Indirect tail calls cannot be optimized for Thumb1 if the args
   // to the call take up r0-r3. The reason is that there are no legal registers
   // left to hold the pointer to the function to be called.
   if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
-      !isa<GlobalAddressSDNode>(Callee.getNode()))
-      return false;
+      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
+    return false;
 
   // Look for obvious safe cases to perform tail call optimization that do not
   // require ABI changes. This is what gcc calls sibcall.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 8e254d75b1c30..c61135ff69ae2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -763,15 +763,13 @@ class VectorType;
     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
     /// for tail call optimization. Targets which want to do tail call
     /// optimization should implement this function.
-    bool IsEligibleForTailCallOptimization(SDValue Callee,
-                                           CallingConv::ID CalleeCC,
-                                           bool isVarArg,
-                                           bool isCalleeStructRet,
-                                           bool isCallerStructRet,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                           SelectionDAG& DAG) const;
+    bool IsEligibleForTailCallOptimization(
+        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+        bool isCalleeStructRet, bool isCallerStructRet,
+        const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<SDValue> &OutVals,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+        const bool isIndirect) const;
 
     bool CanLowerReturn(CallingConv::ID CallConv,
                         MachineFunction &MF, bool isVarArg,
diff --git a/llvm/test/CodeGen/ARM/pr42062.ll b/llvm/test/CodeGen/ARM/pr42062.ll
new file mode 100644
index 0000000000000..612c9d67f40d1
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr42062.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s 2>&1 | FileCheck %s --implicit-check-not=error
+target triple = "thumbv8m.base-arm-none-eabi"
+@foo = external global i8
+declare i32 @bar(i8* nocapture, i32, i32, i8* nocapture)
+
+define void @food(i8* %a) #0 {
+; CHECK-LABEL: food:
+; CHECK:    mov [[ARG0:r[4-7]]], r0
+; CHECK-NEXT:    movs r1, #8
+; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:    ldr [[FOO_R:r[4-7]]], [[FOO_ADDR:\..*]]
+; CHECK-NEXT:    ldr [[BAR_R:r[4-7]]], [[BAR_ADDR:\..*]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    movs r1, #9
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    mov r0, [[ARG0]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    movs r1, #7
+; CHECK-NEXT:    movs r2, #2
+; CHECK-NEXT:    mov r0, [[ARG0]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK:         [[FOO_ADDR]]:
+; CHECK-NEXT:    .long foo
+; CHECK:         [[BAR_ADDR]]:
+; CHECK-NEXT:    .long bar
+entry:
+  %0 = tail call i32 @bar(i8* %a, i32 8, i32 1, i8* nonnull @foo)
+  %1 = tail call i32 @bar(i8* %a, i32 9, i32 0, i8* nonnull @foo)
+  %2 = tail call i32 @bar(i8* %a, i32 7, i32 2, i8* nonnull @foo)
+  ret void
+}
+attributes #0 = { minsize "target-cpu"="cortex-m23" }
+

From 3b20ae6c54d8303134db918ae4cee54075f29475 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 3 Jun 2019 09:23:01 +0000
Subject: [PATCH 0885/1176] [LLD][ELF] - Remove dead code. NFC.

I believe this line was dead after r362356.

llvm-svn: 362367
---
 lld/ELF/LinkerScript.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 3f68e6eff87c6..71c8ff2497c04 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -481,7 +481,6 @@ void LinkerScript::processSectionCommands() {
       if (Sec->Name == "/DISCARD/") {
         discard(V);
         Sec->SectionCommands.clear();
-        Sec->SectionIndex = 0; // Not an orphan.
         continue;
       }
 

From 3c837201e034a7499267ade6fef3715a10132312 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 09:29:51 +0000
Subject: [PATCH 0886/1176] Include what you use in BPFMCTargetDesc.cpp

BPFMCTargetDesc.cpp was not using any APIs from BPF.h.  Doing so is
problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
BPF target library and the MCTargetDesc library).

llvm-svn: 362368
---
 llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 38f3931c3ad50..fa27b335f3a18 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/BPFMCTargetDesc.h"
-#include "BPF.h"
 #include "MCTargetDesc/BPFInstPrinter.h"
 #include "MCTargetDesc/BPFMCAsmInfo.h"
 #include "TargetInfo/BPFTargetInfo.h"

From c5327ab71dc64008f701cf38916532970e4e2802 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 09:33:48 +0000
Subject: [PATCH 0887/1176] Include what you use in HexagonShuffler.h

HexagonShuffler.h was not using any APIs from Hexagon.h, and was only
including it for transitive dependencies.  Doing so is problematic from
include-what-you-use perspective, but it is also a layering issue (it
creates a dependency cycle between the primary Hexagon target library
and the MCTargetDesc library).

llvm-svn: 362369
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index fb940db33b2ce..bf3bad36dfe55 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H
 
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"

From 301f8fd6327932f0abc03da252aa294bdf6be6df Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 09:38:48 +0000
Subject: [PATCH 0888/1176] Include what you use in HexagonAsmParser.cpp

HexagonAsmParser.cpp was not using any APIs from Hexagon.h.  Doing so is
problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the AsmParser library).

llvm-svn: 362370
---
 llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 9a7900ccc2fe2..0881bf841f901 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -8,7 +8,6 @@
 
 #define DEBUG_TYPE "mcasmparser"
 
-#include "Hexagon.h"
 #include "HexagonTargetStreamer.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
 #include "MCTargetDesc/HexagonMCELFStreamer.h"

From 79a222fcf8c40a8a90e76118e2412e8697d02a89 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Mon, 3 Jun 2019 09:39:11 +0000
Subject: [PATCH 0889/1176] [OpenCL] Declare builtin functions using TableGen

This patch adds a `-fdeclare-opencl-builtins` command line option to
the clang frontend.  This enables clang to verify OpenCL C builtin
function declarations using a fast StringMatcher lookup, instead of
including the opencl-c.h file with the `-finclude-default-header`
option.  This avoids the large parse time penalty of the header file.

This commit only adds the basic infrastructure and some of the OpenCL
builtins.  It does not cover all builtins defined by the various OpenCL
specifications.  As such, it is not a replacement for
`-finclude-default-header` yet.

RFC: http://lists.llvm.org/pipermail/cfe-dev/2018-November/060041.html

Co-authored-by: Pierre Gondois
Co-authored-by: Joey Gouly
Co-authored-by: Sven van Haastregt

Differential Revision: https://reviews.llvm.org/D60763

llvm-svn: 362371
---
 clang/include/clang/Basic/CMakeLists.txt      |   6 +
 clang/include/clang/Basic/LangOptions.def     |   1 +
 clang/include/clang/Basic/OpenCLBuiltins.td   | 296 ++++++++++++++++
 clang/include/clang/Driver/CC1Options.td      |   4 +-
 clang/lib/Frontend/CompilerInvocation.cpp     |   3 +-
 clang/lib/Sema/SemaLookup.cpp                 |  84 +++++
 .../SemaOpenCL/fdeclare-opencl-builtins.cl    |  24 ++
 clang/utils/TableGen/CMakeLists.txt           |   1 +
 .../TableGen/ClangOpenCLBuiltinEmitter.cpp    | 318 ++++++++++++++++++
 clang/utils/TableGen/TableGen.cpp             |   6 +
 clang/utils/TableGen/TableGenBackends.h       |   3 +
 11 files changed, 744 insertions(+), 2 deletions(-)
 create mode 100644 clang/include/clang/Basic/OpenCLBuiltins.td
 create mode 100644 clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
 create mode 100644 clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp

diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 15bed5adec9e1..e26e683b9aba6 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -41,6 +41,12 @@ clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl
   TARGET ClangAttrHasAttributeImpl
   )
 
+clang_tablegen(OpenCLBuiltins.inc
+  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ -gen-clang-opencl-builtins
+  SOURCE OpenCLBuiltins.td
+  TARGET ClangOpenCLBuiltinsImpl
+  )
+
 # ARM NEON
 clang_tablegen(arm_neon.inc -gen-arm-neon-sema
   SOURCE arm_neon.td
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 359075717f5d7..ae3aeb4701930 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -256,6 +256,7 @@ LANGOPT(CFProtectionBranch , 1, 0, "Control-Flow Branch Protection enabled")
 LANGOPT(FakeAddressSpaceMap , 1, 0, "OpenCL fake address space map")
 ENUM_LANGOPT(AddressSpaceMapMangling , AddrSpaceMapMangling, 2, ASMM_Target, "OpenCL address space map mangling mode")
 LANGOPT(IncludeDefaultHeader, 1, 0, "Include default header file for OpenCL")
+LANGOPT(DeclareOpenCLBuiltins, 1, 0, "Declare OpenCL builtin functions")
 BENIGN_LANGOPT(DelayedTemplateParsing , 1, 0, "delayed template parsing")
 LANGOPT(BlocksRuntimeOptional , 1, 0, "optional blocks runtime")
 LANGOPT(
diff --git a/clang/include/clang/Basic/OpenCLBuiltins.td b/clang/include/clang/Basic/OpenCLBuiltins.td
new file mode 100644
index 0000000000000..7e37e55dbafab
--- /dev/null
+++ b/clang/include/clang/Basic/OpenCLBuiltins.td
@@ -0,0 +1,296 @@
+//==--- OpenCLBuiltins.td - OpenCL builtin declarations -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains TableGen definitions for OpenCL builtin function
+// declarations.  In case of an unresolved function name in OpenCL, Clang will
+// check for a function described in this file when -fdeclare-opencl-builtins
+// is specified.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//              Definitions of miscellaneous basic entities.
+//===----------------------------------------------------------------------===//
+// Versions of OpenCL
+class Version<int _Version> {
+  int Version = _Version;
+}
+def CL10: Version<100>;
+def CL11: Version<110>;
+def CL12: Version<120>;
+def CL20: Version<200>;
+
+// Address spaces
+// Pointer types need to be assigned an address space.
+class AddressSpace<string _AS> {
+  string AddrSpace = _AS;
+}
+def default_as    : AddressSpace<"clang::LangAS::Default">;
+def private_as    : AddressSpace<"clang::LangAS::opencl_private">;
+def global_as     : AddressSpace<"clang::LangAS::opencl_global">;
+def constant_as   : AddressSpace<"clang::LangAS::opencl_constant">;
+def local_as      : AddressSpace<"clang::LangAS::opencl_local">;
+def generic_as    : AddressSpace<"clang::LangAS::opencl_generic">;
+
+
+// Qualified Type. Allow to retrieve one ASTContext QualType.
+class QualType<string _Name> {
+  // Name of the field or function in a clang::ASTContext
+  // E.g. Name="IntTy" for the int type, and "getIntPtrType()" for an intptr_t
+  string Name = _Name;
+}
+
+// Helper class to store type access qualifiers (volatile, const, ...).
+class Qualifier<string _QualName> {
+  string QualName = _QualName;
+}
+
+//===----------------------------------------------------------------------===//
+//                      OpenCL C classes for types
+//===----------------------------------------------------------------------===//
+// OpenCL types (int, float, ...)
+class Type<string _Name, QualType _QTName> {
+  // Name of the Type
+  string Name = _Name;
+  // QualType associated with this type
+  QualType QTName = _QTName;
+  // Size of the vector (if applicable)
+  int VecWidth = 0;
+  // Is pointer
+  bit IsPointer = 0;
+  // List of qualifiers associated with the type (volatile, ...)
+  list<Qualifier> QualList = [];
+  // Address space
+  string AddrSpace = "clang::LangAS::Default";
+  // Access qualifier. Must be one of ("RO", "WO", "RW").
+  string AccessQualifier = "";
+}
+
+// OpenCL vector types (e.g. int2, int3, int16, float8, ...)
+class VectorType<Type _Ty, int _VecWidth> : Type<_Ty.Name, _Ty.QTName> {
+  int VecWidth = _VecWidth;
+}
+
+// OpenCL pointer types (e.g. int*, float*, ...)
+class PointerType<Type _Ty, AddressSpace _AS = global_as> :
+                                      Type<_Ty.Name, _Ty.QTName> {
+  bit IsPointer = 1;
+  string AddrSpace = _AS.AddrSpace;
+}
+
+// OpenCL image types (e.g. image2d_t, ...)
+class ImageType<Type _Ty, QualType _QTName, string _AccessQualifier> :
+                                              Type<_Ty.Name, _QTName> {
+  let AccessQualifier = _AccessQualifier;
+}
+
+//===----------------------------------------------------------------------===//
+//                      OpenCL C class for builtin functions
+//===----------------------------------------------------------------------===//
+class Builtin<string _Name, list<Type> _Signature> {
+  // Name of the builtin function
+  string Name = _Name;
+  // List of types used by the function. The first one is the return type and
+  // the following are the arguments. The list must have at least one element
+  // (the return type).
+  list<Type> Signature = _Signature;
+  // OpenCL Extension to which the function belongs (cl_khr_subgroups, ...)
+  string Extension = "";
+  // OpenCL Version to which the function belongs (CL10, ...)
+  Version Version = CL10;
+}
+
+//===----------------------------------------------------------------------===//
+//                           Multiclass definitions
+//===----------------------------------------------------------------------===//
+// multiclass BifN: Creates Builtin class instances for OpenCL builtin
+//                  functions with N arguments.
+// _Name      : Name of the function
+// _Signature : Signature of the function (list of the Type used by the
+//              function, the first one being the return type).
+// _IsVector  : List of bit indicating if the type in the _Signature at the
+//              same index is to be a vector in the multiple overloads. The
+//              list must have at least one non-zero value.
+multiclass Bif0<string _Name, list<Type> _Signature, list<bit> _IsVector> {
+  def : Builtin<_Name, _Signature>;
+  foreach v = [2, 3, 4, 8, 16] in {
+    def : Builtin<_Name,
+                  [!if(_IsVector[0], VectorType<_Signature[0], v>, _Signature[0])]>;
+  }
+}
+multiclass Bif1<string _Name, list<Type> _Signature, list<bit> _IsVector> {
+  def : Builtin<_Name, _Signature>;
+  foreach v = [2, 3, 4, 8, 16] in {
+    def : Builtin<_Name,
+                  [!if(_IsVector[0], VectorType<_Signature[0], v>, _Signature[0]),
+                  !if(_IsVector[1], VectorType<_Signature[1], v>, _Signature[1])]>;
+  }
+}
+multiclass Bif2<string _Name, list<Type> _Signature, list<bit> _IsVector> {
+  def : Builtin<_Name, _Signature>;
+  foreach v = [2, 3, 4, 8, 16] in {
+    def : Builtin<_Name,
+                  [!if(_IsVector[0], VectorType<_Signature[0], v>, _Signature[0]),
+                  !if(_IsVector[1], VectorType<_Signature[1], v>, _Signature[1]),
+                  !if(_IsVector[2], VectorType<_Signature[2], v>, _Signature[2])]>;
+  }
+}
+multiclass Bif3<string _Name, list<Type> _Signature, list<bit> _IsVector> {
+  def : Builtin<_Name, _Signature>;
+  foreach v = [2, 3, 4, 8, 16] in {
+    def : Builtin<_Name,
+                  [!if(_IsVector[0], VectorType<_Signature[0], v>, _Signature[0]),
+                  !if(_IsVector[1], VectorType<_Signature[1], v>, _Signature[1]),
+                  !if(_IsVector[2], VectorType<_Signature[2], v>, _Signature[2]),
+                  !if(_IsVector[3], VectorType<_Signature[3], v>, _Signature[3])]>;
+  }
+}
+//===----------------------------------------------------------------------===//
+//                 Definitions of OpenCL C types
+//===----------------------------------------------------------------------===//
+// OpenCL v1.2 s6.1.1: Built-in Scalar Data Types
+def bool_t      : Type<"bool", QualType<"BoolTy">>;
+def char_t      : Type<"char", QualType<"CharTy">>;
+def uchar_t     : Type<"uchar", QualType<"UnsignedCharTy">>;
+def short_t     : Type<"short", QualType<"ShortTy">>;
+def ushort_t    : Type<"ushort", QualType<"UnsignedShortTy">>;
+def int_t       : Type<"int", QualType<"IntTy">>;
+def uint_t      : Type<"uint", QualType<"UnsignedIntTy">>;
+def long_t      : Type<"long", QualType<"LongTy">>;
+def ulong_t     : Type<"ulong", QualType<"UnsignedLongTy">>;
+def float_t     : Type<"float", QualType<"FloatTy">>;
+def double_t    : Type<"double", QualType<"DoubleTy">>;
+def half_t      : Type<"half", QualType<"HalfTy">>;
+def size_t      : Type<"size_t",  QualType<"getSizeType()">>;
+def ptrdiff_t   : Type<"ptrdiff_t", QualType<"getPointerDiffType()">>;
+def intptr_t    : Type<"intptr_t", QualType<"getIntPtrType()">>;
+def uintptr_t   : Type<"uintptr_t", QualType<"getUIntPtrType()">>;
+def void_t      : Type<"void", QualType<"VoidTy">>;
+
+// OpenCL v1.2 s6.1.2: Built-in Vector Data Types
+foreach v = [2, 3, 4, 8, 16] in {
+  def char#v#_t    : VectorType<char_t, v>;
+  def uchar#v#_t   : VectorType<uchar_t, v>;
+  def short#v#_t   : VectorType<short_t, v>;
+  def ushort#v#_t  : VectorType<ushort_t, v>;
+  def "int"#v#_t   : VectorType<int_t, v>;
+  def uint#v#_t    : VectorType<uint_t, v>;
+  def long#v#_t    : VectorType<long_t, v>;
+  def ulong#v#_t   : VectorType<ulong_t, v>;
+  def float#v#_t   : VectorType<float_t, v>;
+  def double#v#_t  : VectorType<double_t, v>;
+  def half#v#_t    : VectorType<half_t, v>;
+}
+
+// OpenCL v1.2 s6.1.3: Other Built-in Data Types
+// These definitions with a "null" name are "abstract". They should not
+// be used in definitions of Builtin functions.
+def image2d_t         : Type<"image2d_t", QualType<"null">>;
+def image3d_t         : Type<"image3d_t", QualType<"null">>;
+def image2d_array_t   : Type<"image2d_array_t", QualType<"null">>;
+def image1d_t         : Type<"image1d_t", QualType<"null">>;
+def image1d_buffer_t  : Type<"image1d_buffer_t", QualType<"null">>;
+def image1d_array_t   : Type<"image1d_array_t", QualType<"null">>;
+// Unlike the few functions above, the following definitions can be used
+// in definitions of Builtin functions (they have a QualType with a name).
+foreach v = ["RO", "WO", "RW"] in {
+  def image2d_#v#_t       : ImageType<image2d_t,
+                                      QualType<"OCLImage2d"#v#"Ty">,
+                                      v>;
+  def image3d_#v#_t       : ImageType<image3d_t,
+                                      QualType<"OCLImage3d"#v#"Ty">,
+                                      v>;
+  def image2d_array#v#_t  : ImageType<image2d_array_t,
+                                      QualType<"OCLImage2dArray"#v#"Ty">,
+                                      v>;
+  def image1d_#v#_t       : ImageType<image1d_t,
+                                      QualType<"OCLImage1d"#v#"Ty">,
+                                      v>;
+  def image1d_buffer#v#_t : ImageType<image1d_buffer_t,
+                                      QualType<"OCLImage1dBuffer"#v#"Ty">,
+                                      v>;
+  def image1d_array#v#_t  : ImageType<image1d_array_t,
+                                      QualType<"OCLImage1dArray"#v#"Ty">,
+                                      v>;
+}
+
+def sampler_t         : Type<"sampler_t", QualType<"OCLSamplerTy">>;
+def event_t           : Type<"event_t", QualType<"OCLEventTy">>;
+
+//===----------------------------------------------------------------------===//
+//                 Definitions of OpenCL builtin functions
+//===----------------------------------------------------------------------===//
+// OpenCL v1.2 s6.2.3: Explicit Conversions
+// Generate the convert_ builtins.
+foreach RType = [float_t, double_t, char_t, uchar_t, short_t, ushort_t,
+                int_t, uint_t, long_t, ulong_t] in {
+  foreach IType = [float_t, double_t, char_t, uchar_t, short_t, ushort_t,
+                   int_t, uint_t, long_t, ulong_t] in {
+    foreach sat = ["", "_sat"] in {
+      foreach rte = ["", "_rte", "_rtz", "_rtp", "_rtn"] in {
+        def : Builtin<"convert_"  # RType.Name # sat # rte, [RType, IType]>;
+        foreach v = [2, 3, 4, 8, 16] in {
+          def : Builtin<"convert_" # RType.Name # v # sat # rte,
+                        [VectorType<RType, v>,
+                         VectorType<IType, v>]>;
+        }
+      }
+    }
+  }
+}
+
+// OpenCL v1.2 s6.12.1: Work-Item Functions
+def get_work_dim : Builtin<"get_work_dim", [uint_t]>;
+foreach name = ["get_global_size", "get_global_id", "get_local_size",
+                "get_local_id", "get_num_groups", "get_group_id",
+                "get_global_offset"] in {
+  def : Builtin<name, [size_t, uint_t]>;
+}
+
+// OpenCL v1.2 s6.12.2: Math Functions
+foreach name = ["acos", "acosh", "acospi",
+                "asin", "asinh", "asinpi",
+                "atan", "atanh", "atanpi"] in {
+  foreach type = [float_t, double_t, half_t] in {
+    defm : Bif1<name, [type, type], [1, 1]>;
+  }
+}
+
+foreach name = ["atan2", "atan2pi"] in {
+  foreach type = [float_t, double_t, half_t] in {
+    defm : Bif2<name, [type, type, type], [1, 1, 1]>;
+  }
+}
+
+foreach name = ["fmax", "fmin"] in {
+  foreach type = [float_t, double_t, half_t] in {
+    defm : Bif2<name, [type, type, type], [1, 1, 1]>;
+    defm : Bif2<name, [type, type, type], [1, 1, 0]>;
+  }
+}
+
+// OpenCL v1.2 s6.12.14: Built-in Image Read Functions
+def read_imagef : Builtin<"read_imagef",
+                          [float4_t, image2d_RO_t, VectorType<int_t, 2>]>;
+def write_imagef : Builtin<"write_imagef",
+                           [void_t,
+                            image2d_WO_t,
+                            VectorType<int_t, 2>,
+                            VectorType<float_t, 4>]>;
+
+
+// OpenCL v2.0 s9.17.3: Additions to section 6.13.1: Work-Item Functions
+let Version = CL20 in {
+  let Extension = "cl_khr_subgroups" in {
+    def get_sub_group_size : Builtin<"get_sub_group_size", [uint_t]>;
+    def get_max_sub_group_size : Builtin<"get_max_sub_group_size", [uint_t]>;
+    def get_num_sub_groups : Builtin<"get_num_sub_groups", [uint_t]>;
+  }
+}
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index d2d471089e5e4..76b36a18269be 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -778,7 +778,9 @@ def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-r
 def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">,
   HelpText<"Set default calling convention">, Values<"cdecl,fastcall,stdcall,vectorcall,regcall">;
 def finclude_default_header : Flag<["-"], "finclude-default-header">,
-  HelpText<"Include the default header file for OpenCL">;
+  HelpText<"Include default header file for OpenCL">;
+def fdeclare_opencl_builtins : Flag<["-"], "fdeclare-opencl-builtins">,
+  HelpText<"Add OpenCL builtin function declarations (experimental)">;
 def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
   HelpText<"Preserve 3-component vector type">;
 def fwchar_type_EQ : Joined<["-"], "fwchar-type=">,
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 96580804576d3..717278c0861d6 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2179,7 +2179,7 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
     Opts.NativeHalfArgsAndReturns = 1;
     Opts.OpenCLCPlusPlus = Opts.CPlusPlus;
     // Include default header file for OpenCL.
-    if (Opts.IncludeDefaultHeader) {
+    if (Opts.IncludeDefaultHeader && !Opts.DeclareOpenCLBuiltins) {
       PPOpts.Includes.push_back("opencl-c.h");
     }
   }
@@ -2385,6 +2385,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   }
 
   Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header);
+  Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins);
 
   llvm::Triple T(TargetOpts.Triple);
   CompilerInvocation::setLangDefaults(Opts, IK, T, PPOpts, LangStd);
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index f1d2a05240456..7643a06a82f79 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -46,6 +46,8 @@
 #include <utility>
 #include <vector>
 
+#include "clang/Basic/OpenCLBuiltins.inc"
+
 using namespace clang;
 using namespace sema;
 
@@ -670,6 +672,79 @@ LLVM_DUMP_METHOD void LookupResult::dump() {
     D->dump();
 }
 
+/// When trying to resolve a function name, if the isOpenCLBuiltin function
+/// defined in "OpenCLBuiltins.inc" returns a non-null <Index, Len>, then the
+/// identifier is referencing an OpenCL builtin function. Thus, all its
+/// prototypes are added to the LookUpResult.
+///
+/// \param S The Sema instance
+/// \param LR  The LookupResult instance
+/// \param II  The identifier being resolved
+/// \param Index  The list of prototypes starts at Index in OpenCLBuiltins[]
+/// \param Len  The list of prototypes has Len elements
+static void InsertOCLBuiltinDeclarations(Sema &S, LookupResult &LR,
+                                         IdentifierInfo *II, unsigned Index,
+                                         unsigned Len) {
+
+  for (unsigned i = 0; i < Len; ++i) {
+    OpenCLBuiltinDecl &Decl = OpenCLBuiltins[Index - 1 + i];
+    ASTContext &Context = S.Context;
+
+    // Ignore this BIF if the version is incorrect.
+    if (Context.getLangOpts().OpenCLVersion < Decl.Version)
+      continue;
+
+    FunctionProtoType::ExtProtoInfo PI;
+    PI.Variadic = false;
+
+    // Defined in "OpenCLBuiltins.inc"
+    QualType RT = OCL2Qual(Context, OpenCLSignature[Decl.ArgTableIndex]);
+
+    SmallVector<QualType, 5> ArgTypes;
+    for (unsigned I = 1; I < Decl.NumArgs; I++) {
+      QualType Ty = OCL2Qual(Context, OpenCLSignature[Decl.ArgTableIndex + I]);
+      ArgTypes.push_back(Ty);
+    }
+
+    QualType R = Context.getFunctionType(RT, ArgTypes, PI);
+    SourceLocation Loc = LR.getNameLoc();
+
+    // TODO: This part is taken from Sema::LazilyCreateBuiltin,
+    // maybe refactor it.
+    DeclContext *Parent = Context.getTranslationUnitDecl();
+    FunctionDecl *New = FunctionDecl::Create(Context, Parent, Loc, Loc, II, R,
+                                             /*TInfo=*/nullptr, SC_Extern,
+                                             false, R->isFunctionProtoType());
+    New->setImplicit();
+
+    // Create Decl objects for each parameter, adding them to the
+    // FunctionDecl.
+    if (const FunctionProtoType *FT = dyn_cast<FunctionProtoType>(R)) {
+      SmallVector<ParmVarDecl *, 16> Params;
+      for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+        ParmVarDecl *Parm =
+            ParmVarDecl::Create(Context, New, SourceLocation(),
+                                SourceLocation(), nullptr, FT->getParamType(i),
+                                /*TInfo=*/nullptr, SC_None, nullptr);
+        Parm->setScopeInfo(0, i);
+        Params.push_back(Parm);
+      }
+      New->setParams(Params);
+    }
+
+    New->addAttr(OverloadableAttr::CreateImplicit(Context));
+
+    if (strlen(Decl.Extension))
+      S.setOpenCLExtensionForDecl(New, Decl.Extension);
+
+    LR.addDecl(New);
+  }
+
+  // If we added overloads, need to resolve the lookup result.
+  if (Len > 1)
+    LR.resolveKind();
+}
+
 /// Lookup a builtin function, when name lookup would otherwise
 /// fail.
 static bool LookupBuiltin(Sema &S, LookupResult &R) {
@@ -692,6 +767,15 @@ static bool LookupBuiltin(Sema &S, LookupResult &R) {
         }
       }
 
+      // Check if this is an OpenCL Builtin, and if so, insert its overloads.
+      if (S.getLangOpts().OpenCL && S.getLangOpts().DeclareOpenCLBuiltins) {
+        auto Index = isOpenCLBuiltin(II->getName());
+        if (Index.first) {
+          InsertOCLBuiltinDeclarations(S, R, II, Index.first, Index.second);
+          return true;
+        }
+      }
+
       // If this is a builtin on this (or all) targets, create the decl.
       if (unsigned BuiltinID = II->getBuiltinID()) {
         // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined
diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
new file mode 100644
index 0000000000000..a19664f3104b0
--- /dev/null
+++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -triple spir -verify -pedantic -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins
+
+// Test the -fdeclare-opencl-builtins option.
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef unsigned int uint;
+typedef __SIZE_TYPE__ size_t;
+
+kernel void basic_conversion(global float4 *buf, global int4 *res) {
+  res[0] = convert_int4(buf[0]);
+}
+
+kernel void basic_readonly_image_type(__read_only image2d_t img, int2 coord, global float4 *out) {
+  out[0] = read_imagef(img, coord);
+}
+
+kernel void basic_subgroup(global uint *out) {
+  out[0] = get_sub_group_size();
+// expected-error@-1{{use of declaration 'get_sub_group_size' requires cl_khr_subgroups extension to be enabled}}
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+  out[1] = get_sub_group_size();
+}
diff --git a/clang/utils/TableGen/CMakeLists.txt b/clang/utils/TableGen/CMakeLists.txt
index dba0c94ac0e42..3fc87d6552512 100644
--- a/clang/utils/TableGen/CMakeLists.txt
+++ b/clang/utils/TableGen/CMakeLists.txt
@@ -8,6 +8,7 @@ add_tablegen(clang-tblgen CLANG
   ClangCommentHTMLTagsEmitter.cpp
   ClangDataCollectorsEmitter.cpp
   ClangDiagnosticsEmitter.cpp
+  ClangOpenCLBuiltinEmitter.cpp
   ClangOptionDocEmitter.cpp
   ClangSACheckersEmitter.cpp
   NeonEmitter.cpp
diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
new file mode 100644
index 0000000000000..1e495039c494c
--- /dev/null
+++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
@@ -0,0 +1,318 @@
+//===- ClangOpenCLBuiltinEmitter.cpp - Generate Clang OpenCL Builtin handling
+//
+//                     The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits code for checking whether a function is an
+// OpenCL builtin function. If so, all overloads of this function are
+// added to the LookupResult. The generated include file is used by
+// SemaLookup.cpp
+//
+// For a successful lookup of e.g. the "cos" builtin, isOpenCLBuiltin("cos")
+// returns a pair <Index, Len>.
+// OpenCLBuiltins[Index] to OpenCLBuiltins[Index + Len] contains the pairs
+// <SigIndex, SigLen> of the overloads of "cos".
+// OpenCLSignature[SigIndex] to OpenCLSignature[SigIndex + SigLen] contains
+// one of the signatures of "cos". The OpenCLSignature entry can be
+// referenced by other functions, i.e. "sin", since multiple OpenCL builtins
+// share the same signature.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <set>
+
+using namespace llvm;
+
+namespace {
+class BuiltinNameEmitter {
+public:
+  BuiltinNameEmitter(RecordKeeper &Records, raw_ostream &OS)
+      : Records(Records), OS(OS) {}
+
+  // Entrypoint to generate the functions and structures for checking
+  // whether a function is an OpenCL builtin function.
+  void Emit();
+
+private:
+  // Contains OpenCL builtin functions and related information, stored as
+  // Record instances. They are coming from the associated TableGen file.
+  RecordKeeper &Records;
+
+  // The output file.
+  raw_ostream &OS;
+
+  // Emit the enums and structs.
+  void EmitDeclarations();
+
+  // Parse the Records generated by TableGen and populate OverloadInfo and
+  // SignatureSet.
+  void GetOverloads();
+
+  // Emit the OpenCLSignature table. This table contains all possible
+  // signatures, and is a struct OpenCLType. A signature is composed of a
+  // return type (mandatory), followed by zero or more argument types.
+  // E.g.:
+  // // 12
+  // { OCLT_uchar, 4, clang::LangAS::Default, false },
+  // { OCLT_float, 4, clang::LangAS::Default, false },
+  // This means that index 12 represents a signature
+  //   - returning a uchar vector of 4 elements, and
+  //   - taking as first argument a float vector of 4 elements.
+  void EmitSignatureTable();
+
+  // Emit the OpenCLBuiltins table. This table contains all overloads of
+  // each function, and is a struct OpenCLBuiltinDecl.
+  // E.g.:
+  // // acos
+  //   { 2, 0, "", 100 },
+  // This means that the signature of this acos overload is defined in OpenCL
+  // version 1.0 (100) and does not belong to any extension ("").  It has a
+  // 1 argument (+1 for the return type), stored at index 0 in the
+  // OpenCLSignature table.
+  void EmitBuiltinTable();
+
+  // Emit a StringMatcher function to check whether a function name is an
+  // OpenCL builtin function name.
+  void EmitStringMatcher();
+
+  // Emit a function returning the clang QualType instance associated with
+  // the TableGen Record Type.
+  void EmitQualTypeFinder();
+
+  // Contains a list of the available signatures, without the name of the
+  // function. Each pair consists of a signature and a cumulative index.
+  // E.g.:  <<float, float>, 0>,
+  //        <<float, int, int, 2>>,
+  //        <<float>, 5>,
+  //        ...
+  //        <<double, double>, 35>.
+  std::vector<std::pair<std::vector<Record *>, unsigned>> SignatureSet;
+
+  // Map the name of a builtin function to its prototypes (instances of the
+  // TableGen "Builtin" class).
+  // Each prototype is registered as a pair of:
+  //   <pointer to the "Builtin" instance,
+  //    cumulative index of the associated signature in the SignatureSet>
+  // E.g.:  The function cos: (float cos(float), double cos(double), ...)
+  //        <"cos", <<ptrToPrototype0, 5>,
+  //                <ptrToPrototype1, 35>>
+  //                <ptrToPrototype2, 79>>
+  // ptrToPrototype1 has the following signature: <double, double>
+  MapVector<StringRef, std::vector<std::pair<const Record *, unsigned>>>
+      OverloadInfo;
+};
+} // namespace
+
+void BuiltinNameEmitter::Emit() {
+  emitSourceFileHeader("OpenCL Builtin handling", OS);
+
+  OS << "#include \"llvm/ADT/StringRef.h\"\n";
+  OS << "using namespace clang;\n\n";
+
+  EmitDeclarations();
+
+  GetOverloads();
+
+  EmitSignatureTable();
+
+  EmitBuiltinTable();
+
+  EmitStringMatcher();
+
+  EmitQualTypeFinder();
+}
+
+void BuiltinNameEmitter::EmitDeclarations() {
+  OS << "enum OpenCLTypeID {\n";
+  std::vector<Record *> Types = Records.getAllDerivedDefinitions("Type");
+  StringMap<bool> TypesSeen;
+  for (const auto *T : Types) {
+    if (TypesSeen.find(T->getValueAsString("Name")) == TypesSeen.end())
+      OS << "  OCLT_" + T->getValueAsString("Name") << ",\n";
+    TypesSeen.insert(std::make_pair(T->getValueAsString("Name"), true));
+  }
+  OS << "};\n";
+
+  OS << R"(
+
+// Type used in a prototype of an OpenCL builtin function.
+struct OpenCLType {
+  // A type (e.g.: float, int, ...)
+  OpenCLTypeID ID;
+  // Size of vector (if applicable)
+  unsigned VectorWidth;
+  // Address space of the pointer (if applicable)
+  LangAS AS;
+  // Whether the type is a pointer
+  bool isPointer;
+};
+
+// One overload of an OpenCL builtin function.
+struct OpenCLBuiltinDecl {
+  // Number of arguments for the signature
+  unsigned NumArgs;
+  // Index in the OpenCLSignature table to get the required types
+  unsigned ArgTableIndex;
+  // Extension to which it belongs (e.g. cl_khr_subgroups)
+  const char *Extension;
+  // Version in which it was introduced (e.g. CL20)
+  unsigned Version;
+};
+
+)";
+}
+
+void BuiltinNameEmitter::GetOverloads() {
+  unsigned CumulativeSignIndex = 0;
+  std::vector<Record *> Builtins = Records.getAllDerivedDefinitions("Builtin");
+  for (const auto *B : Builtins) {
+    StringRef BName = B->getValueAsString("Name");
+    if (OverloadInfo.find(BName) == OverloadInfo.end()) {
+      OverloadInfo.insert(std::make_pair(
+          BName, std::vector<std::pair<const Record *, unsigned>>{}));
+    }
+
+    auto Signature = B->getValueAsListOfDefs("Signature");
+    auto it =
+        std::find_if(SignatureSet.begin(), SignatureSet.end(),
+                     [&](const std::pair<std::vector<Record *>, unsigned> &a) {
+                       return a.first == Signature;
+                     });
+    unsigned SignIndex;
+    if (it == SignatureSet.end()) {
+      SignatureSet.push_back(std::make_pair(Signature, CumulativeSignIndex));
+      SignIndex = CumulativeSignIndex;
+      CumulativeSignIndex += Signature.size();
+    } else {
+      SignIndex = it->second;
+    }
+    OverloadInfo[BName].push_back(std::make_pair(B, SignIndex));
+  }
+}
+
+void BuiltinNameEmitter::EmitSignatureTable() {
+  OS << "OpenCLType OpenCLSignature[] = {\n";
+  for (auto &P : SignatureSet) {
+    OS << "// " << P.second << "\n";
+    for (Record *R : P.first) {
+      OS << "{ OCLT_" << R->getValueAsString("Name") << ", "
+         << R->getValueAsInt("VecWidth") << ", "
+         << R->getValueAsString("AddrSpace") << ", "
+         << R->getValueAsBit("IsPointer") << "},";
+      OS << "\n";
+    }
+  }
+  OS << "};\n\n";
+}
+
+void BuiltinNameEmitter::EmitBuiltinTable() {
+  OS << "OpenCLBuiltinDecl OpenCLBuiltins[] = {\n";
+  for (auto &i : OverloadInfo) {
+    StringRef Name = i.first;
+    OS << "// " << Name << "\n";
+    for (auto &Overload : i.second) {
+      OS << "  { " << Overload.first->getValueAsListOfDefs("Signature").size()
+         << ", " << Overload.second << ", " << '"'
+         << Overload.first->getValueAsString("Extension") << "\", "
+         << Overload.first->getValueAsDef("Version")->getValueAsInt("Version")
+         << " },\n";
+    }
+  }
+  OS << "};\n\n";
+}
+
+void BuiltinNameEmitter::EmitStringMatcher() {
+  std::vector<StringMatcher::StringPair> ValidBuiltins;
+  unsigned CumulativeIndex = 1;
+  for (auto &i : OverloadInfo) {
+    auto &Ov = i.second;
+    std::string RetStmt;
+    raw_string_ostream SS(RetStmt);
+    SS << "return std::make_pair(" << CumulativeIndex << ", " << Ov.size()
+       << ");";
+    SS.flush();
+    CumulativeIndex += Ov.size();
+
+    ValidBuiltins.push_back(StringMatcher::StringPair(i.first, RetStmt));
+  }
+
+  OS << R"(
+// Return 0 if name is not a recognized OpenCL builtin, or an index
+// into a table of declarations if it is an OpenCL builtin.
+std::pair<unsigned, unsigned> isOpenCLBuiltin(llvm::StringRef name) {
+
+)";
+
+  StringMatcher("name", ValidBuiltins, OS).Emit(0, true);
+
+  OS << "  return std::make_pair(0, 0);\n";
+  OS << "}\n";
+}
+
+void BuiltinNameEmitter::EmitQualTypeFinder() {
+  OS << R"(
+
+static QualType OCL2Qual(ASTContext &Context, OpenCLType Ty) {
+  QualType RT = Context.VoidTy;
+  switch (Ty.ID) {
+)";
+
+  std::vector<Record *> Types = Records.getAllDerivedDefinitions("Type");
+  StringMap<bool> TypesSeen;
+
+  for (const auto *T : Types) {
+    // Check we have not seen this Type
+    if (TypesSeen.find(T->getValueAsString("Name")) != TypesSeen.end())
+      continue;
+    TypesSeen.insert(std::make_pair(T->getValueAsString("Name"), true));
+
+    // Check the Type does not have an "abstract" QualType
+    auto QT = T->getValueAsDef("QTName");
+    if (QT->getValueAsString("Name") == "null")
+      continue;
+
+    OS << "  case OCLT_" << T->getValueAsString("Name") << ":\n";
+    OS << "    RT = Context." << QT->getValueAsString("Name") << ";\n";
+    OS << "    break;\n";
+  }
+  OS << "  }\n";
+
+  // Special cases
+  OS << R"(
+  if (Ty.VectorWidth > 0)
+    RT = Context.getExtVectorType(RT, Ty.VectorWidth);
+
+  if (Ty.isPointer) {
+    RT = Context.getAddrSpaceQualType(RT, Ty.AS);
+    RT = Context.getPointerType(RT);
+  }
+
+  return RT;
+}
+)";
+}
+
+namespace clang {
+
+void EmitClangOpenCLBuiltins(RecordKeeper &Records, raw_ostream &OS) {
+  BuiltinNameEmitter NameChecker(Records, OS);
+  NameChecker.Emit();
+}
+
+} // end namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 351768fe96519..b9ec90fd5bccc 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -53,6 +53,7 @@ enum ActionType {
   GenClangCommentHTMLNamedCharacterReferences,
   GenClangCommentCommandInfo,
   GenClangCommentCommandList,
+  GenClangOpenCLBuiltins,
   GenArmNeon,
   GenArmFP16,
   GenArmNeonSema,
@@ -147,6 +148,8 @@ cl::opt<ActionType> Action(
         clEnumValN(GenClangCommentCommandList, "gen-clang-comment-command-list",
                    "Generate list of commands that are used in "
                    "documentation comments"),
+        clEnumValN(GenClangOpenCLBuiltins, "gen-clang-opencl-builtins",
+                   "Generate OpenCL builtin declaration handlers"),
         clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"),
         clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
         clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
@@ -266,6 +269,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenClangCommentCommandList:
     EmitClangCommentCommandList(Records, OS);
     break;
+  case GenClangOpenCLBuiltins:
+    EmitClangOpenCLBuiltins(Records, OS);
+    break;
   case GenArmNeon:
     EmitNeon(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 08edb68574278..02af66c5bf814 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -90,6 +90,9 @@ void EmitClangAttrDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangDiagDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangOptDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
+void EmitClangOpenCLBuiltins(llvm::RecordKeeper &Records,
+                             llvm::raw_ostream &OS);
+
 void EmitClangDataCollectors(llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
 

From 0aa374a3062eca4cfe61083ee0671b6db6201147 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 09:43:05 +0000
Subject: [PATCH 0890/1176] Include what you use in HexagonAsmBackend.cpp

HexagonAsmBackend.cpp was not using any APIs from Hexagon.h.  Doing so
is problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362372
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 57f4860bed60d..7c0770926abe6 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "HexagonFixupKinds.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCChecker.h"

From 2d0106a110410174ae1d1ba394cc58ab127a5949 Mon Sep 17 00:00:00 2001
From: Nikola Prica <nikola.prica@rt-rk.com>
Date: Mon, 3 Jun 2019 09:48:29 +0000
Subject: [PATCH 0891/1176] [LiveDebugValues] Close range for previous
 variable's location when adding newly deduced location

When LiveDebugValues deduces new variable's location from spill, restore or
register copy instruction it should close old variable's location. Otherwise
we can have multiple block output locations for same variable. That could lead
to inserting two DBG_VALUEs for same variable to the beginning of the successor
block which results to ignoring of first DBG_VALUE.

Reviewers: aprantl, jmorse, wolfgangp, dstenb

Reviewed By: aprantl

Subscribers: probinson, asowda, ivanbaev, petarj, djtodoro

Tags: #debug-info

Differential Revision: https://reviews.llvm.org/D62196

llvm-svn: 362373
---
 llvm/lib/CodeGen/LiveDebugValues.cpp      | 12 ++++++++-
 llvm/test/DebugInfo/X86/fission-ranges.ll | 33 ++++++++++++++++-------
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp
index 7f95e12186f35..8b8a340c179de 100644
--- a/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -430,9 +430,15 @@ void LiveDebugValues::insertTransferDebugPair(
   MachineFunction *MF = MI.getParent()->getParent();
   MachineInstr *NewDebugInstr;
 
-  auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers,
+  auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
                         &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
     unsigned LocId = VarLocIDs.insert(VL);
+
+    // Close this variable's previous location range.
+    DebugVariable V(DebugInstr->getDebugVariable(),
+                    DebugInstr->getDebugLoc()->getInlinedAt());
+    OpenRanges.erase(V);
+
     OpenRanges.insert(LocId, VL.Var);
     // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
     // after MI. Keep track of the pairing.
@@ -714,6 +720,10 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
   });
   VarLocSet &VLS = OutLocs[CurMBB];
   Changed = VLS |= OpenRanges.getVarLocs();
+  // New OutLocs set may be different due to spill, restore or register
+  // copy instruction processing.
+  if (Changed)
+    VLS = OpenRanges.getVarLocs();
   OpenRanges.clear();
   return Changed;
 }
diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll
index e120fed23ce99..e43477361953a 100644
--- a/llvm/test/DebugInfo/X86/fission-ranges.ll
+++ b/llvm/test/DebugInfo/X86/fission-ranges.ll
@@ -5,6 +5,18 @@
 ; RUN: llc -dwarf-version=5 -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
 ; RUN: llvm-dwarfdump -v %t | FileCheck --check-prefix=V5RNGLISTS %s
 
+; RUN: llc -O0 %s -mtriple=x86_64-unknown-linux-gnu -stop-after=livedebugvalues -o -| FileCheck --check-prefix=CHECK-MIR %s
+
+; LiveDebugValues should produce DBG_VALUEs for variable "b" in successive
+; blocks once we recognize that it is spilled.
+; CHECK-MIR: ![[BDIVAR:[0-9]+]] = !DILocalVariable(name: "b"
+; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus)
+; CHECK-MIR-LABEL: bb.6.for.inc13:
+; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus)
+; CHECK-MIR-LABEL: bb.7.for.inc16:
+; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus)
+
+
 ; CHECK: .debug_info contents:
 ; CHECK: DW_TAG_compile_unit
 ; CHECK-NEXT: DW_AT_stmt_list
@@ -31,22 +43,25 @@
 
 ; CHECK:      [[A]]:
 ; CHECK-NEXT:   Addr idx 2 (w/ length 169): DW_OP_consts +0, DW_OP_stack_value
-; CHECK-NEXT:   Addr idx 3 (w/ length 25): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 3 (w/ length 15): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 4 (w/ length 18): DW_OP_breg7 RSP-8
 ; CHECK:      [[E]]:
-; CHECK-NEXT:   Addr idx 4 (w/ length 19): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 5 (w/ length 23): DW_OP_reg0 RAX
 ; CHECK:      [[B]]:
-; CHECK-NEXT:   Addr idx 5 (w/ length 17): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 6 (w/ length 15): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 7 (w/ length 66): DW_OP_breg7 RSP-32
 ; CHECK:      [[D]]:
-; CHECK-NEXT:   Addr idx 6 (w/ length 17): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 8 (w/ length 15): DW_OP_reg0 RAX
+; CHECK-NEXT:   Addr idx 9 (w/ length 42): DW_OP_breg7 RSP-20
 
 ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo)
 ; HDR-NOT: .rela.{{.*}}.dwo
 
 ; Make sure we have enough stuff in the debug_addr to cover the address indexes
-; (6 is the last index in debug_loc.dwo, making 7 entries of 8 bytes each, 7 * 8
-; == 56 base 10 == 38 base 16)
+; (9 is the last index in debug_loc.dwo, making 10 entries of 8 bytes each,
+; 10 * 8 == 80 base 10 == 50 base 16)
 
-; HDR: .debug_addr 00000038
+; HDR: .debug_addr 00000050
 ; HDR-NOT: .rela.{{.*}}.dwo
 
 ; Check for the existence of a DWARF v5-style range list table in the .debug_rnglists
@@ -134,13 +149,13 @@ for.body9:                                        ; preds = %for.body9, %for.con
   tail call void @llvm.dbg.value(metadata i32* @c, metadata !19, metadata !DIExpression()), !dbg !40
   %and = and i32 %and2, 1, !dbg !32
   %inc = add i32 %e.01, 1, !dbg !39
-  tail call void @llvm.dbg.value(metadata i32 %inc, metadata !18, metadata !DIExpression()), !dbg !39
+  tail call void @llvm.dbg.value(metadata i32 %inc, metadata !18, metadata !DIExpression()), !dbg !42
   %exitcond = icmp eq i32 %inc, 30, !dbg !39
   br i1 %exitcond, label %for.inc10, label %for.body9, !dbg !39
 
 for.inc10:                                        ; preds = %for.body9
   %inc11 = add nsw i32 %b.03, 1, !dbg !38
-  tail call void @llvm.dbg.value(metadata i32 %inc11, metadata !15, metadata !DIExpression()), !dbg !38
+  tail call void @llvm.dbg.value(metadata i32 %inc11, metadata !15, metadata !DIExpression()), !dbg !42
   %exitcond11 = icmp eq i32 %inc11, 30, !dbg !38
   br i1 %exitcond11, label %for.inc13, label %for.cond7.preheader, !dbg !38
 

From 1115a199aaec146972419749db7b10cfe832801a Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 3 Jun 2019 09:52:32 +0000
Subject: [PATCH 0892/1176] [llvm-readobj/llvm-readelf] - Remove
 gnu-relocations.test completely.

rL362089 introduced a set of yaml based reloc-types-*.test test cases
(instead of huge reloc-types.test that used a lot of precompiled binaries)
These test cases checks LLVM-styled dumping of the relocations.

gnu-relocations.test was a test case to check GNU styled relocations dumping.
It did that only for elf-x86 and elf-x86_64 targets. It did not test all of the
relocations though.

Now, after rL362089, it does not make sence to keep it.
This patch updates reloc-types-elf-i386.test and reloc-types-elf-x64.test tests
with llvm-readelf calls to check GNU styled output in one place.
It removes gnu-relocations.test completely.

One of intentions of doing this is also to get rid of relocs.obj.elf-i386 and
relocs.obj.elf-x86_64 precompiled objects completely (they are used in other tests still).

Differential revision: https://reviews.llvm.org/D62655

llvm-svn: 362374
---
 .../tools/llvm-readobj/gnu-relocations.test   |  28 ---
 .../llvm-readobj/reloc-types-elf-i386.test    | 173 +++++++++++++-----
 .../llvm-readobj/reloc-types-elf-x64.test     | 169 ++++++++++++-----
 3 files changed, 259 insertions(+), 111 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-readobj/gnu-relocations.test

diff --git a/llvm/test/tools/llvm-readobj/gnu-relocations.test b/llvm/test/tools/llvm-readobj/gnu-relocations.test
deleted file mode 100644
index 1bba0ffad3f85..0000000000000
--- a/llvm/test/tools/llvm-readobj/gnu-relocations.test
+++ /dev/null
@@ -1,28 +0,0 @@
-RUN: llvm-readelf -r %p/Inputs/relocs.obj.elf-i386 \
-RUN:   | FileCheck %s -check-prefix ELF32
-RUN: llvm-readelf -r %p/Inputs/relocs.obj.elf-x86_64 \
-RUN:   | FileCheck %s -check-prefix ELF64
-
-ELF32:    Relocation section '.rel.text' at offset 0x318 contains 41 entries:
-ELF32-NEXT:     Offset     Info    Type                Sym. Value  Symbol's Name
-ELF32-NEXT:    00000002  00000500 R_386_NONE             00000000   sym
-ELF32-NEXT:    00000008  00000501 R_386_32               00000000   sym
-ELF32-NEXT:    0000000e  00000502 R_386_PC32             00000000   sym
-ELF32-NEXT:    00000014  00000503 R_386_GOT32            00000000   sym
-ELF32-NEXT:    0000001a  00000504 R_386_PLT32            00000000   sym
-ELF32-NEXT:    00000020  00000505 R_386_COPY             00000000   sym
-ELF32-NEXT:    00000026  00000506 R_386_GLOB_DAT         00000000   sym
-ELF32-NEXT:    0000002c  00000507 R_386_JUMP_SLOT        00000000   sym
-ELF32-NEXT:    00000032  00000508 R_386_RELATIVE         00000000   sym
-ELF32-NEXT:    00000038  00000509 R_386_GOTOFF           00000000   sym
-
-ELF64:    Relocation section '.rela.text' at offset 0x430 contains 38 entries:
-ELF64-NEXT:        Offset             Info             Type               Symbol's Value Symbol's Name + Addend
-ELF64-NEXT:    0000000000000003  0000000500000000 R_X86_64_NONE          0000000000000000 sym - 4
-ELF64-NEXT:    000000000000000a  0000000500000001 R_X86_64_64            0000000000000000 sym - 4
-ELF64-NEXT:    0000000000000011  0000000500000002 R_X86_64_PC32          0000000000000000 sym - 4
-ELF64-NEXT:    0000000000000018  0000000500000003 R_X86_64_GOT32         0000000000000000 sym - 4
-ELF64-NEXT:    000000000000001f  0000000500000004 R_X86_64_PLT32         0000000000000000 sym - 4
-ELF64-NEXT:    0000000000000026  0000000500000005 R_X86_64_COPY          0000000000000000 sym - 4
-ELF64-NEXT:    000000000000002d  0000000500000006 R_X86_64_GLOB_DAT      0000000000000000 sym - 4
-ELF64-NEXT:    0000000000000034  0000000500000007 R_X86_64_JUMP_SLOT     0000000000000000 sym - 4
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test
index b968757614751..1f661e40318a8 100644
--- a/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-i386.test
@@ -2,49 +2,94 @@
 ## names and values for i386 target.
 
 # RUN: yaml2obj %s -o %t-i386.o
-# RUN: llvm-readobj -r --expand-relocs %t-i386.o | FileCheck %s
+# RUN: llvm-readobj -r --expand-relocs %t-i386.o | FileCheck %s --check-prefix=LLVM
+# RUN: llvm-readelf -r --expand-relocs %t-i386.o | FileCheck %s --check-prefix=GNU
 
-# CHECK: Type: R_386_NONE (0)
-# CHECK: Type: R_386_32 (1)
-# CHECK: Type: R_386_PC32 (2)
-# CHECK: Type: R_386_GOT32 (3)
-# CHECK: Type: R_386_PLT32 (4)
-# CHECK: Type: R_386_COPY (5)
-# CHECK: Type: R_386_GLOB_DAT (6)
-# CHECK: Type: R_386_JUMP_SLOT (7)
-# CHECK: Type: R_386_RELATIVE (8)
-# CHECK: Type: R_386_GOTOFF (9)
-# CHECK: Type: R_386_GOTPC (10)
-# CHECK: Type: R_386_32PLT (11)
-# CHECK: Type: R_386_TLS_TPOFF (14)
-# CHECK: Type: R_386_TLS_IE (15)
-# CHECK: Type: R_386_TLS_GOTIE (16)
-# CHECK: Type: R_386_TLS_LE (17)
-# CHECK: Type: R_386_TLS_GD (18)
-# CHECK: Type: R_386_TLS_LDM (19)
-# CHECK: Type: R_386_16 (20)
-# CHECK: Type: R_386_PC16 (21)
-# CHECK: Type: R_386_8 (22)
-# CHECK: Type: R_386_PC8 (23)
-# CHECK: Type: R_386_TLS_GD_32 (24)
-# CHECK: Type: R_386_TLS_GD_PUSH (25)
-# CHECK: Type: R_386_TLS_GD_CALL (26)
-# CHECK: Type: R_386_TLS_GD_POP (27)
-# CHECK: Type: R_386_TLS_LDM_32 (28)
-# CHECK: Type: R_386_TLS_LDM_PUSH (29)
-# CHECK: Type: R_386_TLS_LDM_CALL (30)
-# CHECK: Type: R_386_TLS_LDM_POP (31)
-# CHECK: Type: R_386_TLS_LDO_32 (32)
-# CHECK: Type: R_386_TLS_IE_32 (33)
-# CHECK: Type: R_386_TLS_LE_32 (34)
-# CHECK: Type: R_386_TLS_DTPMOD32 (35)
-# CHECK: Type: R_386_TLS_DTPOFF32 (36)
-# CHECK: Type: R_386_TLS_TPOFF32 (37)
-# CHECK: Type: R_386_TLS_GOTDESC (39)
-# CHECK: Type: R_386_TLS_DESC_CALL (40)
-# CHECK: Type: R_386_TLS_DESC (41)
-# CHECK: Type: R_386_IRELATIVE (42)
-# CHECK: Type: R_386_GOT32X (43)
+# LLVM: Type: R_386_NONE (0)
+# LLVM: Type: R_386_32 (1)
+# LLVM: Type: R_386_PC32 (2)
+# LLVM: Type: R_386_GOT32 (3)
+# LLVM: Type: R_386_PLT32 (4)
+# LLVM: Type: R_386_COPY (5)
+# LLVM: Type: R_386_GLOB_DAT (6)
+# LLVM: Type: R_386_JUMP_SLOT (7)
+# LLVM: Type: R_386_RELATIVE (8)
+# LLVM: Type: R_386_GOTOFF (9)
+# LLVM: Type: R_386_GOTPC (10)
+# LLVM: Type: R_386_32PLT (11)
+# LLVM: Type: R_386_TLS_TPOFF (14)
+# LLVM: Type: R_386_TLS_IE (15)
+# LLVM: Type: R_386_TLS_GOTIE (16)
+# LLVM: Type: R_386_TLS_LE (17)
+# LLVM: Type: R_386_TLS_GD (18)
+# LLVM: Type: R_386_TLS_LDM (19)
+# LLVM: Type: R_386_16 (20)
+# LLVM: Type: R_386_PC16 (21)
+# LLVM: Type: R_386_8 (22)
+# LLVM: Type: R_386_PC8 (23)
+# LLVM: Type: R_386_TLS_GD_32 (24)
+# LLVM: Type: R_386_TLS_GD_PUSH (25)
+# LLVM: Type: R_386_TLS_GD_CALL (26)
+# LLVM: Type: R_386_TLS_GD_POP (27)
+# LLVM: Type: R_386_TLS_LDM_32 (28)
+# LLVM: Type: R_386_TLS_LDM_PUSH (29)
+# LLVM: Type: R_386_TLS_LDM_CALL (30)
+# LLVM: Type: R_386_TLS_LDM_POP (31)
+# LLVM: Type: R_386_TLS_LDO_32 (32)
+# LLVM: Type: R_386_TLS_IE_32 (33)
+# LLVM: Type: R_386_TLS_LE_32 (34)
+# LLVM: Type: R_386_TLS_DTPMOD32 (35)
+# LLVM: Type: R_386_TLS_DTPOFF32 (36)
+# LLVM: Type: R_386_TLS_TPOFF32 (37)
+# LLVM: Type: R_386_TLS_GOTDESC (39)
+# LLVM: Type: R_386_TLS_DESC_CALL (40)
+# LLVM: Type: R_386_TLS_DESC (41)
+# LLVM: Type: R_386_IRELATIVE (42)
+# LLVM: Type: R_386_GOT32X (43)
+
+# GNU:      Relocation section '.rel.text' at offset 0x128 contains 41 entries:
+# GNU-NEXT:  Offset     Info    Type                Sym. Value  Symbol's Name
+# GNU-NEXT: 00000002  00000100 R_386_NONE             00000000   foo
+# GNU-NEXT: 00000008  00000101 R_386_32               00000000   foo
+# GNU-NEXT: 0000000e  00000102 R_386_PC32             00000000   foo
+# GNU-NEXT: 00000014  00000103 R_386_GOT32            00000000   foo
+# GNU-NEXT: 0000001a  00000104 R_386_PLT32            00000000   foo
+# GNU-NEXT: 00000020  00000105 R_386_COPY             00000000   foo
+# GNU-NEXT: 00000026  00000106 R_386_GLOB_DAT         00000000   foo
+# GNU-NEXT: 0000002c  00000107 R_386_JUMP_SLOT        00000000   foo
+# GNU-NEXT: 00000032  00000108 R_386_RELATIVE         00000000   foo
+# GNU-NEXT: 00000038  00000109 R_386_GOTOFF           00000000   foo
+# GNU-NEXT: 0000003e  0000010a R_386_GOTPC            00000000   foo
+# GNU-NEXT: 00000044  0000010b R_386_32PLT            00000000   foo
+# GNU-NEXT: 0000004a  0000010e R_386_TLS_TPOFF        00000000   foo
+# GNU-NEXT: 00000050  0000010f R_386_TLS_IE           00000000   foo
+# GNU-NEXT: 00000056  00000110 R_386_TLS_GOTIE        00000000   foo
+# GNU-NEXT: 0000005c  00000111 R_386_TLS_LE           00000000   foo
+# GNU-NEXT: 00000062  00000112 R_386_TLS_GD           00000000   foo
+# GNU-NEXT: 00000068  00000113 R_386_TLS_LDM          00000000   foo
+# GNU-NEXT: 0000006e  00000114 R_386_16               00000000   foo
+# GNU-NEXT: 00000074  00000115 R_386_PC16             00000000   foo
+# GNU-NEXT: 0000007a  00000116 R_386_8                00000000   foo
+# GNU-NEXT: 00000080  00000117 R_386_PC8              00000000   foo
+# GNU-NEXT: 00000086  00000118 R_386_TLS_GD_32        00000000   foo
+# GNU-NEXT: 0000008c  00000119 R_386_TLS_GD_PUSH      00000000   foo
+# GNU-NEXT: 00000092  0000011a R_386_TLS_GD_CALL      00000000   foo
+# GNU-NEXT: 00000098  0000011b R_386_TLS_GD_POP       00000000   foo
+# GNU-NEXT: 0000009e  0000011c R_386_TLS_LDM_32       00000000   foo
+# GNU-NEXT: 000000a4  0000011d R_386_TLS_LDM_PUSH     00000000   foo
+# GNU-NEXT: 000000aa  0000011e R_386_TLS_LDM_CALL     00000000   foo
+# GNU-NEXT: 000000b0  0000011f R_386_TLS_LDM_POP      00000000   foo
+# GNU-NEXT: 000000b6  00000120 R_386_TLS_LDO_32       00000000   foo
+# GNU-NEXT: 000000bc  00000121 R_386_TLS_IE_32        00000000   foo
+# GNU-NEXT: 000000c2  00000122 R_386_TLS_LE_32        00000000   foo
+# GNU-NEXT: 000000c8  00000123 R_386_TLS_DTPMOD32     00000000   foo
+# GNU-NEXT: 000000ce  00000124 R_386_TLS_DTPOFF32     00000000   foo
+# GNU-NEXT: 000000d4  00000125 R_386_TLS_TPOFF32      00000000   foo
+# GNU-NEXT: 000000da  00000127 R_386_TLS_GOTDESC      00000000   foo
+# GNU-NEXT: 000000e0  00000128 R_386_TLS_DESC_CALL    00000000   foo
+# GNU-NEXT: 000000e6  00000129 R_386_TLS_DESC         00000000   foo
+# GNU-NEXT: 000000ec  0000012a R_386_IRELATIVE        00000000   foo
+# GNU-NEXT: 000000f2  0000012b R_386_GOT32X           00000000   foo
 
 --- !ELF
 FileHeader:
@@ -66,83 +111,127 @@ Sections:
     Relocations:
       - Offset: 0x0000000000000002
         Type:   R_386_NONE
+        Symbol: foo
       - Offset: 0x0000000000000008
         Type:   R_386_32
+        Symbol: foo
       - Offset: 0x000000000000000E
         Type:   R_386_PC32
+        Symbol: foo
       - Offset: 0x0000000000000014
         Type:   R_386_GOT32
+        Symbol: foo
       - Offset: 0x000000000000001A
         Type:   R_386_PLT32
+        Symbol: foo
       - Offset: 0x0000000000000020
         Type:   R_386_COPY
+        Symbol: foo
       - Offset: 0x0000000000000026
         Type:   R_386_GLOB_DAT
+        Symbol: foo
       - Offset: 0x000000000000002C
         Type:   R_386_JUMP_SLOT
+        Symbol: foo
       - Offset: 0x0000000000000032
         Type:   R_386_RELATIVE
+        Symbol: foo
       - Offset: 0x0000000000000038
         Type:   R_386_GOTOFF
+        Symbol: foo
       - Offset: 0x000000000000003E
         Type:   R_386_GOTPC
+        Symbol: foo
       - Offset: 0x0000000000000044
         Type:   R_386_32PLT
+        Symbol: foo
       - Offset: 0x000000000000004A
         Type:   R_386_TLS_TPOFF
+        Symbol: foo
       - Offset: 0x0000000000000050
         Type:   R_386_TLS_IE
+        Symbol: foo
       - Offset: 0x0000000000000056
         Type:   R_386_TLS_GOTIE
+        Symbol: foo
       - Offset: 0x000000000000005C
         Type:   R_386_TLS_LE
+        Symbol: foo
       - Offset: 0x0000000000000062
         Type:   R_386_TLS_GD
+        Symbol: foo
       - Offset: 0x0000000000000068
         Type:   R_386_TLS_LDM
+        Symbol: foo
       - Offset: 0x000000000000006E
         Type:   R_386_16
+        Symbol: foo
       - Offset: 0x0000000000000074
         Type:   R_386_PC16
+        Symbol: foo
       - Offset: 0x000000000000007A
         Type:   R_386_8
+        Symbol: foo
       - Offset: 0x0000000000000080
         Type:   R_386_PC8
+        Symbol: foo
       - Offset: 0x0000000000000086
         Type:   R_386_TLS_GD_32
+        Symbol: foo
       - Offset: 0x000000000000008C
         Type:   R_386_TLS_GD_PUSH
+        Symbol: foo
       - Offset: 0x0000000000000092
         Type:   R_386_TLS_GD_CALL
+        Symbol: foo
       - Offset: 0x0000000000000098
         Type:   R_386_TLS_GD_POP
+        Symbol: foo
       - Offset: 0x000000000000009E
         Type:   R_386_TLS_LDM_32
+        Symbol: foo
       - Offset: 0x00000000000000A4
         Type:   R_386_TLS_LDM_PUSH
+        Symbol: foo
       - Offset: 0x00000000000000AA
         Type:   R_386_TLS_LDM_CALL
+        Symbol: foo
       - Offset: 0x00000000000000B0
         Type:   R_386_TLS_LDM_POP
+        Symbol: foo
       - Offset: 0x00000000000000B6
         Type:   R_386_TLS_LDO_32
+        Symbol: foo
       - Offset: 0x00000000000000BC
         Type:   R_386_TLS_IE_32
+        Symbol: foo
       - Offset: 0x00000000000000C2
         Type:   R_386_TLS_LE_32
+        Symbol: foo
       - Offset: 0x00000000000000C8
         Type:   R_386_TLS_DTPMOD32
+        Symbol: foo
       - Offset: 0x00000000000000CE
         Type:   R_386_TLS_DTPOFF32
+        Symbol: foo
       - Offset: 0x00000000000000D4
         Type:   R_386_TLS_TPOFF32
+        Symbol: foo
       - Offset: 0x00000000000000DA
         Type:   R_386_TLS_GOTDESC
+        Symbol: foo
       - Offset: 0x00000000000000E0
         Type:   R_386_TLS_DESC_CALL
+        Symbol: foo
       - Offset: 0x00000000000000E6
         Type:   R_386_TLS_DESC
+        Symbol: foo
       - Offset: 0x00000000000000EC
         Type:   R_386_IRELATIVE
+        Symbol: foo
       - Offset: 0x00000000000000F2
         Type:   R_386_GOT32X
+        Symbol: foo
+Symbols:
+  - Name:    foo
+    Binding: STB_GLOBAL
diff --git a/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test b/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test
index 7b3fa1cf593d0..021fbb5d3d78c 100644
--- a/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test
+++ b/llvm/test/tools/llvm-readobj/reloc-types-elf-x64.test
@@ -2,48 +2,92 @@
 ## names and values for x86_64 target.
 
 # RUN: yaml2obj %s -o %t-x86_64.o
-# RUN: llvm-readobj -r --expand-relocs %t-x86_64.o | FileCheck %s
+# RUN: llvm-readobj -r --expand-relocs %t-x86_64.o | FileCheck %s --check-prefix=LLVM
+# RUN: llvm-readelf -r --expand-relocs %t-x86_64.o | FileCheck %s --check-prefix=GNU
 
-# CHECK: Type: R_X86_64_NONE (0)
-# CHECK: Type: R_X86_64_64 (1)
-# CHECK: Type: R_X86_64_PC32 (2)
-# CHECK: Type: R_X86_64_GOT32 (3)
-# CHECK: Type: R_X86_64_PLT32 (4)
-# CHECK: Type: R_X86_64_COPY (5)
-# CHECK: Type: R_X86_64_GLOB_DAT (6)
-# CHECK: Type: R_X86_64_JUMP_SLOT (7)
-# CHECK: Type: R_X86_64_RELATIVE (8)
-# CHECK: Type: R_X86_64_GOTPCREL (9)
-# CHECK: Type: R_X86_64_32 (10)
-# CHECK: Type: R_X86_64_32S (11)
-# CHECK: Type: R_X86_64_16 (12)
-# CHECK: Type: R_X86_64_PC16 (13)
-# CHECK: Type: R_X86_64_8 (14)
-# CHECK: Type: R_X86_64_PC8 (15)
-# CHECK: Type: R_X86_64_DTPMOD64 (16)
-# CHECK: Type: R_X86_64_DTPOFF64 (17)
-# CHECK: Type: R_X86_64_TPOFF64 (18)
-# CHECK: Type: R_X86_64_TLSGD (19)
-# CHECK: Type: R_X86_64_TLSLD (20)
-# CHECK: Type: R_X86_64_DTPOFF32 (21)
-# CHECK: Type: R_X86_64_GOTTPOFF (22)
-# CHECK: Type: R_X86_64_TPOFF32 (23)
-# CHECK: Type: R_X86_64_PC64 (24)
-# CHECK: Type: R_X86_64_GOTOFF64 (25)
-# CHECK: Type: R_X86_64_GOTPC32 (26)
-# CHECK: Type: R_X86_64_GOT64 (27)
-# CHECK: Type: R_X86_64_GOTPCREL64 (28)
-# CHECK: Type: R_X86_64_GOTPC64 (29)
-# CHECK: Type: R_X86_64_GOTPLT64 (30)
-# CHECK: Type: R_X86_64_PLTOFF64 (31)
-# CHECK: Type: R_X86_64_SIZE32 (32)
-# CHECK: Type: R_X86_64_SIZE64 (33)
-# CHECK: Type: R_X86_64_GOTPC32_TLSDESC (34)
-# CHECK: Type: R_X86_64_TLSDESC_CALL (35)
-# CHECK: Type: R_X86_64_TLSDESC (36)
-# CHECK: Type: R_X86_64_IRELATIVE (37)
-# CHECK: Type: R_X86_64_GOTPCRELX (41)
-# CHECK: Type: R_X86_64_REX_GOTPCRELX (42)
+# LLVM: Type: R_X86_64_NONE (0)
+# LLVM: Type: R_X86_64_64 (1)
+# LLVM: Type: R_X86_64_PC32 (2)
+# LLVM: Type: R_X86_64_GOT32 (3)
+# LLVM: Type: R_X86_64_PLT32 (4)
+# LLVM: Type: R_X86_64_COPY (5)
+# LLVM: Type: R_X86_64_GLOB_DAT (6)
+# LLVM: Type: R_X86_64_JUMP_SLOT (7)
+# LLVM: Type: R_X86_64_RELATIVE (8)
+# LLVM: Type: R_X86_64_GOTPCREL (9)
+# LLVM: Type: R_X86_64_32 (10)
+# LLVM: Type: R_X86_64_32S (11)
+# LLVM: Type: R_X86_64_16 (12)
+# LLVM: Type: R_X86_64_PC16 (13)
+# LLVM: Type: R_X86_64_8 (14)
+# LLVM: Type: R_X86_64_PC8 (15)
+# LLVM: Type: R_X86_64_DTPMOD64 (16)
+# LLVM: Type: R_X86_64_DTPOFF64 (17)
+# LLVM: Type: R_X86_64_TPOFF64 (18)
+# LLVM: Type: R_X86_64_TLSGD (19)
+# LLVM: Type: R_X86_64_TLSLD (20)
+# LLVM: Type: R_X86_64_DTPOFF32 (21)
+# LLVM: Type: R_X86_64_GOTTPOFF (22)
+# LLVM: Type: R_X86_64_TPOFF32 (23)
+# LLVM: Type: R_X86_64_PC64 (24)
+# LLVM: Type: R_X86_64_GOTOFF64 (25)
+# LLVM: Type: R_X86_64_GOTPC32 (26)
+# LLVM: Type: R_X86_64_GOT64 (27)
+# LLVM: Type: R_X86_64_GOTPCREL64 (28)
+# LLVM: Type: R_X86_64_GOTPC64 (29)
+# LLVM: Type: R_X86_64_GOTPLT64 (30)
+# LLVM: Type: R_X86_64_PLTOFF64 (31)
+# LLVM: Type: R_X86_64_SIZE32 (32)
+# LLVM: Type: R_X86_64_SIZE64 (33)
+# LLVM: Type: R_X86_64_GOTPC32_TLSDESC (34)
+# LLVM: Type: R_X86_64_TLSDESC_CALL (35)
+# LLVM: Type: R_X86_64_TLSDESC (36)
+# LLVM: Type: R_X86_64_IRELATIVE (37)
+# LLVM: Type: R_X86_64_GOTPCRELX (41)
+# LLVM: Type: R_X86_64_REX_GOTPCRELX (42)
+
+# GNU:      Relocation section '.rela.text' at offset 0x1c8 contains 40 entries:
+# GNU-NEXT:     Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# GNU-NEXT: 0000000000000003  0000000100000000 R_X86_64_NONE          0000000000000000 foo + 0
+# GNU-NEXT: 000000000000000a  0000000100000001 R_X86_64_64            0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000011  0000000100000002 R_X86_64_PC32          0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000018  0000000100000003 R_X86_64_GOT32         0000000000000000 foo + 0
+# GNU-NEXT: 000000000000001f  0000000100000004 R_X86_64_PLT32         0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000026  0000000100000005 R_X86_64_COPY          0000000000000000 foo + 0
+# GNU-NEXT: 000000000000002d  0000000100000006 R_X86_64_GLOB_DAT      0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000034  0000000100000007 R_X86_64_JUMP_SLOT     0000000000000000 foo + 0
+# GNU-NEXT: 000000000000003b  0000000100000008 R_X86_64_RELATIVE      0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000042  0000000100000009 R_X86_64_GOTPCREL      0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000049  000000010000000a R_X86_64_32            0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000050  000000010000000b R_X86_64_32S           0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000057  000000010000000c R_X86_64_16            0000000000000000 foo + 0
+# GNU-NEXT: 000000000000005e  000000010000000d R_X86_64_PC16          0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000065  000000010000000e R_X86_64_8             0000000000000000 foo + 0
+# GNU-NEXT: 000000000000006c  000000010000000f R_X86_64_PC8           0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000073  0000000100000010 R_X86_64_DTPMOD64      0000000000000000 foo + 0
+# GNU-NEXT: 000000000000007a  0000000100000011 R_X86_64_DTPOFF64      0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000081  0000000100000012 R_X86_64_TPOFF64       0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000088  0000000100000013 R_X86_64_TLSGD         0000000000000000 foo + 0
+# GNU-NEXT: 000000000000008f  0000000100000014 R_X86_64_TLSLD         0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000096  0000000100000015 R_X86_64_DTPOFF32      0000000000000000 foo + 0
+# GNU-NEXT: 000000000000009d  0000000100000016 R_X86_64_GOTTPOFF      0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000a4  0000000100000017 R_X86_64_TPOFF32       0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000ab  0000000100000018 R_X86_64_PC64          0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000b2  0000000100000019 R_X86_64_GOTOFF64      0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000b9  000000010000001a R_X86_64_GOTPC32       0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000c0  000000010000001b R_X86_64_GOT64         0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000c7  000000010000001c R_X86_64_GOTPCREL64    0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000ce  000000010000001d R_X86_64_GOTPC64       0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000d5  000000010000001e R_X86_64_GOTPLT64      0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000dc  000000010000001f R_X86_64_PLTOFF64      0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000e3  0000000100000020 R_X86_64_SIZE32        0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000ea  0000000100000021 R_X86_64_SIZE64        0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000f1  0000000100000022 R_X86_64_GOTPC32_TLSDESC 0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000f8  0000000100000023 R_X86_64_TLSDESC_CALL  0000000000000000 foo + 0
+# GNU-NEXT: 00000000000000ff  0000000100000024 R_X86_64_TLSDESC       0000000000000000 foo + 0
+# GNU-NEXT: 0000000000000106  0000000100000025 R_X86_64_IRELATIVE     0000000000000000 foo + 0
+# GNU-NEXT: 000000000000010a  0000000100000029 R_X86_64_GOTPCRELX     0000000000000000 foo + 0
+# GNU-NEXT: 000000000000010e  000000010000002a R_X86_64_REX_GOTPCRELX 0000000000000000 foo + 0
 
 --- !ELF
 FileHeader:
@@ -65,81 +109,124 @@ Sections:
     Relocations:
       - Offset: 0x0000000000000003
         Type:   R_X86_64_NONE
+        Symbol: foo
       - Offset: 0x000000000000000A
         Type:   R_X86_64_64
+        Symbol: foo
       - Offset: 0x0000000000000011
         Type:   R_X86_64_PC32
+        Symbol: foo
       - Offset: 0x0000000000000018
         Type:   R_X86_64_GOT32
+        Symbol: foo
       - Offset: 0x000000000000001F
         Type:   R_X86_64_PLT32
+        Symbol: foo
       - Offset: 0x0000000000000026
         Type:   R_X86_64_COPY
+        Symbol: foo
       - Offset: 0x000000000000002D
         Type:   R_X86_64_GLOB_DAT
+        Symbol: foo
       - Offset: 0x0000000000000034
         Type:   R_X86_64_JUMP_SLOT
+        Symbol: foo
       - Offset: 0x000000000000003B
         Type:   R_X86_64_RELATIVE
+        Symbol: foo
       - Offset: 0x0000000000000042
         Type:   R_X86_64_GOTPCREL
+        Symbol: foo
       - Offset: 0x0000000000000049
         Type:   R_X86_64_32
+        Symbol: foo
       - Offset: 0x0000000000000050
         Type:   R_X86_64_32S
+        Symbol: foo
       - Offset: 0x0000000000000057
         Type:   R_X86_64_16
+        Symbol: foo
       - Offset: 0x000000000000005E
         Type:   R_X86_64_PC16
+        Symbol: foo
       - Offset: 0x0000000000000065
         Type:   R_X86_64_8
+        Symbol: foo
       - Offset: 0x000000000000006C
         Type:   R_X86_64_PC8
+        Symbol: foo
       - Offset: 0x0000000000000073
         Type:   R_X86_64_DTPMOD64
+        Symbol: foo
       - Offset: 0x000000000000007A
         Type:   R_X86_64_DTPOFF64
+        Symbol: foo
       - Offset: 0x0000000000000081
         Type:   R_X86_64_TPOFF64
+        Symbol: foo
       - Offset: 0x0000000000000088
         Type:   R_X86_64_TLSGD
+        Symbol: foo
       - Offset: 0x000000000000008F
         Type:   R_X86_64_TLSLD
+        Symbol: foo
       - Offset: 0x0000000000000096
         Type:   R_X86_64_DTPOFF32
+        Symbol: foo
       - Offset: 0x000000000000009D
         Type:   R_X86_64_GOTTPOFF
+        Symbol: foo
       - Offset: 0x00000000000000A4
         Type:   R_X86_64_TPOFF32
+        Symbol: foo
       - Offset: 0x00000000000000AB
         Type:   R_X86_64_PC64
+        Symbol: foo
       - Offset: 0x00000000000000B2
         Type:   R_X86_64_GOTOFF64
+        Symbol: foo
       - Offset: 0x00000000000000B9
         Type:   R_X86_64_GOTPC32
+        Symbol: foo
       - Offset: 0x00000000000000C0
         Type:   R_X86_64_GOT64
+        Symbol: foo
       - Offset: 0x00000000000000C7
         Type:   R_X86_64_GOTPCREL64
+        Symbol: foo
       - Offset: 0x00000000000000CE
         Type:   R_X86_64_GOTPC64
+        Symbol: foo
       - Offset: 0x00000000000000D5
         Type:   R_X86_64_GOTPLT64
+        Symbol: foo
       - Offset: 0x00000000000000DC
         Type:   R_X86_64_PLTOFF64
+        Symbol: foo
       - Offset: 0x00000000000000E3
         Type:   R_X86_64_SIZE32
+        Symbol: foo
       - Offset: 0x00000000000000EA
         Type:   R_X86_64_SIZE64
+        Symbol: foo
       - Offset: 0x00000000000000F1
         Type:   R_X86_64_GOTPC32_TLSDESC
+        Symbol: foo
       - Offset: 0x00000000000000F8
         Type:   R_X86_64_TLSDESC_CALL
+        Symbol: foo
       - Offset: 0x00000000000000FF
         Type:   R_X86_64_TLSDESC
+        Symbol: foo
       - Offset: 0x0000000000000106
         Type:   R_X86_64_IRELATIVE
+        Symbol: foo
       - Offset: 0x000000000000010A
         Type:   R_X86_64_GOTPCRELX
+        Symbol: foo
       - Offset: 0x000000000000010E
         Type:   R_X86_64_REX_GOTPCRELX
+        Symbol: foo
+Symbols:
+  - Name:    foo
+    Binding: STB_GLOBAL

From c716e5d6dea2dec44e2f54da5a7ec41b1237011b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 3 Jun 2019 09:56:09 +0000
Subject: [PATCH 0893/1176] Revert rL362358 : PR42104: Support instantiations
 of lambdas that implicitly capture packs.

Two changes:
 * Track odr-use via FunctionParmPackExprs to properly handle dependent
   odr-uses of packs in generic lambdas.
 * Do not instantiate implicit captures; instead, regenerate them by
   instantiating the body of the lambda. This is necessary to
   distinguish between cases where only one element of a pack is
   captured and cases where the entire pack is captured.
........
Fixes http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win buildbot failures

llvm-svn: 362375
---
 clang/include/clang/Sema/ScopeInfo.h          | 19 +++---
 clang/include/clang/Sema/Sema.h               |  1 -
 clang/lib/Sema/ScopeInfo.cpp                  | 28 ++++-----
 clang/lib/Sema/SemaExpr.cpp                   | 50 ++++++---------
 clang/lib/Sema/SemaExprCXX.cpp                | 11 +++-
 clang/lib/Sema/SemaTemplateInstantiate.cpp    | 14 ++---
 clang/lib/Sema/TreeTransform.h                | 63 ++++---------------
 .../cxx1y-generic-lambdas-capturing.cpp       | 27 +-------
 .../test/SemaTemplate/lambda-capture-pack.cpp | 17 -----
 9 files changed, 69 insertions(+), 161 deletions(-)
 delete mode 100644 clang/test/SemaTemplate/lambda-capture-pack.cpp

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index ea2595113d589..177c88d7e8475 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -15,7 +15,6 @@
 #define LLVM_CLANG_SEMA_SCOPEINFO_H
 
 #include "clang/AST/Expr.h"
-#include "clang/AST/ExprCXX.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/CapturedStmt.h"
 #include "clang/Basic/LLVM.h"
@@ -914,8 +913,7 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
   ///   };
   /// }
   void addPotentialCapture(Expr *VarExpr) {
-    assert(isa<DeclRefExpr>(VarExpr) || isa<MemberExpr>(VarExpr) ||
-           isa<FunctionParmPackExpr>(VarExpr));
+    assert(isa<DeclRefExpr>(VarExpr) || isa<MemberExpr>(VarExpr));
     PotentiallyCapturingExprs.push_back(VarExpr);
   }
 
@@ -967,15 +965,13 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
   ///  building such a node. So we need a rule that anyone can implement and get
   ///  exactly the same result".
   void markVariableExprAsNonODRUsed(Expr *CapturingVarExpr) {
-    assert(isa<DeclRefExpr>(CapturingVarExpr) ||
-           isa<MemberExpr>(CapturingVarExpr) ||
-           isa<FunctionParmPackExpr>(CapturingVarExpr));
+    assert(isa<DeclRefExpr>(CapturingVarExpr)
+        || isa<MemberExpr>(CapturingVarExpr));
     NonODRUsedCapturingExprs.insert(CapturingVarExpr);
   }
   bool isVariableExprMarkedAsNonODRUsed(Expr *CapturingVarExpr) const {
-    assert(isa<DeclRefExpr>(CapturingVarExpr) ||
-           isa<MemberExpr>(CapturingVarExpr) ||
-           isa<FunctionParmPackExpr>(CapturingVarExpr));
+    assert(isa<DeclRefExpr>(CapturingVarExpr)
+      || isa<MemberExpr>(CapturingVarExpr));
     return NonODRUsedCapturingExprs.count(CapturingVarExpr);
   }
   void removePotentialCapture(Expr *E) {
@@ -997,8 +993,9 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
                                   PotentialThisCaptureLocation.isValid();
   }
 
-  void visitPotentialCaptures(
-      llvm::function_ref<void(VarDecl *, Expr *)> Callback) const;
+  // When passed the index, returns the VarDecl and Expr associated
+  // with the index.
+  void getPotentialVariableCapture(unsigned Idx, VarDecl *&VD, Expr *&E) const;
 };
 
 FunctionScopeInfo::WeakObjectProfileTy::WeakObjectProfileTy()
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 096bebf162170..b4f721c091977 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4179,7 +4179,6 @@ class Sema {
   void MarkVariableReferenced(SourceLocation Loc, VarDecl *Var);
   void MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base = nullptr);
   void MarkMemberReferenced(MemberExpr *E);
-  void MarkFunctionParmPackReferenced(FunctionParmPackExpr *E);
   void MarkCaptureUsedInEnclosingContext(VarDecl *Capture, SourceLocation Loc,
                                          unsigned CapturingScopeIndex);
 
diff --git a/clang/lib/Sema/ScopeInfo.cpp b/clang/lib/Sema/ScopeInfo.cpp
index b2a26af9b4a57..e84e592a4827e 100644
--- a/clang/lib/Sema/ScopeInfo.cpp
+++ b/clang/lib/Sema/ScopeInfo.cpp
@@ -229,20 +229,20 @@ bool CapturingScopeInfo::isVLATypeCaptured(const VariableArrayType *VAT) const {
   return false;
 }
 
-void LambdaScopeInfo::visitPotentialCaptures(
-    llvm::function_ref<void(VarDecl *, Expr *)> Callback) const {
-  for (Expr *E : PotentiallyCapturingExprs) {
-    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
-      Callback(cast<VarDecl>(DRE->getFoundDecl()), E);
-    } else if (auto *ME = dyn_cast<MemberExpr>(E)) {
-      Callback(cast<VarDecl>(ME->getMemberDecl()), E);
-    } else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
-      for (VarDecl *VD : *FP)
-        Callback(VD, E);
-    } else {
-      llvm_unreachable("unexpected expression in potential captures list");
-    }
-  }
+void LambdaScopeInfo::getPotentialVariableCapture(unsigned Idx, VarDecl *&VD,
+                                                  Expr *&E) const {
+  assert(Idx < getNumPotentialVariableCaptures() &&
+         "Index of potential capture must be within 0 to less than the "
+         "number of captures!");
+  E = PotentiallyCapturingExprs[Idx];
+  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E))
+    VD = dyn_cast<VarDecl>(DRE->getFoundDecl());
+  else if (MemberExpr *ME = dyn_cast<MemberExpr>(E))
+    VD = dyn_cast<VarDecl>(ME->getMemberDecl());
+  else
+    llvm_unreachable("Only DeclRefExprs or MemberExprs should be added for "
+    "potential captures");
+  assert(VD);
 }
 
 FunctionScopeInfo::~FunctionScopeInfo() { }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index d0b2760284670..72b61b8e5842f 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14610,9 +14610,7 @@ namespace {
     // context so never needs to be transformed.
     // FIXME: Ideally we wouldn't transform the closure type either, and would
     // just recreate the capture expressions and lambda expression.
-    StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body) {
-      return SkipLambdaBody(E, Body);
-    }
+    StmtResult TransformLambdaBody(Stmt *Body) { return Body; }
   };
 }
 
@@ -15056,7 +15054,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
 ///    *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack.
 static void
 MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
-                   const unsigned *const FunctionScopeIndexToStopAt = nullptr) {
+                   const unsigned *const FunctionScopeIndexToStopAt) {
   // Keep track of used but undefined variables.
   // FIXME: We shouldn't suppress this warning for static data members.
   if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly &&
@@ -15737,19 +15735,14 @@ void Sema::UpdateMarkingForLValueToRValue(Expr *E) {
   // variable.
   if (LambdaScopeInfo *LSI = getCurLambda()) {
     Expr *SansParensExpr = E->IgnoreParens();
-    VarDecl *Var;
-    ArrayRef<VarDecl *> Vars = None;
+    VarDecl *Var = nullptr;
     if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(SansParensExpr))
-      Vars = Var = dyn_cast<VarDecl>(DRE->getFoundDecl());
+      Var = dyn_cast<VarDecl>(DRE->getFoundDecl());
     else if (MemberExpr *ME = dyn_cast<MemberExpr>(SansParensExpr))
-      Vars = Var = dyn_cast<VarDecl>(ME->getMemberDecl());
-    else if (auto *FPPE = dyn_cast<FunctionParmPackExpr>(SansParensExpr))
-      Vars = llvm::makeArrayRef(FPPE->begin(), FPPE->end());
+      Var = dyn_cast<VarDecl>(ME->getMemberDecl());
 
-    for (VarDecl *VD : Vars) {
-      if (Var && IsVariableNonDependentAndAConstantExpression(VD, Context))
-        LSI->markVariableExprAsNonODRUsed(SansParensExpr);
-    }
+    if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context))
+      LSI->markVariableExprAsNonODRUsed(SansParensExpr);
   }
 }
 
@@ -15774,18 +15767,20 @@ void Sema::CleanupVarDeclMarking() {
   std::swap(LocalMaybeODRUseExprs, MaybeODRUseExprs);
 
   for (Expr *E : LocalMaybeODRUseExprs) {
-    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
-      MarkVarDeclODRUsed(cast<VarDecl>(DRE->getDecl()),
-                         DRE->getLocation(), *this);
-    } else if (auto *ME = dyn_cast<MemberExpr>(E)) {
-      MarkVarDeclODRUsed(cast<VarDecl>(ME->getMemberDecl()), ME->getMemberLoc(),
-                         *this);
-    } else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
-      for (VarDecl *VD : *FP)
-        MarkVarDeclODRUsed(VD, FP->getParameterPackLocation(), *this);
+    VarDecl *Var;
+    SourceLocation Loc;
+    if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
+      Var = cast<VarDecl>(DRE->getDecl());
+      Loc = DRE->getLocation();
+    } else if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
+      Var = cast<VarDecl>(ME->getMemberDecl());
+      Loc = ME->getMemberLoc();
     } else {
       llvm_unreachable("Unexpected expression");
     }
+
+    MarkVarDeclODRUsed(Var, Loc, *this,
+                       /*MaxFunctionScopeIndex Pointer*/ nullptr);
   }
 
   assert(MaybeODRUseExprs.empty() &&
@@ -15794,8 +15789,7 @@ void Sema::CleanupVarDeclMarking() {
 
 static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
                                     VarDecl *Var, Expr *E) {
-  assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E) ||
-          isa<FunctionParmPackExpr>(E)) &&
+  assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E)) &&
          "Invalid Expr argument to DoMarkVarDeclReferenced");
   Var->setReferenced();
 
@@ -16028,12 +16022,6 @@ void Sema::MarkMemberReferenced(MemberExpr *E) {
   MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse);
 }
 
-/// Perform reference-marking and odr-use handling for a FunctionParmPackExpr.
-void Sema::MarkFunctionParmPackReferenced(FunctionParmPackExpr *E) {
-  for (VarDecl *VD : *E)
-    MarkExprReferenced(*this, E->getParameterPackLocation(), VD, E, true);
-}
-
 /// Perform marking for a reference to an arbitrary declaration.  It
 /// marks the declaration referenced, and performs odr-use checking for
 /// functions and variables. This method should not be used when building a
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 5884cf906fd15..ac050fa1ef55c 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7427,7 +7427,12 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
   // All the potentially captureable variables in the current nested
   // lambda (within a generic outer lambda), must be captured by an
   // outer lambda that is enclosed within a non-dependent context.
-  CurrentLSI->visitPotentialCaptures([&] (VarDecl *Var, Expr *VarExpr) {
+  const unsigned NumPotentialCaptures =
+      CurrentLSI->getNumPotentialVariableCaptures();
+  for (unsigned I = 0; I != NumPotentialCaptures; ++I) {
+    Expr *VarExpr = nullptr;
+    VarDecl *Var = nullptr;
+    CurrentLSI->getPotentialVariableCapture(I, Var, VarExpr);
     // If the variable is clearly identified as non-odr-used and the full
     // expression is not instantiation dependent, only then do we not
     // need to check enclosing lambda's for speculative captures.
@@ -7441,7 +7446,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
     // }
     if (CurrentLSI->isVariableExprMarkedAsNonODRUsed(VarExpr) &&
         !IsFullExprInstantiationDependent)
-      return;
+      continue;
 
     // If we have a capture-capable lambda for the variable, go ahead and
     // capture the variable in that lambda (and all its enclosing lambdas).
@@ -7473,7 +7478,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
                           DeclRefType, nullptr);
       }
     }
-  });
+  }
 
   // Check if 'this' needs to be captured.
   if (CurrentLSI->hasPotentialThisCapture()) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 973f564d30583..ba54d5010bab4 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1368,11 +1368,9 @@ TemplateInstantiator::TransformFunctionParmPackExpr(FunctionParmPackExpr *E) {
     Vars.push_back(D);
   }
 
-  auto *PackExpr =
-      FunctionParmPackExpr::Create(getSema().Context, T, E->getParameterPack(),
-                                   E->getParameterPackLocation(), Vars);
-  getSema().MarkFunctionParmPackReferenced(PackExpr);
-  return PackExpr;
+  return FunctionParmPackExpr::Create(getSema().Context, T,
+                                      E->getParameterPack(),
+                                      E->getParameterPackLocation(), Vars);
 }
 
 ExprResult
@@ -1391,10 +1389,8 @@ TemplateInstantiator::TransformFunctionParmPackRefExpr(DeclRefExpr *E,
       QualType T = TransformType(E->getType());
       if (T.isNull())
         return ExprError();
-      auto *PackExpr = FunctionParmPackExpr::Create(getSema().Context, T, PD,
-                                                    E->getExprLoc(), *Pack);
-      getSema().MarkFunctionParmPackReferenced(PackExpr);
-      return PackExpr;
+      return FunctionParmPackExpr::Create(getSema().Context, T, PD,
+                                          E->getExprLoc(), *Pack);
     }
 
     TransformedDecl = (*Pack)[getSema().ArgumentPackSubstitutionIndex];
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index a1a9aaedee443..592787a5870ce 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -660,10 +660,7 @@ class TreeTransform {
                                           bool ExpectParameterPack);
 
   /// Transform the body of a lambda-expression.
-  StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body);
-  /// Alternative implementation of TransformLambdaBody that skips transforming
-  /// the body.
-  StmtResult SkipLambdaBody(LambdaExpr *E, Stmt *Body);
+  StmtResult TransformLambdaBody(Stmt *Body);
 
   QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL);
 
@@ -11361,13 +11358,16 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
   bool Invalid = false;
 
   // Transform captures.
+  bool FinishedExplicitCaptures = false;
   for (LambdaExpr::capture_iterator C = E->capture_begin(),
                                  CEnd = E->capture_end();
        C != CEnd; ++C) {
     // When we hit the first implicit capture, tell Sema that we've finished
     // the list of explicit captures.
-    if (C->isImplicit())
-      break;
+    if (!FinishedExplicitCaptures && C->isImplicit()) {
+      getSema().finishLambdaExplicitCaptures(LSI);
+      FinishedExplicitCaptures = true;
+    }
 
     // Capturing 'this' is trivial.
     if (C->capturesThis()) {
@@ -11476,16 +11476,17 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
     getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind,
                                  EllipsisLoc);
   }
-  getSema().finishLambdaExplicitCaptures(LSI);
+  if (!FinishedExplicitCaptures)
+    getSema().finishLambdaExplicitCaptures(LSI);
 
-  // FIXME: Sema's lambda-building mechanism expects us to push an expression
-  // evaluation context even if we're not transforming the function body.
+  // Enter a new evaluation context to insulate the lambda from any
+  // cleanups from the enclosing full-expression.
   getSema().PushExpressionEvaluationContext(
       Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
 
   // Instantiate the body of the lambda expression.
   StmtResult Body =
-      Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody());
+      Invalid ? StmtError() : getDerived().TransformLambdaBody(E->getBody());
 
   // ActOnLambda* will pop the function scope for us.
   FuncScopeCleanup.disable();
@@ -11511,50 +11512,10 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
 
 template<typename Derived>
 StmtResult
-TreeTransform<Derived>::TransformLambdaBody(LambdaExpr *E, Stmt *S) {
+TreeTransform<Derived>::TransformLambdaBody(Stmt *S) {
   return TransformStmt(S);
 }
 
-template<typename Derived>
-StmtResult
-TreeTransform<Derived>::SkipLambdaBody(LambdaExpr *E, Stmt *S) {
-  // Transform captures.
-  for (LambdaExpr::capture_iterator C = E->capture_begin(),
-                                 CEnd = E->capture_end();
-       C != CEnd; ++C) {
-    // When we hit the first implicit capture, tell Sema that we've finished
-    // the list of explicit captures.
-    if (!C->isImplicit())
-      continue;
-
-    // Capturing 'this' is trivial.
-    if (C->capturesThis()) {
-      getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
-                                    /*BuildAndDiagnose*/ true, nullptr,
-                                    C->getCaptureKind() == LCK_StarThis);
-      continue;
-    }
-    // Captured expression will be recaptured during captured variables
-    // rebuilding.
-    if (C->capturesVLAType())
-      continue;
-
-    assert(C->capturesVariable() && "unexpected kind of lambda capture");
-    assert(!E->isInitCapture(C) && "implicit init-capture?");
-
-    // Transform the captured variable.
-    VarDecl *CapturedVar = cast_or_null<VarDecl>(
-        getDerived().TransformDecl(C->getLocation(), C->getCapturedVar()));
-    if (!CapturedVar || CapturedVar->isInvalidDecl())
-      return StmtError();
-
-    // Capture the transformed variable.
-    getSema().tryCaptureVariable(CapturedVar, C->getLocation());
-  }
-
-  return S;
-}
-
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr(
diff --git a/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp b/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
index a98366c8794a1..eaed45acd11be 100644
--- a/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
+++ b/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -emit-llvm-only %s
-// RUN: %clang_cc1 -std=c++2a -verify -fsyntax-only -fblocks -emit-llvm-only %s
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fdelayed-template-parsing %s -DDELAYED_TEMPLATE_PARSING
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fms-extensions %s -DMS_EXTENSIONS
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fdelayed-template-parsing -fms-extensions %s -DMS_EXTENSIONS -DDELAYED_TEMPLATE_PARSING
@@ -177,13 +176,7 @@ void doit() {
     sample::X cx{5};
     auto L = [=](auto a) { 
       const int z = 3;
-      // FIXME: The warning below is correct but for some reason doesn't show
-      // up in C++17 mode.
       return [&,a](auto b) {
-#if __cplusplus > 201702L
-        // expected-warning@-2 {{address of stack memory associated with local variable 'z' returned}}
-        // expected-note@#call {{in instantiation of}}
-#endif
         const int y = 5;    
         return [=](auto c) { 
           int d[sizeof(a) == sizeof(c) || sizeof(c) == sizeof(b) ? 2 : 1];
@@ -196,7 +189,7 @@ void doit() {
         }; 
       };
     };
-    auto M = L(3)(3.5); // #call
+    auto M = L(3)(3.5);
     M(3.14);
   }
 }
@@ -1526,20 +1519,6 @@ void test() {
 
 } // end ns5
 
-} // end PR34266
 
-namespace capture_pack {
-#if __cplusplus >= 201702L
-  constexpr
-#endif
-  auto v =
-    [](auto ...a) {
-      [&](auto ...b) {
-        ((a = b), ...); // expected-warning 0-1{{extension}}
-      }(100, 20, 3);
-      return (a + ...); // expected-warning 0-1{{extension}}
-    }(400, 50, 6);
-#if __cplusplus >= 201702L
-  static_assert(v == 123);
-#endif
-}
+
+} // end PR34266
diff --git a/clang/test/SemaTemplate/lambda-capture-pack.cpp b/clang/test/SemaTemplate/lambda-capture-pack.cpp
deleted file mode 100644
index 2fe576769dbdf..0000000000000
--- a/clang/test/SemaTemplate/lambda-capture-pack.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: %clang_cc1 -std=c++2a -verify %s
-// expected-no-diagnostics
-
-template<typename ...T, typename ...Lambda> void check_sizes(Lambda ...L) {
-  static_assert(((sizeof(T) == sizeof(Lambda)) && ...));
-}
-
-template<typename ...T> void f(T ...v) {
-  // Pack expansion of lambdas: each lambda captures only one pack element.
-  check_sizes<T...>([=] { (void)&v; } ...);
-
-  // Pack expansion inside lambda: captures all pack elements.
-  auto l = [=] { ((void)&v, ...); };
-  static_assert(sizeof(l) >= (sizeof(T) + ...));
-}
-
-template void f(int, char, double);

From 7ebfbebfe127b3a74873ad24dd9162aa4fdb238f Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 09:56:40 +0000
Subject: [PATCH 0894/1176] Include what you use in HexagonELFObjectWriter.cpp

HexagonELFObjectWriter.cpp was not using any APIs from Hexagon.h, and
was only including it for transitive dependencies.  Doing so is
problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362376
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index a0e0489e90169..f678bf49322e5 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonFixupKinds.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCObjectWriter.h"

From ab93e6e0feaee5c3b1eb27d88df38cb05f4e7e9e Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 3 Jun 2019 09:58:41 +0000
Subject: [PATCH 0895/1176] [llvm-readobj] - Convert gnu-sections.test to use
 YAML.

gnu-sections.test currently use relocs.obj.elf-x86_64 and
relocs.obj.elf-i386 precompiled objects as an inputs.

These inputs actually initially were introduced to test the
dump of relocations and have almost nothing common with dumping
sections.

Patch converts the test to use yaml2obj. That allows to remove
relocs.obj.elf-i386 binary.
(relocs.obj.elf-x86_64 is still used by another test and can't be removed atm).

Differential revision: https://reviews.llvm.org/D62659

llvm-svn: 362377
---
 .../llvm-readobj/Inputs/relocs.obj.elf-i386   | Bin 1120 -> 0 bytes
 .../test/tools/llvm-readobj/gnu-sections.test | 154 +++++++++++++-----
 2 files changed, 113 insertions(+), 41 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386

diff --git a/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386 b/llvm/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386
deleted file mode 100644
index 7860df6de7630922930e34c75c457b5960e12123..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1120
zcmd_oKP&@L9Ki8++G<NeQUB`ye@Yq=gGo}+5@}KiF-cAvO**K--Jli{iG|5z5R=Jd
zGMP*!lgVT<nG7a_@2`0;P9kCVl6Rlq@7?cyFZb^DQ<=17Sw5wQrF_aV39jp8y0SK^
zLMot+4<-3;S6(@>Q`kzlg<V%A@{XetPSJ69++3c>ST0ra&77O7oY-G30gB|9t97xQ
z^}CJ*zmBCQtYe{#ueQjdJSX?0F2s!M+<eptx#=0EUoC4=|65(eQzLDp-=qY3nD+hT
zRb~;-Nj5OZls#oiuNc$pNvJ-k>ySVX)6e8T`TZ}Ie%_9f`}tG9hV1L@E4gyYC1o#U
zvUACdolU1#QmgiAaxRmy^?T4j&d0(!timYzP~O3hy%@k@ti}lp;uMB(2E#auH8_v8
zxP*1MjP<yN5nRUx%wr=Ku?e>^iY_+e9=6~Cw&D@C;R&|mDaP;&JMaQK@e;f63cK+d
nd+-K(@fQ2=4*T&Q2k-$0@ezmc2}kf5NAU&6@D=0uhU54NobO^C

diff --git a/llvm/test/tools/llvm-readobj/gnu-sections.test b/llvm/test/tools/llvm-readobj/gnu-sections.test
index faf00d9d5acfe..17a0c4cb8eb26 100644
--- a/llvm/test/tools/llvm-readobj/gnu-sections.test
+++ b/llvm/test/tools/llvm-readobj/gnu-sections.test
@@ -1,43 +1,115 @@
-RUN: llvm-readelf -S %p/Inputs/relocs.obj.elf-i386 \
-RUN:   | FileCheck %s -check-prefix ELF32
-RUN: llvm-readelf -S %p/Inputs/relocs.obj.elf-x86_64 \
-RUN:   | FileCheck %s -check-prefix ELF64
-RUN: llvm-readobj --wide --sections \
-RUN:   %p/Inputs/relocs.obj.elf-x86_64 --elf-output-style=GNU \
-RUN:   | FileCheck %s -check-prefix ELF64
-RUN: llvm-readobj -W --sections \
-RUN:   %p/Inputs/relocs.obj.elf-x86_64 --elf-output-style=GNU \
-RUN:   | FileCheck %s -check-prefix ELF64
-RUN: llvm-readelf -W -S %p/Inputs/relocs.obj.elf-x86_64 \
-RUN:   | FileCheck %s -check-prefix ELF64
+# RUN: yaml2obj --docnum=1 %s -o %t-i386.o
+# RUN: llvm-readelf -S %t-i386.o | FileCheck %s -check-prefix ELF32
 
-ELF32:    Section Headers:
-ELF32-NEXT:      [Nr] Name              Type            Address  Off    Size   ES Flg Lk Inf Al
-ELF32-NEXT:      [ 0]                   NULL            00000000 000000 000000 00      0   0  0
-ELF32-NEXT:      [ 1] .text             PROGBITS        00000000 000034 0000f6 00  AX  0   0  4
-ELF32-NEXT:      [ 2] .rel.text         REL             00000000 000318 000148 08      6   1  4
-ELF32-NEXT:      [ 3] .data             PROGBITS        00000000 00012c 000000 00  WA  0   0  4
-ELF32-NEXT:      [ 4] .bss              NOBITS          00000000 00012c 000000 00  WA  0   0  4
-ELF32-NEXT:      [ 5] .shstrtab         STRTAB          00000000 00012c 000030 00      0   0  1
-ELF32-NEXT:      [ 6] .symtab           SYMTAB          00000000 00029c 000060 10      7   4  4
-ELF32-NEXT:      [ 7] .strtab           STRTAB          00000000 0002fc 00001b 00      0   0  1
-ELF32-NEXT:    Key to Flags:
-ELF32-NEXT:      W (write), A (alloc), X (execute), M (merge), S (strings), l (large)
-ELF32-NEXT:      I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
-ELF32-NEXT:      O (extra OS processing required) o (OS specific), p (processor specific)
+# ELF32:      There are 8 section headers, starting at offset 0x34:
+# ELF32-EMPTY:
+# ELF32-NEXT: Section Headers:
+# ELF32-NEXT:   [Nr] Name              Type            Address  Off    Size   ES Flg Lk Inf Al
+# ELF32-NEXT:   [ 0]                   NULL            00000000 000000 000000 00      0   0  0
+# ELF32-NEXT:   [ 1] .text             PROGBITS        00000000 000174 000001 00  AX  0   0  4
+# ELF32-NEXT:   [ 2] .rel.text         REL             00000000 000178 000000 08      5   1  4
+# ELF32-NEXT:   [ 3] .data             PROGBITS        00000000 000178 000000 00  WA  0   0  4
+# ELF32-NEXT:   [ 4] .bss              NOBITS          00000000 000178 000000 00  WA  0   0  4
+# ELF32-NEXT:   [ 5] .symtab           SYMTAB          00000000 000178 000020 10      6   2  8
+# ELF32-NEXT:   [ 6] .strtab           STRTAB          00000000 000198 000007 00      0   0  1
+# ELF32-NEXT:   [ 7] .shstrtab         STRTAB          00000000 00019f 000030 00      0   0  1
+# ELF32-NEXT: Key to Flags:
+# ELF32-NEXT:   W (write), A (alloc), X (execute), M (merge), S (strings), l (large)
+# ELF32-NEXT:   I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
+# ELF32-NEXT:   O (extra OS processing required) o (OS specific), p (processor specific)
 
-ELF64:    There are 8 section headers, starting at offset 0x180:
-ELF64:    Section Headers:
-ELF64-NEXT:      [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
-ELF64-NEXT:      [ 0]                   NULL            0000000000000000 000000 000000 00     0   0  0
-ELF64-NEXT:      [ 1] .text             PROGBITS        0000000000000000 000040 00010a 00  AX 0   0  4
-ELF64-NEXT:      [ 2] .rela.text        RELA            0000000000000000 000430 000390 18     6   1  8
-ELF64-NEXT:      [ 3] .data             PROGBITS        0000000000000000 00014c 000000 00  WA 0   0  4
-ELF64-NEXT:      [ 4] .bss              NOBITS          0000000000000000 00014c 000000 00  WA 0   0  4
-ELF64-NEXT:      [ 5] .shstrtab         STRTAB          0000000000000000 00014c 000031 00     0   0  1
-ELF64-NEXT:      [ 6] .symtab           SYMTAB          0000000000000000 000380 000090 18     7   4  8
-ELF64-NEXT:      [ 7] .strtab           STRTAB          0000000000000000 000410 00001b 00     0   0  1
-ELF64-NEXT:    Key to Flags:
-ELF64-NEXT:      W (write), A (alloc), X (execute), M (merge), S (strings), l (large)
-ELF64-NEXT:      I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
-ELF64-NEXT:      O (extra OS processing required) o (OS specific), p (processor specific)
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS32
+  Data:    ELFDATA2LSB
+  OSABI:   ELFOSABI_GNU
+  Type:    ET_REL
+  Machine: EM_386
+Sections:
+  - Name:         .text
+    Type:         SHT_PROGBITS
+    Flags:        [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign: 0x0000000000000004
+    Content:      00
+  - Name:         .rel.text
+    Type:         SHT_REL
+    Link:         .symtab
+    AddressAlign: 0x0000000000000004
+    EntSize:      0x0000000000000008
+    Info:         .text
+    Relocations:
+  - Name:         .data
+    Type:         SHT_PROGBITS
+    Flags:        [ SHF_WRITE, SHF_ALLOC ]
+    AddressAlign: 0x0000000000000004
+    Content:      ''
+  - Name:         .bss
+    Type:         SHT_NOBITS
+    Flags:        [ SHF_WRITE, SHF_ALLOC ]
+    AddressAlign: 0x0000000000000004
+Symbols:
+  - Name:         .text
+    Type:         STT_SECTION
+    Section:      .text
+
+# RUN: yaml2obj --docnum=2 %s -o %t-x64.o
+# RUN: llvm-readelf -S %t-x64.o | FileCheck %s -check-prefix ELF64
+
+## Check that --wide is the same as -W and ignored and also
+## that --section is the same as -S.
+# RUN: llvm-readobj --wide --sections %t-x64.o --elf-output-style=GNU \
+# RUN:   | FileCheck %s -check-prefix ELF64
+# RUN: llvm-readobj -W --sections %t-x64.o --elf-output-style=GNU \
+# RUN:   | FileCheck %s -check-prefix ELF64
+# RUN: llvm-readelf -W -S %t-x64.o | FileCheck %s -check-prefix ELF64
+
+# ELF64:      There are 8 section headers, starting at offset 0x40:
+# ELF64-EMPTY: 
+# ELF64-NEXT: Section Headers:
+# ELF64-NEXT:   [Nr] Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# ELF64-NEXT:   [ 0]                   NULL            0000000000000000 000000 000000 00  0  0  0
+# ELF64-NEXT:   [ 1] .text             PROGBITS        0000000000000000 000240 000001 00  AX 0   0  4
+# ELF64-NEXT:   [ 2] .rela.text        RELA            0000000000000000 000248 000000 18  5  1  8
+# ELF64-NEXT:   [ 3] .data             PROGBITS        0000000000000000 000248 000000 00  WA 0   0  4
+# ELF64-NEXT:   [ 4] .bss              NOBITS          0000000000000000 000248 000000 00  WA 0   0  4
+# ELF64-NEXT:   [ 5] .symtab           SYMTAB          0000000000000000 000248 000030 18  6  2  8
+# ELF64-NEXT:   [ 6] .strtab           STRTAB          0000000000000000 000278 000007 00  0  0  1
+# ELF64-NEXT:   [ 7] .shstrtab         STRTAB          0000000000000000 00027f 000031 00  0  0  1
+# ELF64-NEXT: Key to Flags:
+# ELF64-NEXT:   W (write), A (alloc), X (execute), M (merge), S (strings), l (large)
+# ELF64-NEXT:   I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
+# ELF64-NEXT:   O (extra OS processing required) o (OS specific), p (processor specific)
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  OSABI:   ELFOSABI_GNU
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:         .text
+    Type:         SHT_PROGBITS
+    Flags:        [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign: 0x0000000000000004
+    Content:      00
+  - Name:         .rela.text
+    Type:         SHT_RELA
+    Link:         .symtab
+    AddressAlign: 0x0000000000000008
+    EntSize:      0x0000000000000018
+    Info:         .text
+    Relocations:
+  - Name:         .data
+    Type:         SHT_PROGBITS
+    Flags:        [ SHF_WRITE, SHF_ALLOC ]
+    AddressAlign: 0x0000000000000004
+    Content:      ''
+  - Name:         .bss
+    Type:         SHT_NOBITS
+    Flags:        [ SHF_WRITE, SHF_ALLOC ]
+    AddressAlign: 0x0000000000000004
+Symbols:
+  - Name:    .text
+    Type:    STT_SECTION
+    Section: .text

From 3901dd3e4126d6349deb63686be99ac7fd7b94f6 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes@arm.com>
Date: Mon, 3 Jun 2019 10:42:02 +0000
Subject: [PATCH 0896/1176] [AArch64][SVE2] Add CPU and arch directive tests

Summary:
This patch adds tests for directives .arch, .arch_extension and .cpu for
all features defined in Arm SVE2 architecture extension.

Reviewed By: chill

Differential Revision: https://reviews.llvm.org/D62602

llvm-svn: 362378
---
 .../MC/AArch64/SVE2/directive-arch-negative.s | 31 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/directive-arch.s    | 21 +++++++++++++
 .../SVE2/directive-arch_extension-negative.s  | 31 +++++++++++++++++++
 .../AArch64/SVE2/directive-arch_extension.s   | 21 +++++++++++++
 .../MC/AArch64/SVE2/directive-cpu-negative.s  | 31 +++++++++++++++++++
 llvm/test/MC/AArch64/SVE2/directive-cpu.s     | 21 +++++++++++++
 6 files changed, 156 insertions(+)
 create mode 100644 llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/directive-arch.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
 create mode 100644 llvm/test/MC/AArch64/SVE2/directive-cpu.s

diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
new file mode 100644
index 0000000000000..4b2ba039dc389
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
@@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv8-a+sve2
+.arch armv8-a+nosve2
+tbx z0.b, z1.b, z2.b
+// CHECK: error: instruction requires: sve2
+// CHECK-NEXT: tbx z0.b, z1.b, z2.b
+
+.arch armv8-a+sve2-aes
+.arch armv8-a+nosve2-aes
+aesd z23.b, z23.b, z13.b
+// CHECK: error: instruction requires: sve2-aes
+// CHECK-NEXT: aesd z23.b, z23.b, z13.b
+
+.arch armv8-a+sve2-sm4
+.arch armv8-a+nosve2-sm4
+sm4e z0.s, z0.s, z0.s
+// CHECK: error: instruction requires: sve2-sm4
+// CHECK-NEXT: sm4e z0.s, z0.s, z0.s
+
+.arch armv8-a+sve2-sha3
+.arch armv8-a+nosve2-sha3
+rax1 z0.d, z0.d, z0.d
+// CHECK: error: instruction requires: sve2-sha3
+// CHECK-NEXT: rax1 z0.d, z0.d, z0.d
+
+.arch armv8-a+bitperm
+.arch armv8-a+nobitperm
+bgrp z21.s, z10.s, z21.s
+// CHECK: error: instruction requires: bitperm
+// CHECK-NEXT: bgrp z21.s, z10.s, z21.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch.s b/llvm/test/MC/AArch64/SVE2/directive-arch.s
new file mode 100644
index 0000000000000..94ef64700755c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv8-a+sve2
+tbx z0.b, z1.b, z2.b
+// CHECK: tbx z0.b, z1.b, z2.b
+
+.arch armv8-a+sve2-aes
+aesd z23.b, z23.b, z13.b
+// CHECK: aesd z23.b, z23.b, z13.b
+
+.arch armv8-a+sve2-sm4
+sm4e z0.s, z0.s, z0.s
+// CHECK: sm4e z0.s, z0.s, z0.s
+
+.arch armv8-a+sve2-sha3
+rax1 z0.d, z0.d, z0.d
+// CHECK: rax1 z0.d, z0.d, z0.d
+
+.arch armv8-a+bitperm
+bgrp z21.s, z10.s, z21.s
+// CHECK: bgrp z21.s, z10.s, z21.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
new file mode 100644
index 0000000000000..5db80e11a9100
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
@@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch_extension sve2
+.arch_extension nosve2
+tbx z0.b, z1.b, z2.b
+// CHECK: error: instruction requires: sve2
+// CHECK-NEXT: tbx z0.b, z1.b, z2.b
+
+.arch_extension sve2-aes
+.arch_extension nosve2-aes
+aesd z23.b, z23.b, z13.b
+// CHECK: error: instruction requires: sve2-aes
+// CHECK-NEXT: aesd z23.b, z23.b, z13.b
+
+.arch_extension sve2-sm4
+.arch_extension nosve2-sm4
+sm4e z0.s, z0.s, z0.s
+// CHECK: error: instruction requires: sve2-sm4
+// CHECK-NEXT: sm4e z0.s, z0.s, z0.s
+
+.arch_extension sve2-sha3
+.arch_extension nosve2-sha3
+rax1 z0.d, z0.d, z0.d
+// CHECK: error: instruction requires: sve2-sha3
+// CHECK-NEXT: rax1 z0.d, z0.d, z0.d
+
+.arch_extension bitperm
+.arch_extension nobitperm
+bgrp z21.s, z10.s, z21.s
+// CHECK: error: instruction requires: bitperm
+// CHECK-NEXT: bgrp z21.s, z10.s, z21.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
new file mode 100644
index 0000000000000..257f5721d720b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch_extension sve2
+tbx z0.b, z1.b, z2.b
+// CHECK: tbx z0.b, z1.b, z2.b
+
+.arch_extension sve2-aes
+aesd z23.b, z23.b, z13.b
+// CHECK: aesd z23.b, z23.b, z13.b
+
+.arch_extension sve2-sm4
+sm4e z0.s, z0.s, z0.s
+// CHECK: sm4e z0.s, z0.s, z0.s
+
+.arch_extension sve2-sha3
+rax1 z0.d, z0.d, z0.d
+// CHECK: rax1 z0.d, z0.d, z0.d
+
+.arch_extension bitperm
+bgrp z21.s, z10.s, z21.s
+// CHECK: bgrp z21.s, z10.s, z21.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
new file mode 100644
index 0000000000000..542a6f692ca3e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
@@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.cpu generic+sve2
+.cpu generic+nosve2
+tbx z0.b, z1.b, z2.b
+// CHECK: error: instruction requires: sve2
+// CHECK-NEXT: tbx z0.b, z1.b, z2.b
+
+.cpu generic+sve2-aes
+.cpu generic+nosve2-aes
+aesd z23.b, z23.b, z13.b
+// CHECK: error: instruction requires: sve2-aes
+// CHECK-NEXT: aesd z23.b, z23.b, z13.b
+
+.cpu generic+sve2-sm4
+.cpu generic+nosve2-sm4
+sm4e z0.s, z0.s, z0.s
+// CHECK: error: instruction requires: sve2-sm4
+// CHECK-NEXT: sm4e z0.s, z0.s, z0.s
+
+.cpu generic+sve2-sha3
+.cpu generic+nosve2-sha3
+rax1 z0.d, z0.d, z0.d
+// CHECK: error: instruction requires: sve2-sha3
+// CHECK-NEXT: rax1 z0.d, z0.d, z0.d
+
+.cpu generic+bitperm
+.cpu generic+nobitperm
+bgrp z21.s, z10.s, z21.s
+// CHECK: error: instruction requires: bitperm
+// CHECK-NEXT: bgrp z21.s, z10.s, z21.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu.s b/llvm/test/MC/AArch64/SVE2/directive-cpu.s
new file mode 100644
index 0000000000000..a8ca7b389e942
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/directive-cpu.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.cpu generic+sve2
+tbx z0.b, z1.b, z2.b
+// CHECK: tbx z0.b, z1.b, z2.b
+
+.cpu generic+sve2-aes
+aesd z23.b, z23.b, z13.b
+// CHECK: aesd z23.b, z23.b, z13.b
+
+.cpu generic+sve2-sm4
+sm4e z0.s, z0.s, z0.s
+// CHECK: sm4e z0.s, z0.s, z0.s
+
+.cpu generic+sve2-sha3
+rax1 z0.d, z0.d, z0.d
+// CHECK: rax1 z0.d, z0.d, z0.d
+
+.cpu generic+bitperm
+bgrp z21.s, z10.s, z21.s
+// CHECK: bgrp z21.s, z10.s, z21.s

From d8d3e17b8b07a34e6118a8b6cd66aea657265531 Mon Sep 17 00:00:00 2001
From: Mikael Holmen <mikael.holmen@ericsson.com>
Date: Mon, 3 Jun 2019 10:50:41 +0000
Subject: [PATCH 0897/1176] Fix compilation warning about unused variable [NFC]

llvm-svn: 362379
---
 clang/lib/Driver/Driver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index a57c66403a28a..e28ff55147a0e 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -1007,7 +1007,7 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
 
   // Check for working directory option before accessing any files
   if (Arg *WD = Args.getLastArg(options::OPT_working_directory))
-    if (std::error_code EC = VFS->setCurrentWorkingDirectory(WD->getValue()))
+    if (VFS->setCurrentWorkingDirectory(WD->getValue()))
       Diag(diag::err_drv_unable_to_set_working_directory) << WD->getValue();
 
   // FIXME: This stuff needs to go into the Compilation, not the driver.

From dc83a3c44940ca4d13839613dc55049e5eddc42e Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Mon, 3 Jun 2019 11:02:53 +0000
Subject: [PATCH 0898/1176] [ARM] Fix recent breakage of -mfpu=none.

The recent change D60691 introduced a bug in clang when handling
option combinations such as `-mcpu=cortex-m4 -mfpu=none`. Those
options together should select Cortex-M4 but disable all use of
hardware FP, but in fact, now hardware FP instructions can still be
generated in that mode.

The reason is because the handling of FPUVersion::NONE disables all
the same feature names it used to, of which the base one is `vfp2`.
But now there are further features below that, like `vfp2d16fp` and
(following D60694) `fpregs`, which also need to be turned off to
disable hardware FP completely.

Added a tiny test which double-checks that compiling a simple FP
function doesn't access the FP registers.

Reviewers: SjoerdMeijer, dmgreen

Reviewed By: dmgreen

Subscribers: lebedev.ri, javed.absar, kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62729

llvm-svn: 362380
---
 clang/lib/Driver/ToolChains/Arch/ARM.cpp | 9 +++++++--
 clang/test/CodeGen/arm-mfpu-none.c       | 8 ++++++++
 clang/test/Driver/arm-mfpu.c             | 2 ++
 llvm/lib/Support/ARMTargetParser.cpp     | 1 +
 4 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/arm-mfpu-none.c

diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 77fad7ed68f91..e38ce4d583fa7 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -430,8 +430,8 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
     llvm::ARM::getFPUFeatures(llvm::ARM::FK_NONE, Features);
 
     // Disable hardware FP features which have been enabled.
-    // FIXME: Disabling vfp2 and neon should be enough as all the other
-    //        features are dependent on these 2 features in LLVM. However
+    // FIXME: Disabling fpregs should be enough all by itself, since all
+    //        the other FP features are dependent on it. However
     //        there is currently no easy way to test this in clang, so for
     //        now just be explicit and disable all known dependent features
     //        as well.
@@ -439,6 +439,11 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
                                 "neon", "crypto", "dotprod", "fp16fml"})
       if (std::find(std::begin(Features), std::end(Features), "+" + Feature) != std::end(Features))
         Features.push_back(Args.MakeArgString("-" + Feature));
+
+    // Disable the base feature unconditionally, even if it was not
+    // explicitly in the features list (e.g. if we had +vfp3, which
+    // implies it).
+    Features.push_back("-fpregs");
   }
 
   // En/disable crc code generation.
diff --git a/clang/test/CodeGen/arm-mfpu-none.c b/clang/test/CodeGen/arm-mfpu-none.c
new file mode 100644
index 0000000000000..ae4b07d863813
--- /dev/null
+++ b/clang/test/CodeGen/arm-mfpu-none.c
@@ -0,0 +1,8 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang -target arm-none-eabi -mcpu=cortex-m4 -mfpu=none -S -o - %s | FileCheck %s
+
+// CHECK-LABEL: compute
+// CHECK-NOT: {{s[0-9]}}
+float compute(float a, float b) {
+  return (a+b) * (a-b);
+}
diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c
index 33cad80bf1492..921afa227ce28 100644
--- a/clang/test/Driver/arm-mfpu.c
+++ b/clang/test/Driver/arm-mfpu.c
@@ -318,6 +318,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FP %s
 // CHECK-NO-FP-NOT: "-target-feature" "+soft-float"
 // CHECK-NO-FP: "-target-feature" "+soft-float-abi"
+// CHECK-NO-FP: "-target-feature" "-fpregs"
 // CHECK-NO-FP: "-target-feature" "-vfp2"
 // CHECK-NO-FP: "-target-feature" "-vfp3"
 // CHECK-NO-FP: "-target-feature" "-vfp4"
@@ -363,6 +364,7 @@
 // CHECK-SOFT-ABI-FP: "-target-feature" "-fp-armv8"
 // CHECK-SOFT-ABI-FP: "-target-feature" "-neon"
 // CHECK-SOFT-ABI-FP: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-fpregs"
 
 // RUN: %clang -target arm-linux-androideabi21 %s -### -c 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM5-ANDROID-FP-DEFAULT %s
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index a33f602e532bf..8806ea52fdf42 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -198,6 +198,7 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
     Features.push_back("-fp-armv8");
     break;
   case FPUVersion::NONE:
+    Features.push_back("-fpregs");
     Features.push_back("-vfp2");
     Features.push_back("-vfp3");
     Features.push_back("-fp16");

From beb7f48a29859ed316e7c8f74ba09684fa163020 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:14:05 +0000
Subject: [PATCH 0899/1176] Include what you use in HexagonMCShuffler.cpp

HexagonMCShuffler.cpp was not using any APIs from Hexagon.h.  Doing so
is problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362381
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index dc6f19432a049..7d45b4fcfdde5 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -14,7 +14,6 @@
 #define DEBUG_TYPE "hexagon-shuffle"
 
 #include "MCTargetDesc/HexagonMCShuffler.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonShuffler.h"
 #include "llvm/MC/MCInst.h"

From bf2a356ec040bc7f0b3f16a4de4e71579a6c0ec5 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:14:10 +0000
Subject: [PATCH 0900/1176] Include what you use in HexagonMCTargetDesc.cpp

HexagonMCTargetDesc.cpp was not using any APIs from Hexagon.h.  Doing so
is problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362382
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 03abe7b904478..9c50b25156c38 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "Hexagon.h"
 #include "HexagonDepArch.h"
 #include "HexagonTargetStreamer.h"
 #include "MCTargetDesc/HexagonInstPrinter.h"

From 6214b577b74f286965fae4f6deb5e22f098d487c Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:14:15 +0000
Subject: [PATCH 0901/1176] Include what you use in HexagonMCChecker.cpp

HexagonMCChecker.cpp was not using any APIs from Hexagon.h.  Doing so is
problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362383
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 10041aa8af9e2..fcd3758600c1a 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCChecker.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCShuffler.h"

From 6e076a081a4ba6894a00288035cf14660d807534 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:14:20 +0000
Subject: [PATCH 0902/1176] Include what you use in HexagonShuffler.cpp

HexagonShuffler.cpp was not using any APIs from Hexagon.h, and was only
including it for transitive dependencies.  Doing so is problematic from
include-what-you-use perspective, but it is also a layering issue (it
creates a dependency cycle between the primary Hexagon target library
and the MCTargetDesc library).

llvm-svn: 362384
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 5514a9803ae43..18c7790a17cc1 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -14,7 +14,6 @@
 #define DEBUG_TYPE "hexagon-shuffle"
 
 #include "MCTargetDesc/HexagonShuffler.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
@@ -22,6 +21,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"

From ebe360edfa92b54a467621904f5bf1bd11990dd6 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:20:48 +0000
Subject: [PATCH 0903/1176] Include what you use in HexagonMCCompound.cpp

HexagonMCCompound.cpp was not using any APIs from Hexagon.h.  Doing so
is problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362385
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index f9c046b7bfa7d..ed571188c1e83 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -10,7 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "MCTargetDesc/HexagonMCShuffler.h"

From 970b9f961fbb7d00c47661cc589a4d599ccba143 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:20:53 +0000
Subject: [PATCH 0904/1176] Include what you use in HexagonMCCodeEmitter.cpp

HexagonMCCodeEmitter.cpp was not using any APIs from Hexagon.h.  Doing
so is problematic from include-what-you-use perspective, but it is also
a layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362386
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 84b9a05b1db9f..95e23c99868a4 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonFixupKinds.h"
 #include "MCTargetDesc/HexagonMCExpr.h"

From 03d1b33041b9b16c1ee0799dfef5f0adaa13b409 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:25:37 +0000
Subject: [PATCH 0905/1176] Include what you use in HexagonMCInstrInfo.cpp

HexagonMCInstrInfo.cpp was not using any APIs from Hexagon.h.  Doing so
is problematic from include-what-you-use perspective, but it is also a
layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362387
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 6c0a3b644e9d8..0750bfe74f760 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "Hexagon.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCChecker.h"
 #include "MCTargetDesc/HexagonMCExpr.h"

From 61b49ccb770504fe38faaa86152f71909da23dfc Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:41:18 +0000
Subject: [PATCH 0906/1176] Include what you use in HexagonAsmPrinter.h

llvm-svn: 362388
---
 llvm/lib/Target/Hexagon/HexagonAsmPrinter.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index a3301d7516a54..6c4b664e83f52 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
 #define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
 
-#include "Hexagon.h"
 #include "HexagonSubtarget.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"

From 8668fc0102a3df25c40ac214017a11d081b6ad40 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 11:41:22 +0000
Subject: [PATCH 0907/1176] Include what you use in HexagonInstPrinter.cpp

HexagonInstPrinter.cpp was not using any APIs from HexagonAsmPrinter.h.
Doing so is problematic from include-what-you-use perspective, but it is
also a layering issue (it creates a dependency cycle between the primary
Hexagon target library and the MCTargetDesc library).

llvm-svn: 362389
---
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 7adddf1f2ac1e..6b9e63f5ac9e2 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "HexagonInstPrinter.h"
-#include "HexagonAsmPrinter.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCAsmInfo.h"

From edfa756f3f5b9e31c69a34542d1b09c947f5dd2b Mon Sep 17 00:00:00 2001
From: Nicolai Haehnle <nhaehnle@gmail.com>
Date: Mon, 3 Jun 2019 12:07:41 +0000
Subject: [PATCH 0908/1176] AMDGPU/GFX10: V_CMPX_xxx instructions still have an
 omod operand

Summary: Change-Id: If6ee98e4a723b643bc37254fc6ef8b3812db16da

Reviewers: rampitec

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62720

Change-Id: Id547ef152b2f92b24dc1c0efbf7e4467c4fb4b6e
llvm-svn: 362390
---
 llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index a73350caaf7b1..fd0951e7545e8 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -270,8 +270,7 @@ void SIInsertSkips::kill(MachineInstr &MI) {
         .addImm(0)  // src1 modifiers
         .add(MI.getOperand(0));
 
-      if (!ST.hasNoSdstCMPX())
-        I.addImm(0);  // omod
+      I.addImm(0);  // omod
     }
     break;
   }

From fa8cd7691ac28d07f6a127ed26f0dbe49699bd59 Mon Sep 17 00:00:00 2001
From: Andrew Savonichev <andrew.savonichev@intel.com>
Date: Mon, 3 Jun 2019 12:34:59 +0000
Subject: [PATCH 0909/1176] [OpenCL] Use long instead of long long in x86
 builtins

Summary: According to C99 standard long long is at least 64 bits in
size. However, OpenCL C defines long long as 128 bit signed
integer. This prevents one to use x86 builtins when compiling OpenCL C
code for x86 targets. The patch changes long long to long for OpenCL
only.

Patch by: Alexander Batashev <alexander.batashev@intel.com>

Reviewers: craig.topper, Ka-Ka, eandrews, erichkeane, Anastasia

Reviewed By: Ka-Ka, erichkeane, Anastasia

Subscribers: a.elovikov, yaxunl, Anastasia, cfe-commits, ivankara, etyurin, asavonic

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62580

llvm-svn: 362391
---
 clang/include/clang/Basic/Builtins.def       |   1 +
 clang/include/clang/Basic/BuiltinsX86.def    | 834 +++++++++----------
 clang/include/clang/Basic/BuiltinsX86_64.def | 104 +--
 clang/lib/AST/ASTContext.cpp                 |  19 +-
 clang/test/CodeGen/builtins-x86.c            |  58 +-
 5 files changed, 532 insertions(+), 484 deletions(-)

diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index dc1ed8a5170c3..a9ca9abf50cc1 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -53,6 +53,7 @@
 //  Z   -> int32_t (require a native 32-bit integer type on the target)
 //  W   -> int64_t (require a native 64-bit integer type on the target)
 //  N   -> 'int' size if target is LP64, 'L' otherwise.
+//  O   -> long for OpenCL targets, long long otherwise.
 //  S   -> signed
 //  U   -> unsigned
 //  I   -> Required to constant fold to an integer constant expression.
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 47f79b90fc21a..f67f33007ac5f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -101,24 +101,24 @@ TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pand, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pandn, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_por, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pxor, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllq, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlq, "V1OiV1OiV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1Oi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1Oi", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllqi, "V1OiV1Oii", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "ncV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1OiV1Oii", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "ncV:64:", "mmx")
@@ -137,7 +137,7 @@ TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "nV:64:", "mmx")
-TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "nV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_movntq, "vV1Oi*V1Oi", "nV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "ncV:64:", "mmx")
 TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "ncV:64:", "mmx")
@@ -164,9 +164,9 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "ncV:64:", "mmx,sse")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "ncV:64:", "mmx,sse2")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "ncV:64:", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_paddq, "V1OiV1OiV1Oi", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1OiV2iV2i", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_psubq, "V1OiV1OiV1Oi", "ncV:64:", "mmx,sse2")
 
 // MMX+SSSE3
 TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "ncV:64:", "mmx,ssse3")
@@ -326,11 +326,11 @@ TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2OiV16cV16c", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2OiV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "ncV:128:", "sse2")
@@ -346,26 +346,26 @@ TARGET_BUILTIN(__builtin_ia32_mfence, "v", "n", "sse2")
 TARGET_HEADER_BUILTIN(_mm_mfence, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2")
 TARGET_BUILTIN(__builtin_ia32_pause, "v", "n", "")
 TARGET_HEADER_BUILTIN(_mm_pause, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2OiV4iV4i", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2OiV2OiV2Oi", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psllq128, "V2OiV2OiV2Oi", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2OiV2Oii", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2OiV2Oii", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2LLiV2LLiIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2LLiV2LLiIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2OiV2OiIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2OiV2OiIi", "ncV:128:", "sse2")
 
 TARGET_BUILTIN(__builtin_ia32_monitor, "vv*UiUi", "n", "sse3")
 TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3")
@@ -390,16 +390,16 @@ TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2OiV2Oi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "ncV:128:", "sse4.1")
@@ -428,30 +428,30 @@ TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2")
 TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2")
 
 // SSE4a
-TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "ncV:128:", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_extrqi, "V2OiV2OiIcIc", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_extrq, "V2OiV2OiV16c", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_insertqi, "V2OiV2OiV2OiIcIc", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_insertq, "V2OiV2OiV2Oi", "ncV:128:", "sse4a")
 TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "nV:128:", "sse4a")
 TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "nV:128:", "sse4a")
 
 // AES
-TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "ncV:128:", "aes")
-TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2OiV2OiV2Oi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2OiV2Oi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2OiV2OiIc", "ncV:128:", "aes")
 
 // VAES
-TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4OiV4OiV4Oi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f,vaes")
 
 // GFNI
 TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
@@ -465,11 +465,11 @@ TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx
 TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512bw,gfni")
 
 // CLMUL
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "ncV:128:", "pclmul")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2OiV2OiV2OiIc", "ncV:128:", "pclmul")
 
 // VPCLMULQDQ
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "ncV:256:", "vpclmulqdq")
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "ncV:512:", "avx512f,vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4OiV4OiV4OiIc", "ncV:256:", "vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8OiV8OiV8OiIc", "ncV:512:", "avx512f,vpclmulqdq")
 
 // AVX
 TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "ncV:256:", "avx")
@@ -482,9 +482,9 @@ TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2Oi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4Oi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "ncV:256:", "avx")
@@ -535,21 +535,21 @@ TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4OiV4Oi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4OiV4Oi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4OiV4Oi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx")
 TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx")
 TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "nV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2Oi", "nV:128:", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4Oi", "nV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "nV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "nV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2OiV2d", "nV:128:", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "nV:128:", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4OiV4d", "nV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "nV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "ncV:256:", "avx")
@@ -601,12 +601,12 @@ TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4OiV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4OiV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "ncV:256:", "avx2")
@@ -616,68 +616,68 @@ TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4OiV4OiIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4OiV4Oii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllq256, "V4OiV4OiV2Oi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4OiV4OiIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4OiV4Oii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4OiV4OiV2Oi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permti256, "V4OiV4OiV4OiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permdi256, "V4OiV4OiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2OiV4OiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4OiV4OiV2OiIi", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4OiV4OiC*V4Oi", "nV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2OiV2OiC*V2Oi", "nV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4Oi*V4OiV4Oi", "nV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2Oi*V2OiV2Oi", "nV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "ncV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4OiV4OiV4Oi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2OiV2OiV2Oi", "ncV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "ncV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "ncV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4OiV4OiV4Oi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2OiV2OiV2Oi", "ncV:128:", "avx2")
 
 // GATHER
 TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "nV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2OiV2dIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4OiV4dIc", "nV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "nV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2OiV4fIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4OiV4fIc", "nV:256:", "avx2")
 
-TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2OiV2OiOiC*V4iV2OiIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4OiV4OiOiC*V4iV4OiIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2OiV2OiOiC*V2OiV2OiIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4OiV4OiOiC*V4OiV4OiIc", "nV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "nV:128:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "nV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "nV:128:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2OiV4iIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4OiV4iIc", "nV:256:", "avx2")
 
 // F16C
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "ncV:128:", "f16c")
@@ -694,16 +694,16 @@ TARGET_BUILTIN(__builtin_ia32_fxrstor, "vv*", "n", "fxsr")
 TARGET_BUILTIN(__builtin_ia32_fxsave, "vv*", "n", "fxsr")
 
 // XSAVE
-TARGET_BUILTIN(__builtin_ia32_xsave, "vv*ULLi", "n", "xsave")
-TARGET_BUILTIN(__builtin_ia32_xrstor, "vv*ULLi", "n", "xsave")
-TARGET_BUILTIN(__builtin_ia32_xgetbv, "ULLiUi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xsave, "vv*UOi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xrstor, "vv*UOi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xgetbv, "UOiUi", "n", "xsave")
 TARGET_HEADER_BUILTIN(_xgetbv, "UWiUi", "nh", "immintrin.h", ALL_MS_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_xsetbv, "vUiULLi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xsetbv, "vUiUOi", "n", "xsave")
 TARGET_HEADER_BUILTIN(_xsetbv, "vUiUWi", "nh", "immintrin.h", ALL_MS_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_xsaveopt, "vv*ULLi", "n", "xsaveopt")
-TARGET_BUILTIN(__builtin_ia32_xrstors, "vv*ULLi", "n", "xsaves")
-TARGET_BUILTIN(__builtin_ia32_xsavec, "vv*ULLi", "n", "xsavec")
-TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*ULLi", "n", "xsaves")
+TARGET_BUILTIN(__builtin_ia32_xsaveopt, "vv*UOi", "n", "xsaveopt")
+TARGET_BUILTIN(__builtin_ia32_xrstors, "vv*UOi", "n", "xsaves")
+TARGET_BUILTIN(__builtin_ia32_xsavec, "vv*UOi", "n", "xsavec")
+TARGET_BUILTIN(__builtin_ia32_xsaves, "vv*UOi", "n", "xsaves")
 
 // SHSTK
 TARGET_BUILTIN(__builtin_ia32_incsspd, "vUi", "n", "shstk")
@@ -803,55 +803,55 @@ TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2OiV4iV4iV2Oi", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
 
 TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2OiV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2OiV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2OiV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2OiV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2OiV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2OiV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2OiV4i", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotq, "V2OiV2OiV2Oi", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2OiV2OiIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2OiV2OiV2Oi", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2OiV2OiV2Oi", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2OiV2OiV2OiIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "ncV:128:", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "ncV:256:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2OiV2OiV2OiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2OiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4OiIc", "ncV:256:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "ncV:128:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "ncV:256:", "xop")
 TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "ncV:128:", "xop")
@@ -866,10 +866,10 @@ TARGET_BUILTIN(__builtin_ia32_xend, "v", "n", "rtm")
 TARGET_BUILTIN(__builtin_ia32_xabort, "vIc", "n", "rtm")
 TARGET_BUILTIN(__builtin_ia32_xtest, "i", "n", "rtm")
 
-BUILTIN(__builtin_ia32_rdpmc, "ULLii", "")
-BUILTIN(__builtin_ia32_rdtsc, "ULLi", "")
-BUILTIN(__rdtsc, "ULLi", "")
-BUILTIN(__builtin_ia32_rdtscp, "ULLiUi*", "")
+BUILTIN(__builtin_ia32_rdpmc, "UOii", "")
+BUILTIN(__builtin_ia32_rdtsc, "UOi", "")
+BUILTIN(__rdtsc, "UOi", "")
+BUILTIN(__builtin_ia32_rdtscp, "UOiUi*", "")
 
 TARGET_BUILTIN(__builtin_ia32_rdpid, "Ui", "n", "rdpid")
 
@@ -930,35 +930,35 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "av
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8OiOiC*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vOi*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f")
 
@@ -975,76 +975,76 @@ TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512v
 TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
 TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
 
-TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2dvC*V2LLiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLivC*V2LLiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4dvC*V4LLiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLivC*V4LLiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4fvC*V2LLiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4ivC*V2LLiUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4fvC*V4LLiUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4ivC*V4LLiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2dvC*V2OiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2OiV2OivC*V2OiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4dvC*V4OiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4OiV4OivC*V4OiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4fvC*V2OiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4ivC*V2OiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4fvC*V4OiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4ivC*V4OiUcIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2dvC*V4iUcIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLivC*V4iUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2OiV2OivC*V4iUcIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4dvC*V4iUcIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLivC*V4iUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4OiV4OivC*V4iUcIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4fvC*V4iUcIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4ivC*V4iUcIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8fvC*V8iUcIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8ivC*V8iUcIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dvC*V8iUcIi", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fvC*V16iUsIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dvC*V8LLiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8fvC*V8LLiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLivC*V8iUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dvC*V8OiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8fvC*V8OiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8OiV8OivC*V8iUcIi", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16ivC*V16iUsIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLivC*V8LLiUcIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8ivC*V8LLiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8OiV8OivC*V8OiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8ivC*V8OiUcIi", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vv*UcV8iV8dIi", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vv*UsV16iV16fIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vv*UcV8LLiV8dIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vv*UcV8LLiV8fIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vv*UcV8iV8LLiIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vv*UcV8OiV8dIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vv*UcV8OiV8fIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vv*UcV8iV8OiIi", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8LLiV8LLiIi", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8LLiV8iIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8OiV8OiIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f")
 
 TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8ivC*IiIi", "nV:512:", "avx512pf")
 TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16ivC*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8LLivC*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8LLivC*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf")
 TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf")
 TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiv*IiIi", "nV:512:", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLiv*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf")
 
 TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "ULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2OiV2OiIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4OiV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2OiV2OiIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4OiV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "UOiV64cV64cIiUOi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
@@ -1073,21 +1073,21 @@ TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubusb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128, "V2LLiV2LLi", "ncV:128:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256, "V4LLiV4LLi", "ncV:256:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512, "V8LLiV8LLi", "ncV:512:", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512, "V8OiV8Oi", "ncV:512:", "avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512, "V16iV16i", "ncV:512:", "avx512cd")
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8OiV8Oi", "ncV:512:", "avx512cd")
 
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2OiV2Oi", "ncV:128:", "avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4OiV4Oi", "ncV:256:", "avx512vpopcntdq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8OiV8Oi", "ncV:512:", "avx512vpopcntdq")
 
 TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg")
@@ -1098,7 +1098,7 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bital
 
 TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg")
 TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "ncV:512:", "avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "UOiV64cV64cUOi", "ncV:512:", "avx512bitalg")
 
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
@@ -1131,8 +1131,8 @@ TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:",
 
 TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2OiV2OiV2OiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4OiV4OiV4OiUc", "ncV:256:", "avx512vl")
 
 TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
@@ -1145,8 +1145,8 @@ TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "ncV:128:", "av
 TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
 
 TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512vbmi2")
@@ -1170,8 +1170,8 @@ TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "av
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2OiV2OiV2OiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4OiV4OiV4OiUc", "ncV:256:", "avx512vl")
 
 TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
@@ -1180,8 +1180,8 @@ TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "ncV:256:", "a
 
 TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2OiC*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
 
 TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512vl,avx512vbmi2")
@@ -1200,16 +1200,16 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
@@ -1219,18 +1219,18 @@ TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "ncV:256:", "a
 TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512vl")
 
-TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vv*UcV2LLiV2dIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vv*UcV2LLiV2LLiIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vv*UcV4LLiV4dIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vv*UcV4LLiV4LLiIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vv*UcV2LLiV4fIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vv*UcV2LLiV4iIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vv*UcV4LLiV4fIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vv*UcV4LLiV4iIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vv*UcV2OiV2dIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vv*UcV2OiV2OiIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vv*UcV4OiV4dIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vv*UcV4OiV4OiIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vv*UcV2OiV4fIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vv*UcV2OiV4iIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vv*UcV4OiV4fIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vv*UcV4OiV4iIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vv*UcV4iV2dIi", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vv*UcV4iV2LLiIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vv*UcV4iV2OiIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vv*UcV4iV4dIi", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vv*UcV4iV4LLiIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vv*UcV4iV4OiIi", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vv*UcV4iV4fIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vv*UcV4iV4iIi", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vv*UcV8iV8fIi", "nV:256:", "avx512vl")
@@ -1239,15 +1239,15 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vv*UcV8iV8iIi", "nV:256:", "avx512
 TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2OiV2d", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4OiV4d", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8OiV8d", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi")
@@ -1258,9 +1258,9 @@ TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "
 TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2")
@@ -1268,9 +1268,9 @@ TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512v
 TARGET_BUILTIN(__builtin_ia32_vpshldvd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvw128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvw256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshldvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2")
@@ -1278,9 +1278,9 @@ TARGET_BUILTIN(__builtin_ia32_vpshldvw512, "V32sV32sV32sV32s", "ncV:512:", "avx5
 TARGET_BUILTIN(__builtin_ia32_vpshrdvd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdvw512, "V32sV32sV32sV32s", "ncV:512:", "avx512vbmi2")
@@ -1288,9 +1288,9 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdvw512, "V32sV32sV32sV32s", "ncV:512:", "avx5
 TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2OiV2OiV2OiIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8OiV8OiV8OiIi", "ncV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2")
@@ -1298,24 +1298,24 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512v
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2OiV4fUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2OiV2dV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4OiV4dV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2OiV4fV2OiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4OiV4fV4OiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2OiV4fUc", "ncV:128:", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq")
@@ -1333,46 +1333,46 @@ TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "ncV:256:", "av
 TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8OiV8dV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8OiV8fV8OiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8OiV8dUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8OiV8fUcIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolq512, "V8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prolq512, "V8OiV8OiIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolq128, "V2LLiV2LLiIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolq256, "V4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolq128, "V2OiV2OiIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolq256, "V4OiV4OiIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorq512, "V8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prorq512, "V8OiV8OiIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorq128, "V2LLiV2LLiIi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorq256, "V4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorq128, "V2OiV2OiIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorq256, "V4OiV4OiIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
@@ -1381,53 +1381,53 @@ TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8OiV8Oii", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8OiV8Oii", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psravq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psravq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8LLiV8LLiIi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8LLiV8LLiIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8OiV8OiIi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2OiV2OiC*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cUOi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8OiIiUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2OiIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
@@ -1438,10 +1438,10 @@ TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "nV:128:", "avx
 TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "nV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "nV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "nV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2OiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2OiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4OiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl")
@@ -1452,8 +1452,8 @@ TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx51
 TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2OiV2Oi*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4OiV4Oi*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl")
@@ -1461,7 +1461,7 @@ TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx51
 TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512bw")
@@ -1472,8 +1472,8 @@ TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "nV:256:", "avx512
 TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "nV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
@@ -1486,8 +1486,8 @@ TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx51
 TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2OiV2Oi", "ncV:128:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4OiV4Oi", "ncV:256:", "avx512cd,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "ncV:128:", "avx512f")
@@ -1498,7 +1498,7 @@ TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f")
@@ -1507,58 +1507,58 @@ TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512
 TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8OiV8Oii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psraq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraq256, "V4OiV4OiV2Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2OiV2Oii", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4OiV4Oii", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psraq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8OiV8OiV2Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8OiV8OiV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2OiV2OiV2OiV2OiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2OiV2OiV2OiV2OiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4OiV4OiV4OiV4OiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4OiV4OiV4OiV4OiIiUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8OiV8OiV8OiIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4OiV4OiV4OiIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "UOiV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cUOi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8OiUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8Oi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "ncV:128:", "avx512bw,avx512vl")
@@ -1569,21 +1569,21 @@ TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "ncV:128:", "avx512dq,avx51
 TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "ncV:128:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "ncV:128:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2OiUc", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4OiUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2Oi", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4Oi", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
@@ -1594,29 +1594,29 @@ TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx51
 TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2OiV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4OiV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2OiV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4OiV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
@@ -1627,29 +1627,29 @@ TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2OiV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4OiV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2OiV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4OiV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8OiV16cUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8OiV8iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8OiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8OiV8sUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
@@ -1660,35 +1660,35 @@ TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512
 TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2OiV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4OiV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2OiV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2OiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2OiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4OiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2OiV8OiIiV2OiUc", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4OiV8OiIiV4OiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2OiV4OiIiV2OiUc", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "ncV:512:", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8OiV8OiV2OiIi", "ncV:512:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8OiV8OiV4OiIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "ncV:256:", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4OiV4OiV2OiIi", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "ncV:512:", "avx512f")
@@ -1710,10 +1710,10 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "
 TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permdi512, "V8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permdi512, "V8OiV8OiIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
@@ -1721,8 +1721,8 @@ TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "ncV:128:", "avx512v
 TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "ncV:128:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "ncV:256:", "avx512dq,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "ncV:128:", "avx512dq,avx512vl")
@@ -1734,87 +1734,87 @@ TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq
 TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iULLiULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iULLiULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "ULLiULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "ULLiULLiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "ULLiULLiIUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "ULLiULLi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8OiV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8OiV8OiV8OiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cUOi", "ncV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8OiV8OiC*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "nV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8Oi*V8OiUc", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cUOi", "nV:512:", "avx512vbmi2")
 TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "ncV:128:", "avx512vl")
@@ -1850,16 +1850,16 @@ TARGET_BUILTIN(__builtin_ia32_vp2intersect_d_128, "vV4iV4iUc*Uc*", "nV:128:", "a
 // generic select intrinsics
 TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cUOiV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2OiUcV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4OiUcV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8OiUcV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 59b60c3be74b8..56051af55e7d5 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -43,65 +43,65 @@ TARGET_HEADER_BUILTIN(_InterlockedOr64,          "LLiLLiD*LLi", "nh", "intrin.h"
 TARGET_HEADER_BUILTIN(_InterlockedXor64,         "LLiLLiD*LLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "cx16")
 
-TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "ULLi", "n", "")
-TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vULLi", "n", "")
-TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "n", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "ncV:256:", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "UOi", "n", "")
+TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vUOi", "n", "")
+TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "OiV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "OiV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "OiV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "OiV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_movnti64, "vOi*Oi", "n", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2OiV2OiOiIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_crc32di, "UOiUOiUOi", "nc", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "OiV4OiIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4OiV4OiOiIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")
-TARGET_BUILTIN(__builtin_ia32_rdfsbase64, "ULLi", "n", "fsgsbase")
+TARGET_BUILTIN(__builtin_ia32_rdfsbase64, "UOi", "n", "fsgsbase")
 TARGET_BUILTIN(__builtin_ia32_rdgsbase32, "Ui", "n", "fsgsbase")
-TARGET_BUILTIN(__builtin_ia32_rdgsbase64, "ULLi", "n", "fsgsbase")
+TARGET_BUILTIN(__builtin_ia32_rdgsbase64, "UOi", "n", "fsgsbase")
 TARGET_BUILTIN(__builtin_ia32_wrfsbase32, "vUi", "n", "fsgsbase")
-TARGET_BUILTIN(__builtin_ia32_wrfsbase64, "vULLi", "n", "fsgsbase")
+TARGET_BUILTIN(__builtin_ia32_wrfsbase64, "vUOi", "n", "fsgsbase")
 TARGET_BUILTIN(__builtin_ia32_wrgsbase32, "vUi", "n", "fsgsbase")
-TARGET_BUILTIN(__builtin_ia32_wrgsbase64, "vULLi", "n", "fsgsbase")
+TARGET_BUILTIN(__builtin_ia32_wrgsbase64, "vUOi", "n", "fsgsbase")
 TARGET_BUILTIN(__builtin_ia32_fxrstor64, "vv*", "n", "fxsr")
 TARGET_BUILTIN(__builtin_ia32_fxsave64, "vv*", "n", "fxsr")
-TARGET_BUILTIN(__builtin_ia32_xsave64, "vv*ULLi", "n", "xsave")
-TARGET_BUILTIN(__builtin_ia32_xrstor64, "vv*ULLi", "n", "xsave")
-TARGET_BUILTIN(__builtin_ia32_xsaveopt64, "vv*ULLi", "n", "xsaveopt")
-TARGET_BUILTIN(__builtin_ia32_xrstors64, "vv*ULLi", "n", "xsaves")
-TARGET_BUILTIN(__builtin_ia32_xsavec64, "vv*ULLi", "n", "xsavec")
-TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*ULLi", "n", "xsaves")
-TARGET_BUILTIN(__builtin_ia32_incsspq, "vULLi", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_rdsspq, "ULLiULLi", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_wrssq, "vULLiv*", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_wrussq, "vULLiv*", "n", "shstk")
-TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcULLiULLiULLi*", "n", "")
-TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcULLiULLiULLi*", "n", "")
-TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiULLi*", "n", "rdrnd")
-TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiULLi*", "n", "rdseed")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "ULLiULLi", "nc", "lzcnt")
-TARGET_BUILTIN(__builtin_ia32_bextr_u64, "ULLiULLiULLi", "nc", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "ULLiULLi", "nc", "")
-TARGET_BUILTIN(__builtin_ia32_bzhi_di, "ULLiULLiULLi", "nc", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pdep_di, "ULLiULLiULLi", "nc", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "nc", "bmi2")
-TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "nc", "tbm")
-TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "n", "lwp")
-TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "n", "lwp")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_xsave64, "vv*UOi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xrstor64, "vv*UOi", "n", "xsave")
+TARGET_BUILTIN(__builtin_ia32_xsaveopt64, "vv*UOi", "n", "xsaveopt")
+TARGET_BUILTIN(__builtin_ia32_xrstors64, "vv*UOi", "n", "xsaves")
+TARGET_BUILTIN(__builtin_ia32_xsavec64, "vv*UOi", "n", "xsavec")
+TARGET_BUILTIN(__builtin_ia32_xsaves64, "vv*UOi", "n", "xsaves")
+TARGET_BUILTIN(__builtin_ia32_incsspq, "vUOi", "n", "shstk")
+TARGET_BUILTIN(__builtin_ia32_rdsspq, "UOiUOi", "n", "shstk")
+TARGET_BUILTIN(__builtin_ia32_wrssq, "vUOiv*", "n", "shstk")
+TARGET_BUILTIN(__builtin_ia32_wrussq, "vUOiv*", "n", "shstk")
+TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
+TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
+TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
+TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "nc", "bmi")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
+TARGET_BUILTIN(__builtin_ia32_bextri_u64, "UOiUOiIUOi", "nc", "tbm")
+TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcUOiUiUi", "n", "lwp")
+TARGET_BUILTIN(__builtin_ia32_lwpval64, "vUOiUiUi", "n", "lwp")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "OiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "UOiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "OiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "UOiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "OiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "UOiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "OiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "UOiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dOiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fOiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dUOiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fUOiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
-TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vULLi", "n", "ptwrite")
+TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 4f1df7cdf190a..f732a7531dbe0 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -9282,13 +9282,13 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
       Unsigned = true;
       break;
     case 'L':
-      assert(!IsSpecial && "Can't use 'L' with 'W', 'N' or 'Z' modifiers");
+      assert(!IsSpecial && "Can't use 'L' with 'W', 'N', 'Z' or 'O' modifiers");
       assert(HowLong <= 2 && "Can't have LLLL modifier");
       ++HowLong;
       break;
     case 'N':
       // 'N' behaves like 'L' for all non LP64 targets and 'int' otherwise.
-      assert(!IsSpecial && "Can't use two 'N', 'W' or 'Z' modifiers!");
+      assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
       assert(HowLong == 0 && "Can't use both 'L' and 'N' modifiers!");
       #ifndef NDEBUG
       IsSpecial = true;
@@ -9298,7 +9298,7 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
       break;
     case 'W':
       // This modifier represents int64 type.
-      assert(!IsSpecial && "Can't use two 'N', 'W' or 'Z'  modifiers!");
+      assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
       assert(HowLong == 0 && "Can't use both 'L' and 'W' modifiers!");
       #ifndef NDEBUG
       IsSpecial = true;
@@ -9316,7 +9316,7 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
       break;
     case 'Z':
       // This modifier represents int32 type.
-      assert(!IsSpecial && "Can't use two 'N', 'W' or 'Z' modifiers!");
+      assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
       assert(HowLong == 0 && "Can't use both 'L' and 'Z' modifiers!");
       #ifndef NDEBUG
       IsSpecial = true;
@@ -9335,6 +9335,17 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context,
         break;
       }
       break;
+    case 'O':
+      assert(!IsSpecial && "Can't use two 'N', 'W', 'Z' or 'O' modifiers!");
+      assert(HowLong == 0 && "Can't use both 'L' and 'O' modifiers!");
+      #ifndef NDEBUG
+      IsSpecial = true;
+      #endif
+      if (Context.getLangOpts().OpenCL)
+        HowLong = 1;
+      else
+        HowLong = 2;
+      break;
     }
   }
 
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index e237bc2d12b8a..8236922e3d3ae 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
 // RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +shstk -target-feature +clzero -target-feature +wbnoinvd -target-feature +cldemote -fsyntax-only -o %t %s
+// RUN: %clang_cc1 -DUSE_64 -DOPENCL -x cl -cl-std=CL2.0 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s
 
 #ifdef USE_ALL
 #define USE_3DNOW
@@ -11,7 +12,11 @@
 typedef char V8c __attribute__((vector_size(8 * sizeof(char))));
 typedef signed short V4s __attribute__((vector_size(8)));
 typedef signed int V2i __attribute__((vector_size(8)));
+#ifndef OPENCL
 typedef signed long long V1LLi __attribute__((vector_size(8)));
+#else
+typedef signed long V1LLi __attribute__((vector_size(8)));
+#endif
 
 typedef float V2f __attribute__((vector_size(8)));
 
@@ -19,7 +24,11 @@ typedef float V2f __attribute__((vector_size(8)));
 typedef char V16c __attribute__((vector_size(16)));
 typedef signed short V8s __attribute__((vector_size(16)));
 typedef signed int V4i __attribute__((vector_size(16)));
+#ifndef OPENCL
 typedef signed long long V2LLi __attribute__((vector_size(16)));
+#else
+typedef signed long V2LLi __attribute__((vector_size(16)));
+#endif
 
 typedef float V4f __attribute__((vector_size(16)));
 typedef double V2d __attribute__((vector_size(16)));
@@ -27,7 +36,11 @@ typedef double V2d __attribute__((vector_size(16)));
 // 256-bit
 typedef char V32c __attribute__((vector_size(32)));
 typedef signed int V8i __attribute__((vector_size(32)));
+#ifndef OPENCL
 typedef signed long long V4LLi __attribute__((vector_size(32)));
+#else
+typedef signed long V4LLi __attribute__((vector_size(32)));
+#endif
 
 typedef double V4d __attribute__((vector_size(32)));
 typedef float  V8f __attribute__((vector_size(32)));
@@ -41,21 +54,30 @@ void f0() {
 #endif
   signed int          tmp_i;
   unsigned int        tmp_Ui;
+#ifndef OPENCL
   signed long long    tmp_LLi;
   unsigned long long  tmp_ULLi;
+#else
+  signed long         tmp_LLi;
+  unsigned long       tmp_ULLi;
+#endif
   float               tmp_f;
   double              tmp_d;
 
   void*          tmp_vp;
   const void*    tmp_vCp;
-  char*          tmp_cp; 
-  const char*    tmp_cCp; 
+  char*          tmp_cp;
+  const char*    tmp_cCp;
   int*           tmp_ip;
   float*         tmp_fp;
   const float*   tmp_fCp;
   double*        tmp_dp;
   const double*  tmp_dCp;
+#ifndef OPENCL
   long long*     tmp_LLip;
+#else
+  long*          tmp_LLip;
+#endif
 
 #define imm_i 32
 #define imm_i_0_2 0
@@ -102,8 +124,8 @@ void f0() {
   const V4d* tmp_V4dCp;
   const V8f* tmp_V8fCp;
 
-  tmp_V2LLi = __builtin_ia32_undef128();
-  tmp_V4LLi = __builtin_ia32_undef256();
+  tmp_V2d = __builtin_ia32_undef128();
+  tmp_V4d = __builtin_ia32_undef256();
 
   tmp_i = __builtin_ia32_comieq(tmp_V4f, tmp_V4f);
   tmp_i = __builtin_ia32_comilt(tmp_V4f, tmp_V4f);
@@ -203,9 +225,9 @@ void f0() {
   tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s);
-  tmp_V8s = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
-  tmp_V4i = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
-  tmp_V8s = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
+  tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
+  tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
+  tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_pmulhuw128(tmp_V8s, tmp_V8s);
   tmp_V4f = __builtin_ia32_addsubps(tmp_V4f, tmp_V4f);
   tmp_V2d = __builtin_ia32_addsubpd(tmp_V2d, tmp_V2d);
@@ -225,7 +247,7 @@ void f0() {
   tmp_V2i = __builtin_ia32_phsubd(tmp_V2i, tmp_V2i);
   tmp_V8s = __builtin_ia32_phsubsw128(tmp_V8s, tmp_V8s);
   tmp_V4s = __builtin_ia32_phsubsw(tmp_V4s, tmp_V4s);
-  tmp_V16c = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
+  tmp_V8s = __builtin_ia32_pmaddubsw128(tmp_V16c, tmp_V16c);
   tmp_V8c = __builtin_ia32_pmaddubsw(tmp_V8c, tmp_V8c);
   tmp_V8s = __builtin_ia32_pmulhrsw128(tmp_V8s, tmp_V8s);
   tmp_V4s = __builtin_ia32_pmulhrsw(tmp_V4s, tmp_V4s);
@@ -271,9 +293,13 @@ void f0() {
   __builtin_ia32_clrssbsy(tmp_vp);
 
   (void) __builtin_ia32_ldmxcsr(tmp_Ui);
+#ifndef OPENCL
   (void) _mm_setcsr(tmp_Ui);
+#endif
   tmp_Ui = __builtin_ia32_stmxcsr();
+#ifndef OPENCL
   tmp_Ui = _mm_getcsr();
+#endif
   (void)__builtin_ia32_fxsave(tmp_vp);
   (void)__builtin_ia32_fxsave64(tmp_vp);
   (void)__builtin_ia32_fxrstor(tmp_vp);
@@ -321,7 +347,9 @@ void f0() {
   tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
   (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
   (void) __builtin_ia32_sfence();
+#ifndef OPENCL
   (void) _mm_sfence();
+#endif
 
   tmp_V4s = __builtin_ia32_psadbw(tmp_V8c, tmp_V8c);
   tmp_V4f = __builtin_ia32_rcpps(tmp_V4f);
@@ -356,13 +384,21 @@ void f0() {
   tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f);
   tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f);
   (void) __builtin_ia32_clflush(tmp_vCp);
+#ifndef OPENCL
   (void) _mm_clflush(tmp_vCp);
+#endif
   (void) __builtin_ia32_lfence();
+#ifndef OPENCL
   (void) _mm_lfence();
+#endif
   (void) __builtin_ia32_mfence();
+#ifndef OPENCL
   (void) _mm_mfence();
+#endif
   (void) __builtin_ia32_pause();
+#ifndef OPENCL
   (void) _mm_pause();
+#endif
   tmp_V4s = __builtin_ia32_psllwi(tmp_V4s, tmp_i);
   tmp_V2i = __builtin_ia32_pslldi(tmp_V2i, tmp_i);
   tmp_V1LLi = __builtin_ia32_psllqi(tmp_V1LLi, tmp_i);
@@ -389,12 +425,12 @@ void f0() {
   tmp_V2LLi = __builtin_ia32_psrlqi128(tmp_V2LLi, tmp_i);
   tmp_V8s = __builtin_ia32_psrawi128(tmp_V8s, tmp_i);
   tmp_V4i = __builtin_ia32_psradi128(tmp_V4i, tmp_i);
-  tmp_V8s = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s);
+  tmp_V4i = __builtin_ia32_pmaddwd128(tmp_V8s, tmp_V8s);
   (void) __builtin_ia32_monitor(tmp_vp, tmp_Ui, tmp_Ui);
   (void) __builtin_ia32_mwait(tmp_Ui, tmp_Ui);
   tmp_V16c = __builtin_ia32_lddqu(tmp_cCp);
-  tmp_V2LLi = __builtin_ia32_palignr128(tmp_V2LLi, tmp_V2LLi, imm_i);
-  tmp_V1LLi = __builtin_ia32_palignr(tmp_V1LLi, tmp_V1LLi, imm_i);
+  tmp_V16c = __builtin_ia32_palignr128(tmp_V16c, tmp_V16c, imm_i);
+  tmp_V8c = __builtin_ia32_palignr(tmp_V8c, tmp_V8c, imm_i);
 #ifdef USE_SSE4
   tmp_V16c = __builtin_ia32_pblendvb128(tmp_V16c, tmp_V16c, tmp_V16c);
   tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);

From c69ee63cb981ed5fcb66fb37864fcbc7a946207a Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 12:37:11 +0000
Subject: [PATCH 0910/1176] Include what you use in LanaiDisassembler.cpp

llvm-svn: 362392
---
 llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index e669538af6095..25ae7c521706a 100644
--- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -12,8 +12,9 @@
 
 #include "LanaiDisassembler.h"
 
-#include "Lanai.h"
-#include "LanaiSubtarget.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "LanaiInstrInfo.h"
 #include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"

From 9c78db6005370b779a41f6914cad46d4f5a992ea Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Mon, 3 Jun 2019 12:39:47 +0000
Subject: [PATCH 0911/1176] Re-land [LLD][COFF] Early load PDB type server
 files

We need to have all input files ready before doing debuginfo type merging.
This patch is moving the late PDB type server discovery much earlier in the process, when the explicit inputs (OBJs, LIBs) are loaded.
The short term goal is to parallelize type merging.

Differential Revision: https://reviews.llvm.org/D60095

llvm-svn: 362393
---
 lld/COFF/DebugTypes.cpp                       | 214 ++++++++++++++++--
 lld/COFF/DebugTypes.h                         |  29 ++-
 lld/COFF/Driver.cpp                           |   4 +
 lld/COFF/Driver.h                             |   4 +-
 lld/COFF/PDB.cpp                              | 122 +---------
 .../Inputs/pdb-type-server-invalid-path.yaml  | 121 ++++++++++
 .../pdb-type-server-valid-signature.yaml      |   2 +-
 .../pdb-type-server-invalid-signature.yaml    |  16 +-
 8 files changed, 368 insertions(+), 144 deletions(-)
 create mode 100644 lld/test/COFF/Inputs/pdb-type-server-invalid-path.yaml

diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
index 34f32ea085ca3..bcdb33fadee66 100644
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -7,8 +7,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "DebugTypes.h"
+#include "Driver.h"
 #include "InputFiles.h"
+#include "lld/Common/ErrorHandler.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/Support/Path.h"
 
 using namespace lld;
 using namespace lld::coff;
@@ -16,14 +23,44 @@ using namespace llvm;
 using namespace llvm::codeview;
 
 namespace {
+// The TypeServerSource class represents a PDB type server, a file referenced by
+// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
+// files, therefore there must be only once instance per OBJ lot. The file path
+// is discovered from the dependent OBJ's debug type stream. The
+// TypeServerSource object is then queued and loaded by the COFF Driver. The
+// debug type stream for such PDB files will be merged first in the final PDB,
+// before any dependent OBJ.
 class TypeServerSource : public TpiSource {
 public:
-  TypeServerSource(ObjFile *F) : TpiSource(PDB, F) {}
+  explicit TypeServerSource(MemoryBufferRef M, llvm::pdb::NativeSession *S)
+      : TpiSource(PDB, nullptr), Session(S), MB(M) {}
+
+  // Queue a PDB type server for loading in the COFF Driver
+  static void enqueue(const ObjFile *DependentFile,
+                      const TypeServer2Record &TS);
+
+  // Create an instance
+  static Expected<TypeServerSource *> getInstance(MemoryBufferRef M);
+
+  // Fetch the PDB instance loaded for a corresponding dependent OBJ.
+  static Expected<TypeServerSource *>
+  findFromFile(const ObjFile *DependentFile);
+
+  static std::map<std::string, std::pair<std::string, TypeServerSource *>>
+      Instances;
+
+  // The interface to the PDB (if it was opened successfully)
+  std::unique_ptr<llvm::pdb::NativeSession> Session;
+
+private:
+  MemoryBufferRef MB;
 };
 
+// This class represents the debug type stream of an OBJ file that depends on a
+// PDB type server (see TypeServerSource).
 class UseTypeServerSource : public TpiSource {
 public:
-  UseTypeServerSource(ObjFile *F, TypeServer2Record *TS)
+  UseTypeServerSource(const ObjFile *F, const TypeServer2Record *TS)
       : TpiSource(UsingPDB, F), TypeServerDependency(*TS) {}
 
   // Information about the PDB type server dependency, that needs to be loaded
@@ -31,14 +68,20 @@ class UseTypeServerSource : public TpiSource {
   TypeServer2Record TypeServerDependency;
 };
 
+// This class represents the debug type stream of a Microsoft precompiled
+// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
+// PDB, before any other OBJs that depend on this. Note that only MSVC generate
+// such files, clang does not.
 class PrecompSource : public TpiSource {
 public:
-  PrecompSource(ObjFile *F) : TpiSource(PCH, F) {}
+  PrecompSource(const ObjFile *F) : TpiSource(PCH, F) {}
 };
 
+// This class represents the debug type stream of an OBJ file that depends on a
+// Microsoft precompiled headers OBJ (see PrecompSource).
 class UsePrecompSource : public TpiSource {
 public:
-  UsePrecompSource(ObjFile *F, PrecompRecord *Precomp)
+  UsePrecompSource(const ObjFile *F, const PrecompRecord *Precomp)
       : TpiSource(UsingPCH, F), PrecompDependency(*Precomp) {}
 
   // Information about the Precomp OBJ dependency, that needs to be loaded in
@@ -49,40 +92,177 @@ class UsePrecompSource : public TpiSource {
 
 static std::vector<std::unique_ptr<TpiSource>> GC;
 
-TpiSource::TpiSource(TpiKind K, ObjFile *F) : Kind(K), File(F) {
+TpiSource::TpiSource(TpiKind K, const ObjFile *F) : Kind(K), File(F) {
   GC.push_back(std::unique_ptr<TpiSource>(this));
 }
 
-TpiSource *coff::makeTpiSource(ObjFile *F) {
+TpiSource *lld::coff::makeTpiSource(const ObjFile *F) {
   return new TpiSource(TpiSource::Regular, F);
 }
 
-TpiSource *coff::makeTypeServerSource(ObjFile *F) {
-  return new TypeServerSource(F);
-}
-
-TpiSource *coff::makeUseTypeServerSource(ObjFile *F, TypeServer2Record *TS) {
+TpiSource *lld::coff::makeUseTypeServerSource(const ObjFile *F,
+                                              const TypeServer2Record *TS) {
+  TypeServerSource::enqueue(F, *TS);
   return new UseTypeServerSource(F, TS);
 }
 
-TpiSource *coff::makePrecompSource(ObjFile *F) { return new PrecompSource(F); }
+TpiSource *lld::coff::makePrecompSource(const ObjFile *F) {
+  return new PrecompSource(F);
+}
 
-TpiSource *coff::makeUsePrecompSource(ObjFile *F, PrecompRecord *Precomp) {
+TpiSource *lld::coff::makeUsePrecompSource(const ObjFile *F,
+                                           const PrecompRecord *Precomp) {
   return new UsePrecompSource(F, Precomp);
 }
 
 namespace lld {
 namespace coff {
 template <>
-const PrecompRecord &retrieveDependencyInfo(TpiSource *Source) {
+const PrecompRecord &retrieveDependencyInfo(const TpiSource *Source) {
   assert(Source->Kind == TpiSource::UsingPCH);
-  return ((UsePrecompSource *)Source)->PrecompDependency;
+  return ((const UsePrecompSource *)Source)->PrecompDependency;
 }
 
 template <>
-const TypeServer2Record &retrieveDependencyInfo(TpiSource *Source) {
+const TypeServer2Record &retrieveDependencyInfo(const TpiSource *Source) {
   assert(Source->Kind == TpiSource::UsingPDB);
-  return ((UseTypeServerSource *)Source)->TypeServerDependency;
+  return ((const UseTypeServerSource *)Source)->TypeServerDependency;
 }
 } // namespace coff
 } // namespace lld
+
+std::map<std::string, std::pair<std::string, TypeServerSource *>>
+    TypeServerSource::Instances;
+
+// Make a PDB path assuming the PDB is in the same folder as the OBJ
+static std::string getPdbBaseName(const ObjFile *File, StringRef TSPath) {
+  StringRef LocalPath =
+      !File->ParentName.empty() ? File->ParentName : File->getName();
+  SmallString<128> Path = sys::path::parent_path(LocalPath);
+
+  // Currently, type server PDBs are only created by MSVC cl, which only runs
+  // on Windows, so we can assume type server paths are Windows style.
+  sys::path::append(Path, sys::path::filename(TSPath, sys::path::Style::windows));
+  return Path.str();
+}
+
+// The casing of the PDB path stamped in the OBJ can differ from the actual path
+// on disk. With this, we ensure to always use lowercase as a key for the
+// PDBInputFile::Instances map, at least on Windows.
+static std::string normalizePdbPath(StringRef path) {
+#if defined(_WIN32)
+  return path.lower();
+#else // LINUX
+  return path;
+#endif
+}
+
+// If existing, return the actual PDB path on disk.
+static Optional<std::string> findPdbPath(StringRef PDBPath,
+                                         const ObjFile *DependentFile) {
+  // Ensure the file exists before anything else. In some cases, if the path
+  // points to a removable device, Driver::enqueuePath() would fail with an
+  // error (EAGAIN, "resource unavailable try again") which we want to skip
+  // silently.
+  if (llvm::sys::fs::exists(PDBPath))
+    return normalizePdbPath(PDBPath);
+  std::string Ret = getPdbBaseName(DependentFile, PDBPath);
+  if (llvm::sys::fs::exists(Ret))
+    return normalizePdbPath(Ret);
+  return None;
+}
+
+// Fetch the PDB instance that was already loaded by the COFF Driver.
+Expected<TypeServerSource *>
+TypeServerSource::findFromFile(const ObjFile *DependentFile) {
+  const TypeServer2Record &TS =
+      retrieveDependencyInfo<TypeServer2Record>(DependentFile->DebugTypesObj);
+
+  Optional<std::string> P = findPdbPath(TS.Name, DependentFile);
+  if (!P)
+    return createFileError(TS.Name, errorCodeToError(std::error_code(
+                                        ENOENT, std::generic_category())));
+
+  auto It = TypeServerSource::Instances.find(*P);
+  // The PDB file exists on disk, at this point we expect it to have been
+  // inserted in the map by TypeServerSource::loadPDB()
+  assert(It != TypeServerSource::Instances.end());
+
+  std::pair<std::string, TypeServerSource *> &PDB = It->second;
+
+  if (!PDB.second)
+    return createFileError(
+        *P, createStringError(inconvertibleErrorCode(), PDB.first.c_str()));
+
+  pdb::PDBFile &PDBFile = (PDB.second)->Session->getPDBFile();
+  pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
+
+  // Just because a file with a matching name was found doesn't mean it can be
+  // used. The GUID and Age must match between the PDB header and the OBJ
+  // TypeServer2 record. The 'Age' is used by MSVC incremental compilation.
+  if (Info.getGuid() != TS.getGuid() || Info.getAge() != TS.getAge())
+    return createFileError(
+        TS.Name,
+        make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
+
+  return PDB.second;
+}
+
+// FIXME: Temporary interface until PDBLinker::maybeMergeTypeServerPDB() is
+// moved here.
+Expected<llvm::pdb::NativeSession *>
+lld::coff::findTypeServerSource(const ObjFile *F) {
+  Expected<TypeServerSource *> TS = TypeServerSource::findFromFile(F);
+  if (!TS)
+    return TS.takeError();
+  return TS.get()->Session.get();
+}
+
+// Queue a PDB type server for loading in the COFF Driver
+void TypeServerSource::enqueue(const ObjFile *DependentFile,
+                               const TypeServer2Record &TS) {
+  // Start by finding where the PDB is located (either the record path or next
+  // to the OBJ file)
+  Optional<std::string> P = findPdbPath(TS.Name, DependentFile);
+  if (!P)
+    return;
+  auto It = TypeServerSource::Instances.emplace(
+      *P, std::pair<std::string, TypeServerSource *>{});
+  if (!It.second)
+    return; // another OBJ already scheduled this PDB for load
+
+  Driver->enqueuePath(*P, false);
+}
+
+// Create an instance of TypeServerSource or an error string if the PDB couldn't
+// be loaded. The error message will be displayed later, when the referring OBJ
+// will be merged in. NOTE - a PDB load failure is not a link error: some
+// debug info will simply be missing from the final PDB - that is the default
+// accepted behavior.
+void lld::coff::loadTypeServerSource(llvm::MemoryBufferRef M) {
+  std::string Path = normalizePdbPath(M.getBufferIdentifier());
+
+  Expected<TypeServerSource *> TS = TypeServerSource::getInstance(M);
+  if (!TS)
+    TypeServerSource::Instances[Path] = {toString(TS.takeError()), nullptr};
+  else
+    TypeServerSource::Instances[Path] = {{}, *TS};
+}
+
+Expected<TypeServerSource *> TypeServerSource::getInstance(MemoryBufferRef M) {
+  std::unique_ptr<llvm::pdb::IPDBSession> ISession;
+  Error Err = pdb::NativeSession::createFromPdb(
+      MemoryBuffer::getMemBuffer(M, false), ISession);
+  if (Err)
+    return std::move(Err);
+
+  std::unique_ptr<llvm::pdb::NativeSession> Session(
+      static_cast<pdb::NativeSession *>(ISession.release()));
+
+  pdb::PDBFile &PDBFile = Session->getPDBFile();
+  Expected<pdb::InfoStream &> Info = PDBFile.getPDBInfoStream();
+  // All PDB Files should have an Info stream.
+  if (!Info)
+    return Info.takeError();
+  return new TypeServerSource(M, Session.release());
+}
diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h
index 0505a354257af..cb03aba5b0d2d 100644
--- a/lld/COFF/DebugTypes.h
+++ b/lld/COFF/DebugTypes.h
@@ -10,12 +10,16 @@
 #define LLD_COFF_DEBUGTYPES_H
 
 #include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 namespace codeview {
 class PrecompRecord;
 class TypeServer2Record;
 } // namespace codeview
+namespace pdb {
+class NativeSession;
+}
 } // namespace llvm
 
 namespace lld {
@@ -27,23 +31,28 @@ class TpiSource {
 public:
   enum TpiKind { Regular, PCH, UsingPCH, PDB, UsingPDB };
 
-  TpiSource(TpiKind K, ObjFile *F);
+  TpiSource(TpiKind K, const ObjFile *F);
   virtual ~TpiSource() {}
 
   const TpiKind Kind;
-  ObjFile *File;
+  const ObjFile *File;
 };
 
-TpiSource *makeTpiSource(ObjFile *F);
-TpiSource *makeTypeServerSource(ObjFile *F);
-TpiSource *makeUseTypeServerSource(ObjFile *F,
-                                   llvm::codeview::TypeServer2Record *TS);
-TpiSource *makePrecompSource(ObjFile *F);
-TpiSource *makeUsePrecompSource(ObjFile *F,
-                                llvm::codeview::PrecompRecord *Precomp);
+TpiSource *makeTpiSource(const ObjFile *F);
+TpiSource *makeUseTypeServerSource(const ObjFile *F,
+                                   const llvm::codeview::TypeServer2Record *TS);
+TpiSource *makePrecompSource(const ObjFile *F);
+TpiSource *makeUsePrecompSource(const ObjFile *F,
+                                const llvm::codeview::PrecompRecord *Precomp);
+
+void loadTypeServerSource(llvm::MemoryBufferRef M);
 
 // Temporary interface to get the dependency
-template <typename T> const T &retrieveDependencyInfo(TpiSource *Source);
+template <typename T> const T &retrieveDependencyInfo(const TpiSource *Source);
+
+// Temporary interface until we move PDBLinker::maybeMergeTypeServerPDB here
+llvm::Expected<llvm::pdb::NativeSession *>
+findTypeServerSource(const ObjFile *F);
 
 } // namespace coff
 } // namespace lld
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index df374f518d94d..7f7fde12980d6 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "DebugTypes.h"
 #include "Driver.h"
 #include "Config.h"
 #include "ICF.h"
@@ -181,6 +182,9 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> MB,
   case file_magic::coff_import_library:
     Symtab->addFile(make<ObjFile>(MBRef));
     break;
+  case file_magic::pdb:
+    loadTypeServerSource(MBRef);
+    break;
   case file_magic::coff_cl_gl_object:
     error(Filename + ": is not a native COFF file. Recompile without /GL");
     break;
diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h
index f9448bd2d3b16..f0c2ee6a7728d 100644
--- a/lld/COFF/Driver.h
+++ b/lld/COFF/Driver.h
@@ -77,6 +77,8 @@ class LinkerDriver {
 
   MemoryBufferRef takeBuffer(std::unique_ptr<MemoryBuffer> MB);
 
+  void enqueuePath(StringRef Path, bool WholeArchive);
+
 private:
   std::unique_ptr<llvm::TarWriter> Tar; // for /linkrepro
 
@@ -120,8 +122,6 @@ class LinkerDriver {
   void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName,
                         StringRef ParentName, uint64_t OffsetInArchive);
 
-  void enqueuePath(StringRef Path, bool WholeArchive);
-
   void enqueueTask(std::function<void()> Task);
   bool run();
 
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index 242235154d05f..a99a02362776f 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -175,11 +175,6 @@ class PDBLinker {
 
   llvm::SmallString<128> NativePath;
 
-  /// A list of other PDBs which are loaded during the linking process and which
-  /// we need to keep around since the linking operation may reference pointers
-  /// inside of these PDBs.
-  llvm::SmallVector<std::unique_ptr<pdb::NativeSession>, 2> LoadedPDBs;
-
   std::vector<pdb::SecMapEntry> SectionMap;
 
   /// Type index mappings of type server PDBs that we've loaded so far.
@@ -189,10 +184,6 @@ class PDBLinker {
   /// far.
   std::map<uint32_t, CVIndexMap> PrecompTypeIndexMappings;
 
-  /// List of TypeServer PDBs which cannot be loaded.
-  /// Cached to prevent repeated load attempts.
-  std::map<codeview::GUID, std::string> MissingTypeServerPDBs;
-
   // For statistics
   uint64_t GlobalSymbols = 0;
   uint64_t ModuleSymbols = 0;
@@ -338,7 +329,7 @@ PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) {
   ScopedTimer T(TypeMergingTimer);
 
   if (!File->DebugTypesObj)
-      return *ObjectIndexMap; // no Types stream
+    return *ObjectIndexMap; // no Types stream
 
   // Precompiled headers objects need to save the index map for further
   // reference by other objects which use the precompiled headers.
@@ -416,115 +407,26 @@ PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) {
   return *ObjectIndexMap;
 }
 
-static Expected<std::unique_ptr<pdb::NativeSession>>
-tryToLoadPDB(const codeview::GUID &GuidFromObj, StringRef TSPath) {
-  // Ensure the file exists before anything else. We want to return ENOENT,
-  // "file not found", even if the path points to a removable device (in which
-  // case the return message would be EAGAIN, "resource unavailable try again")
-  if (!llvm::sys::fs::exists(TSPath))
-    return errorCodeToError(std::error_code(ENOENT, std::generic_category()));
-
-  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(
-      TSPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
-  if (!MBOrErr)
-    return errorCodeToError(MBOrErr.getError());
-
-  std::unique_ptr<pdb::IPDBSession> ThisSession;
-  if (auto EC = pdb::NativeSession::createFromPdb(
-          MemoryBuffer::getMemBuffer(Driver->takeBuffer(std::move(*MBOrErr)),
-                                     /*RequiresNullTerminator=*/false),
-          ThisSession))
-    return std::move(EC);
-
-  std::unique_ptr<pdb::NativeSession> NS(
-      static_cast<pdb::NativeSession *>(ThisSession.release()));
-  pdb::PDBFile &File = NS->getPDBFile();
-  auto ExpectedInfo = File.getPDBInfoStream();
-  // All PDB Files should have an Info stream.
-  if (!ExpectedInfo)
-    return ExpectedInfo.takeError();
-
-  // Just because a file with a matching name was found and it was an actual
-  // PDB file doesn't mean it matches.  For it to match the InfoStream's GUID
-  // must match the GUID specified in the TypeServer2 record.
-  if (ExpectedInfo->getGuid() != GuidFromObj)
-    return make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date);
-
-  return std::move(NS);
-}
-
 Expected<const CVIndexMap &> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File) {
-  const TypeServer2Record &TS =
-      retrieveDependencyInfo<TypeServer2Record>(File->DebugTypesObj);
+  Expected<llvm::pdb::NativeSession *> PDBSession = findTypeServerSource(File);
+  if (!PDBSession)
+    return PDBSession.takeError();
 
-  const codeview::GUID &TSId = TS.getGuid();
-  StringRef TSPath = TS.getName();
+  pdb::PDBFile &PDBFile = PDBSession.get()->getPDBFile();
+  pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
 
-  // First, check if the PDB has previously failed to load.
-  auto PrevErr = MissingTypeServerPDBs.find(TSId);
-  if (PrevErr != MissingTypeServerPDBs.end())
-    return createFileError(
-        TSPath,
-        make_error<StringError>(PrevErr->second, inconvertibleErrorCode()));
-
-  // Second, check if we already loaded a PDB with this GUID. Return the type
-  // index mapping if we have it.
-  auto Insertion = TypeServerIndexMappings.insert({TSId, CVIndexMap()});
-  CVIndexMap &IndexMap = Insertion.first->second;
-  if (!Insertion.second)
-    return IndexMap;
+  auto It = TypeServerIndexMappings.emplace(Info.getGuid(), CVIndexMap());
+  CVIndexMap &IndexMap = It.first->second;
+  if (!It.second)
+    return IndexMap; // already merged
 
   // Mark this map as a type server map.
   IndexMap.IsTypeServerMap = true;
 
-  // Check for a PDB at:
-  // 1. The given file path
-  // 2. Next to the object file or archive file
-  auto ExpectedSession = handleExpected(
-      tryToLoadPDB(TSId, TSPath),
-      [&]() {
-        StringRef LocalPath =
-            !File->ParentName.empty() ? File->ParentName : File->getName();
-        SmallString<128> Path = sys::path::parent_path(LocalPath);
-        // Currently, type server PDBs are only created by cl, which only runs
-        // on Windows, so we can assume type server paths are Windows style.
-        sys::path::append(
-            Path, sys::path::filename(TSPath, sys::path::Style::windows));
-        return tryToLoadPDB(TSId, Path);
-      },
-      [&](std::unique_ptr<ECError> EC) -> Error {
-        auto SysErr = EC->convertToErrorCode();
-        // Only re-try loading if the previous error was "No such file or
-        // directory"
-        if (SysErr.category() == std::generic_category() &&
-            SysErr.value() == ENOENT)
-          return Error::success();
-        return Error(std::move(EC));
-      });
-
-  if (auto E = ExpectedSession.takeError()) {
-    TypeServerIndexMappings.erase(TSId);
-
-    // Flatten the error to a string, for later display, if the error occurs
-    // again on the same PDB.
-    std::string ErrMsg;
-    raw_string_ostream S(ErrMsg);
-    S << E;
-    MissingTypeServerPDBs.emplace(TSId, S.str());
-
-    return createFileError(TSPath, std::move(E));
-  }
-
-  pdb::NativeSession *Session = ExpectedSession->get();
-
-  // Keep a strong reference to this PDB, so that it's safe to hold pointers
-  // into the file.
-  LoadedPDBs.push_back(std::move(*ExpectedSession));
-
-  auto ExpectedTpi = Session->getPDBFile().getPDBTpiStream();
+  Expected<pdb::TpiStream &> ExpectedTpi = PDBFile.getPDBTpiStream();
   if (auto E = ExpectedTpi.takeError())
     fatal("Type server does not have TPI stream: " + toString(std::move(E)));
-  auto ExpectedIpi = Session->getPDBFile().getPDBIpiStream();
+  Expected<pdb::TpiStream &> ExpectedIpi = PDBFile.getPDBIpiStream();
   if (auto E = ExpectedIpi.takeError())
     fatal("Type server does not have TPI stream: " + toString(std::move(E)));
 
diff --git a/lld/test/COFF/Inputs/pdb-type-server-invalid-path.yaml b/lld/test/COFF/Inputs/pdb-type-server-invalid-path.yaml
new file mode 100644
index 0000000000000..9e485ffcecee8
--- /dev/null
+++ b/lld/test/COFF/Inputs/pdb-type-server-invalid-path.yaml
@@ -0,0 +1,121 @@
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:
+  - Name:            '.debug$S'
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    Subsections:
+      - !Symbols
+        Records:
+          - Kind:            S_GPROC32_ID
+            ProcSym:
+              CodeSize:        3
+              DbgStart:        0
+              DbgEnd:          2
+              FunctionType:    4199
+              Flags:           [  ]
+              DisplayName:     main
+          - Kind:            S_FRAMEPROC
+            FrameProcSym:
+              TotalFrameBytes: 0
+              PaddingFrameBytes: 0
+              OffsetToPadding: 0
+              BytesOfCalleeSavedRegisters: 0
+              OffsetOfExceptionHandler: 0
+              SectionIdOfExceptionHandler: 0
+              Flags:           [ AsynchronousExceptionHandling, OptimizedForSpeed ]
+          - Kind:            S_PROC_ID_END
+            ScopeEndSym:
+      - !Lines
+        CodeSize:        3
+        Flags:           [  ]
+        RelocOffset:     0
+        RelocSegment:    0
+        Blocks:
+          - FileName:        'c:\src\llvm-project\build\t.c'
+            Lines:
+              - Offset:          0
+                LineStart:       1
+                IsStatement:     true
+                EndDelta:        0
+            Columns:
+      - !FileChecksums
+        Checksums:
+          - FileName:        'c:\src\llvm-project\build\t.c'
+            Kind:            MD5
+            Checksum:        270A878DCC1B845655B162F56C4F5020
+      - !StringTable
+        Strings:
+          - 'c:\src\llvm-project\build\t.c'
+    Relocations:
+      - VirtualAddress:  44
+        SymbolName:      main
+        Type:            IMAGE_REL_AMD64_SECREL
+      - VirtualAddress:  48
+        SymbolName:      main
+        Type:            IMAGE_REL_AMD64_SECTION
+      - VirtualAddress:  100
+        SymbolName:      main
+        Type:            IMAGE_REL_AMD64_SECREL
+      - VirtualAddress:  104
+        SymbolName:      main
+        Type:            IMAGE_REL_AMD64_SECTION
+  - Name:            '.debug$T'
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    Types:
+      - Kind:            LF_TYPESERVER2
+        TypeServer2:
+          Guid:            '{8DABD2A0-28FF-CB43-9BAF-175B77B76414}'
+          Age:             1
+          Name:            'c:\some_invalid_path_AABB98765\pdb-diff-cl.pdb'
+  - Name:            '.text$mn'
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       16
+    SectionData:     33C0C3
+symbols:
+  - Name:            '.debug$S'
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          328
+      NumberOfRelocations: 4
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.debug$T'
+    Value:           0
+    SectionNumber:   2
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          564
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.text$mn'
+    Value:           0
+    SectionNumber:   3
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          3
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        4021952397
+      Number:          0
+  - Name:            main
+    Value:           0
+    SectionNumber:   3
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+...
diff --git a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
index dd95a3df8893c..23656d1807cff 100644
--- a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
+++ b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
@@ -69,7 +69,7 @@ sections:
       - Kind:            LF_TYPESERVER2
         TypeServer2:
           Guid:            '{8DABD2A0-28FF-CB43-9BAF-175B77B76414}'
-          Age:             18
+          Age:             1
           Name:            'pdb-diff-cl.pdb'
   - Name:            '.text$mn'
     Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
diff --git a/lld/test/COFF/pdb-type-server-invalid-signature.yaml b/lld/test/COFF/pdb-type-server-invalid-signature.yaml
index 247e00096235c..612bd74eeaa84 100644
--- a/lld/test/COFF/pdb-type-server-invalid-signature.yaml
+++ b/lld/test/COFF/pdb-type-server-invalid-signature.yaml
@@ -1,22 +1,30 @@
 
+# Test linking an OBJ with a reference to an out-of-date PDB type server
 # RUN: cd %S/Inputs
 # RUN: yaml2obj %s -o %t.obj
 # RUN: lld-link %t.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s
 # RUN: cd %S
 
-# CHECK: warning: Cannot use debug info for {{.*}}.obj
+# CHECK: warning: Cannot use debug info for '{{.*}}.obj'
 # CHECK-NEXT: The signature does not match; the file(s) might be out of date
 
-# Also test a valid match
-
+# Also test linking an OBJ with a reference to *valid* PDB type server
 # RUN: cd %S/Inputs
 # RUN: yaml2obj %S/Inputs/pdb-type-server-valid-signature.yaml -o %t2.obj
 # RUN: lld-link %t2.obj -out:%t2.exe -debug -pdb:%t2.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=VALID-SIGNATURE -allow-empty
 # RUN: cd %S
 
-# VALID-SIGNATURE-NOT: warning: Cannot use debug info for {{.*}}.obj
+# VALID-SIGNATURE-NOT: warning: Cannot use debug info for '{{.*}}.obj'
 # VALID-SIGNATURE-NOT: The signature does not match; the file(s) might be out of date
 
+# Test an invalid path reference to a PDB type server; as a fallback LLD should try to load the PDB in the same path as the OBJ
+# RUN: yaml2obj %S/Inputs/pdb-type-server-invalid-path.yaml -o %t3.obj
+# RUN: cp %S/Inputs/pdb-diff-cl.pdb %T
+# RUN: lld-link %t3.obj -out:%t3.exe -debug -pdb:%t3.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=INVALID-PATH -allow-empty
+
+# INVALID-PATH-NOT: warning: Cannot use debug info for '{{.*}}3.obj' [LNK4099]
+# INVALID-PATH-NOT: failed to load reference 'c:\some_invalid_path_AABB98765\pdb-diff-cl.pdb': {{[Nn]}}o such file or directory
+
 --- !COFF
 header:
   Machine:         IMAGE_FILE_MACHINE_AMD64

From 2f66316c9688830056df8b10b1ed0651154182c7 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 12:42:48 +0000
Subject: [PATCH 0912/1176] Include what you use in LanaiMCCodeEmitter.cpp

LanaiMCCodeEmitter.cpp was not using any APIs from Lanai.h, and was only
including it for transitive dependencies.  Doing so is problematic from
include-what-you-use perspective, but it is also a layering issue (it
creates a dependency cycle between the primary Lanai target library and
the MCTargetDesc library).

llvm-svn: 362394
---
 llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index adfe528f9b528..df4ee297155fc 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
+#include "LanaiAluCode.h"
 #include "MCTargetDesc/LanaiBaseInfo.h"
 #include "MCTargetDesc/LanaiFixupKinds.h"
 #include "MCTargetDesc/LanaiMCExpr.h"

From 7a3e4ab286cad51b706166ba5a1137b3811ff5c2 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 12:53:05 +0000
Subject: [PATCH 0913/1176] Include what you use in LanaiInstPrinter.cpp

llvm-svn: 362395
---
 llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
index 491a88c34fd5e..0d42612824b48 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -11,11 +11,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "LanaiInstPrinter.h"
-#include "Lanai.h"
 #include "LanaiMCExpr.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "MCTargetDesc/LanaiMCTargetDesc.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"

From 74467814f27e3da681a4faaf3edbaeb493a036b6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 3 Jun 2019 12:58:36 +0000
Subject: [PATCH 0914/1176] [SystemZ] Remove sitofp(undef) from reduced test
 case.

Pre-commit for D62807 - which adds DAG [us]itofp(undef) --> 0 constant fold

llvm-svn: 362396
---
 llvm/test/CodeGen/SystemZ/subregliveness-02.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/SystemZ/subregliveness-02.ll b/llvm/test/CodeGen/SystemZ/subregliveness-02.ll
index 8f972e18a6647..e0e1c197e69d7 100644
--- a/llvm/test/CodeGen/SystemZ/subregliveness-02.ll
+++ b/llvm/test/CodeGen/SystemZ/subregliveness-02.ll
@@ -7,9 +7,9 @@ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
 target triple = "s390x-ibm-linux"
 
 ; Function Attrs: nounwind
-define void @spec_random_load() #0 {
+define void @spec_random_load(i64 %a0) #0 {
 bb:
-  %tmp = sitofp i64 undef to float
+  %tmp = sitofp i64 %a0 to float
   %tmp1 = fmul float %tmp, 0x3E00000000000000
   %tmp2 = fpext float %tmp1 to double
   %tmp3 = fmul double %tmp2, 2.560000e+02

From cb7e4e8193f3504073932f9e9337fec6ab7675df Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 3 Jun 2019 13:02:07 +0000
Subject: [PATCH 0915/1176] [SelectionDAG] Add [us]itofp(undef) --> 0 constant
 fold (PR39205)

We were missing this fold in the DAG, which I've copied directly from llvm::ConstantFoldCastInstruction

Differential Revision: https://reviews.llvm.org/D62807

llvm-svn: 362397
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   8 ++
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   6 +
 llvm/test/CodeGen/X86/avx512-cvt-widen.ll     |  28 ++--
 llvm/test/CodeGen/X86/avx512-cvt.ll           |  28 ++--
 llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll  | 130 +++++-------------
 llvm/test/CodeGen/X86/vec_int_to_fp.ll        | 130 +++++-------------
 6 files changed, 126 insertions(+), 204 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 27e56a3bbc71d..a4c1e9886ae6d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12440,6 +12440,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT OpVT = N0.getValueType();
 
+  // [us]itofp(undef) = 0, because the result value is bounded.
+  if (N0.isUndef())
+    return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
   // fold (sint_to_fp c1) -> c1fp
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       // ...but only if the target supports immediate floating-point values
@@ -12497,6 +12501,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT OpVT = N0.getValueType();
 
+  // [us]itofp(undef) = 0, because the result value is bounded.
+  if (N0.isUndef())
+    return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
   // fold (uint_to_fp c1) -> c1fp
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       // ...but only if the target supports immediate floating-point values
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d6d8cf54cb01d..1dc9d7460f859 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4440,6 +4440,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (Operand.isUndef())
       return getUNDEF(VT);
     break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    // [us]itofp(undef) = 0, because the result value is bounded.
+    if (Operand.isUndef())
+      return getConstantFP(0.0, DL, VT);
+    break;
   case ISD::SIGN_EXTEND:
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid SIGN_EXTEND!");
diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
index 90631baaf47eb..eddd0039507ed 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
@@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64> %a) {
 }
 
 define <2 x float> @sltof2f32(<2 x i64> %a) {
-; NODQ-LABEL: sltof2f32:
-; NODQ:       # %bb.0:
-; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
-; NODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
-; NODQ-NEXT:    vmovq %xmm0, %rax
-; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; NODQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; NODQ-NEXT:    retq
+; NOVLDQ-LABEL: sltof2f32:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NOVLDQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vmovq %xmm0, %rax
+; NOVLDQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
+; NOVLDQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; NOVLDQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: sltof2f32:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
 ; VLDQ-NEXT:    retq
 ;
+; VLNODQ-LABEL: sltof2f32:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
+; VLNODQ-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; VLNODQ-NEXT:    retq
+;
 ; DQNOVL-LABEL: sltof2f32:
 ; DQNOVL:       # %bb.0:
 ; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 2b5112650a969..c42be0d0f1c28 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64> %a) {
 }
 
 define <2 x float> @sltof2f32(<2 x i64> %a) {
-; NODQ-LABEL: sltof2f32:
-; NODQ:       # %bb.0:
-; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
-; NODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
-; NODQ-NEXT:    vmovq %xmm0, %rax
-; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; NODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; NODQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; NODQ-NEXT:    retq
+; NOVLDQ-LABEL: sltof2f32:
+; NOVLDQ:       # %bb.0:
+; NOVLDQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NOVLDQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
+; NOVLDQ-NEXT:    vmovq %xmm0, %rax
+; NOVLDQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
+; NOVLDQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; NOVLDQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: sltof2f32:
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
 ; VLDQ-NEXT:    retq
 ;
+; VLNODQ-LABEL: sltof2f32:
+; VLNODQ:       # %bb.0:
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
+; VLNODQ-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; VLNODQ-NEXT:    retq
+;
 ; DQNOVL-LABEL: sltof2f32:
 ; DQNOVL:       # %bb.0:
 ; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
index 78bc214358bc6..26027bcad19ea 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll
@@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_2i64_to_4f32:
@@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_2i64_to_4f32:
@@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
@@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
-; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; VEX-NEXT:    js .LBB39_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    jmp .LBB39_6
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    retq
 ; VEX-NEXT:  .LBB39_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
@@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:  .LBB39_6:
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT:    js .LBB39_8
-; VEX-NEXT:  # %bb.7:
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:  .LBB39_8:
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i64_to_4f32:
@@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_2i64_to_4f32:
@@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
@@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
 define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
 ; SSE2-NEXT:    js .LBB41_1
 ; SSE2-NEXT:  # %bb.2:
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE2-NEXT:    jmp .LBB41_3
 ; SSE2-NEXT:  .LBB41_1:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    addss %xmm0, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
+; SSE2-NEXT:    addss %xmm1, %xmm1
 ; SSE2-NEXT:  .LBB41_3:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT:    movq %xmm1, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
 ; SSE2-NEXT:    js .LBB41_4
 ; SSE2-NEXT:  # %bb.5:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE2-NEXT:    jmp .LBB41_6
 ; SSE2-NEXT:  .LBB41_4:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:    addss %xmm1, %xmm1
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
+; SSE2-NEXT:    addss %xmm0, %xmm0
 ; SSE2-NEXT:  .LBB41_6:
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    js .LBB41_8
-; SSE2-NEXT:  # %bb.7:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:  .LBB41_8:
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE41-NEXT:  # %bb.5:
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    jmp .LBB41_6
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; SSE41-NEXT:    retq
 ; SSE41-NEXT:  .LBB41_4:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
@@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE41-NEXT:    addss %xmm0, %xmm0
-; SSE41-NEXT:  .LBB41_6:
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    js .LBB41_8
-; SSE41-NEXT:  # %bb.7:
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE41-NEXT:  .LBB41_8:
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; VEX-NEXT:    js .LBB41_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    jmp .LBB41_6
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    retq
 ; VEX-NEXT:  .LBB41_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
@@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:  .LBB41_6:
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT:    js .LBB41_8
-; VEX-NEXT:  # %bb.7:
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:  .LBB41_8:
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 9b543075f3b29..f7d8216ed3d28 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_2i64_to_4f32:
@@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_2i64_to_4f32:
@@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
@@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
-; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
@@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; VEX-NEXT:    js .LBB39_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    jmp .LBB39_6
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    retq
 ; VEX-NEXT:  .LBB39_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
@@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:  .LBB39_6:
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT:    js .LBB39_8
-; VEX-NEXT:  # %bb.7:
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:  .LBB39_8:
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_2i64_to_4f32:
@@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_2i64_to_4f32:
@@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
@@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
 define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
 ; SSE2-NEXT:    js .LBB41_1
 ; SSE2-NEXT:  # %bb.2:
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
 ; SSE2-NEXT:    jmp .LBB41_3
 ; SSE2-NEXT:  .LBB41_1:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    addss %xmm0, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
+; SSE2-NEXT:    addss %xmm1, %xmm1
 ; SSE2-NEXT:  .LBB41_3:
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT:    movq %xmm1, %rax
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    testq %rax, %rax
 ; SSE2-NEXT:    js .LBB41_4
 ; SSE2-NEXT:  # %bb.5:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE2-NEXT:    jmp .LBB41_6
 ; SSE2-NEXT:  .LBB41_4:
 ; SSE2-NEXT:    movq %rax, %rcx
 ; SSE2-NEXT:    shrq %rcx
 ; SSE2-NEXT:    andl $1, %eax
 ; SSE2-NEXT:    orq %rcx, %rax
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:    addss %xmm1, %xmm1
+; SSE2-NEXT:    xorps %xmm0, %xmm0
+; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
+; SSE2-NEXT:    addss %xmm0, %xmm0
 ; SSE2-NEXT:  .LBB41_6:
-; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT:    testq %rax, %rax
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    js .LBB41_8
-; SSE2-NEXT:  # %bb.7:
-; SSE2-NEXT:    xorps %xmm1, %xmm1
-; SSE2-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE2-NEXT:  .LBB41_8:
-; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE41-NEXT:  # %bb.5:
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE41-NEXT:    jmp .LBB41_6
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; SSE41-NEXT:    retq
 ; SSE41-NEXT:  .LBB41_4:
 ; SSE41-NEXT:    movq %rax, %rcx
 ; SSE41-NEXT:    shrq %rcx
@@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2ss %rax, %xmm0
 ; SSE41-NEXT:    addss %xmm0, %xmm0
-; SSE41-NEXT:  .LBB41_6:
-; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT:    testq %rax, %rax
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    js .LBB41_8
-; SSE41-NEXT:  # %bb.7:
-; SSE41-NEXT:    xorps %xmm1, %xmm1
-; SSE41-NEXT:    cvtsi2ss %rax, %xmm1
-; SSE41-NEXT:  .LBB41_8:
-; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; VEX-NEXT:    js .LBB41_4
 ; VEX-NEXT:  # %bb.5:
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT:    jmp .LBB41_6
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT:    retq
 ; VEX-NEXT:  .LBB41_4:
 ; VEX-NEXT:    movq %rax, %rcx
 ; VEX-NEXT:    shrq %rcx
@@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; VEX-NEXT:    orq %rcx, %rax
 ; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT:  .LBB41_6:
-; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    testq %rax, %rax
-; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT:    js .LBB41_8
-; VEX-NEXT:  # %bb.7:
-; VEX-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT:  .LBB41_8:
-; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:

From 9ed325e463d5fd4a87284175cdb46af3d8cbb02a Mon Sep 17 00:00:00 2001
From: Andrew Savonichev <andrew.savonichev@intel.com>
Date: Mon, 3 Jun 2019 13:02:43 +0000
Subject: [PATCH 0916/1176] [OpenCL] Undefine cl_intel_planar_yuv extension

Summary:

Remove unnecessary definition (otherwise the extension will be defined
where it's not supposed to be defined).

Consider the code:

  #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
  // some declarations
  #pragma OPENCL EXTENSION cl_intel_planar_yuv : end

is enough for extension to become known for clang.

Patch by: Dmitry Sidorov <dmitry.sidorov@intel.com>

Reviewers: Anastasia, yaxunl

Reviewed By: Anastasia

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58666

llvm-svn: 362398
---
 clang/lib/Headers/opencl-c.h             | 3 ---
 clang/test/Headers/opencl-c-header.cl    | 3 ---
 clang/test/SemaOpenCL/extension-begin.cl | 7 +++++++
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index ba98fa6eb6c61..009b1e934eb6b 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -22,9 +22,6 @@
 #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0
 
 #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
-#ifndef cl_intel_planar_yuv
-#define cl_intel_planar_yuv
-#endif // cl_intel_planar_yuv
 #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
 #pragma OPENCL EXTENSION cl_intel_planar_yuv : end
 #endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl
index 14c2e78444867..9faf7bfdd8d6f 100644
--- a/clang/test/Headers/opencl-c-header.cl
+++ b/clang/test/Headers/opencl-c-header.cl
@@ -77,9 +77,6 @@ void test_image3dwo(write_only image3d_t img) {
 // OpenCL 1.2 onwards.
 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
 // expected-no-diagnostics
-#ifndef cl_intel_planar_yuv
-#error "Missing cl_intel_planar_yuv define"
-#endif
 #else //__OPENCL_C_VERSION__
 // expected-warning@+2{{unknown OpenCL extension 'cl_intel_planar_yuv' - ignoring}}
 #endif //__OPENCL_C_VERSION__
diff --git a/clang/test/SemaOpenCL/extension-begin.cl b/clang/test/SemaOpenCL/extension-begin.cl
index 276e6d7f10b01..367652ccdd227 100644
--- a/clang/test/SemaOpenCL/extension-begin.cl
+++ b/clang/test/SemaOpenCL/extension-begin.cl
@@ -16,6 +16,13 @@
 //
 // RUN: %clang_cc1 -cl-std=CL2.0 -DIMPLICIT_INCLUDE -include %S/extension-begin.h -triple spir-unknown-unknown -O0 -emit-llvm -o - -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.modules %s -verify -pedantic
 
+#pragma OPENCL EXTENSION my_ext : enable
+#ifndef IMPLICIT_INCLUDE
+// expected-warning@-2 {{unknown OpenCL extension 'my_ext' - ignoring}}
+// expected-warning@+2 {{unknown OpenCL extension 'my_ext' - ignoring}}
+#endif // IMPLICIT_INCLUDE
+#pragma OPENCL EXTENSION my_ext : disable
+
 #ifndef IMPLICIT_INCLUDE
 #include "extension-begin.h"
 #endif // IMPLICIT_INCLUDE

From 082d99f58cbea021727a69cb7d7c28ec92331957 Mon Sep 17 00:00:00 2001
From: David Zarzycki <dave@znu.io>
Date: Mon, 3 Jun 2019 13:39:49 +0000
Subject: [PATCH 0917/1176] Unbreak non-PIC builds after r362390 / D62720

llvm-svn: 362399
---
 llvm/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 0bb3edfa8959a..de499a75827ba 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -629,7 +629,7 @@ if(WIN32 OR CYGWIN)
     set(LLVM_ENABLE_PLUGINS_default OFF)
   endif()
 else()
-  set(LLVM_ENABLE_PLUGINS_default ON)
+  set(LLVM_ENABLE_PLUGINS_default LLVM_ENABLE_PIC)
 endif()
 option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default})
 

From 9111f35f0233af8fb91467cf65100b7cda50f892 Mon Sep 17 00:00:00 2001
From: Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>
Date: Mon, 3 Jun 2019 13:51:24 +0000
Subject: [PATCH 0918/1176] [AMDGPU][MC] Added support of SCC, VCCZ and EXECZ
 operands

See bug 39292: https://bugs.llvm.org/show_bug.cgi?id=39292

Reviewers: rampitec, arsenm

Differential Revision: https://reviews.llvm.org/D62660

llvm-svn: 362400
---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |   9 ++
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  20 ++-
 .../Disassembler/AMDGPUDisassembler.cpp       |  11 +-
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp |  12 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  22 +--
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |   5 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |  14 +-
 llvm/test/MC/AMDGPU/literals.s                | 148 +++++++++++++++++-
 .../MC/Disassembler/AMDGPU/literal_gfx9.txt   |  81 ++++++++++
 9 files changed, 296 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0e8a517d1d644..9dcbb599589c6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -678,6 +678,15 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
         case AMDGPU::TMA_HI:
           llvm_unreachable("trap handler registers should not be used");
 
+        case AMDGPU::SRC_VCCZ:
+          llvm_unreachable("src_vccz register should not be used");
+
+        case AMDGPU::SRC_EXECZ:
+          llvm_unreachable("src_execz register should not be used");
+
+        case AMDGPU::SRC_SCC:
+          llvm_unreachable("src_scc register should not be used");
+
         default:
           break;
         }
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index bc7068ef7569b..37879520ec084 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1658,6 +1658,10 @@ static bool isInlineValue(unsigned Reg) {
   case AMDGPU::SRC_PRIVATE_LIMIT:
   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
     return true;
+  case AMDGPU::SRC_VCCZ:
+  case AMDGPU::SRC_EXECZ:
+  case AMDGPU::SRC_SCC:
+    return true;
   default:
     return false;
   }
@@ -1723,7 +1727,12 @@ static unsigned getSpecialRegForName(StringRef RegName) {
     .Case("lds_direct", AMDGPU::LDS_DIRECT)
     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
     .Case("m0", AMDGPU::M0)
-    .Case("scc", AMDGPU::SCC)
+    .Case("vccz", AMDGPU::SRC_VCCZ)
+    .Case("src_vccz", AMDGPU::SRC_VCCZ)
+    .Case("execz", AMDGPU::SRC_EXECZ)
+    .Case("src_execz", AMDGPU::SRC_EXECZ)
+    .Case("scc", AMDGPU::SRC_SCC)
+    .Case("src_scc", AMDGPU::SRC_SCC)
     .Case("tba", AMDGPU::TBA)
     .Case("tma", AMDGPU::TMA)
     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
@@ -3878,6 +3887,12 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
   }
 
   switch (RegNo) {
+  case AMDGPU::SRC_SHARED_BASE:
+  case AMDGPU::SRC_SHARED_LIMIT:
+  case AMDGPU::SRC_PRIVATE_BASE:
+  case AMDGPU::SRC_PRIVATE_LIMIT:
+  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+    return !isCI() && !isSI() && !isVI();
   case AMDGPU::TBA:
   case AMDGPU::TBA_LO:
   case AMDGPU::TBA_HI:
@@ -3895,9 +3910,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
     break;
   }
 
-  if (isInlineValue(RegNo))
-    return !isCI() && !isSI() && !isVI();
-
   if (isCI())
     return true;
 
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index b77deeab96067..307f0cb7cde46 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -916,11 +916,9 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
   case 237: return createRegOperand(SRC_PRIVATE_BASE);
   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
-    // ToDo: no support for vccz register
-  case 251: break;
-    // ToDo: no support for execz register
-  case 252: break;
-  case 253: return createRegOperand(SCC);
+  case 251: return createRegOperand(SRC_VCCZ);
+  case 252: return createRegOperand(SRC_EXECZ);
+  case 253: return createRegOperand(SRC_SCC);
   case 254: return createRegOperand(LDS_DIRECT);
   default: break;
   }
@@ -942,6 +940,9 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
   case 237: return createRegOperand(SRC_PRIVATE_BASE);
   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
+  case 251: return createRegOperand(SRC_VCCZ);
+  case 252: return createRegOperand(SRC_EXECZ);
+  case 253: return createRegOperand(SRC_SCC);
   default: break;
   }
   return errOperand(Val, "unknown operand encoding " + Twine(Val));
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f67904ad19d97..0e2706349b1de 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -281,8 +281,14 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
   case AMDGPU::VCC:
     O << "vcc";
     return;
-  case AMDGPU::SCC:
-    O << "scc";
+  case AMDGPU::SRC_VCCZ:
+    O << "src_vccz";
+    return;
+  case AMDGPU::SRC_EXECZ:
+    O << "src_execz";
+    return;
+  case AMDGPU::SRC_SCC:
+    O << "src_scc";
     return;
   case AMDGPU::EXEC:
     O << "exec";
@@ -358,6 +364,8 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
   case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
   case AMDGPU::PRIVATE_RSRC_REG:
     llvm_unreachable("pseudo-register should not ever be emitted");
+  case AMDGPU::SCC:
+    llvm_unreachable("pseudo scc should not ever be emitted");
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 14f5dbe6ad496..c25be611cb7c7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2823,19 +2823,19 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
   if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
     return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
 
-  // FLAT_SCR is just an SGPR pair.
-  if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
-    return true;
-
-  // EXEC register uses the constant bus.
-  if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
-    return true;
+  // Null is free
+  if (MO.getReg() == AMDGPU::SGPR_NULL)
+    return false;
 
   // SGPRs use the constant bus
-  return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
-          (!MO.isImplicit() &&
-           (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
-            AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
+  if (MO.isImplicit()) {
+    return MO.getReg() == AMDGPU::M0 ||
+           MO.getReg() == AMDGPU::VCC ||
+           MO.getReg() == AMDGPU::VCC_LO;
+  } else {
+    return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
+           AMDGPU::SReg_64RegClass.contains(MO.getReg());
+  }
 }
 
 static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b503af4d210f5..341a88fa471bf 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -154,6 +154,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
   reserveRegisterTuples(Reserved, AMDGPU::M0);
 
+  // Reserve src_vccz, src_execz, src_scc.
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
+  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
+
   // Reserve the memory aperture registers.
   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 0a7962e789746..2605487dc5af3 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -69,7 +69,16 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
   let HWEncoding = 126;
 }
 
-def SCC : SIReg<"scc", 253>;
+// 32-bit real registers, for MC only.
+// May be used with both 32-bit and 64-bit operands.
+def SRC_VCCZ : SIReg<"src_vccz", 251>;
+def SRC_EXECZ : SIReg<"src_execz", 252>;
+def SRC_SCC : SIReg<"src_scc", 253>;
+
+// 1-bit pseudo register, for codegen only.
+// Should never be emitted.
+def SCC : SIReg<"">;
+
 def M0 : SIReg <"m0", 124>;
 def SGPR_NULL : SIReg<"null", 125>;
 
@@ -448,7 +457,8 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
 def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
    SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
-   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID)> {
+   SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+   SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
   let AllocationPriority = 8;
 }
 
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index c379b0d1583ee..dd9d2903a1ae2 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -1,6 +1,6 @@
 // RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI
 // RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SI --check-prefix=SICI
-// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=CIVI --check-prefix=CI
 // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=GFX89
 // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=CIVI --check-prefix=GFX89 --check-prefix=GFX9
 
@@ -519,7 +519,125 @@ v_trunc_f64 v[0:1], 0x100000001
 v_trunc_f64 v[0:1], 0x1fffffff000
 
 //---------------------------------------------------------------------------//
-// named inline values like shared_base
+// named inline values: scc, vccz, execz
+//---------------------------------------------------------------------------//
+
+// SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd]
+// GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd]
+buffer_atomic_add v0, off, s[0:3], scc offset:4095
+
+// SICI: s_add_i32 s0, src_vccz, s0      ; encoding: [0xfb,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_vccz, s0      ; encoding: [0xfb,0x00,0x00,0x81]
+s_add_i32 s0, vccz, s0
+
+// SICI: s_add_i32 s0, src_execz, s0      ; encoding: [0xfc,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_execz, s0      ; encoding: [0xfc,0x00,0x00,0x81]
+s_add_i32 s0, execz, s0
+
+// SICI: s_add_i32 s0, src_scc, s0       ; encoding: [0xfd,0x00,0x00,0x81]
+// GFX89: s_add_i32 s0, src_scc, s0       ; encoding: [0xfd,0x00,0x00,0x81]
+s_add_i32 s0, scc, s0
+
+// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86]
+s_and_b64 s[0:1], s[0:1], src_vccz
+
+// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86]
+s_and_b64 s[0:1], s[0:1], src_execz
+
+// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87]
+// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
+s_and_b64 s[0:1], s[0:1], src_scc
+
+// NOSICI: error: instruction not supported on this GPU
+// GFX89: v_add_u16_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x4c]
+v_add_u16 v0, vccz, v0
+
+// NOSICI: error: not a valid operand
+// NOVI: error: invalid operand for instruction
+// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
+v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
+// NOSICI: error: not a valid operand
+// NOVI: error: invalid operand for instruction
+// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
+v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+
+// NOSICIVI: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68]
+v_add_u32 v0, execz, v0
+
+// NOSICIVI: error: instruction not supported on this GPU
+// GFX9: v_add_u32_e64 v0, src_scc, v0   ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
+v_add_u32_e64 v0, scc, v0
+
+// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d]
+// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d]
+v_cmp_eq_i64 vcc, scc, v[0:1]
+
+// NOSICI: error: instruction not supported on this GPU
+// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a]
+v_max_f16 v0, execz, v0
+
+// SICI: v_max_f32_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x20]
+// GFX89: v_max_f32_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x16]
+v_max_f32 v0, vccz, v0
+
+// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00]
+// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
+v_max_f64 v[0:1], scc, v[0:1]
+
+// NOSICIVI: error: instruction not supported on this GPU
+// GFX9: v_pk_add_f16 v0, src_execz, v0  ; encoding: [0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18]
+v_pk_add_f16 v0, execz, v0
+
+// NOSICI: error: not a valid operand
+// GFX89: v_ceil_f16_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
+v_ceil_f16 v0, neg(vccz)
+
+// NOSICI: error: not a valid operand
+// GFX89: v_ceil_f16_e64 v0, |src_scc|    ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
+v_ceil_f16 v0, abs(scc)
+
+// NOSI: error: not a valid operand
+// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00]
+// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
+v_ceil_f64 v[5:6], |execz|
+
+// NOSI: error: not a valid operand
+// CI: v_ceil_f64_e64 v[5:6], -vcc     ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20]
+// GFX89: v_ceil_f64_e64 v[5:6], -vcc     ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
+v_ceil_f64 v[5:6], -vcc
+
+// SICI: v_ceil_f32_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20]
+// GFX89: v_ceil_f32_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
+v_ceil_f32 v0, -vccz
+
+// SICI: v_ceil_f32_e64 v0, |src_execz|  ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00]
+// GFX89: v_ceil_f32_e64 v0, |src_execz|  ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
+v_ceil_f32 v0, |execz|
+
+// NOSICI: error: not a valid operand
+// NOVI: error: invalid operand for instruction
+// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
+v_ceil_f16_sdwa v5, |vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+
+// NOSICI: error: not a valid operand
+// NOVI: error: invalid operand for instruction
+// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
+v_ceil_f16_sdwa v5, -scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE
+
+// NOSICIVI: error: invalid operand for instruction
+// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
+v_ceil_f32_sdwa v5, vccz dst_sel:DWORD src0_sel:DWORD
+
+// NOSICIVI: error: invalid operand for instruction
+// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
+v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD
+
+//---------------------------------------------------------------------------//
+// named inline values: shared_base, shared_limit, private_base, etc
 //---------------------------------------------------------------------------//
 
 // NOSICIVI: error: failed parsing operand.
@@ -659,6 +777,10 @@ v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_add_u32 v0, private_base, s0
 
+// NOSICIVI: error: instruction not supported on this GPU
+// NOGFX9: error: invalid operand (violates constant bus restrictions)
+v_add_u32 v0, scc, s0
+
 // v_div_fmas implicitly reads VCC
 // NOSICIVI: error: failed parsing operand.
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
@@ -674,6 +796,18 @@ v_div_fmas_f32 v0, v0, shared_limit, v1
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_div_fmas_f32 v0, v0, v1, private_limit
 
+// v_div_fmas implicitly reads VCC
+// NOGCN: error: invalid operand (violates constant bus restrictions)
+v_div_fmas_f32 v0, execz, v0, v1
+
+// v_div_fmas implicitly reads VCC
+// NOGCN: error: invalid operand (violates constant bus restrictions)
+v_div_fmas_f32 v0, v0, scc, v1
+
+// v_div_fmas implicitly reads VCC
+// NOGCN: error: invalid operand (violates constant bus restrictions)
+v_div_fmas_f32 v0, v0, v1, vccz
+
 // v_addc_co_u32 implicitly reads VCC (VOP2)
 // NOSICIVI: error: failed parsing operand.
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
@@ -683,6 +817,9 @@ v_addc_co_u32 v0, vcc, shared_base, v0, vcc
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_madak_f32 v0, shared_base, v0, 0x11213141
 
+// NOGCN: error: invalid operand (violates constant bus restrictions)
+v_madak_f32 v0, scc, v0, 0x11213141
+
 // NOSICIVI: error: failed parsing operand.
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], private_base, private_limit
@@ -691,6 +828,13 @@ v_cmp_eq_f32 s[0:1], private_base, private_limit
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_cmp_eq_f32 s[0:1], private_base, s0
 
+// NOGCN: error: invalid operand (violates constant bus restrictions)
+v_cmp_eq_f32 s[0:1], execz, s0
+
 // NOSICIVI: error: failed parsing operand.
 // NOGFX9: error: invalid operand (violates constant bus restrictions)
 v_pk_add_f16 v255, private_base, private_limit
+
+// NOSICIVI: error: instruction not supported on this GPU
+// NOGFX9: error: invalid operand (violates constant bus restrictions)
+v_pk_add_f16 v255, vccz, execz
diff --git a/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt
index 7b3d2a83bc1df..97c86c81e4d6b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt
@@ -77,3 +77,84 @@
 
 # GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00]
 0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00
+
+# GFX9: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd]
+0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd
+
+# GFX9: s_add_i32 s0, src_vccz, s0      ; encoding: [0xfb,0x00,0x00,0x81]
+0xfb,0x00,0x00,0x81
+
+# GFX9: s_add_i32 s0, src_execz, s0      ; encoding: [0xfc,0x00,0x00,0x81]
+0xfc,0x00,0x00,0x81
+
+# GFX9: s_add_i32 s0, src_scc, s0       ; encoding: [0xfd,0x00,0x00,0x81]
+0xfd,0x00,0x00,0x81
+
+# GFX9: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86]
+0x00,0xfb,0x80,0x86
+
+# GFX9: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86]
+0x00,0xfc,0x80,0x86
+
+# GFX9: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86]
+0x00,0xfd,0x80,0x86
+
+# GFX9: v_add_u16_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x4c]
+0xfb,0x00,0x00,0x4c
+
+# GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06]
+0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06
+
+# GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86]
+0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86
+
+# GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68]
+0xfc,0x00,0x00,0x68
+
+# GFX9: v_add_u32_e64 v0, src_scc, v0   ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00]
+0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00
+
+# GFX9: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d]
+0xfd,0x00,0xc4,0x7d
+
+# GFX9: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a]
+0xfc,0x00,0x00,0x5a
+
+# GFX9: v_max_f32_e32 v0, src_vccz, v0  ; encoding: [0xfb,0x00,0x00,0x16]
+0xfb,0x00,0x00,0x16
+
+# GFX9: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00]
+0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00
+
+# GFX9: v_pk_add_f16 v0, src_execz, v0  ; encoding: [0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18]
+0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18
+
+# GFX9: v_ceil_f16_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20]
+0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20
+
+# GFX9: v_ceil_f16_e64 v0, |src_scc|    ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
+0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00
+
+# GFX9: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00]
+0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00
+
+# GFX9: v_ceil_f64_e64 v[5:6], -vcc     ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20]
+0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20
+
+# GFX9: v_ceil_f32_e64 v0, -src_vccz    ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20]
+0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20
+
+# GFX9: v_ceil_f32_e64 v0, |src_execz|  ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00]
+0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00
+
+# GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00]
+0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00
+
+# GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00]
+0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00
+
+# GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00]
+0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00
+
+# GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00]
+0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00

From 3f786dab0eb136917dab32c6f414bc78650b2876 Mon Sep 17 00:00:00 2001
From: Andrey Churbanov <Andrey.Churbanov@intel.com>
Date: Mon, 3 Jun 2019 14:21:59 +0000
Subject: [PATCH 0919/1176] Fixed build warning with -DLIBOMP_USE_HWLOC=1

Made type of depth of hwloc object to correapond with
change from unsigned in hwloc 1,x to int in hwloc 2.x.
This eliminates the warning on signed-unsigned comparison.

Differential Revision: https://reviews.llvm.org/D62332

llvm-svn: 362401
---
 openmp/runtime/src/kmp.h            | 6 ++++++
 openmp/runtime/src/kmp_affinity.cpp | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 16ecaa5e99dc2..0133108b7e155 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -96,6 +96,12 @@ class kmp_stats_list;
 #ifndef HWLOC_OBJ_PACKAGE
 #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
 #endif
+#if HWLOC_API_VERSION >= 0x00020000
+// hwloc 2.0 changed type of depth of object from unsigned to int
+typedef int kmp_hwloc_depth_t;
+#else
+typedef unsigned int kmp_hwloc_depth_t;
+#endif
 #endif
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index b8d585c6e2572..d86de7e28fdd1 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -457,7 +457,8 @@ static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
 }
 
 static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
-                                               hwloc_obj_t o, unsigned depth,
+                                               hwloc_obj_t o,
+                                               kmp_hwloc_depth_t depth,
                                                hwloc_obj_t *f) {
   if (o->depth == depth) {
     if (*f == NULL)

From 14c69fefe6009fc391a81d49db71b5973773beea Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 14:26:50 +0000
Subject: [PATCH 0920/1176] Include what you use in NVPTX.h

I also fixed all other files that were including NVPTX.h and were
relying on transitive includes.

llvm-svn: 362402
---
 llvm/lib/Target/NVPTX/NVPTX.h                      | 1 -
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp        | 1 +
 llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h          | 1 +
 llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp         | 1 +
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp           | 1 +
 llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 1 +
 llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h   | 2 +-
 7 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index a9c376aea40a5..e81a301b59b41 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTX_H
 
-#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4542e60c97ea9..3d2447d75c775 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
 
 #include "NVPTXISelDAGToDAG.h"
 #include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 15f230a246a93..e4e5069b7a807 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -17,6 +17,7 @@
 #include "NVPTXISelLowering.h"
 #include "NVPTXRegisterInfo.h"
 #include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Compiler.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 5720ea2afdd17..76fb9f3fa692b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -26,6 +26,7 @@
 
 #include "NVPTX.h"
 #include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 8c7f6e63e1f43..c5e02e34e25e4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -91,6 +91,7 @@
 #include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index d08f8cc16c96c..e213089e40852 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,6 +16,7 @@
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index b4a06034a9d6d..b179a28fa713b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
 
-#include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/TargetLowering.h"

From 2b369f83c59b29489c24ce9606c21d7e0795ee9a Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 14:37:26 +0000
Subject: [PATCH 0921/1176] Include what you use in NVPTX.h

Other files were not relying on these transitive includes, so I'm
submitting this change separately.

llvm-svn: 362403
---
 llvm/lib/Target/NVPTX/NVPTX.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index e81a301b59b41..6530c40ea1007 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -14,13 +14,8 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTX_H
 
-#include "llvm/ADT/StringMap.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <iosfwd>
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
 
 namespace llvm {
 class NVPTXTargetMachine;

From 9158d57d19c84cc117d7002b2dab466a60608df4 Mon Sep 17 00:00:00 2001
From: Michal Gorny <mgorny@gentoo.org>
Date: Mon, 3 Jun 2019 14:50:03 +0000
Subject: [PATCH 0922/1176] [llvm] [test] Remove non-portable EISDIR test from
 macho-disassemble-g-dsym.test

Remove the test checking error message for 'is a directory'.  It does
not seem to serve any real purpose, and it relies on matching platform
error strings which are unpredictable and makes the test fragile.
Furthermore, it fails on NetBSD where read() works on directories,
and therefore does not return EISDIR at all.

Fixes r362141.

Differential Revision: https://reviews.llvm.org/D62773

llvm-svn: 362404
---
 .../tools/llvm-objdump/X86/macho-disassemble-g-dsym.test     | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
index c9f72fa752b28..94ef98ab9f68f 100644
--- a/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
+++ b/llvm/test/tools/llvm-objdump/X86/macho-disassemble-g-dsym.test
@@ -15,8 +15,3 @@ MACHO_DSYM: (__TEXT,__text) section
 // RUN: llvm-objdump -m -d -g -dsym %p/../Inputs/libbogus11.a %p/../../dsymutil/Inputs/basic.macho.x86_64 2>&1 | FileCheck -check-prefix BAD_INPUT %s
 
 BAD_INPUT: is not a Mach-O or Universal file type.
-
-// RUN: not llvm-objdump -m -d -g -dsym %p/Inputs %p/Inputs/hello-macho-thin 2>&1 | FileCheck -check-prefix DIRECTORY %s
-
-// Windows will emit "Is a directory", whereas others emit "is a directory"
-DIRECTORY: {{[i|I]}}s a directory

From b3bd866c7f5fea6665e0eb3bd6319961e8a72631 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 15:04:05 +0000
Subject: [PATCH 0923/1176] Include what you use in PPCInstrInfo.h

llvm-svn: 362405
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 112b163c5c4d4..da34e70989db3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
 
-#include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 

From dab879d7c805562debea149e6d2c17839405c71b Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Mon, 3 Jun 2019 15:18:15 +0000
Subject: [PATCH 0924/1176] [lldb-server unittest] Add missing teardown logic

Summary:
This test base class is missing the teardown making the second set of tests extending it to fail in an assertion in the FileSystem::Initialize() (as it's being initialized twice).
Not sure why this isn't failing the build bots.. (unless they're running without asserts?).

With this fix `ninja LLDBServerTests && ./tools/lldb/unittests/tools/lldb-server/tests/LLDBServerTests` successfully runs and passes all tests.

Reviewers: clayborg, xiaobai, labath

Reviewed By: xiaobai, labath

Subscribers: lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62788

llvm-svn: 362406
---
 lldb/unittests/tools/lldb-server/tests/TestBase.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lldb/unittests/tools/lldb-server/tests/TestBase.h b/lldb/unittests/tools/lldb-server/tests/TestBase.h
index 76ff96a681276..053ee8ff1ffec 100644
--- a/lldb/unittests/tools/lldb-server/tests/TestBase.h
+++ b/lldb/unittests/tools/lldb-server/tests/TestBase.h
@@ -25,6 +25,11 @@ class TestBase: public ::testing::Test {
     lldb_private::HostInfo::Initialize();
   }
 
+  static void TearDownTestCase() {
+    lldb_private::HostInfo::Terminate();
+    lldb_private::FileSystem::Terminate();
+  }
+
   static std::string getInferiorPath(llvm::StringRef Name) {
     llvm::SmallString<64> Path(LLDB_TEST_INFERIOR_PATH);
     llvm::sys::path::append(Path, Name + LLDB_TEST_INFERIOR_SUFFIX);

From fade9cbed76343c1e63657c4d425d3c47b0d73bf Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Mon, 3 Jun 2019 15:26:07 +0000
Subject: [PATCH 0925/1176] [llvm-ar] Fix relative thin archive path handling

This fixes some thin archive relative path issues, paths are shortened where possible and paths are output correctly when using the display table command.

Differential Revision: https://reviews.llvm.org/D59491

llvm-svn: 362407
---
 llvm/include/llvm/Object/ArchiveWriter.h      |  2 +-
 llvm/lib/Object/ArchiveWriter.cpp             | 51 ++++++++++++-------
 llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp   | 11 ++--
 llvm/test/tools/llvm-ar/reduce-thin-path.test | 10 ++++
 llvm/test/tools/llvm-ar/thin-archive.test     | 45 ++++++++++++++++
 .../ELF/archive-unknown-members.test          |  8 +--
 .../llvm-readobj/thin-archive-paths.test      |  6 +--
 llvm/tools/llvm-ar/llvm-ar.cpp                | 38 ++++++++++----
 8 files changed, 134 insertions(+), 37 deletions(-)
 create mode 100644 llvm/test/tools/llvm-ar/reduce-thin-path.test
 create mode 100644 llvm/test/tools/llvm-ar/thin-archive.test

diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index cf415e92bc79b..9e6daf2da36e9 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -36,7 +36,7 @@ struct NewArchiveMember {
                                             bool Deterministic);
 };
 
-std::string computeArchiveRelativePath(StringRef From, StringRef To);
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    bool WriteSymtab, object::Archive::Kind Kind,
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 849d2835772e2..68c40054bb98d 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -494,29 +494,46 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 }
 
 namespace llvm {
+
+static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) {
+  SmallString<128> Ret = P;
+  std::error_code Err = sys::fs::make_absolute(Ret);
+  if (Err)
+    return Err;
+  sys::path::remove_dots(Ret, /*removedotdot*/ true);
+  return Ret;
+}
+
 // Compute the relative path from From to To.
-std::string computeArchiveRelativePath(StringRef From, StringRef To) {
-  if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
-    return To;
-
-  StringRef DirFrom = sys::path::parent_path(From);
-  auto FromI = sys::path::begin(DirFrom);
-  auto ToI = sys::path::begin(To);
-  while (*FromI == *ToI) {
-    ++FromI;
-    ++ToI;
-  }
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
+  ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To);
+  ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From);
+  if (!PathToOrErr || !DirFromOrErr)
+    return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+  const SmallString<128> &PathTo = *PathToOrErr;
+  const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr);
+
+  // Can't construct a relative path between different roots
+  if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom))
+    return sys::path::convert_to_slash(PathTo);
+
+  // Skip common prefixes
+  auto FromTo =
+      std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom),
+                    sys::path::begin(PathTo), sys::path::end(PathTo));
+  auto FromI = FromTo.first;
+  auto ToI = FromTo.second;
 
+  // Construct relative path
   SmallString<128> Relative;
   for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
-    sys::path::append(Relative, "..");
+    sys::path::append(Relative, sys::path::Style::posix, "..");
 
-  for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
-    sys::path::append(Relative, *ToI);
+  for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI)
+    sys::path::append(Relative, sys::path::Style::posix, *ToI);
 
-  // Replace backslashes with slashes so that the path is portable between *nix
-  // and Windows.
-  return sys::path::convert_to_slash(Relative);
+  return Relative.str();
 }
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 34a83147a3a63..2d44686dd280f 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -211,9 +211,14 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   // llvm-lib uses relative paths for both regular and thin archives, unlike
   // standard GNU ar, which only uses relative paths for thin archives and
   // basenames for regular archives.
-  for (NewArchiveMember &Member : Members)
-    Member.MemberName =
-        Saver.save(computeArchiveRelativePath(OutputPath, Member.MemberName));
+  for (NewArchiveMember &Member : Members) {
+    if (sys::path::is_relative(Member.MemberName)) {
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(OutputPath, Member.MemberName);
+      if (PathOrErr)
+        Member.MemberName = Saver.save(*PathOrErr);
+    }
+  }
 
   if (Error E =
           writeArchive(OutputPath, Members,
diff --git a/llvm/test/tools/llvm-ar/reduce-thin-path.test b/llvm/test/tools/llvm-ar/reduce-thin-path.test
new file mode 100644
index 0000000000000..aea6101ce9bc6
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/reduce-thin-path.test
@@ -0,0 +1,10 @@
+RUN: rm -rf %t && mkdir -p %t/foo/bar/
+RUN: mkdir -p %t/baz/
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
+
+RUN: cd %t && llvm-ar rTc %t/baz/internal.ar elf.o
+RUN: cd %t/foo && llvm-ar rTc %t/foo/bar/external.ar ../baz/internal.ar
+
+RUN: FileCheck -input-file=%t/foo/bar/external.ar %s
+
+CHECK: {{^}}../../elf.o/
diff --git a/llvm/test/tools/llvm-ar/thin-archive.test b/llvm/test/tools/llvm-ar/thin-archive.test
new file mode 100644
index 0000000000000..8d9543b686968
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/thin-archive.test
@@ -0,0 +1,45 @@
+RUN: rm -rf %t && mkdir -p %t/foo/bar/
+
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/foo/elf.o
+RUN: cp %t/foo/elf.o %t/foo/bar/elf.o
+RUN: cp %t/foo/bar/elf.o %t/delete.o
+
+Test that modules can be added with absolute paths when the archive is created using an absolute path
+
+RUN: llvm-ar rTc %t/absolute-1.ar %t/foo/elf.o %t/delete.o %t/foo/bar/elf.o
+RUN: llvm-ar dT %t/absolute-1.ar delete.o
+
+RUN: FileCheck -input-file=%t/absolute-1.ar --check-prefixes=THIN,CHECK %s -DPATH=%/t/
+RUN: llvm-ar t %t/absolute-1.ar | FileCheck %s -DPATH=%/t/
+
+Test that modules can be added with absolute paths when the archive is created using a relative path
+
+RUN: llvm-ar rTc Output/%basename_t.tmp/absolute-2.ar %t/foo/elf.o %t/delete.o %t/foo/bar/elf.o
+RUN: llvm-ar dT Output/%basename_t.tmp/absolute-2.ar %t/delete.o
+
+RUN: FileCheck -input-file=%t/absolute-2.ar --check-prefixes=THIN,CHECK %s -DPATH=%/t/
+RUN: llvm-ar t %t/absolute-2.ar | FileCheck %s -DPATH=%/t/
+
+These tests must be run in %t/foo. cd %t is included on each line to make debugging this test case easier.
+
+Test that modules can be added with relative paths when the archive is created using a relative path
+
+RUN: cd %t/foo && llvm-ar rTc ../relative-1.ar elf.o ../delete.o bar/elf.o
+RUN: cd %t/foo && llvm-ar dT ../relative-1.ar delete.o
+
+RUN: FileCheck -input-file=%t/relative-1.ar --check-prefixes=THIN,CHECK %s -DPATH=
+RUN: llvm-ar t %t/relative-1.ar | FileCheck %s -DPATH=%/t/
+
+Test that modules can be added with relative paths when the archive is created using a absolute path
+
+RUN: cd %t/foo && llvm-ar rTc %t/relative-2.ar elf.o ../delete.o bar/elf.o
+RUN: cd %t/foo && llvm-ar dT %t/relative-2.ar delete.o
+
+RUN: FileCheck -input-file=%t/relative-2.ar --check-prefixes=THIN,CHECK %s -DPATH=
+RUN: llvm-ar t %t/relative-2.ar | FileCheck %s -DPATH=%/t/
+
+THIN: !<thin>
+
+CHECK-NOT: delete.o
+CHECK: {{^}}[[PATH]]foo/elf.o
+CHECK: {{^}}[[PATH]]foo/bar/elf.o
diff --git a/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test b/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
index 6540b630f7dac..39a6597a83bfb 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
@@ -23,10 +23,10 @@
 # RUN: llvm-ar rcT %t.thin1.a %t1.o %s
 # RUN: llvm-ar rcT %t.thin2.a %t2.o %s
 
-# RUN: not llvm-objcopy --strip-debug %t.thin1.a 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%t.thin1.a -DMEMBER=%s
-# RUN: not llvm-strip --strip-debug %t.thin2.a 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%t.thin2.a -DMEMBER=%s
+# RUN: not llvm-objcopy --strip-debug %/t.thin1.a 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%/t.thin1.a -DMEMBER=%/s
+# RUN: not llvm-strip --strip-debug %/t.thin2.a 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%/t.thin2.a -DMEMBER=%/s
 ## Verify that the first member was not modified, if a later member could not
 ## be recognized.
 # RUN: cmp %t.o %t1.o
diff --git a/llvm/test/tools/llvm-readobj/thin-archive-paths.test b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
index f1952c739ccff..d7a971eb303d8 100644
--- a/llvm/test/tools/llvm-readobj/thin-archive-paths.test
+++ b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
@@ -23,11 +23,11 @@
 # RUN: llvm-ar rcT c/absolute.a %t/a/b/1.o
 
 # Show that absolute paths in the file header printing are correct.
-# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%t
+# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%/t
 # ABS: File: [[DIR]]/a/b/1.o
 
 # Show that absolute paths in an error message for both archive and member are correct.
 # RUN: rm a/b/1.o
-# RUN: not llvm-readobj --file-headers %t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%t
-# RUN: not llvm-readelf --file-headers %t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%t
+# RUN: not llvm-readobj --file-headers %/t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%/t
+# RUN: not llvm-readelf --file-headers %/t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%/t
 # ERR2: error: '[[DIR]]/c/absolute.a': '[[DIR]]/a/b/1.o': {{[Nn]}}o such file or directory
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 04c2396a4fa32..0731f35ac458b 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -464,9 +464,11 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
   }
 
   if (C.getParent()->isThin()) {
-    StringRef ParentDir = sys::path::parent_path(ArchiveName);
-    if (!ParentDir.empty())
-      outs() << ParentDir << '/';
+    if (!sys::path::is_absolute(Name)) {
+      StringRef ParentDir = sys::path::parent_path(ArchiveName);
+      if (!ParentDir.empty())
+        outs() << sys::path::convert_to_slash(ParentDir) << '/';
+    }
   }
   outs() << Name << "\n";
 }
@@ -593,10 +595,18 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
   // the archive it's in, so the file resolves correctly.
   if (Thin && FlattenArchive) {
     StringSaver Saver(Alloc);
-    Expected<std::string> FileNameOrErr = M.getFullName();
+    Expected<std::string> FileNameOrErr = M.getName();
     failIfError(FileNameOrErr.takeError());
-    NMOrErr->MemberName =
-        Saver.save(computeArchiveRelativePath(ArchiveName, *FileNameOrErr));
+    if (sys::path::is_absolute(*FileNameOrErr)) {
+      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(*FileNameOrErr));
+    } else {
+      FileNameOrErr = M.getFullName();
+      failIfError(FileNameOrErr.takeError());
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(ArchiveName, *FileNameOrErr);
+      NMOrErr->MemberName = Saver.save(
+          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(*FileNameOrErr));
+    }
   }
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
@@ -625,9 +635,19 @@ static void addMember(std::vector<NewArchiveMember> &Members,
   // For regular archives, use the basename of the object path for the member
   // name. For thin archives, use the full relative paths so the file resolves
   // correctly.
-  NMOrErr->MemberName =
-      Thin ? Saver.save(computeArchiveRelativePath(ArchiveName, FileName))
-           : sys::path::filename(NMOrErr->MemberName);
+  if (!Thin) {
+    NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
+  } else {
+    if (sys::path::is_absolute(FileName))
+      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(FileName));
+    else {
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(ArchiveName, FileName);
+      NMOrErr->MemberName = Saver.save(
+          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName));
+    }
+  }
+
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
     object::Archive &Lib = readLibrary(FileName);

From bedcaea99a78d869b0c2763c6c2f471b5637fc35 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 15:26:25 +0000
Subject: [PATCH 0926/1176] Include what you use in LanaiInstrInfo.cpp

llvm-svn: 362408
---
 llvm/lib/Target/Lanai/LanaiInstrInfo.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index dd45797edae7a..700a860691027 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -10,10 +10,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
 #include "LanaiInstrInfo.h"
-#include "LanaiMachineFunctionInfo.h"
-#include "LanaiTargetMachine.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "MCTargetDesc/LanaiBaseInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"

From 5099aef86964c4b845d9165b9328e6e8e30fc8a3 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Mon, 3 Jun 2019 15:42:36 +0000
Subject: [PATCH 0927/1176] [PR41567][Sema] Fixed cast kind in addr space
 conversions

This change sets missing cast kind correctly in the address
space conversion case.

Differential Revision: https://reviews.llvm.org/D62299

llvm-svn: 362409
---
 clang/lib/Sema/SemaCast.cpp                         | 4 ++++
 clang/test/CodeGenOpenCLCXX/addrspace-conversion.cl | 7 +++++++
 2 files changed, 11 insertions(+)
 create mode 100644 clang/test/CodeGenOpenCLCXX/addrspace-conversion.cl

diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 0958943f9ef57..46d0f57dc777f 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2450,6 +2450,10 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
     tcr = TryAddressSpaceCast(Self, SrcExpr, DestType, /*CStyle*/ true, msg);
     if (SrcExpr.isInvalid())
       return;
+
+    if (isValidCast(tcr))
+      Kind = CK_AddressSpaceConversion;
+
     if (tcr == TC_NotApplicable) {
       // ... or if that is not possible, a static_cast, ignoring const, ...
       tcr = TryStaticCast(Self, SrcExpr, DestType, CCK, OpRange, msg, Kind,
diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-conversion.cl b/clang/test/CodeGenOpenCLCXX/addrspace-conversion.cl
new file mode 100644
index 0000000000000..38422c24e8596
--- /dev/null
+++ b/clang/test/CodeGenOpenCLCXX/addrspace-conversion.cl
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s
+
+void bar(__generic volatile unsigned int* ptr)
+{
+  //CHECK: addrspacecast i32 addrspace(4)* %{{.+}} to i32 addrspace(1)*
+  auto gptr = (__global volatile unsigned int*)ptr;
+}

From b8fee677bf8e2d6444c556293d6b77fb876654e4 Mon Sep 17 00:00:00 2001
From: Jennifer Yu <jennifer.yu@intel.com>
Date: Mon, 3 Jun 2019 15:57:25 +0000
Subject: [PATCH 0928/1176] Re-check in clang support gun asm goto after fixing
 tests.

llvm-svn: 362410
---
 clang/include/clang/AST/Stmt.h                |  51 ++++++-
 .../clang/Basic/DiagnosticParseKinds.td       |   4 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |  10 +-
 clang/include/clang/Sema/Sema.h               |   1 +
 clang/lib/AST/ASTImporter.cpp                 |  10 +-
 clang/lib/AST/Stmt.cpp                        |  29 +++-
 clang/lib/AST/StmtPrinter.cpp                 |  20 ++-
 clang/lib/AST/StmtProfile.cpp                 |   3 +
 clang/lib/Analysis/CFG.cpp                    |  74 +++++++---
 clang/lib/CodeGen/CGStmt.cpp                  | 126 ++++++++++++------
 clang/lib/Parse/ParseStmtAsm.cpp              |  68 ++++++++--
 clang/lib/Sema/JumpDiagnostics.cpp            | 114 +++++++++-------
 clang/lib/Sema/SemaStmtAsm.cpp                |  48 ++++++-
 clang/lib/Sema/TreeTransform.h                |  16 ++-
 clang/lib/Serialization/ASTReaderStmt.cpp     |   7 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |   4 +
 clang/test/Analysis/asm-goto.cpp              |  53 ++++++++
 clang/test/CodeGen/asm-goto.c                 |  20 +++
 clang/test/CodeGen/asm.c                      |  12 ++
 clang/test/CodeGen/inline-asm-mixed-style.c   |  10 +-
 clang/test/Coverage/c-language-features.inc   |   4 +-
 clang/test/PCH/asm.h                          |   6 +-
 clang/test/Parser/asm-goto.c                  |  57 ++++++++
 clang/test/Parser/asm-goto.cpp                |  53 ++++++++
 clang/test/Sema/asm-goto.cpp                  |  63 +++++++++
 clang/test/Sema/asm.c                         |  21 +++
 clang/test/Sema/inline-asm-validate-tmpl.cpp  |  10 ++
 27 files changed, 746 insertions(+), 148 deletions(-)
 create mode 100644 clang/test/Analysis/asm-goto.cpp
 create mode 100644 clang/test/CodeGen/asm-goto.c
 create mode 100644 clang/test/Parser/asm-goto.c
 create mode 100644 clang/test/Parser/asm-goto.cpp
 create mode 100644 clang/test/Sema/asm-goto.cpp

diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index 77b2173fcb878..fe5d802688466 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -46,6 +46,7 @@ class Attr;
 class CapturedDecl;
 class Decl;
 class Expr;
+class AddrLabelExpr;
 class LabelDecl;
 class ODRHash;
 class PrinterHelper;
@@ -2816,13 +2817,15 @@ class GCCAsmStmt : public AsmStmt {
   StringLiteral **Constraints = nullptr;
   StringLiteral **Clobbers = nullptr;
   IdentifierInfo **Names = nullptr;
+  unsigned NumLabels = 0;
 
 public:
   GCCAsmStmt(const ASTContext &C, SourceLocation asmloc, bool issimple,
              bool isvolatile, unsigned numoutputs, unsigned numinputs,
              IdentifierInfo **names, StringLiteral **constraints, Expr **exprs,
              StringLiteral *asmstr, unsigned numclobbers,
-             StringLiteral **clobbers, SourceLocation rparenloc);
+             StringLiteral **clobbers, unsigned numlabels,
+             SourceLocation rparenloc);
 
   /// Build an empty inline-assembly statement.
   explicit GCCAsmStmt(EmptyShell Empty) : AsmStmt(GCCAsmStmtClass, Empty) {}
@@ -2947,6 +2950,51 @@ class GCCAsmStmt : public AsmStmt {
     return const_cast<GCCAsmStmt*>(this)->getInputExpr(i);
   }
 
+  //===--- Labels ---===//
+
+  bool isAsmGoto() const {
+    return NumLabels > 0;
+  }
+
+  unsigned getNumLabels() const {
+    return NumLabels;
+  }
+
+  IdentifierInfo *getLabelIdentifier(unsigned i) const {
+    return Names[i + NumInputs];
+  }
+
+  AddrLabelExpr *getLabelExpr(unsigned i) const;
+  StringRef getLabelName(unsigned i) const;
+  using labels_iterator = CastIterator<AddrLabelExpr>;
+  using const_labels_iterator = ConstCastIterator<AddrLabelExpr>;
+  using labels_range = llvm::iterator_range<labels_iterator>;
+  using labels_const_range = llvm::iterator_range<const_labels_iterator>;
+
+  labels_iterator begin_labels() {
+    return &Exprs[0] + NumInputs;
+  }
+
+  labels_iterator end_labels() {
+    return &Exprs[0] + NumInputs + NumLabels;
+  }
+
+  labels_range labels() {
+    return labels_range(begin_labels(), end_labels());
+  }
+
+  const_labels_iterator begin_labels() const {
+    return &Exprs[0] + NumInputs;
+  }
+
+  const_labels_iterator end_labels() const {
+    return &Exprs[0] + NumInputs + NumLabels;
+  }
+
+  labels_const_range labels() const {
+    return labels_const_range(begin_labels(), end_labels());
+  }
+
 private:
   void setOutputsAndInputsAndClobbers(const ASTContext &C,
                                       IdentifierInfo **Names,
@@ -2954,6 +3002,7 @@ class GCCAsmStmt : public AsmStmt {
                                       Stmt **Exprs,
                                       unsigned NumOutputs,
                                       unsigned NumInputs,
+                                      unsigned NumLabels,
                                       StringLiteral **Clobbers,
                                       unsigned NumClobbers);
 
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index fb281a5be86a5..15a5ecf177514 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -27,8 +27,8 @@ def err_msasm_unable_to_create_target : Error<
   "MS-style inline assembly is not available: %0">;
 def err_gnu_inline_asm_disabled : Error<
   "GNU-style inline assembly is disabled">;
-def err_asm_goto_not_supported_yet : Error<
-  "'asm goto' constructs are not supported yet">;
+def err_asm_goto_cannot_have_output : Error<
+  "'asm goto' cannot have output constraints">;
 }
 
 let CategoryName = "Parse Issue" in {
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 058d7d4e7a40a..761bd22819a6b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5073,12 +5073,12 @@ def warn_cxx98_compat_switch_into_protected_scope : Warning<
 def err_indirect_goto_without_addrlabel : Error<
   "indirect goto in function with no address-of-label expressions">;
 def err_indirect_goto_in_protected_scope : Error<
-  "cannot jump from this indirect goto statement to one of its possible targets">;
+  "cannot jump from this %select{indirect|asm}0 goto statement to one of its possible targets">;
 def warn_cxx98_compat_indirect_goto_in_protected_scope : Warning<
-  "jump from this indirect goto statement to one of its possible targets "
+  "jump from this %select{indirect|asm}0 goto statement to one of its possible targets "
   "is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
 def note_indirect_goto_target : Note<
-  "possible target of indirect goto statement">;
+  "possible target of %select{indirect|asm}0 goto statement">;
 def note_protected_by_variable_init : Note<
   "jump bypasses variable initialization">;
 def note_protected_by_variable_nontriv_destructor : Note<
@@ -7506,6 +7506,10 @@ let CategoryName = "Inline Assembly Issue" in {
     "use constraint modifier \"%0\"">;
   def note_asm_input_duplicate_first : Note<
     "constraint '%0' is already present here">;
+ def error_duplicate_asm_operand_name : Error<
+    "duplicate use of asm operand name \"%0\"">;
+ def note_duplicate_asm_operand_name : Note<
+    "asm operand name \"%0\" first referenced here">;
 }
 
   def error_inoutput_conflict_with_clobber : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b4f721c091977..7bccaf77c1e9c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -3985,6 +3985,7 @@ class Sema {
                              unsigned NumInputs, IdentifierInfo **Names,
                              MultiExprArg Constraints, MultiExprArg Exprs,
                              Expr *AsmString, MultiExprArg Clobbers,
+                             unsigned NumLabels,
                              SourceLocation RParenLoc);
 
   void FillInlineAsmIdentifierInfo(Expr *Res,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 2e4c304b3de20..1f1ec1d687c2e 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -5592,12 +5592,17 @@ ExpectedStmt ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
       return InputOrErr.takeError();
   }
 
-  SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs());
+  SmallVector<Expr *, 4> Exprs(S->getNumOutputs() + S->getNumInputs() +
+                               S->getNumLabels());
   if (Error Err = ImportContainerChecked(S->outputs(), Exprs))
     return std::move(Err);
 
+  if (Error Err =
+          ImportArrayChecked(S->inputs(), Exprs.begin() + S->getNumOutputs()))
+    return std::move(Err);
+
   if (Error Err = ImportArrayChecked(
-      S->inputs(), Exprs.begin() + S->getNumOutputs()))
+          S->labels(), Exprs.begin() + S->getNumOutputs() + S->getNumInputs()))
     return std::move(Err);
 
   ExpectedSLoc AsmLocOrErr = import(S->getAsmLoc());
@@ -5623,6 +5628,7 @@ ExpectedStmt ASTNodeImporter::VisitGCCAsmStmt(GCCAsmStmt *S) {
       *AsmStrOrErr,
       S->getNumClobbers(),
       Clobbers.data(),
+      S->getNumLabels(),
       *RParenLocOrErr);
 }
 
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index 68a5a2d6ab0ed..0a4d403106bd4 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -444,6 +444,14 @@ void GCCAsmStmt::setInputExpr(unsigned i, Expr *E) {
   Exprs[i + NumOutputs] = E;
 }
 
+AddrLabelExpr *GCCAsmStmt::getLabelExpr(unsigned i) const {
+  return cast<AddrLabelExpr>(Exprs[i + NumInputs]);
+}
+
+StringRef GCCAsmStmt::getLabelName(unsigned i) const {
+  return getLabelExpr(i)->getLabel()->getName();
+}
+
 /// getInputConstraint - Return the specified input constraint.  Unlike output
 /// constraints, these can be empty.
 StringRef GCCAsmStmt::getInputConstraint(unsigned i) const {
@@ -456,13 +464,16 @@ void GCCAsmStmt::setOutputsAndInputsAndClobbers(const ASTContext &C,
                                                 Stmt **Exprs,
                                                 unsigned NumOutputs,
                                                 unsigned NumInputs,
+                                                unsigned NumLabels,
                                                 StringLiteral **Clobbers,
                                                 unsigned NumClobbers) {
   this->NumOutputs = NumOutputs;
   this->NumInputs = NumInputs;
   this->NumClobbers = NumClobbers;
+  this->NumLabels = NumLabels;
+  assert(!(NumOutputs && NumLabels) && "asm goto cannot have outputs");
 
-  unsigned NumExprs = NumOutputs + NumInputs;
+  unsigned NumExprs = NumOutputs + NumInputs + NumLabels;
 
   C.Deallocate(this->Names);
   this->Names = new (C) IdentifierInfo*[NumExprs];
@@ -498,6 +509,10 @@ int GCCAsmStmt::getNamedOperand(StringRef SymbolicName) const {
     if (getInputName(i) == SymbolicName)
       return getNumOutputs() + NumPlusOperands + i;
 
+  for (unsigned i = 0, e = getNumLabels(); i != e; ++i)
+    if (getLabelName(i) == SymbolicName)
+      return i + getNumInputs();
+
   // Not found.
   return -1;
 }
@@ -615,8 +630,8 @@ unsigned GCCAsmStmt::AnalyzeAsmString(SmallVectorImpl<AsmStringPiece>&Pieces,
       while (CurPtr != StrEnd && isDigit(*CurPtr))
         N = N*10 + ((*CurPtr++)-'0');
 
-      unsigned NumOperands =
-        getNumOutputs() + getNumPlusOperands() + getNumInputs();
+      unsigned NumOperands = getNumOutputs() + getNumPlusOperands() +
+                             getNumInputs() + getNumLabels();
       if (N >= NumOperands) {
         DiagOffs = CurPtr-StrStart-1;
         return diag::err_asm_invalid_operand_number;
@@ -729,10 +744,12 @@ GCCAsmStmt::GCCAsmStmt(const ASTContext &C, SourceLocation asmloc,
                        unsigned numinputs, IdentifierInfo **names,
                        StringLiteral **constraints, Expr **exprs,
                        StringLiteral *asmstr, unsigned numclobbers,
-                       StringLiteral **clobbers, SourceLocation rparenloc)
+                       StringLiteral **clobbers, unsigned numlabels,
+                       SourceLocation rparenloc)
     : AsmStmt(GCCAsmStmtClass, asmloc, issimple, isvolatile, numoutputs,
-              numinputs, numclobbers), RParenLoc(rparenloc), AsmStr(asmstr) {
-  unsigned NumExprs = NumOutputs + NumInputs;
+              numinputs, numclobbers),
+              RParenLoc(rparenloc), AsmStr(asmstr), NumLabels(numlabels) {
+  unsigned NumExprs = NumOutputs + NumInputs + NumLabels;
 
   Names = new (C) IdentifierInfo*[NumExprs];
   std::copy(names, names + NumExprs, Names);
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 7fe0be5217dbc..563095f89b9b3 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -414,12 +414,15 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   if (Node->isVolatile())
     OS << "volatile ";
 
+  if (Node->isAsmGoto())
+    OS << "goto ";
+
   OS << "(";
   VisitStringLiteral(Node->getAsmString());
 
   // Outputs
   if (Node->getNumOutputs() != 0 || Node->getNumInputs() != 0 ||
-      Node->getNumClobbers() != 0)
+      Node->getNumClobbers() != 0 || Node->getNumLabels() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumOutputs(); i != e; ++i) {
@@ -439,7 +442,8 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   }
 
   // Inputs
-  if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0)
+  if (Node->getNumInputs() != 0 || Node->getNumClobbers() != 0 ||
+      Node->getNumLabels() != 0)
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumInputs(); i != e; ++i) {
@@ -459,7 +463,7 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
   }
 
   // Clobbers
-  if (Node->getNumClobbers() != 0)
+  if (Node->getNumClobbers() != 0 || Node->getNumLabels())
     OS << " : ";
 
   for (unsigned i = 0, e = Node->getNumClobbers(); i != e; ++i) {
@@ -469,6 +473,16 @@ void StmtPrinter::VisitGCCAsmStmt(GCCAsmStmt *Node) {
     VisitStringLiteral(Node->getClobberStringLiteral(i));
   }
 
+  // Labels
+  if (Node->getNumLabels() != 0)
+    OS << " : ";
+
+  for (unsigned i = 0, e = Node->getNumLabels(); i != e; ++i) {
+    if (i != 0)
+      OS << ", ";
+    OS << Node->getLabelName(i);
+  }
+
   OS << ");";
   if (Policy.IncludeNewlines) OS << NL;
 }
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 93bdcac8b5496..c5da5bfda9cb5 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -321,6 +321,9 @@ void StmtProfiler::VisitGCCAsmStmt(const GCCAsmStmt *S) {
   ID.AddInteger(S->getNumClobbers());
   for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
     VisitStringLiteral(S->getClobberStringLiteral(I));
+  ID.AddInteger(S->getNumLabels());
+  for (auto *L : S->labels())
+    VisitDecl(L->getLabel());
 }
 
 void StmtProfiler::VisitMSAsmStmt(const MSAsmStmt *S) {
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index 1d83359341528..b53bfcca37cd4 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -549,6 +549,7 @@ class CFGBuilder {
   CFGBlock *VisitExprWithCleanups(ExprWithCleanups *E, AddStmtChoice asc);
   CFGBlock *VisitForStmt(ForStmt *F);
   CFGBlock *VisitGotoStmt(GotoStmt *G);
+  CFGBlock *VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc);
   CFGBlock *VisitIfStmt(IfStmt *I);
   CFGBlock *VisitImplicitCastExpr(ImplicitCastExpr *E, AddStmtChoice asc);
   CFGBlock *VisitConstantExpr(ConstantExpr *E, AddStmtChoice asc);
@@ -1478,22 +1479,38 @@ std::unique_ptr<CFG> CFGBuilder::buildCFG(const Decl *D, Stmt *Statement) {
                                    E = BackpatchBlocks.end(); I != E; ++I ) {
 
     CFGBlock *B = I->block;
-    const GotoStmt *G = cast<GotoStmt>(B->getTerminator());
-    LabelMapTy::iterator LI = LabelMap.find(G->getLabel());
-
-    // If there is no target for the goto, then we are looking at an
-    // incomplete AST.  Handle this by not registering a successor.
-    if (LI == LabelMap.end()) continue;
-
-    JumpTarget JT = LI->second;
-    prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
-                                              JT.scopePosition);
-    prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
-                                           JT.scopePosition);
-    const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
-        B, I->scopePosition, JT.scopePosition);
-    appendScopeBegin(JT.block, VD, G);
-    addSuccessor(B, JT.block);
+    if (auto *G = dyn_cast<GotoStmt>(B->getTerminator())) {
+      LabelMapTy::iterator LI = LabelMap.find(G->getLabel());
+      // If there is no target for the goto, then we are looking at an
+      // incomplete AST.  Handle this by not registering a successor.
+      if (LI == LabelMap.end())
+        continue;
+      JumpTarget JT = LI->second;
+      prependAutomaticObjLifetimeWithTerminator(B, I->scopePosition,
+                                                JT.scopePosition);
+      prependAutomaticObjDtorsWithTerminator(B, I->scopePosition,
+                                             JT.scopePosition);
+      const VarDecl *VD = prependAutomaticObjScopeEndWithTerminator(
+          B, I->scopePosition, JT.scopePosition);
+      appendScopeBegin(JT.block, VD, G);
+      addSuccessor(B, JT.block);
+    };
+    if (auto *G = dyn_cast<GCCAsmStmt>(B->getTerminator())) {
+      CFGBlock *Successor  = (I+1)->block;
+      for (auto *L : G->labels()) {
+        LabelMapTy::iterator LI = LabelMap.find(L->getLabel());
+        // If there is no target for the goto, then we are looking at an
+        // incomplete AST.  Handle this by not registering a successor.
+        if (LI == LabelMap.end())
+          continue;
+        JumpTarget JT = LI->second;
+        // Successor has been added, so skip it.
+        if (JT.block == Successor)
+          continue;
+        addSuccessor(B, JT.block);
+      }
+      I++;
+    }
   }
 
   // Add successors to the Indirect Goto Dispatch block (if we have one).
@@ -2142,6 +2159,9 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc) {
     case Stmt::GotoStmtClass:
       return VisitGotoStmt(cast<GotoStmt>(S));
 
+    case Stmt::GCCAsmStmtClass:
+      return VisitGCCAsmStmt(cast<GCCAsmStmt>(S), asc);
+
     case Stmt::IfStmtClass:
       return VisitIfStmt(cast<IfStmt>(S));
 
@@ -3146,6 +3166,28 @@ CFGBlock *CFGBuilder::VisitGotoStmt(GotoStmt *G) {
   return Block;
 }
 
+CFGBlock *CFGBuilder::VisitGCCAsmStmt(GCCAsmStmt *G, AddStmtChoice asc) {
+  // Goto is a control-flow statement.  Thus we stop processing the current
+  // block and create a new one.
+
+  if (!G->isAsmGoto())
+    return VisitStmt(G, asc);
+
+  if (Block) {
+    Succ = Block;
+    if (badCFG)
+      return nullptr;
+  }
+  Block = createBlock();
+  Block->setTerminator(G);
+  // We will backpatch this block later for all the labels.
+  BackpatchBlocks.push_back(JumpSource(Block, ScopePos));
+  // Save "Succ" in BackpatchBlocks. In the backpatch processing, "Succ" is
+  // used to avoid adding "Succ" again.
+  BackpatchBlocks.push_back(JumpSource(Succ, ScopePos));
+  return Block;
+}
+
 CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) {
   CFGBlock *LoopSuccessor = nullptr;
 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index c617b198d76d5..5c24db709250e 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1896,6 +1896,55 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
   return llvm::MDNode::get(CGF.getLLVMContext(), Locs);
 }
 
+static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
+                              bool ReadOnly, bool ReadNone, const AsmStmt &S,
+                              const std::vector<llvm::Type *> &ResultRegTypes,
+                              CodeGenFunction &CGF,
+                              std::vector<llvm::Value *> &RegResults) {
+  Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                      llvm::Attribute::NoUnwind);
+  // Attach readnone and readonly attributes.
+  if (!HasSideEffect) {
+    if (ReadNone)
+      Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                          llvm::Attribute::ReadNone);
+    else if (ReadOnly)
+      Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                          llvm::Attribute::ReadOnly);
+  }
+
+  // Slap the source location of the inline asm into a !srcloc metadata on the
+  // call.
+  if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S))
+    Result.setMetadata("srcloc",
+                       getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF));
+  else {
+    // At least put the line number on MS inline asm blobs.
+    llvm::Constant *Loc = llvm::ConstantInt::get(CGF.Int32Ty,
+                                        S.getAsmLoc().getRawEncoding());
+    Result.setMetadata("srcloc",
+                       llvm::MDNode::get(CGF.getLLVMContext(),
+                                         llvm::ConstantAsMetadata::get(Loc)));
+  }
+
+  if (CGF.getLangOpts().assumeFunctionsAreConvergent())
+    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
+    // convergent (meaning, they may call an intrinsically convergent op, such
+    // as bar.sync, and so can't have certain optimizations applied around
+    // them).
+    Result.addAttribute(llvm::AttributeList::FunctionIndex,
+                        llvm::Attribute::Convergent);
+  // Extract all of the register value results from the asm.
+  if (ResultRegTypes.size() == 1) {
+    RegResults.push_back(&Result);
+  } else {
+    for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
+      llvm::Value *Tmp = CGF.Builder.CreateExtractValue(&Result, i, "asmresult");
+      RegResults.push_back(Tmp);
+    }
+  }
+}
+
 void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   // Assemble the final asm string.
   std::string AsmString = S.generateAsmString(getContext());
@@ -2138,6 +2187,29 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   }
   Constraints += InOutConstraints;
 
+  // Labels
+  SmallVector<llvm::BasicBlock *, 16> Transfer;
+  llvm::BasicBlock *Fallthrough = nullptr;
+  bool IsGCCAsmGoto = false;
+  if (const auto *GS =  dyn_cast<GCCAsmStmt>(&S)) {
+    IsGCCAsmGoto = GS->isAsmGoto();
+    if (IsGCCAsmGoto) {
+      for (auto *E : GS->labels()) {
+        JumpDest Dest = getJumpDestForLabel(E->getLabel());
+        Transfer.push_back(Dest.getBlock());
+        llvm::BlockAddress *BA =
+            llvm::BlockAddress::get(CurFn, Dest.getBlock());
+        Args.push_back(BA);
+        ArgTypes.push_back(BA->getType());
+        if (!Constraints.empty())
+          Constraints += ',';
+        Constraints += 'X';
+      }
+      StringRef Name = "asm.fallthrough";
+      Fallthrough = createBasicBlock(Name);
+    }
+  }
+
   // Clobbers
   for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
     StringRef Clobber = S.getClobber(i);
@@ -2180,52 +2252,18 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   llvm::InlineAsm *IA =
     llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect,
                          /* IsAlignStack */ false, AsmDialect);
-  llvm::CallInst *Result =
-      Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
-  Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                       llvm::Attribute::NoUnwind);
-
-  // Attach readnone and readonly attributes.
-  if (!HasSideEffect) {
-    if (ReadNone)
-      Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                           llvm::Attribute::ReadNone);
-    else if (ReadOnly)
-      Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                           llvm::Attribute::ReadOnly);
-  }
-
-  // Slap the source location of the inline asm into a !srcloc metadata on the
-  // call.
-  if (const GCCAsmStmt *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S)) {
-    Result->setMetadata("srcloc", getAsmSrcLocInfo(gccAsmStmt->getAsmString(),
-                                                   *this));
-  } else {
-    // At least put the line number on MS inline asm blobs.
-    auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding());
-    Result->setMetadata("srcloc",
-                        llvm::MDNode::get(getLLVMContext(),
-                                          llvm::ConstantAsMetadata::get(Loc)));
-  }
-
-  if (getLangOpts().assumeFunctionsAreConvergent()) {
-    // Conservatively, mark all inline asm blocks in CUDA or OpenCL as
-    // convergent (meaning, they may call an intrinsically convergent op, such
-    // as bar.sync, and so can't have certain optimizations applied around
-    // them).
-    Result->addAttribute(llvm::AttributeList::FunctionIndex,
-                         llvm::Attribute::Convergent);
-  }
-
-  // Extract all of the register value results from the asm.
   std::vector<llvm::Value*> RegResults;
-  if (ResultRegTypes.size() == 1) {
-    RegResults.push_back(Result);
+  if (IsGCCAsmGoto) {
+    llvm::CallBrInst *Result =
+        Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
+    UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
+                      ReadNone, S, ResultRegTypes, *this, RegResults);
+    EmitBlock(Fallthrough);
   } else {
-    for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
-      llvm::Value *Tmp = Builder.CreateExtractValue(Result, i, "asmresult");
-      RegResults.push_back(Tmp);
-    }
+    llvm::CallInst *Result =
+        Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
+    UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly,
+                      ReadNone, S, ResultRegTypes, *this, RegResults);
   }
 
   assert(RegResults.size() == ResultRegTypes.size());
diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp
index c63808a472b5a..75f3ac396e1a4 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -710,12 +710,12 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
 
   // Remember if this was a volatile asm.
   bool isVolatile = DS.getTypeQualifiers() & DeclSpec::TQ_volatile;
+  // Remember if this was a goto asm.
+  bool isGotoAsm = false;
 
-  // TODO: support "asm goto" constructs (PR#9295).
   if (Tok.is(tok::kw_goto)) {
-    Diag(Tok, diag::err_asm_goto_not_supported_yet);
-    SkipUntil(tok::r_paren, StopAtSemi);
-    return StmtError();
+    isGotoAsm = true;
+    ConsumeToken();
   }
 
   if (Tok.isNot(tok::l_paren)) {
@@ -753,7 +753,8 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
     return Actions.ActOnGCCAsmStmt(AsmLoc, /*isSimple*/ true, isVolatile,
                                    /*NumOutputs*/ 0, /*NumInputs*/ 0, nullptr,
                                    Constraints, Exprs, AsmString.get(),
-                                   Clobbers, T.getCloseLocation());
+                                   Clobbers, /*NumLabels*/ 0,
+                                   T.getCloseLocation());
   }
 
   // Parse Outputs, if present.
@@ -763,6 +764,12 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
     AteExtraColon = Tok.is(tok::coloncolon);
     ConsumeToken();
 
+    if (!AteExtraColon && isGotoAsm && Tok.isNot(tok::colon)) {
+      Diag(Tok, diag::err_asm_goto_cannot_have_output);
+      SkipUntil(tok::r_paren, StopAtSemi);
+      return StmtError();
+    }
+
     if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs))
       return StmtError();
   }
@@ -789,12 +796,15 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
   unsigned NumInputs = Names.size() - NumOutputs;
 
   // Parse the clobbers, if present.
-  if (AteExtraColon || Tok.is(tok::colon)) {
-    if (!AteExtraColon)
+  if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) {
+    if (AteExtraColon)
+      AteExtraColon = false;
+    else {
+      AteExtraColon = Tok.is(tok::coloncolon);
       ConsumeToken();
-
+    }
     // Parse the asm-string list for clobbers if present.
-    if (Tok.isNot(tok::r_paren)) {
+    if (!AteExtraColon && isTokenStringLiteral()) {
       while (1) {
         ExprResult Clobber(ParseAsmStringLiteral());
 
@@ -808,11 +818,49 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) {
       }
     }
   }
+  if (!isGotoAsm && (Tok.isNot(tok::r_paren) || AteExtraColon)) {
+    Diag(Tok, diag::err_expected) << tok::r_paren;
+    SkipUntil(tok::r_paren, StopAtSemi);
+    return StmtError();
+  }
+
+  // Parse the goto label, if present.
+  unsigned NumLabels = 0;
+  if (AteExtraColon || Tok.is(tok::colon)) {
+    if (!AteExtraColon)
+      ConsumeToken();
 
+    while (true) {
+      if (Tok.isNot(tok::identifier)) {
+        Diag(Tok, diag::err_expected) << tok::identifier;
+        SkipUntil(tok::r_paren, StopAtSemi);
+        return StmtError();
+      }
+      LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(),
+                                                  Tok.getLocation());
+      Names.push_back(Tok.getIdentifierInfo());
+      if (!LD) {
+        SkipUntil(tok::r_paren, StopAtSemi);
+        return StmtError();
+      }
+      ExprResult Res =
+          Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD);
+      Exprs.push_back(Res.get());
+      NumLabels++;
+      ConsumeToken();
+      if (!TryConsumeToken(tok::comma))
+        break;
+    }
+  } else if (isGotoAsm) {
+    Diag(Tok, diag::err_expected) << tok::colon;
+    SkipUntil(tok::r_paren, StopAtSemi);
+    return StmtError();
+  }
   T.consumeClose();
   return Actions.ActOnGCCAsmStmt(
       AsmLoc, false, isVolatile, NumOutputs, NumInputs, Names.data(),
-      Constraints, Exprs, AsmString.get(), Clobbers, T.getCloseLocation());
+      Constraints, Exprs, AsmString.get(), Clobbers, NumLabels,
+      T.getCloseLocation());
 }
 
 /// ParseAsmOperands - Parse the asm-operands production as used by
diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp
index 2234d6ba9b11f..c8743df90e340 100644
--- a/clang/lib/Sema/JumpDiagnostics.cpp
+++ b/clang/lib/Sema/JumpDiagnostics.cpp
@@ -65,8 +65,10 @@ class JumpScopeChecker {
   llvm::DenseMap<Stmt*, unsigned> LabelAndGotoScopes;
   SmallVector<Stmt*, 16> Jumps;
 
-  SmallVector<IndirectGotoStmt*, 4> IndirectJumps;
+  SmallVector<Stmt*, 4> IndirectJumps;
+  SmallVector<Stmt*, 4> AsmJumps;
   SmallVector<LabelDecl*, 4> IndirectJumpTargets;
+  SmallVector<LabelDecl*, 4> AsmJumpTargets;
 public:
   JumpScopeChecker(Stmt *Body, Sema &S);
 private:
@@ -76,10 +78,10 @@ class JumpScopeChecker {
   void BuildScopeInformation(Stmt *S, unsigned &origParentScope);
 
   void VerifyJumps();
-  void VerifyIndirectJumps();
+  void VerifyIndirectOrAsmJumps(bool IsAsmGoto);
   void NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes);
-  void DiagnoseIndirectJump(IndirectGotoStmt *IG, unsigned IGScope,
-                            LabelDecl *Target, unsigned TargetScope);
+  void DiagnoseIndirectOrAsmJump(Stmt *IG, unsigned IGScope, LabelDecl *Target,
+                                 unsigned TargetScope);
   void CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc,
                  unsigned JumpDiag, unsigned JumpDiagWarning,
                  unsigned JumpDiagCXX98Compat);
@@ -103,7 +105,8 @@ JumpScopeChecker::JumpScopeChecker(Stmt *Body, Sema &s)
 
   // Check that all jumps we saw are kosher.
   VerifyJumps();
-  VerifyIndirectJumps();
+  VerifyIndirectOrAsmJumps(false);
+  VerifyIndirectOrAsmJumps(true);
 }
 
 /// GetDeepestCommonScope - Finds the innermost scope enclosing the
@@ -316,7 +319,7 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     }
 
     LabelAndGotoScopes[S] = ParentScope;
-    IndirectJumps.push_back(cast<IndirectGotoStmt>(S));
+    IndirectJumps.push_back(S);
     break;
 
   case Stmt::SwitchStmtClass:
@@ -339,6 +342,18 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S,
     Jumps.push_back(S);
     break;
 
+  case Stmt::GCCAsmStmtClass:
+    if (auto *GS = dyn_cast<GCCAsmStmt>(S))
+      if (GS->isAsmGoto()) {
+        // Remember both what scope a goto is in as well as the fact that we
+        // have it.  This makes the second scan not have to walk the AST again.
+        LabelAndGotoScopes[S] = ParentScope;
+        AsmJumps.push_back(GS);
+        for (auto *E : GS->labels())
+          AsmJumpTargets.push_back(E->getLabel());
+      }
+    break;
+
   case Stmt::IfStmtClass: {
     IfStmt *IS = cast<IfStmt>(S);
     if (!(IS->isConstexpr() || IS->isObjCAvailabilityCheck()))
@@ -629,14 +644,13 @@ void JumpScopeChecker::VerifyJumps() {
   }
 }
 
-/// VerifyIndirectJumps - Verify whether any possible indirect jump
-/// might cross a protection boundary.  Unlike direct jumps, indirect
-/// jumps count cleanups as protection boundaries:  since there's no
-/// way to know where the jump is going, we can't implicitly run the
-/// right cleanups the way we can with direct jumps.
-///
-/// Thus, an indirect jump is "trivial" if it bypasses no
-/// initializations and no teardowns.  More formally, an indirect jump
+/// VerifyIndirectOrAsmJumps - Verify whether any possible indirect goto or
+/// asm goto jump might cross a protection boundary.  Unlike direct jumps,
+/// indirect or asm goto jumps count cleanups as protection boundaries:
+/// since there's no way to know where the jump is going, we can't implicitly
+/// run the right cleanups the way we can with direct jumps.
+/// Thus, an indirect/asm jump is "trivial" if it bypasses no
+/// initializations and no teardowns.  More formally, an indirect/asm jump
 /// from A to B is trivial if the path out from A to DCA(A,B) is
 /// trivial and the path in from DCA(A,B) to B is trivial, where
 /// DCA(A,B) is the deepest common ancestor of A and B.
@@ -648,36 +662,41 @@ void JumpScopeChecker::VerifyJumps() {
 /// Under these definitions, this function checks that the indirect
 /// jump between A and B is trivial for every indirect goto statement A
 /// and every label B whose address was taken in the function.
-void JumpScopeChecker::VerifyIndirectJumps() {
-  if (IndirectJumps.empty()) return;
-
+void JumpScopeChecker::VerifyIndirectOrAsmJumps(bool IsAsmGoto) {
+  SmallVector<Stmt*, 4> GotoJumps = IsAsmGoto ? AsmJumps : IndirectJumps;
+  if (GotoJumps.empty())
+    return;
+  SmallVector<LabelDecl *, 4> JumpTargets =
+      IsAsmGoto ? AsmJumpTargets : IndirectJumpTargets;
   // If there aren't any address-of-label expressions in this function,
   // complain about the first indirect goto.
-  if (IndirectJumpTargets.empty()) {
-    S.Diag(IndirectJumps[0]->getGotoLoc(),
+  if (JumpTargets.empty()) {
+    assert(!IsAsmGoto &&"only indirect goto can get here");
+    S.Diag(GotoJumps[0]->getBeginLoc(),
            diag::err_indirect_goto_without_addrlabel);
     return;
   }
-
   // Collect a single representative of every scope containing an
-  // indirect goto.  For most code bases, this substantially cuts
+  // indirect or asm goto.  For most code bases, this substantially cuts
   // down on the number of jump sites we'll have to consider later.
-  typedef std::pair<unsigned, IndirectGotoStmt*> JumpScope;
+  typedef std::pair<unsigned, Stmt*> JumpScope;
   SmallVector<JumpScope, 32> JumpScopes;
   {
-    llvm::DenseMap<unsigned, IndirectGotoStmt*> JumpScopesMap;
-    for (SmallVectorImpl<IndirectGotoStmt*>::iterator
-           I = IndirectJumps.begin(), E = IndirectJumps.end(); I != E; ++I) {
-      IndirectGotoStmt *IG = *I;
+    llvm::DenseMap<unsigned, Stmt*> JumpScopesMap;
+    for (SmallVectorImpl<Stmt *>::iterator I = GotoJumps.begin(),
+                                           E = GotoJumps.end();
+         I != E; ++I) {
+      Stmt *IG = *I;
       if (CHECK_PERMISSIVE(!LabelAndGotoScopes.count(IG)))
         continue;
       unsigned IGScope = LabelAndGotoScopes[IG];
-      IndirectGotoStmt *&Entry = JumpScopesMap[IGScope];
+      Stmt *&Entry = JumpScopesMap[IGScope];
       if (!Entry) Entry = IG;
     }
     JumpScopes.reserve(JumpScopesMap.size());
-    for (llvm::DenseMap<unsigned, IndirectGotoStmt*>::iterator
-           I = JumpScopesMap.begin(), E = JumpScopesMap.end(); I != E; ++I)
+    for (llvm::DenseMap<unsigned, Stmt *>::iterator I = JumpScopesMap.begin(),
+                                                    E = JumpScopesMap.end();
+         I != E; ++I)
       JumpScopes.push_back(*I);
   }
 
@@ -685,8 +704,8 @@ void JumpScopeChecker::VerifyIndirectJumps() {
   // label whose address was taken somewhere in the function.
   // For most code bases, there will be only one such scope.
   llvm::DenseMap<unsigned, LabelDecl*> TargetScopes;
-  for (SmallVectorImpl<LabelDecl*>::iterator
-         I = IndirectJumpTargets.begin(), E = IndirectJumpTargets.end();
+  for (SmallVectorImpl<LabelDecl *>::iterator I = JumpTargets.begin(),
+                                              E = JumpTargets.end();
        I != E; ++I) {
     LabelDecl *TheLabel = *I;
     if (CHECK_PERMISSIVE(!LabelAndGotoScopes.count(TheLabel->getStmt())))
@@ -763,7 +782,7 @@ void JumpScopeChecker::VerifyIndirectJumps() {
       // Only diagnose if we didn't find something.
       if (IsReachable) continue;
 
-      DiagnoseIndirectJump(I->second, I->first, TargetLabel, TargetScope);
+      DiagnoseIndirectOrAsmJump(I->second, I->first, TargetLabel, TargetScope);
     }
   }
 }
@@ -784,12 +803,15 @@ static bool IsCXX98CompatWarning(Sema &S, unsigned InDiagNote) {
 }
 
 /// Produce primary diagnostic for an indirect jump statement.
-static void DiagnoseIndirectJumpStmt(Sema &S, IndirectGotoStmt *Jump,
-                                     LabelDecl *Target, bool &Diagnosed) {
+static void DiagnoseIndirectOrAsmJumpStmt(Sema &S, Stmt *Jump,
+                                          LabelDecl *Target, bool &Diagnosed) {
   if (Diagnosed)
     return;
-  S.Diag(Jump->getGotoLoc(), diag::err_indirect_goto_in_protected_scope);
-  S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target);
+  bool IsAsmGoto = isa<GCCAsmStmt>(Jump);
+  S.Diag(Jump->getBeginLoc(), diag::err_indirect_goto_in_protected_scope)
+      << IsAsmGoto;
+  S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target)
+      << IsAsmGoto;
   Diagnosed = true;
 }
 
@@ -803,10 +825,9 @@ void JumpScopeChecker::NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes) {
 }
 
 /// Diagnose an indirect jump which is known to cross scopes.
-void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
-                                            unsigned JumpScope,
-                                            LabelDecl *Target,
-                                            unsigned TargetScope) {
+void JumpScopeChecker::DiagnoseIndirectOrAsmJump(Stmt *Jump, unsigned JumpScope,
+                                                 LabelDecl *Target,
+                                                 unsigned TargetScope) {
   if (CHECK_PERMISSIVE(JumpScope == TargetScope))
     return;
 
@@ -816,7 +837,7 @@ void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
   // Walk out the scope chain until we reach the common ancestor.
   for (unsigned I = JumpScope; I != Common; I = Scopes[I].ParentScope)
     if (Scopes[I].OutDiag) {
-      DiagnoseIndirectJumpStmt(S, Jump, Target, Diagnosed);
+      DiagnoseIndirectOrAsmJumpStmt(S, Jump, Target, Diagnosed);
       S.Diag(Scopes[I].Loc, Scopes[I].OutDiag);
     }
 
@@ -827,15 +848,18 @@ void JumpScopeChecker::DiagnoseIndirectJump(IndirectGotoStmt *Jump,
     if (IsCXX98CompatWarning(S, Scopes[I].InDiag))
       ToScopesCXX98Compat.push_back(I);
     else if (Scopes[I].InDiag) {
-      DiagnoseIndirectJumpStmt(S, Jump, Target, Diagnosed);
+      DiagnoseIndirectOrAsmJumpStmt(S, Jump, Target, Diagnosed);
       S.Diag(Scopes[I].Loc, Scopes[I].InDiag);
     }
 
   // Diagnose this jump if it would be ill-formed in C++98.
   if (!Diagnosed && !ToScopesCXX98Compat.empty()) {
-    S.Diag(Jump->getGotoLoc(),
-           diag::warn_cxx98_compat_indirect_goto_in_protected_scope);
-    S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target);
+    bool IsAsmGoto = isa<GCCAsmStmt>(Jump);
+    S.Diag(Jump->getBeginLoc(),
+           diag::warn_cxx98_compat_indirect_goto_in_protected_scope)
+        << IsAsmGoto;
+    S.Diag(Target->getStmt()->getIdentLoc(), diag::note_indirect_goto_target)
+        << IsAsmGoto;
     NoteJumpIntoScopes(ToScopesCXX98Compat);
   }
 }
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index 8c6012573c64f..ec8958c3c5f90 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -209,11 +209,12 @@ static StringRef extractRegisterName(const Expr *Expression,
 static SourceLocation
 getClobberConflictLocation(MultiExprArg Exprs, StringLiteral **Constraints,
                            StringLiteral **Clobbers, int NumClobbers,
+                           unsigned NumLabels,
                            const TargetInfo &Target, ASTContext &Cont) {
   llvm::StringSet<> InOutVars;
   // Collect all the input and output registers from the extended asm
   // statement in order to check for conflicts with the clobber list
-  for (unsigned int i = 0; i < Exprs.size(); ++i) {
+  for (unsigned int i = 0; i < Exprs.size() - NumLabels; ++i) {
     StringRef Constraint = Constraints[i]->getString();
     StringRef InOutReg = Target.getConstraintRegister(
         Constraint, extractRegisterName(Exprs[i], Target));
@@ -241,6 +242,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
                                  unsigned NumInputs, IdentifierInfo **Names,
                                  MultiExprArg constraints, MultiExprArg Exprs,
                                  Expr *asmString, MultiExprArg clobbers,
+                                 unsigned NumLabels,
                                  SourceLocation RParenLoc) {
   unsigned NumClobbers = clobbers.size();
   StringLiteral **Constraints =
@@ -269,7 +271,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
 
     ExprResult ER = CheckPlaceholderExpr(Exprs[i]);
@@ -330,7 +332,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
   }
 
@@ -352,7 +354,7 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
 
     ExprResult ER = CheckPlaceholderExpr(Exprs[i]);
@@ -451,14 +453,15 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       return new (Context)
           GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                      NumInputs, Names, Constraints, Exprs.data(), AsmString,
-                     NumClobbers, Clobbers, RParenLoc);
+                     NumClobbers, Clobbers, NumLabels, RParenLoc);
     }
   }
 
   GCCAsmStmt *NS =
     new (Context) GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
                              NumInputs, Names, Constraints, Exprs.data(),
-                             AsmString, NumClobbers, Clobbers, RParenLoc);
+                             AsmString, NumClobbers, Clobbers, NumLabels,
+                             RParenLoc);
   // Validate the asm string, ensuring it makes sense given the operands we
   // have.
   SmallVector<GCCAsmStmt::AsmStringPiece, 8> Pieces;
@@ -476,8 +479,10 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
 
     // Look for the correct constraint index.
     unsigned ConstraintIdx = Piece.getOperandNo();
+    // Labels are the last in the Exprs list.
+    if (NS->isAsmGoto() && ConstraintIdx >= NS->getNumInputs())
+      continue;
     unsigned NumOperands = NS->getNumOutputs() + NS->getNumInputs();
-
     // Look for the (ConstraintIdx - NumOperands + 1)th constraint with
     // modifier '+'.
     if (ConstraintIdx >= NumOperands) {
@@ -660,10 +665,39 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
   // Check for conflicts between clobber list and input or output lists
   SourceLocation ConstraintLoc =
       getClobberConflictLocation(Exprs, Constraints, Clobbers, NumClobbers,
+                                 NumLabels,
                                  Context.getTargetInfo(), Context);
   if (ConstraintLoc.isValid())
     targetDiag(ConstraintLoc, diag::error_inoutput_conflict_with_clobber);
 
+  // Check for duplicate asm operand name between input, output and label lists.
+  typedef std::pair<StringRef , Expr *> NamedOperand;
+  SmallVector<NamedOperand, 4> NamedOperandList;
+  for (unsigned i = 0, e = NumOutputs + NumInputs + NumLabels; i != e; ++i)
+    if (Names[i])
+      NamedOperandList.emplace_back(
+          std::make_pair(Names[i]->getName(), Exprs[i]));
+  // Sort NamedOperandList.
+  std::stable_sort(NamedOperandList.begin(), NamedOperandList.end(),
+              [](const NamedOperand &LHS, const NamedOperand &RHS) {
+                return LHS.first < RHS.first;
+              });
+  // Find adjacent duplicate operand.
+  SmallVector<NamedOperand, 4>::iterator Found =
+      std::adjacent_find(begin(NamedOperandList), end(NamedOperandList),
+                         [](const NamedOperand &LHS, const NamedOperand &RHS) {
+                           return LHS.first == RHS.first;
+                         });
+  if (Found != NamedOperandList.end()) {
+    Diag((Found + 1)->second->getBeginLoc(),
+         diag::error_duplicate_asm_operand_name)
+        << (Found + 1)->first;
+    Diag(Found->second->getBeginLoc(), diag::note_duplicate_asm_operand_name)
+        << Found->first;
+    return StmtError();
+  }
+  if (NS->isAsmGoto())
+    setFunctionHasBranchIntoScope();
   return NS;
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 592787a5870ce..9f5a5f6caca62 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1381,10 +1381,11 @@ class TreeTransform {
                                unsigned NumInputs, IdentifierInfo **Names,
                                MultiExprArg Constraints, MultiExprArg Exprs,
                                Expr *AsmString, MultiExprArg Clobbers,
+                               unsigned NumLabels,
                                SourceLocation RParenLoc) {
     return getSema().ActOnGCCAsmStmt(AsmLoc, IsSimple, IsVolatile, NumOutputs,
                                      NumInputs, Names, Constraints, Exprs,
-                                     AsmString, Clobbers, RParenLoc);
+                                     AsmString, Clobbers, NumLabels, RParenLoc);
   }
 
   /// Build a new MS style inline asm statement.
@@ -7059,6 +7060,16 @@ TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
     Exprs.push_back(Result.get());
   }
 
+  // Go through the Labels.
+  for (unsigned I = 0, E = S->getNumLabels(); I != E; ++I) {
+    Names.push_back(S->getLabelIdentifier(I));
+
+    ExprResult Result = getDerived().TransformExpr(S->getLabelExpr(I));
+    if (Result.isInvalid())
+      return StmtError();
+    ExprsChanged |= Result.get() != S->getLabelExpr(I);
+    Exprs.push_back(Result.get());
+  }
   if (!getDerived().AlwaysRebuild() && !ExprsChanged)
     return S;
 
@@ -7072,7 +7083,8 @@ TreeTransform<Derived>::TransformGCCAsmStmt(GCCAsmStmt *S) {
                                         S->isVolatile(), S->getNumOutputs(),
                                         S->getNumInputs(), Names.data(),
                                         Constraints, Exprs, AsmString.get(),
-                                        Clobbers, S->getRParenLoc());
+                                        Clobbers, S->getNumLabels(),
+                                        S->getRParenLoc());
 }
 
 template<typename Derived>
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 4d879b46e1a4a..52aa3d961d200 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -370,12 +370,14 @@ void ASTStmtReader::VisitAsmStmt(AsmStmt *S) {
 
 void ASTStmtReader::VisitGCCAsmStmt(GCCAsmStmt *S) {
   VisitAsmStmt(S);
+  S->NumLabels = Record.readInt();
   S->setRParenLoc(ReadSourceLocation());
   S->setAsmString(cast_or_null<StringLiteral>(Record.readSubStmt()));
 
   unsigned NumOutputs = S->getNumOutputs();
   unsigned NumInputs = S->getNumInputs();
   unsigned NumClobbers = S->getNumClobbers();
+  unsigned NumLabels = S->getNumLabels();
 
   // Outputs and inputs
   SmallVector<IdentifierInfo *, 16> Names;
@@ -392,9 +394,14 @@ void ASTStmtReader::VisitGCCAsmStmt(GCCAsmStmt *S) {
   for (unsigned I = 0; I != NumClobbers; ++I)
     Clobbers.push_back(cast_or_null<StringLiteral>(Record.readSubStmt()));
 
+  // Labels
+  for (unsigned I = 0, N = NumLabels; I != N; ++I)
+    Exprs.push_back(Record.readSubStmt());
+
   S->setOutputsAndInputsAndClobbers(Record.getContext(),
                                     Names.data(), Constraints.data(),
                                     Exprs.data(), NumOutputs, NumInputs,
+                                    NumLabels,
                                     Clobbers.data(), NumClobbers);
 }
 
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index b0a35cf2f5655..776aab6bf51d2 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -283,6 +283,7 @@ void ASTStmtWriter::VisitAsmStmt(AsmStmt *S) {
 
 void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
   VisitAsmStmt(S);
+  Record.push_back(S->getNumLabels());
   Record.AddSourceLocation(S->getRParenLoc());
   Record.AddStmt(S->getAsmString());
 
@@ -304,6 +305,9 @@ void ASTStmtWriter::VisitGCCAsmStmt(GCCAsmStmt *S) {
   for (unsigned I = 0, N = S->getNumClobbers(); I != N; ++I)
     Record.AddStmt(S->getClobberStringLiteral(I));
 
+  // Labels
+  for (auto *E : S->labels()) Record.AddStmt(E);
+
   Code = serialization::STMT_GCCASM;
 }
 
diff --git a/clang/test/Analysis/asm-goto.cpp b/clang/test/Analysis/asm-goto.cpp
new file mode 100644
index 0000000000000..bc212f800401a
--- /dev/null
+++ b/clang/test/Analysis/asm-goto.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_analyze_cc1  -triple i386-pc-linux-gnu -analyzer-checker=debug.DumpCFG %s 2>&1 | FileCheck %s
+// RUN: %clang_analyze_cc1  -triple x86_64-pc-linux-gnu -analyzer-checker=debug.DumpCFG %s 2>&1 | FileCheck %s
+
+int foo(int cond)
+{
+label_true:
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  return 0;
+loop:
+  return 0;
+}
+
+// CHECK-LABEL: loop
+// CHECK-NEXT: 0
+// CHECK-NEXT: return
+// CHECK-NEXT: Preds (1): B3
+// CHECK-NEXT: Succs (1): B0
+
+// CHECK-LABEL: label_true
+// CHECK-NEXT: asm goto
+// CHECK-NEXT: Preds (2): B3 B4
+// CHECK-NEXT: Succs (3): B2 B3 B1
+
+
+int bar(int cond)
+{
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::L1, L2);
+  return 0;
+L1:
+L2:
+  return 0;
+}
+
+// CHECK: [B4]
+// CHECK-NEXT: asm goto
+// CHECK-NEXT: Preds (1): B5
+// CHECK-NEXT: Succs (3): B3 B2 B1
+
+int zoo(int n)
+{
+A5:
+A1:
+  asm goto("testl %0, %0; jne %l1;" :: "r"(n)::A1, A2, A3, A4, A5);
+A2:
+A3:
+A4:
+  return 0;
+}
+
+// CHECK-LABEL: A1
+// CHECK-NEXT: asm goto
+// CHECK-NEXT: Preds (2): B5 B4
+// CHECK-NEXT: Succs (5): B3 B4 B2 B1 B5
diff --git a/clang/test/CodeGen/asm-goto.c b/clang/test/CodeGen/asm-goto.c
new file mode 100644
index 0000000000000..97f2158d8afab
--- /dev/null
+++ b/clang/test/CodeGen/asm-goto.c
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -O0 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu-O0 -emit-llvm %s -o - | FileCheck %s
+
+int foo(int cond)
+{
+  // CHECK: callbr void asm sideeffect
+  // CHECK: to label %asm.fallthrough [label %label_true, label %loop], !srcloc
+  // CHECK: asm.fallthrough:
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  // CHECK: callbr void asm sideeffect
+  // CHECK: to label %asm.fallthrough1 [label %label_true, label %loop], !srcloc
+  // CHECK: asm.fallthrough1:
+  return 0;
+loop:
+  return 0;
+label_true:
+  return 1;
+}
diff --git a/clang/test/CodeGen/asm.c b/clang/test/CodeGen/asm.c
index 038d346e9993a..7de79639bfd72 100644
--- a/clang/test/CodeGen/asm.c
+++ b/clang/test/CodeGen/asm.c
@@ -262,3 +262,15 @@ void t31(int len) {
   // CHECK: @t31
   // CHECK: call void asm sideeffect "", "=*%rm,=*rm,0,1,~{dirflag},~{fpsr},~{flags}"
 }
+
+// CHECK: @t32
+int t32(int cond)
+{
+  asm goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+  // CHECK: callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,X,X,~{dirflag},~{fpsr},~{flags}"(i32 %0, i8* blockaddress(@t32, %label_true), i8* blockaddress(@t32, %loop)) #1
+  return 0;
+loop:
+  return 0;
+label_true:
+  return 1;
+}
diff --git a/clang/test/CodeGen/inline-asm-mixed-style.c b/clang/test/CodeGen/inline-asm-mixed-style.c
index 6b830d9fa7a92..a9e111cd5ddcf 100644
--- a/clang/test/CodeGen/inline-asm-mixed-style.c
+++ b/clang/test/CodeGen/inline-asm-mixed-style.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -fsyntax-only -verify %s -DCHECK_ASM_GOTO
 // RUN: %clang_cc1 -triple i386-unknown-unknown -fasm-blocks -O0 -emit-llvm -S %s -o - | FileCheck %s
 // REQUIRES: x86-registered-target
 
@@ -20,10 +19,11 @@ void f() {
   // CHECK: movl    %ebx, %eax
   // CHECK: movl    %ecx, %edx
 
-#ifdef CHECK_ASM_GOTO
-  __asm volatile goto ("movl %ecx, %edx"); // expected-error {{'asm goto' constructs are not supported yet}}
+  __asm volatile goto ("movl %ecx, %edx");
+  // CHECK: movl    %ecx, %edx
 
   __asm mov eax, ebx
-  __asm goto ("movl %ecx, %edx"); // expected-error {{'asm goto' constructs are not supported yet}}
-#endif
+  __asm goto ("movl %ecx, %edx");
+  // CHECK: movl    %ebx, %eax
+  // CHECK: movl    %ecx, %edx
 }
diff --git a/clang/test/Coverage/c-language-features.inc b/clang/test/Coverage/c-language-features.inc
index 356687907d905..c0259debd83b7 100644
--- a/clang/test/Coverage/c-language-features.inc
+++ b/clang/test/Coverage/c-language-features.inc
@@ -71,7 +71,9 @@ theif:
   }
 
   asm ("nop");
-
+  int cond;
+  asm goto("" ::::label_true);
+label_true:
   return;
 }
 
diff --git a/clang/test/PCH/asm.h b/clang/test/PCH/asm.h
index a568058d58f66..5a7268eff6e6a 100644
--- a/clang/test/PCH/asm.h
+++ b/clang/test/PCH/asm.h
@@ -1,10 +1,14 @@
 // Header for the PCH test asm.c
 
 void f() {
-  int i;
+  int i,cond;
 
   asm ("foo\n" : : "a" (i + 2));
   asm ("foo\n" : [symbolic_name] "=a" (i) : "[symbolic_name]" (i));
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(cond)::label_true, loop);
+label_true:
+loop:
+  return;
 }
 
 void clobbers() {
diff --git a/clang/test/Parser/asm-goto.c b/clang/test/Parser/asm-goto.c
new file mode 100644
index 0000000000000..7f8edb1115631
--- /dev/null
+++ b/clang/test/Parser/asm-goto.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify %s
+
+#if !__has_extension(gnu_asm)
+#error Extension 'gnu_asm' should be available by default
+#endif
+
+
+int a, b, c, d, e, f, g, h, i, j, k, l;
+
+void
+fgoto1 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d),
+               [e] "r" (e), [f] "r" (f), [g] "r" (g), [h] "r" (h),
+               [i] "r" (i), [j] "r" (j), [k] "r" (k), [l] "r" (l)
+            ::lab1,lab2);
+lab1: return;
+lab2: return;
+}
+
+void
+fgoto2 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r,m" (a), [b] "r,m" (b), [c] "r,m" (c), [d] "r,m" (d),
+               [e] "r,m" (e), [f] "r,m" (f), [g] "r,m" (g), [h] "r,m" (h),
+               [i] "r,m" (i), [j] "r,m" (j), [k] "r,m" (k), [l] "r,m" (l)
+            :: lab);
+  lab: return;
+}
+
+int zoo ()
+{
+  int x,cond,*e;
+  // expected-error@+1 {{expected ')'}}
+  asm ("mov %[e], %[e]" : : [e] "rm" (*e)::a)
+  // expected-error@+1 {{'asm goto' cannot have output constraints}}
+  asm goto ("decl %0; jnz %l[a]" :"=r"(x): "m"(x) : "memory" : a);
+  // expected-error@+1 {{expected identifie}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" : );
+  // expected-error@+1 {{expected ':'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" );
+  // expected-error@+1 {{use of undeclared label 'x'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :x);
+  // expected-error@+1 {{use of undeclared label 'b'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :b);
+  // expected-error@+1 {{invalid operand number in inline asm string}}
+  asm goto ("testl %0, %0; jne %l3;" :: "r"(cond)::label_true, loop);
+  // expected-error@+1 {{unknown symbolic operand name in inline assembly string}}
+  asm goto ("decl %0; jnz %l[b]" :: "m"(x) : "memory" : a);
+a:
+label_true:
+loop:
+  return 0;
+}
diff --git a/clang/test/Parser/asm-goto.cpp b/clang/test/Parser/asm-goto.cpp
new file mode 100644
index 0000000000000..f09466ca488d4
--- /dev/null
+++ b/clang/test/Parser/asm-goto.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -fsyntax-only -verify -std=c++11 %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -std=c++11 %s
+
+int zoo ()
+{
+  int x,cond,*e;
+  // expected-error@+1 {{expected ')'}}
+  asm ("mov %[e], %[e]" : : [e] "rm" (*e)::a)
+  // expected-error@+1  {{'asm goto' cannot have output constraints}}
+  asm goto ("decl %0; jnz %l[a]" :"=r"(x): "m"(x) : "memory" : a);
+  // expected-error@+1 {{expected identifie}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" : );
+  // expected-error@+1  {{expected ':'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" );
+  // expected-error@+1 {{use of undeclared label 'x'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :x);
+  // expected-error@+1 {{use of undeclared label 'b'}}
+  asm goto ("decl %0;" :: "m"(x) : "memory" :b);
+  // expected-error@+1 {{invalid operand number in inline asm string}}
+  asm goto ("testl %0, %0; jne %l3;" :: "r"(cond)::label_true, loop);
+  // expected-error@+1 {{unknown symbolic operand name in inline assembly string}}
+  asm goto ("decl %0; jnz %l[b]" :: "m"(x) : "memory" : a);
+label_true:
+loop:
+a:
+  return 0;
+}
+
+
+int a, b, c, d, e, f, g, h, i, j, k, l;
+
+void
+fgoto1 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r" (a), [b] "r" (b), [c] "r" (c), [d] "r" (d),
+               [e] "r" (e), [f] "r" (f), [g] "r" (g), [h] "r" (h),
+               [i] "r" (i), [j] "r" (j), [k] "r" (k), [l] "r" (l)
+            ::lab1,lab2);
+lab1: return;
+lab2: return;
+}
+
+void
+fgoto2 (void)
+{
+  __asm__ volatile goto (""
+            :: [a] "r,m" (a), [b] "r,m" (b), [c] "r,m" (c), [d] "r,m" (d),
+               [e] "r,m" (e), [f] "r,m" (f), [g] "r,m" (g), [h] "r,m" (h),
+               [i] "r,m" (i), [j] "r,m" (j), [k] "r,m" (k), [l] "r,m" (l)
+            :: lab);
+  lab: return;
+}
diff --git a/clang/test/Sema/asm-goto.cpp b/clang/test/Sema/asm-goto.cpp
new file mode 100644
index 0000000000000..d85730974359c
--- /dev/null
+++ b/clang/test/Sema/asm-goto.cpp
@@ -0,0 +1,63 @@
+// RUN: %clang_cc1 %s -triple i386-pc-linux-gnu -verify -fsyntax-only
+// RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -verify -fsyntax-only
+
+struct NonTrivial {
+  ~NonTrivial();
+  int f(int);
+private:
+  int k;
+};
+void JumpDiagnostics(int n) {
+// expected-error@+1 {{cannot jump from this goto statement to its label}}
+  goto DirectJump;
+// expected-note@+1 {{jump bypasses variable with a non-trivial destructor}}
+  NonTrivial tnp1;
+
+DirectJump:
+// expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
+  asm goto("jmp %l0;" ::::Later);
+// expected-note@+1 {{jump bypasses variable with a non-trivial destructor}}
+  NonTrivial tnp2;
+// expected-note@+1 {{possible target of asm goto statement}}
+Later:
+  return;
+}
+
+struct S { ~S(); };
+void foo(int a) {
+  if (a) {
+FOO:
+// expected-note@+2 {{jump exits scope of variable with non-trivial destructor}}
+// expected-note@+1 {{jump exits scope of variable with non-trivial destructor}}
+    S s;
+    void *p = &&BAR;
+// expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
+  asm goto("jmp %l0;" ::::BAR);
+// expected-error@+1 {{cannot jump from this indirect goto statement to one of its possible targets}}
+    goto *p;
+    p = &&FOO;
+    goto *p;
+    return;
+  }
+// expected-note@+2 {{possible target of asm goto statement}}
+// expected-note@+1 {{possible target of indirect goto statement}}
+BAR:
+  return;
+}
+
+
+//Asm goto:
+int test16(int n)
+{
+  // expected-error@+2 {{cannot jump from this asm goto statement to one of its possible targets}}
+  // expected-error@+1 {{cannot jump from this asm goto statement to one of its possible targets}}
+  asm volatile goto("testl %0, %0; jne %l1;" :: "r"(n)::label_true, loop);
+  // expected-note@+2 {{jump bypasses initialization of variable length array}}
+  // expected-note@+1 {{possible target of asm goto statement}}
+  return ({int a[n];label_true: 2;});
+  // expected-note@+1 {{jump bypasses initialization of variable length array}}
+  int b[n];
+// expected-note@+1 {{possible target of asm goto statement}}
+loop:
+  return 0;
+}
diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c
index 67da197426cd5..29a55c610de49 100644
--- a/clang/test/Sema/asm.c
+++ b/clang/test/Sema/asm.c
@@ -295,3 +295,24 @@ int test17(int t0)
   return r0 + r1;
 }
 
+void test18()
+{
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm goto ("" : : : : lab, lab, lab2, lab);
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm goto ("xorw %[lab], %[lab]; je %l[lab]" : : [lab] "i" (0) : : lab);
+lab:;
+lab2:;
+  int x,x1;
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm ("" : [lab] "=r" (x),[lab] "+r" (x) : [lab1] "r" (x));
+  // expected-error@+2 {{duplicate use of asm operand name "lab"}}
+  // expected-note@+1 {{asm operand name "lab" first referenced here}}
+  asm ("" : [lab] "=r" (x1) : [lab] "r" (x));
+  // expected-error@+1 {{invalid operand number in inline asm string}}
+  asm ("jne %l0":::);
+  asm goto ("jne %l0"::::lab);
+}
diff --git a/clang/test/Sema/inline-asm-validate-tmpl.cpp b/clang/test/Sema/inline-asm-validate-tmpl.cpp
index cf7eac3d83d43..9e234caa9c8df 100644
--- a/clang/test/Sema/inline-asm-validate-tmpl.cpp
+++ b/clang/test/Sema/inline-asm-validate-tmpl.cpp
@@ -23,3 +23,13 @@ template <int N> void	testc(int value)
 	asm("rol %1, %0" :"=r"(value): "I"(N + 1));
 }
 int	foo() { testc<2>(10); }
+
+// these should compile without error
+template <int N> bool testd()
+{
+  __asm goto ("" : : : : lab);
+  return true;
+lab:
+  return false;
+}
+bool foox() { return testd<0> (); }

From 0912b06f780a9ac40386a7fb0fb12634619bf323 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Mon, 3 Jun 2019 16:17:14 +0000
Subject: [PATCH 0929/1176] [LoopPred] Convert member function to free helper
 function [NFC]

llvm-svn: 362411
---
 .../lib/Transforms/Scalar/LoopPredication.cpp | 90 ++++++++++---------
 1 file changed, 47 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 62a79426e25e1..2df69acc7bff5 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -300,19 +300,6 @@ class LoopPredication {
   // within the loop. We identify such unprofitable loops through BPI.
   bool isLoopProfitableToPredicate();
 
-  // When the IV type is wider than the range operand type, we can still do loop
-  // predication, by generating SCEVs for the range and latch that are of the
-  // same type. We achieve this by generating a SCEV truncate expression for the
-  // latch IV. This is done iff truncation of the IV is a safe operation,
-  // without loss of information.
-  // Another way to achieve this is by generating a wider type SCEV for the
-  // range check operand, however, this needs a more involved check that
-  // operands do not overflow. This can lead to loss of information when the
-  // range operand is of the form: add i32 %offset, %iv. We need to prove that
-  // sext(x + y) is same as sext(x) + sext(y).
-  // This function returns true if we can safely represent the IV type in
-  // the RangeCheckType without loss of information.
-  bool isSafeToTruncateWideIVType(Type *RangeCheckType);
   // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
   // so.
   Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
@@ -425,6 +412,52 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
   return Builder.CreateICmp(Pred, LHSV, RHSV);
 }
 
+
+// Returns true if its safe to truncate the IV to RangeCheckType.
+// When the IV type is wider than the range operand type, we can still do loop
+// predication, by generating SCEVs for the range and latch that are of the
+// same type. We achieve this by generating a SCEV truncate expression for the
+// latch IV. This is done iff truncation of the IV is a safe operation,
+// without loss of information.
+// Another way to achieve this is by generating a wider type SCEV for the
+// range check operand, however, this needs a more involved check that
+// operands do not overflow. This can lead to loss of information when the
+// range operand is of the form: add i32 %offset, %iv. We need to prove that
+// sext(x + y) is same as sext(x) + sext(y).
+// This function returns true if we can safely represent the IV type in
+// the RangeCheckType without loss of information.
+bool isSafeToTruncateWideIVType(const DataLayout &DL, ScalarEvolution &SE,
+                                const LoopICmp LatchCheck,
+                                Type *RangeCheckType) {
+  if (!EnableIVTruncation)
+    return false;
+  assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
+             DL.getTypeSizeInBits(RangeCheckType) &&
+         "Expected latch check IV type to be larger than range check operand "
+         "type!");
+  // The start and end values of the IV should be known. This is to guarantee
+  // that truncating the wide type will not lose information.
+  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
+  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
+  if (!Limit || !Start)
+    return false;
+  // This check makes sure that the IV does not change sign during loop
+  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
+  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
+  // IV wraps around, and the truncation of the IV would lose the range of
+  // iterations between 2^32 and 2^64.
+  bool Increasing;
+  if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+    return false;
+  // The active bits should be less than the bits in the RangeCheckType. This
+  // guarantees that truncating the latch check to RangeCheckType is a safe
+  // operation.
+  auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType);
+  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
+         Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
+}
+
+
 Optional<LoopICmp>
 LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
 
@@ -434,7 +467,7 @@ LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
   // For now, bail out if latch type is narrower than range type.
   if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
     return None;
-  if (!isSafeToTruncateWideIVType(RangeCheckType))
+  if (!isSafeToTruncateWideIVType(*DL, *SE, LatchCheck, RangeCheckType))
     return None;
   // We can now safely identify the truncated version of the IV and limit for
   // RangeCheckType.
@@ -873,35 +906,6 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
   return Result;
 }
 
-// Returns true if its safe to truncate the IV to RangeCheckType.
-bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
-  if (!EnableIVTruncation)
-    return false;
-  assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
-             DL->getTypeSizeInBits(RangeCheckType) &&
-         "Expected latch check IV type to be larger than range check operand "
-         "type!");
-  // The start and end values of the IV should be known. This is to guarantee
-  // that truncating the wide type will not lose information.
-  auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
-  auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
-  if (!Limit || !Start)
-    return false;
-  // This check makes sure that the IV does not change sign during loop
-  // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
-  // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
-  // IV wraps around, and the truncation of the IV would lose the range of
-  // iterations between 2^32 and 2^64.
-  bool Increasing;
-  if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
-    return false;
-  // The active bits should be less than the bits in the RangeCheckType. This
-  // guarantees that truncating the latch check to RangeCheckType is a safe
-  // operation.
-  auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
-  return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
-         Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
-}
 
 bool LoopPredication::isLoopProfitableToPredicate() {
   if (SkipProfitabilityChecks || !BPI)

From 009d08f313c46915930ca617946033a487933b73 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Mon, 3 Jun 2019 16:20:59 +0000
Subject: [PATCH 0930/1176] [PowerPC] Set PROT_READ flag for MF_EXEC to prevent
 segfaults on PPC machines

The big endian PPC buildbots are all failing now due to calls to cache
invalidation in unit tests on data that has only the PROT_EXEC flag set.
This has been an issue all along on FreeBSD but it can affect Linux machines
depending on configuration.

This patch mitigates the issue the same way it is mitigated on FreeBSD.

Since this is needed to bring the buildbots back to green, I plan to commit this
and allow for post-commit review, but I thought I would also post it here for
ease of access/readability.

Differential revision: https://reviews.llvm.org/D62741

llvm-svn: 362412
---
 llvm/lib/Support/Unix/Memory.inc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index 3b0033629597a..a0927da50e487 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -58,14 +58,13 @@ int getPosixProtectionFlags(unsigned Flags) {
       llvm::sys::Memory::MF_EXEC:
     return PROT_READ | PROT_WRITE | PROT_EXEC;
   case llvm::sys::Memory::MF_EXEC:
-#if defined(__FreeBSD__)
+#if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \
+     defined(_POWER) || defined(_ARCH_PPC))
     // On PowerPC, having an executable page that has no read permission
     // can have unintended consequences.  The function InvalidateInstruction-
     // Cache uses instructions dcbf and icbi, both of which are treated by
     // the processor as loads.  If the page has no read permissions,
     // executing these instructions will result in a segmentation fault.
-    // Somehow, this problem is not present on Linux, but it does happen
-    // on FreeBSD.
     return PROT_READ | PROT_EXEC;
 #else
     return PROT_EXEC;

From 857de979a7e43f26973351931b93a3879a09ec5a Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 16:21:37 +0000
Subject: [PATCH 0931/1176] Revert "[llvm-ar] Fix relative thin archive path
 handling"

This reverts commit r362407.  It broke compilation of
llvm/lib/Object/ArchiveWriter.cpp:

error: type 'llvm::sys::path::const_iterator' does not provide a call
operator

llvm-svn: 362413
---
 llvm/include/llvm/Object/ArchiveWriter.h      |  2 +-
 llvm/lib/Object/ArchiveWriter.cpp             | 51 +++++++------------
 llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp   | 11 ++--
 llvm/test/tools/llvm-ar/reduce-thin-path.test | 10 ----
 llvm/test/tools/llvm-ar/thin-archive.test     | 45 ----------------
 .../ELF/archive-unknown-members.test          |  8 +--
 .../llvm-readobj/thin-archive-paths.test      |  6 +--
 llvm/tools/llvm-ar/llvm-ar.cpp                | 38 ++++----------
 8 files changed, 37 insertions(+), 134 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-ar/reduce-thin-path.test
 delete mode 100644 llvm/test/tools/llvm-ar/thin-archive.test

diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index 9e6daf2da36e9..cf415e92bc79b 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -36,7 +36,7 @@ struct NewArchiveMember {
                                             bool Deterministic);
 };
 
-Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
+std::string computeArchiveRelativePath(StringRef From, StringRef To);
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    bool WriteSymtab, object::Archive::Kind Kind,
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 68c40054bb98d..849d2835772e2 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -494,46 +494,29 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 }
 
 namespace llvm {
-
-static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) {
-  SmallString<128> Ret = P;
-  std::error_code Err = sys::fs::make_absolute(Ret);
-  if (Err)
-    return Err;
-  sys::path::remove_dots(Ret, /*removedotdot*/ true);
-  return Ret;
-}
-
 // Compute the relative path from From to To.
-Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
-  ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To);
-  ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From);
-  if (!PathToOrErr || !DirFromOrErr)
-    return errorCodeToError(std::error_code(errno, std::generic_category()));
-
-  const SmallString<128> &PathTo = *PathToOrErr;
-  const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr);
-
-  // Can't construct a relative path between different roots
-  if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom))
-    return sys::path::convert_to_slash(PathTo);
-
-  // Skip common prefixes
-  auto FromTo =
-      std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom),
-                    sys::path::begin(PathTo), sys::path::end(PathTo));
-  auto FromI = FromTo.first;
-  auto ToI = FromTo.second;
+std::string computeArchiveRelativePath(StringRef From, StringRef To) {
+  if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
+    return To;
+
+  StringRef DirFrom = sys::path::parent_path(From);
+  auto FromI = sys::path::begin(DirFrom);
+  auto ToI = sys::path::begin(To);
+  while (*FromI == *ToI) {
+    ++FromI;
+    ++ToI;
+  }
 
-  // Construct relative path
   SmallString<128> Relative;
   for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
-    sys::path::append(Relative, sys::path::Style::posix, "..");
+    sys::path::append(Relative, "..");
 
-  for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI)
-    sys::path::append(Relative, sys::path::Style::posix, *ToI);
+  for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
+    sys::path::append(Relative, *ToI);
 
-  return Relative.str();
+  // Replace backslashes with slashes so that the path is portable between *nix
+  // and Windows.
+  return sys::path::convert_to_slash(Relative);
 }
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 2d44686dd280f..34a83147a3a63 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -211,14 +211,9 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   // llvm-lib uses relative paths for both regular and thin archives, unlike
   // standard GNU ar, which only uses relative paths for thin archives and
   // basenames for regular archives.
-  for (NewArchiveMember &Member : Members) {
-    if (sys::path::is_relative(Member.MemberName)) {
-      Expected<std::string> PathOrErr =
-          computeArchiveRelativePath(OutputPath, Member.MemberName);
-      if (PathOrErr)
-        Member.MemberName = Saver.save(*PathOrErr);
-    }
-  }
+  for (NewArchiveMember &Member : Members)
+    Member.MemberName =
+        Saver.save(computeArchiveRelativePath(OutputPath, Member.MemberName));
 
   if (Error E =
           writeArchive(OutputPath, Members,
diff --git a/llvm/test/tools/llvm-ar/reduce-thin-path.test b/llvm/test/tools/llvm-ar/reduce-thin-path.test
deleted file mode 100644
index aea6101ce9bc6..0000000000000
--- a/llvm/test/tools/llvm-ar/reduce-thin-path.test
+++ /dev/null
@@ -1,10 +0,0 @@
-RUN: rm -rf %t && mkdir -p %t/foo/bar/
-RUN: mkdir -p %t/baz/
-RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
-
-RUN: cd %t && llvm-ar rTc %t/baz/internal.ar elf.o
-RUN: cd %t/foo && llvm-ar rTc %t/foo/bar/external.ar ../baz/internal.ar
-
-RUN: FileCheck -input-file=%t/foo/bar/external.ar %s
-
-CHECK: {{^}}../../elf.o/
diff --git a/llvm/test/tools/llvm-ar/thin-archive.test b/llvm/test/tools/llvm-ar/thin-archive.test
deleted file mode 100644
index 8d9543b686968..0000000000000
--- a/llvm/test/tools/llvm-ar/thin-archive.test
+++ /dev/null
@@ -1,45 +0,0 @@
-RUN: rm -rf %t && mkdir -p %t/foo/bar/
-
-RUN: yaml2obj %S/Inputs/elf.yaml -o %t/foo/elf.o
-RUN: cp %t/foo/elf.o %t/foo/bar/elf.o
-RUN: cp %t/foo/bar/elf.o %t/delete.o
-
-Test that modules can be added with absolute paths when the archive is created using an absolute path
-
-RUN: llvm-ar rTc %t/absolute-1.ar %t/foo/elf.o %t/delete.o %t/foo/bar/elf.o
-RUN: llvm-ar dT %t/absolute-1.ar delete.o
-
-RUN: FileCheck -input-file=%t/absolute-1.ar --check-prefixes=THIN,CHECK %s -DPATH=%/t/
-RUN: llvm-ar t %t/absolute-1.ar | FileCheck %s -DPATH=%/t/
-
-Test that modules can be added with absolute paths when the archive is created using a relative path
-
-RUN: llvm-ar rTc Output/%basename_t.tmp/absolute-2.ar %t/foo/elf.o %t/delete.o %t/foo/bar/elf.o
-RUN: llvm-ar dT Output/%basename_t.tmp/absolute-2.ar %t/delete.o
-
-RUN: FileCheck -input-file=%t/absolute-2.ar --check-prefixes=THIN,CHECK %s -DPATH=%/t/
-RUN: llvm-ar t %t/absolute-2.ar | FileCheck %s -DPATH=%/t/
-
-These tests must be run in %t/foo. cd %t is included on each line to make debugging this test case easier.
-
-Test that modules can be added with relative paths when the archive is created using a relative path
-
-RUN: cd %t/foo && llvm-ar rTc ../relative-1.ar elf.o ../delete.o bar/elf.o
-RUN: cd %t/foo && llvm-ar dT ../relative-1.ar delete.o
-
-RUN: FileCheck -input-file=%t/relative-1.ar --check-prefixes=THIN,CHECK %s -DPATH=
-RUN: llvm-ar t %t/relative-1.ar | FileCheck %s -DPATH=%/t/
-
-Test that modules can be added with relative paths when the archive is created using a absolute path
-
-RUN: cd %t/foo && llvm-ar rTc %t/relative-2.ar elf.o ../delete.o bar/elf.o
-RUN: cd %t/foo && llvm-ar dT %t/relative-2.ar delete.o
-
-RUN: FileCheck -input-file=%t/relative-2.ar --check-prefixes=THIN,CHECK %s -DPATH=
-RUN: llvm-ar t %t/relative-2.ar | FileCheck %s -DPATH=%/t/
-
-THIN: !<thin>
-
-CHECK-NOT: delete.o
-CHECK: {{^}}[[PATH]]foo/elf.o
-CHECK: {{^}}[[PATH]]foo/bar/elf.o
diff --git a/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test b/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
index 39a6597a83bfb..6540b630f7dac 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
@@ -23,10 +23,10 @@
 # RUN: llvm-ar rcT %t.thin1.a %t1.o %s
 # RUN: llvm-ar rcT %t.thin2.a %t2.o %s
 
-# RUN: not llvm-objcopy --strip-debug %/t.thin1.a 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%/t.thin1.a -DMEMBER=%/s
-# RUN: not llvm-strip --strip-debug %/t.thin2.a 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%/t.thin2.a -DMEMBER=%/s
+# RUN: not llvm-objcopy --strip-debug %t.thin1.a 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%t.thin1.a -DMEMBER=%s
+# RUN: not llvm-strip --strip-debug %t.thin2.a 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%t.thin2.a -DMEMBER=%s
 ## Verify that the first member was not modified, if a later member could not
 ## be recognized.
 # RUN: cmp %t.o %t1.o
diff --git a/llvm/test/tools/llvm-readobj/thin-archive-paths.test b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
index d7a971eb303d8..f1952c739ccff 100644
--- a/llvm/test/tools/llvm-readobj/thin-archive-paths.test
+++ b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
@@ -23,11 +23,11 @@
 # RUN: llvm-ar rcT c/absolute.a %t/a/b/1.o
 
 # Show that absolute paths in the file header printing are correct.
-# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%/t
+# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%t
 # ABS: File: [[DIR]]/a/b/1.o
 
 # Show that absolute paths in an error message for both archive and member are correct.
 # RUN: rm a/b/1.o
-# RUN: not llvm-readobj --file-headers %/t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%/t
-# RUN: not llvm-readelf --file-headers %/t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%/t
+# RUN: not llvm-readobj --file-headers %t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%t
+# RUN: not llvm-readelf --file-headers %t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%t
 # ERR2: error: '[[DIR]]/c/absolute.a': '[[DIR]]/a/b/1.o': {{[Nn]}}o such file or directory
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 0731f35ac458b..04c2396a4fa32 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -464,11 +464,9 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
   }
 
   if (C.getParent()->isThin()) {
-    if (!sys::path::is_absolute(Name)) {
-      StringRef ParentDir = sys::path::parent_path(ArchiveName);
-      if (!ParentDir.empty())
-        outs() << sys::path::convert_to_slash(ParentDir) << '/';
-    }
+    StringRef ParentDir = sys::path::parent_path(ArchiveName);
+    if (!ParentDir.empty())
+      outs() << ParentDir << '/';
   }
   outs() << Name << "\n";
 }
@@ -595,18 +593,10 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
   // the archive it's in, so the file resolves correctly.
   if (Thin && FlattenArchive) {
     StringSaver Saver(Alloc);
-    Expected<std::string> FileNameOrErr = M.getName();
+    Expected<std::string> FileNameOrErr = M.getFullName();
     failIfError(FileNameOrErr.takeError());
-    if (sys::path::is_absolute(*FileNameOrErr)) {
-      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(*FileNameOrErr));
-    } else {
-      FileNameOrErr = M.getFullName();
-      failIfError(FileNameOrErr.takeError());
-      Expected<std::string> PathOrErr =
-          computeArchiveRelativePath(ArchiveName, *FileNameOrErr);
-      NMOrErr->MemberName = Saver.save(
-          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(*FileNameOrErr));
-    }
+    NMOrErr->MemberName =
+        Saver.save(computeArchiveRelativePath(ArchiveName, *FileNameOrErr));
   }
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
@@ -635,19 +625,9 @@ static void addMember(std::vector<NewArchiveMember> &Members,
   // For regular archives, use the basename of the object path for the member
   // name. For thin archives, use the full relative paths so the file resolves
   // correctly.
-  if (!Thin) {
-    NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
-  } else {
-    if (sys::path::is_absolute(FileName))
-      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(FileName));
-    else {
-      Expected<std::string> PathOrErr =
-          computeArchiveRelativePath(ArchiveName, FileName);
-      NMOrErr->MemberName = Saver.save(
-          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName));
-    }
-  }
-
+  NMOrErr->MemberName =
+      Thin ? Saver.save(computeArchiveRelativePath(ArchiveName, FileName))
+           : sys::path::filename(NMOrErr->MemberName);
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
     object::Archive &Lib = readLibrary(FileName);

From 985f2f48bd5c20bc0ceb729502fe5e23eb711630 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 3 Jun 2019 16:21:58 +0000
Subject: [PATCH 0932/1176] [WebAssembly] Remove fptosi(undef) and
 fptoui(undef) from reduced test case.

Pre-commit for D62811 - which adds DAG fpto[us]i(undef) --> undef constant fold

llvm-svn: 362414
---
 llvm/test/CodeGen/WebAssembly/target-features.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/WebAssembly/target-features.ll b/llvm/test/CodeGen/WebAssembly/target-features.ll
index 6d9c392938221..8c05ca3b12372 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features.ll
@@ -9,16 +9,16 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-define void @foo(i32* %p1) #0 {
+define void @foo(i32* %p1, float %f2) #0 {
   %a = atomicrmw min i32* undef, i32 42 seq_cst
-  %v = fptoui float undef to i32
+  %v = fptoui float %f2 to i32
   store i32 %v, i32* %p1
   ret void
 }
 
-define void @bar(i32* %p1) #1 {
+define void @bar(i32* %p1, float %f2) #1 {
   %a = atomicrmw min i32* undef, i32 42 seq_cst
-  %v = fptoui float undef to i32
+  %v = fptoui float %f2 to i32
   store i32 %v, i32* %p1
   ret void
 }

From 9ed1673703c59dad989f5393dc56d94097c6bbae Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Mon, 3 Jun 2019 16:23:20 +0000
Subject: [PATCH 0933/1176] [LoopPred] Convert a second member function to a
 static helper [NFC]

(And remember to actually mark the first one static.)

llvm-svn: 362415
---
 .../lib/Transforms/Scalar/LoopPredication.cpp | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 2df69acc7bff5..017bf21d233c8 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -300,10 +300,6 @@ class LoopPredication {
   // within the loop. We identify such unprofitable loops through BPI.
   bool isLoopProfitableToPredicate();
 
-  // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
-  // so.
-  Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
-
 public:
   LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
                   BranchProbabilityInfo *BPI)
@@ -426,9 +422,10 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
 // sext(x + y) is same as sext(x) + sext(y).
 // This function returns true if we can safely represent the IV type in
 // the RangeCheckType without loss of information.
-bool isSafeToTruncateWideIVType(const DataLayout &DL, ScalarEvolution &SE,
-                                const LoopICmp LatchCheck,
-                                Type *RangeCheckType) {
+static bool isSafeToTruncateWideIVType(const DataLayout &DL,
+                                       ScalarEvolution &SE,
+                                       const LoopICmp LatchCheck,
+                                       Type *RangeCheckType) {
   if (!EnableIVTruncation)
     return false;
   assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
@@ -458,26 +455,30 @@ bool isSafeToTruncateWideIVType(const DataLayout &DL, ScalarEvolution &SE,
 }
 
 
-Optional<LoopICmp>
-LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
+// Return an LoopICmp describing a latch check equivlent to LatchCheck but with
+// the requested type if safe to do so.  May involve the use of a new IV.
+static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
+                                                 ScalarEvolution &SE,
+                                                 const LoopICmp LatchCheck,
+                                                 Type *RangeCheckType) {
 
   auto *LatchType = LatchCheck.IV->getType();
   if (RangeCheckType == LatchType)
     return LatchCheck;
   // For now, bail out if latch type is narrower than range type.
-  if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
+  if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType))
     return None;
-  if (!isSafeToTruncateWideIVType(*DL, *SE, LatchCheck, RangeCheckType))
+  if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
     return None;
   // We can now safely identify the truncated version of the IV and limit for
   // RangeCheckType.
   LoopICmp NewLatchCheck;
   NewLatchCheck.Pred = LatchCheck.Pred;
   NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
-      SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
+      SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
   if (!NewLatchCheck.IV)
     return None;
-  NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
+  NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
   LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
                     << "can be represented as range check type:"
                     << *RangeCheckType << "\n");
@@ -693,7 +694,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
     return None;
   }
   auto *Ty = RangeCheckIV->getType();
-  auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
+  auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
   if (!CurrLatchCheckOpt) {
     LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
                          "corresponding to range type: "

From 8e317e29da4a7391087889ef499b1d0f39db600c Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 16:31:37 +0000
Subject: [PATCH 0934/1176] Include what you use in LanaiRegisterInfo.cpp

llvm-svn: 362416
---
 llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index d113a82b659f9..24747d98b80ec 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -11,8 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "LanaiRegisterInfo.h"
-#include "Lanai.h"
-#include "LanaiSubtarget.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "LanaiFrameLowering.h"
+#include "LanaiInstrInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"

From 2fcd2bd0df5a32320716f235a023c68692fdd00c Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Mon, 3 Jun 2019 16:46:03 +0000
Subject: [PATCH 0935/1176] [Tests] Add LFTR tests for multiple exit loops

This is preparation for D62625

llvm-svn: 362417
---
 .../IndVarSimplify/lftr-multi-exit.ll         | 276 ++++++++++++++++++
 1 file changed, 276 insertions(+)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll

diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll
new file mode 100644
index 0000000000000..bda119133cd50
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -dce -S | FileCheck %
+; This is a collection of tests specifically for LFTR of multiple exit loops.
+; The actual LFTR performed is trivial so as to focus on the loop structure
+; aspects.
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+@A = external global i32
+
+define void @analyzeable_early_exit(i32 %n) {
+; CHECK-LABEL: @analyzeable_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @unanalyzeable_early_exit() {
+; CHECK-LABEL: @unanalyzeable_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[VOL:%.*]] = load volatile i32, i32* @A
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %vol = load volatile i32, i32* @A
+  %earlycnd = icmp ne i32 %vol, 0
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+
+define void @multiple_early_exits(i32 %n, i32 %m) {
+; CHECK-LABEL: @multiple_early_exits(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]]
+; CHECK:       continue:
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND2]], label [[LATCH]], label [[EXIT]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %continue, label %exit
+
+continue:
+  store volatile i32 %iv, i32* @A
+  %earlycnd2 = icmp ult i32 %iv, %m
+  br i1 %earlycnd2, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store volatile i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Note: This slightly odd form is what indvars itself produces for multiple
+; exits without a side effect between them.
+define void @compound_early_exit(i32 %n, i32 %m) {
+; CHECK-LABEL: @compound_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[EARLYCND]], [[EARLYCND2]]
+; CHECK-NEXT:    br i1 [[AND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  %earlycnd2 = icmp ult i32 %iv, %m
+  %and = and i1 %earlycnd, %earlycnd2
+  br i1 %and, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store volatile i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+
+define void @unanalyzeable_latch(i32 %n) {
+; CHECK-LABEL: @unanalyzeable_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[VOL:%.*]] = load volatile i32, i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[VOL]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  %vol = load volatile i32, i32* @A
+  %c = icmp ult i32 %vol, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @single_exit_no_latch(i32 %n) {
+; CHECK-LABEL: @single_exit_no_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  br label %loop
+
+exit:
+  ret void
+}
+
+; Multiple exits which could be LFTRed, but the latch itself is not an
+; exiting block.
+define void @no_latch_exit(i32 %n, i32 %m) {
+; CHECK-LABEL: @no_latch_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]]
+; CHECK:       continue:
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND2]], label [[LATCH]], label [[EXIT]]
+; CHECK:       latch:
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %continue, label %exit
+
+continue:
+  store volatile i32 %iv, i32* @A
+  %earlycnd2 = icmp ult i32 %iv, %m
+  br i1 %earlycnd2, label %latch, label %exit
+
+latch:
+  store volatile i32 %iv, i32* @A
+  %iv.next = add i32 %iv, 1
+  br label %loop
+
+exit:
+  ret void
+}

From b46934eeb8ce1ade2dc4c2a68679dd259e8813ea Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 16:58:11 +0000
Subject: [PATCH 0936/1176] Revert "[Tests] Add LFTR tests for multiple exit
 loops"

This reverts commit r362417.  There's a syntax error in the RUN line.

llvm-svn: 362418
---
 .../IndVarSimplify/lftr-multi-exit.ll         | 276 ------------------
 1 file changed, 276 deletions(-)
 delete mode 100644 llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll

diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll
deleted file mode 100644
index bda119133cd50..0000000000000
--- a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll
+++ /dev/null
@@ -1,276 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -indvars -dce -S | FileCheck %
-; This is a collection of tests specifically for LFTR of multiple exit loops.
-; The actual LFTR performed is trivial so as to focus on the loop structure
-; aspects.
-
-; Provide legal integer types.
-target datalayout = "n8:16:32:64"
-
-@A = external global i32
-
-define void @analyzeable_early_exit(i32 %n) {
-; CHECK-LABEL: @analyzeable_early_exit(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %earlycnd = icmp ult i32 %iv, %n
-  br i1 %earlycnd, label %latch, label %exit
-
-latch:
-  %iv.next = add i32 %iv, 1
-  store i32 %iv, i32* @A
-  %c = icmp ult i32 %iv.next, 1000
-  br i1 %c, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-define void @unanalyzeable_early_exit() {
-; CHECK-LABEL: @unanalyzeable_early_exit(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[VOL:%.*]] = load volatile i32, i32* @A
-; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0
-; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %vol = load volatile i32, i32* @A
-  %earlycnd = icmp ne i32 %vol, 0
-  br i1 %earlycnd, label %latch, label %exit
-
-latch:
-  %iv.next = add i32 %iv, 1
-  store i32 %iv, i32* @A
-  %c = icmp ult i32 %iv.next, 1000
-  br i1 %c, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-
-define void @multiple_early_exits(i32 %n, i32 %m) {
-; CHECK-LABEL: @multiple_early_exits(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[EARLYCND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]]
-; CHECK:       continue:
-; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
-; CHECK-NEXT:    br i1 [[EARLYCND2]], label [[LATCH]], label [[EXIT]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %earlycnd = icmp ult i32 %iv, %n
-  br i1 %earlycnd, label %continue, label %exit
-
-continue:
-  store volatile i32 %iv, i32* @A
-  %earlycnd2 = icmp ult i32 %iv, %m
-  br i1 %earlycnd2, label %latch, label %exit
-
-latch:
-  %iv.next = add i32 %iv, 1
-  store volatile i32 %iv, i32* @A
-  %c = icmp ult i32 %iv.next, 1000
-  br i1 %c, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-; Note: This slightly odd form is what indvars itself produces for multiple
-; exits without a side effect between them.
-define void @compound_early_exit(i32 %n, i32 %m) {
-; CHECK-LABEL: @compound_early_exit(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[EARLYCND]], [[EARLYCND2]]
-; CHECK-NEXT:    br i1 [[AND]], label [[LATCH]], label [[EXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
-; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %earlycnd = icmp ult i32 %iv, %n
-  %earlycnd2 = icmp ult i32 %iv, %m
-  %and = and i1 %earlycnd, %earlycnd2
-  br i1 %and, label %latch, label %exit
-
-latch:
-  %iv.next = add i32 %iv, 1
-  store volatile i32 %iv, i32* @A
-  %c = icmp ult i32 %iv.next, 1000
-  br i1 %c, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-
-define void @unanalyzeable_latch(i32 %n) {
-; CHECK-LABEL: @unanalyzeable_latch(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[VOL:%.*]] = load volatile i32, i32* @A
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[VOL]], 1000
-; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %earlycnd = icmp ult i32 %iv, %n
-  br i1 %earlycnd, label %latch, label %exit
-
-latch:
-  %iv.next = add i32 %iv, 1
-  store i32 %iv, i32* @A
-  %vol = load volatile i32, i32* @A
-  %c = icmp ult i32 %vol, 1000
-  br i1 %c, label %loop, label %exit
-
-exit:
-  ret void
-}
-
-define void @single_exit_no_latch(i32 %n) {
-; CHECK-LABEL: @single_exit_no_latch(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT:    store i32 [[IV]], i32* @A
-; CHECK-NEXT:    br label [[LOOP]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %earlycnd = icmp ult i32 %iv, %n
-  br i1 %earlycnd, label %latch, label %exit
-
-latch:
-  %iv.next = add i32 %iv, 1
-  store i32 %iv, i32* @A
-  br label %loop
-
-exit:
-  ret void
-}
-
-; Multiple exits which could be LFTRed, but the latch itself is not an
-; exiting block.
-define void @no_latch_exit(i32 %n, i32 %m) {
-; CHECK-LABEL: @no_latch_exit(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
-; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
-; CHECK-NEXT:    br i1 [[EARLYCND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]]
-; CHECK:       continue:
-; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
-; CHECK-NEXT:    br i1 [[EARLYCND2]], label [[LATCH]], label [[EXIT]]
-; CHECK:       latch:
-; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
-; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT:    br label [[LOOP]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
-  %earlycnd = icmp ult i32 %iv, %n
-  br i1 %earlycnd, label %continue, label %exit
-
-continue:
-  store volatile i32 %iv, i32* @A
-  %earlycnd2 = icmp ult i32 %iv, %m
-  br i1 %earlycnd2, label %latch, label %exit
-
-latch:
-  store volatile i32 %iv, i32* @A
-  %iv.next = add i32 %iv, 1
-  br label %loop
-
-exit:
-  ret void
-}

From 179154f6b99f3577b3249a7f65d4bfa3a3b392c2 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 17:01:52 +0000
Subject: [PATCH 0937/1176] Include what you use in LanaiFrameLowering.{cpp,h}

llvm-svn: 362419
---
 llvm/lib/Target/Lanai/LanaiFrameLowering.cpp | 1 -
 llvm/lib/Target/Lanai/LanaiFrameLowering.h   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
index f220323295bf4..02d610eddd515 100644
--- a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -13,7 +13,6 @@
 #include "LanaiFrameLowering.h"
 
 #include "LanaiInstrInfo.h"
-#include "LanaiMachineFunctionInfo.h"
 #include "LanaiSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/llvm/lib/Target/Lanai/LanaiFrameLowering.h b/llvm/lib/Target/Lanai/LanaiFrameLowering.h
index 0438661129472..5fe4535543ec0 100644
--- a/llvm/lib/Target/Lanai/LanaiFrameLowering.h
+++ b/llvm/lib/Target/Lanai/LanaiFrameLowering.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
 #define LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
 
-#include "Lanai.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 
 namespace llvm {

From f4d22bd0b40be3a50f263a978789ce2004e3e481 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 17:01:57 +0000
Subject: [PATCH 0938/1176] Include what you use in LanaiISelDAGToDAG.cpp

llvm-svn: 362420
---
 llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index b3e96822184ec..aadcdc43f5601 100644
--- a/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
+#include "LanaiAluCode.h"
 #include "LanaiMachineFunctionInfo.h"
 #include "LanaiRegisterInfo.h"
 #include "LanaiSubtarget.h"

From dc136847e3f8ada2117cfc626dbb6c50d6ea79e7 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 17:02:02 +0000
Subject: [PATCH 0939/1176] Include what you use in LanaiMemAluCombiner.cpp

llvm-svn: 362421
---
 llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index a69dcddfc25e1..67443b771d3d4 100644
--- a/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -22,7 +22,7 @@
 // in the same machine basic block into one machine instruction.
 //===----------------------------------------------------------------------===//
 
-#include "Lanai.h"
+#include "LanaiAluCode.h"
 #include "LanaiTargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"

From b8aeaf882e49824151965b47d8660980c0ac5241 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 17:02:07 +0000
Subject: [PATCH 0940/1176] Include what you use in LanaiAsmPrinter.cpp

llvm-svn: 362422
---
 llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index e6c459aa42873..64d963475e1a6 100644
--- a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -12,7 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/LanaiInstPrinter.h"
-#include "Lanai.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
 #include "LanaiInstrInfo.h"
 #include "LanaiMCInstLower.h"
 #include "LanaiTargetMachine.h"

From 26c43d0ef8e528020b9567e415572fecb3b1859e Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Mon, 3 Jun 2019 17:02:15 +0000
Subject: [PATCH 0941/1176] Include what you use in Lanai.h

Other files were not relying on these transitive includes, so I'm
submitting this change separately.

llvm-svn: 362423
---
 llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 1 +
 llvm/lib/Target/Lanai/Lanai.h                      | 7 +------
 llvm/lib/Target/Lanai/LanaiFrameLowering.cpp       | 1 +
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index f6360e7ead26a..94e530ace4a31 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -9,6 +9,7 @@
 #include "Lanai.h"
 #include "LanaiAluCode.h"
 #include "LanaiCondCode.h"
+#include "LanaiInstrInfo.h"
 #include "MCTargetDesc/LanaiMCExpr.h"
 #include "TargetInfo/LanaiTargetInfo.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/llvm/lib/Target/Lanai/Lanai.h b/llvm/lib/Target/Lanai/Lanai.h
index 8e966c2c37056..2f06ea91ab03c 100644
--- a/llvm/lib/Target/Lanai/Lanai.h
+++ b/llvm/lib/Target/Lanai/Lanai.h
@@ -14,12 +14,7 @@
 #ifndef LLVM_LIB_TARGET_LANAI_LANAI_H
 #define LLVM_LIB_TARGET_LANAI_LANAI_H
 
-#include "LanaiAluCode.h"
-#include "LanaiCondCode.h"
-#include "MCTargetDesc/LanaiBaseInfo.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 class FunctionPass;
diff --git a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
index 02d610eddd515..142c09c504cc1 100644
--- a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -12,6 +12,7 @@
 
 #include "LanaiFrameLowering.h"
 
+#include "LanaiAluCode.h"
 #include "LanaiInstrInfo.h"
 #include "LanaiSubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"

From aad5d518823b5be988768a629f13cbe14a6e8b5d Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Mon, 3 Jun 2019 17:08:13 +0000
Subject: [PATCH 0942/1176] builtins: correct function name for AEABI

If `COMPILER_RT_ARMHF_TARGET` is set , the definition of the AEABI runtime
function `__aeabi_fcmpun` is misspelt: `__aeabi_fcmpum` instead of
`__aeabi_fcmpun`.

Patch by Konstantin Schwarz!

llvm-svn: 362424
---
 compiler-rt/lib/builtins/arm/comparesf2.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/builtins/arm/comparesf2.S b/compiler-rt/lib/builtins/arm/comparesf2.S
index a87cadf1b98b4..24b85d2fee150 100644
--- a/compiler-rt/lib/builtins/arm/comparesf2.S
+++ b/compiler-rt/lib/builtins/arm/comparesf2.S
@@ -248,11 +248,11 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2)
 END_COMPILERRT_FUNCTION(__unordsf2)
 
 #if defined(COMPILER_RT_ARMHF_TARGET)
-DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)
 	vmov s0, r0
 	vmov s1, r1
 	b SYMBOL_NAME(__unordsf2)
-END_COMPILERRT_FUNCTION(__aeabi_fcmpum)
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
 #else
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2)
 #endif

From 197a7c01e6bf77ad08104189592186b2f1c89283 Mon Sep 17 00:00:00 2001
From: Saleem Abdulrasool <compnerd@compnerd.org>
Date: Mon, 3 Jun 2019 17:17:09 +0000
Subject: [PATCH 0943/1176] tools: add `llvm-nm` and `llvm-objcopy` to tools

Add `nm` and `objcopy` to the default value for the tools that we install now
that they are sufficiently feature complete to replace bintuils' implementation.

Patch by Jiang Yi!

llvm-svn: 362425
---
 llvm/cmake/modules/AddLLVM.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index bd78a9f6783d7..e886c46a186a3 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -878,6 +878,8 @@ if(NOT LLVM_TOOLCHAIN_TOOLS)
     llvm-ar
     llvm-ranlib
     llvm-lib
+    llvm-nm
+    llvm-objcopy
     llvm-objdump
     llvm-rc
     llvm-profdata

From 83645d214d4d506945efa120a19e88d0ad4f0163 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Mon, 3 Jun 2019 17:41:12 +0000
Subject: [PATCH 0944/1176] [Tests] Add LFTR tests for multiple exit loops (try
 2)

(Recommit after fixing a keymash in the run line.  Sorry for breakage.)

This is preparation for D62625 <https://reviews.llvm.org/D62625>

llvm-svn: 362426
---
 .../IndVarSimplify/lftr-multi-exit.ll         | 276 ++++++++++++++++++
 1 file changed, 276 insertions(+)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll

diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll
new file mode 100644
index 0000000000000..037909accbc6b
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+; This is a collection of tests specifically for LFTR of multiple exit loops.
+; The actual LFTR performed is trivial so as to focus on the loop structure
+; aspects.
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+@A = external global i32
+
+define void @analyzeable_early_exit(i32 %n) {
+; CHECK-LABEL: @analyzeable_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @unanalyzeable_early_exit() {
+; CHECK-LABEL: @unanalyzeable_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[VOL:%.*]] = load volatile i32, i32* @A
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %vol = load volatile i32, i32* @A
+  %earlycnd = icmp ne i32 %vol, 0
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+
+define void @multiple_early_exits(i32 %n, i32 %m) {
+; CHECK-LABEL: @multiple_early_exits(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]]
+; CHECK:       continue:
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND2]], label [[LATCH]], label [[EXIT]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %continue, label %exit
+
+continue:
+  store volatile i32 %iv, i32* @A
+  %earlycnd2 = icmp ult i32 %iv, %m
+  br i1 %earlycnd2, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store volatile i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Note: This slightly odd form is what indvars itself produces for multiple
+; exits without a side effect between them.
+define void @compound_early_exit(i32 %n, i32 %m) {
+; CHECK-LABEL: @compound_early_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[EARLYCND]], [[EARLYCND2]]
+; CHECK-NEXT:    br i1 [[AND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  %earlycnd2 = icmp ult i32 %iv, %m
+  %and = and i1 %earlycnd, %earlycnd2
+  br i1 %and, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store volatile i32 %iv, i32* @A
+  %c = icmp ult i32 %iv.next, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+
+define void @unanalyzeable_latch(i32 %n) {
+; CHECK-LABEL: @unanalyzeable_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[VOL:%.*]] = load volatile i32, i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[VOL]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  %vol = load volatile i32, i32* @A
+  %c = icmp ult i32 %vol, 1000
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @single_exit_no_latch(i32 %n) {
+; CHECK-LABEL: @single_exit_no_latch(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    store i32 [[IV]], i32* @A
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %latch, label %exit
+
+latch:
+  %iv.next = add i32 %iv, 1
+  store i32 %iv, i32* @A
+  br label %loop
+
+exit:
+  ret void
+}
+
+; Multiple exits which could be LFTRed, but the latch itself is not an
+; exiting block.
+define void @no_latch_exit(i32 %n, i32 %m) {
+; CHECK-LABEL: @no_latch_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp ult i32 [[IV]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]]
+; CHECK:       continue:
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[EARLYCND2:%.*]] = icmp ult i32 [[IV]], [[M:%.*]]
+; CHECK-NEXT:    br i1 [[EARLYCND2]], label [[LATCH]], label [[EXIT]]
+; CHECK:       latch:
+; CHECK-NEXT:    store volatile i32 [[IV]], i32* @A
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry], [ %iv.next, %latch]
+  %earlycnd = icmp ult i32 %iv, %n
+  br i1 %earlycnd, label %continue, label %exit
+
+continue:
+  store volatile i32 %iv, i32* @A
+  %earlycnd2 = icmp ult i32 %iv, %m
+  br i1 %earlycnd2, label %latch, label %exit
+
+latch:
+  store volatile i32 %iv, i32* @A
+  %iv.next = add i32 %iv, 1
+  br label %loop
+
+exit:
+  ret void
+}

From 4172dbab5dd3dffa8717e090e1912fce598d1a77 Mon Sep 17 00:00:00 2001
From: Andrew Kaylor <andrew.kaylor@intel.com>
Date: Mon, 3 Jun 2019 17:54:15 +0000
Subject: [PATCH 0945/1176] Fix a crash when the default of a switch is removed

This patch fixes a problem that occurs in LowerSwitch when a switch statement has a PHI node as its condition, and the PHI node only has two incoming blocks, and one of those incoming blocks is through an unreachable default in the switch statement. When this condition occurs, LowerSwitch holds a pointer to the condition value, but removes the switch block as a predecessor of the PHI block, causing the PHI node to be replaced. LowerSwitch then tries to use its stale pointer to the original condition value, causing a crash.

Differential Revision: https://reviews.llvm.org/D62560

llvm-svn: 362427
---
 llvm/lib/Transforms/Utils/LowerSwitch.cpp     |  5 +++
 .../condition-phi-unreachable-default.ll      | 36 +++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 llvm/test/Transforms/LowerSwitch/condition-phi-unreachable-default.ll

diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 680b5d31a42c9..8062fe4990832 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -584,6 +584,11 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
         PopSucc->removePredecessor(OrigBlock);
       return;
     }
+
+    // If the condition was a PHI node with the switch block as a predecessor
+    // removing predecessors may have caused the condition to be erased.
+    // Getting the condition value again here protects against that.
+    Val = SI->getCondition();
   }
 
   // Create a new, empty default block so that the new hierarchy of
diff --git a/llvm/test/Transforms/LowerSwitch/condition-phi-unreachable-default.ll b/llvm/test/Transforms/LowerSwitch/condition-phi-unreachable-default.ll
new file mode 100644
index 0000000000000..c53602bcd27aa
--- /dev/null
+++ b/llvm/test/Transforms/LowerSwitch/condition-phi-unreachable-default.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -lowerswitch -S | FileCheck %s
+
+; This test verifies -lowerswitch does not crash when an removing an
+; unreachable default branch causes a PHI node used as the switch
+; condition to be erased.
+
+define void @f() local_unnamed_addr {
+entry:
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.epilog.outer, %for.body
+  %i = phi i32 [ undef, %for.body ], [ 0, %entry ]
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %sw.epilog
+  switch i32 %i, label %sw.epilog [
+    i32 0, label %sw.epilog.outer.backedge.loopexit
+    i32 1, label %sw.epilog.outer.backedge
+  ]
+
+sw.epilog.outer.backedge.loopexit:                ; preds = %for.body
+  br label %for.end
+
+sw.epilog.outer.backedge:                         ; preds = %for.body
+  unreachable
+
+for.end:                                          ; preds = %sw.epilog
+  ret void
+}
+
+; The phi and the switch should both be eliminated.
+; CHECK: @f()
+; CHECK: sw.epilog:
+; CHECK-NOT: phi
+; CHECK: for.body:
+; CHECK-NOT: switch

From 86463141e753a5a2f559b4e8a31eff3914822282 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 3 Jun 2019 18:13:29 +0000
Subject: [PATCH 0946/1176] gn build: Merge r362352

llvm-svn: 362428
---
 llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
index 6c2c478f7ec59..e8ebbe8185845 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
@@ -108,6 +108,7 @@ static_library("clangd") {
     "index/MemIndex.cpp",
     "index/Merge.cpp",
     "index/Ref.cpp",
+    "index/Relation.cpp",
     "index/Serialization.cpp",
     "index/Symbol.cpp",
     "index/SymbolCollector.cpp",

From 221e604d6f92af075f22e38ca2fe71432bb1b3c1 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Mon, 3 Jun 2019 18:15:38 +0000
Subject: [PATCH 0947/1176] [PDB] Copy inlinee lines records into the PDB

Summary:
- Fixes inline call frame line table display in windbg.
- Improve llvm-pdbutil to dump extra file ids.
- Warn on unknown subsections so we don't have this kind of bug in the
  future.

Reviewers: inglorion, akhuang, aganea

Subscribers: eraman, zturner, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62701

llvm-svn: 362429
---
 lld/COFF/PDB.cpp                              | 103 +++++-
 lld/test/COFF/pdb-inlinees-extrafiles.s       | 334 ++++++++++++++++++
 lld/test/COFF/pdb-inlinees.s                  | 332 +++++++++++++++++
 lld/test/COFF/pdb-unknown-subsection.s        |  46 +++
 .../CodeView/DebugInlineeLinesSubsection.h    |   7 +-
 llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp   |   4 +
 6 files changed, 809 insertions(+), 17 deletions(-)
 create mode 100644 lld/test/COFF/pdb-inlinees-extrafiles.s
 create mode 100644 lld/test/COFF/pdb-inlinees.s
 create mode 100644 lld/test/COFF/pdb-unknown-subsection.s

diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index a99a02362776f..6ea4c345ac201 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -211,6 +211,10 @@ class DebugSHandler {
   /// PDB.
   DebugChecksumsSubsectionRef Checksums;
 
+  /// The DEBUG_S_INLINEELINES subsection. There can be only one of these per
+  /// object file.
+  DebugInlineeLinesSubsectionRef InlineeLines;
+
   /// The DEBUG_S_FRAMEDATA subsection(s).  There can be more than one of
   /// these and they need not appear in any specific order.  However, they
   /// contain string table references which need to be re-written, so we
@@ -231,6 +235,10 @@ class DebugSHandler {
       : Linker(Linker), File(File), IndexMap(IndexMap) {}
 
   void handleDebugS(lld::coff::SectionChunk &DebugS);
+
+  std::shared_ptr<DebugInlineeLinesSubsection>
+  mergeInlineeLines(DebugChecksumsSubsection *NewChecksums);
+
   void finish();
 };
 }
@@ -1004,6 +1012,11 @@ void DebugSHandler::handleDebugS(lld::coff::SectionChunk &DebugS) {
       // modification because the file checksum offsets will stay the same.
       File.ModuleDBI->addDebugSubsection(SS);
       break;
+    case DebugSubsectionKind::InlineeLines:
+      assert(!InlineeLines.valid() &&
+             "Encountered multiple inlinee lines subsections!");
+      ExitOnErr(InlineeLines.initialize(SS.getRecordData()));
+      break;
     case DebugSubsectionKind::FrameData: {
       // We need to re-write string table indices here, so save off all
       // frame data subsections until we've processed the entire list of
@@ -1018,13 +1031,77 @@ void DebugSHandler::handleDebugS(lld::coff::SectionChunk &DebugS) {
                                 SS.getRecordData());
       break;
     }
+
+    case DebugSubsectionKind::CrossScopeImports:
+    case DebugSubsectionKind::CrossScopeExports:
+      // These appear to relate to cross-module optimization, so we might use
+      // these for ThinLTO.
+      break;
+
+    case DebugSubsectionKind::ILLines:
+    case DebugSubsectionKind::FuncMDTokenMap:
+    case DebugSubsectionKind::TypeMDTokenMap:
+    case DebugSubsectionKind::MergedAssemblyInput:
+      // These appear to relate to .Net assembly info.
+      break;
+
+    case DebugSubsectionKind::CoffSymbolRVA:
+      // Unclear what this is for.
+      break;
+
     default:
-      // FIXME: Process the rest of the subsections.
+      warn("ignoring unknown debug$S subsection kind 0x" +
+           utohexstr(uint32_t(SS.kind())));
       break;
     }
   }
 }
 
+static Expected<StringRef>
+getFileName(const DebugStringTableSubsectionRef &Strings,
+            const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) {
+  auto Iter = Checksums.getArray().at(FileID);
+  if (Iter == Checksums.getArray().end())
+    return make_error<CodeViewError>(cv_error_code::no_records);
+  uint32_t Offset = Iter->FileNameOffset;
+  return Strings.getString(Offset);
+}
+
+std::shared_ptr<DebugInlineeLinesSubsection>
+DebugSHandler::mergeInlineeLines(DebugChecksumsSubsection *NewChecksums) {
+  auto NewInlineeLines = std::make_shared<DebugInlineeLinesSubsection>(
+      *NewChecksums, InlineeLines.hasExtraFiles());
+
+  for (const InlineeSourceLine &Line : InlineeLines) {
+    TypeIndex Inlinee = Line.Header->Inlinee;
+    uint32_t FileID = Line.Header->FileID;
+    uint32_t SourceLine = Line.Header->SourceLineNum;
+
+    ArrayRef<TypeIndex> TypeOrItemMap =
+        IndexMap.IsTypeServerMap ? IndexMap.IPIMap : IndexMap.TPIMap;
+    if (!remapTypeIndex(Inlinee, TypeOrItemMap)) {
+      log("ignoring inlinee line record in " + File.getName() +
+          " with bad inlinee index 0x" + utohexstr(Inlinee.getIndex()));
+      continue;
+    }
+
+    SmallString<128> Filename =
+        ExitOnErr(getFileName(CVStrTab, Checksums, FileID));
+    pdbMakeAbsolute(Filename);
+    NewInlineeLines->addInlineSite(Inlinee, Filename, SourceLine);
+
+    if (InlineeLines.hasExtraFiles()) {
+      for (uint32_t ExtraFileId : Line.ExtraFiles) {
+        Filename = ExitOnErr(getFileName(CVStrTab, Checksums, ExtraFileId));
+        pdbMakeAbsolute(Filename);
+        NewInlineeLines->addExtraFile(Filename);
+      }
+    }
+  }
+
+  return NewInlineeLines;
+}
+
 void DebugSHandler::finish() {
   pdb::DbiStreamBuilder &DbiBuilder = Linker.Builder.getDbiBuilder();
 
@@ -1063,13 +1140,17 @@ void DebugSHandler::finish() {
   // subsections.
   auto NewChecksums = make_unique<DebugChecksumsSubsection>(Linker.PDBStrTab);
   for (FileChecksumEntry &FC : Checksums) {
-    SmallString<128> FileName =
+    SmallString<128> Filename =
         ExitOnErr(CVStrTab.getString(FC.FileNameOffset));
-    pdbMakeAbsolute(FileName);
-    ExitOnErr(Linker.Builder.getDbiBuilder().addModuleSourceFile(
-        *File.ModuleDBI, FileName));
-    NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum);
+    pdbMakeAbsolute(Filename);
+    ExitOnErr(DbiBuilder.addModuleSourceFile(*File.ModuleDBI, Filename));
+    NewChecksums->addChecksum(Filename, FC.Kind, FC.Checksum);
   }
+
+  // Rewrite inlinee item indices if present.
+  if (InlineeLines.valid())
+    File.ModuleDBI->addDebugSubsection(mergeInlineeLines(NewChecksums.get()));
+
   File.ModuleDBI->addDebugSubsection(std::move(NewChecksums));
 }
 
@@ -1604,16 +1685,6 @@ void PDBLinker::commit(codeview::GUID *Guid) {
   ExitOnErr(Builder.commit(Config->PDBPath, Guid));
 }
 
-static Expected<StringRef>
-getFileName(const DebugStringTableSubsectionRef &Strings,
-            const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) {
-  auto Iter = Checksums.getArray().at(FileID);
-  if (Iter == Checksums.getArray().end())
-    return make_error<CodeViewError>(cv_error_code::no_records);
-  uint32_t Offset = Iter->FileNameOffset;
-  return Strings.getString(Offset);
-}
-
 static uint32_t getSecrelReloc() {
   switch (Config->Machine) {
   case AMD64:
diff --git a/lld/test/COFF/pdb-inlinees-extrafiles.s b/lld/test/COFF/pdb-inlinees-extrafiles.s
new file mode 100644
index 0000000000000..759663d7a4f46
--- /dev/null
+++ b/lld/test/COFF/pdb-inlinees-extrafiles.s
@@ -0,0 +1,334 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj %s -o %t.obj -triple x86_64-windows-msvc
+# RUN: lld-link -entry:main -nodefaultlib %t.obj -out:%t.exe -pdb:%t.pdb -debug
+# RUN: llvm-pdbutil dump -il %t.pdb | FileCheck %s
+
+# The assembly was hand written to model the following C code. As of this
+# writing, clang does not emit extra files for inlinees, so it had to be hand
+# written.
+
+# // t1.h:
+# ++x;
+# #include "t2.h"
+# ++x;
+
+# // t2.h:
+# ++x;
+# __debugbreak();
+# ++x;
+
+# // t.c:
+# volatile int x;
+# static __forceinline void inlinee_1(void) {
+#   ++x;
+# #include "t1.inc"
+#   ++x;
+# }
+# int main() {
+#   ++x;
+#   inlinee_1();
+#   ++x;
+#   return x;
+# }
+
+# CHECK:                             Inlinee Lines
+# CHECK:      Mod 0000 | `{{.*}}pdb-inlinees-extrafiles.s.tmp.obj`:
+# CHECK-NEXT:  Inlinee |  Line | Source File
+# CHECK-NEXT:   0x1000 |     7 | C:\src\llvm-project\build\t.c (MD5: A79D837C976E9F0463A474D74E2EE9E7)
+# CHECK-NEXT:                    C:\src\llvm-project\build\t1.h (MD5: FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)
+# CHECK-NEXT:                    C:\src\llvm-project\build\t2.h (MD5: FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)
+
+	.text
+	.def	 @feat.00;
+	.scl	3;
+	.type	0;
+	.endef
+	.globl	@feat.00
+.set @feat.00, 0
+	.intel_syntax noprefix
+	.file	"t.c"
+	.def	 main;
+	.scl	2;
+	.type	32;
+	.endef
+	.section	.text,"xr",one_only,main
+	.globl	main                    # -- Begin function main
+main:                                   # @main
+.Lfunc_begin0:
+	.cv_func_id 0
+# %bb.0:                                # %entry
+	.cv_file	1 "C:\\src\\llvm-project\\build\\t.c" "A79D837C976E9F0463A474D74E2EE9E7" 1
+	.cv_file	2 "C:\\src\\llvm-project\\build\\t1.h" "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" 1
+	.cv_file	3 "C:\\src\\llvm-project\\build\\t2.h" "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" 1
+	.cv_loc	0 1 13 0                # t.c:13:0
+	inc	dword ptr [rip + x]
+.Ltmp0:
+	.cv_inline_site_id 1 within 0 inlined_at 1 14 0
+	.cv_loc	1 1 3 0                 # t.c:3:0
+	inc	dword ptr [rip + x]
+.Ltmp1:
+	.cv_loc	1 2 1 0                 # t1.h:1:0
+	inc	dword ptr [rip + x]
+	.cv_loc	1 3 1 0                 # t2.h:1:0
+	inc	dword ptr [rip + x]
+	.cv_loc	1 3 2 0                 # t2.h:2:0
+	int3
+	.cv_loc	1 3 3 0                 # t2.h:3:0
+	inc	dword ptr [rip + x]
+	.cv_loc	1 2 3 0                 # t1.h:3:0
+	inc	dword ptr [rip + x]
+.Ltmp2:
+	.cv_loc	1 1 5 0                # t.c:5:0
+	inc	dword ptr [rip + x]
+.Ltmp3:
+	.cv_loc	0 1 15 0                # t.c:15:0
+	inc	dword ptr [rip + x]
+	.cv_loc	0 1 16 0                # t.c:16:0
+	mov	eax, dword ptr [rip + x]
+	ret
+.Ltmp4:
+.Lfunc_end0:
+                                        # -- End function
+	.comm	x,4,2                   # @x
+	.section	.debug$S,"dr"
+	.p2align	2
+	.long	4                       # Debug section magic
+	.long	241
+	.long	.Ltmp6-.Ltmp5           # Subsection size
+.Ltmp5:
+	.short	.Ltmp8-.Ltmp7           # Record length
+.Ltmp7:
+	.short	4412                    # Record kind: S_COMPILE3
+	.long	0                       # Flags and language
+	.short	208                     # CPUType
+	.short	9                       # Frontend version
+	.short	0
+	.short	0
+	.short	0
+	.short	9000                    # Backend version
+	.short	0
+	.short	0
+	.short	0
+	.asciz	"clang version 9.0.0 (git@github.com:llvm/llvm-project.git aa762a56caf3ef2b0b41c501e66d3ef32903a2d0)" # Null-terminated compiler version string
+	.p2align	2
+.Ltmp8:
+.Ltmp6:
+	.p2align	2
+	.long	246                     # Inlinee lines subsection
+	.long	.Ltmp10-.Ltmp9          # Subsection size
+.Ltmp9:
+	.long	1                       # Inlinee lines signature, extra files
+
+                                        # Inlined function inlinee_1 starts at t.c:7
+	.long	4098                    # Type index of inlined function
+	.cv_filechecksumoffset	1       # Offset into filechecksum table
+	.long	7                       # Starting line number
+	.long 2
+	.cv_filechecksumoffset	2       # Offset into filechecksum table
+	.cv_filechecksumoffset	3       # Offset into filechecksum table
+
+.Ltmp10:
+	.p2align	2
+	.section	.debug$S,"dr",associative,main
+	.p2align	2
+	.long	4                       # Debug section magic
+	.long	241                     # Symbol subsection for main
+	.long	.Ltmp12-.Ltmp11         # Subsection size
+.Ltmp11:
+	.short	.Ltmp14-.Ltmp13         # Record length
+.Ltmp13:
+	.short	4423                    # Record kind: S_GPROC32_ID
+	.long	0                       # PtrParent
+	.long	0                       # PtrEnd
+	.long	0                       # PtrNext
+	.long	.Lfunc_end0-main        # Code size
+	.long	0                       # Offset after prologue
+	.long	0                       # Offset before epilogue
+	.long	4101                    # Function type index
+	.secrel32	main            # Function section relative address
+	.secidx	main                    # Function section index
+	.byte	0                       # Flags
+	.asciz	"main"                  # Function name
+	.p2align	2
+.Ltmp14:
+	.short	.Ltmp16-.Ltmp15         # Record length
+.Ltmp15:
+	.short	4114                    # Record kind: S_FRAMEPROC
+	.long	0                       # FrameSize
+	.long	0                       # Padding
+	.long	0                       # Offset of padding
+	.long	0                       # Bytes of callee saved registers
+	.long	0                       # Exception handler offset
+	.short	0                       # Exception handler section
+	.long	0                       # Flags (defines frame register)
+	.p2align	2
+.Ltmp16:
+	.short	.Ltmp18-.Ltmp17         # Record length
+.Ltmp17:
+	.short	4429                    # Record kind: S_INLINESITE
+	.long	0                       # PtrParent
+	.long	0                       # PtrEnd
+	.long	4098                    # Inlinee type index
+	.cv_inline_linetable	1 1 7 .Lfunc_begin0 .Lfunc_end0
+	.p2align	2
+.Ltmp18:
+	.short	2                       # Record length
+	.short	4430                    # Record kind: S_INLINESITE_END
+	.short	2                       # Record length
+	.short	4431                    # Record kind: S_PROC_ID_END
+.Ltmp12:
+	.p2align	2
+	.cv_linetable	0, main, .Lfunc_end0
+	.section	.debug$S,"dr"
+	.long	241                     # Symbol subsection for globals
+	.long	.Ltmp22-.Ltmp21         # Subsection size
+.Ltmp21:
+	.short	.Ltmp24-.Ltmp23         # Record length
+.Ltmp23:
+	.short	4365                    # Record kind: S_GDATA32
+	.long	4102                    # Type
+	.secrel32	x               # DataOffset
+	.secidx	x                       # Segment
+	.asciz	"x"                     # Name
+	.p2align	2
+.Ltmp24:
+.Ltmp22:
+	.p2align	2
+	.cv_filechecksums               # File index to string table offset subsection
+	.cv_stringtable                 # String table
+	.long	241
+	.long	.Ltmp26-.Ltmp25         # Subsection size
+.Ltmp25:
+	.short	.Ltmp28-.Ltmp27         # Record length
+.Ltmp27:
+	.short	4428                    # Record kind: S_BUILDINFO
+	.long	4105                    # LF_BUILDINFO index
+	.p2align	2
+.Ltmp28:
+.Ltmp26:
+	.p2align	2
+	.section	.debug$T,"dr"
+	.p2align	2
+	.long	4                       # Debug section magic
+	# ArgList (0x1000) {
+	#   TypeLeafKind: LF_ARGLIST (0x1201)
+	#   NumArgs: 0
+	#   Arguments [
+	#   ]
+	# }
+	.byte	0x06, 0x00, 0x01, 0x12
+	.byte	0x00, 0x00, 0x00, 0x00
+	# Procedure (0x1001) {
+	#   TypeLeafKind: LF_PROCEDURE (0x1008)
+	#   ReturnType: void (0x3)
+	#   CallingConvention: NearC (0x0)
+	#   FunctionOptions [ (0x0)
+	#   ]
+	#   NumParameters: 0
+	#   ArgListType: () (0x1000)
+	# }
+	.byte	0x0e, 0x00, 0x08, 0x10
+	.byte	0x03, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x10, 0x00, 0x00
+	# FuncId (0x1002) {
+	#   TypeLeafKind: LF_FUNC_ID (0x1601)
+	#   ParentScope: 0x0
+	#   FunctionType: void () (0x1001)
+	#   Name: inlinee_1
+	# }
+	.byte	0x16, 0x00, 0x01, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x01, 0x10, 0x00, 0x00
+	.byte	0x69, 0x6e, 0x6c, 0x69
+	.byte	0x6e, 0x65, 0x65, 0x5f
+	.byte	0x31, 0x00, 0xf2, 0xf1
+	# FuncId (0x1003) {
+	#   TypeLeafKind: LF_FUNC_ID (0x1601)
+	#   ParentScope: 0x0
+	#   FunctionType: void () (0x1001)
+	#   Name: inlinee_2
+	# }
+	.byte	0x16, 0x00, 0x01, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x01, 0x10, 0x00, 0x00
+	.byte	0x69, 0x6e, 0x6c, 0x69
+	.byte	0x6e, 0x65, 0x65, 0x5f
+	.byte	0x32, 0x00, 0xf2, 0xf1
+	# Procedure (0x1004) {
+	#   TypeLeafKind: LF_PROCEDURE (0x1008)
+	#   ReturnType: int (0x74)
+	#   CallingConvention: NearC (0x0)
+	#   FunctionOptions [ (0x0)
+	#   ]
+	#   NumParameters: 0
+	#   ArgListType: () (0x1000)
+	# }
+	.byte	0x0e, 0x00, 0x08, 0x10
+	.byte	0x74, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x10, 0x00, 0x00
+	# FuncId (0x1005) {
+	#   TypeLeafKind: LF_FUNC_ID (0x1601)
+	#   ParentScope: 0x0
+	#   FunctionType: int () (0x1004)
+	#   Name: main
+	# }
+	.byte	0x12, 0x00, 0x01, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x04, 0x10, 0x00, 0x00
+	.byte	0x6d, 0x61, 0x69, 0x6e
+	.byte	0x00, 0xf3, 0xf2, 0xf1
+	# Modifier (0x1006) {
+	#   TypeLeafKind: LF_MODIFIER (0x1001)
+	#   ModifiedType: int (0x74)
+	#   Modifiers [ (0x2)
+	#     Volatile (0x2)
+	#   ]
+	# }
+	.byte	0x0a, 0x00, 0x01, 0x10
+	.byte	0x74, 0x00, 0x00, 0x00
+	.byte	0x02, 0x00, 0xf2, 0xf1
+	# StringId (0x1007) {
+	#   TypeLeafKind: LF_STRING_ID (0x1605)
+	#   Id: 0x0
+	#   StringData: C:\src\llvm-project\build
+	# }
+	.byte	0x22, 0x00, 0x05, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x43, 0x3a, 0x5c, 0x73
+	.byte	0x72, 0x63, 0x5c, 0x6c
+	.byte	0x6c, 0x76, 0x6d, 0x2d
+	.byte	0x70, 0x72, 0x6f, 0x6a
+	.byte	0x65, 0x63, 0x74, 0x5c
+	.byte	0x62, 0x75, 0x69, 0x6c
+	.byte	0x64, 0x00, 0xf2, 0xf1
+	# StringId (0x1008) {
+	#   TypeLeafKind: LF_STRING_ID (0x1605)
+	#   Id: 0x0
+	#   StringData: t.c
+	# }
+	.byte	0x0a, 0x00, 0x05, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x74, 0x2e, 0x63, 0x00
+	# BuildInfo (0x1009) {
+	#   TypeLeafKind: LF_BUILDINFO (0x1603)
+	#   NumArgs: 5
+	#   Arguments [
+	#     ArgType: C:\src\llvm-project\build (0x1007)
+	#     ArgType: 0x0
+	#     ArgType: t.c (0x1008)
+	#     ArgType: 0x0
+	#     ArgType: 0x0
+	#   ]
+	# }
+	.byte	0x1a, 0x00, 0x03, 0x16
+	.byte	0x05, 0x00, 0x07, 0x10
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x08, 0x10
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0xf2, 0xf1
+
+	.addrsig
+	.addrsig_sym x
diff --git a/lld/test/COFF/pdb-inlinees.s b/lld/test/COFF/pdb-inlinees.s
new file mode 100644
index 0000000000000..845a04bdec0f7
--- /dev/null
+++ b/lld/test/COFF/pdb-inlinees.s
@@ -0,0 +1,332 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj %s -o %t.obj -triple x86_64-windows-msvc
+# RUN: lld-link -entry:main -nodefaultlib %t.obj -out:%t.exe -pdb:%t.pdb -debug
+# RUN: llvm-pdbutil dump -il %t.pdb | FileCheck %s
+
+# Compiled from this C code, with modifications to test multiple file checksums:
+# volatile int x;
+# static __forceinline void inlinee_2(void) {
+#   ++x;
+#   __debugbreak();
+#   ++x;
+# }
+# static __forceinline void inlinee_1(void) {
+#   ++x;
+#   inlinee_2();
+#   ++x;
+# }
+# int main() {
+#   ++x;
+#   inlinee_1();
+#   ++x;
+#   return x;
+# }
+
+# CHECK:                             Inlinee Lines
+# CHECK:      Mod 0000 | `{{.*}}pdb-inlinees.s.tmp.obj`:
+# CHECK-NEXT:  Inlinee |  Line | Source File
+# CHECK-NEXT:   0x1000 |     7 | C:\src\llvm-project\build\t.c (MD5: A79D837C976E9F0463A474D74E2EE9E7)
+# CHECK-NEXT:   0x1001 |     2 | C:\src\llvm-project\build\file2.h (MD5: FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)
+
+	.text
+	.def	 @feat.00;
+	.scl	3;
+	.type	0;
+	.endef
+	.globl	@feat.00
+.set @feat.00, 0
+	.intel_syntax noprefix
+	.file	"t.c"
+	.def	 main;
+	.scl	2;
+	.type	32;
+	.endef
+	.section	.text,"xr",one_only,main
+	.globl	main                    # -- Begin function main
+main:                                   # @main
+.Lfunc_begin0:
+	.cv_func_id 0
+# %bb.0:                                # %entry
+	.cv_file	1 "C:\\src\\llvm-project\\build\\t.c" "A79D837C976E9F0463A474D74E2EE9E7" 1
+	.cv_file	2 "C:\\src\\llvm-project\\build\\file2.h" "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" 1
+	.cv_loc	0 1 13 0                # t.c:13:0
+	inc	dword ptr [rip + x]
+.Ltmp0:
+	.cv_inline_site_id 1 within 0 inlined_at 1 14 0
+	.cv_loc	1 1 8 0                 # t.c:8:0
+	inc	dword ptr [rip + x]
+.Ltmp1:
+	.cv_inline_site_id 2 within 1 inlined_at 1 9 0
+	.cv_loc	2 2 3 0                 # file2.h:3:0
+	inc	dword ptr [rip + x]
+	.cv_loc	2 2 4 0                 # file2.h:4:0
+	int3
+	.cv_loc	2 2 5 0                 # file2.h:5:0
+	inc	dword ptr [rip + x]
+.Ltmp2:
+	.cv_loc	1 1 10 0                # t.c:10:0
+	inc	dword ptr [rip + x]
+.Ltmp3:
+	.cv_loc	0 1 15 0                # t.c:15:0
+	inc	dword ptr [rip + x]
+	.cv_loc	0 1 16 0                # t.c:16:0
+	mov	eax, dword ptr [rip + x]
+	ret
+.Ltmp4:
+.Lfunc_end0:
+                                        # -- End function
+	.comm	x,4,2                   # @x
+	.section	.debug$S,"dr"
+	.p2align	2
+	.long	4                       # Debug section magic
+	.long	241
+	.long	.Ltmp6-.Ltmp5           # Subsection size
+.Ltmp5:
+	.short	.Ltmp8-.Ltmp7           # Record length
+.Ltmp7:
+	.short	4412                    # Record kind: S_COMPILE3
+	.long	0                       # Flags and language
+	.short	208                     # CPUType
+	.short	9                       # Frontend version
+	.short	0
+	.short	0
+	.short	0
+	.short	9000                    # Backend version
+	.short	0
+	.short	0
+	.short	0
+	.asciz	"clang version 9.0.0 (git@github.com:llvm/llvm-project.git aa762a56caf3ef2b0b41c501e66d3ef32903a2d0)" # Null-terminated compiler version string
+	.p2align	2
+.Ltmp8:
+.Ltmp6:
+	.p2align	2
+	.long	246                     # Inlinee lines subsection
+	.long	.Ltmp10-.Ltmp9          # Subsection size
+.Ltmp9:
+	.long	0                       # Inlinee lines signature
+
+                                        # Inlined function inlinee_1 starts at t.c:7
+	.long	4098                    # Type index of inlined function
+	.cv_filechecksumoffset	1       # Offset into filechecksum table
+	.long	7                       # Starting line number
+
+                                        # Inlined function inlinee_2 starts at file2.h:2
+	.long	4099                    # Type index of inlined function
+	.cv_filechecksumoffset	2       # Offset into filechecksum table
+	.long	2                       # Starting line number
+.Ltmp10:
+	.p2align	2
+	.section	.debug$S,"dr",associative,main
+	.p2align	2
+	.long	4                       # Debug section magic
+	.long	241                     # Symbol subsection for main
+	.long	.Ltmp12-.Ltmp11         # Subsection size
+.Ltmp11:
+	.short	.Ltmp14-.Ltmp13         # Record length
+.Ltmp13:
+	.short	4423                    # Record kind: S_GPROC32_ID
+	.long	0                       # PtrParent
+	.long	0                       # PtrEnd
+	.long	0                       # PtrNext
+	.long	.Lfunc_end0-main        # Code size
+	.long	0                       # Offset after prologue
+	.long	0                       # Offset before epilogue
+	.long	4101                    # Function type index
+	.secrel32	main            # Function section relative address
+	.secidx	main                    # Function section index
+	.byte	0                       # Flags
+	.asciz	"main"                  # Function name
+	.p2align	2
+.Ltmp14:
+	.short	.Ltmp16-.Ltmp15         # Record length
+.Ltmp15:
+	.short	4114                    # Record kind: S_FRAMEPROC
+	.long	0                       # FrameSize
+	.long	0                       # Padding
+	.long	0                       # Offset of padding
+	.long	0                       # Bytes of callee saved registers
+	.long	0                       # Exception handler offset
+	.short	0                       # Exception handler section
+	.long	0                       # Flags (defines frame register)
+	.p2align	2
+.Ltmp16:
+	.short	.Ltmp18-.Ltmp17         # Record length
+.Ltmp17:
+	.short	4429                    # Record kind: S_INLINESITE
+	.long	0                       # PtrParent
+	.long	0                       # PtrEnd
+	.long	4098                    # Inlinee type index
+	.cv_inline_linetable	1 1 7 .Lfunc_begin0 .Lfunc_end0
+	.p2align	2
+.Ltmp18:
+	.short	.Ltmp20-.Ltmp19         # Record length
+.Ltmp19:
+	.short	4429                    # Record kind: S_INLINESITE
+	.long	0                       # PtrParent
+	.long	0                       # PtrEnd
+	.long	4099                    # Inlinee type index
+	.cv_inline_linetable	2 2 2 .Lfunc_begin0 .Lfunc_end0
+	.p2align	2
+.Ltmp20:
+	.short	2                       # Record length
+	.short	4430                    # Record kind: S_INLINESITE_END
+	.short	2                       # Record length
+	.short	4430                    # Record kind: S_INLINESITE_END
+	.short	2                       # Record length
+	.short	4431                    # Record kind: S_PROC_ID_END
+.Ltmp12:
+	.p2align	2
+	.cv_linetable	0, main, .Lfunc_end0
+	.section	.debug$S,"dr"
+	.long	241                     # Symbol subsection for globals
+	.long	.Ltmp22-.Ltmp21         # Subsection size
+.Ltmp21:
+	.short	.Ltmp24-.Ltmp23         # Record length
+.Ltmp23:
+	.short	4365                    # Record kind: S_GDATA32
+	.long	4102                    # Type
+	.secrel32	x               # DataOffset
+	.secidx	x                       # Segment
+	.asciz	"x"                     # Name
+	.p2align	2
+.Ltmp24:
+.Ltmp22:
+	.p2align	2
+	.cv_filechecksums               # File index to string table offset subsection
+	.cv_stringtable                 # String table
+	.long	241
+	.long	.Ltmp26-.Ltmp25         # Subsection size
+.Ltmp25:
+	.short	.Ltmp28-.Ltmp27         # Record length
+.Ltmp27:
+	.short	4428                    # Record kind: S_BUILDINFO
+	.long	4105                    # LF_BUILDINFO index
+	.p2align	2
+.Ltmp28:
+.Ltmp26:
+	.p2align	2
+	.section	.debug$T,"dr"
+	.p2align	2
+	.long	4                       # Debug section magic
+	# ArgList (0x1000) {
+	#   TypeLeafKind: LF_ARGLIST (0x1201)
+	#   NumArgs: 0
+	#   Arguments [
+	#   ]
+	# }
+	.byte	0x06, 0x00, 0x01, 0x12
+	.byte	0x00, 0x00, 0x00, 0x00
+	# Procedure (0x1001) {
+	#   TypeLeafKind: LF_PROCEDURE (0x1008)
+	#   ReturnType: void (0x3)
+	#   CallingConvention: NearC (0x0)
+	#   FunctionOptions [ (0x0)
+	#   ]
+	#   NumParameters: 0
+	#   ArgListType: () (0x1000)
+	# }
+	.byte	0x0e, 0x00, 0x08, 0x10
+	.byte	0x03, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x10, 0x00, 0x00
+	# FuncId (0x1002) {
+	#   TypeLeafKind: LF_FUNC_ID (0x1601)
+	#   ParentScope: 0x0
+	#   FunctionType: void () (0x1001)
+	#   Name: inlinee_1
+	# }
+	.byte	0x16, 0x00, 0x01, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x01, 0x10, 0x00, 0x00
+	.byte	0x69, 0x6e, 0x6c, 0x69
+	.byte	0x6e, 0x65, 0x65, 0x5f
+	.byte	0x31, 0x00, 0xf2, 0xf1
+	# FuncId (0x1003) {
+	#   TypeLeafKind: LF_FUNC_ID (0x1601)
+	#   ParentScope: 0x0
+	#   FunctionType: void () (0x1001)
+	#   Name: inlinee_2
+	# }
+	.byte	0x16, 0x00, 0x01, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x01, 0x10, 0x00, 0x00
+	.byte	0x69, 0x6e, 0x6c, 0x69
+	.byte	0x6e, 0x65, 0x65, 0x5f
+	.byte	0x32, 0x00, 0xf2, 0xf1
+	# Procedure (0x1004) {
+	#   TypeLeafKind: LF_PROCEDURE (0x1008)
+	#   ReturnType: int (0x74)
+	#   CallingConvention: NearC (0x0)
+	#   FunctionOptions [ (0x0)
+	#   ]
+	#   NumParameters: 0
+	#   ArgListType: () (0x1000)
+	# }
+	.byte	0x0e, 0x00, 0x08, 0x10
+	.byte	0x74, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x10, 0x00, 0x00
+	# FuncId (0x1005) {
+	#   TypeLeafKind: LF_FUNC_ID (0x1601)
+	#   ParentScope: 0x0
+	#   FunctionType: int () (0x1004)
+	#   Name: main
+	# }
+	.byte	0x12, 0x00, 0x01, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x04, 0x10, 0x00, 0x00
+	.byte	0x6d, 0x61, 0x69, 0x6e
+	.byte	0x00, 0xf3, 0xf2, 0xf1
+	# Modifier (0x1006) {
+	#   TypeLeafKind: LF_MODIFIER (0x1001)
+	#   ModifiedType: int (0x74)
+	#   Modifiers [ (0x2)
+	#     Volatile (0x2)
+	#   ]
+	# }
+	.byte	0x0a, 0x00, 0x01, 0x10
+	.byte	0x74, 0x00, 0x00, 0x00
+	.byte	0x02, 0x00, 0xf2, 0xf1
+	# StringId (0x1007) {
+	#   TypeLeafKind: LF_STRING_ID (0x1605)
+	#   Id: 0x0
+	#   StringData: C:\src\llvm-project\build
+	# }
+	.byte	0x22, 0x00, 0x05, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x43, 0x3a, 0x5c, 0x73
+	.byte	0x72, 0x63, 0x5c, 0x6c
+	.byte	0x6c, 0x76, 0x6d, 0x2d
+	.byte	0x70, 0x72, 0x6f, 0x6a
+	.byte	0x65, 0x63, 0x74, 0x5c
+	.byte	0x62, 0x75, 0x69, 0x6c
+	.byte	0x64, 0x00, 0xf2, 0xf1
+	# StringId (0x1008) {
+	#   TypeLeafKind: LF_STRING_ID (0x1605)
+	#   Id: 0x0
+	#   StringData: t.c
+	# }
+	.byte	0x0a, 0x00, 0x05, 0x16
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x74, 0x2e, 0x63, 0x00
+	# BuildInfo (0x1009) {
+	#   TypeLeafKind: LF_BUILDINFO (0x1603)
+	#   NumArgs: 5
+	#   Arguments [
+	#     ArgType: C:\src\llvm-project\build (0x1007)
+	#     ArgType: 0x0
+	#     ArgType: t.c (0x1008)
+	#     ArgType: 0x0
+	#     ArgType: 0x0
+	#   ]
+	# }
+	.byte	0x1a, 0x00, 0x03, 0x16
+	.byte	0x05, 0x00, 0x07, 0x10
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x08, 0x10
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0x00, 0x00
+	.byte	0x00, 0x00, 0xf2, 0xf1
+
+	.addrsig
+	.addrsig_sym x
diff --git a/lld/test/COFF/pdb-unknown-subsection.s b/lld/test/COFF/pdb-unknown-subsection.s
new file mode 100644
index 0000000000000..669f51e729966
--- /dev/null
+++ b/lld/test/COFF/pdb-unknown-subsection.s
@@ -0,0 +1,46 @@
+# Check that unknown symbol subsections are ignored with a warning, and a PDB
+# is produced anyway.
+
+# REQUIRES: x86
+# RUN: llvm-mc -triple=i386-pc-win32 -filetype=obj -o %t.obj %s
+# RUN: lld-link -subsystem:console -debug -nodefaultlib -entry:foo -out:%t.exe -pdb:%t.pdb %t.obj 2>&1 | FileCheck %s --check-prefix=WARNING
+# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s
+
+# WARNING: ignoring unknown debug$S subsection kind 0xFF
+
+# CHECK:                           Symbols
+# CHECK:        4 | S_COMPILE3 [size = 52]
+# CHECK:            machine = intel x86-x64, Ver = clang version SENTINEL, language = c
+
+.text
+_foo:
+ret
+
+.global _foo
+
+.section .debug$S,"dr"
+	.p2align	2
+	.long	4                       # Debug section magic
+	.long	0xF1 # Symbol subsection
+	.long	.Ltmp6-.Ltmp5           # Subsection size
+.Ltmp5:
+	.short	.Ltmp8-.Ltmp7           # Record length
+.Ltmp7:
+	.short	4412                    # Record kind: S_COMPILE3
+	.long	0                       # Flags and language
+	.short	208                     # CPUType
+	.short	9                       # Frontend version
+	.short	0
+	.short	0
+	.short	0
+	.short	9000                    # Backend version
+	.short	0
+	.short	0
+	.short	0
+	.asciz	"clang version SENTINEL" # Null-terminated compiler version string
+	.p2align	2
+.Ltmp8:
+.Ltmp6:
+	.long	0xFF # Unknown subsection kind
+	.long	4           # Subsection size
+	.long  0
diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index 1ca2bd0296959..9fd88a64873a8 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -70,6 +70,11 @@ class DebugInlineeLinesSubsectionRef final : public DebugSubsectionRef {
   }
 
   Error initialize(BinaryStreamReader Reader);
+  Error initialize(BinaryStreamRef Section) {
+    return initialize(BinaryStreamReader(Section));
+  }
+
+  bool valid() const { return Lines.valid(); }
   bool hasExtraFiles() const;
 
   Iterator begin() const { return Lines.begin(); }
@@ -77,7 +82,7 @@ class DebugInlineeLinesSubsectionRef final : public DebugSubsectionRef {
 
 private:
   InlineeLinesSignature Signature;
-  VarStreamArray<InlineeSourceLine> Lines;
+  LinesArray Lines;
 };
 
 class DebugInlineeLinesSubsection final : public DebugSubsection {
diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index dab1e42c3a295..bb2bdd5379198 100644
--- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -995,6 +995,10 @@ Error DumpOutputStyle::dumpInlineeLines() {
           P.formatLine("{0,+8} | {1,+5} | ", Entry.Header->Inlinee,
                        fmtle(Entry.Header->SourceLineNum));
           Strings.formatFromChecksumsOffset(P, Entry.Header->FileID, true);
+          for (const auto &ExtraFileID : Entry.ExtraFiles) {
+            P.formatLine("                   ");
+            Strings.formatFromChecksumsOffset(P, ExtraFileID, true);
+          }
         }
         P.NewLine();
       });

From c061b99c5b6234ff2442eee847491286633d9e92 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Mon, 3 Jun 2019 18:19:54 +0000
Subject: [PATCH 0948/1176] [ConstantRange] Add sdiv() support

The implementation is conceptually simple: We separate the LHS and
RHS into positive and negative components and then also compute the
positive and negative components of the result, taking into account
that e.g. only pos/pos and neg/neg will give a positive result.

However, there's one significant complication: SignedMin / -1 is UB
for sdiv, and we can't just ignore it, because the APInt result of
SignedMin would break the sign segregation. Instead we drop SignedMin
or -1 from the corresponding ranges, taking into account some edge
cases with wrapped ranges.

Because of the sign segregation, the implementation ends up being
nearly fully precise even for wrapped ranges (the remaining
imprecision is due to ranges that are both signed and unsigned
wrapping and are divided by a trivial divisor like 1). This means
that the testing cannot just check the signed envelope as we
usually do. Instead we collect all possible results in a bitvector
and construct a better sign wrapped range (than the full envelope).

Differential Revision: https://reviews.llvm.org/D61238

llvm-svn: 362430
---
 llvm/include/llvm/IR/ConstantRange.h    |  7 ++
 llvm/lib/IR/ConstantRange.cpp           | 87 +++++++++++++++++++++++++
 llvm/unittests/IR/ConstantRangeTest.cpp | 58 +++++++++++++++++
 3 files changed, 152 insertions(+)

diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index 0b176747f7c0f..91f3f31abe17c 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -364,6 +364,13 @@ class LLVM_NODISCARD ConstantRange {
   /// \p Other.
   ConstantRange udiv(const ConstantRange &Other) const;
 
+  /// Return a new range representing the possible values resulting
+  /// from a signed division of a value in this range and a value in
+  /// \p Other. Division by zero and division of SignedMin by -1 are considered
+  /// undefined behavior, in line with IR, and do not contribute towards the
+  /// result.
+  ConstantRange sdiv(const ConstantRange &Other) const;
+
   /// Return a new range representing the possible values resulting
   /// from an unsigned remainder operation of a value in this range and a
   /// value in \p Other.
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index 30b6a27078c2c..920fdc01a14ff 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -765,6 +765,8 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
     return multiply(Other);
   case Instruction::UDiv:
     return udiv(Other);
+  case Instruction::SDiv:
+    return sdiv(Other);
   case Instruction::URem:
     return urem(Other);
   case Instruction::SRem:
@@ -962,6 +964,91 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
   return getNonEmpty(std::move(Lower), std::move(Upper));
 }
 
+ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
+  // We split up the LHS and RHS into positive and negative components
+  // and then also compute the positive and negative components of the result
+  // separately by combining division results with the appropriate signs.
+  APInt Zero = APInt::getNullValue(getBitWidth());
+  APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+  ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin);
+  ConstantRange NegFilter(SignedMin, Zero);
+  ConstantRange PosL = intersectWith(PosFilter);
+  ConstantRange NegL = intersectWith(NegFilter);
+  ConstantRange PosR = RHS.intersectWith(PosFilter);
+  ConstantRange NegR = RHS.intersectWith(NegFilter);
+
+  ConstantRange PosRes = getEmpty();
+  if (!PosL.isEmptySet() && !PosR.isEmptySet())
+    // pos / pos = pos.
+    PosRes = ConstantRange(PosL.Lower.sdiv(PosR.Upper - 1),
+                           (PosL.Upper - 1).sdiv(PosR.Lower) + 1);
+
+  if (!NegL.isEmptySet() && !NegR.isEmptySet()) {
+    // neg / neg = pos.
+    //
+    // We need to deal with one tricky case here: SignedMin / -1 is UB on the
+    // IR level, so we'll want to exclude this case when calculating bounds.
+    // (For APInts the operation is well-defined and yields SignedMin.) We
+    // handle this by dropping either SignedMin from the LHS or -1 from the RHS.
+    APInt Lo = (NegL.Upper - 1).sdiv(NegR.Lower);
+    if (NegL.Lower.isMinSignedValue() && NegR.Upper.isNullValue()) {
+      // Remove -1 from the LHS. Skip if it's the only element, as this would
+      // leave us with an empty set.
+      if (!NegR.Lower.isAllOnesValue()) {
+        APInt AdjNegRUpper;
+        if (RHS.Lower.isAllOnesValue())
+          // Negative part of [-1, X] without -1 is [SignedMin, X].
+          AdjNegRUpper = RHS.Upper;
+        else
+          // [X, -1] without -1 is [X, -2].
+          AdjNegRUpper = NegR.Upper - 1;
+
+        PosRes = PosRes.unionWith(
+            ConstantRange(Lo, NegL.Lower.sdiv(AdjNegRUpper - 1) + 1));
+      }
+
+      // Remove SignedMin from the RHS. Skip if it's the only element, as this
+      // would leave us with an empty set.
+      if (NegL.Upper != SignedMin + 1) {
+        APInt AdjNegLLower;
+        if (Upper == SignedMin + 1)
+          // Negative part of [X, SignedMin] without SignedMin is [X, -1].
+          AdjNegLLower = Lower;
+        else
+          // [SignedMin, X] without SignedMin is [SignedMin + 1, X].
+          AdjNegLLower = NegL.Lower + 1;
+
+        PosRes = PosRes.unionWith(
+            ConstantRange(std::move(Lo),
+                          AdjNegLLower.sdiv(NegR.Upper - 1) + 1));
+      }
+    } else {
+      PosRes = PosRes.unionWith(
+          ConstantRange(std::move(Lo), NegL.Lower.sdiv(NegR.Upper - 1) + 1));
+    }
+  }
+
+  ConstantRange NegRes = getEmpty();
+  if (!PosL.isEmptySet() && !NegR.isEmptySet())
+    // pos / neg = neg.
+    NegRes = ConstantRange((PosL.Upper - 1).sdiv(NegR.Upper - 1),
+                           PosL.Lower.sdiv(NegR.Lower) + 1);
+
+  if (!NegL.isEmptySet() && !PosR.isEmptySet())
+    // neg / pos = neg.
+    NegRes = NegRes.unionWith(
+        ConstantRange(NegL.Lower.sdiv(PosR.Lower),
+                      (NegL.Upper - 1).sdiv(PosR.Upper - 1) + 1));
+
+  // Prefer a non-wrapping signed range here.
+  ConstantRange Res = NegRes.unionWith(PosRes, PreferredRangeType::Signed);
+
+  // Preserve the zero that we dropped when splitting the LHS by sign.
+  if (contains(Zero) && (!PosR.isEmptySet() || !NegR.isEmptySet()))
+    Res = Res.unionWith(ConstantRange(Zero));
+  return Res;
+}
+
 ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
   if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
     return getEmpty();
diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp
index eeebe2e73ae24..c0166b21039c8 100644
--- a/llvm/unittests/IR/ConstantRangeTest.cpp
+++ b/llvm/unittests/IR/ConstantRangeTest.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
@@ -844,6 +845,63 @@ TEST_F(ConstantRangeTest, UDiv) {
             ConstantRange(APInt(16, 0), APInt(16, 99)));
 }
 
+TEST_F(ConstantRangeTest, SDiv) {
+  unsigned Bits = 4;
+  EnumerateTwoConstantRanges(Bits, [&](const ConstantRange &CR1,
+                                       const ConstantRange &CR2) {
+    // Collect possible results in a bit vector. We store the signed value plus
+    // a bias to make it unsigned.
+    int Bias = 1 << (Bits - 1);
+    BitVector Results(1 << Bits);
+    ForeachNumInConstantRange(CR1, [&](const APInt &N1) {
+      ForeachNumInConstantRange(CR2, [&](const APInt &N2) {
+        // Division by zero is UB.
+        if (N2 == 0)
+          return;
+
+        // SignedMin / -1 is UB.
+        if (N1.isMinSignedValue() && N2.isAllOnesValue())
+          return;
+
+        APInt N = N1.sdiv(N2);
+        Results.set(N.getSExtValue() + Bias);
+      });
+    });
+
+    ConstantRange CR = CR1.sdiv(CR2);
+    if (Results.none()) {
+      EXPECT_TRUE(CR.isEmptySet());
+      return;
+    }
+
+    // If there is a non-full signed envelope, that should be the result.
+    APInt SMin(Bits, Results.find_first() - Bias);
+    APInt SMax(Bits, Results.find_last() - Bias);
+    ConstantRange Envelope = ConstantRange::getNonEmpty(SMin, SMax + 1);
+    if (!Envelope.isFullSet()) {
+      EXPECT_EQ(Envelope, CR);
+      return;
+    }
+
+    // If the signed envelope is a full set, try to find a smaller sign wrapped
+    // set that is separated in negative and positive components (or one which
+    // can also additionally contain zero).
+    int LastNeg = Results.find_last_in(0, Bias) - Bias;
+    int LastPos = Results.find_next(Bias) - Bias;
+    if (Results[Bias]) {
+      if (LastNeg == -1)
+        ++LastNeg;
+      else if (LastPos == 1)
+        --LastPos;
+    }
+
+    APInt WMax(Bits, LastNeg);
+    APInt WMin(Bits, LastPos);
+    ConstantRange Wrapped = ConstantRange::getNonEmpty(WMin, WMax + 1);
+    EXPECT_EQ(Wrapped, CR);
+  });
+}
+
 TEST_F(ConstantRangeTest, URem) {
   EXPECT_EQ(Full.urem(Empty), Empty);
   EXPECT_EQ(Empty.urem(Full), Empty);

From 479568e64582c17bac55522f770421463e346658 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Mon, 3 Jun 2019 18:24:55 +0000
Subject: [PATCH 0949/1176] Add cdb test for inline line tables

This creates an integration test for inlined call line tables, and in
particular, ones that are discontiguous. We've had issues in the past
with discontiguous inline line tables, and until r362429 LLD didn't
write the inlinees section into the PDB.

The test was reduced from https://crbug.com/965670

Reviewers: thakis

Differential Revision: https://reviews.llvm.org/D62758

llvm-svn: 362431
---
 debuginfo-tests/win_cdb/inline-line-gap.cpp | 45 +++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 debuginfo-tests/win_cdb/inline-line-gap.cpp

diff --git a/debuginfo-tests/win_cdb/inline-line-gap.cpp b/debuginfo-tests/win_cdb/inline-line-gap.cpp
new file mode 100644
index 0000000000000..5e0270d08c7fb
--- /dev/null
+++ b/debuginfo-tests/win_cdb/inline-line-gap.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cl -MD -Od %s -o %t.exe -fuse-ld=lld -Z7
+// RUN: grep DE[B]UGGER: %s | sed -e 's/.*DE[B]UGGER: //' > %t.script
+// RUN: %cdb -cf %t.script %t.exe | FileCheck %s --check-prefixes=DEBUGGER,CHECK
+//
+// RUN: %clang_cl -MD -O2 %s -o %t.exe -fuse-ld=lld -Z7
+// RUN: grep DE[B]UGGER: %s | sed -e 's/.*DE[B]UGGER: //' > %t.script
+// RUN: %cdb -cf %t.script %t.exe | FileCheck %s --check-prefixes=DEBUGGER,CHECK
+
+// This code is structured to have an early exit with an epilogue in the middle
+// of the function, which creates a gap between the beginning of the inlined
+// code region and the end. Previously, this confused cdb.
+
+volatile bool shutting_down_ = true;
+volatile bool tearing_down_ = true;
+
+void __attribute__((optnone)) setCrashString(const char *) {}
+void __attribute__((optnone)) doTailCall() {}
+extern "C" void __declspec(noreturn) abort();
+
+void __forceinline inlineCrashFrame() {
+  if (shutting_down_ || tearing_down_) {
+    setCrashString("crashing");
+    __debugbreak();
+    // MSVC lays out calls to abort out of line, gets the layout we want.
+    abort();
+  }
+}
+
+void __declspec(noinline) callerOfInlineCrashFrame(bool is_keeping_alive) {
+  if (is_keeping_alive)
+    inlineCrashFrame();
+  else
+    doTailCall();
+}
+
+int __attribute__((optnone)) main() {
+  callerOfInlineCrashFrame(true);
+}
+
+// DEBUGGER: g
+// DEBUGGER: k3
+// CHECK: {{.*}}!inlineCrashFrame
+// CHECK: {{.*}}!callerOfInlineCrashFrame
+// CHECK: {{.*}}!main
+// DEBUGGER: q

From 786a85dcd20c1c4e154221f59ecdd80ff5cd847a Mon Sep 17 00:00:00 2001
From: Artur Pilipenko <apilipenko@azulsystems.com>
Date: Mon, 3 Jun 2019 18:26:45 +0000
Subject: [PATCH 0950/1176] Add
 ScalarEvolutionsTest::SCEVExpandInsertCanonicalIV tests

Test insertion of canonical IV in canonical expansion mode.

llvm-svn: 362432
---
 .../Analysis/ScalarEvolutionTest.cpp          | 193 ++++++++++++++++++
 1 file changed, 193 insertions(+)

diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
index 93a61af9c37b0..1d74fb128441b 100644
--- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -1444,5 +1444,198 @@ TEST_F(ScalarEvolutionsTest, SCEVComputeExpressionSize) {
   EXPECT_EQ(S2S->getExpressionSize(), 5u);
 }
 
+TEST_F(ScalarEvolutionsTest, SCEVExpandInsertCanonicalIV) {
+  LLVMContext C;
+  SMDiagnostic Err;
+
+  // Expand the addrec produced by GetAddRec into a loop without a canonical IV.
+  // SCEVExpander will insert one.
+  auto TestNoCanonicalIV = [&](
+      std::function<const SCEV *(ScalarEvolution & SE, Loop * L)> GetAddRec) {
+    std::unique_ptr<Module> M =
+        parseAssemblyString("define i32 @test(i32 %limit) { "
+                            "entry: "
+                            "  br label %loop "
+                            "loop: "
+                            "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+                            "  %i.inc = add nsw i32 %i, 1 "
+                            "  %cont = icmp slt i32 %i.inc, %limit "
+                            "  br i1 %cont, label %loop, label %exit "
+                            "exit: "
+                            "  ret i32 %i.inc "
+                            "}",
+                            Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto *Loop = LI.getLoopFor(I.getParent());
+      EXPECT_FALSE(Loop->getCanonicalInductionVariable());
+
+      auto *AR = GetAddRec(SE, Loop);
+      unsigned ExpectedCanonicalIVWidth = SE.getTypeSizeInBits(AR->getType());
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Exp.expandCodeFor(AR, nullptr, InsertAt);
+      PHINode *CanonicalIV = Loop->getCanonicalInductionVariable();
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV->getType())->getBitWidth();
+      EXPECT_EQ(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // which is narrower than addrec type.
+  // SCEVExpander will insert a canonical IV of a wider type to expand the
+  // addrec.
+  auto TestNarrowCanonicalIV = [&](
+      std::function<const SCEV *(ScalarEvolution & SE, Loop * L)> GetAddRec) {
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        "  br label %loop "
+        "loop: "
+        "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        "  %canonical.iv = phi i8 [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        "  %i.inc = add nsw i32 %i, 1 "
+        "  %canonical.iv.inc = add i8 %canonical.iv, 1 "
+        "  %cont = icmp slt i32 %i.inc, %limit "
+        "  br i1 %cont, label %loop, label %exit "
+        "exit: "
+        "  ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      PHINode *CanonicalIV = Loop->getCanonicalInductionVariable();
+      EXPECT_EQ(CanonicalIV, &GetInstByName(F, "canonical.iv"));
+
+      auto *AR = GetAddRec(SE, Loop);
+
+      unsigned ExpectedCanonicalIVWidth = SE.getTypeSizeInBits(AR->getType());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV->getType())->getBitWidth();
+      EXPECT_LT(CanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Exp.expandCodeFor(AR, nullptr, InsertAt);
+
+      // Loop over all of the PHI nodes, looking for the new canonical indvar.
+      PHINode *NewCanonicalIV = nullptr;
+      for (BasicBlock::iterator i = LoopHeaderBB->begin(); isa<PHINode>(i);
+           ++i) {
+        PHINode *PN = cast<PHINode>(i);
+        if (PN == &I || PN == CanonicalIV)
+          continue;
+        // We expect that the only PHI added is the new canonical IV
+        EXPECT_FALSE(NewCanonicalIV);
+        NewCanonicalIV = PN;
+      }
+
+      // Check that NewCanonicalIV is a canonical IV, i.e {0,+,1}
+      BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+      EXPECT_TRUE(Loop->getIncomingAndBackEdge(Incoming, Backedge));
+      auto *Start = NewCanonicalIV->getIncomingValueForBlock(Incoming);
+      EXPECT_TRUE(isa<ConstantInt>(Start));
+      EXPECT_TRUE(dyn_cast<ConstantInt>(Start)->isZero());
+      auto *Next = NewCanonicalIV->getIncomingValueForBlock(Backedge);
+      EXPECT_TRUE(isa<BinaryOperator>(Next));
+      auto *NextBinOp = dyn_cast<BinaryOperator>(Next);
+      EXPECT_EQ(NextBinOp->getOpcode(), Instruction::Add);
+      EXPECT_EQ(NextBinOp->getOperand(0), NewCanonicalIV);
+      auto *Step = NextBinOp->getOperand(1);
+      EXPECT_TRUE(isa<ConstantInt>(Step));
+      EXPECT_TRUE(dyn_cast<ConstantInt>(Step)->isOne());
+
+      unsigned NewCanonicalIVBitWidth =
+          cast<IntegerType>(NewCanonicalIV->getType())->getBitWidth();
+      EXPECT_EQ(NewCanonicalIVBitWidth, ExpectedCanonicalIVWidth);
+    });
+  };
+
+  // Expand the addrec produced by GetAddRec into a loop with a canonical IV
+  // of addrec width.
+  // To expand the addrec SCEVExpander should use the existing canonical IV.
+  auto TestMatchingCanonicalIV = [&](
+      std::function<const SCEV *(ScalarEvolution & SE, Loop * L)> GetAddRec,
+      unsigned ARBitWidth) {
+    auto ARBitWidthTypeStr = "i" + std::to_string(ARBitWidth);
+    std::unique_ptr<Module> M = parseAssemblyString(
+        "define i32 @test(i32 %limit) { "
+        "entry: "
+        "  br label %loop "
+        "loop: "
+        "  %i = phi i32 [ 1, %entry ], [ %i.inc, %loop ] "
+        "  %canonical.iv = phi " + ARBitWidthTypeStr +
+            " [ 0, %entry ], [ %canonical.iv.inc, %loop ] "
+        "  %i.inc = add nsw i32 %i, 1 "
+        "  %canonical.iv.inc = add " + ARBitWidthTypeStr +
+            " %canonical.iv, 1 "
+        "  %cont = icmp slt i32 %i.inc, %limit "
+        "  br i1 %cont, label %loop, label %exit "
+        "exit: "
+        "  ret i32 %i.inc "
+        "}",
+        Err, C);
+
+    assert(M && "Could not parse module?");
+    assert(!verifyModule(*M) && "Must have been well formed!");
+
+    runWithSE(*M, "test", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+      auto &I = GetInstByName(F, "i");
+      auto &CanonicalIV = GetInstByName(F, "canonical.iv");
+
+      auto *LoopHeaderBB = I.getParent();
+      auto *Loop = LI.getLoopFor(LoopHeaderBB);
+      EXPECT_EQ(&CanonicalIV, Loop->getCanonicalInductionVariable());
+      unsigned CanonicalIVBitWidth =
+          cast<IntegerType>(CanonicalIV.getType())->getBitWidth();
+
+      auto *AR = GetAddRec(SE, Loop);
+      EXPECT_EQ(ARBitWidth, SE.getTypeSizeInBits(AR->getType()));
+      EXPECT_EQ(CanonicalIVBitWidth, ARBitWidth);
+
+      SCEVExpander Exp(SE, M->getDataLayout(), "expander");
+      auto *InsertAt = I.getNextNode();
+      Exp.expandCodeFor(AR, nullptr, InsertAt);
+
+      // Loop over all of the PHI nodes, looking if a new canonical indvar was
+      // introduced.
+      PHINode *NewCanonicalIV = nullptr;
+      for (BasicBlock::iterator i = LoopHeaderBB->begin(); isa<PHINode>(i);
+           ++i) {
+        PHINode *PN = cast<PHINode>(i);
+        if (PN == &I || PN == &CanonicalIV)
+          continue;
+        NewCanonicalIV = PN;
+      }
+      EXPECT_FALSE(NewCanonicalIV);
+    });
+  };
+
+  unsigned ARBitWidth = 16;
+  Type *ARType = IntegerType::get(C, ARBitWidth);
+
+  // Expand {5,+,1}
+  auto GetAR2 = [&](ScalarEvolution &SE, Loop *L) -> const SCEV * {
+    return SE.getAddRecExpr(SE.getConstant(APInt(ARBitWidth, 5)),
+                            SE.getOne(ARType), L, SCEV::FlagAnyWrap);
+  };
+  TestNoCanonicalIV(GetAR2);
+  TestNarrowCanonicalIV(GetAR2);
+  TestMatchingCanonicalIV(GetAR2, ARBitWidth);
+}
+
 }  // end anonymous namespace
 }  // end namespace llvm

From 6f83c75d035a8717a8725dfb3807441f8bcf9182 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 3 Jun 2019 18:29:00 +0000
Subject: [PATCH 0951/1176] gn build: Merge r362371

llvm-svn: 362433
---
 .../utils/gn/secondary/clang/include/clang/Basic/BUILD.gn | 8 ++++++++
 llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn           | 1 +
 llvm/utils/gn/secondary/clang/utils/TableGen/BUILD.gn     | 1 +
 3 files changed, 10 insertions(+)

diff --git a/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn
index 7d67a2f96ce8e..bbd5684894714 100644
--- a/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn
@@ -89,6 +89,14 @@ clang_tablegen("AttrHasAttributeImpl") {
   td_file = "Attr.td"
 }
 
+clang_tablegen("OpenCLBuiltins") {
+  args = [
+    "-gen-clang-opencl-builtins",
+    "-I",
+    rebase_path("../..", root_out_dir),
+  ]
+}
+
 # Misc
 
 clang_tablegen("arm_neon") {
diff --git a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn
index 1b2994d7ad800..53649562dabf1 100644
--- a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn
@@ -2,6 +2,7 @@ static_library("Sema") {
   output_name = "clangSema"
   configs += [ "//llvm/utils/gn/build:clang_code" ]
   deps = [
+    "//clang/include/clang/Basic:OpenCLBuiltins",
     "//clang/include/clang/Sema:AttrParsedAttrImpl",
     "//clang/include/clang/Sema:AttrParsedAttrKinds",
     "//clang/include/clang/Sema:AttrParsedAttrList",
diff --git a/llvm/utils/gn/secondary/clang/utils/TableGen/BUILD.gn b/llvm/utils/gn/secondary/clang/utils/TableGen/BUILD.gn
index acfcccab6b156..9a81b79da10a1 100644
--- a/llvm/utils/gn/secondary/clang/utils/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/utils/TableGen/BUILD.gn
@@ -11,6 +11,7 @@ executable("clang-tblgen") {
     "ClangCommentHTMLTagsEmitter.cpp",
     "ClangDataCollectorsEmitter.cpp",
     "ClangDiagnosticsEmitter.cpp",
+    "ClangOpenCLBuiltinEmitter.cpp",
     "ClangOptionDocEmitter.cpp",
     "ClangSACheckersEmitter.cpp",
     "NeonEmitter.cpp",

From 81ef625080cb7097044b4461fee0ac5567a44c75 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Mon, 3 Jun 2019 18:36:26 +0000
Subject: [PATCH 0952/1176] Permit Exception Spec mismatch with NoThrow on
 inherited Virtual

As reported here: https://bugs.llvm.org/show_bug.cgi?id=42100

This fairly common pattern ends up being an error in MinGW, so relax it
in all cases to a warning.

llvm-svn: 362434
---
 clang/include/clang/Sema/Sema.h               |  1 +
 clang/lib/Sema/SemaExceptionSpec.cpp          | 18 +++++++++++++++---
 .../SemaCXX/nothrow-vs-exception-specs.cpp    | 19 +++++++++++++++++++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 7bccaf77c1e9c..0c487725bdd42 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -1564,6 +1564,7 @@ class Sema {
   bool CheckExceptionSpecSubset(const PartialDiagnostic &DiagID,
                                 const PartialDiagnostic &NestedDiagID,
                                 const PartialDiagnostic &NoteID,
+                                const PartialDiagnostic &NoThrowDiagID,
                                 const FunctionProtoType *Superset,
                                 SourceLocation SuperLoc,
                                 const FunctionProtoType *Subset,
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 8f3ebc29b5207..5274532b0ff89 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -744,6 +744,7 @@ bool Sema::handlerCanCatch(QualType HandlerType, QualType ExceptionType) {
 bool Sema::CheckExceptionSpecSubset(const PartialDiagnostic &DiagID,
                                     const PartialDiagnostic &NestedDiagID,
                                     const PartialDiagnostic &NoteID,
+                                    const PartialDiagnostic &NoThrowDiagID,
                                     const FunctionProtoType *Superset,
                                     SourceLocation SuperLoc,
                                     const FunctionProtoType *Subset,
@@ -790,6 +791,16 @@ bool Sema::CheckExceptionSpecSubset(const PartialDiagnostic &DiagID,
     return CheckParamExceptionSpec(NestedDiagID, NoteID, Superset, SuperLoc,
                                    Subset, SubLoc);
 
+  // Allow __declspec(nothrow) to be missing on redeclaration as an extension in
+  // some cases.
+  if (NoThrowDiagID.getDiagID() != 0 && SubCanThrow == CT_Can &&
+      SuperCanThrow == CT_Cannot && SuperEST == EST_NoThrow) {
+    Diag(SubLoc, NoThrowDiagID);
+    if (NoteID.getDiagID() != 0)
+      Diag(SuperLoc, NoteID);
+    return true;
+  }
+
   // If the subset contains everything or the superset contains nothing, we've
   // failed.
   if ((SubCanThrow == CT_Can && SubEST != EST_Dynamic) ||
@@ -919,9 +930,9 @@ bool Sema::CheckExceptionSpecCompatibility(Expr *From, QualType ToType) {
   //     void (*q)(void (*) throw(int)) = p;
   //   }
   // ... because it might be instantiated with T=int.
-  return CheckExceptionSpecSubset(PDiag(DiagID), PDiag(NestedDiagID), PDiag(),
-                                  ToFunc, From->getSourceRange().getBegin(),
-                                  FromFunc, SourceLocation()) &&
+  return CheckExceptionSpecSubset(
+             PDiag(DiagID), PDiag(NestedDiagID), PDiag(), PDiag(), ToFunc,
+             From->getSourceRange().getBegin(), FromFunc, SourceLocation()) &&
          !getLangOpts().CPlusPlus17;
 }
 
@@ -953,6 +964,7 @@ bool Sema::CheckOverridingFunctionExceptionSpec(const CXXMethodDecl *New,
   return CheckExceptionSpecSubset(PDiag(DiagID),
                                   PDiag(diag::err_deep_exception_specs_differ),
                                   PDiag(diag::note_overridden_virtual_function),
+                                  PDiag(diag::ext_override_exception_spec),
                                   Old->getType()->getAs<FunctionProtoType>(),
                                   Old->getLocation(),
                                   New->getType()->getAs<FunctionProtoType>(),
diff --git a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
index 78416d8c0b6e0..7a00783b0b781 100644
--- a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
+++ b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
@@ -69,3 +69,22 @@ struct S {
   __declspec(nothrow) void f4() noexcept(true);
   __declspec(nothrow) void f5() noexcept(false);
 };
+
+namespace PR42100 {
+class Base {
+public:
+  // expected-note@+1{{overridden virtual function is here}}
+  virtual __declspec(nothrow) void foo() = 0;
+  // expected-note@+1{{previous declaration is here}}
+  __declspec(nothrow) void bar();
+};
+
+// expected-warning@+1{{'bar' is missing exception specification '__attribute__((nothrow))'}}
+void Base::bar() {}
+
+class Sub : public Base {
+public:
+  // expected-warning@+1{{exception specification of overriding function is more lax than base version}}
+  void foo() {}
+};
+}

From bf37536a351a2db55efe830437866010ae050eea Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Mon, 3 Jun 2019 18:36:33 +0000
Subject: [PATCH 0953/1176] Make NoThrow FunctionLike, make FunctionLike
 include references, fix prettyprint

__declspec(nothrow) should work on function pointers as well as function
references, so this changes it to FunctionLike.  Additionally,
FunctionLike needed to be modified to permit function references.

Finally, the TypePrinter didn't properly print the NoThrow exception
specifier, so make sure we get that right as well.

llvm-svn: 362435
---
 clang/include/clang/AST/Type.h                     |  8 ++++++++
 clang/include/clang/Basic/Attr.td                  |  2 +-
 clang/lib/AST/DeclBase.cpp                         |  2 ++
 clang/lib/AST/TypePrinter.cpp                      |  2 ++
 ...pragma-attribute-supported-attributes-list.test |  2 +-
 clang/test/SemaCXX/nothrow-vs-exception-specs.cpp  | 14 ++++++++++++++
 6 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 66c3de72f5f48..3f71a7ec6ffe9 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1962,6 +1962,7 @@ class alignas(8) Type : public ExtQualsTypeCommonBase {
   bool isLValueReferenceType() const;
   bool isRValueReferenceType() const;
   bool isFunctionPointerType() const;
+  bool isFunctionReferenceType() const;
   bool isMemberPointerType() const;
   bool isMemberFunctionPointerType() const;
   bool isMemberDataPointerType() const;
@@ -6374,6 +6375,13 @@ inline bool Type::isFunctionPointerType() const {
     return false;
 }
 
+inline bool Type::isFunctionReferenceType() const {
+  if (const auto *T = getAs<ReferenceType>())
+    return T->getPointeeType()->isFunctionType();
+  else
+    return false;
+}
+
 inline bool Type::isMemberPointerType() const {
   return isa<MemberPointerType>(CanonicalType);
 }
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index ad179009ea6a2..c20a56532d5d7 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1657,7 +1657,7 @@ def NoStackProtector : InheritableAttr {
 
 def NoThrow : InheritableAttr {
   let Spellings = [GCC<"nothrow">, Declspec<"nothrow">];
-  let Subjects = SubjectList<[Function]>;
+  let Subjects = SubjectList<[FunctionLike]>;
   let Documentation = [NoThrowDocs];
 }
 
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 511925d1b140e..31985486d1d93 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -957,6 +957,8 @@ const FunctionType *Decl::getFunctionType(bool BlocksToo) const {
 
   if (Ty->isFunctionPointerType())
     Ty = Ty->getAs<PointerType>()->getPointeeType();
+  else if (Ty->isFunctionReferenceType())
+    Ty = Ty->getAs<ReferenceType>()->getPointeeType();
   else if (BlocksToo && Ty->isBlockPointerType())
     Ty = Ty->getAs<BlockPointerType>()->getPointeeType();
 
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 13b105bc5729f..ca3e34666841b 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -734,6 +734,8 @@ FunctionProtoType::printExceptionSpecification(raw_ostream &OS,
         OS << getExceptionType(I).stream(Policy);
       }
     OS << ')';
+  } else if (EST_NoThrow == getExceptionSpecType()) {
+    OS << " __attribute__((nothrow))";
   } else if (isNoexceptExceptionSpec(getExceptionSpecType())) {
     OS << " noexcept";
     // FIXME:Is it useful to print out the expression for a non-dependent
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index f85c89ae015dc..6e07e8e811129 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -86,7 +86,7 @@
 // CHECK-NEXT: NoSplitStack (SubjectMatchRule_function)
 // CHECK-NEXT: NoStackProtector (SubjectMatchRule_function)
 // CHECK-NEXT: NoThreadSafetyAnalysis (SubjectMatchRule_function)
-// CHECK-NEXT: NoThrow (SubjectMatchRule_function)
+// CHECK-NEXT: NoThrow (SubjectMatchRule_hasType_functionType)
 // CHECK-NEXT: NotTailCalled (SubjectMatchRule_function)
 // CHECK-NEXT: OSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: OSReturnsNotRetained (SubjectMatchRule_function, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_variable_is_parameter)
diff --git a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
index 7a00783b0b781..a065dad772461 100644
--- a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
+++ b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
@@ -88,3 +88,17 @@ class Sub : public Base {
   void foo() {}
 };
 }
+
+namespace FuncPointerReferenceConverts
+void FuncToBeRefed();
+
+#ifndef CPP17
+// expected-error@+6{{target exception specification is not superset of source}}
+// expected-error@+6{{target exception specification is not superset of source}}
+#else
+// expected-error@+3{{non-const lvalue reference to type 'void () __attribute__((nothrow))' cannot bind to a value of unrelated type 'void ()'}}
+// expected-error@+3{{cannot initialize a variable of type 'void (*)() __attribute__((nothrow))' with an lvalue of type 'void ()': different exception specifications}}
+#endif
+__declspec(nothrow) void (&FuncRef)() = FuncToBeRefed;
+__declspec(nothrow) void (*FuncPtr)() = FuncToBeRefed;
+}

From 8dbeb9256cb60fe551fdcbd40580589ffce59e37 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 3 Jun 2019 18:41:34 +0000
Subject: [PATCH 0954/1176] TTI: Improve default costs for addrspacecast

For some reason multiple places need to do this, and the variant the
loop unroller and inliner use was not handling it.

Also, introduce a new wrapper to be slightly more precise, since on
AMDGPU some addrspacecasts are free, but not no-ops.

llvm-svn: 362436
---
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  8 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |  5 +-
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  2 +-
 .../CostModel/AMDGPU/addrspacecast.ll         | 33 ++++++--
 .../AMDGPU/unroll-cost-addrspacecast.ll       | 77 +++++++++++++++++++
 7 files changed, 119 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0f575c882975f..9a3be5c083bb1 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -413,6 +413,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       if (TLI->isZExtFree(OpTy, Ty))
         return TargetTransformInfo::TCC_Free;
       return TargetTransformInfo::TCC_Basic;
+
+    case Instruction::AddrSpaceCast:
+      if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
+                                   Ty->getPointerAddressSpace()))
+        return TargetTransformInfo::TCC_Free;
+      return TargetTransformInfo::TCC_Basic;
     }
 
     return BaseT::getOperationCost(Opcode, Ty, OpTy);
@@ -656,7 +662,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       return 0;
 
     if (Opcode == Instruction::AddrSpaceCast &&
-        TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
+        TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                  Dst->getPointerAddressSpace()))
       return 0;
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index f06e01acae30c..d00cc1675cd32 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1591,8 +1591,9 @@ class TargetLoweringBase {
   }
 
   /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
-  /// are happy to sink it into basic blocks.
-  virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+  /// are happy to sink it into basic blocks. A cast may be free, but not
+  /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
+  virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
     return isNoopAddrSpaceCast(SrcAS, DestAS);
   }
 
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 488cfe6b6c89c..797064b68c7d1 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1140,8 +1140,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
   // Sink only "cheap" (or nop) address-space casts.  This is a weaker condition
   // than sinking only nop casts, but is helpful on some platforms.
   if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
-    if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
-                                  ASC->getDestAddressSpace()))
+    if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
+                                 ASC->getDestAddressSpace()))
       return false;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ac90399b980e2..1ca11da247ee3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1261,8 +1261,8 @@ bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
   return I && I->getMetadata("amdgpu.noclobber");
 }
 
-bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
-                                            unsigned DestAS) const {
+bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
+                                           unsigned DestAS) const {
   // Flat -> private/local is a simple truncate.
   // Flat -> global is no-op
   if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a63d75184b32d..4d7dac91cd19b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -246,7 +246,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   bool isMemOpUniform(const SDNode *N) const;
   bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
-  bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+  bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
 
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(MVT VT) const override;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
index ddb31482adce6..f15ab500a9e38 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
@@ -1,45 +1,66 @@
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s
 
-; CHECK: 'addrspacecast_global_to_flat'
+; CHECK-LABEL: 'addrspacecast_global_to_flat'
 ; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8*
 define i8* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
   %cast = addrspacecast i8 addrspace(1)* %ptr to i8*
   ret i8* %cast
 }
 
-; CHECK: 'addrspacecast_global_to_flat_v2'
+; CHECK-LABEL: 'addrspacecast_global_to_flat_v2'
 ; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
 define <2 x i8*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
   %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
   ret <2 x i8*> %cast
 }
 
-; CHECK: 'addrspacecast_global_to_flat_v32'
+; CHECK-LABEL: 'addrspacecast_global_to_flat_v32'
 ; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
 define <32 x i8*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
   %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
   ret <32 x i8*> %cast
 }
 
-; CHECK: 'addrspacecast_local_to_flat'
+; CHECK-LABEL: 'addrspacecast_local_to_flat'
 ; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8*
 define i8* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
   %cast = addrspacecast i8 addrspace(3)* %ptr to i8*
   ret i8* %cast
 }
 
-; CHECK: 'addrspacecast_local_to_flat_v2'
+; CHECK-LABEL: 'addrspacecast_local_to_flat_v2'
 ; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
 define <2 x i8*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
   %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
   ret <2 x i8*> %cast
 }
 
-; CHECK: 'addrspacecast_local_to_flat_v32'
+; CHECK-LABEL: 'addrspacecast_local_to_flat_v32'
 ; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
 define <32 x i8*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
   %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
   ret <32 x i8*> %cast
 }
 
+; CHECK-LABEL: 'addrspacecast_flat_to_local'
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8* %ptr to i8 addrspace(3)*
+define i8 addrspace(3)* @addrspacecast_flat_to_local(i8* %ptr) #0 {
+  %cast = addrspacecast i8* %ptr to i8 addrspace(3)*
+  ret i8 addrspace(3)* %cast
+}
+
+; CHECK-LABEL: 'addrspacecast_flat_to_local_v2'
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8*> %ptr to <2 x i8 addrspace(3)*>
+define <2 x i8 addrspace(3)*> @addrspacecast_flat_to_local_v2(<2 x i8*> %ptr) #0 {
+  %cast = addrspacecast <2 x i8*> %ptr to <2 x i8 addrspace(3)*>
+  ret <2 x i8 addrspace(3)*> %cast
+}
+
+; CHECK-LABEL: 'addrspacecast_flat_to_local_v32'
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8*> %ptr to <32 x i8 addrspace(3)*>
+define <32 x i8 addrspace(3)*> @addrspacecast_flat_to_local_v32(<32 x i8*> %ptr) #0 {
+  %cast = addrspacecast <32 x i8*> %ptr to <32 x i8 addrspace(3)*>
+  ret <32 x i8 addrspace(3)*> %cast
+}
+
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll
new file mode 100644
index 0000000000000..761aa077606b4
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-cost-addrspacecast.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -unroll-threshold=75 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s
+
+; CHECK-LABEL: @test_func_addrspacecast_cost_noop(
+; CHECK-NOT: br i1
+define amdgpu_kernel void @test_func_addrspacecast_cost_noop(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture %in) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx.in = getelementptr inbounds float, float addrspace(1)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds float, float addrspace(1)* %out, i32 %indvars.iv
+  %cast.in = addrspacecast float addrspace(1)* %arrayidx.in to float*
+  %cast.out = addrspacecast float addrspace(1)* %arrayidx.out to float*
+  %load = load float, float* %cast.in
+  %fmul = fmul float %load, %sum.02
+  store float %fmul, float* %cast.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; Free, but not a no-op
+; CHECK-LABEL: @test_func_addrspacecast_cost_free(
+; CHECK-NOT: br i1
+define amdgpu_kernel void @test_func_addrspacecast_cost_free(float* noalias nocapture %out, float* noalias nocapture %in) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx.in = getelementptr inbounds float, float* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds float, float* %out, i32 %indvars.iv
+  %cast.in = addrspacecast float* %arrayidx.in to float addrspace(3)*
+  %cast.out = addrspacecast float* %arrayidx.out to float addrspace(3)*
+  %load = load float, float addrspace(3)* %cast.in
+  %fmul = fmul float %load, %sum.02
+  store float %fmul, float addrspace(3)* %cast.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test_func_addrspacecast_cost_nonfree(
+; CHECK: br i1 %exitcond
+define amdgpu_kernel void @test_func_addrspacecast_cost_nonfree(float addrspace(3)* noalias nocapture %out, float addrspace(3)* noalias nocapture %in) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx.in = getelementptr inbounds float, float addrspace(3)* %in, i32 %indvars.iv
+  %arrayidx.out = getelementptr inbounds float, float addrspace(3)* %out, i32 %indvars.iv
+  %cast.in = addrspacecast float addrspace(3)* %arrayidx.in to float*
+  %cast.out = addrspacecast float addrspace(3)* %arrayidx.out to float*
+  %load = load float, float* %cast.in
+  %fmul = fmul float %load, %sum.02
+  store float %fmul, float* %cast.out
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 16
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone speculatable }

From 18ca8a2233a4be45944ce8d772f2930362641534 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Mon, 3 Jun 2019 18:46:30 +0000
Subject: [PATCH 0955/1176] Silence 'warning C4305: 'initializing': truncation
 from 'double' to 'float'' with MSVC 19.16.27021.1 (VS2017 15.9.12)

llvm-svn: 362437
---
 .../Utility/ReproducerInstrumentationTest.cpp        | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lldb/unittests/Utility/ReproducerInstrumentationTest.cpp b/lldb/unittests/Utility/ReproducerInstrumentationTest.cpp
index fef0ab97d7433..2a4ebac2df90d 100644
--- a/lldb/unittests/Utility/ReproducerInstrumentationTest.cpp
+++ b/lldb/unittests/Utility/ReproducerInstrumentationTest.cpp
@@ -31,7 +31,7 @@ struct Pod {
   bool a = true;
   bool b = false;
   char c = 'a';
-  float d = 1.1;
+  float d = 1.1f;
   int e = 2;
   long long f = 3;
   long g = 4;
@@ -443,7 +443,7 @@ TEST(RecordReplayTest, InstrumentedFoo) {
 
   {
     int b = 200;
-    float c = 300.3;
+    float c = 300.3f;
     double e = 400.4;
 
     InstrumentedFoo foo(0);
@@ -471,7 +471,7 @@ TEST(RecordReplayTest, InstrumentedFooSameThis) {
   g_serializer.emplace(os);
 
   int b = 200;
-  float c = 300.3;
+  float c = 300.3f;
   double e = 400.4;
 
   InstrumentedFoo *foo = new InstrumentedFoo(0);
@@ -517,7 +517,7 @@ TEST(RecordReplayTest, InstrumentedBar) {
 #endif
 
     int b = 200;
-    float c = 300.3;
+    float c = 300.3f;
     double e = 400.4;
 
     foo.A(100);
@@ -552,7 +552,7 @@ TEST(RecordReplayTest, InstrumentedBarRef) {
     InstrumentedFoo &foo = bar.GetInstrumentedFooRef();
 
     int b = 200;
-    float c = 300.3;
+    float c = 300.3f;
     double e = 400.4;
 
     foo.A(100);
@@ -587,7 +587,7 @@ TEST(RecordReplayTest, InstrumentedBarPtr) {
     InstrumentedFoo &foo = *(bar.GetInstrumentedFooPtr());
 
     int b = 200;
-    float c = 300.3;
+    float c = 300.3f;
     double e = 400.4;
 
     foo.A(100);

From bad43d8f49cc3efc2751f11c795c0ad7b3fc3975 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Mon, 3 Jun 2019 19:09:15 +0000
Subject: [PATCH 0956/1176] [PowerPC] Look through copies for compare
 elimination

We currently miss the opportunities for optmizing comparisons in the peephole
optimizer if the input is the result of a COPY since we look for record-form
versions of the producing instruction.

This patch simply lets the optimization peek through copies.

Differential revision: https://reviews.llvm.org/D59633

llvm-svn: 362438
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |  7 ++++-
 .../PowerPC/eliminate-compare-of-copy.ll      | 29 +++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/eliminate-compare-of-copy.ll

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index cacbe4eecc5f9..0799c4281e317 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1653,6 +1653,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
     return false;
 
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
   // The record forms set the condition register based on a signed comparison
   // with zero (so says the ISA manual). This is not as straightforward as it
   // seems, however, because this is always a 64-bit comparison on PPC64, even
@@ -1666,6 +1667,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
   bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
 
+  // Look through copies unless that gets us to a physical register.
+  unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+  if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+    SrcReg = ActualSrc;
+
   // Get the unique definition of SrcReg.
   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   if (!MI) return false;
@@ -1794,7 +1800,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   }
 
   // Search for Sub.
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
   --I;
 
   // Get ready to iterate backward from CmpInstr.
diff --git a/llvm/test/CodeGen/PowerPC/eliminate-compare-of-copy.ll b/llvm/test/CodeGen/PowerPC/eliminate-compare-of-copy.ll
new file mode 100644
index 0000000000000..6675c1af19fd2
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/eliminate-compare-of-copy.ll
@@ -0,0 +1,29 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names < %s | FileCheck %s
+
+define dso_local signext i32 @func(i32 zeroext %x, i32 zeroext %y) local_unnamed_addr {
+; CHECK-LABEL: func
+; CHECK: or. {{r[0-9]+}}, r4, r3
+; CHECK-NOT: cmplwi
+; CHECK: blr
+entry:
+  %or = or i32 %y, %x
+  %tobool = icmp eq i32 %or, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %call = tail call signext i32 bitcast (i32 (...)* @f1 to i32 ()*)()
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %call1 = tail call signext i32 bitcast (i32 (...)* @f2 to i32 ()*)()
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @f1(...) local_unnamed_addr
+
+declare signext i32 @f2(...) local_unnamed_addr

From 0b7f98da6569bbc49aed2675fdda95f89198d356 Mon Sep 17 00:00:00 2001
From: Michael Berg <michael_c_berg@apple.com>
Date: Mon, 3 Jun 2019 19:12:15 +0000
Subject: [PATCH 0957/1176] Propagate fmf for setcc/select folds

Summary: This change facilitates propagating fmf which was placed on setcc from fcmp through folds with selects so that back ends can model this path for arithmetic folds on selects in SDAG.

Reviewers: qcolombet, spatel

Reviewed By: qcolombet

Subscribers: nemanjai, jsji

Differential Revision: https://reviews.llvm.org/D62552

llvm-svn: 362439
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 13 ++++++++++---
 llvm/test/CodeGen/PowerPC/fmf-propagation.ll  |  4 ++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a4c1e9886ae6d..98e7f4055e9c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7941,9 +7941,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     }
 
     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
-        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
-      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
-                         N0.getOperand(2));
+        (!LegalOperations &&
+         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+      // Any flags available in a select/setcc fold will be on the setcc as they
+      // migrated from fcmp
+      const SDNodeFlags Flags = N0.getNode()->getFlags();
+      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+                                       N2, N0.getOperand(2));
+      SelectNode->setFlags(Flags);
+      return SelectNode;
+    }
 
     return SimplifySelect(DL, N0, N1, N2);
   }
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 0ce4701d68350..a62ef79f50926 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -375,11 +375,11 @@ define float @sqrt_fast(float %x) {
 ; fcmp can have fast-math-flags.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
-; FMFDEBUG:         select_cc {{t[0-9]+}}
+; FMFDEBUG:         select_cc nnan {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
-; GLOBALDEBUG:         select_cc {{t[0-9]+}}
+; GLOBALDEBUG:         select_cc nnan {{t[0-9]+}}
 ; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
 
 define double @fcmp_nnan(double %a, double %y, double %z) {

From dcf865f0ca557e60b1e902f004012bc54e418878 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 3 Jun 2019 19:29:14 +0000
Subject: [PATCH 0958/1176] [X86] Fix the pattern for merge masked vcvtps2pd.

r362199 fixed it for zero masking, but not zero masking. The load
folding in the peephole pass hid the bug. This patch turns off
the peephole pass on the relevant test to ensure coverage.

llvm-svn: 362440
---
 llvm/lib/Target/X86/X86InstrAVX512.td |  5 +---
 llvm/test/CodeGen/X86/avx512-cvt.ll   | 43 +++++++++++++++++++++------
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 753f1b71b07bc..eebb6401db033 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7629,10 +7629,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          (ins MaskRC:$mask, MemOp:$src),
                          OpcodeStr#Alias, "$src", "$src",
                          LdDAG,
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT
-                                                 (_Src.LdFrag addr:$src)))),
-                                  _.RC:$src0),
+                         (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
                          vselect, "$src0 = $dst">,
                          EVEX, Sched<[sched.Folded]>;
 
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index c42be0d0f1c28..2c8978d4a0119 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq  | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw  | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
 
 
 define <16 x float> @sitof32(<16 x i32> %a) nounwind {
@@ -786,9 +786,34 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl
   ret <4 x double> %c
 }
 
-define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) {
 ; NOVL-LABEL: f32to4f64_mask_load:
 ; NOVL:       # %bb.0:
+; NOVL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
+; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm3
+; NOVL-NEXT:    vcmpltpd %zmm1, %zmm0, %k1
+; NOVL-NEXT:    vblendmpd %zmm3, %zmm2, %zmm0 {%k1}
+; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; NOVL-NEXT:    retq
+;
+; VL-LABEL: f32to4f64_mask_load:
+; VL:       # %bb.0:
+; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm2 {%k1}
+; VL-NEXT:    vmovaps %ymm2, %ymm0
+; VL-NEXT:    retq
+  %b = load <4 x float>, <4 x float>* %p
+  %a = fpext <4 x float> %b to <4 x double>
+  %mask = fcmp ogt <4 x double> %a1, %b1
+  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru
+  ret <4 x double> %c
+}
+
+define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) {
+; NOVL-LABEL: f32to4f64_maskz_load:
+; NOVL:       # %bb.0:
 ; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; NOVL-NEXT:    vcvtps2pd (%rdi), %ymm2
@@ -797,7 +822,7 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x
 ; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; NOVL-NEXT:    retq
 ;
-; VL-LABEL: f32to4f64_mask_load:
+; VL-LABEL: f32to4f64_maskz_load:
 ; VL:       # %bb.0:
 ; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
 ; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}

From 7a4eabef3926f65a393433967b6cf50b3954b05e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 3 Jun 2019 19:35:52 +0000
Subject: [PATCH 0959/1176] [CFLGraph] Add FAdd to visitConstantExpr.

This looks like an oversight as all the other binary operators are present.

Accidentally noticed while auditing places that need FNeg handling.

No test because as noted in the review it would be contrived and amount to "don't crash"

Differential Revision: https://reviews.llvm.org/D62790

llvm-svn: 362441
---
 llvm/lib/Analysis/CFLGraph.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h
index 5783c5bc9bdd5..a81ff5b15c6ce 100644
--- a/llvm/lib/Analysis/CFLGraph.h
+++ b/llvm/lib/Analysis/CFLGraph.h
@@ -555,6 +555,7 @@ template <typename CFLAA> class CFLGraphBuilder {
       }
 
       case Instruction::Add:
+      case Instruction::FAdd:
       case Instruction::Sub:
       case Instruction::FSub:
       case Instruction::Mul:

From c24a2f4ad921c1d81563ca20dd43d2395303385f Mon Sep 17 00:00:00 2001
From: George Burgess IV <george.burgess.iv@gmail.com>
Date: Mon, 3 Jun 2019 19:56:22 +0000
Subject: [PATCH 0960/1176] CFLAA: reflow comments; NFC

llvm-svn: 362442
---
 llvm/lib/Analysis/CFLGraph.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h
index a81ff5b15c6ce..cd7c2df8041b3 100644
--- a/llvm/lib/Analysis/CFLGraph.h
+++ b/llvm/lib/Analysis/CFLGraph.h
@@ -152,7 +152,7 @@ class CFLGraph {
   }
 };
 
-///A builder class used to create CFLGraph instance from a given function
+/// A builder class used to create CFLGraph instance from a given function
 /// The CFL-AA that uses this builder must provide its own type as a template
 /// argument. This is necessary for interprocedural processing: CFLGraphBuilder
 /// needs a way of obtaining the summary of other functions when callinsts are
@@ -181,8 +181,7 @@ template <typename CFLAA> class CFLGraphBuilder {
 
     static bool hasUsefulEdges(ConstantExpr *CE) {
       // ConstantExpr doesn't have terminators, invokes, or fences, so only
-      // needs
-      // to check for compares.
+      // needs to check for compares.
       return CE->getOpcode() != Instruction::ICmp &&
              CE->getOpcode() != Instruction::FCmp;
     }
@@ -197,8 +196,8 @@ template <typename CFLAA> class CFLGraphBuilder {
       }
 
       // TODO: If the call is indirect, we might be able to enumerate all
-      // potential
-      // targets of the call and return them, rather than just failing.
+      // potential targets of the call and return them, rather than just
+      // failing.
       return false;
     }
 

From dd0adae65a705c1aec085217b99b51f33b05d1d3 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Mon, 3 Jun 2019 19:57:52 +0000
Subject: [PATCH 0961/1176] Fix test failure from r362435

Apparently I forgot to do an open brace in a namespace, so we get an
error about an extra closing brace.

llvm-svn: 362443
---
 clang/test/SemaCXX/nothrow-vs-exception-specs.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
index a065dad772461..6d2a7dd58bd60 100644
--- a/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
+++ b/clang/test/SemaCXX/nothrow-vs-exception-specs.cpp
@@ -89,7 +89,7 @@ class Sub : public Base {
 };
 }
 
-namespace FuncPointerReferenceConverts
+namespace FuncPointerReferenceConverts {
 void FuncToBeRefed();
 
 #ifndef CPP17

From 06f3b094e4ad2624d8376e4312eebbc5214faf94 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Mon, 3 Jun 2019 20:14:25 +0000
Subject: [PATCH 0962/1176] ELF: Introduce a separate bit for tracking whether
 an output section has ever had an input section added to it. NFCI.

We currently (ab)use the Live bit on output sections to track whether
the section has ever had an input section added to it, and then later
use it during orphan placement. This will conflict with one of my upcoming
partition-related changes that will assign all output sections to a partition
(thus marking them as live) so that they can be added to the correct segment
by the code that creates program headers.

Instead of using the Live bit for this purpose, create a new flag and
start using it to track the property explicitly.

Differential Revision: https://reviews.llvm.org/D62348

llvm-svn: 362444
---
 lld/ELF/LinkerScript.cpp   |  7 +++----
 lld/ELF/OutputSections.cpp |  3 ++-
 lld/ELF/OutputSections.h   |  5 +++++
 lld/ELF/Writer.cpp         | 13 +++++++------
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 71c8ff2497c04..24c4fb8de461d 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -889,10 +889,9 @@ void LinkerScript::adjustSectionsBeforeSorting() {
       Sec->Alignment =
           std::max<uint32_t>(Sec->Alignment, Sec->AlignExpr().getValue());
 
-    // A live output section means that some input section was added to it. It
-    // might have been removed (if it was empty synthetic section), but we at
-    // least know the flags.
-    if (Sec->isLive())
+    // The input section might have been removed (if it was an empty synthetic
+    // section), but we at least know the flags.
+    if (Sec->HasInputSections)
       Flags = Sec->Flags;
 
     // We do not want to keep any special flags for output section
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 8927b69a64a1f..8048609565e70 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -84,9 +84,10 @@ static bool canMergeToProgbits(unsigned Type) {
 }
 
 void OutputSection::addSection(InputSection *IS) {
-  if (!isLive()) {
+  if (!HasInputSections) {
     // If IS is the first section to be added to this section,
     // initialize Partition, Type, Entsize and flags from IS.
+    HasInputSections = true;
     Partition = IS->Partition;
     Type = IS->Type;
     Entsize = IS->Entsize;
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index c072f2c31b05d..dded729271aa0 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -93,6 +93,11 @@ class OutputSection final : public BaseCommand, public SectionBase {
   bool UsedInExpression = false;
   bool InOverlay = false;
 
+  // Tracks whether the section has ever had an input section added to it, even
+  // if the section was later removed (e.g. because it is a synthetic section
+  // that wasn't needed). This is needed for orphan placement.
+  bool HasInputSections = false;
+
   void finalize();
   template <class ELFT> void writeTo(uint8_t *Buf);
   template <class ELFT> void maybeCompress();
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index ec5be8c8fd70d..40d32a814bbe7 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1052,7 +1052,7 @@ static int getRankProximityAux(OutputSection *A, OutputSection *B) {
 
 static int getRankProximity(OutputSection *A, BaseCommand *B) {
   auto *Sec = dyn_cast<OutputSection>(B);
-  return (Sec && Sec->isLive()) ? getRankProximityAux(A, Sec) : -1;
+  return (Sec && Sec->HasInputSections) ? getRankProximityAux(A, Sec) : -1;
 }
 
 // When placing orphan sections, we want to place them after symbol assignments
@@ -1094,19 +1094,20 @@ findOrphanPos(std::vector<BaseCommand *>::iterator B,
   int Proximity = getRankProximity(Sec, *I);
   for (; I != E; ++I) {
     auto *CurSec = dyn_cast<OutputSection>(*I);
-    if (!CurSec || !CurSec->isLive())
+    if (!CurSec || !CurSec->HasInputSections)
       continue;
     if (getRankProximity(Sec, CurSec) != Proximity ||
         Sec->SortRank < CurSec->SortRank)
       break;
   }
 
-  auto IsLiveOutputSec = [](BaseCommand *Cmd) {
+  auto IsOutputSecWithInputSections = [](BaseCommand *Cmd) {
     auto *OS = dyn_cast<OutputSection>(Cmd);
-    return OS && OS->isLive();
+    return OS && OS->HasInputSections;
   };
   auto J = std::find_if(llvm::make_reverse_iterator(I),
-                        llvm::make_reverse_iterator(B), IsLiveOutputSec);
+                        llvm::make_reverse_iterator(B),
+                        IsOutputSecWithInputSections);
   I = J.base();
 
   // As a special case, if the orphan section is the last section, put
@@ -1114,7 +1115,7 @@ findOrphanPos(std::vector<BaseCommand *>::iterator B,
   // This matches bfd's behavior and is convenient when the linker script fully
   // specifies the start of the file, but doesn't care about the end (the non
   // alloc sections for example).
-  auto NextSec = std::find_if(I, E, IsLiveOutputSec);
+  auto NextSec = std::find_if(I, E, IsOutputSecWithInputSections);
   if (NextSec == E)
     return E;
 

From bddab42fc763aead7aa07c3ed1974e39938d100e Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Mon, 3 Jun 2019 20:40:07 +0000
Subject: [PATCH 0963/1176] gn build: Merge r361896.

llvm-svn: 362445
---
 llvm/utils/gn/secondary/llvm/test/BUILD.gn    |  1 +
 .../secondary/llvm/tools/llvm-lipo/BUILD.gn   | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 llvm/utils/gn/secondary/llvm/tools/llvm-lipo/BUILD.gn

diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
index b17fe80ae6a21..7c6a31636ad0d 100644
--- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn
@@ -218,6 +218,7 @@ group("test") {
     "//llvm/tools/llvm-isel-fuzzer",
     "//llvm/tools/llvm-jitlink",
     "//llvm/tools/llvm-link",
+    "//llvm/tools/llvm-lipo",
     "//llvm/tools/llvm-lto",
     "//llvm/tools/llvm-lto2",
     "//llvm/tools/llvm-mc",
diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-lipo/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-lipo/BUILD.gn
new file mode 100644
index 0000000000000..28f762d500548
--- /dev/null
+++ b/llvm/utils/gn/secondary/llvm/tools/llvm-lipo/BUILD.gn
@@ -0,0 +1,19 @@
+import("//llvm/utils/TableGen/tablegen.gni")
+
+tablegen("LipoOpts") {
+  visibility = [ ":llvm-lipo" ]
+  args = [ "-gen-opt-parser-defs" ]
+}
+
+executable("llvm-lipo") {
+  deps = [
+    ":LipoOpts",
+    "//llvm/lib/Object",
+    "//llvm/lib/Option",
+    "//llvm/lib/Support",
+    "//llvm/lib/Target:TargetsToBuild",
+  ]
+  sources = [
+    "llvm-lipo.cpp",
+  ]
+}

From 7500c97ce4bda8a0735b737231b26bde922620f5 Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Mon, 3 Jun 2019 20:47:20 +0000
Subject: [PATCH 0964/1176] [AArch64][GlobalISel] Optimize G_FCMP + G_SELECT
 pairs when G_SELECT is fp

Instead of emitting all of the test stuff for a compare when it's only used by
a select, instead, just emit the compare + select. The select will use the
value of NZCV correctly, so we don't need to emit all of the test instructions
etc.

For now, only support fp selects which use G_FCMP. Also only support condition
codes which will only require one select to represent.

Also add a test.

Differential Revision: https://reviews.llvm.org/D62695

llvm-svn: 362446
---
 .../AArch64/AArch64InstructionSelector.cpp    | 104 +++++-
 .../AArch64/GlobalISel/fold-fp-select.mir     | 351 ++++++++++++++++++
 2 files changed, 447 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir

diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 5fc272707f50e..c9af8fa1d65a6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -172,6 +172,7 @@ class AArch64InstructionSelector : public InstructionSelector {
 
   bool tryOptVectorShuffle(MachineInstr &I) const;
   bool tryOptVectorDup(MachineInstr &MI) const;
+  bool tryOptSelect(MachineInstr &MI) const;
 
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
@@ -741,6 +742,19 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
   return GenericOpc;
 }
 
+static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
+                                const RegisterBankInfo &RBI) {
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+               AArch64::GPRRegBankID);
+  LLT Ty = MRI.getType(I.getOperand(0).getReg());
+  if (Ty == LLT::scalar(32))
+    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
+  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
+    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
+  return 0;
+}
+
 /// Helper function to select the opcode for a G_FCMP.
 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
   // If this is a compare against +0.0, then we don't have to explicitly
@@ -1774,16 +1788,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     // select instead of an integer select.
     bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
                  AArch64::GPRRegBankID);
-    unsigned CSelOpc = 0;
 
-    if (Ty == LLT::scalar(32)) {
-      CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
-    } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
-      CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
-    } else {
-      return false;
-    }
+    if (IsFP && tryOptSelect(I))
+      return true;
 
+    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
     MachineInstr &TstMI =
         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
              .addDef(AArch64::WZR)
@@ -2810,6 +2819,85 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
   return &I;
 }
 
+bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
+  MachineIRBuilder MIB(I);
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+
+  // We want to recognize this pattern:
+  //
+  // $z = G_FCMP pred, $x, $y
+  // ...
+  // $w = G_SELECT $z, $a, $b
+  //
+  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
+  // some copies/truncs in between.)
+  //
+  // If we see this, then we can emit something like this:
+  //
+  // fcmp $x, $y
+  // fcsel $w, $a, $b, pred
+  //
+  // Rather than emitting both of the rather long sequences in the standard
+  // G_FCMP/G_SELECT select methods.
+
+  // First, check if the condition is defined by a compare.
+  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
+  while (CondDef) {
+    // We can only fold if all of the defs have one use.
+    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
+      return false;
+
+    // We can skip over G_TRUNC since the condition is 1-bit.
+    // Truncating/extending can have no impact on the value.
+    unsigned Opc = CondDef->getOpcode();
+    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
+      break;
+
+    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
+  }
+
+  // Is the condition defined by a compare?
+  // TODO: Handle G_ICMP.
+  if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
+    return false;
+
+  // Get the condition code for the select.
+  AArch64CC::CondCode CondCode;
+  AArch64CC::CondCode CondCode2;
+  changeFCMPPredToAArch64CC(
+      (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
+      CondCode2);
+
+  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
+  // instructions to emit the comparison.
+  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
+  // unnecessary.
+  if (CondCode2 != AArch64CC::AL)
+    return false;
+
+  // Make sure we'll be able to select the compare.
+  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
+  if (!CmpOpc)
+    return false;
+
+  // Emit a new compare.
+  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
+  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+    Cmp.addUse(CondDef->getOperand(3).getReg());
+
+  // Emit the select.
+  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
+  auto CSel =
+      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
+                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
+          .addImm(CondCode);
+  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // Try to match a vector splat operation into a dup instruction.
   // We're looking for this pattern:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir
new file mode 100644
index 0000000000000..619b9276179a9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir
@@ -0,0 +1,351 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify the following:
+#
+# - We can fold compares into selects.
+# - This only happens when the result of the compare is only used by the select.
+#
+# Also verify that, for now:
+#
+# - We only support doing this with G_FCMP.
+# - We only support condition flags that require a single instruction.
+#
+
+...
+---
+name:            fcmp_more_than_one_user_no_fold
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0, $s1, $w1
+
+    ; CHECK-LABEL: name: fcmp_more_than_one_user_no_fold
+    ; CHECK: liveins: $s0, $s1, $w1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
+    ; CHECK: $w1 = COPY [[CSINCWr]]
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
+    $w1 = COPY %5(s32)
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            using_icmp
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0, $w0
+
+    ; CHECK-LABEL: name: using_icmp
+    ; CHECK: liveins: $s0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:gpr(s32) = COPY $w0
+    %1:fpr(s32) = COPY $s0
+    %2:gpr(s32) = G_CONSTANT i32 0
+    %5:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %6:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %6(s32)
+    %7:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %7(s1), %1, %5
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            foeq
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: foeq
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            fueq
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: fueq
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(ueq), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            fone
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: fone
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(one), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            fune
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: fune
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[FCSELSrrr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(une), %0(s32), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s32) = G_SELECT %6(s1), %1, %2
+    $s0 = COPY %4(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            doeq
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: doeq
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %2, %1
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            dueq
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: dueq
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(ueq), %0(s64), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %2, %1
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            done
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: done
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
+    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
+    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(one), %0(s64), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            dune
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: dune
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
+    ; CHECK: $d0 = COPY [[FCSELDrrr]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(s64) = COPY $d0
+    %1:fpr(s64) = COPY $d1
+    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
+    %5:gpr(s32) = G_FCMP floatpred(une), %0(s64), %2
+    %3:gpr(s1) = G_TRUNC %5(s32)
+    %6:fpr(s1) = COPY %3(s1)
+    %4:fpr(s64) = G_SELECT %6(s1), %1, %2
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+
+...

From 0ceda9fb5c8ff2a21ddcaa2f8a8b969a065b4553 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 3 Jun 2019 21:33:22 +0000
Subject: [PATCH 0965/1176] AMDGPU: Disable stack realignment for kernels

This is something of a workaround, and the state of stack realignment
controls is kind of a mess. Ideally, we would be able to specify the
stack is infinitely aligned on entry to a kernel.

TargetFrameLowering provides multiple controls which apply at
different points. The StackRealignable field is used during
SelectionDAG, and for some reason distinct from this
hook. StackAlignment is a single field not dependent on the
function. It would probably be better to make that dependent on the
calling convention, and the maximum value for kernels.

Currently this doesn't really change anything, since the frame
lowering mostly does its own thing. This helps avoid regressions in a
future change which will rely more heavily on hasFP.

llvm-svn: 362447
---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  13 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h       |   1 +
 .../CodeGen/AMDGPU/stack-realign-kernel.ll    | 294 ++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/stack-realign.ll     |  26 ++
 4 files changed, 334 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 341a88fa471bf..2e96b9866671f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -245,6 +245,19 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  // On entry, the base address is 0, so it can't possibly need any more
+  // alignment.
+
+  // FIXME: Should be able to specify the entry frame alignment per calling
+  // convention instead.
+  if (Info->isEntryFunction())
+    return false;
+
+  return TargetRegisterInfo::canRealignStack(MF);
+}
+
 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
   if (Info->isEntryFunction()) {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index e2df3ae5ea7e9..de10e92c96573 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -73,6 +73,7 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
 
   unsigned getFrameRegister(const MachineFunction &MF) const override;
 
+  bool canRealignStack(const MachineFunction &MF) const override;
   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
 
   bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
new file mode 100644
index 0000000000000..037ed1a314232
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900  < %s | FileCheck -check-prefix=GFX9 %s
+
+; Make sure the stack is never realigned for entry functions.
+
+define amdgpu_kernel void @max_alignment_128() #0 {
+; VI-LABEL: max_alignment_128:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_add_u32 s4, s4, s7
+; VI-NEXT:    v_mov_b32_e32 v0, 9
+; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
+; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:128
+; VI-NEXT:    s_endpgm
+; VI-NEXT:    .section .rodata,#alloc
+; VI-NEXT:    .p2align 6
+; VI-NEXT:    .amdhsa_kernel max_alignment_128
+; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
+; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
+; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
+; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
+; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
+; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
+; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
+; VI-NEXT:     .amdhsa_next_free_vgpr 1
+; VI-NEXT:     .amdhsa_next_free_sgpr 8
+; VI-NEXT:     .amdhsa_reserve_vcc 0
+; VI-NEXT:     .amdhsa_float_round_mode_32 0
+; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
+; VI-NEXT:     .amdhsa_dx10_clamp 1
+; VI-NEXT:     .amdhsa_ieee_mode 1
+; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
+; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
+; VI-NEXT:     .amdhsa_exception_int_div_zero 0
+; VI-NEXT:    .end_amdhsa_kernel
+; VI-NEXT:    .text
+;
+; GFX9-LABEL: max_alignment_128:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
+; GFX9-NEXT:    v_mov_b32_e32 v0, 9
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:128
+; GFX9-NEXT:    s_endpgm
+; GFX9-NEXT:    .section .rodata,#alloc
+; GFX9-NEXT:    .p2align 6
+; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
+; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
+; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
+; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
+; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
+; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
+; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
+; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
+; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
+; GFX9-NEXT:     .amdhsa_reserve_vcc 0
+; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
+; GFX9-NEXT:     .amdhsa_dx10_clamp 1
+; GFX9-NEXT:     .amdhsa_ieee_mode 1
+; GFX9-NEXT:     .amdhsa_fp16_overflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
+; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
+; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
+; GFX9-NEXT:    .end_amdhsa_kernel
+; GFX9-NEXT:    .text
+  %alloca.align = alloca i32, align 128, addrspace(5)
+  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
+  ret void
+}
+
+define amdgpu_kernel void @stackrealign_attr() #1 {
+; VI-LABEL: stackrealign_attr:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_add_u32 s4, s4, s7
+; VI-NEXT:    v_mov_b32_e32 v0, 9
+; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
+; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
+; VI-NEXT:    s_endpgm
+; VI-NEXT:    .section .rodata,#alloc
+; VI-NEXT:    .p2align 6
+; VI-NEXT:    .amdhsa_kernel stackrealign_attr
+; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
+; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
+; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
+; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
+; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
+; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
+; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
+; VI-NEXT:     .amdhsa_next_free_vgpr 1
+; VI-NEXT:     .amdhsa_next_free_sgpr 8
+; VI-NEXT:     .amdhsa_reserve_vcc 0
+; VI-NEXT:     .amdhsa_float_round_mode_32 0
+; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
+; VI-NEXT:     .amdhsa_dx10_clamp 1
+; VI-NEXT:     .amdhsa_ieee_mode 1
+; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
+; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
+; VI-NEXT:     .amdhsa_exception_int_div_zero 0
+; VI-NEXT:    .end_amdhsa_kernel
+; VI-NEXT:    .text
+;
+; GFX9-LABEL: stackrealign_attr:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
+; GFX9-NEXT:    v_mov_b32_e32 v0, 9
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
+; GFX9-NEXT:    s_endpgm
+; GFX9-NEXT:    .section .rodata,#alloc
+; GFX9-NEXT:    .p2align 6
+; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
+; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
+; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
+; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
+; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
+; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
+; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
+; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
+; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
+; GFX9-NEXT:     .amdhsa_reserve_vcc 0
+; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
+; GFX9-NEXT:     .amdhsa_dx10_clamp 1
+; GFX9-NEXT:     .amdhsa_ieee_mode 1
+; GFX9-NEXT:     .amdhsa_fp16_overflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
+; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
+; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
+; GFX9-NEXT:    .end_amdhsa_kernel
+; GFX9-NEXT:    .text
+  %alloca.align = alloca i32, align 4, addrspace(5)
+  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
+  ret void
+}
+
+define amdgpu_kernel void @alignstack_attr() #2 {
+; VI-LABEL: alignstack_attr:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_add_u32 s4, s4, s7
+; VI-NEXT:    v_mov_b32_e32 v0, 9
+; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
+; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
+; VI-NEXT:    s_endpgm
+; VI-NEXT:    .section .rodata,#alloc
+; VI-NEXT:    .p2align 6
+; VI-NEXT:    .amdhsa_kernel alignstack_attr
+; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
+; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
+; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
+; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
+; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
+; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
+; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
+; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
+; VI-NEXT:     .amdhsa_next_free_vgpr 1
+; VI-NEXT:     .amdhsa_next_free_sgpr 8
+; VI-NEXT:     .amdhsa_reserve_vcc 0
+; VI-NEXT:     .amdhsa_float_round_mode_32 0
+; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
+; VI-NEXT:     .amdhsa_dx10_clamp 1
+; VI-NEXT:     .amdhsa_ieee_mode 1
+; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
+; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
+; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
+; VI-NEXT:     .amdhsa_exception_int_div_zero 0
+; VI-NEXT:    .end_amdhsa_kernel
+; VI-NEXT:    .text
+;
+; GFX9-LABEL: alignstack_attr:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
+; GFX9-NEXT:    v_mov_b32_e32 v0, 9
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
+; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
+; GFX9-NEXT:    s_endpgm
+; GFX9-NEXT:    .section .rodata,#alloc
+; GFX9-NEXT:    .p2align 6
+; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
+; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
+; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
+; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
+; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
+; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
+; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
+; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
+; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
+; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
+; GFX9-NEXT:     .amdhsa_reserve_vcc 0
+; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
+; GFX9-NEXT:     .amdhsa_dx10_clamp 1
+; GFX9-NEXT:     .amdhsa_ieee_mode 1
+; GFX9-NEXT:     .amdhsa_fp16_overflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
+; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
+; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
+; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
+; GFX9-NEXT:    .end_amdhsa_kernel
+; GFX9-NEXT:    .text
+  %alloca.align = alloca i32, align 4, addrspace(5)
+  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "stackrealign" }
+attributes #2 = { nounwind alignstack=128 }
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index 99a218b5325c1..aece86d9a31eb 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -120,6 +120,32 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {
   ret void
 }
 
+; GCN-LABEL: {{^}}default_realign_align128:
+; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0
+; GCN-NEXT: s_and_b32 s5, [[TMP]], 0xffffe000
+; GCN-NEXT: s_add_u32 s32, s32, 0x6000
+; GCN-NOT: s5
+; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:128
+; GCN: s_sub_u32 s32, s32, 0x6000
+define void @default_realign_align128(i32 %idx) #0 {
+  %alloca.align = alloca i32, align 128, addrspace(5)
+  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
+  ret void
+}
+
+; GCN-LABEL: {{^}}disable_realign_align128:
+; GCN-NOT: s32
+; GCN: s_mov_b32 s5, s32
+; GCN-NOT: s32
+; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:16
+; GCN-NOT: s32
+define void @disable_realign_align128(i32 %idx) #3 {
+  %alloca.align = alloca i32, align 128, addrspace(5)
+  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
+  ret void
+}
+
 attributes #0 = { noinline nounwind }
 attributes #1 = { noinline nounwind "stackrealign" }
 attributes #2 = { noinline nounwind alignstack=4 }
+attributes #3 = { noinline nounwind "no-realign-stack" }

From 6ff978ee05469f0f976bf003e601f879db754ed8 Mon Sep 17 00:00:00 2001
From: Michael Berg <michael_c_berg@apple.com>
Date: Mon, 3 Jun 2019 21:53:26 +0000
Subject: [PATCH 0966/1176] Propagate fmf for setcc in SDAG for select folds

llvm-svn: 362448
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp   | 11 +++++++----
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp |  1 +
 llvm/test/CodeGen/X86/fmf-propagation.ll        |  8 ++++++++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 4f7d14ab67e42..ed6b95640b43d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3445,6 +3445,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                              DAG.getConstant(0, dl, Tmp1.getValueType()),
                              Tmp2, Tmp3, ISD::SETNE);
     }
+    Tmp1->setFlags(Node->getFlags());
     Results.push_back(Tmp1);
     break;
   case ISD::BR_JT: {
@@ -3528,7 +3529,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       // condition code, create a new SETCC node.
       if (Tmp3.getNode())
         Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
-                           Tmp1, Tmp2, Tmp3);
+                           Tmp1, Tmp2, Tmp3, Node->getFlags());
 
       // If we expanded the SETCC by inverting the condition code, then wrap
       // the existing SETCC in a NOT to restore the intended condition.
@@ -3556,6 +3557,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                        DAG.getConstant(TrueValue, dl, VT),
                        DAG.getConstant(0, dl, VT),
                        Tmp3);
+    Tmp1->setFlags(Node->getFlags());
     Results.push_back(Tmp1);
     break;
   }
@@ -3577,7 +3579,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
              "expanded.");
       EVT CCVT =
           TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
-      SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+      SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
       Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
       break;
     }
@@ -4246,6 +4248,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
     // Perform the larger operation, then round down.
     Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+    Tmp1->setFlags(Node->getFlags());
     if (TruncOp != ISD::FP_ROUND)
       Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
     else
@@ -4276,8 +4279,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     }
     Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
-    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
-                                  Tmp1, Tmp2, Node->getOperand(2)));
+    Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
+                                  Tmp2, Node->getOperand(2), Node->getFlags()));
     break;
   }
   case ISD::BR_CC: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 03ec64d59332a..14fd5be23ccba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -707,6 +707,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
   auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
   assert((OpIdEntry == 0) && "Node is already promoted!");
   OpIdEntry = getTableId(Result);
+  Result->setFlags(Op->getFlags());
 
   DAG.transferDbgValues(Op, Result);
 }
diff --git a/llvm/test/CodeGen/X86/fmf-propagation.ll b/llvm/test/CodeGen/X86/fmf-propagation.ll
index 56e813f371c1b..8d16e1acda770 100644
--- a/llvm/test/CodeGen/X86/fmf-propagation.ll
+++ b/llvm/test/CodeGen/X86/fmf-propagation.ll
@@ -28,3 +28,11 @@ define float @fmf_transfer(float %x, float %y) {
   ret float %f8
 }
 
+; CHECK: Optimized type-legalized selection DAG: %bb.0 'fmf_setcc:'
+; CHECK: t13: i8 = setcc nnan ninf nsz arcp contract afn reassoc t2, ConstantFP:f32<0.000000e+00>, setlt:ch
+
+define float @fmf_setcc(float %x, float %y) {
+  %cmp = fcmp fast ult float %x, 0.0
+  %ret = select i1 %cmp, float %x, float %y
+  ret float %ret
+}

From 89f9af5487e3287c1638e53d0b5b057154eb1b32 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Mon, 3 Jun 2019 21:53:56 +0000
Subject: [PATCH 0967/1176] [SCCP] Add UnaryOperator visitor to SCCP for unary
 FNeg

Differential Revision: https://reviews.llvm.org/D62819

llvm-svn: 362449
---
 llvm/lib/Transforms/Scalar/SCCP.cpp           | 26 +++++++++++++++
 .../test/Transforms/SCCP/apfloat-basictest.ll | 33 +++++++++++++++++++
 llvm/test/Transforms/SCCP/undef-resolve.ll    |  8 +++++
 3 files changed, 67 insertions(+)
 create mode 100644 llvm/test/Transforms/SCCP/apfloat-basictest.ll

diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index 66885ed57e279..1d0354bd70830 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -613,6 +613,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
 
   void visitCastInst(CastInst &I);
   void visitSelectInst(SelectInst &I);
+  void visitUnaryOperator(Instruction &I);
   void visitBinaryOperator(Instruction &I);
   void visitCmpInst(CmpInst &I);
   void visitExtractValueInst(ExtractValueInst &EVI);
@@ -969,6 +970,29 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
   markOverdefined(&I);
 }
 
+// Handle Unary Operators.
+void SCCPSolver::visitUnaryOperator(Instruction &I) {
+  LatticeVal V0State = getValueState(I.getOperand(0));
+
+  LatticeVal &IV = ValueState[&I];
+  if (IV.isOverdefined()) return;
+
+  if (V0State.isConstant()) {
+    Constant *C = ConstantExpr::get(I.getOpcode(), V0State.getConstant());
+
+    // op Y -> undef.
+    if (isa<UndefValue>(C))
+      return;
+    return (void)markConstant(IV, &I, C);
+  }
+
+  // If something is undef, wait for it to resolve.
+  if (!V0State.isOverdefined())
+    return;
+
+  markOverdefined(&I);
+}
+
 // Handle Binary Operators.
 void SCCPSolver::visitBinaryOperator(Instruction &I) {
   LatticeVal V1State = getValueState(I.getOperand(0));
@@ -1484,6 +1508,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
         else
           markOverdefined(&I);
         return true;
+      case Instruction::FNeg:
+        break; // fneg undef -> undef
       case Instruction::ZExt:
       case Instruction::SExt:
       case Instruction::FPToUI:
diff --git a/llvm/test/Transforms/SCCP/apfloat-basictest.ll b/llvm/test/Transforms/SCCP/apfloat-basictest.ll
new file mode 100644
index 0000000000000..2ef09668cd4d1
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/apfloat-basictest.ll
@@ -0,0 +1,33 @@
+; This is a basic sanity check for constant propagation. The fneg instruction
+; should be eliminated.
+
+; RUN: opt < %s -sccp -S | FileCheck %s
+
+define double @test(i1 %B) {
+	br i1 %B, label %BB1, label %BB2
+BB1:
+	%Val = fneg double 42.0
+	br label %BB3
+BB2:
+	br label %BB3
+BB3:
+	%Ret = phi double [%Val, %BB1], [1.0, %BB2]
+	ret double %Ret
+; CHECK-LABEL: @test(
+; CHECK: [[PHI:%.*]] = phi double [ -4.200000e+01, %BB1 ], [ 1.000000e+00, %BB2 ]
+}
+
+define double @test1(i1 %B) {
+        br i1 %B, label %BB1, label %BB2
+BB1:
+        %Div = fdiv double 1.0, 1.0
+        %Val = fneg double %Div
+        br label %BB3
+BB2:
+        br label %BB3
+BB3:
+        %Ret = phi double [%Val, %BB1], [1.0, %BB2]
+        ret double %Ret
+; CHECK-LABEL: @test1(
+; CHECK: [[PHI:%.*]] = phi double [ -1.000000e+00, %BB1 ], [ 1.000000e+00, %BB2 ]
+}
diff --git a/llvm/test/Transforms/SCCP/undef-resolve.ll b/llvm/test/Transforms/SCCP/undef-resolve.ll
index dd7f1f3dd8801..7fdcd556dae61 100644
--- a/llvm/test/Transforms/SCCP/undef-resolve.ll
+++ b/llvm/test/Transforms/SCCP/undef-resolve.ll
@@ -180,3 +180,11 @@ entry:
 ; CHECK-LABEL: @test11(
 ; CHECK: ret i32 0
 }
+
+; Test unary ops
+define double @test12(double %x) {
+  %t = fneg double undef
+  ret double %t
+; CHECK-LABEL: @test12(
+; CHECK: double undef
+}

From 357e8a390ca926a99f38548f1d0f973c3bc9c841 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Mon, 3 Jun 2019 22:02:48 +0000
Subject: [PATCH 0968/1176] [ORC] Use uint8_t for bitfields in
 SymbolTableEntry.

This allows for better struct packing on MSVC, and as a bonus will eliminate a
warning on GCC builds.

llvm-svn: 362450
---
 llvm/include/llvm/ExecutionEngine/Orc/Core.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 6a913e85fcbea..45bcb9460f8b0 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -653,16 +653,16 @@ class JITDylib {
   public:
     SymbolTableEntry() = default;
     SymbolTableEntry(JITSymbolFlags Flags)
-        : Flags(Flags), State(SymbolState::NeverSearched),
+        : Flags(Flags), State(static_cast<uint8_t>(SymbolState::NeverSearched)),
           MaterializerAttached(false), PendingRemoval(false) {}
 
     JITTargetAddress getAddress() const { return Addr; }
     JITSymbolFlags getFlags() const { return Flags; }
-    SymbolState getState() const { return State; }
+    SymbolState getState() const { return static_cast<SymbolState>(State); }
 
     bool isInMaterializationPhase() const {
-      return State == SymbolState::Materializing ||
-             State == SymbolState::Resolved;
+      return getState() == SymbolState::Materializing ||
+             getState() == SymbolState::Resolved;
     }
 
     bool hasMaterializerAttached() const { return MaterializerAttached; }
@@ -670,7 +670,11 @@ class JITDylib {
 
     void setAddress(JITTargetAddress Addr) { this->Addr = Addr; }
     void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; }
-    void setState(SymbolState State) { this->State = State; }
+    void setState(SymbolState State) {
+      assert(static_cast<uint8_t>(State) < (1 << 6) &&
+             "State does not fit in bitfield");
+      this->State = static_cast<uint8_t>(State);
+    }
 
     void setMaterializerAttached(bool MaterializerAttached) {
       this->MaterializerAttached = MaterializerAttached;
@@ -687,9 +691,9 @@ class JITDylib {
   private:
     JITTargetAddress Addr = 0;
     JITSymbolFlags Flags;
-    SymbolState State : 6;
-    bool MaterializerAttached : 1;
-    bool PendingRemoval : 1;
+    uint8_t State : 6;
+    uint8_t MaterializerAttached : 1;
+    uint8_t PendingRemoval : 1;
   };
 
   using SymbolTable = DenseMap<SymbolStringPtr, SymbolTableEntry>;

From 27a546610c41597dc81e5388e88047ec98d1b437 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 3 Jun 2019 22:11:30 +0000
Subject: [PATCH 0969/1176] foo

llvm-svn: 362451
---
 llvm/test/CodeGen/X86/bmi.ll | 74 ++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index ab159c9506600..c2d13c2971062 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -1149,3 +1149,77 @@ define i64 @blsr64_branch(i64 %x) {
 }
 
 declare void @bar()
+
+define void @pr42118_i32(i32 %x) {
+; X86-LABEL: pr42118_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jne .LBB48_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    jmp bar # TAILCALL
+; X86-NEXT:  .LBB48_1:
+; X86-NEXT:    retl
+;
+; X64-LABEL: pr42118_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    blsrl %edi, %eax
+; X64-NEXT:    jne .LBB48_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    jmp bar # TAILCALL
+; X64-NEXT:  .LBB48_1:
+; X64-NEXT:    retq
+  %tmp = sub i32 0, %x
+  %tmp1 = and i32 %tmp, %x
+  %cmp = icmp eq i32 %tmp1, %x
+  br i1 %cmp, label %1, label %2
+
+  tail call void @bar()
+  br label %2
+
+  ret void
+}
+
+define void @pr42118_i64(i64 %x) {
+; X86-LABEL: pr42118_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    addl $-1, %edx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    andl %ecx, %esi
+; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    jne .LBB49_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    jmp bar # TAILCALL
+; X86-NEXT:  .LBB49_1:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: pr42118_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    blsrq %rdi, %rax
+; X64-NEXT:    jne .LBB49_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    jmp bar # TAILCALL
+; X64-NEXT:  .LBB49_1:
+; X64-NEXT:    retq
+  %tmp = sub i64 0, %x
+  %tmp1 = and i64 %tmp, %x
+  %cmp = icmp eq i64 %tmp1, %x
+  br i1 %cmp, label %1, label %2
+
+  tail call void @bar()
+  br label %2
+
+  ret void
+}

From 17728e7c1516674479cc78871957f7d8682166aa Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 3 Jun 2019 22:11:40 +0000
Subject: [PATCH 0970/1176] [X86] Add test cases for 32 and 64 bit versions of
 PR42118. NFC

llvm-svn: 362452
---
 llvm/test/CodeGen/X86/bmi.ll | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index c2d13c2971062..c48be66705f92 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -1153,7 +1153,10 @@ declare void @bar()
 define void @pr42118_i32(i32 %x) {
 ; X86-LABEL: pr42118_i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    andnl %eax, %ecx, %eax
 ; X86-NEXT:    jne .LBB48_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    jmp bar # TAILCALL
@@ -1162,7 +1165,9 @@ define void @pr42118_i32(i32 %x) {
 ;
 ; X64-LABEL: pr42118_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    blsrl %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andnl %edi, %eax, %eax
 ; X64-NEXT:    jne .LBB48_1
 ; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    jmp bar # TAILCALL
@@ -1187,13 +1192,13 @@ define void @pr42118_i64(i64 %x) {
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    addl $-1, %edx
-; X86-NEXT:    movl %ecx, %esi
-; X86-NEXT:    adcl $-1, %esi
-; X86-NEXT:    andl %eax, %edx
-; X86-NEXT:    andl %ecx, %esi
-; X86-NEXT:    orl %edx, %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    andnl %ecx, %edx, %ecx
+; X86-NEXT:    andnl %eax, %esi, %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    jne .LBB49_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    popl %esi
@@ -1207,7 +1212,9 @@ define void @pr42118_i64(i64 %x) {
 ;
 ; X64-LABEL: pr42118_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    blsrq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    andnq %rdi, %rax, %rax
 ; X64-NEXT:    jne .LBB49_1
 ; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    jmp bar # TAILCALL

From 099f4a9fa828bd982b1da096260a308ba21c3d6f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 3 Jun 2019 22:14:54 +0000
Subject: [PATCH 0971/1176] Revert r362451 "foo" and r362452 "[X86] Add test
 cases for 32 and 64 bit versions of PR42118. NFC"

I failed to squash these properly

llvm-svn: 362453
---
 llvm/test/CodeGen/X86/bmi.ll | 81 ------------------------------------
 1 file changed, 81 deletions(-)

diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index c48be66705f92..ab159c9506600 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -1149,84 +1149,3 @@ define i64 @blsr64_branch(i64 %x) {
 }
 
 declare void @bar()
-
-define void @pr42118_i32(i32 %x) {
-; X86-LABEL: pr42118_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    negl %ecx
-; X86-NEXT:    andnl %eax, %ecx, %eax
-; X86-NEXT:    jne .LBB48_1
-; X86-NEXT:  # %bb.2:
-; X86-NEXT:    jmp bar # TAILCALL
-; X86-NEXT:  .LBB48_1:
-; X86-NEXT:    retl
-;
-; X64-LABEL: pr42118_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    andnl %edi, %eax, %eax
-; X64-NEXT:    jne .LBB48_1
-; X64-NEXT:  # %bb.2:
-; X64-NEXT:    jmp bar # TAILCALL
-; X64-NEXT:  .LBB48_1:
-; X64-NEXT:    retq
-  %tmp = sub i32 0, %x
-  %tmp1 = and i32 %tmp, %x
-  %cmp = icmp eq i32 %tmp1, %x
-  br i1 %cmp, label %1, label %2
-
-  tail call void @bar()
-  br label %2
-
-  ret void
-}
-
-define void @pr42118_i64(i64 %x) {
-; X86-LABEL: pr42118_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %esi, -8
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    sbbl %ecx, %edx
-; X86-NEXT:    andnl %ecx, %edx, %ecx
-; X86-NEXT:    andnl %eax, %esi, %eax
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    jne .LBB49_1
-; X86-NEXT:  # %bb.2:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    jmp bar # TAILCALL
-; X86-NEXT:  .LBB49_1:
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-;
-; X64-LABEL: pr42118_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    negq %rax
-; X64-NEXT:    andnq %rdi, %rax, %rax
-; X64-NEXT:    jne .LBB49_1
-; X64-NEXT:  # %bb.2:
-; X64-NEXT:    jmp bar # TAILCALL
-; X64-NEXT:  .LBB49_1:
-; X64-NEXT:    retq
-  %tmp = sub i64 0, %x
-  %tmp1 = and i64 %tmp, %x
-  %cmp = icmp eq i64 %tmp1, %x
-  br i1 %cmp, label %1, label %2
-
-  tail call void @bar()
-  br label %2
-
-  ret void
-}

From 552fda839a313af1f73efa13b6abee624e944f77 Mon Sep 17 00:00:00 2001
From: Jason Liu <jasonliu.development@gmail.com>
Date: Mon, 3 Jun 2019 22:22:03 +0000
Subject: [PATCH 0972/1176] Fix DWARF DebugInfo unit test errors when
 cross-compiling

Summary:
When building with a Default Target set we can experience issues
in the DWARF DebugInfo unit tests because:

They assume we can generate object files for the host platform.
Some tests assume the endianess of the target we are generating
DWARF for and the host match.

This patch correct these issues by ensuring the tests which
generate objects in memory are run with respect to
LVM_DEFAULT_TARGET_TRIPLE and it's endianess.

We also make sure we don't use the hosts address size for line test
and split the triple util function in DwarfUtils into a version
that takes an address size and one that doesn't.

See also for discussion:
http://lists.llvm.org/pipermail/llvm-dev/2019-March/131212.html

Patch by: daltenty

Differential Revision: https://reviews.llvm.org/D62084

llvm-svn: 362454
---
 .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp    | 24 +++++++++----------
 .../DebugInfo/DWARF/DWARFDebugLineTest.cpp    |  7 +++---
 llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp | 16 +++++++++++--
 llvm/unittests/DebugInfo/DWARF/DwarfUtils.h   |  3 ++-
 4 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
index ef51ce0fbff9f..e2631afb007f3 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp
@@ -41,7 +41,7 @@ namespace {
 
 template <uint16_t Version, class AddrType, class RefAddrType>
 void TestAllForms() {
-  Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+  Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType));
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -455,7 +455,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version5Addr8AllForms) {
 }
 
 template <uint16_t Version, class AddrType> void TestChildren() {
-  Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+  Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType));
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -585,7 +585,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Children) {
 }
 
 template <uint16_t Version, class AddrType> void TestReferences() {
-  Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+  Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType));
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -835,7 +835,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8References) {
 }
 
 template <uint16_t Version, class AddrType> void TestAddresses() {
-  Triple Triple = getHostTripleForAddrSize(sizeof(AddrType));
+  Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType));
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1007,7 +1007,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Addresses) {
 }
 
 TEST(DWARFDebugInfo, TestStringOffsets) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1071,7 +1071,7 @@ TEST(DWARFDebugInfo, TestStringOffsets) {
 }
 
 TEST(DWARFDebugInfo, TestEmptyStringOffsets) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1100,7 +1100,7 @@ TEST(DWARFDebugInfo, TestEmptyStringOffsets) {
 }
 
 TEST(DWARFDebugInfo, TestRelations) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1287,7 +1287,7 @@ TEST(DWARFDebugInfo, TestDWARFDie) {
 }
 
 TEST(DWARFDebugInfo, TestChildIterators) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1401,7 +1401,7 @@ TEST(DWARFDebugInfo, TestEmptyChildren) {
 }
 
 TEST(DWARFDebugInfo, TestAttributeIterators) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1463,7 +1463,7 @@ TEST(DWARFDebugInfo, TestAttributeIterators) {
 }
 
 TEST(DWARFDebugInfo, TestFindRecurse) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1677,7 +1677,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) {
 }
 
 TEST(DWARFDebugInfo, TestFindAttrs) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
@@ -1740,7 +1740,7 @@ TEST(DWARFDebugInfo, TestFindAttrs) {
 }
 
 TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) {
-  Triple Triple = getHostTripleForAddrSize(sizeof(void *));
+  Triple Triple = getNormalizedDefaultTargetTriple();
   if (!isConfigurationSupported(Triple))
     return;
 
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index d016a946b1af9..fef8dc50fb467 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -37,7 +37,7 @@ struct CommonFixture {
   }
 
   bool setupGenerator(uint16_t Version = 4) {
-    Triple T = getHostTripleForAddrSize(8);
+    Triple T = getDefaultTargetTripleForAddrSize(8);
     if (!isConfigurationSupported(T))
       return false;
     auto ExpectedGenerator = Generator::create(T, Version);
@@ -50,8 +50,9 @@ struct CommonFixture {
     Context = createContext();
     assert(Context != nullptr && "test state is not valid");
     const DWARFObject &Obj = Context->getDWARFObj();
-    LineData = DWARFDataExtractor(Obj, Obj.getLineSection(),
-                                  sys::IsLittleEndianHost, 8);
+    LineData = DWARFDataExtractor(
+        Obj, Obj.getLineSection(),
+        getDefaultTargetTripleForAddrSize(8).isLittleEndian(), 8);
   }
 
   std::unique_ptr<DWARFContext> createContext() {
diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp
index fe0cebd75b027..249cfb42271ae 100644
--- a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp
@@ -9,6 +9,7 @@
 #include "DwarfUtils.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 
@@ -25,9 +26,20 @@ static void initLLVMIfNeeded() {
   }
 }
 
-Triple llvm::dwarf::utils::getHostTripleForAddrSize(uint8_t AddrSize) {
-  Triple T(Triple::normalize(LLVM_HOST_TRIPLE));
+Triple llvm::dwarf::utils::getNormalizedDefaultTargetTriple() {
+  Triple T(Triple::normalize(sys::getDefaultTargetTriple()));
 
+  return T;
+}
+
+Triple llvm::dwarf::utils::getDefaultTargetTripleForAddrSize(uint8_t AddrSize) {
+  Triple T = getNormalizedDefaultTargetTriple();
+
+  assert((AddrSize == 4 || AddrSize == 8) &&
+         "Only 32-bit/64-bit address size variants are supported");
+
+  // If a 32-bit/64-bit address size was specified, try to convert the triple
+  // if it is for the wrong variant.
   if (AddrSize == 8 && T.isArch32Bit())
     return T.get64BitArchVariant();
   if (AddrSize == 4 && T.isArch64Bit())
diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h
index e96b2b9447964..036071e0b5670 100644
--- a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h
+++ b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h
@@ -18,7 +18,8 @@ class Triple;
 namespace dwarf {
 namespace utils {
 
-Triple getHostTripleForAddrSize(uint8_t AddrSize);
+Triple getDefaultTargetTripleForAddrSize(uint8_t AddrSize);
+Triple getNormalizedDefaultTargetTriple();
 bool isConfigurationSupported(Triple &T);
 
 } // end namespace utils

From 6dc8ce323e24bed62cf9938b60f7323464c0da38 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 3 Jun 2019 22:30:18 +0000
Subject: [PATCH 0973/1176] [NFC][Codegen] Add tests for hoisting and-by-const
 from "logical shift", when then eq-comparing with 0

This was initially reported as: https://reviews.llvm.org/D62818

https://rise4fun.com/Alive/oPH

llvm-svn: 362455
---
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll |  340 +++++
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll |  335 +++++
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 1222 ++++++++++++++++
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 1234 +++++++++++++++++
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll |  926 +++++++++++++
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll |  874 ++++++++++++
 6 files changed, 4931 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
 create mode 100644 llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
 create mode 100644 llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
 create mode 100644 llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
 create mode 100644 llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
 create mode 100644 llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll

diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
new file mode 100644
index 0000000000000..0bc2d8b37766e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefixes=CHECK,AARCH64
+
+; We are looking for the following pattern here:
+;   (X & (C l>> Y)) ==/!= 0
+; It may be optimal to hoist the constant:
+;   ((X << Y) & C) ==/!= 0
+
+;------------------------------------------------------------------------------;
+; A few scalar test
+;------------------------------------------------------------------------------;
+
+; i8 scalar
+
+define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i8 1, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #24
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+; i16 scalar
+
+define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: scalar_i16_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #32768
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xffff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i16 32768, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: scalar_i16_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xffff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i16 1, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #4080
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xffff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i16 4080, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+; i32 scalar
+
+define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    tst w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i32 2147483648, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    tst w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i32 1, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #16776960
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    tst w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i32 16776960, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+; i64 scalar
+
+define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scalar_i64_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808
+; CHECK-NEXT:    lsr x8, x8, x1
+; CHECK-NEXT:    tst x8, x0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i64 9223372036854775808, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scalar_i64_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    lsr x8, x8, x1
+; CHECK-NEXT:    tst x8, x0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i64 1, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281474976645120
+; CHECK-NEXT:    lsr x8, x8, x1
+; CHECK-NEXT:    tst x8, x0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i64 281474976645120, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few trivial vector tests
+;------------------------------------------------------------------------------;
+
+define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_splat_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v1.4s, v1.4s
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI13_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    neg v1.4s, v1.4s
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = lshr <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v1.4s, v1.4s
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v1.4s, v1.4s
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    neg v1.4s, v1.4s
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+
+;------------------------------------------------------------------------------;
+; A special tests
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_signbit_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp ne i8 %t1, 0 ;  we are perfectly happy with 'ne' predicate
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few negative tests
+;------------------------------------------------------------------------------;
+
+define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #24
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    sxtb w8, w8
+; CHECK-NEXT:    cmp w8, #0 // =0
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %t0 = lshr i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp slt i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsr w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    cmp w8, #1 // =1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 1 ; should be comparing with 0
+  ret i1 %res
+}
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
new file mode 100644
index 0000000000000..c76a2e43daeca
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -0,0 +1,335 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s --check-prefixes=CHECK,AARCH64
+
+; We are looking for the following pattern here:
+;   (X & (C << Y)) ==/!= 0
+; It may be optimal to hoist the constant:
+;   ((X l>> Y) & C) ==/!= 0
+
+;------------------------------------------------------------------------------;
+; A few scalar test
+;------------------------------------------------------------------------------;
+
+; i8 scalar
+
+define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i8 1, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #24
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+; i16 scalar
+
+define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: scalar_i16_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-32768
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xffff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i16 32768, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: scalar_i16_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xffff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i16 1, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #4080
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xffff
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i16 4080, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+; i32 scalar
+
+define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-2147483648
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    tst w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i32 2147483648, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    tst w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i32 1, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #16776960
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    tst w8, w0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i32 16776960, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+; i64 scalar
+
+define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scalar_i64_signbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808
+; CHECK-NEXT:    lsl x8, x8, x1
+; CHECK-NEXT:    tst x8, x0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i64 9223372036854775808, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scalar_i64_lowestbit_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    lsl x8, x8, x1
+; CHECK-NEXT:    tst x8, x0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i64 1, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: scalar_i64_bitsinmiddle_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #281474976645120
+; CHECK-NEXT:    lsl x8, x8, x1
+; CHECK-NEXT:    tst x8, x0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i64 281474976645120, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few trivial vector tests
+;------------------------------------------------------------------------------;
+
+define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_splat_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI13_0
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = shl <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    ushl v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+
+;------------------------------------------------------------------------------;
+; A special tests
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_signbit_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    tst w8, #0xff
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp ne i8 %t1, 0 ;  we are perfectly happy with 'ne' predicate
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few negative tests
+;------------------------------------------------------------------------------;
+
+define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #24
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    sxtb w8, w8
+; CHECK-NEXT:    cmp w8, #0 // =0
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %t0 = shl i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp slt i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
+; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    lsl w8, w8, w1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    cmp w8, #1 // =1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 1 ; should be comparing with 0
+  ret i1 %res
+}
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
new file mode 100644
index 0000000000000..ff621f6c708c0
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -0,0 +1,1222 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv6 < %s | FileCheck %s --check-prefixes=CHECK,ARM,ARM6
+; RUN: llc -mtriple=armv7 < %s | FileCheck %s --check-prefixes=CHECK,ARM,ARM78,ARM7
+; RUN: llc -mtriple=armv8a < %s | FileCheck %s --check-prefixes=CHECK,ARM,ARM78,ARM8
+; RUN: llc -mtriple=thumbv6 < %s | FileCheck %s --check-prefixes=CHECK,THUMB,THUMB6
+; RUN: llc -mtriple=thumbv7 < %s | FileCheck %s --check-prefixes=CHECK,THUMB,THUMB78,THUMB7
+; RUN: llc -mtriple=thumbv8-eabi < %s | FileCheck %s --check-prefixes=CHECK,THUMB,THUMB78,THUMB8
+
+; We are looking for the following pattern here:
+;   (X & (C l>> Y)) ==/!= 0
+; It may be optimal to hoist the constant:
+;   ((X << Y) & C) ==/!= 0
+
+;------------------------------------------------------------------------------;
+; A few scalar test
+;------------------------------------------------------------------------------;
+
+; i8 scalar
+
+define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_signbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mov r2, #128
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    uxtb r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #128
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #128
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_lowestbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    uxtb r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #1
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i8 1, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_bitsinmiddle_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mov r2, #24
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    uxtb r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #24
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #24
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+; i16 scalar
+
+define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
+; ARM-LABEL: scalar_i16_signbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxth r1, r1
+; ARM-NEXT:    mov r2, #32768
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i16_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxth r1, r1
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsls r2, r2, #15
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxth r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i16_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxth r1, r1
+; THUMB78-NEXT:    mov.w r2, #32768
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i16 32768, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
+; ARM-LABEL: scalar_i16_lowestbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxth r1, r1
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i16_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxth r1, r1
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxth r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i16_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxth r1, r1
+; THUMB78-NEXT:    movs r2, #1
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i16 1, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
+; ARM-LABEL: scalar_i16_bitsinmiddle_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxth r1, r1
+; ARM-NEXT:    mov r2, #4080
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxth r1, r1
+; THUMB6-NEXT:    movs r2, #255
+; THUMB6-NEXT:    lsls r2, r2, #4
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxth r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxth r1, r1
+; THUMB78-NEXT:    mov.w r2, #4080
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i16 4080, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+; i32 scalar
+
+define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
+; ARM-LABEL: scalar_i32_signbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    mov r2, #-2147483648
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsls r2, r2, #31
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i32_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov.w r2, #-2147483648
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i32 2147483648, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
+; ARM-LABEL: scalar_i32_lowestbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i32_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movs r2, #1
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i32 1, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
+; ARM6-LABEL: scalar_i32_bitsinmiddle_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    mov r2, #65280
+; ARM6-NEXT:    orr r2, r2, #16711680
+; ARM6-NEXT:    and r0, r0, r2, lsr r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: scalar_i32_bitsinmiddle_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r2, #65280
+; ARM78-NEXT:    movt r2, #255
+; ARM78-NEXT:    and r0, r0, r2, lsr r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    ldr r2, .LCPI8_0
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI8_0:
+; THUMB6-NEXT:    .long 16776960 @ 0xffff00
+;
+; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movw r2, #65280
+; THUMB78-NEXT:    movt r2, #255
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i32 16776960, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+; i64 scalar
+
+define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
+; ARM6-LABEL: scalar_i64_signbit_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    mov r12, #-2147483648
+; ARM6-NEXT:    sub lr, r2, #32
+; ARM6-NEXT:    lsr r3, r12, r2
+; ARM6-NEXT:    rsb r2, r2, #32
+; ARM6-NEXT:    cmp lr, #0
+; ARM6-NEXT:    lsl r2, r12, r2
+; ARM6-NEXT:    movge r3, #0
+; ARM6-NEXT:    lsrge r2, r12, lr
+; ARM6-NEXT:    and r1, r3, r1
+; ARM6-NEXT:    and r0, r2, r0
+; ARM6-NEXT:    orr r0, r0, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: scalar_i64_signbit_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    push {r11, lr}
+; ARM78-NEXT:    mov r12, #-2147483648
+; ARM78-NEXT:    sub lr, r2, #32
+; ARM78-NEXT:    lsr r3, r12, r2
+; ARM78-NEXT:    rsb r2, r2, #32
+; ARM78-NEXT:    cmp lr, #0
+; ARM78-NEXT:    lsl r2, r12, r2
+; ARM78-NEXT:    movwge r3, #0
+; ARM78-NEXT:    lsrge r2, r12, lr
+; ARM78-NEXT:    and r1, r3, r1
+; ARM78-NEXT:    and r0, r2, r0
+; ARM78-NEXT:    orr r0, r0, r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    pop {r11, pc}
+;
+; THUMB6-LABEL: scalar_i64_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mov r4, r1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    lsls r1, r0, #31
+; THUMB6-NEXT:    movs r0, #0
+; THUMB6-NEXT:    bl __lshrdi3
+; THUMB6-NEXT:    ands r1, r4
+; THUMB6-NEXT:    ands r0, r5
+; THUMB6-NEXT:    orrs r0, r1
+; THUMB6-NEXT:    rsbs r1, r0, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB7-LABEL: scalar_i64_signbit_eq:
+; THUMB7:       @ %bb.0:
+; THUMB7-NEXT:    push {r7, lr}
+; THUMB7-NEXT:    rsb.w r3, r2, #32
+; THUMB7-NEXT:    mov.w r12, #-2147483648
+; THUMB7-NEXT:    sub.w lr, r2, #32
+; THUMB7-NEXT:    lsr.w r2, r12, r2
+; THUMB7-NEXT:    lsl.w r3, r12, r3
+; THUMB7-NEXT:    cmp.w lr, #0
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    lsrge.w r3, r12, lr
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    movge r2, #0
+; THUMB7-NEXT:    ands r0, r3
+; THUMB7-NEXT:    ands r1, r2
+; THUMB7-NEXT:    orrs r0, r1
+; THUMB7-NEXT:    clz r0, r0
+; THUMB7-NEXT:    lsrs r0, r0, #5
+; THUMB7-NEXT:    pop {r7, pc}
+;
+; THUMB8-LABEL: scalar_i64_signbit_eq:
+; THUMB8:       @ %bb.0:
+; THUMB8-NEXT:    .save {r7, lr}
+; THUMB8-NEXT:    push {r7, lr}
+; THUMB8-NEXT:    rsb.w r3, r2, #32
+; THUMB8-NEXT:    sub.w lr, r2, #32
+; THUMB8-NEXT:    mov.w r12, #-2147483648
+; THUMB8-NEXT:    cmp.w lr, #0
+; THUMB8-NEXT:    lsl.w r3, r12, r3
+; THUMB8-NEXT:    lsr.w r2, r12, r2
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    lsrge.w r3, r12, lr
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    movge r2, #0
+; THUMB8-NEXT:    ands r0, r3
+; THUMB8-NEXT:    ands r1, r2
+; THUMB8-NEXT:    orrs r0, r1
+; THUMB8-NEXT:    clz r0, r0
+; THUMB8-NEXT:    lsrs r0, r0, #5
+; THUMB8-NEXT:    pop {r7, pc}
+  %t0 = lshr i64 9223372036854775808, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
+; ARM6-LABEL: scalar_i64_lowestbit_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    mov r1, #1
+; ARM6-NEXT:    lsr r1, r1, r2
+; ARM6-NEXT:    sub r2, r2, #32
+; ARM6-NEXT:    cmp r2, #0
+; ARM6-NEXT:    movge r1, #0
+; ARM6-NEXT:    and r0, r1, r0
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: scalar_i64_lowestbit_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r1, #1
+; ARM78-NEXT:    lsr r1, r1, r2
+; ARM78-NEXT:    sub r2, r2, #32
+; ARM78-NEXT:    cmp r2, #0
+; ARM78-NEXT:    movwge r1, #0
+; ARM78-NEXT:    and r0, r1, r0
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i64_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mov r4, r1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    movs r1, #0
+; THUMB6-NEXT:    bl __lshrdi3
+; THUMB6-NEXT:    ands r1, r4
+; THUMB6-NEXT:    ands r0, r5
+; THUMB6-NEXT:    orrs r0, r1
+; THUMB6-NEXT:    rsbs r1, r0, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: scalar_i64_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movs r1, #1
+; THUMB78-NEXT:    lsrs r1, r2
+; THUMB78-NEXT:    subs r2, #32
+; THUMB78-NEXT:    cmp r2, #0
+; THUMB78-NEXT:    it ge
+; THUMB78-NEXT:    movge r1, #0
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i64 1, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
+; ARM6-LABEL: scalar_i64_bitsinmiddle_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    mov r12, #255
+; ARM6-NEXT:    sub lr, r2, #32
+; ARM6-NEXT:    orr r12, r12, #65280
+; ARM6-NEXT:    cmp lr, #0
+; ARM6-NEXT:    lsr r3, r12, r2
+; ARM6-NEXT:    movge r3, #0
+; ARM6-NEXT:    and r1, r3, r1
+; ARM6-NEXT:    mov r3, #16711680
+; ARM6-NEXT:    cmp lr, #0
+; ARM6-NEXT:    orr r3, r3, #-16777216
+; ARM6-NEXT:    lsr r3, r3, r2
+; ARM6-NEXT:    rsb r2, r2, #32
+; ARM6-NEXT:    orr r2, r3, r12, lsl r2
+; ARM6-NEXT:    lsrge r2, r12, lr
+; ARM6-NEXT:    and r0, r2, r0
+; ARM6-NEXT:    orr r0, r0, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: scalar_i64_bitsinmiddle_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    push {r11, lr}
+; ARM78-NEXT:    movw r12, #65535
+; ARM78-NEXT:    sub lr, r2, #32
+; ARM78-NEXT:    lsr r3, r12, r2
+; ARM78-NEXT:    cmp lr, #0
+; ARM78-NEXT:    movwge r3, #0
+; ARM78-NEXT:    and r1, r3, r1
+; ARM78-NEXT:    movw r3, #0
+; ARM78-NEXT:    cmp lr, #0
+; ARM78-NEXT:    movt r3, #65535
+; ARM78-NEXT:    lsr r3, r3, r2
+; ARM78-NEXT:    rsb r2, r2, #32
+; ARM78-NEXT:    orr r2, r3, r12, lsl r2
+; ARM78-NEXT:    lsrge r2, r12, lr
+; ARM78-NEXT:    and r0, r2, r0
+; ARM78-NEXT:    orr r0, r0, r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    pop {r11, pc}
+;
+; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mov r4, r1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    ldr r0, .LCPI11_0
+; THUMB6-NEXT:    ldr r1, .LCPI11_1
+; THUMB6-NEXT:    bl __lshrdi3
+; THUMB6-NEXT:    ands r1, r4
+; THUMB6-NEXT:    ands r0, r5
+; THUMB6-NEXT:    orrs r0, r1
+; THUMB6-NEXT:    rsbs r1, r0, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI11_0:
+; THUMB6-NEXT:    .long 4294901760 @ 0xffff0000
+; THUMB6-NEXT:  .LCPI11_1:
+; THUMB6-NEXT:    .long 65535 @ 0xffff
+;
+; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq:
+; THUMB7:       @ %bb.0:
+; THUMB7-NEXT:    push {r7, lr}
+; THUMB7-NEXT:    movs r3, #0
+; THUMB7-NEXT:    movw lr, #65535
+; THUMB7-NEXT:    movt r3, #65535
+; THUMB7-NEXT:    lsr.w r12, r3, r2
+; THUMB7-NEXT:    rsb.w r3, r2, #32
+; THUMB7-NEXT:    lsl.w r3, lr, r3
+; THUMB7-NEXT:    orr.w r3, r3, r12
+; THUMB7-NEXT:    sub.w r12, r2, #32
+; THUMB7-NEXT:    lsr.w r2, lr, r2
+; THUMB7-NEXT:    cmp.w r12, #0
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    lsrge.w r3, lr, r12
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    movge r2, #0
+; THUMB7-NEXT:    ands r0, r3
+; THUMB7-NEXT:    ands r1, r2
+; THUMB7-NEXT:    orrs r0, r1
+; THUMB7-NEXT:    clz r0, r0
+; THUMB7-NEXT:    lsrs r0, r0, #5
+; THUMB7-NEXT:    pop {r7, pc}
+;
+; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq:
+; THUMB8:       @ %bb.0:
+; THUMB8-NEXT:    .save {r7, lr}
+; THUMB8-NEXT:    push {r7, lr}
+; THUMB8-NEXT:    movs r3, #0
+; THUMB8-NEXT:    movw lr, #65535
+; THUMB8-NEXT:    movt r3, #65535
+; THUMB8-NEXT:    lsr.w r12, r3, r2
+; THUMB8-NEXT:    rsb.w r3, r2, #32
+; THUMB8-NEXT:    lsl.w r3, lr, r3
+; THUMB8-NEXT:    orr.w r3, r3, r12
+; THUMB8-NEXT:    sub.w r12, r2, #32
+; THUMB8-NEXT:    cmp.w r12, #0
+; THUMB8-NEXT:    lsr.w r2, lr, r2
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    lsrge.w r3, lr, r12
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    movge r2, #0
+; THUMB8-NEXT:    ands r0, r3
+; THUMB8-NEXT:    ands r1, r2
+; THUMB8-NEXT:    orrs r0, r1
+; THUMB8-NEXT:    clz r0, r0
+; THUMB8-NEXT:    lsrs r0, r0, #5
+; THUMB8-NEXT:    pop {r7, pc}
+  %t0 = lshr i64 281474976645120, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few trivial vector tests
+;------------------------------------------------------------------------------;
+
+define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_splat_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    and r0, r0, lr, lsr r12
+; ARM6-NEXT:    ldr r12, [sp, #12]
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r1, r1, lr, lsr r12
+; ARM6-NEXT:    ldr r12, [sp, #16]
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    and r2, r2, lr, lsr r12
+; ARM6-NEXT:    ldr r12, [sp, #20]
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    and r3, r3, lr, lsr r12
+; ARM6-NEXT:    lsr r2, r2, #5
+; ARM6-NEXT:    clz r3, r3
+; ARM6-NEXT:    lsr r3, r3, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_splat_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d16, d17}, [r12]
+; ARM78-NEXT:    vmov.i32 q9, #0x1
+; ARM78-NEXT:    vneg.s32 q8, q8
+; ARM78-NEXT:    vshl.u32 q8, q9, q8
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_splat_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r6, lr}
+; THUMB6-NEXT:    ldr r5, [sp, #16]
+; THUMB6-NEXT:    movs r4, #1
+; THUMB6-NEXT:    mov r6, r4
+; THUMB6-NEXT:    lsrs r6, r5
+; THUMB6-NEXT:    ands r6, r0
+; THUMB6-NEXT:    rsbs r0, r6, #0
+; THUMB6-NEXT:    adcs r0, r6
+; THUMB6-NEXT:    ldr r5, [sp, #20]
+; THUMB6-NEXT:    mov r6, r4
+; THUMB6-NEXT:    lsrs r6, r5
+; THUMB6-NEXT:    ands r6, r1
+; THUMB6-NEXT:    rsbs r1, r6, #0
+; THUMB6-NEXT:    adcs r1, r6
+; THUMB6-NEXT:    ldr r5, [sp, #24]
+; THUMB6-NEXT:    mov r6, r4
+; THUMB6-NEXT:    lsrs r6, r5
+; THUMB6-NEXT:    ands r6, r2
+; THUMB6-NEXT:    rsbs r2, r6, #0
+; THUMB6-NEXT:    adcs r2, r6
+; THUMB6-NEXT:    ldr r5, [sp, #28]
+; THUMB6-NEXT:    lsrs r4, r5
+; THUMB6-NEXT:    ands r4, r3
+; THUMB6-NEXT:    rsbs r3, r4, #0
+; THUMB6-NEXT:    adcs r3, r4
+; THUMB6-NEXT:    pop {r4, r5, r6, pc}
+;
+; THUMB78-LABEL: vec_4xi32_splat_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d16, d17}, [r12]
+; THUMB78-NEXT:    vmov.i32 q9, #0x1
+; THUMB78-NEXT:    vneg.s32 q8, q8
+; THUMB78-NEXT:    vshl.u32 q8, q9, q8
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r12, [sp, #4]
+; ARM6-NEXT:    mov r0, #1
+; ARM6-NEXT:    and r0, r1, r0, lsr r12
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r1, r0, #5
+; ARM6-NEXT:    mov r0, #65280
+; ARM6-NEXT:    orr r0, r0, #16711680
+; ARM6-NEXT:    and r0, r2, r0, lsr r12
+; ARM6-NEXT:    ldr r12, [sp, #12]
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r2, r0, #5
+; ARM6-NEXT:    mov r0, #-2147483648
+; ARM6-NEXT:    and r0, r3, r0, lsr r12
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r3, r0, #5
+; ARM6-NEXT:    mov r0, #1
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d16, d17}, [r12]
+; ARM78-NEXT:    adr r12, .LCPI13_0
+; ARM78-NEXT:    vneg.s32 q8, q8
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r12:128]
+; ARM78-NEXT:    vshl.u32 q8, q9, q8
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+; ARM78-NEXT:    .p2align 4
+; ARM78-NEXT:  @ %bb.1:
+; ARM78-NEXT:  .LCPI13_0:
+; ARM78-NEXT:    .long 0 @ 0x0
+; ARM78-NEXT:    .long 1 @ 0x1
+; ARM78-NEXT:    .long 16776960 @ 0xffff00
+; ARM78-NEXT:    .long 2147483648 @ 0x80000000
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #24]
+; THUMB6-NEXT:    ldr r5, .LCPI13_0
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r2
+; THUMB6-NEXT:    rsbs r2, r5, #0
+; THUMB6-NEXT:    adcs r2, r5
+; THUMB6-NEXT:    lsls r4, r0, #31
+; THUMB6-NEXT:    ldr r5, [sp, #28]
+; THUMB6-NEXT:    lsrs r4, r5
+; THUMB6-NEXT:    ands r4, r3
+; THUMB6-NEXT:    rsbs r3, r4, #0
+; THUMB6-NEXT:    adcs r3, r4
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI13_0:
+; THUMB6-NEXT:    .long 16776960 @ 0xffff00
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d16, d17}, [r12]
+; THUMB78-NEXT:    adr.w r12, .LCPI13_0
+; THUMB78-NEXT:    vneg.s32 q8, q8
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12:128]
+; THUMB78-NEXT:    vshl.u32 q8, q9, q8
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+; THUMB78-NEXT:    .p2align 4
+; THUMB78-NEXT:  @ %bb.1:
+; THUMB78-NEXT:  .LCPI13_0:
+; THUMB78-NEXT:    .long 0 @ 0x0
+; THUMB78-NEXT:    .long 1 @ 0x1
+; THUMB78-NEXT:    .long 16776960 @ 0xffff00
+; THUMB78-NEXT:    .long 2147483648 @ 0x80000000
+  %t0 = lshr <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r2, [sp, #12]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    and r1, r1, lr, lsr r2
+; ARM6-NEXT:    ldr r2, [sp, #20]
+; ARM6-NEXT:    and r0, r0, lr, lsr r12
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r2, r3, lr, lsr r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    lsr r3, r2, #5
+; ARM6-NEXT:    mov r2, #1
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d16, d17}, [r12]
+; ARM78-NEXT:    vmov.i32 q9, #0x1
+; ARM78-NEXT:    vneg.s32 q8, q8
+; ARM78-NEXT:    vshl.u32 q8, q9, q8
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #16]
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r0
+; THUMB6-NEXT:    rsbs r0, r5, #0
+; THUMB6-NEXT:    adcs r0, r5
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #28]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r3
+; THUMB6-NEXT:    rsbs r3, r5, #0
+; THUMB6-NEXT:    adcs r3, r5
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d16, d17}, [r12]
+; THUMB78-NEXT:    vmov.i32 q9, #0x1
+; THUMB78-NEXT:    vneg.s32 q8, q8
+; THUMB78-NEXT:    vshl.u32 q8, q9, q8
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r2, [sp, #12]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    and r1, r1, lr, lsr r2
+; ARM6-NEXT:    ldr r2, [sp, #20]
+; ARM6-NEXT:    and r0, r0, lr, lsr r12
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r2, r3, lr, lsr r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    lsr r3, r2, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d16, d17}, [r12]
+; ARM78-NEXT:    vmov.i32 q9, #0x1
+; ARM78-NEXT:    vneg.s32 q8, q8
+; ARM78-NEXT:    vshl.u32 q8, q9, q8
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #16]
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r0
+; THUMB6-NEXT:    rsbs r0, r5, #0
+; THUMB6-NEXT:    adcs r0, r5
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #28]
+; THUMB6-NEXT:    lsrs r2, r4
+; THUMB6-NEXT:    ands r2, r3
+; THUMB6-NEXT:    rsbs r3, r2, #0
+; THUMB6-NEXT:    adcs r3, r2
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d16, d17}, [r12]
+; THUMB78-NEXT:    vmov.i32 q9, #0x1
+; THUMB78-NEXT:    vneg.s32 q8, q8
+; THUMB78-NEXT:    vshl.u32 q8, q9, q8
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r2, [sp, #12]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    and r1, r1, lr, lsr r2
+; ARM6-NEXT:    ldr r2, [sp, #20]
+; ARM6-NEXT:    and r0, r0, lr, lsr r12
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r2, r3, lr, lsr r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    lsr r3, r2, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d16, d17}, [r12]
+; ARM78-NEXT:    vmov.i32 q9, #0x1
+; ARM78-NEXT:    vneg.s32 q8, q8
+; ARM78-NEXT:    vshl.u32 q8, q9, q8
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #16]
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r0
+; THUMB6-NEXT:    rsbs r0, r5, #0
+; THUMB6-NEXT:    adcs r0, r5
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsrs r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #28]
+; THUMB6-NEXT:    lsrs r2, r4
+; THUMB6-NEXT:    ands r2, r3
+; THUMB6-NEXT:    rsbs r3, r2, #0
+; THUMB6-NEXT:    adcs r3, r2
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d16, d17}, [r12]
+; THUMB78-NEXT:    vmov.i32 q9, #0x1
+; THUMB78-NEXT:    vneg.s32 q8, q8
+; THUMB78-NEXT:    vshl.u32 q8, q9, q8
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+
+;------------------------------------------------------------------------------;
+; A special tests
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
+; ARM6-LABEL: scalar_i8_signbit_ne:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    uxtb r1, r1
+; ARM6-NEXT:    mov r2, #128
+; ARM6-NEXT:    and r0, r0, r2, lsr r1
+; ARM6-NEXT:    uxtb r0, r0
+; ARM6-NEXT:    cmp r0, #0
+; ARM6-NEXT:    movne r0, #1
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: scalar_i8_signbit_ne:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    uxtb r1, r1
+; ARM78-NEXT:    mov r2, #128
+; ARM78-NEXT:    and r0, r0, r2, lsr r1
+; ARM78-NEXT:    uxtb r0, r0
+; ARM78-NEXT:    cmp r0, #0
+; ARM78-NEXT:    movwne r0, #1
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_ne:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #128
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r0, r2
+; THUMB6-NEXT:    subs r1, r0, #1
+; THUMB6-NEXT:    sbcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_ne:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #128
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    cmp r0, #0
+; THUMB78-NEXT:    it ne
+; THUMB78-NEXT:    movne r0, #1
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp ne i8 %t1, 0 ;  we are perfectly happy with 'ne' predicate
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few negative tests
+;------------------------------------------------------------------------------;
+
+define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
+; ARM6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    uxtb r1, r1
+; ARM6-NEXT:    mov r2, #24
+; ARM6-NEXT:    and r0, r0, r2, lsr r1
+; ARM6-NEXT:    sxtb r1, r0
+; ARM6-NEXT:    mov r0, #0
+; ARM6-NEXT:    cmp r1, #0
+; ARM6-NEXT:    movlt r0, #1
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    uxtb r1, r1
+; ARM78-NEXT:    mov r2, #24
+; ARM78-NEXT:    and r0, r0, r2, lsr r1
+; ARM78-NEXT:    sxtb r1, r0
+; ARM78-NEXT:    mov r0, #0
+; ARM78-NEXT:    cmp r1, #0
+; ARM78-NEXT:    movwlt r0, #1
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #24
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    sxtb r0, r2
+; THUMB6-NEXT:    cmp r0, #0
+; THUMB6-NEXT:    blt .LBB18_2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:    movs r0, #0
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:  .LBB18_2:
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #24
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    sxtb r1, r0
+; THUMB78-NEXT:    movs r0, #0
+; THUMB78-NEXT:    cmp r1, #0
+; THUMB78-NEXT:    it lt
+; THUMB78-NEXT:    movlt r0, #1
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp slt i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mov r2, #128
+; ARM-NEXT:    and r0, r0, r2, lsr r1
+; ARM-NEXT:    mvn r1, #0
+; ARM-NEXT:    uxtab r0, r1, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #128
+; THUMB6-NEXT:    lsrs r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r0, r2
+; THUMB6-NEXT:    subs r1, r0, #1
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #128
+; THUMB78-NEXT:    lsr.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    mov.w r1, #-1
+; THUMB78-NEXT:    uxtab r0, r1, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 1 ; should be comparing with 0
+  ret i1 %res
+}
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
new file mode 100644
index 0000000000000..104a43979a0ca
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -0,0 +1,1234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv6 < %s | FileCheck %s --check-prefixes=CHECK,ARM,ARM6
+; RUN: llc -mtriple=armv7 < %s | FileCheck %s --check-prefixes=CHECK,ARM,ARM78,ARM7
+; RUN: llc -mtriple=armv8a < %s | FileCheck %s --check-prefixes=CHECK,ARM,ARM78,ARM8
+; RUN: llc -mtriple=thumbv6 < %s | FileCheck %s --check-prefixes=CHECK,THUMB,THUMB6
+; RUN: llc -mtriple=thumbv7 < %s | FileCheck %s --check-prefixes=CHECK,THUMB,THUMB78,THUMB7
+; RUN: llc -mtriple=thumbv8-eabi < %s | FileCheck %s --check-prefixes=CHECK,THUMB,THUMB78,THUMB8
+
+; We are looking for the following pattern here:
+;   (X & (C << Y)) ==/!= 0
+; It may be optimal to hoist the constant:
+;   ((X l>> Y) & C) ==/!= 0
+
+;------------------------------------------------------------------------------;
+; A few scalar test
+;------------------------------------------------------------------------------;
+
+; i8 scalar
+
+define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_signbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mvn r2, #127
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    uxtb r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #127
+; THUMB6-NEXT:    mvns r2, r2
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    mvn r2, #127
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_lowestbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    uxtb r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #1
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i8 1, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_bitsinmiddle_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mov r2, #24
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    uxtb r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #24
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #24
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+; i16 scalar
+
+define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
+; ARM6-LABEL: scalar_i16_signbit_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r2, .LCPI3_0
+; ARM6-NEXT:    uxth r1, r1
+; ARM6-NEXT:    and r0, r0, r2, lsl r1
+; ARM6-NEXT:    uxth r0, r0
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+; ARM6-NEXT:    .p2align 2
+; ARM6-NEXT:  @ %bb.1:
+; ARM6-NEXT:  .LCPI3_0:
+; ARM6-NEXT:    .long 4294934528 @ 0xffff8000
+;
+; ARM78-LABEL: scalar_i16_signbit_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r2, #32768
+; ARM78-NEXT:    uxth r1, r1
+; ARM78-NEXT:    movt r2, #65535
+; ARM78-NEXT:    and r0, r0, r2, lsl r1
+; ARM78-NEXT:    uxth r0, r0
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i16_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxth r1, r1
+; THUMB6-NEXT:    ldr r2, .LCPI3_0
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxth r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI3_0:
+; THUMB6-NEXT:    .long 4294934528 @ 0xffff8000
+;
+; THUMB78-LABEL: scalar_i16_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movw r2, #32768
+; THUMB78-NEXT:    uxth r1, r1
+; THUMB78-NEXT:    movt r2, #65535
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i16 32768, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
+; ARM-LABEL: scalar_i16_lowestbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxth r1, r1
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i16_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxth r1, r1
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxth r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i16_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxth r1, r1
+; THUMB78-NEXT:    movs r2, #1
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i16 1, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
+; ARM-LABEL: scalar_i16_bitsinmiddle_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxth r1, r1
+; ARM-NEXT:    mov r2, #4080
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxth r1, r1
+; THUMB6-NEXT:    movs r2, #255
+; THUMB6-NEXT:    lsls r2, r2, #4
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxth r1, r2
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxth r1, r1
+; THUMB78-NEXT:    mov.w r2, #4080
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i16 4080, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+; i32 scalar
+
+define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
+; ARM-LABEL: scalar_i32_signbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    mov r2, #-2147483648
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsls r2, r2, #31
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i32_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov.w r2, #-2147483648
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i32 2147483648, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
+; ARM-LABEL: scalar_i32_lowestbit_eq:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    mov r2, #1
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i32_lowestbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movs r2, #1
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i32 1, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
+; ARM6-LABEL: scalar_i32_bitsinmiddle_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    mov r2, #65280
+; ARM6-NEXT:    orr r2, r2, #16711680
+; ARM6-NEXT:    and r0, r0, r2, lsl r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: scalar_i32_bitsinmiddle_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r2, #65280
+; ARM78-NEXT:    movt r2, #255
+; ARM78-NEXT:    and r0, r0, r2, lsl r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    ldr r2, .LCPI8_0
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI8_0:
+; THUMB6-NEXT:    .long 16776960 @ 0xffff00
+;
+; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movw r2, #65280
+; THUMB78-NEXT:    movt r2, #255
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i32 16776960, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+; i64 scalar
+
+define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
+; ARM6-LABEL: scalar_i64_signbit_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    mov r0, #-2147483648
+; ARM6-NEXT:    lsl r0, r0, r2
+; ARM6-NEXT:    sub r2, r2, #32
+; ARM6-NEXT:    cmp r2, #0
+; ARM6-NEXT:    movge r0, #0
+; ARM6-NEXT:    and r0, r0, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: scalar_i64_signbit_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r0, #-2147483648
+; ARM78-NEXT:    lsl r0, r0, r2
+; ARM78-NEXT:    sub r2, r2, #32
+; ARM78-NEXT:    cmp r2, #0
+; ARM78-NEXT:    movwge r0, #0
+; ARM78-NEXT:    and r0, r0, r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i64_signbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mov r4, r1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    lsls r1, r0, #31
+; THUMB6-NEXT:    movs r0, #0
+; THUMB6-NEXT:    bl __ashldi3
+; THUMB6-NEXT:    ands r1, r4
+; THUMB6-NEXT:    ands r0, r5
+; THUMB6-NEXT:    orrs r0, r1
+; THUMB6-NEXT:    rsbs r1, r0, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: scalar_i64_signbit_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov.w r0, #-2147483648
+; THUMB78-NEXT:    lsls r0, r2
+; THUMB78-NEXT:    subs r2, #32
+; THUMB78-NEXT:    cmp r2, #0
+; THUMB78-NEXT:    it ge
+; THUMB78-NEXT:    movge r0, #0
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i64 9223372036854775808, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
+; ARM6-LABEL: scalar_i64_lowestbit_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    mov r12, #1
+; ARM6-NEXT:    sub lr, r2, #32
+; ARM6-NEXT:    lsl r3, r12, r2
+; ARM6-NEXT:    rsb r2, r2, #32
+; ARM6-NEXT:    cmp lr, #0
+; ARM6-NEXT:    lsr r2, r12, r2
+; ARM6-NEXT:    movge r3, #0
+; ARM6-NEXT:    lslge r2, r12, lr
+; ARM6-NEXT:    and r0, r3, r0
+; ARM6-NEXT:    and r1, r2, r1
+; ARM6-NEXT:    orr r0, r0, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: scalar_i64_lowestbit_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    push {r11, lr}
+; ARM78-NEXT:    mov r12, #1
+; ARM78-NEXT:    sub lr, r2, #32
+; ARM78-NEXT:    lsl r3, r12, r2
+; ARM78-NEXT:    rsb r2, r2, #32
+; ARM78-NEXT:    cmp lr, #0
+; ARM78-NEXT:    lsr r2, r12, r2
+; ARM78-NEXT:    movwge r3, #0
+; ARM78-NEXT:    lslge r2, r12, lr
+; ARM78-NEXT:    and r0, r3, r0
+; ARM78-NEXT:    and r1, r2, r1
+; ARM78-NEXT:    orr r0, r0, r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    pop {r11, pc}
+;
+; THUMB6-LABEL: scalar_i64_lowestbit_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mov r4, r1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    movs r1, #0
+; THUMB6-NEXT:    bl __ashldi3
+; THUMB6-NEXT:    ands r1, r4
+; THUMB6-NEXT:    ands r0, r5
+; THUMB6-NEXT:    orrs r0, r1
+; THUMB6-NEXT:    rsbs r1, r0, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB7-LABEL: scalar_i64_lowestbit_eq:
+; THUMB7:       @ %bb.0:
+; THUMB7-NEXT:    push {r7, lr}
+; THUMB7-NEXT:    rsb.w r3, r2, #32
+; THUMB7-NEXT:    mov.w r12, #1
+; THUMB7-NEXT:    sub.w lr, r2, #32
+; THUMB7-NEXT:    lsl.w r2, r12, r2
+; THUMB7-NEXT:    lsr.w r3, r12, r3
+; THUMB7-NEXT:    cmp.w lr, #0
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    lslge.w r3, r12, lr
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    movge r2, #0
+; THUMB7-NEXT:    ands r1, r3
+; THUMB7-NEXT:    ands r0, r2
+; THUMB7-NEXT:    orrs r0, r1
+; THUMB7-NEXT:    clz r0, r0
+; THUMB7-NEXT:    lsrs r0, r0, #5
+; THUMB7-NEXT:    pop {r7, pc}
+;
+; THUMB8-LABEL: scalar_i64_lowestbit_eq:
+; THUMB8:       @ %bb.0:
+; THUMB8-NEXT:    .save {r7, lr}
+; THUMB8-NEXT:    push {r7, lr}
+; THUMB8-NEXT:    rsb.w r3, r2, #32
+; THUMB8-NEXT:    sub.w lr, r2, #32
+; THUMB8-NEXT:    mov.w r12, #1
+; THUMB8-NEXT:    cmp.w lr, #0
+; THUMB8-NEXT:    lsr.w r3, r12, r3
+; THUMB8-NEXT:    lsl.w r2, r12, r2
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    lslge.w r3, r12, lr
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    movge r2, #0
+; THUMB8-NEXT:    ands r1, r3
+; THUMB8-NEXT:    ands r0, r2
+; THUMB8-NEXT:    orrs r0, r1
+; THUMB8-NEXT:    clz r0, r0
+; THUMB8-NEXT:    lsrs r0, r0, #5
+; THUMB8-NEXT:    pop {r7, pc}
+  %t0 = shl i64 1, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
+; ARM6-LABEL: scalar_i64_bitsinmiddle_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r4, lr}
+; ARM6-NEXT:    mov r12, #16711680
+; ARM6-NEXT:    sub lr, r2, #32
+; ARM6-NEXT:    orr r12, r12, #-16777216
+; ARM6-NEXT:    cmp lr, #0
+; ARM6-NEXT:    mov r4, #255
+; ARM6-NEXT:    lsl r3, r12, r2
+; ARM6-NEXT:    orr r4, r4, #65280
+; ARM6-NEXT:    movge r3, #0
+; ARM6-NEXT:    and r0, r3, r0
+; ARM6-NEXT:    rsb r3, r2, #32
+; ARM6-NEXT:    cmp lr, #0
+; ARM6-NEXT:    lsr r3, r12, r3
+; ARM6-NEXT:    orr r2, r3, r4, lsl r2
+; ARM6-NEXT:    lslge r2, r12, lr
+; ARM6-NEXT:    and r1, r2, r1
+; ARM6-NEXT:    orr r0, r0, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    pop {r4, pc}
+;
+; ARM78-LABEL: scalar_i64_bitsinmiddle_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    push {r4, lr}
+; ARM78-NEXT:    movw r12, #0
+; ARM78-NEXT:    sub lr, r2, #32
+; ARM78-NEXT:    movt r12, #65535
+; ARM78-NEXT:    cmp lr, #0
+; ARM78-NEXT:    lsl r3, r12, r2
+; ARM78-NEXT:    movw r4, #65535
+; ARM78-NEXT:    movwge r3, #0
+; ARM78-NEXT:    and r0, r3, r0
+; ARM78-NEXT:    rsb r3, r2, #32
+; ARM78-NEXT:    cmp lr, #0
+; ARM78-NEXT:    lsr r3, r12, r3
+; ARM78-NEXT:    orr r2, r3, r4, lsl r2
+; ARM78-NEXT:    lslge r2, r12, lr
+; ARM78-NEXT:    and r1, r2, r1
+; ARM78-NEXT:    orr r0, r0, r1
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    pop {r4, pc}
+;
+; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mov r4, r1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    ldr r0, .LCPI11_0
+; THUMB6-NEXT:    ldr r1, .LCPI11_1
+; THUMB6-NEXT:    bl __ashldi3
+; THUMB6-NEXT:    ands r1, r4
+; THUMB6-NEXT:    ands r0, r5
+; THUMB6-NEXT:    orrs r0, r1
+; THUMB6-NEXT:    rsbs r1, r0, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI11_0:
+; THUMB6-NEXT:    .long 4294901760 @ 0xffff0000
+; THUMB6-NEXT:  .LCPI11_1:
+; THUMB6-NEXT:    .long 65535 @ 0xffff
+;
+; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq:
+; THUMB7:       @ %bb.0:
+; THUMB7-NEXT:    push {r7, lr}
+; THUMB7-NEXT:    movw r3, #65535
+; THUMB7-NEXT:    movw lr, #0
+; THUMB7-NEXT:    lsl.w r12, r3, r2
+; THUMB7-NEXT:    rsb.w r3, r2, #32
+; THUMB7-NEXT:    movt lr, #65535
+; THUMB7-NEXT:    lsr.w r3, lr, r3
+; THUMB7-NEXT:    orr.w r3, r3, r12
+; THUMB7-NEXT:    sub.w r12, r2, #32
+; THUMB7-NEXT:    lsl.w r2, lr, r2
+; THUMB7-NEXT:    cmp.w r12, #0
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    lslge.w r3, lr, r12
+; THUMB7-NEXT:    it ge
+; THUMB7-NEXT:    movge r2, #0
+; THUMB7-NEXT:    ands r1, r3
+; THUMB7-NEXT:    ands r0, r2
+; THUMB7-NEXT:    orrs r0, r1
+; THUMB7-NEXT:    clz r0, r0
+; THUMB7-NEXT:    lsrs r0, r0, #5
+; THUMB7-NEXT:    pop {r7, pc}
+;
+; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq:
+; THUMB8:       @ %bb.0:
+; THUMB8-NEXT:    .save {r7, lr}
+; THUMB8-NEXT:    push {r7, lr}
+; THUMB8-NEXT:    movw r3, #65535
+; THUMB8-NEXT:    movw lr, #0
+; THUMB8-NEXT:    lsl.w r12, r3, r2
+; THUMB8-NEXT:    rsb.w r3, r2, #32
+; THUMB8-NEXT:    movt lr, #65535
+; THUMB8-NEXT:    lsr.w r3, lr, r3
+; THUMB8-NEXT:    orr.w r3, r3, r12
+; THUMB8-NEXT:    sub.w r12, r2, #32
+; THUMB8-NEXT:    cmp.w r12, #0
+; THUMB8-NEXT:    lsl.w r2, lr, r2
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    lslge.w r3, lr, r12
+; THUMB8-NEXT:    it ge
+; THUMB8-NEXT:    movge r2, #0
+; THUMB8-NEXT:    ands r1, r3
+; THUMB8-NEXT:    ands r0, r2
+; THUMB8-NEXT:    orrs r0, r1
+; THUMB8-NEXT:    clz r0, r0
+; THUMB8-NEXT:    lsrs r0, r0, #5
+; THUMB8-NEXT:    pop {r7, pc}
+  %t0 = shl i64 281474976645120, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few trivial vector tests
+;------------------------------------------------------------------------------;
+
+define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_splat_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    and r0, r0, lr, lsl r12
+; ARM6-NEXT:    ldr r12, [sp, #12]
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r1, r1, lr, lsl r12
+; ARM6-NEXT:    ldr r12, [sp, #16]
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    and r2, r2, lr, lsl r12
+; ARM6-NEXT:    ldr r12, [sp, #20]
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    and r3, r3, lr, lsl r12
+; ARM6-NEXT:    lsr r2, r2, #5
+; ARM6-NEXT:    clz r3, r3
+; ARM6-NEXT:    lsr r3, r3, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_splat_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    vmov.i32 q8, #0x1
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
+; ARM78-NEXT:    vshl.u32 q8, q8, q9
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_splat_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r6, lr}
+; THUMB6-NEXT:    ldr r5, [sp, #16]
+; THUMB6-NEXT:    movs r4, #1
+; THUMB6-NEXT:    mov r6, r4
+; THUMB6-NEXT:    lsls r6, r5
+; THUMB6-NEXT:    ands r6, r0
+; THUMB6-NEXT:    rsbs r0, r6, #0
+; THUMB6-NEXT:    adcs r0, r6
+; THUMB6-NEXT:    ldr r5, [sp, #20]
+; THUMB6-NEXT:    mov r6, r4
+; THUMB6-NEXT:    lsls r6, r5
+; THUMB6-NEXT:    ands r6, r1
+; THUMB6-NEXT:    rsbs r1, r6, #0
+; THUMB6-NEXT:    adcs r1, r6
+; THUMB6-NEXT:    ldr r5, [sp, #24]
+; THUMB6-NEXT:    mov r6, r4
+; THUMB6-NEXT:    lsls r6, r5
+; THUMB6-NEXT:    ands r6, r2
+; THUMB6-NEXT:    rsbs r2, r6, #0
+; THUMB6-NEXT:    adcs r2, r6
+; THUMB6-NEXT:    ldr r5, [sp, #28]
+; THUMB6-NEXT:    lsls r4, r5
+; THUMB6-NEXT:    ands r4, r3
+; THUMB6-NEXT:    rsbs r3, r4, #0
+; THUMB6-NEXT:    adcs r3, r4
+; THUMB6-NEXT:    pop {r4, r5, r6, pc}
+;
+; THUMB78-LABEL: vec_4xi32_splat_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    vmov.i32 q8, #0x1
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
+; THUMB78-NEXT:    vshl.u32 q8, q8, q9
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r12, [sp, #4]
+; ARM6-NEXT:    mov r0, #1
+; ARM6-NEXT:    and r0, r1, r0, lsl r12
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r1, r0, #5
+; ARM6-NEXT:    mov r0, #65280
+; ARM6-NEXT:    orr r0, r0, #16711680
+; ARM6-NEXT:    and r0, r2, r0, lsl r12
+; ARM6-NEXT:    ldr r12, [sp, #12]
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r2, r0, #5
+; ARM6-NEXT:    mov r0, #-2147483648
+; ARM6-NEXT:    and r0, r3, r0, lsl r12
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r3, r0, #5
+; ARM6-NEXT:    mov r0, #1
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d16, d17}, [r12]
+; ARM78-NEXT:    adr r12, .LCPI13_0
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r12:128]
+; ARM78-NEXT:    vshl.u32 q8, q9, q8
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+; ARM78-NEXT:    .p2align 4
+; ARM78-NEXT:  @ %bb.1:
+; ARM78-NEXT:  .LCPI13_0:
+; ARM78-NEXT:    .long 0 @ 0x0
+; ARM78-NEXT:    .long 1 @ 0x1
+; ARM78-NEXT:    .long 16776960 @ 0xffff00
+; ARM78-NEXT:    .long 2147483648 @ 0x80000000
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    mov r5, r0
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #24]
+; THUMB6-NEXT:    ldr r5, .LCPI13_0
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r2
+; THUMB6-NEXT:    rsbs r2, r5, #0
+; THUMB6-NEXT:    adcs r2, r5
+; THUMB6-NEXT:    lsls r4, r0, #31
+; THUMB6-NEXT:    ldr r5, [sp, #28]
+; THUMB6-NEXT:    lsls r4, r5
+; THUMB6-NEXT:    ands r4, r3
+; THUMB6-NEXT:    rsbs r3, r4, #0
+; THUMB6-NEXT:    adcs r3, r4
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI13_0:
+; THUMB6-NEXT:    .long 16776960 @ 0xffff00
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d16, d17}, [r12]
+; THUMB78-NEXT:    adr.w r12, .LCPI13_0
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12:128]
+; THUMB78-NEXT:    vshl.u32 q8, q9, q8
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+; THUMB78-NEXT:    .p2align 4
+; THUMB78-NEXT:  @ %bb.1:
+; THUMB78-NEXT:  .LCPI13_0:
+; THUMB78-NEXT:    .long 0 @ 0x0
+; THUMB78-NEXT:    .long 1 @ 0x1
+; THUMB78-NEXT:    .long 16776960 @ 0xffff00
+; THUMB78-NEXT:    .long 2147483648 @ 0x80000000
+  %t0 = shl <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r2, [sp, #12]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    and r1, r1, lr, lsl r2
+; ARM6-NEXT:    ldr r2, [sp, #20]
+; ARM6-NEXT:    and r0, r0, lr, lsl r12
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r2, r3, lr, lsl r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    lsr r3, r2, #5
+; ARM6-NEXT:    mov r2, #1
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    vmov.i32 q8, #0x1
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
+; ARM78-NEXT:    vshl.u32 q8, q8, q9
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #16]
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r0
+; THUMB6-NEXT:    rsbs r0, r5, #0
+; THUMB6-NEXT:    adcs r0, r5
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #28]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r3
+; THUMB6-NEXT:    rsbs r3, r5, #0
+; THUMB6-NEXT:    adcs r3, r5
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    vmov.i32 q8, #0x1
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
+; THUMB78-NEXT:    vshl.u32 q8, q8, q9
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r2, [sp, #12]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    and r1, r1, lr, lsl r2
+; ARM6-NEXT:    ldr r2, [sp, #20]
+; ARM6-NEXT:    and r0, r0, lr, lsl r12
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r2, r3, lr, lsl r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    lsr r3, r2, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    vmov.i32 q8, #0x1
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
+; ARM78-NEXT:    vshl.u32 q8, q8, q9
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #16]
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r0
+; THUMB6-NEXT:    rsbs r0, r5, #0
+; THUMB6-NEXT:    adcs r0, r5
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #28]
+; THUMB6-NEXT:    lsls r2, r4
+; THUMB6-NEXT:    ands r2, r3
+; THUMB6-NEXT:    rsbs r3, r2, #0
+; THUMB6-NEXT:    adcs r3, r2
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    vmov.i32 q8, #0x1
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
+; THUMB78-NEXT:    vshl.u32 q8, q8, q9
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; ARM6-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    push {r11, lr}
+; ARM6-NEXT:    ldr r2, [sp, #12]
+; ARM6-NEXT:    mov lr, #1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    and r1, r1, lr, lsl r2
+; ARM6-NEXT:    ldr r2, [sp, #20]
+; ARM6-NEXT:    and r0, r0, lr, lsl r12
+; ARM6-NEXT:    clz r1, r1
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    and r2, r3, lr, lsl r2
+; ARM6-NEXT:    lsr r1, r1, #5
+; ARM6-NEXT:    clz r2, r2
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    lsr r3, r2, #5
+; ARM6-NEXT:    pop {r11, pc}
+;
+; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    vmov.i32 q8, #0x1
+; ARM78-NEXT:    mov r12, sp
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
+; ARM78-NEXT:    vshl.u32 q8, q8, q9
+; ARM78-NEXT:    vmov d19, r2, r3
+; ARM78-NEXT:    vmov d18, r0, r1
+; ARM78-NEXT:    vtst.32 q8, q8, q9
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vmovn.i32 d16, q8
+; ARM78-NEXT:    vmov r0, r1, d16
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    ldr r4, [sp, #16]
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r0
+; THUMB6-NEXT:    rsbs r0, r5, #0
+; THUMB6-NEXT:    adcs r0, r5
+; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    mov r5, r2
+; THUMB6-NEXT:    lsls r5, r4
+; THUMB6-NEXT:    ands r5, r1
+; THUMB6-NEXT:    rsbs r1, r5, #0
+; THUMB6-NEXT:    adcs r1, r5
+; THUMB6-NEXT:    ldr r4, [sp, #28]
+; THUMB6-NEXT:    lsls r2, r4
+; THUMB6-NEXT:    ands r2, r3
+; THUMB6-NEXT:    rsbs r3, r2, #0
+; THUMB6-NEXT:    adcs r3, r2
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
+;
+; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    vmov.i32 q8, #0x1
+; THUMB78-NEXT:    mov r12, sp
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
+; THUMB78-NEXT:    vshl.u32 q8, q8, q9
+; THUMB78-NEXT:    vmov d19, r2, r3
+; THUMB78-NEXT:    vmov d18, r0, r1
+; THUMB78-NEXT:    vtst.32 q8, q8, q9
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vmovn.i32 d16, q8
+; THUMB78-NEXT:    vmov r0, r1, d16
+; THUMB78-NEXT:    bx lr
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+
+;------------------------------------------------------------------------------;
+; A special tests
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
+; ARM6-LABEL: scalar_i8_signbit_ne:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    uxtb r1, r1
+; ARM6-NEXT:    mvn r2, #127
+; ARM6-NEXT:    and r0, r0, r2, lsl r1
+; ARM6-NEXT:    uxtb r0, r0
+; ARM6-NEXT:    cmp r0, #0
+; ARM6-NEXT:    movne r0, #1
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: scalar_i8_signbit_ne:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    uxtb r1, r1
+; ARM78-NEXT:    mvn r2, #127
+; ARM78-NEXT:    and r0, r0, r2, lsl r1
+; ARM78-NEXT:    uxtb r0, r0
+; ARM78-NEXT:    cmp r0, #0
+; ARM78-NEXT:    movwne r0, #1
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_ne:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #127
+; THUMB6-NEXT:    mvns r2, r2
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r0, r2
+; THUMB6-NEXT:    subs r1, r0, #1
+; THUMB6-NEXT:    sbcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_ne:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    mvn r2, #127
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    cmp r0, #0
+; THUMB78-NEXT:    it ne
+; THUMB78-NEXT:    movne r0, #1
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp ne i8 %t1, 0 ;  we are perfectly happy with 'ne' predicate
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few negative tests
+;------------------------------------------------------------------------------;
+
+define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
+; ARM6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    uxtb r1, r1
+; ARM6-NEXT:    mov r2, #24
+; ARM6-NEXT:    and r0, r0, r2, lsl r1
+; ARM6-NEXT:    sxtb r1, r0
+; ARM6-NEXT:    mov r0, #0
+; ARM6-NEXT:    cmp r1, #0
+; ARM6-NEXT:    movlt r0, #1
+; ARM6-NEXT:    bx lr
+;
+; ARM78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    uxtb r1, r1
+; ARM78-NEXT:    mov r2, #24
+; ARM78-NEXT:    and r0, r0, r2, lsl r1
+; ARM78-NEXT:    sxtb r1, r0
+; ARM78-NEXT:    mov r0, #0
+; ARM78-NEXT:    cmp r1, #0
+; ARM78-NEXT:    movwlt r0, #1
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #24
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    sxtb r0, r2
+; THUMB6-NEXT:    cmp r0, #0
+; THUMB6-NEXT:    blt .LBB18_2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:    movs r0, #0
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:  .LBB18_2:
+; THUMB6-NEXT:    movs r0, #1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    movs r2, #24
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    sxtb r1, r0
+; THUMB78-NEXT:    movs r0, #0
+; THUMB78-NEXT:    cmp r1, #0
+; THUMB78-NEXT:    it lt
+; THUMB78-NEXT:    movlt r0, #1
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp slt i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
+; ARM-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    uxtb r1, r1
+; ARM-NEXT:    mvn r2, #127
+; ARM-NEXT:    and r0, r0, r2, lsl r1
+; ARM-NEXT:    mvn r1, #0
+; ARM-NEXT:    uxtab r0, r1, r0
+; ARM-NEXT:    clz r0, r0
+; ARM-NEXT:    lsr r0, r0, #5
+; ARM-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    movs r2, #127
+; THUMB6-NEXT:    mvns r2, r2
+; THUMB6-NEXT:    lsls r2, r1
+; THUMB6-NEXT:    ands r2, r0
+; THUMB6-NEXT:    uxtb r0, r2
+; THUMB6-NEXT:    subs r1, r0, #1
+; THUMB6-NEXT:    rsbs r0, r1, #0
+; THUMB6-NEXT:    adcs r0, r1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    mvn r2, #127
+; THUMB78-NEXT:    lsl.w r1, r2, r1
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    mov.w r1, #-1
+; THUMB78-NEXT:    uxtab r0, r1, r0
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 1 ; should be comparing with 0
+  ret i1 %res
+}
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
new file mode 100644
index 0000000000000..d4cab1a392195
--- /dev/null
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -0,0 +1,926 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI12
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI12
+
+; We are looking for the following pattern here:
+;   (X & (C l>> Y)) ==/!= 0
+; It may be optimal to hoist the constant:
+;   ((X << Y) & C) ==/!= 0
+
+;------------------------------------------------------------------------------;
+; A few scalar test
+;------------------------------------------------------------------------------;
+
+; i8 scalar
+
+define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_signbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    shrb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_signbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_lowestbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $1, %al
+; X86-NEXT:    shrb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_lowestbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $1, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = lshr i8 1, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_bitsinmiddle_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $24, %al
+; X86-NEXT:    shrb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_bitsinmiddle_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $24, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = lshr i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+; i16 scalar
+
+define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i16_signbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $32768, %eax # imm = 0x8000
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i16_signbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $32768, %eax # imm = 0x8000
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i16_signbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $32768, %ecx # imm = 0x8000
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i16_signbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $32768, %eax # imm = 0x8000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testw %di, %ax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i16_signbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $32768, %eax # imm = 0x8000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testw %di, %ax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i16_signbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $32768, %eax # imm = 0x8000
+; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testw %di, %ax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i16 32768, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i16_lowestbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i16_lowestbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i16_lowestbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $1, %ecx
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i16_lowestbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testw %di, %ax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i16_lowestbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $1, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testw %di, %ax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i16_lowestbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $1, %eax
+; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testw %di, %ax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i16 1, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $4080, %ecx # imm = 0xFF0
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testw %di, %ax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testw %di, %ax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testw %di, %ax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i16 4080, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+; i32 scalar
+
+define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_signbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_signbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_signbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_signbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testl %edi, %eax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_signbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testl %edi, %eax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_signbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testl %edi, %eax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i32 2147483648, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_lowestbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_lowestbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_lowestbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $1, %ecx
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_lowestbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testl %edi, %eax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_lowestbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $1, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testl %edi, %eax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_lowestbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $1, %eax
+; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testl %edi, %eax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i32 1, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $16776960, %ecx # imm = 0xFFFF00
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testl %edi, %eax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testl %edi, %eax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testl %edi, %eax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i32 16776960, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+; i64 scalar
+
+define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i64_signbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    xorl %esi, %esi
+; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    cmovnel %eax, %esi
+; X86-NOBMI-NEXT:    cmovnel %edx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    orl %esi, %eax
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i64_signbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    cmovnel %eax, %esi
+; X86-BMI1-NEXT:    cmovnel %edx, %eax
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    orl %esi, %eax
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i64_signbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    pushl %esi
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI12-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-BMI12-NEXT:    xorl %edx, %edx
+; X86-BMI12-NEXT:    xorl %esi, %esi
+; X86-BMI12-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI12-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI12-NEXT:    testb $32, %cl
+; X86-BMI12-NEXT:    cmovnel %eax, %esi
+; X86-BMI12-NEXT:    cmovnel %edx, %eax
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI12-NEXT:    orl %esi, %eax
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    popl %esi
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i64_signbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    testq %rdi, %rax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i64_signbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    testq %rdi, %rax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i64_signbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-BMI12-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI12-NEXT:    testq %rdi, %rax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i64 9223372036854775808, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
+; X86-LABEL: scalar_i64_lowestbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl $1, %edx
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl $0, %edx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i64_lowestbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    testq %rdi, %rax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i64_lowestbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movl $1, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    testq %rdi, %rax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i64_lowestbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $1, %eax
+; X64-BMI12-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI12-NEXT:    testq %rdi, %rax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i64 1, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-NOBMI-NEXT:    movl $-65536, %edx # imm = 0xFFFF0000
+; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    xorl %esi, %esi
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    cmovnel %eax, %edx
+; X86-NOBMI-NEXT:    cmovel %eax, %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    orl %edx, %esi
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-BMI1-NEXT:    movl $-65536, %edx # imm = 0xFFFF0000
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    cmovnel %eax, %edx
+; X86-BMI1-NEXT:    cmovel %eax, %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    orl %edx, %esi
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    pushl %esi
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI12-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-BMI12-NEXT:    movl $-65536, %edx # imm = 0xFFFF0000
+; X86-BMI12-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI12-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI12-NEXT:    xorl %esi, %esi
+; X86-BMI12-NEXT:    testb $32, %cl
+; X86-BMI12-NEXT:    cmovnel %eax, %edx
+; X86-BMI12-NEXT:    cmovel %eax, %esi
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI12-NEXT:    orl %edx, %esi
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    popl %esi
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    testq %rdi, %rax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    testq %rdi, %rax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI12-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI12-NEXT:    testq %rdi, %rax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i64 281474976645120, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few trivial vector tests
+;------------------------------------------------------------------------------;
+
+define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_splat_eq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm5
+; CHECK-NEXT:    psrld %xmm2, %xmm5
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    psrld %xmm1, %xmm3
+; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_eq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,16776960,2147483648]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm5
+; CHECK-NEXT:    psrld %xmm2, %xmm5
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    psrld %xmm1, %xmm3
+; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %t0 = lshr <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = <1,1,u,1>
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm5
+; CHECK-NEXT:    psrld %xmm2, %xmm5
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    psrld %xmm1, %xmm3
+; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm5
+; CHECK-NEXT:    psrld %xmm2, %xmm5
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    psrld %xmm1, %xmm3
+; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = <1,1,u,1>
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm5
+; CHECK-NEXT:    psrld %xmm2, %xmm5
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; CHECK-NEXT:    movdqa %xmm3, %xmm4
+; CHECK-NEXT:    psrld %xmm2, %xmm4
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; CHECK-NEXT:    psrld %xmm1, %xmm3
+; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; CHECK-NEXT:    andps %xmm5, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+  %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+
+;------------------------------------------------------------------------------;
+; A special tests
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_signbit_ne:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    shrb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_signbit_ne:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp ne i8 %t1, 0 ;  we are perfectly happy with 'ne' predicate
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few negative tests
+;------------------------------------------------------------------------------;
+
+define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
+; X86-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $24, %al
+; X86-NEXT:    shrb %cl, %al
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shrb $7, %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $24, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    andb %dil, %al
+; X64-NEXT:    shrb $7, %al
+; X64-NEXT:    retq
+  %t0 = lshr i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp slt i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    shrb %cl, %al
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    cmpb $1, %al
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrb %cl, %al
+; X64-NEXT:    andb %dil, %al
+; X64-NEXT:    cmpb $1, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = lshr i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 1 ; should be comparing with 0
+  ret i1 %res
+}
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
new file mode 100644
index 0000000000000..1f190fdee26fb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -0,0 +1,874 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI12
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI12
+
+; We are looking for the following pattern here:
+;   (X & (C << Y)) ==/!= 0
+; It may be optimal to hoist the constant:
+;   ((X l>> Y) & C) ==/!= 0
+
+;------------------------------------------------------------------------------;
+; A few scalar test
+;------------------------------------------------------------------------------;
+
+; i8 scalar
+
+define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_signbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_signbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_lowestbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    btl %eax, %ecx
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_lowestbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    btl %esi, %edi
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %t0 = shl i8 1, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_bitsinmiddle_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $24, %al
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_bitsinmiddle_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $24, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = shl i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 0
+  ret i1 %res
+}
+
+; i16 scalar
+
+define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i16_signbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-32768, %eax # imm = 0x8000
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i16_signbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-32768, %eax # imm = 0x8000
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i16_signbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $-32768, %ecx # imm = 0x8000
+; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i16_signbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-32768, %eax # imm = 0x8000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    testw %di, %ax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i16_signbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $-32768, %eax # imm = 0x8000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    testw %di, %ax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i16_signbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $-32768, %eax # imm = 0x8000
+; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testw %di, %ax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i16 32768, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
+; X86-LABEL: scalar_i16_lowestbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    btl %eax, %ecx
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i16_lowestbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    btl %esi, %edi
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %t0 = shl i16 1, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $4080, %ecx # imm = 0xFF0
+; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    testw %di, %ax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    testw %di, %ax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testw %di, %ax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i16 4080, %y
+  %t1 = and i16 %t0, %x
+  %res = icmp eq i16 %t1, 0
+  ret i1 %res
+}
+
+; i32 scalar
+
+define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_signbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_signbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_signbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
+; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_signbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    testl %edi, %eax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_signbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    testl %edi, %eax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_signbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testl %edi, %eax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i32 2147483648, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
+; X86-LABEL: scalar_i32_lowestbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    btl %ecx, %eax
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i32_lowestbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    btl %esi, %edi
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %t0 = shl i32 1, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $16776960, %ecx # imm = 0xFFFF00
+; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    testl %edi, %eax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    testl %edi, %eax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI12-NEXT:    testl %edi, %eax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i32 16776960, %y
+  %t1 = and i32 %t0, %x
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
+; i64 scalar
+
+define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
+; X86-LABEL: scalar_i64_signbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    movl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    orl $0, %edx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i64_signbit_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    testq %rdi, %rax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i64_signbit_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shlq %cl, %rax
+; X64-BMI1-NEXT:    testq %rdi, %rax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i64_signbit_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-BMI12-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI12-NEXT:    testq %rdi, %rax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i64 9223372036854775808, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i64_lowestbit_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    xorl %esi, %esi
+; X86-NOBMI-NEXT:    shldl %cl, %eax, %esi
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    cmovnel %eax, %esi
+; X86-NOBMI-NEXT:    cmovnel %edx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    orl %esi, %eax
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    cmovnel %eax, %esi
+; X86-BMI1-NEXT:    cmovnel %edx, %eax
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    orl %esi, %eax
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i64_lowestbit_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    pushl %esi
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI12-NEXT:    movl $1, %eax
+; X86-BMI12-NEXT:    xorl %edx, %edx
+; X86-BMI12-NEXT:    xorl %esi, %esi
+; X86-BMI12-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI12-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI12-NEXT:    testb $32, %cl
+; X86-BMI12-NEXT:    cmovnel %eax, %esi
+; X86-BMI12-NEXT:    cmovnel %edx, %eax
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI12-NEXT:    orl %esi, %eax
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    popl %esi
+; X86-BMI12-NEXT:    retl
+;
+; X64-LABEL: scalar_i64_lowestbit_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    btq %rsi, %rdi
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %t0 = shl i64 1, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-65536, %eax # imm = 0xFFFF0000
+; X86-NOBMI-NEXT:    movl $65535, %edx # imm = 0xFFFF
+; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    xorl %esi, %esi
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    cmovnel %eax, %edx
+; X86-NOBMI-NEXT:    cmovel %eax, %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    orl %edx, %esi
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-65536, %eax # imm = 0xFFFF0000
+; X86-BMI1-NEXT:    movl $65535, %edx # imm = 0xFFFF
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    cmovnel %eax, %edx
+; X86-BMI1-NEXT:    cmovel %eax, %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    orl %edx, %esi
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    pushl %esi
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI12-NEXT:    movl $-65536, %eax # imm = 0xFFFF0000
+; X86-BMI12-NEXT:    movl $65535, %edx # imm = 0xFFFF
+; X86-BMI12-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI12-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI12-NEXT:    xorl %esi, %esi
+; X86-BMI12-NEXT:    testb $32, %cl
+; X86-BMI12-NEXT:    cmovnel %eax, %edx
+; X86-BMI12-NEXT:    cmovel %eax, %esi
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI12-NEXT:    orl %edx, %esi
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    popl %esi
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    testq %rdi, %rax
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shlq %cl, %rax
+; X64-BMI1-NEXT:    testq %rdi, %rax
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI12-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI12-NEXT:    testq %rdi, %rax
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i64 281474976645120, %y
+  %t1 = and i64 %t0, %x
+  %res = icmp eq i64 %t1, 0
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few trivial vector tests
+;------------------------------------------------------------------------------;
+
+define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: vec_4xi32_splat_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pslld $23, %xmm1
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm2, %xmm3
+; X86-NEXT:    pmuludq %xmm1, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-NEXT:    pand %xmm1, %xmm0
+; X86-NEXT:    pxor %xmm1, %xmm1
+; X86-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_4xi32_splat_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $23, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm2, %xmm3
+; X64-NEXT:    pmuludq %xmm1, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT:    pand %xmm1, %xmm0
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: vec_4xi32_nonsplat_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pslld $23, %xmm1
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
+; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm1, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm3, %xmm1
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-NEXT:    pand %xmm2, %xmm0
+; X86-NEXT:    pxor %xmm1, %xmm1
+; X86-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_4xi32_nonsplat_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $23, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm1, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm3, %xmm1
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT:    pand %xmm2, %xmm0
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = shl <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+
+define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pslld $23, %xmm1
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm2, %xmm3
+; X86-NEXT:    pmuludq %xmm1, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-NEXT:    pand %xmm1, %xmm0
+; X86-NEXT:    pxor %xmm1, %xmm1
+; X86-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $23, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm2, %xmm3
+; X64-NEXT:    pmuludq %xmm1, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT:    pand %xmm1, %xmm0
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pslld $23, %xmm1
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm2, %xmm3
+; X86-NEXT:    pmuludq %xmm1, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-NEXT:    pand %xmm1, %xmm0
+; X86-NEXT:    pxor %xmm1, %xmm1
+; X86-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $23, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm2, %xmm3
+; X64-NEXT:    pmuludq %xmm1, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT:    pand %xmm1, %xmm0
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    pslld $23, %xmm1
+; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm2, %xmm3
+; X86-NEXT:    pmuludq %xmm1, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-NEXT:    pand %xmm1, %xmm0
+; X86-NEXT:    pxor %xmm1, %xmm1
+; X86-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    pslld $23, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm2, %xmm3
+; X64-NEXT:    pmuludq %xmm1, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT:    pand %xmm1, %xmm0
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-NEXT:    retq
+  %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
+  %t1 = and <4 x i32> %t0, %x
+  %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
+  ret <4 x i1> %res
+}
+
+;------------------------------------------------------------------------------;
+; A special tests
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_signbit_ne:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_signbit_ne:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    testb %dil, %al
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp ne i8 %t1, 0 ;  we are perfectly happy with 'ne' predicate
+  ret i1 %res
+}
+
+;------------------------------------------------------------------------------;
+; A few negative tests
+;------------------------------------------------------------------------------;
+
+define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
+; X86-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $24, %al
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    shrb $7, %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: negative_scalar_i8_bitsinmiddle_slt:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $24, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    andb %dil, %al
+; X64-NEXT:    shrb $7, %al
+; X64-NEXT:    retq
+  %t0 = shl i8 24, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp slt i8 %t1, 0
+  ret i1 %res
+}
+
+define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
+; X86-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; X86:       # %bb.0:
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movb $-128, %al
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    cmpb $1, %al
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i8_signbit_eq_with_nonzero:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movb $-128, %al
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shlb %cl, %al
+; X64-NEXT:    andb %dil, %al
+; X64-NEXT:    cmpb $1, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %t0 = shl i8 128, %y
+  %t1 = and i8 %t0, %x
+  %res = icmp eq i8 %t1, 1 ; should be comparing with 0
+  ret i1 %res
+}

From c3ea7c66fec021867e005ad1b02f3c7e80feaa85 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda@apple.com>
Date: Mon, 3 Jun 2019 22:34:12 +0000
Subject: [PATCH 0974/1176] Add support for mid-function epilogues on x86 that
 end in a non-local jump.

The x86 assembly inspection engine has code to support detecting a
mid-function epilogue that ends in a RET instruction; add support for
recognizing an epilogue that ends in a JMP, and add a check that the
unwind state has been restored to the original stack setup; reinstate
the post-prologue unwind state after this JMP instruction.

The assembly inspection engine used for other architectures,
UnwindAssemblyInstEmulation, detects mid-function epilogues by
tracking branch instructions within the function and "forwards"
the current unwind state to the targets of the branches.  If
an epilogue unwinds the stack and exits, followed by a branch
target, we get back to the correct unwind state.  The x86
unwinder should move to this same algorithm, or possibly even
look at implementing an x86 instruction emulation plugin and
get UnwindAssemblyInstEmulation to work for x86 too.  I added
a branch instruction recognizier method that will be necessary
if we want to switch the algorithm.

Differential Revision: https://reviews.llvm.org/D62764
<rdar://problem/51074422>

llvm-svn: 362456
---
 .../x86/x86AssemblyInspectionEngine.cpp       | 264 ++++++++++++++++--
 .../x86/x86AssemblyInspectionEngine.h         |  12 +
 .../x86/Testx86AssemblyInspectionEngine.cpp   | 122 ++++++++
 3 files changed, 379 insertions(+), 19 deletions(-)

diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
index c8468181eef70..e4d5ff0d353de 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
@@ -667,12 +667,209 @@ bool x86AssemblyInspectionEngine::mov_reg_to_local_stack_frame_p(
   return false;
 }
 
+// Returns true if this is a jmp instruction where we can't
+// know the destination address statically. 
+//
+// ff e0                                   jmpq   *%rax
+// ff e1                                   jmpq   *%rcx
+// ff 60 28                                jmpq   *0x28(%rax)
+// ff 60 60                                jmpq   *0x60(%rax)
+bool x86AssemblyInspectionEngine::jmp_to_reg_p() {
+  if (*m_cur_insn != 0xff)
+    return false;
+
+  // The second byte is a ModR/M /4 byte, strip off the registers
+  uint8_t second_byte_sans_reg = *(m_cur_insn + 1) & ~7;
+
+  // Don't handle 0x24 disp32, because the target address is
+  // knowable statically - pc_rel_branch_or_jump_p() will
+  // return the target address.
+
+  // [reg]
+  if (second_byte_sans_reg == 0x20)
+    return true;
+
+  // [reg]+disp8
+  if (second_byte_sans_reg == 0x60)
+    return true;
+
+  // [reg]+disp32
+  if (second_byte_sans_reg == 0xa0)
+    return true;
+
+  // reg
+  if (second_byte_sans_reg == 0xe0)
+    return true;
+
+  // disp32
+  // jumps to an address stored in memory, the value can't be cached
+  // in an unwind plan.
+  if (second_byte_sans_reg == 0x24)
+    return true;
+
+  // use SIB byte
+  // ff 24 fe  jmpq   *(%rsi,%rdi,8)
+  if (second_byte_sans_reg == 0x24)
+    return true;
+
+  return false;
+}
+
+// Detect branches to fixed pc-relative offsets.
+// Returns the offset from the address of the next instruction
+// that may be branch/jumped to.
+//
+// Cannot determine the offset of a JMP that jumps to the address in
+// a register ("jmpq *%rax") or offset from a register value 
+// ("jmpq *0x28(%rax)"), this method will return false on those
+// instructions.
+//
+// These instructions all end in either a relative 8/16/32 bit value
+// depending on the instruction and the current execution mode of the
+// inferior process.  Once we know the size of the opcode instruction, 
+// we can use the total instruction length to determine the size of
+// the relative offset without having to compute it correctly.
+
+bool x86AssemblyInspectionEngine::pc_rel_branch_or_jump_p (
+    const int instruction_length, int &offset)
+{
+  int opcode_size = 0;
+
+  uint8_t b1 = m_cur_insn[0];
+  uint8_t b2 = m_cur_insn[1];
+
+  switch (b1) {
+    case 0x77: // JA/JNBE rel8
+    case 0x73: // JAE/JNB/JNC rel8
+    case 0x72: // JB/JC/JNAE rel8
+    case 0x76: // JBE/JNA rel8
+    case 0xe3: // JCXZ/JECXZ/JRCXZ rel8
+    case 0x74: // JE/JZ rel8
+    case 0x7f: // JG/JNLE rel8
+    case 0x7d: // JGE/JNL rel8
+    case 0x7c: // JL/JNGE rel8
+    case 0x7e: // JNG/JLE rel8
+    case 0x71: // JNO rel8
+    case 0x7b: // JNP/JPO rel8
+    case 0x79: // JNS rel8
+    case 0x75: // JNE/JNZ rel8
+    case 0x70: // JO rel8
+    case 0x7a: // JP/JPE rel8
+    case 0x78: // JS rel8
+    case 0xeb: // JMP rel8
+    case 0xe9: // JMP rel16/rel32
+      opcode_size = 1;
+      break;
+    default:
+      break;
+  }
+  if (b1 == 0x0f && opcode_size == 0) {
+    switch (b2) {
+      case 0x87: // JA/JNBE rel16/rel32
+      case 0x86: // JBE/JNA rel16/rel32
+      case 0x84: // JE/JZ rel16/rel32
+      case 0x8f: // JG/JNLE rel16/rel32
+      case 0x8d: // JNL/JGE rel16/rel32
+      case 0x8e: // JLE rel16/rel32
+      case 0x82: // JB/JC/JNAE rel16/rel32
+      case 0x83: // JAE/JNB/JNC rel16/rel32
+      case 0x85: // JNE/JNZ rel16/rel32
+      case 0x8c: // JL/JNGE rel16/rel32
+      case 0x81: // JNO rel16/rel32
+      case 0x8b: // JNP/JPO rel16/rel32
+      case 0x89: // JNS rel16/rel32
+      case 0x80: // JO rel16/rel32
+      case 0x8a: // JP rel16/rel32
+      case 0x88: // JS rel16/rel32
+        opcode_size = 2;
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (opcode_size == 0)
+    return false;
+
+  offset = 0;
+  if (instruction_length - opcode_size == 1) {
+    int8_t rel8 = (int8_t) *(m_cur_insn + opcode_size);
+    offset = rel8;
+  } else if (instruction_length - opcode_size == 2) {
+    int16_t rel16 = extract_2_signed (m_cur_insn + opcode_size);
+    offset = rel16;
+  } else if (instruction_length - opcode_size == 4) {
+    int32_t rel32 = extract_4_signed (m_cur_insn + opcode_size);
+    offset = rel32;
+  } else {
+    return false;
+  }
+  return true;
+}
+
+// Returns true if this instruction is a intra-function branch or jump -
+// a branch/jump within the bounds of this same function.
+// Cannot predict where a jump through a register value ("jmpq *%rax")
+// will go, so it will return false on that instruction.
+bool x86AssemblyInspectionEngine::local_branch_p (
+    const addr_t current_func_text_offset,
+    const AddressRange &func_range,
+    const int instruction_length,
+    addr_t &target_insn_offset) {
+  int offset;
+  if (pc_rel_branch_or_jump_p (instruction_length, offset) && offset != 0) {
+    addr_t next_pc_value = current_func_text_offset + instruction_length;
+    if (offset < 0 && -offset > current_func_text_offset) {
+      // Branch target is before the start of this function
+      return false;
+    }
+    if (offset + next_pc_value > func_range.GetByteSize()) {
+      // Branch targets outside this function's bounds
+      return false;
+    }
+    // This instruction branches to target_insn_offset (byte offset into the function)
+    target_insn_offset = next_pc_value + offset;
+    return true;
+  }
+  return false;
+}
+
+// Returns true if this instruction is a inter-function branch or jump - a
+// branch/jump to another function.
+// Cannot predict where a jump through a register value ("jmpq *%rax")
+// will go, so it will return false on that instruction.
+bool x86AssemblyInspectionEngine::non_local_branch_p (
+    const addr_t current_func_text_offset,
+    const AddressRange &func_range,
+    const int instruction_length) {
+  int offset;
+  addr_t target_insn_offset;
+  if (pc_rel_branch_or_jump_p (instruction_length, offset)) {
+    return !local_branch_p(current_func_text_offset,func_range,instruction_length,target_insn_offset);
+  }
+  return false;
+}
+
 // ret [0xc3] or [0xcb] or [0xc2 imm16] or [0xca imm16]
 bool x86AssemblyInspectionEngine::ret_pattern_p() {
   uint8_t *p = m_cur_insn;
   return *p == 0xc3 || *p == 0xc2 || *p == 0xca || *p == 0xcb;
 }
 
+uint16_t x86AssemblyInspectionEngine::extract_2(uint8_t *b) {
+  uint16_t v = 0;
+  for (int i = 1; i >= 0; i--)
+    v = (v << 8) | b[i];
+  return v;
+}
+
+int16_t x86AssemblyInspectionEngine::extract_2_signed(uint8_t *b) {
+  int16_t v = 0;
+  for (int i = 1; i >= 0; i--)
+    v = (v << 8) | b[i];
+  return v;
+}
+
 uint32_t x86AssemblyInspectionEngine::extract_4(uint8_t *b) {
   uint32_t v = 0;
   for (int i = 3; i >= 0; i--)
@@ -680,6 +877,14 @@ uint32_t x86AssemblyInspectionEngine::extract_4(uint8_t *b) {
   return v;
 }
 
+int32_t x86AssemblyInspectionEngine::extract_4_signed(uint8_t *b) {
+  int32_t v = 0;
+  for (int i = 3; i >= 0; i--)
+    v = (v << 8) | b[i];
+  return v;
+}
+
+
 bool x86AssemblyInspectionEngine::instruction_length(uint8_t *insn_p,
                                                      int &length, 
                                                      uint32_t buffer_remaining_bytes) {
@@ -705,7 +910,6 @@ bool x86AssemblyInspectionEngine::machine_regno_to_lldb_regno(
     return true;
   }
   return false;
-  return false;
 }
 
 bool x86AssemblyInspectionEngine::GetNonCallSiteUnwindPlanFromAssembly(
@@ -1029,25 +1233,47 @@ bool x86AssemblyInspectionEngine::GetNonCallSiteUnwindPlanFromAssembly(
       }
     }
 
-    else if (ret_pattern_p() && prologue_completed_row.get()) {
-      // Reinstate the saved prologue setup for any instructions that come
-      // after the ret instruction
-
-      UnwindPlan::Row *newrow = new UnwindPlan::Row;
-      *newrow = *prologue_completed_row.get();
-      row.reset(newrow);
-      current_sp_bytes_offset_from_fa =
-          prologue_completed_sp_bytes_offset_from_cfa;
-      is_aligned = prologue_completed_is_aligned;
-
-      saved_registers.clear();
-      saved_registers.resize(prologue_completed_saved_registers.size(), false);
-      for (size_t i = 0; i < prologue_completed_saved_registers.size(); ++i) {
-        saved_registers[i] = prologue_completed_saved_registers[i];
+    else if (prologue_completed_row.get() && 
+             (ret_pattern_p() ||
+              non_local_branch_p (current_func_text_offset, func_range, insn_len) ||
+              jmp_to_reg_p())) {
+      // Check if the current instruction is the end of an epilogue sequence,
+      // and if so, re-instate the prologue-completed unwind state.
+
+      // The current instruction is a branch/jump outside this function, 
+      // a ret, or a jump through a register value which we cannot 
+      // determine the effcts of.  Verify that the stack frame state 
+      // has been unwound to the same as it was at function entry to avoid 
+      // mis-identifying a JMP instruction as an epilogue.
+      UnwindPlan::Row::RegisterLocation sp, pc;
+      if (row->GetRegisterInfo(m_lldb_sp_regnum, sp) &&
+          row->GetRegisterInfo(m_lldb_ip_regnum, pc)) {
+        // Any ret instruction variant is definitely indicative of an
+        // epilogue; for other insn patterns verify that we're back to
+        // the original unwind state.
+        if (ret_pattern_p() ||
+            (sp.IsCFAPlusOffset() && sp.GetOffset() == 0 &&
+            pc.IsAtCFAPlusOffset() && pc.GetOffset() == -m_wordsize)) {
+          // Reinstate the saved prologue setup for any instructions that come
+          // after the epilogue
+
+          UnwindPlan::Row *newrow = new UnwindPlan::Row;
+          *newrow = *prologue_completed_row.get();
+          row.reset(newrow);
+          current_sp_bytes_offset_from_fa =
+              prologue_completed_sp_bytes_offset_from_cfa;
+          is_aligned = prologue_completed_is_aligned;
+
+          saved_registers.clear();
+          saved_registers.resize(prologue_completed_saved_registers.size(), false);
+          for (size_t i = 0; i < prologue_completed_saved_registers.size(); ++i) {
+            saved_registers[i] = prologue_completed_saved_registers[i];
+          }
+
+          in_epilogue = true;
+          row_updated = true;
+        }
       }
-
-      in_epilogue = true;
-      row_updated = true;
     }
 
     // call next instruction
diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.h b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.h
index 9a8f71f4ee1cb..680598abdeff9 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.h
+++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.h
@@ -114,7 +114,19 @@ class x86AssemblyInspectionEngine {
   bool call_next_insn_pattern_p();
   bool mov_reg_to_local_stack_frame_p(int &regno, int &rbp_offset);
   bool ret_pattern_p();
+  bool jmp_to_reg_p();
+  bool pc_rel_branch_or_jump_p (const int instruction_length, int &offset);
+  bool non_local_branch_p (const lldb::addr_t current_func_text_offset, 
+                           const lldb_private::AddressRange &func_range,
+                           const int instruction_length);
+  bool local_branch_p (const lldb::addr_t current_func_text_offset, 
+                       const lldb_private::AddressRange &func_range,
+                       const int instruction_length,
+                       lldb::addr_t &target_insn_offset);
+  uint16_t extract_2(uint8_t *b);
+  int16_t extract_2_signed(uint8_t *b);
   uint32_t extract_4(uint8_t *b);
+  int32_t extract_4_signed(uint8_t *b);
 
   bool instruction_length(uint8_t *insn, int &length, uint32_t buffer_remaining_bytes);
 
diff --git a/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp b/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp
index ccf065ca4b733..f8308c304654c 100644
--- a/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp
+++ b/lldb/unittests/UnwindAssembly/x86/Testx86AssemblyInspectionEngine.cpp
@@ -2723,3 +2723,125 @@ TEST_F(Testx86AssemblyInspectionEngine, TestReturnDetect) {
   EXPECT_TRUE(regloc.IsAtCFAPlusOffset());
   EXPECT_EQ(-8, regloc.GetOffset());
 }
+
+
+// Test mid-function epilogues - the unwind state post-prologue
+// should be re-instated.
+
+TEST_F(Testx86AssemblyInspectionEngine, TestDisassemblyMidFunctionEpilogues) {
+  AddressRange sample_range;
+  UnwindPlan unwind_plan(eRegisterKindLLDB);
+  std::unique_ptr<x86AssemblyInspectionEngine> engine32 = Geti386Inspector();
+  std::unique_ptr<x86AssemblyInspectionEngine> engine64 = Getx86_64Inspector();
+
+  uint8_t data[] = {
+    0x55,                   // <+0>: pushq %rbp
+    0x48, 0x89, 0xe5,       // <+1>: movq %rsp, %rbp
+    0x48, 0x83, 0xec, 0x70, // <+4>: subq $0x70, %rsp
+    0x90,                   // <+8>: nop               // prologue set up
+
+    0x74, 0x7,              // <+9>: je 7 <+18>
+    0x48, 0x83, 0xc4, 0x70, // <+11>: addq $0x70, %rsp
+    0x5d,                   // <+15>: popq %rbp
+    0xff, 0xe0,             // <+16>: jmpq *%rax      // epilogue completed
+
+    0x90,                   // <+18>: nop             // prologue setup back
+
+    0x74, 0x7,              // <+19>: je 6 <+27>
+    0x48, 0x83, 0xc4, 0x70, // <+21>: addq $0x70, %rsp
+    0x5d,                   // <+25>: popq %rbp
+    0xc3,                   // <+26>: retq            // epilogue completed
+
+    0x90,                   // <+27>: nop             // prologue setup back
+
+    0x48, 0x83, 0xc4, 0x70, // <+28>: addq $0x70, %rsp
+    0x5d,                   // <+32>: popq %rbp
+    0xc3,                   // <+33>: retq            // epilogue completed
+
+  };
+
+  sample_range = AddressRange(0x1000, sizeof(data));
+
+  int wordsize = 4;
+  EXPECT_TRUE(engine32->GetNonCallSiteUnwindPlanFromAssembly(
+      data, sizeof(data), sample_range, unwind_plan));
+
+  // Check that we've unwound the stack after the first mid-function epilogue
+  // row:   CFA=esp +4 => esp=CFA+0 eip=[CFA-4]
+  UnwindPlan::RowSP row_sp = unwind_plan.GetRowForFunctionOffset(16);
+  EXPECT_EQ(16ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_esp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize, row_sp->GetCFAValue().GetOffset());
+
+  // Check that we've reinstated the stack frame setup 
+  // unwind instructions after a jmpq *%eax
+  // row:   CFA=ebp +8 => esp=CFA+0 eip=[CFA-8]
+  row_sp = unwind_plan.GetRowForFunctionOffset(18);
+  EXPECT_EQ(18ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_ebp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize * 2, row_sp->GetCFAValue().GetOffset());
+
+  // Check that we've reinstated the stack frame setup 
+  // unwind instructions after a mid-function retq
+  // row:   CFA=ebp +8 => esp=CFA+0 eip=[CFA-8]
+  row_sp = unwind_plan.GetRowForFunctionOffset(27);
+  EXPECT_EQ(27ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_ebp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize * 2, row_sp->GetCFAValue().GetOffset());
+
+  // After last instruction in the function, verify that
+  // the stack frame has been unwound
+  // row:   CFA=esp +4 => esp=CFA+0 eip=[CFA-4]
+  row_sp = unwind_plan.GetRowForFunctionOffset(33);
+  EXPECT_EQ(33ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_esp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize, row_sp->GetCFAValue().GetOffset());
+
+
+  unwind_plan.Clear();
+
+  wordsize = 8;
+  EXPECT_TRUE(engine64->GetNonCallSiteUnwindPlanFromAssembly(
+      data, sizeof(data), sample_range, unwind_plan));
+
+  // Check that we've unwound the stack after the first mid-function epilogue
+  // row:   CFA=rsp +8 => rsp=CFA+0 rip=[CFA-8]
+  row_sp = unwind_plan.GetRowForFunctionOffset(16);
+  EXPECT_EQ(16ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rsp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize, row_sp->GetCFAValue().GetOffset());
+
+  // Check that we've reinstated the stack frame setup 
+  // unwind instructions after a jmpq *%eax
+  // row:   CFA=rbp+16 => rsp=CFA+0 rip=[CFA-16]
+  row_sp = unwind_plan.GetRowForFunctionOffset(18);
+  EXPECT_EQ(18ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rbp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize * 2, row_sp->GetCFAValue().GetOffset());
+
+  // Check that we've reinstated the stack frame setup 
+  // unwind instructions after a mid-function retq
+  // row:   CFA=rbp+16 => rsp=CFA+0 rip=[CFA-16]
+  row_sp = unwind_plan.GetRowForFunctionOffset(27);
+  EXPECT_EQ(27ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rbp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize * 2, row_sp->GetCFAValue().GetOffset());
+
+  // After last instruction in the function, verify that
+  // the stack frame has been unwound
+  // row:   CFA=rsp +8 => esp=CFA+0 rip=[CFA-8]
+  row_sp = unwind_plan.GetRowForFunctionOffset(33);
+  EXPECT_EQ(33ull, row_sp->GetOffset());
+  EXPECT_TRUE(row_sp->GetCFAValue().GetRegisterNumber() == k_rsp);
+  EXPECT_TRUE(row_sp->GetCFAValue().IsRegisterPlusOffset() == true);
+  EXPECT_EQ(wordsize, row_sp->GetCFAValue().GetOffset());
+
+
+}

From ac062bbad8a65f751a046784f8250e09aac47d84 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 3 Jun 2019 22:34:15 +0000
Subject: [PATCH 0975/1176] [X86] Add test cases for 32 and 64 bit versions of
 PR42118. NFC

llvm-svn: 362457
---
 llvm/test/CodeGen/X86/bmi.ll | 81 ++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index ab159c9506600..c48be66705f92 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -1149,3 +1149,84 @@ define i64 @blsr64_branch(i64 %x) {
 }
 
 declare void @bar()
+
+define void @pr42118_i32(i32 %x) {
+; X86-LABEL: pr42118_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    andnl %eax, %ecx, %eax
+; X86-NEXT:    jne .LBB48_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    jmp bar # TAILCALL
+; X86-NEXT:  .LBB48_1:
+; X86-NEXT:    retl
+;
+; X64-LABEL: pr42118_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    andnl %edi, %eax, %eax
+; X64-NEXT:    jne .LBB48_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    jmp bar # TAILCALL
+; X64-NEXT:  .LBB48_1:
+; X64-NEXT:    retq
+  %tmp = sub i32 0, %x
+  %tmp1 = and i32 %tmp, %x
+  %cmp = icmp eq i32 %tmp1, %x
+  br i1 %cmp, label %1, label %2
+
+  tail call void @bar()
+  br label %2
+
+  ret void
+}
+
+define void @pr42118_i64(i64 %x) {
+; X86-LABEL: pr42118_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    negl %esi
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    andnl %ecx, %edx, %ecx
+; X86-NEXT:    andnl %eax, %esi, %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    jne .LBB49_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    jmp bar # TAILCALL
+; X86-NEXT:  .LBB49_1:
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: pr42118_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    andnq %rdi, %rax, %rax
+; X64-NEXT:    jne .LBB49_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    jmp bar # TAILCALL
+; X64-NEXT:  .LBB49_1:
+; X64-NEXT:    retq
+  %tmp = sub i64 0, %x
+  %tmp1 = and i64 %tmp, %x
+  %cmp = icmp eq i64 %tmp1, %x
+  br i1 %cmp, label %1, label %2
+
+  tail call void @bar()
+  br label %2
+
+  ret void
+}

From 1f8030630be6c5b75c4c2a1edf7658472ff9c0c1 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Mon, 3 Jun 2019 22:41:48 +0000
Subject: [PATCH 0976/1176] [Target] Move
 ObjCLanguageRuntime::LookupRuntimeSymbol into LanguageRuntime

Summary:
LookupRuntimeSymbol seems like a general LanguageRuntime method.
Although no other language runtime currently implements this, there's no
reason another language runtime couldn't use this.

Additionally, this breaks IRExecutionUnit's dependency on
ObjCLanguageRuntime.

Reviewers: compnerd, labath, JDevlieghere, davide

Subscribers: lldb-commits

Differential Revision: https://reviews.llvm.org/D62795

llvm-svn: 362458
---
 lldb/include/lldb/Target/LanguageRuntime.h     | 7 +++++++
 lldb/include/lldb/Target/ObjCLanguageRuntime.h | 7 -------
 lldb/source/Expression/IRExecutionUnit.cpp     | 8 +++-----
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h
index 105a0040f004a..c19cf77898e68 100644
--- a/lldb/include/lldb/Target/LanguageRuntime.h
+++ b/lldb/include/lldb/Target/LanguageRuntime.h
@@ -166,6 +166,13 @@ class LanguageRuntime : public PluginInterface {
     return false;
   }
 
+  // Given the name of a runtime symbol (e.g. in Objective-C, an ivar offset
+  // symbol), try to determine from the runtime what the value of that symbol
+  // would be. Useful when the underlying binary is stripped.
+  virtual lldb::addr_t LookupRuntimeSymbol(ConstString name) {
+    return LLDB_INVALID_ADDRESS;
+  }
+
 protected:
   // Classes that inherit from LanguageRuntime can see and modify these
 
diff --git a/lldb/include/lldb/Target/ObjCLanguageRuntime.h b/lldb/include/lldb/Target/ObjCLanguageRuntime.h
index c31d7255d6d7d..7d3613bfd9144 100644
--- a/lldb/include/lldb/Target/ObjCLanguageRuntime.h
+++ b/lldb/include/lldb/Target/ObjCLanguageRuntime.h
@@ -264,13 +264,6 @@ class ObjCLanguageRuntime : public LanguageRuntime {
   virtual size_t GetByteOffsetForIvar(CompilerType &parent_qual_type,
                                       const char *ivar_name);
 
-  // Given the name of an Objective-C runtime symbol (e.g., ivar offset
-  // symbol), try to determine from the runtime what the value of that symbol
-  // would be. Useful when the underlying binary is stripped.
-  virtual lldb::addr_t LookupRuntimeSymbol(ConstString name) {
-    return LLDB_INVALID_ADDRESS;
-  }
-
   bool HasNewLiteralsAndIndexing() {
     if (m_has_new_literals_and_indexing == eLazyBoolCalculate) {
       if (CalculateHasNewLiteralsAndIndexing())
diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp
index 34a3488578cde..160f586b2f4c5 100644
--- a/lldb/source/Expression/IRExecutionUnit.cpp
+++ b/lldb/source/Expression/IRExecutionUnit.cpp
@@ -24,7 +24,7 @@
 #include "lldb/Symbol/SymbolFile.h"
 #include "lldb/Symbol/SymbolVendor.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
+#include "lldb/Target/LanguageRuntime.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
 #include "lldb/Utility/DataExtractor.h"
@@ -902,10 +902,8 @@ IRExecutionUnit::FindInRuntimes(const std::vector<SearchSpec> &specs,
     return LLDB_INVALID_ADDRESS;
   }
 
-  ObjCLanguageRuntime *runtime = process_sp->GetObjCLanguageRuntime();
-
-  if (runtime) {
-    for (const SearchSpec &spec : specs) {
+  for (const SearchSpec &spec : specs) {
+    for (LanguageRuntime *runtime : process_sp->GetLanguageRuntimes()) {
       lldb::addr_t symbol_load_addr = runtime->LookupRuntimeSymbol(spec.name);
 
       if (symbol_load_addr != LLDB_INVALID_ADDRESS)

From 6e2d36b60b401a0fd5a25f8eb98cddfd3a7b92b4 Mon Sep 17 00:00:00 2001
From: Alex Lorenz <arphaman@gmail.com>
Date: Mon, 3 Jun 2019 22:59:17 +0000
Subject: [PATCH 0977/1176] Add clang source minimizer that reduces source to
 directives that might affect the dependency list for a compilation

This commit introduces a dependency directives source minimizer to clang
that minimizes header and source files to the minimum necessary preprocessor
directives for evaluating includes. It reduces the source down to #define, #include,

The source minimizer works by lexing the input with a custom fast lexer that recognizes
the preprocessor directives it cares about, and emitting those directives in the minimized source.
It ignores source code, comments, and normalizes whitespace. It gives up and fails if seems
any directives that it doesn't recognize as valid (e.g. #define 0).

In addition to the source minimizer this patch adds a
-print-dependency-directives-minimized-source CC1 option that allows you to invoke the minimizer
from clang directly.

Differential Revision: https://reviews.llvm.org/D55463

llvm-svn: 362459
---
 .../include/clang/Basic/DiagnosticLexKinds.td |   9 +
 clang/include/clang/Driver/CC1Options.td      |   3 +
 .../include/clang/Frontend/FrontendActions.h  |  11 +
 .../include/clang/Frontend/FrontendOptions.h  |   5 +-
 .../Lex/DependencyDirectivesSourceMinimizer.h |  88 ++
 clang/lib/Frontend/CompilerInvocation.cpp     |   5 +
 clang/lib/Frontend/FrontendActions.cpp        |  33 +-
 .../ExecuteCompilerInvocation.cpp             |   2 +
 clang/lib/Lex/CMakeLists.txt                  |   1 +
 .../DependencyDirectivesSourceMinimizer.cpp   | 756 ++++++++++++++++++
 ...minimize_source_to_dependency_directives.c |  14 +
 ...ndency_directives_at_import_extra_tokens.m |   3 +
 ...ndency_directives_at_import_missing_semi.m |   3 +
 ...dependency_directives_invalid_macro_name.c |   3 +
 clang/unittests/Lex/CMakeLists.txt            |   1 +
 ...ependencyDirectivesSourceMinimizerTest.cpp | 508 ++++++++++++
 16 files changed, 1443 insertions(+), 2 deletions(-)
 create mode 100644 clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
 create mode 100644 clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
 create mode 100644 clang/test/Frontend/minimize_source_to_dependency_directives.c
 create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
 create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
 create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
 create mode 100644 clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp

diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index dd5e2af3d27fb..b64cbc23f8100 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -818,4 +818,13 @@ def err_pp_eof_in_assume_nonnull : Error<
 
 }
 
+let CategoryName = "Dependency Directive Source Minimization Issue" in {
+
+def err_dep_source_minimizer_missing_sema_after_at_import : Error<
+  "could not find ';' after @import">;
+def err_dep_source_minimizer_unexpected_tokens_at_import : Error<
+  "unexpected extra tokens at end of @import declaration">;
+
+}
+
 }
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 76b36a18269be..56ff05d3e4c79 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -612,6 +612,9 @@ def migrate : Flag<["-"], "migrate">,
   HelpText<"Migrate source code">;
 def compiler_options_dump : Flag<["-"], "compiler-options-dump">,
   HelpText<"Dump the compiler configuration options">;
+def print_dependency_directives_minimized_source : Flag<["-"],
+  "print-dependency-directives-minimized-source">,
+  HelpText<"Print the output of the dependency directives source minimizer">;
 }
 
 def emit_llvm_uselists : Flag<["-"], "emit-llvm-uselists">,
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h
index e3b8b46165674..846b26897c359 100644
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -240,6 +240,17 @@ class PrintPreambleAction : public FrontendAction {
   bool usesPreprocessorOnly() const override { return true; }
 };
 
+class PrintDependencyDirectivesSourceMinimizerAction : public FrontendAction {
+protected:
+  void ExecuteAction() override;
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &,
+                                                 StringRef) override {
+    return nullptr;
+  }
+
+  bool usesPreprocessorOnly() const override { return true; }
+};
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Actions
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index ce0b6964f5bee..1bbd048967489 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -128,7 +128,10 @@ enum ActionKind {
   MigrateSource,
 
   /// Just lex, no output.
-  RunPreprocessorOnly
+  RunPreprocessorOnly,
+
+  /// Print the output of the dependency directives source minimizer.
+  PrintDependencyDirectivesSourceMinimizerOutput
 };
 
 } // namespace frontend
diff --git a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
new file mode 100644
index 0000000000000..41641078afe43
--- /dev/null
+++ b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h
@@ -0,0 +1,88 @@
+//===- clang/Lex/DependencyDirectivesSourceMinimizer.h -  ----------*- C++ -*-//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the interface for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
+#define LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+
+class DiagnosticsEngine;
+
+namespace minimize_source_to_dependency_directives {
+
+/// Represents the kind of preprocessor directive or a module declaration that
+/// is tracked by the source minimizer in its token output.
+enum TokenKind {
+  pp_none,
+  pp_include,
+  pp___include_macros,
+  pp_define,
+  pp_undef,
+  pp_import,
+  pp_pragma_import,
+  pp_include_next,
+  pp_if,
+  pp_ifdef,
+  pp_ifndef,
+  pp_elif,
+  pp_else,
+  pp_endif,
+  decl_at_import,
+  pp_eof,
+};
+
+/// Represents a simplified token that's lexed as part of the source
+/// minimization. It's used to track the location of various preprocessor
+/// directives that could potentially have an effect on the depedencies.
+struct Token {
+  /// The kind of token.
+  TokenKind K = pp_none;
+
+  /// Offset into the output byte stream of where the directive begins.
+  int Offset = -1;
+
+  Token(TokenKind K, int Offset) : K(K), Offset(Offset) {}
+};
+
+} // end namespace minimize_source_to_dependency_directives
+
+/// Minimize the input down to the preprocessor directives that might have
+/// an effect on the dependencies for a compilation unit.
+///
+/// This function deletes all non-preprocessor code, and strips anything that
+/// can't affect what gets included. It canonicalizes whitespace where
+/// convenient to stabilize the output against formatting changes in the input.
+///
+/// Clears the output vectors at the beginning of the call.
+///
+/// \returns false on success, true on error. If the diagnostic engine is not
+/// null, an appropriate error is reported using the given input location
+/// with the offset that corresponds to the minimizer's current buffer offset.
+bool minimizeSourceToDependencyDirectives(
+    llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output,
+    llvm::SmallVectorImpl<minimize_source_to_dependency_directives::Token>
+        &Tokens,
+    DiagnosticsEngine *Diags = nullptr,
+    SourceLocation InputSourceLoc = SourceLocation());
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 717278c0861d6..7ac58ae4da4b9 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1696,6 +1696,10 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
       Opts.ProgramAction = frontend::MigrateSource; break;
     case OPT_Eonly:
       Opts.ProgramAction = frontend::RunPreprocessorOnly; break;
+    case OPT_print_dependency_directives_minimized_source:
+      Opts.ProgramAction =
+          frontend::PrintDependencyDirectivesSourceMinimizerOutput;
+      break;
     }
   }
 
@@ -3116,6 +3120,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) {
   case frontend::PrintPreprocessedInput:
   case frontend::RewriteMacros:
   case frontend::RunPreprocessorOnly:
+  case frontend::PrintDependencyDirectivesSourceMinimizerOutput:
     return true;
   }
   llvm_unreachable("invalid frontend action");
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 9e8632354571c..7d54d665146b6 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -14,6 +14,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/MultiplexConsumer.h"
 #include "clang/Frontend/Utils.h"
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Lex/PreprocessorOptions.h"
@@ -23,8 +24,8 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
 #include <memory>
 #include <system_error>
 
@@ -908,3 +909,33 @@ void DumpCompilerOptionsAction::ExecuteAction() {
 
   OS << "}";
 }
+
+void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() {
+  CompilerInstance &CI = getCompilerInstance();
+  SourceManager &SM = CI.getPreprocessor().getSourceManager();
+  const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID());
+
+  llvm::SmallString<1024> Output;
+  llvm::SmallVector<minimize_source_to_dependency_directives::Token, 32> Toks;
+  if (minimizeSourceToDependencyDirectives(
+          FromFile->getBuffer(), Output, Toks, &CI.getDiagnostics(),
+          SM.getLocForStartOfFile(SM.getMainFileID()))) {
+    assert(CI.getDiagnostics().hasErrorOccurred() &&
+           "no errors reported for failure");
+
+    // Preprocess the source when verifying the diagnostics to capture the
+    // 'expected' comments.
+    if (CI.getDiagnosticOpts().VerifyDiagnostics) {
+      // Make sure we don't emit new diagnostics!
+      CI.getDiagnostics().setSuppressAllDiagnostics();
+      Preprocessor &PP = getCompilerInstance().getPreprocessor();
+      PP.EnterMainSourceFile();
+      Token Tok;
+      do {
+        PP.Lex(Tok);
+      } while (Tok.isNot(tok::eof));
+    }
+    return;
+  }
+  llvm::outs() << Output;
+}
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index da7aa7b82a39c..b6a20a7bb6ced 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -116,6 +116,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) {
   case RunAnalysis:            Action = "RunAnalysis"; break;
 #endif
   case RunPreprocessorOnly:    return llvm::make_unique<PreprocessOnlyAction>();
+  case PrintDependencyDirectivesSourceMinimizerOutput:
+    return llvm::make_unique<PrintDependencyDirectivesSourceMinimizerAction>();
   }
 
 #if !CLANG_ENABLE_ARCMT || !CLANG_ENABLE_STATIC_ANALYZER \
diff --git a/clang/lib/Lex/CMakeLists.txt b/clang/lib/Lex/CMakeLists.txt
index 7888b15cb66e6..d77e6ddb66a07 100644
--- a/clang/lib/Lex/CMakeLists.txt
+++ b/clang/lib/Lex/CMakeLists.txt
@@ -3,6 +3,7 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangLex
+  DependencyDirectivesSourceMinimizer.cpp
   HeaderMap.cpp
   HeaderSearch.cpp
   Lexer.cpp
diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
new file mode 100644
index 0000000000000..802b7ba159979
--- /dev/null
+++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -0,0 +1,756 @@
+//===- DependencyDirectivesSourceMinimizer.cpp -  -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This is the implementation for minimizing header and source files to the
+/// minimum necessary preprocessor directives for evaluating includes. It
+/// reduces the source down to #define, #include, #import, @import, and any
+/// conditional preprocessor logic that contains one of those.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Lex/LexDiagnostic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace {
+
+struct Minimizer {
+  /// Minimized output.
+  SmallVectorImpl<char> &Out;
+  /// The known tokens encountered during the minimization.
+  SmallVectorImpl<Token> &Tokens;
+
+  Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
+            StringRef Input, DiagnosticsEngine *Diags,
+            SourceLocation InputSourceLoc)
+      : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
+        InputSourceLoc(InputSourceLoc) {}
+
+  /// Lex the provided source and emit the minimized output.
+  ///
+  /// \returns True on error.
+  bool minimize();
+
+private:
+  struct IdInfo {
+    const char *Last;
+    StringRef Name;
+  };
+
+  /// Lex an identifier.
+  ///
+  /// \pre First points at a valid identifier head.
+  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
+  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
+                                       const char *const End);
+  LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
+  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
+  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
+                                 const char *&First, const char *const End);
+  Token &makeToken(TokenKind K) {
+    Tokens.emplace_back(K, Out.size());
+    return Tokens.back();
+  }
+  void popToken() {
+    Out.resize(Tokens.back().Offset);
+    Tokens.pop_back();
+  }
+  TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
+
+  Minimizer &put(char Byte) {
+    Out.push_back(Byte);
+    return *this;
+  }
+  Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
+  Minimizer &append(const char *First, const char *Last) {
+    Out.append(First, Last);
+    return *this;
+  }
+
+  void printToNewline(const char *&First, const char *const End);
+  void printAdjacentModuleNameParts(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printAtImportBody(const char *&First,
+                                        const char *const End);
+  void printDirectiveBody(const char *&First, const char *const End);
+  void printAdjacentMacroArgs(const char *&First, const char *const End);
+  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
+
+  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
+  /// true at the end.
+  bool reportError(const char *CurPtr, unsigned Err);
+
+  StringMap<char> SplitIds;
+  StringRef Input;
+  DiagnosticsEngine *Diags;
+  SourceLocation InputSourceLoc;
+};
+
+} // end anonymous namespace
+
+bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
+  if (!Diags)
+    return true;
+  assert(CurPtr >= Input.data() && "invalid buffer ptr");
+  Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
+  return true;
+}
+
+static void skipOverSpaces(const char *&First, const char *const End) {
+  while (First != End && isHorizontalWhitespace(*First))
+    ++First;
+}
+
+LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
+                                              const char *Current) {
+  assert(First <= Current);
+
+  // Check if we can even back up.
+  if (*Current != '\"' || First == Current)
+    return false;
+
+  // Check for an "R".
+  --Current;
+  if (*Current != 'R')
+    return false;
+  if (First == Current || !isIdentifierBody(*--Current))
+    return true;
+
+  // Check for a prefix of "u", "U", or "L".
+  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
+    return First == Current || !isIdentifierBody(*--Current);
+
+  // Check for a prefix of "u8".
+  if (*Current != '8' || First == Current || *Current-- != 'u')
+    return false;
+  return First == Current || !isIdentifierBody(*--Current);
+}
+
+static void skipRawString(const char *&First, const char *const End) {
+  assert(First[0] == '\"');
+  assert(First[-1] == 'R');
+
+  const char *Last = ++First;
+  while (Last != End && *Last != '(')
+    ++Last;
+  if (Last == End) {
+    First = Last; // Hit the end... just give up.
+    return;
+  }
+
+  StringRef Terminator(First, Last - First);
+  for (;;) {
+    // Move First to just past the next ")".
+    First = Last;
+    while (First != End && *First != ')')
+      ++First;
+    if (First == End)
+      return;
+    ++First;
+
+    // Look ahead for the terminator sequence.
+    Last = First;
+    while (Last != End && size_t(Last - First) < Terminator.size() &&
+           Terminator[Last - First] == *Last)
+      ++Last;
+
+    // Check if we hit it (or the end of the file).
+    if (Last == End) {
+      First = Last;
+      return;
+    }
+    if (size_t(Last - First) < Terminator.size())
+      continue;
+    if (*Last != '\"')
+      continue;
+    First = Last + 1;
+    return;
+  }
+}
+
+static void skipString(const char *&First, const char *const End) {
+  assert(*First == '\'' || *First == '\"');
+  const char Terminator = *First;
+  for (++First; First != End && *First != Terminator; ++First)
+    if (*First == '\\')
+      if (++First == End)
+        return;
+  if (First != End)
+    ++First; // Finish off the string.
+}
+
+static void skipNewline(const char *&First, const char *End) {
+  assert(isVerticalWhitespace(*First));
+  ++First;
+  if (First == End)
+    return;
+
+  // Check for "\n\r" and "\r\n".
+  if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
+    ++First;
+}
+
+static void skipToNewlineRaw(const char *&First, const char *const End) {
+  for (;;) {
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First))
+      return;
+
+    while (!isVerticalWhitespace(*First))
+      if (++First == End)
+        return;
+
+    if (First[-1] != '\\')
+      return;
+
+    ++First; // Keep going...
+  }
+}
+
+static const char *reverseOverSpaces(const char *First, const char *Last) {
+  assert(First <= Last);
+  while (First != Last && isHorizontalWhitespace(Last[-1]))
+    --Last;
+  return Last;
+}
+
+static void skipLineComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '/');
+  First += 2;
+  skipToNewlineRaw(First, End);
+}
+
+static void skipBlockComment(const char *&First, const char *const End) {
+  assert(First[0] == '/' && First[1] == '*');
+  if (End - First < 4) {
+    First = End;
+    return;
+  }
+  for (First += 3; First != End; ++First)
+    if (First[-1] == '*' && First[0] == '/') {
+      ++First;
+      return;
+    }
+}
+
+/// \returns True if the current single quotation mark character is a C++ 14
+/// digit separator.
+static bool isQuoteCppDigitSeparator(const char *const Start,
+                                     const char *const Cur,
+                                     const char *const End) {
+  assert(*Cur == '\'' && "expected quotation character");
+  // skipLine called in places where we don't expect a valid number
+  // body before `start` on the same line, so always return false at the start.
+  if (Start == Cur)
+    return false;
+  // The previous character must be a valid PP number character.
+  if (!isPreprocessingNumberBody(*(Cur - 1)))
+    return false;
+  // The next character should be a valid identifier body character.
+  return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
+}
+
+static void skipLine(const char *&First, const char *const End) {
+  do {
+    assert(First <= End);
+    if (First == End)
+      return;
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      return;
+    }
+    const char *Start = First;
+    while (First != End && !isVerticalWhitespace(*First)) {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*First == '\"' ||
+          (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
+        if (isRawStringLiteral(Start, First))
+          skipRawString(First, End);
+        else
+          skipString(First, End);
+        continue;
+      }
+
+      // Iterate over comments correctly.
+      if (*First != '/' || End - First < 2) {
+        ++First;
+        continue;
+      }
+
+      if (First[1] == '/') {
+        // "//...".
+        skipLineComment(First, End);
+        continue;
+      }
+
+      if (First[1] != '*') {
+        ++First;
+        continue;
+      }
+
+      // "/*...*/".
+      skipBlockComment(First, End);
+    }
+    if (First == End)
+      return;
+
+    // Skip over the newline.
+    assert(isVerticalWhitespace(*First));
+    skipNewline(First, End);
+  } while (First[-2] == '\\'); // Continue past line-continuations.
+}
+
+static void skipDirective(StringRef Name, const char *&First,
+                          const char *const End) {
+  if (llvm::StringSwitch<bool>(Name)
+          .Case("warning", true)
+          .Case("error", true)
+          .Default(false))
+    // Do not process quotes or comments.
+    skipToNewlineRaw(First, End);
+  else
+    skipLine(First, End);
+}
+
+void Minimizer::printToNewline(const char *&First, const char *const End) {
+  while (First != End && !isVerticalWhitespace(*First)) {
+    const char *Last = First;
+    do {
+      // Iterate over strings correctly to avoid comments and newlines.
+      if (*Last == '\"' || *Last == '\'') {
+        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
+          skipRawString(Last, End);
+        else
+          skipString(Last, End);
+        continue;
+      }
+      if (*Last != '/' || End - Last < 2) {
+        ++Last;
+        continue; // Gather the rest up to print verbatim.
+      }
+
+      if (Last[1] != '/' && Last[1] != '*') {
+        ++Last;
+        continue;
+      }
+
+      // Deal with "//..." and "/*...*/".
+      append(First, reverseOverSpaces(First, Last));
+      First = Last;
+
+      if (Last[1] == '/') {
+        skipLineComment(First, End);
+        return;
+      }
+
+      put(' ');
+      skipBlockComment(First, End);
+      skipOverSpaces(First, End);
+      Last = First;
+    } while (Last != End && !isVerticalWhitespace(*Last));
+
+    // Print out the string.
+    if (Last == End || Last == First || Last[-1] != '\\') {
+      append(First, reverseOverSpaces(First, Last));
+      return;
+    }
+
+    // Print up to the backslash, backing up over spaces.
+    append(First, reverseOverSpaces(First, Last - 1));
+
+    First = Last;
+    skipNewline(First, End);
+    skipOverSpaces(First, End);
+  }
+}
+
+static void skipWhitespace(const char *&First, const char *const End) {
+  for (;;) {
+    assert(First <= End);
+    skipOverSpaces(First, End);
+
+    if (End - First < 2)
+      return;
+
+    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
+      skipNewline(++First, End);
+      continue;
+    }
+
+    // Check for a non-comment character.
+    if (First[0] != '/')
+      return;
+
+    // "// ...".
+    if (First[1] == '/') {
+      skipLineComment(First, End);
+      return;
+    }
+
+    // Cannot be a comment.
+    if (First[1] != '*')
+      return;
+
+    // "/*...*/".
+    skipBlockComment(First, End);
+  }
+}
+
+void Minimizer::printAdjacentModuleNameParts(const char *&First,
+                                             const char *const End) {
+  // Skip over parts of the body.
+  const char *Last = First;
+  do
+    ++Last;
+  while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
+  append(First, Last);
+  First = Last;
+}
+
+bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true;
+
+    if (isVerticalWhitespace(*First)) {
+      skipNewline(First, End);
+      continue;
+    }
+
+    // Found a semicolon.
+    if (*First == ';') {
+      put(*First++).put('\n');
+      return false;
+    }
+
+    // Don't handle macro expansions inside @import for now.
+    if (!isIdentifierBody(*First) && *First != '.')
+      return true;
+
+    printAdjacentModuleNameParts(First, End);
+  }
+}
+
+void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
+  skipWhitespace(First, End); // Skip initial whitespace.
+  printToNewline(First, End);
+  while (Out.back() == ' ')
+    Out.pop_back();
+  put('\n');
+}
+
+LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
+                                                   const char *const End) {
+  assert(isIdentifierBody(*First) && "invalid identifer");
+  const char *Last = First + 1;
+  while (Last != End && isIdentifierBody(*Last))
+    ++Last;
+  return Last;
+}
+
+LLVM_NODISCARD static const char *
+getIdentifierContinuation(const char *First, const char *const End) {
+  if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
+    return nullptr;
+
+  ++First;
+  skipNewline(First, End);
+  if (First == End)
+    return nullptr;
+  return isIdentifierBody(First[0]) ? First : nullptr;
+}
+
+Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
+                                           const char *const End) {
+  const char *Last = lexRawIdentifier(First, End);
+  const char *Next = getIdentifierContinuation(Last, End);
+  if (LLVM_LIKELY(!Next))
+    return IdInfo{Last, StringRef(First, Last - First)};
+
+  // Slow path, where identifiers are split over lines.
+  SmallVector<char, 64> Id(First, Last);
+  while (Next) {
+    Last = lexRawIdentifier(Next, End);
+    Id.append(Next, Last);
+    Next = getIdentifierContinuation(Last, End);
+  }
+  return IdInfo{
+      Last,
+      SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
+}
+
+void Minimizer::printAdjacentMacroArgs(const char *&First,
+                                       const char *const End) {
+  // Skip over parts of the body.
+  const char *Last = First;
+  do
+    ++Last;
+  while (Last != End &&
+         (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
+  append(First, Last);
+  First = Last;
+}
+
+bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
+  assert(*First == '(');
+  put(*First++);
+  for (;;) {
+    skipWhitespace(First, End);
+    if (First == End)
+      return true;
+
+    if (*First == ')') {
+      put(*First++);
+      return false;
+    }
+
+    // This is intentionally fairly liberal.
+    if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
+      return true;
+
+    printAdjacentMacroArgs(First, End);
+  }
+}
+
+/// Looks for an identifier starting from Last.
+///
+/// Updates "First" to just past the next identifier, if any.  Returns true iff
+/// the identifier matches "Id".
+bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
+                                 const char *const End) {
+  skipWhitespace(First, End);
+  if (First == End || !isIdentifierHead(*First))
+    return false;
+
+  IdInfo FoundId = lexIdentifier(First, End);
+  First = FoundId.Last;
+  return FoundId.Name == Id;
+}
+
+bool Minimizer::lexAt(const char *&First, const char *const End) {
+  // Handle "@import".
+  const char *ImportLoc = First++;
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+  makeToken(decl_at_import);
+  append("@import ");
+  if (printAtImportBody(First, End))
+    return reportError(
+        ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
+  skipWhitespace(First, End);
+  if (First == End)
+    return false;
+  if (!isVerticalWhitespace(*First))
+    return reportError(
+        ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
+  skipNewline(First, End);
+  return false;
+}
+
+bool Minimizer::lexDefine(const char *&First, const char *const End) {
+  makeToken(pp_define);
+  append("#define ");
+  skipWhitespace(First, End);
+
+  if (!isIdentifierHead(*First))
+    return reportError(First, diag::err_pp_macro_not_identifier);
+
+  IdInfo Id = lexIdentifier(First, End);
+  const char *Last = Id.Last;
+  append(Id.Name);
+  if (Last == End)
+    return false;
+  if (*Last == '(') {
+    size_t Size = Out.size();
+    if (printMacroArgs(Last, End)) {
+      // Be robust to bad macro arguments, since they can show up in disabled
+      // code.
+      Out.resize(Size);
+      append("(/* invalid */\n");
+      skipLine(Last, End);
+      return false;
+    }
+  }
+  skipWhitespace(Last, End);
+  if (Last == End)
+    return false;
+  if (!isVerticalWhitespace(*Last))
+    put(' ');
+  printDirectiveBody(Last, End);
+  First = Last;
+  return false;
+}
+
+bool Minimizer::lexPragma(const char *&First, const char *const End) {
+  // #pragma.
+  if (!isNextIdentifier("clang", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang.
+  if (!isNextIdentifier("module", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module.
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module import.
+  makeToken(pp_pragma_import);
+  append("#pragma clang module import ");
+  printDirectiveBody(First, End);
+  return false;
+}
+
+bool Minimizer::lexEndif(const char *&First, const char *const End) {
+  // Strip out "#else" if it's empty.
+  if (top() == pp_else)
+    popToken();
+
+  // Strip out "#elif" if they're empty.
+  while (top() == pp_elif)
+    popToken();
+
+  // If "#if" is empty, strip it and skip the "#endif".
+  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+    popToken();
+    skipLine(First, End);
+    return false;
+  }
+
+  return lexDefault(pp_endif, "endif", First, End);
+}
+
+bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
+                           const char *&First, const char *const End) {
+  makeToken(Kind);
+  put('#').append(Directive).put(' ');
+  printDirectiveBody(First, End);
+  return false;
+}
+
+bool Minimizer::lexPPLine(const char *&First, const char *const End) {
+  assert(First != End);
+
+  skipWhitespace(First, End);
+  assert(First <= End);
+  if (First == End)
+    return false;
+
+  if (*First != '#' && *First != '@') {
+    skipLine(First, End);
+    assert(First <= End);
+    return false;
+  }
+
+  // Handle "@import".
+  if (*First == '@')
+    return lexAt(First, End);
+
+  // Handle preprocessing directives.
+  ++First; // Skip over '#'.
+  skipWhitespace(First, End);
+
+  if (First == End)
+    return reportError(First, diag::err_pp_expected_eol);
+
+  if (!isIdentifierHead(*First)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // Figure out the token.
+  IdInfo Id = lexIdentifier(First, End);
+  First = Id.Last;
+  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
+                  .Case("include", pp_include)
+                  .Case("__include_macros", pp___include_macros)
+                  .Case("define", pp_define)
+                  .Case("undef", pp_undef)
+                  .Case("import", pp_import)
+                  .Case("include_next", pp_include_next)
+                  .Case("if", pp_if)
+                  .Case("ifdef", pp_ifdef)
+                  .Case("ifndef", pp_ifndef)
+                  .Case("elif", pp_elif)
+                  .Case("else", pp_else)
+                  .Case("endif", pp_endif)
+                  .Case("pragma", pp_pragma_import)
+                  .Default(pp_none);
+  if (Kind == pp_none) {
+    skipDirective(Id.Name, First, End);
+    return false;
+  }
+
+  if (Kind == pp_endif)
+    return lexEndif(First, End);
+
+  if (Kind == pp_define)
+    return lexDefine(First, End);
+
+  if (Kind == pp_pragma_import)
+    return lexPragma(First, End);
+
+  // Everything else.
+  return lexDefault(Kind, Id.Name, First, End);
+}
+
+bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+  while (First != End)
+    if (lexPPLine(First, End))
+      return true;
+  return false;
+}
+
+bool Minimizer::minimize() {
+  bool Error = minimizeImpl(Input.begin(), Input.end());
+
+  if (!Error) {
+    // Add a trailing newline and an EOF on success.
+    if (!Out.empty() && Out.back() != '\n')
+      Out.push_back('\n');
+    makeToken(pp_eof);
+  }
+
+  // Null-terminate the output. This way the memory buffer that's passed to
+  // Clang will not have to worry about the terminating '\0'.
+  Out.push_back(0);
+  Out.pop_back();
+  return Error;
+}
+
+bool clang::minimizeSourceToDependencyDirectives(
+    StringRef Input, SmallVectorImpl<char> &Output,
+    SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
+    SourceLocation InputSourceLoc) {
+  Output.clear();
+  Tokens.clear();
+  return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
+}
diff --git a/clang/test/Frontend/minimize_source_to_dependency_directives.c b/clang/test/Frontend/minimize_source_to_dependency_directives.c
new file mode 100644
index 0000000000000..39f608b264a6a
--- /dev/null
+++ b/clang/test/Frontend/minimize_source_to_dependency_directives.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s > %t
+// RUN: echo END. >> %t
+// RUN: FileCheck < %t %s
+
+#ifdef FOO
+#include "a.h"
+#else
+void skipThisCode();
+#endif
+
+// CHECK:      #ifdef FOO
+// CHECK-NEXT: #include "a.h"
+// CHECK-NEXT: #endif
+// CHECK-NEXT: END.
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m b/clang/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
new file mode 100644
index 0000000000000..ef210af5c57dd
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+@import x; a // expected-error {{unexpected extra tokens at end of @import declaration}}
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m b/clang/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
new file mode 100644
index 0000000000000..8962e31a05f21
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+@import x // expected-error {{could not find ';' after @import}}
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
new file mode 100644
index 0000000000000..fa4ff7dcb8bb8
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+#define 0 0 // expected-error {{macro name must be an identifier}}
diff --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt
index bb0f66d860734..dbc8328eda026 100644
--- a/clang/unittests/Lex/CMakeLists.txt
+++ b/clang/unittests/Lex/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 add_clang_unittest(LexTests
+  DependencyDirectivesSourceMinimizerTest.cpp
   HeaderMapTest.cpp
   HeaderSearchTest.cpp
   LexerTest.cpp
diff --git a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
new file mode 100644
index 0000000000000..7feb6c97fd170
--- /dev/null
+++ b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -0,0 +1,508 @@
+//===- unittests/Lex/DependencyDirectivesSourceMinimizer.cpp -  -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+namespace clang {
+
+bool minimizeSourceToDependencyDirectives(StringRef Input,
+                                          SmallVectorImpl<char> &Out) {
+  SmallVector<minimize_source_to_dependency_directives::Token, 32> Tokens;
+  return minimizeSourceToDependencyDirectives(Input, Out, Tokens);
+}
+
+} // end namespace clang
+
+namespace {
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define A\n"
+                                           "#undef A\n"
+                                           "#endif\n"
+                                           "#if A\n"
+                                           "#ifdef A\n"
+                                           "#ifndef A\n"
+                                           "#elif A\n"
+                                           "#else\n"
+                                           "#include <A>\n"
+                                           "#include_next <A>\n"
+                                           "#__include_macros <A>\n"
+                                           "#import <A>\n"
+                                           "@import A;\n"
+                                           "#pragma clang module import A\n",
+                                           Out, Tokens));
+  EXPECT_EQ(pp_define, Tokens[0].K);
+  EXPECT_EQ(pp_undef, Tokens[1].K);
+  EXPECT_EQ(pp_endif, Tokens[2].K);
+  EXPECT_EQ(pp_if, Tokens[3].K);
+  EXPECT_EQ(pp_ifdef, Tokens[4].K);
+  EXPECT_EQ(pp_ifndef, Tokens[5].K);
+  EXPECT_EQ(pp_elif, Tokens[6].K);
+  EXPECT_EQ(pp_else, Tokens[7].K);
+  EXPECT_EQ(pp_include, Tokens[8].K);
+  EXPECT_EQ(pp_include_next, Tokens[9].K);
+  EXPECT_EQ(pp___include_macros, Tokens[10].K);
+  EXPECT_EQ(pp_import, Tokens[11].K);
+  EXPECT_EQ(decl_at_import, Tokens[12].K);
+  EXPECT_EQ(pp_pragma_import, Tokens[13].K);
+  EXPECT_EQ(pp_eof, Tokens[14].K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
+  SmallVector<char, 128> Out;
+  SmallVector<Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+  ASSERT_EQ(2u, Tokens.size());
+  ASSERT_EQ(pp_define, Tokens.front().K);
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO a\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define   MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out));
+  EXPECT_STREQ("#define MACRO()\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out));
+  EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO content", Out));
+  EXPECT_STREQ("#define MACRO content\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO   con  tent   ", Out));
+  EXPECT_STREQ("#define MACRO con  tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO()   con  tent   ", Out));
+  EXPECT_STREQ("#define MACRO() con  tent\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\t)\tcon \t tent\t", Out));
+  EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\f)\fcon \f tent\f", Out));
+  EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\v)\vcon \v tent\v", Out));
+  EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out));
+  EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a        \\\n"
+                                           "              )",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\n"
+                                           "              b)       \\\n"
+                                           "        call((a),      \\\n"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\r"
+                                           "              b)       \\\r"
+                                           "        call((a),      \\\r"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturnNewline) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\r\n"
+                                           "              b)       \\\r\n"
+                                           "        call((a),      \\\r\n"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsNewlineCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a,       \\\n\r"
+                                           "              b)       \\\n\r"
+                                           "        call((a),      \\\n\r"
+                                           "             (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n"
+                                                    "&\n",
+                                                    Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a/*\n"
+                                           "  /*\n"
+                                           "#define MISSING abc\n"
+                                           "  /*\n"
+                                           "  /* something */ \n"
+                                           "#include  /* \"def\" */ <abc> \n",
+                                           Out));
+  EXPECT_STREQ("#define MACRO a\n"
+               "#include <abc>\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO1 \"/*\"\n"
+                                                    "#define MACRO2 \"*/\"\n",
+                                                    Out));
+  EXPECT_STREQ("#define MACRO1 \"/*\"\n"
+               "#define MACRO2 \"*/\"\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#elif B\n"
+                                                    "#define C\n"
+                                                    "#elif C\n"
+                                                    "#define D\n"
+                                                    "#else\n"
+                                                    "#define E\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#elif B\n"
+               "#define C\n"
+               "#elif C\n"
+               "#define D\n"
+               "#else\n"
+               "#define E\n"
+               "#endif\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIfdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#elif B\n"
+                                                    "#elif C\n"
+                                                    "#else D\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module impor\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module import\n", Out));
+  EXPECT_STREQ("#pragma clang module import\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, Include) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \"A\"\n", Out));
+  EXPECT_STREQ("#include \"A\"\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include <A>\n", Out));
+  EXPECT_STREQ("#include <A>\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#include_next <A>\n", Out));
+  EXPECT_STREQ("#include_next <A>\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#import <A>\n", Out));
+  EXPECT_STREQ("#import <A>\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#__include_macros <A>\n", Out));
+  EXPECT_STREQ("#__include_macros <A>\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import  A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "R\"()\"\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"(-?:\,[]{}#&*!|>'"%@`)";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"abc(-?:\,[]{}#&*!|>'"%@`)abc";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#if\\\n"
+                                                    "ndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "           RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUA RD\n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) {
+  SmallVector<char, 128> Out;
+
+  for (auto Source : {
+           "#warning '\n#include <t.h>\n",
+           "#warning \"\n#include <t.h>\n",
+           "#warning /*\n#include <t.h>\n",
+           "#warning \\\n#include <t.h>\n#include <t.h>\n",
+           "#error '\n#include <t.h>\n",
+           "#error \"\n#include <t.h>\n",
+           "#error /*\n#include <t.h>\n",
+           "#error \\\n#include <t.h>\n#include <t.h>\n",
+       }) {
+    ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+    EXPECT_STREQ("#include <t.h>\n", Out.data());
+  }
+
+  for (auto Source : {
+           "#warning \\\n#include <t.h>\n",
+           "#error \\\n#include <t.h>\n",
+           "#if MACRO\n#warning '\n#endif\n",
+           "#if MACRO\n#warning \"\n#endif\n",
+           "#if MACRO\n#warning /*\n#endif\n",
+           "#if MACRO\n#error '\n#endif\n",
+           "#if MACRO\n#error \"\n#endif\n",
+           "#if MACRO\n#error /*\n#endif\n",
+       }) {
+    ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+    EXPECT_STREQ("", Out.data());
+  }
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteral) {
+  SmallVector<char, 128> Out;
+
+  StringRef Source = R"(
+#include <bob>
+int a = 0'1;
+int b = 0xfa'af'fa;
+int c = 12 ' ';
+#include <foo>
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include <bob>\n#include <foo>\n", Out.data());
+}
+
+} // end anonymous namespace

From b3650868f617984691e3171e2f5d475534467ed4 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 3 Jun 2019 23:00:51 +0000
Subject: [PATCH 0978/1176] [NFC][X86] Fixup FileCheck prefixes - drop
 duplicates

llvm-svn: 362460
---
 .../X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll     | 8 ++++----
 .../X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll      | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index d4cab1a392195..b5bf462514e61 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-NOBMI
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI1
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI12
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI12
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-NOBMI
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI1
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI12
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI12
 
 ; We are looking for the following pattern here:
 ;   (X & (C l>> Y)) ==/!= 0
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 1f190fdee26fb..0a6ce5c0e3f10 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-NOBMI
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI1
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86,X86-BMI12
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI12
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-NOBMI
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI1
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64,X64-BMI12
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI12
 
 ; We are looking for the following pattern here:
 ;   (X & (C << Y)) ==/!= 0

From b978f72058bae8c1082da7c55e0dafce7ab0d1ff Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Mon, 3 Jun 2019 23:12:11 +0000
Subject: [PATCH 0979/1176] [Target] Generalize some behavior in
 Target::SymbolsDidLoad

Summary:
SymbolsDidLoad is currently only implemented for ObjCLanguageRuntime,
but that doesn't mean that it couldn't be useful for other Langauges. Although
this change seems like it's generalizing for the sake of purity, this removes
Target's dependency on ObjCLanguageRuntime.

Differential Revision: https://reviews.llvm.org/D62796

llvm-svn: 362461
---
 lldb/include/lldb/Target/LanguageRuntime.h     | 2 ++
 lldb/include/lldb/Target/ObjCLanguageRuntime.h | 2 +-
 lldb/source/Target/Target.cpp                  | 8 ++------
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h
index c19cf77898e68..e88bf26a54cc3 100644
--- a/lldb/include/lldb/Target/LanguageRuntime.h
+++ b/lldb/include/lldb/Target/LanguageRuntime.h
@@ -143,6 +143,8 @@ class LanguageRuntime : public PluginInterface {
     return false;
   }
 
+  virtual void SymbolsDidLoad(const ModuleList &module_list) { return; }
+
   virtual lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread,
                                                           bool stop_others) = 0;
 
diff --git a/lldb/include/lldb/Target/ObjCLanguageRuntime.h b/lldb/include/lldb/Target/ObjCLanguageRuntime.h
index 7d3613bfd9144..85a070c19ec61 100644
--- a/lldb/include/lldb/Target/ObjCLanguageRuntime.h
+++ b/lldb/include/lldb/Target/ObjCLanguageRuntime.h
@@ -275,7 +275,7 @@ class ObjCLanguageRuntime : public LanguageRuntime {
     return (m_has_new_literals_and_indexing == eLazyBoolYes);
   }
 
-  virtual void SymbolsDidLoad(const ModuleList &module_list) {
+  void SymbolsDidLoad(const ModuleList &module_list) override {
     m_negative_complete_class_cache.clear();
   }
 
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index 14755f60c5522..41772e0f8bce1 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -43,7 +43,6 @@
 #include "lldb/Symbol/Symbol.h"
 #include "lldb/Target/Language.h"
 #include "lldb/Target/LanguageRuntime.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/SectionLoadList.h"
 #include "lldb/Target/StackFrame.h"
@@ -1668,11 +1667,8 @@ void Target::ModulesDidLoad(ModuleList &module_list) {
 void Target::SymbolsDidLoad(ModuleList &module_list) {
   if (m_valid && module_list.GetSize()) {
     if (m_process_sp) {
-      LanguageRuntime *runtime =
-          m_process_sp->GetLanguageRuntime(lldb::eLanguageTypeObjC);
-      if (runtime) {
-        ObjCLanguageRuntime *objc_runtime = (ObjCLanguageRuntime *)runtime;
-        objc_runtime->SymbolsDidLoad(module_list);
+      for (LanguageRuntime *runtime : m_process_sp->GetLanguageRuntimes()) {
+        runtime->SymbolsDidLoad(module_list);
       }
     }
 

From 6d04fd15b5a2b46a844132fd724cda87d764cc9c Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@gmail.com>
Date: Mon, 3 Jun 2019 23:16:06 +0000
Subject: [PATCH 0980/1176] Remove test/CodeGen/builtin-stackaddress.c as it
 duplicates test/CodeGen/2004-02-13-BuiltinFrameReturnAddress.c.

Differential Revision: https://reviews.llvm.org/D62133

llvm-svn: 362462
---
 clang/test/CodeGen/builtin-stackaddress.c | 9 ---------
 1 file changed, 9 deletions(-)
 delete mode 100644 clang/test/CodeGen/builtin-stackaddress.c

diff --git a/clang/test/CodeGen/builtin-stackaddress.c b/clang/test/CodeGen/builtin-stackaddress.c
deleted file mode 100644
index f13b90eb9ed3f..0000000000000
--- a/clang/test/CodeGen/builtin-stackaddress.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: %clang_cc1 -emit-llvm < %s | grep "llvm.returnaddress"
-// RUN: %clang_cc1 -emit-llvm < %s | grep "llvm.frameaddress"
-void* a(unsigned x) {
-return __builtin_return_address(0);
-}
-
-void* c(unsigned x) {
-return __builtin_frame_address(0);
-}

From bb39f78113fd3598c551018cb3f3f3d3228e13c1 Mon Sep 17 00:00:00 2001
From: Alex Lorenz <arphaman@gmail.com>
Date: Mon, 3 Jun 2019 23:17:21 +0000
Subject: [PATCH 0981/1176] Fix windows build for commit r362459

llvm-svn: 362463
---
 .../lib/Lex/DependencyDirectivesSourceMinimizer.cpp  | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
index 802b7ba159979..6d7dfd1145956 100644
--- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -122,7 +122,7 @@ LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
   assert(First <= Current);
 
   // Check if we can even back up.
-  if (*Current != '\"' || First == Current)
+  if (*Current != '"' || First == Current)
     return false;
 
   // Check for an "R".
@@ -143,7 +143,7 @@ LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
 }
 
 static void skipRawString(const char *&First, const char *const End) {
-  assert(First[0] == '\"');
+  assert(First[0] == '"');
   assert(First[-1] == 'R');
 
   const char *Last = ++First;
@@ -177,7 +177,7 @@ static void skipRawString(const char *&First, const char *const End) {
     }
     if (size_t(Last - First) < Terminator.size())
       continue;
-    if (*Last != '\"')
+    if (*Last != '"')
       continue;
     First = Last + 1;
     return;
@@ -185,7 +185,7 @@ static void skipRawString(const char *&First, const char *const End) {
 }
 
 static void skipString(const char *&First, const char *const End) {
-  assert(*First == '\'' || *First == '\"');
+  assert(*First == '\'' || *First == '"');
   const char Terminator = *First;
   for (++First; First != End && *First != Terminator; ++First)
     if (*First == '\\')
@@ -281,7 +281,7 @@ static void skipLine(const char *&First, const char *const End) {
     const char *Start = First;
     while (First != End && !isVerticalWhitespace(*First)) {
       // Iterate over strings correctly to avoid comments and newlines.
-      if (*First == '\"' ||
+      if (*First == '"' ||
           (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
         if (isRawStringLiteral(Start, First))
           skipRawString(First, End);
@@ -336,7 +336,7 @@ void Minimizer::printToNewline(const char *&First, const char *const End) {
     const char *Last = First;
     do {
       // Iterate over strings correctly to avoid comments and newlines.
-      if (*Last == '\"' || *Last == '\'') {
+      if (*Last == '"' || *Last == '\'') {
         if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
           skipRawString(Last, End);
         else

From 59839124f49d69bb4846b2f9c622054c534a63cd Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Mon, 3 Jun 2019 23:48:14 +0000
Subject: [PATCH 0982/1176] Add a release note entry for the change made in
 r362034.

llvm-svn: 362464
---
 clang/docs/ReleaseNotes.rst | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a13c454083150..29c900d523207 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -123,7 +123,14 @@ C++1z Feature Support
 Objective-C Language Changes in Clang
 -------------------------------------
 
-...
+- Fixed encoding of ObjC pointer types that are pointers to typedefs.
+
+.. code-block:: c++
+
+      typedef NSArray<NSObject *> MyArray;
+
+      // clang used to encode this as "^{NSArray=#}" instead of "@".
+      const char *s0 = @encode(MyArray *);
 
 OpenCL C Language Changes in Clang
 ----------------------------------

From f67524d4006c1080997fdead8af9fcba6cfc7e6e Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Tue, 4 Jun 2019 02:07:11 +0000
Subject: [PATCH 0983/1176] We had a _LIBCPP_ASSERT commented out because gcc
 4.9 didn't like it. We (LLVM) now require GCC 5.1, so that's not a problem
 any more. Re-enable the assertion. Fixes PR#36863

llvm-svn: 362465
---
 libcxx/include/string_view | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/string_view b/libcxx/include/string_view
index d29bcc3e8c1a8..aa93e1f6fa711 100644
--- a/libcxx/include/string_view
+++ b/libcxx/include/string_view
@@ -228,9 +228,9 @@ public:
     basic_string_view(const _CharT* __s, size_type __len) _NOEXCEPT
         : __data(__s), __size(__len)
     {
-// #if _LIBCPP_STD_VER > 11
-//         _LIBCPP_ASSERT(__len == 0 || __s != nullptr, "string_view::string_view(_CharT *, size_t): received nullptr");
-// #endif
+#if _LIBCPP_STD_VER > 11
+    _LIBCPP_ASSERT(__len == 0 || __s != nullptr, "string_view::string_view(_CharT *, size_t): received nullptr");
+#endif
     }
 
     _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY

From d2d6c17760d59a3dcab722bea80663590a52fe2e Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Tue, 4 Jun 2019 02:38:15 +0000
Subject: [PATCH 0984/1176] [builtins] Use libtool for builtins when building
 for Apple platform

compiler-rt already uses libtool instead of ar when building for
Apple platform, but that's not being used when builtins are being
built separately e.g. as part of the runtimes build. This change
extracts the logic setting up libtool into a separate file and uses
it from both the compiler-rt and standalone builtins build.

Differential Revision: https://reviews.llvm.org/D62820

llvm-svn: 362466
---
 compiler-rt/CMakeLists.txt                 | 54 +---------------------
 compiler-rt/cmake/Modules/UseLibtool.cmake | 50 ++++++++++++++++++++
 compiler-rt/lib/builtins/CMakeLists.txt    |  3 ++
 3 files changed, 55 insertions(+), 52 deletions(-)
 create mode 100644 compiler-rt/cmake/Modules/UseLibtool.cmake

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 14f514a96ab72..1baac0133cfa3 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -89,58 +89,8 @@ if (COMPILER_RT_STANDALONE_BUILD)
   endif()
 
   # Ensure that fat libraries are built correctly on Darwin
-  if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
-    if(NOT CMAKE_LIBTOOL)
-      find_program(CMAKE_XCRUN
-                   NAMES
-                     xcrun)
-      if(CMAKE_XCRUN)
-        execute_process(COMMAND
-                          ${CMAKE_XCRUN} -find libtool
-                        OUTPUT_VARIABLE
-                          CMAKE_LIBTOOL
-                        OUTPUT_STRIP_TRAILING_WHITESPACE)
-      endif()
-
-      if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
-        find_program(CMAKE_LIBTOOL
-                     NAMES
-                       libtool)
-      endif()
-    endif()
-
-    get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
-
-    if(CMAKE_LIBTOOL)
-      set(CMAKE_LIBTOOL ${CMAKE_LIBTOOL} CACHE PATH "libtool executable")
-      message(STATUS "Found libtool - ${CMAKE_LIBTOOL}")
-
-      execute_process(COMMAND
-                        ${CMAKE_LIBTOOL} -V
-                      OUTPUT_VARIABLE
-                        LIBTOOL_V_OUTPUT
-                      OUTPUT_STRIP_TRAILING_WHITESPACE)
-      if("${LIBTOOL_V_OUTPUT}" MATCHES ".*cctools-([0-9]+).*")
-        string(REGEX REPLACE ".*cctools-([0-9]+).*" "\\1" LIBTOOL_VERSION ${LIBTOOL_V_OUTPUT})
-        if(NOT LIBTOOL_VERSION VERSION_LESS "862")
-          set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols")
-        endif()
-      endif()
-
-      foreach(lang ${languages})
-        set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "\"${CMAKE_LIBTOOL}\" -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> <LINK_FLAGS> <OBJECTS>")
-      endforeach()
-    endif()
-
-    # Workaround SIP :-(
-    if(DYLD_LIBRARY_PATH)
-      set(dyld_envar "DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}")
-      foreach(lang ${languages})
-        foreach(cmd ${CMAKE_${lang}_CREATE_STATIC_LIBRARY})
-          list(APPEND CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW "${dyld_envar} ${cmd}")
-        endforeach()
-      endforeach()
-    endif()
+  if(CMAKE_HOST_APPLE AND APPLE)
+    include(UseLibtool)
   endif()
 
   # Define default arguments to lit.
diff --git a/compiler-rt/cmake/Modules/UseLibtool.cmake b/compiler-rt/cmake/Modules/UseLibtool.cmake
new file mode 100644
index 0000000000000..38d197d4846fd
--- /dev/null
+++ b/compiler-rt/cmake/Modules/UseLibtool.cmake
@@ -0,0 +1,50 @@
+# if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program
+if(NOT CMAKE_LIBTOOL)
+  if(NOT CMAKE_XCRUN)
+    find_program(CMAKE_XCRUN NAMES xcrun)
+  endif()
+  if(CMAKE_XCRUN)
+    execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
+      OUTPUT_VARIABLE CMAKE_LIBTOOL
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+  endif()
+
+  if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
+    find_program(CMAKE_LIBTOOL NAMES libtool)
+  endif()
+endif()
+
+get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
+if(CMAKE_LIBTOOL)
+  set(CMAKE_LIBTOOL ${CMAKE_LIBTOOL} CACHE PATH "libtool executable")
+  message(STATUS "Found libtool - ${CMAKE_LIBTOOL}")
+
+  execute_process(COMMAND ${CMAKE_LIBTOOL} -V
+    OUTPUT_VARIABLE LIBTOOL_V_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if("${LIBTOOL_V_OUTPUT}" MATCHES ".*cctools-([0-9.]+).*")
+    string(REGEX REPLACE ".*cctools-([0-9.]+).*" "\\1" LIBTOOL_VERSION
+      ${LIBTOOL_V_OUTPUT})
+    if(NOT LIBTOOL_VERSION VERSION_LESS "862")
+      set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols")
+    endif()
+  endif()
+
+  foreach(lang ${languages})
+    set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
+      "\"${CMAKE_LIBTOOL}\" -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> <LINK_FLAGS> <OBJECTS>")
+  endforeach()
+endif()
+
+# If DYLD_LIBRARY_PATH is set we need to set it on archiver commands
+if(DYLD_LIBRARY_PATH)
+  set(dyld_envar "DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}")
+  foreach(lang ${languages})
+    foreach(cmd ${CMAKE_${lang}_CREATE_STATIC_LIBRARY})
+      list(APPEND CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW
+           "${dyld_envar} ${cmd}")
+    endforeach()
+    set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
+      ${CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW})
+  endforeach()
+endif()
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index d0bd07b636a37..728581d977e00 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -20,6 +20,9 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   if(APPLE)
     include(CompilerRTDarwinUtils)
   endif()
+  if(CMAKE_HOST_APPLE AND APPLE)
+    include(UseLibtool)
+  endif()
   include(AddCompilerRT)
 endif()
 

From 73e6f47da249dcd869320b539da7625b6864f110 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Tue, 4 Jun 2019 04:25:44 +0000
Subject: [PATCH 0985/1176] [clangd] SymbolCollector support for relations

Summary:
The only relation currently collected is RelationBaseOf, because this is
all we need for type hierarchy subtypes. Additional relations can be
collected in the future as the need arises.

This patch builds on D59407 and D62459.

Reviewers: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62471

llvm-svn: 362467
---
 .../clangd/index/SymbolCollector.cpp          | 51 +++++++++++++++++--
 .../clangd/index/SymbolCollector.h            |  5 ++
 .../clangd/unittests/SymbolCollectorTests.cpp | 31 ++++++++---
 3 files changed, 75 insertions(+), 12 deletions(-)

diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 507d0ea6ba64d..f7b027c3240b5 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -193,6 +193,11 @@ RefKind toRefKind(index::SymbolRoleSet Roles) {
   return static_cast<RefKind>(static_cast<unsigned>(RefKind::All) & Roles);
 }
 
+bool shouldIndexRelation(const index::SymbolRelation &R) {
+  // We currently only index BaseOf relations, for type hierarchy subtypes.
+  return R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf);
+}
+
 } // namespace
 
 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
@@ -291,6 +296,16 @@ bool SymbolCollector::handleDeclOccurence(
       SM.getFileID(SpellingLoc) == SM.getMainFileID())
     ReferencedDecls.insert(ND);
 
+  auto ID = getSymbolID(ND);
+  if (!ID)
+    return true;
+
+  // Note: we need to process relations for all decl occurrences, including
+  // refs, because the indexing code only populates relations for specific
+  // occurrences. For example, RelationBaseOf is only populated for the
+  // occurrence inside the base-specifier.
+  processRelations(*ND, *ID, Relations);
+
   bool CollectRef = static_cast<unsigned>(Opts.RefFilter) & Roles;
   bool IsOnlyRef =
       !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
@@ -315,10 +330,6 @@ bool SymbolCollector::handleDeclOccurence(
   if (IsOnlyRef)
     return true;
 
-  auto ID = getSymbolID(ND);
-  if (!ID)
-    return true;
-
   // FIXME: ObjCPropertyDecl are not properly indexed here:
   // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
   // not a NamedDecl.
@@ -338,6 +349,7 @@ bool SymbolCollector::handleDeclOccurence(
 
   if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
     addDefinition(*OriginalDecl, *BasicSymbol);
+
   return true;
 }
 
@@ -416,6 +428,37 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name,
   return true;
 }
 
+void SymbolCollector::processRelations(
+    const NamedDecl &ND, const SymbolID &ID,
+    ArrayRef<index::SymbolRelation> Relations) {
+  // Store subtype relations.
+  if (!dyn_cast<TagDecl>(&ND))
+    return;
+
+  for (const auto &R : Relations) {
+    if (!shouldIndexRelation(R))
+      continue;
+
+    const Decl *Object = R.RelatedSymbol;
+
+    auto ObjectID = getSymbolID(Object);
+    if (!ObjectID)
+      continue;
+
+    // Record the relation.
+    // TODO: There may be cases where the object decl is not indexed for some
+    // reason. Those cases should probably be removed in due course, but for
+    // now there are two possible ways to handle it:
+    //   (A) Avoid storing the relation in such cases.
+    //   (B) Store it anyways. Clients will likely lookup() the SymbolID
+    //       in the index and find nothing, but that's a situation they
+    //       probably need to handle for other reasons anyways.
+    // We currently do (B) because it's simpler.
+    this->Relations.insert(
+        Relation{ID, index::SymbolRole::RelationBaseOf, *ObjectID});
+  }
+}
+
 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
   if (Opts.CollectIncludePath)
     if (shouldCollectIncludePath(S.SymInfo.Kind))
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h
index f746002bbd3e3..3c28a451406dd 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.h
+++ b/clang-tools-extra/clangd/index/SymbolCollector.h
@@ -110,6 +110,7 @@ class SymbolCollector : public index::IndexDataConsumer {
 
   SymbolSlab takeSymbols() { return std::move(Symbols).build(); }
   RefSlab takeRefs() { return std::move(Refs).build(); }
+  RelationSlab takeRelations() { return std::move(Relations).build(); }
 
   void finish() override;
 
@@ -117,6 +118,8 @@ class SymbolCollector : public index::IndexDataConsumer {
   const Symbol *addDeclaration(const NamedDecl &, SymbolID,
                                bool IsMainFileSymbol);
   void addDefinition(const NamedDecl &, const Symbol &DeclSymbol);
+  void processRelations(const NamedDecl &ND, const SymbolID &ID,
+                        ArrayRef<index::SymbolRelation> Relations);
 
   llvm::Optional<std::string> getIncludeHeader(llvm::StringRef QName, FileID);
   bool isSelfContainedHeader(FileID);
@@ -135,6 +138,8 @@ class SymbolCollector : public index::IndexDataConsumer {
   // Only symbols declared in preamble (from #include) and referenced from the
   // main file will be included.
   RefSlab::Builder Refs;
+  // All relations collected from the AST.
+  RelationSlab::Builder Relations;
   ASTContext *ASTCtx;
   std::shared_ptr<Preprocessor> PP;
   std::shared_ptr<GlobalCodeCompletionAllocator> CompletionAllocator;
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index e422f5ca82b53..4a714388016b1 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -123,8 +123,9 @@ class ShouldCollectSymbolTest : public ::testing::Test {
     assert(AST.hasValue());
     const NamedDecl &ND =
         Qualified ? findDecl(*AST, Name) : findUnqualifiedDecl(*AST, Name);
-    const SourceManager& SM = AST->getSourceManager();
-    bool MainFile = SM.isWrittenInMainFile(SM.getExpansionLoc(ND.getBeginLoc()));
+    const SourceManager &SM = AST->getSourceManager();
+    bool MainFile =
+        SM.isWrittenInMainFile(SM.getExpansionLoc(ND.getBeginLoc()));
     return SymbolCollector::shouldCollectSymbol(
         ND, AST->getASTContext(), SymbolCollector::Options(), MainFile);
   }
@@ -272,13 +273,14 @@ class SymbolCollectorTest : public ::testing::Test {
         Args, Factory->create(), Files.get(),
         std::make_shared<PCHContainerOperations>());
 
-    InMemoryFileSystem->addFile(
-        TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+    InMemoryFileSystem->addFile(TestHeaderName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(HeaderCode));
     InMemoryFileSystem->addFile(TestFileName, 0,
                                 llvm::MemoryBuffer::getMemBuffer(MainCode));
     Invocation.run();
     Symbols = Factory->Collector->takeSymbols();
     Refs = Factory->Collector->takeRefs();
+    Relations = Factory->Collector->takeRelations();
     return true;
   }
 
@@ -290,6 +292,7 @@ class SymbolCollectorTest : public ::testing::Test {
   std::string TestFileURI;
   SymbolSlab Symbols;
   RefSlab Refs;
+  RelationSlab Relations;
   SymbolCollector::Options CollectorOpts;
   std::unique_ptr<CommentHandler> PragmaHandler;
 };
@@ -634,6 +637,19 @@ TEST_F(SymbolCollectorTest, RefsInHeaders) {
                                   HaveRanges(Header.ranges()))));
 }
 
+TEST_F(SymbolCollectorTest, Relations) {
+  std::string Header = R"(
+  class Base {};
+  class Derived : public Base {};
+  )";
+  runSymbolCollector(Header, /*Main=*/"");
+  const Symbol &Base = findSymbol(Symbols, "Base");
+  const Symbol &Derived = findSymbol(Symbols, "Derived");
+  EXPECT_THAT(Relations,
+              Contains(Relation{Base.ID, index::SymbolRole::RelationBaseOf,
+                                Derived.ID}));
+}
+
 TEST_F(SymbolCollectorTest, References) {
   const std::string Header = R"(
     class W;
@@ -783,10 +799,9 @@ TEST_F(SymbolCollectorTest, SymbolsInMainFile) {
     void f1() {}
   )";
   runSymbolCollector(/*Header=*/"", Main);
-  EXPECT_THAT(Symbols,
-              UnorderedElementsAre(QName("Foo"), QName("f1"), QName("f2"),
-                                   QName("ff"), QName("foo"), QName("foo::Bar"),
-                                   QName("main_f")));
+  EXPECT_THAT(Symbols, UnorderedElementsAre(
+                           QName("Foo"), QName("f1"), QName("f2"), QName("ff"),
+                           QName("foo"), QName("foo::Bar"), QName("main_f")));
 }
 
 TEST_F(SymbolCollectorTest, Documentation) {

From a050b2554400a83996a63b4a8172da2ca468c3de Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz@cn.ibm.com>
Date: Tue, 4 Jun 2019 06:48:14 +0000
Subject: [PATCH 0986/1176] [PowerPC] add testcases for reordering LSR and
 PPCCTRLoops - NFC

llvm-svn: 362468
---
 llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll | 217 +++++++++++++++++++++++
 1 file changed, 217 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll

diff --git a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll
new file mode 100644
index 0000000000000..14fd4c96dea6e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll
@@ -0,0 +1,217 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; void foo(float *data, float d) {
+;   long i;
+;   for (i = 0; i < 8000; i++)
+;     data[i] = d;
+; }
+; 
+; This loop will be unrolled by 96 and vectorized on power9.
+; icmp for loop iteration index and loop trip count(384) has LSRUse for 'reg({0,+,384})'.
+; Make sure above icmp does not impact LSR choose best formulae sets based on 'reg({(192 + %0),+,384})' 
+
+define void @foo(float* nocapture %data, float %d) {
+; CHECK-LABEL: foo:
+; CHECK:  .LBB0_1: # %vector.body
+; CHECK:         add 5, 3, 4
+; CHECK-NEXT:    stxvx 0, 3, 4
+; CHECK-NEXT:    addi 4, 4, 384
+; CHECK-NEXT:    stxv 0, 16(5)
+; CHECK-NEXT:    stxv 0, 32(5)
+; CHECK-NEXT:    stxv 0, 48(5)
+; CHECK-NEXT:    stxv 0, 64(5)
+; CHECK-NEXT:    stxv 0, 80(5)
+; CHECK-NEXT:    stxv 0, 96(5)
+; CHECK-NEXT:    stxv 0, 112(5)
+; CHECK-NEXT:    stxv 0, 128(5)
+; CHECK-NEXT:    stxv 0, 144(5)
+; CHECK-NEXT:    stxv 0, 160(5)
+; CHECK-NEXT:    stxv 0, 176(5)
+; CHECK-NEXT:    stxv 0, 192(5)
+; CHECK-NEXT:    stxv 0, 208(5)
+; CHECK-NEXT:    stxv 0, 224(5)
+; CHECK-NEXT:    stxv 0, 240(5)
+; CHECK-NEXT:    stxv 0, 256(5)
+; CHECK-NEXT:    stxv 0, 272(5)
+; CHECK-NEXT:    stxv 0, 288(5)
+; CHECK-NEXT:    stxv 0, 304(5)
+; CHECK-NEXT:    stxv 0, 320(5)
+; CHECK-NEXT:    stxv 0, 336(5)
+; CHECK-NEXT:    stxv 0, 352(5)
+; CHECK-NEXT:    stxv 0, 368(5)
+; CHECK-NEXT:    bdnz .LBB0_1
+
+entry:
+  %broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat17 = shufflevector <4 x float> %broadcast.splatinsert16, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert18 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert20 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat21 = shufflevector <4 x float> %broadcast.splatinsert20, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert22 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert24 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert26 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat27 = shufflevector <4 x float> %broadcast.splatinsert26, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert28 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert30 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert32 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat33 = shufflevector <4 x float> %broadcast.splatinsert32, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert34 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat35 = shufflevector <4 x float> %broadcast.splatinsert34, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert36 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat37 = shufflevector <4 x float> %broadcast.splatinsert36, <4 x float> undef, <4 x i32> zeroinitializer
+  %broadcast.splatinsert38 = insertelement <4 x float> undef, float %d, i32 0
+  %broadcast.splat39 = shufflevector <4 x float> %broadcast.splatinsert38, <4 x float> undef, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body ]
+  %0 = getelementptr inbounds float, float* %data, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %broadcast.splat17, <4 x float>* %1, align 4
+  %2 = getelementptr inbounds float, float* %0, i64 4
+  %3 = bitcast float* %2 to <4 x float>*
+  store <4 x float> %broadcast.splat19, <4 x float>* %3, align 4
+  %4 = getelementptr inbounds float, float* %0, i64 8
+  %5 = bitcast float* %4 to <4 x float>*
+  store <4 x float> %broadcast.splat21, <4 x float>* %5, align 4
+  %6 = getelementptr inbounds float, float* %0, i64 12
+  %7 = bitcast float* %6 to <4 x float>*
+  store <4 x float> %broadcast.splat23, <4 x float>* %7, align 4
+  %8 = getelementptr inbounds float, float* %0, i64 16
+  %9 = bitcast float* %8 to <4 x float>*
+  store <4 x float> %broadcast.splat25, <4 x float>* %9, align 4
+  %10 = getelementptr inbounds float, float* %0, i64 20
+  %11 = bitcast float* %10 to <4 x float>*
+  store <4 x float> %broadcast.splat27, <4 x float>* %11, align 4
+  %12 = getelementptr inbounds float, float* %0, i64 24
+  %13 = bitcast float* %12 to <4 x float>*
+  store <4 x float> %broadcast.splat29, <4 x float>* %13, align 4
+  %14 = getelementptr inbounds float, float* %0, i64 28
+  %15 = bitcast float* %14 to <4 x float>*
+  store <4 x float> %broadcast.splat31, <4 x float>* %15, align 4
+  %16 = getelementptr inbounds float, float* %0, i64 32
+  %17 = bitcast float* %16 to <4 x float>*
+  store <4 x float> %broadcast.splat33, <4 x float>* %17, align 4
+  %18 = getelementptr inbounds float, float* %0, i64 36
+  %19 = bitcast float* %18 to <4 x float>*
+  store <4 x float> %broadcast.splat35, <4 x float>* %19, align 4
+  %20 = getelementptr inbounds float, float* %0, i64 40
+  %21 = bitcast float* %20 to <4 x float>*
+  store <4 x float> %broadcast.splat37, <4 x float>* %21, align 4
+  %22 = getelementptr inbounds float, float* %0, i64 44
+  %23 = bitcast float* %22 to <4 x float>*
+  store <4 x float> %broadcast.splat39, <4 x float>* %23, align 4
+  %index.next = add nuw nsw i64 %index, 48
+  %24 = getelementptr inbounds float, float* %data, i64 %index.next
+  %25 = bitcast float* %24 to <4 x float>*
+  store <4 x float> %broadcast.splat17, <4 x float>* %25, align 4
+  %26 = getelementptr inbounds float, float* %24, i64 4
+  %27 = bitcast float* %26 to <4 x float>*
+  store <4 x float> %broadcast.splat19, <4 x float>* %27, align 4
+  %28 = getelementptr inbounds float, float* %24, i64 8
+  %29 = bitcast float* %28 to <4 x float>*
+  store <4 x float> %broadcast.splat21, <4 x float>* %29, align 4
+  %30 = getelementptr inbounds float, float* %24, i64 12
+  %31 = bitcast float* %30 to <4 x float>*
+  store <4 x float> %broadcast.splat23, <4 x float>* %31, align 4
+  %32 = getelementptr inbounds float, float* %24, i64 16
+  %33 = bitcast float* %32 to <4 x float>*
+  store <4 x float> %broadcast.splat25, <4 x float>* %33, align 4
+  %34 = getelementptr inbounds float, float* %24, i64 20
+  %35 = bitcast float* %34 to <4 x float>*
+  store <4 x float> %broadcast.splat27, <4 x float>* %35, align 4
+  %36 = getelementptr inbounds float, float* %24, i64 24
+  %37 = bitcast float* %36 to <4 x float>*
+  store <4 x float> %broadcast.splat29, <4 x float>* %37, align 4
+  %38 = getelementptr inbounds float, float* %24, i64 28
+  %39 = bitcast float* %38 to <4 x float>*
+  store <4 x float> %broadcast.splat31, <4 x float>* %39, align 4
+  %40 = getelementptr inbounds float, float* %24, i64 32
+  %41 = bitcast float* %40 to <4 x float>*
+  store <4 x float> %broadcast.splat33, <4 x float>* %41, align 4
+  %42 = getelementptr inbounds float, float* %24, i64 36
+  %43 = bitcast float* %42 to <4 x float>*
+  store <4 x float> %broadcast.splat35, <4 x float>* %43, align 4
+  %44 = getelementptr inbounds float, float* %24, i64 40
+  %45 = bitcast float* %44 to <4 x float>*
+  store <4 x float> %broadcast.splat37, <4 x float>* %45, align 4
+  %46 = getelementptr inbounds float, float* %24, i64 44
+  %47 = bitcast float* %46 to <4 x float>*
+  store <4 x float> %broadcast.splat39, <4 x float>* %47, align 4
+  %index.next.1 = add nuw nsw i64 %index, 96
+  %48 = icmp eq i64 %index.next.1, 7968
+  br i1 %48, label %for.body, label %vector.body
+
+for.body:                                         ; preds = %vector.body
+  %arrayidx = getelementptr inbounds float, float* %data, i64 7968
+  store float %d, float* %arrayidx, align 4
+  %arrayidx.1 = getelementptr inbounds float, float* %data, i64 7969
+  store float %d, float* %arrayidx.1, align 4
+  %arrayidx.2 = getelementptr inbounds float, float* %data, i64 7970
+  store float %d, float* %arrayidx.2, align 4
+  %arrayidx.3 = getelementptr inbounds float, float* %data, i64 7971
+  store float %d, float* %arrayidx.3, align 4
+  %arrayidx.4 = getelementptr inbounds float, float* %data, i64 7972
+  store float %d, float* %arrayidx.4, align 4
+  %arrayidx.5 = getelementptr inbounds float, float* %data, i64 7973
+  store float %d, float* %arrayidx.5, align 4
+  %arrayidx.6 = getelementptr inbounds float, float* %data, i64 7974
+  store float %d, float* %arrayidx.6, align 4
+  %arrayidx.7 = getelementptr inbounds float, float* %data, i64 7975
+  store float %d, float* %arrayidx.7, align 4
+  %arrayidx.8 = getelementptr inbounds float, float* %data, i64 7976
+  store float %d, float* %arrayidx.8, align 4
+  %arrayidx.9 = getelementptr inbounds float, float* %data, i64 7977
+  store float %d, float* %arrayidx.9, align 4
+  %arrayidx.10 = getelementptr inbounds float, float* %data, i64 7978
+  store float %d, float* %arrayidx.10, align 4
+  %arrayidx.11 = getelementptr inbounds float, float* %data, i64 7979
+  store float %d, float* %arrayidx.11, align 4
+  %arrayidx.12 = getelementptr inbounds float, float* %data, i64 7980
+  store float %d, float* %arrayidx.12, align 4
+  %arrayidx.13 = getelementptr inbounds float, float* %data, i64 7981
+  store float %d, float* %arrayidx.13, align 4
+  %arrayidx.14 = getelementptr inbounds float, float* %data, i64 7982
+  store float %d, float* %arrayidx.14, align 4
+  %arrayidx.15 = getelementptr inbounds float, float* %data, i64 7983
+  store float %d, float* %arrayidx.15, align 4
+  %arrayidx.16 = getelementptr inbounds float, float* %data, i64 7984
+  store float %d, float* %arrayidx.16, align 4
+  %arrayidx.17 = getelementptr inbounds float, float* %data, i64 7985
+  store float %d, float* %arrayidx.17, align 4
+  %arrayidx.18 = getelementptr inbounds float, float* %data, i64 7986
+  store float %d, float* %arrayidx.18, align 4
+  %arrayidx.19 = getelementptr inbounds float, float* %data, i64 7987
+  store float %d, float* %arrayidx.19, align 4
+  %arrayidx.20 = getelementptr inbounds float, float* %data, i64 7988
+  store float %d, float* %arrayidx.20, align 4
+  %arrayidx.21 = getelementptr inbounds float, float* %data, i64 7989
+  store float %d, float* %arrayidx.21, align 4
+  %arrayidx.22 = getelementptr inbounds float, float* %data, i64 7990
+  store float %d, float* %arrayidx.22, align 4
+  %arrayidx.23 = getelementptr inbounds float, float* %data, i64 7991
+  store float %d, float* %arrayidx.23, align 4
+  %arrayidx.24 = getelementptr inbounds float, float* %data, i64 7992
+  store float %d, float* %arrayidx.24, align 4
+  %arrayidx.25 = getelementptr inbounds float, float* %data, i64 7993
+  store float %d, float* %arrayidx.25, align 4
+  %arrayidx.26 = getelementptr inbounds float, float* %data, i64 7994
+  store float %d, float* %arrayidx.26, align 4
+  %arrayidx.27 = getelementptr inbounds float, float* %data, i64 7995
+  store float %d, float* %arrayidx.27, align 4
+  %arrayidx.28 = getelementptr inbounds float, float* %data, i64 7996
+  store float %d, float* %arrayidx.28, align 4
+  %arrayidx.29 = getelementptr inbounds float, float* %data, i64 7997
+  store float %d, float* %arrayidx.29, align 4
+  %arrayidx.30 = getelementptr inbounds float, float* %data, i64 7998
+  store float %d, float* %arrayidx.30, align 4
+  %arrayidx.31 = getelementptr inbounds float, float* %data, i64 7999
+  store float %d, float* %arrayidx.31, align 4
+  ret void
+}

From 65de43bc8beeab30618baece6f0d95f785672667 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 4 Jun 2019 07:19:11 +0000
Subject: [PATCH 0987/1176] [clangd] Fix a crash when clang-tidy is disabled

llvm-svn: 362469
---
 .../clangd/test/diagnostics-no-tidy.test      | 39 +++++++++++++++++++
 clang-tools-extra/clangd/tool/ClangdMain.cpp  | 14 +++----
 2 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 clang-tools-extra/clangd/test/diagnostics-no-tidy.test

diff --git a/clang-tools-extra/clangd/test/diagnostics-no-tidy.test b/clang-tools-extra/clangd/test/diagnostics-no-tidy.test
new file mode 100644
index 0000000000000..f17ab1794990b
--- /dev/null
+++ b/clang-tools-extra/clangd/test/diagnostics-no-tidy.test
@@ -0,0 +1,39 @@
+# RUN: clangd -lit-test -clang-tidy=false < %s | FileCheck -strict-whitespace %s
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
+---
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///foo.c","languageId":"c","version":1,"text":"void main() {\n(void)sizeof(42);\n}"}}}
+#      CHECK:  "method": "textDocument/publishDiagnostics",
+# CHECK-NEXT:  "params": {
+# CHECK-NEXT:    "diagnostics": [
+# CHECK-NEXT:      {
+# CHECK-NEXT:        "code": "-Wmain-return-type",
+# CHECK-NEXT:        "message": "Return type of 'main' is not 'int' (fix available)",
+# CHECK-NEXT:        "range": {
+# CHECK-NEXT:          "end": {
+# CHECK-NEXT:            "character": 4,
+# CHECK-NEXT:            "line": 0
+# CHECK-NEXT:          },
+# CHECK-NEXT:          "start": {
+# CHECK-NEXT:            "character": 0,
+# CHECK-NEXT:            "line": 0
+# CHECK-NEXT:          }
+# CHECK-NEXT:        },
+# CHECK-NEXT:        "severity": 2,
+# CHECK-NEXT:        "source": "clang"
+# CHECK-NEXT:      }
+# CHECK-NEXT:    ],
+# CHECK-NEXT:    "uri": "file://{{.*}}/foo.c"
+# CHECK-NEXT:  }
+---
+{"jsonrpc":"2.0","id":2,"method":"sync","params":null}
+---
+{"jsonrpc":"2.0","method":"textDocument/didClose","params":{"textDocument":{"uri":"test:///foo.c"}}}
+#      CHECK:  "method": "textDocument/publishDiagnostics",
+# CHECK-NEXT:  "params": {
+# CHECK-NEXT:    "diagnostics": [],
+# CHECK-NEXT:    "uri": "file://{{.*}}/foo.c"
+# CHECK-NEXT:  }
+---
+{"jsonrpc":"2.0","id":5,"method":"shutdown"}
+---
+{"jsonrpc":"2.0","method":"exit"}
diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
index 90e00e0a26764..91d82f9c4a1b0 100644
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -512,14 +512,14 @@ int main(int argc, char *argv[]) {
         tidy::ClangTidyGlobalOptions(),
         /* Default */ tidy::ClangTidyOptions::getDefaults(),
         /* Override */ OverrideClangTidyOptions, FSProvider.getFileSystem());
+    Opts.GetClangTidyOptions = [&](llvm::vfs::FileSystem &,
+                                   llvm::StringRef File) {
+      // This function must be thread-safe and tidy option providers are not.
+      std::lock_guard<std::mutex> Lock(ClangTidyOptMu);
+      // FIXME: use the FS provided to the function.
+      return ClangTidyOptProvider->getOptions(File);
+    };
   }
-  Opts.GetClangTidyOptions = [&](llvm::vfs::FileSystem &,
-                                 llvm::StringRef File) {
-    // This function must be thread-safe and tidy option providers are not.
-    std::lock_guard<std::mutex> Lock(ClangTidyOptMu);
-    // FIXME: use the FS provided to the function.
-    return ClangTidyOptProvider->getOptions(File);
-  };
   Opts.SuggestMissingIncludes = SuggestMissingIncludes;
   llvm::Optional<OffsetEncoding> OffsetEncodingFromFlag;
   if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding)

From ac0244552497848ef2a2e6b69565a4bd6daedf74 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Tue, 4 Jun 2019 08:28:48 +0000
Subject: [PATCH 0988/1176] [ARM] Turn some undefined encoding bits into 0s.

The family of 32-bit Thumb instruction encodings that include t2ORR,
t2AND and t2EOR are all listed in the ArmARM as having (0) in bit 15.
The Tablegen descriptions of those instructions listed them as ?. This
change tightens that up by making them into 0 + Unpredictable.

In the specific case of t2ORR, we tighten it up still further by
making the zero bit mandatory. This change comes from Arm v8.1-M, in
which encodings with that bit equal to 1 will now be used for
different instructions.


Reviewers: dmgreen, samparker, SjoerdMeijer, efriedma

Reviewed By: dmgreen, efriedma

Subscribers: efriedma, javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60705

llvm-svn: 362470
---
 llvm/lib/Target/ARM/ARMInstrThumb2.td         | 17 ++++
 .../MC/Disassembler/ARM/thumb2-bit-15.txt     | 92 +++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 llvm/test/MC/Disassembler/ARM/thumb2-bit-15.txt

diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 234b2767494da..e82cbeef43f7d 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -603,6 +603,17 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
+     let Inst{15} = 0b0;
+     // In most of these instructions, and most versions of the Arm
+     // architecture, bit 15 of this encoding is listed as (0) rather
+     // than 0, i.e. setting it to 1 is UNPREDICTABLE or a soft-fail
+     // rather than a hard failure. In v8.1-M, this requirement is
+     // upgraded to a hard one for ORR, so that the encodings with 1
+     // in this bit can be reused for other instructions (such as
+     // CSEL). Setting Unpredictable{15} = 1 here would reintroduce
+     // that encoding clash in the auto- generated MC decoder, so I
+     // comment it out.
+     let Unpredictable{15} = !if(!eq(opcod, 0b0010), 0b0, 0b1);
      let Inst{14-12} = 0b000; // imm3
      let Inst{7-6} = 0b00; // imm2
      let Inst{5-4} = 0b00; // type
@@ -616,6 +627,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
      let Inst{31-27} = 0b11101;
      let Inst{26-25} = 0b01;
      let Inst{24-21} = opcod;
+     let Inst{15} = 0;
+     let Unpredictable{15} = !if(!eq(opcod, 0b0010), 0b0, 0b1); // see above
    }
   // Assembly aliases for optional destination operand when it's the same
   // as the source operand.
@@ -879,6 +892,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> {
      let Inst{31-27} = 0b11101;
      let Inst{26-21} = 0b010010;
      let Inst{19-16} = 0b1111; // Rn
+     let Inst{15}    = 0b0;
      let Inst{5-4} = opcod;
    }
    // register
@@ -1872,6 +1886,7 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr,
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
   let Inst{19-16} = 0b1111; // Rn
+  let Inst{15} = 0b0;
   let Inst{14-12} = 0b000;
   let Inst{7-4} = 0b0000;
 }
@@ -2400,6 +2415,8 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
   let Inst{26-25} = 0b01;
   let Inst{24-21} = 0b0010;
   let Inst{19-16} = 0b1111; // Rn
+  let Inst{15} = 0b0;
+  let Unpredictable{15} = 0b1;
   let Inst{14-12} = 0b000;
   let Inst{7-4} = 0b0011;
 }
diff --git a/llvm/test/MC/Disassembler/ARM/thumb2-bit-15.txt b/llvm/test/MC/Disassembler/ARM/thumb2-bit-15.txt
new file mode 100644
index 0000000000000..a73e7c9336136
--- /dev/null
+++ b/llvm/test/MC/Disassembler/ARM/thumb2-bit-15.txt
@@ -0,0 +1,92 @@
+# RUN: not llvm-mc -triple=thumbv7 -mcpu=cortex-a8 -disassemble < %s 2> %t | FileCheck %s
+# RUN: FileCheck --check-prefix=ERROR < %t %s
+
+[0x09,0xea,0x08,0x04]
+# CHECK: and.w	r4, r9, r8
+
+[0x09,0xea,0x08,0x84]
+# CHECK: and.w	r4, r9, r8
+# ERROR: [[@LINE-2]]:2: warning: potentially undefined instruction encoding
+
+[0x04,0xea,0xe8,0x01]
+# CHECK: and.w	r1, r4, r8, asr #3
+
+[0x04,0xea,0xe8,0x81]
+# CHECK: and.w	r1, r4, r8, asr #3
+# ERROR: [[@LINE-2]]:2: warning: potentially undefined instruction encoding
+
+[0x11,0xea,0x47,0x02]
+# CHECK: ands.w	r2, r1, r7, lsl #1
+
+[0x11,0xea,0x47,0x82]
+# CHECK: ands.w	r2, r1, r7, lsl #1
+# ERROR: [[@LINE-2]]:2: warning: potentially undefined instruction encoding
+
+[0x45,0xea,0x06,0x04]
+# CHECK: orr.w	r4, r5, r6
+
+[0x45,0xea,0x06,0x84]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x45,0xea,0x46,0x14]
+# CHECK: orr.w	r4, r5, r6, lsl #5
+
+[0x45,0xea,0x46,0x94]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x55,0xea,0x56,0x14]
+# CHECK: orrs.w	r4, r5, r6, lsr #5
+
+[0x55,0xea,0x56,0x94]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x85,0xea,0x06,0x04]
+# CHECK: eor.w	r4, r5, r6
+
+[0x85,0xea,0x06,0x84]
+# CHECK: eor.w	r4, r5, r6
+# ERROR: [[@LINE-2]]:2: warning: potentially undefined instruction encoding
+
+[0x85,0xea,0x46,0x14]
+# CHECK: eor.w	r4, r5, r6, lsl #5
+
+[0x85,0xea,0x46,0x94]
+# CHECK: eor.w	r4, r5, r6, lsl #5
+# ERROR: [[@LINE-2]]:2: warning: potentially undefined instruction encoding
+
+[0x4f,0xea,0x02,0x01]
+# CHECK: mov.w  r1, r2
+
+[0x4f,0xea,0x02,0x81]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x4f,0xea,0x02,0x46]
+# CHECK: lsl.w	r6, r2, #16
+
+[0x4f,0xea,0x02,0xc6]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x4f,0xea,0x12,0x46]
+# CHECK: lsr.w	r6, r2, #16
+
+[0x4f,0xea,0x12,0xc6]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x5f,0xea,0x22,0x06]
+# CHECK: asrs.w	r6, r2, #32
+
+[0x5f,0xea,0x22,0x86]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x5f,0xea,0x72,0x16]
+# CHECK: rors.w	r6, r2, #5
+
+[0x5f,0xea,0x72,0x96]
+# ERROR: [[@LINE-1]]:2: warning: invalid instruction encoding
+
+[0x4f,0xea,0x34,0x04]
+# CHECK: rrx	r4, r4
+
+[0x4f,0xea,0x34,0x84]
+# CHECK: rrx	r4, r4
+# ERROR: [[@LINE-2]]:2: warning: potentially undefined instruction encoding

From 72667b4e4811e6ecac3031b4b3ec8ec50bce3ac1 Mon Sep 17 00:00:00 2001
From: QingShan Zhang <qshanz@cn.ibm.com>
Date: Tue, 4 Jun 2019 08:45:07 +0000
Subject: [PATCH 0989/1176] [NFC] Update the test to check the endianness after
 the CodeGenPrepare instead of checking the assembly instructions.

llvm-svn: 362471
---
 .../SystemZ/codegenprepare-splitstore.ll      | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/SystemZ/codegenprepare-splitstore.ll b/llvm/test/CodeGen/SystemZ/codegenprepare-splitstore.ll
index 0b1e2b477c2a5..73b273254bfde 100644
--- a/llvm/test/CodeGen/SystemZ/codegenprepare-splitstore.ll
+++ b/llvm/test/CodeGen/SystemZ/codegenprepare-splitstore.ll
@@ -1,14 +1,22 @@
 ; Test that CodeGenPrepare respects endianness when splitting a store.
 ;
-; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -force-split-store < %s  | FileCheck %s
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -stop-after codegenprepare -force-split-store < %s  | FileCheck %s
 
 define void @fun(i16* %Src, i16* %Dst) {
-; CHECK-LABEL: # %bb.0:
-; CHECK:       lh   %r0, 0(%r2)
-; CHECK-NEXT:  stc  %r0, 1(%r3)
-; CHECK-NEXT:  srl  %r0, 8
-; CHECK-NEXT:  stc  %r0, 0(%r3)
-; CHECK-NEXT:  br   %r14
+; CHECK-LABEL: @fun(
+; CHECK:      %1 = load i16, i16* %Src
+; CHECK-NEXT: %2 = trunc i16 %1 to i8
+; CHECK-NEXT: %3 = lshr i16 %1, 8
+; CHECK-NEXT: %4 = trunc i16 %3 to i8
+; CHECK-NEXT: %5 = zext i8 %2 to i16
+; CHECK-NEXT: %6 = zext i8 %4 to i16
+; CHECK-NEXT: %7 = shl nuw i16 %6, 8
+; CHECK-NEXT: %8 = or i16 %7, %5
+; CHECK-NEXT: %9 = bitcast i16* %Dst to i8*
+; CHECK-NEXT: %10 = getelementptr i8, i8* %9, i32 1
+; CHECK-NEXT: store i8 %2, i8* %10
+; CHECK-NEXT: %11 = bitcast i16* %Dst to i8*
+; CHECK-NEXT: store i8 %4, i8* %11
   %1 = load i16, i16* %Src
   %2 = trunc i16 %1 to i8
   %3 = lshr i16 %1, 8

From 11de0e71b0da7c822f7e7636ebe46f54ebc856db Mon Sep 17 00:00:00 2001
From: QingShan Zhang <qshanz@cn.ibm.com>
Date: Tue, 4 Jun 2019 08:53:53 +0000
Subject: [PATCH 0990/1176] [DAGCombine] Match a pattern where a wide type
 scalar value is stored by several narrow stores

This opportunity is found from spec 2017 557.xz_r. And it is used by the sha encrypt/decrypt. See sha-2/sha512.c

static void store64(u64 x, unsigned char* y)
{
    for(int i = 0; i != 8; ++i)
        y[i] = (x >> ((7-i) * 8)) & 255;
}

static u64 load64(const unsigned char* y)
{
    u64 res = 0;
    for(int i = 0; i != 8; ++i)
        res |= (u64)(y[i]) << ((7-i) * 8);
    return res;
}
The load64 has been implemented by https://reviews.llvm.org/D26149
This patch is trying to implement the store pattern.

Match a pattern where a wide type scalar value is stored by several narrow
stores. Fold it into a single store or a BSWAP and a store if the targets
supports it.

Assuming little endian target:
i8 *p = ...
i32 val = ...
p[0] = (val >> 0) & 0xFF;
p[1] = (val >> 8) & 0xFF;
p[2] = (val >> 16) & 0xFF;
p[3] = (val >> 24) & 0xFF;

>
*((i32)p) = val;

i8 *p = ...
i32 val = ...
p[0] = (val >> 24) & 0xFF;
p[1] = (val >> 16) & 0xFF;
p[2] = (val >> 8) & 0xFF;
p[3] = (val >> 0) & 0xFF;

>
*((i32)p) = BSWAP(val);

Differential Revision: https://reviews.llvm.org/D61843

llvm-svn: 362472
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 179 ++++++++++
 llvm/test/CodeGen/PowerPC/store-combine.ll    | 315 +++---------------
 2 files changed, 228 insertions(+), 266 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 98e7f4055e9c8..8eeb4a70a53be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -524,6 +524,7 @@ namespace {
                               const SDLoc &DL);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
+    SDValue MatchStoreCombine(StoreSDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue splitMergedValStore(StoreSDNode *ST);
@@ -6262,6 +6263,180 @@ static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
   return BigEndian;
 }
 
+static SDValue stripTruncAndExt(SDValue Value) {
+  switch (Value.getOpcode()) {
+  case ISD::TRUNCATE:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND:
+    return stripTruncAndExt(Value.getOperand(0));
+  }
+  return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the targets
+/// supports it.
+///
+/// Assuming little endian target:
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 0) & 0xFF;
+///  p[1] = (val >> 8) & 0xFF;
+///  p[2] = (val >> 16) & 0xFF;
+///  p[3] = (val >> 24) & 0xFF;
+/// =>
+///  *((i32)p) = val;
+///
+///  i8 *p = ...
+///  i32 val = ...
+///  p[0] = (val >> 24) & 0xFF;
+///  p[1] = (val >> 16) & 0xFF;
+///  p[2] = (val >> 8) & 0xFF;
+///  p[3] = (val >> 0) & 0xFF;
+/// =>
+///  *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+  // Collect all the stores in the chain.
+  SDValue Chain;
+  SmallVector<StoreSDNode *, 8> Stores;
+  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    if (Store->getMemoryVT() != MVT::i8 ||
+        Store->isVolatile() || Store->isIndexed())
+      return SDValue();
+    Stores.push_back(Store);
+    Chain = Store->getChain();
+  }
+  // Handle the simple type only.
+  unsigned Width = Stores.size();
+  EVT VT = EVT::getIntegerVT(
+    *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+    return SDValue();
+
+  // Check if all the bytes of the combined value we are looking at are stored 
+  // to the same base address. Collect bytes offsets from Base address into 
+  // ByteOffsets. 
+  SDValue CombinedValue;
+  SmallVector<int64_t, 4> ByteOffsets(Width);
+  int64_t FirstOffset = INT64_MAX;
+  StoreSDNode *FirstStore = nullptr;
+  Optional<BaseIndexOffset> Base;
+  for (auto Store : Stores) {
+    // All the stores store different byte of the CombinedValue. A truncate is
+    // required to get that byte value.
+    SDValue Trunc = Store->getValue();
+    if (Trunc.getOpcode() != ISD::TRUNCATE)
+      return SDValue();
+    // A shift operation is required to get the right byte offset, except the
+    // first byte.
+    int64_t Offset = 0;
+    SDValue Value = Trunc.getOperand(0);
+    if (Value.getOpcode() == ISD::SRL ||
+        Value.getOpcode() == ISD::SRA) {
+      ConstantSDNode *ShiftOffset =
+        dyn_cast<ConstantSDNode>(Value.getOperand(1));
+      // Trying to match the following pattern. The shift offset must be 
+      // a constant and a multiple of 8. It is the byte offset in "y".
+      // 
+      // x = srl y, offset
+      // i8 z = trunc x 
+      // store z, ...
+      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+        return SDValue();
+  
+     Offset = ShiftOffset->getSExtValue()/8;
+     Value = Value.getOperand(0);
+    }
+
+    // Stores must share the same combined value with different offsets.
+    if (!CombinedValue)
+      CombinedValue = Value;
+    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+      return SDValue();
+
+    // The trunc and all the extend operation should be stripped to get the
+    // real value we are stored.
+    else if (CombinedValue.getValueType() != VT) {
+      if (Value.getValueType() == VT ||
+          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+        CombinedValue = Value;
+      // Give up if the combined value type is smaller than the store size.
+      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+        return SDValue();
+    }
+
+    // Stores must share the same base address
+    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+    int64_t ByteOffsetFromBase = 0;
+    if (!Base)
+      Base = Ptr;
+    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+      return SDValue();
+
+    // Remember the first byte store
+    if (ByteOffsetFromBase < FirstOffset) {
+      FirstStore = Store;
+      FirstOffset = ByteOffsetFromBase;
+    }
+    // Map the offset in the store and the offset in the combined value.
+    if (Offset < 0 || Offset >= Width)
+      return SDValue();
+    ByteOffsets[Offset] = ByteOffsetFromBase;
+  }
+
+  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+  assert(FirstStore && "First store must be set");
+
+  // Check if the bytes of the combined value we are looking at match with 
+  // either big or little endian value store.
+  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+  if (!IsBigEndian.hasValue())
+    return SDValue();
+
+  // The node we are looking at matches with the pattern, check if we can
+  // replace it with a single bswap if needed and store.
+
+  // If the store needs byte swap check if the target supports it
+  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+  // Before legalize we can introduce illegal bswaps which will be later
+  // converted to an explicit bswap sequence. This way we end up with a single
+  // store and byte shuffling instead of several stores and byte shuffling.
+  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+    return SDValue();
+
+  // Check that a store of the wide type is both allowed and fast on the target
+  bool Fast = false;
+  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                        VT, FirstStore->getAddressSpace(),
+                                        FirstStore->getAlignment(), &Fast);
+  if (!Allowed || !Fast)
+    return SDValue();
+
+  if (VT != CombinedValue.getValueType()) {
+    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+           "Get unexpected store value to combine");
+    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+                             CombinedValue);
+  }
+
+  if (NeedsBswap)
+    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+  SDValue NewStore =
+    DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
+                 FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+  // Rely on other DAG combine rules to remove the other individual stores.
+  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+  return NewStore;
+}
+
 /// Match a pattern where a wide type scalar value is loaded by several narrow
 /// loads and combined by shifts and ors. Fold it into a single load or a load
 /// and a BSWAP if the targets supports it.
@@ -15762,6 +15937,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (SDValue NewST = TransformFPLoadStorePair(N))
     return NewST;
 
+  // Try transforming several stores into STORE (BSWAP).
+  if (SDValue Store = MatchStoreCombine(ST))
+    return Store;
+
   if (ST->isUnindexed()) {
     // Walk up chain skipping non-aliasing memory nodes, on this store and any
     // adjacent stores.
diff --git a/llvm/test/CodeGen/PowerPC/store-combine.ll b/llvm/test/CodeGen/PowerPC/store-combine.ll
index 77e02e76cde0a..9315df130800c 100644
--- a/llvm/test/CodeGen/PowerPC/store-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/store-combine.ll
@@ -10,24 +10,12 @@
 define void @store_i32_by_i8(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
+; CHECK-PPC64LE-NEXT:    stw 3, 0(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, 2(4)
-; CHECK-PPC64-NEXT:    stb 3, 3(4)
+; CHECK-PPC64-NEXT:    stwbrx 3, 0, 4 
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i32 %m to i8
@@ -55,24 +43,12 @@ entry:
 define void @store_i32_by_i8_bswap(i32 signext %m, i8* %p)  {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 0(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
+; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, 4
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 24
-; CHECK-PPC64-NEXT:    srwi 6, 3, 16
-; CHECK-PPC64-NEXT:    stb 5, 0(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 6, 1(4)
-; CHECK-PPC64-NEXT:    stb 5, 2(4)
-; CHECK-PPC64-NEXT:    stb 3, 3(4)
+; CHECK-PPC64-NEXT:    stw 3, 0(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 24
@@ -104,40 +80,12 @@ entry:
 define void @store_i64_by_i8(i64 %m, i8* %p)  {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 56, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 48, 16
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 40, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 32, 32
-; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 24, 40
-; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 16, 48
-; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 8, 56
-; CHECK-PPC64LE-NEXT:    stb 5, 6(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 7(4)
+; CHECK-PPC64LE-NEXT:    stdx 3, 0, 4
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 56, 8
-; CHECK-PPC64-NEXT:    rldicl 6, 3, 48, 16
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 40, 24
-; CHECK-PPC64-NEXT:    stb 6, 2(4)
-; CHECK-PPC64-NEXT:    rldicl 6, 3, 32, 32
-; CHECK-PPC64-NEXT:    stb 5, 3(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 24, 40
-; CHECK-PPC64-NEXT:    stb 6, 4(4)
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
-; CHECK-PPC64-NEXT:    stb 5, 5(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 16, 48
-; CHECK-PPC64-NEXT:    rldicl 3, 3, 8, 56
-; CHECK-PPC64-NEXT:    stb 5, 6(4)
-; CHECK-PPC64-NEXT:    stb 3, 7(4)
+; CHECK-PPC64-NEXT:    stdbrx 3, 0, 4
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i64 %m to i8
@@ -185,40 +133,12 @@ entry:
 define void @store_i64_by_i8_bswap(i64 %m, i8* %p)  {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 56, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 7(4)
-; CHECK-PPC64LE-NEXT:    stb 5, 6(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 48, 16
-; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 40, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 32, 32
-; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 24, 40
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 16, 48
-; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 8, 56
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    stdbrx 3, 0, 4
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 56, 8
-; CHECK-PPC64-NEXT:    rldicl 6, 3, 48, 16
-; CHECK-PPC64-NEXT:    stb 5, 6(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 40, 24
-; CHECK-PPC64-NEXT:    stb 6, 5(4)
-; CHECK-PPC64-NEXT:    rldicl 6, 3, 32, 32
-; CHECK-PPC64-NEXT:    stb 5, 4(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 24, 40
-; CHECK-PPC64-NEXT:    stb 6, 3(4)
-; CHECK-PPC64-NEXT:    stb 3, 7(4)
-; CHECK-PPC64-NEXT:    stb 5, 2(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 16, 48
-; CHECK-PPC64-NEXT:    rldicl 3, 3, 8, 56
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    stdx 3, 0, 4
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i64 %m to i8
@@ -267,46 +187,18 @@ entry:
 define void @store_i64_by_i8_bswap_uses(i32 signext %t, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap_uses:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    slwi 5, 3, 3
-; CHECK-PPC64LE-NEXT:    subf 3, 3, 5
-; CHECK-PPC64LE-NEXT:    extsw 3, 3
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 56, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 7(4)
-; CHECK-PPC64LE-NEXT:    stb 5, 6(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 48, 16
-; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 40, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 32, 32
-; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 24, 40
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 16, 48
-; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 8, 56
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    slwi [[REG:[0-9]+]], 3, 3
+; CHECK-PPC64LE-NEXT:    subf [[REG1:[0-9]+]], 3, [[REG]] 
+; CHECK-PPC64LE-NEXT:    extsw [[REG2:[0-9]+]], [[REG1]]
+; CHECK-PPC64LE-NEXT:    stdbrx [[REG2]], 0, 4
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap_uses:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    slwi 5, 3, 3
-; CHECK-PPC64-NEXT:    subf 3, 3, 5
-; CHECK-PPC64-NEXT:    extsw 3, 3
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 56, 8
-; CHECK-PPC64-NEXT:    rldicl 6, 3, 48, 16
-; CHECK-PPC64-NEXT:    stb 5, 6(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 40, 24
-; CHECK-PPC64-NEXT:    stb 6, 5(4)
-; CHECK-PPC64-NEXT:    rldicl 6, 3, 32, 32
-; CHECK-PPC64-NEXT:    stb 5, 4(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 24, 40
-; CHECK-PPC64-NEXT:    stb 6, 3(4)
-; CHECK-PPC64-NEXT:    stb 3, 7(4)
-; CHECK-PPC64-NEXT:    stb 5, 2(4)
-; CHECK-PPC64-NEXT:    rldicl 5, 3, 16, 48
-; CHECK-PPC64-NEXT:    rldicl 3, 3, 8, 56
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    slwi [[REG:[0-9]+]], 3, 3
+; CHECK-PPC64-NEXT:    subf [[REG1:[0-9]+]], 3, [[REG]]
+; CHECK-PPC64-NEXT:    extsw [[REG2:[0-9]+]], [[REG1]]
+; CHECK-PPC64-NEXT:    stdx [[REG2]], 0, 4
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %mul = mul nsw i32 %t, 7
@@ -356,25 +248,11 @@ entry:
 define void @store_i32_by_i8_bswap_volatile(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_volatile:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
-; CHECK-PPC64LE-NEXT:    blr
+; CHECK-PPC64LE-NOT:   stwbrx 
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_volatile:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 3, 3(4)
-; CHECK-PPC64-NEXT:    stb 5, 2(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
-; CHECK-PPC64-NEXT:    blr
+; CHECK-PPC64-NOT:   stw 
 entry:
   %conv = trunc i32 %m to i8
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -403,29 +281,11 @@ entry:
 define void @store_i32_by_i8_bswap_store_in_between(i32 signext %m, i8* %p, i8* %q) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_store_in_between:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 6, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
-; CHECK-PPC64LE-NEXT:    stb 6, 2(4)
-; CHECK-PPC64LE-NEXT:    li 6, 3
-; CHECK-PPC64LE-NEXT:    stb 6, 0(5)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
-; CHECK-PPC64LE-NEXT:    blr
+; CHECK-PPC64LE-NOT:   stwbrx 
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_store_in_between:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    li 6, 3
-; CHECK-PPC64-NEXT:    srwi 7, 3, 8
-; CHECK-PPC64-NEXT:    stb 7, 2(4)
-; CHECK-PPC64-NEXT:    stb 3, 3(4)
-; CHECK-PPC64-NEXT:    stb 6, 0(5)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
-; CHECK-PPC64-NEXT:    blr
+; CHECK-PPC64-NOT:   stw 
 entry:
   %conv = trunc i32 %m to i8
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -448,25 +308,11 @@ entry:
 define void @store_i32_by_i8_bswap_unrelated_store(i32 signext %m, i8* %p, i8* %q) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_unrelated_store:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 6, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
-; CHECK-PPC64LE-NEXT:    stb 6, 2(5)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
-; CHECK-PPC64LE-NEXT:    blr
+; CHECK-PPC64LE-NOT:   stwbrx 
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_unrelated_store:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 6, 3, 8
-; CHECK-PPC64-NEXT:    stb 3, 3(4)
-; CHECK-PPC64-NEXT:    stb 6, 2(5)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
-; CHECK-PPC64-NEXT:    blr
+; CHECK-PPC64-NOT:   stw 
 entry:
   %conv = trunc i32 %m to i8
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -493,24 +339,13 @@ entry:
 define void @store_i32_by_i8_bswap_nonzero_offset(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_nonzero_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 4(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 1(4)
+; CHECK-PPC64LE-NEXT:    addi [[REG1:[0-9]+]], 4, 1
+; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG1]] 
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_nonzero_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 3, 4(4)
-; CHECK-PPC64-NEXT:    stb 5, 3(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, 2(4)
-; CHECK-PPC64-NEXT:    stb 3, 1(4)
+; CHECK-PPC64-NEXT:    stw 3, 1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 8
@@ -539,24 +374,13 @@ entry:
 define void @store_i32_by_i8_neg_offset(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_neg_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, -3(4)
-; CHECK-PPC64LE-NEXT:    stb 3, -4(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, -2(4)
-; CHECK-PPC64LE-NEXT:    stb 3, -1(4)
+; CHECK-PPC64LE-NEXT:    stw 3, -4(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_neg_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 3, -4(4)
-; CHECK-PPC64-NEXT:    stb 5, -3(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 3, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, -2(4)
-; CHECK-PPC64-NEXT:    stb 3, -1(4)
+; CHECK-PPC64-NEXT:    addi [[REG1:[0-9]+]], 4, -4
+; CHECK-PPC64-NEXT:    stwbrx 3, 0, [[REG1]] 
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 8
@@ -585,24 +409,13 @@ entry:
 define void @store_i32_by_i8_bswap_neg_offset(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_neg_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    stb 5, -3(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, -4(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, -2(4)
-; CHECK-PPC64LE-NEXT:    stb 3, -1(4)
+; CHECK-PPC64LE-NEXT:    addi [[REG1:[0-9]+]], 4, -4
+; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG1]] 
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_neg_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    srwi 6, 3, 24
-; CHECK-PPC64-NEXT:    stb 5, -3(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 6, -4(4)
-; CHECK-PPC64-NEXT:    stb 5, -2(4)
-; CHECK-PPC64-NEXT:    stb 3, -1(4)
+; CHECK-PPC64-NEXT:    stw 3, -4(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 16
@@ -631,28 +444,17 @@ entry:
 define void @store_i32_by_i8_bswap_base_index_offset(i32 %m, i32 %i, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_base_index_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    extsw 4, 4
-; CHECK-PPC64LE-NEXT:    srwi 6, 3, 16
-; CHECK-PPC64LE-NEXT:    add 4, 5, 4
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 6, -3(4)
-; CHECK-PPC64LE-NEXT:    stb 5, -4(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, -2(4)
-; CHECK-PPC64LE-NEXT:    stb 3, -1(4)
+; CHECK-PPC64LE-NEXT:    extsw [[REG1:[0-9]+]], 4
+; CHECK-PPC64LE-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
+; CHECK-PPC64LE-NEXT:    addi [[REG3:[0-9]+]], [[REG2]], -4
+; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG3]] 
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_base_index_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    extsw 4, 4
-; CHECK-PPC64-NEXT:    srwi 6, 3, 16
-; CHECK-PPC64-NEXT:    add 4, 5, 4
-; CHECK-PPC64-NEXT:    srwi 5, 3, 24
-; CHECK-PPC64-NEXT:    stb 6, -3(4)
-; CHECK-PPC64-NEXT:    srwi 6, 3, 8
-; CHECK-PPC64-NEXT:    stb 5, -4(4)
-; CHECK-PPC64-NEXT:    stb 6, -2(4)
-; CHECK-PPC64-NEXT:    stb 3, -1(4)
+; CHECK-PPC64-NEXT:    extsw [[REG1:[0-9]+]], 4
+; CHECK-PPC64-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
+; CHECK-PPC64-NEXT:    stw 3, -4([[REG2]])
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 16
@@ -694,28 +496,17 @@ entry:
 define void @store_i32_by_i8_bswap_complicated(i32 %m, i32 %i, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_complicated:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    extsw 4, 4
-; CHECK-PPC64LE-NEXT:    add 4, 5, 4
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
-; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 6(4)
+; CHECK-PPC64LE-NEXT:    extsw [[REG1:[0-9]+]], 4
+; CHECK-PPC64LE-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
+; CHECK-PPC64LE-NEXT:    addi [[REG3:[0-9]+]], [[REG2]], 3 
+; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG3]] 
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_complicated:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    extsw 4, 4
-; CHECK-PPC64-NEXT:    srwi 6, 3, 24
-; CHECK-PPC64-NEXT:    add 4, 5, 4
-; CHECK-PPC64-NEXT:    srwi 5, 3, 16
-; CHECK-PPC64-NEXT:    stb 6, 3(4)
-; CHECK-PPC64-NEXT:    stb 5, 4(4)
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 5, 5(4)
-; CHECK-PPC64-NEXT:    stb 3, 6(4)
+; CHECK-PPC64-NEXT:    extsw [[REG1:[0-9]+]], 4
+; CHECK-PPC64-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
+; CHECK-PPC64-NEXT:    stw 3, 3([[REG2]])
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %idx.ext = sext i32 %i to i64
@@ -745,16 +536,12 @@ entry:
 define void @store_i16_by_i8_bswap(i16 %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i16_by_i8_bswap:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 5, 0(4)
-; CHECK-PPC64LE-NEXT:    stb 3, 1(4)
+; CHECK-PPC64LE-NEXT:    sthbrx 3, 0, 4
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i16_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 5, 0(4)
-; CHECK-PPC64-NEXT:    stb 3, 1(4)
+; CHECK-PPC64-NEXT:    sth 3, 0(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i16 %m, 8
@@ -771,16 +558,12 @@ entry:
 define void @store_16_by_i8(i16 %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_16_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
-; CHECK-PPC64LE-NEXT:    srwi 3, 3, 8
-; CHECK-PPC64LE-NEXT:    stb 3, 1(4)
+; CHECK-PPC64LE-NEXT:    sth 3, 0(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_16_by_i8:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    srwi 5, 3, 8
-; CHECK-PPC64-NEXT:    stb 3, 0(4)
-; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    sthbrx 3, 0, 4
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv1 = trunc i16 %m to i8

From 4f9e68148bd0dada2d6997625432385918ac2e2c Mon Sep 17 00:00:00 2001
From: Yevgeny Rouban <yevgeny.rouban@azul.com>
Date: Tue, 4 Jun 2019 09:03:39 +0000
Subject: [PATCH 0991/1176] Make SwitchInstProfUpdateWrapper safer

While prof branch_weights inconsistencies are being fixed patch
by patch (pass by pass) we need SwitchInstProfUpdateWrapper to
be safe with respect to inconsistent metadata that can come from
passes that have not been fixed yet. See the bug found by @nikic
in https://reviews.llvm.org/D62126.

This patch introduces one more state (called Invalid) to the
wrapper class that allows users to work with the underlying
SwitchInst ignoring the prof metadata changes.

Created a unit test for the SwitchInstProfUpdateWrapper class.

Reviewers: davidx, nikic, eraman, reames, chandlerc
Reviewed By: davidx
Differential Revision: https://reviews.llvm.org/D62656

llvm-svn: 362473
---
 llvm/include/llvm/IR/Instructions.h    | 20 +++++--
 llvm/lib/IR/Instructions.cpp           | 57 +++++++++++++------
 llvm/unittests/IR/InstructionsTest.cpp | 79 ++++++++++++++++++++++++++
 3 files changed, 132 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 82833658c4182..2e35f5a7fff8c 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -3439,15 +3439,24 @@ class SwitchInst : public Instruction {
 /// their prof branch_weights metadata.
 class SwitchInstProfUpdateWrapper {
   SwitchInst &SI;
-  Optional<SmallVector<uint32_t, 8> > Weights;
-  bool Changed = false;
+  Optional<SmallVector<uint32_t, 8> > Weights = None;
+
+  // Sticky invalid state is needed to safely ignore operations with prof data
+  // in cases where SwitchInstProfUpdateWrapper is created from SwitchInst
+  // with inconsistent prof data. TODO: once we fix all prof data
+  // inconsistencies we can turn invalid state to assertions.
+  enum {
+    Invalid,
+    Initialized,
+    Changed
+  } State = Invalid;
 
 protected:
   static MDNode *getProfBranchWeightsMD(const SwitchInst &SI);
 
   MDNode *buildProfBranchWeightsMD();
 
-  Optional<SmallVector<uint32_t, 8> > getProfBranchWeights();
+  void init();
 
 public:
   using CaseWeightOpt = Optional<uint32_t>;
@@ -3455,11 +3464,10 @@ class SwitchInstProfUpdateWrapper {
   SwitchInst &operator*() { return SI; }
   operator SwitchInst *() { return &SI; }
 
-  SwitchInstProfUpdateWrapper(SwitchInst &SI)
-      : SI(SI), Weights(getProfBranchWeights()) {}
+  SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); }
 
   ~SwitchInstProfUpdateWrapper() {
-    if (Changed)
+    if (State == Changed)
       SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD());
   }
 
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 8812df35e26b2..ad082a9c24f30 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -45,6 +45,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool> SwitchInstProfUpdateWrapperStrict(
+    "switch-inst-prof-update-wrapper-strict", cl::Hidden,
+    cl::desc("Assert that prof branch_weights metadata is valid when creating "
+             "an instance of SwitchInstProfUpdateWrapper"),
+    cl::init(false));
+
 //===----------------------------------------------------------------------===//
 //                            AllocaInst Class
 //===----------------------------------------------------------------------===//
@@ -3880,7 +3886,7 @@ SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) {
 }
 
 MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
-  assert(Changed && "called only if metadata has changed");
+  assert(State == Changed && "called only if metadata has changed");
 
   if (!Weights)
     return nullptr;
@@ -3897,11 +3903,20 @@ MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
   return MDBuilder(SI.getParent()->getContext()).createBranchWeights(*Weights);
 }
 
-Optional<SmallVector<uint32_t, 8> >
-SwitchInstProfUpdateWrapper::getProfBranchWeights() {
+void SwitchInstProfUpdateWrapper::init() {
   MDNode *ProfileData = getProfBranchWeightsMD(SI);
-  if (!ProfileData)
-    return None;
+  if (!ProfileData) {
+    State = Initialized;
+    return;
+  }
+
+  if (ProfileData->getNumOperands() != SI.getNumSuccessors() + 1) {
+    State = Invalid;
+    if (SwitchInstProfUpdateWrapperStrict)
+      assert(!"number of prof branch_weights metadata operands corresponds to"
+              " number of succesors");
+    return;
+  }
 
   SmallVector<uint32_t, 8> Weights;
   for (unsigned CI = 1, CE = SI.getNumSuccessors(); CI <= CE; ++CI) {
@@ -3909,7 +3924,8 @@ SwitchInstProfUpdateWrapper::getProfBranchWeights() {
     uint32_t CW = C->getValue().getZExtValue();
     Weights.push_back(CW);
   }
-  return Weights;
+  State = Initialized;
+  this->Weights = std::move(Weights);
 }
 
 SwitchInst::CaseIt
@@ -3917,7 +3933,7 @@ SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
   if (Weights) {
     assert(SI.getNumSuccessors() == Weights->size() &&
            "num of prof branch_weights must accord with num of successors");
-    Changed = true;
+    State = Changed;
     // Copy the last case to the place of the removed one and shrink.
     // This is tightly coupled with the way SwitchInst::removeCase() removes
     // the cases in SwitchInst::removeCase(CaseIt).
@@ -3932,12 +3948,15 @@ void SwitchInstProfUpdateWrapper::addCase(
     SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
   SI.addCase(OnVal, Dest);
 
+  if (State == Invalid)
+    return;
+
   if (!Weights && W && *W) {
-    Changed = true;
+    State = Changed;
     Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
     Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
   } else if (Weights) {
-    Changed = true;
+    State = Changed;
     Weights.getValue().push_back(W ? *W : 0);
   }
   if (Weights)
@@ -3948,10 +3967,11 @@ void SwitchInstProfUpdateWrapper::addCase(
 SymbolTableList<Instruction>::iterator
 SwitchInstProfUpdateWrapper::eraseFromParent() {
   // Instruction is erased. Mark as unchanged to not touch it in the destructor.
-  Changed = false;
-
-  if (Weights)
-    Weights->resize(0);
+  if (State != Invalid) {
+    State = Initialized;
+    if (Weights)
+      Weights->resize(0);
+  }
   return SI.eraseFromParent();
 }
 
@@ -3964,7 +3984,7 @@ SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) {
 
 void SwitchInstProfUpdateWrapper::setSuccessorWeight(
     unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
-  if (!W)
+  if (!W || State == Invalid)
     return;
 
   if (!Weights && *W)
@@ -3973,7 +3993,7 @@ void SwitchInstProfUpdateWrapper::setSuccessorWeight(
   if (Weights) {
     auto &OldW = Weights.getValue()[idx];
     if (*W != OldW) {
-      Changed = true;
+      State = Changed;
       OldW = *W;
     }
   }
@@ -3983,9 +4003,10 @@ SwitchInstProfUpdateWrapper::CaseWeightOpt
 SwitchInstProfUpdateWrapper::getSuccessorWeight(const SwitchInst &SI,
                                                 unsigned idx) {
   if (MDNode *ProfileData = getProfBranchWeightsMD(SI))
-    return mdconst::extract<ConstantInt>(ProfileData->getOperand(idx + 1))
-        ->getValue()
-        .getZExtValue();
+    if (ProfileData->getNumOperands() == SI.getNumSuccessors() + 1)
+      return mdconst::extract<ConstantInt>(ProfileData->getOperand(idx + 1))
+          ->getValue()
+          .getZExtValue();
 
   return None;
 }
diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp
index 3b2bd6fa81b0f..70d51e5fc6d10 100644
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@@ -753,6 +753,85 @@ TEST(InstructionsTest, SwitchInst) {
   EXPECT_EQ(BB1.get(), Handle.getCaseSuccessor());
 }
 
+TEST(InstructionsTest, SwitchInstProfUpdateWrapper) {
+  LLVMContext C;
+
+  std::unique_ptr<BasicBlock> BB1, BB2, BB3;
+  BB1.reset(BasicBlock::Create(C));
+  BB2.reset(BasicBlock::Create(C));
+  BB3.reset(BasicBlock::Create(C));
+
+  // We create block 0 after the others so that it gets destroyed first and
+  // clears the uses of the other basic blocks.
+  std::unique_ptr<BasicBlock> BB0(BasicBlock::Create(C));
+
+  auto *Int32Ty = Type::getInt32Ty(C);
+
+  SwitchInst *SI =
+      SwitchInst::Create(UndefValue::get(Int32Ty), BB0.get(), 4, BB0.get());
+  SI->addCase(ConstantInt::get(Int32Ty, 1), BB1.get());
+  SI->addCase(ConstantInt::get(Int32Ty, 2), BB2.get());
+  SI->setMetadata(LLVMContext::MD_prof,
+                  MDBuilder(C).createBranchWeights({ 9, 1, 22 }));
+
+  {
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    EXPECT_EQ(*SIW.getSuccessorWeight(0), 9u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(1), 1u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(2), 22u);
+    SIW.setSuccessorWeight(0, 99u);
+    SIW.setSuccessorWeight(1, 11u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(0), 99u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(1), 11u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(2), 22u);
+  }
+
+  { // Create another wrapper and check that the data persist.
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    EXPECT_EQ(*SIW.getSuccessorWeight(0), 99u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(1), 11u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(2), 22u);
+  }
+
+  // Make prof data invalid by adding one extra weight.
+  SI->setMetadata(LLVMContext::MD_prof, MDBuilder(C).createBranchWeights(
+                                            { 99, 11, 22, 33 })); // extra
+  { // Invalid prof data makes wrapper act as if there were no prof data.
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    ASSERT_FALSE(SIW.getSuccessorWeight(0).hasValue());
+    ASSERT_FALSE(SIW.getSuccessorWeight(1).hasValue());
+    ASSERT_FALSE(SIW.getSuccessorWeight(2).hasValue());
+    SIW.addCase(ConstantInt::get(Int32Ty, 3), BB3.get(), 39);
+    ASSERT_FALSE(SIW.getSuccessorWeight(3).hasValue()); // did not add weight 39
+  }
+
+  { // With added 3rd case the prof data become consistent with num of cases.
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    EXPECT_EQ(*SIW.getSuccessorWeight(0), 99u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(1), 11u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(2), 22u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(3), 33u);
+  }
+
+  // Make prof data invalid by removing one extra weight.
+  SI->setMetadata(LLVMContext::MD_prof,
+                  MDBuilder(C).createBranchWeights({ 99, 11, 22 })); // shorter
+  { // Invalid prof data makes wrapper act as if there were no prof data.
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    ASSERT_FALSE(SIW.getSuccessorWeight(0).hasValue());
+    ASSERT_FALSE(SIW.getSuccessorWeight(1).hasValue());
+    ASSERT_FALSE(SIW.getSuccessorWeight(2).hasValue());
+    SIW.removeCase(SwitchInst::CaseIt(SI, 2));
+  }
+
+  { // With removed 3rd case the prof data become consistent with num of cases.
+    SwitchInstProfUpdateWrapper SIW(*SI);
+    EXPECT_EQ(*SIW.getSuccessorWeight(0), 99u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(1), 11u);
+    EXPECT_EQ(*SIW.getSuccessorWeight(2), 22u);
+  }
+}
+
 TEST(InstructionsTest, CommuteShuffleMask) {
   SmallVector<int, 16> Indices({-1, 0, 7});
   ShuffleVectorInst::commuteShuffleMask(Indices, 4);

From 3e39961eec7a665f14219997450c0bbbda471cef Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Tue, 4 Jun 2019 09:04:53 +0000
Subject: [PATCH 0992/1176] [HWASAN][CMake] Allow instrumenting LLVM/clang

Differential revision: https://reviews.llvm.org/D62813

llvm-svn: 362474
---
 llvm/cmake/modules/HandleLLVMOptions.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 80ef69f36e0e8..966a2b9294e6d 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -697,6 +697,9 @@ if(LLVM_USE_SANITIZER)
     if (LLVM_USE_SANITIZER STREQUAL "Address")
       append_common_sanitizer_flags()
       append("-fsanitize=address" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+    elseif (LLVM_USE_SANITIZER STREQUAL "HWAddress")
+      append_common_sanitizer_flags()
+      append("-fsanitize=hwaddress" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
     elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?")
       append_common_sanitizer_flags()
       append("-fsanitize=memory" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)

From 9d1c5ea165020d3a60f32ab58b33e044450af4cc Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Tue, 4 Jun 2019 09:13:08 +0000
Subject: [PATCH 0993/1176] Include what you use in PPCRegisterInfo.h

llvm-svn: 362475
---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 9e11bef87e5e5..1ba1e3db87c01 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -14,13 +14,14 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 #define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
 
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "llvm/ADT/DenseMap.h"
 
 #define GET_REGINFO_HEADER
 #include "PPCGenRegisterInfo.inc"
 
 namespace llvm {
+class PPCTargetMachine;
 
 inline static unsigned getCRFromCRBit(unsigned SrcReg) {
   unsigned Reg = 0;

From 067a17b51dcf00cc738e86e923bdaaa9464516ff Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Tue, 4 Jun 2019 09:16:31 +0000
Subject: [PATCH 0994/1176] Include what you use in PPCMachineScheduler.cpp

llvm-svn: 362476
---
 llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index d57e38acef683..a38c8f475066e 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -5,8 +5,10 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-#include "PPC.h"
+
 #include "PPCMachineScheduler.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+
 using namespace llvm;
 
 static cl::opt<bool> 

From 73a15d4b7822316e82c8bf96702bf592b623cf6a Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Tue, 4 Jun 2019 09:16:35 +0000
Subject: [PATCH 0995/1176] Include what you use in PPC.h

llvm-svn: 362477
---
 llvm/lib/Target/PowerPC/PPC.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 1f6f96de81665..c6951ab67b08e 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -15,7 +15,6 @@
 #define LLVM_LIB_TARGET_POWERPC_PPC_H
 
 #include "llvm/Support/CodeGen.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
 
 // GCC #defines PPC on Linux but we use it as our namespace name
 #undef PPC

From c3c686f5f8c4368b374e110f722ab44e388b9602 Mon Sep 17 00:00:00 2001
From: Eugene Leviant <eleviant@accesssoftek.com>
Date: Tue, 4 Jun 2019 09:20:02 +0000
Subject: [PATCH 0996/1176] [HWASAN] Make new/delete weak

This allows instrumenting programs which have their own
versions of new and delete operators.

Differential revision: https://reviews.llvm.org/D62794

llvm-svn: 362478
---
 compiler-rt/lib/hwasan/hwasan_new_delete.cpp  | 16 ++++++-------
 .../hwasan/TestCases/override-new-delete.cpp  | 23 +++++++++++++++++++
 2 files changed, 31 insertions(+), 8 deletions(-)
 create mode 100644 compiler-rt/test/hwasan/TestCases/override-new-delete.cpp

diff --git a/compiler-rt/lib/hwasan/hwasan_new_delete.cpp b/compiler-rt/lib/hwasan/hwasan_new_delete.cpp
index 438a3699a92b3..4a9c79fe41b36 100644
--- a/compiler-rt/lib/hwasan/hwasan_new_delete.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_new_delete.cpp
@@ -35,15 +35,15 @@ namespace std {
   if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
   return res
 
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void *operator new(size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); }
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void *operator new[](size_t size) { OPERATOR_NEW_BODY(false /*nothrow*/); }
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void *operator new(size_t size, std::nothrow_t const&) {
   OPERATOR_NEW_BODY(true /*nothrow*/);
 }
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void *operator new[](size_t size, std::nothrow_t const&) {
   OPERATOR_NEW_BODY(true /*nothrow*/);
 }
@@ -52,13 +52,13 @@ void *operator new[](size_t size, std::nothrow_t const&) {
   GET_MALLOC_STACK_TRACE; \
   if (ptr) hwasan_free(ptr, &stack)
 
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void operator delete(void *ptr) NOEXCEPT { OPERATOR_DELETE_BODY; }
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void operator delete[](void *ptr) NOEXCEPT { OPERATOR_DELETE_BODY; }
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void operator delete(void *ptr, std::nothrow_t const&) { OPERATOR_DELETE_BODY; }
-INTERCEPTOR_ATTRIBUTE
+INTERCEPTOR_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 void operator delete[](void *ptr, std::nothrow_t const&) {
   OPERATOR_DELETE_BODY;
 }
diff --git a/compiler-rt/test/hwasan/TestCases/override-new-delete.cpp b/compiler-rt/test/hwasan/TestCases/override-new-delete.cpp
new file mode 100644
index 0000000000000..2bb7be83d590b
--- /dev/null
+++ b/compiler-rt/test/hwasan/TestCases/override-new-delete.cpp
@@ -0,0 +1,23 @@
+// RUN: %clangxx_hwasan %s
+#include <stddef.h>
+#include <new>
+
+char *__dummy;
+
+void *operator new(size_t size) { return __dummy; }
+void *operator new[](size_t size) { return __dummy; }
+void *operator new(size_t size, std::nothrow_t const&) noexcept { 
+  return __dummy; 
+}
+void *operator new[](size_t size, std::nothrow_t const&) noexcept { 
+  return __dummy; 
+}
+
+void operator delete(void *ptr) noexcept {}
+void operator delete[](void *ptr) noexcept {}
+void operator delete(void *ptr, std::nothrow_t const&) noexcept {}
+void operator delete[](void *ptr, std::nothrow_t const&) noexcept {}
+
+int main() {
+  return 0;  
+}

From 30977fc3a97b6172431749402ed4ed0b565d902c Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 4 Jun 2019 09:26:08 +0000
Subject: [PATCH 0997/1176] [CodeComplete] Include more text into typed chunks
 of pattern completions

Summary:
To allow filtering on any of the words in the editors.
In particular, the following completions were changed:
    - 'using namespace <#name#>'
      Typed text before: 'using', after: 'using namespace'.
    - 'else if (#<condition#>)'
      Before: 'else', after: 'else if'.
    - 'using typename <#qualifier#>::<#name#>'
      Before: 'using', after: 'using typename'.

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62615

llvm-svn: 362479
---
 clang/lib/Sema/SemaCodeComplete.cpp              | 16 ++++------------
 .../test/CodeCompletion/ordinary-name-cxx11.cpp  |  2 +-
 clang/test/CodeCompletion/ordinary-name.cpp      |  2 +-
 clang/test/Index/complete-stmt.c                 |  4 ++--
 4 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index f530601aba17d..4575d4fe4675d 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1919,9 +1919,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
       Results.AddResult(Result(Builder.TakeString()));
 
       // Using directives
-      Builder.AddTypedTextChunk("using");
-      Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
-      Builder.AddTextChunk("namespace");
+      Builder.AddTypedTextChunk("using namespace");
       Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
       Builder.AddPlaceholderChunk("identifier");
       Builder.AddChunk(CodeCompletionString::CK_SemiColon);
@@ -1964,9 +1962,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
 
       // using typename qualifier::name (only in a dependent context)
       if (SemaRef.CurContext->isDependentContext()) {
-        Builder.AddTypedTextChunk("using");
-        Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
-        Builder.AddTextChunk("typename");
+        Builder.AddTypedTextChunk("using typename");
         Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
         Builder.AddPlaceholderChunk("qualifier");
         Builder.AddTextChunk("::");
@@ -2236,9 +2232,7 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, Scope *S,
     Results.AddResult(Result(Builder.TakeString()));
 
     // Using directives
-    Builder.AddTypedTextChunk("using");
-    Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
-    Builder.AddTextChunk("namespace");
+    Builder.AddTypedTextChunk("using namespace");
     Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
     Builder.AddPlaceholderChunk("identifier");
     Builder.AddChunk(CodeCompletionString::CK_SemiColon);
@@ -5201,9 +5195,7 @@ void Sema::CodeCompleteAfterIf(Scope *S) {
   Results.AddResult(Builder.TakeString());
 
   // "else if" block
-  Builder.AddTypedTextChunk("else");
-  Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
-  Builder.AddTextChunk("if");
+  Builder.AddTypedTextChunk("else if");
   Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace);
   Builder.AddChunk(CodeCompletionString::CK_LeftParen);
   if (getLangOpts().CPlusPlus)
diff --git a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
index 7696c7505f20a..e568ee1ee3d8d 100644
--- a/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
+++ b/clang/test/CodeCompletion/ordinary-name-cxx11.cpp
@@ -112,8 +112,8 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typeof(<#type#>)
   // CHECK-CC2-NEXT: COMPLETION: union
   // CHECK-CC2-NEXT: COMPLETION: unsigned
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : using namespace <#identifier#>;
   // CHECK-CC2-NEXT: COMPLETION: Pattern : using <#qualifier#>::<#name#>;
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : using namespace <#identifier#>;
   // CHECK-CC2-NEXT: COMPLETION: void
   // CHECK-CC2-NEXT: COMPLETION: volatile
   // CHECK-CC2-NEXT: COMPLETION: wchar_t
diff --git a/clang/test/CodeCompletion/ordinary-name.cpp b/clang/test/CodeCompletion/ordinary-name.cpp
index 99cb69093a5ee..3f891307bb74b 100644
--- a/clang/test/CodeCompletion/ordinary-name.cpp
+++ b/clang/test/CodeCompletion/ordinary-name.cpp
@@ -102,8 +102,8 @@ void foo() {
   // CHECK-CC2-NEXT: COMPLETION: Pattern : typeof(<#type#>)
   // CHECK-CC2-NEXT: COMPLETION: union
   // CHECK-CC2-NEXT: COMPLETION: unsigned
-  // CHECK-CC2-NEXT: COMPLETION: Pattern : using namespace <#identifier#>;
   // CHECK-CC2-NEXT: COMPLETION: Pattern : using <#qualifier#>::<#name#>;
+  // CHECK-CC2-NEXT: COMPLETION: Pattern : using namespace <#identifier#>;
   // CHECK-CC2-NEXT: COMPLETION: void
   // CHECK-CC2-NEXT: COMPLETION: volatile
   // CHECK-CC2-NEXT: COMPLETION: wchar_t
diff --git a/clang/test/Index/complete-stmt.c b/clang/test/Index/complete-stmt.c
index 0deb4d371c2fc..78f49745a869d 100644
--- a/clang/test/Index/complete-stmt.c
+++ b/clang/test/Index/complete-stmt.c
@@ -9,11 +9,11 @@ void f(int x) {
 
 // RUN: env CINDEXTEST_CODE_COMPLETE_PATTERNS=1 c-index-test -code-completion-at=%s:7:4 %s | FileCheck -check-prefix=CHECK-IF-ELSE %s
 // CHECK-IF-ELSE: NotImplemented:{TypedText else}{HorizontalSpace  }{LeftBrace {}{VerticalSpace  }{Placeholder statements}{VerticalSpace  }{RightBrace }} (40)
-// CHECK-IF-ELSE: NotImplemented:{TypedText else}{HorizontalSpace  }{Text if}{HorizontalSpace  }{LeftParen (}{Placeholder expression}{RightParen )}{HorizontalSpace  }{LeftBrace {}{VerticalSpace  }{Placeholder statements}{VerticalSpace  }{RightBrace }} (40)
+// CHECK-IF-ELSE: NotImplemented:{TypedText else if}{HorizontalSpace  }{LeftParen (}{Placeholder expression}{RightParen )}{HorizontalSpace  }{LeftBrace {}{VerticalSpace  }{Placeholder statements}{VerticalSpace  }{RightBrace }} (40)
 
 // RUN: c-index-test -code-completion-at=%s:7:4 %s | FileCheck -check-prefix=CHECK-IF-ELSE-SIMPLE %s
 // CHECK-IF-ELSE-SIMPLE: NotImplemented:{TypedText else} (40)
-// CHECK-IF-ELSE-SIMPLE: NotImplemented:{TypedText else}{HorizontalSpace  }{Text if}{HorizontalSpace  }{LeftParen (}{Placeholder expression}{RightParen )} (40)
+// CHECK-IF-ELSE-SIMPLE: NotImplemented:{TypedText else if}{HorizontalSpace  }{LeftParen (}{Placeholder expression}{RightParen )} (40)
 
 // RUN: c-index-test -code-completion-at=%s:6:1 %s | FileCheck -check-prefix=CHECK-STMT %s
 // CHECK-STMT: NotImplemented:{TypedText _Nonnull} (50)

From 63846039f574a584714ef10777a6ea4c4e5706fd Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Tue, 4 Jun 2019 09:31:07 +0000
Subject: [PATCH 0998/1176] Silenced a warning "implicit conversion turns
 string literal into bool" introduced in r362473

llvm-svn: 362480
---
 llvm/lib/IR/Instructions.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index ad082a9c24f30..693918c8c076f 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -3913,8 +3913,9 @@ void SwitchInstProfUpdateWrapper::init() {
   if (ProfileData->getNumOperands() != SI.getNumSuccessors() + 1) {
     State = Invalid;
     if (SwitchInstProfUpdateWrapperStrict)
-      assert(!"number of prof branch_weights metadata operands corresponds to"
-              " number of succesors");
+      assert(false &&
+             "number of prof branch_weights metadata operands corresponds to"
+             " number of succesors");
     return;
   }
 

From 4ef0f82b71dedeb11d2ff40c6a68ac2737f07f59 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 4 Jun 2019 09:36:59 +0000
Subject: [PATCH 0999/1176] [clangd] Support offsets for parameters in
 signatureHelp

Summary: Added to LSP in version 3.14

Reviewers: hokein

Reviewed By: hokein

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62476

llvm-svn: 362481
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp  |  18 ++-
 clang-tools-extra/clangd/ClangdLSPServer.h    |   5 +-
 clang-tools-extra/clangd/CodeComplete.cpp     | 115 ++++++++---------
 clang-tools-extra/clangd/Protocol.cpp         |  18 ++-
 clang-tools-extra/clangd/Protocol.h           |  13 +-
 .../test/signature-help-with-offsets.test     |  50 ++++++++
 .../clangd/unittests/CodeCompleteTests.cpp    | 117 ++++++++++--------
 7 files changed, 213 insertions(+), 123 deletions(-)
 create mode 100644 clang-tools-extra/clangd/test/signature-help-with-offsets.test

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 53326296a6810..6a4d2f3dbfca2 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -360,6 +360,7 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params,
       Params.capabilities.HierarchicalDocumentSymbol;
   SupportFileStatus = Params.initializationOptions.FileStatus;
   HoverContentFormat = Params.capabilities.HoverContentFormat;
+  SupportsOffsetsInSignatureHelp = Params.capabilities.OffsetsInSignatureHelp;
   llvm::json::Object Result{
       {{"capabilities",
         llvm::json::Object{
@@ -761,7 +762,22 @@ void ClangdLSPServer::onCompletion(const CompletionParams &Params,
 void ClangdLSPServer::onSignatureHelp(const TextDocumentPositionParams &Params,
                                       Callback<SignatureHelp> Reply) {
   Server->signatureHelp(Params.textDocument.uri.file(), Params.position,
-                        std::move(Reply));
+                        Bind(
+                            [this](decltype(Reply) Reply,
+                                   llvm::Expected<SignatureHelp> Signature) {
+                              if (!Signature)
+                                return Reply(Signature.takeError());
+                              if (SupportsOffsetsInSignatureHelp)
+                                return Reply(std::move(*Signature));
+                              // Strip out the offsets from signature help for
+                              // clients that only support string labels.
+                              for (auto &Signature : Signature->signatures) {
+                                for (auto &Param : Signature.parameters)
+                                  Param.labelOffsets.reset();
+                              }
+                              return Reply(std::move(*Signature));
+                            },
+                            std::move(Reply)));
 }
 
 // Go to definition has a toggle function: if def and decl are distinct, then
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h
index 238e9affa134a..2b2d9c0b0080d 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.h
+++ b/clang-tools-extra/clangd/ClangdLSPServer.h
@@ -156,8 +156,9 @@ class ClangdLSPServer : private DiagnosticsConsumer {
   bool SupportFileStatus = false;
   /// Which kind of markup should we use in textDocument/hover responses.
   MarkupKind HoverContentFormat = MarkupKind::PlainText;
-
-  /// Store of the current versions of the open documents.
+  /// Whether the client supports offsets for parameter info labels.
+  bool SupportsOffsetsInSignatureHelp = false;
+  // Store of the current versions of the open documents.
   DraftStore DraftMgr;
 
   // The CDB is created by the "initialize" LSP method.
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index 2186bdee44865..32fdccc410092 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -28,6 +28,7 @@
 #include "FuzzyMatch.h"
 #include "Headers.h"
 #include "Logger.h"
+#include "Protocol.h"
 #include "Quality.h"
 #include "SourceCode.h"
 #include "TUScheduler.h"
@@ -56,6 +57,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
@@ -148,46 +150,6 @@ toCompletionItemKind(CodeCompletionResult::ResultKind ResKind,
   llvm_unreachable("Unhandled CodeCompletionResult::ResultKind.");
 }
 
-/// Get the optional chunk as a string. This function is possibly recursive.
-///
-/// The parameter info for each parameter is appended to the Parameters.
-std::string getOptionalParameters(const CodeCompletionString &CCS,
-                                  std::vector<ParameterInformation> &Parameters,
-                                  SignatureQualitySignals &Signal) {
-  std::string Result;
-  for (const auto &Chunk : CCS) {
-    switch (Chunk.Kind) {
-    case CodeCompletionString::CK_Optional:
-      assert(Chunk.Optional &&
-             "Expected the optional code completion string to be non-null.");
-      Result += getOptionalParameters(*Chunk.Optional, Parameters, Signal);
-      break;
-    case CodeCompletionString::CK_VerticalSpace:
-      break;
-    case CodeCompletionString::CK_Placeholder:
-      // A string that acts as a placeholder for, e.g., a function call
-      // argument.
-      // Intentional fallthrough here.
-    case CodeCompletionString::CK_CurrentParameter: {
-      // A piece of text that describes the parameter that corresponds to
-      // the code-completion location within a function call, message send,
-      // macro invocation, etc.
-      Result += Chunk.Text;
-      ParameterInformation Info;
-      Info.label = Chunk.Text;
-      Parameters.push_back(std::move(Info));
-      Signal.ContainsActiveParameter = true;
-      Signal.NumberOfOptionalParameters++;
-      break;
-    }
-    default:
-      Result += Chunk.Text;
-      break;
-    }
-  }
-  return Result;
-}
-
 // Identifier code completion result.
 struct RawIdentifier {
   llvm::StringRef Name;
@@ -830,8 +792,7 @@ class SignatureHelpCollector final : public CodeCompleteConsumer {
 public:
   SignatureHelpCollector(const clang::CodeCompleteOptions &CodeCompleteOpts,
                          const SymbolIndex *Index, SignatureHelp &SigHelp)
-      : CodeCompleteConsumer(CodeCompleteOpts),
-        SigHelp(SigHelp),
+      : CodeCompleteConsumer(CodeCompleteOpts), SigHelp(SigHelp),
         Allocator(std::make_shared<clang::GlobalCodeCompletionAllocator>()),
         CCTUInfo(Allocator), Index(Index) {}
 
@@ -944,6 +905,50 @@ class SignatureHelpCollector final : public CodeCompleteConsumer {
   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
 
 private:
+  void processParameterChunk(llvm::StringRef ChunkText,
+                             SignatureInformation &Signature,
+                             SignatureQualitySignals Signal) const {
+    // (!) this is O(n), should still be fast compared to building ASTs.
+    unsigned ParamStartOffset = lspLength(Signature.label);
+    unsigned ParamEndOffset = ParamStartOffset + lspLength(ChunkText);
+    // A piece of text that describes the parameter that corresponds to
+    // the code-completion location within a function call, message send,
+    // macro invocation, etc.
+    Signature.label += ChunkText;
+    ParameterInformation Info;
+    Info.labelOffsets.emplace(ParamStartOffset, ParamEndOffset);
+    // FIXME: only set 'labelOffsets' when all clients migrate out of it.
+    Info.labelString = ChunkText;
+
+    Signature.parameters.push_back(std::move(Info));
+    // FIXME: this should only be set on CK_CurrentParameter.
+    Signal.ContainsActiveParameter = true;
+  }
+
+  void processOptionalChunk(const CodeCompletionString &CCS,
+                            SignatureInformation &Signature,
+                            SignatureQualitySignals &Signal) const {
+    for (const auto &Chunk : CCS) {
+      switch (Chunk.Kind) {
+      case CodeCompletionString::CK_Optional:
+        assert(Chunk.Optional &&
+               "Expected the optional code completion string to be non-null.");
+        processOptionalChunk(*Chunk.Optional, Signature, Signal);
+        break;
+      case CodeCompletionString::CK_VerticalSpace:
+        break;
+      case CodeCompletionString::CK_CurrentParameter:
+      case CodeCompletionString::CK_Placeholder:
+        processParameterChunk(Chunk.Text, Signature, Signal);
+        Signal.NumberOfOptionalParameters++;
+        break;
+      default:
+        Signature.label += Chunk.Text;
+        break;
+      }
+    }
+  }
+
   // FIXME(ioeric): consider moving CodeCompletionString logic here to
   // CompletionString.h.
   ScoredSignature processOverloadCandidate(const OverloadCandidate &Candidate,
@@ -964,28 +969,16 @@ class SignatureHelpCollector final : public CodeCompleteConsumer {
         assert(!ReturnType && "Unexpected CK_ResultType");
         ReturnType = Chunk.Text;
         break;
+      case CodeCompletionString::CK_CurrentParameter:
       case CodeCompletionString::CK_Placeholder:
-        // A string that acts as a placeholder for, e.g., a function call
-        // argument.
-        // Intentional fallthrough here.
-      case CodeCompletionString::CK_CurrentParameter: {
-        // A piece of text that describes the parameter that corresponds to
-        // the code-completion location within a function call, message send,
-        // macro invocation, etc.
-        Signature.label += Chunk.Text;
-        ParameterInformation Info;
-        Info.label = Chunk.Text;
-        Signature.parameters.push_back(std::move(Info));
+        processParameterChunk(Chunk.Text, Signature, Signal);
         Signal.NumberOfParameters++;
-        Signal.ContainsActiveParameter = true;
         break;
-      }
       case CodeCompletionString::CK_Optional: {
         // The rest of the parameters are defaulted/optional.
         assert(Chunk.Optional &&
                "Expected the optional code completion string to be non-null.");
-        Signature.label += getOptionalParameters(*Chunk.Optional,
-                                                 Signature.parameters, Signal);
+        processOptionalChunk(*Chunk.Optional, Signature, Signal);
         break;
       }
       case CodeCompletionString::CK_VerticalSpace:
@@ -1037,7 +1030,7 @@ void loadMainFilePreambleMacros(const Preprocessor &PP,
       PP.getIdentifierTable().getExternalIdentifierLookup();
   if (!PreambleIdentifiers || !PreambleMacros)
     return;
-  for (const auto& MacroName : Preamble.MainFileMacros)
+  for (const auto &MacroName : Preamble.MainFileMacros)
     if (auto *II = PreambleIdentifiers->get(MacroName))
       if (II->isOutOfDate())
         PreambleMacros->updateOutOfDateIdentifier(*II);
@@ -1213,7 +1206,7 @@ class CodeCompleteFlow {
   int NSema = 0, NIndex = 0, NSemaAndIndex = 0, NIdent = 0;
   bool Incomplete = false; // Would more be available with a higher limit?
   CompletionPrefix HeuristicPrefix;
-  llvm::Optional<FuzzyMatcher> Filter;  // Initialized once Sema runs.
+  llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
   Range ReplacedRange;
   std::vector<std::string> QueryScopes; // Initialized once Sema runs.
   // Initialized once QueryScopes is initialized, if there are scopes.
@@ -1707,8 +1700,8 @@ clang::CodeCompleteOptions CodeCompleteOptions::getClangCompleteOpts() const {
   return Result;
 }
 
-CompletionPrefix
-guessCompletionPrefix(llvm::StringRef Content, unsigned Offset) {
+CompletionPrefix guessCompletionPrefix(llvm::StringRef Content,
+                                       unsigned Offset) {
   assert(Offset <= Content.size());
   StringRef Rest = Content.take_front(Offset);
   CompletionPrefix Result;
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 51316fefd1f7f..5c5912eb9bcc6 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -314,6 +314,14 @@ bool fromJSON(const llvm::json::Value &Params, ClientCapabilities &R) {
         }
       }
     }
+    if (auto *Help = TextDocument->getObject("signatureHelp")) {
+      if (auto *Info = Help->getObject("signatureInformation")) {
+        if (auto *Parameter = Info->getObject("parameterInformation")) {
+          if (auto OffsetSupport = Parameter->getBoolean("labelOffsetSupport"))
+            R.OffsetsInSignatureHelp = *OffsetSupport;
+        }
+      }
+    }
   }
   if (auto *Workspace = O->getObject("workspace")) {
     if (auto *Symbol = Workspace->getObject("symbol")) {
@@ -824,8 +832,14 @@ llvm::json::Value toJSON(const CompletionList &L) {
 }
 
 llvm::json::Value toJSON(const ParameterInformation &PI) {
-  assert(!PI.label.empty() && "parameter information label is required");
-  llvm::json::Object Result{{"label", PI.label}};
+  assert(PI.labelOffsets.hasValue() ||
+         !PI.labelString.empty() && "parameter information label is required");
+  llvm::json::Object Result;
+  if (PI.labelOffsets)
+    Result["label"] =
+        llvm::json::Array({PI.labelOffsets->first, PI.labelOffsets->second});
+  else
+    Result["label"] = PI.labelString;
   if (!PI.documentation.empty())
     Result["documentation"] = PI.documentation;
   return std::move(Result);
diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h
index 21bac8abfb531..1ebe8071cdad8 100644
--- a/clang-tools-extra/clangd/Protocol.h
+++ b/clang-tools-extra/clangd/Protocol.h
@@ -390,6 +390,9 @@ struct ClientCapabilities {
   /// Client supports hierarchical document symbols.
   bool HierarchicalDocumentSymbol = false;
 
+  /// Client supports processing label offsets instead of a simple label string.
+  bool OffsetsInSignatureHelp = false;
+
   /// The supported set of CompletionItemKinds for textDocument/completion.
   /// textDocument.completion.completionItemKind.valueSet
   llvm::Optional<CompletionItemKindBitset> CompletionItemKinds;
@@ -979,8 +982,14 @@ llvm::json::Value toJSON(const CompletionList &);
 /// A single parameter of a particular signature.
 struct ParameterInformation {
 
-  /// The label of this parameter. Mandatory.
-  std::string label;
+  /// The label of this parameter. Ignored when labelOffsets is set.
+  std::string labelString;
+
+  /// Inclusive start and exclusive end offsets withing the containing signature
+  /// label.
+  /// Offsets are computed by lspLength(), which counts UTF-16 code units by
+  /// default but that can be overriden, see its documentation for details.
+  llvm::Optional<std::pair<unsigned, unsigned>> labelOffsets;
 
   /// The documentation of this parameter. Optional.
   std::string documentation;
diff --git a/clang-tools-extra/clangd/test/signature-help-with-offsets.test b/clang-tools-extra/clangd/test/signature-help-with-offsets.test
new file mode 100644
index 0000000000000..825dbc6c79bdb
--- /dev/null
+++ b/clang-tools-extra/clangd/test/signature-help-with-offsets.test
@@ -0,0 +1,50 @@
+# RUN: clangd -lit-test < %s | FileCheck -strict-whitespace %s
+# Start a session.
+{
+  "jsonrpc": "2.0",
+  "id": 0,
+  "method": "initialize",
+  "params": {
+    "processId": 123,
+    "rootPath": "clangd",
+    "capabilities": {
+      "textDocument": {
+        "signatureHelp": {
+          "signatureInformation": {
+            "parameterInformation": {
+              "labelOffsetSupport": true
+            }
+          }
+        }
+      }
+    },
+    "trace": "off"
+  }
+}
+---
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"void x(int);\nint main(){\nx("}}}
+---
+{"jsonrpc":"2.0","id":1,"method":"textDocument/signatureHelp","params":{"textDocument":{"uri":"test:///main.cpp"},"position":{"line":2,"character":2}}}
+#      CHECK: "id": 1,
+# CHECK-NEXT: "jsonrpc": "2.0",
+# CHECK-NEXT: "result": {
+# CHECK-NEXT:   "activeParameter": 0,
+# CHECK-NEXT:   "activeSignature": 0,
+# CHECK-NEXT:   "signatures": [
+# CHECK-NEXT:     {
+# CHECK-NEXT:       "label": "x(int) -> void",
+# CHECK-NEXT:       "parameters": [
+# CHECK-NEXT:         {
+# CHECK-NEXT:           "label": [
+# CHECK-NEXT:                     2,
+# CHECK-NEXT:                     5
+# CHECK-NEXT:                    ]
+# CHECK-NEXT:         }
+# CHECK-NEXT:       ]
+# CHECK-NEXT:     }
+# CHECK-NEXT:   ]
+# CHECK-NEXT: }
+---
+{"jsonrpc":"2.0","id":100000,"method":"shutdown"}
+---
+{"jsonrpc":"2.0","method":"exit"}
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
index e90124ceb4330..2f846e9c83781 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp
@@ -967,19 +967,37 @@ SignatureHelp signatures(llvm::StringRef Text,
   return signatures(Test.code(), Test.point(), std::move(IndexSymbols));
 }
 
+struct ExpectedParameter {
+  std::string Text;
+  std::pair<unsigned, unsigned> Offsets;
+};
 MATCHER_P(ParamsAre, P, "") {
   if (P.size() != arg.parameters.size())
     return false;
-  for (unsigned I = 0; I < P.size(); ++I)
-    if (P[I] != arg.parameters[I].label)
+  for (unsigned I = 0; I < P.size(); ++I) {
+    if (P[I].Text != arg.parameters[I].labelString ||
+        P[I].Offsets != arg.parameters[I].labelOffsets)
       return false;
+  }
   return true;
 }
 MATCHER_P(SigDoc, Doc, "") { return arg.documentation == Doc; }
 
-Matcher<SignatureInformation> Sig(std::string Label,
-                                  std::vector<std::string> Params) {
-  return AllOf(SigHelpLabeled(Label), ParamsAre(Params));
+/// \p AnnotatedLabel is a signature label with ranges marking parameters, e.g.
+///    foo([[int p1]], [[double p2]]) -> void
+Matcher<SignatureInformation> Sig(llvm::StringRef AnnotatedLabel) {
+  llvm::Annotations A(AnnotatedLabel);
+  std::string Label = A.code();
+  std::vector<ExpectedParameter> Parameters;
+  for (auto Range : A.ranges()) {
+    Parameters.emplace_back();
+
+    ExpectedParameter &P = Parameters.back();
+    P.Text = Label.substr(Range.Begin, Range.End - Range.Begin);
+    P.Offsets.first = lspLength(llvm::StringRef(Label).substr(0, Range.Begin));
+    P.Offsets.second = lspLength(llvm::StringRef(Label).substr(1, Range.End));
+  }
+  return AllOf(SigHelpLabeled(Label), ParamsAre(Parameters));
 }
 
 TEST(SignatureHelpTest, Overloads) {
@@ -992,11 +1010,10 @@ TEST(SignatureHelpTest, Overloads) {
     int main() { foo(^); }
   )cpp");
   EXPECT_THAT(Results.signatures,
-              UnorderedElementsAre(
-                  Sig("foo(float x, float y) -> void", {"float x", "float y"}),
-                  Sig("foo(float x, int y) -> void", {"float x", "int y"}),
-                  Sig("foo(int x, float y) -> void", {"int x", "float y"}),
-                  Sig("foo(int x, int y) -> void", {"int x", "int y"})));
+              UnorderedElementsAre(Sig("foo([[float x]], [[float y]]) -> void"),
+                                   Sig("foo([[float x]], [[int y]]) -> void"),
+                                   Sig("foo([[int x]], [[float y]]) -> void"),
+                                   Sig("foo([[int x]], [[int y]]) -> void")));
   // We always prefer the first signature.
   EXPECT_EQ(0, Results.activeSignature);
   EXPECT_EQ(0, Results.activeParameter);
@@ -1010,9 +1027,8 @@ TEST(SignatureHelpTest, DefaultArgs) {
   )cpp");
   EXPECT_THAT(Results.signatures,
               UnorderedElementsAre(
-                  Sig("bar(int x, int y = 0) -> void", {"int x", "int y = 0"}),
-                  Sig("bar(float x = 0, int y = 42) -> void",
-                      {"float x = 0", "int y = 42"})));
+                  Sig("bar([[int x]], [[int y = 0]]) -> void"),
+                  Sig("bar([[float x = 0]], [[int y = 42]]) -> void")));
   EXPECT_EQ(0, Results.activeSignature);
   EXPECT_EQ(0, Results.activeParameter);
 }
@@ -1023,8 +1039,7 @@ TEST(SignatureHelpTest, ActiveArg) {
     int main() { baz(baz(1,2,3), ^); }
   )cpp");
   EXPECT_THAT(Results.signatures,
-              ElementsAre(Sig("baz(int a, int b, int c) -> int",
-                              {"int a", "int b", "int c"})));
+              ElementsAre(Sig("baz([[int a]], [[int b]], [[int c]]) -> int")));
   EXPECT_EQ(0, Results.activeSignature);
   EXPECT_EQ(1, Results.activeParameter);
 }
@@ -1761,14 +1776,12 @@ TEST(SignatureHelpTest, OverloadsOrdering) {
     void foo(int x, int y = 0);
     int main() { foo(^); }
   )cpp");
-  EXPECT_THAT(
-      Results.signatures,
-      ElementsAre(
-          Sig("foo(int x) -> void", {"int x"}),
-          Sig("foo(int x, int y = 0) -> void", {"int x", "int y = 0"}),
-          Sig("foo(float x, int y) -> void", {"float x", "int y"}),
-          Sig("foo(int x, float y) -> void", {"int x", "float y"}),
-          Sig("foo(float x, float y) -> void", {"float x", "float y"})));
+  EXPECT_THAT(Results.signatures,
+              ElementsAre(Sig("foo([[int x]]) -> void"),
+                          Sig("foo([[int x]], [[int y = 0]]) -> void"),
+                          Sig("foo([[float x]], [[int y]]) -> void"),
+                          Sig("foo([[int x]], [[float y]]) -> void"),
+                          Sig("foo([[float x]], [[float y]]) -> void")));
   // We always prefer the first signature.
   EXPECT_EQ(0, Results.activeSignature);
   EXPECT_EQ(0, Results.activeParameter);
@@ -1785,7 +1798,7 @@ TEST(SignatureHelpTest, InstantiatedSignatures) {
   )cpp";
 
   EXPECT_THAT(signatures(Sig0).signatures,
-              ElementsAre(Sig("foo(T, T, T) -> void", {"T", "T", "T"})));
+              ElementsAre(Sig("foo([[T]], [[T]], [[T]]) -> void")));
 
   StringRef Sig1 = R"cpp(
     template <class T>
@@ -1796,7 +1809,7 @@ TEST(SignatureHelpTest, InstantiatedSignatures) {
     })cpp";
 
   EXPECT_THAT(signatures(Sig1).signatures,
-              ElementsAre(Sig("foo(T, T, T) -> void", {"T", "T", "T"})));
+              ElementsAre(Sig("foo([[T]], [[T]], [[T]]) -> void")));
 
   StringRef Sig2 = R"cpp(
     template <class ...T>
@@ -1808,7 +1821,7 @@ TEST(SignatureHelpTest, InstantiatedSignatures) {
   )cpp";
 
   EXPECT_THAT(signatures(Sig2).signatures,
-              ElementsAre(Sig("foo(T...) -> void", {"T..."})));
+              ElementsAre(Sig("foo([[T...]]) -> void")));
 
   // It is debatable whether we should substitute the outer template parameter
   // ('T') in that case. Currently we don't substitute it in signature help, but
@@ -1828,7 +1841,7 @@ TEST(SignatureHelpTest, InstantiatedSignatures) {
   )cpp";
 
   EXPECT_THAT(signatures(Sig3).signatures,
-              ElementsAre(Sig("foo(T, U) -> void", {"T", "U"})));
+              ElementsAre(Sig("foo([[T]], [[U]]) -> void")));
 }
 
 TEST(SignatureHelpTest, IndexDocumentation) {
@@ -1849,8 +1862,8 @@ TEST(SignatureHelpTest, IndexDocumentation) {
 
   EXPECT_THAT(
       signatures(Sig0, {Foo0}).signatures,
-      ElementsAre(AllOf(Sig("foo() -> int", {}), SigDoc("Doc from the index")),
-                  AllOf(Sig("foo(double) -> int", {"double"}), SigDoc(""))));
+      ElementsAre(AllOf(Sig("foo() -> int"), SigDoc("Doc from the index")),
+                  AllOf(Sig("foo([[double]]) -> int"), SigDoc(""))));
 
   StringRef Sig1 = R"cpp(
     int foo();
@@ -1866,11 +1879,10 @@ TEST(SignatureHelpTest, IndexDocumentation) {
 
   EXPECT_THAT(
       signatures(Sig1, {Foo0, Foo1, Foo2}).signatures,
-      ElementsAre(AllOf(Sig("foo() -> int", {}), SigDoc("Doc from the index")),
-                  AllOf(Sig("foo(int) -> int", {"int"}),
-                        SigDoc("Overriden doc from sema")),
-                  AllOf(Sig("foo(int, int) -> int", {"int", "int"}),
-                        SigDoc("Doc from sema"))));
+      ElementsAre(
+          AllOf(Sig("foo() -> int"), SigDoc("Doc from the index")),
+          AllOf(Sig("foo([[int]]) -> int"), SigDoc("Overriden doc from sema")),
+          AllOf(Sig("foo([[int]], [[int]]) -> int"), SigDoc("Doc from sema"))));
 }
 
 TEST(SignatureHelpTest, DynamicIndexDocumentation) {
@@ -1901,7 +1913,7 @@ TEST(SignatureHelpTest, DynamicIndexDocumentation) {
   EXPECT_THAT(
       llvm::cantFail(runSignatureHelp(Server, File, FileContent.point()))
           .signatures,
-      ElementsAre(AllOf(Sig("foo() -> int", {}), SigDoc("Member doc"))));
+      ElementsAre(AllOf(Sig("foo() -> int"), SigDoc("Member doc"))));
 }
 
 TEST(CompletionTest, CompletionFunctionArgsDisabled) {
@@ -2179,10 +2191,9 @@ TEST(SignatureHelpTest, InsideArgument) {
       void foo(int x, int y);
       int main() { foo(1+^); }
     )cpp");
-    EXPECT_THAT(
-        Results.signatures,
-        ElementsAre(Sig("foo(int x) -> void", {"int x"}),
-                    Sig("foo(int x, int y) -> void", {"int x", "int y"})));
+    EXPECT_THAT(Results.signatures,
+                ElementsAre(Sig("foo([[int x]]) -> void"),
+                            Sig("foo([[int x]], [[int y]]) -> void")));
     EXPECT_EQ(0, Results.activeParameter);
   }
   {
@@ -2191,10 +2202,9 @@ TEST(SignatureHelpTest, InsideArgument) {
       void foo(int x, int y);
       int main() { foo(1^); }
     )cpp");
-    EXPECT_THAT(
-        Results.signatures,
-        ElementsAre(Sig("foo(int x) -> void", {"int x"}),
-                    Sig("foo(int x, int y) -> void", {"int x", "int y"})));
+    EXPECT_THAT(Results.signatures,
+                ElementsAre(Sig("foo([[int x]]) -> void"),
+                            Sig("foo([[int x]], [[int y]]) -> void")));
     EXPECT_EQ(0, Results.activeParameter);
   }
   {
@@ -2203,10 +2213,9 @@ TEST(SignatureHelpTest, InsideArgument) {
       void foo(int x, int y);
       int main() { foo(1^0); }
     )cpp");
-    EXPECT_THAT(
-        Results.signatures,
-        ElementsAre(Sig("foo(int x) -> void", {"int x"}),
-                    Sig("foo(int x, int y) -> void", {"int x", "int y"})));
+    EXPECT_THAT(Results.signatures,
+                ElementsAre(Sig("foo([[int x]]) -> void"),
+                            Sig("foo([[int x]], [[int y]]) -> void")));
     EXPECT_EQ(0, Results.activeParameter);
   }
   {
@@ -2216,8 +2225,8 @@ TEST(SignatureHelpTest, InsideArgument) {
       int bar(int x, int y);
       int main() { bar(foo(2, 3^)); }
     )cpp");
-    EXPECT_THAT(Results.signatures, ElementsAre(Sig("foo(int x, int y) -> void",
-                                                    {"int x", "int y"})));
+    EXPECT_THAT(Results.signatures,
+                ElementsAre(Sig("foo([[int x]], [[int y]]) -> void")));
     EXPECT_EQ(1, Results.activeParameter);
   }
 }
@@ -2234,9 +2243,8 @@ TEST(SignatureHelpTest, ConstructorInitializeFields) {
       };
     )cpp");
     EXPECT_THAT(Results.signatures,
-                UnorderedElementsAre(Sig("A(int)", {"int"}),
-                                     Sig("A(A &&)", {"A &&"}),
-                                     Sig("A(const A &)", {"const A &"})));
+                UnorderedElementsAre(Sig("A([[int]])"), Sig("A([[A &&]])"),
+                                     Sig("A([[const A &]])")));
   }
   {
     const auto Results = signatures(R"cpp(
@@ -2253,9 +2261,8 @@ TEST(SignatureHelpTest, ConstructorInitializeFields) {
       };
     )cpp");
     EXPECT_THAT(Results.signatures,
-                UnorderedElementsAre(Sig("A(int)", {"int"}),
-                                     Sig("A(A &&)", {"A &&"}),
-                                     Sig("A(const A &)", {"const A &"})));
+                UnorderedElementsAre(Sig("A([[int]])"), Sig("A([[A &&]])"),
+                                     Sig("A([[const A &]])")));
   }
 }
 

From 08da01b496481ea079b2e213f6e67ef1fd1c6f29 Mon Sep 17 00:00:00 2001
From: Mikhail Maltsev <mikhail.maltsev@arm.com>
Date: Tue, 4 Jun 2019 09:39:55 +0000
Subject: [PATCH 1000/1176] [ARM] Add FP16 vector insert/extract patterns

This change adds two FP16 extraction and two insertion patterns
(one per possible vector length).
Extractions are handled by copying a Q/D register into one of VFP2
class registers, where single FP32 sub-registers can be accessed. Then
the extraction of even lanes are simple sub-register extractions
(because we don't care about the top parts of registers for FP16
operations). Odd lanes need an additional VMOVX instruction.

Unfortunately, insertions cannot be handled in the same way, because:
* There is no instruction to insert FP16 into an even lane (VINS only
  works with odd lanes)
* The patterns for odd lanes will have a form of a DAG (not a tree),
  and will not be implementable in pure tablegen

Because of this insertions are handled in the same way as 16-bit
integer insertions (with conversions between FP registers and GPRs
using VMOVHR instructions).

Without these patterns the ARM backend would sometimes fail during
instruction selection.

This patch also adds patterns which combine:
* an FP16 element extraction and a store into a single VST1
  instruction
* an FP16 load and insertion into a single VLD1 instruction

Differential Revision: https://reviews.llvm.org/D62651

llvm-svn: 362482
---
 llvm/lib/Target/ARM/ARMInstrNEON.td           | 53 ++++++++++++++
 llvm/test/CodeGen/ARM/fp16-insert-extract.ll  | 72 +++++++++++++++++++
 llvm/test/CodeGen/ARM/fp16-vldlane-vstlane.ll | 56 +++++++++++++++
 3 files changed, 181 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/fp16-insert-extract.ll
 create mode 100644 llvm/test/CodeGen/ARM/fp16-vldlane-vstlane.ll

diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index effee0fcd387b..1c7bbab6a2c18 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -1117,6 +1117,12 @@ def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
 
+def : Pat<(vector_insert (v4f16 DPR:$src),
+                         (f16 (load addrmode6:$addr)), imm:$lane),
+          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v8f16 QPR:$src),
+                         (f16 (load addrmode6:$addr)), imm:$lane),
+          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 def : Pat<(vector_insert (v2f32 DPR:$src),
                          (f32 (load addrmode6:$addr)), imm:$lane),
           (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
@@ -2175,6 +2181,11 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
 def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
           (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
 
+def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
 // ...with address register writeback:
 class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
                PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
@@ -2504,6 +2515,13 @@ def SSubReg_f32_reg : SDNodeXForm<imm, [{
                                    MVT::i32);
 }]>;
 
+// Extract S sub-registers of Q/D registers containing a given f16 lane.
+def SSubReg_f16_reg : SDNodeXForm<imm, [{
+  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
+                                   MVT::i32);
+}]>;
+
 // Translate lane numbers from Q registers to D subregs.
 def SubReg_i8_lane  : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
@@ -6223,6 +6241,32 @@ def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
           (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
 
+def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
+def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
+
+def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
+            (EXTRACT_SUBREG
+                (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+                (SSubReg_f16_reg imm_even:$lane))>;
+
+def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
+            (COPY_TO_REGCLASS
+              (VMOVH (EXTRACT_SUBREG
+                  (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+                  (SSubReg_f16_reg imm_odd:$lane))),
+              HPR)>;
+
+def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
+            (EXTRACT_SUBREG
+                (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
+                (SSubReg_f16_reg imm_even:$lane))>;
+
+def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
+            (COPY_TO_REGCLASS
+              (VMOVH (EXTRACT_SUBREG
+                  (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
+                  (SSubReg_f16_reg imm_odd:$lane))),
+              HPR)>;
 
 //   VMOV     : Vector Set Lane (move ARM core register to scalar)
 
@@ -6281,6 +6325,15 @@ def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
           (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                                 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
 
+def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
+          (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
+def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
+          (v8f16 (INSERT_SUBREG QPR:$src1,
+                   (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+                                      (DSubReg_i16_reg imm:$lane))),
+                             (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
+                   (DSubReg_i16_reg imm:$lane)))>;
+
 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
 //          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
diff --git a/llvm/test/CodeGen/ARM/fp16-insert-extract.ll b/llvm/test/CodeGen/ARM/fp16-insert-extract.ll
new file mode 100644
index 0000000000000..617a4dfada183
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp16-insert-extract.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=hard -O1 < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=soft -O1 < %s | FileCheck %s
+
+define float @test_vget_lane_f16_1(<4 x half> %a) nounwind {
+; CHECK-LABEL: test_vget_lane_f16_1:
+; CHECK:      vmovx.f16 s0, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s0
+entry:
+  %elt = extractelement <4 x half> %a, i32 1
+  %conv = fpext half %elt to float
+  ret float %conv
+}
+
+define float @test_vget_lane_f16_2(<4 x half> %a) nounwind {
+; CHECK-LABEL: test_vget_lane_f16_2:
+; CHECK-NOT:  vmovx.f16
+; CHECK:      vcvtb.f32.f16 s0, s1
+entry:
+  %elt = extractelement <4 x half> %a, i32 2
+  %conv = fpext half %elt to float
+  ret float %conv
+}
+
+define float @test_vget_laneq_f16_6(<8 x half> %a) nounwind {
+; CHECK-LABEL: test_vget_laneq_f16_6:
+; CHECK-NOT:  vmovx.f16
+; CHECK:      vcvtb.f32.f16 s0, s3
+entry:
+  %elt = extractelement <8 x half> %a, i32 6
+  %conv = fpext half %elt to float
+  ret float %conv
+}
+
+define float @test_vget_laneq_f16_7(<8 x half> %a) nounwind {
+; CHECK-LABEL: test_vget_laneq_f16_7:
+; CHECK:      vmovx.f16 s0, s3
+; CHECK:      vcvtb.f32.f16 s0, s0
+entry:
+  %elt = extractelement <8 x half> %a, i32 7
+  %conv = fpext half %elt to float
+  ret float %conv
+}
+
+define <4 x half> @test_vset_lane_f16(<4 x half> %a, float %fb) nounwind {
+; CHECK-LABEL: test_vset_lane_f16:
+; CHECK: vmov.f16 r[[GPR:[0-9]+]], s{{[0-9]+}}
+; CHECK: vmov.16  d{{[0-9]+}}[3], r[[GPR]]
+entry:
+  %b = fptrunc float %fb to half
+  %x = insertelement <4 x half> %a, half %b, i32 3
+  ret <4 x half> %x
+}
+
+define <8 x half> @test_vset_laneq_f16_1(<8 x half> %a, float %fb) nounwind {
+; CHECK-LABEL: test_vset_laneq_f16_1:
+; CHECK: vmov.f16 r[[GPR:[0-9]+]], s{{[0-9]+}}
+; CHECK: vmov.16  d{{[0-9]+}}[1], r[[GPR]]
+entry:
+  %b = fptrunc float %fb to half
+  %x = insertelement <8 x half> %a, half %b, i32 1
+  ret <8 x half> %x
+}
+
+define <8 x half> @test_vset_laneq_f16_7(<8 x half> %a, float %fb) nounwind {
+; CHECK-LABEL: test_vset_laneq_f16_7:
+; CHECK: vmov.f16 r[[GPR:[0-9]+]], s{{[0-9]+}}
+; CHECK: vmov.16  d{{[0-9]+}}[3], r[[GPR]]
+entry:
+  %b = fptrunc float %fb to half
+  %x = insertelement <8 x half> %a, half %b, i32 7
+  ret <8 x half> %x
+}
diff --git a/llvm/test/CodeGen/ARM/fp16-vldlane-vstlane.ll b/llvm/test/CodeGen/ARM/fp16-vldlane-vstlane.ll
new file mode 100644
index 0000000000000..2a7358323af71
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp16-vldlane-vstlane.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=hard -O1 < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=soft -O1 < %s | FileCheck %s
+
+define <4 x half> @vld1d_lane_f16(half* %pa, <4 x half> %v4) nounwind {
+; CHECK-LABEL: vld1d_lane_f16:
+; CHECK: vld1.16 {d{{[0-9]+}}[3]}, [r0:16]
+entry:
+  %a = load half, half* %pa
+  %res = insertelement <4 x half> %v4, half %a, i32 3
+  ret <4 x half> %res
+}
+
+define <8 x half> @vld1q_lane_f16_1(half* %pa, <8 x half> %v8) nounwind {
+; CHECK-LABEL: vld1q_lane_f16_1:
+; CHECK: vld1.16 {d{{[0-9]+}}[1]}, [r0:16]
+entry:
+  %a = load half, half* %pa
+  %res = insertelement <8 x half> %v8, half %a, i32 1
+  ret <8 x half> %res
+}
+
+define <8 x half> @vld1q_lane_f16_7(half* %pa, <8 x half> %v8) nounwind {
+; CHECK-LABEL: vld1q_lane_f16_7:
+; CHECK: vld1.16 {d{{[0-9]+}}[3]}, [r0:16]
+entry:
+  %a = load half, half* %pa
+  %res = insertelement <8 x half> %v8, half %a, i32 7
+  ret <8 x half> %res
+}
+
+define void @vst1d_lane_f16(half* %pa, <4 x half> %v4) nounwind {
+; CHECK-LABEL: vst1d_lane_f16:
+; CHECK: vst1.16 {d{{[0-9]+}}[3]}, [r0:16]
+entry:
+  %a = extractelement <4 x half> %v4, i32 3
+  store half %a, half* %pa
+  ret void
+}
+
+define void @vst1q_lane_f16_7(half* %pa, <8 x half> %v8) nounwind {
+; CHECK-LABEL: vst1q_lane_f16_7:
+; CHECK: vst1.16 {d{{[0-9]+}}[3]}, [r0:16]
+entry:
+  %a = extractelement <8 x half> %v8, i32 7
+  store half %a, half* %pa
+  ret void
+}
+
+define void @vst1q_lane_f16_1(half* %pa, <8 x half> %v8) nounwind {
+; CHECK-LABEL: vst1q_lane_f16_1:
+; CHECK: vst1.16 {d{{[0-9]+}}[1]}, [r0:16]
+entry:
+  %a = extractelement <8 x half> %v8, i32 1
+  store half %a, half* %pa
+  ret void
+}

From 3018d505a3643f1b191da1e2cc99493708ede186 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Jun 2019 10:04:55 +0000
Subject: [PATCH 1001/1176] [SelectionDAG] Add fpto[us]i(undef) --> undef
 constant fold

Follow up to D62807.

Differential Revision: https://reviews.llvm.org/D62811

llvm-svn: 362483
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  | 8 ++++++++
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 5 +++++
 llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll   | 2 --
 llvm/test/CodeGen/X86/vec_fp_to_int.ll         | 2 --
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8eeb4a70a53be..8f2e80853e8d1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12770,6 +12770,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  // fold (fp_to_sint undef) -> undef
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
   // fold (fp_to_sint c1fp) -> c1
   if (isConstantFPBuildVectorOrConstantFP(N0))
     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
@@ -12781,6 +12785,10 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
+  // fold (fp_to_uint undef) -> undef
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
   // fold (fp_to_uint c1fp) -> c1
   if (isConstantFPBuildVectorOrConstantFP(N0))
     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1dc9d7460f859..e30b702ac1563 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4440,6 +4440,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (Operand.isUndef())
       return getUNDEF(VT);
     break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    if (Operand.isUndef())
+      return getUNDEF(VT);
+    break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
     // [us]itofp(undef) = 0, because the result value is bounded.
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
index 3c04c89cf5142..94b860cff1934 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
@@ -106,9 +106,7 @@ define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
 ; SSE-LABEL: fptosi_4f64_to_2i32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvttpd2dq %xmm0, %xmm1
 ; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: fptosi_4f64_to_2i32:
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index f7047a68f148a..e3c9a5491c1cd 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -108,9 +108,7 @@ define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
 define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
 ; SSE-LABEL: fptosi_4f64_to_2i32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvttpd2dq %xmm0, %xmm1
 ; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: fptosi_4f64_to_2i32:

From 5d5078e341f593944d9badd53e374e6b319282da Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Tue, 4 Jun 2019 10:13:03 +0000
Subject: [PATCH 1002/1176] [llvm-ar] Reapply Fix relative thin archive path
 handling

Includes a fix for an introduced build failure due to a post c++11 use of std::mismatch.

This fixes some thin archive relative path issues, paths are shortened where possible and paths are output correctly when using the display table command.

Differential Revision: https://reviews.llvm.org/D59491

llvm-svn: 362484
---
 llvm/include/llvm/Object/ArchiveWriter.h      |  2 +-
 llvm/lib/Object/ArchiveWriter.cpp             | 51 ++++++++++++-------
 llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp   | 11 ++--
 llvm/test/tools/llvm-ar/reduce-thin-path.test | 10 ++++
 llvm/test/tools/llvm-ar/thin-archive.test     | 45 ++++++++++++++++
 .../ELF/archive-unknown-members.test          |  8 +--
 .../llvm-readobj/thin-archive-paths.test      |  6 +--
 llvm/tools/llvm-ar/llvm-ar.cpp                | 38 ++++++++++----
 8 files changed, 134 insertions(+), 37 deletions(-)
 create mode 100644 llvm/test/tools/llvm-ar/reduce-thin-path.test
 create mode 100644 llvm/test/tools/llvm-ar/thin-archive.test

diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index cf415e92bc79b..9e6daf2da36e9 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -36,7 +36,7 @@ struct NewArchiveMember {
                                             bool Deterministic);
 };
 
-std::string computeArchiveRelativePath(StringRef From, StringRef To);
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
                    bool WriteSymtab, object::Archive::Kind Kind,
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 849d2835772e2..201ff1326f080 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -494,29 +494,46 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
 }
 
 namespace llvm {
+
+static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) {
+  SmallString<128> Ret = P;
+  std::error_code Err = sys::fs::make_absolute(Ret);
+  if (Err)
+    return Err;
+  sys::path::remove_dots(Ret, /*removedotdot*/ true);
+  return Ret;
+}
+
 // Compute the relative path from From to To.
-std::string computeArchiveRelativePath(StringRef From, StringRef To) {
-  if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
-    return To;
-
-  StringRef DirFrom = sys::path::parent_path(From);
-  auto FromI = sys::path::begin(DirFrom);
-  auto ToI = sys::path::begin(To);
-  while (*FromI == *ToI) {
-    ++FromI;
-    ++ToI;
-  }
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
+  ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To);
+  ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From);
+  if (!PathToOrErr || !DirFromOrErr)
+    return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+  const SmallString<128> &PathTo = *PathToOrErr;
+  const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr);
+
+  // Can't construct a relative path between different roots
+  if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom))
+    return sys::path::convert_to_slash(PathTo);
+
+  // Skip common prefixes
+  auto FromTo =
+      std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom),
+                    sys::path::begin(PathTo));
+  auto FromI = FromTo.first;
+  auto ToI = FromTo.second;
 
+  // Construct relative path
   SmallString<128> Relative;
   for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
-    sys::path::append(Relative, "..");
+    sys::path::append(Relative, sys::path::Style::posix, "..");
 
-  for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
-    sys::path::append(Relative, *ToI);
+  for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI)
+    sys::path::append(Relative, sys::path::Style::posix, *ToI);
 
-  // Replace backslashes with slashes so that the path is portable between *nix
-  // and Windows.
-  return sys::path::convert_to_slash(Relative);
+  return Relative.str();
 }
 
 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 34a83147a3a63..2d44686dd280f 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -211,9 +211,14 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   // llvm-lib uses relative paths for both regular and thin archives, unlike
   // standard GNU ar, which only uses relative paths for thin archives and
   // basenames for regular archives.
-  for (NewArchiveMember &Member : Members)
-    Member.MemberName =
-        Saver.save(computeArchiveRelativePath(OutputPath, Member.MemberName));
+  for (NewArchiveMember &Member : Members) {
+    if (sys::path::is_relative(Member.MemberName)) {
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(OutputPath, Member.MemberName);
+      if (PathOrErr)
+        Member.MemberName = Saver.save(*PathOrErr);
+    }
+  }
 
   if (Error E =
           writeArchive(OutputPath, Members,
diff --git a/llvm/test/tools/llvm-ar/reduce-thin-path.test b/llvm/test/tools/llvm-ar/reduce-thin-path.test
new file mode 100644
index 0000000000000..aea6101ce9bc6
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/reduce-thin-path.test
@@ -0,0 +1,10 @@
+RUN: rm -rf %t && mkdir -p %t/foo/bar/
+RUN: mkdir -p %t/baz/
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
+
+RUN: cd %t && llvm-ar rTc %t/baz/internal.ar elf.o
+RUN: cd %t/foo && llvm-ar rTc %t/foo/bar/external.ar ../baz/internal.ar
+
+RUN: FileCheck -input-file=%t/foo/bar/external.ar %s
+
+CHECK: {{^}}../../elf.o/
diff --git a/llvm/test/tools/llvm-ar/thin-archive.test b/llvm/test/tools/llvm-ar/thin-archive.test
new file mode 100644
index 0000000000000..8d9543b686968
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/thin-archive.test
@@ -0,0 +1,45 @@
+RUN: rm -rf %t && mkdir -p %t/foo/bar/
+
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/foo/elf.o
+RUN: cp %t/foo/elf.o %t/foo/bar/elf.o
+RUN: cp %t/foo/bar/elf.o %t/delete.o
+
+Test that modules can be added with absolute paths when the archive is created using an absolute path
+
+RUN: llvm-ar rTc %t/absolute-1.ar %t/foo/elf.o %t/delete.o %t/foo/bar/elf.o
+RUN: llvm-ar dT %t/absolute-1.ar delete.o
+
+RUN: FileCheck -input-file=%t/absolute-1.ar --check-prefixes=THIN,CHECK %s -DPATH=%/t/
+RUN: llvm-ar t %t/absolute-1.ar | FileCheck %s -DPATH=%/t/
+
+Test that modules can be added with absolute paths when the archive is created using a relative path
+
+RUN: llvm-ar rTc Output/%basename_t.tmp/absolute-2.ar %t/foo/elf.o %t/delete.o %t/foo/bar/elf.o
+RUN: llvm-ar dT Output/%basename_t.tmp/absolute-2.ar %t/delete.o
+
+RUN: FileCheck -input-file=%t/absolute-2.ar --check-prefixes=THIN,CHECK %s -DPATH=%/t/
+RUN: llvm-ar t %t/absolute-2.ar | FileCheck %s -DPATH=%/t/
+
+These tests must be run in %t/foo. cd %t is included on each line to make debugging this test case easier.
+
+Test that modules can be added with relative paths when the archive is created using a relative path
+
+RUN: cd %t/foo && llvm-ar rTc ../relative-1.ar elf.o ../delete.o bar/elf.o
+RUN: cd %t/foo && llvm-ar dT ../relative-1.ar delete.o
+
+RUN: FileCheck -input-file=%t/relative-1.ar --check-prefixes=THIN,CHECK %s -DPATH=
+RUN: llvm-ar t %t/relative-1.ar | FileCheck %s -DPATH=%/t/
+
+Test that modules can be added with relative paths when the archive is created using a absolute path
+
+RUN: cd %t/foo && llvm-ar rTc %t/relative-2.ar elf.o ../delete.o bar/elf.o
+RUN: cd %t/foo && llvm-ar dT %t/relative-2.ar delete.o
+
+RUN: FileCheck -input-file=%t/relative-2.ar --check-prefixes=THIN,CHECK %s -DPATH=
+RUN: llvm-ar t %t/relative-2.ar | FileCheck %s -DPATH=%/t/
+
+THIN: !<thin>
+
+CHECK-NOT: delete.o
+CHECK: {{^}}[[PATH]]foo/elf.o
+CHECK: {{^}}[[PATH]]foo/bar/elf.o
diff --git a/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test b/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
index 6540b630f7dac..39a6597a83bfb 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/archive-unknown-members.test
@@ -23,10 +23,10 @@
 # RUN: llvm-ar rcT %t.thin1.a %t1.o %s
 # RUN: llvm-ar rcT %t.thin2.a %t2.o %s
 
-# RUN: not llvm-objcopy --strip-debug %t.thin1.a 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%t.thin1.a -DMEMBER=%s
-# RUN: not llvm-strip --strip-debug %t.thin2.a 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%t.thin2.a -DMEMBER=%s
+# RUN: not llvm-objcopy --strip-debug %/t.thin1.a 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%/t.thin1.a -DMEMBER=%/s
+# RUN: not llvm-strip --strip-debug %/t.thin2.a 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=THIN -DARCHIVE=%/t.thin2.a -DMEMBER=%/s
 ## Verify that the first member was not modified, if a later member could not
 ## be recognized.
 # RUN: cmp %t.o %t1.o
diff --git a/llvm/test/tools/llvm-readobj/thin-archive-paths.test b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
index f1952c739ccff..d7a971eb303d8 100644
--- a/llvm/test/tools/llvm-readobj/thin-archive-paths.test
+++ b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
@@ -23,11 +23,11 @@
 # RUN: llvm-ar rcT c/absolute.a %t/a/b/1.o
 
 # Show that absolute paths in the file header printing are correct.
-# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%t
+# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%/t
 # ABS: File: [[DIR]]/a/b/1.o
 
 # Show that absolute paths in an error message for both archive and member are correct.
 # RUN: rm a/b/1.o
-# RUN: not llvm-readobj --file-headers %t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%t
-# RUN: not llvm-readelf --file-headers %t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%t
+# RUN: not llvm-readobj --file-headers %/t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%/t
+# RUN: not llvm-readelf --file-headers %/t/c/absolute.a 2>&1 | FileCheck %s --check-prefix=ERR2 -DDIR=%/t
 # ERR2: error: '[[DIR]]/c/absolute.a': '[[DIR]]/a/b/1.o': {{[Nn]}}o such file or directory
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 04c2396a4fa32..0731f35ac458b 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -464,9 +464,11 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
   }
 
   if (C.getParent()->isThin()) {
-    StringRef ParentDir = sys::path::parent_path(ArchiveName);
-    if (!ParentDir.empty())
-      outs() << ParentDir << '/';
+    if (!sys::path::is_absolute(Name)) {
+      StringRef ParentDir = sys::path::parent_path(ArchiveName);
+      if (!ParentDir.empty())
+        outs() << sys::path::convert_to_slash(ParentDir) << '/';
+    }
   }
   outs() << Name << "\n";
 }
@@ -593,10 +595,18 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
   // the archive it's in, so the file resolves correctly.
   if (Thin && FlattenArchive) {
     StringSaver Saver(Alloc);
-    Expected<std::string> FileNameOrErr = M.getFullName();
+    Expected<std::string> FileNameOrErr = M.getName();
     failIfError(FileNameOrErr.takeError());
-    NMOrErr->MemberName =
-        Saver.save(computeArchiveRelativePath(ArchiveName, *FileNameOrErr));
+    if (sys::path::is_absolute(*FileNameOrErr)) {
+      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(*FileNameOrErr));
+    } else {
+      FileNameOrErr = M.getFullName();
+      failIfError(FileNameOrErr.takeError());
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(ArchiveName, *FileNameOrErr);
+      NMOrErr->MemberName = Saver.save(
+          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(*FileNameOrErr));
+    }
   }
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
@@ -625,9 +635,19 @@ static void addMember(std::vector<NewArchiveMember> &Members,
   // For regular archives, use the basename of the object path for the member
   // name. For thin archives, use the full relative paths so the file resolves
   // correctly.
-  NMOrErr->MemberName =
-      Thin ? Saver.save(computeArchiveRelativePath(ArchiveName, FileName))
-           : sys::path::filename(NMOrErr->MemberName);
+  if (!Thin) {
+    NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
+  } else {
+    if (sys::path::is_absolute(FileName))
+      NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(FileName));
+    else {
+      Expected<std::string> PathOrErr =
+          computeArchiveRelativePath(ArchiveName, FileName);
+      NMOrErr->MemberName = Saver.save(
+          PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName));
+    }
+  }
+
   if (FlattenArchive &&
       identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
     object::Archive &Lib = readLibrary(FileName);

From 3178546a2725e95a1a4a9ca4e99d4af7e4a27da9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Jun 2019 10:17:56 +0000
Subject: [PATCH 1003/1176] [SelectionDAG] ComputeNumSignBits - clang-format +
 improve *EXTLOAD comments. NFCI.

Pre-commit requested for D62777.

llvm-svn: 362485
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e30b702ac1563..4d854f81de278 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3878,13 +3878,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
     if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
       unsigned ExtType = LD->getExtensionType();
       switch (ExtType) {
-        default: break;
-        case ISD::SEXTLOAD:    // '17' bits known
-          Tmp = LD->getMemoryVT().getScalarSizeInBits();
-          return VTBits-Tmp+1;
-        case ISD::ZEXTLOAD:    // '16' bits known
-          Tmp = LD->getMemoryVT().getScalarSizeInBits();
-          return VTBits-Tmp;
+      default: break;
+      case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+        Tmp = LD->getMemoryVT().getScalarSizeInBits();
+        return VTBits - Tmp + 1;
+      case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+        Tmp = LD->getMemoryVT().getScalarSizeInBits();
+        return VTBits - Tmp;
       }
     }
   }

From ad298f86b7ad2ab09a05e75663a8d0f621e478e6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Jun 2019 10:49:06 +0000
Subject: [PATCH 1004/1176] [SelectionDAG] ComputeNumSignBits - support
 constant pool values from target

As I mentioned on D61887 we don't get many hits on ComputeNumSignBits as we did on computeKnownBits.

The case we do get is interesting though - it allows us to use the 'ConditionalNegate' combine in combineLogicBlendIntoPBLENDV to remove a select.

It comes too late for SSE41 (BLENDV) cases, but SSE2 tests can hit it now. We should probably try to make use of this for SSE41+ targets as well - avoiding variable blends is usually a good idea. I'll investigate as a followup.

Differential Revision: https://reviews.llvm.org/D62777

llvm-svn: 362486
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 30 ++++++++++++
 llvm/test/CodeGen/X86/combine-sdiv.ll         | 48 +++++++++----------
 2 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4d854f81de278..224265dfee52b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3885,6 +3885,36 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
       case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
         Tmp = LD->getMemoryVT().getScalarSizeInBits();
         return VTBits - Tmp;
+      case ISD::NON_EXTLOAD:
+        if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+          // We only need to handle vectors - computeKnownBits should handle
+          // scalar cases.
+          Type *CstTy = Cst->getType();
+          if (CstTy->isVectorTy() &&
+              (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+            Tmp = VTBits;
+            for (unsigned i = 0; i != NumElts; ++i) {
+              if (!DemandedElts[i])
+                continue;
+              if (Constant *Elt = Cst->getAggregateElement(i)) {
+                if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+                  const APInt &Value = CInt->getValue();
+                  Tmp = std::min(Tmp, Value.getNumSignBits());
+                  continue;
+                }
+                if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+                  APInt Value = CFP->getValueAPF().bitcastToAPInt();
+                  Tmp = std::min(Tmp, Value.getNumSignBits());
+                  continue;
+                }
+              }
+              // Unknown type. Conservatively assume no bits match sign bit.
+              return 1;
+            }
+            return Tmp;
+          }
+        }
+        break;
       }
     }
   }
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 1694c2e34576a..ad7a28ed4ad9e 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2112,11 +2112,9 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
 ; SSE2-LABEL: non_splat_minus_one_divisor_0:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    psubb %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    pandn %xmm2, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    psubb %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: non_splat_minus_one_divisor_0:
@@ -2175,38 +2173,36 @@ define <16 x i8> @non_splat_minus_one_divisor_0(<16 x i8> %A) {
 define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
 ; SSE2-LABEL: non_splat_minus_one_divisor_1:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
 ; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm3
 ; SSE2-NEXT:    psrlw $8, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
 ; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    psrlw $8, %xmm2
 ; SSE2-NEXT:    packuswb %xmm3, %xmm2
-; SSE2-NEXT:    paddb %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; SSE2-NEXT:    psraw $8, %xmm3
-; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm3
-; SSE2-NEXT:    psrlw $8, %xmm3
+; SSE2-NEXT:    paddb %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT:    psraw $8, %xmm1
+; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    psrlw $8, %xmm1
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
 ; SSE2-NEXT:    psraw $8, %xmm2
 ; SSE2-NEXT:    pmullw {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    psrlw $8, %xmm2
-; SSE2-NEXT:    packuswb %xmm3, %xmm2
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm3, %xmm2
-; SSE2-NEXT:    pandn %xmm1, %xmm3
-; SSE2-NEXT:    por %xmm2, %xmm3
-; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
-; SSE2-NEXT:    psubb %xmm3, %xmm0
-; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    pandn %xmm3, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    packuswb %xmm1, %xmm2
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,255,0,0,0,255,0,0,255,255,255,255,255,255,255]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pandn %xmm0, %xmm1
+; SSE2-NEXT:    por %xmm2, %xmm1
+; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,0,255,255,255,0,255,255,0,0,0,0,255,0,255]
+; SSE2-NEXT:    pxor %xmm0, %xmm1
+; SSE2-NEXT:    psubb %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: non_splat_minus_one_divisor_1:

From be6ce7b3f2258382a2a95263f67eb47cefe16f51 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 4 Jun 2019 11:06:08 +0000
Subject: [PATCH 1005/1176] [DAGCombine][X86][AArch64][ARM] (C - x) + y  ->  (y
 - x) + C  fold

Summary:
All changes except ARM look **great**.
https://rise4fun.com/Alive/R2M

The regression `test/CodeGen/ARM/addsubcarry-promotion.ll`
is recovered fully by D62392 + D62450.

Reviewers: RKSimon, craig.topper, spatel, rogfer01, efriedma

Reviewed By: efriedma

Subscribers: dmgreen, javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62266

llvm-svn: 362487
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  7 ++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll |  8 +-
 .../CodeGen/AArch64/sink-addsub-of-const.ll   | 27 +++---
 .../test/CodeGen/ARM/addsubcarry-promotion.ll | 69 +++++++++-----
 llvm/test/CodeGen/X86/shift-amount-mod.ll     | 20 ++---
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll | 90 +++++++++++--------
 6 files changed, 129 insertions(+), 92 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8f2e80853e8d1..d65bd6d941f16 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2497,6 +2497,13 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
   }
+  // Hoist one-use subtraction from non-opaque constant:
+  //   (C - x) + y  ->  (y - x) + C
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+  }
 
   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index c91700436bb96..e949486512728 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -375,9 +375,7 @@ define i64 @reg64_lshr_by_sub_of_negated(i64 %val, i64 %a, i64 %b) nounwind {
 define i32 @reg32_lshr_by_add_to_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_add_to_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w8, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 32, %a
@@ -388,9 +386,7 @@ define i32 @reg32_lshr_by_add_to_negated(i32 %val, i32 %a, i32 %b) nounwind {
 define i64 @reg64_lshr_by_add_to_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_add_to_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    add x8, x8, x2
+; CHECK-NEXT:    sub x8, x2, x1
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 64, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index 2dc1d244481f9..b92ec9639d23f 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -57,9 +57,8 @@ define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_add0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 32, %a
   %r = add i32 %t0, %b
@@ -68,9 +67,8 @@ define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_add1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w1, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 32, %a
   %r = add i32 %b, %t0
@@ -117,9 +115,8 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b) {
 define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_of_const_to_sub2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w1, w8
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #32 // =32
 ; CHECK-NEXT:    ret
   %t0 = sub i32 %a, 32
   %r = sub i32 %b, %t0
@@ -219,8 +216,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %t0, %b
@@ -231,8 +228,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI17_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %b, %t0
@@ -287,8 +284,8 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI21_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
-; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
   %r = sub <4 x i32> %b, %t0
diff --git a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
index fc9feb9ff6b76..2dfd217057356 100644
--- a/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
+++ b/llvm/test/CodeGen/ARM/addsubcarry-promotion.ll
@@ -10,40 +10,63 @@
 define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
 ; ARM-LABEL: fn1:
 ; ARM:       @ %bb.0: @ %entry
-; ARM-NEXT:    rsb r2, r2, #1
 ; ARM-NEXT:    adds r0, r1, r0
+; ARM-NEXT:    mov r3, #0
+; ARM-NEXT:    adc r0, r3, #0
 ; ARM-NEXT:    movw r1, #65535
-; ARM-NEXT:    sxth r2, r2
-; ARM-NEXT:    adc r0, r2, #0
-; ARM-NEXT:    tst r0, r1
+; ARM-NEXT:    sub r0, r0, r2
+; ARM-NEXT:    uxth r0, r0
+; ARM-NEXT:    cmp r0, r1
 ; ARM-NEXT:    bxeq lr
 ; ARM-NEXT:  .LBB0_1: @ %for.cond
 ; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; ARM-NEXT:    b .LBB0_1
 ;
-; THUMB1-LABEL: fn1:
-; THUMB1:       @ %bb.0: @ %entry
-; THUMB1-NEXT:    movs r3, #1
-; THUMB1-NEXT:    subs r2, r3, r2
-; THUMB1-NEXT:    sxth r2, r2
-; THUMB1-NEXT:    movs r3, #0
-; THUMB1-NEXT:    adds r0, r1, r0
-; THUMB1-NEXT:    adcs r3, r2
-; THUMB1-NEXT:    lsls r0, r3, #16
-; THUMB1-NEXT:    beq .LBB0_2
-; THUMB1-NEXT:  .LBB0_1: @ %for.cond
-; THUMB1-NEXT:    @ =>This Inner Loop Header: Depth=1
-; THUMB1-NEXT:    b .LBB0_1
-; THUMB1-NEXT:  .LBB0_2: @ %if.end
-; THUMB1-NEXT:    bx lr
+; THUMBV6M-LABEL: fn1:
+; THUMBV6M:       @ %bb.0: @ %entry
+; THUMBV6M-NEXT:    movs r3, #0
+; THUMBV6M-NEXT:    adds r0, r1, r0
+; THUMBV6M-NEXT:    adcs r3, r3
+; THUMBV6M-NEXT:    subs r0, r3, r2
+; THUMBV6M-NEXT:    uxth r0, r0
+; THUMBV6M-NEXT:    ldr r1, .LCPI0_0
+; THUMBV6M-NEXT:    cmp r0, r1
+; THUMBV6M-NEXT:    beq .LBB0_2
+; THUMBV6M-NEXT:  .LBB0_1: @ %for.cond
+; THUMBV6M-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMBV6M-NEXT:    b .LBB0_1
+; THUMBV6M-NEXT:  .LBB0_2: @ %if.end
+; THUMBV6M-NEXT:    bx lr
+; THUMBV6M-NEXT:    .p2align 2
+; THUMBV6M-NEXT:  @ %bb.3:
+; THUMBV6M-NEXT:  .LCPI0_0:
+; THUMBV6M-NEXT:    .long 65535 @ 0xffff
+;
+; THUMBV8M-BASE-LABEL: fn1:
+; THUMBV8M-BASE:       @ %bb.0: @ %entry
+; THUMBV8M-BASE-NEXT:    movs r3, #0
+; THUMBV8M-BASE-NEXT:    adds r0, r1, r0
+; THUMBV8M-BASE-NEXT:    adcs r3, r3
+; THUMBV8M-BASE-NEXT:    subs r0, r3, r2
+; THUMBV8M-BASE-NEXT:    uxth r0, r0
+; THUMBV8M-BASE-NEXT:    movw r1, #65535
+; THUMBV8M-BASE-NEXT:    cmp r0, r1
+; THUMBV8M-BASE-NEXT:    beq .LBB0_2
+; THUMBV8M-BASE-NEXT:  .LBB0_1: @ %for.cond
+; THUMBV8M-BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
+; THUMBV8M-BASE-NEXT:    b .LBB0_1
+; THUMBV8M-BASE-NEXT:  .LBB0_2: @ %if.end
+; THUMBV8M-BASE-NEXT:    bx lr
 ;
 ; THUMB-LABEL: fn1:
 ; THUMB:       @ %bb.0: @ %entry
-; THUMB-NEXT:    rsb.w r2, r2, #1
 ; THUMB-NEXT:    adds r0, r0, r1
-; THUMB-NEXT:    sxth r2, r2
-; THUMB-NEXT:    adc r0, r2, #0
-; THUMB-NEXT:    lsls r0, r0, #16
+; THUMB-NEXT:    mov.w r3, #0
+; THUMB-NEXT:    adc r0, r3, #0
+; THUMB-NEXT:    movw r1, #65535
+; THUMB-NEXT:    subs r0, r0, r2
+; THUMB-NEXT:    uxth r0, r0
+; THUMB-NEXT:    cmp r0, r1
 ; THUMB-NEXT:    it eq
 ; THUMB-NEXT:    bxeq lr
 ; THUMB-NEXT:  .LBB0_1: @ %for.cond
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index e8af5f66d36c9..5be960d2a3108 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -861,19 +861,17 @@ define i32 @reg32_lshr_by_add_to_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; X32-LABEL: reg32_lshr_by_add_to_negated:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT:    shrl %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: reg32_lshr_by_add_to_negated:
 ; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
 ; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    addl %edx, %ecx
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
@@ -888,9 +886,9 @@ define i64 @reg64_lshr_by_add_to_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addb $64, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
@@ -905,11 +903,10 @@ define i64 @reg64_lshr_by_add_to_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ;
 ; X64-LABEL: reg64_lshr_by_add_to_negated:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
 ; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    addl %edx, %ecx
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
   %nega = sub i64 64, %a
@@ -1278,11 +1275,10 @@ define i64 @reg64_lshr_by_negated_unfolded_add_b(i64 %val, i64 %a, i64 %b) nounw
 ;
 ; X64-LABEL: reg64_lshr_by_negated_unfolded_add_b:
 ; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rcx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
 ; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    addl %edx, %ecx
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index 7c0a22d4eb629..bc52f1c22787e 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -94,16 +94,16 @@ define i32 @sink_sub_of_const_to_add1(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_add0:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add0:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal 32(%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 32, %a
   %r = add i32 %t0, %b
@@ -112,16 +112,16 @@ define i32 @sink_sub_from_const_to_add0(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_add1(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_add1:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_add1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal 32(%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 32, %a
   %r = add i32 %b, %t0
@@ -192,16 +192,16 @@ define i32 @sink_sub_of_const_to_sub(i32 %a, i32 %b) {
 define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_of_const_to_sub2:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $32, %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    addl $32, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_of_const_to_sub2:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $32, %eax
-; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    addl %esi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    subl %edi, %esi
+; X64-NEXT:    leal 32(%rsi), %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, 32
   %r = sub i32 %b, %t0
@@ -329,25 +329,37 @@ define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; Outer 'add' is commutative - 2 variants.
 
 define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: vec_sink_sub_from_const_to_add0:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    paddd %xmm1, %xmm2
-; ALL-NEXT:    movdqa %xmm2, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_from_const_to_add0:
+; X32:       # %bb.0:
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vec_sink_sub_from_const_to_add0:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %t0, %b
   ret <4 x i32> %r
 }
 define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: vec_sink_sub_from_const_to_add1:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    paddd %xmm1, %xmm2
-; ALL-NEXT:    movdqa %xmm2, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_from_const_to_add1:
+; X32:       # %bb.0:
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vec_sink_sub_from_const_to_add1:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = add <4 x i32> %b, %t0
   ret <4 x i32> %r
@@ -411,13 +423,19 @@ define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %r
 }
 define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: vec_sink_sub_of_const_to_sub2:
-; ALL:       # %bb.0:
-; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
-; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    paddd %xmm1, %xmm2
-; ALL-NEXT:    movdqa %xmm2, %xmm0
-; ALL-NEXT:    ret{{[l|q]}}
+; X32-LABEL: vec_sink_sub_of_const_to_sub2:
+; X32:       # %bb.0:
+; X32-NEXT:    psubd %xmm0, %xmm1
+; X32-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: vec_sink_sub_of_const_to_sub2:
+; X64:       # %bb.0:
+; X64-NEXT:    psubd %xmm0, %xmm1
+; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, <i32 42, i32 24, i32 undef, i32 46>
   %r = sub <4 x i32> %b, %t0
   ret <4 x i32> %r

From 3dce0326fe7c06561e74dc6c995cd329d376fd9f Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 4 Jun 2019 11:06:21 +0000
Subject: [PATCH 1006/1176] [DAGCombine][X86][AArch64][MIPS][LANAI] (C - x) - y
  ->  C - (x + y) fold (PR41952)

Summary:
This *might* be the last fold for `sink-addsub-of-const.ll`, but i'm not sure yet.

As far as i can tell, there are no regressions here (ignoring x86-32),
all changes are either good or neutral.

This, almost surprisingly to me, fixes the motivational tests (in `shift-amount-mod.ll`)
`@reg32_lshr_by_sub_from_negated` from [[ https://bugs.llvm.org/show_bug.cgi?id=41952 | PR41952 ]].

https://rise4fun.com/Alive/vMd3

Reviewers: RKSimon, t.p.northover, craig.topper, spatel, efriedma

Reviewed By: RKSimon

Subscribers: sdardis, javed.absar, arichardson, kristof.beyls, jrtc27, atanasyan, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62774

llvm-svn: 362488
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 +++
 llvm/test/CodeGen/AArch64/shift-amount-mod.ll | 18 +++----
 .../CodeGen/AArch64/sink-addsub-of-const.ll   |  8 +--
 llvm/test/CodeGen/Lanai/constant_multiply.ll  | 14 ++---
 llvm/test/CodeGen/Mips/const-mult.ll          |  4 +-
 llvm/test/CodeGen/Mips/madd-msub.ll           | 22 ++++----
 llvm/test/CodeGen/X86/shift-amount-mod.ll     | 51 ++++++++++---------
 llvm/test/CodeGen/X86/sink-addsub-of-const.ll |  9 ++--
 8 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d65bd6d941f16..1d1699ce58956 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3033,6 +3033,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
   }
+  // (C - x) - y  ->  C - (x + y)
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+  }
 
   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
   // rather than 'sub 0/1' (the sext should get folded).
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index e949486512728..6b8d19c83be52 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -318,9 +318,8 @@ define void @modify64_ashr_by_negated(i64* %valptr, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_sub_from_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    neg w8, w8
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 32, %a
@@ -331,9 +330,8 @@ define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_sub_from_negated:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    sub x8, x8, x2
+; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    neg x8, x8
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 64, %a
@@ -482,8 +480,8 @@ define i64 @reg64_lshr_by_negated_unfolded(i64 %val, i64 %shamt) nounwind {
 define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg w8, w1
-; CHECK-NEXT:    sub w8, w8, w2
+; CHECK-NEXT:    add w8, w1, w2
+; CHECK-NEXT:    neg w8, w8
 ; CHECK-NEXT:    lsr w0, w0, w8
 ; CHECK-NEXT:    ret
   %nega = sub i32 0, %a
@@ -495,8 +493,8 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    neg x8, x1
-; CHECK-NEXT:    sub x8, x8, x2
+; CHECK-NEXT:    add x8, x1, x2
+; CHECK-NEXT:    neg x8, x8
 ; CHECK-NEXT:    lsr x0, x0, x8
 ; CHECK-NEXT:    ret
   %nega = sub i64 0, %a
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
index b92ec9639d23f..d55310cad76b7 100644
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -129,9 +129,9 @@ define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    sub w0, w8, w1
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    mov w9, #32
+; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
   %t0 = sub i32 32, %a
   %r = sub i32 %t0, %b
@@ -300,8 +300,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI22_0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a
   %r = sub <4 x i32> %t0, %b
diff --git a/llvm/test/CodeGen/Lanai/constant_multiply.ll b/llvm/test/CodeGen/Lanai/constant_multiply.ll
index f176a7143d866..f7ad041b77e97 100644
--- a/llvm/test/CodeGen/Lanai/constant_multiply.ll
+++ b/llvm/test/CodeGen/Lanai/constant_multiply.ll
@@ -150,9 +150,9 @@ define i32 @fm9(i32 inreg %a) #0 {
 ; CHECK-NEXT:    st %fp, [--%sp]
 ; CHECK-NEXT:    add %sp, 0x8, %fp
 ; CHECK-NEXT:    sub %sp, 0x8, %sp
-; CHECK-NEXT:    sub %r0, %r6, %r3
-; CHECK-NEXT:    sh %r6, 0x3, %r9
-; CHECK-NEXT:    sub %r3, %r9, %rv
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    add %r6, %r3, %r3
+; CHECK-NEXT:    sub %r0, %r3, %rv
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return
 ; CHECK-NEXT:    add %fp, 0x0, %sp
 ; CHECK-NEXT:    ld -8[%fp], %fp
@@ -166,10 +166,10 @@ define i32 @fm10(i32 inreg %a) #0 {
 ; CHECK-NEXT:    st %fp, [--%sp]
 ; CHECK-NEXT:    add %sp, 0x8, %fp
 ; CHECK-NEXT:    sub %sp, 0x8, %sp
-; CHECK-NEXT:    sh %r6, 0x1, %r3
-; CHECK-NEXT:    sub %r0, %r3, %r3
-; CHECK-NEXT:    sh %r6, 0x3, %r9
-; CHECK-NEXT:    sub %r3, %r9, %rv
+; CHECK-NEXT:    sh %r6, 0x3, %r3
+; CHECK-NEXT:    sh %r6, 0x1, %r9
+; CHECK-NEXT:    add %r9, %r3, %r3
+; CHECK-NEXT:    sub %r0, %r3, %rv
 ; CHECK-NEXT:    ld -4[%fp], %pc ! return
 ; CHECK-NEXT:    add %fp, 0x0, %sp
 ; CHECK-NEXT:    ld -8[%fp], %fp
diff --git a/llvm/test/CodeGen/Mips/const-mult.ll b/llvm/test/CodeGen/Mips/const-mult.ll
index dd90971dcee0b..bb1e5b3d57c1c 100644
--- a/llvm/test/CodeGen/Mips/const-mult.ll
+++ b/llvm/test/CodeGen/Mips/const-mult.ll
@@ -179,8 +179,8 @@ define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) {
 ; MIPS32-NEXT:    subu $1, $1, $3
 ; MIPS32-NEXT:    subu $5, $1, $12
 ; MIPS32-NEXT:    subu $4, $9, $10
-; MIPS32-NEXT:    negu $1, $8
-; MIPS32-NEXT:    subu $3, $1, $11
+; MIPS32-NEXT:    addu $1, $8, $11
+; MIPS32-NEXT:    negu $3, $1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    negu $2, $2
 ;
diff --git a/llvm/test/CodeGen/Mips/madd-msub.ll b/llvm/test/CodeGen/Mips/madd-msub.ll
index 8a1010e45f66f..b413dca438635 100644
--- a/llvm/test/CodeGen/Mips/madd-msub.ll
+++ b/llvm/test/CodeGen/Mips/madd-msub.ll
@@ -342,13 +342,13 @@ define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readn
 ;
 ; 32R6-LABEL: msub2:
 ; 32R6:       # %bb.0: # %entry
-; 32R6-NEXT:    mul $1, $5, $4
-; 32R6-NEXT:    sltu $2, $6, $1
-; 32R6-NEXT:    muhu $3, $5, $4
-; 32R6-NEXT:    negu $3, $3
-; 32R6-NEXT:    subu $2, $3, $2
+; 32R6-NEXT:    muhu $1, $5, $4
+; 32R6-NEXT:    mul $3, $5, $4
+; 32R6-NEXT:    sltu $2, $6, $3
+; 32R6-NEXT:    addu $1, $1, $2
+; 32R6-NEXT:    negu $2, $1
 ; 32R6-NEXT:    jr $ra
-; 32R6-NEXT:    subu $3, $6, $1
+; 32R6-NEXT:    subu $3, $6, $3
 ;
 ; DSP-LABEL: msub2:
 ; DSP:       # %bb.0: # %entry
@@ -377,12 +377,12 @@ define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readn
 ; 16:       # %bb.0: # %entry
 ; 16-NEXT:    multu $5, $4
 ; 16-NEXT:    mflo $2
-; 16-NEXT:    mfhi $4
-; 16-NEXT:    subu $3, $6, $2
+; 16-NEXT:    mfhi $3
 ; 16-NEXT:    sltu $6, $2
-; 16-NEXT:    move $2, $24
-; 16-NEXT:    neg $4, $4
-; 16-NEXT:    subu $2, $4, $2
+; 16-NEXT:    move $4, $24
+; 16-NEXT:    addu $4, $3, $4
+; 16-NEXT:    subu $3, $6, $2
+; 16-NEXT:    neg $2, $4
 ; 16-NEXT:    jrc $ra
 entry:
   %conv = zext i32 %c to i64
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
index 5be960d2a3108..70fa32027c787 100644
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -735,19 +735,20 @@ define i32 @reg32_lshr_by_sub_from_negated(i32 %val, i32 %a, i32 %b) nounwind {
 ; X32-LABEL: reg32_lshr_by_sub_from_negated:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl $32, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    negb %cl
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT:    shrl %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: reg32_lshr_by_sub_from_negated:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movl $32, %ecx
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rsi,%rdx), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
@@ -762,9 +763,10 @@ define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl $64, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb $64, %cl
+; X32-NEXT:    subb %dl, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
@@ -780,9 +782,8 @@ define i64 @reg64_lshr_by_sub_from_negated(i64 %val, i64 %a, i64 %b) nounwind {
 ; X64-LABEL: reg64_lshr_by_sub_from_negated:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rdx,%rsi), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
@@ -1108,19 +1109,20 @@ define i32 @reg32_lshr_by_negated_unfolded_sub_b(i32 %val, i32 %a, i32 %b) nounw
 ; X32-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    negb %cl
 ; X32-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X32-NEXT:    shrl %cl, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: reg32_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rsi,%rdx), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrl %cl, %eax
 ; X64-NEXT:    retq
@@ -1136,10 +1138,10 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    addb $64, %cl
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT:    movb $64, %cl
+; X32-NEXT:    subb %dl, %cl
 ; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl %cl, %edx
 ; X32-NEXT:    shrdl %cl, %esi, %eax
@@ -1155,9 +1157,8 @@ define i64 @reg64_lshr_by_negated_unfolded_sub_b(i64 %val, i64 %a, i64 %b) nounw
 ; X64-LABEL: reg64_lshr_by_negated_unfolded_sub_b:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movl $64, %ecx
-; X64-NEXT:    subl %esi, %ecx
-; X64-NEXT:    subl %edx, %ecx
+; X64-NEXT:    leal (%rdx,%rsi), %ecx
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    shrq %cl, %rax
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
index bc52f1c22787e..5c7d4e0717a26 100644
--- a/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/X86/sink-addsub-of-const.ll
@@ -214,16 +214,17 @@ define i32 @sink_sub_of_const_to_sub2(i32 %a, i32 %b) {
 define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; X32-LABEL: sink_sub_from_const_to_sub:
 ; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl $32, %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    subl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: sink_sub_from_const_to_sub:
 ; X64:       # %bb.0:
+; X64-NEXT:    addl %esi, %edi
 ; X64-NEXT:    movl $32, %eax
 ; X64-NEXT:    subl %edi, %eax
-; X64-NEXT:    subl %esi, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 32, %a
   %r = sub i32 %t0, %b
@@ -448,8 +449,8 @@ define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; ALL-LABEL: vec_sink_sub_from_const_to_sub:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    movdqa {{.*#+}} xmm2 = <42,24,u,46>
+; ALL-NEXT:    paddd %xmm1, %xmm0
 ; ALL-NEXT:    psubd %xmm0, %xmm2
-; ALL-NEXT:    psubd %xmm1, %xmm2
 ; ALL-NEXT:    movdqa %xmm2, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
   %t0 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %a

From 5f7c20e279d06f119660a245f07f9d11666ecef7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Jun 2019 11:11:51 +0000
Subject: [PATCH 1007/1176] Fix Wshadow warning

llvm-svn: 362489
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 6a4d2f3dbfca2..0c09eaaeabf9f 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -771,8 +771,8 @@ void ClangdLSPServer::onSignatureHelp(const TextDocumentPositionParams &Params,
                                 return Reply(std::move(*Signature));
                               // Strip out the offsets from signature help for
                               // clients that only support string labels.
-                              for (auto &Signature : Signature->signatures) {
-                                for (auto &Param : Signature.parameters)
+                              for (auto &SigInfo : Signature->signatures) {
+                                for (auto &Param : SigInfo.parameters)
                                   Param.labelOffsets.reset();
                               }
                               return Reply(std::move(*Signature));

From 580c6d31c00dd038ca0ea1ea72f54b4db4fd4bef Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.smith@linaro.org>
Date: Tue, 4 Jun 2019 11:28:22 +0000
Subject: [PATCH 1008/1176] [AARCH64][ELF][llvm-readobj] Support for AArch64
 .note.gnu.property

ELF for the 64-bit Arm Architecture defines a processor specific property
type GNU_PROPERTY_AARCH64_FEATURE_1_AND as GNU_PROPERTY_LOPROC. This
property works in a similar way to the existing X86 processor specific
property GNU_PROPERTY_GNU_X86_FEATURE_1_AND.

Two feature bits are defined for GNU_PROPERTY_AARCH64_FEATURE_1_AND:
- GNU_PROPERTY_AARCH64_FEATURE_1_BTI 0x1
- GNU_PROPERTY_AARCH64_FEATURE_1_PAC 0x2

This patch defines the property, feature bits and implements support for
printing in llvm-readobj.

Differential Revision: https://reviews.llvm.org/D62595

llvm-svn: 362490
---
 llvm/include/llvm/BinaryFormat/ELF.h          |  7 ++++
 .../AArch64/aarch64-note-gnu-property.s       | 38 +++++++++++++++++++
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 13 +++++--
 3 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-readobj/AArch64/aarch64-note-gnu-property.s

diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 8258bb3711bfd..1ad586684df04 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1364,6 +1364,7 @@ enum {
 enum : unsigned {
   GNU_PROPERTY_STACK_SIZE = 1,
   GNU_PROPERTY_NO_COPY_ON_PROTECTED = 2,
+  GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000,
   GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002,
   GNU_PROPERTY_X86_ISA_1_NEEDED = 0xc0008000,
   GNU_PROPERTY_X86_FEATURE_2_NEEDED = 0xc0008001,
@@ -1371,6 +1372,12 @@ enum : unsigned {
   GNU_PROPERTY_X86_FEATURE_2_USED = 0xc0010001,
 };
 
+// aarch64 processor feature bits.
+enum : unsigned {
+  GNU_PROPERTY_AARCH64_FEATURE_1_BTI = 1 << 0,
+  GNU_PROPERTY_AARCH64_FEATURE_1_PAC = 1 << 1,
+};
+
 // x86 processor feature bits.
 enum : unsigned {
   GNU_PROPERTY_X86_FEATURE_1_IBT = 1 << 0,
diff --git a/llvm/test/tools/llvm-readobj/AArch64/aarch64-note-gnu-property.s b/llvm/test/tools/llvm-readobj/AArch64/aarch64-note-gnu-property.s
new file mode 100644
index 0000000000000..693b6c93f35a4
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/AArch64/aarch64-note-gnu-property.s
@@ -0,0 +1,38 @@
+// RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o %t
+// RUN: llvm-readelf --notes %t | FileCheck %s --check-prefix=GNU
+// RUN: llvm-readobj --notes %t | FileCheck %s --check-prefix=LLVM
+
+// GNU: Displaying notes found at file offset 0x00000040 with length 0x00000020:
+// GNU-NEXT:   Owner                 Data size	Description
+// GNU-NEXT:   GNU                   0x00000010	NT_GNU_PROPERTY_TYPE_0 (property note)
+// GNU-NEXT:     Properties:    aarch64 feature: BTI, PAC
+
+// LLVM:      Notes [
+// LLVM-NEXT:   NoteSection {
+// LLVM-NEXT:     Offset: 0x40
+// LLVM-NEXT:     Size: 0x20
+// LLVM-NEXT:     Note {
+// LLVM-NEXT:       Owner: GNU
+// LLVM-NEXT:       Data size: 0x10
+// LLVM-NEXT:       Type: NT_GNU_PROPERTY_TYPE_0 (property note)
+// LLVM-NEXT:       Property [
+// LLVM-NEXT:         aarch64 feature: BTI, PAC
+// LLVM-NEXT:       ]
+// LLVM-NEXT:     }
+// LLVM-NEXT:   }
+// LLVM-NEXT: ]
+
+.section ".note.gnu.property", "a"
+.align 4
+  .long 4           /* Name length is always 4 ("GNU") */
+  .long end - begin /* Data length */
+  .long 5           /* Type: NT_GNU_PROPERTY_TYPE_0 */
+  .asciz "GNU"      /* Name */
+  .p2align 3
+begin:
+  /* BTI and PAC property note */
+  .long 0xc0000000  /* Type: GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+  .long 4           /* Data size */
+  .long 3           /* BTI and PAC */
+  .p2align 3        /* Align to 8 byte for 64 bit */
+end:
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 1012cf1085ff1..f87be61046e0b 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3840,8 +3840,10 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
     if (DataSize)
       OS << format(" <corrupt length: 0x%x>", DataSize);
     return OS.str();
+  case GNU_PROPERTY_AARCH64_FEATURE_1_AND:
   case GNU_PROPERTY_X86_FEATURE_1_AND:
-    OS << "x86 feature: ";
+    OS << ((Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) ? "aarch64 feature: "
+                                                        : "x86 feature: ");
     if (DataSize != 4) {
       OS << format("<corrupt length: 0x%x>", DataSize);
       return OS.str();
@@ -3851,8 +3853,13 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
       OS << "<None>";
       return OS.str();
     }
-    DumpBit(GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT");
-    DumpBit(GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK");
+    if (Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
+      DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI");
+      DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC");
+    } else {
+      DumpBit(GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT");
+      DumpBit(GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK");
+    }
     if (PrData)
       OS << format("<unknown flags: 0x%x>", PrData);
     return OS.str();

From 5b41fe58deb330c28e0421b96b52c7eadbf073ed Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Jun 2019 11:31:45 +0000
Subject: [PATCH 1009/1176] Fix -Wparentheses warning. NFCI.

llvm-svn: 362491
---
 clang-tools-extra/clangd/Protocol.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 5c5912eb9bcc6..4714c6c11da57 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -832,8 +832,8 @@ llvm::json::Value toJSON(const CompletionList &L) {
 }
 
 llvm::json::Value toJSON(const ParameterInformation &PI) {
-  assert(PI.labelOffsets.hasValue() ||
-         !PI.labelString.empty() && "parameter information label is required");
+  assert((PI.labelOffsets.hasValue() || !PI.labelString.empty()) &&
+         "parameter information label is required");
   llvm::json::Object Result;
   if (PI.labelOffsets)
     Result["label"] =

From c73c10a9bf1d0fbfb3a78342c64a57267902119e Mon Sep 17 00:00:00 2001
From: David Zarzycki <dave@znu.io>
Date: Tue, 4 Jun 2019 11:33:49 +0000
Subject: [PATCH 1010/1176] Unbreak my hasty "unbreak" cmake fix

llvm-svn: 362492
---
 llvm/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index de499a75827ba..71434bcb3e71f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -629,7 +629,7 @@ if(WIN32 OR CYGWIN)
     set(LLVM_ENABLE_PLUGINS_default OFF)
   endif()
 else()
-  set(LLVM_ENABLE_PLUGINS_default LLVM_ENABLE_PIC)
+  set(LLVM_ENABLE_PLUGINS_default ${LLVM_ENABLE_PIC})
 endif()
 option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default})
 

From 49d7221f7195d1254366e9eb88a2d610a2277e23 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.smith@linaro.org>
Date: Tue, 4 Jun 2019 11:44:33 +0000
Subject: [PATCH 1011/1176] [AArch64][ELF][llvm-readobj] Add support for BTI
 and PAC dynamic tags

ELF for the 64-bit Arm Architecture defines two processor-specific dynamic
tags:
DT_AARCH64_BTI_PLT 0x70000001, d_val
DT_AARCH64_PAC_PLT 0x70000003, d_val

These presence of these tags indicate that PLT sequences have been
protected using Branch Target Identification and Pointer Authentication
respectively. The presence of both indicates that the PLT sequences have
been protected with both Branch Target Identification and Pointer
Authentication.

This patch adds the tags and tests for llvm-readobj and yaml2obj.

As some of the processor specific dynamic tags overlap, this patch splits
them up, keeping their original default value if they were not previously
mentioned explicitly in a switch case.

Differential Revision: https://reviews.llvm.org/D62596

llvm-svn: 362493
---
 .../include/llvm/BinaryFormat/DynamicTags.def |  13 ++
 llvm/lib/Object/ELF.cpp                       |  10 ++
 llvm/lib/ObjectYAML/ELFYAML.cpp               |  10 +-
 .../elf-dynamic-tags-machine-specific.yaml    |  38 ++++++
 .../elf-dynamic-tags-machine-specific.test    |  22 ++++
 .../obj2yaml/dynamic-section-arch-tags.test   |  26 +++-
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 113 ++++++++++++++++--
 7 files changed, 217 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def
index 82fe72eefade1..c884badab3603 100644
--- a/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -6,6 +6,11 @@
 // such as DT_HIOS, etc. to allow using this file to in other contexts.
 // For example we can use it to generate a stringification switch statement.
 
+#ifndef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define AARCH64_DYNAMIC_TAG_DEFINED
+#endif
+
 #ifndef HEXAGON_DYNAMIC_TAG
 #define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG_DEFINED
@@ -107,6 +112,10 @@ DYNAMIC_TAG(VERNEED, 0X6FFFFFFE)    // The address of the version dependency
                                     // table.
 DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED.
 
+// AArch64 specific dynamic table entries
+AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001)
+AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003)
+
 // Hexagon specific dynamic table entries
 HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000)
 HEXAGON_DYNAMIC_TAG(HEXAGON_VER, 0x70000001)
@@ -204,6 +213,10 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF)    // Shared object to get values from
 #undef DYNAMIC_TAG_MARKER
 #undef DYNAMIC_TAG_MARKER_DEFINED
 #endif
+#ifdef AARCH64_DYNAMIC_TAG_DEFINED
+#undef AARCH64_DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG_DEFINED
+#endif
 #ifdef MIPS_DYNAMIC_TAG_DEFINED
 #undef MIPS_DYNAMIC_TAG
 #undef MIPS_DYNAMIC_TAG_DEFINED
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index a9c90e01551ea..f0ef53d244455 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -434,6 +434,14 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 
 #define DYNAMIC_TAG(n, v)
   switch (Arch) {
+  case ELF::EM_AARCH64:
+    switch (Type) {
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+    }
+    break;
+
   case ELF::EM_HEXAGON:
     switch (Type) {
 #define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
@@ -461,6 +469,7 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 #undef DYNAMIC_TAG
   switch (Type) {
 // Now handle all dynamic tags except the architecture specific ones
+#define AARCH64_DYNAMIC_TAG(name, value)
 #define MIPS_DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG(name, value)
@@ -469,6 +478,7 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
 #define DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
 #include "llvm/BinaryFormat/DynamicTags.def"
 #undef DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG
 #undef MIPS_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 39e59efe00f60..1d230700f4471 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -680,6 +680,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
   assert(Object && "The IO context is not initialized");
 
 // Disable architecture specific tags by default. We might enable them below.
+#define AARCH64_DYNAMIC_TAG(name, value)
 #define MIPS_DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG(name, value)
@@ -689,6 +690,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
 #define STRINGIFY(X) (#X)
 #define DYNAMIC_TAG(X, Y) IO.enumCase(Value, STRINGIFY(DT_##X), ELF::DT_##X);
   switch (Object->Header.Machine) {
+  case ELF::EM_AARCH64:
+#undef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value)
+    break;
   case ELF::EM_MIPS:
 #undef MIPS_DYNAMIC_TAG
 #define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
@@ -714,7 +722,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
 #include "llvm/BinaryFormat/DynamicTags.def"
     break;
   }
-
+#undef AARCH64_DYNAMIC_TAG
 #undef MIPS_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG
diff --git a/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml b/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
index e60034afc80a2..653a7e9eeb0dd 100644
--- a/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
+++ b/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
@@ -200,3 +200,41 @@ ProgramHeaders:
     VAddr: 0x1010
     Sections:
       - Section: .dynamic
+
+# Fourth document: AARCH64
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_AARCH64
+Sections:
+  - Name:    .dynstr
+    Type:    SHT_STRTAB
+    Address: 0x1000
+    Size:    0x10
+    Content: "004400550066007700"
+  - Name:    .dynamic
+    Type:    SHT_DYNAMIC
+    Address: 0x1010
+    Entries:
+      - Tag:   DT_HASH
+        Value: 0x1000
+      - Tag:   DT_AARCH64_BTI_PLT
+        Value: 0
+      - Tag:   DT_AARCH64_PAC_PLT
+        Value: 0
+      - Tag:   0x1234abcd
+        Value: 0x1
+      - Tag:   DT_NULL
+        Value: 0
+ProgramHeaders:
+  - Type: PT_LOAD
+    VAddr: 0x1000
+    Sections:
+      - Section: .dynstr
+      - Section: .dynamic
+  - Type: PT_DYNAMIC
+    VAddr: 0x1010
+    Sections:
+      - Section: .dynamic
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test b/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
index 06c8b6d3fbe70..1d6c3b33b88c0 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
@@ -151,3 +151,25 @@
 # GNU-PPC-NEXT:   0x0000000070000000 (PPC64_GLINK)        0x1000
 # GNU-PPC-NEXT:   0x000000001234abcd (unknown)            0x1
 # GNU-PPC-NEXT:   0x0000000000000000 (NULL)               0x0
+
+# Test that AARCH64 machine-specific tags can be dumped.
+# RUN: yaml2obj --docnum=4 %S/Inputs/elf-dynamic-tags-machine-specific.yaml -o %t.aarch64
+# RUN: llvm-readobj --dynamic-table %t.aarch64 | FileCheck %s --check-prefix=LLVM-AARCH64
+# RUN: llvm-readelf --dynamic-table %t.aarch64 | FileCheck %s --check-prefix=GNU-AARCH64
+
+# LLVM-AARCH64:     DynamicSection [ (5 entries)
+# LLVM-AARCH64-NEXT:  Tag                Type                 Name/Value
+# LLVM-AARCH64-NEXT:  0x0000000000000004 HASH                 0x1000
+# LLVM-AARCH64-NEXT:  0x0000000070000001 AARCH64_BTI_PLT      0
+# LLVM-AARCH64-NEXT:  0x0000000070000003 AARCH64_PAC_PLT      0
+# LLVM-AARCH64-NEXT:  0x000000001234ABCD unknown              0x1
+# LLVM-AARCH64-NEXT:  0x0000000000000000 NULL                 0x0
+# LLVM-AARCH64-NEXT:]
+
+# GNU-AARCH64:      Dynamic section at offset {{.*}} contains 5 entries:
+# GNU-AARCH64-NEXT:  Tag                Type                 Name/Value
+# GNU-AARCH64-NEXT:  0x0000000000000004 (HASH)               0x1000
+# GNU-AARCH64-NEXT:  0x0000000070000001 (AARCH64_BTI_PLT)    0
+# GNU-AARCH64-NEXT:  0x0000000070000003 (AARCH64_PAC_PLT)    0
+# GNU-AARCH64-NEXT:  0x000000001234abcd (unknown)            0x1
+# GNU-AARCH64-NEXT:  0x0000000000000000 (NULL)               0x0
diff --git a/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test b/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test
index bc93237126601..badb9cc3edd6d 100644
--- a/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test
+++ b/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test
@@ -249,12 +249,36 @@ Sections:
       - Tag:             DT_PPC64_GLINK
         Value:           0x0000000000000001
 
+## Check we can handle AARCH64 specific tags.
+# RUN: yaml2obj -docnum=4 %s -o %t2
+# RUN: obj2yaml %t2 | FileCheck %s --check-prefix=AARCH64
+
+# AARCH64:      - Tag:             DT_AARCH64_BTI_PLT
+# AARCH64-NEXT:   Value:           0x0000000000000000
+# AARCH64-NEXT: - Tag:             DT_AARCH64_PAC_PLT
+# AARCH64-NEXT:   Value:           0x0000000000000000
+
+--- !ELF
+FileHeader:
+  Class:             ELFCLASS64
+  Data:              ELFDATA2LSB
+  Type:              ET_REL
+  Machine:           EM_AARCH64
+Sections:
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Entries:
+      - Tag:             DT_AARCH64_BTI_PLT
+        Value:           0x0000000000000000
+      - Tag:             DT_AARCH64_PAC_PLT
+        Value:           0x0000000000000000
+
 ## Check we can't use a tag from a different architecture,
 ## even if it has the same numeric value as a valid tag.
 ## Here for EM_PPC64 we are trying to use DT_HEXAGON_SYMSZ
 ## instead of DT_PPC64_GLINK. They both have value of 0x70000000.
 
-# RUN: not yaml2obj -docnum=4 %s 2>&1 | FileCheck %s --check-prefix=ERR
+# RUN: not yaml2obj -docnum=5 %s 2>&1 | FileCheck %s --check-prefix=ERR
 # ERR:      error: invalid hex64 number
 # ERR-NEXT: - Tag: DT_HEXAGON_SYMSZ
 
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index f87be61046e0b..f41adaeaed6b5 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1463,6 +1463,17 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
 static const char *getTypeString(unsigned Arch, uint64_t Type) {
 #define DYNAMIC_TAG(n, v)
   switch (Arch) {
+
+  case EM_AARCH64:
+    switch (Type) {
+#define AARCH64_DYNAMIC_TAG(name, value)                                       \
+    case DT_##name:                                                            \
+      return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+    }
+    break;
+
   case EM_HEXAGON:
     switch (Type) {
 #define HEXAGON_DYNAMIC_TAG(name, value)                                       \
@@ -1496,6 +1507,7 @@ static const char *getTypeString(unsigned Arch, uint64_t Type) {
 #undef DYNAMIC_TAG
   switch (Type) {
 // Now handle all dynamic tags except the architecture specific ones
+#define AARCH64_DYNAMIC_TAG(name, value)
 #define MIPS_DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG(name, value)
@@ -1506,6 +1518,7 @@ static const char *getTypeString(unsigned Arch, uint64_t Type) {
     return #name;
 #include "llvm/BinaryFormat/DynamicTags.def"
 #undef DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG
 #undef MIPS_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG
@@ -1783,6 +1796,93 @@ void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
                                         uint64_t Value) const {
   const char *ConvChar =
       (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
+
+  // Handle custom printing of architecture specific tags
+  switch (ObjF->getELFFile()->getHeader()->e_machine) {
+  case EM_AARCH64:
+    switch (Type) {
+    case DT_AARCH64_BTI_PLT:
+    case DT_AARCH64_PAC_PLT:
+      OS << Value;
+      return;
+    default:
+      break;
+    }
+    break;
+  case EM_HEXAGON:
+    switch (Type) {
+    case DT_HEXAGON_VER:
+      OS << Value;
+      return;
+    case DT_HEXAGON_SYMSZ:
+    case DT_HEXAGON_PLT:
+      OS << format(ConvChar, Value);
+      return;
+    default:
+      break;
+    }
+    break;
+  case EM_MIPS:
+    switch (Type) {
+    case DT_MIPS_RLD_VERSION:
+    case DT_MIPS_LOCAL_GOTNO:
+    case DT_MIPS_SYMTABNO:
+    case DT_MIPS_UNREFEXTNO:
+      OS << Value;
+      return;
+    case DT_MIPS_TIME_STAMP:
+    case DT_MIPS_ICHECKSUM:
+    case DT_MIPS_IVERSION:
+    case DT_MIPS_BASE_ADDRESS:
+    case DT_MIPS_MSYM:
+    case DT_MIPS_CONFLICT:
+    case DT_MIPS_LIBLIST:
+    case DT_MIPS_CONFLICTNO:
+    case DT_MIPS_LIBLISTNO:
+    case DT_MIPS_GOTSYM:
+    case DT_MIPS_HIPAGENO:
+    case DT_MIPS_RLD_MAP:
+    case DT_MIPS_DELTA_CLASS:
+    case DT_MIPS_DELTA_CLASS_NO:
+    case DT_MIPS_DELTA_INSTANCE:
+    case DT_MIPS_DELTA_RELOC:
+    case DT_MIPS_DELTA_RELOC_NO:
+    case DT_MIPS_DELTA_SYM:
+    case DT_MIPS_DELTA_SYM_NO:
+    case DT_MIPS_DELTA_CLASSSYM:
+    case DT_MIPS_DELTA_CLASSSYM_NO:
+    case DT_MIPS_CXX_FLAGS:
+    case DT_MIPS_PIXIE_INIT:
+    case DT_MIPS_SYMBOL_LIB:
+    case DT_MIPS_LOCALPAGE_GOTIDX:
+    case DT_MIPS_LOCAL_GOTIDX:
+    case DT_MIPS_HIDDEN_GOTIDX:
+    case DT_MIPS_PROTECTED_GOTIDX:
+    case DT_MIPS_OPTIONS:
+    case DT_MIPS_INTERFACE:
+    case DT_MIPS_DYNSTR_ALIGN:
+    case DT_MIPS_INTERFACE_SIZE:
+    case DT_MIPS_RLD_TEXT_RESOLVE_ADDR:
+    case DT_MIPS_PERF_SUFFIX:
+    case DT_MIPS_COMPACT_SIZE:
+    case DT_MIPS_GP_VALUE:
+    case DT_MIPS_AUX_DYNAMIC:
+    case DT_MIPS_PLTGOT:
+    case DT_MIPS_RWPLT:
+    case DT_MIPS_RLD_MAP_REL:
+      OS << format(ConvChar, Value);
+      return;
+    case DT_MIPS_FLAGS:
+      printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
+      return;
+    default:
+      break;
+    }
+    break;
+  default:
+    break;
+  }
+
   switch (Type) {
   case DT_PLTREL:
     if (Value == DT_REL) {
@@ -1811,22 +1911,12 @@ void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
   case DT_VERSYM:
   case DT_GNU_HASH:
   case DT_NULL:
-  case DT_MIPS_BASE_ADDRESS:
-  case DT_MIPS_GOTSYM:
-  case DT_MIPS_RLD_MAP:
-  case DT_MIPS_RLD_MAP_REL:
-  case DT_MIPS_PLTGOT:
-  case DT_MIPS_OPTIONS:
     OS << format(ConvChar, Value);
     break;
   case DT_RELACOUNT:
   case DT_RELCOUNT:
   case DT_VERDEFNUM:
   case DT_VERNEEDNUM:
-  case DT_MIPS_RLD_VERSION:
-  case DT_MIPS_LOCAL_GOTNO:
-  case DT_MIPS_SYMTABNO:
-  case DT_MIPS_UNREFEXTNO:
     OS << Value;
     break;
   case DT_PLTRELSZ:
@@ -1862,9 +1952,6 @@ void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
   case DT_RUNPATH:
     OS << getDynamicString(Value);
     break;
-  case DT_MIPS_FLAGS:
-    printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
-    break;
   case DT_FLAGS:
     printFlags(Value, makeArrayRef(ElfDynamicDTFlags), OS);
     break;

From 2e49e8196dab68481857ec243f091fbb4ad7af43 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 4 Jun 2019 11:44:50 +0000
Subject: [PATCH 1012/1176] [NFC][Codegen] D62818 - also add tests with X being
 constant

For X86, these may be a 'BT' pattern, and in general, can cause
the transform to deadlock.

llvm-svn: 362494
---
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll |  34 ++++++
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll |  34 ++++++
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 109 +++++++++++++++++-
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 109 +++++++++++++++++-
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll |  85 ++++++++++++++
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll |  85 ++++++++++++++
 6 files changed, 452 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 0bc2d8b37766e..433904a6fedbc 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -301,6 +301,40 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
   ret i1 %res
 }
 
+;------------------------------------------------------------------------------;
+; What if X is a constant too?
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_x_is_const_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43605
+; CHECK-NEXT:    movk w8, #43605, lsl #16
+; CHECK-NEXT:    lsr w8, w8, w0
+; CHECK-NEXT:    tst w8, #0x1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i32 2857740885, %y
+  %t1 = and i32 %t0, 1
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_x_is_const2_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #43605
+; CHECK-NEXT:    lsr w8, w8, w0
+; CHECK-NEXT:    movk w9, #43605, lsl #16
+; CHECK-NEXT:    tst w8, w9
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = lshr i32 1, %y
+  %t1 = and i32 %t0, 2857740885
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
 ;------------------------------------------------------------------------------;
 ; A few negative tests
 ;------------------------------------------------------------------------------;
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index c76a2e43daeca..856f294675ee8 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -296,6 +296,40 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
   ret i1 %res
 }
 
+;------------------------------------------------------------------------------;
+; What if X is a constant too?
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_x_is_const_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43605
+; CHECK-NEXT:    movk w8, #43605, lsl #16
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    tst w8, #0x1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i32 2857740885, %y
+  %t1 = and i32 %t0, 1
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
+; CHECK-LABEL: scalar_i32_x_is_const2_eq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #43605
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    movk w9, #43605, lsl #16
+; CHECK-NEXT:    tst w8, w9
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %t0 = shl i32 1, %y
+  %t1 = and i32 %t0, 2857740885
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
 ;------------------------------------------------------------------------------;
 ; A few negative tests
 ;------------------------------------------------------------------------------;
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index ff621f6c708c0..6c30586a9595b 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -1119,6 +1119,111 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
   ret i1 %res
 }
 
+;------------------------------------------------------------------------------;
+; What if X is a constant too?
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
+; ARM6-LABEL: scalar_i32_x_is_const_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r1, .LCPI18_0
+; ARM6-NEXT:    mov r2, #1
+; ARM6-NEXT:    bic r0, r2, r1, lsr r0
+; ARM6-NEXT:    bx lr
+; ARM6-NEXT:    .p2align 2
+; ARM6-NEXT:  @ %bb.1:
+; ARM6-NEXT:  .LCPI18_0:
+; ARM6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; ARM78-LABEL: scalar_i32_x_is_const_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r1, #43605
+; ARM78-NEXT:    mov r2, #1
+; ARM78-NEXT:    movt r1, #43605
+; ARM78-NEXT:    bic r0, r2, r1, lsr r0
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_x_is_const_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    ldr r1, .LCPI18_0
+; THUMB6-NEXT:    lsrs r1, r0
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    ands r2, r1
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI18_0:
+; THUMB6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; THUMB78-LABEL: scalar_i32_x_is_const_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movw r1, #43605
+; THUMB78-NEXT:    movt r1, #43605
+; THUMB78-NEXT:    lsr.w r0, r1, r0
+; THUMB78-NEXT:    movs r1, #1
+; THUMB78-NEXT:    bic.w r0, r1, r0
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i32 2857740885, %y
+  %t1 = and i32 %t0, 1
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
+; ARM6-LABEL: scalar_i32_x_is_const2_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r2, .LCPI19_0
+; ARM6-NEXT:    mov r1, #1
+; ARM6-NEXT:    and r0, r2, r1, lsr r0
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+; ARM6-NEXT:    .p2align 2
+; ARM6-NEXT:  @ %bb.1:
+; ARM6-NEXT:  .LCPI19_0:
+; ARM6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; ARM78-LABEL: scalar_i32_x_is_const2_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r1, #43605
+; ARM78-NEXT:    mov r2, #1
+; ARM78-NEXT:    movt r1, #43605
+; ARM78-NEXT:    and r0, r1, r2, lsr r0
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_x_is_const2_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    movs r1, #1
+; THUMB6-NEXT:    lsrs r1, r0
+; THUMB6-NEXT:    ldr r2, .LCPI19_0
+; THUMB6-NEXT:    ands r2, r1
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI19_0:
+; THUMB6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; THUMB78-LABEL: scalar_i32_x_is_const2_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movs r1, #1
+; THUMB78-NEXT:    lsr.w r0, r1, r0
+; THUMB78-NEXT:    movw r1, #43605
+; THUMB78-NEXT:    movt r1, #43605
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = lshr i32 1, %y
+  %t1 = and i32 %t0, 2857740885
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
 ;------------------------------------------------------------------------------;
 ; A few negative tests
 ;------------------------------------------------------------------------------;
@@ -1154,11 +1259,11 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 ; THUMB6-NEXT:    ands r2, r0
 ; THUMB6-NEXT:    sxtb r0, r2
 ; THUMB6-NEXT:    cmp r0, #0
-; THUMB6-NEXT:    blt .LBB18_2
+; THUMB6-NEXT:    blt .LBB20_2
 ; THUMB6-NEXT:  @ %bb.1:
 ; THUMB6-NEXT:    movs r0, #0
 ; THUMB6-NEXT:    bx lr
-; THUMB6-NEXT:  .LBB18_2:
+; THUMB6-NEXT:  .LBB20_2:
 ; THUMB6-NEXT:    movs r0, #1
 ; THUMB6-NEXT:    bx lr
 ;
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 104a43979a0ca..82572af4d6873 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -1130,6 +1130,111 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
   ret i1 %res
 }
 
+;------------------------------------------------------------------------------;
+; What if X is a constant too?
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
+; ARM6-LABEL: scalar_i32_x_is_const_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r1, .LCPI18_0
+; ARM6-NEXT:    mov r2, #1
+; ARM6-NEXT:    bic r0, r2, r1, lsl r0
+; ARM6-NEXT:    bx lr
+; ARM6-NEXT:    .p2align 2
+; ARM6-NEXT:  @ %bb.1:
+; ARM6-NEXT:  .LCPI18_0:
+; ARM6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; ARM78-LABEL: scalar_i32_x_is_const_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r1, #43605
+; ARM78-NEXT:    mov r2, #1
+; ARM78-NEXT:    movt r1, #43605
+; ARM78-NEXT:    bic r0, r2, r1, lsl r0
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_x_is_const_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    ldr r1, .LCPI18_0
+; THUMB6-NEXT:    lsls r1, r0
+; THUMB6-NEXT:    movs r2, #1
+; THUMB6-NEXT:    ands r2, r1
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI18_0:
+; THUMB6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; THUMB78-LABEL: scalar_i32_x_is_const_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movw r1, #43605
+; THUMB78-NEXT:    movt r1, #43605
+; THUMB78-NEXT:    lsl.w r0, r1, r0
+; THUMB78-NEXT:    movs r1, #1
+; THUMB78-NEXT:    bic.w r0, r1, r0
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i32 2857740885, %y
+  %t1 = and i32 %t0, 1
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
+; ARM6-LABEL: scalar_i32_x_is_const2_eq:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    ldr r2, .LCPI19_0
+; ARM6-NEXT:    mov r1, #1
+; ARM6-NEXT:    and r0, r2, r1, lsl r0
+; ARM6-NEXT:    clz r0, r0
+; ARM6-NEXT:    lsr r0, r0, #5
+; ARM6-NEXT:    bx lr
+; ARM6-NEXT:    .p2align 2
+; ARM6-NEXT:  @ %bb.1:
+; ARM6-NEXT:  .LCPI19_0:
+; ARM6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; ARM78-LABEL: scalar_i32_x_is_const2_eq:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    movw r1, #43605
+; ARM78-NEXT:    mov r2, #1
+; ARM78-NEXT:    movt r1, #43605
+; ARM78-NEXT:    and r0, r1, r2, lsl r0
+; ARM78-NEXT:    clz r0, r0
+; ARM78-NEXT:    lsr r0, r0, #5
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i32_x_is_const2_eq:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    movs r1, #1
+; THUMB6-NEXT:    lsls r1, r0
+; THUMB6-NEXT:    ldr r2, .LCPI19_0
+; THUMB6-NEXT:    ands r2, r1
+; THUMB6-NEXT:    rsbs r0, r2, #0
+; THUMB6-NEXT:    adcs r0, r2
+; THUMB6-NEXT:    bx lr
+; THUMB6-NEXT:    .p2align 2
+; THUMB6-NEXT:  @ %bb.1:
+; THUMB6-NEXT:  .LCPI19_0:
+; THUMB6-NEXT:    .long 2857740885 @ 0xaa55aa55
+;
+; THUMB78-LABEL: scalar_i32_x_is_const2_eq:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    movs r1, #1
+; THUMB78-NEXT:    lsl.w r0, r1, r0
+; THUMB78-NEXT:    movw r1, #43605
+; THUMB78-NEXT:    movt r1, #43605
+; THUMB78-NEXT:    ands r0, r1
+; THUMB78-NEXT:    clz r0, r0
+; THUMB78-NEXT:    lsrs r0, r0, #5
+; THUMB78-NEXT:    bx lr
+  %t0 = shl i32 1, %y
+  %t1 = and i32 %t0, 2857740885
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
 ;------------------------------------------------------------------------------;
 ; A few negative tests
 ;------------------------------------------------------------------------------;
@@ -1165,11 +1270,11 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 ; THUMB6-NEXT:    ands r2, r0
 ; THUMB6-NEXT:    sxtb r0, r2
 ; THUMB6-NEXT:    cmp r0, #0
-; THUMB6-NEXT:    blt .LBB18_2
+; THUMB6-NEXT:    blt .LBB20_2
 ; THUMB6-NEXT:  @ %bb.1:
 ; THUMB6-NEXT:    movs r0, #0
 ; THUMB6-NEXT:    bx lr
-; THUMB6-NEXT:  .LBB18_2:
+; THUMB6-NEXT:  .LBB20_2:
 ; THUMB6-NEXT:    movs r0, #1
 ; THUMB6-NEXT:    bx lr
 ;
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index b5bf462514e61..02ded696861cc 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -869,6 +869,91 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
   ret i1 %res
 }
 
+;------------------------------------------------------------------------------;
+; What if X is a constant too?
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
+; X86-LABEL: scalar_i32_x_is_const_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl $-1437226411, %ecx # imm = 0xAA55AA55
+; X86-NEXT:    btl %eax, %ecx
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i32_x_is_const_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-NEXT:    btl %edi, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %t0 = lshr i32 2857740885, %y
+  %t1 = and i32 %t0, 1
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_x_is_const2_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_x_is_const2_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_x_is_const2_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $1, %ecx
+; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_x_is_const2_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %ecx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_x_is_const2_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %edi, %ecx
+; X64-BMI1-NEXT:    movl $1, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_x_is_const2_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $1, %eax
+; X64-BMI12-NEXT:    shrxl %edi, %eax, %eax
+; X64-BMI12-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = lshr i32 1, %y
+  %t1 = and i32 %t0, 2857740885
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
 ;------------------------------------------------------------------------------;
 ; A few negative tests
 ;------------------------------------------------------------------------------;
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 0a6ce5c0e3f10..a1b01be7cf6b6 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -817,6 +817,91 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
   ret i1 %res
 }
 
+;------------------------------------------------------------------------------;
+; What if X is a constant too?
+;------------------------------------------------------------------------------;
+
+define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
+; X86-NOBMI-LABEL: scalar_i32_x_is_const_eq:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testb $1, %al
+; X86-NOBMI-NEXT:    sete %al
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: scalar_i32_x_is_const_eq:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $1, %al
+; X86-BMI1-NEXT:    sete %al
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI12-LABEL: scalar_i32_x_is_const_eq:
+; X86-BMI12:       # %bb.0:
+; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI12-NEXT:    movl $-1437226411, %ecx # imm = 0xAA55AA55
+; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI12-NEXT:    testb $1, %al
+; X86-BMI12-NEXT:    sete %al
+; X86-BMI12-NEXT:    retl
+;
+; X64-NOBMI-LABEL: scalar_i32_x_is_const_eq:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %ecx
+; X64-NOBMI-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    testb $1, %al
+; X64-NOBMI-NEXT:    sete %al
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: scalar_i32_x_is_const_eq:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %edi, %ecx
+; X64-BMI1-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shll %cl, %eax
+; X64-BMI1-NEXT:    testb $1, %al
+; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI12-LABEL: scalar_i32_x_is_const_eq:
+; X64-BMI12:       # %bb.0:
+; X64-BMI12-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-BMI12-NEXT:    shlxl %edi, %eax, %eax
+; X64-BMI12-NEXT:    testb $1, %al
+; X64-BMI12-NEXT:    sete %al
+; X64-BMI12-NEXT:    retq
+  %t0 = shl i32 2857740885, %y
+  %t1 = and i32 %t0, 1
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
+; X86-LABEL: scalar_i32_x_is_const2_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl $-1437226411, %ecx # imm = 0xAA55AA55
+; X86-NEXT:    btl %eax, %ecx
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: scalar_i32_x_is_const2_eq:
+; X64:       # %bb.0:
+; X64-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-NEXT:    btl %edi, %eax
+; X64-NEXT:    setae %al
+; X64-NEXT:    retq
+  %t0 = shl i32 1, %y
+  %t1 = and i32 %t0, 2857740885
+  %res = icmp eq i32 %t1, 0
+  ret i1 %res
+}
+
 ;------------------------------------------------------------------------------;
 ; A few negative tests
 ;------------------------------------------------------------------------------;

From 454fc7787256bafbd5ae079fdaff4e32a1feafb6 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Tue, 4 Jun 2019 12:55:00 +0000
Subject: [PATCH 1013/1176] Include what you use in PPCRegisterInfo.cpp

llvm-svn: 362495
---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index b4e375b5c9e6e..c796b852fd1af 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCRegisterInfo.h"
-#include "PPC.h"
 #include "PPCFrameLowering.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"

From a7f9f42d289525cf266674bde31716d24c39fee6 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Tue, 4 Jun 2019 13:38:36 +0000
Subject: [PATCH 1014/1176] [clangd] Also apply adjustArguments when returning
 fallback commands

Reviewers: ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62856

llvm-svn: 362496
---
 clang-tools-extra/clangd/GlobalCompilationDatabase.cpp     | 1 +
 .../clangd/unittests/GlobalCompilationDatabaseTests.cpp    | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
index 50f5408cff362..b40ae26cd3e72 100644
--- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
+++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp
@@ -173,6 +173,7 @@ tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
   std::lock_guard<std::mutex> Lock(Mutex);
   Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(),
                          FallbackFlags.end());
+  adjustArguments(Cmd, ResourceDir);
   return Cmd;
 }
 
diff --git a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp
index 7c7993cc0f9f7..6761deb70acbd 100644
--- a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp
+++ b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp
@@ -21,6 +21,7 @@ using ::testing::Contains;
 using ::testing::ElementsAre;
 using ::testing::EndsWith;
 using ::testing::Not;
+using ::testing::StartsWith;
 
 TEST(GlobalCompilationDatabaseTest, FallbackCommand) {
   DirectoryBasedGlobalCompilationDatabase DB(None);
@@ -85,7 +86,8 @@ TEST_F(OverlayCDBTest, GetCompileCommand) {
 TEST_F(OverlayCDBTest, GetFallbackCommand) {
   OverlayCDB CDB(Base.get(), {"-DA=4"});
   EXPECT_THAT(CDB.getFallbackCommand(testPath("bar.cc")).CommandLine,
-              ElementsAre("clang", "-DA=2", testPath("bar.cc"), "-DA=4"));
+              ElementsAre("clang", "-DA=2", testPath("bar.cc"), "-DA=4",
+                          "-fsyntax-only", StartsWith("-resource-dir")));
 }
 
 TEST_F(OverlayCDBTest, NoBase) {
@@ -97,7 +99,8 @@ TEST_F(OverlayCDBTest, NoBase) {
               Contains("-DA=5"));
 
   EXPECT_THAT(CDB.getFallbackCommand(testPath("foo.cc")).CommandLine,
-              ElementsAre(EndsWith("clang"), testPath("foo.cc"), "-DA=6"));
+              ElementsAre(EndsWith("clang"), testPath("foo.cc"), "-DA=6",
+                          "-fsyntax-only"));
 }
 
 TEST_F(OverlayCDBTest, Watch) {

From dcba4828a9ead5f5b1fa27f0853823618075d0e0 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 4 Jun 2019 13:41:29 +0000
Subject: [PATCH 1015/1176] [ELF] Suppress "STT_SECTION symbol should be
 defined" on .eh_frame, .debug*, .zdebug* and .gcc_except_table

Summary:
With -r or --emit-relocs, we warn `STT_SECTION symbol should be defined`
on relocations to discarded section symbol. This was added as an error
in rLLD319404, but was not so effective before D61583 (it turned the
error to a warning).

Relocations from .eh_frame .debug* .zdebug* .gcc_except_table to
discarded .text are very common and somewhat expected. Don't warn/error
on them. As a reference, ld.bfd has a similar logic in
_bfd_elf_default_action_discarded() to allow these cases.

Delete invalid-undef-section-symbol.test because what it intended to
check is now covered by the updated comdat-discarded-reloc.s

Delete relocatable-eh-frame.s because we allow relocations from
.eh_frame as a special case now.

Reviewers: grimar, phosek, ruiu, espindola

Reviewed By: ruiu

Subscribers: emaste, arichardson, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62840

llvm-svn: 362497
---
 lld/ELF/InputSection.cpp                      | 17 ++++++--
 lld/test/ELF/Inputs/comdat-discarded-reloc.s  | 12 +++---
 lld/test/ELF/comdat-discarded-reloc.s         | 39 +++++++++++++++----
 .../ELF/invalid-undef-section-symbol.test     | 26 -------------
 lld/test/ELF/relocatable-eh-frame.s           | 21 ----------
 5 files changed, 53 insertions(+), 62 deletions(-)
 delete mode 100644 lld/test/ELF/invalid-undef-section-symbol.test
 delete mode 100644 lld/test/ELF/relocatable-eh-frame.s

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 74a8028e229bd..cb1072505638f 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -412,7 +412,8 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
 
   for (const RelTy &Rel : Rels) {
     RelType Type = Rel.getType(Config->IsMips64EL);
-    Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
+    const ObjFile<ELFT> *File = getFile<ELFT>();
+    Symbol &Sym = File->getRelocTargetSym(Rel);
 
     auto *P = reinterpret_cast<typename ELFT::Rela *>(Buf);
     Buf += sizeof(RelTy);
@@ -435,10 +436,20 @@ void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
       // .eh_frame is horribly special and can reference discarded sections. To
       // avoid having to parse and recreate .eh_frame, we just replace any
       // relocation in it pointing to discarded sections with R_*_NONE, which
-      // hopefully creates a frame that is ignored at runtime.
+      // hopefully creates a frame that is ignored at runtime. Also, don't warn
+      // on .gcc_except_table and debug sections.
       auto *D = dyn_cast<Defined>(&Sym);
       if (!D) {
-        warn("STT_SECTION symbol should be defined");
+        if (!Sec->Name.startswith(".debug") &&
+            !Sec->Name.startswith(".zdebug") && Sec->Name != ".eh_frame" &&
+            Sec->Name != ".gcc_except_table") {
+          uint32_t SecIdx = cast<Undefined>(Sym).DiscardedSecIdx;
+          Elf_Shdr_Impl<ELFT> Sec =
+              CHECK(File->getObj().sections(), File)[SecIdx];
+          warn("relocation refers to a discarded section: " +
+               CHECK(File->getObj().getSectionName(&Sec), File) +
+               "\n>>> referenced by " + getObjMsg(P->r_offset));
+        }
         P->setSymbolAndType(0, 0, false);
         continue;
       }
diff --git a/lld/test/ELF/Inputs/comdat-discarded-reloc.s b/lld/test/ELF/Inputs/comdat-discarded-reloc.s
index 9526f5ac95c00..d89575f27e944 100644
--- a/lld/test/ELF/Inputs/comdat-discarded-reloc.s
+++ b/lld/test/ELF/Inputs/comdat-discarded-reloc.s
@@ -1,6 +1,8 @@
-.section .text.bar1,"aG",@progbits,group,comdat
+.global bar, _start
 
-.section .text.bar2
-.global bar
-bar:
- .quad .text.bar1
+.section .text.foo,"aG",@progbits,group,comdat
+
+.section .text
+_start:
+ .quad .text.foo
+ .quad bar
diff --git a/lld/test/ELF/comdat-discarded-reloc.s b/lld/test/ELF/comdat-discarded-reloc.s
index d12732cd3569b..4d50ab10e5127 100644
--- a/lld/test/ELF/comdat-discarded-reloc.s
+++ b/lld/test/ELF/comdat-discarded-reloc.s
@@ -1,17 +1,42 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/comdat-discarded-reloc.s -o %t2.o
-# RUN: ld.lld -gc-sections --noinhibit-exec %t.o %t2.o -o /dev/null
+# RUN: ld.lld -gc-sections --noinhibit-exec %t2.o %t.o -o /dev/null
+# RUN: ld.lld -r %t2.o %t.o -o %t 2>&1 | FileCheck --check-prefix=WARN %s
+# RUN: llvm-readobj -r %t | FileCheck --check-prefix=RELOC %s
 
 ## ELF spec doesn't allow a relocation to point to a deduplicated
 ## COMDAT section. Unfortunately this happens in practice (e.g. .eh_frame)
 ## Test case checks we do not crash.
 
-.global bar, _start
+# WARN: warning: relocation refers to a discarded section: .text.bar1
+# WARN-NEXT: >>> referenced by {{.*}}.o:(.rela.text.bar2+0x0)
+# WARN-NOT: warning
 
-.section .text.foo,"aG",@progbits,group,comdat
+# RELOC:      .rela.eh_frame {
+# RELOC-NEXT:   R_X86_64_NONE
+# RELOC-NEXT: }
+# RELOC-NEXT: .rela.debug_foo {
+# RELOC-NEXT:   R_X86_64_NONE
+# RELOC-NEXT: }
+# RELOC-NEXT: .rela.gcc_except_table {
+# RELOC-NEXT:   R_X86_64_NONE
+# RELOC-NEXT: }
 
-.section .text
-_start:
- .quad .text.foo
- .quad bar
+.section .text.bar1,"aG",@progbits,group,comdat
+
+## .text.bar1 in this file is discarded. Warn on the relocation.
+.section .text.bar2,"ax"
+.globl bar
+bar:
+  .quad .text.bar1
+
+## Don't warn on .eh_frame, .debug*, .zdebug*, or .gcc_except_table
+.section .eh_frame,"a"
+  .quad .text.bar1
+
+.section .debug_foo
+  .quad .text.bar1
+
+.section .gcc_except_table,"a"
+  .quad .text.bar1
diff --git a/lld/test/ELF/invalid-undef-section-symbol.test b/lld/test/ELF/invalid-undef-section-symbol.test
deleted file mode 100644
index 80e5a1464d740..0000000000000
--- a/lld/test/ELF/invalid-undef-section-symbol.test
+++ /dev/null
@@ -1,26 +0,0 @@
-# RUN: yaml2obj %s -o %t.o
-# RUN: not ld.lld -r --fatal-warnings %t.o -o /dev/null 2>&1 | FileCheck %s
-
-# We used to crash at this.
-# CHECK: STT_SECTION symbol should be defined
-
---- !ELF
-FileHeader:
-  Class:           ELFCLASS64
-  Data:            ELFDATA2LSB
-  Type:            ET_REL
-  Machine:         EM_X86_64
-Sections:
-  - Name:            .text
-    Type:            SHT_PROGBITS
-  - Name:            .rela.text
-    Type:            SHT_RELA
-    AddressAlign:    0x0000000000000008
-    Info:            .text
-    Relocations:
-      - Offset:          0x0000000000000000
-        Symbol:          .text
-        Type:            R_X86_64_NONE
-Symbols:
-  - Name:            .text
-    Type:            STT_SECTION
diff --git a/lld/test/ELF/relocatable-eh-frame.s b/lld/test/ELF/relocatable-eh-frame.s
deleted file mode 100644
index 6172dd355db4a..0000000000000
--- a/lld/test/ELF/relocatable-eh-frame.s
+++ /dev/null
@@ -1,21 +0,0 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: ld.lld -r %t.o %t.o -o %t 2>&1 | FileCheck --check-prefix=WARN %s
-# RUN: llvm-readobj -r %t | FileCheck %s
-# RUN: ld.lld %t -o %t.so -shared
-# RUN: llvm-objdump -h %t.so | FileCheck --check-prefix=DSO %s
-
-# WARN: STT_SECTION symbol should be defined
-
-# DSO: .eh_frame     00000034
-
-# CHECK:      Relocations [
-# CHECK-NEXT:   Section ({{.*}}) .rela.eh_frame {
-# CHECK-NEXT:     0x20 R_X86_64_PC32 .foo 0x0
-# CHECK-NEXT:     0x50 R_X86_64_NONE - 0x0
-# CHECK-NEXT:   }
-# CHECK-NEXT: ]
-
-.section .foo,"aG",@progbits,bar,comdat
-.cfi_startproc
-.cfi_endproc

From 92f95d219123fff91cedcac5d2aa0480faf561db Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 13:42:45 +0000
Subject: [PATCH 1016/1176] gn build: Merge r362459

llvm-svn: 362498
---
 llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn       | 1 +
 llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn | 1 +
 2 files changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn
index 2934e9cfcdffb..e38bfb75afe30 100644
--- a/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Lex/BUILD.gn
@@ -6,6 +6,7 @@ static_library("Lex") {
     "//llvm/lib/Support",
   ]
   sources = [
+    "DependencyDirectivesSourceMinimizer.cpp",
     "HeaderMap.cpp",
     "HeaderSearch.cpp",
     "Lexer.cpp",
diff --git a/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn
index 63180cb99537a..3c79748808a7f 100644
--- a/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/Lex/BUILD.gn
@@ -11,6 +11,7 @@ unittest("LexTests") {
     "//llvm/lib/Support",
   ]
   sources = [
+    "DependencyDirectivesSourceMinimizerTest.cpp",
     "HeaderMapTest.cpp",
     "HeaderSearchTest.cpp",
     "LexerTest.cpp",

From 7f22fecac29e6da426df4ef211da575bb851cfb0 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Tue, 4 Jun 2019 14:17:46 +0000
Subject: [PATCH 1017/1176] [SimplifyCFG] NFC; remove bogus test case

Even if one bit is defined, the code is not clear what it is suppose to do.

The test wants to assert that some bits are undef, but that's not what the IR does and I don't think it's even possible to do that in any meaningful way. It was added in D12497, so @reames might want to double check.

Differential Revision: https://reviews.llvm.org/D60859

llvm-svn: 362499
---
 .../SimplifyCFG/switch-dead-default.ll        | 26 -------------------
 1 file changed, 26 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index e5c2ef65b3181..e14eab98c69cf 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -151,29 +151,3 @@ default:
   call void @foo(i32 2)
   ret void
 }
-
-;; All but one bit known undef
-;; Note: This is currently testing an optimization which doesn't trigger. The
-;; case this is protecting against is that a bit could be assumed both zero 
-;; *or* one given we know it's undef.  ValueTracking doesn't do this today,
-;; but it doesn't hurt to confirm.
-define void @test8(i8 %a) {
-; CHECK-LABEL: @test8(
-; CHECK: switch i8
-  %and = and i8 %a, 254
-  %cmp = icmp eq i8 %and, undef
-  call void @llvm.assume(i1 %cmp)
-  switch i8 %a, label %default [i8 255, label %true
-                                i8 254, label %false]
-true:
-  call void @foo(i32 1)
-  ret void
-false:
-  call void @foo(i32 3)
-  ret void
-default:
-  call void @foo(i32 2)
-  ret void
-}
-
-declare void @llvm.assume(i1)

From e4ad1b7bbedffb2eac9a599eff98e5633a8dbfee Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Tue, 4 Jun 2019 14:21:48 +0000
Subject: [PATCH 1018/1176] [CMake] Move and add settings to Apple-lldb-base
 cache script

llvm-svn: 362500
---
 lldb/cmake/caches/Apple-lldb-base.cmake  | 5 +++++
 lldb/cmake/caches/Apple-lldb-macOS.cmake | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/lldb/cmake/caches/Apple-lldb-base.cmake b/lldb/cmake/caches/Apple-lldb-base.cmake
index c0b83b66f2f25..03287b06285fc 100644
--- a/lldb/cmake/caches/Apple-lldb-base.cmake
+++ b/lldb/cmake/caches/Apple-lldb-base.cmake
@@ -1,5 +1,10 @@
+set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "")
+
 set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64 CACHE STRING "")
 set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "")
+set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "")
+set(LLVM_ENABLE_MODULES ON CACHE BOOL "")
 
 # Release builds set these explicitly:
 #set(LLDB_VERSION_MAJOR 9999 CACHE STRING "")
diff --git a/lldb/cmake/caches/Apple-lldb-macOS.cmake b/lldb/cmake/caches/Apple-lldb-macOS.cmake
index 4745916838a60..81ff59385cf82 100644
--- a/lldb/cmake/caches/Apple-lldb-macOS.cmake
+++ b/lldb/cmake/caches/Apple-lldb-macOS.cmake
@@ -17,6 +17,3 @@ set(LLDB_FRAMEWORK_INSTALL_DIR /Applications/Xcode.app/Contents/SharedFrameworks
 set(CMAKE_OSX_DEPLOYMENT_TARGET 10.11 CACHE STRING "")
 set(LLDB_USE_SYSTEM_DEBUGSERVER ON CACHE BOOL "")
 set(LLVM_EXTERNALIZE_DEBUGINFO OFF CACHE BOOL "")
-
-set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
-set(LLVM_ENABLE_MODULES ON CACHE BOOL "")

From 2ee9a827ad32ec00494e698c6f40b6cce47510bc Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Tue, 4 Jun 2019 14:32:52 +0000
Subject: [PATCH 1019/1176] [SimplifyCFG] fix last commit

llvm-svn: 362501
---
 llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index e14eab98c69cf..77d7e748c7815 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -151,3 +151,6 @@ default:
   call void @foo(i32 2)
   ret void
 }
+
+declare void @llvm.assume(i1)
+

From d6de9426ee956740c024d01ae7bac014ab3e94c1 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 4 Jun 2019 14:40:37 +0000
Subject: [PATCH 1020/1176] [x86] add test for store merging/splitting; NFC

This is a reduction of a test that would infinite loop with D62498.

llvm-svn: 362502
---
 llvm/test/CodeGen/X86/vector-trunc-widen.ll | 126 ++++++++++++++++++++
 1 file changed, 126 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index c6b36e4aae783..1eff810f7575b 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -2002,3 +2002,129 @@ define void @PR34773(i16* %a0, i8* %a1) {
   store <16 x i8> %12, <16 x i8>* %6, align 1
   ret void
 }
+
+; Store merging must not infinitely fight store splitting.
+
+define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, <8 x i16>* %p) align 2 {
+; SSE2-LABEL: store_merge_split:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pslld $16, %xmm1
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    pslld $16, %xmm0
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    pslld $16, %xmm3
+; SSE2-NEXT:    psrad $16, %xmm3
+; SSE2-NEXT:    pslld $16, %xmm2
+; SSE2-NEXT:    psrad $16, %xmm2
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
+; SSE2-NEXT:    shlq $4, %rdi
+; SSE2-NEXT:    movdqu %xmm0, (%rsi,%rdi)
+; SSE2-NEXT:    movdqu %xmm2, 16(%rsi,%rdi)
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: store_merge_split:
+; SSSE3:       # %bb.0:
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT:    pshufb %xmm4, %xmm1
+; SSSE3-NEXT:    pshufb %xmm4, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    pshufb %xmm4, %xmm3
+; SSSE3-NEXT:    pshufb %xmm4, %xmm2
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSSE3-NEXT:    shlq $4, %rdi
+; SSSE3-NEXT:    movdqu %xmm0, (%rsi,%rdi)
+; SSSE3-NEXT:    movdqu %xmm2, 16(%rsi,%rdi)
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: store_merge_split:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    pshufb %xmm4, %xmm1
+; SSE41-NEXT:    pshufb %xmm4, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    pshufb %xmm4, %xmm3
+; SSE41-NEXT:    pshufb %xmm4, %xmm2
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT:    shlq $4, %rdi
+; SSE41-NEXT:    movdqu %xmm0, (%rsi,%rdi)
+; SSE41-NEXT:    movdqu %xmm2, 16(%rsi,%rdi)
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: store_merge_split:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    shlq $4, %rdi
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi,%rdi)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi,%rdi)
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: store_merge_split:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT:    shlq $4, %rdi
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqu %ymm0, (%rsi,%rdi)
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: store_merge_split:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT:    shlq $4, %rdi
+; AVX512F-NEXT:    vmovdqu %xmm0, (%rsi,%rdi)
+; AVX512F-NEXT:    vmovdqu %xmm1, 16(%rsi,%rdi)
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: store_merge_split:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    shlq $4, %rdi
+; AVX512VL-NEXT:    vpmovdw %ymm0, (%rsi,%rdi)
+; AVX512VL-NEXT:    vpmovdw %ymm1, 16(%rsi,%rdi)
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+;
+; AVX512BW-LABEL: store_merge_split:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
+; AVX512BW-NEXT:    shlq $4, %rdi
+; AVX512BW-NEXT:    vmovdqu %xmm0, (%rsi,%rdi)
+; AVX512BW-NEXT:    vmovdqu %xmm1, 16(%rsi,%rdi)
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512BWVL-LABEL: store_merge_split:
+; AVX512BWVL:       # %bb.0:
+; AVX512BWVL-NEXT:    shlq $4, %rdi
+; AVX512BWVL-NEXT:    vpmovdw %ymm0, (%rsi,%rdi)
+; AVX512BWVL-NEXT:    vpmovdw %ymm1, 16(%rsi,%rdi)
+; AVX512BWVL-NEXT:    vzeroupper
+; AVX512BWVL-NEXT:    retq
+  %t1 = trunc <8 x i32> %w1 to <8 x i16>
+  %t2 = trunc <8 x i32> %w2 to <8 x i16>
+  %g1 = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 %idx
+  %g2 = getelementptr inbounds <8 x i16>, <8 x i16>* %g1, i64 1
+  store <8 x i16> %t1, <8 x i16>* %g1, align 2
+  store <8 x i16> %t2, <8 x i16>* %g2, align 2
+  ret void
+}

From 669775f9db794741c8b15dc06c730afe95ce44f2 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Tue, 4 Jun 2019 14:51:15 +0000
Subject: [PATCH 1021/1176] [Support] make countLeadingZeros()
 countTrailingZeros() countLeadingOnes() and countTrailingOnes() return
 unsigned

This matches APInt's versions of these functions, and there is no need for these to be size_t.

(as well as __builtin_clzll())

Differential Revision: https://reviews.llvm.org/D60823

llvm-svn: 362503
---
 llvm/include/llvm/Support/MathExtras.h      | 24 ++++++++++-----------
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index e902a725659ad..85d5a5ae4b903 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -51,14 +51,14 @@ enum ZeroBehavior {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static std::size_t count(T Val, ZeroBehavior) {
+  static unsigned count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
     if (Val & 0x1)
       return 0;
 
     // Bisection method.
-    std::size_t ZeroBits = 0;
+    unsigned ZeroBits = 0;
     T Shift = std::numeric_limits<T>::digits >> 1;
     T Mask = std::numeric_limits<T>::max() >> Shift;
     while (Shift) {
@@ -75,7 +75,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct TrailingZerosCounter<T, 4> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -91,7 +91,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct TrailingZerosCounter<T, 8> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -116,7 +116,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -125,12 +125,12 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static std::size_t count(T Val, ZeroBehavior) {
+  static unsigned count(T Val, ZeroBehavior) {
     if (!Val)
       return std::numeric_limits<T>::digits;
 
     // Bisection method.
-    std::size_t ZeroBits = 0;
+    unsigned ZeroBits = 0;
     for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
       T Tmp = Val >> Shift;
       if (Tmp)
@@ -144,7 +144,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
 
 #if __GNUC__ >= 4 || defined(_MSC_VER)
 template <typename T> struct LeadingZerosCounter<T, 4> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 32;
 
@@ -160,7 +160,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct LeadingZerosCounter<T, 8> {
-  static std::size_t count(T Val, ZeroBehavior ZB) {
+  static unsigned count(T Val, ZeroBehavior ZB) {
     if (ZB != ZB_Undefined && Val == 0)
       return 64;
 
@@ -185,7 +185,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
 ///   valid arguments.
 template <typename T>
-std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -458,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
@@ -474,7 +474,7 @@ std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
 /// \param ZB the behavior on an input of all ones. Only ZB_Width and
 /// ZB_Undefined are valid arguments.
 template <typename T>
-std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
   static_assert(std::numeric_limits<T>::is_integer &&
                     !std::numeric_limits<T>::is_signed,
                 "Only unsigned integral types are allowed.");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 9dcbb599589c6..3b1faacab36d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1156,7 +1156,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
 
   // These alignment values are specified in powers of two, so alignment =
   // 2^n.  The minimum alignment is 2^4 = 16.
-  Out.kernarg_segment_alignment = std::max((size_t)4,
+  Out.kernarg_segment_alignment = std::max<size_t>(4,
       countTrailingZeros(MaxKernArgAlign));
 }
 

From a6e289e9f8b1a4b03c96176483e5ec478e281720 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 4 Jun 2019 15:02:33 +0000
Subject: [PATCH 1022/1176] [X86][SSE] Pulled out (sub (xor X, M), M)
 'ConditionalNegate' out pattern match code. NFCI.

As discussed on D62777 - we should be able to use this in more SSE41+ cases as well but that requires us to separate it from the OR(AND(),ANDN()) matcher.

llvm-svn: 362504
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 115 ++++++++++++++----------
 1 file changed, 66 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1f6533c5c57f0..e493d3d719416 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38102,6 +38102,68 @@ static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
   return true;
 }
 
+// Try to match:
+//   (or (and (M, (sub 0, X)), (pandn M, X)))
+// which is a special case of vselect:
+//   (vselect M, (sub 0, X), X)
+// Per:
+// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+// We know that, if fNegate is 0 or 1:
+//   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+//
+// Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+//   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+//   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
+// This lets us transform our vselect to:
+//   (add (xor X, M), (and M, 1))
+// And further to:
+//   (sub (xor X, M), M)
+static SDValue combineLogicBlendIntoConditionalNegate(
+    EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
+    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+  EVT MaskVT = Mask.getValueType();
+  unsigned EltBits = MaskVT.getScalarSizeInBits();
+  assert(MaskVT.isInteger() && DAG.ComputeNumSignBits(Mask) == EltBits &&
+         "Mask must be zero/all-bits");
+
+  if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
+    return SDValue();
+  if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
+    return SDValue();
+
+  auto IsNegV = [](SDNode *N, SDValue V) {
+    return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
+           ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
+  };
+
+  SDValue V;
+  if (IsNegV(Y.getNode(), X))
+    V = X;
+  else if (IsNegV(X.getNode(), Y))
+    V = Y;
+  else
+    return SDValue();
+
+  SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
+  SDValue SubOp2 = Mask;
+
+  // If the negate was on the false side of the select, then
+  // the operands of the SUB need to be swapped. PR 27251.
+  // This is because the pattern being matched above is
+  // (vselect M, (sub (0, X), X)  -> (sub (xor X, M), M)
+  // but if the pattern matched was
+  // (vselect M, X, (sub (0, X))), that is really negation of the pattern
+  // above, -(vselect M, (sub 0, X), X), and therefore the replacement
+  // pattern also needs to be a negation of the replacement pattern above.
+  // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
+  // sub accomplishes the negation of the replacement pattern.
+  if (V == Y)
+    std::swap(SubOp1, SubOp2);
+
+  SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
+  return DAG.getBitcast(VT, Res);
+}
+
 // Try to fold:
 //   (or (and (m, y), (pandn m, x)))
 // into:
@@ -38137,55 +38199,10 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
 
   SDLoc DL(N);
 
-  // Try to match:
-  //   (or (and (M, (sub 0, X)), (pandn M, X)))
-  // which is a special case of vselect:
-  //   (vselect M, (sub 0, X), X)
-  // Per:
-  // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
-  // We know that, if fNegate is 0 or 1:
-  //   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
-  //
-  // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
-  //   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
-  //   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
-  // This lets us transform our vselect to:
-  //   (add (xor X, M), (and M, 1))
-  // And further to:
-  //   (sub (xor X, M), M)
-  if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT &&
-      DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) {
-    auto IsNegV = [](SDNode *N, SDValue V) {
-      return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
-        ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
-    };
-    SDValue V;
-    if (IsNegV(Y.getNode(), X))
-      V = X;
-    else if (IsNegV(X.getNode(), Y))
-      V = Y;
-
-    if (V) {
-      SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
-      SDValue SubOp2 = Mask;
-
-      // If the negate was on the false side of the select, then
-      // the operands of the SUB need to be swapped. PR 27251.
-      // This is because the pattern being matched above is
-      // (vselect M, (sub (0, X), X)  -> (sub (xor X, M), M)
-      // but if the pattern matched was
-      // (vselect M, X, (sub (0, X))), that is really negation of the pattern
-      // above, -(vselect M, (sub 0, X), X), and therefore the replacement
-      // pattern also needs to be a negation of the replacement pattern above.
-      // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
-      // sub accomplishes the negation of the replacement pattern.
-      if (V == Y)
-         std::swap(SubOp1, SubOp2);
-
-      SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
-      return DAG.getBitcast(VT, Res);
-    }
-  }
+  // Attempt to combine to conditional negate: (sub (xor X, M), M)
+  if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
+                                                           DAG, Subtarget))
+    return Res;
 
   // PBLENDVB is only available on SSE 4.1.
   if (!Subtarget.hasSSE41())

From c5fe030c166b0fee57b7a5dfea20f24f4571fe29 Mon Sep 17 00:00:00 2001
From: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>
Date: Tue, 4 Jun 2019 15:05:53 +0000
Subject: [PATCH 1023/1176] [OpenMP][libomptarget] Enable usage of unified
 memory for declare target link variables

Summary: This patch enables the usage of a host variable on the device for declare target link variables when unified memory is available.

Reviewers: ABataev, caomhin, grokos

Reviewed By: grokos

Subscribers: Hahnfeld, guansong, jdoerfert, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D60884

llvm-svn: 362505
---
 openmp/libomptarget/include/omptargetplugin.h |  3 +++
 openmp/libomptarget/plugins/cuda/src/rtl.cpp  | 23 +++++++++++++++++++
 openmp/libomptarget/plugins/exports           |  1 +
 openmp/libomptarget/src/device.cpp            |  3 +++
 openmp/libomptarget/src/rtl.cpp               |  4 ++++
 openmp/libomptarget/src/rtl.h                 |  7 ++++--
 6 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index 2876bfbcdf551..e03416ccf2ddf 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -31,6 +31,9 @@ int32_t __tgt_rtl_number_of_devices(void);
 // having to load the library, which can be expensive.
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
 
+// Initialize the requires flags for the device.
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags);
+
 // Initialize the specified device. In case of success return 0; otherwise
 // return an error code.
 int32_t __tgt_rtl_init_device(int32_t ID);
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index fc0c1ecd18767..844afa107bb82 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -111,6 +111,9 @@ class RTLDeviceInfoTy {
   int EnvNumTeams;
   int EnvTeamLimit;
 
+  // OpenMP Requires Flags
+  int64_t RequiresFlags;
+
   //static int EnvNumThreads;
   static const int HardTeamLimit = 1<<16; // 64k
   static const int HardThreadLimit = 1024;
@@ -227,6 +230,9 @@ class RTLDeviceInfoTy {
     } else {
       EnvNumTeams = -1;
     }
+
+    // Default state.
+    RequiresFlags = OMP_REQ_UNDEFINED;
   }
 
   ~RTLDeviceInfoTy() {
@@ -264,6 +270,12 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
 
 int32_t __tgt_rtl_number_of_devices() { return DeviceInfo.NumberOfDevices; }
 
+int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
+  DP("Init requires flags to %ld\n", RequiresFlags);
+  DeviceInfo.RequiresFlags = RequiresFlags;
+  return RequiresFlags;
+}
+
 int32_t __tgt_rtl_init_device(int32_t device_id) {
 
   CUdevice cuDevice;
@@ -436,6 +448,17 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
           DPxPTR(e - HostBegin), e->name, DPxPTR(cuptr));
       entry.addr = (void *)cuptr;
 
+      if (DeviceInfo.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+          e->flags & OMP_DECLARE_TARGET_LINK) {
+        // If unified memory is present any target link variables
+        // can access host addresses directly. There is no longer a
+        // need for device copies.
+        cuMemcpyHtoD(cuptr, e->addr, sizeof(void *));
+        DP("Copy linked variable host address (" DPxMOD ")"
+           "to device address (" DPxMOD ")\n",
+          DPxPTR(*((void**)e->addr)), DPxPTR(cuptr));
+      }
+
       DeviceInfo.addOffloadEntry(device_id, entry);
 
       continue;
diff --git a/openmp/libomptarget/plugins/exports b/openmp/libomptarget/plugins/exports
index 3f9f7d449ceba..a14bedf0791a5 100644
--- a/openmp/libomptarget/plugins/exports
+++ b/openmp/libomptarget/plugins/exports
@@ -2,6 +2,7 @@ VERS1.0 {
   global:
     __tgt_rtl_is_valid_binary;
     __tgt_rtl_number_of_devices;
+    __tgt_rtl_init_requires;
     __tgt_rtl_init_device;
     __tgt_rtl_load_binary;
     __tgt_rtl_data_alloc;
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index a946b928be940..5ecba5759eb01 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -275,6 +275,9 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
 
 /// Init device, should not be called directly.
 void DeviceTy::init() {
+  // Make call to init_requires if it exists for this plugin.
+  if (RTL->init_requires)
+    RTL->init_requires(RTLRequiresFlags);
   int32_t rc = RTL->init_device(RTLDeviceID);
   if (rc == OFFLOAD_SUCCESS) {
     IsInit = true;
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 770ae36a82918..4eb7ab71489fc 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -107,6 +107,10 @@ void RTLsTy::LoadRTLs() {
               dynlib_handle, "__tgt_rtl_run_target_team_region")))
       continue;
 
+    // Optional functions
+    *((void**) &R.init_requires) = dlsym(
+        dynlib_handle, "__tgt_rtl_init_requires");
+
     // No devices are supported by this RTL?
     if (!(R.NumberOfDevices = R.number_of_devices())) {
       DP("No devices supported in this RTL\n");
diff --git a/openmp/libomptarget/src/rtl.h b/openmp/libomptarget/src/rtl.h
index 381f23ea05476..8148e81e7df6a 100644
--- a/openmp/libomptarget/src/rtl.h
+++ b/openmp/libomptarget/src/rtl.h
@@ -36,6 +36,7 @@ struct RTLInfoTy {
                                  int32_t);
   typedef int32_t(run_team_region_ty)(int32_t, void *, void **, ptrdiff_t *,
                                       int32_t, int32_t, int32_t, uint64_t);
+  typedef int64_t(init_requires_ty)(int64_t);
 
   int32_t Idx;                     // RTL index, index is the number of devices
                                    // of other RTLs that were registered before,
@@ -60,6 +61,7 @@ struct RTLInfoTy {
   data_delete_ty *data_delete;
   run_region_ty *run_region;
   run_team_region_ty *run_team_region;
+  init_requires_ty *init_requires;
 
   // Are there images associated with this RTL.
   bool isUsed;
@@ -78,8 +80,8 @@ struct RTLInfoTy {
 #endif
         is_valid_binary(0), number_of_devices(0), init_device(0),
         load_binary(0), data_alloc(0), data_submit(0), data_retrieve(0),
-        data_delete(0), run_region(0), run_team_region(0), isUsed(false),
-        Mtx() {}
+        data_delete(0), run_region(0), run_team_region(0),
+        init_requires(0), isUsed(false), Mtx() {}
 
   RTLInfoTy(const RTLInfoTy &r) : Mtx() {
     Idx = r.Idx;
@@ -98,6 +100,7 @@ struct RTLInfoTy {
     data_delete = r.data_delete;
     run_region = r.run_region;
     run_team_region = r.run_team_region;
+    init_requires = r.init_requires;
     isUsed = r.isUsed;
   }
 };

From 880d21d3cbfd143f11c2bd2f5a5d6b46bfcb1a70 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 15:13:30 +0000
Subject: [PATCH 1024/1176] llvm-undname: Several behavior-preserving changes
 to increase coverage

- Replace `Error = true` in a few branches that are truly unreachable
  with DEMANGLE_UNREACHABLE

- Remove early return early in startsWithLocalScopePattern() because
  it's redundant with the next two early returns

- Remove unreachable `case '0'` (it's handled in the branch below)

- Remove an unused bool return

- Add test coverage for several early error returns, mostly in
  array type parsing

llvm-svn: 362506
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp      | 17 +++++--------
 llvm/lib/Demangle/MicrosoftDemangleNodes.cpp |  9 ++++---
 llvm/test/Demangle/invalid-manglings.test    | 25 ++++++++++++++++++++
 llvm/test/Demangle/ms-basic.test             |  3 +++
 4 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index b93a84ea91d1f..83e6f60de033a 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -58,8 +58,9 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
     // what.
     break;
   default:
-    Error = true;
-    return false;
+    // isMemberPointer() is called only if isPointerType() returns true,
+    // and it rejects other prefixes.
+    DEMANGLE_UNREACHABLE;
   }
 
   // If it starts with a number, then 6 indicates a non-member function
@@ -141,8 +142,6 @@ consumeSpecialIntrinsicKind(StringView &MangledName) {
 static bool startsWithLocalScopePattern(StringView S) {
   if (!S.consumeFront('?'))
     return false;
-  if (S.size() < 2)
-    return false;
 
   size_t End = S.find('?');
   if (End == StringView::npos)
@@ -2197,7 +2196,7 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
       MangledName = MangledName.dropFront();
       // 1 - single inheritance       <name>
       // H - multiple inheritance     <name> <number>
-      // I - virtual inheritance      <name> <number> <number> <number>
+      // I - virtual inheritance      <name> <number> <number>
       // J - unspecified inheritance  <name> <number> <number> <number>
       char InheritanceSpecifier = MangledName.popFront();
       SymbolNode *S = nullptr;
@@ -2226,8 +2225,7 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
       case '1':
         break;
       default:
-        Error = true;
-        break;
+        DEMANGLE_UNREACHABLE;
       }
       TPRN->Affinity = PointerAffinity::Pointer;
       TPRN->Symbol = S;
@@ -2254,12 +2252,9 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
             demangleSigned(MangledName);
         TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
             demangleSigned(MangledName);
-        DEMANGLE_FALLTHROUGH;
-      case '0':
         break;
       default:
-        Error = true;
-        break;
+        DEMANGLE_UNREACHABLE;
       }
       TPRN->IsMemberPointer = true;
 
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index c26151c5b19f1..c07fde897e0d9 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -34,21 +34,20 @@ static void outputSpaceIfNecessary(OutputStream &OS) {
     OS << " ";
 }
 
-static bool outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+static void outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
   switch (Q) {
   case Q_Const:
     OS << "const";
-    return true;
+    break;
   case Q_Volatile:
     OS << "volatile";
-    return true;
+    break;
   case Q_Restrict:
     OS << "__restrict";
-    return true;
+    break;
   default:
     break;
   }
-  return false;
 }
 
 static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index 8490c02501f61..8d84034452cf2 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -220,7 +220,32 @@
 ; CHECK-NEXT: ??_C@_0101234567@?$az
 ; CHECK-NEXT: error: Invalid mangled name
 
+??_C@_1201234567@a?$az
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_C@_1201234567@a?$az
+; CHECK-NEXT: error: Invalid mangled name
+
 ??@foo
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ??@foo
 ; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3YA@A
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3YA@A
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3Y~01KA
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3Y~01KA
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3Y0~1KA
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3Y0~1KA
+; CHECK-NEXT: error: Invalid mangled name
+
+?x@@3PEAY02$$CRHEA
+; CHECK-EMPTY:
+; CHECK-NEXT: ?x@@3PEAY02$$CRHEA
+; CHECK-NEXT: error: Invalid mangled name
diff --git a/llvm/test/Demangle/ms-basic.test b/llvm/test/Demangle/ms-basic.test
index b80d87c6455a9..844602bfe4bc9 100644
--- a/llvm/test/Demangle/ms-basic.test
+++ b/llvm/test/Demangle/ms-basic.test
@@ -11,6 +11,9 @@
 ?x@@3PEAPEAHEA
 ; CHECK: int **x
 
+?foo@@3Y123KA
+; CHECK: unsigned long foo[3][4]
+
 ?x@@3PEAY02HEA
 ; CHECK: int (*x)[3]
 

From 1e63dd0b44998721fefae9f690882af927ca3c2a Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 4 Jun 2019 15:15:59 +0000
Subject: [PATCH 1025/1176] [SelectionDAG][x86] limit post-legalization store
 merging by type

The proposal in D62498 showed that x86 would benefit from vector
store splitting, but that may conflict with the generic DAG
combiner's store merging transforms.

Add memory type to the existing TLI hook that enables the merging
transforms, so we can limit those changes to scalars only for x86.

llvm-svn: 362507
---
 llvm/include/llvm/CodeGen/TargetLowering.h    | 10 ++++++----
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.h         |  6 +++++-
 llvm/test/CodeGen/X86/vector-trunc-widen.ll   |  4 ++--
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d00cc1675cd32..ad17fd8455888 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -435,10 +435,12 @@ class TargetLoweringBase {
     return false;
   }
 
-  /// Allow store merging after legalization in addition to before legalization.
-  /// This may catch stores that do not exist earlier (eg, stores created from
-  /// intrinsics).
-  virtual bool mergeStoresAfterLegalization() const { return true; }
+  /// Allow store merging for the specified type after legalization in addition
+  /// to before legalization. This may transform stores that do not exist
+  /// earlier (for example, stores created from intrinsics).
+  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
+    return true;
+  }
 
   /// Returns if it's reasonable to merge stores to MemVT size.
   virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1d1699ce58956..33ef68c2f1ff2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16085,7 +16085,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   // Always perform this optimization before types are legal. If the target
   // prefers, also try this after legalization to catch stores that were created
   // by intrinsics or other nodes.
-  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
     while (true) {
       // There can be multiple store sequences on the same chain.
       // Keep trying to merge store sequences until we are unable to do so
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 74d5d80ee68e0..a17f5dae576f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -234,7 +234,7 @@ class AMDGPUTargetLowering : public TargetLowering {
   // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
   // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
   // now.
-  bool mergeStoresAfterLegalization() const override { return false; }
+  bool mergeStoresAfterLegalization(EVT) const override { return false; }
 
   bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
     return true;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7eed866614a04..42b5b06268a58 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -799,7 +799,11 @@ namespace llvm {
     /// This method returns the name of a target specific DAG node.
     const char *getTargetNodeName(unsigned Opcode) const override;
 
-    bool mergeStoresAfterLegalization() const override { return true; }
+    /// Do not merge vector stores after legalization because that may conflict
+    /// with x86-specific store splitting optimizations.
+    bool mergeStoresAfterLegalization(EVT MemVT) const override {
+      return !MemVT.isVector();
+    }
 
     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                           const SelectionDAG &DAG) const override;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index 1eff810f7575b..54ebdbe026aa7 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -2076,8 +2076,8 @@ define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, <8 x i16>
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
 ; AVX2-NEXT:    shlq $4, %rdi
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rsi,%rdi)
+; AVX2-NEXT:    vmovdqu %xmm0, (%rsi,%rdi)
+; AVX2-NEXT:    vmovdqu %xmm1, 16(%rsi,%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;

From 605d62e9de00b9f95a00202f66dfb03fe52ca523 Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Tue, 4 Jun 2019 15:18:46 +0000
Subject: [PATCH 1026/1176] No longer reject inputs when using a locale that
 has grouping information _and_ the input has no grouping characters at all.
 We continue to reject cases when the input has grouping characters in the
 wrong place. Fixes PR#28704

llvm-svn: 362508
---
 libcxx/src/locale.cpp                                         | 4 +++-
 .../locale.num.get/facet.num.get.members/get_long.pass.cpp    | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 00eb574ec4512..ca0a1c9b15fca 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -4379,7 +4379,9 @@ void
 __check_grouping(const string& __grouping, unsigned* __g, unsigned* __g_end,
                  ios_base::iostate& __err)
 {
-    if (__grouping.size() != 0)
+//  if the grouping pattern is empty _or_ there are no grouping bits, then do nothing
+//  we always have at least a single entry in [__g, __g_end); the end of the input sequence
+	if (__grouping.size() >= 0 && __g_end - __g > 1)
     {
         reverse(__g, __g_end);
         const char* __ig = __grouping.data();
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
index 42126260d980c..00b0c3a69b679 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long.pass.cpp
@@ -162,14 +162,14 @@ int main(int, char**)
     ios.imbue(std::locale(std::locale(), new my_numpunct));
     {
         v = -1;
-        const char str[] = "123";
+        const char str[] = "123"; // no separators at all
         std::ios_base::iostate err = ios.goodbit;
         input_iterator<const char*> iter =
             f.get(input_iterator<const char*>(str),
                   input_iterator<const char*>(str+sizeof(str)),
                   ios, err, v);
         assert(iter.base() == str+sizeof(str)-1);
-        assert(err == ios.failbit);
+        assert(err == ios.goodbit);
         assert(v == 123);
     }
     {

From 3144d7a2daa37e9570d708f14e4d3703ea759e72 Mon Sep 17 00:00:00 2001
From: Jinsong Ji <jji@us.ibm.com>
Date: Tue, 4 Jun 2019 15:22:23 +0000
Subject: [PATCH 1027/1176] [PowerPC] P9 Scheduling Model: dispatching rule
 fixes

This is to address some of the problems in existing P9 resource modeling,
especially about the dispatching rules.

Instead of using a hypothetical DISPATCHER , we try to use the number of
actual dispatch slots, and define SchedWriteRes to model dispatch rules,
then update instruction classes according to dispatch rules.

All the dispatch rules and instruction classes update are made according
to POWER9 User Manual.

Differential Revision: https://reviews.llvm.org/D61873

llvm-svn: 362509
---
 llvm/lib/Target/PowerPC/P9InstrResources.td   | 238 ++++++------
 llvm/lib/Target/PowerPC/PPCScheduleP9.td      |  50 ++-
 .../CodeGen/PowerPC/build-vector-tests.ll     |   8 +-
 .../CodeGen/PowerPC/csr-save-restore-order.ll |  48 +--
 .../PowerPC/vec_conv_fp32_to_i8_elts.ll       | 352 +++++++++---------
 5 files changed, 366 insertions(+), 330 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index f7ee2a83e5d60..b84b7bca2ca28 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -32,9 +32,8 @@
 
 // Two cycle ALU vector operation that uses an entire superslice.
 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatches (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     (instregex "VADDU(B|H|W|D)M$"),
     (instregex "VAND(C)?$"),
@@ -86,7 +85,7 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
 // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
 // single slice. However, since it is Restricted it requires all 3 dispatches
 // (DISP) for that superslice.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "TABORT(D|W)C(I)?$"),
     (instregex "MTFSB(0|1)$"),
@@ -102,7 +101,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
       (instrs
     (instregex "XSMAX(C|J)?DP$"),
     (instregex "XSMIN(C|J)?DP$"),
@@ -119,7 +118,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
 )>;
 
 // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
       (instrs
     (instregex "S(L|R)D$"),
     (instregex "SRAD(I)?$"),
@@ -172,7 +171,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
 //  single slice. However, since it is Restricted it requires all 3 dispatches
 //  (DISP) for that superslice.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "RLDC(L|R)$"),
     (instregex "RLWIMI(8)?$"),
@@ -199,9 +198,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 
 // Three cycle ALU vector operation that uses an entire superslice.
 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatches (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     (instregex "M(T|F)VSCR$"),
     (instregex "VCMPNEZ(B|H|W)$"),
@@ -286,8 +284,7 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
 // 7 cycle DP vector operation that uses an entire superslice.
 //  Uses both DP units (the even DPE and odd DPO units), two pipelines
 //  (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     VADDFP,
     VCTSXS,
@@ -397,7 +394,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
 
 // 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
 //  dispatch units for the superslice.
-def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MADD(HD|HDU|LD|LD8)$"),
     (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
@@ -405,7 +402,7 @@ def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 
 // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
 //  dispatch units for the superslice.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FRSP,
     (instregex "FRI(N|P|Z|M)(D|S)$"),
@@ -448,25 +445,25 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
 // These operations can be done in parallel.
-//  The DP is restricted so we need a full 5 dispatches.
+//  The DP is restricted so we need a full 4 dispatches.
 def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "FSEL(D|S)o$")
 )>;
 
 // 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
 def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "MUL(H|L)(D|W)(U)?o$")
 )>;
 
 // 7 cycle Restricted DP operation and one 3 cycle ALU operation.
 // These operations must be done sequentially.
-//  The DP is restricted so we need a full 5 dispatches.
+//  The DP is restricted so we need a full 4 dispatches.
 def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "FRI(N|P|Z|M)(D|S)o$"),
     (instregex "FRE(S)?o$"),
@@ -482,8 +479,8 @@ def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
     FRSPo
 )>;
 
-// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch units.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
       (instrs
     XSADDDP,
     XSADDSP,
@@ -519,9 +516,9 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
 )>;
 
 // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one
 //  dispatches.
-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
       (instrs
     (instregex "LVS(L|R)$"),
     (instregex "VSPLTIS(W|H|B)$"),
@@ -627,9 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one
 //  dispatches.
-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDSRo,
     XSADDQP,
@@ -651,17 +648,17 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one 
 //  dispatches.
-def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDCTSQo
 )>;
 
 // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one
 //  dispatches.
-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSMADDQP,
     XSMADDQPO,
@@ -676,17 +673,17 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one
 //  dispatches.
-def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     BCDCFSQo
 )>;
 
 // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one
 //  dispatches.
-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSDIVQP,
     XSDIVQPO
@@ -695,20 +692,20 @@ def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
 //  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
 //  dispatches.
-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
       (instrs
     XSSQRTQP,
     XSSQRTQPO
 )>;
 
 // 6 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "LXVL(L)?")
 )>;
 
 // 5 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "LVE(B|H|W)X$"),
     (instregex "LVX(L)?"),
@@ -727,7 +724,7 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
 )>;
 
 // 4 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "DCB(F|T|ST)(EP)?$"),
     (instregex "DCBZ(L)?(EP)?$"),
@@ -757,7 +754,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
 
 // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
 //  superslice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWZX,
     LFDX,
@@ -767,7 +764,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
 // Cracked Load Instructions.
 // Load instructions that can be done in parallel.
 def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C],
       (instrs
     SLBIA,
     SLBIE,
@@ -781,17 +778,26 @@ def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
 // Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
 // operations can be run in parallel.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C],
+      (instrs
+    (instregex "L(W|H)ZU(X)?(8)?$")
+)>;
+
+// Cracked TEND Instruction.
+// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations can be run in parallel.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
+              DISP_1C, DISP_1C],
       (instrs
-    (instregex "L(W|H)ZU(X)?(8)?$"),
     TEND
 )>;
 
+
 // Cracked Store Instruction
 // Consecutive Store and ALU instructions. The store is restricted and requires
 // three dispatches.
 def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "ST(B|H|W|D)CX$")
 )>;
@@ -799,7 +805,7 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
 // Cracked Load Instruction.
 // Two consecutive load operations for a total of 8 cycles.
 def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     LDMX
 )>;
@@ -808,7 +814,7 @@ def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
 //  operations cannot be done at the same time and so their latencies are added.
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     (instregex "LHA(X)?(8)?$"),
     (instregex "CP_PASTE(8)?o$"),
@@ -821,7 +827,7 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
 //  operations cannot be done at the same time and so their latencies are added.
 // Full 6 dispatches are required as this is both cracked and restricted.
 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFIWAX
 )>;
@@ -830,8 +836,7 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
 // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
 //  operations cannot be done at the same time and so their latencies are added.
 // Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     LXSIWAX,
     LIWAX
@@ -843,7 +848,7 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
 // their latencies are added.
 // Full 6 dispatches are required as this is a restricted instruction.
 def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     LFSX,
     LFS
@@ -853,8 +858,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
 // Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
 //  operations cannot be done at the same time and so their latencies are added.
 // Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     LXSSP,
     LXSSPX,
@@ -865,7 +869,7 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
 // Cracked 3-Way Load Instruction
 // Load with two ALU operations that depend on each other
 def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
       (instrs
     (instregex "LHAU(X)?(8)?$"),
     LWAUX
@@ -875,10 +879,10 @@ def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
 // Since the Load and the PM cannot be done at the same time the latencies are
 //  added. Requires 8 cycles.
 // Since the PM requires the full superslice we need both EXECE, EXECO pipelines
-//  as well as 3 dispatches for the PM. The Load requires the remaining 2
+//  as well as 1 dispatches for the PM. The Load requires the remaining 1
 //  dispatches.
 def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     LXVH8X,
     LXVDSX,
@@ -887,7 +891,7 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
 
 // Single slice Restricted store operation. The restricted operation requires
 //  all three dispatches for the superslice.
-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "STF(S|D|IWX|SX|DX)$"),
     (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
@@ -904,10 +908,9 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // Vector Store Instruction
-// Requires the whole superslice and therefore requires all three dispatches
+// Requires the whole superslice and therefore requires one dispatches
 // as well as both the Even and Odd exec pipelines.
-def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
       (instrs
     (instregex "STVE(B|H|W)X$"),
     (instregex "STVX(L)?$"),
@@ -915,18 +918,18 @@ def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
 )>;
 
 // 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
 // dispatches.
-def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
       (instrs
     (instregex "MTCTR(8)?(loop)?$"),
     (instregex "MTLR(8)?$")
 )>;
 
 // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
 // dispatches.
-def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
       (instrs
     (instregex "M(T|F)VRSAVE(v)?$"),
     (instregex "M(T|F)PMR$"),
@@ -937,10 +940,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
 )>;
 
 // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and two
 //  dispatches.
-def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVW,
     DIVWU,
@@ -948,10 +950,9 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
 )>;
 
 // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and two
 //  dispatches.
-def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVWE,
     DIVD,
@@ -965,8 +966,7 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
 // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
 //  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
 //  dispatches.
-def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
       (instrs
     DIVDE,
     DIVDEU
@@ -976,7 +976,7 @@ def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
 //  and one full superslice for the DIV operation since there is only one DIV
 //  per superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     (instregex "DIVW(U)?(O)?o$")
 )>;
@@ -985,7 +985,7 @@ def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
 //  and one full superslice for the DIV operation since there is only one DIV
 //  per superslice. Latency of DIV plus ALU is 26.
 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     DIVDo,
     DIVDUo,
@@ -997,7 +997,7 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
 //  and one full superslice for the DIV operation since there is only one DIV
 //  per superslice. Latency of DIV plus ALU is 42.
 def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_EVEN_1C, DISP_1C],
       (instrs
     DIVDEo,
     DIVDEUo
@@ -1011,7 +1011,7 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
 //  instructions running together on two pipelines and 6 dispatches.
 // ALU ops are 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     MTCRF,
     MTCRF8
@@ -1020,10 +1020,10 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
 //  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 4 dispatches.
+//  instructions running together on two pipelines and 2 dispatches.
 // ALU ops are 2 cycles each.
 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     (instregex "ADDC(8)?o$"),
     (instregex "SUBFC(8)?o$")
@@ -1035,7 +1035,7 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
 // One of the ALU ops is restricted the other is not so we have a total of
 // 5 dispatches.
 def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "F(N)?ABS(D|S)o$"),
     (instregex "FCPSGN(D|S)o$"),
@@ -1046,10 +1046,10 @@ def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
 // Cracked ALU operations.
 // Here the two ALU ops can actually be done in parallel and therefore the
 //  latencies are not added together. Otherwise this is like having two
-//  instructions running together on two pipelines and 4 dispatches.
+//  instructions running together on two pipelines and 2 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C, DISP_1C],
       (instrs
     MCRFS
 )>;
@@ -1060,7 +1060,7 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
 //  instructions running together on two pipelines and 6 dispatches.
 // ALU ops are 3 cycles each.
 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MTFSF(b|o)?$"),
     (instregex "MTFSFI(o)?$")
@@ -1070,7 +1070,7 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
 // The two ops cannot be done in parallel.
 // One of the ALU ops is restricted and takes 3 dispatches.
 def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "RLD(I)?C(R|L)o$"),
     (instregex "RLW(IMI|INM|NM)(8)?o$"),
@@ -1085,7 +1085,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
 // The two ops cannot be done in parallel.
 // Both of the ALU ops are restricted and take 3 dispatches.
 def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MFFS(L|CE|o)?$")
 )>;
@@ -1094,16 +1094,14 @@ def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
 // total of 6 cycles. All of the ALU operations are also restricted so each
 // takes 3 dispatches for a total of 9.
 def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
       (instrs
     (instregex "MFCR(8)?$")
 )>;
 
 // Cracked instruction made of two ALU ops.
 // The two ops cannot be done in parallel.
-def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "EXTSWSLIo$"),
     (instregex "SRAD(I)?o$"),
@@ -1113,110 +1111,110 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
 )>;
 
 // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FDIV
 )>;
 
 // 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FDIVo
 )>;
 
 // 36 Cycle DP Instruction.
 // Instruction can be done on a single slice.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
       (instrs
     XSSQRTDP
 )>;
 
 // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FSQRT
 )>;
 
 // 36 Cycle DP Vector Instruction.
 def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVSQRTDP
 )>;
 
 // 27 Cycle DP Vector Instruction.
 def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVSQRTSP
 )>;
 
 // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FSQRTo
 )>;
 
 // 26 Cycle DP Instruction.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
       (instrs
     XSSQRTSP
 )>;
 
 // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FSQRTS
 )>;
 
 // 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FSQRTSo
 )>;
 
-// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction. Takes one slice and 1 dispatches.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
       (instrs
     XSDIVDP
 )>;
 
 // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
       (instrs
     FDIVS
 )>;
 
 // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
 def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     FDIVSo
 )>;
 
-// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction. Takes one slice and 1 dispatches.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
       (instrs
     XSDIVSP
 )>;
 
 // 24 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
 //  superslice.
 def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVDIVSP
 )>;
 
 // 33 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
 //  superslice.
 def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
-              DISP_1C, DISP_1C, DISP_1C],
+              DISP_1C],
       (instrs
     XVDIVDP
 )>;
@@ -1225,12 +1223,11 @@ def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
 // The Load and one of the ALU ops cannot be run at the same time and so the
 //  latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
 // Both the load and the ALU that depends on it are restricted and so they take
-//  a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+//  a total of 7 dispatches. The final 2 dispatches come from the second ALU op.
 // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
 def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
               IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "LF(SU|SUX)$")
 )>;
@@ -1239,7 +1236,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
 // the store and so it can be run at the same time as the store. The store is
 // also restricted.
 def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "STF(S|D)U(X)?$"),
     (instregex "ST(B|H|W|D)U(X)?(8)?$")
@@ -1248,7 +1245,7 @@ def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
 // the load and so it can be run at the same time as the load.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_PAIR_1C, DISP_PAIR_1C],
       (instrs
     (instregex "LBZU(X)?(8)?$"),
     (instregex "LDU(X)?$")
@@ -1261,7 +1258,7 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
 //  are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
 //  is required for the ALU.
 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+              DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "LF(DU|DUX)$")
 )>;
@@ -1269,9 +1266,9 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
 // Crypto Instructions
 
 // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
-//  superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-//  dispatches.
-def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+//  superslice. That includes both exec pipelines (EXECO, EXECE) and one
+//  dispatch.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
       (instrs
     (instregex "VPMSUM(B|H|W|D)$"),
     (instregex "V(N)?CIPHER(LAST)?$"),
@@ -1281,7 +1278,7 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
 // Branch Instructions
 
 // Two Cycle Branch
-def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BR_2C, DISP_BR_1C],
       (instrs
   (instregex "BCCCTR(L)?(8)?$"),
   (instregex "BCCL(A|R|RL)?$"),
@@ -1312,8 +1309,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
 
 // Five Cycle Branch with a 2 Cycle ALU Op
 // Operations must be done consecutively and not in parallel.
-def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
       (instrs
     ADDPCIS
 )>;
@@ -1323,17 +1319,15 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
 // Atomic Load
 def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
               IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
-              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C],
+              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, 
+              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
       (instrs
     (instregex "L(D|W)AT$")
 )>;
 
 // Atomic Store
 def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
-              IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
-              DISP_1C],
+              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
       (instrs
     (instregex "ST(D|W)AT$")
 )>;
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 679cf3962ec9f..505bb14b63103 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -50,8 +50,21 @@ let SchedModel = P9Model in {
 
   // ***************** Processor Resources *****************
 
-  //Dispatcher:
-  def DISPATCHER : ProcResource<12>;
+  // Dispatcher slots:
+  // x0, x1, x2, and x3 are the dedicated slice dispatch ports, where each
+  // corresponds to one of the four execution slices.
+  def DISPx02 : ProcResource<2>;
+  def DISPx13 : ProcResource<2>;
+  // The xa and xb ports can be used to send an iop to either of the two slices
+  // of the superslice, but are restricted to iops with only two primary sources.
+  def DISPxab : ProcResource<2>;
+  // b0 and b1 are dedicated dispatch ports into the branch slice.
+  def DISPb01 : ProcResource<2>;
+
+  // Any non BR dispatch ports
+  def DISP_NBR
+      : ProcResGroup<[ DISPx02, DISPx13, DISPxab]>;
+  def DISP_SS : ProcResGroup<[ DISPx02, DISPx13]>;
 
   // Issue Ports
   // An instruction can go down one of two issue queues.
@@ -116,8 +129,37 @@ let SchedModel = P9Model in {
 
   // ***************** SchedWriteRes Definitions *****************
 
-  //Dispatcher
-  def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+  // Dispatcher
+  // Dispatch Rules: '-' or 'V'
+  // Vector ('V') - vector iops (128-bit operand) take only one decode and
+  // dispatch slot but are dispatched to both the even and odd slices of a
+  // superslice.
+  def DISP_1C : SchedWriteRes<[DISP_NBR]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Dispatch Rules: 'E' 
+  // Even slice ('E')- certain operations must be sent only to an even slice.
+  // Also consumes odd dispatch slice slot of the same superslice at dispatch
+  def DISP_EVEN_1C : SchedWriteRes<[ DISPx02, DISPx13 ]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Dispatch Rules: 'P'
+  // Paired ('P') - certain cracked and expanded iops are paired such that they
+  // must dispatch together to the same superslice.
+  def DISP_PAIR_1C : SchedWriteRes<[ DISP_SS, DISP_SS]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Tuple Restricted ('R') - certain iops preclude dispatching more than one
+  // operation per slice for the super- slice to which they are dispatched
+  def DISP_3SLOTS_1C : SchedWriteRes<[DISPx02, DISPx13, DISPxab]> {
+    let NumMicroOps = 0;
+    let Latency = 1;
+  }
+  // Each execution and branch slice can receive up to two iops per cycle
+  def DISP_BR_1C : SchedWriteRes<[ DISPxab ]> {
     let NumMicroOps = 0;
     let Latency = 1;
   }
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
index 302661b61b834..e94808f0a2d27 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -2012,9 +2012,9 @@ define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) {
 ; P9BE-NEXT:    lfd f0, 24(r3)
 ; P9BE-NEXT:    lfd f1, 16(r3)
 ; P9BE-NEXT:    lfd f2, 8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P9BE-NEXT:    lfd f3, 0(r3)
 ; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
-; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P9BE-NEXT:    xvcvdpsxws v2, vs1
 ; P9BE-NEXT:    xvcvdpsxws v3, vs0
 ; P9BE-NEXT:    vmrgew v2, v3, v2
@@ -2025,8 +2025,8 @@ define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) {
 ; P9LE-NEXT:    lfd f0, 24(r3)
 ; P9LE-NEXT:    lfd f2, 8(r3)
 ; P9LE-NEXT:    lfd f1, 16(r3)
-; P9LE-NEXT:    lfd f3, 0(r3)
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    lfd f3, 0(r3)
 ; P9LE-NEXT:    xvcvdpsxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P9LE-NEXT:    xvcvdpsxws v3, vs0
@@ -3596,9 +3596,9 @@ define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) {
 ; P9BE-NEXT:    lfd f0, 24(r3)
 ; P9BE-NEXT:    lfd f1, 16(r3)
 ; P9BE-NEXT:    lfd f2, 8(r3)
+; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P9BE-NEXT:    lfd f3, 0(r3)
 ; P9BE-NEXT:    xxmrghd vs1, vs1, vs3
-; P9BE-NEXT:    xxmrghd vs0, vs0, vs2
 ; P9BE-NEXT:    xvcvdpuxws v2, vs1
 ; P9BE-NEXT:    xvcvdpuxws v3, vs0
 ; P9BE-NEXT:    vmrgew v2, v3, v2
@@ -3609,8 +3609,8 @@ define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) {
 ; P9LE-NEXT:    lfd f0, 24(r3)
 ; P9LE-NEXT:    lfd f2, 8(r3)
 ; P9LE-NEXT:    lfd f1, 16(r3)
-; P9LE-NEXT:    lfd f3, 0(r3)
 ; P9LE-NEXT:    xxmrghd vs0, vs2, vs0
+; P9LE-NEXT:    lfd f3, 0(r3)
 ; P9LE-NEXT:    xvcvdpuxws v2, vs0
 ; P9LE-NEXT:    xxmrghd vs0, vs3, vs1
 ; P9LE-NEXT:    xvcvdpuxws v3, vs0
diff --git a/llvm/test/CodeGen/PowerPC/csr-save-restore-order.ll b/llvm/test/CodeGen/PowerPC/csr-save-restore-order.ll
index e3f38b945fbb6..dc858953e8120 100644
--- a/llvm/test/CodeGen/PowerPC/csr-save-restore-order.ll
+++ b/llvm/test/CodeGen/PowerPC/csr-save-restore-order.ll
@@ -58,18 +58,30 @@ define i64 @test(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 ; CHECK-PWR9-NEXT:    std r14, 240(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r15, 248(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r16, 256(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v20, 48(r1) # 16-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v21, 64(r1) # 16-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v22, 80(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r17, 264(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v23, 96(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r18, 272(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r19, 280(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v24, 112(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r20, 288(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v25, 128(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r21, 296(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v26, 144(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r22, 304(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r23, 312(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v27, 160(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r24, 320(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v28, 176(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r25, 328(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v29, 192(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r26, 336(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r27, 344(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v30, 208(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r28, 352(r1) # 8-byte Folded Spill
+; CHECK-PWR9-NEXT:    stxv v31, 224(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r29, 360(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r30, 368(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r31, 376(r1) # 8-byte Folded Spill
@@ -91,20 +103,8 @@ define i64 @test(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 ; CHECK-PWR9-NEXT:    stfd f29, 504(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    stfd f30, 512(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    stfd f31, 520(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v20, 48(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r4, 40(r1) # 8-byte Folded Spill
 ; CHECK-PWR9-NEXT:    std r3, 32(r1) # 8-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v21, 64(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v22, 80(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v23, 96(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v24, 112(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v25, 128(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v26, 144(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v27, 160(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v28, 176(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v29, 192(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v30, 208(r1) # 16-byte Folded Spill
-; CHECK-PWR9-NEXT:    stxv v31, 224(r1) # 16-byte Folded Spill
 ; CHECK-PWR9-NEXT:    #APP
 ; CHECK-PWR9-NEXT:    nop
 ; CHECK-PWR9-NEXT:    #NO_APP
@@ -129,18 +129,6 @@ define i64 @test(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 ; CHECK-PWR9-NEXT:    lfd f28, 496(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    lfd f27, 488(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    lfd f26, 480(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f25, 472(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f24, 464(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f23, 456(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f22, 448(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f21, 440(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f20, 432(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f19, 424(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f18, 416(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f17, 408(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f16, 400(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f15, 392(r1) # 8-byte Folded Reload
-; CHECK-PWR9-NEXT:    lfd f14, 384(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r31, 376(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r30, 368(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r29, 360(r1) # 8-byte Folded Reload
@@ -148,17 +136,29 @@ define i64 @test(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 ; CHECK-PWR9-NEXT:    ld r27, 344(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r26, 336(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r25, 328(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f25, 472(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r24, 320(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r23, 312(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r22, 304(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f24, 464(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r21, 296(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r20, 288(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r19, 280(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f23, 456(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r18, 272(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r17, 264(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r16, 256(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f22, 448(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r15, 248(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    ld r14, 240(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f21, 440(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f20, 432(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f19, 424(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f18, 416(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f17, 408(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f16, 400(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f15, 392(r1) # 8-byte Folded Reload
+; CHECK-PWR9-NEXT:    lfd f14, 384(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-NEXT:    addi r1, r1, 528
 ; CHECK-PWR9-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
index de38a82a3a803..ae1b70d0be97a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll
@@ -494,22 +494,101 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr
 ;
 ; CHECK-P9-LABEL: test16elt:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs3
+; CHECK-P9-NEXT:    xxswapd vs3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs4, vs4, 3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs4, vs4, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    lxv vs2, 48(r3)
-; CHECK-P9-NEXT:    lxv vs3, 32(r3)
-; CHECK-P9-NEXT:    lxv vs4, 16(r3)
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
 ; CHECK-P9-NEXT:    mtvsrd f1, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
@@ -519,87 +598,8 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr
 ; CHECK-P9-NEXT:    mtvsrd f1, r3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrglb v2, v3, v2
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    vmrglb v3, v3, v4
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    xxswapd vs0, vs4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrglb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    xxswapd vs0, vs3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    vmrglb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd vs0, vs2
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    vmrglb v4, v5, v4
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs1
 ; CHECK-P9-NEXT:    xxswapd v0, vs0
 ; CHECK-P9-NEXT:    vmrglb v5, v5, v0
 ; CHECK-P9-NEXT:    vmrglh v4, v5, v4
@@ -1212,22 +1212,101 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam
 ;
 ; CHECK-P9-LABEL: test16elt_signed:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    lxv vs0, 0(r3)
+; CHECK-P9-NEXT:    lxv vs2, 0(r3)
+; CHECK-P9-NEXT:    xxsldwi vs3, vs2, vs2, 3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    lxv vs0, 48(r3)
+; CHECK-P9-NEXT:    lxv vs1, 32(r3)
+; CHECK-P9-NEXT:    lxv vs4, 16(r3)
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v2, vs3
+; CHECK-P9-NEXT:    xxswapd vs3, vs2
+; CHECK-P9-NEXT:    xscvspdpn f3, vs3
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    xscvspdpn f3, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs2, vs2, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f3, f3
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f3
+; CHECK-P9-NEXT:    mtvsrd f3, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs4, vs4, 3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-P9-NEXT:    xxswapd v3, vs3
+; CHECK-P9-NEXT:    vmrglb v3, v3, v4
+; CHECK-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs4
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs4
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs4, vs4, 1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs2
+; CHECK-P9-NEXT:    xxsldwi vs2, vs1, vs1, 3
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    vmrglw v2, v3, v2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v3, vs2
+; CHECK-P9-NEXT:    xxswapd vs2, vs1
+; CHECK-P9-NEXT:    xscvspdpn f2, vs2
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    xscvspdpn f2, vs1
+; CHECK-P9-NEXT:    xxsldwi vs1, vs1, vs1, 1
+; CHECK-P9-NEXT:    xscvspdpn f1, vs1
+; CHECK-P9-NEXT:    xscvdpsxws f2, f2
+; CHECK-P9-NEXT:    xscvdpsxws f1, f1
+; CHECK-P9-NEXT:    mfvsrwz r3, f2
+; CHECK-P9-NEXT:    mtvsrd f2, r3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs1
 ; CHECK-P9-NEXT:    xxsldwi vs1, vs0, vs0, 3
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    mfvsrwz r4, f1
-; CHECK-P9-NEXT:    mtvsrd f1, r4
-; CHECK-P9-NEXT:    xxswapd v2, vs1
+; CHECK-P9-NEXT:    vmrglb v3, v4, v3
+; CHECK-P9-NEXT:    xxswapd v4, vs2
+; CHECK-P9-NEXT:    vmrglb v4, v4, v5
+; CHECK-P9-NEXT:    vmrglh v3, v4, v3
+; CHECK-P9-NEXT:    mfvsrwz r3, f1
+; CHECK-P9-NEXT:    mtvsrd f1, r3
+; CHECK-P9-NEXT:    xxswapd v4, vs1
 ; CHECK-P9-NEXT:    xxswapd vs1, vs0
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs1
 ; CHECK-P9-NEXT:    xscvdpsxws f1, f1
-; CHECK-P9-NEXT:    lxv vs2, 48(r3)
-; CHECK-P9-NEXT:    lxv vs3, 32(r3)
-; CHECK-P9-NEXT:    lxv vs4, 16(r3)
 ; CHECK-P9-NEXT:    mfvsrwz r3, f1
 ; CHECK-P9-NEXT:    mtvsrd f1, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs1
+; CHECK-P9-NEXT:    xxswapd v5, vs1
 ; CHECK-P9-NEXT:    xscvspdpn f1, vs0
 ; CHECK-P9-NEXT:    xxsldwi vs0, vs0, vs0, 1
 ; CHECK-P9-NEXT:    xscvspdpn f0, vs0
@@ -1237,87 +1316,8 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam
 ; CHECK-P9-NEXT:    mtvsrd f1, r3
 ; CHECK-P9-NEXT:    mfvsrwz r3, f0
 ; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrglb v2, v3, v2
-; CHECK-P9-NEXT:    xxswapd v3, vs1
-; CHECK-P9-NEXT:    vmrglb v3, v3, v4
-; CHECK-P9-NEXT:    vmrglh v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    xxswapd vs0, vs4
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs4
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs4, vs4, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    vmrglb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    vmrglw v2, v3, v2
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v3, vs0
-; CHECK-P9-NEXT:    xxswapd vs0, vs3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs3
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    vmrglb v3, v4, v3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs3, vs3, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 3
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    vmrglb v4, v4, v5
-; CHECK-P9-NEXT:    vmrglh v3, v4, v3
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v4, vs0
-; CHECK-P9-NEXT:    xxswapd vs0, vs2
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xscvspdpn f0, vs2
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
 ; CHECK-P9-NEXT:    vmrglb v4, v5, v4
-; CHECK-P9-NEXT:    xxswapd v5, vs0
-; CHECK-P9-NEXT:    xxsldwi vs0, vs2, vs2, 1
-; CHECK-P9-NEXT:    xscvspdpn f0, vs0
-; CHECK-P9-NEXT:    xscvdpsxws f0, f0
-; CHECK-P9-NEXT:    mfvsrwz r3, f0
-; CHECK-P9-NEXT:    mtvsrd f0, r3
+; CHECK-P9-NEXT:    xxswapd v5, vs1
 ; CHECK-P9-NEXT:    xxswapd v0, vs0
 ; CHECK-P9-NEXT:    vmrglb v5, v5, v0
 ; CHECK-P9-NEXT:    vmrglh v4, v5, v4

From dbb4322e511b6be756c8fd0a1ee1697b5b110de9 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight@google.com>
Date: Tue, 4 Jun 2019 15:27:19 +0000
Subject: [PATCH 1028/1176] [lldb] Fix out-of-bounds read after
 c3ea7c66fec021867e005ad1b02f3c7e80feaa85 "Add support for mid-function
 epilogues on x86 that end in a non-local jump."

Detected by asan.

llvm-svn: 362510
---
 .../Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
index e4d5ff0d353de..af54115c2db54 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
@@ -736,7 +736,6 @@ bool x86AssemblyInspectionEngine::pc_rel_branch_or_jump_p (
   int opcode_size = 0;
 
   uint8_t b1 = m_cur_insn[0];
-  uint8_t b2 = m_cur_insn[1];
 
   switch (b1) {
     case 0x77: // JA/JNBE rel8
@@ -764,6 +763,7 @@ bool x86AssemblyInspectionEngine::pc_rel_branch_or_jump_p (
       break;
   }
   if (b1 == 0x0f && opcode_size == 0) {
+    uint8_t b2 = m_cur_insn[1];
     switch (b2) {
       case 0x87: // JA/JNBE rel16/rel32
       case 0x86: // JBE/JNA rel16/rel32

From 7f3135037dd44620b231a975d313ce62f93fe873 Mon Sep 17 00:00:00 2001
From: James Henderson <jh7370@my.bristol.ac.uk>
Date: Tue, 4 Jun 2019 15:34:58 +0000
Subject: [PATCH 1029/1176] [llvm-symbolizer] Flush output on bad input

One way of using llvm-symbolizer is to interactively within a process
write a line from a parent process to llvm-symbolizer's stdin, and then
read the output, then write the next line, read, etc. This worked as
long as all the lines were good. However, this didn't work prior to this
patch if any of the inputs were bad inputs, because the output is not
flushed after a bad input, meaning the parent process is sat waiting for
output, whilst llvm-symbolizer is sat waiting for input. This patch
flushes the output after every invocation of symbolizeInput when reading
from stdin. It also removes unnecessary flushing when llvm-symbolizer is
not reading addresses from stdin, which should give a slight performance
boost in these situations.

Reviewed by: ikudrin

Differential Revision: https://reviews.llvm.org/D62371

llvm-svn: 362511
---
 .../llvm-symbolizer/Inputs/flush-output.py    | 24 +++++++++++++++++++
 .../test/tools/llvm-symbolizer/flush-output.s | 17 +++++++++++++
 .../tools/llvm-symbolizer/llvm-symbolizer.cpp |  5 ++--
 3 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/llvm-symbolizer/Inputs/flush-output.py
 create mode 100644 llvm/test/tools/llvm-symbolizer/flush-output.s

diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/flush-output.py b/llvm/test/tools/llvm-symbolizer/Inputs/flush-output.py
new file mode 100644
index 0000000000000..120d49226fa9a
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/Inputs/flush-output.py
@@ -0,0 +1,24 @@
+from __future__ import print_function
+import os
+import subprocess
+import sys
+import threading
+
+def kill_subprocess(process):
+    process.kill()
+    os._exit(1)
+
+# Pass -f=none and --output-style=GNU to get only one line of output per input.
+cmd = subprocess.Popen([sys.argv[1],
+                        '--obj=' + sys.argv[2],
+                        '-f=none',
+                        '--output-style=GNU'], stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+watchdog = threading.Timer(20, kill_subprocess, args=[cmd])
+watchdog.start()
+cmd.stdin.write(b'0\n')
+cmd.stdin.flush()
+print(cmd.stdout.readline())
+cmd.stdin.write(b'bad\n')
+cmd.stdin.flush()
+print(cmd.stdout.readline())
+watchdog.cancel()
diff --git a/llvm/test/tools/llvm-symbolizer/flush-output.s b/llvm/test/tools/llvm-symbolizer/flush-output.s
new file mode 100644
index 0000000000000..840f430feaf81
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/flush-output.s
@@ -0,0 +1,17 @@
+# REQUIRES: x86-registered-target
+
+## If a process spawns llvm-symbolizer, and wishes to feed it addresses one at a
+## time, llvm-symbolizer needs to flush its output after each input has been
+## processed or the parent process will not be able to read the output and may
+## deadlock. This test runs a script that simulates this situation for both a
+## a good and bad input.
+
+foo:
+    nop
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -g
+# RUN: %python %p/Inputs/flush-output.py llvm-symbolizer %t.o \
+# RUN:   | FileCheck %s
+
+# CHECK: flush-output.s:10
+# CHECK: bad
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index e385ed8fce5d5..423ad077bd099 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -246,7 +246,6 @@ static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
   }
   if (ClOutputStyle == DIPrinter::OutputStyle::LLVM)
     outs() << "\n";
-  outs().flush();
 }
 
 int main(int argc, char **argv) {
@@ -291,8 +290,10 @@ int main(int argc, char **argv) {
     const int kMaxInputStringLength = 1024;
     char InputString[kMaxInputStringLength];
 
-    while (fgets(InputString, sizeof(InputString), stdin))
+    while (fgets(InputString, sizeof(InputString), stdin)) {
       symbolizeInput(InputString, Symbolizer, Printer);
+      outs().flush();
+    }
   } else {
     for (StringRef Address : ClInputAddresses)
       symbolizeInput(Address, Symbolizer, Printer);

From c1a0e6fe6b33b9f9b5420edddd3afce7fa1d308b Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 15:38:00 +0000
Subject: [PATCH 1030/1176] llvm-undname: More no-op changes to increase test
 coverage

- Add test coverage around invalid anon namespaces and
  for error paths in demanglePrimitiveType() and in
  demangleFullyQualifiedTypeName()

- Use DEMANGLE_UNREACHABLE in two more unreachable places

llvm-svn: 362514
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp   | 11 +++++-----
 llvm/test/Demangle/invalid-manglings.test | 25 +++++++++++++++++++++++
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 83e6f60de033a..a28e01c2567aa 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -238,10 +238,10 @@ demanglePointerCVQualifiers(StringView &MangledName) {
   case 'S':
     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
                           PointerAffinity::Pointer);
-  default:
-    assert(false && "Ty is not a pointer type!");
   }
-  return std::make_pair(Q_None, PointerAffinity::Pointer);
+  // This function is only called if isPointerType() returns true,
+  // and it only returns true for the six cases listed above.
+  DEMANGLE_UNREACHABLE;
 }
 
 StringView Demangler::copyString(StringView Borrowed) {
@@ -1694,7 +1694,7 @@ CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
 }
 
 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
-  assert(std::isdigit(MangledName.front()));
+  assert(MangledName.front() >= '0' && MangledName.front() <= '4');
 
   switch (MangledName.popFront()) {
   case '0':
@@ -1708,8 +1708,7 @@ StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
   case '4':
     return StorageClass::FunctionLocalStatic;
   }
-  Error = true;
-  return StorageClass::None;
+  DEMANGLE_UNREACHABLE;
 }
 
 std::pair<Qualifiers, bool>
diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index 8d84034452cf2..258a752786e49 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -249,3 +249,28 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ?x@@3PEAY02$$CRHEA
 ; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3_
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3_
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3_XA
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3_XA
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3Vbar
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3Vbar
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@3Vbar@
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@3Vbar@
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?A
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?A
+; CHECK-NEXT: error: Invalid mangled name

From dc2a8c7d7f8a8a1eb260442cc3a8faa3a470edff Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 15:47:25 +0000
Subject: [PATCH 1031/1176] llvm-undname: Add coverage for
 startsWithLocalScopePattern()

llvm-svn: 362515
---
 llvm/test/Demangle/invalid-manglings.test | 35 +++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index 258a752786e49..a5102b9a977d1 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -274,3 +274,38 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ?foo@?A
 ; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@??
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@??
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?XX?
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?XX?
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?A@?
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?A@?
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?Q@?
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?Q@?
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?BQ@?
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?BQ@?
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@?0?
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@?0?
+; CHECK-NEXT: error: Invalid mangled name

From 78e71c4d0906857cb417e80f6ed0e62d2f68044f Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Tue, 4 Jun 2019 16:15:19 +0000
Subject: [PATCH 1032/1176] [Tests] Autogen tests so that diffs for a future
 change are understandable

llvm-svn: 362516
---
 .../Transforms/IndVarSimplify/elim-extend.ll  | 114 ++++++++++++++----
 .../IndVarSimplify/ult-sub-to-eq.ll           |  34 +++++-
 2 files changed, 119 insertions(+), 29 deletions(-)

diff --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
index 6b6d597416599..314ce6992a7a8 100644
--- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -indvars -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -5,11 +6,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; IV with constant start, preinc and postinc sign extends, with and without NSW.
 ; IV rewrite only removes one sext. WidenIVs removes all three.
 define void @postincConstIV(i8* %base, i32 %limit) nounwind {
+; CHECK-LABEL: @postincConstIV(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[LIMIT:%.*]] to i64
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[PREADR:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[PREADR]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[POSTADR:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 0, i8* [[POSTADR]]
+; CHECK-NEXT:    [[POSTADRNSW:%.*]] = getelementptr inbounds i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 0, i8* [[POSTADRNSW]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
-; CHECK: loop:
-; CHECK-NOT: sext
-; CHECK: exit:
 loop:
   %iv = phi i32 [ %postiv, %loop ], [ 0, %entry ]
   %ivnsw = phi i32 [ %postivnsw, %loop ], [ 0, %entry ]
@@ -36,14 +54,33 @@ return:
 ; with and without NSW.
 ; As with postincConstIV, WidenIVs removes all three sexts.
 define void @postincVarIV(i8* %base, i32 %init, i32 %limit) nounwind {
+; CHECK-LABEL: @postincVarIV(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PRECOND:%.*]] = icmp sgt i32 [[LIMIT:%.*]], [[INIT:%.*]]
+; CHECK-NEXT:    br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INIT]] to i64
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PREADR:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[PREADR]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[POSTADR:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 0, i8* [[POSTADR]]
+; CHECK-NEXT:    [[POSTADRNSW:%.*]] = getelementptr i8, i8* [[BASE]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 0, i8* [[POSTADRNSW]]
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMIT]] to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
 entry:
   %precond = icmp sgt i32 %limit, %init
   br i1 %precond, label %loop, label %return
-; CHECK: loop:
-; CHECK-NOT: sext
-; CHECK: wide.trip.count = sext
-; CHECK-NOT: sext
-; CHECK: exit:
 loop:
   %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
   %ivnsw = phi i32 [ %postivnsw, %loop ], [ %init, %entry ]
@@ -72,18 +109,57 @@ return:
 ; %inneriv can be widened only after proving it has no signed-overflow
 ;   based on the loop test.
 define void @nestedIV(i8* %address, i32 %limit) nounwind {
+; CHECK-LABEL: @nestedIV(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LIMITDEC:%.*]] = add i32 [[LIMIT:%.*]], -1
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[LIMIT]] to i64
+; CHECK-NEXT:    br label [[OUTERLOOP:%.*]]
+; CHECK:       outerloop:
+; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[OUTERMERGE:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INNERCOUNT:%.*]] = phi i32 [ [[INNERCOUNT_MERGE:%.*]], [[OUTERMERGE]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV1]], -1
+; CHECK-NEXT:    [[ADR1:%.*]] = getelementptr i8, i8* [[ADDRESS:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    store i8 0, i8* [[ADR1]]
+; CHECK-NEXT:    br label [[INNERPREHEADER:%.*]]
+; CHECK:       innerpreheader:
+; CHECK-NEXT:    [[INNERPRECMP:%.*]] = icmp sgt i32 [[LIMITDEC]], [[INNERCOUNT]]
+; CHECK-NEXT:    br i1 [[INNERPRECMP]], label [[INNERLOOP_PREHEADER:%.*]], label [[OUTERMERGE]]
+; CHECK:       innerloop.preheader:
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[INNERCOUNT]] to i64
+; CHECK-NEXT:    br label [[INNERLOOP:%.*]]
+; CHECK:       innerloop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ADR2:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 0, i8* [[ADR2]]
+; CHECK-NEXT:    [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    store i8 0, i8* [[ADR3]]
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMITDEC]] to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
+; CHECK:       innerexit:
+; CHECK-NEXT:    [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
+; CHECK-NEXT:    br label [[OUTERMERGE]]
+; CHECK:       outermerge:
+; CHECK-NEXT:    [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP3]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]
+; CHECK-NEXT:    [[ADR4:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV1]]
+; CHECK-NEXT:    store i8 0, i8* [[ADR4]]
+; CHECK-NEXT:    [[OFS5:%.*]] = sext i32 [[INNERCOUNT_MERGE]] to i64
+; CHECK-NEXT:    [[ADR5:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[OFS5]]
+; CHECK-NEXT:    store i8 0, i8* [[ADR5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT2]] = add nuw nsw i64 [[INDVARS_IV1]], 1
+; CHECK-NEXT:    [[TMP47:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT2]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[TMP47]], label [[OUTERLOOP]], label [[RETURN:%.*]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+;
 entry:
   %limitdec = add i32 %limit, -1
   br label %outerloop
 
-; CHECK: outerloop:
-;
 ; Eliminate %ofs1 after widening outercount.
-; CHECK-NOT: sext
-; CHECK: getelementptr
-;
 ; IV rewriting hoists a gep into this block. We don't like that.
-; CHECK-NOT: getelementptr
 outerloop:
   %outercount   = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ]
   %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ]
@@ -99,13 +175,8 @@ innerpreheader:
   %innerprecmp = icmp sgt i32 %limitdec, %innercount
   br i1 %innerprecmp, label %innerloop, label %outermerge
 
-; CHECK: innerloop:
-;
 ; Eliminate %ofs2 after widening inneriv.
 ; Eliminate %ofs3 after normalizing sext(innerpostiv)
-; CHECK-NOT: sext
-; CHECK: getelementptr
-;
 ; FIXME: We should check that indvars does not increase the number of
 ; IVs in this loop. sext elimination plus LFTR currently results in 2 final
 ; IVs. Waiting to remove LFTR.
@@ -128,12 +199,7 @@ innerexit:
   %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ]
   br label %outermerge
 
-; CHECK: outermerge:
-;
 ; Eliminate %ofs4 after widening outercount
-; CHECK-NOT: sext
-; CHECK: getelementptr
-;
 ; TODO: Eliminate %ofs5 after removing lcssa
 outermerge:
   %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ]
diff --git a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
index 6a7e5b70ca1b2..0bd5064094c70 100644
--- a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -indvars < %s | FileCheck %s
 
 ; Provide legal integer types.
@@ -5,6 +6,34 @@ target datalayout = "n8:16:32:64"
 
 
 define void @test1(float* nocapture %autoc, float* nocapture %data, float %d, i32 %data_len, i32 %sample) nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[DATA_LEN:%.*]], [[SAMPLE:%.*]]
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[DATA_LEN]], [[SAMPLE]]
+; CHECK-NEXT:    br i1 [[CMP4]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP0]], [[SAMPLE]]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[DATA:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP1]], [[D:%.*]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[AUTOC:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[TMP2]], [[MUL]]
+; CHECK-NEXT:    store float [[ADD3]], float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SUB]] to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
 entry:
   %sub = sub i32 %data_len, %sample
   %cmp4 = icmp eq i32 %data_len, %sample
@@ -30,12 +59,7 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 
-; CHECK-LABEL: @test1(
 
 ; check that we turn the IV test into an eq.
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %wide.trip.count = zext i32 %sub to i64
-; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
-; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
 }
 

From 2df387b05774dbc45818044c640b5f35f6e50b2f Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 4 Jun 2019 16:19:11 +0000
Subject: [PATCH 1033/1176] [clangd] Minor cleanup. NFC

Removed unused using declaration from TweakTests.cpp

llvm-svn: 362517
---
 clang-tools-extra/clangd/unittests/TweakTests.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp
index 66806b3d52e99..f39f4886de0b0 100644
--- a/clang-tools-extra/clangd/unittests/TweakTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -21,7 +21,6 @@
 #include <cassert>
 
 using llvm::Failed;
-using llvm::HasValue;
 using llvm::Succeeded;
 
 namespace clang {

From af11a4376c1123b5d0f343645c66e8c5d2c7b14a Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Tue, 4 Jun 2019 16:19:34 +0000
Subject: [PATCH 1034/1176] [Tests] Update a test to consistently use new pass
 manager and FileCheck the result

llvm-svn: 362518
---
 llvm/test/Transforms/IndVarSimplify/iv-widen.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
index 558869aac926f..8664dacb20d78 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt -lcssa -loop-simplify -S < %s | opt -S -passes='require<targetir>,require<scalar-evolution>,require<domtree>,loop(indvars)'
+; RUN: opt < %s -S -passes='lcssa,loop-simplify,require<targetir>,require<scalar-evolution>,require<domtree>,loop(indvars)' | FileCheck %s
 
 ; Provide legal integer types.
 target datalayout = "n8:16:32:64"

From df621bdfc86e8f1891a39a33edf263ac17058500 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 4 Jun 2019 16:24:09 +0000
Subject: [PATCH 1035/1176] [LVI][CVP] Add support for urem, srem and sdiv

The underlying ConstantRange functionality has been added in D60952,
D61207 and D61238, this just exposes it for LVI.

I'm switching the code from using a whitelist to a blacklist, as
we're down to one unsupported operation here (xor) and writing it
this way seems more obvious :)

Differential Revision: https://reviews.llvm.org/D62822

llvm-svn: 362519
---
 llvm/lib/Analysis/LazyValueInfo.cpp           | 29 +++++--------------
 .../CorrelatedValuePropagation/basic.ll       | 10 +++----
 2 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 280dd3ea6043d..53e9f49a5711d 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1082,31 +1082,18 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
 
   assert(BO->getOperand(0)->getType()->isSized() &&
          "all operands to binary operators are sized");
-
-  // Filter out operators we don't know how to reason about before attempting to
-  // recurse on our operand(s).  This can cut a long search short if we know
-  // we're not going to be able to get any useful information anyways.
-  switch (BO->getOpcode()) {
-  case Instruction::Add:
-  case Instruction::Sub:
-  case Instruction::Mul:
-  case Instruction::UDiv:
-  case Instruction::Shl:
-  case Instruction::LShr:
-  case Instruction::AShr:
-  case Instruction::And:
-  case Instruction::Or:
-    return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
-        [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
-          return CR1.binaryOp(BO->getOpcode(), CR2);
-        });
-  default:
-    // Unhandled instructions are overdefined.
+  if (BO->getOpcode() == Instruction::Xor) {
+    // Xor is the only operation not supported by ConstantRange::binaryOp().
     LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
                       << "' - overdefined (unknown binary operator).\n");
     BBLV = ValueLatticeElement::getOverdefined();
     return true;
-  };
+  }
+
+  return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
+      [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
+        return CR1.binaryOp(BO->getOpcode(), CR2);
+      });
 }
 
 bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
index a063d0cf5bd92..630cfcb6bc166 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -931,7 +931,7 @@ define i1 @urem_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[UREM]], 30
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    ret i1 [[CMP]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %urem = urem i32 %a, 30
@@ -949,9 +949,9 @@ define i1 @srem_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[SREM]], -30
 ; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       exit1:
-; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       exit2:
-; CHECK-NEXT:    ret i1 [[CMP2]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %srem = srem i32 %a, 30
@@ -972,9 +972,9 @@ define i1 @sdiv_unknown(i32 %a) {
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[SREM]], -17459217
 ; CHECK-NEXT:    br i1 undef, label [[EXIT1:%.*]], label [[EXIT2:%.*]]
 ; CHECK:       exit1:
-; CHECK-NEXT:    ret i1 [[CMP1]]
+; CHECK-NEXT:    ret i1 true
 ; CHECK:       exit2:
-; CHECK-NEXT:    ret i1 [[CMP2]]
+; CHECK-NEXT:    ret i1 true
 ;
 entry:
   %srem = sdiv i32 %a, 123

From d98a0a362fbe004be2954857551c0274b09bd635 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 16:25:28 +0000
Subject: [PATCH 1036/1176] llvm-undname: Yet more coverage for error paths

- For error returns in demangleSpecialTableNode(),
  demangleLocalStaticGuard(), RTTITypeDescriptor,
  demangleRttiBaseClassDescriptorNode(), demangleUnsigned(),
  demangleUntypedVariable() (via RttiBaseClassArray)

- For ?_A and ?_P which are handled at early levels of the
  demangler but are not implemented in a later stage; this
  is now more obvious

- Replace a "default:" with an explicit list of cases, to
  get -Wswitch check we list all cases

llvm-svn: 362520
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp   | 11 ++++--
 llvm/test/Demangle/invalid-manglings.test | 45 +++++++++++++++++++++++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index a28e01c2567aa..450c95ff8a100 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -429,10 +429,10 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
 
 SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
   SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
-  if (SIK == SpecialIntrinsicKind::None)
-    return nullptr;
 
   switch (SIK) {
+  case SpecialIntrinsicKind::None:
+    return nullptr;
   case SpecialIntrinsicKind::StringLiteralSymbol:
     return demangleStringLiteral(MangledName);
   case SpecialIntrinsicKind::Vftable:
@@ -468,8 +468,13 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
     return demangleInitFiniStub(MangledName, false);
   case SpecialIntrinsicKind::DynamicAtexitDestructor:
     return demangleInitFiniStub(MangledName, true);
-  default:
+  case SpecialIntrinsicKind::Typeof:
+  case SpecialIntrinsicKind::UdtReturning:
+    // It's unclear which tools produces these manglings, so demangling
+    // support is not (yet?) implemented.
     break;
+  case SpecialIntrinsicKind::Unknown:
+    DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
   }
   Error = true;
   return nullptr;
diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index a5102b9a977d1..1cc192601a483 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -309,3 +309,48 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ?foo@?0?
 ; CHECK-NEXT: error: Invalid mangled name
+
+??_Sfoo@@1Abar@@
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_Sfoo@@1Abar@@
+; CHECK-NEXT: error: Invalid mangled name
+
+??_Bfoo@@1
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_Bfoo@@1
+; CHECK-NEXT: error: Invalid mangled name
+
+??_R0
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_R0
+; CHECK-NEXT: error: Invalid mangled name
+
+??_R0H
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_R0H
+; CHECK-NEXT: error: Invalid mangled name
+
+??_R0H@8foo
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_R0H@8foo
+; CHECK-NEXT: error: Invalid mangled name
+
+??_R1012?3foo@@
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_R1012?3foo@@
+; CHECK-NEXT: error: Invalid mangled name
+
+??_R2foo@@1
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_R2foo@@1
+; CHECK-NEXT: error: Invalid mangled name
+
+??_A
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_A
+; CHECK-NEXT: error: Invalid mangled name
+
+??_P
+; CHECK-EMPTY:
+; CHECK-NEXT: ??_P
+; CHECK-NEXT: error: Invalid mangled name

From 48566aaab461e014dc8350c2ddd6012e34ffe434 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Tue, 4 Jun 2019 16:29:58 +0000
Subject: [PATCH 1037/1176] [CodeGen][ObjC] Convert '[self alloc]' in a class
 method to a call to 'objc_alloc(self)'

Also convert '[[self alloc] init]' in a class method to a call to
'objc_alloc_init(self)'.

rdar://problem/50855121

Differential Revision: https://reviews.llvm.org/D62643

llvm-svn: 362521
---
 clang/lib/CodeGen/CGObjC.cpp                  | 44 ++++++++++++++-----
 .../convert-messages-to-runtime-calls.m       | 28 ++++++++++++
 clang/test/CodeGenObjC/objc-alloc-init.m      |  6 +++
 3 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 8302c10d66dd9..31ffab5d0bcde 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -383,10 +383,12 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
     if (isClassMessage &&
         Runtime.shouldUseRuntimeFunctionsForAlloc() &&
         ResultType->isObjCObjectPointerType()) {
-        // [Foo alloc] -> objc_alloc(Foo)
+        // [Foo alloc] -> objc_alloc(Foo) or
+        // [self alloc] -> objc_alloc(self)
         if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc")
           return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType));
-        // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo)
+        // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) or
+        // [self allocWithZone:nil] -> objc_allocWithZone(self)
         if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 &&
             Args.size() == 1 && Args.front().getType()->isPointerType() &&
             Sel.getNameForSlot(0) == "allocWithZone") {
@@ -444,22 +446,38 @@ tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) {
       Sel.getNameForSlot(0) != "init")
     return None;
 
-  // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]'.
+  // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' or
+  // we are in an ObjC class method and 'receiver' is '[self alloc]'.
   auto *SubOME =
-      dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParens());
+      dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParenCasts());
   if (!SubOME)
     return None;
   Selector SubSel = SubOME->getSelector();
-  if (SubOME->getReceiverKind() != ObjCMessageExpr::Class ||
-      !SubOME->getType()->isObjCObjectPointerType() ||
+
+  // Check if we are in an ObjC class method and the receiver expression is
+  // 'self'.
+  const Expr *SelfInClassMethod = nullptr;
+  if (const auto *CurMD = dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl))
+    if (CurMD->isClassMethod())
+      if ((SelfInClassMethod = SubOME->getInstanceReceiver()))
+        if (!SelfInClassMethod->isObjCSelfExpr())
+          SelfInClassMethod = nullptr;
+
+  if ((SubOME->getReceiverKind() != ObjCMessageExpr::Class &&
+       !SelfInClassMethod) || !SubOME->getType()->isObjCObjectPointerType() ||
       !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc")
     return None;
 
-  QualType ReceiverType = SubOME->getClassReceiver();
-  const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>();
-  const ObjCInterfaceDecl *ID = ObjTy->getInterface();
-  assert(ID && "null interface should be impossible here");
-  llvm::Value *Receiver = CGF.CGM.getObjCRuntime().GetClass(CGF, ID);
+  llvm::Value *Receiver;
+  if (SelfInClassMethod) {
+    Receiver = CGF.EmitScalarExpr(SelfInClassMethod);
+  } else {
+    QualType ReceiverType = SubOME->getClassReceiver();
+    const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>();
+    const ObjCInterfaceDecl *ID = ObjTy->getInterface();
+    assert(ID && "null interface should be impossible here");
+    Receiver = CGF.CGM.getObjCRuntime().GetClass(CGF, ID);
+  }
   return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType()));
 }
 
@@ -507,6 +525,10 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
   switch (E->getReceiverKind()) {
   case ObjCMessageExpr::Instance:
     ReceiverType = E->getInstanceReceiver()->getType();
+    if (auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(CurFuncDecl))
+      if (OMD->isClassMethod())
+        if (E->getInstanceReceiver()->isObjCSelfExpr())
+          isClassMessage = true;
     if (retainSelf) {
       TryEmitResult ter = tryEmitARCRetainScalarExpr(*this,
                                                    E->getInstanceReceiver());
diff --git a/clang/test/CodeGenObjC/convert-messages-to-runtime-calls.m b/clang/test/CodeGenObjC/convert-messages-to-runtime-calls.m
index 6a4edfd09791d..c51b56b66fff5 100644
--- a/clang/test/CodeGenObjC/convert-messages-to-runtime-calls.m
+++ b/clang/test/CodeGenObjC/convert-messages-to-runtime-calls.m
@@ -150,6 +150,34 @@ float test_cannot_message_return_float(C *c) {
   return [c retain];
 }
 
+@interface TestSelf
++ (instancetype)alloc;
++ (instancetype)allocWithZone:(void*)zone;
++ (id)classMeth;
+- (id)instanceMeth;
+@end
+
+@implementation TestSelf
+// CHECK-LABEL: define internal i8* @"\01+[TestSelf classMeth]"(
++ (id)classMeth {
+  // MSGS: {{call.*@objc_msgSend}}
+  // MSGS: {{call.*@objc_msgSend}}
+  // CALLS: {{call.*@objc_allocWithZone\(}}
+  // CALLS: {{call.*@objc_alloc\(}}
+  [self allocWithZone:nil];
+  return [self alloc];
+}
+// CHECK-LABEL: define internal i8* @"\01-[TestSelf instanceMeth]"(
+- (id)instanceMeth {
+  // MSGS: {{call.*@objc_msgSend}}
+  // MSGS: {{call.*@objc_msgSend}}
+  // CALLS: {{call.*@objc_msgSend}}
+  // CALLS: {{call.*@objc_msgSend}}
+  [self allocWithZone:nil];
+  return [self alloc];
+}
+@end
+
 @interface NSString : NSObject
 + (void)retain_self;
 - (void)retain_super;
diff --git a/clang/test/CodeGenObjC/objc-alloc-init.m b/clang/test/CodeGenObjC/objc-alloc-init.m
index 08a383d59f669..c5c1a763b7c76 100644
--- a/clang/test/CodeGenObjC/objc-alloc-init.m
+++ b/clang/test/CodeGenObjC/objc-alloc-init.m
@@ -23,14 +23,20 @@ void f() {
 
 @interface Y : X
 +(void)meth;
+-(void)instanceMeth;
 @end
 
 @implementation Y
 +(void)meth {
   [[self alloc] init];
+  // OPTIMIZED: call i8* @objc_alloc_init(
+  // NOT_OPTIMIZED: call i8* @objc_alloc(
+}
+-(void)instanceMeth {
   // EITHER-NOT: call i8* @objc_alloc
   // EITHER: call {{.*}} @objc_msgSend
   // EITHER: call {{.*}} @objc_msgSend
+  [[self alloc] init];
 }
 @end
 

From 8e8ddaa38ff691b4dd21a93bcc348a383f7ffcac Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Tue, 4 Jun 2019 16:35:23 +0000
Subject: [PATCH 1038/1176] [WebAssembly] Add comment as follow-up to rL362276.
 NFC.

Subscribers: dschuff, jgravelle-google, aheejin, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62824

llvm-svn: 362522
---
 lld/wasm/SymbolTable.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index d10952797d55c..c4a460f3ad7aa 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -199,10 +199,12 @@ DefinedFunction *SymbolTable::addSyntheticFunction(StringRef Name,
                                         Flags, nullptr, Function);
 }
 
+// Adds an optional, linker generated, data symbols.  The symbol will only be
+// added if there is an undefine reference to it, or if it is explictly exported
+// via the --export flag.  Otherwise we don't add the symbol and return nullptr.
 DefinedData *SymbolTable::addOptionalDataSymbol(StringRef Name, uint32_t Value,
                                                 uint32_t Flags) {
   Symbol *S = find(Name);
-  // Enable --export of optional symbols
   if (!S && (Config->ExportAll || Config->ExportedSymbols.count(Name) != 0))
     S = insertName(Name).first;
   else if (!S || S->isDefined())

From f15e3d856fddd3ecf80fdbb798be64d0c4bc6de4 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.smith@linaro.org>
Date: Tue, 4 Jun 2019 16:35:40 +0000
Subject: [PATCH 1039/1176] [AArch64][ELF] Add support for PLT decoding with
 BTI instructions present

Arm Architecture v8.5a introduces Branch Target Identification (BTI). When
enabled all indirect branches must target a bti instruction of the
appropriate form. As PLT sequences may sometimes be the target of an
indirect branch and PLT[0] always is, a static linker may need to generate
PLT sequences that contain "bti c" as the first instruction. In effect:
bti     c
adrp    x16, page offset to .got.plt
...
Instead of:
adrp    x16, page offset to .got.plt
...
At present the PLT decoding assumes the adrp will always be the first
instruction. This patch adds support for a single "bti c" to prefix it. A
test binary has been uploaded with such a PLT sequence. A forthcoming LLD
patch will make heavy use of the PLT decoding code.

Differential Revision: https://reviews.llvm.org/D62598

llvm-svn: 362523
---
 .../MCTargetDesc/AArch64MCTargetDesc.cpp      | 10 +++-
 llvm/test/tools/llvm-objdump/AArch64/plt.test | 57 ++++++++++++++++++-
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 26dd5e5adccde..df12274d94704 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -333,12 +333,20 @@ class AArch64MCInstrAnalysis : public MCInstrAnalysis {
     for (uint64_t Byte = 0, End = PltContents.size(); Byte + 7 < End;
          Byte += 4) {
       uint32_t Insn = support::endian::read32le(PltContents.data() + Byte);
+      uint64_t Off = 0;
+      // Check for optional bti c that prefixes adrp in BTI enabled entries
+      if (Insn == 0xd503245f) {
+         Off = 4;
+         Insn = support::endian::read32le(PltContents.data() + Byte + Off);
+      }
       // Check for adrp.
       if ((Insn & 0x9f000000) != 0x90000000)
         continue;
+      Off += 4;
       uint64_t Imm = (((PltSectionVA + Byte) >> 12) << 12) +
             (((Insn >> 29) & 3) << 12) + (((Insn >> 5) & 0x3ffff) << 14);
-      uint32_t Insn2 = support::endian::read32le(PltContents.data() + Byte + 4);
+      uint32_t Insn2 =
+          support::endian::read32le(PltContents.data() + Byte + Off);
       // Check for: ldr Xt, [Xn, #pimm].
       if (Insn2 >> 22 == 0x3e5) {
         Imm += ((Insn2 >> 10) & 0xfff) << 3;
diff --git a/llvm/test/tools/llvm-objdump/AArch64/plt.test b/llvm/test/tools/llvm-objdump/AArch64/plt.test
index d463a694149cb..5b3eff331d3c4 100644
--- a/llvm/test/tools/llvm-objdump/AArch64/plt.test
+++ b/llvm/test/tools/llvm-objdump/AArch64/plt.test
@@ -1,5 +1,60 @@
-// RUN: llvm-objdump -d %p/Inputs/cfi.elf-aarch64 | FileCheck %s
+# RUN: llvm-objdump -d %p/Inputs/cfi.elf-aarch64 | FileCheck %s
 
 # CHECK: Disassembly of section .plt:
 # CHECK: __cfi_slowpath@plt:
+# CHECK-NEXT: adrp      x16, {{.*}}
 # CHECK: bl {{.*}} <__cfi_slowpath@plt>
+
+# RUN: yaml2obj %s -o %t.aarch64
+# RUN: llvm-objdump -d -mattr=+bti %t.aarch64 | \
+# RUN:   FileCheck --check-prefix=CHECK-BTI %s
+# CHECK-BTI: bl {{.*}} <f1@plt>
+# CHECK-BTI: bl {{.*}} <f2@plt>
+# CHECK-BTI: Disassembly of section .plt:
+# CHECK-BTI: f1@plt:
+# CHECK-BTI-NEXT: bti   c
+# CHECK-BTI-NEXT: adrp  x16, {{.*}}
+# CHECK-BTI: f2@plt:
+# CHECK-BTI-NEXT: bti   c
+# CHECK-BTI-NEXT: adrp  x16, {{.*}}
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_AARCH64
+Sections:
+  - Name:    .rela.plt
+    Type:    SHT_RELA
+    Flags:   [ SHF_ALLOC ]
+    EntSize: 0x0000000000000018
+    Info:    .got.plt
+    Relocations:
+      - Offset: 0x0000000000230018
+        Symbol: f1
+        Type:   R_AARCH64_JUMP_SLOT
+      - Offset: 0x0000000000230020
+        Symbol: f2
+        Type:   R_AARCH64_JUMP_SLOT
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Flags:   [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address: 0x0000000000210000
+    Content: 0C00009411000094C0035FD6
+  - Name:    .plt
+    Type:    SHT_PROGBITS
+    Flags:   [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address: 0x0000000000210010
+    Content: 5F2403D5F07BBFA910010090110A40F91042009120021FD61F2003D51F2003D55F2403D510010090110E40F9106200919F2103D520021FD65F2403D510010090111240F9108200919F2103D520021FD6
+  - Name:    .got.plt
+    Type:    SHT_PROGBITS
+    Content: '000000000000000000000000000000000000000000000000100021000000000010002100000000001000210000000000'
+Symbols:
+  - Name:    f1
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+  - Name:    f2
+    Type:    STT_FUNC
+    Binding: STB_GLOBAL
+...

From 606eb2367f9f0bef2d1e0182bbb2bf4effb1711e Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 4 Jun 2019 16:40:04 +0000
Subject: [PATCH 1040/1176] [x86] split 256-bit store of concatenated vectors

This shows up as a side issue to the main problem for the AVX target example from PR37428:
https://bugs.llvm.org/show_bug.cgi?id=37428 - https://godbolt.org/z/7tpRa3

But as we can see in the pile of existing test diffs, it's actually a widespread problem
that affects any AVX or later target. Apart from a couple of oddballs, I think these are
all improvements for the reasons stated in the code comment: we do not want to enable YMM
unnecessarily (avoid vzeroupper and frequency throttling) and some cores split 256-bit
stores anyway.

We could say that MergeConsecutiveStores() is going overboard on some of these examples,
but that won't solve the problem completely. But that is a reason I'm proposing this as
a lowering rather than a combine: we will infinite loop fighting the merge code if we try
this earlier.

Differential Revision: https://reviews.llvm.org/D62498

llvm-svn: 362524
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  11 +
 llvm/test/CodeGen/X86/avg.ll                  | 402 +++++++++---------
 .../CodeGen/X86/avx-intrinsics-x86-upgrade.ll |  24 +-
 llvm/test/CodeGen/X86/avx-intrinsics-x86.ll   |  12 +-
 llvm/test/CodeGen/X86/avx512-trunc-widen.ll   |  16 +-
 llvm/test/CodeGen/X86/avx512-trunc.ll         |  16 +-
 llvm/test/CodeGen/X86/nontemporal-2.ll        |  40 +-
 llvm/test/CodeGen/X86/oddsubvector.ll         |  15 +-
 llvm/test/CodeGen/X86/pmovsx-inreg.ll         |  72 ++--
 llvm/test/CodeGen/X86/shrink_vmul-widen.ll    | 124 +++---
 llvm/test/CodeGen/X86/shrink_vmul.ll          | 124 +++---
 .../CodeGen/X86/shuffle-vs-trunc-512-widen.ll |  18 +-
 llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll |  18 +-
 llvm/test/CodeGen/X86/subvector-broadcast.ll  |  68 +--
 llvm/test/CodeGen/X86/vec_fptrunc.ll          |  10 +-
 llvm/test/CodeGen/X86/vec_saddo.ll            |  68 +--
 llvm/test/CodeGen/X86/vec_smulo.ll            |  84 ++--
 llvm/test/CodeGen/X86/vec_ssubo.ll            |  84 ++--
 llvm/test/CodeGen/X86/vec_uaddo.ll            |  24 +-
 llvm/test/CodeGen/X86/vec_umulo.ll            |  26 +-
 llvm/test/CodeGen/X86/vec_usubo.ll            |  24 +-
 llvm/test/CodeGen/X86/vector-gep.ll           | 134 +++---
 llvm/test/CodeGen/X86/vector-trunc-widen.ll   |  72 ++--
 llvm/test/CodeGen/X86/vector-trunc.ll         |  72 ++--
 .../CodeGen/X86/x86-interleaved-access.ll     |  73 ++--
 25 files changed, 786 insertions(+), 845 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e493d3d719416..a15e375382068 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1283,6 +1283,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);
       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
+      setOperationAction(ISD::STORE,              VT, Custom);
     }
 
     if (HasInt256)
@@ -21073,7 +21074,17 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   if (St->isTruncatingStore())
     return SDValue();
 
+  // If this is a 256-bit store of concatenated ops, we are better off splitting
+  // that store into two 128-bit stores. This avoids spurious use of 256-bit ops
+  // and each half can execute independently. Some cores would split the op into
+  // halves anyway, so the concat (vinsertf128) is purely an extra op.
   MVT StoreVT = StoredVal.getSimpleValueType();
+  if (StoreVT.is256BitVector()) {
+    if (StoredVal.getOpcode() != ISD::CONCAT_VECTORS || !StoredVal.hasOneUse())
+      return SDValue();
+    return split256BitStore(St, DAG);
+  }
+
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
          "Unexpected VT");
   if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index cfa9f11a9c73e..22a6daa999d71 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -102,11 +102,10 @@ define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8:
@@ -267,8 +266,8 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-LABEL: avg_v48i8:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
-; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm4
+; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm4
+; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm1
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[3,3,0,1]
@@ -279,10 +278,10 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm15 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[3,3,0,1]
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm10 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm11 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm14 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm12 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm13 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
 ; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -297,52 +296,52 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm4
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm3
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm2, %xmm5, %xmm13
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[3,3,0,1]
+; AVX1-NEXT:    vpaddd %xmm2, %xmm5, %xmm12
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm11
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[1,1,2,3]
+; AVX1-NEXT:    vpaddd %xmm5, %xmm6, %xmm10
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd %xmm6, %xmm7, %xmm9
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm8
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm4, %xmm15, %xmm15
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm8
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd %xmm3, %xmm15, %xmm15
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm7, %xmm10, %xmm7
+; AVX1-NEXT:    vpaddd %xmm7, %xmm11, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd %xmm2, %xmm14, %xmm14
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd %xmm0, %xmm12, %xmm12
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
+; AVX1-NEXT:    vpaddd %xmm0, %xmm13, %xmm13
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm4[2,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm5, %xmm5 # 16-byte Folded Reload
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[3,3,0,1]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[3,3,0,1]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm6 # 16-byte Folded Reload
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm3[1,1,2,3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm4[1,1,2,3]
 ; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
 ; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpsubd %xmm4, %xmm13, %xmm10
-; AVX1-NEXT:    vpsubd %xmm4, %xmm11, %xmm11
-; AVX1-NEXT:    vpsubd %xmm4, %xmm9, %xmm9
-; AVX1-NEXT:    vpsubd %xmm4, %xmm8, %xmm8
-; AVX1-NEXT:    vpsubd %xmm4, %xmm15, %xmm13
-; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm7
-; AVX1-NEXT:    vpsubd %xmm4, %xmm14, %xmm0
-; AVX1-NEXT:    vpsubd %xmm4, %xmm12, %xmm2
-; AVX1-NEXT:    vpsubd %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vpsubd %xmm4, %xmm6, %xmm6
-; AVX1-NEXT:    vpsubd %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX1-NEXT:    vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm4, %xmm4 # 16-byte Folded Reload
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubd %xmm3, %xmm12, %xmm11
+; AVX1-NEXT:    vpsubd %xmm3, %xmm10, %xmm10
+; AVX1-NEXT:    vpsubd %xmm3, %xmm9, %xmm9
+; AVX1-NEXT:    vpsubd %xmm3, %xmm8, %xmm8
+; AVX1-NEXT:    vpsubd %xmm3, %xmm15, %xmm12
+; AVX1-NEXT:    vpsubd %xmm3, %xmm7, %xmm7
+; AVX1-NEXT:    vpsubd %xmm3, %xmm14, %xmm0
+; AVX1-NEXT:    vpsubd %xmm3, %xmm13, %xmm2
+; AVX1-NEXT:    vpsubd %xmm3, %xmm5, %xmm5
+; AVX1-NEXT:    vpsubd %xmm3, %xmm6, %xmm6
+; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubd %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpsrld $1, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
@@ -353,13 +352,13 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpsrld $1, %xmm7, %xmm2
-; AVX1-NEXT:    vpsrld $1, %xmm13, %xmm4
+; AVX1-NEXT:    vpsrld $1, %xmm12, %xmm4
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm4, %xmm2
 ; AVX1-NEXT:    vpsrld $1, %xmm8, %xmm4
 ; AVX1-NEXT:    vpsrld $1, %xmm9, %xmm5
 ; AVX1-NEXT:    vpackusdw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpsrld $1, %xmm11, %xmm5
-; AVX1-NEXT:    vpsrld $1, %xmm10, %xmm6
+; AVX1-NEXT:    vpsrld $1, %xmm10, %xmm5
+; AVX1-NEXT:    vpsrld $1, %xmm11, %xmm6
 ; AVX1-NEXT:    vpackusdw %xmm5, %xmm6, %xmm5
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
@@ -368,13 +367,12 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm4, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v48i8:
@@ -449,13 +447,12 @@ define void @avg_v48i8(<48 x i8>* %a, <48 x i8>* %b) nounwind {
 ; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm2
-; AVX512F-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm1
+; AVX512F-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX512F-NEXT:    vpavgb 32(%rsi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovdqu %xmm1, (%rax)
-; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
-; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX512F-NEXT:    vmovdqu %xmm2, (%rax)
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: avg_v48i8:
@@ -507,15 +504,14 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgb 48(%rdi), %xmm3, %xmm1
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb 32(%rdi), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb 48(%rdi), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8:
@@ -628,11 +624,10 @@ define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rsi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16:
@@ -685,15 +680,14 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rsi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgw 48(%rdi), %xmm3, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw 32(%rdi), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 48(%rdi), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16:
@@ -834,11 +828,10 @@ define void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgb (%rsi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8_2:
@@ -893,13 +886,12 @@ define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 48(%rsi), %xmm3
 ; AVX1-NEXT:    vpavgb %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vpavgb %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgb %xmm2, %xmm2, %xmm1
-; AVX1-NEXT:    vpavgb %xmm3, %xmm3, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpavgb %xmm3, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8_2:
@@ -1013,11 +1005,10 @@ define void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
-; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16_2:
@@ -1070,15 +1061,14 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) nounwind {
 ; AVX1-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX1-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX1-NEXT:    vmovdqa 48(%rdi), %xmm3
-; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw (%rsi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpavgw 48(%rsi), %xmm3, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rsi), %xmm1, %xmm1
 ; AVX1-NEXT:    vpavgw 32(%rsi), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw 48(%rsi), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16_2:
@@ -1206,11 +1196,10 @@ define void @avg_v32i8_const(<32 x i8>* %a) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = [7.9499288951273625E-275,7.9499288951273625E-275]
 ; AVX1-NEXT:    # xmm0 = mem[0,0]
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i8_const:
@@ -1258,15 +1247,14 @@ define void @avg_v64i8_const(<64 x i8>* %a) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = [7.9499288951273625E-275,7.9499288951273625E-275]
 ; AVX1-NEXT:    # xmm0 = mem[0,0]
-; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vpavgb 48(%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vpavgb 32(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgb (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgb 16(%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vpavgb 32(%rdi), %xmm0, %xmm3
+; AVX1-NEXT:    vpavgb 48(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v64i8_const:
@@ -1365,11 +1353,10 @@ define void @avg_v16i16_const(<16 x i16>* %a) nounwind {
 ; AVX1-LABEL: avg_v16i16_const:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v16i16_const:
@@ -1416,15 +1403,14 @@ define void @avg_v32i16_const(<32 x i16>* %a) nounwind {
 ; AVX1-LABEL: avg_v32i16_const:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
-; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm1
-; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT:    vpavgw 48(%rdi), %xmm0, %xmm2
-; AVX1-NEXT:    vpavgw 32(%rdi), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
-; AVX1-NEXT:    vmovups %ymm1, (%rax)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpavgw (%rdi), %xmm0, %xmm1
+; AVX1-NEXT:    vpavgw 16(%rdi), %xmm0, %xmm2
+; AVX1-NEXT:    vpavgw 32(%rdi), %xmm0, %xmm3
+; AVX1-NEXT:    vpavgw 48(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm3, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: avg_v32i16_const:
@@ -1665,100 +1651,96 @@ define <512 x i8> @avg_v512i8_3(<512 x i8> %a, <512 x i8> %b) nounwind {
 ; AVX1-NEXT:    pushq %rbp
 ; AVX1-NEXT:    movq %rsp, %rbp
 ; AVX1-NEXT:    andq $-32, %rsp
-; AVX1-NEXT:    subq $96, %rsp
+; AVX1-NEXT:    subq $32, %rsp
 ; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    vpavgb 272(%rbp), %xmm0, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpavgb 288(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm8, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT:    vpavgb 304(%rbp), %xmm1, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpavgb 320(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm8, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, (%rsp) # 32-byte Spill
-; AVX1-NEXT:    vpavgb 336(%rbp), %xmm2, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vpavgb 352(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm8, %ymm13
-; AVX1-NEXT:    vpavgb 368(%rbp), %xmm3, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT:    vpavgb 384(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm8, %ymm14
-; AVX1-NEXT:    vpavgb 400(%rbp), %xmm4, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
-; AVX1-NEXT:    vpavgb 416(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm8, %ymm15
-; AVX1-NEXT:    vpavgb 432(%rbp), %xmm5, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
-; AVX1-NEXT:    vpavgb 448(%rbp), %xmm5, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm8, %ymm12
-; AVX1-NEXT:    vpavgb 464(%rbp), %xmm6, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
-; AVX1-NEXT:    vpavgb 480(%rbp), %xmm6, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm8, %ymm6
-; AVX1-NEXT:    vpavgb 496(%rbp), %xmm7, %xmm8
-; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
-; AVX1-NEXT:    vpavgb 512(%rbp), %xmm7, %xmm7
-; AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm8, %ymm7
-; AVX1-NEXT:    vmovdqa 16(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 32(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 528(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 544(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm8
-; AVX1-NEXT:    vmovdqa 48(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 64(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 560(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 576(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm9
-; AVX1-NEXT:    vmovdqa 80(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 96(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 592(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 608(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm10
-; AVX1-NEXT:    vmovdqa 112(%rbp), %xmm0
-; AVX1-NEXT:    vmovdqa 128(%rbp), %xmm1
-; AVX1-NEXT:    vpavgb 624(%rbp), %xmm0, %xmm0
-; AVX1-NEXT:    vpavgb 640(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovdqa 144(%rbp), %xmm1
-; AVX1-NEXT:    vmovdqa 160(%rbp), %xmm2
-; AVX1-NEXT:    vpavgb 656(%rbp), %xmm1, %xmm1
-; AVX1-NEXT:    vpavgb 672(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovdqa 176(%rbp), %xmm2
-; AVX1-NEXT:    vmovdqa 192(%rbp), %xmm3
-; AVX1-NEXT:    vpavgb 688(%rbp), %xmm2, %xmm2
-; AVX1-NEXT:    vpavgb 704(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX1-NEXT:    vmovdqa 208(%rbp), %xmm3
-; AVX1-NEXT:    vmovdqa 224(%rbp), %xmm4
-; AVX1-NEXT:    vpavgb 720(%rbp), %xmm3, %xmm3
-; AVX1-NEXT:    vpavgb 736(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vmovdqa 240(%rbp), %xmm4
-; AVX1-NEXT:    vpavgb 752(%rbp), %xmm4, %xmm4
-; AVX1-NEXT:    vmovdqa 256(%rbp), %xmm11
-; AVX1-NEXT:    vpavgb 768(%rbp), %xmm11, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vmovaps %ymm4, 480(%rdi)
-; AVX1-NEXT:    vmovaps %ymm3, 448(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, 416(%rdi)
-; AVX1-NEXT:    vmovaps %ymm1, 384(%rdi)
-; AVX1-NEXT:    vmovaps %ymm0, 352(%rdi)
-; AVX1-NEXT:    vmovaps %ymm10, 320(%rdi)
-; AVX1-NEXT:    vmovaps %ymm9, 288(%rdi)
-; AVX1-NEXT:    vmovaps %ymm8, 256(%rdi)
-; AVX1-NEXT:    vmovaps %ymm7, 224(%rdi)
-; AVX1-NEXT:    vmovaps %ymm6, 192(%rdi)
-; AVX1-NEXT:    vmovaps %ymm12, 160(%rdi)
-; AVX1-NEXT:    vmovaps %ymm15, 128(%rdi)
-; AVX1-NEXT:    vmovaps %ymm14, 96(%rdi)
-; AVX1-NEXT:    vmovaps %ymm13, 64(%rdi)
-; AVX1-NEXT:    vmovaps (%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
-; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovdqa 256(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 768(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 496(%rdi)
+; AVX1-NEXT:    vmovdqa 240(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 752(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 480(%rdi)
+; AVX1-NEXT:    vmovdqa 224(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 736(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 464(%rdi)
+; AVX1-NEXT:    vmovdqa 208(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 720(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 448(%rdi)
+; AVX1-NEXT:    vmovdqa 192(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 704(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 432(%rdi)
+; AVX1-NEXT:    vmovdqa 176(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 688(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 416(%rdi)
+; AVX1-NEXT:    vmovdqa 160(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 672(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 400(%rdi)
+; AVX1-NEXT:    vmovdqa 144(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 656(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 384(%rdi)
+; AVX1-NEXT:    vmovdqa 128(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 640(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 368(%rdi)
+; AVX1-NEXT:    vmovdqa 112(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 624(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 352(%rdi)
+; AVX1-NEXT:    vmovdqa 96(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 608(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 336(%rdi)
+; AVX1-NEXT:    vmovdqa 80(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 592(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 320(%rdi)
+; AVX1-NEXT:    vmovdqa 64(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 576(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 304(%rdi)
+; AVX1-NEXT:    vmovdqa 48(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 560(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 288(%rdi)
+; AVX1-NEXT:    vmovdqa 32(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 544(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 272(%rdi)
+; AVX1-NEXT:    vmovdqa 16(%rbp), %xmm8
+; AVX1-NEXT:    vpavgb 528(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 256(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm8
+; AVX1-NEXT:    vpavgb 512(%rbp), %xmm8, %xmm8
+; AVX1-NEXT:    vmovdqa %xmm8, 240(%rdi)
+; AVX1-NEXT:    vpavgb 496(%rbp), %xmm7, %xmm7
+; AVX1-NEXT:    vmovdqa %xmm7, 224(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm7
+; AVX1-NEXT:    vpavgb 480(%rbp), %xmm7, %xmm7
+; AVX1-NEXT:    vmovdqa %xmm7, 208(%rdi)
+; AVX1-NEXT:    vpavgb 464(%rbp), %xmm6, %xmm6
+; AVX1-NEXT:    vmovdqa %xmm6, 192(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT:    vpavgb 448(%rbp), %xmm6, %xmm6
+; AVX1-NEXT:    vmovdqa %xmm6, 176(%rdi)
+; AVX1-NEXT:    vpavgb 432(%rbp), %xmm5, %xmm5
+; AVX1-NEXT:    vmovdqa %xmm5, 160(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
+; AVX1-NEXT:    vpavgb 416(%rbp), %xmm5, %xmm5
+; AVX1-NEXT:    vmovdqa %xmm5, 144(%rdi)
+; AVX1-NEXT:    vpavgb 400(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa %xmm4, 128(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vpavgb 384(%rbp), %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa %xmm4, 112(%rdi)
+; AVX1-NEXT:    vpavgb 368(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa %xmm3, 96(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpavgb 352(%rbp), %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqa %xmm3, 80(%rdi)
+; AVX1-NEXT:    vpavgb 336(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm2, 64(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpavgb 320(%rbp), %xmm2, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm2, 48(%rdi)
+; AVX1-NEXT:    vpavgb 304(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa %xmm1, 32(%rdi)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpavgb 288(%rbp), %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX1-NEXT:    vpavgb 272(%rbp), %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
 ; AVX1-NEXT:    movq %rbp, %rsp
 ; AVX1-NEXT:    popq %rbp
 ; AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index 8f0ec5030eb03..9706bf3455fef 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -725,12 +725,12 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
 ; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
-; X86-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
-; X86-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
-; X86-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
-; X86-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
-; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
+; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
+; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
+; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
+; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
+; X86-AVX-NEXT:    vmovdqu %xmm0, 16(%eax) # encoding: [0xc5,0xfa,0x7f,0x40,0x10]
+; X86-AVX-NEXT:    vmovdqu %xmm2, (%eax) # encoding: [0xc5,0xfa,0x7f,0x10]
 ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
@@ -745,12 +745,12 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
 ;
 ; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
-; X64-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
-; X64-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
-; X64-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
-; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
-; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
+; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
+; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm2 # encoding: [0xc5,0xf9,0xf8,0xd1]
+; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
+; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
+; X64-AVX-NEXT:    vmovdqu %xmm0, 16(%rdi) # encoding: [0xc5,0xfa,0x7f,0x47,0x10]
+; X64-AVX-NEXT:    vmovdqu %xmm2, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x17]
 ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 2fd2b863859c7..8e48289c1042e 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -916,8 +916,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X86-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
-; X86-AVX-NEXT:    vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
-; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-AVX-NEXT:    vmovntdq %xmm0, (%eax) # encoding: [0xc5,0xf9,0xe7,0x00]
 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: movnt_dq:
@@ -925,24 +924,21 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X86-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
-; X86-AVX512VL-NEXT:    vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
-; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-AVX512VL-NEXT:    vmovntdq %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x00]
 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: movnt_dq:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X64-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
-; X64-AVX-NEXT:    vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
-; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-AVX-NEXT:    vmovntdq %xmm0, (%rdi) # encoding: [0xc5,0xf9,0xe7,0x07]
 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: movnt_dq:
 ; X64-AVX512VL:       # %bb.0:
 ; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
 ; X64-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
-; X64-AVX512VL-NEXT:    vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
-; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-AVX512VL-NEXT:    vmovntdq %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe7,0x07]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
   %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-widen.ll b/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
index ba451973faa04..1ce08c01773d1 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc-widen.ll
@@ -462,12 +462,10 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
 ; KNL-LABEL: trunc_wb_512_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqa %ymm0, (%rdi)
+; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
+; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -672,8 +670,8 @@ define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -952,8 +950,8 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll
index c15d33222ca0e..263f7c90441d4 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc.ll
@@ -458,12 +458,10 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
 define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
 ; KNL-LABEL: trunc_wb_512_mem:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; KNL-NEXT:    vpmovdb %zmm1, %xmm1
-; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; KNL-NEXT:    vmovdqa %ymm0, (%rdi)
+; KNL-NEXT:    vpmovdb %zmm1, 16(%rdi)
+; KNL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; KNL-NEXT:    vpmovdb %zmm0, (%rdi)
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ;
@@ -667,8 +665,8 @@ define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -948,8 +946,8 @@ define void @smax_usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
 ; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
 ; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT:    vmovdqu %ymm0, (%rdi)
+; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
+; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
index 5b39cb16afec7..aa3e7cda18c0a 100644
--- a/llvm/test/CodeGen/X86/nontemporal-2.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -1061,12 +1061,12 @@ define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v8i32:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1126,12 +1126,12 @@ define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v4i64:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1164,12 +1164,12 @@ define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v16i16:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1202,12 +1202,12 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
 ;
 ; AVX1-LABEL: test_op_v32i8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovntps %ymm0, (%rdi)
+; AVX1-NEXT:    vmovntdq %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovntdq %xmm2, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index 69ea53e7e9c82..9bc6c0f380a07 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -116,13 +116,14 @@ define void @PR40815(%struct.Mat4* nocapture readonly dereferenceable(64), %stru
 ;
 ; AVX-LABEL: PR40815:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovaps 16(%rdi), %xmm0
-; AVX-NEXT:    vmovaps 48(%rdi), %xmm1
-; AVX-NEXT:    vinsertf128 $1, 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0
-; AVX-NEXT:    vmovups %ymm1, (%rsi)
-; AVX-NEXT:    vmovups %ymm0, 32(%rsi)
-; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    vmovaps (%rdi), %xmm0
+; AVX-NEXT:    vmovaps 16(%rdi), %xmm1
+; AVX-NEXT:    vmovaps 32(%rdi), %xmm2
+; AVX-NEXT:    vmovaps 48(%rdi), %xmm3
+; AVX-NEXT:    vmovaps %xmm2, 16(%rsi)
+; AVX-NEXT:    vmovaps %xmm3, (%rsi)
+; AVX-NEXT:    vmovaps %xmm0, 48(%rsi)
+; AVX-NEXT:    vmovaps %xmm1, 32(%rsi)
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: PR40815:
diff --git a/llvm/test/CodeGen/X86/pmovsx-inreg.ll b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
index 9ab6917966b38..f89223fa45834 100644
--- a/llvm/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
@@ -53,12 +53,12 @@ define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test2:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbq 2(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbq (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbq (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbq 2(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -134,12 +134,12 @@ define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
 ;
 ; AVX1-LABEL: test4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbd 4(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -215,12 +215,12 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
 ;
 ; AVX1-LABEL: test6:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxbw (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbw 8(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -296,12 +296,12 @@ define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test8:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwq 4(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxwq (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxwq (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwq 4(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -377,12 +377,12 @@ define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
 ;
 ; AVX1-LABEL: test10:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -458,12 +458,12 @@ define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
 ;
 ; AVX1-LABEL: test12:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm0
-; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqu %ymm1, (%rax)
-; AVX1-NEXT:    vmovups %ymm0, (%rsi)
+; AVX1-NEXT:    vpmovsxdq (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxdq 8(%rdi), %xmm1
+; AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vmovups %ymm2, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
index 0ed79ea4af70b..7599858007407 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul-widen.ll
@@ -215,10 +215,9 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi8:
@@ -261,9 +260,8 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi8:
@@ -349,12 +347,11 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi8:
@@ -417,11 +414,10 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi8:
@@ -648,10 +644,9 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi16:
@@ -693,9 +688,8 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi16:
@@ -780,12 +774,11 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16:
@@ -847,11 +840,10 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16:
@@ -1284,24 +1276,23 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX1-NEXT:    movl c, %esi
-; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm2
-; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm3
-; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16_sext:
@@ -1351,23 +1342,22 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X64-AVX1-LABEL: mul_16xi16_sext:
 ; X64-AVX1:       # %bb.0: # %entry
 ; X64-AVX1-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm3
 ; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16_sext:
@@ -2201,8 +2191,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    divl 32(%ecx)
 ; X86-AVX1-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm3
-; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X86-AVX1-NEXT:    xorl %edx, %edx
 ; X86-AVX1-NEXT:    divl %ecx
@@ -2255,14 +2245,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm1, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
 ; X86-AVX1-NEXT:    popl %esi
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR34947:
@@ -2435,8 +2424,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    divl 32(%rsi)
 ; X64-AVX1-NEXT:    movl %edx, %r8d
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm3
-; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm3
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X64-AVX1-NEXT:    xorl %edx, %edx
 ; X64-AVX1-NEXT:    divl %ecx
@@ -2486,13 +2475,12 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    vpinsrd $2, %r10d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpinsrd $3, %r9d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    imull $8199, %r8d, %eax # imm = 0x2007
 ; X64-AVX1-NEXT:    movl %eax, (%rax)
-; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm1, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
 ; X64-AVX1-NEXT:    popq %rbp
-; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 0c8949f246177..5e952472f7577 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -209,10 +209,9 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi8:
@@ -255,9 +254,8 @@ define void @mul_8xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi8:
@@ -343,12 +341,11 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X86-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi8:
@@ -411,11 +408,10 @@ define void @mul_16xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmaddwd %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; X64-AVX1-NEXT:    vpmaddwd %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi8:
@@ -640,10 +636,9 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_8xi16:
@@ -685,9 +680,8 @@ define void @mul_8xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i64
 ; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm2, %xmm0
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_8xi16:
@@ -772,12 +766,11 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16:
@@ -839,11 +832,10 @@ define void @mul_16xi16(i8* nocapture readonly %a, i8* nocapture readonly %b, i6
 ; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16:
@@ -1258,24 +1250,23 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX1-NEXT:    movl c, %esi
-; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm0
-; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm2
-; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm3
-; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 24(%edx,%ecx), %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%edx,%ecx), %xmm1
+; X86-AVX1-NEXT:    vpmovsxwd 8(%edx,%ecx), %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%edx,%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpmovsxwd 24(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X86-AVX1-NEXT:    vpmovsxwd 16(%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
-; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X86-AVX1-NEXT:    vpmovsxwd 8(%eax,%ecx), %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpmovsxwd (%eax,%ecx), %xmm4
 ; X86-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X86-AVX1-NEXT:    vmovups %ymm0, 32(%esi,%ecx,4)
-; X86-AVX1-NEXT:    vmovups %ymm2, (%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm0, 48(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm1, 32(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm2, 16(%esi,%ecx,4)
+; X86-AVX1-NEXT:    vmovdqu %xmm3, (%esi,%ecx,4)
 ; X86-AVX1-NEXT:    popl %esi
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: mul_16xi16_sext:
@@ -1325,23 +1316,22 @@ define void @mul_16xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %
 ; X64-AVX1-LABEL: mul_16xi16_sext:
 ; X64-AVX1:       # %bb.0: # %entry
 ; X64-AVX1-NEXT:    movq {{.*}}(%rip), %rax
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm0
-; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm2
-; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm3
-; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 24(%rdi,%rdx), %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rdi,%rdx), %xmm1
+; X64-AVX1-NEXT:    vpmovsxwd 8(%rdi,%rdx), %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rdi,%rdx), %xmm3
 ; X64-AVX1-NEXT:    vpmovsxwd 24(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm0, %xmm4, %xmm0
+; X64-AVX1-NEXT:    vpmovsxwd 16(%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm4, %xmm1
-; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
-; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
 ; X64-AVX1-NEXT:    vpmovsxwd 8(%rsi,%rdx), %xmm4
+; X64-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X64-AVX1-NEXT:    vpmovsxwd (%rsi,%rdx), %xmm4
 ; X64-AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovups %ymm0, 32(%rax,%rdx,4)
-; X64-AVX1-NEXT:    vmovups %ymm2, (%rax,%rdx,4)
-; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    vmovdqu %xmm0, 48(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm1, 32(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm2, 16(%rax,%rdx,4)
+; X64-AVX1-NEXT:    vmovdqu %xmm3, (%rax,%rdx,4)
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: mul_16xi16_sext:
@@ -2157,8 +2147,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    divl 32(%ecx)
 ; X86-AVX1-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm3
-; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa (%ecx), %xmm1
+; X86-AVX1-NEXT:    vmovdqa 16(%ecx), %xmm3
 ; X86-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X86-AVX1-NEXT:    xorl %edx, %edx
 ; X86-AVX1-NEXT:    divl %ecx
@@ -2211,14 +2201,13 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm1, (%eax)
+; X86-AVX1-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-AVX1-NEXT:    addl $16, %esp
 ; X86-AVX1-NEXT:    popl %esi
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
-; X86-AVX1-NEXT:    vzeroupper
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR34947:
@@ -2391,8 +2380,8 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    divl 32(%rsi)
 ; X64-AVX1-NEXT:    movl %edx, %r8d
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm2, %eax
-; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm3
-; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa (%rsi), %xmm1
+; X64-AVX1-NEXT:    vmovdqa 16(%rsi), %xmm3
 ; X64-AVX1-NEXT:    vpextrd $3, %xmm3, %ecx
 ; X64-AVX1-NEXT:    xorl %edx, %edx
 ; X64-AVX1-NEXT:    divl %ecx
@@ -2442,13 +2431,12 @@ define void @PR34947(<9 x i16>* %p0, <9 x i32>* %p1) nounwind {
 ; X64-AVX1-NEXT:    vpinsrd $2, %r10d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpinsrd $3, %r9d, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpmulld %xmm1, %xmm2, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    imull $8199, %r8d, %eax # imm = 0x2007
 ; X64-AVX1-NEXT:    movl %eax, (%rax)
-; X64-AVX1-NEXT:    vmovaps %ymm0, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm1, (%rax)
+; X64-AVX1-NEXT:    vmovdqa %xmm0, (%rax)
 ; X64-AVX1-NEXT:    popq %rbx
 ; X64-AVX1-NEXT:    popq %rbp
-; X64-AVX1-NEXT:    vzeroupper
 ; X64-AVX1-NEXT:    retq
 ;
 ; X64-AVX2-LABEL: PR34947:
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
index 1a6bdd3aaa407..737925eca0440 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512-widen.ll
@@ -88,23 +88,21 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ; AVX512F-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512F-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512VL-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
index 19031bbb2c0f8..6f94e0c608683 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -88,23 +88,21 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
 ; AVX512F-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512F-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc_v32i16_to_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqa %ymm0, (%rsi)
+; AVX512VL-NEXT:    vpmovdb %zmm1, 16(%rsi)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rsi)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 3ce584eff2a9e..7ecfac5151f2e 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -835,24 +835,24 @@ define <16 x i32> @test_broadcast_4i32_16i32_chain(<4 x i32>* %p0, <4 x float>*
 define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
 ; X32-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X32-AVX1:       # %bb.0: # %entry
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [3,0,4,0]
-; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,0,2,0]
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,0,2,0]
+; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm4
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [3,0,4,0]
 ; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [1,0,2,0,3,0,4,0]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm6
-; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; X32-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm4
-; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; X32-AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
-; X32-AVX1-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X32-AVX1-NEXT:    vmovups %ymm0, ga4
+; X32-AVX1-NEXT:    vmovaps {{.*#+}} ymm6 = [1,0,2,0,3,0,4,0]
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm7
+; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm2, %ymm2
+; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
+; X32-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm5
+; X32-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
+; X32-AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; X32-AVX1-NEXT:    vandps %ymm6, %ymm1, %ymm1
+; X32-AVX1-NEXT:    vandps %ymm6, %ymm2, %ymm2
+; X32-AVX1-NEXT:    vmovdqu %xmm0, ga4+16
+; X32-AVX1-NEXT:    vmovdqu %xmm4, ga4
 ; X32-AVX1-NEXT:    vmovups %ymm2, gb4+32
 ; X32-AVX1-NEXT:    vmovups %ymm1, gb4
 ; X32-AVX1-NEXT:    vzeroupper
@@ -886,24 +886,24 @@ define void @fallback_broadcast_v4i64_to_v8i64(<4 x i64> %a, <8 x i64> %b) {
 ;
 ; X64-AVX1-LABEL: fallback_broadcast_v4i64_to_v8i64:
 ; X64-AVX1:       # %bb.0: # %entry
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [3,4]
-; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm3, %xmm3
-; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2]
+; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2]
+; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm4
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [3,4]
 ; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm3 = [1,2,3,4]
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm6
-; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm6
-; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm2, %xmm2
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm6
-; X64-AVX1-NEXT:    vpaddq %xmm4, %xmm6, %xmm4
-; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; X64-AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
-; X64-AVX1-NEXT:    vandps %ymm3, %ymm2, %ymm2
-; X64-AVX1-NEXT:    vmovups %ymm0, {{.*}}(%rip)
+; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm6 = [1,2,3,4]
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm7
+; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm7
+; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm7, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm7
+; X64-AVX1-NEXT:    vpaddq %xmm5, %xmm7, %xmm5
+; X64-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; X64-AVX1-NEXT:    vandps %ymm6, %ymm1, %ymm1
+; X64-AVX1-NEXT:    vandps %ymm6, %ymm2, %ymm2
+; X64-AVX1-NEXT:    vmovdqu %xmm0, ga4+{{.*}}(%rip)
+; X64-AVX1-NEXT:    vmovdqu %xmm4, {{.*}}(%rip)
 ; X64-AVX1-NEXT:    vmovups %ymm2, gb4+{{.*}}(%rip)
 ; X64-AVX1-NEXT:    vmovups %ymm1, {{.*}}(%rip)
 ; X64-AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec_fptrunc.ll b/llvm/test/CodeGen/X86/vec_fptrunc.ll
index bb6be6cd9e84b..e7318d9d69723 100644
--- a/llvm/test/CodeGen/X86/vec_fptrunc.ll
+++ b/llvm/test/CodeGen/X86/vec_fptrunc.ll
@@ -99,9 +99,8 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
 ; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
-; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
-; X32-AVX-NEXT:    vzeroupper
+; X32-AVX-NEXT:    vmovupd %xmm1, 16(%eax)
+; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
 ; X32-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: fptrunc_frommem8:
@@ -120,9 +119,8 @@ define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
 ; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
-; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
-; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    vmovupd %xmm1, 16(%rsi)
+; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
 ; X64-AVX-NEXT:    retq
 entry:
   %0 = load <8 x double>, <8 x double>* %in
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index aeb1951fbef87..d37795b55cdc2 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -693,8 +693,8 @@ define <8 x i32> @saddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm8, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: saddo_v8i32:
@@ -824,48 +824,48 @@ define <16 x i32> @saddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
 ; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm10
-; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpandn %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm8, %xmm3, %xmm8
+; AVX1-NEXT:    vpcmpgtd %xmm10, %xmm5, %xmm1
+; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm8, %xmm1, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm7
 ; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm7
+; AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm5, %xmm1
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm1, %xmm7
-; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm5, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT:    vpandn %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm3
+; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm7
 ; AVX1-NEXT:    vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm7, %xmm4
-; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm5, %xmm2
-; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
-; AVX1-NEXT:    vpandn %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm10, %ymm3
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm7, %xmm3
+; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm5, %xmm0
+; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm7, %xmm0
+; AVX1-NEXT:    vpandn %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm3, %xmm3
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm9, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm10, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: saddo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index ab97c51df410b..3f53f9f2250cc 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -973,8 +973,8 @@ define <8 x i32> @smulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm5, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: smulo_v8i32:
@@ -1266,59 +1266,59 @@ define <16 x i32> @smulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpmuldq %xmm4, %xmm6, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT:    vpmulld %xmm4, %xmm6, %xmm4
-; AVX1-NEXT:    vpsrad $31, %xmm4, %xmm6
+; AVX1-NEXT:    vpmulld %xmm4, %xmm6, %xmm8
+; AVX1-NEXT:    vpsrad $31, %xmm8, %xmm6
 ; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm8, %xmm8
-; AVX1-NEXT:    vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; AVX1-NEXT:    vpmuldq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; AVX1-NEXT:    vpmuldq %xmm7, %xmm4, %xmm4
 ; AVX1-NEXT:    vpmuldq %xmm3, %xmm1, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm7[0,1],xmm5[2,3],xmm7[4,5],xmm5[6,7]
-; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm5, %xmm3
-; AVX1-NEXT:    vpxor %xmm8, %xmm3, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm9
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm7[0,1],xmm4[2,3],xmm7[4,5],xmm4[6,7]
+; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm9
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm7[1,1,3,3]
-; AVX1-NEXT:    vpmuldq %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpmuldq %xmm5, %xmm7, %xmm6
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm7[1,1,3,3]
+; AVX1-NEXT:    vpmuldq %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpmuldq %xmm4, %xmm7, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm6[0,1],xmm3[2,3],xmm6[4,5],xmm3[6,7]
-; AVX1-NEXT:    vpmulld %xmm5, %xmm7, %xmm5
-; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm6
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm8, %xmm3, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm6[0,1],xmm1[2,3],xmm6[4,5],xmm1[6,7]
+; AVX1-NEXT:    vpmulld %xmm4, %xmm7, %xmm4
+; AVX1-NEXT:    vpsrad $31, %xmm4, %xmm6
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[1,1,3,3]
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm0[1,1,3,3]
 ; AVX1-NEXT:    vpmuldq %xmm6, %xmm7, %xmm6
 ; AVX1-NEXT:    vpmuldq %xmm2, %xmm0, %xmm7
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3],xmm7[4,5],xmm6[6,7]
-; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm6, %xmm2
-; AVX1-NEXT:    vpxor %xmm8, %xmm2, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm9, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm4
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm6, %xmm0
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm9, %xmm0, %xmm1
+; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm4, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm3, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm8, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: smulo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 15c0531d67a75..3dc73e3b4ba1e 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -714,8 +714,8 @@ define <8 x i32> @ssubo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm0, %ymm8, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm6, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ssubo_v8i32:
@@ -850,52 +850,52 @@ define <16 x i32> @ssubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm9, %xmm4
 ; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm4, %xmm7
-; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm9, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpandn %xmm3, %xmm7, %xmm3
-; AVX1-NEXT:    vpackssdw %xmm6, %xmm3, %xmm8
+; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm9, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm4, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpandn %xmm1, %xmm7, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm6, %xmm1, %xmm8
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm7
-; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm3, %xmm6
-; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm4
-; AVX1-NEXT:    vpcmpgtd %xmm4, %xmm9, %xmm7
-; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpandn %xmm3, %xmm6, %xmm3
-; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm6
+; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm1, %xmm6
+; AVX1-NEXT:    vpsubd %xmm4, %xmm7, %xmm7
+; AVX1-NEXT:    vpcmpgtd %xmm7, %xmm9, %xmm4
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpandn %xmm1, %xmm6, %xmm1
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm4
+; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm6
 ; AVX1-NEXT:    vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm7
-; AVX1-NEXT:    vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm7, %xmm6
-; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm9, %xmm2
-; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm7, %xmm2
-; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpandn %xmm2, %xmm6, %xmm2
-; AVX1-NEXT:    vpackssdw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpacksswb %xmm8, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm10, %ymm1, %ymm4
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm6, %xmm4
+; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm9, %xmm0
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm6, %xmm0
+; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpandn %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm8, %xmm0, %xmm1
+; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm4, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm3, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm10, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm7, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: ssubo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 41a0e258e3d12..4e9cd2efb74b2 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -501,8 +501,8 @@ define <8 x i32> @uaddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uaddo_v8i32:
@@ -633,19 +633,19 @@ define <16 x i32> @uaddo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm4, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm5, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: uaddo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 0c95b73853e96..0bcaacc21dfe4 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -843,10 +843,10 @@ define <8 x i32> @umulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm8, %xmm5, %xmm5
 ; AVX1-NEXT:    vpxor %xmm6, %xmm5, %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm5, %ymm2
-; AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm3
 ; AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1-NEXT:    vpmulld %xmm3, %xmm4, %xmm1
+; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
 ; AVX1-NEXT:    vmovaps %ymm2, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -1111,23 +1111,23 @@ define <16 x i32> @umulo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm9, %xmm5, %xmm5
 ; AVX1-NEXT:    vpackssdw %xmm13, %xmm5, %xmm5
 ; AVX1-NEXT:    vpacksswb %xmm11, %xmm5, %xmm5
+; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm2
 ; AVX1-NEXT:    vpmulld %xmm6, %xmm4, %xmm4
-; AVX1-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm2
-; AVX1-NEXT:    vpmulld %xmm10, %xmm12, %xmm0
-; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm3
+; AVX1-NEXT:    vpmulld %xmm3, %xmm1, %xmm3
+; AVX1-NEXT:    vpmulld %xmm10, %xmm12, %xmm6
 ; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[1,1,2,3]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm5[2,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm5[3,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[3,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm5, %xmm5
+; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm6, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: umulo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index b662ac45caf60..c5a7b19cf14dd 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -525,8 +525,8 @@ define <8 x i32> @usubo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) noun
 ; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: usubo_v8i32:
@@ -671,19 +671,19 @@ define <16 x i32> @usubo_v16i32(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %p2)
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpackssdw %xmm7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
-; AVX1-NEXT:    vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[1,1,2,3]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
+; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm0, %ymm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbd %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
 ; AVX1-NEXT:    vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vmovaps %ymm3, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm2, (%rdi)
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
+; AVX1-NEXT:    vmovdqa %xmm4, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm5, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: usubo_v16i32:
diff --git a/llvm/test/CodeGen/X86/vector-gep.ll b/llvm/test/CodeGen/X86/vector-gep.ll
index 8f62fe5382564..693380a48ee22 100644
--- a/llvm/test/CodeGen/X86/vector-gep.ll
+++ b/llvm/test/CodeGen/X86/vector-gep.ll
@@ -122,74 +122,88 @@ define <64 x i16*> @AGEP9(i16* %param, <64 x i32> %off) nounwind {
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-32, %esp
-; CHECK-NEXT:    subl $96, %esp
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm4
-; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm3
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
+; CHECK-NEXT:    subl $160, %esp
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm3
+; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm5
+; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
+; CHECK-NEXT:    vmovdqa %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; CHECK-NEXT:    vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa %xmm0, (%esp) # 16-byte Spill
+; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm4
 ; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
+; CHECK-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
+; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm1
 ; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
-; CHECK-NEXT:    vmovaps %ymm0, (%esp) # 32-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
-; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
-; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
-; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm4
-; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm5
-; CHECK-NEXT:    vpaddd %xmm5, %xmm5, %xmm5
-; CHECK-NEXT:    vpaddd %xmm5, %xmm3, %xmm5
-; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
-; CHECK-NEXT:    vpaddd %xmm4, %xmm3, %xmm4
-; CHECK-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm5
-; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm3, %xmm6
-; CHECK-NEXT:    vpaddd %xmm5, %xmm5, %xmm5
-; CHECK-NEXT:    vpaddd %xmm5, %xmm3, %xmm5
-; CHECK-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
-; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm6
-; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
+; CHECK-NEXT:    vpaddd %xmm1, %xmm5, %xmm1
+; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm6
 ; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
-; CHECK-NEXT:    vpaddd %xmm6, %xmm3, %xmm6
-; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm6, %ymm6
+; CHECK-NEXT:    vpaddd %xmm6, %xmm5, %xmm6
+; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm2
+; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpaddd %xmm2, %xmm5, %xmm2
 ; CHECK-NEXT:    vmovdqa 152(%ebp), %xmm7
 ; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
-; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm0
+; CHECK-NEXT:    vpaddd %xmm7, %xmm5, %xmm7
+; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm0
 ; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm0, %ymm0
-; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
-; CHECK-NEXT:    vpaddd %xmm7, %xmm3, %xmm7
-; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
-; CHECK-NEXT:    vinsertf128 $1, %xmm7, %ymm1, %ymm1
+; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
+; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
+; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
 ; CHECK-NEXT:    movl 8(%ebp), %eax
-; CHECK-NEXT:    vmovaps %ymm1, 224(%eax)
-; CHECK-NEXT:    vmovaps %ymm0, 192(%eax)
-; CHECK-NEXT:    vmovaps %ymm6, 160(%eax)
-; CHECK-NEXT:    vmovaps %ymm5, 128(%eax)
-; CHECK-NEXT:    vmovaps %ymm4, 96(%eax)
-; CHECK-NEXT:    vmovaps %ymm2, 64(%eax)
-; CHECK-NEXT:    vmovaps (%esp), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    vmovaps %ymm0, 32(%eax)
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    vmovaps %ymm0, (%eax)
+; CHECK-NEXT:    vmovdqa %xmm3, 240(%eax)
+; CHECK-NEXT:    vmovdqa %xmm0, 224(%eax)
+; CHECK-NEXT:    vmovdqa %xmm7, 208(%eax)
+; CHECK-NEXT:    vmovdqa %xmm2, 192(%eax)
+; CHECK-NEXT:    vmovdqa %xmm6, 176(%eax)
+; CHECK-NEXT:    vmovdqa %xmm1, 160(%eax)
+; CHECK-NEXT:    vmovdqa %xmm4, 144(%eax)
+; CHECK-NEXT:    vmovaps (%esp), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 128(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 112(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 96(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 80(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 64(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 48(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 32(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, 16(%eax)
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    vmovaps %xmm0, (%eax)
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index 54ebdbe026aa7..6a504269b9380 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -668,14 +668,14 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX1-LABEL: trunc16i32_16i16:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -686,8 +686,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -717,16 +717,16 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -792,16 +792,16 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_lshr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1291,14 +1291,14 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX1-LABEL: trunc32i16_32i8:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1317,23 +1317,19 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ;
 ; AVX512F-LABEL: trunc32i16_32i8:
 ; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc32i16_32i8:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 56e86a6bc95fd..0027fbe2657de 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -678,14 +678,14 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX1-LABEL: trunc16i32_16i16:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -696,8 +696,8 @@ define void @trunc16i32_16i16(<16 x i32> %a) {
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
 ; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -727,16 +727,16 @@ define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_ashr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrad $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrad $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -802,16 +802,16 @@ define void @trunc16i32_16i16_lshr(<16 x i32> %a) {
 ;
 ; AVX1-LABEL: trunc16i32_16i16_lshr:
 ; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
 ; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1301,14 +1301,14 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ; AVX1-LABEL: trunc32i16_32i8:
 ; AVX1:       # %bb.0: # %entry
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rax)
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vmovdqu %xmm1, (%rax)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1327,23 +1327,19 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
 ;
 ; AVX512F-LABEL: trunc32i16_32i8:
 ; AVX512F:       # %bb.0: # %entry
-; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: trunc32i16_32i8:
 ; AVX512VL:       # %bb.0: # %entry
-; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
-; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
+; AVX512VL-NEXT:    vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT:    vpmovdb %zmm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index bff39467c1eb2..8cd01b631d601 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -341,11 +341,10 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vmovdqa %xmm0, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm4, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_store_vf16_i8_stride4:
@@ -358,11 +357,10 @@ define void @interleaved_store_vf16_i8_stride4(<16 x i8> %x1, <16 x i8> %x2, <16
 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
 ; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm4, %ymm0
-; AVX2-NEXT:    vmovdqa %ymm0, 32(%rdi)
-; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vmovdqa %xmm0, 48(%rdi)
+; AVX2-NEXT:    vmovdqa %xmm4, 32(%rdi)
+; AVX2-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; AVX2-NEXT:    vmovdqa %xmm3, (%rdi)
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_store_vf16_i8_stride4:
@@ -888,37 +886,20 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(<128 x i8>* %ptr) {
 }
 
 define void @interleaved_store_vf8_i8_stride4(<8 x i8> %x1, <8 x i8> %x2, <8 x i8> %x3, <8 x i8> %x4, <32 x i8>* %p) {
-; AVX1-LABEL: interleaved_store_vf8_i8_stride4:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
-; AVX1-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2OR512-LABEL: interleaved_store_vf8_i8_stride4:
-; AVX2OR512:       # %bb.0:
-; AVX2OR512-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX2OR512-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
-; AVX2OR512-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
-; AVX2OR512-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; AVX2OR512-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2OR512-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX2OR512-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
-; AVX2OR512-NEXT:    vmovdqa %ymm0, (%rdi)
-; AVX2OR512-NEXT:    vzeroupper
-; AVX2OR512-NEXT:    retq
+; AVX-LABEL: interleaved_store_vf8_i8_stride4:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
+; AVX-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vpshufb %xmm4, %xmm3, %xmm1
+; AVX-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
+; AVX-NEXT:    vmovdqa %xmm2, (%rdi)
+; AVX-NEXT:    retq
 %v1 = shufflevector <8 x i8> %x1, <8 x i8> %x2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %v2 = shufflevector <8 x i8> %x3, <8 x i8> %x4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 %interleaved.vec = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0,i32 8,i32 16,i32 24,i32 1,i32 9,i32 17,i32 25,i32 2,i32 10,i32 18,i32 26,i32 3,i32 11,i32 19,i32 27,i32 4,i32 12,i32 20,i32 28,i32 5,i32 13,i32 21,i32 29,i32 6,i32 14,i32 22,i32 30,i32 7,i32 15,i32 23,i32 31>
@@ -1096,10 +1077,9 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vmovdqu %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rdi)
 ; AVX1-NEXT:    vmovdqu %xmm2, 32(%rdi)
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleaved_store_vf16_i8_stride3:
@@ -1116,10 +1096,9 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
 ; AVX2-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    vmovdqu %xmm0, 16(%rdi)
+; AVX2-NEXT:    vmovdqu %xmm1, (%rdi)
 ; AVX2-NEXT:    vmovdqu %xmm2, 32(%rdi)
-; AVX2-NEXT:    vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: interleaved_store_vf16_i8_stride3:

From c3c23b27a4d1f6eed39d17b6b5ca709b7b405e8e Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Tue, 4 Jun 2019 16:47:18 +0000
Subject: [PATCH 1041/1176] [libcxx] Add test to check min/max requirement to
 regular expression

This commit adds tests that repeated characters in regular expressions
are within numeric limits, and that a <= b in a regex like `x{a,b}`.

Thanks to Andrey Maksimov for the patch.

Differential Revision: https://reviews.llvm.org/D62816

llvm-svn: 362525
---
 .../excessive_brace_min_max.pass.cpp          | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 libcxx/test/std/re/re.grammar/excessive_brace_min_max.pass.cpp

diff --git a/libcxx/test/std/re/re.grammar/excessive_brace_min_max.pass.cpp b/libcxx/test/std/re/re.grammar/excessive_brace_min_max.pass.cpp
new file mode 100644
index 0000000000000..09bedc5bd8e2e
--- /dev/null
+++ b/libcxx/test/std/re/re.grammar/excessive_brace_min_max.pass.cpp
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <regex>
+// UNSUPPORTED: libcpp-no-exceptions
+// UNSUPPORTED: c++98, c++03
+
+// the "n" and "m" in `a{n,m}` should be within the numeric limits.
+// requirement "m >= n" should be checked.
+
+#include <regex>
+#include <cassert>
+#include "test_macros.h"
+
+int main(int, char**) {
+  // test that `n <= m`
+  for (std::regex_constants::syntax_option_type op :
+       {std::regex::basic}) {
+    try {
+      TEST_IGNORE_NODISCARD std::regex("a\\{3,2\\}", op);
+      assert(false);
+    } catch (const std::regex_error &e) {
+      assert(e.code() == std::regex_constants::error_badbrace);
+      LIBCPP_ASSERT(e.code() == std::regex_constants::error_badbrace);
+    }
+  }
+  for (std::regex_constants::syntax_option_type op :
+       {std::regex::ECMAScript, std::regex::extended, std::regex::egrep,
+        std::regex::awk}) {
+    try {
+      TEST_IGNORE_NODISCARD std::regex("a{3,2}", op);
+      assert(false);
+    } catch (const std::regex_error &e) {
+      assert(e.code() == std::regex_constants::error_badbrace);
+      LIBCPP_ASSERT(e.code() == std::regex_constants::error_badbrace);
+    }
+  }
+
+  // test that both bounds are within the limit
+  for (std::regex_constants::syntax_option_type op :
+       {std::regex::basic}) {
+    try {
+      TEST_IGNORE_NODISCARD std::regex("a\\{100000000000000000000,10000000000000000000\\}", op);
+      assert(false);
+    } catch (const std::regex_error &e) {
+      assert(e.code() == std::regex_constants::error_badbrace);
+      LIBCPP_ASSERT(e.code() == std::regex_constants::error_badbrace);
+    }
+  }
+  for (std::regex_constants::syntax_option_type op :
+       {std::regex::ECMAScript, std::regex::extended, std::regex::egrep,
+        std::regex::awk}) {
+    try {
+      TEST_IGNORE_NODISCARD std::regex("a{100000000000000000000,10000000000000000000}", op);
+      assert(false);
+    } catch (const std::regex_error &e) {
+      assert(e.code() == std::regex_constants::error_badbrace);
+      LIBCPP_ASSERT(e.code() == std::regex_constants::error_badbrace);
+    }
+  }
+  return 0;
+}

From c33944832c3821d83402a866bba22f71d21a09a0 Mon Sep 17 00:00:00 2001
From: Alex Brachet <alexbrachetmialot@gmail.com>
Date: Tue, 4 Jun 2019 16:55:30 +0000
Subject: [PATCH 1042/1176] [MACHO] Replaced calls to getStruct with
 getStructOrErr in functions returning Error or Expected or similar

llvm-svn: 362526
---
 llvm/lib/Object/MachOObjectFile.cpp | 121 ++++++++++++++++++++--------
 1 file changed, 88 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index ba811071b9774..1b7392e102957 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -291,7 +291,10 @@ static Error parseSegmentLoadCommand(
     for (unsigned J = 0; J < S.nsects; ++J) {
       const char *Sec = getSectionPtr(Obj, Load, J);
       Sections.push_back(Sec);
-      Section s = getStruct<Section>(Obj, Sec);
+      auto SectionOrErr = getStructOrErr<Section>(Obj, Sec);
+      if (!SectionOrErr)
+        return SectionOrErr.takeError();
+      Section s = SectionOrErr.get();
       if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
           Obj.getHeader().filetype != MachO::MH_DSYM &&
           s.flags != MachO::S_ZEROFILL &&
@@ -401,8 +404,10 @@ static Error checkSymtabCommand(const MachOObjectFile &Obj,
                           " LC_SYMTAB cmdsize too small");
   if (*SymtabLoadCmd != nullptr)
     return malformedError("more than one LC_SYMTAB command");
-  MachO::symtab_command Symtab =
-    getStruct<MachO::symtab_command>(Obj, Load.Ptr);
+  auto SymtabOrErr = getStructOrErr<MachO::symtab_command>(Obj, Load.Ptr);
+  if (!SymtabOrErr)
+    return SymtabOrErr.takeError();
+  MachO::symtab_command Symtab = SymtabOrErr.get();
   if (Symtab.cmdsize != sizeof(MachO::symtab_command))
     return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) +
                           " has incorrect cmdsize");
@@ -457,8 +462,11 @@ static Error checkDysymtabCommand(const MachOObjectFile &Obj,
                           " LC_DYSYMTAB cmdsize too small");
   if (*DysymtabLoadCmd != nullptr)
     return malformedError("more than one LC_DYSYMTAB command");
-  MachO::dysymtab_command Dysymtab =
-    getStruct<MachO::dysymtab_command>(Obj, Load.Ptr);
+  auto DysymtabOrErr =
+    getStructOrErr<MachO::dysymtab_command>(Obj, Load.Ptr);
+  if (!DysymtabOrErr)
+    return DysymtabOrErr.takeError();
+  MachO::dysymtab_command Dysymtab = DysymtabOrErr.get();
   if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command))
     return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) +
                           " has incorrect cmdsize");
@@ -588,8 +596,11 @@ static Error checkLinkeditDataCommand(const MachOObjectFile &Obj,
                           CmdName + " cmdsize too small");
   if (*LoadCmd != nullptr)
     return malformedError("more than one " + Twine(CmdName) + " command");
-  MachO::linkedit_data_command LinkData =
-    getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr);
+  auto LinkDataOrError =
+    getStructOrErr<MachO::linkedit_data_command>(Obj, Load.Ptr);
+  if (!LinkDataOrError)
+    return LinkDataOrError.takeError();
+  MachO::linkedit_data_command LinkData = LinkDataOrError.get();
   if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command))
     return malformedError(Twine(CmdName) + " command " +
                           Twine(LoadCommandIndex) + " has incorrect cmdsize");
@@ -623,8 +634,11 @@ static Error checkDyldInfoCommand(const MachOObjectFile &Obj,
   if (*LoadCmd != nullptr)
     return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY "
                           "command");
-  MachO::dyld_info_command DyldInfo =
-    getStruct<MachO::dyld_info_command>(Obj, Load.Ptr);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(Obj, Load.Ptr);
+  if (!DyldInfoOrErr)
+    return DyldInfoOrErr.takeError();
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command))
     return malformedError(Twine(CmdName) + " command " +
                           Twine(LoadCommandIndex) + " has incorrect cmdsize");
@@ -714,7 +728,10 @@ static Error checkDylibCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::dylib_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " cmdsize too small");
-  MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr);
+  auto CommandOrErr = getStructOrErr<MachO::dylib_command>(Obj, Load.Ptr);
+  if (!CommandOrErr)
+    return CommandOrErr.takeError();
+  MachO::dylib_command D = CommandOrErr.get();
   if (D.dylib.name < sizeof(MachO::dylib_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " name.offset field too small, not past "
@@ -760,7 +777,10 @@ static Error checkDyldCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::dylinker_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " cmdsize too small");
-  MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr);
+  auto CommandOrErr = getStructOrErr<MachO::dylinker_command>(Obj, Load.Ptr);
+  if (!CommandOrErr)
+    return CommandOrErr.takeError();
+  MachO::dylinker_command D = CommandOrErr.get();
   if (D.name < sizeof(MachO::dylinker_command))
     return malformedError("load command " + Twine(LoadCommandIndex) + " " +
                           CmdName + " name.offset field too small, not past "
@@ -805,7 +825,10 @@ static Error checkNoteCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize != sizeof(MachO::note_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_NOTE has incorrect cmdsize");
-  MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr);
+  auto NoteCmdOrErr = getStructOrErr<MachO::note_command>(Obj, Load.Ptr);
+  if (!NoteCmdOrErr)
+    return NoteCmdOrErr.takeError();
+  MachO::note_command Nt = NoteCmdOrErr.get();
   uint64_t FileSize = Obj.getData().size();
   if (Nt.offset > FileSize)
     return malformedError("offset field of LC_NOTE command " +
@@ -828,8 +851,11 @@ parseBuildVersionCommand(const MachOObjectFile &Obj,
                          const MachOObjectFile::LoadCommandInfo &Load,
                          SmallVectorImpl<const char*> &BuildTools,
                          uint32_t LoadCommandIndex) {
-  MachO::build_version_command BVC =
-      getStruct<MachO::build_version_command>(Obj, Load.Ptr);
+  auto BVCOrErr =
+    getStructOrErr<MachO::build_version_command>(Obj, Load.Ptr);
+  if (!BVCOrErr)
+    return BVCOrErr.takeError();
+  MachO::build_version_command BVC = BVCOrErr.get();
   if (Load.C.cmdsize !=
       sizeof(MachO::build_version_command) +
           BVC.ntools * sizeof(MachO::build_tool_version))
@@ -850,7 +876,10 @@ static Error checkRpathCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::rpath_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_RPATH cmdsize too small");
-  MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr);
+  auto ROrErr = getStructOrErr<MachO::rpath_command>(Obj, Load.Ptr);
+  if (!ROrErr)
+    return ROrErr.takeError();
+  MachO::rpath_command R = ROrErr.get();
   if (R.path < sizeof(MachO::rpath_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_RPATH path.offset field too small, not past "
@@ -903,8 +932,11 @@ static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::linker_option_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           " LC_LINKER_OPTION cmdsize too small");
-  MachO::linker_option_command L =
-    getStruct<MachO::linker_option_command>(Obj, Load.Ptr);
+  auto LinkOptionOrErr =
+    getStructOrErr<MachO::linker_option_command>(Obj, Load.Ptr);
+  if (!LinkOptionOrErr)
+    return LinkOptionOrErr.takeError();
+  MachO::linker_option_command L = LinkOptionOrErr.get();
   // Make sure the count of strings is correct.
   const char *string = (const char *)Load.Ptr +
                        sizeof(struct MachO::linker_option_command);
@@ -968,8 +1000,11 @@ static Error checkThreadCommand(const MachOObjectFile &Obj,
   if (Load.C.cmdsize < sizeof(MachO::thread_command))
     return malformedError("load command " + Twine(LoadCommandIndex) +
                           CmdName + " cmdsize too small");
-  MachO::thread_command T =
-    getStruct<MachO::thread_command>(Obj, Load.Ptr);
+  auto ThreadCommandOrErr =
+    getStructOrErr<MachO::thread_command>(Obj, Load.Ptr);
+  if (!ThreadCommandOrErr)
+    return ThreadCommandOrErr.takeError();
+  MachO::thread_command T = ThreadCommandOrErr.get();
   const char *state = Load.Ptr + sizeof(MachO::thread_command);
   const char *end = Load.Ptr + T.cmdsize;
   uint32_t nflavor = 0;
@@ -1160,8 +1195,10 @@ static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj,
                           " LC_TWOLEVEL_HINTS has incorrect cmdsize");
   if (*LoadCmd != nullptr)
     return malformedError("more than one LC_TWOLEVEL_HINTS command");
-  MachO::twolevel_hints_command Hints =
-    getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+  auto HintsOrErr = getStructOrErr<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+  if(!HintsOrErr)
+    return HintsOrErr.takeError();
+  MachO::twolevel_hints_command Hints = HintsOrErr.get();
   uint64_t FileSize = Obj.getData().size();
   if (Hints.offset > FileSize)
     return malformedError("offset field of LC_TWOLEVEL_HINTS command " +
@@ -2396,8 +2433,11 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
   // all the Libraries.
   if (LibrariesShortNames.size() == 0) {
     for (unsigned i = 0; i < Libraries.size(); i++) {
-      MachO::dylib_command D =
-        getStruct<MachO::dylib_command>(*this, Libraries[i]);
+      auto CommandOrErr =
+        getStructOrErr<MachO::dylib_command>(*this, Libraries[i]);
+      if (!CommandOrErr)
+        return object_error::parse_failed;
+      MachO::dylib_command D = CommandOrErr.get();
       if (D.dylib.name >= D.cmdsize)
         return object_error::parse_failed;
       const char *P = (const char *)(Libraries[i]) + D.dylib.name;
@@ -4491,8 +4531,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off));
   return makeArrayRef(Ptr, DyldInfo.rebase_size);
@@ -4502,8 +4545,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off));
   return makeArrayRef(Ptr, DyldInfo.bind_size);
@@ -4513,8 +4559,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off));
   return makeArrayRef(Ptr, DyldInfo.weak_bind_size);
@@ -4524,8 +4573,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off));
   return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
@@ -4535,8 +4587,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
   if (!DyldInfoLoadCmd)
     return None;
 
-  MachO::dyld_info_command DyldInfo =
-      getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  auto DyldInfoOrErr =
+    getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+  if (!DyldInfoOrErr)
+    return None;
+  MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
   const uint8_t *Ptr =
       reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off));
   return makeArrayRef(Ptr, DyldInfo.export_size);

From 2133daf232c5ee8f9cee5662e5b31f584992d3a6 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Tue, 4 Jun 2019 17:01:11 +0000
Subject: [PATCH 1043/1176] [GWP-ASan] Configuration options [3].

Summary:
See D60593 for further information.

This patch introduces the configuration options for GWP-ASan. In general, we expect the supporting allocator to populate the options struct, and give that to GWP-ASan during initialisation. For allocators that are okay with pulling in sanitizer_common, we also provide an optional parser that populates the gwp_asan::Options struct with values provided in the GWP_ASAN_OPTIONS environment variable.

This patch contains very little logic, and all of the testable components (i.e. the optional parser's internal logic) is tested as part of the sanitizer_common testbed.

Reviewers: vlad.tsyrklevich, morehouse, jfb

Reviewed By: morehouse

Subscribers: dexonsmith, kubamracek, mgorny, #sanitizers, llvm-commits, vitalybuka

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D62698

llvm-svn: 362527
---
 compiler-rt/lib/gwp_asan/CMakeLists.txt       | 48 ++++++++++
 .../lib/gwp_asan/optional/options_parser.cpp  | 91 +++++++++++++++++++
 .../lib/gwp_asan/optional/options_parser.h    | 32 +++++++
 compiler-rt/lib/gwp_asan/options.h            | 41 +++++++++
 compiler-rt/lib/gwp_asan/options.inc          | 41 +++++++++
 5 files changed, 253 insertions(+)
 create mode 100644 compiler-rt/lib/gwp_asan/optional/options_parser.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/optional/options_parser.h
 create mode 100644 compiler-rt/lib/gwp_asan/options.h
 create mode 100644 compiler-rt/lib/gwp_asan/options.inc

diff --git a/compiler-rt/lib/gwp_asan/CMakeLists.txt b/compiler-rt/lib/gwp_asan/CMakeLists.txt
index 6c83d86c6c899..771192f23e0bd 100644
--- a/compiler-rt/lib/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/CMakeLists.txt
@@ -10,6 +10,8 @@ set(GWP_ASAN_SOURCES
 set(GWP_ASAN_HEADERS
   mutex.h
   random.h
+  options.h
+  options.inc
 )
 
 # Ensure that GWP-ASan meets the delegated requirements of some supporting
@@ -20,6 +22,26 @@ set(GWP_ASAN_CFLAGS -fno-rtti -fno-exceptions -nostdinc++ -pthread)
 # Remove -stdlib= which is unused when passing -nostdinc++.
 string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
 
+# Options parsing support is optional. GwpAsan is totally independent of
+# sanitizer_common, the options parser is not. This is an optional library
+# that can be used by an allocator to automatically parse GwpAsan options from
+# the environment variable GWP_ASAN_FLAGS, but the allocator can choose to
+# implement its own options parsing and populate the Options struct itself.
+set(GWP_ASAN_OPTIONS_PARSER_SOURCES
+  optional/options_parser.cpp
+)
+set(GWP_ASAN_OPTIONS_PARSER_HEADERS
+  optional/options_parser.h
+  options.h
+  options.inc
+)
+set(GWP_ASAN_OPTIONS_PARSER_CFLAGS
+    ${GWP_ASAN_CFLAGS}
+    ${SANITIZER_COMMON_CFLAGS})
+set(GWP_ASAN_OPTIONS_PARSER_OBJECT_LIBS
+    RTSanitizerCommon
+    RTSanitizerCommonNoLibc)
+
 if (COMPILER_RT_HAS_GWP_ASAN)
   foreach(arch ${GWP_ASAN_SUPPORTED_ARCH})
     add_compiler_rt_runtime(
@@ -38,6 +60,32 @@ if (COMPILER_RT_HAS_GWP_ASAN)
       SOURCES ${GWP_ASAN_SOURCES}
       ADDITIONAL_HEADERS ${GWP_ASAN_HEADERS}
       CFLAGS ${GWP_ASAN_CFLAGS})
+
+  # Note: If you choose to add this as an object library, ensure you also
+  # include the sanitizer_common flag parsing object lib (generally
+  # 'RTSanitizerCommonNoTermination').
+  add_compiler_rt_object_libraries(RTGwpAsanOptionsParser
+      ARCHS ${GWP_ASAN_SUPPORTED_ARCH}
+      SOURCES ${GWP_ASAN_OPTIONS_PARSER_SOURCES}
+      ADDITIONAL_HEADERS ${GWP_ASAN_OPTIONS_PARSER_HEADERS}
+      CFLAGS ${GWP_ASAN_OPTIONS_PARSER_CFLAGS})
+
+  # Ensure that the build for the options parser succeeds, as
+  # 'RTGwpAsanOptionsParser' may not be built if it's not needed. This library
+  # has only a very small amount of logic, all of the testable components are
+  # exercised in the sanitizer_common test suite.
+  foreach(arch ${GWP_ASAN_SUPPORTED_ARCH})
+    add_compiler_rt_runtime(
+      clang_rt.gwp_asan_options_parser
+      SHARED
+      ARCHS ${arch}
+      SOURCES ${GWP_ASAN_OPTIONS_PARSER_SOURCES}
+      ADDITIONAL_HEADERS ${GWP_ASAN_OPTIONS_PARSER_HEADERS}
+      CFLAGS ${GWP_ASAN_OPTIONS_PARSER_CFLAGS}
+      OBJECT_LIBS ${GWP_ASAN_OPTIONS_PARSER_OBJECT_LIBS}
+      PARENT_TARGET gwp_asan
+    )
+  endforeach()
 endif()
 
 if(COMPILER_RT_INCLUDE_TESTS)
diff --git a/compiler-rt/lib/gwp_asan/optional/options_parser.cpp b/compiler-rt/lib/gwp_asan/optional/options_parser.cpp
new file mode 100644
index 0000000000000..ba9af4930f5eb
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/optional/options_parser.cpp
@@ -0,0 +1,91 @@
+//===-- options_parser.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/optional/options_parser.h"
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gwp_asan/options.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+
+namespace gwp_asan {
+namespace options {
+namespace {
+void registerGwpAsanFlags(__sanitizer::FlagParser *parser, Options *o) {
+#define GWP_ASAN_OPTION(Type, Name, DefaultValue, Description)                 \
+  RegisterFlag(parser, #Name, Description, &o->Name);
+#include "gwp_asan/options.inc"
+#undef GWP_ASAN_OPTION
+}
+
+const char *getCompileDefinitionGwpAsanDefaultOptions() {
+#ifdef GWP_ASAN_DEFAULT_OPTIONS
+  return SANITIZER_STRINGIFY(GWP_ASAN_DEFAULT_OPTIONS);
+#else
+  return "";
+#endif
+}
+
+const char *getGwpAsanDefaultOptions() {
+  return (__gwp_asan_default_options) ? __gwp_asan_default_options() : "";
+}
+
+Options *getOptionsInternal() {
+  static Options GwpAsanFlags;
+  return &GwpAsanFlags;
+}
+} // anonymous namespace
+
+void initOptions() {
+  Options *o = getOptionsInternal();
+  o->setDefaults();
+
+  __sanitizer::FlagParser Parser;
+  registerGwpAsanFlags(&Parser, o);
+
+  // Override from compile definition.
+  Parser.ParseString(getCompileDefinitionGwpAsanDefaultOptions());
+
+  // Override from user-specified string.
+  Parser.ParseString(getGwpAsanDefaultOptions());
+
+  // Override from environment.
+  Parser.ParseString(__sanitizer::GetEnv("GWP_ASAN_OPTIONS"));
+
+  __sanitizer::InitializeCommonFlags();
+  if (__sanitizer::Verbosity())
+    __sanitizer::ReportUnrecognizedFlags();
+
+  if (!o->Enabled)
+    return;
+
+  // Sanity checks for the parameters.
+  if (o->MaxSimultaneousAllocations <= 0) {
+    __sanitizer::Printf("GWP-ASan ERROR: MaxSimultaneousAllocations must be > "
+                        "0 when GWP-ASan is enabled.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  if (o->SampleRate < 1) {
+    __sanitizer::Printf(
+        "GWP-ASan ERROR: SampleRate must be > 0 when GWP-ASan is enabled.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  o->Printf = __sanitizer::Printf;
+}
+
+const Options &getOptions() { return *getOptionsInternal(); }
+
+} // namespace options
+} // namespace gwp_asan
diff --git a/compiler-rt/lib/gwp_asan/optional/options_parser.h b/compiler-rt/lib/gwp_asan/optional/options_parser.h
new file mode 100644
index 0000000000000..7a1d3b098a3e9
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/optional/options_parser.h
@@ -0,0 +1,32 @@
+//===-- options_parser.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_OPTIONAL_OPTIONS_PARSER_H_
+#define GWP_ASAN_OPTIONAL_OPTIONS_PARSER_H_
+
+#include "gwp_asan/options.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace gwp_asan {
+namespace options {
+
+// Parse the options from the GWP_ASAN_FLAGS environment variable.
+void initOptions();
+// Returns a pointer to the initialised options. Call initOptions() prior to
+// calling this function.
+const Options &getOptions();
+
+} // namespace options
+} // namespace gwp_asan
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char *
+__gwp_asan_default_options();
+}
+
+#endif // GWP_ASAN_OPTIONAL_OPTIONS_PARSER_H_
diff --git a/compiler-rt/lib/gwp_asan/options.h b/compiler-rt/lib/gwp_asan/options.h
new file mode 100644
index 0000000000000..c1b6e67193a47
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/options.h
@@ -0,0 +1,41 @@
+//===-- options.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_OPTIONS_H_
+#define GWP_ASAN_OPTIONS_H_
+
+namespace gwp_asan {
+namespace options {
+// The function pointer type for printf(). Follows the standard format from the
+// sanitizers library. If the supported allocator exposes printing via a
+// different function signature, please provide a wrapper which has this
+// printf() signature, and pass the wrapper instead.
+typedef void (*Printf_t)(const char *Format, ...);
+
+struct Options {
+  Printf_t Printf = nullptr;
+
+  // Read the options from the included definitions file.
+#define GWP_ASAN_OPTION(Type, Name, DefaultValue, Description)                 \
+  Type Name = DefaultValue;
+#include "gwp_asan/options.inc"
+#undef GWP_ASAN_OPTION
+
+  void setDefaults() {
+#define GWP_ASAN_OPTION(Type, Name, DefaultValue, Description)                 \
+  Name = DefaultValue;
+#include "gwp_asan/options.inc"
+#undef GWP_ASAN_OPTION
+
+    Printf = nullptr;
+  }
+};
+} // namespace options
+} // namespace gwp_asan
+
+#endif // GWP_ASAN_OPTIONS_H_
diff --git a/compiler-rt/lib/gwp_asan/options.inc b/compiler-rt/lib/gwp_asan/options.inc
new file mode 100644
index 0000000000000..9042b11895aec
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/options.inc
@@ -0,0 +1,41 @@
+//===-- options.inc ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_OPTION
+#error "Define GWP_ASAN_OPTION prior to including this file!"
+#endif
+
+GWP_ASAN_OPTION(bool, Enabled, true, "Is GWP-ASan enabled? Defaults to true.")
+
+GWP_ASAN_OPTION(
+    bool, PerfectlyRightAlign, false,
+    "When allocations are right-aligned, should we perfectly align them up to "
+    "the page boundary? By default (false), we round up allocation size to the "
+    "nearest power of two (1, 2, 4, 8, 16) up to a maximum of 16-byte "
+    "alignment for performance reasons. Setting this to true can find single "
+    "byte buffer-overflows for multibyte allocations at the cost of "
+    "performance, and may be incompatible with some architectures.")
+
+GWP_ASAN_OPTION(
+    int, MaxSimultaneousAllocations, 16,
+    "Number of usable guarded slots in the allocation pool. Defaults to 16.")
+
+GWP_ASAN_OPTION(int, SampleRate, 5000,
+                "The probability (1 / SampleRate) that an allocation is "
+                "selected for GWP-ASan sampling. Default is 5000. Sample rates "
+                "up to (2^31 - 1) are supported.")
+
+GWP_ASAN_OPTION(
+    bool, InstallSignalHandlers, true,
+    "Install GWP-ASan signal handlers for SIGSEGV during dynamic loading. This "
+    "allows better error reports by providing stack traces for allocation and "
+    "deallocation when reporting a memory error. GWP-ASan's signal handler "
+    "will forward the signal to any previously-installed handler, and user "
+    "programs that install further signal handlers should make sure they do "
+    "the same. Note, if the previously installed SIGSEGV handler is SIG_IGN, "
+    "we terminate the process after dumping the error report.")

From 78ec94e4ec16e6f8abc7885f0c2ec159f02fe89e Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 4 Jun 2019 17:05:06 +0000
Subject: [PATCH 1044/1176] [NFC][Codegen][AMDGPU] Autogenerate
 commute-shifts.ll test

Being affected by upcoming patch

llvm-svn: 362528
---
 llvm/test/CodeGen/AMDGPU/commute-shifts.ll | 45 ++++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 415a3156699cc..81ca354574d5f 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -1,10 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-; GCN-LABEL: {{^}}main:
-; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
 define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
+; SI-LABEL: main:
+; SI:       ; %bb.0: ; %bb
+; SI-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NEXT:    s_mov_b32 s0, 0
+; SI-NEXT:    s_mov_b32 s1, s0
+; SI-NEXT:    s_mov_b32 s2, s0
+; SI-NEXT:    s_mov_b32 s3, s0
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s0
+; SI-NEXT:    s_mov_b32 s6, s0
+; SI-NEXT:    s_mov_b32 s7, s0
+; SI-NEXT:    image_load v2, v0, s[0:7] dmask:0x1 unorm
+; SI-NEXT:    v_and_b32_e32 v0, 7, v0
+; SI-NEXT:    v_lshl_b32_e32 v0, 1, v0
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v0, v2, v0
+; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; SI-NEXT:    v_cvt_pkrtz_f16_f32_e32 v0, s0, v0
+; SI-NEXT:    ; return to shader part epilog
+;
+; VI-LABEL: main:
+; VI:       ; %bb.0: ; %bb
+; VI-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; VI-NEXT:    s_mov_b32 s0, 0
+; VI-NEXT:    s_mov_b32 s1, s0
+; VI-NEXT:    s_mov_b32 s2, s0
+; VI-NEXT:    s_mov_b32 s3, s0
+; VI-NEXT:    s_mov_b32 s4, s0
+; VI-NEXT:    s_mov_b32 s5, s0
+; VI-NEXT:    s_mov_b32 s6, s0
+; VI-NEXT:    s_mov_b32 s7, s0
+; VI-NEXT:    image_load v2, v0, s[0:7] dmask:0x1 unorm
+; VI-NEXT:    v_and_b32_e32 v0, 7, v0
+; VI-NEXT:    v_lshlrev_b32_e64 v0, v0, 1
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_and_b32_e32 v0, v2, v0
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VI-NEXT:    v_cvt_pkrtz_f16_f32 v0, s0, v0
+; VI-NEXT:    ; return to shader part epilog
 bb:
   %tmp = fptosi float %arg0 to i32
   %tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)

From 925553ec91f77be6bc6de86b766f7c1bd1c8ea67 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 4 Jun 2019 17:05:34 +0000
Subject: [PATCH 1045/1176] [NFC][Codegen][PowerPC] Autogenerate shift-cmp.ll
 test

Being affected by upcoming patch

llvm-svn: 362529
---
 llvm/test/CodeGen/PowerPC/shift-cmp.ll | 46 +++++++++++++-------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/shift-cmp.ll b/llvm/test/CodeGen/PowerPC/shift-cmp.ll
index 299b643be7339..ffbcd6b85c04b 100644
--- a/llvm/test/CodeGen/PowerPC/shift-cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/shift-cmp.ll
@@ -1,54 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-unknown-linux-gnu"
 
 define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_variable_power_of_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subfic 4, 4, 32
+; CHECK-NEXT:    rlwnm 3, 3, 4, 31, 31
+; CHECK-NEXT:    blr
   %shl = shl i32 1, %y
   %and = and i32 %x, %shl
   %cmp = icmp eq i32 %and, %shl
   ret i1 %cmp
-
-; CHECK-LABEL: @and_cmp_variable_power_of_two
-; CHECK: subfic 4, 4, 32
-; CHECK: rlwnm 3, 3, 4, 31, 31
-; CHECK: blr
 }
 
 define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) {
+; CHECK-LABEL: and_cmp_variable_power_of_two_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subfic 4, 4, 64
+; CHECK-NEXT:    rldcl 3, 3, 4, 63
+; CHECK-NEXT:    blr
   %shl = shl i64 1, %y
   %and = and i64 %x, %shl
   %cmp = icmp eq i64 %and, %shl
   ret i1 %cmp
-
-; CHECK-LABEL: @and_cmp_variable_power_of_two_64
-; CHECK: subfic 4, 4, 64
-; CHECK: rldcl 3, 3, 4, 63
-; CHECK: blr
 }
 
 define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_ncmp_variable_power_of_two:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subfic 4, 4, 32
+; CHECK-NEXT:    nor 3, 3, 3
+; CHECK-NEXT:    rlwnm 3, 3, 4, 31, 31
+; CHECK-NEXT:    blr
   %shl = shl i32 1, %y
   %and = and i32 %x, %shl
   %cmp = icmp ne i32 %and, %shl
   ret i1 %cmp
-
-; CHECK-LABEL: @and_ncmp_variable_power_of_two
-; CHECK-DAG: subfic 4, 4, 32
-; CHECK-DAG: nor [[REG:[0-9]+]], 3, 3
-; CHECK: rlwnm 3, [[REG]], 4, 31, 31
-; CHECK: blr
 }
 
 define i1 @and_ncmp_variable_power_of_two_64(i64 %x, i64 %y) {
+; CHECK-LABEL: and_ncmp_variable_power_of_two_64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    not 3, 3
+; CHECK-NEXT:    subfic 4, 4, 64
+; CHECK-NEXT:    rldcl 3, 3, 4, 63
+; CHECK-NEXT:    blr
   %shl = shl i64 1, %y
   %and = and i64 %x, %shl
   %cmp = icmp ne i64 %and, %shl
   ret i1 %cmp
-
-; CHECK-LABEL: @and_ncmp_variable_power_of_two_64
-; CHECK-DAG: subfic 4, 4, 64
-; CHECK-DAG: not [[REG:[0-9]+]], 3
-; CHECK: rldcl 3, [[REG]], 4, 63
-; CHECK: blr
 }
-

From f4302ad35e340f01529bf32919410b2577f899bd Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Tue, 4 Jun 2019 17:15:48 +0000
Subject: [PATCH 1046/1176] [Syntax] Do not depend on llvm targets for Syntax
 tests. NFC

They are not required and only slow down the build.

llvm-svn: 362530
---
 clang/unittests/Tooling/Syntax/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt
index 4150a9ff1e1b5..fbcca9d04e530 100644
--- a/clang/unittests/Tooling/Syntax/CMakeLists.txt
+++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
   Support
   )
 

From 7bf8f6fa8ab123fe97ccd82d9a0ddff85505ee5f Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 4 Jun 2019 17:17:20 +0000
Subject: [PATCH 1047/1176] PR42104: Support instantiations of lambdas that
 implicitly capture packs.

Two changes:
 * Track odr-use via FunctionParmPackExprs to properly handle dependent
   odr-uses of packs in generic lambdas.
 * Do not instantiate implicit captures; instead, regenerate them by
   instantiating the body of the lambda. This is necessary to
   distinguish between cases where only one element of a pack is
   captured and cases where the entire pack is captured.

This reinstates r362358 (reverted in r362375) with a fix for an
uninitialized variable use in UpdateMarkingForLValueToRValue.

llvm-svn: 362531
---
 clang/include/clang/Sema/ScopeInfo.h          | 19 +++---
 clang/include/clang/Sema/Sema.h               |  1 +
 clang/lib/Sema/ScopeInfo.cpp                  | 28 ++++-----
 clang/lib/Sema/SemaExpr.cpp                   | 48 +++++++++-----
 clang/lib/Sema/SemaExprCXX.cpp                | 11 +---
 clang/lib/Sema/SemaTemplateInstantiate.cpp    | 14 +++--
 clang/lib/Sema/TreeTransform.h                | 63 +++++++++++++++----
 .../cxx1y-generic-lambdas-capturing.cpp       | 27 +++++++-
 .../test/SemaTemplate/lambda-capture-pack.cpp | 17 +++++
 9 files changed, 161 insertions(+), 67 deletions(-)
 create mode 100644 clang/test/SemaTemplate/lambda-capture-pack.cpp

diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 177c88d7e8475..ea2595113d589 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_SEMA_SCOPEINFO_H
 
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/CapturedStmt.h"
 #include "clang/Basic/LLVM.h"
@@ -913,7 +914,8 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
   ///   };
   /// }
   void addPotentialCapture(Expr *VarExpr) {
-    assert(isa<DeclRefExpr>(VarExpr) || isa<MemberExpr>(VarExpr));
+    assert(isa<DeclRefExpr>(VarExpr) || isa<MemberExpr>(VarExpr) ||
+           isa<FunctionParmPackExpr>(VarExpr));
     PotentiallyCapturingExprs.push_back(VarExpr);
   }
 
@@ -965,13 +967,15 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
   ///  building such a node. So we need a rule that anyone can implement and get
   ///  exactly the same result".
   void markVariableExprAsNonODRUsed(Expr *CapturingVarExpr) {
-    assert(isa<DeclRefExpr>(CapturingVarExpr)
-        || isa<MemberExpr>(CapturingVarExpr));
+    assert(isa<DeclRefExpr>(CapturingVarExpr) ||
+           isa<MemberExpr>(CapturingVarExpr) ||
+           isa<FunctionParmPackExpr>(CapturingVarExpr));
     NonODRUsedCapturingExprs.insert(CapturingVarExpr);
   }
   bool isVariableExprMarkedAsNonODRUsed(Expr *CapturingVarExpr) const {
-    assert(isa<DeclRefExpr>(CapturingVarExpr)
-      || isa<MemberExpr>(CapturingVarExpr));
+    assert(isa<DeclRefExpr>(CapturingVarExpr) ||
+           isa<MemberExpr>(CapturingVarExpr) ||
+           isa<FunctionParmPackExpr>(CapturingVarExpr));
     return NonODRUsedCapturingExprs.count(CapturingVarExpr);
   }
   void removePotentialCapture(Expr *E) {
@@ -993,9 +997,8 @@ class LambdaScopeInfo final : public CapturingScopeInfo {
                                   PotentialThisCaptureLocation.isValid();
   }
 
-  // When passed the index, returns the VarDecl and Expr associated
-  // with the index.
-  void getPotentialVariableCapture(unsigned Idx, VarDecl *&VD, Expr *&E) const;
+  void visitPotentialCaptures(
+      llvm::function_ref<void(VarDecl *, Expr *)> Callback) const;
 };
 
 FunctionScopeInfo::WeakObjectProfileTy::WeakObjectProfileTy()
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 0c487725bdd42..ed41143161d57 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4181,6 +4181,7 @@ class Sema {
   void MarkVariableReferenced(SourceLocation Loc, VarDecl *Var);
   void MarkDeclRefReferenced(DeclRefExpr *E, const Expr *Base = nullptr);
   void MarkMemberReferenced(MemberExpr *E);
+  void MarkFunctionParmPackReferenced(FunctionParmPackExpr *E);
   void MarkCaptureUsedInEnclosingContext(VarDecl *Capture, SourceLocation Loc,
                                          unsigned CapturingScopeIndex);
 
diff --git a/clang/lib/Sema/ScopeInfo.cpp b/clang/lib/Sema/ScopeInfo.cpp
index e84e592a4827e..b2a26af9b4a57 100644
--- a/clang/lib/Sema/ScopeInfo.cpp
+++ b/clang/lib/Sema/ScopeInfo.cpp
@@ -229,20 +229,20 @@ bool CapturingScopeInfo::isVLATypeCaptured(const VariableArrayType *VAT) const {
   return false;
 }
 
-void LambdaScopeInfo::getPotentialVariableCapture(unsigned Idx, VarDecl *&VD,
-                                                  Expr *&E) const {
-  assert(Idx < getNumPotentialVariableCaptures() &&
-         "Index of potential capture must be within 0 to less than the "
-         "number of captures!");
-  E = PotentiallyCapturingExprs[Idx];
-  if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E))
-    VD = dyn_cast<VarDecl>(DRE->getFoundDecl());
-  else if (MemberExpr *ME = dyn_cast<MemberExpr>(E))
-    VD = dyn_cast<VarDecl>(ME->getMemberDecl());
-  else
-    llvm_unreachable("Only DeclRefExprs or MemberExprs should be added for "
-    "potential captures");
-  assert(VD);
+void LambdaScopeInfo::visitPotentialCaptures(
+    llvm::function_ref<void(VarDecl *, Expr *)> Callback) const {
+  for (Expr *E : PotentiallyCapturingExprs) {
+    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+      Callback(cast<VarDecl>(DRE->getFoundDecl()), E);
+    } else if (auto *ME = dyn_cast<MemberExpr>(E)) {
+      Callback(cast<VarDecl>(ME->getMemberDecl()), E);
+    } else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
+      for (VarDecl *VD : *FP)
+        Callback(VD, E);
+    } else {
+      llvm_unreachable("unexpected expression in potential captures list");
+    }
+  }
 }
 
 FunctionScopeInfo::~FunctionScopeInfo() { }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 72b61b8e5842f..1e9164bc3ac56 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -14610,7 +14610,9 @@ namespace {
     // context so never needs to be transformed.
     // FIXME: Ideally we wouldn't transform the closure type either, and would
     // just recreate the capture expressions and lambda expression.
-    StmtResult TransformLambdaBody(Stmt *Body) { return Body; }
+    StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body) {
+      return SkipLambdaBody(E, Body);
+    }
   };
 }
 
@@ -15054,7 +15056,7 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
 ///    *FunctionScopeIndexToStopAt on the FunctionScopeInfo stack.
 static void
 MarkVarDeclODRUsed(VarDecl *Var, SourceLocation Loc, Sema &SemaRef,
-                   const unsigned *const FunctionScopeIndexToStopAt) {
+                   const unsigned *const FunctionScopeIndexToStopAt = nullptr) {
   // Keep track of used but undefined variables.
   // FIXME: We shouldn't suppress this warning for static data members.
   if (Var->hasDefinition(SemaRef.Context) == VarDecl::DeclarationOnly &&
@@ -15735,14 +15737,21 @@ void Sema::UpdateMarkingForLValueToRValue(Expr *E) {
   // variable.
   if (LambdaScopeInfo *LSI = getCurLambda()) {
     Expr *SansParensExpr = E->IgnoreParens();
-    VarDecl *Var = nullptr;
+    VarDecl *Var;
+    ArrayRef<VarDecl *> Vars(&Var, &Var + 1);
     if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(SansParensExpr))
       Var = dyn_cast<VarDecl>(DRE->getFoundDecl());
     else if (MemberExpr *ME = dyn_cast<MemberExpr>(SansParensExpr))
       Var = dyn_cast<VarDecl>(ME->getMemberDecl());
+    else if (auto *FPPE = dyn_cast<FunctionParmPackExpr>(SansParensExpr))
+      Vars = llvm::makeArrayRef(FPPE->begin(), FPPE->end());
+    else
+      Vars = None;
 
-    if (Var && IsVariableNonDependentAndAConstantExpression(Var, Context))
-      LSI->markVariableExprAsNonODRUsed(SansParensExpr);
+    for (VarDecl *VD : Vars) {
+      if (VD && IsVariableNonDependentAndAConstantExpression(VD, Context))
+        LSI->markVariableExprAsNonODRUsed(SansParensExpr);
+    }
   }
 }
 
@@ -15767,20 +15776,18 @@ void Sema::CleanupVarDeclMarking() {
   std::swap(LocalMaybeODRUseExprs, MaybeODRUseExprs);
 
   for (Expr *E : LocalMaybeODRUseExprs) {
-    VarDecl *Var;
-    SourceLocation Loc;
-    if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
-      Var = cast<VarDecl>(DRE->getDecl());
-      Loc = DRE->getLocation();
-    } else if (MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
-      Var = cast<VarDecl>(ME->getMemberDecl());
-      Loc = ME->getMemberLoc();
+    if (auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+      MarkVarDeclODRUsed(cast<VarDecl>(DRE->getDecl()),
+                         DRE->getLocation(), *this);
+    } else if (auto *ME = dyn_cast<MemberExpr>(E)) {
+      MarkVarDeclODRUsed(cast<VarDecl>(ME->getMemberDecl()), ME->getMemberLoc(),
+                         *this);
+    } else if (auto *FP = dyn_cast<FunctionParmPackExpr>(E)) {
+      for (VarDecl *VD : *FP)
+        MarkVarDeclODRUsed(VD, FP->getParameterPackLocation(), *this);
     } else {
       llvm_unreachable("Unexpected expression");
     }
-
-    MarkVarDeclODRUsed(Var, Loc, *this,
-                       /*MaxFunctionScopeIndex Pointer*/ nullptr);
   }
 
   assert(MaybeODRUseExprs.empty() &&
@@ -15789,7 +15796,8 @@ void Sema::CleanupVarDeclMarking() {
 
 static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
                                     VarDecl *Var, Expr *E) {
-  assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E)) &&
+  assert((!E || isa<DeclRefExpr>(E) || isa<MemberExpr>(E) ||
+          isa<FunctionParmPackExpr>(E)) &&
          "Invalid Expr argument to DoMarkVarDeclReferenced");
   Var->setReferenced();
 
@@ -16022,6 +16030,12 @@ void Sema::MarkMemberReferenced(MemberExpr *E) {
   MarkExprReferenced(*this, Loc, E->getMemberDecl(), E, MightBeOdrUse);
 }
 
+/// Perform reference-marking and odr-use handling for a FunctionParmPackExpr.
+void Sema::MarkFunctionParmPackReferenced(FunctionParmPackExpr *E) {
+  for (VarDecl *VD : *E)
+    MarkExprReferenced(*this, E->getParameterPackLocation(), VD, E, true);
+}
+
 /// Perform marking for a reference to an arbitrary declaration.  It
 /// marks the declaration referenced, and performs odr-use checking for
 /// functions and variables. This method should not be used when building a
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index ac050fa1ef55c..5884cf906fd15 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7427,12 +7427,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
   // All the potentially captureable variables in the current nested
   // lambda (within a generic outer lambda), must be captured by an
   // outer lambda that is enclosed within a non-dependent context.
-  const unsigned NumPotentialCaptures =
-      CurrentLSI->getNumPotentialVariableCaptures();
-  for (unsigned I = 0; I != NumPotentialCaptures; ++I) {
-    Expr *VarExpr = nullptr;
-    VarDecl *Var = nullptr;
-    CurrentLSI->getPotentialVariableCapture(I, Var, VarExpr);
+  CurrentLSI->visitPotentialCaptures([&] (VarDecl *Var, Expr *VarExpr) {
     // If the variable is clearly identified as non-odr-used and the full
     // expression is not instantiation dependent, only then do we not
     // need to check enclosing lambda's for speculative captures.
@@ -7446,7 +7441,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
     // }
     if (CurrentLSI->isVariableExprMarkedAsNonODRUsed(VarExpr) &&
         !IsFullExprInstantiationDependent)
-      continue;
+      return;
 
     // If we have a capture-capable lambda for the variable, go ahead and
     // capture the variable in that lambda (and all its enclosing lambdas).
@@ -7478,7 +7473,7 @@ static void CheckIfAnyEnclosingLambdasMustCaptureAnyPotentialCaptures(
                           DeclRefType, nullptr);
       }
     }
-  }
+  });
 
   // Check if 'this' needs to be captured.
   if (CurrentLSI->hasPotentialThisCapture()) {
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index ba54d5010bab4..973f564d30583 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1368,9 +1368,11 @@ TemplateInstantiator::TransformFunctionParmPackExpr(FunctionParmPackExpr *E) {
     Vars.push_back(D);
   }
 
-  return FunctionParmPackExpr::Create(getSema().Context, T,
-                                      E->getParameterPack(),
-                                      E->getParameterPackLocation(), Vars);
+  auto *PackExpr =
+      FunctionParmPackExpr::Create(getSema().Context, T, E->getParameterPack(),
+                                   E->getParameterPackLocation(), Vars);
+  getSema().MarkFunctionParmPackReferenced(PackExpr);
+  return PackExpr;
 }
 
 ExprResult
@@ -1389,8 +1391,10 @@ TemplateInstantiator::TransformFunctionParmPackRefExpr(DeclRefExpr *E,
       QualType T = TransformType(E->getType());
       if (T.isNull())
         return ExprError();
-      return FunctionParmPackExpr::Create(getSema().Context, T, PD,
-                                          E->getExprLoc(), *Pack);
+      auto *PackExpr = FunctionParmPackExpr::Create(getSema().Context, T, PD,
+                                                    E->getExprLoc(), *Pack);
+      getSema().MarkFunctionParmPackReferenced(PackExpr);
+      return PackExpr;
     }
 
     TransformedDecl = (*Pack)[getSema().ArgumentPackSubstitutionIndex];
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 9f5a5f6caca62..6e033cb579dd1 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -660,7 +660,10 @@ class TreeTransform {
                                           bool ExpectParameterPack);
 
   /// Transform the body of a lambda-expression.
-  StmtResult TransformLambdaBody(Stmt *Body);
+  StmtResult TransformLambdaBody(LambdaExpr *E, Stmt *Body);
+  /// Alternative implementation of TransformLambdaBody that skips transforming
+  /// the body.
+  StmtResult SkipLambdaBody(LambdaExpr *E, Stmt *Body);
 
   QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL);
 
@@ -11370,16 +11373,13 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
   bool Invalid = false;
 
   // Transform captures.
-  bool FinishedExplicitCaptures = false;
   for (LambdaExpr::capture_iterator C = E->capture_begin(),
                                  CEnd = E->capture_end();
        C != CEnd; ++C) {
     // When we hit the first implicit capture, tell Sema that we've finished
     // the list of explicit captures.
-    if (!FinishedExplicitCaptures && C->isImplicit()) {
-      getSema().finishLambdaExplicitCaptures(LSI);
-      FinishedExplicitCaptures = true;
-    }
+    if (C->isImplicit())
+      break;
 
     // Capturing 'this' is trivial.
     if (C->capturesThis()) {
@@ -11488,17 +11488,16 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
     getSema().tryCaptureVariable(CapturedVar, C->getLocation(), Kind,
                                  EllipsisLoc);
   }
-  if (!FinishedExplicitCaptures)
-    getSema().finishLambdaExplicitCaptures(LSI);
+  getSema().finishLambdaExplicitCaptures(LSI);
 
-  // Enter a new evaluation context to insulate the lambda from any
-  // cleanups from the enclosing full-expression.
+  // FIXME: Sema's lambda-building mechanism expects us to push an expression
+  // evaluation context even if we're not transforming the function body.
   getSema().PushExpressionEvaluationContext(
       Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
 
   // Instantiate the body of the lambda expression.
   StmtResult Body =
-      Invalid ? StmtError() : getDerived().TransformLambdaBody(E->getBody());
+      Invalid ? StmtError() : getDerived().TransformLambdaBody(E, E->getBody());
 
   // ActOnLambda* will pop the function scope for us.
   FuncScopeCleanup.disable();
@@ -11524,10 +11523,50 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
 
 template<typename Derived>
 StmtResult
-TreeTransform<Derived>::TransformLambdaBody(Stmt *S) {
+TreeTransform<Derived>::TransformLambdaBody(LambdaExpr *E, Stmt *S) {
   return TransformStmt(S);
 }
 
+template<typename Derived>
+StmtResult
+TreeTransform<Derived>::SkipLambdaBody(LambdaExpr *E, Stmt *S) {
+  // Transform captures.
+  for (LambdaExpr::capture_iterator C = E->capture_begin(),
+                                 CEnd = E->capture_end();
+       C != CEnd; ++C) {
+    // When we hit the first implicit capture, tell Sema that we've finished
+    // the list of explicit captures.
+    if (!C->isImplicit())
+      continue;
+
+    // Capturing 'this' is trivial.
+    if (C->capturesThis()) {
+      getSema().CheckCXXThisCapture(C->getLocation(), C->isExplicit(),
+                                    /*BuildAndDiagnose*/ true, nullptr,
+                                    C->getCaptureKind() == LCK_StarThis);
+      continue;
+    }
+    // Captured expression will be recaptured during captured variables
+    // rebuilding.
+    if (C->capturesVLAType())
+      continue;
+
+    assert(C->capturesVariable() && "unexpected kind of lambda capture");
+    assert(!E->isInitCapture(C) && "implicit init-capture?");
+
+    // Transform the captured variable.
+    VarDecl *CapturedVar = cast_or_null<VarDecl>(
+        getDerived().TransformDecl(C->getLocation(), C->getCapturedVar()));
+    if (!CapturedVar || CapturedVar->isInvalidDecl())
+      return StmtError();
+
+    // Capture the transformed variable.
+    getSema().tryCaptureVariable(CapturedVar, C->getLocation());
+  }
+
+  return S;
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformCXXUnresolvedConstructExpr(
diff --git a/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp b/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
index eaed45acd11be..a98366c8794a1 100644
--- a/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
+++ b/clang/test/SemaCXX/cxx1y-generic-lambdas-capturing.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -emit-llvm-only %s
+// RUN: %clang_cc1 -std=c++2a -verify -fsyntax-only -fblocks -emit-llvm-only %s
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fdelayed-template-parsing %s -DDELAYED_TEMPLATE_PARSING
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fms-extensions %s -DMS_EXTENSIONS
 // DONTRUNYET: %clang_cc1 -std=c++1y -verify -fsyntax-only -fblocks -fdelayed-template-parsing -fms-extensions %s -DMS_EXTENSIONS -DDELAYED_TEMPLATE_PARSING
@@ -176,7 +177,13 @@ void doit() {
     sample::X cx{5};
     auto L = [=](auto a) { 
       const int z = 3;
+      // FIXME: The warning below is correct but for some reason doesn't show
+      // up in C++17 mode.
       return [&,a](auto b) {
+#if __cplusplus > 201702L
+        // expected-warning@-2 {{address of stack memory associated with local variable 'z' returned}}
+        // expected-note@#call {{in instantiation of}}
+#endif
         const int y = 5;    
         return [=](auto c) { 
           int d[sizeof(a) == sizeof(c) || sizeof(c) == sizeof(b) ? 2 : 1];
@@ -189,7 +196,7 @@ void doit() {
         }; 
       };
     };
-    auto M = L(3)(3.5);
+    auto M = L(3)(3.5); // #call
     M(3.14);
   }
 }
@@ -1519,6 +1526,20 @@ void test() {
 
 } // end ns5
 
-
-
 } // end PR34266
+
+namespace capture_pack {
+#if __cplusplus >= 201702L
+  constexpr
+#endif
+  auto v =
+    [](auto ...a) {
+      [&](auto ...b) {
+        ((a = b), ...); // expected-warning 0-1{{extension}}
+      }(100, 20, 3);
+      return (a + ...); // expected-warning 0-1{{extension}}
+    }(400, 50, 6);
+#if __cplusplus >= 201702L
+  static_assert(v == 123);
+#endif
+}
diff --git a/clang/test/SemaTemplate/lambda-capture-pack.cpp b/clang/test/SemaTemplate/lambda-capture-pack.cpp
new file mode 100644
index 0000000000000..2fe576769dbdf
--- /dev/null
+++ b/clang/test/SemaTemplate/lambda-capture-pack.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -std=c++2a -verify %s
+// expected-no-diagnostics
+
+template<typename ...T, typename ...Lambda> void check_sizes(Lambda ...L) {
+  static_assert(((sizeof(T) == sizeof(Lambda)) && ...));
+}
+
+template<typename ...T> void f(T ...v) {
+  // Pack expansion of lambdas: each lambda captures only one pack element.
+  check_sizes<T...>([=] { (void)&v; } ...);
+
+  // Pack expansion inside lambda: captures all pack elements.
+  auto l = [=] { ((void)&v, ...); };
+  static_assert(sizeof(l) >= (sizeof(T) + ...));
+}
+
+template void f(int, char, double);

From 0cdaf3a09fea6db0008828264bf84c91a5b6eba1 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Tue, 4 Jun 2019 17:29:55 +0000
Subject: [PATCH 1048/1176] [Tests] Autogen a test so future changes are
 visible

Oddly, I had to change a value name from "tmp0" to "bc0" to get the autogened test to pass.  I'm putting this down to an oddity of update_test_checks or FileCheck, but don't understand it.

llvm-svn: 362532
---
 .../Transforms/IndVarSimplify/iv-widen.ll     | 156 ++++++++++++++----
 1 file changed, 122 insertions(+), 34 deletions(-)

diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
index 8664dacb20d78..a8d89b10f16d8 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; RUN: opt < %s -S -passes='lcssa,loop-simplify,require<targetir>,require<scalar-evolution>,require<domtree>,loop(indvars)' | FileCheck %s
 
@@ -9,17 +10,37 @@ target triple = "x86_64-apple-darwin"
 
 declare void @use(i64 %x)
 
-; CHECK-LABEL: @loop_0
-; CHECK-LABEL: B18:
 ; Only one phi now.
-; CHECK: phi i64
-; CHECK-NOT: phi
 ; One trunc for the gep.
-; CHECK: trunc i64 %indvars.iv to i32
 ; One trunc for the dummy() call.
-; CHECK-LABEL: exit24:
-; CHECK: trunc i64 {{.*}}lcssa.wide to i32
 define void @loop_0(i32* %a) {
+; CHECK-LABEL: @loop_0(
+; CHECK-NEXT:  Prologue:
+; CHECK-NEXT:    br i1 undef, label [[B18_PREHEADER:%.*]], label [[B6:%.*]]
+; CHECK:       B18.preheader:
+; CHECK-NEXT:    br label [[B18:%.*]]
+; CHECK:       B18:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[B18_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[B24:%.*]] ]
+; CHECK-NEXT:    call void @use(i64 [[INDVARS_IV]])
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[O:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[O]]
+; CHECK-NEXT:    [[T:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[T]], label [[EXIT24:%.*]], label [[B24]]
+; CHECK:       B24:
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20
+; CHECK-NEXT:    br i1 [[T2]], label [[B6_LOOPEXIT:%.*]], label [[B18]]
+; CHECK:       B6.loopexit:
+; CHECK-NEXT:    br label [[B6]]
+; CHECK:       B6:
+; CHECK-NEXT:    ret void
+; CHECK:       exit24:
+; CHECK-NEXT:    [[DOT02_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV]], [[B18]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[DOT02_LCSSA_WIDE]] to i32
+; CHECK-NEXT:    call void @dummy(i32 [[TMP1]])
+; CHECK-NEXT:    unreachable
+;
 Prologue:
   br i1 undef, label %B18, label %B6
 
@@ -46,11 +67,31 @@ exit24:                      ; preds = %B18
 }
 
 ; Make sure that dead zext is removed and no widening happens.
-; CHECK-LABEL: @loop_0.dead
-; CHECK: phi i32
-; CHECK-NOT: zext
-; CHECK-NOT: trunc
-define void @loop_0.dead(i32* %a) {
+define void @loop_0_dead(i32* %a) {
+; CHECK-LABEL: @loop_0_dead(
+; CHECK-NEXT:  Prologue:
+; CHECK-NEXT:    br i1 undef, label [[B18_PREHEADER:%.*]], label [[B6:%.*]]
+; CHECK:       B18.preheader:
+; CHECK-NEXT:    br label [[B18:%.*]]
+; CHECK:       B18:
+; CHECK-NEXT:    [[DOT02:%.*]] = phi i32 [ [[TMP33:%.*]], [[B24:%.*]] ], [ 0, [[B18_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP33]] = add nuw i32 [[DOT02]], 1
+; CHECK-NEXT:    [[O:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[DOT02]]
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[O]]
+; CHECK-NEXT:    [[T:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[T]], label [[EXIT24:%.*]], label [[B24]]
+; CHECK:       B24:
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[TMP33]], 20
+; CHECK-NEXT:    br i1 [[T2]], label [[B6_LOOPEXIT:%.*]], label [[B18]]
+; CHECK:       B6.loopexit:
+; CHECK-NEXT:    br label [[B6]]
+; CHECK:       B6:
+; CHECK-NEXT:    ret void
+; CHECK:       exit24:
+; CHECK-NEXT:    [[DOT02_LCSSA:%.*]] = phi i32 [ [[DOT02]], [[B18]] ]
+; CHECK-NEXT:    call void @dummy(i32 [[DOT02_LCSSA]])
+; CHECK-NEXT:    unreachable
+;
 Prologue:
   br i1 undef, label %B18, label %B6
 
@@ -77,16 +118,29 @@ exit24:                      ; preds = %B18
 
 define void @loop_1(i32 %lim) {
 ; CHECK-LABEL: @loop_1(
- entry:
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ENTRY_COND:%.*]] = icmp ne i32 [[LIM:%.*]], 0
+; CHECK-NEXT:    br i1 [[ENTRY_COND]], label [[LOOP_PREHEADER:%.*]], label [[LEAVE:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[LIM]] to i64
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:    call void @dummy.i64(i64 [[TMP1]])
+; CHECK-NEXT:    [[BE_COND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[BE_COND]], label [[LOOP]], label [[LEAVE_LOOPEXIT:%.*]]
+; CHECK:       leave.loopexit:
+; CHECK-NEXT:    br label [[LEAVE]]
+; CHECK:       leave:
+; CHECK-NEXT:    ret void
+;
+  entry:
   %entry.cond = icmp ne i32 %lim, 0
   br i1 %entry.cond, label %loop, label %leave
 
- loop:
-; CHECK: loop:
-; CHECK:  %indvars.iv = phi i64 [ 1, %loop.preheader ], [ %indvars.iv.next, %loop ]
-; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK:  [[IV_INC:%[^ ]+]] = add nsw i64 %indvars.iv, -1
-; CHECK:  call void @dummy.i64(i64 [[IV_INC]])
+  loop:
 
   %iv = phi i32 [ 1, %entry ], [ %iv.inc, %loop ]
   %iv.inc = add i32 %iv, 1
@@ -96,7 +150,7 @@ define void @loop_1(i32 %lim) {
   %be.cond = icmp ult i32 %iv.inc, %lim
   br i1 %be.cond, label %loop, label %leave
 
- leave:
+  leave:
   ret void
 }
 
@@ -106,9 +160,54 @@ declare void @dummy.i64(i64)
 
 define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8 %tmp1) {
 ; CHECK-LABEL: @loop_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1
+; CHECK-NEXT:    [[BC0:%.*]] = bitcast i32* [[LINED:%.*]] to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[SIZE]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[HSIZE:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw i64 [[INDVARS_IV7]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
+; CHECK:       for.body2.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
+; CHECK:       for.body2:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 1, [[FOR_BODY2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY2]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i64 [[TMP4]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP5]]
+; CHECK-NEXT:    store i8 [[TMP1:%.*]], i8* [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_BODY3_PREHEADER:%.*]]
+; CHECK:       for.body3.preheader:
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
+; CHECK:       for.body3:
+; CHECK-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ 1, [[FOR_BODY3_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], [[INDVARS_IV2]]
+; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[BC0]], i64 [[TMP8]]
+; CHECK-NEXT:    store i8 [[TMP1]], i8* [[ADD_PTR2]], align 1
+; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT5:%.*]] = zext i32 [[SIZE]] to i64
+; CHECK-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT5]]
+; CHECK-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY3]], label [[FOR_INC_LOOPEXIT:%.*]]
+; CHECK:       for.inc.loopexit:
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT8]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    ret void
+;
 entry:
   %cmp215 = icmp sgt i32 %size, 1
-  %tmp0 = bitcast i32* %lined to i8*
+  %bc0 = bitcast i32* %lined to i8*
   br label %for.body
 
 for.body:
@@ -118,33 +217,22 @@ for.body:
   br i1 %cmp215, label %for.body2, label %for.inc
 
 ; check that the induction variable of the inner loop has been widened after indvars.
-; CHECK:  [[INNERLOOPINV:%[^ ]+]] = add nsw i64
-; CHECK: for.body2:
-; CHECK-NEXT:  %indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
-; CHECK-NEXT:  [[WIDENED:%[^ ]+]] = add nsw i64 [[INNERLOOPINV]], %indvars.iv
-; CHECK-NEXT:  %add.ptr = getelementptr inbounds i8, i8* %tmp0, i64 [[WIDENED]]
 for.body2:
   %k = phi i32 [ %inc, %for.body2 ], [ 1, %for.body ]
   %add4 = add nsw i32 %add, %k
   %idx.ext = sext i32 %add4 to i64
-  %add.ptr = getelementptr inbounds i8, i8* %tmp0, i64 %idx.ext
+  %add.ptr = getelementptr inbounds i8, i8* %bc0, i64 %idx.ext
   store i8 %tmp1, i8* %add.ptr, align 1
   %inc = add nsw i32 %k, 1
   %cmp2 = icmp slt i32 %inc, %size
   br i1 %cmp2, label %for.body2, label %for.body3
 
 ; check that the induction variable of the inner loop has been widened after indvars.
-; CHECK: for.body3.preheader:
-; CHECK:  [[INNERLOOPINV:%[^ ]+]] = zext i32
-; CHECK: for.body3:
-; CHECK-NEXT:  %indvars.iv2 = phi i64 [ 1, %for.body3.preheader ], [ %indvars.iv.next3, %for.body3 ]
-; CHECK-NEXT:  [[WIDENED:%[^ ]+]] = add nuw nsw i64 [[INNERLOOPINV]], %indvars.iv2
-; CHECK-NEXT:  %add.ptr2 = getelementptr inbounds i8, i8* %tmp0, i64 [[WIDENED]]
 for.body3:
   %l = phi i32 [ %inc2, %for.body3 ], [ 1, %for.body2 ]
   %add5 = add nuw i32 %add, %l
   %idx.ext2 = zext i32 %add5 to i64
-  %add.ptr2 = getelementptr inbounds i8, i8* %tmp0, i64 %idx.ext2
+  %add.ptr2 = getelementptr inbounds i8, i8* %bc0, i64 %idx.ext2
   store i8 %tmp1, i8* %add.ptr2, align 1
   %inc2 = add nsw i32 %l, 1
   %cmp3 = icmp slt i32 %inc2, %size

From 09a4415803cd590ebb68a5141950366b077f13a7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 4 Jun 2019 17:44:18 +0000
Subject: [PATCH 1049/1176] [DAGCombiner][X86] Fold (not (neg X)) -> (add X,
 -1)

This is a special case of a more general transform (not (sub Y, X)) -> (add X, ~Y). InstCombine knows the general form. I've restricted to the special case to fix the motivating case PR42118. I tried handling any case where Y was constant, but got some changes on some Mips tests that I couldn't quickly prove where beneficial.

Fixes PR42118

Differential Revision: https://reviews.llvm.org/D62828

llvm-svn: 362533
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 +++++++
 llvm/test/CodeGen/X86/bmi.ll                  | 27 +++++++------------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 33ef68c2f1ff2..b69936d462cd1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6789,6 +6789,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
     }
   }
+
+  // fold (not (neg x)) -> (add X, -1)
+  // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+  // Y is a constant or the subtract has a single use.
+  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+      isNullConstant(N0.getOperand(0))) {
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+                       DAG.getAllOnesConstant(DL, VT));
+  }
+
   // fold (xor (and x, y), y) -> (and (not x), y)
   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
     SDValue X = N0.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index c48be66705f92..c2d13c2971062 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -1153,10 +1153,7 @@ declare void @bar()
 define void @pr42118_i32(i32 %x) {
 ; X86-LABEL: pr42118_i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    negl %ecx
-; X86-NEXT:    andnl %eax, %ecx, %eax
+; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    jne .LBB48_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    jmp bar # TAILCALL
@@ -1165,9 +1162,7 @@ define void @pr42118_i32(i32 %x) {
 ;
 ; X64-LABEL: pr42118_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    andnl %edi, %eax, %eax
+; X64-NEXT:    blsrl %edi, %eax
 ; X64-NEXT:    jne .LBB48_1
 ; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    jmp bar # TAILCALL
@@ -1192,13 +1187,13 @@ define void @pr42118_i64(i64 %x) {
 ; X86-NEXT:    .cfi_offset %esi, -8
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    sbbl %ecx, %edx
-; X86-NEXT:    andnl %ecx, %edx, %ecx
-; X86-NEXT:    andnl %eax, %esi, %eax
-; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    addl $-1, %edx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    andl %ecx, %esi
+; X86-NEXT:    orl %edx, %esi
 ; X86-NEXT:    jne .LBB49_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    popl %esi
@@ -1212,9 +1207,7 @@ define void @pr42118_i64(i64 %x) {
 ;
 ; X64-LABEL: pr42118_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    negq %rax
-; X64-NEXT:    andnq %rdi, %rax, %rax
+; X64-NEXT:    blsrq %rdi, %rax
 ; X64-NEXT:    jne .LBB49_1
 ; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    jmp bar # TAILCALL

From 03ff1b3c30f13c66236ef0ce6013826c849e0005 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Tue, 4 Jun 2019 18:01:07 +0000
Subject: [PATCH 1050/1176] [X86] Fold single-use variable into assert. NFC.

Avoids an unused variable warning in Release builds.

llvm-svn: 362534
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a15e375382068..6bdd448f5b8de 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38133,8 +38133,8 @@ static SDValue combineLogicBlendIntoConditionalNegate(
     EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
     SelectionDAG &DAG, const X86Subtarget &Subtarget) {
   EVT MaskVT = Mask.getValueType();
-  unsigned EltBits = MaskVT.getScalarSizeInBits();
-  assert(MaskVT.isInteger() && DAG.ComputeNumSignBits(Mask) == EltBits &&
+  assert(MaskVT.isInteger() &&
+         DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
          "Mask must be zero/all-bits");
 
   if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)

From 137de380091a7a756c44227ea68dad9298a3ba2b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 4 Jun 2019 18:03:07 +0000
Subject: [PATCH 1051/1176] [X86] Mutate fceil/ffloor/ftrunc/fnearbyint/frint
 into X86ISD::RNDSCALE during PreProcessIselDAG to cut down on pattern
 permutations

We already need to have patterns for X86ISD::RNDSCALE to support software intrinsics. But we currently have 5 sets of patterns for the 5 rounding operations. For of these 6 patterns we have to support 3 vectors widths, 2 element sizes, sse/vex/evex encodings, load folding, and broadcast load folding. This results in a fair amount of bytes in the isel table.

This patch adds code to PreProcessIselDAG to morph the fceil/ffloor/ftrunc/fnearbyint/frint to X86ISD::RNDSCALE. This way we can remove everything, but the intrinsic pattern while still allowing the operations to be considered Legal for DAGCombine and Legalization. This shrinks the DAGISel by somewhere between 9K and 10K.

There is one complication to this, the STRICT versions of these nodes are currently mutated to their none strict equivalents at isel time when the node is visited. This won't be true in the future since that loses the chain ordering information. For now I've also added support for the non-STRICT nodes to Select so we can change the STRICT versions there after they've been mutated to their non-STRICT versions. We'll probably need a STRICT version of RNDSCALE or something to handle this in the future. Which will take us back to needing 2 sets of patterns for strict and non-strict, but that's still better than the 11 or 12 sets of patterns we'd need.

We can probably do something similar for scalar, but I haven't looked at it yet.

Differential Revision: https://reviews.llvm.org/D62757

llvm-svn: 362535
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 100 +++++++++---
 llvm/lib/Target/X86/X86InstrAVX512.td   | 203 ------------------------
 llvm/lib/Target/X86/X86InstrSSE.td      | 136 ----------------
 3 files changed, 82 insertions(+), 357 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6fffb56c980a3..89e1aea7b8a40 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -790,28 +790,60 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       continue;
     }
 
-    // Replace vector shifts with their X86 specific equivalent so we don't
-    // need 2 sets of patterns.
     switch (N->getOpcode()) {
     case ISD::SHL:
     case ISD::SRA:
-    case ISD::SRL:
-      if (N->getValueType(0).isVector()) {
-        unsigned NewOpc;
-        switch (N->getOpcode()) {
-        default: llvm_unreachable("Unexpected opcode!");
-        case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
-        case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
-        case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
-        }
-        SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
-                                      N->getOperand(0), N->getOperand(1));
-        --I;
-        CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
-        ++I;
-        CurDAG->DeleteNode(N);
-        continue;
+    case ISD::SRL: {
+      // Replace vector shifts with their X86 specific equivalent so we don't
+      // need 2 sets of patterns.
+      if (!N->getValueType(0).isVector())
+        break;
+
+      unsigned NewOpc;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
+      case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
+      case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
+      }
+      SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+                                    N->getOperand(0), N->getOperand(1));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+    case ISD::FCEIL:
+    case ISD::FFLOOR:
+    case ISD::FTRUNC:
+    case ISD::FNEARBYINT:
+    case ISD::FRINT: {
+      // Replace vector rounding with their X86 specific equivalent so we don't
+      // need 2 sets of patterns.
+      if (!N->getValueType(0).isVector())
+        break;
+
+      unsigned Imm;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::FCEIL:      Imm = 0xA; break;
+      case ISD::FFLOOR:     Imm = 0x9; break;
+      case ISD::FTRUNC:     Imm = 0xB; break;
+      case ISD::FNEARBYINT: Imm = 0xC; break;
+      case ISD::FRINT:      Imm = 0x4; break;
       }
+      SDLoc dl(N);
+      SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+                                    N->getValueType(0),
+                                    N->getOperand(0),
+                                    CurDAG->getConstant(Imm, dl, MVT::i8));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
     }
 
     if (OptLevel != CodeGenOpt::None &&
@@ -4672,6 +4704,38 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     if (foldLoadStoreIntoMemOperand(Node))
       return;
     break;
+  case ISD::FCEIL:
+  case ISD::FFLOOR:
+  case ISD::FTRUNC:
+  case ISD::FNEARBYINT:
+  case ISD::FRINT: {
+    // Replace vector rounding with their X86 specific equivalent so we don't
+    // need 2 sets of patterns.
+    // FIXME: This can only happen when the nodes started as STRICT_* and have
+    // been mutated into their non-STRICT equivalents. Eventually this
+    // mutation will be removed and we should switch the STRICT_ nodes to a
+    // strict version of RNDSCALE in PreProcessISelDAG.
+    if (!Node->getValueType(0).isVector())
+      break;
+
+    unsigned Imm;
+    switch (Node->getOpcode()) {
+    default: llvm_unreachable("Unexpected opcode!");
+    case ISD::FCEIL:      Imm = 0xA; break;
+    case ISD::FFLOOR:     Imm = 0x9; break;
+    case ISD::FTRUNC:     Imm = 0xB; break;
+    case ISD::FNEARBYINT: Imm = 0xC; break;
+    case ISD::FRINT:      Imm = 0x4; break;
+    }
+    SDLoc dl(Node);
+    SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+                                  Node->getValueType(0),
+                                  Node->getOperand(0),
+                                  CurDAG->getConstant(Imm, dl, MVT::i8));
+    ReplaceNode(Node, Res.getNode());
+    SelectCode(Res.getNode());
+    return;
+  }
   }
 
   SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index eebb6401db033..b230cbf611b2e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10694,209 +10694,6 @@ defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
-
-multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
-  // Register
-  def : Pat<(_.VT (ffloor _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0x9))>;
-  def : Pat<(_.VT (fnearbyint _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0xC))>;
-  def : Pat<(_.VT (fceil _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0xA))>;
-  def : Pat<(_.VT (frint _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0x4))>;
-  def : Pat<(_.VT (ftrunc _.RC:$src)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
-             _.RC:$src, (i32 0xB))>;
-
-  // Merge-masking
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
-             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
-  // Zero-masking
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
-             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
-  // Load
-  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
-             addr:$src, (i32 0xB))>;
-
-  // Merge-masking + load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
-  // Zero-masking + load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
-  // Broadcast load
-  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
-             addr:$src, (i32 0xB))>;
-
-  // Merge-masking + broadcast load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.RC:$dst)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
-             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
-  // Zero-masking + broadcast load
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
-  def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
-                           _.ImmAllZerosV)),
-            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
-             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-}
-
-let Predicates = [HasAVX512] in {
-  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
-  defm : AVX512_rndscale_lowering<v8f64_info,  "PD">;
-}
-
-let Predicates = [HasVLX] in {
-  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
-  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
-  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
-  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
-}
-
 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                           X86FoldableSchedWrite sched,
                                           X86VectorVTInfo _,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 23aea3ea90817..2d4dc46d41b4c 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -5772,96 +5772,6 @@ let Predicates = [UseAVX, OptForSize] in {
             (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
 }
 
-let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v4f32 (ffloor VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc VR128:$src)),
-            (VROUNDPSr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
-            (VROUNDPSm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc VR128:$src)),
-            (VROUNDPDr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
-            (VROUNDPDm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v8f32 (ffloor VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0x9))>;
-  def : Pat<(v8f32 (fnearbyint VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0xC))>;
-  def : Pat<(v8f32 (fceil VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0xA))>;
-  def : Pat<(v8f32 (frint VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0x4))>;
-  def : Pat<(v8f32 (ftrunc VR256:$src)),
-            (VROUNDPSYr VR256:$src, (i32 0xB))>;
-
-  def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0x9))>;
-  def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0xC))>;
-  def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0xA))>;
-  def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0x4))>;
-  def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
-            (VROUNDPSYm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v4f64 (ffloor VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0x9))>;
-  def : Pat<(v4f64 (fnearbyint VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0xC))>;
-  def : Pat<(v4f64 (fceil VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0xA))>;
-  def : Pat<(v4f64 (frint VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0x4))>;
-  def : Pat<(v4f64 (ftrunc VR256:$src)),
-            (VROUNDPDYr VR256:$src, (i32 0xB))>;
-
-  def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0x9))>;
-  def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0xC))>;
-  def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0xA))>;
-  def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0x4))>;
-  def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
-            (VROUNDPDYm addr:$src, (i32 0xB))>;
-}
-
 let ExeDomain = SSEPackedSingle in
 defm ROUNDPS  : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
                                 memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
@@ -5923,52 +5833,6 @@ let Predicates = [UseSSE41, OptForSize] in {
             (ROUNDSDm addr:$src, (i32 0xB))>;
 }
 
-let Predicates = [UseSSE41] in {
-  def : Pat<(v4f32 (ffloor VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc VR128:$src)),
-            (ROUNDPSr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v4f32 (ffloor (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0x9))>;
-  def : Pat<(v4f32 (fnearbyint (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0xC))>;
-  def : Pat<(v4f32 (fceil (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0xA))>;
-  def : Pat<(v4f32 (frint (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0x4))>;
-  def : Pat<(v4f32 (ftrunc (memopv4f32 addr:$src))),
-            (ROUNDPSm addr:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc VR128:$src)),
-            (ROUNDPDr VR128:$src, (i32 0xB))>;
-
-  def : Pat<(v2f64 (ffloor (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0x9))>;
-  def : Pat<(v2f64 (fnearbyint (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0xC))>;
-  def : Pat<(v2f64 (fceil (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0xA))>;
-  def : Pat<(v2f64 (frint (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0x4))>;
-  def : Pat<(v2f64 (ftrunc (memopv2f64 addr:$src))),
-            (ROUNDPDm addr:$src, (i32 0xB))>;
-}
-
 //===----------------------------------------------------------------------===//
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//

From 878df1c2a9803e2b7965d52f5b21fb294dd18607 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 18:06:28 +0000
Subject: [PATCH 1052/1176] llvm-undname: Add test coverage for
 demangleInitFiniStub()

llvm-svn: 362536
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp   |  4 ++--
 llvm/test/Demangle/invalid-manglings.test | 10 ++++++++++
 llvm/test/Demangle/ms-operators.test      |  7 +++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 450c95ff8a100..c8f7502ce2c09 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -465,9 +465,9 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
   case SpecialIntrinsicKind::RttiBaseClassDescriptor:
     return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
   case SpecialIntrinsicKind::DynamicInitializer:
-    return demangleInitFiniStub(MangledName, false);
+    return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
   case SpecialIntrinsicKind::DynamicAtexitDestructor:
-    return demangleInitFiniStub(MangledName, true);
+    return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
   case SpecialIntrinsicKind::Typeof:
   case SpecialIntrinsicKind::UdtReturning:
     // It's unclear which tools produces these manglings, so demangling
diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index 1cc192601a483..8887b2cd20c79 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -90,6 +90,16 @@
 ; CHECK-NEXT: ??__E?Foo@@0HA@@
 ; CHECK-NEXT: error: Invalid mangled name
 
+??__E?i@C@@0HA@
+; CHECK-EMPTY:
+; CHECK-NEXT: ??__E?i@C@@0HA@
+; CHECK-NEXT: error: Invalid mangled name
+
+??__E?Foo@@YAXXZ
+; CHECK-EMPTY:
+; CHECK-NEXT: ??__E?Foo@@YAXXZ
+; CHECK-NEXT: error: Invalid mangled name
+
 ??8@8
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ??8@8
diff --git a/llvm/test/Demangle/ms-operators.test b/llvm/test/Demangle/ms-operators.test
index 6a22ebda17145..71a3a1305b456 100644
--- a/llvm/test/Demangle/ms-operators.test
+++ b/llvm/test/Demangle/ms-operators.test
@@ -221,9 +221,16 @@
 ??_R4Base@@6B@
 ; CHECK: const Base::`RTTI Complete Object Locator'
 
+; Generated for `int Foo = f(4);` at global scope.
 ??__EFoo@@YAXXZ
 ; CHECK: void __cdecl `dynamic initializer for 'Foo''(void)
 
+; Generated for
+;   class C {  static int i; };
+;   int C::i = f(5);
+??__E?i@C@@0HA@@YAXXZ
+; CHECK: void __cdecl `dynamic initializer for `private: static int C::i''(void)
+
 ??__FFoo@@YAXXZ
 ; CHECK: void __cdecl `dynamic atexit destructor for 'Foo''(void)
 

From 7dcd73340ba51943b935064db038e526d4596e6e Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 4 Jun 2019 18:30:46 +0000
Subject: [PATCH 1053/1176] Factor out repeated code to build a DeclRefExpr and
 mark it referenced.

llvm-svn: 362537
---
 clang/include/clang/Sema/Sema.h | 18 +++++++++----
 clang/lib/Sema/SemaDeclCXX.cpp  |  4 +--
 clang/lib/Sema/SemaExpr.cpp     | 46 ++++++++++++++++-----------------
 clang/lib/Sema/SemaExprCXX.cpp  |  9 +++----
 clang/lib/Sema/SemaOverload.cpp | 30 ++++++---------------
 5 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index ed41143161d57..74a1a28a39bbc 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4305,16 +4305,24 @@ class Sema {
                                         bool isAddressOfOperand,
                                 const TemplateArgumentListInfo *TemplateArgs);
 
-  ExprResult BuildDeclRefExpr(ValueDecl *D, QualType Ty,
-                              ExprValueKind VK,
-                              SourceLocation Loc,
-                              const CXXScopeSpec *SS = nullptr);
-  ExprResult
+  DeclRefExpr *BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
+                                SourceLocation Loc,
+                                const CXXScopeSpec *SS = nullptr);
+  DeclRefExpr *
   BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
                    const DeclarationNameInfo &NameInfo,
                    const CXXScopeSpec *SS = nullptr,
                    NamedDecl *FoundD = nullptr,
+                   SourceLocation TemplateKWLoc = SourceLocation(),
+                   const TemplateArgumentListInfo *TemplateArgs = nullptr);
+  DeclRefExpr *
+  BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
+                   const DeclarationNameInfo &NameInfo,
+                   NestedNameSpecifierLoc NNS,
+                   NamedDecl *FoundD = nullptr,
+                   SourceLocation TemplateKWLoc = SourceLocation(),
                    const TemplateArgumentListInfo *TemplateArgs = nullptr);
+
   ExprResult
   BuildAnonymousStructUnionMemberReference(
       const CXXScopeSpec &SS,
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 0956aff21e174..354316a3ccdcb 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -11475,7 +11475,7 @@ class RefBuilder: public ExprBuilder {
 
 public:
   Expr *build(Sema &S, SourceLocation Loc) const override {
-    return assertNotNull(S.BuildDeclRefExpr(Var, VarType, VK_LValue, Loc).get());
+    return assertNotNull(S.BuildDeclRefExpr(Var, VarType, VK_LValue, Loc));
   }
 
   RefBuilder(VarDecl *Var, QualType VarType)
@@ -12877,7 +12877,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(
 
   // Construct the body of the conversion function { return __invoke; }.
   Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
-                                       VK_LValue, Conv->getLocation()).get();
+                                       VK_LValue, Conv->getLocation());
   assert(FunctionRef && "Can't refer to __invoke function?");
   Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
   Conv->setBody(CompoundStmt::Create(Context, Return, Conv->getLocation(),
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 1e9164bc3ac56..7eb5bcbbbb0b3 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1762,7 +1762,7 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
   llvm_unreachable("unexpected literal operator lookup result");
 }
 
-ExprResult
+DeclRefExpr *
 Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
                        SourceLocation Loc,
                        const CXXScopeSpec *SS) {
@@ -1770,36 +1770,33 @@ Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
   return BuildDeclRefExpr(D, Ty, VK, NameInfo, SS);
 }
 
+DeclRefExpr *
+Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
+                       const DeclarationNameInfo &NameInfo,
+                       const CXXScopeSpec *SS, NamedDecl *FoundD,
+                       SourceLocation TemplateKWLoc,
+                       const TemplateArgumentListInfo *TemplateArgs) {
+  NestedNameSpecifierLoc NNS =
+      SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc();
+  return BuildDeclRefExpr(D, Ty, VK, NameInfo, NNS, FoundD, TemplateKWLoc,
+                          TemplateArgs);
+}
+
 /// BuildDeclRefExpr - Build an expression that references a
 /// declaration that does not require a closure capture.
-ExprResult
+DeclRefExpr *
 Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
                        const DeclarationNameInfo &NameInfo,
-                       const CXXScopeSpec *SS, NamedDecl *FoundD,
+                       NestedNameSpecifierLoc NNS, NamedDecl *FoundD,
+                       SourceLocation TemplateKWLoc,
                        const TemplateArgumentListInfo *TemplateArgs) {
   bool RefersToCapturedVariable =
       isa<VarDecl>(D) &&
       NeedToCaptureVariable(cast<VarDecl>(D), NameInfo.getLoc());
 
-  DeclRefExpr *E;
-  if (isa<VarTemplateSpecializationDecl>(D)) {
-    VarTemplateSpecializationDecl *VarSpec =
-        cast<VarTemplateSpecializationDecl>(D);
-
-    E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context)
-                                        : NestedNameSpecifierLoc(),
-                            VarSpec->getTemplateKeywordLoc(), D,
-                            RefersToCapturedVariable, NameInfo.getLoc(), Ty, VK,
-                            FoundD, TemplateArgs);
-  } else {
-    assert(!TemplateArgs && "No template arguments for non-variable"
-                            " template specialization references");
-    E = DeclRefExpr::Create(Context, SS ? SS->getWithLocInContext(Context)
-                                        : NestedNameSpecifierLoc(),
-                            SourceLocation(), D, RefersToCapturedVariable,
-                            NameInfo, Ty, VK, FoundD);
-  }
-
+  DeclRefExpr *E = DeclRefExpr::Create(Context, NNS, TemplateKWLoc, D,
+                                       RefersToCapturedVariable, NameInfo, Ty,
+                                       VK, FoundD, TemplateArgs);
   MarkDeclRefReferenced(E);
 
   if (getLangOpts().ObjCWeak && isa<VarDecl>(D) &&
@@ -3141,6 +3138,7 @@ ExprResult Sema::BuildDeclarationNameExpr(
     }
 
     return BuildDeclRefExpr(VD, type, valueKind, NameInfo, &SS, FoundD,
+                            /*FIXME: TemplateKWLoc*/ SourceLocation(),
                             TemplateArgs);
   }
 }
@@ -5615,8 +5613,8 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
     }
   }
 
-  if (isa<DeclRefExpr>(NakedFn)) {
-    NDecl = cast<DeclRefExpr>(NakedFn)->getDecl();
+  if (auto *DRE = dyn_cast<DeclRefExpr>(NakedFn)) {
+    NDecl = DRE->getDecl();
 
     FunctionDecl *FDecl = dyn_cast<FunctionDecl>(NDecl);
     if (FDecl && FDecl->getBuiltinID()) {
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 5884cf906fd15..1eb6a7114ffa2 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -3633,12 +3633,9 @@ ExprResult Sema::CheckConditionVariable(VarDecl *ConditionVar,
                           diag::err_invalid_use_of_array_type)
                      << ConditionVar->getSourceRange());
 
-  ExprResult Condition = DeclRefExpr::Create(
-      Context, NestedNameSpecifierLoc(), SourceLocation(), ConditionVar,
-      /*enclosing*/ false, ConditionVar->getLocation(),
-      ConditionVar->getType().getNonReferenceType(), VK_LValue);
-
-  MarkDeclRefReferenced(cast<DeclRefExpr>(Condition.get()));
+  ExprResult Condition = BuildDeclRefExpr(
+      ConditionVar, ConditionVar->getType().getNonReferenceType(), VK_LValue,
+      ConditionVar->getLocation());
 
   switch (CK) {
   case ConditionKind::Boolean:
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index e5cbd1d0a81c0..30d809ac91b55 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -13864,17 +13864,10 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
       TemplateArgs = &TemplateArgsBuffer;
     }
 
-    DeclRefExpr *DRE = DeclRefExpr::Create(Context,
-                                           ULE->getQualifierLoc(),
-                                           ULE->getTemplateKeywordLoc(),
-                                           Fn,
-                                           /*enclosing*/ false, // FIXME?
-                                           ULE->getNameLoc(),
-                                           Fn->getType(),
-                                           VK_LValue,
-                                           Found.getDecl(),
-                                           TemplateArgs);
-    MarkDeclRefReferenced(DRE);
+    DeclRefExpr *DRE =
+        BuildDeclRefExpr(Fn, Fn->getType(), VK_LValue, ULE->getNameInfo(),
+                         ULE->getQualifierLoc(), Found.getDecl(),
+                         ULE->getTemplateKeywordLoc(), TemplateArgs);
     DRE->setHadMultipleCandidates(ULE->getNumDecls() > 1);
     return DRE;
   }
@@ -13893,17 +13886,10 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
     // implicit member access, rewrite to a simple decl ref.
     if (MemExpr->isImplicitAccess()) {
       if (cast<CXXMethodDecl>(Fn)->isStatic()) {
-        DeclRefExpr *DRE = DeclRefExpr::Create(Context,
-                                               MemExpr->getQualifierLoc(),
-                                               MemExpr->getTemplateKeywordLoc(),
-                                               Fn,
-                                               /*enclosing*/ false,
-                                               MemExpr->getMemberLoc(),
-                                               Fn->getType(),
-                                               VK_LValue,
-                                               Found.getDecl(),
-                                               TemplateArgs);
-        MarkDeclRefReferenced(DRE);
+        DeclRefExpr *DRE = BuildDeclRefExpr(
+            Fn, Fn->getType(), VK_LValue, MemExpr->getNameInfo(),
+            MemExpr->getQualifierLoc(), Found.getDecl(),
+            MemExpr->getTemplateKeywordLoc(), TemplateArgs);
         DRE->setHadMultipleCandidates(MemExpr->getNumDecls() > 1);
         return DRE;
       } else {

From bfceed49cec1ec3f6b846974c96e49e00bd8c36f Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Tue, 4 Jun 2019 18:45:15 +0000
Subject: [PATCH 1054/1176] [Utils] Clean another duplicated util method.

Summary:
Following the cleanup in D48202, method foldBlockIntoPredecessor has the
same behavior. Replace its uses with MergeBlockIntoPredecessor.
Remove foldBlockIntoPredecessor.

Reviewers: chandlerc, dmgreen

Subscribers: jlebar, javed.absar, zzheng, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62751

llvm-svn: 362538
---
 .../llvm/Transforms/Utils/UnrollLoop.h        |  3 -
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp |  7 ++
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      | 64 +------------------
 .../lib/Transforms/Utils/LoopUnrollAndJam.cpp |  4 +-
 .../AArch64/loop-micro-op-buffer-size-t99.ll  |  2 +-
 5 files changed, 14 insertions(+), 66 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index fb75ccfa9f3ce..593ca26feb983 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -119,9 +119,6 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
                         TargetTransformInfo::UnrollingPreferences &UP,
                         bool &UseUpperBound);
 
-BasicBlock *foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
-                                     ScalarEvolution *SE, DominatorTree *DT);
-
 void remapInstruction(Instruction *I, ValueToValueMapTy &VMap);
 
 void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index aa7b933022c00..842434cca1574 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -38,6 +38,8 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
@@ -47,6 +49,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "basicblock-utils"
+
 void llvm::DetatchDeadBlocks(
     ArrayRef<BasicBlock *> BBs,
     SmallVectorImpl<DominatorTree::UpdateType> *Updates,
@@ -190,6 +194,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
       if (IncValue == &PN)
         return false;
 
+  LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+                    << PredBB->getName() << "\n");
+
   // Begin by getting rid of unneeded PHIs.
   SmallVector<AssertingVH<Value>, 4> IncomingValues;
   if (isa<PHINode>(BB->front())) {
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index d8ac42b5a8500..d0908b594c931 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -93,66 +93,6 @@ void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
   }
 }
 
-/// Folds a basic block into its predecessor if it only has one predecessor, and
-/// that predecessor only has one successor.
-/// The LoopInfo Analysis that is passed will be kept consistent.
-BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
-                                           ScalarEvolution *SE,
-                                           DominatorTree *DT) {
-  // Merge basic blocks into their predecessor if there is only one distinct
-  // pred, and if there is only one distinct successor of the predecessor, and
-  // if there are no PHI nodes.
-  BasicBlock *OnlyPred = BB->getSinglePredecessor();
-  if (!OnlyPred) return nullptr;
-
-  if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
-    return nullptr;
-
-  LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
-                    << OnlyPred->getName() << "\n");
-
-  // Resolve any PHI nodes at the start of the block.  They are all
-  // guaranteed to have exactly one entry if they exist, unless there are
-  // multiple duplicate (but guaranteed to be equal) entries for the
-  // incoming edges.  This occurs when there are multiple edges from
-  // OnlyPred to OnlySucc.
-  FoldSingleEntryPHINodes(BB);
-
-  // Delete the unconditional branch from the predecessor...
-  OnlyPred->getInstList().pop_back();
-
-  // Make all PHI nodes that referred to BB now refer to Pred as their
-  // source...
-  BB->replaceAllUsesWith(OnlyPred);
-
-  // Move all definitions in the successor to the predecessor...
-  OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
-  // OldName will be valid until erased.
-  StringRef OldName = BB->getName();
-
-  // Erase the old block and update dominator info.
-  if (DT)
-    if (DomTreeNode *DTN = DT->getNode(BB)) {
-      DomTreeNode *PredDTN = DT->getNode(OnlyPred);
-      SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
-      for (auto *DI : Children)
-        DT->changeImmediateDominator(DI, PredDTN);
-
-      DT->eraseNode(BB);
-    }
-
-  LI->removeBlock(BB);
-
-  // Inherit predecessor's name if it exists...
-  if (!OldName.empty() && !OnlyPred->hasName())
-    OnlyPred->setName(OldName);
-
-  BB->eraseFromParent();
-
-  return OnlyPred;
-}
-
 /// Check if unrolling created a situation where we need to insert phi nodes to
 /// preserve LCSSA form.
 /// \param Blocks is a vector of basic blocks representing unrolled loop.
@@ -818,12 +758,14 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   assert(!DT || !UnrollVerifyDomtree ||
       DT->verify(DominatorTree::VerificationLevel::Fast));
 
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   // Merge adjacent basic blocks, if possible.
   for (BasicBlock *Latch : Latches) {
     BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
     if (Term->isUnconditional()) {
       BasicBlock *Dest = Term->getSuccessor(0);
-      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+      BasicBlock *Fold = Dest->getUniquePredecessor();
+      if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
         // Dest has been folded into Fold. Update our worklists accordingly.
         std::replace(Latches.begin(), Latches.end(), Dest, Fold);
         UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index a53ee9d268726..ff49d83f25c54 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -538,12 +538,14 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
   MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
   MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
   MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
   while (!MergeBlocks.empty()) {
     BasicBlock *BB = *MergeBlocks.begin();
     BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
     if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
       BasicBlock *Dest = Term->getSuccessor(0);
-      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+      BasicBlock *Fold = Dest->getUniquePredecessor();
+      if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
         // Don't remove BB and add Fold as they are the same BB
         assert(Fold == BB);
         (void)Fold;
diff --git a/llvm/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll b/llvm/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll
index d64b51509e161..4dad5c33d202b 100644
--- a/llvm/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll
+++ b/llvm/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -mcpu=thunderx2t99 -loop-unroll --debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
+; RUN: opt -mcpu=thunderx2t99 -loop-unroll --debug-only=loop-unroll --debug-only=basicblock-utils -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
 
 target triple = "aarch64-unknown-linux-gnu"
 

From aed7227b71784fb1c1e8fc7a0a5ac65647cd463f Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Tue, 4 Jun 2019 18:48:43 +0000
Subject: [PATCH 1055/1176] Revert r362472 as it is breaking PPC build bots

The patch https://reviews.llvm.org/rL362472 broke PPC LNT buildbots.
Reverting it to bring the bots back to green.

llvm-svn: 362539
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 179 ----------
 llvm/test/CodeGen/PowerPC/store-combine.ll    | 315 +++++++++++++++---
 2 files changed, 266 insertions(+), 228 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b69936d462cd1..be1c10801655b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -524,7 +524,6 @@ namespace {
                               const SDLoc &DL);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
-    SDValue MatchStoreCombine(StoreSDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue splitMergedValStore(StoreSDNode *ST);
@@ -6276,180 +6275,6 @@ static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
   return BigEndian;
 }
 
-static SDValue stripTruncAndExt(SDValue Value) {
-  switch (Value.getOpcode()) {
-  case ISD::TRUNCATE:
-  case ISD::ZERO_EXTEND:
-  case ISD::SIGN_EXTEND:
-  case ISD::ANY_EXTEND:
-    return stripTruncAndExt(Value.getOperand(0));
-  }
-  return Value;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-///  i8 *p = ...
-///  i32 val = ...
-///  p[0] = (val >> 0) & 0xFF;
-///  p[1] = (val >> 8) & 0xFF;
-///  p[2] = (val >> 16) & 0xFF;
-///  p[3] = (val >> 24) & 0xFF;
-/// =>
-///  *((i32)p) = val;
-///
-///  i8 *p = ...
-///  i32 val = ...
-///  p[0] = (val >> 24) & 0xFF;
-///  p[1] = (val >> 16) & 0xFF;
-///  p[2] = (val >> 8) & 0xFF;
-///  p[3] = (val >> 0) & 0xFF;
-/// =>
-///  *((i32)p) = BSWAP(val);
-SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
-  // Collect all the stores in the chain.
-  SDValue Chain;
-  SmallVector<StoreSDNode *, 8> Stores;
-  for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
-    if (Store->getMemoryVT() != MVT::i8 ||
-        Store->isVolatile() || Store->isIndexed())
-      return SDValue();
-    Stores.push_back(Store);
-    Chain = Store->getChain();
-  }
-  // Handle the simple type only.
-  unsigned Width = Stores.size();
-  EVT VT = EVT::getIntegerVT(
-    *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
-  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
-    return SDValue();
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
-    return SDValue();
-
-  // Check if all the bytes of the combined value we are looking at are stored 
-  // to the same base address. Collect bytes offsets from Base address into 
-  // ByteOffsets. 
-  SDValue CombinedValue;
-  SmallVector<int64_t, 4> ByteOffsets(Width);
-  int64_t FirstOffset = INT64_MAX;
-  StoreSDNode *FirstStore = nullptr;
-  Optional<BaseIndexOffset> Base;
-  for (auto Store : Stores) {
-    // All the stores store different byte of the CombinedValue. A truncate is
-    // required to get that byte value.
-    SDValue Trunc = Store->getValue();
-    if (Trunc.getOpcode() != ISD::TRUNCATE)
-      return SDValue();
-    // A shift operation is required to get the right byte offset, except the
-    // first byte.
-    int64_t Offset = 0;
-    SDValue Value = Trunc.getOperand(0);
-    if (Value.getOpcode() == ISD::SRL ||
-        Value.getOpcode() == ISD::SRA) {
-      ConstantSDNode *ShiftOffset =
-        dyn_cast<ConstantSDNode>(Value.getOperand(1));
-      // Trying to match the following pattern. The shift offset must be 
-      // a constant and a multiple of 8. It is the byte offset in "y".
-      // 
-      // x = srl y, offset
-      // i8 z = trunc x 
-      // store z, ...
-      if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
-        return SDValue();
-  
-     Offset = ShiftOffset->getSExtValue()/8;
-     Value = Value.getOperand(0);
-    }
-
-    // Stores must share the same combined value with different offsets.
-    if (!CombinedValue)
-      CombinedValue = Value;
-    else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
-      return SDValue();
-
-    // The trunc and all the extend operation should be stripped to get the
-    // real value we are stored.
-    else if (CombinedValue.getValueType() != VT) {
-      if (Value.getValueType() == VT ||
-          Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
-        CombinedValue = Value;
-      // Give up if the combined value type is smaller than the store size.
-      if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
-        return SDValue();
-    }
-
-    // Stores must share the same base address
-    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
-    int64_t ByteOffsetFromBase = 0;
-    if (!Base)
-      Base = Ptr;
-    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
-      return SDValue();
-
-    // Remember the first byte store
-    if (ByteOffsetFromBase < FirstOffset) {
-      FirstStore = Store;
-      FirstOffset = ByteOffsetFromBase;
-    }
-    // Map the offset in the store and the offset in the combined value.
-    if (Offset < 0 || Offset >= Width)
-      return SDValue();
-    ByteOffsets[Offset] = ByteOffsetFromBase;
-  }
-
-  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
-  assert(FirstStore && "First store must be set");
-
-  // Check if the bytes of the combined value we are looking at match with 
-  // either big or little endian value store.
-  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
-  if (!IsBigEndian.hasValue())
-    return SDValue();
-
-  // The node we are looking at matches with the pattern, check if we can
-  // replace it with a single bswap if needed and store.
-
-  // If the store needs byte swap check if the target supports it
-  bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
-
-  // Before legalize we can introduce illegal bswaps which will be later
-  // converted to an explicit bswap sequence. This way we end up with a single
-  // store and byte shuffling instead of several stores and byte shuffling.
-  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
-    return SDValue();
-
-  // Check that a store of the wide type is both allowed and fast on the target
-  bool Fast = false;
-  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
-                                        VT, FirstStore->getAddressSpace(),
-                                        FirstStore->getAlignment(), &Fast);
-  if (!Allowed || !Fast)
-    return SDValue();
-
-  if (VT != CombinedValue.getValueType()) {
-    assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
-           "Get unexpected store value to combine");
-    CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
-                             CombinedValue);
-  }
-
-  if (NeedsBswap)
-    CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
-
-  SDValue NewStore =
-    DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
-                 FirstStore->getPointerInfo(), FirstStore->getAlignment());
-
-  // Rely on other DAG combine rules to remove the other individual stores.
-  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
-  return NewStore;
-}
-
 /// Match a pattern where a wide type scalar value is loaded by several narrow
 /// loads and combined by shifts and ors. Fold it into a single load or a load
 /// and a BSWAP if the targets supports it.
@@ -15968,10 +15793,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (SDValue NewST = TransformFPLoadStorePair(N))
     return NewST;
 
-  // Try transforming several stores into STORE (BSWAP).
-  if (SDValue Store = MatchStoreCombine(ST))
-    return Store;
-
   if (ST->isUnindexed()) {
     // Walk up chain skipping non-aliasing memory nodes, on this store and any
     // adjacent stores.
diff --git a/llvm/test/CodeGen/PowerPC/store-combine.ll b/llvm/test/CodeGen/PowerPC/store-combine.ll
index 9315df130800c..77e02e76cde0a 100644
--- a/llvm/test/CodeGen/PowerPC/store-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/store-combine.ll
@@ -10,12 +10,24 @@
 define void @store_i32_by_i8(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stw 3, 0(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stwbrx 3, 0, 4 
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, 2(4)
+; CHECK-PPC64-NEXT:    stb 3, 3(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i32 %m to i8
@@ -43,12 +55,24 @@ entry:
 define void @store_i32_by_i8_bswap(i32 signext %m, i8* %p)  {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, 4
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 0(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stw 3, 0(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 24
+; CHECK-PPC64-NEXT:    srwi 6, 3, 16
+; CHECK-PPC64-NEXT:    stb 5, 0(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 6, 1(4)
+; CHECK-PPC64-NEXT:    stb 5, 2(4)
+; CHECK-PPC64-NEXT:    stb 3, 3(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 24
@@ -80,12 +104,40 @@ entry:
 define void @store_i64_by_i8(i64 %m, i8* %p)  {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stdx 3, 0, 4
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 56, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 48, 16
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 40, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 32, 32
+; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 24, 40
+; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 16, 48
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-PPC64LE-NEXT:    stb 5, 6(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 7(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stdbrx 3, 0, 4
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 56, 8
+; CHECK-PPC64-NEXT:    rldicl 6, 3, 48, 16
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 40, 24
+; CHECK-PPC64-NEXT:    stb 6, 2(4)
+; CHECK-PPC64-NEXT:    rldicl 6, 3, 32, 32
+; CHECK-PPC64-NEXT:    stb 5, 3(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 24, 40
+; CHECK-PPC64-NEXT:    stb 6, 4(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    stb 5, 5(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 16, 48
+; CHECK-PPC64-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-PPC64-NEXT:    stb 5, 6(4)
+; CHECK-PPC64-NEXT:    stb 3, 7(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i64 %m to i8
@@ -133,12 +185,40 @@ entry:
 define void @store_i64_by_i8_bswap(i64 %m, i8* %p)  {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stdbrx 3, 0, 4
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 56, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 7(4)
+; CHECK-PPC64LE-NEXT:    stb 5, 6(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 48, 16
+; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 40, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 32, 32
+; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 24, 40
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 16, 48
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stdx 3, 0, 4
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 56, 8
+; CHECK-PPC64-NEXT:    rldicl 6, 3, 48, 16
+; CHECK-PPC64-NEXT:    stb 5, 6(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 40, 24
+; CHECK-PPC64-NEXT:    stb 6, 5(4)
+; CHECK-PPC64-NEXT:    rldicl 6, 3, 32, 32
+; CHECK-PPC64-NEXT:    stb 5, 4(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 24, 40
+; CHECK-PPC64-NEXT:    stb 6, 3(4)
+; CHECK-PPC64-NEXT:    stb 3, 7(4)
+; CHECK-PPC64-NEXT:    stb 5, 2(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 16, 48
+; CHECK-PPC64-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i64 %m to i8
@@ -187,18 +267,46 @@ entry:
 define void @store_i64_by_i8_bswap_uses(i32 signext %t, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i64_by_i8_bswap_uses:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    slwi [[REG:[0-9]+]], 3, 3
-; CHECK-PPC64LE-NEXT:    subf [[REG1:[0-9]+]], 3, [[REG]] 
-; CHECK-PPC64LE-NEXT:    extsw [[REG2:[0-9]+]], [[REG1]]
-; CHECK-PPC64LE-NEXT:    stdbrx [[REG2]], 0, 4
+; CHECK-PPC64LE-NEXT:    slwi 5, 3, 3
+; CHECK-PPC64LE-NEXT:    subf 3, 3, 5
+; CHECK-PPC64LE-NEXT:    extsw 3, 3
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 56, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 7(4)
+; CHECK-PPC64LE-NEXT:    stb 5, 6(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 48, 16
+; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 40, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 32, 32
+; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 24, 40
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    rldicl 5, 3, 16, 48
+; CHECK-PPC64LE-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i64_by_i8_bswap_uses:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    slwi [[REG:[0-9]+]], 3, 3
-; CHECK-PPC64-NEXT:    subf [[REG1:[0-9]+]], 3, [[REG]]
-; CHECK-PPC64-NEXT:    extsw [[REG2:[0-9]+]], [[REG1]]
-; CHECK-PPC64-NEXT:    stdx [[REG2]], 0, 4
+; CHECK-PPC64-NEXT:    slwi 5, 3, 3
+; CHECK-PPC64-NEXT:    subf 3, 3, 5
+; CHECK-PPC64-NEXT:    extsw 3, 3
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 56, 8
+; CHECK-PPC64-NEXT:    rldicl 6, 3, 48, 16
+; CHECK-PPC64-NEXT:    stb 5, 6(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 40, 24
+; CHECK-PPC64-NEXT:    stb 6, 5(4)
+; CHECK-PPC64-NEXT:    rldicl 6, 3, 32, 32
+; CHECK-PPC64-NEXT:    stb 5, 4(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 24, 40
+; CHECK-PPC64-NEXT:    stb 6, 3(4)
+; CHECK-PPC64-NEXT:    stb 3, 7(4)
+; CHECK-PPC64-NEXT:    stb 5, 2(4)
+; CHECK-PPC64-NEXT:    rldicl 5, 3, 16, 48
+; CHECK-PPC64-NEXT:    rldicl 3, 3, 8, 56
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %mul = mul nsw i32 %t, 7
@@ -248,11 +356,25 @@ entry:
 define void @store_i32_by_i8_bswap_volatile(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_volatile:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NOT:   stwbrx 
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_volatile:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NOT:   stw 
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 3, 3(4)
+; CHECK-PPC64-NEXT:    stb 5, 2(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i32 %m to i8
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -281,11 +403,29 @@ entry:
 define void @store_i32_by_i8_bswap_store_in_between(i32 signext %m, i8* %p, i8* %q) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_store_in_between:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NOT:   stwbrx 
+; CHECK-PPC64LE-NEXT:    srwi 6, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
+; CHECK-PPC64LE-NEXT:    stb 6, 2(4)
+; CHECK-PPC64LE-NEXT:    li 6, 3
+; CHECK-PPC64LE-NEXT:    stb 6, 0(5)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_store_in_between:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NOT:   stw 
+; CHECK-PPC64-NEXT:    li 6, 3
+; CHECK-PPC64-NEXT:    srwi 7, 3, 8
+; CHECK-PPC64-NEXT:    stb 7, 2(4)
+; CHECK-PPC64-NEXT:    stb 3, 3(4)
+; CHECK-PPC64-NEXT:    stb 6, 0(5)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i32 %m to i8
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -308,11 +448,25 @@ entry:
 define void @store_i32_by_i8_bswap_unrelated_store(i32 signext %m, i8* %p, i8* %q) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_unrelated_store:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NOT:   stwbrx 
+; CHECK-PPC64LE-NEXT:    srwi 6, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 3(4)
+; CHECK-PPC64LE-NEXT:    stb 6, 2(5)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_unrelated_store:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NOT:   stw 
+; CHECK-PPC64-NEXT:    srwi 6, 3, 8
+; CHECK-PPC64-NEXT:    stb 3, 3(4)
+; CHECK-PPC64-NEXT:    stb 6, 2(5)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    blr
 entry:
   %conv = trunc i32 %m to i8
   %arrayidx = getelementptr inbounds i8, i8* %p, i64 3
@@ -339,13 +493,24 @@ entry:
 define void @store_i32_by_i8_bswap_nonzero_offset(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_nonzero_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    addi [[REG1:[0-9]+]], 4, 1
-; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG1]] 
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 4(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 2(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 1(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_nonzero_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stw 3, 1(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 3, 4(4)
+; CHECK-PPC64-NEXT:    stb 5, 3(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, 2(4)
+; CHECK-PPC64-NEXT:    stb 3, 1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 8
@@ -374,13 +539,24 @@ entry:
 define void @store_i32_by_i8_neg_offset(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_neg_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    stw 3, -4(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, -3(4)
+; CHECK-PPC64LE-NEXT:    stb 3, -4(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, -2(4)
+; CHECK-PPC64LE-NEXT:    stb 3, -1(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_neg_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    addi [[REG1:[0-9]+]], 4, -4
-; CHECK-PPC64-NEXT:    stwbrx 3, 0, [[REG1]] 
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 3, -4(4)
+; CHECK-PPC64-NEXT:    stb 5, -3(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, -2(4)
+; CHECK-PPC64-NEXT:    stb 3, -1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 8
@@ -409,13 +585,24 @@ entry:
 define void @store_i32_by_i8_bswap_neg_offset(i32 signext %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_neg_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    addi [[REG1:[0-9]+]], 4, -4
-; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG1]] 
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    stb 5, -3(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, -4(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, -2(4)
+; CHECK-PPC64LE-NEXT:    stb 3, -1(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_neg_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    stw 3, -4(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 6, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, -3(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 6, -4(4)
+; CHECK-PPC64-NEXT:    stb 5, -2(4)
+; CHECK-PPC64-NEXT:    stb 3, -1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 16
@@ -444,17 +631,28 @@ entry:
 define void @store_i32_by_i8_bswap_base_index_offset(i32 %m, i32 %i, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_base_index_offset:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64LE-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
-; CHECK-PPC64LE-NEXT:    addi [[REG3:[0-9]+]], [[REG2]], -4
-; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG3]] 
+; CHECK-PPC64LE-NEXT:    extsw 4, 4
+; CHECK-PPC64LE-NEXT:    srwi 6, 3, 16
+; CHECK-PPC64LE-NEXT:    add 4, 5, 4
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 6, -3(4)
+; CHECK-PPC64LE-NEXT:    stb 5, -4(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, -2(4)
+; CHECK-PPC64LE-NEXT:    stb 3, -1(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_base_index_offset:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
-; CHECK-PPC64-NEXT:    stw 3, -4([[REG2]])
+; CHECK-PPC64-NEXT:    extsw 4, 4
+; CHECK-PPC64-NEXT:    srwi 6, 3, 16
+; CHECK-PPC64-NEXT:    add 4, 5, 4
+; CHECK-PPC64-NEXT:    srwi 5, 3, 24
+; CHECK-PPC64-NEXT:    stb 6, -3(4)
+; CHECK-PPC64-NEXT:    srwi 6, 3, 8
+; CHECK-PPC64-NEXT:    stb 5, -4(4)
+; CHECK-PPC64-NEXT:    stb 6, -2(4)
+; CHECK-PPC64-NEXT:    stb 3, -1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i32 %m, 16
@@ -496,17 +694,28 @@ entry:
 define void @store_i32_by_i8_bswap_complicated(i32 %m, i32 %i, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_complicated:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64LE-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
-; CHECK-PPC64LE-NEXT:    addi [[REG3:[0-9]+]], [[REG2]], 3 
-; CHECK-PPC64LE-NEXT:    stwbrx 3, 0, [[REG3]] 
+; CHECK-PPC64LE-NEXT:    extsw 4, 4
+; CHECK-PPC64LE-NEXT:    add 4, 5, 4
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 3(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    stb 5, 4(4)
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, 5(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 6(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_complicated:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    extsw [[REG1:[0-9]+]], 4
-; CHECK-PPC64-NEXT:    add [[REG2:[0-9]+]], 5, [[REG1]] 
-; CHECK-PPC64-NEXT:    stw 3, 3([[REG2]])
+; CHECK-PPC64-NEXT:    extsw 4, 4
+; CHECK-PPC64-NEXT:    srwi 6, 3, 24
+; CHECK-PPC64-NEXT:    add 4, 5, 4
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    stb 6, 3(4)
+; CHECK-PPC64-NEXT:    stb 5, 4(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 5, 5(4)
+; CHECK-PPC64-NEXT:    stb 3, 6(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %idx.ext = sext i32 %i to i64
@@ -536,12 +745,16 @@ entry:
 define void @store_i16_by_i8_bswap(i16 %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_i16_by_i8_bswap:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    sthbrx 3, 0, 4
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 5, 0(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 1(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_i16_by_i8_bswap:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    sth 3, 0(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 5, 0(4)
+; CHECK-PPC64-NEXT:    stb 3, 1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %0 = lshr i16 %m, 8
@@ -558,12 +771,16 @@ entry:
 define void @store_16_by_i8(i16 %m, i8* %p) {
 ; CHECK-PPC64LE-LABEL: store_16_by_i8:
 ; CHECK-PPC64LE:       # %bb.0: # %entry
-; CHECK-PPC64LE-NEXT:    sth 3, 0(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 8
+; CHECK-PPC64LE-NEXT:    stb 3, 1(4)
 ; CHECK-PPC64LE-NEXT:    blr
 ;
 ; CHECK-PPC64-LABEL: store_16_by_i8:
 ; CHECK-PPC64:       # %bb.0: # %entry
-; CHECK-PPC64-NEXT:    sthbrx 3, 0, 4
+; CHECK-PPC64-NEXT:    srwi 5, 3, 8
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
 ; CHECK-PPC64-NEXT:    blr
 entry:
   %conv1 = trunc i16 %m to i8

From 463854846853c3e4e099d31c51edec18e00730c0 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 18:49:05 +0000
Subject: [PATCH 1056/1176] llvm-undname: More coverage-related cleanups

- The loop in demangleFunctionParameterList() only exits
  on Error, @, and Z. All 3 cases were handled, so the
  rest of the function is DEMANGLE_UNREACHABLE.

- The loop in demangleTemplateParameterList() always returns
  on Error, so there's no need to check for that in the loop
  header and after the loop.

- Add test cases for invalid function parameter manglings.

- Add a (redundant) test case for a simple template parameter
  list mangling.

- Add a test case pointing out that varargs functions aren't
  demangled correctly.

llvm-svn: 362540
---
 llvm/lib/Demangle/MicrosoftDemangle.cpp   | 20 +++++++++-----------
 llvm/test/Demangle/invalid-manglings.test | 10 ++++++++++
 llvm/test/Demangle/ms-basic.test          |  4 ++++
 llvm/test/Demangle/ms-templates.test      |  2 ++
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index c8f7502ce2c09..e28420c4a136f 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -2093,7 +2093,7 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
   return ATy;
 }
 
-// Reads a function or a template parameters.
+// Reads a function's parameters.
 NodeArrayNode *
 Demangler::demangleFunctionParameterList(StringView &MangledName) {
   // Empty parameter list.
@@ -2157,8 +2157,7 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
     return NA;
   }
 
-  Error = true;
-  return nullptr;
+  DEMANGLE_UNREACHABLE;
 }
 
 NodeArrayNode *
@@ -2167,7 +2166,7 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
   NodeList **Current = &Head;
   size_t Count = 0;
 
-  while (!Error && !MangledName.startsWith('@')) {
+  while (!MangledName.startsWith('@')) {
     if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
         MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
       // parameter pack separator
@@ -2278,15 +2277,14 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
     Current = &TP.Next;
   }
 
-  if (Error)
-    return nullptr;
+  // The loop above returns nullptr on Error.
+  assert(!Error);
 
   // Template parameter lists cannot be variadic, so it can only be terminated
-  // by @.
-  if (MangledName.consumeFront('@'))
-    return nodeListToNodeArray(Arena, Head, Count);
-  Error = true;
-  return nullptr;
+  // by @ (as opposed to 'Z' in the function parameter case).
+  assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
+  MangledName.consumeFront('@');
+  return nodeListToNodeArray(Arena, Head, Count);
 }
 
 void Demangler::dumpBackReferences() {
diff --git a/llvm/test/Demangle/invalid-manglings.test b/llvm/test/Demangle/invalid-manglings.test
index 8887b2cd20c79..543c116916e6d 100644
--- a/llvm/test/Demangle/invalid-manglings.test
+++ b/llvm/test/Demangle/invalid-manglings.test
@@ -100,6 +100,16 @@
 ; CHECK-NEXT: ??__E?Foo@@YAXXZ
 ; CHECK-NEXT: error: Invalid mangled name
 
+?foo@@YAH0@Z
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@YAH0@Z
+; CHECK-NEXT: error: Invalid mangled name
+
+?foo@@YAHH
+; CHECK-EMPTY:
+; CHECK-NEXT: ?foo@@YAHH
+; CHECK-NEXT: error: Invalid mangled name
+
 ??8@8
 ; CHECK-EMPTY:
 ; CHECK-NEXT: ??8@8
diff --git a/llvm/test/Demangle/ms-basic.test b/llvm/test/Demangle/ms-basic.test
index 844602bfe4bc9..8ba34d871f163 100644
--- a/llvm/test/Demangle/ms-basic.test
+++ b/llvm/test/Demangle/ms-basic.test
@@ -38,6 +38,10 @@
 ?x@@YAXMH@Z
 ; CHECK: void __cdecl x(float, int)
 
+?x@@YAXMHZZ
+; FIXME: This should be `(float, int, ...)`
+; CHECK: void __cdecl x(float, int)
+
 ?x@@3P6AHMNH@ZEA
 ; CHECK: int (__cdecl *x)(float, double, int)
 
diff --git a/llvm/test/Demangle/ms-templates.test b/llvm/test/Demangle/ms-templates.test
index 91e85f412ef54..e6ce1928b3012 100644
--- a/llvm/test/Demangle/ms-templates.test
+++ b/llvm/test/Demangle/ms-templates.test
@@ -4,6 +4,8 @@
 
 ; CHECK-NOT: Invalid mangled name
 
+?f@@3V?$C@H@@A
+; CHECK: class C<int> f
 
 ??0?$Class@VTypename@@@@QAE@XZ
 ; CHECK: __thiscall Class<class Typename>::Class<class Typename>(void)

From 1dce82636c9241b8208abe05ba02e499424338d5 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 19:10:08 +0000
Subject: [PATCH 1057/1176] llvm-undname: Correctly demangle vararg parameters

FunctionSignatureNode already had an IsVariadic field,
but it wasn't used anywhere yet. Set it and use it.

llvm-svn: 362541
---
 llvm/include/llvm/Demangle/MicrosoftDemangle.h      | 3 ++-
 llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 2 +-
 llvm/lib/Demangle/MicrosoftDemangle.cpp             | 9 ++++-----
 llvm/lib/Demangle/MicrosoftDemangleNodes.cpp        | 6 ++++++
 llvm/test/Demangle/ms-basic.test                    | 6 ++++--
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
index 423fc2eac8588..382e79401c437 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -177,8 +177,9 @@ class Demangler {
 
   ArrayTypeNode *demangleArrayType(StringView &MangledName);
 
+  NodeArrayNode *demangleFunctionParameterList(StringView &MangledName,
+                                               bool &IsVariadic);
   NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
-  NodeArrayNode *demangleFunctionParameterList(StringView &MangledName);
 
   std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
   uint64_t demangleUnsigned(StringView &MangledName);
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index df384e7362a72..da9d9d5bfdc0c 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -344,7 +344,7 @@ struct FunctionSignatureNode : public TypeNode {
   // Function parameters
   NodeArrayNode *Params = nullptr;
 
-  // True if the function type is noexcept
+  // True if the function type is noexcept.
   bool IsNoexcept = false;
 };
 
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index e28420c4a136f..bf7d77638f34e 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -1826,7 +1826,7 @@ FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
   if (!IsStructor)
     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
 
-  FTy->Params = demangleFunctionParameterList(MangledName);
+  FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
 
   FTy->IsNoexcept = demangleThrowSpecification(MangledName);
 
@@ -2094,8 +2094,8 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
 }
 
 // Reads a function's parameters.
-NodeArrayNode *
-Demangler::demangleFunctionParameterList(StringView &MangledName) {
+NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
+                                                        bool &IsVariadic) {
   // Empty parameter list.
   if (MangledName.consumeFront('X'))
     return nullptr;
@@ -2152,8 +2152,7 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
     return NA;
 
   if (MangledName.consumeFront('Z')) {
-    // This is a variadic parameter list.  We probably need a variadic node to
-    // append to the end.
+    IsVariadic = true;
     return NA;
   }
 
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index c07fde897e0d9..63ca475ec1fed 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -414,6 +414,12 @@ void FunctionSignatureNode::outputPost(OutputStream &OS,
       Params->output(OS, Flags);
     else
       OS << "void";
+
+    if (IsVariadic) {
+      if (OS.back() != '(')
+        OS << ", ";
+      OS << "...";
+    }
     OS << ")";
   }
 
diff --git a/llvm/test/Demangle/ms-basic.test b/llvm/test/Demangle/ms-basic.test
index 8ba34d871f163..bc514b4e0c607 100644
--- a/llvm/test/Demangle/ms-basic.test
+++ b/llvm/test/Demangle/ms-basic.test
@@ -39,8 +39,10 @@
 ; CHECK: void __cdecl x(float, int)
 
 ?x@@YAXMHZZ
-; FIXME: This should be `(float, int, ...)`
-; CHECK: void __cdecl x(float, int)
+; CHECK: void __cdecl x(float, int, ...)
+
+?x@@YAXZZ
+; CHECK: void __cdecl x(...)
 
 ?x@@3P6AHMNH@ZEA
 ; CHECK: int (__cdecl *x)(float, double, int)

From 2e207d4d76dfc9c3b9e1d039df4f27bd9ca30076 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Tue, 4 Jun 2019 19:18:40 +0000
Subject: [PATCH 1058/1176] Fixed GWP-ASan build breakage. When adding the
 optional flag parser, there was a missing dependency on compiler-rt (and thus
 SanitizerCommon) for this feature.

llvm-svn: 362542
---
 compiler-rt/cmake/config-ix.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 67aec6f1f1747..49a22a9222737 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -687,7 +687,8 @@ endif()
 # Note: Fuchsia and Windows are not currently supported by GWP-ASan. Support
 # is planned for these platforms. Darwin is also not supported due to TLS
 # calling malloc on first use.
-if (GWP_ASAN_SUPPORTED_ARCH AND OS_NAME MATCHES "Android|Linux")
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND GWP_ASAN_SUPPORTED_ARCH AND
+    OS_NAME MATCHES "Android|Linux")
   set(COMPILER_RT_HAS_GWP_ASAN TRUE)
 else()
   set(COMPILER_RT_HAS_GWP_ASAN FALSE)

From a03e2b25abfac72d2415edfef8a81ed127a95ca4 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Tue, 4 Jun 2019 19:29:59 +0000
Subject: [PATCH 1059/1176] [ABI] Fix SystemV ABI to handle nested aggregate
 type returned in register

Add a function to flatten the nested aggregate type

Differential Revision: https://reviews.llvm.org/D62702

Patch by Wanyi Ye <kusmour@gmail.com>

llvm-svn: 362543
---
 lldb/include/lldb/Symbol/ClangASTContext.h    |   2 +
 lldb/include/lldb/Symbol/TypeSystem.h         |   2 +
 .../functionalities/return-value/Makefile     |   2 +-
 .../return-value/TestReturnValue.py           |  39 +++-
 .../{call-func.c => call-func.cpp}            | 200 ++++++++++++++++++
 .../lang/cpp/trivial_abi/TestTrivialABI.py    |   3 +-
 .../ABI/SysV-x86_64/ABISysV_x86_64.cpp        | 136 +++++++-----
 lldb/source/Symbol/ClangASTContext.cpp        |   8 +
 8 files changed, 338 insertions(+), 54 deletions(-)
 rename lldb/packages/Python/lldbsuite/test/functionalities/return-value/{call-func.c => call-func.cpp} (60%)

diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h
index 677359cecf9fe..dda9f4347f0cd 100644
--- a/lldb/include/lldb/Symbol/ClangASTContext.h
+++ b/lldb/include/lldb/Symbol/ClangASTContext.h
@@ -598,6 +598,8 @@ class ClangASTContext : public TypeSystem {
 
   bool IsVoidType(lldb::opaque_compiler_type_t type) override;
 
+  bool CanPassInRegisters(const CompilerType &type) override;
+
   bool SupportsLanguage(lldb::LanguageType language) override;
 
   static bool GetCXXClassName(const CompilerType &type,
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index 39226eb9712f6..4bef2a4446ebd 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -181,6 +181,8 @@ class TypeSystem : public PluginInterface {
 
   virtual bool IsVoidType(lldb::opaque_compiler_type_t type) = 0;
 
+  virtual bool CanPassInRegisters(const CompilerType &type) = 0;
+
   // TypeSystems can support more than one language
   virtual bool SupportsLanguage(lldb::LanguageType language) = 0;
 
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/Makefile
index cb03eabfc2744..fd4e308634654 100644
--- a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/Makefile
+++ b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/Makefile
@@ -1,5 +1,5 @@
 LEVEL = ../../make
 
-C_SOURCES := call-func.c
+CXX_SOURCES := call-func.cpp
 
 include $(LEVEL)/Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py
index 929bd4a735112..6f8575c72b597 100644
--- a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py
+++ b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/TestReturnValue.py
@@ -57,7 +57,7 @@ def test_with_python(self):
 
         frame = thread.GetFrameAtIndex(0)
         fun_name = frame.GetFunctionName()
-        self.assertTrue(fun_name == "outer_sint")
+        self.assertTrue(fun_name == "outer_sint(int)")
 
         return_value = thread.GetStopReturnValue()
         self.assertTrue(return_value.IsValid())
@@ -78,7 +78,7 @@ def test_with_python(self):
 
         frame = thread.GetFrameAtIndex(1)
         fun_name = frame.GetFunctionName()
-        self.assertTrue(fun_name == "outer_sint")
+        self.assertTrue(fun_name == "outer_sint(int)")
         in_int = frame.FindVariable("value").GetValueAsSigned(error)
         self.assertTrue(error.Success())
 
@@ -98,7 +98,7 @@ def test_with_python(self):
 
         # Now try some simple returns that have different types:
         inner_float_bkpt = self.target.BreakpointCreateByName(
-            "inner_float", exe)
+            "inner_float(float)", exe)
         self.assertTrue(inner_float_bkpt, VALID_BREAKPOINT)
         self.process.Continue()
         thread_list = lldbutil.get_threads_stopped_at_breakpoint(
@@ -118,7 +118,7 @@ def test_with_python(self):
 
         frame = thread.GetFrameAtIndex(0)
         fun_name = frame.GetFunctionName()
-        self.assertTrue(fun_name == "outer_float")
+        self.assertTrue(fun_name == "outer_float(float)")
 
         #return_value = thread.GetStopReturnValue()
         #self.assertTrue(return_value.IsValid())
@@ -190,6 +190,37 @@ def test_vector_values(self):
         self.return_and_test_struct_value("return_ext_vector_size_float32_4")
         self.return_and_test_struct_value("return_ext_vector_size_float32_8")
 
+    # limit the nested struct and class tests to only x86_64
+    @skipIf(archs=no_match(['x86_64']))
+    @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24778")
+    def test_for_cpp_support(self):
+        self.build()
+        exe = self.getBuildArtifact("a.out")
+        (self.target, self.process, thread, inner_sint_bkpt) = lldbutil.run_to_name_breakpoint(self, "inner_sint", exe_name = exe)
+
+        error = lldb.SBError()
+
+        self.target = self.dbg.CreateTarget(exe)
+        self.assertTrue(self.target, VALID_TARGET)
+
+        main_bktp = self.target.BreakpointCreateByName("main", exe)
+        self.assertTrue(main_bktp, VALID_BREAKPOINT)
+
+        self.process = self.target.LaunchSimple(
+            None, None, self.get_process_working_directory())
+        self.assertEqual(len(lldbutil.get_threads_stopped_at_breakpoint(
+            self.process, main_bktp)), 1)
+        # nested struct tests
+        self.return_and_test_struct_value("return_nested_one_float_three_base")
+        self.return_and_test_struct_value("return_double_nested_one_float_one_nested")
+        self.return_and_test_struct_value("return_nested_float_struct")
+        # class test
+        self.return_and_test_struct_value("return_base_class_one_char")
+        self.return_and_test_struct_value("return_nested_class_float_and_base")
+        self.return_and_test_struct_value("return_double_nested_class_float_and_nested")
+        self.return_and_test_struct_value("return_base_class")
+        self.return_and_test_struct_value("return_derived_class")
+
     def return_and_test_struct_value(self, func_name):
         """Pass in the name of the function to return from - takes in value, returns value."""
 
diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/call-func.c b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/call-func.cpp
similarity index 60%
rename from lldb/packages/Python/lldbsuite/test/functionalities/return-value/call-func.c
rename to lldb/packages/Python/lldbsuite/test/functionalities/return-value/call-func.cpp
index 0c026ffcca173..c538e8479a9b1 100644
--- a/lldb/packages/Python/lldbsuite/test/functionalities/return-value/call-func.c
+++ b/lldb/packages/Python/lldbsuite/test/functionalities/return-value/call-func.cpp
@@ -301,6 +301,69 @@ return_one_int_one_pointer (struct one_int_one_pointer value)
   return value;
 }
 
+struct base_one_char {
+  char c;
+};
+
+struct nested_one_float_three_base {
+  float f;
+  struct base_one_char b1;
+  struct base_one_char b2;
+  struct base_one_char b3;
+}; // returned in RAX for both SysV and Windows
+
+struct nested_one_float_three_base
+return_nested_one_float_three_base (struct nested_one_float_three_base value)
+{
+  return value;
+}
+
+struct double_nested_one_float_one_nested {
+  float f;
+  struct nested_one_float_three_base ns;
+}; // SysV-ABI: returned in XMM0 + RAX
+// Windows-ABI: returned in memory
+
+struct double_nested_one_float_one_nested
+return_double_nested_one_float_one_nested(struct double_nested_one_float_one_nested value)
+{
+  return value;
+}
+
+struct base_float_struct {
+  float f1;
+  float f2;
+};
+
+struct nested_float_struct {
+  double d;
+  struct base_float_struct bfs;
+}; // SysV-ABI: return in xmm0 + xmm1
+// Windows-ABI: returned in memory
+
+struct nested_float_struct
+return_nested_float_struct (struct nested_float_struct value)
+{
+  return value;
+}
+
+struct six_double_three_int {
+  double d1;  // 8
+  double d2;  // 8
+  int i1;   // 4
+  double d3;  // 8
+  double d4;  // 8
+  int i2;   // 4
+  double d5;  // 8
+  double d6;  // 8
+  int i3;   // 4
+}; // returned in memeory on both SysV and Windows
+
+struct six_double_three_int
+return_six_double_three_int (struct six_double_three_int value) {
+  return value;
+}
+
 typedef float vector_size_float32_8 __attribute__((__vector_size__(8)));
 typedef float vector_size_float32_16 __attribute__((__vector_size__(16)));
 typedef float vector_size_float32_32 __attribute__((__vector_size__(32)));
@@ -345,6 +408,100 @@ return_ext_vector_size_float32_8 (ext_vector_size_float32_8 value)
     return value;
 }
 
+class base_class_one_char {
+public:
+  char c = '!';
+}; // returned in RAX for both ABI
+
+base_class_one_char
+return_base_class_one_char(base_class_one_char value) {
+  return value;
+}
+
+class nested_class_float_and_base {
+public:
+  float f = 0.1;
+  base_class_one_char b;
+}; // returned in RAX for both ABI
+
+nested_class_float_and_base
+return_nested_class_float_and_base(nested_class_float_and_base value) {
+  return value;
+}
+
+class double_nested_class_float_and_nested {
+public:
+  float f = 0.2;
+  nested_class_float_and_base n;
+}; // SysV-ABI: returned in XMM0 + RAX
+// Windows-ABI: returned in memory
+
+double_nested_class_float_and_nested
+return_double_nested_class_float_and_nested(
+    double_nested_class_float_and_nested value) {
+  return value;
+}
+
+class base_class {
+public:
+  base_class() {
+    c = 'a';
+    c2 = 'b';
+  }
+private:
+  char c;
+protected:
+  char c2;
+}; // returned in RAX for both ABI
+
+base_class
+return_base_class(base_class value) {
+  return value;
+}
+
+class sub_class : base_class {
+public:
+  sub_class() {
+    c2 = '&';
+    i = 10;
+  }
+private:
+  int i;
+}; // size 8; should be returned in RAX
+// Since it's in register, lldb won't be able to get the
+// fields in base class, expected to fail.
+
+sub_class
+return_sub_class(sub_class value) {
+  return value;
+}
+
+class abstract_class {
+public:
+  virtual char getChar() = 0;
+private:
+  int i = 8;
+protected:
+  char c = '!';
+};
+
+class derived_class : abstract_class {
+public:
+  derived_class() {
+    c = '?';
+  }
+  char getChar() {
+    return this->c;
+  }
+private:
+  char c2 = '$';
+}; // size: 16; contains non POD member, returned in memory
+
+derived_class
+return_derived_class(derived_class value) {
+  return value;
+}
+
 int 
 main ()
 {
@@ -395,6 +552,49 @@ main ()
   return_one_int_one_double_packed ((struct one_int_one_double_packed) {10, 20.0});
   return_one_int_one_long ((struct one_int_one_long) {10, 20});
 
+  return_nested_one_float_three_base((struct nested_one_float_three_base) {
+                                        10.0,
+                                        (struct base_one_char) {
+                                          'x'
+                                        },
+                                        (struct base_one_char) {
+                                          'y'
+                                        },
+                                        (struct base_one_char) {
+                                          'z'
+                                        }
+                                      });
+  return_double_nested_one_float_one_nested((struct double_nested_one_float_one_nested) {
+                                              10.0,
+                                              (struct nested_one_float_three_base) {
+                                                20.0,
+                                                (struct base_one_char) {
+                                                  'x'
+                                                },
+                                                (struct base_one_char) {
+                                                  'y'
+                                                },
+                                                (struct base_one_char) {
+                                                  'z'
+                                                }
+                                              }});
+  return_nested_float_struct((struct nested_float_struct) {
+                                10.0,
+                                (struct base_float_struct) {
+                                  20.0,
+                                  30.0
+                                }});
+  return_six_double_three_int((struct six_double_three_int) {
+                                10.0, 20.0, 1, 30.0, 40.0, 2, 50.0, 60.0, 3});
+
+  return_base_class_one_char(base_class_one_char());
+  return_nested_class_float_and_base(nested_class_float_and_base());
+  return_double_nested_class_float_and_nested(double_nested_class_float_and_nested());
+  return_base_class(base_class());
+  // this is expected to fail
+  return_sub_class(sub_class());
+  return_derived_class(derived_class());
+
   return_vector_size_float32_8 (( vector_size_float32_8 ){1.5, 2.25});
   return_vector_size_float32_16 (( vector_size_float32_16 ){1.5, 2.25, 4.125, 8.0625});
   return_vector_size_float32_32 (( vector_size_float32_32 ){1.5, 2.25, 4.125, 8.0625, 7.89, 8.52, 6.31, 9.12});
diff --git a/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py b/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py
index 11263abeea3c6..9c69da2ef1200 100644
--- a/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py
+++ b/lldb/packages/Python/lldbsuite/test/lang/cpp/trivial_abi/TestTrivialABI.py
@@ -28,7 +28,8 @@ def test_call_trivial(self):
         self.expr_test(True)
 
     @skipUnlessSupportedTypeAttribute("trivial_abi")
-    @expectedFailureAll(bugnumber="llvm.org/pr36870")
+    # fixed for SysV-x86_64 ABI, but not Windows-x86_64
+    @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr36870")
     def test_call_nontrivial(self):
         """Test that we can print a variable & call a function on the same class w/o the trivial ABI marker."""
         self.build()
diff --git a/lldb/source/Plugins/ABI/SysV-x86_64/ABISysV_x86_64.cpp b/lldb/source/Plugins/ABI/SysV-x86_64/ABISysV_x86_64.cpp
index 69d4d403a461a..75eb5490bd453 100644
--- a/lldb/source/Plugins/ABI/SysV-x86_64/ABISysV_x86_64.cpp
+++ b/lldb/source/Plugins/ABI/SysV-x86_64/ABISysV_x86_64.cpp
@@ -30,6 +30,8 @@
 #include "lldb/Utility/RegisterValue.h"
 #include "lldb/Utility/Status.h"
 
+#include <vector>
+
 using namespace lldb;
 using namespace lldb_private;
 
@@ -1558,6 +1560,55 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectSimple(
   return return_valobj_sp;
 }
 
+// The compiler will flatten the nested aggregate type into single
+// layer and push the value to stack
+// This helper function will flatten an aggregate type
+// and return true if it can be returned in register(s) by value
+// return false if the aggregate is in memory
+static bool FlattenAggregateType(
+    Thread &thread, ExecutionContext &exe_ctx,
+    CompilerType &return_compiler_type,
+    uint32_t data_byte_offset,
+    std::vector<uint32_t> &aggregate_field_offsets,
+    std::vector<CompilerType> &aggregate_compiler_types) {
+
+  const uint32_t num_children = return_compiler_type.GetNumFields();
+  for (uint32_t idx = 0; idx < num_children; ++idx) {
+    std::string name;
+    bool is_signed;
+    uint32_t count;
+    bool is_complex;
+
+    uint64_t field_bit_offset = 0;
+    CompilerType field_compiler_type = return_compiler_type.GetFieldAtIndex(
+        idx, name, &field_bit_offset, nullptr, nullptr);
+    llvm::Optional<uint64_t> field_bit_width =
+          field_compiler_type.GetBitSize(&thread);
+
+    // if we don't know the size of the field (e.g. invalid type), exit
+    if (!field_bit_width || *field_bit_width == 0) {
+      return false;
+    }
+
+    uint32_t field_byte_offset = field_bit_offset / 8 + data_byte_offset;
+
+    const uint32_t field_type_flags = field_compiler_type.GetTypeInfo();
+    if (field_compiler_type.IsIntegerOrEnumerationType(is_signed) ||
+        field_compiler_type.IsPointerType() ||
+        field_compiler_type.IsFloatingPointType(count, is_complex)) {
+      aggregate_field_offsets.push_back(field_byte_offset);
+      aggregate_compiler_types.push_back(field_compiler_type);
+    } else if (field_type_flags & eTypeHasChildren) {
+      if (!FlattenAggregateType(thread, exe_ctx, field_compiler_type,
+                                field_byte_offset, aggregate_field_offsets,
+                                aggregate_compiler_types)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
     Thread &thread, CompilerType &return_compiler_type) const {
   ValueObjectSP return_valobj_sp;
@@ -1580,10 +1631,17 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
   if (return_compiler_type.IsAggregateType()) {
     Target *target = exe_ctx.GetTargetPtr();
     bool is_memory = true;
-    if (*bit_width <= 128) {
-      ByteOrder target_byte_order = target->GetArchitecture().GetByteOrder();
+    std::vector<uint32_t> aggregate_field_offsets;
+    std::vector<CompilerType> aggregate_compiler_types;
+    if (return_compiler_type.GetTypeSystem()->CanPassInRegisters(
+          return_compiler_type) &&
+      *bit_width <= 128 &&
+      FlattenAggregateType(thread, exe_ctx, return_compiler_type,
+                          0, aggregate_field_offsets,
+                          aggregate_compiler_types)) {
+      ByteOrder byte_order = target->GetArchitecture().GetByteOrder();
       DataBufferSP data_sp(new DataBufferHeap(16, 0));
-      DataExtractor return_ext(data_sp, target_byte_order,
+      DataExtractor return_ext(data_sp, byte_order,
                                target->GetArchitecture().GetAddressByteSize());
 
       const RegisterInfo *rax_info =
@@ -1613,36 +1671,27 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
       uint32_t integer_bytes =
           0; // Tracks how much of the rax/rds registers we've consumed so far
 
-      const uint32_t num_children = return_compiler_type.GetNumFields();
+      // in case of the returned type is a subclass of non-abstract-base class
+      // it will have a padding to skip the base content
+      if (aggregate_field_offsets.size()) {
+        fp_bytes = aggregate_field_offsets[0];
+        integer_bytes = aggregate_field_offsets[0];
+      }
+
+      const uint32_t num_children = aggregate_compiler_types.size();
 
       // Since we are in the small struct regime, assume we are not in memory.
       is_memory = false;
-
       for (uint32_t idx = 0; idx < num_children; idx++) {
-        std::string name;
-        uint64_t field_bit_offset = 0;
         bool is_signed;
-        bool is_complex;
         uint32_t count;
+        bool is_complex;
 
-        CompilerType field_compiler_type = return_compiler_type.GetFieldAtIndex(
-            idx, name, &field_bit_offset, nullptr, nullptr);
-        llvm::Optional<uint64_t> field_bit_width =
-            field_compiler_type.GetBitSize(&thread);
-
-        // if we don't know the size of the field (e.g. invalid type), just
-        // bail out
-        if (!field_bit_width || *field_bit_width == 0)
-          break;
+        CompilerType field_compiler_type = aggregate_compiler_types[idx];
+        uint32_t field_byte_width = (uint32_t) (*field_compiler_type.GetByteSize(&thread));
+        uint32_t field_byte_offset = aggregate_field_offsets[idx];
 
-        // If there are any unaligned fields, this is stored in memory.
-        if (field_bit_offset % *field_bit_width != 0) {
-          is_memory = true;
-          break;
-        }
-
-        uint32_t field_byte_width = *field_bit_width / 8;
-        uint32_t field_byte_offset = field_bit_offset / 8;
+        uint32_t field_bit_width = field_byte_width * 8;
 
         DataExtractor *copy_from_extractor = nullptr;
         uint32_t copy_from_offset = 0;
@@ -1674,10 +1723,10 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
           }
         } else if (field_compiler_type.IsFloatingPointType(count, is_complex)) {
           // Structs with long doubles are always passed in memory.
-          if (*field_bit_width == 128) {
+          if (field_bit_width == 128) {
             is_memory = true;
             break;
-          } else if (*field_bit_width == 64) {
+          } else if (field_bit_width == 64) {
             // These have to be in a single xmm register.
             if (fp_bytes == 0)
               copy_from_extractor = &xmm0_data;
@@ -1686,7 +1735,7 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
 
             copy_from_offset = 0;
             fp_bytes += field_byte_width;
-          } else if (*field_bit_width == 32) {
+          } else if (field_bit_width == 32) {
             // This one is kind of complicated.  If we are in an "eightbyte"
             // with another float, we'll be stuffed into an xmm register with
             // it.  If we are in an "eightbyte" with one or more ints, then we
@@ -1695,18 +1744,15 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
             if (field_byte_offset % 8 == 0) {
               // We are at the beginning of one of the eightbytes, so check the
               // next element (if any)
-              if (idx == num_children - 1)
+              if (idx == num_children - 1) {
                 in_gpr = false;
-              else {
-                uint64_t next_field_bit_offset = 0;
+              } else {
                 CompilerType next_field_compiler_type =
-                    return_compiler_type.GetFieldAtIndex(idx + 1, name,
-                                                         &next_field_bit_offset,
-                                                         nullptr, nullptr);
+                    aggregate_compiler_types[idx + 1];
                 if (next_field_compiler_type.IsIntegerOrEnumerationType(
-                        is_signed))
+                        is_signed)) {
                   in_gpr = true;
-                else {
+                } else {
                   copy_from_offset = 0;
                   in_gpr = false;
                 }
@@ -1715,18 +1761,15 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
               // We are inside of an eightbyte, so see if the field before us
               // is floating point: This could happen if somebody put padding
               // in the structure.
-              if (idx == 0)
+              if (idx == 0) {
                 in_gpr = false;
-              else {
-                uint64_t prev_field_bit_offset = 0;
+              } else {
                 CompilerType prev_field_compiler_type =
-                    return_compiler_type.GetFieldAtIndex(idx - 1, name,
-                                                         &prev_field_bit_offset,
-                                                         nullptr, nullptr);
+                    aggregate_compiler_types[idx - 1];
                 if (prev_field_compiler_type.IsIntegerOrEnumerationType(
-                        is_signed))
+                        is_signed)) {
                   in_gpr = true;
-                else {
+                } else {
                   copy_from_offset = 4;
                   in_gpr = false;
                 }
@@ -1759,7 +1802,6 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
             }
           }
         }
-
         // These two tests are just sanity checks.  If I somehow get the type
         // calculation wrong above it is better to just return nothing than to
         // assert or crash.
@@ -1768,13 +1810,11 @@ ValueObjectSP ABISysV_x86_64::GetReturnValueObjectImpl(
         if (copy_from_offset + field_byte_width >
             copy_from_extractor->GetByteSize())
           return return_valobj_sp;
-
         copy_from_extractor->CopyByteOrderedData(
             copy_from_offset, field_byte_width,
             data_sp->GetBytes() + field_byte_offset, field_byte_width,
-            target_byte_order);
+            byte_order);
       }
-
       if (!is_memory) {
         // The result is in our data buffer.  Let's make a variable object out
         // of it:
diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp
index 55befb4bbcf3f..c554d989b17e4 100644
--- a/lldb/source/Symbol/ClangASTContext.cpp
+++ b/lldb/source/Symbol/ClangASTContext.cpp
@@ -3911,6 +3911,14 @@ bool ClangASTContext::IsVoidType(lldb::opaque_compiler_type_t type) {
   return GetCanonicalQualType(type)->isVoidType();
 }
 
+bool ClangASTContext::CanPassInRegisters(const CompilerType &type) {
+  if (auto *record_decl =
+      ClangASTContext::GetAsRecordDecl(type)) {
+    return record_decl->canPassInRegisters();
+  }
+  return false;
+}
+
 bool ClangASTContext::SupportsLanguage(lldb::LanguageType language) {
   return ClangASTContextSupportsLanguage(language);
 }

From 29975a2a5d050d1c6a7220844efe8706d1376eb4 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Tue, 4 Jun 2019 20:14:33 +0000
Subject: [PATCH 1060/1176] [Target] Remove Process::GetCPPLanguageRuntime

Summary:
I want to remove this method because I think that Process should be
language agnostic, or at least, not have knowledge about specific language
runtimes. There is "GetLanguageRuntime()" which should be used instead. If the
caller a CPPLanguageRuntime, they should cast it as needed. Ideally, this
should only happen in plugins that need C++ specific knowledge.

The next step I would like to do is remove "GetObjCLanguageRuntime()" as well.
There are a lot more instances of that function being used, so I wanted to
upload this one first to get the general reception to this idea.

Reviewers: compnerd, davide, JDevlieghere, jingham, clayborg, labath, aprantl

Subscribers: lldb-commits

Differential Revision: https://reviews.llvm.org/D62755

llvm-svn: 362544
---
 lldb/include/lldb/Target/CPPLanguageRuntime.h          |  5 +++++
 lldb/include/lldb/Target/Process.h                     |  2 --
 lldb/include/lldb/lldb-forward.h                       |  1 -
 lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp      |  3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp         |  2 +-
 lldb/source/Target/Process.cpp                         | 10 ----------
 6 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/lldb/include/lldb/Target/CPPLanguageRuntime.h b/lldb/include/lldb/Target/CPPLanguageRuntime.h
index 8de885372862b..f035bac4b4613 100644
--- a/lldb/include/lldb/Target/CPPLanguageRuntime.h
+++ b/lldb/include/lldb/Target/CPPLanguageRuntime.h
@@ -43,6 +43,11 @@ class CPPLanguageRuntime : public LanguageRuntime {
     return lldb::eLanguageTypeC_plus_plus;
   }
 
+  static CPPLanguageRuntime *GetCPPLanguageRuntime(Process &process) {
+    return static_cast<CPPLanguageRuntime *>(
+        process.GetLanguageRuntime(lldb::eLanguageTypeC_plus_plus));
+  }
+
   virtual bool IsVTableName(const char *name) = 0;
 
   bool GetObjectDescription(Stream &str, ValueObject &object) override;
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index 2657302340eec..c3ffa99a73c6c 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -2184,8 +2184,6 @@ class Process : public std::enable_shared_from_this<Process>,
   LanguageRuntime *GetLanguageRuntime(lldb::LanguageType language,
                                       bool retry_if_null = true);
 
-  CPPLanguageRuntime *GetCPPLanguageRuntime(bool retry_if_null = true);
-
   ObjCLanguageRuntime *GetObjCLanguageRuntime(bool retry_if_null = true);
 
   bool IsPossibleDynamicValue(ValueObject &in_value);
diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h
index ebebd47f0614b..fd2d272c89158 100644
--- a/lldb/include/lldb/lldb-forward.h
+++ b/lldb/include/lldb/lldb-forward.h
@@ -43,7 +43,6 @@ class BreakpointSiteList;
 class BroadcastEventSpec;
 class Broadcaster;
 class BroadcasterManager;
-class CPPLanguageRuntime;
 class ClangASTContext;
 class ClangASTImporter;
 class ClangASTMetadata;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
index 9dceca6123f31..01b3a7ecad2ae 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -67,7 +67,8 @@ bool lldb_private::formatters::LibcxxFunctionSummaryProvider(
   if (process == nullptr)
     return false;
 
-  CPPLanguageRuntime *cpp_runtime = process->GetCPPLanguageRuntime();
+  CPPLanguageRuntime *cpp_runtime =
+      CPPLanguageRuntime::GetCPPLanguageRuntime(*process);
 
   if (!cpp_runtime)
     return false;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp
index 8a0b5bf392ccb..7ea3d6b32e8c1 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.cpp
@@ -462,7 +462,7 @@ lldb::SearchFilterSP AppleObjCRuntime::CreateExceptionSearchFilter() {
 
 ValueObjectSP AppleObjCRuntime::GetExceptionObjectForThread(
     ThreadSP thread_sp) {
-  auto cpp_runtime = m_process->GetCPPLanguageRuntime();
+  auto *cpp_runtime = m_process->GetLanguageRuntime(eLanguageTypeC_plus_plus);
   if (!cpp_runtime) return ValueObjectSP();
   auto cpp_exception = cpp_runtime->GetExceptionObjectForThread(thread_sp);
   if (!cpp_exception) return ValueObjectSP();
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 1d1fda18e3dde..b46ded442b4e7 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -1598,16 +1598,6 @@ LanguageRuntime *Process::GetLanguageRuntime(lldb::LanguageType language,
   return runtime;
 }
 
-CPPLanguageRuntime *Process::GetCPPLanguageRuntime(bool retry_if_null) {
-  std::lock_guard<std::recursive_mutex> guard(m_language_runtimes_mutex);
-  LanguageRuntime *runtime =
-      GetLanguageRuntime(eLanguageTypeC_plus_plus, retry_if_null);
-  if (!runtime)
-    return nullptr;
-
-  return static_cast<CPPLanguageRuntime *>(runtime);
-}
-
 ObjCLanguageRuntime *Process::GetObjCLanguageRuntime(bool retry_if_null) {
   std::lock_guard<std::recursive_mutex> guard(m_language_runtimes_mutex);
   LanguageRuntime *runtime =

From 40107ce753ff172f76ceb67da2817868f952c003 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Tue, 4 Jun 2019 20:21:46 +0000
Subject: [PATCH 1061/1176] Introduce
 Value::stripPointerCastsSameRepresentation

This patch allows current users of Value::stripPointerCasts() to force
the result of the function to have the same representation as the value
it was called on. This is useful in various cases, e.g., (non-)null
checks.

In this patch only a single call site was adjusted to fix an existing
misuse that would cause nonnull where they may be wrong. Uses in
attribute deduction and other areas, e.g., D60047, are to be expected.

For a discussion on this topic, please see [0].

[0] http://lists.llvm.org/pipermail/llvm-dev/2018-December/128423.html

Reviewers: hfinkel, arsenm, reames

Subscribers: wdng, hiraditya, bollu, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61607

llvm-svn: 362545
---
 clang/test/CodeGenOpenCLCXX/addrspace-references.cl |  2 +-
 llvm/include/llvm/IR/Value.h                        | 13 ++++++++++++-
 llvm/lib/Analysis/LazyValueInfo.cpp                 |  2 +-
 llvm/lib/IR/Value.cpp                               | 13 ++++++++++++-
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-references.cl b/clang/test/CodeGenOpenCLCXX/addrspace-references.cl
index b17e701426e2d..19aeebe5df63b 100644
--- a/clang/test/CodeGenOpenCLCXX/addrspace-references.cl
+++ b/clang/test/CodeGenOpenCLCXX/addrspace-references.cl
@@ -9,6 +9,6 @@ void foo() {
   // CHECK: [[REF:%.*]] = alloca i32
   // CHECK: store i32 1, i32* [[REF]]
   // CHECK: [[REG:%[0-9]+]] = addrspacecast i32* [[REF]] to i32 addrspace(4)*
-  // CHECK: call spir_func i32 @_Z3barRU3AS4Kj(i32 addrspace(4)* nonnull dereferenceable(4) [[REG]])
+  // CHECK: call spir_func i32 @_Z3barRU3AS4Kj(i32 addrspace(4)* dereferenceable(4) [[REG]])
   bar(1);
 }
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index d0de008db265f..3f8caa668e51b 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -493,7 +493,7 @@ class Value {
   /// swifterror attribute.
   bool isSwiftError() const;
 
-  /// Strip off pointer casts, all-zero GEPs, and aliases.
+  /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
   ///
   /// Returns the original uncasted value.  If this is called on a non-pointer
   /// value, it returns 'this'.
@@ -503,6 +503,17 @@ class Value {
                          static_cast<const Value *>(this)->stripPointerCasts());
   }
 
+  /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases
+  /// but ensures the representation of the result stays the same.
+  ///
+  /// Returns the original uncasted value with the same representation. If this
+  /// is called on a non-pointer value, it returns 'this'.
+  const Value *stripPointerCastsSameRepresentation() const;
+  Value *stripPointerCastsSameRepresentation() {
+    return const_cast<Value *>(static_cast<const Value *>(this)
+                                   ->stripPointerCastsSameRepresentation());
+  }
+
   /// Strip off pointer casts, all-zero GEPs, aliases and invariant group
   /// info.
   ///
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 53e9f49a5711d..542ff709d4755 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1803,7 +1803,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
   // through would still be correct.
   const DataLayout &DL = CxtI->getModule()->getDataLayout();
   if (V->getType()->isPointerTy() && C->isNullValue() &&
-      isKnownNonZero(V->stripPointerCasts(), DL)) {
+      isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) {
     if (Pred == ICmpInst::ICMP_EQ)
       return LazyValueInfo::False;
     else if (Pred == ICmpInst::ICMP_NE)
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index cf32a66c90143..9e0a43ce1e30a 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -460,6 +460,7 @@ namespace {
 enum PointerStripKind {
   PSK_ZeroIndices,
   PSK_ZeroIndicesAndAliases,
+  PSK_ZeroIndicesAndAliasesSameRepresentation,
   PSK_ZeroIndicesAndAliasesAndInvariantGroups,
   PSK_InBoundsConstantIndices,
   PSK_InBounds
@@ -479,6 +480,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
     if (auto *GEP = dyn_cast<GEPOperator>(V)) {
       switch (StripKind) {
       case PSK_ZeroIndicesAndAliases:
+      case PSK_ZeroIndicesAndAliasesSameRepresentation:
       case PSK_ZeroIndicesAndAliasesAndInvariantGroups:
       case PSK_ZeroIndices:
         if (!GEP->hasAllZeroIndices())
@@ -494,8 +496,12 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
         break;
       }
       V = GEP->getPointerOperand();
-    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (StripKind != PSK_ZeroIndicesAndAliasesSameRepresentation &&
                Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
+      // TODO: If we know an address space cast will not change the
+      //       representation we could look through it here as well.
       V = cast<Operator>(V)->getOperand(0);
     } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
       if (StripKind == PSK_ZeroIndices || GA->isInterposable())
@@ -530,6 +536,11 @@ const Value *Value::stripPointerCasts() const {
   return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
 }
 
+const Value *Value::stripPointerCastsSameRepresentation() const {
+  return stripPointerCastsAndOffsets<
+      PSK_ZeroIndicesAndAliasesSameRepresentation>(this);
+}
+
 const Value *Value::stripPointerCastsNoFollowAliases() const {
   return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
 }

From 6b432dca5d4aa6bea8a39e7858f8cfd19f2b87ed Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Tue, 4 Jun 2019 20:34:43 +0000
Subject: [PATCH 1062/1176] [SelectionDAG][FIX] Allow "returned" arguments to
 be bit-casted

Summary:
An argument that is return by a function but bit-casted before can still
be annotated as "returned". Make sure we do not crash for this case.

Reviewers: sunfish, stephenwlin, niravd, arsenm

Subscribers: wdng, hiraditya, bollu, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59917

llvm-svn: 362546
---
 .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp |  7 +++++--
 llvm/test/CodeGen/X86/arg_returned_bitcast.ll        | 12 ++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/arg_returned_bitcast.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index da06ac7a414ff..4f7257d4a151d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9111,8 +9111,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       // for now.
       if (Args[i].IsReturned && !Op.getValueType().isVector() &&
           CanLowerReturn) {
-        assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
-               "unexpected use of 'returned'");
+        assert((CLI.RetTy == Args[i].Ty ||
+                (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
+                 CLI.RetTy->getPointerAddressSpace() ==
+                     Args[i].Ty->getPointerAddressSpace())) &&
+               RetTys.size() == NumValues && "unexpected use of 'returned'");
         // Before passing 'returned' to the target lowering code, ensure that
         // either the register MVT and the actual EVT are the same size or that
         // the return value and argument are extended in the same way; in these
diff --git a/llvm/test/CodeGen/X86/arg_returned_bitcast.ll b/llvm/test/CodeGen/X86/arg_returned_bitcast.ll
new file mode 100644
index 0000000000000..2287c129b75c0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/arg_returned_bitcast.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
+
+; Test that the "returned" attribute "works" even if there is a bitcast between
+; the argument and return value.
+
+declare double* @bar(i8* returned)
+
+define double* @foo(i8*) {
+  %r = tail call double* @bar(i8* %0)
+; CHECK: jmp    bar
+  ret double* %r
+}

From 3d9ca00e74e26f616de95353dd855b65ae5cf06f Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Tue, 4 Jun 2019 21:08:20 +0000
Subject: [PATCH 1063/1176] [WebAssembly] Fix ISel crash on sext_inreg/extract
 type mismatch

Summary:
Adjusts the index and adds a bitcast around the vector operand of
EXTRACT_VECTOR_ELT so that its lane type matches the source type of
its parent sext_inreg. Without this bitcast the ISel patterns do not
match and ISel fails.

Reviewers: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62646

llvm-svn: 362547
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 28 ++++++++-
 .../WebAssembly/simd-extended-extract.ll      | 59 +++++++++++++++++++
 2 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/WebAssembly/simd-extended-extract.ll

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 65db1ebf50fca..ca942cb3ac23c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1197,6 +1197,7 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
 SDValue
 WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  SDLoc DL(Op);
   // If sign extension operations are disabled, allow sext_inreg only if operand
   // is a vector extract. SIMD does not depend on sign extension operations, but
   // allowing sext_inreg in this context lets us have simple patterns to select
@@ -1204,8 +1205,31 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
   // simpler in this file, but would necessitate large and brittle patterns to
   // undo the expansion and select extract_lane_s instructions.
   assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
-  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
-    return Op;
+  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    const SDValue &Extract = Op.getOperand(0);
+    MVT VecT = Extract.getOperand(0).getSimpleValueType();
+    MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
+                             ->getVT()
+                             .getSimpleVT();
+    MVT ExtractedVecT =
+        MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
+    if (ExtractedVecT == VecT)
+      return Op;
+    // Bitcast vector to appropriate type to ensure ISel pattern coverage
+    const SDValue &Index = Extract.getOperand(1);
+    unsigned IndexVal =
+        static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
+    unsigned Scale =
+        ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
+    assert(Scale > 1);
+    SDValue NewIndex =
+        DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
+    SDValue NewExtract = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
+        DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
+                       NewExtract, Op.getOperand(1));
+  }
   // Otherwise expand
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/WebAssembly/simd-extended-extract.ll b/llvm/test/CodeGen/WebAssembly/simd-extended-extract.ll
new file mode 100644
index 0000000000000..b6e35f5f93a7c
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-extended-extract.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s
+
+; Regression test for an issue with patterns like the following:
+;
+;     t101: v4i32 = BUILD_VECTOR t99, t99, t99, t99
+;         t92: i32 = extract_vector_elt t101, Constant:i32<0>
+;             t89: i32 = sign_extend_inreg t92, ValueType:ch:i8
+;
+; Notice that the sign_extend_inreg has source value type i8 but the
+; extracted vector has type v4i32. There are no ISel patterns that
+; handle mismatched types like this, so we insert a bitcast before the
+; extract. This was previously an ISel failure. This test case is
+; reduced from a private user bug report, and the vector extracts are
+; optimized out via subsequent DAG combines.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: foo:
+
+; CHECK: i32.load8_u
+; CHECK: i32x4.splat
+; CHECK: i32.load8_u
+; CHECK: i32x4.replace_lane   1
+; CHECK: i32.load8_u
+; CHECK: i32x4.replace_lane   2
+; CHECK: i32.load8_u
+; CHECK: i32x4.replace_lane   3
+
+; CHECK: i8x16.extract_lane_s 0
+; CHECK: f64.convert_i32_s
+; CHECK: f32.demote_f64
+; CHECK: f32x4.splat
+
+; CHECK: i8x16.extract_lane_s 4
+; CHECK: f64.convert_i32_s
+; CHECK: f32.demote_f64
+; CHECK: f32x4.replace_lane   1
+
+; CHECK: i8x16.extract_lane_s 8
+; CHECK: f64.convert_i32_s
+; CHECK: f32.demote_f64
+; CHECK: f32x4.replace_lane   2
+
+; CHECK: i8x16.extract_lane_s 12
+; CHECK: f64.convert_i32_s
+; CHECK: f32.demote_f64
+; CHECK: f32x4.replace_lane   3
+
+; CHECK: v128.store
+define void @foo(<4 x i8>* %p) {
+  %1 = load <4 x i8>, <4 x i8>* %p
+  %2 = sitofp <4 x i8> %1 to <4 x double>
+  %3 = fmul <4 x double> zeroinitializer, %2
+  %4 = fadd <4 x double> %3, zeroinitializer
+  %5 = fptrunc <4 x double> %4 to <4 x float>
+  store <4 x float> %5, <4 x float>* undef
+  ret void
+}

From b98025a2f75d50e9e7a762846216cd2a1837c8cd Mon Sep 17 00:00:00 2001
From: Thomas Lively <tlively@google.com>
Date: Tue, 4 Jun 2019 21:13:41 +0000
Subject: [PATCH 1064/1176] [WebAssembly] make wasm-ld --verbose show data
 section startVA and name

Summary:
Make `wasm-ld --verbose` show data section start virtual address and name
as well, instead of just showing the size. This makes it much easier to
track which global variable is in which address when used in conjunction
with `--no-merge-data-sections`.

Patch by Guanzhong Chen

Reviewers: tlively, aheejin, sbc100, ruiu

Reviewed By: sbc100, ruiu

Subscribers: ruiu, dschuff, jgravelle-google, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62834

llvm-svn: 362548
---
 lld/wasm/OutputSections.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp
index e33f85f36000d..338c88bc63478 100644
--- a/lld/wasm/OutputSections.cpp
+++ b/lld/wasm/OutputSections.cpp
@@ -151,7 +151,8 @@ void DataSection::finalizeContents() {
 
     Segment->SectionOffset = BodySize;
     BodySize += Segment->Header.size() + Segment->Size;
-    log("Data segment: size=" + Twine(Segment->Size));
+    log("Data segment: size=" + Twine(Segment->Size) + ", startVA=" +
+        Twine::utohexstr(Segment->StartVA) + ", name=" + Segment->Name);
 
     for (InputSegment *InputSeg : Segment->InputSegments)
       InputSeg->OutputOffset = Segment->SectionOffset + Segment->Header.size() +

From 2fb7306f82b6cce5847f0aed7cfcbfee1ad9b597 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 4 Jun 2019 21:26:36 +0000
Subject: [PATCH 1065/1176] [X86] Add 512-bit test cases to
 machine-combiner-int-vec.ll. NFC

llvm-svn: 362549
---
 .../CodeGen/X86/machine-combiner-int-vec.ll   | 156 +++++++++++++++++-
 1 file changed, 155 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
index 5aef93a84f71f..81844f6b5918a 100644
--- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -1,5 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx2 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx2 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
 
 ; Verify that 128-bit vector logical ops are reassociated.
 
@@ -69,6 +71,16 @@ define <4 x i32> @reassociate_xor_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
 ; Verify that 256-bit vector logical ops are reassociated.
 
 define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_and_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm3, %xmm1
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm6, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm0
+; SSE-NEXT:    pand %xmm7, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_and_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
@@ -83,6 +95,16 @@ define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
 }
 
 define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_or_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm3, %xmm1
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm4
+; SSE-NEXT:    por %xmm4, %xmm0
+; SSE-NEXT:    por %xmm7, %xmm5
+; SSE-NEXT:    por %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_or_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
@@ -97,6 +119,16 @@ define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %
 }
 
 define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_xor_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm3, %xmm1
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm6, %xmm4
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    pxor %xmm7, %xmm5
+; SSE-NEXT:    pxor %xmm5, %xmm1
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: reassociate_xor_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
@@ -110,3 +142,125 @@ define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
   ret <8 x i32> %t2
 }
 
+
+; Verify that 512-bit vector logical ops are reassociated.
+
+define <16 x i32> @reassociate_and_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_and_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm4, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm7, %xmm3
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pand {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_and_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_and_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpandd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = and <16 x i32> %x2, %t0
+  %t2 = and <16 x i32> %x3, %t1
+  ret <16 x i32> %t2
+}
+
+define <16 x i32> @reassociate_or_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_or_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm4, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm7, %xmm3
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    por {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_or_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_or_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpord %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = or <16 x i32> %x2, %t0
+  %t2 = or <16 x i32> %x3, %t1
+  ret <16 x i32> %t2
+}
+
+define <16 x i32> @reassociate_xor_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_xor_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm4, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm7, %xmm3
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pxor {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_xor_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_xor_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpxord %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpxord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = xor <16 x i32> %x2, %t0
+  %t2 = xor <16 x i32> %x3, %t1
+  ret <16 x i32> %t2
+}

From 8362518c6e51476a97f429477949c6d60d12d9bb Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 4 Jun 2019 21:26:46 +0000
Subject: [PATCH 1066/1176] [X86] Add vector min/max reassociation tests to
 machine-combiner-int-vec.ll. NFC

llvm-svn: 362550
---
 .../CodeGen/X86/machine-combiner-int-vec.ll   | 2899 +++++++++++++++++
 1 file changed, 2899 insertions(+)

diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
index 81844f6b5918a..6a1385a6c1c3c 100644
--- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -264,3 +264,2902 @@ define <16 x i32> @reassociate_xor_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
   %t2 = xor <16 x i32> %x3, %t1
   ret <16 x i32> %t2
 }
+
+; Verify that 128-bit vector min/max are reassociated.
+
+define <16 x i8> @reassociate_umax_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
+; SSE-LABEL: reassociate_umax_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    pmaxub %xmm2, %xmm0
+; SSE-NEXT:    pmaxub %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxub %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpmaxub %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i8> %x0, %x1
+  %t1 = icmp ugt <16 x i8> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i8> %x2, <16 x i8> %t0
+  %t3 = icmp ugt <16 x i8> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i8> %x3, <16 x i8> %t2
+  ret <16 x i8> %t4
+}
+
+define <8 x i16> @reassociate_umax_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, <8 x i16> %x3) {
+; SSE-LABEL: reassociate_umax_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pmaxsw %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm3
+; SSE-NEXT:    pmaxsw %xmm3, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxuw %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpmaxuw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i16> %x0, %x1
+  %t1 = icmp ugt <8 x i16> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i16> %x2, <8 x i16> %t0
+  %t3 = icmp ugt <8 x i16> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i16> %x3, <8 x i16> %t2
+  ret <8 x i16> %t4
+}
+
+define <4 x i32> @reassociate_umax_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_umax_v4i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm4
+; SSE-NEXT:    por %xmm2, %xmm4
+; SSE-NEXT:    movdqa %xmm4, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    pandn %xmm4, %xmm1
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v4i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxud %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpmaxud %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <4 x i32> %x0, %x1
+  %t1 = icmp ugt <4 x i32> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i32> %x2, <4 x i32> %t0
+  %t3 = icmp ugt <4 x i32> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i32> %x3, <4 x i32> %t2
+  ret <4 x i32> %t4
+}
+
+define <2 x i64> @reassociate_umax_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; SSE-LABEL: reassociate_umax_v2i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    movdqa %xmm4, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm7, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm2, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umax_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm1, %xmm2, %xmm4
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm5
+; AVX2-NEXT:    vpcmpgtq %xmm5, %xmm4, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vxorpd %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vpxor %xmm1, %xmm3, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umax_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpmaxuq %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    vpmaxuq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <2 x i64> %x0, %x1
+  %t1 = icmp ugt <2 x i64> %x2, %t0
+  %t2 = select <2 x i1> %t1, <2 x i64> %x2, <2 x i64> %t0
+  %t3 = icmp ugt <2 x i64> %x3, %t2
+  %t4 = select <2 x i1> %t3, <2 x i64> %x3, <2 x i64> %t2
+  ret <2 x i64> %t4
+}
+
+define <16 x i8> @reassociate_smax_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
+; SSE-LABEL: reassociate_smax_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm3, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxsb %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpmaxsb %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i8> %x0, %x1
+  %t1 = icmp sgt <16 x i8> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i8> %x2, <16 x i8> %t0
+  %t3 = icmp sgt <16 x i8> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i8> %x3, <16 x i8> %t2
+  ret <16 x i8> %t4
+}
+
+define <8 x i16> @reassociate_smax_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, <8 x i16> %x3) {
+; SSE-LABEL: reassociate_smax_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    pmaxsw %xmm2, %xmm0
+; SSE-NEXT:    pmaxsw %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxsw %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpmaxsw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i16> %x0, %x1
+  %t1 = icmp sgt <8 x i16> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i16> %x2, <8 x i16> %t0
+  %t3 = icmp sgt <8 x i16> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i16> %x3, <8 x i16> %t2
+  ret <8 x i16> %t4
+}
+
+define <4 x i32> @reassociate_smax_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_smax_v4i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm3, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v4i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxsd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpmaxsd %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <4 x i32> %x0, %x1
+  %t1 = icmp sgt <4 x i32> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i32> %x2, <4 x i32> %t0
+  %t3 = icmp sgt <4 x i32> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i32> %x3, <4 x i32> %t2
+  ret <4 x i32> %t4
+}
+
+define <2 x i64> @reassociate_smax_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; SSE-LABEL: reassociate_smax_v2i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    movdqa %xmm4, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm7, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm2, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smax_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smax_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpmaxsq %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    vpmaxsq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <2 x i64> %x0, %x1
+  %t1 = icmp sgt <2 x i64> %x2, %t0
+  %t2 = select <2 x i1> %t1, <2 x i64> %x2, <2 x i64> %t0
+  %t3 = icmp sgt <2 x i64> %x3, %t2
+  %t4 = select <2 x i1> %t3, <2 x i64> %x3, <2 x i64> %t2
+  ret <2 x i64> %t4
+}
+
+define <16 x i8> @reassociate_umin_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
+; SSE-LABEL: reassociate_umin_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    pminub %xmm2, %xmm0
+; SSE-NEXT:    pminub %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminub %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpminub %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i8> %x0, %x1
+  %t1 = icmp ult <16 x i8> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i8> %x2, <16 x i8> %t0
+  %t3 = icmp ult <16 x i8> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i8> %x3, <16 x i8> %t2
+  ret <16 x i8> %t4
+}
+
+define <8 x i16> @reassociate_umin_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, <8 x i16> %x3) {
+; SSE-LABEL: reassociate_umin_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pminsw %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm3
+; SSE-NEXT:    pminsw %xmm3, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminuw %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpminuw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i16> %x0, %x1
+  %t1 = icmp ult <8 x i16> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i16> %x2, <8 x i16> %t0
+  %t3 = icmp ult <8 x i16> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i16> %x3, <8 x i16> %t2
+  ret <8 x i16> %t4
+}
+
+define <4 x i32> @reassociate_umin_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_umin_v4i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm2, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v4i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminud %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpminud %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <4 x i32> %x0, %x1
+  %t1 = icmp ult <4 x i32> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i32> %x2, <4 x i32> %t0
+  %t3 = icmp ult <4 x i32> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i32> %x3, <4 x i32> %t2
+  ret <4 x i32> %t4
+}
+
+define <2 x i64> @reassociate_umin_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; SSE-LABEL: reassociate_umin_v2i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm7, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm2, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umin_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %xmm1, %xmm2, %xmm4
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm5
+; AVX2-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX2-NEXT:    vblendvpd %xmm4, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vxorpd %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vpxor %xmm1, %xmm3, %xmm1
+; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umin_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpminuq %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    vpminuq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <2 x i64> %x0, %x1
+  %t1 = icmp ult <2 x i64> %x2, %t0
+  %t2 = select <2 x i1> %t1, <2 x i64> %x2, <2 x i64> %t0
+  %t3 = icmp ult <2 x i64> %x3, %t2
+  %t4 = select <2 x i1> %t3, <2 x i64> %x3, <2 x i64> %t2
+  ret <2 x i64> %t4
+}
+
+define <16 x i8> @reassociate_smin_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
+; SSE-LABEL: reassociate_smin_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminsb %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpminsb %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i8> %x0, %x1
+  %t1 = icmp slt <16 x i8> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i8> %x2, <16 x i8> %t0
+  %t3 = icmp slt <16 x i8> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i8> %x3, <16 x i8> %t2
+  ret <16 x i8> %t4
+}
+
+define <8 x i16> @reassociate_smin_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, <8 x i16> %x3) {
+; SSE-LABEL: reassociate_smin_v8i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    pminsw %xmm2, %xmm0
+; SSE-NEXT:    pminsw %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v8i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminsw %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpminsw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i16> %x0, %x1
+  %t1 = icmp slt <8 x i16> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i16> %x2, <8 x i16> %t0
+  %t3 = icmp slt <8 x i16> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i16> %x3, <8 x i16> %t2
+  ret <8 x i16> %t4
+}
+
+define <4 x i32> @reassociate_smin_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_smin_v4i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v4i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpminsd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vpminsd %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    retq
+
+  %t0 = add <4 x i32> %x0, %x1
+  %t1 = icmp slt <4 x i32> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i32> %x2, <4 x i32> %t0
+  %t3 = icmp slt <4 x i32> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i32> %x3, <4 x i32> %t2
+  ret <4 x i32> %t4
+}
+
+define <2 x i64> @reassociate_smin_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; SSE-LABEL: reassociate_smin_v2i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm7, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm2, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smin_v2i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm1
+; AVX2-NEXT:    vblendvpd %xmm1, %xmm3, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smin_v2i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpminsq %xmm0, %xmm2, %xmm0
+; AVX512-NEXT:    vpminsq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <2 x i64> %x0, %x1
+  %t1 = icmp slt <2 x i64> %x2, %t0
+  %t2 = select <2 x i1> %t1, <2 x i64> %x2, <2 x i64> %t0
+  %t3 = icmp slt <2 x i64> %x3, %t2
+  %t4 = select <2 x i1> %t3, <2 x i64> %x3, <2 x i64> %t2
+  ret <2 x i64> %t4
+}
+
+; Verify that 256-bit vector min/max are reassociated.
+
+define <32 x i8> @reassociate_umax_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, <32 x i8> %x3) {
+; SSE-LABEL: reassociate_umax_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm2, %xmm0
+; SSE-NEXT:    paddb %xmm3, %xmm1
+; SSE-NEXT:    pmaxub %xmm5, %xmm1
+; SSE-NEXT:    pmaxub %xmm4, %xmm0
+; SSE-NEXT:    pmaxub %xmm6, %xmm0
+; SSE-NEXT:    pmaxub %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v32i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmaxub %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpmaxub %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i8> %x0, %x1
+  %t1 = icmp ugt <32 x i8> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i8> %x2, <32 x i8> %t0
+  %t3 = icmp ugt <32 x i8> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i8> %x3, <32 x i8> %t2
+  ret <32 x i8> %t4
+}
+
+define <16 x i16> @reassociate_umax_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, <16 x i16> %x3) {
+; SSE-LABEL: reassociate_umax_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm2, %xmm0
+; SSE-NEXT:    paddw %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pxor %xmm2, %xmm5
+; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    pmaxsw %xmm5, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pmaxsw %xmm4, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm6
+; SSE-NEXT:    pmaxsw %xmm6, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm7
+; SSE-NEXT:    pmaxsw %xmm7, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v16i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmaxuw %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpmaxuw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i16> %x0, %x1
+  %t1 = icmp ugt <16 x i16> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i16> %x2, <16 x i16> %t0
+  %t3 = icmp ugt <16 x i16> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i16> %x3, <16 x i16> %t2
+  ret <16 x i16> %t4
+}
+
+define <8 x i32> @reassociate_umax_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_umax_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm1, %xmm8
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm8
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm5, %xmm2
+; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    movdqa %xmm8, %xmm3
+; SSE-NEXT:    pxor %xmm1, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    pandn %xmm8, %xmm2
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm3
+; SSE-NEXT:    pxor %xmm1, %xmm3
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm3
+; SSE-NEXT:    por %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm4
+; SSE-NEXT:    pxor %xmm1, %xmm4
+; SSE-NEXT:    movdqa %xmm6, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm3, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    pxor %xmm1, %xmm3
+; SSE-NEXT:    pxor %xmm7, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v8i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmaxud %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpmaxud %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i32> %x0, %x1
+  %t1 = icmp ugt <8 x i32> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i32> %x2, <8 x i32> %t0
+  %t3 = icmp ugt <8 x i32> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i32> %x3, <8 x i32> %t2
+  ret <8 x i32> %t4
+}
+
+define <4 x i64> @reassociate_umax_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; SSE-LABEL: reassociate_umax_v4i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm2, %xmm0
+; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm8, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm3, %xmm9
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm9
+; SSE-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm10, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
+; SSE-NEXT:    por %xmm2, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm9, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm2
+; SSE-NEXT:    por %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm8, %xmm0
+; SSE-NEXT:    movdqa %xmm6, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm2, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    pxor %xmm7, %xmm8
+; SSE-NEXT:    movdqa %xmm8, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm8
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umax_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %ymm1, %ymm2, %ymm4
+; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm5
+; AVX2-NEXT:    vpcmpgtq %ymm5, %ymm4, %ymm4
+; AVX2-NEXT:    vblendvpd %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vxorpd %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpxor %ymm1, %ymm3, %ymm1
+; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umax_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxuq %ymm0, %ymm2, %ymm0
+; AVX512-NEXT:    vpmaxuq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <4 x i64> %x0, %x1
+  %t1 = icmp ugt <4 x i64> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i64> %x2, <4 x i64> %t0
+  %t3 = icmp ugt <4 x i64> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i64> %x3, <4 x i64> %t2
+  ret <4 x i64> %t4
+}
+
+define <32 x i8> @reassociate_smax_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, <32 x i8> %x3) {
+; SSE-LABEL: reassociate_smax_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm2, %xmm0
+; SSE-NEXT:    paddb %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm5, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm6, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm7, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v32i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmaxsb %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpmaxsb %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i8> %x0, %x1
+  %t1 = icmp sgt <32 x i8> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i8> %x2, <32 x i8> %t0
+  %t3 = icmp sgt <32 x i8> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i8> %x3, <32 x i8> %t2
+  ret <32 x i8> %t4
+}
+
+define <16 x i16> @reassociate_smax_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, <16 x i16> %x3) {
+; SSE-LABEL: reassociate_smax_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm2, %xmm0
+; SSE-NEXT:    paddw %xmm3, %xmm1
+; SSE-NEXT:    pmaxsw %xmm5, %xmm1
+; SSE-NEXT:    pmaxsw %xmm4, %xmm0
+; SSE-NEXT:    pmaxsw %xmm6, %xmm0
+; SSE-NEXT:    pmaxsw %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v16i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmaxsw %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpmaxsw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i16> %x0, %x1
+  %t1 = icmp sgt <16 x i16> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i16> %x2, <16 x i16> %t0
+  %t3 = icmp sgt <16 x i16> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i16> %x3, <16 x i16> %t2
+  ret <16 x i16> %t4
+}
+
+define <8 x i32> @reassociate_smax_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_smax_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm5, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm6, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm7, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v8i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpmaxsd %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpmaxsd %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i32> %x0, %x1
+  %t1 = icmp sgt <8 x i32> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i32> %x2, <8 x i32> %t0
+  %t3 = icmp sgt <8 x i32> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i32> %x3, <8 x i32> %t2
+  ret <8 x i32> %t4
+}
+
+define <4 x i64> @reassociate_smax_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; SSE-LABEL: reassociate_smax_v4i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm2, %xmm0
+; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm8, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm3, %xmm9
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm9
+; SSE-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm10, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
+; SSE-NEXT:    por %xmm2, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm9, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm2
+; SSE-NEXT:    por %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm8, %xmm0
+; SSE-NEXT:    movdqa %xmm6, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm2, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    pxor %xmm7, %xmm8
+; SSE-NEXT:    movdqa %xmm8, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm8
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smax_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm2, %ymm1
+; AVX2-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm3, %ymm1
+; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smax_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpmaxsq %ymm0, %ymm2, %ymm0
+; AVX512-NEXT:    vpmaxsq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <4 x i64> %x0, %x1
+  %t1 = icmp sgt <4 x i64> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i64> %x2, <4 x i64> %t0
+  %t3 = icmp sgt <4 x i64> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i64> %x3, <4 x i64> %t2
+  ret <4 x i64> %t4
+}
+
+define <32 x i8> @reassociate_umin_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, <32 x i8> %x3) {
+; SSE-LABEL: reassociate_umin_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm2, %xmm0
+; SSE-NEXT:    paddb %xmm3, %xmm1
+; SSE-NEXT:    pminub %xmm5, %xmm1
+; SSE-NEXT:    pminub %xmm4, %xmm0
+; SSE-NEXT:    pminub %xmm6, %xmm0
+; SSE-NEXT:    pminub %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v32i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpminub %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpminub %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i8> %x0, %x1
+  %t1 = icmp ult <32 x i8> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i8> %x2, <32 x i8> %t0
+  %t3 = icmp ult <32 x i8> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i8> %x3, <32 x i8> %t2
+  ret <32 x i8> %t4
+}
+
+define <16 x i16> @reassociate_umin_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, <16 x i16> %x3) {
+; SSE-LABEL: reassociate_umin_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm2, %xmm0
+; SSE-NEXT:    paddw %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pxor %xmm2, %xmm5
+; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    pminsw %xmm5, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pminsw %xmm4, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm6
+; SSE-NEXT:    pminsw %xmm6, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm7
+; SSE-NEXT:    pminsw %xmm7, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v16i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpminuw %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpminuw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i16> %x0, %x1
+  %t1 = icmp ult <16 x i16> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i16> %x2, <16 x i16> %t0
+  %t3 = icmp ult <16 x i16> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i16> %x3, <16 x i16> %t2
+  ret <16 x i16> %t4
+}
+
+define <8 x i32> @reassociate_umin_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_umin_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm5, %xmm8
+; SSE-NEXT:    pxor %xmm3, %xmm8
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm3, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm8, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    pxor %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm0
+; SSE-NEXT:    movdqa %xmm6, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm7, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v8i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpminud %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpminud %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i32> %x0, %x1
+  %t1 = icmp ult <8 x i32> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i32> %x2, <8 x i32> %t0
+  %t3 = icmp ult <8 x i32> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i32> %x3, <8 x i32> %t2
+  ret <8 x i32> %t4
+}
+
+define <4 x i64> @reassociate_umin_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; SSE-LABEL: reassociate_umin_v4i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm2, %xmm0
+; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm8, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm9
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm9
+; SSE-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm10, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
+; SSE-NEXT:    por %xmm2, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm9, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm2
+; SSE-NEXT:    por %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm8, %xmm0
+; SSE-NEXT:    movdqa %xmm6, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm2, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    pxor %xmm7, %xmm8
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm8, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm8
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umin_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %ymm1, %ymm2, %ymm4
+; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm5
+; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm5, %ymm4
+; AVX2-NEXT:    vblendvpd %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vxorpd %ymm1, %ymm0, %ymm2
+; AVX2-NEXT:    vpxor %ymm1, %ymm3, %ymm1
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umin_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminuq %ymm0, %ymm2, %ymm0
+; AVX512-NEXT:    vpminuq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <4 x i64> %x0, %x1
+  %t1 = icmp ult <4 x i64> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i64> %x2, <4 x i64> %t0
+  %t3 = icmp ult <4 x i64> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i64> %x3, <4 x i64> %t2
+  ret <4 x i64> %t4
+}
+
+define <32 x i8> @reassociate_smin_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, <32 x i8> %x3) {
+; SSE-LABEL: reassociate_smin_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm2, %xmm0
+; SSE-NEXT:    paddb %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm5, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm4, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm6, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm7, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v32i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpminsb %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpminsb %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i8> %x0, %x1
+  %t1 = icmp slt <32 x i8> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i8> %x2, <32 x i8> %t0
+  %t3 = icmp slt <32 x i8> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i8> %x3, <32 x i8> %t2
+  ret <32 x i8> %t4
+}
+
+define <16 x i16> @reassociate_smin_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, <16 x i16> %x3) {
+; SSE-LABEL: reassociate_smin_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm2, %xmm0
+; SSE-NEXT:    paddw %xmm3, %xmm1
+; SSE-NEXT:    pminsw %xmm5, %xmm1
+; SSE-NEXT:    pminsw %xmm4, %xmm0
+; SSE-NEXT:    pminsw %xmm6, %xmm0
+; SSE-NEXT:    pminsw %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v16i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpminsw %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpminsw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <16 x i16> %x0, %x1
+  %t1 = icmp slt <16 x i16> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i16> %x2, <16 x i16> %t0
+  %t3 = icmp slt <16 x i16> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i16> %x3, <16 x i16> %t2
+  ret <16 x i16> %t4
+}
+
+define <8 x i32> @reassociate_smin_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; SSE-LABEL: reassociate_smin_v8i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm7, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v8i32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpminsd %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vpminsd %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    retq
+
+  %t0 = add <8 x i32> %x0, %x1
+  %t1 = icmp slt <8 x i32> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i32> %x2, <8 x i32> %t0
+  %t3 = icmp slt <8 x i32> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i32> %x3, <8 x i32> %t2
+  ret <8 x i32> %t4
+}
+
+define <4 x i64> @reassociate_smin_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; SSE-LABEL: reassociate_smin_v4i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddq %xmm2, %xmm0
+; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm8, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm9
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm9
+; SSE-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm10, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
+; SSE-NEXT:    por %xmm2, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm1, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pxor %xmm8, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    pand %xmm9, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm4
+; SSE-NEXT:    pandn %xmm0, %xmm2
+; SSE-NEXT:    por %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm8, %xmm0
+; SSE-NEXT:    movdqa %xmm6, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm6
+; SSE-NEXT:    pandn %xmm2, %xmm0
+; SSE-NEXT:    por %xmm6, %xmm0
+; SSE-NEXT:    movdqa %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm8, %xmm1
+; SSE-NEXT:    pxor %xmm7, %xmm8
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm8, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm8
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3]
+; SSE-NEXT:    pand %xmm4, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm3, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smin_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm1
+; AVX2-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm0, %ymm1
+; AVX2-NEXT:    vblendvpd %ymm1, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smin_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpminsq %ymm0, %ymm2, %ymm0
+; AVX512-NEXT:    vpminsq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <4 x i64> %x0, %x1
+  %t1 = icmp slt <4 x i64> %x2, %t0
+  %t2 = select <4 x i1> %t1, <4 x i64> %x2, <4 x i64> %t0
+  %t3 = icmp slt <4 x i64> %x3, %t2
+  %t4 = select <4 x i1> %t3, <4 x i64> %x3, <4 x i64> %t2
+  ret <4 x i64> %t4
+}
+
+; Verify that 512-bit vector min/max are reassociated.
+
+define <64 x i8> @reassociate_umax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, <64 x i8> %x3) {
+; SSE-LABEL: reassociate_umax_v64i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm4, %xmm0
+; SSE-NEXT:    paddb %xmm5, %xmm1
+; SSE-NEXT:    paddb %xmm6, %xmm2
+; SSE-NEXT:    paddb %xmm7, %xmm3
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v64i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpmaxub %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpmaxub %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpmaxub %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpmaxub %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <64 x i8> %x0, %x1
+  %t1 = icmp ugt <64 x i8> %x2, %t0
+  %t2 = select <64 x i1> %t1, <64 x i8> %x2, <64 x i8> %t0
+  %t3 = icmp ugt <64 x i8> %x3, %t2
+  %t4 = select <64 x i1> %t3, <64 x i8> %x3, <64 x i8> %t2
+  ret <64 x i8> %t4
+}
+
+define <32 x i16> @reassociate_umax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, <32 x i16> %x3) {
+; SSE-LABEL: reassociate_umax_v32i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm4, %xmm0
+; SSE-NEXT:    paddw %xmm5, %xmm1
+; SSE-NEXT:    paddw %xmm6, %xmm2
+; SSE-NEXT:    paddw %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    pmaxsw %xmm3, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pmaxsw %xmm2, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    pmaxsw %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    pmaxsw %xmm0, %xmm1
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    pmaxsw %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    pmaxsw %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pmaxsw %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umax_v32i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpmaxuw %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpmaxuw %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpmaxuw %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpmaxuw %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i16> %x0, %x1
+  %t1 = icmp ugt <32 x i16> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i16> %x2, <32 x i16> %t0
+  %t3 = icmp ugt <32 x i16> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i16> %x3, <32 x i16> %t2
+  ret <32 x i16> %t4
+}
+
+define <16 x i32> @reassociate_umax_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_umax_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm7, %xmm9
+; SSE-NEXT:    movdqa %xmm4, %xmm7
+; SSE-NEXT:    movdqa %xmm3, %xmm8
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm4
+; SSE-NEXT:    paddd %xmm7, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm9, %xmm8
+; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm4, %xmm6
+; SSE-NEXT:    pxor %xmm3, %xmm6
+; SSE-NEXT:    movdqa %xmm8, %xmm5
+; SSE-NEXT:    pxor %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE-NEXT:    pand %xmm6, %xmm4
+; SSE-NEXT:    pandn %xmm8, %xmm6
+; SSE-NEXT:    por %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm15, %xmm5
+; SSE-NEXT:    pxor %xmm3, %xmm5
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm15
+; SSE-NEXT:    pandn %xmm2, %xmm5
+; SSE-NEXT:    por %xmm15, %xmm5
+; SSE-NEXT:    movdqa %xmm14, %xmm2
+; SSE-NEXT:    pxor %xmm3, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm14
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm14, %xmm2
+; SSE-NEXT:    movdqa %xmm13, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm13
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm13, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    movdqa %xmm12, %xmm0
+; SSE-NEXT:    pxor %xmm3, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm12
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm12, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    movdqa %xmm11, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm11
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm11, %xmm1
+; SSE-NEXT:    movdqa %xmm5, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    movdqa %xmm10, %xmm2
+; SSE-NEXT:    pxor %xmm3, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm10
+; SSE-NEXT:    pandn %xmm5, %xmm2
+; SSE-NEXT:    por %xmm10, %xmm2
+; SSE-NEXT:    movdqa %xmm6, %xmm4
+; SSE-NEXT:    pxor %xmm3, %xmm4
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pxor %xmm5, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm6, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umax_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxud %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpmaxud %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpmaxud %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpmaxud %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umax_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxud %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxud %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = icmp ugt <16 x i32> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i32> %x2, <16 x i32> %t0
+  %t3 = icmp ugt <16 x i32> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i32> %x3, <16 x i32> %t2
+  ret <16 x i32> %t4
+}
+
+define <8 x i64> @reassociate_umax_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, <8 x i64> %x3) {
+; SSE-LABEL: reassociate_umax_v8i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    paddq %xmm4, %xmm0
+; SSE-NEXT:    paddq %xmm5, %xmm1
+; SSE-NEXT:    paddq %xmm6, %xmm2
+; SSE-NEXT:    paddq %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT:    movdqa %xmm8, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm5, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm9
+; SSE-NEXT:    pand %xmm9, %xmm8
+; SSE-NEXT:    pandn %xmm3, %xmm9
+; SSE-NEXT:    por %xmm8, %xmm9
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm2, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm3, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm8
+; SSE-NEXT:    pand %xmm8, %xmm5
+; SSE-NEXT:    pandn %xmm2, %xmm8
+; SSE-NEXT:    por %xmm5, %xmm8
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm2, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa %xmm7, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm7
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm7, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm7, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    movdqa %xmm8, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm6, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pand %xmm2, %xmm6
+; SSE-NEXT:    pandn %xmm8, %xmm2
+; SSE-NEXT:    por %xmm6, %xmm2
+; SSE-NEXT:    movdqa %xmm9, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pxor %xmm5, %xmm4
+; SSE-NEXT:    movdqa %xmm4, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm9, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umax_v8i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %ymm3, %ymm5, %ymm8
+; AVX2-NEXT:    vpxor %ymm3, %ymm1, %ymm9
+; AVX2-NEXT:    vpcmpgtq %ymm9, %ymm8, %ymm8
+; AVX2-NEXT:    vblendvpd %ymm8, %ymm5, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm3, %ymm4, %ymm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm0, %ymm5
+; AVX2-NEXT:    vpcmpgtq %ymm5, %ymm2, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vxorpd %ymm3, %ymm0, %ymm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm6, %ymm4
+; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm0, %ymm0
+; AVX2-NEXT:    vxorpd %ymm3, %ymm1, %ymm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm7, %ymm3
+; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm3, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm7, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umax_v8i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxuq %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxuq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <8 x i64> %x0, %x1
+  %t1 = icmp ugt <8 x i64> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i64> %x2, <8 x i64> %t0
+  %t3 = icmp ugt <8 x i64> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i64> %x3, <8 x i64> %t2
+  ret <8 x i64> %t4
+}
+
+define <64 x i8> @reassociate_smax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, <64 x i8> %x3) {
+; SSE-LABEL: reassociate_smax_v64i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
+; SSE-NEXT:    paddb %xmm4, %xmm0
+; SSE-NEXT:    paddb %xmm5, %xmm1
+; SSE-NEXT:    paddb %xmm6, %xmm2
+; SSE-NEXT:    paddb %xmm7, %xmm3
+; SSE-NEXT:    movdqa %xmm15, %xmm4
+; SSE-NEXT:    pcmpgtb %xmm3, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm15
+; SSE-NEXT:    pandn %xmm3, %xmm4
+; SSE-NEXT:    por %xmm15, %xmm4
+; SSE-NEXT:    movdqa %xmm14, %xmm3
+; SSE-NEXT:    pcmpgtb %xmm2, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm14
+; SSE-NEXT:    pandn %xmm2, %xmm3
+; SSE-NEXT:    por %xmm14, %xmm3
+; SSE-NEXT:    movdqa %xmm13, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm13
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm13, %xmm2
+; SSE-NEXT:    movdqa %xmm12, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm12
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm12, %xmm1
+; SSE-NEXT:    movdqa %xmm11, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm11
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm11, %xmm0
+; SSE-NEXT:    movdqa %xmm10, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm10
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm10, %xmm1
+; SSE-NEXT:    movdqa %xmm9, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm3, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm9
+; SSE-NEXT:    pandn %xmm3, %xmm2
+; SSE-NEXT:    por %xmm9, %xmm2
+; SSE-NEXT:    movdqa %xmm8, %xmm3
+; SSE-NEXT:    pcmpgtb %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm8
+; SSE-NEXT:    pandn %xmm4, %xmm3
+; SSE-NEXT:    por %xmm8, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v64i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpmaxsb %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpmaxsb %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpmaxsb %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpmaxsb %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <64 x i8> %x0, %x1
+  %t1 = icmp sgt <64 x i8> %x2, %t0
+  %t2 = select <64 x i1> %t1, <64 x i8> %x2, <64 x i8> %t0
+  %t3 = icmp sgt <64 x i8> %x3, %t2
+  %t4 = select <64 x i1> %t3, <64 x i8> %x3, <64 x i8> %t2
+  ret <64 x i8> %t4
+}
+
+define <32 x i16> @reassociate_smax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, <32 x i16> %x3) {
+; SSE-LABEL: reassociate_smax_v32i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm4, %xmm0
+; SSE-NEXT:    paddw %xmm5, %xmm1
+; SSE-NEXT:    paddw %xmm6, %xmm2
+; SSE-NEXT:    paddw %xmm7, %xmm3
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smax_v32i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpmaxsw %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpmaxsw %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpmaxsw %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpmaxsw %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i16> %x0, %x1
+  %t1 = icmp sgt <32 x i16> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i16> %x2, <32 x i16> %t0
+  %t3 = icmp sgt <32 x i16> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i16> %x3, <32 x i16> %t2
+  ret <32 x i16> %t4
+}
+
+define <16 x i32> @reassociate_smax_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_smax_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
+; SSE-NEXT:    paddd %xmm4, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm7, %xmm3
+; SSE-NEXT:    movdqa %xmm15, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm15
+; SSE-NEXT:    pandn %xmm3, %xmm4
+; SSE-NEXT:    por %xmm15, %xmm4
+; SSE-NEXT:    movdqa %xmm14, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm14
+; SSE-NEXT:    pandn %xmm2, %xmm3
+; SSE-NEXT:    por %xmm14, %xmm3
+; SSE-NEXT:    movdqa %xmm13, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm13
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm13, %xmm2
+; SSE-NEXT:    movdqa %xmm12, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm12
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm12, %xmm1
+; SSE-NEXT:    movdqa %xmm11, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm11
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm11, %xmm0
+; SSE-NEXT:    movdqa %xmm10, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm10
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm10, %xmm1
+; SSE-NEXT:    movdqa %xmm9, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm9
+; SSE-NEXT:    pandn %xmm3, %xmm2
+; SSE-NEXT:    por %xmm9, %xmm2
+; SSE-NEXT:    movdqa %xmm8, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm8
+; SSE-NEXT:    pandn %xmm4, %xmm3
+; SSE-NEXT:    por %xmm8, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smax_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxsd %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpmaxsd %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpmaxsd %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpmaxsd %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smax_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsd %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxsd %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = icmp sgt <16 x i32> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i32> %x2, <16 x i32> %t0
+  %t3 = icmp sgt <16 x i32> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i32> %x3, <16 x i32> %t2
+  ret <16 x i32> %t4
+}
+
+define <8 x i64> @reassociate_smax_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, <8 x i64> %x3) {
+; SSE-LABEL: reassociate_smax_v8i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    paddq %xmm4, %xmm0
+; SSE-NEXT:    paddq %xmm5, %xmm1
+; SSE-NEXT:    paddq %xmm6, %xmm2
+; SSE-NEXT:    paddq %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm8, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm5, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm9
+; SSE-NEXT:    pand %xmm9, %xmm8
+; SSE-NEXT:    pandn %xmm3, %xmm9
+; SSE-NEXT:    por %xmm8, %xmm9
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm2, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm3, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm8
+; SSE-NEXT:    pand %xmm8, %xmm5
+; SSE-NEXT:    pandn %xmm2, %xmm8
+; SSE-NEXT:    por %xmm5, %xmm8
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm2, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa %xmm7, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm7
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm7, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm7, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    movdqa %xmm8, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm6, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pand %xmm2, %xmm6
+; SSE-NEXT:    pandn %xmm8, %xmm2
+; SSE-NEXT:    por %xmm6, %xmm2
+; SSE-NEXT:    movdqa %xmm9, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pxor %xmm5, %xmm4
+; SSE-NEXT:    movdqa %xmm4, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm9, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smax_v8i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm5, %ymm3
+; AVX2-NEXT:    vblendvpd %ymm3, %ymm5, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm4, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm6, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm7, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm7, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smax_v8i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsq %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxsq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <8 x i64> %x0, %x1
+  %t1 = icmp sgt <8 x i64> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i64> %x2, <8 x i64> %t0
+  %t3 = icmp sgt <8 x i64> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i64> %x3, <8 x i64> %t2
+  ret <8 x i64> %t4
+}
+
+define <64 x i8> @reassociate_umin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, <64 x i8> %x3) {
+; SSE-LABEL: reassociate_umin_v64i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddb %xmm4, %xmm0
+; SSE-NEXT:    paddb %xmm5, %xmm1
+; SSE-NEXT:    paddb %xmm6, %xmm2
+; SSE-NEXT:    paddb %xmm7, %xmm3
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v64i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpminub %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpminub %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpminub %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpminub %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <64 x i8> %x0, %x1
+  %t1 = icmp ult <64 x i8> %x2, %t0
+  %t2 = select <64 x i1> %t1, <64 x i8> %x2, <64 x i8> %t0
+  %t3 = icmp ult <64 x i8> %x3, %t2
+  %t4 = select <64 x i1> %t3, <64 x i8> %x3, <64 x i8> %t2
+  ret <64 x i8> %t4
+}
+
+define <32 x i16> @reassociate_umin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, <32 x i16> %x3) {
+; SSE-LABEL: reassociate_umin_v32i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm4, %xmm0
+; SSE-NEXT:    paddw %xmm5, %xmm1
+; SSE-NEXT:    paddw %xmm6, %xmm2
+; SSE-NEXT:    paddw %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    pminsw %xmm3, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pminsw %xmm2, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    pminsw %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    pminsw %xmm0, %xmm1
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    pminsw %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    pminsw %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    pminsw %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pminsw %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_umin_v32i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpminuw %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpminuw %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpminuw %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpminuw %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i16> %x0, %x1
+  %t1 = icmp ult <32 x i16> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i16> %x2, <32 x i16> %t0
+  %t3 = icmp ult <32 x i16> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i16> %x3, <32 x i16> %t2
+  ret <32 x i16> %t4
+}
+
+define <16 x i32> @reassociate_umin_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_umin_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
+; SSE-NEXT:    paddd %xmm4, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm15, %xmm6
+; SSE-NEXT:    pxor %xmm5, %xmm6
+; SSE-NEXT:    movdqa %xmm3, %xmm4
+; SSE-NEXT:    pxor %xmm5, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm15
+; SSE-NEXT:    pandn %xmm3, %xmm4
+; SSE-NEXT:    por %xmm15, %xmm4
+; SSE-NEXT:    movdqa %xmm14, %xmm6
+; SSE-NEXT:    pxor %xmm5, %xmm6
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    pxor %xmm5, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm14
+; SSE-NEXT:    pandn %xmm2, %xmm3
+; SSE-NEXT:    por %xmm14, %xmm3
+; SSE-NEXT:    movdqa %xmm13, %xmm6
+; SSE-NEXT:    pxor %xmm5, %xmm6
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pxor %xmm5, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm13
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm13, %xmm2
+; SSE-NEXT:    movdqa %xmm12, %xmm1
+; SSE-NEXT:    pxor %xmm5, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm6
+; SSE-NEXT:    pxor %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm6
+; SSE-NEXT:    pand %xmm6, %xmm12
+; SSE-NEXT:    pandn %xmm0, %xmm6
+; SSE-NEXT:    por %xmm12, %xmm6
+; SSE-NEXT:    movdqa %xmm6, %xmm0
+; SSE-NEXT:    pxor %xmm5, %xmm0
+; SSE-NEXT:    movdqa %xmm11, %xmm1
+; SSE-NEXT:    pxor %xmm5, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm11
+; SSE-NEXT:    pandn %xmm6, %xmm0
+; SSE-NEXT:    por %xmm11, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm5, %xmm1
+; SSE-NEXT:    movdqa %xmm10, %xmm6
+; SSE-NEXT:    pxor %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm10
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm10, %xmm1
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm5, %xmm2
+; SSE-NEXT:    movdqa %xmm9, %xmm6
+; SSE-NEXT:    pxor %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm6, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm9
+; SSE-NEXT:    pandn %xmm3, %xmm2
+; SSE-NEXT:    por %xmm9, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm3
+; SSE-NEXT:    pxor %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm8, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm8
+; SSE-NEXT:    pandn %xmm4, %xmm3
+; SSE-NEXT:    por %xmm8, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umin_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpminud %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpminud %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpminud %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpminud %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umin_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminud %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminud %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = icmp ult <16 x i32> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i32> %x2, <16 x i32> %t0
+  %t3 = icmp ult <16 x i32> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i32> %x3, <16 x i32> %t2
+  ret <16 x i32> %t4
+}
+
+define <8 x i64> @reassociate_umin_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, <8 x i64> %x3) {
+; SSE-LABEL: reassociate_umin_v8i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    paddq %xmm4, %xmm0
+; SSE-NEXT:    paddq %xmm5, %xmm1
+; SSE-NEXT:    paddq %xmm6, %xmm2
+; SSE-NEXT:    paddq %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
+; SSE-NEXT:    movdqa %xmm8, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm6, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm9
+; SSE-NEXT:    pand %xmm9, %xmm8
+; SSE-NEXT:    pandn %xmm3, %xmm9
+; SSE-NEXT:    por %xmm8, %xmm9
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm2, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm6, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm8
+; SSE-NEXT:    pand %xmm8, %xmm5
+; SSE-NEXT:    pandn %xmm2, %xmm8
+; SSE-NEXT:    por %xmm5, %xmm8
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa %xmm7, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm7
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm7, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm0, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm7, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    movdqa %xmm8, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm6, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm2, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pand %xmm2, %xmm6
+; SSE-NEXT:    pandn %xmm8, %xmm2
+; SSE-NEXT:    por %xmm6, %xmm2
+; SSE-NEXT:    movdqa %xmm9, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pxor %xmm5, %xmm4
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm9, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_umin_v8i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT:    vpxor %ymm3, %ymm5, %ymm8
+; AVX2-NEXT:    vpxor %ymm3, %ymm1, %ymm9
+; AVX2-NEXT:    vpcmpgtq %ymm8, %ymm9, %ymm8
+; AVX2-NEXT:    vblendvpd %ymm8, %ymm5, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm3, %ymm4, %ymm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm0, %ymm5
+; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm5, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vxorpd %ymm3, %ymm0, %ymm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm6, %ymm4
+; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm2, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm0, %ymm0
+; AVX2-NEXT:    vxorpd %ymm3, %ymm1, %ymm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm7, %ymm3
+; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm7, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umin_v8i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminuq %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminuq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <8 x i64> %x0, %x1
+  %t1 = icmp ult <8 x i64> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i64> %x2, <8 x i64> %t0
+  %t3 = icmp ult <8 x i64> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i64> %x3, <8 x i64> %t2
+  ret <8 x i64> %t4
+}
+
+define <64 x i8> @reassociate_smin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, <64 x i8> %x3) {
+; SSE-LABEL: reassociate_smin_v64i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
+; SSE-NEXT:    paddb %xmm4, %xmm0
+; SSE-NEXT:    paddb %xmm5, %xmm1
+; SSE-NEXT:    paddb %xmm6, %xmm2
+; SSE-NEXT:    paddb %xmm7, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm4
+; SSE-NEXT:    pcmpgtb %xmm15, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm15
+; SSE-NEXT:    pandn %xmm3, %xmm4
+; SSE-NEXT:    por %xmm15, %xmm4
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    pcmpgtb %xmm14, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm14
+; SSE-NEXT:    pandn %xmm2, %xmm3
+; SSE-NEXT:    por %xmm14, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm13, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm13
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm13, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm12, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm12
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm12, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm11, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm11
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm11, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pcmpgtb %xmm10, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm10
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm10, %xmm1
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm9, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm9
+; SSE-NEXT:    pandn %xmm3, %xmm2
+; SSE-NEXT:    por %xmm9, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm3
+; SSE-NEXT:    pcmpgtb %xmm8, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm8
+; SSE-NEXT:    pandn %xmm4, %xmm3
+; SSE-NEXT:    por %xmm8, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v64i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpminsb %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpminsb %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpminsb %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpminsb %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <64 x i8> %x0, %x1
+  %t1 = icmp slt <64 x i8> %x2, %t0
+  %t2 = select <64 x i1> %t1, <64 x i8> %x2, <64 x i8> %t0
+  %t3 = icmp slt <64 x i8> %x3, %t2
+  %t4 = select <64 x i1> %t3, <64 x i8> %x3, <64 x i8> %t2
+  ret <64 x i8> %t4
+}
+
+define <32 x i16> @reassociate_smin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, <32 x i16> %x3) {
+; SSE-LABEL: reassociate_smin_v32i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    paddw %xmm4, %xmm0
+; SSE-NEXT:    paddw %xmm5, %xmm1
+; SSE-NEXT:    paddw %xmm6, %xmm2
+; SSE-NEXT:    paddw %xmm7, %xmm3
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm0
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm2
+; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_smin_v32i16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpminsw %ymm1, %ymm5, %ymm1
+; AVX-NEXT:    vpminsw %ymm0, %ymm4, %ymm0
+; AVX-NEXT:    vpminsw %ymm0, %ymm6, %ymm0
+; AVX-NEXT:    vpminsw %ymm1, %ymm7, %ymm1
+; AVX-NEXT:    retq
+
+  %t0 = add <32 x i16> %x0, %x1
+  %t1 = icmp slt <32 x i16> %x2, %t0
+  %t2 = select <32 x i1> %t1, <32 x i16> %x2, <32 x i16> %t0
+  %t3 = icmp slt <32 x i16> %x3, %t2
+  %t4 = select <32 x i1> %t3, <32 x i16> %x3, <32 x i16> %t2
+  ret <32 x i16> %t4
+}
+
+define <16 x i32> @reassociate_smin_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, <16 x i32> %x3) {
+; SSE-LABEL: reassociate_smin_v16i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm9
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm10
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm11
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm12
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm13
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm14
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm15
+; SSE-NEXT:    paddd %xmm4, %xmm0
+; SSE-NEXT:    paddd %xmm5, %xmm1
+; SSE-NEXT:    paddd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm7, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm4
+; SSE-NEXT:    pcmpgtd %xmm15, %xmm4
+; SSE-NEXT:    pand %xmm4, %xmm15
+; SSE-NEXT:    pandn %xmm3, %xmm4
+; SSE-NEXT:    por %xmm15, %xmm4
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm14, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm14
+; SSE-NEXT:    pandn %xmm2, %xmm3
+; SSE-NEXT:    por %xmm14, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm13, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm13
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm13, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm12, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm12
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    por %xmm12, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm11, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm11
+; SSE-NEXT:    pandn %xmm1, %xmm0
+; SSE-NEXT:    por %xmm11, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm10, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm10
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm10, %xmm1
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm9, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm9
+; SSE-NEXT:    pandn %xmm3, %xmm2
+; SSE-NEXT:    por %xmm9, %xmm2
+; SSE-NEXT:    movdqa %xmm4, %xmm3
+; SSE-NEXT:    pcmpgtd %xmm8, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm8
+; SSE-NEXT:    pandn %xmm4, %xmm3
+; SSE-NEXT:    por %xmm8, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smin_v16i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpminsd %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpminsd %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpminsd %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpminsd %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smin_v16i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsd %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminsd %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <16 x i32> %x0, %x1
+  %t1 = icmp slt <16 x i32> %x2, %t0
+  %t2 = select <16 x i1> %t1, <16 x i32> %x2, <16 x i32> %t0
+  %t3 = icmp slt <16 x i32> %x3, %t2
+  %t4 = select <16 x i1> %t3, <16 x i32> %x3, <16 x i32> %t2
+  ret <16 x i32> %t4
+}
+
+define <8 x i64> @reassociate_smin_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, <8 x i64> %x3) {
+; SSE-LABEL: reassociate_smin_v8i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT:    paddq %xmm4, %xmm0
+; SSE-NEXT:    paddq %xmm5, %xmm1
+; SSE-NEXT:    paddq %xmm6, %xmm2
+; SSE-NEXT:    paddq %xmm7, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
+; SSE-NEXT:    movdqa %xmm8, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm6, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm5, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm5, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm9
+; SSE-NEXT:    pand %xmm9, %xmm8
+; SSE-NEXT:    pandn %xmm3, %xmm9
+; SSE-NEXT:    por %xmm8, %xmm9
+; SSE-NEXT:    movdqa %xmm5, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm2, %xmm6
+; SSE-NEXT:    pxor %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm6, %xmm7
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm7[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm6
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3]
+; SSE-NEXT:    por %xmm6, %xmm8
+; SSE-NEXT:    pand %xmm8, %xmm5
+; SSE-NEXT:    pandn %xmm2, %xmm8
+; SSE-NEXT:    por %xmm5, %xmm8
+; SSE-NEXT:    movdqa %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pxor %xmm4, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm2, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm5
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm5
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm5, %xmm2
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pandn %xmm1, %xmm2
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa %xmm7, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm5
+; SSE-NEXT:    pand %xmm5, %xmm7
+; SSE-NEXT:    pandn %xmm0, %xmm5
+; SSE-NEXT:    por %xmm7, %xmm5
+; SSE-NEXT:    movdqa %xmm5, %xmm0
+; SSE-NEXT:    pxor %xmm4, %xmm0
+; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm0, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm0, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pandn %xmm5, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm4, %xmm1
+; SSE-NEXT:    movdqa %xmm7, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm6
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm1, %xmm7
+; SSE-NEXT:    pandn %xmm2, %xmm1
+; SSE-NEXT:    por %xmm7, %xmm1
+; SSE-NEXT:    movdqa %xmm8, %xmm2
+; SSE-NEXT:    pxor %xmm4, %xmm2
+; SSE-NEXT:    movdqa %xmm6, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    movdqa %xmm2, %xmm5
+; SSE-NEXT:    pcmpgtd %xmm3, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE-NEXT:    por %xmm3, %xmm2
+; SSE-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE-NEXT:    pand %xmm2, %xmm6
+; SSE-NEXT:    pandn %xmm8, %xmm2
+; SSE-NEXT:    por %xmm6, %xmm2
+; SSE-NEXT:    movdqa %xmm9, %xmm3
+; SSE-NEXT:    pxor %xmm4, %xmm3
+; SSE-NEXT:    pxor %xmm5, %xmm4
+; SSE-NEXT:    movdqa %xmm3, %xmm6
+; SSE-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE-NEXT:    pand %xmm3, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
+; SSE-NEXT:    por %xmm4, %xmm3
+; SSE-NEXT:    pand %xmm3, %xmm5
+; SSE-NEXT:    pandn %xmm9, %xmm3
+; SSE-NEXT:    por %xmm5, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: reassociate_smin_v8i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpgtq %ymm5, %ymm1, %ymm3
+; AVX2-NEXT:    vblendvpd %ymm3, %ymm5, %ymm1, %ymm1
+; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm4, %ymm0, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm6, %ymm0, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpgtq %ymm7, %ymm1, %ymm2
+; AVX2-NEXT:    vblendvpd %ymm2, %ymm7, %ymm1, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smin_v8i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsq %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminsq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
+
+  %t0 = add <8 x i64> %x0, %x1
+  %t1 = icmp slt <8 x i64> %x2, %t0
+  %t2 = select <8 x i1> %t1, <8 x i64> %x2, <8 x i64> %t0
+  %t3 = icmp slt <8 x i64> %x3, %t2
+  %t4 = select <8 x i1> %t3, <8 x i64> %x3, <8 x i64> %t2
+  ret <8 x i64> %t4
+}

From c32ef4bc0b17f79d96b672143d5e5fba9e39ea8d Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Tue, 4 Jun 2019 21:29:28 +0000
Subject: [PATCH 1067/1176] Convert MemberExpr creation and serialization to
 work the same way as most / all other Expr subclasses.

llvm-svn: 362551
---
 clang/include/clang/AST/Expr.h                | 68 ++++++--------
 clang/include/clang/AST/Stmt.h                |  1 +
 clang/lib/AST/DeclBase.cpp                    |  1 +
 clang/lib/AST/Expr.cpp                        | 72 +++++++++++----
 .../Frontend/Rewrite/RewriteModernObjC.cpp    | 54 +++++------
 clang/lib/Frontend/Rewrite/RewriteObjC.cpp    | 24 +++--
 clang/lib/Sema/SemaExprCXX.cpp                |  9 +-
 clang/lib/Serialization/ASTReaderStmt.cpp     | 92 +++++++++----------
 clang/lib/Serialization/ASTWriterStmt.cpp     | 56 ++++++-----
 9 files changed, 196 insertions(+), 181 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 96cb8e8f1a81a..438b10cc964f6 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -2735,6 +2735,7 @@ class MemberExpr final
                                     ASTTemplateKWAndArgsInfo,
                                     TemplateArgumentLoc> {
   friend class ASTReader;
+  friend class ASTStmtReader;
   friend class ASTStmtWriter;
   friend TrailingObjects;
 
@@ -2769,49 +2770,38 @@ class MemberExpr final
     return MemberExprBits.HasTemplateKWAndArgsInfo;
   }
 
+  MemberExpr(Expr *Base, bool IsArrow, SourceLocation OperatorLoc,
+             ValueDecl *MemberDecl, const DeclarationNameInfo &NameInfo,
+             QualType T, ExprValueKind VK, ExprObjectKind OK);
+  MemberExpr(EmptyShell Empty)
+      : Expr(MemberExprClass, Empty), Base(), MemberDecl() {}
+
 public:
-  MemberExpr(Expr *base, bool isarrow, SourceLocation operatorloc,
-             ValueDecl *memberdecl, const DeclarationNameInfo &NameInfo,
-             QualType ty, ExprValueKind VK, ExprObjectKind OK)
-      : Expr(MemberExprClass, ty, VK, OK, base->isTypeDependent(),
-             base->isValueDependent(), base->isInstantiationDependent(),
-             base->containsUnexpandedParameterPack()),
-        Base(base), MemberDecl(memberdecl), MemberDNLoc(NameInfo.getInfo()),
-        MemberLoc(NameInfo.getLoc()) {
-    assert(memberdecl->getDeclName() == NameInfo.getName());
-    MemberExprBits.IsArrow = isarrow;
-    MemberExprBits.HasQualifierOrFoundDecl = false;
-    MemberExprBits.HasTemplateKWAndArgsInfo = false;
-    MemberExprBits.HadMultipleCandidates = false;
-    MemberExprBits.OperatorLoc = operatorloc;
-  }
-
-  // NOTE: this constructor should be used only when it is known that
-  // the member name can not provide additional syntactic info
-  // (i.e., source locations for C++ operator names or type source info
-  // for constructors, destructors and conversion operators).
-  MemberExpr(Expr *base, bool isarrow, SourceLocation operatorloc,
-             ValueDecl *memberdecl, SourceLocation l, QualType ty,
-             ExprValueKind VK, ExprObjectKind OK)
-      : Expr(MemberExprClass, ty, VK, OK, base->isTypeDependent(),
-             base->isValueDependent(), base->isInstantiationDependent(),
-             base->containsUnexpandedParameterPack()),
-        Base(base), MemberDecl(memberdecl), MemberDNLoc(), MemberLoc(l) {
-    MemberExprBits.IsArrow = isarrow;
-    MemberExprBits.HasQualifierOrFoundDecl = false;
-    MemberExprBits.HasTemplateKWAndArgsInfo = false;
-    MemberExprBits.HadMultipleCandidates = false;
-    MemberExprBits.OperatorLoc = operatorloc;
-  }
-
-  static MemberExpr *Create(const ASTContext &C, Expr *base, bool isarrow,
+  static MemberExpr *Create(const ASTContext &C, Expr *Base, bool IsArrow,
                             SourceLocation OperatorLoc,
                             NestedNameSpecifierLoc QualifierLoc,
-                            SourceLocation TemplateKWLoc, ValueDecl *memberdecl,
-                            DeclAccessPair founddecl,
+                            SourceLocation TemplateKWLoc, ValueDecl *MemberDecl,
+                            DeclAccessPair FoundDecl,
                             DeclarationNameInfo MemberNameInfo,
-                            const TemplateArgumentListInfo *targs, QualType ty,
-                            ExprValueKind VK, ExprObjectKind OK);
+                            const TemplateArgumentListInfo *TemplateArgs,
+                            QualType T, ExprValueKind VK, ExprObjectKind OK);
+
+  /// Create an implicit MemberExpr, with no location, qualifier, template
+  /// arguments, and so on.
+  static MemberExpr *CreateImplicit(const ASTContext &C, Expr *Base,
+                                    bool IsArrow, ValueDecl *MemberDecl,
+                                    QualType T, ExprValueKind VK,
+                                    ExprObjectKind OK) {
+    return Create(C, Base, IsArrow, SourceLocation(), NestedNameSpecifierLoc(),
+                  SourceLocation(), MemberDecl,
+                  DeclAccessPair::make(MemberDecl, MemberDecl->getAccess()),
+                  DeclarationNameInfo(), nullptr, T, VK, OK);
+  }
+
+  static MemberExpr *CreateEmpty(const ASTContext &Context, bool HasQualifier,
+                                 bool HasFoundDecl,
+                                 bool HasTemplateKWAndArgsInfo,
+                                 unsigned NumTemplateArgs);
 
   void setBase(Expr *E) { Base = E; }
   Expr *getBase() const { return cast<Expr>(Base); }
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index fe5d802688466..d3b3bc27643f8 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -453,6 +453,7 @@ class alignas(void *) Stmt {
   enum { NumCallExprBits = 32 };
 
   class MemberExprBitfields {
+    friend class ASTStmtReader;
     friend class MemberExpr;
 
     unsigned : NumExprBits;
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 31985486d1d93..f5853b498043b 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -920,6 +920,7 @@ bool Decl::AccessDeclContextSanity() const {
   if (isa<TranslationUnitDecl>(this) ||
       isa<TemplateTypeParmDecl>(this) ||
       isa<NonTypeTemplateParmDecl>(this) ||
+      !getDeclContext() ||
       !isa<CXXRecordDecl>(getDeclContext()) ||
       isInvalidDecl() ||
       isa<StaticAssertDecl>(this) ||
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 10829c7007471..ee9d853f9f871 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1538,29 +1538,44 @@ UnaryExprOrTypeTraitExpr::UnaryExprOrTypeTraitExpr(
   }
 }
 
+MemberExpr::MemberExpr(Expr *Base, bool IsArrow, SourceLocation OperatorLoc,
+                       ValueDecl *MemberDecl,
+                       const DeclarationNameInfo &NameInfo, QualType T,
+                       ExprValueKind VK, ExprObjectKind OK)
+    : Expr(MemberExprClass, T, VK, OK, Base->isTypeDependent(),
+           Base->isValueDependent(), Base->isInstantiationDependent(),
+           Base->containsUnexpandedParameterPack()),
+      Base(Base), MemberDecl(MemberDecl), MemberDNLoc(NameInfo.getInfo()),
+      MemberLoc(NameInfo.getLoc()) {
+  assert(!NameInfo.getName() ||
+         MemberDecl->getDeclName() == NameInfo.getName());
+  MemberExprBits.IsArrow = IsArrow;
+  MemberExprBits.HasQualifierOrFoundDecl = false;
+  MemberExprBits.HasTemplateKWAndArgsInfo = false;
+  MemberExprBits.HadMultipleCandidates = false;
+  MemberExprBits.OperatorLoc = OperatorLoc;
+}
+
 MemberExpr *MemberExpr::Create(
-    const ASTContext &C, Expr *base, bool isarrow, SourceLocation OperatorLoc,
+    const ASTContext &C, Expr *Base, bool IsArrow, SourceLocation OperatorLoc,
     NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc,
-    ValueDecl *memberdecl, DeclAccessPair founddecl,
-    DeclarationNameInfo nameinfo, const TemplateArgumentListInfo *targs,
-    QualType ty, ExprValueKind vk, ExprObjectKind ok) {
-
-  bool hasQualOrFound = (QualifierLoc ||
-                         founddecl.getDecl() != memberdecl ||
-                         founddecl.getAccess() != memberdecl->getAccess());
-
-  bool HasTemplateKWAndArgsInfo = targs || TemplateKWLoc.isValid();
+    ValueDecl *MemberDecl, DeclAccessPair FoundDecl,
+    DeclarationNameInfo NameInfo, const TemplateArgumentListInfo *TemplateArgs,
+    QualType T, ExprValueKind VK, ExprObjectKind OK) {
+  bool HasQualOrFound = QualifierLoc || FoundDecl.getDecl() != MemberDecl ||
+                        FoundDecl.getAccess() != MemberDecl->getAccess();
+  bool HasTemplateKWAndArgsInfo = TemplateArgs || TemplateKWLoc.isValid();
   std::size_t Size =
       totalSizeToAlloc<MemberExprNameQualifier, ASTTemplateKWAndArgsInfo,
-                       TemplateArgumentLoc>(hasQualOrFound ? 1 : 0,
-                                            HasTemplateKWAndArgsInfo ? 1 : 0,
-                                            targs ? targs->size() : 0);
+                       TemplateArgumentLoc>(
+          HasQualOrFound ? 1 : 0, HasTemplateKWAndArgsInfo ? 1 : 0,
+          TemplateArgs ? TemplateArgs->size() : 0);
 
   void *Mem = C.Allocate(Size, alignof(MemberExpr));
   MemberExpr *E = new (Mem)
-      MemberExpr(base, isarrow, OperatorLoc, memberdecl, nameinfo, ty, vk, ok);
+      MemberExpr(Base, IsArrow, OperatorLoc, MemberDecl, NameInfo, T, VK, OK);
 
-  if (hasQualOrFound) {
+  if (HasQualOrFound) {
     // FIXME: Wrong. We should be looking at the member declaration we found.
     if (QualifierLoc && QualifierLoc.getNestedNameSpecifier()->isDependent()) {
       E->setValueDependent(true);
@@ -1576,19 +1591,20 @@ MemberExpr *MemberExpr::Create(
     MemberExprNameQualifier *NQ =
         E->getTrailingObjects<MemberExprNameQualifier>();
     NQ->QualifierLoc = QualifierLoc;
-    NQ->FoundDecl = founddecl;
+    NQ->FoundDecl = FoundDecl;
   }
 
   E->MemberExprBits.HasTemplateKWAndArgsInfo =
-      (targs || TemplateKWLoc.isValid());
+      TemplateArgs || TemplateKWLoc.isValid();
 
-  if (targs) {
+  if (TemplateArgs) {
     bool Dependent = false;
     bool InstantiationDependent = false;
     bool ContainsUnexpandedParameterPack = false;
     E->getTrailingObjects<ASTTemplateKWAndArgsInfo>()->initializeFrom(
-        TemplateKWLoc, *targs, E->getTrailingObjects<TemplateArgumentLoc>(),
-        Dependent, InstantiationDependent, ContainsUnexpandedParameterPack);
+        TemplateKWLoc, *TemplateArgs,
+        E->getTrailingObjects<TemplateArgumentLoc>(), Dependent,
+        InstantiationDependent, ContainsUnexpandedParameterPack);
     if (InstantiationDependent)
       E->setInstantiationDependent(true);
   } else if (TemplateKWLoc.isValid()) {
@@ -1599,6 +1615,22 @@ MemberExpr *MemberExpr::Create(
   return E;
 }
 
+MemberExpr *MemberExpr::CreateEmpty(const ASTContext &Context,
+                                    bool HasQualifier, bool HasFoundDecl,
+                                    bool HasTemplateKWAndArgsInfo,
+                                    unsigned NumTemplateArgs) {
+  assert((!NumTemplateArgs || HasTemplateKWAndArgsInfo) &&
+         "template args but no template arg info?");
+  bool HasQualOrFound = HasQualifier || HasFoundDecl;
+  std::size_t Size =
+      totalSizeToAlloc<MemberExprNameQualifier, ASTTemplateKWAndArgsInfo,
+                       TemplateArgumentLoc>(HasQualOrFound ? 1 : 0,
+                                            HasTemplateKWAndArgsInfo ? 1 : 0,
+                                            NumTemplateArgs);
+  void *Mem = Context.Allocate(Size, alignof(MemberExpr));
+  return new (Mem) MemberExpr(EmptyShell());
+}
+
 SourceLocation MemberExpr::getBeginLoc() const {
   if (isImplicitAccess()) {
     if (hasQualifier())
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 7b1f20408d5e0..170149d5053ff 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -881,9 +881,8 @@ RewriteModernObjC::getIvarAccessString(ObjCIvarDecl *D) {
                                         IvarT, nullptr,
                                         /*BitWidth=*/nullptr, /*Mutable=*/true,
                                         ICIS_NoInit);
-      MemberExpr *ME = new (Context)
-          MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
-                     FD->getType(), VK_LValue, OK_Ordinary);
+      MemberExpr *ME = MemberExpr::CreateImplicit(
+          *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
       IvarT = Context->getDecltypeType(ME, ME->getType());
     }
   }
@@ -2736,9 +2735,9 @@ Stmt *RewriteModernObjC::RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp) {
                                     Context->getPointerType(Context->VoidPtrTy),
                                     nullptr, /*BitWidth=*/nullptr,
                                     /*Mutable=*/true, ICIS_NoInit);
-  MemberExpr *ArrayLiteralME = new (Context)
-      MemberExpr(NSArrayCallExpr, false, SourceLocation(), ARRFD,
-                 SourceLocation(), ARRFD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ArrayLiteralME =
+      MemberExpr::CreateImplicit(*Context, NSArrayCallExpr, false, ARRFD,
+                                 ARRFD->getType(), VK_LValue, OK_Ordinary);
   QualType ConstIdT = Context->getObjCIdType().withConst();
   CStyleCastExpr * ArrayLiteralObjects =
     NoTypeInfoCStyleCastExpr(Context,
@@ -2865,9 +2864,9 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral
                                        Context->getPointerType(Context->VoidPtrTy),
                                        nullptr, /*BitWidth=*/nullptr,
                                        /*Mutable=*/true, ICIS_NoInit);
-  MemberExpr *DictLiteralValueME = new (Context)
-      MemberExpr(NSValueCallExpr, false, SourceLocation(), ARRFD,
-                 SourceLocation(), ARRFD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *DictLiteralValueME =
+      MemberExpr::CreateImplicit(*Context, NSValueCallExpr, false, ARRFD,
+                                 ARRFD->getType(), VK_LValue, OK_Ordinary);
   QualType ConstIdT = Context->getObjCIdType().withConst();
   CStyleCastExpr * DictValueObjects =
     NoTypeInfoCStyleCastExpr(Context,
@@ -2878,9 +2877,9 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral
   Expr *NSKeyCallExpr = CallExpr::Create(
       *Context, NSDictDRE, KeyExprs, NSDictFType, VK_LValue, SourceLocation());
 
-  MemberExpr *DictLiteralKeyME = new (Context)
-      MemberExpr(NSKeyCallExpr, false, SourceLocation(), ARRFD,
-                 SourceLocation(), ARRFD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *DictLiteralKeyME =
+      MemberExpr::CreateImplicit(*Context, NSKeyCallExpr, false, ARRFD,
+                                 ARRFD->getType(), VK_LValue, OK_Ordinary);
 
   CStyleCastExpr * DictKeyObjects =
     NoTypeInfoCStyleCastExpr(Context,
@@ -3180,9 +3179,8 @@ Expr *RewriteModernObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFla
                                     returnType, nullptr,
                                     /*BitWidth=*/nullptr,
                                     /*Mutable=*/true, ICIS_NoInit);
-  MemberExpr *ME = new (Context)
-      MemberExpr(STCE, false, SourceLocation(), FieldD, SourceLocation(),
-                 FieldD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = MemberExpr::CreateImplicit(
+      *Context, STCE, false, FieldD, FieldD->getType(), VK_LValue, OK_Ordinary);
 
   return ME;
 }
@@ -4629,9 +4627,8 @@ Stmt *RewriteModernObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME =
-      new (Context) MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
-                               FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = MemberExpr::CreateImplicit(
+      *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
 
   CastExpr *FunkCast = NoTypeInfoCStyleCastExpr(Context, PtrToFuncCastType,
                                                 CK_BitCast, ME);
@@ -4676,9 +4673,8 @@ Stmt *RewriteModernObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME = new (Context)
-      MemberExpr(DeclRefExp, isArrow, SourceLocation(), FD, SourceLocation(),
-                 FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = MemberExpr::CreateImplicit(
+      *Context, DeclRefExp, isArrow, FD, FD->getType(), VK_LValue, OK_Ordinary);
 
   StringRef Name = VD->getName();
   FD = FieldDecl::Create(*Context, nullptr, SourceLocation(), SourceLocation(),
@@ -4686,9 +4682,8 @@ Stmt *RewriteModernObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                          Context->VoidPtrTy, nullptr,
                          /*BitWidth=*/nullptr, /*Mutable=*/true,
                          ICIS_NoInit);
-  ME =
-      new (Context) MemberExpr(ME, true, SourceLocation(), FD, SourceLocation(),
-                               DeclRefExp->getType(), VK_LValue, OK_Ordinary);
+  ME = MemberExpr::CreateImplicit(*Context, ME, true, FD, DeclRefExp->getType(),
+                                  VK_LValue, OK_Ordinary);
 
   // Need parens to enforce precedence.
   ParenExpr *PE = new (Context) ParenExpr(DeclRefExp->getExprLoc(),
@@ -7528,9 +7523,8 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
                                             IvarT, nullptr,
                                             /*BitWidth=*/nullptr,
                                             /*Mutable=*/true, ICIS_NoInit);
-          MemberExpr *ME = new (Context)
-              MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
-                         FD->getType(), VK_LValue, OK_Ordinary);
+          MemberExpr *ME = MemberExpr::CreateImplicit(
+              *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
           IvarT = Context->getDecltypeType(ME, ME->getType());
         }
       }
@@ -7557,9 +7551,9 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
                                           D->getType(), nullptr,
                                           /*BitWidth=*/D->getBitWidth(),
                                           /*Mutable=*/true, ICIS_NoInit);
-        MemberExpr *ME = new (Context)
-            MemberExpr(PE, /*isArrow*/ false, SourceLocation(), FD,
-                       SourceLocation(), FD->getType(), VK_LValue, OK_Ordinary);
+        MemberExpr *ME =
+            MemberExpr::CreateImplicit(*Context, PE, /*isArrow*/ false, FD,
+                                       FD->getType(), VK_LValue, OK_Ordinary);
         Replacement = ME;
 
       }
diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index 3e50aff3c488d..2ff230dfff1b5 100644
--- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -3793,9 +3793,8 @@ Stmt *RewriteObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp) {
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME =
-      new (Context) MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
-                               FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = MemberExpr::CreateImplicit(
+      *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
 
   CastExpr *FunkCast = NoTypeInfoCStyleCastExpr(Context, PtrToFuncCastType,
                                                 CK_BitCast, ME);
@@ -3840,9 +3839,9 @@ Stmt *RewriteObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME = new (Context)
-      MemberExpr(DeclRefExp, isArrow, SourceLocation(), FD, SourceLocation(),
-                 FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME =
+      MemberExpr::CreateImplicit(*Context, DeclRefExp, isArrow, FD,
+                                 FD->getType(), VK_LValue, OK_Ordinary);
 
   StringRef Name = VD->getName();
   FD = FieldDecl::Create(*Context, nullptr, SourceLocation(), SourceLocation(),
@@ -3850,9 +3849,8 @@ Stmt *RewriteObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                          Context->VoidPtrTy, nullptr,
                          /*BitWidth=*/nullptr, /*Mutable=*/true,
                          ICIS_NoInit);
-  ME =
-      new (Context) MemberExpr(ME, true, SourceLocation(), FD, SourceLocation(),
-                               DeclRefExp->getType(), VK_LValue, OK_Ordinary);
+  ME = MemberExpr::CreateImplicit(*Context, ME, true, FD, DeclRefExp->getType(),
+                                  VK_LValue, OK_Ordinary);
 
   // Need parens to enforce precedence.
   ParenExpr *PE = new (Context) ParenExpr(DeclRefExp->getExprLoc(),
@@ -5830,10 +5828,10 @@ Stmt *RewriteObjCFragileABI::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
                                               OldRange.getEnd(),
                                               castExpr);
       if (IV->isFreeIvar() &&
-          declaresSameEntity(CurMethodDef->getClassInterface(), iFaceDecl->getDecl())) {
-        MemberExpr *ME = new (Context)
-            MemberExpr(PE, true, SourceLocation(), D, IV->getLocation(),
-                       D->getType(), VK_LValue, OK_Ordinary);
+          declaresSameEntity(CurMethodDef->getClassInterface(),
+                             iFaceDecl->getDecl())) {
+        MemberExpr *ME = MemberExpr::CreateImplicit(
+            *Context, PE, true, D, D->getType(), VK_LValue, OK_Ordinary);
         Replacement = ME;
       } else {
         IV->setBase(PE);
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 1eb6a7114ffa2..c42d272a3459b 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7189,9 +7189,12 @@ ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
     }
   }
 
-  MemberExpr *ME = new (Context) MemberExpr(
-      Exp.get(), /*IsArrow=*/false, SourceLocation(), Method, SourceLocation(),
-      Context.BoundMemberTy, VK_RValue, OK_Ordinary);
+  MemberExpr *ME = MemberExpr::Create(
+      Context, Exp.get(), /*IsArrow=*/false, SourceLocation(),
+      NestedNameSpecifierLoc(), SourceLocation(), Method,
+      DeclAccessPair::make(FoundDecl, FoundDecl->getAccess()),
+      DeclarationNameInfo(), /*TemplateArgs=*/nullptr, Context.BoundMemberTy,
+      VK_RValue, OK_Ordinary);
   if (HadMultipleCandidates)
     ME->setHadMultipleCandidates(true);
   MarkMemberReferenced(ME);
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 52aa3d961d200..a179207316429 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -752,9 +752,42 @@ void ASTStmtReader::VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
 }
 
 void ASTStmtReader::VisitMemberExpr(MemberExpr *E) {
-  // Don't call VisitExpr, this is fully initialized at creation.
-  assert(E->getStmtClass() == Stmt::MemberExprClass &&
-         "It's a subclass, we must advance Idx!");
+  VisitExpr(E);
+
+  bool HasQualifier = Record.readInt();
+  bool HasFoundDecl = Record.readInt();
+  bool HasTemplateInfo = Record.readInt();
+  unsigned NumTemplateArgs = Record.readInt();
+
+  E->Base = Record.readSubExpr();
+  E->MemberDecl = Record.readDeclAs<ValueDecl>();
+  Record.readDeclarationNameLoc(E->MemberDNLoc, E->MemberDecl->getDeclName());
+  E->MemberLoc = Record.readSourceLocation();
+  E->MemberExprBits.IsArrow = Record.readInt();
+  E->MemberExprBits.HasQualifierOrFoundDecl = HasQualifier || HasFoundDecl;
+  E->MemberExprBits.HasTemplateKWAndArgsInfo = HasTemplateInfo;
+  E->MemberExprBits.HadMultipleCandidates = Record.readInt();
+  E->MemberExprBits.OperatorLoc = Record.readSourceLocation();
+
+  if (HasQualifier || HasFoundDecl)
+    *E->getTrailingObjects<MemberExprNameQualifier>() =
+        MemberExprNameQualifier();
+
+  if (HasFoundDecl) {
+    auto *FoundD = Record.readDeclAs<NamedDecl>();
+    auto AS = (AccessSpecifier)Record.readInt();
+    E->getTrailingObjects<MemberExprNameQualifier>()->FoundDecl =
+        DeclAccessPair::make(FoundD, AS);
+  }
+
+  if (HasQualifier)
+    E->getTrailingObjects<MemberExprNameQualifier>()->QualifierLoc =
+        Record.readNestedNameSpecifierLoc();
+
+  if (HasTemplateInfo)
+    ReadTemplateKWAndArgsInfo(
+        *E->getTrailingObjects<ASTTemplateKWAndArgsInfo>(),
+        E->getTrailingObjects<TemplateArgumentLoc>(), NumTemplateArgs);
 }
 
 void ASTStmtReader::VisitObjCIsaExpr(ObjCIsaExpr *E) {
@@ -2551,55 +2584,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
           Context, /*NumArgs=*/Record[ASTStmtReader::NumExprFields], Empty);
       break;
 
-    case EXPR_MEMBER: {
-      // We load everything here and fully initialize it at creation.
-      // That way we can use MemberExpr::Create and don't have to duplicate its
-      // logic with a MemberExpr::CreateEmpty.
-
-      assert(Record.getIdx() == 0);
-      NestedNameSpecifierLoc QualifierLoc;
-      if (Record.readInt()) { // HasQualifier.
-        QualifierLoc = Record.readNestedNameSpecifierLoc();
-      }
-
-      SourceLocation TemplateKWLoc;
-      TemplateArgumentListInfo ArgInfo;
-      bool HasTemplateKWAndArgsInfo = Record.readInt();
-      if (HasTemplateKWAndArgsInfo) {
-        TemplateKWLoc = Record.readSourceLocation();
-        unsigned NumTemplateArgs = Record.readInt();
-        ArgInfo.setLAngleLoc(Record.readSourceLocation());
-        ArgInfo.setRAngleLoc(Record.readSourceLocation());
-        for (unsigned i = 0; i != NumTemplateArgs; ++i)
-          ArgInfo.addArgument(Record.readTemplateArgumentLoc());
-      }
-
-      bool HadMultipleCandidates = Record.readInt();
-
-      auto *FoundD = Record.readDeclAs<NamedDecl>();
-      auto AS = (AccessSpecifier)Record.readInt();
-      DeclAccessPair FoundDecl = DeclAccessPair::make(FoundD, AS);
-
-      QualType T = Record.readType();
-      auto VK = static_cast<ExprValueKind>(Record.readInt());
-      auto OK = static_cast<ExprObjectKind>(Record.readInt());
-      Expr *Base = ReadSubExpr();
-      auto *MemberD = Record.readDeclAs<ValueDecl>();
-      SourceLocation MemberLoc = Record.readSourceLocation();
-      DeclarationNameInfo MemberNameInfo(MemberD->getDeclName(), MemberLoc);
-      bool IsArrow = Record.readInt();
-      SourceLocation OperatorLoc = Record.readSourceLocation();
-
-      S = MemberExpr::Create(Context, Base, IsArrow, OperatorLoc, QualifierLoc,
-                             TemplateKWLoc, MemberD, FoundDecl, MemberNameInfo,
-                             HasTemplateKWAndArgsInfo ? &ArgInfo : nullptr, T,
-                             VK, OK);
-      Record.readDeclarationNameLoc(cast<MemberExpr>(S)->MemberDNLoc,
-                                    MemberD->getDeclName());
-      if (HadMultipleCandidates)
-        cast<MemberExpr>(S)->setHadMultipleCandidates(true);
+    case EXPR_MEMBER:
+      S = MemberExpr::CreateEmpty(Context, Record[ASTStmtReader::NumExprFields],
+                                  Record[ASTStmtReader::NumExprFields + 1],
+                                  Record[ASTStmtReader::NumExprFields + 2],
+                                  Record[ASTStmtReader::NumExprFields + 3]);
       break;
-    }
 
     case EXPR_BINARY_OPERATOR:
       S = new (Context) BinaryOperator(Empty);
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 776aab6bf51d2..d52a4a85b3252 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -660,39 +660,45 @@ void ASTStmtWriter::VisitCallExpr(CallExpr *E) {
 }
 
 void ASTStmtWriter::VisitMemberExpr(MemberExpr *E) {
-  // Don't call VisitExpr, we'll write everything here.
-
-  Record.push_back(E->hasQualifier());
-  if (E->hasQualifier())
-    Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
-
-  Record.push_back(E->hasTemplateKWAndArgsInfo());
-  if (E->hasTemplateKWAndArgsInfo()) {
-    Record.AddSourceLocation(E->getTemplateKeywordLoc());
-    unsigned NumTemplateArgs = E->getNumTemplateArgs();
-    Record.push_back(NumTemplateArgs);
-    Record.AddSourceLocation(E->getLAngleLoc());
-    Record.AddSourceLocation(E->getRAngleLoc());
-    for (unsigned i=0; i != NumTemplateArgs; ++i)
-      Record.AddTemplateArgumentLoc(E->getTemplateArgs()[i]);
-  }
+  VisitExpr(E);
 
-  Record.push_back(E->hadMultipleCandidates());
+  bool HasQualifier = E->hasQualifier();
+  bool HasFoundDecl =
+      E->hasQualifierOrFoundDecl() &&
+      (E->getFoundDecl().getDecl() != E->getMemberDecl() ||
+       E->getFoundDecl().getAccess() != E->getMemberDecl()->getAccess());
+  bool HasTemplateInfo = E->hasTemplateKWAndArgsInfo();
+  unsigned NumTemplateArgs = E->getNumTemplateArgs();
 
-  DeclAccessPair FoundDecl = E->getFoundDecl();
-  Record.AddDeclRef(FoundDecl.getDecl());
-  Record.push_back(FoundDecl.getAccess());
+  // Write these first for easy access when deserializing, as they affect the
+  // size of the MemberExpr.
+  Record.push_back(HasQualifier);
+  Record.push_back(HasFoundDecl);
+  Record.push_back(HasTemplateInfo);
+  Record.push_back(NumTemplateArgs);
 
-  Record.AddTypeRef(E->getType());
-  Record.push_back(E->getValueKind());
-  Record.push_back(E->getObjectKind());
   Record.AddStmt(E->getBase());
   Record.AddDeclRef(E->getMemberDecl());
+  Record.AddDeclarationNameLoc(E->MemberDNLoc,
+                               E->getMemberDecl()->getDeclName());
   Record.AddSourceLocation(E->getMemberLoc());
   Record.push_back(E->isArrow());
+  Record.push_back(E->hadMultipleCandidates());
   Record.AddSourceLocation(E->getOperatorLoc());
-  Record.AddDeclarationNameLoc(E->MemberDNLoc,
-                               E->getMemberDecl()->getDeclName());
+
+  if (HasFoundDecl) {
+    DeclAccessPair FoundDecl = E->getFoundDecl();
+    Record.AddDeclRef(FoundDecl.getDecl());
+    Record.push_back(FoundDecl.getAccess());
+  }
+
+  if (HasQualifier)
+    Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
+
+  if (HasTemplateInfo)
+    AddTemplateKWAndArgsInfo(*E->getTrailingObjects<ASTTemplateKWAndArgsInfo>(),
+                             E->getTrailingObjects<TemplateArgumentLoc>());
+
   Code = serialization::EXPR_MEMBER;
 }
 

From 1648cb17e40e2579a1a54df35639002f2acf7fe5 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 4 Jun 2019 21:47:50 +0000
Subject: [PATCH 1068/1176] [X86] Add avx512bw to the avx512
 machine-combiner-int-vec.ll to ensure we use zmm for v32i16/v64i8.

llvm-svn: 362552
---
 .../CodeGen/X86/machine-combiner-int-vec.ll   | 202 +++++++++++-------
 1 file changed, 129 insertions(+), 73 deletions(-)

diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
index 6a1385a6c1c3c..52c4cd6ca9ab9 100644
--- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx2 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl,avx512bw -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
 
 ; Verify that 128-bit vector logical ops are reassociated.
 
@@ -1710,15 +1710,22 @@ define <64 x i8> @reassociate_umax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; SSE-NEXT:    pmaxub {{[0-9]+}}(%rsp), %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_umax_v64i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpmaxub %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpmaxub %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpmaxub %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpmaxub %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_umax_v64i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxub %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpmaxub %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpmaxub %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpmaxub %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umax_v64i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxub %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxub %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
   %t1 = icmp ugt <64 x i8> %x2, %t0
@@ -1770,15 +1777,22 @@ define <32 x i16> @reassociate_umax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; SSE-NEXT:    pxor %xmm4, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_umax_v32i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpmaxuw %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpmaxuw %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpmaxuw %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpmaxuw %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_umax_v32i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxuw %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpmaxuw %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpmaxuw %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpmaxuw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umax_v32i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxuw %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxuw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
   %t1 = icmp ugt <32 x i16> %x2, %t0
@@ -2129,15 +2143,22 @@ define <64 x i8> @reassociate_smax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; SSE-NEXT:    por %xmm8, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_smax_v64i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpmaxsb %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpmaxsb %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpmaxsb %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpmaxsb %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_smax_v64i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxsb %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpmaxsb %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpmaxsb %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpmaxsb %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smax_v64i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsb %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxsb %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
   %t1 = icmp sgt <64 x i8> %x2, %t0
@@ -2164,15 +2185,22 @@ define <32 x i16> @reassociate_smax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; SSE-NEXT:    pmaxsw {{[0-9]+}}(%rsp), %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_smax_v32i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpmaxsw %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpmaxsw %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpmaxsw %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpmaxsw %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_smax_v32i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpmaxsw %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpmaxsw %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpmaxsw %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpmaxsw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smax_v32i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpmaxsw %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpmaxsw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
   %t1 = icmp sgt <32 x i16> %x2, %t0
@@ -2447,15 +2475,22 @@ define <64 x i8> @reassociate_umin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; SSE-NEXT:    pminub {{[0-9]+}}(%rsp), %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_umin_v64i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpminub %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpminub %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpminub %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpminub %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_umin_v64i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpminub %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpminub %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpminub %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpminub %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umin_v64i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminub %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminub %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
   %t1 = icmp ult <64 x i8> %x2, %t0
@@ -2507,15 +2542,22 @@ define <32 x i16> @reassociate_umin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; SSE-NEXT:    pxor %xmm4, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_umin_v32i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpminuw %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpminuw %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpminuw %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpminuw %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_umin_v32i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpminuw %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpminuw %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpminuw %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpminuw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_umin_v32i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminuw %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminuw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
   %t1 = icmp ult <32 x i16> %x2, %t0
@@ -2863,15 +2905,22 @@ define <64 x i8> @reassociate_smin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; SSE-NEXT:    por %xmm8, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_smin_v64i8:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpminsb %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpminsb %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpminsb %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpminsb %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_smin_v64i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpminsb %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpminsb %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpminsb %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpminsb %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smin_v64i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsb %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminsb %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
   %t1 = icmp slt <64 x i8> %x2, %t0
@@ -2898,15 +2947,22 @@ define <32 x i16> @reassociate_smin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; SSE-NEXT:    pminsw {{[0-9]+}}(%rsp), %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: reassociate_smin_v32i16:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
-; AVX-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX-NEXT:    vpminsw %ymm1, %ymm5, %ymm1
-; AVX-NEXT:    vpminsw %ymm0, %ymm4, %ymm0
-; AVX-NEXT:    vpminsw %ymm0, %ymm6, %ymm0
-; AVX-NEXT:    vpminsw %ymm1, %ymm7, %ymm1
-; AVX-NEXT:    retq
+; AVX2-LABEL: reassociate_smin_v32i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpminsw %ymm1, %ymm5, %ymm1
+; AVX2-NEXT:    vpminsw %ymm0, %ymm4, %ymm0
+; AVX2-NEXT:    vpminsw %ymm0, %ymm6, %ymm0
+; AVX2-NEXT:    vpminsw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: reassociate_smin_v32i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vpminsw %zmm0, %zmm2, %zmm0
+; AVX512-NEXT:    vpminsw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
   %t1 = icmp slt <32 x i16> %x2, %t0

From 2d37cb82f0e66e48f8970104e155a644038c45d5 Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Tue, 4 Jun 2019 21:51:34 +0000
Subject: [PATCH 1069/1176] [AArch64][GlobalISel] Make extloads to i64 legal.

Although we had the support in the prelegalizer combiner to generate the
G_SEXTLOAD or G_ZEXTLOAD ops, the legalizer definitions for arm64 had them as
lowering back to separate ops.

llvm-svn: 362553
---
 .../Target/AArch64/AArch64LegalizerInfo.cpp   |   3 +
 .../AArch64/GlobalISel/legalize-extload.mir   | 148 ++++++++++++++++++
 2 files changed, 151 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index aeedeba73ab70..cf063316c7dbc 100644
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -201,6 +201,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                  {s32, p0, 16, 8},
                                  {s32, p0, 32, 8},
+                                 {s64, p0, 8, 2},
+                                 {s64, p0, 16, 2},
+                                 {s64, p0, 32, 4},
                                  {s64, p0, 64, 8},
                                  {p0, p0, 64, 8},
                                  {v2s32, p0, 64, 8}})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir
index a26704497c353..22873b7ff9679 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extload.mir
@@ -1,5 +1,51 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64"
+
+  define void @test_extload() {
+  entry:
+    ret void
+  }
+
+  define i64 @sext_i32_i64(i32* %ptr) {
+    %ld = load i32, i32* %ptr, align 4
+    %v = sext i32 %ld to i64
+    ret i64 %v
+  }
+
+  define i64 @sext_i16_i64(i16* %ptr) {
+    %ld = load i16, i16* %ptr, align 2
+    %v = sext i16 %ld to i64
+    ret i64 %v
+  }
+
+  define i64 @sext_i8_i64(i8* %ptr) {
+    %ld = load i8, i8* %ptr, align 1
+    %v = sext i8 %ld to i64
+    ret i64 %v
+  }
+
+  define i64 @zext_i32_i64(i32* %ptr) {
+    %ld = load i32, i32* %ptr, align 4
+    %v = zext i32 %ld to i64
+    ret i64 %v
+  }
+
+  define i64 @zext_i16_i64(i16* %ptr) {
+    %ld = load i16, i16* %ptr, align 2
+    %v = zext i16 %ld to i64
+    ret i64 %v
+  }
+
+  define i64 @zext_i8_i64(i8* %ptr) {
+    %ld = load i8, i8* %ptr, align 1
+    %v = zext i8 %ld to i64
+    ret i64 %v
+  }
+
+...
 ---
 name:            test_extload
 body: |
@@ -13,3 +59,105 @@ body: |
     %1:_(s32) = G_LOAD %0 :: (load 1)
     $w0 = COPY %1
 ...
+---
+name:            sext_i32_i64
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: sext_i32_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 4 from %ir.ptr)
+    ; CHECK: $x0 = COPY [[SEXTLOAD]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 4 from %ir.ptr)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            sext_i16_i64
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: sext_i16_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 2 from %ir.ptr)
+    ; CHECK: $x0 = COPY [[SEXTLOAD]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 2 from %ir.ptr)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            sext_i8_i64
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: sext_i8_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 1 from %ir.ptr)
+    ; CHECK: $x0 = COPY [[SEXTLOAD]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_SEXTLOAD %0(p0) :: (load 1 from %ir.ptr)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            zext_i32_i64
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: zext_i32_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 4 from %ir.ptr)
+    ; CHECK: $x0 = COPY [[ZEXTLOAD]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 4 from %ir.ptr)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            zext_i16_i64
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: zext_i16_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 2 from %ir.ptr)
+    ; CHECK: $x0 = COPY [[ZEXTLOAD]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 2 from %ir.ptr)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            zext_i8_i64
+body:             |
+  bb.1:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: zext_i8_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load 1 from %ir.ptr)
+    ; CHECK: $x0 = COPY [[ZEXTLOAD]](s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:_(p0) = COPY $x0
+    %2:_(s64) = G_ZEXTLOAD %0(p0) :: (load 1 from %ir.ptr)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...

From 06c801e153347d24ec7ce93f6ffbbc58b64a89ba Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm@meinersbur.de>
Date: Tue, 4 Jun 2019 21:58:54 +0000
Subject: [PATCH 1070/1176] [ScopBuilder] Move canonicalizeDynamicsBasePtrs
 from ScopInfo. NFC.

Refactor Scop and ScopBuilder class. Move canonicalizeDynamicsBasePtrs
and corresponding static functions from ScopInfo.cpp to ScopBuilder.cpp

Patch by Dominik Adamski <adamski.dominik@gmail.com>

Differential Revision: https://reviews.llvm.org/D62781

llvm-svn: 362554
---
 polly/include/polly/ScopBuilder.h  | 28 ++++++++++++
 polly/include/polly/ScopInfo.h     | 28 ------------
 polly/lib/Analysis/ScopBuilder.cpp | 72 +++++++++++++++++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 70 -----------------------------
 4 files changed, 99 insertions(+), 99 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index f24a46a7e905a..0c1e15a12a640 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -391,6 +391,34 @@ class ScopBuilder {
   /// Build the access relation of all memory accesses of @p Stmt.
   void buildAccessRelations(ScopStmt &Stmt);
 
+  /// Canonicalize arrays with base pointers from the same equivalence class.
+  ///
+  /// Some context: in our normal model we assume that each base pointer is
+  /// related to a single specific memory region, where memory regions
+  /// associated with different base pointers are disjoint. Consequently we do
+  /// not need to compute additional data dependences that model possible
+  /// overlaps of these memory regions. To verify our assumption we compute
+  /// alias checks that verify that modeled arrays indeed do not overlap. In
+  /// case an overlap is detected the runtime check fails and we fall back to
+  /// the original code.
+  ///
+  /// In case of arrays where the base pointers are know to be identical,
+  /// because they are dynamically loaded by accesses that are in the same
+  /// invariant load equivalence class, such run-time alias check would always
+  /// be false.
+  ///
+  /// This function makes sure that we do not generate consistently failing
+  /// run-time checks for code that contains distinct arrays with known
+  /// equivalent base pointers. It identifies for each invariant load
+  /// equivalence class a single canonical array and canonicalizes all memory
+  /// accesses that reference arrays that have base pointers that are known to
+  /// be equal to the base pointer of such a canonical array to this canonical
+  /// array.
+  ///
+  /// We currently do not canonicalize arrays for which certain memory accesses
+  /// have been hoisted as loop invariant.
+  void canonicalizeDynamicBasePtrs();
+
 public:
   explicit ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                        const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 05d4d9cfcd715..583b99ef2be92 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2059,34 +2059,6 @@ class Scop {
   /// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
   void hoistInvariantLoads();
 
-  /// Canonicalize arrays with base pointers from the same equivalence class.
-  ///
-  /// Some context: in our normal model we assume that each base pointer is
-  /// related to a single specific memory region, where memory regions
-  /// associated with different base pointers are disjoint. Consequently we do
-  /// not need to compute additional data dependences that model possible
-  /// overlaps of these memory regions. To verify our assumption we compute
-  /// alias checks that verify that modeled arrays indeed do not overlap. In
-  /// case an overlap is detected the runtime check fails and we fall back to
-  /// the original code.
-  ///
-  /// In case of arrays where the base pointers are know to be identical,
-  /// because they are dynamically loaded by accesses that are in the same
-  /// invariant load equivalence class, such run-time alias check would always
-  /// be false.
-  ///
-  /// This function makes sure that we do not generate consistently failing
-  /// run-time checks for code that contains distinct arrays with known
-  /// equivalent base pointers. It identifies for each invariant load
-  /// equivalence class a single canonical array and canonicalizes all memory
-  /// accesses that reference arrays that have base pointers that are known to
-  /// be equal to the base pointer of such a canonical array to this canonical
-  /// array.
-  ///
-  /// We currently do not canonicalize arrays for which certain memory accesses
-  /// have been hoisted as loop invariant.
-  void canonicalizeDynamicBasePtrs();
-
   /// Check if @p MA can always be hoisted without execution context.
   bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                           bool MAInvalidCtxIsEmpty,
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index 51c9472687d9e..c4ac225d04ec5 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -1366,6 +1366,76 @@ void ScopBuilder::collectCandidateReductionLoads(
       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
 }
 
+/// Find the canonical scop array info object for a set of invariant load
+/// hoisted loads. The canonical array is the one that corresponds to the
+/// first load in the list of accesses which is used as base pointer of a
+/// scop array.
+static const ScopArrayInfo *findCanonicalArray(Scop &S,
+                                               MemoryAccessList &Accesses) {
+  for (MemoryAccess *Access : Accesses) {
+    const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
+        Access->getAccessInstruction(), MemoryKind::Array);
+    if (CanonicalArray)
+      return CanonicalArray;
+  }
+  return nullptr;
+}
+
+/// Check if @p Array severs as base array in an invariant load.
+static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
+  for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
+    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
+      if (Access2->getScopArrayInfo() == Array)
+        return true;
+  return false;
+}
+
+/// Replace the base pointer arrays in all memory accesses referencing @p Old,
+/// with a reference to @p New.
+static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
+                                 const ScopArrayInfo *New) {
+  for (ScopStmt &Stmt : S)
+    for (MemoryAccess *Access : Stmt) {
+      if (Access->getLatestScopArrayInfo() != Old)
+        continue;
+
+      isl::id Id = New->getBasePtrId();
+      isl::map Map = Access->getAccessRelation();
+      Map = Map.set_tuple_id(isl::dim::out, Id);
+      Access->setAccessRelation(Map);
+    }
+}
+
+void ScopBuilder::canonicalizeDynamicBasePtrs() {
+  for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
+    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
+
+    const ScopArrayInfo *CanonicalBasePtrSAI =
+        findCanonicalArray(*scop, BasePtrAccesses);
+
+    if (!CanonicalBasePtrSAI)
+      continue;
+
+    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
+      const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
+          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
+      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
+          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
+        continue;
+
+      // we currently do not canonicalize arrays where some accesses are
+      // hoisted as invariant loads. If we would, we need to update the access
+      // function of the invariant loads as well. However, as this is not a
+      // very common situation, we leave this for now to avoid further
+      // complexity increases.
+      if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
+        continue;
+
+      replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
+    }
+  }
+}
+
 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
   for (MemoryAccess *Access : Stmt.MemAccs) {
     Type *ElementType = Access->getElementType();
@@ -1601,7 +1671,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
   }
 
   scop->hoistInvariantLoads();
-  scop->canonicalizeDynamicBasePtrs();
+  canonicalizeDynamicBasePtrs();
   verifyInvariantLoads();
   scop->simplifySCoP(true);
 
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index f7712c3efb886..0a8da7426bc9f 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3797,76 +3797,6 @@ void Scop::hoistInvariantLoads() {
   }
 }
 
-/// Find the canonical scop array info object for a set of invariant load
-/// hoisted loads. The canonical array is the one that corresponds to the
-/// first load in the list of accesses which is used as base pointer of a
-/// scop array.
-static const ScopArrayInfo *findCanonicalArray(Scop *S,
-                                               MemoryAccessList &Accesses) {
-  for (MemoryAccess *Access : Accesses) {
-    const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
-        Access->getAccessInstruction(), MemoryKind::Array);
-    if (CanonicalArray)
-      return CanonicalArray;
-  }
-  return nullptr;
-}
-
-/// Check if @p Array severs as base array in an invariant load.
-static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) {
-  for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
-    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
-      if (Access2->getScopArrayInfo() == Array)
-        return true;
-  return false;
-}
-
-/// Replace the base pointer arrays in all memory accesses referencing @p Old,
-/// with a reference to @p New.
-static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
-                                 const ScopArrayInfo *New) {
-  for (ScopStmt &Stmt : *S)
-    for (MemoryAccess *Access : Stmt) {
-      if (Access->getLatestScopArrayInfo() != Old)
-        continue;
-
-      isl::id Id = New->getBasePtrId();
-      isl::map Map = Access->getAccessRelation();
-      Map = Map.set_tuple_id(isl::dim::out, Id);
-      Access->setAccessRelation(Map);
-    }
-}
-
-void Scop::canonicalizeDynamicBasePtrs() {
-  for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
-    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
-
-    const ScopArrayInfo *CanonicalBasePtrSAI =
-        findCanonicalArray(this, BasePtrAccesses);
-
-    if (!CanonicalBasePtrSAI)
-      continue;
-
-    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
-      const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
-          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
-      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
-          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
-        continue;
-
-      // we currently do not canonicalize arrays where some accesses are
-      // hoisted as invariant loads. If we would, we need to update the access
-      // function of the invariant loads as well. However, as this is not a
-      // very common situation, we leave this for now to avoid further
-      // complexity increases.
-      if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
-        continue;
-
-      replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
-    }
-  }
-}
-
 ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
                                               ArrayRef<const SCEV *> Sizes,
                                               MemoryKind Kind,

From a33eaad00cca99ca0b5d2b0cc6dec33be6d7ee7f Mon Sep 17 00:00:00 2001
From: Don Hinton <hintonda@gmail.com>
Date: Tue, 4 Jun 2019 22:07:40 +0000
Subject: [PATCH 1071/1176] [Analysis] Only build Analysis plugins when
 CLANG_ENABLE_STATIC_ANALYZER is enabled.

Fixes bug introduced in r362328.

Thanks to Nathan Chancellor for reporting this!

llvm-svn: 362555
---
 clang/lib/Analysis/plugins/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Analysis/plugins/CMakeLists.txt b/clang/lib/Analysis/plugins/CMakeLists.txt
index f7dbc936952cc..bd7314a871fc5 100644
--- a/clang/lib/Analysis/plugins/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(LLVM_ENABLE_PLUGINS)
+if(CLANG_ENABLE_STATIC_ANALYZER AND LLVM_ENABLE_PLUGINS)
   add_subdirectory(SampleAnalyzer)
   add_subdirectory(CheckerDependencyHandling)
   add_subdirectory(CheckerOptionHandling)

From 375d5fb9ca22ba40f8b10660de66c2491630ba21 Mon Sep 17 00:00:00 2001
From: Alex Brachet <alexbrachetmialot@gmail.com>
Date: Tue, 4 Jun 2019 22:17:27 +0000
Subject: [PATCH 1072/1176] [test][llvm-objcopy] Test llvm-objcopy with
 standard streams

Differential Revision: https://reviews.llvm.org/D62817

llvm-svn: 362556
---
 llvm/test/tools/llvm-objcopy/ELF/standard-streams.test | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/standard-streams.test

diff --git a/llvm/test/tools/llvm-objcopy/ELF/standard-streams.test b/llvm/test/tools/llvm-objcopy/ELF/standard-streams.test
new file mode 100644
index 0000000000000..85b120b22a979
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/standard-streams.test
@@ -0,0 +1,6 @@
+# RUN: llvm-objcopy - %t < %p/Inputs/alloc-symtab.o
+# RUN: llvm-objcopy %p/Inputs/alloc-symtab.o %t1
+# RUN: cmp %t %t1
+
+# RUN: llvm-objcopy %p/Inputs/alloc-symtab.o - > %t2
+# RUN: cmp %t1 %t2

From c93b99589f7ef45d2f85f459731d0cbc4a55b005 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda@apple.com>
Date: Tue, 4 Jun 2019 22:46:20 +0000
Subject: [PATCH 1073/1176] Call abs to avoid signed/unsigned comparison
 warning.

llvm-svn: 362557
---
 .../Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
index af54115c2db54..34d08167a16b2 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
@@ -819,7 +819,7 @@ bool x86AssemblyInspectionEngine::local_branch_p (
   int offset;
   if (pc_rel_branch_or_jump_p (instruction_length, offset) && offset != 0) {
     addr_t next_pc_value = current_func_text_offset + instruction_length;
-    if (offset < 0 && -offset > current_func_text_offset) {
+    if (offset < 0 && abs (offset) > current_func_text_offset) {
       // Branch target is before the start of this function
       return false;
     }

From 5c7245b830eabfe85113707187f1da9bac1e17e5 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Tue, 4 Jun 2019 23:01:36 +0000
Subject: [PATCH 1074/1176] [Scalarizer] Add UnaryOperator visitor to
 scalarization pass

Differential Revision: https://reviews.llvm.org/D62858

llvm-svn: 362558
---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 38 ++++++++++++++
 llvm/test/Transforms/Scalarizer/basic.ll  | 62 +++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 0bd0fff1aa59b..515a648277300 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -124,6 +124,18 @@ struct ICmpSplitter {
   ICmpInst &ICI;
 };
 
+// UnarySpliiter(UO)(Builder, X, Name) uses Builder to create
+// a unary operator like UO called Name with operand X.
+struct UnarySplitter {
+  UnarySplitter(UnaryOperator &uo) : UO(uo) {}
+
+  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
+    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
+  }
+
+  UnaryOperator &UO;
+};
+
 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
 // a binary operator like BO called Name with operands X and Y.
 struct BinarySplitter {
@@ -173,6 +185,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
   bool visitSelectInst(SelectInst &SI);
   bool visitICmpInst(ICmpInst &ICI);
   bool visitFCmpInst(FCmpInst &FCI);
+  bool visitUnaryOperator(UnaryOperator &UO);
   bool visitBinaryOperator(BinaryOperator &BO);
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI);
@@ -192,6 +205,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
                        const DataLayout &DL);
   bool finish();
 
+  template<typename T> bool splitUnary(Instruction &, const T &);
   template<typename T> bool splitBinary(Instruction &, const T &);
 
   bool splitCall(CallInst &CI);
@@ -419,6 +433,26 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
   return true;
 }
 
+// Scalarize one-operand instruction I, using Split(Builder, X, Name)
+// to create an instruction like I with operand X and name Name.
+template<typename Splitter>
+bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
+  VectorType *VT = dyn_cast<VectorType>(I.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(&I);
+  Scatterer Op = scatter(&I, I.getOperand(0));
+  assert(Op.size() == NumElems && "Mismatched unary operation");
+  ValueVector Res;
+  Res.resize(NumElems);
+  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+    Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
+  gather(&I, Res);
+  return true;
+}
+
 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
 // to create an instruction like I with operands X and Y and name Name.
 template<typename Splitter>
@@ -551,6 +585,10 @@ bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
   return splitBinary(FCI, FCmpSplitter(FCI));
 }
 
+bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
+  return splitUnary(UO, UnarySplitter(UO));
+}
+
 bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
   return splitBinary(BO, BinarySplitter(BO));
 }
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index 29a82fd8090c4..577f0b19bb1ed 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -444,6 +444,68 @@ exit:
   ret <4 x float> %next_acc
 }
 
+; Test unary operator scalarization.
+define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
+; CHECK-LABEL: @f15(
+; CHECK: %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
+; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
+; CHECK: %val.i0 = load float, float* %ptr.i0, align 16
+; CHECK: %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
+; CHECK: %val.i1 = load float, float* %ptr.i1, align 4
+; CHECK: %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
+; CHECK: %val.i2 = load float, float* %ptr.i2, align 8
+; CHECK: %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
+; CHECK: %val.i3 = load float, float* %ptr.i3, align 4
+; CHECK: %neg.i0 = fneg float %val.i0
+; CHECK: %neg.i1 = fneg float %val.i1
+; CHECK: %neg.i2 = fneg float %val.i2
+; CHECK: %neg.i3 = fneg float %val.i3
+; CHECK: %neg.upto0 = insertelement <4 x float> undef, float %neg.i0, i32 0
+; CHECK: %neg.upto1 = insertelement <4 x float> %neg.upto0, float %neg.i1, i32 1
+; CHECK: %neg.upto2 = insertelement <4 x float> %neg.upto1, float %neg.i2, i32 2
+; CHECK: %neg = insertelement <4 x float> %neg.upto2, float %neg.i3, i32 3
+; CHECK: %call = call <4 x float> @ext(<4 x float> %neg)
+; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
+; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.000000e+00
+; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
+; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.000000e+00
+; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
+; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.000000e+00
+; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
+; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.000000e+00
+; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.000000e+00
+; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.000000e+00
+; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.000000e+00
+; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.000000e+00
+; CHECK: store float %sel.i0, float* %ptr.i0, align 16
+; CHECK: store float %sel.i1, float* %ptr.i1, align 4
+; CHECK: store float %sel.i2, float* %ptr.i2, align 8
+; CHECK: store float %sel.i3, float* %ptr.i3, align 4
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
+  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
+  %nexti = sub i32 %i, 1
+
+  %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
+  %val = load <4 x float> , <4 x float> *%ptr
+  %neg = fneg <4 x float> %val
+  %call = call <4 x float> @ext(<4 x float> %neg)
+  %cmp = fcmp ogt <4 x float> %call,
+  <float 1.0, float 2.0, float 3.0, float 4.0>
+  %sel = select <4 x i1> %cmp, <4 x float> %call,
+  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
+  store <4 x float> %sel, <4 x float> *%ptr
+
+  %test = icmp eq i32 %nexti, 0
+  br i1 %test, label %loop, label %exit
+
+exit:
+  ret void
+}
+
 !0 = !{ !"root" }
 !1 = !{ !"set1", !0 }
 !2 = !{ !"set2", !0 }

From 5e312be0fa8442958fab31f08ec877cc4f6f31a2 Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Tue, 4 Jun 2019 23:11:42 +0000
Subject: [PATCH 1075/1176] [AArch64] FastISel: fix test to specify -fast-isel
 when -fast-isel-abort=1 is used.

This test has been inadvertently been GISel, and now assert due to incompatible flags.

llvm-svn: 362559
---
 llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
index cb54e4530a582..5f90bab9cf46e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 @fn.table = internal global [2 x i8*] [i8* blockaddress(@fn, %ZERO), i8* blockaddress(@fn, %ONE)], align 8
 

From 487077698cf3c2b50de55de59729ddc9cd6bf95e Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 23:27:40 +0000
Subject: [PATCH 1076/1176] msabi: Fix exponential mangling time for even more
 contrived inputs

This is a follow-up to r362293 which fixed exponential time needed
for mangling certain templates. This fixes the same issue if that
template pattern happens in template arguments > 10: The first
ten template arguments can use back references, and r362293 added
caching for back references. For latter arguments, we have to add
a cache for the mangling itself instead.

Fixes PR42091 even more.

Differential Revision: https://reviews.llvm.org/D62780

llvm-svn: 362560
---
 clang/lib/AST/MicrosoftMangle.cpp             | 51 +++++++++++++------
 .../mangle-ms-back-references-pr13207.cpp     | 22 ++++++++
 2 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index ddc6e12d1d18e..5e9358e24fc9d 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/xxhash.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/StringSaver.h"
 
 using namespace clang;
 
@@ -268,6 +269,11 @@ class MicrosoftCXXNameMangler {
   ArgBackRefMap FunArgBackReferences;
   ArgBackRefMap TemplateArgBackReferences;
 
+  typedef llvm::DenseMap<const void *, StringRef> TemplateArgStringMap;
+  TemplateArgStringMap TemplateArgStrings;
+  llvm::StringSaver TemplateArgStringStorage;
+  llvm::BumpPtrAllocator TemplateArgStringStorageAlloc;
+
   typedef std::set<std::pair<int, bool>> PassObjectSizeArgsSet;
   PassObjectSizeArgsSet PassObjectSizeArgs;
 
@@ -282,18 +288,21 @@ class MicrosoftCXXNameMangler {
 
   MicrosoftCXXNameMangler(MicrosoftMangleContextImpl &C, raw_ostream &Out_)
       : Context(C), Out(Out_), Structor(nullptr), StructorType(-1),
+        TemplateArgStringStorage(TemplateArgStringStorageAlloc),
         PointersAre64Bit(C.getASTContext().getTargetInfo().getPointerWidth(0) ==
                          64) {}
 
   MicrosoftCXXNameMangler(MicrosoftMangleContextImpl &C, raw_ostream &Out_,
                           const CXXConstructorDecl *D, CXXCtorType Type)
       : Context(C), Out(Out_), Structor(getStructor(D)), StructorType(Type),
+        TemplateArgStringStorage(TemplateArgStringStorageAlloc),
         PointersAre64Bit(C.getASTContext().getTargetInfo().getPointerWidth(0) ==
                          64) {}
 
   MicrosoftCXXNameMangler(MicrosoftMangleContextImpl &C, raw_ostream &Out_,
                           const CXXDestructorDecl *D, CXXDtorType Type)
       : Context(C), Out(Out_), Structor(getStructor(D)), StructorType(Type),
+        TemplateArgStringStorage(TemplateArgStringStorageAlloc),
         PointersAre64Bit(C.getASTContext().getTargetInfo().getPointerWidth(0) ==
                          64) {}
 
@@ -809,24 +818,34 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
     // TD / TemplateArg pairs.
     ArgBackRefMap::iterator Found = TemplateArgBackReferences.find(ND);
     if (Found == TemplateArgBackReferences.end()) {
-      // Mangle full template name into temporary buffer.
-      llvm::SmallString<64> TemplateMangling;
-      llvm::raw_svector_ostream Stream(TemplateMangling);
-      MicrosoftCXXNameMangler Extra(Context, Stream);
-      Extra.mangleTemplateInstantiationName(TD, *TemplateArgs);
-
-      // Use the string backref vector to possibly get a back reference.
-      mangleSourceName(TemplateMangling);
-
-      // Memoize back reference for this type.
-      BackRefVec::iterator StringFound =
-          llvm::find(NameBackReferences, TemplateMangling);
-      if (StringFound != NameBackReferences.end()) {
-        TemplateArgBackReferences[ND] =
-            StringFound - NameBackReferences.begin();
+
+      TemplateArgStringMap::iterator Found = TemplateArgStrings.find(ND);
+      if (Found == TemplateArgStrings.end()) {
+        // Mangle full template name into temporary buffer.
+        llvm::SmallString<64> TemplateMangling;
+        llvm::raw_svector_ostream Stream(TemplateMangling);
+        MicrosoftCXXNameMangler Extra(Context, Stream);
+        Extra.mangleTemplateInstantiationName(TD, *TemplateArgs);
+
+        // Use the string backref vector to possibly get a back reference.
+        mangleSourceName(TemplateMangling);
+
+        // Memoize back reference for this type if one exist, else memoize
+        // the mangling itself.
+        BackRefVec::iterator StringFound =
+            llvm::find(NameBackReferences, TemplateMangling);
+        if (StringFound != NameBackReferences.end()) {
+          TemplateArgBackReferences[ND] =
+              StringFound - NameBackReferences.begin();
+        } else {
+          TemplateArgStrings[ND] =
+              TemplateArgStringStorage.save(TemplateMangling.str());
+        }
+      } else {
+        Out << Found->second; // Outputs a StringRef.
       }
     } else {
-      Out << Found->second;
+      Out << Found->second; // Outputs a back reference (an int).
     }
     return;
   }
diff --git a/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp b/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp
index 653937917b8d9..d810ec708000c 100644
--- a/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-back-references-pr13207.cpp
@@ -232,3 +232,25 @@ using B6 = Food<B5, B4, B3, B2, B1, B0, B0, B0, B0, B0,  B5, B5, B5, B5, B5, B5,
 void f(B6 a) {}
 
 // CHECK: "?f@@YAXU?$Food@U?$Food@U?$Food@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U4@U4@U4@U4@U4@U4@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U5@U5@U5@U5@U5@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U4@U4@U4@U4@U4@U4@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U3@U3@U3@U3@U3@U3@U3@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U2@U2@U2@U2@U2@U2@U2@U2@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@U?$Food@HHHHHHHHHHHHHHHHHHHH@@U6@U6@U6@U6@U1@U1@U1@U1@U1@U1@U1@U1@U1@U1@@@@Z"
+
+
+// Similar to the previous case, except that the later arguments aren't
+// present in the earlier ones and hence aren't in the backref cache.
+template <class T1, class T2, class T3, class T4, class T5, class T6, class T7,
+          class T8, class T9, class T10, class T11, class T12, class T13,
+          class T14, class T15, class T16, class T17, class T18, class T19,
+          class T20>
+struct Fooe {};
+
+using C0 = Fooe<int, int, int, int, int, int, int, int, int, int,  int, int, int, int, int, int, int, int, int, int>;
+using C1 = Fooe<C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,  C0, C0, C0, C0, C0, C0, C0, C0, C0, C0>;
+using C2 = Fooe<C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1>;
+using C3 = Fooe<C1, C1, C0, C0, C0, C0, C0, C0, C0, C0,  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2>;
+using C4 = Fooe<C2, C2, C1, C0, C0, C0, C0, C0, C0, C0,  C3, C3, C3, C3, C3, C3, C3, C3, C3, C3>;
+using C5 = Fooe<C3, C3, C2, C1, C0, C0, C0, C0, C0, C0,  C4, C4, C4, C4, C4, C4, C4, C4, C4, C4>;
+using C6 = Fooe<C4, C4, C3, C2, C1, C0, C0, C0, C0, C0,  C5, C5, C5, C5, C5, C5, C5, C5, C5, C5>;
+using C7 = Fooe<C5, C4, C3, C2, C1, C0, C0, C0, C0, C0,  C6, C6, C6, C6, C6, C6, C6, C6, C6, C6>;
+
+// This too should take milliseconds, not minutes.
+void f(C7 a) {}
+// CHECK: "??@f23afdfb44276eaa53a5575352cf0ebc@"

From 577d59bc679d0348ed2f0d6e759fbc728c1f8dbd Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 4 Jun 2019 23:35:07 +0000
Subject: [PATCH 1077/1176] svn propset svn:executable on
 utils/prepare-code-coverage-artifact.py

llvm-svn: 362561
---
 llvm/utils/prepare-code-coverage-artifact.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 llvm/utils/prepare-code-coverage-artifact.py

diff --git a/llvm/utils/prepare-code-coverage-artifact.py b/llvm/utils/prepare-code-coverage-artifact.py
old mode 100644
new mode 100755

From 453b7caaf7f0112a7164cb132db5a39907abf6eb Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Wed, 5 Jun 2019 00:04:33 +0000
Subject: [PATCH 1078/1176] PR42111: Use guarded initialization for
 thread-local variables with unordered initialization and internal linkage.

We'll run their initializers once on each reference, so we need a guard
variable even though they only have a single definition.

llvm-svn: 362562
---
 clang/lib/CodeGen/CGDeclCXX.cpp                 |  8 +++++++-
 .../test/CodeGenCXX/cxx1y-variable-template.cpp | 17 +++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index ce0163723c473..199e0ac99e81d 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -630,7 +630,13 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
   // Use guarded initialization if the global variable is weak. This
   // occurs for, e.g., instantiated static data members and
   // definitions explicitly marked weak.
-  if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage()) {
+  //
+  // Also use guarded initialization for a variable with dynamic TLS and
+  // unordered initialization. (If the initialization is ordered, the ABI
+  // layer will guard the whole-TU initialization for us.)
+  if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage() ||
+      (D->getTLSKind() == VarDecl::TLS_Dynamic &&
+       isTemplateInstantiation(D->getTemplateSpecializationKind()))) {
     EmitCXXGuardedInit(*D, Addr, PerformInit);
   } else {
     EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
diff --git a/clang/test/CodeGenCXX/cxx1y-variable-template.cpp b/clang/test/CodeGenCXX/cxx1y-variable-template.cpp
index dd8f28e42992c..51d1b54b37a7f 100644
--- a/clang/test/CodeGenCXX/cxx1y-variable-template.cpp
+++ b/clang/test/CodeGenCXX/cxx1y-variable-template.cpp
@@ -27,4 +27,21 @@ int *p = &n<0>;
 // CHECK: @_ZN5OuterIA100_cE5InnerIA20_cE3arrIA3_cEE = linkonce_odr global [123 x i32] zeroinitializer
 // CHECK: @_ZGVN5OuterIA100_cE5InnerIA20_cE3arrIA3_cEE = linkonce_odr global
 
+// CHECK: @_ZTHN7PR4211112_GLOBAL__N_11nILi0EEE = internal alias {{.*}} @[[PR42111_CTOR:.*]]
+
 // CHECK: call {{.*}}@_Z8init_arrv
+
+// Ensure that we use guarded initialization for an instantiated thread_local
+// variable with internal linkage.
+namespace PR42111 {
+  int f();
+  namespace { template <int = 0> thread_local int n = f(); }
+  // CHECK: define {{.*}}@[[PR42111_CTOR]](
+  // CHECK: load {{.*}} @_ZGVN7PR4211112_GLOBAL__N_11nILi0EEE
+  // CHECK: icmp eq i8 {{.*}}, 0
+  // CHECK: br i1
+  // CHECK: call i32 @_ZN7PR421111fEv(
+  // CHECK: store i32 {{.*}}, i32* @_ZN7PR4211112_GLOBAL__N_11nILi0EEE
+  // CHECK: store i8 1, i8* @_ZGVN7PR4211112_GLOBAL__N_11nILi0EEE
+  int g() { return n<> + n<>; }
+}

From 8ca545576476b26cd4097b8ad23049adaf084d9c Mon Sep 17 00:00:00 2001
From: Richard Smith <richard-llvm@metafoo.co.uk>
Date: Wed, 5 Jun 2019 00:21:47 +0000
Subject: [PATCH 1079/1176] Factor out duplicated code building a MemberExpr
 and marking it referenced.

llvm-svn: 362563
---
 clang/include/clang/Sema/Sema.h   | 17 ++++++++
 clang/lib/Sema/SemaExprCXX.cpp    | 15 +++----
 clang/lib/Sema/SemaExprMember.cpp | 71 +++++++++++++++++++------------
 clang/lib/Sema/SemaOverload.cpp   | 11 ++---
 4 files changed, 70 insertions(+), 44 deletions(-)

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 74a1a28a39bbc..a065be308eed4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4510,6 +4510,23 @@ class Sema {
                                    UnqualifiedId &Member,
                                    Decl *ObjCImpDecl);
 
+  MemberExpr *
+  BuildMemberExpr(Expr *Base, bool IsArrow, SourceLocation OpLoc,
+                  const CXXScopeSpec *SS, SourceLocation TemplateKWLoc,
+                  ValueDecl *Member, DeclAccessPair FoundDecl,
+                  bool HadMultipleCandidates,
+                  const DeclarationNameInfo &MemberNameInfo, QualType Ty,
+                  ExprValueKind VK, ExprObjectKind OK,
+                  const TemplateArgumentListInfo *TemplateArgs = nullptr);
+  MemberExpr *
+  BuildMemberExpr(Expr *Base, bool IsArrow, SourceLocation OpLoc,
+                  NestedNameSpecifierLoc NNS, SourceLocation TemplateKWLoc,
+                  ValueDecl *Member, DeclAccessPair FoundDecl,
+                  bool HadMultipleCandidates,
+                  const DeclarationNameInfo &MemberNameInfo, QualType Ty,
+                  ExprValueKind VK, ExprObjectKind OK,
+                  const TemplateArgumentListInfo *TemplateArgs = nullptr);
+
   void ActOnDefaultCtorInitializers(Decl *CDtorDecl);
   bool ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
                                FunctionDecl *FDecl,
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index c42d272a3459b..2f7e4a0f15cfa 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7189,15 +7189,12 @@ ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
     }
   }
 
-  MemberExpr *ME = MemberExpr::Create(
-      Context, Exp.get(), /*IsArrow=*/false, SourceLocation(),
-      NestedNameSpecifierLoc(), SourceLocation(), Method,
-      DeclAccessPair::make(FoundDecl, FoundDecl->getAccess()),
-      DeclarationNameInfo(), /*TemplateArgs=*/nullptr, Context.BoundMemberTy,
-      VK_RValue, OK_Ordinary);
-  if (HadMultipleCandidates)
-    ME->setHadMultipleCandidates(true);
-  MarkMemberReferenced(ME);
+  MemberExpr *ME =
+      BuildMemberExpr(Exp.get(), /*IsArrow=*/false, SourceLocation(),
+                      NestedNameSpecifierLoc(), SourceLocation(), Method,
+                      DeclAccessPair::make(FoundDecl, FoundDecl->getAccess()),
+                      HadMultipleCandidates, DeclarationNameInfo(),
+                      Context.BoundMemberTy, VK_RValue, OK_Ordinary);
 
   QualType ResultType = Method->getReturnType();
   ExprValueKind VK = Expr::getValueKindForType(ResultType);
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index 3d7b8db2f6710..f7b46a5e0f458 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -893,18 +893,31 @@ BuildMSPropertyRefExpr(Sema &S, Expr *BaseExpr, bool IsArrow,
                                            NameInfo.getLoc());
 }
 
-/// Build a MemberExpr AST node.
-static MemberExpr *BuildMemberExpr(
-    Sema &SemaRef, ASTContext &C, Expr *Base, bool isArrow,
-    SourceLocation OpLoc, const CXXScopeSpec &SS, SourceLocation TemplateKWLoc,
-    ValueDecl *Member, DeclAccessPair FoundDecl,
-    const DeclarationNameInfo &MemberNameInfo, QualType Ty, ExprValueKind VK,
-    ExprObjectKind OK, const TemplateArgumentListInfo *TemplateArgs = nullptr) {
-  assert((!isArrow || Base->isRValue()) && "-> base must be a pointer rvalue");
-  MemberExpr *E = MemberExpr::Create(
-      C, Base, isArrow, OpLoc, SS.getWithLocInContext(C), TemplateKWLoc, Member,
-      FoundDecl, MemberNameInfo, TemplateArgs, Ty, VK, OK);
-  SemaRef.MarkMemberReferenced(E);
+MemberExpr *Sema::BuildMemberExpr(
+    Expr *Base, bool IsArrow, SourceLocation OpLoc, const CXXScopeSpec *SS,
+    SourceLocation TemplateKWLoc, ValueDecl *Member, DeclAccessPair FoundDecl,
+    bool HadMultipleCandidates, const DeclarationNameInfo &MemberNameInfo,
+    QualType Ty, ExprValueKind VK, ExprObjectKind OK,
+    const TemplateArgumentListInfo *TemplateArgs) {
+  NestedNameSpecifierLoc NNS =
+      SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc();
+  return BuildMemberExpr(Base, IsArrow, OpLoc, NNS, TemplateKWLoc, Member,
+                         FoundDecl, HadMultipleCandidates, MemberNameInfo, Ty,
+                         VK, OK, TemplateArgs);
+}
+
+MemberExpr *Sema::BuildMemberExpr(
+    Expr *Base, bool IsArrow, SourceLocation OpLoc, NestedNameSpecifierLoc NNS,
+    SourceLocation TemplateKWLoc, ValueDecl *Member, DeclAccessPair FoundDecl,
+    bool HadMultipleCandidates, const DeclarationNameInfo &MemberNameInfo,
+    QualType Ty, ExprValueKind VK, ExprObjectKind OK,
+    const TemplateArgumentListInfo *TemplateArgs) {
+  assert((!IsArrow || Base->isRValue()) && "-> base must be a pointer rvalue");
+  MemberExpr *E = MemberExpr::Create(Context, Base, IsArrow, OpLoc, NNS,
+                                     TemplateKWLoc, Member, FoundDecl,
+                                     MemberNameInfo, TemplateArgs, Ty, VK, OK);
+  E->setHadMultipleCandidates(HadMultipleCandidates);
+  MarkMemberReferenced(E);
   return E;
 }
 
@@ -1115,10 +1128,10 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
                                                     OpLoc);
 
   if (VarDecl *Var = dyn_cast<VarDecl>(MemberDecl)) {
-    return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
-                           TemplateKWLoc, Var, FoundDecl, MemberNameInfo,
-                           Var->getType().getNonReferenceType(), VK_LValue,
-                           OK_Ordinary);
+    return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var,
+                           FoundDecl, /*MultipleCandidates=*/false,
+                           MemberNameInfo, Var->getType().getNonReferenceType(),
+                           VK_LValue, OK_Ordinary);
   }
 
   if (CXXMethodDecl *MemberFn = dyn_cast<CXXMethodDecl>(MemberDecl)) {
@@ -1132,24 +1145,25 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
       type = MemberFn->getType();
     }
 
-    return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
-                           TemplateKWLoc, MemberFn, FoundDecl, MemberNameInfo,
-                           type, valueKind, OK_Ordinary);
+    return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc,
+                           MemberFn, FoundDecl, /*MultipleCandidates=*/false,
+                           MemberNameInfo, type, valueKind, OK_Ordinary);
   }
   assert(!isa<FunctionDecl>(MemberDecl) && "member function not C++ method?");
 
   if (EnumConstantDecl *Enum = dyn_cast<EnumConstantDecl>(MemberDecl)) {
-    return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
-                           TemplateKWLoc, Enum, FoundDecl, MemberNameInfo,
-                           Enum->getType(), VK_RValue, OK_Ordinary);
+    return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Enum,
+                           FoundDecl, /*MultipleCandidates=*/false,
+                           MemberNameInfo, Enum->getType(), VK_RValue,
+                           OK_Ordinary);
   }
   if (VarTemplateDecl *VarTempl = dyn_cast<VarTemplateDecl>(MemberDecl)) {
     if (VarDecl *Var = getVarTemplateSpecialization(
             *this, VarTempl, TemplateArgs, MemberNameInfo, TemplateKWLoc))
-      return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
-                             TemplateKWLoc, Var, FoundDecl, MemberNameInfo,
-                             Var->getType().getNonReferenceType(), VK_LValue,
-                             OK_Ordinary);
+      return BuildMemberExpr(
+          BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var, FoundDecl,
+          /*MultipleCandidates=*/false, MemberNameInfo,
+          Var->getType().getNonReferenceType(), VK_LValue, OK_Ordinary);
     return ExprError();
   }
 
@@ -1805,9 +1819,10 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow,
     }
   }
 
-  return BuildMemberExpr(*this, Context, Base.get(), IsArrow, OpLoc, SS,
+  return BuildMemberExpr(Base.get(), IsArrow, OpLoc, &SS,
                          /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
-                         MemberNameInfo, MemberType, VK, OK);
+                         /*MultipleCandidates=*/false, MemberNameInfo,
+                         MemberType, VK, OK);
 }
 
 /// Builds an implicit member access expression.  The current context
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 30d809ac91b55..4bc725f4cf0b1 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -13912,14 +13912,11 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
       type = Context.BoundMemberTy;
     }
 
-    MemberExpr *ME = MemberExpr::Create(
-        Context, Base, MemExpr->isArrow(), MemExpr->getOperatorLoc(),
+    return BuildMemberExpr(
+        Base, MemExpr->isArrow(), MemExpr->getOperatorLoc(),
         MemExpr->getQualifierLoc(), MemExpr->getTemplateKeywordLoc(), Fn, Found,
-        MemExpr->getMemberNameInfo(), TemplateArgs, type, valueKind,
-        OK_Ordinary);
-    ME->setHadMultipleCandidates(true);
-    MarkMemberReferenced(ME);
-    return ME;
+        /*HadMultipleCandidates=*/true, MemExpr->getMemberNameInfo(),
+        type, valueKind, OK_Ordinary, TemplateArgs);
   }
 
   llvm_unreachable("Invalid reference to overloaded function");

From 78fdce25a1ba0694b8d0ce24d2bc99f19ef7c184 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 5 Jun 2019 01:00:34 +0000
Subject: [PATCH 1080/1176] [X86] Cleanup convertIntLogicToFPLogic a little.
 NFCI

-Use early returns to reduce indentation
-Replace multipe ifs with a switch.
-Replace an assert with an llvm_unreachable default in the switch.
-Check that the FP type we're going to use for the
 X86ISD::FAND/FOR/FXOR is legal rather than checking that the
 integer type matches the width of a legal scalar fp type. This all
 runs after legalization so it shouldn't really matter, but making
 sure we're using a valid type in the X86ISD node is really
 whats important.

llvm-svn: 362565
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 47 +++++++++++++------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6bdd448f5b8de..64585c8de0a65 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37667,34 +37667,35 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
 /// unnecessary moves from SSE to integer registers.
 static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
-  unsigned FPOpcode = ISD::DELETED_NODE;
-  if (N->getOpcode() == ISD::AND)
-    FPOpcode = X86ISD::FAND;
-  else if (N->getOpcode() == ISD::OR)
-    FPOpcode = X86ISD::FOR;
-  else if (N->getOpcode() == ISD::XOR)
-    FPOpcode = X86ISD::FXOR;
-
-  assert(FPOpcode != ISD::DELETED_NODE &&
-         "Unexpected input node for FP logic conversion");
-
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDLoc DL(N);
-  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
-      ((Subtarget.hasSSE1() && VT == MVT::i32) ||
-       (Subtarget.hasSSE2() && VT == MVT::i64))) {
-    SDValue N00 = N0.getOperand(0);
-    SDValue N10 = N1.getOperand(0);
-    EVT N00Type = N00.getValueType();
-    EVT N10Type = N10.getValueType();
-    if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
-      SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
-      return DAG.getBitcast(VT, FPLogic);
-    }
+
+  if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
+    return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+  SDValue N10 = N1.getOperand(0);
+  EVT N00Type = N00.getValueType();
+  EVT N10Type = N10.getValueType();
+
+  // Ensure that both types are the same and are legal scalar fp types.
+  if (N00Type != N10Type ||
+      !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
+        (Subtarget.hasSSE2() && N00Type == MVT::f64)))
+    return SDValue();
+
+  unsigned FPOpcode;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Unexpected input node for FP logic conversion");
+  case ISD::AND: FPOpcode = X86ISD::FAND; break;
+  case ISD::OR:  FPOpcode = X86ISD::FOR;  break;
+  case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
   }
-  return SDValue();
+
+  SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+  return DAG.getBitcast(VT, FPLogic);
 }
 
 /// If this is a zero/all-bits result that is bitwise-anded with a low bits

From c012188adaf740592b3bb8d13e4297721e2c8417 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Wed, 5 Jun 2019 01:27:39 +0000
Subject: [PATCH 1081/1176] Changed force build of GWP-ASan options parser to
 be statically linked instead of dynamic. This should help resolve a
 downstream build order issue against libc++.

llvm-svn: 362566
---
 compiler-rt/lib/gwp_asan/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/gwp_asan/CMakeLists.txt b/compiler-rt/lib/gwp_asan/CMakeLists.txt
index 771192f23e0bd..c5315245066e4 100644
--- a/compiler-rt/lib/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/CMakeLists.txt
@@ -77,7 +77,7 @@ if (COMPILER_RT_HAS_GWP_ASAN)
   foreach(arch ${GWP_ASAN_SUPPORTED_ARCH})
     add_compiler_rt_runtime(
       clang_rt.gwp_asan_options_parser
-      SHARED
+      STATIC
       ARCHS ${arch}
       SOURCES ${GWP_ASAN_OPTIONS_PARSER_SOURCES}
       ADDITIONAL_HEADERS ${GWP_ASAN_OPTIONS_PARSER_HEADERS}

From 2e05045e2601da7f88d93661b24277dd8790a0c8 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 5 Jun 2019 01:28:55 +0000
Subject: [PATCH 1082/1176] [TargetTransformInfo] assert on nullptr

Summary:
This was flagged in https://www.viva64.com/en/b/0629/ under "Snippet No.
38".

Add an assertion, since it's unlikely that this parameter is nullptr.

Reviewers: RKSimon, fhahn

Reviewed By: RKSimon

Subscribers: fhahn, llvm-commits, RKSimon, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62229

llvm-svn: 362567
---
 .../llvm/Analysis/TargetTransformInfoImpl.h   | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index af250aa638e5e..bb290ec898eb4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -681,14 +681,12 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
                  ArrayRef<const Value *> Operands) {
-    const GlobalValue *BaseGV = nullptr;
-    if (Ptr != nullptr) {
-      // TODO: will remove this when pointers have an opaque type.
-      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
-                 PointeeType &&
-             "explicit pointee type doesn't match operand's pointee type");
-      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
-    }
+    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
+    // TODO: will remove this when pointers have an opaque type.
+    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
+               PointeeType &&
+           "explicit pointee type doesn't match operand's pointee type");
+    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
     bool HasBaseReg = (BaseGV == nullptr);
 
     auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
@@ -731,13 +729,10 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       }
     }
 
-    // Assumes the address space is 0 when Ptr is nullptr.
-    unsigned AS =
-        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
-
     if (static_cast<T *>(this)->isLegalAddressingMode(
             TargetType, const_cast<GlobalValue *>(BaseGV),
-            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
+            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
+            Ptr->getType()->getPointerAddressSpace()))
       return TTI::TCC_Free;
     return TTI::TCC_Basic;
   }

From fe97754acff1a5bd47306d1a2769da6cc8966719 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Wed, 5 Jun 2019 01:31:43 +0000
Subject: [PATCH 1083/1176] Initial support for IBM MASS vector library

This is the LLVM portion of patch https://reviews.llvm.org/D59881.
The clang portion is to follow.

llvm-svn: 362568
---
 .../include/llvm/Analysis/TargetLibraryInfo.h |    1 +
 llvm/include/llvm/Analysis/VecFuncs.def       |   79 +-
 llvm/lib/Analysis/TargetLibraryInfo.cpp       |   10 +
 .../LoopVectorize/PowerPC/massv-altivec.ll    |  106 ++
 .../LoopVectorize/PowerPC/massv-calls.ll      | 1525 +++++++++++++++++
 .../LoopVectorize/PowerPC/massv-nobuiltin.ll  |   56 +
 .../PowerPC/massv-unsupported.ll              |  108 ++
 7 files changed, 1884 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll

diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index a40e04404ff64..4b5200f5a838a 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -86,6 +86,7 @@ class TargetLibraryInfoImpl {
   enum VectorLibrary {
     NoLibrary,  // Don't use any vector library.
     Accelerate, // Use Accelerate framework.
+    MASSV,      // IBM MASS vector library.
     SVML        // Intel short vector math library.
   };
 
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 15ff4def30049..4c9206266d9ab 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -8,7 +8,7 @@
 
 // This .def file will create mappings from scalar math functions to vector
 // functions along with their vectorization factor. The current support includes
-// such mappings for Accelerate framework and SVML library. 
+// such mappings for Accelerate framework, MASS vector library, and SVML library. 
 
 #if !(defined(TLI_DEFINE_VECFUNC))
 #define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
@@ -55,6 +55,82 @@ TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
 TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
 
 
+#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
+// IBM MASS library's vector Functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
+TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
+TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
+TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
+TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
+TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
+TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
+TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
+TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
+TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+
+
 #elif defined(TLI_DEFINE_SVML_VECFUNCS)
 // Intel SVM library's Vector Functions
 
@@ -169,5 +245,6 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
 
 #undef TLI_DEFINE_VECFUNC
 #undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_MASSV_VECFUNCS
 #undef TLI_DEFINE_SVML_VECFUNCS
 
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index e5cb2544f2d55..ef139d3257d2b 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -23,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
                           "No vector functions library"),
                clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
                           "Accelerate framework"),
+               clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
+                          "IBM MASS vector library"),
                clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
                           "Intel SVML library")));
 
@@ -1537,6 +1539,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
     addVectorizableFunctions(VecFuncs);
     break;
   }
+  case MASSV: {
+    const VecDesc VecFuncs[] = {
+    #define TLI_DEFINE_MASSV_VECFUNCS
+    #include "llvm/Analysis/VecFuncs.def"
+    };
+    addVectorizableFunctions(VecFuncs);
+    break;
+  }
   case SVML: {
     const VecDesc VecFuncs[] = {
     #define TLI_DEFINE_SVML_VECFUNCS
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
new file mode 100644
index 0000000000000..c63db5b0b6636
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-altivec.ll
@@ -0,0 +1,106 @@
+; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -mattr=-altivec -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64" 
+target triple = "powerpc64le-unknown-linux-gnu"
+
+declare double @cbrt(double) #0
+declare float @cbrtf(float) #0
+
+declare double @atanh(double) #0
+declare float @atanhf(float) #0
+
+; MASSV is unsupported for AltiVec.
+; Check that massv entries are not generated.
+define void @cbrt_f64(double* nocapture %varray) {
+; CHECK-LABEL: @cbrt_f64(
+; CHECK-NOT: __cbrtd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @cbrt(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cbrt_f32(float* nocapture %varray) {
+; CHECK-LABEL: @cbrt_f32(
+; CHECK-NOT: __cbrtf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @cbrtf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atanh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @atanh_f64(
+; CHECK-NOT: __atanhd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @atanh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atanh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @atanh_f32(
+; CHECK-NOT: __atanhf4_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @atanhf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
new file mode 100644
index 0000000000000..a08c23b5b2dd8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
@@ -0,0 +1,1525 @@
+; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64" 
+target triple = "powerpc64le-unknown-linux-gnu"
+
+declare double @cbrt(double) #0
+declare float @cbrtf(float) #0
+
+declare double @pow(double, double) #0
+declare double @llvm.pow.f64(double, double) #0
+declare float @powf(float, float) #0
+declare float @llvm.pow.f32(float, float) #0
+
+declare double @sqrt(double) #0
+declare float @sqrtf(float) #0
+
+declare double @exp(double) #0
+declare double @llvm.exp.f64(double) #0
+declare float @expf(float) #0
+declare float @llvm.exp.f32(float) #0
+
+declare double @exp2(double) #0
+declare double @llvm.exp2.f64(double) #0
+declare float @exp2f(float) #0
+declare float @llvm.exp2.f32(float) #0
+
+declare double @expm1(double) #0
+declare float @expm1f(float) #0
+
+declare double @log(double) #0
+declare double @llvm.log.f64(double) #0
+declare float @logf(float) #0
+declare float @llvm.log.f32(float) #0
+
+declare double @log1p(double) #0
+declare float @log1pf(float) #0
+
+declare double @log10(double) #0
+declare double @llvm.log10.f64(double) #0
+declare float @log10f(float) #0
+declare float @llvm.log10.f32(float) #0
+
+declare double @log2(double) #0
+declare double @llvm.log2.f64(double) #0
+declare float @log2f(float) #0
+declare float @llvm.log2.f32(float) #0
+
+declare double @sin(double) #0
+declare double @llvm.sin.f64(double) #0
+declare float @sinf(float) #0
+declare float @llvm.sin.f32(float) #0
+
+declare double @cos(double) #0
+declare double @llvm.cos.f64(double) #0
+declare float @cosf(float) #0
+declare float @llvm.cos.f32(float) #0
+
+declare double @tan(double) #0
+declare float @tanf(float) #0
+
+declare double @asin(double) #0
+declare float @asinf(float) #0
+
+declare double @acos(double) #0
+declare float @acosf(float) #0
+
+declare double @atan(double) #0
+declare float @atanf(float) #0
+
+declare double @atan2(double) #0
+declare float @atan2f(float) #0
+
+declare double @sinh(double) #0
+declare float @sinhf(float) #0
+
+declare double @cosh(double) #0
+declare float @coshf(float) #0
+
+declare double @tanh(double) #0
+declare float @tanhf(float) #0
+
+declare double @asinh(double) #0
+declare float @asinhf(float) #0
+
+declare double @acosh(double) #0
+declare float @acoshf(float) #0
+
+declare double @atanh(double) #0
+declare float @atanhf(float) #0
+
+define void @cbrt_f64(double* nocapture %varray) {
+; CHECK-LABEL: @cbrt_f64(
+; CHECK: __cbrtd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @cbrt(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cbrt_f32(float* nocapture %varray) {
+; CHECK-LABEL: @cbrt_f32(
+; CHECK: __cbrtf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @cbrtf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) {
+; CHECK-LABEL: @pow_f64(
+; CHECK:  __powd2_massv{{.*}}<2 x double>
+; CHECK:  ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
+  %tmp1 = load double, double* %arrayidx, align 4
+  %tmp2 = tail call double @pow(double %conv, double %tmp1)
+  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %tmp2, double* %arrayidx2, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @pow_f64_intrinsic(double* nocapture %varray, double* nocapture readonly %exp) {
+; CHECK-LABEL: @pow_f64_intrinsic(
+; CHECK: __powd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %arrayidx = getelementptr inbounds double, double* %exp, i64 %iv
+  %tmp1 = load double, double* %arrayidx, align 4
+  %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
+  %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %tmp2, double* %arrayidx2, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) {
+; CHECK-LABEL: @pow_f32(
+; CHECK: __powf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
+  %tmp1 = load float, float* %arrayidx, align 4
+  %tmp2 = tail call float @powf(float %conv, float %tmp1)
+  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %tmp2, float* %arrayidx2, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
+; CHECK-LABEL: @pow_f32_intrinsic(
+; CHECK: __powf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
+  %tmp1 = load float, float* %arrayidx, align 4
+  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
+  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %tmp2, float* %arrayidx2, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f64(double* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f64(
+; CHECK: __sqrtd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @sqrt(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f32(float* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f32(
+; CHECK: __sqrtf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @sqrtf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp_f64(double* nocapture %varray) {
+; CHECK-LABEL: @exp_f64(
+; CHECK: __expd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @exp(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @exp_f64_intrinsic(
+; CHECK: __expd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.exp.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp_f32(float* nocapture %varray) {
+; CHECK-LABEL: @exp_f32(
+; CHECK: __expf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @expf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @exp_f32_intrinsic(
+; CHECK: __expf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.exp.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp2_f64(double* nocapture %varray) {
+; CHECK-LABEL: @exp2_f64(
+; CHECK: __exp2d2_massv{{.*}}<2 x double>
+; CHECK:  ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @exp2(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp2_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @exp2_f64_intrinsic(
+; CHECK: __exp2d2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.exp2.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp2_f32(float* nocapture %varray) {
+; CHECK-LABEL: @exp2_f32(
+; CHECK: __exp2f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @exp2f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp2_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @exp2_f32_intrinsic(
+; CHECK: __exp2f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.exp2.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @expm1_f64(double* nocapture %varray) {
+; CHECK-LABEL: @expm1_f64(
+; CHECK: __expm1d2_massv{{.*}}<2 x double>
+; CHECK:  ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @expm1(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @expm1_f32(float* nocapture %varray) {
+; CHECK-LABEL: @expm1_f32(
+; CHECK: __expm1f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @expm1f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log_f64(double* nocapture %varray) {
+; CHECK-LABEL: @log_f64(
+; CHECK: __logd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @log_f64_intrinsic(
+; CHECK: __logd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log_f32(float* nocapture %varray) {
+; CHECK-LABEL: @log_f32(
+; CHECK: __logf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @logf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @log_f32_intrinsic(
+; CHECK: __logf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log1p_f64(double* nocapture %varray) {
+; CHECK-LABEL: @log1p_f64(
+; CHECK: __log1pd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log1p(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log1p_f32(float* nocapture %varray) {
+; CHECK-LABEL: @log1p_f32(
+; CHECK: __log1pf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @log1pf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f64(double* nocapture %varray) {
+; CHECK-LABEL: @log10_f64(
+; CHECK: __log10d2_massv(<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log10(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @log10_f64_intrinsic(
+; CHECK: __log10d2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log10.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f32(float* nocapture %varray) {
+; CHECK-LABEL: @log10_f32(
+; CHECK: __log10f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @log10f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @log10_f32_intrinsic(
+; CHECK: __log10f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log10.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f64(double* nocapture %varray) {
+; CHECK-LABEL: @log2_f64(
+; CHECK: __log2d2_massv(<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log2(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @log2_f64_intrinsic(
+; CHECK: __log2d2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log2.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f32(float* nocapture %varray) {
+; CHECK-LABEL: @log2_f32(
+; CHECK: __log2f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @log2f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log2_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @log2_f32_intrinsic(
+; CHECK: __log2f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log2.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sin_f64(double* nocapture %varray) {
+; CHECK-LABEL: @sin_f64(
+; CHECK: __sind2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @sin(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sin_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @sin_f64_intrinsic(
+; CHECK: __sind2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sin.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sin_f32(float* nocapture %varray) {
+; CHECK-LABEL: @sin_f32(
+; CHECK: __sinf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @sinf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sin_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @sin_f32_intrinsic(
+; CHECK: __sinf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sin.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cos_f64(double* nocapture %varray) {
+; CHECK-LABEL: @cos_f64(
+; CHECK: __cosd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @cos(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cos_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @cos_f64_intrinsic(
+; CHECK:    [[TMP5:%.*]] = call <2 x double> @__cosd2_massv(<2 x double> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.cos.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cos_f32(float* nocapture %varray) {
+; CHECK-LABEL: @cos_f32(
+; CHECK: __cosf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @cosf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cos_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @cos_f32_intrinsic(
+; CHECK: __cosf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.cos.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @tan_f64(double* nocapture %varray) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: __tand2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @tan(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @tan_f32(float* nocapture %varray) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: __tanf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @tanf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @asin_f64(double* nocapture %varray) {
+; CHECK-LABEL: @asin_f64(
+; CHECK: __asind2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @asin(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @asin_f32(float* nocapture %varray) {
+; CHECK-LABEL: @asin_f32(
+; CHECK: __asinf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @asinf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @acos_f64(double* nocapture %varray) {
+; CHECK-LABEL: @acos_f64(
+; CHECK: __acosd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @acos(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @acos_f32(float* nocapture %varray) {
+; CHECK-LABEL: @acos_f32(
+; CHECK: __acosf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @acosf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atan_f64(double* nocapture %varray) {
+; CHECK-LABEL: @atan_f64(
+; CHECK: __atand2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @atan(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atan_f32(float* nocapture %varray) {
+; CHECK-LABEL: @atan_f32(
+; CHECK: __atanf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @atanf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atan2_f64(double* nocapture %varray) {
+; CHECK-LABEL: @atan2_f64(
+; CHECK: __atan2d2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @atan2(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atan2_f32(float* nocapture %varray) {
+; CHECK-LABEL: @atan2_f32(
+; CHECK: __atan2f4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @atan2f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sinh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @sinh_f64(
+; CHECK: __sinhd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @sinh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sinh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @sinh_f32(
+; CHECK: __sinhf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @sinhf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cosh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @cosh_f64(
+; CHECK: __coshd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @cosh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @cosh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @cosh_f32(
+; CHECK: __coshf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @coshf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @tanh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @tanh_f64(
+; CHECK: __tanhd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @tanh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @tanh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @tanh_f32(
+; CHECK: __tanhf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @tanhf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @asinh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @asinh_f64(
+; CHECK: __asinhd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @asinh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @asinh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @asinh_f32(
+; CHECK: __asinhf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @asinhf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @acosh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @acosh_f64(
+; CHECK: __acoshd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @acosh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @acosh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @acosh_f32(
+; CHECK: __acoshf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @acoshf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atanh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @atanh_f64(
+; CHECK: __atanhd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @atanh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atanh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @atanh_f32(
+; CHECK: __atanhf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @atanhf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
new file mode 100644
index 0000000000000..e7503b615f44a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-nobuiltin.ll
@@ -0,0 +1,56 @@
+; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64" 
+target triple = "powerpc64le-unknown-linux-gnu"
+
+declare double @atanh(double) #1
+declare float @atanhf(float) #1
+
+; Check that functions marked as nobuiltin are not lowered to massv entries.
+define void @atanh_f64(double* nocapture %varray) {
+; CHECK-LABEL: @atanh_f64(
+; CHECK-NOT: __atanhd2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @atanh(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @atanh_f32(float* nocapture %varray) {
+; CHECK-LABEL: @atanh_f32(
+; CHECK-NOT: __atanhf4_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @atanhf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+attributes #1 = { nobuiltin nounwind }
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll
new file mode 100644
index 0000000000000..f9b4b72a027e8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-unsupported.ll
@@ -0,0 +1,108 @@
+; RUN: opt -vector-library=MASSV -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64" 
+target triple = "powerpc64le-unknown-linux-gnu"
+
+declare double @ceil(double) #0
+declare float @fabsf(float) #0
+
+declare double @llvm.sqrt.f64(double) #0
+declare float @llvm.sqrt.f32(float) #0
+
+; Vector counterpart of ceil is unsupported in MASSV library.
+define void @ceil_f64(double* nocapture %varray) {
+; CHECK-LABEL: @ceil_f64(
+; CHECK-NOT: __ceild2_massv{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @ceil(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; Vector counterpart of fabs is unsupported in MASSV library.
+define void @fabs_f32(float* nocapture %varray) {
+; CHECK-LABEL: @fabs_f32(
+; CHECK-NOT: __fabsf4_massv{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @fabsf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; sqrt intrinsics are converted to their vector counterpart intrinsics.
+; They are not lowered to MASSV entries.
+define void @sqrt_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f64_intrinsic(
+; CHECK: llvm.sqrt.v2f64{{.*}}<2 x double>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sqrt.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f32_intrinsic(
+; CHECK: llvm.sqrt.v4f32{{.*}}<4 x float>
+; CHECK: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sqrt.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+attributes #0 = { nounwind }

From f090e6f7b6bec197caa288422f24160cbbbbe554 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 5 Jun 2019 01:36:48 +0000
Subject: [PATCH 1084/1176] [llvm-objdump/llvm-readobj/obj2yaml/yaml2obj]
 Support DT_PPC_GOT and DT_PPC_OPT

In glibc, DT_PPC_GOT indicates that PowerPC32 Secure PLT ABI is used.
I plan to use it in D62464.

DT_PPC_OPT currently indicates if a TLSDESC inspired TLS optimization is
enabled.

Reviewed By: grimar, jhenderson, rupprecht

Differential Revision: https://reviews.llvm.org/D62851

llvm-svn: 362569
---
 .../include/llvm/BinaryFormat/DynamicTags.def | 13 ++++++
 llvm/lib/ObjectYAML/ELFYAML.cpp               |  9 ++++
 .../elf-dynamic-section-machine-specific.test | 23 +++++++---
 .../elf-dynamic-tags-machine-specific.yaml    | 38 +++++++++++++++-
 .../elf-dynamic-tags-machine-specific.test    | 45 ++++++++++++++-----
 .../obj2yaml/dynamic-section-arch-tags.test   | 32 +++++++++++--
 6 files changed, 137 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def
index c884badab3603..aec408bd2d72b 100644
--- a/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -21,6 +21,11 @@
 #define MIPS_DYNAMIC_TAG_DEFINED
 #endif
 
+#ifndef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG_DEFINED
+#endif
+
 #ifndef PPC64_DYNAMIC_TAG
 #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG_DEFINED
@@ -199,6 +204,10 @@ MIPS_DYNAMIC_TAG(MIPS_RWPLT, 0x70000034)        // Points to the base
 MIPS_DYNAMIC_TAG(MIPS_RLD_MAP_REL, 0x70000035)  // Relative offset of run time loader
                                                 // map, used for debugging.
 
+// PPC specific dynamic table entries.
+PPC_DYNAMIC_TAG(PPC_GOT, 0x70000000) // Uses Secure PLT ABI.
+PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
+
 // PPC64 specific dynamic table entries.
 PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
                                            // first glink lazy resolver stub.
@@ -225,6 +234,10 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF)    // Shared object to get values from
 #undef HEXAGON_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG_DEFINED
 #endif
+#ifdef PPC_DYNAMIC_TAG_DEFINED
+#undef PPC_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG_DEFINED
+#endif
 #ifdef PPC64_DYNAMIC_TAG_DEFINED
 #undef PPC64_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG_DEFINED
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 1d230700f4471..33d011cbd9f84 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -683,6 +683,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
 #define AARCH64_DYNAMIC_TAG(name, value)
 #define MIPS_DYNAMIC_TAG(name, value)
 #define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG(name, value)
 #define PPC64_DYNAMIC_TAG(name, value)
 // Ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
 #define DYNAMIC_TAG_MARKER(name, value)
@@ -711,6 +712,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
 #undef HEXAGON_DYNAMIC_TAG
 #define HEXAGON_DYNAMIC_TAG(name, value)
     break;
+  case ELF::EM_PPC:
+#undef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value)
+    break;
   case ELF::EM_PPC64:
 #undef PPC64_DYNAMIC_TAG
 #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
@@ -725,6 +733,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
 #undef AARCH64_DYNAMIC_TAG
 #undef MIPS_DYNAMIC_TAG
 #undef HEXAGON_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG
 #undef PPC64_DYNAMIC_TAG
 #undef DYNAMIC_TAG_MARKER
 #undef STRINGIFY
diff --git a/llvm/test/tools/llvm-objdump/elf-dynamic-section-machine-specific.test b/llvm/test/tools/llvm-objdump/elf-dynamic-section-machine-specific.test
index 23224f01bba11..2355dd9d81c25 100644
--- a/llvm/test/tools/llvm-objdump/elf-dynamic-section-machine-specific.test
+++ b/llvm/test/tools/llvm-objdump/elf-dynamic-section-machine-specific.test
@@ -1,4 +1,4 @@
-# Test that hexagon machine-specific tags can be dumped.
+## Test that hexagon machine-specific tags can be dumped.
 # RUN: yaml2obj --docnum=1 -o %t.hex \
 # RUN:          %S/../llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
 # RUN: llvm-objdump -p %t.hex | FileCheck %s --check-prefix=HEXAGON
@@ -11,7 +11,7 @@
 # HEXAGON-NEXT:   <unknown:>0x1234abcd 0x0000000000000001
 
 
-# Test that MIPS machine-specific tags can be dumped.
+## Test that MIPS machine-specific tags can be dumped.
 # RUN: yaml2obj --docnum=2 -o %t.mips \
 # RUN:          %S/../llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
 # RUN: llvm-objdump -p %t.mips | FileCheck %s --check-prefix=MIPS
@@ -66,12 +66,23 @@
 # MIPS-NEXT:   <unknown:>0x1234abcd 0x0000000000000001
 
 
-# Test that PPC64 machine-specific tags can be dumped.
+## Test that PPC machine-specific tags can be dumped.
 # RUN: yaml2obj --docnum=3 -o %t.ppc \
 # RUN:          %S/../llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
 # RUN: llvm-objdump -p %t.ppc | FileCheck %s --check-prefix=PPC
 
 # PPC:      Dynamic Section:
-# PPC-NEXT:   HASH                 0x0000000000001000
-# PPC-NEXT:   PPC64_GLINK          0x0000000000001000
-# PPC-NEXT:   <unknown:>0x1234abcd 0x0000000000000001
+# PPC-NEXT:   PPC_GOT              0x000200c0
+# PPC-NEXT:   PPC_OPT              0x00000001
+# PPC-NEXT:   <unknown:>0x1234abcd 0x00000001
+
+
+## Test that PPC64 machine-specific tags can be dumped.
+# RUN: yaml2obj --docnum=4 -o %t.ppc64 \
+# RUN:          %S/../llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
+# RUN: llvm-objdump -p %t.ppc64 | FileCheck %s --check-prefix=PPC64
+
+# PPC64:      Dynamic Section:
+# PPC64-NEXT:   HASH                 0x0000000000001000
+# PPC64-NEXT:   PPC64_GLINK          0x0000000000001000
+# PPC64-NEXT:   <unknown:>0x1234abcd 0x0000000000000001
diff --git a/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml b/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
index 653a7e9eeb0dd..222b8723e1164 100644
--- a/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
+++ b/llvm/test/tools/llvm-readobj/Inputs/elf-dynamic-tags-machine-specific.yaml
@@ -165,7 +165,43 @@ ProgramHeaders:
     Sections:
       - Section: .dynamic
 
-# Third document: PPC64
+# Third document: PPC
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS32
+  Data:    ELFDATA2MSB
+  Type:    ET_EXEC
+  Machine: EM_PPC
+Sections:
+  - Name:    .dynstr
+    Type:    SHT_STRTAB
+    Address: 0x200
+    Size:    0x10
+    Content: "004400550066007700"
+  - Name:    .dynamic
+    Type:    SHT_DYNAMIC
+    Address: 0x20000
+    Entries:
+      - Tag:   DT_PPC_GOT
+        Value: 0x200c0
+      - Tag:   DT_PPC_OPT
+        Value: 1
+      - Tag:   0x1234abcd
+        Value: 0x1
+      - Tag:   DT_NULL
+        Value: 0
+ProgramHeaders:
+  - Type: PT_LOAD
+    VAddr: 0x1000
+    Sections:
+      - Section: .dynstr
+      - Section: .dynamic
+  - Type: PT_DYNAMIC
+    VAddr: 0x20000
+    Sections:
+      - Section: .dynamic
+
+# Fourth document: PPC64
 --- !ELF
 FileHeader:
   Class:   ELFCLASS64
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test b/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
index 1d6c3b33b88c0..cebb1cbac7213 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-tags-machine-specific.test
@@ -132,28 +132,49 @@
 # GNU-MIPS-NEXT:   0x0000000000000000 (NULL)               0x0
 
 
-# Test that PPC64 machine-specific tags can be dumped.
+# Test that PPC machine-specific tags can be dumped.
 # RUN: yaml2obj --docnum=3 %S/Inputs/elf-dynamic-tags-machine-specific.yaml -o %t.ppc
 # RUN: llvm-readobj --dynamic-table %t.ppc | FileCheck %s --check-prefix=LLVM-PPC
 # RUN: llvm-readelf --dynamic-table %t.ppc | FileCheck %s --check-prefix=GNU-PPC
 
 # LLVM-PPC:      DynamicSection [ (4 entries)
-# LLVM-PPC-NEXT:   Tag                Type                 Name/Value
-# LLVM-PPC-NEXT:   0x0000000000000004 HASH                 0x1000
-# LLVM-PPC-NEXT:   0x0000000070000000 PPC64_GLINK          0x1000
-# LLVM-PPC-NEXT:   0x000000001234ABCD unknown              0x1
-# LLVM-PPC-NEXT:   0x0000000000000000 NULL                 0x0
+# LLVM-PPC-NEXT:   Tag        Type                 Name/Value
+# LLVM-PPC-NEXT:   0x70000000 PPC_GOT              0x200C0
+# LLVM-PPC-NEXT:   0x70000001 PPC_OPT              0x1
+# LLVM-PPC-NEXT:   0x1234ABCD unknown              0x1
+# LLVM-PPC-NEXT:   0x00000000 NULL                 0x0
 # LLVM-PPC-NEXT: ]
 
 # GNU-PPC:      Dynamic section at offset {{.*}} contains 4 entries:
-# GNU-PPC-NEXT:   Tag                Type                 Name/Value
-# GNU-PPC-NEXT:   0x0000000000000004 (HASH)               0x1000
-# GNU-PPC-NEXT:   0x0000000070000000 (PPC64_GLINK)        0x1000
-# GNU-PPC-NEXT:   0x000000001234abcd (unknown)            0x1
-# GNU-PPC-NEXT:   0x0000000000000000 (NULL)               0x0
+# GNU-PPC-NEXT:   Tag        Type                 Name/Value
+# GNU-PPC-NEXT:   0x70000000 (PPC_GOT)            0x200c0
+# GNU-PPC-NEXT:   0x70000001 (PPC_OPT)            0x1
+# GNU-PPC-NEXT:   0x1234abcd (unknown)            0x1
+# GNU-PPC-NEXT:   0x00000000 (NULL)               0x0
+
+
+# Test that PPC64 machine-specific tags can be dumped.
+# RUN: yaml2obj --docnum=4 %S/Inputs/elf-dynamic-tags-machine-specific.yaml -o %t.ppc64
+# RUN: llvm-readobj --dynamic-table %t.ppc64 | FileCheck %s --check-prefix=LLVM-PPC64
+# RUN: llvm-readelf --dynamic-table %t.ppc64 | FileCheck %s --check-prefix=GNU-PPC64
+
+# LLVM-PPC64:      DynamicSection [ (4 entries)
+# LLVM-PPC64-NEXT:   Tag                Type                 Name/Value
+# LLVM-PPC64-NEXT:   0x0000000000000004 HASH                 0x1000
+# LLVM-PPC64-NEXT:   0x0000000070000000 PPC64_GLINK          0x1000
+# LLVM-PPC64-NEXT:   0x000000001234ABCD unknown              0x1
+# LLVM-PPC64-NEXT:   0x0000000000000000 NULL                 0x0
+# LLVM-PPC64-NEXT: ]
+
+# GNU-PPC64:      Dynamic section at offset {{.*}} contains 4 entries:
+# GNU-PPC64-NEXT:   Tag                Type                 Name/Value
+# GNU-PPC64-NEXT:   0x0000000000000004 (HASH)               0x1000
+# GNU-PPC64-NEXT:   0x0000000070000000 (PPC64_GLINK)        0x1000
+# GNU-PPC64-NEXT:   0x000000001234abcd (unknown)            0x1
+# GNU-PPC64-NEXT:   0x0000000000000000 (NULL)               0x0
 
 # Test that AARCH64 machine-specific tags can be dumped.
-# RUN: yaml2obj --docnum=4 %S/Inputs/elf-dynamic-tags-machine-specific.yaml -o %t.aarch64
+# RUN: yaml2obj --docnum=5 %S/Inputs/elf-dynamic-tags-machine-specific.yaml -o %t.aarch64
 # RUN: llvm-readobj --dynamic-table %t.aarch64 | FileCheck %s --check-prefix=LLVM-AARCH64
 # RUN: llvm-readelf --dynamic-table %t.aarch64 | FileCheck %s --check-prefix=GNU-AARCH64
 
diff --git a/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test b/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test
index badb9cc3edd6d..090477e66743a 100644
--- a/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test
+++ b/llvm/test/tools/obj2yaml/dynamic-section-arch-tags.test
@@ -229,9 +229,33 @@ Sections:
       - Tag:             DT_HEXAGON_PLT
         Value:           0x0000000000000003
 
-## Check we can handle PPC64 specific tags.
+## Check we can handle PPC specific tags.
 # RUN: yaml2obj -docnum=3 %s -o %t3
-# RUN: obj2yaml %t3 | FileCheck %s --check-prefix=EM_PPC64
+# RUN: obj2yaml %t3 | FileCheck %s --check-prefix=EM_PPC
+
+# EM_PPC:      - Tag:             DT_PPC_GOT
+# EM_PPC-NEXT:   Value:           0x0000000000020000
+# EM_PPC-NEXT: - Tag:             DT_PPC_OPT
+# EM_PPC-NEXT:   Value:           0x0000000000000001
+
+--- !ELF
+FileHeader:
+  Class:             ELFCLASS32
+  Data:              ELFDATA2MSB
+  Type:              ET_REL
+  Machine:           EM_PPC
+Sections:
+  - Name:            .dynamic
+    Type:            SHT_DYNAMIC
+    Entries:
+      - Tag:             DT_PPC_GOT
+        Value:           0x0000000000020000
+      - Tag:             DT_PPC_OPT
+        Value:           0x0000000000000001
+
+## Check we can handle PPC64 specific tags.
+# RUN: yaml2obj -docnum=4 %s -o %t4
+# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=EM_PPC64
 
 # EM_PPC64:      - Tag:             DT_PPC64_GLINK
 # EM_PPC64-NEXT:   Value:           0x0000000000000001
@@ -250,7 +274,7 @@ Sections:
         Value:           0x0000000000000001
 
 ## Check we can handle AARCH64 specific tags.
-# RUN: yaml2obj -docnum=4 %s -o %t2
+# RUN: yaml2obj -docnum=5 %s -o %t2
 # RUN: obj2yaml %t2 | FileCheck %s --check-prefix=AARCH64
 
 # AARCH64:      - Tag:             DT_AARCH64_BTI_PLT
@@ -278,7 +302,7 @@ Sections:
 ## Here for EM_PPC64 we are trying to use DT_HEXAGON_SYMSZ
 ## instead of DT_PPC64_GLINK. They both have value of 0x70000000.
 
-# RUN: not yaml2obj -docnum=5 %s 2>&1 | FileCheck %s --check-prefix=ERR
+# RUN: not yaml2obj -docnum=6 %s 2>&1 | FileCheck %s --check-prefix=ERR
 # ERR:      error: invalid hex64 number
 # ERR-NEXT: - Tag: DT_HEXAGON_SYMSZ
 

From ff918fb487e3f3a161217bd855889d3ea7a9a6e8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 5 Jun 2019 01:49:06 +0000
Subject: [PATCH 1085/1176] Fix -Wsign-compare by explicit cast after r362557

llvm-svn: 362570
---
 .../Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
index 34d08167a16b2..43041ca1bb2fb 100644
--- a/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/x86/x86AssemblyInspectionEngine.cpp
@@ -819,7 +819,7 @@ bool x86AssemblyInspectionEngine::local_branch_p (
   int offset;
   if (pc_rel_branch_or_jump_p (instruction_length, offset) && offset != 0) {
     addr_t next_pc_value = current_func_text_offset + instruction_length;
-    if (offset < 0 && abs (offset) > current_func_text_offset) {
+    if (offset < 0 && addr_t(-offset) > current_func_text_offset) {
       // Branch target is before the start of this function
       return false;
     }

From 6321c680659118220e954511d2aec3770d3c8154 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Wed, 5 Jun 2019 01:57:57 +0000
Subject: [PATCH 1086/1176] Initial support for vectorization using MASSV (IBM
 MASS vector library)

Part 2 (the Clang portion) of D59881.

This patch (first of two patches) enables the vectorizer to recognize the
IBM MASS vector library routines. This patch specifically adds support for
recognizing the -vector-library=MASSV option, and defines mappings from IEEE
standard scalar math functions to generic PowerPC MASS vector counterparts.
For instance, the generic PowerPC MASS vector entry for double-precision
cbrt function is __cbrtd2_massv.

The second patch will further lower the generic PowerPC vector entries to
PowerPC subtarget-specific entries.
For instance, the PowerPC generic entry cbrtd2_massv is lowered to
cbrtd2_P9 for Power9 subtarget.

The overall support for MASS vector library is presented as such in two patches
for ease of review.

Patch by Jeeva Paudel.

Differential revision: https://reviews.llvm.org/D59881

llvm-svn: 362571
---
 clang/include/clang/Basic/CodeGenOptions.h | 1 +
 clang/include/clang/Driver/Options.td      | 2 +-
 clang/lib/CodeGen/BackendUtil.cpp          | 3 +++
 clang/lib/Frontend/CompilerInvocation.cpp  | 2 ++
 clang/test/Driver/autocomplete.c           | 1 +
 clang/test/Driver/fveclib.c                | 2 ++
 6 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index 200706fda7cac..6bd939aee3045 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -53,6 +53,7 @@ class CodeGenOptions : public CodeGenOptionsBase {
   enum VectorLibrary {
     NoLibrary,  // Don't use any vector library.
     Accelerate, // Use the Accelerate framework.
+    MASSV,      // IBM MASS vector library.
     SVML        // Intel short vector math library.
   };
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index b86d39261e6ef..1e5b72d1d5319 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1418,7 +1418,7 @@ def fno_experimental_new_pass_manager : Flag<["-"], "fno-experimental-new-pass-m
   Group<f_clang_Group>, Flags<[CC1Option]>,
   HelpText<"Disables an experimental new pass manager in LLVM.">;
 def fveclib : Joined<["-"], "fveclib=">, Group<f_Group>, Flags<[CC1Option]>,
-    HelpText<"Use the given vector functions library">, Values<"Accelerate,SVML,none">;
+    HelpText<"Use the given vector functions library">, Values<"Accelerate,MASSV,SVML,none">;
 def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group<f_Group>,
   HelpText<"Disallow implicit conversions between vectors with a different number of elements or different element types">, Flags<[CC1Option]>;
 def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group<f_Group>,
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index c5e56c7a06914..ec7c62dae24a8 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -340,6 +340,9 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
   case CodeGenOptions::Accelerate:
     TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);
     break;
+  case CodeGenOptions::MASSV:
+    TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV);
+    break;    
   case CodeGenOptions::SVML:
     TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
     break;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7ac58ae4da4b9..ca0f2fc845a8c 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -682,6 +682,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
     StringRef Name = A->getValue();
     if (Name == "Accelerate")
       Opts.setVecLib(CodeGenOptions::Accelerate);
+    else if (Name == "MASSV")
+      Opts.setVecLib(CodeGenOptions::MASSV);
     else if (Name == "SVML")
       Opts.setVecLib(CodeGenOptions::SVML);
     else if (Name == "none")
diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
index f8271770d063c..7805c3bc7894b 100644
--- a/clang/test/Driver/autocomplete.c
+++ b/clang/test/Driver/autocomplete.c
@@ -68,6 +68,7 @@
 // FLTOALL-NEXT: thin
 // RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL
 // FVECLIBALL: Accelerate
+// FVECLIBALL-NEXT: MASSV
 // FVECLIBALL-NEXT: none
 // FVECLIBALL-NEXT: SVML
 // RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL
diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c
index 3bd4bf630f0ad..2ec35280d550f 100644
--- a/clang/test/Driver/fveclib.c
+++ b/clang/test/Driver/fveclib.c
@@ -1,9 +1,11 @@
 // RUN: %clang -### -c -fveclib=none %s 2>&1 | FileCheck -check-prefix CHECK-NOLIB %s
 // RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 | FileCheck -check-prefix CHECK-ACCELERATE %s
+// RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck -check-prefix CHECK-MASSV %s
 // RUN: not %clang -c -fveclib=something %s 2>&1 | FileCheck -check-prefix CHECK-INVALID %s
 
 // CHECK-NOLIB: "-fveclib=none"
 // CHECK-ACCELERATE: "-fveclib=Accelerate"
+// CHECK-MASSV: "-fveclib=MASSV"
 
 // CHECK-INVALID: error: invalid value 'something' in '-fveclib=something'
 

From 4b7bdcd318426c3f1728e0e8386b23f9ad990c13 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@ubisoft.com>
Date: Wed, 5 Jun 2019 02:01:43 +0000
Subject: [PATCH 1087/1176] [LLD][COFF] Don't take into account the 'age' when
 looking for PDB type server

The age field is only there to say how many times an OBJ or a PDB was incrementally linked. It shouldn't be used to validate the link between the OBJ and the PDB.

Differential Revision: https://reviews.llvm.org/D62837

llvm-svn: 362572
---
 lld/COFF/DebugTypes.cpp                                   | 4 ++--
 lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
index bcdb33fadee66..770de80486c4b 100644
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -198,9 +198,9 @@ TypeServerSource::findFromFile(const ObjFile *DependentFile) {
   pdb::InfoStream &Info = cantFail(PDBFile.getPDBInfoStream());
 
   // Just because a file with a matching name was found doesn't mean it can be
-  // used. The GUID and Age must match between the PDB header and the OBJ
+  // used. The GUID must match between the PDB header and the OBJ
   // TypeServer2 record. The 'Age' is used by MSVC incremental compilation.
-  if (Info.getGuid() != TS.getGuid() || Info.getAge() != TS.getAge())
+  if (Info.getGuid() != TS.getGuid())
     return createFileError(
         TS.Name,
         make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
diff --git a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
index 23656d1807cff..dd95a3df8893c 100644
--- a/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
+++ b/lld/test/COFF/Inputs/pdb-type-server-valid-signature.yaml
@@ -69,7 +69,7 @@ sections:
       - Kind:            LF_TYPESERVER2
         TypeServer2:
           Guid:            '{8DABD2A0-28FF-CB43-9BAF-175B77B76414}'
-          Age:             1
+          Age:             18
           Name:            'pdb-diff-cl.pdb'
   - Name:            '.text$mn'
     Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]

From cfb6c82172e77d71f36de96accb40be555d6ede6 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Wed, 5 Jun 2019 02:09:03 +0000
Subject: [PATCH 1088/1176] [PowerPC][NFC] Add codegen test for consecutive
 stores of vector elements

NFC commit of a test case in order for the subsequent review to show differences
in codegen.

Differential revision: https://reviews.llvm.org/D62843

llvm-svn: 362573
---
 .../test/CodeGen/PowerPC/extract-and-store.ll | 535 ++++++++++++++++++
 1 file changed, 535 insertions(+)

diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
index 5ecbdb52ec146..22360c5701641 100644
--- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll
+++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll
@@ -478,3 +478,538 @@ entry:
   store i32 %vecext, i32* %arrayidx, align 4
   ret <4 x i32> %a
 }
+
+define dso_local void @test_consecutive_i32(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_consecutive_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    stfiwx f0, 0, r5
+; CHECK-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_consecutive_i32:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-BE-NEXT:    xxsldwi vs1, vs34, vs34, 1
+; CHECK-BE-NEXT:    li r3, 4
+; CHECK-BE-NEXT:    stfiwx f0, 0, r5
+; CHECK-BE-NEXT:    stfiwx f1, r5, r3
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_consecutive_i32:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT:    li r3, 4
+; CHECK-P9-NEXT:    stfiwx f0, 0, r5
+; CHECK-P9-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_consecutive_i32:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r5
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT:    li r3, 4
+; CHECK-P9-BE-NEXT:    stfiwx f0, r5, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  store i32 %vecext, i32* %b, align 4
+  %vecext1 = extractelement <4 x i32> %a, i32 2
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 1
+  store i32 %vecext1, i32* %arrayidx2, align 4
+  ret void
+}
+
+define dso_local void @test_consecutive_float(<4 x float> %a, float* nocapture %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_consecutive_float:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-NEXT:    xxsldwi vs1, vs34, vs34, 3
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    stfiwx f0, 0, r5
+; CHECK-NEXT:    stfiwx f1, r5, r3
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_consecutive_float:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT:    li r3, 4
+; CHECK-BE-NEXT:    stxsiwx vs34, 0, r5
+; CHECK-BE-NEXT:    stfiwx f0, r5, r3
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_consecutive_float:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    stfiwx f0, 0, r5
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT:    li r3, 4
+; CHECK-P9-NEXT:    stfiwx f0, r5, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_consecutive_float:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-BE-NEXT:    li r3, 4
+; CHECK-P9-BE-NEXT:    stxsiwx vs34, 0, r5
+; CHECK-P9-BE-NEXT:    stfiwx f0, r5, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %a, i32 1
+  store float %vecext, float* %b, align 4
+  %vecext1 = extractelement <4 x float> %a, i32 3
+  %arrayidx2 = getelementptr inbounds float, float* %b, i64 1
+  store float %vecext1, float* %arrayidx2, align 4
+  ret void
+}
+
+define dso_local void @test_stores_exceed_vec_size(<4 x i32> %a, i32* nocapture %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_stores_exceed_vec_size:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
+; CHECK-NEXT:    xxsldwi vs1, vs34, vs34, 1
+; CHECK-NEXT:    li r4, 20
+; CHECK-NEXT:    addi r3, r3, .LCPI16_0@toc@l
+; CHECK-NEXT:    lvx v3, 0, r3
+; CHECK-NEXT:    li r3, 16
+; CHECK-NEXT:    vperm v3, v2, v2, v3
+; CHECK-NEXT:    xxswapd vs0, vs35
+; CHECK-NEXT:    stxvd2x vs0, 0, r5
+; CHECK-NEXT:    stfiwx f1, r5, r3
+; CHECK-NEXT:    stxsiwx vs34, r5, r4
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_stores_exceed_vec_size:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxspltw vs0, vs34, 0
+; CHECK-BE-NEXT:    xxsldwi vs1, vs34, vs34, 1
+; CHECK-BE-NEXT:    li r3, 16
+; CHECK-BE-NEXT:    li r4, 20
+; CHECK-BE-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs0, 2
+; CHECK-BE-NEXT:    stxvw4x vs0, 0, r5
+; CHECK-BE-NEXT:    stfiwx f1, r5, r4
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_stores_exceed_vec_size:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
+; CHECK-P9-NEXT:    addi r3, r3, .LCPI16_0@toc@l
+; CHECK-P9-NEXT:    lxvx vs35, 0, r3
+; CHECK-P9-NEXT:    li r3, 16
+; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-NEXT:    stxv vs35, 0(r5)
+; CHECK-P9-NEXT:    stfiwx f0, r5, r3
+; CHECK-P9-NEXT:    li r3, 20
+; CHECK-P9-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxspltw vs0, vs34, 0
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs0, 2
+; CHECK-P9-BE-NEXT:    li r3, 16
+; CHECK-P9-BE-NEXT:    stxv vs0, 0(r5)
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 1
+; CHECK-P9-BE-NEXT:    stxsiwx vs34, r5, r3
+; CHECK-P9-BE-NEXT:    li r3, 20
+; CHECK-P9-BE-NEXT:    stfiwx f0, r5, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 2
+  store i32 %vecext, i32* %b, align 4
+  %vecext1 = extractelement <4 x i32> %a, i32 3
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 1
+  store i32 %vecext1, i32* %arrayidx2, align 4
+  %vecext3 = extractelement <4 x i32> %a, i32 0
+  %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 2
+  store i32 %vecext3, i32* %arrayidx4, align 4
+  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 3
+  store i32 %vecext3, i32* %arrayidx6, align 4
+  %vecext7 = extractelement <4 x i32> %a, i32 1
+  %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 4
+  store i32 %vecext7, i32* %arrayidx8, align 4
+  %arrayidx10 = getelementptr inbounds i32, i32* %b, i64 5
+  store i32 %vecext, i32* %arrayidx10, align 4
+  ret void
+}
+
+define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_5_consecutive_stores_of_bytes:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    mfvsrd r3, vs34
+; CHECK-NEXT:    rldicl r6, r3, 32, 56
+; CHECK-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-NEXT:    mfvsrd r4, f0
+; CHECK-NEXT:    stb r6, 1(r5)
+; CHECK-NEXT:    stb r3, 2(r5)
+; CHECK-NEXT:    rldicl r6, r4, 32, 56
+; CHECK-NEXT:    rldicl r3, r4, 8, 56
+; CHECK-NEXT:    rldicl r4, r4, 16, 56
+; CHECK-NEXT:    stb r6, 0(r5)
+; CHECK-NEXT:    stb r3, 3(r5)
+; CHECK-NEXT:    stb r4, 4(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_5_consecutive_stores_of_bytes:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxswapd vs0, vs34
+; CHECK-BE-NEXT:    mfvsrd r3, vs34
+; CHECK-BE-NEXT:    rldicl r6, r3, 40, 56
+; CHECK-BE-NEXT:    mfvsrd r4, f0
+; CHECK-BE-NEXT:    stb r6, 0(r5)
+; CHECK-BE-NEXT:    rldicl r6, r4, 40, 56
+; CHECK-BE-NEXT:    rldicl r4, r4, 16, 56
+; CHECK-BE-NEXT:    stb r6, 1(r5)
+; CHECK-BE-NEXT:    clrldi r6, r3, 56
+; CHECK-BE-NEXT:    rldicl r3, r3, 56, 56
+; CHECK-BE-NEXT:    stb r4, 2(r5)
+; CHECK-BE-NEXT:    stb r6, 3(r5)
+; CHECK-BE-NEXT:    stb r3, 4(r5)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_5_consecutive_stores_of_bytes:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 4
+; CHECK-P9-NEXT:    stxsibx vs35, 0, r5
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 12
+; CHECK-P9-NEXT:    li r3, 1
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 15
+; CHECK-P9-NEXT:    li r3, 2
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 1
+; CHECK-P9-NEXT:    li r3, 3
+; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 2
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    li r3, 4
+; CHECK-P9-NEXT:    stxsibx vs34, r5, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_5_consecutive_stores_of_bytes:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 13
+; CHECK-P9-BE-NEXT:    stxsibx vs35, 0, r5
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 5
+; CHECK-P9-BE-NEXT:    li r3, 1
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 2
+; CHECK-P9-BE-NEXT:    li r3, 2
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    li r3, 3
+; CHECK-P9-BE-NEXT:    stxsibx vs34, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v2, v2, v2, 15
+; CHECK-P9-BE-NEXT:    li r3, 4
+; CHECK-P9-BE-NEXT:    stxsibx vs34, r5, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 4
+  store i8 %vecext, i8* %b, align 1
+  %vecext1 = extractelement <16 x i8> %a, i32 12
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 1
+  store i8 %vecext1, i8* %arrayidx2, align 1
+  %vecext3 = extractelement <16 x i8> %a, i32 9
+  %arrayidx4 = getelementptr inbounds i8, i8* %b, i64 2
+  store i8 %vecext3, i8* %arrayidx4, align 1
+  %vecext5 = extractelement <16 x i8> %a, i32 7
+  %arrayidx6 = getelementptr inbounds i8, i8* %b, i64 3
+  store i8 %vecext5, i8* %arrayidx6, align 1
+  %vecext7 = extractelement <16 x i8> %a, i32 6
+  %arrayidx8 = getelementptr inbounds i8, i8* %b, i64 4
+  store i8 %vecext7, i8* %arrayidx8, align 1
+  ret void
+}
+
+define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_13_consecutive_stores_of_bytes:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd vs0, vs34
+; CHECK-NEXT:    mfvsrd r3, vs34
+; CHECK-NEXT:    rldicl r4, r3, 32, 56
+; CHECK-NEXT:    rldicl r6, r3, 56, 56
+; CHECK-NEXT:    stb r4, 1(r5)
+; CHECK-NEXT:    rldicl r4, r3, 40, 56
+; CHECK-NEXT:    mfvsrd r7, f0
+; CHECK-NEXT:    stb r6, 2(r5)
+; CHECK-NEXT:    rldicl r6, r3, 24, 56
+; CHECK-NEXT:    stb r4, 6(r5)
+; CHECK-NEXT:    rldicl r4, r3, 8, 56
+; CHECK-NEXT:    stb r6, 7(r5)
+; CHECK-NEXT:    rldicl r3, r3, 16, 56
+; CHECK-NEXT:    stb r4, 9(r5)
+; CHECK-NEXT:    rldicl r4, r7, 32, 56
+; CHECK-NEXT:    rldicl r6, r7, 8, 56
+; CHECK-NEXT:    stb r4, 0(r5)
+; CHECK-NEXT:    rldicl r4, r7, 16, 56
+; CHECK-NEXT:    stb r6, 3(r5)
+; CHECK-NEXT:    clrldi r6, r7, 56
+; CHECK-NEXT:    stb r4, 4(r5)
+; CHECK-NEXT:    rldicl r4, r7, 48, 56
+; CHECK-NEXT:    stb r6, 5(r5)
+; CHECK-NEXT:    rldicl r6, r7, 56, 56
+; CHECK-NEXT:    stb r4, 8(r5)
+; CHECK-NEXT:    rldicl r4, r7, 24, 56
+; CHECK-NEXT:    stb r6, 10(r5)
+; CHECK-NEXT:    stb r4, 11(r5)
+; CHECK-NEXT:    stb r3, 12(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_13_consecutive_stores_of_bytes:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mfvsrd r3, vs34
+; CHECK-BE-NEXT:    xxswapd vs0, vs34
+; CHECK-BE-NEXT:    rldicl r4, r3, 40, 56
+; CHECK-BE-NEXT:    clrldi r6, r3, 56
+; CHECK-BE-NEXT:    stb r4, 0(r5)
+; CHECK-BE-NEXT:    rldicl r4, r3, 56, 56
+; CHECK-BE-NEXT:    mfvsrd r7, f0
+; CHECK-BE-NEXT:    stb r6, 3(r5)
+; CHECK-BE-NEXT:    rldicl r6, r3, 8, 56
+; CHECK-BE-NEXT:    stb r4, 4(r5)
+; CHECK-BE-NEXT:    rldicl r4, r3, 24, 56
+; CHECK-BE-NEXT:    stb r6, 5(r5)
+; CHECK-BE-NEXT:    rldicl r6, r3, 16, 56
+; CHECK-BE-NEXT:    stb r4, 8(r5)
+; CHECK-BE-NEXT:    rldicl r4, r7, 40, 56
+; CHECK-BE-NEXT:    stb r6, 10(r5)
+; CHECK-BE-NEXT:    rldicl r6, r7, 16, 56
+; CHECK-BE-NEXT:    stb r4, 1(r5)
+; CHECK-BE-NEXT:    rldicl r4, r7, 32, 56
+; CHECK-BE-NEXT:    stb r6, 2(r5)
+; CHECK-BE-NEXT:    rldicl r6, r7, 48, 56
+; CHECK-BE-NEXT:    stb r4, 6(r5)
+; CHECK-BE-NEXT:    clrldi r4, r7, 56
+; CHECK-BE-NEXT:    stb r6, 7(r5)
+; CHECK-BE-NEXT:    rldicl r3, r3, 48, 56
+; CHECK-BE-NEXT:    rldicl r6, r7, 56, 56
+; CHECK-BE-NEXT:    stb r4, 9(r5)
+; CHECK-BE-NEXT:    stb r3, 11(r5)
+; CHECK-BE-NEXT:    stb r6, 12(r5)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_13_consecutive_stores_of_bytes:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 4
+; CHECK-P9-NEXT:    stxsibx vs35, 0, r5
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 12
+; CHECK-P9-NEXT:    li r3, 1
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 15
+; CHECK-P9-NEXT:    li r3, 2
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 1
+; CHECK-P9-NEXT:    li r3, 3
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 2
+; CHECK-P9-NEXT:    li r3, 4
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 8
+; CHECK-P9-NEXT:    li r3, 5
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 13
+; CHECK-P9-NEXT:    li r3, 6
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 11
+; CHECK-P9-NEXT:    li r3, 7
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 6
+; CHECK-P9-NEXT:    li r3, 8
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 9
+; CHECK-P9-NEXT:    li r3, 9
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 7
+; CHECK-P9-NEXT:    li r3, 10
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    vsldoi v3, v2, v2, 3
+; CHECK-P9-NEXT:    li r3, 11
+; CHECK-P9-NEXT:    vsldoi v2, v2, v2, 10
+; CHECK-P9-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-NEXT:    li r3, 12
+; CHECK-P9-NEXT:    stxsibx vs34, r5, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_13_consecutive_stores_of_bytes:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 13
+; CHECK-P9-BE-NEXT:    stxsibx vs35, 0, r5
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 5
+; CHECK-P9-BE-NEXT:    li r3, 1
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 2
+; CHECK-P9-BE-NEXT:    li r3, 2
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    li r3, 3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 15
+; CHECK-P9-BE-NEXT:    stxsibx vs34, r5, r3
+; CHECK-P9-BE-NEXT:    li r3, 4
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 9
+; CHECK-P9-BE-NEXT:    li r3, 5
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 4
+; CHECK-P9-BE-NEXT:    li r3, 6
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 6
+; CHECK-P9-BE-NEXT:    li r3, 7
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 11
+; CHECK-P9-BE-NEXT:    li r3, 8
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 8
+; CHECK-P9-BE-NEXT:    li r3, 9
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 10
+; CHECK-P9-BE-NEXT:    li r3, 10
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    vsldoi v3, v2, v2, 14
+; CHECK-P9-BE-NEXT:    li r3, 11
+; CHECK-P9-BE-NEXT:    vsldoi v2, v2, v2, 7
+; CHECK-P9-BE-NEXT:    stxsibx vs35, r5, r3
+; CHECK-P9-BE-NEXT:    li r3, 12
+; CHECK-P9-BE-NEXT:    stxsibx vs34, r5, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 4
+  store i8 %vecext, i8* %b, align 1
+  %vecext1 = extractelement <16 x i8> %a, i32 12
+  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 1
+  store i8 %vecext1, i8* %arrayidx2, align 1
+  %vecext3 = extractelement <16 x i8> %a, i32 9
+  %arrayidx4 = getelementptr inbounds i8, i8* %b, i64 2
+  store i8 %vecext3, i8* %arrayidx4, align 1
+  %vecext5 = extractelement <16 x i8> %a, i32 7
+  %arrayidx6 = getelementptr inbounds i8, i8* %b, i64 3
+  store i8 %vecext5, i8* %arrayidx6, align 1
+  %vecext7 = extractelement <16 x i8> %a, i32 6
+  %arrayidx8 = getelementptr inbounds i8, i8* %b, i64 4
+  store i8 %vecext7, i8* %arrayidx8, align 1
+  %vecext9 = extractelement <16 x i8> %a, i32 0
+  %arrayidx10 = getelementptr inbounds i8, i8* %b, i64 5
+  store i8 %vecext9, i8* %arrayidx10, align 1
+  %vecext11 = extractelement <16 x i8> %a, i32 11
+  %arrayidx12 = getelementptr inbounds i8, i8* %b, i64 6
+  store i8 %vecext11, i8* %arrayidx12, align 1
+  %vecext13 = extractelement <16 x i8> %a, i32 13
+  %arrayidx14 = getelementptr inbounds i8, i8* %b, i64 7
+  store i8 %vecext13, i8* %arrayidx14, align 1
+  %vecext15 = extractelement <16 x i8> %a, i32 2
+  %arrayidx16 = getelementptr inbounds i8, i8* %b, i64 8
+  store i8 %vecext15, i8* %arrayidx16, align 1
+  %vecext17 = extractelement <16 x i8> %a, i32 15
+  %arrayidx18 = getelementptr inbounds i8, i8* %b, i64 9
+  store i8 %vecext17, i8* %arrayidx18, align 1
+  %vecext19 = extractelement <16 x i8> %a, i32 1
+  %arrayidx20 = getelementptr inbounds i8, i8* %b, i64 10
+  store i8 %vecext19, i8* %arrayidx20, align 1
+  %vecext21 = extractelement <16 x i8> %a, i32 5
+  %arrayidx22 = getelementptr inbounds i8, i8* %b, i64 11
+  store i8 %vecext21, i8* %arrayidx22, align 1
+  %vecext23 = extractelement <16 x i8> %a, i32 14
+  %arrayidx24 = getelementptr inbounds i8, i8* %b, i64 12
+  store i8 %vecext23, i8* %arrayidx24, align 1
+  ret void
+}
+
+define void @test_elements_from_two_vec(<4 x i32> %a, <4 x i32> %b, i32* nocapture %c) local_unnamed_addr #0 {
+; CHECK-LABEL: test_elements_from_two_vec:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-NEXT:    xxsldwi vs1, vs35, vs35, 1
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    stfiwx f0, r7, r3
+; CHECK-NEXT:    stfiwx f1, 0, r7
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_elements_from_two_vec:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-BE-NEXT:    li r3, 4
+; CHECK-BE-NEXT:    stfiwx f0, r7, r3
+; CHECK-BE-NEXT:    stxsiwx vs35, 0, r7
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_elements_from_two_vec:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-NEXT:    li r3, 4
+; CHECK-P9-NEXT:    stfiwx f0, r7, r3
+; CHECK-P9-NEXT:    xxsldwi vs0, vs35, vs35, 1
+; CHECK-P9-NEXT:    stfiwx f0, 0, r7
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_elements_from_two_vec:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-BE-NEXT:    li r3, 4
+; CHECK-P9-BE-NEXT:    stfiwx f0, r7, r3
+; CHECK-P9-BE-NEXT:    stxsiwx vs35, 0, r7
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %arrayidx = getelementptr inbounds i32, i32* %c, i64 1
+  store i32 %vecext, i32* %arrayidx, align 4
+  %vecext1 = extractelement <4 x i32> %b, i32 1
+  store i32 %vecext1, i32* %c, align 4
+  ret void
+}
+
+define dso_local void @test_elements_from_three_vec(<4 x float> %a, <4 x float> %b, <4 x float> %c, float* nocapture %d) local_unnamed_addr #0 {
+; CHECK-LABEL: test_elements_from_three_vec:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-NEXT:    xxsldwi vs1, vs36, vs36, 1
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    li r4, 8
+; CHECK-NEXT:    stxsiwx vs35, r9, r3
+; CHECK-NEXT:    stfiwx f0, 0, r9
+; CHECK-NEXT:    stfiwx f1, r9, r4
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_elements_from_three_vec:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-BE-NEXT:    xxsldwi vs1, vs35, vs35, 1
+; CHECK-BE-NEXT:    li r3, 4
+; CHECK-BE-NEXT:    li r4, 8
+; CHECK-BE-NEXT:    stfiwx f1, r9, r3
+; CHECK-BE-NEXT:    stfiwx f0, 0, r9
+; CHECK-BE-NEXT:    stxsiwx vs36, r9, r4
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-P9-LABEL: test_elements_from_three_vec:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxsldwi vs0, vs34, vs34, 3
+; CHECK-P9-NEXT:    li r3, 4
+; CHECK-P9-NEXT:    stfiwx f0, 0, r9
+; CHECK-P9-NEXT:    xxsldwi vs0, vs36, vs36, 1
+; CHECK-P9-NEXT:    stxsiwx vs35, r9, r3
+; CHECK-P9-NEXT:    li r3, 8
+; CHECK-P9-NEXT:    stfiwx f0, r9, r3
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P9-BE-LABEL: test_elements_from_three_vec:
+; CHECK-P9-BE:       # %bb.0: # %entry
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs34, vs34, 2
+; CHECK-P9-BE-NEXT:    stfiwx f0, 0, r9
+; CHECK-P9-BE-NEXT:    xxsldwi vs0, vs35, vs35, 1
+; CHECK-P9-BE-NEXT:    li r3, 4
+; CHECK-P9-BE-NEXT:    stfiwx f0, r9, r3
+; CHECK-P9-BE-NEXT:    li r3, 8
+; CHECK-P9-BE-NEXT:    stxsiwx vs36, r9, r3
+; CHECK-P9-BE-NEXT:    blr
+entry:
+  %vecext = extractelement <4 x float> %a, i32 3
+  store float %vecext, float* %d, align 4
+  %vecext1 = extractelement <4 x float> %b, i32 2
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 1
+  store float %vecext1, float* %arrayidx2, align 4
+  %vecext3 = extractelement <4 x float> %c, i32 1
+  %arrayidx4 = getelementptr inbounds float, float* %d, i64 2
+  store float %vecext3, float* %arrayidx4, align 4
+  ret void
+}

From 1611cc1cba181db26f147477c92b5d010a295512 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Wed, 5 Jun 2019 02:09:29 +0000
Subject: [PATCH 1089/1176] [analyzer] exploded-graph-rewriter: Pick up python
 from cmake in tests.

This should fix NetBSD buildbots.

llvm-svn: 362574
---
 .../Analysis/exploded-graph-rewriter/lit.local.cfg    | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
index 1ebb25a4001a6..7bc2e107f6423 100644
--- a/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
+++ b/clang/test/Analysis/exploded-graph-rewriter/lit.local.cfg
@@ -1,3 +1,5 @@
+# -*- Python -*-
+
 import lit.util
 import lit.formats
 import os
@@ -6,8 +8,11 @@ use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
 
 config.substitutions.append(('%exploded_graph_rewriter',
-                             lit.util.which('exploded-graph-rewriter.py',
-                                            os.path.join(config.clang_src_dir,
-                                                         'utils', 'analyzer'))))
+                             '\'%s\' %s' % (
+                                 config.python_executable,
+                                 lit.util.which('exploded-graph-rewriter.py',
+                                                os.path.join(
+                                                    config.clang_src_dir,
+                                                    'utils', 'analyzer')))))
 
 config.suffixes = ['.dot']

From 44fb55bf96158ac3c9a90eae1818fb6b1ddefef6 Mon Sep 17 00:00:00 2001
From: Artem Dergachev <artem.dergachev@gmail.com>
Date: Wed, 5 Jun 2019 02:09:49 +0000
Subject: [PATCH 1090/1176] [analyzer] exploded-graph-rewriter: Add the missing
 license header!

llvm-svn: 362575
---
 clang/utils/analyzer/exploded-graph-rewriter.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/clang/utils/analyzer/exploded-graph-rewriter.py b/clang/utils/analyzer/exploded-graph-rewriter.py
index 355fc8632a574..dbfd086215385 100755
--- a/clang/utils/analyzer/exploded-graph-rewriter.py
+++ b/clang/utils/analyzer/exploded-graph-rewriter.py
@@ -1,4 +1,13 @@
 #!/usr/bin/env python
+#
+#===- exploded-graph-rewriter.py - ExplodedGraph dump tool -----*- python -*--#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===-----------------------------------------------------------------------===#
+
 
 from __future__ import print_function
 

From 7c842fadf100b2ed160986e40a9a68a0613df256 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Wed, 5 Jun 2019 02:36:40 +0000
Subject: [PATCH 1091/1176] [PowerPC] Collapse RLDICL/RLDICR into RLDIC when
 possible

Generally speaking, we lower to an optimal rotate sequence for nodes visible in
the SDAG. However, there are instances where the two rotates are not visible at
ISEL time - most notably those in a very common sequence when lowering switch
statements to jump tables.

A common situation is a switch on a 32-bit integer. This value has to have the
upper 32 bits cleared and because jump table offsets are word offsets, the value
needs to be shifted left by 2 bits. We currently emit the clear and the left
shift as two separate instructions, but this is not needed as we can lower it to
a single RLDIC.

This patch just cleans that up.

Differential revision: https://reviews.llvm.org/D60402

llvm-svn: 362576
---
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp     |  52 ++++++++
 .../test/CodeGen/PowerPC/collapse-rotates.mir |  65 ++++++++++
 .../PowerPC/jump-tables-collapse-rotate.ll    | 122 ++++++++++++++++++
 3 files changed, 239 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/collapse-rotates.mir
 create mode 100644 llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll

diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 255ba2d868158..9a566eddfdd65 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -47,6 +47,8 @@ STATISTIC(NumFunctionsEnteredInMIPeephole,
 STATISTIC(NumFixedPointIterations,
           "Number of fixed-point iterations converting reg-reg instructions "
           "to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+          "Number of pairs of rotate left, clear left/right collapsed");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -757,6 +759,56 @@ bool PPCMIPeephole::simplifyCode(void) {
         NumOptADDLIs++;
         break;
       }
+      case PPC::RLDICR: {
+        // We miss the opportunity to emit an RLDIC when lowering jump tables
+        // since ISEL sees only a single basic block. When selecting, the clear
+        // and shift left will be in different blocks.
+        unsigned SrcReg = MI.getOperand(1).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+          break;
+
+        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+        if (SrcMI->getOpcode() != PPC::RLDICL)
+          break;
+        MachineOperand MOpSHSrc = SrcMI->getOperand(2);
+        MachineOperand MOpMBSrc = SrcMI->getOperand(3);
+        MachineOperand MOpSHMI = MI.getOperand(2);
+        MachineOperand MOpMEMI = MI.getOperand(3);
+        if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() &&
+              MOpSHMI.isImm() && MOpMEMI.isImm()))
+          break;
+        uint64_t SHSrc = MOpSHSrc.getImm();
+        uint64_t MBSrc = MOpMBSrc.getImm();
+        uint64_t SHMI = MOpSHMI.getImm();
+        uint64_t MEMI = MOpMEMI.getImm();
+        uint64_t NewSH = SHSrc + SHMI;
+        uint64_t NewMB = MBSrc - SHMI;
+        if (NewMB > 63 || NewSH > 63)
+          break;
+
+        // The bits cleared with RLDICL are [0, MBSrc).
+        // The bits cleared with RLDICR are (MEMI, 63].
+        // After the sequence, the bits cleared are:
+        // [0, MBSrc-SHMI) and (MEMI, 63).
+        //
+        // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
+        if ((63 - NewSH) != MEMI)
+          break;
+
+        LLVM_DEBUG(dbgs() << "Converting pair: ");
+        LLVM_DEBUG(SrcMI->dump());
+        LLVM_DEBUG(MI.dump());
+
+        MI.setDesc(TII->get(PPC::RLDIC));
+        MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+        MI.getOperand(2).setImm(NewSH);
+        MI.getOperand(3).setImm(NewMB);
+
+        LLVM_DEBUG(dbgs() << "To: ");
+        LLVM_DEBUG(MI.dump());
+        NumRotatesCollapsed++;
+        break;
+      }
       }
     }
 
diff --git a/llvm/test/CodeGen/PowerPC/collapse-rotates.mir b/llvm/test/CodeGen/PowerPC/collapse-rotates.mir
new file mode 100644
index 0000000000000..116f74bd46e6a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/collapse-rotates.mir
@@ -0,0 +1,65 @@
+# RUN: llc -mtriple=powerpc64le--linux-gnu -start-before ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s
+
+--- |
+  ; ModuleID = 'b.ll'
+  source_filename = "b.ll"
+  target datalayout = "e-m:e-i64:64-n32:64"
+  
+  define dso_local i64 @test(i64 %l) {
+  entry:
+    %shl = shl i64 %l, 3
+    ret i64 %shl
+  }
+
+...
+---
+name:            test
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       
+  - { id: 0, class: g8rc, preferred-register: '' }
+  - { id: 1, class: g8rc, preferred-register: '' }
+  - { id: 2, class: g8rc, preferred-register: '' }
+liveins:         
+  - { reg: '$x3', virtual-reg: '%0' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $x3
+  
+    %0:g8rc = COPY $x3
+    %1:g8rc = RLDICL %0, 2, 32
+    %2:g8rc = RLDICR %1, 3, 58
+    $x3 = COPY %2
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+...
+# CHECK: rldic 3, 3, 5, 29
diff --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
new file mode 100644
index 0000000000000..1fa49fdbfee39
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -o - \
+; RUN:   -ppc-asm-full-reg-names -verify-machineinstrs %s | FileCheck %s
+
+; Function Attrs: nounwind
+define dso_local zeroext i32 @test(i32 signext %l) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r3, r3, -1
+; CHECK-NEXT:    cmplwi r3, 5
+; CHECK-NEXT:    bgt cr0, .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-NEXT:    rldic r3, r3, 2, 30
+; CHECK-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-NEXT:    lwax r3, r3, r4
+; CHECK-NEXT:    add r3, r3, r4
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:  .LBB0_2: # %sw.bb
+; CHECK-NEXT:    li r3, 2
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_3: # %sw.default
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_4: # %sw.bb3
+; CHECK-NEXT:    li r3, 3
+; CHECK-NEXT:    b .LBB0_9
+; CHECK-NEXT:  .LBB0_5: # %sw.bb5
+; CHECK-NEXT:    li r3, 4
+; CHECK-NEXT:    bl test2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_6: # %sw.bb8
+; CHECK-NEXT:    li r3, 5
+; CHECK-NEXT:    bl test4
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_7: # %sw.bb10
+; CHECK-NEXT:    li r3, 66
+; CHECK-NEXT:    bl test4
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_8: # %sw.bb13
+; CHECK-NEXT:    li r3, 66
+; CHECK-NEXT:  .LBB0_9: # %return
+; CHECK-NEXT:    bl test2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_10: # %return
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  switch i32 %l, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb3
+    i32 3, label %sw.bb5
+    i32 4, label %sw.bb8
+    i32 5, label %sw.bb10
+    i32 6, label %sw.bb13
+  ]
+
+sw.default:                                       ; preds = %entry
+  %call = tail call signext i32 @test1(i32 signext 1)
+  %call1 = tail call signext i32 @test3(i32 signext %call)
+  br label %return
+
+sw.bb:                                            ; preds = %entry
+  %call2 = tail call signext i32 @test1(i32 signext 2)
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+  %call4 = tail call signext i32 @test2(i32 signext 3)
+  br label %return
+
+sw.bb5:                                           ; preds = %entry
+  %call6 = tail call signext i32 @test2(i32 signext 4)
+  %call7 = tail call signext i32 @test3(i32 signext %call6)
+  br label %return
+
+sw.bb8:                                           ; preds = %entry
+  %call9 = tail call signext i32 @test4(i32 signext 5)
+  br label %return
+
+sw.bb10:                                          ; preds = %entry
+  %call11 = tail call signext i32 @test4(i32 signext 66)
+  %call12 = tail call signext i32 @test1(i32 signext %call11)
+  br label %return
+
+sw.bb13:                                          ; preds = %entry
+  %call14 = tail call signext i32 @test2(i32 signext 66)
+  br label %return
+
+return:                                           ; preds = %sw.bb13, %sw.bb10, %sw.bb8, %sw.bb5, %sw.bb3, %sw.bb, %sw.default
+  %retval.0 = phi i32 [ %call1, %sw.default ], [ %call14, %sw.bb13 ], [ %call12, %sw.bb10 ], [ %call9, %sw.bb8 ], [ %call7, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @test3(i32 signext)
+
+declare signext i32 @test1(i32 signext)
+
+declare signext i32 @test2(i32 signext)
+
+declare signext i32 @test4(i32 signext)

From 76467c4d7fbbe19ed0f1f64aa22befdebb71a869 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Wed, 5 Jun 2019 03:00:06 +0000
Subject: [PATCH 1092/1176] [NFC][FnAttrs] Stress tests for attribute deduction

This commit is a preparation of upcoming patches on attribute deduction.
It will shorten the diffs and make it clear what we inferred before.

Reviewers: chandlerc, homerdin, hfinkel, fedor.sergeev, sanjoy, spatel, nlopes

Subscribers: bollu, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59903

llvm-svn: 362577
---
 .../Transforms/FunctionAttrs/arg_nocapture.ll | 445 +++++++++++++
 .../Transforms/FunctionAttrs/arg_returned.ll  | 589 ++++++++++++++++++
 .../Transforms/FunctionAttrs/fn_noreturn.ll   | 134 ++++
 .../read_write_returned_arguments_scc.ll      | 164 +++++
 .../Transforms/FunctionAttrs/readattrs.ll     |  31 +-
 5 files changed, 1362 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
 create mode 100644 llvm/test/Transforms/FunctionAttrs/arg_returned.ll
 create mode 100644 llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll
 create mode 100644 llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll

diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
new file mode 100644
index 0000000000000..25656d8c909aa
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
@@ -0,0 +1,445 @@
+; RUN: opt -functionattrs -S < %s | FileCheck %s
+;
+; Test cases specifically designed for the "no-capture" argument attribute.
+; We use FIXME's to indicate problems and missing attributes.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; TEST comparison against NULL
+;
+; int is_null_return(int *p) {
+;   return p == 0;
+; }
+;
+; FIXME: no-capture missing for %p
+; CHECK: define i32 @is_null_return(i32* readnone %p)
+define i32 @is_null_return(i32* %p) #0 {
+entry:
+  %cmp = icmp eq i32* %p, null
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+; TEST comparison against NULL in control flow
+;
+; int is_null_control(int *p) {
+;   if (p == 0)
+;     return 1;
+;   if (0 == p)
+;     return 1;
+;   return 0;
+; }
+;
+; FIXME: no-capture missing for %p
+; CHECK: define i32 @is_null_control(i32* readnone %p)
+define i32 @is_null_control(i32* %p) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %cmp = icmp eq i32* %p, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %cmp1 = icmp eq i32* null, %p
+  br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:                                         ; preds = %if.end
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+if.end3:                                          ; preds = %if.end
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %if.end3, %if.then2, %if.then
+  %0 = load i32, i32* %retval, align 4
+  ret i32 %0
+}
+
+; TEST singleton SCC
+;
+; double *srec0(double *a) {
+;   srec0(a);
+;   return 0;
+; }
+;
+; CHECK: define noalias double* @srec0(double* nocapture readnone %a)
+define double* @srec0(double* %a) #0 {
+entry:
+  %call = call double* @srec0(double* %a)
+  ret double* null
+}
+
+; TEST singleton SCC with lots of nested recursive calls
+;
+; int* srec16(int* a) {
+;   return srec16(srec16(srec16(srec16(
+;          srec16(srec16(srec16(srec16(
+;          srec16(srec16(srec16(srec16(
+;          srec16(srec16(srec16(srec16(
+;                        a
+;          ))))))))))))))));
+; }
+;
+; Other arguments are possible here due to the no-return behavior.
+;
+; FIXME: no-return missing
+; CHECK: define noalias nonnull i32* @srec16(i32* nocapture readnone %a)
+define i32* @srec16(i32* %a) #0 {
+entry:
+  %call = call i32* @srec16(i32* %a)
+  %call1 = call i32* @srec16(i32* %call)
+  %call2 = call i32* @srec16(i32* %call1)
+  %call3 = call i32* @srec16(i32* %call2)
+  %call4 = call i32* @srec16(i32* %call3)
+  %call5 = call i32* @srec16(i32* %call4)
+  %call6 = call i32* @srec16(i32* %call5)
+  %call7 = call i32* @srec16(i32* %call6)
+  %call8 = call i32* @srec16(i32* %call7)
+  %call9 = call i32* @srec16(i32* %call8)
+  %call10 = call i32* @srec16(i32* %call9)
+  %call11 = call i32* @srec16(i32* %call10)
+  %call12 = call i32* @srec16(i32* %call11)
+  %call13 = call i32* @srec16(i32* %call12)
+  %call14 = call i32* @srec16(i32* %call13)
+  %call15 = call i32* @srec16(i32* %call14)
+  ret i32* %call15
+}
+
+; TEST SCC with various calls, casts, and comparisons agains NULL
+;
+; FIXME: returned missing for %a
+; FIXME: no-capture missing for %a
+; CHECK: define float* @scc_A(i32* readnone %a)
+;
+; FIXME: returned missing for %a
+; FIXME: no-capture missing for %a
+; CHECK: define i64* @scc_B(double* readnone %a)
+;
+; FIXME: returned missing for %a
+; FIXME: readnone missing for %s
+; FIXME: no-capture missing for %a
+; CHECK: define i8* @scc_C(i16* %a)
+;
+; float *scc_A(int *a) {
+;   return (float*)(a ? (int*)scc_A((int*)scc_B((double*)scc_C((short*)a))) : a);
+; }
+;
+; long *scc_B(double *a) {
+;   return (long*)(a ? scc_C((short*)scc_B((double*)scc_A((int*)a))) : a);
+; }
+;
+; void *scc_C(short *a) {
+;   return scc_A((int*)(scc_C(a) ? scc_B((double*)a) : scc_C(a)));
+; }
+define float* @scc_A(i32* %a) {
+entry:
+  %tobool = icmp ne i32* %a, null
+  br i1 %tobool, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %0 = bitcast i32* %a to i16*
+  %call = call i8* @scc_C(i16* %0)
+  %1 = bitcast i8* %call to double*
+  %call1 = call i64* @scc_B(double* %1)
+  %2 = bitcast i64* %call1 to i32*
+  %call2 = call float* @scc_A(i32* %2)
+  %3 = bitcast float* %call2 to i32*
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32* [ %3, %cond.true ], [ %a, %cond.false ]
+  %4 = bitcast i32* %cond to float*
+  ret float* %4
+}
+
+define i64* @scc_B(double* %a) {
+entry:
+  %tobool = icmp ne double* %a, null
+  br i1 %tobool, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %0 = bitcast double* %a to i32*
+  %call = call float* @scc_A(i32* %0)
+  %1 = bitcast float* %call to double*
+  %call1 = call i64* @scc_B(double* %1)
+  %2 = bitcast i64* %call1 to i16*
+  %call2 = call i8* @scc_C(i16* %2)
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %3 = bitcast double* %a to i8*
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i8* [ %call2, %cond.true ], [ %3, %cond.false ]
+  %4 = bitcast i8* %cond to i64*
+  ret i64* %4
+}
+
+define i8* @scc_C(i16* %a) {
+entry:
+  %call = call i8* @scc_C(i16* %a)
+  %tobool = icmp ne i8* %call, null
+  br i1 %tobool, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %0 = bitcast i16* %a to double*
+  %call1 = call i64* @scc_B(double* %0)
+  %1 = bitcast i64* %call1 to i8*
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %call2 = call i8* @scc_C(i16* %a)
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i8* [ %1, %cond.true ], [ %call2, %cond.false ]
+  %2 = bitcast i8* %cond to i32*
+  %call3 = call float* @scc_A(i32* %2)
+  %3 = bitcast float* %call3 to i8*
+  ret i8* %3
+}
+
+
+; TEST call to external function, marked no-capture
+;
+; void external_no_capture(int /* no-capture */ *p);
+; void test_external_no_capture(int *p) {
+;   external_no_capture(p);
+; }
+;
+; CHECK: define void @test_external_no_capture(i32* nocapture %p)
+declare void @external_no_capture(i32* nocapture)
+
+define void @test_external_no_capture(i32* %p) #0 {
+entry:
+  call void @external_no_capture(i32* %p)
+  ret void
+}
+
+; TEST call to external var-args function, marked no-capture
+;
+; void test_var_arg_call(char *p, int a) {
+;   printf(p, a);
+; }
+;
+; CHECK: define void @test_var_arg_call(i8* nocapture %p, i32 %a)
+define void @test_var_arg_call(i8* %p, i32 %a) #0 {
+entry:
+  %call = call i32 (i8*, ...) @printf(i8* %p, i32 %a)
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+
+
+; TEST "captured" only through return
+;
+; long *not_captured_but_returned_0(long *a) {
+;   *a1 = 0;
+;   return a;
+; }
+;
+; There should *not* be a no-capture attribute on %a
+; CHECK: define i64* @not_captured_but_returned_0(i64* returned %a)
+define i64* @not_captured_but_returned_0(i64* %a) #0 {
+entry:
+  store i64 0, i64* %a, align 8
+  ret i64* %a
+}
+
+; TEST "captured" only through return
+;
+; long *not_captured_but_returned_1(long *a) {
+;   *(a+1) = 1;
+;   return a + 1;
+; }
+;
+; There should *not* be a no-capture attribute on %a
+; CHECK: define nonnull i64* @not_captured_but_returned_1(i64* %a)
+define i64* @not_captured_but_returned_1(i64* %a) #0 {
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %a, i64 1
+  store i64 1, i64* %add.ptr, align 8
+  ret i64* %add.ptr
+}
+
+; TEST calls to "captured" only through return functions
+;
+; void test_not_captured_but_returned_calls(long *a) {
+;   not_captured_but_returned_0(a);
+;   not_captured_but_returned_1(a);
+; }
+;
+; FIXME: no-capture missing for %a
+; CHECK: define void @test_not_captured_but_returned_calls(i64* %a)
+define void @test_not_captured_but_returned_calls(i64* %a) #0 {
+entry:
+  %call = call i64* @not_captured_but_returned_0(i64* %a)
+  %call1 = call i64* @not_captured_but_returned_1(i64* %a)
+  ret void
+}
+
+; TEST "captured" only through transitive return
+;
+; long* negative_test_not_captured_but_returned_call_0a(long *a) {
+;   return not_captured_but_returned_0(a);
+; }
+;
+; There should *not* be a no-capture attribute on %a
+; CHECK: define i64* @negative_test_not_captured_but_returned_call_0a(i64* returned %a)
+define i64* @negative_test_not_captured_but_returned_call_0a(i64* %a) #0 {
+entry:
+  %call = call i64* @not_captured_but_returned_0(i64* %a)
+  ret i64* %call
+}
+
+; TEST captured through write
+;
+; void negative_test_not_captured_but_returned_call_0b(long *a) {
+;   *a = (long)not_captured_but_returned_0(a);
+; }
+;
+; There should *not* be a no-capture attribute on %a
+; CHECK: define void @negative_test_not_captured_but_returned_call_0b(i64* %a)
+define void @negative_test_not_captured_but_returned_call_0b(i64* %a) #0 {
+entry:
+  %call = call i64* @not_captured_but_returned_0(i64* %a)
+  %0 = ptrtoint i64* %call to i64
+  store i64 %0, i64* %a, align 8
+  ret void
+}
+
+; TEST "captured" only through transitive return
+;
+; long* negative_test_not_captured_but_returned_call_1a(long *a) {
+;   return not_captured_but_returned_1(a);
+; }
+;
+; There should *not* be a no-capture attribute on %a
+; CHECK: define nonnull i64* @negative_test_not_captured_but_returned_call_1a(i64* %a)
+define i64* @negative_test_not_captured_but_returned_call_1a(i64* %a) #0 {
+entry:
+  %call = call i64* @not_captured_but_returned_1(i64* %a)
+  ret i64* %call
+}
+
+; TEST captured through write
+;
+; void negative_test_not_captured_but_returned_call_1b(long *a) {
+;   *a = (long)not_captured_but_returned_1(a);
+; }
+;
+; There should *not* be a no-capture attribute on %a
+; CHECK: define void @negative_test_not_captured_but_returned_call_1b(i64* %a)
+define void @negative_test_not_captured_but_returned_call_1b(i64* %a) #0 {
+entry:
+  %call = call i64* @not_captured_but_returned_1(i64* %a)
+  %0 = ptrtoint i64* %call to i64
+  store i64 %0, i64* %call, align 8
+  ret void
+}
+
+; TEST return argument or unknown call result
+;
+; int* ret_arg_or_unknown(int* b) {
+;   if (b == 0)
+;     return b;
+;   return unknown();
+; }
+;
+; Verify we do *not* assume b is returned or not captured.
+;
+; CHECK:     define i32* @ret_arg_or_unknown(i32* readnone %b)
+; CHECK:     define i32* @ret_arg_or_unknown_through_phi(i32* readnone %b)
+declare i32* @unknown()
+
+define i32* @ret_arg_or_unknown(i32* %b) #0 {
+entry:
+  %cmp = icmp eq i32* %b, null
+  br i1 %cmp, label %ret_arg, label %ret_unknown
+
+ret_arg:
+  ret i32* %b
+
+ret_unknown:
+  %call = call i32* @unknown()
+  ret i32* %call
+}
+
+define i32* @ret_arg_or_unknown_through_phi(i32* %b) #0 {
+entry:
+  %cmp = icmp eq i32* %b, null
+  br i1 %cmp, label %ret_arg, label %ret_unknown
+
+ret_arg:
+  br label %r
+
+ret_unknown:
+  %call = call i32* @unknown()
+  br label %r
+
+r:
+  %phi = phi i32* [ %b, %ret_arg ], [ %call, %ret_unknown ]
+  ret i32* %phi
+}
+
+
+; TEST not captured by readonly external function
+;
+; CHECK: define void @not_captured_by_readonly_call(i32* nocapture %b)
+declare i32* @readonly_unknown(i32*, i32*) readonly
+
+define void @not_captured_by_readonly_call(i32* %b) #0 {
+entry:
+  %call = call i32* @readonly_unknown(i32* %b, i32* %b)
+  ret void
+}
+
+
+; TEST not captured by readonly external function if return chain is known
+;
+; Make sure the returned flag on %r is strong enough to justify nocapture on %b but **not** on %r.
+;
+; FIXME: The "returned" information is not propagated to the fullest extend causing us to miss "nocapture" on %b in the following:
+; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either1(i32* readonly %b, i32* readonly returned %r)
+;
+; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either2(i32* readonly %b, i32* readonly returned %r)
+; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either3(i32* readonly %b, i32* readonly returned %r)
+;
+; FIXME: The "nounwind" information is not derived to the fullest extend causing us to miss "nocapture" on %b in the following:
+; CHECK: define i32* @not_captured_by_readonly_call_not_returned_either4(i32* readonly %b, i32* readonly returned %r)
+define i32* @not_captured_by_readonly_call_not_returned_either1(i32* %b, i32* returned %r) #0 {
+entry:
+  %call = call i32* @readonly_unknown(i32* %b, i32* %r) nounwind
+  ret i32* %call
+}
+
+declare i32* @readonly_unknown_r1a(i32*, i32* returned) readonly
+define i32* @not_captured_by_readonly_call_not_returned_either2(i32* %b, i32* %r) #0 {
+entry:
+  %call = call i32* @readonly_unknown_r1a(i32* %b, i32* %r) nounwind
+  ret i32* %call
+}
+
+declare i32* @readonly_unknown_r1b(i32*, i32* returned) readonly nounwind
+define i32* @not_captured_by_readonly_call_not_returned_either3(i32* %b, i32* %r) #0 {
+entry:
+  %call = call i32* @readonly_unknown_r1b(i32* %b, i32* %r)
+  ret i32* %call
+}
+
+define i32* @not_captured_by_readonly_call_not_returned_either4(i32* %b, i32* %r) #0 {
+entry:
+  %call = call i32* @readonly_unknown_r1a(i32* %b, i32* %r)
+  ret i32* %call
+}
+
+attributes #0 = { noinline nounwind uwtable }
diff --git a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
new file mode 100644
index 0000000000000..2430fda5cbe7b
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -0,0 +1,589 @@
+; RUN: opt -functionattrs -S < %s | FileCheck %s
+;
+; Test cases specifically designed for the "returned" argument attribute.
+; We use FIXME's to indicate problems and missing attributes.
+;
+
+; TEST SCC test returning an integer value argument
+;
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define i32 @sink_r0(i32 returned %r)
+;
+; FIXME: returned on %r missing:
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define i32 @scc_r1(i32 %a, i32 %r, i32 %b)
+;
+; FIXME: returned on %r missing:
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+;
+; int scc_r1(int a, int b, int r);
+; int scc_r2(int a, int b, int r);
+;
+; __attribute__((noinline)) int sink_r0(int r) {
+;   return r;
+; }
+;
+; __attribute__((noinline)) int scc_r1(int a, int r, int b) {
+;   return scc_r2(r, a, sink_r0(r));
+; }
+;
+; __attribute__((noinline)) int scc_r2(int a, int b, int r) {
+;   if (a > b)
+;     return scc_r2(b, a, sink_r0(r));
+;   if (a < b)
+;     return scc_r1(sink_r0(b), scc_r2(scc_r1(a, b, r), scc_r1(a, scc_r2(r, r, r), r), scc_r2(a, b, r)), scc_r1(a, b, r));
+;   return a == b ? r : scc_r2(a, b, r);
+; }
+; __attribute__((noinline)) int scc_rX(int a, int b, int r) {
+;   if (a > b)
+;     return scc_r2(b, a, sink_r0(r));
+;   if (a < b)                                                                         // V Diff to scc_r2
+;     return scc_r1(sink_r0(b), scc_r2(scc_r1(a, b, r), scc_r1(a, scc_r2(r, r, r), r), scc_r1(a, b, r)), scc_r1(a, b, r));
+;   return a == b ? r : scc_r2(a, b, r);
+; }
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @sink_r0(i32 %r) #0 {
+entry:
+  ret i32 %r
+}
+
+define i32 @scc_r1(i32 %a, i32 %r, i32 %b) #0 {
+entry:
+  %call = call i32 @sink_r0(i32 %r)
+  %call1 = call i32 @scc_r2(i32 %r, i32 %a, i32 %call)
+  ret i32 %call1
+}
+
+define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = call i32 @sink_r0(i32 %r)
+  %call1 = call i32 @scc_r2(i32 %b, i32 %a, i32 %call)
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %cmp2 = icmp slt i32 %a, %b
+  br i1 %cmp2, label %if.then3, label %if.end12
+
+if.then3:                                         ; preds = %if.end
+  %call4 = call i32 @sink_r0(i32 %b)
+  %call5 = call i32 @scc_r1(i32 %a, i32 %b, i32 %r)
+  %call6 = call i32 @scc_r2(i32 %r, i32 %r, i32 %r)
+  %call7 = call i32 @scc_r1(i32 %a, i32 %call6, i32 %r)
+  %call8 = call i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+  %call9 = call i32 @scc_r2(i32 %call5, i32 %call7, i32 %call8)
+  %call10 = call i32 @scc_r1(i32 %a, i32 %b, i32 %r)
+  %call11 = call i32 @scc_r1(i32 %call4, i32 %call9, i32 %call10)
+  br label %return
+
+if.end12:                                         ; preds = %if.end
+  %cmp13 = icmp eq i32 %a, %b
+  br i1 %cmp13, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %if.end12
+  br label %cond.end
+
+cond.false:                                       ; preds = %if.end12
+  %call14 = call i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %r, %cond.true ], [ %call14, %cond.false ]
+  br label %return
+
+return:                                           ; preds = %cond.end, %if.then3, %if.then
+  %retval.0 = phi i32 [ %call1, %if.then ], [ %call11, %if.then3 ], [ %cond, %cond.end ]
+  ret i32 %retval.0
+}
+
+define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = call i32 @sink_r0(i32 %r)
+  %call1 = call i32 @scc_r2(i32 %b, i32 %a, i32 %call)
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %cmp2 = icmp slt i32 %a, %b
+  br i1 %cmp2, label %if.then3, label %if.end12
+
+if.then3:                                         ; preds = %if.end
+  %call4 = call i32 @sink_r0(i32 %b)
+  %call5 = call i32 @scc_r1(i32 %a, i32 %b, i32 %r)
+  %call6 = call i32 @scc_r2(i32 %r, i32 %r, i32 %r)
+  %call7 = call i32 @scc_r1(i32 %a, i32 %call6, i32 %r)
+  %call8 = call i32 @scc_r1(i32 %a, i32 %b, i32 %r)
+  %call9 = call i32 @scc_r2(i32 %call5, i32 %call7, i32 %call8)
+  %call10 = call i32 @scc_r1(i32 %a, i32 %b, i32 %r)
+  %call11 = call i32 @scc_r1(i32 %call4, i32 %call9, i32 %call10)
+  br label %return
+
+if.end12:                                         ; preds = %if.end
+  %cmp13 = icmp eq i32 %a, %b
+  br i1 %cmp13, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %if.end12
+  br label %cond.end
+
+cond.false:                                       ; preds = %if.end12
+  %call14 = call i32 @scc_r2(i32 %a, i32 %b, i32 %r)
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %r, %cond.true ], [ %call14, %cond.false ]
+  br label %return
+
+return:                                           ; preds = %cond.end, %if.then3, %if.then
+  %retval.0 = phi i32 [ %call1, %if.then ], [ %call11, %if.then3 ], [ %cond, %cond.end ]
+  ret i32 %retval.0
+}
+
+
+; TEST SCC test returning a pointer value argument
+;
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define double* @ptr_sink_r0(double* readnone returned %r)
+;
+; FIXME: returned on %r missing:
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define double* @ptr_scc_r1(double* %a, double* readnone %r, double* nocapture readnone %b)
+;
+; FIXME: returned on %r missing:
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define double* @ptr_scc_r2(double* readnone %a, double* readnone %b, double* readnone %r)
+;
+; double* ptr_scc_r1(double* a, double* b, double* r);
+; double* ptr_scc_r2(double* a, double* b, double* r);
+;
+; __attribute__((noinline)) double* ptr_sink_r0(double* r) {
+;   return r;
+; }
+;
+; __attribute__((noinline)) double* ptr_scc_r1(double* a, double* r, double* b) {
+;   return ptr_scc_r2(r, a, ptr_sink_r0(r));
+; }
+;
+; __attribute__((noinline)) double* ptr_scc_r2(double* a, double* b, double* r) {
+;   if (a > b)
+;     return ptr_scc_r2(b, a, ptr_sink_r0(r));
+;   if (a < b)
+;     return ptr_scc_r1(ptr_sink_r0(b), ptr_scc_r2(ptr_scc_r1(a, b, r), ptr_scc_r1(a, ptr_scc_r2(r, r, r), r), ptr_scc_r2(a, b, r)), ptr_scc_r1(a, b, r));
+;   return a == b ? r : ptr_scc_r2(a, b, r);
+; }
+define double* @ptr_sink_r0(double* %r) #0 {
+entry:
+  ret double* %r
+}
+
+define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 {
+entry:
+  %call = call double* @ptr_sink_r0(double* %r)
+  %call1 = call double* @ptr_scc_r2(double* %r, double* %a, double* %call)
+  ret double* %call1
+}
+
+define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 {
+entry:
+  %cmp = icmp ugt double* %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = call double* @ptr_sink_r0(double* %r)
+  %call1 = call double* @ptr_scc_r2(double* %b, double* %a, double* %call)
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %cmp2 = icmp ult double* %a, %b
+  br i1 %cmp2, label %if.then3, label %if.end12
+
+if.then3:                                         ; preds = %if.end
+  %call4 = call double* @ptr_sink_r0(double* %b)
+  %call5 = call double* @ptr_scc_r1(double* %a, double* %b, double* %r)
+  %call6 = call double* @ptr_scc_r2(double* %r, double* %r, double* %r)
+  %call7 = call double* @ptr_scc_r1(double* %a, double* %call6, double* %r)
+  %call8 = call double* @ptr_scc_r2(double* %a, double* %b, double* %r)
+  %call9 = call double* @ptr_scc_r2(double* %call5, double* %call7, double* %call8)
+  %call10 = call double* @ptr_scc_r1(double* %a, double* %b, double* %r)
+  %call11 = call double* @ptr_scc_r1(double* %call4, double* %call9, double* %call10)
+  br label %return
+
+if.end12:                                         ; preds = %if.end
+  %cmp13 = icmp eq double* %a, %b
+  br i1 %cmp13, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %if.end12
+  br label %cond.end
+
+cond.false:                                       ; preds = %if.end12
+  %call14 = call double* @ptr_scc_r2(double* %a, double* %b, double* %r)
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi double* [ %r, %cond.true ], [ %call14, %cond.false ]
+  br label %return
+
+return:                                           ; preds = %cond.end, %if.then3, %if.then
+  %retval.0 = phi double* [ %call1, %if.then ], [ %call11, %if.then3 ], [ %cond, %cond.end ]
+  ret double* %retval.0
+}
+
+
+; TEST a singleton SCC with a lot of recursive calls
+;
+; int* ret0(int *a) {
+;   return *a ? a : ret0(ret0(ret0(...ret0(a)...)));
+; }
+;
+; FIXME: returned on %a missing:
+; CHECK: Function Attrs: noinline nounwind readonly uwtable
+; CHECK: define i32* @ret0(i32* readonly %a)
+define i32* @ret0(i32* %a) #0 {
+entry:
+  %v = load i32, i32* %a, align 4
+  %tobool = icmp ne i32 %v, 0
+  %call = call i32* @ret0(i32* %a)
+  %call1 = call i32* @ret0(i32* %call)
+  %call2 = call i32* @ret0(i32* %call1)
+  %call3 = call i32* @ret0(i32* %call2)
+  %call4 = call i32* @ret0(i32* %call3)
+  %call5 = call i32* @ret0(i32* %call4)
+  %call6 = call i32* @ret0(i32* %call5)
+  %call7 = call i32* @ret0(i32* %call6)
+  %call8 = call i32* @ret0(i32* %call7)
+  %call9 = call i32* @ret0(i32* %call8)
+  %call10 = call i32* @ret0(i32* %call9)
+  %call11 = call i32* @ret0(i32* %call10)
+  %call12 = call i32* @ret0(i32* %call11)
+  %call13 = call i32* @ret0(i32* %call12)
+  %call14 = call i32* @ret0(i32* %call13)
+  %call15 = call i32* @ret0(i32* %call14)
+  %call16 = call i32* @ret0(i32* %call15)
+  %call17 = call i32* @ret0(i32* %call16)
+  %sel = select i1 %tobool, i32* %a, i32* %call17
+  ret i32* %sel
+}
+
+
+; TEST address taken function with call to an external functions
+;
+;  void unknown_fn(void *);
+;
+;  int* calls_unknown_fn(int *r) {
+;    unknown_fn(&calls_unknown_fn);
+;    return r;
+;  }
+;
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK: declare void @unknown_fn(i32* (i32*)*)
+;
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK: define i32* @calls_unknown_fn(i32* readnone returned %r)
+declare void @unknown_fn(i32* (i32*)*) #0
+
+define i32* @calls_unknown_fn(i32* %r) #0 {
+  tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn)
+  ret i32* %r
+}
+
+
+; TEST call to a function that might be redifined at link time
+;
+;  int *maybe_redefined_fn(int *r) {
+;    return r;
+;  }
+;
+;  int *calls_maybe_redefined_fn(int *r) {
+;    maybe_redefined_fn(r);
+;    return r;
+;  }
+;
+; Verify the maybe-redefined function is not annotated:
+;
+; CHECK: Function Attrs: noinline norecurse nounwind uwtable
+; CHECK: define linkonce_odr i32* @maybe_redefined_fn(i32* %r)
+; FIXME: We should not derive norecurse for potentially redefined functions!
+;        Function Attrs: noinline nounwind uwtable
+;        define linkonce_odr i32* @maybe_redefined_fn(i32* %r)
+;
+; CHECK: Function Attrs: noinline norecurse nounwind uwtable
+; CHECK: define i32* @calls_maybe_redefined_fn(i32* returned %r)
+; FIXME: We should not derive norecurse for potentially redefined functions!
+;        Function Attrs: noinline nounwind uwtable
+;        define i32* @calls_maybe_redefined_fn(i32* returned %r)
+define linkonce_odr i32* @maybe_redefined_fn(i32* %r) #0 {
+entry:
+  ret i32* %r
+}
+
+define i32* @calls_maybe_redefined_fn(i32* %r) #0 {
+entry:
+  %call = call i32* @maybe_redefined_fn(i32* %r)
+  ret i32* %r
+}
+
+
+; TEST returned argument goes through select and phi
+;
+; double select_and_phi(double b) {
+;   double x = b;
+;   if (b > 0)
+;     x = b;
+;   return b == 0? b : x;
+; }
+;
+; FIXME: returned on %b missing:
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define double @select_and_phi(double %b)
+define double @select_and_phi(double %b) #0 {
+entry:
+  %cmp = fcmp ogt double %b, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %phi = phi double [ %b, %if.then ], [ %b, %entry ]
+  %cmp1 = fcmp oeq double %b, 0.000000e+00
+  %sel = select i1 %cmp1, double %b, double %phi
+  ret double %sel
+}
+
+
+; TEST returned argument goes through recursion, select, and phi
+;
+; double recursion_select_and_phi(int a, double b) {
+;   double x = b;
+;   if (a-- > 0)
+;     x = recursion_select_and_phi(a, b);
+;   return b == 0? b : x;
+; }
+;
+; FIXME: returned on %b missing:
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define double @recursion_select_and_phi(i32 %a, double %b)
+define double @recursion_select_and_phi(i32 %a, double %b) #0 {
+entry:
+  %dec = add nsw i32 %a, -1
+  %cmp = icmp sgt i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = call double @recursion_select_and_phi(i32 %dec, double %b)
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %phi = phi double [ %call, %if.then ], [ %b, %entry ]
+  %cmp1 = fcmp oeq double %b, 0.000000e+00
+  %sel = select i1 %cmp1, double %b, double %phi
+  ret double %sel
+}
+
+
+; TEST returned argument goes through bitcasts
+;
+; double* bitcast(int* b) {
+;   return (double*)b;
+; }
+;
+; FIXME: returned on %b missing:
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define double* @bitcast(i32* readnone %b)
+define double* @bitcast(i32* %b) #0 {
+entry:
+  %bc0 = bitcast i32* %b to double*
+  ret double* %bc0
+}
+
+
+; TEST returned argument goes through select and phi interleaved with bitcasts
+;
+; double* bitcasts_select_and_phi(int* b) {
+;   double* x = b;
+;   if (b == 0)
+;     x = b;
+;   return b != 0 ? b : x;
+; }
+;
+; FIXME: returned on %b missing:
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define double* @bitcasts_select_and_phi(i32* readnone %b)
+define double* @bitcasts_select_and_phi(i32* %b) #0 {
+entry:
+  %bc0 = bitcast i32* %b to double*
+  %cmp = icmp eq double* %bc0, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %bc1 = bitcast i32* %b to double*
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %phi = phi double* [ %bc1, %if.then ], [ %bc0, %entry ]
+  %bc2 = bitcast double* %phi to i8*
+  %bc3 = bitcast i32* %b to i8*
+  %cmp2 = icmp ne double* %bc0, null
+  %sel = select i1 %cmp2, i8* %bc2, i8* %bc3
+  %bc4 = bitcast i8* %sel to double*
+  ret double* %bc4
+}
+
+
+; TEST return argument or argument or undef
+;
+; double* ret_arg_arg_undef(int* b) {
+;   if (b == 0)
+;     return (double*)b;
+;   if (b == 0)
+;     return (double*)b;
+;   /* return undef */
+; }
+;
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK:     define double* @ret_arg_arg_undef(i32* readnone %b)
+define double* @ret_arg_arg_undef(i32* %b) #0 {
+entry:
+  %bc0 = bitcast i32* %b to double*
+  %cmp = icmp eq double* %bc0, null
+  br i1 %cmp, label %ret_arg0, label %if.end
+
+ret_arg0:
+  %bc1 = bitcast i32* %b to double*
+  ret double* %bc1
+
+if.end:
+  br i1 %cmp, label %ret_arg1, label %ret_undef
+
+ret_arg1:
+  ret double* %bc0
+
+ret_undef:
+  ret double *undef
+}
+
+
+; TEST return undef or argument or argument
+;
+; double* ret_undef_arg_arg(int* b) {
+;   if (b == 0)
+;     return (double*)b;
+;   if (b == 0)
+;     return (double*)b;
+;   /* return undef */
+; }
+;
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK:     define double* @ret_undef_arg_arg(i32* readnone %b)
+define double* @ret_undef_arg_arg(i32* %b) #0 {
+entry:
+  %bc0 = bitcast i32* %b to double*
+  %cmp = icmp eq double* %bc0, null
+  br i1 %cmp, label %ret_undef, label %if.end
+
+ret_undef:
+  ret double *undef
+
+if.end:
+  br i1 %cmp, label %ret_arg0, label %ret_arg1
+
+ret_arg0:
+  ret double* %bc0
+
+ret_arg1:
+  %bc1 = bitcast i32* %b to double*
+  ret double* %bc1
+}
+
+
+; TEST return undef or argument or undef
+;
+; double* ret_undef_arg_undef(int* b) {
+;   if (b == 0)
+;     /* return undef */
+;   if (b == 0)
+;     return (double*)b;
+;   /* return undef */
+; }
+;
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK:     define double* @ret_undef_arg_undef(i32* readnone %b)
+define double* @ret_undef_arg_undef(i32* %b) #0 {
+entry:
+  %bc0 = bitcast i32* %b to double*
+  %cmp = icmp eq double* %bc0, null
+  br i1 %cmp, label %ret_undef0, label %if.end
+
+ret_undef0:
+  ret double *undef
+
+if.end:
+  br i1 %cmp, label %ret_arg, label %ret_undef1
+
+ret_arg:
+  ret double* %bc0
+
+ret_undef1:
+  ret double *undef
+}
+
+; TEST return argument or unknown call result
+;
+; int* ret_arg_or_unknown(int* b) {
+;   if (b == 0)
+;     return b;
+;   return unknown(b);
+; }
+;
+; Verify we do not assume b is returned>
+;
+; CHECK:     define i32* @ret_arg_or_unknown(i32* %b)
+; CHECK:     define i32* @ret_arg_or_unknown_through_phi(i32* %b)
+declare i32* @unknown(i32*)
+
+define i32* @ret_arg_or_unknown(i32* %b) #0 {
+entry:
+  %cmp = icmp eq i32* %b, null
+  br i1 %cmp, label %ret_arg, label %ret_unknown
+
+ret_arg:
+  ret i32* %b
+
+ret_unknown:
+  %call = call i32* @unknown(i32* %b)
+  ret i32* %call
+}
+
+define i32* @ret_arg_or_unknown_through_phi(i32* %b) #0 {
+entry:
+  %cmp = icmp eq i32* %b, null
+  br i1 %cmp, label %ret_arg, label %ret_unknown
+
+ret_arg:
+  br label %r
+
+ret_unknown:
+  %call = call i32* @unknown(i32* %b)
+  br label %r
+
+r:
+  %phi = phi i32* [ %b, %ret_arg ], [ %call, %ret_unknown ]
+  ret i32* %phi
+}
+
+attributes #0 = { noinline nounwind uwtable }
+
+; CHECK-NOT: attributes #
+; CHECK-DAG: attributes #{{[0-9]*}} = { noinline norecurse nounwind readnone uwtable }
+; CHECK-DAG: attributes #{{[0-9]*}} = { noinline nounwind readnone uwtable }
+; CHECK-DAG: attributes #{{[0-9]*}} = { noinline nounwind readonly uwtable }
+; CHECK-DAG: attributes #{{[0-9]*}} = { noinline nounwind uwtable }
+; CHECK-DAG: attributes #{{[0-9]*}} = { noinline norecurse nounwind uwtable }
+; CHECK-NOT: attributes #
diff --git a/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll b/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll
new file mode 100644
index 0000000000000..d691eb04860c5
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll
@@ -0,0 +1,134 @@
+; RUN: opt -functionattrs -S < %s | FileCheck %s
+;
+; Test cases specifically designed for the "no-return" function attribute.
+; We use FIXME's to indicate problems and missing attributes.
+;
+; TEST 1: singleton SCC void return type
+; TEST 2: singleton SCC int return type with a lot of recursive calls
+; TEST 3: endless loop, no return instruction
+; TEST 4: endless loop, dead return instruction
+; TEST 5: all paths contain a no-return function call
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+
+; TEST 1
+;
+; void srec0() {
+;   return srec0();
+; }
+;
+; FIXME: no-return missing
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define void @srec0()
+;
+define void @srec0() #0 {
+entry:
+  call void @srec0()
+  ret void
+}
+
+
+; TEST 2
+;
+; int srec16(int a) {
+;   return srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(srec16(a))))))))))))))));
+; }
+;
+; FIXME: no-return missing
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define i32 @srec16(i32 %a)
+;
+define i32 @srec16(i32 %a) #0 {
+entry:
+  %call = call i32 @srec16(i32 %a)
+  %call1 = call i32 @srec16(i32 %call)
+  %call2 = call i32 @srec16(i32 %call1)
+  %call3 = call i32 @srec16(i32 %call2)
+  %call4 = call i32 @srec16(i32 %call3)
+  %call5 = call i32 @srec16(i32 %call4)
+  %call6 = call i32 @srec16(i32 %call5)
+  %call7 = call i32 @srec16(i32 %call6)
+  %call8 = call i32 @srec16(i32 %call7)
+  %call9 = call i32 @srec16(i32 %call8)
+  %call10 = call i32 @srec16(i32 %call9)
+  %call11 = call i32 @srec16(i32 %call10)
+  %call12 = call i32 @srec16(i32 %call11)
+  %call13 = call i32 @srec16(i32 %call12)
+  %call14 = call i32 @srec16(i32 %call13)
+  %call15 = call i32 @srec16(i32 %call14)
+  ret i32 %call15
+}
+
+
+; TEST 3
+;
+; int endless_loop(int a) {
+;   while (1);
+; }
+;
+; FIXME: no-return missing
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define i32 @endless_loop(i32 %a)
+;
+define i32 @endless_loop(i32 %a) #0 {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  br label %while.body
+}
+
+
+; TEST 4
+;
+; int endless_loop(int a) {
+;   while (1);
+;   return a;
+; }
+;
+; FIXME: no-return missing
+; CHECK: Function Attrs: noinline norecurse nounwind readnone uwtable
+; CHECK: define i32 @dead_return(i32 returned %a)
+;
+define i32 @dead_return(i32 %a) #0 {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  br label %while.body
+
+return:                                           ; No predecessors!
+  ret i32 %a
+}
+
+
+; TEST 5
+;
+; int multiple_noreturn_calls(int a) {
+;   return a == 0 ? endless_loop(a) : srec16(a);
+; }
+;
+; FIXME: no-return missing
+; CHECK: Function Attrs: noinline nounwind readnone uwtable
+; CHECK: define i32 @multiple_noreturn_calls(i32 %a)
+;
+define i32 @multiple_noreturn_calls(i32 %a) #0 {
+entry:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %entry
+  %call = call i32 @endless_loop(i32 %a)
+  br label %cond.end
+
+cond.false:                                       ; preds = %entry
+  %call1 = call i32 @srec16(i32 %a)
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %call, %cond.true ], [ %call1, %cond.false ]
+  ret i32 %cond
+}
+
+attributes #0 = { noinline nounwind uwtable }
diff --git a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
new file mode 100644
index 0000000000000..8fea3f9656327
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -0,0 +1,164 @@
+; RUN: opt -S -functionattrs -enable-nonnull-arg-prop %s | FileCheck %s
+;
+; This is an evolved example to stress test SCC parameter attribute propagation.
+; The SCC in this test is made up of the following six function, three of which
+; are internal and three externally visible:
+;
+; static int *internal_ret0_nw(int *n0, int *w0);
+; static int *internal_ret1_rw(int *r0, int *w0);
+; static int *internal_ret1_rrw(int *r0, int *r1, int *w0);
+;        int *external_ret2_nrw(int *n0, int *r0, int *w0);
+;        int *external_sink_ret2_nrw(int *n0, int *r0, int *w0);
+;        int *external_source_ret2_nrw(int *n0, int *r0, int *w0);
+;
+; The top four functions call each other while the "sink" function will not
+; call anything and the "source" function will not be called in this module.
+; The names of the functions define the returned parameter (X for "_retX_"),
+; as well as how the parameters are (transitively) used (n = readnone,
+; r = readonly, w = writeonly).
+;
+; What we should see is something along the lines of:
+;   1 - Number of functions marked as norecurse
+;   6 - Number of functions marked argmemonly
+;   6 - Number of functions marked as nounwind
+;  16 - Number of arguments marked nocapture
+;   4 - Number of arguments marked readnone
+;   6 - Number of arguments marked writeonly
+;   6 - Number of arguments marked readonly
+;   6 - Number of arguments marked returned
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK: Function Attrs: nounwind
+; CHECK-NEXT: define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
+entry:
+  %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+  %call1 = call i32* @internal_ret1_rrw(i32* %r0, i32* %r0, i32* %w0)
+  %call2 = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+  %call3 = call i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+  ret i32* %call3
+}
+
+; CHECK: Function Attrs: nounwind
+; CHECK-NEXT: define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
+entry:
+  %r0 = alloca i32, align 4
+  %r1 = alloca i32, align 4
+  %tobool = icmp ne i32* %n0, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  store i32 3, i32* %r0, align 4
+  store i32 5, i32* %r1, align 4
+  store i32 1, i32* %w0, align 4
+  %call = call i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+  %call1 = call i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+  %call2 = call i32* @external_ret2_nrw(i32* %n0, i32* %r1, i32* %w0)
+  %call3 = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+  %call4 = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r1, i32* %w0)
+  %call5 = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i32* [ %call5, %if.end ], [ %n0, %if.then ]
+  ret i32* %retval.0
+}
+
+; CHECK: Function Attrs: nounwind
+; CHECK-NEXT: define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0)
+define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
+entry:
+  %0 = load i32, i32* %r0, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %call = call i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+  %1 = load i32, i32* %r0, align 4
+  %2 = load i32, i32* %r1, align 4
+  %add = add nsw i32 %1, %2
+  store i32 %add, i32* %w0, align 4
+  %call1 = call i32* @internal_ret1_rw(i32* %r1, i32* %w0)
+  %call2 = call i32* @internal_ret0_nw(i32* %r0, i32* %w0)
+  %call3 = call i32* @internal_ret0_nw(i32* %w0, i32* %w0)
+  %call4 = call i32* @external_ret2_nrw(i32* %r0, i32* %r1, i32* %w0)
+  %call5 = call i32* @external_ret2_nrw(i32* %r1, i32* %r0, i32* %w0)
+  %call6 = call i32* @external_sink_ret2_nrw(i32* %r0, i32* %r1, i32* %w0)
+  %call7 = call i32* @external_sink_ret2_nrw(i32* %r1, i32* %r0, i32* %w0)
+  %call8 = call i32* @internal_ret0_nw(i32* %r1, i32* %w0)
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i32* [ %call8, %if.end ], [ %r1, %if.then ]
+  ret i32* %retval.0
+}
+
+; CHECK: Function Attrs: norecurse nounwind
+; CHECK-NEXT: define i32* @external_sink_ret2_nrw(i32* readnone %n0, i32* nocapture readonly %r0, i32* returned %w0)
+define i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
+entry:
+  %tobool = icmp ne i32* %n0, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, i32* %r0, align 4
+  store i32 %0, i32* %w0, align 4
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  ret i32* %w0
+}
+
+; CHECK: Function Attrs: nounwind
+; CHECK-NEXT: define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0)
+define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
+entry:
+  %0 = load i32, i32* %r0, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  br label %return
+
+if.end:                                           ; preds = %entry
+  %call = call i32* @internal_ret1_rrw(i32* %r0, i32* %r0, i32* %w0)
+  %1 = load i32, i32* %r0, align 4
+  store i32 %1, i32* %w0, align 4
+  %call1 = call i32* @internal_ret0_nw(i32* %r0, i32* %w0)
+  %call2 = call i32* @internal_ret0_nw(i32* %w0, i32* %w0)
+  %call3 = call i32* @external_sink_ret2_nrw(i32* %r0, i32* %r0, i32* %w0)
+  %call4 = call i32* @external_ret2_nrw(i32* %r0, i32* %r0, i32* %w0)
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %retval.0 = phi i32* [ %call4, %if.end ], [ %w0, %if.then ]
+  ret i32* %retval.0
+}
+
+; CHECK: Function Attrs: nounwind
+; CHECK-NEXT: define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
+entry:
+  %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+  %call1 = call i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)
+  ret i32* %call1
+}
+
+; Verify that we see only expected attribute sets, the above lines only check
+; for a subset relation.
+;
+; CHECK-NOT: attributes #
+; CHECK: attributes #{{.*}} = { nounwind }
+; CHECK: attributes #{{.*}} = { norecurse nounwind }
+; CHECK-NOT: attributes #
diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
index 3728a71797243..0521986bd1bb8 100644
--- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -1,10 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s
 @x = global i32 0
 
 declare void @test1_1(i8* %x1_1, i8* readonly %y1_1, ...)
 
+; NOTE: readonly for %y1_2 would be OK here but not for the similar situation in test13.
+;
 ; CHECK: define void @test1_2(i8* %x1_2, i8* readonly %y1_2, i8* %z1_2)
 define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) {
   call void (i8*, i8*, ...) @test1_1(i8* %x1_2, i8* %y1_2, i8* %z1_2)
@@ -113,3 +114,31 @@ define i32 @volatile_load(i32* %p) {
   %load = load volatile i32, i32* %p
   ret i32 %load
 }
+
+declare void @escape_readonly_ptr(i8** %addr, i8* readnone %ptr)
+declare void @escape_readnone_ptr(i8** %addr, i8* readonly %ptr)
+
+; The argument pointer %escaped_then_written cannot be marked readnone/only even
+; though the only direct use, in @escape_readnone_ptr/@escape_readonly_ptr,
+; is marked as readnone/only. However, the functions can write the pointer into
+; %addr, causing the store to write to %escaped_then_written.
+;
+; FIXME: This test currently exposes a bug!
+;
+; BUG: define void @unsound_readnone(i8* %ignored, i8* readnone %escaped_then_written)
+; BUG: define void @unsound_readonly(i8* %ignored, i8* readonly %escaped_then_written)
+define void @unsound_readnone(i8* %ignored, i8* %escaped_then_written) {
+  %addr = alloca i8*
+  call void @escape_readnone_ptr(i8** %addr, i8* %escaped_then_written)
+  %addr.ld = load i8*, i8** %addr
+  store i8 0, i8* %addr.ld
+  ret void
+}
+
+define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) {
+  %addr = alloca i8*
+  call void @escape_readonly_ptr(i8** %addr, i8* %escaped_then_written)
+  %addr.ld = load i8*, i8** %addr
+  store i8 0, i8* %addr.ld
+  ret void
+}

From aade782a98e06a141309644c7b92fd62df0e46cf Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Wed, 5 Jun 2019 03:02:24 +0000
Subject: [PATCH 1093/1176] [Attributor] Pass infrastructure and fixpoint
 framework

NOTE: Note that no attributes are derived yet. This patch will not go in
      alone but only with others that derive attributes. The framework is
      split for review purposes.

This commit introduces the Attributor pass infrastructure and fixpoint
iteration framework. Further patches will introduce abstract attributes
into this framework.

In a nutshell, the Attributor will update instances of abstract
arguments until a fixpoint, or a "timeout", is reached. Communication
between the Attributor and the abstract attributes that are derived is
restricted to the AbstractState and AbstractAttribute interfaces.

Please see the file comment in Attributor.h for detailed information
including design decisions and typical use case. Also consider the class
documentation for Attributor, AbstractState, and AbstractAttribute.

Reviewers: chandlerc, homerdin, hfinkel, fedor.sergeev, sanjoy, spatel, nlopes, nicholas, reames

Subscribers: mehdi_amini, mgorny, hiraditya, bollu, steven_wu, dexonsmith, dang, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59918

llvm-svn: 362578
---
 llvm/include/llvm/InitializePasses.h          |   1 +
 llvm/include/llvm/LinkAllPasses.h             |   2 +
 llvm/include/llvm/Transforms/IPO/Attributor.h | 565 ++++++++++++++++++
 llvm/lib/LTO/LTOCodeGenerator.cpp             |   1 +
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 llvm/lib/Transforms/IPO/Attributor.cpp        | 529 ++++++++++++++++
 llvm/lib/Transforms/IPO/CMakeLists.txt        |   1 +
 llvm/lib/Transforms/IPO/IPO.cpp               |   1 +
 .../lib/Transforms/IPO/PassManagerBuilder.cpp |  11 +-
 llvm/test/Other/opt-O2-pipeline.ll            |   1 +
 llvm/test/Other/opt-O3-pipeline.ll            |   1 +
 llvm/test/Other/opt-Os-pipeline.ll            |   1 +
 .../Transforms/FunctionAttrs/arg_nocapture.ll |   3 +-
 .../Transforms/FunctionAttrs/arg_returned.ll  |   3 +-
 .../Transforms/FunctionAttrs/fn_noreturn.ll   |   3 +-
 .../read_write_returned_arguments_scc.ll      |   3 +-
 17 files changed, 1123 insertions(+), 5 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/IPO/Attributor.h
 create mode 100644 llvm/lib/Transforms/IPO/Attributor.cpp

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index fa7909f14cd3d..aa61bf9c3b5a3 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -74,6 +74,7 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
 void initializeArgPromotionPass(PassRegistry&);
 void initializeAssumptionCacheTrackerPass(PassRegistry&);
 void initializeAtomicExpandPass(PassRegistry&);
+void initializeAttributorLegacyPassPass(PassRegistry&);
 void initializeBDCELegacyPassPass(PassRegistry&);
 void initializeBarrierNoopPass(PassRegistry&);
 void initializeBasicAAWrapperPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index a8354125c9d38..472e320ad9c19 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -41,6 +41,7 @@
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/Instrumentation.h"
@@ -188,6 +189,7 @@ namespace {
       (void) llvm::createPostDomTree();
       (void) llvm::createInstructionNamerPass();
       (void) llvm::createMetaRenamerPass();
+      (void) llvm::createAttributorLegacyPass();
       (void) llvm::createPostOrderFunctionAttrsLegacyPass();
       (void) llvm::createReversePostOrderFunctionAttrsPass();
       (void) llvm::createMergeFunctionsPass();
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
new file mode 100644
index 0000000000000..67dd5825849b4
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -0,0 +1,565 @@
+//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Attributor: An inter procedural (abstract) "attribute" deduction framework.
+//
+// The Attributor framework is an inter procedural abstract analysis (fixpoint
+// iteration analysis). The goal is to allow easy deduction of new attributes as
+// well as information exchange between abstract attributes in-flight.
+//
+// The Attributor class is the driver and the link between the various abstract
+// attributes. The Attributor will iterate until a fixpoint state is reached by
+// all abstract attributes in-flight, or until it will enforce a pessimistic fix
+// point because an iteration limit is reached.
+//
+// Abstract attributes, derived from the AbstractAttribute class, actually
+// describe properties of the code. They can correspond to actual LLVM-IR
+// attributes, or they can be more general, ultimately unrelated to LLVM-IR
+// attributes. The latter is useful when an abstract attributes provides
+// information to other abstract attributes in-flight but we might not want to
+// manifest the information. The Attributor allows to query in-flight abstract
+// attributes through the `Attributor::getAAFor` method (see the method
+// description for an example). If the method is used by an abstract attribute
+// P, and it results in an abstract attribute Q, the Attributor will
+// automatically capture a potential dependence from Q to P. This dependence
+// will cause P to be reevaluated whenever Q changes in the future.
+//
+// The Attributor will only reevaluated abstract attributes that might have
+// changed since the last iteration. That means that the Attribute will not
+// revisit all instructions/blocks/functions in the module but only query
+// an update from a subset of the abstract attributes.
+//
+// The update method `AbstractAttribute::updateImpl` is implemented by the
+// specific "abstract attribute" subclasses. The method is invoked whenever the
+// currently assumed state (see the AbstractState class) might not be valid
+// anymore. This can, for example, happen if the state was dependent on another
+// abstract attribute that changed. In every invocation, the update method has
+// to adjust the internal state of an abstract attribute to a point that is
+// justifiable by the underlying IR and the current state of abstract attributes
+// in-flight. Since the IR is given and assumed to be valid, the information
+// derived from it can be assumed to hold. However, information derived from
+// other abstract attributes is conditional on various things. If the justifying
+// state changed, the `updateImpl` has to revisit the situation and potentially
+// find another justification or limit the optimistic assumes made.
+//
+// Change is the key in this framework. Until a state of no-change, thus a
+// fixpoint, is reached, the Attributor will query the abstract attributes
+// in-flight to re-evaluate their state. If the (current) state is too
+// optimistic, hence it cannot be justified anymore through other abstract
+// attributes or the state of the IR, the state of the abstract attribute will
+// have to change. Generally, we assume abstract attribute state to be a finite
+// height lattice and the update function to be monotone. However, these
+// conditions are not enforced because the iteration limit will guarantee
+// termination. If an optimistic fixpoint is reached, or a pessimistic fix
+// point is enforced after a timeout, the abstract attributes are tasked to
+// manifest their result in the IR for passes to come.
+//
+// Attribute manifestation is not mandatory. If desired, there is support to
+// generate a single LLVM-IR attribute already in the AbstractAttribute base
+// class. In the simplest case, a subclass overloads
+// `AbstractAttribute::getManifestPosition()` and
+// `AbstractAttribute::getAttrKind()` to return the appropriate values. The
+// Attributor manifestation framework will then create and place a new attribute
+// if it is allowed to do so (based on the abstract state). Other use cases can
+// be achieved by overloading other abstract attribute methods.
+//
+//
+// The "mechanics" of adding a new "abstract attribute":
+// - Define a class (transitively) inheriting from AbstractAttribute and one
+//   (which could be the same) that (transitively) inherits from AbstractState.
+//   For the latter, consider the already available BooleanState and
+//   IntegerState if they fit your needs, e.g., you require only a bit-encoding.
+// - Implement all pure methods. Also use overloading if the attribute is not
+//   conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
+//   an argument, call site argument, function return value, or function. See
+//   the class and method descriptions for more information on the two
+//   "Abstract" classes and their respective methods.
+// - Register opportunities for the new abstract attribute in the
+//   `Attributor::identifyDefaultAbstractAttributes` method if it should be
+//   counted as a 'default' attribute.
+// - Add sufficient tests.
+// - Add a Statistics object for bookkeeping. If it is a simple (set of)
+//   attribute(s) manifested through the Attributor manifestation framework, see
+//   the bookkeeping function in Attributor.cpp.
+// - If instructions with a certain opcode are interesting to the attribute, add
+//   that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
+//   will make it possible to query all those instructions through the
+//   `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
+//   need to traverse the IR repeatedly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct AbstractAttribute;
+struct InformationCache;
+
+class Function;
+
+/// Simple enum class that forces the status to be spelled out explicitly.
+///
+///{
+enum class ChangeStatus {
+  CHANGED,
+  UNCHANGED,
+};
+
+ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
+ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
+///}
+
+/// The fixpoint analysis framework that orchestrates the attribute deduction.
+///
+/// The Attributor provides a general abstract analysis framework (guided
+/// fixpoint iteration) as well as helper functions for the deduction of
+/// (LLVM-IR) attributes. However, also other code properties can be deduced,
+/// propagated, and ultimately manifested through the Attributor framework. This
+/// is particularly useful if these properties interact with attributes and a
+/// co-scheduled deduction allows to improve the solution. Even if not, thus if
+/// attributes/properties are completely isolated, they should use the
+/// Attributor framework to reduce the number of fixpoint iteration frameworks
+/// in the code base. Note that the Attributor design makes sure that isolated
+/// attributes are not impacted, in any way, by others derived at the same time
+/// if there is no cross-reasoning performed.
+///
+/// The public facing interface of the Attributor is kept simple and basically
+/// allows abstract attributes to one thing, query abstract attributes
+/// in-flight. There are two reasons to do this:
+///    a) The optimistic state of one abstract attribute can justify an
+///       optimistic state of another, allowing to framework to end up with an
+///       optimistic (=best possible) fixpoint instead of one based solely on
+///       information in the IR.
+///    b) This avoids reimplementing various kinds of lookups, e.g., to check
+///       for existing IR attributes, in favor of a single lookups interface
+///       provided by an abstract attribute subclass.
+///
+/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
+///       described in the file comment.
+struct Attributor {
+  ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); }
+
+  /// Run the analyses until a fixpoint is reached or enforced (timeout).
+  ///
+  /// The attributes registered with this Attributor can be used after as long
+  /// as the Attributor is not destroyed (it owns the attributes now).
+  ///
+  /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED.
+  ChangeStatus run();
+
+  /// Lookup an abstract attribute of type \p AAType anchored at value \p V and
+  /// argument number \p ArgNo. If no attribute is found and \p V is a call base
+  /// instruction, the called function is tried as a value next. Thus, the
+  /// returned abstract attribute might be anchored at the callee of \p V.
+  ///
+  /// This method is the only (supported) way an abstract attribute can retrieve
+  /// information from another abstract attribute. As an example, take an
+  /// abstract attribute that determines the memory access behavior for a
+  /// argument (readnone, readonly, ...). It should use `getAAFor` to get the
+  /// most optimistic information for other abstract attributes in-flight, e.g.
+  /// the one reasoning about the "captured" state for the argument or the one
+  /// reasoning on the memory access behavior of the function as a whole.
+  template <typename AAType>
+  const AAType *getAAFor(AbstractAttribute &QueryingAA, const Value &V,
+                         int ArgNo = -1) {
+    static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+                  "Cannot query an attribute with a type not derived from "
+                  "'AbstractAttribute'!");
+    assert(AAType::ID != Attribute::None &&
+           "Cannot lookup generic abstract attributes!");
+
+    // Determine the argument number automatically for llvm::Arguments.
+    if (auto *Arg = dyn_cast<Argument>(&V))
+      ArgNo = Arg->getArgNo();
+
+    // If a function was given together with an argument number, perform the
+    // lookup for the actual argument instead. Don't do it for variadic
+    // arguments.
+    if (ArgNo >= 0 && isa<Function>(&V) &&
+        cast<Function>(&V)->arg_size() > (size_t)ArgNo)
+      return getAAFor<AAType>(
+          QueryingAA, *(cast<Function>(&V)->arg_begin() + ArgNo), ArgNo);
+
+    // Lookup the abstract attribute of type AAType. If found, return it after
+    // registering a dependence of QueryingAA on the one returned attribute.
+    const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo});
+    if (AAType *AA = static_cast<AAType *>(
+            KindToAbstractAttributeMap.lookup(AAType::ID))) {
+      QueryMap[AA].insert(&QueryingAA);
+      return AA;
+    }
+
+    // If no abstract attribute was found and we look for a call site argument,
+    // defer to the actual argument instead.
+    ImmutableCallSite ICS(&V);
+    if (ICS && ICS.getCalledValue())
+      return getAAFor<AAType>(QueryingAA, *ICS.getCalledValue(), ArgNo);
+
+    // No matching attribute found
+    return nullptr;
+  }
+
+  /// Introduce a new abstract attribute into the fixpoint analysis.
+  ///
+  /// Note that ownership of the attribute is given to the Attributor. It will
+  /// invoke delete for the Attributor on destruction of the Attributor.
+  ///
+  /// Attributes are identified by
+  ///  (1) their anchored value (see AA.getAnchoredValue()),
+  ///  (2) their argument number (\p ArgNo, or Argument::getArgNo()), and
+  ///  (3) their default attribute kind (see AAType::ID).
+  template <typename AAType> AAType &registerAA(AAType &AA, int ArgNo = -1) {
+    static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+                  "Cannot register an attribute with a type not derived from "
+                  "'AbstractAttribute'!");
+
+    // Determine the anchor value and the argument number which are used to
+    // lookup the attribute together with AAType::ID.
+    Value &AnchoredVal = AA.getAnchoredValue();
+    if (auto *Arg = dyn_cast<Argument>(&AnchoredVal))
+      ArgNo = Arg->getArgNo();
+
+    // Put the attribute in the lookup map structure and the container we use to
+    // keep track of all attributes.
+    AAMap[{&AnchoredVal, ArgNo}][AAType::ID] = &AA;
+    AllAbstractAttributes.push_back(&AA);
+    return AA;
+  }
+
+  /// Determine opportunities to derive 'default' attributes in \p F and create
+  /// abstract attribute objects for them.
+  ///
+  /// \param F The function that is checked for attribute opportunities.
+  /// \param InfoCache A cache for information queryable by the new attributes.
+  /// \param Whitelist If not null, a set limiting the attribute opportunities.
+  ///
+  /// Note that abstract attribute instances are generally created even if the
+  /// IR already contains the information they would deduce. The most important
+  /// reason for this is the single interface, the one of the abstract attribute
+  /// instance, which can be queried without the need to look at the IR in
+  /// various places.
+  void identifyDefaultAbstractAttributes(
+      Function &F, InformationCache &InfoCache,
+      DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist = nullptr);
+
+private:
+  /// The set of all abstract attributes.
+  ///{
+  using AAVector = SmallVector<AbstractAttribute *, 64>;
+  AAVector AllAbstractAttributes;
+  ///}
+
+  /// A nested map to lookup abstract attributes based on the anchored value and
+  /// an argument positions (or -1) on the outer level, and attribute kinds
+  /// (Attribute::AttrKind) on the inner level.
+  ///{
+  using KindToAbstractAttributeMap = DenseMap<unsigned, AbstractAttribute *>;
+  DenseMap<std::pair<const Value *, int>, KindToAbstractAttributeMap> AAMap;
+  ///}
+
+  /// A map from abstract attributes to the ones that queried them through calls
+  /// to the getAAFor<...>(...) method.
+  ///{
+  using QueryMapTy =
+      DenseMap<AbstractAttribute *, SetVector<AbstractAttribute *>>;
+  QueryMapTy QueryMap;
+  ///}
+};
+
+/// Data structure to hold cached (LLVM-IR) information.
+///
+/// All attributes are given an InformationCache object at creation time to
+/// avoid inspection of the IR by all of them individually. This default
+/// InformationCache will hold information required by 'default' attributes,
+/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
+/// is called.
+///
+/// If custom abstract attributes, registered manually through
+/// Attributor::registerAA(...), need more information, especially if it is not
+/// reusable, it is advised to inherit from the InformationCache and cast the
+/// instance down in the abstract attributes.
+struct InformationCache {
+  /// A map type from opcodes to instructions with this opcode.
+  using OpcodeInstMapTy = DenseMap<unsigned, SmallVector<Instruction *, 32>>;
+
+  /// Return the map that relates "interesting" opcodes with all instructions
+  /// with that opcode in \p F.
+  OpcodeInstMapTy &getOpcodeInstMapForFunction(Function &F) {
+    return FuncInstOpcodeMap[&F];
+  }
+
+  /// A vector type to hold instructions.
+  using InstructionVectorTy = std::vector<Instruction *>;
+
+  /// Return the instructions in \p F that may read or write memory.
+  InstructionVectorTy &getReadOrWriteInstsForFunction(Function &F) {
+    return FuncRWInstsMap[&F];
+  }
+
+private:
+  /// A map type from functions to opcode to instruction maps.
+  using FuncInstOpcodeMapTy = DenseMap<Function *, OpcodeInstMapTy>;
+
+  /// A map type from functions to their read or write instructions.
+  using FuncRWInstsMapTy = DenseMap<Function *, InstructionVectorTy>;
+
+  /// A nested map that remembers all instructions in a function with a certain
+  /// instruction opcode (Instruction::getOpcode()).
+  FuncInstOpcodeMapTy FuncInstOpcodeMap;
+
+  /// A map from functions to their instructions that may read or write memory.
+  FuncRWInstsMapTy FuncRWInstsMap;
+
+  /// Give the Attributor access to the members so
+  /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
+  friend struct Attributor;
+};
+
+/// An interface to query the internal state of an abstract attribute.
+///
+/// The abstract state is a minimal interface that allows the Attributor to
+/// communicate with the abstract attributes about their internal state without
+/// enforcing or exposing implementation details, e.g., the (existence of an)
+/// underlying lattice.
+///
+/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
+/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
+/// was reached or (4) a pessimistic fixpoint was enforced.
+///
+/// All methods need to be implemented by the subclass. For the common use case,
+/// a single boolean state or a bit-encoded state, the BooleanState and
+/// IntegerState classes are already provided. An abstract attribute can inherit
+/// from them to get the abstract state interface and additional methods to
+/// directly modify the state based if needed. See the class comments for help.
+struct AbstractState {
+  virtual ~AbstractState() {}
+
+  /// Return if this abstract state is in a valid state. If false, no
+  /// information provided should be used.
+  virtual bool isValidState() const = 0;
+
+  /// Return if this abstract state is fixed, thus does not need to be updated
+  /// if information changes as it cannot change itself.
+  virtual bool isAtFixpoint() const = 0;
+
+  /// Indicate that the abstract state should converge to the optimistic state.
+  ///
+  /// This will usually make the optimistically assumed state the known to be
+  /// true state.
+  virtual void indicateOptimisticFixpoint() = 0;
+
+  /// Indicate that the abstract state should converge to the pessimistic state.
+  ///
+  /// This will usually revert the optimistically assumed state to the known to
+  /// be true state.
+  virtual void indicatePessimisticFixpoint() = 0;
+};
+
+/// Base struct for all "concrete attribute" deductions.
+///
+/// The abstract attribute is a minimal interface that allows the Attributor to
+/// orchestrate the abstract/fixpoint analysis. The design allows to hide away
+/// implementation choices made for the subclasses but also to structure their
+/// implementation and simplify the use of other abstract attributes in-flight.
+///
+/// To allow easy creation of new attributes, most methods have default
+/// implementations. The ones that do not are generally straight forward, except
+/// `AbstractAttribute::updateImpl` which is the location of most reasoning
+/// associated with the abstract attribute. The update is invoked by the
+/// Attributor in case the situation used to justify the current optimistic
+/// state might have changed. The Attributor determines this automatically
+/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
+///
+/// The `updateImpl` method should inspect the IR and other abstract attributes
+/// in-flight to justify the best possible (=optimistic) state. The actual
+/// implementation is, similar to the underlying abstract state encoding, not
+/// exposed. In the most common case, the `updateImpl` will go through a list of
+/// reasons why its optimistic state is valid given the current information. If
+/// any combination of them holds and is sufficient to justify the current
+/// optimistic state, the method shall return UNCHAGED. If not, the optimistic
+/// state is adjusted to the situation and the method shall return CHANGED.
+///
+/// If the manifestation of the "concrete attribute" deduced by the subclass
+/// differs from the "default" behavior, which is a (set of) LLVM-IR
+/// attribute(s) for an argument, call site argument, function return value, or
+/// function, the `AbstractAttribute::manifest` method should be overloaded.
+///
+/// NOTE: If the state obtained via getState() is INVALID, thus if
+///       AbstractAttribute::getState().isValidState() returns false, no
+///       information provided by the methods of this class should be used.
+/// NOTE: The Attributor currently runs as a call graph SCC pass. Partially to
+///       this *current* choice there are certain limitations to what we can do.
+///       As a general rule of thumb, "concrete" abstract attributes should *for
+///       now* only perform "backward" information propagation. That means
+///       optimistic information obtained through abstract attributes should
+///       only be used at positions that precede the origin of the information
+///       with regards to the program flow. More practically, information can
+///       *now* be propagated from instructions to their enclosing function, but
+///       *not* from call sites to the called function. The mechanisms to allow
+///       both directions will be added in the future.
+/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
+///       described in the file comment.
+struct AbstractAttribute {
+
+  /// The positions attributes can be manifested in.
+  enum ManifestPosition {
+    MP_ARGUMENT,           ///< An attribute for a function argument.
+    MP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
+    MP_FUNCTION,           ///< An attribute for a function as a whole.
+    MP_RETURNED,           ///< An attribute for the function return value.
+  };
+
+  /// An abstract attribute associated with \p AssociatedVal and anchored at
+  /// \p AnchoredVal.
+  ///
+  /// \param AssociatedVal The value this abstract attribute is associated with.
+  /// \param AnchoredVal The value this abstract attributes is anchored at.
+  /// \param InfoCache Cached information accessible to the abstract attribute.
+  AbstractAttribute(Value *AssociatedVal, Value &AnchoredVal,
+                    InformationCache &InfoCache)
+      : AssociatedVal(AssociatedVal), AnchoredVal(AnchoredVal),
+        InfoCache(InfoCache) {}
+
+  /// An abstract attribute associated with and anchored at \p V.
+  AbstractAttribute(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(&V, V, InfoCache) {}
+
+  /// Virtual destructor.
+  virtual ~AbstractAttribute() {}
+
+  /// Initialize the state with the information in the Attributor \p A.
+  ///
+  /// This function is called by the Attributor once all abstract attributes
+  /// have been identified. It can and shall be used for task like:
+  ///  - identify existing knowledge in the IR and use it for the "known state"
+  ///  - perform any work that is not going to change over time, e.g., determine
+  ///    a subset of the IR, or attributes in-flight, that have to be looked at
+  ///    in the `updateImpl` method.
+  virtual void initialize(Attributor &A) {}
+
+  /// Return the internal abstract state for inspection.
+  virtual const AbstractState &getState() const = 0;
+
+  /// Return the value this abstract attribute is anchored with.
+  ///
+  /// The anchored value might not be the associated value if the latter is not
+  /// sufficient to determine where arguments will be manifested. This is mostly
+  /// the case for call site arguments as the value is not sufficient to
+  /// pinpoint them. Instead, we can use the call site as an anchor.
+  ///
+  ///{
+  Value &getAnchoredValue() { return AnchoredVal; }
+  const Value &getAnchoredValue() const { return AnchoredVal; }
+  ///}
+
+  /// Return the llvm::Function surrounding the anchored value.
+  ///
+  ///{
+  Function &getAnchorScope();
+  const Function &getAnchorScope() const;
+  ///}
+
+  /// Return the value this abstract attribute is associated with.
+  ///
+  /// The abstract state usually represents this value.
+  ///
+  ///{
+  virtual Value *getAssociatedValue() { return AssociatedVal; }
+  virtual const Value *getAssociatedValue() const { return AssociatedVal; }
+  ///}
+
+  /// Return the position this abstract state is manifested in.
+  virtual ManifestPosition getManifestPosition() const = 0;
+
+  /// Return the kind that identifies the abstract attribute implementation.
+  virtual Attribute::AttrKind getAttrKind() const = 0;
+
+  /// Return the deduced attributes in \p Attrs.
+  virtual void getDeducedAttributes(SmallVectorImpl<Attribute> &Attrs) const {
+    LLVMContext &Ctx = AnchoredVal.getContext();
+    Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
+  }
+
+  /// Helper functions, for debug purposes only.
+  ///{
+  virtual void print(raw_ostream &OS) const;
+  void dump() const { print(dbgs()); }
+
+  /// This function should return the "summarized" assumed state as string.
+  virtual const std::string getAsStr() const = 0;
+  ///}
+
+  /// Allow the Attributor access to the protected methods.
+  friend struct Attributor;
+
+protected:
+  /// Hook for the Attributor to trigger an update of the internal state.
+  ///
+  /// If this attribute is already fixed, this method will return UNCHANGED,
+  /// otherwise it delegates to `AbstractAttribute::updateImpl`.
+  ///
+  /// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
+  ChangeStatus update(Attributor &A);
+
+  /// Hook for the Attributor to trigger the manifestation of the information
+  /// represented by the abstract attribute in the LLVM-IR.
+  ///
+  /// \Return CHANGED if the IR was altered, otherwise UNCHANGED.
+  virtual ChangeStatus manifest(Attributor &A);
+
+  /// Return the internal abstract state for careful modification.
+  virtual AbstractState &getState() = 0;
+
+  /// The actual update/transfer function which has to be implemented by the
+  /// derived classes.
+  ///
+  /// If it is called, the environment has changed and we have to determine if
+  /// the current information is still valid or adjust it otherwise.
+  ///
+  /// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
+  virtual ChangeStatus updateImpl(Attributor &A) = 0;
+
+  /// The value this abstract attribute is associated with.
+  Value *AssociatedVal;
+
+  /// The value this abstract attribute is anchored at.
+  Value &AnchoredVal;
+
+  /// The information cache accessible to this abstract attribute.
+  InformationCache &InfoCache;
+};
+
+/// Forward declarations of output streams for debug purposes.
+///
+///{
+raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
+raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
+raw_ostream &operator<<(raw_ostream &OS, AbstractAttribute::ManifestPosition);
+raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
+///}
+
+struct AttributorPass : public PassInfoMixin<AttributorPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+Pass *createAttributorLegacyPass();
+
+/// ----------------------------------------------------------------------------
+///                       Abstract Attribute Classes
+/// ----------------------------------------------------------------------------
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 79d41ee896692..f6d955d59c6b4 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -130,6 +130,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
   initializeArgPromotionPass(R);
   initializeJumpThreadingPass(R);
   initializeSROALegacyPassPass(R);
+  initializeAttributorLegacyPassPass(R);
   initializePostOrderFunctionAttrsLegacyPassPass(R);
   initializeReversePostOrderFunctionAttrsLegacyPassPass(R);
   initializeGlobalsAAWrapperPassPass(R);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 3a0d0c29466ab..2da923086b205 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/IPO/CalledValuePropagation.h"
 #include "llvm/Transforms/IPO/ConstantMerge.h"
 #include "llvm/Transforms/IPO/CrossDSOCFI.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index cf601ec9ebc54..6e7f4177e5677 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -41,6 +41,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
 #define MODULE_PASS(NAME, CREATE_PASS)
 #endif
 MODULE_PASS("always-inline", AlwaysInlinerPass())
+MODULE_PASS("attributor", AttributorPass())
 MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
 MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
 MODULE_PASS("cg-profile", CGProfilePass())
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
new file mode 100644
index 0000000000000..56c32235fd384
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -0,0 +1,529 @@
+//===- Attributor.cpp - Module-wide attribute deduction -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an inter procedural pass that deduces and/or propagating
+// attributes. This is done in an abstract interpretation style fixpoint
+// iteration. See the Attributor.h file comment and the class descriptions in
+// that file for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "attributor"
+
+STATISTIC(NumFnWithExactDefinition,
+          "Number of function with exact definitions");
+STATISTIC(NumFnWithoutExactDefinition,
+          "Number of function without exact definitions");
+STATISTIC(NumAttributesTimedOut,
+          "Number of abstract attributes timed out before fixpoint");
+STATISTIC(NumAttributesValidFixpoint,
+          "Number of abstract attributes in a valid fixpoint state");
+STATISTIC(NumAttributesManifested,
+          "Number of abstract attributes manifested in IR");
+
+// TODO: Determine a good default value.
+//
+// In the LLVM-TS and SPEC2006, 32 seems to not induce compile time overheads
+// (when run with the first 5 abstract attributes). The results also indicate
+// that we never reach 32 iterations but always find a fixpoint sooner.
+//
+// This will become more evolved once we perform two interleaved fixpoint
+// iterations: bottom-up and top-down.
+static cl::opt<unsigned>
+    MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
+                          cl::desc("Maximal number of fixpoint iterations."),
+                          cl::init(32));
+
+static cl::opt<bool> DisableAttributor(
+    "attributor-disable", cl::Hidden,
+    cl::desc("Disable the attributor inter-procedural deduction pass."),
+    cl::init(false));
+
+static cl::opt<bool> VerifyAttributor(
+    "attributor-verify", cl::Hidden,
+    cl::desc("Verify the Attributor deduction and "
+             "manifestation of attributes -- may issue false-positive errors"),
+    cl::init(false));
+
+/// Logic operators for the change status enum class.
+///
+///{
+ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) {
+  return l == ChangeStatus::CHANGED ? l : r;
+}
+ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
+  return l == ChangeStatus::UNCHANGED ? l : r;
+}
+///}
+
+/// Helper to adjust the statistics.
+static void bookkeeping(AbstractAttribute::ManifestPosition MP,
+                        const Attribute &Attr) {
+  if (!AreStatisticsEnabled())
+    return;
+
+  if (!Attr.isEnumAttribute())
+    return;
+  switch (Attr.getKindAsEnum()) {
+  default:
+    return;
+  }
+}
+
+/// Helper to identify the correct offset into an attribute list.
+static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
+                             unsigned ArgNo = 0) {
+  switch (MP) {
+  case AbstractAttribute::MP_ARGUMENT:
+  case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+    return ArgNo + AttributeList::FirstArgIndex;
+  case AbstractAttribute::MP_FUNCTION:
+    return AttributeList::FunctionIndex;
+  case AbstractAttribute::MP_RETURNED:
+    return AttributeList::ReturnIndex;
+  }
+}
+
+/// Return true if \p New is equal or worse than \p Old.
+static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
+  if (!Old.isIntAttribute())
+    return true;
+
+  return Old.getValueAsInt() >= New.getValueAsInt();
+}
+
+/// Return true if the information provided by \p Attr was added to the
+/// attribute list \p Attrs. This is only the case if it was not already present
+/// in \p Attrs at the position describe by \p MP and \p ArgNo.
+static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
+                             AttributeList &Attrs,
+                             AbstractAttribute::ManifestPosition MP,
+                             unsigned ArgNo = 0) {
+  unsigned AttrIdx = getAttrIndex(MP, ArgNo);
+
+  if (Attr.isEnumAttribute()) {
+    Attribute::AttrKind Kind = Attr.getKindAsEnum();
+    if (Attrs.hasAttribute(AttrIdx, Kind))
+      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+        return false;
+    Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+    return true;
+  }
+  if (Attr.isStringAttribute()) {
+    StringRef Kind = Attr.getKindAsString();
+    if (Attrs.hasAttribute(AttrIdx, Kind))
+      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+        return false;
+    Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+    return true;
+  }
+
+  llvm_unreachable("Expected enum or string attribute!");
+}
+
+ChangeStatus AbstractAttribute::update(Attributor &A) {
+  ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+  if (getState().isAtFixpoint())
+    return HasChanged;
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Update: " << *this << "\n");
+
+  HasChanged = updateImpl(A);
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Update " << HasChanged << " " << *this
+                    << "\n");
+
+  return HasChanged;
+}
+
+ChangeStatus AbstractAttribute::manifest(Attributor &A) {
+  assert(getState().isValidState() &&
+         "Attempted to manifest an invalid state!");
+  assert(getAssociatedValue() &&
+         "Attempted to manifest an attribute without associated value!");
+
+  ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+  SmallVector<Attribute, 4> DeducedAttrs;
+  getDeducedAttributes(DeducedAttrs);
+
+  Function &ScopeFn = getAnchorScope();
+  LLVMContext &Ctx = ScopeFn.getContext();
+  ManifestPosition MP = getManifestPosition();
+
+  AttributeList Attrs;
+  SmallVector<unsigned, 4> ArgNos;
+
+  // In the following some generic code that will manifest attributes in
+  // DeducedAttrs if they improve the current IR. Due to the different
+  // annotation positions we use the underlying AttributeList interface.
+  // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations.
+
+  switch (MP) {
+  case MP_ARGUMENT:
+    ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo());
+    Attrs = ScopeFn.getAttributes();
+    break;
+  case MP_FUNCTION:
+  case MP_RETURNED:
+    ArgNos.push_back(0);
+    Attrs = ScopeFn.getAttributes();
+    break;
+  case MP_CALL_SITE_ARGUMENT: {
+    CallSite CS(&getAnchoredValue());
+    for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++)
+      if (CS.getArgOperand(u) == getAssociatedValue())
+        ArgNos.push_back(u);
+    Attrs = CS.getAttributes();
+  }
+  }
+
+  for (const Attribute &Attr : DeducedAttrs) {
+    for (unsigned ArgNo : ArgNos) {
+      if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo))
+        continue;
+
+      HasChanged = ChangeStatus::CHANGED;
+      bookkeeping(MP, Attr);
+    }
+  }
+
+  if (HasChanged == ChangeStatus::UNCHANGED)
+    return HasChanged;
+
+  switch (MP) {
+  case MP_ARGUMENT:
+  case MP_FUNCTION:
+  case MP_RETURNED:
+    ScopeFn.setAttributes(Attrs);
+    break;
+  case MP_CALL_SITE_ARGUMENT:
+    CallSite(&getAnchoredValue()).setAttributes(Attrs);
+  }
+
+  return HasChanged;
+}
+
+Function &AbstractAttribute::getAnchorScope() {
+  Value &V = getAnchoredValue();
+  if (isa<Function>(V))
+    return cast<Function>(V);
+  if (isa<Argument>(V))
+    return *cast<Argument>(V).getParent();
+  if (isa<Instruction>(V))
+    return *cast<Instruction>(V).getFunction();
+  llvm_unreachable("No scope for anchored value found!");
+}
+
+const Function &AbstractAttribute::getAnchorScope() const {
+  return const_cast<AbstractAttribute *>(this)->getAnchorScope();
+}
+
+/// ----------------------------------------------------------------------------
+///                               Attributor
+/// ----------------------------------------------------------------------------
+
+ChangeStatus Attributor::run() {
+  // Initialize all abstract attributes.
+  for (AbstractAttribute *AA : AllAbstractAttributes)
+    AA->initialize(*this);
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
+                    << AllAbstractAttributes.size()
+                    << " abstract attributes.\n");
+
+  // Now that all abstract attributes are collected and initialized we start the
+  // abstract analysis.
+
+  unsigned IterationCounter = 1;
+
+  SmallVector<AbstractAttribute *, 64> ChangedAAs;
+  SetVector<AbstractAttribute *> Worklist;
+  Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+
+  do {
+    LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
+                      << ", Worklist size: " << Worklist.size() << "\n");
+
+    // Add all abstract attributes that are potentially dependent on one that
+    // changed to the work list.
+    for (AbstractAttribute *ChangedAA : ChangedAAs) {
+      auto &QuerriedAAs = QueryMap[ChangedAA];
+      Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
+    }
+
+    // Reset the changed set.
+    ChangedAAs.clear();
+
+    // Update all abstract attribute in the work list and record the ones that
+    // changed.
+    for (AbstractAttribute *AA : Worklist)
+      if (AA->update(*this) == ChangeStatus::CHANGED)
+        ChangedAAs.push_back(AA);
+
+    // Reset the work list and repopulate with the changed abstract attributes.
+    // Note that dependent ones are added above.
+    Worklist.clear();
+    Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+
+  } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations);
+
+  LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
+                    << IterationCounter << "/" << MaxFixpointIterations
+                    << " iterations\n");
+
+  bool FinishedAtFixpoint = Worklist.empty();
+
+  // Reset abstract arguments not settled in a sound fixpoint by now. This
+  // happens when we stopped the fixpoint iteration early. Note that only the
+  // ones marked as "changed" *and* the ones transitively depending on them
+  // need to be reverted to a pessimistic state. Others might not be in a
+  // fixpoint state but we can use the optimistic results for them anyway.
+  SmallPtrSet<AbstractAttribute *, 32> Visited;
+  for (unsigned u = 0; u < ChangedAAs.size(); u++) {
+    AbstractAttribute *ChangedAA = ChangedAAs[u];
+    if (!Visited.insert(ChangedAA).second)
+      continue;
+
+    AbstractState &State = ChangedAA->getState();
+    if (!State.isAtFixpoint()) {
+      State.indicatePessimisticFixpoint();
+
+      NumAttributesTimedOut++;
+    }
+
+    auto &QuerriedAAs = QueryMap[ChangedAA];
+    ChangedAAs.append(QuerriedAAs.begin(), QuerriedAAs.end());
+  }
+
+  LLVM_DEBUG({
+    if (!Visited.empty())
+      dbgs() << "\n[Attributor] Finalized " << Visited.size()
+             << " abstract attributes.\n";
+  });
+
+  unsigned NumManifested = 0;
+  unsigned NumAtFixpoint = 0;
+  ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
+  for (AbstractAttribute *AA : AllAbstractAttributes) {
+    AbstractState &State = AA->getState();
+
+    // If there is not already a fixpoint reached, we can now take the
+    // optimistic state. This is correct because we enforced a pessimistic one
+    // on abstract attributes that were transitively dependent on a changed one
+    // already above.
+    if (!State.isAtFixpoint())
+      State.indicateOptimisticFixpoint();
+
+    // If the state is invalid, we do not try to manifest it.
+    if (!State.isValidState())
+      continue;
+
+    // Manifest the state and record if we changed the IR.
+    ChangeStatus LocalChange = AA->manifest(*this);
+    ManifestChange = ManifestChange | LocalChange;
+
+    NumAtFixpoint++;
+    NumManifested += (LocalChange == ChangeStatus::CHANGED);
+  }
+
+  (void)NumManifested;
+  (void)NumAtFixpoint;
+  LLVM_DEBUG(dbgs() << "\n[Attributor] Manifested " << NumManifested
+                    << " arguments while " << NumAtFixpoint
+                    << " were in a valid fixpoint state\n");
+
+  // If verification is requested, we finished this run at a fixpoint, and the
+  // IR was changed, we re-run the whole fixpoint analysis, starting at
+  // re-initialization of the arguments. This re-run should not result in an IR
+  // change. Though, the (virtual) state of attributes at the end of the re-run
+  // might be more optimistic than the known state or the IR state if the better
+  // state cannot be manifested.
+  if (VerifyAttributor && FinishedAtFixpoint &&
+      ManifestChange == ChangeStatus::CHANGED) {
+    VerifyAttributor = false;
+    ChangeStatus VerifyStatus = run();
+    if (VerifyStatus != ChangeStatus::UNCHANGED)
+      llvm_unreachable(
+          "Attributor verification failed, re-run did result in an IR change "
+          "even after a fixpoint was reached in the original run. (False "
+          "positives possible!)");
+    VerifyAttributor = true;
+  }
+
+  NumAttributesManifested += NumManifested;
+  NumAttributesValidFixpoint += NumAtFixpoint;
+
+  return ManifestChange;
+}
+
+void Attributor::identifyDefaultAbstractAttributes(
+    Function &F, InformationCache &InfoCache,
+    DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+
+  // Walk all instructions to find more attribute opportunities and also
+  // interesting instructions that might be queried by abstract attributes
+  // during their initialization or update.
+  auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
+  auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
+
+  for (Instruction &I : instructions(&F)) {
+    bool IsInterestingOpcode = false;
+
+    // To allow easy access to all instructions in a function with a given
+    // opcode we store them in the InfoCache. As not all opcodes are interesting
+    // to concrete attributes we only cache the ones that are as identified in
+    // the following switch.
+    // Note: There are no concrete attributes now so this is initially empty.
+    switch (I.getOpcode()) {
+    default:
+      break;
+    }
+    if (IsInterestingOpcode)
+      InstOpcodeMap[I.getOpcode()].push_back(&I);
+    if (I.mayReadOrWriteMemory())
+      ReadOrWriteInsts.push_back(&I);
+  }
+}
+
+/// Helpers to ease debugging through output streams and print calls.
+///
+///{
+raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
+  return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+                              AbstractAttribute::ManifestPosition AP) {
+  switch (AP) {
+  case AbstractAttribute::MP_ARGUMENT:
+    return OS << "arg";
+  case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+    return OS << "cs_arg";
+  case AbstractAttribute::MP_FUNCTION:
+    return OS << "fn";
+  case AbstractAttribute::MP_RETURNED:
+    return OS << "fn_ret";
+  }
+  llvm_unreachable("Unknown attribute position!");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
+  return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
+  AA.print(OS);
+  return OS;
+}
+
+void AbstractAttribute::print(raw_ostream &OS) const {
+  OS << "[" << getManifestPosition() << "][" << getAsStr() << "]["
+     << AnchoredVal.getName() << "]";
+}
+///}
+
+/// ----------------------------------------------------------------------------
+///                       Pass (Manager) Boilerplate
+/// ----------------------------------------------------------------------------
+
+static bool runAttributorOnModule(Module &M) {
+  if (DisableAttributor)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size()
+                    << " functions.\n");
+
+  // Create an Attributor and initially empty information cache that is filled
+  // while we identify default attribute opportunities.
+  Attributor A;
+  InformationCache InfoCache;
+
+  for (Function &F : M) {
+    // TODO: Not all attributes require an exact definition. Find a way to
+    //       enable deduction for some but not all attributes in case the
+    //       definition might be changed at runtime, see also
+    //       http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
+    // TODO: We could always determine abstract attributes and if sufficient
+    //       information was found we could duplicate the functions that do not
+    //       have an exact definition.
+    if (!F.hasExactDefinition()) {
+      NumFnWithoutExactDefinition++;
+      continue;
+    }
+
+    // For now we ignore naked and optnone functions.
+    if (F.hasFnAttribute(Attribute::Naked) ||
+        F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+
+    NumFnWithExactDefinition++;
+
+    // Populate the Attributor with abstract attribute opportunities in the
+    // function and the information cache with IR information.
+    A.identifyDefaultAbstractAttributes(F, InfoCache);
+  }
+
+  return A.run() == ChangeStatus::CHANGED;
+}
+
+PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
+  if (runAttributorOnModule(M)) {
+    // FIXME: Think about passes we will preserve and add them here.
+    return PreservedAnalyses::none();
+  }
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct AttributorLegacyPass : public ModulePass {
+  static char ID;
+
+  AttributorLegacyPass() : ModulePass(ID) {
+    initializeAttributorLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override {
+    if (skipModule(M))
+      return false;
+    return runAttributorOnModule(M);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // FIXME: Think about passes we will preserve and add them here.
+    AU.setPreservesCFG();
+  }
+};
+
+} // end anonymous namespace
+
+Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
+
+char AttributorLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
+                      "Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
+                    "Deduce and propagate attributes", false, false)
+
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 7e2bca0f8f800..f1e40b8162f79 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMipo
   AlwaysInliner.cpp
   ArgumentPromotion.cpp
+  Attributor.cpp
   BarrierNoopPass.cpp
   BlockExtractor.cpp
   CalledValuePropagation.cpp
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index 75219e9cac422..34db75dd8b038 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -45,6 +45,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeLowerTypeTestsPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerLegacyPassPass(Registry);
+  initializeAttributorLegacyPassPass(Registry);
   initializePostOrderFunctionAttrsLegacyPassPass(Registry);
   initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
   initializePruneEHPass(Registry);
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 6ae7c859379a3..d451653533a7c 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
@@ -515,6 +516,10 @@ void PassManagerBuilder::populateModulePassManager(
 
   MPM.add(createIPSCCPPass());          // IP SCCP
   MPM.add(createCalledValuePropagationPass());
+
+  // Infer attributes on declarations, call sites, arguments, etc.
+  MPM.add(createAttributorLegacyPass());
+
   MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
   // Promote any localized global vars.
   MPM.add(createPromoteMemoryToRegisterPass());
@@ -819,6 +824,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
     // Attach metadata to indirect call sites indicating the set of functions
     // they may target at run-time. This should follow IPSCCP.
     PM.add(createCalledValuePropagationPass());
+
+    // Infer attributes on declarations, call sites, arguments, etc.
+    PM.add(createAttributorLegacyPass());
   }
 
   // Infer attributes about definitions. The readnone attribute in particular is
@@ -892,8 +900,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // link-time inlining, and visibility of nocapture attribute.
   PM.add(createTailCallEliminationPass());
 
-  // Run a few AA driven optimizations here and now, to cleanup the code.
+  // Infer attributes on declarations, call sites, arguments, etc.
   PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
+  // Run a few AA driven optimizations here and now, to cleanup the code.
   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
 
   PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll
index 12999c64026f6..8f4d98fe3c957 100644
--- a/llvm/test/Other/opt-O2-pipeline.ll
+++ b/llvm/test/Other/opt-O2-pipeline.ll
@@ -30,6 +30,7 @@
 ; CHECK-NEXT:     Interprocedural Sparse Conditional Constant Propagation
 ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
 ; CHECK-NEXT:     Called Value Propagation
+; CHECK-NEXT:     Deduce and propagate attributes
 ; CHECK-NEXT:     Global Variable Optimizer
 ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
 ; CHECK-NEXT:     FunctionPass Manager
diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll
index 366a138203866..07e20b80f46f3 100644
--- a/llvm/test/Other/opt-O3-pipeline.ll
+++ b/llvm/test/Other/opt-O3-pipeline.ll
@@ -33,6 +33,7 @@
 ; CHECK-NEXT:     Interprocedural Sparse Conditional Constant Propagation
 ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
 ; CHECK-NEXT:     Called Value Propagation
+; CHECK-NEXT:     Deduce and propagate attributes
 ; CHECK-NEXT:     Global Variable Optimizer
 ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
 ; CHECK-NEXT:     FunctionPass Manager
diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll
index c3a225d5e8d7e..bacda22ead162 100644
--- a/llvm/test/Other/opt-Os-pipeline.ll
+++ b/llvm/test/Other/opt-Os-pipeline.ll
@@ -30,6 +30,7 @@
 ; CHECK-NEXT:     Interprocedural Sparse Conditional Constant Propagation
 ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
 ; CHECK-NEXT:     Called Value Propagation
+; CHECK-NEXT:     Deduce and propagate attributes
 ; CHECK-NEXT:     Global Variable Optimizer
 ; CHECK-NEXT:       Unnamed pass: implement Pass::getPassName()
 ; CHECK-NEXT:     FunctionPass Manager
diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
index 25656d8c909aa..2a41b10361940 100644
--- a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
+++ b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
@@ -1,4 +1,5 @@
-; RUN: opt -functionattrs -S < %s | FileCheck %s
+; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
+; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
 ;
 ; Test cases specifically designed for the "no-capture" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
diff --git a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
index 2430fda5cbe7b..cc54dd9afde46 100644
--- a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -1,4 +1,5 @@
-; RUN: opt -functionattrs -S < %s | FileCheck %s
+; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
+; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
 ;
 ; Test cases specifically designed for the "returned" argument attribute.
 ; We use FIXME's to indicate problems and missing attributes.
diff --git a/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll b/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll
index d691eb04860c5..2f58588c79068 100644
--- a/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll
+++ b/llvm/test/Transforms/FunctionAttrs/fn_noreturn.ll
@@ -1,4 +1,5 @@
-; RUN: opt -functionattrs -S < %s | FileCheck %s
+; RUN: opt -functionattrs -attributor -attributor-disable=false -S < %s | FileCheck %s
+; RUN: opt -functionattrs -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
 ;
 ; Test cases specifically designed for the "no-return" function attribute.
 ; We use FIXME's to indicate problems and missing attributes.
diff --git a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
index 8fea3f9656327..0828db35fd232 100644
--- a/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
+++ b/llvm/test/Transforms/FunctionAttrs/read_write_returned_arguments_scc.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -functionattrs -enable-nonnull-arg-prop %s | FileCheck %s
+; RUN: opt -functionattrs -enable-nonnull-arg-prop -attributor -attributor-disable=false -S < %s | FileCheck %s
+; RUN: opt -functionattrs -enable-nonnull-arg-prop -attributor -attributor-disable=false -attributor-verify=true -S < %s | FileCheck %s
 ;
 ; This is an evolved example to stress test SCC parameter attribute propagation.
 ; The SCC in this test is made up of the following six function, three of which

From 2057f8366a40621f24209f886b1d95fb6fa25bdb Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Wed, 5 Jun 2019 03:04:46 +0000
Subject: [PATCH 1094/1176] Read .note.gnu.property sections and emit a merged
 .note.gnu.property section.

This patch also adds `--require-cet` option for the sake of testing.
The actual feature for IBT-aware PLT is not included in this patch.

This is a part of https://reviews.llvm.org/D59780. Submitting this
first should make it easy to work with a related change
(https://reviews.llvm.org/D62609).

Differential Revision: https://reviews.llvm.org/D62853

llvm-svn: 362579
---
 lld/ELF/Config.h                    |  2 +
 lld/ELF/Driver.cpp                  | 32 ++++++++++++
 lld/ELF/InputFiles.cpp              | 77 +++++++++++++++++++++++++++++
 lld/ELF/InputFiles.h                |  2 +
 lld/ELF/Options.td                  |  4 ++
 lld/ELF/SyntheticSections.cpp       | 29 +++++++++++
 lld/ELF/SyntheticSections.h         |  7 +++
 lld/ELF/Writer.cpp                  |  3 ++
 lld/test/ELF/Inputs/i386-cet1.s     | 16 ++++++
 lld/test/ELF/Inputs/i386-cet2.s     | 20 ++++++++
 lld/test/ELF/Inputs/i386-cet3.s     |  5 ++
 lld/test/ELF/Inputs/i386-cet4.s     | 16 ++++++
 lld/test/ELF/Inputs/x86-64-cet1.s   | 17 +++++++
 lld/test/ELF/Inputs/x86-64-cet2.s   | 22 +++++++++
 lld/test/ELF/Inputs/x86-64-cet3.s   |  5 ++
 lld/test/ELF/Inputs/x86-64-cet4.s   | 17 +++++++
 lld/test/ELF/i386-cet.s             | 48 ++++++++++++++++++
 lld/test/ELF/x86-64-cet.s           | 48 ++++++++++++++++++
 llvm/include/llvm/Object/ELFTypes.h |  2 +-
 19 files changed, 371 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/ELF/Inputs/i386-cet1.s
 create mode 100644 lld/test/ELF/Inputs/i386-cet2.s
 create mode 100644 lld/test/ELF/Inputs/i386-cet3.s
 create mode 100644 lld/test/ELF/Inputs/i386-cet4.s
 create mode 100644 lld/test/ELF/Inputs/x86-64-cet1.s
 create mode 100644 lld/test/ELF/Inputs/x86-64-cet2.s
 create mode 100644 lld/test/ELF/Inputs/x86-64-cet3.s
 create mode 100644 lld/test/ELF/Inputs/x86-64-cet4.s
 create mode 100644 lld/test/ELF/i386-cet.s
 create mode 100644 lld/test/ELF/x86-64-cet.s

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index ff48fd5f26410..085674b51d733 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -82,6 +82,7 @@ struct VersionDefinition {
 // Most fields are initialized by the driver.
 struct Configuration {
   uint8_t OSABI = 0;
+  uint32_t AndFeatures = 0;
   llvm::CachePruningPolicy ThinLTOCachePolicy;
   llvm::StringMap<uint64_t> SectionStartMap;
   llvm::StringRef Chroot;
@@ -147,6 +148,7 @@ struct Configuration {
   bool ExportDynamic;
   bool FixCortexA53Errata843419;
   bool FormatBinary = false;
+  bool RequireCET;
   bool GcSections;
   bool GdbIndex;
   bool GnuHash = false;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index b49ab85a9c7c5..a90522a1092d8 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -334,6 +334,9 @@ static void checkOptions() {
     if (Config->SingleRoRx && !Script->HasSectionsCommand)
       error("-execute-only and -no-rosegment cannot be used together");
   }
+
+  if (Config->ZRetpolineplt && Config->RequireCET)
+    error("--require-cet may not be used with -z retpolineplt");
 }
 
 static const char *getReproduceOption(opt::InputArgList &Args) {
@@ -813,6 +816,7 @@ static void readConfigs(opt::InputArgList &Args) {
   Config->FilterList = args::getStrings(Args, OPT_filter);
   Config->Fini = Args.getLastArgValue(OPT_fini, "_fini");
   Config->FixCortexA53Errata843419 = Args.hasArg(OPT_fix_cortex_a53_843419);
+  Config->RequireCET = Args.hasArg(OPT_require_cet);
   Config->GcSections = Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
   Config->GnuUnique = Args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
   Config->GdbIndex = Args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
@@ -1584,6 +1588,30 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> Wrapped) {
     Symtab->wrap(W.Sym, W.Real, W.Wrap);
 }
 
+// To enable CET (x86's hardware-assited control flow enforcement), each
+// source file must be compiled with -fcf-protection. Object files compiled
+// with the flag contain feature flags indicating that they are compatible
+// with CET. We enable the feature only when all object files are compatible
+// with CET.
+//
+// This function returns the merged feature flags. If 0, we cannot enable CET.
+//
+// Note that the CET-aware PLT is not implemented yet. We do error
+// check only.
+template <class ELFT> static uint32_t getAndFeatures() {
+  if (Config->EMachine != EM_386 && Config->EMachine != EM_X86_64)
+    return 0;
+
+  uint32_t Ret = -1;
+  for (InputFile *F : ObjectFiles) {
+    uint32_t Features = cast<ObjFile<ELFT>>(F)->AndFeatures;
+    if (!Features && Config->RequireCET)
+      error(toString(F) + ": --require-cet: file is not compatible with CET");
+    Ret &= Features;
+  }
+  return Ret;
+}
+
 static const char *LibcallRoutineNames[] = {
 #define HANDLE_LIBCALL(code, name) name,
 #include "llvm/IR/RuntimeLibcalls.def"
@@ -1762,6 +1790,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
            (S->Name.startswith(".debug") || S->Name.startswith(".zdebug"));
   });
 
+  // Read .note.gnu.property sections from input object files which
+  // contain a hint to tweak linker's and loader's behaviors.
+  Config->AndFeatures = getAndFeatures<ELFT>();
+
   Config->EFlags = Target->calcEFlags();
   // MaxPageSize (sometimes called abi page size) is the maximum page size that
   // the output can be run on. For example if the OS can use 4k or 64k page
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 16991421fe6ce..e16a07fa1a31c 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
 #include "llvm/Support/raw_ostream.h"
@@ -34,6 +35,7 @@ using namespace llvm::ELF;
 using namespace llvm::object;
 using namespace llvm::sys;
 using namespace llvm::sys::fs;
+using namespace llvm::support::endian;
 
 using namespace lld;
 using namespace lld::elf;
@@ -753,6 +755,68 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &Attributes) {
   }
 }
 
+// If a source file is compiled with x86 hardware-assisted call flow control
+// enabled, the generated object file contains feature flags indicating that
+// fact. This function reads the feature flags and returns it.
+//
+// Essentially we want to read a single 32-bit value in this function, but this
+// function is rather complicated because the value is buried deep inside a
+// .note.gnu.property section.
+//
+// The section consists of one or more NOTE records. Each NOTE record consists
+// of zero or more type-length-value fields. We want to find a field of a
+// certain type. It seems a bit too much to just store a 32-bit value, perhaps
+// the ABI is unnecessarily complicated.
+template <class ELFT>
+static uint32_t readAndFeatures(ObjFile<ELFT> *Obj, ArrayRef<uint8_t> Data) {
+  using Elf_Nhdr = typename ELFT::Nhdr;
+  using Elf_Note = typename ELFT::Note;
+
+  while (!Data.empty()) {
+    // Read one NOTE record.
+    if (Data.size() < sizeof(Elf_Nhdr))
+      fatal(toString(Obj) + ": .note.gnu.property: section too short");
+
+    auto *Nhdr = reinterpret_cast<const Elf_Nhdr *>(Data.data());
+    if (Data.size() < Nhdr->getSize())
+      fatal(toString(Obj) + ": .note.gnu.property: section too short");
+
+    Elf_Note Note(*Nhdr);
+    if (Nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || Note.getName() != "GNU") {
+      Data = Data.slice(Nhdr->getSize());
+      continue;
+    }
+
+    // Read a body of a NOTE record, which consists of type-length-value fields.
+    ArrayRef<uint8_t> Desc = Note.getDesc();
+    while (!Desc.empty()) {
+      if (Desc.size() < 8)
+        fatal(toString(Obj) + ": .note.gnu.property: section too short");
+
+      uint32_t Type = read32le(Desc.data());
+      uint32_t Size = read32le(Desc.data() + 4);
+
+      if (Type == GNU_PROPERTY_X86_FEATURE_1_AND) {
+        // We found the field.
+        return read32le(Desc.data() + 8);
+      }
+
+      // On 64-bit, a payload may be followed by a 4-byte padding to make its
+      // size a multiple of 8.
+      if (ELFT::Is64Bits)
+        Size = alignTo(Size, 8);
+
+      Desc = Desc.slice(Size + 8); // +8 for Type and Size
+    }
+
+    // Go to next NOTE record if a note section didn't contain
+    // X86_FEATURES_1_AND description.
+    Data = Data.slice(Nhdr->getSize());
+  }
+
+  return 0;
+}
+
 template <class ELFT>
 InputSectionBase *ObjFile<ELFT>::getRelocTarget(const Elf_Shdr &Sec) {
   uint32_t Idx = Sec.sh_info;
@@ -901,6 +965,19 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(const Elf_Shdr &Sec) {
   if (Name == ".note.GNU-stack")
     return &InputSection::Discarded;
 
+  // If an object file is compatible with Intel Control-Flow Enforcement
+  // Technology (CET), it has a .note.gnu.property section containing the
+  // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
+  //
+  // Since we merge bitmaps from multiple object files to create a new
+  // .note.gnu.property containing a single AND'ed bitmap, we discard an input
+  // file's .note.gnu.property section.
+  if (Name == ".note.gnu.property") {
+    ArrayRef<uint8_t> Contents = check(this->getObj().getSectionContents(&Sec));
+    this->AndFeatures = readAndFeatures(this, Contents);
+    return &InputSection::Discarded;
+  }
+
   // Split stacks is a feature to support a discontiguous stack,
   // commonly used in the programming language Go. For the details,
   // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index e506b9eb87b66..64cd02022e214 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -228,6 +228,8 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   // R_MIPS_GPREL16 / R_MIPS_GPREL32 relocations.
   uint32_t MipsGp0 = 0;
 
+  uint32_t AndFeatures = 0;
+
   // Name of source file obtained from STT_FILE symbol value,
   // or empty string if there is no such symbol in object file
   // symbol table.
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 6db47e64cb941..d89eaea1f7c10 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -171,6 +171,10 @@ defm fini: Eq<"fini", "Specify a finalizer function">, MetaVarName<"<symbol>">;
 def fix_cortex_a53_843419: F<"fix-cortex-a53-843419">,
   HelpText<"Apply fixes for AArch64 Cortex-A53 erratum 843419">;
 
+// This option is intentionally hidden from the user as the implementation
+// is not complete.
+def require_cet: F<"require-cet">;
+
 defm format: Eq<"format", "Change the input format of the inputs following this option">,
   MetaVarName<"[default,elf,binary]">;
 
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 9606eeda86bd4..f461761090da4 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -288,6 +288,35 @@ static size_t getHashSize() {
   }
 }
 
+// This class represents a linker-synthesized .note.gnu.property section.
+//
+// In x86, object files may contain feature flags indicating the features that
+// they are using. The flags are stored in a .note.gnu.property section.
+//
+// lld reads the sections from input files and merges them by computing AND of
+// the flags. The result is written as a new .note.gnu.property section.
+//
+// If the flag is zero (which indicates that the intersection of the feature
+// sets is empty, or some input files didn't have .note.gnu.property sections),
+// we don't create this section.
+GnuPropertySection::GnuPropertySection()
+    : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE, 4,
+                       ".note.gnu.property") {}
+
+void GnuPropertySection::writeTo(uint8_t *Buf) {
+  write32(Buf, 4);                                   // Name size
+  write32(Buf + 4, Config->Is64 ? 16 : 12);          // Content size
+  write32(Buf + 8, NT_GNU_PROPERTY_TYPE_0);          // Type
+  memcpy(Buf + 12, "GNU", 4);                        // Name string
+  write32(Buf + 16, GNU_PROPERTY_X86_FEATURE_1_AND); // Feature type
+  write32(Buf + 20, 4);                              // Feature size
+  write32(Buf + 24, Config->AndFeatures);            // Feature flags
+  if (Config->Is64)
+    write32(Buf + 28, 0); // Padding
+}
+
+size_t GnuPropertySection::getSize() const { return Config->Is64 ? 32 : 28; }
+
 BuildIdSection::BuildIdSection()
     : SyntheticSection(SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"),
       HashSize(getHashSize()) {}
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index ad37cde763040..786ca4ef642c2 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -147,6 +147,13 @@ class GnuStackSection : public SyntheticSection {
   size_t getSize() const override { return 0; }
 };
 
+class GnuPropertySection : public SyntheticSection {
+public:
+  GnuPropertySection();
+  void writeTo(uint8_t *Buf) override;
+  size_t getSize() const override;
+};
+
 // .note.gnu.build-id section.
 class BuildIdSection : public SyntheticSection {
   // First 16 bytes are a header.
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 40d32a814bbe7..2ebf8442360ab 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -433,6 +433,9 @@ template <class ELFT> static void createSyntheticSections() {
   In.Iplt = make<PltSection>(true);
   Add(In.Iplt);
 
+  if (Config->AndFeatures)
+    Add(make<GnuPropertySection>());
+
   // .note.GNU-stack is always added when we are creating a re-linkable
   // object file. Other linkers are using the presence of this marker
   // section to control the executable-ness of the stack area, but that
diff --git a/lld/test/ELF/Inputs/i386-cet1.s b/lld/test/ELF/Inputs/i386-cet1.s
new file mode 100644
index 0000000000000..62cd66edf046c
--- /dev/null
+++ b/lld/test/ELF/Inputs/i386-cet1.s
@@ -0,0 +1,16 @@
+.section ".note.gnu.property", "a"
+.align 4
+.long 4
+.long 12
+.long 5
+.asciz "GNU"
+
+.long 0xc0000002
+.long 4
+.long 3
+
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/i386-cet2.s b/lld/test/ELF/Inputs/i386-cet2.s
new file mode 100644
index 0000000000000..1ccf701a253a7
--- /dev/null
+++ b/lld/test/ELF/Inputs/i386-cet2.s
@@ -0,0 +1,20 @@
+.section ".note.gnu.property", "a"
+.align 4
+.long 4
+.long 24
+.long 5
+.asciz "GNU"
+
+.long 0xc0000000
+.long 4
+.long 0
+
+.long 0xc0000002
+.long 4
+.long 3
+
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/i386-cet3.s b/lld/test/ELF/Inputs/i386-cet3.s
new file mode 100644
index 0000000000000..cdec62a194829
--- /dev/null
+++ b/lld/test/ELF/Inputs/i386-cet3.s
@@ -0,0 +1,5 @@
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/i386-cet4.s b/lld/test/ELF/Inputs/i386-cet4.s
new file mode 100644
index 0000000000000..6d07eeaff6a1e
--- /dev/null
+++ b/lld/test/ELF/Inputs/i386-cet4.s
@@ -0,0 +1,16 @@
+.section ".note.gnu.property", "a"
+.align 4
+.long 4
+.long 12
+.long 5
+.asciz "GNU"
+
+.long 0xc0000002
+.long 4
+.long 0xfffffffd
+
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/x86-64-cet1.s b/lld/test/ELF/Inputs/x86-64-cet1.s
new file mode 100644
index 0000000000000..ca0a368ce3890
--- /dev/null
+++ b/lld/test/ELF/Inputs/x86-64-cet1.s
@@ -0,0 +1,17 @@
+.section ".note.gnu.property", "a"
+.align 4
+.long 4
+.long 16
+.long 5
+.asciz "GNU"
+
+.long 0xc0000002
+.long 4
+.long 3
+.long 0
+
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/x86-64-cet2.s b/lld/test/ELF/Inputs/x86-64-cet2.s
new file mode 100644
index 0000000000000..39a6291e56f85
--- /dev/null
+++ b/lld/test/ELF/Inputs/x86-64-cet2.s
@@ -0,0 +1,22 @@
+.section ".note.gnu.property", "a"
+.align 4
+.long 4
+.long 32
+.long 5
+.asciz "GNU"
+
+.long 0xc0000000
+.long 4
+.long 0
+.long 0
+
+.long 0xc0000002
+.long 4
+.long 3
+.long 0
+
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/x86-64-cet3.s b/lld/test/ELF/Inputs/x86-64-cet3.s
new file mode 100644
index 0000000000000..cdec62a194829
--- /dev/null
+++ b/lld/test/ELF/Inputs/x86-64-cet3.s
@@ -0,0 +1,5 @@
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/Inputs/x86-64-cet4.s b/lld/test/ELF/Inputs/x86-64-cet4.s
new file mode 100644
index 0000000000000..85ddee299370c
--- /dev/null
+++ b/lld/test/ELF/Inputs/x86-64-cet4.s
@@ -0,0 +1,17 @@
+.section ".note.gnu.property", "a"
+.align 4
+.long 4
+.long 16
+.long 5
+.asciz "GNU"
+
+.long 0xc0000002
+.long 4
+.long 0xfffffffd
+.long 0
+
+.text
+.globl func2
+.type func2,@function
+func2:
+  ret
diff --git a/lld/test/ELF/i386-cet.s b/lld/test/ELF/i386-cet.s
new file mode 100644
index 0000000000000..22a5205675836
--- /dev/null
+++ b/lld/test/ELF/i386-cet.s
@@ -0,0 +1,48 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet1.s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet2.s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet3.s -o %t3.o
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-linux %p/Inputs/i386-cet4.s -o %t4.o
+
+# RUN: ld.lld -e func1 %t.o %t1.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
+
+# RUN: ld.lld -e func1 %t.o %t2.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
+
+# CET: Properties: x86 feature: IBT, SHSTK
+
+# RUN: ld.lld -e func1 %t.o %t3.o -o %t
+# RUN: llvm-readelf -S %t | FileCheck -check-prefix=NOCET %s
+
+# NOCET:     Section Headers
+# NOCET-NOT: .note.gnu.property
+
+# RUN: not ld.lld -e func1 %t.o %t3.o -o %t --require-cet 2>&1 \
+# RUN:   | FileCheck -check-prefix=ERROR %s
+# ERROR: i386-cet.s.tmp3.o: --require-cet: file is not compatible with CET
+
+# RUN: ld.lld -e func1 %t.o %t4.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck -check-prefix=NOSHSTK -match-full-lines %s
+
+# Check .note.gnu.protery without property SHSTK.
+# NOSHSTK: Properties: x86 feature: IBT
+
+.section ".note.gnu.property", "a"
+.long 4
+.long 0x10
+.long 0x5
+.asciz "GNU"
+
+.long 0xc0000002
+.long 4
+.long 3
+.long 0
+
+.text
+.globl func1
+.type func1,@function
+func1:
+  call func2
+  ret
diff --git a/lld/test/ELF/x86-64-cet.s b/lld/test/ELF/x86-64-cet.s
new file mode 100644
index 0000000000000..3fb3716ed4e92
--- /dev/null
+++ b/lld/test/ELF/x86-64-cet.s
@@ -0,0 +1,48 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet1.s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet2.s -o %t2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet3.s -o %t3.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/x86-64-cet4.s -o %t4.o
+
+# RUN: ld.lld -e func1 %t.o %t1.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
+
+# RUN: ld.lld -e func1 %t.o %t2.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck -check-prefix=CET -match-full-lines %s
+
+# CET: Properties: x86 feature: IBT, SHSTK
+
+# RUN: ld.lld -e func1 %t.o %t3.o -o %t
+# RUN: llvm-readelf -S %t | FileCheck -check-prefix=NOCET %s
+
+# NOCET:     Section Headers
+# NOCET-NOT: .note.gnu.property
+
+# RUN: not ld.lld -e func1 %t.o %t3.o -o %t --require-cet 2>&1 \
+# RUN:   | FileCheck -check-prefix=ERROR %s
+# ERROR: x86-64-cet.s.tmp3.o: --require-cet: file is not compatible with CET
+
+# RUN: ld.lld -e func1 %t.o %t4.o -o %t
+# RUN: llvm-readelf -n %t | FileCheck -check-prefix=NOSHSTK -match-full-lines %s
+
+# Check .note.gnu.protery without property SHSTK.
+# NOSHSTK: Properties: x86 feature: IBT
+
+.section ".note.gnu.property", "a"
+.long 4
+.long 0x10
+.long 0x5
+.asciz "GNU"
+
+.long 0xc0000002
+.long 4
+.long 3
+.long 0
+
+.text
+.globl func1
+.type func1,@function
+func1:
+  call func2
+  ret
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 45bbd6c4dea53..5552208b1f8a7 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -592,9 +592,9 @@ class Elf_Note_Impl {
 
   template <class NoteIteratorELFT> friend class Elf_Note_Iterator_Impl;
 
+public:
   Elf_Note_Impl(const Elf_Nhdr_Impl<ELFT> &Nhdr) : Nhdr(Nhdr) {}
 
-public:
   /// Get the note's name, excluding the terminating null byte.
   StringRef getName() const {
     if (!Nhdr.n_namesz)

From 516e6cc1dd946de52288159d24a87df0c3dbdebb Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Wed, 5 Jun 2019 03:17:11 +0000
Subject: [PATCH 1095/1176] [Clang] Disable new PM for tests that use
 optimization level -O1, -O2 and -O3

Tests that use -O1, -O2 and -O3 would often produce different results
with the new pass manager which makes these tests fail. Disable new PM
explicitly for these tests.

Differential Revision: https://reviews.llvm.org/D58375

llvm-svn: 362580
---
 clang/test/CodeGen/callback_annotated.c       |  6 ++---
 clang/test/CodeGen/cfi-icall-cross-dso.c      |  8 +++----
 clang/test/CodeGen/complex-math.c             | 16 ++++++-------
 clang/test/CodeGen/dllimport.c                |  4 ++--
 clang/test/CodeGen/inline2.c                  |  4 ++--
 clang/test/CodeGen/lifetime.c                 |  6 ++---
 .../sanitize-address-field-padding.cpp        |  2 +-
 clang/test/CodeGen/tbaa-for-vptr.cpp          | 10 ++++----
 clang/test/CodeGenCXX/atomicinit.cpp          |  2 +-
 .../CodeGenCXX/cfi-speculative-vtable.cpp     |  4 ++--
 .../CodeGenCXX/debug-info-class-optzns.cpp    |  2 +-
 clang/test/CodeGenCXX/dllimport-members.cpp   |  4 ++--
 clang/test/CodeGenCXX/dllimport.cpp           |  2 +-
 .../test/CodeGenCXX/dso-local-executable.cpp  |  6 ++---
 clang/test/CodeGenCXX/init-invariant.cpp      |  2 +-
 clang/test/CodeGenCXX/merge-functions.cpp     |  2 +-
 clang/test/CodeGenCXX/nrvo.cpp                |  6 ++---
 .../sanitize-dtor-nontrivial-virtual-base.cpp |  2 +-
 .../visibility-hidden-extern-templates.cpp    |  2 +-
 clang/test/CodeGenObjCXX/nrvo.mm              |  2 +-
 clang/test/Driver/asan.c                      | 24 +++++++++----------
 clang/test/Driver/msan.c                      | 24 +++++++++----------
 clang/test/Driver/tsan.c                      | 10 ++++----
 23 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/clang/test/CodeGen/callback_annotated.c b/clang/test/CodeGen/callback_annotated.c
index feacda2754153..c5b431d5ef845 100644
--- a/clang/test/CodeGen/callback_annotated.c
+++ b/clang/test/CodeGen/callback_annotated.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN1
-// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN2
-// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 -fno-experimental-new-pass-manager %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN1
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 -fno-experimental-new-pass-manager %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN2
+// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 -fno-experimental-new-pass-manager %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s
 
 // RUN1-DAG: @broker0({{[^#]*#[0-9]+}} !callback ![[cid0:[0-9]+]]
 __attribute__((callback(1, 2))) void *broker0(void *(*callee)(void *), void *payload) {
diff --git a/clang/test/CodeGen/cfi-icall-cross-dso.c b/clang/test/CodeGen/cfi-icall-cross-dso.c
index 1df20aa1c8a38..67901c4f7d624 100644
--- a/clang/test/CodeGen/cfi-icall-cross-dso.c
+++ b/clang/test/CodeGen/cfi-icall-cross-dso.c
@@ -1,25 +1,25 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 \
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fno-experimental-new-pass-manager \
 // RUN:   -fsanitize=cfi-icall -fsanitize-cfi-cross-dso \
 // RUN:   -emit-llvm -o - %s | FileCheck \
 // RUN:       --check-prefix=CHECK --check-prefix=CHECK-DIAG \
 // RUN:       --check-prefix=ITANIUM --check-prefix=ITANIUM-DIAG \
 // RUN:       %s
 
-// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 \
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fno-experimental-new-pass-manager \
 // RUN:   -fsanitize=cfi-icall -fsanitize-cfi-cross-dso -fsanitize-trap=cfi-icall \
 // RUN:   -emit-llvm -o - %s | FileCheck \
 // RUN:       --check-prefix=CHECK \
 // RUN:       --check-prefix=ITANIUM --check-prefix=ITANIUM-TRAP \
 // RUN:       %s
 
-// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -O1 \
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -O1 -fno-experimental-new-pass-manager \
 // RUN:   -fsanitize=cfi-icall -fsanitize-cfi-cross-dso \
 // RUN:   -emit-llvm -o - %s | FileCheck \
 // RUN:       --check-prefix=CHECK --check-prefix=CHECK-DIAG \
 // RUN:       --check-prefix=MS --check-prefix=MS-DIAG \
 // RUN:       %s
 
-// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -O1 \
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -O1 -fno-experimental-new-pass-manager \
 // RUN:   -fsanitize=cfi-icall -fsanitize-cfi-cross-dso -fsanitize-trap=cfi-icall \
 // RUN:   -emit-llvm -o - %s | FileCheck \
 // RUN:       --check-prefix=CHECK \
diff --git a/clang/test/CodeGen/complex-math.c b/clang/test/CodeGen/complex-math.c
index e28941f838373..a5805b61d94e8 100644
--- a/clang/test/CodeGen/complex-math.c
+++ b/clang/test/CodeGen/complex-math.c
@@ -1,11 +1,11 @@
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s --check-prefix=PPC
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
-// RUN: %clang_cc1 %s -O1 -emit-llvm -triple aarch64-unknown-unknown -ffast-math -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple x86_64-pc-win64 -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple i686-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple powerpc-unknown-unknown -o - | FileCheck %s --check-prefix=PPC
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple armv7-none-linux-gnueabi -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple armv7-none-linux-gnueabihf -o - | FileCheck %s --check-prefix=ARMHF
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple thumbv7k-apple-watchos2.0 -o - -target-abi aapcs16 | FileCheck %s --check-prefix=ARM7K
+// RUN: %clang_cc1 %s -O1 -fno-experimental-new-pass-manager -emit-llvm -triple aarch64-unknown-unknown -ffast-math -o - | FileCheck %s --check-prefix=AARCH64-FASTMATH
 
 float _Complex add_float_rr(float a, float b) {
   // X86-LABEL: @add_float_rr(
diff --git a/clang/test/CodeGen/dllimport.c b/clang/test/CodeGen/dllimport.c
index c3fd0770ee837..319770232a289 100644
--- a/clang/test/CodeGen/dllimport.c
+++ b/clang/test/CodeGen/dllimport.c
@@ -2,8 +2,8 @@
 // RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=MS %s
 // RUN: %clang_cc1 -triple i686-windows-gnu    -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GNU %s
 // RUN: %clang_cc1 -triple x86_64-windows-gnu  -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GNU %s
-// RUN: %clang_cc1 -triple i686-windows-msvc   -fms-extensions -emit-llvm -std=c11 -O1 -o - %s | FileCheck --check-prefix=O1 --check-prefix=MO1 %s
-// RUN: %clang_cc1 -triple i686-windows-gnu    -fms-extensions -emit-llvm -std=c11 -O1 -o - %s | FileCheck --check-prefix=O1 --check-prefix=GO1 %s
+// RUN: %clang_cc1 -triple i686-windows-msvc   -fms-extensions -emit-llvm -std=c11 -O1 -fno-experimental-new-pass-manager -o - %s | FileCheck --check-prefix=O1 --check-prefix=MO1 %s
+// RUN: %clang_cc1 -triple i686-windows-gnu    -fms-extensions -emit-llvm -std=c11 -O1 -fno-experimental-new-pass-manager -o - %s | FileCheck --check-prefix=O1 --check-prefix=GO1 %s
 
 #define JOIN2(x, y) x##y
 #define JOIN(x, y) JOIN2(x, y)
diff --git a/clang/test/CodeGen/inline2.c b/clang/test/CodeGen/inline2.c
index 84cd4db027749..0f168e5066142 100644
--- a/clang/test/CodeGen/inline2.c
+++ b/clang/test/CodeGen/inline2.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -O1 -std=gnu89 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-GNU89 %s
-// RUN: %clang_cc1 -O1 -std=c99 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-C99 %s
+// RUN: %clang_cc1 -O1 -fno-experimental-new-pass-manager -std=gnu89 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-GNU89 %s
+// RUN: %clang_cc1 -O1 -fno-experimental-new-pass-manager -std=c99 -triple i386-apple-darwin9 -emit-llvm %s -o - | FileCheck -check-prefix CHECK-C99 %s
 
 // CHECK-GNU89-LABEL: define i32 @f0()
 // CHECK-C99-LABEL: define i32 @f0()
diff --git a/clang/test/CodeGen/lifetime.c b/clang/test/CodeGen/lifetime.c
index 2203840d4b806..446974ff430c6 100644
--- a/clang/test/CodeGen/lifetime.c
+++ b/clang/test/CodeGen/lifetime.c
@@ -1,7 +1,7 @@
 // RUN: %clang -S -emit-llvm -o - -O0 %s | FileCheck %s -check-prefix=O0
-// RUN: %clang -S -emit-llvm -o - -O1 %s | FileCheck %s -check-prefix=O1
-// RUN: %clang -S -emit-llvm -o - -O2 %s | FileCheck %s -check-prefix=O2
-// RUN: %clang -S -emit-llvm -o - -O3 %s | FileCheck %s -check-prefix=O3
+// RUN: %clang -S -emit-llvm -o - -O1 -fno-experimental-new-pass-manager %s | FileCheck %s -check-prefix=O1
+// RUN: %clang -S -emit-llvm -o - -O2 -fno-experimental-new-pass-manager %s | FileCheck %s -check-prefix=O2
+// RUN: %clang -S -emit-llvm -o - -O3 -fno-experimental-new-pass-manager %s | FileCheck %s -check-prefix=O3
 
 extern void use(char *a);
 
diff --git a/clang/test/CodeGen/sanitize-address-field-padding.cpp b/clang/test/CodeGen/sanitize-address-field-padding.cpp
index 045a4342a1ebd..c20b24769add2 100644
--- a/clang/test/CodeGen/sanitize-address-field-padding.cpp
+++ b/clang/test/CodeGen/sanitize-address-field-padding.cpp
@@ -2,7 +2,7 @@
 // RUN: echo 'type:SomeNamespace::BlacklistedByName=field-padding' > %t.type.blacklist
 // RUN: echo 'src:*sanitize-address-field-padding.cpp=field-padding' > %t.file.blacklist
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsanitize=address -fsanitize-address-field-padding=1 -fsanitize-blacklist=%t.type.blacklist -Rsanitize-address -emit-llvm -o - %s 2>&1 | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsanitize=address -fsanitize-address-field-padding=1 -fsanitize-blacklist=%t.type.blacklist -Rsanitize-address -emit-llvm -o - %s -O1 -mconstructor-aliases 2>&1 | FileCheck %s --check-prefix=WITH_CTOR_ALIASES
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsanitize=address -fsanitize-address-field-padding=1 -fsanitize-blacklist=%t.type.blacklist -Rsanitize-address -emit-llvm -o - %s -O1 -fno-experimental-new-pass-manager -mconstructor-aliases 2>&1 | FileCheck %s --check-prefix=WITH_CTOR_ALIASES
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsanitize=address -fsanitize-address-field-padding=1 -fsanitize-blacklist=%t.file.blacklist -Rsanitize-address -emit-llvm -o - %s 2>&1 | FileCheck %s --check-prefix=FILE_BLACKLIST
 // RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - %s 2>&1 | FileCheck %s --check-prefix=NO_PADDING
 // Try to emulate -save-temps option and make sure -disable-llvm-passes will not run sanitize instrumentation.
diff --git a/clang/test/CodeGen/tbaa-for-vptr.cpp b/clang/test/CodeGen/tbaa-for-vptr.cpp
index 1139749f0fad4..663f502de5983 100644
--- a/clang/test/CodeGen/tbaa-for-vptr.cpp
+++ b/clang/test/CodeGen/tbaa-for-vptr.cpp
@@ -1,13 +1,13 @@
 // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -fsanitize=thread %s | FileCheck %s --check-prefixes=CHECK,OLD-PATH
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,OLD-PATH
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -O1  -relaxed-aliasing -fsanitize=thread %s | FileCheck %s --check-prefixes=CHECK,OLD-PATH
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -O1 -fno-experimental-new-pass-manager %s | FileCheck %s --check-prefixes=CHECK,OLD-PATH
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -O1 -fno-experimental-new-pass-manager -relaxed-aliasing -fsanitize=thread %s | FileCheck %s --check-prefixes=CHECK,OLD-PATH
 //
 // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -new-struct-path-tbaa -o - -fsanitize=thread %s | FileCheck %s --check-prefixes=CHECK,NEW-PATH
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -new-struct-path-tbaa -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,NEW-PATH
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -new-struct-path-tbaa -o - -O1  -relaxed-aliasing -fsanitize=thread %s | FileCheck %s --check-prefixes=CHECK,NEW-PATH
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -new-struct-path-tbaa -o - -O1 -fno-experimental-new-pass-manager %s | FileCheck %s --check-prefixes=CHECK,NEW-PATH
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -new-struct-path-tbaa -o - -O1 -fno-experimental-new-pass-manager -relaxed-aliasing -fsanitize=thread %s | FileCheck %s --check-prefixes=CHECK,NEW-PATH
 //
 // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s --check-prefix=NOTBAA
-// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -O2  -relaxed-aliasing %s | FileCheck %s --check-prefix=NOTBAA
+// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -o - -O2 -fno-experimental-new-pass-manager -relaxed-aliasing %s | FileCheck %s --check-prefix=NOTBAA
 //
 // Check that we generate TBAA for vtable pointer loads and stores.
 // When -fsanitize=thread is used TBAA should be generated at all opt levels
diff --git a/clang/test/CodeGenCXX/atomicinit.cpp b/clang/test/CodeGenCXX/atomicinit.cpp
index 4c30ec35212f9..85ec74593fe04 100644
--- a/clang/test/CodeGenCXX/atomicinit.cpp
+++ b/clang/test/CodeGenCXX/atomicinit.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -emit-llvm -O1 -o - -triple=i686-apple-darwin9 -std=c++11 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -O1 -fno-experimental-new-pass-manager -o - -triple=i686-apple-darwin9 -std=c++11 | FileCheck %s
 
 // CHECK-DAG: @PR22043 = local_unnamed_addr global i32 0, align 4
 typedef _Atomic(int) AtomicInt;
diff --git a/clang/test/CodeGenCXX/cfi-speculative-vtable.cpp b/clang/test/CodeGenCXX/cfi-speculative-vtable.cpp
index 490190c4afd75..06e6df86d358c 100644
--- a/clang/test/CodeGenCXX/cfi-speculative-vtable.cpp
+++ b/clang/test/CodeGenCXX/cfi-speculative-vtable.cpp
@@ -1,7 +1,7 @@
 // Test that we don't emit a bit set entry for a speculative (available_externally) vtable.
 // This does not happen in the Microsoft ABI.
-// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck  %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fsanitize-cfi-cross-dso -emit-llvm -o - %s | FileCheck  %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fno-experimental-new-pass-manager -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -emit-llvm -o - %s | FileCheck  %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O1 -fno-experimental-new-pass-manager -fsanitize=cfi-vcall -fsanitize-trap=cfi-vcall -fsanitize-cfi-cross-dso -emit-llvm -o - %s | FileCheck  %s
 
 class A {
  public:
diff --git a/clang/test/CodeGenCXX/debug-info-class-optzns.cpp b/clang/test/CodeGenCXX/debug-info-class-optzns.cpp
index d58510b7155a2..3fca31bc03b33 100644
--- a/clang/test/CodeGenCXX/debug-info-class-optzns.cpp
+++ b/clang/test/CodeGenCXX/debug-info-class-optzns.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unknown_unknown -emit-llvm -debug-info-kind=limited %s -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown_unknown -emit-llvm -debug-info-kind=limited %s -O1 -fno-experimental-new-pass-manager -o - | FileCheck %s
 
 // Ensure class definitions are not emitted to debug info just because the
 // vtable is emitted for optimization purposes (as available_externally). The
diff --git a/clang/test/CodeGenCXX/dllimport-members.cpp b/clang/test/CodeGenCXX/dllimport-members.cpp
index f8034cfb47c10..1c8fabfaa12fc 100644
--- a/clang/test/CodeGenCXX/dllimport-members.cpp
+++ b/clang/test/CodeGenCXX/dllimport-members.cpp
@@ -2,8 +2,8 @@
 // RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-compatibility -emit-llvm -std=c++1y -O0 -o - %s -DMSABI | FileCheck --check-prefix=MSC --check-prefix=M64 %s
 // RUN: %clang_cc1 -triple i686-windows-gnu                       -emit-llvm -std=c++1y -O0 -o - %s         | FileCheck --check-prefix=GNU --check-prefix=G32 %s
 // RUN: %clang_cc1 -triple x86_64-windows-gnu                     -emit-llvm -std=c++1y -O0 -o - %s         | FileCheck --check-prefix=GNU --check-prefix=G64 %s
-// RUN: %clang_cc1 -triple i686-windows-msvc -fms-compatibility   -emit-llvm -std=c++1y -O1 -o - %s -DMSABI | FileCheck --check-prefix=MO1 %s
-// RUN: %clang_cc1 -triple i686-windows-gnu                       -emit-llvm -std=c++1y -O1 -o - %s         | FileCheck --check-prefix=GO1 %s
+// RUN: %clang_cc1 -triple i686-windows-msvc -fms-compatibility   -emit-llvm -std=c++1y -O1 -fno-experimental-new-pass-manager -o - %s -DMSABI | FileCheck --check-prefix=MO1 %s
+// RUN: %clang_cc1 -triple i686-windows-gnu                       -emit-llvm -std=c++1y -O1 -fno-experimental-new-pass-manager -o - %s         | FileCheck --check-prefix=GO1 %s
 
 // Helper structs to make templates more expressive.
 struct ImplicitInst_Imported {};
diff --git a/clang/test/CodeGenCXX/dllimport.cpp b/clang/test/CodeGenCXX/dllimport.cpp
index e9f0e4795f798..2f4a1b956bf32 100644
--- a/clang/test/CodeGenCXX/dllimport.cpp
+++ b/clang/test/CodeGenCXX/dllimport.cpp
@@ -4,7 +4,7 @@
 // RUN: %clang_cc1 -triple x86_64-windows-gnu  -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s         -w | FileCheck --check-prefix=GNU --check-prefix=G64 %s
 // RUN: %clang_cc1 -triple i686-windows-msvc   -fno-rtti -fno-threadsafe-statics -fms-extensions -fms-compatibility-version=18.00 -emit-llvm -std=c++1y -O1 -disable-llvm-passes -o - %s -DMSABI -w | FileCheck --check-prefix=MO1 --check-prefix=M18 %s
 // RUN: %clang_cc1 -triple i686-windows-msvc   -fno-rtti -fno-threadsafe-statics -fms-extensions -fms-compatibility-version=19.00 -emit-llvm -std=c++1y -O1 -disable-llvm-passes -o - %s -DMSABI -w | FileCheck --check-prefix=MO1 --check-prefix=M19 %s
-// RUN: %clang_cc1 -triple i686-windows-gnu    -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O1 -o - %s         -w | FileCheck --check-prefix=GO1 %s
+// RUN: %clang_cc1 -triple i686-windows-gnu    -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O1 -fno-experimental-new-pass-manager -o - %s         -w | FileCheck --check-prefix=GO1 %s
 
 // CHECK-NOT doesn't play nice with CHECK-DAG, so use separate run lines.
 // RUN: %clang_cc1 -triple i686-windows-msvc   -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -DMSABI -w | FileCheck --check-prefix=MSC2 %s
diff --git a/clang/test/CodeGenCXX/dso-local-executable.cpp b/clang/test/CodeGenCXX/dso-local-executable.cpp
index ceb649e074a30..a7a6bef951bd3 100644
--- a/clang/test/CodeGenCXX/dso-local-executable.cpp
+++ b/clang/test/CodeGenCXX/dso-local-executable.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -O1 -emit-llvm %s -o - | FileCheck --check-prefix=STATIC %s
-// RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -fno-plt -O1 -emit-llvm %s -o - | FileCheck --check-prefix=NOPLT %s
-// RUN: %clang_cc1 -triple x86_64-w64-mingw32 -O1 -emit-llvm %s -o - | FileCheck --check-prefix=MINGW %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -O1 -fno-experimental-new-pass-manager -emit-llvm %s -o - | FileCheck --check-prefix=STATIC %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -fno-plt -O1 -fno-experimental-new-pass-manager -emit-llvm %s -o - | FileCheck --check-prefix=NOPLT %s
+// RUN: %clang_cc1 -triple x86_64-w64-mingw32 -O1 -fno-experimental-new-pass-manager -emit-llvm %s -o - | FileCheck --check-prefix=MINGW %s
 
 // STATIC-DAG: @_ZTV1C = linkonce_odr dso_local unnamed_addr constant
 // STATIC-DAG: @_ZTS1C = linkonce_odr dso_local constant
diff --git a/clang/test/CodeGenCXX/init-invariant.cpp b/clang/test/CodeGenCXX/init-invariant.cpp
index 815287c8e1167..82a6649222436 100644
--- a/clang/test/CodeGenCXX/init-invariant.cpp
+++ b/clang/test/CodeGenCXX/init-invariant.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple i686-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-O0
-// RUN: %clang_cc1 -triple i686-linux-gnu -emit-llvm %s -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i686-linux-gnu -emit-llvm %s -O1 -fno-experimental-new-pass-manager -o - | FileCheck %s
 
 // Check that we add an llvm.invariant.start.p0i8 to mark when a global becomes
 // read-only. If globalopt can fold the initializer, it will then mark the
diff --git a/clang/test/CodeGenCXX/merge-functions.cpp b/clang/test/CodeGenCXX/merge-functions.cpp
index 20a286e022038..db742f41d3c89 100644
--- a/clang/test/CodeGenCXX/merge-functions.cpp
+++ b/clang/test/CodeGenCXX/merge-functions.cpp
@@ -1,5 +1,5 @@
 // REQUIRES: x86-registered-target
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -O1 -fmerge-functions -emit-llvm -o - -x c++ < %s | FileCheck %s -implicit-check-not=_ZN1A1gEiPi
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -O1 -fno-experimental-new-pass-manager -fmerge-functions -emit-llvm -o - -x c++ < %s | FileCheck %s -implicit-check-not=_ZN1A1gEiPi
 
 // Basic functionality test. Function merging doesn't kick in on functions that
 // are too simple.
diff --git a/clang/test/CodeGenCXX/nrvo.cpp b/clang/test/CodeGenCXX/nrvo.cpp
index 221857402988a..aab26890ea988 100644
--- a/clang/test/CodeGenCXX/nrvo.cpp
+++ b/clang/test/CodeGenCXX/nrvo.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fcxx-exceptions -fexceptions -std=c++03 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-03 %s
-// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fcxx-exceptions -fexceptions -std=c++11 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-11 %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-experimental-new-pass-manager -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-experimental-new-pass-manager -fcxx-exceptions -fexceptions -std=c++03 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-03 %s
+// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm -O1 -fno-experimental-new-pass-manager -fcxx-exceptions -fexceptions -std=c++11 -o - %s | FileCheck --check-prefixes=CHECK-EH,CHECK-EH-11 %s
 
 // Test code generation for the named return value optimization.
 class X {
diff --git a/clang/test/CodeGenCXX/sanitize-dtor-nontrivial-virtual-base.cpp b/clang/test/CodeGenCXX/sanitize-dtor-nontrivial-virtual-base.cpp
index 27eb64b553475..8c5cc631becc1 100644
--- a/clang/test/CodeGenCXX/sanitize-dtor-nontrivial-virtual-base.cpp
+++ b/clang/test/CodeGenCXX/sanitize-dtor-nontrivial-virtual-base.cpp
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -fsanitize=memory -O0 -fsanitize-memory-use-after-dtor -std=c++11 -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -fsanitize=memory -O1 -fsanitize-memory-use-after-dtor -std=c++11 -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fsanitize=memory -O1 -fno-experimental-new-pass-manager -fsanitize-memory-use-after-dtor -std=c++11 -triple=x86_64-pc-linux -emit-llvm -o - %s | FileCheck %s
 
 template <class T>
 class Vector {
diff --git a/clang/test/CodeGenCXX/visibility-hidden-extern-templates.cpp b/clang/test/CodeGenCXX/visibility-hidden-extern-templates.cpp
index 95e8e089cc12b..c84c7fbc97b26 100644
--- a/clang/test/CodeGenCXX/visibility-hidden-extern-templates.cpp
+++ b/clang/test/CodeGenCXX/visibility-hidden-extern-templates.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -O1 -triple %itanium_abi_triple -emit-llvm -o - -fvisibility hidden %s | FileCheck %s
+// RUN: %clang_cc1 -O1 -fno-experimental-new-pass-manager -triple %itanium_abi_triple -emit-llvm -o - -fvisibility hidden %s | FileCheck %s
 
 template<typename T>
 struct X {
diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm
index 0a7dff50c5a0e..1ad5f79ad12ea 100644
--- a/clang/test/CodeGenObjCXX/nrvo.mm
+++ b/clang/test/CodeGenObjCXX/nrvo.mm
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -o - -fblocks %s -O1 -triple x86_64-apple-darwin10.0.0 -fobjc-runtime=macosx-fragile-10.5 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -fblocks %s -O1 -fno-experimental-new-pass-manager -triple x86_64-apple-darwin10.0.0 -fobjc-runtime=macosx-fragile-10.5 | FileCheck %s
 
 // PR10835 / <rdar://problem/10050178>
 struct X {
diff --git a/clang/test/Driver/asan.c b/clang/test/Driver/asan.c
index db1c042a4f708..51db6e24968b5 100644
--- a/clang/test/Driver/asan.c
+++ b/clang/test/Driver/asan.c
@@ -1,19 +1,19 @@
 // RUN: %clang     -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
-// RUN: %clang -O1 -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
-// RUN: %clang -O2 -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
-// RUN: %clang -O3 -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target i386-unknown-linux -fsanitize=address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-ASAN
 // RUN: %clang     -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
-// RUN: %clang -O1 -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
-// RUN: %clang -O2 -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
-// RUN: %clang -O3 -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target i386-unknown-linux -fsanitize=kernel-address %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KASAN
 // RUN: %clang     -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
-// RUN: %clang -O1 -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
-// RUN: %clang -O2 -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
-// RUN: %clang -O3 -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target aarch64-unknown-linux -fsanitize=hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-HWASAN
 // RUN: %clang     -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
-// RUN: %clang -O1 -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
-// RUN: %clang -O2 -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
-// RUN: %clang -O3 -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target aarch64-unknown-linux -fsanitize=kernel-hwaddress %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KHWASAN
 // Verify that -fsanitize={address,hwaddres,kernel-address,kernel-hwaddress} invokes ASan, HWAsan, KASan or KHWASan instrumentation.
 
 int foo(int *a) { return *a; }
diff --git a/clang/test/Driver/msan.c b/clang/test/Driver/msan.c
index 18ef2a96cacb2..dcbace819b7ea 100644
--- a/clang/test/Driver/msan.c
+++ b/clang/test/Driver/msan.c
@@ -1,14 +1,14 @@
 // REQUIRES: x86-registered-target
 
-// RUN: %clang     -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
-// RUN: %clang -O1 -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
-// RUN: %clang -O2 -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
-// RUN: %clang -O3 -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang     -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
 
-// RUN: %clang     -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
-// RUN: %clang -O1 -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
-// RUN: %clang -O2 -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
-// RUN: %clang -O3 -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
+// RUN: %clang     -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=kernel-memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-KMSAN
 
 // RUN: %clang -target mips64-linux-gnu -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
 // RUN: %clang -target mips64el-unknown-linux-gnu -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
@@ -19,10 +19,10 @@
 
 // Also check that this works with the new pass manager with and without
 // optimization
-// RUN: %clang     -target x86_64-unknown-linux -fexperimental-new-pass-manager -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
-// RUN: %clang -O1 -target x86_64-unknown-linux -fexperimental-new-pass-manager -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
-// RUN: %clang -O2 -target x86_64-unknown-linux -fexperimental-new-pass-manager -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
-// RUN: %clang -O3 -target x86_64-unknown-linux -fexperimental-new-pass-manager -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang     -fexperimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang -O1 -fexperimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang -O2 -fexperimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
+// RUN: %clang -O3 -fexperimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=memory %s -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-MSAN
 
 int foo(int *a) { return *a; }
 // CHECK-MSAN: __msan_init
diff --git a/clang/test/Driver/tsan.c b/clang/test/Driver/tsan.c
index ea88f87450fae..7fca92fec457c 100644
--- a/clang/test/Driver/tsan.c
+++ b/clang/test/Driver/tsan.c
@@ -1,10 +1,10 @@
 // REQUIRES: x86-registered-target
 
-// RUN: %clang     -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang -O1 -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang -O2 -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang -O3 -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang     -target x86_64-unknown-linux -fsanitize=thread  %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang     -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O1 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O2 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang -O3 -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang     -fno-experimental-new-pass-manager -target x86_64-unknown-linux -fsanitize=thread  %s -S -emit-llvm -o - | FileCheck %s
 // Verify that -fsanitize=thread invokes tsan instrumentation.
 
 // Also check that this works with the new pass manager with and without

From c0eb8a982538c2c5db234ebc1c30f4c9b40587de Mon Sep 17 00:00:00 2001
From: Kristina Brooks <notstina@gmail.com>
Date: Wed, 5 Jun 2019 03:47:02 +0000
Subject: [PATCH 1096/1176] Add __FILE_NAME__ to ReleaseNotes. NFC

Added it under C language changes as a nonstandard
extension for the time being.

llvm-svn: 362581
---
 clang/docs/ReleaseNotes.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 29c900d523207..e13bec1fa2df0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -101,9 +101,11 @@ Windows Support
 C Language Changes in Clang
 ---------------------------
 
-- ...
+- ``__FILE_NAME__`` macro has been added as a Clang specific extension supported
+  in all C-family languages. This macro is similar to ``__FILE__`` except it
+  will always provide the last path component when possible.
 
-...
+- ...
 
 C11 Feature Support
 ^^^^^^^^^^^^^^^^^^^

From fa449a9bb27abdc43a4dbd17dc84d920f5f408d8 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Wed, 5 Jun 2019 04:18:12 +0000
Subject: [PATCH 1097/1176] Suppress false-positive GCC -Wreturn-type warning.

llvm-svn: 362582
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 56c32235fd384..d82667be933b5 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -104,6 +104,7 @@ static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
   case AbstractAttribute::MP_RETURNED:
     return AttributeList::ReturnIndex;
   }
+  llvm_unreachable("Unknown manifest position!");
 }
 
 /// Return true if \p New is equal or worse than \p Old.

From a3e16719c46aff109a6902c32787c06831c0e61d Mon Sep 17 00:00:00 2001
From: Yevgeny Rouban <yevgeny.rouban@azul.com>
Date: Wed, 5 Jun 2019 05:46:40 +0000
Subject: [PATCH 1098/1176] Resubmit "[CorrelatedValuePropagation] Fix prof
 branch_weights metadata handling for SwitchInst"

This reverts commit 5b32f60ec31ce136edac6f693538aeb6039f4ad0.
The fix is in commit 4f9e68148bd0dada2d6997625432385918ac2e2c.

This patch fixes the CorrelatedValuePropagation pass to keep
prof branch_weights metadata of SwitchInst consistent.
It makes use of SwitchInstProfUpdateWrapper.
New tests are added.

Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D62126

llvm-svn: 362583
---
 .../Scalar/CorrelatedValuePropagation.cpp     | 117 ++++++++---------
 .../CorrelatedValuePropagation/profmd.ll      | 119 ++++++++++++++++++
 2 files changed, 180 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll

diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 5bfdf17c9a03f..17a8972c6fe53 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -308,11 +308,11 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
 /// that cannot fire no matter what the incoming edge can safely be removed. If
 /// a case fires on every incoming edge then the entire switch can be removed
 /// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
                           DominatorTree *DT) {
   DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
-  Value *Cond = SI->getCondition();
-  BasicBlock *BB = SI->getParent();
+  Value *Cond = I->getCondition();
+  BasicBlock *BB = I->getParent();
 
   // If the condition was defined in same block as the switch then LazyValueInfo
   // currently won't say anything useful about it, though in theory it could.
@@ -329,67 +329,72 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
   for (auto *Succ : successors(BB))
     SuccessorsCount[Succ]++;
 
-  for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
-    ConstantInt *Case = CI->getCaseValue();
-
-    // Check to see if the switch condition is equal to/not equal to the case
-    // value on every incoming edge, equal/not equal being the same each time.
-    LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
-    for (pred_iterator PI = PB; PI != PE; ++PI) {
-      // Is the switch condition equal to the case value?
-      LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
-                                                              Cond, Case, *PI,
-                                                              BB, SI);
-      // Give up on this case if nothing is known.
-      if (Value == LazyValueInfo::Unknown) {
-        State = LazyValueInfo::Unknown;
-        break;
+  { // Scope for SwitchInstProfUpdateWrapper. It must not live during
+    // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
+    SwitchInstProfUpdateWrapper SI(*I);
+
+    for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+      ConstantInt *Case = CI->getCaseValue();
+
+      // Check to see if the switch condition is equal to/not equal to the case
+      // value on every incoming edge, equal/not equal being the same each time.
+      LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+      for (pred_iterator PI = PB; PI != PE; ++PI) {
+        // Is the switch condition equal to the case value?
+        LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+                                                                Cond, Case, *PI,
+                                                                BB, SI);
+        // Give up on this case if nothing is known.
+        if (Value == LazyValueInfo::Unknown) {
+          State = LazyValueInfo::Unknown;
+          break;
+        }
+
+        // If this was the first edge to be visited, record that all other edges
+        // need to give the same result.
+        if (PI == PB) {
+          State = Value;
+          continue;
+        }
+
+        // If this case is known to fire for some edges and known not to fire for
+        // others then there is nothing we can do - give up.
+        if (Value != State) {
+          State = LazyValueInfo::Unknown;
+          break;
+        }
       }
 
-      // If this was the first edge to be visited, record that all other edges
-      // need to give the same result.
-      if (PI == PB) {
-        State = Value;
+      if (State == LazyValueInfo::False) {
+        // This case never fires - remove it.
+        BasicBlock *Succ = CI->getCaseSuccessor();
+        Succ->removePredecessor(BB);
+        CI = SI.removeCase(CI);
+        CE = SI->case_end();
+
+        // The condition can be modified by removePredecessor's PHI simplification
+        // logic.
+        Cond = SI->getCondition();
+
+        ++NumDeadCases;
+        Changed = true;
+        if (--SuccessorsCount[Succ] == 0)
+          DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
         continue;
       }
-
-      // If this case is known to fire for some edges and known not to fire for
-      // others then there is nothing we can do - give up.
-      if (Value != State) {
-        State = LazyValueInfo::Unknown;
+      if (State == LazyValueInfo::True) {
+        // This case always fires.  Arrange for the switch to be turned into an
+        // unconditional branch by replacing the switch condition with the case
+        // value.
+        SI->setCondition(Case);
+        NumDeadCases += SI->getNumCases();
+        Changed = true;
         break;
       }
-    }
 
-    if (State == LazyValueInfo::False) {
-      // This case never fires - remove it.
-      BasicBlock *Succ = CI->getCaseSuccessor();
-      Succ->removePredecessor(BB);
-      CI = SI->removeCase(CI);
-      CE = SI->case_end();
-
-      // The condition can be modified by removePredecessor's PHI simplification
-      // logic.
-      Cond = SI->getCondition();
-
-      ++NumDeadCases;
-      Changed = true;
-      if (--SuccessorsCount[Succ] == 0)
-        DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
-      continue;
+      // Increment the case iterator since we didn't delete it.
+      ++CI;
     }
-    if (State == LazyValueInfo::True) {
-      // This case always fires.  Arrange for the switch to be turned into an
-      // unconditional branch by replacing the switch condition with the case
-      // value.
-      SI->setCondition(Case);
-      NumDeadCases += SI->getNumCases();
-      Changed = true;
-      break;
-    }
-
-    // Increment the case iterator since we didn't delete it.
-    ++CI;
   }
 
   if (Changed)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll b/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll
new file mode 100644
index 0000000000000..493b4c2273e21
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/profmd.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; Removed several cases from switch.
+define i32 @switch1(i32 %s) {
+; CHECK-LABEL: @switch1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
+;
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+; CHECK:       negative:
+; CHECK-NEXT:    switch i32 [[S]], label [[OUT]] [
+; CHECK-NEXT:    i32 -2, label [[NEXT:%.*]]
+; CHECK-NEXT:    i32 -1, label [[NEXT]]
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 1, label %out
+  i32 -1, label %next
+  i32 -2, label %next
+  i32 2, label %out
+  i32 3, label %out
+; CHECK-NEXT: !prof ![[MD0:[0-9]+]]
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6}
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %negative ], [ 0, %negative ]
+  ret i32 %q
+}
+
+; Removed all cases from switch.
+define i32 @switch2(i32 %s) {
+; CHECK-LABEL: @switch2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[POSITIVE:%.*]], label [[OUT:%.*]]
+;
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 -1, label %next
+  i32 -2, label %next
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3}
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+; Change switch into conditional branch.
+define i32 @switch3(i32 %s) {
+; CHECK-LABEL: @switch3(
+;
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+; CHECK:      positive:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 %s, 1
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEXT:%.*]], label [[OUT:%.*]], !prof ![[MD1:[0-9]+]]
+  switch i32 %s, label %out [
+  i32 1, label %next
+  i32 -1, label %next
+  i32 -2, label %next
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3}
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+; Removed all cases from switch.
+define i32 @switch4(i32 %s) {
+; CHECK-LABEL: @switch4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[S:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[NEGATIVE:%.*]], label [[OUT:%.*]]
+;
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+; CHECK:       negative:
+; CHECK-NEXT:    br label %out
+  switch i32 %s, label %out [
+  i32 0, label %out
+  i32 1, label %out
+  i32 2, label %out
+  i32 3, label %out
+  ], !prof !{!"branch_weights", i32 99, i32 1, i32 2, i32 3, i32 4}
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+}
+
+; CHECK: ![[MD0]] = !{!"branch_weights", i32 99, i32 4, i32 3}
+; CHECK: ![[MD1]] = !{!"branch_weights", i32 1, i32 99}

From 4cd07dbeec9811a98efbc0ed3518e78dc38c0f53 Mon Sep 17 00:00:00 2001
From: Serge Guelton <sguelton@redhat.com>
Date: Wed, 5 Jun 2019 06:35:10 +0000
Subject: [PATCH 1099/1176] Reduce memory consumption of coverage dumps

Avoiding an intermediate join operation removes the need for an
intermediate buffer that may be quite large, as showcased by

        https://bugs.llvm.org/show_bug.cgi?id=41965

Differential Revision: https://reviews.llvm.org/D62623

llvm-svn: 362584
---
 clang/lib/CodeGen/CoverageMappingGen.cpp | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index ad014b5a17ee1..d900c7b238378 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -1388,10 +1388,19 @@ void CoverageMappingModuleGen::emit() {
   std::string FilenamesAndCoverageMappings;
   llvm::raw_string_ostream OS(FilenamesAndCoverageMappings);
   CoverageFilenamesSectionWriter(FilenameRefs).write(OS);
-  std::string RawCoverageMappings =
-      llvm::join(CoverageMappings.begin(), CoverageMappings.end(), "");
-  OS << RawCoverageMappings;
-  size_t CoverageMappingSize = RawCoverageMappings.size();
+
+  // Stream the content of CoverageMappings to OS while keeping
+  // memory consumption under control.
+  size_t CoverageMappingSize = 0;
+  for (auto &S : CoverageMappings) {
+    CoverageMappingSize += S.size();
+    OS << S;
+    S.clear();
+    S.shrink_to_fit();
+  }
+  CoverageMappings.clear();
+  CoverageMappings.shrink_to_fit();
+
   size_t FilenamesSize = OS.str().size() - CoverageMappingSize;
   // Append extra zeroes if necessary to ensure that the size of the filenames
   // and coverage mappings is a multiple of 8.

From 9d8d0f68fbc32f88dd2eab8a47635834d3f4020a Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Wed, 5 Jun 2019 06:58:41 +0000
Subject: [PATCH 1100/1176] [CMake][Fuchsia] Use libc++ ABIv2 for the first
 stage build

This also unifies flags between macOS and Linux builds.

llvm-svn: 362585
---
 clang/cmake/caches/Fuchsia.cmake | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake
index 6b4fefcb04804..63bd62d1e93de 100644
--- a/clang/cmake/caches/Fuchsia.cmake
+++ b/clang/cmake/caches/Fuchsia.cmake
@@ -29,20 +29,21 @@ if(APPLE)
   set(COMPILER_RT_ENABLE_IOS OFF CACHE BOOL "")
   set(COMPILER_RT_ENABLE_TVOS OFF CACHE BOOL "")
   set(COMPILER_RT_ENABLE_WATCHOS OFF CACHE BOOL "")
-elseif(UNIX)
-  set(LIBUNWIND_ENABLE_SHARED OFF CACHE BOOL "")
-  set(LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "")
-  set(LIBUNWIND_INSTALL_LIBRARY OFF CACHE BOOL "")
-  set(LIBCXXABI_USE_COMPILER_RT ON CACHE BOOL "")
-  set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
-  set(LIBCXXABI_USE_LLVM_UNWINDER ON CACHE BOOL "")
-  set(LIBCXXABI_ENABLE_STATIC_UNWINDER ON CACHE BOOL "")
-  set(LIBCXXABI_INSTALL_LIBRARY OFF CACHE BOOL "")
-  set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
-  set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
-  set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
 endif()
 
+set(LIBUNWIND_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBUNWIND_INSTALL_LIBRARY OFF CACHE BOOL "")
+set(LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "")
+set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_STATIC_UNWINDER ON CACHE BOOL "")
+set(LIBCXXABI_INSTALL_LIBRARY OFF CACHE BOOL "")
+set(LIBCXXABI_USE_COMPILER_RT ON CACHE BOOL "")
+set(LIBCXXABI_USE_LLVM_UNWINDER ON CACHE BOOL "")
+set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
+set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
+
 if(BOOTSTRAP_CMAKE_SYSTEM_NAME)
   set(target "${BOOTSTRAP_CMAKE_CXX_COMPILER_TARGET}")
   if(STAGE2_LINUX_${target}_SYSROOT)

From da7f033693a757b54bde89b14af601b672944aa9 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 5 Jun 2019 07:29:55 +0000
Subject: [PATCH 1101/1176] Ignore DIEs in the skeleton unit in a DWO scenario

Summary:
r362103 exposed a bug, where we could read incorrect data if a skeleton
unit contained more than the single unit DIE. Clang emits these kinds of
units with -fsplit-dwarf-inlining (which is also the default).

Changing lldb to handle these DIEs is nontrivial, as we'd have to change
the UID encoding logic to be able to reference these DIEs, and fix up
various places which are assuming that all DIEs come from the separate
compile unit.

However, it turns out this is not necessary, as the DWO unit contains
all the information that the skeleton unit does. So, this patch just
skips parsing the extra DIEs if we have successfully found the DWO file.
This enforces the invariant that the rest of the code is already
operating under.

This patch fixes a couple of existing tests, but I've also included a
simpler test which does not depend on execution of binaries, and would
have helped us in catching this sooner.

Reviewers: clayborg, JDevlieghere, aprantl

Subscribers: probinson, dblaikie, lldb-commits

Differential Revision: https://reviews.llvm.org/D62852

llvm-svn: 362586
---
 lldb/lit/SymbolFile/DWARF/split-dwarf-inlining.cpp |  8 ++++++++
 lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp | 11 +++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 lldb/lit/SymbolFile/DWARF/split-dwarf-inlining.cpp

diff --git a/lldb/lit/SymbolFile/DWARF/split-dwarf-inlining.cpp b/lldb/lit/SymbolFile/DWARF/split-dwarf-inlining.cpp
new file mode 100644
index 0000000000000..5873d896b19a6
--- /dev/null
+++ b/lldb/lit/SymbolFile/DWARF/split-dwarf-inlining.cpp
@@ -0,0 +1,8 @@
+// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -fsplit-dwarf-inlining \
+// RUN:   -c %s -o %t
+// RUN: %lldb %t -o "breakpoint set -n foo" -b | FileCheck %s
+
+// CHECK: Breakpoint 1: 2 locations
+
+__attribute__((always_inline)) int foo(int x) { return x; }
+int bar(int x) { return foo(x); }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 287ff60b600d2..aadbe65b19cfa 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -183,6 +183,17 @@ void DWARFUnit::ExtractDIEsRWLocked() {
 
       if (!m_first_die)
         AddUnitDIE(m_die_array.front());
+
+      // With -fsplit-dwarf-inlining, clang will emit non-empty skeleton compile
+      // units. We are not able to access these DIE *and* the dwo file
+      // simultaneously. We also don't need to do that as the dwo file will
+      // contain a superset of information. So, we don't even attempt to parse
+      // any remaining DIEs.
+      if (m_dwo_symbol_file) {
+        m_die_array.front().SetHasChildren(false);
+        break;
+      }
+
     } else {
       if (null_die) {
         if (prev_die_had_children) {

From 2ae86d208932637802c4c18b5893f74c0ed418a0 Mon Sep 17 00:00:00 2001
From: Michal Gorny <mgorny@gentoo.org>
Date: Wed, 5 Jun 2019 08:21:42 +0000
Subject: [PATCH 1102/1176] [clang] [test] Add a (xfailing) test for PR41027

Add a test for tracking PR41027 (8.0 regression breaking assembly code
relying on __builtin_constant_p() to identify compile-time constants).
Mark it as expected to fail everywhere.

Differential Revision: https://reviews.llvm.org/D60728

llvm-svn: 362587
---
 clang/test/Sema/pr41027.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 clang/test/Sema/pr41027.c

diff --git a/clang/test/Sema/pr41027.c b/clang/test/Sema/pr41027.c
new file mode 100644
index 0000000000000..94ace6463810f
--- /dev/null
+++ b/clang/test/Sema/pr41027.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64 -fsyntax-only %s
+// XFAIL: *
+
+inline void pr41027(unsigned a, unsigned b) {
+  if (__builtin_constant_p(a)) {
+    __asm__ volatile("outl %0,%w1" : : "a"(b), "n"(a));
+  } else {
+    __asm__ volatile("outl %0,%w1" : : "a"(b), "d"(a));
+  }
+}

From 3c850ca56035fca73fc07bfb138628d745aa3947 Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Wed, 5 Jun 2019 08:29:24 +0000
Subject: [PATCH 1103/1176] [CMake] Export CMAKE_CONFIGURATION_TYPES for the
 LLVM build-tree

Summary: Useful info for standalone builds of subprojects. If a multi-configuration generator was used for the provided LLVM build-tree, standalone builds should consider actual subdirectories per configuration in `find_program()` (e.g. looking for `llvm-lit` or `llvm-tblgen`).

Reviewers: labath, beanz, mgorny

Subscribers: lldb-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62878

llvm-svn: 362588
---
 llvm/cmake/modules/LLVMConfig.cmake.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in
index df97c723efaa2..536031f790d3b 100644
--- a/llvm/cmake/modules/LLVMConfig.cmake.in
+++ b/llvm/cmake/modules/LLVMConfig.cmake.in
@@ -83,6 +83,7 @@ set(LLVM_BINARY_DIR "@LLVM_CONFIG_BINARY_DIR@")
 set(LLVM_TOOLS_BINARY_DIR "@LLVM_CONFIG_TOOLS_BINARY_DIR@")
 set(LLVM_TOOLS_INSTALL_DIR "@LLVM_TOOLS_INSTALL_DIR@")
 set(LLVM_HAVE_OPT_VIEWER_MODULES @LLVM_HAVE_OPT_VIEWER_MODULES@)
+set(LLVM_CONFIGURATION_TYPES @CMAKE_CONFIGURATION_TYPES@)
 
 if(NOT TARGET LLVMSupport)
   set(LLVM_EXPORTED_TARGETS "@LLVM_CONFIG_EXPORTS@")

From c7694624383637e3f4e3779a3626542c830856a7 Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Wed, 5 Jun 2019 08:31:50 +0000
Subject: [PATCH 1104/1176] [CMake] Add configuration dirs as potential
 locations for llvm-lit and llvm-tblgen in standalone builds

Summary:
If the provided LLVM build-tree used a multi-configuration generator like Xcode, `LLVM_TOOLS_BINARY_DIR` will have a generator-specific placeholder to express `CMAKE_CFG_INTDIR`. Thus `llvm-lit` and `llvm-tblgen` won't be found.
D62878 exports the actual configuration types so we can fix the path and add them to the search paths for `find_program()`.

Reviewers: xiaobai, labath, stella.stamenova

Reviewed By: xiaobai, stella.stamenova

Subscribers: mgorny, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62879

llvm-svn: 362589
---
 lldb/cmake/modules/LLDBStandalone.cmake | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 604544a01ee6b..267dab588a602 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -1,3 +1,12 @@
+function(append_configuration_directories input_dir output_dirs)
+  set(dirs_list ${input_dir})
+  foreach(config_type ${LLVM_CONFIGURATION_TYPES})
+    string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} dir ${input_dir})
+    list(APPEND dirs_list ${dir})
+  endforeach()
+  set(${output_dirs} ${dirs_list} PARENT_SCOPE)
+endfunction()
+
 # If we are not building as a part of LLVM, build LLDB as an
 # standalone project, using LLVM as an external library:
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
@@ -27,7 +36,10 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   if(CMAKE_HOST_WIN32 AND NOT CYGWIN)
     set(lit_file_name "${lit_file_name}.py")
   endif()
-  set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_TOOLS_BINARY_DIR}/${lit_file_name}" CACHE PATH "Path to llvm-lit")
+
+  append_configuration_directories(${LLVM_TOOLS_BINARY_DIR} config_dirs)
+  find_program(lit_full_path ${lit_file_name} ${config_dirs} NO_DEFAULT_PATH)
+  set(LLVM_DEFAULT_EXTERNAL_LIT ${lit_full_path} CACHE PATH "Path to llvm-lit")
 
   if(CMAKE_CROSSCOMPILING)
     set(LLVM_NATIVE_BUILD "${LLDB_PATH_TO_LLVM_BUILD}/NATIVE")
@@ -51,8 +63,9 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
         "${LLVM_NATIVE_BUILD}/Release/bin/llvm-tblgen${HOST_EXECUTABLE_SUFFIX}")
     endif()
   else()
-    find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR}
-      NO_DEFAULT_PATH)
+    set(tblgen_file_name "llvm-tblgen${CMAKE_EXECUTABLE_SUFFIX}")
+    append_configuration_directories(${LLVM_TOOLS_BINARY_DIR} config_dirs)
+    find_program(LLVM_TABLEGEN_EXE ${tblgen_file_name} ${config_dirs} NO_DEFAULT_PATH)
   endif()
 
   # They are used as destination of target generators.

From 6fc4c1cc54ac48acd9af142e4bbcc4e002d53370 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Wed, 5 Jun 2019 08:58:00 +0000
Subject: [PATCH 1105/1176] Include what you use in PPCFrameLowering.h

llvm-svn: 362590
---
 llvm/lib/Target/PowerPC/PPCFrameLowering.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index a5453449d165a..d116e9fd22e12 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -12,7 +12,6 @@
 #ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 #define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
 
-#include "PPC.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"

From e12334a0f248bd1579af995fb09157a65500400a Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.smith@linaro.org>
Date: Wed, 5 Jun 2019 09:31:45 +0000
Subject: [PATCH 1106/1176] [ELF] Allow reading of more than one FEATURE_1_AND
 in same object.

Although many relocatable objects will have a single
GNU_PROPERTY_X86_FEATURE_1_AND in the .note.gnu.property section it is
permissible to have more than one, and there are tests in ld.bfd that use
it. The behavior that ld.bfd follows is to set the feature bit for a
relocatable object if any of the GNU_PROPERTY_X86_FEATURE_1_AND
have the feature bit set.

Differential Revision: https://reviews.llvm.org/D62862

llvm-svn: 362591
---
 lld/ELF/InputFiles.cpp                  | 12 +++++----
 lld/test/ELF/i386-cet.s                 |  3 +--
 lld/test/ELF/x86-property-relocatable.s | 36 +++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 7 deletions(-)
 create mode 100644 lld/test/ELF/x86-property-relocatable.s

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index e16a07fa1a31c..a863894c55720 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -772,6 +772,7 @@ static uint32_t readAndFeatures(ObjFile<ELFT> *Obj, ArrayRef<uint8_t> Data) {
   using Elf_Nhdr = typename ELFT::Nhdr;
   using Elf_Note = typename ELFT::Note;
 
+  uint32_t FeaturesSet = 0;
   while (!Data.empty()) {
     // Read one NOTE record.
     if (Data.size() < sizeof(Elf_Nhdr))
@@ -797,8 +798,10 @@ static uint32_t readAndFeatures(ObjFile<ELFT> *Obj, ArrayRef<uint8_t> Data) {
       uint32_t Size = read32le(Desc.data() + 4);
 
       if (Type == GNU_PROPERTY_X86_FEATURE_1_AND) {
-        // We found the field.
-        return read32le(Desc.data() + 8);
+        // We found a FEATURE_1_AND field. There may be more than one of these
+        // in a .note.gnu.propery section, for a relocatable object we
+        // accumulate the bits set.
+        FeaturesSet |= read32le(Desc.data() + 8);
       }
 
       // On 64-bit, a payload may be followed by a 4-byte padding to make its
@@ -809,12 +812,11 @@ static uint32_t readAndFeatures(ObjFile<ELFT> *Obj, ArrayRef<uint8_t> Data) {
       Desc = Desc.slice(Size + 8); // +8 for Type and Size
     }
 
-    // Go to next NOTE record if a note section didn't contain
-    // X86_FEATURES_1_AND description.
+    // Go to next NOTE record to look for more FEATURE_1_AND descriptions.
     Data = Data.slice(Nhdr->getSize());
   }
 
-  return 0;
+  return FeaturesSet;
 }
 
 template <class ELFT>
diff --git a/lld/test/ELF/i386-cet.s b/lld/test/ELF/i386-cet.s
index 22a5205675836..125c7977ee280 100644
--- a/lld/test/ELF/i386-cet.s
+++ b/lld/test/ELF/i386-cet.s
@@ -31,14 +31,13 @@
 
 .section ".note.gnu.property", "a"
 .long 4
-.long 0x10
+.long 0xc
 .long 0x5
 .asciz "GNU"
 
 .long 0xc0000002
 .long 4
 .long 3
-.long 0
 
 .text
 .globl func1
diff --git a/lld/test/ELF/x86-property-relocatable.s b/lld/test/ELF/x86-property-relocatable.s
new file mode 100644
index 0000000000000..12b1e73b4bace
--- /dev/null
+++ b/lld/test/ELF/x86-property-relocatable.s
@@ -0,0 +1,36 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: ld.lld -r %t.o -o %t2.o
+# RUN: llvm-readelf -n %t2.o | FileCheck -match-full-lines %s
+
+## Test that .note.gnu.property is passed through -r, and that we can handle
+## more than one FEATURE_AND in the same object file. This is logically the
+## same as if the features were combined in a single FEATURE_AND as the rule
+## states that the bit in the output pr_data field if it is set in all
+.text
+ret
+
+.section ".note.gnu.property", "a"
+.p2align 3
+.long 4
+.long 0x10
+.long 0x5
+.asciz "GNU"
+
+.long 0xc0000002 // GNU_PROPERTY_X86_FEATURE_1_AND
+.long 4
+.long 1          // GNU_PROPERTY_X86_FEATURE_1_IBT
+.long 0
+
+.long 4
+.long 0x10
+.long 0x5
+.asciz "GNU"
+.long 0xc0000002 // GNU_PROPERTY_X86_FEATURE_1_AND
+.long 4
+.long 2          // GNU_PROPERTY_X86_FEATURE_1_SHSTK
+.long 0
+
+# CHECK:   Owner                 Data size	Description
+# CHECK-NEXT:   GNU                   0x00000010	NT_GNU_PROPERTY_TYPE_0 (property note)
+# CHECK-NEXT:     Properties:    x86 feature: IBT, SHSTK

From db134aaec24e8a88fdac9b5015e7af8575b5cad6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 10:04:05 +0000
Subject: [PATCH 1107/1176] [IPO] Disabled 'default only' switch statements to
 fix MSVC warnings.

@jdoerfert Looks like these are placeholders for incoming abstract attributes patches so I've just commented the code out, even though this is usually frowned upon.

llvm-svn: 362592
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index d82667be933b5..2e1bdd667e3f0 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -86,10 +86,10 @@ static void bookkeeping(AbstractAttribute::ManifestPosition MP,
 
   if (!Attr.isEnumAttribute())
     return;
-  switch (Attr.getKindAsEnum()) {
-  default:
-    return;
-  }
+  //switch (Attr.getKindAsEnum()) {
+  //default:
+  //  return;
+  //}
 }
 
 /// Helper to identify the correct offset into an attribute list.
@@ -397,10 +397,10 @@ void Attributor::identifyDefaultAbstractAttributes(
     // to concrete attributes we only cache the ones that are as identified in
     // the following switch.
     // Note: There are no concrete attributes now so this is initially empty.
-    switch (I.getOpcode()) {
-    default:
-      break;
-    }
+    //switch (I.getOpcode()) {
+    //default:
+    //  break;
+    //}
     if (IsInterestingOpcode)
       InstOpcodeMap[I.getOpcode()].push_back(&I);
     if (I.mayReadOrWriteMemory())

From daeeb33f8601ed0ab88fc7a1b20088cb782b45e5 Mon Sep 17 00:00:00 2001
From: Serge Guelton <sguelton@redhat.com>
Date: Wed, 5 Jun 2019 10:32:28 +0000
Subject: [PATCH 1108/1176] Sanitize llvm-size help

Remove irrelevant options from standard help output.

New output:

    OVERVIEW: llvm object size dumper

    USAGE: llvm-size [options] <input files>

    OPTIONS:

    Generic Options:

      --help           - Display available options (--help-hidden for more)
      --help-list      - Display list of available options (--help-list-hidden for more)
      --version        - Display the version of this program

    llvm-size Options:

      Specify output format
          -A             - System V format
          -B             - Berkeley format
          -m             - Darwin -m format
      --arch=<string>  - architecture(s) from a Mach-O file to dump
      --common         - Print common symbols in the ELF file.  When using Berkely format, this is added to bss.
      Print size in radix:
          -o             - Print size in octal
          -d             - Print size in decimal
          -x             - Print size in hexadecimal
      --format=<value> - Specify output format
        =sysv          -   System V format
        =berkeley      -   Berkeley format
        =darwin        -   Darwin -m format
      -l               - When format is darwin, use long format to include addresses and offsets.
      --radix=<value>  - Print size in radix
        =8             -   Print size in octal
        =10            -   Print size in decimal
        =16            -   Print size in hexadecimal
      --totals         - Print totals of all objects - Berkeley format only

Differential Revision: https://reviews.llvm.org/D62482

llvm-svn: 362593
---
 llvm/tools/llvm-size/llvm-size.cpp | 58 ++++++++++++++++--------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/llvm/tools/llvm-size/llvm-size.cpp b/llvm/tools/llvm-size/llvm-size.cpp
index 32d10abe773f2..e422e69f78515 100644
--- a/llvm/tools/llvm-size/llvm-size.cpp
+++ b/llvm/tools/llvm-size/llvm-size.cpp
@@ -32,20 +32,22 @@
 using namespace llvm;
 using namespace object;
 
+cl::OptionCategory SizeCat("llvm-size Options");
+
 enum OutputFormatTy { berkeley, sysv, darwin };
 static cl::opt<OutputFormatTy>
-OutputFormat("format", cl::desc("Specify output format"),
-             cl::values(clEnumVal(sysv, "System V format"),
-                        clEnumVal(berkeley, "Berkeley format"),
-                        clEnumVal(darwin, "Darwin -m format")),
-             cl::init(berkeley));
-
-static cl::opt<OutputFormatTy> OutputFormatShort(
-    cl::desc("Specify output format"),
-    cl::values(clEnumValN(sysv, "A", "System V format"),
-               clEnumValN(berkeley, "B", "Berkeley format"),
-               clEnumValN(darwin, "m", "Darwin -m format")),
-    cl::init(berkeley));
+    OutputFormat("format", cl::desc("Specify output format"),
+                 cl::values(clEnumVal(sysv, "System V format"),
+                            clEnumVal(berkeley, "Berkeley format"),
+                            clEnumVal(darwin, "Darwin -m format")),
+                 cl::init(berkeley), cl::cat(SizeCat));
+
+static cl::opt<OutputFormatTy>
+    OutputFormatShort(cl::desc("Specify output format"),
+                      cl::values(clEnumValN(sysv, "A", "System V format"),
+                                 clEnumValN(berkeley, "B", "Berkeley format"),
+                                 clEnumValN(darwin, "m", "Darwin -m format")),
+                      cl::init(berkeley), cl::cat(SizeCat));
 
 static bool BerkeleyHeaderPrinted = false;
 static bool MoreThanOneFile = false;
@@ -55,18 +57,20 @@ static uint64_t TotalObjectBss = 0;
 static uint64_t TotalObjectTotal = 0;
 
 cl::opt<bool>
-DarwinLongFormat("l", cl::desc("When format is darwin, use long format "
-                               "to include addresses and offsets."));
+    DarwinLongFormat("l",
+                     cl::desc("When format is darwin, use long format "
+                              "to include addresses and offsets."),
+                     cl::cat(SizeCat));
 
 cl::opt<bool>
     ELFCommons("common",
                cl::desc("Print common symbols in the ELF file.  When using "
                         "Berkely format, this is added to bss."),
-               cl::init(false));
+               cl::init(false), cl::cat(SizeCat));
 
 static cl::list<std::string>
-ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
-          cl::ZeroOrMore);
+    ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
+              cl::ZeroOrMore, cl::cat(SizeCat));
 static bool ArchAll = false;
 
 enum RadixTy { octal = 8, decimal = 10, hexadecimal = 16 };
@@ -74,25 +78,26 @@ static cl::opt<RadixTy> Radix(
     "radix", cl::desc("Print size in radix"), cl::init(decimal),
     cl::values(clEnumValN(octal, "8", "Print size in octal"),
                clEnumValN(decimal, "10", "Print size in decimal"),
-               clEnumValN(hexadecimal, "16", "Print size in hexadecimal")));
+               clEnumValN(hexadecimal, "16", "Print size in hexadecimal")),
+    cl::cat(SizeCat));
 
-static cl::opt<RadixTy>
-RadixShort(cl::desc("Print size in radix:"),
-           cl::values(clEnumValN(octal, "o", "Print size in octal"),
-                      clEnumValN(decimal, "d", "Print size in decimal"),
-                      clEnumValN(hexadecimal, "x", "Print size in hexadecimal")),
-           cl::init(decimal));
+static cl::opt<RadixTy> RadixShort(
+    cl::desc("Print size in radix:"),
+    cl::values(clEnumValN(octal, "o", "Print size in octal"),
+               clEnumValN(decimal, "d", "Print size in decimal"),
+               clEnumValN(hexadecimal, "x", "Print size in hexadecimal")),
+    cl::init(decimal), cl::cat(SizeCat));
 
 static cl::opt<bool>
     TotalSizes("totals",
                cl::desc("Print totals of all objects - Berkeley format only"),
-               cl::init(false));
+               cl::init(false), cl::cat(SizeCat));
 
 static cl::alias TotalSizesShort("t", cl::desc("Short for --totals"),
                                  cl::aliasopt(TotalSizes));
 
 static cl::list<std::string>
-InputFilenames(cl::Positional, cl::desc("<input files>"), cl::ZeroOrMore);
+    InputFilenames(cl::Positional, cl::desc("<input files>"), cl::ZeroOrMore);
 
 static bool HadError = false;
 
@@ -860,6 +865,7 @@ static void printBerkelyTotals() {
 
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
+  cl::HideUnrelatedOptions(SizeCat);
   cl::ParseCommandLineOptions(argc, argv, "llvm object size dumper\n");
 
   ToolName = argv[0];

From ddfbfd6172f7aeee0bbed99ac763846ebb2e618f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 10:55:55 +0000
Subject: [PATCH 1109/1176] [X86][SSE] Add some nt-store test cases inspired by
 PR42123

llvm-svn: 362594
---
 .../X86/merge-consecutive-stores-nt.ll        | 163 ++++++++++++++++++
 1 file changed, 163 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll

diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
new file mode 100644
index 0000000000000..0461008f84d90
--- /dev/null
+++ b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE4A
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
+
+;
+; PR42123
+;
+
+; FIXME: AVX doesn't retain NT flag on store.
+; Should be VMOVNTPS ymm.
+define void @merge_2_v4f32_align32(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movaps (%ecx), %xmm0
+; X86-NEXT:    movaps 16(%ecx), %xmm1
+; X86-NEXT:    movntps %xmm0, (%eax)
+; X86-NEXT:    movntps %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: merge_2_v4f32_align32:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE-NEXT:    movntps %xmm0, (%rsi)
+; X64-SSE-NEXT:    movntps %xmm1, 16(%rsi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align32:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovaps (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 32
+  %4 = load <4 x float>, <4 x float>* %2, align 16
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 32, !nontemporal !0
+  store <4 x float> %4, <4 x float>* %6, align 16, !nontemporal !0
+  ret void
+}
+
+; FIXME: shouldn't attempt to merge nt and non-nt stores even if aligned.
+; Must be kept seperate as VMOVNTPS xmm + VMOVAPS xmm.
+define void @merge_2_v4f32_align32_mix(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align32_mix:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movaps (%ecx), %xmm0
+; X86-NEXT:    movaps 16(%ecx), %xmm1
+; X86-NEXT:    movntps %xmm0, (%eax)
+; X86-NEXT:    movaps %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: merge_2_v4f32_align32_mix:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE-NEXT:    movntps %xmm0, (%rsi)
+; X64-SSE-NEXT:    movaps %xmm1, 16(%rsi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align32_mix:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovaps (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 32
+  %4 = load <4 x float>, <4 x float>* %2, align 16
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 32, !nontemporal !0
+  store <4 x float> %4, <4 x float>* %6, align 16
+  ret void
+}
+
+; FIXME: AVX can't perform NT-store-ymm on 16-byte aligned memory.
+; Must be kept seperate as VMOVNTPS xmm.
+define void @merge_2_v4f32_align16(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movaps (%ecx), %xmm0
+; X86-NEXT:    movaps 16(%ecx), %xmm1
+; X86-NEXT:    movntps %xmm0, (%eax)
+; X86-NEXT:    movntps %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: merge_2_v4f32_align16:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE-NEXT:    movntps %xmm0, (%rsi)
+; X64-SSE-NEXT:    movntps %xmm1, 16(%rsi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align16:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 16
+  %4 = load <4 x float>, <4 x float>* %2, align 16
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 16, !nontemporal !0
+  store <4 x float> %4, <4 x float>* %6, align 16, !nontemporal !0
+  ret void
+}
+
+; FIXME: Nothing can perform NT-store-vector on 1-byte aligned memory.
+; Must be scalarized to use MOVTNI/MOVNTSD.
+define void @merge_2_v4f32_align1(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movups (%ecx), %xmm0
+; X86-NEXT:    movups 16(%ecx), %xmm1
+; X86-NEXT:    movups %xmm0, (%eax)
+; X86-NEXT:    movups %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: merge_2_v4f32_align1:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movups (%rdi), %xmm0
+; X64-SSE-NEXT:    movups 16(%rdi), %xmm1
+; X64-SSE-NEXT:    movups %xmm0, (%rsi)
+; X64-SSE-NEXT:    movups %xmm1, 16(%rsi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align1:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 1
+  %4 = load <4 x float>, <4 x float>* %2, align 1
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 1, !nontemporal !0
+  store <4 x float> %4, <4 x float>* %6, align 1, !nontemporal !0
+  ret void
+}
+
+!0 = !{i32 1}

From 5a81af547c2c7a3444e4b4a6b752c4fc1e5d95b1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 10:59:04 +0000
Subject: [PATCH 1110/1176] [TargetLowering] SimplifyDemandedBits - pull out
 shift value type. NFCI.

Will be used more in an upcoming patch.

llvm-svn: 362595
---
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 785530b3605b7..6e6917b39b6d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1136,6 +1136,7 @@ bool TargetLowering::SimplifyDemandedBits(
       if (SA->getAPIntValue().uge(BitWidth))
         break;
 
+      EVT ShiftVT = Op1.getValueType();
       unsigned ShAmt = SA->getZExtValue();
       APInt InDemandedMask = (DemandedBits << ShAmt);
 
@@ -1160,7 +1161,7 @@ bool TargetLowering::SimplifyDemandedBits(
                 Opc = ISD::SHL;
               }
 
-              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+              SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
               return TLO.CombineTo(
                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
             }

From b42196661ba7ccfa3d68906ed52e0488305291b2 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Wed, 5 Jun 2019 11:37:53 +0000
Subject: [PATCH 1111/1176] [llvm-objdump] - Disassemble non-executable
 sections if specifically requested.

This is https://bugs.llvm.org/show_bug.cgi?id=41897.

Previously -d + -j .data had no effect, that wasn't consistent with GNU,
which proccesses .data in that case. With this patch we follow this behavior.

Diffeential revision: https://reviews.llvm.org/D62848

llvm-svn: 362596
---
 .../X86/section-filter-disasm.test            | 43 +++++++++++++++++++
 .../X86/section-filter-relocs.test            | 15 ++++---
 llvm/tools/llvm-objdump/llvm-objdump.cpp      |  3 +-
 3 files changed, 53 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objdump/X86/section-filter-disasm.test

diff --git a/llvm/test/tools/llvm-objdump/X86/section-filter-disasm.test b/llvm/test/tools/llvm-objdump/X86/section-filter-disasm.test
new file mode 100644
index 0000000000000..076e748bb5392
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/section-filter-disasm.test
@@ -0,0 +1,43 @@
+# RUN: yaml2obj %s -o %t.o
+
+## By default, only executable sections are disassembled,
+## but with the use of the --section flag, we can change this behavior.
+## Show that llvm-objdump can disassemble the specified sections.
+
+# RUN: llvm-objdump -d %t.o | FileCheck %s --check-prefix=TEXT \
+# RUN:   --implicit-check-not=.rodata --implicit-check-not=.data
+
+# RUN: llvm-objdump -d %t.o --section=.rodata \
+# RUN:   | FileCheck %s --check-prefix=RODATA \
+# RUN:   --implicit-check-not=.text --implicit-check-not=.data
+
+# RUN: llvm-objdump -d %t.o --section=.rodata --section=.text \
+# RUN:   | FileCheck %s --check-prefixes=RODATA,TEXT \
+# RUN:   --implicit-check-not=.data
+
+# RUN: llvm-objdump -d %t.o --section=.rodata --section=.text --section=.data \
+# RUN:   | FileCheck %s --check-prefixes=RODATA,TEXT,DATA
+
+# RODATA: Disassembly of section .rodata
+# TEXT:   Disassembly of section .text
+# DATA:   Disassembly of section .data
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:    .rodata
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC]
+    Content: '00'
+  - Name:    .text
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_EXECINSTR]
+    Content: '00'
+  - Name:    .data
+    Type:    SHT_PROGBITS
+    Flags:   [SHF_ALLOC, SHF_WRITE]
+    Content: '00'
diff --git a/llvm/test/tools/llvm-objdump/X86/section-filter-relocs.test b/llvm/test/tools/llvm-objdump/X86/section-filter-relocs.test
index 5411288019d9d..e8abbdfdf6339 100644
--- a/llvm/test/tools/llvm-objdump/X86/section-filter-relocs.test
+++ b/llvm/test/tools/llvm-objdump/X86/section-filter-relocs.test
@@ -1,16 +1,11 @@
 ## Test that --section works correctly for -d with -r.
 # RUN: yaml2obj %s -o %t.o
 
-## Show non-executable sections are not disassembled even if specified,
-## and that only the specified executable sections are disassembled.
+## Show that only the specified sections are disassembled.
 ## Also show that no relocation sections are dumped because none are
 ## specified.
-## FIXME: This is different behaviour to GNU objdump, which dumps the non-
-##        executable sections if requested explicitly.
-##        See https://bugs.llvm.org/show_bug.cgi?id=41897.
 # RUN: llvm-objdump -d -r %t.o --section=.text --section=.rodata \
-# RUN:   | FileCheck %s --check-prefixes=DISASM,RELOC --implicit-check-not=.text2 \
-# RUN:           --implicit-check-not=.rodata
+# RUN:   | FileCheck %s --check-prefixes=DISASM,RELOC --implicit-check-not=.text2
 
 # DISASM:       Disassembly of section .text:
 # DISASM-EMPTY:
@@ -18,6 +13,12 @@
 # DISASM-NEXT:  400: e8 00 00 00 00                callq   0 <.text+0x5>
 # RELOC-NEXT:                      00000401:  R_X86_64_PC32        foo+1
 # RELOC-NEXT:                      00000401:  R_X86_64_GOT32       foo
+# DISASM:       Disassembly of section .rodata:
+# DISASM-EMPTY:
+# DISASM-NEXT:  0000000000000000 .rodata:
+# DISASM-NEXT:  0: 00 00                           addb    %al, (%rax)
+# RELOC-NEXT:              0000000000000000:  R_X86_64_NONE        foo
+# DISASM-NEXT:  2: 00 00                           addb    %al, (%rax)
 
 --- !ELF
 FileHeader:
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 1312ea8bbd1ba..4e734a95de3a0 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1095,7 +1095,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
   array_pod_sort(AbsoluteSymbols.begin(), AbsoluteSymbols.end());
 
   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
-    if (!DisassembleAll && (!Section.isText() || Section.isVirtual()))
+    if (FilterSections.empty() && !DisassembleAll &&
+        (!Section.isText() || Section.isVirtual()))
       continue;
 
     uint64_t SectionAddr = Section.getAddress();

From 9b2b8ad8b187745881880b62fda465c6d5b61fa5 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Wed, 5 Jun 2019 11:46:57 +0000
Subject: [PATCH 1112/1176] Revert "Factor out duplicated code building a
 MemberExpr and marking it" and "Convert MemberExpr creation and serialization
 to work the same way as"

This reverts commits r362551 and r362563. Crashes during modules selfhost.

llvm-svn: 362597
---
 clang/include/clang/AST/Expr.h                | 68 ++++++++------
 clang/include/clang/AST/Stmt.h                |  1 -
 clang/include/clang/Sema/Sema.h               | 17 ----
 clang/lib/AST/DeclBase.cpp                    |  1 -
 clang/lib/AST/Expr.cpp                        | 72 ++++-----------
 .../Frontend/Rewrite/RewriteModernObjC.cpp    | 54 ++++++-----
 clang/lib/Frontend/Rewrite/RewriteObjC.cpp    | 24 ++---
 clang/lib/Sema/SemaExprCXX.cpp                | 12 +--
 clang/lib/Sema/SemaExprMember.cpp             | 71 ++++++--------
 clang/lib/Sema/SemaOverload.cpp               | 11 ++-
 clang/lib/Serialization/ASTReaderStmt.cpp     | 92 ++++++++++---------
 clang/lib/Serialization/ASTWriterStmt.cpp     | 56 +++++------
 12 files changed, 219 insertions(+), 260 deletions(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 438b10cc964f6..96cb8e8f1a81a 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -2735,7 +2735,6 @@ class MemberExpr final
                                     ASTTemplateKWAndArgsInfo,
                                     TemplateArgumentLoc> {
   friend class ASTReader;
-  friend class ASTStmtReader;
   friend class ASTStmtWriter;
   friend TrailingObjects;
 
@@ -2770,38 +2769,49 @@ class MemberExpr final
     return MemberExprBits.HasTemplateKWAndArgsInfo;
   }
 
-  MemberExpr(Expr *Base, bool IsArrow, SourceLocation OperatorLoc,
-             ValueDecl *MemberDecl, const DeclarationNameInfo &NameInfo,
-             QualType T, ExprValueKind VK, ExprObjectKind OK);
-  MemberExpr(EmptyShell Empty)
-      : Expr(MemberExprClass, Empty), Base(), MemberDecl() {}
-
 public:
-  static MemberExpr *Create(const ASTContext &C, Expr *Base, bool IsArrow,
+  MemberExpr(Expr *base, bool isarrow, SourceLocation operatorloc,
+             ValueDecl *memberdecl, const DeclarationNameInfo &NameInfo,
+             QualType ty, ExprValueKind VK, ExprObjectKind OK)
+      : Expr(MemberExprClass, ty, VK, OK, base->isTypeDependent(),
+             base->isValueDependent(), base->isInstantiationDependent(),
+             base->containsUnexpandedParameterPack()),
+        Base(base), MemberDecl(memberdecl), MemberDNLoc(NameInfo.getInfo()),
+        MemberLoc(NameInfo.getLoc()) {
+    assert(memberdecl->getDeclName() == NameInfo.getName());
+    MemberExprBits.IsArrow = isarrow;
+    MemberExprBits.HasQualifierOrFoundDecl = false;
+    MemberExprBits.HasTemplateKWAndArgsInfo = false;
+    MemberExprBits.HadMultipleCandidates = false;
+    MemberExprBits.OperatorLoc = operatorloc;
+  }
+
+  // NOTE: this constructor should be used only when it is known that
+  // the member name can not provide additional syntactic info
+  // (i.e., source locations for C++ operator names or type source info
+  // for constructors, destructors and conversion operators).
+  MemberExpr(Expr *base, bool isarrow, SourceLocation operatorloc,
+             ValueDecl *memberdecl, SourceLocation l, QualType ty,
+             ExprValueKind VK, ExprObjectKind OK)
+      : Expr(MemberExprClass, ty, VK, OK, base->isTypeDependent(),
+             base->isValueDependent(), base->isInstantiationDependent(),
+             base->containsUnexpandedParameterPack()),
+        Base(base), MemberDecl(memberdecl), MemberDNLoc(), MemberLoc(l) {
+    MemberExprBits.IsArrow = isarrow;
+    MemberExprBits.HasQualifierOrFoundDecl = false;
+    MemberExprBits.HasTemplateKWAndArgsInfo = false;
+    MemberExprBits.HadMultipleCandidates = false;
+    MemberExprBits.OperatorLoc = operatorloc;
+  }
+
+  static MemberExpr *Create(const ASTContext &C, Expr *base, bool isarrow,
                             SourceLocation OperatorLoc,
                             NestedNameSpecifierLoc QualifierLoc,
-                            SourceLocation TemplateKWLoc, ValueDecl *MemberDecl,
-                            DeclAccessPair FoundDecl,
+                            SourceLocation TemplateKWLoc, ValueDecl *memberdecl,
+                            DeclAccessPair founddecl,
                             DeclarationNameInfo MemberNameInfo,
-                            const TemplateArgumentListInfo *TemplateArgs,
-                            QualType T, ExprValueKind VK, ExprObjectKind OK);
-
-  /// Create an implicit MemberExpr, with no location, qualifier, template
-  /// arguments, and so on.
-  static MemberExpr *CreateImplicit(const ASTContext &C, Expr *Base,
-                                    bool IsArrow, ValueDecl *MemberDecl,
-                                    QualType T, ExprValueKind VK,
-                                    ExprObjectKind OK) {
-    return Create(C, Base, IsArrow, SourceLocation(), NestedNameSpecifierLoc(),
-                  SourceLocation(), MemberDecl,
-                  DeclAccessPair::make(MemberDecl, MemberDecl->getAccess()),
-                  DeclarationNameInfo(), nullptr, T, VK, OK);
-  }
-
-  static MemberExpr *CreateEmpty(const ASTContext &Context, bool HasQualifier,
-                                 bool HasFoundDecl,
-                                 bool HasTemplateKWAndArgsInfo,
-                                 unsigned NumTemplateArgs);
+                            const TemplateArgumentListInfo *targs, QualType ty,
+                            ExprValueKind VK, ExprObjectKind OK);
 
   void setBase(Expr *E) { Base = E; }
   Expr *getBase() const { return cast<Expr>(Base); }
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index d3b3bc27643f8..fe5d802688466 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -453,7 +453,6 @@ class alignas(void *) Stmt {
   enum { NumCallExprBits = 32 };
 
   class MemberExprBitfields {
-    friend class ASTStmtReader;
     friend class MemberExpr;
 
     unsigned : NumExprBits;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a065be308eed4..74a1a28a39bbc 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4510,23 +4510,6 @@ class Sema {
                                    UnqualifiedId &Member,
                                    Decl *ObjCImpDecl);
 
-  MemberExpr *
-  BuildMemberExpr(Expr *Base, bool IsArrow, SourceLocation OpLoc,
-                  const CXXScopeSpec *SS, SourceLocation TemplateKWLoc,
-                  ValueDecl *Member, DeclAccessPair FoundDecl,
-                  bool HadMultipleCandidates,
-                  const DeclarationNameInfo &MemberNameInfo, QualType Ty,
-                  ExprValueKind VK, ExprObjectKind OK,
-                  const TemplateArgumentListInfo *TemplateArgs = nullptr);
-  MemberExpr *
-  BuildMemberExpr(Expr *Base, bool IsArrow, SourceLocation OpLoc,
-                  NestedNameSpecifierLoc NNS, SourceLocation TemplateKWLoc,
-                  ValueDecl *Member, DeclAccessPair FoundDecl,
-                  bool HadMultipleCandidates,
-                  const DeclarationNameInfo &MemberNameInfo, QualType Ty,
-                  ExprValueKind VK, ExprObjectKind OK,
-                  const TemplateArgumentListInfo *TemplateArgs = nullptr);
-
   void ActOnDefaultCtorInitializers(Decl *CDtorDecl);
   bool ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
                                FunctionDecl *FDecl,
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index f5853b498043b..31985486d1d93 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -920,7 +920,6 @@ bool Decl::AccessDeclContextSanity() const {
   if (isa<TranslationUnitDecl>(this) ||
       isa<TemplateTypeParmDecl>(this) ||
       isa<NonTypeTemplateParmDecl>(this) ||
-      !getDeclContext() ||
       !isa<CXXRecordDecl>(getDeclContext()) ||
       isInvalidDecl() ||
       isa<StaticAssertDecl>(this) ||
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index ee9d853f9f871..10829c7007471 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -1538,44 +1538,29 @@ UnaryExprOrTypeTraitExpr::UnaryExprOrTypeTraitExpr(
   }
 }
 
-MemberExpr::MemberExpr(Expr *Base, bool IsArrow, SourceLocation OperatorLoc,
-                       ValueDecl *MemberDecl,
-                       const DeclarationNameInfo &NameInfo, QualType T,
-                       ExprValueKind VK, ExprObjectKind OK)
-    : Expr(MemberExprClass, T, VK, OK, Base->isTypeDependent(),
-           Base->isValueDependent(), Base->isInstantiationDependent(),
-           Base->containsUnexpandedParameterPack()),
-      Base(Base), MemberDecl(MemberDecl), MemberDNLoc(NameInfo.getInfo()),
-      MemberLoc(NameInfo.getLoc()) {
-  assert(!NameInfo.getName() ||
-         MemberDecl->getDeclName() == NameInfo.getName());
-  MemberExprBits.IsArrow = IsArrow;
-  MemberExprBits.HasQualifierOrFoundDecl = false;
-  MemberExprBits.HasTemplateKWAndArgsInfo = false;
-  MemberExprBits.HadMultipleCandidates = false;
-  MemberExprBits.OperatorLoc = OperatorLoc;
-}
-
 MemberExpr *MemberExpr::Create(
-    const ASTContext &C, Expr *Base, bool IsArrow, SourceLocation OperatorLoc,
+    const ASTContext &C, Expr *base, bool isarrow, SourceLocation OperatorLoc,
     NestedNameSpecifierLoc QualifierLoc, SourceLocation TemplateKWLoc,
-    ValueDecl *MemberDecl, DeclAccessPair FoundDecl,
-    DeclarationNameInfo NameInfo, const TemplateArgumentListInfo *TemplateArgs,
-    QualType T, ExprValueKind VK, ExprObjectKind OK) {
-  bool HasQualOrFound = QualifierLoc || FoundDecl.getDecl() != MemberDecl ||
-                        FoundDecl.getAccess() != MemberDecl->getAccess();
-  bool HasTemplateKWAndArgsInfo = TemplateArgs || TemplateKWLoc.isValid();
+    ValueDecl *memberdecl, DeclAccessPair founddecl,
+    DeclarationNameInfo nameinfo, const TemplateArgumentListInfo *targs,
+    QualType ty, ExprValueKind vk, ExprObjectKind ok) {
+
+  bool hasQualOrFound = (QualifierLoc ||
+                         founddecl.getDecl() != memberdecl ||
+                         founddecl.getAccess() != memberdecl->getAccess());
+
+  bool HasTemplateKWAndArgsInfo = targs || TemplateKWLoc.isValid();
   std::size_t Size =
       totalSizeToAlloc<MemberExprNameQualifier, ASTTemplateKWAndArgsInfo,
-                       TemplateArgumentLoc>(
-          HasQualOrFound ? 1 : 0, HasTemplateKWAndArgsInfo ? 1 : 0,
-          TemplateArgs ? TemplateArgs->size() : 0);
+                       TemplateArgumentLoc>(hasQualOrFound ? 1 : 0,
+                                            HasTemplateKWAndArgsInfo ? 1 : 0,
+                                            targs ? targs->size() : 0);
 
   void *Mem = C.Allocate(Size, alignof(MemberExpr));
   MemberExpr *E = new (Mem)
-      MemberExpr(Base, IsArrow, OperatorLoc, MemberDecl, NameInfo, T, VK, OK);
+      MemberExpr(base, isarrow, OperatorLoc, memberdecl, nameinfo, ty, vk, ok);
 
-  if (HasQualOrFound) {
+  if (hasQualOrFound) {
     // FIXME: Wrong. We should be looking at the member declaration we found.
     if (QualifierLoc && QualifierLoc.getNestedNameSpecifier()->isDependent()) {
       E->setValueDependent(true);
@@ -1591,20 +1576,19 @@ MemberExpr *MemberExpr::Create(
     MemberExprNameQualifier *NQ =
         E->getTrailingObjects<MemberExprNameQualifier>();
     NQ->QualifierLoc = QualifierLoc;
-    NQ->FoundDecl = FoundDecl;
+    NQ->FoundDecl = founddecl;
   }
 
   E->MemberExprBits.HasTemplateKWAndArgsInfo =
-      TemplateArgs || TemplateKWLoc.isValid();
+      (targs || TemplateKWLoc.isValid());
 
-  if (TemplateArgs) {
+  if (targs) {
     bool Dependent = false;
     bool InstantiationDependent = false;
     bool ContainsUnexpandedParameterPack = false;
     E->getTrailingObjects<ASTTemplateKWAndArgsInfo>()->initializeFrom(
-        TemplateKWLoc, *TemplateArgs,
-        E->getTrailingObjects<TemplateArgumentLoc>(), Dependent,
-        InstantiationDependent, ContainsUnexpandedParameterPack);
+        TemplateKWLoc, *targs, E->getTrailingObjects<TemplateArgumentLoc>(),
+        Dependent, InstantiationDependent, ContainsUnexpandedParameterPack);
     if (InstantiationDependent)
       E->setInstantiationDependent(true);
   } else if (TemplateKWLoc.isValid()) {
@@ -1615,22 +1599,6 @@ MemberExpr *MemberExpr::Create(
   return E;
 }
 
-MemberExpr *MemberExpr::CreateEmpty(const ASTContext &Context,
-                                    bool HasQualifier, bool HasFoundDecl,
-                                    bool HasTemplateKWAndArgsInfo,
-                                    unsigned NumTemplateArgs) {
-  assert((!NumTemplateArgs || HasTemplateKWAndArgsInfo) &&
-         "template args but no template arg info?");
-  bool HasQualOrFound = HasQualifier || HasFoundDecl;
-  std::size_t Size =
-      totalSizeToAlloc<MemberExprNameQualifier, ASTTemplateKWAndArgsInfo,
-                       TemplateArgumentLoc>(HasQualOrFound ? 1 : 0,
-                                            HasTemplateKWAndArgsInfo ? 1 : 0,
-                                            NumTemplateArgs);
-  void *Mem = Context.Allocate(Size, alignof(MemberExpr));
-  return new (Mem) MemberExpr(EmptyShell());
-}
-
 SourceLocation MemberExpr::getBeginLoc() const {
   if (isImplicitAccess()) {
     if (hasQualifier())
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 170149d5053ff..7b1f20408d5e0 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -881,8 +881,9 @@ RewriteModernObjC::getIvarAccessString(ObjCIvarDecl *D) {
                                         IvarT, nullptr,
                                         /*BitWidth=*/nullptr, /*Mutable=*/true,
                                         ICIS_NoInit);
-      MemberExpr *ME = MemberExpr::CreateImplicit(
-          *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
+      MemberExpr *ME = new (Context)
+          MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
+                     FD->getType(), VK_LValue, OK_Ordinary);
       IvarT = Context->getDecltypeType(ME, ME->getType());
     }
   }
@@ -2735,9 +2736,9 @@ Stmt *RewriteModernObjC::RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp) {
                                     Context->getPointerType(Context->VoidPtrTy),
                                     nullptr, /*BitWidth=*/nullptr,
                                     /*Mutable=*/true, ICIS_NoInit);
-  MemberExpr *ArrayLiteralME =
-      MemberExpr::CreateImplicit(*Context, NSArrayCallExpr, false, ARRFD,
-                                 ARRFD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ArrayLiteralME = new (Context)
+      MemberExpr(NSArrayCallExpr, false, SourceLocation(), ARRFD,
+                 SourceLocation(), ARRFD->getType(), VK_LValue, OK_Ordinary);
   QualType ConstIdT = Context->getObjCIdType().withConst();
   CStyleCastExpr * ArrayLiteralObjects =
     NoTypeInfoCStyleCastExpr(Context,
@@ -2864,9 +2865,9 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral
                                        Context->getPointerType(Context->VoidPtrTy),
                                        nullptr, /*BitWidth=*/nullptr,
                                        /*Mutable=*/true, ICIS_NoInit);
-  MemberExpr *DictLiteralValueME =
-      MemberExpr::CreateImplicit(*Context, NSValueCallExpr, false, ARRFD,
-                                 ARRFD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *DictLiteralValueME = new (Context)
+      MemberExpr(NSValueCallExpr, false, SourceLocation(), ARRFD,
+                 SourceLocation(), ARRFD->getType(), VK_LValue, OK_Ordinary);
   QualType ConstIdT = Context->getObjCIdType().withConst();
   CStyleCastExpr * DictValueObjects =
     NoTypeInfoCStyleCastExpr(Context,
@@ -2877,9 +2878,9 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral
   Expr *NSKeyCallExpr = CallExpr::Create(
       *Context, NSDictDRE, KeyExprs, NSDictFType, VK_LValue, SourceLocation());
 
-  MemberExpr *DictLiteralKeyME =
-      MemberExpr::CreateImplicit(*Context, NSKeyCallExpr, false, ARRFD,
-                                 ARRFD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *DictLiteralKeyME = new (Context)
+      MemberExpr(NSKeyCallExpr, false, SourceLocation(), ARRFD,
+                 SourceLocation(), ARRFD->getType(), VK_LValue, OK_Ordinary);
 
   CStyleCastExpr * DictKeyObjects =
     NoTypeInfoCStyleCastExpr(Context,
@@ -3179,8 +3180,9 @@ Expr *RewriteModernObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFla
                                     returnType, nullptr,
                                     /*BitWidth=*/nullptr,
                                     /*Mutable=*/true, ICIS_NoInit);
-  MemberExpr *ME = MemberExpr::CreateImplicit(
-      *Context, STCE, false, FieldD, FieldD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = new (Context)
+      MemberExpr(STCE, false, SourceLocation(), FieldD, SourceLocation(),
+                 FieldD->getType(), VK_LValue, OK_Ordinary);
 
   return ME;
 }
@@ -4627,8 +4629,9 @@ Stmt *RewriteModernObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME = MemberExpr::CreateImplicit(
-      *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME =
+      new (Context) MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
+                               FD->getType(), VK_LValue, OK_Ordinary);
 
   CastExpr *FunkCast = NoTypeInfoCStyleCastExpr(Context, PtrToFuncCastType,
                                                 CK_BitCast, ME);
@@ -4673,8 +4676,9 @@ Stmt *RewriteModernObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME = MemberExpr::CreateImplicit(
-      *Context, DeclRefExp, isArrow, FD, FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = new (Context)
+      MemberExpr(DeclRefExp, isArrow, SourceLocation(), FD, SourceLocation(),
+                 FD->getType(), VK_LValue, OK_Ordinary);
 
   StringRef Name = VD->getName();
   FD = FieldDecl::Create(*Context, nullptr, SourceLocation(), SourceLocation(),
@@ -4682,8 +4686,9 @@ Stmt *RewriteModernObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                          Context->VoidPtrTy, nullptr,
                          /*BitWidth=*/nullptr, /*Mutable=*/true,
                          ICIS_NoInit);
-  ME = MemberExpr::CreateImplicit(*Context, ME, true, FD, DeclRefExp->getType(),
-                                  VK_LValue, OK_Ordinary);
+  ME =
+      new (Context) MemberExpr(ME, true, SourceLocation(), FD, SourceLocation(),
+                               DeclRefExp->getType(), VK_LValue, OK_Ordinary);
 
   // Need parens to enforce precedence.
   ParenExpr *PE = new (Context) ParenExpr(DeclRefExp->getExprLoc(),
@@ -7523,8 +7528,9 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
                                             IvarT, nullptr,
                                             /*BitWidth=*/nullptr,
                                             /*Mutable=*/true, ICIS_NoInit);
-          MemberExpr *ME = MemberExpr::CreateImplicit(
-              *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
+          MemberExpr *ME = new (Context)
+              MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
+                         FD->getType(), VK_LValue, OK_Ordinary);
           IvarT = Context->getDecltypeType(ME, ME->getType());
         }
       }
@@ -7551,9 +7557,9 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
                                           D->getType(), nullptr,
                                           /*BitWidth=*/D->getBitWidth(),
                                           /*Mutable=*/true, ICIS_NoInit);
-        MemberExpr *ME =
-            MemberExpr::CreateImplicit(*Context, PE, /*isArrow*/ false, FD,
-                                       FD->getType(), VK_LValue, OK_Ordinary);
+        MemberExpr *ME = new (Context)
+            MemberExpr(PE, /*isArrow*/ false, SourceLocation(), FD,
+                       SourceLocation(), FD->getType(), VK_LValue, OK_Ordinary);
         Replacement = ME;
 
       }
diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index 2ff230dfff1b5..3e50aff3c488d 100644
--- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -3793,8 +3793,9 @@ Stmt *RewriteObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp) {
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME = MemberExpr::CreateImplicit(
-      *Context, PE, true, FD, FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME =
+      new (Context) MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
+                               FD->getType(), VK_LValue, OK_Ordinary);
 
   CastExpr *FunkCast = NoTypeInfoCStyleCastExpr(Context, PtrToFuncCastType,
                                                 CK_BitCast, ME);
@@ -3839,9 +3840,9 @@ Stmt *RewriteObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                                     Context->VoidPtrTy, nullptr,
                                     /*BitWidth=*/nullptr, /*Mutable=*/true,
                                     ICIS_NoInit);
-  MemberExpr *ME =
-      MemberExpr::CreateImplicit(*Context, DeclRefExp, isArrow, FD,
-                                 FD->getType(), VK_LValue, OK_Ordinary);
+  MemberExpr *ME = new (Context)
+      MemberExpr(DeclRefExp, isArrow, SourceLocation(), FD, SourceLocation(),
+                 FD->getType(), VK_LValue, OK_Ordinary);
 
   StringRef Name = VD->getName();
   FD = FieldDecl::Create(*Context, nullptr, SourceLocation(), SourceLocation(),
@@ -3849,8 +3850,9 @@ Stmt *RewriteObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) {
                          Context->VoidPtrTy, nullptr,
                          /*BitWidth=*/nullptr, /*Mutable=*/true,
                          ICIS_NoInit);
-  ME = MemberExpr::CreateImplicit(*Context, ME, true, FD, DeclRefExp->getType(),
-                                  VK_LValue, OK_Ordinary);
+  ME =
+      new (Context) MemberExpr(ME, true, SourceLocation(), FD, SourceLocation(),
+                               DeclRefExp->getType(), VK_LValue, OK_Ordinary);
 
   // Need parens to enforce precedence.
   ParenExpr *PE = new (Context) ParenExpr(DeclRefExp->getExprLoc(),
@@ -5828,10 +5830,10 @@ Stmt *RewriteObjCFragileABI::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
                                               OldRange.getEnd(),
                                               castExpr);
       if (IV->isFreeIvar() &&
-          declaresSameEntity(CurMethodDef->getClassInterface(),
-                             iFaceDecl->getDecl())) {
-        MemberExpr *ME = MemberExpr::CreateImplicit(
-            *Context, PE, true, D, D->getType(), VK_LValue, OK_Ordinary);
+          declaresSameEntity(CurMethodDef->getClassInterface(), iFaceDecl->getDecl())) {
+        MemberExpr *ME = new (Context)
+            MemberExpr(PE, true, SourceLocation(), D, IV->getLocation(),
+                       D->getType(), VK_LValue, OK_Ordinary);
         Replacement = ME;
       } else {
         IV->setBase(PE);
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 2f7e4a0f15cfa..1eb6a7114ffa2 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -7189,12 +7189,12 @@ ExprResult Sema::BuildCXXMemberCallExpr(Expr *E, NamedDecl *FoundDecl,
     }
   }
 
-  MemberExpr *ME =
-      BuildMemberExpr(Exp.get(), /*IsArrow=*/false, SourceLocation(),
-                      NestedNameSpecifierLoc(), SourceLocation(), Method,
-                      DeclAccessPair::make(FoundDecl, FoundDecl->getAccess()),
-                      HadMultipleCandidates, DeclarationNameInfo(),
-                      Context.BoundMemberTy, VK_RValue, OK_Ordinary);
+  MemberExpr *ME = new (Context) MemberExpr(
+      Exp.get(), /*IsArrow=*/false, SourceLocation(), Method, SourceLocation(),
+      Context.BoundMemberTy, VK_RValue, OK_Ordinary);
+  if (HadMultipleCandidates)
+    ME->setHadMultipleCandidates(true);
+  MarkMemberReferenced(ME);
 
   QualType ResultType = Method->getReturnType();
   ExprValueKind VK = Expr::getValueKindForType(ResultType);
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index f7b46a5e0f458..3d7b8db2f6710 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -893,31 +893,18 @@ BuildMSPropertyRefExpr(Sema &S, Expr *BaseExpr, bool IsArrow,
                                            NameInfo.getLoc());
 }
 
-MemberExpr *Sema::BuildMemberExpr(
-    Expr *Base, bool IsArrow, SourceLocation OpLoc, const CXXScopeSpec *SS,
-    SourceLocation TemplateKWLoc, ValueDecl *Member, DeclAccessPair FoundDecl,
-    bool HadMultipleCandidates, const DeclarationNameInfo &MemberNameInfo,
-    QualType Ty, ExprValueKind VK, ExprObjectKind OK,
-    const TemplateArgumentListInfo *TemplateArgs) {
-  NestedNameSpecifierLoc NNS =
-      SS ? SS->getWithLocInContext(Context) : NestedNameSpecifierLoc();
-  return BuildMemberExpr(Base, IsArrow, OpLoc, NNS, TemplateKWLoc, Member,
-                         FoundDecl, HadMultipleCandidates, MemberNameInfo, Ty,
-                         VK, OK, TemplateArgs);
-}
-
-MemberExpr *Sema::BuildMemberExpr(
-    Expr *Base, bool IsArrow, SourceLocation OpLoc, NestedNameSpecifierLoc NNS,
-    SourceLocation TemplateKWLoc, ValueDecl *Member, DeclAccessPair FoundDecl,
-    bool HadMultipleCandidates, const DeclarationNameInfo &MemberNameInfo,
-    QualType Ty, ExprValueKind VK, ExprObjectKind OK,
-    const TemplateArgumentListInfo *TemplateArgs) {
-  assert((!IsArrow || Base->isRValue()) && "-> base must be a pointer rvalue");
-  MemberExpr *E = MemberExpr::Create(Context, Base, IsArrow, OpLoc, NNS,
-                                     TemplateKWLoc, Member, FoundDecl,
-                                     MemberNameInfo, TemplateArgs, Ty, VK, OK);
-  E->setHadMultipleCandidates(HadMultipleCandidates);
-  MarkMemberReferenced(E);
+/// Build a MemberExpr AST node.
+static MemberExpr *BuildMemberExpr(
+    Sema &SemaRef, ASTContext &C, Expr *Base, bool isArrow,
+    SourceLocation OpLoc, const CXXScopeSpec &SS, SourceLocation TemplateKWLoc,
+    ValueDecl *Member, DeclAccessPair FoundDecl,
+    const DeclarationNameInfo &MemberNameInfo, QualType Ty, ExprValueKind VK,
+    ExprObjectKind OK, const TemplateArgumentListInfo *TemplateArgs = nullptr) {
+  assert((!isArrow || Base->isRValue()) && "-> base must be a pointer rvalue");
+  MemberExpr *E = MemberExpr::Create(
+      C, Base, isArrow, OpLoc, SS.getWithLocInContext(C), TemplateKWLoc, Member,
+      FoundDecl, MemberNameInfo, TemplateArgs, Ty, VK, OK);
+  SemaRef.MarkMemberReferenced(E);
   return E;
 }
 
@@ -1128,10 +1115,10 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
                                                     OpLoc);
 
   if (VarDecl *Var = dyn_cast<VarDecl>(MemberDecl)) {
-    return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var,
-                           FoundDecl, /*MultipleCandidates=*/false,
-                           MemberNameInfo, Var->getType().getNonReferenceType(),
-                           VK_LValue, OK_Ordinary);
+    return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
+                           TemplateKWLoc, Var, FoundDecl, MemberNameInfo,
+                           Var->getType().getNonReferenceType(), VK_LValue,
+                           OK_Ordinary);
   }
 
   if (CXXMethodDecl *MemberFn = dyn_cast<CXXMethodDecl>(MemberDecl)) {
@@ -1145,25 +1132,24 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
       type = MemberFn->getType();
     }
 
-    return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc,
-                           MemberFn, FoundDecl, /*MultipleCandidates=*/false,
-                           MemberNameInfo, type, valueKind, OK_Ordinary);
+    return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
+                           TemplateKWLoc, MemberFn, FoundDecl, MemberNameInfo,
+                           type, valueKind, OK_Ordinary);
   }
   assert(!isa<FunctionDecl>(MemberDecl) && "member function not C++ method?");
 
   if (EnumConstantDecl *Enum = dyn_cast<EnumConstantDecl>(MemberDecl)) {
-    return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Enum,
-                           FoundDecl, /*MultipleCandidates=*/false,
-                           MemberNameInfo, Enum->getType(), VK_RValue,
-                           OK_Ordinary);
+    return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
+                           TemplateKWLoc, Enum, FoundDecl, MemberNameInfo,
+                           Enum->getType(), VK_RValue, OK_Ordinary);
   }
   if (VarTemplateDecl *VarTempl = dyn_cast<VarTemplateDecl>(MemberDecl)) {
     if (VarDecl *Var = getVarTemplateSpecialization(
             *this, VarTempl, TemplateArgs, MemberNameInfo, TemplateKWLoc))
-      return BuildMemberExpr(
-          BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var, FoundDecl,
-          /*MultipleCandidates=*/false, MemberNameInfo,
-          Var->getType().getNonReferenceType(), VK_LValue, OK_Ordinary);
+      return BuildMemberExpr(*this, Context, BaseExpr, IsArrow, OpLoc, SS,
+                             TemplateKWLoc, Var, FoundDecl, MemberNameInfo,
+                             Var->getType().getNonReferenceType(), VK_LValue,
+                             OK_Ordinary);
     return ExprError();
   }
 
@@ -1819,10 +1805,9 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow,
     }
   }
 
-  return BuildMemberExpr(Base.get(), IsArrow, OpLoc, &SS,
+  return BuildMemberExpr(*this, Context, Base.get(), IsArrow, OpLoc, SS,
                          /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
-                         /*MultipleCandidates=*/false, MemberNameInfo,
-                         MemberType, VK, OK);
+                         MemberNameInfo, MemberType, VK, OK);
 }
 
 /// Builds an implicit member access expression.  The current context
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 4bc725f4cf0b1..30d809ac91b55 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -13912,11 +13912,14 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
       type = Context.BoundMemberTy;
     }
 
-    return BuildMemberExpr(
-        Base, MemExpr->isArrow(), MemExpr->getOperatorLoc(),
+    MemberExpr *ME = MemberExpr::Create(
+        Context, Base, MemExpr->isArrow(), MemExpr->getOperatorLoc(),
         MemExpr->getQualifierLoc(), MemExpr->getTemplateKeywordLoc(), Fn, Found,
-        /*HadMultipleCandidates=*/true, MemExpr->getMemberNameInfo(),
-        type, valueKind, OK_Ordinary, TemplateArgs);
+        MemExpr->getMemberNameInfo(), TemplateArgs, type, valueKind,
+        OK_Ordinary);
+    ME->setHadMultipleCandidates(true);
+    MarkMemberReferenced(ME);
+    return ME;
   }
 
   llvm_unreachable("Invalid reference to overloaded function");
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a179207316429..52aa3d961d200 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -752,42 +752,9 @@ void ASTStmtReader::VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
 }
 
 void ASTStmtReader::VisitMemberExpr(MemberExpr *E) {
-  VisitExpr(E);
-
-  bool HasQualifier = Record.readInt();
-  bool HasFoundDecl = Record.readInt();
-  bool HasTemplateInfo = Record.readInt();
-  unsigned NumTemplateArgs = Record.readInt();
-
-  E->Base = Record.readSubExpr();
-  E->MemberDecl = Record.readDeclAs<ValueDecl>();
-  Record.readDeclarationNameLoc(E->MemberDNLoc, E->MemberDecl->getDeclName());
-  E->MemberLoc = Record.readSourceLocation();
-  E->MemberExprBits.IsArrow = Record.readInt();
-  E->MemberExprBits.HasQualifierOrFoundDecl = HasQualifier || HasFoundDecl;
-  E->MemberExprBits.HasTemplateKWAndArgsInfo = HasTemplateInfo;
-  E->MemberExprBits.HadMultipleCandidates = Record.readInt();
-  E->MemberExprBits.OperatorLoc = Record.readSourceLocation();
-
-  if (HasQualifier || HasFoundDecl)
-    *E->getTrailingObjects<MemberExprNameQualifier>() =
-        MemberExprNameQualifier();
-
-  if (HasFoundDecl) {
-    auto *FoundD = Record.readDeclAs<NamedDecl>();
-    auto AS = (AccessSpecifier)Record.readInt();
-    E->getTrailingObjects<MemberExprNameQualifier>()->FoundDecl =
-        DeclAccessPair::make(FoundD, AS);
-  }
-
-  if (HasQualifier)
-    E->getTrailingObjects<MemberExprNameQualifier>()->QualifierLoc =
-        Record.readNestedNameSpecifierLoc();
-
-  if (HasTemplateInfo)
-    ReadTemplateKWAndArgsInfo(
-        *E->getTrailingObjects<ASTTemplateKWAndArgsInfo>(),
-        E->getTrailingObjects<TemplateArgumentLoc>(), NumTemplateArgs);
+  // Don't call VisitExpr, this is fully initialized at creation.
+  assert(E->getStmtClass() == Stmt::MemberExprClass &&
+         "It's a subclass, we must advance Idx!");
 }
 
 void ASTStmtReader::VisitObjCIsaExpr(ObjCIsaExpr *E) {
@@ -2584,12 +2551,55 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
           Context, /*NumArgs=*/Record[ASTStmtReader::NumExprFields], Empty);
       break;
 
-    case EXPR_MEMBER:
-      S = MemberExpr::CreateEmpty(Context, Record[ASTStmtReader::NumExprFields],
-                                  Record[ASTStmtReader::NumExprFields + 1],
-                                  Record[ASTStmtReader::NumExprFields + 2],
-                                  Record[ASTStmtReader::NumExprFields + 3]);
+    case EXPR_MEMBER: {
+      // We load everything here and fully initialize it at creation.
+      // That way we can use MemberExpr::Create and don't have to duplicate its
+      // logic with a MemberExpr::CreateEmpty.
+
+      assert(Record.getIdx() == 0);
+      NestedNameSpecifierLoc QualifierLoc;
+      if (Record.readInt()) { // HasQualifier.
+        QualifierLoc = Record.readNestedNameSpecifierLoc();
+      }
+
+      SourceLocation TemplateKWLoc;
+      TemplateArgumentListInfo ArgInfo;
+      bool HasTemplateKWAndArgsInfo = Record.readInt();
+      if (HasTemplateKWAndArgsInfo) {
+        TemplateKWLoc = Record.readSourceLocation();
+        unsigned NumTemplateArgs = Record.readInt();
+        ArgInfo.setLAngleLoc(Record.readSourceLocation());
+        ArgInfo.setRAngleLoc(Record.readSourceLocation());
+        for (unsigned i = 0; i != NumTemplateArgs; ++i)
+          ArgInfo.addArgument(Record.readTemplateArgumentLoc());
+      }
+
+      bool HadMultipleCandidates = Record.readInt();
+
+      auto *FoundD = Record.readDeclAs<NamedDecl>();
+      auto AS = (AccessSpecifier)Record.readInt();
+      DeclAccessPair FoundDecl = DeclAccessPair::make(FoundD, AS);
+
+      QualType T = Record.readType();
+      auto VK = static_cast<ExprValueKind>(Record.readInt());
+      auto OK = static_cast<ExprObjectKind>(Record.readInt());
+      Expr *Base = ReadSubExpr();
+      auto *MemberD = Record.readDeclAs<ValueDecl>();
+      SourceLocation MemberLoc = Record.readSourceLocation();
+      DeclarationNameInfo MemberNameInfo(MemberD->getDeclName(), MemberLoc);
+      bool IsArrow = Record.readInt();
+      SourceLocation OperatorLoc = Record.readSourceLocation();
+
+      S = MemberExpr::Create(Context, Base, IsArrow, OperatorLoc, QualifierLoc,
+                             TemplateKWLoc, MemberD, FoundDecl, MemberNameInfo,
+                             HasTemplateKWAndArgsInfo ? &ArgInfo : nullptr, T,
+                             VK, OK);
+      Record.readDeclarationNameLoc(cast<MemberExpr>(S)->MemberDNLoc,
+                                    MemberD->getDeclName());
+      if (HadMultipleCandidates)
+        cast<MemberExpr>(S)->setHadMultipleCandidates(true);
       break;
+    }
 
     case EXPR_BINARY_OPERATOR:
       S = new (Context) BinaryOperator(Empty);
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index d52a4a85b3252..776aab6bf51d2 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -660,45 +660,39 @@ void ASTStmtWriter::VisitCallExpr(CallExpr *E) {
 }
 
 void ASTStmtWriter::VisitMemberExpr(MemberExpr *E) {
-  VisitExpr(E);
+  // Don't call VisitExpr, we'll write everything here.
+
+  Record.push_back(E->hasQualifier());
+  if (E->hasQualifier())
+    Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
+
+  Record.push_back(E->hasTemplateKWAndArgsInfo());
+  if (E->hasTemplateKWAndArgsInfo()) {
+    Record.AddSourceLocation(E->getTemplateKeywordLoc());
+    unsigned NumTemplateArgs = E->getNumTemplateArgs();
+    Record.push_back(NumTemplateArgs);
+    Record.AddSourceLocation(E->getLAngleLoc());
+    Record.AddSourceLocation(E->getRAngleLoc());
+    for (unsigned i=0; i != NumTemplateArgs; ++i)
+      Record.AddTemplateArgumentLoc(E->getTemplateArgs()[i]);
+  }
 
-  bool HasQualifier = E->hasQualifier();
-  bool HasFoundDecl =
-      E->hasQualifierOrFoundDecl() &&
-      (E->getFoundDecl().getDecl() != E->getMemberDecl() ||
-       E->getFoundDecl().getAccess() != E->getMemberDecl()->getAccess());
-  bool HasTemplateInfo = E->hasTemplateKWAndArgsInfo();
-  unsigned NumTemplateArgs = E->getNumTemplateArgs();
+  Record.push_back(E->hadMultipleCandidates());
 
-  // Write these first for easy access when deserializing, as they affect the
-  // size of the MemberExpr.
-  Record.push_back(HasQualifier);
-  Record.push_back(HasFoundDecl);
-  Record.push_back(HasTemplateInfo);
-  Record.push_back(NumTemplateArgs);
+  DeclAccessPair FoundDecl = E->getFoundDecl();
+  Record.AddDeclRef(FoundDecl.getDecl());
+  Record.push_back(FoundDecl.getAccess());
 
+  Record.AddTypeRef(E->getType());
+  Record.push_back(E->getValueKind());
+  Record.push_back(E->getObjectKind());
   Record.AddStmt(E->getBase());
   Record.AddDeclRef(E->getMemberDecl());
-  Record.AddDeclarationNameLoc(E->MemberDNLoc,
-                               E->getMemberDecl()->getDeclName());
   Record.AddSourceLocation(E->getMemberLoc());
   Record.push_back(E->isArrow());
-  Record.push_back(E->hadMultipleCandidates());
   Record.AddSourceLocation(E->getOperatorLoc());
-
-  if (HasFoundDecl) {
-    DeclAccessPair FoundDecl = E->getFoundDecl();
-    Record.AddDeclRef(FoundDecl.getDecl());
-    Record.push_back(FoundDecl.getAccess());
-  }
-
-  if (HasQualifier)
-    Record.AddNestedNameSpecifierLoc(E->getQualifierLoc());
-
-  if (HasTemplateInfo)
-    AddTemplateKWAndArgsInfo(*E->getTrailingObjects<ASTTemplateKWAndArgsInfo>(),
-                             E->getTrailingObjects<TemplateArgumentLoc>());
-
+  Record.AddDeclarationNameLoc(E->MemberDNLoc,
+                               E->getMemberDecl()->getDeclName());
   Code = serialization::EXPR_MEMBER;
 }
 

From 2121a4f7335a9e4985997d4d880c11c588b48a27 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 5 Jun 2019 12:05:54 +0000
Subject: [PATCH 1113/1176] gn build: Merge r362578

llvm-svn: 362598
---
 llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 636936bf0ac23..6095eb042a8b8 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -20,6 +20,7 @@ static_library("IPO") {
   sources = [
     "AlwaysInliner.cpp",
     "ArgumentPromotion.cpp",
+    "Attributor.cpp",
     "BarrierNoopPass.cpp",
     "BlockExtractor.cpp",
     "CalledValuePropagation.cpp",

From 886a55eaa05ddfecaf41b86d1735c503b3b17bda Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 12:56:53 +0000
Subject: [PATCH 1114/1176] [X86][AVX] combineX86ShuffleChain - combine
 shuffle(extractsubvector(x),extractsubvector(y))

We already handle the case where we combine shuffle(extractsubvector(x),extractsubvector(x)), this relaxes the requirement to permit different sources as long as they have the same value type.

This causes a couple of cases where the VPERMV3 binary shuffles occur at a wider width than before, which I intend to improve in future commits - but as only the subvector's mask indices are defined, these will broadcast so we don't see any increase in constant size.

llvm-svn: 362599
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 13 +++--
 .../X86/avx512-shuffles/partial_permute.ll    | 25 +++++----
 llvm/test/CodeGen/X86/pr29112.ll              | 54 +++++++++----------
 .../CodeGen/X86/vector-shuffle-256-v32.ll     |  5 +-
 4 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 64585c8de0a65..a6aa2b77990da 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32091,19 +32091,28 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
       isa<ConstantSDNode>(V2.getOperand(1))) {
     SDValue Src1 = V1.getOperand(0);
     SDValue Src2 = V2.getOperand(0);
-    if (Src1 == Src2) {
+    if (Src1.getValueType() == Src2.getValueType()) {
       unsigned Offset1 = V1.getConstantOperandVal(1);
       unsigned Offset2 = V2.getConstantOperandVal(1);
       assert(((Offset1 % VT1.getVectorNumElements()) == 0 ||
               (Offset2 % VT2.getVectorNumElements()) == 0 ||
               (Src1.getValueSizeInBits() % RootSizeInBits) == 0) &&
              "Unexpected subvector extraction");
+      unsigned Scale = Src1.getValueSizeInBits() / RootSizeInBits;
+
       // Convert extraction indices to mask size.
       Offset1 /= VT1.getVectorNumElements();
       Offset2 /= VT2.getVectorNumElements();
       Offset1 *= NumMaskElts;
       Offset2 *= NumMaskElts;
 
+      SmallVector<SDValue, 2> NewInputs;
+      NewInputs.push_back(Src1);
+      if (Src1 != Src2) {
+        NewInputs.push_back(Src2);
+        Offset2 += Scale * NumMaskElts;
+      }
+
       // Create new mask for larger type.
       SmallVector<int, 64> NewMask(Mask);
       for (int &M : NewMask) {
@@ -32114,10 +32123,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
         else
           M = (M - NumMaskElts) + Offset2;
       }
-      unsigned Scale = Src1.getValueSizeInBits() / RootSizeInBits;
       NewMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
 
-      SDValue NewInputs[] = {Src1};
       if (SDValue Res = combineX86ShuffleChain(
               NewInputs, Src1, NewMask, Depth, HasVariableMask,
               AllowVariableMask, DAG, Subtarget)) {
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index c1d37a77ad041..57e333d78bd68 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1805,12 +1805,12 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask3(<16 x i32>* %vp,
 define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
 ; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask9:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,12,3]
 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,1,0,2]
-; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT:    vpermd %ymm2, %ymm1, %ymm1
-; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; CHECK-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,4,3]
-; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT:    vpermd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT:    vpermt2d %ymm0, %ymm2, %ymm1
+; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 12, i32 9, i32 4, i32 10>
@@ -3128,10 +3128,9 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask0(<16 x float> %v
 define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
 ; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm3
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm4 = [0,6,2,6]
-; CHECK-NEXT:    vpermi2ps %xmm0, %xmm3, %xmm4
+; CHECK-NEXT:    vmovaps {{.*#+}} xmm3 = [0,14,2,14]
+; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm4
+; CHECK-NEXT:    vpermt2ps %ymm0, %ymm3, %ymm4
 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vcmpeqps %xmm0, %xmm2, %k1
 ; CHECK-NEXT:    vblendmps %xmm4, %xmm1, %xmm0 {%k1}
@@ -3146,12 +3145,12 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec
 define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mask1(<16 x float> %vec, <4 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mask1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [0,6,2,6]
+; CHECK-NEXT:    vmovaps {{.*#+}} xmm3 = [0,14,2,14]
+; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
 ; CHECK-NEXT:    vcmpeqps %xmm4, %xmm1, %k1
-; CHECK-NEXT:    vpermi2ps %xmm3, %xmm2, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpermt2ps %ymm0, %ymm3, %ymm2 {%k1} {z}
+; CHECK-NEXT:    vmovaps %xmm2, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <4 x i32> <i32 8, i32 6, i32 10, i32 6>
diff --git a/llvm/test/CodeGen/X86/pr29112.ll b/llvm/test/CodeGen/X86/pr29112.ll
index b78a5ce7c5d12..2dce179f367ca 100644
--- a/llvm/test/CodeGen/X86/pr29112.ll
+++ b/llvm/test/CodeGen/X86/pr29112.ll
@@ -11,45 +11,45 @@ define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, <
 ; CHECK-NEXT:    subq $72, %rsp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    vmovaps %xmm1, %xmm8
+; CHECK-NEXT:    vbroadcastsd {{.*#+}} zmm0 = [85899345925,85899345925,85899345925,85899345925,85899345925,85899345925,85899345925,85899345925]
+; CHECK-NEXT:    vpermi2ps %zmm3, %zmm2, %zmm0
 ; CHECK-NEXT:    vextractf128 $1, %ymm3, %xmm1
-; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; CHECK-NEXT:    vunpcklps {{.*#+}} xmm10 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm10[0,1],xmm2[1],xmm10[3]
-; CHECK-NEXT:    vblendps {{.*#+}} xmm9 = xmm5[0],xmm1[1],xmm5[2,3]
-; CHECK-NEXT:    vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; CHECK-NEXT:    vunpcklps {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm11 = xmm6[0,1],xmm2[1],xmm6[3]
+; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; CHECK-NEXT:    vunpcklps {{.*#+}} xmm10 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm14 = xmm10[0,1],xmm2[1],xmm10[3]
+; CHECK-NEXT:    vblendps {{.*#+}} xmm9 = xmm4[0],xmm1[1],xmm4[2,3]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm11 = xmm0[0,1],xmm2[1],xmm0[3]
 ; CHECK-NEXT:    vextractf32x4 $3, %zmm3, %xmm7
-; CHECK-NEXT:    vunpcklps {{.*#+}} xmm4 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm2[1],xmm4[3]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm3[1]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm6 = xmm11[0,1,2],xmm3[1]
-; CHECK-NEXT:    vaddps %xmm4, %xmm6, %xmm12
-; CHECK-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm7 = xmm5[0],xmm7[2],zero,zero
+; CHECK-NEXT:    vunpcklps {{.*#+}} xmm5 = xmm4[0],xmm7[0],xmm4[1],xmm7[1]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm2[1],xmm5[3]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1,2],xmm3[1]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm11[0,1,2],xmm3[1]
+; CHECK-NEXT:    vaddps %xmm5, %xmm0, %xmm12
+; CHECK-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm7 = xmm4[0],xmm7[2],zero,zero
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm7 = xmm7[0,1],xmm2[1],xmm7[3]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm13 = xmm7[0,1,2],xmm4[0]
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm4 = xmm2[3,1,2,3]
-; CHECK-NEXT:    vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm7 = xmm5[0],xmm1[2],zero,zero
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0,1,2],xmm3[1]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm13 = xmm7[0,1,2],xmm5[0]
+; CHECK-NEXT:    vpermilps {{.*#+}} xmm5 = xmm2[3,1,2,3]
+; CHECK-NEXT:    vunpcklps {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm7 = xmm4[0],xmm1[2],zero,zero
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm14[0,1,2],xmm3[1]
 ; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm9[0,1],xmm2[1],xmm9[3]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm6 = xmm5[0,1,2],xmm3[1]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm9[0,1],xmm2[1],xmm9[3]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm6 = xmm4[0,1,2],xmm3[1]
 ; CHECK-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm4[0,1],xmm2[1],xmm4[3]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm9 = xmm7[0,1],xmm2[1],xmm7[3]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm2[1],xmm5[3]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm7[0,1],xmm2[1],xmm7[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm10[0,1],xmm2[3],xmm10[3]
 ; CHECK-NEXT:    vextractf32x4 $2, %zmm3, %xmm4
-; CHECK-NEXT:    vblendps {{.*#+}} xmm4 = xmm0[0,1,2],xmm4[3]
+; CHECK-NEXT:    vblendps {{.*#+}} xmm4 = xmm14[0,1,2],xmm4[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1,2],xmm3[1]
 ; CHECK-NEXT:    vblendps {{.*#+}} xmm7 = xmm11[0,1,2],xmm3[3]
-; CHECK-NEXT:    vblendps {{.*#+}} xmm10 = xmm0[0,1,2],xmm3[3]
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm9[0,1,2],xmm3[1]
+; CHECK-NEXT:    vblendps {{.*#+}} xmm9 = xmm14[0,1,2],xmm3[3]
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[1]
 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm2
-; CHECK-NEXT:    vaddps %xmm10, %xmm0, %xmm0
+; CHECK-NEXT:    vaddps %xmm9, %xmm0, %xmm0
 ; CHECK-NEXT:    vaddps %xmm13, %xmm1, %xmm9
 ; CHECK-NEXT:    vaddps %xmm12, %xmm0, %xmm0
 ; CHECK-NEXT:    vaddps %xmm1, %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 8dfcffbe0a496..a58b6bd547158 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2994,8 +2994,9 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_
 ;
 ; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
 ; AVX512VLVBMI:       # %bb.0:
-; AVX512VLVBMI-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
-; AVX512VLVBMI-NEXT:    vpermt2b %xmm1, %xmm2, %xmm0
+; AVX512VLVBMI-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32,15,15,15,15,15,15,15,15,32,32,32,32,32,32,32,32]
+; AVX512VLVBMI-NEXT:    # ymm2 = mem[0,1,0,1]
+; AVX512VLVBMI-NEXT:    vpermt2b %ymm1, %ymm2, %ymm0
 ; AVX512VLVBMI-NEXT:    retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <32 x i8> %shuffle

From a1bb4fb79d866326fae775a1423f5ae8baa36a17 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Wed, 5 Jun 2019 13:11:51 +0000
Subject: [PATCH 1115/1176] [ARM] Allow "-march=foo+fp" to vary with foo

This is the LLVM part of this change, the Clang part contains the full
description in its commit message.

Differential Revision: https://reviews.llvm.org/D60697

llvm-svn: 362600
---
 llvm/include/llvm/Support/ARMTargetParser.h |  2 +
 llvm/lib/Support/ARMTargetParser.cpp        | 79 ++++++++++++++++++---
 2 files changed, 73 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
index ca9e26202cf14..44bc0ac326f7f 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -240,6 +240,8 @@ StringRef getCPUAttr(ArchKind AK);
 StringRef getSubArch(ArchKind AK);
 StringRef getArchExtName(unsigned ArchExtKind);
 StringRef getArchExtFeature(StringRef ArchExt);
+bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
+                           std::vector<StringRef> &Features);
 StringRef getHWDivName(unsigned HWDivKind);
 
 // Information by Name
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index 8806ea52fdf42..9ab6ec21b9685 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -485,22 +485,85 @@ StringRef ARM::getArchExtName(unsigned ArchExtKind) {
   return StringRef();
 }
 
-StringRef ARM::getArchExtFeature(StringRef ArchExt) {
-  if (ArchExt.startswith("no")) {
-    StringRef ArchExtBase(ArchExt.substr(2));
-    for (const auto AE : ARCHExtNames) {
-      if (AE.NegFeature && ArchExtBase == AE.getName())
-        return StringRef(AE.NegFeature);
-    }
+static bool stripNegationPrefix(StringRef &Name) {
+  if (Name.startswith("no")) {
+    Name = Name.substr(2);
+    return true;
   }
+  return false;
+}
+
+StringRef ARM::getArchExtFeature(StringRef ArchExt) {
+  bool Negated = stripNegationPrefix(ArchExt);
   for (const auto AE : ARCHExtNames) {
     if (AE.Feature && ArchExt == AE.getName())
-      return StringRef(AE.Feature);
+      return StringRef(Negated ? AE.NegFeature : AE.Feature);
   }
 
   return StringRef();
 }
 
+static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
+  const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind];
+
+  // If the input FPU already supports double-precision, then there
+  // isn't any different FPU we can return here.
+  //
+  // The current available FPURestriction values are None (no
+  // restriction), D16 (only 16 d-regs) and SP_D16 (16 d-regs
+  // and single precision only); there's no value representing
+  // SP restriction without D16. So this test just means 'is it
+  // SP only?'.
+  if (InputFPU.Restriction != ARM::FPURestriction::SP_D16)
+    return ARM::FK_INVALID;
+
+  // Otherwise, look for an FPU entry with all the same fields, except
+  // that SP_D16 has been replaced with just D16, representing adding
+  // double precision and not changing anything else.
+  for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) {
+    if (CandidateFPU.FPUVer == InputFPU.FPUVer &&
+        CandidateFPU.NeonSupport == InputFPU.NeonSupport &&
+        CandidateFPU.Restriction == ARM::FPURestriction::D16) {
+      return CandidateFPU.ID;
+    }
+  }
+
+  // nothing found
+  return ARM::FK_INVALID;
+}
+
+bool ARM::appendArchExtFeatures(
+  StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
+  std::vector<StringRef> &Features) {
+  StringRef StandardFeature = getArchExtFeature(ArchExt);
+  if (!StandardFeature.empty()) {
+    Features.push_back(StandardFeature);
+    return true;
+  }
+
+  const bool Negated = stripNegationPrefix(ArchExt);
+
+  if (CPU == "")
+    CPU = "generic";
+
+  if (ArchExt == "fp" || ArchExt == "fp.dp") {
+    unsigned FPUKind;
+    if (ArchExt == "fp.dp") {
+      if (Negated) {
+        Features.push_back("-fp64");
+        return true;
+      }
+      FPUKind = findDoublePrecisionFPU(getDefaultFPU(CPU, AK));
+    } else if (Negated) {
+      FPUKind = ARM::FK_NONE;
+    } else {
+      FPUKind = getDefaultFPU(CPU, AK);
+    }
+    return ARM::getFPUFeatures(FPUKind, Features);
+  }
+  return false;
+}
+
 StringRef ARM::getHWDivName(unsigned HWDivKind) {
   for (const auto D : HWDivNames) {
     if (HWDivKind == D.ID)

From f95e6c06534298a522088df30a6d9a4145834049 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Wed, 5 Jun 2019 13:12:01 +0000
Subject: [PATCH 1116/1176] [ARM] Allow "-march=foo+fp" to vary with foo

Now, when clang processes an argument of the form "-march=foo+x+y+z",
then instead of calling getArchExtFeature() for each of the extension
names "x", "y", "z" and appending the returned string to its list of
low-level subtarget features, it will call appendArchExtFeatures()
which does the appending itself.

The difference is that appendArchExtFeatures can add _more_ than one
low-level feature name to the output feature list if it has to, and
also, it gets told some information about what base architecture and
CPU the extension is going to go with, which means that "+fp" can now
mean something different for different CPUs. Namely, "+fp" now selects
whatever the _default_ FPU is for the selected CPU and/or
architecture, as defined in the ARM_ARCH or ARM_CPU_NAME macros in
ARMTargetParser.def.

On the clang side, I adjust DecodeARMFeatures to call the new
appendArchExtFeatures function in place of getArchExtFeature. This
means DecodeARMFeatures needs to be passed a CPU name and an ArchKind,
which meant changing its call sites to make those available, and also
sawing getLLVMArchSuffixForARM in half so that you can get an ArchKind
enum value out of it instead of a string.

Also, I add support here for the extension name "+fp.dp", which will
automatically look through the FPU list for something that looks just
like the default FPU except for also supporting double precision.

Differential Revision: https://reviews.llvm.org/D60697

llvm-svn: 362601
---
 clang/lib/Driver/ToolChains/Arch/ARM.cpp | 72 ++++++++++++++----------
 clang/lib/Driver/ToolChains/Arch/ARM.h   |  3 +
 clang/test/Driver/armv8.1m.main.c        | 35 ++++++++++++
 clang/test/Driver/armv8.1m.main.s        | 24 +++++++-
 4 files changed, 103 insertions(+), 31 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index e38ce4d583fa7..5ff1dd9c91796 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -72,15 +72,13 @@ static void getARMFPUFeatures(const Driver &D, const Arg *A,
 
 // Decode ARM features from string like +[no]featureA+[no]featureB+...
 static bool DecodeARMFeatures(const Driver &D, StringRef text,
+                              StringRef CPU, llvm::ARM::ArchKind ArchKind,
                               std::vector<StringRef> &Features) {
   SmallVector<StringRef, 8> Split;
   text.split(Split, StringRef("+"), -1, false);
 
   for (StringRef Feature : Split) {
-    StringRef FeatureName = llvm::ARM::getArchExtFeature(Feature);
-    if (!FeatureName.empty())
-      Features.push_back(FeatureName);
-    else
+    if (!appendArchExtFeatures(CPU, ArchKind, Feature, Features))
       return false;
   }
   return true;
@@ -100,14 +98,16 @@ static void DecodeARMFeaturesFromCPU(const Driver &D, StringRef CPU,
 // getARMArch is used here instead of just checking the -march value in order
 // to handle -march=native correctly.
 static void checkARMArchName(const Driver &D, const Arg *A, const ArgList &Args,
-                             llvm::StringRef ArchName,
+                             llvm::StringRef ArchName, llvm::StringRef CPUName,
                              std::vector<StringRef> &Features,
                              const llvm::Triple &Triple) {
   std::pair<StringRef, StringRef> Split = ArchName.split("+");
 
   std::string MArch = arm::getARMArch(ArchName, Triple);
-  if (llvm::ARM::parseArch(MArch) == llvm::ARM::ArchKind::INVALID ||
-      (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
+  llvm::ARM::ArchKind ArchKind = llvm::ARM::parseArch(MArch);
+  if (ArchKind == llvm::ARM::ArchKind::INVALID ||
+      (Split.second.size() && !DecodeARMFeatures(
+        D, Split.second, CPUName, ArchKind, Features)))
     D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args);
 }
 
@@ -119,8 +119,11 @@ static void checkARMCPUName(const Driver &D, const Arg *A, const ArgList &Args,
   std::pair<StringRef, StringRef> Split = CPUName.split("+");
 
   std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple);
-  if (arm::getLLVMArchSuffixForARM(CPU, ArchName, Triple).empty() ||
-      (Split.second.size() && !DecodeARMFeatures(D, Split.second, Features)))
+  llvm::ARM::ArchKind ArchKind =
+    arm::getLLVMArchKindForARM(CPU, ArchName, Triple);
+  if (ArchKind == llvm::ARM::ArchKind::INVALID ||
+      (Split.second.size() && !DecodeARMFeatures(
+        D, Split.second, CPU, ArchKind, Features)))
     D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args);
 }
 
@@ -327,25 +330,12 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
   if (ThreadPointer == arm::ReadTPMode::Cp15)
     Features.push_back("+read-tp-hard");
 
-  // Check -march. ClangAs gives preference to -Wa,-march=.
   const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ);
+  const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ);
   StringRef ArchName;
-  if (WaArch) {
-    if (ArchArg)
-      D.Diag(clang::diag::warn_drv_unused_argument)
-          << ArchArg->getAsString(Args);
-    ArchName = StringRef(WaArch->getValue()).substr(7);
-    checkARMArchName(D, WaArch, Args, ArchName, Features, Triple);
-    // FIXME: Set Arch.
-    D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args);
-  } else if (ArchArg) {
-    ArchName = ArchArg->getValue();
-    checkARMArchName(D, ArchArg, Args, ArchName, Features, Triple);
-  }
+  StringRef CPUName;
 
   // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=.
-  const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ);
-  StringRef CPUName;
   if (WaCPU) {
     if (CPUArg)
       D.Diag(clang::diag::warn_drv_unused_argument)
@@ -355,6 +345,20 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
   } else if (CPUArg)
     CPUName = CPUArg->getValue();
 
+  // Check -march. ClangAs gives preference to -Wa,-march=.
+  if (WaArch) {
+    if (ArchArg)
+      D.Diag(clang::diag::warn_drv_unused_argument)
+          << ArchArg->getAsString(Args);
+    ArchName = StringRef(WaArch->getValue()).substr(7);
+    checkARMArchName(D, WaArch, Args, ArchName, CPUName, Features, Triple);
+    // FIXME: Set Arch.
+    D.Diag(clang::diag::warn_drv_unused_argument) << WaArch->getAsString(Args);
+  } else if (ArchArg) {
+    ArchName = ArchArg->getValue();
+    checkARMArchName(D, ArchArg, Args, ArchName, CPUName, Features, Triple);
+  }
+
   // Add CPU features for generic CPUs
   if (CPUName == "native") {
     llvm::StringMap<bool> HostFeatures;
@@ -625,11 +629,12 @@ std::string arm::getARMTargetCPU(StringRef CPU, StringRef Arch,
   return getARMCPUForMArch(Arch, Triple);
 }
 
-/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
-/// CPU  (or Arch, if CPU is generic).
-// FIXME: This is redundant with -mcpu, why does LLVM use this.
-StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
-                                       const llvm::Triple &Triple) {
+/// getLLVMArchSuffixForARM - Get the LLVM ArchKind value to use for a
+/// particular CPU (or Arch, if CPU is generic). This is needed to
+/// pass to functions like llvm::ARM::getDefaultFPU which need an
+/// ArchKind as well as a CPU name.
+llvm::ARM::ArchKind arm::getLLVMArchKindForARM(StringRef CPU, StringRef Arch,
+                                               const llvm::Triple &Triple) {
   llvm::ARM::ArchKind ArchKind;
   if (CPU == "generic") {
     std::string ARMArch = tools::arm::getARMArch(Arch, Triple);
@@ -645,6 +650,15 @@ StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
                           ? llvm::ARM::ArchKind::ARMV7K
                           : llvm::ARM::parseCPUArch(CPU);
   }
+  return ArchKind;
+}
+
+/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
+/// CPU  (or Arch, if CPU is generic).
+// FIXME: This is redundant with -mcpu, why does LLVM use this.
+StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch,
+                                       const llvm::Triple &Triple) {
+  llvm::ARM::ArchKind ArchKind = getLLVMArchKindForARM(CPU, Arch, Triple);
   if (ArchKind == llvm::ARM::ArchKind::INVALID)
     return "";
   return llvm::ARM::getSubArch(ArchKind);
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.h b/clang/lib/Driver/ToolChains/Arch/ARM.h
index 0b3ad4d413ee0..5640f8371262a 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.h
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/TargetParser.h"
 #include <string>
 #include <vector>
 
@@ -25,6 +26,8 @@ std::string getARMTargetCPU(StringRef CPU, llvm::StringRef Arch,
                             const llvm::Triple &Triple);
 const std::string getARMArch(llvm::StringRef Arch, const llvm::Triple &Triple);
 StringRef getARMCPUForMArch(llvm::StringRef Arch, const llvm::Triple &Triple);
+llvm::ARM::ArchKind getLLVMArchKindForARM(StringRef CPU, StringRef Arch,
+                                          const llvm::Triple &Triple);
 StringRef getLLVMArchSuffixForARM(llvm::StringRef CPU, llvm::StringRef Arch,
                                   const llvm::Triple &Triple);
 
diff --git a/clang/test/Driver/armv8.1m.main.c b/clang/test/Driver/armv8.1m.main.c
index 675d98c8fd138..9a745b1b80914 100644
--- a/clang/test/Driver/armv8.1m.main.c
+++ b/clang/test/Driver/armv8.1m.main.c
@@ -2,13 +2,48 @@
 // RUN: FileCheck --check-prefix=CHECK-DSP < %t %s
 // CHECK-DSP: "-target-feature" "+dsp"
 
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+fp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FP < %t %s
+// CHECK-FP: "-target-feature" "+fp-armv8"
+// CHECK-FP-NOT: "-target-feature" "+fp64"
+// CHECK-FP-NOT: "-target-feature" "+d32"
+// CHECK-FP: "-target-feature" "+fullfp16"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+nofp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NOFP < %t %s
+// CHECK-NOFP: "-target-feature" "-vfp2" "-target-feature" "-vfp3" "-target-feature" "-fp16" "-target-feature" "-vfp4" "-target-feature" "-fp-armv8" "-target-feature" "-fp64" "-target-feature" "-d32" "-target-feature" "-neon" "-target-feature" "-crypto"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+fp.dp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-FPDP < %t %s
+// CHECK-FPDP: "-target-feature" "+fp-armv8"
+// CHECK-FPDP: "-target-feature" "+fullfp16"
+// CHECK-FPDP: "-target-feature" "+fp64"
+// CHECK-FPDP-NOT: "-target-feature" "+d32"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+nofp.dp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NOFPDP < %t %s
+// CHECK-NOFPDP: "-target-feature" "-fp64"
+
 // RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve  -### %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-MVE < %t %s
 // CHECK-MVE: "-target-feature" "+mve"
 
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+nomve  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NOMVE < %t %s
+// CHECK-NOMVE: "-target-feature" "-mve"
+
 // RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp  -### %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-MVEFP < %t %s
 // CHECK-MVEFP: "-target-feature" "+mve.fp"
 // CHECK-MVEFP-NOT: "-target-feature" "+fp64"
 
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+nomve.fp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NOMVEFP < %t %s
+// CHECK-NOMVEFP: "-target-feature" "-mve.fp"
+
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+fp.dp  -### %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-MVEFP_DP < %t %s
+// CHECK-MVEFP_DP: "-target-feature" "+mve.fp"
+// CHECK-MVEFP_DP: "-target-feature" "+fp64"
+
 double foo (double a) { return a; }
diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s
index 546ddca4ef14f..8fc94cf772fae 100644
--- a/clang/test/Driver/armv8.1m.main.s
+++ b/clang/test/Driver/armv8.1m.main.s
@@ -5,10 +5,24 @@
 # RUN:      FileCheck --check-prefix=ERROR-V81M < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+dsp -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_DSP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_FP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nofp -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_FP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp.dp -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nofp.dp -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nomve -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve+fp -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_MVE_FP < %t %s
 # RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp -o /dev/null %s 2>%t
 # RUN:      FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
+# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nomve.fp -o /dev/null %s 2>%t
+# RUN:      FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
 
 .syntax unified
 .thumb
@@ -39,15 +53,21 @@ vcmp.f64 d0,d1
 # ERROR-V8M: :[[@LINE-1]]:1: error
 # ERROR-V81M: :[[@LINE-2]]:1: error
 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error
-# ERROR-V81M_MVE: :[[@LINE-4]]:1: error
-# ERROR-V81M_MVEFP: :[[@LINE-5]]:1: error
+# ERROR-V81M_FP: :[[@LINE-4]]:1: error
+# ERROR-V81M_MVE: :[[@LINE-5]]:1: error
+# ERROR-V81M_MVE_FP: :[[@LINE-6]]:1: error
+# ERROR-V81M_MVEFP: :[[@LINE-7]]:1: error
 
 asrl r0, r1, r2
 # ERROR-V8M: :[[@LINE-1]]:1: error
 # ERROR-V81M: :[[@LINE-2]]:1: error
 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_FP: :[[@LINE-4]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error
 
 vcadd.i8 q0, q1, q2, #90
 # ERROR-V8M: :[[@LINE-1]]:1: error
 # ERROR-V81M: :[[@LINE-2]]:1: error
 # ERROR-V81M_DSP: :[[@LINE-3]]:1: error
+# ERROR-V81M_FP: :[[@LINE-4]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error

From 66296dc3e4c5356b9c2b8443125b10b9ca41e435 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Wed, 5 Jun 2019 13:16:53 +0000
Subject: [PATCH 1117/1176] [yaml2obj] - Change how we handle implicit
 sections.

We have a few sections that can be added implicitly to the output:
".dynsym", ".dynstr", ".symtab", ".strtab" and ".shstrtab".

Problem appears when such section is listed explicitly in YAML.
In that case it's content is written twice:
first time during writing of regular sections listed in the document
and second time during special handling.

Because of that their file offsets can become unexpectedly broken:
(yaml file for sample below lists .dynsym explicitly before .text.foo)

Before patch:
  [Nr] Name              Type             Address           Offset
       Size              EntSize          Flags  Link  Info  Align
  [ 0]                   NULL             0000000000000000  00000000
       0000000000000000  0000000000000000           0     0     0
  [ 1] .dynsym           DYNSYM           0000000000000100  00000250
       0000000000000030  0000000000000018   A       6     0     8
  [ 2] .text.foo         PROGBITS         0000000000000200  00000200
       0000000000000000  0000000000000000  AX       0     0     0

After patch:
Section Headers:
  [Nr] Name         Type             Address           Offset
       Size              EntSize          Flags  Link  Info  Align
  [ 0]                   NULL             0000000000000000  00000000
       0000000000000000  0000000000000000           0     0     0
  [ 1] .dynsym           DYNSYM           0000000000000100  00000200
       0000000000000030  0000000000000018   A       6     0     8
  [ 2] .text.foo         PROGBITS         0000000000000200  00000230
       0000000000000000  0000000000000000  AX       0     0     0

This patch reorganizes our code and fixes the issue described.

Differential revision: https://reviews.llvm.org/D62809

llvm-svn: 362602
---
 .../tools/yaml2obj/implicit-sections.test     |  86 +++++++++
 llvm/tools/yaml2obj/yaml2elf.cpp              | 181 ++++++++++++------
 2 files changed, 205 insertions(+), 62 deletions(-)
 create mode 100644 llvm/test/tools/yaml2obj/implicit-sections.test

diff --git a/llvm/test/tools/yaml2obj/implicit-sections.test b/llvm/test/tools/yaml2obj/implicit-sections.test
new file mode 100644
index 0000000000000..fef2f1ea4243c
--- /dev/null
+++ b/llvm/test/tools/yaml2obj/implicit-sections.test
@@ -0,0 +1,86 @@
+## Check the section header properties of ".dynsym",
+## ".dynstr", ".symtab", ".strtab", ".shstrtab".
+## These sections sections are usually added implicitly,
+## but here we add them explicitly in YAML and verify.
+## We check their order matches YAML and that file offset is
+## ascending. This is a natural default behavior.
+
+# RUN: yaml2obj --docnum=1 %s -o %t1
+# RUN: llvm-readelf -S %t1 | FileCheck %s
+
+# CHECK:      Section Headers:
+# CHECK-NEXT:  [Nr] Name      Type     Address          Off    Size
+# CHECK-NEXT:  [ 0]           NULL     0000000000000000 000000 000000
+# CHECK-NEXT:  [ 1] .dynstr   STRTAB   0000000000000100 000200 000009
+# CHECK-NEXT:  [ 2] .dynsym   DYNSYM   0000000000000150 000209 000030
+# CHECK-NEXT:  [ 3] .symtab   SYMTAB   0000000000000000 000239 000018
+# CHECK-NEXT:  [ 4] .strtab   STRTAB   0000000000000000 000251 000001
+# CHECK-NEXT:  [ 5] .shstrtab STRTAB   0000000000000000 000252 000035
+# CHECK-NEXT:  [ 6] .text.foo PROGBITS 0000000000000200 000287 000000
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_DYN
+  Machine: EM_X86_64
+Sections:
+  - Name:    .dynstr
+    Type:    SHT_STRTAB
+    Address: 0x100
+  - Name:    .dynsym
+    Type:    SHT_DYNSYM
+    Address: 0x150
+  - Name:    .symtab
+    Type:    SHT_SYMTAB
+  - Name:    .strtab
+    Type:    SHT_STRTAB
+  - Name:    .shstrtab
+    Type:    SHT_STRTAB
+  - Name:    .text.foo
+    Type:    SHT_PROGBITS
+    Address: 0x200
+## Symbol is required for the .dynsym to be generated.
+DynamicSymbols:
+  - Name:    _Z3fooi
+    Binding: STB_GLOBAL
+
+## Check that yaml2obj creates empty .dynstr and .dynsym sections for
+## the case when no dynamic symbols were specified and Content wasn't set,
+## but the sections were explicitly listed. Check their VAs are correct.
+
+# RUN: yaml2obj --docnum=2 %s -o %t2
+# RUN: llvm-readelf -S %t2 | FileCheck %s --check-prefix=NODYNSYM
+
+# NODYNSYM:      Section Headers:
+# NODYNSYM-NEXT: [Nr] Name      Type      Address          Off    Size
+# NODYNSYM-NEXT: [ 0]           NULL      0000000000000000 000000 000000
+# NODYNSYM-NEXT: [ 1] .dynstr   STRTAB    0000000000000100 000200 000001
+# NODYNSYM-NEXT: [ 2] .dynsym   DYNSYM    0000000000000150 000201 000018
+# NODYNSYM-NEXT: [ 3] .symtab   SYMTAB    0000000000000000 000219 000018
+# NODYNSYM-NEXT: [ 4] .strtab   STRTAB    0000000000000000 000231 000001
+# NODYNSYM-NEXT: [ 5] .shstrtab STRTAB    0000000000000000 000232 000035
+# NODYNSYM-NEXT: [ 6] .text.foo PROGBITS  0000000000000200 000267 000000
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_DYN
+  Machine: EM_X86_64
+Sections:
+  - Name:    .dynstr
+    Type:    SHT_STRTAB
+    Address: 0x100
+  - Name:    .dynsym
+    Type:    SHT_DYNSYM
+    Address: 0x150
+  - Name:    .symtab
+    Type:    SHT_SYMTAB
+  - Name:    .strtab
+    Type:    SHT_STRTAB
+  - Name:    .shstrtab
+    Type:    SHT_STRTAB
+  - Name:    .text.foo
+    Type:    SHT_PROGBITS
+    Address: 0x200
diff --git a/llvm/tools/yaml2obj/yaml2elf.cpp b/llvm/tools/yaml2obj/yaml2elf.cpp
index 79de01ef43ef1..b3782259a4d33 100644
--- a/llvm/tools/yaml2obj/yaml2elf.cpp
+++ b/llvm/tools/yaml2obj/yaml2elf.cpp
@@ -136,13 +136,19 @@ class ELFState {
   bool buildSymbolIndex(ArrayRef<ELFYAML::Symbol> Symbols);
   void initELFHeader(Elf_Ehdr &Header);
   void initProgramHeaders(std::vector<Elf_Phdr> &PHeaders);
-  bool initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
+  bool initImplicitHeader(ELFState<ELFT> &State, ContiguousBlobAccumulator &CBA,
+                          Elf_Shdr &Header, StringRef SecName,
+                          ELFYAML::Section *YAMLSec);
+  bool initSectionHeaders(ELFState<ELFT> &State,
+                          std::vector<Elf_Shdr> &SHeaders,
                           ContiguousBlobAccumulator &CBA);
   void initSymtabSectionHeader(Elf_Shdr &SHeader, SymtabType STType,
-                               ContiguousBlobAccumulator &CBA);
+                               ContiguousBlobAccumulator &CBA,
+                               ELFYAML::Section *YAMLSec);
   void initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
                                StringTableBuilder &STB,
-                               ContiguousBlobAccumulator &CBA);
+                               ContiguousBlobAccumulator &CBA,
+                               ELFYAML::Section *YAMLSec);
   void setProgramHeaderLayout(std::vector<Elf_Phdr> &PHeaders,
                               std::vector<Elf_Shdr> &SHeaders);
   void addSymbols(ArrayRef<ELFYAML::Symbol> Symbols, std::vector<Elf_Sym> &Syms,
@@ -248,7 +254,36 @@ static bool convertSectionIndex(NameToIdxMap &SN2I, StringRef SecName,
 }
 
 template <class ELFT>
-bool ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
+bool ELFState<ELFT>::initImplicitHeader(ELFState<ELFT> &State,
+                                        ContiguousBlobAccumulator &CBA,
+                                        Elf_Shdr &Header, StringRef SecName,
+                                        ELFYAML::Section *YAMLSec) {
+  // Check if the header was already initialized.
+  if (Header.sh_offset)
+    return false;
+
+  if (SecName == ".symtab")
+    State.initSymtabSectionHeader(Header, SymtabType::Static, CBA, YAMLSec);
+  else if (SecName == ".strtab")
+    State.initStrtabSectionHeader(Header, SecName, State.DotStrtab, CBA,
+                                  YAMLSec);
+  else if (SecName == ".shstrtab")
+    State.initStrtabSectionHeader(Header, SecName, State.DotShStrtab, CBA,
+                                  YAMLSec);
+
+  else if (SecName == ".dynsym")
+    State.initSymtabSectionHeader(Header, SymtabType::Dynamic, CBA, YAMLSec);
+  else if (SecName == ".dynstr")
+    State.initStrtabSectionHeader(Header, SecName, State.DotDynstr, CBA,
+                                  YAMLSec);
+  else
+    return false;
+  return true;
+}
+
+template <class ELFT>
+bool ELFState<ELFT>::initSectionHeaders(ELFState<ELFT> &State,
+                                        std::vector<Elf_Shdr> &SHeaders,
                                         ContiguousBlobAccumulator &CBA) {
   // Ensure SHN_UNDEF entry is present. An all-zero section header is a
   // valid SHN_UNDEF entry since SHT_NULL == 0.
@@ -271,6 +306,15 @@ bool ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
       SHeader.sh_link = Index;
     }
 
+    // We have a few sections like string or symbol tables that are added
+    // implicitly later. However, if they are explicitly specified in the YAML,
+    // we want to write them right now. This ensures the file offset remains
+    // correct.
+    if (initImplicitHeader(State, CBA, SHeader, Sec->Name, Sec.get())) {
+      SHeaders.push_back(SHeader);
+      continue;
+    }
+
     if (auto S = dyn_cast<ELFYAML::RawContentSection>(Sec.get())) {
       if (!writeSectionContent(SHeader, *S, CBA))
         return false;
@@ -306,6 +350,26 @@ bool ELFState<ELFT>::initSectionHeaders(std::vector<Elf_Shdr> &SHeaders,
 
     SHeaders.push_back(SHeader);
   }
+
+  // Populate SHeaders with implicit sections not present in the Doc.
+  for (StringRef Name : State.implicitSectionNames())
+    if (State.SN2I.get(Name) >= SHeaders.size())
+      SHeaders.push_back({});
+
+  // Initialize the implicit sections.
+  initImplicitHeader(State, CBA, SHeaders[State.SN2I.get(".symtab")], ".symtab",
+                     nullptr /*DocSec*/);
+  initImplicitHeader(State, CBA, SHeaders[State.SN2I.get(".strtab")], ".strtab",
+                     nullptr /*DocSec*/);
+  initImplicitHeader(State, CBA, SHeaders[State.SN2I.get(".shstrtab")],
+                     ".shstrtab", nullptr /*DocSec*/);
+  if (!Doc.DynamicSymbols.empty()) {
+    initImplicitHeader(State, CBA, SHeaders[State.SN2I.get(".dynsym")],
+                       ".dynsym", nullptr /*DocSec*/);
+    initImplicitHeader(State, CBA, SHeaders[State.SN2I.get(".dynstr")],
+                       ".dynstr", nullptr /*DocSec*/);
+  }
+
   return true;
 }
 
@@ -319,7 +383,8 @@ static size_t findFirstNonGlobal(ArrayRef<ELFYAML::Symbol> Symbols) {
 template <class ELFT>
 void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
                                              SymtabType STType,
-                                             ContiguousBlobAccumulator &CBA) {
+                                             ContiguousBlobAccumulator &CBA,
+                                             ELFYAML::Section *YAMLSec) {
   zero(SHeader);
   bool IsStatic = STType == SymtabType::Static;
   SHeader.sh_name = DotShStrtab.getOffset(IsStatic ? ".symtab" : ".dynsym");
@@ -330,57 +395,71 @@ void ELFState<ELFT>::initSymtabSectionHeader(Elf_Shdr &SHeader,
 
   // One greater than symbol table index of the last local symbol.
   const auto &Symbols = IsStatic ? Doc.Symbols : Doc.DynamicSymbols;
-  SHeader.sh_info = findFirstNonGlobal(Symbols) + 1;
-  SHeader.sh_entsize = sizeof(Elf_Sym);
-  SHeader.sh_addralign = 8;
 
-  // Get the section index ignoring the SHT_NULL section.
-  unsigned SecNdx =
-      IsStatic ? getDotSymTabSecNo() - 1 : getDotDynSymSecNo() - 1;
   // If the symbol table section is explicitly described in the YAML
   // then we should set the fields requested.
-  if (SecNdx < Doc.Sections.size()) {
-    ELFYAML::Section *Sec = Doc.Sections[SecNdx].get();
-    SHeader.sh_addr = Sec->Address;
-    if (auto S = dyn_cast<ELFYAML::RawContentSection>(Sec))
-      SHeader.sh_info = S->Info;
-  }
-
-  std::vector<Elf_Sym> Syms;
-  {
-    // Ensure STN_UNDEF is present
-    Elf_Sym Sym;
-    zero(Sym);
-    Syms.push_back(Sym);
-  }
+  ELFYAML::RawContentSection *RawSec =
+      dyn_cast_or_null<ELFYAML::RawContentSection>(YAMLSec);
+  SHeader.sh_info =
+      RawSec ? (unsigned)RawSec->Info : findFirstNonGlobal(Symbols) + 1;
+  SHeader.sh_entsize = (YAMLSec && YAMLSec->EntSize)
+                           ? (uint64_t)(*YAMLSec->EntSize)
+                           : sizeof(Elf_Sym);
+  SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 8;
+  SHeader.sh_addr = YAMLSec ? (uint64_t)YAMLSec->Address : 0;
+
+  if (RawSec && RawSec->Content.binary_size()) {
+    RawSec->Content.writeAsBinary(
+        CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign));
+    SHeader.sh_size = RawSec->Size;
+  } else {
+    std::vector<Elf_Sym> Syms;
+    {
+      // Ensure STN_UNDEF is present
+      Elf_Sym Sym;
+      zero(Sym);
+      Syms.push_back(Sym);
+    }
 
-  addSymbols(Symbols, Syms, IsStatic ? DotStrtab : DotDynstr);
+    addSymbols(Symbols, Syms, IsStatic ? DotStrtab : DotDynstr);
 
-  writeArrayData(
-      CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign),
-      makeArrayRef(Syms));
-  SHeader.sh_size = arrayDataSize(makeArrayRef(Syms));
+    writeArrayData(
+        CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign),
+        makeArrayRef(Syms));
+    SHeader.sh_size = arrayDataSize(makeArrayRef(Syms));
+  }
 }
 
 template <class ELFT>
 void ELFState<ELFT>::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name,
                                              StringTableBuilder &STB,
-                                             ContiguousBlobAccumulator &CBA) {
+                                             ContiguousBlobAccumulator &CBA,
+                                             ELFYAML::Section *YAMLSec) {
   zero(SHeader);
   SHeader.sh_name = DotShStrtab.getOffset(Name);
   SHeader.sh_type = ELF::SHT_STRTAB;
-  STB.write(CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign));
-  SHeader.sh_size = STB.getSize();
-  SHeader.sh_addralign = 1;
+  SHeader.sh_addralign = YAMLSec ? (uint64_t)YAMLSec->AddressAlign : 1;
+
+  ELFYAML::RawContentSection *RawSec =
+      dyn_cast_or_null<ELFYAML::RawContentSection>(YAMLSec);
+  if (RawSec && RawSec->Content.binary_size()) {
+    RawSec->Content.writeAsBinary(
+        CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign));
+    SHeader.sh_size = RawSec->Size;
+  } else {
+    STB.write(
+        CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign));
+    SHeader.sh_size = STB.getSize();
+  }
+
+  if (YAMLSec && YAMLSec->EntSize)
+    SHeader.sh_entsize = *YAMLSec->EntSize;
 
   // If .dynstr section is explicitly described in the YAML
   // then we want to use its section address.
   if (Name == ".dynstr") {
-    // Take section index and ignore the SHT_NULL section.
-    unsigned SecNdx = getDotDynStrSecNo() - 1;
-    if (SecNdx < Doc.Sections.size())
-      SHeader.sh_addr = Doc.Sections[SecNdx]->Address;
-
+    if (YAMLSec)
+      SHeader.sh_addr = YAMLSec->Address;
     // We assume that .dynstr is always allocatable.
     SHeader.sh_flags |= ELF::SHF_ALLOC;
   }
@@ -841,9 +920,6 @@ template <class ELFT> void ELFState<ELFT>::finalizeStrings() {
     DotStrtab.add(Sym.Name);
   DotStrtab.finalize();
 
-  if (Doc.DynamicSymbols.empty())
-    return;
-
   // Add the dynamic symbol names to .dynstr section.
   for (const ELFYAML::Symbol &Sym : Doc.DynamicSymbols)
     DotDynstr.add(Sym.Name);
@@ -898,28 +974,9 @@ int ELFState<ELFT>::writeELF(raw_ostream &OS, const ELFYAML::Object &Doc) {
   ContiguousBlobAccumulator CBA(SectionContentBeginOffset);
 
   std::vector<Elf_Shdr> SHeaders;
-  if (!State.initSectionHeaders(SHeaders, CBA))
+  if (!State.initSectionHeaders(State, SHeaders, CBA))
     return 1;
 
-  // Populate SHeaders with implicit sections not present in the Doc
-  for (StringRef Name : State.implicitSectionNames())
-    if (State.SN2I.get(Name) >= SHeaders.size())
-      SHeaders.push_back({});
-
-  // Initialize the implicit sections
-  State.initSymtabSectionHeader(SHeaders[State.SN2I.get(".symtab")],
-                                SymtabType::Static, CBA);
-  State.initStrtabSectionHeader(SHeaders[State.SN2I.get(".strtab")], ".strtab",
-                                State.DotStrtab, CBA);
-  State.initStrtabSectionHeader(SHeaders[State.SN2I.get(".shstrtab")],
-                                ".shstrtab", State.DotShStrtab, CBA);
-  if (!Doc.DynamicSymbols.empty()) {
-    State.initSymtabSectionHeader(SHeaders[State.SN2I.get(".dynsym")],
-                                  SymtabType::Dynamic, CBA);
-    State.initStrtabSectionHeader(SHeaders[State.SN2I.get(".dynstr")],
-                                  ".dynstr", State.DotDynstr, CBA);
-  }
-
   // Now we can decide segment offsets
   State.setProgramHeaderLayout(PHeaders, SHeaders);
 

From 22e99c434fb65ca8a9d5e3dbf5db5965681ce5c3 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Wed, 5 Jun 2019 14:03:13 +0000
Subject: [PATCH 1118/1176] [MIPS GlobalISel] Select fcmp

Select floating point compare for MIPS32.

Differential Revision: https://reviews.llvm.org/D62721

llvm-svn: 362603
---
 .../Target/Mips/MipsInstructionSelector.cpp   |   79 ++
 llvm/lib/Target/Mips/MipsLegalizerInfo.cpp    |    4 +
 llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp |   11 +
 .../GlobalISel/instruction-select/fcmp.mir    | 1152 +++++++++++++++++
 .../Mips/GlobalISel/legalizer/fcmp.mir        |   73 ++
 .../CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll   |  438 +++++++
 .../Mips/GlobalISel/regbankselect/fcmp.mir    |   75 ++
 7 files changed, 1832 insertions(+)
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fcmp.mir
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/legalizer/fcmp.mir
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
 create mode 100644 llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fcmp.mir

diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 442244af609ff..8fddcca4cfe41 100644
--- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -11,6 +11,7 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/MipsInstPrinter.h"
 #include "MipsMachineFunction.h"
 #include "MipsRegisterBankInfo.h"
 #include "MipsTargetMachine.h"
@@ -492,6 +493,84 @@ bool MipsInstructionSelector::select(MachineInstr &I,
     I.eraseFromParent();
     return true;
   }
+  case G_FCMP: {
+    unsigned MipsFCMPCondCode;
+    bool isLogicallyNegated;
+    switch (CmpInst::Predicate Cond = static_cast<CmpInst::Predicate>(
+                I.getOperand(1).getPredicate())) {
+    case CmpInst::FCMP_UNO: // Unordered
+    case CmpInst::FCMP_ORD: // Ordered (OR)
+      MipsFCMPCondCode = Mips::FCOND_UN;
+      isLogicallyNegated = Cond != CmpInst::FCMP_UNO;
+      break;
+    case CmpInst::FCMP_OEQ: // Equal
+    case CmpInst::FCMP_UNE: // Not Equal (NEQ)
+      MipsFCMPCondCode = Mips::FCOND_OEQ;
+      isLogicallyNegated = Cond != CmpInst::FCMP_OEQ;
+      break;
+    case CmpInst::FCMP_UEQ: // Unordered or Equal
+    case CmpInst::FCMP_ONE: // Ordered or Greater Than or Less Than (OGL)
+      MipsFCMPCondCode = Mips::FCOND_UEQ;
+      isLogicallyNegated = Cond != CmpInst::FCMP_UEQ;
+      break;
+    case CmpInst::FCMP_OLT: // Ordered or Less Than
+    case CmpInst::FCMP_UGE: // Unordered or Greater Than or Equal (UGE)
+      MipsFCMPCondCode = Mips::FCOND_OLT;
+      isLogicallyNegated = Cond != CmpInst::FCMP_OLT;
+      break;
+    case CmpInst::FCMP_ULT: // Unordered or Less Than
+    case CmpInst::FCMP_OGE: // Ordered or Greater Than or Equal (OGE)
+      MipsFCMPCondCode = Mips::FCOND_ULT;
+      isLogicallyNegated = Cond != CmpInst::FCMP_ULT;
+      break;
+    case CmpInst::FCMP_OLE: // Ordered or Less Than or Equal
+    case CmpInst::FCMP_UGT: // Unordered or Greater Than (UGT)
+      MipsFCMPCondCode = Mips::FCOND_OLE;
+      isLogicallyNegated = Cond != CmpInst::FCMP_OLE;
+      break;
+    case CmpInst::FCMP_ULE: // Unordered or Less Than or Equal
+    case CmpInst::FCMP_OGT: // Ordered or Greater Than (OGT)
+      MipsFCMPCondCode = Mips::FCOND_ULE;
+      isLogicallyNegated = Cond != CmpInst::FCMP_ULE;
+      break;
+    default:
+      return false;
+    }
+
+    // Default compare result in gpr register will be `true`.
+    // We will move `false` (MIPS::Zero) to gpr result when fcmp gives false
+    // using MOVF_I. When orignal predicate (Cond) is logically negated
+    // MipsFCMPCondCode, result is inverted i.e. MOVT_I is used.
+    unsigned MoveOpcode = isLogicallyNegated ? Mips::MOVT_I : Mips::MOVF_I;
+
+    unsigned TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+        .addDef(TrueInReg)
+        .addUse(Mips::ZERO)
+        .addImm(1);
+
+    unsigned Size = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+    unsigned FCMPOpcode =
+        Size == 32 ? Mips::FCMP_S32
+                   : STI.isFP64bit() ? Mips::FCMP_D64 : Mips::FCMP_D32;
+    MachineInstr *FCMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(FCMPOpcode))
+                             .addUse(I.getOperand(2).getReg())
+                             .addUse(I.getOperand(3).getReg())
+                             .addImm(MipsFCMPCondCode);
+    if (!constrainSelectedInstRegOperands(*FCMP, TII, TRI, RBI))
+      return false;
+
+    MachineInstr *Move = BuildMI(MBB, I, I.getDebugLoc(), TII.get(MoveOpcode))
+                             .addDef(I.getOperand(0).getReg())
+                             .addUse(Mips::ZERO)
+                             .addUse(Mips::FCC0)
+                             .addUse(TrueInReg);
+    if (!constrainSelectedInstRegOperands(*Move, TII, TRI, RBI))
+      return false;
+
+    I.eraseFromParent();
+    return true;
+  }
   default:
     return false;
   }
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index fcad2a9a857fe..cabaed8e303b5 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -97,6 +97,10 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
   getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
       .legalFor({s32, s64});
 
+  getActionDefinitionsBuilder(G_FCMP)
+      .legalFor({{s32, s32}, {s32, s64}})
+      .minScalar(0, s32);
+
   computeTables();
   verify(*ST.getInstrInfo());
 }
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 4814ef4b0397a..0f9d1064dda2f 100644
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -149,6 +149,17 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     OperandsMapping = getOperandsMapping({FPRValueMapping, nullptr});
     break;
   }
+  case G_FCMP: {
+    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+    const RegisterBankInfo::ValueMapping *FPRValueMapping =
+        Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+                   : &Mips::ValueMappings[Mips::DPRIdx];
+    OperandsMapping =
+        getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+                            FPRValueMapping, FPRValueMapping});
+    break;
+  }
   case G_CONSTANT:
   case G_FRAME_INDEX:
   case G_GLOBAL_VALUE:
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fcmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fcmp.mir
new file mode 100644
index 0000000000000..b861dc6d49311
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/fcmp.mir
@@ -0,0 +1,1152 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64
+--- |
+
+  define void @false_s() {entry: ret void}
+  define void @true_s() {entry: ret void}
+  define void @uno_s() {entry: ret void}
+  define void @ord_s() {entry: ret void}
+  define void @oeq_s() {entry: ret void}
+  define void @une_s() {entry: ret void}
+  define void @ueq_s() {entry: ret void}
+  define void @one_s() {entry: ret void}
+  define void @olt_s() {entry: ret void}
+  define void @uge_s() {entry: ret void}
+  define void @ult_s() {entry: ret void}
+  define void @oge_s() {entry: ret void}
+  define void @ole_s() {entry: ret void}
+  define void @ugt_s() {entry: ret void}
+  define void @ule_s() {entry: ret void}
+  define void @ogt_s() {entry: ret void}
+
+  define void @false_d() {entry: ret void}
+  define void @true_d() {entry: ret void}
+  define void @uno_d() {entry: ret void}
+  define void @ord_d() {entry: ret void}
+  define void @oeq_d() {entry: ret void}
+  define void @une_d() {entry: ret void}
+  define void @ueq_d() {entry: ret void}
+  define void @one_d() {entry: ret void}
+  define void @olt_d() {entry: ret void}
+  define void @uge_d() {entry: ret void}
+  define void @ult_d() {entry: ret void}
+  define void @oge_d() {entry: ret void}
+  define void @ole_d() {entry: ret void}
+  define void @ugt_d() {entry: ret void}
+  define void @ule_d() {entry: ret void}
+  define void @ogt_d() {entry: ret void}
+
+...
+---
+name:            false_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: false_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 0
+    ; FP32: $v0 = COPY [[ORi]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: false_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 0
+    ; FP64: $v0 = COPY [[ORi]]
+    ; FP64: RetRA implicit $v0
+    %5:gprb(s32) = G_CONSTANT i32 0
+    %4:gprb(s32) = COPY %5(s32)
+    $v0 = COPY %4(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            true_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: true_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 65535
+    ; FP32: $v0 = COPY [[ADDiu]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: true_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 65535
+    ; FP64: $v0 = COPY [[ADDiu]]
+    ; FP64: RetRA implicit $v0
+    %5:gprb(s32) = G_CONSTANT i32 -1
+    %4:gprb(s32) = COPY %5(s32)
+    $v0 = COPY %4(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            uno_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: uno_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: uno_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(uno), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ord_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ord_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ord_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ord), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            oeq_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: oeq_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oeq_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(oeq), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            une_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: une_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: une_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(une), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ueq_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ueq_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ueq_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ueq), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            one_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: one_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: one_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(one), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            olt_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: olt_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: olt_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(olt), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            uge_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: uge_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: uge_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(uge), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ult_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ult_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ult_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ult), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            oge_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: oge_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oge_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(oge), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ole_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ole_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ole_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ole), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ugt_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ugt_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ugt_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ugt), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ule_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ule_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ule_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ule), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ogt_s
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: ogt_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_S32 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ogt_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fgr32 = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fgr32 = COPY $f14
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_S32 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s32) = COPY $f12
+    %1:fprb(s32) = COPY $f14
+    %4:gprb(s32) = G_FCMP floatpred(ogt), %0(s32), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            false_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: false_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 0
+    ; FP32: $v0 = COPY [[ORi]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: false_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 0
+    ; FP64: $v0 = COPY [[ORi]]
+    ; FP64: RetRA implicit $v0
+    %5:gprb(s32) = G_CONSTANT i32 0
+    %4:gprb(s32) = COPY %5(s32)
+    $v0 = COPY %4(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            true_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: true_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 65535
+    ; FP32: $v0 = COPY [[ADDiu]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: true_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 65535
+    ; FP64: $v0 = COPY [[ADDiu]]
+    ; FP64: RetRA implicit $v0
+    %5:gprb(s32) = G_CONSTANT i32 -1
+    %4:gprb(s32) = COPY %5(s32)
+    $v0 = COPY %4(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            uno_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: uno_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: uno_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(uno), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ord_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ord_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ord_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 1, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ord), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            oeq_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: oeq_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oeq_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(oeq), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            une_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: une_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: une_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 2, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(une), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ueq_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ueq_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ueq_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ueq), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            one_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: one_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: one_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 3, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(one), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            olt_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: olt_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: olt_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(olt), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            uge_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: uge_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: uge_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 4, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(uge), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ult_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ult_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ult_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ult), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            oge_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: oge_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oge_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 5, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(oge), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ole_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ole_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ole_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ole), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ugt_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ugt_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ugt_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 6, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ugt), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ule_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ule_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP32: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVF_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ule_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP64: [[MOVF_I:%[0-9]+]]:gpr32 = MOVF_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVF_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ule), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ogt_d
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: ogt_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:afgr64 = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:afgr64 = COPY $d7
+    ; FP32: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP32: FCMP_D32 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP32: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP32: $v0 = COPY [[MOVT_I]]
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: ogt_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fgr64 = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fgr64 = COPY $d7
+    ; FP64: [[ADDiu:%[0-9]+]]:gpr32 = ADDiu $zero, 1
+    ; FP64: FCMP_D64 [[COPY]], [[COPY1]], 7, implicit-def $fcc0
+    ; FP64: [[MOVT_I:%[0-9]+]]:gpr32 = MOVT_I $zero, $fcc0, [[ADDiu]]
+    ; FP64: $v0 = COPY [[MOVT_I]]
+    ; FP64: RetRA implicit $v0
+    %0:fprb(s64) = COPY $d6
+    %1:fprb(s64) = COPY $d7
+    %4:gprb(s32) = G_FCMP floatpred(ogt), %0(s64), %1
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fcmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fcmp.mir
new file mode 100644
index 0000000000000..9df4edd2ad18c
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/fcmp.mir
@@ -0,0 +1,73 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64
+--- |
+
+  define void @oeq_s() {entry: ret void}
+  define void @oeq_d() {entry: ret void}
+
+...
+---
+name:            oeq_s
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: oeq_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
+    ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
+    ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32)
+    ; FP32: $v0 = COPY [[COPY2]](s32)
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oeq_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:_(s32) = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:_(s32) = COPY $f14
+    ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
+    ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32)
+    ; FP64: $v0 = COPY [[COPY2]](s32)
+    ; FP64: RetRA implicit $v0
+    %0:_(s32) = COPY $f12
+    %1:_(s32) = COPY $f14
+    %2:_(s1) = G_FCMP floatpred(oeq), %0(s32), %1
+    %3:_(s32) = G_ANYEXT %2(s1)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            oeq_d
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: oeq_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7
+    ; FP32: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[COPY]](s64), [[COPY1]]
+    ; FP32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32)
+    ; FP32: $v0 = COPY [[COPY2]](s32)
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oeq_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:_(s64) = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:_(s64) = COPY $d7
+    ; FP64: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(oeq), [[COPY]](s64), [[COPY1]]
+    ; FP64: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32)
+    ; FP64: $v0 = COPY [[COPY2]](s32)
+    ; FP64: RetRA implicit $v0
+    %0:_(s64) = COPY $d6
+    %1:_(s64) = COPY $d7
+    %2:_(s1) = G_FCMP floatpred(oeq), %0(s64), %1
+    %3:_(s32) = G_ANYEXT %2(s1)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
new file mode 100644
index 0000000000000..8f559633c9569
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
@@ -0,0 +1,438 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32,FP32
+; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32,FP64
+
+define i1 @false_s(float %x, float %y) {
+; MIPS32-LABEL: false_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp false float %x, %y
+  ret i1 %cmp
+}
+define i1 @true_s(float %x, float %y) {
+; MIPS32-LABEL: true_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $2, $zero, 65535
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp true float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @uno_s(float %x, float %y) {
+; MIPS32-LABEL: uno_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.un.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp uno float %x, %y
+  ret i1 %cmp
+}
+define i1 @ord_s(float %x, float %y) {
+; MIPS32-LABEL: ord_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.un.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ord float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @oeq_s(float %x, float %y) {
+; MIPS32-LABEL: oeq_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.eq.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp oeq float %x, %y
+  ret i1 %cmp
+}
+define i1 @une_s(float %x, float %y) {
+; MIPS32-LABEL: une_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.eq.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp une float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ueq_s(float %x, float %y) {
+; MIPS32-LABEL: ueq_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ueq.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ueq float %x, %y
+  ret i1 %cmp
+}
+define i1 @one_s(float %x, float %y) {
+; MIPS32-LABEL: one_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ueq.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp one float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @olt_s(float %x, float %y) {
+; MIPS32-LABEL: olt_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.olt.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp olt float %x, %y
+  ret i1 %cmp
+}
+define i1 @uge_s(float %x, float %y) {
+; MIPS32-LABEL: uge_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.olt.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp uge float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ult_s(float %x, float %y) {
+; MIPS32-LABEL: ult_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ult.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ult float %x, %y
+  ret i1 %cmp
+}
+define i1 @oge_s(float %x, float %y) {
+; MIPS32-LABEL: oge_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ult.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp oge float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ole_s(float %x, float %y) {
+; MIPS32-LABEL: ole_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ole.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ole float %x, %y
+  ret i1 %cmp
+}
+define i1 @ugt_s(float %x, float %y) {
+; MIPS32-LABEL: ugt_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ole.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ugt float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ule_s(float %x, float %y) {
+; MIPS32-LABEL: ule_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ule.s $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ule float %x, %y
+  ret i1 %cmp
+}
+define i1 @ogt_s(float %x, float %y) {
+; MIPS32-LABEL: ogt_s:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ule.s $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ogt float %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @false_d(double %x, double %y) {
+; MIPS32-LABEL: false_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp false double %x, %y
+  ret i1 %cmp
+}
+define i1 @true_d(double %x, double %y) {
+; MIPS32-LABEL: true_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $2, $zero, 65535
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp true double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @uno_d(double %x, double %y) {
+; MIPS32-LABEL: uno_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.un.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp uno double %x, %y
+  ret i1 %cmp
+}
+define i1 @ord_d(double %x, double %y) {
+; MIPS32-LABEL: ord_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.un.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ord double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @oeq_d(double %x, double %y) {
+; MIPS32-LABEL: oeq_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.eq.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp oeq double %x, %y
+  ret i1 %cmp
+}
+define i1 @une_d(double %x, double %y) {
+; MIPS32-LABEL: une_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.eq.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp une double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ueq_d(double %x, double %y) {
+; MIPS32-LABEL: ueq_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ueq.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ueq double %x, %y
+  ret i1 %cmp
+}
+define i1 @one_d(double %x, double %y) {
+; MIPS32-LABEL: one_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ueq.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp one double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @olt_d(double %x, double %y) {
+; MIPS32-LABEL: olt_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.olt.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp olt double %x, %y
+  ret i1 %cmp
+}
+define i1 @uge_d(double %x, double %y) {
+; MIPS32-LABEL: uge_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.olt.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp uge double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ult_d(double %x, double %y) {
+; MIPS32-LABEL: ult_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ult.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ult double %x, %y
+  ret i1 %cmp
+}
+define i1 @oge_d(double %x, double %y) {
+; MIPS32-LABEL: oge_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ult.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp oge double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ole_d(double %x, double %y) {
+; MIPS32-LABEL: ole_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ole.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ole double %x, %y
+  ret i1 %cmp
+}
+define i1 @ugt_d(double %x, double %y) {
+; MIPS32-LABEL: ugt_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ole.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ugt double %x, %y
+  ret i1 %cmp
+}
+
+
+define i1 @ule_d(double %x, double %y) {
+; MIPS32-LABEL: ule_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ule.d $f12, $f14
+; MIPS32-NEXT:    movf $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ule double %x, %y
+  ret i1 %cmp
+}
+define i1 @ogt_d(double %x, double %y) {
+; MIPS32-LABEL: ogt_d:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    c.ule.d $f12, $f14
+; MIPS32-NEXT:    movt $1, $zero, $fcc0
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = fcmp ogt double %x, %y
+  ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fcmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fcmp.mir
new file mode 100644
index 0000000000000..ed5f3dface88d
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/fcmp.mir
@@ -0,0 +1,75 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP32
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -mattr=+fp64,+mips32r2 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=FP64
+--- |
+
+  define void @oeq_s() {entry: ret void}
+  define void @oeq_d() {entry: ret void}
+
+...
+---
+name:            oeq_s
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $f12, $f14
+
+    ; FP32-LABEL: name: oeq_s
+    ; FP32: liveins: $f12, $f14
+    ; FP32: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12
+    ; FP32: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14
+    ; FP32: [[FCMP:%[0-9]+]]:gprb(s32) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
+    ; FP32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[FCMP]](s32)
+    ; FP32: $v0 = COPY [[COPY2]](s32)
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oeq_s
+    ; FP64: liveins: $f12, $f14
+    ; FP64: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12
+    ; FP64: [[COPY1:%[0-9]+]]:fprb(s32) = COPY $f14
+    ; FP64: [[FCMP:%[0-9]+]]:gprb(s32) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]]
+    ; FP64: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[FCMP]](s32)
+    ; FP64: $v0 = COPY [[COPY2]](s32)
+    ; FP64: RetRA implicit $v0
+    %0:_(s32) = COPY $f12
+    %1:_(s32) = COPY $f14
+    %4:_(s32) = G_FCMP floatpred(oeq), %0(s32), %1
+    %3:_(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            oeq_d
+alignment:       2
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $d6, $d7
+
+    ; FP32-LABEL: name: oeq_d
+    ; FP32: liveins: $d6, $d7
+    ; FP32: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6
+    ; FP32: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7
+    ; FP32: [[FCMP:%[0-9]+]]:gprb(s32) = G_FCMP floatpred(oeq), [[COPY]](s64), [[COPY1]]
+    ; FP32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[FCMP]](s32)
+    ; FP32: $v0 = COPY [[COPY2]](s32)
+    ; FP32: RetRA implicit $v0
+    ; FP64-LABEL: name: oeq_d
+    ; FP64: liveins: $d6, $d7
+    ; FP64: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d6
+    ; FP64: [[COPY1:%[0-9]+]]:fprb(s64) = COPY $d7
+    ; FP64: [[FCMP:%[0-9]+]]:gprb(s32) = G_FCMP floatpred(oeq), [[COPY]](s64), [[COPY1]]
+    ; FP64: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[FCMP]](s32)
+    ; FP64: $v0 = COPY [[COPY2]](s32)
+    ; FP64: RetRA implicit $v0
+    %0:_(s64) = COPY $d6
+    %1:_(s64) = COPY $d7
+    %4:_(s32) = G_FCMP floatpred(oeq), %0(s64), %1
+    %3:_(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...

From 5145b1e4421adb3ac7e558cb88c9a2100ccb2b44 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Wed, 5 Jun 2019 14:03:34 +0000
Subject: [PATCH 1119/1176] [Sema] Prevent binding incompatible addr space ref
 to temporaries

References to arbitrary address spaces can't always be bound to
temporaries. This change extends the reference binding logic to
check that the address space of a temporary can be implicitly
converted to the address space in a reference when temporary
materialization is performed.

Differential Revision: https://reviews.llvm.org/D61318

llvm-svn: 362604
---
 clang/include/clang/AST/Type.h                | 20 +++++++++++--------
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +++
 clang/include/clang/Sema/Initialization.h     |  3 +++
 clang/lib/Sema/SemaInit.cpp                   | 19 +++++++++++++++++-
 .../SemaOpenCLCXX/address-space-references.cl |  5 +++++
 5 files changed, 41 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/SemaOpenCLCXX/address-space-references.cl

diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3f71a7ec6ffe9..00a2b7643370a 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -460,21 +460,25 @@ class Qualifiers {
     Mask |= qs.Mask;
   }
 
-  /// Returns true if this address space is a superset of the other one.
+  /// Returns true if address space A is equal to or a superset of B.
   /// OpenCL v2.0 defines conversion rules (OpenCLC v2.0 s6.5.5) and notion of
   /// overlapping address spaces.
   /// CL1.1 or CL1.2:
   ///   every address space is a superset of itself.
   /// CL2.0 adds:
   ///   __generic is a superset of any address space except for __constant.
+  static bool isAddressSpaceSupersetOf(LangAS A, LangAS B) {
+    // Address spaces must match exactly.
+    return A == B ||
+           // Otherwise in OpenCLC v2.0 s6.5.5: every address space except
+           // for __constant can be used as __generic.
+           (A == LangAS::opencl_generic && B != LangAS::opencl_constant);
+  }
+
+  /// Returns true if the address space in these qualifiers is equal to or
+  /// a superset of the address space in the argument qualifiers.
   bool isAddressSpaceSupersetOf(Qualifiers other) const {
-    return
-        // Address spaces must match exactly.
-        getAddressSpace() == other.getAddressSpace() ||
-        // Otherwise in OpenCLC v2.0 s6.5.5: every address space except
-        // for __constant can be used as __generic.
-        (getAddressSpace() == LangAS::opencl_generic &&
-         other.getAddressSpace() != LangAS::opencl_constant);
+    return isAddressSpaceSupersetOf(getAddressSpace(), other.getAddressSpace());
   }
 
   /// Determines if these qualifiers compatibly include another set.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 761bd22819a6b..d34720c9fafa9 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1857,6 +1857,9 @@ def err_reference_bind_failed : Error<
   "reference %diff{to %select{type|incomplete type}1 $ could not bind to an "
   "%select{rvalue|lvalue}2 of type $|could not bind to %select{rvalue|lvalue}2 of "
   "incompatible type}0,3">;
+def err_reference_bind_temporary_addrspace : Error<
+  "reference of type %0 cannot bind to a temporary object because of "
+  "address space mismatch">;
 def err_reference_bind_init_list : Error<
   "reference to type %0 cannot bind to an initializer list">;
 def err_init_list_bad_dest_type : Error<
diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h
index 14d8aa8dabf30..1c522e49bc6a2 100644
--- a/clang/include/clang/Sema/Initialization.h
+++ b/clang/include/clang/Sema/Initialization.h
@@ -1012,6 +1012,9 @@ class InitializationSequence {
     /// Reference binding drops qualifiers.
     FK_ReferenceInitDropsQualifiers,
 
+    /// Reference with mismatching address space binding to temporary.
+    FK_ReferenceAddrspaceMismatchTemporary,
+
     /// Reference binding failed.
     FK_ReferenceInitFailed,
 
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 45456aff364dd..25aff40f26f76 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -3344,6 +3344,7 @@ bool InitializationSequence::isAmbiguous() const {
   case FK_NonConstLValueReferenceBindingToVectorElement:
   case FK_NonConstLValueReferenceBindingToUnrelated:
   case FK_RValueReferenceBindingToLValue:
+  case FK_ReferenceAddrspaceMismatchTemporary:
   case FK_ReferenceInitDropsQualifiers:
   case FK_ReferenceInitFailed:
   case FK_ConversionFailed:
@@ -4837,9 +4838,16 @@ static void TryReferenceInitializationCore(Sema &S,
 
   Sequence.AddReferenceBindingStep(cv1T1IgnoreAS, /*bindingTemporary=*/true);
 
-  if (T1Quals.hasAddressSpace())
+  if (T1Quals.hasAddressSpace()) {
+    if (!Qualifiers::isAddressSpaceSupersetOf(T1Quals.getAddressSpace(),
+                                              LangAS::Default)) {
+      Sequence.SetFailed(
+          InitializationSequence::FK_ReferenceAddrspaceMismatchTemporary);
+      return;
+    }
     Sequence.AddQualificationConversionStep(cv1T1, isLValueRef ? VK_LValue
                                                                : VK_XValue);
+  }
 }
 
 /// Attempt character array initialization from a string literal
@@ -8516,6 +8524,11 @@ bool InitializationSequence::Diagnose(Sema &S,
       << Args[0]->getSourceRange();
     break;
 
+  case FK_ReferenceAddrspaceMismatchTemporary:
+    S.Diag(Kind.getLocation(), diag::err_reference_bind_temporary_addrspace)
+        << DestType << Args[0]->getSourceRange();
+    break;
+
   case FK_ReferenceInitDropsQualifiers: {
     QualType SourceType = OnlyArg->getType();
     QualType NonRefType = DestType.getNonReferenceType();
@@ -8851,6 +8864,10 @@ void InitializationSequence::dump(raw_ostream &OS) const {
       OS << "reference initialization drops qualifiers";
       break;
 
+    case FK_ReferenceAddrspaceMismatchTemporary:
+      OS << "reference with mismatching address space bound to temporary";
+      break;
+
     case FK_ReferenceInitFailed:
       OS << "reference initialization failed";
       break;
diff --git a/clang/test/SemaOpenCLCXX/address-space-references.cl b/clang/test/SemaOpenCLCXX/address-space-references.cl
new file mode 100644
index 0000000000000..c359bbd713a4a
--- /dev/null
+++ b/clang/test/SemaOpenCLCXX/address-space-references.cl
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -pedantic -verify -fsyntax-only
+
+__global const int& f(__global float &ref) {
+  return ref; // expected-error{{reference of type 'const __global int &' cannot bind to a temporary object because of address space mismatch}}
+}

From 54bd6c840e37bd738253d6ee5e7c2c571dd66860 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Wed, 5 Jun 2019 14:08:01 +0000
Subject: [PATCH 1120/1176] UpdateTestChecks: hexagon support

Summary:
These tests are being affected by an upcoming patch,
so having an understandable (autogenerated) diff is helpful.

This target, again, prefers `-march`:
```
llvm/test/CodeGen/Hexagon$ grep -r triple | wc -l
467
llvm/test/CodeGen/Hexagon$ grep -r march | wc -l
1167
```

Reviewers: RKSimon, kparzysz

Reviewed By: kparzysz

Subscribers: xbolva00, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62867

llvm-svn: 362605
---
 llvm/test/CodeGen/Hexagon/isel-prefer.ll | 79 ++++++++++++++++++++----
 llvm/test/CodeGen/Hexagon/ntstbit.ll     | 35 ++++++++++-
 llvm/test/CodeGen/Hexagon/tstbit.ll      | 11 +++-
 llvm/utils/UpdateTestChecks/asm.py       | 19 ++++++
 4 files changed, 130 insertions(+), 14 deletions(-)

diff --git a/llvm/test/CodeGen/Hexagon/isel-prefer.ll b/llvm/test/CodeGen/Hexagon/isel-prefer.ll
index 4cef0039cebcb..1b69f5691474f 100644
--- a/llvm/test/CodeGen/Hexagon/isel-prefer.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-prefer.ll
@@ -1,11 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=hexagon < %s | FileCheck %s
 
 @data1 = external global [2 x [31 x i8]], align 8
 @data2 = external global [2 x [91 x i8]], align 8
 
-; CHECK-LABEL: Prefer_M4_or_andn:
-; CHECK: r2 |= and(r0,~r1)
 define i32 @Prefer_M4_or_andn(i32 %a0, i32 %a1, i32 %a2) #0 {
+; CHECK-LABEL: Prefer_M4_or_andn:
+; CHECK:       // %bb.0: // %b3
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r2 = asl(r2,#5)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r2 |= and(r0,~r1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = r2
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b3:
   %v4 = xor i32 %a1, -1
   %v5 = shl i32 %a2, 5
@@ -14,27 +27,51 @@ b3:
   ret i32 %v7
 }
 
-; CHECK-LABEL: Prefer_M4_mpyri_addi:
-; CHECK: add(##data1,mpyi(r0,#31))
 define i32 @Prefer_M4_mpyri_addi(i32 %a0) #0 {
+; CHECK-LABEL: Prefer_M4_mpyri_addi:
+; CHECK:       // %bb.0: // %b1
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = add(##data1,mpyi(r0,#31))
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b1:
   %v2 = getelementptr inbounds [2 x [31 x i8]], [2 x [31 x i8]]* @data1, i32 0, i32 %a0
   %v3 = ptrtoint [31 x i8]* %v2 to i32
   ret i32 %v3
 }
 
-; CHECK-LABEL: Prefer_M4_mpyrr_addi:
-; CHECK: add(##data2,mpyi(r0,r1))
 define i32 @Prefer_M4_mpyrr_addi(i32 %a0) #0 {
+; CHECK-LABEL: Prefer_M4_mpyrr_addi:
+; CHECK:       // %bb.0: // %b1
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1 = #91
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = add(##data2,mpyi(r0,r1))
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b1:
   %v2 = getelementptr inbounds [2 x [91 x i8]], [2 x [91 x i8]]* @data2, i32 0, i32 %a0
   %v3 = ptrtoint [91 x i8]* %v2 to i32
   ret i32 %v3
 }
 
-; CHECK-LABEL: Prefer_S2_tstbit_r:
-; CHECK: p0 = tstbit(r0,r1)
 define i32 @Prefer_S2_tstbit_r(i32 %a0, i32 %a1) #0 {
+; CHECK-LABEL: Prefer_S2_tstbit_r:
+; CHECK:       // %bb.0: // %b2
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = tstbit(r0,r1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = mux(p0,#1,#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b2:
   %v3 = shl i32 1, %a1
   %v4 = and i32 %a0, %v3
@@ -43,9 +80,18 @@ b2:
   ret i32 %v6
 }
 
-; CHECK-LABEL: Prefer_S2_ntstbit_r:
-; CHECK: p0 = !tstbit(r0,r1)
 define i32 @Prefer_S2_ntstbit_r(i32 %a0, i32 %a1) #0 {
+; CHECK-LABEL: Prefer_S2_ntstbit_r:
+; CHECK:       // %bb.0: // %b2
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = !tstbit(r0,r1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = mux(p0,#1,#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b2:
   %v3 = shl i32 1, %a1
   %v4 = and i32 %a0, %v3
@@ -54,9 +100,18 @@ b2:
   ret i32 %v6
 }
 
-; CHECK-LABEL: Prefer_L2_loadrub_io:
-; CHECK: memub(r0+#65)
 define i64 @Prefer_L2_loadrub_io(i8* %a0) #0 {
+; CHECK-LABEL: Prefer_L2_loadrub_io:
+; CHECK:       // %bb.0: // %b1
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = memub(r0+#65)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = combine(#0,r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b1:
   %v2 = getelementptr i8, i8* %a0, i32 65
   %v3 = load i8, i8* %v2
diff --git a/llvm/test/CodeGen/Hexagon/ntstbit.ll b/llvm/test/CodeGen/Hexagon/ntstbit.ll
index 8c6d77e539c2d..2b8526d8f4055 100644
--- a/llvm/test/CodeGen/Hexagon/ntstbit.ll
+++ b/llvm/test/CodeGen/Hexagon/ntstbit.ll
@@ -1,8 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=hexagon < %s | FileCheck %s
-; CHECK: !tstbit
 
 ; Function Attrs: nounwind
 define i32 @f0(i32 %a0, i32 %a1, i32 %a2) #0 {
+; CHECK-LABEL: f0:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = !tstbit(r1,r2)
+; CHECK-NEXT:     r17:16 = combine(r0,r1)
+; CHECK-NEXT:     memd(r29+#-16) = r17:16
+; CHECK-NEXT:     allocframe(#8)
+; CHECK-NEXT:    } // 8-byte Folded Spill
+; CHECK-NEXT:    {
+; CHECK-NEXT:     if (p0) jump:nt .LBB0_2
+; CHECK-NEXT:    }
+; CHECK-NEXT:  // %bb.1: // %b1
+; CHECK-NEXT:    {
+; CHECK-NEXT:     call f1
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jump .LBB0_3
+; CHECK-NEXT:    }
+; CHECK-NEXT:  .LBB0_2: // %b2
+; CHECK-NEXT:    {
+; CHECK-NEXT:     call f2
+; CHECK-NEXT:    }
+; CHECK-NEXT:  .LBB0_3: // %b3
+; CHECK-NEXT:    {
+; CHECK-NEXT:     call f3
+; CHECK-NEXT:     r1 = add(r16,#2)
+; CHECK-NEXT:     r0 = r17
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = #0
+; CHECK-NEXT:     r17:16 = memd(r29+#0)
+; CHECK-NEXT:     dealloc_return
+; CHECK-NEXT:    } // 8-byte Folded Reload
 b0:
   %v0 = shl i32 1, %a2
   %v1 = and i32 %v0, %a1
diff --git a/llvm/test/CodeGen/Hexagon/tstbit.ll b/llvm/test/CodeGen/Hexagon/tstbit.ll
index f28f031a020c9..ac1799007f635 100644
--- a/llvm/test/CodeGen/Hexagon/tstbit.ll
+++ b/llvm/test/CodeGen/Hexagon/tstbit.ll
@@ -1,8 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=hexagon < %s | FileCheck %s
-; CHECK: tstbit
 
 ; Function Attrs: nounwind readnone
 define i32 @f0(i32 %a0, i32 %a1) #0 {
+; CHECK-LABEL: f0:
+; CHECK:       // %bb.0: // %b0
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = tstbit(r0,r1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = mux(p0,#1,#0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
 b0:
   %v0 = shl i32 1, %a1
   %v1 = and i32 %v0, %a0
diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index 7fb93fab56519..a27cd04205b3b 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -42,6 +42,13 @@ class string:
     r'.Lfunc_end[0-9]+:\n',
     flags=(re.M | re.S))
 
+ASM_FUNCTION_HEXAGON_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*//[ \t]*@(?P=func)\n[^:]*?'
+    r'(?P<body>.*?)\n' # (body of the function)
+    # This list is incomplete
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
 ASM_FUNCTION_MIPS_RE = re.compile(
     r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func)
     r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+'  # Mips+LLVM standard asm prologue
@@ -161,6 +168,16 @@ def scrub_asm_arm_eabi(asm, args):
   asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
   return asm
 
+def scrub_asm_hexagon(asm, args):
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation.
+  asm = string.expandtabs(asm, 2)
+  # Strip trailing whitespace.
+  asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  return asm
+
 def scrub_asm_powerpc(asm, args):
   # Scrub runs of whitespace out of the assembly, but leave the leading
   # whitespace in place.
@@ -239,6 +256,7 @@ def get_triple_from_march(march):
       'r600': 'r600',
       'mips': 'mips',
       'sparc': 'sparc',
+      'hexagon': 'hexagon',
   }
   for prefix, triple in triples.items():
     if march.startswith(prefix):
@@ -254,6 +272,7 @@ def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, pre
       'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
       'arm64-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE),
       'aarch64': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE),
+      'hexagon': (scrub_asm_hexagon, ASM_FUNCTION_HEXAGON_RE),
       'r600': (scrub_asm_amdgpu, ASM_FUNCTION_AMDGPU_RE),
       'amdgcn': (scrub_asm_amdgpu, ASM_FUNCTION_AMDGPU_RE),
       'arm-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),

From 253086230fa5d577674b4607b68714cd18cb5d99 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Wed, 5 Jun 2019 14:08:11 +0000
Subject: [PATCH 1121/1176] [NFC][Codegen][X86] Add AVX2 runline for '(X & (C
 l>> Y)) ==/!= 0' tests

llvm-svn: 362606
---
 ...st-and-by-const-from-lshr-in-eqcmp-zero.ll | 804 +++++++-----------
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 796 ++++++++---------
 2 files changed, 670 insertions(+), 930 deletions(-)

diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 02ded696861cc..88c939f52bee8 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-NOBMI
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI1
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI12
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-NOBMI
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI1
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI12
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2                  < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,SSE2,X86-SSE2,BMI1,X86-BMI1,V0,X86-V0
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi             < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,SSE2,X86-SSE2,BMI1,X86-BMI1,V1,X86-V1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2       < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,SSE2,X86-SSE2,BMI2,X86-BMI2,V2,X86-V2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,BMI2,X86-BMI2,AVX2,X86-AVX2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2                  < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,SSE2,X64-SSE2,BMI1,X64-BMI1,V0,X64-V0
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi             < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,SSE2,X64-SSE2,BMI1,X64-BMI1,V1,X64-V1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2       < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,SSE2,X64-SSE2,BMI2,X64-BMI2,V2,X64-V2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,BMI2,X64-BMI2,AVX2,X64-AVX2
 
 ; We are looking for the following pattern here:
 ;   (X & (C l>> Y)) ==/!= 0
@@ -95,15 +97,6 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; i16 scalar
 
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i16_signbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $32768, %eax # imm = 0x8000
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i16_signbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -113,24 +106,14 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i16_signbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $32768, %ecx # imm = 0x8000
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i16_signbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $32768, %eax # imm = 0x8000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testw %di, %ax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i16_signbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $32768, %ecx # imm = 0x8000
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i16_signbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -142,13 +125,13 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i16_signbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $32768, %eax # imm = 0x8000
-; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testw %di, %ax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i16_signbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $32768, %eax # imm = 0x8000
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testw %di, %ax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i16 32768, %y
   %t1 = and i16 %t0, %x
   %res = icmp eq i16 %t1, 0
@@ -156,15 +139,6 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 }
 
 define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i16_lowestbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $1, %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i16_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -174,24 +148,14 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i16_lowestbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $1, %ecx
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i16_lowestbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testw %di, %ax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i16_lowestbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i16_lowestbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -203,13 +167,13 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i16_lowestbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $1, %eax
-; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testw %di, %ax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i16_lowestbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $1, %eax
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testw %di, %ax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i16 1, %y
   %t1 = and i16 %t0, %x
   %res = icmp eq i16 %t1, 0
@@ -217,15 +181,6 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 }
 
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -235,24 +190,14 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $4080, %ecx # imm = 0xFF0
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testw %di, %ax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $4080, %ecx # imm = 0xFF0
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X64-BMI1:       # %bb.0:
@@ -264,13 +209,13 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $4080, %eax # imm = 0xFF0
-; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testw %di, %ax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testw %di, %ax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i16 4080, %y
   %t1 = and i16 %t0, %x
   %res = icmp eq i16 %t1, 0
@@ -280,15 +225,6 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; i32 scalar
 
 define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_signbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_signbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -298,24 +234,14 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_signbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_signbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testl %edi, %eax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_signbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_signbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -327,13 +253,13 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_signbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testl %edi, %eax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_signbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testl %edi, %eax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i32 2147483648, %y
   %t1 = and i32 %t0, %x
   %res = icmp eq i32 %t1, 0
@@ -341,15 +267,6 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 }
 
 define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_lowestbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $1, %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -359,24 +276,14 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_lowestbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $1, %ecx
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_lowestbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testl %edi, %eax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_lowestbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_lowestbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -388,13 +295,13 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_lowestbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $1, %eax
-; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testl %edi, %eax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_lowestbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $1, %eax
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testl %edi, %eax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i32 1, %y
   %t1 = and i32 %t0, %x
   %res = icmp eq i32 %t1, 0
@@ -402,15 +309,6 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 }
 
 define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -420,24 +318,14 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $16776960, %ecx # imm = 0xFFFF00
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testl %edi, %eax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $16776960, %ecx # imm = 0xFFFF00
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X64-BMI1:       # %bb.0:
@@ -449,13 +337,13 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
-; X64-BMI12-NEXT:    shrxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testl %edi, %eax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testl %edi, %eax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i32 16776960, %y
   %t1 = and i32 %t0, %x
   %res = icmp eq i32 %t1, 0
@@ -465,25 +353,6 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; i64 scalar
 
 define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i64_signbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    cmovnel %eax, %esi
-; X86-NOBMI-NEXT:    cmovnel %edx, %eax
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    orl %esi, %eax
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i64_signbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
@@ -503,34 +372,24 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i64_signbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    pushl %esi
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI12-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X86-BMI12-NEXT:    xorl %edx, %edx
-; X86-BMI12-NEXT:    xorl %esi, %esi
-; X86-BMI12-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI12-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI12-NEXT:    testb $32, %cl
-; X86-BMI12-NEXT:    cmovnel %eax, %esi
-; X86-BMI12-NEXT:    cmovnel %edx, %eax
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI12-NEXT:    orl %esi, %eax
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    popl %esi
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i64_signbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
-; X64-NOBMI-NEXT:    testq %rdi, %rax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i64_signbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    cmovnel %eax, %esi
+; X86-BMI2-NEXT:    cmovnel %edx, %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    orl %esi, %eax
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i64_signbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -542,13 +401,13 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i64_signbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-BMI12-NEXT:    shrxq %rsi, %rax, %rax
-; X64-BMI12-NEXT:    testq %rdi, %rax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i64_signbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    testq %rdi, %rax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i64 9223372036854775808, %y
   %t1 = and i64 %t0, %x
   %res = icmp eq i64 %t1, 0
@@ -569,16 +428,6 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    retl
 ;
-; X64-NOBMI-LABEL: scalar_i64_lowestbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
-; X64-NOBMI-NEXT:    testq %rdi, %rax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
-;
 ; X64-BMI1-LABEL: scalar_i64_lowestbit_eq:
 ; X64-BMI1:       # %bb.0:
 ; X64-BMI1-NEXT:    movq %rsi, %rcx
@@ -589,13 +438,13 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i64_lowestbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $1, %eax
-; X64-BMI12-NEXT:    shrxq %rsi, %rax, %rax
-; X64-BMI12-NEXT:    testq %rdi, %rax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i64_lowestbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $1, %eax
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    testq %rdi, %rax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i64 1, %y
   %t1 = and i64 %t0, %x
   %res = icmp eq i64 %t1, 0
@@ -603,25 +452,6 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 }
 
 define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; X86-NOBMI-NEXT:    movl $-65536, %edx # imm = 0xFFFF0000
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    cmovnel %eax, %edx
-; X86-NOBMI-NEXT:    cmovel %eax, %esi
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    orl %edx, %esi
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
@@ -641,34 +471,24 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    pushl %esi
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI12-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; X86-BMI12-NEXT:    movl $-65536, %edx # imm = 0xFFFF0000
-; X86-BMI12-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI12-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI12-NEXT:    xorl %esi, %esi
-; X86-BMI12-NEXT:    testb $32, %cl
-; X86-BMI12-NEXT:    cmovnel %eax, %edx
-; X86-BMI12-NEXT:    cmovel %eax, %esi
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI12-NEXT:    orl %edx, %esi
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    popl %esi
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
-; X64-NOBMI-NEXT:    testq %rdi, %rax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-BMI2-NEXT:    movl $-65536, %edx # imm = 0xFFFF0000
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    cmovnel %eax, %edx
+; X86-BMI2-NEXT:    cmovel %eax, %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    orl %edx, %esi
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X64-BMI1:       # %bb.0:
@@ -680,13 +500,13 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-BMI12-NEXT:    shrxq %rsi, %rax, %rax
-; X64-BMI12-NEXT:    testq %rdi, %rax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    testq %rdi, %rax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i64 281474976645120, %y
   %t1 = and i64 %t0, %x
   %res = icmp eq i64 %t1, 0
@@ -698,28 +518,37 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ;------------------------------------------------------------------------------;
 
 define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_splat_eq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm5
-; CHECK-NEXT:    psrld %xmm2, %xmm5
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    psrld %xmm1, %xmm3
-; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; CHECK-NEXT:    andps %xmm5, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; SSE2-LABEL: vec_4xi32_splat_eq:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    psrld %xmm2, %xmm5
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    psrld %xmm1, %xmm3
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: vec_4xi32_splat_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsrlvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
   %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -727,28 +556,37 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 }
 
 define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_eq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,16776960,2147483648]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm5
-; CHECK-NEXT:    psrld %xmm2, %xmm5
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    psrld %xmm1, %xmm3
-; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; CHECK-NEXT:    andps %xmm5, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; SSE2-LABEL: vec_4xi32_nonsplat_eq:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [0,1,16776960,2147483648]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    psrld %xmm2, %xmm5
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    psrld %xmm1, %xmm3
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
+; AVX2-NEXT:    vpsrlvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
   %t0 = lshr <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -756,84 +594,111 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 }
 
 define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = <1,1,u,1>
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm5
-; CHECK-NEXT:    psrld %xmm2, %xmm5
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    psrld %xmm1, %xmm3
-; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; CHECK-NEXT:    andps %xmm5, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = <1,1,u,1>
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    psrld %xmm2, %xmm5
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    psrld %xmm1, %xmm3
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsrlvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
   %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
   ret <4 x i1> %res
 }
 define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm5
-; CHECK-NEXT:    psrld %xmm2, %xmm5
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    psrld %xmm1, %xmm3
-; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; CHECK-NEXT:    andps %xmm5, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; SSE2-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    psrld %xmm2, %xmm5
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    psrld %xmm1, %xmm3
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsrlvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
   %t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
   ret <4 x i1> %res
 }
 define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa {{.*#+}} xmm3 = <1,1,u,1>
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm5
-; CHECK-NEXT:    psrld %xmm2, %xmm5
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; CHECK-NEXT:    movdqa %xmm3, %xmm4
-; CHECK-NEXT:    psrld %xmm2, %xmm4
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; CHECK-NEXT:    psrld %xmm1, %xmm3
-; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; CHECK-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; CHECK-NEXT:    andps %xmm5, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = <1,1,u,1>
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
+; SSE2-NEXT:    psrld %xmm2, %xmm5
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    psrld %xmm2, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; SSE2-NEXT:    psrld %xmm1, %xmm3
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsrlvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
   %t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
@@ -894,15 +759,6 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
   ret i1 %res
 }
 define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_x_is_const2_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $1, %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
-; X86-NOBMI-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_x_is_const2_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -912,24 +768,14 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_x_is_const2_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $1, %ecx
-; X86-BMI12-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_x_is_const2_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %edi, %ecx
-; X64-NOBMI-NEXT:    movl $1, %eax
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
-; X64-NOBMI-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_x_is_const2_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_x_is_const2_eq:
 ; X64-BMI1:       # %bb.0:
@@ -941,13 +787,13 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_x_is_const2_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $1, %eax
-; X64-BMI12-NEXT:    shrxl %edi, %eax, %eax
-; X64-BMI12-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_x_is_const2_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $1, %eax
+; X64-BMI2-NEXT:    shrxl %edi, %eax, %eax
+; X64-BMI2-NEXT:    testl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = lshr i32 1, %y
   %t1 = and i32 %t0, 2857740885
   %res = icmp eq i32 %t1, 0
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index a1b01be7cf6b6..aa4a135fd681f 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-NOBMI
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI1
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,X86-BMI,X86-BMI12
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2            < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-NOBMI
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi       < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI1
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,X64-BMI,X64-BMI12
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2                  < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,SSE2,X86-SSE2,BMI1,X86-BMI1,V0,X86-V0
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi             < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,SSE2,X86-SSE2,BMI1,X86-BMI1,V1,X86-V1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2       < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,SSE2,X86-SSE2,BMI2,X86-BMI2,V2,X86-V2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=CHECK,X86,V0123,X86-V0123,BMI2,X86-BMI2,AVX2,X86-AVX2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2                  < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,SSE2,X64-SSE2,BMI1,X64-BMI1,V0,X64-V0
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi             < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,SSE2,X64-SSE2,BMI1,X64-BMI1,V1,X64-V1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2       < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,SSE2,X64-SSE2,BMI2,X64-BMI2,V2,X64-V2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=CHECK,X64,V0123,X64-V0123,BMI2,X64-BMI2,AVX2,X64-AVX2
 
 ; We are looking for the following pattern here:
 ;   (X & (C << Y)) ==/!= 0
@@ -90,15 +92,6 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; i16 scalar
 
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i16_signbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-32768, %eax # imm = 0x8000
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i16_signbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -108,24 +101,14 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i16_signbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $-32768, %ecx # imm = 0x8000
-; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i16_signbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $-32768, %eax # imm = 0x8000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    testw %di, %ax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i16_signbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-32768, %ecx # imm = 0x8000
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i16_signbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -137,13 +120,13 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i16_signbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $-32768, %eax # imm = 0x8000
-; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testw %di, %ax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i16_signbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-32768, %eax # imm = 0x8000
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testw %di, %ax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i16 32768, %y
   %t1 = and i16 %t0, %x
   %res = icmp eq i16 %t1, 0
@@ -171,15 +154,6 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 }
 
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -189,24 +163,14 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $4080, %ecx # imm = 0xFF0
-; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testw %ax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i16_bitsinmiddle_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $4080, %eax # imm = 0xFF0
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    testw %di, %ax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $4080, %ecx # imm = 0xFF0
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq:
 ; X64-BMI1:       # %bb.0:
@@ -218,13 +182,13 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i16_bitsinmiddle_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $4080, %eax # imm = 0xFF0
-; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testw %di, %ax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $4080, %eax # imm = 0xFF0
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testw %di, %ax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i16 4080, %y
   %t1 = and i16 %t0, %x
   %res = icmp eq i16 %t1, 0
@@ -234,15 +198,6 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; i32 scalar
 
 define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_signbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_signbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -252,24 +207,14 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_signbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
-; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_signbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    testl %edi, %eax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_signbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_signbit_eq:
 ; X64-BMI1:       # %bb.0:
@@ -281,13 +226,13 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_signbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testl %edi, %eax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_signbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testl %edi, %eax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i32 2147483648, %y
   %t1 = and i32 %t0, %x
   %res = icmp eq i32 %t1, 0
@@ -315,15 +260,6 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind {
 }
 
 define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -333,24 +269,14 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $16776960, %ecx # imm = 0xFFFF00
-; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testl %eax, {{[0-9]+}}(%esp)
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_bitsinmiddle_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    testl %edi, %eax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $16776960, %ecx # imm = 0xFFFF00
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testl %eax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq:
 ; X64-BMI1:       # %bb.0:
@@ -362,13 +288,13 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_bitsinmiddle_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
-; X64-BMI12-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI12-NEXT:    testl %edi, %eax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $16776960, %eax # imm = 0xFFFF00
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    testl %edi, %eax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i32 16776960, %y
   %t1 = and i32 %t0, %x
   %res = icmp eq i32 %t1, 0
@@ -391,16 +317,6 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    retl
 ;
-; X64-NOBMI-LABEL: scalar_i64_signbit_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    testq %rdi, %rax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
-;
 ; X64-BMI1-LABEL: scalar_i64_signbit_eq:
 ; X64-BMI1:       # %bb.0:
 ; X64-BMI1-NEXT:    movq %rsi, %rcx
@@ -411,13 +327,13 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i64_signbit_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; X64-BMI12-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI12-NEXT:    testq %rdi, %rax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i64_signbit_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    testq %rdi, %rax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i64 9223372036854775808, %y
   %t1 = and i64 %t0, %x
   %res = icmp eq i64 %t1, 0
@@ -425,25 +341,6 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 }
 
 define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i64_lowestbit_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $1, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:    shldl %cl, %eax, %esi
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    cmovnel %eax, %esi
-; X86-NOBMI-NEXT:    cmovnel %edx, %eax
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    orl %esi, %eax
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
@@ -463,24 +360,24 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i64_lowestbit_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    pushl %esi
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI12-NEXT:    movl $1, %eax
-; X86-BMI12-NEXT:    xorl %edx, %edx
-; X86-BMI12-NEXT:    xorl %esi, %esi
-; X86-BMI12-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI12-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI12-NEXT:    testb $32, %cl
-; X86-BMI12-NEXT:    cmovnel %eax, %esi
-; X86-BMI12-NEXT:    cmovnel %edx, %eax
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI12-NEXT:    orl %esi, %eax
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    popl %esi
-; X86-BMI12-NEXT:    retl
+; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    cmovnel %eax, %esi
+; X86-BMI2-NEXT:    cmovnel %edx, %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    orl %esi, %eax
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i64_lowestbit_eq:
 ; X64:       # %bb.0:
@@ -494,25 +391,6 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
 }
 
 define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-65536, %eax # imm = 0xFFFF0000
-; X86-NOBMI-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    xorl %esi, %esi
-; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    cmovnel %eax, %edx
-; X86-NOBMI-NEXT:    cmovel %eax, %esi
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    orl %edx, %esi
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    pushl %esi
@@ -532,34 +410,24 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    pushl %esi
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI12-NEXT:    movl $-65536, %eax # imm = 0xFFFF0000
-; X86-BMI12-NEXT:    movl $65535, %edx # imm = 0xFFFF
-; X86-BMI12-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI12-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI12-NEXT:    xorl %esi, %esi
-; X86-BMI12-NEXT:    testb $32, %cl
-; X86-BMI12-NEXT:    cmovnel %eax, %edx
-; X86-BMI12-NEXT:    cmovel %eax, %esi
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI12-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI12-NEXT:    orl %edx, %esi
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    popl %esi
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i64_bitsinmiddle_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    testq %rdi, %rax
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $-65536, %eax # imm = 0xFFFF0000
+; X86-BMI2-NEXT:    movl $65535, %edx # imm = 0xFFFF
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    cmovnel %eax, %edx
+; X86-BMI2-NEXT:    cmovel %eax, %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    orl %edx, %esi
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq:
 ; X64-BMI1:       # %bb.0:
@@ -571,13 +439,13 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i64_bitsinmiddle_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
-; X64-BMI12-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI12-NEXT:    testq %rdi, %rax
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    testq %rdi, %rax
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i64 281474976645120, %y
   %t1 = and i64 %t0, %x
   %res = icmp eq i64 %t1, 0
@@ -589,39 +457,48 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 ;------------------------------------------------------------------------------;
 
 define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X86-LABEL: vec_4xi32_splat_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pslld $23, %xmm1
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    cvttps2dq %xmm1, %xmm1
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-NEXT:    pmuludq %xmm2, %xmm3
-; X86-NEXT:    pmuludq %xmm1, %xmm2
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-NEXT:    pand %xmm1, %xmm0
-; X86-NEXT:    pxor %xmm1, %xmm1
-; X86-NEXT:    pcmpeqd %xmm1, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: vec_4xi32_splat_eq:
-; X64:       # %bb.0:
-; X64-NEXT:    pslld $23, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    cvttps2dq %xmm1, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm2, %xmm3
-; X64-NEXT:    pmuludq %xmm1, %xmm2
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-NEXT:    pand %xmm1, %xmm0
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    retq
+; X86-SSE2-LABEL: vec_4xi32_splat_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pslld $23, %xmm1
+; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-SSE2-NEXT:    retl
+;
+; AVX2-LABEL: vec_4xi32_splat_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsllvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_splat_eq:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    pslld $23, %xmm1
+; X64-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-SSE2-NEXT:    pand %xmm1, %xmm0
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-SSE2-NEXT:    retq
   %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -629,41 +506,50 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 }
 
 define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X86-LABEL: vec_4xi32_nonsplat_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pslld $23, %xmm1
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    cvttps2dq %xmm1, %xmm1
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
-; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-NEXT:    pmuludq %xmm1, %xmm2
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X86-NEXT:    pmuludq %xmm3, %xmm1
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X86-NEXT:    pand %xmm2, %xmm0
-; X86-NEXT:    pxor %xmm1, %xmm1
-; X86-NEXT:    pcmpeqd %xmm1, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: vec_4xi32_nonsplat_eq:
-; X64:       # %bb.0:
-; X64-NEXT:    pslld $23, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    cvttps2dq %xmm1, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm1, %xmm2
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm3, %xmm1
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X64-NEXT:    pand %xmm2, %xmm0
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    retq
+; X86-SSE2-LABEL: vec_4xi32_nonsplat_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pslld $23, %xmm1
+; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm3, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-SSE2-NEXT:    pand %xmm2, %xmm0
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-SSE2-NEXT:    retl
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
+; AVX2-NEXT:    vpsllvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_nonsplat_eq:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    pslld $23, %xmm1
+; X64-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,16776960,2147483648]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm3, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-SSE2-NEXT:    pand %xmm2, %xmm0
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-SSE2-NEXT:    retq
   %t0 = shl <4 x i32> <i32 0, i32 1, i32 16776960, i32 2147483648>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -671,117 +557,144 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 }
 
 define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X86-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pslld $23, %xmm1
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    cvttps2dq %xmm1, %xmm1
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-NEXT:    pmuludq %xmm2, %xmm3
-; X86-NEXT:    pmuludq %xmm1, %xmm2
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-NEXT:    pand %xmm1, %xmm0
-; X86-NEXT:    pxor %xmm1, %xmm1
-; X86-NEXT:    pcmpeqd %xmm1, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; X64:       # %bb.0:
-; X64-NEXT:    pslld $23, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    cvttps2dq %xmm1, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm2, %xmm3
-; X64-NEXT:    pmuludq %xmm1, %xmm2
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-NEXT:    pand %xmm1, %xmm0
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    retq
+; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pslld $23, %xmm1
+; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-SSE2-NEXT:    retl
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsllvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    pslld $23, %xmm1
+; X64-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-SSE2-NEXT:    pand %xmm1, %xmm0
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-SSE2-NEXT:    retq
   %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
   ret <4 x i1> %res
 }
 define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X86-LABEL: vec_4xi32_nonsplat_undef1_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pslld $23, %xmm1
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    cvttps2dq %xmm1, %xmm1
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-NEXT:    pmuludq %xmm2, %xmm3
-; X86-NEXT:    pmuludq %xmm1, %xmm2
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-NEXT:    pand %xmm1, %xmm0
-; X86-NEXT:    pxor %xmm1, %xmm1
-; X86-NEXT:    pcmpeqd %xmm1, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: vec_4xi32_nonsplat_undef1_eq:
-; X64:       # %bb.0:
-; X64-NEXT:    pslld $23, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    cvttps2dq %xmm1, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm2, %xmm3
-; X64-NEXT:    pmuludq %xmm1, %xmm2
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-NEXT:    pand %xmm1, %xmm0
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    retq
+; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pslld $23, %xmm1
+; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-SSE2-NEXT:    retl
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsllvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef1_eq:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    pslld $23, %xmm1
+; X64-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-SSE2-NEXT:    pand %xmm1, %xmm0
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-SSE2-NEXT:    retq
   %t0 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
   ret <4 x i1> %res
 }
 define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X86-LABEL: vec_4xi32_nonsplat_undef2_eq:
-; X86:       # %bb.0:
-; X86-NEXT:    pslld $23, %xmm1
-; X86-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    cvttps2dq %xmm1, %xmm1
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X86-NEXT:    pmuludq %xmm2, %xmm3
-; X86-NEXT:    pmuludq %xmm1, %xmm2
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-NEXT:    pand %xmm1, %xmm0
-; X86-NEXT:    pxor %xmm1, %xmm1
-; X86-NEXT:    pcmpeqd %xmm1, %xmm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: vec_4xi32_nonsplat_undef2_eq:
-; X64:       # %bb.0:
-; X64-NEXT:    pslld $23, %xmm1
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm1
-; X64-NEXT:    cvttps2dq %xmm1, %xmm1
-; X64-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; X64-NEXT:    pmuludq %xmm2, %xmm3
-; X64-NEXT:    pmuludq %xmm1, %xmm2
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-NEXT:    pand %xmm1, %xmm0
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    pcmpeqd %xmm1, %xmm0
-; X64-NEXT:    retq
+; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pslld $23, %xmm1
+; X86-SSE2-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-SSE2-NEXT:    pand %xmm1, %xmm0
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X86-SSE2-NEXT:    retl
+;
+; AVX2-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT:    vpsllvd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef2_eq:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    pslld $23, %xmm1
+; X64-SSE2-NEXT:    paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-SSE2-NEXT:    pand %xmm1, %xmm0
+; X64-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; X64-SSE2-NEXT:    retq
   %t0 = shl <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
   %t1 = and <4 x i32> %t0, %x
   %res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0>
@@ -822,15 +735,6 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ;------------------------------------------------------------------------------;
 
 define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
-; X86-NOBMI-LABEL: scalar_i32_x_is_const_eq:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
-; X86-NOBMI-NEXT:    shll %cl, %eax
-; X86-NOBMI-NEXT:    testb $1, %al
-; X86-NOBMI-NEXT:    sete %al
-; X86-NOBMI-NEXT:    retl
-;
 ; X86-BMI1-LABEL: scalar_i32_x_is_const_eq:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
@@ -840,24 +744,14 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; X86-BMI1-NEXT:    sete %al
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI12-LABEL: scalar_i32_x_is_const_eq:
-; X86-BMI12:       # %bb.0:
-; X86-BMI12-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI12-NEXT:    movl $-1437226411, %ecx # imm = 0xAA55AA55
-; X86-BMI12-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI12-NEXT:    testb $1, %al
-; X86-BMI12-NEXT:    sete %al
-; X86-BMI12-NEXT:    retl
-;
-; X64-NOBMI-LABEL: scalar_i32_x_is_const_eq:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl %edi, %ecx
-; X64-NOBMI-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    testb $1, %al
-; X64-NOBMI-NEXT:    sete %al
-; X64-NOBMI-NEXT:    retq
+; X86-BMI2-LABEL: scalar_i32_x_is_const_eq:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1437226411, %ecx # imm = 0xAA55AA55
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    testb $1, %al
+; X86-BMI2-NEXT:    sete %al
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i32_x_is_const_eq:
 ; X64-BMI1:       # %bb.0:
@@ -869,13 +763,13 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; X64-BMI1-NEXT:    sete %al
 ; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI12-LABEL: scalar_i32_x_is_const_eq:
-; X64-BMI12:       # %bb.0:
-; X64-BMI12-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
-; X64-BMI12-NEXT:    shlxl %edi, %eax, %eax
-; X64-BMI12-NEXT:    testb $1, %al
-; X64-BMI12-NEXT:    sete %al
-; X64-BMI12-NEXT:    retq
+; X64-BMI2-LABEL: scalar_i32_x_is_const_eq:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-1437226411, %eax # imm = 0xAA55AA55
+; X64-BMI2-NEXT:    shlxl %edi, %eax, %eax
+; X64-BMI2-NEXT:    testb $1, %al
+; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    retq
   %t0 = shl i32 2857740885, %y
   %t1 = and i32 %t0, 1
   %res = icmp eq i32 %t1, 0

From da59652c1ba15ee780cf38186933dcd135e36ed4 Mon Sep 17 00:00:00 2001
From: Erich Keane <erich.keane@intel.com>
Date: Wed, 5 Jun 2019 14:10:39 +0000
Subject: [PATCH 1122/1176] Avoid using NoThrow Exception Specifier in non-C++
 Modes.

As reported in https://bugs.llvm.org/show_bug.cgi?id=42113, there are a
number of locations in Clang where it is assumed that exception
specifications are only valid in C++ mode. Since the original
justification for the NoThrow Exception Specifier Type was C++ related,
this patch just makes C mode use the attribute-based nothrow handling.

Additionally, I noticed that the handling of non-prototype functions
regressed the behavior of the nothrow attribute, in part because it is
was listed in the function type macro(which I did in the previous
patch).  In reality, it should only be doing so in a conditional nature,
so this patch removes it there and puts it directly in the switch to be
handled correctly.

llvm-svn: 362607
---
 clang/include/clang/Sema/ParsedAttr.h |  2 +-
 clang/lib/Sema/SemaType.cpp           | 25 ++++++++++++-------------
 clang/test/Sema/attr-nothrow.c        | 18 ++++++++++++++++++
 3 files changed, 31 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/Sema/attr-nothrow.c

diff --git a/clang/include/clang/Sema/ParsedAttr.h b/clang/include/clang/Sema/ParsedAttr.h
index a42e4547a9679..d87d5da04accf 100644
--- a/clang/include/clang/Sema/ParsedAttr.h
+++ b/clang/include/clang/Sema/ParsedAttr.h
@@ -440,7 +440,7 @@ class ParsedAttr final
   }
 
   bool isUsedAsTypeAttr() const { return UsedAsTypeAttr; }
-  void setUsedAsTypeAttr() { UsedAsTypeAttr = true; }
+  void setUsedAsTypeAttr(bool Used = true) { UsedAsTypeAttr = Used; }
 
   /// True if the attribute is specified using '#pragma clang attribute'.
   bool isPragmaClangAttribute() const { return IsPragmaClangAttribute; }
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index d473fb6c8f3dc..27f034bd1404e 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -130,7 +130,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
   case ParsedAttr::AT_Regparm:                                                 \
   case ParsedAttr::AT_AnyX86NoCallerSavedRegisters:                            \
   case ParsedAttr::AT_AnyX86NoCfCheck:                                         \
-  case ParsedAttr::AT_NoThrow:                                                 \
     CALLING_CONV_ATTRS_CASELIST
 
 // Microsoft-specific type qualifiers.
@@ -6947,23 +6946,17 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
   }
 
   if (attr.getKind() == ParsedAttr::AT_NoThrow) {
-    if (S.CheckAttrNoArgs(attr))
-      return true;
-
     // Delay if this is not a function type.
     if (!unwrapped.isFunctionType())
       return false;
 
-    // Otherwise we can process right away.
-    auto *Proto = unwrapped.get()->getAs<FunctionProtoType>();
-
-    // In the case where this is a FunctionNoProtoType instead of a
-    // FunctionProtoType, let the existing NoThrowAttr implementation do its
-    // thing.
-    if (!Proto)
-      return false;
+    if (S.CheckAttrNoArgs(attr)) {
+      attr.setInvalid();
+      return true;
+    }
 
-    attr.setUsedAsTypeAttr();
+    // Otherwise we can process right away.
+    auto *Proto = unwrapped.get()->castAs<FunctionProtoType>();
 
     // MSVC ignores nothrow if it is in conflict with an explicit exception
     // specification.
@@ -7668,6 +7661,12 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
         attr.setInvalid();
       break;
 
+    case ParsedAttr::AT_NoThrow:
+    // Exception Specifications aren't generally supported in C mode throughout
+    // clang, so revert to attribute-based handling for C.
+      if (!state.getSema().getLangOpts().CPlusPlus)
+        break;
+      LLVM_FALLTHROUGH;
     FUNCTION_TYPE_ATTRS_CASELIST:
       attr.setUsedAsTypeAttr();
 
diff --git a/clang/test/Sema/attr-nothrow.c b/clang/test/Sema/attr-nothrow.c
new file mode 100644
index 0000000000000..c44462846ec87
--- /dev/null
+++ b/clang/test/Sema/attr-nothrow.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 %s -verify
+// RUN: %clang_cc1 %s -ast-dump | FileCheck %s
+// expected-no-diagnostics
+
+// PR42113: The following caused an assertion in mergeFunctionTypes
+// because it causes one side to have an exception specification, which
+// isn't typically supported in C.
+void PR42113a();
+void PR42113a(void) __attribute__((nothrow));
+// CHECK: FunctionDecl {{.*}} PR42113a
+// CHECK: FunctionDecl {{.*}} PR42113a
+// CHECK: NoThrowAttr
+void PR42113b() __attribute__((nothrow));
+// CHECK: FunctionDecl {{.*}} PR42113b
+// CHECK: NoThrowAttr
+ __attribute__((nothrow)) void PR42113c();
+// CHECK: FunctionDecl {{.*}} PR42113c
+// CHECK: NoThrowAttr

From d34797dfc26c61cea19f45669a13ea572172ba34 Mon Sep 17 00:00:00 2001
From: Whitney Tsang <whitney.uwaterloo@gmail.com>
Date: Wed, 5 Jun 2019 14:34:12 +0000
Subject: [PATCH 1123/1176] Title: [LOOPINFO] Extend Loop object to add
 utilities to get the loop bounds, step, and loop induction variable.

Summary: This PR extends the loop object with more utilities to get loop
bounds, step, and loop induction variable. There already exists passes
which try to obtain the loop induction variable in their own pass, e.g.
loop interchange. It would be useful to have a common area to get these
information.

/// Example:
/// for (int i = lb; i < ub; i+=step)
///   <loop body>
/// --- pseudo LLVMIR ---
/// beforeloop:
///   guardcmp = (lb < ub)
///   if (guardcmp) goto preheader; else goto afterloop
/// preheader:
/// loop:
///   i1 = phi[{lb, preheader}, {i2, latch}]
///   <loop body>
///   i2 = i1 + step
/// latch:
///   cmp = (i2 < ub)
///   if (cmp) goto loop
/// exit:
/// afterloop:
///
/// getBounds
///   getInitialIVValue      --> lb
///   getStepInst            --> i2 = i1 + step
///   getStepValue           --> step
///   getFinalIVValue        --> ub
///   getCanonicalPredicate  --> '<'
///   getDirection           --> Increasing
/// getInductionVariable          --> i1
/// getAuxiliaryInductionVariable --> {i1}
/// isCanonical                   --> false

Reviewers: kbarton, hfinkel, dmgreen, Meinersbur, jdoerfert, syzaara,
fhahn
Reviewed By: kbarton
Subscribers: tvvikram, bmahjour, etiotto, fhahn, jsji, hiraditya,
llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D60565

llvm-svn: 362609
---
 llvm/include/llvm/Analysis/LoopInfo.h    | 161 ++++
 llvm/lib/Analysis/LoopInfo.cpp           | 216 ++++++
 llvm/unittests/Analysis/LoopInfoTest.cpp | 900 +++++++++++++++++++++++
 3 files changed, 1277 insertions(+)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 6b964cdf9eae5..a4c8f648685c8 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -54,9 +54,11 @@ namespace llvm {
 class DominatorTree;
 class LoopInfo;
 class Loop;
+class InductionDescriptor;
 class MDNode;
 class MemorySSAUpdater;
 class PHINode;
+class ScalarEvolution;
 class raw_ostream;
 template <class N, bool IsPostDom> class DominatorTreeBase;
 template <class N, class M> class LoopInfoBase;
@@ -529,6 +531,165 @@ class Loop : public LoopBase<BasicBlock, Loop> {
   bool getIncomingAndBackEdge(BasicBlock *&Incoming,
                               BasicBlock *&Backedge) const;
 
+  /// Below are some utilities to get loop bounds and induction variable, and
+  /// check if a given phinode is an auxiliary induction variable, as well as
+  /// checking if the loop is canonical.
+  ///
+  /// Here is an example:
+  /// \code
+  /// for (int i = lb; i < ub; i+=step)
+  ///   <loop body>
+  /// --- pseudo LLVMIR ---
+  /// beforeloop:
+  ///   guardcmp = (lb < ub)
+  ///   if (guardcmp) goto preheader; else goto afterloop
+  /// preheader:
+  /// loop:
+  ///   i_1 = phi[{lb, preheader}, {i_2, latch}]
+  ///   <loop body>
+  ///   i_2 = i_1 + step
+  /// latch:
+  ///   cmp = (i_2 < ub)
+  ///   if (cmp) goto loop
+  /// exit:
+  /// afterloop:
+  /// \endcode
+  ///
+  /// - getBounds
+  ///   - getInitialIVValue      --> lb
+  ///   - getStepInst            --> i_2 = i_1 + step
+  ///   - getStepValue           --> step
+  ///   - getFinalIVValue        --> ub
+  ///   - getCanonicalPredicate  --> '<'
+  ///   - getDirection           --> Increasing
+  ///
+  /// - getInductionVariable            --> i_1
+  /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+  /// - isCanonical                     --> false
+  struct LoopBounds {
+    /// Return the LoopBounds object if
+    /// - the given \p IndVar is an induction variable
+    /// - the initial value of the induction variable can be found
+    /// - the step instruction of the induction variable can be found
+    /// - the final value of the induction variable can be found
+    ///
+    /// Else None.
+    static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
+                                                ScalarEvolution &SE);
+
+    /// Get the initial value of the loop induction variable.
+    Value &getInitialIVValue() const { return InitialIVValue; }
+
+    /// Get the instruction that updates the loop induction variable.
+    Instruction &getStepInst() const { return StepInst; }
+
+    /// Get the step that the loop induction variable gets updated by in each
+    /// loop iteration. Return nullptr if not found.
+    Value *getStepValue() const { return StepValue; }
+
+    /// Get the final value of the loop induction variable.
+    Value &getFinalIVValue() const { return FinalIVValue; }
+
+    /// Return the canonical predicate for the latch compare instruction, if
+    /// able to be calcuated. Else BAD_ICMP_PREDICATE.
+    ///
+    /// A predicate is considered as canonical if requirements below are all
+    /// satisfied:
+    /// 1. The first successor of the latch branch is the loop header
+    ///    If not, inverse the predicate.
+    /// 2. One of the operands of the latch comparison is StepInst
+    ///    If not, and
+    ///    - if the current calcuated predicate is not ne or eq, flip the
+    ///      predicate.
+    ///    - else if the loop is increasing, return slt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///    - else if the loop is decreasing, return sgt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///
+    /// Here is an example when both (1) and (2) are not satisfied:
+    /// \code
+    /// loop.header:
+    ///  %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
+    ///  %inc = add %iv, %step
+    ///  %cmp = slt %iv, %finaliv
+    ///  br %cmp, %loop.exit, %loop.header
+    /// loop.exit:
+    /// \endcode
+    /// - The second successor of the latch branch is the loop header instead
+    ///   of the first successor (slt -> sge)
+    /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
+    ///   instead of the StepInst (%inc) (sge -> sgt)
+    ///
+    /// The predicate would be sgt if both (1) and (2) are satisfied.
+    /// getCanonicalPredicate() returns sgt for this example.
+    /// Note: The IR is not changed.
+    ICmpInst::Predicate getCanonicalPredicate() const;
+
+    /// An enum for the direction of the loop
+    /// - for (int i = 0; i < ub; ++i)  --> Increasing
+    /// - for (int i = ub; i > 0; --i)  --> Descresing
+    /// - for (int i = x; i != y; i+=z) --> Unknown
+    enum class Direction { Increasing, Decreasing, Unknown };
+
+    /// Get the direction of the loop.
+    Direction getDirection() const;
+
+  private:
+    LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
+               ScalarEvolution &SE)
+        : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
+          FinalIVValue(F), SE(SE) {}
+
+    const Loop &L;
+
+    // The initial value of the loop induction variable
+    Value &InitialIVValue;
+
+    // The instruction that updates the loop induction variable
+    Instruction &StepInst;
+
+    // The value that the loop induction variable gets updated by in each loop
+    // iteration
+    Value *StepValue;
+
+    // The final value of the loop induction variable
+    Value &FinalIVValue;
+
+    ScalarEvolution &SE;
+  };
+
+  /// Return the struct LoopBounds collected if all struct members are found,
+  /// else None.
+  Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
+
+  /// Return the loop induction variable if found, else return nullptr.
+  /// An instruction is considered as the loop induction variable if
+  /// - it is an induction variable of the loop; and
+  /// - it is used to determine the condition of the branch in the loop latch
+  ///
+  /// Note: the induction variable doesn't need to be canonical, i.e. starts at
+  /// zero and increments by one each time through the loop (but it can be).
+  PHINode *getInductionVariable(ScalarEvolution &SE) const;
+
+  /// Get the loop induction descriptor for the loop induction variable. Return
+  /// true if the loop induction variable is found.
+  bool getInductionDescriptor(ScalarEvolution &SE,
+                              InductionDescriptor &IndDesc) const;
+
+  /// Return true if the given PHINode \p AuxIndVar is
+  /// - in the loop header
+  /// - not used outside of the loop
+  /// - incremented by a loop invariant step for each loop iteration
+  /// - step instruction opcode should be add or sub
+  /// Note: auxiliary induction variable is not required to be used in the
+  ///       conditional branch in the loop latch. (but it can be)
+  bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                    ScalarEvolution &SE) const;
+
+  /// Return true if the loop induction variable starts at zero and increments
+  /// by one each time through the loop.
+  bool isCanonical(ScalarEvolution &SE) const;
+
   /// Return true if the Loop is in LCSSA form.
   bool isLCSSAForm(DominatorTree &DT) const;
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index aa933d98f249b..d7120ba3e8b19 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -17,10 +17,12 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
@@ -164,6 +166,220 @@ PHINode *Loop::getCanonicalInductionVariable() const {
   return nullptr;
 }
 
+/// Get the latch condition instruction.
+static ICmpInst *getLatchCmpInst(const Loop &L) {
+  if (BasicBlock *Latch = L.getLoopLatch())
+    if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+      if (BI->isConditional())
+        return dyn_cast<ICmpInst>(BI->getCondition());
+
+  return nullptr;
+}
+
+/// Return the final value of the loop induction variable if found.
+static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
+                               const Instruction &StepInst) {
+  ICmpInst *LatchCmpInst = getLatchCmpInst(L);
+  if (!LatchCmpInst)
+    return nullptr;
+
+  Value *Op0 = LatchCmpInst->getOperand(0);
+  Value *Op1 = LatchCmpInst->getOperand(1);
+  if (Op0 == &IndVar || Op0 == &StepInst)
+    return Op1;
+
+  if (Op1 == &IndVar || Op1 == &StepInst)
+    return Op0;
+
+  return nullptr;
+}
+
+Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
+                                                       PHINode &IndVar,
+                                                       ScalarEvolution &SE) {
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
+    return None;
+
+  Value *InitialIVValue = IndDesc.getStartValue();
+  Instruction *StepInst = IndDesc.getInductionBinOp();
+  if (!InitialIVValue || !StepInst)
+    return None;
+
+  const SCEV *Step = IndDesc.getStep();
+  Value *StepInstOp1 = StepInst->getOperand(1);
+  Value *StepInstOp0 = StepInst->getOperand(0);
+  Value *StepValue = nullptr;
+  if (SE.getSCEV(StepInstOp1) == Step)
+    StepValue = StepInstOp1;
+  else if (SE.getSCEV(StepInstOp0) == Step)
+    StepValue = StepInstOp0;
+
+  Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
+  if (!FinalIVValue)
+    return None;
+
+  return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
+                    SE);
+}
+
+using Direction = Loop::LoopBounds::Direction;
+
+ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
+  BasicBlock *Latch = L.getLoopLatch();
+  assert(Latch && "Expecting valid latch");
+
+  BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
+  assert(BI && BI->isConditional() && "Expecting conditional latch branch");
+
+  ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
+  assert(LatchCmpInst &&
+         "Expecting the latch compare instruction to be a CmpInst");
+
+  // Need to inverse the predicate when first successor is not the loop
+  // header
+  ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
+                                 ? LatchCmpInst->getPredicate()
+                                 : LatchCmpInst->getInversePredicate();
+
+  if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+
+  // Need to flip strictness of the predicate when the latch compare instruction
+  // is not using StepInst
+  if (LatchCmpInst->getOperand(0) == &getStepInst() ||
+      LatchCmpInst->getOperand(1) == &getStepInst())
+    return Pred;
+
+  // Cannot flip strictness of NE and EQ
+  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+    return ICmpInst::getFlippedStrictnessPredicate(Pred);
+
+  Direction D = getDirection();
+  if (D == Direction::Increasing)
+    return ICmpInst::ICMP_SLT;
+
+  if (D == Direction::Decreasing)
+    return ICmpInst::ICMP_SGT;
+
+  // If cannot determine the direction, then unable to find the canonical
+  // predicate
+  return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Direction Loop::LoopBounds::getDirection() const {
+  if (const SCEVAddRecExpr *StepAddRecExpr =
+          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
+    if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
+      if (SE.isKnownPositive(StepRecur))
+        return Direction::Increasing;
+      if (SE.isKnownNegative(StepRecur))
+        return Direction::Decreasing;
+    }
+
+  return Direction::Unknown;
+}
+
+Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return LoopBounds::getBounds(*this, *IndVar, SE);
+
+  return None;
+}
+
+PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
+  if (!isLoopSimplifyForm())
+    return nullptr;
+
+  BasicBlock *Header = getHeader();
+  assert(Header && "Expected a valid loop header");
+  BasicBlock *Latch = getLoopLatch();
+  assert(Latch && "Expected a valid loop latch");
+  ICmpInst *CmpInst = getLatchCmpInst(*this);
+  if (!CmpInst)
+    return nullptr;
+
+  Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
+  Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+
+  for (PHINode &IndVar : Header->phis()) {
+    InductionDescriptor IndDesc;
+    if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
+      continue;
+
+    Instruction *StepInst = IndDesc.getInductionBinOp();
+
+    // case 1:
+    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+    // StepInst = IndVar + step
+    // cmp = StepInst < FinalValue
+    if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1)
+      return &IndVar;
+
+    // case 2:
+    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+    // StepInst = IndVar + step
+    // cmp = IndVar < FinalValue
+    if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1)
+      return &IndVar;
+  }
+
+  return nullptr;
+}
+
+bool Loop::getInductionDescriptor(ScalarEvolution &SE,
+                                  InductionDescriptor &IndDesc) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
+
+  return false;
+}
+
+bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                        ScalarEvolution &SE) const {
+  // Located in the loop header
+  BasicBlock *Header = getHeader();
+  if (AuxIndVar.getParent() != Header)
+    return false;
+
+  // No uses outside of the loop
+  for (User *U : AuxIndVar.users())
+    if (const Instruction *I = dyn_cast<Instruction>(U))
+      if (!contains(I))
+        return false;
+
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
+    return false;
+
+  // The step instruction opcode should be add or sub.
+  if (IndDesc.getInductionOpcode() != Instruction::Add &&
+      IndDesc.getInductionOpcode() != Instruction::Sub)
+    return false;
+
+  // Incremented by a loop invariant step for each loop iteration
+  return SE.isLoopInvariant(IndDesc.getStep(), this);
+}
+
+bool Loop::isCanonical(ScalarEvolution &SE) const {
+  InductionDescriptor IndDesc;
+  if (!getInductionDescriptor(SE, IndDesc))
+    return false;
+
+  ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
+  if (!Init || !Init->isZero())
+    return false;
+
+  if (IndDesc.getInductionOpcode() != Instruction::Add)
+    return false;
+
+  ConstantInt *Step = IndDesc.getConstIntStepValue();
+  if (!Step || !Step->isOne())
+    return false;
+
+  return true;
+}
+
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
                                DominatorTree &DT) {
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 483532a187527..005e1dc405b75 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -7,6 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/SourceMgr.h"
@@ -26,6 +30,26 @@ runWithLoopInfo(Module &M, StringRef FuncName,
   Test(*F, LI);
 }
 
+/// Build the loop info and scalar evolution for the function and run the Test.
+static void runWithLoopInfoPlus(
+    Module &M, StringRef FuncName,
+    function_ref<void(Function &F, LoopInfo &LI, ScalarEvolution &SE,
+                      PostDominatorTree &PDT)>
+        Test) {
+  auto *F = M.getFunction(FuncName);
+  ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
+
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  AssumptionCache AC(*F);
+  DominatorTree DT(*F);
+  LoopInfo LI(DT);
+  ScalarEvolution SE(*F, TLI, AC, DT, LI);
+
+  PostDominatorTree PDT(*F);
+  Test(*F, LI, SE, PDT);
+}
+
 static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
                                               const char *ModuleStr) {
   SMDiagnostic Err;
@@ -210,3 +234,879 @@ TEST(LoopInfoTest, PreorderTraversals) {
   EXPECT_EQ(&L_0_1, ReverseSiblingPreorder[6]);
   EXPECT_EQ(&L_0_0, ReverseSiblingPreorder[7]);
 }
+
+TEST(LoopInfoTest, CanonicalLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithInverseGuardSuccs) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp sge i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.end, label %for.preheader\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithSwappedGuardCmp) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp sgt i32 %ub, 0\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp sge i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.exit, label %for.body\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithInverseLatchSuccs) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp sge i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.exit, label %for.body\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithLatchCmpNE) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp ne i32 %i, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithGuardCmpSLE) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %ubPlusOne = add i32 %ub, 1\n"
+      "  %guardcmp = icmp sle i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp ne i32 %i, %ubPlusOne\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ubPlusOne");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopNonConstantStep) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = zext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, %step\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(), Loop::LoopBounds::Direction::Unknown);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopUnsignedBounds) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp ult i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = zext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add i32 %i, 1\n"
+      "  %cmp = icmp ult i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_ULT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, DecreasingLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ %ub, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = sub nsw i32 %i, 1\n"
+      "  %cmp = icmp sgt i32 %inc, 0\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        EXPECT_EQ(Bounds->getInitialIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_EQ(StepValue, nullptr);
+        ConstantInt *FinalIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getFinalIVValue());
+        EXPECT_TRUE(FinalIVValue && FinalIVValue->isZero());
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SGT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Decreasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, CannotFindDirection) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, %step\n"
+      "  %cmp = icmp ne i32 %i, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(*M, "foo",
+                      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+                          PostDominatorTree &PDT) {
+                        Function::iterator FI = F.begin();
+                        // First two basic block are entry and for.preheader
+                        // - skip them.
+                        ++FI;
+                        BasicBlock *Header = &*(++FI);
+                        assert(Header->getName() == "for.body");
+                        Loop *L = LI.getLoopFor(Header);
+                        EXPECT_NE(L, nullptr);
+
+                        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+                        EXPECT_NE(Bounds, None);
+                        ConstantInt *InitialIVValue =
+                            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+                        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+                        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+                        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
+                        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+                        EXPECT_EQ(Bounds->getCanonicalPredicate(),
+                                  ICmpInst::BAD_ICMP_PREDICATE);
+                        EXPECT_EQ(Bounds->getDirection(),
+                                  Loop::LoopBounds::Direction::Unknown);
+                        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+                      });
+}
+
+TEST(LoopInfoTest, ZextIndVar) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %for.body ]\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %wide.trip.count = zext i32 %ub to i64\n"
+      "  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count\n"
+      "  br i1 %exitcond, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "indvars.iv.next");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "wide.trip.count");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_NE);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "indvars.iv");
+      });
+}
+
+TEST(LoopInfoTest, UnguardedLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First basic block is entry - skip it.
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, UnguardedLoopWithControlFlow) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i1 %cond) {\n"
+      "entry:\n"
+      "  br i1 %cond, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopNest) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.outer.preheader, label %for.end\n"
+      "for.outer.preheader:\n"
+      "  br label %for.outer\n"
+      "for.outer:\n"
+      "  %j = phi i32 [ 0, %for.outer.preheader ], [ %inc.outer, %for.outer.latch ]\n"
+      "  br i1 %guardcmp, label %for.inner.preheader, label %for.outer.latch\n"
+      "for.inner.preheader:\n"
+      "  br label %for.inner\n"
+      "for.inner:\n"
+      "  %i = phi i32 [ 0, %for.inner.preheader ], [ %inc, %for.inner ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.inner, label %for.inner.exit\n"
+      "for.inner.exit:\n"
+      "  br label %for.outer.latch\n"
+      "for.outer.latch:\n"
+      "  %inc.outer = add nsw i32 %j, 1\n"
+      "  %cmp.outer = icmp slt i32 %inc.outer, %ub\n"
+      "  br i1 %cmp.outer, label %for.outer, label %for.outer.exit\n"
+      "for.outer.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.outer.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.outer");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc.outer");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "j");
+
+        // Next two basic blocks are for.outer and for.inner.preheader - skip
+        // them.
+        ++FI;
+        Header = &*(++FI);
+        assert(Header->getName() == "for.inner");
+        L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> InnerBounds = L->getBounds(SE);
+        EXPECT_NE(InnerBounds, None);
+        InitialIVValue =
+            dyn_cast<ConstantInt>(&InnerBounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(InnerBounds->getStepInst().getName(), "inc");
+        StepValue = dyn_cast_or_null<ConstantInt>(InnerBounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(InnerBounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(InnerBounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(InnerBounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, AuxiliaryIV) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %aux = phi i32 [ 0, %for.preheader ], [ %auxinc, %for.body ]\n"
+      "  %loopvariant = phi i32 [ 0, %for.preheader ], [ %loopvariantinc, %for.body ]\n"
+      "  %usedoutside = phi i32 [ 0, %for.preheader ], [ %usedoutsideinc, %for.body ]\n"
+      "  %mulopcode = phi i32 [ 0, %for.preheader ], [ %mulopcodeinc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %mulopcodeinc = mul nsw i32 %mulopcode, 5\n"
+      "  %usedoutsideinc = add nsw i32 %usedoutside, 5\n"
+      "  %loopvariantinc = add nsw i32 %loopvariant, %i\n"
+      "  %auxinc = add nsw i32 %aux, 5\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  %lcssa = phi i32 [ %usedoutside, %for.body ]\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+        BasicBlock::iterator II = Header->begin();
+        PHINode &Instruction_i = cast<PHINode>(*(II));
+        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_i, SE));
+        PHINode &Instruction_aux = cast<PHINode>(*(++II));
+        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_aux, SE));
+        PHINode &Instruction_loopvariant = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_loopvariant, SE));
+        PHINode &Instruction_usedoutside = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_usedoutside, SE));
+        PHINode &Instruction_mulopcode = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE));
+      });
+}

From b90b35479888735a2bc7a7312e97fcf2e39a0f7b Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Wed, 5 Jun 2019 14:43:58 +0000
Subject: [PATCH 1124/1176] [LoopInfo] Fix unused variable warning. NFC.

llvm-svn: 362610
---
 llvm/lib/Analysis/LoopInfo.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index d7120ba3e8b19..50b5694297dda 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -293,8 +293,7 @@ PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
 
   BasicBlock *Header = getHeader();
   assert(Header && "Expected a valid loop header");
-  BasicBlock *Latch = getLoopLatch();
-  assert(Latch && "Expected a valid loop latch");
+  assert(getLoopLatch() && "Expected a valid loop latch");
   ICmpInst *CmpInst = getLatchCmpInst(*this);
   if (!CmpInst)
     return nullptr;

From 7ca9b978c4f4c7ecf652cac0c900b074d8dc48d7 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Wed, 5 Jun 2019 14:50:01 +0000
Subject: [PATCH 1125/1176] [OpenCL][PR42031] Prevent deducing addr space in
 type alias.

Similar to typedefs we shouldn't deduce addr space in
type alias.

Differential Revision: https://reviews.llvm.org/D62591

llvm-svn: 362611
---
 clang/lib/Sema/SemaType.cpp                   |  3 +++
 .../SemaOpenCLCXX/address-space-deduction.cl  | 20 ++++++++++++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 27f034bd1404e..47de398d4fde2 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7401,6 +7401,9 @@ static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State,
       (D.getContext() == DeclaratorContext::MemberContext &&
        (!IsPointee &&
         D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static)) ||
+      // Do not deduce addr space of non-pointee in type alias because it
+      // doesn't define any object.
+      (D.getContext() == DeclaratorContext::AliasDeclContext && !IsPointee) ||
       // Do not deduce addr space for types used to define a typedef and the
       // typedef itself, except the pointee type of a pointer type which is used
       // to define the typedef.
diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index d6dcc853a60cd..6b2a07cad748b 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -1,12 +1,26 @@
-//RUN: %clang_cc1 %s -cl-std=c++ -pedantic -ast-dump -verify
+//RUN: %clang_cc1 %s -cl-std=c++ -pedantic -ast-dump -verify | FileCheck %s
 
 //expected-no-diagnostics
 
-//CHECK: |-VarDecl  foo {{.*}} 'const __global int' constexpr cinit
+//CHECK: |-VarDecl {{.*}} foo 'const __global int'
 constexpr int foo = 0;
 
 class c {
 public:
-  //CHECK: `-VarDecl {{.*}} foo2 'const __global int' static constexpr cinit
+  //CHECK: `-VarDecl {{.*}} foo2 'const __global int'
   static constexpr int foo2 = 0;
 };
+
+struct c1 {};
+
+// We only deduce addr space in type alias in pointer types.
+//CHECK: TypeAliasDecl {{.*}} alias_c1 'c1'
+using alias_c1 = c1;
+//CHECK: TypeAliasDecl {{.*}} alias_c1_ptr '__generic c1 *'
+using alias_c1_ptr = c1 *;
+
+struct c2 {
+  alias_c1 y;
+  alias_c1_ptr ptr = &y;
+};
+

From ad62a3a2992744dc9e16fcb248841d7e27be3b7e Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 5 Jun 2019 14:58:04 +0000
Subject: [PATCH 1126/1176] [LoopUtils][SLPVectorizer] clean up management of
 fast-math-flags

Instead of passing around fast-math-flags as a parameter, we can set those
using an IRBuilder guard object. This is no-functional-change-intended.

The motivation is to eventually fix the vectorizers to use and set the
correct fast-math-flags for reductions. Examples of that not behaving as
expected are:
https://bugs.llvm.org/show_bug.cgi?id=23116 (should be able to reduce with less than 'fast')
https://bugs.llvm.org/show_bug.cgi?id=35538 (possible miscompile for -0.0)
D61802 (should be able to reduce with IR-level FMF)

Differential Revision: https://reviews.llvm.org/D62272

llvm-svn: 362612
---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  5 +-
 llvm/lib/CodeGen/ExpandReductions.cpp         |  5 +-
 llvm/lib/Transforms/Utils/LoopUtils.cpp       | 52 ++++++++-----------
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 12 +++--
 4 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 1aad257e08702..2f2365ad5c3a5 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -299,10 +299,10 @@ getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
                     ArrayRef<Value *> RedOps = None);
 
 /// Generates a vector reduction using shufflevectors to reduce the value.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
 Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                            RecurrenceDescriptor::MinMaxRecurrenceKind
                                MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
-                           FastMathFlags FMF = FastMathFlags(),
                            ArrayRef<Value *> RedOps = None);
 
 /// Create a target reduction of the given vector. The reduction operation
@@ -310,17 +310,18 @@ Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
 /// additional information supplied in \p Flags.
 /// The target is queried to determine if intrinsics or shuffle sequences are
 /// required to implement the reduction.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
 Value *createSimpleTargetReduction(IRBuilder<> &B,
                                    const TargetTransformInfo *TTI,
                                    unsigned Opcode, Value *Src,
                                    TargetTransformInfo::ReductionFlags Flags =
                                        TargetTransformInfo::ReductionFlags(),
-                                   FastMathFlags FMF = FastMathFlags(),
                                    ArrayRef<Value *> RedOps = None);
 
 /// Create a generic target reduction using a recurrence descriptor \p Desc
 /// The target is queried to determine if intrinsics or shuffle sequences are
 /// required to implement the reduction.
+/// Fast-math-flags are propagated using the RecurrenceDescriptor.
 Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
                              RecurrenceDescriptor &Desc, Value *Src,
                              bool NoNaN = false);
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 34858883298ed..340ee19c33910 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -118,11 +118,14 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
     }
     if (!TTI->shouldExpandReduction(II))
       continue;
+    // Propagate FMF using the builder.
     FastMathFlags FMF =
         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(FMF);
     Value *Rdx =
         IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
-                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF);
+                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
     II->replaceAllUsesWith(Rdx);
     II->eraseFromParent();
     Changed = true;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 16971b2edbb6c..03d84c39b66fb 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -675,12 +675,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
   return true;
 }
 
-static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
-  if (isa<FPMathOperator>(V))
-    cast<Instruction>(V)->setFastMathFlags(FMF);
-  return V;
-}
-
 Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
                             RecurrenceDescriptor::MinMaxRecurrenceKind RK,
                             Value *Left, Value *Right) {
@@ -761,7 +755,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
 Value *
 llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                           RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
-                          FastMathFlags FMF, ArrayRef<Value *> RedOps) {
+                          ArrayRef<Value *> RedOps) {
   unsigned VF = Src->getType()->getVectorNumElements();
   // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
   // and vector ops, reducing the set of values being computed by half each
@@ -784,10 +778,9 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
         ConstantVector::get(ShuffleMask), "rdx.shuf");
 
     if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
-      // Floating point operations had to be 'fast' to enable the reduction.
-      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
-                                                   TmpVec, Shuf, "bin.rdx"),
-                               FMF);
+      // The builder propagates its fast-math-flags setting.
+      TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+                                   "bin.rdx");
     } else {
       assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
              "Invalid min/max");
@@ -804,7 +797,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
 /// flags (if generating min/max reductions).
 Value *llvm::createSimpleTargetReduction(
     IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
-    Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF,
+    Value *Src, TargetTransformInfo::ReductionFlags Flags,
     ArrayRef<Value *> RedOps) {
   assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
 
@@ -874,7 +867,7 @@ Value *llvm::createSimpleTargetReduction(
   }
   if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
     return BuildFunc();
-  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps);
+  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
 }
 
 /// Create a vector reduction using a given recurrence descriptor.
@@ -887,39 +880,36 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
   RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
   TargetTransformInfo::ReductionFlags Flags;
   Flags.NoNaN = NoNaN;
+
+  // All ops in the reduction inherit fast-math-flags from the recurrence
+  // descriptor.
+  IRBuilder<>::FastMathFlagGuard FMFGuard(B);
+  B.setFastMathFlags(Desc.getFastMathFlags());
+
   switch (RecKind) {
   case RD::RK_FloatAdd:
-    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
   case RD::RK_FloatMult:
-    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
   case RD::RK_IntegerAdd:
-    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
   case RD::RK_IntegerMult:
-    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
   case RD::RK_IntegerAnd:
-    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
   case RD::RK_IntegerOr:
-    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
   case RD::RK_IntegerXor:
-    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
   case RD::RK_IntegerMinMax: {
     RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
     Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
     Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
-    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
   }
   case RD::RK_FloatMinMax: {
     Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
-    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
   }
   default:
     llvm_unreachable("Unhandled RecKind");
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c0c2c85b8f483..72fc9cf41ef3b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6105,6 +6105,9 @@ class HorizontalReduction {
     unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
 
     Value *VectorizedTree = nullptr;
+
+    // FIXME: Fast-math-flags should be set based on the instructions in the
+    //        reduction (not all of 'fast' are required).
     IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
     FastMathFlags Unsafe;
     Unsafe.setFast();
@@ -6294,11 +6297,14 @@ class HorizontalReduction {
     assert(isPowerOf2_32(ReduxWidth) &&
            "We only handle power-of-two reductions for now");
 
-    if (!IsPairwiseReduction)
+    if (!IsPairwiseReduction) {
+      // FIXME: The builder should use an FMF guard. It should not be hard-coded
+      //        to 'fast'.
+      assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
       return createSimpleTargetReduction(
           Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
-          ReductionData.getFlags(), FastMathFlags::getFast(),
-          ReductionOps.back());
+          ReductionData.getFlags(), ReductionOps.back());
+    }
 
     Value *TmpVec = VectorizedValue;
     for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {

From 15c657d13d6f12ec4f9c77af8c5034caca9ba7d7 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <dtemirbulatov@gmail.com>
Date: Wed, 5 Jun 2019 15:26:28 +0000
Subject: [PATCH 1127/1176] [SLP] Fix regression in broadcasts caused by
 operand reordering patch D59973.

This patch fixes a regression caused by the operand reordering refactoring patch https://reviews.llvm.org/D59973 .
The fix changes the strategy to Splat instead of Opcode, if broadcast opportunities are found.
Please see the lit test for some examples.

Committed on behalf of @vporpo (Vasileios Porpodas)

Differential Revision: https://reviews.llvm.org/D62427

llvm-svn: 362613
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 40 +++++++++++--
 .../Transforms/SLPVectorizer/X86/broadcast.ll | 58 +++++++++----------
 2 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 72fc9cf41ef3b..2da9ead14caaf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -917,6 +917,32 @@ class BoUpSLP {
     /// Clears the data.
     void clear() { OpsVec.clear(); }
 
+    /// \Returns true if there are enough operands identical to \p Op to fill
+    /// the whole vector.
+    /// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
+    bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
+      bool OpAPO = getData(OpIdx, Lane).APO;
+      for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
+        if (Ln == Lane)
+          continue;
+        // This is set to true if we found a candidate for broadcast at Lane.
+        bool FoundCandidate = false;
+        for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
+          OperandData &Data = getData(OpI, Ln);
+          if (Data.APO != OpAPO || Data.IsUsed)
+            continue;
+          if (Data.V == Op) {
+            FoundCandidate = true;
+            Data.IsUsed = true;
+            break;
+          }
+        }
+        if (!FoundCandidate)
+          return false;
+      }
+      return true;
+    }
+
   public:
     /// Initialize with all the operands of the instruction vector \p RootVL.
     VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
@@ -971,8 +997,13 @@ class BoUpSLP {
         // side.
         if (isa<LoadInst>(OpLane0))
           ReorderingModes[OpIdx] = ReorderingMode::Load;
-        else if (isa<Instruction>(OpLane0))
-          ReorderingModes[OpIdx] = ReorderingMode::Opcode;
+        else if (isa<Instruction>(OpLane0)) {
+          // Check if OpLane0 should be broadcast.
+          if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
+            ReorderingModes[OpIdx] = ReorderingMode::Splat;
+          else
+            ReorderingModes[OpIdx] = ReorderingMode::Opcode;
+        }
         else if (isa<Constant>(OpLane0))
           ReorderingModes[OpIdx] = ReorderingMode::Constant;
         else if (isa<Argument>(OpLane0))
@@ -990,9 +1021,8 @@ class BoUpSLP {
       for (int Pass = 0; Pass != 2; ++Pass) {
         // Skip the second pass if the first pass did not fail.
         bool StrategyFailed = false;
-        // Mark the operand data as free to use for all but the first pass.
-        if (Pass > 0)
-          clearUsed();
+        // Mark all operand data as free to use.
+        clearUsed();
         // We keep the original operand order for the FirstLane, so reorder the
         // rest of the lanes. We are visiting the nodes in a circular fashion,
         // using FirstLane as the center point and increasing the radius
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
index 79525cff5a3ca..cb6c9461ceb63 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast.ll
@@ -7,29 +7,31 @@
 ; S[2] = %v2 + %v1
 ; S[3] = %v1 + %v2
 ;
-; TODO: We should broadcast %v1 and %v2
+; We broadcast %v1 and %v2
 ;
+
 define void @bcast_vals(i64 *%A, i64 *%B, i64 *%S) {
 ; CHECK-LABEL: @bcast_vals(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[B0:%.*]] = load i64, i64* [[B:%.*]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i64> [[TMP1]], <i64 1, i64 1>
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> undef, i64 [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP5]], i32 1
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[V1:%.*]] = sub i64 [[A0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = sub i64 [[B0]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> undef, i64 [[V1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[V1]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[V1]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[V1]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i64> undef, i64 [[V2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[V2]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[V2]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i64> [[TMP6]], i64 [[V2]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[TMP3]], [[TMP7]]
 ; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i64, i64* [[S:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 1
 ; CHECK-NEXT:    [[IDXS2:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 2
 ; CHECK-NEXT:    [[IDXS3:%.*]] = getelementptr inbounds i64, i64* [[S]], i64 3
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>*
-; CHECK-NEXT:    store <4 x i64> [[TMP7]], <4 x i64>* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[IDXS0]] to <4 x i64>*
+; CHECK-NEXT:    store <4 x i64> [[TMP8]], <4 x i64>* [[TMP9]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -61,7 +63,8 @@ entry:
 ; S[2] = %v5 + %v1
 ; S[3] = %v1 + %v4
 ;
-; TODO: We should broadcast %v1.
+; We broadcast %v1.
+
 ;
 define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
 ; CHECK-LABEL: @bcast_vals2(
@@ -72,25 +75,22 @@ define void @bcast_vals2(i16 *%A, i16 *%B, i16 *%C, i16 *%D, i16 *%E, i32 *%S) {
 ; CHECK-NEXT:    [[D0:%.*]] = load i16, i16* [[D:%.*]], align 8
 ; CHECK-NEXT:    [[E0:%.*]] = load i16, i16* [[E:%.*]], align 8
 ; CHECK-NEXT:    [[V1:%.*]] = sext i16 [[A0]] to i32
-; CHECK-NEXT:    [[V2:%.*]] = sext i16 [[B0]] to i32
-; CHECK-NEXT:    [[V3:%.*]] = sext i16 [[C0]] to i32
-; CHECK-NEXT:    [[V4:%.*]] = sext i16 [[D0]] to i32
-; CHECK-NEXT:    [[V5:%.*]] = sext i16 [[E0]] to i32
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[V1]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[V3]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V5]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[V1]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[V2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[V1]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V4]], i32 3
-; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP3]], [[TMP7]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> undef, i16 [[B0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> [[TMP0]], i16 [[C0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[E0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[D0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[V1]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V1]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[V1]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[V1]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP4]]
 ; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
 ; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 1
 ; CHECK-NEXT:    [[IDXS2:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 2
 ; CHECK-NEXT:    [[IDXS3:%.*]] = getelementptr inbounds i32, i32* [[S]], i64 3
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>*
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[IDXS0]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:

From 5da702308c520c3c3e037c21a05a8e5f8d36b85c Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Wed, 5 Jun 2019 15:29:50 +0000
Subject: [PATCH 1128/1176] [llvm-readobj] - Remove TODOs from
 gnu-hash-symbols.test and demangle.test test cases.

We can remove this TODOs now.

Differential revision: https://reviews.llvm.org/D62846

llvm-svn: 362614
---
 llvm/test/tools/llvm-readobj/demangle.test         | 14 +++-----------
 llvm/test/tools/llvm-readobj/gnu-hash-symbols.test | 14 +++-----------
 2 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/demangle.test b/llvm/test/tools/llvm-readobj/demangle.test
index fb6c64b8e9a8b..a1a881e6436d2 100644
--- a/llvm/test/tools/llvm-readobj/demangle.test
+++ b/llvm/test/tools/llvm-readobj/demangle.test
@@ -117,18 +117,11 @@ FileHeader:
   Type:            ET_DYN
   Machine:         EM_X86_64
 Sections:
-  ## TODO:  Replace the raw section contents with more meaningful dynamic
-  ## tags/symbols/etc, once yaml2obj supports it.
-  ## FIXME: yaml2obj does not currently allow custom addresses for .dynstr and
-  ## .dynsym if DynamicSymbols are specified.
-  ## See https://bugs.llvm.org/show_bug.cgi?id=40339
   - Name:         .dynstr
     Type:         SHT_STRTAB
     Flags:        [ SHF_ALLOC ]
     AddressAlign: 0x100
     EntSize:      0x1
-    ## "\0_Z3fooi\0"
-    Content: "005f5a33666f6f6900"
   - Name:         .dynsym
     Type:         SHT_DYNSYM
     Flags:        [ SHF_ALLOC ]
@@ -136,10 +129,6 @@ Sections:
     Address:      0x100
     AddressAlign: 0x100
     EntSize:      0x18
-    ## Null symbol;
-    ## st_name: 1; st_info: Global | Func; st_other: 0;
-    ##   st_shndx: .text.foo; st_value: 0x2000; st_size: 0
-    Content: "000000000000000000000000000000000000000000000000010000001200050000200000000000000000000000000000"
   - Name:         .rela.dyn
     Type:         SHT_RELA
     Flags:        [ SHF_ALLOC ]
@@ -218,6 +207,9 @@ Symbols:
     Type:    STT_FUNC
     Section: .text.foo
     Binding: STB_GLOBAL
+DynamicSymbols:
+  - Name:    _Z3fooi
+    Binding: STB_GLOBAL
 ProgramHeaders:
   - Type:  PT_LOAD
     Flags: [ PF_R, PF_X ]
diff --git a/llvm/test/tools/llvm-readobj/gnu-hash-symbols.test b/llvm/test/tools/llvm-readobj/gnu-hash-symbols.test
index 8babac599c360..151ec23d1ecd0 100644
--- a/llvm/test/tools/llvm-readobj/gnu-hash-symbols.test
+++ b/llvm/test/tools/llvm-readobj/gnu-hash-symbols.test
@@ -47,18 +47,11 @@ FileHeader:
   Type:            ET_DYN
   Machine:         EM_X86_64
 Sections:
-  ## TODO:  Replace the raw section contents with more meaningful dynamic
-  ## tags/symbols/etc, once yaml2obj supports it.
-  ## FIXME: yaml2obj does not currently allow custom addresses for .dynstr and
-  ## .dynsym if DynamicSymbols are specified.
-  ## See https://bugs.llvm.org/show_bug.cgi?id=40339
   - Name:         .dynstr
     Type:         SHT_STRTAB
     Flags:        [ SHF_ALLOC ]
     AddressAlign: 0x100
     EntSize:      0x1
-    ## "\0_Z3fooi\0"
-    Content: "005f5a33666f6f6900"
   - Name:         .dynsym
     Type:         SHT_DYNSYM
     Flags:        [ SHF_ALLOC ]
@@ -66,10 +59,6 @@ Sections:
     Address:      0x100
     AddressAlign: 0x100
     EntSize:      0x18
-    ## Null symbol;
-    ## st_name: 1; st_info: Global | Func; st_other: 0;
-    ##   st_shndx: .text.foo; st_value: 0x2000; st_size: 0
-    Content: "000000000000000000000000000000000000000000000000010000001200040000200000000000000000000000000000"
   - Name:            .dynamic
     Type:            SHT_DYNAMIC
     Flags:           [ SHF_ALLOC ]
@@ -94,6 +83,9 @@ Sections:
     Size:  0x40
     Address: 0x2000
     AddressAlign: 0x2000
+DynamicSymbols:
+  - Name:    _Z3fooi
+    Binding: STB_GLOBAL
 ProgramHeaders:
   - Type:  PT_LOAD
     Flags: [ PF_R, PF_X ]

From 590b1aee609d30346aee77a699381d21c538dd56 Mon Sep 17 00:00:00 2001
From: Whitney Tsang <whitney.uwaterloo@gmail.com>
Date: Wed, 5 Jun 2019 15:32:56 +0000
Subject: [PATCH 1129/1176] Revert "Title: [LOOPINFO] Extend Loop object to add
 utilities to get the loop"

This reverts commit d34797dfc26c61cea19f45669a13ea572172ba34.

llvm-svn: 362615
---
 llvm/include/llvm/Analysis/LoopInfo.h    | 161 ----
 llvm/lib/Analysis/LoopInfo.cpp           | 215 ------
 llvm/unittests/Analysis/LoopInfoTest.cpp | 900 -----------------------
 3 files changed, 1276 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index a4c8f648685c8..6b964cdf9eae5 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -54,11 +54,9 @@ namespace llvm {
 class DominatorTree;
 class LoopInfo;
 class Loop;
-class InductionDescriptor;
 class MDNode;
 class MemorySSAUpdater;
 class PHINode;
-class ScalarEvolution;
 class raw_ostream;
 template <class N, bool IsPostDom> class DominatorTreeBase;
 template <class N, class M> class LoopInfoBase;
@@ -531,165 +529,6 @@ class Loop : public LoopBase<BasicBlock, Loop> {
   bool getIncomingAndBackEdge(BasicBlock *&Incoming,
                               BasicBlock *&Backedge) const;
 
-  /// Below are some utilities to get loop bounds and induction variable, and
-  /// check if a given phinode is an auxiliary induction variable, as well as
-  /// checking if the loop is canonical.
-  ///
-  /// Here is an example:
-  /// \code
-  /// for (int i = lb; i < ub; i+=step)
-  ///   <loop body>
-  /// --- pseudo LLVMIR ---
-  /// beforeloop:
-  ///   guardcmp = (lb < ub)
-  ///   if (guardcmp) goto preheader; else goto afterloop
-  /// preheader:
-  /// loop:
-  ///   i_1 = phi[{lb, preheader}, {i_2, latch}]
-  ///   <loop body>
-  ///   i_2 = i_1 + step
-  /// latch:
-  ///   cmp = (i_2 < ub)
-  ///   if (cmp) goto loop
-  /// exit:
-  /// afterloop:
-  /// \endcode
-  ///
-  /// - getBounds
-  ///   - getInitialIVValue      --> lb
-  ///   - getStepInst            --> i_2 = i_1 + step
-  ///   - getStepValue           --> step
-  ///   - getFinalIVValue        --> ub
-  ///   - getCanonicalPredicate  --> '<'
-  ///   - getDirection           --> Increasing
-  ///
-  /// - getInductionVariable            --> i_1
-  /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
-  /// - isCanonical                     --> false
-  struct LoopBounds {
-    /// Return the LoopBounds object if
-    /// - the given \p IndVar is an induction variable
-    /// - the initial value of the induction variable can be found
-    /// - the step instruction of the induction variable can be found
-    /// - the final value of the induction variable can be found
-    ///
-    /// Else None.
-    static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
-                                                ScalarEvolution &SE);
-
-    /// Get the initial value of the loop induction variable.
-    Value &getInitialIVValue() const { return InitialIVValue; }
-
-    /// Get the instruction that updates the loop induction variable.
-    Instruction &getStepInst() const { return StepInst; }
-
-    /// Get the step that the loop induction variable gets updated by in each
-    /// loop iteration. Return nullptr if not found.
-    Value *getStepValue() const { return StepValue; }
-
-    /// Get the final value of the loop induction variable.
-    Value &getFinalIVValue() const { return FinalIVValue; }
-
-    /// Return the canonical predicate for the latch compare instruction, if
-    /// able to be calcuated. Else BAD_ICMP_PREDICATE.
-    ///
-    /// A predicate is considered as canonical if requirements below are all
-    /// satisfied:
-    /// 1. The first successor of the latch branch is the loop header
-    ///    If not, inverse the predicate.
-    /// 2. One of the operands of the latch comparison is StepInst
-    ///    If not, and
-    ///    - if the current calcuated predicate is not ne or eq, flip the
-    ///      predicate.
-    ///    - else if the loop is increasing, return slt
-    ///      (notice that it is safe to change from ne or eq to sign compare)
-    ///    - else if the loop is decreasing, return sgt
-    ///      (notice that it is safe to change from ne or eq to sign compare)
-    ///
-    /// Here is an example when both (1) and (2) are not satisfied:
-    /// \code
-    /// loop.header:
-    ///  %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
-    ///  %inc = add %iv, %step
-    ///  %cmp = slt %iv, %finaliv
-    ///  br %cmp, %loop.exit, %loop.header
-    /// loop.exit:
-    /// \endcode
-    /// - The second successor of the latch branch is the loop header instead
-    ///   of the first successor (slt -> sge)
-    /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
-    ///   instead of the StepInst (%inc) (sge -> sgt)
-    ///
-    /// The predicate would be sgt if both (1) and (2) are satisfied.
-    /// getCanonicalPredicate() returns sgt for this example.
-    /// Note: The IR is not changed.
-    ICmpInst::Predicate getCanonicalPredicate() const;
-
-    /// An enum for the direction of the loop
-    /// - for (int i = 0; i < ub; ++i)  --> Increasing
-    /// - for (int i = ub; i > 0; --i)  --> Descresing
-    /// - for (int i = x; i != y; i+=z) --> Unknown
-    enum class Direction { Increasing, Decreasing, Unknown };
-
-    /// Get the direction of the loop.
-    Direction getDirection() const;
-
-  private:
-    LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
-               ScalarEvolution &SE)
-        : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
-          FinalIVValue(F), SE(SE) {}
-
-    const Loop &L;
-
-    // The initial value of the loop induction variable
-    Value &InitialIVValue;
-
-    // The instruction that updates the loop induction variable
-    Instruction &StepInst;
-
-    // The value that the loop induction variable gets updated by in each loop
-    // iteration
-    Value *StepValue;
-
-    // The final value of the loop induction variable
-    Value &FinalIVValue;
-
-    ScalarEvolution &SE;
-  };
-
-  /// Return the struct LoopBounds collected if all struct members are found,
-  /// else None.
-  Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
-
-  /// Return the loop induction variable if found, else return nullptr.
-  /// An instruction is considered as the loop induction variable if
-  /// - it is an induction variable of the loop; and
-  /// - it is used to determine the condition of the branch in the loop latch
-  ///
-  /// Note: the induction variable doesn't need to be canonical, i.e. starts at
-  /// zero and increments by one each time through the loop (but it can be).
-  PHINode *getInductionVariable(ScalarEvolution &SE) const;
-
-  /// Get the loop induction descriptor for the loop induction variable. Return
-  /// true if the loop induction variable is found.
-  bool getInductionDescriptor(ScalarEvolution &SE,
-                              InductionDescriptor &IndDesc) const;
-
-  /// Return true if the given PHINode \p AuxIndVar is
-  /// - in the loop header
-  /// - not used outside of the loop
-  /// - incremented by a loop invariant step for each loop iteration
-  /// - step instruction opcode should be add or sub
-  /// Note: auxiliary induction variable is not required to be used in the
-  ///       conditional branch in the loop latch. (but it can be)
-  bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
-                                    ScalarEvolution &SE) const;
-
-  /// Return true if the loop induction variable starts at zero and increments
-  /// by one each time through the loop.
-  bool isCanonical(ScalarEvolution &SE) const;
-
   /// Return true if the Loop is in LCSSA form.
   bool isLCSSAForm(DominatorTree &DT) const;
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 50b5694297dda..aa933d98f249b 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -17,12 +17,10 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
@@ -166,219 +164,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
   return nullptr;
 }
 
-/// Get the latch condition instruction.
-static ICmpInst *getLatchCmpInst(const Loop &L) {
-  if (BasicBlock *Latch = L.getLoopLatch())
-    if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
-      if (BI->isConditional())
-        return dyn_cast<ICmpInst>(BI->getCondition());
-
-  return nullptr;
-}
-
-/// Return the final value of the loop induction variable if found.
-static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
-                               const Instruction &StepInst) {
-  ICmpInst *LatchCmpInst = getLatchCmpInst(L);
-  if (!LatchCmpInst)
-    return nullptr;
-
-  Value *Op0 = LatchCmpInst->getOperand(0);
-  Value *Op1 = LatchCmpInst->getOperand(1);
-  if (Op0 == &IndVar || Op0 == &StepInst)
-    return Op1;
-
-  if (Op1 == &IndVar || Op1 == &StepInst)
-    return Op0;
-
-  return nullptr;
-}
-
-Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
-                                                       PHINode &IndVar,
-                                                       ScalarEvolution &SE) {
-  InductionDescriptor IndDesc;
-  if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
-    return None;
-
-  Value *InitialIVValue = IndDesc.getStartValue();
-  Instruction *StepInst = IndDesc.getInductionBinOp();
-  if (!InitialIVValue || !StepInst)
-    return None;
-
-  const SCEV *Step = IndDesc.getStep();
-  Value *StepInstOp1 = StepInst->getOperand(1);
-  Value *StepInstOp0 = StepInst->getOperand(0);
-  Value *StepValue = nullptr;
-  if (SE.getSCEV(StepInstOp1) == Step)
-    StepValue = StepInstOp1;
-  else if (SE.getSCEV(StepInstOp0) == Step)
-    StepValue = StepInstOp0;
-
-  Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
-  if (!FinalIVValue)
-    return None;
-
-  return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
-                    SE);
-}
-
-using Direction = Loop::LoopBounds::Direction;
-
-ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
-  BasicBlock *Latch = L.getLoopLatch();
-  assert(Latch && "Expecting valid latch");
-
-  BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
-  assert(BI && BI->isConditional() && "Expecting conditional latch branch");
-
-  ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
-  assert(LatchCmpInst &&
-         "Expecting the latch compare instruction to be a CmpInst");
-
-  // Need to inverse the predicate when first successor is not the loop
-  // header
-  ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
-                                 ? LatchCmpInst->getPredicate()
-                                 : LatchCmpInst->getInversePredicate();
-
-  if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
-    Pred = ICmpInst::getSwappedPredicate(Pred);
-
-  // Need to flip strictness of the predicate when the latch compare instruction
-  // is not using StepInst
-  if (LatchCmpInst->getOperand(0) == &getStepInst() ||
-      LatchCmpInst->getOperand(1) == &getStepInst())
-    return Pred;
-
-  // Cannot flip strictness of NE and EQ
-  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
-    return ICmpInst::getFlippedStrictnessPredicate(Pred);
-
-  Direction D = getDirection();
-  if (D == Direction::Increasing)
-    return ICmpInst::ICMP_SLT;
-
-  if (D == Direction::Decreasing)
-    return ICmpInst::ICMP_SGT;
-
-  // If cannot determine the direction, then unable to find the canonical
-  // predicate
-  return ICmpInst::BAD_ICMP_PREDICATE;
-}
-
-Direction Loop::LoopBounds::getDirection() const {
-  if (const SCEVAddRecExpr *StepAddRecExpr =
-          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
-    if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
-      if (SE.isKnownPositive(StepRecur))
-        return Direction::Increasing;
-      if (SE.isKnownNegative(StepRecur))
-        return Direction::Decreasing;
-    }
-
-  return Direction::Unknown;
-}
-
-Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
-  if (PHINode *IndVar = getInductionVariable(SE))
-    return LoopBounds::getBounds(*this, *IndVar, SE);
-
-  return None;
-}
-
-PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
-  if (!isLoopSimplifyForm())
-    return nullptr;
-
-  BasicBlock *Header = getHeader();
-  assert(Header && "Expected a valid loop header");
-  assert(getLoopLatch() && "Expected a valid loop latch");
-  ICmpInst *CmpInst = getLatchCmpInst(*this);
-  if (!CmpInst)
-    return nullptr;
-
-  Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
-  Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
-
-  for (PHINode &IndVar : Header->phis()) {
-    InductionDescriptor IndDesc;
-    if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
-      continue;
-
-    Instruction *StepInst = IndDesc.getInductionBinOp();
-
-    // case 1:
-    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
-    // StepInst = IndVar + step
-    // cmp = StepInst < FinalValue
-    if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1)
-      return &IndVar;
-
-    // case 2:
-    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
-    // StepInst = IndVar + step
-    // cmp = IndVar < FinalValue
-    if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1)
-      return &IndVar;
-  }
-
-  return nullptr;
-}
-
-bool Loop::getInductionDescriptor(ScalarEvolution &SE,
-                                  InductionDescriptor &IndDesc) const {
-  if (PHINode *IndVar = getInductionVariable(SE))
-    return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
-
-  return false;
-}
-
-bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
-                                        ScalarEvolution &SE) const {
-  // Located in the loop header
-  BasicBlock *Header = getHeader();
-  if (AuxIndVar.getParent() != Header)
-    return false;
-
-  // No uses outside of the loop
-  for (User *U : AuxIndVar.users())
-    if (const Instruction *I = dyn_cast<Instruction>(U))
-      if (!contains(I))
-        return false;
-
-  InductionDescriptor IndDesc;
-  if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
-    return false;
-
-  // The step instruction opcode should be add or sub.
-  if (IndDesc.getInductionOpcode() != Instruction::Add &&
-      IndDesc.getInductionOpcode() != Instruction::Sub)
-    return false;
-
-  // Incremented by a loop invariant step for each loop iteration
-  return SE.isLoopInvariant(IndDesc.getStep(), this);
-}
-
-bool Loop::isCanonical(ScalarEvolution &SE) const {
-  InductionDescriptor IndDesc;
-  if (!getInductionDescriptor(SE, IndDesc))
-    return false;
-
-  ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
-  if (!Init || !Init->isZero())
-    return false;
-
-  if (IndDesc.getInductionOpcode() != Instruction::Add)
-    return false;
-
-  ConstantInt *Step = IndDesc.getConstIntStepValue();
-  if (!Step || !Step->isOne())
-    return false;
-
-  return true;
-}
-
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
                                DominatorTree &DT) {
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 005e1dc405b75..483532a187527 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -7,10 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/SourceMgr.h"
@@ -30,26 +26,6 @@ runWithLoopInfo(Module &M, StringRef FuncName,
   Test(*F, LI);
 }
 
-/// Build the loop info and scalar evolution for the function and run the Test.
-static void runWithLoopInfoPlus(
-    Module &M, StringRef FuncName,
-    function_ref<void(Function &F, LoopInfo &LI, ScalarEvolution &SE,
-                      PostDominatorTree &PDT)>
-        Test) {
-  auto *F = M.getFunction(FuncName);
-  ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
-
-  TargetLibraryInfoImpl TLII;
-  TargetLibraryInfo TLI(TLII);
-  AssumptionCache AC(*F);
-  DominatorTree DT(*F);
-  LoopInfo LI(DT);
-  ScalarEvolution SE(*F, TLI, AC, DT, LI);
-
-  PostDominatorTree PDT(*F);
-  Test(*F, LI, SE, PDT);
-}
-
 static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
                                               const char *ModuleStr) {
   SMDiagnostic Err;
@@ -234,879 +210,3 @@ TEST(LoopInfoTest, PreorderTraversals) {
   EXPECT_EQ(&L_0_1, ReverseSiblingPreorder[6]);
   EXPECT_EQ(&L_0_0, ReverseSiblingPreorder[7]);
 }
-
-TEST(LoopInfoTest, CanonicalLoop) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithInverseGuardSuccs) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp sge i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.end, label %for.preheader\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithSwappedGuardCmp) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp sgt i32 %ub, 0\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp sge i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.exit, label %for.body\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithInverseLatchSuccs) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp sge i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.exit, label %for.body\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithLatchCmpNE) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp ne i32 %i, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopWithGuardCmpSLE) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %ubPlusOne = add i32 %ub, 1\n"
-      "  %guardcmp = icmp sle i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp ne i32 %i, %ubPlusOne\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ubPlusOne");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopNonConstantStep) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = zext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, %step\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(), Loop::LoopBounds::Direction::Unknown);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopUnsignedBounds) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp ult i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = zext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add i32 %i, 1\n"
-      "  %cmp = icmp ult i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_ULT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, DecreasingLoop) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ %ub, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = sub nsw i32 %i, 1\n"
-      "  %cmp = icmp sgt i32 %inc, 0\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        EXPECT_EQ(Bounds->getInitialIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_EQ(StepValue, nullptr);
-        ConstantInt *FinalIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getFinalIVValue());
-        EXPECT_TRUE(FinalIVValue && FinalIVValue->isZero());
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SGT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Decreasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, CannotFindDirection) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, %step\n"
-      "  %cmp = icmp ne i32 %i, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(*M, "foo",
-                      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-                          PostDominatorTree &PDT) {
-                        Function::iterator FI = F.begin();
-                        // First two basic block are entry and for.preheader
-                        // - skip them.
-                        ++FI;
-                        BasicBlock *Header = &*(++FI);
-                        assert(Header->getName() == "for.body");
-                        Loop *L = LI.getLoopFor(Header);
-                        EXPECT_NE(L, nullptr);
-
-                        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-                        EXPECT_NE(Bounds, None);
-                        ConstantInt *InitialIVValue =
-                            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-                        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-                        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-                        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
-                        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-                        EXPECT_EQ(Bounds->getCanonicalPredicate(),
-                                  ICmpInst::BAD_ICMP_PREDICATE);
-                        EXPECT_EQ(Bounds->getDirection(),
-                                  Loop::LoopBounds::Direction::Unknown);
-                        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-                      });
-}
-
-TEST(LoopInfoTest, ZextIndVar) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %for.body ]\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %wide.trip.count = zext i32 %ub to i64\n"
-      "  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count\n"
-      "  br i1 %exitcond, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "indvars.iv.next");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "wide.trip.count");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_NE);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "indvars.iv");
-      });
-}
-
-TEST(LoopInfoTest, UnguardedLoop) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First basic block is entry - skip it.
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, UnguardedLoopWithControlFlow) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub, i1 %cond) {\n"
-      "entry:\n"
-      "  br i1 %cond, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, LoopNest) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.outer.preheader, label %for.end\n"
-      "for.outer.preheader:\n"
-      "  br label %for.outer\n"
-      "for.outer:\n"
-      "  %j = phi i32 [ 0, %for.outer.preheader ], [ %inc.outer, %for.outer.latch ]\n"
-      "  br i1 %guardcmp, label %for.inner.preheader, label %for.outer.latch\n"
-      "for.inner.preheader:\n"
-      "  br label %for.inner\n"
-      "for.inner:\n"
-      "  %i = phi i32 [ 0, %for.inner.preheader ], [ %inc, %for.inner ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.inner, label %for.inner.exit\n"
-      "for.inner.exit:\n"
-      "  br label %for.outer.latch\n"
-      "for.outer.latch:\n"
-      "  %inc.outer = add nsw i32 %j, 1\n"
-      "  %cmp.outer = icmp slt i32 %inc.outer, %ub\n"
-      "  br i1 %cmp.outer, label %for.outer, label %for.outer.exit\n"
-      "for.outer.exit:\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.outer.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.outer");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc.outer");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "j");
-
-        // Next two basic blocks are for.outer and for.inner.preheader - skip
-        // them.
-        ++FI;
-        Header = &*(++FI);
-        assert(Header->getName() == "for.inner");
-        L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> InnerBounds = L->getBounds(SE);
-        EXPECT_NE(InnerBounds, None);
-        InitialIVValue =
-            dyn_cast<ConstantInt>(&InnerBounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(InnerBounds->getStepInst().getName(), "inc");
-        StepValue = dyn_cast_or_null<ConstantInt>(InnerBounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(InnerBounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(InnerBounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(InnerBounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-      });
-}
-
-TEST(LoopInfoTest, AuxiliaryIV) {
-  const char *ModuleStr =
-      "define void @foo(i32* %A, i32 %ub) {\n"
-      "entry:\n"
-      "  %guardcmp = icmp slt i32 0, %ub\n"
-      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
-      "for.preheader:\n"
-      "  br label %for.body\n"
-      "for.body:\n"
-      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
-      "  %aux = phi i32 [ 0, %for.preheader ], [ %auxinc, %for.body ]\n"
-      "  %loopvariant = phi i32 [ 0, %for.preheader ], [ %loopvariantinc, %for.body ]\n"
-      "  %usedoutside = phi i32 [ 0, %for.preheader ], [ %usedoutsideinc, %for.body ]\n"
-      "  %mulopcode = phi i32 [ 0, %for.preheader ], [ %mulopcodeinc, %for.body ]\n"
-      "  %idxprom = sext i32 %i to i64\n"
-      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
-      "  store i32 %i, i32* %arrayidx, align 4\n"
-      "  %mulopcodeinc = mul nsw i32 %mulopcode, 5\n"
-      "  %usedoutsideinc = add nsw i32 %usedoutside, 5\n"
-      "  %loopvariantinc = add nsw i32 %loopvariant, %i\n"
-      "  %auxinc = add nsw i32 %aux, 5\n"
-      "  %inc = add nsw i32 %i, 1\n"
-      "  %cmp = icmp slt i32 %inc, %ub\n"
-      "  br i1 %cmp, label %for.body, label %for.exit\n"
-      "for.exit:\n"
-      "  %lcssa = phi i32 [ %usedoutside, %for.body ]\n"
-      "  br label %for.end\n"
-      "for.end:\n"
-      "  ret void\n"
-      "}\n";
-
-  // Parse the module.
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
-
-  runWithLoopInfoPlus(
-      *M, "foo",
-      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
-          PostDominatorTree &PDT) {
-        Function::iterator FI = F.begin();
-        // First two basic block are entry and for.preheader - skip them.
-        ++FI;
-        BasicBlock *Header = &*(++FI);
-        assert(Header->getName() == "for.body");
-        Loop *L = LI.getLoopFor(Header);
-        EXPECT_NE(L, nullptr);
-
-        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
-        EXPECT_NE(Bounds, None);
-        ConstantInt *InitialIVValue =
-            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
-        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
-        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
-        ConstantInt *StepValue =
-            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
-        EXPECT_TRUE(StepValue && StepValue->isOne());
-        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
-        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
-        EXPECT_EQ(Bounds->getDirection(),
-                  Loop::LoopBounds::Direction::Increasing);
-        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
-        BasicBlock::iterator II = Header->begin();
-        PHINode &Instruction_i = cast<PHINode>(*(II));
-        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_i, SE));
-        PHINode &Instruction_aux = cast<PHINode>(*(++II));
-        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_aux, SE));
-        PHINode &Instruction_loopvariant = cast<PHINode>(*(++II));
-        EXPECT_FALSE(
-            L->isAuxiliaryInductionVariable(Instruction_loopvariant, SE));
-        PHINode &Instruction_usedoutside = cast<PHINode>(*(++II));
-        EXPECT_FALSE(
-            L->isAuxiliaryInductionVariable(Instruction_usedoutside, SE));
-        PHINode &Instruction_mulopcode = cast<PHINode>(*(++II));
-        EXPECT_FALSE(
-            L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE));
-      });
-}

From a0e350e640b3068717eb1522e199db99b7741984 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 16:11:57 +0000
Subject: [PATCH 1130/1176] [X86][SSE] Add additional nt-load test cases as
 discussed on D62910

llvm-svn: 362616
---
 .../X86/merge-consecutive-stores-nt.ll        | 248 ++++++++++++++++--
 1 file changed, 226 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
index 0461008f84d90..9ef0ecb5fbc73 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
@@ -2,14 +2,18 @@
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE4A
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
 
 ;
 ; PR42123
 ;
 
-; FIXME: AVX doesn't retain NT flag on store.
-; Should be VMOVNTPS ymm.
+; FIXME: AVX doesn't retain NT flag on load/store.
+; AVX1 load should be 2 x VMOVNTDQA xmm.
+; AVX2 load should be VMOVNTDQA ymm.
+; AVX store should be VMOVNTPS ymm.
 define void @merge_2_v4f32_align32(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X86-LABEL: merge_2_v4f32_align32:
 ; X86:       # %bb.0:
@@ -21,13 +25,29 @@ define void @merge_2_v4f32_align32(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X86-NEXT:    movntps %xmm1, 16(%eax)
 ; X86-NEXT:    retl
 ;
-; X64-SSE-LABEL: merge_2_v4f32_align32:
-; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movaps (%rdi), %xmm0
-; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
-; X64-SSE-NEXT:    movntps %xmm0, (%rsi)
-; X64-SSE-NEXT:    movntps %xmm1, 16(%rsi)
-; X64-SSE-NEXT:    retq
+; X64-SSE2-LABEL: merge_2_v4f32_align32:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE2-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movntps %xmm0, (%rsi)
+; X64-SSE2-NEXT:    movntps %xmm1, 16(%rsi)
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE4A-LABEL: merge_2_v4f32_align32:
+; X64-SSE4A:       # %bb.0:
+; X64-SSE4A-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE4A-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE4A-NEXT:    movntps %xmm0, (%rsi)
+; X64-SSE4A-NEXT:    movntps %xmm1, 16(%rsi)
+; X64-SSE4A-NEXT:    retq
+;
+; X64-SSE41-LABEL: merge_2_v4f32_align32:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; X64-SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
+; X64-SSE41-NEXT:    movntdq %xmm0, (%rsi)
+; X64-SSE41-NEXT:    movntdq %xmm1, 16(%rsi)
+; X64-SSE41-NEXT:    retq
 ;
 ; X64-AVX-LABEL: merge_2_v4f32_align32:
 ; X64-AVX:       # %bb.0:
@@ -37,8 +57,8 @@ define void @merge_2_v4f32_align32(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X64-AVX-NEXT:    retq
   %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
   %2 = bitcast float* %1 to <4 x float>*
-  %3 = load <4 x float>, <4 x float>* %a0, align 32
-  %4 = load <4 x float>, <4 x float>* %2, align 16
+  %3 = load <4 x float>, <4 x float>* %a0, align 32, !nontemporal !0
+  %4 = load <4 x float>, <4 x float>* %2, align 16, !nontemporal !0
   %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
   %6 = bitcast float* %5 to <4 x float>*
   store <4 x float> %3, <4 x float>* %a1, align 32, !nontemporal !0
@@ -46,10 +66,64 @@ define void @merge_2_v4f32_align32(<4 x float>* %a0, <4 x float>* %a1)  {
   ret void
 }
 
+; FIXME: shouldn't attempt to merge nt and non-nt loads even if aligned.
+; Must be kept seperate as VMOVNTDQA xmm + VMOVDQA xmm.
+define void @merge_2_v4f32_align32_mix_ntload(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align32_mix_ntload:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movaps (%ecx), %xmm0
+; X86-NEXT:    movaps 16(%ecx), %xmm1
+; X86-NEXT:    movaps %xmm0, (%eax)
+; X86-NEXT:    movaps %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE2-LABEL: merge_2_v4f32_align32_mix_ntload:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE2-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movaps %xmm0, (%rsi)
+; X64-SSE2-NEXT:    movaps %xmm1, 16(%rsi)
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE4A-LABEL: merge_2_v4f32_align32_mix_ntload:
+; X64-SSE4A:       # %bb.0:
+; X64-SSE4A-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE4A-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE4A-NEXT:    movaps %xmm0, (%rsi)
+; X64-SSE4A-NEXT:    movaps %xmm1, 16(%rsi)
+; X64-SSE4A-NEXT:    retq
+;
+; X64-SSE41-LABEL: merge_2_v4f32_align32_mix_ntload:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; X64-SSE41-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE41-NEXT:    movdqa %xmm0, (%rsi)
+; X64-SSE41-NEXT:    movaps %xmm1, 16(%rsi)
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align32_mix_ntload:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovaps (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 32, !nontemporal !0
+  %4 = load <4 x float>, <4 x float>* %2, align 16
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 32
+  store <4 x float> %4, <4 x float>* %6, align 16
+  ret void
+}
+
 ; FIXME: shouldn't attempt to merge nt and non-nt stores even if aligned.
 ; Must be kept seperate as VMOVNTPS xmm + VMOVAPS xmm.
-define void @merge_2_v4f32_align32_mix(<4 x float>* %a0, <4 x float>* %a1)  {
-; X86-LABEL: merge_2_v4f32_align32_mix:
+define void @merge_2_v4f32_align32_mix_ntstore(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align32_mix_ntstore:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -59,7 +133,7 @@ define void @merge_2_v4f32_align32_mix(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X86-NEXT:    movaps %xmm1, 16(%eax)
 ; X86-NEXT:    retl
 ;
-; X64-SSE-LABEL: merge_2_v4f32_align32_mix:
+; X64-SSE-LABEL: merge_2_v4f32_align32_mix_ntstore:
 ; X64-SSE:       # %bb.0:
 ; X64-SSE-NEXT:    movaps (%rdi), %xmm0
 ; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
@@ -67,7 +141,7 @@ define void @merge_2_v4f32_align32_mix(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X64-SSE-NEXT:    movaps %xmm1, 16(%rsi)
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: merge_2_v4f32_align32_mix:
+; X64-AVX-LABEL: merge_2_v4f32_align32_mix_ntstore:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovaps (%rdi), %ymm0
 ; X64-AVX-NEXT:    vmovaps %ymm0, (%rsi)
@@ -84,10 +158,64 @@ define void @merge_2_v4f32_align32_mix(<4 x float>* %a0, <4 x float>* %a1)  {
   ret void
 }
 
+; FIXME: AVX can't perform NT-load-ymm on 16-byte aligned memory.
+; Must be kept seperate as VMOVNTDQA xmm.
+define void @merge_2_v4f32_align16_ntload(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align16_ntload:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movaps (%ecx), %xmm0
+; X86-NEXT:    movaps 16(%ecx), %xmm1
+; X86-NEXT:    movaps %xmm0, (%eax)
+; X86-NEXT:    movaps %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE2-LABEL: merge_2_v4f32_align16_ntload:
+; X64-SSE2:       # %bb.0:
+; X64-SSE2-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE2-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE2-NEXT:    movaps %xmm0, (%rsi)
+; X64-SSE2-NEXT:    movaps %xmm1, 16(%rsi)
+; X64-SSE2-NEXT:    retq
+;
+; X64-SSE4A-LABEL: merge_2_v4f32_align16_ntload:
+; X64-SSE4A:       # %bb.0:
+; X64-SSE4A-NEXT:    movaps (%rdi), %xmm0
+; X64-SSE4A-NEXT:    movaps 16(%rdi), %xmm1
+; X64-SSE4A-NEXT:    movaps %xmm0, (%rsi)
+; X64-SSE4A-NEXT:    movaps %xmm1, 16(%rsi)
+; X64-SSE4A-NEXT:    retq
+;
+; X64-SSE41-LABEL: merge_2_v4f32_align16_ntload:
+; X64-SSE41:       # %bb.0:
+; X64-SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; X64-SSE41-NEXT:    movntdqa 16(%rdi), %xmm1
+; X64-SSE41-NEXT:    movdqa %xmm0, (%rsi)
+; X64-SSE41-NEXT:    movdqa %xmm1, 16(%rsi)
+; X64-SSE41-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align16_ntload:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 16, !nontemporal !0
+  %4 = load <4 x float>, <4 x float>* %2, align 16, !nontemporal !0
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 16
+  store <4 x float> %4, <4 x float>* %6, align 16
+  ret void
+}
+
 ; FIXME: AVX can't perform NT-store-ymm on 16-byte aligned memory.
 ; Must be kept seperate as VMOVNTPS xmm.
-define void @merge_2_v4f32_align16(<4 x float>* %a0, <4 x float>* %a1)  {
-; X86-LABEL: merge_2_v4f32_align16:
+define void @merge_2_v4f32_align16_ntstore(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align16_ntstore:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -97,7 +225,7 @@ define void @merge_2_v4f32_align16(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X86-NEXT:    movntps %xmm1, 16(%eax)
 ; X86-NEXT:    retl
 ;
-; X64-SSE-LABEL: merge_2_v4f32_align16:
+; X64-SSE-LABEL: merge_2_v4f32_align16_ntstore:
 ; X64-SSE:       # %bb.0:
 ; X64-SSE-NEXT:    movaps (%rdi), %xmm0
 ; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
@@ -105,7 +233,7 @@ define void @merge_2_v4f32_align16(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X64-SSE-NEXT:    movntps %xmm1, 16(%rsi)
 ; X64-SSE-NEXT:    retq
 ;
-; X64-AVX-LABEL: merge_2_v4f32_align16:
+; X64-AVX-LABEL: merge_2_v4f32_align16_ntstore:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
 ; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
@@ -122,8 +250,84 @@ define void @merge_2_v4f32_align16(<4 x float>* %a0, <4 x float>* %a1)  {
   ret void
 }
 
+; FIXME: Nothing can perform NT-load-vector on 1-byte aligned memory.
+; Just perform regular loads.
+define void @merge_2_v4f32_align1_ntload(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align1_ntload:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movups (%ecx), %xmm0
+; X86-NEXT:    movups 16(%ecx), %xmm1
+; X86-NEXT:    movups %xmm0, (%eax)
+; X86-NEXT:    movups %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: merge_2_v4f32_align1_ntload:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movups (%rdi), %xmm0
+; X64-SSE-NEXT:    movups 16(%rdi), %xmm1
+; X64-SSE-NEXT:    movups %xmm0, (%rsi)
+; X64-SSE-NEXT:    movups %xmm1, 16(%rsi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align1_ntload:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 1, !nontemporal !0
+  %4 = load <4 x float>, <4 x float>* %2, align 1, !nontemporal !0
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 1
+  store <4 x float> %4, <4 x float>* %6, align 1
+  ret void
+}
+
 ; FIXME: Nothing can perform NT-store-vector on 1-byte aligned memory.
 ; Must be scalarized to use MOVTNI/MOVNTSD.
+define void @merge_2_v4f32_align1_ntstore(<4 x float>* %a0, <4 x float>* %a1)  {
+; X86-LABEL: merge_2_v4f32_align1_ntstore:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movups (%ecx), %xmm0
+; X86-NEXT:    movups 16(%ecx), %xmm1
+; X86-NEXT:    movups %xmm0, (%eax)
+; X86-NEXT:    movups %xmm1, 16(%eax)
+; X86-NEXT:    retl
+;
+; X64-SSE-LABEL: merge_2_v4f32_align1_ntstore:
+; X64-SSE:       # %bb.0:
+; X64-SSE-NEXT:    movups (%rdi), %xmm0
+; X64-SSE-NEXT:    movups 16(%rdi), %xmm1
+; X64-SSE-NEXT:    movups %xmm0, (%rsi)
+; X64-SSE-NEXT:    movups %xmm1, 16(%rsi)
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: merge_2_v4f32_align1_ntstore:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
+; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
+; X64-AVX-NEXT:    vzeroupper
+; X64-AVX-NEXT:    retq
+  %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
+  %2 = bitcast float* %1 to <4 x float>*
+  %3 = load <4 x float>, <4 x float>* %a0, align 1
+  %4 = load <4 x float>, <4 x float>* %2, align 1
+  %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
+  %6 = bitcast float* %5 to <4 x float>*
+  store <4 x float> %3, <4 x float>* %a1, align 1, !nontemporal !0
+  store <4 x float> %4, <4 x float>* %6, align 1, !nontemporal !0
+  ret void
+}
+
+; FIXME: Nothing can perform NT-load-vector on 1-byte aligned memory.
+; Just perform regular loads and scalarize NT-stores.
 define void @merge_2_v4f32_align1(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X86-LABEL: merge_2_v4f32_align1:
 ; X86:       # %bb.0:
@@ -151,8 +355,8 @@ define void @merge_2_v4f32_align1(<4 x float>* %a0, <4 x float>* %a1)  {
 ; X64-AVX-NEXT:    retq
   %1 = getelementptr inbounds <4 x float>, <4 x float>* %a0, i64 1, i64 0
   %2 = bitcast float* %1 to <4 x float>*
-  %3 = load <4 x float>, <4 x float>* %a0, align 1
-  %4 = load <4 x float>, <4 x float>* %2, align 1
+  %3 = load <4 x float>, <4 x float>* %a0, align 1, !nontemporal !0
+  %4 = load <4 x float>, <4 x float>* %2, align 1, !nontemporal !0
   %5 = getelementptr inbounds <4 x float>, <4 x float>* %a1, i64 1, i64 0
   %6 = bitcast float* %5 to <4 x float>*
   store <4 x float> %3, <4 x float>* %a1, align 1, !nontemporal !0

From de586bd1fd57f3d4438fa9fdfcc7406727a90004 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 16:14:14 +0000
Subject: [PATCH 1131/1176] [X86][AVX] Generalize split256BitStore to
 splitVectorStore. NFCI.

Enables us to use this to split 512-bit vectors in future patches.

llvm-svn: 362617
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +++++++++++++++----------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a6aa2b77990da..63f0c8b4004b3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21016,10 +21016,12 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
 }
 
-/// Change a 256-bit vector store into a pair of 128-bit vector stores.
-static SDValue split256BitStore(StoreSDNode *Store, SelectionDAG &DAG) {
+/// Change a vector store into a pair of half-size vector stores.
+static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
   SDValue StoredVal = Store->getValue();
-  assert(StoredVal.getValueType().is256BitVector() && "Expecting 256-bit op");
+  assert((StoredVal.getValueType().is256BitVector() ||
+          StoredVal.getValueType().is512BitVector()) &&
+         "Expecting 256/512-bit op");
 
   // Splitting volatile memory ops is not allowed unless the operation was not
   // legal to begin with. We are assuming the input op is legal (this transform
@@ -21029,19 +21031,22 @@ static SDValue split256BitStore(StoreSDNode *Store, SelectionDAG &DAG) {
 
   MVT StoreVT = StoredVal.getSimpleValueType();
   unsigned NumElems = StoreVT.getVectorNumElements();
+  unsigned HalfSize = StoredVal.getValueSizeInBits() / 2;
+  unsigned HalfAlign = (128 == HalfSize ? 16 : 32);
+
   SDLoc DL(Store);
-  SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, DL);
-  SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, DL);
+  SDValue Value0 = extractSubVector(StoredVal, 0, DAG, DL, HalfSize);
+  SDValue Value1 = extractSubVector(StoredVal, NumElems / 2, DAG, DL, HalfSize);
   SDValue Ptr0 = Store->getBasePtr();
-  SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, DL);
+  SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfAlign, DL);
   unsigned Alignment = Store->getAlignment();
   SDValue Ch0 =
       DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
                    Alignment, Store->getMemOperand()->getFlags());
-  SDValue Ch1 =
-      DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
-                   Store->getPointerInfo().getWithOffset(16),
-                   MinAlign(Alignment, 16), Store->getMemOperand()->getFlags());
+  SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
+                             Store->getPointerInfo().getWithOffset(HalfAlign),
+                             MinAlign(Alignment, HalfAlign),
+                             Store->getMemOperand()->getFlags());
   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
 }
 
@@ -21082,7 +21087,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   if (StoreVT.is256BitVector()) {
     if (StoredVal.getOpcode() != ISD::CONCAT_VECTORS || !StoredVal.hasOneUse())
       return SDValue();
-    return split256BitStore(St, DAG);
+    return splitVectorStore(St, DAG);
   }
 
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
@@ -39464,7 +39469,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     if (NumElems < 2)
       return SDValue();
 
-    return split256BitStore(St, DAG);
+    return splitVectorStore(St, DAG);
   }
 
   // Optimize trunc store (of multiple scalars) to shuffle and store.

From d47f5488cf02fa06259a0f8563f684e2d45165c9 Mon Sep 17 00:00:00 2001
From: Andrey Churbanov <Andrey.Churbanov@intel.com>
Date: Wed, 5 Jun 2019 16:14:47 +0000
Subject: [PATCH 1132/1176] Added propagation of not big initial stack size of
 master thread to workers.

Currently implemented only for non-Windows 64-bit platforms.

Differential Revision: https://reviews.llvm.org/D62488

llvm-svn: 362618
---
 openmp/runtime/src/kmp.h                      |  1 +
 openmp/runtime/src/kmp_settings.cpp           | 14 ++++
 openmp/runtime/src/z_Linux_util.cpp           | 11 ++++
 .../runtime/test/misc_bugs/stack-propagate.c  | 65 +++++++++++++++++++
 4 files changed, 91 insertions(+)
 create mode 100644 openmp/runtime/test/misc_bugs/stack-propagate.c

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 0133108b7e155..f1a8f3814ee56 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3263,6 +3263,7 @@ extern void __kmp_init_random(kmp_info_t *thread);
 
 extern kmp_r_sched_t __kmp_get_schedule_global(void);
 extern void __kmp_adjust_num_threads(int new_nproc);
+extern void __kmp_check_stksize(size_t *val);
 
 extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
 extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 114cd9c9bac4a..1afba5bb57555 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -289,6 +289,20 @@ static void __kmp_stg_parse_bool(char const *name, char const *value,
   }
 } // __kmp_stg_parse_bool
 
+// placed here in order to use __kmp_round4k static function
+void __kmp_check_stksize(size_t *val) {
+  // if system stack size is too big then limit the size for worker threads
+  if (*val > KMP_DEFAULT_STKSIZE * 16) // just a heuristics...
+    *val = KMP_DEFAULT_STKSIZE * 16;
+  if (*val < KMP_MIN_STKSIZE)
+    *val = KMP_MIN_STKSIZE;
+  if (*val > KMP_MAX_STKSIZE)
+    *val = KMP_MAX_STKSIZE; // dead code currently, but may work in future
+#if KMP_OS_DARWIN
+  *val = __kmp_round4k(*val);
+#endif // KMP_OS_DARWIN
+}
+
 static void __kmp_stg_parse_size(char const *name, char const *value,
                                  size_t size_min, size_t size_max,
                                  int *is_specified, size_t *out,
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 7eb782a33a3a7..b1cf8299f1bf3 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1833,6 +1833,17 @@ void __kmp_runtime_initialize(void) {
 
   __kmp_xproc = __kmp_get_xproc();
 
+#if ! KMP_32_BIT_ARCH
+  struct rlimit rlim;
+  // read stack size of calling thread, save it as default for worker threads;
+  // this should be done before reading environment variables
+  status = getrlimit(RLIMIT_STACK, &rlim);
+  if (status == 0) { // success?
+    __kmp_stksize = rlim.rlim_cur;
+    __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
+  }
+#endif /* KMP_32_BIT_ARCH */
+
   if (sysconf(_SC_THREADS)) {
 
     /* Query the maximum number of threads */
diff --git a/openmp/runtime/test/misc_bugs/stack-propagate.c b/openmp/runtime/test/misc_bugs/stack-propagate.c
new file mode 100644
index 0000000000000..ac289b56ccd9d
--- /dev/null
+++ b/openmp/runtime/test/misc_bugs/stack-propagate.c
@@ -0,0 +1,65 @@
+// RUN: %libomp-compile-and-run
+
+// https://bugs.llvm.org/show_bug.cgi?id=26540 requested
+// stack size to be propagated from master to workers.
+// Library implements propagation of not too big stack
+// for Linux x86_64 platform (skipped Windows for now).
+//
+// The test checks that workers can use more than 4MB
+// of stack (4MB - was historical default for
+// stack size of worker thread in runtime library).
+
+#include <stdio.h>
+#include <omp.h>
+#if !defined(_WIN32)
+#include <sys/resource.h> // getrlimit
+#endif
+
+#define STK 4800000
+
+double foo(int n, int th)
+{
+  double arr[n];
+  int i;
+  double res = 0.0;
+  for (i = 0; i < n; ++i) {
+    arr[i] = (double)i / (n + 2);
+  }
+  for (i = 0; i < n; ++i) {
+    res += arr[i] / n;
+  }
+  return res;
+}
+
+int main(int argc, char *argv[])
+{
+#if defined(_WIN32)
+  // don't test Windows
+  printf("stack propagation not implemented, skipping test...\n");
+  return 0;
+#else
+  int status;
+  double val = 0.0;
+  int m = STK / 8; // > 4800000 bytes per thread
+  // read stack size of calling thread, save it as default
+  struct rlimit rlim;
+  status = getrlimit(RLIMIT_STACK, &rlim);
+  if (sizeof(void *) > 4 &&                 // do not test 32-bit systems,
+      status == 0 && rlim.rlim_cur > STK) { // or small initial stack size
+#pragma omp parallel reduction(+:val)
+    {
+      val += foo(m, omp_get_thread_num());
+    }
+  } else {
+    printf("too small stack size limit (needs about 8MB), skipping test...\n");
+    return 0;
+  }
+  if (val > 0.1) {
+    printf("passed\n");
+    return 0;
+  } else {
+    printf("failed, val = %f\n", val);
+    return 1;
+  }
+#endif // _WIN32
+}

From 5659b36c15b0c2ad5b3ea18fcff433b15b0ffecb Mon Sep 17 00:00:00 2001
From: Antonio Afonso <antonio.afonso@gmail.com>
Date: Wed, 5 Jun 2019 16:22:33 +0000
Subject: [PATCH 1133/1176] [DynamicLoader] Make sure we always set the
 rendezvous breakpoint

Summary:
Once we've attached to the process we load all current modules and also set a breakpoint at the rendezvous break address.
However, we don't do this if we already have a load address for the image info address (e.g.: DT_DEBUG on ELF). This code was added 4 years ago when adding support for `$qXfer:Libraries:` packet (https://reviews.llvm.org/D9471) but its intention is not 100% clear to me. It seems to me we're using that check to know if the modules have already been loaded (which they have if `$qXfer:Libraries:` is supported by the gdb server) and skip loading the modules again in the following `if` block. The problem is that we also skip setting the Rendezvous breakpoint so we stop knowing when the process loads new modules.
I fix this by moving the call to set the breakpoint to the end of the function so we always call it as long as we have a valid executable.

Reviewers: ADodds, clayborg, eugene, labath

Reviewed By: eugene, labath

Subscribers: lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D62168

llvm-svn: 362619
---
 .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp           | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
index 587b2d36accf2..591a27f4492ca 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp
@@ -150,11 +150,6 @@ void DynamicLoaderPOSIXDYLD::DidAttach() {
                          true);
 
     LoadAllCurrentModules();
-    if (!SetRendezvousBreakpoint()) {
-      // If we cannot establish rendezvous breakpoint right now we'll try again
-      // at entry point.
-      ProbeEntry();
-    }
 
     m_process->GetTarget().ModulesDidLoad(module_list);
     if (log) {
@@ -169,6 +164,14 @@ void DynamicLoaderPOSIXDYLD::DidAttach() {
       }
     }
   }
+
+  if (executable_sp.get()) {
+    if (!SetRendezvousBreakpoint()) {
+      // If we cannot establish rendezvous breakpoint right now we'll try again
+      // at entry point.
+      ProbeEntry();
+    }
+  }
 }
 
 void DynamicLoaderPOSIXDYLD::DidLaunch() {

From 2bf82879bde8565c882c315b815784c6c5f64152 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 5 Jun 2019 16:40:57 +0000
Subject: [PATCH 1134/1176] [x86] split more 256-bit stores of concatenated
 vectors

As suggested in D62498 - collectConcatOps() matches both
concat_vectors and insert_subvector patterns, and we see
more test improvements by using the more general match.

llvm-svn: 362620
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   7 +-
 llvm/test/CodeGen/X86/memset-nonzero.ll       |   5 +-
 llvm/test/CodeGen/X86/oddshuffles.ll          | 157 ++++++++----------
 .../CodeGen/X86/x86-interleaved-access.ll     |  68 ++++----
 4 files changed, 113 insertions(+), 124 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 63f0c8b4004b3..c7f5ebfb98f81 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21085,9 +21085,10 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   // halves anyway, so the concat (vinsertf128) is purely an extra op.
   MVT StoreVT = StoredVal.getSimpleValueType();
   if (StoreVT.is256BitVector()) {
-    if (StoredVal.getOpcode() != ISD::CONCAT_VECTORS || !StoredVal.hasOneUse())
-      return SDValue();
-    return splitVectorStore(St, DAG);
+    SmallVector<SDValue, 4> CatOps;
+    if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
+      return splitVectorStore(St, DAG);
+    return SDValue();
   }
 
   assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
diff --git a/llvm/test/CodeGen/X86/memset-nonzero.ll b/llvm/test/CodeGen/X86/memset-nonzero.ll
index c5b701b84d46c..8166d009d72ff 100644
--- a/llvm/test/CodeGen/X86/memset-nonzero.ll
+++ b/llvm/test/CodeGen/X86/memset-nonzero.ll
@@ -259,9 +259,8 @@ define void @memset_32_nonconst_bytes(i8* %x, i8 %c) {
 ; AVX1-NEXT:    vmovd %esi, %xmm0
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vmovdqu %xmm0, 16(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: memset_32_nonconst_bytes:
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index ccabe102f6f9c..da54945782ab0 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1186,18 +1186,17 @@ define void @interleave_24i16_in(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2,
 ; AVX1-NEXT:    vmovdqu (%rcx), %xmm3
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,2,2]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0],xmm2[1,2],xmm4[3],xmm2[4,5],xmm4[6],xmm2[7]
-; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm4 = xmm4[0,1,2,3,u,u,4,5,6,7,u,u,8,9,10,11]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[0,0,0,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2],xmm4[3,4],xmm5[5],xmm4[6,7]
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
-; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,10,11,8,9,8,9,14,15,12,13,14,15]
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm3[2,2,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
-; AVX1-NEXT:    vmovdqu %xmm0, 32(%rdi)
-; AVX1-NEXT:    vmovups %ymm2, (%rdi)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm4 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm4 = xmm4[4,5,10,11,10,11,8,9,8,9,14,15,12,13,14,15]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm3[2,2,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1],xmm4[2,3],xmm5[4],xmm4[5,6],xmm5[7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,u,u,4,5,6,7,u,u,8,9,10,11]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm3[0,0,0,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4],xmm1[5],xmm0[6,7]
+; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT:    vmovdqu %xmm4, 32(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm2, 16(%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: interleave_24i16_in:
@@ -1802,17 +1801,16 @@ define void @splat3_128(<16 x i8> %a0, <16 x i8> %a1, <96 x i8> *%a2) {
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufb %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vpshufb %xmm6, %xmm5, %xmm5
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX1-NEXT:    vmovaps %ymm2, 64(%rdi)
-; AVX1-NEXT:    vmovaps %ymm1, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa %xmm4, 80(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 64(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm5, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: splat3_128:
@@ -1842,34 +1840,29 @@ define void @splat3_128(<16 x i8> %a0, <16 x i8> %a1, <96 x i8> *%a2) {
 ;
 ; XOP-LABEL: splat3_128:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm3 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm0[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm6 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm7 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm3 = xmm5[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm3 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm0[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm6 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm7 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm8 = xmm5[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
 ; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm5[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm1[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm1 = xmm7[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm7[0,1,2,3,4]
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm7 = [5,16,11,6,17,12,7,18,13,8,19,14,9,20,15,10]
-; XOP-NEXT:    vpperm %xmm7, %xmm5, %xmm3, %xmm3
-; XOP-NEXT:    vpperm %xmm7, %xmm0, %xmm6, %xmm0
-; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; XOP-NEXT:    vpperm %xmm7, %xmm6, %xmm5, %xmm3
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm5 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; XOP-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
-; XOP-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; XOP-NEXT:    vpshufb %xmm5, %xmm4, %xmm3
-; XOP-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
-; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; XOP-NEXT:    vmovaps %ymm1, 32(%rdi)
-; XOP-NEXT:    vmovaps %ymm0, (%rdi)
-; XOP-NEXT:    vmovaps %ymm2, 64(%rdi)
-; XOP-NEXT:    vzeroupper
+; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm1[5,6,7,8,9,10,11,12,13,14,15],xmm5[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
+; XOP-NEXT:    vmovdqa {{.*#+}} xmm3 = [5,16,11,6,17,12,7,18,13,8,19,14,9,20,15,10]
+; XOP-NEXT:    vpperm %xmm3, %xmm4, %xmm2, %xmm2
+; XOP-NEXT:    vpperm %xmm3, %xmm0, %xmm7, %xmm0
+; XOP-NEXT:    vpperm %xmm3, %xmm7, %xmm4, %xmm4
+; XOP-NEXT:    vpperm %xmm3, %xmm1, %xmm6, %xmm1
+; XOP-NEXT:    vpperm %xmm3, %xmm5, %xmm8, %xmm7
+; XOP-NEXT:    vpperm %xmm3, %xmm6, %xmm5, %xmm3
+; XOP-NEXT:    vmovdqa %xmm3, 80(%rdi)
+; XOP-NEXT:    vmovdqa %xmm7, 64(%rdi)
+; XOP-NEXT:    vmovdqa %xmm1, 48(%rdi)
+; XOP-NEXT:    vmovdqa %xmm4, 32(%rdi)
+; XOP-NEXT:    vmovdqa %xmm2, 16(%rdi)
+; XOP-NEXT:    vmovdqa %xmm0, (%rdi)
 ; XOP-NEXT:    retq
   %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -1980,16 +1973,16 @@ define void @splat3_256(<32 x i8> %a0, <96 x i8> *%a1) {
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vpshufb %xmm6, %xmm5, %xmm3
+; AVX1-NEXT:    vpshufb %xmm6, %xmm5, %xmm5
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm6, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX1-NEXT:    vmovaps %ymm2, 64(%rdi)
-; AVX1-NEXT:    vmovaps %ymm1, 32(%rdi)
-; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
+; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm4
+; AVX1-NEXT:    vmovdqa %xmm4, 80(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm2, 64(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm1, 48(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm5, 32(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm3, 16(%rdi)
+; AVX1-NEXT:    vmovdqa %xmm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -2018,34 +2011,30 @@ define void @splat3_256(<32 x i8> %a0, <96 x i8> *%a1) {
 ;
 ; XOP-LABEL: splat3_256:
 ; XOP:       # %bb.0:
-; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm3 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm1[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm0[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm6 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm7 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm3 = xmm5[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm5[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm1[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm1 = xmm7[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm2 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
-; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm7[0,1,2,3,4]
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm7 = [5,16,11,6,17,12,7,18,13,8,19,14,9,20,15,10]
-; XOP-NEXT:    vpperm %xmm7, %xmm5, %xmm3, %xmm3
-; XOP-NEXT:    vpperm %xmm7, %xmm0, %xmm6, %xmm0
-; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; XOP-NEXT:    vpperm %xmm7, %xmm6, %xmm5, %xmm3
-; XOP-NEXT:    vmovdqa {{.*#+}} xmm5 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; XOP-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
-; XOP-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; XOP-NEXT:    vpshufb %xmm5, %xmm4, %xmm3
-; XOP-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
-; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; XOP-NEXT:    vmovaps %ymm1, 32(%rdi)
-; XOP-NEXT:    vmovaps %ymm0, (%rdi)
-; XOP-NEXT:    vmovaps %ymm2, 64(%rdi)
+; XOP-NEXT:    vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; XOP-NEXT:    vpalignr {{.*#+}} xmm3 = xmm2[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm0[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm2[11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm6 = xmm3[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm7 = xmm1[5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm8 = xmm5[5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm1 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm5 = xmm2[5,6,7,8,9,10,11,12,13,14,15],xmm5[0,1,2,3,4]
+; XOP-NEXT:    vpalignr {{.*#+}} xmm4 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
+; XOP-NEXT:    vmovdqa {{.*#+}} xmm3 = [5,16,11,6,17,12,7,18,13,8,19,14,9,20,15,10]
+; XOP-NEXT:    vpperm %xmm3, %xmm4, %xmm1, %xmm1
+; XOP-NEXT:    vpperm %xmm3, %xmm0, %xmm7, %xmm0
+; XOP-NEXT:    vpperm %xmm3, %xmm7, %xmm4, %xmm4
+; XOP-NEXT:    vpperm %xmm3, %xmm2, %xmm6, %xmm2
+; XOP-NEXT:    vpperm %xmm3, %xmm5, %xmm8, %xmm7
+; XOP-NEXT:    vpperm %xmm3, %xmm6, %xmm5, %xmm3
+; XOP-NEXT:    vmovdqa %xmm3, 80(%rdi)
+; XOP-NEXT:    vmovdqa %xmm7, 64(%rdi)
+; XOP-NEXT:    vmovdqa %xmm2, 48(%rdi)
+; XOP-NEXT:    vmovdqa %xmm4, 32(%rdi)
+; XOP-NEXT:    vmovdqa %xmm1, 16(%rdi)
+; XOP-NEXT:    vmovdqa %xmm0, (%rdi)
 ; XOP-NEXT:    vzeroupper
 ; XOP-NEXT:    retq
   %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 8cd01b631d601..2728871fce970 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -1153,16 +1153,16 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm6, %xmm6
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm1, %ymm1
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm5, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
-; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
 ; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT:    vmovups %ymm0, 64(%rdi)
-; AVX1-NEXT:    vmovups %ymm2, 32(%rdi)
-; AVX1-NEXT:    vmovups %ymm1, (%rdi)
+; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vmovdqu %xmm3, 80(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm0, 64(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm5, 48(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm2, 32(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm6, 16(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm1, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -1255,37 +1255,37 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm5 = xmm9[5,6,7,8,9,10,11,12,13,14,15],xmm15[0,1,2,3,4]
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm9 = xmm15[5,6,7,8,9,10,11,12,13,14,15],xmm7[0,1,2,3,4]
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm0 = xmm4[5,6,7,8,9,10,11,12,13,14,15],xmm11[0,1,2,3,4]
-; AVX1-NEXT:    vpalignr {{.*#+}} xmm11 = xmm12[5,6,7,8,9,10,11,12,13,14,15],xmm6[0,1,2,3,4]
-; AVX1-NEXT:    vpalignr {{.*#+}} xmm4 = xmm6[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
+; AVX1-NEXT:    vpalignr {{.*#+}} xmm7 = xmm12[5,6,7,8,9,10,11,12,13,14,15],xmm6[0,1,2,3,4]
+; AVX1-NEXT:    vpalignr {{.*#+}} xmm11 = xmm6[5,6,7,8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4]
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm2 = xmm14[5,6,7,8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4]
 ; AVX1-NEXT:    vpalignr $5, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm6 # 16-byte Folded Reload
 ; AVX1-NEXT:    # xmm6 = mem[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
 ; AVX1-NEXT:    vpalignr {{.*#+}} xmm1 = xmm1[5,6,7,8,9,10,11,12,13,14,15],xmm14[0,1,2,3,4]
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
-; AVX1-NEXT:    vpshufb %xmm7, %xmm6, %xmm6
-; AVX1-NEXT:    vpshufb %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm2, %ymm2
-; AVX1-NEXT:    vpshufb %xmm7, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb %xmm7, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT:    vpshufb %xmm7, %xmm4, %xmm1
-; AVX1-NEXT:    vpshufb %xmm7, %xmm11, %xmm4
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
-; AVX1-NEXT:    vpshufb %xmm7, %xmm5, %xmm4
-; AVX1-NEXT:    vpshufb %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vpshufb %xmm7, %xmm9, %xmm4
-; AVX1-NEXT:    vpshufb %xmm7, %xmm10, %xmm5
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vpshufb %xmm7, %xmm13, %xmm5
-; AVX1-NEXT:    vpshufb %xmm7, %xmm8, %xmm6
-; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
-; AVX1-NEXT:    vmovups %ymm5, 160(%rdi)
-; AVX1-NEXT:    vmovups %ymm4, 128(%rdi)
-; AVX1-NEXT:    vmovups %ymm3, 96(%rdi)
-; AVX1-NEXT:    vmovups %ymm1, 64(%rdi)
-; AVX1-NEXT:    vmovups %ymm0, 32(%rdi)
-; AVX1-NEXT:    vmovups %ymm2, (%rdi)
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5]
+; AVX1-NEXT:    vpshufb %xmm4, %xmm6, %xmm6
+; AVX1-NEXT:    vpshufb %xmm4, %xmm2, %xmm14
+; AVX1-NEXT:    vpshufb %xmm4, %xmm1, %xmm12
+; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm4, %xmm7, %xmm7
+; AVX1-NEXT:    vpshufb %xmm4, %xmm11, %xmm1
+; AVX1-NEXT:    vpshufb %xmm4, %xmm5, %xmm5
+; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm11
+; AVX1-NEXT:    vpshufb %xmm4, %xmm9, %xmm9
+; AVX1-NEXT:    vpshufb %xmm4, %xmm10, %xmm2
+; AVX1-NEXT:    vpshufb %xmm4, %xmm8, %xmm3
+; AVX1-NEXT:    vpshufb %xmm4, %xmm13, %xmm4
+; AVX1-NEXT:    vmovdqu %xmm1, 80(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm7, 64(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm6, 16(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm14, (%rdi)
+; AVX1-NEXT:    vmovdqu %xmm0, 48(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm12, 32(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm4, 176(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm3, 160(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm5, 112(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm11, 96(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm2, 144(%rdi)
+; AVX1-NEXT:    vmovdqu %xmm9, 128(%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;

From 3027a2999c32d4cdedf9b639b61fc4653f1bba4b Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 5 Jun 2019 17:14:32 +0000
Subject: [PATCH 1135/1176] [dsymutil] Support more than 4 architectures

When running dsymutil on a fat binary, we use temporary files in a small
vector of size four. When processing more than 4 architectures, this
resulted in a user-after-move, because the temporary files got moved to
the heap. Instead of storing an optional temp file, we now use a unique
pointer, so the location of the actual temp file doesn't change.

We could test this by checking in 5 binaries for 5 different
architectures, but this seems wasteful, especially since the number of
elements in the small vector is arbitrary.

llvm-svn: 362621
---
 llvm/tools/dsymutil/MachOUtils.cpp | 2 +-
 llvm/tools/dsymutil/MachOUtils.h   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/dsymutil/MachOUtils.cpp b/llvm/tools/dsymutil/MachOUtils.cpp
index b7ab35274f9e8..cd0f2805dc25a 100644
--- a/llvm/tools/dsymutil/MachOUtils.cpp
+++ b/llvm/tools/dsymutil/MachOUtils.cpp
@@ -35,7 +35,7 @@ llvm::Error ArchAndFile::createTempFile() {
   if (!T)
     return T.takeError();
 
-  File = llvm::Optional<sys::fs::TempFile>(std::move(*T));
+  File = llvm::make_unique<sys::fs::TempFile>(std::move(*T));
   return Error::success();
 }
 
diff --git a/llvm/tools/dsymutil/MachOUtils.h b/llvm/tools/dsymutil/MachOUtils.h
index 83b648122b78b..bc88f58cf0fdd 100644
--- a/llvm/tools/dsymutil/MachOUtils.h
+++ b/llvm/tools/dsymutil/MachOUtils.h
@@ -26,13 +26,14 @@ namespace MachOUtils {
 struct ArchAndFile {
   std::string Arch;
   // Optional because TempFile has no default constructor.
-  Optional<llvm::sys::fs::TempFile> File;
+  std::unique_ptr<llvm::sys::fs::TempFile> File;
 
   llvm::Error createTempFile();
   llvm::StringRef path() const;
 
   ArchAndFile(StringRef Arch) : Arch(Arch) {}
   ArchAndFile(ArchAndFile &&A) = default;
+  ArchAndFile &operator=(ArchAndFile &&A) = default;
   ~ArchAndFile();
 };
 

From 77d6adc491ad8b9d83af1e8ae717ca007ce8ae7d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 17:26:29 +0000
Subject: [PATCH 1136/1176] Fix shadow local variable warning. NFCI.

llvm-svn: 362622
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index be1c10801655b..c494b9181e5c4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14965,7 +14965,7 @@ void DAGCombiner::getStoreMergeCandidates(
                             int64_t &Offset) -> bool {
     if (Other->isVolatile() || Other->isIndexed())
       return false;
-    SDValue Val = peekThroughBitcasts(Other->getValue());
+    SDValue OtherBC = peekThroughBitcasts(Other->getValue());
     // Allow merging constants of different types as integers.
     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                            : Other->getMemoryVT() != MemVT;
@@ -14973,7 +14973,7 @@ void DAGCombiner::getStoreMergeCandidates(
       if (NoTypeMatch)
         return false;
       // The Load's Base Ptr must also match
-      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
+      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
         if (LoadVT != OtherLd->getMemoryVT())
           return false;
@@ -14991,17 +14991,17 @@ void DAGCombiner::getStoreMergeCandidates(
     if (IsConstantSrc) {
       if (NoTypeMatch)
         return false;
-      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+      if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
         return false;
     }
     if (IsExtractVecSrc) {
       // Do not merge truncated stores here.
       if (Other->isTruncatingStore())
         return false;
-      if (!MemVT.bitsEq(Val.getValueType()))
+      if (!MemVT.bitsEq(OtherBC.getValueType()))
         return false;
-      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
-          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+          OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
         return false;
     }
     Ptr = BaseIndexOffset::match(Other, DAG);

From d97ea1bc1acae17cf3a1689c3e63149b0c7004b1 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Wed, 5 Jun 2019 17:29:00 +0000
Subject: [PATCH 1137/1176] [Clang] Fix pretty printing of CUDA address spaces

Patch by richardmembarth (Richard Membarth)!

Differential Revision: https://reviews.llvm.org/D54258

llvm-svn: 362623
---
 clang/lib/AST/TypePrinter.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index ca3e34666841b..8d5c37299e5fb 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1805,17 +1805,19 @@ void Qualifiers::print(raw_ostream &OS, const PrintingPolicy& Policy,
       case LangAS::opencl_private:
         break;
       case LangAS::opencl_constant:
-      case LangAS::cuda_constant:
         OS << "__constant";
         break;
       case LangAS::opencl_generic:
         OS << "__generic";
         break;
       case LangAS::cuda_device:
-        OS << "__device";
+        OS << "__device__";
+        break;
+      case LangAS::cuda_constant:
+        OS << "__constant__";
         break;
       case LangAS::cuda_shared:
-        OS << "__shared";
+        OS << "__shared__";
         break;
       default:
         OS << "__attribute__((address_space(";

From b67cb3cda05bb1fa08b4789423983a7ee3f3c5e4 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.bieneman@me.com>
Date: Wed, 5 Jun 2019 17:35:38 +0000
Subject: [PATCH 1138/1176] Use LTO capable linker

Summary:
In DistributionExample.cmake be sure we use a LTO
capable linker, the easiest to choose is lld.

Reviewers: beanz

Reviewed By: beanz

Patch By: winksaville

Subscribers: mgorny, mehdi_amini, inglorion, dexonsmith, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62279

llvm-svn: 362624
---
 clang/cmake/caches/DistributionExample-stage2.cmake | 2 +-
 clang/cmake/caches/DistributionExample.cmake        | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/clang/cmake/caches/DistributionExample-stage2.cmake b/clang/cmake/caches/DistributionExample-stage2.cmake
index f4d5d92d1d127..99d5dc0fd2fef 100644
--- a/clang/cmake/caches/DistributionExample-stage2.cmake
+++ b/clang/cmake/caches/DistributionExample-stage2.cmake
@@ -2,7 +2,7 @@
 # bootstrap build.
 
 set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld" CACHE STRING "")
-set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi" CACHE STRING "")
 
 set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64 CACHE STRING "")
 
diff --git a/clang/cmake/caches/DistributionExample.cmake b/clang/cmake/caches/DistributionExample.cmake
index 35493edd17f63..50fcc09cf078c 100644
--- a/clang/cmake/caches/DistributionExample.cmake
+++ b/clang/cmake/caches/DistributionExample.cmake
@@ -2,7 +2,7 @@
 
 #Enable LLVM projects and runtimes
 set(LLVM_ENABLE_PROJECTS "clang;clang-tools-extra;lld" CACHE STRING "")
-set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi" CACHE STRING "")
 
 # Only build the native target in stage1 since it is a throwaway build.
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
@@ -17,6 +17,11 @@ set(PACKAGE_VENDOR LLVM.org CACHE STRING "")
 # the proper LTO library dependencies can be connected.
 set(BOOTSTRAP_LLVM_ENABLE_LTO ON CACHE BOOL "")
 
+if (NOT APPLE)
+  # Since LLVM_ENABLE_LTO is ON we need a LTO capable linker
+  set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "")
+endif()
+
 # Expose stage2 targets through the stage1 build configuration.
 set(CLANG_BOOTSTRAP_TARGETS
   check-all

From 579c8df70130c4d41391d62f8f03e2404778945f Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 5 Jun 2019 17:39:37 +0000
Subject: [PATCH 1139/1176] [lld] Explicitly ignore comdat groups when parsing
 LTO object(s)

Any symbols defined in the LTO object are by definition the ones we
want in the final output so we skip the comdat group checking in those
cases.

This change makes the ELF code more explicit about this and means
that wasm and ELF do this in the same way.

Differential Revision: https://reviews.llvm.org/D62884

llvm-svn: 362625
---
 lld/ELF/Driver.cpp      |  3 +--
 lld/ELF/InputFiles.cpp  | 35 ++++++++++++++---------------------
 lld/ELF/InputFiles.h    | 10 +++-------
 lld/wasm/InputFiles.cpp | 14 +++++---------
 4 files changed, 23 insertions(+), 39 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index a90522a1092d8..9a7b762040661 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1506,9 +1506,8 @@ template <class ELFT> void LinkerDriver::compileBitcodeFiles() {
     LTO->add(*File);
 
   for (InputFile *File : LTO->compile()) {
-    DenseMap<CachedHashStringRef, const InputFile *> DummyGroups;
     auto *Obj = cast<ObjFile<ELFT>>(File);
-    Obj->parse(DummyGroups);
+    Obj->parse(/*IgnoreComdats=*/true);
     for (Symbol *Sym : Obj->getGlobalSymbols())
       Sym->parseSymbolVersion();
     ObjectFiles.push_back(File);
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index a863894c55720..dd384788e343b 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -178,13 +178,13 @@ template <class ELFT> static void doParseFile(InputFile *File) {
   // LLVM bitcode file
   if (auto *F = dyn_cast<BitcodeFile>(File)) {
     BitcodeFiles.push_back(F);
-    F->parse<ELFT>(Symtab->ComdatGroups);
+    F->parse<ELFT>();
     return;
   }
 
   // Regular object file
   ObjectFiles.push_back(File);
-  cast<ObjFile<ELFT>>(File)->parse(Symtab->ComdatGroups);
+  cast<ObjFile<ELFT>>(File)->parse();
 }
 
 // Add symbols in File to the symbol table.
@@ -449,14 +449,12 @@ template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() {
   return makeArrayRef(this->Symbols).slice(this->FirstGlobal);
 }
 
-template <class ELFT>
-void ObjFile<ELFT>::parse(
-    DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) {
+template <class ELFT> void ObjFile<ELFT>::parse(bool IgnoreComdats) {
   // Read a section table. JustSymbols is usually false.
   if (this->JustSymbols)
     initializeJustSymbols();
   else
-    initializeSections(ComdatGroups);
+    initializeSections(IgnoreComdats);
 
   // Read a symbol table.
   initializeSymbols();
@@ -564,8 +562,7 @@ static void addDependentLibrary(StringRef Specifier, const InputFile *F) {
 }
 
 template <class ELFT>
-void ObjFile<ELFT>::initializeSections(
-    DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) {
+void ObjFile<ELFT>::initializeSections(bool IgnoreComdats) {
   const ELFFile<ELFT> &Obj = this->getObj();
 
   ArrayRef<Elf_Shdr> ObjSections = CHECK(Obj.sections(), this);
@@ -625,7 +622,9 @@ void ObjFile<ELFT>::initializeSections(
         fatal(toString(this) + ": unsupported SHT_GROUP format");
 
       bool IsNew =
-          ComdatGroups.try_emplace(CachedHashStringRef(Signature), this).second;
+          IgnoreComdats ||
+          Symtab->ComdatGroups.try_emplace(CachedHashStringRef(Signature), this)
+              .second;
       if (IsNew) {
         if (Config->Relocatable)
           this->Sections[I] = createInputSection(Sec);
@@ -1478,13 +1477,11 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
   return Symtab->addSymbol(New);
 }
 
-template <class ELFT>
-void BitcodeFile::parse(
-    DenseMap<CachedHashStringRef, const InputFile *> &ComdatGroups) {
+template <class ELFT> void BitcodeFile::parse() {
   std::vector<bool> KeptComdats;
   for (StringRef S : Obj->getComdatTable())
     KeptComdats.push_back(
-        ComdatGroups.try_emplace(CachedHashStringRef(S), this).second);
+        Symtab->ComdatGroups.try_emplace(CachedHashStringRef(S), this).second);
 
   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
     Symbols.push_back(createBitcodeSymbol<ELFT>(KeptComdats, ObjSym, *this));
@@ -1617,14 +1614,10 @@ std::string elf::replaceThinLTOSuffix(StringRef Path) {
   return Path;
 }
 
-template void
-BitcodeFile::parse<ELF32LE>(DenseMap<CachedHashStringRef, const InputFile *> &);
-template void
-BitcodeFile::parse<ELF32BE>(DenseMap<CachedHashStringRef, const InputFile *> &);
-template void
-BitcodeFile::parse<ELF64LE>(DenseMap<CachedHashStringRef, const InputFile *> &);
-template void
-BitcodeFile::parse<ELF64BE>(DenseMap<CachedHashStringRef, const InputFile *> &);
+template void BitcodeFile::parse<ELF32LE>();
+template void BitcodeFile::parse<ELF32BE>();
+template void BitcodeFile::parse<ELF64LE>();
+template void BitcodeFile::parse<ELF64BE>();
 
 template void LazyObjFile::parse<ELF32LE>();
 template void LazyObjFile::parse<ELF32BE>();
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 64cd02022e214..98f2e811fa7fb 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -201,8 +201,7 @@ template <class ELFT> class ObjFile : public ELFFileBase {
     this->ArchiveName = ArchiveName;
   }
 
-  void parse(llvm::DenseMap<llvm::CachedHashStringRef, const InputFile *>
-                 &ComdatGroups);
+  void parse(bool IgnoreComdats = false);
 
   StringRef getShtGroupSignature(ArrayRef<Elf_Shdr> Sections,
                                  const Elf_Shdr &Sec);
@@ -250,8 +249,7 @@ template <class ELFT> class ObjFile : public ELFFileBase {
   ArrayRef<Elf_CGProfile> CGProfile;
 
 private:
-  void initializeSections(llvm::DenseMap<llvm::CachedHashStringRef,
-                                         const InputFile *> &ComdatGroups);
+  void initializeSections(bool IgnoreComdats);
   void initializeSymbols();
   void initializeJustSymbols();
   void initializeDwarf();
@@ -340,9 +338,7 @@ class BitcodeFile : public InputFile {
   BitcodeFile(MemoryBufferRef M, StringRef ArchiveName,
               uint64_t OffsetInArchive);
   static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
-  template <class ELFT>
-  void parse(llvm::DenseMap<llvm::CachedHashStringRef, const InputFile *>
-                 &ComdatGroups);
+  template <class ELFT> void parse();
   std::unique_ptr<llvm::lto::InputFile> Obj;
 };
 
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index e1c4fa7b747fc..23d3900c5ed76 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -306,11 +306,10 @@ void ObjFile::parse(bool IgnoreComdats) {
   TypeIsUsed.resize(getWasmObj()->types().size(), false);
 
   ArrayRef<StringRef> Comdats = WasmObj->linkingData().Comdats;
-  for (unsigned I = 0; I < Comdats.size(); ++I)
-    if (IgnoreComdats)
-      KeptComdats.push_back(true);
-    else
-      KeptComdats.push_back(Symtab->addComdat(Comdats[I]));
+  for (unsigned I = 0; I < Comdats.size(); ++I) {
+    bool IsNew = IgnoreComdats || Symtab->addComdat(Comdats[I]);
+    KeptComdats.push_back(IsNew);
+  }
 
   // Populate `Segments`.
   for (const WasmSegment &S : WasmObj->dataSegments())
@@ -535,10 +534,7 @@ void BitcodeFile::parse(bool IgnoreComdats) {
   }
   std::vector<bool> KeptComdats;
   for (StringRef S : Obj->getComdatTable())
-    if (IgnoreComdats)
-      KeptComdats.push_back(true);
-    else
-      KeptComdats.push_back(Symtab->addComdat(S));
+    KeptComdats.push_back(Symtab->addComdat(S));
 
   for (const lto::InputFile::Symbol &ObjSym : Obj->symbols())
     Symbols.push_back(createBitcodeSymbol(KeptComdats, ObjSym, *this));

From a282a61ba3a144a8f820c96446ba51a90134efcc Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 5 Jun 2019 17:50:45 +0000
Subject: [PATCH 1140/1176] [WebAssembly] Handle object parsing more like the
 ELF backend

Differential Revision: https://reviews.llvm.org/D62886

llvm-svn: 362626
---
 lld/wasm/InputFiles.cpp  |  4 ++--
 lld/wasm/InputFiles.h    | 11 +++--------
 lld/wasm/SymbolTable.cpp | 28 ++++++++++++++++++++++------
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 23d3900c5ed76..1d49f6385ba1c 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -454,7 +454,7 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &Sym, bool IsCalledDirectly) {
   llvm_unreachable("unknown symbol kind");
 }
 
-void ArchiveFile::parse(bool IgnoreComdats) {
+void ArchiveFile::parse() {
   // Parse a MemoryBufferRef as an archive file.
   LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
   File = CHECK(Archive::create(MB), toString(this));
@@ -524,7 +524,7 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
   return Symtab->addDefinedData(Name, Flags, &F, nullptr, 0, 0);
 }
 
-void BitcodeFile::parse(bool IgnoreComdats) {
+void BitcodeFile::parse() {
   Obj = check(lto::InputFile::create(MemoryBufferRef(
       MB.getBuffer(), Saver.save(ArchiveName + MB.getBufferIdentifier()))));
   Triple T(Obj->getTargetTriple());
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index f5b4532fa927c..57d36a8dc2ac1 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -51,9 +51,6 @@ class InputFile {
   // Returns the filename.
   StringRef getName() const { return MB.getBufferIdentifier(); }
 
-  // Reads a file (the constructor doesn't do that).
-  virtual void parse(bool IgnoreComdats = false) = 0;
-
   Kind kind() const { return FileKind; }
 
   // An archive file name if this file is created from an archive.
@@ -82,7 +79,7 @@ class ArchiveFile : public InputFile {
 
   void addMember(const llvm::object::Archive::Symbol *Sym);
 
-  void parse(bool IgnoreComdats) override;
+  void parse();
 
 private:
   std::unique_ptr<llvm::object::Archive> File;
@@ -98,7 +95,7 @@ class ObjFile : public InputFile {
   }
   static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
 
-  void parse(bool IgnoreComdats) override;
+  void parse(bool IgnoreComdats = false);
 
   // Returns the underlying wasm file.
   const WasmObjectFile *getWasmObj() const { return WasmObj.get(); }
@@ -150,8 +147,6 @@ class SharedFile : public InputFile {
 public:
   explicit SharedFile(MemoryBufferRef M) : InputFile(SharedKind, M) {}
   static bool classof(const InputFile *F) { return F->kind() == SharedKind; }
-
-  void parse(bool IgnoreComdats) override {}
 };
 
 // .bc file
@@ -163,7 +158,7 @@ class BitcodeFile : public InputFile {
   }
   static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; }
 
-  void parse(bool IgnoreComdats) override;
+  void parse();
   std::unique_ptr<llvm::lto::InputFile> Obj;
 };
 
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index c4a460f3ad7aa..74d3132e9f32b 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -28,17 +28,33 @@ SymbolTable *lld::wasm::Symtab;
 
 void SymbolTable::addFile(InputFile *File) {
   log("Processing: " + toString(File));
+
+  // .a file
+  if (auto *F = dyn_cast<ArchiveFile>(File)) {
+    F->parse();
+    return;
+  }
+
+  // .so file
+  if (auto *F = dyn_cast<SharedFile>(File)) {
+    SharedFiles.push_back(F);
+    return;
+  }
+
   if (Config->Trace)
     message(toString(File));
-  File->parse();
 
   // LLVM bitcode file
-  if (auto *F = dyn_cast<BitcodeFile>(File))
+  if (auto *F = dyn_cast<BitcodeFile>(File)) {
+    F->parse();
     BitcodeFiles.push_back(F);
-  else if (auto *F = dyn_cast<ObjFile>(File))
-    ObjectFiles.push_back(F);
-  else if (auto *F = dyn_cast<SharedFile>(File))
-    SharedFiles.push_back(F);
+    return;
+  }
+
+  // Regular object file
+  auto *F = cast<ObjFile>(File);
+  F->parse(false);
+  ObjectFiles.push_back(F);
 }
 
 // This function is where all the optimizations of link-time

From 0a31726d2002249f6a0df05587dcc1cf424e39ef Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Wed, 5 Jun 2019 18:00:27 +0000
Subject: [PATCH 1141/1176] [NFC][Reassociate] Regenerate CHECKs for
 fast-basictest.ll

llvm-svn: 362627
---
 .../Transforms/Reassociate/fast-basictest.ll  | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index ad94a79381708..58cfba3652761 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -16,8 +16,8 @@ define float @test1(float %arg) {
 ; Both 'reassoc' and 'nsz' are required.
 define float @test1_minimal(float %arg) {
 ; CHECK-LABEL: @test1_minimal(
-; CHECK-NEXT:    [[ARG_NEG:%.*]] = fsub reassoc nsz float -0.000000e+00, [[ARG:%.*]]
-; CHECK-NEXT:    ret float [[ARG_NEG]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[ARG:%.*]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %t1 = fsub reassoc nsz float -1.200000e+01, %arg
   %t2 = fadd reassoc nsz float %t1, 1.200000e+01
@@ -226,8 +226,8 @@ define float @test9(float %X) {
 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 define float @test9_reassoc_nsz(float %X) {
 ; CHECK-LABEL: @test9_reassoc_nsz(
-; CHECK-NEXT:    [[FACTOR:%.*]] = fmul reassoc nsz float [[X:%.*]], 9.400000e+01
-; CHECK-NEXT:    ret float [[FACTOR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 9.400000e+01
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %Y = fmul reassoc nsz float %X, 4.700000e+01
   %Z = fadd reassoc nsz float %Y, %Y
@@ -263,8 +263,8 @@ define float @test10(float %X) {
 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 define float @test10_reassoc_nsz(float %X) {
 ; CHECK-LABEL: @test10_reassoc_nsz(
-; CHECK-NEXT:    [[FACTOR:%.*]] = fmul reassoc nsz float [[X:%.*]], 4.000000e+00
-; CHECK-NEXT:    ret float [[FACTOR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 4.000000e+00
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %Y = fadd reassoc nsz float %X ,%X
   %Z = fadd reassoc nsz float %Y, %X
@@ -301,8 +301,8 @@ define float @test11(float %W) {
 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 define float @test11_reassoc_nsz(float %W) {
 ; CHECK-LABEL: @test11_reassoc_nsz(
-; CHECK-NEXT:    [[FACTOR:%.*]] = fmul reassoc nsz float [[W:%.*]], 3.810000e+02
-; CHECK-NEXT:    ret float [[FACTOR]]
+; CHECK-NEXT:    [[Z:%.*]] = fmul reassoc nsz float [[W:%.*]], 3.810000e+02
+; CHECK-NEXT:    ret float [[Z]]
 ;
   %X = fmul reassoc nsz float %W, 127.0
   %Y = fadd reassoc nsz float %X ,%X
@@ -341,9 +341,9 @@ define float @test12(float %X) {
 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
 define float @test12_reassoc_nsz(float %X) {
 ; CHECK-LABEL: @test12_reassoc_nsz(
-; CHECK-NEXT:    [[FACTOR:%.*]] = fmul reassoc nsz float [[X:%.*]], 3.000000e+00
-; CHECK-NEXT:    [[Z:%.*]] = fsub reassoc nsz float 6.000000e+00, [[FACTOR]]
-; CHECK-NEXT:    ret float [[Z]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc nsz float 6.000000e+00, [[TMP1]]
+; CHECK-NEXT:    ret float [[TMP2]]
 ;
   %A = fsub reassoc nsz float 1.000000e+00, %X
   %B = fsub reassoc nsz float 2.000000e+00, %X
@@ -402,8 +402,8 @@ define float @test13_reassoc(float %X1, float %X2, float %X3) {
 define float @test14(float %X1, float %X2) {
 ; CHECK-LABEL: @test14(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X1:%.*]], [[X2:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], 4.700000e+01
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[D1:%.*]] = fmul fast float [[TMP1]], 4.700000e+01
+; CHECK-NEXT:    ret float [[D1]]
 ;
   %B = fmul fast float %X1, 47.   ; X1*47
   %C = fmul fast float %X2, -47.  ; X2*-47
@@ -416,8 +416,8 @@ define float @test14(float %X1, float %X2) {
 define float @test14_reassoc_nsz(float %X1, float %X2) {
 ; CHECK-LABEL: @test14_reassoc_nsz(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X1:%.*]], [[X2:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], 4.700000e+01
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[D1:%.*]] = fmul reassoc nsz float [[TMP1]], 4.700000e+01
+; CHECK-NEXT:    ret float [[D1]]
 ;
   %B = fmul reassoc nsz float %X1, 47.   ; X1*47
   %C = fmul reassoc nsz float %X2, -47.  ; X2*-47

From 13dd125043fa40b2bf1f8b5d3143b20caf6c68c2 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Wed, 5 Jun 2019 18:00:59 +0000
Subject: [PATCH 1142/1176] [Tests] Add poison inference tests for indvars
 showing both existing transforms, and some room for improvement

llvm-svn: 362628
---
 .../IndVarSimplify/infer-poison-flags.ll      | 369 ++++++++++++++++++
 1 file changed, 369 insertions(+)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll

diff --git a/llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll b/llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll
new file mode 100644
index 0000000000000..a148bf75bb217
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/infer-poison-flags.ll
@@ -0,0 +1,369 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@A = external global i32
+
+define void @add_cr_nsw_nuw() {
+; CHECK-LABEL: @add_cr_nsw_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 1000
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+define void @add_cr_nuw() {
+; CHECK-LABEL: @add_cr_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], -1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, -1
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+define void @add_cr_nsw() {
+; CHECK-LABEL: @add_cr_nsw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 10
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ -10, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 10
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+define void @add_cr_none() {
+; CHECK-LABEL: @add_cr_none(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 10, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 0
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+define void @add_unknown_none(i32 %n) {
+; CHECK-LABEL: @add_unknown_none(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = add i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, %n
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+define void @sub_cr_nsw_nuw() {
+; CHECK-LABEL: @sub_cr_nsw_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = sub nsw i32 [[I]], -1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 1000
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = sub i32 %i, -1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 1000
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+
+define void @sub_unknown_none(i32 %n) {
+; CHECK-LABEL: @sub_unknown_none(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = sub i32 [[I]], -1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.next = sub i32 %i, -1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, %n
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+
+; NOTE: For the rest of these, it looks like we're failing to use a statically
+; computable backedge taken count to infer a range on the IV and thus fail to
+; prove flags via constant range reasoning.
+
+; TODO
+define void @mul_cr_nsw_nuw() {
+; CHECK-LABEL: @mul_cr_nsw_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = mul i32 [[I]], 2
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 1, %entry ], [ %i.next, %loop ]
+  %i.next = mul i32 %i, 2
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 1024
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+;; TODO
+define void @shl_cr_nsw_nuw() {
+; CHECK-LABEL: @shl_cr_nsw_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = shl i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 1, %entry ], [ %i.next, %loop ]
+  %i.next = shl i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 1024
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+; TODO
+define void @lshr_cr_nsw_nuw() {
+; CHECK-LABEL: @lshr_cr_nsw_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 1024, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = lshr i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 1024, %entry ], [ %i.next, %loop ]
+  %i.next = lshr i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 0
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+; TODO
+define void @lshr_cr_nuw() {
+; CHECK-LABEL: @lshr_cr_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = lshr i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ -1, %entry ], [ %i.next, %loop ]
+  %i.next = lshr i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 0
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+; TODO
+define void @ashr_cr_nsw_nuw() {
+; CHECK-LABEL: @ashr_cr_nsw_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 1024, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = ashr i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 1024, %entry ], [ %i.next, %loop ]
+  %i.next = ashr i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 0
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+; TODO
+define void @ashr_cr_nsw() {
+; CHECK-LABEL: @ashr_cr_nsw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ -1024, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = ashr i32 [[I]], 1
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[I_NEXT]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ -1024, %entry ], [ %i.next, %loop ]
+  %i.next = ashr i32 %i, 1
+  store i32 %i, i32* @A
+  %c = icmp ne i32 %i.next, 1
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret void
+}
+
+

From d0fff89b81650e1b7d6c9c73c035672099b0c2db Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 5 Jun 2019 18:25:09 +0000
Subject: [PATCH 1143/1176] [X86] Add the vector integer min/max instructions
 to isAssociativeAndCommutative.

As far as I know these should be freely reassociatable just like
the floating point MAXC/MINC instructions.

The *reduce* test changes are largely regressions and caused by
the "generic" CPU we default to not having a scheduler model.

The machine-combiner-int-vec.ll test shows the positive benefits
of this change.

Differential Revision: https://reviews.llvm.org/D62787

llvm-svn: 362629
---
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  84 +++++
 .../CodeGen/X86/horizontal-reduce-smax.ll     | 104 +++---
 .../CodeGen/X86/horizontal-reduce-smin.ll     | 104 +++---
 .../CodeGen/X86/horizontal-reduce-umax.ll     | 120 +++----
 .../CodeGen/X86/horizontal-reduce-umin.ll     | 100 +++---
 .../CodeGen/X86/machine-combiner-int-vec.ll   | 320 +++++++++---------
 .../CodeGen/X86/vector-reduce-smax-widen.ll   | 148 ++++----
 llvm/test/CodeGen/X86/vector-reduce-smax.ll   | 148 ++++----
 .../CodeGen/X86/vector-reduce-smin-widen.ll   | 148 ++++----
 llvm/test/CodeGen/X86/vector-reduce-smin.ll   | 148 ++++----
 .../CodeGen/X86/vector-reduce-umax-widen.ll   | 166 ++++-----
 llvm/test/CodeGen/X86/vector-reduce-umax.ll   | 166 ++++-----
 .../CodeGen/X86/vector-reduce-umin-widen.ll   | 146 ++++----
 llvm/test/CodeGen/X86/vector-reduce-umin.ll   | 146 ++++----
 14 files changed, 1066 insertions(+), 982 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2beb513d1a950..53b23c75737ee 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6922,6 +6922,18 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case X86::PADDQrr:
   case X86::PMULLWrr:
   case X86::PMULLDrr:
+  case X86::PMAXSBrr:
+  case X86::PMAXSDrr:
+  case X86::PMAXSWrr:
+  case X86::PMAXUBrr:
+  case X86::PMAXUDrr:
+  case X86::PMAXUWrr:
+  case X86::PMINSBrr:
+  case X86::PMINSDrr:
+  case X86::PMINSWrr:
+  case X86::PMINUBrr:
+  case X86::PMINUDrr:
+  case X86::PMINUWrr:
   case X86::VPANDrr:
   case X86::VPANDYrr:
   case X86::VPANDDZ128rr:
@@ -7025,6 +7037,78 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   case X86::VPMULLQZ128rr:
   case X86::VPMULLQZ256rr:
   case X86::VPMULLQZrr:
+  case X86::VPMAXSBrr:
+  case X86::VPMAXSBYrr:
+  case X86::VPMAXSBZ128rr:
+  case X86::VPMAXSBZ256rr:
+  case X86::VPMAXSBZrr:
+  case X86::VPMAXSDrr:
+  case X86::VPMAXSDYrr:
+  case X86::VPMAXSDZ128rr:
+  case X86::VPMAXSDZ256rr:
+  case X86::VPMAXSDZrr:
+  case X86::VPMAXSQZ128rr:
+  case X86::VPMAXSQZ256rr:
+  case X86::VPMAXSQZrr:
+  case X86::VPMAXSWrr:
+  case X86::VPMAXSWYrr:
+  case X86::VPMAXSWZ128rr:
+  case X86::VPMAXSWZ256rr:
+  case X86::VPMAXSWZrr:
+  case X86::VPMAXUBrr:
+  case X86::VPMAXUBYrr:
+  case X86::VPMAXUBZ128rr:
+  case X86::VPMAXUBZ256rr:
+  case X86::VPMAXUBZrr:
+  case X86::VPMAXUDrr:
+  case X86::VPMAXUDYrr:
+  case X86::VPMAXUDZ128rr:
+  case X86::VPMAXUDZ256rr:
+  case X86::VPMAXUDZrr:
+  case X86::VPMAXUQZ128rr:
+  case X86::VPMAXUQZ256rr:
+  case X86::VPMAXUQZrr:
+  case X86::VPMAXUWrr:
+  case X86::VPMAXUWYrr:
+  case X86::VPMAXUWZ128rr:
+  case X86::VPMAXUWZ256rr:
+  case X86::VPMAXUWZrr:
+  case X86::VPMINSBrr:
+  case X86::VPMINSBYrr:
+  case X86::VPMINSBZ128rr:
+  case X86::VPMINSBZ256rr:
+  case X86::VPMINSBZrr:
+  case X86::VPMINSDrr:
+  case X86::VPMINSDYrr:
+  case X86::VPMINSDZ128rr:
+  case X86::VPMINSDZ256rr:
+  case X86::VPMINSDZrr:
+  case X86::VPMINSQZ128rr:
+  case X86::VPMINSQZ256rr:
+  case X86::VPMINSQZrr:
+  case X86::VPMINSWrr:
+  case X86::VPMINSWYrr:
+  case X86::VPMINSWZ128rr:
+  case X86::VPMINSWZ256rr:
+  case X86::VPMINSWZrr:
+  case X86::VPMINUBrr:
+  case X86::VPMINUBYrr:
+  case X86::VPMINUBZ128rr:
+  case X86::VPMINUBZ256rr:
+  case X86::VPMINUBZrr:
+  case X86::VPMINUDrr:
+  case X86::VPMINUDYrr:
+  case X86::VPMINUDZ128rr:
+  case X86::VPMINUDZ256rr:
+  case X86::VPMINUDZrr:
+  case X86::VPMINUQZ128rr:
+  case X86::VPMINUQZ256rr:
+  case X86::VPMINUQZrr:
+  case X86::VPMINUWrr:
+  case X86::VPMINUWYrr:
+  case X86::VPMINUWZ128rr:
+  case X86::VPMINUWZ256rr:
+  case X86::VPMINUWZrr:
   // Normal min/max instructions are not commutative because of NaN and signed
   // zero semantics, but these are. Thus, there's no need to check for global
   // relaxed math; the instructions themselves have the properties we need.
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
index f5328cf151d1c..161095377cd03 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
@@ -1331,13 +1331,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v16i32:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pmaxsd %xmm3, %xmm1
-; X86-SSE42-NEXT:    pmaxsd %xmm2, %xmm0
-; X86-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE42-NEXT:    pmaxsd %xmm2, %xmm1
 ; X86-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
-; X86-SSE42-NEXT:    movd %xmm0, %eax
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X86-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
+; X86-SSE42-NEXT:    movd %xmm1, %eax
 ; X86-SSE42-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: test_reduce_v16i32:
@@ -1345,8 +1345,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpmaxsd %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1403,13 +1403,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v16i32:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pmaxsd %xmm3, %xmm1
-; X64-SSE42-NEXT:    pmaxsd %xmm2, %xmm0
-; X64-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE42-NEXT:    pmaxsd %xmm2, %xmm1
 ; X64-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE42-NEXT:    pmaxsd %xmm1, %xmm0
-; X64-SSE42-NEXT:    movd %xmm0, %eax
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X64-SSE42-NEXT:    pmaxsd %xmm0, %xmm1
+; X64-SSE42-NEXT:    movd %xmm1, %eax
 ; X64-SSE42-NEXT:    retq
 ;
 ; X64-AVX1-LABEL: test_reduce_v16i32:
@@ -1417,8 +1417,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpmaxsd %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1473,26 +1473,26 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE2-LABEL: test_reduce_v32i16:
 ; X86-SSE2:       ## %bb.0:
 ; X86-SSE2-NEXT:    pmaxsw %xmm3, %xmm1
-; X86-SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT:    pmaxsw %xmm2, %xmm1
 ; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrld $16, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT:    movd %xmm1, %eax
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:    psrld $16, %xmm0
+; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
+; X86-SSE2-NEXT:    movd %xmm0, %eax
 ; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-SSE42-LABEL: test_reduce_v32i16:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pmaxsw %xmm3, %xmm1
-; X86-SSE42-NEXT:    pmaxsw %xmm2, %xmm0
-; X86-SSE42-NEXT:    pmaxsw %xmm1, %xmm0
-; X86-SSE42-NEXT:    pxor LCPI10_0, %xmm0
-; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT:    pmaxsw %xmm2, %xmm1
+; X86-SSE42-NEXT:    pmaxsw %xmm0, %xmm1
+; X86-SSE42-NEXT:    pxor LCPI10_0, %xmm1
+; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT:    movd %xmm0, %eax
 ; X86-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
 ; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1503,8 +1503,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpxor LCPI10_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1529,26 +1529,26 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE2-LABEL: test_reduce_v32i16:
 ; X64-SSE2:       ## %bb.0:
 ; X64-SSE2-NEXT:    pmaxsw %xmm3, %xmm1
-; X64-SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT:    pmaxsw %xmm2, %xmm1
 ; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT:    psrld $16, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X64-SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT:    movd %xmm1, %eax
+; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X64-SSE2-NEXT:    psrld $16, %xmm0
+; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
+; X64-SSE2-NEXT:    movd %xmm0, %eax
 ; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-SSE2-NEXT:    retq
 ;
 ; X64-SSE42-LABEL: test_reduce_v32i16:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pmaxsw %xmm3, %xmm1
-; X64-SSE42-NEXT:    pmaxsw %xmm2, %xmm0
-; X64-SSE42-NEXT:    pmaxsw %xmm1, %xmm0
-; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
-; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT:    pmaxsw %xmm2, %xmm1
+; X64-SSE42-NEXT:    pmaxsw %xmm0, %xmm1
+; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT:    movd %xmm0, %eax
 ; X64-SSE42-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1559,8 +1559,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1665,13 +1665,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v64i8:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pmaxsb %xmm3, %xmm1
-; X86-SSE42-NEXT:    pmaxsb %xmm2, %xmm0
-; X86-SSE42-NEXT:    pmaxsb %xmm1, %xmm0
-; X86-SSE42-NEXT:    pxor LCPI11_0, %xmm0
-; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT:    psrlw $8, %xmm1
-; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
-; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X86-SSE42-NEXT:    pmaxsb %xmm2, %xmm1
+; X86-SSE42-NEXT:    pmaxsb %xmm0, %xmm1
+; X86-SSE42-NEXT:    pxor LCPI11_0, %xmm1
+; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE42-NEXT:    psrlw $8, %xmm0
+; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
+; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X86-SSE42-NEXT:    xorb $127, %al
 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
@@ -1682,8 +1682,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpmaxsb %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpxor LCPI11_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -1759,13 +1759,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v64i8:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pmaxsb %xmm3, %xmm1
-; X64-SSE42-NEXT:    pmaxsb %xmm2, %xmm0
-; X64-SSE42-NEXT:    pmaxsb %xmm1, %xmm0
-; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
-; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT:    psrlw $8, %xmm1
-; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
-; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X64-SSE42-NEXT:    pmaxsb %xmm2, %xmm1
+; X64-SSE42-NEXT:    pmaxsb %xmm0, %xmm1
+; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
+; X64-SSE42-NEXT:    psrlw $8, %xmm0
+; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
+; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X64-SSE42-NEXT:    xorb $127, %al
 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
@@ -1776,8 +1776,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpmaxsb %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
index 212467078d257..af3c39e546cd1 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
@@ -1335,13 +1335,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v16i32:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pminsd %xmm3, %xmm1
-; X86-SSE42-NEXT:    pminsd %xmm2, %xmm0
-; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE42-NEXT:    pminsd %xmm2, %xmm1
 ; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
-; X86-SSE42-NEXT:    movd %xmm0, %eax
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
+; X86-SSE42-NEXT:    movd %xmm1, %eax
 ; X86-SSE42-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: test_reduce_v16i32:
@@ -1349,8 +1349,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1407,13 +1407,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v16i32:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pminsd %xmm3, %xmm1
-; X64-SSE42-NEXT:    pminsd %xmm2, %xmm0
-; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE42-NEXT:    pminsd %xmm2, %xmm1
 ; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
-; X64-SSE42-NEXT:    movd %xmm0, %eax
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
+; X64-SSE42-NEXT:    movd %xmm1, %eax
 ; X64-SSE42-NEXT:    retq
 ;
 ; X64-AVX1-LABEL: test_reduce_v16i32:
@@ -1421,8 +1421,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1477,26 +1477,26 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE2-LABEL: test_reduce_v32i16:
 ; X86-SSE2:       ## %bb.0:
 ; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
-; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
-; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT:    pminsw %xmm2, %xmm1
 ; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrld $16, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT:    movd %xmm1, %eax
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT:    psrld $16, %xmm0
+; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
+; X86-SSE2-NEXT:    movd %xmm0, %eax
 ; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-SSE42-LABEL: test_reduce_v32i16:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pminsw %xmm3, %xmm1
-; X86-SSE42-NEXT:    pminsw %xmm2, %xmm0
-; X86-SSE42-NEXT:    pminsw %xmm1, %xmm0
-; X86-SSE42-NEXT:    pxor LCPI10_0, %xmm0
-; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT:    pminsw %xmm2, %xmm1
+; X86-SSE42-NEXT:    pminsw %xmm0, %xmm1
+; X86-SSE42-NEXT:    pxor LCPI10_0, %xmm1
+; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT:    movd %xmm0, %eax
 ; X86-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
 ; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1507,8 +1507,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpxor LCPI10_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1533,26 +1533,26 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE2-LABEL: test_reduce_v32i16:
 ; X64-SSE2:       ## %bb.0:
 ; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
-; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
-; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT:    pminsw %xmm2, %xmm1
 ; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
-; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT:    psrld $16, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT:    movd %xmm1, %eax
+; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X64-SSE2-NEXT:    psrld $16, %xmm0
+; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
+; X64-SSE2-NEXT:    movd %xmm0, %eax
 ; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-SSE2-NEXT:    retq
 ;
 ; X64-SSE42-LABEL: test_reduce_v32i16:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pminsw %xmm3, %xmm1
-; X64-SSE42-NEXT:    pminsw %xmm2, %xmm0
-; X64-SSE42-NEXT:    pminsw %xmm1, %xmm0
-; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
-; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT:    pminsw %xmm2, %xmm1
+; X64-SSE42-NEXT:    pminsw %xmm0, %xmm1
+; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT:    movd %xmm0, %eax
 ; X64-SSE42-NEXT:    xorl $32768, %eax ## imm = 0x8000
 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1563,8 +1563,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1669,13 +1669,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v64i8:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pminsb %xmm3, %xmm1
-; X86-SSE42-NEXT:    pminsb %xmm2, %xmm0
-; X86-SSE42-NEXT:    pminsb %xmm1, %xmm0
-; X86-SSE42-NEXT:    pxor LCPI11_0, %xmm0
-; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT:    psrlw $8, %xmm1
-; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
-; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X86-SSE42-NEXT:    pminsb %xmm2, %xmm1
+; X86-SSE42-NEXT:    pminsb %xmm0, %xmm1
+; X86-SSE42-NEXT:    pxor LCPI11_0, %xmm1
+; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE42-NEXT:    psrlw $8, %xmm0
+; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
+; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X86-SSE42-NEXT:    xorb $-128, %al
 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
@@ -1686,8 +1686,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpxor LCPI11_0, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -1763,13 +1763,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v64i8:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pminsb %xmm3, %xmm1
-; X64-SSE42-NEXT:    pminsb %xmm2, %xmm0
-; X64-SSE42-NEXT:    pminsb %xmm1, %xmm0
-; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
-; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT:    psrlw $8, %xmm1
-; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
-; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X64-SSE42-NEXT:    pminsb %xmm2, %xmm1
+; X64-SSE42-NEXT:    pminsb %xmm0, %xmm1
+; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm1
+; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
+; X64-SSE42-NEXT:    psrlw $8, %xmm0
+; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
+; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X64-SSE42-NEXT:    xorb $-128, %al
 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
@@ -1780,8 +1780,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index 076f4d7d1d268..498d1bee2da5c 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -1524,13 +1524,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v16i32:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pmaxud %xmm3, %xmm1
-; X86-SSE42-NEXT:    pmaxud %xmm2, %xmm0
-; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE42-NEXT:    pmaxud %xmm2, %xmm1
 ; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
-; X86-SSE42-NEXT:    movd %xmm0, %eax
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
+; X86-SSE42-NEXT:    movd %xmm1, %eax
 ; X86-SSE42-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: test_reduce_v16i32:
@@ -1538,8 +1538,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpmaxud %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1611,13 +1611,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v16i32:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pmaxud %xmm3, %xmm1
-; X64-SSE42-NEXT:    pmaxud %xmm2, %xmm0
-; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE42-NEXT:    pmaxud %xmm2, %xmm1
 ; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
-; X64-SSE42-NEXT:    movd %xmm0, %eax
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
+; X64-SSE42-NEXT:    movd %xmm1, %eax
 ; X64-SSE42-NEXT:    retq
 ;
 ; X64-AVX1-LABEL: test_reduce_v16i32:
@@ -1625,8 +1625,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpmaxud %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1685,9 +1685,9 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT:    pmaxsw %xmm3, %xmm1
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
+; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm2
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
 ; X86-SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; X86-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1711,11 +1711,11 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v32i16:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pmaxuw %xmm3, %xmm1
-; X86-SSE42-NEXT:    pmaxuw %xmm2, %xmm0
-; X86-SSE42-NEXT:    pmaxuw %xmm1, %xmm0
-; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
-; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X86-SSE42-NEXT:    pmaxuw %xmm2, %xmm1
+; X86-SSE42-NEXT:    pmaxuw %xmm0, %xmm1
+; X86-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
+; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X86-SSE42-NEXT:    movd %xmm0, %eax
 ; X86-SSE42-NEXT:    notl %eax
 ; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1726,8 +1726,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1758,9 +1758,9 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT:    pmaxsw %xmm3, %xmm1
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
+; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm2
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
 ; X64-SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; X64-SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1784,11 +1784,11 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v32i16:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pmaxuw %xmm3, %xmm1
-; X64-SSE42-NEXT:    pmaxuw %xmm2, %xmm0
-; X64-SSE42-NEXT:    pmaxuw %xmm1, %xmm0
-; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
-; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X64-SSE42-NEXT:    pmaxuw %xmm2, %xmm1
+; X64-SSE42-NEXT:    pmaxuw %xmm0, %xmm1
+; X64-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
+; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
+; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X64-SSE42-NEXT:    movd %xmm0, %eax
 ; X64-SSE42-NEXT:    notl %eax
 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1799,8 +1799,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1860,33 +1860,33 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-SSE2-LABEL: test_reduce_v64i8:
 ; X86-SSE2:       ## %bb.0:
 ; X86-SSE2-NEXT:    pmaxub %xmm3, %xmm1
-; X86-SSE2-NEXT:    pmaxub %xmm2, %xmm0
-; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT:    pmaxub %xmm2, %xmm1
 ; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrld $16, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT:    psrlw $8, %xmm0
+; X86-SSE2-NEXT:    psrld $16, %xmm0
 ; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT:    psrlw $8, %xmm1
+; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
+; X86-SSE2-NEXT:    movd %xmm1, %eax
 ; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-SSE42-LABEL: test_reduce_v64i8:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pmaxub %xmm3, %xmm1
-; X86-SSE42-NEXT:    pmaxub %xmm2, %xmm0
-; X86-SSE42-NEXT:    pmaxub %xmm1, %xmm0
-; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
-; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
-; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
-; X86-SSE42-NEXT:    psrlw $8, %xmm0
-; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
-; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT:    pmaxub %xmm2, %xmm1
+; X86-SSE42-NEXT:    pmaxub %xmm0, %xmm1
+; X86-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
+; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE42-NEXT:    psrlw $8, %xmm1
+; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
+; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X86-SSE42-NEXT:    notb %al
 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
@@ -1897,8 +1897,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
@@ -1929,33 +1929,33 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-SSE2-LABEL: test_reduce_v64i8:
 ; X64-SSE2:       ## %bb.0:
 ; X64-SSE2-NEXT:    pmaxub %xmm3, %xmm1
-; X64-SSE2-NEXT:    pmaxub %xmm2, %xmm0
-; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT:    pmaxub %xmm2, %xmm1
 ; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT:    psrld $16, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT:    psrlw $8, %xmm0
+; X64-SSE2-NEXT:    psrld $16, %xmm0
 ; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; X64-SSE2-NEXT:    movd %xmm0, %eax
+; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT:    psrlw $8, %xmm1
+; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
+; X64-SSE2-NEXT:    movd %xmm1, %eax
 ; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-SSE2-NEXT:    retq
 ;
 ; X64-SSE42-LABEL: test_reduce_v64i8:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pmaxub %xmm3, %xmm1
-; X64-SSE42-NEXT:    pmaxub %xmm2, %xmm0
-; X64-SSE42-NEXT:    pmaxub %xmm1, %xmm0
-; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
-; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
-; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
-; X64-SSE42-NEXT:    psrlw $8, %xmm0
-; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
-; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT:    pmaxub %xmm2, %xmm1
+; X64-SSE42-NEXT:    pmaxub %xmm0, %xmm1
+; X64-SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
+; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
+; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
+; X64-SSE42-NEXT:    psrlw $8, %xmm1
+; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
+; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X64-SSE42-NEXT:    notb %al
 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
@@ -1966,8 +1966,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 5678671042220..e0b71051a0eeb 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -1428,13 +1428,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v16i32:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pminud %xmm3, %xmm1
-; X86-SSE42-NEXT:    pminud %xmm2, %xmm0
-; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE42-NEXT:    pminud %xmm2, %xmm1
 ; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
-; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
-; X86-SSE42-NEXT:    movd %xmm0, %eax
+; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
+; X86-SSE42-NEXT:    movd %xmm1, %eax
 ; X86-SSE42-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: test_reduce_v16i32:
@@ -1442,8 +1442,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpminud %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1515,13 +1515,13 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v16i32:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pminud %xmm3, %xmm1
-; X64-SSE42-NEXT:    pminud %xmm2, %xmm0
-; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE42-NEXT:    pminud %xmm2, %xmm1
 ; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
-; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
-; X64-SSE42-NEXT:    movd %xmm0, %eax
+; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
+; X64-SSE42-NEXT:    movd %xmm1, %eax
 ; X64-SSE42-NEXT:    retq
 ;
 ; X64-AVX1-LABEL: test_reduce_v16i32:
@@ -1529,8 +1529,8 @@ define i32 @test_reduce_v16i32(<16 x i32> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpminud %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1589,9 +1589,9 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
+; X86-SSE2-NEXT:    pminsw %xmm1, %xmm2
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
 ; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
-; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1615,9 +1615,9 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-SSE42-LABEL: test_reduce_v32i16:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pminuw %xmm3, %xmm1
-; X86-SSE42-NEXT:    pminuw %xmm2, %xmm0
-; X86-SSE42-NEXT:    pminuw %xmm1, %xmm0
-; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X86-SSE42-NEXT:    pminuw %xmm2, %xmm1
+; X86-SSE42-NEXT:    pminuw %xmm0, %xmm1
+; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X86-SSE42-NEXT:    movd %xmm0, %eax
 ; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-SSE42-NEXT:    retl
@@ -1627,8 +1627,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1653,9 +1653,9 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
+; X64-SSE2-NEXT:    pminsw %xmm1, %xmm2
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
 ; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
-; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1679,9 +1679,9 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-SSE42-LABEL: test_reduce_v32i16:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pminuw %xmm3, %xmm1
-; X64-SSE42-NEXT:    pminuw %xmm2, %xmm0
-; X64-SSE42-NEXT:    pminuw %xmm1, %xmm0
-; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
+; X64-SSE42-NEXT:    pminuw %xmm2, %xmm1
+; X64-SSE42-NEXT:    pminuw %xmm0, %xmm1
+; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
 ; X64-SSE42-NEXT:    movd %xmm0, %eax
 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT:    retq
@@ -1691,8 +1691,8 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
@@ -1744,31 +1744,31 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-SSE2-LABEL: test_reduce_v64i8:
 ; X86-SSE2:       ## %bb.0:
 ; X86-SSE2-NEXT:    pminub %xmm3, %xmm1
-; X86-SSE2-NEXT:    pminub %xmm2, %xmm0
-; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X86-SSE2-NEXT:    pminub %xmm2, %xmm1
 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrld $16, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT:    psrlw $8, %xmm0
+; X86-SSE2-NEXT:    psrld $16, %xmm0
 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT:    psrlw $8, %xmm1
+; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
+; X86-SSE2-NEXT:    movd %xmm1, %eax
 ; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-SSE42-LABEL: test_reduce_v64i8:
 ; X86-SSE42:       ## %bb.0:
 ; X86-SSE42-NEXT:    pminub %xmm3, %xmm1
-; X86-SSE42-NEXT:    pminub %xmm2, %xmm0
-; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
-; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE42-NEXT:    psrlw $8, %xmm1
+; X86-SSE42-NEXT:    pminub %xmm2, %xmm1
 ; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
-; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE42-NEXT:    psrlw $8, %xmm0
+; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
+; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-SSE42-NEXT:    retl
@@ -1778,8 +1778,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpminub %xmm2, %xmm1, %xmm1
 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1804,31 +1804,31 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-SSE2-LABEL: test_reduce_v64i8:
 ; X64-SSE2:       ## %bb.0:
 ; X64-SSE2-NEXT:    pminub %xmm3, %xmm1
-; X64-SSE2-NEXT:    pminub %xmm2, %xmm0
-; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-SSE2-NEXT:    pminub %xmm2, %xmm1
 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
-; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT:    psrld $16, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT:    psrlw $8, %xmm0
+; X64-SSE2-NEXT:    psrld $16, %xmm0
 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
-; X64-SSE2-NEXT:    movd %xmm0, %eax
+; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X64-SSE2-NEXT:    psrlw $8, %xmm1
+; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
+; X64-SSE2-NEXT:    movd %xmm1, %eax
 ; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-SSE2-NEXT:    retq
 ;
 ; X64-SSE42-LABEL: test_reduce_v64i8:
 ; X64-SSE42:       ## %bb.0:
 ; X64-SSE42-NEXT:    pminub %xmm3, %xmm1
-; X64-SSE42-NEXT:    pminub %xmm2, %xmm0
-; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
-; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
-; X64-SSE42-NEXT:    psrlw $8, %xmm1
+; X64-SSE42-NEXT:    pminub %xmm2, %xmm1
 ; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
-; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
+; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
+; X64-SSE42-NEXT:    psrlw $8, %xmm0
+; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
+; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT:    retq
@@ -1838,8 +1838,8 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X64-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
+; X64-AVX1-NEXT:    vpminub %xmm2, %xmm1, %xmm1
 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
index 52c4cd6ca9ab9..ff606a7df4101 100644
--- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -271,15 +271,15 @@ define <16 x i8> @reassociate_umax_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
 ; SSE-LABEL: reassociate_umax_v16i8:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    pmaxub %xmm3, %xmm2
 ; SSE-NEXT:    pmaxub %xmm2, %xmm0
-; SSE-NEXT:    pmaxub %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_umax_v16i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmaxub %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpmaxub %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpmaxub %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i8> %x0, %x1
@@ -306,8 +306,8 @@ define <8 x i16> @reassociate_umax_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
 ; AVX-LABEL: reassociate_umax_v8i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmaxuw %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpmaxuw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpmaxuw %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i16> %x0, %x1
@@ -344,8 +344,8 @@ define <4 x i32> @reassociate_umax_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
 ; AVX-LABEL: reassociate_umax_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmaxud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpmaxud %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpmaxud %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <4 x i32> %x0, %x1
@@ -409,8 +409,8 @@ define <2 x i64> @reassociate_umax_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
 ; AVX512-LABEL: reassociate_umax_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpmaxuq %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    vpmaxuq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    vpmaxuq %xmm3, %xmm2, %xmm1
+; AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <2 x i64> %x0, %x1
@@ -440,8 +440,8 @@ define <16 x i8> @reassociate_smax_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
 ; AVX-LABEL: reassociate_smax_v16i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmaxsb %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpmaxsb %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpmaxsb %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i8> %x0, %x1
@@ -456,15 +456,15 @@ define <8 x i16> @reassociate_smax_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
 ; SSE-LABEL: reassociate_smax_v8i16:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    pmaxsw %xmm3, %xmm2
 ; SSE-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE-NEXT:    pmaxsw %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_smax_v8i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmaxsw %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpmaxsw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpmaxsw %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i16> %x0, %x1
@@ -494,8 +494,8 @@ define <4 x i32> @reassociate_smax_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
 ; AVX-LABEL: reassociate_smax_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmaxsd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpmaxsd %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpmaxsd %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <4 x i32> %x0, %x1
@@ -554,8 +554,8 @@ define <2 x i64> @reassociate_smax_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
 ; AVX512-LABEL: reassociate_smax_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpmaxsq %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    vpmaxsq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    vpmaxsq %xmm3, %xmm2, %xmm1
+; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <2 x i64> %x0, %x1
@@ -570,15 +570,15 @@ define <16 x i8> @reassociate_umin_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
 ; SSE-LABEL: reassociate_umin_v16i8:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddb %xmm1, %xmm0
+; SSE-NEXT:    pminub %xmm3, %xmm2
 ; SSE-NEXT:    pminub %xmm2, %xmm0
-; SSE-NEXT:    pminub %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_umin_v16i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpminub %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpminub %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpminub %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i8> %x0, %x1
@@ -605,8 +605,8 @@ define <8 x i16> @reassociate_umin_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
 ; AVX-LABEL: reassociate_umin_v8i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpminuw %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpminuw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpminuw %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i16> %x0, %x1
@@ -642,8 +642,8 @@ define <4 x i32> @reassociate_umin_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
 ; AVX-LABEL: reassociate_umin_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpminud %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpminud %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpminud %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <4 x i32> %x0, %x1
@@ -707,8 +707,8 @@ define <2 x i64> @reassociate_umin_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
 ; AVX512-LABEL: reassociate_umin_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpminuq %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    vpminuq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    vpminuq %xmm3, %xmm2, %xmm1
+; AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <2 x i64> %x0, %x1
@@ -738,8 +738,8 @@ define <16 x i8> @reassociate_smin_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
 ; AVX-LABEL: reassociate_smin_v16i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpminsb %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpminsb %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpminsb %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i8> %x0, %x1
@@ -754,15 +754,15 @@ define <8 x i16> @reassociate_smin_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
 ; SSE-LABEL: reassociate_smin_v8i16:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    pminsw %xmm3, %xmm2
 ; SSE-NEXT:    pminsw %xmm2, %xmm0
-; SSE-NEXT:    pminsw %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_smin_v8i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpminsw %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpminsw %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpminsw %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i16> %x0, %x1
@@ -792,8 +792,8 @@ define <4 x i32> @reassociate_smin_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
 ; AVX-LABEL: reassociate_smin_v4i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpminsd %xmm0, %xmm2, %xmm0
-; AVX-NEXT:    vpminsd %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vpminsd %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 
   %t0 = add <4 x i32> %x0, %x1
@@ -852,8 +852,8 @@ define <2 x i64> @reassociate_smin_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
 ; AVX512-LABEL: reassociate_smin_v2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vpminsq %xmm0, %xmm2, %xmm0
-; AVX512-NEXT:    vpminsq %xmm0, %xmm3, %xmm0
+; AVX512-NEXT:    vpminsq %xmm3, %xmm2, %xmm1
+; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <2 x i64> %x0, %x1
@@ -871,17 +871,17 @@ define <32 x i8> @reassociate_umax_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddb %xmm2, %xmm0
 ; SSE-NEXT:    paddb %xmm3, %xmm1
-; SSE-NEXT:    pmaxub %xmm5, %xmm1
+; SSE-NEXT:    pmaxub %xmm6, %xmm4
 ; SSE-NEXT:    pmaxub %xmm4, %xmm0
-; SSE-NEXT:    pmaxub %xmm6, %xmm0
-; SSE-NEXT:    pmaxub %xmm7, %xmm1
+; SSE-NEXT:    pmaxub %xmm7, %xmm5
+; SSE-NEXT:    pmaxub %xmm5, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_umax_v32i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpmaxub %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpmaxub %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpmaxub %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <32 x i8> %x0, %x1
@@ -915,8 +915,8 @@ define <16 x i16> @reassociate_umax_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
 ; AVX-LABEL: reassociate_umax_v16i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpmaxuw %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpmaxuw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpmaxuw %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i16> %x0, %x1
@@ -970,8 +970,8 @@ define <8 x i32> @reassociate_umax_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
 ; AVX-LABEL: reassociate_umax_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpmaxud %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpmaxud %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpmaxud %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i32> %x0, %x1
@@ -1066,8 +1066,8 @@ define <4 x i64> @reassociate_umax_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
 ; AVX512-LABEL: reassociate_umax_v4i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpmaxuq %ymm0, %ymm2, %ymm0
-; AVX512-NEXT:    vpmaxuq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    vpmaxuq %ymm3, %ymm2, %ymm1
+; AVX512-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <4 x i64> %x0, %x1
@@ -1108,8 +1108,8 @@ define <32 x i8> @reassociate_smax_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
 ; AVX-LABEL: reassociate_smax_v32i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpmaxsb %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpmaxsb %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpmaxsb %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <32 x i8> %x0, %x1
@@ -1125,17 +1125,17 @@ define <16 x i16> @reassociate_smax_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddw %xmm2, %xmm0
 ; SSE-NEXT:    paddw %xmm3, %xmm1
-; SSE-NEXT:    pmaxsw %xmm5, %xmm1
+; SSE-NEXT:    pmaxsw %xmm6, %xmm4
 ; SSE-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE-NEXT:    pmaxsw %xmm6, %xmm0
-; SSE-NEXT:    pmaxsw %xmm7, %xmm1
+; SSE-NEXT:    pmaxsw %xmm7, %xmm5
+; SSE-NEXT:    pmaxsw %xmm5, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_smax_v16i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpmaxsw %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpmaxsw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpmaxsw %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i16> %x0, %x1
@@ -1176,8 +1176,8 @@ define <8 x i32> @reassociate_smax_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
 ; AVX-LABEL: reassociate_smax_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpmaxsd %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpmaxsd %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpmaxsd %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i32> %x0, %x1
@@ -1267,8 +1267,8 @@ define <4 x i64> @reassociate_smax_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
 ; AVX512-LABEL: reassociate_smax_v4i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpmaxsq %ymm0, %ymm2, %ymm0
-; AVX512-NEXT:    vpmaxsq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    vpmaxsq %ymm3, %ymm2, %ymm1
+; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <4 x i64> %x0, %x1
@@ -1284,17 +1284,17 @@ define <32 x i8> @reassociate_umin_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddb %xmm2, %xmm0
 ; SSE-NEXT:    paddb %xmm3, %xmm1
-; SSE-NEXT:    pminub %xmm5, %xmm1
+; SSE-NEXT:    pminub %xmm6, %xmm4
 ; SSE-NEXT:    pminub %xmm4, %xmm0
-; SSE-NEXT:    pminub %xmm6, %xmm0
-; SSE-NEXT:    pminub %xmm7, %xmm1
+; SSE-NEXT:    pminub %xmm7, %xmm5
+; SSE-NEXT:    pminub %xmm5, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_umin_v32i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpminub %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpminub %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpminub %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpminub %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <32 x i8> %x0, %x1
@@ -1328,8 +1328,8 @@ define <16 x i16> @reassociate_umin_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
 ; AVX-LABEL: reassociate_umin_v16i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpminuw %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpminuw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpminuw %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i16> %x0, %x1
@@ -1382,8 +1382,8 @@ define <8 x i32> @reassociate_umin_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
 ; AVX-LABEL: reassociate_umin_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpminud %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpminud %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpminud %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i32> %x0, %x1
@@ -1478,8 +1478,8 @@ define <4 x i64> @reassociate_umin_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
 ; AVX512-LABEL: reassociate_umin_v4i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpminuq %ymm0, %ymm2, %ymm0
-; AVX512-NEXT:    vpminuq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    vpminuq %ymm3, %ymm2, %ymm1
+; AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <4 x i64> %x0, %x1
@@ -1520,8 +1520,8 @@ define <32 x i8> @reassociate_smin_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
 ; AVX-LABEL: reassociate_smin_v32i8:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpminsb %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpminsb %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpminsb %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <32 x i8> %x0, %x1
@@ -1537,17 +1537,17 @@ define <16 x i16> @reassociate_smin_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    paddw %xmm2, %xmm0
 ; SSE-NEXT:    paddw %xmm3, %xmm1
-; SSE-NEXT:    pminsw %xmm5, %xmm1
+; SSE-NEXT:    pminsw %xmm6, %xmm4
 ; SSE-NEXT:    pminsw %xmm4, %xmm0
-; SSE-NEXT:    pminsw %xmm6, %xmm0
-; SSE-NEXT:    pminsw %xmm7, %xmm1
+; SSE-NEXT:    pminsw %xmm7, %xmm5
+; SSE-NEXT:    pminsw %xmm5, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: reassociate_smin_v16i16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpminsw %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpminsw %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpminsw %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <16 x i16> %x0, %x1
@@ -1588,8 +1588,8 @@ define <8 x i32> @reassociate_smin_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
 ; AVX-LABEL: reassociate_smin_v8i32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT:    vpminsd %ymm0, %ymm2, %ymm0
-; AVX-NEXT:    vpminsd %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vpminsd %ymm3, %ymm2, %ymm1
+; AVX-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
 
   %t0 = add <8 x i32> %x0, %x1
@@ -1679,8 +1679,8 @@ define <4 x i64> @reassociate_smin_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
 ; AVX512-LABEL: reassociate_smin_v4i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpminsq %ymm0, %ymm2, %ymm0
-; AVX512-NEXT:    vpminsq %ymm0, %ymm3, %ymm0
+; AVX512-NEXT:    vpminsq %ymm3, %ymm2, %ymm1
+; AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <4 x i64> %x0, %x1
@@ -1714,17 +1714,17 @@ define <64 x i8> @reassociate_umax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxub %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpmaxub %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpmaxub %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpmaxub %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpmaxub %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpmaxub %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_umax_v64i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxub %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxub %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxub %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxub %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
@@ -1781,17 +1781,17 @@ define <32 x i16> @reassociate_umax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxuw %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpmaxuw %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpmaxuw %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpmaxuw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpmaxuw %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxuw %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpmaxuw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_umax_v32i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxuw %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxuw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxuw %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
@@ -1890,17 +1890,17 @@ define <16 x i32> @reassociate_umax_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpmaxud %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpmaxud %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpmaxud %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpmaxud %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpmaxud %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_umax_v16i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxud %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxud %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxud %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <16 x i32> %x0, %x1
@@ -2074,8 +2074,8 @@ define <8 x i64> @reassociate_umax_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
 ; AVX512-LABEL: reassociate_umax_v8i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxuq %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxuq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxuq %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <8 x i64> %x0, %x1
@@ -2147,17 +2147,17 @@ define <64 x i8> @reassociate_smax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsb %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpmaxsb %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpmaxsb %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpmaxsb %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpmaxsb %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsb %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpmaxsb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_smax_v64i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsb %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxsb %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxsb %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
@@ -2189,17 +2189,17 @@ define <32 x i16> @reassociate_smax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsw %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpmaxsw %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpmaxsw %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpmaxsw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpmaxsw %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsw %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpmaxsw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_smax_v32i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsw %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxsw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxsw %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
@@ -2271,17 +2271,17 @@ define <16 x i32> @reassociate_smax_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpmaxsd %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpmaxsd %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpmaxsd %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpmaxsd %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpmaxsd %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_smax_v16i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsd %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxsd %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxsd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <16 x i32> %x0, %x1
@@ -2446,8 +2446,8 @@ define <8 x i64> @reassociate_smax_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
 ; AVX512-LABEL: reassociate_smax_v8i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpmaxsq %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpmaxsq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpmaxsq %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <8 x i64> %x0, %x1
@@ -2479,17 +2479,17 @@ define <64 x i8> @reassociate_umin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminub %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpminub %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpminub %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpminub %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpminub %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminub %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpminub %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_umin_v64i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminub %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminub %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminub %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminub %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
@@ -2546,17 +2546,17 @@ define <32 x i16> @reassociate_umin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminuw %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpminuw %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpminuw %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpminuw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpminuw %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminuw %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpminuw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_umin_v32i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminuw %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminuw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminuw %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminuw %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
@@ -2652,17 +2652,17 @@ define <16 x i32> @reassociate_umin_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminud %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpminud %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpminud %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpminud %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpminud %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpminud %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_umin_v16i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminud %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminud %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminud %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <16 x i32> %x0, %x1
@@ -2836,8 +2836,8 @@ define <8 x i64> @reassociate_umin_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
 ; AVX512-LABEL: reassociate_umin_v8i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminuq %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminuq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminuq %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <8 x i64> %x0, %x1
@@ -2909,17 +2909,17 @@ define <64 x i8> @reassociate_smin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsb %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpminsb %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpminsb %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpminsb %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpminsb %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsb %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpminsb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_smin_v64i8:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminsb %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminsb %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminsb %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminsb %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <64 x i8> %x0, %x1
@@ -2951,17 +2951,17 @@ define <32 x i16> @reassociate_smin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsw %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpminsw %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpminsw %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpminsw %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpminsw %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsw %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpminsw %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_smin_v32i16:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminsw %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminsw %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminsw %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminsw %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <32 x i16> %x0, %x1
@@ -3033,17 +3033,17 @@ define <16 x i32> @reassociate_smin_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsd %ymm1, %ymm5, %ymm1
-; AVX2-NEXT:    vpminsd %ymm0, %ymm4, %ymm0
-; AVX2-NEXT:    vpminsd %ymm0, %ymm6, %ymm0
-; AVX2-NEXT:    vpminsd %ymm1, %ymm7, %ymm1
+; AVX2-NEXT:    vpminsd %ymm6, %ymm4, %ymm2
+; AVX2-NEXT:    vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %ymm7, %ymm5, %ymm2
+; AVX2-NEXT:    vpminsd %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: reassociate_smin_v16i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminsd %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminsd %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminsd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <16 x i32> %x0, %x1
@@ -3208,8 +3208,8 @@ define <8 x i64> @reassociate_smin_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
 ; AVX512-LABEL: reassociate_smin_v8i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT:    vpminsq %zmm0, %zmm2, %zmm0
-; AVX512-NEXT:    vpminsq %zmm0, %zmm3, %zmm0
+; AVX512-NEXT:    vpminsq %zmm3, %zmm2, %zmm1
+; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
 
   %t0 = add <8 x i64> %x0, %x1
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
index 61cc52d8e2eab..1c18643b12500 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax-widen.ll
@@ -884,13 +884,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsd %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmaxsd %xmm2, %xmm1
 ; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -898,8 +898,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxsd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -994,14 +994,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsd %xmm6, %xmm2
-; SSE41-NEXT:    pmaxsd %xmm4, %xmm0
-; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
 ; SSE41-NEXT:    pmaxsd %xmm7, %xmm3
-; SSE41-NEXT:    pmaxsd %xmm5, %xmm1
-; SSE41-NEXT:    pmaxsd %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
+; SSE41-NEXT:    pmaxsd %xmm5, %xmm3
+; SSE41-NEXT:    pmaxsd %xmm1, %xmm3
+; SSE41-NEXT:    pmaxsd %xmm4, %xmm2
+; SSE41-NEXT:    pmaxsd %xmm3, %xmm2
+; SSE41-NEXT:    pmaxsd %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1010,16 +1010,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpmaxsd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpmaxsd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsd %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1031,7 +1031,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
@@ -1241,26 +1241,26 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-LABEL: test_v32i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxsw %xmm2, %xmm1
+; SSE41-NEXT:    pmaxsw %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1271,8 +1271,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1315,14 +1315,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-LABEL: test_v64i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxsw %xmm6, %xmm2
-; SSE2-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
 ; SSE2-NEXT:    pmaxsw %xmm7, %xmm3
-; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm3
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm3
+; SSE2-NEXT:    pmaxsw %xmm4, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm3, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -1335,14 +1335,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsw %xmm7, %xmm3
-; SSE41-NEXT:    pmaxsw %xmm5, %xmm1
-; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
+; SSE41-NEXT:    pmaxsw %xmm5, %xmm3
+; SSE41-NEXT:    pmaxsw %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxsw %xmm6, %xmm2
-; SSE41-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxsw %xmm4, %xmm2
+; SSE41-NEXT:    pmaxsw %xmm3, %xmm2
+; SSE41-NEXT:    pmaxsw %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    phminposuw %xmm2, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1354,13 +1354,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxsw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxsw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxsw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxsw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1372,7 +1372,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
@@ -1789,13 +1789,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxsb %xmm2, %xmm1
+; SSE41-NEXT:    pmaxsb %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $127, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -1806,8 +1806,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxsb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -1923,17 +1923,17 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsb %xmm7, %xmm3
-; SSE41-NEXT:    pmaxsb %xmm5, %xmm1
-; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
+; SSE41-NEXT:    pmaxsb %xmm5, %xmm3
+; SSE41-NEXT:    pmaxsb %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxsb %xmm6, %xmm2
-; SSE41-NEXT:    pmaxsb %xmm4, %xmm0
-; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxsb %xmm4, %xmm2
+; SSE41-NEXT:    pmaxsb %xmm3, %xmm2
+; SSE41-NEXT:    pmaxsb %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm2, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $127, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -1945,13 +1945,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxsb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxsb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxsb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxsb %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsb %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -1965,7 +1965,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsb %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
index 8e21e169b8d4c..f81998496c95c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll
@@ -958,13 +958,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsd %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmaxsd %xmm2, %xmm1
 ; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -972,8 +972,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxsd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1068,14 +1068,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsd %xmm6, %xmm2
-; SSE41-NEXT:    pmaxsd %xmm4, %xmm0
-; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
 ; SSE41-NEXT:    pmaxsd %xmm7, %xmm3
-; SSE41-NEXT:    pmaxsd %xmm5, %xmm1
-; SSE41-NEXT:    pmaxsd %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmaxsd %xmm1, %xmm0
+; SSE41-NEXT:    pmaxsd %xmm5, %xmm3
+; SSE41-NEXT:    pmaxsd %xmm1, %xmm3
+; SSE41-NEXT:    pmaxsd %xmm4, %xmm2
+; SSE41-NEXT:    pmaxsd %xmm3, %xmm2
+; SSE41-NEXT:    pmaxsd %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmaxsd %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmaxsd %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1084,16 +1084,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpmaxsd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpmaxsd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpmaxsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpmaxsd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpmaxsd %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsd %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1105,7 +1105,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
@@ -1461,26 +1461,26 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-LABEL: test_v32i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxsw %xmm2, %xmm1
+; SSE41-NEXT:    pmaxsw %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1491,8 +1491,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxsw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1535,14 +1535,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-LABEL: test_v64i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxsw %xmm6, %xmm2
-; SSE2-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
 ; SSE2-NEXT:    pmaxsw %xmm7, %xmm3
-; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
+; SSE2-NEXT:    pmaxsw %xmm5, %xmm3
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm3
+; SSE2-NEXT:    pmaxsw %xmm4, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm3, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -1555,14 +1555,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsw %xmm7, %xmm3
-; SSE41-NEXT:    pmaxsw %xmm5, %xmm1
-; SSE41-NEXT:    pmaxsw %xmm3, %xmm1
+; SSE41-NEXT:    pmaxsw %xmm5, %xmm3
+; SSE41-NEXT:    pmaxsw %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxsw %xmm6, %xmm2
-; SSE41-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE41-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxsw %xmm4, %xmm2
+; SSE41-NEXT:    pmaxsw %xmm3, %xmm2
+; SSE41-NEXT:    pmaxsw %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    phminposuw %xmm2, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32767, %eax # imm = 0x7FFF
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1574,13 +1574,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxsw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxsw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxsw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxsw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1592,7 +1592,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
@@ -2139,13 +2139,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
-; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxsb %xmm2, %xmm1
+; SSE41-NEXT:    pmaxsb %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $127, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2156,8 +2156,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxsb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -2273,17 +2273,17 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxsb %xmm7, %xmm3
-; SSE41-NEXT:    pmaxsb %xmm5, %xmm1
-; SSE41-NEXT:    pmaxsb %xmm3, %xmm1
+; SSE41-NEXT:    pmaxsb %xmm5, %xmm3
+; SSE41-NEXT:    pmaxsb %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxsb %xmm6, %xmm2
-; SSE41-NEXT:    pmaxsb %xmm4, %xmm0
-; SSE41-NEXT:    pmaxsb %xmm2, %xmm0
-; SSE41-NEXT:    pmaxsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxsb %xmm4, %xmm2
+; SSE41-NEXT:    pmaxsb %xmm3, %xmm2
+; SSE41-NEXT:    pmaxsb %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm2, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $127, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2295,13 +2295,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxsb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxsb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxsb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxsb %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxsb %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -2315,7 +2315,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxsb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxsb %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
index 8359d7d5fbbc3..0a6ada8dd3d3d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin-widen.ll
@@ -883,13 +883,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsd %xmm3, %xmm1
-; SSE41-NEXT:    pminsd %xmm2, %xmm0
-; SSE41-NEXT:    pminsd %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pminsd %xmm2, %xmm1
 ; SSE41-NEXT:    pminsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pminsd %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pminsd %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -897,8 +897,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminsd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -993,14 +993,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsd %xmm6, %xmm2
-; SSE41-NEXT:    pminsd %xmm4, %xmm0
-; SSE41-NEXT:    pminsd %xmm2, %xmm0
 ; SSE41-NEXT:    pminsd %xmm7, %xmm3
-; SSE41-NEXT:    pminsd %xmm5, %xmm1
-; SSE41-NEXT:    pminsd %xmm3, %xmm1
-; SSE41-NEXT:    pminsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pminsd %xmm1, %xmm0
+; SSE41-NEXT:    pminsd %xmm5, %xmm3
+; SSE41-NEXT:    pminsd %xmm1, %xmm3
+; SSE41-NEXT:    pminsd %xmm4, %xmm2
+; SSE41-NEXT:    pminsd %xmm3, %xmm2
+; SSE41-NEXT:    pminsd %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pminsd %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pminsd %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1009,16 +1009,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpminsd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpminsd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpminsd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpminsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpminsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpminsd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminsd %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsd %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1030,7 +1030,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
@@ -1240,26 +1240,26 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-LABEL: test_v32i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminsw %xmm3, %xmm1
-; SSE2-NEXT:    pminsw %xmm2, %xmm0
-; SSE2-NEXT:    pminsw %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pminsw %xmm2, %xmm1
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pminsw %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
-; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pminsw %xmm1, %xmm0
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsw %xmm3, %xmm1
-; SSE41-NEXT:    pminsw %xmm2, %xmm0
-; SSE41-NEXT:    pminsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminsw %xmm2, %xmm1
+; SSE41-NEXT:    pminsw %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1270,8 +1270,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminsw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1314,14 +1314,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-LABEL: test_v64i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminsw %xmm6, %xmm2
-; SSE2-NEXT:    pminsw %xmm4, %xmm0
-; SSE2-NEXT:    pminsw %xmm2, %xmm0
 ; SSE2-NEXT:    pminsw %xmm7, %xmm3
-; SSE2-NEXT:    pminsw %xmm5, %xmm1
-; SSE2-NEXT:    pminsw %xmm3, %xmm1
-; SSE2-NEXT:    pminsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pminsw %xmm1, %xmm0
+; SSE2-NEXT:    pminsw %xmm5, %xmm3
+; SSE2-NEXT:    pminsw %xmm1, %xmm3
+; SSE2-NEXT:    pminsw %xmm4, %xmm2
+; SSE2-NEXT:    pminsw %xmm3, %xmm2
+; SSE2-NEXT:    pminsw %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pminsw %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -1334,14 +1334,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsw %xmm7, %xmm3
-; SSE41-NEXT:    pminsw %xmm5, %xmm1
-; SSE41-NEXT:    pminsw %xmm3, %xmm1
+; SSE41-NEXT:    pminsw %xmm5, %xmm3
+; SSE41-NEXT:    pminsw %xmm1, %xmm3
 ; SSE41-NEXT:    pminsw %xmm6, %xmm2
-; SSE41-NEXT:    pminsw %xmm4, %xmm0
-; SSE41-NEXT:    pminsw %xmm2, %xmm0
-; SSE41-NEXT:    pminsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminsw %xmm4, %xmm2
+; SSE41-NEXT:    pminsw %xmm3, %xmm2
+; SSE41-NEXT:    pminsw %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    phminposuw %xmm2, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1353,13 +1353,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminsw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminsw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminsw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminsw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1371,7 +1371,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
@@ -1788,13 +1788,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsb %xmm3, %xmm1
-; SSE41-NEXT:    pminsb %xmm2, %xmm0
-; SSE41-NEXT:    pminsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pminsb %xmm2, %xmm1
+; SSE41-NEXT:    pminsb %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $-128, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -1805,8 +1805,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminsb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -1922,17 +1922,17 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsb %xmm7, %xmm3
-; SSE41-NEXT:    pminsb %xmm5, %xmm1
-; SSE41-NEXT:    pminsb %xmm3, %xmm1
+; SSE41-NEXT:    pminsb %xmm5, %xmm3
+; SSE41-NEXT:    pminsb %xmm1, %xmm3
 ; SSE41-NEXT:    pminsb %xmm6, %xmm2
-; SSE41-NEXT:    pminsb %xmm4, %xmm0
-; SSE41-NEXT:    pminsb %xmm2, %xmm0
-; SSE41-NEXT:    pminsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pminsb %xmm4, %xmm2
+; SSE41-NEXT:    pminsb %xmm3, %xmm2
+; SSE41-NEXT:    pminsb %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm2, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $-128, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -1944,13 +1944,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminsb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminsb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminsb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminsb %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsb %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -1964,7 +1964,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsb %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
index 0661bf5b2d361..4f2d15854e360 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll
@@ -957,13 +957,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsd %xmm3, %xmm1
-; SSE41-NEXT:    pminsd %xmm2, %xmm0
-; SSE41-NEXT:    pminsd %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pminsd %xmm2, %xmm1
 ; SSE41-NEXT:    pminsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pminsd %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pminsd %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -971,8 +971,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminsd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1067,14 +1067,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsd %xmm6, %xmm2
-; SSE41-NEXT:    pminsd %xmm4, %xmm0
-; SSE41-NEXT:    pminsd %xmm2, %xmm0
 ; SSE41-NEXT:    pminsd %xmm7, %xmm3
-; SSE41-NEXT:    pminsd %xmm5, %xmm1
-; SSE41-NEXT:    pminsd %xmm3, %xmm1
-; SSE41-NEXT:    pminsd %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pminsd %xmm1, %xmm0
+; SSE41-NEXT:    pminsd %xmm5, %xmm3
+; SSE41-NEXT:    pminsd %xmm1, %xmm3
+; SSE41-NEXT:    pminsd %xmm4, %xmm2
+; SSE41-NEXT:    pminsd %xmm3, %xmm2
+; SSE41-NEXT:    pminsd %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pminsd %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pminsd %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1083,16 +1083,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpminsd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpminsd %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpminsd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpminsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpminsd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpminsd %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminsd %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsd %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1104,7 +1104,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsd %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
@@ -1460,26 +1460,26 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-LABEL: test_v32i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminsw %xmm3, %xmm1
-; SSE2-NEXT:    pminsw %xmm2, %xmm0
-; SSE2-NEXT:    pminsw %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pminsw %xmm2, %xmm1
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pminsw %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
-; SSE2-NEXT:    movd %xmm1, %eax
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
+; SSE2-NEXT:    pminsw %xmm1, %xmm0
+; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsw %xmm3, %xmm1
-; SSE41-NEXT:    pminsw %xmm2, %xmm0
-; SSE41-NEXT:    pminsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminsw %xmm2, %xmm1
+; SSE41-NEXT:    pminsw %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1490,8 +1490,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminsw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1534,14 +1534,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-LABEL: test_v64i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminsw %xmm6, %xmm2
-; SSE2-NEXT:    pminsw %xmm4, %xmm0
-; SSE2-NEXT:    pminsw %xmm2, %xmm0
 ; SSE2-NEXT:    pminsw %xmm7, %xmm3
-; SSE2-NEXT:    pminsw %xmm5, %xmm1
-; SSE2-NEXT:    pminsw %xmm3, %xmm1
-; SSE2-NEXT:    pminsw %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pminsw %xmm1, %xmm0
+; SSE2-NEXT:    pminsw %xmm5, %xmm3
+; SSE2-NEXT:    pminsw %xmm1, %xmm3
+; SSE2-NEXT:    pminsw %xmm4, %xmm2
+; SSE2-NEXT:    pminsw %xmm3, %xmm2
+; SSE2-NEXT:    pminsw %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pminsw %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -1554,14 +1554,14 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsw %xmm7, %xmm3
-; SSE41-NEXT:    pminsw %xmm5, %xmm1
-; SSE41-NEXT:    pminsw %xmm3, %xmm1
+; SSE41-NEXT:    pminsw %xmm5, %xmm3
+; SSE41-NEXT:    pminsw %xmm1, %xmm3
 ; SSE41-NEXT:    pminsw %xmm6, %xmm2
-; SSE41-NEXT:    pminsw %xmm4, %xmm0
-; SSE41-NEXT:    pminsw %xmm2, %xmm0
-; SSE41-NEXT:    pminsw %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminsw %xmm4, %xmm2
+; SSE41-NEXT:    pminsw %xmm3, %xmm2
+; SSE41-NEXT:    pminsw %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    phminposuw %xmm2, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    xorl $32768, %eax # imm = 0x8000
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1573,13 +1573,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminsw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminsw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminsw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminsw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminsw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -1591,7 +1591,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
@@ -2138,13 +2138,13 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsb %xmm3, %xmm1
-; SSE41-NEXT:    pminsb %xmm2, %xmm0
-; SSE41-NEXT:    pminsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pminsb %xmm2, %xmm1
+; SSE41-NEXT:    pminsb %xmm0, %xmm1
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $-128, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2155,8 +2155,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminsb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -2272,17 +2272,17 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminsb %xmm7, %xmm3
-; SSE41-NEXT:    pminsb %xmm5, %xmm1
-; SSE41-NEXT:    pminsb %xmm3, %xmm1
+; SSE41-NEXT:    pminsb %xmm5, %xmm3
+; SSE41-NEXT:    pminsb %xmm1, %xmm3
 ; SSE41-NEXT:    pminsb %xmm6, %xmm2
-; SSE41-NEXT:    pminsb %xmm4, %xmm0
-; SSE41-NEXT:    pminsb %xmm2, %xmm0
-; SSE41-NEXT:    pminsb %xmm1, %xmm0
-; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pminsb %xmm4, %xmm2
+; SSE41-NEXT:    pminsb %xmm3, %xmm2
+; SSE41-NEXT:    pminsb %xmm0, %xmm2
+; SSE41-NEXT:    pxor {{.*}}(%rip), %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm2, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    xorb $-128, %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2294,13 +2294,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminsb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminsb %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminsb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminsb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminsb %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminsb %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
@@ -2314,7 +2314,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminsb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminsb %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
index 932fbce3ffc2f..a2c0653e126bb 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax-widen.ll
@@ -974,13 +974,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxud %xmm3, %xmm1
-; SSE41-NEXT:    pmaxud %xmm2, %xmm0
-; SSE41-NEXT:    pmaxud %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmaxud %xmm2, %xmm1
 ; SSE41-NEXT:    pmaxud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pmaxud %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmaxud %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -988,8 +988,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxud %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1111,14 +1111,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxud %xmm6, %xmm2
-; SSE41-NEXT:    pmaxud %xmm4, %xmm0
-; SSE41-NEXT:    pmaxud %xmm2, %xmm0
 ; SSE41-NEXT:    pmaxud %xmm7, %xmm3
-; SSE41-NEXT:    pmaxud %xmm5, %xmm1
-; SSE41-NEXT:    pmaxud %xmm3, %xmm1
-; SSE41-NEXT:    pmaxud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmaxud %xmm1, %xmm0
+; SSE41-NEXT:    pmaxud %xmm5, %xmm3
+; SSE41-NEXT:    pmaxud %xmm1, %xmm3
+; SSE41-NEXT:    pmaxud %xmm4, %xmm2
+; SSE41-NEXT:    pmaxud %xmm3, %xmm2
+; SSE41-NEXT:    pmaxud %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmaxud %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmaxud %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1127,16 +1127,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpmaxud %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpmaxud %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpmaxud %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpmaxud %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxud %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1148,7 +1148,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxud %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
@@ -1441,9 +1441,9 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
 ; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1467,11 +1467,11 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
-; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxuw %xmm2, %xmm1
+; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    notl %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1482,8 +1482,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1546,16 +1546,16 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm8, %xmm2
 ; SSE2-NEXT:    pmaxsw %xmm6, %xmm2
 ; SSE2-NEXT:    pxor %xmm8, %xmm4
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm4
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm7
 ; SSE2-NEXT:    pxor %xmm8, %xmm3
 ; SSE2-NEXT:    pmaxsw %xmm7, %xmm3
 ; SSE2-NEXT:    pxor %xmm8, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm3, %xmm5
 ; SSE2-NEXT:    pxor %xmm8, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
+; SSE2-NEXT:    pmaxsw %xmm4, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
@@ -1580,15 +1580,15 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxuw %xmm7, %xmm3
-; SSE41-NEXT:    pmaxuw %xmm5, %xmm1
-; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
+; SSE41-NEXT:    pmaxuw %xmm5, %xmm3
+; SSE41-NEXT:    pmaxuw %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxuw %xmm6, %xmm2
-; SSE41-NEXT:    pmaxuw %xmm4, %xmm0
-; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxuw %xmm4, %xmm2
+; SSE41-NEXT:    pmaxuw %xmm3, %xmm2
+; SSE41-NEXT:    pmaxuw %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    notl %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1600,13 +1600,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxuw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxuw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxuw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxuw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxuw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1619,7 +1619,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxuw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxuw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
@@ -1977,33 +1977,33 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-LABEL: test_v64i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxub %xmm3, %xmm1
-; SSE2-NEXT:    pmaxub %xmm2, %xmm0
-; SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pmaxub %xmm2, %xmm1
 ; SSE2-NEXT:    pmaxub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
 ; SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrlw $8, %xmm1
+; SSE2-NEXT:    pmaxub %xmm0, %xmm1
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxub %xmm3, %xmm1
-; SSE41-NEXT:    pmaxub %xmm2, %xmm0
-; SSE41-NEXT:    pmaxub %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxub %xmm2, %xmm1
+; SSE41-NEXT:    pmaxub %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pminub %xmm0, %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    notb %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2014,8 +2014,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
@@ -2082,14 +2082,14 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-LABEL: test_v128i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxub %xmm6, %xmm2
-; SSE2-NEXT:    pmaxub %xmm4, %xmm0
-; SSE2-NEXT:    pmaxub %xmm2, %xmm0
 ; SSE2-NEXT:    pmaxub %xmm7, %xmm3
-; SSE2-NEXT:    pmaxub %xmm5, %xmm1
-; SSE2-NEXT:    pmaxub %xmm3, %xmm1
-; SSE2-NEXT:    pmaxub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pmaxub %xmm1, %xmm0
+; SSE2-NEXT:    pmaxub %xmm5, %xmm3
+; SSE2-NEXT:    pmaxub %xmm1, %xmm3
+; SSE2-NEXT:    pmaxub %xmm4, %xmm2
+; SSE2-NEXT:    pmaxub %xmm3, %xmm2
+; SSE2-NEXT:    pmaxub %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pmaxub %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -2105,18 +2105,18 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxub %xmm7, %xmm3
-; SSE41-NEXT:    pmaxub %xmm5, %xmm1
-; SSE41-NEXT:    pmaxub %xmm3, %xmm1
+; SSE41-NEXT:    pmaxub %xmm5, %xmm3
+; SSE41-NEXT:    pmaxub %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxub %xmm6, %xmm2
-; SSE41-NEXT:    pmaxub %xmm4, %xmm0
-; SSE41-NEXT:    pmaxub %xmm2, %xmm0
-; SSE41-NEXT:    pmaxub %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxub %xmm4, %xmm2
+; SSE41-NEXT:    pmaxub %xmm3, %xmm2
+; SSE41-NEXT:    pmaxub %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pminub %xmm0, %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    notb %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2128,13 +2128,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxub %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxub %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxub %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxub %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
@@ -2149,7 +2149,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxub %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
index f6a2a57e4b8ca..087b25321c1e6 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -1023,13 +1023,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxud %xmm3, %xmm1
-; SSE41-NEXT:    pmaxud %xmm2, %xmm0
-; SSE41-NEXT:    pmaxud %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pmaxud %xmm2, %xmm1
 ; SSE41-NEXT:    pmaxud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pmaxud %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pmaxud %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -1037,8 +1037,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxud %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1160,14 +1160,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxud %xmm6, %xmm2
-; SSE41-NEXT:    pmaxud %xmm4, %xmm0
-; SSE41-NEXT:    pmaxud %xmm2, %xmm0
 ; SSE41-NEXT:    pmaxud %xmm7, %xmm3
-; SSE41-NEXT:    pmaxud %xmm5, %xmm1
-; SSE41-NEXT:    pmaxud %xmm3, %xmm1
-; SSE41-NEXT:    pmaxud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pmaxud %xmm1, %xmm0
+; SSE41-NEXT:    pmaxud %xmm5, %xmm3
+; SSE41-NEXT:    pmaxud %xmm1, %xmm3
+; SSE41-NEXT:    pmaxud %xmm4, %xmm2
+; SSE41-NEXT:    pmaxud %xmm3, %xmm2
+; SSE41-NEXT:    pmaxud %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pmaxud %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pmaxud %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1176,16 +1176,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpmaxud %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpmaxud %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpmaxud %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpmaxud %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxud %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1197,7 +1197,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxud %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxud %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
@@ -1556,9 +1556,9 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pmaxsw %xmm1, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
 ; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1582,11 +1582,11 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
-; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxuw %xmm2, %xmm1
+; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    notl %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1597,8 +1597,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxuw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1661,16 +1661,16 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm8, %xmm2
 ; SSE2-NEXT:    pmaxsw %xmm6, %xmm2
 ; SSE2-NEXT:    pxor %xmm8, %xmm4
+; SSE2-NEXT:    pmaxsw %xmm2, %xmm4
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm4, %xmm0
-; SSE2-NEXT:    pmaxsw %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm7
 ; SSE2-NEXT:    pxor %xmm8, %xmm3
 ; SSE2-NEXT:    pmaxsw %xmm7, %xmm3
 ; SSE2-NEXT:    pxor %xmm8, %xmm5
+; SSE2-NEXT:    pmaxsw %xmm3, %xmm5
 ; SSE2-NEXT:    pxor %xmm8, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm5, %xmm1
-; SSE2-NEXT:    pmaxsw %xmm3, %xmm1
+; SSE2-NEXT:    pmaxsw %xmm4, %xmm1
 ; SSE2-NEXT:    pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
@@ -1695,15 +1695,15 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxuw %xmm7, %xmm3
-; SSE41-NEXT:    pmaxuw %xmm5, %xmm1
-; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
+; SSE41-NEXT:    pmaxuw %xmm5, %xmm3
+; SSE41-NEXT:    pmaxuw %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxuw %xmm6, %xmm2
-; SSE41-NEXT:    pmaxuw %xmm4, %xmm0
-; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
-; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    pmaxuw %xmm4, %xmm2
+; SSE41-NEXT:    pmaxuw %xmm3, %xmm2
+; SSE41-NEXT:    pmaxuw %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    notl %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1715,13 +1715,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxuw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxuw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxuw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxuw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxuw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1734,7 +1734,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxuw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxuw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
@@ -2217,33 +2217,33 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-LABEL: test_v64i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxub %xmm3, %xmm1
-; SSE2-NEXT:    pmaxub %xmm2, %xmm0
-; SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pmaxub %xmm2, %xmm1
 ; SSE2-NEXT:    pmaxub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
 ; SSE2-NEXT:    pmaxub %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrlw $8, %xmm1
+; SSE2-NEXT:    pmaxub %xmm0, %xmm1
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxub %xmm3, %xmm1
-; SSE41-NEXT:    pmaxub %xmm2, %xmm0
-; SSE41-NEXT:    pmaxub %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxub %xmm2, %xmm1
+; SSE41-NEXT:    pmaxub %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pminub %xmm0, %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    notb %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2254,8 +2254,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpmaxub %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
@@ -2322,14 +2322,14 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-LABEL: test_v128i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pmaxub %xmm6, %xmm2
-; SSE2-NEXT:    pmaxub %xmm4, %xmm0
-; SSE2-NEXT:    pmaxub %xmm2, %xmm0
 ; SSE2-NEXT:    pmaxub %xmm7, %xmm3
-; SSE2-NEXT:    pmaxub %xmm5, %xmm1
-; SSE2-NEXT:    pmaxub %xmm3, %xmm1
-; SSE2-NEXT:    pmaxub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pmaxub %xmm1, %xmm0
+; SSE2-NEXT:    pmaxub %xmm5, %xmm3
+; SSE2-NEXT:    pmaxub %xmm1, %xmm3
+; SSE2-NEXT:    pmaxub %xmm4, %xmm2
+; SSE2-NEXT:    pmaxub %xmm3, %xmm2
+; SSE2-NEXT:    pmaxub %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pmaxub %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pmaxub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -2345,18 +2345,18 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pmaxub %xmm7, %xmm3
-; SSE41-NEXT:    pmaxub %xmm5, %xmm1
-; SSE41-NEXT:    pmaxub %xmm3, %xmm1
+; SSE41-NEXT:    pmaxub %xmm5, %xmm3
+; SSE41-NEXT:    pmaxub %xmm1, %xmm3
 ; SSE41-NEXT:    pmaxub %xmm6, %xmm2
-; SSE41-NEXT:    pmaxub %xmm4, %xmm0
-; SSE41-NEXT:    pmaxub %xmm2, %xmm0
-; SSE41-NEXT:    pmaxub %xmm1, %xmm0
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT:    pxor %xmm0, %xmm1
-; SSE41-NEXT:    movdqa %xmm1, %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pmaxub %xmm4, %xmm2
+; SSE41-NEXT:    pmaxub %xmm3, %xmm2
+; SSE41-NEXT:    pmaxub %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pminub %xmm0, %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    notb %al
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
@@ -2368,13 +2368,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpmaxub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpmaxub %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpmaxub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpmaxub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpmaxub %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmaxub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpmaxub %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmaxub %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
@@ -2389,7 +2389,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpmaxub %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmaxub %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
index 2f95c7eb0c0b6..acf817be1f776 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin-widen.ll
@@ -973,13 +973,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminud %xmm3, %xmm1
-; SSE41-NEXT:    pminud %xmm2, %xmm0
-; SSE41-NEXT:    pminud %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pminud %xmm2, %xmm1
 ; SSE41-NEXT:    pminud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pminud %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pminud %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -987,8 +987,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminud %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1110,14 +1110,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminud %xmm6, %xmm2
-; SSE41-NEXT:    pminud %xmm4, %xmm0
-; SSE41-NEXT:    pminud %xmm2, %xmm0
 ; SSE41-NEXT:    pminud %xmm7, %xmm3
-; SSE41-NEXT:    pminud %xmm5, %xmm1
-; SSE41-NEXT:    pminud %xmm3, %xmm1
-; SSE41-NEXT:    pminud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pminud %xmm1, %xmm0
+; SSE41-NEXT:    pminud %xmm5, %xmm3
+; SSE41-NEXT:    pminud %xmm1, %xmm3
+; SSE41-NEXT:    pminud %xmm4, %xmm2
+; SSE41-NEXT:    pminud %xmm3, %xmm2
+; SSE41-NEXT:    pminud %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pminud %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pminud %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1126,16 +1126,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpminud %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpminud %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpminud %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpminud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpminud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpminud %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminud %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminud %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1147,7 +1147,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
@@ -1398,9 +1398,9 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pminsw %xmm3, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pminsw %xmm1, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
 ; SSE2-NEXT:    pminsw %xmm2, %xmm0
-; SSE2-NEXT:    pminsw %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1424,9 +1424,9 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminuw %xmm3, %xmm1
-; SSE41-NEXT:    pminuw %xmm2, %xmm0
-; SSE41-NEXT:    pminuw %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminuw %xmm2, %xmm1
+; SSE41-NEXT:    pminuw %xmm0, %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE41-NEXT:    retq
@@ -1436,8 +1436,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1478,16 +1478,16 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm8, %xmm2
 ; SSE2-NEXT:    pminsw %xmm6, %xmm2
 ; SSE2-NEXT:    pxor %xmm8, %xmm4
+; SSE2-NEXT:    pminsw %xmm2, %xmm4
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
-; SSE2-NEXT:    pminsw %xmm4, %xmm0
-; SSE2-NEXT:    pminsw %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm7
 ; SSE2-NEXT:    pxor %xmm8, %xmm3
 ; SSE2-NEXT:    pminsw %xmm7, %xmm3
 ; SSE2-NEXT:    pxor %xmm8, %xmm5
+; SSE2-NEXT:    pminsw %xmm3, %xmm5
 ; SSE2-NEXT:    pxor %xmm8, %xmm1
 ; SSE2-NEXT:    pminsw %xmm5, %xmm1
-; SSE2-NEXT:    pminsw %xmm3, %xmm1
+; SSE2-NEXT:    pminsw %xmm4, %xmm1
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
@@ -1512,13 +1512,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminuw %xmm7, %xmm3
-; SSE41-NEXT:    pminuw %xmm5, %xmm1
-; SSE41-NEXT:    pminuw %xmm3, %xmm1
+; SSE41-NEXT:    pminuw %xmm5, %xmm3
+; SSE41-NEXT:    pminuw %xmm1, %xmm3
 ; SSE41-NEXT:    pminuw %xmm6, %xmm2
-; SSE41-NEXT:    pminuw %xmm4, %xmm0
-; SSE41-NEXT:    pminuw %xmm2, %xmm0
-; SSE41-NEXT:    pminuw %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminuw %xmm4, %xmm2
+; SSE41-NEXT:    pminuw %xmm3, %xmm2
+; SSE41-NEXT:    pminuw %xmm0, %xmm2
+; SSE41-NEXT:    phminposuw %xmm2, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE41-NEXT:    retq
@@ -1529,13 +1529,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminuw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminuw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminuw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminuw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminuw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1545,7 +1545,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminuw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminuw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
@@ -1837,31 +1837,31 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-LABEL: test_v64i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminub %xmm3, %xmm1
-; SSE2-NEXT:    pminub %xmm2, %xmm0
-; SSE2-NEXT:    pminub %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pminub %xmm2, %xmm1
 ; SSE2-NEXT:    pminub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pminub %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
 ; SSE2-NEXT:    pminub %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrlw $8, %xmm1
+; SSE2-NEXT:    pminub %xmm0, %xmm1
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminub %xmm3, %xmm1
-; SSE41-NEXT:    pminub %xmm2, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pminub %xmm2, %xmm1
 ; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -1871,8 +1871,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminub %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1915,14 +1915,14 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-LABEL: test_v128i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminub %xmm6, %xmm2
-; SSE2-NEXT:    pminub %xmm4, %xmm0
-; SSE2-NEXT:    pminub %xmm2, %xmm0
 ; SSE2-NEXT:    pminub %xmm7, %xmm3
-; SSE2-NEXT:    pminub %xmm5, %xmm1
-; SSE2-NEXT:    pminub %xmm3, %xmm1
-; SSE2-NEXT:    pminub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pminub %xmm1, %xmm0
+; SSE2-NEXT:    pminub %xmm5, %xmm3
+; SSE2-NEXT:    pminub %xmm1, %xmm3
+; SSE2-NEXT:    pminub %xmm4, %xmm2
+; SSE2-NEXT:    pminub %xmm3, %xmm2
+; SSE2-NEXT:    pminub %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pminub %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -1938,16 +1938,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminub %xmm7, %xmm3
-; SSE41-NEXT:    pminub %xmm5, %xmm1
-; SSE41-NEXT:    pminub %xmm3, %xmm1
+; SSE41-NEXT:    pminub %xmm5, %xmm3
+; SSE41-NEXT:    pminub %xmm1, %xmm3
 ; SSE41-NEXT:    pminub %xmm6, %xmm2
-; SSE41-NEXT:    pminub %xmm4, %xmm0
+; SSE41-NEXT:    pminub %xmm4, %xmm2
+; SSE41-NEXT:    pminub %xmm3, %xmm2
+; SSE41-NEXT:    pminub %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
 ; SSE41-NEXT:    pminub %xmm2, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -1958,13 +1958,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminub %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminub %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminub %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminub %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -1976,7 +1976,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminub %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminub %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
index f1d77e32f7fec..38d8ec429b9da 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -1022,13 +1022,13 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; SSE41-LABEL: test_v16i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminud %xmm3, %xmm1
-; SSE41-NEXT:    pminud %xmm2, %xmm0
-; SSE41-NEXT:    pminud %xmm1, %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pminud %xmm2, %xmm1
 ; SSE41-NEXT:    pminud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE41-NEXT:    pminud %xmm1, %xmm0
-; SSE41-NEXT:    movd %xmm0, %eax
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT:    pminud %xmm0, %xmm1
+; SSE41-NEXT:    movd %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v16i32:
@@ -1036,8 +1036,8 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminud %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1159,14 +1159,14 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; SSE41-LABEL: test_v32i32:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminud %xmm6, %xmm2
-; SSE41-NEXT:    pminud %xmm4, %xmm0
-; SSE41-NEXT:    pminud %xmm2, %xmm0
 ; SSE41-NEXT:    pminud %xmm7, %xmm3
-; SSE41-NEXT:    pminud %xmm5, %xmm1
-; SSE41-NEXT:    pminud %xmm3, %xmm1
-; SSE41-NEXT:    pminud %xmm0, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE41-NEXT:    pminud %xmm1, %xmm0
+; SSE41-NEXT:    pminud %xmm5, %xmm3
+; SSE41-NEXT:    pminud %xmm1, %xmm3
+; SSE41-NEXT:    pminud %xmm4, %xmm2
+; SSE41-NEXT:    pminud %xmm3, %xmm2
+; SSE41-NEXT:    pminud %xmm0, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE41-NEXT:    pminud %xmm2, %xmm0
 ; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE41-NEXT:    pminud %xmm0, %xmm1
 ; SSE41-NEXT:    movd %xmm1, %eax
@@ -1175,16 +1175,16 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-LABEL: test_v32i32:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpminud %xmm3, %xmm1, %xmm4
-; AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm5
-; AVX1-NEXT:    vpminud %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; AVX1-NEXT:    vpminud %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vpminud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpminud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpminud %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpminud %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminud %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
@@ -1196,7 +1196,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX2-LABEL: test_v32i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
@@ -1513,9 +1513,9 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pminsw %xmm3, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    pminsw %xmm1, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
 ; SSE2-NEXT:    pminsw %xmm2, %xmm0
-; SSE2-NEXT:    pminsw %xmm1, %xmm0
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
@@ -1539,9 +1539,9 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; SSE41-LABEL: test_v32i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminuw %xmm3, %xmm1
-; SSE41-NEXT:    pminuw %xmm2, %xmm0
-; SSE41-NEXT:    pminuw %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminuw %xmm2, %xmm1
+; SSE41-NEXT:    pminuw %xmm0, %xmm1
+; SSE41-NEXT:    phminposuw %xmm1, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE41-NEXT:    retq
@@ -1551,8 +1551,8 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminuw %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1593,16 +1593,16 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE2-NEXT:    pxor %xmm8, %xmm2
 ; SSE2-NEXT:    pminsw %xmm6, %xmm2
 ; SSE2-NEXT:    pxor %xmm8, %xmm4
+; SSE2-NEXT:    pminsw %xmm2, %xmm4
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
-; SSE2-NEXT:    pminsw %xmm4, %xmm0
-; SSE2-NEXT:    pminsw %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm7
 ; SSE2-NEXT:    pxor %xmm8, %xmm3
 ; SSE2-NEXT:    pminsw %xmm7, %xmm3
 ; SSE2-NEXT:    pxor %xmm8, %xmm5
+; SSE2-NEXT:    pminsw %xmm3, %xmm5
 ; SSE2-NEXT:    pxor %xmm8, %xmm1
 ; SSE2-NEXT:    pminsw %xmm5, %xmm1
-; SSE2-NEXT:    pminsw %xmm3, %xmm1
+; SSE2-NEXT:    pminsw %xmm4, %xmm1
 ; SSE2-NEXT:    pminsw %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm8, %xmm0
@@ -1627,13 +1627,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; SSE41-LABEL: test_v64i16:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminuw %xmm7, %xmm3
-; SSE41-NEXT:    pminuw %xmm5, %xmm1
-; SSE41-NEXT:    pminuw %xmm3, %xmm1
+; SSE41-NEXT:    pminuw %xmm5, %xmm3
+; SSE41-NEXT:    pminuw %xmm1, %xmm3
 ; SSE41-NEXT:    pminuw %xmm6, %xmm2
-; SSE41-NEXT:    pminuw %xmm4, %xmm0
-; SSE41-NEXT:    pminuw %xmm2, %xmm0
-; SSE41-NEXT:    pminuw %xmm1, %xmm0
-; SSE41-NEXT:    phminposuw %xmm0, %xmm0
+; SSE41-NEXT:    pminuw %xmm4, %xmm2
+; SSE41-NEXT:    pminuw %xmm3, %xmm2
+; SSE41-NEXT:    pminuw %xmm0, %xmm2
+; SSE41-NEXT:    phminposuw %xmm2, %xmm0
 ; SSE41-NEXT:    movd %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SSE41-NEXT:    retq
@@ -1644,13 +1644,13 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminuw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminuw %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminuw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminuw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminuw %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminuw %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1660,7 +1660,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX2-LABEL: test_v64i16:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminuw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminuw %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
@@ -2077,31 +2077,31 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; SSE2-LABEL: test_v64i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminub %xmm3, %xmm1
-; SSE2-NEXT:    pminub %xmm2, %xmm0
-; SSE2-NEXT:    pminub %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pminub %xmm2, %xmm1
 ; SSE2-NEXT:    pminub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    pminub %xmm1, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, %xmm1
-; SSE2-NEXT:    psrld $16, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
-; SSE2-NEXT:    psrlw $8, %xmm0
+; SSE2-NEXT:    psrld $16, %xmm0
 ; SSE2-NEXT:    pminub %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrlw $8, %xmm1
+; SSE2-NEXT:    pminub %xmm0, %xmm1
+; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: test_v64i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminub %xmm3, %xmm1
-; SSE41-NEXT:    pminub %xmm2, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
+; SSE41-NEXT:    pminub %xmm2, %xmm1
 ; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
+; SSE41-NEXT:    pminub %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2111,8 +2111,8 @@ define i8 @test_v64i8(<64 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpminub %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -2155,14 +2155,14 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE2-LABEL: test_v128i8:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pminub %xmm6, %xmm2
-; SSE2-NEXT:    pminub %xmm4, %xmm0
-; SSE2-NEXT:    pminub %xmm2, %xmm0
 ; SSE2-NEXT:    pminub %xmm7, %xmm3
-; SSE2-NEXT:    pminub %xmm5, %xmm1
-; SSE2-NEXT:    pminub %xmm3, %xmm1
-; SSE2-NEXT:    pminub %xmm0, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    pminub %xmm1, %xmm0
+; SSE2-NEXT:    pminub %xmm5, %xmm3
+; SSE2-NEXT:    pminub %xmm1, %xmm3
+; SSE2-NEXT:    pminub %xmm4, %xmm2
+; SSE2-NEXT:    pminub %xmm3, %xmm2
+; SSE2-NEXT:    pminub %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE2-NEXT:    pminub %xmm2, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT:    pminub %xmm0, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
@@ -2178,16 +2178,16 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; SSE41-LABEL: test_v128i8:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pminub %xmm7, %xmm3
-; SSE41-NEXT:    pminub %xmm5, %xmm1
-; SSE41-NEXT:    pminub %xmm3, %xmm1
+; SSE41-NEXT:    pminub %xmm5, %xmm3
+; SSE41-NEXT:    pminub %xmm1, %xmm3
 ; SSE41-NEXT:    pminub %xmm6, %xmm2
-; SSE41-NEXT:    pminub %xmm4, %xmm0
+; SSE41-NEXT:    pminub %xmm4, %xmm2
+; SSE41-NEXT:    pminub %xmm3, %xmm2
+; SSE41-NEXT:    pminub %xmm0, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    psrlw $8, %xmm0
 ; SSE41-NEXT:    pminub %xmm2, %xmm0
-; SSE41-NEXT:    pminub %xmm1, %xmm0
-; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pminub %xmm0, %xmm1
-; SSE41-NEXT:    phminposuw %xmm1, %xmm0
+; SSE41-NEXT:    phminposuw %xmm0, %xmm0
 ; SSE41-NEXT:    pextrb $0, %xmm0, %eax
 ; SSE41-NEXT:    # kill: def $al killed $al killed $eax
 ; SSE41-NEXT:    retq
@@ -2198,13 +2198,13 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
 ; AVX1-NEXT:    vpminub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT:    vpminub %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpminub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
 ; AVX1-NEXT:    vpminub %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpminub %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpminub %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpminub %xmm4, %xmm1, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpminub %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vphminposuw %xmm0, %xmm0
@@ -2216,7 +2216,7 @@ define i8 @test_v128i8(<128 x i8> %a0) {
 ; AVX2-LABEL: test_v128i8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminub %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpminub %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0

From 5162266515e5a9d9fa356d26a7458013844fdf16 Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Wed, 5 Jun 2019 18:35:54 +0000
Subject: [PATCH 1144/1176] [NFC][Reassociate] Add unary fneg tests to
 fast-basictest.ll

llvm-svn: 362630
---
 .../Transforms/Reassociate/fast-basictest.ll  | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index 58cfba3652761..5d15ca46c00f0 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -200,6 +200,18 @@ define float @test8(float %X, float %Y, float %Z) {
   ret float %C
 }
 
+define float @test8_unary_fneg(float %X, float %Y, float %Z) {
+; CHECK-LABEL: @test8_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = fsub fast float [[Z:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret float [[C]]
+;
+  %A = fneg fast float %X
+  %B = fmul fast float %A, %Y
+  %C = fadd fast float %B, %Z
+  ret float %C
+}
+
 define float @test8_reassoc(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test8_reassoc(
 ; CHECK-NEXT:    [[A:%.*]] = fsub reassoc float 0.000000e+00, [[X:%.*]]
@@ -384,6 +396,19 @@ define float @test13(float %X1, float %X2, float %X3) {
   ret float %D
 }
 
+define float @test13_unary_fneg(float %X1, float %X2, float %X3) {
+; CHECK-LABEL: @test13_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X3:%.*]], [[X2:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[TMP1]], [[X1:%.*]]
+; CHECK-NEXT:    ret float [[D]]
+;
+  %A = fneg fast float %X1
+  %B = fmul fast float %A, %X2   ; -X1*X2
+  %C = fmul fast float %X1, %X3  ; X1*X3
+  %D = fadd fast float %B, %C    ; -X1*X2 + X1*X3 -> X1*(X3-X2)
+  ret float %D
+}
+
 define float @test13_reassoc(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test13_reassoc(
 ; CHECK-NEXT:    [[A:%.*]] = fsub reassoc float 0.000000e+00, [[X1:%.*]]
@@ -472,6 +497,18 @@ define float @test16(float %b, float %a) {
   ret float %4
 }
 
+define float @test16_unary_fneg(float %b, float %a) {
+; CHECK-LABEL: @test16_unary_fneg(
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[B:%.*]], 1.234000e+03
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %1 = fadd fast float %a, 1234.0
+  %2 = fadd fast float %b, %1
+  %3 = fneg fast float %a
+  %4 = fadd fast float %2, %3
+  ret float %4
+}
+
 define float @test16_reassoc(float %b, float %a) {
 ; CHECK-LABEL: @test16_reassoc(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[A:%.*]], 1.234000e+03
@@ -504,6 +541,23 @@ define float @test17(float %a, float %b, float %z) {
   ret float %g
 }
 
+; FIXME: This reassociation is not working.
+define float @test17_unary_fneg(float %a, float %b, float %z) {
+; CHECK-LABEL: @test17_unary_fneg(
+; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[A:%.*]], -1.234500e+04
+; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F]], [[Z:%.*]]
+; CHECK-NEXT:    [[G:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    ret float [[G]]
+;
+  %c = fneg fast float %z
+  %d = fmul fast float %a, %b
+  %e = fmul fast float %c, %d
+  %f = fmul fast float %e, 1.234500e+04
+  %g = fsub fast float 0.000000e+00, %f
+  ret float %g
+}
+
 define float @test17_reassoc(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test17_reassoc(
 ; CHECK-NEXT:    [[C:%.*]] = fsub reassoc float 0.000000e+00, [[Z:%.*]]
@@ -534,6 +588,21 @@ define float @test18(float %a, float %b, float %z) {
   ret float %f
 }
 
+; FIXME: This reassociation is not working.
+define float @test18_unary_fneg(float %a, float %b, float %z) {
+; CHECK-LABEL: @test18_unary_fneg(
+; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[Z:%.*]], -4.000000e+01
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[C]], [[A:%.*]]
+; CHECK-NEXT:    [[F:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    ret float [[F]]
+;
+  %d = fmul fast float %z, 4.000000e+01
+  %c = fneg fast float %d
+  %e = fmul fast float %a, %c
+  %f = fsub fast float 0.000000e+00, %e
+  ret float %f
+}
+
 define float @test18_reassoc(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test18_reassoc(
 ; CHECK-NEXT:    [[D:%.*]] = fmul reassoc float [[Z:%.*]], 4.000000e+01

From 8b83a9c6b13d6156727de15da05409c8b730fa5b Mon Sep 17 00:00:00 2001
From: Cameron McInally <cameron.mcinally@nyu.edu>
Date: Wed, 5 Jun 2019 18:50:07 +0000
Subject: [PATCH 1145/1176] [NFC][Reassociate] Fix mistake in 468b2ad

Missed 2 'fast fsub(0.0,X) -> fneg(X)' changes.

llvm-svn: 362631
---
 .../Transforms/Reassociate/fast-basictest.ll   | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index 5d15ca46c00f0..0a3e40c87f914 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -541,20 +541,18 @@ define float @test17(float %a, float %b, float %z) {
   ret float %g
 }
 
-; FIXME: This reassociation is not working.
 define float @test17_unary_fneg(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test17_unary_fneg(
-; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[A:%.*]], -1.234500e+04
-; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F]], [[Z:%.*]]
-; CHECK-NEXT:    [[G:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
-; CHECK-NEXT:    ret float [[G]]
+; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
+; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[D]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[E]], [[Z:%.*]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = fneg fast float %z
   %d = fmul fast float %a, %b
   %e = fmul fast float %c, %d
   %f = fmul fast float %e, 1.234500e+04
-  %g = fsub fast float 0.000000e+00, %f
+  %g = fneg fast float %f
   ret float %g
 }
 
@@ -592,14 +590,14 @@ define float @test18(float %a, float %b, float %z) {
 define float @test18_unary_fneg(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test18_unary_fneg(
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[Z:%.*]], -4.000000e+01
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[C]], [[A:%.*]]
-; CHECK-NEXT:    [[F:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
+; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[C]], [[A:%.*]]
+; CHECK-NEXT:    [[F:%.*]] = fneg fast float [[E]]
 ; CHECK-NEXT:    ret float [[F]]
 ;
   %d = fmul fast float %z, 4.000000e+01
   %c = fneg fast float %d
   %e = fmul fast float %a, %c
-  %f = fsub fast float 0.000000e+00, %e
+  %f = fneg fast float %e
   ret float %f
 }
 

From 84cfca0f2b7634a3cb538368ff661ecead6d3666 Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Wed, 5 Jun 2019 18:55:39 +0000
Subject: [PATCH 1146/1176] [analyzer] PathDiagnosticPopUpPiece: working with
 CharSourceRange

Summary: Sometimes we are at character boundaries so past the token-range.
llvm-svn: 362632
---
 clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index 8ede3f15e60f7..64c42699fcf3c 100644
--- a/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -612,7 +612,7 @@ HandlePopUpPieceStartTag(Rewriter &R,
   for (const auto &Range : PopUpRanges) {
     html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "",
                          "<table class='variable_popup'><tbody>",
-                         /*IsTokenRange=*/true);
+                         /*IsTokenRange=*/false);
   }
 }
 
@@ -644,12 +644,12 @@ static void HandlePopUpPieceEndTag(Rewriter &R,
     Out << "</tbody></table></span>";
     html::HighlightRange(R, Range.getBegin(), Range.getEnd(),
                          "<span class='variable'>", Buf.c_str(),
-                         /*IsTokenRange=*/true);
+                         /*IsTokenRange=*/false);
 
   // Otherwise inject just the new row at the end of the range.
   } else {
     html::HighlightRange(R, Range.getBegin(), Range.getEnd(), "", Buf.c_str(),
-                         /*IsTokenRange=*/true);
+                         /*IsTokenRange=*/false);
   }
 }
 

From 036fa5346f2d2f2432e1c70242f17c270e040fc6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 5 Jun 2019 18:55:54 +0000
Subject: [PATCH 1147/1176] [X86][SSE] Add vector tests to cover more
 isNegatibleForFree/GetNegatedExpression cases (PR42105)

Some already combine correctly, but vector constant analysis is weak.

llvm-svn: 362633
---
 llvm/test/CodeGen/X86/dag-fmf-cse.ll | 13 +++++++++-
 llvm/test/CodeGen/X86/fdiv.ll        | 11 ++++++++
 llvm/test/CodeGen/X86/fp-fold.ll     | 38 ++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/dag-fmf-cse.ll b/llvm/test/CodeGen/X86/dag-fmf-cse.ll
index 021459eb4bde5..609ccdc367395 100644
--- a/llvm/test/CodeGen/X86/dag-fmf-cse.ll
+++ b/llvm/test/CodeGen/X86/dag-fmf-cse.ll
@@ -12,7 +12,6 @@ define float @fmf_should_not_break_cse(float %a, float %b) {
 ; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
-
   %mul1 = fmul fast float %a, %b
   %nega = fsub fast float 0.0, %a
   %mul2 = fmul fast float %nega, %b
@@ -20,3 +19,15 @@ define float @fmf_should_not_break_cse(float %a, float %b) {
   ret float %abx2
 }
 
+define <4 x float> @fmf_should_not_break_cse_vector(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: fmf_should_not_break_cse_vector:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vaddps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %mul1 = fmul fast <4 x float> %a, %b
+  %nega = fsub fast <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %a
+  %mul2 = fmul fast <4 x float> %nega, %b
+  %abx2 = fsub fast <4 x float> %mul1, %mul2
+  ret <4 x float> %abx2
+}
diff --git a/llvm/test/CodeGen/X86/fdiv.ll b/llvm/test/CodeGen/X86/fdiv.ll
index f3956ecc0ea37..259cd91cca528 100644
--- a/llvm/test/CodeGen/X86/fdiv.ll
+++ b/llvm/test/CodeGen/X86/fdiv.ll
@@ -65,5 +65,16 @@ define float @double_negative(float %x, float %y) #0 {
   ret float %div
 }
 
+define <4 x float> @double_negative_vector(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: double_negative_vector:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    divps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
+  %neg2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
+  %div = fdiv <4 x float> %neg1, %neg2
+  ret <4 x float> %div
+}
+
 attributes #0 = { "unsafe-fp-math"="false" }
 
diff --git a/llvm/test/CodeGen/X86/fp-fold.ll b/llvm/test/CodeGen/X86/fp-fold.ll
index 53fe3945c273d..29a657a1f34d4 100644
--- a/llvm/test/CodeGen/X86/fp-fold.ll
+++ b/llvm/test/CodeGen/X86/fp-fold.ll
@@ -99,6 +99,18 @@ define float @fsub_neg_y(float %x, float %y) {
   ret float %r
 }
 
+define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float>%y) {
+; ANY-LABEL: fsub_neg_y_vector:
+; ANY:       # %bb.0:
+; ANY-NEXT:    mulps {{.*}}(%rip), %xmm0
+; ANY-NEXT:    xorps {{.*}}(%rip), %xmm0
+; ANY-NEXT:    retq
+  %mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
+  %add = fadd <4 x float> %mul, %y
+  %r = fsub nsz reassoc <4 x float> %y, %add
+  ret <4 x float> %r
+}
+
 define float @fsub_neg_y_commute(float %x, float %y) {
 ; ANY-LABEL: fsub_neg_y_commute:
 ; ANY:       # %bb.0:
@@ -109,6 +121,19 @@ define float @fsub_neg_y_commute(float %x, float %y) {
   %r = fsub nsz reassoc float %y, %add
   ret float %r
 }
+
+define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) {
+; ANY-LABEL: fsub_neg_y_commute_vector:
+; ANY:       # %bb.0:
+; ANY-NEXT:    mulps {{.*}}(%rip), %xmm0
+; ANY-NEXT:    xorps {{.*}}(%rip), %xmm0
+; ANY-NEXT:    retq
+  %mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
+  %add = fadd <4 x float> %y, %mul
+  %r = fsub nsz reassoc <4 x float> %y, %add
+  ret <4 x float> %r
+}
+
 ; Y - (X + Y) --> -X
 
 define float @fsub_fadd_common_op_fneg(float %x, float %y) {
@@ -172,6 +197,19 @@ define float @fsub_negzero(float %x) {
   ret float %r
 }
 
+define <4 x float> @fsub_negzero_vector(<4 x float> %x) {
+; STRICT-LABEL: fsub_negzero_vector:
+; STRICT:       # %bb.0:
+; STRICT-NEXT:    subps {{.*}}(%rip), %xmm0
+; STRICT-NEXT:    retq
+;
+; UNSAFE-LABEL: fsub_negzero_vector:
+; UNSAFE:       # %bb.0:
+; UNSAFE-NEXT:    retq
+  %r = fsub <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
+  ret <4 x float> %r
+}
+
 define float @fsub_zero_nsz_1(float %x) {
 ; ANY-LABEL: fsub_zero_nsz_1:
 ; ANY:       # %bb.0:

From a95edb9dc1ddaf70761e8c90be175f144a28f757 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Wed, 5 Jun 2019 19:42:48 +0000
Subject: [PATCH 1148/1176] [GWP-ASan] Core Guarded Pool Allocator [4].

Summary:
See D60593 for further information.

This patch introduces the core of GWP-ASan, being the guarded pool allocator. This class contains the logic for creating and maintaining allocations in the guarded pool. Its public interface is to be utilised by supporting allocators in order to provide sampled guarded allocation behaviour.

This patch also contains basic functionality tests of the allocator as unittests. The error-catching behaviour will be tested in upcoming patches that use Scudo as an implementing allocator.

Reviewers: vlad.tsyrklevich, eugenis, jfb

Reviewed By: vlad.tsyrklevich

Subscribers: dexonsmith, kubamracek, mgorny, cryptoad, jfb, #sanitizers, llvm-commits, morehouse

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D62872

llvm-svn: 362636
---
 compiler-rt/lib/gwp_asan/CMakeLists.txt       |   6 +-
 compiler-rt/lib/gwp_asan/definitions.h        |  29 ++
 .../lib/gwp_asan/guarded_pool_allocator.cpp   | 433 ++++++++++++++++++
 .../lib/gwp_asan/guarded_pool_allocator.h     | 254 ++++++++++
 .../guarded_pool_allocator_posix.cpp          |  96 ++++
 compiler-rt/lib/gwp_asan/tests/CMakeLists.txt |   8 +-
 compiler-rt/lib/gwp_asan/tests/alignment.cpp  |  27 ++
 compiler-rt/lib/gwp_asan/tests/basic.cpp      |  60 +++
 compiler-rt/lib/gwp_asan/tests/harness.h      |  60 +++
 compiler-rt/lib/gwp_asan/tests/slot_reuse.cpp |  72 +++
 .../lib/gwp_asan/tests/thread_contention.cpp  |  69 +++
 11 files changed, 1111 insertions(+), 3 deletions(-)
 create mode 100644 compiler-rt/lib/gwp_asan/definitions.h
 create mode 100644 compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/guarded_pool_allocator.h
 create mode 100644 compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_posix.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/tests/alignment.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/tests/basic.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/tests/harness.h
 create mode 100644 compiler-rt/lib/gwp_asan/tests/slot_reuse.cpp
 create mode 100644 compiler-rt/lib/gwp_asan/tests/thread_contention.cpp

diff --git a/compiler-rt/lib/gwp_asan/CMakeLists.txt b/compiler-rt/lib/gwp_asan/CMakeLists.txt
index c5315245066e4..94784adf4694d 100644
--- a/compiler-rt/lib/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/CMakeLists.txt
@@ -3,15 +3,19 @@ add_compiler_rt_component(gwp_asan)
 include_directories(..)
 
 set(GWP_ASAN_SOURCES
+  platform_specific/guarded_pool_allocator_posix.cpp
   platform_specific/mutex_posix.cpp
+  guarded_pool_allocator.cpp
   random.cpp
 )
 
 set(GWP_ASAN_HEADERS
+  definitions.h
+  guarded_pool_allocator.h
   mutex.h
-  random.h
   options.h
   options.inc
+  random.h
 )
 
 # Ensure that GWP-ASan meets the delegated requirements of some supporting
diff --git a/compiler-rt/lib/gwp_asan/definitions.h b/compiler-rt/lib/gwp_asan/definitions.h
new file mode 100644
index 0000000000000..1190adbd4f4fc
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/definitions.h
@@ -0,0 +1,29 @@
+//===-- gwp_asan_definitions.h ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_DEFINITIONS_H_
+#define GWP_ASAN_DEFINITIONS_H_
+
+#define TLS_INITIAL_EXEC __thread __attribute__((tls_model("initial-exec")))
+
+#ifdef LIKELY
+# undef LIKELY
+#endif // defined(LIKELY)
+#define LIKELY(X) __builtin_expect(!!(X), 1)
+
+#ifdef UNLIKELY
+# undef UNLIKELY
+#endif // defined(UNLIKELY)
+#define UNLIKELY(X) __builtin_expect(!!(X), 0)
+
+#ifdef ALWAYS_INLINE
+# undef ALWAYS_INLINE
+#endif // defined(ALWAYS_INLINE)
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+
+#endif // GWP_ASAN_DEFINITIONS_H_
diff --git a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp
new file mode 100644
index 0000000000000..cd24a9fe79b54
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp
@@ -0,0 +1,433 @@
+//===-- guarded_pool_allocator.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/guarded_pool_allocator.h"
+
+#include "gwp_asan/options.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+using AllocationMetadata = gwp_asan::GuardedPoolAllocator::AllocationMetadata;
+using Error = gwp_asan::GuardedPoolAllocator::Error;
+
+namespace gwp_asan {
+namespace {
+// Forward declare the pointer to the singleton version of this class.
+// Instantiated during initialisation, this allows the signal handler
+// to find this class in order to deduce the root cause of failures. Must not be
+// referenced by users outside this translation unit, in order to avoid
+// init-order-fiasco.
+GuardedPoolAllocator *SingletonPtr = nullptr;
+} // anonymous namespace
+
+// Gets the singleton implementation of this class. Thread-compatible until
+// init() is called, thread-safe afterwards.
+GuardedPoolAllocator *getSingleton() { return SingletonPtr; }
+
+void GuardedPoolAllocator::AllocationMetadata::RecordAllocation(
+    uintptr_t AllocAddr, size_t AllocSize) {
+  Addr = AllocAddr;
+  Size = AllocSize;
+  IsDeallocated = false;
+
+  // TODO(hctim): Implement stack trace collection.
+  // TODO(hctim): Ask the caller to provide the thread ID, so we don't waste
+  // other thread's time getting the thread ID under lock.
+  AllocationTrace.ThreadID = getThreadID();
+  DeallocationTrace.ThreadID = kInvalidThreadID;
+  AllocationTrace.Trace[0] = 0;
+  DeallocationTrace.Trace[0] = 0;
+}
+
+void GuardedPoolAllocator::AllocationMetadata::RecordDeallocation() {
+  IsDeallocated = true;
+  // TODO(hctim): Implement stack trace collection.
+  DeallocationTrace.ThreadID = getThreadID();
+}
+
+void GuardedPoolAllocator::init(const options::Options &Opts) {
+  // Note: We return from the constructor here if GWP-ASan is not available.
+  // This will stop heap-allocation of class members, as well as mmap() of the
+  // guarded slots.
+  if (!Opts.Enabled || Opts.SampleRate == 0 ||
+      Opts.MaxSimultaneousAllocations == 0)
+    return;
+
+  // TODO(hctim): Add a death unit test for this.
+  if (SingletonPtr) {
+    (*SingletonPtr->Printf)(
+        "GWP-ASan Error: init() has already been called.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  if (Opts.SampleRate < 0) {
+    Opts.Printf("GWP-ASan Error: SampleRate is < 0.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  if (Opts.SampleRate > INT32_MAX) {
+    Opts.Printf("GWP-ASan Error: SampleRate is > 2^31.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  if (Opts.MaxSimultaneousAllocations < 0) {
+    Opts.Printf("GWP-ASan Error: MaxSimultaneousAllocations is < 0.\n");
+    exit(EXIT_FAILURE);
+  }
+
+  SingletonPtr = this;
+
+  MaxSimultaneousAllocations = Opts.MaxSimultaneousAllocations;
+
+  PageSize = getPlatformPageSize();
+
+  PerfectlyRightAlign = Opts.PerfectlyRightAlign;
+  Printf = Opts.Printf;
+
+  size_t PoolBytesRequired =
+      PageSize * (1 + MaxSimultaneousAllocations) +
+      MaxSimultaneousAllocations * maximumAllocationSize();
+  void *GuardedPoolMemory = mapMemory(PoolBytesRequired);
+
+  size_t BytesRequired = MaxSimultaneousAllocations * sizeof(*Metadata);
+  Metadata = reinterpret_cast<AllocationMetadata *>(mapMemory(BytesRequired));
+  markReadWrite(Metadata, BytesRequired);
+
+  // Allocate memory and set up the free pages queue.
+  BytesRequired = MaxSimultaneousAllocations * sizeof(*FreeSlots);
+  FreeSlots = reinterpret_cast<size_t *>(mapMemory(BytesRequired));
+  markReadWrite(FreeSlots, BytesRequired);
+
+  // Multiply the sample rate by 2 to give a good, fast approximation for (1 /
+  // SampleRate) chance of sampling.
+  if (Opts.SampleRate != 1)
+    AdjustedSampleRate = static_cast<uint32_t>(Opts.SampleRate) * 2;
+  else
+    AdjustedSampleRate = 1;
+
+  GuardedPagePool = reinterpret_cast<uintptr_t>(GuardedPoolMemory);
+  GuardedPagePoolEnd =
+      reinterpret_cast<uintptr_t>(GuardedPoolMemory) + PoolBytesRequired;
+
+  // Ensure that signal handlers are installed as late as possible, as the class
+  // is not thread-safe until init() is finished, and thus a SIGSEGV may cause a
+  // race to members if recieved during init().
+  if (Opts.InstallSignalHandlers)
+    installSignalHandlers();
+}
+
+void *GuardedPoolAllocator::allocate(size_t Size) {
+  if (Size == 0 || Size > maximumAllocationSize())
+    return nullptr;
+
+  size_t Index;
+  {
+    ScopedLock L(PoolMutex);
+    Index = reserveSlot();
+  }
+
+  if (Index == kInvalidSlotID)
+    return nullptr;
+
+  uintptr_t Ptr = slotToAddr(Index);
+  Ptr += allocationSlotOffset(Size);
+  AllocationMetadata *Meta = addrToMetadata(Ptr);
+
+  // If a slot is multiple pages in size, and the allocation takes up a single
+  // page, we can improve overflow detection by leaving the unused pages as
+  // unmapped.
+  markReadWrite(reinterpret_cast<void *>(getPageAddr(Ptr)), Size);
+
+  Meta->RecordAllocation(Ptr, Size);
+
+  return reinterpret_cast<void *>(Ptr);
+}
+
+void GuardedPoolAllocator::deallocate(void *Ptr) {
+  assert(pointerIsMine(Ptr) && "Pointer is not mine!");
+  uintptr_t UPtr = reinterpret_cast<uintptr_t>(Ptr);
+  uintptr_t SlotStart = slotToAddr(addrToSlot(UPtr));
+  AllocationMetadata *Meta = addrToMetadata(UPtr);
+  if (Meta->Addr != UPtr) {
+    reportError(UPtr, Error::INVALID_FREE);
+    exit(EXIT_FAILURE);
+  }
+
+  // Intentionally scope the mutex here, so that other threads can access the
+  // pool during the expensive markInaccessible() call.
+  {
+    ScopedLock L(PoolMutex);
+    if (Meta->IsDeallocated) {
+      reportError(UPtr, Error::DOUBLE_FREE);
+      exit(EXIT_FAILURE);
+    }
+
+    // Ensure that the deallocation is recorded before marking the page as
+    // inaccessible. Otherwise, a racy use-after-free will have inconsistent
+    // metadata.
+    Meta->RecordDeallocation();
+  }
+
+  markInaccessible(reinterpret_cast<void *>(SlotStart),
+                   maximumAllocationSize());
+
+  // And finally, lock again to release the slot back into the pool.
+  ScopedLock L(PoolMutex);
+  freeSlot(addrToSlot(UPtr));
+}
+
+size_t GuardedPoolAllocator::getSize(const void *Ptr) {
+  assert(pointerIsMine(Ptr));
+  ScopedLock L(PoolMutex);
+  AllocationMetadata *Meta = addrToMetadata(reinterpret_cast<uintptr_t>(Ptr));
+  assert(Meta->Addr == reinterpret_cast<uintptr_t>(Ptr));
+  return Meta->Size;
+}
+
+size_t GuardedPoolAllocator::maximumAllocationSize() const { return PageSize; }
+
+AllocationMetadata *GuardedPoolAllocator::addrToMetadata(uintptr_t Ptr) const {
+  return &Metadata[addrToSlot(Ptr)];
+}
+
+size_t GuardedPoolAllocator::addrToSlot(uintptr_t Ptr) const {
+  assert(pointerIsMine(reinterpret_cast<void *>(Ptr)));
+  size_t ByteOffsetFromPoolStart = Ptr - GuardedPagePool;
+  return ByteOffsetFromPoolStart / (maximumAllocationSize() + PageSize);
+}
+
+uintptr_t GuardedPoolAllocator::slotToAddr(size_t N) const {
+  return GuardedPagePool + (PageSize * (1 + N)) + (maximumAllocationSize() * N);
+}
+
+uintptr_t GuardedPoolAllocator::getPageAddr(uintptr_t Ptr) const {
+  assert(pointerIsMine(reinterpret_cast<void *>(Ptr)));
+  return Ptr & ~(static_cast<uintptr_t>(PageSize) - 1);
+}
+
+bool GuardedPoolAllocator::isGuardPage(uintptr_t Ptr) const {
+  assert(pointerIsMine(reinterpret_cast<void *>(Ptr)));
+  size_t PageOffsetFromPoolStart = (Ptr - GuardedPagePool) / PageSize;
+  size_t PagesPerSlot = maximumAllocationSize() / PageSize;
+  return (PageOffsetFromPoolStart % (PagesPerSlot + 1)) == 0;
+}
+
+size_t GuardedPoolAllocator::reserveSlot() {
+  // Avoid potential reuse of a slot before we have made at least a single
+  // allocation in each slot. Helps with our use-after-free detection.
+  if (NumSampledAllocations < MaxSimultaneousAllocations)
+    return NumSampledAllocations++;
+
+  if (FreeSlotsLength == 0)
+    return kInvalidSlotID;
+
+  size_t ReservedIndex = getRandomUnsigned32() % FreeSlotsLength;
+  size_t SlotIndex = FreeSlots[ReservedIndex];
+  FreeSlots[ReservedIndex] = FreeSlots[--FreeSlotsLength];
+  return SlotIndex;
+}
+
+void GuardedPoolAllocator::freeSlot(size_t SlotIndex) {
+  assert(FreeSlotsLength < MaxSimultaneousAllocations);
+  FreeSlots[FreeSlotsLength++] = SlotIndex;
+}
+
+uintptr_t GuardedPoolAllocator::allocationSlotOffset(size_t Size) const {
+  assert(Size > 0);
+
+  bool ShouldRightAlign = getRandomUnsigned32() % 2 == 0;
+  if (!ShouldRightAlign)
+    return 0;
+
+  uintptr_t Offset = maximumAllocationSize();
+  if (!PerfectlyRightAlign) {
+    if (Size == 3)
+      Size = 4;
+    else if (Size > 4 && Size <= 8)
+      Size = 8;
+    else if (Size > 8 && (Size % 16) != 0)
+      Size += 16 - (Size % 16);
+  }
+  Offset -= Size;
+  return Offset;
+}
+
+void GuardedPoolAllocator::reportError(uintptr_t AccessPtr, Error Error) {
+  if (SingletonPtr)
+    SingletonPtr->reportErrorInternal(AccessPtr, Error);
+}
+
+size_t GuardedPoolAllocator::getNearestSlot(uintptr_t Ptr) const {
+  if (Ptr <= GuardedPagePool + PageSize)
+    return 0;
+  if (Ptr > GuardedPagePoolEnd - PageSize)
+    return MaxSimultaneousAllocations - 1;
+
+  if (!isGuardPage(Ptr))
+    return addrToSlot(Ptr);
+
+  if (Ptr % PageSize <= PageSize / 2)
+    return addrToSlot(Ptr - PageSize); // Round down.
+  return addrToSlot(Ptr + PageSize);   // Round up.
+}
+
+Error GuardedPoolAllocator::diagnoseUnknownError(uintptr_t AccessPtr,
+                                                 AllocationMetadata **Meta) {
+  // Let's try and figure out what the source of this error is.
+  if (isGuardPage(AccessPtr)) {
+    size_t Slot = getNearestSlot(AccessPtr);
+    AllocationMetadata *SlotMeta = addrToMetadata(slotToAddr(Slot));
+
+    // Ensure that this slot was allocated once upon a time.
+    if (!SlotMeta->Addr)
+      return Error::UNKNOWN;
+    *Meta = SlotMeta;
+
+    if (SlotMeta->Addr < AccessPtr)
+      return Error::BUFFER_OVERFLOW;
+    return Error::BUFFER_UNDERFLOW;
+  }
+
+  // Access wasn't a guard page, check for use-after-free.
+  AllocationMetadata *SlotMeta = addrToMetadata(AccessPtr);
+  if (SlotMeta->IsDeallocated) {
+    *Meta = SlotMeta;
+    return Error::USE_AFTER_FREE;
+  }
+
+  // If we have reached here, the error is still unknown. There is no metadata
+  // available.
+  return Error::UNKNOWN;
+}
+
+// Prints the provided error and metadata information. Returns true if there is
+// additional context that can be provided, false otherwise (i.e. returns false
+// if Error == {UNKNOWN, INVALID_FREE without metadata}).
+bool printErrorType(Error Error, uintptr_t AccessPtr, AllocationMetadata *Meta,
+                    options::Printf_t Printf) {
+  switch (Error) {
+  case Error::UNKNOWN:
+    Printf("GWP-ASan couldn't automatically determine the source of the "
+           "memory error when accessing 0x%zx. It was likely caused by a wild "
+           "memory access into the GWP-ASan pool.\n",
+           AccessPtr);
+    return false;
+  case Error::USE_AFTER_FREE:
+    Printf("Use after free occurred when accessing memory at: 0x%zx\n",
+           AccessPtr);
+    break;
+  case Error::DOUBLE_FREE:
+    Printf("Double free occurred when trying to free memory at: 0x%zx\n",
+           AccessPtr);
+    break;
+  case Error::INVALID_FREE:
+    Printf(
+        "Invalid (wild) free occurred when trying to free memory at: 0x%zx\n",
+        AccessPtr);
+    // It's possible for an invalid free to fall onto a slot that has never been
+    // allocated. If this is the case, there is no valid metadata.
+    if (Meta == nullptr)
+      return false;
+    break;
+  case Error::BUFFER_OVERFLOW:
+    Printf("Buffer overflow occurred when accessing memory at: 0x%zx\n",
+           AccessPtr);
+    break;
+  case Error::BUFFER_UNDERFLOW:
+    Printf("Buffer underflow occurred when accessing memory at: 0x%zx\n",
+           AccessPtr);
+    break;
+  }
+
+  Printf("0x%zx is ", AccessPtr);
+  if (AccessPtr < Meta->Addr)
+    Printf("located %zu bytes to the left of a %zu-byte allocation located at "
+           "0x%zx\n",
+           Meta->Addr - AccessPtr, Meta->Size, Meta->Addr);
+  else if (AccessPtr > Meta->Addr)
+    Printf("located %zu bytes to the right of a %zu-byte allocation located at "
+           "0x%zx\n",
+           AccessPtr - Meta->Addr, Meta->Size, Meta->Addr);
+  else
+    Printf("a %zu-byte allocation\n", Meta->Size);
+  return true;
+}
+
+void printThreadInformation(Error Error, uintptr_t AccessPtr,
+                            AllocationMetadata *Meta,
+                            options::Printf_t Printf) {
+  Printf("0x%zx was allocated by thread ", AccessPtr);
+  if (Meta->AllocationTrace.ThreadID == UINT64_MAX)
+    Printf("UNKNOWN.\n");
+  else
+    Printf("%zu.\n", Meta->AllocationTrace.ThreadID);
+
+  if (Error == Error::USE_AFTER_FREE || Error == Error::DOUBLE_FREE) {
+    Printf("0x%zx was freed by thread ", AccessPtr);
+    if (Meta->AllocationTrace.ThreadID == UINT64_MAX)
+      Printf("UNKNOWN.\n");
+    else
+      Printf("%zu.\n", Meta->AllocationTrace.ThreadID);
+  }
+}
+
+struct ScopedEndOfReportDecorator {
+  ScopedEndOfReportDecorator(options::Printf_t Printf) : Printf(Printf) {}
+  ~ScopedEndOfReportDecorator() { Printf("*** End GWP-ASan report ***\n"); }
+  options::Printf_t Printf;
+};
+
+void GuardedPoolAllocator::reportErrorInternal(uintptr_t AccessPtr,
+                                               Error Error) {
+  if (!pointerIsMine(reinterpret_cast<void *>(AccessPtr))) {
+    return;
+  }
+
+  // Attempt to prevent races to re-use the same slot that triggered this error.
+  // This does not guarantee that there are no races, because another thread can
+  // take the locks during the time that the signal handler is being called.
+  PoolMutex.tryLock();
+
+  Printf("*** GWP-ASan detected a memory error ***\n");
+  ScopedEndOfReportDecorator Decorator(Printf);
+
+  AllocationMetadata *Meta = nullptr;
+
+  if (Error == Error::UNKNOWN) {
+    Error = diagnoseUnknownError(AccessPtr, &Meta);
+  } else {
+    size_t Slot = getNearestSlot(AccessPtr);
+    Meta = addrToMetadata(slotToAddr(Slot));
+    // Ensure that this slot has been previously allocated.
+    if (!Meta->Addr)
+      Meta = nullptr;
+  }
+
+  // Print the error information, and if there is no valid metadata, stop here.
+  if (!printErrorType(Error, AccessPtr, Meta, Printf)) {
+    return;
+  }
+
+  // Ensure that we have a valid metadata pointer from this point forward.
+  if (Meta == nullptr) {
+    Printf("GWP-ASan internal unreachable error. Metadata is not null.\n");
+    return;
+  }
+
+  printThreadInformation(Error, AccessPtr, Meta, Printf);
+  // TODO(hctim): Implement stack unwinding here. Ask the caller to provide us
+  // with the base pointer, and we unwind the stack to give a stack trace for
+  // the access.
+  // TODO(hctim): Implement dumping here of allocation/deallocation traces.
+}
+
+TLS_INITIAL_EXEC uint64_t GuardedPoolAllocator::NextSampleCounter = 0;
+} // namespace gwp_asan
diff --git a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h
new file mode 100644
index 0000000000000..af2a5044a14f7
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h
@@ -0,0 +1,254 @@
+//===-- guarded_pool_allocator.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_GUARDED_POOL_ALLOCATOR_H_
+#define GWP_ASAN_GUARDED_POOL_ALLOCATOR_H_
+
+#include "gwp_asan/definitions.h"
+#include "gwp_asan/mutex.h"
+#include "gwp_asan/options.h"
+#include "gwp_asan/random.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace gwp_asan {
+// This class is the primary implementation of the allocator portion of GWP-
+// ASan. It is the sole owner of the pool of sequentially allocated guarded
+// slots. It should always be treated as a singleton.
+
+// Functions in the public interface of this class are thread-compatible until
+// init() is called, at which point they become thread-safe (unless specified
+// otherwise).
+class GuardedPoolAllocator {
+public:
+  static constexpr uint64_t kInvalidThreadID = UINT64_MAX;
+
+  enum class Error {
+    UNKNOWN,
+    USE_AFTER_FREE,
+    DOUBLE_FREE,
+    INVALID_FREE,
+    BUFFER_OVERFLOW,
+    BUFFER_UNDERFLOW
+  };
+
+  struct AllocationMetadata {
+    // Maximum number of stack trace frames to collect for allocations + frees.
+    // TODO(hctim): Implement stack frame compression, a-la Chromium.
+    // Currently the maximum stack frames is one, as we don't collect traces.
+    static constexpr size_t kMaximumStackFrames = 1;
+
+    // Records the given allocation metadata into this struct. In the future,
+    // this will collect the allocation trace as well.
+    void RecordAllocation(uintptr_t Addr, size_t Size);
+
+    // Record that this allocation is now deallocated. In future, this will
+    // collect the deallocation trace as well.
+    void RecordDeallocation();
+
+    struct CallSiteInfo {
+      // The backtrace to the allocation/deallocation. If the first value is
+      // zero, we did not collect a trace.
+      uintptr_t Trace[kMaximumStackFrames] = {};
+      // The thread ID for this trace, or kInvalidThreadID if not available.
+      uint64_t ThreadID = kInvalidThreadID;
+    };
+
+    // The address of this allocation.
+    uintptr_t Addr = 0;
+    // Represents the actual size of the allocation.
+    size_t Size = 0;
+
+    CallSiteInfo AllocationTrace;
+    CallSiteInfo DeallocationTrace;
+
+    // Whether this allocation has been deallocated yet.
+    bool IsDeallocated = false;
+  };
+
+  // During program startup, we must ensure that memory allocations do not land
+  // in this allocation pool if the allocator decides to runtime-disable
+  // GWP-ASan. The constructor value-initialises the class such that if no
+  // further initialisation takes place, calls to shouldSample() and
+  // pointerIsMine() will return false.
+  constexpr GuardedPoolAllocator(){};
+  GuardedPoolAllocator(const GuardedPoolAllocator &) = delete;
+  GuardedPoolAllocator &operator=(const GuardedPoolAllocator &) = delete;
+
+  // Note: This class is expected to be a singleton for the lifetime of the
+  // program. If this object is initialised, it will leak the guarded page pool
+  // and metadata allocations during destruction. We can't clean up these areas
+  // as this may cause a use-after-free on shutdown.
+  ~GuardedPoolAllocator() = default;
+
+  // Initialise the rest of the members of this class. Create the allocation
+  // pool using the provided options. See options.inc for runtime configuration
+  // options.
+  void init(const options::Options &Opts);
+
+  // Return whether the allocation should be randomly chosen for sampling.
+  ALWAYS_INLINE bool shouldSample() {
+    // NextSampleCounter == 0 means we "should regenerate the counter".
+    //                   == 1 means we "should sample this allocation".
+    if (UNLIKELY(NextSampleCounter == 0)) {
+      // GuardedPagePoolEnd == 0 if GWP-ASan is disabled.
+      if (UNLIKELY(GuardedPagePoolEnd == 0))
+        return false;
+      NextSampleCounter = (getRandomUnsigned32() % AdjustedSampleRate) + 1;
+    }
+
+    return UNLIKELY(--NextSampleCounter == 0);
+  }
+
+  // Returns whether the provided pointer is a current sampled allocation that
+  // is owned by this pool.
+  ALWAYS_INLINE bool pointerIsMine(const void *Ptr) const {
+    uintptr_t P = reinterpret_cast<uintptr_t>(Ptr);
+    return GuardedPagePool <= P && P < GuardedPagePoolEnd;
+  }
+
+  // Allocate memory in a guarded slot, and return a pointer to the new
+  // allocation. Returns nullptr if the pool is empty, the requested size is too
+  // large for this pool to handle, or the requested size is zero.
+  void *allocate(size_t Size);
+
+  // Deallocate memory in a guarded slot. The provided pointer must have been
+  // allocated using this pool. This will set the guarded slot as inaccessible.
+  void deallocate(void *Ptr);
+
+  // Returns the size of the allocation at Ptr.
+  size_t getSize(const void *Ptr);
+
+  // Returns the largest allocation that is supported by this pool. Any
+  // allocations larger than this should go to the regular system allocator.
+  size_t maximumAllocationSize() const;
+
+  // Dumps an error report (including allocation and deallocation stack traces).
+  // An optional error may be provided if the caller knows what the error is
+  // ahead of time. This is primarily a helper function to locate the static
+  // singleton pointer and call the internal version of this function. This
+  // method is never thread safe, and should only be called when fatal errors
+  // occur.
+  static void reportError(uintptr_t AccessPtr, Error Error = Error::UNKNOWN);
+
+private:
+  static constexpr size_t kInvalidSlotID = SIZE_MAX;
+
+  // These functions anonymously map memory or change the permissions of mapped
+  // memory into this process in a platform-specific way. Pointer and size
+  // arguments are expected to be page-aligned. These functions will never
+  // return on error, instead electing to kill the calling process on failure.
+  // Note that memory is initially mapped inaccessible. In order for RW
+  // mappings, call mapMemory() followed by markReadWrite() on the returned
+  // pointer.
+  void *mapMemory(size_t Size) const;
+  void markReadWrite(void *Ptr, size_t Size) const;
+  void markInaccessible(void *Ptr, size_t Size) const;
+
+  // Get the current thread ID, or kInvalidThreadID if failure. Note: This
+  // implementation is platform-specific.
+  static uint64_t getThreadID();
+
+  // Get the page size from the platform-specific implementation. Only needs to
+  // be called once, and the result should be cached in PageSize in this class.
+  static size_t getPlatformPageSize();
+
+  // Install the SIGSEGV crash handler for printing use-after-free and heap-
+  // buffer-{under|over}flow exceptions. This is platform specific as even
+  // though POSIX and Windows both support registering handlers through
+  // signal(), we have to use platform-specific signal handlers to obtain the
+  // address that caused the SIGSEGV exception.
+  static void installSignalHandlers();
+
+  // Returns the index of the slot that this pointer resides in. If the pointer
+  // is not owned by this pool, the result is undefined.
+  size_t addrToSlot(uintptr_t Ptr) const;
+
+  // Returns the address of the N-th guarded slot.
+  uintptr_t slotToAddr(size_t N) const;
+
+  // Returns a pointer to the metadata for the owned pointer. If the pointer is
+  // not owned by this pool, the result is undefined.
+  AllocationMetadata *addrToMetadata(uintptr_t Ptr) const;
+
+  // Returns the address of the page that this pointer resides in.
+  uintptr_t getPageAddr(uintptr_t Ptr) const;
+
+  // Gets the nearest slot to the provided address.
+  size_t getNearestSlot(uintptr_t Ptr) const;
+
+  // Returns whether the provided pointer is a guard page or not. The pointer
+  // must be within memory owned by this pool, else the result is undefined.
+  bool isGuardPage(uintptr_t Ptr) const;
+
+  // Reserve a slot for a new guarded allocation. Returns kInvalidSlotID if no
+  // slot is available to be reserved.
+  size_t reserveSlot();
+
+  // Unreserve the guarded slot.
+  void freeSlot(size_t SlotIndex);
+
+  // Returns the offset (in bytes) between the start of a guarded slot and where
+  // the start of the allocation should take place. Determined using the size of
+  // the allocation and the options provided at init-time.
+  uintptr_t allocationSlotOffset(size_t AllocationSize) const;
+
+  // Returns the diagnosis for an unknown error. If the diagnosis is not
+  // Error::INVALID_FREE or Error::UNKNOWN, the metadata for the slot
+  // responsible for the error is placed in *Meta.
+  Error diagnoseUnknownError(uintptr_t AccessPtr, AllocationMetadata **Meta);
+
+  void reportErrorInternal(uintptr_t AccessPtr, Error Error);
+
+  // Cached page size for this system in bytes.
+  size_t PageSize = 0;
+
+  // A mutex to protect the guarded slot and metadata pool for this class.
+  Mutex PoolMutex;
+  // The number of guarded slots that this pool holds.
+  size_t MaxSimultaneousAllocations = 0;
+  // Record the number allocations that we've sampled. We store this amount so
+  // that we don't randomly choose to recycle a slot that previously had an
+  // allocation before all the slots have been utilised.
+  size_t NumSampledAllocations = 0;
+  // Pointer to the pool of guarded slots. Note that this points to the start of
+  // the pool (which is a guard page), not a pointer to the first guarded page.
+  uintptr_t GuardedPagePool = UINTPTR_MAX;
+  uintptr_t GuardedPagePoolEnd = 0;
+  // Pointer to the allocation metadata (allocation/deallocation stack traces),
+  // if any.
+  AllocationMetadata *Metadata = nullptr;
+
+  // Pointer to an array of free slot indexes.
+  size_t *FreeSlots = nullptr;
+  // The current length of the list of free slots.
+  size_t FreeSlotsLength = 0;
+
+  // See options.{h, inc} for more information.
+  bool PerfectlyRightAlign = false;
+
+  // Printf function supplied by the implementing allocator. We can't (in
+  // general) use printf() from the cstdlib as it may malloc(), causing infinite
+  // recursion.
+  options::Printf_t Printf = nullptr;
+
+  // The adjusted sample rate for allocation sampling. Default *must* be
+  // nonzero, as dynamic initialisation may call malloc (e.g. from libstdc++)
+  // before GPA::init() is called. This would cause an error in shouldSample(),
+  // where we would calculate modulo zero. This value is set UINT32_MAX, as when
+  // GWP-ASan is disabled, we wish to never spend wasted cycles recalculating
+  // the sample rate.
+  uint32_t AdjustedSampleRate = UINT32_MAX;
+  // Thread-local decrementing counter that indicates that a given allocation
+  // should be sampled when it reaches zero.
+  static TLS_INITIAL_EXEC uint64_t NextSampleCounter;
+};
+} // namespace gwp_asan
+
+#endif // GWP_ASAN_GUARDED_POOL_ALLOCATOR_H_
diff --git a/compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_posix.cpp b/compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_posix.cpp
new file mode 100644
index 0000000000000..8bc0aefeec445
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_posix.cpp
@@ -0,0 +1,96 @@
+//===-- guarded_pool_allocator_posix.cpp ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/guarded_pool_allocator.h"
+
+#include <stdlib.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace gwp_asan {
+
+void *GuardedPoolAllocator::mapMemory(size_t Size) const {
+  void *Ptr =
+      mmap(nullptr, Size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+  if (Ptr == MAP_FAILED) {
+    Printf("Failed to map guarded pool allocator memory, errno: %d\n", errno);
+    Printf("  mmap(nullptr, %zu, ...) failed.\n", Size);
+    exit(EXIT_FAILURE);
+  }
+  return Ptr;
+}
+
+void GuardedPoolAllocator::markReadWrite(void *Ptr, size_t Size) const {
+  if (mprotect(Ptr, Size, PROT_READ | PROT_WRITE) != 0) {
+    Printf("Failed to set guarded pool allocator memory at as RW, errno: %d\n",
+           errno);
+    Printf("  mprotect(%p, %zu, RW) failed.\n", Ptr, Size);
+    exit(EXIT_FAILURE);
+  }
+}
+
+void GuardedPoolAllocator::markInaccessible(void *Ptr, size_t Size) const {
+  // mmap() a PROT_NONE page over the address to release it to the system, if
+  // we used mprotect() here the system would count pages in the quarantine
+  // against the RSS.
+  if (mmap(Ptr, Size, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1,
+           0) == MAP_FAILED) {
+    Printf("Failed to set guarded pool allocator memory as inaccessible, "
+           "errno: %d\n",
+           errno);
+    Printf("  mmap(%p, %zu, NONE, ...) failed.\n", Ptr, Size);
+    exit(EXIT_FAILURE);
+  }
+}
+
+size_t GuardedPoolAllocator::getPlatformPageSize() {
+  return sysconf(_SC_PAGESIZE);
+}
+
+struct sigaction PreviousHandler;
+
+static void sigSegvHandler(int sig, siginfo_t *info, void *ucontext) {
+  gwp_asan::GuardedPoolAllocator::reportError(
+      reinterpret_cast<uintptr_t>(info->si_addr));
+
+  // Process any previous handlers.
+  if (PreviousHandler.sa_flags & SA_SIGINFO) {
+    PreviousHandler.sa_sigaction(sig, info, ucontext);
+  } else if (PreviousHandler.sa_handler == SIG_IGN ||
+             PreviousHandler.sa_handler == SIG_DFL) {
+    // If the previous handler was the default handler, or was ignoring this
+    // signal, install the default handler and re-raise the signal in order to
+    // get a core dump and terminate this process.
+    signal(SIGSEGV, SIG_DFL);
+    raise(SIGSEGV);
+  } else {
+    PreviousHandler.sa_handler(sig);
+  }
+}
+
+void GuardedPoolAllocator::installSignalHandlers() {
+  struct sigaction Action;
+  Action.sa_sigaction = sigSegvHandler;
+  Action.sa_flags = SA_SIGINFO;
+  sigaction(SIGSEGV, &Action, &PreviousHandler);
+}
+
+uint64_t GuardedPoolAllocator::getThreadID() {
+#ifdef SYS_gettid
+  return syscall(SYS_gettid);
+#else
+  return kInvalidThreadID;
+#endif
+}
+
+} // namespace gwp_asan
diff --git a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
index 6a59be5bca615..f2f72c858619a 100644
--- a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt
@@ -9,7 +9,8 @@ set(GWP_ASAN_UNITTEST_CFLAGS
 file(GLOB GWP_ASAN_HEADERS ../*.h)
 file(GLOB GWP_ASAN_UNITTESTS *.cpp)
 set(GWP_ASAN_UNIT_TEST_HEADERS
-  ${GWP_ASAN_HEADERS})
+  ${GWP_ASAN_HEADERS}
+  harness.h)
 
 add_custom_target(GwpAsanUnitTests)
 set_target_properties(GwpAsanUnitTests PROPERTIES FOLDER "Compiler-RT Tests")
@@ -26,8 +27,11 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST GWP_ASAN_SUPPORTED_ARCH)
 
   set(GWP_ASAN_TEST_RUNTIME RTGwpAsanTest.${arch})
 
+  # RTSanitizerCommonNoTermination(NoLibc) required for __sanitizer::Printf.
   set(GWP_ASAN_TEST_RUNTIME_OBJECTS
-    $<TARGET_OBJECTS:RTGwpAsan.${arch}>)
+    $<TARGET_OBJECTS:RTGwpAsan.${arch}>
+    $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+    $<TARGET_OBJECTS:RTSanitizerCommonNoLibc.${arch}>)
 
   add_library(${GWP_ASAN_TEST_RUNTIME} STATIC
     ${GWP_ASAN_TEST_RUNTIME_OBJECTS})
diff --git a/compiler-rt/lib/gwp_asan/tests/alignment.cpp b/compiler-rt/lib/gwp_asan/tests/alignment.cpp
new file mode 100644
index 0000000000000..ffb91d5b57af1
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/alignment.cpp
@@ -0,0 +1,27 @@
+//===-- alignment.cc --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/tests/harness.h"
+
+TEST_F(DefaultGuardedPoolAllocator, BasicAllocation) {
+  std::vector<std::pair<int, int>> AllocSizeToAlignment = {
+      {1, 1},   {2, 2},   {3, 4},       {4, 4},       {5, 8},   {7, 8},
+      {8, 8},   {9, 16},  {15, 16},     {16, 16},     {17, 16}, {31, 16},
+      {32, 16}, {33, 16}, {4095, 4096}, {4096, 4096},
+  };
+
+  for (const auto &KV : AllocSizeToAlignment) {
+    void *Ptr = GPA.allocate(KV.first);
+    EXPECT_NE(nullptr, Ptr);
+
+    // Check the alignment of the pointer is as expected.
+    EXPECT_EQ(0u, reinterpret_cast<uintptr_t>(Ptr) % KV.second);
+
+    GPA.deallocate(Ptr);
+  }
+}
diff --git a/compiler-rt/lib/gwp_asan/tests/basic.cpp b/compiler-rt/lib/gwp_asan/tests/basic.cpp
new file mode 100644
index 0000000000000..9c80e2765104b
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/basic.cpp
@@ -0,0 +1,60 @@
+//===-- basic.cc ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/tests/harness.h"
+
+TEST_F(CustomGuardedPoolAllocator, BasicAllocation) {
+  InitNumSlots(1);
+  void *Ptr = GPA.allocate(1);
+  EXPECT_NE(nullptr, Ptr);
+  EXPECT_TRUE(GPA.pointerIsMine(Ptr));
+  EXPECT_EQ(1u, GPA.getSize(Ptr));
+  GPA.deallocate(Ptr);
+}
+
+TEST_F(DefaultGuardedPoolAllocator, NullptrIsNotMine) {
+  EXPECT_FALSE(GPA.pointerIsMine(nullptr));
+}
+
+TEST_F(CustomGuardedPoolAllocator, SizedAllocations) {
+  InitNumSlots(1);
+
+  std::size_t MaxAllocSize = GPA.maximumAllocationSize();
+  EXPECT_TRUE(MaxAllocSize > 0);
+
+  for (unsigned AllocSize = 1; AllocSize <= MaxAllocSize; AllocSize <<= 1) {
+    void *Ptr = GPA.allocate(AllocSize);
+    EXPECT_NE(nullptr, Ptr);
+    EXPECT_TRUE(GPA.pointerIsMine(Ptr));
+    EXPECT_EQ(AllocSize, GPA.getSize(Ptr));
+    GPA.deallocate(Ptr);
+  }
+}
+
+TEST_F(DefaultGuardedPoolAllocator, TooLargeAllocation) {
+  EXPECT_EQ(nullptr, GPA.allocate(GPA.maximumAllocationSize() + 1));
+}
+
+TEST_F(CustomGuardedPoolAllocator, AllocAllSlots) {
+  constexpr unsigned kNumSlots = 128;
+  InitNumSlots(kNumSlots);
+  void *Ptrs[kNumSlots];
+  for (unsigned i = 0; i < kNumSlots; ++i) {
+    Ptrs[i] = GPA.allocate(1);
+    EXPECT_NE(nullptr, Ptrs[i]);
+    EXPECT_TRUE(GPA.pointerIsMine(Ptrs[i]));
+  }
+
+  // This allocation should fail as all the slots are used.
+  void *Ptr = GPA.allocate(1);
+  EXPECT_EQ(nullptr, Ptr);
+  EXPECT_FALSE(GPA.pointerIsMine(nullptr));
+
+  for (unsigned i = 0; i < kNumSlots; ++i)
+    GPA.deallocate(Ptrs[i]);
+}
diff --git a/compiler-rt/lib/gwp_asan/tests/harness.h b/compiler-rt/lib/gwp_asan/tests/harness.h
new file mode 100644
index 0000000000000..987564dd9afe9
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/harness.h
@@ -0,0 +1,60 @@
+//===-- harness.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GWP_ASAN_TESTS_HARNESS_H_
+#define GWP_ASAN_TESTS_HARNESS_H_
+
+#include "gtest/gtest.h"
+
+// Include sanitizer_common first as gwp_asan/guarded_pool_allocator.h
+// transiently includes definitions.h, which overwrites some of the definitions
+// in sanitizer_common.
+#include "sanitizer_common/sanitizer_common.h"
+
+#include "gwp_asan/guarded_pool_allocator.h"
+#include "gwp_asan/options.h"
+
+class DefaultGuardedPoolAllocator : public ::testing::Test {
+public:
+  DefaultGuardedPoolAllocator() {
+    gwp_asan::options::Options Opts;
+    Opts.setDefaults();
+    MaxSimultaneousAllocations = Opts.MaxSimultaneousAllocations;
+
+    Opts.Printf = __sanitizer::Printf;
+    GPA.init(Opts);
+  }
+
+protected:
+  gwp_asan::GuardedPoolAllocator GPA;
+  decltype(gwp_asan::options::Options::MaxSimultaneousAllocations)
+      MaxSimultaneousAllocations;
+};
+
+class CustomGuardedPoolAllocator : public ::testing::Test {
+public:
+  void
+  InitNumSlots(decltype(gwp_asan::options::Options::MaxSimultaneousAllocations)
+                   MaxSimultaneousAllocationsArg) {
+    gwp_asan::options::Options Opts;
+    Opts.setDefaults();
+
+    Opts.MaxSimultaneousAllocations = MaxSimultaneousAllocationsArg;
+    MaxSimultaneousAllocations = MaxSimultaneousAllocationsArg;
+
+    Opts.Printf = __sanitizer::Printf;
+    GPA.init(Opts);
+  }
+
+protected:
+  gwp_asan::GuardedPoolAllocator GPA;
+  decltype(gwp_asan::options::Options::MaxSimultaneousAllocations)
+      MaxSimultaneousAllocations;
+};
+
+#endif // GWP_ASAN_TESTS_HARNESS_H_
diff --git a/compiler-rt/lib/gwp_asan/tests/slot_reuse.cpp b/compiler-rt/lib/gwp_asan/tests/slot_reuse.cpp
new file mode 100644
index 0000000000000..e243739023193
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/slot_reuse.cpp
@@ -0,0 +1,72 @@
+//===-- slot_reuse.cc -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/tests/harness.h"
+
+void singleByteGoodAllocDealloc(gwp_asan::GuardedPoolAllocator *GPA) {
+  void *Ptr = GPA->allocate(1);
+  EXPECT_NE(nullptr, Ptr);
+  EXPECT_TRUE(GPA->pointerIsMine(Ptr));
+  EXPECT_EQ(1u, GPA->getSize(Ptr));
+  GPA->deallocate(Ptr);
+}
+
+TEST_F(CustomGuardedPoolAllocator, EnsureReuseOfQuarantine1) {
+  InitNumSlots(1);
+  for (unsigned i = 0; i < 128; ++i)
+    singleByteGoodAllocDealloc(&GPA);
+}
+
+TEST_F(CustomGuardedPoolAllocator, EnsureReuseOfQuarantine2) {
+  InitNumSlots(2);
+  for (unsigned i = 0; i < 128; ++i)
+    singleByteGoodAllocDealloc(&GPA);
+}
+
+TEST_F(CustomGuardedPoolAllocator, EnsureReuseOfQuarantine127) {
+  InitNumSlots(127);
+  for (unsigned i = 0; i < 128; ++i)
+    singleByteGoodAllocDealloc(&GPA);
+}
+
+// This test ensures that our slots are not reused ahead of time. We increase
+// the use-after-free detection by not reusing slots until all of them have been
+// allocated. This is done by always using the slots from left-to-right in the
+// pool before we used each slot once, at which point random selection takes
+// over.
+void runNoReuseBeforeNecessary(gwp_asan::GuardedPoolAllocator *GPA,
+                               unsigned PoolSize) {
+  std::set<void *> Ptrs;
+  for (unsigned i = 0; i < PoolSize; ++i) {
+    void *Ptr = GPA->allocate(1);
+
+    EXPECT_TRUE(GPA->pointerIsMine(Ptr));
+    EXPECT_EQ(0u, Ptrs.count(Ptr));
+
+    Ptrs.insert(Ptr);
+    GPA->deallocate(Ptr);
+  }
+}
+
+TEST_F(CustomGuardedPoolAllocator, NoReuseBeforeNecessary2) {
+  constexpr unsigned kPoolSize = 2;
+  InitNumSlots(kPoolSize);
+  runNoReuseBeforeNecessary(&GPA, kPoolSize);
+}
+
+TEST_F(CustomGuardedPoolAllocator, NoReuseBeforeNecessary128) {
+  constexpr unsigned kPoolSize = 128;
+  InitNumSlots(kPoolSize);
+  runNoReuseBeforeNecessary(&GPA, kPoolSize);
+}
+
+TEST_F(CustomGuardedPoolAllocator, NoReuseBeforeNecessary129) {
+  constexpr unsigned kPoolSize = 129;
+  InitNumSlots(kPoolSize);
+  runNoReuseBeforeNecessary(&GPA, kPoolSize);
+}
diff --git a/compiler-rt/lib/gwp_asan/tests/thread_contention.cpp b/compiler-rt/lib/gwp_asan/tests/thread_contention.cpp
new file mode 100644
index 0000000000000..1c00f4413dde4
--- /dev/null
+++ b/compiler-rt/lib/gwp_asan/tests/thread_contention.cpp
@@ -0,0 +1,69 @@
+//===-- thread_contention.cc ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gwp_asan/tests/harness.h"
+
+// Note: Compilation of <atomic> and <thread> are extremely expensive for
+// non-opt builds of clang.
+#include <atomic>
+#include <cstdlib>
+#include <thread>
+#include <vector>
+
+void asyncTask(gwp_asan::GuardedPoolAllocator *GPA,
+               std::atomic<bool> *StartingGun, unsigned NumIterations) {
+  while (!*StartingGun) {
+    // Wait for starting gun.
+  }
+
+  // Get ourselves a new allocation.
+  for (unsigned i = 0; i < NumIterations; ++i) {
+    volatile char *Ptr = reinterpret_cast<volatile char *>(
+        GPA->allocate(GPA->maximumAllocationSize()));
+    // Do any other threads have access to this page?
+    EXPECT_EQ(*Ptr, 0);
+
+    // Mark the page as from malloc. Wait to see if another thread also takes
+    // this page.
+    *Ptr = 'A';
+    std::this_thread::sleep_for(std::chrono::nanoseconds(10000));
+
+    // Check we still own the page.
+    EXPECT_EQ(*Ptr, 'A');
+
+    // And now release it.
+    *Ptr = 0;
+    GPA->deallocate(const_cast<char *>(Ptr));
+  }
+}
+
+void runThreadContentionTest(unsigned NumThreads, unsigned NumIterations,
+                             gwp_asan::GuardedPoolAllocator *GPA) {
+
+  std::atomic<bool> StartingGun{false};
+  std::vector<std::thread> Threads;
+  if (std::thread::hardware_concurrency() < NumThreads) {
+    NumThreads = std::thread::hardware_concurrency();
+  }
+
+  for (unsigned i = 0; i < NumThreads; ++i) {
+    Threads.emplace_back(asyncTask, GPA, &StartingGun, NumIterations);
+  }
+
+  StartingGun = true;
+
+  for (auto &T : Threads)
+    T.join();
+}
+
+TEST_F(CustomGuardedPoolAllocator, ThreadContention) {
+  unsigned NumThreads = 4;
+  unsigned NumIterations = 10000;
+  InitNumSlots(NumThreads);
+  runThreadContentionTest(NumThreads, NumIterations, &GPA);
+}

From e34d1a4e07b8d477c279388e1a05f6dd64ab0ada Mon Sep 17 00:00:00 2001
From: Pengxuan Zheng <pzheng@quicinc.com>
Date: Wed, 5 Jun 2019 19:44:08 +0000
Subject: [PATCH 1149/1176] [cmake] Remove duplicate TestingSupport library for
 linking

Summary: This patch cleans up a duplicate use of TestingSupport library.

Subscribers: mgorny, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D62924

llvm-svn: 362637
---
 clang/unittests/Tooling/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index a3d2fc2842f80..5039db3f651c4 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(LLVM_LINK_COMPONENTS
   ${LLVM_TARGETS_TO_BUILD}
   Support
-  TestingSupport
   )
 
 # By default MSVC has a 2^16 limit on the number of sections in an object file,

From 53572d0470c92d3c10ab46b681d579680ad546c2 Mon Sep 17 00:00:00 2001
From: Dan Gohman <dan433584@gmail.com>
Date: Wed, 5 Jun 2019 20:01:01 +0000
Subject: [PATCH 1150/1176] [WebAssembly] Limit PIC support to the Emscripten
 target

The current PIC support currently only works with Emscripten, so
disable it for other targets.

This is the PIC portion of https://reviews.llvm.org/D62542.

Reviewed By: dschuff, sbc100

llvm-svn: 362638
---
 .../Target/WebAssembly/WebAssemblyTargetMachine.cpp | 13 +++++++++++--
 llvm/test/CodeGen/WebAssembly/address-offsets.ll    |  2 +-
 llvm/test/CodeGen/WebAssembly/call-pic.ll           |  2 +-
 llvm/test/CodeGen/WebAssembly/load-store-pic.ll     |  2 +-
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 8b73e827856e7..937d272d2139b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -83,13 +83,22 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
 // WebAssembly Lowering public interface.
 //===----------------------------------------------------------------------===//
 
-static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM,
+                                           const Triple &TT) {
   if (!RM.hasValue()) {
     // Default to static relocation model.  This should always be more optimial
     // than PIC since the static linker can determine all global addresses and
     // assume direct function calls.
     return Reloc::Static;
   }
+
+  if (!TT.isOSEmscripten()) {
+    // Relocation modes other than static are currently implemented in a way
+    // that only works for Emscripten, so disable them if we aren't targeting
+    // Emscripten.
+    return Reloc::Static;
+  }
+
   return *RM;
 }
 
@@ -102,7 +111,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
     : LLVMTargetMachine(T,
                         TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
                                          : "e-m:e-p:32:32-i64:64-n32:64-S128",
-                        TT, CPU, FS, Options, getEffectiveRelocModel(RM),
+                        TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT),
                         getEffectiveCodeModel(CM, CodeModel::Large), OL),
       TLOF(new WebAssemblyTargetObjectFile()) {
   // WebAssembly type-checks instructions, but a noreturn function with a return
diff --git a/llvm/test/CodeGen/WebAssembly/address-offsets.ll b/llvm/test/CodeGen/WebAssembly/address-offsets.ll
index 4f522335907d1..7fe40a1dbec6e 100644
--- a/llvm/test/CodeGen/WebAssembly/address-offsets.ll
+++ b/llvm/test/CodeGen/WebAssembly/address-offsets.ll
@@ -6,7 +6,7 @@
 ; a variety of circumstances.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+target triple = "wasm32-unknown-emscripten"
 
 @g = external global [0 x i32], align 4
 
diff --git a/llvm/test/CodeGen/WebAssembly/call-pic.ll b/llvm/test/CodeGen/WebAssembly/call-pic.ll
index 53a397ac76960..631fedbfb24d2 100644
--- a/llvm/test/CodeGen/WebAssembly/call-pic.ll
+++ b/llvm/test/CodeGen/WebAssembly/call-pic.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -relocation-model=pic -fast-isel=1 | FileCheck %s
 ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -relocation-model=pic -fast-isel=0 | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+target triple = "wasm32-unknown-emscripten"
 
 declare i32 @foo()
 declare i32 @bar()
diff --git a/llvm/test/CodeGen/WebAssembly/load-store-pic.ll b/llvm/test/CodeGen/WebAssembly/load-store-pic.ll
index 2f7f34ddf6fe7..9090c3520b570 100644
--- a/llvm/test/CodeGen/WebAssembly/load-store-pic.ll
+++ b/llvm/test/CodeGen/WebAssembly/load-store-pic.ll
@@ -6,7 +6,7 @@
 ; We test here both with and without fast-isel.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
+target triple = "wasm32-unknown-emscripten"
 
 @hidden_global         = external hidden global i32
 @hidden_global_array   = external hidden global [10 x i32]

From ecf3ae4a7032d2a5ee42433d4e5d002ac349adbf Mon Sep 17 00:00:00 2001
From: Davide Italiano <davide@freebsd.org>
Date: Wed, 5 Jun 2019 20:23:03 +0000
Subject: [PATCH 1151/1176] [NativeProcessDarwin] Remove dead code. NFCI.

llvm-svn: 362639
---
 .../source/Plugins/Process/Darwin/NativeProcessDarwin.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp b/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp
index 1d7851d2dbe04..fe7de27e0ee68 100644
--- a/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp
+++ b/lldb/source/Plugins/Process/Darwin/NativeProcessDarwin.cpp
@@ -171,14 +171,6 @@ Status NativeProcessDarwin::FinalizeLaunch(LaunchFlavor launch_flavor,
   Status error;
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS));
 
-#if 0
-    m_path = path;
-    size_t i;
-    char const *arg;
-    for (i=0; (arg = argv[i]) != NULL; i++)
-        m_args.push_back(arg);
-#endif
-
   error = StartExceptionThread();
   if (!error.Success()) {
     if (log)

From 0f8a764e8fa831c037f07c109aea947ec4a1e4f5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 5 Jun 2019 20:32:25 +0000
Subject: [PATCH 1152/1176] AMDGPU: Fix using 2 different enums for same
 operand flags

These enums are really for the same namespace of flags set on
arbitrary MachineOperands, so merge them to avoid value collisions.

llvm-svn: 362640
---
 llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp |  4 ++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp       |  4 ++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.h         | 11 ++++-------
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ce19f2559ef88..017d4ad1625f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -112,10 +112,10 @@ const MCExpr *AMDGPUMCInstLower::getLongBranchBlockExpr(
   const MCConstantExpr *One = MCConstantExpr::create(4, Ctx);
   SrcBBSym = MCBinaryExpr::createAdd(SrcBBSym, One, Ctx);
 
-  if (MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_FORWARD)
+  if (MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_FORWARD)
     return MCBinaryExpr::createSub(DestBBSym, SrcBBSym, Ctx);
 
-  assert(MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_BACKWARD);
+  assert(MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_BACKWARD);
   return MCBinaryExpr::createSub(SrcBBSym, DestBBSym, Ctx);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c25be611cb7c7..1c3c52ba02c28 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1532,7 +1532,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub0)
       .addReg(PCReg, 0, AMDGPU::sub0)
-      .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
+      .addMBB(&DestBB, MO_LONG_BRANCH_FORWARD);
     BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub1)
       .addReg(PCReg, 0, AMDGPU::sub1)
@@ -1542,7 +1542,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub0)
       .addReg(PCReg, 0, AMDGPU::sub0)
-      .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
+      .addMBB(&DestBB, MO_LONG_BRANCH_BACKWARD);
     BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
       .addReg(PCReg, RegState::Define, AMDGPU::sub1)
       .addReg(PCReg, 0, AMDGPU::sub1)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c4038b3dc29b0..3dfb4deb0fe45 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -157,7 +157,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     MO_REL32 = 4,
     MO_REL32_LO = 4,
     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
-    MO_REL32_HI = 5
+    MO_REL32_HI = 5,
+
+    MO_LONG_BRANCH_FORWARD = 6,
+    MO_LONG_BRANCH_BACKWARD = 7
   };
 
   explicit SIInstrInfo(const GCNSubtarget &ST);
@@ -1030,12 +1033,6 @@ namespace AMDGPU {
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
 
-  // For MachineOperands.
-  enum TargetFlags {
-    TF_LONG_BRANCH_FORWARD = 1 << 0,
-    TF_LONG_BRANCH_BACKWARD = 1 << 1
-  };
-
 } // end namespace AMDGPU
 
 namespace SI {

From 4fb580c31475aa5cf1481927ed6cfefa7a14d5aa Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 5 Jun 2019 20:32:32 +0000
Subject: [PATCH 1153/1176] AMDGPU: Remove amdgpu-max-work-group-size attribute

This has been deprecated for a long time, and mesa recently switched
to amdgpu-flat-work-group-size.

llvm-svn: 362641
---
 llvm/docs/AMDGPUUsage.rst                             |  2 --
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp            | 11 +----------
 .../CodeGen/AMDGPU/large-work-group-promote-alloca.ll |  2 +-
 .../CodeGen/AMDGPU/promote-alloca-calling-conv.ll     |  2 +-
 4 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 38a8cbf2b7dc2..f443604557143 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -424,8 +424,6 @@ The AMDGPU backend supports the following LLVM IR attributes.
                                              argument block size for the implicit arguments. This
                                              varies by OS and language (for OpenCL see
                                              :ref:`opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table`).
-     "amdgpu-max-work-group-size"="n"        Specify the maximum work-group size that will be specifed
-                                             when the kernel is dispatched.
      "amdgpu-num-sgpr"="n"                   Specifies the number of SGPRs to use. Generated by
                                              the ``amdgpu_num_sgpr`` CLANG attribute [CLANG-ATTR]_.
      "amdgpu-num-vgpr"="n"                   Specifies the number of VGPRs to use. Generated by the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 09b806bd06a55..2a0f5654e5a89 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -340,12 +340,6 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
   std::pair<unsigned, unsigned> Default =
     getDefaultFlatWorkGroupSize(F.getCallingConv());
 
-  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
-  // starts using "amdgpu-flat-work-group-size" attribute.
-  Default.second = AMDGPU::getIntegerAttribute(
-    F, "amdgpu-max-work-group-size", Default.second);
-  Default.first = std::min(Default.first, Default.second);
-
   // Requested minimum/maximum flat work group sizes.
   std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
     F, "amdgpu-flat-work-group-size", Default);
@@ -379,10 +373,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
     getMaxWavesPerEU(FlatWorkGroupSizes.second);
   bool RequestedFlatWorkGroupSize = false;
 
-  // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
-  // starts using "amdgpu-flat-work-group-size" attribute.
-  if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
-      F.hasFnAttribute("amdgpu-flat-work-group-size")) {
+  if (F.hasFnAttribute("amdgpu-flat-work-group-size")) {
     Default.first = MinImpliedByFlatWorkGroupSize;
     RequestedFlatWorkGroupSize = true;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
index 4fbf5e657dffc..9129c56bfd472 100644
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -264,7 +264,7 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "amdgpu-max-work-group-size"="63" }
+attributes #0 = { nounwind "amdgpu-flat-work-group-size"="63,63" }
 attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" }
 attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" }
 attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
index afe05cd79b2a4..15e8604930d59 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -96,5 +96,5 @@ entry:
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
-attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
+attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
 attributes #1 = { nounwind readnone }

From 607c8a9d1481312acb421425ac8c8df56a0c9012 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 5 Jun 2019 20:37:47 +0000
Subject: [PATCH 1154/1176] IR: make getParamByValType Just Work. NFC.

Most parts of LLVM don't care whether the byval type is derived from an
explicit Attribute or from the parameter's pointee type, so it makes
sense for the main access function to just return the right value.

The very few users who do care (only BitcodeReader so far) can find out
how it's specified by accessing the Attribute directly.

llvm-svn: 362642
---
 llvm/include/llvm/IR/Argument.h                       |  2 ++
 llvm/include/llvm/IR/Function.h                       | 10 ++++++++--
 llvm/include/llvm/IR/InstrTypes.h                     |  5 +++--
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp             |  3 ++-
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp           |  2 +-
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp |  3 +--
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp      |  4 +++-
 llvm/lib/IR/Function.cpp                              |  4 ++++
 8 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index 952fbcdffb142..5f514b9c47d29 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -124,6 +124,8 @@ class Argument final : public Value {
   /// Check if an argument has a given attribute.
   bool hasAttribute(Attribute::AttrKind Kind) const;
 
+  Attribute getAttribute(Attribute::AttrKind Kind) const;
+
   /// Method for support type inquiry through isa, cast, and dyn_cast.
   static bool classof(const Value *V) {
     return V->getValueID() == ArgumentVal;
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index 896c2189eb824..b93541cbb16b7 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -401,6 +401,11 @@ class Function : public GlobalObject, public ilist_node<Function> {
     return getAttributes().hasParamAttribute(ArgNo, Kind);
   }
 
+  /// gets the specified attribute from the list of attributes.
+  Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return getAttributes().getParamAttr(ArgNo, Kind);
+  }
+
   /// gets the attribute from the list of attributes.
   Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
     return AttributeSets.getAttribute(i, Kind);
@@ -431,9 +436,10 @@ class Function : public GlobalObject, public ilist_node<Function> {
     return AttributeSets.getParamAlignment(ArgNo);
   }
 
-  /// Extract the byval type for a parameter (nullptr=unknown).
+  /// Extract the byval type for a parameter.
   Type *getParamByValType(unsigned ArgNo) const {
-    return AttributeSets.getParamByValType(ArgNo);
+    Type *Ty = AttributeSets.getParamByValType(ArgNo);
+    return Ty ? Ty : (arg_begin() + ArgNo)->getType()->getPointerElementType();
   }
 
   /// Extract the number of dereferenceable bytes for a call or
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 6ce76811c0e66..237929f5e609d 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1560,9 +1560,10 @@ class CallBase : public Instruction {
     return Attrs.getParamAlignment(ArgNo);
   }
 
-  /// Extract the byval type for a call or parameter (nullptr=unknown).
+  /// Extract the byval type for a call or parameter.
   Type *getParamByValType(unsigned ArgNo) const {
-    return Attrs.getParamByValType(ArgNo);
+    Type *Ty = Attrs.getParamByValType(ArgNo);
+    return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
   }
 
   /// Extract the number of dereferenceable bytes for a call or
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 9f562ba82db93..c33fc568abe85 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3049,7 +3049,8 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   // pointee type. There should be no opaque pointers where the byval type is
   // implicit.
   for (auto &Arg : Func->args()) {
-    if (Arg.hasByValAttr() && !Arg.getParamByValType()) {
+    if (Arg.hasByValAttr() &&
+        !Arg.getAttribute(Attribute::ByVal).getValueAsType()) {
       Arg.removeAttr(Attribute::ByVal);
       Arg.addAttr(Attribute::getWithByValType(
           Context, Arg.getType()->getPointerElementType()));
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 143570fb20a8c..f59c906c7b757 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -951,7 +951,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
   // Adding function arguments to the value table.
   for (const auto &I : F.args()) {
     EnumerateValue(&I);
-    if (I.hasAttribute(Attribute::ByVal) && I.getParamByValType())
+    if (I.hasAttribute(Attribute::ByVal))
       EnumerateType(I.getParamByValType());
   }
   FirstFuncConstantID = Values.size();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4f7257d4a151d..07d6ac83e03b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9584,8 +9584,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         // For ByVal, size and alignment should be passed from FE.  BE will
         // guess if this info is not there but there are cases it cannot get
         // right.
-        unsigned FrameSize = DL.getTypeAllocSize(
-            Arg.getParamByValType() ? Arg.getParamByValType() : ElementTy);
+        unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
         Flags.setByValSize(FrameSize);
 
         unsigned FrameAlign;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6e6917b39b6d5..c2123dbfdd95f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -112,7 +112,9 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlignment(ArgIdx);
-  ByValType = Call->getParamByValType(ArgIdx);
+  ByValType = nullptr;
+  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+    ByValType = Call->getParamByValType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index a4a78ca4deb96..c88fd1a82cd3b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -194,6 +194,10 @@ bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
   return getParent()->hasParamAttribute(getArgNo(), Kind);
 }
 
+Attribute Argument::getAttribute(Attribute::AttrKind Kind) const {
+  return getParent()->getParamAttribute(getArgNo(), Kind);
+}
+
 //===----------------------------------------------------------------------===//
 // Helper Methods in Function
 //===----------------------------------------------------------------------===//

From 8d7f118ab2b9e51d6cf2811291e319b4d977eb8c Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 5 Jun 2019 20:38:17 +0000
Subject: [PATCH 1155/1176] InstCombine: correctly change byval type attribute
 alongside call args.

When the byval attribute has a type, it must match the pointee type of
any parameter; but InstCombine was not updating the attribute when
folding casts of various kinds away.

llvm-svn: 362643
---
 .../InstCombine/InstCombineCalls.cpp          | 24 +++++++++++++++----
 llvm/test/Transforms/InstCombine/byval.ll     | 24 +++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/byval.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 39aae2f2e140b..2c8fa20b25934 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4015,7 +4015,9 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,
 
   Type* SrcTy =
             cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
-  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+  Type *DstTy = Call.isByValArgument(ix)
+                    ? Call.getParamByValType(ix)
+                    : cast<PointerType>(CI->getType())->getElementType();
   if (!SrcTy->isSized() || !DstTy->isSized())
     return false;
   if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
@@ -4223,6 +4225,15 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) {
       CastInst *CI = dyn_cast<CastInst>(*I);
       if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
         *I = CI->getOperand(0);
+
+        // Update the byval type to match the argument type.
+        if (Call.isByValArgument(ix)) {
+          Call.removeParamAttr(ix, Attribute::ByVal);
+          Call.addParamAttr(
+              ix, Attribute::getWithByValType(
+                      Call.getContext(),
+                      CI->getOperand(0)->getType()->getPointerElementType()));
+        }
         Changed = true;
       }
     }
@@ -4353,7 +4364,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
       if (!ParamPTy || !ParamPTy->getElementType()->isSized())
         return false;
 
-      Type *CurElTy = ActTy->getPointerElementType();
+      Type *CurElTy = Call.getParamByValType(i);
       if (DL.getTypeAllocSize(CurElTy) !=
           DL.getTypeAllocSize(ParamPTy->getElementType()))
         return false;
@@ -4407,6 +4418,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
   // with the existing attributes.  Wipe out any problematic attributes.
   RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
 
+  LLVMContext &Ctx = Call.getContext();
   AI = Call.arg_begin();
   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
     Type *ParamTy = FT->getParamType(i);
@@ -4417,7 +4429,12 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
     Args.push_back(NewArg);
 
     // Add any parameter attributes.
-    ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+    if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
+      AttrBuilder AB(CallerPAL.getParamAttributes(i));
+      AB.addByValAttr(NewArg->getType()->getPointerElementType());
+      ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
+    } else
+      ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
   }
 
   // If the function takes more arguments than the call was taking, add them
@@ -4456,7 +4473,6 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
 
   assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
          "missing argument attributes");
-  LLVMContext &Ctx = Callee->getContext();
   AttributeList NewCallerPAL = AttributeList::get(
       Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
 
diff --git a/llvm/test/Transforms/InstCombine/byval.ll b/llvm/test/Transforms/InstCombine/byval.ll
new file mode 100644
index 0000000000000..2af32b8b62c58
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/byval.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+declare void @add_byval_callee(double*)
+
+; CHECK-LABEL: define void @add_byval
+; CHECK: [[ARG:%.*]] = bitcast i64* %in to double*
+; CHECK: call void @add_byval_callee(double* byval(double) [[ARG]])
+define void @add_byval(i64* %in) {
+  %tmp = bitcast void (double*)* @add_byval_callee to void (i64*)*
+  call void %tmp(i64* byval(i64) %in)
+  ret void
+}
+
+%t2 = type { i8 }
+
+; CHECK-LABEL: define void @vararg_byval
+; CHECK: call void (i8, ...) @vararg_callee(i8 undef, i8* byval(i8) %p)
+define void @vararg_byval(i8* %p) {
+  %tmp = bitcast i8* %p to %t2*
+  call void (i8, ...) @vararg_callee(i8 undef, %t2* byval(%t2) %tmp)
+  ret void
+}
+
+declare void @vararg_callee(i8, ...)

From 2d0896c1cb90243f12df698e84458016c0c121dc Mon Sep 17 00:00:00 2001
From: Whitney Tsang <whitney.uwaterloo@gmail.com>
Date: Wed, 5 Jun 2019 20:42:47 +0000
Subject: [PATCH 1156/1176] [LOOPINFO] Extend Loop object to add utilities to
 get the loop bounds, step, and loop induction variable.

Summary: This PR extends the loop object with more utilities to get loop
bounds, step, and loop induction variable. There already exists passes
which try to obtain the loop induction variable in their own pass, e.g.
loop interchange. It would be useful to have a common area to get these
information.

/// Example:
/// for (int i = lb; i < ub; i+=step)
///   <loop body>
/// --- pseudo LLVMIR ---
/// beforeloop:
///   guardcmp = (lb < ub)
///   if (guardcmp) goto preheader; else goto afterloop
/// preheader:
/// loop:
///   i1 = phi[{lb, preheader}, {i2, latch}]
///   <loop body>
///   i2 = i1 + step
/// latch:
///   cmp = (i2 < ub)
///   if (cmp) goto loop
/// exit:
/// afterloop:
///
/// getBounds
///   getInitialIVValue      --> lb
///   getStepInst            --> i2 = i1 + step
///   getStepValue           --> step
///   getFinalIVValue        --> ub
///   getCanonicalPredicate  --> '<'
///   getDirection           --> Increasing
/// getInductionVariable          --> i1
/// getAuxiliaryInductionVariable --> {i1}
/// isCanonical                   --> false

Reviewers: kbarton, hfinkel, dmgreen, Meinersbur, jdoerfert, syzaara,
fhahn
Reviewed By: kbarton
Subscribers: tvvikram, bmahjour, etiotto, fhahn, jsji, hiraditya,
llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D60565

llvm-svn: 362644
---
 llvm/include/llvm/Analysis/LoopInfo.h    | 161 ++++
 llvm/lib/Analysis/LoopInfo.cpp           | 214 ++++++
 llvm/unittests/Analysis/LoopInfoTest.cpp | 900 +++++++++++++++++++++++
 3 files changed, 1275 insertions(+)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 6b964cdf9eae5..a4c8f648685c8 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -54,9 +54,11 @@ namespace llvm {
 class DominatorTree;
 class LoopInfo;
 class Loop;
+class InductionDescriptor;
 class MDNode;
 class MemorySSAUpdater;
 class PHINode;
+class ScalarEvolution;
 class raw_ostream;
 template <class N, bool IsPostDom> class DominatorTreeBase;
 template <class N, class M> class LoopInfoBase;
@@ -529,6 +531,165 @@ class Loop : public LoopBase<BasicBlock, Loop> {
   bool getIncomingAndBackEdge(BasicBlock *&Incoming,
                               BasicBlock *&Backedge) const;
 
+  /// Below are some utilities to get loop bounds and induction variable, and
+  /// check if a given phinode is an auxiliary induction variable, as well as
+  /// checking if the loop is canonical.
+  ///
+  /// Here is an example:
+  /// \code
+  /// for (int i = lb; i < ub; i+=step)
+  ///   <loop body>
+  /// --- pseudo LLVMIR ---
+  /// beforeloop:
+  ///   guardcmp = (lb < ub)
+  ///   if (guardcmp) goto preheader; else goto afterloop
+  /// preheader:
+  /// loop:
+  ///   i_1 = phi[{lb, preheader}, {i_2, latch}]
+  ///   <loop body>
+  ///   i_2 = i_1 + step
+  /// latch:
+  ///   cmp = (i_2 < ub)
+  ///   if (cmp) goto loop
+  /// exit:
+  /// afterloop:
+  /// \endcode
+  ///
+  /// - getBounds
+  ///   - getInitialIVValue      --> lb
+  ///   - getStepInst            --> i_2 = i_1 + step
+  ///   - getStepValue           --> step
+  ///   - getFinalIVValue        --> ub
+  ///   - getCanonicalPredicate  --> '<'
+  ///   - getDirection           --> Increasing
+  ///
+  /// - getInductionVariable            --> i_1
+  /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+  /// - isCanonical                     --> false
+  struct LoopBounds {
+    /// Return the LoopBounds object if
+    /// - the given \p IndVar is an induction variable
+    /// - the initial value of the induction variable can be found
+    /// - the step instruction of the induction variable can be found
+    /// - the final value of the induction variable can be found
+    ///
+    /// Else None.
+    static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
+                                                ScalarEvolution &SE);
+
+    /// Get the initial value of the loop induction variable.
+    Value &getInitialIVValue() const { return InitialIVValue; }
+
+    /// Get the instruction that updates the loop induction variable.
+    Instruction &getStepInst() const { return StepInst; }
+
+    /// Get the step that the loop induction variable gets updated by in each
+    /// loop iteration. Return nullptr if not found.
+    Value *getStepValue() const { return StepValue; }
+
+    /// Get the final value of the loop induction variable.
+    Value &getFinalIVValue() const { return FinalIVValue; }
+
+    /// Return the canonical predicate for the latch compare instruction, if
+    /// able to be calcuated. Else BAD_ICMP_PREDICATE.
+    ///
+    /// A predicate is considered as canonical if requirements below are all
+    /// satisfied:
+    /// 1. The first successor of the latch branch is the loop header
+    ///    If not, inverse the predicate.
+    /// 2. One of the operands of the latch comparison is StepInst
+    ///    If not, and
+    ///    - if the current calcuated predicate is not ne or eq, flip the
+    ///      predicate.
+    ///    - else if the loop is increasing, return slt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///    - else if the loop is decreasing, return sgt
+    ///      (notice that it is safe to change from ne or eq to sign compare)
+    ///
+    /// Here is an example when both (1) and (2) are not satisfied:
+    /// \code
+    /// loop.header:
+    ///  %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
+    ///  %inc = add %iv, %step
+    ///  %cmp = slt %iv, %finaliv
+    ///  br %cmp, %loop.exit, %loop.header
+    /// loop.exit:
+    /// \endcode
+    /// - The second successor of the latch branch is the loop header instead
+    ///   of the first successor (slt -> sge)
+    /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
+    ///   instead of the StepInst (%inc) (sge -> sgt)
+    ///
+    /// The predicate would be sgt if both (1) and (2) are satisfied.
+    /// getCanonicalPredicate() returns sgt for this example.
+    /// Note: The IR is not changed.
+    ICmpInst::Predicate getCanonicalPredicate() const;
+
+    /// An enum for the direction of the loop
+    /// - for (int i = 0; i < ub; ++i)  --> Increasing
+    /// - for (int i = ub; i > 0; --i)  --> Descresing
+    /// - for (int i = x; i != y; i+=z) --> Unknown
+    enum class Direction { Increasing, Decreasing, Unknown };
+
+    /// Get the direction of the loop.
+    Direction getDirection() const;
+
+  private:
+    LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
+               ScalarEvolution &SE)
+        : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
+          FinalIVValue(F), SE(SE) {}
+
+    const Loop &L;
+
+    // The initial value of the loop induction variable
+    Value &InitialIVValue;
+
+    // The instruction that updates the loop induction variable
+    Instruction &StepInst;
+
+    // The value that the loop induction variable gets updated by in each loop
+    // iteration
+    Value *StepValue;
+
+    // The final value of the loop induction variable
+    Value &FinalIVValue;
+
+    ScalarEvolution &SE;
+  };
+
+  /// Return the struct LoopBounds collected if all struct members are found,
+  /// else None.
+  Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
+
+  /// Return the loop induction variable if found, else return nullptr.
+  /// An instruction is considered as the loop induction variable if
+  /// - it is an induction variable of the loop; and
+  /// - it is used to determine the condition of the branch in the loop latch
+  ///
+  /// Note: the induction variable doesn't need to be canonical, i.e. starts at
+  /// zero and increments by one each time through the loop (but it can be).
+  PHINode *getInductionVariable(ScalarEvolution &SE) const;
+
+  /// Get the loop induction descriptor for the loop induction variable. Return
+  /// true if the loop induction variable is found.
+  bool getInductionDescriptor(ScalarEvolution &SE,
+                              InductionDescriptor &IndDesc) const;
+
+  /// Return true if the given PHINode \p AuxIndVar is
+  /// - in the loop header
+  /// - not used outside of the loop
+  /// - incremented by a loop invariant step for each loop iteration
+  /// - step instruction opcode should be add or sub
+  /// Note: auxiliary induction variable is not required to be used in the
+  ///       conditional branch in the loop latch. (but it can be)
+  bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                    ScalarEvolution &SE) const;
+
+  /// Return true if the loop induction variable starts at zero and increments
+  /// by one each time through the loop.
+  bool isCanonical(ScalarEvolution &SE) const;
+
   /// Return true if the Loop is in LCSSA form.
   bool isLCSSAForm(DominatorTree &DT) const;
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index aa933d98f249b..00dbe30c2b3db 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -17,10 +17,12 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/CFG.h"
@@ -164,6 +166,218 @@ PHINode *Loop::getCanonicalInductionVariable() const {
   return nullptr;
 }
 
+/// Get the latch condition instruction.
+static ICmpInst *getLatchCmpInst(const Loop &L) {
+  if (BasicBlock *Latch = L.getLoopLatch())
+    if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+      if (BI->isConditional())
+        return dyn_cast<ICmpInst>(BI->getCondition());
+
+  return nullptr;
+}
+
+/// Return the final value of the loop induction variable if found.
+static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
+                               const Instruction &StepInst) {
+  ICmpInst *LatchCmpInst = getLatchCmpInst(L);
+  if (!LatchCmpInst)
+    return nullptr;
+
+  Value *Op0 = LatchCmpInst->getOperand(0);
+  Value *Op1 = LatchCmpInst->getOperand(1);
+  if (Op0 == &IndVar || Op0 == &StepInst)
+    return Op1;
+
+  if (Op1 == &IndVar || Op1 == &StepInst)
+    return Op0;
+
+  return nullptr;
+}
+
+Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
+                                                       PHINode &IndVar,
+                                                       ScalarEvolution &SE) {
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
+    return None;
+
+  Value *InitialIVValue = IndDesc.getStartValue();
+  Instruction *StepInst = IndDesc.getInductionBinOp();
+  if (!InitialIVValue || !StepInst)
+    return None;
+
+  const SCEV *Step = IndDesc.getStep();
+  Value *StepInstOp1 = StepInst->getOperand(1);
+  Value *StepInstOp0 = StepInst->getOperand(0);
+  Value *StepValue = nullptr;
+  if (SE.getSCEV(StepInstOp1) == Step)
+    StepValue = StepInstOp1;
+  else if (SE.getSCEV(StepInstOp0) == Step)
+    StepValue = StepInstOp0;
+
+  Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
+  if (!FinalIVValue)
+    return None;
+
+  return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
+                    SE);
+}
+
+using Direction = Loop::LoopBounds::Direction;
+
+ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
+  BasicBlock *Latch = L.getLoopLatch();
+  assert(Latch && "Expecting valid latch");
+
+  BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
+  assert(BI && BI->isConditional() && "Expecting conditional latch branch");
+
+  ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
+  assert(LatchCmpInst &&
+         "Expecting the latch compare instruction to be a CmpInst");
+
+  // Need to inverse the predicate when first successor is not the loop
+  // header
+  ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
+                                 ? LatchCmpInst->getPredicate()
+                                 : LatchCmpInst->getInversePredicate();
+
+  if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+
+  // Need to flip strictness of the predicate when the latch compare instruction
+  // is not using StepInst
+  if (LatchCmpInst->getOperand(0) == &getStepInst() ||
+      LatchCmpInst->getOperand(1) == &getStepInst())
+    return Pred;
+
+  // Cannot flip strictness of NE and EQ
+  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+    return ICmpInst::getFlippedStrictnessPredicate(Pred);
+
+  Direction D = getDirection();
+  if (D == Direction::Increasing)
+    return ICmpInst::ICMP_SLT;
+
+  if (D == Direction::Decreasing)
+    return ICmpInst::ICMP_SGT;
+
+  // If cannot determine the direction, then unable to find the canonical
+  // predicate
+  return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Direction Loop::LoopBounds::getDirection() const {
+  if (const SCEVAddRecExpr *StepAddRecExpr =
+          dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
+    if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
+      if (SE.isKnownPositive(StepRecur))
+        return Direction::Increasing;
+      if (SE.isKnownNegative(StepRecur))
+        return Direction::Decreasing;
+    }
+
+  return Direction::Unknown;
+}
+
+Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return LoopBounds::getBounds(*this, *IndVar, SE);
+
+  return None;
+}
+
+PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
+  if (!isLoopSimplifyForm())
+    return nullptr;
+
+  BasicBlock *Header = getHeader();
+  assert(Header && "Expected a valid loop header");
+  ICmpInst *CmpInst = getLatchCmpInst(*this);
+  if (!CmpInst)
+    return nullptr;
+
+  Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
+  Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+
+  for (PHINode &IndVar : Header->phis()) {
+    InductionDescriptor IndDesc;
+    if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
+      continue;
+
+    Instruction *StepInst = IndDesc.getInductionBinOp();
+
+    // case 1:
+    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+    // StepInst = IndVar + step
+    // cmp = StepInst < FinalValue
+    if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1)
+      return &IndVar;
+
+    // case 2:
+    // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+    // StepInst = IndVar + step
+    // cmp = IndVar < FinalValue
+    if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1)
+      return &IndVar;
+  }
+
+  return nullptr;
+}
+
+bool Loop::getInductionDescriptor(ScalarEvolution &SE,
+                                  InductionDescriptor &IndDesc) const {
+  if (PHINode *IndVar = getInductionVariable(SE))
+    return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
+
+  return false;
+}
+
+bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+                                        ScalarEvolution &SE) const {
+  // Located in the loop header
+  BasicBlock *Header = getHeader();
+  if (AuxIndVar.getParent() != Header)
+    return false;
+
+  // No uses outside of the loop
+  for (User *U : AuxIndVar.users())
+    if (const Instruction *I = dyn_cast<Instruction>(U))
+      if (!contains(I))
+        return false;
+
+  InductionDescriptor IndDesc;
+  if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
+    return false;
+
+  // The step instruction opcode should be add or sub.
+  if (IndDesc.getInductionOpcode() != Instruction::Add &&
+      IndDesc.getInductionOpcode() != Instruction::Sub)
+    return false;
+
+  // Incremented by a loop invariant step for each loop iteration
+  return SE.isLoopInvariant(IndDesc.getStep(), this);
+}
+
+bool Loop::isCanonical(ScalarEvolution &SE) const {
+  InductionDescriptor IndDesc;
+  if (!getInductionDescriptor(SE, IndDesc))
+    return false;
+
+  ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
+  if (!Init || !Init->isZero())
+    return false;
+
+  if (IndDesc.getInductionOpcode() != Instruction::Add)
+    return false;
+
+  ConstantInt *Step = IndDesc.getConstIntStepValue();
+  if (!Step || !Step->isOne())
+    return false;
+
+  return true;
+}
+
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
                                DominatorTree &DT) {
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 483532a187527..005e1dc405b75 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -7,6 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/SourceMgr.h"
@@ -26,6 +30,26 @@ runWithLoopInfo(Module &M, StringRef FuncName,
   Test(*F, LI);
 }
 
+/// Build the loop info and scalar evolution for the function and run the Test.
+static void runWithLoopInfoPlus(
+    Module &M, StringRef FuncName,
+    function_ref<void(Function &F, LoopInfo &LI, ScalarEvolution &SE,
+                      PostDominatorTree &PDT)>
+        Test) {
+  auto *F = M.getFunction(FuncName);
+  ASSERT_NE(F, nullptr) << "Could not find " << FuncName;
+
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI(TLII);
+  AssumptionCache AC(*F);
+  DominatorTree DT(*F);
+  LoopInfo LI(DT);
+  ScalarEvolution SE(*F, TLI, AC, DT, LI);
+
+  PostDominatorTree PDT(*F);
+  Test(*F, LI, SE, PDT);
+}
+
 static std::unique_ptr<Module> makeLLVMModule(LLVMContext &Context,
                                               const char *ModuleStr) {
   SMDiagnostic Err;
@@ -210,3 +234,879 @@ TEST(LoopInfoTest, PreorderTraversals) {
   EXPECT_EQ(&L_0_1, ReverseSiblingPreorder[6]);
   EXPECT_EQ(&L_0_0, ReverseSiblingPreorder[7]);
 }
+
+TEST(LoopInfoTest, CanonicalLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithInverseGuardSuccs) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp sge i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.end, label %for.preheader\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithSwappedGuardCmp) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp sgt i32 %ub, 0\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp sge i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.exit, label %for.body\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithInverseLatchSuccs) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp sge i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.exit, label %for.body\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithLatchCmpNE) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp ne i32 %i, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopWithGuardCmpSLE) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %ubPlusOne = add i32 %ub, 1\n"
+      "  %guardcmp = icmp sle i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp ne i32 %i, %ubPlusOne\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ubPlusOne");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopNonConstantStep) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = zext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, %step\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(), Loop::LoopBounds::Direction::Unknown);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopUnsignedBounds) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp ult i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = zext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add i32 %i, 1\n"
+      "  %cmp = icmp ult i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_ULT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, DecreasingLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ %ub, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = sub nsw i32 %i, 1\n"
+      "  %cmp = icmp sgt i32 %inc, 0\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        EXPECT_EQ(Bounds->getInitialIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_EQ(StepValue, nullptr);
+        ConstantInt *FinalIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getFinalIVValue());
+        EXPECT_TRUE(FinalIVValue && FinalIVValue->isZero());
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SGT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Decreasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, CannotFindDirection) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i32 %step) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, %step\n"
+      "  %cmp = icmp ne i32 %i, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(*M, "foo",
+                      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+                          PostDominatorTree &PDT) {
+                        Function::iterator FI = F.begin();
+                        // First two basic block are entry and for.preheader
+                        // - skip them.
+                        ++FI;
+                        BasicBlock *Header = &*(++FI);
+                        assert(Header->getName() == "for.body");
+                        Loop *L = LI.getLoopFor(Header);
+                        EXPECT_NE(L, nullptr);
+
+                        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+                        EXPECT_NE(Bounds, None);
+                        ConstantInt *InitialIVValue =
+                            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+                        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+                        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+                        EXPECT_EQ(Bounds->getStepValue()->getName(), "step");
+                        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+                        EXPECT_EQ(Bounds->getCanonicalPredicate(),
+                                  ICmpInst::BAD_ICMP_PREDICATE);
+                        EXPECT_EQ(Bounds->getDirection(),
+                                  Loop::LoopBounds::Direction::Unknown);
+                        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+                      });
+}
+
+TEST(LoopInfoTest, ZextIndVar) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %for.body ]\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %wide.trip.count = zext i32 %ub to i64\n"
+      "  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count\n"
+      "  br i1 %exitcond, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "indvars.iv.next");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "wide.trip.count");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_NE);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "indvars.iv");
+      });
+}
+
+TEST(LoopInfoTest, UnguardedLoop) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First basic block is entry - skip it.
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, UnguardedLoopWithControlFlow) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub, i1 %cond) {\n"
+      "entry:\n"
+      "  br i1 %cond, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, LoopNest) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.outer.preheader, label %for.end\n"
+      "for.outer.preheader:\n"
+      "  br label %for.outer\n"
+      "for.outer:\n"
+      "  %j = phi i32 [ 0, %for.outer.preheader ], [ %inc.outer, %for.outer.latch ]\n"
+      "  br i1 %guardcmp, label %for.inner.preheader, label %for.outer.latch\n"
+      "for.inner.preheader:\n"
+      "  br label %for.inner\n"
+      "for.inner:\n"
+      "  %i = phi i32 [ 0, %for.inner.preheader ], [ %inc, %for.inner ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.inner, label %for.inner.exit\n"
+      "for.inner.exit:\n"
+      "  br label %for.outer.latch\n"
+      "for.outer.latch:\n"
+      "  %inc.outer = add nsw i32 %j, 1\n"
+      "  %cmp.outer = icmp slt i32 %inc.outer, %ub\n"
+      "  br i1 %cmp.outer, label %for.outer, label %for.outer.exit\n"
+      "for.outer.exit:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.outer.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.outer");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc.outer");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "j");
+
+        // Next two basic blocks are for.outer and for.inner.preheader - skip
+        // them.
+        ++FI;
+        Header = &*(++FI);
+        assert(Header->getName() == "for.inner");
+        L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> InnerBounds = L->getBounds(SE);
+        EXPECT_NE(InnerBounds, None);
+        InitialIVValue =
+            dyn_cast<ConstantInt>(&InnerBounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(InnerBounds->getStepInst().getName(), "inc");
+        StepValue = dyn_cast_or_null<ConstantInt>(InnerBounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(InnerBounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(InnerBounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(InnerBounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+      });
+}
+
+TEST(LoopInfoTest, AuxiliaryIV) {
+  const char *ModuleStr =
+      "define void @foo(i32* %A, i32 %ub) {\n"
+      "entry:\n"
+      "  %guardcmp = icmp slt i32 0, %ub\n"
+      "  br i1 %guardcmp, label %for.preheader, label %for.end\n"
+      "for.preheader:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]\n"
+      "  %aux = phi i32 [ 0, %for.preheader ], [ %auxinc, %for.body ]\n"
+      "  %loopvariant = phi i32 [ 0, %for.preheader ], [ %loopvariantinc, %for.body ]\n"
+      "  %usedoutside = phi i32 [ 0, %for.preheader ], [ %usedoutsideinc, %for.body ]\n"
+      "  %mulopcode = phi i32 [ 0, %for.preheader ], [ %mulopcodeinc, %for.body ]\n"
+      "  %idxprom = sext i32 %i to i64\n"
+      "  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom\n"
+      "  store i32 %i, i32* %arrayidx, align 4\n"
+      "  %mulopcodeinc = mul nsw i32 %mulopcode, 5\n"
+      "  %usedoutsideinc = add nsw i32 %usedoutside, 5\n"
+      "  %loopvariantinc = add nsw i32 %loopvariant, %i\n"
+      "  %auxinc = add nsw i32 %aux, 5\n"
+      "  %inc = add nsw i32 %i, 1\n"
+      "  %cmp = icmp slt i32 %inc, %ub\n"
+      "  br i1 %cmp, label %for.body, label %for.exit\n"
+      "for.exit:\n"
+      "  %lcssa = phi i32 [ %usedoutside, %for.body ]\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfoPlus(
+      *M, "foo",
+      [&](Function &F, LoopInfo &LI, ScalarEvolution &SE,
+          PostDominatorTree &PDT) {
+        Function::iterator FI = F.begin();
+        // First two basic block are entry and for.preheader - skip them.
+        ++FI;
+        BasicBlock *Header = &*(++FI);
+        assert(Header->getName() == "for.body");
+        Loop *L = LI.getLoopFor(Header);
+        EXPECT_NE(L, nullptr);
+
+        Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
+        EXPECT_NE(Bounds, None);
+        ConstantInt *InitialIVValue =
+            dyn_cast<ConstantInt>(&Bounds->getInitialIVValue());
+        EXPECT_TRUE(InitialIVValue && InitialIVValue->isZero());
+        EXPECT_EQ(Bounds->getStepInst().getName(), "inc");
+        ConstantInt *StepValue =
+            dyn_cast_or_null<ConstantInt>(Bounds->getStepValue());
+        EXPECT_TRUE(StepValue && StepValue->isOne());
+        EXPECT_EQ(Bounds->getFinalIVValue().getName(), "ub");
+        EXPECT_EQ(Bounds->getCanonicalPredicate(), ICmpInst::ICMP_SLT);
+        EXPECT_EQ(Bounds->getDirection(),
+                  Loop::LoopBounds::Direction::Increasing);
+        EXPECT_EQ(L->getInductionVariable(SE)->getName(), "i");
+        BasicBlock::iterator II = Header->begin();
+        PHINode &Instruction_i = cast<PHINode>(*(II));
+        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_i, SE));
+        PHINode &Instruction_aux = cast<PHINode>(*(++II));
+        EXPECT_TRUE(L->isAuxiliaryInductionVariable(Instruction_aux, SE));
+        PHINode &Instruction_loopvariant = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_loopvariant, SE));
+        PHINode &Instruction_usedoutside = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_usedoutside, SE));
+        PHINode &Instruction_mulopcode = cast<PHINode>(*(++II));
+        EXPECT_FALSE(
+            L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE));
+      });
+}

From ba86f2a22e7a95020e924baf9203ccfd10df5162 Mon Sep 17 00:00:00 2001
From: Dan Gohman <dan433584@gmail.com>
Date: Wed, 5 Jun 2019 20:59:20 +0000
Subject: [PATCH 1157/1176] [WebAssembly] Use Emscripten triples in PIC tests.

With r362638, llc doesn't support -relocation-model=pic with non-Emscripten
triples. Update these tests in lld which use -relocation-model=pic to also
use Emscripten triples.

llvm-svn: 362645
---
 lld/test/wasm/pie.ll    | 2 +-
 lld/test/wasm/shared.ll | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lld/test/wasm/pie.ll b/lld/test/wasm/pie.ll
index 90952faab5e20..e7aa243e27269 100644
--- a/lld/test/wasm/pie.ll
+++ b/lld/test/wasm/pie.ll
@@ -2,7 +2,7 @@
 ; RUN: wasm-ld --no-gc-sections --allow-undefined -pie -o %t.wasm %t.o
 ; RUN: obj2yaml %t.wasm | FileCheck %s
 
-target triple = "wasm32-unknown-unknown"
+target triple = "wasm32-unknown-emscripten"
 
 @data = global i32 2, align 4
 @data_external = external global i32
diff --git a/lld/test/wasm/shared.ll b/lld/test/wasm/shared.ll
index 9fbbfc099e5e0..89fae3342ac2a 100644
--- a/lld/test/wasm/shared.ll
+++ b/lld/test/wasm/shared.ll
@@ -2,7 +2,7 @@
 ; RUN: wasm-ld -shared -o %t.wasm %t.o
 ; RUN: obj2yaml %t.wasm | FileCheck %s
 
-target triple = "wasm32-unknown-unknown"
+target triple = "wasm32-unknown-emscripten"
 
 @data = hidden global i32 2, align 4
 @data_external = external global i32

From a3701caad826a6da3915a1b485cbe0a333e6a484 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Wed, 5 Jun 2019 20:59:48 +0000
Subject: [PATCH 1158/1176] [clang-format][NFC] Fix BS_Allman style example in
 the header docs are generated from

Differential Revision: https://reviews.llvm.org/D61729

llvm-svn: 362646
---
 clang/docs/ClangFormatStyleOptions.rst | 21 +++++++++++++++------
 clang/include/clang/Format/Format.h    | 21 +++++++++++++++------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 342ecc7914ad6..8bd0e9c8f7e05 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -1059,19 +1059,28 @@ the configuration (without a prefix: ``Auto``).
 
     .. code-block:: c++
 
-      try {
+      try
+      {
         foo();
       }
-      catch () {
+      catch ()
+      {
       }
       void foo() { bar(); }
-      class foo {
+      class foo
+      {
       };
-      if (foo()) {
+      if (foo())
+      {
       }
-      else {
+      else
+      {
       }
-      enum X : int { A, B };
+      enum X : int
+      {
+        A,
+        B
+      };
 
   * ``BS_GNU`` (in configuration: ``GNU``)
     Always break before braces and add an extra level of indentation to
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index af7c351d79bb9..0aae75276fa3f 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -642,19 +642,28 @@ struct FormatStyle {
     BS_Stroustrup,
     /// Always break before braces.
     /// \code
-    ///   try {
+    ///   try
+    ///   {
     ///     foo();
     ///   }
-    ///   catch () {
+    ///   catch ()
+    ///   {
     ///   }
     ///   void foo() { bar(); }
-    ///   class foo {
+    ///   class foo
+    ///   {
     ///   };
-    ///   if (foo()) {
+    ///   if (foo())
+    ///   {
     ///   }
-    ///   else {
+    ///   else
+    ///   {
     ///   }
-    ///   enum X : int { A, B };
+    ///   enum X : int
+    ///   {
+    ///     A,
+    ///     B
+    ///   };
     /// \endcode
     BS_Allman,
     /// Always break before braces and add an extra level of indentation to

From 3975b15dbabd9ee595bcbada2088dd1395f52e0f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 5 Jun 2019 21:00:31 +0000
Subject: [PATCH 1159/1176] [X86] Fix mistake that marked
 VADDSSrrb_Int/VADDSDrrb_Int/VMULSSrrb_Int/VMULSDrrb_Int as commutable.

One of the sources controls the pass through value for the upper bits
of the result so we can't really commute it.

In practice this problem isn't a functional issue because we would
only try to commute this instruction in order to fold a load. But
we can't do embedded rounding and fold a load at the same time. So
the load fold would never succeed so I don't think we would ever
commute or at least keep the version after commuting.

llvm-svn: 362647
---
 llvm/lib/Target/X86/X86InstrAVX512.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b230cbf611b2e..4302b3e1ed559 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -5408,7 +5408,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                           "$rc, $src2, $src1", "$src1, $src2, $rc",
                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
-                          (i32 timm:$rc)), IsCommutable>,
+                          (i32 timm:$rc))>,
                           EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,

From 7ce7110e6d964778141c0866488e154b1ce73d69 Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Wed, 5 Jun 2019 21:04:26 +0000
Subject: [PATCH 1160/1176] Speedup to_string and to_wstring for integers using
 stack buffer and SSO. Reviewed as https://reviews.llvm.org/D59178  Thanks to
 ivafanas for all his work on this patch.

llvm-svn: 362649
---
 libcxx/src/string.cpp | 140 +++++++++++-------------------------------
 1 file changed, 37 insertions(+), 103 deletions(-)

diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp
index 6c89a5d7e068f..4802d63c811ba 100644
--- a/libcxx/src/string.cpp
+++ b/libcxx/src/string.cpp
@@ -7,12 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "string"
+#include "charconv"
 #include "cstdlib"
 #include "cwchar"
 #include "cerrno"
 #include "limits"
 #include "stdexcept"
 #include <stdio.h>
+#include "__debug"
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
@@ -171,7 +173,7 @@ as_integer( const string& func, const wstring& s, size_t* idx, int base )
 
 // as_float
 
-template<typename V, typename S, typename F> 
+template<typename V, typename S, typename F>
 inline
 V
 as_float_helper(const string& func, const S& str, size_t* idx, F f )
@@ -375,11 +377,11 @@ as_string(P sprintf_like, S s, const typename S::value_type* fmt, V a)
     return s;
 }
 
-template <class S, class V, bool = is_floating_point<V>::value>
+template <class S>
 struct initial_string;
 
-template <class V, bool b>
-struct initial_string<string, V, b>
+template <>
+struct initial_string<string>
 {
     string
     operator()() const
@@ -390,23 +392,8 @@ struct initial_string<string, V, b>
     }
 };
 
-template <class V>
-struct initial_string<wstring, V, false>
-{
-    wstring
-    operator()() const
-    {
-        const size_t n = (numeric_limits<unsigned long long>::digits / 3)
-          + ((numeric_limits<unsigned long long>::digits % 3) != 0)
-          + 1;
-        wstring s(n, wchar_t());
-        s.resize(s.capacity());
-        return s;
-    }
-};
-
-template <class V>
-struct initial_string<wstring, V, true>
+template <>
+struct initial_string<wstring>
 {
     wstring
     operator()() const
@@ -430,95 +417,42 @@ get_swprintf()
 #endif
 }
 
-}  // unnamed namespace
-
-string to_string(int val)
-{
-    return as_string(snprintf, initial_string<string, int>()(), "%d", val);
-}
-
-string to_string(unsigned val)
+template <typename S, typename V>
+S i_to_string(const V v)
 {
-    return as_string(snprintf, initial_string<string, unsigned>()(), "%u", val);
+//  numeric_limits::digits10 returns value less on 1 than desired for unsigned numbers.
+//  For example, for 1-byte unsigned value digits10 is 2 (999 can not be represented),
+//  so we need +1 here.
+    constexpr size_t bufsize = numeric_limits<V>::digits10 + 2;  // +1 for minus, +1 for digits10
+    char buf[bufsize];
+    const auto res = to_chars(buf, buf + bufsize, v);
+    _LIBCPP_ASSERT(res.ec == errc(), "bufsize must be large enough to accomodate the value");
+    return S(buf, res.ptr);
 }
 
-string to_string(long val)
-{
-    return as_string(snprintf, initial_string<string, long>()(), "%ld", val);
-}
-
-string to_string(unsigned long val)
-{
-    return as_string(snprintf, initial_string<string, unsigned long>()(), "%lu", val);
-}
-
-string to_string(long long val)
-{
-    return as_string(snprintf, initial_string<string, long long>()(), "%lld", val);
-}
-
-string to_string(unsigned long long val)
-{
-    return as_string(snprintf, initial_string<string, unsigned long long>()(), "%llu", val);
-}
-
-string to_string(float val)
-{
-    return as_string(snprintf, initial_string<string, float>()(), "%f", val);
-}
-
-string to_string(double val)
-{
-    return as_string(snprintf, initial_string<string, double>()(), "%f", val);
-}
-
-string to_string(long double val)
-{
-    return as_string(snprintf, initial_string<string, long double>()(), "%Lf", val);
-}
-
-wstring to_wstring(int val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, int>()(), L"%d", val);
-}
-
-wstring to_wstring(unsigned val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, unsigned>()(), L"%u", val);
-}
-
-wstring to_wstring(long val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, long>()(), L"%ld", val);
-}
+}  // unnamed namespace
 
-wstring to_wstring(unsigned long val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, unsigned long>()(), L"%lu", val);
-}
+string  to_string (int val)                { return i_to_string< string>(val); }
+string  to_string (long val)               { return i_to_string< string>(val); }
+string  to_string (long long val)          { return i_to_string< string>(val); }
+string  to_string (unsigned val)           { return i_to_string< string>(val); }
+string  to_string (unsigned long val)      { return i_to_string< string>(val); }
+string  to_string (unsigned long long val) { return i_to_string< string>(val); }
 
-wstring to_wstring(long long val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, long long>()(), L"%lld", val);
-}
+wstring to_wstring(int val)                { return i_to_string<wstring>(val); }
+wstring to_wstring(long val)               { return i_to_string<wstring>(val); }
+wstring to_wstring(long long val)          { return i_to_string<wstring>(val); }
+wstring to_wstring(unsigned val)           { return i_to_string<wstring>(val); }
+wstring to_wstring(unsigned long val)      { return i_to_string<wstring>(val); }
+wstring to_wstring(unsigned long long val) { return i_to_string<wstring>(val); }
 
-wstring to_wstring(unsigned long long val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, unsigned long long>()(), L"%llu", val);
-}
 
-wstring to_wstring(float val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, float>()(), L"%f", val);
-}
+string  to_string (float val)       { return as_string(snprintf,       initial_string< string>()(),   "%f", val); }
+string  to_string (double val)      { return as_string(snprintf,       initial_string< string>()(),   "%f", val); }
+string  to_string (long double val) { return as_string(snprintf,       initial_string< string>()(),  "%Lf", val); }
 
-wstring to_wstring(double val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, double>()(), L"%f", val);
-}
+wstring to_wstring(float val)       { return as_string(get_swprintf(), initial_string<wstring>()(),  L"%f", val); }
+wstring to_wstring(double val)      { return as_string(get_swprintf(), initial_string<wstring>()(),  L"%f", val); }
+wstring to_wstring(long double val) { return as_string(get_swprintf(), initial_string<wstring>()(), L"%Lf", val); }
 
-wstring to_wstring(long double val)
-{
-    return as_string(get_swprintf(), initial_string<wstring, long double>()(), L"%Lf", val);
-}
 _LIBCPP_END_NAMESPACE_STD

From 7c663cde14ed973dd3c23327b1f5b364c8a3f0a4 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 5 Jun 2019 21:08:30 +0000
Subject: [PATCH 1161/1176] [WebAssembly] Improve lto/comdat.ll test. NFC.

We were not previously testing the comdat exclusion in bitcode objects
because we were linking two copies of the .bc file and the
`linkonce_odr` linkage type was removing the duplicate `_start` at
the LTO stage.

Now we link an bitcode and non-bitcode version both of which contains a
copy of _start.  We link them in both orders, which means this test will
fail if comdat exclusion is not working correctly in bitcode parsing.

Differential Revision: https://reviews.llvm.org/D62923

llvm-svn: 362650
---
 lld/test/wasm/lto/comdat.ll | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lld/test/wasm/lto/comdat.ll b/lld/test/wasm/lto/comdat.ll
index 446469c337d52..54c9f0f3b47b6 100644
--- a/lld/test/wasm/lto/comdat.ll
+++ b/lld/test/wasm/lto/comdat.ll
@@ -1,15 +1,17 @@
 ; Verify that comdat symbols can be defined in LTO objects.  We had a
 ; regression where the comdat handling code was causing symbol in the lto object
 ; to be ignored.
-; RUN: llvm-as %s -o %t.o
-; RUN: wasm-ld %t.o %t.o -o %t.wasm
+; RUN: llvm-as %s -o %t.bc
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: wasm-ld %t.bc %t.o -o %t.wasm
+; RUN: wasm-ld %t.o %t.bc -o %t.wasm
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
 $foo = comdat any
 
-define linkonce_odr void @_start() comdat($foo) {
+define void @_start() comdat($foo) {
 entry:
   ret void
 }

From 9423f5ef56d23c099987ceec2a140fe9efa47934 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Wed, 5 Jun 2019 21:11:06 +0000
Subject: [PATCH 1162/1176] Fix FileCheck prefixes in test case.

llvm-svn: 362651
---
 clang/test/CodeGenObjC/local-static-block.m | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGenObjC/local-static-block.m b/clang/test/CodeGenObjC/local-static-block.m
index 67ede63fc0a29..4d52029c18ea6 100644
--- a/clang/test/CodeGenObjC/local-static-block.m
+++ b/clang/test/CodeGenObjC/local-static-block.m
@@ -1,7 +1,11 @@
-// RUN: %clang_cc1 -fblocks -triple x86_64-apple-darwin -fobjc-runtime=macosx-fragile-10.5 -emit-llvm %s -o %t-64.ll
-// RUN: FileCheck -check-prefix CHECK-LP64 --input-file=%t-64.ll %s
+// RUN: %clang_cc1 -fblocks -triple x86_64-apple-darwin -fobjc-runtime=macosx-fragile-10.5 -emit-llvm -o - %s | FileCheck %s
 // rdar: // 8390455
 
+// CHECK: @ArrayRecurs = internal global
+// CHECK: @FUNC.ArrayRecurs = internal global
+// CHECK: @FUNC.ArrayRecurs.1 = internal global
+// CHECK: @FUNC1.ArrayRecurs = internal global
+
 @class NSArray;
 
 static  NSArray *(^ArrayRecurs)(NSArray *addresses, unsigned long level) = ^(NSArray *addresses, unsigned long level) {
@@ -53,7 +57,7 @@ static void (^const block1)(int) = ^(int a){
   };
 }
 
-// CHECK-LABEL-LP64: define void @FUNC2(
+// CHECK-LABEL: define void @FUNC2(
 // CHECK: define internal void @_block_invoke{{.*}}(
 // CHECK: call void %{{.*}}(i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global{{.*}} to i8*), i32 %{{.*}})
 
@@ -70,7 +74,3 @@ void FUNC1()
  };
  ArrayRecurs(address, level);
 }
-// CHECK-LP64: @ArrayRecurs = internal global
-// CHECK-LP64: @FUNC.ArrayRecurs = internal global
-// CHECK-LP64: @FUNC.ArrayRecurs.1 = internal global
-// CHECK-LP64: @FUNC1.ArrayRecurs = internal global

From c46827c7eda30cd0bc3e70d08670c8dc39fe6166 Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Wed, 5 Jun 2019 21:12:14 +0000
Subject: [PATCH 1163/1176] LLVM IR: Generate new-style byval-with-Type from
 Clang

LLVM IR recently added a Type parameter to the byval Attribute, so that
when pointers become opaque and no longer have an element type the
information will still be present in IR.

For now the Type parameter is optional (which is why Clang didn't need
this change at the time), but it will become mandatory soon.

llvm-svn: 362652
---
 clang/lib/CodeGen/CGBuiltin.cpp               |  2 +-
 clang/lib/CodeGen/CGCall.cpp                  |  2 +-
 clang/test/CodeGen/aapcs-align.cpp            |  8 +-
 clang/test/CodeGen/arm-aapcs-vfp.c            |  6 +-
 clang/test/CodeGen/arm-arguments.c            |  8 +-
 clang/test/CodeGen/arm-byval-align.c          |  2 +-
 clang/test/CodeGen/blocks.c                   |  2 +-
 clang/test/CodeGen/complex-builtins.c         | 76 +++++++++---------
 clang/test/CodeGen/complex-libcalls.c         | 80 +++++++++----------
 clang/test/CodeGen/le32-arguments.c           |  6 +-
 clang/test/CodeGen/mingw-long-double.c        |  2 +-
 clang/test/CodeGen/nvptx-abi.c                | 10 +--
 clang/test/CodeGen/ppc64-align-struct.c       |  6 +-
 clang/test/CodeGen/ppc64le-aggregates.c       |  8 +-
 clang/test/CodeGen/ppc64le-f128Aggregates.c   |  4 +-
 clang/test/CodeGen/regcall.c                  | 10 +--
 clang/test/CodeGen/regparm-struct.c           |  2 +-
 clang/test/CodeGen/renderscript.c             |  2 +-
 clang/test/CodeGen/sparc-arguments.c          |  4 +-
 clang/test/CodeGen/sparcv8-abi.c              |  6 +-
 clang/test/CodeGen/stdcall-fastcall.c         |  4 +-
 clang/test/CodeGen/struct-passing.c           |  4 +-
 clang/test/CodeGen/vectorcall.c               |  4 +-
 clang/test/CodeGen/wasm-arguments.c           | 12 +--
 clang/test/CodeGen/x86_32-arguments-darwin.c  | 46 +++++------
 clang/test/CodeGen/x86_32-arguments-iamcu.c   |  6 +-
 clang/test/CodeGen/x86_32-arguments-linux.c   | 28 +++----
 clang/test/CodeGen/x86_32-arguments-realign.c |  2 +-
 clang/test/CodeGen/x86_64-arguments-nacl.c    |  4 +-
 clang/test/CodeGen/x86_64-arguments.c         | 34 ++++----
 .../test/CodeGenCUDA/kernel-args-alignment.cu |  2 +-
 clang/test/CodeGenCUDA/kernel-args.cu         |  8 +-
 clang/test/CodeGenCXX/amdgcn-func-arg.cpp     |  6 +-
 .../microsoft-abi-cdecl-method-sret.cpp       |  2 +-
 .../microsoft-abi-sret-and-byval.cpp          | 30 +++----
 .../test/CodeGenCXX/ms-inline-asm-fields.cpp  |  2 +-
 clang/test/CodeGenCXX/regcall.cpp             |  4 +-
 clang/test/CodeGenCXX/regparm.cpp             |  2 +-
 clang/test/CodeGenCXX/stmtexpr.cpp            |  2 +-
 clang/test/CodeGenCXX/wasm-args-returns.cpp   |  6 +-
 clang/test/CodeGenCXX/x86_32-arguments.cpp    |  4 +-
 .../test/CodeGenCXX/x86_64-arguments-avx.cpp  |  2 +-
 .../CodeGenCXX/x86_64-arguments-nacl-x32.cpp  |  4 +-
 clang/test/CodeGenCXX/x86_64-arguments.cpp    | 16 ++--
 .../CodeGenOpenCL/addr-space-struct-arg.cl    | 20 ++---
 .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 16 ++--
 .../CodeGenOpenCL/cl20-device-side-enqueue.cl |  6 +-
 .../kernels-have-spir-cc-by-default.cl        |  6 +-
 .../OpenMP/nvptx_unsupported_type_codegen.cpp |  4 +-
 49 files changed, 266 insertions(+), 266 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9fe2b315edc2c..c6d3aff7bc03c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3730,7 +3730,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
           Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
 
       AttrBuilder B;
-      B.addAttribute(Attribute::ByVal);
+      B.addByValAttr(NDRangeL.getAddress().getElementType());
       llvm::AttributeList ByValAttrSet =
           llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
 
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index bc9be14ede4a9..7b57f08482299 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2055,7 +2055,7 @@ void CodeGenModule::ConstructAttributeList(
         Attrs.addAttribute(llvm::Attribute::InReg);
 
       if (AI.getIndirectByVal())
-        Attrs.addAttribute(llvm::Attribute::ByVal);
+        Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
 
       CharUnits Align = AI.getIndirectAlign();
 
diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
index bcc4604d7422d..3d2c7ef6a4d34 100644
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -95,10 +95,10 @@ void g4() {
   f4m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define void @g4
-// CHECK: call void @f4(i32 1, %struct.SF16* nonnull byval align 8
-// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* nonnull byval align 8
-// CHECK: declare void @f4(i32, %struct.SF16* byval align 8)
-// CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval align 8)
+// CHECK: call void @f4(i32 1, %struct.SF16* nonnull byval(%struct.SF16) align 8
+// CHECK: call void @f4m(i32 1, i32 2, i32 3, i32 4, i32 5, %struct.SF16* nonnull byval(%struct.SF16) align 8
+// CHECK: declare void @f4(i32, %struct.SF16* byval(%struct.SF16) align 8)
+// CHECK: declare void @f4m(i32, i32, i32, i32, i32, %struct.SF16* byval(%struct.SF16) align 8)
 
 // Packed structure.
 struct  __attribute__((packed)) P {
diff --git a/clang/test/CodeGen/arm-aapcs-vfp.c b/clang/test/CodeGen/arm-aapcs-vfp.c
index ba5a969b0edc5..69581fcab2479 100644
--- a/clang/test/CodeGen/arm-aapcs-vfp.c
+++ b/clang/test/CodeGen/arm-aapcs-vfp.c
@@ -114,7 +114,7 @@ void test_neon(struct neon_struct arg) {
   neon_callee(arg);
 }
 
-// CHECK-LABEL: define arm_aapcs_vfpcc void @f33(%struct.s33* byval align 4 %s)
+// CHECK-LABEL: define arm_aapcs_vfpcc void @f33(%struct.s33* byval(%struct.s33) align 4 %s)
 struct s33 { char buf[32*32]; };
 void f33(struct s33 s) { }
 
@@ -140,10 +140,10 @@ void test_vfp_stack_gpr_split_4(double a, double b, double c, double d, double e
 // This very large struct (passed byval) uses up the GPRs, so no padding is needed
 typedef struct { int x[17]; } struct_seventeen_ints;
 typedef struct { int x[4]; } struct_four_ints;
-// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_5(%struct.struct_seventeen_ints* byval align 4 %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, [4 x i32] %k.coerce)
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_5(%struct.struct_seventeen_ints* byval(%struct.struct_seventeen_ints) align 4 %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, [4 x i32] %k.coerce)
 void test_vfp_stack_gpr_split_5(struct_seventeen_ints a, double b, double c, double d, double e, double f, double g, double h, double i, double j, struct_four_ints k) {}
 
 // Here, parameter k would need padding to prevent it from being split, but it
 // is passed ByVal (due to being > 64 bytes), so the backend handles this instead.
 void test_vfp_stack_gpr_split_6(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_seventeen_ints k) {}
-// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_6(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, %struct.struct_seventeen_ints* byval align 4 %k)
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_6(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, %struct.struct_seventeen_ints* byval(%struct.struct_seventeen_ints) align 4 %k)
diff --git a/clang/test/CodeGen/arm-arguments.c b/clang/test/CodeGen/arm-arguments.c
index ca6b70b44621b..6ae7ed86dbfed 100644
--- a/clang/test/CodeGen/arm-arguments.c
+++ b/clang/test/CodeGen/arm-arguments.c
@@ -176,8 +176,8 @@ void f32(struct s32 s) { }
 // PR13350
 struct s33 { char buf[32*32]; };
 void f33(struct s33 s) { }
-// APCS-GNU-LABEL: define void @f33(%struct.s33* byval align 4 %s)
-// AAPCS-LABEL: define arm_aapcscc void @f33(%struct.s33* byval align 4 %s)
+// APCS-GNU-LABEL: define void @f33(%struct.s33* byval(%struct.s33) align 4 %s)
+// AAPCS-LABEL: define arm_aapcscc void @f33(%struct.s33* byval(%struct.s33) align 4 %s)
 
 // PR14048
 struct s34 { char c; };
@@ -204,7 +204,7 @@ float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
                             *(float32x4_t *)&s2);
   return v;
 }
-// APCS-GNU-LABEL: define <4 x float> @f35(i32 %i, %struct.s35* byval align 4, %struct.s35* byval align 4)
+// APCS-GNU-LABEL: define <4 x float> @f35(i32 %i, %struct.s35* byval(%struct.s35) align 4, %struct.s35* byval(%struct.s35) align 4)
 // APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
 // APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
 // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
@@ -212,7 +212,7 @@ float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
 // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
 // APCS-GNU: load <4 x float>, <4 x float>* %[[d]], align 16
 
-// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 4 %s1, %struct.s35* byval align 4 %s2)
+// AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval(%struct.s35) align 4 %s1, %struct.s35* byval(%struct.s35) align 4 %s2)
 // AAPCS: %[[a_addr:.*]] = alloca <4 x float>, align 16
 // AAPCS: %[[b_addr:.*]] = alloca <4 x float>, align 16
 // AAPCS: %[[p1:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
diff --git a/clang/test/CodeGen/arm-byval-align.c b/clang/test/CodeGen/arm-byval-align.c
index aa22503fe4bba..c3e887773fb04 100644
--- a/clang/test/CodeGen/arm-byval-align.c
+++ b/clang/test/CodeGen/arm-byval-align.c
@@ -7,7 +7,7 @@ struct foo {
   int d[16];
 };
 
-// CHECK: %struct.foo* byval align 8 %z
+// CHECK: %struct.foo* byval(%struct.foo) align 8 %z
 long long bar(int a, int b, int c, int d, int e,
               struct foo z) {
   return z.a;
diff --git a/clang/test/CodeGen/blocks.c b/clang/test/CodeGen/blocks.c
index 6b355b898ed08..3bf1939e18e9b 100644
--- a/clang/test/CodeGen/blocks.c
+++ b/clang/test/CodeGen/blocks.c
@@ -18,7 +18,7 @@ struct s0 {
   int a[64];
 };
 
-// CHECK: define internal void @__f2_block_invoke(%struct.s0* noalias sret {{%.*}}, i8* {{%.*}}, %struct.s0* byval align 4 {{.*}})
+// CHECK: define internal void @__f2_block_invoke(%struct.s0* noalias sret {{%.*}}, i8* {{%.*}}, %struct.s0* byval(%struct.s0) align 4 {{.*}})
 struct s0 f2(struct s0 a0) {
   return ^(struct s0 a1){ return a1; }(a0);
 }
diff --git a/clang/test/CodeGen/complex-builtins.c b/clang/test/CodeGen/complex-builtins.c
index dbf3b5901866d..7ee2d6d848576 100644
--- a/clang/test/CodeGen/complex-builtins.c
+++ b/clang/test/CodeGen/complex-builtins.c
@@ -8,100 +8,100 @@ void foo(float f) {
 
 // NO__ERRNO: declare double @cabs(double, double) [[READNONE:#[0-9]+]]
 // NO__ERRNO: declare float @cabsf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE:#[0-9]+]]
+// NO__ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE:#[0-9]+]]
 // HAS_ERRNO: declare double @cabs(double, double) [[NOT_READNONE:#[0-9]+]]
 // HAS_ERRNO: declare float @cabsf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_cacos(f);      __builtin_cacosf(f);     __builtin_cacosl(f);
 
 // NO__ERRNO: declare { double, double } @cacos(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cacosf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cacos(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_cacosh(f);     __builtin_cacoshf(f);    __builtin_cacoshl(f);
 
 // NO__ERRNO: declare { double, double } @cacosh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cacosh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_carg(f);       __builtin_cargf(f);      __builtin_cargl(f);
 
 // NO__ERRNO: declare double @carg(double, double) [[READNONE]]
 // NO__ERRNO: declare float @cargf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare double @carg(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare float @cargf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_casin(f);      __builtin_casinf(f);     __builtin_casinl(f);
 
 // NO__ERRNO: declare { double, double } @casin(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @casinf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @casin(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @casinf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_casinh(f);     __builtin_casinhf(f);    __builtin_casinhl(f); 
 
 // NO__ERRNO: declare { double, double } @casinh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @casinhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @casinh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_catan(f);      __builtin_catanf(f);     __builtin_catanl(f); 
 
 // NO__ERRNO: declare { double, double } @catan(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @catanf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @catan(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @catanf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_catanh(f);     __builtin_catanhf(f);    __builtin_catanhl(f);
 
 // NO__ERRNO: declare { double, double } @catanh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @catanhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @catanh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_ccos(f);       __builtin_ccosf(f);      __builtin_ccosl(f);
 
 // NO__ERRNO: declare { double, double } @ccos(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ccosf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ccos(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_ccosh(f);      __builtin_ccoshf(f);     __builtin_ccoshl(f);
 
 // NO__ERRNO: declare { double, double } @ccosh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ccosh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_cexp(f);       __builtin_cexpf(f);      __builtin_cexpl(f);
 
 // NO__ERRNO: declare { double, double } @cexp(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cexpf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cexp(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_cimag(f);      __builtin_cimagf(f);     __builtin_cimagl(f);
 
@@ -121,28 +121,28 @@ void foo(float f) {
 
 // NO__ERRNO: declare { double, double } @clog(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @clogf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @clog(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @clogf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_cproj(f);      __builtin_cprojf(f);     __builtin_cprojl(f); 
 
 // NO__ERRNO: declare { double, double } @cproj(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cproj(double, double) [[READNONE:#[0-9]+]]
 // HAS_ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_cpow(f,f);       __builtin_cpowf(f,f);      __builtin_cpowl(f,f);
 
 // NO__ERRNO: declare { double, double } @cpow(double, double, double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cpow(double, double, double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_creal(f);      __builtin_crealf(f);     __builtin_creall(f);
 
@@ -155,46 +155,46 @@ void foo(float f) {
 
 // NO__ERRNO: declare { double, double } @csin(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @csinf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @csin(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @csinf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_csinh(f);      __builtin_csinhf(f);     __builtin_csinhl(f);
 
 // NO__ERRNO: declare { double, double } @csinh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @csinhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @csinh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_csqrt(f);      __builtin_csqrtf(f);     __builtin_csqrtl(f);  
 
 // NO__ERRNO: declare { double, double } @csqrt(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @csqrt(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_ctan(f);       __builtin_ctanf(f);      __builtin_ctanl(f);
 
 // NO__ERRNO: declare { double, double } @ctan(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ctanf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ctan(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   __builtin_ctanh(f);      __builtin_ctanhf(f);     __builtin_ctanhl(f); 
 
 // NO__ERRNO: declare { double, double } @ctanh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ctanh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 };
 
 
diff --git a/clang/test/CodeGen/complex-libcalls.c b/clang/test/CodeGen/complex-libcalls.c
index db56628835fdf..5690119e10139 100644
--- a/clang/test/CodeGen/complex-libcalls.c
+++ b/clang/test/CodeGen/complex-libcalls.c
@@ -8,100 +8,100 @@ void foo(float f) {
 
 // NO__ERRNO: declare double @cabs(double, double) [[READNONE:#[0-9]+]]
 // NO__ERRNO: declare float @cabsf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE:#[0-9]+]]
+// NO__ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE:#[0-9]+]]
 // HAS_ERRNO: declare double @cabs(double, double) [[NOT_READNONE:#[0-9]+]]
 // HAS_ERRNO: declare float @cabsf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cabsl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   cacos(f);      cacosf(f);     cacosl(f);
 
 // NO__ERRNO: declare { double, double } @cacos(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cacosf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cacos(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cacosf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   cacosh(f);     cacoshf(f);    cacoshl(f);
 
 // NO__ERRNO: declare { double, double } @cacosh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cacosh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cacoshf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cacoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   carg(f);       cargf(f);      cargl(f);
 
 // NO__ERRNO: declare double @carg(double, double) [[READNONE]]
 // NO__ERRNO: declare float @cargf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare double @carg(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare float @cargf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare x86_fp80 @cargl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   casin(f);      casinf(f);     casinl(f);
 
 // NO__ERRNO: declare { double, double } @casin(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @casinf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @casin(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @casinf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   casinh(f);     casinhf(f);    casinhl(f); 
 
 // NO__ERRNO: declare { double, double } @casinh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @casinhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @casinh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @casinhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @casinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   catan(f);      catanf(f);     catanl(f); 
 
 // NO__ERRNO: declare { double, double } @catan(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @catanf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @catan(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @catanf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   catanh(f);     catanhf(f);    catanhl(f);
 
 // NO__ERRNO: declare { double, double } @catanh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @catanhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @catanh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @catanhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @catanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   ccos(f);       ccosf(f);      ccosl(f);
 
 // NO__ERRNO: declare { double, double } @ccos(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ccosf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ccos(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ccosf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccosl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   ccosh(f);      ccoshf(f);     ccoshl(f);
 
 // NO__ERRNO: declare { double, double } @ccosh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ccosh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ccoshf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ccoshl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   cexp(f);       cexpf(f);      cexpl(f);
 
 // NO__ERRNO: declare { double, double } @cexp(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cexpf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cexp(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cexpf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cexpl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   cimag(f);      cimagf(f);     cimagl(f);
 
@@ -114,37 +114,37 @@ void foo(float f) {
 
 // NO__ERRNO: declare { double, double } @conj(double, double) [[READNONE:#[0-9]+]]
 // NO__ERRNO: declare <2 x float> @conjf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @conjl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @conjl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @conj(double, double) [[READNONE:#[0-9]+]]
 // HAS_ERRNO: declare <2 x float> @conjf(<2 x float>) [[READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @conjl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @conjl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   clog(f);       clogf(f);      clogl(f);
 
 // NO__ERRNO: declare { double, double } @clog(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @clogf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @clog(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @clogf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @clogl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   cproj(f);      cprojf(f);     cprojl(f); 
 
 // NO__ERRNO: declare { double, double } @cproj(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cproj(double, double) [[READNONE]]
 // HAS_ERRNO: declare <2 x float> @cprojf(<2 x float>) [[READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cprojl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   cpow(f,f);       cpowf(f,f);      cpowl(f,f);
 
 // NO__ERRNO: declare { double, double } @cpow(double, double, double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @cpow(double, double, double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @cpowf(<2 x float>, <2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval align 16, { x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @cpowl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   creal(f);      crealf(f);     creall(f);
 
@@ -157,46 +157,46 @@ void foo(float f) {
 
 // NO__ERRNO: declare { double, double } @csin(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @csinf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @csin(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @csinf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   csinh(f);      csinhf(f);     csinhl(f);
 
 // NO__ERRNO: declare { double, double } @csinh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @csinhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @csinh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @csinhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csinhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   csqrt(f);      csqrtf(f);     csqrtl(f);  
 
 // NO__ERRNO: declare { double, double } @csqrt(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @csqrt(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @csqrtf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @csqrtl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   ctan(f);       ctanf(f);      ctanl(f);
 
 // NO__ERRNO: declare { double, double } @ctan(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ctanf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ctan(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ctanf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 
   ctanh(f);      ctanhf(f);     ctanhl(f); 
 
 // NO__ERRNO: declare { double, double } @ctanh(double, double) [[READNONE]]
 // NO__ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[READNONE]]
-// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// NO__ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 // HAS_ERRNO: declare { double, double } @ctanh(double, double) [[NOT_READNONE]]
 // HAS_ERRNO: declare <2 x float> @ctanhf(<2 x float>) [[NOT_READNONE]]
-// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval align 16) [[NOT_READNONE]]
+// HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl({ x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]]
 };
 
 
diff --git a/clang/test/CodeGen/le32-arguments.c b/clang/test/CodeGen/le32-arguments.c
index e81d84387d32a..9e6908d7fc41c 100644
--- a/clang/test/CodeGen/le32-arguments.c
+++ b/clang/test/CodeGen/le32-arguments.c
@@ -10,7 +10,7 @@ typedef struct {
   int bb;
 } s1;
 // Structs should be passed byval and not split up
-// CHECK-LABEL: define void @f1(%struct.s1* byval align 4 %i)
+// CHECK-LABEL: define void @f1(%struct.s1* byval(%struct.s1) align 4 %i)
 void f1(s1 i) {}
 
 typedef struct {
@@ -48,7 +48,7 @@ union simple_union {
   char b;
 };
 // Unions should be passed as byval structs
-// CHECK-LABEL: define void @f7(%union.simple_union* byval align 4 %s)
+// CHECK-LABEL: define void @f7(%union.simple_union* byval(%union.simple_union) align 4 %s)
 void f7(union simple_union s) {}
 
 typedef struct {
@@ -57,5 +57,5 @@ typedef struct {
   int b8 : 8;
 } bitfield1;
 // Bitfields should be passed as byval structs
-// CHECK-LABEL: define void @f8(%struct.bitfield1* byval align 4 %bf1)
+// CHECK-LABEL: define void @f8(%struct.bitfield1* byval(%struct.bitfield1) align 4 %bf1)
 void f8(bitfield1 bf1) {}
diff --git a/clang/test/CodeGen/mingw-long-double.c b/clang/test/CodeGen/mingw-long-double.c
index 6026c24b367d5..58f94402b8ac3 100644
--- a/clang/test/CodeGen/mingw-long-double.c
+++ b/clang/test/CodeGen/mingw-long-double.c
@@ -38,7 +38,7 @@ long double TestLD(long double x) {
 long double _Complex TestLDC(long double _Complex x) {
   return x * x;
 }
-// GNU32: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %x)
+// GNU32: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 4 %x)
 // GNU64: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* %x)
 // MSC64: define dso_local void @TestLDC({ double, double }* noalias sret %agg.result, { double, double }* %x)
 
diff --git a/clang/test/CodeGen/nvptx-abi.c b/clang/test/CodeGen/nvptx-abi.c
index 7973bf06533ab..2f055b5f33480 100644
--- a/clang/test/CodeGen/nvptx-abi.c
+++ b/clang/test/CodeGen/nvptx-abi.c
@@ -21,14 +21,14 @@ float bar(void) {
 
 void foo(float4_t x) {
 // CHECK-LABEL: @foo
-// CHECK: %struct.float4_s* byval align 4 %x
+// CHECK: %struct.float4_s* byval(%struct.float4_s) align 4 %x
 }
 
 void fooN(float4_t x, float4_t y, float4_t z) {
 // CHECK-LABEL: @fooN
-// CHECK: %struct.float4_s* byval align 4 %x
-// CHECK: %struct.float4_s* byval align 4 %y
-// CHECK: %struct.float4_s* byval align 4 %z
+// CHECK: %struct.float4_s* byval(%struct.float4_s) align 4 %x
+// CHECK: %struct.float4_s* byval(%struct.float4_s) align 4 %y
+// CHECK: %struct.float4_s* byval(%struct.float4_s) align 4 %z
 }
 
 typedef struct nested_s {
@@ -39,5 +39,5 @@ typedef struct nested_s {
 
 void baz(nested_t x) {
 // CHECK-LABEL: @baz
-// CHECK: %struct.nested_s* byval align 8 %x)
+// CHECK: %struct.nested_s* byval(%struct.nested_s) align 8 %x)
 }
diff --git a/clang/test/CodeGen/ppc64-align-struct.c b/clang/test/CodeGen/ppc64-align-struct.c
index 5e2dc8b2eb8ad..e6ca3bb6dfa6d 100644
--- a/clang/test/CodeGen/ppc64-align-struct.c
+++ b/clang/test/CodeGen/ppc64-align-struct.c
@@ -30,18 +30,18 @@ void test4 (int x, struct test4 y)
 {
 }
 
-// CHECK: define void @test5(i32 signext %x, %struct.test5* byval align 8 %y)
+// CHECK: define void @test5(i32 signext %x, %struct.test5* byval(%struct.test5) align 8 %y)
 void test5 (int x, struct test5 y)
 {
 }
 
-// CHECK: define void @test6(i32 signext %x, %struct.test6* byval align 16 %y)
+// CHECK: define void @test6(i32 signext %x, %struct.test6* byval(%struct.test6) align 16 %y)
 void test6 (int x, struct test6 y)
 {
 }
 
 // This case requires run-time realignment of the incoming struct
-// CHECK-LABEL: define void @test7(i32 signext %x, %struct.test7* byval align 16)
+// CHECK-LABEL: define void @test7(i32 signext %x, %struct.test7* byval(%struct.test7) align 16)
 // CHECK: %y = alloca %struct.test7, align 32
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
 void test7 (int x, struct test7 y)
diff --git a/clang/test/CodeGen/ppc64le-aggregates.c b/clang/test/CodeGen/ppc64le-aggregates.c
index a7780beec5f61..e36faa2b80258 100644
--- a/clang/test/CodeGen/ppc64le-aggregates.c
+++ b/clang/test/CodeGen/ppc64le-aggregates.c
@@ -162,7 +162,7 @@ struct v7 func_v7(struct v7 x) { return x; }
 // CHECK: define [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %x.coerce)
 struct v8 func_v8(struct v8 x) { return x; }
 
-// CHECK: define void @func_v9(%struct.v9* noalias sret %agg.result, %struct.v9* byval align 16 %x)
+// CHECK: define void @func_v9(%struct.v9* noalias sret %agg.result, %struct.v9* byval(%struct.v9) align 16 %x)
 struct v9 func_v9(struct v9 x) { return x; }
 
 // CHECK: define [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %x.coerce)
@@ -220,7 +220,7 @@ struct v8 global_v8;
 void call_v8(void) { global_v8 = func_v8(global_v8); }
 
 // CHECK-LABEL: @call_v9
-// CHECK: call void @func_v9(%struct.v9* sret %{{[^ ]+}}, %struct.v9* byval align 16 @global_v9)
+// CHECK: call void @func_v9(%struct.v9* sret %{{[^ ]+}}, %struct.v9* byval(%struct.v9) align 16 @global_v9)
 struct v9 global_v9;
 void call_v9(void) { global_v9 = func_v9(global_v9); }
 
@@ -279,7 +279,7 @@ struct v3f7 func_v3f7(struct v3f7 x) { return x; }
 // CHECK: define [8 x <4 x float>] @func_v3f8([8 x <4 x float>] %x.coerce)
 struct v3f8 func_v3f8(struct v3f8 x) { return x; }
 
-// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval align 16 %x)
+// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval(%struct.v3f9) align 16 %x)
 struct v3f9 func_v3f9(struct v3f9 x) { return x; }
 
 // CHECK: define [2 x <4 x float>] @func_v3fab([2 x <4 x float>] %x.coerce)
@@ -337,7 +337,7 @@ struct v3f8 global_v3f8;
 void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }
 
 // CHECK-LABEL: @call_v3f9
-// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval align 16 @global_v3f9)
+// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval(%struct.v3f9) align 16 @global_v3f9)
 struct v3f9 global_v3f9;
 void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }
 
diff --git a/clang/test/CodeGen/ppc64le-f128Aggregates.c b/clang/test/CodeGen/ppc64le-f128Aggregates.c
index a51b6d5e709c5..3b363bf0f2eac 100644
--- a/clang/test/CodeGen/ppc64le-f128Aggregates.c
+++ b/clang/test/CodeGen/ppc64le-f128Aggregates.c
@@ -42,7 +42,7 @@ struct fp7 func_f7(struct fp7 x) { return x; }
 // CHECK: define [8 x fp128] @func_f8([8 x fp128] %x.coerce)
 struct fp8 func_f8(struct fp8 x) { return x; }
 
-// CHECK: define void @func_f9(%struct.fp9* noalias sret %agg.result, %struct.fp9* byval align 16 %x)
+// CHECK: define void @func_f9(%struct.fp9* noalias sret %agg.result, %struct.fp9* byval(%struct.fp9) align 16 %x)
 struct fp9 func_f9(struct fp9 x) { return x; }
 
 // CHECK: define [2 x fp128] @func_fab([2 x fp128] %x.coerce)
@@ -104,7 +104,7 @@ void call_fp8(void) { global_f8 = func_f8(global_f8); }
 
 // CHECK-LABEL: @call_fp9
 // CHECK: %[[TMP1:[^ ]+]] = alloca %struct.fp9, align 16
-// CHECK: call void @func_f9(%struct.fp9* sret %[[TMP2:[^ ]+]], %struct.fp9* byval align 16 @global_f9
+// CHECK: call void @func_f9(%struct.fp9* sret %[[TMP2:[^ ]+]], %struct.fp9* byval(%struct.fp9) align 16 @global_f9
 // CHECK: %[[TMP3:[^ ]+]] = bitcast %struct.fp9* %[[TMP2]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast (%struct.fp9* @global_f9 to i8*), i8* align 16 %[[TMP3]], i64 144, i1 false
 // CHECK: ret void
diff --git a/clang/test/CodeGen/regcall.c b/clang/test/CodeGen/regcall.c
index b7389904844a0..5820bcfac1656 100644
--- a/clang/test/CodeGen/regcall.c
+++ b/clang/test/CodeGen/regcall.c
@@ -32,9 +32,9 @@ void __regcall v3(int a, struct Small b, int c) {}
 
 struct Large { int a[5]; };
 void __regcall v4(int a, struct Large b, int c) {}
-// Win32: define dso_local x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// Win32: define dso_local x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval(%struct.Large) align 4 %b, i32 inreg %c)
 // Win64: define dso_local x86_regcallcc void @__regcall3__v4(i32 %a, %struct.Large* %b, i32 %c)
-// Lin32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval align 4 %b, i32 %c)
+// Lin32: define x86_regcallcc void @__regcall3__v4(i32 inreg %a, %struct.Large* byval(%struct.Large) align 4 %b, i32 %c)
 // Lin64: define x86_regcallcc void @__regcall3__v4(i32 %a, [5 x i32] %b.coerce, i32 %c)
 
 struct HFA2 { double x, y; };
@@ -64,13 +64,13 @@ void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA
 // Lin32: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.0, double %f.1)
 // Lin64: define x86_regcallcc void @__regcall3__hfa3(double %a, double %b, double %c, double %d, double %e, double %f.coerce0, double %f.coerce1)
 
-// Aggregates with more than four elements are not HFAs and are passed byval.
+// Aggregates with more than four elements are not HFAs and are passed byval(%b.3, double).
 // Because they are not classified as homogeneous, they don't get special
 // handling to ensure alignment.
 void __regcall hfa4(struct HFA5 a) {}
-// Win32: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4)
+// Win32: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval(%struct.HFA5) align 4)
 // Win64: define dso_local x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* %a)
-// Lin32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval align 4 %a)
+// Lin32: define x86_regcallcc void @__regcall3__hfa4(%struct.HFA5* byval(%struct.HFA5) align 4 %a)
 // Lin64: define x86_regcallcc void @__regcall3__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4)
 
 // Return HFAs of 4 or fewer elements in registers.
diff --git a/clang/test/CodeGen/regparm-struct.c b/clang/test/CodeGen/regparm-struct.c
index b31901266e6f0..7f56ae094a69c 100644
--- a/clang/test/CodeGen/regparm-struct.c
+++ b/clang/test/CodeGen/regparm-struct.c
@@ -170,7 +170,7 @@ struct s13 {
   } y;
 };
 __attribute__((regparm(3))) void f18(struct s13 a, int b, int c, int d);
-// CHECK: declare void @f18(%struct.s13* byval align 4, i32 inreg, i32 inreg, i32 inreg)
+// CHECK: declare void @f18(%struct.s13* byval(%struct.s13) align 4, i32 inreg, i32 inreg, i32 inreg)
 void g18(void) {
   struct s13 x = {{41}};
   f18(x, 42, 43, 44);
diff --git a/clang/test/CodeGen/renderscript.c b/clang/test/CodeGen/renderscript.c
index 5482d36f583dc..a85dc35c61496 100644
--- a/clang/test/CodeGen/renderscript.c
+++ b/clang/test/CodeGen/renderscript.c
@@ -131,7 +131,7 @@ sLong2Char retLong2Char() { sLong2Char r; return r;}
 
 typedef struct {long l1, l2, l3, l4, l5, l6, l7, l8, l9; } sLong9;
 
-// CHECK-RS32: void @argLong9(%struct.sLong9* byval align 8 %s)
+// CHECK-RS32: void @argLong9(%struct.sLong9* byval(%struct.sLong9) align 8 %s)
 // CHECK-RS64: void @argLong9(%struct.sLong9* %s)
 void argLong9(sLong9 s) {}
 
diff --git a/clang/test/CodeGen/sparc-arguments.c b/clang/test/CodeGen/sparc-arguments.c
index c86b40b11fbaf..c5925de892649 100644
--- a/clang/test/CodeGen/sparc-arguments.c
+++ b/clang/test/CodeGen/sparc-arguments.c
@@ -17,9 +17,9 @@ struct s1 x1;
 
 // Ensure the align 8 is passed through:
 // CHECK-LABEL: define void @f1()
-// CHECK: call void @f1_helper(%struct.s1* byval align 8 @x1)
+// CHECK: call void @f1_helper(%struct.s1* byval(%struct.s1) align 8 @x1)
 // Also ensure the declaration of f1_helper includes it
-// CHECK: declare void @f1_helper(%struct.s1* byval align 8)
+// CHECK: declare void @f1_helper(%struct.s1* byval(%struct.s1) align 8)
 
 void f1_helper(struct s1);
 void f1() {
diff --git a/clang/test/CodeGen/sparcv8-abi.c b/clang/test/CodeGen/sparcv8-abi.c
index cd8832f6534b8..ee99ee420a74a 100644
--- a/clang/test/CodeGen/sparcv8-abi.c
+++ b/clang/test/CodeGen/sparcv8-abi.c
@@ -1,18 +1,18 @@
 // RUN: %clang_cc1 -triple sparc-unknown-unknown -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: define { float, float } @p({ float, float }* byval align 4 %a, { float, float }* byval align 4 %b) #0 {
+// CHECK-LABEL: define { float, float } @p({ float, float }* byval({ float, float }) align 4 %a, { float, float }* byval({ float, float }) align 4 %b) #0 {
 float __complex__
 p (float __complex__  a, float __complex__  b)
 {
 }
 
-// CHECK-LABEL: define { double, double } @q({ double, double }* byval align 8 %a, { double, double }* byval align 8 %b) #0 {
+// CHECK-LABEL: define { double, double } @q({ double, double }* byval({ double, double }) align 8 %a, { double, double }* byval({ double, double }) align 8 %b) #0 {
 double __complex__
 q (double __complex__  a, double __complex__  b)
 {
 }
 
-// CHECK-LABEL: define { i64, i64 } @r({ i64, i64 }* byval align 8 %a, { i64, i64 }* byval align 8 %b) #0 {
+// CHECK-LABEL: define { i64, i64 } @r({ i64, i64 }* byval({ i64, i64 }) align 8 %a, { i64, i64 }* byval({ i64, i64 }) align 8 %b) #0 {
 long long __complex__
 r (long long __complex__  a, long long __complex__  b)
 {
diff --git a/clang/test/CodeGen/stdcall-fastcall.c b/clang/test/CodeGen/stdcall-fastcall.c
index b6217ac1cf3c3..cfb430cd06e7c 100644
--- a/clang/test/CodeGen/stdcall-fastcall.c
+++ b/clang/test/CodeGen/stdcall-fastcall.c
@@ -89,7 +89,7 @@ struct S2 {
 void __attribute__((fastcall)) foo5(struct S2 y);
 void bar5(struct S2 y) {
   // CHECK-LABEL: define void @bar5
-  // CHECK: call x86_fastcallcc void @foo5(%struct.S2* byval align 4 %
+  // CHECK: call x86_fastcallcc void @foo5(%struct.S2* byval(%struct.S2) align 4 %
   foo5(y);
 }
 
@@ -117,7 +117,7 @@ void bar8(struct S1 a, int b) {
 void __attribute__((fastcall)) foo9(struct S2 a, int b);
 void bar9(struct S2 a, int b) {
   // CHECK-LABEL: define void @bar9
-  // CHECK: call x86_fastcallcc void @foo9(%struct.S2* byval align 4 %{{.*}}, i32 %
+  // CHECK: call x86_fastcallcc void @foo9(%struct.S2* byval(%struct.S2) align 4 %{{.*}}, i32 %
   foo9(a, b);
 }
 
diff --git a/clang/test/CodeGen/struct-passing.c b/clang/test/CodeGen/struct-passing.c
index d28fee213eb10..80847b9fea64f 100644
--- a/clang/test/CodeGen/struct-passing.c
+++ b/clang/test/CodeGen/struct-passing.c
@@ -20,8 +20,8 @@ void *ps[] = { f0, f1, f2, f3, f4, f5 };
 // CHECK: declare i32 @f1() [[RO:#[0-9]+]]
 // CHECK: declare void @f2({{.*}} sret)
 // CHECK: declare void @f3({{.*}} sret)
-// CHECK: declare void @f4({{.*}} byval align 4)
-// CHECK: declare void @f5({{.*}} byval align 4)
+// CHECK: declare void @f4({{.*}} byval({{.*}}) align 4)
+// CHECK: declare void @f5({{.*}} byval({{.*}}) align 4)
 
 // CHECK: attributes [[RN]] = { nounwind readnone{{.*}} }
 // CHECK: attributes [[RO]] = { nounwind readonly{{.*}} }
diff --git a/clang/test/CodeGen/vectorcall.c b/clang/test/CodeGen/vectorcall.c
index 564b41e1f54dc..db0ba02ba39e7 100644
--- a/clang/test/CodeGen/vectorcall.c
+++ b/clang/test/CodeGen/vectorcall.c
@@ -16,7 +16,7 @@ void __vectorcall v3(int a, struct Small b, int c) {}
 
 struct Large { int a[5]; };
 void __vectorcall v4(int a, struct Large b, int c) {}
-// X32: define dso_local x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// X32: define dso_local x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval(%struct.Large) align 4 %b, i32 inreg %c)
 // X64: define dso_local x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
 
 struct HFA2 { double x, y; };
@@ -44,7 +44,7 @@ void __vectorcall hfa3(double a, double b, double c, double d, double e, struct
 // Because they are not classified as homogeneous, they don't get special
 // handling to ensure alignment.
 void __vectorcall hfa4(struct HFA5 a) {}
-// X32: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval(%struct.HFA5) align 4)
 // X64: define dso_local x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
 
 // Return HFAs of 4 or fewer elements in registers.
diff --git a/clang/test/CodeGen/wasm-arguments.c b/clang/test/CodeGen/wasm-arguments.c
index cd3f45ab1a7e5..c92028bae2db0 100644
--- a/clang/test/CodeGen/wasm-arguments.c
+++ b/clang/test/CodeGen/wasm-arguments.c
@@ -14,8 +14,8 @@ typedef struct {
   int bb;
 } s1;
 // Structs should be passed byval and not split up.
-// WEBASSEMBLY32: define void @f1(%struct.s1* byval align 4 %i)
-// WEBASSEMBLY64: define void @f1(%struct.s1* byval align 4 %i)
+// WEBASSEMBLY32: define void @f1(%struct.s1* byval(%struct.s1) align 4 %i)
+// WEBASSEMBLY64: define void @f1(%struct.s1* byval(%struct.s1) align 4 %i)
 void f1(s1 i) {}
 
 typedef struct {
@@ -77,8 +77,8 @@ union simple_union {
   char b;
 };
 // Unions should be passed as byval structs.
-// WEBASSEMBLY32: define void @f9(%union.simple_union* byval align 4 %s)
-// WEBASSEMBLY64: define void @f9(%union.simple_union* byval align 4 %s)
+// WEBASSEMBLY32: define void @f9(%union.simple_union* byval(%union.simple_union) align 4 %s)
+// WEBASSEMBLY64: define void @f9(%union.simple_union* byval(%union.simple_union) align 4 %s)
 void f9(union simple_union s) {}
 
 typedef struct {
@@ -87,6 +87,6 @@ typedef struct {
   int b8 : 8;
 } bitfield1;
 // Bitfields should be passed as byval structs.
-// WEBASSEMBLY32: define void @f10(%struct.bitfield1* byval align 4 %bf1)
-// WEBASSEMBLY64: define void @f10(%struct.bitfield1* byval align 4 %bf1)
+// WEBASSEMBLY32: define void @f10(%struct.bitfield1* byval(%struct.bitfield1) align 4 %bf1)
+// WEBASSEMBLY64: define void @f10(%struct.bitfield1* byval(%struct.bitfield1) align 4 %bf1)
 void f10(bitfield1 bf1) {}
diff --git a/clang/test/CodeGen/x86_32-arguments-darwin.c b/clang/test/CodeGen/x86_32-arguments-darwin.c
index 1536631fb0f3f..2bb2f1bb64a2c 100644
--- a/clang/test/CodeGen/x86_32-arguments-darwin.c
+++ b/clang/test/CodeGen/x86_32-arguments-darwin.c
@@ -52,7 +52,7 @@ void f8_2(struct s8 a0) {}
 
 // FIXME: llvm-gcc expands this, this may have some value for the
 // backend in terms of optimization but doesn't change the ABI.
-// CHECK-LABEL: define void @f9_2(%struct.s9* byval align 4 %a0)
+// CHECK-LABEL: define void @f9_2(%struct.s9* byval(%struct.s9) align 4 %a0)
 struct s9 {
   int a : 17;
   int b;
@@ -153,7 +153,7 @@ struct s37 { float c[1][1]; } f37(void) { while (1) {} }
 // CHECK-LABEL: define void @f38(%struct.s38* noalias sret %agg.result)
 struct s38 { char a[3]; short b; } f38(void) { while (1) {} }
 
-// CHECK-LABEL: define void @f39(%struct.s39* byval align 16 %x)
+// CHECK-LABEL: define void @f39(%struct.s39* byval(%struct.s39) align 16 %x)
 typedef int v39 __attribute((vector_size(16)));
 struct s39 { v39 x; };
 void f39(struct s39 x) {}
@@ -201,13 +201,13 @@ void f50(struct s50 a0) { }
 struct s51 { vvbp f0; int f1; };
 void f51(struct s51 a0) { }
 
-// CHECK-LABEL: define void @f52(%struct.s52* byval align 4)
+// CHECK-LABEL: define void @f52(%struct.s52* byval(%struct.s52) align 4)
 struct s52 {
   long double a;
 };
 void f52(struct s52 x) {}
 
-// CHECK-LABEL: define void @f53(%struct.s53* byval align 4)
+// CHECK-LABEL: define void @f53(%struct.s53* byval(%struct.s53) align 4)
 struct __attribute__((aligned(32))) s53 {
   int x;
   int y;
@@ -228,22 +228,22 @@ typedef int v4i32 __attribute__((__vector_size__(16)));
 v4i32 f55(v4i32 arg) { return arg+arg; }
 
 // CHECK-LABEL: define void @f56(
-// CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4,
-// CHECK: i64 %a4.coerce, %struct.s56_2* byval align 4,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval align 16 %a9,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval align 4)
+// CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
+// CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4,
+// CHECK: i64 %a4.coerce, %struct.s56_2* byval(%struct.s56_2) align 4,
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 16 %a7,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 16 %a9,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 4,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 4)
 
 // CHECK:   call void (i32, ...) @f56_0(i32 1,
-// CHECK: i32 %{{[^ ]*}}, %struct.s56_0* byval align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 16 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 16 %{{[^ ]*}},
-// CHECK: <8 x i32> {{[^ ]*}}, %struct.s56_5* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x double> {{[^ ]*}}, %struct.s56_6* byval align 4 %{{[^ ]*}})
+// CHECK: i32 %{{[^ ]*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
+// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
+// CHECK: i64 %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 16 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 16 %{{[^ ]*}},
+// CHECK: <8 x i32> {{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
+// CHECK: <4 x double> {{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
 // CHECK: }
 //
 // <rdar://problem/7964854> [i386] clang misaligns long double in structures
@@ -289,16 +289,16 @@ void f58(union u58 x) {}
 struct s59 { float x __attribute((aligned(8))); };
 struct s59 f59() { while (1) {} }
 
-// CHECK-LABEL: define void @f60(%struct.s60* byval align 4, i32 %y)
+// CHECK-LABEL: define void @f60(%struct.s60* byval(%struct.s60) align 4, i32 %y)
 struct s60 { int x __attribute((aligned(8))); };
 void f60(struct s60 x, int y) {}
 
-// CHECK-LABEL: define void @f61(i32 %x, %struct.s61* byval align 16 %y)
+// CHECK-LABEL: define void @f61(i32 %x, %struct.s61* byval(%struct.s61) align 16 %y)
 typedef int T61 __attribute((vector_size(16)));
 struct s61 { T61 x; int y; };
 void f61(int x, struct s61 y) {}
 
-// CHECK-LABEL: define void @f62(i32 %x, %struct.s62* byval align 4)
+// CHECK-LABEL: define void @f62(i32 %x, %struct.s62* byval(%struct.s62) align 4)
 typedef int T62 __attribute((vector_size(16)));
 struct s62 { T62 x; int y; } __attribute((packed, aligned(8)));
 void f62(int x, struct s62 y) {}
@@ -317,7 +317,7 @@ int f63(int i, ...) {
   return s.y;
 }
 
-// CHECK-LABEL: define void @f64(%struct.s64* byval align 4 %x)
+// CHECK-LABEL: define void @f64(%struct.s64* byval(%struct.s64) align 4 %x)
 struct s64 { signed char a[0]; signed char b[]; };
 void f64(struct s64 x) {}
 
@@ -341,4 +341,4 @@ T66 f66(int i, ...) {
 // PR14453
 struct s67 { _Complex unsigned short int a; };
 void f67(struct s67 x) {}
-// CHECK-LABEL: define void @f67(%struct.s67* byval align 4 %x)
+// CHECK-LABEL: define void @f67(%struct.s67* byval(%struct.s67) align 4 %x)
diff --git a/clang/test/CodeGen/x86_32-arguments-iamcu.c b/clang/test/CodeGen/x86_32-arguments-iamcu.c
index b53d34a3e835d..e391c711ea101 100644
--- a/clang/test/CodeGen/x86_32-arguments-iamcu.c
+++ b/clang/test/CodeGen/x86_32-arguments-iamcu.c
@@ -40,10 +40,10 @@ void smallStructs(st4_t st1, st4_t st2, st4_t st3) {}
 // CHECK-LABEL: define void @paddedStruct(i32 %i1, i32 %st.coerce0, i32 %st.coerce1, i32 %st4.0)
 void paddedStruct(int i1, st5_t st, st4_t st4) {}
 
-// CHECK-LABEL: define void @largeStructBegin(%struct.st12_t* byval align 4 %st)
+// CHECK-LABEL: define void @largeStructBegin(%struct.st12_t* byval(%struct.st12_t) align 4 %st)
 void largeStructBegin(st12_t st) {}
 
-// CHECK-LABEL: define void @largeStructMiddle(i32 %i1, %struct.st12_t* byval align 4 %st, i32 %i2, i32 %i3)
+// CHECK-LABEL: define void @largeStructMiddle(i32 %i1, %struct.st12_t* byval(%struct.st12_t) align 4 %st, i32 %i2, i32 %i3)
 void largeStructMiddle(int i1, st12_t st, int i2, int i3) {}
 
 // CHECK-LABEL: define void @largeStructEnd(i32 %i1, i32 %i2, i32 %i3, i32 %st.0, i32 %st.1, i32 %st.2)
@@ -58,7 +58,7 @@ st4_t retSmallStruct(st4_t r) { return r; }
 // CHECK-LABEL: define i64 @retPaddedStruct(i32 %r.coerce0, i32 %r.coerce1)
 st5_t retPaddedStruct(st5_t r) { return r; }
 
-// CHECK-LABEL: define void @retLargeStruct(%struct.st12_t* noalias sret %agg.result, i32 %i1, %struct.st12_t* byval align 4 %r)
+// CHECK-LABEL: define void @retLargeStruct(%struct.st12_t* noalias sret %agg.result, i32 %i1, %struct.st12_t* byval(%struct.st12_t) align 4 %r)
 st12_t retLargeStruct(int i1, st12_t r) { return r; }
 
 // CHECK-LABEL: define i32 @varArgs(i32 %i1, ...)
diff --git a/clang/test/CodeGen/x86_32-arguments-linux.c b/clang/test/CodeGen/x86_32-arguments-linux.c
index 02eac51216af7..3c64fbcc15b3a 100644
--- a/clang/test/CodeGen/x86_32-arguments-linux.c
+++ b/clang/test/CodeGen/x86_32-arguments-linux.c
@@ -2,22 +2,22 @@
 // RUN: FileCheck < %t %s
 
 // CHECK-LABEL: define void @f56(
-// CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4,
-// CHECK: <1 x double> %a4, %struct.s56_2* byval align 4,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval align 4,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval align 4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval align 4)
+// CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
+// CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4,
+// CHECK: <1 x double> %a4, %struct.s56_2* byval(%struct.s56_2) align 4,
+// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 4,
+// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 4,
+// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 4,
+// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 4)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
-// CHECK: i32 %{{.*}}, %struct.s56_0* byval align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}},
-// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval align 4 %{{[^ ]*}})
+// CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
+// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
+// CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
+// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
+// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
+// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
+// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
 // CHECK: }
 //
 // <rdar://problem/7964854> [i386] clang misaligns long double in structures
diff --git a/clang/test/CodeGen/x86_32-arguments-realign.c b/clang/test/CodeGen/x86_32-arguments-realign.c
index b99523b7eee66..cd1d1cf26893d 100644
--- a/clang/test/CodeGen/x86_32-arguments-realign.c
+++ b/clang/test/CodeGen/x86_32-arguments-realign.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -w -fblocks -triple i386-apple-darwin9 -emit-llvm -o %t %s
 // RUN: FileCheck < %t %s
 
-// CHECK-LABEL: define void @f0(%struct.s0* byval align 4)
+// CHECK-LABEL: define void @f0(%struct.s0* byval(%struct.s0) align 4)
 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %{{.*}}, i8* align 4 %{{.*}}, i32 16, i1 false)
 // CHECK: }
 struct s0 { long double a; };
diff --git a/clang/test/CodeGen/x86_64-arguments-nacl.c b/clang/test/CodeGen/x86_64-arguments-nacl.c
index cbfad1a1cb572..ea4483422dfe2 100644
--- a/clang/test/CodeGen/x86_64-arguments-nacl.c
+++ b/clang/test/CodeGen/x86_64-arguments-nacl.c
@@ -61,14 +61,14 @@ void f12_1(struct s12 a0) {}
 
 // Check that sret parameter is accounted for when checking available integer
 // registers.
-// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)
+// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
 
 struct s13_0 { long long f0[3]; };
 struct s13_1 { long long f0[2]; };
 struct s13_0 f13(int a, int b, int c, int d,
                  struct s13_1 e, int f) { while (1) {} }
 
-// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
+// CHECK-LABEL: define void @f20(%struct.s20* byval(%struct.s20) align 32 %x)
 struct __attribute__((aligned(32))) s20 {
   int x;
   int y;
diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/x86_64-arguments.c
index 548980b32ae41..107571d8140bb 100644
--- a/clang/test/CodeGen/x86_64-arguments.c
+++ b/clang/test/CodeGen/x86_64-arguments.c
@@ -48,7 +48,7 @@ void f7(e7 a0) {
 // Test merging/passing of upper eightbyte with X87 class.
 //
 // CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
-// CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
+// CHECK-LABEL: define void @f8_2(%union.u8* byval(%union.u8) align 16 %a0)
 union u8 {
   long double a;
   int b;
@@ -74,7 +74,7 @@ void f12_1(struct s12 a0) {}
 
 // Check that sret parameter is accounted for when checking available integer
 // registers.
-// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)
+// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
 
 struct s13_0 { long long f0[3]; };
 struct s13_1 { long long f0[2]; };
@@ -104,13 +104,13 @@ void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
 
 // Check byval alignment.
 
-// CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
+// CHECK-LABEL: define void @f19(%struct.s19* byval(%struct.s19) align 16 %x)
 struct s19 {
   long double a;
 };
 void f19(struct s19 x) {}
 
-// CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
+// CHECK-LABEL: define void @f20(%struct.s20* byval(%struct.s20) align 32 %x)
 struct __attribute__((aligned(32))) s20 {
   int x;
   int y;
@@ -271,7 +271,7 @@ typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
 v1i64_2 f35(v1i64_2 arg) { return arg+arg; }
 
 // rdar://9122143
-// CHECK: declare void @func(%struct._str* byval align 16)
+// CHECK: declare void @func(%struct._str* byval(%struct._str) align 16)
 typedef struct _str {
   union {
     long double a;
@@ -292,8 +292,8 @@ v2i32 f36(v2i32 arg) { return arg; }
 
 // AVX: declare void @f38(<8 x float>)
 // AVX: declare void @f37(<8 x float>)
-// SSE: declare void @f38(%struct.s256* byval align 32)
-// SSE: declare void @f37(<8 x float>* byval align 32)
+// SSE: declare void @f38(%struct.s256* byval(%struct.s256) align 32)
+// SSE: declare void @f37(<8 x float>* byval(<8 x float>) align 32)
 typedef float __m256 __attribute__ ((__vector_size__ (32)));
 typedef struct {
   __m256 m;
@@ -309,7 +309,7 @@ void f39() { f38(x38); f37(x37); }
 // The two next tests make sure that the struct below is passed
 // in the same way regardless of avx being used
 
-// CHECK: declare void @func40(%struct.t128* byval align 16)
+// CHECK: declare void @func40(%struct.t128* byval(%struct.t128) align 16)
 typedef float __m128 __attribute__ ((__vector_size__ (16)));
 typedef struct t128 {
   __m128 m;
@@ -321,7 +321,7 @@ void func41(two128 s) {
   func40(s);
 }
 
-// CHECK: declare void @func42(%struct.t128_2* byval align 16)
+// CHECK: declare void @func42(%struct.t128_2* byval(%struct.t128_2) align 16)
 typedef struct xxx {
   __m128 array[2];
 } Atwo128;
@@ -368,7 +368,7 @@ void test45() { f45(x45); }
 // Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
 // lowering can't handle this case correctly because it runs after legalization.
 // CHECK: @test46
-// CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}})
+// CHECK: call void @f46({{.*}}<2 x float>* byval(<2 x float>) align 8 {{.*}}, <2 x float>* byval(<2 x float>) align 8 {{.*}})
 typedef float v46 __attribute((vector_size(8)));
 void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
 void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
@@ -460,7 +460,7 @@ void test54() {
   test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
 }
 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
-// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}})
+// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval({ double, double }) align 8 {{%[^)]+}})
 
 typedef float __m512 __attribute__ ((__vector_size__ (64)));
 typedef struct {
@@ -474,20 +474,20 @@ __m512 x56;
 // as per https://github.com/hjl-tools/x86-psABI/commit/30f9c9 3.2.3p2 Rule 1
 //
 // AVX512: declare void @f55(<16 x float>)
-// NO-AVX512: declare void @f55(%struct.s512* byval align 64)
+// NO-AVX512: declare void @f55(%struct.s512* byval(%struct.s512) align 64)
 void f55(s512 x);
 
 // __m512 has type SSE/SSEUP on AVX512.
 //
 // AVX512: declare void @f56(<16 x float>)
-// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
+// NO-AVX512: declare void @f56(<16 x float>* byval(<16 x float>) align 64)
 void f56(__m512 x);
 void f57() { f55(x55); f56(x56); }
 
 // Like for __m128 on AVX, check that the struct below is passed
 // in the same way regardless of AVX512 being used.
 //
-// CHECK: declare void @f58(%struct.t256* byval align 32)
+// CHECK: declare void @f58(%struct.t256* byval(%struct.t256) align 32)
 typedef struct t256 {
   __m256 m;
   __m256 n;
@@ -498,7 +498,7 @@ void f59(two256 s) {
   f58(s);
 }
 
-// CHECK: declare void @f60(%struct.sat256* byval align 32)
+// CHECK: declare void @f60(%struct.sat256* byval(%struct.sat256) align 32)
 typedef struct at256 {
   __m256 array[2];
 } Atwo256;
@@ -529,7 +529,7 @@ void f63(__m512 *m, __builtin_va_list argList) {
 }
 
 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
-// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}})
+// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval({ double, double }) align 8 {{%[^)]+}})
 void f64_helper(__m512, ...);
 __m512 x64;
 void f64() {
@@ -541,7 +541,7 @@ struct t65 {
   __m256 m;
   int : 0;
 };
-// SSE-LABEL: @f65(%struct.t65* byval align 32 %{{[^,)]+}})
+// SSE-LABEL: @f65(%struct.t65* byval(%struct.t65) align 32 %{{[^,)]+}})
 // AVX: @f65(<8 x float> %{{[^,)]+}})
 void f65(struct t65 a0) {
 }
diff --git a/clang/test/CodeGenCUDA/kernel-args-alignment.cu b/clang/test/CodeGenCUDA/kernel-args-alignment.cu
index 653f3eb23d2a2..2bfd098a8540a 100644
--- a/clang/test/CodeGenCUDA/kernel-args-alignment.cu
+++ b/clang/test/CodeGenCUDA/kernel-args-alignment.cu
@@ -36,5 +36,5 @@ static_assert(alignof(S) == 8, "Unexpected alignment.");
 // HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
 
 // DEVICE-LABEL: @_Z6kernelc1SPi
-// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32*
+// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval(%struct.S) align 8{{[^,]*}}, i32*
 __global__ void kernel(char a, S s, int *b) {}
diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu
index d0986629f8d4c..74d91b4d57f37 100644
--- a/clang/test/CodeGenCUDA/kernel-args.cu
+++ b/clang/test/CodeGenCUDA/kernel-args.cu
@@ -9,14 +9,14 @@ struct A {
 };
 
 // AMDGCN: define amdgpu_kernel void @_Z6kernel1A(%struct.A %x.coerce)
-// NVPTX: define void @_Z6kernel1A(%struct.A* byval align 4 %x)
+// NVPTX: define void @_Z6kernel1A(%struct.A* byval(%struct.A) align 4 %x)
 __global__ void kernel(A x) {
 }
 
 class Kernel {
 public:
   // AMDGCN: define amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %x.coerce)
-  // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval align 4 %x)
+  // NVPTX: define void @_ZN6Kernel12memberKernelE1A(%struct.A* byval(%struct.A) align 4 %x)
   static __global__ void memberKernel(A x){}
   template<typename T> static __global__ void templateMemberKernel(T x) {}
 };
@@ -30,10 +30,10 @@ void launch(void*);
 void test() {
   Kernel K;
   // AMDGCN: define amdgpu_kernel void @_Z14templateKernelI1AEvT_(%struct.A %x.coerce)
-  // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval align 4 %x)
+  // NVPTX: define void @_Z14templateKernelI1AEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)templateKernel<A>);
 
   // AMDGCN: define amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %x.coerce)
-  // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval align 4 %x)
+  // NVPTX: define void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A* byval(%struct.A) align 4 %x)
   launch((void*)Kernel::templateMemberKernel<A>);
 }
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index 9ac143a520052..38f3e200f6851 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -57,7 +57,7 @@ void test_indirect_arg_global() {
   func_with_ref_arg(g_a);
 }
 
-// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %b)
+// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %b)
 // CHECK:  %p = alloca %class.B*, align 8, addrspace(5)
 // CHECK:  %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B**
 // CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
@@ -73,7 +73,7 @@ void func_with_byval_arg(B b) {
 // CHECK:  %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
 // CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
 // CHECK:  %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)*
-// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r4]])
+// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %[[r4]])
 // CHECK:  call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(400) %[[r0]])
 void test_byval_arg_auto() {
   B b;
@@ -86,7 +86,7 @@ void test_byval_arg_auto() {
 // CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
 // CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
 // CHECK:  %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)*
-// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r2]])
+// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval(%class.B) align 4 %[[r2]])
 // CHECK:  call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(400) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*))
 void test_byval_arg_global() {
   func_with_byval_arg(g_b);
diff --git a/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp b/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
index 34e2bc5706353..5a8bdf78100f4 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
@@ -21,7 +21,7 @@ S C::byval_and_sret(S a) { return S(); }
 
 // CHECK: define dso_local void @"?variadic_sret@C@@QAA?AUS@@PBDZZ"(%struct.C* %this, %struct.S* noalias sret %agg.result, i8* %f, ...)
 // CHECK: define dso_local void @"?cdecl_sret@C@@QAA?AUS@@XZ"(%struct.C* %this, %struct.S* noalias sret %agg.result)
-// CHECK: define dso_local void @"?byval_and_sret@C@@QAA?AUS@@U2@@Z"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.S* byval align 4 %a)
+// CHECK: define dso_local void @"?byval_and_sret@C@@QAA?AUS@@U2@@Z"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.S* byval(%struct.S) align 4 %a)
 
 int main() {
   C c;
diff --git a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
index ad4073099ce6d..96dec27460aa7 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
@@ -138,27 +138,27 @@ void medium_arg(Medium s) {}
 // WOA: define dso_local arm_aapcs_vfpcc void @"?medium_arg@@YAXUMedium@@@Z"([2 x i32] %s.coerce)
 
 void base_no_byval_arg(BaseNoByval s) {}
-// LINUX-LABEL: define void @_Z17base_no_byval_arg11BaseNoByval(%struct.BaseNoByval* byval align 4 %s)
+// LINUX-LABEL: define void @_Z17base_no_byval_arg11BaseNoByval(%struct.BaseNoByval* byval(%struct.BaseNoByval) align 4 %s)
 // WIN32: define dso_local void @"?base_no_byval_arg@@YAXUBaseNoByval@@@Z"(i32 %s.0, i32 %s.1)
 // WIN64: define dso_local void @"?base_no_byval_arg@@YAXUBaseNoByval@@@Z"(i64 %s.coerce)
 // WOA: define dso_local arm_aapcs_vfpcc void @"?base_no_byval_arg@@YAXUBaseNoByval@@@Z"([2 x i32] %s.coerce)
 
 void small_arg_with_ctor(SmallWithCtor s) {}
-// LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval align 4 %s)
+// LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval(%struct.SmallWithCtor) align 4 %s)
 // WIN32: define dso_local void @"?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0)
 // WIN64: define dso_local void @"?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce)
 // WOA: define dso_local arm_aapcs_vfpcc void @"?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"([1 x i32] %s.coerce)
 
 // FIXME: We could coerce to a series of i32s here if we wanted to.
 void multibyte_arg(Multibyte s) {}
-// LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval align 4 %s)
-// WIN32: define dso_local void @"?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s)
+// LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval(%struct.Multibyte) align 4 %s)
+// WIN32: define dso_local void @"?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval(%struct.Multibyte) align 4 %s)
 // WIN64: define dso_local void @"?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce)
 // WOA: define dso_local arm_aapcs_vfpcc void @"?multibyte_arg@@YAXUMultibyte@@@Z"([1 x i32] %s.coerce)
 
 void packed_arg(Packed s) {}
-// LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval align 4 %s)
-// WIN32: define dso_local void @"?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s)
+// LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval(%struct.Packed) align 4 %s)
+// WIN32: define dso_local void @"?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval(%struct.Packed) align 4 %s)
 // WIN64: define dso_local void @"?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s)
 
 // Test that dtors are invoked in the callee.
@@ -252,8 +252,8 @@ void medium_arg_with_copy_ctor(MediumWithCopyCtor s) {}
 // WOA64: define dso_local void @"?medium_arg_with_copy_ctor@@YAXUMediumWithCopyCtor@@@Z"(%struct.MediumWithCopyCtor* %s)
 
 void big_arg(Big s) {}
-// LINUX-LABEL: define void @_Z7big_arg3Big(%struct.Big* byval align 4 %s)
-// WIN32: define dso_local void @"?big_arg@@YAXUBig@@@Z"(%struct.Big* byval align 4 %s)
+// LINUX-LABEL: define void @_Z7big_arg3Big(%struct.Big* byval(%struct.Big) align 4 %s)
+// WIN32: define dso_local void @"?big_arg@@YAXUBig@@@Z"(%struct.Big* byval(%struct.Big) align 4 %s)
 // WIN64: define dso_local void @"?big_arg@@YAXUBig@@@Z"(%struct.Big* %s)
 
 // PR27607: We would attempt to load i32 value out of the reference instead of
@@ -263,7 +263,7 @@ struct RefField {
   int &x;
 };
 void takes_ref_field(RefField s) {}
-// LINUX-LABEL: define void @_Z15takes_ref_field8RefField(%struct.RefField* byval align 4 %s)
+// LINUX-LABEL: define void @_Z15takes_ref_field8RefField(%struct.RefField* byval(%struct.RefField) align 4 %s)
 // WIN32: define dso_local void @"?takes_ref_field@@YAXURefField@@@Z"(i32* %s.0)
 // WIN64: define dso_local void @"?takes_ref_field@@YAXURefField@@@Z"(i64 %s.coerce)
 
@@ -272,7 +272,7 @@ void pass_ref_field() {
   takes_ref_field(RefField(x));
 }
 // LINUX-LABEL: define void @_Z14pass_ref_fieldv()
-// LINUX: call void @_Z15takes_ref_field8RefField(%struct.RefField* byval align 4 %{{.*}})
+// LINUX: call void @_Z15takes_ref_field8RefField(%struct.RefField* byval(%struct.RefField) align 4 %{{.*}})
 // WIN32-LABEL: define dso_local void @"?pass_ref_field@@YAXXZ"()
 // WIN32: call void @"?takes_ref_field@@YAXURefField@@@Z"(i32* %{{.*}})
 // WIN64-LABEL: define dso_local void @"?pass_ref_field@@YAXXZ"()
@@ -302,12 +302,12 @@ class Class {
 
   void thiscall_method_arg(Empty s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Empty(%class.Class* %this)
-  // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUEmpty@@@Z"(%class.Class* %this, %struct.Empty* byval align 4 %s)
+  // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUEmpty@@@Z"(%class.Class* %this, %struct.Empty* byval(%struct.Empty) align 4 %s)
   // WIN64: define linkonce_odr dso_local void @"?thiscall_method_arg@Class@@QEAAXUEmpty@@@Z"(%class.Class* %this, i8 %s.coerce)
 
   void thiscall_method_arg(EmptyWithCtor s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13EmptyWithCtor(%class.Class* %this)
-  // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUEmptyWithCtor@@@Z"(%class.Class* %this, %struct.EmptyWithCtor* byval align 4 %s)
+  // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUEmptyWithCtor@@@Z"(%class.Class* %this, %struct.EmptyWithCtor* byval(%struct.EmptyWithCtor) align 4 %s)
   // WIN64: define linkonce_odr dso_local void @"?thiscall_method_arg@Class@@QEAAXUEmptyWithCtor@@@Z"(%class.Class* %this, i8 %s.coerce)
 
   void thiscall_method_arg(Small s) {}
@@ -316,13 +316,13 @@ class Class {
   // WIN64: define linkonce_odr dso_local void @"?thiscall_method_arg@Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce)
 
   void thiscall_method_arg(SmallWithCtor s) {}
-  // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s)
+  // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval(%struct.SmallWithCtor) align 4 %s)
   // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.0)
   // WIN64: define linkonce_odr dso_local void @"?thiscall_method_arg@Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce)
 
   void thiscall_method_arg(Big s) {}
-  // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE3Big(%class.Class* %this, %struct.Big* byval align 4 %s)
-  // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUBig@@@Z"(%class.Class* %this, %struct.Big* byval align 4 %s)
+  // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE3Big(%class.Class* %this, %struct.Big* byval(%struct.Big) align 4 %s)
+  // WIN32: define {{.*}} void @"?thiscall_method_arg@Class@@QAEXUBig@@@Z"(%class.Class* %this, %struct.Big* byval(%struct.Big) align 4 %s)
   // WIN64: define linkonce_odr dso_local void @"?thiscall_method_arg@Class@@QEAAXUBig@@@Z"(%class.Class* %this, %struct.Big* %s)
 };
 
diff --git a/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp b/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
index 5ee6acf57466f..243a19198e296 100644
--- a/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
+++ b/clang/test/CodeGenCXX/ms-inline-asm-fields.cpp
@@ -15,7 +15,7 @@ A a_global;
 }
 
 extern "C" int test_param_field(A p) {
-// CHECK: define i32 @test_param_field(%struct.A* byval align 4 %p)
+// CHECK: define i32 @test_param_field(%struct.A* byval(%struct.A) align 4 %p)
 // CHECK: getelementptr inbounds %struct.A, %struct.A* %p, i32 0, i32 0
 // CHECK: call i32 asm sideeffect inteldialect "mov eax, $1"
 // CHECK: ret i32
diff --git a/clang/test/CodeGenCXX/regcall.cpp b/clang/test/CodeGenCXX/regcall.cpp
index fbc1dbf7d7c9a..315c5bc0e66c6 100644
--- a/clang/test/CodeGenCXX/regcall.cpp
+++ b/clang/test/CodeGenCXX/regcall.cpp
@@ -99,7 +99,7 @@ void force_gen() {
 long double _Complex __regcall foo(long double _Complex f) {
   return f;
 }
-// CHECK-LIN64-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 16 %f)
-// CHECK-LIN32-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* inreg noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %f)
+// CHECK-LIN64-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16 %f)
+// CHECK-LIN32-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* inreg noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 4 %f)
 // CHECK-WIN64-DAG: define dso_local x86_regcallcc { double, double } @"?foo@@YwU?$_Complex@O@__clang@@U12@@Z"(double %f.0, double %f.1)
 // CHECK-WIN32-DAG: define dso_local x86_regcallcc { double, double } @"?foo@@YwU?$_Complex@O@__clang@@U12@@Z"(double %f.0, double %f.1)
diff --git a/clang/test/CodeGenCXX/regparm.cpp b/clang/test/CodeGenCXX/regparm.cpp
index 2196c798bf3e6..eb354465e8100 100644
--- a/clang/test/CodeGenCXX/regparm.cpp
+++ b/clang/test/CodeGenCXX/regparm.cpp
@@ -32,7 +32,7 @@ struct S3 {
   } a;
 };
 __attribute((regparm(2))) void foo4(S3 a, int b);
-// CHECK: declare void @_Z4foo42S3i(%struct.S3* byval align 4, i32 inreg)
+// CHECK: declare void @_Z4foo42S3i(%struct.S3* byval(%struct.S3) align 4, i32 inreg)
 void bar3(S3 a, int b) {
   foo4(a, b);
 }
diff --git a/clang/test/CodeGenCXX/stmtexpr.cpp b/clang/test/CodeGenCXX/stmtexpr.cpp
index fe5ff2c7dee8b..67e1688492963 100644
--- a/clang/test/CodeGenCXX/stmtexpr.cpp
+++ b/clang/test/CodeGenCXX/stmtexpr.cpp
@@ -146,7 +146,7 @@ extern "C" int cleanup_exit_lvalue_byval(bool cond, ByVal arg) {
   ByVal &r = (A(1), ({ if (cond) return 0; (void)ByVal(); }), arg);
   return r.x[0];
 }
-// CHECK-LABEL: define{{.*}} i32 @cleanup_exit_lvalue_byval({{.*}}, %struct.ByVal* byval align 4 %arg)
+// CHECK-LABEL: define{{.*}} i32 @cleanup_exit_lvalue_byval({{.*}}, %struct.ByVal* byval(%struct.ByVal) align 4 %arg)
 // CHECK: call {{.*}} @_ZN1AC1Ei
 // CHECK: call {{.*}} @_ZN1AD1Ev
 // CHECK: switch
diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp
index 506540e86fd9c..50ebdb96848f5 100644
--- a/clang/test/CodeGenCXX/wasm-args-returns.cpp
+++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp
@@ -30,15 +30,15 @@ struct two_fields {
   double d, e;
 };
 test(two_fields);
-// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval align 8 %{{.*}})
+// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval(%struct.two_fields) align 8 %{{.*}})
 //
 // CHECK: define void @_Z15test_two_fieldsv()
 // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8
 // CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret %[[tmp]])
-// CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval align 8 %[[tmp]])
+// CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval(%struct.two_fields) align 8 %[[tmp]])
 // CHECK: ret void
 //
-// CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval align 8)
+// CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval(%struct.two_fields) align 8)
 // CHECK: declare void @_Z14def_two_fieldsv(%struct.two_fields* sret)
 
 struct copy_ctor {
diff --git a/clang/test/CodeGenCXX/x86_32-arguments.cpp b/clang/test/CodeGenCXX/x86_32-arguments.cpp
index 2c7234e038c9e..148a3a96495e8 100644
--- a/clang/test/CodeGenCXX/x86_32-arguments.cpp
+++ b/clang/test/CodeGenCXX/x86_32-arguments.cpp
@@ -31,7 +31,7 @@ void f(C) { }
 
 // CHECK-LABEL: define void @_ZThn4_N18BasicAliasAnalysis13getModRefInfoE8CallSite
 // ...
-// CHECK: %struct.CallSite* byval align 4 %CS)
+// CHECK: %struct.CallSite* byval(%struct.CallSite) align 4 %CS)
 struct CallSite {
   unsigned Ptr;
   CallSite(unsigned XX) : Ptr(XX) {}
@@ -89,7 +89,7 @@ struct s5 { s5(); int &x; };
 s5 f5() { return s5(); }
 
 // CHECK-LABEL: define i32 @_Z4f6_0M2s6i(i32 %a)
-// CHECK: define i64 @_Z4f6_1M2s6FivE({ i32, i32 }* byval align 4)
+// CHECK: define i64 @_Z4f6_1M2s6FivE({ i32, i32 }* byval({ i32, i32 }) align 4)
 // FIXME: It would be nice to avoid byval on the previous case.
 struct s6 {};
 typedef int s6::* s6_mdp;
diff --git a/clang/test/CodeGenCXX/x86_64-arguments-avx.cpp b/clang/test/CodeGenCXX/x86_64-arguments-avx.cpp
index 2933d9445b76d..2e04cf5c52cae 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments-avx.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments-avx.cpp
@@ -56,6 +56,6 @@ union U {
   __attribute__((__vector_size__(32))) float f1;
   int f2;
 };
-// CHECK: define i32 @_ZN5test31fENS_1UE({{.*}}* byval align 32
+// CHECK: define i32 @_ZN5test31fENS_1UE({{.*}}* byval({{.*}}) align 32
 int f(U u) { return u.f2; }
 }
diff --git a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
index 89d6dae5d30fc..f53576632d07e 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments-nacl-x32.cpp
@@ -20,7 +20,7 @@ void f_struct_with_mdp(struct_with_mdp a) { (void)a; }
 struct struct_with_mdp_too_much {
   char *a; char *b; char *c; char *d; test_struct_mdp e;
 };
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mdp_too_much{{.*}}({{.*}} byval({{.*}} {{.*}} %a)
 void f_struct_with_mdp_too_much(struct_with_mdp_too_much a) {
   (void)a;
 }
@@ -35,7 +35,7 @@ void f_struct_with_mfp_0(struct_with_mfp_0 a) { (void)a; }
 struct struct_with_mfp_1 { char *a; char *b; test_struct_mfp c; };
 void f_struct_with_mfp_1(struct_with_mfp_1 a) { (void)a; }
 
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mfp_too_much{{.*}}({{.*}} byval {{.*}} %a, i32 %x)
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mfp_too_much{{.*}}({{.*}} byval({{.*}}) {{.*}} %a, i32 %x)
 struct struct_with_mfp_too_much {
   char *a; char *b; char *c; test_struct_mfp d;
 };
diff --git a/clang/test/CodeGenCXX/x86_64-arguments.cpp b/clang/test/CodeGenCXX/x86_64-arguments.cpp
index c7eca2386af78..2ea2df3dc9cd8 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments.cpp
@@ -40,11 +40,11 @@ void f_struct_with_mdp(struct_with_mdp a) { (void)a; }
 
 // A struct with anything before a member function will be too big and
 // goes in memory.
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mfp_0{{.*}}(%struct{{.*}} byval align 8 %a)
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mfp_0{{.*}}(%struct{{.*}} byval(%struct{{.*}}) align 8 %a)
 struct struct_with_mfp_0 { char a; s4_mfp b; };
 void f_struct_with_mfp_0(struct_with_mfp_0 a) { (void)a; }
 
-// CHECK-LABEL: define void @{{.*}}f_struct_with_mfp_1{{.*}}(%struct{{.*}} byval align 8 %a)
+// CHECK-LABEL: define void @{{.*}}f_struct_with_mfp_1{{.*}}(%struct{{.*}} byval(%struct{{.*}}) align 8 %a)
 struct struct_with_mfp_1 { void *a; s4_mfp b; };
 void f_struct_with_mfp_1(struct_with_mfp_1 a) { (void)a; }
 
@@ -139,7 +139,7 @@ namespace test7 {
   // Check that the StringRef is passed byval instead of expanded
   // (which would split it between registers and memory).
   // rdar://problem/9686430
-  // CHECK: define void @_ZN5test71xENS_1AES0_llNS_9StringRefE({{.*}} byval align 8)
+  // CHECK: define void @_ZN5test71xENS_1AES0_llNS_9StringRefE({{.*}} byval({{.*}}) align 8)
 
   // And a couple extra related tests:
   A y(A, long double, long, long, StringRef) { return A(); }
@@ -147,12 +147,12 @@ namespace test7 {
   struct StringDouble {char * ptr; double d;};
   A z(A, A, A, A, A, StringDouble) { return A(); }
   A zz(A, A, A, A, StringDouble) { return A(); }
-  // CHECK: define void @_ZN5test71zENS_1AES0_S0_S0_S0_NS_12StringDoubleE({{.*}} byval align 8)
+  // CHECK: define void @_ZN5test71zENS_1AES0_S0_S0_S0_NS_12StringDoubleE({{.*}} byval({{.*}}) align 8)
   // CHECK: define void @_ZN5test72zzENS_1AES0_S0_S0_NS_12StringDoubleE({{.*}} i8*
 }
 
 namespace test8 {
-  // CHECK: declare void @_ZN5test83fooENS_1BE(%"class.test8::B"* byval align 8)
+  // CHECK: declare void @_ZN5test83fooENS_1BE(%"class.test8::B"* byval(%"class.test8::B") align 8)
   class A {
    char big[17];
   };
@@ -176,12 +176,12 @@ namespace test9 {
   // CHECK: define void @_ZN5test93fooEPNS_1SEPNS_1TE([[S:%.*]]*, [[T:%.*]]*)
   void foo(S*, T*) {}
 
-  // CHECK: define void @_ZN5test91aEiiiiNS_1TEPv([[S]]* noalias sret {{%.*}}, i32, i32, i32, i32, [[T]]* byval align 8, i8*)
+  // CHECK: define void @_ZN5test91aEiiiiNS_1TEPv([[S]]* noalias sret {{%.*}}, i32, i32, i32, i32, [[T]]* byval([[T]]) align 8, i8*)
   S a(int, int, int, int, T, void*) {
     return S();
   }
 
-  // CHECK: define [[S]]* @_ZN5test91bEPNS_1SEiiiiNS_1TEPv([[S]]* {{%.*}}, i32, i32, i32, i32, [[T:%.*]]* byval align 8, i8*)
+  // CHECK: define [[S]]* @_ZN5test91bEPNS_1SEiiiiNS_1TEPv([[S]]* {{%.*}}, i32, i32, i32, i32, [[T:%.*]]* byval([[T]]) align 8, i8*)
   S* b(S* sret, int, int, int, int, T, void*) {
     return sret;
   }
@@ -207,7 +207,7 @@ struct BasePacked {
 struct DerivedPacked : public BasePacked {
   int three;
 };
-// CHECK-LABEL: define i32 @_ZN6test1020FuncForDerivedPackedENS_13DerivedPackedE({{.*}}* byval align 8
+// CHECK-LABEL: define i32 @_ZN6test1020FuncForDerivedPackedENS_13DerivedPackedE({{.*}}* byval({{.*}}) align 8
 int FuncForDerivedPacked(DerivedPacked d) {
   return d.three;
 }
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 6f923b7fd403f..cdbf28bbcad87 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -43,7 +43,7 @@ struct LargeStructTwoMember {
 struct LargeStructOneMember g_s;
 #endif
 
-// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
+// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
 // AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
@@ -63,8 +63,8 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
-// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
-// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
   Mat64X64 out;
   return out;
@@ -86,7 +86,7 @@ void FuncOneMember(struct StructOneMember u) {
   u.x = (int2)(0, 0);
 }
 
-// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %u)
 // AMDGCN-NOT: addrspacecast
 // AMDGCN:   store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
 void FuncOneLargeMember(struct LargeStructOneMember u) {
@@ -97,7 +97,7 @@ void FuncOneLargeMember(struct LargeStructOneMember u) {
 // AMDGCN20:  %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN20:  %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
 // AMDGCN20:  call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
-// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+// AMDGCN20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
 #if __OPENCL_C_VERSION__ >= 200
 void test_indirect_arg_globl(void) {
   FuncOneLargeMember(g_s);
@@ -108,7 +108,7 @@ void test_indirect_arg_globl(void) {
 // AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
 // AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
-// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
 kernel void test_indirect_arg_local(void) {
   local struct LargeStructOneMember l_s;
   FuncOneLargeMember(l_s);
@@ -117,7 +117,7 @@ kernel void test_indirect_arg_local(void) {
 // AMDGCN-LABEL: define void @test_indirect_arg_private()
 // AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN-NOT: @llvm.memcpy
-// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[p_s]])
 void test_indirect_arg_private(void) {
   struct LargeStructOneMember p_s;
   FuncOneLargeMember(p_s);
@@ -142,7 +142,7 @@ kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
 // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
 // AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
-// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+// AMDGCN:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval(%struct.LargeStructOneMember) align 8 %[[U]])
 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
   FuncOneLargeMember(u);
 }
@@ -152,7 +152,7 @@ void FuncTwoMember(struct StructTwoMember u) {
   u.y = (int2)(0, 0);
 }
 
-// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %u)
 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
   u.y[0] = (int2)(0, 0);
 }
@@ -171,7 +171,7 @@ kernel void KernelTwoMember(struct StructTwoMember u) {
 // AMDGCN-SAME:  (%struct.LargeStructTwoMember %[[u_coerce:.*]])
 // AMDGCN:  %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
 // AMDGCN:  store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
-// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+// AMDGCN:  call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval(%struct.LargeStructTwoMember) align 8 %[[u]])
 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
   FuncLargeTwoMember(u);
 }
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 8b03fb00e3fde..0a7f289cb2f7c 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -309,7 +309,7 @@ void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t ar
 // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
 void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
 
-// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* nocapture byval(%struct.flexible_array) align 4 %arg)
 void func_flexible_array_arg(flexible_array arg) { }
 
 // CHECK: define float @func_f32_ret()
@@ -450,11 +450,11 @@ flexible_array func_flexible_array_ret()
 // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
 void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
 
-// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* nocapture byval(%struct.struct_arg) align 4 %s)
 void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
 
 // XXX - Why don't the inner structs flatten?
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* nocapture byval(%struct.num_regs_nested_struct) align 8 %arg4)
 void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
 
 // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -469,7 +469,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
 // CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
 void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
 
-// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* nocapture readonly byval(%struct.large_struct_padding) align 8 %arg)
 void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
   *out = arg;
 }
@@ -487,7 +487,7 @@ void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair
 void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                      short4 arg4, short4 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
 void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
                                short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
 
@@ -495,7 +495,7 @@ void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg
 void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                      short3 arg4, short3 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg7)
 void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
                           short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
 
@@ -505,7 +505,7 @@ void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                      short2 arg8, short2 arg9, short2 arg10, short2 arg11,
                      struct_4regs arg13) { }
 
-// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs) align 4 %arg13)
 void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
                           short2 arg4, short2 arg5, short2 arg6, short2 arg7,
                           short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -515,7 +515,7 @@ void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
 void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                     char2 arg4, char2 arg5, struct_4regs arg6) { }
 
-// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval  align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* nocapture byval(%struct.struct_4regs)  align 4 %arg7)
 void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
                          char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
 
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 8d77c18e7a550..4abeb92f600bf 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -81,7 +81,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic*
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
-  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange,
@@ -319,7 +319,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
-  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   enqueue_kernel(default_queue, flags, ndrange, block_A);
@@ -366,7 +366,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
   // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)*
   // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
-  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+  // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
   enqueue_kernel(default_queue, flags, ndrange, block_C);
diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 4392ef90677c5..e89237623478c 100644
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -27,7 +27,7 @@ typedef struct test_struct {
 kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
-// CHECK: struct.int_single* nocapture {{.*}} byval
+// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
 // CHECK: i32* nocapture %output
  output[0] = input.a;
 }
@@ -35,7 +35,7 @@ kernel void test_single(int_single input, global int* output) {
 kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
-// CHECK: struct.int_pair* nocapture {{.*}} byval
+// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
 // CHECK: i32* nocapture %output
  output[0] = (int)input.a;
  output[1] = (int)input.b;
@@ -44,7 +44,7 @@ kernel void test_pair(int_pair input, global int* output) {
 kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
-// CHECK: struct.test_struct* nocapture {{.*}} byval
+// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
 // CHECK: i32* nocapture %output
  output[0] = input.elementA;
  output[1] = input.elementB;
diff --git a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
index e3a4cd370341f..167496cb84fcb 100644
--- a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
@@ -26,7 +26,7 @@ struct T1 {
 #pragma omp declare target
 T a = T();
 T f = a;
-// CHECK: define{{ dso_local | }}void @{{.+}}foo{{.+}}([[T]]* byval align {{.+}})
+// CHECK: define{{ dso_local | }}void @{{.+}}foo{{.+}}([[T]]* byval([[T]]) align {{.+}})
 void foo(T a = T()) {
   return;
 }
@@ -46,7 +46,7 @@ void baz() {
 }
 T1 a1 = T1();
 T1 f1 = a1;
-// CHECK: define{{ dso_local | }}void @{{.+}}foo1{{.+}}([[T1]]* byval align {{.+}})
+// CHECK: define{{ dso_local | }}void @{{.+}}foo1{{.+}}([[T1]]* byval([[T1]]) align {{.+}})
 void foo1(T1 a = T1()) {
   return;
 }

From 663d762c9a5344c9209d1180e823fe22b26c433f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 5 Jun 2019 21:15:52 +0000
Subject: [PATCH 1164/1176] NewGVN: Handle addrspacecast

The AllConstant check needs to be moved out of the if/else if chain to
avoid a test regression. The "there is no SimplifyZExt" comment
puzzles me, since there is SimplifyCastInst. Additionally, the
Simplify* calls seem to not see the operand as constant, so this needs
to be tried if the simplify failed.

llvm-svn: 362653
---
 llvm/lib/Transforms/Scalar/NewGVN.cpp        |   5 +-
 llvm/test/Transforms/NewGVN/addrspacecast.ll | 108 +++++++++++++++++++
 2 files changed, 111 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/NewGVN/addrspacecast.ll

diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index c54da4f72df3e..412bd235d7994 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1166,9 +1166,9 @@ const Expression *NewGVN::createExpression(Instruction *I) const {
         SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
-  } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
+  } else if (auto *CI = dyn_cast<CastInst>(I)) {
     Value *V =
-        SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
+        SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
     if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
       return SimplifiedE;
   } else if (isa<GetElementPtrInst>(I)) {
@@ -1984,6 +1984,7 @@ NewGVN::performSymbolicEvaluation(Value *V,
       E = performSymbolicLoadEvaluation(I);
       break;
     case Instruction::BitCast:
+    case Instruction::AddrSpaceCast:
       E = createExpression(I);
       break;
     case Instruction::ICmp:
diff --git a/llvm/test/Transforms/NewGVN/addrspacecast.ll b/llvm/test/Transforms/NewGVN/addrspacecast.ll
new file mode 100644
index 0000000000000..e9cef440cfbe7
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/addrspacecast.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -newgvn < %s | FileCheck %s
+
+define i32 addrspace(1)* @addrspacecast(i32* %ptr) {
+; CHECK-LABEL: @addrspacecast(
+; CHECK-NEXT:  block1:
+; CHECK-NEXT:    [[Z1:%.*]] = addrspacecast i32* [[PTR:%.*]] to i32 addrspace(1)*
+; CHECK-NEXT:    br label [[BLOCK2:%.*]]
+; CHECK:       block2:
+; CHECK-NEXT:    store i32 addrspace(1)* [[Z1]], i32 addrspace(1)** undef
+; CHECK-NEXT:    ret i32 addrspace(1)* [[Z1]]
+;
+block1:
+  %z1 = addrspacecast i32* %ptr to i32 addrspace(1)*
+  br label %block2
+
+block2:
+  %z2 = addrspacecast i32* %ptr to i32 addrspace(1)*
+  store i32 addrspace(1)* %z1, i32 addrspace(1)** undef
+  ret i32 addrspace(1)* %z2
+}
+
+; Make sure casts with the same source value but different result
+; address spaces aren't incorrectly merged.
+define i32 addrspace(1)* @addrspacecast_different_result_types(i32* %ptr) {
+; CHECK-LABEL: @addrspacecast_different_result_types(
+; CHECK-NEXT:  block1:
+; CHECK-NEXT:    [[Z1:%.*]] = addrspacecast i32* [[PTR:%.*]] to i32 addrspace(2)*
+; CHECK-NEXT:    br label [[BLOCK2:%.*]]
+; CHECK:       block2:
+; CHECK-NEXT:    [[Z2:%.*]] = addrspacecast i32* [[PTR]] to i32 addrspace(1)*
+; CHECK-NEXT:    store i32 addrspace(2)* [[Z1]], i32 addrspace(2)** undef
+; CHECK-NEXT:    ret i32 addrspace(1)* [[Z2]]
+;
+block1:
+  %z1 = addrspacecast i32* %ptr to i32 addrspace(2)*
+  br label %block2
+
+block2:
+  %z2 = addrspacecast i32* %ptr to i32 addrspace(1)*
+  store i32 addrspace(2)* %z1, i32 addrspace(2)** undef
+  ret i32 addrspace(1)* %z2
+}
+
+define i32 addrspace(1)* @addrspacecast_simplify(i32 addrspace(1)* %ptr) {
+; CHECK-LABEL: @addrspacecast_simplify(
+; CHECK-NEXT:  block1:
+; CHECK-NEXT:    [[CAST0:%.*]] = addrspacecast i32 addrspace(1)* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    br label [[BLOCK2:%.*]]
+; CHECK:       block2:
+; CHECK-NEXT:    store i32 addrspace(1)* [[PTR]], i32 addrspace(1)** undef
+; CHECK-NEXT:    ret i32 addrspace(1)* [[PTR]]
+;
+block1:
+  %cast0 = addrspacecast i32 addrspace(1)* %ptr to i32*
+  %z1 = addrspacecast i32* %cast0 to i32 addrspace(1)*
+  br label %block2
+
+block2:
+  %z2 = addrspacecast i32* %cast0 to i32 addrspace(1)*
+  store i32 addrspace(1)* %z1, i32 addrspace(1)** undef
+  ret i32 addrspace(1)* %z2
+}
+
+@h = common local_unnamed_addr global i32* null, align 4
+
+define i32 addrspace(1)* @addrspacecast_constant() {
+; CHECK-LABEL: @addrspacecast_constant(
+; CHECK-NEXT:  block1:
+; CHECK-NEXT:    store i32* undef, i32** @h, align 4
+; CHECK-NEXT:    br label [[BLOCK2:%.*]]
+; CHECK:       block2:
+; CHECK-NEXT:    store i32 addrspace(1)* undef, i32 addrspace(1)** undef
+; CHECK-NEXT:    ret i32 addrspace(1)* undef
+;
+block1:
+  store i32* undef, i32** @h, align 4
+  %ptr = load i32*, i32** @h, align 4
+  %z1 = addrspacecast i32* %ptr to i32 addrspace(1)*
+  br label %block2
+
+block2:
+  %z2 = addrspacecast i32* %ptr to i32 addrspace(1)*
+  store i32 addrspace(1)* %z1, i32 addrspace(1)** undef
+  ret i32 addrspace(1)* %z2
+}
+
+define i32 addrspace(1)* @addrspacecast_leader(i32** %arg.ptr) {
+; CHECK-LABEL: @addrspacecast_leader(
+; CHECK-NEXT:  block1:
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32*, i32** [[ARG_PTR:%.*]]
+; CHECK-NEXT:    [[Z1:%.*]] = addrspacecast i32* [[LOAD0]] to i32 addrspace(1)*
+; CHECK-NEXT:    br label [[BLOCK2:%.*]]
+; CHECK:       block2:
+; CHECK-NEXT:    store i32 addrspace(1)* [[Z1]], i32 addrspace(1)** undef
+; CHECK-NEXT:    ret i32 addrspace(1)* [[Z1]]
+;
+block1:
+  %load0 = load i32*, i32** %arg.ptr
+  %z1 = addrspacecast i32* %load0 to i32 addrspace(1)*
+  br label %block2
+
+block2:
+  %load1 = load i32*, i32** %arg.ptr
+  %z2 = addrspacecast i32* %load1 to i32 addrspace(1)*
+  store i32 addrspace(1)* %z1, i32 addrspace(1)** undef
+  ret i32 addrspace(1)* %z2
+}

From ac111e526dd7fdcc39f93fa4722e12fac1c8fcf4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 5 Jun 2019 21:26:52 +0000
Subject: [PATCH 1165/1176] [InstCombine] simplify code for bitcast of
 insertelement; NFC

llvm-svn: 362655
---
 llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 1faaf0bf6af85..36be8bdf6f3ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2376,11 +2376,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       }
 
       // Otherwise, see if our source is an insert. If so, then use the scalar
-      // component directly.
-      if (InsertElementInst *IEI =
-            dyn_cast<InsertElementInst>(CI.getOperand(0)))
-        return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
-                                DestTy);
+      // component directly:
+      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
+      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
+        return new BitCastInst(InsElt->getOperand(1), DestTy);
     }
   }
 

From e3eeacd70a891941b10ad5a911e054dd5c87b3e3 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Wed, 5 Jun 2019 21:28:13 +0000
Subject: [PATCH 1166/1176] [CallSite removal] Refactoring llvm::InlineFunction
 APIs

Summary:
This change only unifies the API previous API pair accepting
CallInst and InvokeInst, thus making it easier to refactor
inliner pass ode to CallBase. The implementation of the unified
API still relies on the CallSite implementation.

Reviewers: eraman, chandlerc, jdoerfert

Reviewed By: jdoerfert

Subscribers: jdoerfert, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62283

llvm-svn: 362656
---
 llvm/include/llvm/Transforms/Utils/Cloning.h |  5 +----
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++--------
 2 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 86775b1af7586..872ab9cab85ca 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -229,10 +229,7 @@ class InlineFunctionInfo {
 /// and all varargs at the callsite will be passed to any calls to
 /// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs
 /// are only used by ForwardVarArgsTo.
-InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
-                            AAResults *CalleeAAR = nullptr,
-                            bool InsertLifetime = true);
-InlineResult InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                             AAResults *CalleeAAR = nullptr,
                             bool InsertLifetime = true);
 InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 7f1074915a345..6e4be9ea727a6 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -84,16 +84,10 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
   cl::init(true), cl::Hidden,
   cl::desc("Convert align attributes to assumptions during inlining."));
 
-llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                                         AAResults *CalleeAAR,
                                         bool InsertLifetime) {
-  return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
-}
-
-llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
-                                        AAResults *CalleeAAR,
-                                        bool InsertLifetime) {
-  return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
+  return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
 }
 
 namespace {

From acb56090639de1413b0147c66f78698844f37114 Mon Sep 17 00:00:00 2001
From: Joseph Tremoulet <jotrem@microsoft.com>
Date: Wed, 5 Jun 2019 21:30:10 +0000
Subject: [PATCH 1167/1176] [EarlyCSE] Add tests for negated min/max/abs [NFC]

Summary:
I'm planning to update the hashing logic to recognize their equivalence
in a subsequent change (D62644).

Reviewers: spatel

Reviewed By: spatel

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62918

llvm-svn: 362657
---
 llvm/test/Transforms/EarlyCSE/commute.ll | 102 +++++++++++++++++++++++
 1 file changed, 102 insertions(+)

diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll
index 488acf69ea49a..32dd55b5bcebf 100644
--- a/llvm/test/Transforms/EarlyCSE/commute.ll
+++ b/llvm/test/Transforms/EarlyCSE/commute.ll
@@ -107,6 +107,24 @@ define i1 @smin_swapped(i8 %a, i8 %b) {
   ret i1 %r
 }
 
+; Min/max can also have an inverted predicate and select operands.
+; TODO: Ensure we always recognize this (currently depends on hash collision)
+
+define i1 @smin_inverted(i8 %a, i8 %b) {
+; CHECK-LABEL: @smin_inverted(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[B]]
+; CHECK:         ret i1
+;
+  %cmp1 = icmp slt i8 %a, %b
+  %cmp2 = xor i1 %cmp1, -1
+  %m1 = select i1 %cmp1, i8 %a, i8 %b
+  %m2 = select i1 %cmp2, i8 %b, i8 %a
+  %r = icmp eq i8 %m1, %m2
+  ret i1 %r
+}
+
 define i8 @smax_commute(i8 %a, i8 %b) {
 ; CHECK-LABEL: @smax_commute(
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i8 [[A:%.*]], [[B:%.*]]
@@ -137,6 +155,22 @@ define i8 @smax_swapped(i8 %a, i8 %b) {
   ret i8 %r
 }
 
+; TODO: Ensure we always recognize this (currently depends on hash collision)
+define i1 @smax_inverted(i8 %a, i8 %b) {
+; CHECK-LABEL: @smax_inverted(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[B]]
+; CHECK:         ret i1
+;
+  %cmp1 = icmp sgt i8 %a, %b
+  %cmp2 = xor i1 %cmp1, -1
+  %m1 = select i1 %cmp1, i8 %a, i8 %b
+  %m2 = select i1 %cmp2, i8 %b, i8 %a
+  %r = icmp eq i8 %m1, %m2
+  ret i1 %r
+}
+
 define i8 @umin_commute(i8 %a, i8 %b) {
 ; CHECK-LABEL: @umin_commute(
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
@@ -169,6 +203,22 @@ define <2 x i8> @umin_swapped(<2 x i8> %a, <2 x i8> %b) {
   ret <2 x i8> %r
 }
 
+; TODO: Ensure we always recognize this (currently depends on hash collision)
+define i1 @umin_inverted(i8 %a, i8 %b) {
+; CHECK-LABEL: @umin_inverted(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[B]]
+; CHECK:         ret i1
+;
+  %cmp1 = icmp ult i8 %a, %b
+  %cmp2 = xor i1 %cmp1, -1
+  %m1 = select i1 %cmp1, i8 %a, i8 %b
+  %m2 = select i1 %cmp2, i8 %b, i8 %a
+  %r = icmp eq i8 %m1, %m2
+  ret i1 %r
+}
+
 define i8 @umax_commute(i8 %a, i8 %b) {
 ; CHECK-LABEL: @umax_commute(
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
@@ -200,6 +250,22 @@ define i8 @umax_swapped(i8 %a, i8 %b) {
   ret i8 %r
 }
 
+; TODO: Ensure we always recognize this (currently depends on hash collision)
+define i1 @umax_inverted(i8 %a, i8 %b) {
+; CHECK-LABEL: @umax_inverted(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP2:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[B]]
+; CHECK:         ret i1
+;
+  %cmp1 = icmp ugt i8 %a, %b
+  %cmp2 = xor i1 %cmp1, -1
+  %m1 = select i1 %cmp1, i8 %a, i8 %b
+  %m2 = select i1 %cmp2, i8 %b, i8 %a
+  %r = icmp eq i8 %m1, %m2
+  ret i1 %r
+}
+
 ; Min/max may exist with non-canonical operands. Value tracking can match those.
 
 define i8 @smax_nsw(i8 %a, i8 %b) {
@@ -236,6 +302,24 @@ define i8 @abs_swapped(i8 %a) {
   ret i8 %r
 }
 
+; TODO: Ensure we always recognize this (currently depends on hash collision)
+define i8 @abs_inverted(i8 %a) {
+; CHECK-LABEL: @abs_inverted(
+; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[A:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i8 [[A]], 0
+; CHECK-NEXT:    [[CMP2:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[NEG]]
+; CHECK:         ret i8
+;
+  %neg = sub i8 0, %a
+  %cmp1 = icmp sgt i8 %a, 0
+  %cmp2 = xor i1 %cmp1, -1
+  %m1 = select i1 %cmp1, i8 %a, i8 %neg
+  %m2 = select i1 %cmp2, i8 %neg, i8 %a
+  %r = or i8 %m2, %m1
+  ret i8 %r
+}
+
 define i8 @nabs_swapped(i8 %a) {
 ; CHECK-LABEL: @nabs_swapped(
 ; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[A:%.*]]
@@ -253,6 +337,24 @@ define i8 @nabs_swapped(i8 %a) {
   ret i8 %r
 }
 
+; TODO: Ensure we always recognize this (currently depends on hash collision)
+define i8 @nabs_inverted(i8 %a) {
+; CHECK-LABEL: @nabs_inverted(
+; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[A:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i8 [[A]], 0
+; CHECK-NEXT:    [[CMP2:%.*]] = xor i1 [[CMP1]], true
+; CHECK-NEXT:    [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[NEG]]
+; CHECK:         ret i8
+;
+  %neg = sub i8 0, %a
+  %cmp1 = icmp slt i8 %a, 0
+  %cmp2 = xor i1 %cmp1, -1
+  %m1 = select i1 %cmp1, i8 %a, i8 %neg
+  %m2 = select i1 %cmp2, i8 %neg, i8 %a
+  %r = xor i8 %m2, %m1
+  ret i8 %r
+}
+
 ; These two tests make sure we still consider it a match when the RHS of the
 ; compares are different.
 define i8 @abs_different_constants(i8 %a) {

From 5347024e283c6a9630bf86433d45d7c3d03cfec4 Mon Sep 17 00:00:00 2001
From: Marshall Clow <mclow.lists@gmail.com>
Date: Wed, 5 Jun 2019 21:52:19 +0000
Subject: [PATCH 1168/1176] Update issue statuses. Reviewed as
 https://reviews.llvm.org/D62932

llvm-svn: 362659
---
 libcxx/www/cxx1z_status.html | 6 +++---
 libcxx/www/cxx2a_status.html | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libcxx/www/cxx1z_status.html b/libcxx/www/cxx1z_status.html
index b8357ff2840b2..94950ded285ce 100644
--- a/libcxx/www/cxx1z_status.html
+++ b/libcxx/www/cxx1z_status.html
@@ -321,7 +321,7 @@ <h3>Library Working group Issues Status</h3>
 	<tr><td><a href="https://wg21.link/LWG2426">2426</a></td><td>Issue about compare_exchange</td><td>Oulu</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2436">2436</a></td><td>Comparators for associative containers should always be CopyConstructible</td><td>Oulu</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2441">2441</a></td><td>Exact-width atomic typedefs should be provided</td><td>Oulu</td><td>Complete</td></tr>
-	<tr><td><a href="https://wg21.link/LWG2451">2451</a></td><td>[fund.ts.v2] optional should 'forward' T's implicit conversions</td><td>Oulu</td><td></td></tr>
+	<tr><td><a href="https://wg21.link/LWG2451">2451</a></td><td>[fund.ts.v2] optional should 'forward' T's implicit conversions</td><td>Oulu</td><td>Nothing to do</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2509">2509</a></td><td>[fund.ts.v2] any_cast doesn't work with rvalue reference targets and cannot move with a value target</td><td>Oulu</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2516">2516</a></td><td>[fund.ts.v2] Public "exposition only" members in observer_ptr</td><td>Oulu</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2542">2542</a></td><td>Missing const requirements for associative containers</td><td>Oulu</td><td></td></tr>
@@ -408,7 +408,7 @@ <h3>Library Working group Issues Status</h3>
 	<tr><td><a href="https://wg21.link/LWG2682">2682</a></td><td>filesystem::copy() won't create a symlink to a directory</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2686">2686</a></td><td>Why is std::hash specialized for error_code, but not error_condition?</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2694">2694</a></td><td>Application of LWG 436 accidentally deleted definition of "facet"</td><td>Issaquah</td><td>Complete</td></tr>
-	<tr><td><a href="https://wg21.link/LWG2696">2696</a></td><td>Interaction between make_shared and enable_shared_from_this is underspecified</td><td>Issaquah</td><td></td></tr>
+	<tr><td><a href="https://wg21.link/LWG2696">2696</a></td><td>Interaction between make_shared and enable_shared_from_this is underspecified</td><td>Issaquah</td><td>Nothing to do</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2699">2699</a></td><td>Missing restriction in [numeric.requirements]</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2712">2712</a></td><td>copy_file(from, to, ...) has a number of unspecified error conditions</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2722">2722</a></td><td>equivalent incorrectly specifies throws clause</td><td>Issaquah</td><td>Complete</td></tr>
@@ -426,7 +426,7 @@ <h3>Library Working group Issues Status</h3>
 	<tr><td><a href="https://wg21.link/LWG2747">2747</a></td><td>Possibly redundant std::move in [alg.foreach]</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2748">2748</a></td><td>swappable traits for optionals</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2749">2749</a></td><td>swappable traits for variants</td><td>Issaquah</td><td>Complete</td></tr>
-	<tr><td><a href="https://wg21.link/LWG2750">2750</a></td><td>[fund.ts.v2] LWG 2451 conversion constructor constraint</td><td>Issaquah</td><td></td></tr>
+	<tr><td><a href="https://wg21.link/LWG2750">2750</a></td><td>[fund.ts.v2] LWG 2451 conversion constructor constraint</td><td>Issaquah</td><td>Nothing to do</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2752">2752</a></td><td>"Throws:" clauses of async and packaged_task are unimplementable</td><td>Issaquah</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2755">2755</a></td><td>[string.view.io] uses non-existent basic_string_view::to_string function</td><td>Issaquah</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2756">2756</a></td><td>C++ WP optional<T> should 'forward' T's implicit conversions</td><td>Issaquah</td><td>Complete</td></tr>
diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html
index 99438fffa0615..4ee69978024ec 100644
--- a/libcxx/www/cxx2a_status.html
+++ b/libcxx/www/cxx2a_status.html
@@ -223,7 +223,7 @@ <h3>Library Working group Issues Status</h3>
 	<tr><td><a href="https://wg21.link/LWG2849">2849</a></td><td>Why does <tt>!is_regular_file(from)</tt> cause <tt>copy_file</tt> to report a "file already exists" error?</td><td>Jacksonville</td><td><i>Nothing to do</i></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2851">2851</a></td><td><tt>std::filesystem</tt> enum classes are now underspecified</td><td>Jacksonville</td><td><i>Nothing to do</i></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2946">2946</a></td><td>LWG 2758's resolution missed further corrections</td><td>Jacksonville</td><td>Complete</td></tr>
-	<tr><td><a href="https://wg21.link/LWG2969">2969</a></td><td><tt>polymorphic_allocator::construct()</tt> shouldn't pass <tt>resource()</tt></td><td>Jacksonville</td><td></td></tr>
+	<tr><td><a href="https://wg21.link/LWG2969">2969</a></td><td><tt>polymorphic_allocator::construct()</tt> shouldn't pass <tt>resource()</tt></td><td>Jacksonville</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG2975">2975</a></td><td>Missing case for <tt>pair</tt> construction in scoped and polymorphic allocators</td><td>Jacksonville</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG2989">2989</a></td><td><tt>path</tt>'s stream insertion operator lets you insert everything under the sun</td><td>Jacksonville</td><td>Completed</td></tr>
 	<tr><td><a href="https://wg21.link/LWG3000">3000</a></td><td><tt>monotonic_memory_resource::do_is_equal</tt> uses <tt>dynamic_cast</tt> unnecessarily</td><td>Jacksonville</td><td></td></tr>

From 8f500a6f9ca0e56e41513435d0257c84ddabb566 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Wed, 5 Jun 2019 21:54:34 +0000
Subject: [PATCH 1169/1176] [libcxx][test] Include test_workarounds.h where
 needed

Some tests require `TEST_WORKAROUND_CONSTEXPR_IMPLIES_NOEXCEPT`, but they
did not include the header that defines that macro.

Thanks to Michael Park for the patch.

Differential Revision: https://reviews.llvm.org/D62920

llvm-svn: 362660
---
 libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp | 1 +
 libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp
index 4b0ae15c3525b..d8711c7636653 100644
--- a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp
@@ -27,6 +27,7 @@
 //  variant<Types...>&& v);
 
 #include "test_macros.h"
+#include "test_workarounds.h"
 #include "variant_test_helpers.hpp"
 #include <cassert>
 #include <type_traits>
diff --git a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp
index bd9153671e295..5272c0981a8ac 100644
--- a/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.get/get_type.pass.cpp
@@ -21,6 +21,7 @@
 // variant<Types...>&& v);
 
 #include "test_macros.h"
+#include "test_workarounds.h"
 #include "variant_test_helpers.hpp"
 #include <cassert>
 #include <type_traits>

From b812b7a45ed159fcc4b1b26f9200885d93b68fc5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 5 Jun 2019 22:20:47 +0000
Subject: [PATCH 1170/1176] AMDGPU: Invert frame index offset interpretation

Since the beginning, the offset of a frame index has been consistently
interpreted backwards. It was treating it as an offset from the
scratch wave offset register as a frame register. The correct
interpretation is the offset from the SP on entry to the function,
before the prolog. Frame index elimination then should select either
SP or another register as an FP.

Treat the scratch wave offset on kernel entry as the pre-incremented
SP. Rely more heavily on the standard hasFP and frame pointer
elimination logic, and clean up the private reservation code. This
saves a copy in most callee functions.

The kernel prolog emission code is still kind of a mess relying on
checking the uses of physical registers, which I would prefer to
eliminate.

Currently selection directly emits MUBUF instructions, which require
using a reference to some register. Use the register chosen for SP,
and then ignore this later. This should probably be cleaned up to use
pseudos that don't refer to any specific base register until frame
index elimination.

Add a workaround for shaders using large numbers of SGPRs. I'm not
sure these cases were ever working correctly, since as far as I can
tell the logic for figuring out which SGPR is the scratch wave offset
doesn't match up with the shader input initialization in the shader
programming guide.

llvm-svn: 362661
---
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |   8 +-
 llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp |   5 +-
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    | 175 ++++++++---------
 llvm/lib/Target/AMDGPU/SIFrameLowering.h      |  10 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 142 +++++++-------
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  14 +-
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |  10 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  61 +++---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.h       |   2 -
 llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll |  18 +-
 .../CodeGen/AMDGPU/call-argument-types.ll     |  38 ++--
 .../test/CodeGen/AMDGPU/callee-frame-setup.ll |   7 +-
 .../AMDGPU/callee-special-input-sgprs.ll      |   6 +-
 .../AMDGPU/callee-special-input-vgprs.ll      |  31 ++-
 .../CodeGen/AMDGPU/frame-index-elimination.ll |  22 +--
 llvm/test/CodeGen/AMDGPU/function-args.ll     | 184 +++++++++---------
 llvm/test/CodeGen/AMDGPU/load-hi16.ll         |  34 ++--
 llvm/test/CodeGen/AMDGPU/load-lo16.ll         |  30 +--
 .../CodeGen/AMDGPU/mubuf-legalize-operands.ll |  22 +--
 .../AMDGPU/pei-reg-scavenger-position.mir     |  12 +-
 .../AMDGPU/sgpr-spill-wrong-stack-id.mir      |  52 ++---
 llvm/test/CodeGen/AMDGPU/sibling-call.ll      |  57 +++---
 .../CodeGen/AMDGPU/sp-too-many-input-sgprs.ll | 102 ++++++++++
 .../AMDGPU/spill-empty-live-interval.mir      |   6 +-
 .../AMDGPU/spill-offset-calculation.ll        |   4 +-
 llvm/test/CodeGen/AMDGPU/stack-realign.ll     |   9 +-
 .../stack-slot-color-sgpr-vgpr-spills.mir     |   8 +-
 llvm/test/CodeGen/AMDGPU/store-hi16.ll        |   8 +-
 .../AMDGPU/subreg-split-live-in-error.mir     |   7 +-
 .../MIR/AMDGPU/machine-function-info.ll       |   4 +-
 30 files changed, 594 insertions(+), 494 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/sp-too-many-input-sgprs.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 59a27ab140155..2c104758047e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1361,10 +1361,10 @@ std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const
     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                               FI->getValueType(0));
 
-    // If we can resolve this to a frame index access, this is relative to the
-    // frame pointer SGPR.
-    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
-                                                   MVT::i32));
+    // If we can resolve this to a frame index access, this will be relative to
+    // either the stack or frame pointer SGPR.
+    return std::make_pair(
+        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
   }
 
   // If we don't know this private access is a local stack object, it needs to
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 60f23f7d778b4..140ca6e33fe75 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -83,6 +83,9 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
 }
 
 unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const SIFrameLowering *TFI =
+      MF.getSubtarget<GCNSubtarget>().getFrameLowering();
   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  return FuncInfo->getFrameOffsetReg();
+  return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
+                        : FuncInfo->getStackPtrOffsetReg();
 }
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4b2124b14c054..7f3150bdd011f 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -164,34 +164,29 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
   return ScratchRsrcReg;
 }
 
-// Shift down registers reserved for the scratch wave offset and stack pointer
-// SGPRs.
-std::pair<unsigned, unsigned>
-SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
-  const GCNSubtarget &ST,
-  const SIInstrInfo *TII,
-  const SIRegisterInfo *TRI,
-  SIMachineFunctionInfo *MFI,
-  MachineFunction &MF) const {
+// Shift down registers reserved for the scratch wave offset.
+unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+    const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+    SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
 
+  assert(MFI->isEntryFunction());
+
   // No replacement necessary.
   if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
-      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
-    assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
-    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
+    return AMDGPU::NoRegister;
   }
 
-  unsigned SPReg = MFI->getStackPtrOffsetReg();
   if (ST.hasSGPRInitBug())
-    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+    return ScratchWaveOffsetReg;
 
   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
 
   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
   if (NumPreloaded > AllSGPRs.size())
-    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+    return ScratchWaveOffsetReg;
 
   AllSGPRs = AllSGPRs.slice(NumPreloaded);
 
@@ -212,7 +207,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   unsigned ReservedRegCount = 13;
 
   if (AllSGPRs.size() < ReservedRegCount)
-    return std::make_pair(ScratchWaveOffsetReg, SPReg);
+    return ScratchWaveOffsetReg;
 
   bool HandledScratchWaveOffsetReg =
     ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
@@ -225,14 +220,20 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
         HandledScratchWaveOffsetReg = true;
 
         MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
+          assert(!hasFP(MF));
+          MFI->setStackPtrOffsetReg(Reg);
+        }
+
         MFI->setScratchWaveOffsetReg(Reg);
+        MFI->setFrameOffsetReg(Reg);
         ScratchWaveOffsetReg = Reg;
         break;
       }
     }
   }
 
-  return std::make_pair(ScratchWaveOffsetReg, SPReg);
+  return ScratchWaveOffsetReg;
 }
 
 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
@@ -265,38 +266,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   if (MFI->hasFlatScratchInit())
     emitFlatScratchInit(ST, MF, MBB);
 
-  unsigned SPReg = MFI->getStackPtrOffsetReg();
-  if (SPReg != AMDGPU::SP_REG) {
-    assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
-
-    DebugLoc DL;
-    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
-    int64_t StackSize = FrameInfo.getStackSize();
-
-    if (StackSize == 0) {
-      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
-        .addReg(MFI->getScratchWaveOffsetReg());
-    } else {
-      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
-        .addReg(MFI->getScratchWaveOffsetReg())
-        .addImm(StackSize * ST.getWavefrontSize());
-    }
-  }
-
   unsigned ScratchRsrcReg
     = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
 
-  unsigned ScratchWaveOffsetReg;
-  std::tie(ScratchWaveOffsetReg, SPReg)
-    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
-  // It's possible to have uses of only ScratchWaveOffsetReg without
-  // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
-  // but the inverse is not true.
-  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
-    assert(ScratchRsrcReg == AMDGPU::NoRegister);
-    return;
-  }
+  unsigned ScratchWaveOffsetReg =
+      getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
 
   // We need to insert initialization of the scratch resource descriptor.
   unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
@@ -308,18 +282,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
       AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
   }
 
-  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
+                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);
   bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
                          MRI.isPhysRegUsed(ScratchRsrcReg);
 
+  // FIXME: Hack to not crash in situations which emitted an error.
+  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
+    return;
+
   // We added live-ins during argument lowering, but since they were not used
   // they were deleted. We're adding the uses now, so add them back.
-  if (OffsetRegUsed) {
-    assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
-           "scratch wave offset input is required");
-    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
-    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
-  }
+  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
 
   if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
     assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
@@ -360,11 +335,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }
 
-  if (OffsetRegUsed &&
-      PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+  unsigned SPReg = MFI->getStackPtrOffsetReg();
+  assert(SPReg != AMDGPU::SP_REG);
+
+  // FIXME: Remove the isPhysRegUsed checks
+  const bool HasFP = hasFP(MF);
+
+  if (HasFP || OffsetRegUsed) {
+    assert(ScratchWaveOffsetReg);
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
-      .addReg(PreloadedScratchWaveOffsetReg,
-              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
+      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
   }
 
   if (CopyBuffer && !CopyBufferFirst) {
@@ -372,9 +352,26 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
       .addReg(PreloadedPrivateBufferReg, RegState::Kill);
   }
 
-  if (ResourceRegUsed)
+  if (ResourceRegUsed) {
     emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
         PreloadedPrivateBufferReg, ScratchRsrcReg);
+  }
+
+  if (HasFP) {
+    DebugLoc DL;
+    const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+    int64_t StackSize = FrameInfo.getStackSize();
+
+    // On kernel entry, the private scratch wave offset is the SP value.
+    if (StackSize == 0) {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
+        .addReg(MFI->getScratchWaveOffsetReg());
+    } else {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+        .addReg(MFI->getScratchWaveOffsetReg())
+        .addImm(StackSize * ST.getWavefrontSize());
+    }
+  }
 }
 
 // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
@@ -567,15 +564,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.begin();
   DebugLoc DL;
 
-  // XXX - Is this the right predicate?
-
-  bool NeedFP = hasFP(MF);
+  bool HasFP = false;
   uint32_t NumBytes = MFI.getStackSize();
   uint32_t RoundedSize = NumBytes;
-  const bool NeedsRealignment = TRI.needsStackRealignment(MF);
 
-  if (NeedsRealignment) {
-    assert(NeedFP);
+  if (TRI.needsStackRealignment(MF)) {
+    HasFP = true;
     const unsigned Alignment = MFI.getMaxAlignment();
 
     RoundedSize += Alignment;
@@ -599,7 +593,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .addImm(-Alignment * ST.getWavefrontSize())
       .setMIFlag(MachineInstr::FrameSetup);
     FuncInfo->setIsStackRealigned(true);
-  } else if (NeedFP) {
+  } else if ((HasFP = hasFP(MF))) {
     // If we need a base pointer, set it up here. It's whatever the value of
     // the stack pointer is at this point. Any variable size objects will be
     // allocated after this, so we can still use the base pointer to reference
@@ -609,7 +603,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (RoundedSize != 0 && hasSP(MF)) {
+  if (HasFP && RoundedSize != 0) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
       .addReg(StackPtrReg)
       .addImm(RoundedSize * ST.getWavefrontSize())
@@ -693,23 +687,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
       .addReg(ScratchExecCopy);
   }
 
-  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
-  if (StackPtrReg == AMDGPU::NoRegister)
-    return;
-
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  uint32_t NumBytes = MFI.getStackSize();
-
-  // FIXME: Clarify distinction between no set SP and SP. For callee functions,
-  // it's really whether we need SP to be accurate or not.
-
-  if (NumBytes != 0 && hasSP(MF)) {
+  if (hasFP(MF)) {
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    uint32_t NumBytes = MFI.getStackSize();
     uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
       NumBytes + MFI.getMaxAlignment() : NumBytes;
 
+    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
       .addReg(StackPtrReg)
-      .addImm(RoundedSize * ST.getWavefrontSize());
+      .addImm(RoundedSize * ST.getWavefrontSize())
+      .setMIFlag(MachineInstr::FrameDestroy);
   }
 }
 
@@ -849,18 +837,25 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
 }
 
 bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
-  // All stack operations are relative to the frame offset SGPR.
-  // TODO: Still want to eliminate sometimes.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (MFI.hasCalls()) {
+    // All offsets are unsigned, so need to be addressed in the same direction
+    // as stack growth.
+    if (MFI.getStackSize() != 0)
+      return true;
+
+    // For the entry point, the input wave scratch offset must be copied to the
+    // API SP if there are calls.
+    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
+      return true;
+
+    // Retain behavior of always omitting the FP for leaf functions when
+    // possible.
+    if (MF.getTarget().Options.DisableFramePointerElim(MF))
+      return true;
+  }
 
-  // XXX - Is this only called after frame is finalized? Should be able to check
-  // frame size.
-  return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
-}
-
-bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
-  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
-  // All stack operations are relative to the frame offset SGPR.
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
+  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+    MFI.hasStackMap() || MFI.hasPatchPoint() ||
+    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index c5b707cba06c0..a9e765aa36e46 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -58,12 +58,9 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
     SIMachineFunctionInfo *MFI,
     MachineFunction &MF) const;
 
-  std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg(
-    const GCNSubtarget &ST,
-    const SIInstrInfo *TII,
-    const SIRegisterInfo *TRI,
-    SIMachineFunctionInfo *MFI,
-    MachineFunction &MF) const;
+  unsigned getReservedPrivateSegmentWaveByteOffsetReg(
+      const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+      SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
 
   // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
   void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
@@ -73,7 +70,6 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
 
 public:
   bool hasFP(const MachineFunction &MF) const override;
-  bool hasSP(const MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1ca11da247ee3..8a08bc463da5e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1770,6 +1770,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
   // should reserve the arguments and use them directly.
   MachineFrameInfo &MFI = MF.getFrameInfo();
   bool HasStackObjects = MFI.hasStackObjects();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   // Record that we know we have non-spill stack objects so we don't need to
   // check all stack objects later.
@@ -1785,65 +1786,85 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
   // the scratch registers to pass in.
   bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
 
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  if (ST.isAmdHsaOrMesa(MF.getFunction())) {
-    if (RequiresStackAccess) {
-      // If we have stack objects, we unquestionably need the private buffer
-      // resource. For the Code Object V2 ABI, this will be the first 4 user
-      // SGPR inputs. We can reserve those and use them directly.
-
-      unsigned PrivateSegmentBufferReg = Info.getPreloadedReg(
-        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
-      Info.setScratchRSrcReg(PrivateSegmentBufferReg);
-
-      if (MFI.hasCalls()) {
-        // If we have calls, we need to keep the frame register in a register
-        // that won't be clobbered by a call, so ensure it is copied somewhere.
-
-        // This is not a problem for the scratch wave offset, because the same
-        // registers are reserved in all functions.
-
-        // FIXME: Nothing is really ensuring this is a call preserved register,
-        // it's just selected from the end so it happens to be.
-        unsigned ReservedOffsetReg
-          = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-        Info.setScratchWaveOffsetReg(ReservedOffsetReg);
-      } else {
-        unsigned PrivateSegmentWaveByteOffsetReg = Info.getPreloadedReg(
-          AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-        Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
-      }
-    } else {
-      unsigned ReservedBufferReg
-        = TRI.reservedPrivateSegmentBufferReg(MF);
-      unsigned ReservedOffsetReg
-        = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-
-      // We tentatively reserve the last registers (skipping the last two
-      // which may contain VCC). After register allocation, we'll replace
-      // these with the ones immediately after those which were really
-      // allocated. In the prologue copies will be inserted from the argument
-      // to these reserved registers.
-      Info.setScratchRSrcReg(ReservedBufferReg);
-      Info.setScratchWaveOffsetReg(ReservedOffsetReg);
-    }
+  if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
+    // If we have stack objects, we unquestionably need the private buffer
+    // resource. For the Code Object V2 ABI, this will be the first 4 user
+    // SGPR inputs. We can reserve those and use them directly.
+
+    unsigned PrivateSegmentBufferReg =
+        Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
+    Info.setScratchRSrcReg(PrivateSegmentBufferReg);
   } else {
     unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
+    // We tentatively reserve the last registers (skipping the last registers
+    // which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
+    // we'll replace these with the ones immediately after those which were
+    // really allocated. In the prologue copies will be inserted from the
+    // argument to these reserved registers.
 
     // Without HSA, relocations are used for the scratch pointer and the
     // buffer resource setup is always inserted in the prologue. Scratch wave
     // offset is still in an input SGPR.
     Info.setScratchRSrcReg(ReservedBufferReg);
+  }
 
-    if (HasStackObjects && !MFI.hasCalls()) {
-      unsigned ScratchWaveOffsetReg = Info.getPreloadedReg(
-        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-      Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+  // This should be accurate for kernels even before the frame is finalized.
+  const bool HasFP = ST.getFrameLowering()->hasFP(MF);
+  if (HasFP) {
+    unsigned ReservedOffsetReg =
+        TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    // Try to use s32 as the SP, but move it if it would interfere with input
+    // arguments. This won't work with calls though.
+    //
+    // FIXME: Move SP to avoid any possible inputs, or find a way to spill input
+    // registers.
+    if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
+      Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
     } else {
-      unsigned ReservedOffsetReg
-        = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-      Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+      assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
+
+      if (MFI.hasCalls())
+        report_fatal_error("call in graphics shader with too many input SGPRs");
+
+      for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
+        if (!MRI.isLiveIn(Reg)) {
+          Info.setStackPtrOffsetReg(Reg);
+          break;
+        }
+      }
+
+      if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
+        report_fatal_error("failed to find register for SP");
     }
+
+    Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+    Info.setFrameOffsetReg(ReservedOffsetReg);
+  } else if (RequiresStackAccess) {
+    assert(!MFI.hasCalls());
+    // We know there are accesses and they will be done relative to SP, so just
+    // pin it to the input.
+    //
+    // FIXME: Should not do this if inline asm is reading/writing these
+    // registers.
+    unsigned PreloadedSP = Info.getPreloadedReg(
+        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+
+    Info.setStackPtrOffsetReg(PreloadedSP);
+    Info.setScratchWaveOffsetReg(PreloadedSP);
+    Info.setFrameOffsetReg(PreloadedSP);
+  } else {
+    assert(!MFI.hasCalls());
+
+    // There may not be stack access at all. There may still be spills, or
+    // access of a constant pointer (in which cases an extra copy will be
+    // emitted in the prolog).
+    unsigned ReservedOffsetReg
+      = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+    Info.setStackPtrOffsetReg(ReservedOffsetReg);
+    Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+    Info.setFrameOffsetReg(ReservedOffsetReg);
   }
 }
 
@@ -9939,7 +9960,6 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
 void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
   const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
 
   if (Info->isEntryFunction()) {
@@ -9947,24 +9967,10 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
     reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
   }
 
-  // We have to assume the SP is needed in case there are calls in the function
-  // during lowering. Calls are only detected after the function is
-  // lowered. We're about to reserve registers, so don't bother using it if we
-  // aren't really going to use it.
-  bool NeedSP = !Info->isEntryFunction() ||
-    MFI.hasVarSizedObjects() ||
-    MFI.hasCalls();
-
-  if (NeedSP) {
-    unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF);
-    Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
-
-    assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg());
-    assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
-                               Info->getStackPtrOffsetReg()));
-    if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
-      MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
-  }
+  assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
+                             Info->getStackPtrOffsetReg()));
+  if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
+    MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
 
   // We need to worry about replacing the default register with itself in case
   // of MIR testcases missing the MFI.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1c3c52ba02c28..48257b01b8622 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -928,7 +928,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FrameIndex)               // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
     // Add the scratch resource registers as implicit uses because we may end up
     // needing them, and need to ensure that the reserved registers are
     // correctly handled.
@@ -950,7 +950,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     .addReg(SrcReg, getKillRegState(isKill)) // data
     .addFrameIndex(FrameIndex)               // addr
     .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
-    .addReg(MFI->getFrameOffsetReg())        // scratch_offset
+    .addReg(MFI->getStackPtrOffsetReg())     // scratch_offset
     .addImm(0)                               // offset
     .addMemOperand(MMO);
 }
@@ -1032,7 +1032,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FrameIndex) // addr
       .addMemOperand(MMO)
       .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
-      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
+      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
 
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
@@ -1046,10 +1046,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 
   unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
   BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-    .addFrameIndex(FrameIndex)        // vaddr
-    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
-    .addReg(MFI->getFrameOffsetReg()) // scratch_offset
-    .addImm(0)                        // offset
+    .addFrameIndex(FrameIndex)           // vaddr
+    .addReg(MFI->getScratchRSrcReg())    // scratch_rsrc
+    .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+    .addImm(0)                           // offset
     .addMemOperand(MMO);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7e09f41aa8de6..bfe6182a7c1cd 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -444,7 +444,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   }
 
   unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
-    return ArgInfo.getPreloadedValue(Value).first->getRegister();
+    auto Arg = ArgInfo.getPreloadedValue(Value).first;
+    return Arg ? Arg->getRegister() : 0;
   }
 
   unsigned getGITPtrHigh() const {
@@ -486,6 +487,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
     return FrameOffsetReg;
   }
 
+  void setFrameOffsetReg(unsigned Reg) {
+    assert(Reg != 0 && "Should never be unset");
+    FrameOffsetReg = Reg;
+  }
+
   void setStackPtrOffsetReg(unsigned Reg) {
     assert(Reg != 0 && "Should never be unset");
     StackPtrOffsetReg = Reg;
@@ -502,8 +508,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
   void setScratchWaveOffsetReg(unsigned Reg) {
     assert(Reg != 0 && "Should never be unset");
     ScratchWaveOffsetReg = Reg;
-    if (isEntryFunction())
-      FrameOffsetReg = ScratchWaveOffsetReg;
   }
 
   unsigned getQueuePtrUserSGPR() const {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2e96b9866671f..520d5198c5f6e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -138,11 +138,6 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
 }
 
-unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
-  const MachineFunction &MF) const {
-  return AMDGPU::SGPR32;
-}
-
 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
 
@@ -718,6 +713,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
   if (SpillToSMEM && OnlyToVGPR)
     return false;
 
+  unsigned FrameReg = getFrameRegister(*MF);
+
   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                          SuperReg != MFI->getFrameOffsetReg() &&
                          SuperReg != MFI->getScratchWaveOffsetReg()));
@@ -777,11 +774,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg())
+          .addReg(FrameReg)
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg());
+          .addReg(FrameReg);
       }
 
       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
@@ -849,11 +846,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                    EltSize, MinAlign(Align, EltSize * i));
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
-        .addReg(TmpReg, RegState::Kill)    // src
-        .addFrameIndex(Index)              // vaddr
-        .addReg(MFI->getScratchRSrcReg())  // srrsrc
-        .addReg(MFI->getFrameOffsetReg())  // soffset
-        .addImm(i * 4)                     // offset
+        .addReg(TmpReg, RegState::Kill)       // src
+        .addFrameIndex(Index)                 // vaddr
+        .addReg(MFI->getScratchRSrcReg())     // srrsrc
+        .addReg(MFI->getStackPtrOffsetReg())  // soffset
+        .addImm(i * 4)                        // offset
         .addMemOperand(MMO);
     }
   }
@@ -909,6 +906,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   unsigned EltSize = 4;
   unsigned ScalarLoadOp;
 
+  unsigned FrameReg = getFrameRegister(*MF);
+
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
   if (SpillToSMEM && isSGPRClass(RC)) {
     // XXX - if private_element_size is larger than 4 it might be useful to be
@@ -940,11 +939,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
       if (Offset != 0) {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg())
+          .addReg(FrameReg)
           .addImm(Offset);
       } else {
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(MFI->getFrameOffsetReg());
+          .addReg(FrameReg);
       }
 
       auto MIB =
@@ -988,10 +987,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
         MinAlign(Align, EltSize * i));
 
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
-        .addFrameIndex(Index)              // vaddr
-        .addReg(MFI->getScratchRSrcReg())  // srsrc
-        .addReg(MFI->getFrameOffsetReg())  // soffset
-        .addImm(i * 4)                     // offset
+        .addFrameIndex(Index)                 // vaddr
+        .addReg(MFI->getScratchRSrcReg())     // srsrc
+        .addReg(MFI->getStackPtrOffsetReg())  // soffset
+        .addImm(i * 4)                        // offset
         .addMemOperand(MMO);
 
       auto MIB =
@@ -1056,6 +1055,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
   int Index = MI->getOperand(FIOperandNum).getIndex();
 
+  unsigned FrameReg = getFrameRegister(*MF);
+
   switch (MI->getOpcode()) {
     // SGPR register spill
     case AMDGPU::SI_SPILL_S512_SAVE:
@@ -1091,11 +1092,14 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V32_SAVE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
+      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+             MFI->getStackPtrOffsetReg());
+
       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
             Index,
             VData->getReg(), VData->isKill(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+            FrameReg,
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
             *MI->memoperands_begin(),
             RS);
@@ -1112,12 +1116,14 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V512_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                          AMDGPU::OpName::vdata);
+      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+             MFI->getStackPtrOffsetReg());
 
       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
             Index,
             VData->getReg(), VData->isKill(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
-            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+            FrameReg,
             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
             *MI->memoperands_begin(),
             RS);
@@ -1129,13 +1135,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       const DebugLoc &DL = MI->getDebugLoc();
       bool IsMUBUF = TII->isMUBUF(*MI);
 
-      if (!IsMUBUF &&
-          MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
+      if (!IsMUBUF && !MFI->isEntryFunction()) {
         // Convert to an absolute stack address by finding the offset from the
         // scratch wave base and scaling by the wave size.
         //
-        // In an entry function/kernel the stack address is already the
-        // absolute address relative to the scratch wave offset.
+        // In an entry function/kernel the offset is already the absolute
+        // address relative to the frame register.
 
         unsigned DiffReg
           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -1146,7 +1151,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
-          .addReg(MFI->getFrameOffsetReg())
+          .addReg(FrameReg)
           .addReg(MFI->getScratchWaveOffsetReg());
 
         int64_t Offset = FrameInfo.getObjectOffset(Index);
@@ -1196,8 +1201,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr));
 
-        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
-               == MFI->getFrameOffsetReg());
+        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+               MFI->getStackPtrOffsetReg());
+
+        TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
 
         int64_t Offset = FrameInfo.getObjectOffset(Index);
         int64_t OldImm
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index de10e92c96573..9780824683b33 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -56,8 +56,6 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
   unsigned reservedPrivateSegmentWaveByteOffsetReg(
     const MachineFunction &MF) const;
 
-  unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const;
-
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index cd1ce13eb16b9..c21abaeaaf5d1 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -4,15 +4,14 @@
 %struct.ByValStruct = type { [4 x i32] }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct:
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
+; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 offset:4{{$}}
 ; GCN-NOT: s32
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:4{{$}}
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
 ; GCN-NOT: s32
 
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
+; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:20{{$}}
 ; GCN-NOT: s32
-; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:20{{$}}
+; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:20{{$}}
 ; GCN-NOT: s32
 define hidden void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
 entry:
@@ -183,15 +182,14 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_align8:
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 offset:8{{$}}
 ; GCN-NOT: s32
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s5 offset:8{{$}}
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
 ; GCN-NOT: s32
 
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:24{{$}}
+; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:24{{$}}
 ; GCN-NOT: s32
-; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s5 offset:24{{$}}
+; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:24{{$}}
 ; GCN-NOT: s32
 define hidden void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 4d05fcc7de2ca..79a238a287dde 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -768,16 +768,17 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}tail_call_byval_align16:
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:28 ; 4-byte Folded Spill
-; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:24 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:32
-; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:36
-; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:20
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16
+; GCN-NOT: s32
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:32
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:36
+; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:20
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:16
 ; GCN: s_getpc_b64
-; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:24 ; 4-byte Folded Reload
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:28 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-NOT: s32
 ; GCN: s_setpc_b64
 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
 entry:
@@ -787,16 +788,17 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64:
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
-; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
-; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:8
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
-; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8
+; GCN-NOT: s32
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
+; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8
 ; GCN: s_getpc_b64
-; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-NOT: s32
 ; GCN: s_setpc_b64
 define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index bc9160772e2cb..70c69d9f6c031 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -22,9 +22,8 @@ define void @callee_no_stack_no_fp_elim() #1 {
 ; GCN-LABEL: {{^}}callee_with_stack:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt
-; GCN-NEXT: s_mov_b32 s5, s32
 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4{{$}}
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @callee_with_stack() #0 {
@@ -100,7 +99,7 @@ declare void @external_void_func_void() #0
 ; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and restored
 ; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
 ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 
 ; GCN: v_writelane_b32 v32
@@ -108,7 +107,7 @@ declare void @external_void_func_void() #0
 ; GCN: v_readlane_b32 s{{[0-9]+}}, v32
 
 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
 
 ; GCN-NEXT: s_waitcnt
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 5060c0fed1a9b..fa5d20a1e964b 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -115,8 +115,8 @@ define void @use_workgroup_id_x() #1 {
 
 ; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
 ; GCN: s_waitcnt
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
+; GCN-NOT: s32
+; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
 ; GCN: ; use s6
 ; GCN: s_setpc_b64
 define void @use_stack_workgroup_id_x() #1 {
@@ -429,7 +429,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
 }
 
 ; GCN-LABEL: {{^}}use_every_sgpr_input:
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index c63d96917d914..aa33dfa367594 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -230,12 +230,11 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
 }
 
 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
 
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @too_many_args_use_workitem_id_x(
@@ -357,12 +356,12 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x(
 ; frame[3] = VGPR spill slot
 
 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
-; GCN: buffer_load_dword v0, off, s[0:3], s5 offset:4
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v0, off, s[0:3], s32 offset:4
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; GCN: s_setpc_b64
 define void @too_many_args_use_workitem_id_x_byval(
   i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
@@ -476,16 +475,15 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
 }
 
 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8{{$}}
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12{{$}}
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
 
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @too_many_args_use_workitem_id_xyz(
@@ -574,11 +572,10 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
 ; frame[2] = workitem Z
 
 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
-; GCN: s_mov_b32 s5, s32
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31
-; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:4{{$}}
+; GCN: buffer_load_dword v31, off, s[0:3], s32 offset:4{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31
-; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:8{{$}}
+; GCN: buffer_load_dword v31, off, s[0:3], s32 offset:8{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31
 
 ; GCN: s_waitcnt
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
index 92a255ceae6de..819b0e5bfc536 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
@@ -7,7 +7,7 @@
 ; Materialize into a mov. Make sure there isn't an unnecessary copy.
 ; GCN-LABEL: {{^}}func_mov_fi_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 s6, s5, s4
+; GCN: s_sub_u32 s6, s32, s4
 
 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
 ; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
@@ -28,7 +28,7 @@ define void @func_mov_fi_i32() #0 {
 
 ; GCN-LABEL: {{^}}func_add_constant_to_fi_i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_sub_u32 s6, s5, s4
+; GCN: s_sub_u32 s6, s32, s4
 
 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
 ; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
@@ -52,7 +52,7 @@ define void @func_add_constant_to_fi_i32() #0 {
 ; into.
 
 ; GCN-LABEL: {{^}}func_other_fi_user_i32:
-; GCN: s_sub_u32 s6, s5, s4
+; GCN: s_sub_u32 s6, s32, s4
 
 ; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
 ; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]]
@@ -89,8 +89,7 @@ define void @func_load_private_arg_i32_ptr(i32 addrspace(5)* %ptr) #0 {
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_mov_b32 s5, s32
-; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4
+; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s4
 
 ; CI-NEXT: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
 ; CI-NEXT: v_add_i32_e64 [[ADD:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
@@ -112,9 +111,8 @@ define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 } addrspace(5)* byval %
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s5, s32
-; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5
-; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
+; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s32
+; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
 define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* byval %arg0) #0 {
   %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
   %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
@@ -129,7 +127,7 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 } addrspace(5)* b
 ; FrameIndex is hidden behind a CopyFromReg in the second block.
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block:
-; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4
+; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s32, s4
 
 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6
 ; CI: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
@@ -163,7 +161,7 @@ ret:
 
 ; Added offset can't be used with VOP3 add
 ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32:
-; GCN: s_sub_u32 s6, s5, s4
+; GCN: s_sub_u32 s6, s32, s4
 ; GCN-DAG: s_movk_i32 s6, 0x204
 
 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6
@@ -187,7 +185,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
 }
 
 ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
-; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s5, s4
+; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s32, s4
 ; GCN-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x204
 
 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6
@@ -243,7 +241,7 @@ bb5:
 
 ; GCN-LABEL: {{^}}alloca_ptr_nonentry_block:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s5 offset:12
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:12
 define void @alloca_ptr_nonentry_block(i32 %arg0) #0 {
   %alloca0 = alloca { i8, i32 }, align 4, addrspace(5)
   %cmp = icmp eq i32 %arg0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll
index 442cdc92c300c..9186b91f76377 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args.ll
@@ -220,7 +220,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 {
 ; GCN-DAG: buffer_store_dwordx4 v[4:7], off
 ; GCN-DAG: buffer_store_dwordx4 v[8:11], off
 ; GCN-DAG: buffer_store_dwordx4 v[12:15], off
-; GCN-DAG: buffer_load_dword [[STACKLOAD:v[0-9]+]], off, s[0:3], s5
+; GCN-DAG: buffer_load_dword [[STACKLOAD:v[0-9]+]], off, s[0:3], s32
 ; GCN-DAG: buffer_store_dwordx4 v[16:19], off
 ; GCN-DAG: buffer_store_dwordx4 v[20:23], off
 ; GCN-DAG: buffer_store_dwordx4 v[24:27], off
@@ -516,8 +516,8 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32:
-; GCN-DAG: buffer_load_ubyte v[[ELT0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GCN-DAG: buffer_load_ubyte v[[ELT0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
 ; GCN-DAG: buffer_store_dword v[[ELT1]]
 ; GCN-DAG: buffer_store_byte v[[ELT0]]
 define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0) #0 {
@@ -527,10 +527,10 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_x2:
-; GCN: buffer_load_ubyte v[[ELT0_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN: buffer_load_dword v[[ELT1_0:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN: buffer_load_ubyte v[[ELT0_1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN: buffer_load_dword v[[ELT1_1:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN: buffer_load_ubyte v[[ELT0_0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN: buffer_load_dword v[[ELT1_0:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN: buffer_load_ubyte v[[ELT0_1:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; GCN: buffer_load_dword v[[ELT1_1:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
 
 ; GCN: ds_write_b32 v0, v0
 ; GCN: s_setpc_b64
@@ -544,9 +544,9 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %a
 }
 
 ; GCN-LABEL: {{^}}void_func_byval_i32_byval_i64:
-; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
 ; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off
 define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 {
@@ -566,9 +566,9 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 ad
 ; GCN-DAG: buffer_store_dwordx4 v[20:23], off
 ; GCN-DAG: buffer_store_dwordx4 v[24:27], off
 ; GCN-DAG: buffer_store_dwordx4 v[28:31], off
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:8
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:12
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:8
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:12
 
 ; GCN: buffer_store_dword v[[LOAD_ARG1]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
@@ -581,14 +581,14 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0
 
 ; FIXME: Different ext load types on CI vs. VI
 ; GCN-LABEL: {{^}}void_func_v32i32_i1_i8_i16:
-; GCN-DAG: buffer_load_ubyte [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; VI-DAG: buffer_load_ushort [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; VI-DAG: buffer_load_ushort [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; VI-DAG: buffer_load_ushort [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN-DAG: buffer_load_ubyte [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; VI-DAG: buffer_load_ushort [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; VI-DAG: buffer_load_ushort [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; VI-DAG: buffer_load_ushort [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
 
-; CI-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; CI-DAG: buffer_load_dword [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; CI-DAG: buffer_load_dword [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; CI-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; CI-DAG: buffer_load_dword [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; CI-DAG: buffer_load_dword [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
 
 ; GCN-DAG: v_and_b32_e32 [[TRUNC_ARG1_I1:v[0-9]+]], 1, [[LOAD_ARG1]]
 ; CI-DAG: v_cvt_f16_f32_e32 [[CVT_ARG4:v[0-9]+]], [[LOAD_ARG4]]
@@ -609,10 +609,10 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v2i32_v2f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
 
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]{{\]}}, off
 ; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off
@@ -624,8 +624,8 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v2i16_v2f16:
-; GFX9-DAG: buffer_load_dword [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GFX9-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:8{{$}}
+; GFX9-DAG: buffer_load_dword [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GFX9-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s32 offset:8{{$}}
 ; GFX9: buffer_store_dword [[LOAD_ARG1]], off
 ; GFX9: buffer_store_short [[LOAD_ARG2]], off
 define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 {
@@ -636,15 +636,15 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v2i64_v2f64:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
 
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:32{{$}}
 
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
@@ -656,15 +656,15 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v4i32_v4f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
 
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:32{{$}}
 
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off
@@ -676,23 +676,23 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v8i32_v8f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
-
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:36{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:40{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:44{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:48{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:52{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:56{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:60{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:64{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s32 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s32 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s32 offset:32{{$}}
+
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:36{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:40{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:44{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:48{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s32 offset:52{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s32 offset:56{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s32 offset:60{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s32 offset:64{{$}}
 
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]{{\]}}, off
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off
@@ -706,39 +706,39 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8
 }
 
 ; GCN-LABEL: {{^}}void_func_v32i32_v16i32_v16f32:
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:12{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_:[0-9]+]], off, s[0:3], s5 offset:16{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:24{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:28{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:32{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_8:[0-9]+]], off, s[0:3], s5 offset:36{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_9:[0-9]+]], off, s[0:3], s5 offset:40{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_10:[0-9]+]], off, s[0:3], s5 offset:44{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_11:[0-9]+]], off, s[0:3], s5 offset:48{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_12:[0-9]+]], off, s[0:3], s5 offset:52{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_13:[0-9]+]], off, s[0:3], s5 offset:56{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_14:[0-9]+]], off, s[0:3], s5 offset:60{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s5 offset:64{{$}}
-
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:68{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:72{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:76{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:80{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:84{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:88{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:92{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:96{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_8:[0-9]+]], off, s[0:3], s5 offset:100{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_9:[0-9]+]], off, s[0:3], s5 offset:104{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_10:[0-9]+]], off, s[0:3], s5 offset:108{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_11:[0-9]+]], off, s[0:3], s5 offset:112{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_12:[0-9]+]], off, s[0:3], s5 offset:116{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_13:[0-9]+]], off, s[0:3], s5 offset:120{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_14:[0-9]+]], off, s[0:3], s5 offset:124{{$}}
-; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s5 offset:128{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s32 offset:8{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s32 offset:12{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_:[0-9]+]], off, s[0:3], s32 offset:16{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s32 offset:20{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s32 offset:24{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s32 offset:28{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s32 offset:32{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_8:[0-9]+]], off, s[0:3], s32 offset:36{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_9:[0-9]+]], off, s[0:3], s32 offset:40{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_10:[0-9]+]], off, s[0:3], s32 offset:44{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_11:[0-9]+]], off, s[0:3], s32 offset:48{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_12:[0-9]+]], off, s[0:3], s32 offset:52{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_13:[0-9]+]], off, s[0:3], s32 offset:56{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_14:[0-9]+]], off, s[0:3], s32 offset:60{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s32 offset:64{{$}}
+
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s32 offset:68{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s32 offset:72{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s32 offset:76{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s32 offset:80{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s32 offset:84{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s32 offset:88{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s32 offset:92{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s32 offset:96{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_8:[0-9]+]], off, s[0:3], s32 offset:100{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_9:[0-9]+]], off, s[0:3], s32 offset:104{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_10:[0-9]+]], off, s[0:3], s32 offset:108{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_11:[0-9]+]], off, s[0:3], s32 offset:112{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_12:[0-9]+]], off, s[0:3], s32 offset:116{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_13:[0-9]+]], off, s[0:3], s32 offset:120{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_14:[0-9]+]], off, s[0:3], s32 offset:124{{$}}
+; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s32 offset:128{{$}}
 define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
   store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
   store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/load-hi16.ll b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
index 357ce3d9a9ab2..07ae3d00d0735 100644
--- a/llvm/test/CodeGen/AMDGPU/load-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-hi16.ll
@@ -494,13 +494,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
+; GFX900: buffer_load_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s32 offset:4094{{$}}
 define void @load_private_hi_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
 entry:
   %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
@@ -513,13 +513,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
+; GFX900: buffer_load_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s32 offset:4094{{$}}
 define void @load_private_hi_v2f16_reglo_vreg(half addrspace(5)* byval %in, half %reg) #0 {
 entry:
   %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
@@ -568,13 +568,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_zexti8:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_ubyte_d16_hi v0, off, s[0:3], s5 offset:4095{{$}}
+; GFX900: buffer_load_ubyte_d16_hi v0, off, s[0:3], s32 offset:4095{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
+; NO-D16-HI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s32 offset:4095{{$}}
 define void @load_private_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i16 %reg) #0 {
 entry:
   %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
@@ -588,13 +588,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg_zexti8:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_ubyte_d16_hi v0, off, s[0:3], s5 offset:4095{{$}}
+; GFX900: buffer_load_ubyte_d16_hi v0, off, s[0:3], s32 offset:4095{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
+; NO-D16-HI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s32 offset:4095{{$}}
 define void @load_private_hi_v2f16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, half %reg) #0 {
 entry:
   %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
@@ -609,13 +609,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2f16_reglo_vreg_sexti8:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_sbyte_d16_hi v0, off, s[0:3], s5 offset:4095{{$}}
+; GFX900: buffer_load_sbyte_d16_hi v0, off, s[0:3], s32 offset:4095{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
+; NO-D16-HI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s32 offset:4095{{$}}
 define void @load_private_hi_v2f16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, half %reg) #0 {
 entry:
   %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
@@ -630,13 +630,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_sexti8:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_sbyte_d16_hi v0, off, s[0:3], s5 offset:4095{{$}}
+; GFX900: buffer_load_sbyte_d16_hi v0, off, s[0:3], s32 offset:4095{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
+; NO-D16-HI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s32 offset:4095{{$}}
 define void @load_private_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i16 %reg) #0 {
 entry:
   %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
@@ -789,7 +789,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4094
+; GFX900-NEXT: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4094
 define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg) #0 {
 entry:
   %obj0 = alloca [10 x i32], align 4, addrspace(5)
@@ -806,7 +806,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_sexti8_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_sbyte_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_load_sbyte_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4095
 define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg) #0 {
 entry:
   %obj0 = alloca [10 x i32], align 4, addrspace(5)
@@ -824,7 +824,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_zexti8_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_ubyte_d16_hi v{{[0-9]+}}, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_load_ubyte_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4095
 define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg) #0 {
 entry:
   %obj0 = alloca [10 x i32], align 4, addrspace(5)
@@ -975,9 +975,9 @@ entry:
 ; FIXME: Is there a cost to using the extload over not?
 ; GCN-LABEL: {{^}}load_private_v2i16_split:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_ushort v0, off, s[0:3], s5 offset:4{{$}}
+; GFX900: buffer_load_ushort v0, off, s[0:3], s32 offset:4{{$}}
 ; GFX900-NEXT: s_waitcnt
-; GFX900-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], s5 offset:6
+; GFX900-NEXT: buffer_load_short_d16_hi v0, off, s[0:3], s32 offset:6
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 define <2 x i16> @load_private_v2i16_split(i16 addrspace(5)* byval %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
index 62bc8c26988de..47f5ea3555097 100644
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -590,13 +590,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
+; GFX900: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s32 offset:4094{{$}}
 define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
 entry:
   %reg.bc = bitcast i32 %reg to <2 x i16>
@@ -609,7 +609,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_ushort v1, off, s[0:3], s5 offset:4094{{$}}
+; GFX900: buffer_load_ushort v1, off, s[0:3], s32 offset:4094{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900: v_and_b32
 ; GFX900: v_lshl_or_b32
@@ -618,7 +618,7 @@ entry:
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s32 offset:4094{{$}}
 define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
 entry:
   %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
@@ -631,13 +631,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
+; GFX900: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
+; NO-D16-HI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s32 offset:4094{{$}}
 define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
 entry:
   %reg.bc = bitcast i32 %reg to <2 x half>
@@ -704,13 +704,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
+; GFX900: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
+; NO-D16-HI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s32 offset:4095{{$}}
 define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
 entry:
   %reg.bc = bitcast i32 %reg to <2 x i16>
@@ -724,13 +724,13 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
 ; GCN: s_waitcnt
-; GFX900: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
+; GFX900: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095{{$}}
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
 ; GFX900-NEXT: s_waitcnt
 ; GFX900-NEXT: s_setpc_b64
 
-; NO-D16-HI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
+; NO-D16-HI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s32 offset:4095{{$}}
 define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
 entry:
   %reg.bc = bitcast i32 %reg to <2 x i16>
@@ -895,7 +895,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094
+; GFX900-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094
 
 ; NO-D16-HI: buffer_load_ushort v
 define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
@@ -914,7 +914,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
 
 ; NO-D16-HI: buffer_load_sbyte v
 define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
@@ -934,7 +934,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
 
 ; NO-D16-HI: buffer_load_ubyte v
 define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
@@ -954,7 +954,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_sexti8_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095
 
 ; NO-D16-HI: buffer_load_sbyte v
 define void @load_private_lo_v2f16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
@@ -975,7 +975,7 @@ entry:
 
 ; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_zexti8_to_offset:
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095
 
 ; NO-D16-HI: buffer_load_ubyte v
 define void @load_private_lo_v2f16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 4822818e901af..361b8035f61e1 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -126,7 +126,7 @@ entry:
 ; CHECK-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s4
 ; CHECK-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], [[IDX_S]]
 ; CHECK-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
-; CHECK-O0-DAG: buffer_store_dword [[IDX_V]], off, s[0:3], s5 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
+; CHECK-O0-DAG: buffer_store_dword [[IDX_V]], off, s[0:3], s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
 
 ; CHECK-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
 ; CHECK-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], {{.*}} ; 4-byte Folded Reload
@@ -149,22 +149,22 @@ entry:
 ; CHECK-O0: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
 ; CHECK-O0: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
 ; CHECK-O0: s_and_saveexec_b64 [[CMP]], [[CMP]]
-; CHECK-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s[0:3], s5 offset:[[IDX_OFF]] ; 4-byte Folded Reload
+; CHECK-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s[0:3], s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload
 ; CHECK-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, {{.*}} idxen
 ; CHECK-O0: s_waitcnt vmcnt(0)
-; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
+; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s32 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]]
 ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
 ; CHECK-O0: v_readlane_b32 s[[S1:[0-9]+]], v{{[0-9]+}}, 4
 ; CHECK-O0: v_readlane_b32 s[[S2:[0-9]+]], v{{[0-9]+}}, 5
 ; CHECK-O0: s_mov_b64 exec, s{{\[}}[[S1]]:[[S2]]{{\]}}
-; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
-; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
+; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s32 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
+; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s32 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
 
 ; CHECK-O0: BB{{[0-9]+_[0-9]+}}:
 ; CHECK-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec
-; CHECK-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s5 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
+; CHECK-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
 ; CHECK-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
 
@@ -189,21 +189,21 @@ entry:
 ; CHECK-O0: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
 ; CHECK-O0: s_and_b64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
 ; CHECK-O0: s_and_saveexec_b64 [[CMP]], [[CMP]]
-; CHECK-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s[0:3], s5 offset:[[IDX_OFF]] ; 4-byte Folded Reload
+; CHECK-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s[0:3], s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload
 ; CHECK-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, {{.*}} idxen
 ; CHECK-O0: s_waitcnt vmcnt(0)
-; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
+; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s32 offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
 ; CHECK-O0: s_xor_b64 exec, exec, [[CMP]]
 ; CHECK-O0-NEXT: s_cbranch_execnz [[LOOPBB1]]
 
 ; CHECK-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX0]]
 ; CHECK-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX1]]
 ; CHECK-O0: s_mov_b64 exec, s{{\[}}[[SAVEEXEC0]]:[[SAVEEXEC1]]{{\]}}
-; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
-; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s5 offset:[[RES_OFF]] ; 4-byte Folded Spill
+; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s32 offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
+; CHECK-O0: buffer_store_dword [[RES]], off, s[0:3], s32 offset:[[RES_OFF]] ; 4-byte Folded Spill
 
 ; CHECK-O0: [[TERMBB]]:
-; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s5 offset:[[RES_OFF]] ; 4-byte Folded Reload
+; CHECK-O0: buffer_load_dword [[RES:v[0-9]+]], off, s[0:3], s32 offset:[[RES_OFF]] ; 4-byte Folded Reload
 ; CHECK-O0: global_store_dword v[{{[0-9]+:[0-9]+}}], [[RES]], off
 
 define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %in, float addrspace(1)* %out) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index 3aae677844d90..06075ac8e33e6 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -19,26 +19,26 @@ machineFunctionInfo:
   scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
   scratchWaveOffsetReg: $sgpr5
   frameOffsetReg:  $sgpr5
+  stackPtrOffsetReg:  $sgpr32
 
 body:             |
   ; CHECK-LABEL: name: scavenge_register_position
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.1(0x80000000)
   ; CHECK:   liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr5 = COPY $sgpr4
-  ; CHECK:   $sgpr6 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc
+  ; CHECK:   $sgpr6 = S_ADD_U32 $sgpr32, 524288, implicit-def $scc
   ; CHECK:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
   ; CHECK:   S_BRANCH %bb.1
   ; CHECK: bb.1:
-  ; CHECK:   liveins: $sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr4 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc
+  ; CHECK:   liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK:   $sgpr4 = S_ADD_U32 $sgpr32, 524288, implicit-def $scc
   ; CHECK:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
   ; CHECK:   S_ENDPGM 0, implicit $vgpr0
   bb.0:
-    $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+    $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
     S_BRANCH %bb.1
 
   bb.1:
-    $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+    $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
     S_ENDPGM 0, implicit $vgpr0
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
index 906e37e992690..f69c324f94847 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir
@@ -33,16 +33,16 @@
 # SHARE:       stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
 # SHARE:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
 
-# SHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5)
-# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
-# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5)
-# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
+# SHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5)
+# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.1, align 4, addrspace 5)
+# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5)
 # SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
-# SHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5)
-# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
+# SHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5)
+# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5)
 # SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0
-# SHARE:  $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5)
+# SHARE:  $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5)
 
 # NOSHARE: stack:
 # NOSHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
@@ -58,17 +58,17 @@
 # NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true,
 # NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
 
-# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5)
-# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
-# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5)
-# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
+# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5)
+# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.1, align 4, addrspace 5)
+# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5)
 # NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
-# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5)
-# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.3, addrspace 5)
-# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5)
+# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5)
+# NOSHARE: SI_SPILL_S32_SAVE $sgpr32, %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 4 into %stack.3, addrspace 5)
+# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.1, align 4, addrspace 5)
 # NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0
-# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.3, addrspace 5)
+# NOSHARE: $sgpr32 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 4 from %stack.3, addrspace 5)
 
 ...
 
@@ -79,23 +79,23 @@ frameInfo:
 machineFunctionInfo:
   scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
   scratchWaveOffsetReg: $sgpr4
-  frameOffsetReg: $sgpr5
+  frameOffsetReg: $sgpr32
   stackPtrOffsetReg: $sgpr32
 body:             |
   bb.0:
-    %0:sreg_32_xm0 = COPY $sgpr5
+    %0:sreg_32_xm0 = COPY $sgpr32
     %1:vreg_64 = IMPLICIT_DEF
     %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
-    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
+    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0
-    $sgpr5 = COPY %0
-    %4:sreg_32_xm0 = COPY $sgpr5
-    ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
-    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
+    $sgpr32 = COPY %0
+    %4:sreg_32_xm0 = COPY $sgpr32
+    ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
+    ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
     $vgpr0 = COPY %2
     dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0
-    $sgpr5 = COPY %4
-    ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5
+    $sgpr32 = COPY %4
+    ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr32
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index ba0acbc2573a9..0892bb8549f96 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -16,10 +16,10 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
 
 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 [[K:v[0-9]+]], 9
 ; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v0, v1
 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
+; GCN: buffer_store_dword [[K]], off, s[0:3], s32 offset:24
 ; GCN: s_waitcnt vmcnt(0)
 ; GCN: s_setpc_b64
 ; GCN: ; ScratchSize: 68
@@ -40,7 +40,7 @@ entry:
 
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_stack_object:
 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
-; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:24
 ; GCN: s_setpc_b64
 ; GCN: ; ScratchSize: 68
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
@@ -54,7 +54,7 @@ entry:
 
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_callee_stack_object:
 ; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
-; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:24
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:24
 ; GCN: s_setpc_b64
 ; GCN: ; ScratchSize: 136
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
@@ -84,8 +84,7 @@ entry:
 
 ; GCN-LABEL: {{^}}i32_fastcc_i32_byval_i32:
 ; GCN: s_waitcnt
-; GCN-NEXT: s_mov_b32 s5, s32
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4
 ; GCN-NEXT: s_waitcnt vmcnt(0)
 
 ; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v0, v1
@@ -116,8 +115,7 @@ entry:
 ; GCN-NOT: v0
 ; GCN-NOT: s32
 ; GCN: buffer_load_dword v1, off, s[0:3], s4 offset:16
-; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v1, off, s[0:3], s5 offset:4
+; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:4
 ; GCN-NEXT: s_setpc_b64
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [16 x i32] %large) #1 {
 entry:
@@ -127,8 +125,8 @@ entry:
 
 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_a32i32:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4
-; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s32 offset:4
+; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s32 offset:8
 
 ; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v0, v1
 ; CIVI: v_add_{{i|u}}32_e32 v0, vcc, v0, [[LOAD_0]]
@@ -150,21 +148,20 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l
 
 ; FIXME: Why load and store same location for stack args?
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_a32i32:
-; GCN: s_mov_b32 s5, s32
 
-; GCN-DAG: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
-; GCN-DAG: buffer_store_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
+; GCN-DAG: buffer_store_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-DAG: buffer_store_dword v33, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
 
-; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4
-; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s32 offset:4
+; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s32 offset:8
 
 ; GCN-NOT: s32
 
-; GCN-DAG: buffer_store_dword [[LOAD_0]], off, s[0:3], s5 offset:4
-; GCN-DAG: buffer_store_dword [[LOAD_1]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_store_dword [[LOAD_0]], off, s[0:3], s32 offset:4
+; GCN-DAG: buffer_store_dword [[LOAD_1]], off, s[0:3], s32 offset:8
 
-; GCN-DAG: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
-; GCN-DAG: buffer_load_dword v33, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
+; GCN-DAG: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-DAG: buffer_load_dword v33, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 
 ; GCN-NOT: s32
 ; GCN: s_setpc_b64
@@ -175,12 +172,8 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_a32i32_stack_object:
-; GCN-DAG: s_mov_b32 s5, s32
-; GCN-NOT: s32
 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
-; GCN: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:44
-
-; GCN-NOT: s32
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s32 offset:44
 ; GCN: s_setpc_b64
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
 entry:
@@ -217,7 +210,6 @@ entry:
 ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
 ; GCN-DAG: v_writelane_b32 v34, s33, 0
 ; GCN-DAG: v_writelane_b32 v34, s34, 1
-; GCN-DAG: v_writelane_b32 v34, s35, 2
 
 ; GCN-DAG: s_getpc_b64
 ; GCN: s_swappc_b64
@@ -228,7 +220,6 @@ entry:
 
 ; GCN-DAG: v_readlane_b32 s33, v34, 0
 ; GCN-DAG: v_readlane_b32 s34, v34, 1
-; GCN-DAG: v_readlane_b32 s35, v34, 2
 
 ; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:4
 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
@@ -249,8 +240,12 @@ entry:
 ; in same place at function exit.
 
 ; GCN-LABEL: {{^}}sibling_call_stack_objecti32_fastcc_i32_i32_a32i32:
-; GCN: s_mov_b32 s5, s32
-; GCN-NOT: s32
+; GCN-NOT: s33
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:
+
+; GCN-NOT: s33
+
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:
 ; GCN: s_setpc_b64 s[6:7]
 define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
 entry:
@@ -262,8 +257,10 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area:
-; GCN: s_mov_b32 s5, s32
-; GCN-NOT: s32
+; GCN-NOT: s33
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 offset:48
+
+; GCN-NOT: s33
 ; GCN: s_setpc_b64 s[6:7]
 define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/sp-too-many-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/sp-too-many-input-sgprs.ll
new file mode 100644
index 0000000000000..e1f6eb715a312
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sp-too-many-input-sgprs.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=MESA3D,ALL %s
+; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefixes=UNKNOWN,ALL %s
+
+; Make sure shaders pick a workable SP with > 32 input SGPRs.
+; FIXME: Doesn't seem to be getting initial value from right register?
+
+; ALL-LABEL: {{^}}too_many_input_sgprs_32:
+; MESA3D-NOT: s34
+; MESA3D: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s34 offset:4
+
+; Happens to end up in s32 anyway
+; UNKNOWN-NOT: s32
+; UNKNOWN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4
+define amdgpu_ps i32 @too_many_input_sgprs_32(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7,
+                                              i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15,
+                                              i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23,
+                                              i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) {
+bb:
+  %alloca = alloca i32, align 4, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  %tmp = add i32 %arg, %arg1
+  %tmp32 = add i32 %tmp, %arg2
+  %tmp33 = add i32 %tmp32, %arg3
+  %tmp34 = add i32 %tmp33, %arg4
+  %tmp35 = add i32 %tmp34, %arg5
+  %tmp36 = add i32 %tmp35, %arg6
+  %tmp37 = add i32 %tmp36, %arg7
+  %tmp38 = add i32 %tmp37, %arg8
+  %tmp39 = add i32 %tmp38, %arg9
+  %tmp40 = add i32 %tmp39, %arg10
+  %tmp41 = add i32 %tmp40, %arg11
+  %tmp42 = add i32 %tmp41, %arg12
+  %tmp43 = add i32 %tmp42, %arg13
+  %tmp44 = add i32 %tmp43, %arg14
+  %tmp45 = add i32 %tmp44, %arg15
+  %tmp46 = add i32 %tmp45, %arg16
+  %tmp47 = add i32 %tmp46, %arg17
+  %tmp48 = add i32 %tmp47, %arg18
+  %tmp49 = add i32 %tmp48, %arg19
+  %tmp50 = add i32 %tmp49, %arg20
+  %tmp51 = add i32 %tmp50, %arg21
+  %tmp52 = add i32 %tmp51, %arg22
+  %tmp53 = add i32 %tmp52, %arg23
+  %tmp54 = add i32 %tmp53, %arg24
+  %tmp55 = add i32 %tmp54, %arg25
+  %tmp56 = add i32 %tmp55, %arg26
+  %tmp57 = add i32 %tmp56, %arg27
+  %tmp58 = add i32 %tmp57, %arg28
+  %tmp59 = add i32 %tmp58, %arg29
+  %tmp60 = add i32 %tmp59, %arg30
+  %tmp61 = add i32 %tmp60, %arg31
+  ret i32 %tmp61
+}
+
+; ALL-LABEL: {{^}}too_many_input_sgprs_33:
+; MESA3D-NOT: s35
+; MESA3D: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s35 offset:4
+
+; UNKNOWN-NOT: s33
+; UNKNOWN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s33 offset:4
+define amdgpu_ps i32 @too_many_input_sgprs_33(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7,
+                                              i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15,
+                                              i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23,
+                                              i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31,
+                                              i32 inreg %arg32) {
+bb:
+  %alloca = alloca i32, align 4, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  %tmp = add i32 %arg, %arg1
+  %tmp32 = add i32 %tmp, %arg2
+  %tmp33 = add i32 %tmp32, %arg3
+  %tmp34 = add i32 %tmp33, %arg4
+  %tmp35 = add i32 %tmp34, %arg5
+  %tmp36 = add i32 %tmp35, %arg6
+  %tmp37 = add i32 %tmp36, %arg7
+  %tmp38 = add i32 %tmp37, %arg8
+  %tmp39 = add i32 %tmp38, %arg9
+  %tmp40 = add i32 %tmp39, %arg10
+  %tmp41 = add i32 %tmp40, %arg11
+  %tmp42 = add i32 %tmp41, %arg12
+  %tmp43 = add i32 %tmp42, %arg13
+  %tmp44 = add i32 %tmp43, %arg14
+  %tmp45 = add i32 %tmp44, %arg15
+  %tmp46 = add i32 %tmp45, %arg16
+  %tmp47 = add i32 %tmp46, %arg17
+  %tmp48 = add i32 %tmp47, %arg18
+  %tmp49 = add i32 %tmp48, %arg19
+  %tmp50 = add i32 %tmp49, %arg20
+  %tmp51 = add i32 %tmp50, %arg21
+  %tmp52 = add i32 %tmp51, %arg22
+  %tmp53 = add i32 %tmp52, %arg23
+  %tmp54 = add i32 %tmp53, %arg24
+  %tmp55 = add i32 %tmp54, %arg25
+  %tmp56 = add i32 %tmp55, %arg26
+  %tmp57 = add i32 %tmp56, %arg27
+  %tmp58 = add i32 %tmp57, %arg28
+  %tmp59 = add i32 %tmp58, %arg29
+  %tmp60 = add i32 %tmp59, %arg30
+  %tmp61 = add i32 %tmp60, %arg31
+  %tmp62 = add i32 %tmp61, %arg32
+  ret i32 %tmp62
+}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index cb804bafb5485..0cf19cea78110 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -8,12 +8,12 @@
 # CHECK-LABEL: name: expecting_non_empty_interval
 
 # CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit $exec
-# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
 # CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
 # CHECK-NEXT: dead %3:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit $exec
 
 # CHECK: S_NOP 0, implicit %6.sub1
-# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
+# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
 # CHECK-NEXT: S_NOP 0, implicit %8.sub1
 # CHECK-NEXT: S_NOP 0, implicit undef %9.sub0
 
@@ -22,7 +22,6 @@ tracksRegLiveness: true
 machineFunctionInfo:
   scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
   scratchWaveOffsetReg: $sgpr4
-  frameOffsetReg: $sgpr5
   stackPtrOffsetReg: $sgpr32
 body:             |
   bb.0:
@@ -57,7 +56,6 @@ tracksRegLiveness: true
 machineFunctionInfo:
   scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
   scratchWaveOffsetReg: $sgpr4
-  frameOffsetReg: $sgpr5
   stackPtrOffsetReg: $sgpr32
 body:             |
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index 57722e202bc7c..972d4e6692b9f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -171,7 +171,7 @@ entry:
 
   %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
   ; 0x40000 / 64 = 4096 (for wave64)
-  ; CHECK: s_add_u32 s6, s5, 0x40000
+  ; CHECK: s_add_u32 s6, s32, 0x40000
   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
   %a = load volatile i32, i32 addrspace(5)* %aptr
 
@@ -223,7 +223,7 @@ entry:
   %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
 
   ; 0x3ff00 / 64 = 4092 (for wave64)
-  ; CHECK: s_add_u32 s6, s5, 0x3ff00
+  ; CHECK: s_add_u32 s6, s32, 0x3ff00
   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
   ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
   %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
index aece86d9a31eb..91453d73c9e1f 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll
@@ -9,7 +9,10 @@
 ; = 144 bytes with padding between them
 
 ; GCN-LABEL: {{^}}needs_align16_default_stack_align:
-; GCN: s_mov_b32 s5, s32
+; GCN: s_sub_u32 [[SUB:s[0-9]+]], s32, s4
+; GCN-NEXT: v_lshrrev_b32_e64 [[FRAMEDIFF:v[0-9]+]], 6, [[SUB]]
+; GCN: v_add_u32_e64 [[FI:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 16, [[FRAMEDIFF]]
+
 ; GCN-NOT: s32
 
 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen
@@ -135,9 +138,7 @@ define void @default_realign_align128(i32 %idx) #0 {
 
 ; GCN-LABEL: {{^}}disable_realign_align128:
 ; GCN-NOT: s32
-; GCN: s_mov_b32 s5, s32
-; GCN-NOT: s32
-; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:16
+; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:16
 ; GCN-NOT: s32
 define void @disable_realign_align128(i32 %idx) #3 {
   %alloca.align = alloca i32, align 128, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
index dc02327395db2..11cf52ba3e549 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir
@@ -9,11 +9,11 @@
 # CHECK: - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4,
 # CHECK-NEXT: stack-id: 1,
 
-# CHECK: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
-# CHECK: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+# CHECK: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+# CHECK: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
 
-# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr6, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (store 4 into %stack.1, addrspace 5)
-# CHECK: $sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5, implicit-def dead $m0 :: (load 4 from %stack.1, addrspace 5)
+# CHECK: SI_SPILL_S32_SAVE killed renamable $sgpr6, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32, implicit-def dead $m0 :: (store 4 into %stack.1, addrspace 5)
+# CHECK: $sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32, implicit-def dead $m0 :: (load 4 from %stack.1, addrspace 5)
 
 name: no_merge_sgpr_vgpr_spill_slot
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
index dbb09217dac20..2f9d0616fa6dc 100644
--- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
@@ -481,10 +481,10 @@ entry:
 
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
 ; GCN: s_waitcnt
-; GFX900: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
+; GFX900: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
 
 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
-; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s5 offset:4094{{$}}
+; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s32 offset:4094{{$}}
 
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
@@ -635,7 +635,7 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
 ; GCN: s_waitcnt
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
+; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094
 define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
 entry:
   %obj0 = alloca [10 x i32], align 4, addrspace(5)
@@ -652,7 +652,7 @@ entry:
 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
 ; GCN: s_waitcnt
 ; GFX900: buffer_store_dword
-; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
+; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4095
 define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
 entry:
   %obj0 = alloca [10 x i32], align 4, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
index cc864bc02ecaf..f4932d6987cb5 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
@@ -42,7 +42,6 @@ tracksRegLiveness: true
 machineFunctionInfo:
   scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
   scratchWaveOffsetReg: $sgpr4
-  frameOffsetReg: $sgpr5
   stackPtrOffsetReg: $sgpr32
 liveins:
   - { reg: '$vgpr2', virtual-reg: '%0' }
@@ -112,7 +111,7 @@ body: |
     ; and inserting a spill. Here we just check that the point where the error
     ; occurs we see a correctly generated spill.
     ; GCN-LABEL: bb.7:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
 
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
@@ -128,7 +127,7 @@ body: |
     successors: %bb.12(0x80000000)
 
     ; GCN-LABEL: bb.9:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
 
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
@@ -139,7 +138,7 @@ body: |
     successors: %bb.12(0x80000000)
 
     ; GCN-LABEL: bb.10:
-    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec
+    ; GCN: SI_SPILL_V128_SAVE %{{[0-9]+}}, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec
 
     undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
     %15.sub1:vreg_128 = COPY %15.sub0
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index 01bcd6fd84b91..8ff09bbd23705 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -18,7 +18,7 @@
 ; CHECK-NEXT: scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
 ; CHECK-NEXT: scratchWaveOffsetReg: '$sgpr101'
 ; CHECK-NEXT: frameOffsetReg:  '$sgpr101'
-; CHECK-NEXT: stackPtrOffsetReg: '$sp_reg'
+; CHECK-NEXT: stackPtrOffsetReg: '$sgpr101'
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
   %gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0
@@ -38,7 +38,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
 ; CHECK-NEXT: scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
 ; CHECK-NEXT: scratchWaveOffsetReg: '$sgpr101'
 ; CHECK-NEXT: frameOffsetReg:  '$sgpr101'
-; CHECK-NEXT: stackPtrOffsetReg: '$sp_reg'
+; CHECK-NEXT: stackPtrOffsetReg: '$sgpr101'
 ; CHECK-NEXT: body:
 define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
   ret void

From 2f94203e23dee0e1a2ffb18030ea32cd44e3b25f Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Wed, 5 Jun 2019 22:27:31 +0000
Subject: [PATCH 1171/1176] Revert "[AArch64][GlobalISel] Optimize G_FCMP +
 G_SELECT pairs when G_SELECT is fp"

This reverts commit r362435 as this triggers ICE, see PR42129 for details.

llvm-svn: 362662
---
 .../AArch64/AArch64InstructionSelector.cpp    | 104 +-----
 .../AArch64/GlobalISel/fold-fp-select.mir     | 351 ------------------
 2 files changed, 8 insertions(+), 447 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir

diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index c9af8fa1d65a6..5fc272707f50e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -172,7 +172,6 @@ class AArch64InstructionSelector : public InstructionSelector {
 
   bool tryOptVectorShuffle(MachineInstr &I) const;
   bool tryOptVectorDup(MachineInstr &MI) const;
-  bool tryOptSelect(MachineInstr &MI) const;
 
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
@@ -742,19 +741,6 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
   return GenericOpc;
 }
 
-static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
-                                const RegisterBankInfo &RBI) {
-  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
-               AArch64::GPRRegBankID);
-  LLT Ty = MRI.getType(I.getOperand(0).getReg());
-  if (Ty == LLT::scalar(32))
-    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
-  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
-    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
-  return 0;
-}
-
 /// Helper function to select the opcode for a G_FCMP.
 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
   // If this is a compare against +0.0, then we don't have to explicitly
@@ -1788,11 +1774,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     // select instead of an integer select.
     bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
                  AArch64::GPRRegBankID);
+    unsigned CSelOpc = 0;
 
-    if (IsFP && tryOptSelect(I))
-      return true;
+    if (Ty == LLT::scalar(32)) {
+      CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
+    } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
+      CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
+    } else {
+      return false;
+    }
 
-    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
     MachineInstr &TstMI =
         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
              .addDef(AArch64::WZR)
@@ -2819,85 +2810,6 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
   return &I;
 }
 
-bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
-  MachineIRBuilder MIB(I);
-  MachineRegisterInfo &MRI = *MIB.getMRI();
-  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
-
-  // We want to recognize this pattern:
-  //
-  // $z = G_FCMP pred, $x, $y
-  // ...
-  // $w = G_SELECT $z, $a, $b
-  //
-  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
-  // some copies/truncs in between.)
-  //
-  // If we see this, then we can emit something like this:
-  //
-  // fcmp $x, $y
-  // fcsel $w, $a, $b, pred
-  //
-  // Rather than emitting both of the rather long sequences in the standard
-  // G_FCMP/G_SELECT select methods.
-
-  // First, check if the condition is defined by a compare.
-  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
-  while (CondDef) {
-    // We can only fold if all of the defs have one use.
-    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
-      return false;
-
-    // We can skip over G_TRUNC since the condition is 1-bit.
-    // Truncating/extending can have no impact on the value.
-    unsigned Opc = CondDef->getOpcode();
-    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
-      break;
-
-    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
-  }
-
-  // Is the condition defined by a compare?
-  // TODO: Handle G_ICMP.
-  if (!CondDef || CondDef->getOpcode() != TargetOpcode::G_FCMP)
-    return false;
-
-  // Get the condition code for the select.
-  AArch64CC::CondCode CondCode;
-  AArch64CC::CondCode CondCode2;
-  changeFCMPPredToAArch64CC(
-      (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
-      CondCode2);
-
-  // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
-  // instructions to emit the comparison.
-  // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
-  // unnecessary.
-  if (CondCode2 != AArch64CC::AL)
-    return false;
-
-  // Make sure we'll be able to select the compare.
-  unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
-  if (!CmpOpc)
-    return false;
-
-  // Emit a new compare.
-  auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
-  if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
-    Cmp.addUse(CondDef->getOperand(3).getReg());
-
-  // Emit the select.
-  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
-  auto CSel =
-      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
-                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
-          .addImm(CondCode);
-  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
-  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
-  I.eraseFromParent();
-  return true;
-}
-
 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // Try to match a vector splat operation into a dup instruction.
   // We're looking for this pattern:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir
deleted file mode 100644
index 619b9276179a9..0000000000000
--- a/llvm/test/CodeGen/AArch64/GlobalISel/fold-fp-select.mir
+++ /dev/null
@@ -1,351 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
-#
-# Verify the following:
-#
-# - We can fold compares into selects.
-# - This only happens when the result of the compare is only used by the select.
-#
-# Also verify that, for now:
-#
-# - We only support doing this with G_FCMP.
-# - We only support condition flags that require a single instruction.
-#
-
-...
----
-name:            fcmp_more_than_one_user_no_fold
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $s0, $s1, $w1
-
-    ; CHECK-LABEL: name: fcmp_more_than_one_user_no_fold
-    ; CHECK: liveins: $s0, $s1, $w1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
-    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
-    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
-    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
-    ; CHECK: $w1 = COPY [[CSINCWr]]
-    ; CHECK: $s0 = COPY [[FCSELSrrr]]
-    ; CHECK: RET_ReallyLR implicit $s0
-    %0:fpr(s32) = COPY $s0
-    %1:fpr(s32) = COPY $s1
-    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
-    $w1 = COPY %5(s32)
-    $s0 = COPY %4(s32)
-    RET_ReallyLR implicit $s0
-
-...
----
-name:            using_icmp
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $s0, $w0
-
-    ; CHECK-LABEL: name: using_icmp
-    ; CHECK: liveins: $s0, $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 0
-    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
-    ; CHECK: $wzr = SUBSWrr [[COPY]], [[MOVi32imm]], implicit-def $nzcv
-    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[CSINCWr]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
-    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
-    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
-    ; CHECK: $s0 = COPY [[FCSELSrrr]]
-    ; CHECK: RET_ReallyLR implicit $s0
-    %0:gpr(s32) = COPY $w0
-    %1:fpr(s32) = COPY $s0
-    %2:gpr(s32) = G_CONSTANT i32 0
-    %5:fpr(s32) = G_FCONSTANT float 0.000000e+00
-    %6:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
-    %3:gpr(s1) = G_TRUNC %6(s32)
-    %7:fpr(s1) = COPY %3(s1)
-    %4:fpr(s32) = G_SELECT %7(s1), %1, %5
-    $s0 = COPY %4(s32)
-    RET_ReallyLR implicit $s0
-
-...
----
-name:            foeq
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $s0, $s1
-
-    ; CHECK-LABEL: name: foeq
-    ; CHECK: liveins: $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
-    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 0, implicit $nzcv
-    ; CHECK: $s0 = COPY [[FCSELSrrr]]
-    ; CHECK: RET_ReallyLR implicit $s0
-    %0:fpr(s32) = COPY $s0
-    %1:fpr(s32) = COPY $s1
-    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
-    $s0 = COPY %4(s32)
-    RET_ReallyLR implicit $s0
-
-...
----
-name:            fueq
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $s0, $s1
-
-    ; CHECK-LABEL: name: fueq
-    ; CHECK: liveins: $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
-    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
-    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
-    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
-    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[FMOVS0_]], [[COPY1]], 1, implicit $nzcv
-    ; CHECK: $s0 = COPY [[FCSELSrrr]]
-    ; CHECK: RET_ReallyLR implicit $s0
-    %0:fpr(s32) = COPY $s0
-    %1:fpr(s32) = COPY $s1
-    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(ueq), %0(s32), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s32) = G_SELECT %6(s1), %2, %1
-    $s0 = COPY %4(s32)
-    RET_ReallyLR implicit $s0
-
-...
----
-name:            fone
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $s0, $s1
-
-    ; CHECK-LABEL: name: fone
-    ; CHECK: liveins: $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
-    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
-    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
-    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
-    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
-    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
-    ; CHECK: $s0 = COPY [[FCSELSrrr]]
-    ; CHECK: RET_ReallyLR implicit $s0
-    %0:fpr(s32) = COPY $s0
-    %1:fpr(s32) = COPY $s1
-    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(one), %0(s32), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s32) = G_SELECT %6(s1), %1, %2
-    $s0 = COPY %4(s32)
-    RET_ReallyLR implicit $s0
-
-...
----
-name:            fune
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $s0, $s1
-
-    ; CHECK-LABEL: name: fune
-    ; CHECK: liveins: $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
-    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[FMOVS0_]], 1, implicit $nzcv
-    ; CHECK: $s0 = COPY [[FCSELSrrr]]
-    ; CHECK: RET_ReallyLR implicit $s0
-    %0:fpr(s32) = COPY $s0
-    %1:fpr(s32) = COPY $s1
-    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(une), %0(s32), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s32) = G_SELECT %6(s1), %1, %2
-    $s0 = COPY %4(s32)
-    RET_ReallyLR implicit $s0
-
-...
----
-name:            doeq
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0, $d1
-
-    ; CHECK-LABEL: name: doeq
-    ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
-    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 0, implicit $nzcv
-    ; CHECK: $d0 = COPY [[FCSELDrrr]]
-    ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s64) = COPY $d0
-    %1:fpr(s64) = COPY $d1
-    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(oeq), %0(s64), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s64) = G_SELECT %6(s1), %2, %1
-    $d0 = COPY %4(s64)
-    RET_ReallyLR implicit $d0
-
-...
----
-name:            dueq
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0, $d1
-
-    ; CHECK-LABEL: name: dueq
-    ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
-    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
-    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
-    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
-    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
-    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[FMOVD0_]], [[COPY1]], 1, implicit $nzcv
-    ; CHECK: $d0 = COPY [[FCSELDrrr]]
-    ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s64) = COPY $d0
-    %1:fpr(s64) = COPY $d1
-    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(ueq), %0(s64), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s64) = G_SELECT %6(s1), %2, %1
-    $d0 = COPY %4(s64)
-    RET_ReallyLR implicit $d0
-
-...
----
-name:            done
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0, $d1
-
-    ; CHECK-LABEL: name: done
-    ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
-    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
-    ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
-    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[CSINCWr]], [[CSINCWr1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[ORRWrr]]
-    ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY2]]
-    ; CHECK: $wzr = ANDSWri [[COPY3]], 0, implicit-def $nzcv
-    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
-    ; CHECK: $d0 = COPY [[FCSELDrrr]]
-    ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s64) = COPY $d0
-    %1:fpr(s64) = COPY $d1
-    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(one), %0(s64), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s64) = G_SELECT %6(s1), %1, %2
-    $d0 = COPY %4(s64)
-    RET_ReallyLR implicit $d0
-
-...
----
-name:            dune
-alignment:       2
-legalized:       true
-regBankSelected: true
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    liveins: $d0, $d1
-
-    ; CHECK-LABEL: name: dune
-    ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
-    ; CHECK: FCMPDri [[COPY]], implicit-def $nzcv
-    ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[FMOVD0_]], 1, implicit $nzcv
-    ; CHECK: $d0 = COPY [[FCSELDrrr]]
-    ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s64) = COPY $d0
-    %1:fpr(s64) = COPY $d1
-    %2:fpr(s64) = G_FCONSTANT double 0.000000e+00
-    %5:gpr(s32) = G_FCMP floatpred(une), %0(s64), %2
-    %3:gpr(s1) = G_TRUNC %5(s32)
-    %6:fpr(s1) = COPY %3(s1)
-    %4:fpr(s64) = G_SELECT %6(s1), %1, %2
-    $d0 = COPY %4(s64)
-    RET_ReallyLR implicit $d0
-
-...

From 0da48535551f901449567edd18425d3452aff592 Mon Sep 17 00:00:00 2001
From: Sergey Zverev <sergey.i.zverev@intel.com>
Date: Mon, 17 Jun 2019 17:10:33 -0700
Subject: [PATCH 1172/1176] Additional changes after merge to fix merge
 conflicts.

Signed-off-by: Sergey Zverev <sergey.i.zverev@intel.com>
---
 clang/lib/Sema/SemaStmtAsm.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index e48f7df09aaea..a59eb8b812465 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -258,10 +258,10 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
   // Skip all the checks if we are compiling SYCL device code, but the function
   // is not marked to be used on device, this code won't be codegen'ed anyway.
   if (getLangOpts().SYCLIsDevice) {
-    GCCAsmStmt *NS = new (Context) GCCAsmStmt(
-        Context, AsmLoc, IsSimple, IsVolatile, NumOutputs, NumInputs, Names,
-        Constraints, Exprs.data(), AsmString, NumClobbers, Clobbers, RParenLoc);
-    return NS;
+    return new (Context)
+      GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs,
+                 NumInputs, Names, Constraints, Exprs.data(), AsmString,
+                 NumClobbers, Clobbers, NumLabels, RParenLoc);
   }
 
   for (unsigned i = 0; i != NumOutputs; i++) {

From 1674e9e115242cf7d1d02b9b83f3accd0d719f5b Mon Sep 17 00:00:00 2001
From: Sergey Zverev <sergey.i.zverev@intel.com>
Date: Tue, 25 Jun 2019 16:27:50 -0700
Subject: [PATCH 1173/1176] Fix to CodeGenSYCL/basic-kernel-wrapper.cpp to
 match the current dump.

Signed-off-by: Sergey Zverev <sergey.i.zverev@intel.com>
---
 clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
index e5d58b91f0c1a..71fbfa8ca3dbb 100644
--- a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
+++ b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp
@@ -19,7 +19,7 @@ int main() {
   return 0;
 }
 
-// CHECK: define spir_kernel void @{{.*}}kernel_function(i32 addrspace(1)* [[MEM_ARG:%[a-zA-Z0-9_]+]], %"struct.{{.*}}.cl::sycl::range"* byval align 4 [[ACC_RANGE:%[a-zA-Z0-9_]+]], %"struct.{{.*}}.cl::sycl::range"* byval align 4 [[MEM_RANGE:%[a-zA-Z0-9_]+]], %"struct.{{.*}}.cl::sycl::id"* byval align 4 [[OFFSET:%[a-zA-Z0-9_]+]])
+// CHECK: define spir_kernel void @{{.*}}kernel_function(i32 addrspace(1)* [[MEM_ARG:%[a-zA-Z0-9_]+]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE:%[a-zA-Z0-9_]+]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE:%[a-zA-Z0-9_]+]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET:%[a-zA-Z0-9_]+]])
 //
 // Check alloca for pointer argument
 // CHECK: [[MEM_ARG]].addr = alloca i32 addrspace(1)*
@@ -37,7 +37,7 @@ int main() {
 // CHECK: [[MEM_LOAD:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG]].addr
 
 // Check accessor __init method call
-// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor"* [[ACCESSOR]], i32 addrspace(1)* [[MEM_LOAD]], %"struct.{{.*}}.cl::sycl::range"* byval align 4 [[ACC_RANGE]], %"struct.{{.*}}.cl::sycl::range"* byval align 4 [[MEM_RANGE]], %"struct.{{.*}}.cl::sycl::id"* byval align 4 [[OFFSET]])
+// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor"* [[ACCESSOR]], i32 addrspace(1)* [[MEM_LOAD]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET]])
 
 // Check lambda "()" operator call
 // CHECK: call spir_func void @{{.*}}(%"class.{{.*}}.anon"* [[ANON]])

From 2906ec360a5b50c800d82ae616f1e5a99a422cea Mon Sep 17 00:00:00 2001
From: Sergey Zverev <sergey.i.zverev@intel.com>
Date: Wed, 10 Jul 2019 18:04:45 -0700
Subject: [PATCH 1174/1176] [SYCL] update to sycl-offload.c to match the new
 options ordering.

Signed-off-by: Sergey Zverev <sergey.i.zverev@intel.com>
---
 clang/test/Driver/sycl-offload.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c
index bbd171d1188ed..a630245a5dc1c 100644
--- a/clang/test/Driver/sycl-offload.c
+++ b/clang/test/Driver/sycl-offload.c
@@ -106,8 +106,8 @@
 // RUN: %clang -target x86_64-unknown-linux-gnu -fsycl -c %s -### 2>&1 \
 // RUN:  | FileCheck %s -check-prefix=CHK-INT-HEADER
 // CHK-INT-HEADER: clang{{.*}} "-fsycl-is-device" {{.*}} "-o" "[[OUTPUT1:.+\.o]]"
-// CHK-INT-HEADER: clang{{.*}} "-triple" "spir64-unknown-linux-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]"
-// CHK-INT-HEADER: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-o" "[[OUTPUT2:.+\.o]]" {{.*}} "-include" "[[INPUT1]]" "-dependency-filter" "[[INPUT1]]"
+// CHK-INT-HEADER: clang{{.*}} "-triple" "spir64-unknown-linux-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
+// CHK-INT-HEADER: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-include" "[[INPUT1]]" "-dependency-filter" "[[INPUT1]]" {{.*}} "-o" "[[OUTPUT2:.+.o]]"
 // CHK-INT-HEADER: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-linux-sycldevice,host-x86_64-unknown-linux-gnu" {{.*}} "-inputs=[[OUTPUT1]],[[OUTPUT2]]"
 
 /// ###########################################################################

From 04dc5fbde8aa7012293d1bad8d1cdf47d56a62c9 Mon Sep 17 00:00:00 2001
From: Sergey Zverev <sergey.i.zverev@intel.com>
Date: Mon, 15 Jul 2019 16:13:03 -0700
Subject: [PATCH 1175/1176] XFAILing
 llvm-spirv/test/DebugInfo/UnknownBaseType.ll

Signed-off-by: Sergey Zverev <sergey.i.zverev@intel.com>
---
 llvm-spirv/test/DebugInfo/UnknownBaseType.ll | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm-spirv/test/DebugInfo/UnknownBaseType.ll b/llvm-spirv/test/DebugInfo/UnknownBaseType.ll
index 2a422ea38ad92..091c3290faa13 100644
--- a/llvm-spirv/test/DebugInfo/UnknownBaseType.ll
+++ b/llvm-spirv/test/DebugInfo/UnknownBaseType.ll
@@ -1,4 +1,5 @@
 ; REQUIRES: object-emission
+; XFAIL: *
 
 ; RUN: llvm-as < %s -o %t.bc
 ; RUN: llvm-spirv %t.bc -o %t.spv -spirv-mem2reg=false

From 98b829e56eb2b837861b3f480dfc59f006380bee Mon Sep 17 00:00:00 2001
From: Sergey Zverev <sergey.i.zverev@intel.com>
Date: Tue, 16 Jul 2019 10:58:35 -0700
Subject: [PATCH 1176/1176] Further update for
 clang/test/Driver/sycl-offload.c.

Signed-off-by: Sergey Zverev <sergey.i.zverev@intel.com>
---
 clang/test/Driver/sycl-offload.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c
index d8843193083be..858615d29f2d2 100644
--- a/clang/test/Driver/sycl-offload.c
+++ b/clang/test/Driver/sycl-offload.c
@@ -452,9 +452,9 @@
 // CHK-TOOLS-GEN: ocloc{{.*}} "-output" "[[OUTPUT4:.+\.out]]" {{.*}} "[[OUTPUT3]]"
 // CHK-TOOLS-AOT: clang-offload-wrapper{{.*}} "-o=[[OUTPUT5:.+\.bc]]" "-host=x86_64-unknown-linux-gnu" "-kind=sycl" "[[OUTPUT4]]"
 // CHK-TOOLS-AOT: llc{{.*}} "-filetype=obj" "-o" "[[OUTPUT6:.+\.o]]" "[[OUTPUT5]]"
-// CHK-TOOLS-FPGA: clang{{.*}} "-triple" "spir64_fpga-unknown-linux-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]"
-// CHK-TOOLS-GEN: clang{{.*}} "-triple" "spir64_gen-unknown-linux-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]"
-// CHK-TOOLS-AOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-o" "[[OUTPUT7:.+\.o]]" {{.*}} "-include" "[[INPUT1]]"
+// CHK-TOOLS-FPGA: clang{{.*}} "-triple" "spir64_fpga-unknown-linux-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
+// CHK-TOOLS-GEN: clang{{.*}} "-triple" "spir64_gen-unknown-linux-sycldevice" {{.*}} "-fsycl-int-header=[[INPUT1:.+\.h]]" "-faddrsig"
+// CHK-TOOLS-AOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-include" "[[INPUT1]]" {{.*}} "-o" "[[OUTPUT7:.+\.o]]"
 // CHK-TOOLS-AOT: ld{{.*}} "[[OUTPUT7]]" "[[OUTPUT6]]" {{.*}} "-lsycl"
 
 /// ###########################################################################